From: wanchao.xu Date: Thu, 10 Apr 2025 05:50:17 +0000 (+0800) Subject: Add qemu-tools package based on qemu 5.2 X-Git-Tag: qemu-tools_5.2~3 X-Git-Url: http://review.tizen.org/git/?a=commitdiff_plain;h=c4ba6c33eb0517eaa8c46c97853758a3626a27b5;p=tools%2Fqemu-tools.git Add qemu-tools package based on qemu 5.2 * Remove the source code of qemu which are not used when building qemu-tools. * Update the configuration and meson build for qemu-tools. Change-Id: I5243150229901ed0ea4728d28ca88fff3fdb4c7c Signed-off-by: wanchao.xu --- diff --git a/.editorconfig b/.editorconfig new file mode 100644 index 000000000..22681d91c --- /dev/null +++ b/.editorconfig @@ -0,0 +1,44 @@ +# EditorConfig is a file format and collection of text editor plugins +# for maintaining consistent coding styles between different editors +# and IDEs. Most popular editors support this either natively or via +# plugin. +# +# Check https://editorconfig.org for details. + +root = true + +[*] +end_of_line = lf +insert_final_newline = true +charset = utf-8 + +[*.mak] +indent_style = tab +indent_size = 8 +file_type_emacs = makefile + +[Makefile*] +indent_style = tab +indent_size = 8 +file_type_emacs = makefile + +[*.{c,h,c.inc,h.inc}] +indent_style = space +indent_size = 4 +file_type_emacs = c + +[*.sh] +indent_style = space +indent_size = 4 + +[*.{s,S}] +indent_style = tab +indent_size = 8 +file_type_emacs = asm + +[*.{vert,frag}] +file_type_emacs = glsl + +[*.json] +indent_style = space +file_type_emacs = python diff --git a/.gitattributes b/.gitattributes new file mode 100644 index 000000000..3d2fe2ecd --- /dev/null +++ b/.gitattributes @@ -0,0 +1,2 @@ +*.c.inc diff=c +*.h.inc diff=c diff --git a/.gitignore b/.gitignore new file mode 100644 index 000000000..b32bca131 --- /dev/null +++ b/.gitignore @@ -0,0 +1,12 @@ +/GNUmakefile +/build/ +*.pyc +.sdk +.stgit-* +.git-submodule-status +cscope.* +tags +TAGS +*~ +*.ast_raw +*.depend_raw diff --git a/Makefile b/Makefile new file mode 100644 index 000000000..2f69d31f2 --- /dev/null +++ b/Makefile @@ -0,0 +1,319 @@ +# Makefile for QEMU. + +ifneq ($(words $(subst :, ,$(CURDIR))), 1) + $(error main directory cannot contain spaces nor colons) +endif + +# Always point to the root of the build tree (needs GNU make). +BUILD_DIR=$(CURDIR) + +# Before including a proper config-host.mak, assume we are in the source tree +SRC_PATH=. + +# Don't use implicit rules or variables +# we have explicit rules for everything +MAKEFLAGS += -rR + +SHELL = /usr/bin/env bash -o pipefail + +# Usage: $(call quiet-command,command and args,"NAME","args to print") +# This will run "command and args", and either: +# if V=1 just print the whole command and args +# otherwise print the 'quiet' output in the format " NAME args to print" +# NAME should be a short name of the command, 7 letters or fewer. +# If called with only a single argument, will print nothing in quiet mode. +quiet-command-run = $(if $(V),,$(if $2,printf " %-7s %s\n" $2 $3 && ))$1 +quiet-@ = $(if $(V),,@) +quiet-command = $(quiet-@)$(call quiet-command-run,$1,$2,$3) + +UNCHECKED_GOALS := %clean TAGS cscope ctags dist \ + help check-help print-% \ + docker docker-% vm-help vm-test vm-build-% + +all: +.PHONY: all clean distclean recurse-all dist msi FORCE + +# Don't try to regenerate Makefile or configure +# We don't generate any of them +Makefile: ; +configure: ; + +# All following code might depend on configuration variables +ifneq ($(wildcard config-host.mak),) +include config-host.mak + +git-submodule-update: +.git-submodule-status: git-submodule-update config-host.mak +Makefile: .git-submodule-status + +.PHONY: git-submodule-update + +git_module_status := $(shell \ + cd '$(SRC_PATH)' && \ + GIT="$(GIT)" ./scripts/git-submodule.sh status $(GIT_SUBMODULES); \ + echo $$?; \ +) + +ifeq (1,$(git_module_status)) +ifeq (no,$(GIT_UPDATE)) +git-submodule-update: + $(call quiet-command, \ + echo && \ + echo "GIT submodule checkout is out of date. Please run" && \ + echo " scripts/git-submodule.sh update $(GIT_SUBMODULES)" && \ + echo "from the source directory checkout $(SRC_PATH)" && \ + echo && \ + exit 1) +else +git-submodule-update: + $(call quiet-command, \ + (cd $(SRC_PATH) && GIT="$(GIT)" ./scripts/git-submodule.sh update $(GIT_SUBMODULES)), \ + "GIT","$(GIT_SUBMODULES)") +endif +endif + +# 0. ensure the build tree is okay + +# Check that we're not trying to do an out-of-tree build from +# a tree that's been used for an in-tree build. +ifneq ($(realpath $(SRC_PATH)),$(realpath .)) +ifneq ($(wildcard $(SRC_PATH)/config-host.mak),) +$(error This is an out of tree build but your source tree ($(SRC_PATH)) \ +seems to have been used for an in-tree build. You can fix this by running \ +"$(MAKE) distclean && rm -rf *-linux-user *-softmmu" in your source tree) +endif +endif + +# force a rerun of configure if config-host.mak is too old or corrupted +ifeq ($(MESON),) +.PHONY: config-host.mak +x := $(shell rm -rf meson-private meson-info meson-logs) +endif +ifeq ($(NINJA),) +.PHONY: config-host.mak +x := $(shell rm -rf meson-private meson-info meson-logs) +else +export NINJA +endif +ifeq ($(wildcard build.ninja),) +.PHONY: config-host.mak +x := $(shell rm -rf meson-private meson-info meson-logs) +endif +ifeq ($(origin prefix),file) +.PHONY: config-host.mak +x := $(shell rm -rf meson-private meson-info meson-logs) +endif + +# 1. ensure config-host.mak is up-to-date +config-host.mak: $(SRC_PATH)/configure $(SRC_PATH)/VERSION + @echo config-host.mak is out-of-date, running configure + @if test -f meson-private/coredata.dat; then \ + ./config.status --skip-meson; \ + else \ + ./config.status && touch build.ninja.stamp; \ + fi + +# 2. meson.stamp exists if meson has run at least once (so ninja reconfigure +# works), but otherwise never needs to be updated +meson-private/coredata.dat: meson.stamp +meson.stamp: config-host.mak + @touch meson.stamp + +# 3. ensure generated build files are up-to-date + +ifneq ($(NINJA),) +Makefile.ninja: build.ninja + $(quiet-@){ \ + echo 'ninja-targets = \'; \ + $(NINJA) -t targets all | sed 's/:.*//; $$!s/$$/ \\/'; \ + echo 'build-files = \'; \ + $(NINJA) -t query build.ninja | sed -n '1,/^ input:/d; /^ outputs:/q; s/$$/ \\/p'; \ + } > $@.tmp && mv $@.tmp $@ +-include Makefile.ninja + +# A separate rule is needed for Makefile dependencies to avoid -n +build.ninja: build.ninja.stamp +build.ninja.stamp: meson.stamp $(build-files) + $(NINJA) $(if $V,-v,) build.ninja && touch $@ +endif + +ifneq ($(MESON),) +Makefile.mtest: build.ninja scripts/mtest2make.py + $(MESON) introspect --targets --tests --benchmarks | $(PYTHON) scripts/mtest2make.py > $@ +-include Makefile.mtest +endif + +# 4. Rules to bridge to other makefiles + +ifneq ($(NINJA),) +MAKE.n = $(findstring n,$(firstword $(MAKEFLAGS))) +MAKE.k = $(findstring k,$(firstword $(MAKEFLAGS))) +MAKE.q = $(findstring q,$(firstword $(MAKEFLAGS))) +MAKE.nq = $(if $(word 2, $(MAKE.n) $(MAKE.q)),nq) +NINJAFLAGS = $(if $V,-v) $(if $(MAKE.n), -n) $(if $(MAKE.k), -k0) \ + $(filter-out -j, $(lastword -j1 $(filter -l% -j%, $(MAKEFLAGS)))) \ + +ninja-cmd-goals = $(or $(MAKECMDGOALS), all) +ninja-cmd-goals += $(foreach t, $(.tests), $(.test.deps.$t)) + +makefile-targets := build.ninja ctags TAGS cscope dist clean uninstall +# "ninja -t targets" also lists all prerequisites. If build system +# files are marked as PHONY, however, Make will always try to execute +# "ninja build.ninja". +ninja-targets := $(filter-out $(build-files) $(makefile-targets), $(ninja-targets)) +.PHONY: $(ninja-targets) run-ninja +$(ninja-targets): run-ninja + +# Use "| cat" to give Ninja a more "make-y" output. Use "+" to bypass the +# --output-sync line. +run-ninja: config-host.mak +ifneq ($(filter $(ninja-targets), $(ninja-cmd-goals)),) + +$(quiet-@)$(if $(MAKE.nq),@:, $(NINJA) \ + $(NINJAFLAGS) $(sort $(filter $(ninja-targets), $(ninja-cmd-goals))) | cat) +endif +endif + +# Force configure to re-run if the API symbols are updated +ifeq ($(CONFIG_PLUGIN),y) +config-host.mak: $(SRC_PATH)/plugins/qemu-plugins.symbols + +.PHONY: plugins +plugins: + $(call quiet-command,\ + $(MAKE) $(SUBDIR_MAKEFLAGS) -C contrib/plugins V="$(V)", \ + "BUILD", "example plugins") +endif # $(CONFIG_PLUGIN) + +else # config-host.mak does not exist +config-host.mak: +ifneq ($(filter-out $(UNCHECKED_GOALS),$(MAKECMDGOALS)),$(if $(MAKECMDGOALS),,fail)) + @echo "Please call configure before running make!" + @exit 1 +endif +endif # config-host.mak does not exist + +SUBDIR_MAKEFLAGS=$(if $(V),,--no-print-directory --quiet) + +#include $(SRC_PATH)/tests/Makefile.include + +all: recurse-all + +ROM_DIRS = $(addprefix pc-bios/, $(ROMS)) +ROM_DIRS_RULES=$(foreach t, all clean, $(addsuffix /$(t), $(ROM_DIRS))) +# Only keep -O and -g cflags +.PHONY: $(ROM_DIRS_RULES) +$(ROM_DIRS_RULES): + $(call quiet-command,$(MAKE) $(SUBDIR_MAKEFLAGS) -C $(dir $@) V="$(V)" TARGET_DIR="$(dir $@)" $(notdir $@),) + +.PHONY: recurse-all recurse-clean +recurse-all: $(addsuffix /all, $(ROM_DIRS)) +recurse-clean: $(addsuffix /clean, $(ROM_DIRS)) + +###################################################################### + +clean: recurse-clean + -$(quiet-@)test -f build.ninja && $(NINJA) $(NINJAFLAGS) -t clean || : + -$(quiet-@)test -f build.ninja && $(NINJA) $(NINJAFLAGS) clean-ctlist || : +# avoid old build problems by removing potentially incorrect old files + rm -f config.mak op-i386.h opc-i386.h gen-op-i386.h op-arm.h opc-arm.h gen-op-arm.h + find . \( -name '*.so' -o -name '*.dll' -o -name '*.[oda]' \) -type f \ + ! -path ./roms/edk2/ArmPkg/Library/GccLto/liblto-aarch64.a \ + ! -path ./roms/edk2/ArmPkg/Library/GccLto/liblto-arm.a \ + -exec rm {} + + rm -f TAGS cscope.* *.pod *~ */*~ + rm -f fsdev/*.pod scsi/*.pod + +VERSION = $(shell cat $(SRC_PATH)/VERSION) + +dist: qemu-$(VERSION).tar.bz2 + +qemu-%.tar.bz2: + $(SRC_PATH)/scripts/make-release "$(SRC_PATH)" "$(patsubst qemu-%.tar.bz2,%,$@)" + +distclean: clean + -$(quiet-@)test -f build.ninja && $(NINJA) $(NINJAFLAGS) -t clean -g || : + rm -f config-host.mak config-host.h* + rm -f tests/tcg/config-*.mak + rm -f config-all-disas.mak config.status + rm -f tests/qemu-iotests/common.env + rm -f roms/seabios/config.mak roms/vgabios/config.mak + rm -f qemu-plugins-ld.symbols qemu-plugins-ld64.symbols + rm -f *-config-target.h *-config-devices.mak *-config-devices.h + rm -rf meson-private meson-logs meson-info compile_commands.json + rm -f Makefile.ninja Makefile.mtest build.ninja.stamp meson.stamp + rm -f config.log + rm -f linux-headers/asm + rm -Rf .sdk + +find-src-path = find "$(SRC_PATH)/" -path "$(SRC_PATH)/meson" -prune -o \( -name "*.[chsS]" -o -name "*.[ch].inc" \) + +.PHONY: ctags +ctags: + rm -f "$(SRC_PATH)/"tags + $(find-src-path) -exec ctags -f "$(SRC_PATH)/"tags --append {} + + +.PHONY: TAGS +TAGS: + rm -f "$(SRC_PATH)/"TAGS + $(find-src-path) -exec etags -f "$(SRC_PATH)/"TAGS --append {} + + +.PHONY: cscope +cscope: + rm -f "$(SRC_PATH)"/cscope.* + $(find-src-path) -print | sed -e 's,^\./,,' > "$(SRC_PATH)/cscope.files" + cscope -b -i"$(SRC_PATH)/cscope.files" -f"$(SRC_PATH)"/cscope.out + +# Needed by "meson install" +export DESTDIR + +#include $(SRC_PATH)/tests/docker/Makefile.include +#include $(SRC_PATH)/tests/vm/Makefile.include + +print-help-run = printf " %-30s - %s\\n" "$1" "$2" +print-help = @$(call print-help-run,$1,$2) + +.PHONY: help +help: + @echo 'Generic targets:' + $(call print-help,all,Build all) + $(call print-help,dir/file.o,Build specified target only) + $(call print-help,install,Install QEMU, documentation and tools) + $(call print-help,ctags/TAGS,Generate tags file for editors) + $(call print-help,cscope,Generate cscope index) + $(call print-help,sparse,Run sparse on the QEMU source) + @echo '' +ifeq ($(CONFIG_PLUGIN),y) + @echo 'Plugin targets:' + $(call print-help,plugins,Build the example TCG plugins) + @echo '' +endif + @echo 'Cleaning targets:' + $(call print-help,clean,Remove most generated files but keep the config) + $(call print-help,distclean,Remove all generated files) + $(call print-help,dist,Build a distributable tarball) + @echo '' + @echo 'Test targets:' + $(call print-help,check,Run all tests (check-help for details)) + $(call print-help,bench,Run all benchmarks) + $(call print-help,docker,Help about targets running tests inside containers) + $(call print-help,vm-help,Help about targets running tests inside VM) + @echo '' + @echo 'Documentation targets:' + $(call print-help,html man,Build documentation in specified format) + @echo '' +ifdef CONFIG_WIN32 + @echo 'Windows targets:' + $(call print-help,installer,Build NSIS-based installer for QEMU) +ifdef CONFIG_QGA_MSI + $(call print-help,msi,Build MSI-based installer for qemu-ga) +endif + @echo '' +endif + $(call print-help,$(MAKE) [targets],(quiet build, default)) + $(call print-help,$(MAKE) V=1 [targets],(verbose build)) + +# will delete the target of a rule if commands exit with a nonzero exit status +.DELETE_ON_ERROR: + +print-%: + @echo '$*=$($*)' diff --git a/VERSION b/VERSION new file mode 100644 index 000000000..91ff57278 --- /dev/null +++ b/VERSION @@ -0,0 +1 @@ +5.2.0 diff --git a/authz/base.c b/authz/base.c new file mode 100644 index 000000000..f2b7fbe9c --- /dev/null +++ b/authz/base.c @@ -0,0 +1,83 @@ +/* + * QEMU authorization framework base class + * + * Copyright (c) 2018 Red Hat, Inc. + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, see . + * + */ + +#include "qemu/osdep.h" +#include "authz/base.h" +#include "qemu/module.h" +#include "trace.h" + +bool qauthz_is_allowed(QAuthZ *authz, + const char *identity, + Error **errp) +{ + QAuthZClass *cls = QAUTHZ_GET_CLASS(authz); + bool allowed; + + allowed = cls->is_allowed(authz, identity, errp); + trace_qauthz_is_allowed(authz, identity, allowed); + + return allowed; +} + + +bool qauthz_is_allowed_by_id(const char *authzid, + const char *identity, + Error **errp) +{ + QAuthZ *authz; + Object *obj; + Object *container; + + container = object_get_objects_root(); + obj = object_resolve_path_component(container, + authzid); + if (!obj) { + error_setg(errp, "Cannot find QAuthZ object ID %s", + authzid); + return false; + } + + if (!object_dynamic_cast(obj, TYPE_QAUTHZ)) { + error_setg(errp, "Object '%s' is not a QAuthZ subclass", + authzid); + return false; + } + + authz = QAUTHZ(obj); + + return qauthz_is_allowed(authz, identity, errp); +} + + +static const TypeInfo authz_info = { + .parent = TYPE_OBJECT, + .name = TYPE_QAUTHZ, + .instance_size = sizeof(QAuthZ), + .class_size = sizeof(QAuthZClass), + .abstract = true, +}; + +static void qauthz_register_types(void) +{ + type_register_static(&authz_info); +} + +type_init(qauthz_register_types) + diff --git a/authz/list.c b/authz/list.c new file mode 100644 index 000000000..0e17eed89 --- /dev/null +++ b/authz/list.c @@ -0,0 +1,270 @@ +/* + * QEMU access control list authorization driver + * + * Copyright (c) 2018 Red Hat, Inc. + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, see . + * + */ + +#include "qemu/osdep.h" +#include "authz/list.h" +#include "trace.h" +#include "qom/object_interfaces.h" +#include "qapi/qapi-visit-authz.h" +#include "qemu/module.h" + +static bool qauthz_list_is_allowed(QAuthZ *authz, + const char *identity, + Error **errp) +{ + QAuthZList *lauthz = QAUTHZ_LIST(authz); + QAuthZListRuleList *rules = lauthz->rules; + + while (rules) { + QAuthZListRule *rule = rules->value; + QAuthZListFormat format = rule->has_format ? rule->format : + QAUTHZ_LIST_FORMAT_EXACT; + + trace_qauthz_list_check_rule(authz, rule->match, identity, + format, rule->policy); + switch (format) { + case QAUTHZ_LIST_FORMAT_EXACT: + if (g_str_equal(rule->match, identity)) { + return rule->policy == QAUTHZ_LIST_POLICY_ALLOW; + } + break; + case QAUTHZ_LIST_FORMAT_GLOB: + if (g_pattern_match_simple(rule->match, identity)) { + return rule->policy == QAUTHZ_LIST_POLICY_ALLOW; + } + break; + default: + g_warn_if_reached(); + return false; + } + rules = rules->next; + } + + trace_qauthz_list_default_policy(authz, identity, lauthz->policy); + return lauthz->policy == QAUTHZ_LIST_POLICY_ALLOW; +} + + +static void +qauthz_list_prop_set_policy(Object *obj, + int value, + Error **errp G_GNUC_UNUSED) +{ + QAuthZList *lauthz = QAUTHZ_LIST(obj); + + lauthz->policy = value; +} + + +static int +qauthz_list_prop_get_policy(Object *obj, + Error **errp G_GNUC_UNUSED) +{ + QAuthZList *lauthz = QAUTHZ_LIST(obj); + + return lauthz->policy; +} + + +static void +qauthz_list_prop_get_rules(Object *obj, Visitor *v, const char *name, + void *opaque, Error **errp) +{ + QAuthZList *lauthz = QAUTHZ_LIST(obj); + + visit_type_QAuthZListRuleList(v, name, &lauthz->rules, errp); +} + +static void +qauthz_list_prop_set_rules(Object *obj, Visitor *v, const char *name, + void *opaque, Error **errp) +{ + QAuthZList *lauthz = QAUTHZ_LIST(obj); + QAuthZListRuleList *oldrules; + + oldrules = lauthz->rules; + visit_type_QAuthZListRuleList(v, name, &lauthz->rules, errp); + + qapi_free_QAuthZListRuleList(oldrules); +} + + +static void +qauthz_list_finalize(Object *obj) +{ + QAuthZList *lauthz = QAUTHZ_LIST(obj); + + qapi_free_QAuthZListRuleList(lauthz->rules); +} + + +static void +qauthz_list_class_init(ObjectClass *oc, void *data) +{ + QAuthZClass *authz = QAUTHZ_CLASS(oc); + + object_class_property_add_enum(oc, "policy", + "QAuthZListPolicy", + &QAuthZListPolicy_lookup, + qauthz_list_prop_get_policy, + qauthz_list_prop_set_policy); + + object_class_property_add(oc, "rules", "QAuthZListRule", + qauthz_list_prop_get_rules, + qauthz_list_prop_set_rules, + NULL, NULL); + + authz->is_allowed = qauthz_list_is_allowed; +} + + +QAuthZList *qauthz_list_new(const char *id, + QAuthZListPolicy policy, + Error **errp) +{ + return QAUTHZ_LIST( + object_new_with_props(TYPE_QAUTHZ_LIST, + object_get_objects_root(), + id, errp, + "policy", QAuthZListPolicy_str(policy), + NULL)); +} + +ssize_t qauthz_list_append_rule(QAuthZList *auth, + const char *match, + QAuthZListPolicy policy, + QAuthZListFormat format, + Error **errp) +{ + QAuthZListRule *rule; + QAuthZListRuleList *rules, *tmp; + size_t i = 0; + + rule = g_new0(QAuthZListRule, 1); + rule->policy = policy; + rule->match = g_strdup(match); + rule->format = format; + rule->has_format = true; + + tmp = g_new0(QAuthZListRuleList, 1); + tmp->value = rule; + + rules = auth->rules; + if (rules) { + while (rules->next) { + i++; + rules = rules->next; + } + rules->next = tmp; + return i + 1; + } else { + auth->rules = tmp; + return 0; + } +} + + +ssize_t qauthz_list_insert_rule(QAuthZList *auth, + const char *match, + QAuthZListPolicy policy, + QAuthZListFormat format, + size_t index, + Error **errp) +{ + QAuthZListRule *rule; + QAuthZListRuleList *rules, *tmp; + size_t i = 0; + + rule = g_new0(QAuthZListRule, 1); + rule->policy = policy; + rule->match = g_strdup(match); + rule->format = format; + rule->has_format = true; + + tmp = g_new0(QAuthZListRuleList, 1); + tmp->value = rule; + + rules = auth->rules; + if (rules && index > 0) { + while (rules->next && i < (index - 1)) { + i++; + rules = rules->next; + } + tmp->next = rules->next; + rules->next = tmp; + return i + 1; + } else { + tmp->next = auth->rules; + auth->rules = tmp; + return 0; + } +} + + +ssize_t qauthz_list_delete_rule(QAuthZList *auth, const char *match) +{ + QAuthZListRule *rule; + QAuthZListRuleList *rules, *prev; + size_t i = 0; + + prev = NULL; + rules = auth->rules; + while (rules) { + rule = rules->value; + if (g_str_equal(rule->match, match)) { + if (prev) { + prev->next = rules->next; + } else { + auth->rules = rules->next; + } + rules->next = NULL; + qapi_free_QAuthZListRuleList(rules); + return i; + } + prev = rules; + rules = rules->next; + i++; + } + + return -1; +} + + +static const TypeInfo qauthz_list_info = { + .parent = TYPE_QAUTHZ, + .name = TYPE_QAUTHZ_LIST, + .instance_size = sizeof(QAuthZList), + .instance_finalize = qauthz_list_finalize, + .class_init = qauthz_list_class_init, + .interfaces = (InterfaceInfo[]) { + { TYPE_USER_CREATABLE }, + { } + } +}; + + +static void +qauthz_list_register_types(void) +{ + type_register_static(&qauthz_list_info); +} + + +type_init(qauthz_list_register_types); diff --git a/authz/listfile.c b/authz/listfile.c new file mode 100644 index 000000000..da3a0e69a --- /dev/null +++ b/authz/listfile.c @@ -0,0 +1,290 @@ +/* + * QEMU access control list file authorization driver + * + * Copyright (c) 2018 Red Hat, Inc. + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, see . + * + */ + +#include "qemu/osdep.h" +#include "authz/listfile.h" +#include "trace.h" +#include "qemu/error-report.h" +#include "qemu/main-loop.h" +#include "qemu/module.h" +#include "qemu/sockets.h" +#include "qemu/filemonitor.h" +#include "qom/object_interfaces.h" +#include "qapi/qapi-visit-authz.h" +#include "qapi/qmp/qjson.h" +#include "qapi/qmp/qobject.h" +#include "qapi/qmp/qerror.h" +#include "qapi/qobject-input-visitor.h" + + +static bool +qauthz_list_file_is_allowed(QAuthZ *authz, + const char *identity, + Error **errp) +{ + QAuthZListFile *fauthz = QAUTHZ_LIST_FILE(authz); + if (fauthz->list) { + return qauthz_is_allowed(fauthz->list, identity, errp); + } + + return false; +} + + +static QAuthZ * +qauthz_list_file_load(QAuthZListFile *fauthz, Error **errp) +{ + GError *err = NULL; + gchar *content = NULL; + gsize len; + QObject *obj = NULL; + QDict *pdict; + Visitor *v = NULL; + QAuthZ *ret = NULL; + + trace_qauthz_list_file_load(fauthz, fauthz->filename); + if (!g_file_get_contents(fauthz->filename, &content, &len, &err)) { + error_setg(errp, "Unable to read '%s': %s", + fauthz->filename, err->message); + goto cleanup; + } + + obj = qobject_from_json(content, errp); + if (!obj) { + goto cleanup; + } + + pdict = qobject_to(QDict, obj); + if (!pdict) { + error_setg(errp, "File '%s' must contain a JSON object", + fauthz->filename); + goto cleanup; + } + + v = qobject_input_visitor_new(obj); + + ret = (QAuthZ *)user_creatable_add_type(TYPE_QAUTHZ_LIST, + NULL, pdict, v, errp); + + cleanup: + visit_free(v); + qobject_unref(obj); + if (err) { + g_error_free(err); + } + g_free(content); + return ret; +} + + +static void +qauthz_list_file_event(int64_t wd G_GNUC_UNUSED, + QFileMonitorEvent ev G_GNUC_UNUSED, + const char *name G_GNUC_UNUSED, + void *opaque) +{ + QAuthZListFile *fauthz = opaque; + Error *err = NULL; + + if (ev != QFILE_MONITOR_EVENT_MODIFIED && + ev != QFILE_MONITOR_EVENT_CREATED) { + return; + } + + object_unref(OBJECT(fauthz->list)); + fauthz->list = qauthz_list_file_load(fauthz, &err); + trace_qauthz_list_file_refresh(fauthz, + fauthz->filename, fauthz->list ? 1 : 0); + if (!fauthz->list) { + error_report_err(err); + } +} + +static void +qauthz_list_file_complete(UserCreatable *uc, Error **errp) +{ + QAuthZListFile *fauthz = QAUTHZ_LIST_FILE(uc); + gchar *dir = NULL, *file = NULL; + + if (!fauthz->filename) { + error_setg(errp, "filename not provided"); + return; + } + + fauthz->list = qauthz_list_file_load(fauthz, errp); + if (!fauthz->list) { + return; + } + + if (!fauthz->refresh) { + return; + } + + fauthz->file_monitor = qemu_file_monitor_new(errp); + if (!fauthz->file_monitor) { + return; + } + + dir = g_path_get_dirname(fauthz->filename); + if (g_str_equal(dir, ".")) { + error_setg(errp, "Filename must be an absolute path"); + goto cleanup; + } + file = g_path_get_basename(fauthz->filename); + if (g_str_equal(file, ".")) { + error_setg(errp, "Path has no trailing filename component"); + goto cleanup; + } + + fauthz->file_watch = qemu_file_monitor_add_watch( + fauthz->file_monitor, dir, file, + qauthz_list_file_event, fauthz, errp); + if (fauthz->file_watch < 0) { + goto cleanup; + } + + cleanup: + g_free(file); + g_free(dir); +} + + +static void +qauthz_list_file_prop_set_filename(Object *obj, + const char *value, + Error **errp G_GNUC_UNUSED) +{ + QAuthZListFile *fauthz = QAUTHZ_LIST_FILE(obj); + + g_free(fauthz->filename); + fauthz->filename = g_strdup(value); +} + + +static char * +qauthz_list_file_prop_get_filename(Object *obj, + Error **errp G_GNUC_UNUSED) +{ + QAuthZListFile *fauthz = QAUTHZ_LIST_FILE(obj); + + return g_strdup(fauthz->filename); +} + + +static void +qauthz_list_file_prop_set_refresh(Object *obj, + bool value, + Error **errp G_GNUC_UNUSED) +{ + QAuthZListFile *fauthz = QAUTHZ_LIST_FILE(obj); + + fauthz->refresh = value; +} + + +static bool +qauthz_list_file_prop_get_refresh(Object *obj, + Error **errp G_GNUC_UNUSED) +{ + QAuthZListFile *fauthz = QAUTHZ_LIST_FILE(obj); + + return fauthz->refresh; +} + + +static void +qauthz_list_file_finalize(Object *obj) +{ + QAuthZListFile *fauthz = QAUTHZ_LIST_FILE(obj); + + object_unref(OBJECT(fauthz->list)); + g_free(fauthz->filename); + qemu_file_monitor_free(fauthz->file_monitor); +} + + +static void +qauthz_list_file_class_init(ObjectClass *oc, void *data) +{ + UserCreatableClass *ucc = USER_CREATABLE_CLASS(oc); + QAuthZClass *authz = QAUTHZ_CLASS(oc); + + ucc->complete = qauthz_list_file_complete; + + object_class_property_add_str(oc, "filename", + qauthz_list_file_prop_get_filename, + qauthz_list_file_prop_set_filename); + object_class_property_add_bool(oc, "refresh", + qauthz_list_file_prop_get_refresh, + qauthz_list_file_prop_set_refresh); + + authz->is_allowed = qauthz_list_file_is_allowed; +} + + +static void +qauthz_list_file_init(Object *obj) +{ + QAuthZListFile *authz = QAUTHZ_LIST_FILE(obj); + + authz->file_watch = -1; +#ifdef CONFIG_INOTIFY1 + authz->refresh = true; +#endif +} + + +QAuthZListFile *qauthz_list_file_new(const char *id, + const char *filename, + bool refresh, + Error **errp) +{ + return QAUTHZ_LIST_FILE( + object_new_with_props(TYPE_QAUTHZ_LIST_FILE, + object_get_objects_root(), + id, errp, + "filename", filename, + "refresh", refresh ? "yes" : "no", + NULL)); +} + + +static const TypeInfo qauthz_list_file_info = { + .parent = TYPE_QAUTHZ, + .name = TYPE_QAUTHZ_LIST_FILE, + .instance_init = qauthz_list_file_init, + .instance_size = sizeof(QAuthZListFile), + .instance_finalize = qauthz_list_file_finalize, + .class_init = qauthz_list_file_class_init, + .interfaces = (InterfaceInfo[]) { + { TYPE_USER_CREATABLE }, + { } + } +}; + + +static void +qauthz_list_file_register_types(void) +{ + type_register_static(&qauthz_list_file_info); +} + + +type_init(qauthz_list_file_register_types); diff --git a/authz/meson.build b/authz/meson.build new file mode 100644 index 000000000..88fa7769c --- /dev/null +++ b/authz/meson.build @@ -0,0 +1,9 @@ +authz_ss.add(genh) +authz_ss.add(files( + 'base.c', + 'list.c', + 'listfile.c', + 'simple.c', +)) + +authz_ss.add(when: ['CONFIG_AUTH_PAM', pam], if_true: files('pamacct.c')) diff --git a/authz/pamacct.c b/authz/pamacct.c new file mode 100644 index 000000000..c862d9ff3 --- /dev/null +++ b/authz/pamacct.c @@ -0,0 +1,153 @@ +/* + * QEMU PAM authorization driver + * + * Copyright (c) 2018 Red Hat, Inc. + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, see . + * + */ + +#include "qemu/osdep.h" +#include "authz/pamacct.h" +#include "trace.h" +#include "qemu/module.h" +#include "qom/object_interfaces.h" + +#include + + +static bool qauthz_pam_is_allowed(QAuthZ *authz, + const char *identity, + Error **errp) +{ + QAuthZPAM *pauthz = QAUTHZ_PAM(authz); + const struct pam_conv pam_conversation = { 0 }; + pam_handle_t *pamh = NULL; + int ret; + + trace_qauthz_pam_check(authz, identity, pauthz->service); + ret = pam_start(pauthz->service, + identity, + &pam_conversation, + &pamh); + if (ret != PAM_SUCCESS) { + error_setg(errp, "Unable to start PAM transaction: %s", + pam_strerror(NULL, ret)); + return false; + } + + ret = pam_acct_mgmt(pamh, PAM_SILENT); + pam_end(pamh, ret); + if (ret != PAM_SUCCESS) { + error_setg(errp, "Unable to authorize user '%s': %s", + identity, pam_strerror(pamh, ret)); + return false; + } + + return true; +} + + +static void +qauthz_pam_prop_set_service(Object *obj, + const char *service, + Error **errp G_GNUC_UNUSED) +{ + QAuthZPAM *pauthz = QAUTHZ_PAM(obj); + + g_free(pauthz->service); + pauthz->service = g_strdup(service); +} + + +static char * +qauthz_pam_prop_get_service(Object *obj, + Error **errp G_GNUC_UNUSED) +{ + QAuthZPAM *pauthz = QAUTHZ_PAM(obj); + + return g_strdup(pauthz->service); +} + + +static void +qauthz_pam_complete(UserCreatable *uc, Error **errp) +{ + QAuthZPAM *pauthz = QAUTHZ_PAM(uc); + + if (!pauthz->service) { + error_setg(errp, "The 'service' property must be set"); + return; + } +} + + +static void +qauthz_pam_finalize(Object *obj) +{ + QAuthZPAM *pauthz = QAUTHZ_PAM(obj); + + g_free(pauthz->service); +} + + +static void +qauthz_pam_class_init(ObjectClass *oc, void *data) +{ + UserCreatableClass *ucc = USER_CREATABLE_CLASS(oc); + QAuthZClass *authz = QAUTHZ_CLASS(oc); + + ucc->complete = qauthz_pam_complete; + authz->is_allowed = qauthz_pam_is_allowed; + + object_class_property_add_str(oc, "service", + qauthz_pam_prop_get_service, + qauthz_pam_prop_set_service); +} + + +QAuthZPAM *qauthz_pam_new(const char *id, + const char *service, + Error **errp) +{ + return QAUTHZ_PAM( + object_new_with_props(TYPE_QAUTHZ_PAM, + object_get_objects_root(), + id, errp, + "service", service, + NULL)); +} + + +static const TypeInfo qauthz_pam_info = { + .parent = TYPE_QAUTHZ, + .name = TYPE_QAUTHZ_PAM, + .instance_size = sizeof(QAuthZPAM), + .instance_finalize = qauthz_pam_finalize, + .class_init = qauthz_pam_class_init, + .interfaces = (InterfaceInfo[]) { + { TYPE_USER_CREATABLE }, + { } + } +}; + + +static void +qauthz_pam_register_types(void) +{ + type_register_static(&qauthz_pam_info); +} + + +type_init(qauthz_pam_register_types); diff --git a/authz/simple.c b/authz/simple.c new file mode 100644 index 000000000..0597dcd8e --- /dev/null +++ b/authz/simple.c @@ -0,0 +1,128 @@ +/* + * QEMU simple authorization driver + * + * Copyright (c) 2018 Red Hat, Inc. + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, see . + * + */ + +#include "qemu/osdep.h" +#include "authz/simple.h" +#include "trace.h" +#include "qemu/module.h" +#include "qom/object_interfaces.h" + +static bool qauthz_simple_is_allowed(QAuthZ *authz, + const char *identity, + Error **errp) +{ + QAuthZSimple *sauthz = QAUTHZ_SIMPLE(authz); + + trace_qauthz_simple_is_allowed(authz, sauthz->identity, identity); + return g_str_equal(identity, sauthz->identity); +} + +static void +qauthz_simple_prop_set_identity(Object *obj, + const char *value, + Error **errp G_GNUC_UNUSED) +{ + QAuthZSimple *sauthz = QAUTHZ_SIMPLE(obj); + + g_free(sauthz->identity); + sauthz->identity = g_strdup(value); +} + + +static char * +qauthz_simple_prop_get_identity(Object *obj, + Error **errp G_GNUC_UNUSED) +{ + QAuthZSimple *sauthz = QAUTHZ_SIMPLE(obj); + + return g_strdup(sauthz->identity); +} + + +static void +qauthz_simple_finalize(Object *obj) +{ + QAuthZSimple *sauthz = QAUTHZ_SIMPLE(obj); + + g_free(sauthz->identity); +} + + +static void +qauthz_simple_complete(UserCreatable *uc, Error **errp) +{ + QAuthZSimple *sauthz = QAUTHZ_SIMPLE(uc); + + if (!sauthz->identity) { + error_setg(errp, "The 'identity' property must be set"); + return; + } +} + + +static void +qauthz_simple_class_init(ObjectClass *oc, void *data) +{ + QAuthZClass *authz = QAUTHZ_CLASS(oc); + UserCreatableClass *ucc = USER_CREATABLE_CLASS(oc); + + ucc->complete = qauthz_simple_complete; + authz->is_allowed = qauthz_simple_is_allowed; + + object_class_property_add_str(oc, "identity", + qauthz_simple_prop_get_identity, + qauthz_simple_prop_set_identity); +} + + +QAuthZSimple *qauthz_simple_new(const char *id, + const char *identity, + Error **errp) +{ + return QAUTHZ_SIMPLE( + object_new_with_props(TYPE_QAUTHZ_SIMPLE, + object_get_objects_root(), + id, errp, + "identity", identity, + NULL)); +} + + +static const TypeInfo qauthz_simple_info = { + .parent = TYPE_QAUTHZ, + .name = TYPE_QAUTHZ_SIMPLE, + .instance_size = sizeof(QAuthZSimple), + .instance_finalize = qauthz_simple_finalize, + .class_init = qauthz_simple_class_init, + .interfaces = (InterfaceInfo[]) { + { TYPE_USER_CREATABLE }, + { } + } +}; + + +static void +qauthz_simple_register_types(void) +{ + type_register_static(&qauthz_simple_info); +} + + +type_init(qauthz_simple_register_types); diff --git a/authz/trace-events b/authz/trace-events new file mode 100644 index 000000000..e62ebb36b --- /dev/null +++ b/authz/trace-events @@ -0,0 +1,18 @@ +# See docs/devel/tracing.txt for syntax documentation. + +# base.c +qauthz_is_allowed(void *authz, const char *identity, bool allowed) "AuthZ %p check identity=%s allowed=%d" + +# simple.c +qauthz_simple_is_allowed(void *authz, const char *wantidentity, const char *gotidentity) "AuthZ simple %p check want identity=%s got identity=%s" + +# list.c +qauthz_list_check_rule(void *authz, const char *identity, const char *rule, int format, int policy) "AuthZ list %p check rule=%s identity=%s format=%d policy=%d" +qauthz_list_default_policy(void *authz, const char *identity, int policy) "AuthZ list %p default identity=%s policy=%d" + +# listfile.c +qauthz_list_file_load(void *authz, const char *filename) "AuthZ file %p load filename=%s" +qauthz_list_file_refresh(void *authz, const char *filename, int success) "AuthZ file %p load filename=%s success=%d" + +# pamacct.c +qauthz_pam_check(void *authz, const char *identity, const char *service) "AuthZ PAM %p identity=%s service=%s" diff --git a/authz/trace.h b/authz/trace.h new file mode 100644 index 000000000..3176c127f --- /dev/null +++ b/authz/trace.h @@ -0,0 +1 @@ +#include "trace/trace-authz.h" diff --git a/block.c b/block.c new file mode 100644 index 000000000..f1cedac36 --- /dev/null +++ b/block.c @@ -0,0 +1,7244 @@ +/* + * QEMU System Emulator block driver + * + * Copyright (c) 2003 Fabrice Bellard + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ + +#include "qemu/osdep.h" +#include "block/trace.h" +#include "block/block_int.h" +#include "block/blockjob.h" +#include "block/nbd.h" +#include "block/qdict.h" +#include "qemu/error-report.h" +#include "block/module_block.h" +#include "qemu/main-loop.h" +#include "qemu/module.h" +#include "qapi/error.h" +#include "qapi/qmp/qdict.h" +#include "qapi/qmp/qjson.h" +#include "qapi/qmp/qnull.h" +#include "qapi/qmp/qstring.h" +#include "qapi/qobject-output-visitor.h" +#include "qapi/qapi-visit-block-core.h" +#include "sysemu/block-backend.h" +#include "sysemu/sysemu.h" +#include "qemu/notify.h" +#include "qemu/option.h" +#include "qemu/coroutine.h" +#include "block/qapi.h" +#include "qemu/timer.h" +#include "qemu/cutils.h" +#include "qemu/id.h" +#include "block/coroutines.h" + +#ifdef CONFIG_BSD +#include +#include +#ifndef __DragonFly__ +#include +#endif +#endif + +#ifdef _WIN32 +#include +#endif + +#define NOT_DONE 0x7fffffff /* used while emulated sync operation in progress */ + +static QTAILQ_HEAD(, BlockDriverState) graph_bdrv_states = + QTAILQ_HEAD_INITIALIZER(graph_bdrv_states); + +static QTAILQ_HEAD(, BlockDriverState) all_bdrv_states = + QTAILQ_HEAD_INITIALIZER(all_bdrv_states); + +static QLIST_HEAD(, BlockDriver) bdrv_drivers = + QLIST_HEAD_INITIALIZER(bdrv_drivers); + +static BlockDriverState *bdrv_open_inherit(const char *filename, + const char *reference, + QDict *options, int flags, + BlockDriverState *parent, + const BdrvChildClass *child_class, + BdrvChildRole child_role, + Error **errp); + +/* If non-zero, use only whitelisted block drivers */ +static int use_bdrv_whitelist; + +#ifdef _WIN32 +static int is_windows_drive_prefix(const char *filename) +{ + return (((filename[0] >= 'a' && filename[0] <= 'z') || + (filename[0] >= 'A' && filename[0] <= 'Z')) && + filename[1] == ':'); +} + +int is_windows_drive(const char *filename) +{ + if (is_windows_drive_prefix(filename) && + filename[2] == '\0') + return 1; + if (strstart(filename, "\\\\.\\", NULL) || + strstart(filename, "//./", NULL)) + return 1; + return 0; +} +#endif + +size_t bdrv_opt_mem_align(BlockDriverState *bs) +{ + if (!bs || !bs->drv) { + /* page size or 4k (hdd sector size) should be on the safe side */ + return MAX(4096, qemu_real_host_page_size); + } + + return bs->bl.opt_mem_alignment; +} + +size_t bdrv_min_mem_align(BlockDriverState *bs) +{ + if (!bs || !bs->drv) { + /* page size or 4k (hdd sector size) should be on the safe side */ + return MAX(4096, qemu_real_host_page_size); + } + + return bs->bl.min_mem_alignment; +} + +/* check if the path starts with ":" */ +int path_has_protocol(const char *path) +{ + const char *p; + +#ifdef _WIN32 + if (is_windows_drive(path) || + is_windows_drive_prefix(path)) { + return 0; + } + p = path + strcspn(path, ":/\\"); +#else + p = path + strcspn(path, ":/"); +#endif + + return *p == ':'; +} + +int path_is_absolute(const char *path) +{ +#ifdef _WIN32 + /* specific case for names like: "\\.\d:" */ + if (is_windows_drive(path) || is_windows_drive_prefix(path)) { + return 1; + } + return (*path == '/' || *path == '\\'); +#else + return (*path == '/'); +#endif +} + +/* if filename is absolute, just return its duplicate. Otherwise, build a + path to it by considering it is relative to base_path. URL are + supported. */ +char *path_combine(const char *base_path, const char *filename) +{ + const char *protocol_stripped = NULL; + const char *p, *p1; + char *result; + int len; + + if (path_is_absolute(filename)) { + return g_strdup(filename); + } + + if (path_has_protocol(base_path)) { + protocol_stripped = strchr(base_path, ':'); + if (protocol_stripped) { + protocol_stripped++; + } + } + p = protocol_stripped ?: base_path; + + p1 = strrchr(base_path, '/'); +#ifdef _WIN32 + { + const char *p2; + p2 = strrchr(base_path, '\\'); + if (!p1 || p2 > p1) { + p1 = p2; + } + } +#endif + if (p1) { + p1++; + } else { + p1 = base_path; + } + if (p1 > p) { + p = p1; + } + len = p - base_path; + + result = g_malloc(len + strlen(filename) + 1); + memcpy(result, base_path, len); + strcpy(result + len, filename); + + return result; +} + +/* + * Helper function for bdrv_parse_filename() implementations to remove optional + * protocol prefixes (especially "file:") from a filename and for putting the + * stripped filename into the options QDict if there is such a prefix. + */ +void bdrv_parse_filename_strip_prefix(const char *filename, const char *prefix, + QDict *options) +{ + if (strstart(filename, prefix, &filename)) { + /* Stripping the explicit protocol prefix may result in a protocol + * prefix being (wrongly) detected (if the filename contains a colon) */ + if (path_has_protocol(filename)) { + QString *fat_filename; + + /* This means there is some colon before the first slash; therefore, + * this cannot be an absolute path */ + assert(!path_is_absolute(filename)); + + /* And we can thus fix the protocol detection issue by prefixing it + * by "./" */ + fat_filename = qstring_from_str("./"); + qstring_append(fat_filename, filename); + + assert(!path_has_protocol(qstring_get_str(fat_filename))); + + qdict_put(options, "filename", fat_filename); + } else { + /* If no protocol prefix was detected, we can use the shortened + * filename as-is */ + qdict_put_str(options, "filename", filename); + } + } +} + + +/* Returns whether the image file is opened as read-only. Note that this can + * return false and writing to the image file is still not possible because the + * image is inactivated. */ +bool bdrv_is_read_only(BlockDriverState *bs) +{ + return bs->read_only; +} + +int bdrv_can_set_read_only(BlockDriverState *bs, bool read_only, + bool ignore_allow_rdw, Error **errp) +{ + /* Do not set read_only if copy_on_read is enabled */ + if (bs->copy_on_read && read_only) { + error_setg(errp, "Can't set node '%s' to r/o with copy-on-read enabled", + bdrv_get_device_or_node_name(bs)); + return -EINVAL; + } + + /* Do not clear read_only if it is prohibited */ + if (!read_only && !(bs->open_flags & BDRV_O_ALLOW_RDWR) && + !ignore_allow_rdw) + { + error_setg(errp, "Node '%s' is read only", + bdrv_get_device_or_node_name(bs)); + return -EPERM; + } + + return 0; +} + +/* + * Called by a driver that can only provide a read-only image. + * + * Returns 0 if the node is already read-only or it could switch the node to + * read-only because BDRV_O_AUTO_RDONLY is set. + * + * Returns -EACCES if the node is read-write and BDRV_O_AUTO_RDONLY is not set + * or bdrv_can_set_read_only() forbids making the node read-only. If @errmsg + * is not NULL, it is used as the error message for the Error object. + */ +int bdrv_apply_auto_read_only(BlockDriverState *bs, const char *errmsg, + Error **errp) +{ + int ret = 0; + + if (!(bs->open_flags & BDRV_O_RDWR)) { + return 0; + } + if (!(bs->open_flags & BDRV_O_AUTO_RDONLY)) { + goto fail; + } + + ret = bdrv_can_set_read_only(bs, true, false, NULL); + if (ret < 0) { + goto fail; + } + + bs->read_only = true; + bs->open_flags &= ~BDRV_O_RDWR; + + return 0; + +fail: + error_setg(errp, "%s", errmsg ?: "Image is read-only"); + return -EACCES; +} + +/* + * If @backing is empty, this function returns NULL without setting + * @errp. In all other cases, NULL will only be returned with @errp + * set. + * + * Therefore, a return value of NULL without @errp set means that + * there is no backing file; if @errp is set, there is one but its + * absolute filename cannot be generated. + */ +char *bdrv_get_full_backing_filename_from_filename(const char *backed, + const char *backing, + Error **errp) +{ + if (backing[0] == '\0') { + return NULL; + } else if (path_has_protocol(backing) || path_is_absolute(backing)) { + return g_strdup(backing); + } else if (backed[0] == '\0' || strstart(backed, "json:", NULL)) { + error_setg(errp, "Cannot use relative backing file names for '%s'", + backed); + return NULL; + } else { + return path_combine(backed, backing); + } +} + +/* + * If @filename is empty or NULL, this function returns NULL without + * setting @errp. In all other cases, NULL will only be returned with + * @errp set. + */ +static char *bdrv_make_absolute_filename(BlockDriverState *relative_to, + const char *filename, Error **errp) +{ + char *dir, *full_name; + + if (!filename || filename[0] == '\0') { + return NULL; + } else if (path_has_protocol(filename) || path_is_absolute(filename)) { + return g_strdup(filename); + } + + dir = bdrv_dirname(relative_to, errp); + if (!dir) { + return NULL; + } + + full_name = g_strconcat(dir, filename, NULL); + g_free(dir); + return full_name; +} + +char *bdrv_get_full_backing_filename(BlockDriverState *bs, Error **errp) +{ + return bdrv_make_absolute_filename(bs, bs->backing_file, errp); +} + +void bdrv_register(BlockDriver *bdrv) +{ + assert(bdrv->format_name); + QLIST_INSERT_HEAD(&bdrv_drivers, bdrv, list); +} + +BlockDriverState *bdrv_new(void) +{ + BlockDriverState *bs; + int i; + + bs = g_new0(BlockDriverState, 1); + QLIST_INIT(&bs->dirty_bitmaps); + for (i = 0; i < BLOCK_OP_TYPE_MAX; i++) { + QLIST_INIT(&bs->op_blockers[i]); + } + notifier_with_return_list_init(&bs->before_write_notifiers); + qemu_co_mutex_init(&bs->reqs_lock); + qemu_mutex_init(&bs->dirty_bitmap_mutex); + bs->refcnt = 1; + bs->aio_context = qemu_get_aio_context(); + + qemu_co_queue_init(&bs->flush_queue); + + for (i = 0; i < bdrv_drain_all_count; i++) { + bdrv_drained_begin(bs); + } + + QTAILQ_INSERT_TAIL(&all_bdrv_states, bs, bs_list); + + return bs; +} + +static BlockDriver *bdrv_do_find_format(const char *format_name) +{ + BlockDriver *drv1; + + QLIST_FOREACH(drv1, &bdrv_drivers, list) { + if (!strcmp(drv1->format_name, format_name)) { + return drv1; + } + } + + return NULL; +} + +BlockDriver *bdrv_find_format(const char *format_name) +{ + BlockDriver *drv1; + int i; + + drv1 = bdrv_do_find_format(format_name); + if (drv1) { + return drv1; + } + + /* The driver isn't registered, maybe we need to load a module */ + for (i = 0; i < (int)ARRAY_SIZE(block_driver_modules); ++i) { + if (!strcmp(block_driver_modules[i].format_name, format_name)) { + block_module_load_one(block_driver_modules[i].library_name); + break; + } + } + + return bdrv_do_find_format(format_name); +} + +static int bdrv_format_is_whitelisted(const char *format_name, bool read_only) +{ + static const char *whitelist_rw[] = { + CONFIG_BDRV_RW_WHITELIST + NULL + }; + static const char *whitelist_ro[] = { + CONFIG_BDRV_RO_WHITELIST + NULL + }; + const char **p; + + if (!whitelist_rw[0] && !whitelist_ro[0]) { + return 1; /* no whitelist, anything goes */ + } + + for (p = whitelist_rw; *p; p++) { + if (!strcmp(format_name, *p)) { + return 1; + } + } + if (read_only) { + for (p = whitelist_ro; *p; p++) { + if (!strcmp(format_name, *p)) { + return 1; + } + } + } + return 0; +} + +int bdrv_is_whitelisted(BlockDriver *drv, bool read_only) +{ + return bdrv_format_is_whitelisted(drv->format_name, read_only); +} + +bool bdrv_uses_whitelist(void) +{ + return use_bdrv_whitelist; +} + +typedef struct CreateCo { + BlockDriver *drv; + char *filename; + QemuOpts *opts; + int ret; + Error *err; +} CreateCo; + +static void coroutine_fn bdrv_create_co_entry(void *opaque) +{ + Error *local_err = NULL; + int ret; + + CreateCo *cco = opaque; + assert(cco->drv); + + ret = cco->drv->bdrv_co_create_opts(cco->drv, + cco->filename, cco->opts, &local_err); + error_propagate(&cco->err, local_err); + cco->ret = ret; +} + +int bdrv_create(BlockDriver *drv, const char* filename, + QemuOpts *opts, Error **errp) +{ + int ret; + + Coroutine *co; + CreateCo cco = { + .drv = drv, + .filename = g_strdup(filename), + .opts = opts, + .ret = NOT_DONE, + .err = NULL, + }; + + if (!drv->bdrv_co_create_opts) { + error_setg(errp, "Driver '%s' does not support image creation", drv->format_name); + ret = -ENOTSUP; + goto out; + } + + if (qemu_in_coroutine()) { + /* Fast-path if already in coroutine context */ + bdrv_create_co_entry(&cco); + } else { + co = qemu_coroutine_create(bdrv_create_co_entry, &cco); + qemu_coroutine_enter(co); + while (cco.ret == NOT_DONE) { + aio_poll(qemu_get_aio_context(), true); + } + } + + ret = cco.ret; + if (ret < 0) { + if (cco.err) { + error_propagate(errp, cco.err); + } else { + error_setg_errno(errp, -ret, "Could not create image"); + } + } + +out: + g_free(cco.filename); + return ret; +} + +/** + * Helper function for bdrv_create_file_fallback(): Resize @blk to at + * least the given @minimum_size. + * + * On success, return @blk's actual length. + * Otherwise, return -errno. + */ +static int64_t create_file_fallback_truncate(BlockBackend *blk, + int64_t minimum_size, Error **errp) +{ + Error *local_err = NULL; + int64_t size; + int ret; + + ret = blk_truncate(blk, minimum_size, false, PREALLOC_MODE_OFF, 0, + &local_err); + if (ret < 0 && ret != -ENOTSUP) { + error_propagate(errp, local_err); + return ret; + } + + size = blk_getlength(blk); + if (size < 0) { + error_free(local_err); + error_setg_errno(errp, -size, + "Failed to inquire the new image file's length"); + return size; + } + + if (size < minimum_size) { + /* Need to grow the image, but we failed to do that */ + error_propagate(errp, local_err); + return -ENOTSUP; + } + + error_free(local_err); + local_err = NULL; + + return size; +} + +/** + * Helper function for bdrv_create_file_fallback(): Zero the first + * sector to remove any potentially pre-existing image header. + */ +static int create_file_fallback_zero_first_sector(BlockBackend *blk, + int64_t current_size, + Error **errp) +{ + int64_t bytes_to_clear; + int ret; + + bytes_to_clear = MIN(current_size, BDRV_SECTOR_SIZE); + if (bytes_to_clear) { + ret = blk_pwrite_zeroes(blk, 0, bytes_to_clear, BDRV_REQ_MAY_UNMAP); + if (ret < 0) { + error_setg_errno(errp, -ret, + "Failed to clear the new image's first sector"); + return ret; + } + } + + return 0; +} + +/** + * Simple implementation of bdrv_co_create_opts for protocol drivers + * which only support creation via opening a file + * (usually existing raw storage device) + */ +int coroutine_fn bdrv_co_create_opts_simple(BlockDriver *drv, + const char *filename, + QemuOpts *opts, + Error **errp) +{ + BlockBackend *blk; + QDict *options; + int64_t size = 0; + char *buf = NULL; + PreallocMode prealloc; + Error *local_err = NULL; + int ret; + + size = qemu_opt_get_size_del(opts, BLOCK_OPT_SIZE, 0); + buf = qemu_opt_get_del(opts, BLOCK_OPT_PREALLOC); + prealloc = qapi_enum_parse(&PreallocMode_lookup, buf, + PREALLOC_MODE_OFF, &local_err); + g_free(buf); + if (local_err) { + error_propagate(errp, local_err); + return -EINVAL; + } + + if (prealloc != PREALLOC_MODE_OFF) { + error_setg(errp, "Unsupported preallocation mode '%s'", + PreallocMode_str(prealloc)); + return -ENOTSUP; + } + + options = qdict_new(); + qdict_put_str(options, "driver", drv->format_name); + + blk = blk_new_open(filename, NULL, options, + BDRV_O_RDWR | BDRV_O_RESIZE, errp); + if (!blk) { + error_prepend(errp, "Protocol driver '%s' does not support image " + "creation, and opening the image failed: ", + drv->format_name); + return -EINVAL; + } + + size = create_file_fallback_truncate(blk, size, errp); + if (size < 0) { + ret = size; + goto out; + } + + ret = create_file_fallback_zero_first_sector(blk, size, errp); + if (ret < 0) { + goto out; + } + + ret = 0; +out: + blk_unref(blk); + return ret; +} + +int bdrv_create_file(const char *filename, QemuOpts *opts, Error **errp) +{ + BlockDriver *drv; + + drv = bdrv_find_protocol(filename, true, errp); + if (drv == NULL) { + return -ENOENT; + } + + return bdrv_create(drv, filename, opts, errp); +} + +int coroutine_fn bdrv_co_delete_file(BlockDriverState *bs, Error **errp) +{ + Error *local_err = NULL; + int ret; + + assert(bs != NULL); + + if (!bs->drv) { + error_setg(errp, "Block node '%s' is not opened", bs->filename); + return -ENOMEDIUM; + } + + if (!bs->drv->bdrv_co_delete_file) { + error_setg(errp, "Driver '%s' does not support image deletion", + bs->drv->format_name); + return -ENOTSUP; + } + + ret = bs->drv->bdrv_co_delete_file(bs, &local_err); + if (ret < 0) { + error_propagate(errp, local_err); + } + + return ret; +} + +/** + * Try to get @bs's logical and physical block size. + * On success, store them in @bsz struct and return 0. + * On failure return -errno. + * @bs must not be empty. + */ +int bdrv_probe_blocksizes(BlockDriverState *bs, BlockSizes *bsz) +{ + BlockDriver *drv = bs->drv; + BlockDriverState *filtered = bdrv_filter_bs(bs); + + if (drv && drv->bdrv_probe_blocksizes) { + return drv->bdrv_probe_blocksizes(bs, bsz); + } else if (filtered) { + return bdrv_probe_blocksizes(filtered, bsz); + } + + return -ENOTSUP; +} + +/** + * Try to get @bs's geometry (cyls, heads, sectors). + * On success, store them in @geo struct and return 0. + * On failure return -errno. + * @bs must not be empty. + */ +int bdrv_probe_geometry(BlockDriverState *bs, HDGeometry *geo) +{ + BlockDriver *drv = bs->drv; + BlockDriverState *filtered = bdrv_filter_bs(bs); + + if (drv && drv->bdrv_probe_geometry) { + return drv->bdrv_probe_geometry(bs, geo); + } else if (filtered) { + return bdrv_probe_geometry(filtered, geo); + } + + return -ENOTSUP; +} + +/* + * Create a uniquely-named empty temporary file. + * Return 0 upon success, otherwise a negative errno value. + */ +int get_tmp_filename(char *filename, int size) +{ +#ifdef _WIN32 + char temp_dir[MAX_PATH]; + /* GetTempFileName requires that its output buffer (4th param) + have length MAX_PATH or greater. */ + assert(size >= MAX_PATH); + return (GetTempPath(MAX_PATH, temp_dir) + && GetTempFileName(temp_dir, "qem", 0, filename) + ? 0 : -GetLastError()); +#else + int fd; + const char *tmpdir; + tmpdir = getenv("TMPDIR"); + if (!tmpdir) { + tmpdir = "/var/tmp"; + } + if (snprintf(filename, size, "%s/vl.XXXXXX", tmpdir) >= size) { + return -EOVERFLOW; + } + fd = mkstemp(filename); + if (fd < 0) { + return -errno; + } + if (close(fd) != 0) { + unlink(filename); + return -errno; + } + return 0; +#endif +} + +/* + * Detect host devices. By convention, /dev/cdrom[N] is always + * recognized as a host CDROM. + */ +static BlockDriver *find_hdev_driver(const char *filename) +{ + int score_max = 0, score; + BlockDriver *drv = NULL, *d; + + QLIST_FOREACH(d, &bdrv_drivers, list) { + if (d->bdrv_probe_device) { + score = d->bdrv_probe_device(filename); + if (score > score_max) { + score_max = score; + drv = d; + } + } + } + + return drv; +} + +static BlockDriver *bdrv_do_find_protocol(const char *protocol) +{ + BlockDriver *drv1; + + QLIST_FOREACH(drv1, &bdrv_drivers, list) { + if (drv1->protocol_name && !strcmp(drv1->protocol_name, protocol)) { + return drv1; + } + } + + return NULL; +} + +BlockDriver *bdrv_find_protocol(const char *filename, + bool allow_protocol_prefix, + Error **errp) +{ + BlockDriver *drv1; + char protocol[128]; + int len; + const char *p; + int i; + + /* TODO Drivers without bdrv_file_open must be specified explicitly */ + + /* + * XXX(hch): we really should not let host device detection + * override an explicit protocol specification, but moving this + * later breaks access to device names with colons in them. + * Thanks to the brain-dead persistent naming schemes on udev- + * based Linux systems those actually are quite common. + */ + drv1 = find_hdev_driver(filename); + if (drv1) { + return drv1; + } + + if (!path_has_protocol(filename) || !allow_protocol_prefix) { + return &bdrv_file; + } + + p = strchr(filename, ':'); + assert(p != NULL); + len = p - filename; + if (len > sizeof(protocol) - 1) + len = sizeof(protocol) - 1; + memcpy(protocol, filename, len); + protocol[len] = '\0'; + + drv1 = bdrv_do_find_protocol(protocol); + if (drv1) { + return drv1; + } + + for (i = 0; i < (int)ARRAY_SIZE(block_driver_modules); ++i) { + if (block_driver_modules[i].protocol_name && + !strcmp(block_driver_modules[i].protocol_name, protocol)) { + block_module_load_one(block_driver_modules[i].library_name); + break; + } + } + + drv1 = bdrv_do_find_protocol(protocol); + if (!drv1) { + error_setg(errp, "Unknown protocol '%s'", protocol); + } + return drv1; +} + +/* + * Guess image format by probing its contents. + * This is not a good idea when your image is raw (CVE-2008-2004), but + * we do it anyway for backward compatibility. + * + * @buf contains the image's first @buf_size bytes. + * @buf_size is the buffer size in bytes (generally BLOCK_PROBE_BUF_SIZE, + * but can be smaller if the image file is smaller) + * @filename is its filename. + * + * For all block drivers, call the bdrv_probe() method to get its + * probing score. + * Return the first block driver with the highest probing score. + */ +BlockDriver *bdrv_probe_all(const uint8_t *buf, int buf_size, + const char *filename) +{ + int score_max = 0, score; + BlockDriver *drv = NULL, *d; + + QLIST_FOREACH(d, &bdrv_drivers, list) { + if (d->bdrv_probe) { + score = d->bdrv_probe(buf, buf_size, filename); + if (score > score_max) { + score_max = score; + drv = d; + } + } + } + + return drv; +} + +static int find_image_format(BlockBackend *file, const char *filename, + BlockDriver **pdrv, Error **errp) +{ + BlockDriver *drv; + uint8_t buf[BLOCK_PROBE_BUF_SIZE]; + int ret = 0; + + /* Return the raw BlockDriver * to scsi-generic devices or empty drives */ + if (blk_is_sg(file) || !blk_is_inserted(file) || blk_getlength(file) == 0) { + *pdrv = &bdrv_raw; + return ret; + } + + ret = blk_pread(file, 0, buf, sizeof(buf)); + if (ret < 0) { + error_setg_errno(errp, -ret, "Could not read image for determining its " + "format"); + *pdrv = NULL; + return ret; + } + + drv = bdrv_probe_all(buf, ret, filename); + if (!drv) { + error_setg(errp, "Could not determine image format: No compatible " + "driver found"); + ret = -ENOENT; + } + *pdrv = drv; + return ret; +} + +/** + * Set the current 'total_sectors' value + * Return 0 on success, -errno on error. + */ +int refresh_total_sectors(BlockDriverState *bs, int64_t hint) +{ + BlockDriver *drv = bs->drv; + + if (!drv) { + return -ENOMEDIUM; + } + + /* Do not attempt drv->bdrv_getlength() on scsi-generic devices */ + if (bdrv_is_sg(bs)) + return 0; + + /* query actual device if possible, otherwise just trust the hint */ + if (drv->bdrv_getlength) { + int64_t length = drv->bdrv_getlength(bs); + if (length < 0) { + return length; + } + hint = DIV_ROUND_UP(length, BDRV_SECTOR_SIZE); + } + + bs->total_sectors = hint; + return 0; +} + +/** + * Combines a QDict of new block driver @options with any missing options taken + * from @old_options, so that leaving out an option defaults to its old value. + */ +static void bdrv_join_options(BlockDriverState *bs, QDict *options, + QDict *old_options) +{ + if (bs->drv && bs->drv->bdrv_join_options) { + bs->drv->bdrv_join_options(options, old_options); + } else { + qdict_join(options, old_options, false); + } +} + +static BlockdevDetectZeroesOptions bdrv_parse_detect_zeroes(QemuOpts *opts, + int open_flags, + Error **errp) +{ + Error *local_err = NULL; + char *value = qemu_opt_get_del(opts, "detect-zeroes"); + BlockdevDetectZeroesOptions detect_zeroes = + qapi_enum_parse(&BlockdevDetectZeroesOptions_lookup, value, + BLOCKDEV_DETECT_ZEROES_OPTIONS_OFF, &local_err); + g_free(value); + if (local_err) { + error_propagate(errp, local_err); + return detect_zeroes; + } + + if (detect_zeroes == BLOCKDEV_DETECT_ZEROES_OPTIONS_UNMAP && + !(open_flags & BDRV_O_UNMAP)) + { + error_setg(errp, "setting detect-zeroes to unmap is not allowed " + "without setting discard operation to unmap"); + } + + return detect_zeroes; +} + +/** + * Set open flags for aio engine + * + * Return 0 on success, -1 if the engine specified is invalid + */ +int bdrv_parse_aio(const char *mode, int *flags) +{ + if (!strcmp(mode, "threads")) { + /* do nothing, default */ + } else if (!strcmp(mode, "native")) { + *flags |= BDRV_O_NATIVE_AIO; +#ifdef CONFIG_LINUX_IO_URING + } else if (!strcmp(mode, "io_uring")) { + *flags |= BDRV_O_IO_URING; +#endif + } else { + return -1; + } + + return 0; +} + +/** + * Set open flags for a given discard mode + * + * Return 0 on success, -1 if the discard mode was invalid. + */ +int bdrv_parse_discard_flags(const char *mode, int *flags) +{ + *flags &= ~BDRV_O_UNMAP; + + if (!strcmp(mode, "off") || !strcmp(mode, "ignore")) { + /* do nothing */ + } else if (!strcmp(mode, "on") || !strcmp(mode, "unmap")) { + *flags |= BDRV_O_UNMAP; + } else { + return -1; + } + + return 0; +} + +/** + * Set open flags for a given cache mode + * + * Return 0 on success, -1 if the cache mode was invalid. + */ +int bdrv_parse_cache_mode(const char *mode, int *flags, bool *writethrough) +{ + *flags &= ~BDRV_O_CACHE_MASK; + + if (!strcmp(mode, "off") || !strcmp(mode, "none")) { + *writethrough = false; + *flags |= BDRV_O_NOCACHE; + } else if (!strcmp(mode, "directsync")) { + *writethrough = true; + *flags |= BDRV_O_NOCACHE; + } else if (!strcmp(mode, "writeback")) { + *writethrough = false; + } else if (!strcmp(mode, "unsafe")) { + *writethrough = false; + *flags |= BDRV_O_NO_FLUSH; + } else if (!strcmp(mode, "writethrough")) { + *writethrough = true; + } else { + return -1; + } + + return 0; +} + +static char *bdrv_child_get_parent_desc(BdrvChild *c) +{ + BlockDriverState *parent = c->opaque; + return g_strdup(bdrv_get_device_or_node_name(parent)); +} + +static void bdrv_child_cb_drained_begin(BdrvChild *child) +{ + BlockDriverState *bs = child->opaque; + bdrv_do_drained_begin_quiesce(bs, NULL, false); +} + +static bool bdrv_child_cb_drained_poll(BdrvChild *child) +{ + BlockDriverState *bs = child->opaque; + return bdrv_drain_poll(bs, false, NULL, false); +} + +static void bdrv_child_cb_drained_end(BdrvChild *child, + int *drained_end_counter) +{ + BlockDriverState *bs = child->opaque; + bdrv_drained_end_no_poll(bs, drained_end_counter); +} + +static int bdrv_child_cb_inactivate(BdrvChild *child) +{ + BlockDriverState *bs = child->opaque; + assert(bs->open_flags & BDRV_O_INACTIVE); + return 0; +} + +static bool bdrv_child_cb_can_set_aio_ctx(BdrvChild *child, AioContext *ctx, + GSList **ignore, Error **errp) +{ + BlockDriverState *bs = child->opaque; + return bdrv_can_set_aio_context(bs, ctx, ignore, errp); +} + +static void bdrv_child_cb_set_aio_ctx(BdrvChild *child, AioContext *ctx, + GSList **ignore) +{ + BlockDriverState *bs = child->opaque; + return bdrv_set_aio_context_ignore(bs, ctx, ignore); +} + +/* + * Returns the options and flags that a temporary snapshot should get, based on + * the originally requested flags (the originally requested image will have + * flags like a backing file) + */ +static void bdrv_temp_snapshot_options(int *child_flags, QDict *child_options, + int parent_flags, QDict *parent_options) +{ + *child_flags = (parent_flags & ~BDRV_O_SNAPSHOT) | BDRV_O_TEMPORARY; + + /* For temporary files, unconditional cache=unsafe is fine */ + qdict_set_default_str(child_options, BDRV_OPT_CACHE_DIRECT, "off"); + qdict_set_default_str(child_options, BDRV_OPT_CACHE_NO_FLUSH, "on"); + + /* Copy the read-only and discard options from the parent */ + qdict_copy_default(child_options, parent_options, BDRV_OPT_READ_ONLY); + qdict_copy_default(child_options, parent_options, BDRV_OPT_DISCARD); + + /* aio=native doesn't work for cache.direct=off, so disable it for the + * temporary snapshot */ + *child_flags &= ~BDRV_O_NATIVE_AIO; +} + +static void bdrv_backing_attach(BdrvChild *c) +{ + BlockDriverState *parent = c->opaque; + BlockDriverState *backing_hd = c->bs; + + assert(!parent->backing_blocker); + error_setg(&parent->backing_blocker, + "node is used as backing hd of '%s'", + bdrv_get_device_or_node_name(parent)); + + bdrv_refresh_filename(backing_hd); + + parent->open_flags &= ~BDRV_O_NO_BACKING; + + bdrv_op_block_all(backing_hd, parent->backing_blocker); + /* Otherwise we won't be able to commit or stream */ + bdrv_op_unblock(backing_hd, BLOCK_OP_TYPE_COMMIT_TARGET, + parent->backing_blocker); + bdrv_op_unblock(backing_hd, BLOCK_OP_TYPE_STREAM, + parent->backing_blocker); + /* + * We do backup in 3 ways: + * 1. drive backup + * The target bs is new opened, and the source is top BDS + * 2. blockdev backup + * Both the source and the target are top BDSes. + * 3. internal backup(used for block replication) + * Both the source and the target are backing file + * + * In case 1 and 2, neither the source nor the target is the backing file. + * In case 3, we will block the top BDS, so there is only one block job + * for the top BDS and its backing chain. + */ + bdrv_op_unblock(backing_hd, BLOCK_OP_TYPE_BACKUP_SOURCE, + parent->backing_blocker); + bdrv_op_unblock(backing_hd, BLOCK_OP_TYPE_BACKUP_TARGET, + parent->backing_blocker); +} + +static void bdrv_backing_detach(BdrvChild *c) +{ + BlockDriverState *parent = c->opaque; + + assert(parent->backing_blocker); + bdrv_op_unblock_all(c->bs, parent->backing_blocker); + error_free(parent->backing_blocker); + parent->backing_blocker = NULL; +} + +static int bdrv_backing_update_filename(BdrvChild *c, BlockDriverState *base, + const char *filename, Error **errp) +{ + BlockDriverState *parent = c->opaque; + bool read_only = bdrv_is_read_only(parent); + int ret; + + if (read_only) { + ret = bdrv_reopen_set_read_only(parent, false, errp); + if (ret < 0) { + return ret; + } + } + + ret = bdrv_change_backing_file(parent, filename, + base->drv ? base->drv->format_name : "", + false); + if (ret < 0) { + error_setg_errno(errp, -ret, "Could not update backing file link"); + } + + if (read_only) { + bdrv_reopen_set_read_only(parent, true, NULL); + } + + return ret; +} + +/* + * Returns the options and flags that a generic child of a BDS should + * get, based on the given options and flags for the parent BDS. + */ +static void bdrv_inherited_options(BdrvChildRole role, bool parent_is_format, + int *child_flags, QDict *child_options, + int parent_flags, QDict *parent_options) +{ + int flags = parent_flags; + + /* + * First, decide whether to set, clear, or leave BDRV_O_PROTOCOL. + * Generally, the question to answer is: Should this child be + * format-probed by default? + */ + + /* + * Pure and non-filtered data children of non-format nodes should + * be probed by default (even when the node itself has BDRV_O_PROTOCOL + * set). This only affects a very limited set of drivers (namely + * quorum and blkverify when this comment was written). + * Force-clear BDRV_O_PROTOCOL then. + */ + if (!parent_is_format && + (role & BDRV_CHILD_DATA) && + !(role & (BDRV_CHILD_METADATA | BDRV_CHILD_FILTERED))) + { + flags &= ~BDRV_O_PROTOCOL; + } + + /* + * All children of format nodes (except for COW children) and all + * metadata children in general should never be format-probed. + * Force-set BDRV_O_PROTOCOL then. + */ + if ((parent_is_format && !(role & BDRV_CHILD_COW)) || + (role & BDRV_CHILD_METADATA)) + { + flags |= BDRV_O_PROTOCOL; + } + + /* + * If the cache mode isn't explicitly set, inherit direct and no-flush from + * the parent. + */ + qdict_copy_default(child_options, parent_options, BDRV_OPT_CACHE_DIRECT); + qdict_copy_default(child_options, parent_options, BDRV_OPT_CACHE_NO_FLUSH); + qdict_copy_default(child_options, parent_options, BDRV_OPT_FORCE_SHARE); + + if (role & BDRV_CHILD_COW) { + /* backing files are opened read-only by default */ + qdict_set_default_str(child_options, BDRV_OPT_READ_ONLY, "on"); + qdict_set_default_str(child_options, BDRV_OPT_AUTO_READ_ONLY, "off"); + } else { + /* Inherit the read-only option from the parent if it's not set */ + qdict_copy_default(child_options, parent_options, BDRV_OPT_READ_ONLY); + qdict_copy_default(child_options, parent_options, + BDRV_OPT_AUTO_READ_ONLY); + } + + /* + * bdrv_co_pdiscard() respects unmap policy for the parent, so we + * can default to enable it on lower layers regardless of the + * parent option. + */ + qdict_set_default_str(child_options, BDRV_OPT_DISCARD, "unmap"); + + /* Clear flags that only apply to the top layer */ + flags &= ~(BDRV_O_SNAPSHOT | BDRV_O_NO_BACKING | BDRV_O_COPY_ON_READ); + + if (role & BDRV_CHILD_METADATA) { + flags &= ~BDRV_O_NO_IO; + } + if (role & BDRV_CHILD_COW) { + flags &= ~BDRV_O_TEMPORARY; + } + + *child_flags = flags; +} + +static void bdrv_child_cb_attach(BdrvChild *child) +{ + BlockDriverState *bs = child->opaque; + + if (child->role & BDRV_CHILD_COW) { + bdrv_backing_attach(child); + } + + bdrv_apply_subtree_drain(child, bs); +} + +static void bdrv_child_cb_detach(BdrvChild *child) +{ + BlockDriverState *bs = child->opaque; + + if (child->role & BDRV_CHILD_COW) { + bdrv_backing_detach(child); + } + + bdrv_unapply_subtree_drain(child, bs); +} + +static int bdrv_child_cb_update_filename(BdrvChild *c, BlockDriverState *base, + const char *filename, Error **errp) +{ + if (c->role & BDRV_CHILD_COW) { + return bdrv_backing_update_filename(c, base, filename, errp); + } + return 0; +} + +const BdrvChildClass child_of_bds = { + .parent_is_bds = true, + .get_parent_desc = bdrv_child_get_parent_desc, + .inherit_options = bdrv_inherited_options, + .drained_begin = bdrv_child_cb_drained_begin, + .drained_poll = bdrv_child_cb_drained_poll, + .drained_end = bdrv_child_cb_drained_end, + .attach = bdrv_child_cb_attach, + .detach = bdrv_child_cb_detach, + .inactivate = bdrv_child_cb_inactivate, + .can_set_aio_ctx = bdrv_child_cb_can_set_aio_ctx, + .set_aio_ctx = bdrv_child_cb_set_aio_ctx, + .update_filename = bdrv_child_cb_update_filename, +}; + +static int bdrv_open_flags(BlockDriverState *bs, int flags) +{ + int open_flags = flags; + + /* + * Clear flags that are internal to the block layer before opening the + * image. + */ + open_flags &= ~(BDRV_O_SNAPSHOT | BDRV_O_NO_BACKING | BDRV_O_PROTOCOL); + + return open_flags; +} + +static void update_flags_from_options(int *flags, QemuOpts *opts) +{ + *flags &= ~(BDRV_O_CACHE_MASK | BDRV_O_RDWR | BDRV_O_AUTO_RDONLY); + + if (qemu_opt_get_bool_del(opts, BDRV_OPT_CACHE_NO_FLUSH, false)) { + *flags |= BDRV_O_NO_FLUSH; + } + + if (qemu_opt_get_bool_del(opts, BDRV_OPT_CACHE_DIRECT, false)) { + *flags |= BDRV_O_NOCACHE; + } + + if (!qemu_opt_get_bool_del(opts, BDRV_OPT_READ_ONLY, false)) { + *flags |= BDRV_O_RDWR; + } + + if (qemu_opt_get_bool_del(opts, BDRV_OPT_AUTO_READ_ONLY, false)) { + *flags |= BDRV_O_AUTO_RDONLY; + } +} + +static void update_options_from_flags(QDict *options, int flags) +{ + if (!qdict_haskey(options, BDRV_OPT_CACHE_DIRECT)) { + qdict_put_bool(options, BDRV_OPT_CACHE_DIRECT, flags & BDRV_O_NOCACHE); + } + if (!qdict_haskey(options, BDRV_OPT_CACHE_NO_FLUSH)) { + qdict_put_bool(options, BDRV_OPT_CACHE_NO_FLUSH, + flags & BDRV_O_NO_FLUSH); + } + if (!qdict_haskey(options, BDRV_OPT_READ_ONLY)) { + qdict_put_bool(options, BDRV_OPT_READ_ONLY, !(flags & BDRV_O_RDWR)); + } + if (!qdict_haskey(options, BDRV_OPT_AUTO_READ_ONLY)) { + qdict_put_bool(options, BDRV_OPT_AUTO_READ_ONLY, + flags & BDRV_O_AUTO_RDONLY); + } +} + +static void bdrv_assign_node_name(BlockDriverState *bs, + const char *node_name, + Error **errp) +{ + char *gen_node_name = NULL; + + if (!node_name) { + node_name = gen_node_name = id_generate(ID_BLOCK); + } else if (!id_wellformed(node_name)) { + /* + * Check for empty string or invalid characters, but not if it is + * generated (generated names use characters not available to the user) + */ + error_setg(errp, "Invalid node name"); + return; + } + + /* takes care of avoiding namespaces collisions */ + if (blk_by_name(node_name)) { + error_setg(errp, "node-name=%s is conflicting with a device id", + node_name); + goto out; + } + + /* takes care of avoiding duplicates node names */ + if (bdrv_find_node(node_name)) { + error_setg(errp, "Duplicate node name"); + goto out; + } + + /* Make sure that the node name isn't truncated */ + if (strlen(node_name) >= sizeof(bs->node_name)) { + error_setg(errp, "Node name too long"); + goto out; + } + + /* copy node name into the bs and insert it into the graph list */ + pstrcpy(bs->node_name, sizeof(bs->node_name), node_name); + QTAILQ_INSERT_TAIL(&graph_bdrv_states, bs, node_list); +out: + g_free(gen_node_name); +} + +static int bdrv_open_driver(BlockDriverState *bs, BlockDriver *drv, + const char *node_name, QDict *options, + int open_flags, Error **errp) +{ + Error *local_err = NULL; + int i, ret; + + bdrv_assign_node_name(bs, node_name, &local_err); + if (local_err) { + error_propagate(errp, local_err); + return -EINVAL; + } + + bs->drv = drv; + bs->read_only = !(bs->open_flags & BDRV_O_RDWR); + bs->opaque = g_malloc0(drv->instance_size); + + if (drv->bdrv_file_open) { + assert(!drv->bdrv_needs_filename || bs->filename[0]); + ret = drv->bdrv_file_open(bs, options, open_flags, &local_err); + } else if (drv->bdrv_open) { + ret = drv->bdrv_open(bs, options, open_flags, &local_err); + } else { + ret = 0; + } + + if (ret < 0) { + if (local_err) { + error_propagate(errp, local_err); + } else if (bs->filename[0]) { + error_setg_errno(errp, -ret, "Could not open '%s'", bs->filename); + } else { + error_setg_errno(errp, -ret, "Could not open image"); + } + goto open_failed; + } + + ret = refresh_total_sectors(bs, bs->total_sectors); + if (ret < 0) { + error_setg_errno(errp, -ret, "Could not refresh total sector count"); + return ret; + } + + bdrv_refresh_limits(bs, &local_err); + if (local_err) { + error_propagate(errp, local_err); + return -EINVAL; + } + + assert(bdrv_opt_mem_align(bs) != 0); + assert(bdrv_min_mem_align(bs) != 0); + assert(is_power_of_2(bs->bl.request_alignment)); + + for (i = 0; i < bs->quiesce_counter; i++) { + if (drv->bdrv_co_drain_begin) { + drv->bdrv_co_drain_begin(bs); + } + } + + return 0; +open_failed: + bs->drv = NULL; + if (bs->file != NULL) { + bdrv_unref_child(bs, bs->file); + bs->file = NULL; + } + g_free(bs->opaque); + bs->opaque = NULL; + return ret; +} + +BlockDriverState *bdrv_new_open_driver(BlockDriver *drv, const char *node_name, + int flags, Error **errp) +{ + BlockDriverState *bs; + int ret; + + bs = bdrv_new(); + bs->open_flags = flags; + bs->explicit_options = qdict_new(); + bs->options = qdict_new(); + bs->opaque = NULL; + + update_options_from_flags(bs->options, flags); + + ret = bdrv_open_driver(bs, drv, node_name, bs->options, flags, errp); + if (ret < 0) { + qobject_unref(bs->explicit_options); + bs->explicit_options = NULL; + qobject_unref(bs->options); + bs->options = NULL; + bdrv_unref(bs); + return NULL; + } + + return bs; +} + +QemuOptsList bdrv_runtime_opts = { + .name = "bdrv_common", + .head = QTAILQ_HEAD_INITIALIZER(bdrv_runtime_opts.head), + .desc = { + { + .name = "node-name", + .type = QEMU_OPT_STRING, + .help = "Node name of the block device node", + }, + { + .name = "driver", + .type = QEMU_OPT_STRING, + .help = "Block driver to use for the node", + }, + { + .name = BDRV_OPT_CACHE_DIRECT, + .type = QEMU_OPT_BOOL, + .help = "Bypass software writeback cache on the host", + }, + { + .name = BDRV_OPT_CACHE_NO_FLUSH, + .type = QEMU_OPT_BOOL, + .help = "Ignore flush requests", + }, + { + .name = BDRV_OPT_READ_ONLY, + .type = QEMU_OPT_BOOL, + .help = "Node is opened in read-only mode", + }, + { + .name = BDRV_OPT_AUTO_READ_ONLY, + .type = QEMU_OPT_BOOL, + .help = "Node can become read-only if opening read-write fails", + }, + { + .name = "detect-zeroes", + .type = QEMU_OPT_STRING, + .help = "try to optimize zero writes (off, on, unmap)", + }, + { + .name = BDRV_OPT_DISCARD, + .type = QEMU_OPT_STRING, + .help = "discard operation (ignore/off, unmap/on)", + }, + { + .name = BDRV_OPT_FORCE_SHARE, + .type = QEMU_OPT_BOOL, + .help = "always accept other writers (default: off)", + }, + { /* end of list */ } + }, +}; + +QemuOptsList bdrv_create_opts_simple = { + .name = "simple-create-opts", + .head = QTAILQ_HEAD_INITIALIZER(bdrv_create_opts_simple.head), + .desc = { + { + .name = BLOCK_OPT_SIZE, + .type = QEMU_OPT_SIZE, + .help = "Virtual disk size" + }, + { + .name = BLOCK_OPT_PREALLOC, + .type = QEMU_OPT_STRING, + .help = "Preallocation mode (allowed values: off)" + }, + { /* end of list */ } + } +}; + +/* + * Common part for opening disk images and files + * + * Removes all processed options from *options. + */ +static int bdrv_open_common(BlockDriverState *bs, BlockBackend *file, + QDict *options, Error **errp) +{ + int ret, open_flags; + const char *filename; + const char *driver_name = NULL; + const char *node_name = NULL; + const char *discard; + QemuOpts *opts; + BlockDriver *drv; + Error *local_err = NULL; + + assert(bs->file == NULL); + assert(options != NULL && bs->options != options); + + opts = qemu_opts_create(&bdrv_runtime_opts, NULL, 0, &error_abort); + if (!qemu_opts_absorb_qdict(opts, options, errp)) { + ret = -EINVAL; + goto fail_opts; + } + + update_flags_from_options(&bs->open_flags, opts); + + driver_name = qemu_opt_get(opts, "driver"); + drv = bdrv_find_format(driver_name); + assert(drv != NULL); + + bs->force_share = qemu_opt_get_bool(opts, BDRV_OPT_FORCE_SHARE, false); + + if (bs->force_share && (bs->open_flags & BDRV_O_RDWR)) { + error_setg(errp, + BDRV_OPT_FORCE_SHARE + "=on can only be used with read-only images"); + ret = -EINVAL; + goto fail_opts; + } + + if (file != NULL) { + bdrv_refresh_filename(blk_bs(file)); + filename = blk_bs(file)->filename; + } else { + /* + * Caution: while qdict_get_try_str() is fine, getting + * non-string types would require more care. When @options + * come from -blockdev or blockdev_add, its members are typed + * according to the QAPI schema, but when they come from + * -drive, they're all QString. + */ + filename = qdict_get_try_str(options, "filename"); + } + + if (drv->bdrv_needs_filename && (!filename || !filename[0])) { + error_setg(errp, "The '%s' block driver requires a file name", + drv->format_name); + ret = -EINVAL; + goto fail_opts; + } + + trace_bdrv_open_common(bs, filename ?: "", bs->open_flags, + drv->format_name); + + bs->read_only = !(bs->open_flags & BDRV_O_RDWR); + + if (use_bdrv_whitelist && !bdrv_is_whitelisted(drv, bs->read_only)) { + if (!bs->read_only && bdrv_is_whitelisted(drv, true)) { + ret = bdrv_apply_auto_read_only(bs, NULL, NULL); + } else { + ret = -ENOTSUP; + } + if (ret < 0) { + error_setg(errp, + !bs->read_only && bdrv_is_whitelisted(drv, true) + ? "Driver '%s' can only be used for read-only devices" + : "Driver '%s' is not whitelisted", + drv->format_name); + goto fail_opts; + } + } + + /* bdrv_new() and bdrv_close() make it so */ + assert(qatomic_read(&bs->copy_on_read) == 0); + + if (bs->open_flags & BDRV_O_COPY_ON_READ) { + if (!bs->read_only) { + bdrv_enable_copy_on_read(bs); + } else { + error_setg(errp, "Can't use copy-on-read on read-only device"); + ret = -EINVAL; + goto fail_opts; + } + } + + discard = qemu_opt_get(opts, BDRV_OPT_DISCARD); + if (discard != NULL) { + if (bdrv_parse_discard_flags(discard, &bs->open_flags) != 0) { + error_setg(errp, "Invalid discard option"); + ret = -EINVAL; + goto fail_opts; + } + } + + bs->detect_zeroes = + bdrv_parse_detect_zeroes(opts, bs->open_flags, &local_err); + if (local_err) { + error_propagate(errp, local_err); + ret = -EINVAL; + goto fail_opts; + } + + if (filename != NULL) { + pstrcpy(bs->filename, sizeof(bs->filename), filename); + } else { + bs->filename[0] = '\0'; + } + pstrcpy(bs->exact_filename, sizeof(bs->exact_filename), bs->filename); + + /* Open the image, either directly or using a protocol */ + open_flags = bdrv_open_flags(bs, bs->open_flags); + node_name = qemu_opt_get(opts, "node-name"); + + assert(!drv->bdrv_file_open || file == NULL); + ret = bdrv_open_driver(bs, drv, node_name, options, open_flags, errp); + if (ret < 0) { + goto fail_opts; + } + + qemu_opts_del(opts); + return 0; + +fail_opts: + qemu_opts_del(opts); + return ret; +} + +static QDict *parse_json_filename(const char *filename, Error **errp) +{ + QObject *options_obj; + QDict *options; + int ret; + + ret = strstart(filename, "json:", &filename); + assert(ret); + + options_obj = qobject_from_json(filename, errp); + if (!options_obj) { + error_prepend(errp, "Could not parse the JSON options: "); + return NULL; + } + + options = qobject_to(QDict, options_obj); + if (!options) { + qobject_unref(options_obj); + error_setg(errp, "Invalid JSON object given"); + return NULL; + } + + qdict_flatten(options); + + return options; +} + +static void parse_json_protocol(QDict *options, const char **pfilename, + Error **errp) +{ + QDict *json_options; + Error *local_err = NULL; + + /* Parse json: pseudo-protocol */ + if (!*pfilename || !g_str_has_prefix(*pfilename, "json:")) { + return; + } + + json_options = parse_json_filename(*pfilename, &local_err); + if (local_err) { + error_propagate(errp, local_err); + return; + } + + /* Options given in the filename have lower priority than options + * specified directly */ + qdict_join(options, json_options, false); + qobject_unref(json_options); + *pfilename = NULL; +} + +/* + * Fills in default options for opening images and converts the legacy + * filename/flags pair to option QDict entries. + * The BDRV_O_PROTOCOL flag in *flags will be set or cleared accordingly if a + * block driver has been specified explicitly. + */ +static int bdrv_fill_options(QDict **options, const char *filename, + int *flags, Error **errp) +{ + const char *drvname; + bool protocol = *flags & BDRV_O_PROTOCOL; + bool parse_filename = false; + BlockDriver *drv = NULL; + Error *local_err = NULL; + + /* + * Caution: while qdict_get_try_str() is fine, getting non-string + * types would require more care. When @options come from + * -blockdev or blockdev_add, its members are typed according to + * the QAPI schema, but when they come from -drive, they're all + * QString. + */ + drvname = qdict_get_try_str(*options, "driver"); + if (drvname) { + drv = bdrv_find_format(drvname); + if (!drv) { + error_setg(errp, "Unknown driver '%s'", drvname); + return -ENOENT; + } + /* If the user has explicitly specified the driver, this choice should + * override the BDRV_O_PROTOCOL flag */ + protocol = drv->bdrv_file_open; + } + + if (protocol) { + *flags |= BDRV_O_PROTOCOL; + } else { + *flags &= ~BDRV_O_PROTOCOL; + } + + /* Translate cache options from flags into options */ + update_options_from_flags(*options, *flags); + + /* Fetch the file name from the options QDict if necessary */ + if (protocol && filename) { + if (!qdict_haskey(*options, "filename")) { + qdict_put_str(*options, "filename", filename); + parse_filename = true; + } else { + error_setg(errp, "Can't specify 'file' and 'filename' options at " + "the same time"); + return -EINVAL; + } + } + + /* Find the right block driver */ + /* See cautionary note on accessing @options above */ + filename = qdict_get_try_str(*options, "filename"); + + if (!drvname && protocol) { + if (filename) { + drv = bdrv_find_protocol(filename, parse_filename, errp); + if (!drv) { + return -EINVAL; + } + + drvname = drv->format_name; + qdict_put_str(*options, "driver", drvname); + } else { + error_setg(errp, "Must specify either driver or file"); + return -EINVAL; + } + } + + assert(drv || !protocol); + + /* Driver-specific filename parsing */ + if (drv && drv->bdrv_parse_filename && parse_filename) { + drv->bdrv_parse_filename(filename, *options, &local_err); + if (local_err) { + error_propagate(errp, local_err); + return -EINVAL; + } + + if (!drv->bdrv_needs_filename) { + qdict_del(*options, "filename"); + } + } + + return 0; +} + +static int bdrv_child_check_perm(BdrvChild *c, BlockReopenQueue *q, + uint64_t perm, uint64_t shared, + GSList *ignore_children, + bool *tighten_restrictions, Error **errp); +static void bdrv_child_abort_perm_update(BdrvChild *c); +static void bdrv_child_set_perm(BdrvChild *c, uint64_t perm, uint64_t shared); + +typedef struct BlockReopenQueueEntry { + bool prepared; + bool perms_checked; + BDRVReopenState state; + QTAILQ_ENTRY(BlockReopenQueueEntry) entry; +} BlockReopenQueueEntry; + +/* + * Return the flags that @bs will have after the reopens in @q have + * successfully completed. If @q is NULL (or @bs is not contained in @q), + * return the current flags. + */ +static int bdrv_reopen_get_flags(BlockReopenQueue *q, BlockDriverState *bs) +{ + BlockReopenQueueEntry *entry; + + if (q != NULL) { + QTAILQ_FOREACH(entry, q, entry) { + if (entry->state.bs == bs) { + return entry->state.flags; + } + } + } + + return bs->open_flags; +} + +/* Returns whether the image file can be written to after the reopen queue @q + * has been successfully applied, or right now if @q is NULL. */ +static bool bdrv_is_writable_after_reopen(BlockDriverState *bs, + BlockReopenQueue *q) +{ + int flags = bdrv_reopen_get_flags(q, bs); + + return (flags & (BDRV_O_RDWR | BDRV_O_INACTIVE)) == BDRV_O_RDWR; +} + +/* + * Return whether the BDS can be written to. This is not necessarily + * the same as !bdrv_is_read_only(bs), as inactivated images may not + * be written to but do not count as read-only images. + */ +bool bdrv_is_writable(BlockDriverState *bs) +{ + return bdrv_is_writable_after_reopen(bs, NULL); +} + +static void bdrv_child_perm(BlockDriverState *bs, BlockDriverState *child_bs, + BdrvChild *c, BdrvChildRole role, + BlockReopenQueue *reopen_queue, + uint64_t parent_perm, uint64_t parent_shared, + uint64_t *nperm, uint64_t *nshared) +{ + assert(bs->drv && bs->drv->bdrv_child_perm); + bs->drv->bdrv_child_perm(bs, c, role, reopen_queue, + parent_perm, parent_shared, + nperm, nshared); + /* TODO Take force_share from reopen_queue */ + if (child_bs && child_bs->force_share) { + *nshared = BLK_PERM_ALL; + } +} + +/* + * Check whether permissions on this node can be changed in a way that + * @cumulative_perms and @cumulative_shared_perms are the new cumulative + * permissions of all its parents. This involves checking whether all necessary + * permission changes to child nodes can be performed. + * + * Will set *tighten_restrictions to true if and only if new permissions have to + * be taken or currently shared permissions are to be unshared. Otherwise, + * errors are not fatal as long as the caller accepts that the restrictions + * remain tighter than they need to be. The caller still has to abort the + * transaction. + * @tighten_restrictions cannot be used together with @q: When reopening, we may + * encounter fatal errors even though no restrictions are to be tightened. For + * example, changing a node from RW to RO will fail if the WRITE permission is + * to be kept. + * + * A call to this function must always be followed by a call to bdrv_set_perm() + * or bdrv_abort_perm_update(). + */ +static int bdrv_check_perm(BlockDriverState *bs, BlockReopenQueue *q, + uint64_t cumulative_perms, + uint64_t cumulative_shared_perms, + GSList *ignore_children, + bool *tighten_restrictions, Error **errp) +{ + BlockDriver *drv = bs->drv; + BdrvChild *c; + int ret; + + assert(!q || !tighten_restrictions); + + if (tighten_restrictions) { + uint64_t current_perms, current_shared; + uint64_t added_perms, removed_shared_perms; + + bdrv_get_cumulative_perm(bs, ¤t_perms, ¤t_shared); + + added_perms = cumulative_perms & ~current_perms; + removed_shared_perms = current_shared & ~cumulative_shared_perms; + + *tighten_restrictions = added_perms || removed_shared_perms; + } + + /* Write permissions never work with read-only images */ + if ((cumulative_perms & (BLK_PERM_WRITE | BLK_PERM_WRITE_UNCHANGED)) && + !bdrv_is_writable_after_reopen(bs, q)) + { + if (!bdrv_is_writable_after_reopen(bs, NULL)) { + error_setg(errp, "Block node is read-only"); + } else { + uint64_t current_perms, current_shared; + bdrv_get_cumulative_perm(bs, ¤t_perms, ¤t_shared); + if (current_perms & (BLK_PERM_WRITE | BLK_PERM_WRITE_UNCHANGED)) { + error_setg(errp, "Cannot make block node read-only, there is " + "a writer on it"); + } else { + error_setg(errp, "Cannot make block node read-only and create " + "a writer on it"); + } + } + + return -EPERM; + } + + /* + * Unaligned requests will automatically be aligned to bl.request_alignment + * and without RESIZE we can't extend requests to write to space beyond the + * end of the image, so it's required that the image size is aligned. + */ + if ((cumulative_perms & (BLK_PERM_WRITE | BLK_PERM_WRITE_UNCHANGED)) && + !(cumulative_perms & BLK_PERM_RESIZE)) + { + if ((bs->total_sectors * BDRV_SECTOR_SIZE) % bs->bl.request_alignment) { + error_setg(errp, "Cannot get 'write' permission without 'resize': " + "Image size is not a multiple of request " + "alignment"); + return -EPERM; + } + } + + /* Check this node */ + if (!drv) { + return 0; + } + + if (drv->bdrv_check_perm) { + return drv->bdrv_check_perm(bs, cumulative_perms, + cumulative_shared_perms, errp); + } + + /* Drivers that never have children can omit .bdrv_child_perm() */ + if (!drv->bdrv_child_perm) { + assert(QLIST_EMPTY(&bs->children)); + return 0; + } + + /* Check all children */ + QLIST_FOREACH(c, &bs->children, next) { + uint64_t cur_perm, cur_shared; + bool child_tighten_restr; + + bdrv_child_perm(bs, c->bs, c, c->role, q, + cumulative_perms, cumulative_shared_perms, + &cur_perm, &cur_shared); + ret = bdrv_child_check_perm(c, q, cur_perm, cur_shared, ignore_children, + tighten_restrictions ? &child_tighten_restr + : NULL, + errp); + if (tighten_restrictions) { + *tighten_restrictions |= child_tighten_restr; + } + if (ret < 0) { + return ret; + } + } + + return 0; +} + +/* + * Notifies drivers that after a previous bdrv_check_perm() call, the + * permission update is not performed and any preparations made for it (e.g. + * taken file locks) need to be undone. + * + * This function recursively notifies all child nodes. + */ +static void bdrv_abort_perm_update(BlockDriverState *bs) +{ + BlockDriver *drv = bs->drv; + BdrvChild *c; + + if (!drv) { + return; + } + + if (drv->bdrv_abort_perm_update) { + drv->bdrv_abort_perm_update(bs); + } + + QLIST_FOREACH(c, &bs->children, next) { + bdrv_child_abort_perm_update(c); + } +} + +static void bdrv_set_perm(BlockDriverState *bs, uint64_t cumulative_perms, + uint64_t cumulative_shared_perms) +{ + BlockDriver *drv = bs->drv; + BdrvChild *c; + + if (!drv) { + return; + } + + /* Update this node */ + if (drv->bdrv_set_perm) { + drv->bdrv_set_perm(bs, cumulative_perms, cumulative_shared_perms); + } + + /* Drivers that never have children can omit .bdrv_child_perm() */ + if (!drv->bdrv_child_perm) { + assert(QLIST_EMPTY(&bs->children)); + return; + } + + /* Update all children */ + QLIST_FOREACH(c, &bs->children, next) { + uint64_t cur_perm, cur_shared; + bdrv_child_perm(bs, c->bs, c, c->role, NULL, + cumulative_perms, cumulative_shared_perms, + &cur_perm, &cur_shared); + bdrv_child_set_perm(c, cur_perm, cur_shared); + } +} + +void bdrv_get_cumulative_perm(BlockDriverState *bs, uint64_t *perm, + uint64_t *shared_perm) +{ + BdrvChild *c; + uint64_t cumulative_perms = 0; + uint64_t cumulative_shared_perms = BLK_PERM_ALL; + + QLIST_FOREACH(c, &bs->parents, next_parent) { + cumulative_perms |= c->perm; + cumulative_shared_perms &= c->shared_perm; + } + + *perm = cumulative_perms; + *shared_perm = cumulative_shared_perms; +} + +static char *bdrv_child_user_desc(BdrvChild *c) +{ + if (c->klass->get_parent_desc) { + return c->klass->get_parent_desc(c); + } + + return g_strdup("another user"); +} + +char *bdrv_perm_names(uint64_t perm) +{ + struct perm_name { + uint64_t perm; + const char *name; + } permissions[] = { + { BLK_PERM_CONSISTENT_READ, "consistent read" }, + { BLK_PERM_WRITE, "write" }, + { BLK_PERM_WRITE_UNCHANGED, "write unchanged" }, + { BLK_PERM_RESIZE, "resize" }, + { BLK_PERM_GRAPH_MOD, "change children" }, + { 0, NULL } + }; + + GString *result = g_string_sized_new(30); + struct perm_name *p; + + for (p = permissions; p->name; p++) { + if (perm & p->perm) { + if (result->len > 0) { + g_string_append(result, ", "); + } + g_string_append(result, p->name); + } + } + + return g_string_free(result, FALSE); +} + +/* + * Checks whether a new reference to @bs can be added if the new user requires + * @new_used_perm/@new_shared_perm as its permissions. If @ignore_children is + * set, the BdrvChild objects in this list are ignored in the calculations; + * this allows checking permission updates for an existing reference. + * + * See bdrv_check_perm() for the semantics of @tighten_restrictions. + * + * Needs to be followed by a call to either bdrv_set_perm() or + * bdrv_abort_perm_update(). */ +static int bdrv_check_update_perm(BlockDriverState *bs, BlockReopenQueue *q, + uint64_t new_used_perm, + uint64_t new_shared_perm, + GSList *ignore_children, + bool *tighten_restrictions, + Error **errp) +{ + BdrvChild *c; + uint64_t cumulative_perms = new_used_perm; + uint64_t cumulative_shared_perms = new_shared_perm; + + assert(!q || !tighten_restrictions); + + /* There is no reason why anyone couldn't tolerate write_unchanged */ + assert(new_shared_perm & BLK_PERM_WRITE_UNCHANGED); + + QLIST_FOREACH(c, &bs->parents, next_parent) { + if (g_slist_find(ignore_children, c)) { + continue; + } + + if ((new_used_perm & c->shared_perm) != new_used_perm) { + char *user = bdrv_child_user_desc(c); + char *perm_names = bdrv_perm_names(new_used_perm & ~c->shared_perm); + + if (tighten_restrictions) { + *tighten_restrictions = true; + } + + error_setg(errp, "Conflicts with use by %s as '%s', which does not " + "allow '%s' on %s", + user, c->name, perm_names, bdrv_get_node_name(c->bs)); + g_free(user); + g_free(perm_names); + return -EPERM; + } + + if ((c->perm & new_shared_perm) != c->perm) { + char *user = bdrv_child_user_desc(c); + char *perm_names = bdrv_perm_names(c->perm & ~new_shared_perm); + + if (tighten_restrictions) { + *tighten_restrictions = true; + } + + error_setg(errp, "Conflicts with use by %s as '%s', which uses " + "'%s' on %s", + user, c->name, perm_names, bdrv_get_node_name(c->bs)); + g_free(user); + g_free(perm_names); + return -EPERM; + } + + cumulative_perms |= c->perm; + cumulative_shared_perms &= c->shared_perm; + } + + return bdrv_check_perm(bs, q, cumulative_perms, cumulative_shared_perms, + ignore_children, tighten_restrictions, errp); +} + +/* Needs to be followed by a call to either bdrv_child_set_perm() or + * bdrv_child_abort_perm_update(). */ +static int bdrv_child_check_perm(BdrvChild *c, BlockReopenQueue *q, + uint64_t perm, uint64_t shared, + GSList *ignore_children, + bool *tighten_restrictions, Error **errp) +{ + int ret; + + ignore_children = g_slist_prepend(g_slist_copy(ignore_children), c); + ret = bdrv_check_update_perm(c->bs, q, perm, shared, ignore_children, + tighten_restrictions, errp); + g_slist_free(ignore_children); + + if (ret < 0) { + return ret; + } + + if (!c->has_backup_perm) { + c->has_backup_perm = true; + c->backup_perm = c->perm; + c->backup_shared_perm = c->shared_perm; + } + /* + * Note: it's OK if c->has_backup_perm was already set, as we can find the + * same child twice during check_perm procedure + */ + + c->perm = perm; + c->shared_perm = shared; + + return 0; +} + +static void bdrv_child_set_perm(BdrvChild *c, uint64_t perm, uint64_t shared) +{ + uint64_t cumulative_perms, cumulative_shared_perms; + + c->has_backup_perm = false; + + c->perm = perm; + c->shared_perm = shared; + + bdrv_get_cumulative_perm(c->bs, &cumulative_perms, + &cumulative_shared_perms); + bdrv_set_perm(c->bs, cumulative_perms, cumulative_shared_perms); +} + +static void bdrv_child_abort_perm_update(BdrvChild *c) +{ + if (c->has_backup_perm) { + c->perm = c->backup_perm; + c->shared_perm = c->backup_shared_perm; + c->has_backup_perm = false; + } + + bdrv_abort_perm_update(c->bs); +} + +int bdrv_child_try_set_perm(BdrvChild *c, uint64_t perm, uint64_t shared, + Error **errp) +{ + Error *local_err = NULL; + int ret; + bool tighten_restrictions; + + ret = bdrv_child_check_perm(c, NULL, perm, shared, NULL, + &tighten_restrictions, &local_err); + if (ret < 0) { + bdrv_child_abort_perm_update(c); + if (tighten_restrictions) { + error_propagate(errp, local_err); + } else { + /* + * Our caller may intend to only loosen restrictions and + * does not expect this function to fail. Errors are not + * fatal in such a case, so we can just hide them from our + * caller. + */ + error_free(local_err); + ret = 0; + } + return ret; + } + + bdrv_child_set_perm(c, perm, shared); + + return 0; +} + +int bdrv_child_refresh_perms(BlockDriverState *bs, BdrvChild *c, Error **errp) +{ + uint64_t parent_perms, parent_shared; + uint64_t perms, shared; + + bdrv_get_cumulative_perm(bs, &parent_perms, &parent_shared); + bdrv_child_perm(bs, c->bs, c, c->role, NULL, + parent_perms, parent_shared, &perms, &shared); + + return bdrv_child_try_set_perm(c, perms, shared, errp); +} + +/* + * Default implementation for .bdrv_child_perm() for block filters: + * Forward CONSISTENT_READ, WRITE, WRITE_UNCHANGED, and RESIZE to the + * filtered child. + */ +static void bdrv_filter_default_perms(BlockDriverState *bs, BdrvChild *c, + BdrvChildRole role, + BlockReopenQueue *reopen_queue, + uint64_t perm, uint64_t shared, + uint64_t *nperm, uint64_t *nshared) +{ + *nperm = perm & DEFAULT_PERM_PASSTHROUGH; + *nshared = (shared & DEFAULT_PERM_PASSTHROUGH) | DEFAULT_PERM_UNCHANGED; +} + +static void bdrv_default_perms_for_cow(BlockDriverState *bs, BdrvChild *c, + BdrvChildRole role, + BlockReopenQueue *reopen_queue, + uint64_t perm, uint64_t shared, + uint64_t *nperm, uint64_t *nshared) +{ + assert(role & BDRV_CHILD_COW); + + /* + * We want consistent read from backing files if the parent needs it. + * No other operations are performed on backing files. + */ + perm &= BLK_PERM_CONSISTENT_READ; + + /* + * If the parent can deal with changing data, we're okay with a + * writable and resizable backing file. + * TODO Require !(perm & BLK_PERM_CONSISTENT_READ), too? + */ + if (shared & BLK_PERM_WRITE) { + shared = BLK_PERM_WRITE | BLK_PERM_RESIZE; + } else { + shared = 0; + } + + shared |= BLK_PERM_CONSISTENT_READ | BLK_PERM_GRAPH_MOD | + BLK_PERM_WRITE_UNCHANGED; + + if (bs->open_flags & BDRV_O_INACTIVE) { + shared |= BLK_PERM_WRITE | BLK_PERM_RESIZE; + } + + *nperm = perm; + *nshared = shared; +} + +static void bdrv_default_perms_for_storage(BlockDriverState *bs, BdrvChild *c, + BdrvChildRole role, + BlockReopenQueue *reopen_queue, + uint64_t perm, uint64_t shared, + uint64_t *nperm, uint64_t *nshared) +{ + int flags; + + assert(role & (BDRV_CHILD_METADATA | BDRV_CHILD_DATA)); + + flags = bdrv_reopen_get_flags(reopen_queue, bs); + + /* + * Apart from the modifications below, the same permissions are + * forwarded and left alone as for filters + */ + bdrv_filter_default_perms(bs, c, role, reopen_queue, + perm, shared, &perm, &shared); + + if (role & BDRV_CHILD_METADATA) { + /* Format drivers may touch metadata even if the guest doesn't write */ + if (bdrv_is_writable_after_reopen(bs, reopen_queue)) { + perm |= BLK_PERM_WRITE | BLK_PERM_RESIZE; + } + + /* + * bs->file always needs to be consistent because of the + * metadata. We can never allow other users to resize or write + * to it. + */ + if (!(flags & BDRV_O_NO_IO)) { + perm |= BLK_PERM_CONSISTENT_READ; + } + shared &= ~(BLK_PERM_WRITE | BLK_PERM_RESIZE); + } + + if (role & BDRV_CHILD_DATA) { + /* + * Technically, everything in this block is a subset of the + * BDRV_CHILD_METADATA path taken above, and so this could + * be an "else if" branch. However, that is not obvious, and + * this function is not performance critical, therefore we let + * this be an independent "if". + */ + + /* + * We cannot allow other users to resize the file because the + * format driver might have some assumptions about the size + * (e.g. because it is stored in metadata, or because the file + * is split into fixed-size data files). + */ + shared &= ~BLK_PERM_RESIZE; + + /* + * WRITE_UNCHANGED often cannot be performed as such on the + * data file. For example, the qcow2 driver may still need to + * write copied clusters on copy-on-read. + */ + if (perm & BLK_PERM_WRITE_UNCHANGED) { + perm |= BLK_PERM_WRITE; + } + + /* + * If the data file is written to, the format driver may + * expect to be able to resize it by writing beyond the EOF. + */ + if (perm & BLK_PERM_WRITE) { + perm |= BLK_PERM_RESIZE; + } + } + + if (bs->open_flags & BDRV_O_INACTIVE) { + shared |= BLK_PERM_WRITE | BLK_PERM_RESIZE; + } + + *nperm = perm; + *nshared = shared; +} + +void bdrv_default_perms(BlockDriverState *bs, BdrvChild *c, + BdrvChildRole role, BlockReopenQueue *reopen_queue, + uint64_t perm, uint64_t shared, + uint64_t *nperm, uint64_t *nshared) +{ + if (role & BDRV_CHILD_FILTERED) { + assert(!(role & (BDRV_CHILD_DATA | BDRV_CHILD_METADATA | + BDRV_CHILD_COW))); + bdrv_filter_default_perms(bs, c, role, reopen_queue, + perm, shared, nperm, nshared); + } else if (role & BDRV_CHILD_COW) { + assert(!(role & (BDRV_CHILD_DATA | BDRV_CHILD_METADATA))); + bdrv_default_perms_for_cow(bs, c, role, reopen_queue, + perm, shared, nperm, nshared); + } else if (role & (BDRV_CHILD_METADATA | BDRV_CHILD_DATA)) { + bdrv_default_perms_for_storage(bs, c, role, reopen_queue, + perm, shared, nperm, nshared); + } else { + g_assert_not_reached(); + } +} + +uint64_t bdrv_qapi_perm_to_blk_perm(BlockPermission qapi_perm) +{ + static const uint64_t permissions[] = { + [BLOCK_PERMISSION_CONSISTENT_READ] = BLK_PERM_CONSISTENT_READ, + [BLOCK_PERMISSION_WRITE] = BLK_PERM_WRITE, + [BLOCK_PERMISSION_WRITE_UNCHANGED] = BLK_PERM_WRITE_UNCHANGED, + [BLOCK_PERMISSION_RESIZE] = BLK_PERM_RESIZE, + [BLOCK_PERMISSION_GRAPH_MOD] = BLK_PERM_GRAPH_MOD, + }; + + QEMU_BUILD_BUG_ON(ARRAY_SIZE(permissions) != BLOCK_PERMISSION__MAX); + QEMU_BUILD_BUG_ON(1UL << ARRAY_SIZE(permissions) != BLK_PERM_ALL + 1); + + assert(qapi_perm < BLOCK_PERMISSION__MAX); + + return permissions[qapi_perm]; +} + +static void bdrv_replace_child_noperm(BdrvChild *child, + BlockDriverState *new_bs) +{ + BlockDriverState *old_bs = child->bs; + int new_bs_quiesce_counter; + int drain_saldo; + + assert(!child->frozen); + + if (old_bs && new_bs) { + assert(bdrv_get_aio_context(old_bs) == bdrv_get_aio_context(new_bs)); + } + + new_bs_quiesce_counter = (new_bs ? new_bs->quiesce_counter : 0); + drain_saldo = new_bs_quiesce_counter - child->parent_quiesce_counter; + + /* + * If the new child node is drained but the old one was not, flush + * all outstanding requests to the old child node. + */ + while (drain_saldo > 0 && child->klass->drained_begin) { + bdrv_parent_drained_begin_single(child, true); + drain_saldo--; + } + + if (old_bs) { + /* Detach first so that the recursive drain sections coming from @child + * are already gone and we only end the drain sections that came from + * elsewhere. */ + if (child->klass->detach) { + child->klass->detach(child); + } + QLIST_REMOVE(child, next_parent); + } + + child->bs = new_bs; + + if (new_bs) { + QLIST_INSERT_HEAD(&new_bs->parents, child, next_parent); + + /* + * Detaching the old node may have led to the new node's + * quiesce_counter having been decreased. Not a problem, we + * just need to recognize this here and then invoke + * drained_end appropriately more often. + */ + assert(new_bs->quiesce_counter <= new_bs_quiesce_counter); + drain_saldo += new_bs->quiesce_counter - new_bs_quiesce_counter; + + /* Attach only after starting new drained sections, so that recursive + * drain sections coming from @child don't get an extra .drained_begin + * callback. */ + if (child->klass->attach) { + child->klass->attach(child); + } + } + + /* + * If the old child node was drained but the new one is not, allow + * requests to come in only after the new node has been attached. + */ + while (drain_saldo < 0 && child->klass->drained_end) { + bdrv_parent_drained_end_single(child); + drain_saldo++; + } +} + +/* + * Updates @child to change its reference to point to @new_bs, including + * checking and applying the necessary permission updates both to the old node + * and to @new_bs. + * + * NULL is passed as @new_bs for removing the reference before freeing @child. + * + * If @new_bs is not NULL, bdrv_check_perm() must be called beforehand, as this + * function uses bdrv_set_perm() to update the permissions according to the new + * reference that @new_bs gets. + * + * Callers must ensure that child->frozen is false. + */ +static void bdrv_replace_child(BdrvChild *child, BlockDriverState *new_bs) +{ + BlockDriverState *old_bs = child->bs; + uint64_t perm, shared_perm; + + /* Asserts that child->frozen == false */ + bdrv_replace_child_noperm(child, new_bs); + + /* + * Start with the new node's permissions. If @new_bs is a (direct + * or indirect) child of @old_bs, we must complete the permission + * update on @new_bs before we loosen the restrictions on @old_bs. + * Otherwise, bdrv_check_perm() on @old_bs would re-initiate + * updating the permissions of @new_bs, and thus not purely loosen + * restrictions. + */ + if (new_bs) { + bdrv_get_cumulative_perm(new_bs, &perm, &shared_perm); + bdrv_set_perm(new_bs, perm, shared_perm); + } + + if (old_bs) { + /* Update permissions for old node. This is guaranteed to succeed + * because we're just taking a parent away, so we're loosening + * restrictions. */ + bool tighten_restrictions; + int ret; + + bdrv_get_cumulative_perm(old_bs, &perm, &shared_perm); + ret = bdrv_check_perm(old_bs, NULL, perm, shared_perm, NULL, + &tighten_restrictions, NULL); + assert(tighten_restrictions == false); + if (ret < 0) { + /* We only tried to loosen restrictions, so errors are not fatal */ + bdrv_abort_perm_update(old_bs); + } else { + bdrv_set_perm(old_bs, perm, shared_perm); + } + + /* When the parent requiring a non-default AioContext is removed, the + * node moves back to the main AioContext */ + bdrv_try_set_aio_context(old_bs, qemu_get_aio_context(), NULL); + } +} + +/* + * This function steals the reference to child_bs from the caller. + * That reference is later dropped by bdrv_root_unref_child(). + * + * On failure NULL is returned, errp is set and the reference to + * child_bs is also dropped. + * + * The caller must hold the AioContext lock @child_bs, but not that of @ctx + * (unless @child_bs is already in @ctx). + */ +BdrvChild *bdrv_root_attach_child(BlockDriverState *child_bs, + const char *child_name, + const BdrvChildClass *child_class, + BdrvChildRole child_role, + AioContext *ctx, + uint64_t perm, uint64_t shared_perm, + void *opaque, Error **errp) +{ + BdrvChild *child; + Error *local_err = NULL; + int ret; + + ret = bdrv_check_update_perm(child_bs, NULL, perm, shared_perm, NULL, NULL, + errp); + if (ret < 0) { + bdrv_abort_perm_update(child_bs); + bdrv_unref(child_bs); + return NULL; + } + + child = g_new(BdrvChild, 1); + *child = (BdrvChild) { + .bs = NULL, + .name = g_strdup(child_name), + .klass = child_class, + .role = child_role, + .perm = perm, + .shared_perm = shared_perm, + .opaque = opaque, + }; + + /* If the AioContexts don't match, first try to move the subtree of + * child_bs into the AioContext of the new parent. If this doesn't work, + * try moving the parent into the AioContext of child_bs instead. */ + if (bdrv_get_aio_context(child_bs) != ctx) { + ret = bdrv_try_set_aio_context(child_bs, ctx, &local_err); + if (ret < 0 && child_class->can_set_aio_ctx) { + GSList *ignore = g_slist_prepend(NULL, child); + ctx = bdrv_get_aio_context(child_bs); + if (child_class->can_set_aio_ctx(child, ctx, &ignore, NULL)) { + error_free(local_err); + ret = 0; + g_slist_free(ignore); + ignore = g_slist_prepend(NULL, child); + child_class->set_aio_ctx(child, ctx, &ignore); + } + g_slist_free(ignore); + } + if (ret < 0) { + error_propagate(errp, local_err); + g_free(child); + bdrv_abort_perm_update(child_bs); + bdrv_unref(child_bs); + return NULL; + } + } + + /* This performs the matching bdrv_set_perm() for the above check. */ + bdrv_replace_child(child, child_bs); + + return child; +} + +/* + * This function transfers the reference to child_bs from the caller + * to parent_bs. That reference is later dropped by parent_bs on + * bdrv_close() or if someone calls bdrv_unref_child(). + * + * On failure NULL is returned, errp is set and the reference to + * child_bs is also dropped. + * + * If @parent_bs and @child_bs are in different AioContexts, the caller must + * hold the AioContext lock for @child_bs, but not for @parent_bs. + */ +BdrvChild *bdrv_attach_child(BlockDriverState *parent_bs, + BlockDriverState *child_bs, + const char *child_name, + const BdrvChildClass *child_class, + BdrvChildRole child_role, + Error **errp) +{ + BdrvChild *child; + uint64_t perm, shared_perm; + + bdrv_get_cumulative_perm(parent_bs, &perm, &shared_perm); + + assert(parent_bs->drv); + bdrv_child_perm(parent_bs, child_bs, NULL, child_role, NULL, + perm, shared_perm, &perm, &shared_perm); + + child = bdrv_root_attach_child(child_bs, child_name, child_class, + child_role, bdrv_get_aio_context(parent_bs), + perm, shared_perm, parent_bs, errp); + if (child == NULL) { + return NULL; + } + + QLIST_INSERT_HEAD(&parent_bs->children, child, next); + return child; +} + +static void bdrv_detach_child(BdrvChild *child) +{ + QLIST_SAFE_REMOVE(child, next); + + bdrv_replace_child(child, NULL); + + g_free(child->name); + g_free(child); +} + +/* Callers must ensure that child->frozen is false. */ +void bdrv_root_unref_child(BdrvChild *child) +{ + BlockDriverState *child_bs; + + child_bs = child->bs; + bdrv_detach_child(child); + bdrv_unref(child_bs); +} + +/** + * Clear all inherits_from pointers from children and grandchildren of + * @root that point to @root, where necessary. + */ +static void bdrv_unset_inherits_from(BlockDriverState *root, BdrvChild *child) +{ + BdrvChild *c; + + if (child->bs->inherits_from == root) { + /* + * Remove inherits_from only when the last reference between root and + * child->bs goes away. + */ + QLIST_FOREACH(c, &root->children, next) { + if (c != child && c->bs == child->bs) { + break; + } + } + if (c == NULL) { + child->bs->inherits_from = NULL; + } + } + + QLIST_FOREACH(c, &child->bs->children, next) { + bdrv_unset_inherits_from(root, c); + } +} + +/* Callers must ensure that child->frozen is false. */ +void bdrv_unref_child(BlockDriverState *parent, BdrvChild *child) +{ + if (child == NULL) { + return; + } + + bdrv_unset_inherits_from(parent, child); + bdrv_root_unref_child(child); +} + + +static void bdrv_parent_cb_change_media(BlockDriverState *bs, bool load) +{ + BdrvChild *c; + QLIST_FOREACH(c, &bs->parents, next_parent) { + if (c->klass->change_media) { + c->klass->change_media(c, load); + } + } +} + +/* Return true if you can reach parent going through child->inherits_from + * recursively. If parent or child are NULL, return false */ +static bool bdrv_inherits_from_recursive(BlockDriverState *child, + BlockDriverState *parent) +{ + while (child && child != parent) { + child = child->inherits_from; + } + + return child != NULL; +} + +/* + * Return the BdrvChildRole for @bs's backing child. bs->backing is + * mostly used for COW backing children (role = COW), but also for + * filtered children (role = FILTERED | PRIMARY). + */ +static BdrvChildRole bdrv_backing_role(BlockDriverState *bs) +{ + if (bs->drv && bs->drv->is_filter) { + return BDRV_CHILD_FILTERED | BDRV_CHILD_PRIMARY; + } else { + return BDRV_CHILD_COW; + } +} + +/* + * Sets the bs->backing link of a BDS. A new reference is created; callers + * which don't need their own reference any more must call bdrv_unref(). + */ +void bdrv_set_backing_hd(BlockDriverState *bs, BlockDriverState *backing_hd, + Error **errp) +{ + bool update_inherits_from = bdrv_chain_contains(bs, backing_hd) && + bdrv_inherits_from_recursive(backing_hd, bs); + + if (bdrv_is_backing_chain_frozen(bs, child_bs(bs->backing), errp)) { + return; + } + + if (backing_hd) { + bdrv_ref(backing_hd); + } + + if (bs->backing) { + /* Cannot be frozen, we checked that above */ + bdrv_unref_child(bs, bs->backing); + bs->backing = NULL; + } + + if (!backing_hd) { + goto out; + } + + bs->backing = bdrv_attach_child(bs, backing_hd, "backing", &child_of_bds, + bdrv_backing_role(bs), errp); + /* If backing_hd was already part of bs's backing chain, and + * inherits_from pointed recursively to bs then let's update it to + * point directly to bs (else it will become NULL). */ + if (bs->backing && update_inherits_from) { + backing_hd->inherits_from = bs; + } + +out: + bdrv_refresh_limits(bs, NULL); +} + +/* + * Opens the backing file for a BlockDriverState if not yet open + * + * bdref_key specifies the key for the image's BlockdevRef in the options QDict. + * That QDict has to be flattened; therefore, if the BlockdevRef is a QDict + * itself, all options starting with "${bdref_key}." are considered part of the + * BlockdevRef. + * + * TODO Can this be unified with bdrv_open_image()? + */ +int bdrv_open_backing_file(BlockDriverState *bs, QDict *parent_options, + const char *bdref_key, Error **errp) +{ + char *backing_filename = NULL; + char *bdref_key_dot; + const char *reference = NULL; + int ret = 0; + bool implicit_backing = false; + BlockDriverState *backing_hd; + QDict *options; + QDict *tmp_parent_options = NULL; + Error *local_err = NULL; + + if (bs->backing != NULL) { + goto free_exit; + } + + /* NULL means an empty set of options */ + if (parent_options == NULL) { + tmp_parent_options = qdict_new(); + parent_options = tmp_parent_options; + } + + bs->open_flags &= ~BDRV_O_NO_BACKING; + + bdref_key_dot = g_strdup_printf("%s.", bdref_key); + qdict_extract_subqdict(parent_options, &options, bdref_key_dot); + g_free(bdref_key_dot); + + /* + * Caution: while qdict_get_try_str() is fine, getting non-string + * types would require more care. When @parent_options come from + * -blockdev or blockdev_add, its members are typed according to + * the QAPI schema, but when they come from -drive, they're all + * QString. + */ + reference = qdict_get_try_str(parent_options, bdref_key); + if (reference || qdict_haskey(options, "file.filename")) { + /* keep backing_filename NULL */ + } else if (bs->backing_file[0] == '\0' && qdict_size(options) == 0) { + qobject_unref(options); + goto free_exit; + } else { + if (qdict_size(options) == 0) { + /* If the user specifies options that do not modify the + * backing file's behavior, we might still consider it the + * implicit backing file. But it's easier this way, and + * just specifying some of the backing BDS's options is + * only possible with -drive anyway (otherwise the QAPI + * schema forces the user to specify everything). */ + implicit_backing = !strcmp(bs->auto_backing_file, bs->backing_file); + } + + backing_filename = bdrv_get_full_backing_filename(bs, &local_err); + if (local_err) { + ret = -EINVAL; + error_propagate(errp, local_err); + qobject_unref(options); + goto free_exit; + } + } + + if (!bs->drv || !bs->drv->supports_backing) { + ret = -EINVAL; + error_setg(errp, "Driver doesn't support backing files"); + qobject_unref(options); + goto free_exit; + } + + if (!reference && + bs->backing_format[0] != '\0' && !qdict_haskey(options, "driver")) { + qdict_put_str(options, "driver", bs->backing_format); + } + + backing_hd = bdrv_open_inherit(backing_filename, reference, options, 0, bs, + &child_of_bds, bdrv_backing_role(bs), errp); + if (!backing_hd) { + bs->open_flags |= BDRV_O_NO_BACKING; + error_prepend(errp, "Could not open backing file: "); + ret = -EINVAL; + goto free_exit; + } + + if (implicit_backing) { + bdrv_refresh_filename(backing_hd); + pstrcpy(bs->auto_backing_file, sizeof(bs->auto_backing_file), + backing_hd->filename); + } + + /* Hook up the backing file link; drop our reference, bs owns the + * backing_hd reference now */ + bdrv_set_backing_hd(bs, backing_hd, &local_err); + bdrv_unref(backing_hd); + if (local_err) { + error_propagate(errp, local_err); + ret = -EINVAL; + goto free_exit; + } + + qdict_del(parent_options, bdref_key); + +free_exit: + g_free(backing_filename); + qobject_unref(tmp_parent_options); + return ret; +} + +static BlockDriverState * +bdrv_open_child_bs(const char *filename, QDict *options, const char *bdref_key, + BlockDriverState *parent, const BdrvChildClass *child_class, + BdrvChildRole child_role, bool allow_none, Error **errp) +{ + BlockDriverState *bs = NULL; + QDict *image_options; + char *bdref_key_dot; + const char *reference; + + assert(child_class != NULL); + + bdref_key_dot = g_strdup_printf("%s.", bdref_key); + qdict_extract_subqdict(options, &image_options, bdref_key_dot); + g_free(bdref_key_dot); + + /* + * Caution: while qdict_get_try_str() is fine, getting non-string + * types would require more care. When @options come from + * -blockdev or blockdev_add, its members are typed according to + * the QAPI schema, but when they come from -drive, they're all + * QString. + */ + reference = qdict_get_try_str(options, bdref_key); + if (!filename && !reference && !qdict_size(image_options)) { + if (!allow_none) { + error_setg(errp, "A block device must be specified for \"%s\"", + bdref_key); + } + qobject_unref(image_options); + goto done; + } + + bs = bdrv_open_inherit(filename, reference, image_options, 0, + parent, child_class, child_role, errp); + if (!bs) { + goto done; + } + +done: + qdict_del(options, bdref_key); + return bs; +} + +/* + * Opens a disk image whose options are given as BlockdevRef in another block + * device's options. + * + * If allow_none is true, no image will be opened if filename is false and no + * BlockdevRef is given. NULL will be returned, but errp remains unset. + * + * bdrev_key specifies the key for the image's BlockdevRef in the options QDict. + * That QDict has to be flattened; therefore, if the BlockdevRef is a QDict + * itself, all options starting with "${bdref_key}." are considered part of the + * BlockdevRef. + * + * The BlockdevRef will be removed from the options QDict. + */ +BdrvChild *bdrv_open_child(const char *filename, + QDict *options, const char *bdref_key, + BlockDriverState *parent, + const BdrvChildClass *child_class, + BdrvChildRole child_role, + bool allow_none, Error **errp) +{ + BlockDriverState *bs; + + bs = bdrv_open_child_bs(filename, options, bdref_key, parent, child_class, + child_role, allow_none, errp); + if (bs == NULL) { + return NULL; + } + + return bdrv_attach_child(parent, bs, bdref_key, child_class, child_role, + errp); +} + +/* + * TODO Future callers may need to specify parent/child_class in order for + * option inheritance to work. Existing callers use it for the root node. + */ +BlockDriverState *bdrv_open_blockdev_ref(BlockdevRef *ref, Error **errp) +{ + BlockDriverState *bs = NULL; + QObject *obj = NULL; + QDict *qdict = NULL; + const char *reference = NULL; + Visitor *v = NULL; + + if (ref->type == QTYPE_QSTRING) { + reference = ref->u.reference; + } else { + BlockdevOptions *options = &ref->u.definition; + assert(ref->type == QTYPE_QDICT); + + v = qobject_output_visitor_new(&obj); + visit_type_BlockdevOptions(v, NULL, &options, &error_abort); + visit_complete(v, &obj); + + qdict = qobject_to(QDict, obj); + qdict_flatten(qdict); + + /* bdrv_open_inherit() defaults to the values in bdrv_flags (for + * compatibility with other callers) rather than what we want as the + * real defaults. Apply the defaults here instead. */ + qdict_set_default_str(qdict, BDRV_OPT_CACHE_DIRECT, "off"); + qdict_set_default_str(qdict, BDRV_OPT_CACHE_NO_FLUSH, "off"); + qdict_set_default_str(qdict, BDRV_OPT_READ_ONLY, "off"); + qdict_set_default_str(qdict, BDRV_OPT_AUTO_READ_ONLY, "off"); + + } + + bs = bdrv_open_inherit(NULL, reference, qdict, 0, NULL, NULL, 0, errp); + obj = NULL; + qobject_unref(obj); + visit_free(v); + return bs; +} + +static BlockDriverState *bdrv_append_temp_snapshot(BlockDriverState *bs, + int flags, + QDict *snapshot_options, + Error **errp) +{ + /* TODO: extra byte is a hack to ensure MAX_PATH space on Windows. */ + char *tmp_filename = g_malloc0(PATH_MAX + 1); + int64_t total_size; + QemuOpts *opts = NULL; + BlockDriverState *bs_snapshot = NULL; + Error *local_err = NULL; + int ret; + + /* if snapshot, we create a temporary backing file and open it + instead of opening 'filename' directly */ + + /* Get the required size from the image */ + total_size = bdrv_getlength(bs); + if (total_size < 0) { + error_setg_errno(errp, -total_size, "Could not get image size"); + goto out; + } + + /* Create the temporary image */ + ret = get_tmp_filename(tmp_filename, PATH_MAX + 1); + if (ret < 0) { + error_setg_errno(errp, -ret, "Could not get temporary filename"); + goto out; + } + + opts = qemu_opts_create(bdrv_qcow2.create_opts, NULL, 0, + &error_abort); + qemu_opt_set_number(opts, BLOCK_OPT_SIZE, total_size, &error_abort); + ret = bdrv_create(&bdrv_qcow2, tmp_filename, opts, errp); + qemu_opts_del(opts); + if (ret < 0) { + error_prepend(errp, "Could not create temporary overlay '%s': ", + tmp_filename); + goto out; + } + + /* Prepare options QDict for the temporary file */ + qdict_put_str(snapshot_options, "file.driver", "file"); + qdict_put_str(snapshot_options, "file.filename", tmp_filename); + qdict_put_str(snapshot_options, "driver", "qcow2"); + + bs_snapshot = bdrv_open(NULL, NULL, snapshot_options, flags, errp); + snapshot_options = NULL; + if (!bs_snapshot) { + goto out; + } + + /* bdrv_append() consumes a strong reference to bs_snapshot + * (i.e. it will call bdrv_unref() on it) even on error, so in + * order to be able to return one, we have to increase + * bs_snapshot's refcount here */ + bdrv_ref(bs_snapshot); + bdrv_append(bs_snapshot, bs, &local_err); + if (local_err) { + error_propagate(errp, local_err); + bs_snapshot = NULL; + goto out; + } + +out: + qobject_unref(snapshot_options); + g_free(tmp_filename); + return bs_snapshot; +} + +/* + * Opens a disk image (raw, qcow2, vmdk, ...) + * + * options is a QDict of options to pass to the block drivers, or NULL for an + * empty set of options. The reference to the QDict belongs to the block layer + * after the call (even on failure), so if the caller intends to reuse the + * dictionary, it needs to use qobject_ref() before calling bdrv_open. + * + * If *pbs is NULL, a new BDS will be created with a pointer to it stored there. + * If it is not NULL, the referenced BDS will be reused. + * + * The reference parameter may be used to specify an existing block device which + * should be opened. If specified, neither options nor a filename may be given, + * nor can an existing BDS be reused (that is, *pbs has to be NULL). + */ +static BlockDriverState *bdrv_open_inherit(const char *filename, + const char *reference, + QDict *options, int flags, + BlockDriverState *parent, + const BdrvChildClass *child_class, + BdrvChildRole child_role, + Error **errp) +{ + int ret; + BlockBackend *file = NULL; + BlockDriverState *bs; + BlockDriver *drv = NULL; + BdrvChild *child; + const char *drvname; + const char *backing; + Error *local_err = NULL; + QDict *snapshot_options = NULL; + int snapshot_flags = 0; + + assert(!child_class || !flags); + assert(!child_class == !parent); + + if (reference) { + bool options_non_empty = options ? qdict_size(options) : false; + qobject_unref(options); + + if (filename || options_non_empty) { + error_setg(errp, "Cannot reference an existing block device with " + "additional options or a new filename"); + return NULL; + } + + bs = bdrv_lookup_bs(reference, reference, errp); + if (!bs) { + return NULL; + } + + bdrv_ref(bs); + return bs; + } + + bs = bdrv_new(); + + /* NULL means an empty set of options */ + if (options == NULL) { + options = qdict_new(); + } + + /* json: syntax counts as explicit options, as if in the QDict */ + parse_json_protocol(options, &filename, &local_err); + if (local_err) { + goto fail; + } + + bs->explicit_options = qdict_clone_shallow(options); + + if (child_class) { + bool parent_is_format; + + if (parent->drv) { + parent_is_format = parent->drv->is_format; + } else { + /* + * parent->drv is not set yet because this node is opened for + * (potential) format probing. That means that @parent is going + * to be a format node. + */ + parent_is_format = true; + } + + bs->inherits_from = parent; + child_class->inherit_options(child_role, parent_is_format, + &flags, options, + parent->open_flags, parent->options); + } + + ret = bdrv_fill_options(&options, filename, &flags, &local_err); + if (ret < 0) { + goto fail; + } + + /* + * Set the BDRV_O_RDWR and BDRV_O_ALLOW_RDWR flags. + * Caution: getting a boolean member of @options requires care. + * When @options come from -blockdev or blockdev_add, members are + * typed according to the QAPI schema, but when they come from + * -drive, they're all QString. + */ + if (g_strcmp0(qdict_get_try_str(options, BDRV_OPT_READ_ONLY), "on") && + !qdict_get_try_bool(options, BDRV_OPT_READ_ONLY, false)) { + flags |= (BDRV_O_RDWR | BDRV_O_ALLOW_RDWR); + } else { + flags &= ~BDRV_O_RDWR; + } + + if (flags & BDRV_O_SNAPSHOT) { + snapshot_options = qdict_new(); + bdrv_temp_snapshot_options(&snapshot_flags, snapshot_options, + flags, options); + /* Let bdrv_backing_options() override "read-only" */ + qdict_del(options, BDRV_OPT_READ_ONLY); + bdrv_inherited_options(BDRV_CHILD_COW, true, + &flags, options, flags, options); + } + + bs->open_flags = flags; + bs->options = options; + options = qdict_clone_shallow(options); + + /* Find the right image format driver */ + /* See cautionary note on accessing @options above */ + drvname = qdict_get_try_str(options, "driver"); + if (drvname) { + drv = bdrv_find_format(drvname); + if (!drv) { + error_setg(errp, "Unknown driver: '%s'", drvname); + goto fail; + } + } + + assert(drvname || !(flags & BDRV_O_PROTOCOL)); + + /* See cautionary note on accessing @options above */ + backing = qdict_get_try_str(options, "backing"); + if (qobject_to(QNull, qdict_get(options, "backing")) != NULL || + (backing && *backing == '\0')) + { + if (backing) { + warn_report("Use of \"backing\": \"\" is deprecated; " + "use \"backing\": null instead"); + } + flags |= BDRV_O_NO_BACKING; + qdict_del(bs->explicit_options, "backing"); + qdict_del(bs->options, "backing"); + qdict_del(options, "backing"); + } + + /* Open image file without format layer. This BlockBackend is only used for + * probing, the block drivers will do their own bdrv_open_child() for the + * same BDS, which is why we put the node name back into options. */ + if ((flags & BDRV_O_PROTOCOL) == 0) { + BlockDriverState *file_bs; + + file_bs = bdrv_open_child_bs(filename, options, "file", bs, + &child_of_bds, BDRV_CHILD_IMAGE, + true, &local_err); + if (local_err) { + goto fail; + } + if (file_bs != NULL) { + /* Not requesting BLK_PERM_CONSISTENT_READ because we're only + * looking at the header to guess the image format. This works even + * in cases where a guest would not see a consistent state. */ + file = blk_new(bdrv_get_aio_context(file_bs), 0, BLK_PERM_ALL); + blk_insert_bs(file, file_bs, &local_err); + bdrv_unref(file_bs); + if (local_err) { + goto fail; + } + + qdict_put_str(options, "file", bdrv_get_node_name(file_bs)); + } + } + + /* Image format probing */ + bs->probed = !drv; + if (!drv && file) { + ret = find_image_format(file, filename, &drv, &local_err); + if (ret < 0) { + goto fail; + } + /* + * This option update would logically belong in bdrv_fill_options(), + * but we first need to open bs->file for the probing to work, while + * opening bs->file already requires the (mostly) final set of options + * so that cache mode etc. can be inherited. + * + * Adding the driver later is somewhat ugly, but it's not an option + * that would ever be inherited, so it's correct. We just need to make + * sure to update both bs->options (which has the full effective + * options for bs) and options (which has file.* already removed). + */ + qdict_put_str(bs->options, "driver", drv->format_name); + qdict_put_str(options, "driver", drv->format_name); + } else if (!drv) { + error_setg(errp, "Must specify either driver or file"); + goto fail; + } + + /* BDRV_O_PROTOCOL must be set iff a protocol BDS is about to be created */ + assert(!!(flags & BDRV_O_PROTOCOL) == !!drv->bdrv_file_open); + /* file must be NULL if a protocol BDS is about to be created + * (the inverse results in an error message from bdrv_open_common()) */ + assert(!(flags & BDRV_O_PROTOCOL) || !file); + + /* Open the image */ + ret = bdrv_open_common(bs, file, options, &local_err); + if (ret < 0) { + goto fail; + } + + if (file) { + blk_unref(file); + file = NULL; + } + + /* If there is a backing file, use it */ + if ((flags & BDRV_O_NO_BACKING) == 0) { + ret = bdrv_open_backing_file(bs, options, "backing", &local_err); + if (ret < 0) { + goto close_and_fail; + } + } + + /* Remove all children options and references + * from bs->options and bs->explicit_options */ + QLIST_FOREACH(child, &bs->children, next) { + char *child_key_dot; + child_key_dot = g_strdup_printf("%s.", child->name); + qdict_extract_subqdict(bs->explicit_options, NULL, child_key_dot); + qdict_extract_subqdict(bs->options, NULL, child_key_dot); + qdict_del(bs->explicit_options, child->name); + qdict_del(bs->options, child->name); + g_free(child_key_dot); + } + + /* Check if any unknown options were used */ + if (qdict_size(options) != 0) { + const QDictEntry *entry = qdict_first(options); + if (flags & BDRV_O_PROTOCOL) { + error_setg(errp, "Block protocol '%s' doesn't support the option " + "'%s'", drv->format_name, entry->key); + } else { + error_setg(errp, + "Block format '%s' does not support the option '%s'", + drv->format_name, entry->key); + } + + goto close_and_fail; + } + + bdrv_parent_cb_change_media(bs, true); + + qobject_unref(options); + options = NULL; + + /* For snapshot=on, create a temporary qcow2 overlay. bs points to the + * temporary snapshot afterwards. */ + if (snapshot_flags) { + BlockDriverState *snapshot_bs; + snapshot_bs = bdrv_append_temp_snapshot(bs, snapshot_flags, + snapshot_options, &local_err); + snapshot_options = NULL; + if (local_err) { + goto close_and_fail; + } + /* We are not going to return bs but the overlay on top of it + * (snapshot_bs); thus, we have to drop the strong reference to bs + * (which we obtained by calling bdrv_new()). bs will not be deleted, + * though, because the overlay still has a reference to it. */ + bdrv_unref(bs); + bs = snapshot_bs; + } + + return bs; + +fail: + blk_unref(file); + qobject_unref(snapshot_options); + qobject_unref(bs->explicit_options); + qobject_unref(bs->options); + qobject_unref(options); + bs->options = NULL; + bs->explicit_options = NULL; + bdrv_unref(bs); + error_propagate(errp, local_err); + return NULL; + +close_and_fail: + bdrv_unref(bs); + qobject_unref(snapshot_options); + qobject_unref(options); + error_propagate(errp, local_err); + return NULL; +} + +BlockDriverState *bdrv_open(const char *filename, const char *reference, + QDict *options, int flags, Error **errp) +{ + return bdrv_open_inherit(filename, reference, options, flags, NULL, + NULL, 0, errp); +} + +/* Return true if the NULL-terminated @list contains @str */ +static bool is_str_in_list(const char *str, const char *const *list) +{ + if (str && list) { + int i; + for (i = 0; list[i] != NULL; i++) { + if (!strcmp(str, list[i])) { + return true; + } + } + } + return false; +} + +/* + * Check that every option set in @bs->options is also set in + * @new_opts. + * + * Options listed in the common_options list and in + * @bs->drv->mutable_opts are skipped. + * + * Return 0 on success, otherwise return -EINVAL and set @errp. + */ +static int bdrv_reset_options_allowed(BlockDriverState *bs, + const QDict *new_opts, Error **errp) +{ + const QDictEntry *e; + /* These options are common to all block drivers and are handled + * in bdrv_reopen_prepare() so they can be left out of @new_opts */ + const char *const common_options[] = { + "node-name", "discard", "cache.direct", "cache.no-flush", + "read-only", "auto-read-only", "detect-zeroes", NULL + }; + + for (e = qdict_first(bs->options); e; e = qdict_next(bs->options, e)) { + if (!qdict_haskey(new_opts, e->key) && + !is_str_in_list(e->key, common_options) && + !is_str_in_list(e->key, bs->drv->mutable_opts)) { + error_setg(errp, "Option '%s' cannot be reset " + "to its default value", e->key); + return -EINVAL; + } + } + + return 0; +} + +/* + * Returns true if @child can be reached recursively from @bs + */ +static bool bdrv_recurse_has_child(BlockDriverState *bs, + BlockDriverState *child) +{ + BdrvChild *c; + + if (bs == child) { + return true; + } + + QLIST_FOREACH(c, &bs->children, next) { + if (bdrv_recurse_has_child(c->bs, child)) { + return true; + } + } + + return false; +} + +/* + * Adds a BlockDriverState to a simple queue for an atomic, transactional + * reopen of multiple devices. + * + * bs_queue can either be an existing BlockReopenQueue that has had QTAILQ_INIT + * already performed, or alternatively may be NULL a new BlockReopenQueue will + * be created and initialized. This newly created BlockReopenQueue should be + * passed back in for subsequent calls that are intended to be of the same + * atomic 'set'. + * + * bs is the BlockDriverState to add to the reopen queue. + * + * options contains the changed options for the associated bs + * (the BlockReopenQueue takes ownership) + * + * flags contains the open flags for the associated bs + * + * returns a pointer to bs_queue, which is either the newly allocated + * bs_queue, or the existing bs_queue being used. + * + * bs must be drained between bdrv_reopen_queue() and bdrv_reopen_multiple(). + */ +static BlockReopenQueue *bdrv_reopen_queue_child(BlockReopenQueue *bs_queue, + BlockDriverState *bs, + QDict *options, + const BdrvChildClass *klass, + BdrvChildRole role, + bool parent_is_format, + QDict *parent_options, + int parent_flags, + bool keep_old_opts) +{ + assert(bs != NULL); + + BlockReopenQueueEntry *bs_entry; + BdrvChild *child; + QDict *old_options, *explicit_options, *options_copy; + int flags; + QemuOpts *opts; + + /* Make sure that the caller remembered to use a drained section. This is + * important to avoid graph changes between the recursive queuing here and + * bdrv_reopen_multiple(). */ + assert(bs->quiesce_counter > 0); + + if (bs_queue == NULL) { + bs_queue = g_new0(BlockReopenQueue, 1); + QTAILQ_INIT(bs_queue); + } + + if (!options) { + options = qdict_new(); + } + + /* Check if this BlockDriverState is already in the queue */ + QTAILQ_FOREACH(bs_entry, bs_queue, entry) { + if (bs == bs_entry->state.bs) { + break; + } + } + + /* + * Precedence of options: + * 1. Explicitly passed in options (highest) + * 2. Retained from explicitly set options of bs + * 3. Inherited from parent node + * 4. Retained from effective options of bs + */ + + /* Old explicitly set values (don't overwrite by inherited value) */ + if (bs_entry || keep_old_opts) { + old_options = qdict_clone_shallow(bs_entry ? + bs_entry->state.explicit_options : + bs->explicit_options); + bdrv_join_options(bs, options, old_options); + qobject_unref(old_options); + } + + explicit_options = qdict_clone_shallow(options); + + /* Inherit from parent node */ + if (parent_options) { + flags = 0; + klass->inherit_options(role, parent_is_format, &flags, options, + parent_flags, parent_options); + } else { + flags = bdrv_get_flags(bs); + } + + if (keep_old_opts) { + /* Old values are used for options that aren't set yet */ + old_options = qdict_clone_shallow(bs->options); + bdrv_join_options(bs, options, old_options); + qobject_unref(old_options); + } + + /* We have the final set of options so let's update the flags */ + options_copy = qdict_clone_shallow(options); + opts = qemu_opts_create(&bdrv_runtime_opts, NULL, 0, &error_abort); + qemu_opts_absorb_qdict(opts, options_copy, NULL); + update_flags_from_options(&flags, opts); + qemu_opts_del(opts); + qobject_unref(options_copy); + + /* bdrv_open_inherit() sets and clears some additional flags internally */ + flags &= ~BDRV_O_PROTOCOL; + if (flags & BDRV_O_RDWR) { + flags |= BDRV_O_ALLOW_RDWR; + } + + if (!bs_entry) { + bs_entry = g_new0(BlockReopenQueueEntry, 1); + QTAILQ_INSERT_TAIL(bs_queue, bs_entry, entry); + } else { + qobject_unref(bs_entry->state.options); + qobject_unref(bs_entry->state.explicit_options); + } + + bs_entry->state.bs = bs; + bs_entry->state.options = options; + bs_entry->state.explicit_options = explicit_options; + bs_entry->state.flags = flags; + + /* This needs to be overwritten in bdrv_reopen_prepare() */ + bs_entry->state.perm = UINT64_MAX; + bs_entry->state.shared_perm = 0; + + /* + * If keep_old_opts is false then it means that unspecified + * options must be reset to their original value. We don't allow + * resetting 'backing' but we need to know if the option is + * missing in order to decide if we have to return an error. + */ + if (!keep_old_opts) { + bs_entry->state.backing_missing = + !qdict_haskey(options, "backing") && + !qdict_haskey(options, "backing.driver"); + } + + QLIST_FOREACH(child, &bs->children, next) { + QDict *new_child_options = NULL; + bool child_keep_old = keep_old_opts; + + /* reopen can only change the options of block devices that were + * implicitly created and inherited options. For other (referenced) + * block devices, a syntax like "backing.foo" results in an error. */ + if (child->bs->inherits_from != bs) { + continue; + } + + /* Check if the options contain a child reference */ + if (qdict_haskey(options, child->name)) { + const char *childref = qdict_get_try_str(options, child->name); + /* + * The current child must not be reopened if the child + * reference is null or points to a different node. + */ + if (g_strcmp0(childref, child->bs->node_name)) { + continue; + } + /* + * If the child reference points to the current child then + * reopen it with its existing set of options (note that + * it can still inherit new options from the parent). + */ + child_keep_old = true; + } else { + /* Extract child options ("child-name.*") */ + char *child_key_dot = g_strdup_printf("%s.", child->name); + qdict_extract_subqdict(explicit_options, NULL, child_key_dot); + qdict_extract_subqdict(options, &new_child_options, child_key_dot); + g_free(child_key_dot); + } + + bdrv_reopen_queue_child(bs_queue, child->bs, new_child_options, + child->klass, child->role, bs->drv->is_format, + options, flags, child_keep_old); + } + + return bs_queue; +} + +BlockReopenQueue *bdrv_reopen_queue(BlockReopenQueue *bs_queue, + BlockDriverState *bs, + QDict *options, bool keep_old_opts) +{ + return bdrv_reopen_queue_child(bs_queue, bs, options, NULL, 0, false, + NULL, 0, keep_old_opts); +} + +/* + * Reopen multiple BlockDriverStates atomically & transactionally. + * + * The queue passed in (bs_queue) must have been built up previous + * via bdrv_reopen_queue(). + * + * Reopens all BDS specified in the queue, with the appropriate + * flags. All devices are prepared for reopen, and failure of any + * device will cause all device changes to be abandoned, and intermediate + * data cleaned up. + * + * If all devices prepare successfully, then the changes are committed + * to all devices. + * + * All affected nodes must be drained between bdrv_reopen_queue() and + * bdrv_reopen_multiple(). + */ +int bdrv_reopen_multiple(BlockReopenQueue *bs_queue, Error **errp) +{ + int ret = -1; + BlockReopenQueueEntry *bs_entry, *next; + + assert(bs_queue != NULL); + + QTAILQ_FOREACH(bs_entry, bs_queue, entry) { + assert(bs_entry->state.bs->quiesce_counter > 0); + if (bdrv_reopen_prepare(&bs_entry->state, bs_queue, errp)) { + goto cleanup; + } + bs_entry->prepared = true; + } + + QTAILQ_FOREACH(bs_entry, bs_queue, entry) { + BDRVReopenState *state = &bs_entry->state; + ret = bdrv_check_perm(state->bs, bs_queue, state->perm, + state->shared_perm, NULL, NULL, errp); + if (ret < 0) { + goto cleanup_perm; + } + /* Check if new_backing_bs would accept the new permissions */ + if (state->replace_backing_bs && state->new_backing_bs) { + uint64_t nperm, nshared; + bdrv_child_perm(state->bs, state->new_backing_bs, + NULL, bdrv_backing_role(state->bs), + bs_queue, state->perm, state->shared_perm, + &nperm, &nshared); + ret = bdrv_check_update_perm(state->new_backing_bs, NULL, + nperm, nshared, NULL, NULL, errp); + if (ret < 0) { + goto cleanup_perm; + } + } + bs_entry->perms_checked = true; + } + + /* + * If we reach this point, we have success and just need to apply the + * changes. + * + * Reverse order is used to comfort qcow2 driver: on commit it need to write + * IN_USE flag to the image, to mark bitmaps in the image as invalid. But + * children are usually goes after parents in reopen-queue, so go from last + * to first element. + */ + QTAILQ_FOREACH_REVERSE(bs_entry, bs_queue, entry) { + bdrv_reopen_commit(&bs_entry->state); + } + + ret = 0; +cleanup_perm: + QTAILQ_FOREACH_SAFE(bs_entry, bs_queue, entry, next) { + BDRVReopenState *state = &bs_entry->state; + + if (!bs_entry->perms_checked) { + continue; + } + + if (ret == 0) { + bdrv_set_perm(state->bs, state->perm, state->shared_perm); + } else { + bdrv_abort_perm_update(state->bs); + if (state->replace_backing_bs && state->new_backing_bs) { + bdrv_abort_perm_update(state->new_backing_bs); + } + } + } + + if (ret == 0) { + QTAILQ_FOREACH_REVERSE(bs_entry, bs_queue, entry) { + BlockDriverState *bs = bs_entry->state.bs; + + if (bs->drv->bdrv_reopen_commit_post) + bs->drv->bdrv_reopen_commit_post(&bs_entry->state); + } + } +cleanup: + QTAILQ_FOREACH_SAFE(bs_entry, bs_queue, entry, next) { + if (ret) { + if (bs_entry->prepared) { + bdrv_reopen_abort(&bs_entry->state); + } + qobject_unref(bs_entry->state.explicit_options); + qobject_unref(bs_entry->state.options); + } + if (bs_entry->state.new_backing_bs) { + bdrv_unref(bs_entry->state.new_backing_bs); + } + g_free(bs_entry); + } + g_free(bs_queue); + + return ret; +} + +int bdrv_reopen_set_read_only(BlockDriverState *bs, bool read_only, + Error **errp) +{ + int ret; + BlockReopenQueue *queue; + QDict *opts = qdict_new(); + + qdict_put_bool(opts, BDRV_OPT_READ_ONLY, read_only); + + bdrv_subtree_drained_begin(bs); + queue = bdrv_reopen_queue(NULL, bs, opts, true); + ret = bdrv_reopen_multiple(queue, errp); + bdrv_subtree_drained_end(bs); + + return ret; +} + +static BlockReopenQueueEntry *find_parent_in_reopen_queue(BlockReopenQueue *q, + BdrvChild *c) +{ + BlockReopenQueueEntry *entry; + + QTAILQ_FOREACH(entry, q, entry) { + BlockDriverState *bs = entry->state.bs; + BdrvChild *child; + + QLIST_FOREACH(child, &bs->children, next) { + if (child == c) { + return entry; + } + } + } + + return NULL; +} + +static void bdrv_reopen_perm(BlockReopenQueue *q, BlockDriverState *bs, + uint64_t *perm, uint64_t *shared) +{ + BdrvChild *c; + BlockReopenQueueEntry *parent; + uint64_t cumulative_perms = 0; + uint64_t cumulative_shared_perms = BLK_PERM_ALL; + + QLIST_FOREACH(c, &bs->parents, next_parent) { + parent = find_parent_in_reopen_queue(q, c); + if (!parent) { + cumulative_perms |= c->perm; + cumulative_shared_perms &= c->shared_perm; + } else { + uint64_t nperm, nshared; + + bdrv_child_perm(parent->state.bs, bs, c, c->role, q, + parent->state.perm, parent->state.shared_perm, + &nperm, &nshared); + + cumulative_perms |= nperm; + cumulative_shared_perms &= nshared; + } + } + *perm = cumulative_perms; + *shared = cumulative_shared_perms; +} + +static bool bdrv_reopen_can_attach(BlockDriverState *parent, + BdrvChild *child, + BlockDriverState *new_child, + Error **errp) +{ + AioContext *parent_ctx = bdrv_get_aio_context(parent); + AioContext *child_ctx = bdrv_get_aio_context(new_child); + GSList *ignore; + bool ret; + + ignore = g_slist_prepend(NULL, child); + ret = bdrv_can_set_aio_context(new_child, parent_ctx, &ignore, NULL); + g_slist_free(ignore); + if (ret) { + return ret; + } + + ignore = g_slist_prepend(NULL, child); + ret = bdrv_can_set_aio_context(parent, child_ctx, &ignore, errp); + g_slist_free(ignore); + return ret; +} + +/* + * Take a BDRVReopenState and check if the value of 'backing' in the + * reopen_state->options QDict is valid or not. + * + * If 'backing' is missing from the QDict then return 0. + * + * If 'backing' contains the node name of the backing file of + * reopen_state->bs then return 0. + * + * If 'backing' contains a different node name (or is null) then check + * whether the current backing file can be replaced with the new one. + * If that's the case then reopen_state->replace_backing_bs is set to + * true and reopen_state->new_backing_bs contains a pointer to the new + * backing BlockDriverState (or NULL). + * + * Return 0 on success, otherwise return < 0 and set @errp. + */ +static int bdrv_reopen_parse_backing(BDRVReopenState *reopen_state, + Error **errp) +{ + BlockDriverState *bs = reopen_state->bs; + BlockDriverState *overlay_bs, *below_bs, *new_backing_bs; + QObject *value; + const char *str; + + value = qdict_get(reopen_state->options, "backing"); + if (value == NULL) { + return 0; + } + + switch (qobject_type(value)) { + case QTYPE_QNULL: + new_backing_bs = NULL; + break; + case QTYPE_QSTRING: + str = qobject_get_try_str(value); + new_backing_bs = bdrv_lookup_bs(NULL, str, errp); + if (new_backing_bs == NULL) { + return -EINVAL; + } else if (bdrv_recurse_has_child(new_backing_bs, bs)) { + error_setg(errp, "Making '%s' a backing file of '%s' " + "would create a cycle", str, bs->node_name); + return -EINVAL; + } + break; + default: + /* 'backing' does not allow any other data type */ + g_assert_not_reached(); + } + + /* + * Check AioContext compatibility so that the bdrv_set_backing_hd() call in + * bdrv_reopen_commit() won't fail. + */ + if (new_backing_bs) { + if (!bdrv_reopen_can_attach(bs, bs->backing, new_backing_bs, errp)) { + return -EINVAL; + } + } + + /* + * Ensure that @bs can really handle backing files, because we are + * about to give it one (or swap the existing one) + */ + if (bs->drv->is_filter) { + /* Filters always have a file or a backing child */ + if (!bs->backing) { + error_setg(errp, "'%s' is a %s filter node that does not support a " + "backing child", bs->node_name, bs->drv->format_name); + return -EINVAL; + } + } else if (!bs->drv->supports_backing) { + error_setg(errp, "Driver '%s' of node '%s' does not support backing " + "files", bs->drv->format_name, bs->node_name); + return -EINVAL; + } + + /* + * Find the "actual" backing file by skipping all links that point + * to an implicit node, if any (e.g. a commit filter node). + * We cannot use any of the bdrv_skip_*() functions here because + * those return the first explicit node, while we are looking for + * its overlay here. + */ + overlay_bs = bs; + for (below_bs = bdrv_filter_or_cow_bs(overlay_bs); + below_bs && below_bs->implicit; + below_bs = bdrv_filter_or_cow_bs(overlay_bs)) + { + overlay_bs = below_bs; + } + + /* If we want to replace the backing file we need some extra checks */ + if (new_backing_bs != bdrv_filter_or_cow_bs(overlay_bs)) { + /* Check for implicit nodes between bs and its backing file */ + if (bs != overlay_bs) { + error_setg(errp, "Cannot change backing link if '%s' has " + "an implicit backing file", bs->node_name); + return -EPERM; + } + /* + * Check if the backing link that we want to replace is frozen. + * Note that + * bdrv_filter_or_cow_child(overlay_bs) == overlay_bs->backing, + * because we know that overlay_bs == bs, and that @bs + * either is a filter that uses ->backing or a COW format BDS + * with bs->drv->supports_backing == true. + */ + if (bdrv_is_backing_chain_frozen(overlay_bs, + child_bs(overlay_bs->backing), errp)) + { + return -EPERM; + } + reopen_state->replace_backing_bs = true; + if (new_backing_bs) { + bdrv_ref(new_backing_bs); + reopen_state->new_backing_bs = new_backing_bs; + } + } + + return 0; +} + +/* + * Prepares a BlockDriverState for reopen. All changes are staged in the + * 'opaque' field of the BDRVReopenState, which is used and allocated by + * the block driver layer .bdrv_reopen_prepare() + * + * bs is the BlockDriverState to reopen + * flags are the new open flags + * queue is the reopen queue + * + * Returns 0 on success, non-zero on error. On error errp will be set + * as well. + * + * On failure, bdrv_reopen_abort() will be called to clean up any data. + * It is the responsibility of the caller to then call the abort() or + * commit() for any other BDS that have been left in a prepare() state + * + */ +int bdrv_reopen_prepare(BDRVReopenState *reopen_state, BlockReopenQueue *queue, + Error **errp) +{ + int ret = -1; + int old_flags; + Error *local_err = NULL; + BlockDriver *drv; + QemuOpts *opts; + QDict *orig_reopen_opts; + char *discard = NULL; + bool read_only; + bool drv_prepared = false; + + assert(reopen_state != NULL); + assert(reopen_state->bs->drv != NULL); + drv = reopen_state->bs->drv; + + /* This function and each driver's bdrv_reopen_prepare() remove + * entries from reopen_state->options as they are processed, so + * we need to make a copy of the original QDict. */ + orig_reopen_opts = qdict_clone_shallow(reopen_state->options); + + /* Process generic block layer options */ + opts = qemu_opts_create(&bdrv_runtime_opts, NULL, 0, &error_abort); + if (!qemu_opts_absorb_qdict(opts, reopen_state->options, errp)) { + ret = -EINVAL; + goto error; + } + + /* This was already called in bdrv_reopen_queue_child() so the flags + * are up-to-date. This time we simply want to remove the options from + * QemuOpts in order to indicate that they have been processed. */ + old_flags = reopen_state->flags; + update_flags_from_options(&reopen_state->flags, opts); + assert(old_flags == reopen_state->flags); + + discard = qemu_opt_get_del(opts, BDRV_OPT_DISCARD); + if (discard != NULL) { + if (bdrv_parse_discard_flags(discard, &reopen_state->flags) != 0) { + error_setg(errp, "Invalid discard option"); + ret = -EINVAL; + goto error; + } + } + + reopen_state->detect_zeroes = + bdrv_parse_detect_zeroes(opts, reopen_state->flags, &local_err); + if (local_err) { + error_propagate(errp, local_err); + ret = -EINVAL; + goto error; + } + + /* All other options (including node-name and driver) must be unchanged. + * Put them back into the QDict, so that they are checked at the end + * of this function. */ + qemu_opts_to_qdict(opts, reopen_state->options); + + /* If we are to stay read-only, do not allow permission change + * to r/w. Attempting to set to r/w may fail if either BDRV_O_ALLOW_RDWR is + * not set, or if the BDS still has copy_on_read enabled */ + read_only = !(reopen_state->flags & BDRV_O_RDWR); + ret = bdrv_can_set_read_only(reopen_state->bs, read_only, true, &local_err); + if (local_err) { + error_propagate(errp, local_err); + goto error; + } + + /* Calculate required permissions after reopening */ + bdrv_reopen_perm(queue, reopen_state->bs, + &reopen_state->perm, &reopen_state->shared_perm); + + ret = bdrv_flush(reopen_state->bs); + if (ret) { + error_setg_errno(errp, -ret, "Error flushing drive"); + goto error; + } + + if (drv->bdrv_reopen_prepare) { + /* + * If a driver-specific option is missing, it means that we + * should reset it to its default value. + * But not all options allow that, so we need to check it first. + */ + ret = bdrv_reset_options_allowed(reopen_state->bs, + reopen_state->options, errp); + if (ret) { + goto error; + } + + ret = drv->bdrv_reopen_prepare(reopen_state, queue, &local_err); + if (ret) { + if (local_err != NULL) { + error_propagate(errp, local_err); + } else { + bdrv_refresh_filename(reopen_state->bs); + error_setg(errp, "failed while preparing to reopen image '%s'", + reopen_state->bs->filename); + } + goto error; + } + } else { + /* It is currently mandatory to have a bdrv_reopen_prepare() + * handler for each supported drv. */ + error_setg(errp, "Block format '%s' used by node '%s' " + "does not support reopening files", drv->format_name, + bdrv_get_device_or_node_name(reopen_state->bs)); + ret = -1; + goto error; + } + + drv_prepared = true; + + /* + * We must provide the 'backing' option if the BDS has a backing + * file or if the image file has a backing file name as part of + * its metadata. Otherwise the 'backing' option can be omitted. + */ + if (drv->supports_backing && reopen_state->backing_missing && + (reopen_state->bs->backing || reopen_state->bs->backing_file[0])) { + error_setg(errp, "backing is missing for '%s'", + reopen_state->bs->node_name); + ret = -EINVAL; + goto error; + } + + /* + * Allow changing the 'backing' option. The new value can be + * either a reference to an existing node (using its node name) + * or NULL to simply detach the current backing file. + */ + ret = bdrv_reopen_parse_backing(reopen_state, errp); + if (ret < 0) { + goto error; + } + qdict_del(reopen_state->options, "backing"); + + /* Options that are not handled are only okay if they are unchanged + * compared to the old state. It is expected that some options are only + * used for the initial open, but not reopen (e.g. filename) */ + if (qdict_size(reopen_state->options)) { + const QDictEntry *entry = qdict_first(reopen_state->options); + + do { + QObject *new = entry->value; + QObject *old = qdict_get(reopen_state->bs->options, entry->key); + + /* Allow child references (child_name=node_name) as long as they + * point to the current child (i.e. everything stays the same). */ + if (qobject_type(new) == QTYPE_QSTRING) { + BdrvChild *child; + QLIST_FOREACH(child, &reopen_state->bs->children, next) { + if (!strcmp(child->name, entry->key)) { + break; + } + } + + if (child) { + const char *str = qobject_get_try_str(new); + if (!strcmp(child->bs->node_name, str)) { + continue; /* Found child with this name, skip option */ + } + } + } + + /* + * TODO: When using -drive to specify blockdev options, all values + * will be strings; however, when using -blockdev, blockdev-add or + * filenames using the json:{} pseudo-protocol, they will be + * correctly typed. + * In contrast, reopening options are (currently) always strings + * (because you can only specify them through qemu-io; all other + * callers do not specify any options). + * Therefore, when using anything other than -drive to create a BDS, + * this cannot detect non-string options as unchanged, because + * qobject_is_equal() always returns false for objects of different + * type. In the future, this should be remedied by correctly typing + * all options. For now, this is not too big of an issue because + * the user can simply omit options which cannot be changed anyway, + * so they will stay unchanged. + */ + if (!qobject_is_equal(new, old)) { + error_setg(errp, "Cannot change the option '%s'", entry->key); + ret = -EINVAL; + goto error; + } + } while ((entry = qdict_next(reopen_state->options, entry))); + } + + ret = 0; + + /* Restore the original reopen_state->options QDict */ + qobject_unref(reopen_state->options); + reopen_state->options = qobject_ref(orig_reopen_opts); + +error: + if (ret < 0 && drv_prepared) { + /* drv->bdrv_reopen_prepare() has succeeded, so we need to + * call drv->bdrv_reopen_abort() before signaling an error + * (bdrv_reopen_multiple() will not call bdrv_reopen_abort() + * when the respective bdrv_reopen_prepare() has failed) */ + if (drv->bdrv_reopen_abort) { + drv->bdrv_reopen_abort(reopen_state); + } + } + qemu_opts_del(opts); + qobject_unref(orig_reopen_opts); + g_free(discard); + return ret; +} + +/* + * Takes the staged changes for the reopen from bdrv_reopen_prepare(), and + * makes them final by swapping the staging BlockDriverState contents into + * the active BlockDriverState contents. + */ +void bdrv_reopen_commit(BDRVReopenState *reopen_state) +{ + BlockDriver *drv; + BlockDriverState *bs; + BdrvChild *child; + + assert(reopen_state != NULL); + bs = reopen_state->bs; + drv = bs->drv; + assert(drv != NULL); + + /* If there are any driver level actions to take */ + if (drv->bdrv_reopen_commit) { + drv->bdrv_reopen_commit(reopen_state); + } + + /* set BDS specific flags now */ + qobject_unref(bs->explicit_options); + qobject_unref(bs->options); + + bs->explicit_options = reopen_state->explicit_options; + bs->options = reopen_state->options; + bs->open_flags = reopen_state->flags; + bs->read_only = !(reopen_state->flags & BDRV_O_RDWR); + bs->detect_zeroes = reopen_state->detect_zeroes; + + if (reopen_state->replace_backing_bs) { + qdict_del(bs->explicit_options, "backing"); + qdict_del(bs->options, "backing"); + } + + /* Remove child references from bs->options and bs->explicit_options. + * Child options were already removed in bdrv_reopen_queue_child() */ + QLIST_FOREACH(child, &bs->children, next) { + qdict_del(bs->explicit_options, child->name); + qdict_del(bs->options, child->name); + } + + /* + * Change the backing file if a new one was specified. We do this + * after updating bs->options, so bdrv_refresh_filename() (called + * from bdrv_set_backing_hd()) has the new values. + */ + if (reopen_state->replace_backing_bs) { + BlockDriverState *old_backing_bs = child_bs(bs->backing); + assert(!old_backing_bs || !old_backing_bs->implicit); + /* Abort the permission update on the backing bs we're detaching */ + if (old_backing_bs) { + bdrv_abort_perm_update(old_backing_bs); + } + bdrv_set_backing_hd(bs, reopen_state->new_backing_bs, &error_abort); + } + + bdrv_refresh_limits(bs, NULL); +} + +/* + * Abort the reopen, and delete and free the staged changes in + * reopen_state + */ +void bdrv_reopen_abort(BDRVReopenState *reopen_state) +{ + BlockDriver *drv; + + assert(reopen_state != NULL); + drv = reopen_state->bs->drv; + assert(drv != NULL); + + if (drv->bdrv_reopen_abort) { + drv->bdrv_reopen_abort(reopen_state); + } +} + + +static void bdrv_close(BlockDriverState *bs) +{ + BdrvAioNotifier *ban, *ban_next; + BdrvChild *child, *next; + + assert(!bs->refcnt); + + bdrv_drained_begin(bs); /* complete I/O */ + bdrv_flush(bs); + bdrv_drain(bs); /* in case flush left pending I/O */ + + if (bs->drv) { + if (bs->drv->bdrv_close) { + /* Must unfreeze all children, so bdrv_unref_child() works */ + bs->drv->bdrv_close(bs); + } + bs->drv = NULL; + } + + QLIST_FOREACH_SAFE(child, &bs->children, next, next) { + bdrv_unref_child(bs, child); + } + + bs->backing = NULL; + bs->file = NULL; + g_free(bs->opaque); + bs->opaque = NULL; + qatomic_set(&bs->copy_on_read, 0); + bs->backing_file[0] = '\0'; + bs->backing_format[0] = '\0'; + bs->total_sectors = 0; + bs->encrypted = false; + bs->sg = false; + qobject_unref(bs->options); + qobject_unref(bs->explicit_options); + bs->options = NULL; + bs->explicit_options = NULL; + qobject_unref(bs->full_open_options); + bs->full_open_options = NULL; + + bdrv_release_named_dirty_bitmaps(bs); + assert(QLIST_EMPTY(&bs->dirty_bitmaps)); + + QLIST_FOREACH_SAFE(ban, &bs->aio_notifiers, list, ban_next) { + g_free(ban); + } + QLIST_INIT(&bs->aio_notifiers); + bdrv_drained_end(bs); + + /* + * If we're still inside some bdrv_drain_all_begin()/end() sections, end + * them now since this BDS won't exist anymore when bdrv_drain_all_end() + * gets called. + */ + if (bs->quiesce_counter) { + bdrv_drain_all_end_quiesce(bs); + } +} + +void bdrv_close_all(void) +{ + assert(job_next(NULL) == NULL); + blk_exp_close_all(); + + /* Drop references from requests still in flight, such as canceled block + * jobs whose AIO context has not been polled yet */ + bdrv_drain_all(); + + blk_remove_all_bs(); + blockdev_close_all_bdrv_states(); + + assert(QTAILQ_EMPTY(&all_bdrv_states)); +} + +static bool should_update_child(BdrvChild *c, BlockDriverState *to) +{ + GQueue *queue; + GHashTable *found; + bool ret; + + if (c->klass->stay_at_node) { + return false; + } + + /* If the child @c belongs to the BDS @to, replacing the current + * c->bs by @to would mean to create a loop. + * + * Such a case occurs when appending a BDS to a backing chain. + * For instance, imagine the following chain: + * + * guest device -> node A -> further backing chain... + * + * Now we create a new BDS B which we want to put on top of this + * chain, so we first attach A as its backing node: + * + * node B + * | + * v + * guest device -> node A -> further backing chain... + * + * Finally we want to replace A by B. When doing that, we want to + * replace all pointers to A by pointers to B -- except for the + * pointer from B because (1) that would create a loop, and (2) + * that pointer should simply stay intact: + * + * guest device -> node B + * | + * v + * node A -> further backing chain... + * + * In general, when replacing a node A (c->bs) by a node B (@to), + * if A is a child of B, that means we cannot replace A by B there + * because that would create a loop. Silently detaching A from B + * is also not really an option. So overall just leaving A in + * place there is the most sensible choice. + * + * We would also create a loop in any cases where @c is only + * indirectly referenced by @to. Prevent this by returning false + * if @c is found (by breadth-first search) anywhere in the whole + * subtree of @to. + */ + + ret = true; + found = g_hash_table_new(NULL, NULL); + g_hash_table_add(found, to); + queue = g_queue_new(); + g_queue_push_tail(queue, to); + + while (!g_queue_is_empty(queue)) { + BlockDriverState *v = g_queue_pop_head(queue); + BdrvChild *c2; + + QLIST_FOREACH(c2, &v->children, next) { + if (c2 == c) { + ret = false; + break; + } + + if (g_hash_table_contains(found, c2->bs)) { + continue; + } + + g_queue_push_tail(queue, c2->bs); + g_hash_table_add(found, c2->bs); + } + } + + g_queue_free(queue); + g_hash_table_destroy(found); + + return ret; +} + +/* + * With auto_skip=true bdrv_replace_node_common skips updating from parents + * if it creates a parent-child relation loop or if parent is block-job. + * + * With auto_skip=false the error is returned if from has a parent which should + * not be updated. + */ +static void bdrv_replace_node_common(BlockDriverState *from, + BlockDriverState *to, + bool auto_skip, Error **errp) +{ + BdrvChild *c, *next; + GSList *list = NULL, *p; + uint64_t perm = 0, shared = BLK_PERM_ALL; + int ret; + + /* Make sure that @from doesn't go away until we have successfully attached + * all of its parents to @to. */ + bdrv_ref(from); + + assert(qemu_get_current_aio_context() == qemu_get_aio_context()); + assert(bdrv_get_aio_context(from) == bdrv_get_aio_context(to)); + bdrv_drained_begin(from); + + /* Put all parents into @list and calculate their cumulative permissions */ + QLIST_FOREACH_SAFE(c, &from->parents, next_parent, next) { + assert(c->bs == from); + if (!should_update_child(c, to)) { + if (auto_skip) { + continue; + } + error_setg(errp, "Should not change '%s' link to '%s'", + c->name, from->node_name); + goto out; + } + if (c->frozen) { + error_setg(errp, "Cannot change '%s' link to '%s'", + c->name, from->node_name); + goto out; + } + list = g_slist_prepend(list, c); + perm |= c->perm; + shared &= c->shared_perm; + } + + /* Check whether the required permissions can be granted on @to, ignoring + * all BdrvChild in @list so that they can't block themselves. */ + ret = bdrv_check_update_perm(to, NULL, perm, shared, list, NULL, errp); + if (ret < 0) { + bdrv_abort_perm_update(to); + goto out; + } + + /* Now actually perform the change. We performed the permission check for + * all elements of @list at once, so set the permissions all at once at the + * very end. */ + for (p = list; p != NULL; p = p->next) { + c = p->data; + + bdrv_ref(to); + bdrv_replace_child_noperm(c, to); + bdrv_unref(from); + } + + bdrv_get_cumulative_perm(to, &perm, &shared); + bdrv_set_perm(to, perm, shared); + +out: + g_slist_free(list); + bdrv_drained_end(from); + bdrv_unref(from); +} + +void bdrv_replace_node(BlockDriverState *from, BlockDriverState *to, + Error **errp) +{ + return bdrv_replace_node_common(from, to, true, errp); +} + +/* + * Add new bs contents at the top of an image chain while the chain is + * live, while keeping required fields on the top layer. + * + * This will modify the BlockDriverState fields, and swap contents + * between bs_new and bs_top. Both bs_new and bs_top are modified. + * + * bs_new must not be attached to a BlockBackend. + * + * This function does not create any image files. + * + * bdrv_append() takes ownership of a bs_new reference and unrefs it because + * that's what the callers commonly need. bs_new will be referenced by the old + * parents of bs_top after bdrv_append() returns. If the caller needs to keep a + * reference of its own, it must call bdrv_ref(). + */ +void bdrv_append(BlockDriverState *bs_new, BlockDriverState *bs_top, + Error **errp) +{ + Error *local_err = NULL; + + bdrv_set_backing_hd(bs_new, bs_top, &local_err); + if (local_err) { + error_propagate(errp, local_err); + goto out; + } + + bdrv_replace_node(bs_top, bs_new, &local_err); + if (local_err) { + error_propagate(errp, local_err); + bdrv_set_backing_hd(bs_new, NULL, &error_abort); + goto out; + } + + /* bs_new is now referenced by its new parents, we don't need the + * additional reference any more. */ +out: + bdrv_unref(bs_new); +} + +static void bdrv_delete(BlockDriverState *bs) +{ + assert(bdrv_op_blocker_is_empty(bs)); + assert(!bs->refcnt); + + /* remove from list, if necessary */ + if (bs->node_name[0] != '\0') { + QTAILQ_REMOVE(&graph_bdrv_states, bs, node_list); + } + QTAILQ_REMOVE(&all_bdrv_states, bs, bs_list); + + bdrv_close(bs); + + g_free(bs); +} + +/* + * Run consistency checks on an image + * + * Returns 0 if the check could be completed (it doesn't mean that the image is + * free of errors) or -errno when an internal error occurred. The results of the + * check are stored in res. + */ +int coroutine_fn bdrv_co_check(BlockDriverState *bs, + BdrvCheckResult *res, BdrvCheckMode fix) +{ + if (bs->drv == NULL) { + return -ENOMEDIUM; + } + if (bs->drv->bdrv_co_check == NULL) { + return -ENOTSUP; + } + + memset(res, 0, sizeof(*res)); + return bs->drv->bdrv_co_check(bs, res, fix); +} + +/* + * Return values: + * 0 - success + * -EINVAL - backing format specified, but no file + * -ENOSPC - can't update the backing file because no space is left in the + * image file header + * -ENOTSUP - format driver doesn't support changing the backing file + */ +int bdrv_change_backing_file(BlockDriverState *bs, const char *backing_file, + const char *backing_fmt, bool warn) +{ + BlockDriver *drv = bs->drv; + int ret; + + if (!drv) { + return -ENOMEDIUM; + } + + /* Backing file format doesn't make sense without a backing file */ + if (backing_fmt && !backing_file) { + return -EINVAL; + } + + if (warn && backing_file && !backing_fmt) { + warn_report("Deprecated use of backing file without explicit " + "backing format, use of this image requires " + "potentially unsafe format probing"); + } + + if (drv->bdrv_change_backing_file != NULL) { + ret = drv->bdrv_change_backing_file(bs, backing_file, backing_fmt); + } else { + ret = -ENOTSUP; + } + + if (ret == 0) { + pstrcpy(bs->backing_file, sizeof(bs->backing_file), backing_file ?: ""); + pstrcpy(bs->backing_format, sizeof(bs->backing_format), backing_fmt ?: ""); + pstrcpy(bs->auto_backing_file, sizeof(bs->auto_backing_file), + backing_file ?: ""); + } + return ret; +} + +/* + * Finds the first non-filter node above bs in the chain between + * active and bs. The returned node is either an immediate parent of + * bs, or there are only filter nodes between the two. + * + * Returns NULL if bs is not found in active's image chain, + * or if active == bs. + * + * Returns the bottommost base image if bs == NULL. + */ +BlockDriverState *bdrv_find_overlay(BlockDriverState *active, + BlockDriverState *bs) +{ + bs = bdrv_skip_filters(bs); + active = bdrv_skip_filters(active); + + while (active) { + BlockDriverState *next = bdrv_backing_chain_next(active); + if (bs == next) { + return active; + } + active = next; + } + + return NULL; +} + +/* Given a BDS, searches for the base layer. */ +BlockDriverState *bdrv_find_base(BlockDriverState *bs) +{ + return bdrv_find_overlay(bs, NULL); +} + +/* + * Return true if at least one of the COW (backing) and filter links + * between @bs and @base is frozen. @errp is set if that's the case. + * @base must be reachable from @bs, or NULL. + */ +bool bdrv_is_backing_chain_frozen(BlockDriverState *bs, BlockDriverState *base, + Error **errp) +{ + BlockDriverState *i; + BdrvChild *child; + + for (i = bs; i != base; i = child_bs(child)) { + child = bdrv_filter_or_cow_child(i); + + if (child && child->frozen) { + error_setg(errp, "Cannot change '%s' link from '%s' to '%s'", + child->name, i->node_name, child->bs->node_name); + return true; + } + } + + return false; +} + +/* + * Freeze all COW (backing) and filter links between @bs and @base. + * If any of the links is already frozen the operation is aborted and + * none of the links are modified. + * @base must be reachable from @bs, or NULL. + * Returns 0 on success. On failure returns < 0 and sets @errp. + */ +int bdrv_freeze_backing_chain(BlockDriverState *bs, BlockDriverState *base, + Error **errp) +{ + BlockDriverState *i; + BdrvChild *child; + + if (bdrv_is_backing_chain_frozen(bs, base, errp)) { + return -EPERM; + } + + for (i = bs; i != base; i = child_bs(child)) { + child = bdrv_filter_or_cow_child(i); + if (child && child->bs->never_freeze) { + error_setg(errp, "Cannot freeze '%s' link to '%s'", + child->name, child->bs->node_name); + return -EPERM; + } + } + + for (i = bs; i != base; i = child_bs(child)) { + child = bdrv_filter_or_cow_child(i); + if (child) { + child->frozen = true; + } + } + + return 0; +} + +/* + * Unfreeze all COW (backing) and filter links between @bs and @base. + * The caller must ensure that all links are frozen before using this + * function. + * @base must be reachable from @bs, or NULL. + */ +void bdrv_unfreeze_backing_chain(BlockDriverState *bs, BlockDriverState *base) +{ + BlockDriverState *i; + BdrvChild *child; + + for (i = bs; i != base; i = child_bs(child)) { + child = bdrv_filter_or_cow_child(i); + if (child) { + assert(child->frozen); + child->frozen = false; + } + } +} + +/* + * Drops images above 'base' up to and including 'top', and sets the image + * above 'top' to have base as its backing file. + * + * Requires that the overlay to 'top' is opened r/w, so that the backing file + * information in 'bs' can be properly updated. + * + * E.g., this will convert the following chain: + * bottom <- base <- intermediate <- top <- active + * + * to + * + * bottom <- base <- active + * + * It is allowed for bottom==base, in which case it converts: + * + * base <- intermediate <- top <- active + * + * to + * + * base <- active + * + * If backing_file_str is non-NULL, it will be used when modifying top's + * overlay image metadata. + * + * Error conditions: + * if active == top, that is considered an error + * + */ +int bdrv_drop_intermediate(BlockDriverState *top, BlockDriverState *base, + const char *backing_file_str) +{ + BlockDriverState *explicit_top = top; + bool update_inherits_from; + BdrvChild *c; + Error *local_err = NULL; + int ret = -EIO; + g_autoptr(GSList) updated_children = NULL; + GSList *p; + + bdrv_ref(top); + bdrv_subtree_drained_begin(top); + + if (!top->drv || !base->drv) { + goto exit; + } + + /* Make sure that base is in the backing chain of top */ + if (!bdrv_chain_contains(top, base)) { + goto exit; + } + + /* If 'base' recursively inherits from 'top' then we should set + * base->inherits_from to top->inherits_from after 'top' and all + * other intermediate nodes have been dropped. + * If 'top' is an implicit node (e.g. "commit_top") we should skip + * it because no one inherits from it. We use explicit_top for that. */ + explicit_top = bdrv_skip_implicit_filters(explicit_top); + update_inherits_from = bdrv_inherits_from_recursive(base, explicit_top); + + /* success - we can delete the intermediate states, and link top->base */ + /* TODO Check graph modification op blockers (BLK_PERM_GRAPH_MOD) once + * we've figured out how they should work. */ + if (!backing_file_str) { + bdrv_refresh_filename(base); + backing_file_str = base->filename; + } + + QLIST_FOREACH(c, &top->parents, next_parent) { + updated_children = g_slist_prepend(updated_children, c); + } + + bdrv_replace_node_common(top, base, false, &local_err); + if (local_err) { + error_report_err(local_err); + goto exit; + } + + for (p = updated_children; p; p = p->next) { + c = p->data; + + if (c->klass->update_filename) { + ret = c->klass->update_filename(c, base, backing_file_str, + &local_err); + if (ret < 0) { + /* + * TODO: Actually, we want to rollback all previous iterations + * of this loop, and (which is almost impossible) previous + * bdrv_replace_node()... + * + * Note, that c->klass->update_filename may lead to permission + * update, so it's a bad idea to call it inside permission + * update transaction of bdrv_replace_node. + */ + error_report_err(local_err); + goto exit; + } + } + } + + if (update_inherits_from) { + base->inherits_from = explicit_top->inherits_from; + } + + ret = 0; +exit: + bdrv_subtree_drained_end(top); + bdrv_unref(top); + return ret; +} + +/** + * Implementation of BlockDriver.bdrv_get_allocated_file_size() that + * sums the size of all data-bearing children. (This excludes backing + * children.) + */ +static int64_t bdrv_sum_allocated_file_size(BlockDriverState *bs) +{ + BdrvChild *child; + int64_t child_size, sum = 0; + + QLIST_FOREACH(child, &bs->children, next) { + if (child->role & (BDRV_CHILD_DATA | BDRV_CHILD_METADATA | + BDRV_CHILD_FILTERED)) + { + child_size = bdrv_get_allocated_file_size(child->bs); + if (child_size < 0) { + return child_size; + } + sum += child_size; + } + } + + return sum; +} + +/** + * Length of a allocated file in bytes. Sparse files are counted by actual + * allocated space. Return < 0 if error or unknown. + */ +int64_t bdrv_get_allocated_file_size(BlockDriverState *bs) +{ + BlockDriver *drv = bs->drv; + if (!drv) { + return -ENOMEDIUM; + } + if (drv->bdrv_get_allocated_file_size) { + return drv->bdrv_get_allocated_file_size(bs); + } + + if (drv->bdrv_file_open) { + /* + * Protocol drivers default to -ENOTSUP (most of their data is + * not stored in any of their children (if they even have any), + * so there is no generic way to figure it out). + */ + return -ENOTSUP; + } else if (drv->is_filter) { + /* Filter drivers default to the size of their filtered child */ + return bdrv_get_allocated_file_size(bdrv_filter_bs(bs)); + } else { + /* Other drivers default to summing their children's sizes */ + return bdrv_sum_allocated_file_size(bs); + } +} + +/* + * bdrv_measure: + * @drv: Format driver + * @opts: Creation options for new image + * @in_bs: Existing image containing data for new image (may be NULL) + * @errp: Error object + * Returns: A #BlockMeasureInfo (free using qapi_free_BlockMeasureInfo()) + * or NULL on error + * + * Calculate file size required to create a new image. + * + * If @in_bs is given then space for allocated clusters and zero clusters + * from that image are included in the calculation. If @opts contains a + * backing file that is shared by @in_bs then backing clusters may be omitted + * from the calculation. + * + * If @in_bs is NULL then the calculation includes no allocated clusters + * unless a preallocation option is given in @opts. + * + * Note that @in_bs may use a different BlockDriver from @drv. + * + * If an error occurs the @errp pointer is set. + */ +BlockMeasureInfo *bdrv_measure(BlockDriver *drv, QemuOpts *opts, + BlockDriverState *in_bs, Error **errp) +{ + if (!drv->bdrv_measure) { + error_setg(errp, "Block driver '%s' does not support size measurement", + drv->format_name); + return NULL; + } + + return drv->bdrv_measure(opts, in_bs, errp); +} + +/** + * Return number of sectors on success, -errno on error. + */ +int64_t bdrv_nb_sectors(BlockDriverState *bs) +{ + BlockDriver *drv = bs->drv; + + if (!drv) + return -ENOMEDIUM; + + if (drv->has_variable_length) { + int ret = refresh_total_sectors(bs, bs->total_sectors); + if (ret < 0) { + return ret; + } + } + return bs->total_sectors; +} + +/** + * Return length in bytes on success, -errno on error. + * The length is always a multiple of BDRV_SECTOR_SIZE. + */ +int64_t bdrv_getlength(BlockDriverState *bs) +{ + int64_t ret = bdrv_nb_sectors(bs); + + if (ret < 0) { + return ret; + } + if (ret > INT64_MAX / BDRV_SECTOR_SIZE) { + return -EFBIG; + } + return ret * BDRV_SECTOR_SIZE; +} + +/* return 0 as number of sectors if no device present or error */ +void bdrv_get_geometry(BlockDriverState *bs, uint64_t *nb_sectors_ptr) +{ + int64_t nb_sectors = bdrv_nb_sectors(bs); + + *nb_sectors_ptr = nb_sectors < 0 ? 0 : nb_sectors; +} + +bool bdrv_is_sg(BlockDriverState *bs) +{ + return bs->sg; +} + +/** + * Return whether the given node supports compressed writes. + */ +bool bdrv_supports_compressed_writes(BlockDriverState *bs) +{ + BlockDriverState *filtered; + + if (!bs->drv || !block_driver_can_compress(bs->drv)) { + return false; + } + + filtered = bdrv_filter_bs(bs); + if (filtered) { + /* + * Filters can only forward compressed writes, so we have to + * check the child. + */ + return bdrv_supports_compressed_writes(filtered); + } + + return true; +} + +const char *bdrv_get_format_name(BlockDriverState *bs) +{ + return bs->drv ? bs->drv->format_name : NULL; +} + +static int qsort_strcmp(const void *a, const void *b) +{ + return strcmp(*(char *const *)a, *(char *const *)b); +} + +void bdrv_iterate_format(void (*it)(void *opaque, const char *name), + void *opaque, bool read_only) +{ + BlockDriver *drv; + int count = 0; + int i; + const char **formats = NULL; + + QLIST_FOREACH(drv, &bdrv_drivers, list) { + if (drv->format_name) { + bool found = false; + int i = count; + + if (use_bdrv_whitelist && !bdrv_is_whitelisted(drv, read_only)) { + continue; + } + + while (formats && i && !found) { + found = !strcmp(formats[--i], drv->format_name); + } + + if (!found) { + formats = g_renew(const char *, formats, count + 1); + formats[count++] = drv->format_name; + } + } + } + + for (i = 0; i < (int)ARRAY_SIZE(block_driver_modules); i++) { + const char *format_name = block_driver_modules[i].format_name; + + if (format_name) { + bool found = false; + int j = count; + + if (use_bdrv_whitelist && + !bdrv_format_is_whitelisted(format_name, read_only)) { + continue; + } + + while (formats && j && !found) { + found = !strcmp(formats[--j], format_name); + } + + if (!found) { + formats = g_renew(const char *, formats, count + 1); + formats[count++] = format_name; + } + } + } + + qsort(formats, count, sizeof(formats[0]), qsort_strcmp); + + for (i = 0; i < count; i++) { + it(opaque, formats[i]); + } + + g_free(formats); +} + +/* This function is to find a node in the bs graph */ +BlockDriverState *bdrv_find_node(const char *node_name) +{ + BlockDriverState *bs; + + assert(node_name); + + QTAILQ_FOREACH(bs, &graph_bdrv_states, node_list) { + if (!strcmp(node_name, bs->node_name)) { + return bs; + } + } + return NULL; +} + +/* Put this QMP function here so it can access the static graph_bdrv_states. */ +BlockDeviceInfoList *bdrv_named_nodes_list(bool flat, + Error **errp) +{ + BlockDeviceInfoList *list; + BlockDriverState *bs; + + list = NULL; + QTAILQ_FOREACH(bs, &graph_bdrv_states, node_list) { + BlockDeviceInfo *info = bdrv_block_device_info(NULL, bs, flat, errp); + if (!info) { + qapi_free_BlockDeviceInfoList(list); + return NULL; + } + QAPI_LIST_PREPEND(list, info); + } + + return list; +} + +typedef struct XDbgBlockGraphConstructor { + XDbgBlockGraph *graph; + GHashTable *graph_nodes; +} XDbgBlockGraphConstructor; + +static XDbgBlockGraphConstructor *xdbg_graph_new(void) +{ + XDbgBlockGraphConstructor *gr = g_new(XDbgBlockGraphConstructor, 1); + + gr->graph = g_new0(XDbgBlockGraph, 1); + gr->graph_nodes = g_hash_table_new(NULL, NULL); + + return gr; +} + +static XDbgBlockGraph *xdbg_graph_finalize(XDbgBlockGraphConstructor *gr) +{ + XDbgBlockGraph *graph = gr->graph; + + g_hash_table_destroy(gr->graph_nodes); + g_free(gr); + + return graph; +} + +static uintptr_t xdbg_graph_node_num(XDbgBlockGraphConstructor *gr, void *node) +{ + uintptr_t ret = (uintptr_t)g_hash_table_lookup(gr->graph_nodes, node); + + if (ret != 0) { + return ret; + } + + /* + * Start counting from 1, not 0, because 0 interferes with not-found (NULL) + * answer of g_hash_table_lookup. + */ + ret = g_hash_table_size(gr->graph_nodes) + 1; + g_hash_table_insert(gr->graph_nodes, node, (void *)ret); + + return ret; +} + +static void xdbg_graph_add_node(XDbgBlockGraphConstructor *gr, void *node, + XDbgBlockGraphNodeType type, const char *name) +{ + XDbgBlockGraphNode *n; + + n = g_new0(XDbgBlockGraphNode, 1); + + n->id = xdbg_graph_node_num(gr, node); + n->type = type; + n->name = g_strdup(name); + + QAPI_LIST_PREPEND(gr->graph->nodes, n); +} + +static void xdbg_graph_add_edge(XDbgBlockGraphConstructor *gr, void *parent, + const BdrvChild *child) +{ + BlockPermission qapi_perm; + XDbgBlockGraphEdge *edge; + + edge = g_new0(XDbgBlockGraphEdge, 1); + + edge->parent = xdbg_graph_node_num(gr, parent); + edge->child = xdbg_graph_node_num(gr, child->bs); + edge->name = g_strdup(child->name); + + for (qapi_perm = 0; qapi_perm < BLOCK_PERMISSION__MAX; qapi_perm++) { + uint64_t flag = bdrv_qapi_perm_to_blk_perm(qapi_perm); + + if (flag & child->perm) { + QAPI_LIST_PREPEND(edge->perm, qapi_perm); + } + if (flag & child->shared_perm) { + QAPI_LIST_PREPEND(edge->shared_perm, qapi_perm); + } + } + + QAPI_LIST_PREPEND(gr->graph->edges, edge); +} + + +XDbgBlockGraph *bdrv_get_xdbg_block_graph(Error **errp) +{ + BlockBackend *blk; + BlockJob *job; + BlockDriverState *bs; + BdrvChild *child; + XDbgBlockGraphConstructor *gr = xdbg_graph_new(); + + for (blk = blk_all_next(NULL); blk; blk = blk_all_next(blk)) { + char *allocated_name = NULL; + const char *name = blk_name(blk); + + if (!*name) { + name = allocated_name = blk_get_attached_dev_id(blk); + } + xdbg_graph_add_node(gr, blk, X_DBG_BLOCK_GRAPH_NODE_TYPE_BLOCK_BACKEND, + name); + g_free(allocated_name); + if (blk_root(blk)) { + xdbg_graph_add_edge(gr, blk, blk_root(blk)); + } + } + + for (job = block_job_next(NULL); job; job = block_job_next(job)) { + GSList *el; + + xdbg_graph_add_node(gr, job, X_DBG_BLOCK_GRAPH_NODE_TYPE_BLOCK_JOB, + job->job.id); + for (el = job->nodes; el; el = el->next) { + xdbg_graph_add_edge(gr, job, (BdrvChild *)el->data); + } + } + + QTAILQ_FOREACH(bs, &graph_bdrv_states, node_list) { + xdbg_graph_add_node(gr, bs, X_DBG_BLOCK_GRAPH_NODE_TYPE_BLOCK_DRIVER, + bs->node_name); + QLIST_FOREACH(child, &bs->children, next) { + xdbg_graph_add_edge(gr, bs, child); + } + } + + return xdbg_graph_finalize(gr); +} + +BlockDriverState *bdrv_lookup_bs(const char *device, + const char *node_name, + Error **errp) +{ + BlockBackend *blk; + BlockDriverState *bs; + + if (device) { + blk = blk_by_name(device); + + if (blk) { + bs = blk_bs(blk); + if (!bs) { + error_setg(errp, "Device '%s' has no medium", device); + } + + return bs; + } + } + + if (node_name) { + bs = bdrv_find_node(node_name); + + if (bs) { + return bs; + } + } + + error_setg(errp, "Cannot find device=%s nor node_name=%s", + device ? device : "", + node_name ? node_name : ""); + return NULL; +} + +/* If 'base' is in the same chain as 'top', return true. Otherwise, + * return false. If either argument is NULL, return false. */ +bool bdrv_chain_contains(BlockDriverState *top, BlockDriverState *base) +{ + while (top && top != base) { + top = bdrv_filter_or_cow_bs(top); + } + + return top != NULL; +} + +BlockDriverState *bdrv_next_node(BlockDriverState *bs) +{ + if (!bs) { + return QTAILQ_FIRST(&graph_bdrv_states); + } + return QTAILQ_NEXT(bs, node_list); +} + +BlockDriverState *bdrv_next_all_states(BlockDriverState *bs) +{ + if (!bs) { + return QTAILQ_FIRST(&all_bdrv_states); + } + return QTAILQ_NEXT(bs, bs_list); +} + +const char *bdrv_get_node_name(const BlockDriverState *bs) +{ + return bs->node_name; +} + +const char *bdrv_get_parent_name(const BlockDriverState *bs) +{ + BdrvChild *c; + const char *name; + + /* If multiple parents have a name, just pick the first one. */ + QLIST_FOREACH(c, &bs->parents, next_parent) { + if (c->klass->get_name) { + name = c->klass->get_name(c); + if (name && *name) { + return name; + } + } + } + + return NULL; +} + +/* TODO check what callers really want: bs->node_name or blk_name() */ +const char *bdrv_get_device_name(const BlockDriverState *bs) +{ + return bdrv_get_parent_name(bs) ?: ""; +} + +/* This can be used to identify nodes that might not have a device + * name associated. Since node and device names live in the same + * namespace, the result is unambiguous. The exception is if both are + * absent, then this returns an empty (non-null) string. */ +const char *bdrv_get_device_or_node_name(const BlockDriverState *bs) +{ + return bdrv_get_parent_name(bs) ?: bs->node_name; +} + +int bdrv_get_flags(BlockDriverState *bs) +{ + return bs->open_flags; +} + +int bdrv_has_zero_init_1(BlockDriverState *bs) +{ + return 1; +} + +int bdrv_has_zero_init(BlockDriverState *bs) +{ + BlockDriverState *filtered; + + if (!bs->drv) { + return 0; + } + + /* If BS is a copy on write image, it is initialized to + the contents of the base image, which may not be zeroes. */ + if (bdrv_cow_child(bs)) { + return 0; + } + if (bs->drv->bdrv_has_zero_init) { + return bs->drv->bdrv_has_zero_init(bs); + } + + filtered = bdrv_filter_bs(bs); + if (filtered) { + return bdrv_has_zero_init(filtered); + } + + /* safe default */ + return 0; +} + +bool bdrv_can_write_zeroes_with_unmap(BlockDriverState *bs) +{ + if (!(bs->open_flags & BDRV_O_UNMAP)) { + return false; + } + + return bs->supported_zero_flags & BDRV_REQ_MAY_UNMAP; +} + +void bdrv_get_backing_filename(BlockDriverState *bs, + char *filename, int filename_size) +{ + pstrcpy(filename, filename_size, bs->backing_file); +} + +int bdrv_get_info(BlockDriverState *bs, BlockDriverInfo *bdi) +{ + BlockDriver *drv = bs->drv; + /* if bs->drv == NULL, bs is closed, so there's nothing to do here */ + if (!drv) { + return -ENOMEDIUM; + } + if (!drv->bdrv_get_info) { + BlockDriverState *filtered = bdrv_filter_bs(bs); + if (filtered) { + return bdrv_get_info(filtered, bdi); + } + return -ENOTSUP; + } + memset(bdi, 0, sizeof(*bdi)); + return drv->bdrv_get_info(bs, bdi); +} + +ImageInfoSpecific *bdrv_get_specific_info(BlockDriverState *bs, + Error **errp) +{ + BlockDriver *drv = bs->drv; + if (drv && drv->bdrv_get_specific_info) { + return drv->bdrv_get_specific_info(bs, errp); + } + return NULL; +} + +BlockStatsSpecific *bdrv_get_specific_stats(BlockDriverState *bs) +{ + BlockDriver *drv = bs->drv; + if (!drv || !drv->bdrv_get_specific_stats) { + return NULL; + } + return drv->bdrv_get_specific_stats(bs); +} + +void bdrv_debug_event(BlockDriverState *bs, BlkdebugEvent event) +{ + if (!bs || !bs->drv || !bs->drv->bdrv_debug_event) { + return; + } + + bs->drv->bdrv_debug_event(bs, event); +} + +static BlockDriverState *bdrv_find_debug_node(BlockDriverState *bs) +{ + while (bs && bs->drv && !bs->drv->bdrv_debug_breakpoint) { + bs = bdrv_primary_bs(bs); + } + + if (bs && bs->drv && bs->drv->bdrv_debug_breakpoint) { + assert(bs->drv->bdrv_debug_remove_breakpoint); + return bs; + } + + return NULL; +} + +int bdrv_debug_breakpoint(BlockDriverState *bs, const char *event, + const char *tag) +{ + bs = bdrv_find_debug_node(bs); + if (bs) { + return bs->drv->bdrv_debug_breakpoint(bs, event, tag); + } + + return -ENOTSUP; +} + +int bdrv_debug_remove_breakpoint(BlockDriverState *bs, const char *tag) +{ + bs = bdrv_find_debug_node(bs); + if (bs) { + return bs->drv->bdrv_debug_remove_breakpoint(bs, tag); + } + + return -ENOTSUP; +} + +int bdrv_debug_resume(BlockDriverState *bs, const char *tag) +{ + while (bs && (!bs->drv || !bs->drv->bdrv_debug_resume)) { + bs = bdrv_primary_bs(bs); + } + + if (bs && bs->drv && bs->drv->bdrv_debug_resume) { + return bs->drv->bdrv_debug_resume(bs, tag); + } + + return -ENOTSUP; +} + +bool bdrv_debug_is_suspended(BlockDriverState *bs, const char *tag) +{ + while (bs && bs->drv && !bs->drv->bdrv_debug_is_suspended) { + bs = bdrv_primary_bs(bs); + } + + if (bs && bs->drv && bs->drv->bdrv_debug_is_suspended) { + return bs->drv->bdrv_debug_is_suspended(bs, tag); + } + + return false; +} + +/* backing_file can either be relative, or absolute, or a protocol. If it is + * relative, it must be relative to the chain. So, passing in bs->filename + * from a BDS as backing_file should not be done, as that may be relative to + * the CWD rather than the chain. */ +BlockDriverState *bdrv_find_backing_image(BlockDriverState *bs, + const char *backing_file) +{ + char *filename_full = NULL; + char *backing_file_full = NULL; + char *filename_tmp = NULL; + int is_protocol = 0; + bool filenames_refreshed = false; + BlockDriverState *curr_bs = NULL; + BlockDriverState *retval = NULL; + BlockDriverState *bs_below; + + if (!bs || !bs->drv || !backing_file) { + return NULL; + } + + filename_full = g_malloc(PATH_MAX); + backing_file_full = g_malloc(PATH_MAX); + + is_protocol = path_has_protocol(backing_file); + + /* + * Being largely a legacy function, skip any filters here + * (because filters do not have normal filenames, so they cannot + * match anyway; and allowing json:{} filenames is a bit out of + * scope). + */ + for (curr_bs = bdrv_skip_filters(bs); + bdrv_cow_child(curr_bs) != NULL; + curr_bs = bs_below) + { + bs_below = bdrv_backing_chain_next(curr_bs); + + if (bdrv_backing_overridden(curr_bs)) { + /* + * If the backing file was overridden, we can only compare + * directly against the backing node's filename. + */ + + if (!filenames_refreshed) { + /* + * This will automatically refresh all of the + * filenames in the rest of the backing chain, so we + * only need to do this once. + */ + bdrv_refresh_filename(bs_below); + filenames_refreshed = true; + } + + if (strcmp(backing_file, bs_below->filename) == 0) { + retval = bs_below; + break; + } + } else if (is_protocol || path_has_protocol(curr_bs->backing_file)) { + /* + * If either of the filename paths is actually a protocol, then + * compare unmodified paths; otherwise make paths relative. + */ + char *backing_file_full_ret; + + if (strcmp(backing_file, curr_bs->backing_file) == 0) { + retval = bs_below; + break; + } + /* Also check against the full backing filename for the image */ + backing_file_full_ret = bdrv_get_full_backing_filename(curr_bs, + NULL); + if (backing_file_full_ret) { + bool equal = strcmp(backing_file, backing_file_full_ret) == 0; + g_free(backing_file_full_ret); + if (equal) { + retval = bs_below; + break; + } + } + } else { + /* If not an absolute filename path, make it relative to the current + * image's filename path */ + filename_tmp = bdrv_make_absolute_filename(curr_bs, backing_file, + NULL); + /* We are going to compare canonicalized absolute pathnames */ + if (!filename_tmp || !realpath(filename_tmp, filename_full)) { + g_free(filename_tmp); + continue; + } + g_free(filename_tmp); + + /* We need to make sure the backing filename we are comparing against + * is relative to the current image filename (or absolute) */ + filename_tmp = bdrv_get_full_backing_filename(curr_bs, NULL); + if (!filename_tmp || !realpath(filename_tmp, backing_file_full)) { + g_free(filename_tmp); + continue; + } + g_free(filename_tmp); + + if (strcmp(backing_file_full, filename_full) == 0) { + retval = bs_below; + break; + } + } + } + + g_free(filename_full); + g_free(backing_file_full); + return retval; +} + +void bdrv_init(void) +{ + module_call_init(MODULE_INIT_BLOCK); +} + +void bdrv_init_with_whitelist(void) +{ + use_bdrv_whitelist = 1; + bdrv_init(); +} + +int coroutine_fn bdrv_co_invalidate_cache(BlockDriverState *bs, Error **errp) +{ + BdrvChild *child, *parent; + uint64_t perm, shared_perm; + Error *local_err = NULL; + int ret; + BdrvDirtyBitmap *bm; + + if (!bs->drv) { + return -ENOMEDIUM; + } + + QLIST_FOREACH(child, &bs->children, next) { + bdrv_co_invalidate_cache(child->bs, &local_err); + if (local_err) { + error_propagate(errp, local_err); + return -EINVAL; + } + } + + /* + * Update permissions, they may differ for inactive nodes. + * + * Note that the required permissions of inactive images are always a + * subset of the permissions required after activating the image. This + * allows us to just get the permissions upfront without restricting + * drv->bdrv_invalidate_cache(). + * + * It also means that in error cases, we don't have to try and revert to + * the old permissions (which is an operation that could fail, too). We can + * just keep the extended permissions for the next time that an activation + * of the image is tried. + */ + if (bs->open_flags & BDRV_O_INACTIVE) { + bs->open_flags &= ~BDRV_O_INACTIVE; + bdrv_get_cumulative_perm(bs, &perm, &shared_perm); + ret = bdrv_check_perm(bs, NULL, perm, shared_perm, NULL, NULL, errp); + if (ret < 0) { + bdrv_abort_perm_update(bs); + bs->open_flags |= BDRV_O_INACTIVE; + return ret; + } + bdrv_set_perm(bs, perm, shared_perm); + + if (bs->drv->bdrv_co_invalidate_cache) { + bs->drv->bdrv_co_invalidate_cache(bs, &local_err); + if (local_err) { + bs->open_flags |= BDRV_O_INACTIVE; + error_propagate(errp, local_err); + return -EINVAL; + } + } + + FOR_EACH_DIRTY_BITMAP(bs, bm) { + bdrv_dirty_bitmap_skip_store(bm, false); + } + + ret = refresh_total_sectors(bs, bs->total_sectors); + if (ret < 0) { + bs->open_flags |= BDRV_O_INACTIVE; + error_setg_errno(errp, -ret, "Could not refresh total sector count"); + return ret; + } + } + + QLIST_FOREACH(parent, &bs->parents, next_parent) { + if (parent->klass->activate) { + parent->klass->activate(parent, &local_err); + if (local_err) { + bs->open_flags |= BDRV_O_INACTIVE; + error_propagate(errp, local_err); + return -EINVAL; + } + } + } + + return 0; +} + +void bdrv_invalidate_cache_all(Error **errp) +{ + BlockDriverState *bs; + BdrvNextIterator it; + + for (bs = bdrv_first(&it); bs; bs = bdrv_next(&it)) { + AioContext *aio_context = bdrv_get_aio_context(bs); + int ret; + + aio_context_acquire(aio_context); + ret = bdrv_invalidate_cache(bs, errp); + aio_context_release(aio_context); + if (ret < 0) { + bdrv_next_cleanup(&it); + return; + } + } +} + +static bool bdrv_has_bds_parent(BlockDriverState *bs, bool only_active) +{ + BdrvChild *parent; + + QLIST_FOREACH(parent, &bs->parents, next_parent) { + if (parent->klass->parent_is_bds) { + BlockDriverState *parent_bs = parent->opaque; + if (!only_active || !(parent_bs->open_flags & BDRV_O_INACTIVE)) { + return true; + } + } + } + + return false; +} + +static int bdrv_inactivate_recurse(BlockDriverState *bs) +{ + BdrvChild *child, *parent; + bool tighten_restrictions; + uint64_t perm, shared_perm; + int ret; + + if (!bs->drv) { + return -ENOMEDIUM; + } + + /* Make sure that we don't inactivate a child before its parent. + * It will be covered by recursion from the yet active parent. */ + if (bdrv_has_bds_parent(bs, true)) { + return 0; + } + + assert(!(bs->open_flags & BDRV_O_INACTIVE)); + + /* Inactivate this node */ + if (bs->drv->bdrv_inactivate) { + ret = bs->drv->bdrv_inactivate(bs); + if (ret < 0) { + return ret; + } + } + + QLIST_FOREACH(parent, &bs->parents, next_parent) { + if (parent->klass->inactivate) { + ret = parent->klass->inactivate(parent); + if (ret < 0) { + return ret; + } + } + } + + bs->open_flags |= BDRV_O_INACTIVE; + + /* Update permissions, they may differ for inactive nodes */ + bdrv_get_cumulative_perm(bs, &perm, &shared_perm); + ret = bdrv_check_perm(bs, NULL, perm, shared_perm, NULL, + &tighten_restrictions, NULL); + assert(tighten_restrictions == false); + if (ret < 0) { + /* We only tried to loosen restrictions, so errors are not fatal */ + bdrv_abort_perm_update(bs); + } else { + bdrv_set_perm(bs, perm, shared_perm); + } + + + /* Recursively inactivate children */ + QLIST_FOREACH(child, &bs->children, next) { + ret = bdrv_inactivate_recurse(child->bs); + if (ret < 0) { + return ret; + } + } + + return 0; +} + +int bdrv_inactivate_all(void) +{ + BlockDriverState *bs = NULL; + BdrvNextIterator it; + int ret = 0; + GSList *aio_ctxs = NULL, *ctx; + + for (bs = bdrv_first(&it); bs; bs = bdrv_next(&it)) { + AioContext *aio_context = bdrv_get_aio_context(bs); + + if (!g_slist_find(aio_ctxs, aio_context)) { + aio_ctxs = g_slist_prepend(aio_ctxs, aio_context); + aio_context_acquire(aio_context); + } + } + + for (bs = bdrv_first(&it); bs; bs = bdrv_next(&it)) { + /* Nodes with BDS parents are covered by recursion from the last + * parent that gets inactivated. Don't inactivate them a second + * time if that has already happened. */ + if (bdrv_has_bds_parent(bs, false)) { + continue; + } + ret = bdrv_inactivate_recurse(bs); + if (ret < 0) { + bdrv_next_cleanup(&it); + goto out; + } + } + +out: + for (ctx = aio_ctxs; ctx != NULL; ctx = ctx->next) { + AioContext *aio_context = ctx->data; + aio_context_release(aio_context); + } + g_slist_free(aio_ctxs); + + return ret; +} + +/**************************************************************/ +/* removable device support */ + +/** + * Return TRUE if the media is present + */ +bool bdrv_is_inserted(BlockDriverState *bs) +{ + BlockDriver *drv = bs->drv; + BdrvChild *child; + + if (!drv) { + return false; + } + if (drv->bdrv_is_inserted) { + return drv->bdrv_is_inserted(bs); + } + QLIST_FOREACH(child, &bs->children, next) { + if (!bdrv_is_inserted(child->bs)) { + return false; + } + } + return true; +} + +/** + * If eject_flag is TRUE, eject the media. Otherwise, close the tray + */ +void bdrv_eject(BlockDriverState *bs, bool eject_flag) +{ + BlockDriver *drv = bs->drv; + + if (drv && drv->bdrv_eject) { + drv->bdrv_eject(bs, eject_flag); + } +} + +/** + * Lock or unlock the media (if it is locked, the user won't be able + * to eject it manually). + */ +void bdrv_lock_medium(BlockDriverState *bs, bool locked) +{ + BlockDriver *drv = bs->drv; + + trace_bdrv_lock_medium(bs, locked); + + if (drv && drv->bdrv_lock_medium) { + drv->bdrv_lock_medium(bs, locked); + } +} + +/* Get a reference to bs */ +void bdrv_ref(BlockDriverState *bs) +{ + bs->refcnt++; +} + +/* Release a previously grabbed reference to bs. + * If after releasing, reference count is zero, the BlockDriverState is + * deleted. */ +void bdrv_unref(BlockDriverState *bs) +{ + if (!bs) { + return; + } + assert(bs->refcnt > 0); + if (--bs->refcnt == 0) { + bdrv_delete(bs); + } +} + +struct BdrvOpBlocker { + Error *reason; + QLIST_ENTRY(BdrvOpBlocker) list; +}; + +bool bdrv_op_is_blocked(BlockDriverState *bs, BlockOpType op, Error **errp) +{ + BdrvOpBlocker *blocker; + assert((int) op >= 0 && op < BLOCK_OP_TYPE_MAX); + if (!QLIST_EMPTY(&bs->op_blockers[op])) { + blocker = QLIST_FIRST(&bs->op_blockers[op]); + error_propagate_prepend(errp, error_copy(blocker->reason), + "Node '%s' is busy: ", + bdrv_get_device_or_node_name(bs)); + return true; + } + return false; +} + +void bdrv_op_block(BlockDriverState *bs, BlockOpType op, Error *reason) +{ + BdrvOpBlocker *blocker; + assert((int) op >= 0 && op < BLOCK_OP_TYPE_MAX); + + blocker = g_new0(BdrvOpBlocker, 1); + blocker->reason = reason; + QLIST_INSERT_HEAD(&bs->op_blockers[op], blocker, list); +} + +void bdrv_op_unblock(BlockDriverState *bs, BlockOpType op, Error *reason) +{ + BdrvOpBlocker *blocker, *next; + assert((int) op >= 0 && op < BLOCK_OP_TYPE_MAX); + QLIST_FOREACH_SAFE(blocker, &bs->op_blockers[op], list, next) { + if (blocker->reason == reason) { + QLIST_REMOVE(blocker, list); + g_free(blocker); + } + } +} + +void bdrv_op_block_all(BlockDriverState *bs, Error *reason) +{ + int i; + for (i = 0; i < BLOCK_OP_TYPE_MAX; i++) { + bdrv_op_block(bs, i, reason); + } +} + +void bdrv_op_unblock_all(BlockDriverState *bs, Error *reason) +{ + int i; + for (i = 0; i < BLOCK_OP_TYPE_MAX; i++) { + bdrv_op_unblock(bs, i, reason); + } +} + +bool bdrv_op_blocker_is_empty(BlockDriverState *bs) +{ + int i; + + for (i = 0; i < BLOCK_OP_TYPE_MAX; i++) { + if (!QLIST_EMPTY(&bs->op_blockers[i])) { + return false; + } + } + return true; +} + +void bdrv_img_create(const char *filename, const char *fmt, + const char *base_filename, const char *base_fmt, + char *options, uint64_t img_size, int flags, bool quiet, + Error **errp) +{ + QemuOptsList *create_opts = NULL; + QemuOpts *opts = NULL; + const char *backing_fmt, *backing_file; + int64_t size; + BlockDriver *drv, *proto_drv; + Error *local_err = NULL; + int ret = 0; + + /* Find driver and parse its options */ + drv = bdrv_find_format(fmt); + if (!drv) { + error_setg(errp, "Unknown file format '%s'", fmt); + return; + } + + proto_drv = bdrv_find_protocol(filename, true, errp); + if (!proto_drv) { + return; + } + + if (!drv->create_opts) { + error_setg(errp, "Format driver '%s' does not support image creation", + drv->format_name); + return; + } + + if (!proto_drv->create_opts) { + error_setg(errp, "Protocol driver '%s' does not support image creation", + proto_drv->format_name); + return; + } + + /* Create parameter list */ + create_opts = qemu_opts_append(create_opts, drv->create_opts); + create_opts = qemu_opts_append(create_opts, proto_drv->create_opts); + + opts = qemu_opts_create(create_opts, NULL, 0, &error_abort); + + /* Parse -o options */ + if (options) { + if (!qemu_opts_do_parse(opts, options, NULL, errp)) { + goto out; + } + } + + if (!qemu_opt_get(opts, BLOCK_OPT_SIZE)) { + qemu_opt_set_number(opts, BLOCK_OPT_SIZE, img_size, &error_abort); + } else if (img_size != UINT64_C(-1)) { + error_setg(errp, "The image size must be specified only once"); + goto out; + } + + if (base_filename) { + if (!qemu_opt_set(opts, BLOCK_OPT_BACKING_FILE, base_filename, + NULL)) { + error_setg(errp, "Backing file not supported for file format '%s'", + fmt); + goto out; + } + } + + if (base_fmt) { + if (!qemu_opt_set(opts, BLOCK_OPT_BACKING_FMT, base_fmt, NULL)) { + error_setg(errp, "Backing file format not supported for file " + "format '%s'", fmt); + goto out; + } + } + + backing_file = qemu_opt_get(opts, BLOCK_OPT_BACKING_FILE); + if (backing_file) { + if (!strcmp(filename, backing_file)) { + error_setg(errp, "Error: Trying to create an image with the " + "same filename as the backing file"); + goto out; + } + if (backing_file[0] == '\0') { + error_setg(errp, "Expected backing file name, got empty string"); + goto out; + } + } + + backing_fmt = qemu_opt_get(opts, BLOCK_OPT_BACKING_FMT); + + /* The size for the image must always be specified, unless we have a backing + * file and we have not been forbidden from opening it. */ + size = qemu_opt_get_size(opts, BLOCK_OPT_SIZE, img_size); + if (backing_file && !(flags & BDRV_O_NO_BACKING)) { + BlockDriverState *bs; + char *full_backing; + int back_flags; + QDict *backing_options = NULL; + + full_backing = + bdrv_get_full_backing_filename_from_filename(filename, backing_file, + &local_err); + if (local_err) { + goto out; + } + assert(full_backing); + + /* backing files always opened read-only */ + back_flags = flags; + back_flags &= ~(BDRV_O_RDWR | BDRV_O_SNAPSHOT | BDRV_O_NO_BACKING); + + backing_options = qdict_new(); + if (backing_fmt) { + qdict_put_str(backing_options, "driver", backing_fmt); + } + qdict_put_bool(backing_options, BDRV_OPT_FORCE_SHARE, true); + + bs = bdrv_open(full_backing, NULL, backing_options, back_flags, + &local_err); + g_free(full_backing); + if (!bs) { + error_append_hint(&local_err, "Could not open backing image.\n"); + goto out; + } else { + if (!backing_fmt) { + warn_report("Deprecated use of backing file without explicit " + "backing format (detected format of %s)", + bs->drv->format_name); + if (bs->drv != &bdrv_raw) { + /* + * A probe of raw deserves the most attention: + * leaving the backing format out of the image + * will ensure bs->probed is set (ensuring we + * don't accidentally commit into the backing + * file), and allow more spots to warn the users + * to fix their toolchain when opening this image + * later. For other images, we can safely record + * the format that we probed. + */ + backing_fmt = bs->drv->format_name; + qemu_opt_set(opts, BLOCK_OPT_BACKING_FMT, backing_fmt, + NULL); + } + } + if (size == -1) { + /* Opened BS, have no size */ + size = bdrv_getlength(bs); + if (size < 0) { + error_setg_errno(errp, -size, "Could not get size of '%s'", + backing_file); + bdrv_unref(bs); + goto out; + } + qemu_opt_set_number(opts, BLOCK_OPT_SIZE, size, &error_abort); + } + bdrv_unref(bs); + } + /* (backing_file && !(flags & BDRV_O_NO_BACKING)) */ + } else if (backing_file && !backing_fmt) { + warn_report("Deprecated use of unopened backing file without " + "explicit backing format, use of this image requires " + "potentially unsafe format probing"); + } + + if (size == -1) { + error_setg(errp, "Image creation needs a size parameter"); + goto out; + } + + if (!quiet) { + printf("Formatting '%s', fmt=%s ", filename, fmt); + qemu_opts_print(opts, " "); + puts(""); + fflush(stdout); + } + + ret = bdrv_create(drv, filename, opts, &local_err); + + if (ret == -EFBIG) { + /* This is generally a better message than whatever the driver would + * deliver (especially because of the cluster_size_hint), since that + * is most probably not much different from "image too large". */ + const char *cluster_size_hint = ""; + if (qemu_opt_get_size(opts, BLOCK_OPT_CLUSTER_SIZE, 0)) { + cluster_size_hint = " (try using a larger cluster size)"; + } + error_setg(errp, "The image size is too large for file format '%s'" + "%s", fmt, cluster_size_hint); + error_free(local_err); + local_err = NULL; + } + +out: + qemu_opts_del(opts); + qemu_opts_free(create_opts); + error_propagate(errp, local_err); +} + +AioContext *bdrv_get_aio_context(BlockDriverState *bs) +{ + return bs ? bs->aio_context : qemu_get_aio_context(); +} + +AioContext *coroutine_fn bdrv_co_enter(BlockDriverState *bs) +{ + Coroutine *self = qemu_coroutine_self(); + AioContext *old_ctx = qemu_coroutine_get_aio_context(self); + AioContext *new_ctx; + + /* + * Increase bs->in_flight to ensure that this operation is completed before + * moving the node to a different AioContext. Read new_ctx only afterwards. + */ + bdrv_inc_in_flight(bs); + + new_ctx = bdrv_get_aio_context(bs); + aio_co_reschedule_self(new_ctx); + return old_ctx; +} + +void coroutine_fn bdrv_co_leave(BlockDriverState *bs, AioContext *old_ctx) +{ + aio_co_reschedule_self(old_ctx); + bdrv_dec_in_flight(bs); +} + +void coroutine_fn bdrv_co_lock(BlockDriverState *bs) +{ + AioContext *ctx = bdrv_get_aio_context(bs); + + /* In the main thread, bs->aio_context won't change concurrently */ + assert(qemu_get_current_aio_context() == qemu_get_aio_context()); + + /* + * We're in coroutine context, so we already hold the lock of the main + * loop AioContext. Don't lock it twice to avoid deadlocks. + */ + assert(qemu_in_coroutine()); + if (ctx != qemu_get_aio_context()) { + aio_context_acquire(ctx); + } +} + +void coroutine_fn bdrv_co_unlock(BlockDriverState *bs) +{ + AioContext *ctx = bdrv_get_aio_context(bs); + + assert(qemu_in_coroutine()); + if (ctx != qemu_get_aio_context()) { + aio_context_release(ctx); + } +} + +void bdrv_coroutine_enter(BlockDriverState *bs, Coroutine *co) +{ + aio_co_enter(bdrv_get_aio_context(bs), co); +} + +static void bdrv_do_remove_aio_context_notifier(BdrvAioNotifier *ban) +{ + QLIST_REMOVE(ban, list); + g_free(ban); +} + +static void bdrv_detach_aio_context(BlockDriverState *bs) +{ + BdrvAioNotifier *baf, *baf_tmp; + + assert(!bs->walking_aio_notifiers); + bs->walking_aio_notifiers = true; + QLIST_FOREACH_SAFE(baf, &bs->aio_notifiers, list, baf_tmp) { + if (baf->deleted) { + bdrv_do_remove_aio_context_notifier(baf); + } else { + baf->detach_aio_context(baf->opaque); + } + } + /* Never mind iterating again to check for ->deleted. bdrv_close() will + * remove remaining aio notifiers if we aren't called again. + */ + bs->walking_aio_notifiers = false; + + if (bs->drv && bs->drv->bdrv_detach_aio_context) { + bs->drv->bdrv_detach_aio_context(bs); + } + + if (bs->quiesce_counter) { + aio_enable_external(bs->aio_context); + } + bs->aio_context = NULL; +} + +static void bdrv_attach_aio_context(BlockDriverState *bs, + AioContext *new_context) +{ + BdrvAioNotifier *ban, *ban_tmp; + + if (bs->quiesce_counter) { + aio_disable_external(new_context); + } + + bs->aio_context = new_context; + + if (bs->drv && bs->drv->bdrv_attach_aio_context) { + bs->drv->bdrv_attach_aio_context(bs, new_context); + } + + assert(!bs->walking_aio_notifiers); + bs->walking_aio_notifiers = true; + QLIST_FOREACH_SAFE(ban, &bs->aio_notifiers, list, ban_tmp) { + if (ban->deleted) { + bdrv_do_remove_aio_context_notifier(ban); + } else { + ban->attached_aio_context(new_context, ban->opaque); + } + } + bs->walking_aio_notifiers = false; +} + +/* + * Changes the AioContext used for fd handlers, timers, and BHs by this + * BlockDriverState and all its children and parents. + * + * Must be called from the main AioContext. + * + * The caller must own the AioContext lock for the old AioContext of bs, but it + * must not own the AioContext lock for new_context (unless new_context is the + * same as the current context of bs). + * + * @ignore will accumulate all visited BdrvChild object. The caller is + * responsible for freeing the list afterwards. + */ +void bdrv_set_aio_context_ignore(BlockDriverState *bs, + AioContext *new_context, GSList **ignore) +{ + AioContext *old_context = bdrv_get_aio_context(bs); + BdrvChild *child; + + g_assert(qemu_get_current_aio_context() == qemu_get_aio_context()); + + if (old_context == new_context) { + return; + } + + bdrv_drained_begin(bs); + + QLIST_FOREACH(child, &bs->children, next) { + if (g_slist_find(*ignore, child)) { + continue; + } + *ignore = g_slist_prepend(*ignore, child); + bdrv_set_aio_context_ignore(child->bs, new_context, ignore); + } + QLIST_FOREACH(child, &bs->parents, next_parent) { + if (g_slist_find(*ignore, child)) { + continue; + } + assert(child->klass->set_aio_ctx); + *ignore = g_slist_prepend(*ignore, child); + child->klass->set_aio_ctx(child, new_context, ignore); + } + + bdrv_detach_aio_context(bs); + + /* Acquire the new context, if necessary */ + if (qemu_get_aio_context() != new_context) { + aio_context_acquire(new_context); + } + + bdrv_attach_aio_context(bs, new_context); + + /* + * If this function was recursively called from + * bdrv_set_aio_context_ignore(), there may be nodes in the + * subtree that have not yet been moved to the new AioContext. + * Release the old one so bdrv_drained_end() can poll them. + */ + if (qemu_get_aio_context() != old_context) { + aio_context_release(old_context); + } + + bdrv_drained_end(bs); + + if (qemu_get_aio_context() != old_context) { + aio_context_acquire(old_context); + } + if (qemu_get_aio_context() != new_context) { + aio_context_release(new_context); + } +} + +static bool bdrv_parent_can_set_aio_context(BdrvChild *c, AioContext *ctx, + GSList **ignore, Error **errp) +{ + if (g_slist_find(*ignore, c)) { + return true; + } + *ignore = g_slist_prepend(*ignore, c); + + /* + * A BdrvChildClass that doesn't handle AioContext changes cannot + * tolerate any AioContext changes + */ + if (!c->klass->can_set_aio_ctx) { + char *user = bdrv_child_user_desc(c); + error_setg(errp, "Changing iothreads is not supported by %s", user); + g_free(user); + return false; + } + if (!c->klass->can_set_aio_ctx(c, ctx, ignore, errp)) { + assert(!errp || *errp); + return false; + } + return true; +} + +bool bdrv_child_can_set_aio_context(BdrvChild *c, AioContext *ctx, + GSList **ignore, Error **errp) +{ + if (g_slist_find(*ignore, c)) { + return true; + } + *ignore = g_slist_prepend(*ignore, c); + return bdrv_can_set_aio_context(c->bs, ctx, ignore, errp); +} + +/* @ignore will accumulate all visited BdrvChild object. The caller is + * responsible for freeing the list afterwards. */ +bool bdrv_can_set_aio_context(BlockDriverState *bs, AioContext *ctx, + GSList **ignore, Error **errp) +{ + BdrvChild *c; + + if (bdrv_get_aio_context(bs) == ctx) { + return true; + } + + QLIST_FOREACH(c, &bs->parents, next_parent) { + if (!bdrv_parent_can_set_aio_context(c, ctx, ignore, errp)) { + return false; + } + } + QLIST_FOREACH(c, &bs->children, next) { + if (!bdrv_child_can_set_aio_context(c, ctx, ignore, errp)) { + return false; + } + } + + return true; +} + +int bdrv_child_try_set_aio_context(BlockDriverState *bs, AioContext *ctx, + BdrvChild *ignore_child, Error **errp) +{ + GSList *ignore; + bool ret; + + ignore = ignore_child ? g_slist_prepend(NULL, ignore_child) : NULL; + ret = bdrv_can_set_aio_context(bs, ctx, &ignore, errp); + g_slist_free(ignore); + + if (!ret) { + return -EPERM; + } + + ignore = ignore_child ? g_slist_prepend(NULL, ignore_child) : NULL; + bdrv_set_aio_context_ignore(bs, ctx, &ignore); + g_slist_free(ignore); + + return 0; +} + +int bdrv_try_set_aio_context(BlockDriverState *bs, AioContext *ctx, + Error **errp) +{ + return bdrv_child_try_set_aio_context(bs, ctx, NULL, errp); +} + +void bdrv_add_aio_context_notifier(BlockDriverState *bs, + void (*attached_aio_context)(AioContext *new_context, void *opaque), + void (*detach_aio_context)(void *opaque), void *opaque) +{ + BdrvAioNotifier *ban = g_new(BdrvAioNotifier, 1); + *ban = (BdrvAioNotifier){ + .attached_aio_context = attached_aio_context, + .detach_aio_context = detach_aio_context, + .opaque = opaque + }; + + QLIST_INSERT_HEAD(&bs->aio_notifiers, ban, list); +} + +void bdrv_remove_aio_context_notifier(BlockDriverState *bs, + void (*attached_aio_context)(AioContext *, + void *), + void (*detach_aio_context)(void *), + void *opaque) +{ + BdrvAioNotifier *ban, *ban_next; + + QLIST_FOREACH_SAFE(ban, &bs->aio_notifiers, list, ban_next) { + if (ban->attached_aio_context == attached_aio_context && + ban->detach_aio_context == detach_aio_context && + ban->opaque == opaque && + ban->deleted == false) + { + if (bs->walking_aio_notifiers) { + ban->deleted = true; + } else { + bdrv_do_remove_aio_context_notifier(ban); + } + return; + } + } + + abort(); +} + +int bdrv_amend_options(BlockDriverState *bs, QemuOpts *opts, + BlockDriverAmendStatusCB *status_cb, void *cb_opaque, + bool force, + Error **errp) +{ + if (!bs->drv) { + error_setg(errp, "Node is ejected"); + return -ENOMEDIUM; + } + if (!bs->drv->bdrv_amend_options) { + error_setg(errp, "Block driver '%s' does not support option amendment", + bs->drv->format_name); + return -ENOTSUP; + } + return bs->drv->bdrv_amend_options(bs, opts, status_cb, + cb_opaque, force, errp); +} + +/* + * This function checks whether the given @to_replace is allowed to be + * replaced by a node that always shows the same data as @bs. This is + * used for example to verify whether the mirror job can replace + * @to_replace by the target mirrored from @bs. + * To be replaceable, @bs and @to_replace may either be guaranteed to + * always show the same data (because they are only connected through + * filters), or some driver may allow replacing one of its children + * because it can guarantee that this child's data is not visible at + * all (for example, for dissenting quorum children that have no other + * parents). + */ +bool bdrv_recurse_can_replace(BlockDriverState *bs, + BlockDriverState *to_replace) +{ + BlockDriverState *filtered; + + if (!bs || !bs->drv) { + return false; + } + + if (bs == to_replace) { + return true; + } + + /* See what the driver can do */ + if (bs->drv->bdrv_recurse_can_replace) { + return bs->drv->bdrv_recurse_can_replace(bs, to_replace); + } + + /* For filters without an own implementation, we can recurse on our own */ + filtered = bdrv_filter_bs(bs); + if (filtered) { + return bdrv_recurse_can_replace(filtered, to_replace); + } + + /* Safe default */ + return false; +} + +/* + * Check whether the given @node_name can be replaced by a node that + * has the same data as @parent_bs. If so, return @node_name's BDS; + * NULL otherwise. + * + * @node_name must be a (recursive) *child of @parent_bs (or this + * function will return NULL). + * + * The result (whether the node can be replaced or not) is only valid + * for as long as no graph or permission changes occur. + */ +BlockDriverState *check_to_replace_node(BlockDriverState *parent_bs, + const char *node_name, Error **errp) +{ + BlockDriverState *to_replace_bs = bdrv_find_node(node_name); + AioContext *aio_context; + + if (!to_replace_bs) { + error_setg(errp, "Node name '%s' not found", node_name); + return NULL; + } + + aio_context = bdrv_get_aio_context(to_replace_bs); + aio_context_acquire(aio_context); + + if (bdrv_op_is_blocked(to_replace_bs, BLOCK_OP_TYPE_REPLACE, errp)) { + to_replace_bs = NULL; + goto out; + } + + /* We don't want arbitrary node of the BDS chain to be replaced only the top + * most non filter in order to prevent data corruption. + * Another benefit is that this tests exclude backing files which are + * blocked by the backing blockers. + */ + if (!bdrv_recurse_can_replace(parent_bs, to_replace_bs)) { + error_setg(errp, "Cannot replace '%s' by a node mirrored from '%s', " + "because it cannot be guaranteed that doing so would not " + "lead to an abrupt change of visible data", + node_name, parent_bs->node_name); + to_replace_bs = NULL; + goto out; + } + +out: + aio_context_release(aio_context); + return to_replace_bs; +} + +/** + * Iterates through the list of runtime option keys that are said to + * be "strong" for a BDS. An option is called "strong" if it changes + * a BDS's data. For example, the null block driver's "size" and + * "read-zeroes" options are strong, but its "latency-ns" option is + * not. + * + * If a key returned by this function ends with a dot, all options + * starting with that prefix are strong. + */ +static const char *const *strong_options(BlockDriverState *bs, + const char *const *curopt) +{ + static const char *const global_options[] = { + "driver", "filename", NULL + }; + + if (!curopt) { + return &global_options[0]; + } + + curopt++; + if (curopt == &global_options[ARRAY_SIZE(global_options) - 1] && bs->drv) { + curopt = bs->drv->strong_runtime_opts; + } + + return (curopt && *curopt) ? curopt : NULL; +} + +/** + * Copies all strong runtime options from bs->options to the given + * QDict. The set of strong option keys is determined by invoking + * strong_options(). + * + * Returns true iff any strong option was present in bs->options (and + * thus copied to the target QDict) with the exception of "filename" + * and "driver". The caller is expected to use this value to decide + * whether the existence of strong options prevents the generation of + * a plain filename. + */ +static bool append_strong_runtime_options(QDict *d, BlockDriverState *bs) +{ + bool found_any = false; + const char *const *option_name = NULL; + + if (!bs->drv) { + return false; + } + + while ((option_name = strong_options(bs, option_name))) { + bool option_given = false; + + assert(strlen(*option_name) > 0); + if ((*option_name)[strlen(*option_name) - 1] != '.') { + QObject *entry = qdict_get(bs->options, *option_name); + if (!entry) { + continue; + } + + qdict_put_obj(d, *option_name, qobject_ref(entry)); + option_given = true; + } else { + const QDictEntry *entry; + for (entry = qdict_first(bs->options); entry; + entry = qdict_next(bs->options, entry)) + { + if (strstart(qdict_entry_key(entry), *option_name, NULL)) { + qdict_put_obj(d, qdict_entry_key(entry), + qobject_ref(qdict_entry_value(entry))); + option_given = true; + } + } + } + + /* While "driver" and "filename" need to be included in a JSON filename, + * their existence does not prohibit generation of a plain filename. */ + if (!found_any && option_given && + strcmp(*option_name, "driver") && strcmp(*option_name, "filename")) + { + found_any = true; + } + } + + if (!qdict_haskey(d, "driver")) { + /* Drivers created with bdrv_new_open_driver() may not have a + * @driver option. Add it here. */ + qdict_put_str(d, "driver", bs->drv->format_name); + } + + return found_any; +} + +/* Note: This function may return false positives; it may return true + * even if opening the backing file specified by bs's image header + * would result in exactly bs->backing. */ +bool bdrv_backing_overridden(BlockDriverState *bs) +{ + if (bs->backing) { + return strcmp(bs->auto_backing_file, + bs->backing->bs->filename); + } else { + /* No backing BDS, so if the image header reports any backing + * file, it must have been suppressed */ + return bs->auto_backing_file[0] != '\0'; + } +} + +/* Updates the following BDS fields: + * - exact_filename: A filename which may be used for opening a block device + * which (mostly) equals the given BDS (even without any + * other options; so reading and writing must return the same + * results, but caching etc. may be different) + * - full_open_options: Options which, when given when opening a block device + * (without a filename), result in a BDS (mostly) + * equalling the given one + * - filename: If exact_filename is set, it is copied here. Otherwise, + * full_open_options is converted to a JSON object, prefixed with + * "json:" (for use through the JSON pseudo protocol) and put here. + */ +void bdrv_refresh_filename(BlockDriverState *bs) +{ + BlockDriver *drv = bs->drv; + BdrvChild *child; + BlockDriverState *primary_child_bs; + QDict *opts; + bool backing_overridden; + bool generate_json_filename; /* Whether our default implementation should + fill exact_filename (false) or not (true) */ + + if (!drv) { + return; + } + + /* This BDS's file name may depend on any of its children's file names, so + * refresh those first */ + QLIST_FOREACH(child, &bs->children, next) { + bdrv_refresh_filename(child->bs); + } + + if (bs->implicit) { + /* For implicit nodes, just copy everything from the single child */ + child = QLIST_FIRST(&bs->children); + assert(QLIST_NEXT(child, next) == NULL); + + pstrcpy(bs->exact_filename, sizeof(bs->exact_filename), + child->bs->exact_filename); + pstrcpy(bs->filename, sizeof(bs->filename), child->bs->filename); + + qobject_unref(bs->full_open_options); + bs->full_open_options = qobject_ref(child->bs->full_open_options); + + return; + } + + backing_overridden = bdrv_backing_overridden(bs); + + if (bs->open_flags & BDRV_O_NO_IO) { + /* Without I/O, the backing file does not change anything. + * Therefore, in such a case (primarily qemu-img), we can + * pretend the backing file has not been overridden even if + * it technically has been. */ + backing_overridden = false; + } + + /* Gather the options QDict */ + opts = qdict_new(); + generate_json_filename = append_strong_runtime_options(opts, bs); + generate_json_filename |= backing_overridden; + + if (drv->bdrv_gather_child_options) { + /* Some block drivers may not want to present all of their children's + * options, or name them differently from BdrvChild.name */ + drv->bdrv_gather_child_options(bs, opts, backing_overridden); + } else { + QLIST_FOREACH(child, &bs->children, next) { + if (child == bs->backing && !backing_overridden) { + /* We can skip the backing BDS if it has not been overridden */ + continue; + } + + qdict_put(opts, child->name, + qobject_ref(child->bs->full_open_options)); + } + + if (backing_overridden && !bs->backing) { + /* Force no backing file */ + qdict_put_null(opts, "backing"); + } + } + + qobject_unref(bs->full_open_options); + bs->full_open_options = opts; + + primary_child_bs = bdrv_primary_bs(bs); + + if (drv->bdrv_refresh_filename) { + /* Obsolete information is of no use here, so drop the old file name + * information before refreshing it */ + bs->exact_filename[0] = '\0'; + + drv->bdrv_refresh_filename(bs); + } else if (primary_child_bs) { + /* + * Try to reconstruct valid information from the underlying + * file -- this only works for format nodes (filter nodes + * cannot be probed and as such must be selected by the user + * either through an options dict, or through a special + * filename which the filter driver must construct in its + * .bdrv_refresh_filename() implementation). + */ + + bs->exact_filename[0] = '\0'; + + /* + * We can use the underlying file's filename if: + * - it has a filename, + * - the current BDS is not a filter, + * - the file is a protocol BDS, and + * - opening that file (as this BDS's format) will automatically create + * the BDS tree we have right now, that is: + * - the user did not significantly change this BDS's behavior with + * some explicit (strong) options + * - no non-file child of this BDS has been overridden by the user + * Both of these conditions are represented by generate_json_filename. + */ + if (primary_child_bs->exact_filename[0] && + primary_child_bs->drv->bdrv_file_open && + !drv->is_filter && !generate_json_filename) + { + strcpy(bs->exact_filename, primary_child_bs->exact_filename); + } + } + + if (bs->exact_filename[0]) { + pstrcpy(bs->filename, sizeof(bs->filename), bs->exact_filename); + } else { + QString *json = qobject_to_json(QOBJECT(bs->full_open_options)); + if (snprintf(bs->filename, sizeof(bs->filename), "json:%s", + qstring_get_str(json)) >= sizeof(bs->filename)) { + /* Give user a hint if we truncated things. */ + strcpy(bs->filename + sizeof(bs->filename) - 4, "..."); + } + qobject_unref(json); + } +} + +char *bdrv_dirname(BlockDriverState *bs, Error **errp) +{ + BlockDriver *drv = bs->drv; + BlockDriverState *child_bs; + + if (!drv) { + error_setg(errp, "Node '%s' is ejected", bs->node_name); + return NULL; + } + + if (drv->bdrv_dirname) { + return drv->bdrv_dirname(bs, errp); + } + + child_bs = bdrv_primary_bs(bs); + if (child_bs) { + return bdrv_dirname(child_bs, errp); + } + + bdrv_refresh_filename(bs); + if (bs->exact_filename[0] != '\0') { + return path_combine(bs->exact_filename, ""); + } + + error_setg(errp, "Cannot generate a base directory for %s nodes", + drv->format_name); + return NULL; +} + +/* + * Hot add/remove a BDS's child. So the user can take a child offline when + * it is broken and take a new child online + */ +void bdrv_add_child(BlockDriverState *parent_bs, BlockDriverState *child_bs, + Error **errp) +{ + + if (!parent_bs->drv || !parent_bs->drv->bdrv_add_child) { + error_setg(errp, "The node %s does not support adding a child", + bdrv_get_device_or_node_name(parent_bs)); + return; + } + + if (!QLIST_EMPTY(&child_bs->parents)) { + error_setg(errp, "The node %s already has a parent", + child_bs->node_name); + return; + } + + parent_bs->drv->bdrv_add_child(parent_bs, child_bs, errp); +} + +void bdrv_del_child(BlockDriverState *parent_bs, BdrvChild *child, Error **errp) +{ + BdrvChild *tmp; + + if (!parent_bs->drv || !parent_bs->drv->bdrv_del_child) { + error_setg(errp, "The node %s does not support removing a child", + bdrv_get_device_or_node_name(parent_bs)); + return; + } + + QLIST_FOREACH(tmp, &parent_bs->children, next) { + if (tmp == child) { + break; + } + } + + if (!tmp) { + error_setg(errp, "The node %s does not have a child named %s", + bdrv_get_device_or_node_name(parent_bs), + bdrv_get_device_or_node_name(child->bs)); + return; + } + + parent_bs->drv->bdrv_del_child(parent_bs, child, errp); +} + +int bdrv_make_empty(BdrvChild *c, Error **errp) +{ + BlockDriver *drv = c->bs->drv; + int ret; + + assert(c->perm & (BLK_PERM_WRITE | BLK_PERM_WRITE_UNCHANGED)); + + if (!drv->bdrv_make_empty) { + error_setg(errp, "%s does not support emptying nodes", + drv->format_name); + return -ENOTSUP; + } + + ret = drv->bdrv_make_empty(c->bs); + if (ret < 0) { + error_setg_errno(errp, -ret, "Failed to empty %s", + c->bs->filename); + return ret; + } + + return 0; +} + +/* + * Return the child that @bs acts as an overlay for, and from which data may be + * copied in COW or COR operations. Usually this is the backing file. + */ +BdrvChild *bdrv_cow_child(BlockDriverState *bs) +{ + if (!bs || !bs->drv) { + return NULL; + } + + if (bs->drv->is_filter) { + return NULL; + } + + if (!bs->backing) { + return NULL; + } + + assert(bs->backing->role & BDRV_CHILD_COW); + return bs->backing; +} + +/* + * If @bs acts as a filter for exactly one of its children, return + * that child. + */ +BdrvChild *bdrv_filter_child(BlockDriverState *bs) +{ + BdrvChild *c; + + if (!bs || !bs->drv) { + return NULL; + } + + if (!bs->drv->is_filter) { + return NULL; + } + + /* Only one of @backing or @file may be used */ + assert(!(bs->backing && bs->file)); + + c = bs->backing ?: bs->file; + if (!c) { + return NULL; + } + + assert(c->role & BDRV_CHILD_FILTERED); + return c; +} + +/* + * Return either the result of bdrv_cow_child() or bdrv_filter_child(), + * whichever is non-NULL. + * + * Return NULL if both are NULL. + */ +BdrvChild *bdrv_filter_or_cow_child(BlockDriverState *bs) +{ + BdrvChild *cow_child = bdrv_cow_child(bs); + BdrvChild *filter_child = bdrv_filter_child(bs); + + /* Filter nodes cannot have COW backing files */ + assert(!(cow_child && filter_child)); + + return cow_child ?: filter_child; +} + +/* + * Return the primary child of this node: For filters, that is the + * filtered child. For other nodes, that is usually the child storing + * metadata. + * (A generally more helpful description is that this is (usually) the + * child that has the same filename as @bs.) + * + * Drivers do not necessarily have a primary child; for example quorum + * does not. + */ +BdrvChild *bdrv_primary_child(BlockDriverState *bs) +{ + BdrvChild *c, *found = NULL; + + QLIST_FOREACH(c, &bs->children, next) { + if (c->role & BDRV_CHILD_PRIMARY) { + assert(!found); + found = c; + } + } + + return found; +} + +static BlockDriverState *bdrv_do_skip_filters(BlockDriverState *bs, + bool stop_on_explicit_filter) +{ + BdrvChild *c; + + if (!bs) { + return NULL; + } + + while (!(stop_on_explicit_filter && !bs->implicit)) { + c = bdrv_filter_child(bs); + if (!c) { + /* + * A filter that is embedded in a working block graph must + * have a child. Assert this here so this function does + * not return a filter node that is not expected by the + * caller. + */ + assert(!bs->drv || !bs->drv->is_filter); + break; + } + bs = c->bs; + } + /* + * Note that this treats nodes with bs->drv == NULL as not being + * filters (bs->drv == NULL should be replaced by something else + * anyway). + * The advantage of this behavior is that this function will thus + * always return a non-NULL value (given a non-NULL @bs). + */ + + return bs; +} + +/* + * Return the first BDS that has not been added implicitly or that + * does not have a filtered child down the chain starting from @bs + * (including @bs itself). + */ +BlockDriverState *bdrv_skip_implicit_filters(BlockDriverState *bs) +{ + return bdrv_do_skip_filters(bs, true); +} + +/* + * Return the first BDS that does not have a filtered child down the + * chain starting from @bs (including @bs itself). + */ +BlockDriverState *bdrv_skip_filters(BlockDriverState *bs) +{ + return bdrv_do_skip_filters(bs, false); +} + +/* + * For a backing chain, return the first non-filter backing image of + * the first non-filter image. + */ +BlockDriverState *bdrv_backing_chain_next(BlockDriverState *bs) +{ + return bdrv_skip_filters(bdrv_cow_bs(bdrv_skip_filters(bs))); +} diff --git a/block/accounting.c b/block/accounting.c new file mode 100644 index 000000000..8d41c8a83 --- /dev/null +++ b/block/accounting.c @@ -0,0 +1,282 @@ +/* + * QEMU System Emulator block accounting + * + * Copyright (c) 2011 Christoph Hellwig + * Copyright (c) 2015 Igalia, S.L. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ + +#include "qemu/osdep.h" +#include "block/accounting.h" +#include "block/block_int.h" +#include "qemu/timer.h" +#include "sysemu/qtest.h" + +static QEMUClockType clock_type = QEMU_CLOCK_REALTIME; +static const int qtest_latency_ns = NANOSECONDS_PER_SECOND / 1000; + +void block_acct_init(BlockAcctStats *stats) +{ + qemu_mutex_init(&stats->lock); + if (qtest_enabled()) { + clock_type = QEMU_CLOCK_VIRTUAL; + } +} + +void block_acct_setup(BlockAcctStats *stats, bool account_invalid, + bool account_failed) +{ + stats->account_invalid = account_invalid; + stats->account_failed = account_failed; +} + +void block_acct_cleanup(BlockAcctStats *stats) +{ + BlockAcctTimedStats *s, *next; + QSLIST_FOREACH_SAFE(s, &stats->intervals, entries, next) { + g_free(s); + } + qemu_mutex_destroy(&stats->lock); +} + +void block_acct_add_interval(BlockAcctStats *stats, unsigned interval_length) +{ + BlockAcctTimedStats *s; + unsigned i; + + s = g_new0(BlockAcctTimedStats, 1); + s->interval_length = interval_length; + s->stats = stats; + qemu_mutex_lock(&stats->lock); + QSLIST_INSERT_HEAD(&stats->intervals, s, entries); + + for (i = 0; i < BLOCK_MAX_IOTYPE; i++) { + timed_average_init(&s->latency[i], clock_type, + (uint64_t) interval_length * NANOSECONDS_PER_SECOND); + } + qemu_mutex_unlock(&stats->lock); +} + +BlockAcctTimedStats *block_acct_interval_next(BlockAcctStats *stats, + BlockAcctTimedStats *s) +{ + if (s == NULL) { + return QSLIST_FIRST(&stats->intervals); + } else { + return QSLIST_NEXT(s, entries); + } +} + +void block_acct_start(BlockAcctStats *stats, BlockAcctCookie *cookie, + int64_t bytes, enum BlockAcctType type) +{ + assert(type < BLOCK_MAX_IOTYPE); + + cookie->bytes = bytes; + cookie->start_time_ns = qemu_clock_get_ns(clock_type); + cookie->type = type; +} + +/* block_latency_histogram_compare_func: + * Compare @key with interval [@it[0], @it[1]). + * Return: -1 if @key < @it[0] + * 0 if @key in [@it[0], @it[1]) + * +1 if @key >= @it[1] + */ +static int block_latency_histogram_compare_func(const void *key, const void *it) +{ + uint64_t k = *(uint64_t *)key; + uint64_t a = ((uint64_t *)it)[0]; + uint64_t b = ((uint64_t *)it)[1]; + + return k < a ? -1 : (k < b ? 0 : 1); +} + +static void block_latency_histogram_account(BlockLatencyHistogram *hist, + int64_t latency_ns) +{ + uint64_t *pos; + + if (hist->bins == NULL) { + /* histogram disabled */ + return; + } + + + if (latency_ns < hist->boundaries[0]) { + hist->bins[0]++; + return; + } + + if (latency_ns >= hist->boundaries[hist->nbins - 2]) { + hist->bins[hist->nbins - 1]++; + return; + } + + pos = bsearch(&latency_ns, hist->boundaries, hist->nbins - 2, + sizeof(hist->boundaries[0]), + block_latency_histogram_compare_func); + assert(pos != NULL); + + hist->bins[pos - hist->boundaries + 1]++; +} + +int block_latency_histogram_set(BlockAcctStats *stats, enum BlockAcctType type, + uint64List *boundaries) +{ + BlockLatencyHistogram *hist = &stats->latency_histogram[type]; + uint64List *entry; + uint64_t *ptr; + uint64_t prev = 0; + int new_nbins = 1; + + for (entry = boundaries; entry; entry = entry->next) { + if (entry->value <= prev) { + return -EINVAL; + } + new_nbins++; + prev = entry->value; + } + + hist->nbins = new_nbins; + g_free(hist->boundaries); + hist->boundaries = g_new(uint64_t, hist->nbins - 1); + for (entry = boundaries, ptr = hist->boundaries; entry; + entry = entry->next, ptr++) + { + *ptr = entry->value; + } + + g_free(hist->bins); + hist->bins = g_new0(uint64_t, hist->nbins); + + return 0; +} + +void block_latency_histograms_clear(BlockAcctStats *stats) +{ + int i; + + for (i = 0; i < BLOCK_MAX_IOTYPE; i++) { + BlockLatencyHistogram *hist = &stats->latency_histogram[i]; + g_free(hist->bins); + g_free(hist->boundaries); + memset(hist, 0, sizeof(*hist)); + } +} + +static void block_account_one_io(BlockAcctStats *stats, BlockAcctCookie *cookie, + bool failed) +{ + BlockAcctTimedStats *s; + int64_t time_ns = qemu_clock_get_ns(clock_type); + int64_t latency_ns = time_ns - cookie->start_time_ns; + + if (qtest_enabled()) { + latency_ns = qtest_latency_ns; + } + + assert(cookie->type < BLOCK_MAX_IOTYPE); + + if (cookie->type == BLOCK_ACCT_NONE) { + return; + } + + qemu_mutex_lock(&stats->lock); + + if (failed) { + stats->failed_ops[cookie->type]++; + } else { + stats->nr_bytes[cookie->type] += cookie->bytes; + stats->nr_ops[cookie->type]++; + } + + block_latency_histogram_account(&stats->latency_histogram[cookie->type], + latency_ns); + + if (!failed || stats->account_failed) { + stats->total_time_ns[cookie->type] += latency_ns; + stats->last_access_time_ns = time_ns; + + QSLIST_FOREACH(s, &stats->intervals, entries) { + timed_average_account(&s->latency[cookie->type], latency_ns); + } + } + + qemu_mutex_unlock(&stats->lock); + + cookie->type = BLOCK_ACCT_NONE; +} + +void block_acct_done(BlockAcctStats *stats, BlockAcctCookie *cookie) +{ + block_account_one_io(stats, cookie, false); +} + +void block_acct_failed(BlockAcctStats *stats, BlockAcctCookie *cookie) +{ + block_account_one_io(stats, cookie, true); +} + +void block_acct_invalid(BlockAcctStats *stats, enum BlockAcctType type) +{ + assert(type < BLOCK_MAX_IOTYPE); + + /* block_account_one_io() updates total_time_ns[], but this one does + * not. The reason is that invalid requests are accounted during their + * submission, therefore there's no actual I/O involved. + */ + qemu_mutex_lock(&stats->lock); + stats->invalid_ops[type]++; + + if (stats->account_invalid) { + stats->last_access_time_ns = qemu_clock_get_ns(clock_type); + } + qemu_mutex_unlock(&stats->lock); +} + +void block_acct_merge_done(BlockAcctStats *stats, enum BlockAcctType type, + int num_requests) +{ + assert(type < BLOCK_MAX_IOTYPE); + + qemu_mutex_lock(&stats->lock); + stats->merged[type] += num_requests; + qemu_mutex_unlock(&stats->lock); +} + +int64_t block_acct_idle_time_ns(BlockAcctStats *stats) +{ + return qemu_clock_get_ns(clock_type) - stats->last_access_time_ns; +} + +double block_acct_queue_depth(BlockAcctTimedStats *stats, + enum BlockAcctType type) +{ + uint64_t sum, elapsed; + + assert(type < BLOCK_MAX_IOTYPE); + + qemu_mutex_lock(&stats->stats->lock); + sum = timed_average_sum(&stats->latency[type], &elapsed); + qemu_mutex_unlock(&stats->stats->lock); + + return (double) sum / elapsed; +} diff --git a/block/aio_task.c b/block/aio_task.c new file mode 100644 index 000000000..88989fa24 --- /dev/null +++ b/block/aio_task.c @@ -0,0 +1,124 @@ +/* + * Aio tasks loops + * + * Copyright (c) 2019 Virtuozzo International GmbH. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ + +#include "qemu/osdep.h" +#include "block/aio.h" +#include "block/aio_task.h" + +struct AioTaskPool { + Coroutine *main_co; + int status; + int max_busy_tasks; + int busy_tasks; + bool waiting; +}; + +static void coroutine_fn aio_task_co(void *opaque) +{ + AioTask *task = opaque; + AioTaskPool *pool = task->pool; + + assert(pool->busy_tasks < pool->max_busy_tasks); + pool->busy_tasks++; + + task->ret = task->func(task); + + pool->busy_tasks--; + + if (task->ret < 0 && pool->status == 0) { + pool->status = task->ret; + } + + g_free(task); + + if (pool->waiting) { + pool->waiting = false; + aio_co_wake(pool->main_co); + } +} + +void coroutine_fn aio_task_pool_wait_one(AioTaskPool *pool) +{ + assert(pool->busy_tasks > 0); + assert(qemu_coroutine_self() == pool->main_co); + + pool->waiting = true; + qemu_coroutine_yield(); + + assert(!pool->waiting); + assert(pool->busy_tasks < pool->max_busy_tasks); +} + +void coroutine_fn aio_task_pool_wait_slot(AioTaskPool *pool) +{ + if (pool->busy_tasks < pool->max_busy_tasks) { + return; + } + + aio_task_pool_wait_one(pool); +} + +void coroutine_fn aio_task_pool_wait_all(AioTaskPool *pool) +{ + while (pool->busy_tasks > 0) { + aio_task_pool_wait_one(pool); + } +} + +void coroutine_fn aio_task_pool_start_task(AioTaskPool *pool, AioTask *task) +{ + aio_task_pool_wait_slot(pool); + + task->pool = pool; + qemu_coroutine_enter(qemu_coroutine_create(aio_task_co, task)); +} + +AioTaskPool *coroutine_fn aio_task_pool_new(int max_busy_tasks) +{ + AioTaskPool *pool = g_new0(AioTaskPool, 1); + + pool->main_co = qemu_coroutine_self(); + pool->max_busy_tasks = max_busy_tasks; + + return pool; +} + +void aio_task_pool_free(AioTaskPool *pool) +{ + g_free(pool); +} + +int aio_task_pool_status(AioTaskPool *pool) +{ + if (!pool) { + return 0; /* Sugar for lazy allocation of aio pool */ + } + + return pool->status; +} + +bool aio_task_pool_empty(AioTaskPool *pool) +{ + return pool->busy_tasks == 0; +} diff --git a/block/amend.c b/block/amend.c new file mode 100644 index 000000000..392df9ef8 --- /dev/null +++ b/block/amend.c @@ -0,0 +1,117 @@ +/* + * Block layer code related to image options amend + * + * Copyright (c) 2018 Kevin Wolf + * Copyright (c) 2020 Red Hat. Inc + * + * Heavily based on create.c + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ + +#include "qemu/osdep.h" +#include "block/block_int.h" +#include "qemu/job.h" +#include "qemu/main-loop.h" +#include "qapi/qapi-commands-block-core.h" +#include "qapi/qapi-visit-block-core.h" +#include "qapi/clone-visitor.h" +#include "qapi/error.h" + +typedef struct BlockdevAmendJob { + Job common; + BlockdevAmendOptions *opts; + BlockDriverState *bs; + bool force; +} BlockdevAmendJob; + +static int coroutine_fn blockdev_amend_run(Job *job, Error **errp) +{ + BlockdevAmendJob *s = container_of(job, BlockdevAmendJob, common); + int ret; + + job_progress_set_remaining(&s->common, 1); + ret = s->bs->drv->bdrv_co_amend(s->bs, s->opts, s->force, errp); + job_progress_update(&s->common, 1); + qapi_free_BlockdevAmendOptions(s->opts); + return ret; +} + +static const JobDriver blockdev_amend_job_driver = { + .instance_size = sizeof(BlockdevAmendJob), + .job_type = JOB_TYPE_AMEND, + .run = blockdev_amend_run, +}; + +void qmp_x_blockdev_amend(const char *job_id, + const char *node_name, + BlockdevAmendOptions *options, + bool has_force, + bool force, + Error **errp) +{ + BlockdevAmendJob *s; + const char *fmt = BlockdevDriver_str(options->driver); + BlockDriver *drv = bdrv_find_format(fmt); + BlockDriverState *bs; + + bs = bdrv_lookup_bs(NULL, node_name, errp); + if (!bs) { + return; + } + + if (!drv) { + error_setg(errp, "Block driver '%s' not found or not supported", fmt); + return; + } + + /* + * If the driver is in the schema, we know that it exists. But it may not + * be whitelisted. + */ + if (bdrv_uses_whitelist() && !bdrv_is_whitelisted(drv, false)) { + error_setg(errp, "Driver is not whitelisted"); + return; + } + + if (bs->drv != drv) { + error_setg(errp, + "x-blockdev-amend doesn't support changing the block driver"); + return; + } + + /* Error out if the driver doesn't support .bdrv_co_amend */ + if (!drv->bdrv_co_amend) { + error_setg(errp, "Driver does not support x-blockdev-amend"); + return; + } + + /* Create the block job */ + s = job_create(job_id, &blockdev_amend_job_driver, NULL, + bdrv_get_aio_context(bs), JOB_DEFAULT | JOB_MANUAL_DISMISS, + NULL, NULL, errp); + if (!s) { + return; + } + + s->bs = bs, + s->opts = QAPI_CLONE(BlockdevAmendOptions, options), + s->force = has_force ? force : false; + job_start(&s->common); +} diff --git a/block/backup-top.c b/block/backup-top.c new file mode 100644 index 000000000..fe6883cc9 --- /dev/null +++ b/block/backup-top.c @@ -0,0 +1,288 @@ +/* + * backup-top filter driver + * + * The driver performs Copy-Before-Write (CBW) operation: it is injected above + * some node, and before each write it copies _old_ data to the target node. + * + * Copyright (c) 2018-2019 Virtuozzo International GmbH. + * + * Author: + * Sementsov-Ogievskiy Vladimir + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + */ + +#include "qemu/osdep.h" + +#include "sysemu/block-backend.h" +#include "qemu/cutils.h" +#include "qapi/error.h" +#include "block/block_int.h" +#include "block/qdict.h" +#include "block/block-copy.h" + +#include "block/backup-top.h" + +typedef struct BDRVBackupTopState { + BlockCopyState *bcs; + BdrvChild *target; + bool active; + int64_t cluster_size; +} BDRVBackupTopState; + +static coroutine_fn int backup_top_co_preadv( + BlockDriverState *bs, uint64_t offset, uint64_t bytes, + QEMUIOVector *qiov, int flags) +{ + return bdrv_co_preadv(bs->backing, offset, bytes, qiov, flags); +} + +static coroutine_fn int backup_top_cbw(BlockDriverState *bs, uint64_t offset, + uint64_t bytes, BdrvRequestFlags flags) +{ + BDRVBackupTopState *s = bs->opaque; + uint64_t off, end; + + if (flags & BDRV_REQ_WRITE_UNCHANGED) { + return 0; + } + + off = QEMU_ALIGN_DOWN(offset, s->cluster_size); + end = QEMU_ALIGN_UP(offset + bytes, s->cluster_size); + + return block_copy(s->bcs, off, end - off, NULL); +} + +static int coroutine_fn backup_top_co_pdiscard(BlockDriverState *bs, + int64_t offset, int bytes) +{ + int ret = backup_top_cbw(bs, offset, bytes, 0); + if (ret < 0) { + return ret; + } + + return bdrv_co_pdiscard(bs->backing, offset, bytes); +} + +static int coroutine_fn backup_top_co_pwrite_zeroes(BlockDriverState *bs, + int64_t offset, int bytes, BdrvRequestFlags flags) +{ + int ret = backup_top_cbw(bs, offset, bytes, flags); + if (ret < 0) { + return ret; + } + + return bdrv_co_pwrite_zeroes(bs->backing, offset, bytes, flags); +} + +static coroutine_fn int backup_top_co_pwritev(BlockDriverState *bs, + uint64_t offset, + uint64_t bytes, + QEMUIOVector *qiov, int flags) +{ + int ret = backup_top_cbw(bs, offset, bytes, flags); + if (ret < 0) { + return ret; + } + + return bdrv_co_pwritev(bs->backing, offset, bytes, qiov, flags); +} + +static int coroutine_fn backup_top_co_flush(BlockDriverState *bs) +{ + if (!bs->backing) { + return 0; + } + + return bdrv_co_flush(bs->backing->bs); +} + +static void backup_top_refresh_filename(BlockDriverState *bs) +{ + if (bs->backing == NULL) { + /* + * we can be here after failed bdrv_attach_child in + * bdrv_set_backing_hd + */ + return; + } + pstrcpy(bs->exact_filename, sizeof(bs->exact_filename), + bs->backing->bs->filename); +} + +static void backup_top_child_perm(BlockDriverState *bs, BdrvChild *c, + BdrvChildRole role, + BlockReopenQueue *reopen_queue, + uint64_t perm, uint64_t shared, + uint64_t *nperm, uint64_t *nshared) +{ + BDRVBackupTopState *s = bs->opaque; + + if (!s->active) { + /* + * The filter node may be in process of bdrv_append(), which firstly do + * bdrv_set_backing_hd() and then bdrv_replace_node(). This means that + * we can't unshare BLK_PERM_WRITE during bdrv_append() operation. So, + * let's require nothing during bdrv_append() and refresh permissions + * after it (see bdrv_backup_top_append()). + */ + *nperm = 0; + *nshared = BLK_PERM_ALL; + return; + } + + if (!(role & BDRV_CHILD_FILTERED)) { + /* + * Target child + * + * Share write to target (child_file), to not interfere + * with guest writes to its disk which may be in target backing chain. + * Can't resize during a backup block job because we check the size + * only upfront. + */ + *nshared = BLK_PERM_ALL & ~BLK_PERM_RESIZE; + *nperm = BLK_PERM_WRITE; + } else { + /* Source child */ + bdrv_default_perms(bs, c, role, reopen_queue, + perm, shared, nperm, nshared); + + if (perm & BLK_PERM_WRITE) { + *nperm = *nperm | BLK_PERM_CONSISTENT_READ; + } + *nshared &= ~(BLK_PERM_WRITE | BLK_PERM_RESIZE); + } +} + +BlockDriver bdrv_backup_top_filter = { + .format_name = "backup-top", + .instance_size = sizeof(BDRVBackupTopState), + + .bdrv_co_preadv = backup_top_co_preadv, + .bdrv_co_pwritev = backup_top_co_pwritev, + .bdrv_co_pwrite_zeroes = backup_top_co_pwrite_zeroes, + .bdrv_co_pdiscard = backup_top_co_pdiscard, + .bdrv_co_flush = backup_top_co_flush, + + .bdrv_refresh_filename = backup_top_refresh_filename, + + .bdrv_child_perm = backup_top_child_perm, + + .is_filter = true, +}; + +BlockDriverState *bdrv_backup_top_append(BlockDriverState *source, + BlockDriverState *target, + const char *filter_node_name, + uint64_t cluster_size, + BdrvRequestFlags write_flags, + BlockCopyState **bcs, + Error **errp) +{ + Error *local_err = NULL; + BDRVBackupTopState *state; + BlockDriverState *top; + bool appended = false; + + assert(source->total_sectors == target->total_sectors); + + top = bdrv_new_open_driver(&bdrv_backup_top_filter, filter_node_name, + BDRV_O_RDWR, errp); + if (!top) { + return NULL; + } + + state = top->opaque; + top->total_sectors = source->total_sectors; + top->supported_write_flags = BDRV_REQ_WRITE_UNCHANGED | + (BDRV_REQ_FUA & source->supported_write_flags); + top->supported_zero_flags = BDRV_REQ_WRITE_UNCHANGED | + ((BDRV_REQ_FUA | BDRV_REQ_MAY_UNMAP | BDRV_REQ_NO_FALLBACK) & + source->supported_zero_flags); + + bdrv_ref(target); + state->target = bdrv_attach_child(top, target, "target", &child_of_bds, + BDRV_CHILD_DATA, errp); + if (!state->target) { + bdrv_unref(target); + bdrv_unref(top); + return NULL; + } + + bdrv_drained_begin(source); + + bdrv_ref(top); + bdrv_append(top, source, &local_err); + if (local_err) { + error_prepend(&local_err, "Cannot append backup-top filter: "); + goto fail; + } + appended = true; + + /* + * bdrv_append() finished successfully, now we can require permissions + * we want. + */ + state->active = true; + bdrv_child_refresh_perms(top, top->backing, &local_err); + if (local_err) { + error_prepend(&local_err, + "Cannot set permissions for backup-top filter: "); + goto fail; + } + + state->cluster_size = cluster_size; + state->bcs = block_copy_state_new(top->backing, state->target, + cluster_size, write_flags, &local_err); + if (local_err) { + error_prepend(&local_err, "Cannot create block-copy-state: "); + goto fail; + } + *bcs = state->bcs; + + bdrv_drained_end(source); + + return top; + +fail: + if (appended) { + state->active = false; + bdrv_backup_top_drop(top); + } else { + bdrv_unref(top); + } + + bdrv_drained_end(source); + error_propagate(errp, local_err); + + return NULL; +} + +void bdrv_backup_top_drop(BlockDriverState *bs) +{ + BDRVBackupTopState *s = bs->opaque; + + bdrv_drained_begin(bs); + + block_copy_state_free(s->bcs); + + s->active = false; + bdrv_child_refresh_perms(bs, bs->backing, &error_abort); + bdrv_replace_node(bs, bs->backing->bs, &error_abort); + bdrv_set_backing_hd(bs, NULL, &error_abort); + + bdrv_drained_end(bs); + + bdrv_unref(bs); +} diff --git a/block/backup-top.h b/block/backup-top.h new file mode 100644 index 000000000..e5cabfa19 --- /dev/null +++ b/block/backup-top.h @@ -0,0 +1,41 @@ +/* + * backup-top filter driver + * + * The driver performs Copy-Before-Write (CBW) operation: it is injected above + * some node, and before each write it copies _old_ data to the target node. + * + * Copyright (c) 2018-2019 Virtuozzo International GmbH. + * + * Author: + * Sementsov-Ogievskiy Vladimir + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + */ + +#ifndef BACKUP_TOP_H +#define BACKUP_TOP_H + +#include "block/block_int.h" +#include "block/block-copy.h" + +BlockDriverState *bdrv_backup_top_append(BlockDriverState *source, + BlockDriverState *target, + const char *filter_node_name, + uint64_t cluster_size, + BdrvRequestFlags write_flags, + BlockCopyState **bcs, + Error **errp); +void bdrv_backup_top_drop(BlockDriverState *bs); + +#endif /* BACKUP_TOP_H */ diff --git a/block/backup.c b/block/backup.c new file mode 100644 index 000000000..9afa0bf3b --- /dev/null +++ b/block/backup.c @@ -0,0 +1,486 @@ +/* + * QEMU backup + * + * Copyright (C) 2013 Proxmox Server Solutions + * Copyright (c) 2019 Virtuozzo International GmbH. + * + * Authors: + * Dietmar Maurer (dietmar@proxmox.com) + * + * This work is licensed under the terms of the GNU GPL, version 2 or later. + * See the COPYING file in the top-level directory. + * + */ + +#include "qemu/osdep.h" + +#include "trace.h" +#include "block/block.h" +#include "block/block_int.h" +#include "block/blockjob_int.h" +#include "block/block_backup.h" +#include "block/block-copy.h" +#include "qapi/error.h" +#include "qapi/qmp/qerror.h" +#include "qemu/ratelimit.h" +#include "qemu/cutils.h" +#include "sysemu/block-backend.h" +#include "qemu/bitmap.h" +#include "qemu/error-report.h" + +#include "block/backup-top.h" + +#define BACKUP_CLUSTER_SIZE_DEFAULT (1 << 16) + +typedef struct BackupBlockJob { + BlockJob common; + BlockDriverState *backup_top; + BlockDriverState *source_bs; + + BdrvDirtyBitmap *sync_bitmap; + + MirrorSyncMode sync_mode; + BitmapSyncMode bitmap_mode; + BlockdevOnError on_source_error; + BlockdevOnError on_target_error; + uint64_t len; + uint64_t bytes_read; + int64_t cluster_size; + + BlockCopyState *bcs; +} BackupBlockJob; + +static const BlockJobDriver backup_job_driver; + +static void backup_progress_bytes_callback(int64_t bytes, void *opaque) +{ + BackupBlockJob *s = opaque; + + s->bytes_read += bytes; +} + +static int coroutine_fn backup_do_cow(BackupBlockJob *job, + int64_t offset, uint64_t bytes, + bool *error_is_read) +{ + int ret = 0; + int64_t start, end; /* bytes */ + + start = QEMU_ALIGN_DOWN(offset, job->cluster_size); + end = QEMU_ALIGN_UP(bytes + offset, job->cluster_size); + + trace_backup_do_cow_enter(job, start, offset, bytes); + + ret = block_copy(job->bcs, start, end - start, error_is_read); + + trace_backup_do_cow_return(job, offset, bytes, ret); + + return ret; +} + +static void backup_cleanup_sync_bitmap(BackupBlockJob *job, int ret) +{ + BdrvDirtyBitmap *bm; + bool sync = (((ret == 0) || (job->bitmap_mode == BITMAP_SYNC_MODE_ALWAYS)) \ + && (job->bitmap_mode != BITMAP_SYNC_MODE_NEVER)); + + if (sync) { + /* + * We succeeded, or we always intended to sync the bitmap. + * Delete this bitmap and install the child. + */ + bm = bdrv_dirty_bitmap_abdicate(job->sync_bitmap, NULL); + } else { + /* + * We failed, or we never intended to sync the bitmap anyway. + * Merge the successor back into the parent, keeping all data. + */ + bm = bdrv_reclaim_dirty_bitmap(job->sync_bitmap, NULL); + } + + assert(bm); + + if (ret < 0 && job->bitmap_mode == BITMAP_SYNC_MODE_ALWAYS) { + /* If we failed and synced, merge in the bits we didn't copy: */ + bdrv_dirty_bitmap_merge_internal(bm, block_copy_dirty_bitmap(job->bcs), + NULL, true); + } +} + +static void backup_commit(Job *job) +{ + BackupBlockJob *s = container_of(job, BackupBlockJob, common.job); + if (s->sync_bitmap) { + backup_cleanup_sync_bitmap(s, 0); + } +} + +static void backup_abort(Job *job) +{ + BackupBlockJob *s = container_of(job, BackupBlockJob, common.job); + if (s->sync_bitmap) { + backup_cleanup_sync_bitmap(s, -1); + } +} + +static void backup_clean(Job *job) +{ + BackupBlockJob *s = container_of(job, BackupBlockJob, common.job); + bdrv_backup_top_drop(s->backup_top); +} + +void backup_do_checkpoint(BlockJob *job, Error **errp) +{ + BackupBlockJob *backup_job = container_of(job, BackupBlockJob, common); + + assert(block_job_driver(job) == &backup_job_driver); + + if (backup_job->sync_mode != MIRROR_SYNC_MODE_NONE) { + error_setg(errp, "The backup job only supports block checkpoint in" + " sync=none mode"); + return; + } + + bdrv_set_dirty_bitmap(block_copy_dirty_bitmap(backup_job->bcs), 0, + backup_job->len); +} + +static BlockErrorAction backup_error_action(BackupBlockJob *job, + bool read, int error) +{ + if (read) { + return block_job_error_action(&job->common, job->on_source_error, + true, error); + } else { + return block_job_error_action(&job->common, job->on_target_error, + false, error); + } +} + +static bool coroutine_fn yield_and_check(BackupBlockJob *job) +{ + uint64_t delay_ns; + + if (job_is_cancelled(&job->common.job)) { + return true; + } + + /* + * We need to yield even for delay_ns = 0 so that bdrv_drain_all() can + * return. Without a yield, the VM would not reboot. + */ + delay_ns = block_job_ratelimit_get_delay(&job->common, job->bytes_read); + job->bytes_read = 0; + job_sleep_ns(&job->common.job, delay_ns); + + if (job_is_cancelled(&job->common.job)) { + return true; + } + + return false; +} + +static int coroutine_fn backup_loop(BackupBlockJob *job) +{ + bool error_is_read; + int64_t offset; + BdrvDirtyBitmapIter *bdbi; + int ret = 0; + + bdbi = bdrv_dirty_iter_new(block_copy_dirty_bitmap(job->bcs)); + while ((offset = bdrv_dirty_iter_next(bdbi)) != -1) { + do { + if (yield_and_check(job)) { + goto out; + } + ret = backup_do_cow(job, offset, job->cluster_size, &error_is_read); + if (ret < 0 && backup_error_action(job, error_is_read, -ret) == + BLOCK_ERROR_ACTION_REPORT) + { + goto out; + } + } while (ret < 0); + } + + out: + bdrv_dirty_iter_free(bdbi); + return ret; +} + +static void backup_init_bcs_bitmap(BackupBlockJob *job) +{ + bool ret; + uint64_t estimate; + BdrvDirtyBitmap *bcs_bitmap = block_copy_dirty_bitmap(job->bcs); + + if (job->sync_mode == MIRROR_SYNC_MODE_BITMAP) { + ret = bdrv_dirty_bitmap_merge_internal(bcs_bitmap, job->sync_bitmap, + NULL, true); + assert(ret); + } else { + if (job->sync_mode == MIRROR_SYNC_MODE_TOP) { + /* + * We can't hog the coroutine to initialize this thoroughly. + * Set a flag and resume work when we are able to yield safely. + */ + block_copy_set_skip_unallocated(job->bcs, true); + } + bdrv_set_dirty_bitmap(bcs_bitmap, 0, job->len); + } + + estimate = bdrv_get_dirty_count(bcs_bitmap); + job_progress_set_remaining(&job->common.job, estimate); +} + +static int coroutine_fn backup_run(Job *job, Error **errp) +{ + BackupBlockJob *s = container_of(job, BackupBlockJob, common.job); + int ret = 0; + + backup_init_bcs_bitmap(s); + + if (s->sync_mode == MIRROR_SYNC_MODE_TOP) { + int64_t offset = 0; + int64_t count; + + for (offset = 0; offset < s->len; ) { + if (yield_and_check(s)) { + ret = -ECANCELED; + goto out; + } + + ret = block_copy_reset_unallocated(s->bcs, offset, &count); + if (ret < 0) { + goto out; + } + + offset += count; + } + block_copy_set_skip_unallocated(s->bcs, false); + } + + if (s->sync_mode == MIRROR_SYNC_MODE_NONE) { + /* + * All bits are set in bcs bitmap to allow any cluster to be copied. + * This does not actually require them to be copied. + */ + while (!job_is_cancelled(job)) { + /* + * Yield until the job is cancelled. We just let our before_write + * notify callback service CoW requests. + */ + job_yield(job); + } + } else { + ret = backup_loop(s); + } + + out: + return ret; +} + +static const BlockJobDriver backup_job_driver = { + .job_driver = { + .instance_size = sizeof(BackupBlockJob), + .job_type = JOB_TYPE_BACKUP, + .free = block_job_free, + .user_resume = block_job_user_resume, + .run = backup_run, + .commit = backup_commit, + .abort = backup_abort, + .clean = backup_clean, + } +}; + +static int64_t backup_calculate_cluster_size(BlockDriverState *target, + Error **errp) +{ + int ret; + BlockDriverInfo bdi; + bool target_does_cow = bdrv_backing_chain_next(target); + + /* + * If there is no backing file on the target, we cannot rely on COW if our + * backup cluster size is smaller than the target cluster size. Even for + * targets with a backing file, try to avoid COW if possible. + */ + ret = bdrv_get_info(target, &bdi); + if (ret == -ENOTSUP && !target_does_cow) { + /* Cluster size is not defined */ + warn_report("The target block device doesn't provide " + "information about the block size and it doesn't have a " + "backing file. The default block size of %u bytes is " + "used. If the actual block size of the target exceeds " + "this default, the backup may be unusable", + BACKUP_CLUSTER_SIZE_DEFAULT); + return BACKUP_CLUSTER_SIZE_DEFAULT; + } else if (ret < 0 && !target_does_cow) { + error_setg_errno(errp, -ret, + "Couldn't determine the cluster size of the target image, " + "which has no backing file"); + error_append_hint(errp, + "Aborting, since this may create an unusable destination image\n"); + return ret; + } else if (ret < 0 && target_does_cow) { + /* Not fatal; just trudge on ahead. */ + return BACKUP_CLUSTER_SIZE_DEFAULT; + } + + return MAX(BACKUP_CLUSTER_SIZE_DEFAULT, bdi.cluster_size); +} + +BlockJob *backup_job_create(const char *job_id, BlockDriverState *bs, + BlockDriverState *target, int64_t speed, + MirrorSyncMode sync_mode, BdrvDirtyBitmap *sync_bitmap, + BitmapSyncMode bitmap_mode, + bool compress, + const char *filter_node_name, + BlockdevOnError on_source_error, + BlockdevOnError on_target_error, + int creation_flags, + BlockCompletionFunc *cb, void *opaque, + JobTxn *txn, Error **errp) +{ + int64_t len, target_len; + BackupBlockJob *job = NULL; + int64_t cluster_size; + BdrvRequestFlags write_flags; + BlockDriverState *backup_top = NULL; + BlockCopyState *bcs = NULL; + + assert(bs); + assert(target); + + /* QMP interface protects us from these cases */ + assert(sync_mode != MIRROR_SYNC_MODE_INCREMENTAL); + assert(sync_bitmap || sync_mode != MIRROR_SYNC_MODE_BITMAP); + + if (bs == target) { + error_setg(errp, "Source and target cannot be the same"); + return NULL; + } + + if (!bdrv_is_inserted(bs)) { + error_setg(errp, "Device is not inserted: %s", + bdrv_get_device_name(bs)); + return NULL; + } + + if (!bdrv_is_inserted(target)) { + error_setg(errp, "Device is not inserted: %s", + bdrv_get_device_name(target)); + return NULL; + } + + if (compress && !bdrv_supports_compressed_writes(target)) { + error_setg(errp, "Compression is not supported for this drive %s", + bdrv_get_device_name(target)); + return NULL; + } + + if (bdrv_op_is_blocked(bs, BLOCK_OP_TYPE_BACKUP_SOURCE, errp)) { + return NULL; + } + + if (bdrv_op_is_blocked(target, BLOCK_OP_TYPE_BACKUP_TARGET, errp)) { + return NULL; + } + + if (sync_bitmap) { + /* If we need to write to this bitmap, check that we can: */ + if (bitmap_mode != BITMAP_SYNC_MODE_NEVER && + bdrv_dirty_bitmap_check(sync_bitmap, BDRV_BITMAP_DEFAULT, errp)) { + return NULL; + } + + /* Create a new bitmap, and freeze/disable this one. */ + if (bdrv_dirty_bitmap_create_successor(sync_bitmap, errp) < 0) { + return NULL; + } + } + + len = bdrv_getlength(bs); + if (len < 0) { + error_setg_errno(errp, -len, "Unable to get length for '%s'", + bdrv_get_device_or_node_name(bs)); + goto error; + } + + target_len = bdrv_getlength(target); + if (target_len < 0) { + error_setg_errno(errp, -target_len, "Unable to get length for '%s'", + bdrv_get_device_or_node_name(bs)); + goto error; + } + + if (target_len != len) { + error_setg(errp, "Source and target image have different sizes"); + goto error; + } + + cluster_size = backup_calculate_cluster_size(target, errp); + if (cluster_size < 0) { + goto error; + } + + /* + * If source is in backing chain of target assume that target is going to be + * used for "image fleecing", i.e. it should represent a kind of snapshot of + * source at backup-start point in time. And target is going to be read by + * somebody (for example, used as NBD export) during backup job. + * + * In this case, we need to add BDRV_REQ_SERIALISING write flag to avoid + * intersection of backup writes and third party reads from target, + * otherwise reading from target we may occasionally read already updated by + * guest data. + * + * For more information see commit f8d59dfb40bb and test + * tests/qemu-iotests/222 + */ + write_flags = (bdrv_chain_contains(target, bs) ? BDRV_REQ_SERIALISING : 0) | + (compress ? BDRV_REQ_WRITE_COMPRESSED : 0), + + backup_top = bdrv_backup_top_append(bs, target, filter_node_name, + cluster_size, write_flags, &bcs, errp); + if (!backup_top) { + goto error; + } + + /* job->len is fixed, so we can't allow resize */ + job = block_job_create(job_id, &backup_job_driver, txn, backup_top, + 0, BLK_PERM_ALL, + speed, creation_flags, cb, opaque, errp); + if (!job) { + goto error; + } + + job->backup_top = backup_top; + job->source_bs = bs; + job->on_source_error = on_source_error; + job->on_target_error = on_target_error; + job->sync_mode = sync_mode; + job->sync_bitmap = sync_bitmap; + job->bitmap_mode = bitmap_mode; + job->bcs = bcs; + job->cluster_size = cluster_size; + job->len = len; + + block_copy_set_progress_callback(bcs, backup_progress_bytes_callback, job); + block_copy_set_progress_meter(bcs, &job->common.job.progress); + + /* Required permissions are already taken by backup-top target */ + block_job_add_bdrv(&job->common, "target", target, 0, BLK_PERM_ALL, + &error_abort); + + return &job->common; + + error: + if (sync_bitmap) { + bdrv_reclaim_dirty_bitmap(sync_bitmap, NULL); + } + if (backup_top) { + bdrv_backup_top_drop(backup_top); + } + + return NULL; +} diff --git a/block/blkdebug.c b/block/blkdebug.c new file mode 100644 index 000000000..5fe6172da --- /dev/null +++ b/block/blkdebug.c @@ -0,0 +1,1063 @@ +/* + * Block protocol for I/O error injection + * + * Copyright (C) 2016-2017 Red Hat, Inc. + * Copyright (c) 2010 Kevin Wolf + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ + +#include "qemu/osdep.h" +#include "qapi/error.h" +#include "qemu/cutils.h" +#include "qemu/config-file.h" +#include "block/block_int.h" +#include "block/qdict.h" +#include "qemu/module.h" +#include "qemu/option.h" +#include "qapi/qapi-visit-block-core.h" +#include "qapi/qmp/qdict.h" +#include "qapi/qmp/qlist.h" +#include "qapi/qmp/qstring.h" +#include "qapi/qobject-input-visitor.h" +#include "sysemu/qtest.h" + +typedef struct BDRVBlkdebugState { + int state; + int new_state; + uint64_t align; + uint64_t max_transfer; + uint64_t opt_write_zero; + uint64_t max_write_zero; + uint64_t opt_discard; + uint64_t max_discard; + + uint64_t take_child_perms; + uint64_t unshare_child_perms; + + /* For blkdebug_refresh_filename() */ + char *config_file; + + QLIST_HEAD(, BlkdebugRule) rules[BLKDBG__MAX]; + QSIMPLEQ_HEAD(, BlkdebugRule) active_rules; + QLIST_HEAD(, BlkdebugSuspendedReq) suspended_reqs; +} BDRVBlkdebugState; + +typedef struct BlkdebugAIOCB { + BlockAIOCB common; + int ret; +} BlkdebugAIOCB; + +typedef struct BlkdebugSuspendedReq { + Coroutine *co; + char *tag; + QLIST_ENTRY(BlkdebugSuspendedReq) next; +} BlkdebugSuspendedReq; + +enum { + ACTION_INJECT_ERROR, + ACTION_SET_STATE, + ACTION_SUSPEND, +}; + +typedef struct BlkdebugRule { + BlkdebugEvent event; + int action; + int state; + union { + struct { + uint64_t iotype_mask; + int error; + int immediately; + int once; + int64_t offset; + } inject; + struct { + int new_state; + } set_state; + struct { + char *tag; + } suspend; + } options; + QLIST_ENTRY(BlkdebugRule) next; + QSIMPLEQ_ENTRY(BlkdebugRule) active_next; +} BlkdebugRule; + +QEMU_BUILD_BUG_MSG(BLKDEBUG_IO_TYPE__MAX > 64, + "BlkdebugIOType mask does not fit into an uint64_t"); + +static QemuOptsList inject_error_opts = { + .name = "inject-error", + .head = QTAILQ_HEAD_INITIALIZER(inject_error_opts.head), + .desc = { + { + .name = "event", + .type = QEMU_OPT_STRING, + }, + { + .name = "state", + .type = QEMU_OPT_NUMBER, + }, + { + .name = "iotype", + .type = QEMU_OPT_STRING, + }, + { + .name = "errno", + .type = QEMU_OPT_NUMBER, + }, + { + .name = "sector", + .type = QEMU_OPT_NUMBER, + }, + { + .name = "once", + .type = QEMU_OPT_BOOL, + }, + { + .name = "immediately", + .type = QEMU_OPT_BOOL, + }, + { /* end of list */ } + }, +}; + +static QemuOptsList set_state_opts = { + .name = "set-state", + .head = QTAILQ_HEAD_INITIALIZER(set_state_opts.head), + .desc = { + { + .name = "event", + .type = QEMU_OPT_STRING, + }, + { + .name = "state", + .type = QEMU_OPT_NUMBER, + }, + { + .name = "new_state", + .type = QEMU_OPT_NUMBER, + }, + { /* end of list */ } + }, +}; + +static QemuOptsList *config_groups[] = { + &inject_error_opts, + &set_state_opts, + NULL +}; + +struct add_rule_data { + BDRVBlkdebugState *s; + int action; +}; + +static int add_rule(void *opaque, QemuOpts *opts, Error **errp) +{ + struct add_rule_data *d = opaque; + BDRVBlkdebugState *s = d->s; + const char *event_name; + int event; + struct BlkdebugRule *rule; + int64_t sector; + BlkdebugIOType iotype; + Error *local_error = NULL; + + /* Find the right event for the rule */ + event_name = qemu_opt_get(opts, "event"); + if (!event_name) { + error_setg(errp, "Missing event name for rule"); + return -1; + } + event = qapi_enum_parse(&BlkdebugEvent_lookup, event_name, -1, errp); + if (event < 0) { + return -1; + } + + /* Set attributes common for all actions */ + rule = g_malloc0(sizeof(*rule)); + *rule = (struct BlkdebugRule) { + .event = event, + .action = d->action, + .state = qemu_opt_get_number(opts, "state", 0), + }; + + /* Parse action-specific options */ + switch (d->action) { + case ACTION_INJECT_ERROR: + rule->options.inject.error = qemu_opt_get_number(opts, "errno", EIO); + rule->options.inject.once = qemu_opt_get_bool(opts, "once", 0); + rule->options.inject.immediately = + qemu_opt_get_bool(opts, "immediately", 0); + sector = qemu_opt_get_number(opts, "sector", -1); + rule->options.inject.offset = + sector == -1 ? -1 : sector * BDRV_SECTOR_SIZE; + + iotype = qapi_enum_parse(&BlkdebugIOType_lookup, + qemu_opt_get(opts, "iotype"), + BLKDEBUG_IO_TYPE__MAX, &local_error); + if (local_error) { + error_propagate(errp, local_error); + g_free(rule); + return -1; + } + if (iotype != BLKDEBUG_IO_TYPE__MAX) { + rule->options.inject.iotype_mask = (1ull << iotype); + } else { + /* Apply the default */ + rule->options.inject.iotype_mask = + (1ull << BLKDEBUG_IO_TYPE_READ) + | (1ull << BLKDEBUG_IO_TYPE_WRITE) + | (1ull << BLKDEBUG_IO_TYPE_WRITE_ZEROES) + | (1ull << BLKDEBUG_IO_TYPE_DISCARD) + | (1ull << BLKDEBUG_IO_TYPE_FLUSH); + } + + break; + + case ACTION_SET_STATE: + rule->options.set_state.new_state = + qemu_opt_get_number(opts, "new_state", 0); + break; + + case ACTION_SUSPEND: + rule->options.suspend.tag = + g_strdup(qemu_opt_get(opts, "tag")); + break; + }; + + /* Add the rule */ + QLIST_INSERT_HEAD(&s->rules[event], rule, next); + + return 0; +} + +static void remove_rule(BlkdebugRule *rule) +{ + switch (rule->action) { + case ACTION_INJECT_ERROR: + case ACTION_SET_STATE: + break; + case ACTION_SUSPEND: + g_free(rule->options.suspend.tag); + break; + } + + QLIST_REMOVE(rule, next); + g_free(rule); +} + +static int read_config(BDRVBlkdebugState *s, const char *filename, + QDict *options, Error **errp) +{ + FILE *f = NULL; + int ret; + struct add_rule_data d; + Error *local_err = NULL; + + if (filename) { + f = fopen(filename, "r"); + if (f == NULL) { + error_setg_errno(errp, errno, "Could not read blkdebug config file"); + return -errno; + } + + ret = qemu_config_parse(f, config_groups, filename); + if (ret < 0) { + error_setg(errp, "Could not parse blkdebug config file"); + goto fail; + } + } + + qemu_config_parse_qdict(options, config_groups, &local_err); + if (local_err) { + error_propagate(errp, local_err); + ret = -EINVAL; + goto fail; + } + + d.s = s; + d.action = ACTION_INJECT_ERROR; + qemu_opts_foreach(&inject_error_opts, add_rule, &d, &local_err); + if (local_err) { + error_propagate(errp, local_err); + ret = -EINVAL; + goto fail; + } + + d.action = ACTION_SET_STATE; + qemu_opts_foreach(&set_state_opts, add_rule, &d, &local_err); + if (local_err) { + error_propagate(errp, local_err); + ret = -EINVAL; + goto fail; + } + + ret = 0; +fail: + qemu_opts_reset(&inject_error_opts); + qemu_opts_reset(&set_state_opts); + if (f) { + fclose(f); + } + return ret; +} + +/* Valid blkdebug filenames look like blkdebug:path/to/config:path/to/image */ +static void blkdebug_parse_filename(const char *filename, QDict *options, + Error **errp) +{ + const char *c; + + /* Parse the blkdebug: prefix */ + if (!strstart(filename, "blkdebug:", &filename)) { + /* There was no prefix; therefore, all options have to be already + present in the QDict (except for the filename) */ + qdict_put_str(options, "x-image", filename); + return; + } + + /* Parse config file path */ + c = strchr(filename, ':'); + if (c == NULL) { + error_setg(errp, "blkdebug requires both config file and image path"); + return; + } + + if (c != filename) { + QString *config_path; + config_path = qstring_from_substr(filename, 0, c - filename); + qdict_put(options, "config", config_path); + } + + /* TODO Allow multi-level nesting and set file.filename here */ + filename = c + 1; + qdict_put_str(options, "x-image", filename); +} + +static int blkdebug_parse_perm_list(uint64_t *dest, QDict *options, + const char *prefix, Error **errp) +{ + int ret = 0; + QDict *subqdict = NULL; + QObject *crumpled_subqdict = NULL; + Visitor *v = NULL; + BlockPermissionList *perm_list = NULL, *element; + + *dest = 0; + + qdict_extract_subqdict(options, &subqdict, prefix); + if (!qdict_size(subqdict)) { + goto out; + } + + crumpled_subqdict = qdict_crumple(subqdict, errp); + if (!crumpled_subqdict) { + ret = -EINVAL; + goto out; + } + + v = qobject_input_visitor_new(crumpled_subqdict); + if (!visit_type_BlockPermissionList(v, NULL, &perm_list, errp)) { + ret = -EINVAL; + goto out; + } + + for (element = perm_list; element; element = element->next) { + *dest |= bdrv_qapi_perm_to_blk_perm(element->value); + } + +out: + qapi_free_BlockPermissionList(perm_list); + visit_free(v); + qobject_unref(subqdict); + qobject_unref(crumpled_subqdict); + return ret; +} + +static int blkdebug_parse_perms(BDRVBlkdebugState *s, QDict *options, + Error **errp) +{ + int ret; + + ret = blkdebug_parse_perm_list(&s->take_child_perms, options, + "take-child-perms.", errp); + if (ret < 0) { + return ret; + } + + ret = blkdebug_parse_perm_list(&s->unshare_child_perms, options, + "unshare-child-perms.", errp); + if (ret < 0) { + return ret; + } + + return 0; +} + +static QemuOptsList runtime_opts = { + .name = "blkdebug", + .head = QTAILQ_HEAD_INITIALIZER(runtime_opts.head), + .desc = { + { + .name = "config", + .type = QEMU_OPT_STRING, + .help = "Path to the configuration file", + }, + { + .name = "x-image", + .type = QEMU_OPT_STRING, + .help = "[internal use only, will be removed]", + }, + { + .name = "align", + .type = QEMU_OPT_SIZE, + .help = "Required alignment in bytes", + }, + { + .name = "max-transfer", + .type = QEMU_OPT_SIZE, + .help = "Maximum transfer size in bytes", + }, + { + .name = "opt-write-zero", + .type = QEMU_OPT_SIZE, + .help = "Optimum write zero alignment in bytes", + }, + { + .name = "max-write-zero", + .type = QEMU_OPT_SIZE, + .help = "Maximum write zero size in bytes", + }, + { + .name = "opt-discard", + .type = QEMU_OPT_SIZE, + .help = "Optimum discard alignment in bytes", + }, + { + .name = "max-discard", + .type = QEMU_OPT_SIZE, + .help = "Maximum discard size in bytes", + }, + { /* end of list */ } + }, +}; + +static int blkdebug_open(BlockDriverState *bs, QDict *options, int flags, + Error **errp) +{ + BDRVBlkdebugState *s = bs->opaque; + QemuOpts *opts; + Error *local_err = NULL; + int ret; + uint64_t align; + + opts = qemu_opts_create(&runtime_opts, NULL, 0, &error_abort); + if (!qemu_opts_absorb_qdict(opts, options, errp)) { + ret = -EINVAL; + goto out; + } + + /* Read rules from config file or command line options */ + s->config_file = g_strdup(qemu_opt_get(opts, "config")); + ret = read_config(s, s->config_file, options, errp); + if (ret) { + goto out; + } + + /* Set initial state */ + s->state = 1; + + /* Parse permissions modifiers before opening the image file */ + ret = blkdebug_parse_perms(s, options, errp); + if (ret < 0) { + goto out; + } + + /* Open the image file */ + bs->file = bdrv_open_child(qemu_opt_get(opts, "x-image"), options, "image", + bs, &child_of_bds, + BDRV_CHILD_FILTERED | BDRV_CHILD_PRIMARY, + false, &local_err); + if (local_err) { + ret = -EINVAL; + error_propagate(errp, local_err); + goto out; + } + + bs->supported_write_flags = BDRV_REQ_WRITE_UNCHANGED | + (BDRV_REQ_FUA & bs->file->bs->supported_write_flags); + bs->supported_zero_flags = BDRV_REQ_WRITE_UNCHANGED | + ((BDRV_REQ_FUA | BDRV_REQ_MAY_UNMAP | BDRV_REQ_NO_FALLBACK) & + bs->file->bs->supported_zero_flags); + ret = -EINVAL; + + /* Set alignment overrides */ + s->align = qemu_opt_get_size(opts, "align", 0); + if (s->align && (s->align >= INT_MAX || !is_power_of_2(s->align))) { + error_setg(errp, "Cannot meet constraints with align %" PRIu64, + s->align); + goto out; + } + align = MAX(s->align, bs->file->bs->bl.request_alignment); + + s->max_transfer = qemu_opt_get_size(opts, "max-transfer", 0); + if (s->max_transfer && + (s->max_transfer >= INT_MAX || + !QEMU_IS_ALIGNED(s->max_transfer, align))) { + error_setg(errp, "Cannot meet constraints with max-transfer %" PRIu64, + s->max_transfer); + goto out; + } + + s->opt_write_zero = qemu_opt_get_size(opts, "opt-write-zero", 0); + if (s->opt_write_zero && + (s->opt_write_zero >= INT_MAX || + !QEMU_IS_ALIGNED(s->opt_write_zero, align))) { + error_setg(errp, "Cannot meet constraints with opt-write-zero %" PRIu64, + s->opt_write_zero); + goto out; + } + + s->max_write_zero = qemu_opt_get_size(opts, "max-write-zero", 0); + if (s->max_write_zero && + (s->max_write_zero >= INT_MAX || + !QEMU_IS_ALIGNED(s->max_write_zero, + MAX(s->opt_write_zero, align)))) { + error_setg(errp, "Cannot meet constraints with max-write-zero %" PRIu64, + s->max_write_zero); + goto out; + } + + s->opt_discard = qemu_opt_get_size(opts, "opt-discard", 0); + if (s->opt_discard && + (s->opt_discard >= INT_MAX || + !QEMU_IS_ALIGNED(s->opt_discard, align))) { + error_setg(errp, "Cannot meet constraints with opt-discard %" PRIu64, + s->opt_discard); + goto out; + } + + s->max_discard = qemu_opt_get_size(opts, "max-discard", 0); + if (s->max_discard && + (s->max_discard >= INT_MAX || + !QEMU_IS_ALIGNED(s->max_discard, + MAX(s->opt_discard, align)))) { + error_setg(errp, "Cannot meet constraints with max-discard %" PRIu64, + s->max_discard); + goto out; + } + + bdrv_debug_event(bs, BLKDBG_NONE); + + ret = 0; +out: + if (ret < 0) { + g_free(s->config_file); + } + qemu_opts_del(opts); + return ret; +} + +static int rule_check(BlockDriverState *bs, uint64_t offset, uint64_t bytes, + BlkdebugIOType iotype) +{ + BDRVBlkdebugState *s = bs->opaque; + BlkdebugRule *rule = NULL; + int error; + bool immediately; + + QSIMPLEQ_FOREACH(rule, &s->active_rules, active_next) { + uint64_t inject_offset = rule->options.inject.offset; + + if ((inject_offset == -1 || + (bytes && inject_offset >= offset && + inject_offset < offset + bytes)) && + (rule->options.inject.iotype_mask & (1ull << iotype))) + { + break; + } + } + + if (!rule || !rule->options.inject.error) { + return 0; + } + + immediately = rule->options.inject.immediately; + error = rule->options.inject.error; + + if (rule->options.inject.once) { + QSIMPLEQ_REMOVE(&s->active_rules, rule, BlkdebugRule, active_next); + remove_rule(rule); + } + + if (!immediately) { + aio_co_schedule(qemu_get_current_aio_context(), qemu_coroutine_self()); + qemu_coroutine_yield(); + } + + return -error; +} + +static int coroutine_fn +blkdebug_co_preadv(BlockDriverState *bs, uint64_t offset, uint64_t bytes, + QEMUIOVector *qiov, int flags) +{ + int err; + + /* Sanity check block layer guarantees */ + assert(QEMU_IS_ALIGNED(offset, bs->bl.request_alignment)); + assert(QEMU_IS_ALIGNED(bytes, bs->bl.request_alignment)); + if (bs->bl.max_transfer) { + assert(bytes <= bs->bl.max_transfer); + } + + err = rule_check(bs, offset, bytes, BLKDEBUG_IO_TYPE_READ); + if (err) { + return err; + } + + return bdrv_co_preadv(bs->file, offset, bytes, qiov, flags); +} + +static int coroutine_fn +blkdebug_co_pwritev(BlockDriverState *bs, uint64_t offset, uint64_t bytes, + QEMUIOVector *qiov, int flags) +{ + int err; + + /* Sanity check block layer guarantees */ + assert(QEMU_IS_ALIGNED(offset, bs->bl.request_alignment)); + assert(QEMU_IS_ALIGNED(bytes, bs->bl.request_alignment)); + if (bs->bl.max_transfer) { + assert(bytes <= bs->bl.max_transfer); + } + + err = rule_check(bs, offset, bytes, BLKDEBUG_IO_TYPE_WRITE); + if (err) { + return err; + } + + return bdrv_co_pwritev(bs->file, offset, bytes, qiov, flags); +} + +static int blkdebug_co_flush(BlockDriverState *bs) +{ + int err = rule_check(bs, 0, 0, BLKDEBUG_IO_TYPE_FLUSH); + + if (err) { + return err; + } + + return bdrv_co_flush(bs->file->bs); +} + +static int coroutine_fn blkdebug_co_pwrite_zeroes(BlockDriverState *bs, + int64_t offset, int bytes, + BdrvRequestFlags flags) +{ + uint32_t align = MAX(bs->bl.request_alignment, + bs->bl.pwrite_zeroes_alignment); + int err; + + /* Only pass through requests that are larger than requested + * preferred alignment (so that we test the fallback to writes on + * unaligned portions), and check that the block layer never hands + * us anything unaligned that crosses an alignment boundary. */ + if (bytes < align) { + assert(QEMU_IS_ALIGNED(offset, align) || + QEMU_IS_ALIGNED(offset + bytes, align) || + DIV_ROUND_UP(offset, align) == + DIV_ROUND_UP(offset + bytes, align)); + return -ENOTSUP; + } + assert(QEMU_IS_ALIGNED(offset, align)); + assert(QEMU_IS_ALIGNED(bytes, align)); + if (bs->bl.max_pwrite_zeroes) { + assert(bytes <= bs->bl.max_pwrite_zeroes); + } + + err = rule_check(bs, offset, bytes, BLKDEBUG_IO_TYPE_WRITE_ZEROES); + if (err) { + return err; + } + + return bdrv_co_pwrite_zeroes(bs->file, offset, bytes, flags); +} + +static int coroutine_fn blkdebug_co_pdiscard(BlockDriverState *bs, + int64_t offset, int bytes) +{ + uint32_t align = bs->bl.pdiscard_alignment; + int err; + + /* Only pass through requests that are larger than requested + * minimum alignment, and ensure that unaligned requests do not + * cross optimum discard boundaries. */ + if (bytes < bs->bl.request_alignment) { + assert(QEMU_IS_ALIGNED(offset, align) || + QEMU_IS_ALIGNED(offset + bytes, align) || + DIV_ROUND_UP(offset, align) == + DIV_ROUND_UP(offset + bytes, align)); + return -ENOTSUP; + } + assert(QEMU_IS_ALIGNED(offset, bs->bl.request_alignment)); + assert(QEMU_IS_ALIGNED(bytes, bs->bl.request_alignment)); + if (align && bytes >= align) { + assert(QEMU_IS_ALIGNED(offset, align)); + assert(QEMU_IS_ALIGNED(bytes, align)); + } + if (bs->bl.max_pdiscard) { + assert(bytes <= bs->bl.max_pdiscard); + } + + err = rule_check(bs, offset, bytes, BLKDEBUG_IO_TYPE_DISCARD); + if (err) { + return err; + } + + return bdrv_co_pdiscard(bs->file, offset, bytes); +} + +static int coroutine_fn blkdebug_co_block_status(BlockDriverState *bs, + bool want_zero, + int64_t offset, + int64_t bytes, + int64_t *pnum, + int64_t *map, + BlockDriverState **file) +{ + int err; + + assert(QEMU_IS_ALIGNED(offset | bytes, bs->bl.request_alignment)); + + err = rule_check(bs, offset, bytes, BLKDEBUG_IO_TYPE_BLOCK_STATUS); + if (err) { + return err; + } + + assert(bs->file && bs->file->bs); + *pnum = bytes; + *map = offset; + *file = bs->file->bs; + return BDRV_BLOCK_RAW | BDRV_BLOCK_OFFSET_VALID; +} + +static void blkdebug_close(BlockDriverState *bs) +{ + BDRVBlkdebugState *s = bs->opaque; + BlkdebugRule *rule, *next; + int i; + + for (i = 0; i < BLKDBG__MAX; i++) { + QLIST_FOREACH_SAFE(rule, &s->rules[i], next, next) { + remove_rule(rule); + } + } + + g_free(s->config_file); +} + +static void suspend_request(BlockDriverState *bs, BlkdebugRule *rule) +{ + BDRVBlkdebugState *s = bs->opaque; + BlkdebugSuspendedReq r; + + r = (BlkdebugSuspendedReq) { + .co = qemu_coroutine_self(), + .tag = g_strdup(rule->options.suspend.tag), + }; + + remove_rule(rule); + QLIST_INSERT_HEAD(&s->suspended_reqs, &r, next); + + if (!qtest_enabled()) { + printf("blkdebug: Suspended request '%s'\n", r.tag); + } + qemu_coroutine_yield(); + if (!qtest_enabled()) { + printf("blkdebug: Resuming request '%s'\n", r.tag); + } + + QLIST_REMOVE(&r, next); + g_free(r.tag); +} + +static bool process_rule(BlockDriverState *bs, struct BlkdebugRule *rule, + bool injected) +{ + BDRVBlkdebugState *s = bs->opaque; + + /* Only process rules for the current state */ + if (rule->state && rule->state != s->state) { + return injected; + } + + /* Take the action */ + switch (rule->action) { + case ACTION_INJECT_ERROR: + if (!injected) { + QSIMPLEQ_INIT(&s->active_rules); + injected = true; + } + QSIMPLEQ_INSERT_HEAD(&s->active_rules, rule, active_next); + break; + + case ACTION_SET_STATE: + s->new_state = rule->options.set_state.new_state; + break; + + case ACTION_SUSPEND: + suspend_request(bs, rule); + break; + } + return injected; +} + +static void blkdebug_debug_event(BlockDriverState *bs, BlkdebugEvent event) +{ + BDRVBlkdebugState *s = bs->opaque; + struct BlkdebugRule *rule, *next; + bool injected; + + assert((int)event >= 0 && event < BLKDBG__MAX); + + injected = false; + s->new_state = s->state; + QLIST_FOREACH_SAFE(rule, &s->rules[event], next, next) { + injected = process_rule(bs, rule, injected); + } + s->state = s->new_state; +} + +static int blkdebug_debug_breakpoint(BlockDriverState *bs, const char *event, + const char *tag) +{ + BDRVBlkdebugState *s = bs->opaque; + struct BlkdebugRule *rule; + int blkdebug_event; + + blkdebug_event = qapi_enum_parse(&BlkdebugEvent_lookup, event, -1, NULL); + if (blkdebug_event < 0) { + return -ENOENT; + } + + rule = g_malloc(sizeof(*rule)); + *rule = (struct BlkdebugRule) { + .event = blkdebug_event, + .action = ACTION_SUSPEND, + .state = 0, + .options.suspend.tag = g_strdup(tag), + }; + + QLIST_INSERT_HEAD(&s->rules[blkdebug_event], rule, next); + + return 0; +} + +static int blkdebug_debug_resume(BlockDriverState *bs, const char *tag) +{ + BDRVBlkdebugState *s = bs->opaque; + BlkdebugSuspendedReq *r, *next; + + QLIST_FOREACH_SAFE(r, &s->suspended_reqs, next, next) { + if (!strcmp(r->tag, tag)) { + qemu_coroutine_enter(r->co); + return 0; + } + } + return -ENOENT; +} + +static int blkdebug_debug_remove_breakpoint(BlockDriverState *bs, + const char *tag) +{ + BDRVBlkdebugState *s = bs->opaque; + BlkdebugSuspendedReq *r, *r_next; + BlkdebugRule *rule, *next; + int i, ret = -ENOENT; + + for (i = 0; i < BLKDBG__MAX; i++) { + QLIST_FOREACH_SAFE(rule, &s->rules[i], next, next) { + if (rule->action == ACTION_SUSPEND && + !strcmp(rule->options.suspend.tag, tag)) { + remove_rule(rule); + ret = 0; + } + } + } + QLIST_FOREACH_SAFE(r, &s->suspended_reqs, next, r_next) { + if (!strcmp(r->tag, tag)) { + qemu_coroutine_enter(r->co); + ret = 0; + } + } + return ret; +} + +static bool blkdebug_debug_is_suspended(BlockDriverState *bs, const char *tag) +{ + BDRVBlkdebugState *s = bs->opaque; + BlkdebugSuspendedReq *r; + + QLIST_FOREACH(r, &s->suspended_reqs, next) { + if (!strcmp(r->tag, tag)) { + return true; + } + } + return false; +} + +static int64_t blkdebug_getlength(BlockDriverState *bs) +{ + return bdrv_getlength(bs->file->bs); +} + +static void blkdebug_refresh_filename(BlockDriverState *bs) +{ + BDRVBlkdebugState *s = bs->opaque; + const QDictEntry *e; + int ret; + + if (!bs->file->bs->exact_filename[0]) { + return; + } + + for (e = qdict_first(bs->full_open_options); e; + e = qdict_next(bs->full_open_options, e)) + { + /* Real child options are under "image", but "x-image" may + * contain a filename */ + if (strcmp(qdict_entry_key(e), "config") && + strcmp(qdict_entry_key(e), "image") && + strcmp(qdict_entry_key(e), "x-image") && + strcmp(qdict_entry_key(e), "driver")) + { + return; + } + } + + ret = snprintf(bs->exact_filename, sizeof(bs->exact_filename), + "blkdebug:%s:%s", + s->config_file ?: "", bs->file->bs->exact_filename); + if (ret >= sizeof(bs->exact_filename)) { + /* An overflow makes the filename unusable, so do not report any */ + bs->exact_filename[0] = 0; + } +} + +static void blkdebug_refresh_limits(BlockDriverState *bs, Error **errp) +{ + BDRVBlkdebugState *s = bs->opaque; + + if (s->align) { + bs->bl.request_alignment = s->align; + } + if (s->max_transfer) { + bs->bl.max_transfer = s->max_transfer; + } + if (s->opt_write_zero) { + bs->bl.pwrite_zeroes_alignment = s->opt_write_zero; + } + if (s->max_write_zero) { + bs->bl.max_pwrite_zeroes = s->max_write_zero; + } + if (s->opt_discard) { + bs->bl.pdiscard_alignment = s->opt_discard; + } + if (s->max_discard) { + bs->bl.max_pdiscard = s->max_discard; + } +} + +static int blkdebug_reopen_prepare(BDRVReopenState *reopen_state, + BlockReopenQueue *queue, Error **errp) +{ + return 0; +} + +static void blkdebug_child_perm(BlockDriverState *bs, BdrvChild *c, + BdrvChildRole role, + BlockReopenQueue *reopen_queue, + uint64_t perm, uint64_t shared, + uint64_t *nperm, uint64_t *nshared) +{ + BDRVBlkdebugState *s = bs->opaque; + + bdrv_default_perms(bs, c, role, reopen_queue, + perm, shared, nperm, nshared); + + *nperm |= s->take_child_perms; + *nshared &= ~s->unshare_child_perms; +} + +static const char *const blkdebug_strong_runtime_opts[] = { + "config", + "inject-error.", + "set-state.", + "align", + "max-transfer", + "opt-write-zero", + "max-write-zero", + "opt-discard", + "max-discard", + + NULL +}; + +static BlockDriver bdrv_blkdebug = { + .format_name = "blkdebug", + .protocol_name = "blkdebug", + .instance_size = sizeof(BDRVBlkdebugState), + .is_filter = true, + + .bdrv_parse_filename = blkdebug_parse_filename, + .bdrv_file_open = blkdebug_open, + .bdrv_close = blkdebug_close, + .bdrv_reopen_prepare = blkdebug_reopen_prepare, + .bdrv_child_perm = blkdebug_child_perm, + + .bdrv_getlength = blkdebug_getlength, + .bdrv_refresh_filename = blkdebug_refresh_filename, + .bdrv_refresh_limits = blkdebug_refresh_limits, + + .bdrv_co_preadv = blkdebug_co_preadv, + .bdrv_co_pwritev = blkdebug_co_pwritev, + .bdrv_co_flush_to_disk = blkdebug_co_flush, + .bdrv_co_pwrite_zeroes = blkdebug_co_pwrite_zeroes, + .bdrv_co_pdiscard = blkdebug_co_pdiscard, + .bdrv_co_block_status = blkdebug_co_block_status, + + .bdrv_debug_event = blkdebug_debug_event, + .bdrv_debug_breakpoint = blkdebug_debug_breakpoint, + .bdrv_debug_remove_breakpoint + = blkdebug_debug_remove_breakpoint, + .bdrv_debug_resume = blkdebug_debug_resume, + .bdrv_debug_is_suspended = blkdebug_debug_is_suspended, + + .strong_runtime_opts = blkdebug_strong_runtime_opts, +}; + +static void bdrv_blkdebug_init(void) +{ + bdrv_register(&bdrv_blkdebug); +} + +block_init(bdrv_blkdebug_init); diff --git a/block/blklogwrites.c b/block/blklogwrites.c new file mode 100644 index 000000000..13ae63983 --- /dev/null +++ b/block/blklogwrites.c @@ -0,0 +1,528 @@ +/* + * Write logging blk driver based on blkverify and blkdebug. + * + * Copyright (c) 2017 Tuomas Tynkkynen + * Copyright (c) 2018 Aapo Vienamo + * Copyright (c) 2018 Ari Sundholm + * + * This work is licensed under the terms of the GNU GPL, version 2 or later. + * See the COPYING file in the top-level directory. + */ + +#include "qemu/osdep.h" +#include "qapi/error.h" +#include "qemu/sockets.h" /* for EINPROGRESS on Windows */ +#include "block/block_int.h" +#include "qapi/qmp/qdict.h" +#include "qapi/qmp/qstring.h" +#include "qemu/cutils.h" +#include "qemu/module.h" +#include "qemu/option.h" + +/* Disk format stuff - taken from Linux drivers/md/dm-log-writes.c */ + +#define LOG_FLUSH_FLAG (1 << 0) +#define LOG_FUA_FLAG (1 << 1) +#define LOG_DISCARD_FLAG (1 << 2) +#define LOG_MARK_FLAG (1 << 3) +#define LOG_FLAG_MASK (LOG_FLUSH_FLAG \ + | LOG_FUA_FLAG \ + | LOG_DISCARD_FLAG \ + | LOG_MARK_FLAG) + +#define WRITE_LOG_VERSION 1ULL +#define WRITE_LOG_MAGIC 0x6a736677736872ULL + +/* All fields are little-endian. */ +struct log_write_super { + uint64_t magic; + uint64_t version; + uint64_t nr_entries; + uint32_t sectorsize; +} QEMU_PACKED; + +struct log_write_entry { + uint64_t sector; + uint64_t nr_sectors; + uint64_t flags; + uint64_t data_len; +} QEMU_PACKED; + +/* End of disk format structures. */ + +typedef struct { + BdrvChild *log_file; + uint32_t sectorsize; + uint32_t sectorbits; + uint64_t cur_log_sector; + uint64_t nr_entries; + uint64_t update_interval; +} BDRVBlkLogWritesState; + +static QemuOptsList runtime_opts = { + .name = "blklogwrites", + .head = QTAILQ_HEAD_INITIALIZER(runtime_opts.head), + .desc = { + { + .name = "log-append", + .type = QEMU_OPT_BOOL, + .help = "Append to an existing log", + }, + { + .name = "log-sector-size", + .type = QEMU_OPT_SIZE, + .help = "Log sector size", + }, + { + .name = "log-super-update-interval", + .type = QEMU_OPT_NUMBER, + .help = "Log superblock update interval (# of write requests)", + }, + { /* end of list */ } + }, +}; + +static inline uint32_t blk_log_writes_log2(uint32_t value) +{ + assert(value > 0); + return 31 - clz32(value); +} + +static inline bool blk_log_writes_sector_size_valid(uint32_t sector_size) +{ + return is_power_of_2(sector_size) && + sector_size >= sizeof(struct log_write_super) && + sector_size >= sizeof(struct log_write_entry) && + sector_size < (1ull << 24); +} + +static uint64_t blk_log_writes_find_cur_log_sector(BdrvChild *log, + uint32_t sector_size, + uint64_t nr_entries, + Error **errp) +{ + uint64_t cur_sector = 1; + uint64_t cur_idx = 0; + uint32_t sector_bits = blk_log_writes_log2(sector_size); + struct log_write_entry cur_entry; + + while (cur_idx < nr_entries) { + int read_ret = bdrv_pread(log, cur_sector << sector_bits, &cur_entry, + sizeof(cur_entry)); + if (read_ret < 0) { + error_setg_errno(errp, -read_ret, + "Failed to read log entry %"PRIu64, cur_idx); + return (uint64_t)-1ull; + } + + if (cur_entry.flags & ~cpu_to_le64(LOG_FLAG_MASK)) { + error_setg(errp, "Invalid flags 0x%"PRIx64" in log entry %"PRIu64, + le64_to_cpu(cur_entry.flags), cur_idx); + return (uint64_t)-1ull; + } + + /* Account for the sector of the entry itself */ + ++cur_sector; + + /* + * Account for the data of the write. + * For discards, this data is not present. + */ + if (!(cur_entry.flags & cpu_to_le64(LOG_DISCARD_FLAG))) { + cur_sector += le64_to_cpu(cur_entry.nr_sectors); + } + + ++cur_idx; + } + + return cur_sector; +} + +static int blk_log_writes_open(BlockDriverState *bs, QDict *options, int flags, + Error **errp) +{ + BDRVBlkLogWritesState *s = bs->opaque; + QemuOpts *opts; + Error *local_err = NULL; + int ret; + uint64_t log_sector_size; + bool log_append; + + opts = qemu_opts_create(&runtime_opts, NULL, 0, &error_abort); + if (!qemu_opts_absorb_qdict(opts, options, errp)) { + ret = -EINVAL; + goto fail; + } + + /* Open the file */ + bs->file = bdrv_open_child(NULL, options, "file", bs, &child_of_bds, + BDRV_CHILD_FILTERED | BDRV_CHILD_PRIMARY, false, + &local_err); + if (local_err) { + ret = -EINVAL; + error_propagate(errp, local_err); + goto fail; + } + + /* Open the log file */ + s->log_file = bdrv_open_child(NULL, options, "log", bs, &child_of_bds, + BDRV_CHILD_METADATA, false, &local_err); + if (local_err) { + ret = -EINVAL; + error_propagate(errp, local_err); + goto fail; + } + + log_append = qemu_opt_get_bool(opts, "log-append", false); + + if (log_append) { + struct log_write_super log_sb = { 0, 0, 0, 0 }; + + if (qemu_opt_find(opts, "log-sector-size")) { + ret = -EINVAL; + error_setg(errp, "log-append and log-sector-size are mutually " + "exclusive"); + goto fail_log; + } + + /* Read log superblock or fake one for an empty log */ + if (!bdrv_getlength(s->log_file->bs)) { + log_sb.magic = cpu_to_le64(WRITE_LOG_MAGIC); + log_sb.version = cpu_to_le64(WRITE_LOG_VERSION); + log_sb.nr_entries = cpu_to_le64(0); + log_sb.sectorsize = cpu_to_le32(BDRV_SECTOR_SIZE); + } else { + ret = bdrv_pread(s->log_file, 0, &log_sb, sizeof(log_sb)); + if (ret < 0) { + error_setg_errno(errp, -ret, "Could not read log superblock"); + goto fail_log; + } + } + + if (log_sb.magic != cpu_to_le64(WRITE_LOG_MAGIC)) { + ret = -EINVAL; + error_setg(errp, "Invalid log superblock magic"); + goto fail_log; + } + + if (log_sb.version != cpu_to_le64(WRITE_LOG_VERSION)) { + ret = -EINVAL; + error_setg(errp, "Unsupported log version %"PRIu64, + le64_to_cpu(log_sb.version)); + goto fail_log; + } + + log_sector_size = le32_to_cpu(log_sb.sectorsize); + s->cur_log_sector = 1; + s->nr_entries = 0; + + if (blk_log_writes_sector_size_valid(log_sector_size)) { + s->cur_log_sector = + blk_log_writes_find_cur_log_sector(s->log_file, log_sector_size, + le64_to_cpu(log_sb.nr_entries), &local_err); + if (local_err) { + ret = -EINVAL; + error_propagate(errp, local_err); + goto fail_log; + } + + s->nr_entries = le64_to_cpu(log_sb.nr_entries); + } + } else { + log_sector_size = qemu_opt_get_size(opts, "log-sector-size", + BDRV_SECTOR_SIZE); + s->cur_log_sector = 1; + s->nr_entries = 0; + } + + if (!blk_log_writes_sector_size_valid(log_sector_size)) { + ret = -EINVAL; + error_setg(errp, "Invalid log sector size %"PRIu64, log_sector_size); + goto fail_log; + } + + s->sectorsize = log_sector_size; + s->sectorbits = blk_log_writes_log2(log_sector_size); + s->update_interval = qemu_opt_get_number(opts, "log-super-update-interval", + 4096); + if (!s->update_interval) { + ret = -EINVAL; + error_setg(errp, "Invalid log superblock update interval %"PRIu64, + s->update_interval); + goto fail_log; + } + + ret = 0; +fail_log: + if (ret < 0) { + bdrv_unref_child(bs, s->log_file); + s->log_file = NULL; + } +fail: + if (ret < 0) { + bdrv_unref_child(bs, bs->file); + bs->file = NULL; + } + qemu_opts_del(opts); + return ret; +} + +static void blk_log_writes_close(BlockDriverState *bs) +{ + BDRVBlkLogWritesState *s = bs->opaque; + + bdrv_unref_child(bs, s->log_file); + s->log_file = NULL; +} + +static int64_t blk_log_writes_getlength(BlockDriverState *bs) +{ + return bdrv_getlength(bs->file->bs); +} + +static void blk_log_writes_child_perm(BlockDriverState *bs, BdrvChild *c, + BdrvChildRole role, + BlockReopenQueue *ro_q, + uint64_t perm, uint64_t shrd, + uint64_t *nperm, uint64_t *nshrd) +{ + if (!c) { + *nperm = perm & DEFAULT_PERM_PASSTHROUGH; + *nshrd = (shrd & DEFAULT_PERM_PASSTHROUGH) | DEFAULT_PERM_UNCHANGED; + return; + } + + bdrv_default_perms(bs, c, role, ro_q, perm, shrd, + nperm, nshrd); +} + +static void blk_log_writes_refresh_limits(BlockDriverState *bs, Error **errp) +{ + BDRVBlkLogWritesState *s = bs->opaque; + bs->bl.request_alignment = s->sectorsize; +} + +static int coroutine_fn +blk_log_writes_co_preadv(BlockDriverState *bs, uint64_t offset, uint64_t bytes, + QEMUIOVector *qiov, int flags) +{ + return bdrv_co_preadv(bs->file, offset, bytes, qiov, flags); +} + +typedef struct BlkLogWritesFileReq { + BlockDriverState *bs; + uint64_t offset; + uint64_t bytes; + int file_flags; + QEMUIOVector *qiov; + int (*func)(struct BlkLogWritesFileReq *r); + int file_ret; +} BlkLogWritesFileReq; + +typedef struct { + BlockDriverState *bs; + QEMUIOVector *qiov; + struct log_write_entry entry; + uint64_t zero_size; + int log_ret; +} BlkLogWritesLogReq; + +static void coroutine_fn blk_log_writes_co_do_log(BlkLogWritesLogReq *lr) +{ + BDRVBlkLogWritesState *s = lr->bs->opaque; + uint64_t cur_log_offset = s->cur_log_sector << s->sectorbits; + + s->nr_entries++; + s->cur_log_sector += + ROUND_UP(lr->qiov->size, s->sectorsize) >> s->sectorbits; + + lr->log_ret = bdrv_co_pwritev(s->log_file, cur_log_offset, lr->qiov->size, + lr->qiov, 0); + + /* Logging for the "write zeroes" operation */ + if (lr->log_ret == 0 && lr->zero_size) { + cur_log_offset = s->cur_log_sector << s->sectorbits; + s->cur_log_sector += + ROUND_UP(lr->zero_size, s->sectorsize) >> s->sectorbits; + + lr->log_ret = bdrv_co_pwrite_zeroes(s->log_file, cur_log_offset, + lr->zero_size, 0); + } + + /* Update super block on flush or every update interval */ + if (lr->log_ret == 0 && ((lr->entry.flags & LOG_FLUSH_FLAG) + || (s->nr_entries % s->update_interval == 0))) + { + struct log_write_super super = { + .magic = cpu_to_le64(WRITE_LOG_MAGIC), + .version = cpu_to_le64(WRITE_LOG_VERSION), + .nr_entries = cpu_to_le64(s->nr_entries), + .sectorsize = cpu_to_le32(s->sectorsize), + }; + void *zeroes = g_malloc0(s->sectorsize - sizeof(super)); + QEMUIOVector qiov; + + qemu_iovec_init(&qiov, 2); + qemu_iovec_add(&qiov, &super, sizeof(super)); + qemu_iovec_add(&qiov, zeroes, s->sectorsize - sizeof(super)); + + lr->log_ret = + bdrv_co_pwritev(s->log_file, 0, s->sectorsize, &qiov, 0); + if (lr->log_ret == 0) { + lr->log_ret = bdrv_co_flush(s->log_file->bs); + } + qemu_iovec_destroy(&qiov); + g_free(zeroes); + } +} + +static void coroutine_fn blk_log_writes_co_do_file(BlkLogWritesFileReq *fr) +{ + fr->file_ret = fr->func(fr); +} + +static int coroutine_fn +blk_log_writes_co_log(BlockDriverState *bs, uint64_t offset, uint64_t bytes, + QEMUIOVector *qiov, int flags, + int (*file_func)(BlkLogWritesFileReq *r), + uint64_t entry_flags, bool is_zero_write) +{ + QEMUIOVector log_qiov; + size_t niov = qiov ? qiov->niov : 0; + BDRVBlkLogWritesState *s = bs->opaque; + BlkLogWritesFileReq fr = { + .bs = bs, + .offset = offset, + .bytes = bytes, + .file_flags = flags, + .qiov = qiov, + .func = file_func, + }; + BlkLogWritesLogReq lr = { + .bs = bs, + .qiov = &log_qiov, + .entry = { + .sector = cpu_to_le64(offset >> s->sectorbits), + .nr_sectors = cpu_to_le64(bytes >> s->sectorbits), + .flags = cpu_to_le64(entry_flags), + .data_len = 0, + }, + .zero_size = is_zero_write ? bytes : 0, + }; + void *zeroes = g_malloc0(s->sectorsize - sizeof(lr.entry)); + + assert((1 << s->sectorbits) == s->sectorsize); + assert(bs->bl.request_alignment == s->sectorsize); + assert(QEMU_IS_ALIGNED(offset, bs->bl.request_alignment)); + assert(QEMU_IS_ALIGNED(bytes, bs->bl.request_alignment)); + + qemu_iovec_init(&log_qiov, niov + 2); + qemu_iovec_add(&log_qiov, &lr.entry, sizeof(lr.entry)); + qemu_iovec_add(&log_qiov, zeroes, s->sectorsize - sizeof(lr.entry)); + if (qiov) { + qemu_iovec_concat(&log_qiov, qiov, 0, qiov->size); + } + + blk_log_writes_co_do_file(&fr); + blk_log_writes_co_do_log(&lr); + + qemu_iovec_destroy(&log_qiov); + g_free(zeroes); + + if (lr.log_ret < 0) { + return lr.log_ret; + } + + return fr.file_ret; +} + +static int coroutine_fn +blk_log_writes_co_do_file_pwritev(BlkLogWritesFileReq *fr) +{ + return bdrv_co_pwritev(fr->bs->file, fr->offset, fr->bytes, + fr->qiov, fr->file_flags); +} + +static int coroutine_fn +blk_log_writes_co_do_file_pwrite_zeroes(BlkLogWritesFileReq *fr) +{ + return bdrv_co_pwrite_zeroes(fr->bs->file, fr->offset, fr->bytes, + fr->file_flags); +} + +static int coroutine_fn blk_log_writes_co_do_file_flush(BlkLogWritesFileReq *fr) +{ + return bdrv_co_flush(fr->bs->file->bs); +} + +static int coroutine_fn +blk_log_writes_co_do_file_pdiscard(BlkLogWritesFileReq *fr) +{ + return bdrv_co_pdiscard(fr->bs->file, fr->offset, fr->bytes); +} + +static int coroutine_fn +blk_log_writes_co_pwritev(BlockDriverState *bs, uint64_t offset, uint64_t bytes, + QEMUIOVector *qiov, int flags) +{ + return blk_log_writes_co_log(bs, offset, bytes, qiov, flags, + blk_log_writes_co_do_file_pwritev, 0, false); +} + +static int coroutine_fn +blk_log_writes_co_pwrite_zeroes(BlockDriverState *bs, int64_t offset, int bytes, + BdrvRequestFlags flags) +{ + return blk_log_writes_co_log(bs, offset, bytes, NULL, flags, + blk_log_writes_co_do_file_pwrite_zeroes, 0, + true); +} + +static int coroutine_fn blk_log_writes_co_flush_to_disk(BlockDriverState *bs) +{ + return blk_log_writes_co_log(bs, 0, 0, NULL, 0, + blk_log_writes_co_do_file_flush, + LOG_FLUSH_FLAG, false); +} + +static int coroutine_fn +blk_log_writes_co_pdiscard(BlockDriverState *bs, int64_t offset, int count) +{ + return blk_log_writes_co_log(bs, offset, count, NULL, 0, + blk_log_writes_co_do_file_pdiscard, + LOG_DISCARD_FLAG, false); +} + +static const char *const blk_log_writes_strong_runtime_opts[] = { + "log-append", + "log-sector-size", + + NULL +}; + +static BlockDriver bdrv_blk_log_writes = { + .format_name = "blklogwrites", + .instance_size = sizeof(BDRVBlkLogWritesState), + + .bdrv_open = blk_log_writes_open, + .bdrv_close = blk_log_writes_close, + .bdrv_getlength = blk_log_writes_getlength, + .bdrv_child_perm = blk_log_writes_child_perm, + .bdrv_refresh_limits = blk_log_writes_refresh_limits, + + .bdrv_co_preadv = blk_log_writes_co_preadv, + .bdrv_co_pwritev = blk_log_writes_co_pwritev, + .bdrv_co_pwrite_zeroes = blk_log_writes_co_pwrite_zeroes, + .bdrv_co_flush_to_disk = blk_log_writes_co_flush_to_disk, + .bdrv_co_pdiscard = blk_log_writes_co_pdiscard, + + .is_filter = true, + .strong_runtime_opts = blk_log_writes_strong_runtime_opts, +}; + +static void bdrv_blk_log_writes_init(void) +{ + bdrv_register(&bdrv_blk_log_writes); +} + +block_init(bdrv_blk_log_writes_init); diff --git a/block/blkreplay.c b/block/blkreplay.c new file mode 100644 index 000000000..30a0f5d57 --- /dev/null +++ b/block/blkreplay.c @@ -0,0 +1,160 @@ +/* + * Block protocol for record/replay + * + * Copyright (c) 2010-2016 Institute for System Programming + * of the Russian Academy of Sciences. + * + * This work is licensed under the terms of the GNU GPL, version 2 or later. + * See the COPYING file in the top-level directory. + * + */ + +#include "qemu/osdep.h" +#include "qemu/module.h" +#include "block/block_int.h" +#include "sysemu/replay.h" +#include "qapi/error.h" + +typedef struct Request { + Coroutine *co; + QEMUBH *bh; +} Request; + +static int blkreplay_open(BlockDriverState *bs, QDict *options, int flags, + Error **errp) +{ + Error *local_err = NULL; + int ret; + + /* Open the image file */ + bs->file = bdrv_open_child(NULL, options, "image", bs, &child_of_bds, + BDRV_CHILD_FILTERED | BDRV_CHILD_PRIMARY, + false, &local_err); + if (local_err) { + ret = -EINVAL; + error_propagate(errp, local_err); + goto fail; + } + + bs->supported_write_flags = BDRV_REQ_WRITE_UNCHANGED; + bs->supported_zero_flags = BDRV_REQ_WRITE_UNCHANGED; + + ret = 0; +fail: + return ret; +} + +static int64_t blkreplay_getlength(BlockDriverState *bs) +{ + return bdrv_getlength(bs->file->bs); +} + +/* This bh is used for synchronization of return from coroutines. + It continues yielded coroutine which then finishes its execution. + BH is called adjusted to some replay checkpoint, therefore + record and replay will always finish coroutines deterministically. +*/ +static void blkreplay_bh_cb(void *opaque) +{ + Request *req = opaque; + aio_co_wake(req->co); + qemu_bh_delete(req->bh); + g_free(req); +} + +static void block_request_create(uint64_t reqid, BlockDriverState *bs, + Coroutine *co) +{ + Request *req = g_new(Request, 1); + *req = (Request) { + .co = co, + .bh = aio_bh_new(bdrv_get_aio_context(bs), blkreplay_bh_cb, req), + }; + replay_block_event(req->bh, reqid); +} + +static int coroutine_fn blkreplay_co_preadv(BlockDriverState *bs, + uint64_t offset, uint64_t bytes, QEMUIOVector *qiov, int flags) +{ + uint64_t reqid = blkreplay_next_id(); + int ret = bdrv_co_preadv(bs->file, offset, bytes, qiov, flags); + block_request_create(reqid, bs, qemu_coroutine_self()); + qemu_coroutine_yield(); + + return ret; +} + +static int coroutine_fn blkreplay_co_pwritev(BlockDriverState *bs, + uint64_t offset, uint64_t bytes, QEMUIOVector *qiov, int flags) +{ + uint64_t reqid = blkreplay_next_id(); + int ret = bdrv_co_pwritev(bs->file, offset, bytes, qiov, flags); + block_request_create(reqid, bs, qemu_coroutine_self()); + qemu_coroutine_yield(); + + return ret; +} + +static int coroutine_fn blkreplay_co_pwrite_zeroes(BlockDriverState *bs, + int64_t offset, int bytes, BdrvRequestFlags flags) +{ + uint64_t reqid = blkreplay_next_id(); + int ret = bdrv_co_pwrite_zeroes(bs->file, offset, bytes, flags); + block_request_create(reqid, bs, qemu_coroutine_self()); + qemu_coroutine_yield(); + + return ret; +} + +static int coroutine_fn blkreplay_co_pdiscard(BlockDriverState *bs, + int64_t offset, int bytes) +{ + uint64_t reqid = blkreplay_next_id(); + int ret = bdrv_co_pdiscard(bs->file, offset, bytes); + block_request_create(reqid, bs, qemu_coroutine_self()); + qemu_coroutine_yield(); + + return ret; +} + +static int coroutine_fn blkreplay_co_flush(BlockDriverState *bs) +{ + uint64_t reqid = blkreplay_next_id(); + int ret = bdrv_co_flush(bs->file->bs); + block_request_create(reqid, bs, qemu_coroutine_self()); + qemu_coroutine_yield(); + + return ret; +} + +static int blkreplay_snapshot_goto(BlockDriverState *bs, + const char *snapshot_id) +{ + return bdrv_snapshot_goto(bs->file->bs, snapshot_id, NULL); +} + +static BlockDriver bdrv_blkreplay = { + .format_name = "blkreplay", + .instance_size = 0, + .is_filter = true, + + .bdrv_open = blkreplay_open, + .bdrv_child_perm = bdrv_default_perms, + .bdrv_getlength = blkreplay_getlength, + + .bdrv_co_preadv = blkreplay_co_preadv, + .bdrv_co_pwritev = blkreplay_co_pwritev, + + .bdrv_co_pwrite_zeroes = blkreplay_co_pwrite_zeroes, + .bdrv_co_pdiscard = blkreplay_co_pdiscard, + .bdrv_co_flush = blkreplay_co_flush, + + .bdrv_snapshot_goto = blkreplay_snapshot_goto, +}; + +static void bdrv_blkreplay_init(void) +{ + bdrv_register(&bdrv_blkreplay); +} + +block_init(bdrv_blkreplay_init); diff --git a/block/blkverify.c b/block/blkverify.c new file mode 100644 index 000000000..4aed53ab5 --- /dev/null +++ b/block/blkverify.c @@ -0,0 +1,338 @@ +/* + * Block protocol for block driver correctness testing + * + * Copyright (C) 2010 IBM, Corp. + * + * This work is licensed under the terms of the GNU GPL, version 2 or later. + * See the COPYING file in the top-level directory. + */ + +#include "qemu/osdep.h" +#include "qapi/error.h" +#include "qemu/sockets.h" /* for EINPROGRESS on Windows */ +#include "block/block_int.h" +#include "qapi/qmp/qdict.h" +#include "qapi/qmp/qstring.h" +#include "qemu/cutils.h" +#include "qemu/module.h" +#include "qemu/option.h" + +typedef struct { + BdrvChild *test_file; +} BDRVBlkverifyState; + +typedef struct BlkverifyRequest { + Coroutine *co; + BlockDriverState *bs; + + /* Request metadata */ + bool is_write; + uint64_t offset; + uint64_t bytes; + int flags; + + int (*request_fn)(BdrvChild *, int64_t, unsigned int, QEMUIOVector *, + BdrvRequestFlags); + + int ret; /* test image result */ + int raw_ret; /* raw image result */ + + unsigned int done; /* completion counter */ + + QEMUIOVector *qiov; /* user I/O vector */ + QEMUIOVector *raw_qiov; /* cloned I/O vector for raw file */ +} BlkverifyRequest; + +static void GCC_FMT_ATTR(2, 3) blkverify_err(BlkverifyRequest *r, + const char *fmt, ...) +{ + va_list ap; + + va_start(ap, fmt); + fprintf(stderr, "blkverify: %s offset=%" PRId64 " bytes=%" PRId64 " ", + r->is_write ? "write" : "read", r->offset, r->bytes); + vfprintf(stderr, fmt, ap); + fprintf(stderr, "\n"); + va_end(ap); + exit(1); +} + +/* Valid blkverify filenames look like blkverify:path/to/raw_image:path/to/image */ +static void blkverify_parse_filename(const char *filename, QDict *options, + Error **errp) +{ + const char *c; + QString *raw_path; + + + /* Parse the blkverify: prefix */ + if (!strstart(filename, "blkverify:", &filename)) { + /* There was no prefix; therefore, all options have to be already + present in the QDict (except for the filename) */ + qdict_put_str(options, "x-image", filename); + return; + } + + /* Parse the raw image filename */ + c = strchr(filename, ':'); + if (c == NULL) { + error_setg(errp, "blkverify requires raw copy and original image path"); + return; + } + + /* TODO Implement option pass-through and set raw.filename here */ + raw_path = qstring_from_substr(filename, 0, c - filename); + qdict_put(options, "x-raw", raw_path); + + /* TODO Allow multi-level nesting and set file.filename here */ + filename = c + 1; + qdict_put_str(options, "x-image", filename); +} + +static QemuOptsList runtime_opts = { + .name = "blkverify", + .head = QTAILQ_HEAD_INITIALIZER(runtime_opts.head), + .desc = { + { + .name = "x-raw", + .type = QEMU_OPT_STRING, + .help = "[internal use only, will be removed]", + }, + { + .name = "x-image", + .type = QEMU_OPT_STRING, + .help = "[internal use only, will be removed]", + }, + { /* end of list */ } + }, +}; + +static int blkverify_open(BlockDriverState *bs, QDict *options, int flags, + Error **errp) +{ + BDRVBlkverifyState *s = bs->opaque; + QemuOpts *opts; + Error *local_err = NULL; + int ret; + + opts = qemu_opts_create(&runtime_opts, NULL, 0, &error_abort); + if (!qemu_opts_absorb_qdict(opts, options, errp)) { + ret = -EINVAL; + goto fail; + } + + /* Open the raw file */ + bs->file = bdrv_open_child(qemu_opt_get(opts, "x-raw"), options, "raw", + bs, &child_of_bds, + BDRV_CHILD_FILTERED | BDRV_CHILD_PRIMARY, + false, &local_err); + if (local_err) { + ret = -EINVAL; + error_propagate(errp, local_err); + goto fail; + } + + /* Open the test file */ + s->test_file = bdrv_open_child(qemu_opt_get(opts, "x-image"), options, + "test", bs, &child_of_bds, BDRV_CHILD_DATA, + false, &local_err); + if (local_err) { + ret = -EINVAL; + error_propagate(errp, local_err); + goto fail; + } + + bs->supported_write_flags = BDRV_REQ_WRITE_UNCHANGED; + bs->supported_zero_flags = BDRV_REQ_WRITE_UNCHANGED; + + ret = 0; +fail: + qemu_opts_del(opts); + return ret; +} + +static void blkverify_close(BlockDriverState *bs) +{ + BDRVBlkverifyState *s = bs->opaque; + + bdrv_unref_child(bs, s->test_file); + s->test_file = NULL; +} + +static int64_t blkverify_getlength(BlockDriverState *bs) +{ + BDRVBlkverifyState *s = bs->opaque; + + return bdrv_getlength(s->test_file->bs); +} + +static void coroutine_fn blkverify_do_test_req(void *opaque) +{ + BlkverifyRequest *r = opaque; + BDRVBlkverifyState *s = r->bs->opaque; + + r->ret = r->request_fn(s->test_file, r->offset, r->bytes, r->qiov, + r->flags); + r->done++; + qemu_coroutine_enter_if_inactive(r->co); +} + +static void coroutine_fn blkverify_do_raw_req(void *opaque) +{ + BlkverifyRequest *r = opaque; + + r->raw_ret = r->request_fn(r->bs->file, r->offset, r->bytes, r->raw_qiov, + r->flags); + r->done++; + qemu_coroutine_enter_if_inactive(r->co); +} + +static int coroutine_fn +blkverify_co_prwv(BlockDriverState *bs, BlkverifyRequest *r, uint64_t offset, + uint64_t bytes, QEMUIOVector *qiov, QEMUIOVector *raw_qiov, + int flags, bool is_write) +{ + Coroutine *co_a, *co_b; + + *r = (BlkverifyRequest) { + .co = qemu_coroutine_self(), + .bs = bs, + .offset = offset, + .bytes = bytes, + .qiov = qiov, + .raw_qiov = raw_qiov, + .flags = flags, + .is_write = is_write, + .request_fn = is_write ? bdrv_co_pwritev : bdrv_co_preadv, + }; + + co_a = qemu_coroutine_create(blkverify_do_test_req, r); + co_b = qemu_coroutine_create(blkverify_do_raw_req, r); + + qemu_coroutine_enter(co_a); + qemu_coroutine_enter(co_b); + + while (r->done < 2) { + qemu_coroutine_yield(); + } + + if (r->ret != r->raw_ret) { + blkverify_err(r, "return value mismatch %d != %d", r->ret, r->raw_ret); + } + + return r->ret; +} + +static int coroutine_fn +blkverify_co_preadv(BlockDriverState *bs, uint64_t offset, uint64_t bytes, + QEMUIOVector *qiov, int flags) +{ + BlkverifyRequest r; + QEMUIOVector raw_qiov; + void *buf; + ssize_t cmp_offset; + int ret; + + buf = qemu_blockalign(bs->file->bs, qiov->size); + qemu_iovec_init(&raw_qiov, qiov->niov); + qemu_iovec_clone(&raw_qiov, qiov, buf); + + ret = blkverify_co_prwv(bs, &r, offset, bytes, qiov, &raw_qiov, flags, + false); + + cmp_offset = qemu_iovec_compare(qiov, &raw_qiov); + if (cmp_offset != -1) { + blkverify_err(&r, "contents mismatch at offset %" PRId64, + offset + cmp_offset); + } + + qemu_iovec_destroy(&raw_qiov); + qemu_vfree(buf); + + return ret; +} + +static int coroutine_fn +blkverify_co_pwritev(BlockDriverState *bs, uint64_t offset, uint64_t bytes, + QEMUIOVector *qiov, int flags) +{ + BlkverifyRequest r; + return blkverify_co_prwv(bs, &r, offset, bytes, qiov, qiov, flags, true); +} + +static int blkverify_co_flush(BlockDriverState *bs) +{ + BDRVBlkverifyState *s = bs->opaque; + + /* Only flush test file, the raw file is not important */ + return bdrv_co_flush(s->test_file->bs); +} + +static bool blkverify_recurse_can_replace(BlockDriverState *bs, + BlockDriverState *to_replace) +{ + BDRVBlkverifyState *s = bs->opaque; + + /* + * blkverify quits the whole qemu process if there is a mismatch + * between bs->file->bs and s->test_file->bs. Therefore, we know + * know that both must match bs and we can recurse down to either. + */ + return bdrv_recurse_can_replace(bs->file->bs, to_replace) || + bdrv_recurse_can_replace(s->test_file->bs, to_replace); +} + +static void blkverify_refresh_filename(BlockDriverState *bs) +{ + BDRVBlkverifyState *s = bs->opaque; + + if (bs->file->bs->exact_filename[0] + && s->test_file->bs->exact_filename[0]) + { + int ret = snprintf(bs->exact_filename, sizeof(bs->exact_filename), + "blkverify:%s:%s", + bs->file->bs->exact_filename, + s->test_file->bs->exact_filename); + if (ret >= sizeof(bs->exact_filename)) { + /* An overflow makes the filename unusable, so do not report any */ + bs->exact_filename[0] = 0; + } + } +} + +static char *blkverify_dirname(BlockDriverState *bs, Error **errp) +{ + /* In general, there are two BDSs with different dirnames below this one; + * so there is no unique dirname we could return (unless both are equal by + * chance). Therefore, to be consistent, just always return NULL. */ + error_setg(errp, "Cannot generate a base directory for blkverify nodes"); + return NULL; +} + +static BlockDriver bdrv_blkverify = { + .format_name = "blkverify", + .protocol_name = "blkverify", + .instance_size = sizeof(BDRVBlkverifyState), + + .bdrv_parse_filename = blkverify_parse_filename, + .bdrv_file_open = blkverify_open, + .bdrv_close = blkverify_close, + .bdrv_child_perm = bdrv_default_perms, + .bdrv_getlength = blkverify_getlength, + .bdrv_refresh_filename = blkverify_refresh_filename, + .bdrv_dirname = blkverify_dirname, + + .bdrv_co_preadv = blkverify_co_preadv, + .bdrv_co_pwritev = blkverify_co_pwritev, + .bdrv_co_flush = blkverify_co_flush, + + .is_filter = true, + .bdrv_recurse_can_replace = blkverify_recurse_can_replace, +}; + +static void bdrv_blkverify_init(void) +{ + bdrv_register(&bdrv_blkverify); +} + +block_init(bdrv_blkverify_init); diff --git a/block/block-backend.c b/block/block-backend.c new file mode 100644 index 000000000..e493f1751 --- /dev/null +++ b/block/block-backend.c @@ -0,0 +1,2445 @@ +/* + * QEMU Block backends + * + * Copyright (C) 2014-2016 Red Hat, Inc. + * + * Authors: + * Markus Armbruster , + * + * This work is licensed under the terms of the GNU LGPL, version 2.1 + * or later. See the COPYING.LIB file in the top-level directory. + */ + +#include "qemu/osdep.h" +#include "sysemu/block-backend.h" +#include "block/block_int.h" +#include "block/blockjob.h" +#include "block/throttle-groups.h" +#include "hw/qdev-core.h" +#include "sysemu/blockdev.h" +#include "sysemu/runstate.h" +#include "sysemu/sysemu.h" +#include "sysemu/replay.h" +#include "qapi/error.h" +#include "qapi/qapi-events-block.h" +#include "qemu/id.h" +#include "qemu/main-loop.h" +#include "qemu/option.h" +#include "trace.h" +#include "migration/misc.h" + +/* Number of coroutines to reserve per attached device model */ +#define COROUTINE_POOL_RESERVATION 64 + +#define NOT_DONE 0x7fffffff /* used while emulated sync operation in progress */ + +static AioContext *blk_aiocb_get_aio_context(BlockAIOCB *acb); + +typedef struct BlockBackendAioNotifier { + void (*attached_aio_context)(AioContext *new_context, void *opaque); + void (*detach_aio_context)(void *opaque); + void *opaque; + QLIST_ENTRY(BlockBackendAioNotifier) list; +} BlockBackendAioNotifier; + +struct BlockBackend { + char *name; + int refcnt; + BdrvChild *root; + AioContext *ctx; + DriveInfo *legacy_dinfo; /* null unless created by drive_new() */ + QTAILQ_ENTRY(BlockBackend) link; /* for block_backends */ + QTAILQ_ENTRY(BlockBackend) monitor_link; /* for monitor_block_backends */ + BlockBackendPublic public; + + DeviceState *dev; /* attached device model, if any */ + const BlockDevOps *dev_ops; + void *dev_opaque; + + /* the block size for which the guest device expects atomicity */ + int guest_block_size; + + /* If the BDS tree is removed, some of its options are stored here (which + * can be used to restore those options in the new BDS on insert) */ + BlockBackendRootState root_state; + + bool enable_write_cache; + + /* I/O stats (display with "info blockstats"). */ + BlockAcctStats stats; + + BlockdevOnError on_read_error, on_write_error; + bool iostatus_enabled; + BlockDeviceIoStatus iostatus; + + uint64_t perm; + uint64_t shared_perm; + bool disable_perm; + + bool allow_aio_context_change; + bool allow_write_beyond_eof; + + NotifierList remove_bs_notifiers, insert_bs_notifiers; + QLIST_HEAD(, BlockBackendAioNotifier) aio_notifiers; + + int quiesce_counter; + CoQueue queued_requests; + bool disable_request_queuing; + + VMChangeStateEntry *vmsh; + bool force_allow_inactivate; + + /* Number of in-flight aio requests. BlockDriverState also counts + * in-flight requests but aio requests can exist even when blk->root is + * NULL, so we cannot rely on its counter for that case. + * Accessed with atomic ops. + */ + unsigned int in_flight; +}; + +typedef struct BlockBackendAIOCB { + BlockAIOCB common; + BlockBackend *blk; + int ret; +} BlockBackendAIOCB; + +static const AIOCBInfo block_backend_aiocb_info = { + .get_aio_context = blk_aiocb_get_aio_context, + .aiocb_size = sizeof(BlockBackendAIOCB), +}; + +static void drive_info_del(DriveInfo *dinfo); +static BlockBackend *bdrv_first_blk(BlockDriverState *bs); + +/* All BlockBackends */ +static QTAILQ_HEAD(, BlockBackend) block_backends = + QTAILQ_HEAD_INITIALIZER(block_backends); + +/* All BlockBackends referenced by the monitor and which are iterated through by + * blk_next() */ +static QTAILQ_HEAD(, BlockBackend) monitor_block_backends = + QTAILQ_HEAD_INITIALIZER(monitor_block_backends); + +static void blk_root_inherit_options(BdrvChildRole role, bool parent_is_format, + int *child_flags, QDict *child_options, + int parent_flags, QDict *parent_options) +{ + /* We're not supposed to call this function for root nodes */ + abort(); +} +static void blk_root_drained_begin(BdrvChild *child); +static bool blk_root_drained_poll(BdrvChild *child); +static void blk_root_drained_end(BdrvChild *child, int *drained_end_counter); + +static void blk_root_change_media(BdrvChild *child, bool load); +static void blk_root_resize(BdrvChild *child); + +static bool blk_root_can_set_aio_ctx(BdrvChild *child, AioContext *ctx, + GSList **ignore, Error **errp); +static void blk_root_set_aio_ctx(BdrvChild *child, AioContext *ctx, + GSList **ignore); + +static char *blk_root_get_parent_desc(BdrvChild *child) +{ + BlockBackend *blk = child->opaque; + char *dev_id; + + if (blk->name) { + return g_strdup(blk->name); + } + + dev_id = blk_get_attached_dev_id(blk); + if (*dev_id) { + return dev_id; + } else { + /* TODO Callback into the BB owner for something more detailed */ + g_free(dev_id); + return g_strdup("a block device"); + } +} + +static const char *blk_root_get_name(BdrvChild *child) +{ + return blk_name(child->opaque); +} + +static void blk_vm_state_changed(void *opaque, int running, RunState state) +{ + Error *local_err = NULL; + BlockBackend *blk = opaque; + + if (state == RUN_STATE_INMIGRATE) { + return; + } + + qemu_del_vm_change_state_handler(blk->vmsh); + blk->vmsh = NULL; + blk_set_perm(blk, blk->perm, blk->shared_perm, &local_err); + if (local_err) { + error_report_err(local_err); + } +} + +/* + * Notifies the user of the BlockBackend that migration has completed. qdev + * devices can tighten their permissions in response (specifically revoke + * shared write permissions that we needed for storage migration). + * + * If an error is returned, the VM cannot be allowed to be resumed. + */ +static void blk_root_activate(BdrvChild *child, Error **errp) +{ + BlockBackend *blk = child->opaque; + Error *local_err = NULL; + + if (!blk->disable_perm) { + return; + } + + blk->disable_perm = false; + + blk_set_perm(blk, blk->perm, BLK_PERM_ALL, &local_err); + if (local_err) { + error_propagate(errp, local_err); + blk->disable_perm = true; + return; + } + + if (runstate_check(RUN_STATE_INMIGRATE)) { + /* Activation can happen when migration process is still active, for + * example when nbd_server_add is called during non-shared storage + * migration. Defer the shared_perm update to migration completion. */ + if (!blk->vmsh) { + blk->vmsh = qemu_add_vm_change_state_handler(blk_vm_state_changed, + blk); + } + return; + } + + blk_set_perm(blk, blk->perm, blk->shared_perm, &local_err); + if (local_err) { + error_propagate(errp, local_err); + blk->disable_perm = true; + return; + } +} + +void blk_set_force_allow_inactivate(BlockBackend *blk) +{ + blk->force_allow_inactivate = true; +} + +static bool blk_can_inactivate(BlockBackend *blk) +{ + /* If it is a guest device, inactivate is ok. */ + if (blk->dev || blk_name(blk)[0]) { + return true; + } + + /* Inactivating means no more writes to the image can be done, + * even if those writes would be changes invisible to the + * guest. For block job BBs that satisfy this, we can just allow + * it. This is the case for mirror job source, which is required + * by libvirt non-shared block migration. */ + if (!(blk->perm & (BLK_PERM_WRITE | BLK_PERM_WRITE_UNCHANGED))) { + return true; + } + + return blk->force_allow_inactivate; +} + +static int blk_root_inactivate(BdrvChild *child) +{ + BlockBackend *blk = child->opaque; + + if (blk->disable_perm) { + return 0; + } + + if (!blk_can_inactivate(blk)) { + return -EPERM; + } + + blk->disable_perm = true; + if (blk->root) { + bdrv_child_try_set_perm(blk->root, 0, BLK_PERM_ALL, &error_abort); + } + + return 0; +} + +static void blk_root_attach(BdrvChild *child) +{ + BlockBackend *blk = child->opaque; + BlockBackendAioNotifier *notifier; + + trace_blk_root_attach(child, blk, child->bs); + + QLIST_FOREACH(notifier, &blk->aio_notifiers, list) { + bdrv_add_aio_context_notifier(child->bs, + notifier->attached_aio_context, + notifier->detach_aio_context, + notifier->opaque); + } +} + +static void blk_root_detach(BdrvChild *child) +{ + BlockBackend *blk = child->opaque; + BlockBackendAioNotifier *notifier; + + trace_blk_root_detach(child, blk, child->bs); + + QLIST_FOREACH(notifier, &blk->aio_notifiers, list) { + bdrv_remove_aio_context_notifier(child->bs, + notifier->attached_aio_context, + notifier->detach_aio_context, + notifier->opaque); + } +} + +static const BdrvChildClass child_root = { + .inherit_options = blk_root_inherit_options, + + .change_media = blk_root_change_media, + .resize = blk_root_resize, + .get_name = blk_root_get_name, + .get_parent_desc = blk_root_get_parent_desc, + + .drained_begin = blk_root_drained_begin, + .drained_poll = blk_root_drained_poll, + .drained_end = blk_root_drained_end, + + .activate = blk_root_activate, + .inactivate = blk_root_inactivate, + + .attach = blk_root_attach, + .detach = blk_root_detach, + + .can_set_aio_ctx = blk_root_can_set_aio_ctx, + .set_aio_ctx = blk_root_set_aio_ctx, +}; + +/* + * Create a new BlockBackend with a reference count of one. + * + * @perm is a bitmasks of BLK_PERM_* constants which describes the permissions + * to request for a block driver node that is attached to this BlockBackend. + * @shared_perm is a bitmask which describes which permissions may be granted + * to other users of the attached node. + * Both sets of permissions can be changed later using blk_set_perm(). + * + * Return the new BlockBackend on success, null on failure. + */ +BlockBackend *blk_new(AioContext *ctx, uint64_t perm, uint64_t shared_perm) +{ + BlockBackend *blk; + + blk = g_new0(BlockBackend, 1); + blk->refcnt = 1; + blk->ctx = ctx; + blk->perm = perm; + blk->shared_perm = shared_perm; + blk_set_enable_write_cache(blk, true); + + blk->on_read_error = BLOCKDEV_ON_ERROR_REPORT; + blk->on_write_error = BLOCKDEV_ON_ERROR_ENOSPC; + + block_acct_init(&blk->stats); + + qemu_co_queue_init(&blk->queued_requests); + notifier_list_init(&blk->remove_bs_notifiers); + notifier_list_init(&blk->insert_bs_notifiers); + QLIST_INIT(&blk->aio_notifiers); + + QTAILQ_INSERT_TAIL(&block_backends, blk, link); + return blk; +} + +/* + * Create a new BlockBackend connected to an existing BlockDriverState. + * + * @perm is a bitmasks of BLK_PERM_* constants which describes the + * permissions to request for @bs that is attached to this + * BlockBackend. @shared_perm is a bitmask which describes which + * permissions may be granted to other users of the attached node. + * Both sets of permissions can be changed later using blk_set_perm(). + * + * Return the new BlockBackend on success, null on failure. + */ +BlockBackend *blk_new_with_bs(BlockDriverState *bs, uint64_t perm, + uint64_t shared_perm, Error **errp) +{ + BlockBackend *blk = blk_new(bdrv_get_aio_context(bs), perm, shared_perm); + + if (blk_insert_bs(blk, bs, errp) < 0) { + blk_unref(blk); + return NULL; + } + return blk; +} + +/* + * Creates a new BlockBackend, opens a new BlockDriverState, and connects both. + * The new BlockBackend is in the main AioContext. + * + * Just as with bdrv_open(), after having called this function the reference to + * @options belongs to the block layer (even on failure). + * + * TODO: Remove @filename and @flags; it should be possible to specify a whole + * BDS tree just by specifying the @options QDict (or @reference, + * alternatively). At the time of adding this function, this is not possible, + * though, so callers of this function have to be able to specify @filename and + * @flags. + */ +BlockBackend *blk_new_open(const char *filename, const char *reference, + QDict *options, int flags, Error **errp) +{ + BlockBackend *blk; + BlockDriverState *bs; + uint64_t perm = 0; + + /* blk_new_open() is mainly used in .bdrv_create implementations and the + * tools where sharing isn't a concern because the BDS stays private, so we + * just request permission according to the flags. + * + * The exceptions are xen_disk and blockdev_init(); in these cases, the + * caller of blk_new_open() doesn't make use of the permissions, but they + * shouldn't hurt either. We can still share everything here because the + * guest devices will add their own blockers if they can't share. */ + if ((flags & BDRV_O_NO_IO) == 0) { + perm |= BLK_PERM_CONSISTENT_READ; + if (flags & BDRV_O_RDWR) { + perm |= BLK_PERM_WRITE; + } + } + if (flags & BDRV_O_RESIZE) { + perm |= BLK_PERM_RESIZE; + } + + blk = blk_new(qemu_get_aio_context(), perm, BLK_PERM_ALL); + bs = bdrv_open(filename, reference, options, flags, errp); + if (!bs) { + blk_unref(blk); + return NULL; + } + + blk->root = bdrv_root_attach_child(bs, "root", &child_root, + BDRV_CHILD_FILTERED | BDRV_CHILD_PRIMARY, + blk->ctx, perm, BLK_PERM_ALL, blk, errp); + if (!blk->root) { + blk_unref(blk); + return NULL; + } + + return blk; +} + +static void blk_delete(BlockBackend *blk) +{ + assert(!blk->refcnt); + assert(!blk->name); + assert(!blk->dev); + if (blk->public.throttle_group_member.throttle_state) { + blk_io_limits_disable(blk); + } + if (blk->root) { + blk_remove_bs(blk); + } + if (blk->vmsh) { + qemu_del_vm_change_state_handler(blk->vmsh); + blk->vmsh = NULL; + } + assert(QLIST_EMPTY(&blk->remove_bs_notifiers.notifiers)); + assert(QLIST_EMPTY(&blk->insert_bs_notifiers.notifiers)); + assert(QLIST_EMPTY(&blk->aio_notifiers)); + QTAILQ_REMOVE(&block_backends, blk, link); + drive_info_del(blk->legacy_dinfo); + block_acct_cleanup(&blk->stats); + g_free(blk); +} + +static void drive_info_del(DriveInfo *dinfo) +{ + if (!dinfo) { + return; + } + qemu_opts_del(dinfo->opts); + g_free(dinfo); +} + +int blk_get_refcnt(BlockBackend *blk) +{ + return blk ? blk->refcnt : 0; +} + +/* + * Increment @blk's reference count. + * @blk must not be null. + */ +void blk_ref(BlockBackend *blk) +{ + assert(blk->refcnt > 0); + blk->refcnt++; +} + +/* + * Decrement @blk's reference count. + * If this drops it to zero, destroy @blk. + * For convenience, do nothing if @blk is null. + */ +void blk_unref(BlockBackend *blk) +{ + if (blk) { + assert(blk->refcnt > 0); + if (blk->refcnt > 1) { + blk->refcnt--; + } else { + blk_drain(blk); + /* blk_drain() cannot resurrect blk, nobody held a reference */ + assert(blk->refcnt == 1); + blk->refcnt = 0; + blk_delete(blk); + } + } +} + +/* + * Behaves similarly to blk_next() but iterates over all BlockBackends, even the + * ones which are hidden (i.e. are not referenced by the monitor). + */ +BlockBackend *blk_all_next(BlockBackend *blk) +{ + return blk ? QTAILQ_NEXT(blk, link) + : QTAILQ_FIRST(&block_backends); +} + +void blk_remove_all_bs(void) +{ + BlockBackend *blk = NULL; + + while ((blk = blk_all_next(blk)) != NULL) { + AioContext *ctx = blk_get_aio_context(blk); + + aio_context_acquire(ctx); + if (blk->root) { + blk_remove_bs(blk); + } + aio_context_release(ctx); + } +} + +/* + * Return the monitor-owned BlockBackend after @blk. + * If @blk is null, return the first one. + * Else, return @blk's next sibling, which may be null. + * + * To iterate over all BlockBackends, do + * for (blk = blk_next(NULL); blk; blk = blk_next(blk)) { + * ... + * } + */ +BlockBackend *blk_next(BlockBackend *blk) +{ + return blk ? QTAILQ_NEXT(blk, monitor_link) + : QTAILQ_FIRST(&monitor_block_backends); +} + +/* Iterates over all top-level BlockDriverStates, i.e. BDSs that are owned by + * the monitor or attached to a BlockBackend */ +BlockDriverState *bdrv_next(BdrvNextIterator *it) +{ + BlockDriverState *bs, *old_bs; + + /* Must be called from the main loop */ + assert(qemu_get_current_aio_context() == qemu_get_aio_context()); + + /* First, return all root nodes of BlockBackends. In order to avoid + * returning a BDS twice when multiple BBs refer to it, we only return it + * if the BB is the first one in the parent list of the BDS. */ + if (it->phase == BDRV_NEXT_BACKEND_ROOTS) { + BlockBackend *old_blk = it->blk; + + old_bs = old_blk ? blk_bs(old_blk) : NULL; + + do { + it->blk = blk_all_next(it->blk); + bs = it->blk ? blk_bs(it->blk) : NULL; + } while (it->blk && (bs == NULL || bdrv_first_blk(bs) != it->blk)); + + if (it->blk) { + blk_ref(it->blk); + } + blk_unref(old_blk); + + if (bs) { + bdrv_ref(bs); + bdrv_unref(old_bs); + return bs; + } + it->phase = BDRV_NEXT_MONITOR_OWNED; + } else { + old_bs = it->bs; + } + + /* Then return the monitor-owned BDSes without a BB attached. Ignore all + * BDSes that are attached to a BlockBackend here; they have been handled + * by the above block already */ + do { + it->bs = bdrv_next_monitor_owned(it->bs); + bs = it->bs; + } while (bs && bdrv_has_blk(bs)); + + if (bs) { + bdrv_ref(bs); + } + bdrv_unref(old_bs); + + return bs; +} + +static void bdrv_next_reset(BdrvNextIterator *it) +{ + *it = (BdrvNextIterator) { + .phase = BDRV_NEXT_BACKEND_ROOTS, + }; +} + +BlockDriverState *bdrv_first(BdrvNextIterator *it) +{ + bdrv_next_reset(it); + return bdrv_next(it); +} + +/* Must be called when aborting a bdrv_next() iteration before + * bdrv_next() returns NULL */ +void bdrv_next_cleanup(BdrvNextIterator *it) +{ + /* Must be called from the main loop */ + assert(qemu_get_current_aio_context() == qemu_get_aio_context()); + + if (it->phase == BDRV_NEXT_BACKEND_ROOTS) { + if (it->blk) { + bdrv_unref(blk_bs(it->blk)); + blk_unref(it->blk); + } + } else { + bdrv_unref(it->bs); + } + + bdrv_next_reset(it); +} + +/* + * Add a BlockBackend into the list of backends referenced by the monitor, with + * the given @name acting as the handle for the monitor. + * Strictly for use by blockdev.c. + * + * @name must not be null or empty. + * + * Returns true on success and false on failure. In the latter case, an Error + * object is returned through @errp. + */ +bool monitor_add_blk(BlockBackend *blk, const char *name, Error **errp) +{ + assert(!blk->name); + assert(name && name[0]); + + if (!id_wellformed(name)) { + error_setg(errp, "Invalid device name"); + return false; + } + if (blk_by_name(name)) { + error_setg(errp, "Device with id '%s' already exists", name); + return false; + } + if (bdrv_find_node(name)) { + error_setg(errp, + "Device name '%s' conflicts with an existing node name", + name); + return false; + } + + blk->name = g_strdup(name); + QTAILQ_INSERT_TAIL(&monitor_block_backends, blk, monitor_link); + return true; +} + +/* + * Remove a BlockBackend from the list of backends referenced by the monitor. + * Strictly for use by blockdev.c. + */ +void monitor_remove_blk(BlockBackend *blk) +{ + if (!blk->name) { + return; + } + + QTAILQ_REMOVE(&monitor_block_backends, blk, monitor_link); + g_free(blk->name); + blk->name = NULL; +} + +/* + * Return @blk's name, a non-null string. + * Returns an empty string iff @blk is not referenced by the monitor. + */ +const char *blk_name(const BlockBackend *blk) +{ + return blk->name ?: ""; +} + +/* + * Return the BlockBackend with name @name if it exists, else null. + * @name must not be null. + */ +BlockBackend *blk_by_name(const char *name) +{ + BlockBackend *blk = NULL; + + assert(name); + while ((blk = blk_next(blk)) != NULL) { + if (!strcmp(name, blk->name)) { + return blk; + } + } + return NULL; +} + +/* + * Return the BlockDriverState attached to @blk if any, else null. + */ +BlockDriverState *blk_bs(BlockBackend *blk) +{ + return blk->root ? blk->root->bs : NULL; +} + +static BlockBackend *bdrv_first_blk(BlockDriverState *bs) +{ + BdrvChild *child; + QLIST_FOREACH(child, &bs->parents, next_parent) { + if (child->klass == &child_root) { + return child->opaque; + } + } + + return NULL; +} + +/* + * Returns true if @bs has an associated BlockBackend. + */ +bool bdrv_has_blk(BlockDriverState *bs) +{ + return bdrv_first_blk(bs) != NULL; +} + +/* + * Returns true if @bs has only BlockBackends as parents. + */ +bool bdrv_is_root_node(BlockDriverState *bs) +{ + BdrvChild *c; + + QLIST_FOREACH(c, &bs->parents, next_parent) { + if (c->klass != &child_root) { + return false; + } + } + + return true; +} + +/* + * Return @blk's DriveInfo if any, else null. + */ +DriveInfo *blk_legacy_dinfo(BlockBackend *blk) +{ + return blk->legacy_dinfo; +} + +/* + * Set @blk's DriveInfo to @dinfo, and return it. + * @blk must not have a DriveInfo set already. + * No other BlockBackend may have the same DriveInfo set. + */ +DriveInfo *blk_set_legacy_dinfo(BlockBackend *blk, DriveInfo *dinfo) +{ + assert(!blk->legacy_dinfo); + return blk->legacy_dinfo = dinfo; +} + +/* + * Return the BlockBackend with DriveInfo @dinfo. + * It must exist. + */ +BlockBackend *blk_by_legacy_dinfo(DriveInfo *dinfo) +{ + BlockBackend *blk = NULL; + + while ((blk = blk_next(blk)) != NULL) { + if (blk->legacy_dinfo == dinfo) { + return blk; + } + } + abort(); +} + +/* + * Returns a pointer to the publicly accessible fields of @blk. + */ +BlockBackendPublic *blk_get_public(BlockBackend *blk) +{ + return &blk->public; +} + +/* + * Returns a BlockBackend given the associated @public fields. + */ +BlockBackend *blk_by_public(BlockBackendPublic *public) +{ + return container_of(public, BlockBackend, public); +} + +/* + * Disassociates the currently associated BlockDriverState from @blk. + */ +void blk_remove_bs(BlockBackend *blk) +{ + ThrottleGroupMember *tgm = &blk->public.throttle_group_member; + BlockDriverState *bs; + BdrvChild *root; + + notifier_list_notify(&blk->remove_bs_notifiers, blk); + if (tgm->throttle_state) { + bs = blk_bs(blk); + bdrv_drained_begin(bs); + throttle_group_detach_aio_context(tgm); + throttle_group_attach_aio_context(tgm, qemu_get_aio_context()); + bdrv_drained_end(bs); + } + + blk_update_root_state(blk); + + /* bdrv_root_unref_child() will cause blk->root to become stale and may + * switch to a completion coroutine later on. Let's drain all I/O here + * to avoid that and a potential QEMU crash. + */ + blk_drain(blk); + root = blk->root; + blk->root = NULL; + bdrv_root_unref_child(root); +} + +/* + * Associates a new BlockDriverState with @blk. + */ +int blk_insert_bs(BlockBackend *blk, BlockDriverState *bs, Error **errp) +{ + ThrottleGroupMember *tgm = &blk->public.throttle_group_member; + bdrv_ref(bs); + blk->root = bdrv_root_attach_child(bs, "root", &child_root, + BDRV_CHILD_FILTERED | BDRV_CHILD_PRIMARY, + blk->ctx, blk->perm, blk->shared_perm, + blk, errp); + if (blk->root == NULL) { + return -EPERM; + } + + notifier_list_notify(&blk->insert_bs_notifiers, blk); + if (tgm->throttle_state) { + throttle_group_detach_aio_context(tgm); + throttle_group_attach_aio_context(tgm, bdrv_get_aio_context(bs)); + } + + return 0; +} + +/* + * Sets the permission bitmasks that the user of the BlockBackend needs. + */ +int blk_set_perm(BlockBackend *blk, uint64_t perm, uint64_t shared_perm, + Error **errp) +{ + int ret; + + if (blk->root && !blk->disable_perm) { + ret = bdrv_child_try_set_perm(blk->root, perm, shared_perm, errp); + if (ret < 0) { + return ret; + } + } + + blk->perm = perm; + blk->shared_perm = shared_perm; + + return 0; +} + +void blk_get_perm(BlockBackend *blk, uint64_t *perm, uint64_t *shared_perm) +{ + *perm = blk->perm; + *shared_perm = blk->shared_perm; +} + +/* + * Attach device model @dev to @blk. + * Return 0 on success, -EBUSY when a device model is attached already. + */ +int blk_attach_dev(BlockBackend *blk, DeviceState *dev) +{ + if (blk->dev) { + return -EBUSY; + } + + /* While migration is still incoming, we don't need to apply the + * permissions of guest device BlockBackends. We might still have a block + * job or NBD server writing to the image for storage migration. */ + if (runstate_check(RUN_STATE_INMIGRATE)) { + blk->disable_perm = true; + } + + blk_ref(blk); + blk->dev = dev; + blk_iostatus_reset(blk); + + return 0; +} + +/* + * Detach device model @dev from @blk. + * @dev must be currently attached to @blk. + */ +void blk_detach_dev(BlockBackend *blk, DeviceState *dev) +{ + assert(blk->dev == dev); + blk->dev = NULL; + blk->dev_ops = NULL; + blk->dev_opaque = NULL; + blk->guest_block_size = 512; + blk_set_perm(blk, 0, BLK_PERM_ALL, &error_abort); + blk_unref(blk); +} + +/* + * Return the device model attached to @blk if any, else null. + */ +DeviceState *blk_get_attached_dev(BlockBackend *blk) +{ + return blk->dev; +} + +/* Return the qdev ID, or if no ID is assigned the QOM path, of the block + * device attached to the BlockBackend. */ +char *blk_get_attached_dev_id(BlockBackend *blk) +{ + DeviceState *dev = blk->dev; + + if (!dev) { + return g_strdup(""); + } else if (dev->id) { + return g_strdup(dev->id); + } + + return object_get_canonical_path(OBJECT(dev)) ?: g_strdup(""); +} + +/* + * Return the BlockBackend which has the device model @dev attached if it + * exists, else null. + * + * @dev must not be null. + */ +BlockBackend *blk_by_dev(void *dev) +{ + BlockBackend *blk = NULL; + + assert(dev != NULL); + while ((blk = blk_all_next(blk)) != NULL) { + if (blk->dev == dev) { + return blk; + } + } + return NULL; +} + +/* + * Set @blk's device model callbacks to @ops. + * @opaque is the opaque argument to pass to the callbacks. + * This is for use by device models. + */ +void blk_set_dev_ops(BlockBackend *blk, const BlockDevOps *ops, + void *opaque) +{ + blk->dev_ops = ops; + blk->dev_opaque = opaque; + + /* Are we currently quiesced? Should we enforce this right now? */ + if (blk->quiesce_counter && ops->drained_begin) { + ops->drained_begin(opaque); + } +} + +/* + * Notify @blk's attached device model of media change. + * + * If @load is true, notify of media load. This action can fail, meaning that + * the medium cannot be loaded. @errp is set then. + * + * If @load is false, notify of media eject. This can never fail. + * + * Also send DEVICE_TRAY_MOVED events as appropriate. + */ +void blk_dev_change_media_cb(BlockBackend *blk, bool load, Error **errp) +{ + if (blk->dev_ops && blk->dev_ops->change_media_cb) { + bool tray_was_open, tray_is_open; + Error *local_err = NULL; + + tray_was_open = blk_dev_is_tray_open(blk); + blk->dev_ops->change_media_cb(blk->dev_opaque, load, &local_err); + if (local_err) { + assert(load == true); + error_propagate(errp, local_err); + return; + } + tray_is_open = blk_dev_is_tray_open(blk); + + if (tray_was_open != tray_is_open) { + char *id = blk_get_attached_dev_id(blk); + qapi_event_send_device_tray_moved(blk_name(blk), id, tray_is_open); + g_free(id); + } + } +} + +static void blk_root_change_media(BdrvChild *child, bool load) +{ + blk_dev_change_media_cb(child->opaque, load, NULL); +} + +/* + * Does @blk's attached device model have removable media? + * %true if no device model is attached. + */ +bool blk_dev_has_removable_media(BlockBackend *blk) +{ + return !blk->dev || (blk->dev_ops && blk->dev_ops->change_media_cb); +} + +/* + * Does @blk's attached device model have a tray? + */ +bool blk_dev_has_tray(BlockBackend *blk) +{ + return blk->dev_ops && blk->dev_ops->is_tray_open; +} + +/* + * Notify @blk's attached device model of a media eject request. + * If @force is true, the medium is about to be yanked out forcefully. + */ +void blk_dev_eject_request(BlockBackend *blk, bool force) +{ + if (blk->dev_ops && blk->dev_ops->eject_request_cb) { + blk->dev_ops->eject_request_cb(blk->dev_opaque, force); + } +} + +/* + * Does @blk's attached device model have a tray, and is it open? + */ +bool blk_dev_is_tray_open(BlockBackend *blk) +{ + if (blk_dev_has_tray(blk)) { + return blk->dev_ops->is_tray_open(blk->dev_opaque); + } + return false; +} + +/* + * Does @blk's attached device model have the medium locked? + * %false if the device model has no such lock. + */ +bool blk_dev_is_medium_locked(BlockBackend *blk) +{ + if (blk->dev_ops && blk->dev_ops->is_medium_locked) { + return blk->dev_ops->is_medium_locked(blk->dev_opaque); + } + return false; +} + +/* + * Notify @blk's attached device model of a backend size change. + */ +static void blk_root_resize(BdrvChild *child) +{ + BlockBackend *blk = child->opaque; + + if (blk->dev_ops && blk->dev_ops->resize_cb) { + blk->dev_ops->resize_cb(blk->dev_opaque); + } +} + +void blk_iostatus_enable(BlockBackend *blk) +{ + blk->iostatus_enabled = true; + blk->iostatus = BLOCK_DEVICE_IO_STATUS_OK; +} + +/* The I/O status is only enabled if the drive explicitly + * enables it _and_ the VM is configured to stop on errors */ +bool blk_iostatus_is_enabled(const BlockBackend *blk) +{ + return (blk->iostatus_enabled && + (blk->on_write_error == BLOCKDEV_ON_ERROR_ENOSPC || + blk->on_write_error == BLOCKDEV_ON_ERROR_STOP || + blk->on_read_error == BLOCKDEV_ON_ERROR_STOP)); +} + +BlockDeviceIoStatus blk_iostatus(const BlockBackend *blk) +{ + return blk->iostatus; +} + +void blk_iostatus_disable(BlockBackend *blk) +{ + blk->iostatus_enabled = false; +} + +void blk_iostatus_reset(BlockBackend *blk) +{ + if (blk_iostatus_is_enabled(blk)) { + blk->iostatus = BLOCK_DEVICE_IO_STATUS_OK; + } +} + +void blk_iostatus_set_err(BlockBackend *blk, int error) +{ + assert(blk_iostatus_is_enabled(blk)); + if (blk->iostatus == BLOCK_DEVICE_IO_STATUS_OK) { + blk->iostatus = error == ENOSPC ? BLOCK_DEVICE_IO_STATUS_NOSPACE : + BLOCK_DEVICE_IO_STATUS_FAILED; + } +} + +void blk_set_allow_write_beyond_eof(BlockBackend *blk, bool allow) +{ + blk->allow_write_beyond_eof = allow; +} + +void blk_set_allow_aio_context_change(BlockBackend *blk, bool allow) +{ + blk->allow_aio_context_change = allow; +} + +void blk_set_disable_request_queuing(BlockBackend *blk, bool disable) +{ + blk->disable_request_queuing = disable; +} + +static int blk_check_byte_request(BlockBackend *blk, int64_t offset, + size_t size) +{ + int64_t len; + + if (size > INT_MAX) { + return -EIO; + } + + if (!blk_is_available(blk)) { + return -ENOMEDIUM; + } + + if (offset < 0) { + return -EIO; + } + + if (!blk->allow_write_beyond_eof) { + len = blk_getlength(blk); + if (len < 0) { + return len; + } + + if (offset > len || len - offset < size) { + return -EIO; + } + } + + return 0; +} + +/* To be called between exactly one pair of blk_inc/dec_in_flight() */ +static void coroutine_fn blk_wait_while_drained(BlockBackend *blk) +{ + assert(blk->in_flight > 0); + + if (blk->quiesce_counter && !blk->disable_request_queuing) { + blk_dec_in_flight(blk); + qemu_co_queue_wait(&blk->queued_requests, NULL); + blk_inc_in_flight(blk); + } +} + +/* To be called between exactly one pair of blk_inc/dec_in_flight() */ +static int coroutine_fn +blk_do_preadv(BlockBackend *blk, int64_t offset, unsigned int bytes, + QEMUIOVector *qiov, BdrvRequestFlags flags) +{ + int ret; + BlockDriverState *bs; + + blk_wait_while_drained(blk); + + /* Call blk_bs() only after waiting, the graph may have changed */ + bs = blk_bs(blk); + trace_blk_co_preadv(blk, bs, offset, bytes, flags); + + ret = blk_check_byte_request(blk, offset, bytes); + if (ret < 0) { + return ret; + } + + bdrv_inc_in_flight(bs); + + /* throttling disk I/O */ + if (blk->public.throttle_group_member.throttle_state) { + throttle_group_co_io_limits_intercept(&blk->public.throttle_group_member, + bytes, false); + } + + ret = bdrv_co_preadv(blk->root, offset, bytes, qiov, flags); + bdrv_dec_in_flight(bs); + return ret; +} + +int coroutine_fn blk_co_preadv(BlockBackend *blk, int64_t offset, + unsigned int bytes, QEMUIOVector *qiov, + BdrvRequestFlags flags) +{ + int ret; + + blk_inc_in_flight(blk); + ret = blk_do_preadv(blk, offset, bytes, qiov, flags); + blk_dec_in_flight(blk); + + return ret; +} + +/* To be called between exactly one pair of blk_inc/dec_in_flight() */ +static int coroutine_fn +blk_do_pwritev_part(BlockBackend *blk, int64_t offset, unsigned int bytes, + QEMUIOVector *qiov, size_t qiov_offset, + BdrvRequestFlags flags) +{ + int ret; + BlockDriverState *bs; + + blk_wait_while_drained(blk); + + /* Call blk_bs() only after waiting, the graph may have changed */ + bs = blk_bs(blk); + trace_blk_co_pwritev(blk, bs, offset, bytes, flags); + + ret = blk_check_byte_request(blk, offset, bytes); + if (ret < 0) { + return ret; + } + + bdrv_inc_in_flight(bs); + /* throttling disk I/O */ + if (blk->public.throttle_group_member.throttle_state) { + throttle_group_co_io_limits_intercept(&blk->public.throttle_group_member, + bytes, true); + } + + if (!blk->enable_write_cache) { + flags |= BDRV_REQ_FUA; + } + + ret = bdrv_co_pwritev_part(blk->root, offset, bytes, qiov, qiov_offset, + flags); + bdrv_dec_in_flight(bs); + return ret; +} + +int coroutine_fn blk_co_pwritev_part(BlockBackend *blk, int64_t offset, + unsigned int bytes, + QEMUIOVector *qiov, size_t qiov_offset, + BdrvRequestFlags flags) +{ + int ret; + + blk_inc_in_flight(blk); + ret = blk_do_pwritev_part(blk, offset, bytes, qiov, qiov_offset, flags); + blk_dec_in_flight(blk); + + return ret; +} + +int coroutine_fn blk_co_pwritev(BlockBackend *blk, int64_t offset, + unsigned int bytes, QEMUIOVector *qiov, + BdrvRequestFlags flags) +{ + return blk_co_pwritev_part(blk, offset, bytes, qiov, 0, flags); +} + +typedef struct BlkRwCo { + BlockBackend *blk; + int64_t offset; + void *iobuf; + int ret; + BdrvRequestFlags flags; +} BlkRwCo; + +static void blk_read_entry(void *opaque) +{ + BlkRwCo *rwco = opaque; + QEMUIOVector *qiov = rwco->iobuf; + + rwco->ret = blk_do_preadv(rwco->blk, rwco->offset, qiov->size, + qiov, rwco->flags); + aio_wait_kick(); +} + +static void blk_write_entry(void *opaque) +{ + BlkRwCo *rwco = opaque; + QEMUIOVector *qiov = rwco->iobuf; + + rwco->ret = blk_do_pwritev_part(rwco->blk, rwco->offset, qiov->size, + qiov, 0, rwco->flags); + aio_wait_kick(); +} + +static int blk_prw(BlockBackend *blk, int64_t offset, uint8_t *buf, + int64_t bytes, CoroutineEntry co_entry, + BdrvRequestFlags flags) +{ + QEMUIOVector qiov = QEMU_IOVEC_INIT_BUF(qiov, buf, bytes); + BlkRwCo rwco = { + .blk = blk, + .offset = offset, + .iobuf = &qiov, + .flags = flags, + .ret = NOT_DONE, + }; + + blk_inc_in_flight(blk); + if (qemu_in_coroutine()) { + /* Fast-path if already in coroutine context */ + co_entry(&rwco); + } else { + Coroutine *co = qemu_coroutine_create(co_entry, &rwco); + bdrv_coroutine_enter(blk_bs(blk), co); + BDRV_POLL_WHILE(blk_bs(blk), rwco.ret == NOT_DONE); + } + blk_dec_in_flight(blk); + + return rwco.ret; +} + +int blk_pwrite_zeroes(BlockBackend *blk, int64_t offset, + int bytes, BdrvRequestFlags flags) +{ + return blk_prw(blk, offset, NULL, bytes, blk_write_entry, + flags | BDRV_REQ_ZERO_WRITE); +} + +int blk_make_zero(BlockBackend *blk, BdrvRequestFlags flags) +{ + return bdrv_make_zero(blk->root, flags); +} + +void blk_inc_in_flight(BlockBackend *blk) +{ + qatomic_inc(&blk->in_flight); +} + +void blk_dec_in_flight(BlockBackend *blk) +{ + qatomic_dec(&blk->in_flight); + aio_wait_kick(); +} + +static void error_callback_bh(void *opaque) +{ + struct BlockBackendAIOCB *acb = opaque; + + blk_dec_in_flight(acb->blk); + acb->common.cb(acb->common.opaque, acb->ret); + qemu_aio_unref(acb); +} + +BlockAIOCB *blk_abort_aio_request(BlockBackend *blk, + BlockCompletionFunc *cb, + void *opaque, int ret) +{ + struct BlockBackendAIOCB *acb; + + blk_inc_in_flight(blk); + acb = blk_aio_get(&block_backend_aiocb_info, blk, cb, opaque); + acb->blk = blk; + acb->ret = ret; + + replay_bh_schedule_oneshot_event(blk_get_aio_context(blk), + error_callback_bh, acb); + return &acb->common; +} + +typedef struct BlkAioEmAIOCB { + BlockAIOCB common; + BlkRwCo rwco; + int bytes; + bool has_returned; +} BlkAioEmAIOCB; + +static AioContext *blk_aio_em_aiocb_get_aio_context(BlockAIOCB *acb_) +{ + BlkAioEmAIOCB *acb = container_of(acb_, BlkAioEmAIOCB, common); + + return blk_get_aio_context(acb->rwco.blk); +} + +static const AIOCBInfo blk_aio_em_aiocb_info = { + .aiocb_size = sizeof(BlkAioEmAIOCB), + .get_aio_context = blk_aio_em_aiocb_get_aio_context, +}; + +static void blk_aio_complete(BlkAioEmAIOCB *acb) +{ + if (acb->has_returned) { + acb->common.cb(acb->common.opaque, acb->rwco.ret); + blk_dec_in_flight(acb->rwco.blk); + qemu_aio_unref(acb); + } +} + +static void blk_aio_complete_bh(void *opaque) +{ + BlkAioEmAIOCB *acb = opaque; + assert(acb->has_returned); + blk_aio_complete(acb); +} + +static BlockAIOCB *blk_aio_prwv(BlockBackend *blk, int64_t offset, int bytes, + void *iobuf, CoroutineEntry co_entry, + BdrvRequestFlags flags, + BlockCompletionFunc *cb, void *opaque) +{ + BlkAioEmAIOCB *acb; + Coroutine *co; + + blk_inc_in_flight(blk); + acb = blk_aio_get(&blk_aio_em_aiocb_info, blk, cb, opaque); + acb->rwco = (BlkRwCo) { + .blk = blk, + .offset = offset, + .iobuf = iobuf, + .flags = flags, + .ret = NOT_DONE, + }; + acb->bytes = bytes; + acb->has_returned = false; + + co = qemu_coroutine_create(co_entry, acb); + bdrv_coroutine_enter(blk_bs(blk), co); + + acb->has_returned = true; + if (acb->rwco.ret != NOT_DONE) { + replay_bh_schedule_oneshot_event(blk_get_aio_context(blk), + blk_aio_complete_bh, acb); + } + + return &acb->common; +} + +static void blk_aio_read_entry(void *opaque) +{ + BlkAioEmAIOCB *acb = opaque; + BlkRwCo *rwco = &acb->rwco; + QEMUIOVector *qiov = rwco->iobuf; + + assert(qiov->size == acb->bytes); + rwco->ret = blk_do_preadv(rwco->blk, rwco->offset, acb->bytes, + qiov, rwco->flags); + blk_aio_complete(acb); +} + +static void blk_aio_write_entry(void *opaque) +{ + BlkAioEmAIOCB *acb = opaque; + BlkRwCo *rwco = &acb->rwco; + QEMUIOVector *qiov = rwco->iobuf; + + assert(!qiov || qiov->size == acb->bytes); + rwco->ret = blk_do_pwritev_part(rwco->blk, rwco->offset, acb->bytes, + qiov, 0, rwco->flags); + blk_aio_complete(acb); +} + +BlockAIOCB *blk_aio_pwrite_zeroes(BlockBackend *blk, int64_t offset, + int count, BdrvRequestFlags flags, + BlockCompletionFunc *cb, void *opaque) +{ + return blk_aio_prwv(blk, offset, count, NULL, blk_aio_write_entry, + flags | BDRV_REQ_ZERO_WRITE, cb, opaque); +} + +int blk_pread(BlockBackend *blk, int64_t offset, void *buf, int count) +{ + int ret = blk_prw(blk, offset, buf, count, blk_read_entry, 0); + if (ret < 0) { + return ret; + } + return count; +} + +int blk_pwrite(BlockBackend *blk, int64_t offset, const void *buf, int count, + BdrvRequestFlags flags) +{ + int ret = blk_prw(blk, offset, (void *) buf, count, blk_write_entry, + flags); + if (ret < 0) { + return ret; + } + return count; +} + +int64_t blk_getlength(BlockBackend *blk) +{ + if (!blk_is_available(blk)) { + return -ENOMEDIUM; + } + + return bdrv_getlength(blk_bs(blk)); +} + +void blk_get_geometry(BlockBackend *blk, uint64_t *nb_sectors_ptr) +{ + if (!blk_bs(blk)) { + *nb_sectors_ptr = 0; + } else { + bdrv_get_geometry(blk_bs(blk), nb_sectors_ptr); + } +} + +int64_t blk_nb_sectors(BlockBackend *blk) +{ + if (!blk_is_available(blk)) { + return -ENOMEDIUM; + } + + return bdrv_nb_sectors(blk_bs(blk)); +} + +BlockAIOCB *blk_aio_preadv(BlockBackend *blk, int64_t offset, + QEMUIOVector *qiov, BdrvRequestFlags flags, + BlockCompletionFunc *cb, void *opaque) +{ + return blk_aio_prwv(blk, offset, qiov->size, qiov, + blk_aio_read_entry, flags, cb, opaque); +} + +BlockAIOCB *blk_aio_pwritev(BlockBackend *blk, int64_t offset, + QEMUIOVector *qiov, BdrvRequestFlags flags, + BlockCompletionFunc *cb, void *opaque) +{ + return blk_aio_prwv(blk, offset, qiov->size, qiov, + blk_aio_write_entry, flags, cb, opaque); +} + +void blk_aio_cancel(BlockAIOCB *acb) +{ + bdrv_aio_cancel(acb); +} + +void blk_aio_cancel_async(BlockAIOCB *acb) +{ + bdrv_aio_cancel_async(acb); +} + +/* To be called between exactly one pair of blk_inc/dec_in_flight() */ +static int coroutine_fn +blk_do_ioctl(BlockBackend *blk, unsigned long int req, void *buf) +{ + blk_wait_while_drained(blk); + + if (!blk_is_available(blk)) { + return -ENOMEDIUM; + } + + return bdrv_co_ioctl(blk_bs(blk), req, buf); +} + +static void blk_ioctl_entry(void *opaque) +{ + BlkRwCo *rwco = opaque; + QEMUIOVector *qiov = rwco->iobuf; + + rwco->ret = blk_do_ioctl(rwco->blk, rwco->offset, qiov->iov[0].iov_base); + aio_wait_kick(); +} + +int blk_ioctl(BlockBackend *blk, unsigned long int req, void *buf) +{ + return blk_prw(blk, req, buf, 0, blk_ioctl_entry, 0); +} + +static void blk_aio_ioctl_entry(void *opaque) +{ + BlkAioEmAIOCB *acb = opaque; + BlkRwCo *rwco = &acb->rwco; + + rwco->ret = blk_do_ioctl(rwco->blk, rwco->offset, rwco->iobuf); + + blk_aio_complete(acb); +} + +BlockAIOCB *blk_aio_ioctl(BlockBackend *blk, unsigned long int req, void *buf, + BlockCompletionFunc *cb, void *opaque) +{ + return blk_aio_prwv(blk, req, 0, buf, blk_aio_ioctl_entry, 0, cb, opaque); +} + +/* To be called between exactly one pair of blk_inc/dec_in_flight() */ +static int coroutine_fn +blk_do_pdiscard(BlockBackend *blk, int64_t offset, int bytes) +{ + int ret; + + blk_wait_while_drained(blk); + + ret = blk_check_byte_request(blk, offset, bytes); + if (ret < 0) { + return ret; + } + + return bdrv_co_pdiscard(blk->root, offset, bytes); +} + +static void blk_aio_pdiscard_entry(void *opaque) +{ + BlkAioEmAIOCB *acb = opaque; + BlkRwCo *rwco = &acb->rwco; + + rwco->ret = blk_do_pdiscard(rwco->blk, rwco->offset, acb->bytes); + blk_aio_complete(acb); +} + +BlockAIOCB *blk_aio_pdiscard(BlockBackend *blk, + int64_t offset, int bytes, + BlockCompletionFunc *cb, void *opaque) +{ + return blk_aio_prwv(blk, offset, bytes, NULL, blk_aio_pdiscard_entry, 0, + cb, opaque); +} + +int coroutine_fn blk_co_pdiscard(BlockBackend *blk, int64_t offset, int bytes) +{ + int ret; + + blk_inc_in_flight(blk); + ret = blk_do_pdiscard(blk, offset, bytes); + blk_dec_in_flight(blk); + + return ret; +} + +static void blk_pdiscard_entry(void *opaque) +{ + BlkRwCo *rwco = opaque; + QEMUIOVector *qiov = rwco->iobuf; + + rwco->ret = blk_do_pdiscard(rwco->blk, rwco->offset, qiov->size); + aio_wait_kick(); +} + +int blk_pdiscard(BlockBackend *blk, int64_t offset, int bytes) +{ + return blk_prw(blk, offset, NULL, bytes, blk_pdiscard_entry, 0); +} + +/* To be called between exactly one pair of blk_inc/dec_in_flight() */ +static int coroutine_fn blk_do_flush(BlockBackend *blk) +{ + blk_wait_while_drained(blk); + + if (!blk_is_available(blk)) { + return -ENOMEDIUM; + } + + return bdrv_co_flush(blk_bs(blk)); +} + +static void blk_aio_flush_entry(void *opaque) +{ + BlkAioEmAIOCB *acb = opaque; + BlkRwCo *rwco = &acb->rwco; + + rwco->ret = blk_do_flush(rwco->blk); + blk_aio_complete(acb); +} + +BlockAIOCB *blk_aio_flush(BlockBackend *blk, + BlockCompletionFunc *cb, void *opaque) +{ + return blk_aio_prwv(blk, 0, 0, NULL, blk_aio_flush_entry, 0, cb, opaque); +} + +int coroutine_fn blk_co_flush(BlockBackend *blk) +{ + int ret; + + blk_inc_in_flight(blk); + ret = blk_do_flush(blk); + blk_dec_in_flight(blk); + + return ret; +} + +static void blk_flush_entry(void *opaque) +{ + BlkRwCo *rwco = opaque; + rwco->ret = blk_do_flush(rwco->blk); + aio_wait_kick(); +} + +int blk_flush(BlockBackend *blk) +{ + return blk_prw(blk, 0, NULL, 0, blk_flush_entry, 0); +} + +void blk_drain(BlockBackend *blk) +{ + BlockDriverState *bs = blk_bs(blk); + + if (bs) { + bdrv_drained_begin(bs); + } + + /* We may have -ENOMEDIUM completions in flight */ + AIO_WAIT_WHILE(blk_get_aio_context(blk), + qatomic_mb_read(&blk->in_flight) > 0); + + if (bs) { + bdrv_drained_end(bs); + } +} + +void blk_drain_all(void) +{ + BlockBackend *blk = NULL; + + bdrv_drain_all_begin(); + + while ((blk = blk_all_next(blk)) != NULL) { + AioContext *ctx = blk_get_aio_context(blk); + + aio_context_acquire(ctx); + + /* We may have -ENOMEDIUM completions in flight */ + AIO_WAIT_WHILE(ctx, qatomic_mb_read(&blk->in_flight) > 0); + + aio_context_release(ctx); + } + + bdrv_drain_all_end(); +} + +void blk_set_on_error(BlockBackend *blk, BlockdevOnError on_read_error, + BlockdevOnError on_write_error) +{ + blk->on_read_error = on_read_error; + blk->on_write_error = on_write_error; +} + +BlockdevOnError blk_get_on_error(BlockBackend *blk, bool is_read) +{ + return is_read ? blk->on_read_error : blk->on_write_error; +} + +BlockErrorAction blk_get_error_action(BlockBackend *blk, bool is_read, + int error) +{ + BlockdevOnError on_err = blk_get_on_error(blk, is_read); + + switch (on_err) { + case BLOCKDEV_ON_ERROR_ENOSPC: + return (error == ENOSPC) ? + BLOCK_ERROR_ACTION_STOP : BLOCK_ERROR_ACTION_REPORT; + case BLOCKDEV_ON_ERROR_STOP: + return BLOCK_ERROR_ACTION_STOP; + case BLOCKDEV_ON_ERROR_REPORT: + return BLOCK_ERROR_ACTION_REPORT; + case BLOCKDEV_ON_ERROR_IGNORE: + return BLOCK_ERROR_ACTION_IGNORE; + case BLOCKDEV_ON_ERROR_AUTO: + default: + abort(); + } +} + +static void send_qmp_error_event(BlockBackend *blk, + BlockErrorAction action, + bool is_read, int error) +{ + IoOperationType optype; + BlockDriverState *bs = blk_bs(blk); + + optype = is_read ? IO_OPERATION_TYPE_READ : IO_OPERATION_TYPE_WRITE; + qapi_event_send_block_io_error(blk_name(blk), !!bs, + bs ? bdrv_get_node_name(bs) : NULL, optype, + action, blk_iostatus_is_enabled(blk), + error == ENOSPC, strerror(error)); +} + +/* This is done by device models because, while the block layer knows + * about the error, it does not know whether an operation comes from + * the device or the block layer (from a job, for example). + */ +void blk_error_action(BlockBackend *blk, BlockErrorAction action, + bool is_read, int error) +{ + assert(error >= 0); + + if (action == BLOCK_ERROR_ACTION_STOP) { + /* First set the iostatus, so that "info block" returns an iostatus + * that matches the events raised so far (an additional error iostatus + * is fine, but not a lost one). + */ + blk_iostatus_set_err(blk, error); + + /* Then raise the request to stop the VM and the event. + * qemu_system_vmstop_request_prepare has two effects. First, + * it ensures that the STOP event always comes after the + * BLOCK_IO_ERROR event. Second, it ensures that even if management + * can observe the STOP event and do a "cont" before the STOP + * event is issued, the VM will not stop. In this case, vm_start() + * also ensures that the STOP/RESUME pair of events is emitted. + */ + qemu_system_vmstop_request_prepare(); + send_qmp_error_event(blk, action, is_read, error); + qemu_system_vmstop_request(RUN_STATE_IO_ERROR); + } else { + send_qmp_error_event(blk, action, is_read, error); + } +} + +/* + * Returns true if the BlockBackend can support taking write permissions + * (because its root node is not read-only). + */ +bool blk_supports_write_perm(BlockBackend *blk) +{ + BlockDriverState *bs = blk_bs(blk); + + if (bs) { + return !bdrv_is_read_only(bs); + } else { + return !blk->root_state.read_only; + } +} + +/* + * Returns true if the BlockBackend can be written to in its current + * configuration (i.e. if write permission have been requested) + */ +bool blk_is_writable(BlockBackend *blk) +{ + return blk->perm & BLK_PERM_WRITE; +} + +bool blk_is_sg(BlockBackend *blk) +{ + BlockDriverState *bs = blk_bs(blk); + + if (!bs) { + return false; + } + + return bdrv_is_sg(bs); +} + +bool blk_enable_write_cache(BlockBackend *blk) +{ + return blk->enable_write_cache; +} + +void blk_set_enable_write_cache(BlockBackend *blk, bool wce) +{ + blk->enable_write_cache = wce; +} + +void blk_invalidate_cache(BlockBackend *blk, Error **errp) +{ + BlockDriverState *bs = blk_bs(blk); + + if (!bs) { + error_setg(errp, "Device '%s' has no medium", blk->name); + return; + } + + bdrv_invalidate_cache(bs, errp); +} + +bool blk_is_inserted(BlockBackend *blk) +{ + BlockDriverState *bs = blk_bs(blk); + + return bs && bdrv_is_inserted(bs); +} + +bool blk_is_available(BlockBackend *blk) +{ + return blk_is_inserted(blk) && !blk_dev_is_tray_open(blk); +} + +void blk_lock_medium(BlockBackend *blk, bool locked) +{ + BlockDriverState *bs = blk_bs(blk); + + if (bs) { + bdrv_lock_medium(bs, locked); + } +} + +void blk_eject(BlockBackend *blk, bool eject_flag) +{ + BlockDriverState *bs = blk_bs(blk); + char *id; + + if (bs) { + bdrv_eject(bs, eject_flag); + } + + /* Whether or not we ejected on the backend, + * the frontend experienced a tray event. */ + id = blk_get_attached_dev_id(blk); + qapi_event_send_device_tray_moved(blk_name(blk), id, + eject_flag); + g_free(id); +} + +int blk_get_flags(BlockBackend *blk) +{ + BlockDriverState *bs = blk_bs(blk); + + if (bs) { + return bdrv_get_flags(bs); + } else { + return blk->root_state.open_flags; + } +} + +/* Returns the minimum request alignment, in bytes; guaranteed nonzero */ +uint32_t blk_get_request_alignment(BlockBackend *blk) +{ + BlockDriverState *bs = blk_bs(blk); + return bs ? bs->bl.request_alignment : BDRV_SECTOR_SIZE; +} + +/* Returns the maximum transfer length, in bytes; guaranteed nonzero */ +uint32_t blk_get_max_transfer(BlockBackend *blk) +{ + BlockDriverState *bs = blk_bs(blk); + uint32_t max = 0; + + if (bs) { + max = bs->bl.max_transfer; + } + return MIN_NON_ZERO(max, INT_MAX); +} + +int blk_get_max_iov(BlockBackend *blk) +{ + return blk->root->bs->bl.max_iov; +} + +void blk_set_guest_block_size(BlockBackend *blk, int align) +{ + blk->guest_block_size = align; +} + +void *blk_try_blockalign(BlockBackend *blk, size_t size) +{ + return qemu_try_blockalign(blk ? blk_bs(blk) : NULL, size); +} + +void *blk_blockalign(BlockBackend *blk, size_t size) +{ + return qemu_blockalign(blk ? blk_bs(blk) : NULL, size); +} + +bool blk_op_is_blocked(BlockBackend *blk, BlockOpType op, Error **errp) +{ + BlockDriverState *bs = blk_bs(blk); + + if (!bs) { + return false; + } + + return bdrv_op_is_blocked(bs, op, errp); +} + +void blk_op_unblock(BlockBackend *blk, BlockOpType op, Error *reason) +{ + BlockDriverState *bs = blk_bs(blk); + + if (bs) { + bdrv_op_unblock(bs, op, reason); + } +} + +void blk_op_block_all(BlockBackend *blk, Error *reason) +{ + BlockDriverState *bs = blk_bs(blk); + + if (bs) { + bdrv_op_block_all(bs, reason); + } +} + +void blk_op_unblock_all(BlockBackend *blk, Error *reason) +{ + BlockDriverState *bs = blk_bs(blk); + + if (bs) { + bdrv_op_unblock_all(bs, reason); + } +} + +AioContext *blk_get_aio_context(BlockBackend *blk) +{ + BlockDriverState *bs = blk_bs(blk); + + if (bs) { + AioContext *ctx = bdrv_get_aio_context(blk_bs(blk)); + assert(ctx == blk->ctx); + } + + return blk->ctx; +} + +static AioContext *blk_aiocb_get_aio_context(BlockAIOCB *acb) +{ + BlockBackendAIOCB *blk_acb = DO_UPCAST(BlockBackendAIOCB, common, acb); + return blk_get_aio_context(blk_acb->blk); +} + +static int blk_do_set_aio_context(BlockBackend *blk, AioContext *new_context, + bool update_root_node, Error **errp) +{ + BlockDriverState *bs = blk_bs(blk); + ThrottleGroupMember *tgm = &blk->public.throttle_group_member; + int ret; + + if (bs) { + if (update_root_node) { + ret = bdrv_child_try_set_aio_context(bs, new_context, blk->root, + errp); + if (ret < 0) { + return ret; + } + } + if (tgm->throttle_state) { + bdrv_drained_begin(bs); + throttle_group_detach_aio_context(tgm); + throttle_group_attach_aio_context(tgm, new_context); + bdrv_drained_end(bs); + } + } + + blk->ctx = new_context; + return 0; +} + +int blk_set_aio_context(BlockBackend *blk, AioContext *new_context, + Error **errp) +{ + return blk_do_set_aio_context(blk, new_context, true, errp); +} + +static bool blk_root_can_set_aio_ctx(BdrvChild *child, AioContext *ctx, + GSList **ignore, Error **errp) +{ + BlockBackend *blk = child->opaque; + + if (blk->allow_aio_context_change) { + return true; + } + + /* Only manually created BlockBackends that are not attached to anything + * can change their AioContext without updating their user. */ + if (!blk->name || blk->dev) { + /* TODO Add BB name/QOM path */ + error_setg(errp, "Cannot change iothread of active block backend"); + return false; + } + + return true; +} + +static void blk_root_set_aio_ctx(BdrvChild *child, AioContext *ctx, + GSList **ignore) +{ + BlockBackend *blk = child->opaque; + blk_do_set_aio_context(blk, ctx, false, &error_abort); +} + +void blk_add_aio_context_notifier(BlockBackend *blk, + void (*attached_aio_context)(AioContext *new_context, void *opaque), + void (*detach_aio_context)(void *opaque), void *opaque) +{ + BlockBackendAioNotifier *notifier; + BlockDriverState *bs = blk_bs(blk); + + notifier = g_new(BlockBackendAioNotifier, 1); + notifier->attached_aio_context = attached_aio_context; + notifier->detach_aio_context = detach_aio_context; + notifier->opaque = opaque; + QLIST_INSERT_HEAD(&blk->aio_notifiers, notifier, list); + + if (bs) { + bdrv_add_aio_context_notifier(bs, attached_aio_context, + detach_aio_context, opaque); + } +} + +void blk_remove_aio_context_notifier(BlockBackend *blk, + void (*attached_aio_context)(AioContext *, + void *), + void (*detach_aio_context)(void *), + void *opaque) +{ + BlockBackendAioNotifier *notifier; + BlockDriverState *bs = blk_bs(blk); + + if (bs) { + bdrv_remove_aio_context_notifier(bs, attached_aio_context, + detach_aio_context, opaque); + } + + QLIST_FOREACH(notifier, &blk->aio_notifiers, list) { + if (notifier->attached_aio_context == attached_aio_context && + notifier->detach_aio_context == detach_aio_context && + notifier->opaque == opaque) { + QLIST_REMOVE(notifier, list); + g_free(notifier); + return; + } + } + + abort(); +} + +void blk_add_remove_bs_notifier(BlockBackend *blk, Notifier *notify) +{ + notifier_list_add(&blk->remove_bs_notifiers, notify); +} + +void blk_add_insert_bs_notifier(BlockBackend *blk, Notifier *notify) +{ + notifier_list_add(&blk->insert_bs_notifiers, notify); +} + +void blk_io_plug(BlockBackend *blk) +{ + BlockDriverState *bs = blk_bs(blk); + + if (bs) { + bdrv_io_plug(bs); + } +} + +void blk_io_unplug(BlockBackend *blk) +{ + BlockDriverState *bs = blk_bs(blk); + + if (bs) { + bdrv_io_unplug(bs); + } +} + +BlockAcctStats *blk_get_stats(BlockBackend *blk) +{ + return &blk->stats; +} + +void *blk_aio_get(const AIOCBInfo *aiocb_info, BlockBackend *blk, + BlockCompletionFunc *cb, void *opaque) +{ + return qemu_aio_get(aiocb_info, blk_bs(blk), cb, opaque); +} + +int coroutine_fn blk_co_pwrite_zeroes(BlockBackend *blk, int64_t offset, + int bytes, BdrvRequestFlags flags) +{ + return blk_co_pwritev(blk, offset, bytes, NULL, + flags | BDRV_REQ_ZERO_WRITE); +} + +int blk_pwrite_compressed(BlockBackend *blk, int64_t offset, const void *buf, + int count) +{ + return blk_prw(blk, offset, (void *) buf, count, blk_write_entry, + BDRV_REQ_WRITE_COMPRESSED); +} + +int blk_truncate(BlockBackend *blk, int64_t offset, bool exact, + PreallocMode prealloc, BdrvRequestFlags flags, Error **errp) +{ + if (!blk_is_available(blk)) { + error_setg(errp, "No medium inserted"); + return -ENOMEDIUM; + } + + return bdrv_truncate(blk->root, offset, exact, prealloc, flags, errp); +} + +int blk_save_vmstate(BlockBackend *blk, const uint8_t *buf, + int64_t pos, int size) +{ + int ret; + + if (!blk_is_available(blk)) { + return -ENOMEDIUM; + } + + ret = bdrv_save_vmstate(blk_bs(blk), buf, pos, size); + if (ret < 0) { + return ret; + } + + if (ret == size && !blk->enable_write_cache) { + ret = bdrv_flush(blk_bs(blk)); + } + + return ret < 0 ? ret : size; +} + +int blk_load_vmstate(BlockBackend *blk, uint8_t *buf, int64_t pos, int size) +{ + if (!blk_is_available(blk)) { + return -ENOMEDIUM; + } + + return bdrv_load_vmstate(blk_bs(blk), buf, pos, size); +} + +int blk_probe_blocksizes(BlockBackend *blk, BlockSizes *bsz) +{ + if (!blk_is_available(blk)) { + return -ENOMEDIUM; + } + + return bdrv_probe_blocksizes(blk_bs(blk), bsz); +} + +int blk_probe_geometry(BlockBackend *blk, HDGeometry *geo) +{ + if (!blk_is_available(blk)) { + return -ENOMEDIUM; + } + + return bdrv_probe_geometry(blk_bs(blk), geo); +} + +/* + * Updates the BlockBackendRootState object with data from the currently + * attached BlockDriverState. + */ +void blk_update_root_state(BlockBackend *blk) +{ + assert(blk->root); + + blk->root_state.open_flags = blk->root->bs->open_flags; + blk->root_state.read_only = blk->root->bs->read_only; + blk->root_state.detect_zeroes = blk->root->bs->detect_zeroes; +} + +/* + * Returns the detect-zeroes setting to be used for bdrv_open() of a + * BlockDriverState which is supposed to inherit the root state. + */ +bool blk_get_detect_zeroes_from_root_state(BlockBackend *blk) +{ + return blk->root_state.detect_zeroes; +} + +/* + * Returns the flags to be used for bdrv_open() of a BlockDriverState which is + * supposed to inherit the root state. + */ +int blk_get_open_flags_from_root_state(BlockBackend *blk) +{ + int bs_flags; + + bs_flags = blk->root_state.read_only ? 0 : BDRV_O_RDWR; + bs_flags |= blk->root_state.open_flags & ~BDRV_O_RDWR; + + return bs_flags; +} + +BlockBackendRootState *blk_get_root_state(BlockBackend *blk) +{ + return &blk->root_state; +} + +int blk_commit_all(void) +{ + BlockBackend *blk = NULL; + + while ((blk = blk_all_next(blk)) != NULL) { + AioContext *aio_context = blk_get_aio_context(blk); + BlockDriverState *unfiltered_bs = bdrv_skip_filters(blk_bs(blk)); + + aio_context_acquire(aio_context); + if (blk_is_inserted(blk) && bdrv_cow_child(unfiltered_bs)) { + int ret; + + ret = bdrv_commit(unfiltered_bs); + if (ret < 0) { + aio_context_release(aio_context); + return ret; + } + } + aio_context_release(aio_context); + } + return 0; +} + + +/* throttling disk I/O limits */ +void blk_set_io_limits(BlockBackend *blk, ThrottleConfig *cfg) +{ + throttle_group_config(&blk->public.throttle_group_member, cfg); +} + +void blk_io_limits_disable(BlockBackend *blk) +{ + BlockDriverState *bs = blk_bs(blk); + ThrottleGroupMember *tgm = &blk->public.throttle_group_member; + assert(tgm->throttle_state); + if (bs) { + bdrv_drained_begin(bs); + } + throttle_group_unregister_tgm(tgm); + if (bs) { + bdrv_drained_end(bs); + } +} + +/* should be called before blk_set_io_limits if a limit is set */ +void blk_io_limits_enable(BlockBackend *blk, const char *group) +{ + assert(!blk->public.throttle_group_member.throttle_state); + throttle_group_register_tgm(&blk->public.throttle_group_member, + group, blk_get_aio_context(blk)); +} + +void blk_io_limits_update_group(BlockBackend *blk, const char *group) +{ + /* this BB is not part of any group */ + if (!blk->public.throttle_group_member.throttle_state) { + return; + } + + /* this BB is a part of the same group than the one we want */ + if (!g_strcmp0(throttle_group_get_name(&blk->public.throttle_group_member), + group)) { + return; + } + + /* need to change the group this bs belong to */ + blk_io_limits_disable(blk); + blk_io_limits_enable(blk, group); +} + +static void blk_root_drained_begin(BdrvChild *child) +{ + BlockBackend *blk = child->opaque; + ThrottleGroupMember *tgm = &blk->public.throttle_group_member; + + if (++blk->quiesce_counter == 1) { + if (blk->dev_ops && blk->dev_ops->drained_begin) { + blk->dev_ops->drained_begin(blk->dev_opaque); + } + } + + /* Note that blk->root may not be accessible here yet if we are just + * attaching to a BlockDriverState that is drained. Use child instead. */ + + if (qatomic_fetch_inc(&tgm->io_limits_disabled) == 0) { + throttle_group_restart_tgm(tgm); + } +} + +static bool blk_root_drained_poll(BdrvChild *child) +{ + BlockBackend *blk = child->opaque; + assert(blk->quiesce_counter); + return !!blk->in_flight; +} + +static void blk_root_drained_end(BdrvChild *child, int *drained_end_counter) +{ + BlockBackend *blk = child->opaque; + assert(blk->quiesce_counter); + + assert(blk->public.throttle_group_member.io_limits_disabled); + qatomic_dec(&blk->public.throttle_group_member.io_limits_disabled); + + if (--blk->quiesce_counter == 0) { + if (blk->dev_ops && blk->dev_ops->drained_end) { + blk->dev_ops->drained_end(blk->dev_opaque); + } + while (qemu_co_enter_next(&blk->queued_requests, NULL)) { + /* Resume all queued requests */ + } + } +} + +void blk_register_buf(BlockBackend *blk, void *host, size_t size) +{ + bdrv_register_buf(blk_bs(blk), host, size); +} + +void blk_unregister_buf(BlockBackend *blk, void *host) +{ + bdrv_unregister_buf(blk_bs(blk), host); +} + +int coroutine_fn blk_co_copy_range(BlockBackend *blk_in, int64_t off_in, + BlockBackend *blk_out, int64_t off_out, + int bytes, BdrvRequestFlags read_flags, + BdrvRequestFlags write_flags) +{ + int r; + r = blk_check_byte_request(blk_in, off_in, bytes); + if (r) { + return r; + } + r = blk_check_byte_request(blk_out, off_out, bytes); + if (r) { + return r; + } + return bdrv_co_copy_range(blk_in->root, off_in, + blk_out->root, off_out, + bytes, read_flags, write_flags); +} + +const BdrvChild *blk_root(BlockBackend *blk) +{ + return blk->root; +} + +int blk_make_empty(BlockBackend *blk, Error **errp) +{ + if (!blk_is_available(blk)) { + error_setg(errp, "No medium inserted"); + return -ENOMEDIUM; + } + + return bdrv_make_empty(blk->root, errp); +} diff --git a/block/block-copy.c b/block/block-copy.c new file mode 100644 index 000000000..cd9bc47c8 --- /dev/null +++ b/block/block-copy.c @@ -0,0 +1,686 @@ +/* + * block_copy API + * + * Copyright (C) 2013 Proxmox Server Solutions + * Copyright (c) 2019 Virtuozzo International GmbH. + * + * Authors: + * Dietmar Maurer (dietmar@proxmox.com) + * Vladimir Sementsov-Ogievskiy + * + * This work is licensed under the terms of the GNU GPL, version 2 or later. + * See the COPYING file in the top-level directory. + */ + +#include "qemu/osdep.h" + +#include "trace.h" +#include "qapi/error.h" +#include "block/block-copy.h" +#include "sysemu/block-backend.h" +#include "qemu/units.h" +#include "qemu/coroutine.h" +#include "block/aio_task.h" + +#define BLOCK_COPY_MAX_COPY_RANGE (16 * MiB) +#define BLOCK_COPY_MAX_BUFFER (1 * MiB) +#define BLOCK_COPY_MAX_MEM (128 * MiB) +#define BLOCK_COPY_MAX_WORKERS 64 + +static coroutine_fn int block_copy_task_entry(AioTask *task); + +typedef struct BlockCopyCallState { + bool failed; + bool error_is_read; +} BlockCopyCallState; + +typedef struct BlockCopyTask { + AioTask task; + + BlockCopyState *s; + BlockCopyCallState *call_state; + int64_t offset; + int64_t bytes; + bool zeroes; + QLIST_ENTRY(BlockCopyTask) list; + CoQueue wait_queue; /* coroutines blocked on this task */ +} BlockCopyTask; + +static int64_t task_end(BlockCopyTask *task) +{ + return task->offset + task->bytes; +} + +typedef struct BlockCopyState { + /* + * BdrvChild objects are not owned or managed by block-copy. They are + * provided by block-copy user and user is responsible for appropriate + * permissions on these children. + */ + BdrvChild *source; + BdrvChild *target; + BdrvDirtyBitmap *copy_bitmap; + int64_t in_flight_bytes; + int64_t cluster_size; + bool use_copy_range; + int64_t copy_size; + uint64_t len; + QLIST_HEAD(, BlockCopyTask) tasks; + + BdrvRequestFlags write_flags; + + /* + * skip_unallocated: + * + * Used by sync=top jobs, which first scan the source node for unallocated + * areas and clear them in the copy_bitmap. During this process, the bitmap + * is thus not fully initialized: It may still have bits set for areas that + * are unallocated and should actually not be copied. + * + * This is indicated by skip_unallocated. + * + * In this case, block_copy() will query the source’s allocation status, + * skip unallocated regions, clear them in the copy_bitmap, and invoke + * block_copy_reset_unallocated() every time it does. + */ + bool skip_unallocated; + + ProgressMeter *progress; + /* progress_bytes_callback: called when some copying progress is done. */ + ProgressBytesCallbackFunc progress_bytes_callback; + void *progress_opaque; + + SharedResource *mem; +} BlockCopyState; + +static BlockCopyTask *find_conflicting_task(BlockCopyState *s, + int64_t offset, int64_t bytes) +{ + BlockCopyTask *t; + + QLIST_FOREACH(t, &s->tasks, list) { + if (offset + bytes > t->offset && offset < t->offset + t->bytes) { + return t; + } + } + + return NULL; +} + +/* + * If there are no intersecting tasks return false. Otherwise, wait for the + * first found intersecting tasks to finish and return true. + */ +static bool coroutine_fn block_copy_wait_one(BlockCopyState *s, int64_t offset, + int64_t bytes) +{ + BlockCopyTask *task = find_conflicting_task(s, offset, bytes); + + if (!task) { + return false; + } + + qemu_co_queue_wait(&task->wait_queue, NULL); + + return true; +} + +/* + * Search for the first dirty area in offset/bytes range and create task at + * the beginning of it. + */ +static BlockCopyTask *block_copy_task_create(BlockCopyState *s, + BlockCopyCallState *call_state, + int64_t offset, int64_t bytes) +{ + BlockCopyTask *task; + + if (!bdrv_dirty_bitmap_next_dirty_area(s->copy_bitmap, + offset, offset + bytes, + s->copy_size, &offset, &bytes)) + { + return NULL; + } + + assert(QEMU_IS_ALIGNED(offset, s->cluster_size)); + bytes = QEMU_ALIGN_UP(bytes, s->cluster_size); + + /* region is dirty, so no existent tasks possible in it */ + assert(!find_conflicting_task(s, offset, bytes)); + + bdrv_reset_dirty_bitmap(s->copy_bitmap, offset, bytes); + s->in_flight_bytes += bytes; + + task = g_new(BlockCopyTask, 1); + *task = (BlockCopyTask) { + .task.func = block_copy_task_entry, + .s = s, + .call_state = call_state, + .offset = offset, + .bytes = bytes, + }; + qemu_co_queue_init(&task->wait_queue); + QLIST_INSERT_HEAD(&s->tasks, task, list); + + return task; +} + +/* + * block_copy_task_shrink + * + * Drop the tail of the task to be handled later. Set dirty bits back and + * wake up all tasks waiting for us (may be some of them are not intersecting + * with shrunk task) + */ +static void coroutine_fn block_copy_task_shrink(BlockCopyTask *task, + int64_t new_bytes) +{ + if (new_bytes == task->bytes) { + return; + } + + assert(new_bytes > 0 && new_bytes < task->bytes); + + task->s->in_flight_bytes -= task->bytes - new_bytes; + bdrv_set_dirty_bitmap(task->s->copy_bitmap, + task->offset + new_bytes, task->bytes - new_bytes); + + task->bytes = new_bytes; + qemu_co_queue_restart_all(&task->wait_queue); +} + +static void coroutine_fn block_copy_task_end(BlockCopyTask *task, int ret) +{ + task->s->in_flight_bytes -= task->bytes; + if (ret < 0) { + bdrv_set_dirty_bitmap(task->s->copy_bitmap, task->offset, task->bytes); + } + QLIST_REMOVE(task, list); + qemu_co_queue_restart_all(&task->wait_queue); +} + +void block_copy_state_free(BlockCopyState *s) +{ + if (!s) { + return; + } + + bdrv_release_dirty_bitmap(s->copy_bitmap); + shres_destroy(s->mem); + g_free(s); +} + +static uint32_t block_copy_max_transfer(BdrvChild *source, BdrvChild *target) +{ + return MIN_NON_ZERO(INT_MAX, + MIN_NON_ZERO(source->bs->bl.max_transfer, + target->bs->bl.max_transfer)); +} + +BlockCopyState *block_copy_state_new(BdrvChild *source, BdrvChild *target, + int64_t cluster_size, + BdrvRequestFlags write_flags, Error **errp) +{ + BlockCopyState *s; + BdrvDirtyBitmap *copy_bitmap; + + copy_bitmap = bdrv_create_dirty_bitmap(source->bs, cluster_size, NULL, + errp); + if (!copy_bitmap) { + return NULL; + } + bdrv_disable_dirty_bitmap(copy_bitmap); + + s = g_new(BlockCopyState, 1); + *s = (BlockCopyState) { + .source = source, + .target = target, + .copy_bitmap = copy_bitmap, + .cluster_size = cluster_size, + .len = bdrv_dirty_bitmap_size(copy_bitmap), + .write_flags = write_flags, + .mem = shres_create(BLOCK_COPY_MAX_MEM), + }; + + if (block_copy_max_transfer(source, target) < cluster_size) { + /* + * copy_range does not respect max_transfer. We don't want to bother + * with requests smaller than block-copy cluster size, so fallback to + * buffered copying (read and write respect max_transfer on their + * behalf). + */ + s->use_copy_range = false; + s->copy_size = cluster_size; + } else if (write_flags & BDRV_REQ_WRITE_COMPRESSED) { + /* Compression supports only cluster-size writes and no copy-range. */ + s->use_copy_range = false; + s->copy_size = cluster_size; + } else { + /* + * We enable copy-range, but keep small copy_size, until first + * successful copy_range (look at block_copy_do_copy). + */ + s->use_copy_range = true; + s->copy_size = MAX(s->cluster_size, BLOCK_COPY_MAX_BUFFER); + } + + QLIST_INIT(&s->tasks); + + return s; +} + +void block_copy_set_progress_callback( + BlockCopyState *s, + ProgressBytesCallbackFunc progress_bytes_callback, + void *progress_opaque) +{ + s->progress_bytes_callback = progress_bytes_callback; + s->progress_opaque = progress_opaque; +} + +void block_copy_set_progress_meter(BlockCopyState *s, ProgressMeter *pm) +{ + s->progress = pm; +} + +/* + * Takes ownership of @task + * + * If pool is NULL directly run the task, otherwise schedule it into the pool. + * + * Returns: task.func return code if pool is NULL + * otherwise -ECANCELED if pool status is bad + * otherwise 0 (successfully scheduled) + */ +static coroutine_fn int block_copy_task_run(AioTaskPool *pool, + BlockCopyTask *task) +{ + if (!pool) { + int ret = task->task.func(&task->task); + + g_free(task); + return ret; + } + + aio_task_pool_wait_slot(pool); + if (aio_task_pool_status(pool) < 0) { + co_put_to_shres(task->s->mem, task->bytes); + block_copy_task_end(task, -ECANCELED); + g_free(task); + return -ECANCELED; + } + + aio_task_pool_start_task(pool, &task->task); + + return 0; +} + +/* + * block_copy_do_copy + * + * Do copy of cluster-aligned chunk. Requested region is allowed to exceed + * s->len only to cover last cluster when s->len is not aligned to clusters. + * + * No sync here: nor bitmap neighter intersecting requests handling, only copy. + * + * Returns 0 on success. + */ +static int coroutine_fn block_copy_do_copy(BlockCopyState *s, + int64_t offset, int64_t bytes, + bool zeroes, bool *error_is_read) +{ + int ret; + int64_t nbytes = MIN(offset + bytes, s->len) - offset; + void *bounce_buffer = NULL; + + assert(offset >= 0 && bytes > 0 && INT64_MAX - offset >= bytes); + assert(QEMU_IS_ALIGNED(offset, s->cluster_size)); + assert(QEMU_IS_ALIGNED(bytes, s->cluster_size)); + assert(offset < s->len); + assert(offset + bytes <= s->len || + offset + bytes == QEMU_ALIGN_UP(s->len, s->cluster_size)); + assert(nbytes < INT_MAX); + + if (zeroes) { + ret = bdrv_co_pwrite_zeroes(s->target, offset, nbytes, s->write_flags & + ~BDRV_REQ_WRITE_COMPRESSED); + if (ret < 0) { + trace_block_copy_write_zeroes_fail(s, offset, ret); + *error_is_read = false; + } + return ret; + } + + if (s->use_copy_range) { + ret = bdrv_co_copy_range(s->source, offset, s->target, offset, nbytes, + 0, s->write_flags); + if (ret < 0) { + trace_block_copy_copy_range_fail(s, offset, ret); + s->use_copy_range = false; + s->copy_size = MAX(s->cluster_size, BLOCK_COPY_MAX_BUFFER); + /* Fallback to read+write with allocated buffer */ + } else { + if (s->use_copy_range) { + /* + * Successful copy-range. Now increase copy_size. copy_range + * does not respect max_transfer (it's a TODO), so we factor + * that in here. + * + * Note: we double-check s->use_copy_range for the case when + * parallel block-copy request unsets it during previous + * bdrv_co_copy_range call. + */ + s->copy_size = + MIN(MAX(s->cluster_size, BLOCK_COPY_MAX_COPY_RANGE), + QEMU_ALIGN_DOWN(block_copy_max_transfer(s->source, + s->target), + s->cluster_size)); + } + goto out; + } + } + + /* + * In case of failed copy_range request above, we may proceed with buffered + * request larger than BLOCK_COPY_MAX_BUFFER. Still, further requests will + * be properly limited, so don't care too much. Moreover the most likely + * case (copy_range is unsupported for the configuration, so the very first + * copy_range request fails) is handled by setting large copy_size only + * after first successful copy_range. + */ + + bounce_buffer = qemu_blockalign(s->source->bs, nbytes); + + ret = bdrv_co_pread(s->source, offset, nbytes, bounce_buffer, 0); + if (ret < 0) { + trace_block_copy_read_fail(s, offset, ret); + *error_is_read = true; + goto out; + } + + ret = bdrv_co_pwrite(s->target, offset, nbytes, bounce_buffer, + s->write_flags); + if (ret < 0) { + trace_block_copy_write_fail(s, offset, ret); + *error_is_read = false; + goto out; + } + +out: + qemu_vfree(bounce_buffer); + + return ret; +} + +static coroutine_fn int block_copy_task_entry(AioTask *task) +{ + BlockCopyTask *t = container_of(task, BlockCopyTask, task); + bool error_is_read = false; + int ret; + + ret = block_copy_do_copy(t->s, t->offset, t->bytes, t->zeroes, + &error_is_read); + if (ret < 0 && !t->call_state->failed) { + t->call_state->failed = true; + t->call_state->error_is_read = error_is_read; + } else { + progress_work_done(t->s->progress, t->bytes); + t->s->progress_bytes_callback(t->bytes, t->s->progress_opaque); + } + co_put_to_shres(t->s->mem, t->bytes); + block_copy_task_end(t, ret); + + return ret; +} + +static int block_copy_block_status(BlockCopyState *s, int64_t offset, + int64_t bytes, int64_t *pnum) +{ + int64_t num; + BlockDriverState *base; + int ret; + + if (s->skip_unallocated) { + base = bdrv_backing_chain_next(s->source->bs); + } else { + base = NULL; + } + + ret = bdrv_block_status_above(s->source->bs, base, offset, bytes, &num, + NULL, NULL); + if (ret < 0 || num < s->cluster_size) { + /* + * On error or if failed to obtain large enough chunk just fallback to + * copy one cluster. + */ + num = s->cluster_size; + ret = BDRV_BLOCK_ALLOCATED | BDRV_BLOCK_DATA; + } else if (offset + num == s->len) { + num = QEMU_ALIGN_UP(num, s->cluster_size); + } else { + num = QEMU_ALIGN_DOWN(num, s->cluster_size); + } + + *pnum = num; + return ret; +} + +/* + * Check if the cluster starting at offset is allocated or not. + * return via pnum the number of contiguous clusters sharing this allocation. + */ +static int block_copy_is_cluster_allocated(BlockCopyState *s, int64_t offset, + int64_t *pnum) +{ + BlockDriverState *bs = s->source->bs; + int64_t count, total_count = 0; + int64_t bytes = s->len - offset; + int ret; + + assert(QEMU_IS_ALIGNED(offset, s->cluster_size)); + + while (true) { + ret = bdrv_is_allocated(bs, offset, bytes, &count); + if (ret < 0) { + return ret; + } + + total_count += count; + + if (ret || count == 0) { + /* + * ret: partial segment(s) are considered allocated. + * otherwise: unallocated tail is treated as an entire segment. + */ + *pnum = DIV_ROUND_UP(total_count, s->cluster_size); + return ret; + } + + /* Unallocated segment(s) with uncertain following segment(s) */ + if (total_count >= s->cluster_size) { + *pnum = total_count / s->cluster_size; + return 0; + } + + offset += count; + bytes -= count; + } +} + +/* + * Reset bits in copy_bitmap starting at offset if they represent unallocated + * data in the image. May reset subsequent contiguous bits. + * @return 0 when the cluster at @offset was unallocated, + * 1 otherwise, and -ret on error. + */ +int64_t block_copy_reset_unallocated(BlockCopyState *s, + int64_t offset, int64_t *count) +{ + int ret; + int64_t clusters, bytes; + + ret = block_copy_is_cluster_allocated(s, offset, &clusters); + if (ret < 0) { + return ret; + } + + bytes = clusters * s->cluster_size; + + if (!ret) { + bdrv_reset_dirty_bitmap(s->copy_bitmap, offset, bytes); + progress_set_remaining(s->progress, + bdrv_get_dirty_count(s->copy_bitmap) + + s->in_flight_bytes); + } + + *count = bytes; + return ret; +} + +/* + * block_copy_dirty_clusters + * + * Copy dirty clusters in @offset/@bytes range. + * Returns 1 if dirty clusters found and successfully copied, 0 if no dirty + * clusters found and -errno on failure. + */ +static int coroutine_fn block_copy_dirty_clusters(BlockCopyState *s, + int64_t offset, int64_t bytes, + bool *error_is_read) +{ + int ret = 0; + bool found_dirty = false; + int64_t end = offset + bytes; + AioTaskPool *aio = NULL; + BlockCopyCallState call_state = {false, false}; + + /* + * block_copy() user is responsible for keeping source and target in same + * aio context + */ + assert(bdrv_get_aio_context(s->source->bs) == + bdrv_get_aio_context(s->target->bs)); + + assert(QEMU_IS_ALIGNED(offset, s->cluster_size)); + assert(QEMU_IS_ALIGNED(bytes, s->cluster_size)); + + while (bytes && aio_task_pool_status(aio) == 0) { + BlockCopyTask *task; + int64_t status_bytes; + + task = block_copy_task_create(s, &call_state, offset, bytes); + if (!task) { + /* No more dirty bits in the bitmap */ + trace_block_copy_skip_range(s, offset, bytes); + break; + } + if (task->offset > offset) { + trace_block_copy_skip_range(s, offset, task->offset - offset); + } + + found_dirty = true; + + ret = block_copy_block_status(s, task->offset, task->bytes, + &status_bytes); + assert(ret >= 0); /* never fail */ + if (status_bytes < task->bytes) { + block_copy_task_shrink(task, status_bytes); + } + if (s->skip_unallocated && !(ret & BDRV_BLOCK_ALLOCATED)) { + block_copy_task_end(task, 0); + progress_set_remaining(s->progress, + bdrv_get_dirty_count(s->copy_bitmap) + + s->in_flight_bytes); + trace_block_copy_skip_range(s, task->offset, task->bytes); + offset = task_end(task); + bytes = end - offset; + g_free(task); + continue; + } + task->zeroes = ret & BDRV_BLOCK_ZERO; + + trace_block_copy_process(s, task->offset); + + co_get_from_shres(s->mem, task->bytes); + + offset = task_end(task); + bytes = end - offset; + + if (!aio && bytes) { + aio = aio_task_pool_new(BLOCK_COPY_MAX_WORKERS); + } + + ret = block_copy_task_run(aio, task); + if (ret < 0) { + goto out; + } + } + +out: + if (aio) { + aio_task_pool_wait_all(aio); + + /* + * We are not really interested in -ECANCELED returned from + * block_copy_task_run. If it fails, it means some task already failed + * for real reason, let's return first failure. + * Still, assert that we don't rewrite failure by success. + * + * Note: ret may be positive here because of block-status result. + */ + assert(ret >= 0 || aio_task_pool_status(aio) < 0); + ret = aio_task_pool_status(aio); + + aio_task_pool_free(aio); + } + if (error_is_read && ret < 0) { + *error_is_read = call_state.error_is_read; + } + + return ret < 0 ? ret : found_dirty; +} + +/* + * block_copy + * + * Copy requested region, accordingly to dirty bitmap. + * Collaborate with parallel block_copy requests: if they succeed it will help + * us. If they fail, we will retry not-copied regions. So, if we return error, + * it means that some I/O operation failed in context of _this_ block_copy call, + * not some parallel operation. + */ +int coroutine_fn block_copy(BlockCopyState *s, int64_t offset, int64_t bytes, + bool *error_is_read) +{ + int ret; + + do { + ret = block_copy_dirty_clusters(s, offset, bytes, error_is_read); + + if (ret == 0) { + ret = block_copy_wait_one(s, offset, bytes); + } + + /* + * We retry in two cases: + * 1. Some progress done + * Something was copied, which means that there were yield points + * and some new dirty bits may have appeared (due to failed parallel + * block-copy requests). + * 2. We have waited for some intersecting block-copy request + * It may have failed and produced new dirty bits. + */ + } while (ret > 0); + + return ret; +} + +BdrvDirtyBitmap *block_copy_dirty_bitmap(BlockCopyState *s) +{ + return s->copy_bitmap; +} + +void block_copy_set_skip_unallocated(BlockCopyState *s, bool skip) +{ + s->skip_unallocated = skip; +} diff --git a/block/block-gen.h b/block/block-gen.h new file mode 100644 index 000000000..f80cf4897 --- /dev/null +++ b/block/block-gen.h @@ -0,0 +1,49 @@ +/* + * Block coroutine wrapping core, used by auto-generated block/block-gen.c + * + * Copyright (c) 2003 Fabrice Bellard + * Copyright (c) 2020 Virtuozzo International GmbH + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ + +#ifndef BLOCK_BLOCK_GEN_H +#define BLOCK_BLOCK_GEN_H + +#include "block/block_int.h" + +/* Base structure for argument packing structures */ +typedef struct BdrvPollCo { + BlockDriverState *bs; + bool in_progress; + int ret; + Coroutine *co; /* Keep pointer here for debugging */ +} BdrvPollCo; + +static inline int bdrv_poll_co(BdrvPollCo *s) +{ + assert(!qemu_in_coroutine()); + + bdrv_coroutine_enter(s->bs, s->co); + BDRV_POLL_WHILE(s->bs, s->in_progress); + + return s->ret; +} + +#endif /* BLOCK_BLOCK_GEN_H */ diff --git a/block/bochs.c b/block/bochs.c new file mode 100644 index 000000000..2f010ab40 --- /dev/null +++ b/block/bochs.c @@ -0,0 +1,312 @@ +/* + * Block driver for the various disk image formats used by Bochs + * Currently only for "growing" type in read-only mode + * + * Copyright (c) 2005 Alex Beregszaszi + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ +#include "qemu/osdep.h" +#include "qapi/error.h" +#include "block/block_int.h" +#include "qemu/module.h" +#include "qemu/bswap.h" +#include "qemu/error-report.h" + +/**************************************************************/ + +#define HEADER_MAGIC "Bochs Virtual HD Image" +#define HEADER_VERSION 0x00020000 +#define HEADER_V1 0x00010000 +#define HEADER_SIZE 512 + +#define REDOLOG_TYPE "Redolog" +#define GROWING_TYPE "Growing" + +// not allocated: 0xffffffff + +// always little-endian +struct bochs_header { + char magic[32]; /* "Bochs Virtual HD Image" */ + char type[16]; /* "Redolog" */ + char subtype[16]; /* "Undoable" / "Volatile" / "Growing" */ + uint32_t version; + uint32_t header; /* size of header */ + + uint32_t catalog; /* num of entries */ + uint32_t bitmap; /* bitmap size */ + uint32_t extent; /* extent size */ + + union { + struct { + uint32_t reserved; /* for ??? */ + uint64_t disk; /* disk size */ + char padding[HEADER_SIZE - 64 - 20 - 12]; + } QEMU_PACKED redolog; + struct { + uint64_t disk; /* disk size */ + char padding[HEADER_SIZE - 64 - 20 - 8]; + } QEMU_PACKED redolog_v1; + char padding[HEADER_SIZE - 64 - 20]; + } extra; +} QEMU_PACKED; + +typedef struct BDRVBochsState { + CoMutex lock; + uint32_t *catalog_bitmap; + uint32_t catalog_size; + + uint32_t data_offset; + + uint32_t bitmap_blocks; + uint32_t extent_blocks; + uint32_t extent_size; +} BDRVBochsState; + +static int bochs_probe(const uint8_t *buf, int buf_size, const char *filename) +{ + const struct bochs_header *bochs = (const void *)buf; + + if (buf_size < HEADER_SIZE) + return 0; + + if (!strcmp(bochs->magic, HEADER_MAGIC) && + !strcmp(bochs->type, REDOLOG_TYPE) && + !strcmp(bochs->subtype, GROWING_TYPE) && + ((le32_to_cpu(bochs->version) == HEADER_VERSION) || + (le32_to_cpu(bochs->version) == HEADER_V1))) + return 100; + + return 0; +} + +static int bochs_open(BlockDriverState *bs, QDict *options, int flags, + Error **errp) +{ + BDRVBochsState *s = bs->opaque; + uint32_t i; + struct bochs_header bochs; + int ret; + + /* No write support yet */ + ret = bdrv_apply_auto_read_only(bs, NULL, errp); + if (ret < 0) { + return ret; + } + + bs->file = bdrv_open_child(NULL, options, "file", bs, &child_of_bds, + BDRV_CHILD_IMAGE, false, errp); + if (!bs->file) { + return -EINVAL; + } + + ret = bdrv_pread(bs->file, 0, &bochs, sizeof(bochs)); + if (ret < 0) { + return ret; + } + + if (strcmp(bochs.magic, HEADER_MAGIC) || + strcmp(bochs.type, REDOLOG_TYPE) || + strcmp(bochs.subtype, GROWING_TYPE) || + ((le32_to_cpu(bochs.version) != HEADER_VERSION) && + (le32_to_cpu(bochs.version) != HEADER_V1))) { + error_setg(errp, "Image not in Bochs format"); + return -EINVAL; + } + + if (le32_to_cpu(bochs.version) == HEADER_V1) { + bs->total_sectors = le64_to_cpu(bochs.extra.redolog_v1.disk) / 512; + } else { + bs->total_sectors = le64_to_cpu(bochs.extra.redolog.disk) / 512; + } + + /* Limit to 1M entries to avoid unbounded allocation. This is what is + * needed for the largest image that bximage can create (~8 TB). */ + s->catalog_size = le32_to_cpu(bochs.catalog); + if (s->catalog_size > 0x100000) { + error_setg(errp, "Catalog size is too large"); + return -EFBIG; + } + + s->catalog_bitmap = g_try_new(uint32_t, s->catalog_size); + if (s->catalog_size && s->catalog_bitmap == NULL) { + error_setg(errp, "Could not allocate memory for catalog"); + return -ENOMEM; + } + + ret = bdrv_pread(bs->file, le32_to_cpu(bochs.header), s->catalog_bitmap, + s->catalog_size * 4); + if (ret < 0) { + goto fail; + } + + for (i = 0; i < s->catalog_size; i++) + le32_to_cpus(&s->catalog_bitmap[i]); + + s->data_offset = le32_to_cpu(bochs.header) + (s->catalog_size * 4); + + s->bitmap_blocks = 1 + (le32_to_cpu(bochs.bitmap) - 1) / 512; + s->extent_blocks = 1 + (le32_to_cpu(bochs.extent) - 1) / 512; + + s->extent_size = le32_to_cpu(bochs.extent); + if (s->extent_size < BDRV_SECTOR_SIZE) { + /* bximage actually never creates extents smaller than 4k */ + error_setg(errp, "Extent size must be at least 512"); + ret = -EINVAL; + goto fail; + } else if (!is_power_of_2(s->extent_size)) { + error_setg(errp, "Extent size %" PRIu32 " is not a power of two", + s->extent_size); + ret = -EINVAL; + goto fail; + } else if (s->extent_size > 0x800000) { + error_setg(errp, "Extent size %" PRIu32 " is too large", + s->extent_size); + ret = -EINVAL; + goto fail; + } + + if (s->catalog_size < DIV_ROUND_UP(bs->total_sectors, + s->extent_size / BDRV_SECTOR_SIZE)) + { + error_setg(errp, "Catalog size is too small for this disk size"); + ret = -EINVAL; + goto fail; + } + + qemu_co_mutex_init(&s->lock); + return 0; + +fail: + g_free(s->catalog_bitmap); + return ret; +} + +static void bochs_refresh_limits(BlockDriverState *bs, Error **errp) +{ + bs->bl.request_alignment = BDRV_SECTOR_SIZE; /* No sub-sector I/O */ +} + +static int64_t seek_to_sector(BlockDriverState *bs, int64_t sector_num) +{ + BDRVBochsState *s = bs->opaque; + uint64_t offset = sector_num * 512; + uint64_t extent_index, extent_offset, bitmap_offset; + char bitmap_entry; + int ret; + + // seek to sector + extent_index = offset / s->extent_size; + extent_offset = (offset % s->extent_size) / 512; + + if (s->catalog_bitmap[extent_index] == 0xffffffff) { + return 0; /* not allocated */ + } + + bitmap_offset = s->data_offset + + (512 * (uint64_t) s->catalog_bitmap[extent_index] * + (s->extent_blocks + s->bitmap_blocks)); + + /* read in bitmap for current extent */ + ret = bdrv_pread(bs->file, bitmap_offset + (extent_offset / 8), + &bitmap_entry, 1); + if (ret < 0) { + return ret; + } + + if (!((bitmap_entry >> (extent_offset % 8)) & 1)) { + return 0; /* not allocated */ + } + + return bitmap_offset + (512 * (s->bitmap_blocks + extent_offset)); +} + +static int coroutine_fn +bochs_co_preadv(BlockDriverState *bs, uint64_t offset, uint64_t bytes, + QEMUIOVector *qiov, int flags) +{ + BDRVBochsState *s = bs->opaque; + uint64_t sector_num = offset >> BDRV_SECTOR_BITS; + int nb_sectors = bytes >> BDRV_SECTOR_BITS; + uint64_t bytes_done = 0; + QEMUIOVector local_qiov; + int ret; + + assert(QEMU_IS_ALIGNED(offset, BDRV_SECTOR_SIZE)); + assert(QEMU_IS_ALIGNED(bytes, BDRV_SECTOR_SIZE)); + + qemu_iovec_init(&local_qiov, qiov->niov); + qemu_co_mutex_lock(&s->lock); + + while (nb_sectors > 0) { + int64_t block_offset = seek_to_sector(bs, sector_num); + if (block_offset < 0) { + ret = block_offset; + goto fail; + } + + qemu_iovec_reset(&local_qiov); + qemu_iovec_concat(&local_qiov, qiov, bytes_done, 512); + + if (block_offset > 0) { + ret = bdrv_co_preadv(bs->file, block_offset, 512, + &local_qiov, 0); + if (ret < 0) { + goto fail; + } + } else { + qemu_iovec_memset(&local_qiov, 0, 0, 512); + } + nb_sectors--; + sector_num++; + bytes_done += 512; + } + + ret = 0; +fail: + qemu_co_mutex_unlock(&s->lock); + qemu_iovec_destroy(&local_qiov); + + return ret; +} + +static void bochs_close(BlockDriverState *bs) +{ + BDRVBochsState *s = bs->opaque; + g_free(s->catalog_bitmap); +} + +static BlockDriver bdrv_bochs = { + .format_name = "bochs", + .instance_size = sizeof(BDRVBochsState), + .bdrv_probe = bochs_probe, + .bdrv_open = bochs_open, + .bdrv_child_perm = bdrv_default_perms, + .bdrv_refresh_limits = bochs_refresh_limits, + .bdrv_co_preadv = bochs_co_preadv, + .bdrv_close = bochs_close, + .is_format = true, +}; + +static void bdrv_bochs_init(void) +{ + bdrv_register(&bdrv_bochs); +} + +block_init(bdrv_bochs_init); diff --git a/block/cloop.c b/block/cloop.c new file mode 100644 index 000000000..c99192a57 --- /dev/null +++ b/block/cloop.c @@ -0,0 +1,308 @@ +/* + * QEMU Block driver for CLOOP images + * + * Copyright (c) 2004 Johannes E. Schindelin + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ +#include "qemu/osdep.h" +#include "qapi/error.h" +#include "qemu/error-report.h" +#include "block/block_int.h" +#include "qemu/module.h" +#include "qemu/bswap.h" +#include + +/* Maximum compressed block size */ +#define MAX_BLOCK_SIZE (64 * 1024 * 1024) + +typedef struct BDRVCloopState { + CoMutex lock; + uint32_t block_size; + uint32_t n_blocks; + uint64_t *offsets; + uint32_t sectors_per_block; + uint32_t current_block; + uint8_t *compressed_block; + uint8_t *uncompressed_block; + z_stream zstream; +} BDRVCloopState; + +static int cloop_probe(const uint8_t *buf, int buf_size, const char *filename) +{ + const char *magic_version_2_0 = "#!/bin/sh\n" + "#V2.0 Format\n" + "modprobe cloop file=$0 && mount -r -t iso9660 /dev/cloop $1\n"; + int length = strlen(magic_version_2_0); + if (length > buf_size) { + length = buf_size; + } + if (!memcmp(magic_version_2_0, buf, length)) { + return 2; + } + return 0; +} + +static int cloop_open(BlockDriverState *bs, QDict *options, int flags, + Error **errp) +{ + BDRVCloopState *s = bs->opaque; + uint32_t offsets_size, max_compressed_block_size = 1, i; + int ret; + + ret = bdrv_apply_auto_read_only(bs, NULL, errp); + if (ret < 0) { + return ret; + } + + bs->file = bdrv_open_child(NULL, options, "file", bs, &child_of_bds, + BDRV_CHILD_IMAGE, false, errp); + if (!bs->file) { + return -EINVAL; + } + + /* read header */ + ret = bdrv_pread(bs->file, 128, &s->block_size, 4); + if (ret < 0) { + return ret; + } + s->block_size = be32_to_cpu(s->block_size); + if (s->block_size % 512) { + error_setg(errp, "block_size %" PRIu32 " must be a multiple of 512", + s->block_size); + return -EINVAL; + } + if (s->block_size == 0) { + error_setg(errp, "block_size cannot be zero"); + return -EINVAL; + } + + /* cloop's create_compressed_fs.c warns about block sizes beyond 256 KB but + * we can accept more. Prevent ridiculous values like 4 GB - 1 since we + * need a buffer this big. + */ + if (s->block_size > MAX_BLOCK_SIZE) { + error_setg(errp, "block_size %" PRIu32 " must be %u MB or less", + s->block_size, + MAX_BLOCK_SIZE / (1024 * 1024)); + return -EINVAL; + } + + ret = bdrv_pread(bs->file, 128 + 4, &s->n_blocks, 4); + if (ret < 0) { + return ret; + } + s->n_blocks = be32_to_cpu(s->n_blocks); + + /* read offsets */ + if (s->n_blocks > (UINT32_MAX - 1) / sizeof(uint64_t)) { + /* Prevent integer overflow */ + error_setg(errp, "n_blocks %" PRIu32 " must be %zu or less", + s->n_blocks, + (UINT32_MAX - 1) / sizeof(uint64_t)); + return -EINVAL; + } + offsets_size = (s->n_blocks + 1) * sizeof(uint64_t); + if (offsets_size > 512 * 1024 * 1024) { + /* Prevent ridiculous offsets_size which causes memory allocation to + * fail or overflows bdrv_pread() size. In practice the 512 MB + * offsets[] limit supports 16 TB images at 256 KB block size. + */ + error_setg(errp, "image requires too many offsets, " + "try increasing block size"); + return -EINVAL; + } + + s->offsets = g_try_malloc(offsets_size); + if (s->offsets == NULL) { + error_setg(errp, "Could not allocate offsets table"); + return -ENOMEM; + } + + ret = bdrv_pread(bs->file, 128 + 4 + 4, s->offsets, offsets_size); + if (ret < 0) { + goto fail; + } + + for (i = 0; i < s->n_blocks + 1; i++) { + uint64_t size; + + s->offsets[i] = be64_to_cpu(s->offsets[i]); + if (i == 0) { + continue; + } + + if (s->offsets[i] < s->offsets[i - 1]) { + error_setg(errp, "offsets not monotonically increasing at " + "index %" PRIu32 ", image file is corrupt", i); + ret = -EINVAL; + goto fail; + } + + size = s->offsets[i] - s->offsets[i - 1]; + + /* Compressed blocks should be smaller than the uncompressed block size + * but maybe compression performed poorly so the compressed block is + * actually bigger. Clamp down on unrealistic values to prevent + * ridiculous s->compressed_block allocation. + */ + if (size > 2 * MAX_BLOCK_SIZE) { + error_setg(errp, "invalid compressed block size at index %" PRIu32 + ", image file is corrupt", i); + ret = -EINVAL; + goto fail; + } + + if (size > max_compressed_block_size) { + max_compressed_block_size = size; + } + } + + /* initialize zlib engine */ + s->compressed_block = g_try_malloc(max_compressed_block_size + 1); + if (s->compressed_block == NULL) { + error_setg(errp, "Could not allocate compressed_block"); + ret = -ENOMEM; + goto fail; + } + + s->uncompressed_block = g_try_malloc(s->block_size); + if (s->uncompressed_block == NULL) { + error_setg(errp, "Could not allocate uncompressed_block"); + ret = -ENOMEM; + goto fail; + } + + if (inflateInit(&s->zstream) != Z_OK) { + ret = -EINVAL; + goto fail; + } + s->current_block = s->n_blocks; + + s->sectors_per_block = s->block_size/512; + bs->total_sectors = s->n_blocks * s->sectors_per_block; + qemu_co_mutex_init(&s->lock); + return 0; + +fail: + g_free(s->offsets); + g_free(s->compressed_block); + g_free(s->uncompressed_block); + return ret; +} + +static void cloop_refresh_limits(BlockDriverState *bs, Error **errp) +{ + bs->bl.request_alignment = BDRV_SECTOR_SIZE; /* No sub-sector I/O */ +} + +static inline int cloop_read_block(BlockDriverState *bs, int block_num) +{ + BDRVCloopState *s = bs->opaque; + + if (s->current_block != block_num) { + int ret; + uint32_t bytes = s->offsets[block_num + 1] - s->offsets[block_num]; + + ret = bdrv_pread(bs->file, s->offsets[block_num], + s->compressed_block, bytes); + if (ret != bytes) { + return -1; + } + + s->zstream.next_in = s->compressed_block; + s->zstream.avail_in = bytes; + s->zstream.next_out = s->uncompressed_block; + s->zstream.avail_out = s->block_size; + ret = inflateReset(&s->zstream); + if (ret != Z_OK) { + return -1; + } + ret = inflate(&s->zstream, Z_FINISH); + if (ret != Z_STREAM_END || s->zstream.total_out != s->block_size) { + return -1; + } + + s->current_block = block_num; + } + return 0; +} + +static int coroutine_fn +cloop_co_preadv(BlockDriverState *bs, uint64_t offset, uint64_t bytes, + QEMUIOVector *qiov, int flags) +{ + BDRVCloopState *s = bs->opaque; + uint64_t sector_num = offset >> BDRV_SECTOR_BITS; + int nb_sectors = bytes >> BDRV_SECTOR_BITS; + int ret, i; + + assert(QEMU_IS_ALIGNED(offset, BDRV_SECTOR_SIZE)); + assert(QEMU_IS_ALIGNED(bytes, BDRV_SECTOR_SIZE)); + + qemu_co_mutex_lock(&s->lock); + + for (i = 0; i < nb_sectors; i++) { + void *data; + uint32_t sector_offset_in_block = + ((sector_num + i) % s->sectors_per_block), + block_num = (sector_num + i) / s->sectors_per_block; + if (cloop_read_block(bs, block_num) != 0) { + ret = -EIO; + goto fail; + } + + data = s->uncompressed_block + sector_offset_in_block * 512; + qemu_iovec_from_buf(qiov, i * 512, data, 512); + } + + ret = 0; +fail: + qemu_co_mutex_unlock(&s->lock); + + return ret; +} + +static void cloop_close(BlockDriverState *bs) +{ + BDRVCloopState *s = bs->opaque; + g_free(s->offsets); + g_free(s->compressed_block); + g_free(s->uncompressed_block); + inflateEnd(&s->zstream); +} + +static BlockDriver bdrv_cloop = { + .format_name = "cloop", + .instance_size = sizeof(BDRVCloopState), + .bdrv_probe = cloop_probe, + .bdrv_open = cloop_open, + .bdrv_child_perm = bdrv_default_perms, + .bdrv_refresh_limits = cloop_refresh_limits, + .bdrv_co_preadv = cloop_co_preadv, + .bdrv_close = cloop_close, + .is_format = true, +}; + +static void bdrv_cloop_init(void) +{ + bdrv_register(&bdrv_cloop); +} + +block_init(bdrv_cloop_init); diff --git a/block/commit.c b/block/commit.c new file mode 100644 index 000000000..71db7ba74 --- /dev/null +++ b/block/commit.c @@ -0,0 +1,575 @@ +/* + * Live block commit + * + * Copyright Red Hat, Inc. 2012 + * + * Authors: + * Jeff Cody + * Based on stream.c by Stefan Hajnoczi + * + * This work is licensed under the terms of the GNU LGPL, version 2 or later. + * See the COPYING.LIB file in the top-level directory. + * + */ + +#include "qemu/osdep.h" +#include "qemu/cutils.h" +#include "trace.h" +#include "block/block_int.h" +#include "block/blockjob_int.h" +#include "qapi/error.h" +#include "qapi/qmp/qerror.h" +#include "qemu/ratelimit.h" +#include "sysemu/block-backend.h" + +enum { + /* + * Size of data buffer for populating the image file. This should be large + * enough to process multiple clusters in a single call, so that populating + * contiguous regions of the image is efficient. + */ + COMMIT_BUFFER_SIZE = 512 * 1024, /* in bytes */ +}; + +typedef struct CommitBlockJob { + BlockJob common; + BlockDriverState *commit_top_bs; + BlockBackend *top; + BlockBackend *base; + BlockDriverState *base_bs; + BlockDriverState *base_overlay; + BlockdevOnError on_error; + bool base_read_only; + bool chain_frozen; + char *backing_file_str; +} CommitBlockJob; + +static int commit_prepare(Job *job) +{ + CommitBlockJob *s = container_of(job, CommitBlockJob, common.job); + + bdrv_unfreeze_backing_chain(s->commit_top_bs, s->base_bs); + s->chain_frozen = false; + + /* Remove base node parent that still uses BLK_PERM_WRITE/RESIZE before + * the normal backing chain can be restored. */ + blk_unref(s->base); + s->base = NULL; + + /* FIXME: bdrv_drop_intermediate treats total failures and partial failures + * identically. Further work is needed to disambiguate these cases. */ + return bdrv_drop_intermediate(s->commit_top_bs, s->base_bs, + s->backing_file_str); +} + +static void commit_abort(Job *job) +{ + CommitBlockJob *s = container_of(job, CommitBlockJob, common.job); + BlockDriverState *top_bs = blk_bs(s->top); + + if (s->chain_frozen) { + bdrv_unfreeze_backing_chain(s->commit_top_bs, s->base_bs); + } + + /* Make sure commit_top_bs and top stay around until bdrv_replace_node() */ + bdrv_ref(top_bs); + bdrv_ref(s->commit_top_bs); + + if (s->base) { + blk_unref(s->base); + } + + /* free the blockers on the intermediate nodes so that bdrv_replace_nodes + * can succeed */ + block_job_remove_all_bdrv(&s->common); + + /* If bdrv_drop_intermediate() failed (or was not invoked), remove the + * commit filter driver from the backing chain now. Do this as the final + * step so that the 'consistent read' permission can be granted. + * + * XXX Can (or should) we somehow keep 'consistent read' blocked even + * after the failed/cancelled commit job is gone? If we already wrote + * something to base, the intermediate images aren't valid any more. */ + bdrv_replace_node(s->commit_top_bs, s->commit_top_bs->backing->bs, + &error_abort); + + bdrv_unref(s->commit_top_bs); + bdrv_unref(top_bs); +} + +static void commit_clean(Job *job) +{ + CommitBlockJob *s = container_of(job, CommitBlockJob, common.job); + + /* restore base open flags here if appropriate (e.g., change the base back + * to r/o). These reopens do not need to be atomic, since we won't abort + * even on failure here */ + if (s->base_read_only) { + bdrv_reopen_set_read_only(s->base_bs, true, NULL); + } + + g_free(s->backing_file_str); + blk_unref(s->top); +} + +static int coroutine_fn commit_run(Job *job, Error **errp) +{ + CommitBlockJob *s = container_of(job, CommitBlockJob, common.job); + int64_t offset; + uint64_t delay_ns = 0; + int ret = 0; + int64_t n = 0; /* bytes */ + void *buf = NULL; + int64_t len, base_len; + + ret = len = blk_getlength(s->top); + if (len < 0) { + goto out; + } + job_progress_set_remaining(&s->common.job, len); + + ret = base_len = blk_getlength(s->base); + if (base_len < 0) { + goto out; + } + + if (base_len < len) { + ret = blk_truncate(s->base, len, false, PREALLOC_MODE_OFF, 0, NULL); + if (ret) { + goto out; + } + } + + buf = blk_blockalign(s->top, COMMIT_BUFFER_SIZE); + + for (offset = 0; offset < len; offset += n) { + bool copy; + bool error_in_source = true; + + /* Note that even when no rate limit is applied we need to yield + * with no pending I/O here so that bdrv_drain_all() returns. + */ + job_sleep_ns(&s->common.job, delay_ns); + if (job_is_cancelled(&s->common.job)) { + break; + } + /* Copy if allocated above the base */ + ret = bdrv_is_allocated_above(blk_bs(s->top), s->base_overlay, true, + offset, COMMIT_BUFFER_SIZE, &n); + copy = (ret > 0); + trace_commit_one_iteration(s, offset, n, ret); + if (copy) { + assert(n < SIZE_MAX); + + ret = blk_co_pread(s->top, offset, n, buf, 0); + if (ret >= 0) { + ret = blk_co_pwrite(s->base, offset, n, buf, 0); + if (ret < 0) { + error_in_source = false; + } + } + } + if (ret < 0) { + BlockErrorAction action = + block_job_error_action(&s->common, s->on_error, + error_in_source, -ret); + if (action == BLOCK_ERROR_ACTION_REPORT) { + goto out; + } else { + n = 0; + continue; + } + } + /* Publish progress */ + job_progress_update(&s->common.job, n); + + if (copy) { + delay_ns = block_job_ratelimit_get_delay(&s->common, n); + } else { + delay_ns = 0; + } + } + + ret = 0; + +out: + qemu_vfree(buf); + + return ret; +} + +static const BlockJobDriver commit_job_driver = { + .job_driver = { + .instance_size = sizeof(CommitBlockJob), + .job_type = JOB_TYPE_COMMIT, + .free = block_job_free, + .user_resume = block_job_user_resume, + .run = commit_run, + .prepare = commit_prepare, + .abort = commit_abort, + .clean = commit_clean + }, +}; + +static int coroutine_fn bdrv_commit_top_preadv(BlockDriverState *bs, + uint64_t offset, uint64_t bytes, QEMUIOVector *qiov, int flags) +{ + return bdrv_co_preadv(bs->backing, offset, bytes, qiov, flags); +} + +static void bdrv_commit_top_refresh_filename(BlockDriverState *bs) +{ + pstrcpy(bs->exact_filename, sizeof(bs->exact_filename), + bs->backing->bs->filename); +} + +static void bdrv_commit_top_child_perm(BlockDriverState *bs, BdrvChild *c, + BdrvChildRole role, + BlockReopenQueue *reopen_queue, + uint64_t perm, uint64_t shared, + uint64_t *nperm, uint64_t *nshared) +{ + *nperm = 0; + *nshared = BLK_PERM_ALL; +} + +/* Dummy node that provides consistent read to its users without requiring it + * from its backing file and that allows writes on the backing file chain. */ +static BlockDriver bdrv_commit_top = { + .format_name = "commit_top", + .bdrv_co_preadv = bdrv_commit_top_preadv, + .bdrv_refresh_filename = bdrv_commit_top_refresh_filename, + .bdrv_child_perm = bdrv_commit_top_child_perm, + + .is_filter = true, +}; + +void commit_start(const char *job_id, BlockDriverState *bs, + BlockDriverState *base, BlockDriverState *top, + int creation_flags, int64_t speed, + BlockdevOnError on_error, const char *backing_file_str, + const char *filter_node_name, Error **errp) +{ + CommitBlockJob *s; + BlockDriverState *iter; + BlockDriverState *commit_top_bs = NULL; + BlockDriverState *filtered_base; + Error *local_err = NULL; + int64_t base_size, top_size; + uint64_t base_perms, iter_shared_perms; + int ret; + + assert(top != bs); + if (bdrv_skip_filters(top) == bdrv_skip_filters(base)) { + error_setg(errp, "Invalid files for merge: top and base are the same"); + return; + } + + base_size = bdrv_getlength(base); + if (base_size < 0) { + error_setg_errno(errp, -base_size, "Could not inquire base image size"); + return; + } + + top_size = bdrv_getlength(top); + if (top_size < 0) { + error_setg_errno(errp, -top_size, "Could not inquire top image size"); + return; + } + + base_perms = BLK_PERM_CONSISTENT_READ | BLK_PERM_WRITE; + if (base_size < top_size) { + base_perms |= BLK_PERM_RESIZE; + } + + s = block_job_create(job_id, &commit_job_driver, NULL, bs, 0, BLK_PERM_ALL, + speed, creation_flags, NULL, NULL, errp); + if (!s) { + return; + } + + /* convert base to r/w, if necessary */ + s->base_read_only = bdrv_is_read_only(base); + if (s->base_read_only) { + if (bdrv_reopen_set_read_only(base, false, errp) != 0) { + goto fail; + } + } + + /* Insert commit_top block node above top, so we can block consistent read + * on the backing chain below it */ + commit_top_bs = bdrv_new_open_driver(&bdrv_commit_top, filter_node_name, 0, + errp); + if (commit_top_bs == NULL) { + goto fail; + } + if (!filter_node_name) { + commit_top_bs->implicit = true; + } + + /* So that we can always drop this node */ + commit_top_bs->never_freeze = true; + + commit_top_bs->total_sectors = top->total_sectors; + + bdrv_append(commit_top_bs, top, &local_err); + if (local_err) { + commit_top_bs = NULL; + error_propagate(errp, local_err); + goto fail; + } + + s->commit_top_bs = commit_top_bs; + + /* + * Block all nodes between top and base, because they will + * disappear from the chain after this operation. + * Note that this assumes that the user is fine with removing all + * nodes (including R/W filters) between top and base. Assuring + * this is the responsibility of the interface (i.e. whoever calls + * commit_start()). + */ + s->base_overlay = bdrv_find_overlay(top, base); + assert(s->base_overlay); + + /* + * The topmost node with + * bdrv_skip_filters(filtered_base) == bdrv_skip_filters(base) + */ + filtered_base = bdrv_cow_bs(s->base_overlay); + assert(bdrv_skip_filters(filtered_base) == bdrv_skip_filters(base)); + + /* + * XXX BLK_PERM_WRITE needs to be allowed so we don't block ourselves + * at s->base (if writes are blocked for a node, they are also blocked + * for its backing file). The other options would be a second filter + * driver above s->base. + */ + iter_shared_perms = BLK_PERM_WRITE_UNCHANGED | BLK_PERM_WRITE; + + for (iter = top; iter != base; iter = bdrv_filter_or_cow_bs(iter)) { + if (iter == filtered_base) { + /* + * From here on, all nodes are filters on the base. This + * allows us to share BLK_PERM_CONSISTENT_READ. + */ + iter_shared_perms |= BLK_PERM_CONSISTENT_READ; + } + + ret = block_job_add_bdrv(&s->common, "intermediate node", iter, 0, + iter_shared_perms, errp); + if (ret < 0) { + goto fail; + } + } + + if (bdrv_freeze_backing_chain(commit_top_bs, base, errp) < 0) { + goto fail; + } + s->chain_frozen = true; + + ret = block_job_add_bdrv(&s->common, "base", base, 0, BLK_PERM_ALL, errp); + if (ret < 0) { + goto fail; + } + + s->base = blk_new(s->common.job.aio_context, + base_perms, + BLK_PERM_CONSISTENT_READ + | BLK_PERM_GRAPH_MOD + | BLK_PERM_WRITE_UNCHANGED); + ret = blk_insert_bs(s->base, base, errp); + if (ret < 0) { + goto fail; + } + blk_set_disable_request_queuing(s->base, true); + s->base_bs = base; + + /* Required permissions are already taken with block_job_add_bdrv() */ + s->top = blk_new(s->common.job.aio_context, 0, BLK_PERM_ALL); + ret = blk_insert_bs(s->top, top, errp); + if (ret < 0) { + goto fail; + } + blk_set_disable_request_queuing(s->top, true); + + s->backing_file_str = g_strdup(backing_file_str); + s->on_error = on_error; + + trace_commit_start(bs, base, top, s); + job_start(&s->common.job); + return; + +fail: + if (s->chain_frozen) { + bdrv_unfreeze_backing_chain(commit_top_bs, base); + } + if (s->base) { + blk_unref(s->base); + } + if (s->top) { + blk_unref(s->top); + } + if (s->base_read_only) { + bdrv_reopen_set_read_only(base, true, NULL); + } + job_early_fail(&s->common.job); + /* commit_top_bs has to be replaced after deleting the block job, + * otherwise this would fail because of lack of permissions. */ + if (commit_top_bs) { + bdrv_replace_node(commit_top_bs, top, &error_abort); + } +} + + +#define COMMIT_BUF_SIZE (2048 * BDRV_SECTOR_SIZE) + +/* commit COW file into the raw image */ +int bdrv_commit(BlockDriverState *bs) +{ + BlockBackend *src, *backing; + BlockDriverState *backing_file_bs = NULL; + BlockDriverState *commit_top_bs = NULL; + BlockDriver *drv = bs->drv; + AioContext *ctx; + int64_t offset, length, backing_length; + int ro; + int64_t n; + int ret = 0; + uint8_t *buf = NULL; + Error *local_err = NULL; + + if (!drv) + return -ENOMEDIUM; + + backing_file_bs = bdrv_cow_bs(bs); + + if (!backing_file_bs) { + return -ENOTSUP; + } + + if (bdrv_op_is_blocked(bs, BLOCK_OP_TYPE_COMMIT_SOURCE, NULL) || + bdrv_op_is_blocked(backing_file_bs, BLOCK_OP_TYPE_COMMIT_TARGET, NULL)) + { + return -EBUSY; + } + + ro = backing_file_bs->read_only; + + if (ro) { + if (bdrv_reopen_set_read_only(backing_file_bs, false, NULL)) { + return -EACCES; + } + } + + ctx = bdrv_get_aio_context(bs); + /* WRITE_UNCHANGED is required for bdrv_make_empty() */ + src = blk_new(ctx, BLK_PERM_CONSISTENT_READ | BLK_PERM_WRITE_UNCHANGED, + BLK_PERM_ALL); + backing = blk_new(ctx, BLK_PERM_WRITE | BLK_PERM_RESIZE, BLK_PERM_ALL); + + ret = blk_insert_bs(src, bs, &local_err); + if (ret < 0) { + error_report_err(local_err); + goto ro_cleanup; + } + + /* Insert commit_top block node above backing, so we can write to it */ + commit_top_bs = bdrv_new_open_driver(&bdrv_commit_top, NULL, BDRV_O_RDWR, + &local_err); + if (commit_top_bs == NULL) { + error_report_err(local_err); + goto ro_cleanup; + } + + bdrv_set_backing_hd(commit_top_bs, backing_file_bs, &error_abort); + bdrv_set_backing_hd(bs, commit_top_bs, &error_abort); + + ret = blk_insert_bs(backing, backing_file_bs, &local_err); + if (ret < 0) { + error_report_err(local_err); + goto ro_cleanup; + } + + length = blk_getlength(src); + if (length < 0) { + ret = length; + goto ro_cleanup; + } + + backing_length = blk_getlength(backing); + if (backing_length < 0) { + ret = backing_length; + goto ro_cleanup; + } + + /* If our top snapshot is larger than the backing file image, + * grow the backing file image if possible. If not possible, + * we must return an error */ + if (length > backing_length) { + ret = blk_truncate(backing, length, false, PREALLOC_MODE_OFF, 0, + &local_err); + if (ret < 0) { + error_report_err(local_err); + goto ro_cleanup; + } + } + + /* blk_try_blockalign() for src will choose an alignment that works for + * backing as well, so no need to compare the alignment manually. */ + buf = blk_try_blockalign(src, COMMIT_BUF_SIZE); + if (buf == NULL) { + ret = -ENOMEM; + goto ro_cleanup; + } + + for (offset = 0; offset < length; offset += n) { + ret = bdrv_is_allocated(bs, offset, COMMIT_BUF_SIZE, &n); + if (ret < 0) { + goto ro_cleanup; + } + if (ret) { + ret = blk_pread(src, offset, buf, n); + if (ret < 0) { + goto ro_cleanup; + } + + ret = blk_pwrite(backing, offset, buf, n, 0); + if (ret < 0) { + goto ro_cleanup; + } + } + } + + ret = blk_make_empty(src, NULL); + /* Ignore -ENOTSUP */ + if (ret < 0 && ret != -ENOTSUP) { + goto ro_cleanup; + } + + blk_flush(src); + + /* + * Make sure all data we wrote to the backing device is actually + * stable on disk. + */ + blk_flush(backing); + + ret = 0; +ro_cleanup: + qemu_vfree(buf); + + blk_unref(backing); + if (bdrv_cow_bs(bs) != backing_file_bs) { + bdrv_set_backing_hd(bs, backing_file_bs, &error_abort); + } + bdrv_unref(commit_top_bs); + blk_unref(src); + + if (ro) { + /* ignoring error return here */ + bdrv_reopen_set_read_only(backing_file_bs, true, NULL); + } + + return ret; +} diff --git a/block/copy-on-read.c b/block/copy-on-read.c new file mode 100644 index 000000000..2816e61af --- /dev/null +++ b/block/copy-on-read.c @@ -0,0 +1,158 @@ +/* + * Copy-on-read filter block driver + * + * Copyright (c) 2018 Red Hat, Inc. + * + * Author: + * Max Reitz + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation; either version 2 or + * (at your option) version 3 of the License. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, see . + */ + +#include "qemu/osdep.h" +#include "block/block_int.h" +#include "qemu/module.h" + + +static int cor_open(BlockDriverState *bs, QDict *options, int flags, + Error **errp) +{ + bs->file = bdrv_open_child(NULL, options, "file", bs, &child_of_bds, + BDRV_CHILD_FILTERED | BDRV_CHILD_PRIMARY, + false, errp); + if (!bs->file) { + return -EINVAL; + } + + bs->supported_write_flags = BDRV_REQ_WRITE_UNCHANGED | + (BDRV_REQ_FUA & bs->file->bs->supported_write_flags); + + bs->supported_zero_flags = BDRV_REQ_WRITE_UNCHANGED | + ((BDRV_REQ_FUA | BDRV_REQ_MAY_UNMAP | BDRV_REQ_NO_FALLBACK) & + bs->file->bs->supported_zero_flags); + + return 0; +} + + +#define PERM_PASSTHROUGH (BLK_PERM_CONSISTENT_READ \ + | BLK_PERM_WRITE \ + | BLK_PERM_RESIZE) +#define PERM_UNCHANGED (BLK_PERM_ALL & ~PERM_PASSTHROUGH) + +static void cor_child_perm(BlockDriverState *bs, BdrvChild *c, + BdrvChildRole role, + BlockReopenQueue *reopen_queue, + uint64_t perm, uint64_t shared, + uint64_t *nperm, uint64_t *nshared) +{ + *nperm = perm & PERM_PASSTHROUGH; + *nshared = (shared & PERM_PASSTHROUGH) | PERM_UNCHANGED; + + /* We must not request write permissions for an inactive node, the child + * cannot provide it. */ + if (!(bs->open_flags & BDRV_O_INACTIVE)) { + *nperm |= BLK_PERM_WRITE_UNCHANGED; + } +} + + +static int64_t cor_getlength(BlockDriverState *bs) +{ + return bdrv_getlength(bs->file->bs); +} + + +static int coroutine_fn cor_co_preadv(BlockDriverState *bs, + uint64_t offset, uint64_t bytes, + QEMUIOVector *qiov, int flags) +{ + return bdrv_co_preadv(bs->file, offset, bytes, qiov, + flags | BDRV_REQ_COPY_ON_READ); +} + + +static int coroutine_fn cor_co_pwritev(BlockDriverState *bs, + uint64_t offset, uint64_t bytes, + QEMUIOVector *qiov, int flags) +{ + + return bdrv_co_pwritev(bs->file, offset, bytes, qiov, flags); +} + + +static int coroutine_fn cor_co_pwrite_zeroes(BlockDriverState *bs, + int64_t offset, int bytes, + BdrvRequestFlags flags) +{ + return bdrv_co_pwrite_zeroes(bs->file, offset, bytes, flags); +} + + +static int coroutine_fn cor_co_pdiscard(BlockDriverState *bs, + int64_t offset, int bytes) +{ + return bdrv_co_pdiscard(bs->file, offset, bytes); +} + + +static int coroutine_fn cor_co_pwritev_compressed(BlockDriverState *bs, + uint64_t offset, + uint64_t bytes, + QEMUIOVector *qiov) +{ + return bdrv_co_pwritev(bs->file, offset, bytes, qiov, + BDRV_REQ_WRITE_COMPRESSED); +} + + +static void cor_eject(BlockDriverState *bs, bool eject_flag) +{ + bdrv_eject(bs->file->bs, eject_flag); +} + + +static void cor_lock_medium(BlockDriverState *bs, bool locked) +{ + bdrv_lock_medium(bs->file->bs, locked); +} + + +static BlockDriver bdrv_copy_on_read = { + .format_name = "copy-on-read", + + .bdrv_open = cor_open, + .bdrv_child_perm = cor_child_perm, + + .bdrv_getlength = cor_getlength, + + .bdrv_co_preadv = cor_co_preadv, + .bdrv_co_pwritev = cor_co_pwritev, + .bdrv_co_pwrite_zeroes = cor_co_pwrite_zeroes, + .bdrv_co_pdiscard = cor_co_pdiscard, + .bdrv_co_pwritev_compressed = cor_co_pwritev_compressed, + + .bdrv_eject = cor_eject, + .bdrv_lock_medium = cor_lock_medium, + + .has_variable_length = true, + .is_filter = true, +}; + +static void bdrv_copy_on_read_init(void) +{ + bdrv_register(&bdrv_copy_on_read); +} + +block_init(bdrv_copy_on_read_init); diff --git a/block/coroutines.h b/block/coroutines.h new file mode 100644 index 000000000..4cfb4946e --- /dev/null +++ b/block/coroutines.h @@ -0,0 +1,69 @@ +/* + * Block layer I/O functions + * + * Copyright (c) 2003 Fabrice Bellard + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ + +#ifndef BLOCK_COROUTINES_INT_H +#define BLOCK_COROUTINES_INT_H + +#include "block/block_int.h" + +int coroutine_fn bdrv_co_check(BlockDriverState *bs, + BdrvCheckResult *res, BdrvCheckMode fix); +int coroutine_fn bdrv_co_invalidate_cache(BlockDriverState *bs, Error **errp); + +int generated_co_wrapper +bdrv_preadv(BdrvChild *child, int64_t offset, unsigned int bytes, + QEMUIOVector *qiov, BdrvRequestFlags flags); +int generated_co_wrapper +bdrv_pwritev(BdrvChild *child, int64_t offset, unsigned int bytes, + QEMUIOVector *qiov, BdrvRequestFlags flags); + +int coroutine_fn +bdrv_co_common_block_status_above(BlockDriverState *bs, + BlockDriverState *base, + bool include_base, + bool want_zero, + int64_t offset, + int64_t bytes, + int64_t *pnum, + int64_t *map, + BlockDriverState **file, + int *depth); +int generated_co_wrapper +bdrv_common_block_status_above(BlockDriverState *bs, + BlockDriverState *base, + bool include_base, + bool want_zero, + int64_t offset, + int64_t bytes, + int64_t *pnum, + int64_t *map, + BlockDriverState **file, + int *depth); + +int coroutine_fn bdrv_co_readv_vmstate(BlockDriverState *bs, + QEMUIOVector *qiov, int64_t pos); +int coroutine_fn bdrv_co_writev_vmstate(BlockDriverState *bs, + QEMUIOVector *qiov, int64_t pos); + +#endif /* BLOCK_COROUTINES_INT_H */ diff --git a/block/create.c b/block/create.c new file mode 100644 index 000000000..89812669d --- /dev/null +++ b/block/create.c @@ -0,0 +1,99 @@ +/* + * Block layer code related to image creation + * + * Copyright (c) 2018 Kevin Wolf + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ + +#include "qemu/osdep.h" +#include "block/block_int.h" +#include "qemu/job.h" +#include "qemu/main-loop.h" +#include "qapi/qapi-commands-block-core.h" +#include "qapi/qapi-visit-block-core.h" +#include "qapi/clone-visitor.h" +#include "qapi/error.h" + +typedef struct BlockdevCreateJob { + Job common; + BlockDriver *drv; + BlockdevCreateOptions *opts; +} BlockdevCreateJob; + +static int coroutine_fn blockdev_create_run(Job *job, Error **errp) +{ + BlockdevCreateJob *s = container_of(job, BlockdevCreateJob, common); + int ret; + + job_progress_set_remaining(&s->common, 1); + ret = s->drv->bdrv_co_create(s->opts, errp); + job_progress_update(&s->common, 1); + + qapi_free_BlockdevCreateOptions(s->opts); + + return ret; +} + +static const JobDriver blockdev_create_job_driver = { + .instance_size = sizeof(BlockdevCreateJob), + .job_type = JOB_TYPE_CREATE, + .run = blockdev_create_run, +}; + +void qmp_blockdev_create(const char *job_id, BlockdevCreateOptions *options, + Error **errp) +{ + BlockdevCreateJob *s; + const char *fmt = BlockdevDriver_str(options->driver); + BlockDriver *drv = bdrv_find_format(fmt); + + if (!drv) { + error_setg(errp, "Block driver '%s' not found or not supported", fmt); + return; + } + + /* If the driver is in the schema, we know that it exists. But it may not + * be whitelisted. */ + if (bdrv_uses_whitelist() && !bdrv_is_whitelisted(drv, false)) { + error_setg(errp, "Driver is not whitelisted"); + return; + } + + /* Error out if the driver doesn't support .bdrv_co_create */ + if (!drv->bdrv_co_create) { + error_setg(errp, "Driver does not support blockdev-create"); + return; + } + + /* Create the block job */ + /* TODO Running in the main context. Block drivers need to error out or add + * locking when they use a BDS in a different AioContext. */ + s = job_create(job_id, &blockdev_create_job_driver, NULL, + qemu_get_aio_context(), JOB_DEFAULT | JOB_MANUAL_DISMISS, + NULL, NULL, errp); + if (!s) { + return; + } + + s->drv = drv, + s->opts = QAPI_CLONE(BlockdevCreateOptions, options), + + job_start(&s->common); +} diff --git a/block/crypto.c b/block/crypto.c new file mode 100644 index 000000000..aef5a5721 --- /dev/null +++ b/block/crypto.c @@ -0,0 +1,954 @@ +/* + * QEMU block full disk encryption + * + * Copyright (c) 2015-2016 Red Hat, Inc. + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, see . + * + */ + +#include "qemu/osdep.h" + +#include "block/block_int.h" +#include "block/qdict.h" +#include "sysemu/block-backend.h" +#include "crypto/block.h" +#include "qapi/opts-visitor.h" +#include "qapi/qapi-visit-crypto.h" +#include "qapi/qobject-input-visitor.h" +#include "qapi/error.h" +#include "qemu/module.h" +#include "qemu/option.h" +#include "qemu/cutils.h" +#include "crypto.h" + +typedef struct BlockCrypto BlockCrypto; + +struct BlockCrypto { + QCryptoBlock *block; + bool updating_keys; +}; + + +static int block_crypto_probe_generic(QCryptoBlockFormat format, + const uint8_t *buf, + int buf_size, + const char *filename) +{ + if (qcrypto_block_has_format(format, buf, buf_size)) { + return 100; + } else { + return 0; + } +} + + +static ssize_t block_crypto_read_func(QCryptoBlock *block, + size_t offset, + uint8_t *buf, + size_t buflen, + void *opaque, + Error **errp) +{ + BlockDriverState *bs = opaque; + ssize_t ret; + + ret = bdrv_pread(bs->file, offset, buf, buflen); + if (ret < 0) { + error_setg_errno(errp, -ret, "Could not read encryption header"); + return ret; + } + return ret; +} + +static ssize_t block_crypto_write_func(QCryptoBlock *block, + size_t offset, + const uint8_t *buf, + size_t buflen, + void *opaque, + Error **errp) +{ + BlockDriverState *bs = opaque; + ssize_t ret; + + ret = bdrv_pwrite(bs->file, offset, buf, buflen); + if (ret < 0) { + error_setg_errno(errp, -ret, "Could not write encryption header"); + return ret; + } + return ret; +} + + +struct BlockCryptoCreateData { + BlockBackend *blk; + uint64_t size; + PreallocMode prealloc; +}; + + +static ssize_t block_crypto_create_write_func(QCryptoBlock *block, + size_t offset, + const uint8_t *buf, + size_t buflen, + void *opaque, + Error **errp) +{ + struct BlockCryptoCreateData *data = opaque; + ssize_t ret; + + ret = blk_pwrite(data->blk, offset, buf, buflen, 0); + if (ret < 0) { + error_setg_errno(errp, -ret, "Could not write encryption header"); + return ret; + } + return ret; +} + +static ssize_t block_crypto_create_init_func(QCryptoBlock *block, + size_t headerlen, + void *opaque, + Error **errp) +{ + struct BlockCryptoCreateData *data = opaque; + Error *local_error = NULL; + int ret; + + if (data->size > INT64_MAX || headerlen > INT64_MAX - data->size) { + ret = -EFBIG; + goto error; + } + + /* User provided size should reflect amount of space made + * available to the guest, so we must take account of that + * which will be used by the crypto header + */ + ret = blk_truncate(data->blk, data->size + headerlen, false, + data->prealloc, 0, &local_error); + + if (ret >= 0) { + return ret; + } + +error: + if (ret == -EFBIG) { + /* Replace the error message with a better one */ + error_free(local_error); + error_setg(errp, "The requested file size is too large"); + } else { + error_propagate(errp, local_error); + } + + return ret; +} + + +static QemuOptsList block_crypto_runtime_opts_luks = { + .name = "crypto", + .head = QTAILQ_HEAD_INITIALIZER(block_crypto_runtime_opts_luks.head), + .desc = { + BLOCK_CRYPTO_OPT_DEF_LUKS_KEY_SECRET(""), + { /* end of list */ } + }, +}; + + +static QemuOptsList block_crypto_create_opts_luks = { + .name = "crypto", + .head = QTAILQ_HEAD_INITIALIZER(block_crypto_create_opts_luks.head), + .desc = { + { + .name = BLOCK_OPT_SIZE, + .type = QEMU_OPT_SIZE, + .help = "Virtual disk size" + }, + BLOCK_CRYPTO_OPT_DEF_LUKS_KEY_SECRET(""), + BLOCK_CRYPTO_OPT_DEF_LUKS_CIPHER_ALG(""), + BLOCK_CRYPTO_OPT_DEF_LUKS_CIPHER_MODE(""), + BLOCK_CRYPTO_OPT_DEF_LUKS_IVGEN_ALG(""), + BLOCK_CRYPTO_OPT_DEF_LUKS_IVGEN_HASH_ALG(""), + BLOCK_CRYPTO_OPT_DEF_LUKS_HASH_ALG(""), + BLOCK_CRYPTO_OPT_DEF_LUKS_ITER_TIME(""), + { /* end of list */ } + }, +}; + + +static QemuOptsList block_crypto_amend_opts_luks = { + .name = "crypto", + .head = QTAILQ_HEAD_INITIALIZER(block_crypto_create_opts_luks.head), + .desc = { + BLOCK_CRYPTO_OPT_DEF_LUKS_STATE(""), + BLOCK_CRYPTO_OPT_DEF_LUKS_KEYSLOT(""), + BLOCK_CRYPTO_OPT_DEF_LUKS_OLD_SECRET(""), + BLOCK_CRYPTO_OPT_DEF_LUKS_NEW_SECRET(""), + BLOCK_CRYPTO_OPT_DEF_LUKS_ITER_TIME(""), + { /* end of list */ } + }, +}; + +QCryptoBlockOpenOptions * +block_crypto_open_opts_init(QDict *opts, Error **errp) +{ + Visitor *v; + QCryptoBlockOpenOptions *ret; + + v = qobject_input_visitor_new_flat_confused(opts, errp); + if (!v) { + return NULL; + } + + visit_type_QCryptoBlockOpenOptions(v, NULL, &ret, errp); + + visit_free(v); + return ret; +} + + +QCryptoBlockCreateOptions * +block_crypto_create_opts_init(QDict *opts, Error **errp) +{ + Visitor *v; + QCryptoBlockCreateOptions *ret; + + v = qobject_input_visitor_new_flat_confused(opts, errp); + if (!v) { + return NULL; + } + + visit_type_QCryptoBlockCreateOptions(v, NULL, &ret, errp); + + visit_free(v); + return ret; +} + +QCryptoBlockAmendOptions * +block_crypto_amend_opts_init(QDict *opts, Error **errp) +{ + Visitor *v; + QCryptoBlockAmendOptions *ret; + + v = qobject_input_visitor_new_flat_confused(opts, errp); + if (!v) { + return NULL; + } + + visit_type_QCryptoBlockAmendOptions(v, NULL, &ret, errp); + + visit_free(v); + return ret; +} + + +static int block_crypto_open_generic(QCryptoBlockFormat format, + QemuOptsList *opts_spec, + BlockDriverState *bs, + QDict *options, + int flags, + Error **errp) +{ + BlockCrypto *crypto = bs->opaque; + QemuOpts *opts = NULL; + int ret = -EINVAL; + QCryptoBlockOpenOptions *open_opts = NULL; + unsigned int cflags = 0; + QDict *cryptoopts = NULL; + + bs->file = bdrv_open_child(NULL, options, "file", bs, &child_of_bds, + BDRV_CHILD_IMAGE, false, errp); + if (!bs->file) { + return -EINVAL; + } + + bs->supported_write_flags = BDRV_REQ_FUA & + bs->file->bs->supported_write_flags; + + opts = qemu_opts_create(opts_spec, NULL, 0, &error_abort); + if (!qemu_opts_absorb_qdict(opts, options, errp)) { + goto cleanup; + } + + cryptoopts = qemu_opts_to_qdict(opts, NULL); + qdict_put_str(cryptoopts, "format", QCryptoBlockFormat_str(format)); + + open_opts = block_crypto_open_opts_init(cryptoopts, errp); + if (!open_opts) { + goto cleanup; + } + + if (flags & BDRV_O_NO_IO) { + cflags |= QCRYPTO_BLOCK_OPEN_NO_IO; + } + crypto->block = qcrypto_block_open(open_opts, NULL, + block_crypto_read_func, + bs, + cflags, + 1, + errp); + + if (!crypto->block) { + ret = -EIO; + goto cleanup; + } + + bs->encrypted = true; + + ret = 0; + cleanup: + qobject_unref(cryptoopts); + qapi_free_QCryptoBlockOpenOptions(open_opts); + return ret; +} + + +static int block_crypto_co_create_generic(BlockDriverState *bs, + int64_t size, + QCryptoBlockCreateOptions *opts, + PreallocMode prealloc, + Error **errp) +{ + int ret; + BlockBackend *blk; + QCryptoBlock *crypto = NULL; + struct BlockCryptoCreateData data; + + blk = blk_new_with_bs(bs, BLK_PERM_WRITE | BLK_PERM_RESIZE, BLK_PERM_ALL, + errp); + if (!blk) { + ret = -EPERM; + goto cleanup; + } + + if (prealloc == PREALLOC_MODE_METADATA) { + prealloc = PREALLOC_MODE_OFF; + } + + data = (struct BlockCryptoCreateData) { + .blk = blk, + .size = size, + .prealloc = prealloc, + }; + + crypto = qcrypto_block_create(opts, NULL, + block_crypto_create_init_func, + block_crypto_create_write_func, + &data, + errp); + + if (!crypto) { + ret = -EIO; + goto cleanup; + } + + ret = 0; + cleanup: + qcrypto_block_free(crypto); + blk_unref(blk); + return ret; +} + +static int coroutine_fn +block_crypto_co_truncate(BlockDriverState *bs, int64_t offset, bool exact, + PreallocMode prealloc, BdrvRequestFlags flags, + Error **errp) +{ + BlockCrypto *crypto = bs->opaque; + uint64_t payload_offset = + qcrypto_block_get_payload_offset(crypto->block); + + if (payload_offset > INT64_MAX - offset) { + error_setg(errp, "The requested file size is too large"); + return -EFBIG; + } + + offset += payload_offset; + + return bdrv_co_truncate(bs->file, offset, exact, prealloc, 0, errp); +} + +static void block_crypto_close(BlockDriverState *bs) +{ + BlockCrypto *crypto = bs->opaque; + qcrypto_block_free(crypto->block); +} + +static int block_crypto_reopen_prepare(BDRVReopenState *state, + BlockReopenQueue *queue, Error **errp) +{ + /* nothing needs checking */ + return 0; +} + +/* + * 1 MB bounce buffer gives good performance / memory tradeoff + * when using cache=none|directsync. + */ +#define BLOCK_CRYPTO_MAX_IO_SIZE (1024 * 1024) + +static coroutine_fn int +block_crypto_co_preadv(BlockDriverState *bs, uint64_t offset, uint64_t bytes, + QEMUIOVector *qiov, int flags) +{ + BlockCrypto *crypto = bs->opaque; + uint64_t cur_bytes; /* number of bytes in current iteration */ + uint64_t bytes_done = 0; + uint8_t *cipher_data = NULL; + QEMUIOVector hd_qiov; + int ret = 0; + uint64_t sector_size = qcrypto_block_get_sector_size(crypto->block); + uint64_t payload_offset = qcrypto_block_get_payload_offset(crypto->block); + + assert(!flags); + assert(payload_offset < INT64_MAX); + assert(QEMU_IS_ALIGNED(offset, sector_size)); + assert(QEMU_IS_ALIGNED(bytes, sector_size)); + + qemu_iovec_init(&hd_qiov, qiov->niov); + + /* Bounce buffer because we don't wish to expose cipher text + * in qiov which points to guest memory. + */ + cipher_data = + qemu_try_blockalign(bs->file->bs, MIN(BLOCK_CRYPTO_MAX_IO_SIZE, + qiov->size)); + if (cipher_data == NULL) { + ret = -ENOMEM; + goto cleanup; + } + + while (bytes) { + cur_bytes = MIN(bytes, BLOCK_CRYPTO_MAX_IO_SIZE); + + qemu_iovec_reset(&hd_qiov); + qemu_iovec_add(&hd_qiov, cipher_data, cur_bytes); + + ret = bdrv_co_preadv(bs->file, payload_offset + offset + bytes_done, + cur_bytes, &hd_qiov, 0); + if (ret < 0) { + goto cleanup; + } + + if (qcrypto_block_decrypt(crypto->block, offset + bytes_done, + cipher_data, cur_bytes, NULL) < 0) { + ret = -EIO; + goto cleanup; + } + + qemu_iovec_from_buf(qiov, bytes_done, cipher_data, cur_bytes); + + bytes -= cur_bytes; + bytes_done += cur_bytes; + } + + cleanup: + qemu_iovec_destroy(&hd_qiov); + qemu_vfree(cipher_data); + + return ret; +} + + +static coroutine_fn int +block_crypto_co_pwritev(BlockDriverState *bs, uint64_t offset, uint64_t bytes, + QEMUIOVector *qiov, int flags) +{ + BlockCrypto *crypto = bs->opaque; + uint64_t cur_bytes; /* number of bytes in current iteration */ + uint64_t bytes_done = 0; + uint8_t *cipher_data = NULL; + QEMUIOVector hd_qiov; + int ret = 0; + uint64_t sector_size = qcrypto_block_get_sector_size(crypto->block); + uint64_t payload_offset = qcrypto_block_get_payload_offset(crypto->block); + + assert(!(flags & ~BDRV_REQ_FUA)); + assert(payload_offset < INT64_MAX); + assert(QEMU_IS_ALIGNED(offset, sector_size)); + assert(QEMU_IS_ALIGNED(bytes, sector_size)); + + qemu_iovec_init(&hd_qiov, qiov->niov); + + /* Bounce buffer because we're not permitted to touch + * contents of qiov - it points to guest memory. + */ + cipher_data = + qemu_try_blockalign(bs->file->bs, MIN(BLOCK_CRYPTO_MAX_IO_SIZE, + qiov->size)); + if (cipher_data == NULL) { + ret = -ENOMEM; + goto cleanup; + } + + while (bytes) { + cur_bytes = MIN(bytes, BLOCK_CRYPTO_MAX_IO_SIZE); + + qemu_iovec_to_buf(qiov, bytes_done, cipher_data, cur_bytes); + + if (qcrypto_block_encrypt(crypto->block, offset + bytes_done, + cipher_data, cur_bytes, NULL) < 0) { + ret = -EIO; + goto cleanup; + } + + qemu_iovec_reset(&hd_qiov); + qemu_iovec_add(&hd_qiov, cipher_data, cur_bytes); + + ret = bdrv_co_pwritev(bs->file, payload_offset + offset + bytes_done, + cur_bytes, &hd_qiov, flags); + if (ret < 0) { + goto cleanup; + } + + bytes -= cur_bytes; + bytes_done += cur_bytes; + } + + cleanup: + qemu_iovec_destroy(&hd_qiov); + qemu_vfree(cipher_data); + + return ret; +} + +static void block_crypto_refresh_limits(BlockDriverState *bs, Error **errp) +{ + BlockCrypto *crypto = bs->opaque; + uint64_t sector_size = qcrypto_block_get_sector_size(crypto->block); + bs->bl.request_alignment = sector_size; /* No sub-sector I/O */ +} + + +static int64_t block_crypto_getlength(BlockDriverState *bs) +{ + BlockCrypto *crypto = bs->opaque; + int64_t len = bdrv_getlength(bs->file->bs); + + uint64_t offset = qcrypto_block_get_payload_offset(crypto->block); + assert(offset < INT64_MAX); + + if (offset > len) { + return -EIO; + } + + len -= offset; + + return len; +} + + +static BlockMeasureInfo *block_crypto_measure(QemuOpts *opts, + BlockDriverState *in_bs, + Error **errp) +{ + g_autoptr(QCryptoBlockCreateOptions) create_opts = NULL; + Error *local_err = NULL; + BlockMeasureInfo *info; + uint64_t size; + size_t luks_payload_size; + QDict *cryptoopts; + + /* + * Preallocation mode doesn't affect size requirements but we must consume + * the option. + */ + g_free(qemu_opt_get_del(opts, BLOCK_OPT_PREALLOC)); + + size = qemu_opt_get_size_del(opts, BLOCK_OPT_SIZE, 0); + + if (in_bs) { + int64_t ssize = bdrv_getlength(in_bs); + + if (ssize < 0) { + error_setg_errno(&local_err, -ssize, + "Unable to get image virtual_size"); + goto err; + } + + size = ssize; + } + + cryptoopts = qemu_opts_to_qdict_filtered(opts, NULL, + &block_crypto_create_opts_luks, true); + qdict_put_str(cryptoopts, "format", "luks"); + create_opts = block_crypto_create_opts_init(cryptoopts, &local_err); + qobject_unref(cryptoopts); + if (!create_opts) { + goto err; + } + + if (!qcrypto_block_calculate_payload_offset(create_opts, NULL, + &luks_payload_size, + &local_err)) { + goto err; + } + + /* + * Unallocated blocks are still encrypted so allocation status makes no + * difference to the file size. + */ + info = g_new0(BlockMeasureInfo, 1); + info->fully_allocated = luks_payload_size + size; + info->required = luks_payload_size + size; + return info; + +err: + error_propagate(errp, local_err); + return NULL; +} + + +static int block_crypto_probe_luks(const uint8_t *buf, + int buf_size, + const char *filename) { + return block_crypto_probe_generic(Q_CRYPTO_BLOCK_FORMAT_LUKS, + buf, buf_size, filename); +} + +static int block_crypto_open_luks(BlockDriverState *bs, + QDict *options, + int flags, + Error **errp) +{ + return block_crypto_open_generic(Q_CRYPTO_BLOCK_FORMAT_LUKS, + &block_crypto_runtime_opts_luks, + bs, options, flags, errp); +} + +static int coroutine_fn +block_crypto_co_create_luks(BlockdevCreateOptions *create_options, Error **errp) +{ + BlockdevCreateOptionsLUKS *luks_opts; + BlockDriverState *bs = NULL; + QCryptoBlockCreateOptions create_opts; + PreallocMode preallocation = PREALLOC_MODE_OFF; + int ret; + + assert(create_options->driver == BLOCKDEV_DRIVER_LUKS); + luks_opts = &create_options->u.luks; + + bs = bdrv_open_blockdev_ref(luks_opts->file, errp); + if (bs == NULL) { + return -EIO; + } + + create_opts = (QCryptoBlockCreateOptions) { + .format = Q_CRYPTO_BLOCK_FORMAT_LUKS, + .u.luks = *qapi_BlockdevCreateOptionsLUKS_base(luks_opts), + }; + + if (luks_opts->has_preallocation) { + preallocation = luks_opts->preallocation; + } + + ret = block_crypto_co_create_generic(bs, luks_opts->size, &create_opts, + preallocation, errp); + if (ret < 0) { + goto fail; + } + + ret = 0; +fail: + bdrv_unref(bs); + return ret; +} + +static int coroutine_fn block_crypto_co_create_opts_luks(BlockDriver *drv, + const char *filename, + QemuOpts *opts, + Error **errp) +{ + QCryptoBlockCreateOptions *create_opts = NULL; + BlockDriverState *bs = NULL; + QDict *cryptoopts; + PreallocMode prealloc; + char *buf = NULL; + int64_t size; + int ret; + Error *local_err = NULL; + + /* Parse options */ + size = qemu_opt_get_size_del(opts, BLOCK_OPT_SIZE, 0); + + buf = qemu_opt_get_del(opts, BLOCK_OPT_PREALLOC); + prealloc = qapi_enum_parse(&PreallocMode_lookup, buf, + PREALLOC_MODE_OFF, &local_err); + g_free(buf); + if (local_err) { + error_propagate(errp, local_err); + return -EINVAL; + } + + cryptoopts = qemu_opts_to_qdict_filtered(opts, NULL, + &block_crypto_create_opts_luks, + true); + + qdict_put_str(cryptoopts, "format", "luks"); + create_opts = block_crypto_create_opts_init(cryptoopts, errp); + if (!create_opts) { + ret = -EINVAL; + goto fail; + } + + /* Create protocol layer */ + ret = bdrv_create_file(filename, opts, errp); + if (ret < 0) { + goto fail; + } + + bs = bdrv_open(filename, NULL, NULL, + BDRV_O_RDWR | BDRV_O_RESIZE | BDRV_O_PROTOCOL, errp); + if (!bs) { + ret = -EINVAL; + goto fail; + } + + /* Create format layer */ + ret = block_crypto_co_create_generic(bs, size, create_opts, prealloc, errp); + if (ret < 0) { + goto fail; + } + + ret = 0; +fail: + /* + * If an error occurred, delete 'filename'. Even if the file existed + * beforehand, it has been truncated and corrupted in the process. + */ + if (ret && bs) { + Error *local_delete_err = NULL; + int r_del = bdrv_co_delete_file(bs, &local_delete_err); + /* + * ENOTSUP will happen if the block driver doesn't support + * the 'bdrv_co_delete_file' interface. This is a predictable + * scenario and shouldn't be reported back to the user. + */ + if ((r_del < 0) && (r_del != -ENOTSUP)) { + error_report_err(local_delete_err); + } + } + + bdrv_unref(bs); + qapi_free_QCryptoBlockCreateOptions(create_opts); + qobject_unref(cryptoopts); + return ret; +} + +static int block_crypto_get_info_luks(BlockDriverState *bs, + BlockDriverInfo *bdi) +{ + BlockDriverInfo subbdi; + int ret; + + ret = bdrv_get_info(bs->file->bs, &subbdi); + if (ret != 0) { + return ret; + } + + bdi->cluster_size = subbdi.cluster_size; + + return 0; +} + +static ImageInfoSpecific * +block_crypto_get_specific_info_luks(BlockDriverState *bs, Error **errp) +{ + BlockCrypto *crypto = bs->opaque; + ImageInfoSpecific *spec_info; + QCryptoBlockInfo *info; + + info = qcrypto_block_get_info(crypto->block, errp); + if (!info) { + return NULL; + } + assert(info->format == Q_CRYPTO_BLOCK_FORMAT_LUKS); + + spec_info = g_new(ImageInfoSpecific, 1); + spec_info->type = IMAGE_INFO_SPECIFIC_KIND_LUKS; + spec_info->u.luks.data = g_new(QCryptoBlockInfoLUKS, 1); + *spec_info->u.luks.data = info->u.luks; + + /* Blank out pointers we've just stolen to avoid double free */ + memset(&info->u.luks, 0, sizeof(info->u.luks)); + + qapi_free_QCryptoBlockInfo(info); + + return spec_info; +} + +static int +block_crypto_amend_options_generic_luks(BlockDriverState *bs, + QCryptoBlockAmendOptions *amend_options, + bool force, + Error **errp) +{ + BlockCrypto *crypto = bs->opaque; + int ret; + + assert(crypto); + assert(crypto->block); + + /* apply for exclusive read/write permissions to the underlying file*/ + crypto->updating_keys = true; + ret = bdrv_child_refresh_perms(bs, bs->file, errp); + if (ret) { + goto cleanup; + } + + ret = qcrypto_block_amend_options(crypto->block, + block_crypto_read_func, + block_crypto_write_func, + bs, + amend_options, + force, + errp); +cleanup: + /* release exclusive read/write permissions to the underlying file*/ + crypto->updating_keys = false; + bdrv_child_refresh_perms(bs, bs->file, errp); + return ret; +} + +static int +block_crypto_amend_options_luks(BlockDriverState *bs, + QemuOpts *opts, + BlockDriverAmendStatusCB *status_cb, + void *cb_opaque, + bool force, + Error **errp) +{ + BlockCrypto *crypto = bs->opaque; + QDict *cryptoopts = NULL; + QCryptoBlockAmendOptions *amend_options = NULL; + int ret = -EINVAL; + + assert(crypto); + assert(crypto->block); + + cryptoopts = qemu_opts_to_qdict(opts, NULL); + qdict_put_str(cryptoopts, "format", "luks"); + amend_options = block_crypto_amend_opts_init(cryptoopts, errp); + qobject_unref(cryptoopts); + if (!amend_options) { + goto cleanup; + } + ret = block_crypto_amend_options_generic_luks(bs, amend_options, + force, errp); +cleanup: + qapi_free_QCryptoBlockAmendOptions(amend_options); + return ret; +} + +static int +coroutine_fn block_crypto_co_amend_luks(BlockDriverState *bs, + BlockdevAmendOptions *opts, + bool force, + Error **errp) +{ + QCryptoBlockAmendOptions amend_opts; + + amend_opts = (QCryptoBlockAmendOptions) { + .format = Q_CRYPTO_BLOCK_FORMAT_LUKS, + .u.luks = *qapi_BlockdevAmendOptionsLUKS_base(&opts->u.luks), + }; + return block_crypto_amend_options_generic_luks(bs, &amend_opts, + force, errp); +} + +static void +block_crypto_child_perms(BlockDriverState *bs, BdrvChild *c, + const BdrvChildRole role, + BlockReopenQueue *reopen_queue, + uint64_t perm, uint64_t shared, + uint64_t *nperm, uint64_t *nshared) +{ + + BlockCrypto *crypto = bs->opaque; + + bdrv_default_perms(bs, c, role, reopen_queue, perm, shared, nperm, nshared); + + /* + * For backward compatibility, manually share the write + * and resize permission + */ + *nshared |= shared & (BLK_PERM_WRITE | BLK_PERM_RESIZE); + /* + * Since we are not fully a format driver, don't always request + * the read/resize permission but only when explicitly + * requested + */ + *nperm &= ~(BLK_PERM_WRITE | BLK_PERM_RESIZE); + *nperm |= perm & (BLK_PERM_WRITE | BLK_PERM_RESIZE); + + /* + * This driver doesn't modify LUKS metadata except + * when updating the encryption slots. + * Thus unlike a proper format driver we don't ask for + * shared write/read permission. However we need it + * when we are updating the keys, to ensure that only we + * have access to the device. + * + * Encryption update will set the crypto->updating_keys + * during that period and refresh permissions + * + */ + if (crypto->updating_keys) { + /* need exclusive write access for header update */ + *nperm |= BLK_PERM_WRITE; + /* unshare read and write permission */ + *nshared &= ~(BLK_PERM_CONSISTENT_READ | BLK_PERM_WRITE); + } +} + + +static const char *const block_crypto_strong_runtime_opts[] = { + BLOCK_CRYPTO_OPT_LUKS_KEY_SECRET, + + NULL +}; + +static BlockDriver bdrv_crypto_luks = { + .format_name = "luks", + .instance_size = sizeof(BlockCrypto), + .bdrv_probe = block_crypto_probe_luks, + .bdrv_open = block_crypto_open_luks, + .bdrv_close = block_crypto_close, + .bdrv_child_perm = block_crypto_child_perms, + .bdrv_co_create = block_crypto_co_create_luks, + .bdrv_co_create_opts = block_crypto_co_create_opts_luks, + .bdrv_co_truncate = block_crypto_co_truncate, + .create_opts = &block_crypto_create_opts_luks, + .amend_opts = &block_crypto_amend_opts_luks, + + .bdrv_reopen_prepare = block_crypto_reopen_prepare, + .bdrv_refresh_limits = block_crypto_refresh_limits, + .bdrv_co_preadv = block_crypto_co_preadv, + .bdrv_co_pwritev = block_crypto_co_pwritev, + .bdrv_getlength = block_crypto_getlength, + .bdrv_measure = block_crypto_measure, + .bdrv_get_info = block_crypto_get_info_luks, + .bdrv_get_specific_info = block_crypto_get_specific_info_luks, + .bdrv_amend_options = block_crypto_amend_options_luks, + .bdrv_co_amend = block_crypto_co_amend_luks, + + .is_format = true, + + .strong_runtime_opts = block_crypto_strong_runtime_opts, +}; + +static void block_crypto_init(void) +{ + bdrv_register(&bdrv_crypto_luks); +} + +block_init(block_crypto_init); diff --git a/block/crypto.h b/block/crypto.h new file mode 100644 index 000000000..72e792c9a --- /dev/null +++ b/block/crypto.h @@ -0,0 +1,134 @@ +/* + * QEMU block full disk encryption + * + * Copyright (c) 2015-2017 Red Hat, Inc. + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, see . + * + */ + +#ifndef BLOCK_CRYPTO_H +#define BLOCK_CRYPTO_H + +#define BLOCK_CRYPTO_OPT_DEF_KEY_SECRET(prefix, helpstr) \ + { \ + .name = prefix BLOCK_CRYPTO_OPT_QCOW_KEY_SECRET, \ + .type = QEMU_OPT_STRING, \ + .help = helpstr, \ + } + +#define BLOCK_CRYPTO_OPT_QCOW_KEY_SECRET "key-secret" + +#define BLOCK_CRYPTO_OPT_DEF_QCOW_KEY_SECRET(prefix) \ + BLOCK_CRYPTO_OPT_DEF_KEY_SECRET(prefix, \ + "ID of the secret that provides the AES encryption key") + +#define BLOCK_CRYPTO_OPT_LUKS_KEY_SECRET "key-secret" +#define BLOCK_CRYPTO_OPT_LUKS_CIPHER_ALG "cipher-alg" +#define BLOCK_CRYPTO_OPT_LUKS_CIPHER_MODE "cipher-mode" +#define BLOCK_CRYPTO_OPT_LUKS_IVGEN_ALG "ivgen-alg" +#define BLOCK_CRYPTO_OPT_LUKS_IVGEN_HASH_ALG "ivgen-hash-alg" +#define BLOCK_CRYPTO_OPT_LUKS_HASH_ALG "hash-alg" +#define BLOCK_CRYPTO_OPT_LUKS_ITER_TIME "iter-time" +#define BLOCK_CRYPTO_OPT_LUKS_KEYSLOT "keyslot" +#define BLOCK_CRYPTO_OPT_LUKS_STATE "state" +#define BLOCK_CRYPTO_OPT_LUKS_OLD_SECRET "old-secret" +#define BLOCK_CRYPTO_OPT_LUKS_NEW_SECRET "new-secret" + + +#define BLOCK_CRYPTO_OPT_DEF_LUKS_KEY_SECRET(prefix) \ + BLOCK_CRYPTO_OPT_DEF_KEY_SECRET(prefix, \ + "ID of the secret that provides the keyslot passphrase") + +#define BLOCK_CRYPTO_OPT_DEF_LUKS_CIPHER_ALG(prefix) \ + { \ + .name = prefix BLOCK_CRYPTO_OPT_LUKS_CIPHER_ALG, \ + .type = QEMU_OPT_STRING, \ + .help = "Name of encryption cipher algorithm", \ + } + +#define BLOCK_CRYPTO_OPT_DEF_LUKS_CIPHER_MODE(prefix) \ + { \ + .name = prefix BLOCK_CRYPTO_OPT_LUKS_CIPHER_MODE, \ + .type = QEMU_OPT_STRING, \ + .help = "Name of encryption cipher mode", \ + } + +#define BLOCK_CRYPTO_OPT_DEF_LUKS_IVGEN_ALG(prefix) \ + { \ + .name = prefix BLOCK_CRYPTO_OPT_LUKS_IVGEN_ALG, \ + .type = QEMU_OPT_STRING, \ + .help = "Name of IV generator algorithm", \ + } + +#define BLOCK_CRYPTO_OPT_DEF_LUKS_IVGEN_HASH_ALG(prefix) \ + { \ + .name = prefix BLOCK_CRYPTO_OPT_LUKS_IVGEN_HASH_ALG, \ + .type = QEMU_OPT_STRING, \ + .help = "Name of IV generator hash algorithm", \ + } + +#define BLOCK_CRYPTO_OPT_DEF_LUKS_HASH_ALG(prefix) \ + { \ + .name = prefix BLOCK_CRYPTO_OPT_LUKS_HASH_ALG, \ + .type = QEMU_OPT_STRING, \ + .help = "Name of encryption hash algorithm", \ + } + +#define BLOCK_CRYPTO_OPT_DEF_LUKS_ITER_TIME(prefix) \ + { \ + .name = prefix BLOCK_CRYPTO_OPT_LUKS_ITER_TIME, \ + .type = QEMU_OPT_NUMBER, \ + .help = "Time to spend in PBKDF in milliseconds", \ + } + +#define BLOCK_CRYPTO_OPT_DEF_LUKS_STATE(prefix) \ + { \ + .name = prefix BLOCK_CRYPTO_OPT_LUKS_STATE, \ + .type = QEMU_OPT_STRING, \ + .help = "Select new state of affected keyslots (active/inactive)",\ + } + +#define BLOCK_CRYPTO_OPT_DEF_LUKS_KEYSLOT(prefix) \ + { \ + .name = prefix BLOCK_CRYPTO_OPT_LUKS_KEYSLOT, \ + .type = QEMU_OPT_NUMBER, \ + .help = "Select a single keyslot to modify explicitly",\ + } + +#define BLOCK_CRYPTO_OPT_DEF_LUKS_OLD_SECRET(prefix) \ + { \ + .name = prefix BLOCK_CRYPTO_OPT_LUKS_OLD_SECRET, \ + .type = QEMU_OPT_STRING, \ + .help = "Select all keyslots that match this password", \ + } + +#define BLOCK_CRYPTO_OPT_DEF_LUKS_NEW_SECRET(prefix) \ + { \ + .name = prefix BLOCK_CRYPTO_OPT_LUKS_NEW_SECRET, \ + .type = QEMU_OPT_STRING, \ + .help = "New secret to set in the matching keyslots. " \ + "Empty string to erase", \ + } + +QCryptoBlockCreateOptions * +block_crypto_create_opts_init(QDict *opts, Error **errp); + +QCryptoBlockAmendOptions * +block_crypto_amend_opts_init(QDict *opts, Error **errp); + +QCryptoBlockOpenOptions * +block_crypto_open_opts_init(QDict *opts, Error **errp); + +#endif /* BLOCK_CRYPTO_H */ diff --git a/block/curl.c b/block/curl.c new file mode 100644 index 000000000..4f907c47b --- /dev/null +++ b/block/curl.c @@ -0,0 +1,1074 @@ +/* + * QEMU Block driver for CURL images + * + * Copyright (c) 2009 Alexander Graf + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ + +#include "qemu/osdep.h" +#include "qapi/error.h" +#include "qemu/error-report.h" +#include "qemu/module.h" +#include "qemu/option.h" +#include "block/block_int.h" +#include "qapi/qmp/qdict.h" +#include "qapi/qmp/qstring.h" +#include "crypto/secret.h" +#include +#include "qemu/cutils.h" +#include "trace.h" + +// #define DEBUG_VERBOSE + +#if LIBCURL_VERSION_NUM >= 0x071000 +/* The multi interface timer callback was introduced in 7.16.0 */ +#define NEED_CURL_TIMER_CALLBACK +#define HAVE_SOCKET_ACTION +#endif + +#ifndef HAVE_SOCKET_ACTION +/* If curl_multi_socket_action isn't available, define it statically here in + * terms of curl_multi_socket. Note that ev_bitmask will be ignored, which is + * less efficient but still safe. */ +static CURLMcode __curl_multi_socket_action(CURLM *multi_handle, + curl_socket_t sockfd, + int ev_bitmask, + int *running_handles) +{ + return curl_multi_socket(multi_handle, sockfd, running_handles); +} +#define curl_multi_socket_action __curl_multi_socket_action +#endif + +#define PROTOCOLS (CURLPROTO_HTTP | CURLPROTO_HTTPS | \ + CURLPROTO_FTP | CURLPROTO_FTPS) + +#define CURL_NUM_STATES 8 +#define CURL_NUM_ACB 8 +#define CURL_TIMEOUT_MAX 10000 + +#define CURL_BLOCK_OPT_URL "url" +#define CURL_BLOCK_OPT_READAHEAD "readahead" +#define CURL_BLOCK_OPT_SSLVERIFY "sslverify" +#define CURL_BLOCK_OPT_TIMEOUT "timeout" +#define CURL_BLOCK_OPT_COOKIE "cookie" +#define CURL_BLOCK_OPT_COOKIE_SECRET "cookie-secret" +#define CURL_BLOCK_OPT_USERNAME "username" +#define CURL_BLOCK_OPT_PASSWORD_SECRET "password-secret" +#define CURL_BLOCK_OPT_PROXY_USERNAME "proxy-username" +#define CURL_BLOCK_OPT_PROXY_PASSWORD_SECRET "proxy-password-secret" + +#define CURL_BLOCK_OPT_READAHEAD_DEFAULT (256 * 1024) +#define CURL_BLOCK_OPT_SSLVERIFY_DEFAULT true +#define CURL_BLOCK_OPT_TIMEOUT_DEFAULT 5 + +struct BDRVCURLState; +struct CURLState; + +static bool libcurl_initialized; + +typedef struct CURLAIOCB { + Coroutine *co; + QEMUIOVector *qiov; + + uint64_t offset; + uint64_t bytes; + int ret; + + size_t start; + size_t end; +} CURLAIOCB; + +typedef struct CURLSocket { + int fd; + struct CURLState *state; + QLIST_ENTRY(CURLSocket) next; +} CURLSocket; + +typedef struct CURLState +{ + struct BDRVCURLState *s; + CURLAIOCB *acb[CURL_NUM_ACB]; + CURL *curl; + QLIST_HEAD(, CURLSocket) sockets; + char *orig_buf; + uint64_t buf_start; + size_t buf_off; + size_t buf_len; + char range[128]; + char errmsg[CURL_ERROR_SIZE]; + char in_use; +} CURLState; + +typedef struct BDRVCURLState { + CURLM *multi; + QEMUTimer timer; + uint64_t len; + CURLState states[CURL_NUM_STATES]; + char *url; + size_t readahead_size; + bool sslverify; + uint64_t timeout; + char *cookie; + bool accept_range; + AioContext *aio_context; + QemuMutex mutex; + CoQueue free_state_waitq; + char *username; + char *password; + char *proxyusername; + char *proxypassword; +} BDRVCURLState; + +static void curl_clean_state(CURLState *s); +static void curl_multi_do(void *arg); + +#ifdef NEED_CURL_TIMER_CALLBACK +/* Called from curl_multi_do_locked, with s->mutex held. */ +static int curl_timer_cb(CURLM *multi, long timeout_ms, void *opaque) +{ + BDRVCURLState *s = opaque; + + trace_curl_timer_cb(timeout_ms); + if (timeout_ms == -1) { + timer_del(&s->timer); + } else { + int64_t timeout_ns = (int64_t)timeout_ms * 1000 * 1000; + timer_mod(&s->timer, + qemu_clock_get_ns(QEMU_CLOCK_REALTIME) + timeout_ns); + } + return 0; +} +#endif + +/* Called from curl_multi_do_locked, with s->mutex held. */ +static int curl_sock_cb(CURL *curl, curl_socket_t fd, int action, + void *userp, void *sp) +{ + BDRVCURLState *s; + CURLState *state = NULL; + CURLSocket *socket; + + curl_easy_getinfo(curl, CURLINFO_PRIVATE, (char **)&state); + s = state->s; + + QLIST_FOREACH(socket, &state->sockets, next) { + if (socket->fd == fd) { + break; + } + } + if (!socket) { + socket = g_new0(CURLSocket, 1); + socket->fd = fd; + socket->state = state; + QLIST_INSERT_HEAD(&state->sockets, socket, next); + } + + trace_curl_sock_cb(action, (int)fd); + switch (action) { + case CURL_POLL_IN: + aio_set_fd_handler(s->aio_context, fd, false, + curl_multi_do, NULL, NULL, socket); + break; + case CURL_POLL_OUT: + aio_set_fd_handler(s->aio_context, fd, false, + NULL, curl_multi_do, NULL, socket); + break; + case CURL_POLL_INOUT: + aio_set_fd_handler(s->aio_context, fd, false, + curl_multi_do, curl_multi_do, NULL, socket); + break; + case CURL_POLL_REMOVE: + aio_set_fd_handler(s->aio_context, fd, false, + NULL, NULL, NULL, NULL); + break; + } + + if (action == CURL_POLL_REMOVE) { + QLIST_REMOVE(socket, next); + g_free(socket); + } + + return 0; +} + +/* Called from curl_multi_do_locked, with s->mutex held. */ +static size_t curl_header_cb(void *ptr, size_t size, size_t nmemb, void *opaque) +{ + BDRVCURLState *s = opaque; + size_t realsize = size * nmemb; + const char *header = (char *)ptr; + const char *end = header + realsize; + const char *accept_ranges = "accept-ranges:"; + const char *bytes = "bytes"; + + if (realsize >= strlen(accept_ranges) + && g_ascii_strncasecmp(header, accept_ranges, + strlen(accept_ranges)) == 0) { + + char *p = strchr(header, ':') + 1; + + /* Skip whitespace between the header name and value. */ + while (p < end && *p && g_ascii_isspace(*p)) { + p++; + } + + if (end - p >= strlen(bytes) + && strncmp(p, bytes, strlen(bytes)) == 0) { + + /* Check that there is nothing but whitespace after the value. */ + p += strlen(bytes); + while (p < end && *p && g_ascii_isspace(*p)) { + p++; + } + + if (p == end || !*p) { + s->accept_range = true; + } + } + } + + return realsize; +} + +/* Called from curl_multi_do_locked, with s->mutex held. */ +static size_t curl_read_cb(void *ptr, size_t size, size_t nmemb, void *opaque) +{ + CURLState *s = ((CURLState*)opaque); + size_t realsize = size * nmemb; + + trace_curl_read_cb(realsize); + + if (!s || !s->orig_buf) { + goto read_end; + } + + if (s->buf_off >= s->buf_len) { + /* buffer full, read nothing */ + goto read_end; + } + realsize = MIN(realsize, s->buf_len - s->buf_off); + memcpy(s->orig_buf + s->buf_off, ptr, realsize); + s->buf_off += realsize; + +read_end: + /* curl will error out if we do not return this value */ + return size * nmemb; +} + +/* Called with s->mutex held. */ +static bool curl_find_buf(BDRVCURLState *s, uint64_t start, uint64_t len, + CURLAIOCB *acb) +{ + int i; + uint64_t end = start + len; + uint64_t clamped_end = MIN(end, s->len); + uint64_t clamped_len = clamped_end - start; + + for (i=0; istates[i]; + uint64_t buf_end = (state->buf_start + state->buf_off); + uint64_t buf_fend = (state->buf_start + state->buf_len); + + if (!state->orig_buf) + continue; + if (!state->buf_off) + continue; + + // Does the existing buffer cover our section? + if ((start >= state->buf_start) && + (start <= buf_end) && + (clamped_end >= state->buf_start) && + (clamped_end <= buf_end)) + { + char *buf = state->orig_buf + (start - state->buf_start); + + qemu_iovec_from_buf(acb->qiov, 0, buf, clamped_len); + if (clamped_len < len) { + qemu_iovec_memset(acb->qiov, clamped_len, 0, len - clamped_len); + } + acb->ret = 0; + return true; + } + + // Wait for unfinished chunks + if (state->in_use && + (start >= state->buf_start) && + (start <= buf_fend) && + (clamped_end >= state->buf_start) && + (clamped_end <= buf_fend)) + { + int j; + + acb->start = start - state->buf_start; + acb->end = acb->start + clamped_len; + + for (j=0; jacb[j]) { + state->acb[j] = acb; + return true; + } + } + } + } + + return false; +} + +/* Called with s->mutex held. */ +static void curl_multi_check_completion(BDRVCURLState *s) +{ + int msgs_in_queue; + + /* Try to find done transfers, so we can free the easy + * handle again. */ + for (;;) { + CURLMsg *msg; + msg = curl_multi_info_read(s->multi, &msgs_in_queue); + + /* Quit when there are no more completions */ + if (!msg) + break; + + if (msg->msg == CURLMSG_DONE) { + int i; + CURLState *state = NULL; + bool error = msg->data.result != CURLE_OK; + + curl_easy_getinfo(msg->easy_handle, CURLINFO_PRIVATE, + (char **)&state); + + if (error) { + static int errcount = 100; + + /* Don't lose the original error message from curl, since + * it contains extra data. + */ + if (errcount > 0) { + error_report("curl: %s", state->errmsg); + if (--errcount == 0) { + error_report("curl: further errors suppressed"); + } + } + } + + for (i = 0; i < CURL_NUM_ACB; i++) { + CURLAIOCB *acb = state->acb[i]; + + if (acb == NULL) { + continue; + } + + if (!error) { + /* Assert that we have read all data */ + assert(state->buf_off >= acb->end); + + qemu_iovec_from_buf(acb->qiov, 0, + state->orig_buf + acb->start, + acb->end - acb->start); + + if (acb->end - acb->start < acb->bytes) { + size_t offset = acb->end - acb->start; + qemu_iovec_memset(acb->qiov, offset, 0, + acb->bytes - offset); + } + } + + acb->ret = error ? -EIO : 0; + state->acb[i] = NULL; + qemu_mutex_unlock(&s->mutex); + aio_co_wake(acb->co); + qemu_mutex_lock(&s->mutex); + } + + curl_clean_state(state); + break; + } + } +} + +/* Called with s->mutex held. */ +static void curl_multi_do_locked(CURLSocket *socket) +{ + BDRVCURLState *s = socket->state->s; + int running; + int r; + + if (!s->multi) { + return; + } + + do { + r = curl_multi_socket_action(s->multi, socket->fd, 0, &running); + } while (r == CURLM_CALL_MULTI_PERFORM); +} + +static void curl_multi_do(void *arg) +{ + CURLSocket *socket = arg; + BDRVCURLState *s = socket->state->s; + + qemu_mutex_lock(&s->mutex); + curl_multi_do_locked(socket); + curl_multi_check_completion(s); + qemu_mutex_unlock(&s->mutex); +} + +static void curl_multi_timeout_do(void *arg) +{ +#ifdef NEED_CURL_TIMER_CALLBACK + BDRVCURLState *s = (BDRVCURLState *)arg; + int running; + + if (!s->multi) { + return; + } + + qemu_mutex_lock(&s->mutex); + curl_multi_socket_action(s->multi, CURL_SOCKET_TIMEOUT, 0, &running); + + curl_multi_check_completion(s); + qemu_mutex_unlock(&s->mutex); +#else + abort(); +#endif +} + +/* Called with s->mutex held. */ +static CURLState *curl_find_state(BDRVCURLState *s) +{ + CURLState *state = NULL; + int i; + + for (i = 0; i < CURL_NUM_STATES; i++) { + if (!s->states[i].in_use) { + state = &s->states[i]; + state->in_use = 1; + break; + } + } + return state; +} + +static int curl_init_state(BDRVCURLState *s, CURLState *state) +{ + if (!state->curl) { + state->curl = curl_easy_init(); + if (!state->curl) { + return -EIO; + } + curl_easy_setopt(state->curl, CURLOPT_URL, s->url); + curl_easy_setopt(state->curl, CURLOPT_SSL_VERIFYPEER, + (long) s->sslverify); + curl_easy_setopt(state->curl, CURLOPT_SSL_VERIFYHOST, + s->sslverify ? 2L : 0L); + if (s->cookie) { + curl_easy_setopt(state->curl, CURLOPT_COOKIE, s->cookie); + } + curl_easy_setopt(state->curl, CURLOPT_TIMEOUT, (long)s->timeout); + curl_easy_setopt(state->curl, CURLOPT_WRITEFUNCTION, + (void *)curl_read_cb); + curl_easy_setopt(state->curl, CURLOPT_WRITEDATA, (void *)state); + curl_easy_setopt(state->curl, CURLOPT_PRIVATE, (void *)state); + curl_easy_setopt(state->curl, CURLOPT_AUTOREFERER, 1); + curl_easy_setopt(state->curl, CURLOPT_FOLLOWLOCATION, 1); + curl_easy_setopt(state->curl, CURLOPT_NOSIGNAL, 1); + curl_easy_setopt(state->curl, CURLOPT_ERRORBUFFER, state->errmsg); + curl_easy_setopt(state->curl, CURLOPT_FAILONERROR, 1); + + if (s->username) { + curl_easy_setopt(state->curl, CURLOPT_USERNAME, s->username); + } + if (s->password) { + curl_easy_setopt(state->curl, CURLOPT_PASSWORD, s->password); + } + if (s->proxyusername) { + curl_easy_setopt(state->curl, + CURLOPT_PROXYUSERNAME, s->proxyusername); + } + if (s->proxypassword) { + curl_easy_setopt(state->curl, + CURLOPT_PROXYPASSWORD, s->proxypassword); + } + + /* Restrict supported protocols to avoid security issues in the more + * obscure protocols. For example, do not allow POP3/SMTP/IMAP see + * CVE-2013-0249. + * + * Restricting protocols is only supported from 7.19.4 upwards. + */ +#if LIBCURL_VERSION_NUM >= 0x071304 + curl_easy_setopt(state->curl, CURLOPT_PROTOCOLS, PROTOCOLS); + curl_easy_setopt(state->curl, CURLOPT_REDIR_PROTOCOLS, PROTOCOLS); +#endif + +#ifdef DEBUG_VERBOSE + curl_easy_setopt(state->curl, CURLOPT_VERBOSE, 1); +#endif + } + + QLIST_INIT(&state->sockets); + state->s = s; + + return 0; +} + +/* Called with s->mutex held. */ +static void curl_clean_state(CURLState *s) +{ + int j; + for (j = 0; j < CURL_NUM_ACB; j++) { + assert(!s->acb[j]); + } + + if (s->s->multi) + curl_multi_remove_handle(s->s->multi, s->curl); + + while (!QLIST_EMPTY(&s->sockets)) { + CURLSocket *socket = QLIST_FIRST(&s->sockets); + + QLIST_REMOVE(socket, next); + g_free(socket); + } + + s->in_use = 0; + + qemu_co_enter_next(&s->s->free_state_waitq, &s->s->mutex); +} + +static void curl_parse_filename(const char *filename, QDict *options, + Error **errp) +{ + qdict_put_str(options, CURL_BLOCK_OPT_URL, filename); +} + +static void curl_detach_aio_context(BlockDriverState *bs) +{ + BDRVCURLState *s = bs->opaque; + int i; + + qemu_mutex_lock(&s->mutex); + for (i = 0; i < CURL_NUM_STATES; i++) { + if (s->states[i].in_use) { + curl_clean_state(&s->states[i]); + } + if (s->states[i].curl) { + curl_easy_cleanup(s->states[i].curl); + s->states[i].curl = NULL; + } + g_free(s->states[i].orig_buf); + s->states[i].orig_buf = NULL; + } + if (s->multi) { + curl_multi_cleanup(s->multi); + s->multi = NULL; + } + qemu_mutex_unlock(&s->mutex); + + timer_del(&s->timer); +} + +static void curl_attach_aio_context(BlockDriverState *bs, + AioContext *new_context) +{ + BDRVCURLState *s = bs->opaque; + + aio_timer_init(new_context, &s->timer, + QEMU_CLOCK_REALTIME, SCALE_NS, + curl_multi_timeout_do, s); + + assert(!s->multi); + s->multi = curl_multi_init(); + s->aio_context = new_context; + curl_multi_setopt(s->multi, CURLMOPT_SOCKETFUNCTION, curl_sock_cb); +#ifdef NEED_CURL_TIMER_CALLBACK + curl_multi_setopt(s->multi, CURLMOPT_TIMERDATA, s); + curl_multi_setopt(s->multi, CURLMOPT_TIMERFUNCTION, curl_timer_cb); +#endif +} + +static QemuOptsList runtime_opts = { + .name = "curl", + .head = QTAILQ_HEAD_INITIALIZER(runtime_opts.head), + .desc = { + { + .name = CURL_BLOCK_OPT_URL, + .type = QEMU_OPT_STRING, + .help = "URL to open", + }, + { + .name = CURL_BLOCK_OPT_READAHEAD, + .type = QEMU_OPT_SIZE, + .help = "Readahead size", + }, + { + .name = CURL_BLOCK_OPT_SSLVERIFY, + .type = QEMU_OPT_BOOL, + .help = "Verify SSL certificate" + }, + { + .name = CURL_BLOCK_OPT_TIMEOUT, + .type = QEMU_OPT_NUMBER, + .help = "Curl timeout" + }, + { + .name = CURL_BLOCK_OPT_COOKIE, + .type = QEMU_OPT_STRING, + .help = "Pass the cookie or list of cookies with each request" + }, + { + .name = CURL_BLOCK_OPT_COOKIE_SECRET, + .type = QEMU_OPT_STRING, + .help = "ID of secret used as cookie passed with each request" + }, + { + .name = CURL_BLOCK_OPT_USERNAME, + .type = QEMU_OPT_STRING, + .help = "Username for HTTP auth" + }, + { + .name = CURL_BLOCK_OPT_PASSWORD_SECRET, + .type = QEMU_OPT_STRING, + .help = "ID of secret used as password for HTTP auth", + }, + { + .name = CURL_BLOCK_OPT_PROXY_USERNAME, + .type = QEMU_OPT_STRING, + .help = "Username for HTTP proxy auth" + }, + { + .name = CURL_BLOCK_OPT_PROXY_PASSWORD_SECRET, + .type = QEMU_OPT_STRING, + .help = "ID of secret used as password for HTTP proxy auth", + }, + { /* end of list */ } + }, +}; + + +static int curl_open(BlockDriverState *bs, QDict *options, int flags, + Error **errp) +{ + BDRVCURLState *s = bs->opaque; + CURLState *state = NULL; + QemuOpts *opts; + const char *file; + const char *cookie; + const char *cookie_secret; + double d; + const char *secretid; + const char *protocol_delimiter; + int ret; + + ret = bdrv_apply_auto_read_only(bs, "curl driver does not support writes", + errp); + if (ret < 0) { + return ret; + } + + if (!libcurl_initialized) { + ret = curl_global_init(CURL_GLOBAL_ALL); + if (ret) { + error_setg(errp, "libcurl initialization failed with %d", ret); + return -EIO; + } + libcurl_initialized = true; + } + + qemu_mutex_init(&s->mutex); + opts = qemu_opts_create(&runtime_opts, NULL, 0, &error_abort); + if (!qemu_opts_absorb_qdict(opts, options, errp)) { + goto out_noclean; + } + + s->readahead_size = qemu_opt_get_size(opts, CURL_BLOCK_OPT_READAHEAD, + CURL_BLOCK_OPT_READAHEAD_DEFAULT); + if ((s->readahead_size & 0x1ff) != 0) { + error_setg(errp, "HTTP_READAHEAD_SIZE %zd is not a multiple of 512", + s->readahead_size); + goto out_noclean; + } + + s->timeout = qemu_opt_get_number(opts, CURL_BLOCK_OPT_TIMEOUT, + CURL_BLOCK_OPT_TIMEOUT_DEFAULT); + if (s->timeout > CURL_TIMEOUT_MAX) { + error_setg(errp, "timeout parameter is too large or negative"); + goto out_noclean; + } + + s->sslverify = qemu_opt_get_bool(opts, CURL_BLOCK_OPT_SSLVERIFY, + CURL_BLOCK_OPT_SSLVERIFY_DEFAULT); + + cookie = qemu_opt_get(opts, CURL_BLOCK_OPT_COOKIE); + cookie_secret = qemu_opt_get(opts, CURL_BLOCK_OPT_COOKIE_SECRET); + + if (cookie && cookie_secret) { + error_setg(errp, + "curl driver cannot handle both cookie and cookie secret"); + goto out_noclean; + } + + if (cookie_secret) { + s->cookie = qcrypto_secret_lookup_as_utf8(cookie_secret, errp); + if (!s->cookie) { + goto out_noclean; + } + } else { + s->cookie = g_strdup(cookie); + } + + file = qemu_opt_get(opts, CURL_BLOCK_OPT_URL); + if (file == NULL) { + error_setg(errp, "curl block driver requires an 'url' option"); + goto out_noclean; + } + + if (!strstart(file, bs->drv->protocol_name, &protocol_delimiter) || + !strstart(protocol_delimiter, "://", NULL)) + { + error_setg(errp, "%s curl driver cannot handle the URL '%s' (does not " + "start with '%s://')", bs->drv->protocol_name, file, + bs->drv->protocol_name); + goto out_noclean; + } + + s->username = g_strdup(qemu_opt_get(opts, CURL_BLOCK_OPT_USERNAME)); + secretid = qemu_opt_get(opts, CURL_BLOCK_OPT_PASSWORD_SECRET); + + if (secretid) { + s->password = qcrypto_secret_lookup_as_utf8(secretid, errp); + if (!s->password) { + goto out_noclean; + } + } + + s->proxyusername = g_strdup( + qemu_opt_get(opts, CURL_BLOCK_OPT_PROXY_USERNAME)); + secretid = qemu_opt_get(opts, CURL_BLOCK_OPT_PROXY_PASSWORD_SECRET); + if (secretid) { + s->proxypassword = qcrypto_secret_lookup_as_utf8(secretid, errp); + if (!s->proxypassword) { + goto out_noclean; + } + } + + trace_curl_open(file); + qemu_co_queue_init(&s->free_state_waitq); + s->aio_context = bdrv_get_aio_context(bs); + s->url = g_strdup(file); + qemu_mutex_lock(&s->mutex); + state = curl_find_state(s); + qemu_mutex_unlock(&s->mutex); + if (!state) { + goto out_noclean; + } + + // Get file size + + if (curl_init_state(s, state) < 0) { + goto out; + } + + s->accept_range = false; + curl_easy_setopt(state->curl, CURLOPT_NOBODY, 1); + curl_easy_setopt(state->curl, CURLOPT_HEADERFUNCTION, + curl_header_cb); + curl_easy_setopt(state->curl, CURLOPT_HEADERDATA, s); + if (curl_easy_perform(state->curl)) + goto out; + if (curl_easy_getinfo(state->curl, CURLINFO_CONTENT_LENGTH_DOWNLOAD, &d)) { + goto out; + } + /* Prior CURL 7.19.4 return value of 0 could mean that the file size is not + * know or the size is zero. From 7.19.4 CURL returns -1 if size is not + * known and zero if it is really zero-length file. */ +#if LIBCURL_VERSION_NUM >= 0x071304 + if (d < 0) { + pstrcpy(state->errmsg, CURL_ERROR_SIZE, + "Server didn't report file size."); + goto out; + } +#else + if (d <= 0) { + pstrcpy(state->errmsg, CURL_ERROR_SIZE, + "Unknown file size or zero-length file."); + goto out; + } +#endif + + s->len = d; + + if ((!strncasecmp(s->url, "http://", strlen("http://")) + || !strncasecmp(s->url, "https://", strlen("https://"))) + && !s->accept_range) { + pstrcpy(state->errmsg, CURL_ERROR_SIZE, + "Server does not support 'range' (byte ranges)."); + goto out; + } + trace_curl_open_size(s->len); + + qemu_mutex_lock(&s->mutex); + curl_clean_state(state); + qemu_mutex_unlock(&s->mutex); + curl_easy_cleanup(state->curl); + state->curl = NULL; + + curl_attach_aio_context(bs, bdrv_get_aio_context(bs)); + + qemu_opts_del(opts); + return 0; + +out: + error_setg(errp, "CURL: Error opening file: %s", state->errmsg); + curl_easy_cleanup(state->curl); + state->curl = NULL; +out_noclean: + qemu_mutex_destroy(&s->mutex); + g_free(s->cookie); + g_free(s->url); + g_free(s->username); + g_free(s->proxyusername); + g_free(s->proxypassword); + qemu_opts_del(opts); + return -EINVAL; +} + +static void curl_setup_preadv(BlockDriverState *bs, CURLAIOCB *acb) +{ + CURLState *state; + int running; + + BDRVCURLState *s = bs->opaque; + + uint64_t start = acb->offset; + uint64_t end; + + qemu_mutex_lock(&s->mutex); + + // In case we have the requested data already (e.g. read-ahead), + // we can just call the callback and be done. + if (curl_find_buf(s, start, acb->bytes, acb)) { + goto out; + } + + // No cache found, so let's start a new request + for (;;) { + state = curl_find_state(s); + if (state) { + break; + } + qemu_co_queue_wait(&s->free_state_waitq, &s->mutex); + } + + if (curl_init_state(s, state) < 0) { + curl_clean_state(state); + acb->ret = -EIO; + goto out; + } + + acb->start = 0; + acb->end = MIN(acb->bytes, s->len - start); + + state->buf_off = 0; + g_free(state->orig_buf); + state->buf_start = start; + state->buf_len = MIN(acb->end + s->readahead_size, s->len - start); + end = start + state->buf_len - 1; + state->orig_buf = g_try_malloc(state->buf_len); + if (state->buf_len && state->orig_buf == NULL) { + curl_clean_state(state); + acb->ret = -ENOMEM; + goto out; + } + state->acb[0] = acb; + + snprintf(state->range, 127, "%" PRIu64 "-%" PRIu64, start, end); + trace_curl_setup_preadv(acb->bytes, start, state->range); + curl_easy_setopt(state->curl, CURLOPT_RANGE, state->range); + + if (curl_multi_add_handle(s->multi, state->curl) != CURLM_OK) { + state->acb[0] = NULL; + acb->ret = -EIO; + + curl_clean_state(state); + goto out; + } + + /* Tell curl it needs to kick things off */ + curl_multi_socket_action(s->multi, CURL_SOCKET_TIMEOUT, 0, &running); + +out: + qemu_mutex_unlock(&s->mutex); +} + +static int coroutine_fn curl_co_preadv(BlockDriverState *bs, + uint64_t offset, uint64_t bytes, QEMUIOVector *qiov, int flags) +{ + CURLAIOCB acb = { + .co = qemu_coroutine_self(), + .ret = -EINPROGRESS, + .qiov = qiov, + .offset = offset, + .bytes = bytes + }; + + curl_setup_preadv(bs, &acb); + while (acb.ret == -EINPROGRESS) { + qemu_coroutine_yield(); + } + return acb.ret; +} + +static void curl_close(BlockDriverState *bs) +{ + BDRVCURLState *s = bs->opaque; + + trace_curl_close(); + curl_detach_aio_context(bs); + qemu_mutex_destroy(&s->mutex); + + g_free(s->cookie); + g_free(s->url); + g_free(s->username); + g_free(s->proxyusername); + g_free(s->proxypassword); +} + +static int64_t curl_getlength(BlockDriverState *bs) +{ + BDRVCURLState *s = bs->opaque; + return s->len; +} + +static void curl_refresh_filename(BlockDriverState *bs) +{ + BDRVCURLState *s = bs->opaque; + + /* "readahead" and "timeout" do not change the guest-visible data, + * so ignore them */ + if (s->sslverify != CURL_BLOCK_OPT_SSLVERIFY_DEFAULT || + s->cookie || s->username || s->password || s->proxyusername || + s->proxypassword) + { + return; + } + + pstrcpy(bs->exact_filename, sizeof(bs->exact_filename), s->url); +} + + +static const char *const curl_strong_runtime_opts[] = { + CURL_BLOCK_OPT_URL, + CURL_BLOCK_OPT_SSLVERIFY, + CURL_BLOCK_OPT_COOKIE, + CURL_BLOCK_OPT_COOKIE_SECRET, + CURL_BLOCK_OPT_USERNAME, + CURL_BLOCK_OPT_PASSWORD_SECRET, + CURL_BLOCK_OPT_PROXY_USERNAME, + CURL_BLOCK_OPT_PROXY_PASSWORD_SECRET, + + NULL +}; + +static BlockDriver bdrv_http = { + .format_name = "http", + .protocol_name = "http", + + .instance_size = sizeof(BDRVCURLState), + .bdrv_parse_filename = curl_parse_filename, + .bdrv_file_open = curl_open, + .bdrv_close = curl_close, + .bdrv_getlength = curl_getlength, + + .bdrv_co_preadv = curl_co_preadv, + + .bdrv_detach_aio_context = curl_detach_aio_context, + .bdrv_attach_aio_context = curl_attach_aio_context, + + .bdrv_refresh_filename = curl_refresh_filename, + .strong_runtime_opts = curl_strong_runtime_opts, +}; + +static BlockDriver bdrv_https = { + .format_name = "https", + .protocol_name = "https", + + .instance_size = sizeof(BDRVCURLState), + .bdrv_parse_filename = curl_parse_filename, + .bdrv_file_open = curl_open, + .bdrv_close = curl_close, + .bdrv_getlength = curl_getlength, + + .bdrv_co_preadv = curl_co_preadv, + + .bdrv_detach_aio_context = curl_detach_aio_context, + .bdrv_attach_aio_context = curl_attach_aio_context, + + .bdrv_refresh_filename = curl_refresh_filename, + .strong_runtime_opts = curl_strong_runtime_opts, +}; + +static BlockDriver bdrv_ftp = { + .format_name = "ftp", + .protocol_name = "ftp", + + .instance_size = sizeof(BDRVCURLState), + .bdrv_parse_filename = curl_parse_filename, + .bdrv_file_open = curl_open, + .bdrv_close = curl_close, + .bdrv_getlength = curl_getlength, + + .bdrv_co_preadv = curl_co_preadv, + + .bdrv_detach_aio_context = curl_detach_aio_context, + .bdrv_attach_aio_context = curl_attach_aio_context, + + .bdrv_refresh_filename = curl_refresh_filename, + .strong_runtime_opts = curl_strong_runtime_opts, +}; + +static BlockDriver bdrv_ftps = { + .format_name = "ftps", + .protocol_name = "ftps", + + .instance_size = sizeof(BDRVCURLState), + .bdrv_parse_filename = curl_parse_filename, + .bdrv_file_open = curl_open, + .bdrv_close = curl_close, + .bdrv_getlength = curl_getlength, + + .bdrv_co_preadv = curl_co_preadv, + + .bdrv_detach_aio_context = curl_detach_aio_context, + .bdrv_attach_aio_context = curl_attach_aio_context, + + .bdrv_refresh_filename = curl_refresh_filename, + .strong_runtime_opts = curl_strong_runtime_opts, +}; + +static void curl_block_init(void) +{ + bdrv_register(&bdrv_http); + bdrv_register(&bdrv_https); + bdrv_register(&bdrv_ftp); + bdrv_register(&bdrv_ftps); +} + +block_init(curl_block_init); diff --git a/block/dirty-bitmap.c b/block/dirty-bitmap.c new file mode 100644 index 000000000..c01319b18 --- /dev/null +++ b/block/dirty-bitmap.c @@ -0,0 +1,986 @@ +/* + * Block Dirty Bitmap + * + * Copyright (c) 2016-2017 Red Hat. Inc + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ +#include "qemu/osdep.h" +#include "qapi/error.h" +#include "trace.h" +#include "block/block_int.h" +#include "block/blockjob.h" +#include "qemu/main-loop.h" + +struct BdrvDirtyBitmap { + BlockDriverState *bs; + HBitmap *bitmap; /* Dirty bitmap implementation */ + bool busy; /* Bitmap is busy, it can't be used via QMP */ + BdrvDirtyBitmap *successor; /* Anonymous child, if any. */ + char *name; /* Optional non-empty unique ID */ + int64_t size; /* Size of the bitmap, in bytes */ + bool disabled; /* Bitmap is disabled. It ignores all writes to + the device */ + int active_iterators; /* How many iterators are active */ + bool readonly; /* Bitmap is read-only. This field also + prevents the respective image from being + modified (i.e. blocks writes and discards). + Such operations must fail and both the image + and this bitmap must remain unchanged while + this flag is set. */ + bool persistent; /* bitmap must be saved to owner disk image */ + bool inconsistent; /* bitmap is persistent, but inconsistent. + It cannot be used at all in any way, except + a QMP user can remove it. */ + bool skip_store; /* We are either migrating or deleting this + * bitmap; it should not be stored on the next + * inactivation. */ + QLIST_ENTRY(BdrvDirtyBitmap) list; +}; + +struct BdrvDirtyBitmapIter { + HBitmapIter hbi; + BdrvDirtyBitmap *bitmap; +}; + +static inline void bdrv_dirty_bitmaps_lock(BlockDriverState *bs) +{ + qemu_mutex_lock(&bs->dirty_bitmap_mutex); +} + +static inline void bdrv_dirty_bitmaps_unlock(BlockDriverState *bs) +{ + qemu_mutex_unlock(&bs->dirty_bitmap_mutex); +} + +void bdrv_dirty_bitmap_lock(BdrvDirtyBitmap *bitmap) +{ + bdrv_dirty_bitmaps_lock(bitmap->bs); +} + +void bdrv_dirty_bitmap_unlock(BdrvDirtyBitmap *bitmap) +{ + bdrv_dirty_bitmaps_unlock(bitmap->bs); +} + +/* Called with BQL or dirty_bitmap lock taken. */ +BdrvDirtyBitmap *bdrv_find_dirty_bitmap(BlockDriverState *bs, const char *name) +{ + BdrvDirtyBitmap *bm; + + assert(name); + QLIST_FOREACH(bm, &bs->dirty_bitmaps, list) { + if (bm->name && !strcmp(name, bm->name)) { + return bm; + } + } + return NULL; +} + +/* Called with BQL taken. */ +BdrvDirtyBitmap *bdrv_create_dirty_bitmap(BlockDriverState *bs, + uint32_t granularity, + const char *name, + Error **errp) +{ + int64_t bitmap_size; + BdrvDirtyBitmap *bitmap; + + assert(is_power_of_2(granularity) && granularity >= BDRV_SECTOR_SIZE); + + if (name) { + if (bdrv_find_dirty_bitmap(bs, name)) { + error_setg(errp, "Bitmap already exists: %s", name); + return NULL; + } + if (strlen(name) > BDRV_BITMAP_MAX_NAME_SIZE) { + error_setg(errp, "Bitmap name too long: %s", name); + return NULL; + } + } + bitmap_size = bdrv_getlength(bs); + if (bitmap_size < 0) { + error_setg_errno(errp, -bitmap_size, "could not get length of device"); + errno = -bitmap_size; + return NULL; + } + bitmap = g_new0(BdrvDirtyBitmap, 1); + bitmap->bs = bs; + bitmap->bitmap = hbitmap_alloc(bitmap_size, ctz32(granularity)); + bitmap->size = bitmap_size; + bitmap->name = g_strdup(name); + bitmap->disabled = false; + bdrv_dirty_bitmaps_lock(bs); + QLIST_INSERT_HEAD(&bs->dirty_bitmaps, bitmap, list); + bdrv_dirty_bitmaps_unlock(bs); + return bitmap; +} + +int64_t bdrv_dirty_bitmap_size(const BdrvDirtyBitmap *bitmap) +{ + return bitmap->size; +} + +const char *bdrv_dirty_bitmap_name(const BdrvDirtyBitmap *bitmap) +{ + return bitmap->name; +} + +/* Called with BQL taken. */ +bool bdrv_dirty_bitmap_has_successor(BdrvDirtyBitmap *bitmap) +{ + return bitmap->successor; +} + +static bool bdrv_dirty_bitmap_busy(const BdrvDirtyBitmap *bitmap) +{ + return bitmap->busy; +} + +void bdrv_dirty_bitmap_set_busy(BdrvDirtyBitmap *bitmap, bool busy) +{ + bdrv_dirty_bitmaps_lock(bitmap->bs); + bitmap->busy = busy; + bdrv_dirty_bitmaps_unlock(bitmap->bs); +} + +/* Called with BQL taken. */ +bool bdrv_dirty_bitmap_enabled(BdrvDirtyBitmap *bitmap) +{ + return !bitmap->disabled; +} + +/** + * bdrv_dirty_bitmap_status: This API is now deprecated. + * Called with BQL taken. + * + * A BdrvDirtyBitmap can be in four possible user-visible states: + * (1) Active: successor is NULL, and disabled is false: full r/w mode + * (2) Disabled: successor is NULL, and disabled is true: qualified r/w mode, + * guest writes are dropped, but monitor writes are possible, + * through commands like merge and clear. + * (3) Frozen: successor is not NULL. + * A frozen bitmap cannot be renamed, deleted, cleared, set, + * enabled, merged to, etc. A frozen bitmap can only abdicate() + * or reclaim(). + * In this state, the anonymous successor bitmap may be either + * Active and recording writes from the guest (e.g. backup jobs), + * or it can be Disabled and not recording writes. + * (4) Locked: Whether Active or Disabled, the user cannot modify this bitmap + * in any way from the monitor. + * (5) Inconsistent: This is a persistent bitmap whose "in use" bit is set, and + * is unusable by QEMU. It can be deleted to remove it from + * the qcow2. + */ +DirtyBitmapStatus bdrv_dirty_bitmap_status(BdrvDirtyBitmap *bitmap) +{ + if (bdrv_dirty_bitmap_inconsistent(bitmap)) { + return DIRTY_BITMAP_STATUS_INCONSISTENT; + } else if (bdrv_dirty_bitmap_has_successor(bitmap)) { + return DIRTY_BITMAP_STATUS_FROZEN; + } else if (bdrv_dirty_bitmap_busy(bitmap)) { + return DIRTY_BITMAP_STATUS_LOCKED; + } else if (!bdrv_dirty_bitmap_enabled(bitmap)) { + return DIRTY_BITMAP_STATUS_DISABLED; + } else { + return DIRTY_BITMAP_STATUS_ACTIVE; + } +} + +/* Called with BQL taken. */ +static bool bdrv_dirty_bitmap_recording(BdrvDirtyBitmap *bitmap) +{ + return !bitmap->disabled || (bitmap->successor && + !bitmap->successor->disabled); +} + +int bdrv_dirty_bitmap_check(const BdrvDirtyBitmap *bitmap, uint32_t flags, + Error **errp) +{ + if ((flags & BDRV_BITMAP_BUSY) && bdrv_dirty_bitmap_busy(bitmap)) { + error_setg(errp, "Bitmap '%s' is currently in use by another" + " operation and cannot be used", bitmap->name); + return -1; + } + + if ((flags & BDRV_BITMAP_RO) && bdrv_dirty_bitmap_readonly(bitmap)) { + error_setg(errp, "Bitmap '%s' is readonly and cannot be modified", + bitmap->name); + return -1; + } + + if ((flags & BDRV_BITMAP_INCONSISTENT) && + bdrv_dirty_bitmap_inconsistent(bitmap)) { + error_setg(errp, "Bitmap '%s' is inconsistent and cannot be used", + bitmap->name); + error_append_hint(errp, "Try block-dirty-bitmap-remove to delete" + " this bitmap from disk"); + return -1; + } + + return 0; +} + +/** + * Create a successor bitmap destined to replace this bitmap after an operation. + * Requires that the bitmap is not marked busy and has no successor. + * The successor will be enabled if the parent bitmap was. + * Called with BQL taken. + */ +int bdrv_dirty_bitmap_create_successor(BdrvDirtyBitmap *bitmap, Error **errp) +{ + uint64_t granularity; + BdrvDirtyBitmap *child; + + if (bdrv_dirty_bitmap_check(bitmap, BDRV_BITMAP_BUSY, errp)) { + return -1; + } + if (bdrv_dirty_bitmap_has_successor(bitmap)) { + error_setg(errp, "Cannot create a successor for a bitmap that already " + "has one"); + return -1; + } + + /* Create an anonymous successor */ + granularity = bdrv_dirty_bitmap_granularity(bitmap); + child = bdrv_create_dirty_bitmap(bitmap->bs, granularity, NULL, errp); + if (!child) { + return -1; + } + + /* Successor will be on or off based on our current state. */ + child->disabled = bitmap->disabled; + bitmap->disabled = true; + + /* Install the successor and mark the parent as busy */ + bitmap->successor = child; + bitmap->busy = true; + return 0; +} + +void bdrv_enable_dirty_bitmap_locked(BdrvDirtyBitmap *bitmap) +{ + bitmap->disabled = false; +} + +/* Called with BQL taken. */ +void bdrv_dirty_bitmap_enable_successor(BdrvDirtyBitmap *bitmap) +{ + assert(bitmap->bs == bitmap->successor->bs); + bdrv_dirty_bitmaps_lock(bitmap->bs); + bdrv_enable_dirty_bitmap_locked(bitmap->successor); + bdrv_dirty_bitmaps_unlock(bitmap->bs); +} + +/* Called within bdrv_dirty_bitmap_lock..unlock and with BQL taken. */ +static void bdrv_release_dirty_bitmap_locked(BdrvDirtyBitmap *bitmap) +{ + assert(!bitmap->active_iterators); + assert(!bdrv_dirty_bitmap_busy(bitmap)); + assert(!bdrv_dirty_bitmap_has_successor(bitmap)); + QLIST_REMOVE(bitmap, list); + hbitmap_free(bitmap->bitmap); + g_free(bitmap->name); + g_free(bitmap); +} + +/** + * For a bitmap with a successor, yield our name to the successor, + * delete the old bitmap, and return a handle to the new bitmap. + * Called with BQL taken. + */ +BdrvDirtyBitmap *bdrv_dirty_bitmap_abdicate(BdrvDirtyBitmap *bitmap, + Error **errp) +{ + char *name; + BdrvDirtyBitmap *successor = bitmap->successor; + + if (successor == NULL) { + error_setg(errp, "Cannot relinquish control if " + "there's no successor present"); + return NULL; + } + + name = bitmap->name; + bitmap->name = NULL; + successor->name = name; + bitmap->successor = NULL; + successor->persistent = bitmap->persistent; + bitmap->persistent = false; + bitmap->busy = false; + bdrv_release_dirty_bitmap(bitmap); + + return successor; +} + +/** + * In cases of failure where we can no longer safely delete the parent, + * we may wish to re-join the parent and child/successor. + * The merged parent will be marked as not busy. + * The marged parent will be enabled if and only if the successor was enabled. + * Called within bdrv_dirty_bitmap_lock..unlock and with BQL taken. + */ +BdrvDirtyBitmap *bdrv_reclaim_dirty_bitmap_locked(BdrvDirtyBitmap *parent, + Error **errp) +{ + BdrvDirtyBitmap *successor = parent->successor; + + if (!successor) { + error_setg(errp, "Cannot reclaim a successor when none is present"); + return NULL; + } + + if (!hbitmap_merge(parent->bitmap, successor->bitmap, parent->bitmap)) { + error_setg(errp, "Merging of parent and successor bitmap failed"); + return NULL; + } + + parent->disabled = successor->disabled; + parent->busy = false; + bdrv_release_dirty_bitmap_locked(successor); + parent->successor = NULL; + + return parent; +} + +/* Called with BQL taken. */ +BdrvDirtyBitmap *bdrv_reclaim_dirty_bitmap(BdrvDirtyBitmap *parent, + Error **errp) +{ + BdrvDirtyBitmap *ret; + + bdrv_dirty_bitmaps_lock(parent->bs); + ret = bdrv_reclaim_dirty_bitmap_locked(parent, errp); + bdrv_dirty_bitmaps_unlock(parent->bs); + + return ret; +} + +/** + * Truncates _all_ bitmaps attached to a BDS. + * Called with BQL taken. + */ +void bdrv_dirty_bitmap_truncate(BlockDriverState *bs, int64_t bytes) +{ + BdrvDirtyBitmap *bitmap; + + bdrv_dirty_bitmaps_lock(bs); + QLIST_FOREACH(bitmap, &bs->dirty_bitmaps, list) { + assert(!bdrv_dirty_bitmap_busy(bitmap)); + assert(!bdrv_dirty_bitmap_has_successor(bitmap)); + assert(!bitmap->active_iterators); + hbitmap_truncate(bitmap->bitmap, bytes); + bitmap->size = bytes; + } + bdrv_dirty_bitmaps_unlock(bs); +} + +/* Called with BQL taken. */ +void bdrv_release_dirty_bitmap(BdrvDirtyBitmap *bitmap) +{ + BlockDriverState *bs = bitmap->bs; + + bdrv_dirty_bitmaps_lock(bs); + bdrv_release_dirty_bitmap_locked(bitmap); + bdrv_dirty_bitmaps_unlock(bs); +} + +/** + * Release all named dirty bitmaps attached to a BDS (for use in bdrv_close()). + * There must not be any busy bitmaps attached. + * This function does not remove persistent bitmaps from the storage. + * Called with BQL taken. + */ +void bdrv_release_named_dirty_bitmaps(BlockDriverState *bs) +{ + BdrvDirtyBitmap *bm, *next; + + bdrv_dirty_bitmaps_lock(bs); + QLIST_FOREACH_SAFE(bm, &bs->dirty_bitmaps, list, next) { + if (bdrv_dirty_bitmap_name(bm)) { + bdrv_release_dirty_bitmap_locked(bm); + } + } + bdrv_dirty_bitmaps_unlock(bs); +} + +/** + * Remove persistent dirty bitmap from the storage if it exists. + * Absence of bitmap is not an error, because we have the following scenario: + * BdrvDirtyBitmap can have .persistent = true but not yet saved and have no + * stored version. For such bitmap bdrv_remove_persistent_dirty_bitmap() should + * not fail. + * This function doesn't release corresponding BdrvDirtyBitmap. + */ +static int coroutine_fn +bdrv_co_remove_persistent_dirty_bitmap(BlockDriverState *bs, const char *name, + Error **errp) +{ + if (bs->drv && bs->drv->bdrv_co_remove_persistent_dirty_bitmap) { + return bs->drv->bdrv_co_remove_persistent_dirty_bitmap(bs, name, errp); + } + + return 0; +} + +typedef struct BdrvRemovePersistentDirtyBitmapCo { + BlockDriverState *bs; + const char *name; + Error **errp; + int ret; +} BdrvRemovePersistentDirtyBitmapCo; + +static void coroutine_fn +bdrv_co_remove_persistent_dirty_bitmap_entry(void *opaque) +{ + BdrvRemovePersistentDirtyBitmapCo *s = opaque; + + s->ret = bdrv_co_remove_persistent_dirty_bitmap(s->bs, s->name, s->errp); + aio_wait_kick(); +} + +int bdrv_remove_persistent_dirty_bitmap(BlockDriverState *bs, const char *name, + Error **errp) +{ + if (qemu_in_coroutine()) { + return bdrv_co_remove_persistent_dirty_bitmap(bs, name, errp); + } else { + Coroutine *co; + BdrvRemovePersistentDirtyBitmapCo s = { + .bs = bs, + .name = name, + .errp = errp, + .ret = -EINPROGRESS, + }; + + co = qemu_coroutine_create(bdrv_co_remove_persistent_dirty_bitmap_entry, + &s); + bdrv_coroutine_enter(bs, co); + BDRV_POLL_WHILE(bs, s.ret == -EINPROGRESS); + + return s.ret; + } +} + +bool +bdrv_supports_persistent_dirty_bitmap(BlockDriverState *bs) +{ + if (bs->drv && bs->drv->bdrv_supports_persistent_dirty_bitmap) { + return bs->drv->bdrv_supports_persistent_dirty_bitmap(bs); + } + return false; +} + +static bool coroutine_fn +bdrv_co_can_store_new_dirty_bitmap(BlockDriverState *bs, const char *name, + uint32_t granularity, Error **errp) +{ + BlockDriver *drv = bs->drv; + + if (!drv) { + error_setg_errno(errp, ENOMEDIUM, + "Can't store persistent bitmaps to %s", + bdrv_get_device_or_node_name(bs)); + return false; + } + + if (!drv->bdrv_co_can_store_new_dirty_bitmap) { + error_setg_errno(errp, ENOTSUP, + "Can't store persistent bitmaps to %s", + bdrv_get_device_or_node_name(bs)); + return false; + } + + return drv->bdrv_co_can_store_new_dirty_bitmap(bs, name, granularity, errp); +} + +typedef struct BdrvCanStoreNewDirtyBitmapCo { + BlockDriverState *bs; + const char *name; + uint32_t granularity; + Error **errp; + bool ret; + + bool in_progress; +} BdrvCanStoreNewDirtyBitmapCo; + +static void coroutine_fn bdrv_co_can_store_new_dirty_bitmap_entry(void *opaque) +{ + BdrvCanStoreNewDirtyBitmapCo *s = opaque; + + s->ret = bdrv_co_can_store_new_dirty_bitmap(s->bs, s->name, s->granularity, + s->errp); + s->in_progress = false; + aio_wait_kick(); +} + +bool bdrv_can_store_new_dirty_bitmap(BlockDriverState *bs, const char *name, + uint32_t granularity, Error **errp) +{ + if (qemu_in_coroutine()) { + return bdrv_co_can_store_new_dirty_bitmap(bs, name, granularity, errp); + } else { + Coroutine *co; + BdrvCanStoreNewDirtyBitmapCo s = { + .bs = bs, + .name = name, + .granularity = granularity, + .errp = errp, + .in_progress = true, + }; + + co = qemu_coroutine_create(bdrv_co_can_store_new_dirty_bitmap_entry, + &s); + bdrv_coroutine_enter(bs, co); + BDRV_POLL_WHILE(bs, s.in_progress); + + return s.ret; + } +} + +void bdrv_disable_dirty_bitmap(BdrvDirtyBitmap *bitmap) +{ + bdrv_dirty_bitmaps_lock(bitmap->bs); + bitmap->disabled = true; + bdrv_dirty_bitmaps_unlock(bitmap->bs); +} + +void bdrv_enable_dirty_bitmap(BdrvDirtyBitmap *bitmap) +{ + bdrv_dirty_bitmaps_lock(bitmap->bs); + bdrv_enable_dirty_bitmap_locked(bitmap); + bdrv_dirty_bitmaps_unlock(bitmap->bs); +} + +BlockDirtyInfoList *bdrv_query_dirty_bitmaps(BlockDriverState *bs) +{ + BdrvDirtyBitmap *bm; + BlockDirtyInfoList *list = NULL; + BlockDirtyInfoList **plist = &list; + + bdrv_dirty_bitmaps_lock(bs); + QLIST_FOREACH(bm, &bs->dirty_bitmaps, list) { + BlockDirtyInfo *info = g_new0(BlockDirtyInfo, 1); + BlockDirtyInfoList *entry = g_new0(BlockDirtyInfoList, 1); + info->count = bdrv_get_dirty_count(bm); + info->granularity = bdrv_dirty_bitmap_granularity(bm); + info->has_name = !!bm->name; + info->name = g_strdup(bm->name); + info->status = bdrv_dirty_bitmap_status(bm); + info->recording = bdrv_dirty_bitmap_recording(bm); + info->busy = bdrv_dirty_bitmap_busy(bm); + info->persistent = bm->persistent; + info->has_inconsistent = bm->inconsistent; + info->inconsistent = bm->inconsistent; + entry->value = info; + *plist = entry; + plist = &entry->next; + } + bdrv_dirty_bitmaps_unlock(bs); + + return list; +} + +/* Called within bdrv_dirty_bitmap_lock..unlock */ +bool bdrv_dirty_bitmap_get_locked(BdrvDirtyBitmap *bitmap, int64_t offset) +{ + return hbitmap_get(bitmap->bitmap, offset); +} + +bool bdrv_dirty_bitmap_get(BdrvDirtyBitmap *bitmap, int64_t offset) +{ + bool ret; + bdrv_dirty_bitmaps_lock(bitmap->bs); + ret = bdrv_dirty_bitmap_get_locked(bitmap, offset); + bdrv_dirty_bitmaps_unlock(bitmap->bs); + + return ret; +} + +/** + * Chooses a default granularity based on the existing cluster size, + * but clamped between [4K, 64K]. Defaults to 64K in the case that there + * is no cluster size information available. + */ +uint32_t bdrv_get_default_bitmap_granularity(BlockDriverState *bs) +{ + BlockDriverInfo bdi; + uint32_t granularity; + + if (bdrv_get_info(bs, &bdi) >= 0 && bdi.cluster_size > 0) { + granularity = MAX(4096, bdi.cluster_size); + granularity = MIN(65536, granularity); + } else { + granularity = 65536; + } + + return granularity; +} + +uint32_t bdrv_dirty_bitmap_granularity(const BdrvDirtyBitmap *bitmap) +{ + return 1U << hbitmap_granularity(bitmap->bitmap); +} + +BdrvDirtyBitmapIter *bdrv_dirty_iter_new(BdrvDirtyBitmap *bitmap) +{ + BdrvDirtyBitmapIter *iter = g_new(BdrvDirtyBitmapIter, 1); + hbitmap_iter_init(&iter->hbi, bitmap->bitmap, 0); + iter->bitmap = bitmap; + bitmap->active_iterators++; + return iter; +} + +void bdrv_dirty_iter_free(BdrvDirtyBitmapIter *iter) +{ + if (!iter) { + return; + } + assert(iter->bitmap->active_iterators > 0); + iter->bitmap->active_iterators--; + g_free(iter); +} + +int64_t bdrv_dirty_iter_next(BdrvDirtyBitmapIter *iter) +{ + return hbitmap_iter_next(&iter->hbi); +} + +/* Called within bdrv_dirty_bitmap_lock..unlock */ +void bdrv_set_dirty_bitmap_locked(BdrvDirtyBitmap *bitmap, + int64_t offset, int64_t bytes) +{ + assert(!bdrv_dirty_bitmap_readonly(bitmap)); + hbitmap_set(bitmap->bitmap, offset, bytes); +} + +void bdrv_set_dirty_bitmap(BdrvDirtyBitmap *bitmap, + int64_t offset, int64_t bytes) +{ + bdrv_dirty_bitmaps_lock(bitmap->bs); + bdrv_set_dirty_bitmap_locked(bitmap, offset, bytes); + bdrv_dirty_bitmaps_unlock(bitmap->bs); +} + +/* Called within bdrv_dirty_bitmap_lock..unlock */ +void bdrv_reset_dirty_bitmap_locked(BdrvDirtyBitmap *bitmap, + int64_t offset, int64_t bytes) +{ + assert(!bdrv_dirty_bitmap_readonly(bitmap)); + hbitmap_reset(bitmap->bitmap, offset, bytes); +} + +void bdrv_reset_dirty_bitmap(BdrvDirtyBitmap *bitmap, + int64_t offset, int64_t bytes) +{ + bdrv_dirty_bitmaps_lock(bitmap->bs); + bdrv_reset_dirty_bitmap_locked(bitmap, offset, bytes); + bdrv_dirty_bitmaps_unlock(bitmap->bs); +} + +void bdrv_clear_dirty_bitmap(BdrvDirtyBitmap *bitmap, HBitmap **out) +{ + assert(!bdrv_dirty_bitmap_readonly(bitmap)); + bdrv_dirty_bitmaps_lock(bitmap->bs); + if (!out) { + hbitmap_reset_all(bitmap->bitmap); + } else { + HBitmap *backup = bitmap->bitmap; + bitmap->bitmap = hbitmap_alloc(bitmap->size, + hbitmap_granularity(backup)); + *out = backup; + } + bdrv_dirty_bitmaps_unlock(bitmap->bs); +} + +void bdrv_restore_dirty_bitmap(BdrvDirtyBitmap *bitmap, HBitmap *backup) +{ + HBitmap *tmp = bitmap->bitmap; + assert(!bdrv_dirty_bitmap_readonly(bitmap)); + bitmap->bitmap = backup; + hbitmap_free(tmp); +} + +uint64_t bdrv_dirty_bitmap_serialization_size(const BdrvDirtyBitmap *bitmap, + uint64_t offset, uint64_t bytes) +{ + return hbitmap_serialization_size(bitmap->bitmap, offset, bytes); +} + +uint64_t bdrv_dirty_bitmap_serialization_align(const BdrvDirtyBitmap *bitmap) +{ + return hbitmap_serialization_align(bitmap->bitmap); +} + +void bdrv_dirty_bitmap_serialize_part(const BdrvDirtyBitmap *bitmap, + uint8_t *buf, uint64_t offset, + uint64_t bytes) +{ + hbitmap_serialize_part(bitmap->bitmap, buf, offset, bytes); +} + +void bdrv_dirty_bitmap_deserialize_part(BdrvDirtyBitmap *bitmap, + uint8_t *buf, uint64_t offset, + uint64_t bytes, bool finish) +{ + hbitmap_deserialize_part(bitmap->bitmap, buf, offset, bytes, finish); +} + +void bdrv_dirty_bitmap_deserialize_zeroes(BdrvDirtyBitmap *bitmap, + uint64_t offset, uint64_t bytes, + bool finish) +{ + hbitmap_deserialize_zeroes(bitmap->bitmap, offset, bytes, finish); +} + +void bdrv_dirty_bitmap_deserialize_ones(BdrvDirtyBitmap *bitmap, + uint64_t offset, uint64_t bytes, + bool finish) +{ + hbitmap_deserialize_ones(bitmap->bitmap, offset, bytes, finish); +} + +void bdrv_dirty_bitmap_deserialize_finish(BdrvDirtyBitmap *bitmap) +{ + hbitmap_deserialize_finish(bitmap->bitmap); +} + +void bdrv_set_dirty(BlockDriverState *bs, int64_t offset, int64_t bytes) +{ + BdrvDirtyBitmap *bitmap; + + if (QLIST_EMPTY(&bs->dirty_bitmaps)) { + return; + } + + bdrv_dirty_bitmaps_lock(bs); + QLIST_FOREACH(bitmap, &bs->dirty_bitmaps, list) { + if (!bdrv_dirty_bitmap_enabled(bitmap)) { + continue; + } + assert(!bdrv_dirty_bitmap_readonly(bitmap)); + hbitmap_set(bitmap->bitmap, offset, bytes); + } + bdrv_dirty_bitmaps_unlock(bs); +} + +/** + * Advance a BdrvDirtyBitmapIter to an arbitrary offset. + */ +void bdrv_set_dirty_iter(BdrvDirtyBitmapIter *iter, int64_t offset) +{ + hbitmap_iter_init(&iter->hbi, iter->hbi.hb, offset); +} + +int64_t bdrv_get_dirty_count(BdrvDirtyBitmap *bitmap) +{ + return hbitmap_count(bitmap->bitmap); +} + +bool bdrv_dirty_bitmap_readonly(const BdrvDirtyBitmap *bitmap) +{ + return bitmap->readonly; +} + +/* Called with BQL taken. */ +void bdrv_dirty_bitmap_set_readonly(BdrvDirtyBitmap *bitmap, bool value) +{ + bdrv_dirty_bitmaps_lock(bitmap->bs); + bitmap->readonly = value; + bdrv_dirty_bitmaps_unlock(bitmap->bs); +} + +bool bdrv_has_readonly_bitmaps(BlockDriverState *bs) +{ + BdrvDirtyBitmap *bm; + QLIST_FOREACH(bm, &bs->dirty_bitmaps, list) { + if (bm->readonly) { + return true; + } + } + + return false; +} + +bool bdrv_has_named_bitmaps(BlockDriverState *bs) +{ + BdrvDirtyBitmap *bm; + + QLIST_FOREACH(bm, &bs->dirty_bitmaps, list) { + if (bdrv_dirty_bitmap_name(bm)) { + return true; + } + } + + return false; +} + +/* Called with BQL taken. */ +void bdrv_dirty_bitmap_set_persistence(BdrvDirtyBitmap *bitmap, bool persistent) +{ + bdrv_dirty_bitmaps_lock(bitmap->bs); + bitmap->persistent = persistent; + bdrv_dirty_bitmaps_unlock(bitmap->bs); +} + +/* Called with BQL taken. */ +void bdrv_dirty_bitmap_set_inconsistent(BdrvDirtyBitmap *bitmap) +{ + bdrv_dirty_bitmaps_lock(bitmap->bs); + assert(bitmap->persistent == true); + bitmap->inconsistent = true; + bitmap->disabled = true; + bdrv_dirty_bitmaps_unlock(bitmap->bs); +} + +/* Called with BQL taken. */ +void bdrv_dirty_bitmap_skip_store(BdrvDirtyBitmap *bitmap, bool skip) +{ + bdrv_dirty_bitmaps_lock(bitmap->bs); + bitmap->skip_store = skip; + bdrv_dirty_bitmaps_unlock(bitmap->bs); +} + +bool bdrv_dirty_bitmap_get_persistence(BdrvDirtyBitmap *bitmap) +{ + return bitmap->persistent && !bitmap->skip_store; +} + +bool bdrv_dirty_bitmap_inconsistent(const BdrvDirtyBitmap *bitmap) +{ + return bitmap->inconsistent; +} + +BdrvDirtyBitmap *bdrv_dirty_bitmap_first(BlockDriverState *bs) +{ + return QLIST_FIRST(&bs->dirty_bitmaps); +} + +BdrvDirtyBitmap *bdrv_dirty_bitmap_next(BdrvDirtyBitmap *bitmap) +{ + return QLIST_NEXT(bitmap, list); +} + +char *bdrv_dirty_bitmap_sha256(const BdrvDirtyBitmap *bitmap, Error **errp) +{ + return hbitmap_sha256(bitmap->bitmap, errp); +} + +int64_t bdrv_dirty_bitmap_next_dirty(BdrvDirtyBitmap *bitmap, int64_t offset, + int64_t bytes) +{ + return hbitmap_next_dirty(bitmap->bitmap, offset, bytes); +} + +int64_t bdrv_dirty_bitmap_next_zero(BdrvDirtyBitmap *bitmap, int64_t offset, + int64_t bytes) +{ + return hbitmap_next_zero(bitmap->bitmap, offset, bytes); +} + +bool bdrv_dirty_bitmap_next_dirty_area(BdrvDirtyBitmap *bitmap, + int64_t start, int64_t end, int64_t max_dirty_count, + int64_t *dirty_start, int64_t *dirty_count) +{ + return hbitmap_next_dirty_area(bitmap->bitmap, start, end, max_dirty_count, + dirty_start, dirty_count); +} + +/** + * bdrv_merge_dirty_bitmap: merge src into dest. + * Ensures permissions on bitmaps are reasonable; use for public API. + * + * @backup: If provided, make a copy of dest here prior to merge. + */ +void bdrv_merge_dirty_bitmap(BdrvDirtyBitmap *dest, const BdrvDirtyBitmap *src, + HBitmap **backup, Error **errp) +{ + bool ret; + + bdrv_dirty_bitmaps_lock(dest->bs); + if (src->bs != dest->bs) { + bdrv_dirty_bitmaps_lock(src->bs); + } + + if (bdrv_dirty_bitmap_check(dest, BDRV_BITMAP_DEFAULT, errp)) { + goto out; + } + + if (bdrv_dirty_bitmap_check(src, BDRV_BITMAP_ALLOW_RO, errp)) { + goto out; + } + + if (!hbitmap_can_merge(dest->bitmap, src->bitmap)) { + error_setg(errp, "Bitmaps are incompatible and can't be merged"); + goto out; + } + + ret = bdrv_dirty_bitmap_merge_internal(dest, src, backup, false); + assert(ret); + +out: + bdrv_dirty_bitmaps_unlock(dest->bs); + if (src->bs != dest->bs) { + bdrv_dirty_bitmaps_unlock(src->bs); + } +} + +/** + * bdrv_dirty_bitmap_merge_internal: merge src into dest. + * Does NOT check bitmap permissions; not suitable for use as public API. + * + * @backup: If provided, make a copy of dest here prior to merge. + * @lock: If true, lock and unlock bitmaps on the way in/out. + * returns true if the merge succeeded; false if unattempted. + */ +bool bdrv_dirty_bitmap_merge_internal(BdrvDirtyBitmap *dest, + const BdrvDirtyBitmap *src, + HBitmap **backup, + bool lock) +{ + bool ret; + + assert(!bdrv_dirty_bitmap_readonly(dest)); + assert(!bdrv_dirty_bitmap_inconsistent(dest)); + assert(!bdrv_dirty_bitmap_inconsistent(src)); + + if (lock) { + bdrv_dirty_bitmaps_lock(dest->bs); + if (src->bs != dest->bs) { + bdrv_dirty_bitmaps_lock(src->bs); + } + } + + if (backup) { + *backup = dest->bitmap; + dest->bitmap = hbitmap_alloc(dest->size, hbitmap_granularity(*backup)); + ret = hbitmap_merge(*backup, src->bitmap, dest->bitmap); + } else { + ret = hbitmap_merge(dest->bitmap, src->bitmap, dest->bitmap); + } + + if (lock) { + bdrv_dirty_bitmaps_unlock(dest->bs); + if (src->bs != dest->bs) { + bdrv_dirty_bitmaps_unlock(src->bs); + } + } + + return ret; +} diff --git a/block/dmg-bz2.c b/block/dmg-bz2.c new file mode 100644 index 000000000..37f7ee04b --- /dev/null +++ b/block/dmg-bz2.c @@ -0,0 +1,60 @@ +/* + * DMG bzip2 uncompression + * + * Copyright (c) 2004 Johannes E. Schindelin + * Copyright (c) 2016 Red Hat, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ +#include "qemu/osdep.h" +#include "dmg.h" +#include + +static int dmg_uncompress_bz2_do(char *next_in, unsigned int avail_in, + char *next_out, unsigned int avail_out) +{ + int ret; + uint64_t total_out; + bz_stream bzstream = {}; + + ret = BZ2_bzDecompressInit(&bzstream, 0, 0); + if (ret != BZ_OK) { + return -1; + } + bzstream.next_in = next_in; + bzstream.avail_in = avail_in; + bzstream.next_out = next_out; + bzstream.avail_out = avail_out; + ret = BZ2_bzDecompress(&bzstream); + total_out = ((uint64_t)bzstream.total_out_hi32 << 32) + + bzstream.total_out_lo32; + BZ2_bzDecompressEnd(&bzstream); + if (ret != BZ_STREAM_END || + total_out != avail_out) { + return -1; + } + return 0; +} + +__attribute__((constructor)) +static void dmg_bz2_init(void) +{ + assert(!dmg_uncompress_bz2); + dmg_uncompress_bz2 = dmg_uncompress_bz2_do; +} diff --git a/block/dmg-lzfse.c b/block/dmg-lzfse.c new file mode 100644 index 000000000..6798cf4fb --- /dev/null +++ b/block/dmg-lzfse.c @@ -0,0 +1,48 @@ +/* + * DMG lzfse uncompression + * + * Copyright (c) 2018 Julio Cesar Faracco + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ +#include "qemu/osdep.h" +#include "dmg.h" +#include + +static int dmg_uncompress_lzfse_do(char *next_in, unsigned int avail_in, + char *next_out, unsigned int avail_out) +{ + size_t out_size = lzfse_decode_buffer((uint8_t *) next_out, avail_out, + (uint8_t *) next_in, avail_in, + NULL); + + /* We need to decode the single chunk only. */ + /* So, out_size == avail_out is not an error here. */ + if (out_size > 0) { + return out_size; + } + return -1; +} + +__attribute__((constructor)) +static void dmg_lzfse_init(void) +{ + assert(!dmg_uncompress_lzfse); + dmg_uncompress_lzfse = dmg_uncompress_lzfse_do; +} diff --git a/block/dmg.c b/block/dmg.c new file mode 100644 index 000000000..ef35a505f --- /dev/null +++ b/block/dmg.c @@ -0,0 +1,764 @@ +/* + * QEMU Block driver for DMG images + * + * Copyright (c) 2004 Johannes E. Schindelin + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ +#include "qemu/osdep.h" +#include "qapi/error.h" +#include "block/block_int.h" +#include "qemu/bswap.h" +#include "qemu/error-report.h" +#include "qemu/module.h" +#include "dmg.h" + +int (*dmg_uncompress_bz2)(char *next_in, unsigned int avail_in, + char *next_out, unsigned int avail_out); + +int (*dmg_uncompress_lzfse)(char *next_in, unsigned int avail_in, + char *next_out, unsigned int avail_out); + +enum { + /* Limit chunk sizes to prevent unreasonable amounts of memory being used + * or truncating when converting to 32-bit types + */ + DMG_LENGTHS_MAX = 64 * 1024 * 1024, /* 64 MB */ + DMG_SECTORCOUNTS_MAX = DMG_LENGTHS_MAX / 512, +}; + +enum { + /* DMG Block Type */ + UDZE = 0, /* Zeroes */ + UDRW, /* RAW type */ + UDIG, /* Ignore */ + UDCO = 0x80000004, + UDZO, + UDBZ, + ULFO, + UDCM = 0x7ffffffe, /* Comments */ + UDLE = 0xffffffff /* Last Entry */ +}; + +static int dmg_probe(const uint8_t *buf, int buf_size, const char *filename) +{ + int len; + + if (!filename) { + return 0; + } + + len = strlen(filename); + if (len > 4 && !strcmp(filename + len - 4, ".dmg")) { + return 2; + } + return 0; +} + +static int read_uint64(BlockDriverState *bs, int64_t offset, uint64_t *result) +{ + uint64_t buffer; + int ret; + + ret = bdrv_pread(bs->file, offset, &buffer, 8); + if (ret < 0) { + return ret; + } + + *result = be64_to_cpu(buffer); + return 0; +} + +static int read_uint32(BlockDriverState *bs, int64_t offset, uint32_t *result) +{ + uint32_t buffer; + int ret; + + ret = bdrv_pread(bs->file, offset, &buffer, 4); + if (ret < 0) { + return ret; + } + + *result = be32_to_cpu(buffer); + return 0; +} + +static inline uint64_t buff_read_uint64(const uint8_t *buffer, int64_t offset) +{ + return be64_to_cpu(*(uint64_t *)&buffer[offset]); +} + +static inline uint32_t buff_read_uint32(const uint8_t *buffer, int64_t offset) +{ + return be32_to_cpu(*(uint32_t *)&buffer[offset]); +} + +/* Increase max chunk sizes, if necessary. This function is used to calculate + * the buffer sizes needed for compressed/uncompressed chunk I/O. + */ +static void update_max_chunk_size(BDRVDMGState *s, uint32_t chunk, + uint32_t *max_compressed_size, + uint32_t *max_sectors_per_chunk) +{ + uint32_t compressed_size = 0; + uint32_t uncompressed_sectors = 0; + + switch (s->types[chunk]) { + case UDZO: /* zlib compressed */ + case UDBZ: /* bzip2 compressed */ + case ULFO: /* lzfse compressed */ + compressed_size = s->lengths[chunk]; + uncompressed_sectors = s->sectorcounts[chunk]; + break; + case UDRW: /* copy */ + uncompressed_sectors = DIV_ROUND_UP(s->lengths[chunk], 512); + break; + case UDZE: /* zero */ + case UDIG: /* ignore */ + /* as the all-zeroes block may be large, it is treated specially: the + * sector is not copied from a large buffer, a simple memset is used + * instead. Therefore uncompressed_sectors does not need to be set. */ + break; + } + + if (compressed_size > *max_compressed_size) { + *max_compressed_size = compressed_size; + } + if (uncompressed_sectors > *max_sectors_per_chunk) { + *max_sectors_per_chunk = uncompressed_sectors; + } +} + +static int64_t dmg_find_koly_offset(BdrvChild *file, Error **errp) +{ + BlockDriverState *file_bs = file->bs; + int64_t length; + int64_t offset = 0; + uint8_t buffer[515]; + int i, ret; + + /* bdrv_getlength returns a multiple of block size (512), rounded up. Since + * dmg images can have odd sizes, try to look for the "koly" magic which + * marks the begin of the UDIF trailer (512 bytes). This magic can be found + * in the last 511 bytes of the second-last sector or the first 4 bytes of + * the last sector (search space: 515 bytes) */ + length = bdrv_getlength(file_bs); + if (length < 0) { + error_setg_errno(errp, -length, + "Failed to get file size while reading UDIF trailer"); + return length; + } else if (length < 512) { + error_setg(errp, "dmg file must be at least 512 bytes long"); + return -EINVAL; + } + if (length > 511 + 512) { + offset = length - 511 - 512; + } + length = length < 515 ? length : 515; + ret = bdrv_pread(file, offset, buffer, length); + if (ret < 0) { + error_setg_errno(errp, -ret, "Failed while reading UDIF trailer"); + return ret; + } + for (i = 0; i < length - 3; i++) { + if (buffer[i] == 'k' && buffer[i+1] == 'o' && + buffer[i+2] == 'l' && buffer[i+3] == 'y') { + return offset + i; + } + } + error_setg(errp, "Could not locate UDIF trailer in dmg file"); + return -EINVAL; +} + +/* used when building the sector table */ +typedef struct DmgHeaderState { + /* used internally by dmg_read_mish_block to remember offsets of blocks + * across calls */ + uint64_t data_fork_offset; + /* exported for dmg_open */ + uint32_t max_compressed_size; + uint32_t max_sectors_per_chunk; +} DmgHeaderState; + +static bool dmg_is_known_block_type(uint32_t entry_type) +{ + switch (entry_type) { + case UDZE: /* zeros */ + case UDRW: /* uncompressed */ + case UDIG: /* ignore */ + case UDZO: /* zlib */ + return true; + case UDBZ: /* bzip2 */ + return !!dmg_uncompress_bz2; + case ULFO: /* lzfse */ + return !!dmg_uncompress_lzfse; + default: + return false; + } +} + +static int dmg_read_mish_block(BDRVDMGState *s, DmgHeaderState *ds, + uint8_t *buffer, uint32_t count) +{ + uint32_t type, i; + int ret; + size_t new_size; + uint32_t chunk_count; + int64_t offset = 0; + uint64_t data_offset; + uint64_t in_offset = ds->data_fork_offset; + uint64_t out_offset; + + type = buff_read_uint32(buffer, offset); + /* skip data that is not a valid MISH block (invalid magic or too small) */ + if (type != 0x6d697368 || count < 244) { + /* assume success for now */ + return 0; + } + + /* chunk offsets are relative to this sector number */ + out_offset = buff_read_uint64(buffer, offset + 8); + + /* location in data fork for (compressed) blob (in bytes) */ + data_offset = buff_read_uint64(buffer, offset + 0x18); + in_offset += data_offset; + + /* move to begin of chunk entries */ + offset += 204; + + chunk_count = (count - 204) / 40; + new_size = sizeof(uint64_t) * (s->n_chunks + chunk_count); + s->types = g_realloc(s->types, new_size / 2); + s->offsets = g_realloc(s->offsets, new_size); + s->lengths = g_realloc(s->lengths, new_size); + s->sectors = g_realloc(s->sectors, new_size); + s->sectorcounts = g_realloc(s->sectorcounts, new_size); + + for (i = s->n_chunks; i < s->n_chunks + chunk_count; i++) { + s->types[i] = buff_read_uint32(buffer, offset); + if (!dmg_is_known_block_type(s->types[i])) { + chunk_count--; + i--; + offset += 40; + continue; + } + + /* sector number */ + s->sectors[i] = buff_read_uint64(buffer, offset + 8); + s->sectors[i] += out_offset; + + /* sector count */ + s->sectorcounts[i] = buff_read_uint64(buffer, offset + 0x10); + + /* all-zeroes sector (type UDZE and UDIG) does not need to be + * "uncompressed" and can therefore be unbounded. */ + if (s->types[i] != UDZE && s->types[i] != UDIG + && s->sectorcounts[i] > DMG_SECTORCOUNTS_MAX) { + error_report("sector count %" PRIu64 " for chunk %" PRIu32 + " is larger than max (%u)", + s->sectorcounts[i], i, DMG_SECTORCOUNTS_MAX); + ret = -EINVAL; + goto fail; + } + + /* offset in (compressed) data fork */ + s->offsets[i] = buff_read_uint64(buffer, offset + 0x18); + s->offsets[i] += in_offset; + + /* length in (compressed) data fork */ + s->lengths[i] = buff_read_uint64(buffer, offset + 0x20); + + if (s->lengths[i] > DMG_LENGTHS_MAX) { + error_report("length %" PRIu64 " for chunk %" PRIu32 + " is larger than max (%u)", + s->lengths[i], i, DMG_LENGTHS_MAX); + ret = -EINVAL; + goto fail; + } + + update_max_chunk_size(s, i, &ds->max_compressed_size, + &ds->max_sectors_per_chunk); + offset += 40; + } + s->n_chunks += chunk_count; + return 0; + +fail: + return ret; +} + +static int dmg_read_resource_fork(BlockDriverState *bs, DmgHeaderState *ds, + uint64_t info_begin, uint64_t info_length) +{ + BDRVDMGState *s = bs->opaque; + int ret; + uint32_t count, rsrc_data_offset; + uint8_t *buffer = NULL; + uint64_t info_end; + uint64_t offset; + + /* read offset from begin of resource fork (info_begin) to resource data */ + ret = read_uint32(bs, info_begin, &rsrc_data_offset); + if (ret < 0) { + goto fail; + } else if (rsrc_data_offset > info_length) { + ret = -EINVAL; + goto fail; + } + + /* read length of resource data */ + ret = read_uint32(bs, info_begin + 8, &count); + if (ret < 0) { + goto fail; + } else if (count == 0 || rsrc_data_offset + count > info_length) { + ret = -EINVAL; + goto fail; + } + + /* begin of resource data (consisting of one or more resources) */ + offset = info_begin + rsrc_data_offset; + + /* end of resource data (there is possibly a following resource map + * which will be ignored). */ + info_end = offset + count; + + /* read offsets (mish blocks) from one or more resources in resource data */ + while (offset < info_end) { + /* size of following resource */ + ret = read_uint32(bs, offset, &count); + if (ret < 0) { + goto fail; + } else if (count == 0 || count > info_end - offset) { + ret = -EINVAL; + goto fail; + } + offset += 4; + + buffer = g_realloc(buffer, count); + ret = bdrv_pread(bs->file, offset, buffer, count); + if (ret < 0) { + goto fail; + } + + ret = dmg_read_mish_block(s, ds, buffer, count); + if (ret < 0) { + goto fail; + } + /* advance offset by size of resource */ + offset += count; + } + ret = 0; + +fail: + g_free(buffer); + return ret; +} + +static int dmg_read_plist_xml(BlockDriverState *bs, DmgHeaderState *ds, + uint64_t info_begin, uint64_t info_length) +{ + BDRVDMGState *s = bs->opaque; + int ret; + uint8_t *buffer = NULL; + char *data_begin, *data_end; + + /* Have at least some length to avoid NULL for g_malloc. Attempt to set a + * safe upper cap on the data length. A test sample had a XML length of + * about 1 MiB. */ + if (info_length == 0 || info_length > 16 * 1024 * 1024) { + ret = -EINVAL; + goto fail; + } + + buffer = g_malloc(info_length + 1); + buffer[info_length] = '\0'; + ret = bdrv_pread(bs->file, info_begin, buffer, info_length); + if (ret != info_length) { + ret = -EINVAL; + goto fail; + } + + /* look for .... The data is 284 (0x11c) bytes after base64 + * decode. The actual data element has 431 (0x1af) bytes which includes tabs + * and line feeds. */ + data_end = (char *)buffer; + while ((data_begin = strstr(data_end, "")) != NULL) { + guchar *mish; + gsize out_len = 0; + + data_begin += 6; + data_end = strstr(data_begin, ""); + /* malformed XML? */ + if (data_end == NULL) { + ret = -EINVAL; + goto fail; + } + *data_end++ = '\0'; + mish = g_base64_decode(data_begin, &out_len); + ret = dmg_read_mish_block(s, ds, mish, (uint32_t)out_len); + g_free(mish); + if (ret < 0) { + goto fail; + } + } + ret = 0; + +fail: + g_free(buffer); + return ret; +} + +static int dmg_open(BlockDriverState *bs, QDict *options, int flags, + Error **errp) +{ + BDRVDMGState *s = bs->opaque; + DmgHeaderState ds; + uint64_t rsrc_fork_offset, rsrc_fork_length; + uint64_t plist_xml_offset, plist_xml_length; + int64_t offset; + int ret; + + ret = bdrv_apply_auto_read_only(bs, NULL, errp); + if (ret < 0) { + return ret; + } + + bs->file = bdrv_open_child(NULL, options, "file", bs, &child_of_bds, + BDRV_CHILD_IMAGE, false, errp); + if (!bs->file) { + return -EINVAL; + } + + block_module_load_one("dmg-bz2"); + block_module_load_one("dmg-lzfse"); + + s->n_chunks = 0; + s->offsets = s->lengths = s->sectors = s->sectorcounts = NULL; + /* used by dmg_read_mish_block to keep track of the current I/O position */ + ds.data_fork_offset = 0; + ds.max_compressed_size = 1; + ds.max_sectors_per_chunk = 1; + + /* locate the UDIF trailer */ + offset = dmg_find_koly_offset(bs->file, errp); + if (offset < 0) { + ret = offset; + goto fail; + } + + /* offset of data fork (DataForkOffset) */ + ret = read_uint64(bs, offset + 0x18, &ds.data_fork_offset); + if (ret < 0) { + goto fail; + } else if (ds.data_fork_offset > offset) { + ret = -EINVAL; + goto fail; + } + + /* offset of resource fork (RsrcForkOffset) */ + ret = read_uint64(bs, offset + 0x28, &rsrc_fork_offset); + if (ret < 0) { + goto fail; + } + ret = read_uint64(bs, offset + 0x30, &rsrc_fork_length); + if (ret < 0) { + goto fail; + } + if (rsrc_fork_offset >= offset || + rsrc_fork_length > offset - rsrc_fork_offset) { + ret = -EINVAL; + goto fail; + } + /* offset of property list (XMLOffset) */ + ret = read_uint64(bs, offset + 0xd8, &plist_xml_offset); + if (ret < 0) { + goto fail; + } + ret = read_uint64(bs, offset + 0xe0, &plist_xml_length); + if (ret < 0) { + goto fail; + } + if (plist_xml_offset >= offset || + plist_xml_length > offset - plist_xml_offset) { + ret = -EINVAL; + goto fail; + } + ret = read_uint64(bs, offset + 0x1ec, (uint64_t *)&bs->total_sectors); + if (ret < 0) { + goto fail; + } + if (bs->total_sectors < 0) { + ret = -EINVAL; + goto fail; + } + if (rsrc_fork_length != 0) { + ret = dmg_read_resource_fork(bs, &ds, + rsrc_fork_offset, rsrc_fork_length); + if (ret < 0) { + goto fail; + } + } else if (plist_xml_length != 0) { + ret = dmg_read_plist_xml(bs, &ds, plist_xml_offset, plist_xml_length); + if (ret < 0) { + goto fail; + } + } else { + ret = -EINVAL; + goto fail; + } + + /* initialize zlib engine */ + s->compressed_chunk = qemu_try_blockalign(bs->file->bs, + ds.max_compressed_size + 1); + s->uncompressed_chunk = qemu_try_blockalign(bs->file->bs, + 512 * ds.max_sectors_per_chunk); + if (s->compressed_chunk == NULL || s->uncompressed_chunk == NULL) { + ret = -ENOMEM; + goto fail; + } + + if (inflateInit(&s->zstream) != Z_OK) { + ret = -EINVAL; + goto fail; + } + + s->current_chunk = s->n_chunks; + + qemu_co_mutex_init(&s->lock); + return 0; + +fail: + g_free(s->types); + g_free(s->offsets); + g_free(s->lengths); + g_free(s->sectors); + g_free(s->sectorcounts); + qemu_vfree(s->compressed_chunk); + qemu_vfree(s->uncompressed_chunk); + return ret; +} + +static void dmg_refresh_limits(BlockDriverState *bs, Error **errp) +{ + bs->bl.request_alignment = BDRV_SECTOR_SIZE; /* No sub-sector I/O */ +} + +static inline int is_sector_in_chunk(BDRVDMGState *s, + uint32_t chunk_num, uint64_t sector_num) +{ + if (chunk_num >= s->n_chunks || s->sectors[chunk_num] > sector_num || + s->sectors[chunk_num] + s->sectorcounts[chunk_num] <= sector_num) { + return 0; + } else { + return -1; + } +} + +static inline uint32_t search_chunk(BDRVDMGState *s, uint64_t sector_num) +{ + /* binary search */ + uint32_t chunk1 = 0, chunk2 = s->n_chunks, chunk3; + while (chunk1 <= chunk2) { + chunk3 = (chunk1 + chunk2) / 2; + if (s->sectors[chunk3] > sector_num) { + if (chunk3 == 0) { + goto err; + } + chunk2 = chunk3 - 1; + } else if (s->sectors[chunk3] + s->sectorcounts[chunk3] > sector_num) { + return chunk3; + } else { + chunk1 = chunk3 + 1; + } + } +err: + return s->n_chunks; /* error */ +} + +static inline int dmg_read_chunk(BlockDriverState *bs, uint64_t sector_num) +{ + BDRVDMGState *s = bs->opaque; + + if (!is_sector_in_chunk(s, s->current_chunk, sector_num)) { + int ret; + uint32_t chunk = search_chunk(s, sector_num); + + if (chunk >= s->n_chunks) { + return -1; + } + + s->current_chunk = s->n_chunks; + switch (s->types[chunk]) { /* block entry type */ + case UDZO: { /* zlib compressed */ + /* we need to buffer, because only the chunk as whole can be + * inflated. */ + ret = bdrv_pread(bs->file, s->offsets[chunk], + s->compressed_chunk, s->lengths[chunk]); + if (ret != s->lengths[chunk]) { + return -1; + } + + s->zstream.next_in = s->compressed_chunk; + s->zstream.avail_in = s->lengths[chunk]; + s->zstream.next_out = s->uncompressed_chunk; + s->zstream.avail_out = 512 * s->sectorcounts[chunk]; + ret = inflateReset(&s->zstream); + if (ret != Z_OK) { + return -1; + } + ret = inflate(&s->zstream, Z_FINISH); + if (ret != Z_STREAM_END || + s->zstream.total_out != 512 * s->sectorcounts[chunk]) { + return -1; + } + break; } + case UDBZ: /* bzip2 compressed */ + if (!dmg_uncompress_bz2) { + break; + } + /* we need to buffer, because only the chunk as whole can be + * inflated. */ + ret = bdrv_pread(bs->file, s->offsets[chunk], + s->compressed_chunk, s->lengths[chunk]); + if (ret != s->lengths[chunk]) { + return -1; + } + + ret = dmg_uncompress_bz2((char *)s->compressed_chunk, + (unsigned int) s->lengths[chunk], + (char *)s->uncompressed_chunk, + (unsigned int) + (512 * s->sectorcounts[chunk])); + if (ret < 0) { + return ret; + } + break; + case ULFO: + if (!dmg_uncompress_lzfse) { + break; + } + /* we need to buffer, because only the chunk as whole can be + * inflated. */ + ret = bdrv_pread(bs->file, s->offsets[chunk], + s->compressed_chunk, s->lengths[chunk]); + if (ret != s->lengths[chunk]) { + return -1; + } + + ret = dmg_uncompress_lzfse((char *)s->compressed_chunk, + (unsigned int) s->lengths[chunk], + (char *)s->uncompressed_chunk, + (unsigned int) + (512 * s->sectorcounts[chunk])); + if (ret < 0) { + return ret; + } + break; + case UDRW: /* copy */ + ret = bdrv_pread(bs->file, s->offsets[chunk], + s->uncompressed_chunk, s->lengths[chunk]); + if (ret != s->lengths[chunk]) { + return -1; + } + break; + case UDZE: /* zeros */ + case UDIG: /* ignore */ + /* see dmg_read, it is treated specially. No buffer needs to be + * pre-filled, the zeroes can be set directly. */ + break; + } + s->current_chunk = chunk; + } + return 0; +} + +static int coroutine_fn +dmg_co_preadv(BlockDriverState *bs, uint64_t offset, uint64_t bytes, + QEMUIOVector *qiov, int flags) +{ + BDRVDMGState *s = bs->opaque; + uint64_t sector_num = offset >> BDRV_SECTOR_BITS; + int nb_sectors = bytes >> BDRV_SECTOR_BITS; + int ret, i; + + assert(QEMU_IS_ALIGNED(offset, BDRV_SECTOR_SIZE)); + assert(QEMU_IS_ALIGNED(bytes, BDRV_SECTOR_SIZE)); + + qemu_co_mutex_lock(&s->lock); + + for (i = 0; i < nb_sectors; i++) { + uint32_t sector_offset_in_chunk; + void *data; + + if (dmg_read_chunk(bs, sector_num + i) != 0) { + ret = -EIO; + goto fail; + } + /* Special case: current chunk is all zeroes. Do not perform a memcpy as + * s->uncompressed_chunk may be too small to cover the large all-zeroes + * section. dmg_read_chunk is called to find s->current_chunk */ + if (s->types[s->current_chunk] == UDZE + || s->types[s->current_chunk] == UDIG) { /* all zeroes block entry */ + qemu_iovec_memset(qiov, i * 512, 0, 512); + continue; + } + sector_offset_in_chunk = sector_num + i - s->sectors[s->current_chunk]; + data = s->uncompressed_chunk + sector_offset_in_chunk * 512; + qemu_iovec_from_buf(qiov, i * 512, data, 512); + } + + ret = 0; +fail: + qemu_co_mutex_unlock(&s->lock); + return ret; +} + +static void dmg_close(BlockDriverState *bs) +{ + BDRVDMGState *s = bs->opaque; + + g_free(s->types); + g_free(s->offsets); + g_free(s->lengths); + g_free(s->sectors); + g_free(s->sectorcounts); + qemu_vfree(s->compressed_chunk); + qemu_vfree(s->uncompressed_chunk); + + inflateEnd(&s->zstream); +} + +static BlockDriver bdrv_dmg = { + .format_name = "dmg", + .instance_size = sizeof(BDRVDMGState), + .bdrv_probe = dmg_probe, + .bdrv_open = dmg_open, + .bdrv_refresh_limits = dmg_refresh_limits, + .bdrv_child_perm = bdrv_default_perms, + .bdrv_co_preadv = dmg_co_preadv, + .bdrv_close = dmg_close, + .is_format = true, +}; + +static void bdrv_dmg_init(void) +{ + bdrv_register(&bdrv_dmg); +} + +block_init(bdrv_dmg_init); diff --git a/block/dmg.h b/block/dmg.h new file mode 100644 index 000000000..e488601b6 --- /dev/null +++ b/block/dmg.h @@ -0,0 +1,60 @@ +/* + * Header for DMG driver + * + * Copyright (c) 2004-2006 Fabrice Bellard + * Copyright (c) 2016 Red hat, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ + +#ifndef BLOCK_DMG_H +#define BLOCK_DMG_H + +#include "block/block_int.h" +#include + +typedef struct BDRVDMGState { + CoMutex lock; + /* each chunk contains a certain number of sectors, + * offsets[i] is the offset in the .dmg file, + * lengths[i] is the length of the compressed chunk, + * sectors[i] is the sector beginning at offsets[i], + * sectorcounts[i] is the number of sectors in that chunk, + * the sectors array is ordered + * 0<=i + * Kevin Wolf + * + * This work is licensed under the terms of the GNU GPL, version 2 or + * later. See the COPYING file in the top-level directory. + */ + +#include "qemu/osdep.h" + +#include "block/block.h" +#include "sysemu/block-backend.h" +#include "sysemu/iothread.h" +#include "block/export.h" +#include "block/nbd.h" +#include "qapi/error.h" +#include "qapi/qapi-commands-block-export.h" +#include "qapi/qapi-events-block-export.h" +#include "qemu/id.h" +#ifdef CONFIG_VHOST_USER_BLK_SERVER +#include "vhost-user-blk-server.h" +#endif + +static const BlockExportDriver *blk_exp_drivers[] = { + &blk_exp_nbd, +#ifdef CONFIG_VHOST_USER_BLK_SERVER + &blk_exp_vhost_user_blk, +#endif +}; + +/* Only accessed from the main thread */ +static QLIST_HEAD(, BlockExport) block_exports = + QLIST_HEAD_INITIALIZER(block_exports); + +BlockExport *blk_exp_find(const char *id) +{ + BlockExport *exp; + + QLIST_FOREACH(exp, &block_exports, next) { + if (strcmp(id, exp->id) == 0) { + return exp; + } + } + + return NULL; +} + +static const BlockExportDriver *blk_exp_find_driver(BlockExportType type) +{ + int i; + + for (i = 0; i < ARRAY_SIZE(blk_exp_drivers); i++) { + if (blk_exp_drivers[i]->type == type) { + return blk_exp_drivers[i]; + } + } + return NULL; +} + +BlockExport *blk_exp_add(BlockExportOptions *export, Error **errp) +{ + bool fixed_iothread = export->has_fixed_iothread && export->fixed_iothread; + const BlockExportDriver *drv; + BlockExport *exp = NULL; + BlockDriverState *bs; + BlockBackend *blk = NULL; + AioContext *ctx; + uint64_t perm; + int ret; + + if (!id_wellformed(export->id)) { + error_setg(errp, "Invalid block export id"); + return NULL; + } + if (blk_exp_find(export->id)) { + error_setg(errp, "Block export id '%s' is already in use", export->id); + return NULL; + } + + drv = blk_exp_find_driver(export->type); + if (!drv) { + error_setg(errp, "No driver found for the requested export type"); + return NULL; + } + + bs = bdrv_lookup_bs(NULL, export->node_name, errp); + if (!bs) { + return NULL; + } + + if (!export->has_writable) { + export->writable = false; + } + if (bdrv_is_read_only(bs) && export->writable) { + error_setg(errp, "Cannot export read-only node as writable"); + return NULL; + } + + ctx = bdrv_get_aio_context(bs); + aio_context_acquire(ctx); + + if (export->has_iothread) { + IOThread *iothread; + AioContext *new_ctx; + + iothread = iothread_by_id(export->iothread); + if (!iothread) { + error_setg(errp, "iothread \"%s\" not found", export->iothread); + goto fail; + } + + new_ctx = iothread_get_aio_context(iothread); + + ret = bdrv_try_set_aio_context(bs, new_ctx, errp); + if (ret == 0) { + aio_context_release(ctx); + aio_context_acquire(new_ctx); + ctx = new_ctx; + } else if (fixed_iothread) { + goto fail; + } + } + + /* + * Block exports are used for non-shared storage migration. Make sure + * that BDRV_O_INACTIVE is cleared and the image is ready for write + * access since the export could be available before migration handover. + * ctx was acquired in the caller. + */ + bdrv_invalidate_cache(bs, NULL); + + perm = BLK_PERM_CONSISTENT_READ; + if (export->writable) { + perm |= BLK_PERM_WRITE; + } + + blk = blk_new(ctx, perm, BLK_PERM_ALL); + + if (!fixed_iothread) { + blk_set_allow_aio_context_change(blk, true); + } + + ret = blk_insert_bs(blk, bs, errp); + if (ret < 0) { + goto fail; + } + + if (!export->has_writethrough) { + export->writethrough = false; + } + blk_set_enable_write_cache(blk, !export->writethrough); + + assert(drv->instance_size >= sizeof(BlockExport)); + exp = g_malloc0(drv->instance_size); + *exp = (BlockExport) { + .drv = drv, + .refcount = 1, + .user_owned = true, + .id = g_strdup(export->id), + .ctx = ctx, + .blk = blk, + }; + + ret = drv->create(exp, export, errp); + if (ret < 0) { + goto fail; + } + + assert(exp->blk != NULL); + + QLIST_INSERT_HEAD(&block_exports, exp, next); + + aio_context_release(ctx); + return exp; + +fail: + blk_unref(blk); + aio_context_release(ctx); + if (exp) { + g_free(exp->id); + g_free(exp); + } + return NULL; +} + +/* Callers must hold exp->ctx lock */ +void blk_exp_ref(BlockExport *exp) +{ + assert(exp->refcount > 0); + exp->refcount++; +} + +/* Runs in the main thread */ +static void blk_exp_delete_bh(void *opaque) +{ + BlockExport *exp = opaque; + AioContext *aio_context = exp->ctx; + + aio_context_acquire(aio_context); + + assert(exp->refcount == 0); + QLIST_REMOVE(exp, next); + exp->drv->delete(exp); + blk_unref(exp->blk); + qapi_event_send_block_export_deleted(exp->id); + g_free(exp->id); + g_free(exp); + + aio_context_release(aio_context); +} + +/* Callers must hold exp->ctx lock */ +void blk_exp_unref(BlockExport *exp) +{ + assert(exp->refcount > 0); + if (--exp->refcount == 0) { + /* Touch the block_exports list only in the main thread */ + aio_bh_schedule_oneshot(qemu_get_aio_context(), blk_exp_delete_bh, + exp); + } +} + +/* + * Drops the user reference to the export and requests that all client + * connections and other internally held references start to shut down. When + * the function returns, there may still be active references while the export + * is in the process of shutting down. + * + * Acquires exp->ctx internally. Callers must *not* hold the lock. + */ +void blk_exp_request_shutdown(BlockExport *exp) +{ + AioContext *aio_context = exp->ctx; + + aio_context_acquire(aio_context); + + /* + * If the user doesn't own the export any more, it is already shutting + * down. We must not call .request_shutdown and decrease the refcount a + * second time. + */ + if (!exp->user_owned) { + goto out; + } + + exp->drv->request_shutdown(exp); + + assert(exp->user_owned); + exp->user_owned = false; + blk_exp_unref(exp); + +out: + aio_context_release(aio_context); +} + +/* + * Returns whether a block export of the given type exists. + * type == BLOCK_EXPORT_TYPE__MAX checks for an export of any type. + */ +static bool blk_exp_has_type(BlockExportType type) +{ + BlockExport *exp; + + if (type == BLOCK_EXPORT_TYPE__MAX) { + return !QLIST_EMPTY(&block_exports); + } + + QLIST_FOREACH(exp, &block_exports, next) { + if (exp->drv->type == type) { + return true; + } + } + + return false; +} + +/* type == BLOCK_EXPORT_TYPE__MAX for all types */ +void blk_exp_close_all_type(BlockExportType type) +{ + BlockExport *exp, *next; + + assert(in_aio_context_home_thread(qemu_get_aio_context())); + + QLIST_FOREACH_SAFE(exp, &block_exports, next, next) { + if (type != BLOCK_EXPORT_TYPE__MAX && exp->drv->type != type) { + continue; + } + blk_exp_request_shutdown(exp); + } + + AIO_WAIT_WHILE(NULL, blk_exp_has_type(type)); +} + +void blk_exp_close_all(void) +{ + blk_exp_close_all_type(BLOCK_EXPORT_TYPE__MAX); +} + +void qmp_block_export_add(BlockExportOptions *export, Error **errp) +{ + blk_exp_add(export, errp); +} + +void qmp_block_export_del(const char *id, + bool has_mode, BlockExportRemoveMode mode, + Error **errp) +{ + ERRP_GUARD(); + BlockExport *exp; + + exp = blk_exp_find(id); + if (exp == NULL) { + error_setg(errp, "Export '%s' is not found", id); + return; + } + if (!exp->user_owned) { + error_setg(errp, "Export '%s' is already shutting down", id); + return; + } + + if (!has_mode) { + mode = BLOCK_EXPORT_REMOVE_MODE_SAFE; + } + if (mode == BLOCK_EXPORT_REMOVE_MODE_SAFE && exp->refcount > 1) { + error_setg(errp, "export '%s' still in use", exp->id); + error_append_hint(errp, "Use mode='hard' to force client " + "disconnect\n"); + return; + } + + blk_exp_request_shutdown(exp); +} + +BlockExportInfoList *qmp_query_block_exports(Error **errp) +{ + BlockExportInfoList *head = NULL, **p_next = &head; + BlockExport *exp; + + QLIST_FOREACH(exp, &block_exports, next) { + BlockExportInfoList *entry = g_new0(BlockExportInfoList, 1); + BlockExportInfo *info = g_new(BlockExportInfo, 1); + *info = (BlockExportInfo) { + .id = g_strdup(exp->id), + .type = exp->drv->type, + .node_name = g_strdup(bdrv_get_node_name(blk_bs(exp->blk))), + .shutting_down = !exp->user_owned, + }; + + entry->value = info; + *p_next = entry; + p_next = &entry->next; + } + + return head; +} diff --git a/block/export/meson.build b/block/export/meson.build new file mode 100644 index 000000000..135b35677 --- /dev/null +++ b/block/export/meson.build @@ -0,0 +1,5 @@ +blockdev_ss.add(files('export.c')) + +if have_vhost_user_blk_server + blockdev_ss.add(files('vhost-user-blk-server.c')) +endif diff --git a/block/export/vhost-user-blk-server.c b/block/export/vhost-user-blk-server.c new file mode 100644 index 000000000..62672d1cb --- /dev/null +++ b/block/export/vhost-user-blk-server.c @@ -0,0 +1,433 @@ +/* + * Sharing QEMU block devices via vhost-user protocal + * + * Parts of the code based on nbd/server.c. + * + * Copyright (c) Coiby Xu . + * Copyright (c) 2020 Red Hat, Inc. + * + * This work is licensed under the terms of the GNU GPL, version 2 or + * later. See the COPYING file in the top-level directory. + */ +#include "qemu/osdep.h" +#include "block/block.h" +#include "contrib/libvhost-user/libvhost-user.h" +#include "standard-headers/linux/virtio_blk.h" +#include "qemu/vhost-user-server.h" +#include "vhost-user-blk-server.h" +#include "qapi/error.h" +#include "qom/object_interfaces.h" +#include "sysemu/block-backend.h" +#include "util/block-helpers.h" + +enum { + VHOST_USER_BLK_NUM_QUEUES_DEFAULT = 1, +}; +struct virtio_blk_inhdr { + unsigned char status; +}; + +typedef struct VuBlkReq { + VuVirtqElement elem; + int64_t sector_num; + size_t size; + struct virtio_blk_inhdr *in; + struct virtio_blk_outhdr out; + VuServer *server; + struct VuVirtq *vq; +} VuBlkReq; + +/* vhost user block device */ +typedef struct { + BlockExport export; + VuServer vu_server; + uint32_t blk_size; + QIOChannelSocket *sioc; + struct virtio_blk_config blkcfg; + bool writable; +} VuBlkExport; + +static void vu_blk_req_complete(VuBlkReq *req) +{ + VuDev *vu_dev = &req->server->vu_dev; + + /* IO size with 1 extra status byte */ + vu_queue_push(vu_dev, req->vq, &req->elem, req->size + 1); + vu_queue_notify(vu_dev, req->vq); + + free(req); +} + +static int coroutine_fn +vu_blk_discard_write_zeroes(BlockBackend *blk, struct iovec *iov, + uint32_t iovcnt, uint32_t type) +{ + struct virtio_blk_discard_write_zeroes desc; + ssize_t size = iov_to_buf(iov, iovcnt, 0, &desc, sizeof(desc)); + if (unlikely(size != sizeof(desc))) { + error_report("Invalid size %zd, expect %zu", size, sizeof(desc)); + return -EINVAL; + } + + uint64_t range[2] = { le64_to_cpu(desc.sector) << 9, + le32_to_cpu(desc.num_sectors) << 9 }; + if (type == VIRTIO_BLK_T_DISCARD) { + if (blk_co_pdiscard(blk, range[0], range[1]) == 0) { + return 0; + } + } else if (type == VIRTIO_BLK_T_WRITE_ZEROES) { + if (blk_co_pwrite_zeroes(blk, range[0], range[1], 0) == 0) { + return 0; + } + } + + return -EINVAL; +} + +static void coroutine_fn vu_blk_virtio_process_req(void *opaque) +{ + VuBlkReq *req = opaque; + VuServer *server = req->server; + VuVirtqElement *elem = &req->elem; + uint32_t type; + + VuBlkExport *vexp = container_of(server, VuBlkExport, vu_server); + BlockBackend *blk = vexp->export.blk; + + struct iovec *in_iov = elem->in_sg; + struct iovec *out_iov = elem->out_sg; + unsigned in_num = elem->in_num; + unsigned out_num = elem->out_num; + + /* refer to hw/block/virtio_blk.c */ + if (elem->out_num < 1 || elem->in_num < 1) { + error_report("virtio-blk request missing headers"); + goto err; + } + + if (unlikely(iov_to_buf(out_iov, out_num, 0, &req->out, + sizeof(req->out)) != sizeof(req->out))) { + error_report("virtio-blk request outhdr too short"); + goto err; + } + + iov_discard_front(&out_iov, &out_num, sizeof(req->out)); + + if (in_iov[in_num - 1].iov_len < sizeof(struct virtio_blk_inhdr)) { + error_report("virtio-blk request inhdr too short"); + goto err; + } + + /* We always touch the last byte, so just see how big in_iov is. */ + req->in = (void *)in_iov[in_num - 1].iov_base + + in_iov[in_num - 1].iov_len + - sizeof(struct virtio_blk_inhdr); + iov_discard_back(in_iov, &in_num, sizeof(struct virtio_blk_inhdr)); + + type = le32_to_cpu(req->out.type); + switch (type & ~VIRTIO_BLK_T_BARRIER) { + case VIRTIO_BLK_T_IN: + case VIRTIO_BLK_T_OUT: { + ssize_t ret = 0; + bool is_write = type & VIRTIO_BLK_T_OUT; + req->sector_num = le64_to_cpu(req->out.sector); + + if (is_write && !vexp->writable) { + req->in->status = VIRTIO_BLK_S_IOERR; + break; + } + + int64_t offset = req->sector_num * vexp->blk_size; + QEMUIOVector qiov; + if (is_write) { + qemu_iovec_init_external(&qiov, out_iov, out_num); + ret = blk_co_pwritev(blk, offset, qiov.size, &qiov, 0); + } else { + qemu_iovec_init_external(&qiov, in_iov, in_num); + ret = blk_co_preadv(blk, offset, qiov.size, &qiov, 0); + } + if (ret >= 0) { + req->in->status = VIRTIO_BLK_S_OK; + } else { + req->in->status = VIRTIO_BLK_S_IOERR; + } + break; + } + case VIRTIO_BLK_T_FLUSH: + if (blk_co_flush(blk) == 0) { + req->in->status = VIRTIO_BLK_S_OK; + } else { + req->in->status = VIRTIO_BLK_S_IOERR; + } + break; + case VIRTIO_BLK_T_GET_ID: { + size_t size = MIN(iov_size(&elem->in_sg[0], in_num), + VIRTIO_BLK_ID_BYTES); + snprintf(elem->in_sg[0].iov_base, size, "%s", "vhost_user_blk"); + req->in->status = VIRTIO_BLK_S_OK; + req->size = elem->in_sg[0].iov_len; + break; + } + case VIRTIO_BLK_T_DISCARD: + case VIRTIO_BLK_T_WRITE_ZEROES: { + int rc; + + if (!vexp->writable) { + req->in->status = VIRTIO_BLK_S_IOERR; + break; + } + + rc = vu_blk_discard_write_zeroes(blk, &elem->out_sg[1], out_num, type); + if (rc == 0) { + req->in->status = VIRTIO_BLK_S_OK; + } else { + req->in->status = VIRTIO_BLK_S_IOERR; + } + break; + } + default: + req->in->status = VIRTIO_BLK_S_UNSUPP; + break; + } + + vu_blk_req_complete(req); + return; + +err: + free(req); +} + +static void vu_blk_process_vq(VuDev *vu_dev, int idx) +{ + VuServer *server = container_of(vu_dev, VuServer, vu_dev); + VuVirtq *vq = vu_get_queue(vu_dev, idx); + + while (1) { + VuBlkReq *req; + + req = vu_queue_pop(vu_dev, vq, sizeof(VuBlkReq)); + if (!req) { + break; + } + + req->server = server; + req->vq = vq; + + Coroutine *co = + qemu_coroutine_create(vu_blk_virtio_process_req, req); + qemu_coroutine_enter(co); + } +} + +static void vu_blk_queue_set_started(VuDev *vu_dev, int idx, bool started) +{ + VuVirtq *vq; + + assert(vu_dev); + + vq = vu_get_queue(vu_dev, idx); + vu_set_queue_handler(vu_dev, vq, started ? vu_blk_process_vq : NULL); +} + +static uint64_t vu_blk_get_features(VuDev *dev) +{ + uint64_t features; + VuServer *server = container_of(dev, VuServer, vu_dev); + VuBlkExport *vexp = container_of(server, VuBlkExport, vu_server); + features = 1ull << VIRTIO_BLK_F_SIZE_MAX | + 1ull << VIRTIO_BLK_F_SEG_MAX | + 1ull << VIRTIO_BLK_F_TOPOLOGY | + 1ull << VIRTIO_BLK_F_BLK_SIZE | + 1ull << VIRTIO_BLK_F_FLUSH | + 1ull << VIRTIO_BLK_F_DISCARD | + 1ull << VIRTIO_BLK_F_WRITE_ZEROES | + 1ull << VIRTIO_BLK_F_CONFIG_WCE | + 1ull << VIRTIO_BLK_F_MQ | + 1ull << VIRTIO_F_VERSION_1 | + 1ull << VIRTIO_RING_F_INDIRECT_DESC | + 1ull << VIRTIO_RING_F_EVENT_IDX | + 1ull << VHOST_USER_F_PROTOCOL_FEATURES; + + if (!vexp->writable) { + features |= 1ull << VIRTIO_BLK_F_RO; + } + + return features; +} + +static uint64_t vu_blk_get_protocol_features(VuDev *dev) +{ + return 1ull << VHOST_USER_PROTOCOL_F_CONFIG | + 1ull << VHOST_USER_PROTOCOL_F_INFLIGHT_SHMFD; +} + +static int +vu_blk_get_config(VuDev *vu_dev, uint8_t *config, uint32_t len) +{ + VuServer *server = container_of(vu_dev, VuServer, vu_dev); + VuBlkExport *vexp = container_of(server, VuBlkExport, vu_server); + + g_return_val_if_fail(len <= sizeof(struct virtio_blk_config), -1); + + memcpy(config, &vexp->blkcfg, len); + return 0; +} + +static int +vu_blk_set_config(VuDev *vu_dev, const uint8_t *data, + uint32_t offset, uint32_t size, uint32_t flags) +{ + VuServer *server = container_of(vu_dev, VuServer, vu_dev); + VuBlkExport *vexp = container_of(server, VuBlkExport, vu_server); + uint8_t wce; + + /* don't support live migration */ + if (flags != VHOST_SET_CONFIG_TYPE_MASTER) { + return -EINVAL; + } + + if (offset != offsetof(struct virtio_blk_config, wce) || + size != 1) { + return -EINVAL; + } + + wce = *data; + vexp->blkcfg.wce = wce; + blk_set_enable_write_cache(vexp->export.blk, wce); + return 0; +} + +/* + * When the client disconnects, it sends a VHOST_USER_NONE request + * and vu_process_message will simple call exit which cause the VM + * to exit abruptly. + * To avoid this issue, process VHOST_USER_NONE request ahead + * of vu_process_message. + * + */ +static int vu_blk_process_msg(VuDev *dev, VhostUserMsg *vmsg, int *do_reply) +{ + if (vmsg->request == VHOST_USER_NONE) { + dev->panic(dev, "disconnect"); + return true; + } + return false; +} + +static const VuDevIface vu_blk_iface = { + .get_features = vu_blk_get_features, + .queue_set_started = vu_blk_queue_set_started, + .get_protocol_features = vu_blk_get_protocol_features, + .get_config = vu_blk_get_config, + .set_config = vu_blk_set_config, + .process_msg = vu_blk_process_msg, +}; + +static void blk_aio_attached(AioContext *ctx, void *opaque) +{ + VuBlkExport *vexp = opaque; + + vexp->export.ctx = ctx; + vhost_user_server_attach_aio_context(&vexp->vu_server, ctx); +} + +static void blk_aio_detach(void *opaque) +{ + VuBlkExport *vexp = opaque; + + vhost_user_server_detach_aio_context(&vexp->vu_server); + vexp->export.ctx = NULL; +} + +static void +vu_blk_initialize_config(BlockDriverState *bs, + struct virtio_blk_config *config, + uint32_t blk_size, + uint16_t num_queues) +{ + config->capacity = cpu_to_le64(bdrv_getlength(bs) >> BDRV_SECTOR_BITS); + config->blk_size = cpu_to_le32(blk_size); + config->size_max = cpu_to_le32(0); + config->seg_max = cpu_to_le32(128 - 2); + config->min_io_size = cpu_to_le16(1); + config->opt_io_size = cpu_to_le32(1); + config->num_queues = cpu_to_le16(num_queues); + config->max_discard_sectors = cpu_to_le32(32768); + config->max_discard_seg = cpu_to_le32(1); + config->discard_sector_alignment = cpu_to_le32(config->blk_size >> 9); + config->max_write_zeroes_sectors = cpu_to_le32(32768); + config->max_write_zeroes_seg = cpu_to_le32(1); +} + +static void vu_blk_exp_request_shutdown(BlockExport *exp) +{ + VuBlkExport *vexp = container_of(exp, VuBlkExport, export); + + vhost_user_server_stop(&vexp->vu_server); +} + +static int vu_blk_exp_create(BlockExport *exp, BlockExportOptions *opts, + Error **errp) +{ + VuBlkExport *vexp = container_of(exp, VuBlkExport, export); + BlockExportOptionsVhostUserBlk *vu_opts = &opts->u.vhost_user_blk; + Error *local_err = NULL; + uint64_t logical_block_size; + uint16_t num_queues = VHOST_USER_BLK_NUM_QUEUES_DEFAULT; + + vexp->writable = opts->writable; + vexp->blkcfg.wce = 0; + + if (vu_opts->has_logical_block_size) { + logical_block_size = vu_opts->logical_block_size; + } else { + logical_block_size = BDRV_SECTOR_SIZE; + } + check_block_size(exp->id, "logical-block-size", logical_block_size, + &local_err); + if (local_err) { + error_propagate(errp, local_err); + return -EINVAL; + } + vexp->blk_size = logical_block_size; + blk_set_guest_block_size(exp->blk, logical_block_size); + + if (vu_opts->has_num_queues) { + num_queues = vu_opts->num_queues; + } + if (num_queues == 0) { + error_setg(errp, "num-queues must be greater than 0"); + return -EINVAL; + } + + vu_blk_initialize_config(blk_bs(exp->blk), &vexp->blkcfg, + logical_block_size, num_queues); + + blk_add_aio_context_notifier(exp->blk, blk_aio_attached, blk_aio_detach, + vexp); + + if (!vhost_user_server_start(&vexp->vu_server, vu_opts->addr, exp->ctx, + num_queues, &vu_blk_iface, errp)) { + blk_remove_aio_context_notifier(exp->blk, blk_aio_attached, + blk_aio_detach, vexp); + return -EADDRNOTAVAIL; + } + + return 0; +} + +static void vu_blk_exp_delete(BlockExport *exp) +{ + VuBlkExport *vexp = container_of(exp, VuBlkExport, export); + + blk_remove_aio_context_notifier(exp->blk, blk_aio_attached, blk_aio_detach, + vexp); +} + +const BlockExportDriver blk_exp_vhost_user_blk = { + .type = BLOCK_EXPORT_TYPE_VHOST_USER_BLK, + .instance_size = sizeof(VuBlkExport), + .create = vu_blk_exp_create, + .delete = vu_blk_exp_delete, + .request_shutdown = vu_blk_exp_request_shutdown, +}; diff --git a/block/export/vhost-user-blk-server.h b/block/export/vhost-user-blk-server.h new file mode 100644 index 000000000..fcf46fc8a --- /dev/null +++ b/block/export/vhost-user-blk-server.h @@ -0,0 +1,19 @@ +/* + * Sharing QEMU block devices via vhost-user protocal + * + * Copyright (c) Coiby Xu . + * Copyright (c) 2020 Red Hat, Inc. + * + * This work is licensed under the terms of the GNU GPL, version 2 or + * later. See the COPYING file in the top-level directory. + */ + +#ifndef VHOST_USER_BLK_SERVER_H +#define VHOST_USER_BLK_SERVER_H + +#include "block/export.h" + +/* For block/export/export.c */ +extern const BlockExportDriver blk_exp_vhost_user_blk; + +#endif /* VHOST_USER_BLK_SERVER_H */ diff --git a/block/file-posix.c b/block/file-posix.c new file mode 100644 index 000000000..d5fd1dbcd --- /dev/null +++ b/block/file-posix.c @@ -0,0 +1,3892 @@ +/* + * Block driver for RAW files (posix) + * + * Copyright (c) 2006 Fabrice Bellard + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ + +#include "qemu/osdep.h" +#include "qemu-common.h" +#include "qapi/error.h" +#include "qemu/cutils.h" +#include "qemu/error-report.h" +#include "block/block_int.h" +#include "qemu/module.h" +#include "qemu/option.h" +#include "qemu/units.h" +#include "trace.h" +#include "block/thread-pool.h" +#include "qemu/iov.h" +#include "block/raw-aio.h" +#include "qapi/qmp/qdict.h" +#include "qapi/qmp/qstring.h" + +#include "scsi/pr-manager.h" +#include "scsi/constants.h" + +#if defined(__APPLE__) && (__MACH__) +#include +#include +#include +#include +#include +#include +#include +//#include +#include +#include +#endif + +#ifdef __sun__ +#define _POSIX_PTHREAD_SEMANTICS 1 +#include +#endif +#ifdef __linux__ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#ifdef __s390__ +#include +#endif +#ifndef FS_NOCOW_FL +#define FS_NOCOW_FL 0x00800000 /* Do not cow file */ +#endif +#endif +#if defined(CONFIG_FALLOCATE_PUNCH_HOLE) || defined(CONFIG_FALLOCATE_ZERO_RANGE) +#include +#endif +#if defined (__FreeBSD__) || defined(__FreeBSD_kernel__) +#include +#include +#endif + +#ifdef __OpenBSD__ +#include +#include +#include +#endif + +#ifdef __NetBSD__ +#include +#include +#include +#include +#endif + +#ifdef __DragonFly__ +#include +#include +#endif + +#ifdef CONFIG_XFS +#include +#endif + +#include "trace.h" + +/* OS X does not have O_DSYNC */ +#ifndef O_DSYNC +#ifdef O_SYNC +#define O_DSYNC O_SYNC +#elif defined(O_FSYNC) +#define O_DSYNC O_FSYNC +#endif +#endif + +/* Approximate O_DIRECT with O_DSYNC if O_DIRECT isn't available */ +#ifndef O_DIRECT +#define O_DIRECT O_DSYNC +#endif + +#define FTYPE_FILE 0 +#define FTYPE_CD 1 + +#define MAX_BLOCKSIZE 4096 + +/* Posix file locking bytes. Libvirt takes byte 0, we start from higher bytes, + * leaving a few more bytes for its future use. */ +#define RAW_LOCK_PERM_BASE 100 +#define RAW_LOCK_SHARED_BASE 200 + +typedef struct BDRVRawState { + int fd; + bool use_lock; + int type; + int open_flags; + size_t buf_align; + + /* The current permissions. */ + uint64_t perm; + uint64_t shared_perm; + + /* The perms bits whose corresponding bytes are already locked in + * s->fd. */ + uint64_t locked_perm; + uint64_t locked_shared_perm; + + int perm_change_fd; + int perm_change_flags; + BDRVReopenState *reopen_state; + +#ifdef CONFIG_XFS + bool is_xfs:1; +#endif + bool has_discard:1; + bool has_write_zeroes:1; + bool discard_zeroes:1; + bool use_linux_aio:1; + bool use_linux_io_uring:1; + bool page_cache_inconsistent:1; + bool has_fallocate; + bool needs_alignment; + bool drop_cache; + bool check_cache_dropped; + struct { + uint64_t discard_nb_ok; + uint64_t discard_nb_failed; + uint64_t discard_bytes_ok; + } stats; + + PRManager *pr_mgr; +} BDRVRawState; + +typedef struct BDRVRawReopenState { + int fd; + int open_flags; + bool drop_cache; + bool check_cache_dropped; +} BDRVRawReopenState; + +static int fd_open(BlockDriverState *bs); +static int64_t raw_getlength(BlockDriverState *bs); + +typedef struct RawPosixAIOData { + BlockDriverState *bs; + int aio_type; + int aio_fildes; + + off_t aio_offset; + uint64_t aio_nbytes; + + union { + struct { + struct iovec *iov; + int niov; + } io; + struct { + uint64_t cmd; + void *buf; + } ioctl; + struct { + int aio_fd2; + off_t aio_offset2; + } copy_range; + struct { + PreallocMode prealloc; + Error **errp; + } truncate; + }; +} RawPosixAIOData; + +#if defined(__FreeBSD__) || defined(__FreeBSD_kernel__) +static int cdrom_reopen(BlockDriverState *bs); +#endif + +#if defined(__NetBSD__) +static int raw_normalize_devicepath(const char **filename, Error **errp) +{ + static char namebuf[PATH_MAX]; + const char *dp, *fname; + struct stat sb; + + fname = *filename; + dp = strrchr(fname, '/'); + if (lstat(fname, &sb) < 0) { + error_setg_file_open(errp, errno, fname); + return -errno; + } + + if (!S_ISBLK(sb.st_mode)) { + return 0; + } + + if (dp == NULL) { + snprintf(namebuf, PATH_MAX, "r%s", fname); + } else { + snprintf(namebuf, PATH_MAX, "%.*s/r%s", + (int)(dp - fname), fname, dp + 1); + } + *filename = namebuf; + warn_report("%s is a block device, using %s", fname, *filename); + + return 0; +} +#else +static int raw_normalize_devicepath(const char **filename, Error **errp) +{ + return 0; +} +#endif + +/* + * Get logical block size via ioctl. On success store it in @sector_size_p. + */ +static int probe_logical_blocksize(int fd, unsigned int *sector_size_p) +{ + unsigned int sector_size; + bool success = false; + int i; + + errno = ENOTSUP; + static const unsigned long ioctl_list[] = { +#ifdef BLKSSZGET + BLKSSZGET, +#endif +#ifdef DKIOCGETBLOCKSIZE + DKIOCGETBLOCKSIZE, +#endif +#ifdef DIOCGSECTORSIZE + DIOCGSECTORSIZE, +#endif + }; + + /* Try a few ioctls to get the right size */ + for (i = 0; i < (int)ARRAY_SIZE(ioctl_list); i++) { + if (ioctl(fd, ioctl_list[i], §or_size) >= 0) { + *sector_size_p = sector_size; + success = true; + } + } + + return success ? 0 : -errno; +} + +/** + * Get physical block size of @fd. + * On success, store it in @blk_size and return 0. + * On failure, return -errno. + */ +static int probe_physical_blocksize(int fd, unsigned int *blk_size) +{ +#ifdef BLKPBSZGET + if (ioctl(fd, BLKPBSZGET, blk_size) < 0) { + return -errno; + } + return 0; +#else + return -ENOTSUP; +#endif +} + +/* + * Returns true if no alignment restrictions are necessary even for files + * opened with O_DIRECT. + * + * raw_probe_alignment() probes the required alignment and assume that 1 means + * the probing failed, so it falls back to a safe default of 4k. This can be + * avoided if we know that byte alignment is okay for the file. + */ +static bool dio_byte_aligned(int fd) +{ +#ifdef __linux__ + struct statfs buf; + int ret; + + ret = fstatfs(fd, &buf); + if (ret == 0 && buf.f_type == NFS_SUPER_MAGIC) { + return true; + } +#endif + return false; +} + +/* Check if read is allowed with given memory buffer and length. + * + * This function is used to check O_DIRECT memory buffer and request alignment. + */ +static bool raw_is_io_aligned(int fd, void *buf, size_t len) +{ + ssize_t ret = pread(fd, buf, len, 0); + + if (ret >= 0) { + return true; + } + +#ifdef __linux__ + /* The Linux kernel returns EINVAL for misaligned O_DIRECT reads. Ignore + * other errors (e.g. real I/O error), which could happen on a failed + * drive, since we only care about probing alignment. + */ + if (errno != EINVAL) { + return true; + } +#endif + + return false; +} + +static void raw_probe_alignment(BlockDriverState *bs, int fd, Error **errp) +{ + BDRVRawState *s = bs->opaque; + char *buf; + size_t max_align = MAX(MAX_BLOCKSIZE, qemu_real_host_page_size); + size_t alignments[] = {1, 512, 1024, 2048, 4096}; + + /* For SCSI generic devices the alignment is not really used. + With buffered I/O, we don't have any restrictions. */ + if (bdrv_is_sg(bs) || !s->needs_alignment) { + bs->bl.request_alignment = 1; + s->buf_align = 1; + return; + } + + bs->bl.request_alignment = 0; + s->buf_align = 0; + /* Let's try to use the logical blocksize for the alignment. */ + if (probe_logical_blocksize(fd, &bs->bl.request_alignment) < 0) { + bs->bl.request_alignment = 0; + } +#ifdef CONFIG_XFS + if (s->is_xfs) { + struct dioattr da; + if (xfsctl(NULL, fd, XFS_IOC_DIOINFO, &da) >= 0) { + bs->bl.request_alignment = da.d_miniosz; + /* The kernel returns wrong information for d_mem */ + /* s->buf_align = da.d_mem; */ + } + } +#endif + + /* + * If we could not get the sizes so far, we can only guess them. First try + * to detect request alignment, since it is more likely to succeed. Then + * try to detect buf_align, which cannot be detected in some cases (e.g. + * Gluster). If buf_align cannot be detected, we fallback to the value of + * request_alignment. + */ + + if (!bs->bl.request_alignment) { + int i; + size_t align; + buf = qemu_memalign(max_align, max_align); + for (i = 0; i < ARRAY_SIZE(alignments); i++) { + align = alignments[i]; + if (raw_is_io_aligned(fd, buf, align)) { + /* Fallback to safe value. */ + bs->bl.request_alignment = (align != 1) ? align : max_align; + break; + } + } + qemu_vfree(buf); + } + + if (!s->buf_align) { + int i; + size_t align; + buf = qemu_memalign(max_align, 2 * max_align); + for (i = 0; i < ARRAY_SIZE(alignments); i++) { + align = alignments[i]; + if (raw_is_io_aligned(fd, buf + align, max_align)) { + /* Fallback to request_alignment. */ + s->buf_align = (align != 1) ? align : bs->bl.request_alignment; + break; + } + } + qemu_vfree(buf); + } + + if (!s->buf_align || !bs->bl.request_alignment) { + error_setg(errp, "Could not find working O_DIRECT alignment"); + error_append_hint(errp, "Try cache.direct=off\n"); + } +} + +static int check_hdev_writable(int fd) +{ +#if defined(BLKROGET) + /* Linux block devices can be configured "read-only" using blockdev(8). + * This is independent of device node permissions and therefore open(2) + * with O_RDWR succeeds. Actual writes fail with EPERM. + * + * bdrv_open() is supposed to fail if the disk is read-only. Explicitly + * check for read-only block devices so that Linux block devices behave + * properly. + */ + struct stat st; + int readonly = 0; + + if (fstat(fd, &st)) { + return -errno; + } + + if (!S_ISBLK(st.st_mode)) { + return 0; + } + + if (ioctl(fd, BLKROGET, &readonly) < 0) { + return -errno; + } + + if (readonly) { + return -EACCES; + } +#endif /* defined(BLKROGET) */ + return 0; +} + +static void raw_parse_flags(int bdrv_flags, int *open_flags, bool has_writers) +{ + bool read_write = false; + assert(open_flags != NULL); + + *open_flags |= O_BINARY; + *open_flags &= ~O_ACCMODE; + + if (bdrv_flags & BDRV_O_AUTO_RDONLY) { + read_write = has_writers; + } else if (bdrv_flags & BDRV_O_RDWR) { + read_write = true; + } + + if (read_write) { + *open_flags |= O_RDWR; + } else { + *open_flags |= O_RDONLY; + } + + /* Use O_DSYNC for write-through caching, no flags for write-back caching, + * and O_DIRECT for no caching. */ + if ((bdrv_flags & BDRV_O_NOCACHE)) { + *open_flags |= O_DIRECT; + } +} + +static void raw_parse_filename(const char *filename, QDict *options, + Error **errp) +{ + bdrv_parse_filename_strip_prefix(filename, "file:", options); +} + +static QemuOptsList raw_runtime_opts = { + .name = "raw", + .head = QTAILQ_HEAD_INITIALIZER(raw_runtime_opts.head), + .desc = { + { + .name = "filename", + .type = QEMU_OPT_STRING, + .help = "File name of the image", + }, + { + .name = "aio", + .type = QEMU_OPT_STRING, + .help = "host AIO implementation (threads, native, io_uring)", + }, + { + .name = "locking", + .type = QEMU_OPT_STRING, + .help = "file locking mode (on/off/auto, default: auto)", + }, + { + .name = "pr-manager", + .type = QEMU_OPT_STRING, + .help = "id of persistent reservation manager object (default: none)", + }, +#if defined(__linux__) + { + .name = "drop-cache", + .type = QEMU_OPT_BOOL, + .help = "invalidate page cache during live migration (default: on)", + }, +#endif + { + .name = "x-check-cache-dropped", + .type = QEMU_OPT_BOOL, + .help = "check that page cache was dropped on live migration (default: off)" + }, + { /* end of list */ } + }, +}; + +static const char *const mutable_opts[] = { "x-check-cache-dropped", NULL }; + +static int raw_open_common(BlockDriverState *bs, QDict *options, + int bdrv_flags, int open_flags, + bool device, Error **errp) +{ + BDRVRawState *s = bs->opaque; + QemuOpts *opts; + Error *local_err = NULL; + const char *filename = NULL; + const char *str; + BlockdevAioOptions aio, aio_default; + int fd, ret; + struct stat st; + OnOffAuto locking; + + opts = qemu_opts_create(&raw_runtime_opts, NULL, 0, &error_abort); + if (!qemu_opts_absorb_qdict(opts, options, errp)) { + ret = -EINVAL; + goto fail; + } + + filename = qemu_opt_get(opts, "filename"); + + ret = raw_normalize_devicepath(&filename, errp); + if (ret != 0) { + goto fail; + } + + if (bdrv_flags & BDRV_O_NATIVE_AIO) { + aio_default = BLOCKDEV_AIO_OPTIONS_NATIVE; +#ifdef CONFIG_LINUX_IO_URING + } else if (bdrv_flags & BDRV_O_IO_URING) { + aio_default = BLOCKDEV_AIO_OPTIONS_IO_URING; +#endif + } else { + aio_default = BLOCKDEV_AIO_OPTIONS_THREADS; + } + + aio = qapi_enum_parse(&BlockdevAioOptions_lookup, + qemu_opt_get(opts, "aio"), + aio_default, &local_err); + if (local_err) { + error_propagate(errp, local_err); + ret = -EINVAL; + goto fail; + } + + s->use_linux_aio = (aio == BLOCKDEV_AIO_OPTIONS_NATIVE); +#ifdef CONFIG_LINUX_IO_URING + s->use_linux_io_uring = (aio == BLOCKDEV_AIO_OPTIONS_IO_URING); +#endif + + locking = qapi_enum_parse(&OnOffAuto_lookup, + qemu_opt_get(opts, "locking"), + ON_OFF_AUTO_AUTO, &local_err); + if (local_err) { + error_propagate(errp, local_err); + ret = -EINVAL; + goto fail; + } + switch (locking) { + case ON_OFF_AUTO_ON: + s->use_lock = true; + if (!qemu_has_ofd_lock()) { + warn_report("File lock requested but OFD locking syscall is " + "unavailable, falling back to POSIX file locks"); + error_printf("Due to the implementation, locks can be lost " + "unexpectedly.\n"); + } + break; + case ON_OFF_AUTO_OFF: + s->use_lock = false; + break; + case ON_OFF_AUTO_AUTO: + s->use_lock = qemu_has_ofd_lock(); + break; + default: + abort(); + } + + str = qemu_opt_get(opts, "pr-manager"); + if (str) { + s->pr_mgr = pr_manager_lookup(str, &local_err); + if (local_err) { + error_propagate(errp, local_err); + ret = -EINVAL; + goto fail; + } + } + + s->drop_cache = qemu_opt_get_bool(opts, "drop-cache", true); + s->check_cache_dropped = qemu_opt_get_bool(opts, "x-check-cache-dropped", + false); + + s->open_flags = open_flags; + raw_parse_flags(bdrv_flags, &s->open_flags, false); + + s->fd = -1; + fd = qemu_open(filename, s->open_flags, errp); + ret = fd < 0 ? -errno : 0; + + if (ret < 0) { + if (ret == -EROFS) { + ret = -EACCES; + } + goto fail; + } + s->fd = fd; + + /* Check s->open_flags rather than bdrv_flags due to auto-read-only */ + if (s->open_flags & O_RDWR) { + ret = check_hdev_writable(s->fd); + if (ret < 0) { + error_setg_errno(errp, -ret, "The device is not writable"); + goto fail; + } + } + + s->perm = 0; + s->shared_perm = BLK_PERM_ALL; + +#ifdef CONFIG_LINUX_AIO + /* Currently Linux does AIO only for files opened with O_DIRECT */ + if (s->use_linux_aio) { + if (!(s->open_flags & O_DIRECT)) { + error_setg(errp, "aio=native was specified, but it requires " + "cache.direct=on, which was not specified."); + ret = -EINVAL; + goto fail; + } + if (!aio_setup_linux_aio(bdrv_get_aio_context(bs), errp)) { + error_prepend(errp, "Unable to use native AIO: "); + goto fail; + } + } +#else + if (s->use_linux_aio) { + error_setg(errp, "aio=native was specified, but is not supported " + "in this build."); + ret = -EINVAL; + goto fail; + } +#endif /* !defined(CONFIG_LINUX_AIO) */ + +#ifdef CONFIG_LINUX_IO_URING + if (s->use_linux_io_uring) { + if (!aio_setup_linux_io_uring(bdrv_get_aio_context(bs), errp)) { + error_prepend(errp, "Unable to use io_uring: "); + goto fail; + } + } +#else + if (s->use_linux_io_uring) { + error_setg(errp, "aio=io_uring was specified, but is not supported " + "in this build."); + ret = -EINVAL; + goto fail; + } +#endif /* !defined(CONFIG_LINUX_IO_URING) */ + + s->has_discard = true; + s->has_write_zeroes = true; + if ((bs->open_flags & BDRV_O_NOCACHE) != 0 && !dio_byte_aligned(s->fd)) { + s->needs_alignment = true; + } + + if (fstat(s->fd, &st) < 0) { + ret = -errno; + error_setg_errno(errp, errno, "Could not stat file"); + goto fail; + } + + if (!device) { + if (S_ISBLK(st.st_mode)) { + warn_report("Opening a block device as a file using the '%s' " + "driver is deprecated", bs->drv->format_name); + } else if (S_ISCHR(st.st_mode)) { + warn_report("Opening a character device as a file using the '%s' " + "driver is deprecated", bs->drv->format_name); + } else if (!S_ISREG(st.st_mode)) { + error_setg(errp, "A regular file was expected by the '%s' driver, " + "but something else was given", bs->drv->format_name); + ret = -EINVAL; + goto fail; + } else { + s->discard_zeroes = true; + s->has_fallocate = true; + } + } else { + if (!(S_ISCHR(st.st_mode) || S_ISBLK(st.st_mode))) { + error_setg(errp, "'%s' driver expects either " + "a character or block device", bs->drv->format_name); + ret = -EINVAL; + goto fail; + } + } + + if (S_ISBLK(st.st_mode)) { +#ifdef BLKDISCARDZEROES + unsigned int arg; + if (ioctl(s->fd, BLKDISCARDZEROES, &arg) == 0 && arg) { + s->discard_zeroes = true; + } +#endif +#ifdef __linux__ + /* On Linux 3.10, BLKDISCARD leaves stale data in the page cache. Do + * not rely on the contents of discarded blocks unless using O_DIRECT. + * Same for BLKZEROOUT. + */ + if (!(bs->open_flags & BDRV_O_NOCACHE)) { + s->discard_zeroes = false; + s->has_write_zeroes = false; + } +#endif + } +#ifdef __FreeBSD__ + if (S_ISCHR(st.st_mode)) { + /* + * The file is a char device (disk), which on FreeBSD isn't behind + * a pager, so force all requests to be aligned. This is needed + * so QEMU makes sure all IO operations on the device are aligned + * to sector size, or else FreeBSD will reject them with EINVAL. + */ + s->needs_alignment = true; + } +#endif + +#ifdef CONFIG_XFS + if (platform_test_xfs_fd(s->fd)) { + s->is_xfs = true; + } +#endif + + bs->supported_zero_flags = BDRV_REQ_MAY_UNMAP | BDRV_REQ_NO_FALLBACK; + if (S_ISREG(st.st_mode)) { + /* When extending regular files, we get zeros from the OS */ + bs->supported_truncate_flags = BDRV_REQ_ZERO_WRITE; + } + ret = 0; +fail: + if (ret < 0 && s->fd != -1) { + qemu_close(s->fd); + } + if (filename && (bdrv_flags & BDRV_O_TEMPORARY)) { + unlink(filename); + } + qemu_opts_del(opts); + return ret; +} + +static int raw_open(BlockDriverState *bs, QDict *options, int flags, + Error **errp) +{ + BDRVRawState *s = bs->opaque; + + s->type = FTYPE_FILE; + return raw_open_common(bs, options, flags, 0, false, errp); +} + +typedef enum { + RAW_PL_PREPARE, + RAW_PL_COMMIT, + RAW_PL_ABORT, +} RawPermLockOp; + +#define PERM_FOREACH(i) \ + for ((i) = 0; (1ULL << (i)) <= BLK_PERM_ALL; i++) + +/* Lock bytes indicated by @perm_lock_bits and @shared_perm_lock_bits in the + * file; if @unlock == true, also unlock the unneeded bytes. + * @shared_perm_lock_bits is the mask of all permissions that are NOT shared. + */ +static int raw_apply_lock_bytes(BDRVRawState *s, int fd, + uint64_t perm_lock_bits, + uint64_t shared_perm_lock_bits, + bool unlock, Error **errp) +{ + int ret; + int i; + uint64_t locked_perm, locked_shared_perm; + + if (s) { + locked_perm = s->locked_perm; + locked_shared_perm = s->locked_shared_perm; + } else { + /* + * We don't have the previous bits, just lock/unlock for each of the + * requested bits. + */ + if (unlock) { + locked_perm = BLK_PERM_ALL; + locked_shared_perm = BLK_PERM_ALL; + } else { + locked_perm = 0; + locked_shared_perm = 0; + } + } + + PERM_FOREACH(i) { + int off = RAW_LOCK_PERM_BASE + i; + uint64_t bit = (1ULL << i); + if ((perm_lock_bits & bit) && !(locked_perm & bit)) { + ret = qemu_lock_fd(fd, off, 1, false); + if (ret) { + error_setg(errp, "Failed to lock byte %d", off); + return ret; + } else if (s) { + s->locked_perm |= bit; + } + } else if (unlock && (locked_perm & bit) && !(perm_lock_bits & bit)) { + ret = qemu_unlock_fd(fd, off, 1); + if (ret) { + error_setg(errp, "Failed to unlock byte %d", off); + return ret; + } else if (s) { + s->locked_perm &= ~bit; + } + } + } + PERM_FOREACH(i) { + int off = RAW_LOCK_SHARED_BASE + i; + uint64_t bit = (1ULL << i); + if ((shared_perm_lock_bits & bit) && !(locked_shared_perm & bit)) { + ret = qemu_lock_fd(fd, off, 1, false); + if (ret) { + error_setg(errp, "Failed to lock byte %d", off); + return ret; + } else if (s) { + s->locked_shared_perm |= bit; + } + } else if (unlock && (locked_shared_perm & bit) && + !(shared_perm_lock_bits & bit)) { + ret = qemu_unlock_fd(fd, off, 1); + if (ret) { + error_setg(errp, "Failed to unlock byte %d", off); + return ret; + } else if (s) { + s->locked_shared_perm &= ~bit; + } + } + } + return 0; +} + +/* Check "unshared" bytes implied by @perm and ~@shared_perm in the file. */ +static int raw_check_lock_bytes(int fd, uint64_t perm, uint64_t shared_perm, + Error **errp) +{ + int ret; + int i; + + PERM_FOREACH(i) { + int off = RAW_LOCK_SHARED_BASE + i; + uint64_t p = 1ULL << i; + if (perm & p) { + ret = qemu_lock_fd_test(fd, off, 1, true); + if (ret) { + char *perm_name = bdrv_perm_names(p); + error_setg(errp, + "Failed to get \"%s\" lock", + perm_name); + g_free(perm_name); + return ret; + } + } + } + PERM_FOREACH(i) { + int off = RAW_LOCK_PERM_BASE + i; + uint64_t p = 1ULL << i; + if (!(shared_perm & p)) { + ret = qemu_lock_fd_test(fd, off, 1, true); + if (ret) { + char *perm_name = bdrv_perm_names(p); + error_setg(errp, + "Failed to get shared \"%s\" lock", + perm_name); + g_free(perm_name); + return ret; + } + } + } + return 0; +} + +static int raw_handle_perm_lock(BlockDriverState *bs, + RawPermLockOp op, + uint64_t new_perm, uint64_t new_shared, + Error **errp) +{ + BDRVRawState *s = bs->opaque; + int ret = 0; + Error *local_err = NULL; + + if (!s->use_lock) { + return 0; + } + + if (bdrv_get_flags(bs) & BDRV_O_INACTIVE) { + return 0; + } + + switch (op) { + case RAW_PL_PREPARE: + if ((s->perm | new_perm) == s->perm && + (s->shared_perm & new_shared) == s->shared_perm) + { + /* + * We are going to unlock bytes, it should not fail. If it fail due + * to some fs-dependent permission-unrelated reasons (which occurs + * sometimes on NFS and leads to abort in bdrv_replace_child) we + * can't prevent such errors by any check here. And we ignore them + * anyway in ABORT and COMMIT. + */ + return 0; + } + ret = raw_apply_lock_bytes(s, s->fd, s->perm | new_perm, + ~s->shared_perm | ~new_shared, + false, errp); + if (!ret) { + ret = raw_check_lock_bytes(s->fd, new_perm, new_shared, errp); + if (!ret) { + return 0; + } + error_append_hint(errp, + "Is another process using the image [%s]?\n", + bs->filename); + } + /* fall through to unlock bytes. */ + case RAW_PL_ABORT: + raw_apply_lock_bytes(s, s->fd, s->perm, ~s->shared_perm, + true, &local_err); + if (local_err) { + /* Theoretically the above call only unlocks bytes and it cannot + * fail. Something weird happened, report it. + */ + warn_report_err(local_err); + } + break; + case RAW_PL_COMMIT: + raw_apply_lock_bytes(s, s->fd, new_perm, ~new_shared, + true, &local_err); + if (local_err) { + /* Theoretically the above call only unlocks bytes and it cannot + * fail. Something weird happened, report it. + */ + warn_report_err(local_err); + } + break; + } + return ret; +} + +static int raw_reconfigure_getfd(BlockDriverState *bs, int flags, + int *open_flags, uint64_t perm, bool force_dup, + Error **errp) +{ + BDRVRawState *s = bs->opaque; + int fd = -1; + int ret; + bool has_writers = perm & + (BLK_PERM_WRITE | BLK_PERM_WRITE_UNCHANGED | BLK_PERM_RESIZE); + int fcntl_flags = O_APPEND | O_NONBLOCK; +#ifdef O_NOATIME + fcntl_flags |= O_NOATIME; +#endif + + *open_flags = 0; + if (s->type == FTYPE_CD) { + *open_flags |= O_NONBLOCK; + } + + raw_parse_flags(flags, open_flags, has_writers); + +#ifdef O_ASYNC + /* Not all operating systems have O_ASYNC, and those that don't + * will not let us track the state into rs->open_flags (typically + * you achieve the same effect with an ioctl, for example I_SETSIG + * on Solaris). But we do not use O_ASYNC, so that's fine. + */ + assert((s->open_flags & O_ASYNC) == 0); +#endif + + if (!force_dup && *open_flags == s->open_flags) { + /* We're lucky, the existing fd is fine */ + return s->fd; + } + + if ((*open_flags & ~fcntl_flags) == (s->open_flags & ~fcntl_flags)) { + /* dup the original fd */ + fd = qemu_dup(s->fd); + if (fd >= 0) { + ret = fcntl_setfl(fd, *open_flags); + if (ret) { + qemu_close(fd); + fd = -1; + } + } + } + + /* If we cannot use fcntl, or fcntl failed, fall back to qemu_open() */ + if (fd == -1) { + const char *normalized_filename = bs->filename; + ret = raw_normalize_devicepath(&normalized_filename, errp); + if (ret >= 0) { + fd = qemu_open(normalized_filename, *open_flags, errp); + if (fd == -1) { + return -1; + } + } + } + + if (fd != -1 && (*open_flags & O_RDWR)) { + ret = check_hdev_writable(fd); + if (ret < 0) { + qemu_close(fd); + error_setg_errno(errp, -ret, "The device is not writable"); + return -1; + } + } + + return fd; +} + +static int raw_reopen_prepare(BDRVReopenState *state, + BlockReopenQueue *queue, Error **errp) +{ + BDRVRawState *s; + BDRVRawReopenState *rs; + QemuOpts *opts; + int ret; + Error *local_err = NULL; + + assert(state != NULL); + assert(state->bs != NULL); + + s = state->bs->opaque; + + state->opaque = g_new0(BDRVRawReopenState, 1); + rs = state->opaque; + + /* Handle options changes */ + opts = qemu_opts_create(&raw_runtime_opts, NULL, 0, &error_abort); + if (!qemu_opts_absorb_qdict(opts, state->options, errp)) { + ret = -EINVAL; + goto out; + } + + rs->drop_cache = qemu_opt_get_bool_del(opts, "drop-cache", true); + rs->check_cache_dropped = + qemu_opt_get_bool_del(opts, "x-check-cache-dropped", false); + + /* This driver's reopen function doesn't currently allow changing + * other options, so let's put them back in the original QDict and + * bdrv_reopen_prepare() will detect changes and complain. */ + qemu_opts_to_qdict(opts, state->options); + + rs->fd = raw_reconfigure_getfd(state->bs, state->flags, &rs->open_flags, + state->perm, true, &local_err); + if (local_err) { + error_propagate(errp, local_err); + ret = -1; + goto out; + } + + /* Fail already reopen_prepare() if we can't get a working O_DIRECT + * alignment with the new fd. */ + if (rs->fd != -1) { + raw_probe_alignment(state->bs, rs->fd, &local_err); + if (local_err) { + error_propagate(errp, local_err); + ret = -EINVAL; + goto out_fd; + } + } + + s->reopen_state = state; + ret = 0; +out_fd: + if (ret < 0) { + qemu_close(rs->fd); + rs->fd = -1; + } +out: + qemu_opts_del(opts); + return ret; +} + +static void raw_reopen_commit(BDRVReopenState *state) +{ + BDRVRawReopenState *rs = state->opaque; + BDRVRawState *s = state->bs->opaque; + + s->drop_cache = rs->drop_cache; + s->check_cache_dropped = rs->check_cache_dropped; + s->open_flags = rs->open_flags; + + qemu_close(s->fd); + s->fd = rs->fd; + + g_free(state->opaque); + state->opaque = NULL; + + assert(s->reopen_state == state); + s->reopen_state = NULL; +} + + +static void raw_reopen_abort(BDRVReopenState *state) +{ + BDRVRawReopenState *rs = state->opaque; + BDRVRawState *s = state->bs->opaque; + + /* nothing to do if NULL, we didn't get far enough */ + if (rs == NULL) { + return; + } + + if (rs->fd >= 0) { + qemu_close(rs->fd); + rs->fd = -1; + } + g_free(state->opaque); + state->opaque = NULL; + + assert(s->reopen_state == state); + s->reopen_state = NULL; +} + +static int sg_get_max_transfer_length(int fd) +{ +#ifdef BLKSECTGET + int max_bytes = 0; + + if (ioctl(fd, BLKSECTGET, &max_bytes) == 0) { + return max_bytes; + } else { + return -errno; + } +#else + return -ENOSYS; +#endif +} + +static int sg_get_max_segments(int fd) +{ +#ifdef CONFIG_LINUX + char buf[32]; + const char *end; + char *sysfspath = NULL; + int ret; + int sysfd = -1; + long max_segments; + struct stat st; + + if (fstat(fd, &st)) { + ret = -errno; + goto out; + } + + sysfspath = g_strdup_printf("/sys/dev/block/%u:%u/queue/max_segments", + major(st.st_rdev), minor(st.st_rdev)); + sysfd = open(sysfspath, O_RDONLY); + if (sysfd == -1) { + ret = -errno; + goto out; + } + do { + ret = read(sysfd, buf, sizeof(buf) - 1); + } while (ret == -1 && errno == EINTR); + if (ret < 0) { + ret = -errno; + goto out; + } else if (ret == 0) { + ret = -EIO; + goto out; + } + buf[ret] = 0; + /* The file is ended with '\n', pass 'end' to accept that. */ + ret = qemu_strtol(buf, &end, 10, &max_segments); + if (ret == 0 && end && *end == '\n') { + ret = max_segments; + } + +out: + if (sysfd != -1) { + close(sysfd); + } + g_free(sysfspath); + return ret; +#else + return -ENOTSUP; +#endif +} + +static void raw_refresh_limits(BlockDriverState *bs, Error **errp) +{ + BDRVRawState *s = bs->opaque; + + if (bs->sg) { + int ret = sg_get_max_transfer_length(s->fd); + + if (ret > 0 && ret <= BDRV_REQUEST_MAX_BYTES) { + bs->bl.max_transfer = pow2floor(ret); + } + + ret = sg_get_max_segments(s->fd); + if (ret > 0) { + bs->bl.max_transfer = MIN(bs->bl.max_transfer, + ret * qemu_real_host_page_size); + } + } + + raw_probe_alignment(bs, s->fd, errp); + bs->bl.min_mem_alignment = s->buf_align; + bs->bl.opt_mem_alignment = MAX(s->buf_align, qemu_real_host_page_size); +} + +static int check_for_dasd(int fd) +{ +#ifdef BIODASDINFO2 + struct dasd_information2_t info = {0}; + + return ioctl(fd, BIODASDINFO2, &info); +#else + return -1; +#endif +} + +/** + * Try to get @bs's logical and physical block size. + * On success, store them in @bsz and return zero. + * On failure, return negative errno. + */ +static int hdev_probe_blocksizes(BlockDriverState *bs, BlockSizes *bsz) +{ + BDRVRawState *s = bs->opaque; + int ret; + + /* If DASD, get blocksizes */ + if (check_for_dasd(s->fd) < 0) { + return -ENOTSUP; + } + ret = probe_logical_blocksize(s->fd, &bsz->log); + if (ret < 0) { + return ret; + } + return probe_physical_blocksize(s->fd, &bsz->phys); +} + +/** + * Try to get @bs's geometry: cyls, heads, sectors. + * On success, store them in @geo and return 0. + * On failure return -errno. + * (Allows block driver to assign default geometry values that guest sees) + */ +#ifdef __linux__ +static int hdev_probe_geometry(BlockDriverState *bs, HDGeometry *geo) +{ + BDRVRawState *s = bs->opaque; + struct hd_geometry ioctl_geo = {0}; + + /* If DASD, get its geometry */ + if (check_for_dasd(s->fd) < 0) { + return -ENOTSUP; + } + if (ioctl(s->fd, HDIO_GETGEO, &ioctl_geo) < 0) { + return -errno; + } + /* HDIO_GETGEO may return success even though geo contains zeros + (e.g. certain multipath setups) */ + if (!ioctl_geo.heads || !ioctl_geo.sectors || !ioctl_geo.cylinders) { + return -ENOTSUP; + } + /* Do not return a geometry for partition */ + if (ioctl_geo.start != 0) { + return -ENOTSUP; + } + geo->heads = ioctl_geo.heads; + geo->sectors = ioctl_geo.sectors; + geo->cylinders = ioctl_geo.cylinders; + + return 0; +} +#else /* __linux__ */ +static int hdev_probe_geometry(BlockDriverState *bs, HDGeometry *geo) +{ + return -ENOTSUP; +} +#endif + +#if defined(__linux__) +static int handle_aiocb_ioctl(void *opaque) +{ + RawPosixAIOData *aiocb = opaque; + int ret; + + ret = ioctl(aiocb->aio_fildes, aiocb->ioctl.cmd, aiocb->ioctl.buf); + if (ret == -1) { + return -errno; + } + + return 0; +} +#endif /* linux */ + +static int handle_aiocb_flush(void *opaque) +{ + RawPosixAIOData *aiocb = opaque; + BDRVRawState *s = aiocb->bs->opaque; + int ret; + + if (s->page_cache_inconsistent) { + return -EIO; + } + + ret = qemu_fdatasync(aiocb->aio_fildes); + if (ret == -1) { + /* There is no clear definition of the semantics of a failing fsync(), + * so we may have to assume the worst. The sad truth is that this + * assumption is correct for Linux. Some pages are now probably marked + * clean in the page cache even though they are inconsistent with the + * on-disk contents. The next fdatasync() call would succeed, but no + * further writeback attempt will be made. We can't get back to a state + * in which we know what is on disk (we would have to rewrite + * everything that was touched since the last fdatasync() at least), so + * make bdrv_flush() fail permanently. Given that the behaviour isn't + * really defined, I have little hope that other OSes are doing better. + * + * Obviously, this doesn't affect O_DIRECT, which bypasses the page + * cache. */ + if ((s->open_flags & O_DIRECT) == 0) { + s->page_cache_inconsistent = true; + } + return -errno; + } + return 0; +} + +#ifdef CONFIG_PREADV + +static bool preadv_present = true; + +static ssize_t +qemu_preadv(int fd, const struct iovec *iov, int nr_iov, off_t offset) +{ + return preadv(fd, iov, nr_iov, offset); +} + +static ssize_t +qemu_pwritev(int fd, const struct iovec *iov, int nr_iov, off_t offset) +{ + return pwritev(fd, iov, nr_iov, offset); +} + +#else + +static bool preadv_present = false; + +static ssize_t +qemu_preadv(int fd, const struct iovec *iov, int nr_iov, off_t offset) +{ + return -ENOSYS; +} + +static ssize_t +qemu_pwritev(int fd, const struct iovec *iov, int nr_iov, off_t offset) +{ + return -ENOSYS; +} + +#endif + +static ssize_t handle_aiocb_rw_vector(RawPosixAIOData *aiocb) +{ + ssize_t len; + + do { + if (aiocb->aio_type & QEMU_AIO_WRITE) + len = qemu_pwritev(aiocb->aio_fildes, + aiocb->io.iov, + aiocb->io.niov, + aiocb->aio_offset); + else + len = qemu_preadv(aiocb->aio_fildes, + aiocb->io.iov, + aiocb->io.niov, + aiocb->aio_offset); + } while (len == -1 && errno == EINTR); + + if (len == -1) { + return -errno; + } + return len; +} + +/* + * Read/writes the data to/from a given linear buffer. + * + * Returns the number of bytes handles or -errno in case of an error. Short + * reads are only returned if the end of the file is reached. + */ +static ssize_t handle_aiocb_rw_linear(RawPosixAIOData *aiocb, char *buf) +{ + ssize_t offset = 0; + ssize_t len; + + while (offset < aiocb->aio_nbytes) { + if (aiocb->aio_type & QEMU_AIO_WRITE) { + len = pwrite(aiocb->aio_fildes, + (const char *)buf + offset, + aiocb->aio_nbytes - offset, + aiocb->aio_offset + offset); + } else { + len = pread(aiocb->aio_fildes, + buf + offset, + aiocb->aio_nbytes - offset, + aiocb->aio_offset + offset); + } + if (len == -1 && errno == EINTR) { + continue; + } else if (len == -1 && errno == EINVAL && + (aiocb->bs->open_flags & BDRV_O_NOCACHE) && + !(aiocb->aio_type & QEMU_AIO_WRITE) && + offset > 0) { + /* O_DIRECT pread() may fail with EINVAL when offset is unaligned + * after a short read. Assume that O_DIRECT short reads only occur + * at EOF. Therefore this is a short read, not an I/O error. + */ + break; + } else if (len == -1) { + offset = -errno; + break; + } else if (len == 0) { + break; + } + offset += len; + } + + return offset; +} + +static int handle_aiocb_rw(void *opaque) +{ + RawPosixAIOData *aiocb = opaque; + ssize_t nbytes; + char *buf; + + if (!(aiocb->aio_type & QEMU_AIO_MISALIGNED)) { + /* + * If there is just a single buffer, and it is properly aligned + * we can just use plain pread/pwrite without any problems. + */ + if (aiocb->io.niov == 1) { + nbytes = handle_aiocb_rw_linear(aiocb, aiocb->io.iov->iov_base); + goto out; + } + /* + * We have more than one iovec, and all are properly aligned. + * + * Try preadv/pwritev first and fall back to linearizing the + * buffer if it's not supported. + */ + if (preadv_present) { + nbytes = handle_aiocb_rw_vector(aiocb); + if (nbytes == aiocb->aio_nbytes || + (nbytes < 0 && nbytes != -ENOSYS)) { + goto out; + } + preadv_present = false; + } + + /* + * XXX(hch): short read/write. no easy way to handle the reminder + * using these interfaces. For now retry using plain + * pread/pwrite? + */ + } + + /* + * Ok, we have to do it the hard way, copy all segments into + * a single aligned buffer. + */ + buf = qemu_try_blockalign(aiocb->bs, aiocb->aio_nbytes); + if (buf == NULL) { + nbytes = -ENOMEM; + goto out; + } + + if (aiocb->aio_type & QEMU_AIO_WRITE) { + char *p = buf; + int i; + + for (i = 0; i < aiocb->io.niov; ++i) { + memcpy(p, aiocb->io.iov[i].iov_base, aiocb->io.iov[i].iov_len); + p += aiocb->io.iov[i].iov_len; + } + assert(p - buf == aiocb->aio_nbytes); + } + + nbytes = handle_aiocb_rw_linear(aiocb, buf); + if (!(aiocb->aio_type & QEMU_AIO_WRITE)) { + char *p = buf; + size_t count = aiocb->aio_nbytes, copy; + int i; + + for (i = 0; i < aiocb->io.niov && count; ++i) { + copy = count; + if (copy > aiocb->io.iov[i].iov_len) { + copy = aiocb->io.iov[i].iov_len; + } + memcpy(aiocb->io.iov[i].iov_base, p, copy); + assert(count >= copy); + p += copy; + count -= copy; + } + assert(count == 0); + } + qemu_vfree(buf); + +out: + if (nbytes == aiocb->aio_nbytes) { + return 0; + } else if (nbytes >= 0 && nbytes < aiocb->aio_nbytes) { + if (aiocb->aio_type & QEMU_AIO_WRITE) { + return -EINVAL; + } else { + iov_memset(aiocb->io.iov, aiocb->io.niov, nbytes, + 0, aiocb->aio_nbytes - nbytes); + return 0; + } + } else { + assert(nbytes < 0); + return nbytes; + } +} + +static int translate_err(int err) +{ + if (err == -ENODEV || err == -ENOSYS || err == -EOPNOTSUPP || + err == -ENOTTY) { + err = -ENOTSUP; + } + return err; +} + +#ifdef CONFIG_FALLOCATE +static int do_fallocate(int fd, int mode, off_t offset, off_t len) +{ + do { + if (fallocate(fd, mode, offset, len) == 0) { + return 0; + } + } while (errno == EINTR); + return translate_err(-errno); +} +#endif + +static ssize_t handle_aiocb_write_zeroes_block(RawPosixAIOData *aiocb) +{ + int ret = -ENOTSUP; + BDRVRawState *s = aiocb->bs->opaque; + + if (!s->has_write_zeroes) { + return -ENOTSUP; + } + +#ifdef BLKZEROOUT + /* The BLKZEROOUT implementation in the kernel doesn't set + * BLKDEV_ZERO_NOFALLBACK, so we can't call this if we have to avoid slow + * fallbacks. */ + if (!(aiocb->aio_type & QEMU_AIO_NO_FALLBACK)) { + do { + uint64_t range[2] = { aiocb->aio_offset, aiocb->aio_nbytes }; + if (ioctl(aiocb->aio_fildes, BLKZEROOUT, range) == 0) { + return 0; + } + } while (errno == EINTR); + + ret = translate_err(-errno); + if (ret == -ENOTSUP) { + s->has_write_zeroes = false; + } + } +#endif + + return ret; +} + +static int handle_aiocb_write_zeroes(void *opaque) +{ + RawPosixAIOData *aiocb = opaque; +#ifdef CONFIG_FALLOCATE + BDRVRawState *s = aiocb->bs->opaque; + int64_t len; +#endif + + if (aiocb->aio_type & QEMU_AIO_BLKDEV) { + return handle_aiocb_write_zeroes_block(aiocb); + } + +#ifdef CONFIG_FALLOCATE_ZERO_RANGE + if (s->has_write_zeroes) { + int ret = do_fallocate(s->fd, FALLOC_FL_ZERO_RANGE, + aiocb->aio_offset, aiocb->aio_nbytes); + if (ret == -EINVAL) { + /* + * Allow falling back to pwrite for file systems that + * do not support fallocate() for an unaligned byte range. + */ + return -ENOTSUP; + } + if (ret == 0 || ret != -ENOTSUP) { + return ret; + } + s->has_write_zeroes = false; + } +#endif + +#ifdef CONFIG_FALLOCATE_PUNCH_HOLE + if (s->has_discard && s->has_fallocate) { + int ret = do_fallocate(s->fd, + FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE, + aiocb->aio_offset, aiocb->aio_nbytes); + if (ret == 0) { + ret = do_fallocate(s->fd, 0, aiocb->aio_offset, aiocb->aio_nbytes); + if (ret == 0 || ret != -ENOTSUP) { + return ret; + } + s->has_fallocate = false; + } else if (ret != -ENOTSUP) { + return ret; + } else { + s->has_discard = false; + } + } +#endif + +#ifdef CONFIG_FALLOCATE + /* Last resort: we are trying to extend the file with zeroed data. This + * can be done via fallocate(fd, 0) */ + len = bdrv_getlength(aiocb->bs); + if (s->has_fallocate && len >= 0 && aiocb->aio_offset >= len) { + int ret = do_fallocate(s->fd, 0, aiocb->aio_offset, aiocb->aio_nbytes); + if (ret == 0 || ret != -ENOTSUP) { + return ret; + } + s->has_fallocate = false; + } +#endif + + return -ENOTSUP; +} + +static int handle_aiocb_write_zeroes_unmap(void *opaque) +{ + RawPosixAIOData *aiocb = opaque; + BDRVRawState *s G_GNUC_UNUSED = aiocb->bs->opaque; + + /* First try to write zeros and unmap at the same time */ + +#ifdef CONFIG_FALLOCATE_PUNCH_HOLE + int ret = do_fallocate(s->fd, FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE, + aiocb->aio_offset, aiocb->aio_nbytes); + switch (ret) { + case -ENOTSUP: + case -EINVAL: + case -EBUSY: + break; + default: + return ret; + } +#endif + + /* If we couldn't manage to unmap while guaranteed that the area reads as + * all-zero afterwards, just write zeroes without unmapping */ + return handle_aiocb_write_zeroes(aiocb); +} + +#ifndef HAVE_COPY_FILE_RANGE +static off_t copy_file_range(int in_fd, off_t *in_off, int out_fd, + off_t *out_off, size_t len, unsigned int flags) +{ +#ifdef __NR_copy_file_range + return syscall(__NR_copy_file_range, in_fd, in_off, out_fd, + out_off, len, flags); +#else + errno = ENOSYS; + return -1; +#endif +} +#endif + +static int handle_aiocb_copy_range(void *opaque) +{ + RawPosixAIOData *aiocb = opaque; + uint64_t bytes = aiocb->aio_nbytes; + off_t in_off = aiocb->aio_offset; + off_t out_off = aiocb->copy_range.aio_offset2; + + while (bytes) { + ssize_t ret = copy_file_range(aiocb->aio_fildes, &in_off, + aiocb->copy_range.aio_fd2, &out_off, + bytes, 0); + trace_file_copy_file_range(aiocb->bs, aiocb->aio_fildes, in_off, + aiocb->copy_range.aio_fd2, out_off, bytes, + 0, ret); + if (ret == 0) { + /* No progress (e.g. when beyond EOF), let the caller fall back to + * buffer I/O. */ + return -ENOSPC; + } + if (ret < 0) { + switch (errno) { + case ENOSYS: + return -ENOTSUP; + case EINTR: + continue; + default: + return -errno; + } + } + bytes -= ret; + } + return 0; +} + +static int handle_aiocb_discard(void *opaque) +{ + RawPosixAIOData *aiocb = opaque; + int ret = -EOPNOTSUPP; + BDRVRawState *s = aiocb->bs->opaque; + + if (!s->has_discard) { + return -ENOTSUP; + } + + if (aiocb->aio_type & QEMU_AIO_BLKDEV) { +#ifdef BLKDISCARD + do { + uint64_t range[2] = { aiocb->aio_offset, aiocb->aio_nbytes }; + if (ioctl(aiocb->aio_fildes, BLKDISCARD, range) == 0) { + return 0; + } + } while (errno == EINTR); + + ret = -errno; +#endif + } else { +#ifdef CONFIG_FALLOCATE_PUNCH_HOLE + ret = do_fallocate(s->fd, FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE, + aiocb->aio_offset, aiocb->aio_nbytes); +#endif + } + + ret = translate_err(ret); + if (ret == -ENOTSUP) { + s->has_discard = false; + } + return ret; +} + +/* + * Help alignment probing by allocating the first block. + * + * When reading with direct I/O from unallocated area on Gluster backed by XFS, + * reading succeeds regardless of request length. In this case we fallback to + * safe alignment which is not optimal. Allocating the first block avoids this + * fallback. + * + * fd may be opened with O_DIRECT, but we don't know the buffer alignment or + * request alignment, so we use safe values. + * + * Returns: 0 on success, -errno on failure. Since this is an optimization, + * caller may ignore failures. + */ +static int allocate_first_block(int fd, size_t max_size) +{ + size_t write_size = (max_size < MAX_BLOCKSIZE) + ? BDRV_SECTOR_SIZE + : MAX_BLOCKSIZE; + size_t max_align = MAX(MAX_BLOCKSIZE, qemu_real_host_page_size); + void *buf; + ssize_t n; + int ret; + + buf = qemu_memalign(max_align, write_size); + memset(buf, 0, write_size); + + do { + n = pwrite(fd, buf, write_size, 0); + } while (n == -1 && errno == EINTR); + + ret = (n == -1) ? -errno : 0; + + qemu_vfree(buf); + return ret; +} + +static int handle_aiocb_truncate(void *opaque) +{ + RawPosixAIOData *aiocb = opaque; + int result = 0; + int64_t current_length = 0; + char *buf = NULL; + struct stat st; + int fd = aiocb->aio_fildes; + int64_t offset = aiocb->aio_offset; + PreallocMode prealloc = aiocb->truncate.prealloc; + Error **errp = aiocb->truncate.errp; + + if (fstat(fd, &st) < 0) { + result = -errno; + error_setg_errno(errp, -result, "Could not stat file"); + return result; + } + + current_length = st.st_size; + if (current_length > offset && prealloc != PREALLOC_MODE_OFF) { + error_setg(errp, "Cannot use preallocation for shrinking files"); + return -ENOTSUP; + } + + switch (prealloc) { +#ifdef CONFIG_POSIX_FALLOCATE + case PREALLOC_MODE_FALLOC: + /* + * Truncating before posix_fallocate() makes it about twice slower on + * file systems that do not support fallocate(), trying to check if a + * block is allocated before allocating it, so don't do that here. + */ + if (offset != current_length) { + result = -posix_fallocate(fd, current_length, + offset - current_length); + if (result != 0) { + /* posix_fallocate() doesn't set errno. */ + error_setg_errno(errp, -result, + "Could not preallocate new data"); + } else if (current_length == 0) { + /* + * posix_fallocate() uses fallocate() if the filesystem + * supports it, or fallback to manually writing zeroes. If + * fallocate() was used, unaligned reads from the fallocated + * area in raw_probe_alignment() will succeed, hence we need to + * allocate the first block. + * + * Optimize future alignment probing; ignore failures. + */ + allocate_first_block(fd, offset); + } + } else { + result = 0; + } + goto out; +#endif + case PREALLOC_MODE_FULL: + { + int64_t num = 0, left = offset - current_length; + off_t seek_result; + + /* + * Knowing the final size from the beginning could allow the file + * system driver to do less allocations and possibly avoid + * fragmentation of the file. + */ + if (ftruncate(fd, offset) != 0) { + result = -errno; + error_setg_errno(errp, -result, "Could not resize file"); + goto out; + } + + buf = g_malloc0(65536); + + seek_result = lseek(fd, current_length, SEEK_SET); + if (seek_result < 0) { + result = -errno; + error_setg_errno(errp, -result, + "Failed to seek to the old end of file"); + goto out; + } + + while (left > 0) { + num = MIN(left, 65536); + result = write(fd, buf, num); + if (result < 0) { + if (errno == EINTR) { + continue; + } + result = -errno; + error_setg_errno(errp, -result, + "Could not write zeros for preallocation"); + goto out; + } + left -= result; + } + if (result >= 0) { + result = fsync(fd); + if (result < 0) { + result = -errno; + error_setg_errno(errp, -result, + "Could not flush file to disk"); + goto out; + } + } + goto out; + } + case PREALLOC_MODE_OFF: + if (ftruncate(fd, offset) != 0) { + result = -errno; + error_setg_errno(errp, -result, "Could not resize file"); + } else if (current_length == 0 && offset > current_length) { + /* Optimize future alignment probing; ignore failures. */ + allocate_first_block(fd, offset); + } + return result; + default: + result = -ENOTSUP; + error_setg(errp, "Unsupported preallocation mode: %s", + PreallocMode_str(prealloc)); + return result; + } + +out: + if (result < 0) { + if (ftruncate(fd, current_length) < 0) { + error_report("Failed to restore old file length: %s", + strerror(errno)); + } + } + + g_free(buf); + return result; +} + +static int coroutine_fn raw_thread_pool_submit(BlockDriverState *bs, + ThreadPoolFunc func, void *arg) +{ + /* @bs can be NULL, bdrv_get_aio_context() returns the main context then */ + ThreadPool *pool = aio_get_thread_pool(bdrv_get_aio_context(bs)); + return thread_pool_submit_co(pool, func, arg); +} + +static int coroutine_fn raw_co_prw(BlockDriverState *bs, uint64_t offset, + uint64_t bytes, QEMUIOVector *qiov, int type) +{ + BDRVRawState *s = bs->opaque; + RawPosixAIOData acb; + + if (fd_open(bs) < 0) + return -EIO; + + /* + * When using O_DIRECT, the request must be aligned to be able to use + * either libaio or io_uring interface. If not fail back to regular thread + * pool read/write code which emulates this for us if we + * set QEMU_AIO_MISALIGNED. + */ + if (s->needs_alignment && !bdrv_qiov_is_aligned(bs, qiov)) { + type |= QEMU_AIO_MISALIGNED; +#ifdef CONFIG_LINUX_IO_URING + } else if (s->use_linux_io_uring) { + LuringState *aio = aio_get_linux_io_uring(bdrv_get_aio_context(bs)); + assert(qiov->size == bytes); + return luring_co_submit(bs, aio, s->fd, offset, qiov, type); +#endif +#ifdef CONFIG_LINUX_AIO + } else if (s->use_linux_aio) { + LinuxAioState *aio = aio_get_linux_aio(bdrv_get_aio_context(bs)); + assert(qiov->size == bytes); + return laio_co_submit(bs, aio, s->fd, offset, qiov, type); +#endif + } + + acb = (RawPosixAIOData) { + .bs = bs, + .aio_fildes = s->fd, + .aio_type = type, + .aio_offset = offset, + .aio_nbytes = bytes, + .io = { + .iov = qiov->iov, + .niov = qiov->niov, + }, + }; + + assert(qiov->size == bytes); + return raw_thread_pool_submit(bs, handle_aiocb_rw, &acb); +} + +static int coroutine_fn raw_co_preadv(BlockDriverState *bs, uint64_t offset, + uint64_t bytes, QEMUIOVector *qiov, + int flags) +{ + return raw_co_prw(bs, offset, bytes, qiov, QEMU_AIO_READ); +} + +static int coroutine_fn raw_co_pwritev(BlockDriverState *bs, uint64_t offset, + uint64_t bytes, QEMUIOVector *qiov, + int flags) +{ + assert(flags == 0); + return raw_co_prw(bs, offset, bytes, qiov, QEMU_AIO_WRITE); +} + +static void raw_aio_plug(BlockDriverState *bs) +{ + BDRVRawState __attribute__((unused)) *s = bs->opaque; +#ifdef CONFIG_LINUX_AIO + if (s->use_linux_aio) { + LinuxAioState *aio = aio_get_linux_aio(bdrv_get_aio_context(bs)); + laio_io_plug(bs, aio); + } +#endif +#ifdef CONFIG_LINUX_IO_URING + if (s->use_linux_io_uring) { + LuringState *aio = aio_get_linux_io_uring(bdrv_get_aio_context(bs)); + luring_io_plug(bs, aio); + } +#endif +} + +static void raw_aio_unplug(BlockDriverState *bs) +{ + BDRVRawState __attribute__((unused)) *s = bs->opaque; +#ifdef CONFIG_LINUX_AIO + if (s->use_linux_aio) { + LinuxAioState *aio = aio_get_linux_aio(bdrv_get_aio_context(bs)); + laio_io_unplug(bs, aio); + } +#endif +#ifdef CONFIG_LINUX_IO_URING + if (s->use_linux_io_uring) { + LuringState *aio = aio_get_linux_io_uring(bdrv_get_aio_context(bs)); + luring_io_unplug(bs, aio); + } +#endif +} + +static int raw_co_flush_to_disk(BlockDriverState *bs) +{ + BDRVRawState *s = bs->opaque; + RawPosixAIOData acb; + int ret; + + ret = fd_open(bs); + if (ret < 0) { + return ret; + } + + acb = (RawPosixAIOData) { + .bs = bs, + .aio_fildes = s->fd, + .aio_type = QEMU_AIO_FLUSH, + }; + +#ifdef CONFIG_LINUX_IO_URING + if (s->use_linux_io_uring) { + LuringState *aio = aio_get_linux_io_uring(bdrv_get_aio_context(bs)); + return luring_co_submit(bs, aio, s->fd, 0, NULL, QEMU_AIO_FLUSH); + } +#endif + return raw_thread_pool_submit(bs, handle_aiocb_flush, &acb); +} + +static void raw_aio_attach_aio_context(BlockDriverState *bs, + AioContext *new_context) +{ + BDRVRawState __attribute__((unused)) *s = bs->opaque; +#ifdef CONFIG_LINUX_AIO + if (s->use_linux_aio) { + Error *local_err = NULL; + if (!aio_setup_linux_aio(new_context, &local_err)) { + error_reportf_err(local_err, "Unable to use native AIO, " + "falling back to thread pool: "); + s->use_linux_aio = false; + } + } +#endif +#ifdef CONFIG_LINUX_IO_URING + if (s->use_linux_io_uring) { + Error *local_err; + if (!aio_setup_linux_io_uring(new_context, &local_err)) { + error_reportf_err(local_err, "Unable to use linux io_uring, " + "falling back to thread pool: "); + s->use_linux_io_uring = false; + } + } +#endif +} + +static void raw_close(BlockDriverState *bs) +{ + BDRVRawState *s = bs->opaque; + + if (s->fd >= 0) { + qemu_close(s->fd); + s->fd = -1; + } +} + +/** + * Truncates the given regular file @fd to @offset and, when growing, fills the + * new space according to @prealloc. + * + * Returns: 0 on success, -errno on failure. + */ +static int coroutine_fn +raw_regular_truncate(BlockDriverState *bs, int fd, int64_t offset, + PreallocMode prealloc, Error **errp) +{ + RawPosixAIOData acb; + + acb = (RawPosixAIOData) { + .bs = bs, + .aio_fildes = fd, + .aio_type = QEMU_AIO_TRUNCATE, + .aio_offset = offset, + .truncate = { + .prealloc = prealloc, + .errp = errp, + }, + }; + + return raw_thread_pool_submit(bs, handle_aiocb_truncate, &acb); +} + +static int coroutine_fn raw_co_truncate(BlockDriverState *bs, int64_t offset, + bool exact, PreallocMode prealloc, + BdrvRequestFlags flags, Error **errp) +{ + BDRVRawState *s = bs->opaque; + struct stat st; + int ret; + + if (fstat(s->fd, &st)) { + ret = -errno; + error_setg_errno(errp, -ret, "Failed to fstat() the file"); + return ret; + } + + if (S_ISREG(st.st_mode)) { + /* Always resizes to the exact @offset */ + return raw_regular_truncate(bs, s->fd, offset, prealloc, errp); + } + + if (prealloc != PREALLOC_MODE_OFF) { + error_setg(errp, "Preallocation mode '%s' unsupported for this " + "non-regular file", PreallocMode_str(prealloc)); + return -ENOTSUP; + } + + if (S_ISCHR(st.st_mode) || S_ISBLK(st.st_mode)) { + int64_t cur_length = raw_getlength(bs); + + if (offset != cur_length && exact) { + error_setg(errp, "Cannot resize device files"); + return -ENOTSUP; + } else if (offset > cur_length) { + error_setg(errp, "Cannot grow device files"); + return -EINVAL; + } + } else { + error_setg(errp, "Resizing this file is not supported"); + return -ENOTSUP; + } + + return 0; +} + +#ifdef __OpenBSD__ +static int64_t raw_getlength(BlockDriverState *bs) +{ + BDRVRawState *s = bs->opaque; + int fd = s->fd; + struct stat st; + + if (fstat(fd, &st)) + return -errno; + if (S_ISCHR(st.st_mode) || S_ISBLK(st.st_mode)) { + struct disklabel dl; + + if (ioctl(fd, DIOCGDINFO, &dl)) + return -errno; + return (uint64_t)dl.d_secsize * + dl.d_partitions[DISKPART(st.st_rdev)].p_size; + } else + return st.st_size; +} +#elif defined(__NetBSD__) +static int64_t raw_getlength(BlockDriverState *bs) +{ + BDRVRawState *s = bs->opaque; + int fd = s->fd; + struct stat st; + + if (fstat(fd, &st)) + return -errno; + if (S_ISCHR(st.st_mode) || S_ISBLK(st.st_mode)) { + struct dkwedge_info dkw; + + if (ioctl(fd, DIOCGWEDGEINFO, &dkw) != -1) { + return dkw.dkw_size * 512; + } else { + struct disklabel dl; + + if (ioctl(fd, DIOCGDINFO, &dl)) + return -errno; + return (uint64_t)dl.d_secsize * + dl.d_partitions[DISKPART(st.st_rdev)].p_size; + } + } else + return st.st_size; +} +#elif defined(__sun__) +static int64_t raw_getlength(BlockDriverState *bs) +{ + BDRVRawState *s = bs->opaque; + struct dk_minfo minfo; + int ret; + int64_t size; + + ret = fd_open(bs); + if (ret < 0) { + return ret; + } + + /* + * Use the DKIOCGMEDIAINFO ioctl to read the size. + */ + ret = ioctl(s->fd, DKIOCGMEDIAINFO, &minfo); + if (ret != -1) { + return minfo.dki_lbsize * minfo.dki_capacity; + } + + /* + * There are reports that lseek on some devices fails, but + * irc discussion said that contingency on contingency was overkill. + */ + size = lseek(s->fd, 0, SEEK_END); + if (size < 0) { + return -errno; + } + return size; +} +#elif defined(CONFIG_BSD) +static int64_t raw_getlength(BlockDriverState *bs) +{ + BDRVRawState *s = bs->opaque; + int fd = s->fd; + int64_t size; + struct stat sb; +#if defined (__FreeBSD__) || defined(__FreeBSD_kernel__) + int reopened = 0; +#endif + int ret; + + ret = fd_open(bs); + if (ret < 0) + return ret; + +#if defined (__FreeBSD__) || defined(__FreeBSD_kernel__) +again: +#endif + if (!fstat(fd, &sb) && (S_IFCHR & sb.st_mode)) { +#ifdef DIOCGMEDIASIZE + if (ioctl(fd, DIOCGMEDIASIZE, (off_t *)&size)) +#elif defined(DIOCGPART) + { + struct partinfo pi; + if (ioctl(fd, DIOCGPART, &pi) == 0) + size = pi.media_size; + else + size = 0; + } + if (size == 0) +#endif +#if defined(__APPLE__) && defined(__MACH__) + { + uint64_t sectors = 0; + uint32_t sector_size = 0; + + if (ioctl(fd, DKIOCGETBLOCKCOUNT, §ors) == 0 + && ioctl(fd, DKIOCGETBLOCKSIZE, §or_size) == 0) { + size = sectors * sector_size; + } else { + size = lseek(fd, 0LL, SEEK_END); + if (size < 0) { + return -errno; + } + } + } +#else + size = lseek(fd, 0LL, SEEK_END); + if (size < 0) { + return -errno; + } +#endif +#if defined(__FreeBSD__) || defined(__FreeBSD_kernel__) + switch(s->type) { + case FTYPE_CD: + /* XXX FreeBSD acd returns UINT_MAX sectors for an empty drive */ + if (size == 2048LL * (unsigned)-1) + size = 0; + /* XXX no disc? maybe we need to reopen... */ + if (size <= 0 && !reopened && cdrom_reopen(bs) >= 0) { + reopened = 1; + goto again; + } + } +#endif + } else { + size = lseek(fd, 0, SEEK_END); + if (size < 0) { + return -errno; + } + } + return size; +} +#else +static int64_t raw_getlength(BlockDriverState *bs) +{ + BDRVRawState *s = bs->opaque; + int ret; + int64_t size; + + ret = fd_open(bs); + if (ret < 0) { + return ret; + } + + size = lseek(s->fd, 0, SEEK_END); + if (size < 0) { + return -errno; + } + return size; +} +#endif + +static int64_t raw_get_allocated_file_size(BlockDriverState *bs) +{ + struct stat st; + BDRVRawState *s = bs->opaque; + + if (fstat(s->fd, &st) < 0) { + return -errno; + } + return (int64_t)st.st_blocks * 512; +} + +static int coroutine_fn +raw_co_create(BlockdevCreateOptions *options, Error **errp) +{ + BlockdevCreateOptionsFile *file_opts; + Error *local_err = NULL; + int fd; + uint64_t perm, shared; + int result = 0; + + /* Validate options and set default values */ + assert(options->driver == BLOCKDEV_DRIVER_FILE); + file_opts = &options->u.file; + + if (!file_opts->has_nocow) { + file_opts->nocow = false; + } + if (!file_opts->has_preallocation) { + file_opts->preallocation = PREALLOC_MODE_OFF; + } + if (!file_opts->has_extent_size_hint) { + file_opts->extent_size_hint = 1 * MiB; + } + if (file_opts->extent_size_hint > UINT32_MAX) { + result = -EINVAL; + error_setg(errp, "Extent size hint is too large"); + goto out; + } + + /* Create file */ + fd = qemu_create(file_opts->filename, O_RDWR | O_BINARY, 0644, errp); + if (fd < 0) { + result = -errno; + goto out; + } + + /* Take permissions: We want to discard everything, so we need + * BLK_PERM_WRITE; and truncation to the desired size requires + * BLK_PERM_RESIZE. + * On the other hand, we cannot share the RESIZE permission + * because we promise that after this function, the file has the + * size given in the options. If someone else were to resize it + * concurrently, we could not guarantee that. + * Note that after this function, we can no longer guarantee that + * the file is not touched by a third party, so it may be resized + * then. */ + perm = BLK_PERM_WRITE | BLK_PERM_RESIZE; + shared = BLK_PERM_ALL & ~BLK_PERM_RESIZE; + + /* Step one: Take locks */ + result = raw_apply_lock_bytes(NULL, fd, perm, ~shared, false, errp); + if (result < 0) { + goto out_close; + } + + /* Step two: Check that nobody else has taken conflicting locks */ + result = raw_check_lock_bytes(fd, perm, shared, errp); + if (result < 0) { + error_append_hint(errp, + "Is another process using the image [%s]?\n", + file_opts->filename); + goto out_unlock; + } + + /* Clear the file by truncating it to 0 */ + result = raw_regular_truncate(NULL, fd, 0, PREALLOC_MODE_OFF, errp); + if (result < 0) { + goto out_unlock; + } + + if (file_opts->nocow) { +#ifdef __linux__ + /* Set NOCOW flag to solve performance issue on fs like btrfs. + * This is an optimisation. The FS_IOC_SETFLAGS ioctl return value + * will be ignored since any failure of this operation should not + * block the left work. + */ + int attr; + if (ioctl(fd, FS_IOC_GETFLAGS, &attr) == 0) { + attr |= FS_NOCOW_FL; + ioctl(fd, FS_IOC_SETFLAGS, &attr); + } +#endif + } +#ifdef FS_IOC_FSSETXATTR + /* + * Try to set the extent size hint. Failure is not fatal, and a warning is + * only printed if the option was explicitly specified. + */ + { + struct fsxattr attr; + result = ioctl(fd, FS_IOC_FSGETXATTR, &attr); + if (result == 0) { + attr.fsx_xflags |= FS_XFLAG_EXTSIZE; + attr.fsx_extsize = file_opts->extent_size_hint; + result = ioctl(fd, FS_IOC_FSSETXATTR, &attr); + } + if (result < 0 && file_opts->has_extent_size_hint && + file_opts->extent_size_hint) + { + warn_report("Failed to set extent size hint: %s", + strerror(errno)); + } + } +#endif + + /* Resize and potentially preallocate the file to the desired + * final size */ + result = raw_regular_truncate(NULL, fd, file_opts->size, + file_opts->preallocation, errp); + if (result < 0) { + goto out_unlock; + } + +out_unlock: + raw_apply_lock_bytes(NULL, fd, 0, 0, true, &local_err); + if (local_err) { + /* The above call should not fail, and if it does, that does + * not mean the whole creation operation has failed. So + * report it the user for their convenience, but do not report + * it to the caller. */ + warn_report_err(local_err); + } + +out_close: + if (qemu_close(fd) != 0 && result == 0) { + result = -errno; + error_setg_errno(errp, -result, "Could not close the new file"); + } +out: + return result; +} + +static int coroutine_fn raw_co_create_opts(BlockDriver *drv, + const char *filename, + QemuOpts *opts, + Error **errp) +{ + BlockdevCreateOptions options; + int64_t total_size = 0; + int64_t extent_size_hint = 0; + bool has_extent_size_hint = false; + bool nocow = false; + PreallocMode prealloc; + char *buf = NULL; + Error *local_err = NULL; + + /* Skip file: protocol prefix */ + strstart(filename, "file:", &filename); + + /* Read out options */ + total_size = ROUND_UP(qemu_opt_get_size_del(opts, BLOCK_OPT_SIZE, 0), + BDRV_SECTOR_SIZE); + if (qemu_opt_get(opts, BLOCK_OPT_EXTENT_SIZE_HINT)) { + has_extent_size_hint = true; + extent_size_hint = + qemu_opt_get_size_del(opts, BLOCK_OPT_EXTENT_SIZE_HINT, -1); + } + nocow = qemu_opt_get_bool(opts, BLOCK_OPT_NOCOW, false); + buf = qemu_opt_get_del(opts, BLOCK_OPT_PREALLOC); + prealloc = qapi_enum_parse(&PreallocMode_lookup, buf, + PREALLOC_MODE_OFF, &local_err); + g_free(buf); + if (local_err) { + error_propagate(errp, local_err); + return -EINVAL; + } + + options = (BlockdevCreateOptions) { + .driver = BLOCKDEV_DRIVER_FILE, + .u.file = { + .filename = (char *) filename, + .size = total_size, + .has_preallocation = true, + .preallocation = prealloc, + .has_nocow = true, + .nocow = nocow, + .has_extent_size_hint = has_extent_size_hint, + .extent_size_hint = extent_size_hint, + }, + }; + return raw_co_create(&options, errp); +} + +static int coroutine_fn raw_co_delete_file(BlockDriverState *bs, + Error **errp) +{ + struct stat st; + int ret; + + if (!(stat(bs->filename, &st) == 0) || !S_ISREG(st.st_mode)) { + error_setg_errno(errp, ENOENT, "%s is not a regular file", + bs->filename); + return -ENOENT; + } + + ret = unlink(bs->filename); + if (ret < 0) { + ret = -errno; + error_setg_errno(errp, -ret, "Error when deleting file %s", + bs->filename); + } + + return ret; +} + +/* + * Find allocation range in @bs around offset @start. + * May change underlying file descriptor's file offset. + * If @start is not in a hole, store @start in @data, and the + * beginning of the next hole in @hole, and return 0. + * If @start is in a non-trailing hole, store @start in @hole and the + * beginning of the next non-hole in @data, and return 0. + * If @start is in a trailing hole or beyond EOF, return -ENXIO. + * If we can't find out, return a negative errno other than -ENXIO. + */ +static int find_allocation(BlockDriverState *bs, off_t start, + off_t *data, off_t *hole) +{ +#if defined SEEK_HOLE && defined SEEK_DATA + BDRVRawState *s = bs->opaque; + off_t offs; + + /* + * SEEK_DATA cases: + * D1. offs == start: start is in data + * D2. offs > start: start is in a hole, next data at offs + * D3. offs < 0, errno = ENXIO: either start is in a trailing hole + * or start is beyond EOF + * If the latter happens, the file has been truncated behind + * our back since we opened it. All bets are off then. + * Treating like a trailing hole is simplest. + * D4. offs < 0, errno != ENXIO: we learned nothing + */ + offs = lseek(s->fd, start, SEEK_DATA); + if (offs < 0) { + return -errno; /* D3 or D4 */ + } + + if (offs < start) { + /* This is not a valid return by lseek(). We are safe to just return + * -EIO in this case, and we'll treat it like D4. */ + return -EIO; + } + + if (offs > start) { + /* D2: in hole, next data at offs */ + *hole = start; + *data = offs; + return 0; + } + + /* D1: in data, end not yet known */ + + /* + * SEEK_HOLE cases: + * H1. offs == start: start is in a hole + * If this happens here, a hole has been dug behind our back + * since the previous lseek(). + * H2. offs > start: either start is in data, next hole at offs, + * or start is in trailing hole, EOF at offs + * Linux treats trailing holes like any other hole: offs == + * start. Solaris seeks to EOF instead: offs > start (blech). + * If that happens here, a hole has been dug behind our back + * since the previous lseek(). + * H3. offs < 0, errno = ENXIO: start is beyond EOF + * If this happens, the file has been truncated behind our + * back since we opened it. Treat it like a trailing hole. + * H4. offs < 0, errno != ENXIO: we learned nothing + * Pretend we know nothing at all, i.e. "forget" about D1. + */ + offs = lseek(s->fd, start, SEEK_HOLE); + if (offs < 0) { + return -errno; /* D1 and (H3 or H4) */ + } + + if (offs < start) { + /* This is not a valid return by lseek(). We are safe to just return + * -EIO in this case, and we'll treat it like H4. */ + return -EIO; + } + + if (offs > start) { + /* + * D1 and H2: either in data, next hole at offs, or it was in + * data but is now in a trailing hole. In the latter case, + * all bets are off. Treating it as if it there was data all + * the way to EOF is safe, so simply do that. + */ + *data = start; + *hole = offs; + return 0; + } + + /* D1 and H1 */ + return -EBUSY; +#else + return -ENOTSUP; +#endif +} + +/* + * Returns the allocation status of the specified offset. + * + * The block layer guarantees 'offset' and 'bytes' are within bounds. + * + * 'pnum' is set to the number of bytes (including and immediately following + * the specified offset) that are known to be in the same + * allocated/unallocated state. + * + * 'bytes' is the max value 'pnum' should be set to. + */ +static int coroutine_fn raw_co_block_status(BlockDriverState *bs, + bool want_zero, + int64_t offset, + int64_t bytes, int64_t *pnum, + int64_t *map, + BlockDriverState **file) +{ + off_t data = 0, hole = 0; + int ret; + + assert(QEMU_IS_ALIGNED(offset | bytes, bs->bl.request_alignment)); + + ret = fd_open(bs); + if (ret < 0) { + return ret; + } + + if (!want_zero) { + *pnum = bytes; + *map = offset; + *file = bs; + return BDRV_BLOCK_DATA | BDRV_BLOCK_OFFSET_VALID; + } + + ret = find_allocation(bs, offset, &data, &hole); + if (ret == -ENXIO) { + /* Trailing hole */ + *pnum = bytes; + ret = BDRV_BLOCK_ZERO; + } else if (ret < 0) { + /* No info available, so pretend there are no holes */ + *pnum = bytes; + ret = BDRV_BLOCK_DATA; + } else if (data == offset) { + /* On a data extent, compute bytes to the end of the extent, + * possibly including a partial sector at EOF. */ + *pnum = MIN(bytes, hole - offset); + + /* + * We are not allowed to return partial sectors, though, so + * round up if necessary. + */ + if (!QEMU_IS_ALIGNED(*pnum, bs->bl.request_alignment)) { + int64_t file_length = raw_getlength(bs); + if (file_length > 0) { + /* Ignore errors, this is just a safeguard */ + assert(hole == file_length); + } + *pnum = ROUND_UP(*pnum, bs->bl.request_alignment); + } + + ret = BDRV_BLOCK_DATA; + } else { + /* On a hole, compute bytes to the beginning of the next extent. */ + assert(hole == offset); + *pnum = MIN(bytes, data - offset); + ret = BDRV_BLOCK_ZERO; + } + *map = offset; + *file = bs; + return ret | BDRV_BLOCK_OFFSET_VALID; +} + +#if defined(__linux__) +/* Verify that the file is not in the page cache */ +static void check_cache_dropped(BlockDriverState *bs, Error **errp) +{ + const size_t window_size = 128 * 1024 * 1024; + BDRVRawState *s = bs->opaque; + void *window = NULL; + size_t length = 0; + unsigned char *vec; + size_t page_size; + off_t offset; + off_t end; + + /* mincore(2) page status information requires 1 byte per page */ + page_size = sysconf(_SC_PAGESIZE); + vec = g_malloc(DIV_ROUND_UP(window_size, page_size)); + + end = raw_getlength(bs); + + for (offset = 0; offset < end; offset += window_size) { + void *new_window; + size_t new_length; + size_t vec_end; + size_t i; + int ret; + + /* Unmap previous window if size has changed */ + new_length = MIN(end - offset, window_size); + if (new_length != length) { + munmap(window, length); + window = NULL; + length = 0; + } + + new_window = mmap(window, new_length, PROT_NONE, MAP_PRIVATE, + s->fd, offset); + if (new_window == MAP_FAILED) { + error_setg_errno(errp, errno, "mmap failed"); + break; + } + + window = new_window; + length = new_length; + + ret = mincore(window, length, vec); + if (ret < 0) { + error_setg_errno(errp, errno, "mincore failed"); + break; + } + + vec_end = DIV_ROUND_UP(length, page_size); + for (i = 0; i < vec_end; i++) { + if (vec[i] & 0x1) { + break; + } + } + if (i < vec_end) { + error_setg(errp, "page cache still in use!"); + break; + } + } + + if (window) { + munmap(window, length); + } + + g_free(vec); +} +#endif /* __linux__ */ + +static void coroutine_fn raw_co_invalidate_cache(BlockDriverState *bs, + Error **errp) +{ + BDRVRawState *s = bs->opaque; + int ret; + + ret = fd_open(bs); + if (ret < 0) { + error_setg_errno(errp, -ret, "The file descriptor is not open"); + return; + } + + if (!s->drop_cache) { + return; + } + + if (s->open_flags & O_DIRECT) { + return; /* No host kernel page cache */ + } + +#if defined(__linux__) + /* This sets the scene for the next syscall... */ + ret = bdrv_co_flush(bs); + if (ret < 0) { + error_setg_errno(errp, -ret, "flush failed"); + return; + } + + /* Linux does not invalidate pages that are dirty, locked, or mmapped by a + * process. These limitations are okay because we just fsynced the file, + * we don't use mmap, and the file should not be in use by other processes. + */ + ret = posix_fadvise(s->fd, 0, 0, POSIX_FADV_DONTNEED); + if (ret != 0) { /* the return value is a positive errno */ + error_setg_errno(errp, ret, "fadvise failed"); + return; + } + + if (s->check_cache_dropped) { + check_cache_dropped(bs, errp); + } +#else /* __linux__ */ + /* Do nothing. Live migration to a remote host with cache.direct=off is + * unsupported on other host operating systems. Cache consistency issues + * may occur but no error is reported here, partly because that's the + * historical behavior and partly because it's hard to differentiate valid + * configurations that should not cause errors. + */ +#endif /* !__linux__ */ +} + +static void raw_account_discard(BDRVRawState *s, uint64_t nbytes, int ret) +{ + if (ret) { + s->stats.discard_nb_failed++; + } else { + s->stats.discard_nb_ok++; + s->stats.discard_bytes_ok += nbytes; + } +} + +static coroutine_fn int +raw_do_pdiscard(BlockDriverState *bs, int64_t offset, int bytes, bool blkdev) +{ + BDRVRawState *s = bs->opaque; + RawPosixAIOData acb; + int ret; + + acb = (RawPosixAIOData) { + .bs = bs, + .aio_fildes = s->fd, + .aio_type = QEMU_AIO_DISCARD, + .aio_offset = offset, + .aio_nbytes = bytes, + }; + + if (blkdev) { + acb.aio_type |= QEMU_AIO_BLKDEV; + } + + ret = raw_thread_pool_submit(bs, handle_aiocb_discard, &acb); + raw_account_discard(s, bytes, ret); + return ret; +} + +static coroutine_fn int +raw_co_pdiscard(BlockDriverState *bs, int64_t offset, int bytes) +{ + return raw_do_pdiscard(bs, offset, bytes, false); +} + +static int coroutine_fn +raw_do_pwrite_zeroes(BlockDriverState *bs, int64_t offset, int bytes, + BdrvRequestFlags flags, bool blkdev) +{ + BDRVRawState *s = bs->opaque; + RawPosixAIOData acb; + ThreadPoolFunc *handler; + +#ifdef CONFIG_FALLOCATE + if (offset + bytes > bs->total_sectors * BDRV_SECTOR_SIZE) { + BdrvTrackedRequest *req; + uint64_t end; + + /* + * This is a workaround for a bug in the Linux XFS driver, + * where writes submitted through the AIO interface will be + * discarded if they happen beyond a concurrently running + * fallocate() that increases the file length (i.e., both the + * write and the fallocate() happen beyond the EOF). + * + * To work around it, we extend the tracked request for this + * zero write until INT64_MAX (effectively infinity), and mark + * it as serializing. + * + * We have to enable this workaround for all filesystems and + * AIO modes (not just XFS with aio=native), because for + * remote filesystems we do not know the host configuration. + */ + + req = bdrv_co_get_self_request(bs); + assert(req); + assert(req->type == BDRV_TRACKED_WRITE); + assert(req->offset <= offset); + assert(req->offset + req->bytes >= offset + bytes); + + end = INT64_MAX & -(uint64_t)bs->bl.request_alignment; + req->bytes = end - req->offset; + req->overlap_bytes = req->bytes; + + bdrv_mark_request_serialising(req, bs->bl.request_alignment); + } +#endif + + acb = (RawPosixAIOData) { + .bs = bs, + .aio_fildes = s->fd, + .aio_type = QEMU_AIO_WRITE_ZEROES, + .aio_offset = offset, + .aio_nbytes = bytes, + }; + + if (blkdev) { + acb.aio_type |= QEMU_AIO_BLKDEV; + } + if (flags & BDRV_REQ_NO_FALLBACK) { + acb.aio_type |= QEMU_AIO_NO_FALLBACK; + } + + if (flags & BDRV_REQ_MAY_UNMAP) { + acb.aio_type |= QEMU_AIO_DISCARD; + handler = handle_aiocb_write_zeroes_unmap; + } else { + handler = handle_aiocb_write_zeroes; + } + + return raw_thread_pool_submit(bs, handler, &acb); +} + +static int coroutine_fn raw_co_pwrite_zeroes( + BlockDriverState *bs, int64_t offset, + int bytes, BdrvRequestFlags flags) +{ + return raw_do_pwrite_zeroes(bs, offset, bytes, flags, false); +} + +static int raw_get_info(BlockDriverState *bs, BlockDriverInfo *bdi) +{ + return 0; +} + +static BlockStatsSpecificFile get_blockstats_specific_file(BlockDriverState *bs) +{ + BDRVRawState *s = bs->opaque; + return (BlockStatsSpecificFile) { + .discard_nb_ok = s->stats.discard_nb_ok, + .discard_nb_failed = s->stats.discard_nb_failed, + .discard_bytes_ok = s->stats.discard_bytes_ok, + }; +} + +static BlockStatsSpecific *raw_get_specific_stats(BlockDriverState *bs) +{ + BlockStatsSpecific *stats = g_new(BlockStatsSpecific, 1); + + stats->driver = BLOCKDEV_DRIVER_FILE; + stats->u.file = get_blockstats_specific_file(bs); + + return stats; +} + +static BlockStatsSpecific *hdev_get_specific_stats(BlockDriverState *bs) +{ + BlockStatsSpecific *stats = g_new(BlockStatsSpecific, 1); + + stats->driver = BLOCKDEV_DRIVER_HOST_DEVICE; + stats->u.host_device = get_blockstats_specific_file(bs); + + return stats; +} + +static QemuOptsList raw_create_opts = { + .name = "raw-create-opts", + .head = QTAILQ_HEAD_INITIALIZER(raw_create_opts.head), + .desc = { + { + .name = BLOCK_OPT_SIZE, + .type = QEMU_OPT_SIZE, + .help = "Virtual disk size" + }, + { + .name = BLOCK_OPT_NOCOW, + .type = QEMU_OPT_BOOL, + .help = "Turn off copy-on-write (valid only on btrfs)" + }, + { + .name = BLOCK_OPT_PREALLOC, + .type = QEMU_OPT_STRING, + .help = "Preallocation mode (allowed values: off" +#ifdef CONFIG_POSIX_FALLOCATE + ", falloc" +#endif + ", full)" + }, + { + .name = BLOCK_OPT_EXTENT_SIZE_HINT, + .type = QEMU_OPT_SIZE, + .help = "Extent size hint for the image file, 0 to disable" + }, + { /* end of list */ } + } +}; + +static int raw_check_perm(BlockDriverState *bs, uint64_t perm, uint64_t shared, + Error **errp) +{ + BDRVRawState *s = bs->opaque; + BDRVRawReopenState *rs = NULL; + int open_flags; + int ret; + + if (s->perm_change_fd) { + /* + * In the context of reopen, this function may be called several times + * (directly and recursively while change permissions of the parent). + * This is even true for children that don't inherit from the original + * reopen node, so s->reopen_state is not set. + * + * Ignore all but the first call. + */ + return 0; + } + + if (s->reopen_state) { + /* We already have a new file descriptor to set permissions for */ + assert(s->reopen_state->perm == perm); + assert(s->reopen_state->shared_perm == shared); + rs = s->reopen_state->opaque; + s->perm_change_fd = rs->fd; + s->perm_change_flags = rs->open_flags; + } else { + /* We may need a new fd if auto-read-only switches the mode */ + ret = raw_reconfigure_getfd(bs, bs->open_flags, &open_flags, perm, + false, errp); + if (ret < 0) { + return ret; + } else if (ret != s->fd) { + s->perm_change_fd = ret; + s->perm_change_flags = open_flags; + } + } + + /* Prepare permissions on old fd to avoid conflicts between old and new, + * but keep everything locked that new will need. */ + ret = raw_handle_perm_lock(bs, RAW_PL_PREPARE, perm, shared, errp); + if (ret < 0) { + goto fail; + } + + /* Copy locks to the new fd */ + if (s->perm_change_fd) { + ret = raw_apply_lock_bytes(NULL, s->perm_change_fd, perm, ~shared, + false, errp); + if (ret < 0) { + raw_handle_perm_lock(bs, RAW_PL_ABORT, 0, 0, NULL); + goto fail; + } + } + return 0; + +fail: + if (s->perm_change_fd && !s->reopen_state) { + qemu_close(s->perm_change_fd); + } + s->perm_change_fd = 0; + return ret; +} + +static void raw_set_perm(BlockDriverState *bs, uint64_t perm, uint64_t shared) +{ + BDRVRawState *s = bs->opaque; + + /* For reopen, we have already switched to the new fd (.bdrv_set_perm is + * called after .bdrv_reopen_commit) */ + if (s->perm_change_fd && s->fd != s->perm_change_fd) { + qemu_close(s->fd); + s->fd = s->perm_change_fd; + s->open_flags = s->perm_change_flags; + } + s->perm_change_fd = 0; + + raw_handle_perm_lock(bs, RAW_PL_COMMIT, perm, shared, NULL); + s->perm = perm; + s->shared_perm = shared; +} + +static void raw_abort_perm_update(BlockDriverState *bs) +{ + BDRVRawState *s = bs->opaque; + + /* For reopen, .bdrv_reopen_abort is called afterwards and will close + * the file descriptor. */ + if (s->perm_change_fd && !s->reopen_state) { + qemu_close(s->perm_change_fd); + } + s->perm_change_fd = 0; + + raw_handle_perm_lock(bs, RAW_PL_ABORT, 0, 0, NULL); +} + +static int coroutine_fn raw_co_copy_range_from( + BlockDriverState *bs, BdrvChild *src, uint64_t src_offset, + BdrvChild *dst, uint64_t dst_offset, uint64_t bytes, + BdrvRequestFlags read_flags, BdrvRequestFlags write_flags) +{ + return bdrv_co_copy_range_to(src, src_offset, dst, dst_offset, bytes, + read_flags, write_flags); +} + +static int coroutine_fn raw_co_copy_range_to(BlockDriverState *bs, + BdrvChild *src, + uint64_t src_offset, + BdrvChild *dst, + uint64_t dst_offset, + uint64_t bytes, + BdrvRequestFlags read_flags, + BdrvRequestFlags write_flags) +{ + RawPosixAIOData acb; + BDRVRawState *s = bs->opaque; + BDRVRawState *src_s; + + assert(dst->bs == bs); + if (src->bs->drv->bdrv_co_copy_range_to != raw_co_copy_range_to) { + return -ENOTSUP; + } + + src_s = src->bs->opaque; + if (fd_open(src->bs) < 0 || fd_open(dst->bs) < 0) { + return -EIO; + } + + acb = (RawPosixAIOData) { + .bs = bs, + .aio_type = QEMU_AIO_COPY_RANGE, + .aio_fildes = src_s->fd, + .aio_offset = src_offset, + .aio_nbytes = bytes, + .copy_range = { + .aio_fd2 = s->fd, + .aio_offset2 = dst_offset, + }, + }; + + return raw_thread_pool_submit(bs, handle_aiocb_copy_range, &acb); +} + +BlockDriver bdrv_file = { + .format_name = "file", + .protocol_name = "file", + .instance_size = sizeof(BDRVRawState), + .bdrv_needs_filename = true, + .bdrv_probe = NULL, /* no probe for protocols */ + .bdrv_parse_filename = raw_parse_filename, + .bdrv_file_open = raw_open, + .bdrv_reopen_prepare = raw_reopen_prepare, + .bdrv_reopen_commit = raw_reopen_commit, + .bdrv_reopen_abort = raw_reopen_abort, + .bdrv_close = raw_close, + .bdrv_co_create = raw_co_create, + .bdrv_co_create_opts = raw_co_create_opts, + .bdrv_has_zero_init = bdrv_has_zero_init_1, + .bdrv_co_block_status = raw_co_block_status, + .bdrv_co_invalidate_cache = raw_co_invalidate_cache, + .bdrv_co_pwrite_zeroes = raw_co_pwrite_zeroes, + .bdrv_co_delete_file = raw_co_delete_file, + + .bdrv_co_preadv = raw_co_preadv, + .bdrv_co_pwritev = raw_co_pwritev, + .bdrv_co_flush_to_disk = raw_co_flush_to_disk, + .bdrv_co_pdiscard = raw_co_pdiscard, + .bdrv_co_copy_range_from = raw_co_copy_range_from, + .bdrv_co_copy_range_to = raw_co_copy_range_to, + .bdrv_refresh_limits = raw_refresh_limits, + .bdrv_io_plug = raw_aio_plug, + .bdrv_io_unplug = raw_aio_unplug, + .bdrv_attach_aio_context = raw_aio_attach_aio_context, + + .bdrv_co_truncate = raw_co_truncate, + .bdrv_getlength = raw_getlength, + .bdrv_get_info = raw_get_info, + .bdrv_get_allocated_file_size + = raw_get_allocated_file_size, + .bdrv_get_specific_stats = raw_get_specific_stats, + .bdrv_check_perm = raw_check_perm, + .bdrv_set_perm = raw_set_perm, + .bdrv_abort_perm_update = raw_abort_perm_update, + .create_opts = &raw_create_opts, + .mutable_opts = mutable_opts, +}; + +/***********************************************/ +/* host device */ + +#if defined(__APPLE__) && defined(__MACH__) +static kern_return_t GetBSDPath(io_iterator_t mediaIterator, char *bsdPath, + CFIndex maxPathSize, int flags); +static char *FindEjectableOpticalMedia(io_iterator_t *mediaIterator) +{ + kern_return_t kernResult = KERN_FAILURE; + mach_port_t masterPort; + CFMutableDictionaryRef classesToMatch; + const char *matching_array[] = {kIODVDMediaClass, kIOCDMediaClass}; + char *mediaType = NULL; + + kernResult = IOMasterPort( MACH_PORT_NULL, &masterPort ); + if ( KERN_SUCCESS != kernResult ) { + printf( "IOMasterPort returned %d\n", kernResult ); + } + + int index; + for (index = 0; index < ARRAY_SIZE(matching_array); index++) { + classesToMatch = IOServiceMatching(matching_array[index]); + if (classesToMatch == NULL) { + error_report("IOServiceMatching returned NULL for %s", + matching_array[index]); + continue; + } + CFDictionarySetValue(classesToMatch, CFSTR(kIOMediaEjectableKey), + kCFBooleanTrue); + kernResult = IOServiceGetMatchingServices(masterPort, classesToMatch, + mediaIterator); + if (kernResult != KERN_SUCCESS) { + error_report("Note: IOServiceGetMatchingServices returned %d", + kernResult); + continue; + } + + /* If a match was found, leave the loop */ + if (*mediaIterator != 0) { + trace_file_FindEjectableOpticalMedia(matching_array[index]); + mediaType = g_strdup(matching_array[index]); + break; + } + } + return mediaType; +} + +kern_return_t GetBSDPath(io_iterator_t mediaIterator, char *bsdPath, + CFIndex maxPathSize, int flags) +{ + io_object_t nextMedia; + kern_return_t kernResult = KERN_FAILURE; + *bsdPath = '\0'; + nextMedia = IOIteratorNext( mediaIterator ); + if ( nextMedia ) + { + CFTypeRef bsdPathAsCFString; + bsdPathAsCFString = IORegistryEntryCreateCFProperty( nextMedia, CFSTR( kIOBSDNameKey ), kCFAllocatorDefault, 0 ); + if ( bsdPathAsCFString ) { + size_t devPathLength; + strcpy( bsdPath, _PATH_DEV ); + if (flags & BDRV_O_NOCACHE) { + strcat(bsdPath, "r"); + } + devPathLength = strlen( bsdPath ); + if ( CFStringGetCString( bsdPathAsCFString, bsdPath + devPathLength, maxPathSize - devPathLength, kCFStringEncodingASCII ) ) { + kernResult = KERN_SUCCESS; + } + CFRelease( bsdPathAsCFString ); + } + IOObjectRelease( nextMedia ); + } + + return kernResult; +} + +/* Sets up a real cdrom for use in QEMU */ +static bool setup_cdrom(char *bsd_path, Error **errp) +{ + int index, num_of_test_partitions = 2, fd; + char test_partition[MAXPATHLEN]; + bool partition_found = false; + + /* look for a working partition */ + for (index = 0; index < num_of_test_partitions; index++) { + snprintf(test_partition, sizeof(test_partition), "%ss%d", bsd_path, + index); + fd = qemu_open(test_partition, O_RDONLY | O_BINARY | O_LARGEFILE, NULL); + if (fd >= 0) { + partition_found = true; + qemu_close(fd); + break; + } + } + + /* if a working partition on the device was not found */ + if (partition_found == false) { + error_setg(errp, "Failed to find a working partition on disc"); + } else { + trace_file_setup_cdrom(test_partition); + pstrcpy(bsd_path, MAXPATHLEN, test_partition); + } + return partition_found; +} + +/* Prints directions on mounting and unmounting a device */ +static void print_unmounting_directions(const char *file_name) +{ + error_report("If device %s is mounted on the desktop, unmount" + " it first before using it in QEMU", file_name); + error_report("Command to unmount device: diskutil unmountDisk %s", + file_name); + error_report("Command to mount device: diskutil mountDisk %s", file_name); +} + +#endif /* defined(__APPLE__) && defined(__MACH__) */ + +static int hdev_probe_device(const char *filename) +{ + struct stat st; + + /* allow a dedicated CD-ROM driver to match with a higher priority */ + if (strstart(filename, "/dev/cdrom", NULL)) + return 50; + + if (stat(filename, &st) >= 0 && + (S_ISCHR(st.st_mode) || S_ISBLK(st.st_mode))) { + return 100; + } + + return 0; +} + +static void hdev_parse_filename(const char *filename, QDict *options, + Error **errp) +{ + bdrv_parse_filename_strip_prefix(filename, "host_device:", options); +} + +static bool hdev_is_sg(BlockDriverState *bs) +{ + +#if defined(__linux__) + + BDRVRawState *s = bs->opaque; + struct stat st; + struct sg_scsi_id scsiid; + int sg_version; + int ret; + + if (stat(bs->filename, &st) < 0 || !S_ISCHR(st.st_mode)) { + return false; + } + + ret = ioctl(s->fd, SG_GET_VERSION_NUM, &sg_version); + if (ret < 0) { + return false; + } + + ret = ioctl(s->fd, SG_GET_SCSI_ID, &scsiid); + if (ret >= 0) { + trace_file_hdev_is_sg(scsiid.scsi_type, sg_version); + return true; + } + +#endif + + return false; +} + +static int hdev_open(BlockDriverState *bs, QDict *options, int flags, + Error **errp) +{ + BDRVRawState *s = bs->opaque; + int ret; + +#if defined(__APPLE__) && defined(__MACH__) + /* + * Caution: while qdict_get_str() is fine, getting non-string types + * would require more care. When @options come from -blockdev or + * blockdev_add, its members are typed according to the QAPI + * schema, but when they come from -drive, they're all QString. + */ + const char *filename = qdict_get_str(options, "filename"); + char bsd_path[MAXPATHLEN] = ""; + bool error_occurred = false; + + /* If using a real cdrom */ + if (strcmp(filename, "/dev/cdrom") == 0) { + char *mediaType = NULL; + kern_return_t ret_val; + io_iterator_t mediaIterator = 0; + + mediaType = FindEjectableOpticalMedia(&mediaIterator); + if (mediaType == NULL) { + error_setg(errp, "Please make sure your CD/DVD is in the optical" + " drive"); + error_occurred = true; + goto hdev_open_Mac_error; + } + + ret_val = GetBSDPath(mediaIterator, bsd_path, sizeof(bsd_path), flags); + if (ret_val != KERN_SUCCESS) { + error_setg(errp, "Could not get BSD path for optical drive"); + error_occurred = true; + goto hdev_open_Mac_error; + } + + /* If a real optical drive was not found */ + if (bsd_path[0] == '\0') { + error_setg(errp, "Failed to obtain bsd path for optical drive"); + error_occurred = true; + goto hdev_open_Mac_error; + } + + /* If using a cdrom disc and finding a partition on the disc failed */ + if (strncmp(mediaType, kIOCDMediaClass, 9) == 0 && + setup_cdrom(bsd_path, errp) == false) { + print_unmounting_directions(bsd_path); + error_occurred = true; + goto hdev_open_Mac_error; + } + + qdict_put_str(options, "filename", bsd_path); + +hdev_open_Mac_error: + g_free(mediaType); + if (mediaIterator) { + IOObjectRelease(mediaIterator); + } + if (error_occurred) { + return -ENOENT; + } + } +#endif /* defined(__APPLE__) && defined(__MACH__) */ + + s->type = FTYPE_FILE; + + ret = raw_open_common(bs, options, flags, 0, true, errp); + if (ret < 0) { +#if defined(__APPLE__) && defined(__MACH__) + if (*bsd_path) { + filename = bsd_path; + } + /* if a physical device experienced an error while being opened */ + if (strncmp(filename, "/dev/", 5) == 0) { + print_unmounting_directions(filename); + } +#endif /* defined(__APPLE__) && defined(__MACH__) */ + return ret; + } + + /* Since this does ioctl the device must be already opened */ + bs->sg = hdev_is_sg(bs); + + return ret; +} + +#if defined(__linux__) +static int coroutine_fn +hdev_co_ioctl(BlockDriverState *bs, unsigned long int req, void *buf) +{ + BDRVRawState *s = bs->opaque; + RawPosixAIOData acb; + int ret; + + ret = fd_open(bs); + if (ret < 0) { + return ret; + } + + if (req == SG_IO && s->pr_mgr) { + struct sg_io_hdr *io_hdr = buf; + if (io_hdr->cmdp[0] == PERSISTENT_RESERVE_OUT || + io_hdr->cmdp[0] == PERSISTENT_RESERVE_IN) { + return pr_manager_execute(s->pr_mgr, bdrv_get_aio_context(bs), + s->fd, io_hdr); + } + } + + acb = (RawPosixAIOData) { + .bs = bs, + .aio_type = QEMU_AIO_IOCTL, + .aio_fildes = s->fd, + .aio_offset = 0, + .ioctl = { + .buf = buf, + .cmd = req, + }, + }; + + return raw_thread_pool_submit(bs, handle_aiocb_ioctl, &acb); +} +#endif /* linux */ + +static int fd_open(BlockDriverState *bs) +{ + BDRVRawState *s = bs->opaque; + + /* this is just to ensure s->fd is sane (its called by io ops) */ + if (s->fd >= 0) + return 0; + return -EIO; +} + +static coroutine_fn int +hdev_co_pdiscard(BlockDriverState *bs, int64_t offset, int bytes) +{ + BDRVRawState *s = bs->opaque; + int ret; + + ret = fd_open(bs); + if (ret < 0) { + raw_account_discard(s, bytes, ret); + return ret; + } + return raw_do_pdiscard(bs, offset, bytes, true); +} + +static coroutine_fn int hdev_co_pwrite_zeroes(BlockDriverState *bs, + int64_t offset, int bytes, BdrvRequestFlags flags) +{ + int rc; + + rc = fd_open(bs); + if (rc < 0) { + return rc; + } + + return raw_do_pwrite_zeroes(bs, offset, bytes, flags, true); +} + +static BlockDriver bdrv_host_device = { + .format_name = "host_device", + .protocol_name = "host_device", + .instance_size = sizeof(BDRVRawState), + .bdrv_needs_filename = true, + .bdrv_probe_device = hdev_probe_device, + .bdrv_parse_filename = hdev_parse_filename, + .bdrv_file_open = hdev_open, + .bdrv_close = raw_close, + .bdrv_reopen_prepare = raw_reopen_prepare, + .bdrv_reopen_commit = raw_reopen_commit, + .bdrv_reopen_abort = raw_reopen_abort, + .bdrv_co_create_opts = bdrv_co_create_opts_simple, + .create_opts = &bdrv_create_opts_simple, + .mutable_opts = mutable_opts, + .bdrv_co_invalidate_cache = raw_co_invalidate_cache, + .bdrv_co_pwrite_zeroes = hdev_co_pwrite_zeroes, + + .bdrv_co_preadv = raw_co_preadv, + .bdrv_co_pwritev = raw_co_pwritev, + .bdrv_co_flush_to_disk = raw_co_flush_to_disk, + .bdrv_co_pdiscard = hdev_co_pdiscard, + .bdrv_co_copy_range_from = raw_co_copy_range_from, + .bdrv_co_copy_range_to = raw_co_copy_range_to, + .bdrv_refresh_limits = raw_refresh_limits, + .bdrv_io_plug = raw_aio_plug, + .bdrv_io_unplug = raw_aio_unplug, + .bdrv_attach_aio_context = raw_aio_attach_aio_context, + + .bdrv_co_truncate = raw_co_truncate, + .bdrv_getlength = raw_getlength, + .bdrv_get_info = raw_get_info, + .bdrv_get_allocated_file_size + = raw_get_allocated_file_size, + .bdrv_get_specific_stats = hdev_get_specific_stats, + .bdrv_check_perm = raw_check_perm, + .bdrv_set_perm = raw_set_perm, + .bdrv_abort_perm_update = raw_abort_perm_update, + .bdrv_probe_blocksizes = hdev_probe_blocksizes, + .bdrv_probe_geometry = hdev_probe_geometry, + + /* generic scsi device */ +#ifdef __linux__ + .bdrv_co_ioctl = hdev_co_ioctl, +#endif +}; + +#if defined(__linux__) || defined(__FreeBSD__) || defined(__FreeBSD_kernel__) +static void cdrom_parse_filename(const char *filename, QDict *options, + Error **errp) +{ + bdrv_parse_filename_strip_prefix(filename, "host_cdrom:", options); +} +#endif + +#ifdef __linux__ +static int cdrom_open(BlockDriverState *bs, QDict *options, int flags, + Error **errp) +{ + BDRVRawState *s = bs->opaque; + + s->type = FTYPE_CD; + + /* open will not fail even if no CD is inserted, so add O_NONBLOCK */ + return raw_open_common(bs, options, flags, O_NONBLOCK, true, errp); +} + +static int cdrom_probe_device(const char *filename) +{ + int fd, ret; + int prio = 0; + struct stat st; + + fd = qemu_open(filename, O_RDONLY | O_NONBLOCK, NULL); + if (fd < 0) { + goto out; + } + ret = fstat(fd, &st); + if (ret == -1 || !S_ISBLK(st.st_mode)) { + goto outc; + } + + /* Attempt to detect via a CDROM specific ioctl */ + ret = ioctl(fd, CDROM_DRIVE_STATUS, CDSL_CURRENT); + if (ret >= 0) + prio = 100; + +outc: + qemu_close(fd); +out: + return prio; +} + +static bool cdrom_is_inserted(BlockDriverState *bs) +{ + BDRVRawState *s = bs->opaque; + int ret; + + ret = ioctl(s->fd, CDROM_DRIVE_STATUS, CDSL_CURRENT); + return ret == CDS_DISC_OK; +} + +static void cdrom_eject(BlockDriverState *bs, bool eject_flag) +{ + BDRVRawState *s = bs->opaque; + + if (eject_flag) { + if (ioctl(s->fd, CDROMEJECT, NULL) < 0) + perror("CDROMEJECT"); + } else { + if (ioctl(s->fd, CDROMCLOSETRAY, NULL) < 0) + perror("CDROMEJECT"); + } +} + +static void cdrom_lock_medium(BlockDriverState *bs, bool locked) +{ + BDRVRawState *s = bs->opaque; + + if (ioctl(s->fd, CDROM_LOCKDOOR, locked) < 0) { + /* + * Note: an error can happen if the distribution automatically + * mounts the CD-ROM + */ + /* perror("CDROM_LOCKDOOR"); */ + } +} + +static BlockDriver bdrv_host_cdrom = { + .format_name = "host_cdrom", + .protocol_name = "host_cdrom", + .instance_size = sizeof(BDRVRawState), + .bdrv_needs_filename = true, + .bdrv_probe_device = cdrom_probe_device, + .bdrv_parse_filename = cdrom_parse_filename, + .bdrv_file_open = cdrom_open, + .bdrv_close = raw_close, + .bdrv_reopen_prepare = raw_reopen_prepare, + .bdrv_reopen_commit = raw_reopen_commit, + .bdrv_reopen_abort = raw_reopen_abort, + .bdrv_co_create_opts = bdrv_co_create_opts_simple, + .create_opts = &bdrv_create_opts_simple, + .mutable_opts = mutable_opts, + .bdrv_co_invalidate_cache = raw_co_invalidate_cache, + + .bdrv_co_preadv = raw_co_preadv, + .bdrv_co_pwritev = raw_co_pwritev, + .bdrv_co_flush_to_disk = raw_co_flush_to_disk, + .bdrv_refresh_limits = raw_refresh_limits, + .bdrv_io_plug = raw_aio_plug, + .bdrv_io_unplug = raw_aio_unplug, + .bdrv_attach_aio_context = raw_aio_attach_aio_context, + + .bdrv_co_truncate = raw_co_truncate, + .bdrv_getlength = raw_getlength, + .has_variable_length = true, + .bdrv_get_allocated_file_size + = raw_get_allocated_file_size, + + /* removable device support */ + .bdrv_is_inserted = cdrom_is_inserted, + .bdrv_eject = cdrom_eject, + .bdrv_lock_medium = cdrom_lock_medium, + + /* generic scsi device */ + .bdrv_co_ioctl = hdev_co_ioctl, +}; +#endif /* __linux__ */ + +#if defined (__FreeBSD__) || defined(__FreeBSD_kernel__) +static int cdrom_open(BlockDriverState *bs, QDict *options, int flags, + Error **errp) +{ + BDRVRawState *s = bs->opaque; + int ret; + + s->type = FTYPE_CD; + + ret = raw_open_common(bs, options, flags, 0, true, errp); + if (ret) { + return ret; + } + + /* make sure the door isn't locked at this time */ + ioctl(s->fd, CDIOCALLOW); + return 0; +} + +static int cdrom_probe_device(const char *filename) +{ + if (strstart(filename, "/dev/cd", NULL) || + strstart(filename, "/dev/acd", NULL)) + return 100; + return 0; +} + +static int cdrom_reopen(BlockDriverState *bs) +{ + BDRVRawState *s = bs->opaque; + int fd; + + /* + * Force reread of possibly changed/newly loaded disc, + * FreeBSD seems to not notice sometimes... + */ + if (s->fd >= 0) + qemu_close(s->fd); + fd = qemu_open(bs->filename, s->open_flags, NULL); + if (fd < 0) { + s->fd = -1; + return -EIO; + } + s->fd = fd; + + /* make sure the door isn't locked at this time */ + ioctl(s->fd, CDIOCALLOW); + return 0; +} + +static bool cdrom_is_inserted(BlockDriverState *bs) +{ + return raw_getlength(bs) > 0; +} + +static void cdrom_eject(BlockDriverState *bs, bool eject_flag) +{ + BDRVRawState *s = bs->opaque; + + if (s->fd < 0) + return; + + (void) ioctl(s->fd, CDIOCALLOW); + + if (eject_flag) { + if (ioctl(s->fd, CDIOCEJECT) < 0) + perror("CDIOCEJECT"); + } else { + if (ioctl(s->fd, CDIOCCLOSE) < 0) + perror("CDIOCCLOSE"); + } + + cdrom_reopen(bs); +} + +static void cdrom_lock_medium(BlockDriverState *bs, bool locked) +{ + BDRVRawState *s = bs->opaque; + + if (s->fd < 0) + return; + if (ioctl(s->fd, (locked ? CDIOCPREVENT : CDIOCALLOW)) < 0) { + /* + * Note: an error can happen if the distribution automatically + * mounts the CD-ROM + */ + /* perror("CDROM_LOCKDOOR"); */ + } +} + +static BlockDriver bdrv_host_cdrom = { + .format_name = "host_cdrom", + .protocol_name = "host_cdrom", + .instance_size = sizeof(BDRVRawState), + .bdrv_needs_filename = true, + .bdrv_probe_device = cdrom_probe_device, + .bdrv_parse_filename = cdrom_parse_filename, + .bdrv_file_open = cdrom_open, + .bdrv_close = raw_close, + .bdrv_reopen_prepare = raw_reopen_prepare, + .bdrv_reopen_commit = raw_reopen_commit, + .bdrv_reopen_abort = raw_reopen_abort, + .bdrv_co_create_opts = bdrv_co_create_opts_simple, + .create_opts = &bdrv_create_opts_simple, + .mutable_opts = mutable_opts, + + .bdrv_co_preadv = raw_co_preadv, + .bdrv_co_pwritev = raw_co_pwritev, + .bdrv_co_flush_to_disk = raw_co_flush_to_disk, + .bdrv_refresh_limits = raw_refresh_limits, + .bdrv_io_plug = raw_aio_plug, + .bdrv_io_unplug = raw_aio_unplug, + .bdrv_attach_aio_context = raw_aio_attach_aio_context, + + .bdrv_co_truncate = raw_co_truncate, + .bdrv_getlength = raw_getlength, + .has_variable_length = true, + .bdrv_get_allocated_file_size + = raw_get_allocated_file_size, + + /* removable device support */ + .bdrv_is_inserted = cdrom_is_inserted, + .bdrv_eject = cdrom_eject, + .bdrv_lock_medium = cdrom_lock_medium, +}; +#endif /* __FreeBSD__ */ + +static void bdrv_file_init(void) +{ + /* + * Register all the drivers. Note that order is important, the driver + * registered last will get probed first. + */ + bdrv_register(&bdrv_file); + bdrv_register(&bdrv_host_device); +#ifdef __linux__ + bdrv_register(&bdrv_host_cdrom); +#endif +#if defined(__FreeBSD__) || defined(__FreeBSD_kernel__) + bdrv_register(&bdrv_host_cdrom); +#endif +} + +block_init(bdrv_file_init); diff --git a/block/file-win32.c b/block/file-win32.c new file mode 100644 index 000000000..2642088bd --- /dev/null +++ b/block/file-win32.c @@ -0,0 +1,849 @@ +/* + * Block driver for RAW files (win32) + * + * Copyright (c) 2006 Fabrice Bellard + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ + +#include "qemu/osdep.h" +#include "qapi/error.h" +#include "qemu/cutils.h" +#include "block/block_int.h" +#include "qemu/module.h" +#include "qemu/option.h" +#include "block/raw-aio.h" +#include "trace.h" +#include "block/thread-pool.h" +#include "qemu/iov.h" +#include "qapi/qmp/qdict.h" +#include "qapi/qmp/qstring.h" +#include +#include + +#define FTYPE_FILE 0 +#define FTYPE_CD 1 +#define FTYPE_HARDDISK 2 + +typedef struct RawWin32AIOData { + BlockDriverState *bs; + HANDLE hfile; + struct iovec *aio_iov; + int aio_niov; + size_t aio_nbytes; + off64_t aio_offset; + int aio_type; +} RawWin32AIOData; + +typedef struct BDRVRawState { + HANDLE hfile; + int type; + char drive_path[16]; /* format: "d:\" */ + QEMUWin32AIOState *aio; +} BDRVRawState; + +/* + * Read/writes the data to/from a given linear buffer. + * + * Returns the number of bytes handles or -errno in case of an error. Short + * reads are only returned if the end of the file is reached. + */ +static size_t handle_aiocb_rw(RawWin32AIOData *aiocb) +{ + size_t offset = 0; + int i; + + for (i = 0; i < aiocb->aio_niov; i++) { + OVERLAPPED ov; + DWORD ret, ret_count, len; + + memset(&ov, 0, sizeof(ov)); + ov.Offset = (aiocb->aio_offset + offset); + ov.OffsetHigh = (aiocb->aio_offset + offset) >> 32; + len = aiocb->aio_iov[i].iov_len; + if (aiocb->aio_type & QEMU_AIO_WRITE) { + ret = WriteFile(aiocb->hfile, aiocb->aio_iov[i].iov_base, + len, &ret_count, &ov); + } else { + ret = ReadFile(aiocb->hfile, aiocb->aio_iov[i].iov_base, + len, &ret_count, &ov); + } + if (!ret) { + ret_count = 0; + } + if (ret_count != len) { + offset += ret_count; + break; + } + offset += len; + } + + return offset; +} + +static int aio_worker(void *arg) +{ + RawWin32AIOData *aiocb = arg; + ssize_t ret = 0; + size_t count; + + switch (aiocb->aio_type & QEMU_AIO_TYPE_MASK) { + case QEMU_AIO_READ: + count = handle_aiocb_rw(aiocb); + if (count < aiocb->aio_nbytes) { + /* A short read means that we have reached EOF. Pad the buffer + * with zeros for bytes after EOF. */ + iov_memset(aiocb->aio_iov, aiocb->aio_niov, count, + 0, aiocb->aio_nbytes - count); + + count = aiocb->aio_nbytes; + } + if (count == aiocb->aio_nbytes) { + ret = 0; + } else { + ret = -EINVAL; + } + break; + case QEMU_AIO_WRITE: + count = handle_aiocb_rw(aiocb); + if (count == aiocb->aio_nbytes) { + ret = 0; + } else { + ret = -EINVAL; + } + break; + case QEMU_AIO_FLUSH: + if (!FlushFileBuffers(aiocb->hfile)) { + return -EIO; + } + break; + default: + fprintf(stderr, "invalid aio request (0x%x)\n", aiocb->aio_type); + ret = -EINVAL; + break; + } + + g_free(aiocb); + return ret; +} + +static BlockAIOCB *paio_submit(BlockDriverState *bs, HANDLE hfile, + int64_t offset, QEMUIOVector *qiov, int count, + BlockCompletionFunc *cb, void *opaque, int type) +{ + RawWin32AIOData *acb = g_new(RawWin32AIOData, 1); + ThreadPool *pool; + + acb->bs = bs; + acb->hfile = hfile; + acb->aio_type = type; + + if (qiov) { + acb->aio_iov = qiov->iov; + acb->aio_niov = qiov->niov; + assert(qiov->size == count); + } + acb->aio_nbytes = count; + acb->aio_offset = offset; + + trace_file_paio_submit(acb, opaque, offset, count, type); + pool = aio_get_thread_pool(bdrv_get_aio_context(bs)); + return thread_pool_submit_aio(pool, aio_worker, acb, cb, opaque); +} + +int qemu_ftruncate64(int fd, int64_t length) +{ + LARGE_INTEGER li; + DWORD dw; + LONG high; + HANDLE h; + BOOL res; + + if ((GetVersion() & 0x80000000UL) && (length >> 32) != 0) + return -1; + + h = (HANDLE)_get_osfhandle(fd); + + /* get current position, ftruncate do not change position */ + li.HighPart = 0; + li.LowPart = SetFilePointer (h, 0, &li.HighPart, FILE_CURRENT); + if (li.LowPart == INVALID_SET_FILE_POINTER && GetLastError() != NO_ERROR) { + return -1; + } + + high = length >> 32; + dw = SetFilePointer(h, (DWORD) length, &high, FILE_BEGIN); + if (dw == INVALID_SET_FILE_POINTER && GetLastError() != NO_ERROR) { + return -1; + } + res = SetEndOfFile(h); + + /* back to old position */ + SetFilePointer(h, li.LowPart, &li.HighPart, FILE_BEGIN); + return res ? 0 : -1; +} + +static int set_sparse(int fd) +{ + DWORD returned; + return (int) DeviceIoControl((HANDLE)_get_osfhandle(fd), FSCTL_SET_SPARSE, + NULL, 0, NULL, 0, &returned, NULL); +} + +static void raw_detach_aio_context(BlockDriverState *bs) +{ + BDRVRawState *s = bs->opaque; + + if (s->aio) { + win32_aio_detach_aio_context(s->aio, bdrv_get_aio_context(bs)); + } +} + +static void raw_attach_aio_context(BlockDriverState *bs, + AioContext *new_context) +{ + BDRVRawState *s = bs->opaque; + + if (s->aio) { + win32_aio_attach_aio_context(s->aio, new_context); + } +} + +static void raw_probe_alignment(BlockDriverState *bs, Error **errp) +{ + BDRVRawState *s = bs->opaque; + DWORD sectorsPerCluster, freeClusters, totalClusters, count; + DISK_GEOMETRY_EX dg; + BOOL status; + + if (s->type == FTYPE_CD) { + bs->bl.request_alignment = 2048; + return; + } + if (s->type == FTYPE_HARDDISK) { + status = DeviceIoControl(s->hfile, IOCTL_DISK_GET_DRIVE_GEOMETRY_EX, + NULL, 0, &dg, sizeof(dg), &count, NULL); + if (status != 0) { + bs->bl.request_alignment = dg.Geometry.BytesPerSector; + return; + } + /* try GetDiskFreeSpace too */ + } + + if (s->drive_path[0]) { + GetDiskFreeSpace(s->drive_path, §orsPerCluster, + &dg.Geometry.BytesPerSector, + &freeClusters, &totalClusters); + bs->bl.request_alignment = dg.Geometry.BytesPerSector; + return; + } + + /* XXX Does Windows support AIO on less than 512-byte alignment? */ + bs->bl.request_alignment = 512; +} + +static void raw_parse_flags(int flags, bool use_aio, int *access_flags, + DWORD *overlapped) +{ + assert(access_flags != NULL); + assert(overlapped != NULL); + + if (flags & BDRV_O_RDWR) { + *access_flags = GENERIC_READ | GENERIC_WRITE; + } else { + *access_flags = GENERIC_READ; + } + + *overlapped = FILE_ATTRIBUTE_NORMAL; + if (use_aio) { + *overlapped |= FILE_FLAG_OVERLAPPED; + } + if (flags & BDRV_O_NOCACHE) { + *overlapped |= FILE_FLAG_NO_BUFFERING; + } +} + +static void raw_parse_filename(const char *filename, QDict *options, + Error **errp) +{ + bdrv_parse_filename_strip_prefix(filename, "file:", options); +} + +static QemuOptsList raw_runtime_opts = { + .name = "raw", + .head = QTAILQ_HEAD_INITIALIZER(raw_runtime_opts.head), + .desc = { + { + .name = "filename", + .type = QEMU_OPT_STRING, + .help = "File name of the image", + }, + { + .name = "aio", + .type = QEMU_OPT_STRING, + .help = "host AIO implementation (threads, native)", + }, + { + .name = "locking", + .type = QEMU_OPT_STRING, + .help = "file locking mode (on/off/auto, default: auto)", + }, + { /* end of list */ } + }, +}; + +static bool get_aio_option(QemuOpts *opts, int flags, Error **errp) +{ + BlockdevAioOptions aio, aio_default; + + aio_default = (flags & BDRV_O_NATIVE_AIO) ? BLOCKDEV_AIO_OPTIONS_NATIVE + : BLOCKDEV_AIO_OPTIONS_THREADS; + aio = qapi_enum_parse(&BlockdevAioOptions_lookup, qemu_opt_get(opts, "aio"), + aio_default, errp); + + switch (aio) { + case BLOCKDEV_AIO_OPTIONS_NATIVE: + return true; + case BLOCKDEV_AIO_OPTIONS_THREADS: + return false; + default: + error_setg(errp, "Invalid AIO option"); + } + return false; +} + +static int raw_open(BlockDriverState *bs, QDict *options, int flags, + Error **errp) +{ + BDRVRawState *s = bs->opaque; + int access_flags; + DWORD overlapped; + QemuOpts *opts; + Error *local_err = NULL; + const char *filename; + bool use_aio; + OnOffAuto locking; + int ret; + + s->type = FTYPE_FILE; + + opts = qemu_opts_create(&raw_runtime_opts, NULL, 0, &error_abort); + if (!qemu_opts_absorb_qdict(opts, options, errp)) { + ret = -EINVAL; + goto fail; + } + + locking = qapi_enum_parse(&OnOffAuto_lookup, + qemu_opt_get(opts, "locking"), + ON_OFF_AUTO_AUTO, &local_err); + if (local_err) { + error_propagate(errp, local_err); + ret = -EINVAL; + goto fail; + } + switch (locking) { + case ON_OFF_AUTO_ON: + error_setg(errp, "locking=on is not supported on Windows"); + ret = -EINVAL; + goto fail; + case ON_OFF_AUTO_OFF: + case ON_OFF_AUTO_AUTO: + break; + default: + g_assert_not_reached(); + } + + filename = qemu_opt_get(opts, "filename"); + + use_aio = get_aio_option(opts, flags, &local_err); + if (local_err) { + error_propagate(errp, local_err); + ret = -EINVAL; + goto fail; + } + + raw_parse_flags(flags, use_aio, &access_flags, &overlapped); + + if (filename[0] && filename[1] == ':') { + snprintf(s->drive_path, sizeof(s->drive_path), "%c:\\", filename[0]); + } else if (filename[0] == '\\' && filename[1] == '\\') { + s->drive_path[0] = 0; + } else { + /* Relative path. */ + char buf[MAX_PATH]; + GetCurrentDirectory(MAX_PATH, buf); + snprintf(s->drive_path, sizeof(s->drive_path), "%c:\\", buf[0]); + } + + s->hfile = CreateFile(filename, access_flags, + FILE_SHARE_READ, NULL, + OPEN_EXISTING, overlapped, NULL); + if (s->hfile == INVALID_HANDLE_VALUE) { + int err = GetLastError(); + + error_setg_win32(errp, err, "Could not open '%s'", filename); + if (err == ERROR_ACCESS_DENIED) { + ret = -EACCES; + } else { + ret = -EINVAL; + } + goto fail; + } + + if (use_aio) { + s->aio = win32_aio_init(); + if (s->aio == NULL) { + CloseHandle(s->hfile); + error_setg(errp, "Could not initialize AIO"); + ret = -EINVAL; + goto fail; + } + + ret = win32_aio_attach(s->aio, s->hfile); + if (ret < 0) { + win32_aio_cleanup(s->aio); + CloseHandle(s->hfile); + error_setg_errno(errp, -ret, "Could not enable AIO"); + goto fail; + } + + win32_aio_attach_aio_context(s->aio, bdrv_get_aio_context(bs)); + } + + /* When extending regular files, we get zeros from the OS */ + bs->supported_truncate_flags = BDRV_REQ_ZERO_WRITE; + + ret = 0; +fail: + qemu_opts_del(opts); + return ret; +} + +static BlockAIOCB *raw_aio_preadv(BlockDriverState *bs, + uint64_t offset, uint64_t bytes, + QEMUIOVector *qiov, int flags, + BlockCompletionFunc *cb, void *opaque) +{ + BDRVRawState *s = bs->opaque; + if (s->aio) { + return win32_aio_submit(bs, s->aio, s->hfile, offset, bytes, qiov, + cb, opaque, QEMU_AIO_READ); + } else { + return paio_submit(bs, s->hfile, offset, qiov, bytes, + cb, opaque, QEMU_AIO_READ); + } +} + +static BlockAIOCB *raw_aio_pwritev(BlockDriverState *bs, + uint64_t offset, uint64_t bytes, + QEMUIOVector *qiov, int flags, + BlockCompletionFunc *cb, void *opaque) +{ + BDRVRawState *s = bs->opaque; + if (s->aio) { + return win32_aio_submit(bs, s->aio, s->hfile, offset, bytes, qiov, + cb, opaque, QEMU_AIO_WRITE); + } else { + return paio_submit(bs, s->hfile, offset, qiov, bytes, + cb, opaque, QEMU_AIO_WRITE); + } +} + +static BlockAIOCB *raw_aio_flush(BlockDriverState *bs, + BlockCompletionFunc *cb, void *opaque) +{ + BDRVRawState *s = bs->opaque; + return paio_submit(bs, s->hfile, 0, NULL, 0, cb, opaque, QEMU_AIO_FLUSH); +} + +static void raw_close(BlockDriverState *bs) +{ + BDRVRawState *s = bs->opaque; + + if (s->aio) { + win32_aio_detach_aio_context(s->aio, bdrv_get_aio_context(bs)); + win32_aio_cleanup(s->aio); + s->aio = NULL; + } + + CloseHandle(s->hfile); + if (bs->open_flags & BDRV_O_TEMPORARY) { + unlink(bs->filename); + } +} + +static int coroutine_fn raw_co_truncate(BlockDriverState *bs, int64_t offset, + bool exact, PreallocMode prealloc, + BdrvRequestFlags flags, Error **errp) +{ + BDRVRawState *s = bs->opaque; + LONG low, high; + DWORD dwPtrLow; + + if (prealloc != PREALLOC_MODE_OFF) { + error_setg(errp, "Unsupported preallocation mode '%s'", + PreallocMode_str(prealloc)); + return -ENOTSUP; + } + + low = offset; + high = offset >> 32; + + /* + * An error has occurred if the return value is INVALID_SET_FILE_POINTER + * and GetLastError doesn't return NO_ERROR. + */ + dwPtrLow = SetFilePointer(s->hfile, low, &high, FILE_BEGIN); + if (dwPtrLow == INVALID_SET_FILE_POINTER && GetLastError() != NO_ERROR) { + error_setg_win32(errp, GetLastError(), "SetFilePointer error"); + return -EIO; + } + if (SetEndOfFile(s->hfile) == 0) { + error_setg_win32(errp, GetLastError(), "SetEndOfFile error"); + return -EIO; + } + return 0; +} + +static int64_t raw_getlength(BlockDriverState *bs) +{ + BDRVRawState *s = bs->opaque; + LARGE_INTEGER l; + ULARGE_INTEGER available, total, total_free; + DISK_GEOMETRY_EX dg; + DWORD count; + BOOL status; + + switch(s->type) { + case FTYPE_FILE: + l.LowPart = GetFileSize(s->hfile, (PDWORD)&l.HighPart); + if (l.LowPart == 0xffffffffUL && GetLastError() != NO_ERROR) + return -EIO; + break; + case FTYPE_CD: + if (!GetDiskFreeSpaceEx(s->drive_path, &available, &total, &total_free)) + return -EIO; + l.QuadPart = total.QuadPart; + break; + case FTYPE_HARDDISK: + status = DeviceIoControl(s->hfile, IOCTL_DISK_GET_DRIVE_GEOMETRY_EX, + NULL, 0, &dg, sizeof(dg), &count, NULL); + if (status != 0) { + l = dg.DiskSize; + } + break; + default: + return -EIO; + } + return l.QuadPart; +} + +static int64_t raw_get_allocated_file_size(BlockDriverState *bs) +{ + typedef DWORD (WINAPI * get_compressed_t)(const char *filename, + DWORD * high); + get_compressed_t get_compressed; + struct _stati64 st; + const char *filename = bs->filename; + /* WinNT support GetCompressedFileSize to determine allocate size */ + get_compressed = + (get_compressed_t) GetProcAddress(GetModuleHandle("kernel32"), + "GetCompressedFileSizeA"); + if (get_compressed) { + DWORD high, low; + low = get_compressed(filename, &high); + if (low != 0xFFFFFFFFlu || GetLastError() == NO_ERROR) { + return (((int64_t) high) << 32) + low; + } + } + + if (_stati64(filename, &st) < 0) { + return -1; + } + return st.st_size; +} + +static int raw_co_create(BlockdevCreateOptions *options, Error **errp) +{ + BlockdevCreateOptionsFile *file_opts; + int fd; + + assert(options->driver == BLOCKDEV_DRIVER_FILE); + file_opts = &options->u.file; + + if (file_opts->has_preallocation) { + error_setg(errp, "Preallocation is not supported on Windows"); + return -EINVAL; + } + if (file_opts->has_nocow) { + error_setg(errp, "nocow is not supported on Windows"); + return -EINVAL; + } + + fd = qemu_create(file_opts->filename, O_WRONLY | O_TRUNC | O_BINARY, + 0644, errp); + if (fd < 0) { + return -EIO; + } + set_sparse(fd); + ftruncate(fd, file_opts->size); + qemu_close(fd); + + return 0; +} + +static int coroutine_fn raw_co_create_opts(BlockDriver *drv, + const char *filename, + QemuOpts *opts, + Error **errp) +{ + BlockdevCreateOptions options; + int64_t total_size = 0; + + strstart(filename, "file:", &filename); + + /* Read out options */ + total_size = ROUND_UP(qemu_opt_get_size_del(opts, BLOCK_OPT_SIZE, 0), + BDRV_SECTOR_SIZE); + + options = (BlockdevCreateOptions) { + .driver = BLOCKDEV_DRIVER_FILE, + .u.file = { + .filename = (char *) filename, + .size = total_size, + .has_preallocation = false, + .has_nocow = false, + }, + }; + return raw_co_create(&options, errp); +} + +static QemuOptsList raw_create_opts = { + .name = "raw-create-opts", + .head = QTAILQ_HEAD_INITIALIZER(raw_create_opts.head), + .desc = { + { + .name = BLOCK_OPT_SIZE, + .type = QEMU_OPT_SIZE, + .help = "Virtual disk size" + }, + { /* end of list */ } + } +}; + +BlockDriver bdrv_file = { + .format_name = "file", + .protocol_name = "file", + .instance_size = sizeof(BDRVRawState), + .bdrv_needs_filename = true, + .bdrv_parse_filename = raw_parse_filename, + .bdrv_file_open = raw_open, + .bdrv_refresh_limits = raw_probe_alignment, + .bdrv_close = raw_close, + .bdrv_co_create_opts = raw_co_create_opts, + .bdrv_has_zero_init = bdrv_has_zero_init_1, + + .bdrv_aio_preadv = raw_aio_preadv, + .bdrv_aio_pwritev = raw_aio_pwritev, + .bdrv_aio_flush = raw_aio_flush, + + .bdrv_co_truncate = raw_co_truncate, + .bdrv_getlength = raw_getlength, + .bdrv_get_allocated_file_size + = raw_get_allocated_file_size, + + .create_opts = &raw_create_opts, +}; + +/***********************************************/ +/* host device */ + +static int find_cdrom(char *cdrom_name, int cdrom_name_size) +{ + char drives[256], *pdrv = drives; + UINT type; + + memset(drives, 0, sizeof(drives)); + GetLogicalDriveStrings(sizeof(drives), drives); + while(pdrv[0] != '\0') { + type = GetDriveType(pdrv); + switch(type) { + case DRIVE_CDROM: + snprintf(cdrom_name, cdrom_name_size, "\\\\.\\%c:", pdrv[0]); + return 0; + break; + } + pdrv += lstrlen(pdrv) + 1; + } + return -1; +} + +static int find_device_type(BlockDriverState *bs, const char *filename) +{ + BDRVRawState *s = bs->opaque; + UINT type; + const char *p; + + if (strstart(filename, "\\\\.\\", &p) || + strstart(filename, "//./", &p)) { + if (stristart(p, "PhysicalDrive", NULL)) + return FTYPE_HARDDISK; + snprintf(s->drive_path, sizeof(s->drive_path), "%c:\\", p[0]); + type = GetDriveType(s->drive_path); + switch (type) { + case DRIVE_REMOVABLE: + case DRIVE_FIXED: + return FTYPE_HARDDISK; + case DRIVE_CDROM: + return FTYPE_CD; + default: + return FTYPE_FILE; + } + } else { + return FTYPE_FILE; + } +} + +static int hdev_probe_device(const char *filename) +{ + if (strstart(filename, "/dev/cdrom", NULL)) + return 100; + if (is_windows_drive(filename)) + return 100; + return 0; +} + +static void hdev_parse_filename(const char *filename, QDict *options, + Error **errp) +{ + bdrv_parse_filename_strip_prefix(filename, "host_device:", options); +} + +static void hdev_refresh_limits(BlockDriverState *bs, Error **errp) +{ + /* XXX Does Windows support AIO on less than 512-byte alignment? */ + bs->bl.request_alignment = 512; +} + +static int hdev_open(BlockDriverState *bs, QDict *options, int flags, + Error **errp) +{ + BDRVRawState *s = bs->opaque; + int access_flags, create_flags; + int ret = 0; + DWORD overlapped; + char device_name[64]; + + Error *local_err = NULL; + const char *filename; + bool use_aio; + + QemuOpts *opts = qemu_opts_create(&raw_runtime_opts, NULL, 0, + &error_abort); + if (!qemu_opts_absorb_qdict(opts, options, errp)) { + ret = -EINVAL; + goto done; + } + + filename = qemu_opt_get(opts, "filename"); + + use_aio = get_aio_option(opts, flags, &local_err); + if (!local_err && use_aio) { + error_setg(&local_err, "AIO is not supported on Windows host devices"); + } + if (local_err) { + error_propagate(errp, local_err); + ret = -EINVAL; + goto done; + } + + if (strstart(filename, "/dev/cdrom", NULL)) { + if (find_cdrom(device_name, sizeof(device_name)) < 0) { + error_setg(errp, "Could not open CD-ROM drive"); + ret = -ENOENT; + goto done; + } + filename = device_name; + } else { + /* transform drive letters into device name */ + if (((filename[0] >= 'a' && filename[0] <= 'z') || + (filename[0] >= 'A' && filename[0] <= 'Z')) && + filename[1] == ':' && filename[2] == '\0') { + snprintf(device_name, sizeof(device_name), "\\\\.\\%c:", filename[0]); + filename = device_name; + } + } + s->type = find_device_type(bs, filename); + + raw_parse_flags(flags, use_aio, &access_flags, &overlapped); + + create_flags = OPEN_EXISTING; + + s->hfile = CreateFile(filename, access_flags, + FILE_SHARE_READ, NULL, + create_flags, overlapped, NULL); + if (s->hfile == INVALID_HANDLE_VALUE) { + int err = GetLastError(); + + if (err == ERROR_ACCESS_DENIED) { + ret = -EACCES; + } else { + ret = -EINVAL; + } + error_setg_errno(errp, -ret, "Could not open device"); + goto done; + } + +done: + qemu_opts_del(opts); + return ret; +} + +static BlockDriver bdrv_host_device = { + .format_name = "host_device", + .protocol_name = "host_device", + .instance_size = sizeof(BDRVRawState), + .bdrv_needs_filename = true, + .bdrv_parse_filename = hdev_parse_filename, + .bdrv_probe_device = hdev_probe_device, + .bdrv_file_open = hdev_open, + .bdrv_close = raw_close, + .bdrv_refresh_limits = hdev_refresh_limits, + + .bdrv_aio_preadv = raw_aio_preadv, + .bdrv_aio_pwritev = raw_aio_pwritev, + .bdrv_aio_flush = raw_aio_flush, + + .bdrv_detach_aio_context = raw_detach_aio_context, + .bdrv_attach_aio_context = raw_attach_aio_context, + + .bdrv_getlength = raw_getlength, + .has_variable_length = true, + + .bdrv_get_allocated_file_size + = raw_get_allocated_file_size, +}; + +static void bdrv_file_init(void) +{ + bdrv_register(&bdrv_file); + bdrv_register(&bdrv_host_device); +} + +block_init(bdrv_file_init); diff --git a/block/filter-compress.c b/block/filter-compress.c new file mode 100644 index 000000000..5136371bf --- /dev/null +++ b/block/filter-compress.c @@ -0,0 +1,158 @@ +/* + * Compress filter block driver + * + * Copyright (c) 2019 Virtuozzo International GmbH + * + * Author: + * Andrey Shinkevich + * (based on block/copy-on-read.c by Max Reitz) + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation; either version 2 or + * (at your option) any later version of the License. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, see . + */ + +#include "qemu/osdep.h" +#include "block/block_int.h" +#include "qemu/module.h" +#include "qapi/error.h" + + +static int compress_open(BlockDriverState *bs, QDict *options, int flags, + Error **errp) +{ + bs->file = bdrv_open_child(NULL, options, "file", bs, &child_of_bds, + BDRV_CHILD_FILTERED | BDRV_CHILD_PRIMARY, + false, errp); + if (!bs->file) { + return -EINVAL; + } + + if (!bs->file->bs->drv || !block_driver_can_compress(bs->file->bs->drv)) { + error_setg(errp, + "Compression is not supported for underlying format: %s", + bdrv_get_format_name(bs->file->bs) ?: "(no format)"); + + return -ENOTSUP; + } + + bs->supported_write_flags = BDRV_REQ_WRITE_UNCHANGED | + (BDRV_REQ_FUA & bs->file->bs->supported_write_flags); + + bs->supported_zero_flags = BDRV_REQ_WRITE_UNCHANGED | + ((BDRV_REQ_FUA | BDRV_REQ_MAY_UNMAP | BDRV_REQ_NO_FALLBACK) & + bs->file->bs->supported_zero_flags); + + return 0; +} + + +static int64_t compress_getlength(BlockDriverState *bs) +{ + return bdrv_getlength(bs->file->bs); +} + + +static int coroutine_fn compress_co_preadv_part(BlockDriverState *bs, + uint64_t offset, uint64_t bytes, + QEMUIOVector *qiov, + size_t qiov_offset, + int flags) +{ + return bdrv_co_preadv_part(bs->file, offset, bytes, qiov, qiov_offset, + flags); +} + + +static int coroutine_fn compress_co_pwritev_part(BlockDriverState *bs, + uint64_t offset, + uint64_t bytes, + QEMUIOVector *qiov, + size_t qiov_offset, int flags) +{ + return bdrv_co_pwritev_part(bs->file, offset, bytes, qiov, qiov_offset, + flags | BDRV_REQ_WRITE_COMPRESSED); +} + + +static int coroutine_fn compress_co_pwrite_zeroes(BlockDriverState *bs, + int64_t offset, int bytes, + BdrvRequestFlags flags) +{ + return bdrv_co_pwrite_zeroes(bs->file, offset, bytes, flags); +} + + +static int coroutine_fn compress_co_pdiscard(BlockDriverState *bs, + int64_t offset, int bytes) +{ + return bdrv_co_pdiscard(bs->file, offset, bytes); +} + + +static void compress_refresh_limits(BlockDriverState *bs, Error **errp) +{ + BlockDriverInfo bdi; + int ret; + + if (!bs->file) { + return; + } + + ret = bdrv_get_info(bs->file->bs, &bdi); + if (ret < 0 || bdi.cluster_size == 0) { + return; + } + + bs->bl.request_alignment = bdi.cluster_size; +} + + +static void compress_eject(BlockDriverState *bs, bool eject_flag) +{ + bdrv_eject(bs->file->bs, eject_flag); +} + + +static void compress_lock_medium(BlockDriverState *bs, bool locked) +{ + bdrv_lock_medium(bs->file->bs, locked); +} + + +static BlockDriver bdrv_compress = { + .format_name = "compress", + + .bdrv_open = compress_open, + .bdrv_child_perm = bdrv_default_perms, + + .bdrv_getlength = compress_getlength, + + .bdrv_co_preadv_part = compress_co_preadv_part, + .bdrv_co_pwritev_part = compress_co_pwritev_part, + .bdrv_co_pwrite_zeroes = compress_co_pwrite_zeroes, + .bdrv_co_pdiscard = compress_co_pdiscard, + .bdrv_refresh_limits = compress_refresh_limits, + + .bdrv_eject = compress_eject, + .bdrv_lock_medium = compress_lock_medium, + + .has_variable_length = true, + .is_filter = true, +}; + +static void bdrv_compress_init(void) +{ + bdrv_register(&bdrv_compress); +} + +block_init(bdrv_compress_init); diff --git a/block/gluster.c b/block/gluster.c new file mode 100644 index 000000000..4f1448e2b --- /dev/null +++ b/block/gluster.c @@ -0,0 +1,1675 @@ +/* + * GlusterFS backend for QEMU + * + * Copyright (C) 2012 Bharata B Rao + * + * This work is licensed under the terms of the GNU GPL, version 2 or later. + * See the COPYING file in the top-level directory. + * + */ + +#include "qemu/osdep.h" +#include "qemu/units.h" +#include +#include "block/block_int.h" +#include "block/qdict.h" +#include "qapi/error.h" +#include "qapi/qmp/qdict.h" +#include "qapi/qmp/qerror.h" +#include "qemu/uri.h" +#include "qemu/error-report.h" +#include "qemu/module.h" +#include "qemu/option.h" +#include "qemu/cutils.h" + +#ifdef CONFIG_GLUSTERFS_FTRUNCATE_HAS_STAT +# define glfs_ftruncate(fd, offset) glfs_ftruncate(fd, offset, NULL, NULL) +#endif + +#define GLUSTER_OPT_FILENAME "filename" +#define GLUSTER_OPT_VOLUME "volume" +#define GLUSTER_OPT_PATH "path" +#define GLUSTER_OPT_TYPE "type" +#define GLUSTER_OPT_SERVER_PATTERN "server." +#define GLUSTER_OPT_HOST "host" +#define GLUSTER_OPT_PORT "port" +#define GLUSTER_OPT_TO "to" +#define GLUSTER_OPT_IPV4 "ipv4" +#define GLUSTER_OPT_IPV6 "ipv6" +#define GLUSTER_OPT_SOCKET "socket" +#define GLUSTER_OPT_DEBUG "debug" +#define GLUSTER_DEFAULT_PORT 24007 +#define GLUSTER_DEBUG_DEFAULT 4 +#define GLUSTER_DEBUG_MAX 9 +#define GLUSTER_OPT_LOGFILE "logfile" +#define GLUSTER_LOGFILE_DEFAULT "-" /* handled in libgfapi as /dev/stderr */ +/* + * Several versions of GlusterFS (3.12? -> 6.0.1) fail when the transfer size + * is greater or equal to 1024 MiB, so we are limiting the transfer size to 512 + * MiB to avoid this rare issue. + */ +#define GLUSTER_MAX_TRANSFER (512 * MiB) + +#define GERR_INDEX_HINT "hint: check in 'server' array index '%d'\n" + +typedef struct GlusterAIOCB { + int64_t size; + int ret; + Coroutine *coroutine; + AioContext *aio_context; +} GlusterAIOCB; + +typedef struct BDRVGlusterState { + struct glfs *glfs; + struct glfs_fd *fd; + char *logfile; + bool supports_seek_data; + int debug; +} BDRVGlusterState; + +typedef struct BDRVGlusterReopenState { + struct glfs *glfs; + struct glfs_fd *fd; +} BDRVGlusterReopenState; + + +typedef struct GlfsPreopened { + char *volume; + glfs_t *fs; + int ref; +} GlfsPreopened; + +typedef struct ListElement { + QLIST_ENTRY(ListElement) list; + GlfsPreopened saved; +} ListElement; + +static QLIST_HEAD(, ListElement) glfs_list; + +static QemuOptsList qemu_gluster_create_opts = { + .name = "qemu-gluster-create-opts", + .head = QTAILQ_HEAD_INITIALIZER(qemu_gluster_create_opts.head), + .desc = { + { + .name = BLOCK_OPT_SIZE, + .type = QEMU_OPT_SIZE, + .help = "Virtual disk size" + }, + { + .name = BLOCK_OPT_PREALLOC, + .type = QEMU_OPT_STRING, + .help = "Preallocation mode (allowed values: off" +#ifdef CONFIG_GLUSTERFS_FALLOCATE + ", falloc" +#endif +#ifdef CONFIG_GLUSTERFS_ZEROFILL + ", full" +#endif + ")" + }, + { + .name = GLUSTER_OPT_DEBUG, + .type = QEMU_OPT_NUMBER, + .help = "Gluster log level, valid range is 0-9", + }, + { + .name = GLUSTER_OPT_LOGFILE, + .type = QEMU_OPT_STRING, + .help = "Logfile path of libgfapi", + }, + { /* end of list */ } + } +}; + +static QemuOptsList runtime_opts = { + .name = "gluster", + .head = QTAILQ_HEAD_INITIALIZER(runtime_opts.head), + .desc = { + { + .name = GLUSTER_OPT_FILENAME, + .type = QEMU_OPT_STRING, + .help = "URL to the gluster image", + }, + { + .name = GLUSTER_OPT_DEBUG, + .type = QEMU_OPT_NUMBER, + .help = "Gluster log level, valid range is 0-9", + }, + { + .name = GLUSTER_OPT_LOGFILE, + .type = QEMU_OPT_STRING, + .help = "Logfile path of libgfapi", + }, + { /* end of list */ } + }, +}; + +static QemuOptsList runtime_json_opts = { + .name = "gluster_json", + .head = QTAILQ_HEAD_INITIALIZER(runtime_json_opts.head), + .desc = { + { + .name = GLUSTER_OPT_VOLUME, + .type = QEMU_OPT_STRING, + .help = "name of gluster volume where VM image resides", + }, + { + .name = GLUSTER_OPT_PATH, + .type = QEMU_OPT_STRING, + .help = "absolute path to image file in gluster volume", + }, + { + .name = GLUSTER_OPT_DEBUG, + .type = QEMU_OPT_NUMBER, + .help = "Gluster log level, valid range is 0-9", + }, + { /* end of list */ } + }, +}; + +static QemuOptsList runtime_type_opts = { + .name = "gluster_type", + .head = QTAILQ_HEAD_INITIALIZER(runtime_type_opts.head), + .desc = { + { + .name = GLUSTER_OPT_TYPE, + .type = QEMU_OPT_STRING, + .help = "inet|unix", + }, + { /* end of list */ } + }, +}; + +static QemuOptsList runtime_unix_opts = { + .name = "gluster_unix", + .head = QTAILQ_HEAD_INITIALIZER(runtime_unix_opts.head), + .desc = { + { + .name = GLUSTER_OPT_SOCKET, + .type = QEMU_OPT_STRING, + .help = "socket file path (legacy)", + }, + { + .name = GLUSTER_OPT_PATH, + .type = QEMU_OPT_STRING, + .help = "socket file path (QAPI)", + }, + { /* end of list */ } + }, +}; + +static QemuOptsList runtime_inet_opts = { + .name = "gluster_inet", + .head = QTAILQ_HEAD_INITIALIZER(runtime_inet_opts.head), + .desc = { + { + .name = GLUSTER_OPT_TYPE, + .type = QEMU_OPT_STRING, + .help = "inet|unix", + }, + { + .name = GLUSTER_OPT_HOST, + .type = QEMU_OPT_STRING, + .help = "host address (hostname/ipv4/ipv6 addresses)", + }, + { + .name = GLUSTER_OPT_PORT, + .type = QEMU_OPT_STRING, + .help = "port number on which glusterd is listening (default 24007)", + }, + { + .name = "to", + .type = QEMU_OPT_NUMBER, + .help = "max port number, not supported by gluster", + }, + { + .name = "ipv4", + .type = QEMU_OPT_BOOL, + .help = "ipv4 bool value, not supported by gluster", + }, + { + .name = "ipv6", + .type = QEMU_OPT_BOOL, + .help = "ipv6 bool value, not supported by gluster", + }, + { /* end of list */ } + }, +}; + +static void glfs_set_preopened(const char *volume, glfs_t *fs) +{ + ListElement *entry = NULL; + + entry = g_new(ListElement, 1); + + entry->saved.volume = g_strdup(volume); + + entry->saved.fs = fs; + entry->saved.ref = 1; + + QLIST_INSERT_HEAD(&glfs_list, entry, list); +} + +static glfs_t *glfs_find_preopened(const char *volume) +{ + ListElement *entry = NULL; + + QLIST_FOREACH(entry, &glfs_list, list) { + if (strcmp(entry->saved.volume, volume) == 0) { + entry->saved.ref++; + return entry->saved.fs; + } + } + + return NULL; +} + +static void glfs_clear_preopened(glfs_t *fs) +{ + ListElement *entry = NULL; + ListElement *next; + + if (fs == NULL) { + return; + } + + QLIST_FOREACH_SAFE(entry, &glfs_list, list, next) { + if (entry->saved.fs == fs) { + if (--entry->saved.ref) { + return; + } + + QLIST_REMOVE(entry, list); + + glfs_fini(entry->saved.fs); + g_free(entry->saved.volume); + g_free(entry); + } + } +} + +static int parse_volume_options(BlockdevOptionsGluster *gconf, char *path) +{ + char *p, *q; + + if (!path) { + return -EINVAL; + } + + /* volume */ + p = q = path + strspn(path, "/"); + p += strcspn(p, "/"); + if (*p == '\0') { + return -EINVAL; + } + gconf->volume = g_strndup(q, p - q); + + /* path */ + p += strspn(p, "/"); + if (*p == '\0') { + return -EINVAL; + } + gconf->path = g_strdup(p); + return 0; +} + +/* + * file=gluster[+transport]://[host[:port]]/volume/path[?socket=...] + * + * 'gluster' is the protocol. + * + * 'transport' specifies the transport type used to connect to gluster + * management daemon (glusterd). Valid transport types are + * tcp or unix. If a transport type isn't specified, then tcp type is assumed. + * + * 'host' specifies the host where the volume file specification for + * the given volume resides. This can be either hostname or ipv4 address. + * If transport type is 'unix', then 'host' field should not be specified. + * The 'socket' field needs to be populated with the path to unix domain + * socket. + * + * 'port' is the port number on which glusterd is listening. This is optional + * and if not specified, QEMU will send 0 which will make gluster to use the + * default port. If the transport type is unix, then 'port' should not be + * specified. + * + * 'volume' is the name of the gluster volume which contains the VM image. + * + * 'path' is the path to the actual VM image that resides on gluster volume. + * + * Examples: + * + * file=gluster://1.2.3.4/testvol/a.img + * file=gluster+tcp://1.2.3.4/testvol/a.img + * file=gluster+tcp://1.2.3.4:24007/testvol/dir/a.img + * file=gluster+tcp://host.domain.com:24007/testvol/dir/a.img + * file=gluster+unix:///testvol/dir/a.img?socket=/tmp/glusterd.socket + */ +static int qemu_gluster_parse_uri(BlockdevOptionsGluster *gconf, + const char *filename) +{ + SocketAddress *gsconf; + URI *uri; + QueryParams *qp = NULL; + bool is_unix = false; + int ret = 0; + + uri = uri_parse(filename); + if (!uri) { + return -EINVAL; + } + + gconf->server = g_new0(SocketAddressList, 1); + gconf->server->value = gsconf = g_new0(SocketAddress, 1); + + /* transport */ + if (!uri->scheme || !strcmp(uri->scheme, "gluster")) { + gsconf->type = SOCKET_ADDRESS_TYPE_INET; + } else if (!strcmp(uri->scheme, "gluster+tcp")) { + gsconf->type = SOCKET_ADDRESS_TYPE_INET; + } else if (!strcmp(uri->scheme, "gluster+unix")) { + gsconf->type = SOCKET_ADDRESS_TYPE_UNIX; + is_unix = true; + } else if (!strcmp(uri->scheme, "gluster+rdma")) { + gsconf->type = SOCKET_ADDRESS_TYPE_INET; + warn_report("rdma feature is not supported, falling back to tcp"); + } else { + ret = -EINVAL; + goto out; + } + + ret = parse_volume_options(gconf, uri->path); + if (ret < 0) { + goto out; + } + + qp = query_params_parse(uri->query); + if (qp->n > 1 || (is_unix && !qp->n) || (!is_unix && qp->n)) { + ret = -EINVAL; + goto out; + } + + if (is_unix) { + if (uri->server || uri->port) { + ret = -EINVAL; + goto out; + } + if (strcmp(qp->p[0].name, "socket")) { + ret = -EINVAL; + goto out; + } + gsconf->u.q_unix.path = g_strdup(qp->p[0].value); + } else { + gsconf->u.inet.host = g_strdup(uri->server ? uri->server : "localhost"); + if (uri->port) { + gsconf->u.inet.port = g_strdup_printf("%d", uri->port); + } else { + gsconf->u.inet.port = g_strdup_printf("%d", GLUSTER_DEFAULT_PORT); + } + } + +out: + if (qp) { + query_params_free(qp); + } + uri_free(uri); + return ret; +} + +static struct glfs *qemu_gluster_glfs_init(BlockdevOptionsGluster *gconf, + Error **errp) +{ + struct glfs *glfs; + int ret; + int old_errno; + SocketAddressList *server; + unsigned long long port; + + glfs = glfs_find_preopened(gconf->volume); + if (glfs) { + return glfs; + } + + glfs = glfs_new(gconf->volume); + if (!glfs) { + goto out; + } + + glfs_set_preopened(gconf->volume, glfs); + + for (server = gconf->server; server; server = server->next) { + switch (server->value->type) { + case SOCKET_ADDRESS_TYPE_UNIX: + ret = glfs_set_volfile_server(glfs, "unix", + server->value->u.q_unix.path, 0); + break; + case SOCKET_ADDRESS_TYPE_INET: + if (parse_uint_full(server->value->u.inet.port, &port, 10) < 0 || + port > 65535) { + error_setg(errp, "'%s' is not a valid port number", + server->value->u.inet.port); + errno = EINVAL; + goto out; + } + ret = glfs_set_volfile_server(glfs, "tcp", + server->value->u.inet.host, + (int)port); + break; + case SOCKET_ADDRESS_TYPE_VSOCK: + case SOCKET_ADDRESS_TYPE_FD: + default: + abort(); + } + + if (ret < 0) { + goto out; + } + } + + ret = glfs_set_logging(glfs, gconf->logfile, gconf->debug); + if (ret < 0) { + goto out; + } + + ret = glfs_init(glfs); + if (ret) { + error_setg(errp, "Gluster connection for volume %s, path %s failed" + " to connect", gconf->volume, gconf->path); + for (server = gconf->server; server; server = server->next) { + if (server->value->type == SOCKET_ADDRESS_TYPE_UNIX) { + error_append_hint(errp, "hint: failed on socket %s ", + server->value->u.q_unix.path); + } else { + error_append_hint(errp, "hint: failed on host %s and port %s ", + server->value->u.inet.host, + server->value->u.inet.port); + } + } + + error_append_hint(errp, "Please refer to gluster logs for more info\n"); + + /* glfs_init sometimes doesn't set errno although docs suggest that */ + if (errno == 0) { + errno = EINVAL; + } + + goto out; + } + return glfs; + +out: + if (glfs) { + old_errno = errno; + glfs_clear_preopened(glfs); + errno = old_errno; + } + return NULL; +} + +/* + * Convert the json formatted command line into qapi. +*/ +static int qemu_gluster_parse_json(BlockdevOptionsGluster *gconf, + QDict *options, Error **errp) +{ + QemuOpts *opts; + SocketAddress *gsconf = NULL; + SocketAddressList *curr = NULL; + QDict *backing_options = NULL; + Error *local_err = NULL; + char *str = NULL; + const char *ptr; + int i, type, num_servers; + + /* create opts info from runtime_json_opts list */ + opts = qemu_opts_create(&runtime_json_opts, NULL, 0, &error_abort); + if (!qemu_opts_absorb_qdict(opts, options, errp)) { + goto out; + } + + num_servers = qdict_array_entries(options, GLUSTER_OPT_SERVER_PATTERN); + if (num_servers < 1) { + error_setg(&local_err, QERR_MISSING_PARAMETER, "server"); + goto out; + } + + ptr = qemu_opt_get(opts, GLUSTER_OPT_VOLUME); + if (!ptr) { + error_setg(&local_err, QERR_MISSING_PARAMETER, GLUSTER_OPT_VOLUME); + goto out; + } + gconf->volume = g_strdup(ptr); + + ptr = qemu_opt_get(opts, GLUSTER_OPT_PATH); + if (!ptr) { + error_setg(&local_err, QERR_MISSING_PARAMETER, GLUSTER_OPT_PATH); + goto out; + } + gconf->path = g_strdup(ptr); + qemu_opts_del(opts); + + for (i = 0; i < num_servers; i++) { + str = g_strdup_printf(GLUSTER_OPT_SERVER_PATTERN"%d.", i); + qdict_extract_subqdict(options, &backing_options, str); + + /* create opts info from runtime_type_opts list */ + opts = qemu_opts_create(&runtime_type_opts, NULL, 0, &error_abort); + if (!qemu_opts_absorb_qdict(opts, backing_options, errp)) { + goto out; + } + + ptr = qemu_opt_get(opts, GLUSTER_OPT_TYPE); + if (!ptr) { + error_setg(&local_err, QERR_MISSING_PARAMETER, GLUSTER_OPT_TYPE); + error_append_hint(&local_err, GERR_INDEX_HINT, i); + goto out; + + } + gsconf = g_new0(SocketAddress, 1); + if (!strcmp(ptr, "tcp")) { + ptr = "inet"; /* accept legacy "tcp" */ + } + type = qapi_enum_parse(&SocketAddressType_lookup, ptr, -1, NULL); + if (type != SOCKET_ADDRESS_TYPE_INET + && type != SOCKET_ADDRESS_TYPE_UNIX) { + error_setg(&local_err, + "Parameter '%s' may be 'inet' or 'unix'", + GLUSTER_OPT_TYPE); + error_append_hint(&local_err, GERR_INDEX_HINT, i); + goto out; + } + gsconf->type = type; + qemu_opts_del(opts); + + if (gsconf->type == SOCKET_ADDRESS_TYPE_INET) { + /* create opts info from runtime_inet_opts list */ + opts = qemu_opts_create(&runtime_inet_opts, NULL, 0, &error_abort); + if (!qemu_opts_absorb_qdict(opts, backing_options, errp)) { + goto out; + } + + ptr = qemu_opt_get(opts, GLUSTER_OPT_HOST); + if (!ptr) { + error_setg(&local_err, QERR_MISSING_PARAMETER, + GLUSTER_OPT_HOST); + error_append_hint(&local_err, GERR_INDEX_HINT, i); + goto out; + } + gsconf->u.inet.host = g_strdup(ptr); + ptr = qemu_opt_get(opts, GLUSTER_OPT_PORT); + if (!ptr) { + error_setg(&local_err, QERR_MISSING_PARAMETER, + GLUSTER_OPT_PORT); + error_append_hint(&local_err, GERR_INDEX_HINT, i); + goto out; + } + gsconf->u.inet.port = g_strdup(ptr); + + /* defend for unsupported fields in InetSocketAddress, + * i.e. @ipv4, @ipv6 and @to + */ + ptr = qemu_opt_get(opts, GLUSTER_OPT_TO); + if (ptr) { + gsconf->u.inet.has_to = true; + } + ptr = qemu_opt_get(opts, GLUSTER_OPT_IPV4); + if (ptr) { + gsconf->u.inet.has_ipv4 = true; + } + ptr = qemu_opt_get(opts, GLUSTER_OPT_IPV6); + if (ptr) { + gsconf->u.inet.has_ipv6 = true; + } + if (gsconf->u.inet.has_to) { + error_setg(&local_err, "Parameter 'to' not supported"); + goto out; + } + if (gsconf->u.inet.has_ipv4 || gsconf->u.inet.has_ipv6) { + error_setg(&local_err, "Parameters 'ipv4/ipv6' not supported"); + goto out; + } + qemu_opts_del(opts); + } else { + /* create opts info from runtime_unix_opts list */ + opts = qemu_opts_create(&runtime_unix_opts, NULL, 0, &error_abort); + if (!qemu_opts_absorb_qdict(opts, backing_options, errp)) { + goto out; + } + + ptr = qemu_opt_get(opts, GLUSTER_OPT_PATH); + if (!ptr) { + ptr = qemu_opt_get(opts, GLUSTER_OPT_SOCKET); + } else if (qemu_opt_get(opts, GLUSTER_OPT_SOCKET)) { + error_setg(&local_err, + "Conflicting parameters 'path' and 'socket'"); + error_append_hint(&local_err, GERR_INDEX_HINT, i); + goto out; + } + if (!ptr) { + error_setg(&local_err, QERR_MISSING_PARAMETER, + GLUSTER_OPT_PATH); + error_append_hint(&local_err, GERR_INDEX_HINT, i); + goto out; + } + gsconf->u.q_unix.path = g_strdup(ptr); + qemu_opts_del(opts); + } + + if (gconf->server == NULL) { + gconf->server = g_new0(SocketAddressList, 1); + gconf->server->value = gsconf; + curr = gconf->server; + } else { + curr->next = g_new0(SocketAddressList, 1); + curr->next->value = gsconf; + curr = curr->next; + } + gsconf = NULL; + + qobject_unref(backing_options); + backing_options = NULL; + g_free(str); + str = NULL; + } + + return 0; + +out: + error_propagate(errp, local_err); + qapi_free_SocketAddress(gsconf); + qemu_opts_del(opts); + g_free(str); + qobject_unref(backing_options); + errno = EINVAL; + return -errno; +} + +/* Converts options given in @filename and the @options QDict into the QAPI + * object @gconf. */ +static int qemu_gluster_parse(BlockdevOptionsGluster *gconf, + const char *filename, + QDict *options, Error **errp) +{ + int ret; + if (filename) { + ret = qemu_gluster_parse_uri(gconf, filename); + if (ret < 0) { + error_setg(errp, "invalid URI %s", filename); + error_append_hint(errp, "Usage: file=gluster[+transport]://" + "[host[:port]]volume/path[?socket=...]" + "[,file.debug=N]" + "[,file.logfile=/path/filename.log]\n"); + return ret; + } + } else { + ret = qemu_gluster_parse_json(gconf, options, errp); + if (ret < 0) { + error_append_hint(errp, "Usage: " + "-drive driver=qcow2,file.driver=gluster," + "file.volume=testvol,file.path=/path/a.qcow2" + "[,file.debug=9]" + "[,file.logfile=/path/filename.log]," + "file.server.0.type=inet," + "file.server.0.host=1.2.3.4," + "file.server.0.port=24007," + "file.server.1.transport=unix," + "file.server.1.path=/var/run/glusterd.socket ..." + "\n"); + return ret; + } + } + + return 0; +} + +static struct glfs *qemu_gluster_init(BlockdevOptionsGluster *gconf, + const char *filename, + QDict *options, Error **errp) +{ + int ret; + + ret = qemu_gluster_parse(gconf, filename, options, errp); + if (ret < 0) { + errno = -ret; + return NULL; + } + + return qemu_gluster_glfs_init(gconf, errp); +} + +/* + * AIO callback routine called from GlusterFS thread. + */ +static void gluster_finish_aiocb(struct glfs_fd *fd, ssize_t ret, +#ifdef CONFIG_GLUSTERFS_IOCB_HAS_STAT + struct glfs_stat *pre, struct glfs_stat *post, +#endif + void *arg) +{ + GlusterAIOCB *acb = (GlusterAIOCB *)arg; + + if (!ret || ret == acb->size) { + acb->ret = 0; /* Success */ + } else if (ret < 0) { + acb->ret = -errno; /* Read/Write failed */ + } else { + acb->ret = -EIO; /* Partial read/write - fail it */ + } + + aio_co_schedule(acb->aio_context, acb->coroutine); +} + +static void qemu_gluster_parse_flags(int bdrv_flags, int *open_flags) +{ + assert(open_flags != NULL); + + *open_flags |= O_BINARY; + + if (bdrv_flags & BDRV_O_RDWR) { + *open_flags |= O_RDWR; + } else { + *open_flags |= O_RDONLY; + } + + if ((bdrv_flags & BDRV_O_NOCACHE)) { + *open_flags |= O_DIRECT; + } +} + +/* + * Do SEEK_DATA/HOLE to detect if it is functional. Older broken versions of + * gfapi incorrectly return the current offset when SEEK_DATA/HOLE is used. + * - Corrected versions return -1 and set errno to EINVAL. + * - Versions that support SEEK_DATA/HOLE correctly, will return -1 and set + * errno to ENXIO when SEEK_DATA is called with a position of EOF. + */ +static bool qemu_gluster_test_seek(struct glfs_fd *fd) +{ + off_t ret = 0; + +#if defined SEEK_HOLE && defined SEEK_DATA + off_t eof; + + eof = glfs_lseek(fd, 0, SEEK_END); + if (eof < 0) { + /* this should never occur */ + return false; + } + + /* this should always fail with ENXIO if SEEK_DATA is supported */ + ret = glfs_lseek(fd, eof, SEEK_DATA); +#endif + + return (ret < 0) && (errno == ENXIO); +} + +static int qemu_gluster_open(BlockDriverState *bs, QDict *options, + int bdrv_flags, Error **errp) +{ + BDRVGlusterState *s = bs->opaque; + int open_flags = 0; + int ret = 0; + BlockdevOptionsGluster *gconf = NULL; + QemuOpts *opts; + const char *filename, *logfile; + + opts = qemu_opts_create(&runtime_opts, NULL, 0, &error_abort); + if (!qemu_opts_absorb_qdict(opts, options, errp)) { + ret = -EINVAL; + goto out; + } + + filename = qemu_opt_get(opts, GLUSTER_OPT_FILENAME); + + s->debug = qemu_opt_get_number(opts, GLUSTER_OPT_DEBUG, + GLUSTER_DEBUG_DEFAULT); + if (s->debug < 0) { + s->debug = 0; + } else if (s->debug > GLUSTER_DEBUG_MAX) { + s->debug = GLUSTER_DEBUG_MAX; + } + + gconf = g_new0(BlockdevOptionsGluster, 1); + gconf->debug = s->debug; + gconf->has_debug = true; + + logfile = qemu_opt_get(opts, GLUSTER_OPT_LOGFILE); + s->logfile = g_strdup(logfile ? logfile : GLUSTER_LOGFILE_DEFAULT); + + gconf->logfile = g_strdup(s->logfile); + gconf->has_logfile = true; + + s->glfs = qemu_gluster_init(gconf, filename, options, errp); + if (!s->glfs) { + ret = -errno; + goto out; + } + +#ifdef CONFIG_GLUSTERFS_XLATOR_OPT + /* Without this, if fsync fails for a recoverable reason (for instance, + * ENOSPC), gluster will dump its cache, preventing retries. This means + * almost certain data loss. Not all gluster versions support the + * 'resync-failed-syncs-after-fsync' key value, but there is no way to + * discover during runtime if it is supported (this api returns success for + * unknown key/value pairs) */ + ret = glfs_set_xlator_option(s->glfs, "*-write-behind", + "resync-failed-syncs-after-fsync", + "on"); + if (ret < 0) { + error_setg_errno(errp, errno, "Unable to set xlator key/value pair"); + ret = -errno; + goto out; + } +#endif + + qemu_gluster_parse_flags(bdrv_flags, &open_flags); + + s->fd = glfs_open(s->glfs, gconf->path, open_flags); + ret = s->fd ? 0 : -errno; + + if (ret == -EACCES || ret == -EROFS) { + /* Try to degrade to read-only, but if it doesn't work, still use the + * normal error message. */ + if (bdrv_apply_auto_read_only(bs, NULL, NULL) == 0) { + open_flags = (open_flags & ~O_RDWR) | O_RDONLY; + s->fd = glfs_open(s->glfs, gconf->path, open_flags); + ret = s->fd ? 0 : -errno; + } + } + + s->supports_seek_data = qemu_gluster_test_seek(s->fd); + +out: + qemu_opts_del(opts); + qapi_free_BlockdevOptionsGluster(gconf); + if (!ret) { + return ret; + } + g_free(s->logfile); + if (s->fd) { + glfs_close(s->fd); + } + + glfs_clear_preopened(s->glfs); + + return ret; +} + +static void qemu_gluster_refresh_limits(BlockDriverState *bs, Error **errp) +{ + bs->bl.max_transfer = GLUSTER_MAX_TRANSFER; +} + +static int qemu_gluster_reopen_prepare(BDRVReopenState *state, + BlockReopenQueue *queue, Error **errp) +{ + int ret = 0; + BDRVGlusterState *s; + BDRVGlusterReopenState *reop_s; + BlockdevOptionsGluster *gconf; + int open_flags = 0; + + assert(state != NULL); + assert(state->bs != NULL); + + s = state->bs->opaque; + + state->opaque = g_new0(BDRVGlusterReopenState, 1); + reop_s = state->opaque; + + qemu_gluster_parse_flags(state->flags, &open_flags); + + gconf = g_new0(BlockdevOptionsGluster, 1); + gconf->debug = s->debug; + gconf->has_debug = true; + gconf->logfile = g_strdup(s->logfile); + gconf->has_logfile = true; + + /* + * If 'state->bs->exact_filename' is empty, 'state->options' should contain + * the JSON parameters already parsed. + */ + if (state->bs->exact_filename[0] != '\0') { + reop_s->glfs = qemu_gluster_init(gconf, state->bs->exact_filename, NULL, + errp); + } else { + reop_s->glfs = qemu_gluster_init(gconf, NULL, state->options, errp); + } + if (reop_s->glfs == NULL) { + ret = -errno; + goto exit; + } + +#ifdef CONFIG_GLUSTERFS_XLATOR_OPT + ret = glfs_set_xlator_option(reop_s->glfs, "*-write-behind", + "resync-failed-syncs-after-fsync", "on"); + if (ret < 0) { + error_setg_errno(errp, errno, "Unable to set xlator key/value pair"); + ret = -errno; + goto exit; + } +#endif + + reop_s->fd = glfs_open(reop_s->glfs, gconf->path, open_flags); + if (reop_s->fd == NULL) { + /* reops->glfs will be cleaned up in _abort */ + ret = -errno; + goto exit; + } + +exit: + /* state->opaque will be freed in either the _abort or _commit */ + qapi_free_BlockdevOptionsGluster(gconf); + return ret; +} + +static void qemu_gluster_reopen_commit(BDRVReopenState *state) +{ + BDRVGlusterReopenState *reop_s = state->opaque; + BDRVGlusterState *s = state->bs->opaque; + + + /* close the old */ + if (s->fd) { + glfs_close(s->fd); + } + + glfs_clear_preopened(s->glfs); + + /* use the newly opened image / connection */ + s->fd = reop_s->fd; + s->glfs = reop_s->glfs; + + g_free(state->opaque); + state->opaque = NULL; + + return; +} + + +static void qemu_gluster_reopen_abort(BDRVReopenState *state) +{ + BDRVGlusterReopenState *reop_s = state->opaque; + + if (reop_s == NULL) { + return; + } + + if (reop_s->fd) { + glfs_close(reop_s->fd); + } + + glfs_clear_preopened(reop_s->glfs); + + g_free(state->opaque); + state->opaque = NULL; + + return; +} + +#ifdef CONFIG_GLUSTERFS_ZEROFILL +static coroutine_fn int qemu_gluster_co_pwrite_zeroes(BlockDriverState *bs, + int64_t offset, + int size, + BdrvRequestFlags flags) +{ + int ret; + GlusterAIOCB acb; + BDRVGlusterState *s = bs->opaque; + + acb.size = size; + acb.ret = 0; + acb.coroutine = qemu_coroutine_self(); + acb.aio_context = bdrv_get_aio_context(bs); + + ret = glfs_zerofill_async(s->fd, offset, size, gluster_finish_aiocb, &acb); + if (ret < 0) { + return -errno; + } + + qemu_coroutine_yield(); + return acb.ret; +} +#endif + +static int qemu_gluster_do_truncate(struct glfs_fd *fd, int64_t offset, + PreallocMode prealloc, Error **errp) +{ + int64_t current_length; + + current_length = glfs_lseek(fd, 0, SEEK_END); + if (current_length < 0) { + error_setg_errno(errp, errno, "Failed to determine current size"); + return -errno; + } + + if (current_length > offset && prealloc != PREALLOC_MODE_OFF) { + error_setg(errp, "Cannot use preallocation for shrinking files"); + return -ENOTSUP; + } + + if (current_length == offset) { + return 0; + } + + switch (prealloc) { +#ifdef CONFIG_GLUSTERFS_FALLOCATE + case PREALLOC_MODE_FALLOC: + if (glfs_fallocate(fd, 0, current_length, offset - current_length)) { + error_setg_errno(errp, errno, "Could not preallocate data"); + return -errno; + } + break; +#endif /* CONFIG_GLUSTERFS_FALLOCATE */ +#ifdef CONFIG_GLUSTERFS_ZEROFILL + case PREALLOC_MODE_FULL: + if (glfs_ftruncate(fd, offset)) { + error_setg_errno(errp, errno, "Could not resize file"); + return -errno; + } + if (glfs_zerofill(fd, current_length, offset - current_length)) { + error_setg_errno(errp, errno, "Could not zerofill the new area"); + return -errno; + } + break; +#endif /* CONFIG_GLUSTERFS_ZEROFILL */ + case PREALLOC_MODE_OFF: + if (glfs_ftruncate(fd, offset)) { + error_setg_errno(errp, errno, "Could not resize file"); + return -errno; + } + break; + default: + error_setg(errp, "Unsupported preallocation mode: %s", + PreallocMode_str(prealloc)); + return -EINVAL; + } + + return 0; +} + +static int qemu_gluster_co_create(BlockdevCreateOptions *options, + Error **errp) +{ + BlockdevCreateOptionsGluster *opts = &options->u.gluster; + struct glfs *glfs; + struct glfs_fd *fd = NULL; + int ret = 0; + + assert(options->driver == BLOCKDEV_DRIVER_GLUSTER); + + glfs = qemu_gluster_glfs_init(opts->location, errp); + if (!glfs) { + ret = -errno; + goto out; + } + + fd = glfs_creat(glfs, opts->location->path, + O_WRONLY | O_CREAT | O_TRUNC | O_BINARY, S_IRUSR | S_IWUSR); + if (!fd) { + ret = -errno; + goto out; + } + + ret = qemu_gluster_do_truncate(fd, opts->size, opts->preallocation, errp); + +out: + if (fd) { + if (glfs_close(fd) != 0 && ret == 0) { + ret = -errno; + } + } + glfs_clear_preopened(glfs); + return ret; +} + +static int coroutine_fn qemu_gluster_co_create_opts(BlockDriver *drv, + const char *filename, + QemuOpts *opts, + Error **errp) +{ + BlockdevCreateOptions *options; + BlockdevCreateOptionsGluster *gopts; + BlockdevOptionsGluster *gconf; + char *tmp = NULL; + Error *local_err = NULL; + int ret; + + options = g_new0(BlockdevCreateOptions, 1); + options->driver = BLOCKDEV_DRIVER_GLUSTER; + gopts = &options->u.gluster; + + gconf = g_new0(BlockdevOptionsGluster, 1); + gopts->location = gconf; + + gopts->size = ROUND_UP(qemu_opt_get_size_del(opts, BLOCK_OPT_SIZE, 0), + BDRV_SECTOR_SIZE); + + tmp = qemu_opt_get_del(opts, BLOCK_OPT_PREALLOC); + gopts->preallocation = qapi_enum_parse(&PreallocMode_lookup, tmp, + PREALLOC_MODE_OFF, &local_err); + g_free(tmp); + if (local_err) { + error_propagate(errp, local_err); + ret = -EINVAL; + goto fail; + } + + gconf->debug = qemu_opt_get_number_del(opts, GLUSTER_OPT_DEBUG, + GLUSTER_DEBUG_DEFAULT); + if (gconf->debug < 0) { + gconf->debug = 0; + } else if (gconf->debug > GLUSTER_DEBUG_MAX) { + gconf->debug = GLUSTER_DEBUG_MAX; + } + gconf->has_debug = true; + + gconf->logfile = qemu_opt_get_del(opts, GLUSTER_OPT_LOGFILE); + if (!gconf->logfile) { + gconf->logfile = g_strdup(GLUSTER_LOGFILE_DEFAULT); + } + gconf->has_logfile = true; + + ret = qemu_gluster_parse(gconf, filename, NULL, errp); + if (ret < 0) { + goto fail; + } + + ret = qemu_gluster_co_create(options, errp); + if (ret < 0) { + goto fail; + } + + ret = 0; +fail: + qapi_free_BlockdevCreateOptions(options); + return ret; +} + +static coroutine_fn int qemu_gluster_co_rw(BlockDriverState *bs, + int64_t sector_num, int nb_sectors, + QEMUIOVector *qiov, int write) +{ + int ret; + GlusterAIOCB acb; + BDRVGlusterState *s = bs->opaque; + size_t size = nb_sectors * BDRV_SECTOR_SIZE; + off_t offset = sector_num * BDRV_SECTOR_SIZE; + + acb.size = size; + acb.ret = 0; + acb.coroutine = qemu_coroutine_self(); + acb.aio_context = bdrv_get_aio_context(bs); + + if (write) { + ret = glfs_pwritev_async(s->fd, qiov->iov, qiov->niov, offset, 0, + gluster_finish_aiocb, &acb); + } else { + ret = glfs_preadv_async(s->fd, qiov->iov, qiov->niov, offset, 0, + gluster_finish_aiocb, &acb); + } + + if (ret < 0) { + return -errno; + } + + qemu_coroutine_yield(); + return acb.ret; +} + +static coroutine_fn int qemu_gluster_co_truncate(BlockDriverState *bs, + int64_t offset, + bool exact, + PreallocMode prealloc, + BdrvRequestFlags flags, + Error **errp) +{ + BDRVGlusterState *s = bs->opaque; + return qemu_gluster_do_truncate(s->fd, offset, prealloc, errp); +} + +static coroutine_fn int qemu_gluster_co_readv(BlockDriverState *bs, + int64_t sector_num, + int nb_sectors, + QEMUIOVector *qiov) +{ + return qemu_gluster_co_rw(bs, sector_num, nb_sectors, qiov, 0); +} + +static coroutine_fn int qemu_gluster_co_writev(BlockDriverState *bs, + int64_t sector_num, + int nb_sectors, + QEMUIOVector *qiov, + int flags) +{ + assert(!flags); + return qemu_gluster_co_rw(bs, sector_num, nb_sectors, qiov, 1); +} + +static void qemu_gluster_close(BlockDriverState *bs) +{ + BDRVGlusterState *s = bs->opaque; + + g_free(s->logfile); + if (s->fd) { + glfs_close(s->fd); + s->fd = NULL; + } + glfs_clear_preopened(s->glfs); +} + +static coroutine_fn int qemu_gluster_co_flush_to_disk(BlockDriverState *bs) +{ + int ret; + GlusterAIOCB acb; + BDRVGlusterState *s = bs->opaque; + + acb.size = 0; + acb.ret = 0; + acb.coroutine = qemu_coroutine_self(); + acb.aio_context = bdrv_get_aio_context(bs); + + ret = glfs_fsync_async(s->fd, gluster_finish_aiocb, &acb); + if (ret < 0) { + ret = -errno; + goto error; + } + + qemu_coroutine_yield(); + if (acb.ret < 0) { + ret = acb.ret; + goto error; + } + + return acb.ret; + +error: + /* Some versions of Gluster (3.5.6 -> 3.5.8?) will not retain its cache + * after a fsync failure, so we have no way of allowing the guest to safely + * continue. Gluster versions prior to 3.5.6 don't retain the cache + * either, but will invalidate the fd on error, so this is again our only + * option. + * + * The 'resync-failed-syncs-after-fsync' xlator option for the + * write-behind cache will cause later gluster versions to retain its + * cache after error, so long as the fd remains open. However, we + * currently have no way of knowing if this option is supported. + * + * TODO: Once gluster provides a way for us to determine if the option + * is supported, bypass the closure and setting drv to NULL. */ + qemu_gluster_close(bs); + bs->drv = NULL; + return ret; +} + +#ifdef CONFIG_GLUSTERFS_DISCARD +static coroutine_fn int qemu_gluster_co_pdiscard(BlockDriverState *bs, + int64_t offset, int size) +{ + int ret; + GlusterAIOCB acb; + BDRVGlusterState *s = bs->opaque; + + acb.size = 0; + acb.ret = 0; + acb.coroutine = qemu_coroutine_self(); + acb.aio_context = bdrv_get_aio_context(bs); + + ret = glfs_discard_async(s->fd, offset, size, gluster_finish_aiocb, &acb); + if (ret < 0) { + return -errno; + } + + qemu_coroutine_yield(); + return acb.ret; +} +#endif + +static int64_t qemu_gluster_getlength(BlockDriverState *bs) +{ + BDRVGlusterState *s = bs->opaque; + int64_t ret; + + ret = glfs_lseek(s->fd, 0, SEEK_END); + if (ret < 0) { + return -errno; + } else { + return ret; + } +} + +static int64_t qemu_gluster_allocated_file_size(BlockDriverState *bs) +{ + BDRVGlusterState *s = bs->opaque; + struct stat st; + int ret; + + ret = glfs_fstat(s->fd, &st); + if (ret < 0) { + return -errno; + } else { + return st.st_blocks * 512; + } +} + +/* + * Find allocation range in @bs around offset @start. + * May change underlying file descriptor's file offset. + * If @start is not in a hole, store @start in @data, and the + * beginning of the next hole in @hole, and return 0. + * If @start is in a non-trailing hole, store @start in @hole and the + * beginning of the next non-hole in @data, and return 0. + * If @start is in a trailing hole or beyond EOF, return -ENXIO. + * If we can't find out, return a negative errno other than -ENXIO. + * + * (Shamefully copied from file-posix.c, only minuscule adaptions.) + */ +static int find_allocation(BlockDriverState *bs, off_t start, + off_t *data, off_t *hole) +{ + BDRVGlusterState *s = bs->opaque; + + if (!s->supports_seek_data) { + goto exit; + } + +#if defined SEEK_HOLE && defined SEEK_DATA + off_t offs; + + /* + * SEEK_DATA cases: + * D1. offs == start: start is in data + * D2. offs > start: start is in a hole, next data at offs + * D3. offs < 0, errno = ENXIO: either start is in a trailing hole + * or start is beyond EOF + * If the latter happens, the file has been truncated behind + * our back since we opened it. All bets are off then. + * Treating like a trailing hole is simplest. + * D4. offs < 0, errno != ENXIO: we learned nothing + */ + offs = glfs_lseek(s->fd, start, SEEK_DATA); + if (offs < 0) { + return -errno; /* D3 or D4 */ + } + + if (offs < start) { + /* This is not a valid return by lseek(). We are safe to just return + * -EIO in this case, and we'll treat it like D4. Unfortunately some + * versions of gluster server will return offs < start, so an assert + * here will unnecessarily abort QEMU. */ + return -EIO; + } + + if (offs > start) { + /* D2: in hole, next data at offs */ + *hole = start; + *data = offs; + return 0; + } + + /* D1: in data, end not yet known */ + + /* + * SEEK_HOLE cases: + * H1. offs == start: start is in a hole + * If this happens here, a hole has been dug behind our back + * since the previous lseek(). + * H2. offs > start: either start is in data, next hole at offs, + * or start is in trailing hole, EOF at offs + * Linux treats trailing holes like any other hole: offs == + * start. Solaris seeks to EOF instead: offs > start (blech). + * If that happens here, a hole has been dug behind our back + * since the previous lseek(). + * H3. offs < 0, errno = ENXIO: start is beyond EOF + * If this happens, the file has been truncated behind our + * back since we opened it. Treat it like a trailing hole. + * H4. offs < 0, errno != ENXIO: we learned nothing + * Pretend we know nothing at all, i.e. "forget" about D1. + */ + offs = glfs_lseek(s->fd, start, SEEK_HOLE); + if (offs < 0) { + return -errno; /* D1 and (H3 or H4) */ + } + + if (offs < start) { + /* This is not a valid return by lseek(). We are safe to just return + * -EIO in this case, and we'll treat it like H4. Unfortunately some + * versions of gluster server will return offs < start, so an assert + * here will unnecessarily abort QEMU. */ + return -EIO; + } + + if (offs > start) { + /* + * D1 and H2: either in data, next hole at offs, or it was in + * data but is now in a trailing hole. In the latter case, + * all bets are off. Treating it as if it there was data all + * the way to EOF is safe, so simply do that. + */ + *data = start; + *hole = offs; + return 0; + } + + /* D1 and H1 */ + return -EBUSY; +#endif + +exit: + return -ENOTSUP; +} + +/* + * Returns the allocation status of the specified offset. + * + * The block layer guarantees 'offset' and 'bytes' are within bounds. + * + * 'pnum' is set to the number of bytes (including and immediately following + * the specified offset) that are known to be in the same + * allocated/unallocated state. + * + * 'bytes' is the max value 'pnum' should be set to. + * + * (Based on raw_co_block_status() from file-posix.c.) + */ +static int coroutine_fn qemu_gluster_co_block_status(BlockDriverState *bs, + bool want_zero, + int64_t offset, + int64_t bytes, + int64_t *pnum, + int64_t *map, + BlockDriverState **file) +{ + BDRVGlusterState *s = bs->opaque; + off_t data = 0, hole = 0; + int ret = -EINVAL; + + if (!s->fd) { + return ret; + } + + if (!want_zero) { + *pnum = bytes; + *map = offset; + *file = bs; + return BDRV_BLOCK_DATA | BDRV_BLOCK_OFFSET_VALID; + } + + ret = find_allocation(bs, offset, &data, &hole); + if (ret == -ENXIO) { + /* Trailing hole */ + *pnum = bytes; + ret = BDRV_BLOCK_ZERO; + } else if (ret < 0) { + /* No info available, so pretend there are no holes */ + *pnum = bytes; + ret = BDRV_BLOCK_DATA; + } else if (data == offset) { + /* On a data extent, compute bytes to the end of the extent, + * possibly including a partial sector at EOF. */ + *pnum = MIN(bytes, hole - offset); + ret = BDRV_BLOCK_DATA; + } else { + /* On a hole, compute bytes to the beginning of the next extent. */ + assert(hole == offset); + *pnum = MIN(bytes, data - offset); + ret = BDRV_BLOCK_ZERO; + } + + *map = offset; + *file = bs; + + return ret | BDRV_BLOCK_OFFSET_VALID; +} + + +static const char *const gluster_strong_open_opts[] = { + GLUSTER_OPT_VOLUME, + GLUSTER_OPT_PATH, + GLUSTER_OPT_TYPE, + GLUSTER_OPT_SERVER_PATTERN, + GLUSTER_OPT_HOST, + GLUSTER_OPT_PORT, + GLUSTER_OPT_TO, + GLUSTER_OPT_IPV4, + GLUSTER_OPT_IPV6, + GLUSTER_OPT_SOCKET, + + NULL +}; + +static BlockDriver bdrv_gluster = { + .format_name = "gluster", + .protocol_name = "gluster", + .instance_size = sizeof(BDRVGlusterState), + .bdrv_needs_filename = false, + .bdrv_file_open = qemu_gluster_open, + .bdrv_reopen_prepare = qemu_gluster_reopen_prepare, + .bdrv_reopen_commit = qemu_gluster_reopen_commit, + .bdrv_reopen_abort = qemu_gluster_reopen_abort, + .bdrv_close = qemu_gluster_close, + .bdrv_co_create = qemu_gluster_co_create, + .bdrv_co_create_opts = qemu_gluster_co_create_opts, + .bdrv_getlength = qemu_gluster_getlength, + .bdrv_get_allocated_file_size = qemu_gluster_allocated_file_size, + .bdrv_co_truncate = qemu_gluster_co_truncate, + .bdrv_co_readv = qemu_gluster_co_readv, + .bdrv_co_writev = qemu_gluster_co_writev, + .bdrv_co_flush_to_disk = qemu_gluster_co_flush_to_disk, +#ifdef CONFIG_GLUSTERFS_DISCARD + .bdrv_co_pdiscard = qemu_gluster_co_pdiscard, +#endif +#ifdef CONFIG_GLUSTERFS_ZEROFILL + .bdrv_co_pwrite_zeroes = qemu_gluster_co_pwrite_zeroes, +#endif + .bdrv_co_block_status = qemu_gluster_co_block_status, + .bdrv_refresh_limits = qemu_gluster_refresh_limits, + .create_opts = &qemu_gluster_create_opts, + .strong_runtime_opts = gluster_strong_open_opts, +}; + +static BlockDriver bdrv_gluster_tcp = { + .format_name = "gluster", + .protocol_name = "gluster+tcp", + .instance_size = sizeof(BDRVGlusterState), + .bdrv_needs_filename = false, + .bdrv_file_open = qemu_gluster_open, + .bdrv_reopen_prepare = qemu_gluster_reopen_prepare, + .bdrv_reopen_commit = qemu_gluster_reopen_commit, + .bdrv_reopen_abort = qemu_gluster_reopen_abort, + .bdrv_close = qemu_gluster_close, + .bdrv_co_create = qemu_gluster_co_create, + .bdrv_co_create_opts = qemu_gluster_co_create_opts, + .bdrv_getlength = qemu_gluster_getlength, + .bdrv_get_allocated_file_size = qemu_gluster_allocated_file_size, + .bdrv_co_truncate = qemu_gluster_co_truncate, + .bdrv_co_readv = qemu_gluster_co_readv, + .bdrv_co_writev = qemu_gluster_co_writev, + .bdrv_co_flush_to_disk = qemu_gluster_co_flush_to_disk, +#ifdef CONFIG_GLUSTERFS_DISCARD + .bdrv_co_pdiscard = qemu_gluster_co_pdiscard, +#endif +#ifdef CONFIG_GLUSTERFS_ZEROFILL + .bdrv_co_pwrite_zeroes = qemu_gluster_co_pwrite_zeroes, +#endif + .bdrv_co_block_status = qemu_gluster_co_block_status, + .bdrv_refresh_limits = qemu_gluster_refresh_limits, + .create_opts = &qemu_gluster_create_opts, + .strong_runtime_opts = gluster_strong_open_opts, +}; + +static BlockDriver bdrv_gluster_unix = { + .format_name = "gluster", + .protocol_name = "gluster+unix", + .instance_size = sizeof(BDRVGlusterState), + .bdrv_needs_filename = true, + .bdrv_file_open = qemu_gluster_open, + .bdrv_reopen_prepare = qemu_gluster_reopen_prepare, + .bdrv_reopen_commit = qemu_gluster_reopen_commit, + .bdrv_reopen_abort = qemu_gluster_reopen_abort, + .bdrv_close = qemu_gluster_close, + .bdrv_co_create = qemu_gluster_co_create, + .bdrv_co_create_opts = qemu_gluster_co_create_opts, + .bdrv_getlength = qemu_gluster_getlength, + .bdrv_get_allocated_file_size = qemu_gluster_allocated_file_size, + .bdrv_co_truncate = qemu_gluster_co_truncate, + .bdrv_co_readv = qemu_gluster_co_readv, + .bdrv_co_writev = qemu_gluster_co_writev, + .bdrv_co_flush_to_disk = qemu_gluster_co_flush_to_disk, +#ifdef CONFIG_GLUSTERFS_DISCARD + .bdrv_co_pdiscard = qemu_gluster_co_pdiscard, +#endif +#ifdef CONFIG_GLUSTERFS_ZEROFILL + .bdrv_co_pwrite_zeroes = qemu_gluster_co_pwrite_zeroes, +#endif + .bdrv_co_block_status = qemu_gluster_co_block_status, + .bdrv_refresh_limits = qemu_gluster_refresh_limits, + .create_opts = &qemu_gluster_create_opts, + .strong_runtime_opts = gluster_strong_open_opts, +}; + +/* rdma is deprecated (actually never supported for volfile fetch). + * Let's maintain it for the protocol compatibility, to make sure things + * won't break immediately. For now, gluster+rdma will fall back to gluster+tcp + * protocol with a warning. + * TODO: remove gluster+rdma interface support + */ +static BlockDriver bdrv_gluster_rdma = { + .format_name = "gluster", + .protocol_name = "gluster+rdma", + .instance_size = sizeof(BDRVGlusterState), + .bdrv_needs_filename = true, + .bdrv_file_open = qemu_gluster_open, + .bdrv_reopen_prepare = qemu_gluster_reopen_prepare, + .bdrv_reopen_commit = qemu_gluster_reopen_commit, + .bdrv_reopen_abort = qemu_gluster_reopen_abort, + .bdrv_close = qemu_gluster_close, + .bdrv_co_create = qemu_gluster_co_create, + .bdrv_co_create_opts = qemu_gluster_co_create_opts, + .bdrv_getlength = qemu_gluster_getlength, + .bdrv_get_allocated_file_size = qemu_gluster_allocated_file_size, + .bdrv_co_truncate = qemu_gluster_co_truncate, + .bdrv_co_readv = qemu_gluster_co_readv, + .bdrv_co_writev = qemu_gluster_co_writev, + .bdrv_co_flush_to_disk = qemu_gluster_co_flush_to_disk, +#ifdef CONFIG_GLUSTERFS_DISCARD + .bdrv_co_pdiscard = qemu_gluster_co_pdiscard, +#endif +#ifdef CONFIG_GLUSTERFS_ZEROFILL + .bdrv_co_pwrite_zeroes = qemu_gluster_co_pwrite_zeroes, +#endif + .bdrv_co_block_status = qemu_gluster_co_block_status, + .bdrv_refresh_limits = qemu_gluster_refresh_limits, + .create_opts = &qemu_gluster_create_opts, + .strong_runtime_opts = gluster_strong_open_opts, +}; + +static void bdrv_gluster_init(void) +{ + bdrv_register(&bdrv_gluster_rdma); + bdrv_register(&bdrv_gluster_unix); + bdrv_register(&bdrv_gluster_tcp); + bdrv_register(&bdrv_gluster); +} + +block_init(bdrv_gluster_init); diff --git a/block/io.c b/block/io.c new file mode 100644 index 000000000..a9f56a9ab --- /dev/null +++ b/block/io.c @@ -0,0 +1,3297 @@ +/* + * Block layer I/O functions + * + * Copyright (c) 2003 Fabrice Bellard + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ + +#include "qemu/osdep.h" +#include "trace.h" +#include "sysemu/block-backend.h" +#include "block/aio-wait.h" +#include "block/blockjob.h" +#include "block/blockjob_int.h" +#include "block/block_int.h" +#include "block/coroutines.h" +#include "qemu/cutils.h" +#include "qapi/error.h" +#include "qemu/error-report.h" +#include "qemu/main-loop.h" +#include "sysemu/replay.h" + +/* Maximum bounce buffer for copy-on-read and write zeroes, in bytes */ +#define MAX_BOUNCE_BUFFER (32768 << BDRV_SECTOR_BITS) + +static void bdrv_parent_cb_resize(BlockDriverState *bs); +static int coroutine_fn bdrv_co_do_pwrite_zeroes(BlockDriverState *bs, + int64_t offset, int bytes, BdrvRequestFlags flags); + +static void bdrv_parent_drained_begin(BlockDriverState *bs, BdrvChild *ignore, + bool ignore_bds_parents) +{ + BdrvChild *c, *next; + + QLIST_FOREACH_SAFE(c, &bs->parents, next_parent, next) { + if (c == ignore || (ignore_bds_parents && c->klass->parent_is_bds)) { + continue; + } + bdrv_parent_drained_begin_single(c, false); + } +} + +static void bdrv_parent_drained_end_single_no_poll(BdrvChild *c, + int *drained_end_counter) +{ + assert(c->parent_quiesce_counter > 0); + c->parent_quiesce_counter--; + if (c->klass->drained_end) { + c->klass->drained_end(c, drained_end_counter); + } +} + +void bdrv_parent_drained_end_single(BdrvChild *c) +{ + int drained_end_counter = 0; + bdrv_parent_drained_end_single_no_poll(c, &drained_end_counter); + BDRV_POLL_WHILE(c->bs, qatomic_read(&drained_end_counter) > 0); +} + +static void bdrv_parent_drained_end(BlockDriverState *bs, BdrvChild *ignore, + bool ignore_bds_parents, + int *drained_end_counter) +{ + BdrvChild *c; + + QLIST_FOREACH(c, &bs->parents, next_parent) { + if (c == ignore || (ignore_bds_parents && c->klass->parent_is_bds)) { + continue; + } + bdrv_parent_drained_end_single_no_poll(c, drained_end_counter); + } +} + +static bool bdrv_parent_drained_poll_single(BdrvChild *c) +{ + if (c->klass->drained_poll) { + return c->klass->drained_poll(c); + } + return false; +} + +static bool bdrv_parent_drained_poll(BlockDriverState *bs, BdrvChild *ignore, + bool ignore_bds_parents) +{ + BdrvChild *c, *next; + bool busy = false; + + QLIST_FOREACH_SAFE(c, &bs->parents, next_parent, next) { + if (c == ignore || (ignore_bds_parents && c->klass->parent_is_bds)) { + continue; + } + busy |= bdrv_parent_drained_poll_single(c); + } + + return busy; +} + +void bdrv_parent_drained_begin_single(BdrvChild *c, bool poll) +{ + c->parent_quiesce_counter++; + if (c->klass->drained_begin) { + c->klass->drained_begin(c); + } + if (poll) { + BDRV_POLL_WHILE(c->bs, bdrv_parent_drained_poll_single(c)); + } +} + +static void bdrv_merge_limits(BlockLimits *dst, const BlockLimits *src) +{ + dst->opt_transfer = MAX(dst->opt_transfer, src->opt_transfer); + dst->max_transfer = MIN_NON_ZERO(dst->max_transfer, src->max_transfer); + dst->opt_mem_alignment = MAX(dst->opt_mem_alignment, + src->opt_mem_alignment); + dst->min_mem_alignment = MAX(dst->min_mem_alignment, + src->min_mem_alignment); + dst->max_iov = MIN_NON_ZERO(dst->max_iov, src->max_iov); +} + +void bdrv_refresh_limits(BlockDriverState *bs, Error **errp) +{ + BlockDriver *drv = bs->drv; + BdrvChild *c; + bool have_limits; + Error *local_err = NULL; + + memset(&bs->bl, 0, sizeof(bs->bl)); + + if (!drv) { + return; + } + + /* Default alignment based on whether driver has byte interface */ + bs->bl.request_alignment = (drv->bdrv_co_preadv || + drv->bdrv_aio_preadv || + drv->bdrv_co_preadv_part) ? 1 : 512; + + /* Take some limits from the children as a default */ + have_limits = false; + QLIST_FOREACH(c, &bs->children, next) { + if (c->role & (BDRV_CHILD_DATA | BDRV_CHILD_FILTERED | BDRV_CHILD_COW)) + { + bdrv_refresh_limits(c->bs, &local_err); + if (local_err) { + error_propagate(errp, local_err); + return; + } + bdrv_merge_limits(&bs->bl, &c->bs->bl); + have_limits = true; + } + } + + if (!have_limits) { + bs->bl.min_mem_alignment = 512; + bs->bl.opt_mem_alignment = qemu_real_host_page_size; + + /* Safe default since most protocols use readv()/writev()/etc */ + bs->bl.max_iov = IOV_MAX; + } + + /* Then let the driver override it */ + if (drv->bdrv_refresh_limits) { + drv->bdrv_refresh_limits(bs, errp); + } +} + +/** + * The copy-on-read flag is actually a reference count so multiple users may + * use the feature without worrying about clobbering its previous state. + * Copy-on-read stays enabled until all users have called to disable it. + */ +void bdrv_enable_copy_on_read(BlockDriverState *bs) +{ + qatomic_inc(&bs->copy_on_read); +} + +void bdrv_disable_copy_on_read(BlockDriverState *bs) +{ + int old = qatomic_fetch_dec(&bs->copy_on_read); + assert(old >= 1); +} + +typedef struct { + Coroutine *co; + BlockDriverState *bs; + bool done; + bool begin; + bool recursive; + bool poll; + BdrvChild *parent; + bool ignore_bds_parents; + int *drained_end_counter; +} BdrvCoDrainData; + +static void coroutine_fn bdrv_drain_invoke_entry(void *opaque) +{ + BdrvCoDrainData *data = opaque; + BlockDriverState *bs = data->bs; + + if (data->begin) { + bs->drv->bdrv_co_drain_begin(bs); + } else { + bs->drv->bdrv_co_drain_end(bs); + } + + /* Set data->done and decrement drained_end_counter before bdrv_wakeup() */ + qatomic_mb_set(&data->done, true); + if (!data->begin) { + qatomic_dec(data->drained_end_counter); + } + bdrv_dec_in_flight(bs); + + g_free(data); +} + +/* Recursively call BlockDriver.bdrv_co_drain_begin/end callbacks */ +static void bdrv_drain_invoke(BlockDriverState *bs, bool begin, + int *drained_end_counter) +{ + BdrvCoDrainData *data; + + if (!bs->drv || (begin && !bs->drv->bdrv_co_drain_begin) || + (!begin && !bs->drv->bdrv_co_drain_end)) { + return; + } + + data = g_new(BdrvCoDrainData, 1); + *data = (BdrvCoDrainData) { + .bs = bs, + .done = false, + .begin = begin, + .drained_end_counter = drained_end_counter, + }; + + if (!begin) { + qatomic_inc(drained_end_counter); + } + + /* Make sure the driver callback completes during the polling phase for + * drain_begin. */ + bdrv_inc_in_flight(bs); + data->co = qemu_coroutine_create(bdrv_drain_invoke_entry, data); + aio_co_schedule(bdrv_get_aio_context(bs), data->co); +} + +/* Returns true if BDRV_POLL_WHILE() should go into a blocking aio_poll() */ +bool bdrv_drain_poll(BlockDriverState *bs, bool recursive, + BdrvChild *ignore_parent, bool ignore_bds_parents) +{ + BdrvChild *child, *next; + + if (bdrv_parent_drained_poll(bs, ignore_parent, ignore_bds_parents)) { + return true; + } + + if (qatomic_read(&bs->in_flight)) { + return true; + } + + if (recursive) { + assert(!ignore_bds_parents); + QLIST_FOREACH_SAFE(child, &bs->children, next, next) { + if (bdrv_drain_poll(child->bs, recursive, child, false)) { + return true; + } + } + } + + return false; +} + +static bool bdrv_drain_poll_top_level(BlockDriverState *bs, bool recursive, + BdrvChild *ignore_parent) +{ + return bdrv_drain_poll(bs, recursive, ignore_parent, false); +} + +static void bdrv_do_drained_begin(BlockDriverState *bs, bool recursive, + BdrvChild *parent, bool ignore_bds_parents, + bool poll); +static void bdrv_do_drained_end(BlockDriverState *bs, bool recursive, + BdrvChild *parent, bool ignore_bds_parents, + int *drained_end_counter); + +static void bdrv_co_drain_bh_cb(void *opaque) +{ + BdrvCoDrainData *data = opaque; + Coroutine *co = data->co; + BlockDriverState *bs = data->bs; + + if (bs) { + AioContext *ctx = bdrv_get_aio_context(bs); + aio_context_acquire(ctx); + bdrv_dec_in_flight(bs); + if (data->begin) { + assert(!data->drained_end_counter); + bdrv_do_drained_begin(bs, data->recursive, data->parent, + data->ignore_bds_parents, data->poll); + } else { + assert(!data->poll); + bdrv_do_drained_end(bs, data->recursive, data->parent, + data->ignore_bds_parents, + data->drained_end_counter); + } + aio_context_release(ctx); + } else { + assert(data->begin); + bdrv_drain_all_begin(); + } + + data->done = true; + aio_co_wake(co); +} + +static void coroutine_fn bdrv_co_yield_to_drain(BlockDriverState *bs, + bool begin, bool recursive, + BdrvChild *parent, + bool ignore_bds_parents, + bool poll, + int *drained_end_counter) +{ + BdrvCoDrainData data; + Coroutine *self = qemu_coroutine_self(); + AioContext *ctx = bdrv_get_aio_context(bs); + AioContext *co_ctx = qemu_coroutine_get_aio_context(self); + + /* Calling bdrv_drain() from a BH ensures the current coroutine yields and + * other coroutines run if they were queued by aio_co_enter(). */ + + assert(qemu_in_coroutine()); + data = (BdrvCoDrainData) { + .co = self, + .bs = bs, + .done = false, + .begin = begin, + .recursive = recursive, + .parent = parent, + .ignore_bds_parents = ignore_bds_parents, + .poll = poll, + .drained_end_counter = drained_end_counter, + }; + + if (bs) { + bdrv_inc_in_flight(bs); + } + + /* + * Temporarily drop the lock across yield or we would get deadlocks. + * bdrv_co_drain_bh_cb() reaquires the lock as needed. + * + * When we yield below, the lock for the current context will be + * released, so if this is actually the lock that protects bs, don't drop + * it a second time. + */ + if (ctx != co_ctx) { + aio_context_release(ctx); + } + replay_bh_schedule_oneshot_event(ctx, bdrv_co_drain_bh_cb, &data); + + qemu_coroutine_yield(); + /* If we are resumed from some other event (such as an aio completion or a + * timer callback), it is a bug in the caller that should be fixed. */ + assert(data.done); + + /* Reaquire the AioContext of bs if we dropped it */ + if (ctx != co_ctx) { + aio_context_acquire(ctx); + } +} + +void bdrv_do_drained_begin_quiesce(BlockDriverState *bs, + BdrvChild *parent, bool ignore_bds_parents) +{ + assert(!qemu_in_coroutine()); + + /* Stop things in parent-to-child order */ + if (qatomic_fetch_inc(&bs->quiesce_counter) == 0) { + aio_disable_external(bdrv_get_aio_context(bs)); + } + + bdrv_parent_drained_begin(bs, parent, ignore_bds_parents); + bdrv_drain_invoke(bs, true, NULL); +} + +static void bdrv_do_drained_begin(BlockDriverState *bs, bool recursive, + BdrvChild *parent, bool ignore_bds_parents, + bool poll) +{ + BdrvChild *child, *next; + + if (qemu_in_coroutine()) { + bdrv_co_yield_to_drain(bs, true, recursive, parent, ignore_bds_parents, + poll, NULL); + return; + } + + bdrv_do_drained_begin_quiesce(bs, parent, ignore_bds_parents); + + if (recursive) { + assert(!ignore_bds_parents); + bs->recursive_quiesce_counter++; + QLIST_FOREACH_SAFE(child, &bs->children, next, next) { + bdrv_do_drained_begin(child->bs, true, child, ignore_bds_parents, + false); + } + } + + /* + * Wait for drained requests to finish. + * + * Calling BDRV_POLL_WHILE() only once for the top-level node is okay: The + * call is needed so things in this AioContext can make progress even + * though we don't return to the main AioContext loop - this automatically + * includes other nodes in the same AioContext and therefore all child + * nodes. + */ + if (poll) { + assert(!ignore_bds_parents); + BDRV_POLL_WHILE(bs, bdrv_drain_poll_top_level(bs, recursive, parent)); + } +} + +void bdrv_drained_begin(BlockDriverState *bs) +{ + bdrv_do_drained_begin(bs, false, NULL, false, true); +} + +void bdrv_subtree_drained_begin(BlockDriverState *bs) +{ + bdrv_do_drained_begin(bs, true, NULL, false, true); +} + +/** + * This function does not poll, nor must any of its recursively called + * functions. The *drained_end_counter pointee will be incremented + * once for every background operation scheduled, and decremented once + * the operation settles. Therefore, the pointer must remain valid + * until the pointee reaches 0. That implies that whoever sets up the + * pointee has to poll until it is 0. + * + * We use atomic operations to access *drained_end_counter, because + * (1) when called from bdrv_set_aio_context_ignore(), the subgraph of + * @bs may contain nodes in different AioContexts, + * (2) bdrv_drain_all_end() uses the same counter for all nodes, + * regardless of which AioContext they are in. + */ +static void bdrv_do_drained_end(BlockDriverState *bs, bool recursive, + BdrvChild *parent, bool ignore_bds_parents, + int *drained_end_counter) +{ + BdrvChild *child; + int old_quiesce_counter; + + assert(drained_end_counter != NULL); + + if (qemu_in_coroutine()) { + bdrv_co_yield_to_drain(bs, false, recursive, parent, ignore_bds_parents, + false, drained_end_counter); + return; + } + assert(bs->quiesce_counter > 0); + + /* Re-enable things in child-to-parent order */ + bdrv_drain_invoke(bs, false, drained_end_counter); + bdrv_parent_drained_end(bs, parent, ignore_bds_parents, + drained_end_counter); + + old_quiesce_counter = qatomic_fetch_dec(&bs->quiesce_counter); + if (old_quiesce_counter == 1) { + aio_enable_external(bdrv_get_aio_context(bs)); + } + + if (recursive) { + assert(!ignore_bds_parents); + bs->recursive_quiesce_counter--; + QLIST_FOREACH(child, &bs->children, next) { + bdrv_do_drained_end(child->bs, true, child, ignore_bds_parents, + drained_end_counter); + } + } +} + +void bdrv_drained_end(BlockDriverState *bs) +{ + int drained_end_counter = 0; + bdrv_do_drained_end(bs, false, NULL, false, &drained_end_counter); + BDRV_POLL_WHILE(bs, qatomic_read(&drained_end_counter) > 0); +} + +void bdrv_drained_end_no_poll(BlockDriverState *bs, int *drained_end_counter) +{ + bdrv_do_drained_end(bs, false, NULL, false, drained_end_counter); +} + +void bdrv_subtree_drained_end(BlockDriverState *bs) +{ + int drained_end_counter = 0; + bdrv_do_drained_end(bs, true, NULL, false, &drained_end_counter); + BDRV_POLL_WHILE(bs, qatomic_read(&drained_end_counter) > 0); +} + +void bdrv_apply_subtree_drain(BdrvChild *child, BlockDriverState *new_parent) +{ + int i; + + for (i = 0; i < new_parent->recursive_quiesce_counter; i++) { + bdrv_do_drained_begin(child->bs, true, child, false, true); + } +} + +void bdrv_unapply_subtree_drain(BdrvChild *child, BlockDriverState *old_parent) +{ + int drained_end_counter = 0; + int i; + + for (i = 0; i < old_parent->recursive_quiesce_counter; i++) { + bdrv_do_drained_end(child->bs, true, child, false, + &drained_end_counter); + } + + BDRV_POLL_WHILE(child->bs, qatomic_read(&drained_end_counter) > 0); +} + +/* + * Wait for pending requests to complete on a single BlockDriverState subtree, + * and suspend block driver's internal I/O until next request arrives. + * + * Note that unlike bdrv_drain_all(), the caller must hold the BlockDriverState + * AioContext. + */ +void coroutine_fn bdrv_co_drain(BlockDriverState *bs) +{ + assert(qemu_in_coroutine()); + bdrv_drained_begin(bs); + bdrv_drained_end(bs); +} + +void bdrv_drain(BlockDriverState *bs) +{ + bdrv_drained_begin(bs); + bdrv_drained_end(bs); +} + +static void bdrv_drain_assert_idle(BlockDriverState *bs) +{ + BdrvChild *child, *next; + + assert(qatomic_read(&bs->in_flight) == 0); + QLIST_FOREACH_SAFE(child, &bs->children, next, next) { + bdrv_drain_assert_idle(child->bs); + } +} + +unsigned int bdrv_drain_all_count = 0; + +static bool bdrv_drain_all_poll(void) +{ + BlockDriverState *bs = NULL; + bool result = false; + + /* bdrv_drain_poll() can't make changes to the graph and we are holding the + * main AioContext lock, so iterating bdrv_next_all_states() is safe. */ + while ((bs = bdrv_next_all_states(bs))) { + AioContext *aio_context = bdrv_get_aio_context(bs); + aio_context_acquire(aio_context); + result |= bdrv_drain_poll(bs, false, NULL, true); + aio_context_release(aio_context); + } + + return result; +} + +/* + * Wait for pending requests to complete across all BlockDriverStates + * + * This function does not flush data to disk, use bdrv_flush_all() for that + * after calling this function. + * + * This pauses all block jobs and disables external clients. It must + * be paired with bdrv_drain_all_end(). + * + * NOTE: no new block jobs or BlockDriverStates can be created between + * the bdrv_drain_all_begin() and bdrv_drain_all_end() calls. + */ +void bdrv_drain_all_begin(void) +{ + BlockDriverState *bs = NULL; + + if (qemu_in_coroutine()) { + bdrv_co_yield_to_drain(NULL, true, false, NULL, true, true, NULL); + return; + } + + /* + * bdrv queue is managed by record/replay, + * waiting for finishing the I/O requests may + * be infinite + */ + if (replay_events_enabled()) { + return; + } + + /* AIO_WAIT_WHILE() with a NULL context can only be called from the main + * loop AioContext, so make sure we're in the main context. */ + assert(qemu_get_current_aio_context() == qemu_get_aio_context()); + assert(bdrv_drain_all_count < INT_MAX); + bdrv_drain_all_count++; + + /* Quiesce all nodes, without polling in-flight requests yet. The graph + * cannot change during this loop. */ + while ((bs = bdrv_next_all_states(bs))) { + AioContext *aio_context = bdrv_get_aio_context(bs); + + aio_context_acquire(aio_context); + bdrv_do_drained_begin(bs, false, NULL, true, false); + aio_context_release(aio_context); + } + + /* Now poll the in-flight requests */ + AIO_WAIT_WHILE(NULL, bdrv_drain_all_poll()); + + while ((bs = bdrv_next_all_states(bs))) { + bdrv_drain_assert_idle(bs); + } +} + +void bdrv_drain_all_end_quiesce(BlockDriverState *bs) +{ + int drained_end_counter = 0; + + g_assert(bs->quiesce_counter > 0); + g_assert(!bs->refcnt); + + while (bs->quiesce_counter) { + bdrv_do_drained_end(bs, false, NULL, true, &drained_end_counter); + } + BDRV_POLL_WHILE(bs, qatomic_read(&drained_end_counter) > 0); +} + +void bdrv_drain_all_end(void) +{ + BlockDriverState *bs = NULL; + int drained_end_counter = 0; + + /* + * bdrv queue is managed by record/replay, + * waiting for finishing the I/O requests may + * be endless + */ + if (replay_events_enabled()) { + return; + } + + while ((bs = bdrv_next_all_states(bs))) { + AioContext *aio_context = bdrv_get_aio_context(bs); + + aio_context_acquire(aio_context); + bdrv_do_drained_end(bs, false, NULL, true, &drained_end_counter); + aio_context_release(aio_context); + } + + assert(qemu_get_current_aio_context() == qemu_get_aio_context()); + AIO_WAIT_WHILE(NULL, qatomic_read(&drained_end_counter) > 0); + + assert(bdrv_drain_all_count > 0); + bdrv_drain_all_count--; +} + +void bdrv_drain_all(void) +{ + bdrv_drain_all_begin(); + bdrv_drain_all_end(); +} + +/** + * Remove an active request from the tracked requests list + * + * This function should be called when a tracked request is completing. + */ +static void tracked_request_end(BdrvTrackedRequest *req) +{ + if (req->serialising) { + qatomic_dec(&req->bs->serialising_in_flight); + } + + qemu_co_mutex_lock(&req->bs->reqs_lock); + QLIST_REMOVE(req, list); + qemu_co_queue_restart_all(&req->wait_queue); + qemu_co_mutex_unlock(&req->bs->reqs_lock); +} + +/** + * Add an active request to the tracked requests list + */ +static void tracked_request_begin(BdrvTrackedRequest *req, + BlockDriverState *bs, + int64_t offset, + uint64_t bytes, + enum BdrvTrackedRequestType type) +{ + assert(bytes <= INT64_MAX && offset <= INT64_MAX - bytes); + + *req = (BdrvTrackedRequest){ + .bs = bs, + .offset = offset, + .bytes = bytes, + .type = type, + .co = qemu_coroutine_self(), + .serialising = false, + .overlap_offset = offset, + .overlap_bytes = bytes, + }; + + qemu_co_queue_init(&req->wait_queue); + + qemu_co_mutex_lock(&bs->reqs_lock); + QLIST_INSERT_HEAD(&bs->tracked_requests, req, list); + qemu_co_mutex_unlock(&bs->reqs_lock); +} + +static bool tracked_request_overlaps(BdrvTrackedRequest *req, + int64_t offset, uint64_t bytes) +{ + /* aaaa bbbb */ + if (offset >= req->overlap_offset + req->overlap_bytes) { + return false; + } + /* bbbb aaaa */ + if (req->overlap_offset >= offset + bytes) { + return false; + } + return true; +} + +static bool coroutine_fn +bdrv_wait_serialising_requests_locked(BlockDriverState *bs, + BdrvTrackedRequest *self) +{ + BdrvTrackedRequest *req; + bool retry; + bool waited = false; + + do { + retry = false; + QLIST_FOREACH(req, &bs->tracked_requests, list) { + if (req == self || (!req->serialising && !self->serialising)) { + continue; + } + if (tracked_request_overlaps(req, self->overlap_offset, + self->overlap_bytes)) + { + /* Hitting this means there was a reentrant request, for + * example, a block driver issuing nested requests. This must + * never happen since it means deadlock. + */ + assert(qemu_coroutine_self() != req->co); + + /* If the request is already (indirectly) waiting for us, or + * will wait for us as soon as it wakes up, then just go on + * (instead of producing a deadlock in the former case). */ + if (!req->waiting_for) { + self->waiting_for = req; + qemu_co_queue_wait(&req->wait_queue, &bs->reqs_lock); + self->waiting_for = NULL; + retry = true; + waited = true; + break; + } + } + } + } while (retry); + return waited; +} + +bool bdrv_mark_request_serialising(BdrvTrackedRequest *req, uint64_t align) +{ + BlockDriverState *bs = req->bs; + int64_t overlap_offset = req->offset & ~(align - 1); + uint64_t overlap_bytes = ROUND_UP(req->offset + req->bytes, align) + - overlap_offset; + bool waited; + + qemu_co_mutex_lock(&bs->reqs_lock); + if (!req->serialising) { + qatomic_inc(&req->bs->serialising_in_flight); + req->serialising = true; + } + + req->overlap_offset = MIN(req->overlap_offset, overlap_offset); + req->overlap_bytes = MAX(req->overlap_bytes, overlap_bytes); + waited = bdrv_wait_serialising_requests_locked(bs, req); + qemu_co_mutex_unlock(&bs->reqs_lock); + return waited; +} + +/** + * Return the tracked request on @bs for the current coroutine, or + * NULL if there is none. + */ +BdrvTrackedRequest *coroutine_fn bdrv_co_get_self_request(BlockDriverState *bs) +{ + BdrvTrackedRequest *req; + Coroutine *self = qemu_coroutine_self(); + + QLIST_FOREACH(req, &bs->tracked_requests, list) { + if (req->co == self) { + return req; + } + } + + return NULL; +} + +/** + * Round a region to cluster boundaries + */ +void bdrv_round_to_clusters(BlockDriverState *bs, + int64_t offset, int64_t bytes, + int64_t *cluster_offset, + int64_t *cluster_bytes) +{ + BlockDriverInfo bdi; + + if (bdrv_get_info(bs, &bdi) < 0 || bdi.cluster_size == 0) { + *cluster_offset = offset; + *cluster_bytes = bytes; + } else { + int64_t c = bdi.cluster_size; + *cluster_offset = QEMU_ALIGN_DOWN(offset, c); + *cluster_bytes = QEMU_ALIGN_UP(offset - *cluster_offset + bytes, c); + } +} + +static int bdrv_get_cluster_size(BlockDriverState *bs) +{ + BlockDriverInfo bdi; + int ret; + + ret = bdrv_get_info(bs, &bdi); + if (ret < 0 || bdi.cluster_size == 0) { + return bs->bl.request_alignment; + } else { + return bdi.cluster_size; + } +} + +void bdrv_inc_in_flight(BlockDriverState *bs) +{ + qatomic_inc(&bs->in_flight); +} + +void bdrv_wakeup(BlockDriverState *bs) +{ + aio_wait_kick(); +} + +void bdrv_dec_in_flight(BlockDriverState *bs) +{ + qatomic_dec(&bs->in_flight); + bdrv_wakeup(bs); +} + +static bool coroutine_fn bdrv_wait_serialising_requests(BdrvTrackedRequest *self) +{ + BlockDriverState *bs = self->bs; + bool waited = false; + + if (!qatomic_read(&bs->serialising_in_flight)) { + return false; + } + + qemu_co_mutex_lock(&bs->reqs_lock); + waited = bdrv_wait_serialising_requests_locked(bs, self); + qemu_co_mutex_unlock(&bs->reqs_lock); + + return waited; +} + +static int bdrv_check_byte_request(BlockDriverState *bs, int64_t offset, + size_t size) +{ + if (size > BDRV_REQUEST_MAX_BYTES) { + return -EIO; + } + + if (!bdrv_is_inserted(bs)) { + return -ENOMEDIUM; + } + + if (offset < 0) { + return -EIO; + } + + return 0; +} + +int bdrv_pwrite_zeroes(BdrvChild *child, int64_t offset, + int bytes, BdrvRequestFlags flags) +{ + return bdrv_pwritev(child, offset, bytes, NULL, + BDRV_REQ_ZERO_WRITE | flags); +} + +/* + * Completely zero out a block device with the help of bdrv_pwrite_zeroes. + * The operation is sped up by checking the block status and only writing + * zeroes to the device if they currently do not return zeroes. Optional + * flags are passed through to bdrv_pwrite_zeroes (e.g. BDRV_REQ_MAY_UNMAP, + * BDRV_REQ_FUA). + * + * Returns < 0 on error, 0 on success. For error codes see bdrv_pwrite(). + */ +int bdrv_make_zero(BdrvChild *child, BdrvRequestFlags flags) +{ + int ret; + int64_t target_size, bytes, offset = 0; + BlockDriverState *bs = child->bs; + + target_size = bdrv_getlength(bs); + if (target_size < 0) { + return target_size; + } + + for (;;) { + bytes = MIN(target_size - offset, BDRV_REQUEST_MAX_BYTES); + if (bytes <= 0) { + return 0; + } + ret = bdrv_block_status(bs, offset, bytes, &bytes, NULL, NULL); + if (ret < 0) { + return ret; + } + if (ret & BDRV_BLOCK_ZERO) { + offset += bytes; + continue; + } + ret = bdrv_pwrite_zeroes(child, offset, bytes, flags); + if (ret < 0) { + return ret; + } + offset += bytes; + } +} + +/* See bdrv_pwrite() for the return codes */ +int bdrv_pread(BdrvChild *child, int64_t offset, void *buf, int bytes) +{ + int ret; + QEMUIOVector qiov = QEMU_IOVEC_INIT_BUF(qiov, buf, bytes); + + if (bytes < 0) { + return -EINVAL; + } + + ret = bdrv_preadv(child, offset, bytes, &qiov, 0); + + return ret < 0 ? ret : bytes; +} + +/* Return no. of bytes on success or < 0 on error. Important errors are: + -EIO generic I/O error (may happen for all errors) + -ENOMEDIUM No media inserted. + -EINVAL Invalid offset or number of bytes + -EACCES Trying to write a read-only device +*/ +int bdrv_pwrite(BdrvChild *child, int64_t offset, const void *buf, int bytes) +{ + int ret; + QEMUIOVector qiov = QEMU_IOVEC_INIT_BUF(qiov, buf, bytes); + + if (bytes < 0) { + return -EINVAL; + } + + ret = bdrv_pwritev(child, offset, bytes, &qiov, 0); + + return ret < 0 ? ret : bytes; +} + +/* + * Writes to the file and ensures that no writes are reordered across this + * request (acts as a barrier) + * + * Returns 0 on success, -errno in error cases. + */ +int bdrv_pwrite_sync(BdrvChild *child, int64_t offset, + const void *buf, int count) +{ + int ret; + + ret = bdrv_pwrite(child, offset, buf, count); + if (ret < 0) { + return ret; + } + + ret = bdrv_flush(child->bs); + if (ret < 0) { + return ret; + } + + return 0; +} + +typedef struct CoroutineIOCompletion { + Coroutine *coroutine; + int ret; +} CoroutineIOCompletion; + +static void bdrv_co_io_em_complete(void *opaque, int ret) +{ + CoroutineIOCompletion *co = opaque; + + co->ret = ret; + aio_co_wake(co->coroutine); +} + +static int coroutine_fn bdrv_driver_preadv(BlockDriverState *bs, + uint64_t offset, uint64_t bytes, + QEMUIOVector *qiov, + size_t qiov_offset, int flags) +{ + BlockDriver *drv = bs->drv; + int64_t sector_num; + unsigned int nb_sectors; + QEMUIOVector local_qiov; + int ret; + + assert(!(flags & ~BDRV_REQ_MASK)); + assert(!(flags & BDRV_REQ_NO_FALLBACK)); + + if (!drv) { + return -ENOMEDIUM; + } + + if (drv->bdrv_co_preadv_part) { + return drv->bdrv_co_preadv_part(bs, offset, bytes, qiov, qiov_offset, + flags); + } + + if (qiov_offset > 0 || bytes != qiov->size) { + qemu_iovec_init_slice(&local_qiov, qiov, qiov_offset, bytes); + qiov = &local_qiov; + } + + if (drv->bdrv_co_preadv) { + ret = drv->bdrv_co_preadv(bs, offset, bytes, qiov, flags); + goto out; + } + + if (drv->bdrv_aio_preadv) { + BlockAIOCB *acb; + CoroutineIOCompletion co = { + .coroutine = qemu_coroutine_self(), + }; + + acb = drv->bdrv_aio_preadv(bs, offset, bytes, qiov, flags, + bdrv_co_io_em_complete, &co); + if (acb == NULL) { + ret = -EIO; + goto out; + } else { + qemu_coroutine_yield(); + ret = co.ret; + goto out; + } + } + + sector_num = offset >> BDRV_SECTOR_BITS; + nb_sectors = bytes >> BDRV_SECTOR_BITS; + + assert(QEMU_IS_ALIGNED(offset, BDRV_SECTOR_SIZE)); + assert(QEMU_IS_ALIGNED(bytes, BDRV_SECTOR_SIZE)); + assert(bytes <= BDRV_REQUEST_MAX_BYTES); + assert(drv->bdrv_co_readv); + + ret = drv->bdrv_co_readv(bs, sector_num, nb_sectors, qiov); + +out: + if (qiov == &local_qiov) { + qemu_iovec_destroy(&local_qiov); + } + + return ret; +} + +static int coroutine_fn bdrv_driver_pwritev(BlockDriverState *bs, + uint64_t offset, uint64_t bytes, + QEMUIOVector *qiov, + size_t qiov_offset, int flags) +{ + BlockDriver *drv = bs->drv; + int64_t sector_num; + unsigned int nb_sectors; + QEMUIOVector local_qiov; + int ret; + + assert(!(flags & ~BDRV_REQ_MASK)); + assert(!(flags & BDRV_REQ_NO_FALLBACK)); + + if (!drv) { + return -ENOMEDIUM; + } + + if (drv->bdrv_co_pwritev_part) { + ret = drv->bdrv_co_pwritev_part(bs, offset, bytes, qiov, qiov_offset, + flags & bs->supported_write_flags); + flags &= ~bs->supported_write_flags; + goto emulate_flags; + } + + if (qiov_offset > 0 || bytes != qiov->size) { + qemu_iovec_init_slice(&local_qiov, qiov, qiov_offset, bytes); + qiov = &local_qiov; + } + + if (drv->bdrv_co_pwritev) { + ret = drv->bdrv_co_pwritev(bs, offset, bytes, qiov, + flags & bs->supported_write_flags); + flags &= ~bs->supported_write_flags; + goto emulate_flags; + } + + if (drv->bdrv_aio_pwritev) { + BlockAIOCB *acb; + CoroutineIOCompletion co = { + .coroutine = qemu_coroutine_self(), + }; + + acb = drv->bdrv_aio_pwritev(bs, offset, bytes, qiov, + flags & bs->supported_write_flags, + bdrv_co_io_em_complete, &co); + flags &= ~bs->supported_write_flags; + if (acb == NULL) { + ret = -EIO; + } else { + qemu_coroutine_yield(); + ret = co.ret; + } + goto emulate_flags; + } + + sector_num = offset >> BDRV_SECTOR_BITS; + nb_sectors = bytes >> BDRV_SECTOR_BITS; + + assert(QEMU_IS_ALIGNED(offset, BDRV_SECTOR_SIZE)); + assert(QEMU_IS_ALIGNED(bytes, BDRV_SECTOR_SIZE)); + assert(bytes <= BDRV_REQUEST_MAX_BYTES); + + assert(drv->bdrv_co_writev); + ret = drv->bdrv_co_writev(bs, sector_num, nb_sectors, qiov, + flags & bs->supported_write_flags); + flags &= ~bs->supported_write_flags; + +emulate_flags: + if (ret == 0 && (flags & BDRV_REQ_FUA)) { + ret = bdrv_co_flush(bs); + } + + if (qiov == &local_qiov) { + qemu_iovec_destroy(&local_qiov); + } + + return ret; +} + +static int coroutine_fn +bdrv_driver_pwritev_compressed(BlockDriverState *bs, uint64_t offset, + uint64_t bytes, QEMUIOVector *qiov, + size_t qiov_offset) +{ + BlockDriver *drv = bs->drv; + QEMUIOVector local_qiov; + int ret; + + if (!drv) { + return -ENOMEDIUM; + } + + if (!block_driver_can_compress(drv)) { + return -ENOTSUP; + } + + if (drv->bdrv_co_pwritev_compressed_part) { + return drv->bdrv_co_pwritev_compressed_part(bs, offset, bytes, + qiov, qiov_offset); + } + + if (qiov_offset == 0) { + return drv->bdrv_co_pwritev_compressed(bs, offset, bytes, qiov); + } + + qemu_iovec_init_slice(&local_qiov, qiov, qiov_offset, bytes); + ret = drv->bdrv_co_pwritev_compressed(bs, offset, bytes, &local_qiov); + qemu_iovec_destroy(&local_qiov); + + return ret; +} + +static int coroutine_fn bdrv_co_do_copy_on_readv(BdrvChild *child, + int64_t offset, unsigned int bytes, QEMUIOVector *qiov, + size_t qiov_offset, int flags) +{ + BlockDriverState *bs = child->bs; + + /* Perform I/O through a temporary buffer so that users who scribble over + * their read buffer while the operation is in progress do not end up + * modifying the image file. This is critical for zero-copy guest I/O + * where anything might happen inside guest memory. + */ + void *bounce_buffer = NULL; + + BlockDriver *drv = bs->drv; + int64_t cluster_offset; + int64_t cluster_bytes; + size_t skip_bytes; + int ret; + int max_transfer = MIN_NON_ZERO(bs->bl.max_transfer, + BDRV_REQUEST_MAX_BYTES); + unsigned int progress = 0; + bool skip_write; + + if (!drv) { + return -ENOMEDIUM; + } + + /* + * Do not write anything when the BDS is inactive. That is not + * allowed, and it would not help. + */ + skip_write = (bs->open_flags & BDRV_O_INACTIVE); + + /* FIXME We cannot require callers to have write permissions when all they + * are doing is a read request. If we did things right, write permissions + * would be obtained anyway, but internally by the copy-on-read code. As + * long as it is implemented here rather than in a separate filter driver, + * the copy-on-read code doesn't have its own BdrvChild, however, for which + * it could request permissions. Therefore we have to bypass the permission + * system for the moment. */ + // assert(child->perm & (BLK_PERM_WRITE_UNCHANGED | BLK_PERM_WRITE)); + + /* Cover entire cluster so no additional backing file I/O is required when + * allocating cluster in the image file. Note that this value may exceed + * BDRV_REQUEST_MAX_BYTES (even when the original read did not), which + * is one reason we loop rather than doing it all at once. + */ + bdrv_round_to_clusters(bs, offset, bytes, &cluster_offset, &cluster_bytes); + skip_bytes = offset - cluster_offset; + + trace_bdrv_co_do_copy_on_readv(bs, offset, bytes, + cluster_offset, cluster_bytes); + + while (cluster_bytes) { + int64_t pnum; + + if (skip_write) { + ret = 1; /* "already allocated", so nothing will be copied */ + pnum = MIN(cluster_bytes, max_transfer); + } else { + ret = bdrv_is_allocated(bs, cluster_offset, + MIN(cluster_bytes, max_transfer), &pnum); + if (ret < 0) { + /* + * Safe to treat errors in querying allocation as if + * unallocated; we'll probably fail again soon on the + * read, but at least that will set a decent errno. + */ + pnum = MIN(cluster_bytes, max_transfer); + } + + /* Stop at EOF if the image ends in the middle of the cluster */ + if (ret == 0 && pnum == 0) { + assert(progress >= bytes); + break; + } + + assert(skip_bytes < pnum); + } + + if (ret <= 0) { + QEMUIOVector local_qiov; + + /* Must copy-on-read; use the bounce buffer */ + pnum = MIN(pnum, MAX_BOUNCE_BUFFER); + if (!bounce_buffer) { + int64_t max_we_need = MAX(pnum, cluster_bytes - pnum); + int64_t max_allowed = MIN(max_transfer, MAX_BOUNCE_BUFFER); + int64_t bounce_buffer_len = MIN(max_we_need, max_allowed); + + bounce_buffer = qemu_try_blockalign(bs, bounce_buffer_len); + if (!bounce_buffer) { + ret = -ENOMEM; + goto err; + } + } + qemu_iovec_init_buf(&local_qiov, bounce_buffer, pnum); + + ret = bdrv_driver_preadv(bs, cluster_offset, pnum, + &local_qiov, 0, 0); + if (ret < 0) { + goto err; + } + + bdrv_debug_event(bs, BLKDBG_COR_WRITE); + if (drv->bdrv_co_pwrite_zeroes && + buffer_is_zero(bounce_buffer, pnum)) { + /* FIXME: Should we (perhaps conditionally) be setting + * BDRV_REQ_MAY_UNMAP, if it will allow for a sparser copy + * that still correctly reads as zero? */ + ret = bdrv_co_do_pwrite_zeroes(bs, cluster_offset, pnum, + BDRV_REQ_WRITE_UNCHANGED); + } else { + /* This does not change the data on the disk, it is not + * necessary to flush even in cache=writethrough mode. + */ + ret = bdrv_driver_pwritev(bs, cluster_offset, pnum, + &local_qiov, 0, + BDRV_REQ_WRITE_UNCHANGED); + } + + if (ret < 0) { + /* It might be okay to ignore write errors for guest + * requests. If this is a deliberate copy-on-read + * then we don't want to ignore the error. Simply + * report it in all cases. + */ + goto err; + } + + if (!(flags & BDRV_REQ_PREFETCH)) { + qemu_iovec_from_buf(qiov, qiov_offset + progress, + bounce_buffer + skip_bytes, + MIN(pnum - skip_bytes, bytes - progress)); + } + } else if (!(flags & BDRV_REQ_PREFETCH)) { + /* Read directly into the destination */ + ret = bdrv_driver_preadv(bs, offset + progress, + MIN(pnum - skip_bytes, bytes - progress), + qiov, qiov_offset + progress, 0); + if (ret < 0) { + goto err; + } + } + + cluster_offset += pnum; + cluster_bytes -= pnum; + progress += pnum - skip_bytes; + skip_bytes = 0; + } + ret = 0; + +err: + qemu_vfree(bounce_buffer); + return ret; +} + +/* + * Forwards an already correctly aligned request to the BlockDriver. This + * handles copy on read, zeroing after EOF, and fragmentation of large + * reads; any other features must be implemented by the caller. + */ +static int coroutine_fn bdrv_aligned_preadv(BdrvChild *child, + BdrvTrackedRequest *req, int64_t offset, unsigned int bytes, + int64_t align, QEMUIOVector *qiov, size_t qiov_offset, int flags) +{ + BlockDriverState *bs = child->bs; + int64_t total_bytes, max_bytes; + int ret = 0; + uint64_t bytes_remaining = bytes; + int max_transfer; + + assert(is_power_of_2(align)); + assert((offset & (align - 1)) == 0); + assert((bytes & (align - 1)) == 0); + assert((bs->open_flags & BDRV_O_NO_IO) == 0); + max_transfer = QEMU_ALIGN_DOWN(MIN_NON_ZERO(bs->bl.max_transfer, INT_MAX), + align); + + /* TODO: We would need a per-BDS .supported_read_flags and + * potential fallback support, if we ever implement any read flags + * to pass through to drivers. For now, there aren't any + * passthrough flags. */ + assert(!(flags & ~(BDRV_REQ_COPY_ON_READ | BDRV_REQ_PREFETCH))); + + /* Handle Copy on Read and associated serialisation */ + if (flags & BDRV_REQ_COPY_ON_READ) { + /* If we touch the same cluster it counts as an overlap. This + * guarantees that allocating writes will be serialized and not race + * with each other for the same cluster. For example, in copy-on-read + * it ensures that the CoR read and write operations are atomic and + * guest writes cannot interleave between them. */ + bdrv_mark_request_serialising(req, bdrv_get_cluster_size(bs)); + } else { + bdrv_wait_serialising_requests(req); + } + + if (flags & BDRV_REQ_COPY_ON_READ) { + int64_t pnum; + + ret = bdrv_is_allocated(bs, offset, bytes, &pnum); + if (ret < 0) { + goto out; + } + + if (!ret || pnum != bytes) { + ret = bdrv_co_do_copy_on_readv(child, offset, bytes, + qiov, qiov_offset, flags); + goto out; + } else if (flags & BDRV_REQ_PREFETCH) { + goto out; + } + } + + /* Forward the request to the BlockDriver, possibly fragmenting it */ + total_bytes = bdrv_getlength(bs); + if (total_bytes < 0) { + ret = total_bytes; + goto out; + } + + max_bytes = ROUND_UP(MAX(0, total_bytes - offset), align); + if (bytes <= max_bytes && bytes <= max_transfer) { + ret = bdrv_driver_preadv(bs, offset, bytes, qiov, qiov_offset, 0); + goto out; + } + + while (bytes_remaining) { + int num; + + if (max_bytes) { + num = MIN(bytes_remaining, MIN(max_bytes, max_transfer)); + assert(num); + + ret = bdrv_driver_preadv(bs, offset + bytes - bytes_remaining, + num, qiov, + qiov_offset + bytes - bytes_remaining, 0); + max_bytes -= num; + } else { + num = bytes_remaining; + ret = qemu_iovec_memset(qiov, qiov_offset + bytes - bytes_remaining, + 0, bytes_remaining); + } + if (ret < 0) { + goto out; + } + bytes_remaining -= num; + } + +out: + return ret < 0 ? ret : 0; +} + +/* + * Request padding + * + * |<---- align ----->| |<----- align ---->| + * |<- head ->|<------------- bytes ------------->|<-- tail -->| + * | | | | | | + * -*----------$-------*-------- ... --------*-----$------------*--- + * | | | | | | + * | offset | | end | + * ALIGN_DOWN(offset) ALIGN_UP(offset) ALIGN_DOWN(end) ALIGN_UP(end) + * [buf ... ) [tail_buf ) + * + * @buf is an aligned allocation needed to store @head and @tail paddings. @head + * is placed at the beginning of @buf and @tail at the @end. + * + * @tail_buf is a pointer to sub-buffer, corresponding to align-sized chunk + * around tail, if tail exists. + * + * @merge_reads is true for small requests, + * if @buf_len == @head + bytes + @tail. In this case it is possible that both + * head and tail exist but @buf_len == align and @tail_buf == @buf. + */ +typedef struct BdrvRequestPadding { + uint8_t *buf; + size_t buf_len; + uint8_t *tail_buf; + size_t head; + size_t tail; + bool merge_reads; + QEMUIOVector local_qiov; +} BdrvRequestPadding; + +static bool bdrv_init_padding(BlockDriverState *bs, + int64_t offset, int64_t bytes, + BdrvRequestPadding *pad) +{ + uint64_t align = bs->bl.request_alignment; + size_t sum; + + memset(pad, 0, sizeof(*pad)); + + pad->head = offset & (align - 1); + pad->tail = ((offset + bytes) & (align - 1)); + if (pad->tail) { + pad->tail = align - pad->tail; + } + + if (!pad->head && !pad->tail) { + return false; + } + + assert(bytes); /* Nothing good in aligning zero-length requests */ + + sum = pad->head + bytes + pad->tail; + pad->buf_len = (sum > align && pad->head && pad->tail) ? 2 * align : align; + pad->buf = qemu_blockalign(bs, pad->buf_len); + pad->merge_reads = sum == pad->buf_len; + if (pad->tail) { + pad->tail_buf = pad->buf + pad->buf_len - align; + } + + return true; +} + +static int bdrv_padding_rmw_read(BdrvChild *child, + BdrvTrackedRequest *req, + BdrvRequestPadding *pad, + bool zero_middle) +{ + QEMUIOVector local_qiov; + BlockDriverState *bs = child->bs; + uint64_t align = bs->bl.request_alignment; + int ret; + + assert(req->serialising && pad->buf); + + if (pad->head || pad->merge_reads) { + uint64_t bytes = pad->merge_reads ? pad->buf_len : align; + + qemu_iovec_init_buf(&local_qiov, pad->buf, bytes); + + if (pad->head) { + bdrv_debug_event(bs, BLKDBG_PWRITEV_RMW_HEAD); + } + if (pad->merge_reads && pad->tail) { + bdrv_debug_event(bs, BLKDBG_PWRITEV_RMW_TAIL); + } + ret = bdrv_aligned_preadv(child, req, req->overlap_offset, bytes, + align, &local_qiov, 0, 0); + if (ret < 0) { + return ret; + } + if (pad->head) { + bdrv_debug_event(bs, BLKDBG_PWRITEV_RMW_AFTER_HEAD); + } + if (pad->merge_reads && pad->tail) { + bdrv_debug_event(bs, BLKDBG_PWRITEV_RMW_AFTER_TAIL); + } + + if (pad->merge_reads) { + goto zero_mem; + } + } + + if (pad->tail) { + qemu_iovec_init_buf(&local_qiov, pad->tail_buf, align); + + bdrv_debug_event(bs, BLKDBG_PWRITEV_RMW_TAIL); + ret = bdrv_aligned_preadv( + child, req, + req->overlap_offset + req->overlap_bytes - align, + align, align, &local_qiov, 0, 0); + if (ret < 0) { + return ret; + } + bdrv_debug_event(bs, BLKDBG_PWRITEV_RMW_AFTER_TAIL); + } + +zero_mem: + if (zero_middle) { + memset(pad->buf + pad->head, 0, pad->buf_len - pad->head - pad->tail); + } + + return 0; +} + +static void bdrv_padding_destroy(BdrvRequestPadding *pad) +{ + if (pad->buf) { + qemu_vfree(pad->buf); + qemu_iovec_destroy(&pad->local_qiov); + } +} + +/* + * bdrv_pad_request + * + * Exchange request parameters with padded request if needed. Don't include RMW + * read of padding, bdrv_padding_rmw_read() should be called separately if + * needed. + * + * All parameters except @bs are in-out: they represent original request at + * function call and padded (if padding needed) at function finish. + * + * Function always succeeds. + */ +static bool bdrv_pad_request(BlockDriverState *bs, + QEMUIOVector **qiov, size_t *qiov_offset, + int64_t *offset, unsigned int *bytes, + BdrvRequestPadding *pad) +{ + if (!bdrv_init_padding(bs, *offset, *bytes, pad)) { + return false; + } + + qemu_iovec_init_extended(&pad->local_qiov, pad->buf, pad->head, + *qiov, *qiov_offset, *bytes, + pad->buf + pad->buf_len - pad->tail, pad->tail); + *bytes += pad->head + pad->tail; + *offset -= pad->head; + *qiov = &pad->local_qiov; + *qiov_offset = 0; + + return true; +} + +int coroutine_fn bdrv_co_preadv(BdrvChild *child, + int64_t offset, unsigned int bytes, QEMUIOVector *qiov, + BdrvRequestFlags flags) +{ + return bdrv_co_preadv_part(child, offset, bytes, qiov, 0, flags); +} + +int coroutine_fn bdrv_co_preadv_part(BdrvChild *child, + int64_t offset, unsigned int bytes, + QEMUIOVector *qiov, size_t qiov_offset, + BdrvRequestFlags flags) +{ + BlockDriverState *bs = child->bs; + BdrvTrackedRequest req; + BdrvRequestPadding pad; + int ret; + + trace_bdrv_co_preadv(bs, offset, bytes, flags); + + ret = bdrv_check_byte_request(bs, offset, bytes); + if (ret < 0) { + return ret; + } + + if (bytes == 0 && !QEMU_IS_ALIGNED(offset, bs->bl.request_alignment)) { + /* + * Aligning zero request is nonsense. Even if driver has special meaning + * of zero-length (like qcow2_co_pwritev_compressed_part), we can't pass + * it to driver due to request_alignment. + * + * Still, no reason to return an error if someone do unaligned + * zero-length read occasionally. + */ + return 0; + } + + bdrv_inc_in_flight(bs); + + /* Don't do copy-on-read if we read data before write operation */ + if (qatomic_read(&bs->copy_on_read)) { + flags |= BDRV_REQ_COPY_ON_READ; + } + + bdrv_pad_request(bs, &qiov, &qiov_offset, &offset, &bytes, &pad); + + tracked_request_begin(&req, bs, offset, bytes, BDRV_TRACKED_READ); + ret = bdrv_aligned_preadv(child, &req, offset, bytes, + bs->bl.request_alignment, + qiov, qiov_offset, flags); + tracked_request_end(&req); + bdrv_dec_in_flight(bs); + + bdrv_padding_destroy(&pad); + + return ret; +} + +static int coroutine_fn bdrv_co_do_pwrite_zeroes(BlockDriverState *bs, + int64_t offset, int bytes, BdrvRequestFlags flags) +{ + BlockDriver *drv = bs->drv; + QEMUIOVector qiov; + void *buf = NULL; + int ret = 0; + bool need_flush = false; + int head = 0; + int tail = 0; + + int max_write_zeroes = MIN_NON_ZERO(bs->bl.max_pwrite_zeroes, INT_MAX); + int alignment = MAX(bs->bl.pwrite_zeroes_alignment, + bs->bl.request_alignment); + int max_transfer = MIN_NON_ZERO(bs->bl.max_transfer, MAX_BOUNCE_BUFFER); + + if (!drv) { + return -ENOMEDIUM; + } + + if ((flags & ~bs->supported_zero_flags) & BDRV_REQ_NO_FALLBACK) { + return -ENOTSUP; + } + + assert(alignment % bs->bl.request_alignment == 0); + head = offset % alignment; + tail = (offset + bytes) % alignment; + max_write_zeroes = QEMU_ALIGN_DOWN(max_write_zeroes, alignment); + assert(max_write_zeroes >= bs->bl.request_alignment); + + while (bytes > 0 && !ret) { + int num = bytes; + + /* Align request. Block drivers can expect the "bulk" of the request + * to be aligned, and that unaligned requests do not cross cluster + * boundaries. + */ + if (head) { + /* Make a small request up to the first aligned sector. For + * convenience, limit this request to max_transfer even if + * we don't need to fall back to writes. */ + num = MIN(MIN(bytes, max_transfer), alignment - head); + head = (head + num) % alignment; + assert(num < max_write_zeroes); + } else if (tail && num > alignment) { + /* Shorten the request to the last aligned sector. */ + num -= tail; + } + + /* limit request size */ + if (num > max_write_zeroes) { + num = max_write_zeroes; + } + + ret = -ENOTSUP; + /* First try the efficient write zeroes operation */ + if (drv->bdrv_co_pwrite_zeroes) { + ret = drv->bdrv_co_pwrite_zeroes(bs, offset, num, + flags & bs->supported_zero_flags); + if (ret != -ENOTSUP && (flags & BDRV_REQ_FUA) && + !(bs->supported_zero_flags & BDRV_REQ_FUA)) { + need_flush = true; + } + } else { + assert(!bs->supported_zero_flags); + } + + if (ret == -ENOTSUP && !(flags & BDRV_REQ_NO_FALLBACK)) { + /* Fall back to bounce buffer if write zeroes is unsupported */ + BdrvRequestFlags write_flags = flags & ~BDRV_REQ_ZERO_WRITE; + + if ((flags & BDRV_REQ_FUA) && + !(bs->supported_write_flags & BDRV_REQ_FUA)) { + /* No need for bdrv_driver_pwrite() to do a fallback + * flush on each chunk; use just one at the end */ + write_flags &= ~BDRV_REQ_FUA; + need_flush = true; + } + num = MIN(num, max_transfer); + if (buf == NULL) { + buf = qemu_try_blockalign0(bs, num); + if (buf == NULL) { + ret = -ENOMEM; + goto fail; + } + } + qemu_iovec_init_buf(&qiov, buf, num); + + ret = bdrv_driver_pwritev(bs, offset, num, &qiov, 0, write_flags); + + /* Keep bounce buffer around if it is big enough for all + * all future requests. + */ + if (num < max_transfer) { + qemu_vfree(buf); + buf = NULL; + } + } + + offset += num; + bytes -= num; + } + +fail: + if (ret == 0 && need_flush) { + ret = bdrv_co_flush(bs); + } + qemu_vfree(buf); + return ret; +} + +static inline int coroutine_fn +bdrv_co_write_req_prepare(BdrvChild *child, int64_t offset, uint64_t bytes, + BdrvTrackedRequest *req, int flags) +{ + BlockDriverState *bs = child->bs; + bool waited; + int64_t end_sector = DIV_ROUND_UP(offset + bytes, BDRV_SECTOR_SIZE); + + if (bs->read_only) { + return -EPERM; + } + + assert(!(bs->open_flags & BDRV_O_INACTIVE)); + assert((bs->open_flags & BDRV_O_NO_IO) == 0); + assert(!(flags & ~BDRV_REQ_MASK)); + + if (flags & BDRV_REQ_SERIALISING) { + waited = bdrv_mark_request_serialising(req, bdrv_get_cluster_size(bs)); + /* + * For a misaligned request we should have already waited earlier, + * because we come after bdrv_padding_rmw_read which must be called + * with the request already marked as serialising. + */ + assert(!waited || + (req->offset == req->overlap_offset && + req->bytes == req->overlap_bytes)); + } else { + bdrv_wait_serialising_requests(req); + } + + assert(req->overlap_offset <= offset); + assert(offset + bytes <= req->overlap_offset + req->overlap_bytes); + assert(end_sector <= bs->total_sectors || child->perm & BLK_PERM_RESIZE); + + switch (req->type) { + case BDRV_TRACKED_WRITE: + case BDRV_TRACKED_DISCARD: + if (flags & BDRV_REQ_WRITE_UNCHANGED) { + assert(child->perm & (BLK_PERM_WRITE_UNCHANGED | BLK_PERM_WRITE)); + } else { + assert(child->perm & BLK_PERM_WRITE); + } + return notifier_with_return_list_notify(&bs->before_write_notifiers, + req); + case BDRV_TRACKED_TRUNCATE: + assert(child->perm & BLK_PERM_RESIZE); + return 0; + default: + abort(); + } +} + +static inline void coroutine_fn +bdrv_co_write_req_finish(BdrvChild *child, int64_t offset, uint64_t bytes, + BdrvTrackedRequest *req, int ret) +{ + int64_t end_sector = DIV_ROUND_UP(offset + bytes, BDRV_SECTOR_SIZE); + BlockDriverState *bs = child->bs; + + qatomic_inc(&bs->write_gen); + + /* + * Discard cannot extend the image, but in error handling cases, such as + * when reverting a qcow2 cluster allocation, the discarded range can pass + * the end of image file, so we cannot assert about BDRV_TRACKED_DISCARD + * here. Instead, just skip it, since semantically a discard request + * beyond EOF cannot expand the image anyway. + */ + if (ret == 0 && + (req->type == BDRV_TRACKED_TRUNCATE || + end_sector > bs->total_sectors) && + req->type != BDRV_TRACKED_DISCARD) { + bs->total_sectors = end_sector; + bdrv_parent_cb_resize(bs); + bdrv_dirty_bitmap_truncate(bs, end_sector << BDRV_SECTOR_BITS); + } + if (req->bytes) { + switch (req->type) { + case BDRV_TRACKED_WRITE: + stat64_max(&bs->wr_highest_offset, offset + bytes); + /* fall through, to set dirty bits */ + case BDRV_TRACKED_DISCARD: + bdrv_set_dirty(bs, offset, bytes); + break; + default: + break; + } + } +} + +/* + * Forwards an already correctly aligned write request to the BlockDriver, + * after possibly fragmenting it. + */ +static int coroutine_fn bdrv_aligned_pwritev(BdrvChild *child, + BdrvTrackedRequest *req, int64_t offset, unsigned int bytes, + int64_t align, QEMUIOVector *qiov, size_t qiov_offset, int flags) +{ + BlockDriverState *bs = child->bs; + BlockDriver *drv = bs->drv; + int ret; + + uint64_t bytes_remaining = bytes; + int max_transfer; + + if (!drv) { + return -ENOMEDIUM; + } + + if (bdrv_has_readonly_bitmaps(bs)) { + return -EPERM; + } + + assert(is_power_of_2(align)); + assert((offset & (align - 1)) == 0); + assert((bytes & (align - 1)) == 0); + assert(!qiov || qiov_offset + bytes <= qiov->size); + max_transfer = QEMU_ALIGN_DOWN(MIN_NON_ZERO(bs->bl.max_transfer, INT_MAX), + align); + + ret = bdrv_co_write_req_prepare(child, offset, bytes, req, flags); + + if (!ret && bs->detect_zeroes != BLOCKDEV_DETECT_ZEROES_OPTIONS_OFF && + !(flags & BDRV_REQ_ZERO_WRITE) && drv->bdrv_co_pwrite_zeroes && + qemu_iovec_is_zero(qiov, qiov_offset, bytes)) { + flags |= BDRV_REQ_ZERO_WRITE; + if (bs->detect_zeroes == BLOCKDEV_DETECT_ZEROES_OPTIONS_UNMAP) { + flags |= BDRV_REQ_MAY_UNMAP; + } + } + + if (ret < 0) { + /* Do nothing, write notifier decided to fail this request */ + } else if (flags & BDRV_REQ_ZERO_WRITE) { + bdrv_debug_event(bs, BLKDBG_PWRITEV_ZERO); + ret = bdrv_co_do_pwrite_zeroes(bs, offset, bytes, flags); + } else if (flags & BDRV_REQ_WRITE_COMPRESSED) { + ret = bdrv_driver_pwritev_compressed(bs, offset, bytes, + qiov, qiov_offset); + } else if (bytes <= max_transfer) { + bdrv_debug_event(bs, BLKDBG_PWRITEV); + ret = bdrv_driver_pwritev(bs, offset, bytes, qiov, qiov_offset, flags); + } else { + bdrv_debug_event(bs, BLKDBG_PWRITEV); + while (bytes_remaining) { + int num = MIN(bytes_remaining, max_transfer); + int local_flags = flags; + + assert(num); + if (num < bytes_remaining && (flags & BDRV_REQ_FUA) && + !(bs->supported_write_flags & BDRV_REQ_FUA)) { + /* If FUA is going to be emulated by flush, we only + * need to flush on the last iteration */ + local_flags &= ~BDRV_REQ_FUA; + } + + ret = bdrv_driver_pwritev(bs, offset + bytes - bytes_remaining, + num, qiov, + qiov_offset + bytes - bytes_remaining, + local_flags); + if (ret < 0) { + break; + } + bytes_remaining -= num; + } + } + bdrv_debug_event(bs, BLKDBG_PWRITEV_DONE); + + if (ret >= 0) { + ret = 0; + } + bdrv_co_write_req_finish(child, offset, bytes, req, ret); + + return ret; +} + +static int coroutine_fn bdrv_co_do_zero_pwritev(BdrvChild *child, + int64_t offset, + unsigned int bytes, + BdrvRequestFlags flags, + BdrvTrackedRequest *req) +{ + BlockDriverState *bs = child->bs; + QEMUIOVector local_qiov; + uint64_t align = bs->bl.request_alignment; + int ret = 0; + bool padding; + BdrvRequestPadding pad; + + padding = bdrv_init_padding(bs, offset, bytes, &pad); + if (padding) { + bdrv_mark_request_serialising(req, align); + + bdrv_padding_rmw_read(child, req, &pad, true); + + if (pad.head || pad.merge_reads) { + int64_t aligned_offset = offset & ~(align - 1); + int64_t write_bytes = pad.merge_reads ? pad.buf_len : align; + + qemu_iovec_init_buf(&local_qiov, pad.buf, write_bytes); + ret = bdrv_aligned_pwritev(child, req, aligned_offset, write_bytes, + align, &local_qiov, 0, + flags & ~BDRV_REQ_ZERO_WRITE); + if (ret < 0 || pad.merge_reads) { + /* Error or all work is done */ + goto out; + } + offset += write_bytes - pad.head; + bytes -= write_bytes - pad.head; + } + } + + assert(!bytes || (offset & (align - 1)) == 0); + if (bytes >= align) { + /* Write the aligned part in the middle. */ + uint64_t aligned_bytes = bytes & ~(align - 1); + ret = bdrv_aligned_pwritev(child, req, offset, aligned_bytes, align, + NULL, 0, flags); + if (ret < 0) { + goto out; + } + bytes -= aligned_bytes; + offset += aligned_bytes; + } + + assert(!bytes || (offset & (align - 1)) == 0); + if (bytes) { + assert(align == pad.tail + bytes); + + qemu_iovec_init_buf(&local_qiov, pad.tail_buf, align); + ret = bdrv_aligned_pwritev(child, req, offset, align, align, + &local_qiov, 0, + flags & ~BDRV_REQ_ZERO_WRITE); + } + +out: + bdrv_padding_destroy(&pad); + + return ret; +} + +/* + * Handle a write request in coroutine context + */ +int coroutine_fn bdrv_co_pwritev(BdrvChild *child, + int64_t offset, unsigned int bytes, QEMUIOVector *qiov, + BdrvRequestFlags flags) +{ + return bdrv_co_pwritev_part(child, offset, bytes, qiov, 0, flags); +} + +int coroutine_fn bdrv_co_pwritev_part(BdrvChild *child, + int64_t offset, unsigned int bytes, QEMUIOVector *qiov, size_t qiov_offset, + BdrvRequestFlags flags) +{ + BlockDriverState *bs = child->bs; + BdrvTrackedRequest req; + uint64_t align = bs->bl.request_alignment; + BdrvRequestPadding pad; + int ret; + + trace_bdrv_co_pwritev(child->bs, offset, bytes, flags); + + if (!bs->drv) { + return -ENOMEDIUM; + } + + ret = bdrv_check_byte_request(bs, offset, bytes); + if (ret < 0) { + return ret; + } + + /* If the request is misaligned then we can't make it efficient */ + if ((flags & BDRV_REQ_NO_FALLBACK) && + !QEMU_IS_ALIGNED(offset | bytes, align)) + { + return -ENOTSUP; + } + + if (bytes == 0 && !QEMU_IS_ALIGNED(offset, bs->bl.request_alignment)) { + /* + * Aligning zero request is nonsense. Even if driver has special meaning + * of zero-length (like qcow2_co_pwritev_compressed_part), we can't pass + * it to driver due to request_alignment. + * + * Still, no reason to return an error if someone do unaligned + * zero-length write occasionally. + */ + return 0; + } + + bdrv_inc_in_flight(bs); + /* + * Align write if necessary by performing a read-modify-write cycle. + * Pad qiov with the read parts and be sure to have a tracked request not + * only for bdrv_aligned_pwritev, but also for the reads of the RMW cycle. + */ + tracked_request_begin(&req, bs, offset, bytes, BDRV_TRACKED_WRITE); + + if (flags & BDRV_REQ_ZERO_WRITE) { + ret = bdrv_co_do_zero_pwritev(child, offset, bytes, flags, &req); + goto out; + } + + if (bdrv_pad_request(bs, &qiov, &qiov_offset, &offset, &bytes, &pad)) { + bdrv_mark_request_serialising(&req, align); + bdrv_padding_rmw_read(child, &req, &pad, false); + } + + ret = bdrv_aligned_pwritev(child, &req, offset, bytes, align, + qiov, qiov_offset, flags); + + bdrv_padding_destroy(&pad); + +out: + tracked_request_end(&req); + bdrv_dec_in_flight(bs); + + return ret; +} + +int coroutine_fn bdrv_co_pwrite_zeroes(BdrvChild *child, int64_t offset, + int bytes, BdrvRequestFlags flags) +{ + trace_bdrv_co_pwrite_zeroes(child->bs, offset, bytes, flags); + + if (!(child->bs->open_flags & BDRV_O_UNMAP)) { + flags &= ~BDRV_REQ_MAY_UNMAP; + } + + return bdrv_co_pwritev(child, offset, bytes, NULL, + BDRV_REQ_ZERO_WRITE | flags); +} + +/* + * Flush ALL BDSes regardless of if they are reachable via a BlkBackend or not. + */ +int bdrv_flush_all(void) +{ + BdrvNextIterator it; + BlockDriverState *bs = NULL; + int result = 0; + + /* + * bdrv queue is managed by record/replay, + * creating new flush request for stopping + * the VM may break the determinism + */ + if (replay_events_enabled()) { + return result; + } + + for (bs = bdrv_first(&it); bs; bs = bdrv_next(&it)) { + AioContext *aio_context = bdrv_get_aio_context(bs); + int ret; + + aio_context_acquire(aio_context); + ret = bdrv_flush(bs); + if (ret < 0 && !result) { + result = ret; + } + aio_context_release(aio_context); + } + + return result; +} + +/* + * Returns the allocation status of the specified sectors. + * Drivers not implementing the functionality are assumed to not support + * backing files, hence all their sectors are reported as allocated. + * + * If 'want_zero' is true, the caller is querying for mapping + * purposes, with a focus on valid BDRV_BLOCK_OFFSET_VALID, _DATA, and + * _ZERO where possible; otherwise, the result favors larger 'pnum', + * with a focus on accurate BDRV_BLOCK_ALLOCATED. + * + * If 'offset' is beyond the end of the disk image the return value is + * BDRV_BLOCK_EOF and 'pnum' is set to 0. + * + * 'bytes' is the max value 'pnum' should be set to. If bytes goes + * beyond the end of the disk image it will be clamped; if 'pnum' is set to + * the end of the image, then the returned value will include BDRV_BLOCK_EOF. + * + * 'pnum' is set to the number of bytes (including and immediately + * following the specified offset) that are easily known to be in the + * same allocated/unallocated state. Note that a second call starting + * at the original offset plus returned pnum may have the same status. + * The returned value is non-zero on success except at end-of-file. + * + * Returns negative errno on failure. Otherwise, if the + * BDRV_BLOCK_OFFSET_VALID bit is set, 'map' and 'file' (if non-NULL) are + * set to the host mapping and BDS corresponding to the guest offset. + */ +static int coroutine_fn bdrv_co_block_status(BlockDriverState *bs, + bool want_zero, + int64_t offset, int64_t bytes, + int64_t *pnum, int64_t *map, + BlockDriverState **file) +{ + int64_t total_size; + int64_t n; /* bytes */ + int ret; + int64_t local_map = 0; + BlockDriverState *local_file = NULL; + int64_t aligned_offset, aligned_bytes; + uint32_t align; + bool has_filtered_child; + + assert(pnum); + *pnum = 0; + total_size = bdrv_getlength(bs); + if (total_size < 0) { + ret = total_size; + goto early_out; + } + + if (offset >= total_size) { + ret = BDRV_BLOCK_EOF; + goto early_out; + } + if (!bytes) { + ret = 0; + goto early_out; + } + + n = total_size - offset; + if (n < bytes) { + bytes = n; + } + + /* Must be non-NULL or bdrv_getlength() would have failed */ + assert(bs->drv); + has_filtered_child = bdrv_filter_child(bs); + if (!bs->drv->bdrv_co_block_status && !has_filtered_child) { + *pnum = bytes; + ret = BDRV_BLOCK_DATA | BDRV_BLOCK_ALLOCATED; + if (offset + bytes == total_size) { + ret |= BDRV_BLOCK_EOF; + } + if (bs->drv->protocol_name) { + ret |= BDRV_BLOCK_OFFSET_VALID; + local_map = offset; + local_file = bs; + } + goto early_out; + } + + bdrv_inc_in_flight(bs); + + /* Round out to request_alignment boundaries */ + align = bs->bl.request_alignment; + aligned_offset = QEMU_ALIGN_DOWN(offset, align); + aligned_bytes = ROUND_UP(offset + bytes, align) - aligned_offset; + + if (bs->drv->bdrv_co_block_status) { + ret = bs->drv->bdrv_co_block_status(bs, want_zero, aligned_offset, + aligned_bytes, pnum, &local_map, + &local_file); + } else { + /* Default code for filters */ + + local_file = bdrv_filter_bs(bs); + assert(local_file); + + *pnum = aligned_bytes; + local_map = aligned_offset; + ret = BDRV_BLOCK_RAW | BDRV_BLOCK_OFFSET_VALID; + } + if (ret < 0) { + *pnum = 0; + goto out; + } + + /* + * The driver's result must be a non-zero multiple of request_alignment. + * Clamp pnum and adjust map to original request. + */ + assert(*pnum && QEMU_IS_ALIGNED(*pnum, align) && + align > offset - aligned_offset); + if (ret & BDRV_BLOCK_RECURSE) { + assert(ret & BDRV_BLOCK_DATA); + assert(ret & BDRV_BLOCK_OFFSET_VALID); + assert(!(ret & BDRV_BLOCK_ZERO)); + } + + *pnum -= offset - aligned_offset; + if (*pnum > bytes) { + *pnum = bytes; + } + if (ret & BDRV_BLOCK_OFFSET_VALID) { + local_map += offset - aligned_offset; + } + + if (ret & BDRV_BLOCK_RAW) { + assert(ret & BDRV_BLOCK_OFFSET_VALID && local_file); + ret = bdrv_co_block_status(local_file, want_zero, local_map, + *pnum, pnum, &local_map, &local_file); + goto out; + } + + if (ret & (BDRV_BLOCK_DATA | BDRV_BLOCK_ZERO)) { + ret |= BDRV_BLOCK_ALLOCATED; + } else if (bs->drv->supports_backing) { + BlockDriverState *cow_bs = bdrv_cow_bs(bs); + + if (!cow_bs) { + ret |= BDRV_BLOCK_ZERO; + } else if (want_zero) { + int64_t size2 = bdrv_getlength(cow_bs); + + if (size2 >= 0 && offset >= size2) { + ret |= BDRV_BLOCK_ZERO; + } + } + } + + if (want_zero && ret & BDRV_BLOCK_RECURSE && + local_file && local_file != bs && + (ret & BDRV_BLOCK_DATA) && !(ret & BDRV_BLOCK_ZERO) && + (ret & BDRV_BLOCK_OFFSET_VALID)) { + int64_t file_pnum; + int ret2; + + ret2 = bdrv_co_block_status(local_file, want_zero, local_map, + *pnum, &file_pnum, NULL, NULL); + if (ret2 >= 0) { + /* Ignore errors. This is just providing extra information, it + * is useful but not necessary. + */ + if (ret2 & BDRV_BLOCK_EOF && + (!file_pnum || ret2 & BDRV_BLOCK_ZERO)) { + /* + * It is valid for the format block driver to read + * beyond the end of the underlying file's current + * size; such areas read as zero. + */ + ret |= BDRV_BLOCK_ZERO; + } else { + /* Limit request to the range reported by the protocol driver */ + *pnum = file_pnum; + ret |= (ret2 & BDRV_BLOCK_ZERO); + } + } + } + +out: + bdrv_dec_in_flight(bs); + if (ret >= 0 && offset + *pnum == total_size) { + ret |= BDRV_BLOCK_EOF; + } +early_out: + if (file) { + *file = local_file; + } + if (map) { + *map = local_map; + } + return ret; +} + +int coroutine_fn +bdrv_co_common_block_status_above(BlockDriverState *bs, + BlockDriverState *base, + bool include_base, + bool want_zero, + int64_t offset, + int64_t bytes, + int64_t *pnum, + int64_t *map, + BlockDriverState **file, + int *depth) +{ + int ret; + BlockDriverState *p; + int64_t eof = 0; + int dummy; + + assert(!include_base || base); /* Can't include NULL base */ + + if (!depth) { + depth = &dummy; + } + *depth = 0; + + if (!include_base && bs == base) { + *pnum = bytes; + return 0; + } + + ret = bdrv_co_block_status(bs, want_zero, offset, bytes, pnum, map, file); + ++*depth; + if (ret < 0 || *pnum == 0 || ret & BDRV_BLOCK_ALLOCATED || bs == base) { + return ret; + } + + if (ret & BDRV_BLOCK_EOF) { + eof = offset + *pnum; + } + + assert(*pnum <= bytes); + bytes = *pnum; + + for (p = bdrv_filter_or_cow_bs(bs); include_base || p != base; + p = bdrv_filter_or_cow_bs(p)) + { + ret = bdrv_co_block_status(p, want_zero, offset, bytes, pnum, map, + file); + ++*depth; + if (ret < 0) { + return ret; + } + if (*pnum == 0) { + /* + * The top layer deferred to this layer, and because this layer is + * short, any zeroes that we synthesize beyond EOF behave as if they + * were allocated at this layer. + * + * We don't include BDRV_BLOCK_EOF into ret, as upper layer may be + * larger. We'll add BDRV_BLOCK_EOF if needed at function end, see + * below. + */ + assert(ret & BDRV_BLOCK_EOF); + *pnum = bytes; + if (file) { + *file = p; + } + ret = BDRV_BLOCK_ZERO | BDRV_BLOCK_ALLOCATED; + break; + } + if (ret & BDRV_BLOCK_ALLOCATED) { + /* + * We've found the node and the status, we must break. + * + * Drop BDRV_BLOCK_EOF, as it's not for upper layer, which may be + * larger. We'll add BDRV_BLOCK_EOF if needed at function end, see + * below. + */ + ret &= ~BDRV_BLOCK_EOF; + break; + } + + if (p == base) { + assert(include_base); + break; + } + + /* + * OK, [offset, offset + *pnum) region is unallocated on this layer, + * let's continue the diving. + */ + assert(*pnum <= bytes); + bytes = *pnum; + } + + if (offset + *pnum == eof) { + ret |= BDRV_BLOCK_EOF; + } + + return ret; +} + +int bdrv_block_status_above(BlockDriverState *bs, BlockDriverState *base, + int64_t offset, int64_t bytes, int64_t *pnum, + int64_t *map, BlockDriverState **file) +{ + return bdrv_common_block_status_above(bs, base, false, true, offset, bytes, + pnum, map, file, NULL); +} + +int bdrv_block_status(BlockDriverState *bs, int64_t offset, int64_t bytes, + int64_t *pnum, int64_t *map, BlockDriverState **file) +{ + return bdrv_block_status_above(bs, bdrv_filter_or_cow_bs(bs), + offset, bytes, pnum, map, file); +} + +/* + * Check @bs (and its backing chain) to see if the range defined + * by @offset and @bytes is known to read as zeroes. + * Return 1 if that is the case, 0 otherwise and -errno on error. + * This test is meant to be fast rather than accurate so returning 0 + * does not guarantee non-zero data. + */ +int coroutine_fn bdrv_co_is_zero_fast(BlockDriverState *bs, int64_t offset, + int64_t bytes) +{ + int ret; + int64_t pnum = bytes; + + if (!bytes) { + return 1; + } + + ret = bdrv_common_block_status_above(bs, NULL, false, false, offset, + bytes, &pnum, NULL, NULL, NULL); + + if (ret < 0) { + return ret; + } + + return (pnum == bytes) && (ret & BDRV_BLOCK_ZERO); +} + +int coroutine_fn bdrv_is_allocated(BlockDriverState *bs, int64_t offset, + int64_t bytes, int64_t *pnum) +{ + int ret; + int64_t dummy; + + ret = bdrv_common_block_status_above(bs, bs, true, false, offset, + bytes, pnum ? pnum : &dummy, NULL, + NULL, NULL); + if (ret < 0) { + return ret; + } + return !!(ret & BDRV_BLOCK_ALLOCATED); +} + +/* + * Given an image chain: ... -> [BASE] -> [INTER1] -> [INTER2] -> [TOP] + * + * Return a positive depth if (a prefix of) the given range is allocated + * in any image between BASE and TOP (BASE is only included if include_base + * is set). Depth 1 is TOP, 2 is the first backing layer, and so forth. + * BASE can be NULL to check if the given offset is allocated in any + * image of the chain. Return 0 otherwise, or negative errno on + * failure. + * + * 'pnum' is set to the number of bytes (including and immediately + * following the specified offset) that are known to be in the same + * allocated/unallocated state. Note that a subsequent call starting + * at 'offset + *pnum' may return the same allocation status (in other + * words, the result is not necessarily the maximum possible range); + * but 'pnum' will only be 0 when end of file is reached. + */ +int bdrv_is_allocated_above(BlockDriverState *top, + BlockDriverState *base, + bool include_base, int64_t offset, + int64_t bytes, int64_t *pnum) +{ + int depth; + int ret = bdrv_common_block_status_above(top, base, include_base, false, + offset, bytes, pnum, NULL, NULL, + &depth); + if (ret < 0) { + return ret; + } + + if (ret & BDRV_BLOCK_ALLOCATED) { + return depth; + } + return 0; +} + +int coroutine_fn +bdrv_co_readv_vmstate(BlockDriverState *bs, QEMUIOVector *qiov, int64_t pos) +{ + BlockDriver *drv = bs->drv; + BlockDriverState *child_bs = bdrv_primary_bs(bs); + int ret = -ENOTSUP; + + if (!drv) { + return -ENOMEDIUM; + } + + bdrv_inc_in_flight(bs); + + if (drv->bdrv_load_vmstate) { + ret = drv->bdrv_load_vmstate(bs, qiov, pos); + } else if (child_bs) { + ret = bdrv_co_readv_vmstate(child_bs, qiov, pos); + } + + bdrv_dec_in_flight(bs); + + return ret; +} + +int coroutine_fn +bdrv_co_writev_vmstate(BlockDriverState *bs, QEMUIOVector *qiov, int64_t pos) +{ + BlockDriver *drv = bs->drv; + BlockDriverState *child_bs = bdrv_primary_bs(bs); + int ret = -ENOTSUP; + + if (!drv) { + return -ENOMEDIUM; + } + + bdrv_inc_in_flight(bs); + + if (drv->bdrv_save_vmstate) { + ret = drv->bdrv_save_vmstate(bs, qiov, pos); + } else if (child_bs) { + ret = bdrv_co_writev_vmstate(child_bs, qiov, pos); + } + + bdrv_dec_in_flight(bs); + + return ret; +} + +int bdrv_save_vmstate(BlockDriverState *bs, const uint8_t *buf, + int64_t pos, int size) +{ + QEMUIOVector qiov = QEMU_IOVEC_INIT_BUF(qiov, buf, size); + int ret = bdrv_writev_vmstate(bs, &qiov, pos); + + return ret < 0 ? ret : size; +} + +int bdrv_load_vmstate(BlockDriverState *bs, uint8_t *buf, + int64_t pos, int size) +{ + QEMUIOVector qiov = QEMU_IOVEC_INIT_BUF(qiov, buf, size); + int ret = bdrv_readv_vmstate(bs, &qiov, pos); + + return ret < 0 ? ret : size; +} + +/**************************************************************/ +/* async I/Os */ + +void bdrv_aio_cancel(BlockAIOCB *acb) +{ + qemu_aio_ref(acb); + bdrv_aio_cancel_async(acb); + while (acb->refcnt > 1) { + if (acb->aiocb_info->get_aio_context) { + aio_poll(acb->aiocb_info->get_aio_context(acb), true); + } else if (acb->bs) { + /* qemu_aio_ref and qemu_aio_unref are not thread-safe, so + * assert that we're not using an I/O thread. Thread-safe + * code should use bdrv_aio_cancel_async exclusively. + */ + assert(bdrv_get_aio_context(acb->bs) == qemu_get_aio_context()); + aio_poll(bdrv_get_aio_context(acb->bs), true); + } else { + abort(); + } + } + qemu_aio_unref(acb); +} + +/* Async version of aio cancel. The caller is not blocked if the acb implements + * cancel_async, otherwise we do nothing and let the request normally complete. + * In either case the completion callback must be called. */ +void bdrv_aio_cancel_async(BlockAIOCB *acb) +{ + if (acb->aiocb_info->cancel_async) { + acb->aiocb_info->cancel_async(acb); + } +} + +/**************************************************************/ +/* Coroutine block device emulation */ + +int coroutine_fn bdrv_co_flush(BlockDriverState *bs) +{ + BdrvChild *primary_child = bdrv_primary_child(bs); + BdrvChild *child; + int current_gen; + int ret = 0; + + bdrv_inc_in_flight(bs); + + if (!bdrv_is_inserted(bs) || bdrv_is_read_only(bs) || + bdrv_is_sg(bs)) { + goto early_exit; + } + + qemu_co_mutex_lock(&bs->reqs_lock); + current_gen = qatomic_read(&bs->write_gen); + + /* Wait until any previous flushes are completed */ + while (bs->active_flush_req) { + qemu_co_queue_wait(&bs->flush_queue, &bs->reqs_lock); + } + + /* Flushes reach this point in nondecreasing current_gen order. */ + bs->active_flush_req = true; + qemu_co_mutex_unlock(&bs->reqs_lock); + + /* Write back all layers by calling one driver function */ + if (bs->drv->bdrv_co_flush) { + ret = bs->drv->bdrv_co_flush(bs); + goto out; + } + + /* Write back cached data to the OS even with cache=unsafe */ + BLKDBG_EVENT(primary_child, BLKDBG_FLUSH_TO_OS); + if (bs->drv->bdrv_co_flush_to_os) { + ret = bs->drv->bdrv_co_flush_to_os(bs); + if (ret < 0) { + goto out; + } + } + + /* But don't actually force it to the disk with cache=unsafe */ + if (bs->open_flags & BDRV_O_NO_FLUSH) { + goto flush_children; + } + + /* Check if we really need to flush anything */ + if (bs->flushed_gen == current_gen) { + goto flush_children; + } + + BLKDBG_EVENT(primary_child, BLKDBG_FLUSH_TO_DISK); + if (!bs->drv) { + /* bs->drv->bdrv_co_flush() might have ejected the BDS + * (even in case of apparent success) */ + ret = -ENOMEDIUM; + goto out; + } + if (bs->drv->bdrv_co_flush_to_disk) { + ret = bs->drv->bdrv_co_flush_to_disk(bs); + } else if (bs->drv->bdrv_aio_flush) { + BlockAIOCB *acb; + CoroutineIOCompletion co = { + .coroutine = qemu_coroutine_self(), + }; + + acb = bs->drv->bdrv_aio_flush(bs, bdrv_co_io_em_complete, &co); + if (acb == NULL) { + ret = -EIO; + } else { + qemu_coroutine_yield(); + ret = co.ret; + } + } else { + /* + * Some block drivers always operate in either writethrough or unsafe + * mode and don't support bdrv_flush therefore. Usually qemu doesn't + * know how the server works (because the behaviour is hardcoded or + * depends on server-side configuration), so we can't ensure that + * everything is safe on disk. Returning an error doesn't work because + * that would break guests even if the server operates in writethrough + * mode. + * + * Let's hope the user knows what he's doing. + */ + ret = 0; + } + + if (ret < 0) { + goto out; + } + + /* Now flush the underlying protocol. It will also have BDRV_O_NO_FLUSH + * in the case of cache=unsafe, so there are no useless flushes. + */ +flush_children: + ret = 0; + QLIST_FOREACH(child, &bs->children, next) { + if (child->perm & (BLK_PERM_WRITE | BLK_PERM_WRITE_UNCHANGED)) { + int this_child_ret = bdrv_co_flush(child->bs); + if (!ret) { + ret = this_child_ret; + } + } + } + +out: + /* Notify any pending flushes that we have completed */ + if (ret == 0) { + bs->flushed_gen = current_gen; + } + + qemu_co_mutex_lock(&bs->reqs_lock); + bs->active_flush_req = false; + /* Return value is ignored - it's ok if wait queue is empty */ + qemu_co_queue_next(&bs->flush_queue); + qemu_co_mutex_unlock(&bs->reqs_lock); + +early_exit: + bdrv_dec_in_flight(bs); + return ret; +} + +int coroutine_fn bdrv_co_pdiscard(BdrvChild *child, int64_t offset, + int64_t bytes) +{ + BdrvTrackedRequest req; + int max_pdiscard, ret; + int head, tail, align; + BlockDriverState *bs = child->bs; + + if (!bs || !bs->drv || !bdrv_is_inserted(bs)) { + return -ENOMEDIUM; + } + + if (bdrv_has_readonly_bitmaps(bs)) { + return -EPERM; + } + + if (offset < 0 || bytes < 0 || bytes > INT64_MAX - offset) { + return -EIO; + } + + /* Do nothing if disabled. */ + if (!(bs->open_flags & BDRV_O_UNMAP)) { + return 0; + } + + if (!bs->drv->bdrv_co_pdiscard && !bs->drv->bdrv_aio_pdiscard) { + return 0; + } + + /* Discard is advisory, but some devices track and coalesce + * unaligned requests, so we must pass everything down rather than + * round here. Still, most devices will just silently ignore + * unaligned requests (by returning -ENOTSUP), so we must fragment + * the request accordingly. */ + align = MAX(bs->bl.pdiscard_alignment, bs->bl.request_alignment); + assert(align % bs->bl.request_alignment == 0); + head = offset % align; + tail = (offset + bytes) % align; + + bdrv_inc_in_flight(bs); + tracked_request_begin(&req, bs, offset, bytes, BDRV_TRACKED_DISCARD); + + ret = bdrv_co_write_req_prepare(child, offset, bytes, &req, 0); + if (ret < 0) { + goto out; + } + + max_pdiscard = QEMU_ALIGN_DOWN(MIN_NON_ZERO(bs->bl.max_pdiscard, INT_MAX), + align); + assert(max_pdiscard >= bs->bl.request_alignment); + + while (bytes > 0) { + int64_t num = bytes; + + if (head) { + /* Make small requests to get to alignment boundaries. */ + num = MIN(bytes, align - head); + if (!QEMU_IS_ALIGNED(num, bs->bl.request_alignment)) { + num %= bs->bl.request_alignment; + } + head = (head + num) % align; + assert(num < max_pdiscard); + } else if (tail) { + if (num > align) { + /* Shorten the request to the last aligned cluster. */ + num -= tail; + } else if (!QEMU_IS_ALIGNED(tail, bs->bl.request_alignment) && + tail > bs->bl.request_alignment) { + tail %= bs->bl.request_alignment; + num -= tail; + } + } + /* limit request size */ + if (num > max_pdiscard) { + num = max_pdiscard; + } + + if (!bs->drv) { + ret = -ENOMEDIUM; + goto out; + } + if (bs->drv->bdrv_co_pdiscard) { + ret = bs->drv->bdrv_co_pdiscard(bs, offset, num); + } else { + BlockAIOCB *acb; + CoroutineIOCompletion co = { + .coroutine = qemu_coroutine_self(), + }; + + acb = bs->drv->bdrv_aio_pdiscard(bs, offset, num, + bdrv_co_io_em_complete, &co); + if (acb == NULL) { + ret = -EIO; + goto out; + } else { + qemu_coroutine_yield(); + ret = co.ret; + } + } + if (ret && ret != -ENOTSUP) { + goto out; + } + + offset += num; + bytes -= num; + } + ret = 0; +out: + bdrv_co_write_req_finish(child, req.offset, req.bytes, &req, ret); + tracked_request_end(&req); + bdrv_dec_in_flight(bs); + return ret; +} + +int bdrv_co_ioctl(BlockDriverState *bs, int req, void *buf) +{ + BlockDriver *drv = bs->drv; + CoroutineIOCompletion co = { + .coroutine = qemu_coroutine_self(), + }; + BlockAIOCB *acb; + + bdrv_inc_in_flight(bs); + if (!drv || (!drv->bdrv_aio_ioctl && !drv->bdrv_co_ioctl)) { + co.ret = -ENOTSUP; + goto out; + } + + if (drv->bdrv_co_ioctl) { + co.ret = drv->bdrv_co_ioctl(bs, req, buf); + } else { + acb = drv->bdrv_aio_ioctl(bs, req, buf, bdrv_co_io_em_complete, &co); + if (!acb) { + co.ret = -ENOTSUP; + goto out; + } + qemu_coroutine_yield(); + } +out: + bdrv_dec_in_flight(bs); + return co.ret; +} + +void *qemu_blockalign(BlockDriverState *bs, size_t size) +{ + return qemu_memalign(bdrv_opt_mem_align(bs), size); +} + +void *qemu_blockalign0(BlockDriverState *bs, size_t size) +{ + return memset(qemu_blockalign(bs, size), 0, size); +} + +void *qemu_try_blockalign(BlockDriverState *bs, size_t size) +{ + size_t align = bdrv_opt_mem_align(bs); + + /* Ensure that NULL is never returned on success */ + assert(align > 0); + if (size == 0) { + size = align; + } + + return qemu_try_memalign(align, size); +} + +void *qemu_try_blockalign0(BlockDriverState *bs, size_t size) +{ + void *mem = qemu_try_blockalign(bs, size); + + if (mem) { + memset(mem, 0, size); + } + + return mem; +} + +/* + * Check if all memory in this vector is sector aligned. + */ +bool bdrv_qiov_is_aligned(BlockDriverState *bs, QEMUIOVector *qiov) +{ + int i; + size_t alignment = bdrv_min_mem_align(bs); + + for (i = 0; i < qiov->niov; i++) { + if ((uintptr_t) qiov->iov[i].iov_base % alignment) { + return false; + } + if (qiov->iov[i].iov_len % alignment) { + return false; + } + } + + return true; +} + +void bdrv_add_before_write_notifier(BlockDriverState *bs, + NotifierWithReturn *notifier) +{ + notifier_with_return_list_add(&bs->before_write_notifiers, notifier); +} + +void bdrv_io_plug(BlockDriverState *bs) +{ + BdrvChild *child; + + QLIST_FOREACH(child, &bs->children, next) { + bdrv_io_plug(child->bs); + } + + if (qatomic_fetch_inc(&bs->io_plugged) == 0) { + BlockDriver *drv = bs->drv; + if (drv && drv->bdrv_io_plug) { + drv->bdrv_io_plug(bs); + } + } +} + +void bdrv_io_unplug(BlockDriverState *bs) +{ + BdrvChild *child; + + assert(bs->io_plugged); + if (qatomic_fetch_dec(&bs->io_plugged) == 1) { + BlockDriver *drv = bs->drv; + if (drv && drv->bdrv_io_unplug) { + drv->bdrv_io_unplug(bs); + } + } + + QLIST_FOREACH(child, &bs->children, next) { + bdrv_io_unplug(child->bs); + } +} + +void bdrv_register_buf(BlockDriverState *bs, void *host, size_t size) +{ + BdrvChild *child; + + if (bs->drv && bs->drv->bdrv_register_buf) { + bs->drv->bdrv_register_buf(bs, host, size); + } + QLIST_FOREACH(child, &bs->children, next) { + bdrv_register_buf(child->bs, host, size); + } +} + +void bdrv_unregister_buf(BlockDriverState *bs, void *host) +{ + BdrvChild *child; + + if (bs->drv && bs->drv->bdrv_unregister_buf) { + bs->drv->bdrv_unregister_buf(bs, host); + } + QLIST_FOREACH(child, &bs->children, next) { + bdrv_unregister_buf(child->bs, host); + } +} + +static int coroutine_fn bdrv_co_copy_range_internal( + BdrvChild *src, uint64_t src_offset, BdrvChild *dst, + uint64_t dst_offset, uint64_t bytes, + BdrvRequestFlags read_flags, BdrvRequestFlags write_flags, + bool recurse_src) +{ + BdrvTrackedRequest req; + int ret; + + /* TODO We can support BDRV_REQ_NO_FALLBACK here */ + assert(!(read_flags & BDRV_REQ_NO_FALLBACK)); + assert(!(write_flags & BDRV_REQ_NO_FALLBACK)); + + if (!dst || !dst->bs) { + return -ENOMEDIUM; + } + ret = bdrv_check_byte_request(dst->bs, dst_offset, bytes); + if (ret) { + return ret; + } + if (write_flags & BDRV_REQ_ZERO_WRITE) { + return bdrv_co_pwrite_zeroes(dst, dst_offset, bytes, write_flags); + } + + if (!src || !src->bs) { + return -ENOMEDIUM; + } + ret = bdrv_check_byte_request(src->bs, src_offset, bytes); + if (ret) { + return ret; + } + + if (!src->bs->drv->bdrv_co_copy_range_from + || !dst->bs->drv->bdrv_co_copy_range_to + || src->bs->encrypted || dst->bs->encrypted) { + return -ENOTSUP; + } + + if (recurse_src) { + bdrv_inc_in_flight(src->bs); + tracked_request_begin(&req, src->bs, src_offset, bytes, + BDRV_TRACKED_READ); + + /* BDRV_REQ_SERIALISING is only for write operation */ + assert(!(read_flags & BDRV_REQ_SERIALISING)); + bdrv_wait_serialising_requests(&req); + + ret = src->bs->drv->bdrv_co_copy_range_from(src->bs, + src, src_offset, + dst, dst_offset, + bytes, + read_flags, write_flags); + + tracked_request_end(&req); + bdrv_dec_in_flight(src->bs); + } else { + bdrv_inc_in_flight(dst->bs); + tracked_request_begin(&req, dst->bs, dst_offset, bytes, + BDRV_TRACKED_WRITE); + ret = bdrv_co_write_req_prepare(dst, dst_offset, bytes, &req, + write_flags); + if (!ret) { + ret = dst->bs->drv->bdrv_co_copy_range_to(dst->bs, + src, src_offset, + dst, dst_offset, + bytes, + read_flags, write_flags); + } + bdrv_co_write_req_finish(dst, dst_offset, bytes, &req, ret); + tracked_request_end(&req); + bdrv_dec_in_flight(dst->bs); + } + + return ret; +} + +/* Copy range from @src to @dst. + * + * See the comment of bdrv_co_copy_range for the parameter and return value + * semantics. */ +int coroutine_fn bdrv_co_copy_range_from(BdrvChild *src, uint64_t src_offset, + BdrvChild *dst, uint64_t dst_offset, + uint64_t bytes, + BdrvRequestFlags read_flags, + BdrvRequestFlags write_flags) +{ + trace_bdrv_co_copy_range_from(src, src_offset, dst, dst_offset, bytes, + read_flags, write_flags); + return bdrv_co_copy_range_internal(src, src_offset, dst, dst_offset, + bytes, read_flags, write_flags, true); +} + +/* Copy range from @src to @dst. + * + * See the comment of bdrv_co_copy_range for the parameter and return value + * semantics. */ +int coroutine_fn bdrv_co_copy_range_to(BdrvChild *src, uint64_t src_offset, + BdrvChild *dst, uint64_t dst_offset, + uint64_t bytes, + BdrvRequestFlags read_flags, + BdrvRequestFlags write_flags) +{ + trace_bdrv_co_copy_range_to(src, src_offset, dst, dst_offset, bytes, + read_flags, write_flags); + return bdrv_co_copy_range_internal(src, src_offset, dst, dst_offset, + bytes, read_flags, write_flags, false); +} + +int coroutine_fn bdrv_co_copy_range(BdrvChild *src, uint64_t src_offset, + BdrvChild *dst, uint64_t dst_offset, + uint64_t bytes, BdrvRequestFlags read_flags, + BdrvRequestFlags write_flags) +{ + return bdrv_co_copy_range_from(src, src_offset, + dst, dst_offset, + bytes, read_flags, write_flags); +} + +static void bdrv_parent_cb_resize(BlockDriverState *bs) +{ + BdrvChild *c; + QLIST_FOREACH(c, &bs->parents, next_parent) { + if (c->klass->resize) { + c->klass->resize(c); + } + } +} + +/** + * Truncate file to 'offset' bytes (needed only for file protocols) + * + * If 'exact' is true, the file must be resized to exactly the given + * 'offset'. Otherwise, it is sufficient for the node to be at least + * 'offset' bytes in length. + */ +int coroutine_fn bdrv_co_truncate(BdrvChild *child, int64_t offset, bool exact, + PreallocMode prealloc, BdrvRequestFlags flags, + Error **errp) +{ + BlockDriverState *bs = child->bs; + BdrvChild *filtered, *backing; + BlockDriver *drv = bs->drv; + BdrvTrackedRequest req; + int64_t old_size, new_bytes; + int ret; + + + /* if bs->drv == NULL, bs is closed, so there's nothing to do here */ + if (!drv) { + error_setg(errp, "No medium inserted"); + return -ENOMEDIUM; + } + if (offset < 0) { + error_setg(errp, "Image size cannot be negative"); + return -EINVAL; + } + + old_size = bdrv_getlength(bs); + if (old_size < 0) { + error_setg_errno(errp, -old_size, "Failed to get old image size"); + return old_size; + } + + if (offset > old_size) { + new_bytes = offset - old_size; + } else { + new_bytes = 0; + } + + bdrv_inc_in_flight(bs); + tracked_request_begin(&req, bs, offset - new_bytes, new_bytes, + BDRV_TRACKED_TRUNCATE); + + /* If we are growing the image and potentially using preallocation for the + * new area, we need to make sure that no write requests are made to it + * concurrently or they might be overwritten by preallocation. */ + if (new_bytes) { + bdrv_mark_request_serialising(&req, 1); + } + if (bs->read_only) { + error_setg(errp, "Image is read-only"); + ret = -EACCES; + goto out; + } + ret = bdrv_co_write_req_prepare(child, offset - new_bytes, new_bytes, &req, + 0); + if (ret < 0) { + error_setg_errno(errp, -ret, + "Failed to prepare request for truncation"); + goto out; + } + + filtered = bdrv_filter_child(bs); + backing = bdrv_cow_child(bs); + + /* + * If the image has a backing file that is large enough that it would + * provide data for the new area, we cannot leave it unallocated because + * then the backing file content would become visible. Instead, zero-fill + * the new area. + * + * Note that if the image has a backing file, but was opened without the + * backing file, taking care of keeping things consistent with that backing + * file is the user's responsibility. + */ + if (new_bytes && backing) { + int64_t backing_len; + + backing_len = bdrv_getlength(backing->bs); + if (backing_len < 0) { + ret = backing_len; + error_setg_errno(errp, -ret, "Could not get backing file size"); + goto out; + } + + if (backing_len > old_size) { + flags |= BDRV_REQ_ZERO_WRITE; + } + } + + if (drv->bdrv_co_truncate) { + if (flags & ~bs->supported_truncate_flags) { + error_setg(errp, "Block driver does not support requested flags"); + ret = -ENOTSUP; + goto out; + } + ret = drv->bdrv_co_truncate(bs, offset, exact, prealloc, flags, errp); + } else if (filtered) { + ret = bdrv_co_truncate(filtered, offset, exact, prealloc, flags, errp); + } else { + error_setg(errp, "Image format driver does not support resize"); + ret = -ENOTSUP; + goto out; + } + if (ret < 0) { + goto out; + } + + ret = refresh_total_sectors(bs, offset >> BDRV_SECTOR_BITS); + if (ret < 0) { + error_setg_errno(errp, -ret, "Could not refresh total sector count"); + } else { + offset = bs->total_sectors * BDRV_SECTOR_SIZE; + } + /* It's possible that truncation succeeded but refresh_total_sectors + * failed, but the latter doesn't affect how we should finish the request. + * Pass 0 as the last parameter so that dirty bitmaps etc. are handled. */ + bdrv_co_write_req_finish(child, offset - new_bytes, new_bytes, &req, 0); + +out: + tracked_request_end(&req); + bdrv_dec_in_flight(bs); + + return ret; +} diff --git a/block/io_uring.c b/block/io_uring.c new file mode 100644 index 000000000..00a3ee9fb --- /dev/null +++ b/block/io_uring.c @@ -0,0 +1,430 @@ +/* + * Linux io_uring support. + * + * Copyright (C) 2009 IBM, Corp. + * Copyright (C) 2009 Red Hat, Inc. + * Copyright (C) 2019 Aarushi Mehta + * + * This work is licensed under the terms of the GNU GPL, version 2 or later. + * See the COPYING file in the top-level directory. + */ +#include "qemu/osdep.h" +#include +#include "qemu-common.h" +#include "block/aio.h" +#include "qemu/queue.h" +#include "block/block.h" +#include "block/raw-aio.h" +#include "qemu/coroutine.h" +#include "qapi/error.h" +#include "trace.h" + +/* io_uring ring size */ +#define MAX_ENTRIES 128 + +typedef struct LuringAIOCB { + Coroutine *co; + struct io_uring_sqe sqeq; + ssize_t ret; + QEMUIOVector *qiov; + bool is_read; + QSIMPLEQ_ENTRY(LuringAIOCB) next; + + /* + * Buffered reads may require resubmission, see + * luring_resubmit_short_read(). + */ + int total_read; + QEMUIOVector resubmit_qiov; +} LuringAIOCB; + +typedef struct LuringQueue { + int plugged; + unsigned int in_queue; + unsigned int in_flight; + bool blocked; + QSIMPLEQ_HEAD(, LuringAIOCB) submit_queue; +} LuringQueue; + +typedef struct LuringState { + AioContext *aio_context; + + struct io_uring ring; + + /* io queue for submit at batch. Protected by AioContext lock. */ + LuringQueue io_q; + + /* I/O completion processing. Only runs in I/O thread. */ + QEMUBH *completion_bh; +} LuringState; + +/** + * luring_resubmit: + * + * Resubmit a request by appending it to submit_queue. The caller must ensure + * that ioq_submit() is called later so that submit_queue requests are started. + */ +static void luring_resubmit(LuringState *s, LuringAIOCB *luringcb) +{ + QSIMPLEQ_INSERT_TAIL(&s->io_q.submit_queue, luringcb, next); + s->io_q.in_queue++; +} + +/** + * luring_resubmit_short_read: + * + * Before Linux commit 9d93a3f5a0c ("io_uring: punt short reads to async + * context") a buffered I/O request with the start of the file range in the + * page cache could result in a short read. Applications need to resubmit the + * remaining read request. + * + * This is a slow path but recent kernels never take it. + */ +static void luring_resubmit_short_read(LuringState *s, LuringAIOCB *luringcb, + int nread) +{ + QEMUIOVector *resubmit_qiov; + size_t remaining; + + trace_luring_resubmit_short_read(s, luringcb, nread); + + /* Update read position */ + luringcb->total_read = nread; + remaining = luringcb->qiov->size - luringcb->total_read; + + /* Shorten qiov */ + resubmit_qiov = &luringcb->resubmit_qiov; + if (resubmit_qiov->iov == NULL) { + qemu_iovec_init(resubmit_qiov, luringcb->qiov->niov); + } else { + qemu_iovec_reset(resubmit_qiov); + } + qemu_iovec_concat(resubmit_qiov, luringcb->qiov, luringcb->total_read, + remaining); + + /* Update sqe */ + luringcb->sqeq.off = nread; + luringcb->sqeq.addr = (__u64)(uintptr_t)luringcb->resubmit_qiov.iov; + luringcb->sqeq.len = luringcb->resubmit_qiov.niov; + + luring_resubmit(s, luringcb); +} + +/** + * luring_process_completions: + * @s: AIO state + * + * Fetches completed I/O requests, consumes cqes and invokes their callbacks + * The function is somewhat tricky because it supports nested event loops, for + * example when a request callback invokes aio_poll(). + * + * Function schedules BH completion so it can be called again in a nested + * event loop. When there are no events left to complete the BH is being + * canceled. + * + */ +static void luring_process_completions(LuringState *s) +{ + struct io_uring_cqe *cqes; + int total_bytes; + /* + * Request completion callbacks can run the nested event loop. + * Schedule ourselves so the nested event loop will "see" remaining + * completed requests and process them. Without this, completion + * callbacks that wait for other requests using a nested event loop + * would hang forever. + * + * This workaround is needed because io_uring uses poll_wait, which + * is woken up when new events are added to the uring, thus polling on + * the same uring fd will block unless more events are received. + * + * Other leaf block drivers (drivers that access the data themselves) + * are networking based, so they poll sockets for data and run the + * correct coroutine. + */ + qemu_bh_schedule(s->completion_bh); + + while (io_uring_peek_cqe(&s->ring, &cqes) == 0) { + LuringAIOCB *luringcb; + int ret; + + if (!cqes) { + break; + } + + luringcb = io_uring_cqe_get_data(cqes); + ret = cqes->res; + io_uring_cqe_seen(&s->ring, cqes); + cqes = NULL; + + /* Change counters one-by-one because we can be nested. */ + s->io_q.in_flight--; + trace_luring_process_completion(s, luringcb, ret); + + /* total_read is non-zero only for resubmitted read requests */ + total_bytes = ret + luringcb->total_read; + + if (ret < 0) { + if (ret == -EINTR) { + luring_resubmit(s, luringcb); + continue; + } + } else if (!luringcb->qiov) { + goto end; + } else if (total_bytes == luringcb->qiov->size) { + ret = 0; + /* Only read/write */ + } else { + /* Short Read/Write */ + if (luringcb->is_read) { + if (ret > 0) { + luring_resubmit_short_read(s, luringcb, ret); + continue; + } else { + /* Pad with zeroes */ + qemu_iovec_memset(luringcb->qiov, total_bytes, 0, + luringcb->qiov->size - total_bytes); + ret = 0; + } + } else { + ret = -ENOSPC; + } + } +end: + luringcb->ret = ret; + qemu_iovec_destroy(&luringcb->resubmit_qiov); + + /* + * If the coroutine is already entered it must be in ioq_submit() + * and will notice luringcb->ret has been filled in when it + * eventually runs later. Coroutines cannot be entered recursively + * so avoid doing that! + */ + if (!qemu_coroutine_entered(luringcb->co)) { + aio_co_wake(luringcb->co); + } + } + qemu_bh_cancel(s->completion_bh); +} + +static int ioq_submit(LuringState *s) +{ + int ret = 0; + LuringAIOCB *luringcb, *luringcb_next; + + while (s->io_q.in_queue > 0) { + /* + * Try to fetch sqes from the ring for requests waiting in + * the overflow queue + */ + QSIMPLEQ_FOREACH_SAFE(luringcb, &s->io_q.submit_queue, next, + luringcb_next) { + struct io_uring_sqe *sqes = io_uring_get_sqe(&s->ring); + if (!sqes) { + break; + } + /* Prep sqe for submission */ + *sqes = luringcb->sqeq; + QSIMPLEQ_REMOVE_HEAD(&s->io_q.submit_queue, next); + } + ret = io_uring_submit(&s->ring); + trace_luring_io_uring_submit(s, ret); + /* Prevent infinite loop if submission is refused */ + if (ret <= 0) { + if (ret == -EAGAIN || ret == -EINTR) { + continue; + } + break; + } + s->io_q.in_flight += ret; + s->io_q.in_queue -= ret; + } + s->io_q.blocked = (s->io_q.in_queue > 0); + + if (s->io_q.in_flight) { + /* + * We can try to complete something just right away if there are + * still requests in-flight. + */ + luring_process_completions(s); + } + return ret; +} + +static void luring_process_completions_and_submit(LuringState *s) +{ + aio_context_acquire(s->aio_context); + luring_process_completions(s); + + if (!s->io_q.plugged && s->io_q.in_queue > 0) { + ioq_submit(s); + } + aio_context_release(s->aio_context); +} + +static void qemu_luring_completion_bh(void *opaque) +{ + LuringState *s = opaque; + luring_process_completions_and_submit(s); +} + +static void qemu_luring_completion_cb(void *opaque) +{ + LuringState *s = opaque; + luring_process_completions_and_submit(s); +} + +static bool qemu_luring_poll_cb(void *opaque) +{ + LuringState *s = opaque; + + if (io_uring_cq_ready(&s->ring)) { + luring_process_completions_and_submit(s); + return true; + } + + return false; +} + +static void ioq_init(LuringQueue *io_q) +{ + QSIMPLEQ_INIT(&io_q->submit_queue); + io_q->plugged = 0; + io_q->in_queue = 0; + io_q->in_flight = 0; + io_q->blocked = false; +} + +void luring_io_plug(BlockDriverState *bs, LuringState *s) +{ + trace_luring_io_plug(s); + s->io_q.plugged++; +} + +void luring_io_unplug(BlockDriverState *bs, LuringState *s) +{ + assert(s->io_q.plugged); + trace_luring_io_unplug(s, s->io_q.blocked, s->io_q.plugged, + s->io_q.in_queue, s->io_q.in_flight); + if (--s->io_q.plugged == 0 && + !s->io_q.blocked && s->io_q.in_queue > 0) { + ioq_submit(s); + } +} + +/** + * luring_do_submit: + * @fd: file descriptor for I/O + * @luringcb: AIO control block + * @s: AIO state + * @offset: offset for request + * @type: type of request + * + * Fetches sqes from ring, adds to pending queue and preps them + * + */ +static int luring_do_submit(int fd, LuringAIOCB *luringcb, LuringState *s, + uint64_t offset, int type) +{ + int ret; + struct io_uring_sqe *sqes = &luringcb->sqeq; + + switch (type) { + case QEMU_AIO_WRITE: + io_uring_prep_writev(sqes, fd, luringcb->qiov->iov, + luringcb->qiov->niov, offset); + break; + case QEMU_AIO_READ: + io_uring_prep_readv(sqes, fd, luringcb->qiov->iov, + luringcb->qiov->niov, offset); + break; + case QEMU_AIO_FLUSH: + io_uring_prep_fsync(sqes, fd, IORING_FSYNC_DATASYNC); + break; + default: + fprintf(stderr, "%s: invalid AIO request type, aborting 0x%x.\n", + __func__, type); + abort(); + } + io_uring_sqe_set_data(sqes, luringcb); + + QSIMPLEQ_INSERT_TAIL(&s->io_q.submit_queue, luringcb, next); + s->io_q.in_queue++; + trace_luring_do_submit(s, s->io_q.blocked, s->io_q.plugged, + s->io_q.in_queue, s->io_q.in_flight); + if (!s->io_q.blocked && + (!s->io_q.plugged || + s->io_q.in_flight + s->io_q.in_queue >= MAX_ENTRIES)) { + ret = ioq_submit(s); + trace_luring_do_submit_done(s, ret); + return ret; + } + return 0; +} + +int coroutine_fn luring_co_submit(BlockDriverState *bs, LuringState *s, int fd, + uint64_t offset, QEMUIOVector *qiov, int type) +{ + int ret; + LuringAIOCB luringcb = { + .co = qemu_coroutine_self(), + .ret = -EINPROGRESS, + .qiov = qiov, + .is_read = (type == QEMU_AIO_READ), + }; + trace_luring_co_submit(bs, s, &luringcb, fd, offset, qiov ? qiov->size : 0, + type); + ret = luring_do_submit(fd, &luringcb, s, offset, type); + + if (ret < 0) { + return ret; + } + + if (luringcb.ret == -EINPROGRESS) { + qemu_coroutine_yield(); + } + return luringcb.ret; +} + +void luring_detach_aio_context(LuringState *s, AioContext *old_context) +{ + aio_set_fd_handler(old_context, s->ring.ring_fd, false, NULL, NULL, NULL, + s); + qemu_bh_delete(s->completion_bh); + s->aio_context = NULL; +} + +void luring_attach_aio_context(LuringState *s, AioContext *new_context) +{ + s->aio_context = new_context; + s->completion_bh = aio_bh_new(new_context, qemu_luring_completion_bh, s); + aio_set_fd_handler(s->aio_context, s->ring.ring_fd, false, + qemu_luring_completion_cb, NULL, qemu_luring_poll_cb, s); +} + +LuringState *luring_init(Error **errp) +{ + int rc; + LuringState *s = g_new0(LuringState, 1); + struct io_uring *ring = &s->ring; + + trace_luring_init_state(s, sizeof(*s)); + + rc = io_uring_queue_init(MAX_ENTRIES, ring, 0); + if (rc < 0) { + error_setg_errno(errp, errno, "failed to init linux io_uring ring"); + g_free(s); + return NULL; + } + + ioq_init(&s->io_q); + return s; + +} + +void luring_cleanup(LuringState *s) +{ + io_uring_queue_exit(&s->ring); + trace_luring_cleanup_state(s); + g_free(s); +} diff --git a/block/iscsi-opts.c b/block/iscsi-opts.c new file mode 100644 index 000000000..afaf8837d --- /dev/null +++ b/block/iscsi-opts.c @@ -0,0 +1,70 @@ +/* + * QEMU Block driver for iSCSI images (static options) + * + * Copyright (c) 2017 Peter Lieven + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ + +#include "qemu/osdep.h" +#include "qemu/config-file.h" +#include "qemu/module.h" +#include "qemu/option.h" + +static QemuOptsList qemu_iscsi_opts = { + .name = "iscsi", + .head = QTAILQ_HEAD_INITIALIZER(qemu_iscsi_opts.head), + .desc = { + { + .name = "user", + .type = QEMU_OPT_STRING, + .help = "username for CHAP authentication to target", + },{ + .name = "password", + .type = QEMU_OPT_STRING, + .help = "password for CHAP authentication to target", + },{ + .name = "password-secret", + .type = QEMU_OPT_STRING, + .help = "ID of the secret providing password for CHAP " + "authentication to target", + },{ + .name = "header-digest", + .type = QEMU_OPT_STRING, + .help = "HeaderDigest setting. " + "{CRC32C|CRC32C-NONE|NONE-CRC32C|NONE}", + },{ + .name = "initiator-name", + .type = QEMU_OPT_STRING, + .help = "Initiator iqn name to use when connecting", + },{ + .name = "timeout", + .type = QEMU_OPT_NUMBER, + .help = "Request timeout in seconds (default 0 = no timeout)", + }, + { /* end of list */ } + }, +}; + +static void iscsi_block_opts_init(void) +{ + qemu_add_opts(&qemu_iscsi_opts); +} + +block_init(iscsi_block_opts_init); diff --git a/block/iscsi.c b/block/iscsi.c new file mode 100644 index 000000000..e30a7e360 --- /dev/null +++ b/block/iscsi.c @@ -0,0 +1,2498 @@ +/* + * QEMU Block driver for iSCSI images + * + * Copyright (c) 2010-2011 Ronnie Sahlberg + * Copyright (c) 2012-2017 Peter Lieven + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ + +#include "qemu/osdep.h" + +#include +#include +#include +#include "qemu-common.h" +#include "qemu/config-file.h" +#include "qemu/error-report.h" +#include "qemu/bitops.h" +#include "qemu/bitmap.h" +#include "block/block_int.h" +#include "block/qdict.h" +#include "scsi/constants.h" +#include "qemu/iov.h" +#include "qemu/module.h" +#include "qemu/option.h" +#include "qemu/uuid.h" +#include "sysemu/replay.h" +#include "qapi/error.h" +#include "qapi/qapi-commands-machine.h" +#include "qapi/qmp/qdict.h" +#include "qapi/qmp/qstring.h" +#include "crypto/secret.h" +#include "scsi/utils.h" +#include "trace.h" + +/* Conflict between scsi/utils.h and libiscsi! :( */ +#define SCSI_XFER_NONE ISCSI_XFER_NONE +#include +#define inline __attribute__((gnu_inline)) /* required for libiscsi v1.9.0 */ +#include +#undef inline +#undef SCSI_XFER_NONE +QEMU_BUILD_BUG_ON((int)SCSI_XFER_NONE != (int)ISCSI_XFER_NONE); + +#ifdef __linux__ +#include +#endif + +typedef struct IscsiLun { + struct iscsi_context *iscsi; + AioContext *aio_context; + int lun; + enum scsi_inquiry_peripheral_device_type type; + int block_size; + uint64_t num_blocks; + int events; + QEMUTimer *nop_timer; + QEMUTimer *event_timer; + QemuMutex mutex; + struct scsi_inquiry_logical_block_provisioning lbp; + struct scsi_inquiry_block_limits bl; + struct scsi_inquiry_device_designator *dd; + unsigned char *zeroblock; + /* The allocmap tracks which clusters (pages) on the iSCSI target are + * allocated and which are not. In case a target returns zeros for + * unallocated pages (iscsilun->lprz) we can directly return zeros instead + * of reading zeros over the wire if a read request falls within an + * unallocated block. As there are 3 possible states we need 2 bitmaps to + * track. allocmap_valid keeps track if QEMU's information about a page is + * valid. allocmap tracks if a page is allocated or not. In case QEMU has no + * valid information about a page the corresponding allocmap entry should be + * switched to unallocated as well to force a new lookup of the allocation + * status as lookups are generally skipped if a page is suspect to be + * allocated. If a iSCSI target is opened with cache.direct = on the + * allocmap_valid does not exist turning all cached information invalid so + * that a fresh lookup is made for any page even if allocmap entry returns + * it's unallocated. */ + unsigned long *allocmap; + unsigned long *allocmap_valid; + long allocmap_size; + int cluster_size; + bool use_16_for_rw; + bool write_protected; + bool lbpme; + bool lbprz; + bool dpofua; + bool has_write_same; + bool request_timed_out; +} IscsiLun; + +typedef struct IscsiTask { + int status; + int complete; + int retries; + int do_retry; + struct scsi_task *task; + Coroutine *co; + IscsiLun *iscsilun; + QEMUTimer retry_timer; + int err_code; + char *err_str; +} IscsiTask; + +typedef struct IscsiAIOCB { + BlockAIOCB common; + QEMUBH *bh; + IscsiLun *iscsilun; + struct scsi_task *task; + int status; + int64_t sector_num; + int nb_sectors; + int ret; +#ifdef __linux__ + sg_io_hdr_t *ioh; +#endif + bool cancelled; +} IscsiAIOCB; + +/* libiscsi uses time_t so its enough to process events every second */ +#define EVENT_INTERVAL 1000 +#define NOP_INTERVAL 5000 +#define MAX_NOP_FAILURES 3 +#define ISCSI_CMD_RETRIES ARRAY_SIZE(iscsi_retry_times) +static const unsigned iscsi_retry_times[] = {8, 32, 128, 512, 2048, 8192, 32768}; + +/* this threshold is a trade-off knob to choose between + * the potential additional overhead of an extra GET_LBA_STATUS request + * vs. unnecessarily reading a lot of zero sectors over the wire. + * If a read request is greater or equal than ISCSI_CHECKALLOC_THRES + * sectors we check the allocation status of the area covered by the + * request first if the allocationmap indicates that the area might be + * unallocated. */ +#define ISCSI_CHECKALLOC_THRES 64 + +#ifdef __linux__ + +static void +iscsi_bh_cb(void *p) +{ + IscsiAIOCB *acb = p; + + qemu_bh_delete(acb->bh); + + acb->common.cb(acb->common.opaque, acb->status); + + if (acb->task != NULL) { + scsi_free_scsi_task(acb->task); + acb->task = NULL; + } + + qemu_aio_unref(acb); +} + +static void +iscsi_schedule_bh(IscsiAIOCB *acb) +{ + if (acb->bh) { + return; + } + acb->bh = aio_bh_new(acb->iscsilun->aio_context, iscsi_bh_cb, acb); + qemu_bh_schedule(acb->bh); +} + +#endif + +static void iscsi_co_generic_bh_cb(void *opaque) +{ + struct IscsiTask *iTask = opaque; + + iTask->complete = 1; + aio_co_wake(iTask->co); +} + +static void iscsi_retry_timer_expired(void *opaque) +{ + struct IscsiTask *iTask = opaque; + iTask->complete = 1; + if (iTask->co) { + aio_co_wake(iTask->co); + } +} + +static inline unsigned exp_random(double mean) +{ + return -mean * log((double)rand() / RAND_MAX); +} + +/* SCSI_SENSE_ASCQ_INVALID_FIELD_IN_PARAMETER_LIST was introduced in + * libiscsi 1.10.0, together with other constants we need. Use it as + * a hint that we have to define them ourselves if needed, to keep the + * minimum required libiscsi version at 1.9.0. We use an ASCQ macro for + * the test because SCSI_STATUS_* is an enum. + * + * To guard against future changes where SCSI_SENSE_ASCQ_* also becomes + * an enum, check against the LIBISCSI_API_VERSION macro, which was + * introduced in 1.11.0. If it is present, there is no need to define + * anything. + */ +#if !defined(SCSI_SENSE_ASCQ_INVALID_FIELD_IN_PARAMETER_LIST) && \ + !defined(LIBISCSI_API_VERSION) +#define SCSI_STATUS_TASK_SET_FULL 0x28 +#define SCSI_STATUS_TIMEOUT 0x0f000002 +#define SCSI_SENSE_ASCQ_INVALID_FIELD_IN_PARAMETER_LIST 0x2600 +#define SCSI_SENSE_ASCQ_PARAMETER_LIST_LENGTH_ERROR 0x1a00 +#endif + +#ifndef LIBISCSI_API_VERSION +#define LIBISCSI_API_VERSION 20130701 +#endif + +static int iscsi_translate_sense(struct scsi_sense *sense) +{ + return scsi_sense_to_errno(sense->key, + (sense->ascq & 0xFF00) >> 8, + sense->ascq & 0xFF); +} + +/* Called (via iscsi_service) with QemuMutex held. */ +static void +iscsi_co_generic_cb(struct iscsi_context *iscsi, int status, + void *command_data, void *opaque) +{ + struct IscsiTask *iTask = opaque; + struct scsi_task *task = command_data; + + iTask->status = status; + iTask->do_retry = 0; + iTask->err_code = 0; + iTask->task = task; + + if (status != SCSI_STATUS_GOOD) { + iTask->err_code = -EIO; + if (iTask->retries++ < ISCSI_CMD_RETRIES) { + if (status == SCSI_STATUS_BUSY || + status == SCSI_STATUS_TIMEOUT || + status == SCSI_STATUS_TASK_SET_FULL) { + unsigned retry_time = + exp_random(iscsi_retry_times[iTask->retries - 1]); + if (status == SCSI_STATUS_TIMEOUT) { + /* make sure the request is rescheduled AFTER the + * reconnect is initiated */ + retry_time = EVENT_INTERVAL * 2; + iTask->iscsilun->request_timed_out = true; + } + error_report("iSCSI Busy/TaskSetFull/TimeOut" + " (retry #%u in %u ms): %s", + iTask->retries, retry_time, + iscsi_get_error(iscsi)); + aio_timer_init(iTask->iscsilun->aio_context, + &iTask->retry_timer, QEMU_CLOCK_REALTIME, + SCALE_MS, iscsi_retry_timer_expired, iTask); + timer_mod(&iTask->retry_timer, + qemu_clock_get_ms(QEMU_CLOCK_REALTIME) + retry_time); + iTask->do_retry = 1; + } else if (status == SCSI_STATUS_CHECK_CONDITION) { + int error = iscsi_translate_sense(&task->sense); + if (error == EAGAIN) { + error_report("iSCSI CheckCondition: %s", + iscsi_get_error(iscsi)); + iTask->do_retry = 1; + } else { + iTask->err_code = -error; + iTask->err_str = g_strdup(iscsi_get_error(iscsi)); + } + } + } + } + + if (iTask->co) { + replay_bh_schedule_oneshot_event(iTask->iscsilun->aio_context, + iscsi_co_generic_bh_cb, iTask); + } else { + iTask->complete = 1; + } +} + +static void iscsi_co_init_iscsitask(IscsiLun *iscsilun, struct IscsiTask *iTask) +{ + *iTask = (struct IscsiTask) { + .co = qemu_coroutine_self(), + .iscsilun = iscsilun, + }; +} + +#ifdef __linux__ + +/* Called (via iscsi_service) with QemuMutex held. */ +static void +iscsi_abort_task_cb(struct iscsi_context *iscsi, int status, void *command_data, + void *private_data) +{ + IscsiAIOCB *acb = private_data; + + /* If the command callback hasn't been called yet, drop the task */ + if (!acb->bh) { + /* Call iscsi_aio_ioctl_cb() with SCSI_STATUS_CANCELLED */ + iscsi_scsi_cancel_task(iscsi, acb->task); + } + + qemu_aio_unref(acb); /* acquired in iscsi_aio_cancel() */ +} + +static void +iscsi_aio_cancel(BlockAIOCB *blockacb) +{ + IscsiAIOCB *acb = (IscsiAIOCB *)blockacb; + IscsiLun *iscsilun = acb->iscsilun; + + qemu_mutex_lock(&iscsilun->mutex); + + /* If it was cancelled or completed already, our work is done here */ + if (acb->cancelled || acb->status != -EINPROGRESS) { + qemu_mutex_unlock(&iscsilun->mutex); + return; + } + + acb->cancelled = true; + + qemu_aio_ref(acb); /* released in iscsi_abort_task_cb() */ + + /* send a task mgmt call to the target to cancel the task on the target */ + if (iscsi_task_mgmt_abort_task_async(iscsilun->iscsi, acb->task, + iscsi_abort_task_cb, acb) < 0) { + qemu_aio_unref(acb); /* since iscsi_abort_task_cb() won't be called */ + } + + qemu_mutex_unlock(&iscsilun->mutex); +} + +static const AIOCBInfo iscsi_aiocb_info = { + .aiocb_size = sizeof(IscsiAIOCB), + .cancel_async = iscsi_aio_cancel, +}; + +#endif + +static void iscsi_process_read(void *arg); +static void iscsi_process_write(void *arg); + +/* Called with QemuMutex held. */ +static void +iscsi_set_events(IscsiLun *iscsilun) +{ + struct iscsi_context *iscsi = iscsilun->iscsi; + int ev = iscsi_which_events(iscsi); + + if (ev != iscsilun->events) { + aio_set_fd_handler(iscsilun->aio_context, iscsi_get_fd(iscsi), + false, + (ev & POLLIN) ? iscsi_process_read : NULL, + (ev & POLLOUT) ? iscsi_process_write : NULL, + NULL, + iscsilun); + iscsilun->events = ev; + } +} + +static void iscsi_timed_check_events(void *opaque) +{ + IscsiLun *iscsilun = opaque; + + qemu_mutex_lock(&iscsilun->mutex); + + /* check for timed out requests */ + iscsi_service(iscsilun->iscsi, 0); + + if (iscsilun->request_timed_out) { + iscsilun->request_timed_out = false; + iscsi_reconnect(iscsilun->iscsi); + } + + /* newer versions of libiscsi may return zero events. Ensure we are able + * to return to service once this situation changes. */ + iscsi_set_events(iscsilun); + + qemu_mutex_unlock(&iscsilun->mutex); + + timer_mod(iscsilun->event_timer, + qemu_clock_get_ms(QEMU_CLOCK_REALTIME) + EVENT_INTERVAL); +} + +static void +iscsi_process_read(void *arg) +{ + IscsiLun *iscsilun = arg; + struct iscsi_context *iscsi = iscsilun->iscsi; + + qemu_mutex_lock(&iscsilun->mutex); + iscsi_service(iscsi, POLLIN); + iscsi_set_events(iscsilun); + qemu_mutex_unlock(&iscsilun->mutex); +} + +static void +iscsi_process_write(void *arg) +{ + IscsiLun *iscsilun = arg; + struct iscsi_context *iscsi = iscsilun->iscsi; + + qemu_mutex_lock(&iscsilun->mutex); + iscsi_service(iscsi, POLLOUT); + iscsi_set_events(iscsilun); + qemu_mutex_unlock(&iscsilun->mutex); +} + +static int64_t sector_lun2qemu(int64_t sector, IscsiLun *iscsilun) +{ + return sector * iscsilun->block_size / BDRV_SECTOR_SIZE; +} + +static int64_t sector_qemu2lun(int64_t sector, IscsiLun *iscsilun) +{ + return sector * BDRV_SECTOR_SIZE / iscsilun->block_size; +} + +static bool is_byte_request_lun_aligned(int64_t offset, int count, + IscsiLun *iscsilun) +{ + if (offset % iscsilun->block_size || count % iscsilun->block_size) { + error_report("iSCSI misaligned request: " + "iscsilun->block_size %u, offset %" PRIi64 + ", count %d", + iscsilun->block_size, offset, count); + return false; + } + return true; +} + +static bool is_sector_request_lun_aligned(int64_t sector_num, int nb_sectors, + IscsiLun *iscsilun) +{ + assert(nb_sectors <= BDRV_REQUEST_MAX_SECTORS); + return is_byte_request_lun_aligned(sector_num << BDRV_SECTOR_BITS, + nb_sectors << BDRV_SECTOR_BITS, + iscsilun); +} + +static void iscsi_allocmap_free(IscsiLun *iscsilun) +{ + g_free(iscsilun->allocmap); + g_free(iscsilun->allocmap_valid); + iscsilun->allocmap = NULL; + iscsilun->allocmap_valid = NULL; +} + + +static int iscsi_allocmap_init(IscsiLun *iscsilun, int open_flags) +{ + iscsi_allocmap_free(iscsilun); + + assert(iscsilun->cluster_size); + iscsilun->allocmap_size = + DIV_ROUND_UP(iscsilun->num_blocks * iscsilun->block_size, + iscsilun->cluster_size); + + iscsilun->allocmap = bitmap_try_new(iscsilun->allocmap_size); + if (!iscsilun->allocmap) { + return -ENOMEM; + } + + if (open_flags & BDRV_O_NOCACHE) { + /* when cache.direct = on all allocmap entries are + * treated as invalid to force a relookup of the block + * status on every read request */ + return 0; + } + + iscsilun->allocmap_valid = bitmap_try_new(iscsilun->allocmap_size); + if (!iscsilun->allocmap_valid) { + /* if we are under memory pressure free the allocmap as well */ + iscsi_allocmap_free(iscsilun); + return -ENOMEM; + } + + return 0; +} + +static void +iscsi_allocmap_update(IscsiLun *iscsilun, int64_t offset, + int64_t bytes, bool allocated, bool valid) +{ + int64_t cl_num_expanded, nb_cls_expanded, cl_num_shrunk, nb_cls_shrunk; + + if (iscsilun->allocmap == NULL) { + return; + } + /* expand to entirely contain all affected clusters */ + assert(iscsilun->cluster_size); + cl_num_expanded = offset / iscsilun->cluster_size; + nb_cls_expanded = DIV_ROUND_UP(offset + bytes, + iscsilun->cluster_size) - cl_num_expanded; + /* shrink to touch only completely contained clusters */ + cl_num_shrunk = DIV_ROUND_UP(offset, iscsilun->cluster_size); + nb_cls_shrunk = (offset + bytes) / iscsilun->cluster_size - cl_num_shrunk; + if (allocated) { + bitmap_set(iscsilun->allocmap, cl_num_expanded, nb_cls_expanded); + } else { + if (nb_cls_shrunk > 0) { + bitmap_clear(iscsilun->allocmap, cl_num_shrunk, nb_cls_shrunk); + } + } + + if (iscsilun->allocmap_valid == NULL) { + return; + } + if (valid) { + if (nb_cls_shrunk > 0) { + bitmap_set(iscsilun->allocmap_valid, cl_num_shrunk, nb_cls_shrunk); + } + } else { + bitmap_clear(iscsilun->allocmap_valid, cl_num_expanded, + nb_cls_expanded); + } +} + +static void +iscsi_allocmap_set_allocated(IscsiLun *iscsilun, int64_t offset, + int64_t bytes) +{ + iscsi_allocmap_update(iscsilun, offset, bytes, true, true); +} + +static void +iscsi_allocmap_set_unallocated(IscsiLun *iscsilun, int64_t offset, + int64_t bytes) +{ + /* Note: if cache.direct=on the fifth argument to iscsi_allocmap_update + * is ignored, so this will in effect be an iscsi_allocmap_set_invalid. + */ + iscsi_allocmap_update(iscsilun, offset, bytes, false, true); +} + +static void iscsi_allocmap_set_invalid(IscsiLun *iscsilun, int64_t offset, + int64_t bytes) +{ + iscsi_allocmap_update(iscsilun, offset, bytes, false, false); +} + +static void iscsi_allocmap_invalidate(IscsiLun *iscsilun) +{ + if (iscsilun->allocmap) { + bitmap_zero(iscsilun->allocmap, iscsilun->allocmap_size); + } + if (iscsilun->allocmap_valid) { + bitmap_zero(iscsilun->allocmap_valid, iscsilun->allocmap_size); + } +} + +static inline bool +iscsi_allocmap_is_allocated(IscsiLun *iscsilun, int64_t offset, + int64_t bytes) +{ + unsigned long size; + if (iscsilun->allocmap == NULL) { + return true; + } + assert(iscsilun->cluster_size); + size = DIV_ROUND_UP(offset + bytes, iscsilun->cluster_size); + return !(find_next_bit(iscsilun->allocmap, size, + offset / iscsilun->cluster_size) == size); +} + +static inline bool iscsi_allocmap_is_valid(IscsiLun *iscsilun, + int64_t offset, int64_t bytes) +{ + unsigned long size; + if (iscsilun->allocmap_valid == NULL) { + return false; + } + assert(iscsilun->cluster_size); + size = DIV_ROUND_UP(offset + bytes, iscsilun->cluster_size); + return (find_next_zero_bit(iscsilun->allocmap_valid, size, + offset / iscsilun->cluster_size) == size); +} + +static void coroutine_fn iscsi_co_wait_for_task(IscsiTask *iTask, + IscsiLun *iscsilun) +{ + while (!iTask->complete) { + iscsi_set_events(iscsilun); + qemu_mutex_unlock(&iscsilun->mutex); + qemu_coroutine_yield(); + qemu_mutex_lock(&iscsilun->mutex); + } +} + +static int coroutine_fn +iscsi_co_writev(BlockDriverState *bs, int64_t sector_num, int nb_sectors, + QEMUIOVector *iov, int flags) +{ + IscsiLun *iscsilun = bs->opaque; + struct IscsiTask iTask; + uint64_t lba; + uint32_t num_sectors; + bool fua = flags & BDRV_REQ_FUA; + int r = 0; + + if (fua) { + assert(iscsilun->dpofua); + } + if (!is_sector_request_lun_aligned(sector_num, nb_sectors, iscsilun)) { + return -EINVAL; + } + + if (bs->bl.max_transfer) { + assert(nb_sectors << BDRV_SECTOR_BITS <= bs->bl.max_transfer); + } + + lba = sector_qemu2lun(sector_num, iscsilun); + num_sectors = sector_qemu2lun(nb_sectors, iscsilun); + iscsi_co_init_iscsitask(iscsilun, &iTask); + qemu_mutex_lock(&iscsilun->mutex); +retry: + if (iscsilun->use_16_for_rw) { +#if LIBISCSI_API_VERSION >= (20160603) + iTask.task = iscsi_write16_iov_task(iscsilun->iscsi, iscsilun->lun, lba, + NULL, num_sectors * iscsilun->block_size, + iscsilun->block_size, 0, 0, fua, 0, 0, + iscsi_co_generic_cb, &iTask, + (struct scsi_iovec *)iov->iov, iov->niov); + } else { + iTask.task = iscsi_write10_iov_task(iscsilun->iscsi, iscsilun->lun, lba, + NULL, num_sectors * iscsilun->block_size, + iscsilun->block_size, 0, 0, fua, 0, 0, + iscsi_co_generic_cb, &iTask, + (struct scsi_iovec *)iov->iov, iov->niov); + } +#else + iTask.task = iscsi_write16_task(iscsilun->iscsi, iscsilun->lun, lba, + NULL, num_sectors * iscsilun->block_size, + iscsilun->block_size, 0, 0, fua, 0, 0, + iscsi_co_generic_cb, &iTask); + } else { + iTask.task = iscsi_write10_task(iscsilun->iscsi, iscsilun->lun, lba, + NULL, num_sectors * iscsilun->block_size, + iscsilun->block_size, 0, 0, fua, 0, 0, + iscsi_co_generic_cb, &iTask); + } +#endif + if (iTask.task == NULL) { + qemu_mutex_unlock(&iscsilun->mutex); + return -ENOMEM; + } +#if LIBISCSI_API_VERSION < (20160603) + scsi_task_set_iov_out(iTask.task, (struct scsi_iovec *) iov->iov, + iov->niov); +#endif + iscsi_co_wait_for_task(&iTask, iscsilun); + + if (iTask.task != NULL) { + scsi_free_scsi_task(iTask.task); + iTask.task = NULL; + } + + if (iTask.do_retry) { + iTask.complete = 0; + goto retry; + } + + if (iTask.status != SCSI_STATUS_GOOD) { + iscsi_allocmap_set_invalid(iscsilun, sector_num * BDRV_SECTOR_SIZE, + nb_sectors * BDRV_SECTOR_SIZE); + error_report("iSCSI WRITE10/16 failed at lba %" PRIu64 ": %s", lba, + iTask.err_str); + r = iTask.err_code; + goto out_unlock; + } + + iscsi_allocmap_set_allocated(iscsilun, sector_num * BDRV_SECTOR_SIZE, + nb_sectors * BDRV_SECTOR_SIZE); + +out_unlock: + qemu_mutex_unlock(&iscsilun->mutex); + g_free(iTask.err_str); + return r; +} + + + +static int coroutine_fn iscsi_co_block_status(BlockDriverState *bs, + bool want_zero, int64_t offset, + int64_t bytes, int64_t *pnum, + int64_t *map, + BlockDriverState **file) +{ + IscsiLun *iscsilun = bs->opaque; + struct scsi_get_lba_status *lbas = NULL; + struct scsi_lba_status_descriptor *lbasd = NULL; + struct IscsiTask iTask; + uint64_t lba, max_bytes; + int ret; + + iscsi_co_init_iscsitask(iscsilun, &iTask); + + assert(QEMU_IS_ALIGNED(offset | bytes, iscsilun->block_size)); + + /* default to all sectors allocated */ + ret = BDRV_BLOCK_DATA | BDRV_BLOCK_OFFSET_VALID; + if (map) { + *map = offset; + } + *pnum = bytes; + + /* LUN does not support logical block provisioning */ + if (!iscsilun->lbpme) { + goto out; + } + + lba = offset / iscsilun->block_size; + max_bytes = (iscsilun->num_blocks - lba) * iscsilun->block_size; + + qemu_mutex_lock(&iscsilun->mutex); +retry: + if (iscsi_get_lba_status_task(iscsilun->iscsi, iscsilun->lun, + lba, 8 + 16, iscsi_co_generic_cb, + &iTask) == NULL) { + ret = -ENOMEM; + goto out_unlock; + } + iscsi_co_wait_for_task(&iTask, iscsilun); + + if (iTask.do_retry) { + if (iTask.task != NULL) { + scsi_free_scsi_task(iTask.task); + iTask.task = NULL; + } + iTask.complete = 0; + goto retry; + } + + if (iTask.status != SCSI_STATUS_GOOD) { + /* in case the get_lba_status_callout fails (i.e. + * because the device is busy or the cmd is not + * supported) we pretend all blocks are allocated + * for backwards compatibility */ + error_report("iSCSI GET_LBA_STATUS failed at lba %" PRIu64 ": %s", + lba, iTask.err_str); + goto out_unlock; + } + + lbas = scsi_datain_unmarshall(iTask.task); + if (lbas == NULL || lbas->num_descriptors == 0) { + ret = -EIO; + goto out_unlock; + } + + lbasd = &lbas->descriptors[0]; + + if (lba != lbasd->lba) { + ret = -EIO; + goto out_unlock; + } + + *pnum = MIN((int64_t) lbasd->num_blocks * iscsilun->block_size, max_bytes); + + if (lbasd->provisioning == SCSI_PROVISIONING_TYPE_DEALLOCATED || + lbasd->provisioning == SCSI_PROVISIONING_TYPE_ANCHORED) { + ret &= ~BDRV_BLOCK_DATA; + if (iscsilun->lbprz) { + ret |= BDRV_BLOCK_ZERO; + } + } + + if (ret & BDRV_BLOCK_ZERO) { + iscsi_allocmap_set_unallocated(iscsilun, offset, *pnum); + } else { + iscsi_allocmap_set_allocated(iscsilun, offset, *pnum); + } + + if (*pnum > bytes) { + *pnum = bytes; + } +out_unlock: + qemu_mutex_unlock(&iscsilun->mutex); + g_free(iTask.err_str); +out: + if (iTask.task != NULL) { + scsi_free_scsi_task(iTask.task); + } + if (ret > 0 && ret & BDRV_BLOCK_OFFSET_VALID && file) { + *file = bs; + } + return ret; +} + +static int coroutine_fn iscsi_co_readv(BlockDriverState *bs, + int64_t sector_num, int nb_sectors, + QEMUIOVector *iov) +{ + IscsiLun *iscsilun = bs->opaque; + struct IscsiTask iTask; + uint64_t lba; + uint32_t num_sectors; + int r = 0; + + if (!is_sector_request_lun_aligned(sector_num, nb_sectors, iscsilun)) { + return -EINVAL; + } + + if (bs->bl.max_transfer) { + assert(nb_sectors << BDRV_SECTOR_BITS <= bs->bl.max_transfer); + } + + /* if cache.direct is off and we have a valid entry in our allocation map + * we can skip checking the block status and directly return zeroes if + * the request falls within an unallocated area */ + if (iscsi_allocmap_is_valid(iscsilun, sector_num * BDRV_SECTOR_SIZE, + nb_sectors * BDRV_SECTOR_SIZE) && + !iscsi_allocmap_is_allocated(iscsilun, sector_num * BDRV_SECTOR_SIZE, + nb_sectors * BDRV_SECTOR_SIZE)) { + qemu_iovec_memset(iov, 0, 0x00, iov->size); + return 0; + } + + if (nb_sectors >= ISCSI_CHECKALLOC_THRES && + !iscsi_allocmap_is_valid(iscsilun, sector_num * BDRV_SECTOR_SIZE, + nb_sectors * BDRV_SECTOR_SIZE) && + !iscsi_allocmap_is_allocated(iscsilun, sector_num * BDRV_SECTOR_SIZE, + nb_sectors * BDRV_SECTOR_SIZE)) { + int64_t pnum; + /* check the block status from the beginning of the cluster + * containing the start sector */ + int64_t head; + int ret; + + assert(iscsilun->cluster_size); + head = (sector_num * BDRV_SECTOR_SIZE) % iscsilun->cluster_size; + ret = iscsi_co_block_status(bs, true, + sector_num * BDRV_SECTOR_SIZE - head, + BDRV_REQUEST_MAX_BYTES, &pnum, NULL, NULL); + if (ret < 0) { + return ret; + } + /* if the whole request falls into an unallocated area we can avoid + * reading and directly return zeroes instead */ + if (ret & BDRV_BLOCK_ZERO && + pnum >= nb_sectors * BDRV_SECTOR_SIZE + head) { + qemu_iovec_memset(iov, 0, 0x00, iov->size); + return 0; + } + } + + lba = sector_qemu2lun(sector_num, iscsilun); + num_sectors = sector_qemu2lun(nb_sectors, iscsilun); + + iscsi_co_init_iscsitask(iscsilun, &iTask); + qemu_mutex_lock(&iscsilun->mutex); +retry: + if (iscsilun->use_16_for_rw) { +#if LIBISCSI_API_VERSION >= (20160603) + iTask.task = iscsi_read16_iov_task(iscsilun->iscsi, iscsilun->lun, lba, + num_sectors * iscsilun->block_size, + iscsilun->block_size, 0, 0, 0, 0, 0, + iscsi_co_generic_cb, &iTask, + (struct scsi_iovec *)iov->iov, iov->niov); + } else { + iTask.task = iscsi_read10_iov_task(iscsilun->iscsi, iscsilun->lun, lba, + num_sectors * iscsilun->block_size, + iscsilun->block_size, + 0, 0, 0, 0, 0, + iscsi_co_generic_cb, &iTask, + (struct scsi_iovec *)iov->iov, iov->niov); + } +#else + iTask.task = iscsi_read16_task(iscsilun->iscsi, iscsilun->lun, lba, + num_sectors * iscsilun->block_size, + iscsilun->block_size, 0, 0, 0, 0, 0, + iscsi_co_generic_cb, &iTask); + } else { + iTask.task = iscsi_read10_task(iscsilun->iscsi, iscsilun->lun, lba, + num_sectors * iscsilun->block_size, + iscsilun->block_size, + 0, 0, 0, 0, 0, + iscsi_co_generic_cb, &iTask); + } +#endif + if (iTask.task == NULL) { + qemu_mutex_unlock(&iscsilun->mutex); + return -ENOMEM; + } +#if LIBISCSI_API_VERSION < (20160603) + scsi_task_set_iov_in(iTask.task, (struct scsi_iovec *) iov->iov, iov->niov); +#endif + + iscsi_co_wait_for_task(&iTask, iscsilun); + if (iTask.task != NULL) { + scsi_free_scsi_task(iTask.task); + iTask.task = NULL; + } + + if (iTask.do_retry) { + iTask.complete = 0; + goto retry; + } + + if (iTask.status != SCSI_STATUS_GOOD) { + error_report("iSCSI READ10/16 failed at lba %" PRIu64 ": %s", + lba, iTask.err_str); + r = iTask.err_code; + } + + qemu_mutex_unlock(&iscsilun->mutex); + g_free(iTask.err_str); + return r; +} + +static int coroutine_fn iscsi_co_flush(BlockDriverState *bs) +{ + IscsiLun *iscsilun = bs->opaque; + struct IscsiTask iTask; + int r = 0; + + iscsi_co_init_iscsitask(iscsilun, &iTask); + qemu_mutex_lock(&iscsilun->mutex); +retry: + if (iscsi_synchronizecache10_task(iscsilun->iscsi, iscsilun->lun, 0, 0, 0, + 0, iscsi_co_generic_cb, &iTask) == NULL) { + qemu_mutex_unlock(&iscsilun->mutex); + return -ENOMEM; + } + + iscsi_co_wait_for_task(&iTask, iscsilun); + + if (iTask.task != NULL) { + scsi_free_scsi_task(iTask.task); + iTask.task = NULL; + } + + if (iTask.do_retry) { + iTask.complete = 0; + goto retry; + } + + if (iTask.status != SCSI_STATUS_GOOD) { + error_report("iSCSI SYNCHRONIZECACHE10 failed: %s", iTask.err_str); + r = iTask.err_code; + } + + qemu_mutex_unlock(&iscsilun->mutex); + g_free(iTask.err_str); + return r; +} + +#ifdef __linux__ +/* Called (via iscsi_service) with QemuMutex held. */ +static void +iscsi_aio_ioctl_cb(struct iscsi_context *iscsi, int status, + void *command_data, void *opaque) +{ + IscsiAIOCB *acb = opaque; + + if (status == SCSI_STATUS_CANCELLED) { + if (!acb->bh) { + acb->status = -ECANCELED; + iscsi_schedule_bh(acb); + } + return; + } + + acb->status = 0; + if (status < 0) { + error_report("Failed to ioctl(SG_IO) to iSCSI lun. %s", + iscsi_get_error(iscsi)); + acb->status = -iscsi_translate_sense(&acb->task->sense); + } + + acb->ioh->driver_status = 0; + acb->ioh->host_status = 0; + acb->ioh->resid = 0; + acb->ioh->status = status; + +#define SG_ERR_DRIVER_SENSE 0x08 + + if (status == SCSI_STATUS_CHECK_CONDITION && acb->task->datain.size >= 2) { + int ss; + + acb->ioh->driver_status |= SG_ERR_DRIVER_SENSE; + + acb->ioh->sb_len_wr = acb->task->datain.size - 2; + ss = MIN(acb->ioh->mx_sb_len, acb->ioh->sb_len_wr); + memcpy(acb->ioh->sbp, &acb->task->datain.data[2], ss); + } + + iscsi_schedule_bh(acb); +} + +static void iscsi_ioctl_bh_completion(void *opaque) +{ + IscsiAIOCB *acb = opaque; + + qemu_bh_delete(acb->bh); + acb->common.cb(acb->common.opaque, acb->ret); + qemu_aio_unref(acb); +} + +static void iscsi_ioctl_handle_emulated(IscsiAIOCB *acb, int req, void *buf) +{ + BlockDriverState *bs = acb->common.bs; + IscsiLun *iscsilun = bs->opaque; + int ret = 0; + + switch (req) { + case SG_GET_VERSION_NUM: + *(int *)buf = 30000; + break; + case SG_GET_SCSI_ID: + ((struct sg_scsi_id *)buf)->scsi_type = iscsilun->type; + break; + default: + ret = -EINVAL; + } + assert(!acb->bh); + acb->bh = aio_bh_new(bdrv_get_aio_context(bs), + iscsi_ioctl_bh_completion, acb); + acb->ret = ret; + qemu_bh_schedule(acb->bh); +} + +static BlockAIOCB *iscsi_aio_ioctl(BlockDriverState *bs, + unsigned long int req, void *buf, + BlockCompletionFunc *cb, void *opaque) +{ + IscsiLun *iscsilun = bs->opaque; + struct iscsi_context *iscsi = iscsilun->iscsi; + struct iscsi_data data; + IscsiAIOCB *acb; + + acb = qemu_aio_get(&iscsi_aiocb_info, bs, cb, opaque); + + acb->iscsilun = iscsilun; + acb->bh = NULL; + acb->status = -EINPROGRESS; + acb->ioh = buf; + acb->cancelled = false; + + if (req != SG_IO) { + iscsi_ioctl_handle_emulated(acb, req, buf); + return &acb->common; + } + + if (acb->ioh->cmd_len > SCSI_CDB_MAX_SIZE) { + error_report("iSCSI: ioctl error CDB exceeds max size (%d > %d)", + acb->ioh->cmd_len, SCSI_CDB_MAX_SIZE); + qemu_aio_unref(acb); + return NULL; + } + + acb->task = malloc(sizeof(struct scsi_task)); + if (acb->task == NULL) { + error_report("iSCSI: Failed to allocate task for scsi command. %s", + iscsi_get_error(iscsi)); + qemu_aio_unref(acb); + return NULL; + } + memset(acb->task, 0, sizeof(struct scsi_task)); + + switch (acb->ioh->dxfer_direction) { + case SG_DXFER_TO_DEV: + acb->task->xfer_dir = SCSI_XFER_WRITE; + break; + case SG_DXFER_FROM_DEV: + acb->task->xfer_dir = SCSI_XFER_READ; + break; + default: + acb->task->xfer_dir = SCSI_XFER_NONE; + break; + } + + acb->task->cdb_size = acb->ioh->cmd_len; + memcpy(&acb->task->cdb[0], acb->ioh->cmdp, acb->ioh->cmd_len); + acb->task->expxferlen = acb->ioh->dxfer_len; + + data.size = 0; + qemu_mutex_lock(&iscsilun->mutex); + if (acb->task->xfer_dir == SCSI_XFER_WRITE) { + if (acb->ioh->iovec_count == 0) { + data.data = acb->ioh->dxferp; + data.size = acb->ioh->dxfer_len; + } else { + scsi_task_set_iov_out(acb->task, + (struct scsi_iovec *) acb->ioh->dxferp, + acb->ioh->iovec_count); + } + } + + if (iscsi_scsi_command_async(iscsi, iscsilun->lun, acb->task, + iscsi_aio_ioctl_cb, + (data.size > 0) ? &data : NULL, + acb) != 0) { + qemu_mutex_unlock(&iscsilun->mutex); + scsi_free_scsi_task(acb->task); + qemu_aio_unref(acb); + return NULL; + } + + /* tell libiscsi to read straight into the buffer we got from ioctl */ + if (acb->task->xfer_dir == SCSI_XFER_READ) { + if (acb->ioh->iovec_count == 0) { + scsi_task_add_data_in_buffer(acb->task, + acb->ioh->dxfer_len, + acb->ioh->dxferp); + } else { + scsi_task_set_iov_in(acb->task, + (struct scsi_iovec *) acb->ioh->dxferp, + acb->ioh->iovec_count); + } + } + + iscsi_set_events(iscsilun); + qemu_mutex_unlock(&iscsilun->mutex); + + return &acb->common; +} + +#endif + +static int64_t +iscsi_getlength(BlockDriverState *bs) +{ + IscsiLun *iscsilun = bs->opaque; + int64_t len; + + len = iscsilun->num_blocks; + len *= iscsilun->block_size; + + return len; +} + +static int +coroutine_fn iscsi_co_pdiscard(BlockDriverState *bs, int64_t offset, int bytes) +{ + IscsiLun *iscsilun = bs->opaque; + struct IscsiTask iTask; + struct unmap_list list; + int r = 0; + + if (!is_byte_request_lun_aligned(offset, bytes, iscsilun)) { + return -ENOTSUP; + } + + if (!iscsilun->lbp.lbpu) { + /* UNMAP is not supported by the target */ + return 0; + } + + list.lba = offset / iscsilun->block_size; + list.num = bytes / iscsilun->block_size; + + iscsi_co_init_iscsitask(iscsilun, &iTask); + qemu_mutex_lock(&iscsilun->mutex); +retry: + if (iscsi_unmap_task(iscsilun->iscsi, iscsilun->lun, 0, 0, &list, 1, + iscsi_co_generic_cb, &iTask) == NULL) { + r = -ENOMEM; + goto out_unlock; + } + + iscsi_co_wait_for_task(&iTask, iscsilun); + + if (iTask.task != NULL) { + scsi_free_scsi_task(iTask.task); + iTask.task = NULL; + } + + if (iTask.do_retry) { + iTask.complete = 0; + goto retry; + } + + iscsi_allocmap_set_invalid(iscsilun, offset, bytes); + + if (iTask.status == SCSI_STATUS_CHECK_CONDITION) { + /* the target might fail with a check condition if it + is not happy with the alignment of the UNMAP request + we silently fail in this case */ + goto out_unlock; + } + + if (iTask.status != SCSI_STATUS_GOOD) { + error_report("iSCSI UNMAP failed at lba %" PRIu64 ": %s", + list.lba, iTask.err_str); + r = iTask.err_code; + goto out_unlock; + } + +out_unlock: + qemu_mutex_unlock(&iscsilun->mutex); + g_free(iTask.err_str); + return r; +} + +static int +coroutine_fn iscsi_co_pwrite_zeroes(BlockDriverState *bs, int64_t offset, + int bytes, BdrvRequestFlags flags) +{ + IscsiLun *iscsilun = bs->opaque; + struct IscsiTask iTask; + uint64_t lba; + uint32_t nb_blocks; + bool use_16_for_ws = iscsilun->use_16_for_rw; + int r = 0; + + if (!is_byte_request_lun_aligned(offset, bytes, iscsilun)) { + return -ENOTSUP; + } + + if (flags & BDRV_REQ_MAY_UNMAP) { + if (!use_16_for_ws && !iscsilun->lbp.lbpws10) { + /* WRITESAME10 with UNMAP is unsupported try WRITESAME16 */ + use_16_for_ws = true; + } + if (use_16_for_ws && !iscsilun->lbp.lbpws) { + /* WRITESAME16 with UNMAP is not supported by the target, + * fall back and try WRITESAME10/16 without UNMAP */ + flags &= ~BDRV_REQ_MAY_UNMAP; + use_16_for_ws = iscsilun->use_16_for_rw; + } + } + + if (!(flags & BDRV_REQ_MAY_UNMAP) && !iscsilun->has_write_same) { + /* WRITESAME without UNMAP is not supported by the target */ + return -ENOTSUP; + } + + lba = offset / iscsilun->block_size; + nb_blocks = bytes / iscsilun->block_size; + + if (iscsilun->zeroblock == NULL) { + iscsilun->zeroblock = g_try_malloc0(iscsilun->block_size); + if (iscsilun->zeroblock == NULL) { + return -ENOMEM; + } + } + + qemu_mutex_lock(&iscsilun->mutex); + iscsi_co_init_iscsitask(iscsilun, &iTask); +retry: + if (use_16_for_ws) { + iTask.task = iscsi_writesame16_task(iscsilun->iscsi, iscsilun->lun, lba, + iscsilun->zeroblock, iscsilun->block_size, + nb_blocks, 0, !!(flags & BDRV_REQ_MAY_UNMAP), + 0, 0, iscsi_co_generic_cb, &iTask); + } else { + iTask.task = iscsi_writesame10_task(iscsilun->iscsi, iscsilun->lun, lba, + iscsilun->zeroblock, iscsilun->block_size, + nb_blocks, 0, !!(flags & BDRV_REQ_MAY_UNMAP), + 0, 0, iscsi_co_generic_cb, &iTask); + } + if (iTask.task == NULL) { + qemu_mutex_unlock(&iscsilun->mutex); + return -ENOMEM; + } + + iscsi_co_wait_for_task(&iTask, iscsilun); + + if (iTask.status == SCSI_STATUS_CHECK_CONDITION && + iTask.task->sense.key == SCSI_SENSE_ILLEGAL_REQUEST && + (iTask.task->sense.ascq == SCSI_SENSE_ASCQ_INVALID_OPERATION_CODE || + iTask.task->sense.ascq == SCSI_SENSE_ASCQ_INVALID_FIELD_IN_CDB)) { + /* WRITE SAME is not supported by the target */ + iscsilun->has_write_same = false; + scsi_free_scsi_task(iTask.task); + r = -ENOTSUP; + goto out_unlock; + } + + if (iTask.task != NULL) { + scsi_free_scsi_task(iTask.task); + iTask.task = NULL; + } + + if (iTask.do_retry) { + iTask.complete = 0; + goto retry; + } + + if (iTask.status != SCSI_STATUS_GOOD) { + iscsi_allocmap_set_invalid(iscsilun, offset, bytes); + error_report("iSCSI WRITESAME10/16 failed at lba %" PRIu64 ": %s", + lba, iTask.err_str); + r = iTask.err_code; + goto out_unlock; + } + + if (flags & BDRV_REQ_MAY_UNMAP) { + iscsi_allocmap_set_invalid(iscsilun, offset, bytes); + } else { + iscsi_allocmap_set_allocated(iscsilun, offset, bytes); + } + +out_unlock: + qemu_mutex_unlock(&iscsilun->mutex); + g_free(iTask.err_str); + return r; +} + +static void apply_chap(struct iscsi_context *iscsi, QemuOpts *opts, + Error **errp) +{ + const char *user = NULL; + const char *password = NULL; + const char *secretid; + char *secret = NULL; + + user = qemu_opt_get(opts, "user"); + if (!user) { + return; + } + + secretid = qemu_opt_get(opts, "password-secret"); + password = qemu_opt_get(opts, "password"); + if (secretid && password) { + error_setg(errp, "'password' and 'password-secret' properties are " + "mutually exclusive"); + return; + } + if (secretid) { + secret = qcrypto_secret_lookup_as_utf8(secretid, errp); + if (!secret) { + return; + } + password = secret; + } else if (!password) { + error_setg(errp, "CHAP username specified but no password was given"); + return; + } + + if (iscsi_set_initiator_username_pwd(iscsi, user, password)) { + error_setg(errp, "Failed to set initiator username and password"); + } + + g_free(secret); +} + +static void apply_header_digest(struct iscsi_context *iscsi, QemuOpts *opts, + Error **errp) +{ + const char *digest = NULL; + + digest = qemu_opt_get(opts, "header-digest"); + if (!digest) { + iscsi_set_header_digest(iscsi, ISCSI_HEADER_DIGEST_NONE_CRC32C); + } else if (!strcmp(digest, "crc32c")) { + iscsi_set_header_digest(iscsi, ISCSI_HEADER_DIGEST_CRC32C); + } else if (!strcmp(digest, "none")) { + iscsi_set_header_digest(iscsi, ISCSI_HEADER_DIGEST_NONE); + } else if (!strcmp(digest, "crc32c-none")) { + iscsi_set_header_digest(iscsi, ISCSI_HEADER_DIGEST_CRC32C_NONE); + } else if (!strcmp(digest, "none-crc32c")) { + iscsi_set_header_digest(iscsi, ISCSI_HEADER_DIGEST_NONE_CRC32C); + } else { + error_setg(errp, "Invalid header-digest setting : %s", digest); + } +} + +static char *get_initiator_name(QemuOpts *opts) +{ + const char *name; + char *iscsi_name; + UuidInfo *uuid_info; + + name = qemu_opt_get(opts, "initiator-name"); + if (name) { + return g_strdup(name); + } + + uuid_info = qmp_query_uuid(NULL); + if (strcmp(uuid_info->UUID, UUID_NONE) == 0) { + name = qemu_get_vm_name(); + } else { + name = uuid_info->UUID; + } + iscsi_name = g_strdup_printf("iqn.2008-11.org.linux-kvm%s%s", + name ? ":" : "", name ? name : ""); + qapi_free_UuidInfo(uuid_info); + return iscsi_name; +} + +static void iscsi_nop_timed_event(void *opaque) +{ + IscsiLun *iscsilun = opaque; + + QEMU_LOCK_GUARD(&iscsilun->mutex); + if (iscsi_get_nops_in_flight(iscsilun->iscsi) >= MAX_NOP_FAILURES) { + error_report("iSCSI: NOP timeout. Reconnecting..."); + iscsilun->request_timed_out = true; + } else if (iscsi_nop_out_async(iscsilun->iscsi, NULL, NULL, 0, NULL) != 0) { + error_report("iSCSI: failed to sent NOP-Out. Disabling NOP messages."); + return; + } + + timer_mod(iscsilun->nop_timer, qemu_clock_get_ms(QEMU_CLOCK_REALTIME) + NOP_INTERVAL); + iscsi_set_events(iscsilun); +} + +static void iscsi_readcapacity_sync(IscsiLun *iscsilun, Error **errp) +{ + struct scsi_task *task = NULL; + struct scsi_readcapacity10 *rc10 = NULL; + struct scsi_readcapacity16 *rc16 = NULL; + int retries = ISCSI_CMD_RETRIES; + + do { + if (task != NULL) { + scsi_free_scsi_task(task); + task = NULL; + } + + switch (iscsilun->type) { + case TYPE_DISK: + task = iscsi_readcapacity16_sync(iscsilun->iscsi, iscsilun->lun); + if (task != NULL && task->status == SCSI_STATUS_GOOD) { + rc16 = scsi_datain_unmarshall(task); + if (rc16 == NULL) { + error_setg(errp, "iSCSI: Failed to unmarshall readcapacity16 data."); + } else { + iscsilun->block_size = rc16->block_length; + iscsilun->num_blocks = rc16->returned_lba + 1; + iscsilun->lbpme = !!rc16->lbpme; + iscsilun->lbprz = !!rc16->lbprz; + iscsilun->use_16_for_rw = (rc16->returned_lba > 0xffffffff); + } + break; + } + if (task != NULL && task->status == SCSI_STATUS_CHECK_CONDITION + && task->sense.key == SCSI_SENSE_UNIT_ATTENTION) { + break; + } + /* Fall through and try READ CAPACITY(10) instead. */ + case TYPE_ROM: + task = iscsi_readcapacity10_sync(iscsilun->iscsi, iscsilun->lun, 0, 0); + if (task != NULL && task->status == SCSI_STATUS_GOOD) { + rc10 = scsi_datain_unmarshall(task); + if (rc10 == NULL) { + error_setg(errp, "iSCSI: Failed to unmarshall readcapacity10 data."); + } else { + iscsilun->block_size = rc10->block_size; + if (rc10->lba == 0) { + /* blank disk loaded */ + iscsilun->num_blocks = 0; + } else { + iscsilun->num_blocks = rc10->lba + 1; + } + } + } + break; + default: + return; + } + } while (task != NULL && task->status == SCSI_STATUS_CHECK_CONDITION + && task->sense.key == SCSI_SENSE_UNIT_ATTENTION + && retries-- > 0); + + if (task == NULL || task->status != SCSI_STATUS_GOOD) { + error_setg(errp, "iSCSI: failed to send readcapacity10/16 command"); + } else if (!iscsilun->block_size || + iscsilun->block_size % BDRV_SECTOR_SIZE) { + error_setg(errp, "iSCSI: the target returned an invalid " + "block size of %d.", iscsilun->block_size); + } + if (task) { + scsi_free_scsi_task(task); + } +} + +static struct scsi_task *iscsi_do_inquiry(struct iscsi_context *iscsi, int lun, + int evpd, int pc, void **inq, Error **errp) +{ + int full_size; + struct scsi_task *task = NULL; + task = iscsi_inquiry_sync(iscsi, lun, evpd, pc, 64); + if (task == NULL || task->status != SCSI_STATUS_GOOD) { + goto fail; + } + full_size = scsi_datain_getfullsize(task); + if (full_size > task->datain.size) { + scsi_free_scsi_task(task); + + /* we need more data for the full list */ + task = iscsi_inquiry_sync(iscsi, lun, evpd, pc, full_size); + if (task == NULL || task->status != SCSI_STATUS_GOOD) { + goto fail; + } + } + + *inq = scsi_datain_unmarshall(task); + if (*inq == NULL) { + error_setg(errp, "iSCSI: failed to unmarshall inquiry datain blob"); + goto fail_with_err; + } + + return task; + +fail: + error_setg(errp, "iSCSI: Inquiry command failed : %s", + iscsi_get_error(iscsi)); +fail_with_err: + if (task != NULL) { + scsi_free_scsi_task(task); + } + return NULL; +} + +static void iscsi_detach_aio_context(BlockDriverState *bs) +{ + IscsiLun *iscsilun = bs->opaque; + + aio_set_fd_handler(iscsilun->aio_context, iscsi_get_fd(iscsilun->iscsi), + false, NULL, NULL, NULL, NULL); + iscsilun->events = 0; + + if (iscsilun->nop_timer) { + timer_del(iscsilun->nop_timer); + timer_free(iscsilun->nop_timer); + iscsilun->nop_timer = NULL; + } + if (iscsilun->event_timer) { + timer_del(iscsilun->event_timer); + timer_free(iscsilun->event_timer); + iscsilun->event_timer = NULL; + } +} + +static void iscsi_attach_aio_context(BlockDriverState *bs, + AioContext *new_context) +{ + IscsiLun *iscsilun = bs->opaque; + + iscsilun->aio_context = new_context; + iscsi_set_events(iscsilun); + + /* Set up a timer for sending out iSCSI NOPs */ + iscsilun->nop_timer = aio_timer_new(iscsilun->aio_context, + QEMU_CLOCK_REALTIME, SCALE_MS, + iscsi_nop_timed_event, iscsilun); + timer_mod(iscsilun->nop_timer, + qemu_clock_get_ms(QEMU_CLOCK_REALTIME) + NOP_INTERVAL); + + /* Set up a timer for periodic calls to iscsi_set_events and to + * scan for command timeout */ + iscsilun->event_timer = aio_timer_new(iscsilun->aio_context, + QEMU_CLOCK_REALTIME, SCALE_MS, + iscsi_timed_check_events, iscsilun); + timer_mod(iscsilun->event_timer, + qemu_clock_get_ms(QEMU_CLOCK_REALTIME) + EVENT_INTERVAL); +} + +static void iscsi_modesense_sync(IscsiLun *iscsilun) +{ + struct scsi_task *task; + struct scsi_mode_sense *ms = NULL; + iscsilun->write_protected = false; + iscsilun->dpofua = false; + + task = iscsi_modesense6_sync(iscsilun->iscsi, iscsilun->lun, + 1, SCSI_MODESENSE_PC_CURRENT, + 0x3F, 0, 255); + if (task == NULL) { + error_report("iSCSI: Failed to send MODE_SENSE(6) command: %s", + iscsi_get_error(iscsilun->iscsi)); + goto out; + } + + if (task->status != SCSI_STATUS_GOOD) { + error_report("iSCSI: Failed MODE_SENSE(6), LUN assumed writable"); + goto out; + } + ms = scsi_datain_unmarshall(task); + if (!ms) { + error_report("iSCSI: Failed to unmarshall MODE_SENSE(6) data: %s", + iscsi_get_error(iscsilun->iscsi)); + goto out; + } + iscsilun->write_protected = ms->device_specific_parameter & 0x80; + iscsilun->dpofua = ms->device_specific_parameter & 0x10; + +out: + if (task) { + scsi_free_scsi_task(task); + } +} + +static void iscsi_parse_iscsi_option(const char *target, QDict *options) +{ + QemuOptsList *list; + QemuOpts *opts; + const char *user, *password, *password_secret, *initiator_name, + *header_digest, *timeout; + + list = qemu_find_opts("iscsi"); + if (!list) { + return; + } + + opts = qemu_opts_find(list, target); + if (opts == NULL) { + opts = QTAILQ_FIRST(&list->head); + if (!opts) { + return; + } + } + + user = qemu_opt_get(opts, "user"); + if (user) { + qdict_set_default_str(options, "user", user); + } + + password = qemu_opt_get(opts, "password"); + if (password) { + qdict_set_default_str(options, "password", password); + } + + password_secret = qemu_opt_get(opts, "password-secret"); + if (password_secret) { + qdict_set_default_str(options, "password-secret", password_secret); + } + + initiator_name = qemu_opt_get(opts, "initiator-name"); + if (initiator_name) { + qdict_set_default_str(options, "initiator-name", initiator_name); + } + + header_digest = qemu_opt_get(opts, "header-digest"); + if (header_digest) { + /* -iscsi takes upper case values, but QAPI only supports lower case + * enum constant names, so we have to convert here. */ + char *qapi_value = g_ascii_strdown(header_digest, -1); + qdict_set_default_str(options, "header-digest", qapi_value); + g_free(qapi_value); + } + + timeout = qemu_opt_get(opts, "timeout"); + if (timeout) { + qdict_set_default_str(options, "timeout", timeout); + } +} + +/* + * We support iscsi url's on the form + * iscsi://[%@][:]// + */ +static void iscsi_parse_filename(const char *filename, QDict *options, + Error **errp) +{ + struct iscsi_url *iscsi_url; + const char *transport_name; + char *lun_str; + + iscsi_url = iscsi_parse_full_url(NULL, filename); + if (iscsi_url == NULL) { + error_setg(errp, "Failed to parse URL : %s", filename); + return; + } + +#if LIBISCSI_API_VERSION >= (20160603) + switch (iscsi_url->transport) { + case TCP_TRANSPORT: + transport_name = "tcp"; + break; + case ISER_TRANSPORT: + transport_name = "iser"; + break; + default: + error_setg(errp, "Unknown transport type (%d)", + iscsi_url->transport); + return; + } +#else + transport_name = "tcp"; +#endif + + qdict_set_default_str(options, "transport", transport_name); + qdict_set_default_str(options, "portal", iscsi_url->portal); + qdict_set_default_str(options, "target", iscsi_url->target); + + lun_str = g_strdup_printf("%d", iscsi_url->lun); + qdict_set_default_str(options, "lun", lun_str); + g_free(lun_str); + + /* User/password from -iscsi take precedence over those from the URL */ + iscsi_parse_iscsi_option(iscsi_url->target, options); + + if (iscsi_url->user[0] != '\0') { + qdict_set_default_str(options, "user", iscsi_url->user); + qdict_set_default_str(options, "password", iscsi_url->passwd); + } + + iscsi_destroy_url(iscsi_url); +} + +static QemuOptsList runtime_opts = { + .name = "iscsi", + .head = QTAILQ_HEAD_INITIALIZER(runtime_opts.head), + .desc = { + { + .name = "transport", + .type = QEMU_OPT_STRING, + }, + { + .name = "portal", + .type = QEMU_OPT_STRING, + }, + { + .name = "target", + .type = QEMU_OPT_STRING, + }, + { + .name = "user", + .type = QEMU_OPT_STRING, + }, + { + .name = "password", + .type = QEMU_OPT_STRING, + }, + { + .name = "password-secret", + .type = QEMU_OPT_STRING, + }, + { + .name = "lun", + .type = QEMU_OPT_NUMBER, + }, + { + .name = "initiator-name", + .type = QEMU_OPT_STRING, + }, + { + .name = "header-digest", + .type = QEMU_OPT_STRING, + }, + { + .name = "timeout", + .type = QEMU_OPT_NUMBER, + }, + { /* end of list */ } + }, +}; + +static void iscsi_save_designator(IscsiLun *lun, + struct scsi_inquiry_device_identification *inq_di) +{ + struct scsi_inquiry_device_designator *desig, *copy = NULL; + + for (desig = inq_di->designators; desig; desig = desig->next) { + if (desig->association || + desig->designator_type > SCSI_DESIGNATOR_TYPE_NAA) { + continue; + } + /* NAA works better than T10 vendor ID based designator. */ + if (!copy || copy->designator_type < desig->designator_type) { + copy = desig; + } + } + if (copy) { + lun->dd = g_new(struct scsi_inquiry_device_designator, 1); + *lun->dd = *copy; + lun->dd->next = NULL; + lun->dd->designator = g_malloc(copy->designator_length); + memcpy(lun->dd->designator, copy->designator, copy->designator_length); + } +} + +static int iscsi_open(BlockDriverState *bs, QDict *options, int flags, + Error **errp) +{ + IscsiLun *iscsilun = bs->opaque; + struct iscsi_context *iscsi = NULL; + struct scsi_task *task = NULL; + struct scsi_inquiry_standard *inq = NULL; + struct scsi_inquiry_supported_pages *inq_vpd; + char *initiator_name = NULL; + QemuOpts *opts; + Error *local_err = NULL; + const char *transport_name, *portal, *target; +#if LIBISCSI_API_VERSION >= (20160603) + enum iscsi_transport_type transport; +#endif + int i, ret = 0, timeout = 0, lun; + + opts = qemu_opts_create(&runtime_opts, NULL, 0, &error_abort); + if (!qemu_opts_absorb_qdict(opts, options, errp)) { + ret = -EINVAL; + goto out; + } + + transport_name = qemu_opt_get(opts, "transport"); + portal = qemu_opt_get(opts, "portal"); + target = qemu_opt_get(opts, "target"); + lun = qemu_opt_get_number(opts, "lun", 0); + + if (!transport_name || !portal || !target) { + error_setg(errp, "Need all of transport, portal and target options"); + ret = -EINVAL; + goto out; + } + + if (!strcmp(transport_name, "tcp")) { +#if LIBISCSI_API_VERSION >= (20160603) + transport = TCP_TRANSPORT; + } else if (!strcmp(transport_name, "iser")) { + transport = ISER_TRANSPORT; +#else + /* TCP is what older libiscsi versions always use */ +#endif + } else { + error_setg(errp, "Unknown transport: %s", transport_name); + ret = -EINVAL; + goto out; + } + + memset(iscsilun, 0, sizeof(IscsiLun)); + + initiator_name = get_initiator_name(opts); + + iscsi = iscsi_create_context(initiator_name); + if (iscsi == NULL) { + error_setg(errp, "iSCSI: Failed to create iSCSI context."); + ret = -ENOMEM; + goto out; + } +#if LIBISCSI_API_VERSION >= (20160603) + if (iscsi_init_transport(iscsi, transport)) { + error_setg(errp, ("Error initializing transport.")); + ret = -EINVAL; + goto out; + } +#endif + if (iscsi_set_targetname(iscsi, target)) { + error_setg(errp, "iSCSI: Failed to set target name."); + ret = -EINVAL; + goto out; + } + + /* check if we got CHAP username/password via the options */ + apply_chap(iscsi, opts, &local_err); + if (local_err != NULL) { + error_propagate(errp, local_err); + ret = -EINVAL; + goto out; + } + + if (iscsi_set_session_type(iscsi, ISCSI_SESSION_NORMAL) != 0) { + error_setg(errp, "iSCSI: Failed to set session type to normal."); + ret = -EINVAL; + goto out; + } + + /* check if we got HEADER_DIGEST via the options */ + apply_header_digest(iscsi, opts, &local_err); + if (local_err != NULL) { + error_propagate(errp, local_err); + ret = -EINVAL; + goto out; + } + + /* timeout handling is broken in libiscsi before 1.15.0 */ + timeout = qemu_opt_get_number(opts, "timeout", 0); +#if LIBISCSI_API_VERSION >= 20150621 + iscsi_set_timeout(iscsi, timeout); +#else + if (timeout) { + warn_report("iSCSI: ignoring timeout value for libiscsi <1.15.0"); + } +#endif + + if (iscsi_full_connect_sync(iscsi, portal, lun) != 0) { + error_setg(errp, "iSCSI: Failed to connect to LUN : %s", + iscsi_get_error(iscsi)); + ret = -EINVAL; + goto out; + } + + iscsilun->iscsi = iscsi; + iscsilun->aio_context = bdrv_get_aio_context(bs); + iscsilun->lun = lun; + iscsilun->has_write_same = true; + + task = iscsi_do_inquiry(iscsilun->iscsi, iscsilun->lun, 0, 0, + (void **) &inq, errp); + if (task == NULL) { + ret = -EINVAL; + goto out; + } + iscsilun->type = inq->periperal_device_type; + scsi_free_scsi_task(task); + task = NULL; + + iscsi_modesense_sync(iscsilun); + if (iscsilun->dpofua) { + bs->supported_write_flags = BDRV_REQ_FUA; + } + + /* Check the write protect flag of the LUN if we want to write */ + if (iscsilun->type == TYPE_DISK && (flags & BDRV_O_RDWR) && + iscsilun->write_protected) { + ret = bdrv_apply_auto_read_only(bs, "LUN is write protected", errp); + if (ret < 0) { + goto out; + } + flags &= ~BDRV_O_RDWR; + } + + iscsi_readcapacity_sync(iscsilun, &local_err); + if (local_err != NULL) { + error_propagate(errp, local_err); + ret = -EINVAL; + goto out; + } + bs->total_sectors = sector_lun2qemu(iscsilun->num_blocks, iscsilun); + + /* We don't have any emulation for devices other than disks and CD-ROMs, so + * this must be sg ioctl compatible. We force it to be sg, otherwise qemu + * will try to read from the device to guess the image format. + */ + if (iscsilun->type != TYPE_DISK && iscsilun->type != TYPE_ROM) { + bs->sg = true; + } + + task = iscsi_do_inquiry(iscsilun->iscsi, iscsilun->lun, 1, + SCSI_INQUIRY_PAGECODE_SUPPORTED_VPD_PAGES, + (void **) &inq_vpd, errp); + if (task == NULL) { + ret = -EINVAL; + goto out; + } + for (i = 0; i < inq_vpd->num_pages; i++) { + struct scsi_task *inq_task; + struct scsi_inquiry_logical_block_provisioning *inq_lbp; + struct scsi_inquiry_block_limits *inq_bl; + struct scsi_inquiry_device_identification *inq_di; + switch (inq_vpd->pages[i]) { + case SCSI_INQUIRY_PAGECODE_LOGICAL_BLOCK_PROVISIONING: + inq_task = iscsi_do_inquiry(iscsilun->iscsi, iscsilun->lun, 1, + SCSI_INQUIRY_PAGECODE_LOGICAL_BLOCK_PROVISIONING, + (void **) &inq_lbp, errp); + if (inq_task == NULL) { + ret = -EINVAL; + goto out; + } + memcpy(&iscsilun->lbp, inq_lbp, + sizeof(struct scsi_inquiry_logical_block_provisioning)); + scsi_free_scsi_task(inq_task); + break; + case SCSI_INQUIRY_PAGECODE_BLOCK_LIMITS: + inq_task = iscsi_do_inquiry(iscsilun->iscsi, iscsilun->lun, 1, + SCSI_INQUIRY_PAGECODE_BLOCK_LIMITS, + (void **) &inq_bl, errp); + if (inq_task == NULL) { + ret = -EINVAL; + goto out; + } + memcpy(&iscsilun->bl, inq_bl, + sizeof(struct scsi_inquiry_block_limits)); + scsi_free_scsi_task(inq_task); + break; + case SCSI_INQUIRY_PAGECODE_DEVICE_IDENTIFICATION: + inq_task = iscsi_do_inquiry(iscsilun->iscsi, iscsilun->lun, 1, + SCSI_INQUIRY_PAGECODE_DEVICE_IDENTIFICATION, + (void **) &inq_di, errp); + if (inq_task == NULL) { + ret = -EINVAL; + goto out; + } + iscsi_save_designator(iscsilun, inq_di); + scsi_free_scsi_task(inq_task); + break; + default: + break; + } + } + scsi_free_scsi_task(task); + task = NULL; + + qemu_mutex_init(&iscsilun->mutex); + iscsi_attach_aio_context(bs, iscsilun->aio_context); + + /* Guess the internal cluster (page) size of the iscsi target by the means + * of opt_unmap_gran. Transfer the unmap granularity only if it has a + * reasonable size */ + if (iscsilun->bl.opt_unmap_gran * iscsilun->block_size >= 4 * 1024 && + iscsilun->bl.opt_unmap_gran * iscsilun->block_size <= 16 * 1024 * 1024) { + iscsilun->cluster_size = iscsilun->bl.opt_unmap_gran * + iscsilun->block_size; + if (iscsilun->lbprz) { + ret = iscsi_allocmap_init(iscsilun, flags); + } + } + + if (iscsilun->lbprz && iscsilun->lbp.lbpws) { + bs->supported_zero_flags = BDRV_REQ_MAY_UNMAP; + } + +out: + qemu_opts_del(opts); + g_free(initiator_name); + if (task != NULL) { + scsi_free_scsi_task(task); + } + + if (ret) { + if (iscsi != NULL) { + if (iscsi_is_logged_in(iscsi)) { + iscsi_logout_sync(iscsi); + } + iscsi_destroy_context(iscsi); + } + memset(iscsilun, 0, sizeof(IscsiLun)); + } + + return ret; +} + +static void iscsi_close(BlockDriverState *bs) +{ + IscsiLun *iscsilun = bs->opaque; + struct iscsi_context *iscsi = iscsilun->iscsi; + + iscsi_detach_aio_context(bs); + if (iscsi_is_logged_in(iscsi)) { + iscsi_logout_sync(iscsi); + } + iscsi_destroy_context(iscsi); + if (iscsilun->dd) { + g_free(iscsilun->dd->designator); + g_free(iscsilun->dd); + } + g_free(iscsilun->zeroblock); + iscsi_allocmap_free(iscsilun); + qemu_mutex_destroy(&iscsilun->mutex); + memset(iscsilun, 0, sizeof(IscsiLun)); +} + +static void iscsi_refresh_limits(BlockDriverState *bs, Error **errp) +{ + /* We don't actually refresh here, but just return data queried in + * iscsi_open(): iscsi targets don't change their limits. */ + + IscsiLun *iscsilun = bs->opaque; + uint64_t max_xfer_len = iscsilun->use_16_for_rw ? 0xffffffff : 0xffff; + unsigned int block_size = MAX(BDRV_SECTOR_SIZE, iscsilun->block_size); + + assert(iscsilun->block_size >= BDRV_SECTOR_SIZE || bs->sg); + + bs->bl.request_alignment = block_size; + + if (iscsilun->bl.max_xfer_len) { + max_xfer_len = MIN(max_xfer_len, iscsilun->bl.max_xfer_len); + } + + if (max_xfer_len * block_size < INT_MAX) { + bs->bl.max_transfer = max_xfer_len * iscsilun->block_size; + } + + if (iscsilun->lbp.lbpu) { + if (iscsilun->bl.max_unmap < 0xffffffff / block_size) { + bs->bl.max_pdiscard = + iscsilun->bl.max_unmap * iscsilun->block_size; + } + bs->bl.pdiscard_alignment = + iscsilun->bl.opt_unmap_gran * iscsilun->block_size; + } else { + bs->bl.pdiscard_alignment = iscsilun->block_size; + } + + if (iscsilun->bl.max_ws_len < 0xffffffff / block_size) { + bs->bl.max_pwrite_zeroes = + iscsilun->bl.max_ws_len * iscsilun->block_size; + } + if (iscsilun->lbp.lbpws) { + bs->bl.pwrite_zeroes_alignment = + iscsilun->bl.opt_unmap_gran * iscsilun->block_size; + } else { + bs->bl.pwrite_zeroes_alignment = iscsilun->block_size; + } + if (iscsilun->bl.opt_xfer_len && + iscsilun->bl.opt_xfer_len < INT_MAX / block_size) { + bs->bl.opt_transfer = pow2floor(iscsilun->bl.opt_xfer_len * + iscsilun->block_size); + } +} + +/* Note that this will not re-establish a connection with an iSCSI target - it + * is effectively a NOP. */ +static int iscsi_reopen_prepare(BDRVReopenState *state, + BlockReopenQueue *queue, Error **errp) +{ + IscsiLun *iscsilun = state->bs->opaque; + + if (state->flags & BDRV_O_RDWR && iscsilun->write_protected) { + error_setg(errp, "Cannot open a write protected LUN as read-write"); + return -EACCES; + } + return 0; +} + +static void iscsi_reopen_commit(BDRVReopenState *reopen_state) +{ + IscsiLun *iscsilun = reopen_state->bs->opaque; + + /* the cache.direct status might have changed */ + if (iscsilun->allocmap != NULL) { + iscsi_allocmap_init(iscsilun, reopen_state->flags); + } +} + +static int coroutine_fn iscsi_co_truncate(BlockDriverState *bs, int64_t offset, + bool exact, PreallocMode prealloc, + BdrvRequestFlags flags, Error **errp) +{ + IscsiLun *iscsilun = bs->opaque; + int64_t cur_length; + Error *local_err = NULL; + + if (prealloc != PREALLOC_MODE_OFF) { + error_setg(errp, "Unsupported preallocation mode '%s'", + PreallocMode_str(prealloc)); + return -ENOTSUP; + } + + if (iscsilun->type != TYPE_DISK) { + error_setg(errp, "Cannot resize non-disk iSCSI devices"); + return -ENOTSUP; + } + + iscsi_readcapacity_sync(iscsilun, &local_err); + if (local_err != NULL) { + error_propagate(errp, local_err); + return -EIO; + } + + cur_length = iscsi_getlength(bs); + if (offset != cur_length && exact) { + error_setg(errp, "Cannot resize iSCSI devices"); + return -ENOTSUP; + } else if (offset > cur_length) { + error_setg(errp, "Cannot grow iSCSI devices"); + return -EINVAL; + } + + if (iscsilun->allocmap != NULL) { + iscsi_allocmap_init(iscsilun, bs->open_flags); + } + + return 0; +} + +static int iscsi_get_info(BlockDriverState *bs, BlockDriverInfo *bdi) +{ + IscsiLun *iscsilun = bs->opaque; + bdi->cluster_size = iscsilun->cluster_size; + return 0; +} + +static void coroutine_fn iscsi_co_invalidate_cache(BlockDriverState *bs, + Error **errp) +{ + IscsiLun *iscsilun = bs->opaque; + iscsi_allocmap_invalidate(iscsilun); +} + +static int coroutine_fn iscsi_co_copy_range_from(BlockDriverState *bs, + BdrvChild *src, + uint64_t src_offset, + BdrvChild *dst, + uint64_t dst_offset, + uint64_t bytes, + BdrvRequestFlags read_flags, + BdrvRequestFlags write_flags) +{ + return bdrv_co_copy_range_to(src, src_offset, dst, dst_offset, bytes, + read_flags, write_flags); +} + +static struct scsi_task *iscsi_xcopy_task(int param_len) +{ + struct scsi_task *task; + + task = g_new0(struct scsi_task, 1); + + task->cdb[0] = EXTENDED_COPY; + task->cdb[10] = (param_len >> 24) & 0xFF; + task->cdb[11] = (param_len >> 16) & 0xFF; + task->cdb[12] = (param_len >> 8) & 0xFF; + task->cdb[13] = param_len & 0xFF; + task->cdb_size = 16; + task->xfer_dir = SCSI_XFER_WRITE; + task->expxferlen = param_len; + + return task; +} + +static void iscsi_populate_target_desc(unsigned char *desc, IscsiLun *lun) +{ + struct scsi_inquiry_device_designator *dd = lun->dd; + + memset(desc, 0, 32); + desc[0] = 0xE4; /* IDENT_DESCR_TGT_DESCR */ + desc[4] = dd->code_set; + desc[5] = (dd->designator_type & 0xF) + | ((dd->association & 3) << 4); + desc[7] = dd->designator_length; + memcpy(desc + 8, dd->designator, MIN(dd->designator_length, 20)); + + desc[28] = 0; + desc[29] = (lun->block_size >> 16) & 0xFF; + desc[30] = (lun->block_size >> 8) & 0xFF; + desc[31] = lun->block_size & 0xFF; +} + +static void iscsi_xcopy_desc_hdr(uint8_t *hdr, int dc, int cat, int src_index, + int dst_index) +{ + hdr[0] = 0x02; /* BLK_TO_BLK_SEG_DESCR */ + hdr[1] = ((dc << 1) | cat) & 0xFF; + hdr[2] = (XCOPY_BLK2BLK_SEG_DESC_SIZE >> 8) & 0xFF; + /* don't account for the first 4 bytes in descriptor header*/ + hdr[3] = (XCOPY_BLK2BLK_SEG_DESC_SIZE - 4 /* SEG_DESC_SRC_INDEX_OFFSET */) & 0xFF; + hdr[4] = (src_index >> 8) & 0xFF; + hdr[5] = src_index & 0xFF; + hdr[6] = (dst_index >> 8) & 0xFF; + hdr[7] = dst_index & 0xFF; +} + +static void iscsi_xcopy_populate_desc(uint8_t *desc, int dc, int cat, + int src_index, int dst_index, int num_blks, + uint64_t src_lba, uint64_t dst_lba) +{ + iscsi_xcopy_desc_hdr(desc, dc, cat, src_index, dst_index); + + /* The caller should verify the request size */ + assert(num_blks < 65536); + desc[10] = (num_blks >> 8) & 0xFF; + desc[11] = num_blks & 0xFF; + desc[12] = (src_lba >> 56) & 0xFF; + desc[13] = (src_lba >> 48) & 0xFF; + desc[14] = (src_lba >> 40) & 0xFF; + desc[15] = (src_lba >> 32) & 0xFF; + desc[16] = (src_lba >> 24) & 0xFF; + desc[17] = (src_lba >> 16) & 0xFF; + desc[18] = (src_lba >> 8) & 0xFF; + desc[19] = src_lba & 0xFF; + desc[20] = (dst_lba >> 56) & 0xFF; + desc[21] = (dst_lba >> 48) & 0xFF; + desc[22] = (dst_lba >> 40) & 0xFF; + desc[23] = (dst_lba >> 32) & 0xFF; + desc[24] = (dst_lba >> 24) & 0xFF; + desc[25] = (dst_lba >> 16) & 0xFF; + desc[26] = (dst_lba >> 8) & 0xFF; + desc[27] = dst_lba & 0xFF; +} + +static void iscsi_xcopy_populate_header(unsigned char *buf, int list_id, int str, + int list_id_usage, int prio, + int tgt_desc_len, + int seg_desc_len, int inline_data_len) +{ + buf[0] = list_id; + buf[1] = ((str & 1) << 5) | ((list_id_usage & 3) << 3) | (prio & 7); + buf[2] = (tgt_desc_len >> 8) & 0xFF; + buf[3] = tgt_desc_len & 0xFF; + buf[8] = (seg_desc_len >> 24) & 0xFF; + buf[9] = (seg_desc_len >> 16) & 0xFF; + buf[10] = (seg_desc_len >> 8) & 0xFF; + buf[11] = seg_desc_len & 0xFF; + buf[12] = (inline_data_len >> 24) & 0xFF; + buf[13] = (inline_data_len >> 16) & 0xFF; + buf[14] = (inline_data_len >> 8) & 0xFF; + buf[15] = inline_data_len & 0xFF; +} + +static void iscsi_xcopy_data(struct iscsi_data *data, + IscsiLun *src, int64_t src_lba, + IscsiLun *dst, int64_t dst_lba, + uint16_t num_blocks) +{ + uint8_t *buf; + const int src_offset = XCOPY_DESC_OFFSET; + const int dst_offset = XCOPY_DESC_OFFSET + IDENT_DESCR_TGT_DESCR_SIZE; + const int seg_offset = dst_offset + IDENT_DESCR_TGT_DESCR_SIZE; + + data->size = XCOPY_DESC_OFFSET + + IDENT_DESCR_TGT_DESCR_SIZE * 2 + + XCOPY_BLK2BLK_SEG_DESC_SIZE; + data->data = g_malloc0(data->size); + buf = data->data; + + /* Initialise the parameter list header */ + iscsi_xcopy_populate_header(buf, 1, 0, 2 /* LIST_ID_USAGE_DISCARD */, + 0, 2 * IDENT_DESCR_TGT_DESCR_SIZE, + XCOPY_BLK2BLK_SEG_DESC_SIZE, + 0); + + /* Initialise CSCD list with one src + one dst descriptor */ + iscsi_populate_target_desc(&buf[src_offset], src); + iscsi_populate_target_desc(&buf[dst_offset], dst); + + /* Initialise one segment descriptor */ + iscsi_xcopy_populate_desc(&buf[seg_offset], 0, 0, 0, 1, num_blocks, + src_lba, dst_lba); +} + +static int coroutine_fn iscsi_co_copy_range_to(BlockDriverState *bs, + BdrvChild *src, + uint64_t src_offset, + BdrvChild *dst, + uint64_t dst_offset, + uint64_t bytes, + BdrvRequestFlags read_flags, + BdrvRequestFlags write_flags) +{ + IscsiLun *dst_lun = dst->bs->opaque; + IscsiLun *src_lun; + struct IscsiTask iscsi_task; + struct iscsi_data data; + int r = 0; + int block_size; + + if (src->bs->drv->bdrv_co_copy_range_to != iscsi_co_copy_range_to) { + return -ENOTSUP; + } + src_lun = src->bs->opaque; + + if (!src_lun->dd || !dst_lun->dd) { + return -ENOTSUP; + } + if (!is_byte_request_lun_aligned(dst_offset, bytes, dst_lun)) { + return -ENOTSUP; + } + if (!is_byte_request_lun_aligned(src_offset, bytes, src_lun)) { + return -ENOTSUP; + } + if (dst_lun->block_size != src_lun->block_size || + !dst_lun->block_size) { + return -ENOTSUP; + } + + block_size = dst_lun->block_size; + if (bytes / block_size > 65535) { + return -ENOTSUP; + } + + iscsi_xcopy_data(&data, + src_lun, src_offset / block_size, + dst_lun, dst_offset / block_size, + bytes / block_size); + + iscsi_co_init_iscsitask(dst_lun, &iscsi_task); + + qemu_mutex_lock(&dst_lun->mutex); + iscsi_task.task = iscsi_xcopy_task(data.size); +retry: + if (iscsi_scsi_command_async(dst_lun->iscsi, dst_lun->lun, + iscsi_task.task, iscsi_co_generic_cb, + &data, + &iscsi_task) != 0) { + r = -EIO; + goto out_unlock; + } + + iscsi_co_wait_for_task(&iscsi_task, dst_lun); + + if (iscsi_task.do_retry) { + iscsi_task.complete = 0; + goto retry; + } + + if (iscsi_task.status != SCSI_STATUS_GOOD) { + r = iscsi_task.err_code; + goto out_unlock; + } + +out_unlock: + + trace_iscsi_xcopy(src_lun, src_offset, dst_lun, dst_offset, bytes, r); + g_free(iscsi_task.task); + qemu_mutex_unlock(&dst_lun->mutex); + g_free(iscsi_task.err_str); + return r; +} + + +static const char *const iscsi_strong_runtime_opts[] = { + "transport", + "portal", + "target", + "user", + "password", + "password-secret", + "lun", + "initiator-name", + "header-digest", + + NULL +}; + +static BlockDriver bdrv_iscsi = { + .format_name = "iscsi", + .protocol_name = "iscsi", + + .instance_size = sizeof(IscsiLun), + .bdrv_parse_filename = iscsi_parse_filename, + .bdrv_file_open = iscsi_open, + .bdrv_close = iscsi_close, + .bdrv_co_create_opts = bdrv_co_create_opts_simple, + .create_opts = &bdrv_create_opts_simple, + .bdrv_reopen_prepare = iscsi_reopen_prepare, + .bdrv_reopen_commit = iscsi_reopen_commit, + .bdrv_co_invalidate_cache = iscsi_co_invalidate_cache, + + .bdrv_getlength = iscsi_getlength, + .bdrv_get_info = iscsi_get_info, + .bdrv_co_truncate = iscsi_co_truncate, + .bdrv_refresh_limits = iscsi_refresh_limits, + + .bdrv_co_block_status = iscsi_co_block_status, + .bdrv_co_pdiscard = iscsi_co_pdiscard, + .bdrv_co_copy_range_from = iscsi_co_copy_range_from, + .bdrv_co_copy_range_to = iscsi_co_copy_range_to, + .bdrv_co_pwrite_zeroes = iscsi_co_pwrite_zeroes, + .bdrv_co_readv = iscsi_co_readv, + .bdrv_co_writev = iscsi_co_writev, + .bdrv_co_flush_to_disk = iscsi_co_flush, + +#ifdef __linux__ + .bdrv_aio_ioctl = iscsi_aio_ioctl, +#endif + + .bdrv_detach_aio_context = iscsi_detach_aio_context, + .bdrv_attach_aio_context = iscsi_attach_aio_context, + + .strong_runtime_opts = iscsi_strong_runtime_opts, +}; + +#if LIBISCSI_API_VERSION >= (20160603) +static BlockDriver bdrv_iser = { + .format_name = "iser", + .protocol_name = "iser", + + .instance_size = sizeof(IscsiLun), + .bdrv_parse_filename = iscsi_parse_filename, + .bdrv_file_open = iscsi_open, + .bdrv_close = iscsi_close, + .bdrv_co_create_opts = bdrv_co_create_opts_simple, + .create_opts = &bdrv_create_opts_simple, + .bdrv_reopen_prepare = iscsi_reopen_prepare, + .bdrv_reopen_commit = iscsi_reopen_commit, + .bdrv_co_invalidate_cache = iscsi_co_invalidate_cache, + + .bdrv_getlength = iscsi_getlength, + .bdrv_get_info = iscsi_get_info, + .bdrv_co_truncate = iscsi_co_truncate, + .bdrv_refresh_limits = iscsi_refresh_limits, + + .bdrv_co_block_status = iscsi_co_block_status, + .bdrv_co_pdiscard = iscsi_co_pdiscard, + .bdrv_co_copy_range_from = iscsi_co_copy_range_from, + .bdrv_co_copy_range_to = iscsi_co_copy_range_to, + .bdrv_co_pwrite_zeroes = iscsi_co_pwrite_zeroes, + .bdrv_co_readv = iscsi_co_readv, + .bdrv_co_writev = iscsi_co_writev, + .bdrv_co_flush_to_disk = iscsi_co_flush, + +#ifdef __linux__ + .bdrv_aio_ioctl = iscsi_aio_ioctl, +#endif + + .bdrv_detach_aio_context = iscsi_detach_aio_context, + .bdrv_attach_aio_context = iscsi_attach_aio_context, + + .strong_runtime_opts = iscsi_strong_runtime_opts, +}; +#endif + +static void iscsi_block_init(void) +{ + bdrv_register(&bdrv_iscsi); +#if LIBISCSI_API_VERSION >= (20160603) + bdrv_register(&bdrv_iser); +#endif +} + +block_init(iscsi_block_init); diff --git a/block/linux-aio.c b/block/linux-aio.c new file mode 100644 index 000000000..3c0527c2b --- /dev/null +++ b/block/linux-aio.c @@ -0,0 +1,459 @@ +/* + * Linux native AIO support. + * + * Copyright (C) 2009 IBM, Corp. + * Copyright (C) 2009 Red Hat, Inc. + * + * This work is licensed under the terms of the GNU GPL, version 2 or later. + * See the COPYING file in the top-level directory. + */ +#include "qemu/osdep.h" +#include "block/aio.h" +#include "qemu/queue.h" +#include "block/block.h" +#include "block/raw-aio.h" +#include "qemu/event_notifier.h" +#include "qemu/coroutine.h" +#include "qapi/error.h" + +#include + +/* + * Queue size (per-device). + * + * XXX: eventually we need to communicate this to the guest and/or make it + * tunable by the guest. If we get more outstanding requests at a time + * than this we will get EAGAIN from io_submit which is communicated to + * the guest as an I/O error. + */ +#define MAX_EVENTS 1024 + +struct qemu_laiocb { + Coroutine *co; + LinuxAioState *ctx; + struct iocb iocb; + ssize_t ret; + size_t nbytes; + QEMUIOVector *qiov; + bool is_read; + QSIMPLEQ_ENTRY(qemu_laiocb) next; +}; + +typedef struct { + int plugged; + unsigned int in_queue; + unsigned int in_flight; + bool blocked; + QSIMPLEQ_HEAD(, qemu_laiocb) pending; +} LaioQueue; + +struct LinuxAioState { + AioContext *aio_context; + + io_context_t ctx; + EventNotifier e; + + /* io queue for submit at batch. Protected by AioContext lock. */ + LaioQueue io_q; + + /* I/O completion processing. Only runs in I/O thread. */ + QEMUBH *completion_bh; + int event_idx; + int event_max; +}; + +static void ioq_submit(LinuxAioState *s); + +static inline ssize_t io_event_ret(struct io_event *ev) +{ + return (ssize_t)(((uint64_t)ev->res2 << 32) | ev->res); +} + +/* + * Completes an AIO request. + */ +static void qemu_laio_process_completion(struct qemu_laiocb *laiocb) +{ + int ret; + + ret = laiocb->ret; + if (ret != -ECANCELED) { + if (ret == laiocb->nbytes) { + ret = 0; + } else if (ret >= 0) { + /* Short reads mean EOF, pad with zeros. */ + if (laiocb->is_read) { + qemu_iovec_memset(laiocb->qiov, ret, 0, + laiocb->qiov->size - ret); + } else { + ret = -ENOSPC; + } + } + } + + laiocb->ret = ret; + + /* + * If the coroutine is already entered it must be in ioq_submit() and + * will notice laio->ret has been filled in when it eventually runs + * later. Coroutines cannot be entered recursively so avoid doing + * that! + */ + if (!qemu_coroutine_entered(laiocb->co)) { + aio_co_wake(laiocb->co); + } +} + +/** + * aio_ring buffer which is shared between userspace and kernel. + * + * This copied from linux/fs/aio.c, common header does not exist + * but AIO exists for ages so we assume ABI is stable. + */ +struct aio_ring { + unsigned id; /* kernel internal index number */ + unsigned nr; /* number of io_events */ + unsigned head; /* Written to by userland or by kernel. */ + unsigned tail; + + unsigned magic; + unsigned compat_features; + unsigned incompat_features; + unsigned header_length; /* size of aio_ring */ + + struct io_event io_events[]; +}; + +/** + * io_getevents_peek: + * @ctx: AIO context + * @events: pointer on events array, output value + + * Returns the number of completed events and sets a pointer + * on events array. This function does not update the internal + * ring buffer, only reads head and tail. When @events has been + * processed io_getevents_commit() must be called. + */ +static inline unsigned int io_getevents_peek(io_context_t ctx, + struct io_event **events) +{ + struct aio_ring *ring = (struct aio_ring *)ctx; + unsigned int head = ring->head, tail = ring->tail; + unsigned int nr; + + nr = tail >= head ? tail - head : ring->nr - head; + *events = ring->io_events + head; + /* To avoid speculative loads of s->events[i] before observing tail. + Paired with smp_wmb() inside linux/fs/aio.c: aio_complete(). */ + smp_rmb(); + + return nr; +} + +/** + * io_getevents_commit: + * @ctx: AIO context + * @nr: the number of events on which head should be advanced + * + * Advances head of a ring buffer. + */ +static inline void io_getevents_commit(io_context_t ctx, unsigned int nr) +{ + struct aio_ring *ring = (struct aio_ring *)ctx; + + if (nr) { + ring->head = (ring->head + nr) % ring->nr; + } +} + +/** + * io_getevents_advance_and_peek: + * @ctx: AIO context + * @events: pointer on events array, output value + * @nr: the number of events on which head should be advanced + * + * Advances head of a ring buffer and returns number of elements left. + */ +static inline unsigned int +io_getevents_advance_and_peek(io_context_t ctx, + struct io_event **events, + unsigned int nr) +{ + io_getevents_commit(ctx, nr); + return io_getevents_peek(ctx, events); +} + +/** + * qemu_laio_process_completions: + * @s: AIO state + * + * Fetches completed I/O requests and invokes their callbacks. + * + * The function is somewhat tricky because it supports nested event loops, for + * example when a request callback invokes aio_poll(). In order to do this, + * indices are kept in LinuxAioState. Function schedules BH completion so it + * can be called again in a nested event loop. When there are no events left + * to complete the BH is being canceled. + */ +static void qemu_laio_process_completions(LinuxAioState *s) +{ + struct io_event *events; + + /* Reschedule so nested event loops see currently pending completions */ + qemu_bh_schedule(s->completion_bh); + + while ((s->event_max = io_getevents_advance_and_peek(s->ctx, &events, + s->event_idx))) { + for (s->event_idx = 0; s->event_idx < s->event_max; ) { + struct iocb *iocb = events[s->event_idx].obj; + struct qemu_laiocb *laiocb = + container_of(iocb, struct qemu_laiocb, iocb); + + laiocb->ret = io_event_ret(&events[s->event_idx]); + + /* Change counters one-by-one because we can be nested. */ + s->io_q.in_flight--; + s->event_idx++; + qemu_laio_process_completion(laiocb); + } + } + + qemu_bh_cancel(s->completion_bh); + + /* If we are nested we have to notify the level above that we are done + * by setting event_max to zero, upper level will then jump out of it's + * own `for` loop. If we are the last all counters droped to zero. */ + s->event_max = 0; + s->event_idx = 0; +} + +static void qemu_laio_process_completions_and_submit(LinuxAioState *s) +{ + aio_context_acquire(s->aio_context); + qemu_laio_process_completions(s); + + if (!s->io_q.plugged && !QSIMPLEQ_EMPTY(&s->io_q.pending)) { + ioq_submit(s); + } + aio_context_release(s->aio_context); +} + +static void qemu_laio_completion_bh(void *opaque) +{ + LinuxAioState *s = opaque; + + qemu_laio_process_completions_and_submit(s); +} + +static void qemu_laio_completion_cb(EventNotifier *e) +{ + LinuxAioState *s = container_of(e, LinuxAioState, e); + + if (event_notifier_test_and_clear(&s->e)) { + qemu_laio_process_completions_and_submit(s); + } +} + +static bool qemu_laio_poll_cb(void *opaque) +{ + EventNotifier *e = opaque; + LinuxAioState *s = container_of(e, LinuxAioState, e); + struct io_event *events; + + if (!io_getevents_peek(s->ctx, &events)) { + return false; + } + + qemu_laio_process_completions_and_submit(s); + return true; +} + +static void ioq_init(LaioQueue *io_q) +{ + QSIMPLEQ_INIT(&io_q->pending); + io_q->plugged = 0; + io_q->in_queue = 0; + io_q->in_flight = 0; + io_q->blocked = false; +} + +static void ioq_submit(LinuxAioState *s) +{ + int ret, len; + struct qemu_laiocb *aiocb; + struct iocb *iocbs[MAX_EVENTS]; + QSIMPLEQ_HEAD(, qemu_laiocb) completed; + + do { + if (s->io_q.in_flight >= MAX_EVENTS) { + break; + } + len = 0; + QSIMPLEQ_FOREACH(aiocb, &s->io_q.pending, next) { + iocbs[len++] = &aiocb->iocb; + if (s->io_q.in_flight + len >= MAX_EVENTS) { + break; + } + } + + ret = io_submit(s->ctx, len, iocbs); + if (ret == -EAGAIN) { + break; + } + if (ret < 0) { + /* Fail the first request, retry the rest */ + aiocb = QSIMPLEQ_FIRST(&s->io_q.pending); + QSIMPLEQ_REMOVE_HEAD(&s->io_q.pending, next); + s->io_q.in_queue--; + aiocb->ret = ret; + qemu_laio_process_completion(aiocb); + continue; + } + + s->io_q.in_flight += ret; + s->io_q.in_queue -= ret; + aiocb = container_of(iocbs[ret - 1], struct qemu_laiocb, iocb); + QSIMPLEQ_SPLIT_AFTER(&s->io_q.pending, aiocb, next, &completed); + } while (ret == len && !QSIMPLEQ_EMPTY(&s->io_q.pending)); + s->io_q.blocked = (s->io_q.in_queue > 0); + + if (s->io_q.in_flight) { + /* We can try to complete something just right away if there are + * still requests in-flight. */ + qemu_laio_process_completions(s); + /* + * Even we have completed everything (in_flight == 0), the queue can + * have still pended requests (in_queue > 0). We do not attempt to + * repeat submission to avoid IO hang. The reason is simple: s->e is + * still set and completion callback will be called shortly and all + * pended requests will be submitted from there. + */ + } +} + +void laio_io_plug(BlockDriverState *bs, LinuxAioState *s) +{ + s->io_q.plugged++; +} + +void laio_io_unplug(BlockDriverState *bs, LinuxAioState *s) +{ + assert(s->io_q.plugged); + if (--s->io_q.plugged == 0 && + !s->io_q.blocked && !QSIMPLEQ_EMPTY(&s->io_q.pending)) { + ioq_submit(s); + } +} + +static int laio_do_submit(int fd, struct qemu_laiocb *laiocb, off_t offset, + int type) +{ + LinuxAioState *s = laiocb->ctx; + struct iocb *iocbs = &laiocb->iocb; + QEMUIOVector *qiov = laiocb->qiov; + + switch (type) { + case QEMU_AIO_WRITE: + io_prep_pwritev(iocbs, fd, qiov->iov, qiov->niov, offset); + break; + case QEMU_AIO_READ: + io_prep_preadv(iocbs, fd, qiov->iov, qiov->niov, offset); + break; + /* Currently Linux kernel does not support other operations */ + default: + fprintf(stderr, "%s: invalid AIO request type 0x%x.\n", + __func__, type); + return -EIO; + } + io_set_eventfd(&laiocb->iocb, event_notifier_get_fd(&s->e)); + + QSIMPLEQ_INSERT_TAIL(&s->io_q.pending, laiocb, next); + s->io_q.in_queue++; + if (!s->io_q.blocked && + (!s->io_q.plugged || + s->io_q.in_flight + s->io_q.in_queue >= MAX_EVENTS)) { + ioq_submit(s); + } + + return 0; +} + +int coroutine_fn laio_co_submit(BlockDriverState *bs, LinuxAioState *s, int fd, + uint64_t offset, QEMUIOVector *qiov, int type) +{ + int ret; + struct qemu_laiocb laiocb = { + .co = qemu_coroutine_self(), + .nbytes = qiov->size, + .ctx = s, + .ret = -EINPROGRESS, + .is_read = (type == QEMU_AIO_READ), + .qiov = qiov, + }; + + ret = laio_do_submit(fd, &laiocb, offset, type); + if (ret < 0) { + return ret; + } + + if (laiocb.ret == -EINPROGRESS) { + qemu_coroutine_yield(); + } + return laiocb.ret; +} + +void laio_detach_aio_context(LinuxAioState *s, AioContext *old_context) +{ + aio_set_event_notifier(old_context, &s->e, false, NULL, NULL); + qemu_bh_delete(s->completion_bh); + s->aio_context = NULL; +} + +void laio_attach_aio_context(LinuxAioState *s, AioContext *new_context) +{ + s->aio_context = new_context; + s->completion_bh = aio_bh_new(new_context, qemu_laio_completion_bh, s); + aio_set_event_notifier(new_context, &s->e, false, + qemu_laio_completion_cb, + qemu_laio_poll_cb); +} + +LinuxAioState *laio_init(Error **errp) +{ + int rc; + LinuxAioState *s; + + s = g_malloc0(sizeof(*s)); + rc = event_notifier_init(&s->e, false); + if (rc < 0) { + error_setg_errno(errp, -rc, "failed to to initialize event notifier"); + goto out_free_state; + } + + rc = io_setup(MAX_EVENTS, &s->ctx); + if (rc < 0) { + error_setg_errno(errp, -rc, "failed to create linux AIO context"); + goto out_close_efd; + } + + ioq_init(&s->io_q); + + return s; + +out_close_efd: + event_notifier_cleanup(&s->e); +out_free_state: + g_free(s); + return NULL; +} + +void laio_cleanup(LinuxAioState *s) +{ + event_notifier_cleanup(&s->e); + + if (io_destroy(s->ctx) != 0) { + fprintf(stderr, "%s: destroy AIO context %p failed\n", + __func__, &s->ctx); + } + g_free(s); +} diff --git a/block/meson.build b/block/meson.build new file mode 100644 index 000000000..5dcc1e5cc --- /dev/null +++ b/block/meson.build @@ -0,0 +1,126 @@ +block_ss.add(genh) +block_ss.add(files( + 'accounting.c', + 'aio_task.c', + 'amend.c', + 'backup.c', + 'backup-top.c', + 'blkdebug.c', + 'blklogwrites.c', + 'blkverify.c', + 'block-backend.c', + 'block-copy.c', + 'commit.c', + 'copy-on-read.c', + 'create.c', + 'crypto.c', + 'dirty-bitmap.c', + 'filter-compress.c', + 'io.c', + 'mirror.c', + 'nbd.c', + 'null.c', + 'qapi.c', + 'qcow2-bitmap.c', + 'qcow2-cache.c', + 'qcow2-cluster.c', + 'qcow2-refcount.c', + 'qcow2-snapshot.c', + 'qcow2-threads.c', + 'qcow2.c', + 'quorum.c', + 'raw-format.c', + 'snapshot.c', + 'throttle-groups.c', + 'throttle.c', + 'vhdx-endian.c', + 'vhdx-log.c', + 'vhdx.c', + 'vmdk.c', + 'vpc.c', + 'write-threshold.c', +), zstd, zlib) + +softmmu_ss.add(when: 'CONFIG_TCG', if_true: files('blkreplay.c')) + +block_ss.add(when: 'CONFIG_QCOW1', if_true: files('qcow.c')) +block_ss.add(when: 'CONFIG_VDI', if_true: files('vdi.c')) +block_ss.add(when: 'CONFIG_CLOOP', if_true: files('cloop.c')) +block_ss.add(when: 'CONFIG_BOCHS', if_true: files('bochs.c')) +block_ss.add(when: 'CONFIG_VVFAT', if_true: files('vvfat.c')) +block_ss.add(when: 'CONFIG_DMG', if_true: files('dmg.c')) +block_ss.add(when: 'CONFIG_QED', if_true: files( + 'qed-check.c', + 'qed-cluster.c', + 'qed-l2-cache.c', + 'qed-table.c', + 'qed.c', +)) +block_ss.add(when: [libxml2, 'CONFIG_PARALLELS'], if_true: files('parallels.c')) +block_ss.add(when: 'CONFIG_WIN32', if_true: files('file-win32.c', 'win32-aio.c')) +block_ss.add(when: 'CONFIG_POSIX', if_true: [files('file-posix.c'), coref, iokit]) +block_ss.add(when: 'CONFIG_LIBISCSI', if_true: files('iscsi-opts.c')) +block_ss.add(when: 'CONFIG_LINUX', if_true: files('nvme.c')) +block_ss.add(when: 'CONFIG_REPLICATION', if_true: files('replication.c')) +block_ss.add(when: 'CONFIG_SHEEPDOG', if_true: files('sheepdog.c')) +block_ss.add(when: ['CONFIG_LINUX_AIO', libaio], if_true: files('linux-aio.c')) +block_ss.add(when: ['CONFIG_LINUX_IO_URING', linux_io_uring], if_true: files('io_uring.c')) + +block_modules = {} + +modsrc = [] +foreach m : [ + ['CONFIG_CURL', 'curl', [curl, glib], 'curl.c'], + ['CONFIG_GLUSTERFS', 'gluster', glusterfs, 'gluster.c'], + ['CONFIG_LIBISCSI', 'iscsi', libiscsi, 'iscsi.c'], + ['CONFIG_LIBNFS', 'nfs', libnfs, 'nfs.c'], + ['CONFIG_LIBSSH', 'ssh', libssh, 'ssh.c'], + ['CONFIG_RBD', 'rbd', rbd, 'rbd.c'], +] + if config_host.has_key(m[0]) + if enable_modules + modsrc += files(m[3]) + endif + module_ss = ss.source_set() + module_ss.add(when: m[2], if_true: files(m[3])) + block_modules += {m[1] : module_ss} + endif +endforeach + +# those are not exactly regular block modules, so treat them apart +if 'CONFIG_DMG' in config_host + foreach m : [ + ['CONFIG_LZFSE', 'dmg-lzfse', liblzfse, 'dmg-lzfse.c'], + ['CONFIG_BZIP2', 'dmg-bz2', [glib, libbzip2], 'dmg-bz2.c'] + ] + if config_host.has_key(m[0]) + module_ss = ss.source_set() + module_ss.add(when: m[2], if_true: files(m[3])) + block_modules += {m[1] : module_ss} + endif + endforeach +endif + +module_block_py = find_program('../scripts/modules/module_block.py') +module_block_h = custom_target('module_block.h', + output: 'module_block.h', + input: modsrc, + command: [module_block_py, '@OUTPUT0@', modsrc]) +block_ss.add(module_block_h) + +wrapper_py = find_program('../scripts/block-coroutine-wrapper.py') +block_gen_c = custom_target('block-gen.c', + output: 'block-gen.c', + input: files('../include/block/block.h', + 'coroutines.h'), + command: [wrapper_py, '@OUTPUT@', '@INPUT@']) +block_ss.add(block_gen_c) + +block_ss.add(files('stream.c')) + +softmmu_ss.add(files('qapi-sysemu.c')) + +subdir('export') +subdir('monitor') + +modules += {'block': block_modules} diff --git a/block/mirror.c b/block/mirror.c new file mode 100644 index 000000000..8e1ad6ece --- /dev/null +++ b/block/mirror.c @@ -0,0 +1,1888 @@ +/* + * Image mirroring + * + * Copyright Red Hat, Inc. 2012 + * + * Authors: + * Paolo Bonzini + * + * This work is licensed under the terms of the GNU LGPL, version 2 or later. + * See the COPYING.LIB file in the top-level directory. + * + */ + +#include "qemu/osdep.h" +#include "qemu/cutils.h" +#include "qemu/coroutine.h" +#include "qemu/range.h" +#include "trace.h" +#include "block/blockjob_int.h" +#include "block/block_int.h" +#include "sysemu/block-backend.h" +#include "qapi/error.h" +#include "qapi/qmp/qerror.h" +#include "qemu/ratelimit.h" +#include "qemu/bitmap.h" + +#define MAX_IN_FLIGHT 16 +#define MAX_IO_BYTES (1 << 20) /* 1 Mb */ +#define DEFAULT_MIRROR_BUF_SIZE (MAX_IN_FLIGHT * MAX_IO_BYTES) + +/* The mirroring buffer is a list of granularity-sized chunks. + * Free chunks are organized in a list. + */ +typedef struct MirrorBuffer { + QSIMPLEQ_ENTRY(MirrorBuffer) next; +} MirrorBuffer; + +typedef struct MirrorOp MirrorOp; + +typedef struct MirrorBlockJob { + BlockJob common; + BlockBackend *target; + BlockDriverState *mirror_top_bs; + BlockDriverState *base; + BlockDriverState *base_overlay; + + /* The name of the graph node to replace */ + char *replaces; + /* The BDS to replace */ + BlockDriverState *to_replace; + /* Used to block operations on the drive-mirror-replace target */ + Error *replace_blocker; + bool is_none_mode; + BlockMirrorBackingMode backing_mode; + /* Whether the target image requires explicit zero-initialization */ + bool zero_target; + MirrorCopyMode copy_mode; + BlockdevOnError on_source_error, on_target_error; + bool synced; + /* Set when the target is synced (dirty bitmap is clean, nothing + * in flight) and the job is running in active mode */ + bool actively_synced; + bool should_complete; + int64_t granularity; + size_t buf_size; + int64_t bdev_length; + unsigned long *cow_bitmap; + BdrvDirtyBitmap *dirty_bitmap; + BdrvDirtyBitmapIter *dbi; + uint8_t *buf; + QSIMPLEQ_HEAD(, MirrorBuffer) buf_free; + int buf_free_count; + + uint64_t last_pause_ns; + unsigned long *in_flight_bitmap; + int in_flight; + int64_t bytes_in_flight; + QTAILQ_HEAD(, MirrorOp) ops_in_flight; + int ret; + bool unmap; + int target_cluster_size; + int max_iov; + bool initial_zeroing_ongoing; + int in_active_write_counter; + bool prepared; + bool in_drain; +} MirrorBlockJob; + +typedef struct MirrorBDSOpaque { + MirrorBlockJob *job; + bool stop; +} MirrorBDSOpaque; + +struct MirrorOp { + MirrorBlockJob *s; + QEMUIOVector qiov; + int64_t offset; + uint64_t bytes; + + /* The pointee is set by mirror_co_read(), mirror_co_zero(), and + * mirror_co_discard() before yielding for the first time */ + int64_t *bytes_handled; + + bool is_pseudo_op; + bool is_active_write; + bool is_in_flight; + CoQueue waiting_requests; + Coroutine *co; + + QTAILQ_ENTRY(MirrorOp) next; +}; + +typedef enum MirrorMethod { + MIRROR_METHOD_COPY, + MIRROR_METHOD_ZERO, + MIRROR_METHOD_DISCARD, +} MirrorMethod; + +static BlockErrorAction mirror_error_action(MirrorBlockJob *s, bool read, + int error) +{ + s->synced = false; + s->actively_synced = false; + if (read) { + return block_job_error_action(&s->common, s->on_source_error, + true, error); + } else { + return block_job_error_action(&s->common, s->on_target_error, + false, error); + } +} + +static void coroutine_fn mirror_wait_on_conflicts(MirrorOp *self, + MirrorBlockJob *s, + uint64_t offset, + uint64_t bytes) +{ + uint64_t self_start_chunk = offset / s->granularity; + uint64_t self_end_chunk = DIV_ROUND_UP(offset + bytes, s->granularity); + uint64_t self_nb_chunks = self_end_chunk - self_start_chunk; + + while (find_next_bit(s->in_flight_bitmap, self_end_chunk, + self_start_chunk) < self_end_chunk && + s->ret >= 0) + { + MirrorOp *op; + + QTAILQ_FOREACH(op, &s->ops_in_flight, next) { + uint64_t op_start_chunk = op->offset / s->granularity; + uint64_t op_nb_chunks = DIV_ROUND_UP(op->offset + op->bytes, + s->granularity) - + op_start_chunk; + + if (op == self) { + continue; + } + + if (ranges_overlap(self_start_chunk, self_nb_chunks, + op_start_chunk, op_nb_chunks)) + { + qemu_co_queue_wait(&op->waiting_requests, NULL); + break; + } + } + } +} + +static void coroutine_fn mirror_iteration_done(MirrorOp *op, int ret) +{ + MirrorBlockJob *s = op->s; + struct iovec *iov; + int64_t chunk_num; + int i, nb_chunks; + + trace_mirror_iteration_done(s, op->offset, op->bytes, ret); + + s->in_flight--; + s->bytes_in_flight -= op->bytes; + iov = op->qiov.iov; + for (i = 0; i < op->qiov.niov; i++) { + MirrorBuffer *buf = (MirrorBuffer *) iov[i].iov_base; + QSIMPLEQ_INSERT_TAIL(&s->buf_free, buf, next); + s->buf_free_count++; + } + + chunk_num = op->offset / s->granularity; + nb_chunks = DIV_ROUND_UP(op->bytes, s->granularity); + + bitmap_clear(s->in_flight_bitmap, chunk_num, nb_chunks); + QTAILQ_REMOVE(&s->ops_in_flight, op, next); + if (ret >= 0) { + if (s->cow_bitmap) { + bitmap_set(s->cow_bitmap, chunk_num, nb_chunks); + } + if (!s->initial_zeroing_ongoing) { + job_progress_update(&s->common.job, op->bytes); + } + } + qemu_iovec_destroy(&op->qiov); + + qemu_co_queue_restart_all(&op->waiting_requests); + g_free(op); +} + +static void coroutine_fn mirror_write_complete(MirrorOp *op, int ret) +{ + MirrorBlockJob *s = op->s; + + if (ret < 0) { + BlockErrorAction action; + + bdrv_set_dirty_bitmap(s->dirty_bitmap, op->offset, op->bytes); + action = mirror_error_action(s, false, -ret); + if (action == BLOCK_ERROR_ACTION_REPORT && s->ret >= 0) { + s->ret = ret; + } + } + + mirror_iteration_done(op, ret); +} + +static void coroutine_fn mirror_read_complete(MirrorOp *op, int ret) +{ + MirrorBlockJob *s = op->s; + + if (ret < 0) { + BlockErrorAction action; + + bdrv_set_dirty_bitmap(s->dirty_bitmap, op->offset, op->bytes); + action = mirror_error_action(s, true, -ret); + if (action == BLOCK_ERROR_ACTION_REPORT && s->ret >= 0) { + s->ret = ret; + } + + mirror_iteration_done(op, ret); + return; + } + + ret = blk_co_pwritev(s->target, op->offset, op->qiov.size, &op->qiov, 0); + mirror_write_complete(op, ret); +} + +/* Clip bytes relative to offset to not exceed end-of-file */ +static inline int64_t mirror_clip_bytes(MirrorBlockJob *s, + int64_t offset, + int64_t bytes) +{ + return MIN(bytes, s->bdev_length - offset); +} + +/* Round offset and/or bytes to target cluster if COW is needed, and + * return the offset of the adjusted tail against original. */ +static int mirror_cow_align(MirrorBlockJob *s, int64_t *offset, + uint64_t *bytes) +{ + bool need_cow; + int ret = 0; + int64_t align_offset = *offset; + int64_t align_bytes = *bytes; + int max_bytes = s->granularity * s->max_iov; + + need_cow = !test_bit(*offset / s->granularity, s->cow_bitmap); + need_cow |= !test_bit((*offset + *bytes - 1) / s->granularity, + s->cow_bitmap); + if (need_cow) { + bdrv_round_to_clusters(blk_bs(s->target), *offset, *bytes, + &align_offset, &align_bytes); + } + + if (align_bytes > max_bytes) { + align_bytes = max_bytes; + if (need_cow) { + align_bytes = QEMU_ALIGN_DOWN(align_bytes, s->target_cluster_size); + } + } + /* Clipping may result in align_bytes unaligned to chunk boundary, but + * that doesn't matter because it's already the end of source image. */ + align_bytes = mirror_clip_bytes(s, align_offset, align_bytes); + + ret = align_offset + align_bytes - (*offset + *bytes); + *offset = align_offset; + *bytes = align_bytes; + assert(ret >= 0); + return ret; +} + +static inline void coroutine_fn +mirror_wait_for_any_operation(MirrorBlockJob *s, bool active) +{ + MirrorOp *op; + + QTAILQ_FOREACH(op, &s->ops_in_flight, next) { + /* Do not wait on pseudo ops, because it may in turn wait on + * some other operation to start, which may in fact be the + * caller of this function. Since there is only one pseudo op + * at any given time, we will always find some real operation + * to wait on. */ + if (!op->is_pseudo_op && op->is_in_flight && + op->is_active_write == active) + { + qemu_co_queue_wait(&op->waiting_requests, NULL); + return; + } + } + abort(); +} + +static inline void coroutine_fn +mirror_wait_for_free_in_flight_slot(MirrorBlockJob *s) +{ + /* Only non-active operations use up in-flight slots */ + mirror_wait_for_any_operation(s, false); +} + +/* Perform a mirror copy operation. + * + * *op->bytes_handled is set to the number of bytes copied after and + * including offset, excluding any bytes copied prior to offset due + * to alignment. This will be op->bytes if no alignment is necessary, + * or (new_end - op->offset) if the tail is rounded up or down due to + * alignment or buffer limit. + */ +static void coroutine_fn mirror_co_read(void *opaque) +{ + MirrorOp *op = opaque; + MirrorBlockJob *s = op->s; + int nb_chunks; + uint64_t ret; + uint64_t max_bytes; + + max_bytes = s->granularity * s->max_iov; + + /* We can only handle as much as buf_size at a time. */ + op->bytes = MIN(s->buf_size, MIN(max_bytes, op->bytes)); + assert(op->bytes); + assert(op->bytes < BDRV_REQUEST_MAX_BYTES); + *op->bytes_handled = op->bytes; + + if (s->cow_bitmap) { + *op->bytes_handled += mirror_cow_align(s, &op->offset, &op->bytes); + } + /* Cannot exceed BDRV_REQUEST_MAX_BYTES + INT_MAX */ + assert(*op->bytes_handled <= UINT_MAX); + assert(op->bytes <= s->buf_size); + /* The offset is granularity-aligned because: + * 1) Caller passes in aligned values; + * 2) mirror_cow_align is used only when target cluster is larger. */ + assert(QEMU_IS_ALIGNED(op->offset, s->granularity)); + /* The range is sector-aligned, since bdrv_getlength() rounds up. */ + assert(QEMU_IS_ALIGNED(op->bytes, BDRV_SECTOR_SIZE)); + nb_chunks = DIV_ROUND_UP(op->bytes, s->granularity); + + while (s->buf_free_count < nb_chunks) { + trace_mirror_yield_in_flight(s, op->offset, s->in_flight); + mirror_wait_for_free_in_flight_slot(s); + } + + /* Now make a QEMUIOVector taking enough granularity-sized chunks + * from s->buf_free. + */ + qemu_iovec_init(&op->qiov, nb_chunks); + while (nb_chunks-- > 0) { + MirrorBuffer *buf = QSIMPLEQ_FIRST(&s->buf_free); + size_t remaining = op->bytes - op->qiov.size; + + QSIMPLEQ_REMOVE_HEAD(&s->buf_free, next); + s->buf_free_count--; + qemu_iovec_add(&op->qiov, buf, MIN(s->granularity, remaining)); + } + + /* Copy the dirty cluster. */ + s->in_flight++; + s->bytes_in_flight += op->bytes; + op->is_in_flight = true; + trace_mirror_one_iteration(s, op->offset, op->bytes); + + ret = bdrv_co_preadv(s->mirror_top_bs->backing, op->offset, op->bytes, + &op->qiov, 0); + mirror_read_complete(op, ret); +} + +static void coroutine_fn mirror_co_zero(void *opaque) +{ + MirrorOp *op = opaque; + int ret; + + op->s->in_flight++; + op->s->bytes_in_flight += op->bytes; + *op->bytes_handled = op->bytes; + op->is_in_flight = true; + + ret = blk_co_pwrite_zeroes(op->s->target, op->offset, op->bytes, + op->s->unmap ? BDRV_REQ_MAY_UNMAP : 0); + mirror_write_complete(op, ret); +} + +static void coroutine_fn mirror_co_discard(void *opaque) +{ + MirrorOp *op = opaque; + int ret; + + op->s->in_flight++; + op->s->bytes_in_flight += op->bytes; + *op->bytes_handled = op->bytes; + op->is_in_flight = true; + + ret = blk_co_pdiscard(op->s->target, op->offset, op->bytes); + mirror_write_complete(op, ret); +} + +static unsigned mirror_perform(MirrorBlockJob *s, int64_t offset, + unsigned bytes, MirrorMethod mirror_method) +{ + MirrorOp *op; + Coroutine *co; + int64_t bytes_handled = -1; + + op = g_new(MirrorOp, 1); + *op = (MirrorOp){ + .s = s, + .offset = offset, + .bytes = bytes, + .bytes_handled = &bytes_handled, + }; + qemu_co_queue_init(&op->waiting_requests); + + switch (mirror_method) { + case MIRROR_METHOD_COPY: + co = qemu_coroutine_create(mirror_co_read, op); + break; + case MIRROR_METHOD_ZERO: + co = qemu_coroutine_create(mirror_co_zero, op); + break; + case MIRROR_METHOD_DISCARD: + co = qemu_coroutine_create(mirror_co_discard, op); + break; + default: + abort(); + } + op->co = co; + + QTAILQ_INSERT_TAIL(&s->ops_in_flight, op, next); + qemu_coroutine_enter(co); + /* At this point, ownership of op has been moved to the coroutine + * and the object may already be freed */ + + /* Assert that this value has been set */ + assert(bytes_handled >= 0); + + /* Same assertion as in mirror_co_read() (and for mirror_co_read() + * and mirror_co_discard(), bytes_handled == op->bytes, which + * is the @bytes parameter given to this function) */ + assert(bytes_handled <= UINT_MAX); + return bytes_handled; +} + +static uint64_t coroutine_fn mirror_iteration(MirrorBlockJob *s) +{ + BlockDriverState *source = s->mirror_top_bs->backing->bs; + MirrorOp *pseudo_op; + int64_t offset; + uint64_t delay_ns = 0, ret = 0; + /* At least the first dirty chunk is mirrored in one iteration. */ + int nb_chunks = 1; + bool write_zeroes_ok = bdrv_can_write_zeroes_with_unmap(blk_bs(s->target)); + int max_io_bytes = MAX(s->buf_size / MAX_IN_FLIGHT, MAX_IO_BYTES); + + bdrv_dirty_bitmap_lock(s->dirty_bitmap); + offset = bdrv_dirty_iter_next(s->dbi); + if (offset < 0) { + bdrv_set_dirty_iter(s->dbi, 0); + offset = bdrv_dirty_iter_next(s->dbi); + trace_mirror_restart_iter(s, bdrv_get_dirty_count(s->dirty_bitmap)); + assert(offset >= 0); + } + bdrv_dirty_bitmap_unlock(s->dirty_bitmap); + + mirror_wait_on_conflicts(NULL, s, offset, 1); + + job_pause_point(&s->common.job); + + /* Find the number of consective dirty chunks following the first dirty + * one, and wait for in flight requests in them. */ + bdrv_dirty_bitmap_lock(s->dirty_bitmap); + while (nb_chunks * s->granularity < s->buf_size) { + int64_t next_dirty; + int64_t next_offset = offset + nb_chunks * s->granularity; + int64_t next_chunk = next_offset / s->granularity; + if (next_offset >= s->bdev_length || + !bdrv_dirty_bitmap_get_locked(s->dirty_bitmap, next_offset)) { + break; + } + if (test_bit(next_chunk, s->in_flight_bitmap)) { + break; + } + + next_dirty = bdrv_dirty_iter_next(s->dbi); + if (next_dirty > next_offset || next_dirty < 0) { + /* The bitmap iterator's cache is stale, refresh it */ + bdrv_set_dirty_iter(s->dbi, next_offset); + next_dirty = bdrv_dirty_iter_next(s->dbi); + } + assert(next_dirty == next_offset); + nb_chunks++; + } + + /* Clear dirty bits before querying the block status, because + * calling bdrv_block_status_above could yield - if some blocks are + * marked dirty in this window, we need to know. + */ + bdrv_reset_dirty_bitmap_locked(s->dirty_bitmap, offset, + nb_chunks * s->granularity); + bdrv_dirty_bitmap_unlock(s->dirty_bitmap); + + /* Before claiming an area in the in-flight bitmap, we have to + * create a MirrorOp for it so that conflicting requests can wait + * for it. mirror_perform() will create the real MirrorOps later, + * for now we just create a pseudo operation that will wake up all + * conflicting requests once all real operations have been + * launched. */ + pseudo_op = g_new(MirrorOp, 1); + *pseudo_op = (MirrorOp){ + .offset = offset, + .bytes = nb_chunks * s->granularity, + .is_pseudo_op = true, + }; + qemu_co_queue_init(&pseudo_op->waiting_requests); + QTAILQ_INSERT_TAIL(&s->ops_in_flight, pseudo_op, next); + + bitmap_set(s->in_flight_bitmap, offset / s->granularity, nb_chunks); + while (nb_chunks > 0 && offset < s->bdev_length) { + int ret; + int64_t io_bytes; + int64_t io_bytes_acct; + MirrorMethod mirror_method = MIRROR_METHOD_COPY; + + assert(!(offset % s->granularity)); + ret = bdrv_block_status_above(source, NULL, offset, + nb_chunks * s->granularity, + &io_bytes, NULL, NULL); + if (ret < 0) { + io_bytes = MIN(nb_chunks * s->granularity, max_io_bytes); + } else if (ret & BDRV_BLOCK_DATA) { + io_bytes = MIN(io_bytes, max_io_bytes); + } + + io_bytes -= io_bytes % s->granularity; + if (io_bytes < s->granularity) { + io_bytes = s->granularity; + } else if (ret >= 0 && !(ret & BDRV_BLOCK_DATA)) { + int64_t target_offset; + int64_t target_bytes; + bdrv_round_to_clusters(blk_bs(s->target), offset, io_bytes, + &target_offset, &target_bytes); + if (target_offset == offset && + target_bytes == io_bytes) { + mirror_method = ret & BDRV_BLOCK_ZERO ? + MIRROR_METHOD_ZERO : + MIRROR_METHOD_DISCARD; + } + } + + while (s->in_flight >= MAX_IN_FLIGHT) { + trace_mirror_yield_in_flight(s, offset, s->in_flight); + mirror_wait_for_free_in_flight_slot(s); + } + + if (s->ret < 0) { + ret = 0; + goto fail; + } + + io_bytes = mirror_clip_bytes(s, offset, io_bytes); + io_bytes = mirror_perform(s, offset, io_bytes, mirror_method); + if (mirror_method != MIRROR_METHOD_COPY && write_zeroes_ok) { + io_bytes_acct = 0; + } else { + io_bytes_acct = io_bytes; + } + assert(io_bytes); + offset += io_bytes; + nb_chunks -= DIV_ROUND_UP(io_bytes, s->granularity); + delay_ns = block_job_ratelimit_get_delay(&s->common, io_bytes_acct); + } + + ret = delay_ns; +fail: + QTAILQ_REMOVE(&s->ops_in_flight, pseudo_op, next); + qemu_co_queue_restart_all(&pseudo_op->waiting_requests); + g_free(pseudo_op); + + return ret; +} + +static void mirror_free_init(MirrorBlockJob *s) +{ + int granularity = s->granularity; + size_t buf_size = s->buf_size; + uint8_t *buf = s->buf; + + assert(s->buf_free_count == 0); + QSIMPLEQ_INIT(&s->buf_free); + while (buf_size != 0) { + MirrorBuffer *cur = (MirrorBuffer *)buf; + QSIMPLEQ_INSERT_TAIL(&s->buf_free, cur, next); + s->buf_free_count++; + buf_size -= granularity; + buf += granularity; + } +} + +/* This is also used for the .pause callback. There is no matching + * mirror_resume() because mirror_run() will begin iterating again + * when the job is resumed. + */ +static void coroutine_fn mirror_wait_for_all_io(MirrorBlockJob *s) +{ + while (s->in_flight > 0) { + mirror_wait_for_free_in_flight_slot(s); + } +} + +/** + * mirror_exit_common: handle both abort() and prepare() cases. + * for .prepare, returns 0 on success and -errno on failure. + * for .abort cases, denoted by abort = true, MUST return 0. + */ +static int mirror_exit_common(Job *job) +{ + MirrorBlockJob *s = container_of(job, MirrorBlockJob, common.job); + BlockJob *bjob = &s->common; + MirrorBDSOpaque *bs_opaque; + AioContext *replace_aio_context = NULL; + BlockDriverState *src; + BlockDriverState *target_bs; + BlockDriverState *mirror_top_bs; + Error *local_err = NULL; + bool abort = job->ret < 0; + int ret = 0; + + if (s->prepared) { + return 0; + } + s->prepared = true; + + mirror_top_bs = s->mirror_top_bs; + bs_opaque = mirror_top_bs->opaque; + src = mirror_top_bs->backing->bs; + target_bs = blk_bs(s->target); + + if (bdrv_chain_contains(src, target_bs)) { + bdrv_unfreeze_backing_chain(mirror_top_bs, target_bs); + } + + bdrv_release_dirty_bitmap(s->dirty_bitmap); + + /* Make sure that the source BDS doesn't go away during bdrv_replace_node, + * before we can call bdrv_drained_end */ + bdrv_ref(src); + bdrv_ref(mirror_top_bs); + bdrv_ref(target_bs); + + /* + * Remove target parent that still uses BLK_PERM_WRITE/RESIZE before + * inserting target_bs at s->to_replace, where we might not be able to get + * these permissions. + */ + blk_unref(s->target); + s->target = NULL; + + /* We don't access the source any more. Dropping any WRITE/RESIZE is + * required before it could become a backing file of target_bs. Not having + * these permissions any more means that we can't allow any new requests on + * mirror_top_bs from now on, so keep it drained. */ + bdrv_drained_begin(mirror_top_bs); + bs_opaque->stop = true; + bdrv_child_refresh_perms(mirror_top_bs, mirror_top_bs->backing, + &error_abort); + if (!abort && s->backing_mode == MIRROR_SOURCE_BACKING_CHAIN) { + BlockDriverState *backing = s->is_none_mode ? src : s->base; + BlockDriverState *unfiltered_target = bdrv_skip_filters(target_bs); + + if (bdrv_cow_bs(unfiltered_target) != backing) { + bdrv_set_backing_hd(unfiltered_target, backing, &local_err); + if (local_err) { + error_report_err(local_err); + local_err = NULL; + ret = -EPERM; + } + } + } + + if (s->to_replace) { + replace_aio_context = bdrv_get_aio_context(s->to_replace); + aio_context_acquire(replace_aio_context); + } + + if (s->should_complete && !abort) { + BlockDriverState *to_replace = s->to_replace ?: src; + bool ro = bdrv_is_read_only(to_replace); + + if (ro != bdrv_is_read_only(target_bs)) { + bdrv_reopen_set_read_only(target_bs, ro, NULL); + } + + /* The mirror job has no requests in flight any more, but we need to + * drain potential other users of the BDS before changing the graph. */ + assert(s->in_drain); + bdrv_drained_begin(target_bs); + /* + * Cannot use check_to_replace_node() here, because that would + * check for an op blocker on @to_replace, and we have our own + * there. + */ + if (bdrv_recurse_can_replace(src, to_replace)) { + bdrv_replace_node(to_replace, target_bs, &local_err); + } else { + error_setg(&local_err, "Can no longer replace '%s' by '%s', " + "because it can no longer be guaranteed that doing so " + "would not lead to an abrupt change of visible data", + to_replace->node_name, target_bs->node_name); + } + bdrv_drained_end(target_bs); + if (local_err) { + error_report_err(local_err); + ret = -EPERM; + } + } + if (s->to_replace) { + bdrv_op_unblock_all(s->to_replace, s->replace_blocker); + error_free(s->replace_blocker); + bdrv_unref(s->to_replace); + } + if (replace_aio_context) { + aio_context_release(replace_aio_context); + } + g_free(s->replaces); + bdrv_unref(target_bs); + + /* + * Remove the mirror filter driver from the graph. Before this, get rid of + * the blockers on the intermediate nodes so that the resulting state is + * valid. + */ + block_job_remove_all_bdrv(bjob); + bdrv_replace_node(mirror_top_bs, mirror_top_bs->backing->bs, &error_abort); + + /* We just changed the BDS the job BB refers to (with either or both of the + * bdrv_replace_node() calls), so switch the BB back so the cleanup does + * the right thing. We don't need any permissions any more now. */ + blk_remove_bs(bjob->blk); + blk_set_perm(bjob->blk, 0, BLK_PERM_ALL, &error_abort); + blk_insert_bs(bjob->blk, mirror_top_bs, &error_abort); + + bs_opaque->job = NULL; + + bdrv_drained_end(src); + bdrv_drained_end(mirror_top_bs); + s->in_drain = false; + bdrv_unref(mirror_top_bs); + bdrv_unref(src); + + return ret; +} + +static int mirror_prepare(Job *job) +{ + return mirror_exit_common(job); +} + +static void mirror_abort(Job *job) +{ + int ret = mirror_exit_common(job); + assert(ret == 0); +} + +static void coroutine_fn mirror_throttle(MirrorBlockJob *s) +{ + int64_t now = qemu_clock_get_ns(QEMU_CLOCK_REALTIME); + + if (now - s->last_pause_ns > BLOCK_JOB_SLICE_TIME) { + s->last_pause_ns = now; + job_sleep_ns(&s->common.job, 0); + } else { + job_pause_point(&s->common.job); + } +} + +static int coroutine_fn mirror_dirty_init(MirrorBlockJob *s) +{ + int64_t offset; + BlockDriverState *bs = s->mirror_top_bs->backing->bs; + BlockDriverState *target_bs = blk_bs(s->target); + int ret; + int64_t count; + + if (s->zero_target) { + if (!bdrv_can_write_zeroes_with_unmap(target_bs)) { + bdrv_set_dirty_bitmap(s->dirty_bitmap, 0, s->bdev_length); + return 0; + } + + s->initial_zeroing_ongoing = true; + for (offset = 0; offset < s->bdev_length; ) { + int bytes = MIN(s->bdev_length - offset, + QEMU_ALIGN_DOWN(INT_MAX, s->granularity)); + + mirror_throttle(s); + + if (job_is_cancelled(&s->common.job)) { + s->initial_zeroing_ongoing = false; + return 0; + } + + if (s->in_flight >= MAX_IN_FLIGHT) { + trace_mirror_yield(s, UINT64_MAX, s->buf_free_count, + s->in_flight); + mirror_wait_for_free_in_flight_slot(s); + continue; + } + + mirror_perform(s, offset, bytes, MIRROR_METHOD_ZERO); + offset += bytes; + } + + mirror_wait_for_all_io(s); + s->initial_zeroing_ongoing = false; + } + + /* First part, loop on the sectors and initialize the dirty bitmap. */ + for (offset = 0; offset < s->bdev_length; ) { + /* Just to make sure we are not exceeding int limit. */ + int bytes = MIN(s->bdev_length - offset, + QEMU_ALIGN_DOWN(INT_MAX, s->granularity)); + + mirror_throttle(s); + + if (job_is_cancelled(&s->common.job)) { + return 0; + } + + ret = bdrv_is_allocated_above(bs, s->base_overlay, true, offset, bytes, + &count); + if (ret < 0) { + return ret; + } + + assert(count); + if (ret > 0) { + bdrv_set_dirty_bitmap(s->dirty_bitmap, offset, count); + } + offset += count; + } + return 0; +} + +/* Called when going out of the streaming phase to flush the bulk of the + * data to the medium, or just before completing. + */ +static int mirror_flush(MirrorBlockJob *s) +{ + int ret = blk_flush(s->target); + if (ret < 0) { + if (mirror_error_action(s, false, -ret) == BLOCK_ERROR_ACTION_REPORT) { + s->ret = ret; + } + } + return ret; +} + +static int coroutine_fn mirror_run(Job *job, Error **errp) +{ + MirrorBlockJob *s = container_of(job, MirrorBlockJob, common.job); + BlockDriverState *bs = s->mirror_top_bs->backing->bs; + BlockDriverState *target_bs = blk_bs(s->target); + bool need_drain = true; + int64_t length; + int64_t target_length; + BlockDriverInfo bdi; + char backing_filename[2]; /* we only need 2 characters because we are only + checking for a NULL string */ + int ret = 0; + + if (job_is_cancelled(&s->common.job)) { + goto immediate_exit; + } + + s->bdev_length = bdrv_getlength(bs); + if (s->bdev_length < 0) { + ret = s->bdev_length; + goto immediate_exit; + } + + target_length = blk_getlength(s->target); + if (target_length < 0) { + ret = target_length; + goto immediate_exit; + } + + /* Active commit must resize the base image if its size differs from the + * active layer. */ + if (s->base == blk_bs(s->target)) { + if (s->bdev_length > target_length) { + ret = blk_truncate(s->target, s->bdev_length, false, + PREALLOC_MODE_OFF, 0, NULL); + if (ret < 0) { + goto immediate_exit; + } + } + } else if (s->bdev_length != target_length) { + error_setg(errp, "Source and target image have different sizes"); + ret = -EINVAL; + goto immediate_exit; + } + + if (s->bdev_length == 0) { + /* Transition to the READY state and wait for complete. */ + job_transition_to_ready(&s->common.job); + s->synced = true; + s->actively_synced = true; + while (!job_is_cancelled(&s->common.job) && !s->should_complete) { + job_yield(&s->common.job); + } + s->common.job.cancelled = false; + goto immediate_exit; + } + + length = DIV_ROUND_UP(s->bdev_length, s->granularity); + s->in_flight_bitmap = bitmap_new(length); + + /* If we have no backing file yet in the destination, we cannot let + * the destination do COW. Instead, we copy sectors around the + * dirty data if needed. We need a bitmap to do that. + */ + bdrv_get_backing_filename(target_bs, backing_filename, + sizeof(backing_filename)); + if (!bdrv_get_info(target_bs, &bdi) && bdi.cluster_size) { + s->target_cluster_size = bdi.cluster_size; + } else { + s->target_cluster_size = BDRV_SECTOR_SIZE; + } + if (backing_filename[0] && !bdrv_backing_chain_next(target_bs) && + s->granularity < s->target_cluster_size) { + s->buf_size = MAX(s->buf_size, s->target_cluster_size); + s->cow_bitmap = bitmap_new(length); + } + s->max_iov = MIN(bs->bl.max_iov, target_bs->bl.max_iov); + + s->buf = qemu_try_blockalign(bs, s->buf_size); + if (s->buf == NULL) { + ret = -ENOMEM; + goto immediate_exit; + } + + mirror_free_init(s); + + s->last_pause_ns = qemu_clock_get_ns(QEMU_CLOCK_REALTIME); + if (!s->is_none_mode) { + ret = mirror_dirty_init(s); + if (ret < 0 || job_is_cancelled(&s->common.job)) { + goto immediate_exit; + } + } + + assert(!s->dbi); + s->dbi = bdrv_dirty_iter_new(s->dirty_bitmap); + for (;;) { + uint64_t delay_ns = 0; + int64_t cnt, delta; + bool should_complete; + + /* Do not start passive operations while there are active + * writes in progress */ + while (s->in_active_write_counter) { + mirror_wait_for_any_operation(s, true); + } + + if (s->ret < 0) { + ret = s->ret; + goto immediate_exit; + } + + job_pause_point(&s->common.job); + + cnt = bdrv_get_dirty_count(s->dirty_bitmap); + /* cnt is the number of dirty bytes remaining and s->bytes_in_flight is + * the number of bytes currently being processed; together those are + * the current remaining operation length */ + job_progress_set_remaining(&s->common.job, s->bytes_in_flight + cnt); + + /* Note that even when no rate limit is applied we need to yield + * periodically with no pending I/O so that bdrv_drain_all() returns. + * We do so every BLKOCK_JOB_SLICE_TIME nanoseconds, or when there is + * an error, or when the source is clean, whichever comes first. */ + delta = qemu_clock_get_ns(QEMU_CLOCK_REALTIME) - s->last_pause_ns; + if (delta < BLOCK_JOB_SLICE_TIME && + s->common.iostatus == BLOCK_DEVICE_IO_STATUS_OK) { + if (s->in_flight >= MAX_IN_FLIGHT || s->buf_free_count == 0 || + (cnt == 0 && s->in_flight > 0)) { + trace_mirror_yield(s, cnt, s->buf_free_count, s->in_flight); + mirror_wait_for_free_in_flight_slot(s); + continue; + } else if (cnt != 0) { + delay_ns = mirror_iteration(s); + } + } + + should_complete = false; + if (s->in_flight == 0 && cnt == 0) { + trace_mirror_before_flush(s); + if (!s->synced) { + if (mirror_flush(s) < 0) { + /* Go check s->ret. */ + continue; + } + /* We're out of the streaming phase. From now on, if the job + * is cancelled we will actually complete all pending I/O and + * report completion. This way, block-job-cancel will leave + * the target in a consistent state. + */ + job_transition_to_ready(&s->common.job); + s->synced = true; + if (s->copy_mode != MIRROR_COPY_MODE_BACKGROUND) { + s->actively_synced = true; + } + } + + should_complete = s->should_complete || + job_is_cancelled(&s->common.job); + cnt = bdrv_get_dirty_count(s->dirty_bitmap); + } + + if (cnt == 0 && should_complete) { + /* The dirty bitmap is not updated while operations are pending. + * If we're about to exit, wait for pending operations before + * calling bdrv_get_dirty_count(bs), or we may exit while the + * source has dirty data to copy! + * + * Note that I/O can be submitted by the guest while + * mirror_populate runs, so pause it now. Before deciding + * whether to switch to target check one last time if I/O has + * come in the meanwhile, and if not flush the data to disk. + */ + trace_mirror_before_drain(s, cnt); + + s->in_drain = true; + bdrv_drained_begin(bs); + cnt = bdrv_get_dirty_count(s->dirty_bitmap); + if (cnt > 0 || mirror_flush(s) < 0) { + bdrv_drained_end(bs); + s->in_drain = false; + continue; + } + + /* The two disks are in sync. Exit and report successful + * completion. + */ + assert(QLIST_EMPTY(&bs->tracked_requests)); + s->common.job.cancelled = false; + need_drain = false; + break; + } + + ret = 0; + + if (s->synced && !should_complete) { + delay_ns = (s->in_flight == 0 && + cnt == 0 ? BLOCK_JOB_SLICE_TIME : 0); + } + trace_mirror_before_sleep(s, cnt, s->synced, delay_ns); + job_sleep_ns(&s->common.job, delay_ns); + if (job_is_cancelled(&s->common.job) && + (!s->synced || s->common.job.force_cancel)) + { + break; + } + s->last_pause_ns = qemu_clock_get_ns(QEMU_CLOCK_REALTIME); + } + +immediate_exit: + if (s->in_flight > 0) { + /* We get here only if something went wrong. Either the job failed, + * or it was cancelled prematurely so that we do not guarantee that + * the target is a copy of the source. + */ + assert(ret < 0 || ((s->common.job.force_cancel || !s->synced) && + job_is_cancelled(&s->common.job))); + assert(need_drain); + mirror_wait_for_all_io(s); + } + + assert(s->in_flight == 0); + qemu_vfree(s->buf); + g_free(s->cow_bitmap); + g_free(s->in_flight_bitmap); + bdrv_dirty_iter_free(s->dbi); + + if (need_drain) { + s->in_drain = true; + bdrv_drained_begin(bs); + } + + return ret; +} + +static void mirror_complete(Job *job, Error **errp) +{ + MirrorBlockJob *s = container_of(job, MirrorBlockJob, common.job); + BlockDriverState *target; + + target = blk_bs(s->target); + + if (!s->synced) { + error_setg(errp, "The active block job '%s' cannot be completed", + job->id); + return; + } + + if (s->backing_mode == MIRROR_OPEN_BACKING_CHAIN) { + int ret; + + assert(!bdrv_backing_chain_next(target)); + ret = bdrv_open_backing_file(bdrv_skip_filters(target), NULL, + "backing", errp); + if (ret < 0) { + return; + } + } + + /* block all operations on to_replace bs */ + if (s->replaces) { + AioContext *replace_aio_context; + + s->to_replace = bdrv_find_node(s->replaces); + if (!s->to_replace) { + error_setg(errp, "Node name '%s' not found", s->replaces); + return; + } + + replace_aio_context = bdrv_get_aio_context(s->to_replace); + aio_context_acquire(replace_aio_context); + + /* TODO Translate this into permission system. Current definition of + * GRAPH_MOD would require to request it for the parents; they might + * not even be BlockDriverStates, however, so a BdrvChild can't address + * them. May need redefinition of GRAPH_MOD. */ + error_setg(&s->replace_blocker, + "block device is in use by block-job-complete"); + bdrv_op_block_all(s->to_replace, s->replace_blocker); + bdrv_ref(s->to_replace); + + aio_context_release(replace_aio_context); + } + + s->should_complete = true; + job_enter(job); +} + +static void coroutine_fn mirror_pause(Job *job) +{ + MirrorBlockJob *s = container_of(job, MirrorBlockJob, common.job); + + mirror_wait_for_all_io(s); +} + +static bool mirror_drained_poll(BlockJob *job) +{ + MirrorBlockJob *s = container_of(job, MirrorBlockJob, common); + + /* If the job isn't paused nor cancelled, we can't be sure that it won't + * issue more requests. We make an exception if we've reached this point + * from one of our own drain sections, to avoid a deadlock waiting for + * ourselves. + */ + if (!s->common.job.paused && !s->common.job.cancelled && !s->in_drain) { + return true; + } + + return !!s->in_flight; +} + +static const BlockJobDriver mirror_job_driver = { + .job_driver = { + .instance_size = sizeof(MirrorBlockJob), + .job_type = JOB_TYPE_MIRROR, + .free = block_job_free, + .user_resume = block_job_user_resume, + .run = mirror_run, + .prepare = mirror_prepare, + .abort = mirror_abort, + .pause = mirror_pause, + .complete = mirror_complete, + }, + .drained_poll = mirror_drained_poll, +}; + +static const BlockJobDriver commit_active_job_driver = { + .job_driver = { + .instance_size = sizeof(MirrorBlockJob), + .job_type = JOB_TYPE_COMMIT, + .free = block_job_free, + .user_resume = block_job_user_resume, + .run = mirror_run, + .prepare = mirror_prepare, + .abort = mirror_abort, + .pause = mirror_pause, + .complete = mirror_complete, + }, + .drained_poll = mirror_drained_poll, +}; + +static void coroutine_fn +do_sync_target_write(MirrorBlockJob *job, MirrorMethod method, + uint64_t offset, uint64_t bytes, + QEMUIOVector *qiov, int flags) +{ + int ret; + size_t qiov_offset = 0; + int64_t bitmap_offset, bitmap_end; + + if (!QEMU_IS_ALIGNED(offset, job->granularity) && + bdrv_dirty_bitmap_get(job->dirty_bitmap, offset)) + { + /* + * Dirty unaligned padding: ignore it. + * + * Reasoning: + * 1. If we copy it, we can't reset corresponding bit in + * dirty_bitmap as there may be some "dirty" bytes still not + * copied. + * 2. It's already dirty, so skipping it we don't diverge mirror + * progress. + * + * Note, that because of this, guest write may have no contribution + * into mirror converge, but that's not bad, as we have background + * process of mirroring. If under some bad circumstances (high guest + * IO load) background process starve, we will not converge anyway, + * even if each write will contribute, as guest is not guaranteed to + * rewrite the whole disk. + */ + qiov_offset = QEMU_ALIGN_UP(offset, job->granularity) - offset; + if (bytes <= qiov_offset) { + /* nothing to do after shrink */ + return; + } + offset += qiov_offset; + bytes -= qiov_offset; + } + + if (!QEMU_IS_ALIGNED(offset + bytes, job->granularity) && + bdrv_dirty_bitmap_get(job->dirty_bitmap, offset + bytes - 1)) + { + uint64_t tail = (offset + bytes) % job->granularity; + + if (bytes <= tail) { + /* nothing to do after shrink */ + return; + } + bytes -= tail; + } + + /* + * Tails are either clean or shrunk, so for bitmap resetting + * we safely align the range down. + */ + bitmap_offset = QEMU_ALIGN_UP(offset, job->granularity); + bitmap_end = QEMU_ALIGN_DOWN(offset + bytes, job->granularity); + if (bitmap_offset < bitmap_end) { + bdrv_reset_dirty_bitmap(job->dirty_bitmap, bitmap_offset, + bitmap_end - bitmap_offset); + } + + job_progress_increase_remaining(&job->common.job, bytes); + + switch (method) { + case MIRROR_METHOD_COPY: + ret = blk_co_pwritev_part(job->target, offset, bytes, + qiov, qiov_offset, flags); + break; + + case MIRROR_METHOD_ZERO: + assert(!qiov); + ret = blk_co_pwrite_zeroes(job->target, offset, bytes, flags); + break; + + case MIRROR_METHOD_DISCARD: + assert(!qiov); + ret = blk_co_pdiscard(job->target, offset, bytes); + break; + + default: + abort(); + } + + if (ret >= 0) { + job_progress_update(&job->common.job, bytes); + } else { + BlockErrorAction action; + + /* + * We failed, so we should mark dirty the whole area, aligned up. + * Note that we don't care about shrunk tails if any: they were dirty + * at function start, and they must be still dirty, as we've locked + * the region for in-flight op. + */ + bitmap_offset = QEMU_ALIGN_DOWN(offset, job->granularity); + bitmap_end = QEMU_ALIGN_UP(offset + bytes, job->granularity); + bdrv_set_dirty_bitmap(job->dirty_bitmap, bitmap_offset, + bitmap_end - bitmap_offset); + job->actively_synced = false; + + action = mirror_error_action(job, false, -ret); + if (action == BLOCK_ERROR_ACTION_REPORT) { + if (!job->ret) { + job->ret = ret; + } + } + } +} + +static MirrorOp *coroutine_fn active_write_prepare(MirrorBlockJob *s, + uint64_t offset, + uint64_t bytes) +{ + MirrorOp *op; + uint64_t start_chunk = offset / s->granularity; + uint64_t end_chunk = DIV_ROUND_UP(offset + bytes, s->granularity); + + op = g_new(MirrorOp, 1); + *op = (MirrorOp){ + .s = s, + .offset = offset, + .bytes = bytes, + .is_active_write = true, + .is_in_flight = true, + }; + qemu_co_queue_init(&op->waiting_requests); + QTAILQ_INSERT_TAIL(&s->ops_in_flight, op, next); + + s->in_active_write_counter++; + + mirror_wait_on_conflicts(op, s, offset, bytes); + + bitmap_set(s->in_flight_bitmap, start_chunk, end_chunk - start_chunk); + + return op; +} + +static void coroutine_fn active_write_settle(MirrorOp *op) +{ + uint64_t start_chunk = op->offset / op->s->granularity; + uint64_t end_chunk = DIV_ROUND_UP(op->offset + op->bytes, + op->s->granularity); + + if (!--op->s->in_active_write_counter && op->s->actively_synced) { + BdrvChild *source = op->s->mirror_top_bs->backing; + + if (QLIST_FIRST(&source->bs->parents) == source && + QLIST_NEXT(source, next_parent) == NULL) + { + /* Assert that we are back in sync once all active write + * operations are settled. + * Note that we can only assert this if the mirror node + * is the source node's only parent. */ + assert(!bdrv_get_dirty_count(op->s->dirty_bitmap)); + } + } + bitmap_clear(op->s->in_flight_bitmap, start_chunk, end_chunk - start_chunk); + QTAILQ_REMOVE(&op->s->ops_in_flight, op, next); + qemu_co_queue_restart_all(&op->waiting_requests); + g_free(op); +} + +static int coroutine_fn bdrv_mirror_top_preadv(BlockDriverState *bs, + uint64_t offset, uint64_t bytes, QEMUIOVector *qiov, int flags) +{ + return bdrv_co_preadv(bs->backing, offset, bytes, qiov, flags); +} + +static int coroutine_fn bdrv_mirror_top_do_write(BlockDriverState *bs, + MirrorMethod method, uint64_t offset, uint64_t bytes, QEMUIOVector *qiov, + int flags) +{ + MirrorOp *op = NULL; + MirrorBDSOpaque *s = bs->opaque; + int ret = 0; + bool copy_to_target; + + copy_to_target = s->job->ret >= 0 && + s->job->copy_mode == MIRROR_COPY_MODE_WRITE_BLOCKING; + + if (copy_to_target) { + op = active_write_prepare(s->job, offset, bytes); + } + + switch (method) { + case MIRROR_METHOD_COPY: + ret = bdrv_co_pwritev(bs->backing, offset, bytes, qiov, flags); + break; + + case MIRROR_METHOD_ZERO: + ret = bdrv_co_pwrite_zeroes(bs->backing, offset, bytes, flags); + break; + + case MIRROR_METHOD_DISCARD: + ret = bdrv_co_pdiscard(bs->backing, offset, bytes); + break; + + default: + abort(); + } + + if (ret < 0) { + goto out; + } + + if (copy_to_target) { + do_sync_target_write(s->job, method, offset, bytes, qiov, flags); + } + +out: + if (copy_to_target) { + active_write_settle(op); + } + return ret; +} + +static int coroutine_fn bdrv_mirror_top_pwritev(BlockDriverState *bs, + uint64_t offset, uint64_t bytes, QEMUIOVector *qiov, int flags) +{ + MirrorBDSOpaque *s = bs->opaque; + QEMUIOVector bounce_qiov; + void *bounce_buf; + int ret = 0; + bool copy_to_target; + + copy_to_target = s->job->ret >= 0 && + s->job->copy_mode == MIRROR_COPY_MODE_WRITE_BLOCKING; + + if (copy_to_target) { + /* The guest might concurrently modify the data to write; but + * the data on source and destination must match, so we have + * to use a bounce buffer if we are going to write to the + * target now. */ + bounce_buf = qemu_blockalign(bs, bytes); + iov_to_buf_full(qiov->iov, qiov->niov, 0, bounce_buf, bytes); + + qemu_iovec_init(&bounce_qiov, 1); + qemu_iovec_add(&bounce_qiov, bounce_buf, bytes); + qiov = &bounce_qiov; + } + + ret = bdrv_mirror_top_do_write(bs, MIRROR_METHOD_COPY, offset, bytes, qiov, + flags); + + if (copy_to_target) { + qemu_iovec_destroy(&bounce_qiov); + qemu_vfree(bounce_buf); + } + + return ret; +} + +static int coroutine_fn bdrv_mirror_top_flush(BlockDriverState *bs) +{ + if (bs->backing == NULL) { + /* we can be here after failed bdrv_append in mirror_start_job */ + return 0; + } + return bdrv_co_flush(bs->backing->bs); +} + +static int coroutine_fn bdrv_mirror_top_pwrite_zeroes(BlockDriverState *bs, + int64_t offset, int bytes, BdrvRequestFlags flags) +{ + return bdrv_mirror_top_do_write(bs, MIRROR_METHOD_ZERO, offset, bytes, NULL, + flags); +} + +static int coroutine_fn bdrv_mirror_top_pdiscard(BlockDriverState *bs, + int64_t offset, int bytes) +{ + return bdrv_mirror_top_do_write(bs, MIRROR_METHOD_DISCARD, offset, bytes, + NULL, 0); +} + +static void bdrv_mirror_top_refresh_filename(BlockDriverState *bs) +{ + if (bs->backing == NULL) { + /* we can be here after failed bdrv_attach_child in + * bdrv_set_backing_hd */ + return; + } + pstrcpy(bs->exact_filename, sizeof(bs->exact_filename), + bs->backing->bs->filename); +} + +static void bdrv_mirror_top_child_perm(BlockDriverState *bs, BdrvChild *c, + BdrvChildRole role, + BlockReopenQueue *reopen_queue, + uint64_t perm, uint64_t shared, + uint64_t *nperm, uint64_t *nshared) +{ + MirrorBDSOpaque *s = bs->opaque; + + if (s->stop) { + /* + * If the job is to be stopped, we do not need to forward + * anything to the real image. + */ + *nperm = 0; + *nshared = BLK_PERM_ALL; + return; + } + + /* Must be able to forward guest writes to the real image */ + *nperm = 0; + if (perm & BLK_PERM_WRITE) { + *nperm |= BLK_PERM_WRITE; + } + + *nshared = BLK_PERM_ALL; +} + +/* Dummy node that provides consistent read to its users without requiring it + * from its backing file and that allows writes on the backing file chain. */ +static BlockDriver bdrv_mirror_top = { + .format_name = "mirror_top", + .bdrv_co_preadv = bdrv_mirror_top_preadv, + .bdrv_co_pwritev = bdrv_mirror_top_pwritev, + .bdrv_co_pwrite_zeroes = bdrv_mirror_top_pwrite_zeroes, + .bdrv_co_pdiscard = bdrv_mirror_top_pdiscard, + .bdrv_co_flush = bdrv_mirror_top_flush, + .bdrv_refresh_filename = bdrv_mirror_top_refresh_filename, + .bdrv_child_perm = bdrv_mirror_top_child_perm, + + .is_filter = true, +}; + +static BlockJob *mirror_start_job( + const char *job_id, BlockDriverState *bs, + int creation_flags, BlockDriverState *target, + const char *replaces, int64_t speed, + uint32_t granularity, int64_t buf_size, + BlockMirrorBackingMode backing_mode, + bool zero_target, + BlockdevOnError on_source_error, + BlockdevOnError on_target_error, + bool unmap, + BlockCompletionFunc *cb, + void *opaque, + const BlockJobDriver *driver, + bool is_none_mode, BlockDriverState *base, + bool auto_complete, const char *filter_node_name, + bool is_mirror, MirrorCopyMode copy_mode, + Error **errp) +{ + MirrorBlockJob *s; + MirrorBDSOpaque *bs_opaque; + BlockDriverState *mirror_top_bs; + bool target_is_backing; + uint64_t target_perms, target_shared_perms; + Error *local_err = NULL; + int ret; + + if (granularity == 0) { + granularity = bdrv_get_default_bitmap_granularity(target); + } + + assert(is_power_of_2(granularity)); + + if (buf_size < 0) { + error_setg(errp, "Invalid parameter 'buf-size'"); + return NULL; + } + + if (buf_size == 0) { + buf_size = DEFAULT_MIRROR_BUF_SIZE; + } + + if (bdrv_skip_filters(bs) == bdrv_skip_filters(target)) { + error_setg(errp, "Can't mirror node into itself"); + return NULL; + } + + /* In the case of active commit, add dummy driver to provide consistent + * reads on the top, while disabling it in the intermediate nodes, and make + * the backing chain writable. */ + mirror_top_bs = bdrv_new_open_driver(&bdrv_mirror_top, filter_node_name, + BDRV_O_RDWR, errp); + if (mirror_top_bs == NULL) { + return NULL; + } + if (!filter_node_name) { + mirror_top_bs->implicit = true; + } + + /* So that we can always drop this node */ + mirror_top_bs->never_freeze = true; + + mirror_top_bs->total_sectors = bs->total_sectors; + mirror_top_bs->supported_write_flags = BDRV_REQ_WRITE_UNCHANGED; + mirror_top_bs->supported_zero_flags = BDRV_REQ_WRITE_UNCHANGED | + BDRV_REQ_NO_FALLBACK; + bs_opaque = g_new0(MirrorBDSOpaque, 1); + mirror_top_bs->opaque = bs_opaque; + + /* bdrv_append takes ownership of the mirror_top_bs reference, need to keep + * it alive until block_job_create() succeeds even if bs has no parent. */ + bdrv_ref(mirror_top_bs); + bdrv_drained_begin(bs); + bdrv_append(mirror_top_bs, bs, &local_err); + bdrv_drained_end(bs); + + if (local_err) { + bdrv_unref(mirror_top_bs); + error_propagate(errp, local_err); + return NULL; + } + + /* Make sure that the source is not resized while the job is running */ + s = block_job_create(job_id, driver, NULL, mirror_top_bs, + BLK_PERM_CONSISTENT_READ, + BLK_PERM_CONSISTENT_READ | BLK_PERM_WRITE_UNCHANGED | + BLK_PERM_WRITE | BLK_PERM_GRAPH_MOD, speed, + creation_flags, cb, opaque, errp); + if (!s) { + goto fail; + } + bs_opaque->job = s; + + /* The block job now has a reference to this node */ + bdrv_unref(mirror_top_bs); + + s->mirror_top_bs = mirror_top_bs; + + /* No resize for the target either; while the mirror is still running, a + * consistent read isn't necessarily possible. We could possibly allow + * writes and graph modifications, though it would likely defeat the + * purpose of a mirror, so leave them blocked for now. + * + * In the case of active commit, things look a bit different, though, + * because the target is an already populated backing file in active use. + * We can allow anything except resize there.*/ + + target_perms = BLK_PERM_WRITE; + target_shared_perms = BLK_PERM_WRITE_UNCHANGED; + + target_is_backing = bdrv_chain_contains(bs, target); + if (target_is_backing) { + int64_t bs_size, target_size; + bs_size = bdrv_getlength(bs); + if (bs_size < 0) { + error_setg_errno(errp, -bs_size, + "Could not inquire top image size"); + goto fail; + } + + target_size = bdrv_getlength(target); + if (target_size < 0) { + error_setg_errno(errp, -target_size, + "Could not inquire base image size"); + goto fail; + } + + if (target_size < bs_size) { + target_perms |= BLK_PERM_RESIZE; + } + + target_shared_perms |= BLK_PERM_CONSISTENT_READ + | BLK_PERM_WRITE + | BLK_PERM_GRAPH_MOD; + } else if (bdrv_chain_contains(bs, bdrv_skip_filters(target))) { + /* + * We may want to allow this in the future, but it would + * require taking some extra care. + */ + error_setg(errp, "Cannot mirror to a filter on top of a node in the " + "source's backing chain"); + goto fail; + } + + if (backing_mode != MIRROR_LEAVE_BACKING_CHAIN) { + target_perms |= BLK_PERM_GRAPH_MOD; + } + + s->target = blk_new(s->common.job.aio_context, + target_perms, target_shared_perms); + ret = blk_insert_bs(s->target, target, errp); + if (ret < 0) { + goto fail; + } + if (is_mirror) { + /* XXX: Mirror target could be a NBD server of target QEMU in the case + * of non-shared block migration. To allow migration completion, we + * have to allow "inactivate" of the target BB. When that happens, we + * know the job is drained, and the vcpus are stopped, so no write + * operation will be performed. Block layer already has assertions to + * ensure that. */ + blk_set_force_allow_inactivate(s->target); + } + blk_set_allow_aio_context_change(s->target, true); + blk_set_disable_request_queuing(s->target, true); + + s->replaces = g_strdup(replaces); + s->on_source_error = on_source_error; + s->on_target_error = on_target_error; + s->is_none_mode = is_none_mode; + s->backing_mode = backing_mode; + s->zero_target = zero_target; + s->copy_mode = copy_mode; + s->base = base; + s->base_overlay = bdrv_find_overlay(bs, base); + s->granularity = granularity; + s->buf_size = ROUND_UP(buf_size, granularity); + s->unmap = unmap; + if (auto_complete) { + s->should_complete = true; + } + + s->dirty_bitmap = bdrv_create_dirty_bitmap(bs, granularity, NULL, errp); + if (!s->dirty_bitmap) { + goto fail; + } + if (s->copy_mode == MIRROR_COPY_MODE_WRITE_BLOCKING) { + bdrv_disable_dirty_bitmap(s->dirty_bitmap); + } + + ret = block_job_add_bdrv(&s->common, "source", bs, 0, + BLK_PERM_WRITE_UNCHANGED | BLK_PERM_WRITE | + BLK_PERM_CONSISTENT_READ, + errp); + if (ret < 0) { + goto fail; + } + + /* Required permissions are already taken with blk_new() */ + block_job_add_bdrv(&s->common, "target", target, 0, BLK_PERM_ALL, + &error_abort); + + /* In commit_active_start() all intermediate nodes disappear, so + * any jobs in them must be blocked */ + if (target_is_backing) { + BlockDriverState *iter, *filtered_target; + uint64_t iter_shared_perms; + + /* + * The topmost node with + * bdrv_skip_filters(filtered_target) == bdrv_skip_filters(target) + */ + filtered_target = bdrv_cow_bs(bdrv_find_overlay(bs, target)); + + assert(bdrv_skip_filters(filtered_target) == + bdrv_skip_filters(target)); + + /* + * XXX BLK_PERM_WRITE needs to be allowed so we don't block + * ourselves at s->base (if writes are blocked for a node, they are + * also blocked for its backing file). The other options would be a + * second filter driver above s->base (== target). + */ + iter_shared_perms = BLK_PERM_WRITE_UNCHANGED | BLK_PERM_WRITE; + + for (iter = bdrv_filter_or_cow_bs(bs); iter != target; + iter = bdrv_filter_or_cow_bs(iter)) + { + if (iter == filtered_target) { + /* + * From here on, all nodes are filters on the base. + * This allows us to share BLK_PERM_CONSISTENT_READ. + */ + iter_shared_perms |= BLK_PERM_CONSISTENT_READ; + } + + ret = block_job_add_bdrv(&s->common, "intermediate node", iter, 0, + iter_shared_perms, errp); + if (ret < 0) { + goto fail; + } + } + + if (bdrv_freeze_backing_chain(mirror_top_bs, target, errp) < 0) { + goto fail; + } + } + + QTAILQ_INIT(&s->ops_in_flight); + + trace_mirror_start(bs, s, opaque); + job_start(&s->common.job); + + return &s->common; + +fail: + if (s) { + /* Make sure this BDS does not go away until we have completed the graph + * changes below */ + bdrv_ref(mirror_top_bs); + + g_free(s->replaces); + blk_unref(s->target); + bs_opaque->job = NULL; + if (s->dirty_bitmap) { + bdrv_release_dirty_bitmap(s->dirty_bitmap); + } + job_early_fail(&s->common.job); + } + + bs_opaque->stop = true; + bdrv_child_refresh_perms(mirror_top_bs, mirror_top_bs->backing, + &error_abort); + bdrv_replace_node(mirror_top_bs, mirror_top_bs->backing->bs, &error_abort); + + bdrv_unref(mirror_top_bs); + + return NULL; +} + +void mirror_start(const char *job_id, BlockDriverState *bs, + BlockDriverState *target, const char *replaces, + int creation_flags, int64_t speed, + uint32_t granularity, int64_t buf_size, + MirrorSyncMode mode, BlockMirrorBackingMode backing_mode, + bool zero_target, + BlockdevOnError on_source_error, + BlockdevOnError on_target_error, + bool unmap, const char *filter_node_name, + MirrorCopyMode copy_mode, Error **errp) +{ + bool is_none_mode; + BlockDriverState *base; + + if ((mode == MIRROR_SYNC_MODE_INCREMENTAL) || + (mode == MIRROR_SYNC_MODE_BITMAP)) { + error_setg(errp, "Sync mode '%s' not supported", + MirrorSyncMode_str(mode)); + return; + } + is_none_mode = mode == MIRROR_SYNC_MODE_NONE; + base = mode == MIRROR_SYNC_MODE_TOP ? bdrv_backing_chain_next(bs) : NULL; + mirror_start_job(job_id, bs, creation_flags, target, replaces, + speed, granularity, buf_size, backing_mode, zero_target, + on_source_error, on_target_error, unmap, NULL, NULL, + &mirror_job_driver, is_none_mode, base, false, + filter_node_name, true, copy_mode, errp); +} + +BlockJob *commit_active_start(const char *job_id, BlockDriverState *bs, + BlockDriverState *base, int creation_flags, + int64_t speed, BlockdevOnError on_error, + const char *filter_node_name, + BlockCompletionFunc *cb, void *opaque, + bool auto_complete, Error **errp) +{ + bool base_read_only; + Error *local_err = NULL; + BlockJob *ret; + + base_read_only = bdrv_is_read_only(base); + + if (base_read_only) { + if (bdrv_reopen_set_read_only(base, false, errp) < 0) { + return NULL; + } + } + + ret = mirror_start_job( + job_id, bs, creation_flags, base, NULL, speed, 0, 0, + MIRROR_LEAVE_BACKING_CHAIN, false, + on_error, on_error, true, cb, opaque, + &commit_active_job_driver, false, base, auto_complete, + filter_node_name, false, MIRROR_COPY_MODE_BACKGROUND, + &local_err); + if (local_err) { + error_propagate(errp, local_err); + goto error_restore_flags; + } + + return ret; + +error_restore_flags: + /* ignore error and errp for bdrv_reopen, because we want to propagate + * the original error */ + if (base_read_only) { + bdrv_reopen_set_read_only(base, true, NULL); + } + return NULL; +} diff --git a/block/monitor/bitmap-qmp-cmds.c b/block/monitor/bitmap-qmp-cmds.c new file mode 100644 index 000000000..9f11deec6 --- /dev/null +++ b/block/monitor/bitmap-qmp-cmds.c @@ -0,0 +1,321 @@ +/* + * QEMU block dirty bitmap QMP commands + * + * Copyright (c) 2003-2008 Fabrice Bellard + * + * This work is licensed under the terms of the GNU GPL, version 2 or + * later. See the COPYING file in the top-level directory. + * + * This file incorporates work covered by the following copyright and + * permission notice: + * + * Copyright (c) 2003-2008 Fabrice Bellard + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ + +#include "qemu/osdep.h" + +#include "block/block_int.h" +#include "qapi/qapi-commands-block.h" +#include "qapi/error.h" + +/** + * block_dirty_bitmap_lookup: + * Return a dirty bitmap (if present), after validating + * the node reference and bitmap names. + * + * @node: The name of the BDS node to search for bitmaps + * @name: The name of the bitmap to search for + * @pbs: Output pointer for BDS lookup, if desired. Can be NULL. + * @errp: Output pointer for error information. Can be NULL. + * + * @return: A bitmap object on success, or NULL on failure. + */ +BdrvDirtyBitmap *block_dirty_bitmap_lookup(const char *node, + const char *name, + BlockDriverState **pbs, + Error **errp) +{ + BlockDriverState *bs; + BdrvDirtyBitmap *bitmap; + + if (!node) { + error_setg(errp, "Node cannot be NULL"); + return NULL; + } + if (!name) { + error_setg(errp, "Bitmap name cannot be NULL"); + return NULL; + } + bs = bdrv_lookup_bs(node, node, NULL); + if (!bs) { + error_setg(errp, "Node '%s' not found", node); + return NULL; + } + + bitmap = bdrv_find_dirty_bitmap(bs, name); + if (!bitmap) { + error_setg(errp, "Dirty bitmap '%s' not found", name); + return NULL; + } + + if (pbs) { + *pbs = bs; + } + + return bitmap; +} + +void qmp_block_dirty_bitmap_add(const char *node, const char *name, + bool has_granularity, uint32_t granularity, + bool has_persistent, bool persistent, + bool has_disabled, bool disabled, + Error **errp) +{ + BlockDriverState *bs; + BdrvDirtyBitmap *bitmap; + AioContext *aio_context; + + if (!name || name[0] == '\0') { + error_setg(errp, "Bitmap name cannot be empty"); + return; + } + + bs = bdrv_lookup_bs(node, node, errp); + if (!bs) { + return; + } + + aio_context = bdrv_get_aio_context(bs); + aio_context_acquire(aio_context); + + if (has_granularity) { + if (granularity < 512 || !is_power_of_2(granularity)) { + error_setg(errp, "Granularity must be power of 2 " + "and at least 512"); + goto out; + } + } else { + /* Default to cluster size, if available: */ + granularity = bdrv_get_default_bitmap_granularity(bs); + } + + if (!has_persistent) { + persistent = false; + } + + if (!has_disabled) { + disabled = false; + } + + if (persistent && + !bdrv_can_store_new_dirty_bitmap(bs, name, granularity, errp)) + { + goto out; + } + + bitmap = bdrv_create_dirty_bitmap(bs, granularity, name, errp); + if (bitmap == NULL) { + goto out; + } + + if (disabled) { + bdrv_disable_dirty_bitmap(bitmap); + } + + bdrv_dirty_bitmap_set_persistence(bitmap, persistent); + +out: + aio_context_release(aio_context); +} + +BdrvDirtyBitmap *block_dirty_bitmap_remove(const char *node, const char *name, + bool release, + BlockDriverState **bitmap_bs, + Error **errp) +{ + BlockDriverState *bs; + BdrvDirtyBitmap *bitmap; + AioContext *aio_context; + + bitmap = block_dirty_bitmap_lookup(node, name, &bs, errp); + if (!bitmap || !bs) { + return NULL; + } + + aio_context = bdrv_get_aio_context(bs); + aio_context_acquire(aio_context); + + if (bdrv_dirty_bitmap_check(bitmap, BDRV_BITMAP_BUSY | BDRV_BITMAP_RO, + errp)) { + aio_context_release(aio_context); + return NULL; + } + + if (bdrv_dirty_bitmap_get_persistence(bitmap) && + bdrv_remove_persistent_dirty_bitmap(bs, name, errp) < 0) + { + aio_context_release(aio_context); + return NULL; + } + + if (release) { + bdrv_release_dirty_bitmap(bitmap); + } + + if (bitmap_bs) { + *bitmap_bs = bs; + } + + aio_context_release(aio_context); + return release ? NULL : bitmap; +} + +void qmp_block_dirty_bitmap_remove(const char *node, const char *name, + Error **errp) +{ + block_dirty_bitmap_remove(node, name, true, NULL, errp); +} + +/** + * Completely clear a bitmap, for the purposes of synchronizing a bitmap + * immediately after a full backup operation. + */ +void qmp_block_dirty_bitmap_clear(const char *node, const char *name, + Error **errp) +{ + BdrvDirtyBitmap *bitmap; + BlockDriverState *bs; + + bitmap = block_dirty_bitmap_lookup(node, name, &bs, errp); + if (!bitmap || !bs) { + return; + } + + if (bdrv_dirty_bitmap_check(bitmap, BDRV_BITMAP_DEFAULT, errp)) { + return; + } + + bdrv_clear_dirty_bitmap(bitmap, NULL); +} + +void qmp_block_dirty_bitmap_enable(const char *node, const char *name, + Error **errp) +{ + BlockDriverState *bs; + BdrvDirtyBitmap *bitmap; + + bitmap = block_dirty_bitmap_lookup(node, name, &bs, errp); + if (!bitmap) { + return; + } + + if (bdrv_dirty_bitmap_check(bitmap, BDRV_BITMAP_ALLOW_RO, errp)) { + return; + } + + bdrv_enable_dirty_bitmap(bitmap); +} + +void qmp_block_dirty_bitmap_disable(const char *node, const char *name, + Error **errp) +{ + BlockDriverState *bs; + BdrvDirtyBitmap *bitmap; + + bitmap = block_dirty_bitmap_lookup(node, name, &bs, errp); + if (!bitmap) { + return; + } + + if (bdrv_dirty_bitmap_check(bitmap, BDRV_BITMAP_ALLOW_RO, errp)) { + return; + } + + bdrv_disable_dirty_bitmap(bitmap); +} + +BdrvDirtyBitmap *block_dirty_bitmap_merge(const char *node, const char *target, + BlockDirtyBitmapMergeSourceList *bms, + HBitmap **backup, Error **errp) +{ + BlockDriverState *bs; + BdrvDirtyBitmap *dst, *src, *anon; + BlockDirtyBitmapMergeSourceList *lst; + Error *local_err = NULL; + + dst = block_dirty_bitmap_lookup(node, target, &bs, errp); + if (!dst) { + return NULL; + } + + anon = bdrv_create_dirty_bitmap(bs, bdrv_dirty_bitmap_granularity(dst), + NULL, errp); + if (!anon) { + return NULL; + } + + for (lst = bms; lst; lst = lst->next) { + switch (lst->value->type) { + const char *name, *node; + case QTYPE_QSTRING: + name = lst->value->u.local; + src = bdrv_find_dirty_bitmap(bs, name); + if (!src) { + error_setg(errp, "Dirty bitmap '%s' not found", name); + dst = NULL; + goto out; + } + break; + case QTYPE_QDICT: + node = lst->value->u.external.node; + name = lst->value->u.external.name; + src = block_dirty_bitmap_lookup(node, name, NULL, errp); + if (!src) { + dst = NULL; + goto out; + } + break; + default: + abort(); + } + + bdrv_merge_dirty_bitmap(anon, src, NULL, &local_err); + if (local_err) { + error_propagate(errp, local_err); + dst = NULL; + goto out; + } + } + + /* Merge into dst; dst is unchanged on failure. */ + bdrv_merge_dirty_bitmap(dst, anon, backup, errp); + + out: + bdrv_release_dirty_bitmap(anon); + return dst; +} + +void qmp_block_dirty_bitmap_merge(const char *node, const char *target, + BlockDirtyBitmapMergeSourceList *bitmaps, + Error **errp) +{ + block_dirty_bitmap_merge(node, target, bitmaps, NULL, errp); +} diff --git a/block/monitor/block-hmp-cmds.c b/block/monitor/block-hmp-cmds.c new file mode 100644 index 000000000..d15a2be82 --- /dev/null +++ b/block/monitor/block-hmp-cmds.c @@ -0,0 +1,1014 @@ +/* + * Blockdev HMP commands + * + * Authors: + * Anthony Liguori + * + * Copyright (c) 2003-2008 Fabrice Bellard + * + * This work is licensed under the terms of the GNU GPL, version 2. + * See the COPYING file in the top-level directory. + * Contributions after 2012-01-13 are licensed under the terms of the + * GNU GPL, version 2 or (at your option) any later version. + * + * This file incorporates work covered by the following copyright and + * permission notice: + * + * Copyright (c) 2003-2008 Fabrice Bellard + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ + +#include "qemu/osdep.h" +#include "hw/boards.h" +#include "sysemu/block-backend.h" +#include "sysemu/blockdev.h" +#include "qapi/qapi-commands-block.h" +#include "qapi/qapi-commands-block-export.h" +#include "qapi/qmp/qdict.h" +#include "qapi/error.h" +#include "qapi/qmp/qerror.h" +#include "qemu/config-file.h" +#include "qemu/option.h" +#include "qemu/sockets.h" +#include "qemu/cutils.h" +#include "sysemu/sysemu.h" +#include "monitor/monitor.h" +#include "monitor/hmp.h" +#include "block/nbd.h" +#include "block/qapi.h" +#include "block/block_int.h" +#include "block/block-hmp-cmds.h" +#include "qemu-io.h" + +static void hmp_drive_add_node(Monitor *mon, const char *optstr) +{ + QemuOpts *opts; + QDict *qdict; + Error *local_err = NULL; + + opts = qemu_opts_parse_noisily(&qemu_drive_opts, optstr, false); + if (!opts) { + return; + } + + qdict = qemu_opts_to_qdict(opts, NULL); + + if (!qdict_get_try_str(qdict, "node-name")) { + qobject_unref(qdict); + error_report("'node-name' needs to be specified"); + goto out; + } + + BlockDriverState *bs = bds_tree_init(qdict, &local_err); + if (!bs) { + error_report_err(local_err); + goto out; + } + + bdrv_set_monitor_owned(bs); +out: + qemu_opts_del(opts); +} + +void hmp_drive_add(Monitor *mon, const QDict *qdict) +{ + Error *err = NULL; + DriveInfo *dinfo; + QemuOpts *opts; + MachineClass *mc; + const char *optstr = qdict_get_str(qdict, "opts"); + bool node = qdict_get_try_bool(qdict, "node", false); + + if (node) { + hmp_drive_add_node(mon, optstr); + return; + } + + opts = drive_def(optstr); + if (!opts) + return; + + mc = MACHINE_GET_CLASS(current_machine); + dinfo = drive_new(opts, mc->block_default_type, &err); + if (err) { + error_report_err(err); + qemu_opts_del(opts); + goto err; + } + + if (!dinfo) { + return; + } + + switch (dinfo->type) { + case IF_NONE: + monitor_printf(mon, "OK\n"); + break; + default: + monitor_printf(mon, "Can't hot-add drive to type %d\n", dinfo->type); + goto err; + } + return; + +err: + if (dinfo) { + BlockBackend *blk = blk_by_legacy_dinfo(dinfo); + monitor_remove_blk(blk); + blk_unref(blk); + } +} + +void hmp_drive_del(Monitor *mon, const QDict *qdict) +{ + const char *id = qdict_get_str(qdict, "id"); + BlockBackend *blk; + BlockDriverState *bs; + AioContext *aio_context; + Error *local_err = NULL; + + bs = bdrv_find_node(id); + if (bs) { + qmp_blockdev_del(id, &local_err); + if (local_err) { + error_report_err(local_err); + } + return; + } + + blk = blk_by_name(id); + if (!blk) { + error_report("Device '%s' not found", id); + return; + } + + if (!blk_legacy_dinfo(blk)) { + error_report("Deleting device added with blockdev-add" + " is not supported"); + return; + } + + aio_context = blk_get_aio_context(blk); + aio_context_acquire(aio_context); + + bs = blk_bs(blk); + if (bs) { + if (bdrv_op_is_blocked(bs, BLOCK_OP_TYPE_DRIVE_DEL, &local_err)) { + error_report_err(local_err); + aio_context_release(aio_context); + return; + } + + blk_remove_bs(blk); + } + + /* Make the BlockBackend and the attached BlockDriverState anonymous */ + monitor_remove_blk(blk); + + /* + * If this BlockBackend has a device attached to it, its refcount will be + * decremented when the device is removed; otherwise we have to do so here. + */ + if (blk_get_attached_dev(blk)) { + /* Further I/O must not pause the guest */ + blk_set_on_error(blk, BLOCKDEV_ON_ERROR_REPORT, + BLOCKDEV_ON_ERROR_REPORT); + } else { + blk_unref(blk); + } + + aio_context_release(aio_context); +} + +void hmp_commit(Monitor *mon, const QDict *qdict) +{ + const char *device = qdict_get_str(qdict, "device"); + BlockBackend *blk; + int ret; + + if (!strcmp(device, "all")) { + ret = blk_commit_all(); + } else { + BlockDriverState *bs; + AioContext *aio_context; + + blk = blk_by_name(device); + if (!blk) { + error_report("Device '%s' not found", device); + return; + } + if (!blk_is_available(blk)) { + error_report("Device '%s' has no medium", device); + return; + } + + bs = bdrv_skip_implicit_filters(blk_bs(blk)); + aio_context = bdrv_get_aio_context(bs); + aio_context_acquire(aio_context); + + ret = bdrv_commit(bs); + + aio_context_release(aio_context); + } + if (ret < 0) { + error_report("'commit' error for '%s': %s", device, strerror(-ret)); + } +} + +void hmp_drive_mirror(Monitor *mon, const QDict *qdict) +{ + const char *filename = qdict_get_str(qdict, "target"); + const char *format = qdict_get_try_str(qdict, "format"); + bool reuse = qdict_get_try_bool(qdict, "reuse", false); + bool full = qdict_get_try_bool(qdict, "full", false); + Error *err = NULL; + DriveMirror mirror = { + .device = (char *)qdict_get_str(qdict, "device"), + .target = (char *)filename, + .has_format = !!format, + .format = (char *)format, + .sync = full ? MIRROR_SYNC_MODE_FULL : MIRROR_SYNC_MODE_TOP, + .has_mode = true, + .mode = reuse ? NEW_IMAGE_MODE_EXISTING : NEW_IMAGE_MODE_ABSOLUTE_PATHS, + .unmap = true, + }; + + if (!filename) { + error_setg(&err, QERR_MISSING_PARAMETER, "target"); + hmp_handle_error(mon, err); + return; + } + qmp_drive_mirror(&mirror, &err); + hmp_handle_error(mon, err); +} + +void hmp_drive_backup(Monitor *mon, const QDict *qdict) +{ + const char *device = qdict_get_str(qdict, "device"); + const char *filename = qdict_get_str(qdict, "target"); + const char *format = qdict_get_try_str(qdict, "format"); + bool reuse = qdict_get_try_bool(qdict, "reuse", false); + bool full = qdict_get_try_bool(qdict, "full", false); + bool compress = qdict_get_try_bool(qdict, "compress", false); + Error *err = NULL; + DriveBackup backup = { + .device = (char *)device, + .target = (char *)filename, + .has_format = !!format, + .format = (char *)format, + .sync = full ? MIRROR_SYNC_MODE_FULL : MIRROR_SYNC_MODE_TOP, + .has_mode = true, + .mode = reuse ? NEW_IMAGE_MODE_EXISTING : NEW_IMAGE_MODE_ABSOLUTE_PATHS, + .has_compress = !!compress, + .compress = compress, + }; + + if (!filename) { + error_setg(&err, QERR_MISSING_PARAMETER, "target"); + hmp_handle_error(mon, err); + return; + } + + qmp_drive_backup(&backup, &err); + hmp_handle_error(mon, err); +} + +void hmp_block_job_set_speed(Monitor *mon, const QDict *qdict) +{ + Error *error = NULL; + const char *device = qdict_get_str(qdict, "device"); + int64_t value = qdict_get_int(qdict, "speed"); + + qmp_block_job_set_speed(device, value, &error); + + hmp_handle_error(mon, error); +} + +void hmp_block_job_cancel(Monitor *mon, const QDict *qdict) +{ + Error *error = NULL; + const char *device = qdict_get_str(qdict, "device"); + bool force = qdict_get_try_bool(qdict, "force", false); + + qmp_block_job_cancel(device, true, force, &error); + + hmp_handle_error(mon, error); +} + +void hmp_block_job_pause(Monitor *mon, const QDict *qdict) +{ + Error *error = NULL; + const char *device = qdict_get_str(qdict, "device"); + + qmp_block_job_pause(device, &error); + + hmp_handle_error(mon, error); +} + +void hmp_block_job_resume(Monitor *mon, const QDict *qdict) +{ + Error *error = NULL; + const char *device = qdict_get_str(qdict, "device"); + + qmp_block_job_resume(device, &error); + + hmp_handle_error(mon, error); +} + +void hmp_block_job_complete(Monitor *mon, const QDict *qdict) +{ + Error *error = NULL; + const char *device = qdict_get_str(qdict, "device"); + + qmp_block_job_complete(device, &error); + + hmp_handle_error(mon, error); +} + +void hmp_snapshot_blkdev(Monitor *mon, const QDict *qdict) +{ + const char *device = qdict_get_str(qdict, "device"); + const char *filename = qdict_get_try_str(qdict, "snapshot-file"); + const char *format = qdict_get_try_str(qdict, "format"); + bool reuse = qdict_get_try_bool(qdict, "reuse", false); + enum NewImageMode mode; + Error *err = NULL; + + if (!filename) { + /* + * In the future, if 'snapshot-file' is not specified, the snapshot + * will be taken internally. Today it's actually required. + */ + error_setg(&err, QERR_MISSING_PARAMETER, "snapshot-file"); + hmp_handle_error(mon, err); + return; + } + + mode = reuse ? NEW_IMAGE_MODE_EXISTING : NEW_IMAGE_MODE_ABSOLUTE_PATHS; + qmp_blockdev_snapshot_sync(true, device, false, NULL, + filename, false, NULL, + !!format, format, + true, mode, &err); + hmp_handle_error(mon, err); +} + +void hmp_snapshot_blkdev_internal(Monitor *mon, const QDict *qdict) +{ + const char *device = qdict_get_str(qdict, "device"); + const char *name = qdict_get_str(qdict, "name"); + Error *err = NULL; + + qmp_blockdev_snapshot_internal_sync(device, name, &err); + hmp_handle_error(mon, err); +} + +void hmp_snapshot_delete_blkdev_internal(Monitor *mon, const QDict *qdict) +{ + const char *device = qdict_get_str(qdict, "device"); + const char *name = qdict_get_str(qdict, "name"); + const char *id = qdict_get_try_str(qdict, "id"); + Error *err = NULL; + + qmp_blockdev_snapshot_delete_internal_sync(device, !!id, id, + true, name, &err); + hmp_handle_error(mon, err); +} + +void hmp_nbd_server_start(Monitor *mon, const QDict *qdict) +{ + const char *uri = qdict_get_str(qdict, "uri"); + bool writable = qdict_get_try_bool(qdict, "writable", false); + bool all = qdict_get_try_bool(qdict, "all", false); + Error *local_err = NULL; + BlockInfoList *block_list, *info; + SocketAddress *addr; + NbdServerAddOptions export; + + if (writable && !all) { + error_setg(&local_err, "-w only valid together with -a"); + goto exit; + } + + /* First check if the address is valid and start the server. */ + addr = socket_parse(uri, &local_err); + if (local_err != NULL) { + goto exit; + } + + nbd_server_start(addr, NULL, NULL, 0, &local_err); + qapi_free_SocketAddress(addr); + if (local_err != NULL) { + goto exit; + } + + if (!all) { + return; + } + + /* Then try adding all block devices. If one fails, close all and + * exit. + */ + block_list = qmp_query_block(NULL); + + for (info = block_list; info; info = info->next) { + if (!info->value->has_inserted) { + continue; + } + + export = (NbdServerAddOptions) { + .device = info->value->device, + .has_writable = true, + .writable = writable, + }; + + qmp_nbd_server_add(&export, &local_err); + + if (local_err != NULL) { + qmp_nbd_server_stop(NULL); + break; + } + } + + qapi_free_BlockInfoList(block_list); + +exit: + hmp_handle_error(mon, local_err); +} + +void hmp_nbd_server_add(Monitor *mon, const QDict *qdict) +{ + const char *device = qdict_get_str(qdict, "device"); + const char *name = qdict_get_try_str(qdict, "name"); + bool writable = qdict_get_try_bool(qdict, "writable", false); + Error *local_err = NULL; + + NbdServerAddOptions export = { + .device = (char *) device, + .has_name = !!name, + .name = (char *) name, + .has_writable = true, + .writable = writable, + }; + + qmp_nbd_server_add(&export, &local_err); + hmp_handle_error(mon, local_err); +} + +void hmp_nbd_server_remove(Monitor *mon, const QDict *qdict) +{ + const char *name = qdict_get_str(qdict, "name"); + bool force = qdict_get_try_bool(qdict, "force", false); + Error *err = NULL; + + /* Rely on BLOCK_EXPORT_REMOVE_MODE_SAFE being the default */ + qmp_nbd_server_remove(name, force, BLOCK_EXPORT_REMOVE_MODE_HARD, &err); + hmp_handle_error(mon, err); +} + +void hmp_nbd_server_stop(Monitor *mon, const QDict *qdict) +{ + Error *err = NULL; + + qmp_nbd_server_stop(&err); + hmp_handle_error(mon, err); +} + +void hmp_block_resize(Monitor *mon, const QDict *qdict) +{ + const char *device = qdict_get_str(qdict, "device"); + int64_t size = qdict_get_int(qdict, "size"); + Error *err = NULL; + + qmp_block_resize(true, device, false, NULL, size, &err); + hmp_handle_error(mon, err); +} + +void hmp_block_stream(Monitor *mon, const QDict *qdict) +{ + Error *error = NULL; + const char *device = qdict_get_str(qdict, "device"); + const char *base = qdict_get_try_str(qdict, "base"); + int64_t speed = qdict_get_try_int(qdict, "speed", 0); + + qmp_block_stream(true, device, device, base != NULL, base, false, NULL, + false, NULL, qdict_haskey(qdict, "speed"), speed, true, + BLOCKDEV_ON_ERROR_REPORT, false, false, false, false, + &error); + + hmp_handle_error(mon, error); +} + +void hmp_block_passwd(Monitor *mon, const QDict *qdict) +{ + const char *device = qdict_get_str(qdict, "device"); + const char *password = qdict_get_str(qdict, "password"); + Error *err = NULL; + + qmp_block_passwd(true, device, false, NULL, password, &err); + hmp_handle_error(mon, err); +} + +void hmp_block_set_io_throttle(Monitor *mon, const QDict *qdict) +{ + Error *err = NULL; + char *device = (char *) qdict_get_str(qdict, "device"); + BlockIOThrottle throttle = { + .bps = qdict_get_int(qdict, "bps"), + .bps_rd = qdict_get_int(qdict, "bps_rd"), + .bps_wr = qdict_get_int(qdict, "bps_wr"), + .iops = qdict_get_int(qdict, "iops"), + .iops_rd = qdict_get_int(qdict, "iops_rd"), + .iops_wr = qdict_get_int(qdict, "iops_wr"), + }; + + /* + * qmp_block_set_io_throttle has separate parameters for the + * (deprecated) block device name and the qdev ID but the HMP + * version has only one, so we must decide which one to pass. + */ + if (blk_by_name(device)) { + throttle.has_device = true; + throttle.device = device; + } else { + throttle.has_id = true; + throttle.id = device; + } + + qmp_block_set_io_throttle(&throttle, &err); + hmp_handle_error(mon, err); +} + +void hmp_eject(Monitor *mon, const QDict *qdict) +{ + bool force = qdict_get_try_bool(qdict, "force", false); + const char *device = qdict_get_str(qdict, "device"); + Error *err = NULL; + + qmp_eject(true, device, false, NULL, true, force, &err); + hmp_handle_error(mon, err); +} + +void hmp_qemu_io(Monitor *mon, const QDict *qdict) +{ + BlockBackend *blk; + BlockBackend *local_blk = NULL; + bool qdev = qdict_get_try_bool(qdict, "qdev", false); + const char *device = qdict_get_str(qdict, "device"); + const char *command = qdict_get_str(qdict, "command"); + Error *err = NULL; + int ret; + + if (qdev) { + blk = blk_by_qdev_id(device, &err); + if (!blk) { + goto fail; + } + } else { + blk = blk_by_name(device); + if (!blk) { + BlockDriverState *bs = bdrv_lookup_bs(NULL, device, &err); + if (bs) { + blk = local_blk = blk_new(bdrv_get_aio_context(bs), + 0, BLK_PERM_ALL); + ret = blk_insert_bs(blk, bs, &err); + if (ret < 0) { + goto fail; + } + } else { + goto fail; + } + } + } + + /* + * Notably absent: Proper permission management. This is sad, but it seems + * almost impossible to achieve without changing the semantics and thereby + * limiting the use cases of the qemu-io HMP command. + * + * In an ideal world we would unconditionally create a new BlockBackend for + * qemuio_command(), but we have commands like 'reopen' and want them to + * take effect on the exact BlockBackend whose name the user passed instead + * of just on a temporary copy of it. + * + * Another problem is that deleting the temporary BlockBackend involves + * draining all requests on it first, but some qemu-iotests cases want to + * issue multiple aio_read/write requests and expect them to complete in + * the background while the monitor has already returned. + * + * This is also what prevents us from saving the original permissions and + * restoring them later: We can't revoke permissions until all requests + * have completed, and we don't know when that is nor can we really let + * anything else run before we have revoken them to avoid race conditions. + * + * What happens now is that command() in qemu-io-cmds.c can extend the + * permissions if necessary for the qemu-io command. And they simply stay + * extended, possibly resulting in a read-only guest device keeping write + * permissions. Ugly, but it appears to be the lesser evil. + */ + qemuio_command(blk, command); + +fail: + blk_unref(local_blk); + hmp_handle_error(mon, err); +} + +static void print_block_info(Monitor *mon, BlockInfo *info, + BlockDeviceInfo *inserted, bool verbose) +{ + ImageInfo *image_info; + + assert(!info || !info->has_inserted || info->inserted == inserted); + + if (info && *info->device) { + monitor_printf(mon, "%s", info->device); + if (inserted && inserted->has_node_name) { + monitor_printf(mon, " (%s)", inserted->node_name); + } + } else { + assert(info || inserted); + monitor_printf(mon, "%s", + inserted && inserted->has_node_name ? inserted->node_name + : info && info->has_qdev ? info->qdev + : ""); + } + + if (inserted) { + monitor_printf(mon, ": %s (%s%s%s)\n", + inserted->file, + inserted->drv, + inserted->ro ? ", read-only" : "", + inserted->encrypted ? ", encrypted" : ""); + } else { + monitor_printf(mon, ": [not inserted]\n"); + } + + if (info) { + if (info->has_qdev) { + monitor_printf(mon, " Attached to: %s\n", info->qdev); + } + if (info->has_io_status && info->io_status != BLOCK_DEVICE_IO_STATUS_OK) { + monitor_printf(mon, " I/O status: %s\n", + BlockDeviceIoStatus_str(info->io_status)); + } + + if (info->removable) { + monitor_printf(mon, " Removable device: %slocked, tray %s\n", + info->locked ? "" : "not ", + info->tray_open ? "open" : "closed"); + } + } + + + if (!inserted) { + return; + } + + monitor_printf(mon, " Cache mode: %s%s%s\n", + inserted->cache->writeback ? "writeback" : "writethrough", + inserted->cache->direct ? ", direct" : "", + inserted->cache->no_flush ? ", ignore flushes" : ""); + + if (inserted->has_backing_file) { + monitor_printf(mon, + " Backing file: %s " + "(chain depth: %" PRId64 ")\n", + inserted->backing_file, + inserted->backing_file_depth); + } + + if (inserted->detect_zeroes != BLOCKDEV_DETECT_ZEROES_OPTIONS_OFF) { + monitor_printf(mon, " Detect zeroes: %s\n", + BlockdevDetectZeroesOptions_str(inserted->detect_zeroes)); + } + + if (inserted->bps || inserted->bps_rd || inserted->bps_wr || + inserted->iops || inserted->iops_rd || inserted->iops_wr) + { + monitor_printf(mon, " I/O throttling: bps=%" PRId64 + " bps_rd=%" PRId64 " bps_wr=%" PRId64 + " bps_max=%" PRId64 + " bps_rd_max=%" PRId64 + " bps_wr_max=%" PRId64 + " iops=%" PRId64 " iops_rd=%" PRId64 + " iops_wr=%" PRId64 + " iops_max=%" PRId64 + " iops_rd_max=%" PRId64 + " iops_wr_max=%" PRId64 + " iops_size=%" PRId64 + " group=%s\n", + inserted->bps, + inserted->bps_rd, + inserted->bps_wr, + inserted->bps_max, + inserted->bps_rd_max, + inserted->bps_wr_max, + inserted->iops, + inserted->iops_rd, + inserted->iops_wr, + inserted->iops_max, + inserted->iops_rd_max, + inserted->iops_wr_max, + inserted->iops_size, + inserted->group); + } + + if (verbose) { + monitor_printf(mon, "\nImages:\n"); + image_info = inserted->image; + while (1) { + bdrv_image_info_dump(image_info); + if (image_info->has_backing_image) { + image_info = image_info->backing_image; + } else { + break; + } + } + } +} + +void hmp_info_block(Monitor *mon, const QDict *qdict) +{ + BlockInfoList *block_list, *info; + BlockDeviceInfoList *blockdev_list, *blockdev; + const char *device = qdict_get_try_str(qdict, "device"); + bool verbose = qdict_get_try_bool(qdict, "verbose", false); + bool nodes = qdict_get_try_bool(qdict, "nodes", false); + bool printed = false; + + /* Print BlockBackend information */ + if (!nodes) { + block_list = qmp_query_block(NULL); + } else { + block_list = NULL; + } + + for (info = block_list; info; info = info->next) { + if (device && strcmp(device, info->value->device)) { + continue; + } + + if (info != block_list) { + monitor_printf(mon, "\n"); + } + + print_block_info(mon, info->value, info->value->has_inserted + ? info->value->inserted : NULL, + verbose); + printed = true; + } + + qapi_free_BlockInfoList(block_list); + + if ((!device && !nodes) || printed) { + return; + } + + /* Print node information */ + blockdev_list = qmp_query_named_block_nodes(false, false, NULL); + for (blockdev = blockdev_list; blockdev; blockdev = blockdev->next) { + assert(blockdev->value->has_node_name); + if (device && strcmp(device, blockdev->value->node_name)) { + continue; + } + + if (blockdev != blockdev_list) { + monitor_printf(mon, "\n"); + } + + print_block_info(mon, NULL, blockdev->value, verbose); + } + qapi_free_BlockDeviceInfoList(blockdev_list); +} + +void hmp_info_blockstats(Monitor *mon, const QDict *qdict) +{ + BlockStatsList *stats_list, *stats; + + stats_list = qmp_query_blockstats(false, false, NULL); + + for (stats = stats_list; stats; stats = stats->next) { + if (!stats->value->has_device) { + continue; + } + + monitor_printf(mon, "%s:", stats->value->device); + monitor_printf(mon, " rd_bytes=%" PRId64 + " wr_bytes=%" PRId64 + " rd_operations=%" PRId64 + " wr_operations=%" PRId64 + " flush_operations=%" PRId64 + " wr_total_time_ns=%" PRId64 + " rd_total_time_ns=%" PRId64 + " flush_total_time_ns=%" PRId64 + " rd_merged=%" PRId64 + " wr_merged=%" PRId64 + " idle_time_ns=%" PRId64 + "\n", + stats->value->stats->rd_bytes, + stats->value->stats->wr_bytes, + stats->value->stats->rd_operations, + stats->value->stats->wr_operations, + stats->value->stats->flush_operations, + stats->value->stats->wr_total_time_ns, + stats->value->stats->rd_total_time_ns, + stats->value->stats->flush_total_time_ns, + stats->value->stats->rd_merged, + stats->value->stats->wr_merged, + stats->value->stats->idle_time_ns); + } + + qapi_free_BlockStatsList(stats_list); +} + +void hmp_info_block_jobs(Monitor *mon, const QDict *qdict) +{ + BlockJobInfoList *list; + + list = qmp_query_block_jobs(&error_abort); + + if (!list) { + monitor_printf(mon, "No active jobs\n"); + return; + } + + while (list) { + if (strcmp(list->value->type, "stream") == 0) { + monitor_printf(mon, "Streaming device %s: Completed %" PRId64 + " of %" PRId64 " bytes, speed limit %" PRId64 + " bytes/s\n", + list->value->device, + list->value->offset, + list->value->len, + list->value->speed); + } else { + monitor_printf(mon, "Type %s, device %s: Completed %" PRId64 + " of %" PRId64 " bytes, speed limit %" PRId64 + " bytes/s\n", + list->value->type, + list->value->device, + list->value->offset, + list->value->len, + list->value->speed); + } + list = list->next; + } + + qapi_free_BlockJobInfoList(list); +} + +void hmp_info_snapshots(Monitor *mon, const QDict *qdict) +{ + BlockDriverState *bs, *bs1; + BdrvNextIterator it1; + QEMUSnapshotInfo *sn_tab, *sn; + bool no_snapshot = true; + int nb_sns, i; + int total; + int *global_snapshots; + AioContext *aio_context; + + typedef struct SnapshotEntry { + QEMUSnapshotInfo sn; + QTAILQ_ENTRY(SnapshotEntry) next; + } SnapshotEntry; + + typedef struct ImageEntry { + const char *imagename; + QTAILQ_ENTRY(ImageEntry) next; + QTAILQ_HEAD(, SnapshotEntry) snapshots; + } ImageEntry; + + QTAILQ_HEAD(, ImageEntry) image_list = + QTAILQ_HEAD_INITIALIZER(image_list); + + ImageEntry *image_entry, *next_ie; + SnapshotEntry *snapshot_entry; + + bs = bdrv_all_find_vmstate_bs(); + if (!bs) { + monitor_printf(mon, "No available block device supports snapshots\n"); + return; + } + aio_context = bdrv_get_aio_context(bs); + + aio_context_acquire(aio_context); + nb_sns = bdrv_snapshot_list(bs, &sn_tab); + aio_context_release(aio_context); + + if (nb_sns < 0) { + monitor_printf(mon, "bdrv_snapshot_list: error %d\n", nb_sns); + return; + } + + for (bs1 = bdrv_first(&it1); bs1; bs1 = bdrv_next(&it1)) { + int bs1_nb_sns = 0; + ImageEntry *ie; + SnapshotEntry *se; + AioContext *ctx = bdrv_get_aio_context(bs1); + + aio_context_acquire(ctx); + if (bdrv_can_snapshot(bs1)) { + sn = NULL; + bs1_nb_sns = bdrv_snapshot_list(bs1, &sn); + if (bs1_nb_sns > 0) { + no_snapshot = false; + ie = g_new0(ImageEntry, 1); + ie->imagename = bdrv_get_device_name(bs1); + QTAILQ_INIT(&ie->snapshots); + QTAILQ_INSERT_TAIL(&image_list, ie, next); + for (i = 0; i < bs1_nb_sns; i++) { + se = g_new0(SnapshotEntry, 1); + se->sn = sn[i]; + QTAILQ_INSERT_TAIL(&ie->snapshots, se, next); + } + } + g_free(sn); + } + aio_context_release(ctx); + } + + if (no_snapshot) { + monitor_printf(mon, "There is no snapshot available.\n"); + return; + } + + global_snapshots = g_new0(int, nb_sns); + total = 0; + for (i = 0; i < nb_sns; i++) { + SnapshotEntry *next_sn; + if (bdrv_all_find_snapshot(sn_tab[i].name, &bs1) == 0) { + global_snapshots[total] = i; + total++; + QTAILQ_FOREACH(image_entry, &image_list, next) { + QTAILQ_FOREACH_SAFE(snapshot_entry, &image_entry->snapshots, + next, next_sn) { + if (!strcmp(sn_tab[i].name, snapshot_entry->sn.name)) { + QTAILQ_REMOVE(&image_entry->snapshots, snapshot_entry, + next); + g_free(snapshot_entry); + } + } + } + } + } + monitor_printf(mon, "List of snapshots present on all disks:\n"); + + if (total > 0) { + bdrv_snapshot_dump(NULL); + monitor_printf(mon, "\n"); + for (i = 0; i < total; i++) { + sn = &sn_tab[global_snapshots[i]]; + /* + * The ID is not guaranteed to be the same on all images, so + * overwrite it. + */ + pstrcpy(sn->id_str, sizeof(sn->id_str), "--"); + bdrv_snapshot_dump(sn); + monitor_printf(mon, "\n"); + } + } else { + monitor_printf(mon, "None\n"); + } + + QTAILQ_FOREACH(image_entry, &image_list, next) { + if (QTAILQ_EMPTY(&image_entry->snapshots)) { + continue; + } + monitor_printf(mon, + "\nList of partial (non-loadable) snapshots on '%s':\n", + image_entry->imagename); + bdrv_snapshot_dump(NULL); + monitor_printf(mon, "\n"); + QTAILQ_FOREACH(snapshot_entry, &image_entry->snapshots, next) { + bdrv_snapshot_dump(&snapshot_entry->sn); + monitor_printf(mon, "\n"); + } + } + + QTAILQ_FOREACH_SAFE(image_entry, &image_list, next, next_ie) { + SnapshotEntry *next_sn; + QTAILQ_FOREACH_SAFE(snapshot_entry, &image_entry->snapshots, next, + next_sn) { + g_free(snapshot_entry); + } + g_free(image_entry); + } + g_free(sn_tab); + g_free(global_snapshots); +} diff --git a/block/monitor/meson.build b/block/monitor/meson.build new file mode 100644 index 000000000..374aac114 --- /dev/null +++ b/block/monitor/meson.build @@ -0,0 +1,2 @@ +softmmu_ss.add(files('block-hmp-cmds.c')) +block_ss.add(files('bitmap-qmp-cmds.c')) diff --git a/block/nbd.c b/block/nbd.c new file mode 100644 index 000000000..42536702b --- /dev/null +++ b/block/nbd.c @@ -0,0 +1,2513 @@ +/* + * QEMU Block driver for NBD + * + * Copyright (c) 2019 Virtuozzo International GmbH. + * Copyright (C) 2016 Red Hat, Inc. + * Copyright (C) 2008 Bull S.A.S. + * Author: Laurent Vivier + * + * Some parts: + * Copyright (C) 2007 Anthony Liguori + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ + +#include "qemu/osdep.h" + +#include "trace.h" +#include "qemu/uri.h" +#include "qemu/option.h" +#include "qemu/cutils.h" +#include "qemu/main-loop.h" + +#include "qapi/qapi-visit-sockets.h" +#include "qapi/qmp/qstring.h" +#include "qapi/clone-visitor.h" + +#include "block/qdict.h" +#include "block/nbd.h" +#include "block/block_int.h" + +#define EN_OPTSTR ":exportname=" +#define MAX_NBD_REQUESTS 16 + +#define HANDLE_TO_INDEX(bs, handle) ((handle) ^ (uint64_t)(intptr_t)(bs)) +#define INDEX_TO_HANDLE(bs, index) ((index) ^ (uint64_t)(intptr_t)(bs)) + +typedef struct { + Coroutine *coroutine; + uint64_t offset; /* original offset of the request */ + bool receiving; /* waiting for connection_co? */ +} NBDClientRequest; + +typedef enum NBDClientState { + NBD_CLIENT_CONNECTING_WAIT, + NBD_CLIENT_CONNECTING_NOWAIT, + NBD_CLIENT_CONNECTED, + NBD_CLIENT_QUIT +} NBDClientState; + +typedef enum NBDConnectThreadState { + /* No thread, no pending results */ + CONNECT_THREAD_NONE, + + /* Thread is running, no results for now */ + CONNECT_THREAD_RUNNING, + + /* + * Thread is running, but requestor exited. Thread should close + * the new socket and free the connect state on exit. + */ + CONNECT_THREAD_RUNNING_DETACHED, + + /* Thread finished, results are stored in a state */ + CONNECT_THREAD_FAIL, + CONNECT_THREAD_SUCCESS +} NBDConnectThreadState; + +typedef struct NBDConnectThread { + /* Initialization constants */ + SocketAddress *saddr; /* address to connect to */ + /* + * Bottom half to schedule on completion. Scheduled only if bh_ctx is not + * NULL + */ + QEMUBHFunc *bh_func; + void *bh_opaque; + + /* + * Result of last attempt. Valid in FAIL and SUCCESS states. + * If you want to steal error, don't forget to set pointer to NULL. + */ + QIOChannelSocket *sioc; + Error *err; + + /* state and bh_ctx are protected by mutex */ + QemuMutex mutex; + NBDConnectThreadState state; /* current state of the thread */ + AioContext *bh_ctx; /* where to schedule bh (NULL means don't schedule) */ +} NBDConnectThread; + +typedef struct BDRVNBDState { + QIOChannelSocket *sioc; /* The master data channel */ + QIOChannel *ioc; /* The current I/O channel which may differ (eg TLS) */ + NBDExportInfo info; + + CoMutex send_mutex; + CoQueue free_sema; + Coroutine *connection_co; + Coroutine *teardown_co; + QemuCoSleepState *connection_co_sleep_ns_state; + bool drained; + bool wait_drained_end; + int in_flight; + NBDClientState state; + int connect_status; + Error *connect_err; + bool wait_in_flight; + + QEMUTimer *reconnect_delay_timer; + + NBDClientRequest requests[MAX_NBD_REQUESTS]; + NBDReply reply; + BlockDriverState *bs; + + /* Connection parameters */ + uint32_t reconnect_delay; + SocketAddress *saddr; + char *export, *tlscredsid; + QCryptoTLSCreds *tlscreds; + const char *hostname; + char *x_dirty_bitmap; + bool alloc_depth; + + bool wait_connect; + NBDConnectThread *connect_thread; +} BDRVNBDState; + +static QIOChannelSocket *nbd_establish_connection(SocketAddress *saddr, + Error **errp); +static QIOChannelSocket *nbd_co_establish_connection(BlockDriverState *bs, + Error **errp); +static void nbd_co_establish_connection_cancel(BlockDriverState *bs, + bool detach); +static int nbd_client_handshake(BlockDriverState *bs, QIOChannelSocket *sioc, + Error **errp); + +static void nbd_clear_bdrvstate(BDRVNBDState *s) +{ + object_unref(OBJECT(s->tlscreds)); + qapi_free_SocketAddress(s->saddr); + s->saddr = NULL; + g_free(s->export); + s->export = NULL; + g_free(s->tlscredsid); + s->tlscredsid = NULL; + g_free(s->x_dirty_bitmap); + s->x_dirty_bitmap = NULL; +} + +static void nbd_channel_error(BDRVNBDState *s, int ret) +{ + if (ret == -EIO) { + if (s->state == NBD_CLIENT_CONNECTED) { + s->state = s->reconnect_delay ? NBD_CLIENT_CONNECTING_WAIT : + NBD_CLIENT_CONNECTING_NOWAIT; + } + } else { + if (s->state == NBD_CLIENT_CONNECTED) { + qio_channel_shutdown(s->ioc, QIO_CHANNEL_SHUTDOWN_BOTH, NULL); + } + s->state = NBD_CLIENT_QUIT; + } +} + +static void nbd_recv_coroutines_wake_all(BDRVNBDState *s) +{ + int i; + + for (i = 0; i < MAX_NBD_REQUESTS; i++) { + NBDClientRequest *req = &s->requests[i]; + + if (req->coroutine && req->receiving) { + aio_co_wake(req->coroutine); + } + } +} + +static void reconnect_delay_timer_del(BDRVNBDState *s) +{ + if (s->reconnect_delay_timer) { + timer_del(s->reconnect_delay_timer); + timer_free(s->reconnect_delay_timer); + s->reconnect_delay_timer = NULL; + } +} + +static void reconnect_delay_timer_cb(void *opaque) +{ + BDRVNBDState *s = opaque; + + if (s->state == NBD_CLIENT_CONNECTING_WAIT) { + s->state = NBD_CLIENT_CONNECTING_NOWAIT; + while (qemu_co_enter_next(&s->free_sema, NULL)) { + /* Resume all queued requests */ + } + } + + reconnect_delay_timer_del(s); +} + +static void reconnect_delay_timer_init(BDRVNBDState *s, uint64_t expire_time_ns) +{ + if (s->state != NBD_CLIENT_CONNECTING_WAIT) { + return; + } + + assert(!s->reconnect_delay_timer); + s->reconnect_delay_timer = aio_timer_new(bdrv_get_aio_context(s->bs), + QEMU_CLOCK_REALTIME, + SCALE_NS, + reconnect_delay_timer_cb, s); + timer_mod(s->reconnect_delay_timer, expire_time_ns); +} + +static void nbd_client_detach_aio_context(BlockDriverState *bs) +{ + BDRVNBDState *s = (BDRVNBDState *)bs->opaque; + + /* Timer is deleted in nbd_client_co_drain_begin() */ + assert(!s->reconnect_delay_timer); + qio_channel_detach_aio_context(QIO_CHANNEL(s->ioc)); +} + +static void nbd_client_attach_aio_context_bh(void *opaque) +{ + BlockDriverState *bs = opaque; + BDRVNBDState *s = (BDRVNBDState *)bs->opaque; + + /* + * The node is still drained, so we know the coroutine has yielded in + * nbd_read_eof(), the only place where bs->in_flight can reach 0, or it is + * entered for the first time. Both places are safe for entering the + * coroutine. + */ + qemu_aio_coroutine_enter(bs->aio_context, s->connection_co); + bdrv_dec_in_flight(bs); +} + +static void nbd_client_attach_aio_context(BlockDriverState *bs, + AioContext *new_context) +{ + BDRVNBDState *s = (BDRVNBDState *)bs->opaque; + + /* + * s->connection_co is either yielded from nbd_receive_reply or from + * nbd_co_reconnect_loop() + */ + if (s->state == NBD_CLIENT_CONNECTED) { + qio_channel_attach_aio_context(QIO_CHANNEL(s->ioc), new_context); + } + + bdrv_inc_in_flight(bs); + + /* + * Need to wait here for the BH to run because the BH must run while the + * node is still drained. + */ + aio_wait_bh_oneshot(new_context, nbd_client_attach_aio_context_bh, bs); +} + +static void coroutine_fn nbd_client_co_drain_begin(BlockDriverState *bs) +{ + BDRVNBDState *s = (BDRVNBDState *)bs->opaque; + + s->drained = true; + if (s->connection_co_sleep_ns_state) { + qemu_co_sleep_wake(s->connection_co_sleep_ns_state); + } + + nbd_co_establish_connection_cancel(bs, false); + + reconnect_delay_timer_del(s); + + if (s->state == NBD_CLIENT_CONNECTING_WAIT) { + s->state = NBD_CLIENT_CONNECTING_NOWAIT; + qemu_co_queue_restart_all(&s->free_sema); + } +} + +static void coroutine_fn nbd_client_co_drain_end(BlockDriverState *bs) +{ + BDRVNBDState *s = (BDRVNBDState *)bs->opaque; + + s->drained = false; + if (s->wait_drained_end) { + s->wait_drained_end = false; + aio_co_wake(s->connection_co); + } +} + + +static void nbd_teardown_connection(BlockDriverState *bs) +{ + BDRVNBDState *s = (BDRVNBDState *)bs->opaque; + + if (s->ioc) { + /* finish any pending coroutines */ + qio_channel_shutdown(s->ioc, QIO_CHANNEL_SHUTDOWN_BOTH, NULL); + } else if (s->sioc) { + /* abort negotiation */ + qio_channel_shutdown(QIO_CHANNEL(s->sioc), QIO_CHANNEL_SHUTDOWN_BOTH, + NULL); + } + + s->state = NBD_CLIENT_QUIT; + if (s->connection_co) { + if (s->connection_co_sleep_ns_state) { + qemu_co_sleep_wake(s->connection_co_sleep_ns_state); + } + nbd_co_establish_connection_cancel(bs, true); + } + if (qemu_in_coroutine()) { + s->teardown_co = qemu_coroutine_self(); + /* connection_co resumes us when it terminates */ + qemu_coroutine_yield(); + s->teardown_co = NULL; + } else { + BDRV_POLL_WHILE(bs, s->connection_co); + } + assert(!s->connection_co); +} + +static bool nbd_client_connecting(BDRVNBDState *s) +{ + return s->state == NBD_CLIENT_CONNECTING_WAIT || + s->state == NBD_CLIENT_CONNECTING_NOWAIT; +} + +static bool nbd_client_connecting_wait(BDRVNBDState *s) +{ + return s->state == NBD_CLIENT_CONNECTING_WAIT; +} + +static void connect_bh(void *opaque) +{ + BDRVNBDState *state = opaque; + + assert(state->wait_connect); + state->wait_connect = false; + aio_co_wake(state->connection_co); +} + +static void nbd_init_connect_thread(BDRVNBDState *s) +{ + s->connect_thread = g_new(NBDConnectThread, 1); + + *s->connect_thread = (NBDConnectThread) { + .saddr = QAPI_CLONE(SocketAddress, s->saddr), + .state = CONNECT_THREAD_NONE, + .bh_func = connect_bh, + .bh_opaque = s, + }; + + qemu_mutex_init(&s->connect_thread->mutex); +} + +static void nbd_free_connect_thread(NBDConnectThread *thr) +{ + if (thr->sioc) { + qio_channel_close(QIO_CHANNEL(thr->sioc), NULL); + } + error_free(thr->err); + qapi_free_SocketAddress(thr->saddr); + g_free(thr); +} + +static void *connect_thread_func(void *opaque) +{ + NBDConnectThread *thr = opaque; + int ret; + bool do_free = false; + + thr->sioc = qio_channel_socket_new(); + + error_free(thr->err); + thr->err = NULL; + ret = qio_channel_socket_connect_sync(thr->sioc, thr->saddr, &thr->err); + if (ret < 0) { + object_unref(OBJECT(thr->sioc)); + thr->sioc = NULL; + } + + qemu_mutex_lock(&thr->mutex); + + switch (thr->state) { + case CONNECT_THREAD_RUNNING: + thr->state = ret < 0 ? CONNECT_THREAD_FAIL : CONNECT_THREAD_SUCCESS; + if (thr->bh_ctx) { + aio_bh_schedule_oneshot(thr->bh_ctx, thr->bh_func, thr->bh_opaque); + + /* play safe, don't reuse bh_ctx on further connection attempts */ + thr->bh_ctx = NULL; + } + break; + case CONNECT_THREAD_RUNNING_DETACHED: + do_free = true; + break; + default: + abort(); + } + + qemu_mutex_unlock(&thr->mutex); + + if (do_free) { + nbd_free_connect_thread(thr); + } + + return NULL; +} + +static QIOChannelSocket *coroutine_fn +nbd_co_establish_connection(BlockDriverState *bs, Error **errp) +{ + QemuThread thread; + BDRVNBDState *s = bs->opaque; + QIOChannelSocket *res; + NBDConnectThread *thr = s->connect_thread; + + qemu_mutex_lock(&thr->mutex); + + switch (thr->state) { + case CONNECT_THREAD_FAIL: + case CONNECT_THREAD_NONE: + error_free(thr->err); + thr->err = NULL; + thr->state = CONNECT_THREAD_RUNNING; + qemu_thread_create(&thread, "nbd-connect", + connect_thread_func, thr, QEMU_THREAD_DETACHED); + break; + case CONNECT_THREAD_SUCCESS: + /* Previous attempt finally succeeded in background */ + thr->state = CONNECT_THREAD_NONE; + res = thr->sioc; + thr->sioc = NULL; + qemu_mutex_unlock(&thr->mutex); + return res; + case CONNECT_THREAD_RUNNING: + /* Already running, will wait */ + break; + default: + abort(); + } + + thr->bh_ctx = qemu_get_current_aio_context(); + + qemu_mutex_unlock(&thr->mutex); + + + /* + * We are going to wait for connect-thread finish, but + * nbd_client_co_drain_begin() can interrupt. + * + * Note that wait_connect variable is not visible for connect-thread. It + * doesn't need mutex protection, it used only inside home aio context of + * bs. + */ + s->wait_connect = true; + qemu_coroutine_yield(); + + qemu_mutex_lock(&thr->mutex); + + switch (thr->state) { + case CONNECT_THREAD_SUCCESS: + case CONNECT_THREAD_FAIL: + thr->state = CONNECT_THREAD_NONE; + error_propagate(errp, thr->err); + thr->err = NULL; + res = thr->sioc; + thr->sioc = NULL; + break; + case CONNECT_THREAD_RUNNING: + case CONNECT_THREAD_RUNNING_DETACHED: + /* + * Obviously, drained section wants to start. Report the attempt as + * failed. Still connect thread is executing in background, and its + * result may be used for next connection attempt. + */ + res = NULL; + error_setg(errp, "Connection attempt cancelled by other operation"); + break; + + case CONNECT_THREAD_NONE: + /* + * Impossible. We've seen this thread running. So it should be + * running or at least give some results. + */ + abort(); + + default: + abort(); + } + + qemu_mutex_unlock(&thr->mutex); + + return res; +} + +/* + * nbd_co_establish_connection_cancel + * Cancel nbd_co_establish_connection asynchronously: it will finish soon, to + * allow drained section to begin. + * + * If detach is true, also cleanup the state (or if thread is running, move it + * to CONNECT_THREAD_RUNNING_DETACHED state). s->connect_thread becomes NULL if + * detach is true. + */ +static void nbd_co_establish_connection_cancel(BlockDriverState *bs, + bool detach) +{ + BDRVNBDState *s = bs->opaque; + NBDConnectThread *thr = s->connect_thread; + bool wake = false; + bool do_free = false; + + qemu_mutex_lock(&thr->mutex); + + if (thr->state == CONNECT_THREAD_RUNNING) { + /* We can cancel only in running state, when bh is not yet scheduled */ + thr->bh_ctx = NULL; + if (s->wait_connect) { + s->wait_connect = false; + wake = true; + } + if (detach) { + thr->state = CONNECT_THREAD_RUNNING_DETACHED; + s->connect_thread = NULL; + } + } else if (detach) { + do_free = true; + } + + qemu_mutex_unlock(&thr->mutex); + + if (do_free) { + nbd_free_connect_thread(thr); + s->connect_thread = NULL; + } + + if (wake) { + aio_co_wake(s->connection_co); + } +} + +static coroutine_fn void nbd_reconnect_attempt(BDRVNBDState *s) +{ + int ret; + Error *local_err = NULL; + QIOChannelSocket *sioc; + + if (!nbd_client_connecting(s)) { + return; + } + + /* Wait for completion of all in-flight requests */ + + qemu_co_mutex_lock(&s->send_mutex); + + while (s->in_flight > 0) { + qemu_co_mutex_unlock(&s->send_mutex); + nbd_recv_coroutines_wake_all(s); + s->wait_in_flight = true; + qemu_coroutine_yield(); + s->wait_in_flight = false; + qemu_co_mutex_lock(&s->send_mutex); + } + + qemu_co_mutex_unlock(&s->send_mutex); + + if (!nbd_client_connecting(s)) { + return; + } + + /* + * Now we are sure that nobody is accessing the channel, and no one will + * try until we set the state to CONNECTED. + */ + + /* Finalize previous connection if any */ + if (s->ioc) { + qio_channel_detach_aio_context(QIO_CHANNEL(s->ioc)); + object_unref(OBJECT(s->sioc)); + s->sioc = NULL; + object_unref(OBJECT(s->ioc)); + s->ioc = NULL; + } + + sioc = nbd_co_establish_connection(s->bs, &local_err); + if (!sioc) { + ret = -ECONNREFUSED; + goto out; + } + + bdrv_dec_in_flight(s->bs); + + ret = nbd_client_handshake(s->bs, sioc, &local_err); + + if (s->drained) { + s->wait_drained_end = true; + while (s->drained) { + /* + * We may be entered once from nbd_client_attach_aio_context_bh + * and then from nbd_client_co_drain_end. So here is a loop. + */ + qemu_coroutine_yield(); + } + } + bdrv_inc_in_flight(s->bs); + +out: + s->connect_status = ret; + error_free(s->connect_err); + s->connect_err = NULL; + error_propagate(&s->connect_err, local_err); + + if (ret >= 0) { + /* successfully connected */ + s->state = NBD_CLIENT_CONNECTED; + qemu_co_queue_restart_all(&s->free_sema); + } +} + +static coroutine_fn void nbd_co_reconnect_loop(BDRVNBDState *s) +{ + uint64_t timeout = 1 * NANOSECONDS_PER_SECOND; + uint64_t max_timeout = 16 * NANOSECONDS_PER_SECOND; + + if (s->state == NBD_CLIENT_CONNECTING_WAIT) { + reconnect_delay_timer_init(s, qemu_clock_get_ns(QEMU_CLOCK_REALTIME) + + s->reconnect_delay * NANOSECONDS_PER_SECOND); + } + + nbd_reconnect_attempt(s); + + while (nbd_client_connecting(s)) { + if (s->drained) { + bdrv_dec_in_flight(s->bs); + s->wait_drained_end = true; + while (s->drained) { + /* + * We may be entered once from nbd_client_attach_aio_context_bh + * and then from nbd_client_co_drain_end. So here is a loop. + */ + qemu_coroutine_yield(); + } + bdrv_inc_in_flight(s->bs); + } else { + qemu_co_sleep_ns_wakeable(QEMU_CLOCK_REALTIME, timeout, + &s->connection_co_sleep_ns_state); + if (s->drained) { + continue; + } + if (timeout < max_timeout) { + timeout *= 2; + } + } + + nbd_reconnect_attempt(s); + } + + reconnect_delay_timer_del(s); +} + +static coroutine_fn void nbd_connection_entry(void *opaque) +{ + BDRVNBDState *s = opaque; + uint64_t i; + int ret = 0; + Error *local_err = NULL; + + while (s->state != NBD_CLIENT_QUIT) { + /* + * The NBD client can only really be considered idle when it has + * yielded from qio_channel_readv_all_eof(), waiting for data. This is + * the point where the additional scheduled coroutine entry happens + * after nbd_client_attach_aio_context(). + * + * Therefore we keep an additional in_flight reference all the time and + * only drop it temporarily here. + */ + + if (nbd_client_connecting(s)) { + nbd_co_reconnect_loop(s); + } + + if (s->state != NBD_CLIENT_CONNECTED) { + continue; + } + + assert(s->reply.handle == 0); + ret = nbd_receive_reply(s->bs, s->ioc, &s->reply, &local_err); + + if (local_err) { + trace_nbd_read_reply_entry_fail(ret, error_get_pretty(local_err)); + error_free(local_err); + local_err = NULL; + } + if (ret <= 0) { + nbd_channel_error(s, ret ? ret : -EIO); + continue; + } + + /* + * There's no need for a mutex on the receive side, because the + * handler acts as a synchronization point and ensures that only + * one coroutine is called until the reply finishes. + */ + i = HANDLE_TO_INDEX(s, s->reply.handle); + if (i >= MAX_NBD_REQUESTS || + !s->requests[i].coroutine || + !s->requests[i].receiving || + (nbd_reply_is_structured(&s->reply) && !s->info.structured_reply)) + { + nbd_channel_error(s, -EINVAL); + continue; + } + + /* + * We're woken up again by the request itself. Note that there + * is no race between yielding and reentering connection_co. This + * is because: + * + * - if the request runs on the same AioContext, it is only + * entered after we yield + * + * - if the request runs on a different AioContext, reentering + * connection_co happens through a bottom half, which can only + * run after we yield. + */ + aio_co_wake(s->requests[i].coroutine); + qemu_coroutine_yield(); + } + + qemu_co_queue_restart_all(&s->free_sema); + nbd_recv_coroutines_wake_all(s); + bdrv_dec_in_flight(s->bs); + + s->connection_co = NULL; + if (s->ioc) { + qio_channel_detach_aio_context(QIO_CHANNEL(s->ioc)); + object_unref(OBJECT(s->sioc)); + s->sioc = NULL; + object_unref(OBJECT(s->ioc)); + s->ioc = NULL; + } + + if (s->teardown_co) { + aio_co_wake(s->teardown_co); + } + aio_wait_kick(); +} + +static int nbd_co_send_request(BlockDriverState *bs, + NBDRequest *request, + QEMUIOVector *qiov) +{ + BDRVNBDState *s = (BDRVNBDState *)bs->opaque; + int rc, i = -1; + + qemu_co_mutex_lock(&s->send_mutex); + while (s->in_flight == MAX_NBD_REQUESTS || nbd_client_connecting_wait(s)) { + qemu_co_queue_wait(&s->free_sema, &s->send_mutex); + } + + if (s->state != NBD_CLIENT_CONNECTED) { + rc = -EIO; + goto err; + } + + s->in_flight++; + + for (i = 0; i < MAX_NBD_REQUESTS; i++) { + if (s->requests[i].coroutine == NULL) { + break; + } + } + + g_assert(qemu_in_coroutine()); + assert(i < MAX_NBD_REQUESTS); + + s->requests[i].coroutine = qemu_coroutine_self(); + s->requests[i].offset = request->from; + s->requests[i].receiving = false; + + request->handle = INDEX_TO_HANDLE(s, i); + + assert(s->ioc); + + if (qiov) { + qio_channel_set_cork(s->ioc, true); + rc = nbd_send_request(s->ioc, request); + if (rc >= 0 && s->state == NBD_CLIENT_CONNECTED) { + if (qio_channel_writev_all(s->ioc, qiov->iov, qiov->niov, + NULL) < 0) { + rc = -EIO; + } + } else if (rc >= 0) { + rc = -EIO; + } + qio_channel_set_cork(s->ioc, false); + } else { + rc = nbd_send_request(s->ioc, request); + } + +err: + if (rc < 0) { + nbd_channel_error(s, rc); + if (i != -1) { + s->requests[i].coroutine = NULL; + s->in_flight--; + } + if (s->in_flight == 0 && s->wait_in_flight) { + aio_co_wake(s->connection_co); + } else { + qemu_co_queue_next(&s->free_sema); + } + } + qemu_co_mutex_unlock(&s->send_mutex); + return rc; +} + +static inline uint16_t payload_advance16(uint8_t **payload) +{ + *payload += 2; + return lduw_be_p(*payload - 2); +} + +static inline uint32_t payload_advance32(uint8_t **payload) +{ + *payload += 4; + return ldl_be_p(*payload - 4); +} + +static inline uint64_t payload_advance64(uint8_t **payload) +{ + *payload += 8; + return ldq_be_p(*payload - 8); +} + +static int nbd_parse_offset_hole_payload(BDRVNBDState *s, + NBDStructuredReplyChunk *chunk, + uint8_t *payload, uint64_t orig_offset, + QEMUIOVector *qiov, Error **errp) +{ + uint64_t offset; + uint32_t hole_size; + + if (chunk->length != sizeof(offset) + sizeof(hole_size)) { + error_setg(errp, "Protocol error: invalid payload for " + "NBD_REPLY_TYPE_OFFSET_HOLE"); + return -EINVAL; + } + + offset = payload_advance64(&payload); + hole_size = payload_advance32(&payload); + + if (!hole_size || offset < orig_offset || hole_size > qiov->size || + offset > orig_offset + qiov->size - hole_size) { + error_setg(errp, "Protocol error: server sent chunk exceeding requested" + " region"); + return -EINVAL; + } + if (s->info.min_block && + !QEMU_IS_ALIGNED(hole_size, s->info.min_block)) { + trace_nbd_structured_read_compliance("hole"); + } + + qemu_iovec_memset(qiov, offset - orig_offset, 0, hole_size); + + return 0; +} + +/* + * nbd_parse_blockstatus_payload + * Based on our request, we expect only one extent in reply, for the + * base:allocation context. + */ +static int nbd_parse_blockstatus_payload(BDRVNBDState *s, + NBDStructuredReplyChunk *chunk, + uint8_t *payload, uint64_t orig_length, + NBDExtent *extent, Error **errp) +{ + uint32_t context_id; + + /* The server succeeded, so it must have sent [at least] one extent */ + if (chunk->length < sizeof(context_id) + sizeof(*extent)) { + error_setg(errp, "Protocol error: invalid payload for " + "NBD_REPLY_TYPE_BLOCK_STATUS"); + return -EINVAL; + } + + context_id = payload_advance32(&payload); + if (s->info.context_id != context_id) { + error_setg(errp, "Protocol error: unexpected context id %d for " + "NBD_REPLY_TYPE_BLOCK_STATUS, when negotiated context " + "id is %d", context_id, + s->info.context_id); + return -EINVAL; + } + + extent->length = payload_advance32(&payload); + extent->flags = payload_advance32(&payload); + + if (extent->length == 0) { + error_setg(errp, "Protocol error: server sent status chunk with " + "zero length"); + return -EINVAL; + } + + /* + * A server sending unaligned block status is in violation of the + * protocol, but as qemu-nbd 3.1 is such a server (at least for + * POSIX files that are not a multiple of 512 bytes, since qemu + * rounds files up to 512-byte multiples but lseek(SEEK_HOLE) + * still sees an implicit hole beyond the real EOF), it's nicer to + * work around the misbehaving server. If the request included + * more than the final unaligned block, truncate it back to an + * aligned result; if the request was only the final block, round + * up to the full block and change the status to fully-allocated + * (always a safe status, even if it loses information). + */ + if (s->info.min_block && !QEMU_IS_ALIGNED(extent->length, + s->info.min_block)) { + trace_nbd_parse_blockstatus_compliance("extent length is unaligned"); + if (extent->length > s->info.min_block) { + extent->length = QEMU_ALIGN_DOWN(extent->length, + s->info.min_block); + } else { + extent->length = s->info.min_block; + extent->flags = 0; + } + } + + /* + * We used NBD_CMD_FLAG_REQ_ONE, so the server should not have + * sent us any more than one extent, nor should it have included + * status beyond our request in that extent. However, it's easy + * enough to ignore the server's noncompliance without killing the + * connection; just ignore trailing extents, and clamp things to + * the length of our request. + */ + if (chunk->length > sizeof(context_id) + sizeof(*extent)) { + trace_nbd_parse_blockstatus_compliance("more than one extent"); + } + if (extent->length > orig_length) { + extent->length = orig_length; + trace_nbd_parse_blockstatus_compliance("extent length too large"); + } + + /* + * HACK: if we are using x-dirty-bitmaps to access + * qemu:allocation-depth, treat all depths > 2 the same as 2, + * since nbd_client_co_block_status is only expecting the low two + * bits to be set. + */ + if (s->alloc_depth && extent->flags > 2) { + extent->flags = 2; + } + + return 0; +} + +/* + * nbd_parse_error_payload + * on success @errp contains message describing nbd error reply + */ +static int nbd_parse_error_payload(NBDStructuredReplyChunk *chunk, + uint8_t *payload, int *request_ret, + Error **errp) +{ + uint32_t error; + uint16_t message_size; + + assert(chunk->type & (1 << 15)); + + if (chunk->length < sizeof(error) + sizeof(message_size)) { + error_setg(errp, + "Protocol error: invalid payload for structured error"); + return -EINVAL; + } + + error = nbd_errno_to_system_errno(payload_advance32(&payload)); + if (error == 0) { + error_setg(errp, "Protocol error: server sent structured error chunk " + "with error = 0"); + return -EINVAL; + } + + *request_ret = -error; + message_size = payload_advance16(&payload); + + if (message_size > chunk->length - sizeof(error) - sizeof(message_size)) { + error_setg(errp, "Protocol error: server sent structured error chunk " + "with incorrect message size"); + return -EINVAL; + } + + /* TODO: Add a trace point to mention the server complaint */ + + /* TODO handle ERROR_OFFSET */ + + return 0; +} + +static int nbd_co_receive_offset_data_payload(BDRVNBDState *s, + uint64_t orig_offset, + QEMUIOVector *qiov, Error **errp) +{ + QEMUIOVector sub_qiov; + uint64_t offset; + size_t data_size; + int ret; + NBDStructuredReplyChunk *chunk = &s->reply.structured; + + assert(nbd_reply_is_structured(&s->reply)); + + /* The NBD spec requires at least one byte of payload */ + if (chunk->length <= sizeof(offset)) { + error_setg(errp, "Protocol error: invalid payload for " + "NBD_REPLY_TYPE_OFFSET_DATA"); + return -EINVAL; + } + + if (nbd_read64(s->ioc, &offset, "OFFSET_DATA offset", errp) < 0) { + return -EIO; + } + + data_size = chunk->length - sizeof(offset); + assert(data_size); + if (offset < orig_offset || data_size > qiov->size || + offset > orig_offset + qiov->size - data_size) { + error_setg(errp, "Protocol error: server sent chunk exceeding requested" + " region"); + return -EINVAL; + } + if (s->info.min_block && !QEMU_IS_ALIGNED(data_size, s->info.min_block)) { + trace_nbd_structured_read_compliance("data"); + } + + qemu_iovec_init(&sub_qiov, qiov->niov); + qemu_iovec_concat(&sub_qiov, qiov, offset - orig_offset, data_size); + ret = qio_channel_readv_all(s->ioc, sub_qiov.iov, sub_qiov.niov, errp); + qemu_iovec_destroy(&sub_qiov); + + return ret < 0 ? -EIO : 0; +} + +#define NBD_MAX_MALLOC_PAYLOAD 1000 +static coroutine_fn int nbd_co_receive_structured_payload( + BDRVNBDState *s, void **payload, Error **errp) +{ + int ret; + uint32_t len; + + assert(nbd_reply_is_structured(&s->reply)); + + len = s->reply.structured.length; + + if (len == 0) { + return 0; + } + + if (payload == NULL) { + error_setg(errp, "Unexpected structured payload"); + return -EINVAL; + } + + if (len > NBD_MAX_MALLOC_PAYLOAD) { + error_setg(errp, "Payload too large"); + return -EINVAL; + } + + *payload = g_new(char, len); + ret = nbd_read(s->ioc, *payload, len, "structured payload", errp); + if (ret < 0) { + g_free(*payload); + *payload = NULL; + return ret; + } + + return 0; +} + +/* + * nbd_co_do_receive_one_chunk + * for simple reply: + * set request_ret to received reply error + * if qiov is not NULL: read payload to @qiov + * for structured reply chunk: + * if error chunk: read payload, set @request_ret, do not set @payload + * else if offset_data chunk: read payload data to @qiov, do not set @payload + * else: read payload to @payload + * + * If function fails, @errp contains corresponding error message, and the + * connection with the server is suspect. If it returns 0, then the + * transaction succeeded (although @request_ret may be a negative errno + * corresponding to the server's error reply), and errp is unchanged. + */ +static coroutine_fn int nbd_co_do_receive_one_chunk( + BDRVNBDState *s, uint64_t handle, bool only_structured, + int *request_ret, QEMUIOVector *qiov, void **payload, Error **errp) +{ + int ret; + int i = HANDLE_TO_INDEX(s, handle); + void *local_payload = NULL; + NBDStructuredReplyChunk *chunk; + + if (payload) { + *payload = NULL; + } + *request_ret = 0; + + /* Wait until we're woken up by nbd_connection_entry. */ + s->requests[i].receiving = true; + qemu_coroutine_yield(); + s->requests[i].receiving = false; + if (s->state != NBD_CLIENT_CONNECTED) { + error_setg(errp, "Connection closed"); + return -EIO; + } + assert(s->ioc); + + assert(s->reply.handle == handle); + + if (nbd_reply_is_simple(&s->reply)) { + if (only_structured) { + error_setg(errp, "Protocol error: simple reply when structured " + "reply chunk was expected"); + return -EINVAL; + } + + *request_ret = -nbd_errno_to_system_errno(s->reply.simple.error); + if (*request_ret < 0 || !qiov) { + return 0; + } + + return qio_channel_readv_all(s->ioc, qiov->iov, qiov->niov, + errp) < 0 ? -EIO : 0; + } + + /* handle structured reply chunk */ + assert(s->info.structured_reply); + chunk = &s->reply.structured; + + if (chunk->type == NBD_REPLY_TYPE_NONE) { + if (!(chunk->flags & NBD_REPLY_FLAG_DONE)) { + error_setg(errp, "Protocol error: NBD_REPLY_TYPE_NONE chunk without" + " NBD_REPLY_FLAG_DONE flag set"); + return -EINVAL; + } + if (chunk->length) { + error_setg(errp, "Protocol error: NBD_REPLY_TYPE_NONE chunk with" + " nonzero length"); + return -EINVAL; + } + return 0; + } + + if (chunk->type == NBD_REPLY_TYPE_OFFSET_DATA) { + if (!qiov) { + error_setg(errp, "Unexpected NBD_REPLY_TYPE_OFFSET_DATA chunk"); + return -EINVAL; + } + + return nbd_co_receive_offset_data_payload(s, s->requests[i].offset, + qiov, errp); + } + + if (nbd_reply_type_is_error(chunk->type)) { + payload = &local_payload; + } + + ret = nbd_co_receive_structured_payload(s, payload, errp); + if (ret < 0) { + return ret; + } + + if (nbd_reply_type_is_error(chunk->type)) { + ret = nbd_parse_error_payload(chunk, local_payload, request_ret, errp); + g_free(local_payload); + return ret; + } + + return 0; +} + +/* + * nbd_co_receive_one_chunk + * Read reply, wake up connection_co and set s->quit if needed. + * Return value is a fatal error code or normal nbd reply error code + */ +static coroutine_fn int nbd_co_receive_one_chunk( + BDRVNBDState *s, uint64_t handle, bool only_structured, + int *request_ret, QEMUIOVector *qiov, NBDReply *reply, void **payload, + Error **errp) +{ + int ret = nbd_co_do_receive_one_chunk(s, handle, only_structured, + request_ret, qiov, payload, errp); + + if (ret < 0) { + memset(reply, 0, sizeof(*reply)); + nbd_channel_error(s, ret); + } else { + /* For assert at loop start in nbd_connection_entry */ + *reply = s->reply; + } + s->reply.handle = 0; + + if (s->connection_co && !s->wait_in_flight) { + /* + * We must check s->wait_in_flight, because we may entered by + * nbd_recv_coroutines_wake_all(), in this case we should not + * wake connection_co here, it will woken by last request. + */ + aio_co_wake(s->connection_co); + } + + return ret; +} + +typedef struct NBDReplyChunkIter { + int ret; + int request_ret; + Error *err; + bool done, only_structured; +} NBDReplyChunkIter; + +static void nbd_iter_channel_error(NBDReplyChunkIter *iter, + int ret, Error **local_err) +{ + assert(local_err && *local_err); + assert(ret < 0); + + if (!iter->ret) { + iter->ret = ret; + error_propagate(&iter->err, *local_err); + } else { + error_free(*local_err); + } + + *local_err = NULL; +} + +static void nbd_iter_request_error(NBDReplyChunkIter *iter, int ret) +{ + assert(ret < 0); + + if (!iter->request_ret) { + iter->request_ret = ret; + } +} + +/* + * NBD_FOREACH_REPLY_CHUNK + * The pointer stored in @payload requires g_free() to free it. + */ +#define NBD_FOREACH_REPLY_CHUNK(s, iter, handle, structured, \ + qiov, reply, payload) \ + for (iter = (NBDReplyChunkIter) { .only_structured = structured }; \ + nbd_reply_chunk_iter_receive(s, &iter, handle, qiov, reply, payload);) + +/* + * nbd_reply_chunk_iter_receive + * The pointer stored in @payload requires g_free() to free it. + */ +static bool nbd_reply_chunk_iter_receive(BDRVNBDState *s, + NBDReplyChunkIter *iter, + uint64_t handle, + QEMUIOVector *qiov, NBDReply *reply, + void **payload) +{ + int ret, request_ret; + NBDReply local_reply; + NBDStructuredReplyChunk *chunk; + Error *local_err = NULL; + if (s->state != NBD_CLIENT_CONNECTED) { + error_setg(&local_err, "Connection closed"); + nbd_iter_channel_error(iter, -EIO, &local_err); + goto break_loop; + } + + if (iter->done) { + /* Previous iteration was last. */ + goto break_loop; + } + + if (reply == NULL) { + reply = &local_reply; + } + + ret = nbd_co_receive_one_chunk(s, handle, iter->only_structured, + &request_ret, qiov, reply, payload, + &local_err); + if (ret < 0) { + nbd_iter_channel_error(iter, ret, &local_err); + } else if (request_ret < 0) { + nbd_iter_request_error(iter, request_ret); + } + + /* Do not execute the body of NBD_FOREACH_REPLY_CHUNK for simple reply. */ + if (nbd_reply_is_simple(reply) || s->state != NBD_CLIENT_CONNECTED) { + goto break_loop; + } + + chunk = &reply->structured; + iter->only_structured = true; + + if (chunk->type == NBD_REPLY_TYPE_NONE) { + /* NBD_REPLY_FLAG_DONE is already checked in nbd_co_receive_one_chunk */ + assert(chunk->flags & NBD_REPLY_FLAG_DONE); + goto break_loop; + } + + if (chunk->flags & NBD_REPLY_FLAG_DONE) { + /* This iteration is last. */ + iter->done = true; + } + + /* Execute the loop body */ + return true; + +break_loop: + s->requests[HANDLE_TO_INDEX(s, handle)].coroutine = NULL; + + qemu_co_mutex_lock(&s->send_mutex); + s->in_flight--; + if (s->in_flight == 0 && s->wait_in_flight) { + aio_co_wake(s->connection_co); + } else { + qemu_co_queue_next(&s->free_sema); + } + qemu_co_mutex_unlock(&s->send_mutex); + + return false; +} + +static int nbd_co_receive_return_code(BDRVNBDState *s, uint64_t handle, + int *request_ret, Error **errp) +{ + NBDReplyChunkIter iter; + + NBD_FOREACH_REPLY_CHUNK(s, iter, handle, false, NULL, NULL, NULL) { + /* nbd_reply_chunk_iter_receive does all the work */ + } + + error_propagate(errp, iter.err); + *request_ret = iter.request_ret; + return iter.ret; +} + +static int nbd_co_receive_cmdread_reply(BDRVNBDState *s, uint64_t handle, + uint64_t offset, QEMUIOVector *qiov, + int *request_ret, Error **errp) +{ + NBDReplyChunkIter iter; + NBDReply reply; + void *payload = NULL; + Error *local_err = NULL; + + NBD_FOREACH_REPLY_CHUNK(s, iter, handle, s->info.structured_reply, + qiov, &reply, &payload) + { + int ret; + NBDStructuredReplyChunk *chunk = &reply.structured; + + assert(nbd_reply_is_structured(&reply)); + + switch (chunk->type) { + case NBD_REPLY_TYPE_OFFSET_DATA: + /* + * special cased in nbd_co_receive_one_chunk, data is already + * in qiov + */ + break; + case NBD_REPLY_TYPE_OFFSET_HOLE: + ret = nbd_parse_offset_hole_payload(s, &reply.structured, payload, + offset, qiov, &local_err); + if (ret < 0) { + nbd_channel_error(s, ret); + nbd_iter_channel_error(&iter, ret, &local_err); + } + break; + default: + if (!nbd_reply_type_is_error(chunk->type)) { + /* not allowed reply type */ + nbd_channel_error(s, -EINVAL); + error_setg(&local_err, + "Unexpected reply type: %d (%s) for CMD_READ", + chunk->type, nbd_reply_type_lookup(chunk->type)); + nbd_iter_channel_error(&iter, -EINVAL, &local_err); + } + } + + g_free(payload); + payload = NULL; + } + + error_propagate(errp, iter.err); + *request_ret = iter.request_ret; + return iter.ret; +} + +static int nbd_co_receive_blockstatus_reply(BDRVNBDState *s, + uint64_t handle, uint64_t length, + NBDExtent *extent, + int *request_ret, Error **errp) +{ + NBDReplyChunkIter iter; + NBDReply reply; + void *payload = NULL; + Error *local_err = NULL; + bool received = false; + + assert(!extent->length); + NBD_FOREACH_REPLY_CHUNK(s, iter, handle, false, NULL, &reply, &payload) { + int ret; + NBDStructuredReplyChunk *chunk = &reply.structured; + + assert(nbd_reply_is_structured(&reply)); + + switch (chunk->type) { + case NBD_REPLY_TYPE_BLOCK_STATUS: + if (received) { + nbd_channel_error(s, -EINVAL); + error_setg(&local_err, "Several BLOCK_STATUS chunks in reply"); + nbd_iter_channel_error(&iter, -EINVAL, &local_err); + } + received = true; + + ret = nbd_parse_blockstatus_payload(s, &reply.structured, + payload, length, extent, + &local_err); + if (ret < 0) { + nbd_channel_error(s, ret); + nbd_iter_channel_error(&iter, ret, &local_err); + } + break; + default: + if (!nbd_reply_type_is_error(chunk->type)) { + nbd_channel_error(s, -EINVAL); + error_setg(&local_err, + "Unexpected reply type: %d (%s) " + "for CMD_BLOCK_STATUS", + chunk->type, nbd_reply_type_lookup(chunk->type)); + nbd_iter_channel_error(&iter, -EINVAL, &local_err); + } + } + + g_free(payload); + payload = NULL; + } + + if (!extent->length && !iter.request_ret) { + error_setg(&local_err, "Server did not reply with any status extents"); + nbd_iter_channel_error(&iter, -EIO, &local_err); + } + + error_propagate(errp, iter.err); + *request_ret = iter.request_ret; + return iter.ret; +} + +static int nbd_co_request(BlockDriverState *bs, NBDRequest *request, + QEMUIOVector *write_qiov) +{ + int ret, request_ret; + Error *local_err = NULL; + BDRVNBDState *s = (BDRVNBDState *)bs->opaque; + + assert(request->type != NBD_CMD_READ); + if (write_qiov) { + assert(request->type == NBD_CMD_WRITE); + assert(request->len == iov_size(write_qiov->iov, write_qiov->niov)); + } else { + assert(request->type != NBD_CMD_WRITE); + } + + do { + ret = nbd_co_send_request(bs, request, write_qiov); + if (ret < 0) { + continue; + } + + ret = nbd_co_receive_return_code(s, request->handle, + &request_ret, &local_err); + if (local_err) { + trace_nbd_co_request_fail(request->from, request->len, + request->handle, request->flags, + request->type, + nbd_cmd_lookup(request->type), + ret, error_get_pretty(local_err)); + error_free(local_err); + local_err = NULL; + } + } while (ret < 0 && nbd_client_connecting_wait(s)); + + return ret ? ret : request_ret; +} + +static int nbd_client_co_preadv(BlockDriverState *bs, uint64_t offset, + uint64_t bytes, QEMUIOVector *qiov, int flags) +{ + int ret, request_ret; + Error *local_err = NULL; + BDRVNBDState *s = (BDRVNBDState *)bs->opaque; + NBDRequest request = { + .type = NBD_CMD_READ, + .from = offset, + .len = bytes, + }; + + assert(bytes <= NBD_MAX_BUFFER_SIZE); + assert(!flags); + + if (!bytes) { + return 0; + } + /* + * Work around the fact that the block layer doesn't do + * byte-accurate sizing yet - if the read exceeds the server's + * advertised size because the block layer rounded size up, then + * truncate the request to the server and tail-pad with zero. + */ + if (offset >= s->info.size) { + assert(bytes < BDRV_SECTOR_SIZE); + qemu_iovec_memset(qiov, 0, 0, bytes); + return 0; + } + if (offset + bytes > s->info.size) { + uint64_t slop = offset + bytes - s->info.size; + + assert(slop < BDRV_SECTOR_SIZE); + qemu_iovec_memset(qiov, bytes - slop, 0, slop); + request.len -= slop; + } + + do { + ret = nbd_co_send_request(bs, &request, NULL); + if (ret < 0) { + continue; + } + + ret = nbd_co_receive_cmdread_reply(s, request.handle, offset, qiov, + &request_ret, &local_err); + if (local_err) { + trace_nbd_co_request_fail(request.from, request.len, request.handle, + request.flags, request.type, + nbd_cmd_lookup(request.type), + ret, error_get_pretty(local_err)); + error_free(local_err); + local_err = NULL; + } + } while (ret < 0 && nbd_client_connecting_wait(s)); + + return ret ? ret : request_ret; +} + +static int nbd_client_co_pwritev(BlockDriverState *bs, uint64_t offset, + uint64_t bytes, QEMUIOVector *qiov, int flags) +{ + BDRVNBDState *s = (BDRVNBDState *)bs->opaque; + NBDRequest request = { + .type = NBD_CMD_WRITE, + .from = offset, + .len = bytes, + }; + + assert(!(s->info.flags & NBD_FLAG_READ_ONLY)); + if (flags & BDRV_REQ_FUA) { + assert(s->info.flags & NBD_FLAG_SEND_FUA); + request.flags |= NBD_CMD_FLAG_FUA; + } + + assert(bytes <= NBD_MAX_BUFFER_SIZE); + + if (!bytes) { + return 0; + } + return nbd_co_request(bs, &request, qiov); +} + +static int nbd_client_co_pwrite_zeroes(BlockDriverState *bs, int64_t offset, + int bytes, BdrvRequestFlags flags) +{ + BDRVNBDState *s = (BDRVNBDState *)bs->opaque; + NBDRequest request = { + .type = NBD_CMD_WRITE_ZEROES, + .from = offset, + .len = bytes, + }; + + assert(!(s->info.flags & NBD_FLAG_READ_ONLY)); + if (!(s->info.flags & NBD_FLAG_SEND_WRITE_ZEROES)) { + return -ENOTSUP; + } + + if (flags & BDRV_REQ_FUA) { + assert(s->info.flags & NBD_FLAG_SEND_FUA); + request.flags |= NBD_CMD_FLAG_FUA; + } + if (!(flags & BDRV_REQ_MAY_UNMAP)) { + request.flags |= NBD_CMD_FLAG_NO_HOLE; + } + if (flags & BDRV_REQ_NO_FALLBACK) { + assert(s->info.flags & NBD_FLAG_SEND_FAST_ZERO); + request.flags |= NBD_CMD_FLAG_FAST_ZERO; + } + + if (!bytes) { + return 0; + } + return nbd_co_request(bs, &request, NULL); +} + +static int nbd_client_co_flush(BlockDriverState *bs) +{ + BDRVNBDState *s = (BDRVNBDState *)bs->opaque; + NBDRequest request = { .type = NBD_CMD_FLUSH }; + + if (!(s->info.flags & NBD_FLAG_SEND_FLUSH)) { + return 0; + } + + request.from = 0; + request.len = 0; + + return nbd_co_request(bs, &request, NULL); +} + +static int nbd_client_co_pdiscard(BlockDriverState *bs, int64_t offset, + int bytes) +{ + BDRVNBDState *s = (BDRVNBDState *)bs->opaque; + NBDRequest request = { + .type = NBD_CMD_TRIM, + .from = offset, + .len = bytes, + }; + + assert(!(s->info.flags & NBD_FLAG_READ_ONLY)); + if (!(s->info.flags & NBD_FLAG_SEND_TRIM) || !bytes) { + return 0; + } + + return nbd_co_request(bs, &request, NULL); +} + +static int coroutine_fn nbd_client_co_block_status( + BlockDriverState *bs, bool want_zero, int64_t offset, int64_t bytes, + int64_t *pnum, int64_t *map, BlockDriverState **file) +{ + int ret, request_ret; + NBDExtent extent = { 0 }; + BDRVNBDState *s = (BDRVNBDState *)bs->opaque; + Error *local_err = NULL; + + NBDRequest request = { + .type = NBD_CMD_BLOCK_STATUS, + .from = offset, + .len = MIN(QEMU_ALIGN_DOWN(INT_MAX, bs->bl.request_alignment), + MIN(bytes, s->info.size - offset)), + .flags = NBD_CMD_FLAG_REQ_ONE, + }; + + if (!s->info.base_allocation) { + *pnum = bytes; + *map = offset; + *file = bs; + return BDRV_BLOCK_DATA | BDRV_BLOCK_OFFSET_VALID; + } + + /* + * Work around the fact that the block layer doesn't do + * byte-accurate sizing yet - if the status request exceeds the + * server's advertised size because the block layer rounded size + * up, we truncated the request to the server (above), or are + * called on just the hole. + */ + if (offset >= s->info.size) { + *pnum = bytes; + assert(bytes < BDRV_SECTOR_SIZE); + /* Intentionally don't report offset_valid for the hole */ + return BDRV_BLOCK_ZERO; + } + + if (s->info.min_block) { + assert(QEMU_IS_ALIGNED(request.len, s->info.min_block)); + } + do { + ret = nbd_co_send_request(bs, &request, NULL); + if (ret < 0) { + continue; + } + + ret = nbd_co_receive_blockstatus_reply(s, request.handle, bytes, + &extent, &request_ret, + &local_err); + if (local_err) { + trace_nbd_co_request_fail(request.from, request.len, request.handle, + request.flags, request.type, + nbd_cmd_lookup(request.type), + ret, error_get_pretty(local_err)); + error_free(local_err); + local_err = NULL; + } + } while (ret < 0 && nbd_client_connecting_wait(s)); + + if (ret < 0 || request_ret < 0) { + return ret ? ret : request_ret; + } + + assert(extent.length); + *pnum = extent.length; + *map = offset; + *file = bs; + return (extent.flags & NBD_STATE_HOLE ? 0 : BDRV_BLOCK_DATA) | + (extent.flags & NBD_STATE_ZERO ? BDRV_BLOCK_ZERO : 0) | + BDRV_BLOCK_OFFSET_VALID; +} + +static int nbd_client_reopen_prepare(BDRVReopenState *state, + BlockReopenQueue *queue, Error **errp) +{ + BDRVNBDState *s = (BDRVNBDState *)state->bs->opaque; + + if ((state->flags & BDRV_O_RDWR) && (s->info.flags & NBD_FLAG_READ_ONLY)) { + error_setg(errp, "Can't reopen read-only NBD mount as read/write"); + return -EACCES; + } + return 0; +} + +static void nbd_client_close(BlockDriverState *bs) +{ + BDRVNBDState *s = (BDRVNBDState *)bs->opaque; + NBDRequest request = { .type = NBD_CMD_DISC }; + + if (s->ioc) { + nbd_send_request(s->ioc, &request); + } + + nbd_teardown_connection(bs); +} + +static QIOChannelSocket *nbd_establish_connection(SocketAddress *saddr, + Error **errp) +{ + ERRP_GUARD(); + QIOChannelSocket *sioc; + + sioc = qio_channel_socket_new(); + qio_channel_set_name(QIO_CHANNEL(sioc), "nbd-client"); + + qio_channel_socket_connect_sync(sioc, saddr, errp); + if (*errp) { + object_unref(OBJECT(sioc)); + return NULL; + } + + qio_channel_set_delay(QIO_CHANNEL(sioc), false); + + return sioc; +} + +/* nbd_client_handshake takes ownership on sioc. On failure it is unref'ed. */ +static int nbd_client_handshake(BlockDriverState *bs, QIOChannelSocket *sioc, + Error **errp) +{ + BDRVNBDState *s = (BDRVNBDState *)bs->opaque; + AioContext *aio_context = bdrv_get_aio_context(bs); + int ret; + + trace_nbd_client_handshake(s->export); + + s->sioc = sioc; + + qio_channel_set_blocking(QIO_CHANNEL(sioc), false, NULL); + qio_channel_attach_aio_context(QIO_CHANNEL(sioc), aio_context); + + s->info.request_sizes = true; + s->info.structured_reply = true; + s->info.base_allocation = true; + s->info.x_dirty_bitmap = g_strdup(s->x_dirty_bitmap); + s->info.name = g_strdup(s->export ?: ""); + ret = nbd_receive_negotiate(aio_context, QIO_CHANNEL(sioc), s->tlscreds, + s->hostname, &s->ioc, &s->info, errp); + g_free(s->info.x_dirty_bitmap); + g_free(s->info.name); + if (ret < 0) { + object_unref(OBJECT(sioc)); + s->sioc = NULL; + return ret; + } + if (s->x_dirty_bitmap) { + if (!s->info.base_allocation) { + error_setg(errp, "requested x-dirty-bitmap %s not found", + s->x_dirty_bitmap); + ret = -EINVAL; + goto fail; + } + if (strcmp(s->x_dirty_bitmap, "qemu:allocation-depth") == 0) { + s->alloc_depth = true; + } + } + if (s->info.flags & NBD_FLAG_READ_ONLY) { + ret = bdrv_apply_auto_read_only(bs, "NBD export is read-only", errp); + if (ret < 0) { + goto fail; + } + } + if (s->info.flags & NBD_FLAG_SEND_FUA) { + bs->supported_write_flags = BDRV_REQ_FUA; + bs->supported_zero_flags |= BDRV_REQ_FUA; + } + if (s->info.flags & NBD_FLAG_SEND_WRITE_ZEROES) { + bs->supported_zero_flags |= BDRV_REQ_MAY_UNMAP; + if (s->info.flags & NBD_FLAG_SEND_FAST_ZERO) { + bs->supported_zero_flags |= BDRV_REQ_NO_FALLBACK; + } + } + + if (!s->ioc) { + s->ioc = QIO_CHANNEL(sioc); + object_ref(OBJECT(s->ioc)); + } + + trace_nbd_client_handshake_success(s->export); + + return 0; + + fail: + /* + * We have connected, but must fail for other reasons. + * Send NBD_CMD_DISC as a courtesy to the server. + */ + { + NBDRequest request = { .type = NBD_CMD_DISC }; + + nbd_send_request(s->ioc ?: QIO_CHANNEL(sioc), &request); + + object_unref(OBJECT(sioc)); + s->sioc = NULL; + + return ret; + } +} + +/* + * Parse nbd_open options + */ + +static int nbd_parse_uri(const char *filename, QDict *options) +{ + URI *uri; + const char *p; + QueryParams *qp = NULL; + int ret = 0; + bool is_unix; + + uri = uri_parse(filename); + if (!uri) { + return -EINVAL; + } + + /* transport */ + if (!g_strcmp0(uri->scheme, "nbd")) { + is_unix = false; + } else if (!g_strcmp0(uri->scheme, "nbd+tcp")) { + is_unix = false; + } else if (!g_strcmp0(uri->scheme, "nbd+unix")) { + is_unix = true; + } else { + ret = -EINVAL; + goto out; + } + + p = uri->path ? uri->path : ""; + if (p[0] == '/') { + p++; + } + if (p[0]) { + qdict_put_str(options, "export", p); + } + + qp = query_params_parse(uri->query); + if (qp->n > 1 || (is_unix && !qp->n) || (!is_unix && qp->n)) { + ret = -EINVAL; + goto out; + } + + if (is_unix) { + /* nbd+unix:///export?socket=path */ + if (uri->server || uri->port || strcmp(qp->p[0].name, "socket")) { + ret = -EINVAL; + goto out; + } + qdict_put_str(options, "server.type", "unix"); + qdict_put_str(options, "server.path", qp->p[0].value); + } else { + QString *host; + char *port_str; + + /* nbd[+tcp]://host[:port]/export */ + if (!uri->server) { + ret = -EINVAL; + goto out; + } + + /* strip braces from literal IPv6 address */ + if (uri->server[0] == '[') { + host = qstring_from_substr(uri->server, 1, + strlen(uri->server) - 1); + } else { + host = qstring_from_str(uri->server); + } + + qdict_put_str(options, "server.type", "inet"); + qdict_put(options, "server.host", host); + + port_str = g_strdup_printf("%d", uri->port ?: NBD_DEFAULT_PORT); + qdict_put_str(options, "server.port", port_str); + g_free(port_str); + } + +out: + if (qp) { + query_params_free(qp); + } + uri_free(uri); + return ret; +} + +static bool nbd_has_filename_options_conflict(QDict *options, Error **errp) +{ + const QDictEntry *e; + + for (e = qdict_first(options); e; e = qdict_next(options, e)) { + if (!strcmp(e->key, "host") || + !strcmp(e->key, "port") || + !strcmp(e->key, "path") || + !strcmp(e->key, "export") || + strstart(e->key, "server.", NULL)) + { + error_setg(errp, "Option '%s' cannot be used with a file name", + e->key); + return true; + } + } + + return false; +} + +static void nbd_parse_filename(const char *filename, QDict *options, + Error **errp) +{ + g_autofree char *file = NULL; + char *export_name; + const char *host_spec; + const char *unixpath; + + if (nbd_has_filename_options_conflict(options, errp)) { + return; + } + + if (strstr(filename, "://")) { + int ret = nbd_parse_uri(filename, options); + if (ret < 0) { + error_setg(errp, "No valid URL specified"); + } + return; + } + + file = g_strdup(filename); + + export_name = strstr(file, EN_OPTSTR); + if (export_name) { + if (export_name[strlen(EN_OPTSTR)] == 0) { + return; + } + export_name[0] = 0; /* truncate 'file' */ + export_name += strlen(EN_OPTSTR); + + qdict_put_str(options, "export", export_name); + } + + /* extract the host_spec - fail if it's not nbd:... */ + if (!strstart(file, "nbd:", &host_spec)) { + error_setg(errp, "File name string for NBD must start with 'nbd:'"); + return; + } + + if (!*host_spec) { + return; + } + + /* are we a UNIX or TCP socket? */ + if (strstart(host_spec, "unix:", &unixpath)) { + qdict_put_str(options, "server.type", "unix"); + qdict_put_str(options, "server.path", unixpath); + } else { + InetSocketAddress *addr = g_new(InetSocketAddress, 1); + + if (inet_parse(addr, host_spec, errp)) { + goto out_inet; + } + + qdict_put_str(options, "server.type", "inet"); + qdict_put_str(options, "server.host", addr->host); + qdict_put_str(options, "server.port", addr->port); + out_inet: + qapi_free_InetSocketAddress(addr); + } +} + +static bool nbd_process_legacy_socket_options(QDict *output_options, + QemuOpts *legacy_opts, + Error **errp) +{ + const char *path = qemu_opt_get(legacy_opts, "path"); + const char *host = qemu_opt_get(legacy_opts, "host"); + const char *port = qemu_opt_get(legacy_opts, "port"); + const QDictEntry *e; + + if (!path && !host && !port) { + return true; + } + + for (e = qdict_first(output_options); e; e = qdict_next(output_options, e)) + { + if (strstart(e->key, "server.", NULL)) { + error_setg(errp, "Cannot use 'server' and path/host/port at the " + "same time"); + return false; + } + } + + if (path && host) { + error_setg(errp, "path and host may not be used at the same time"); + return false; + } else if (path) { + if (port) { + error_setg(errp, "port may not be used without host"); + return false; + } + + qdict_put_str(output_options, "server.type", "unix"); + qdict_put_str(output_options, "server.path", path); + } else if (host) { + qdict_put_str(output_options, "server.type", "inet"); + qdict_put_str(output_options, "server.host", host); + qdict_put_str(output_options, "server.port", + port ?: stringify(NBD_DEFAULT_PORT)); + } + + return true; +} + +static SocketAddress *nbd_config(BDRVNBDState *s, QDict *options, + Error **errp) +{ + SocketAddress *saddr = NULL; + QDict *addr = NULL; + Visitor *iv = NULL; + + qdict_extract_subqdict(options, &addr, "server."); + if (!qdict_size(addr)) { + error_setg(errp, "NBD server address missing"); + goto done; + } + + iv = qobject_input_visitor_new_flat_confused(addr, errp); + if (!iv) { + goto done; + } + + if (!visit_type_SocketAddress(iv, NULL, &saddr, errp)) { + goto done; + } + +done: + qobject_unref(addr); + visit_free(iv); + return saddr; +} + +static QCryptoTLSCreds *nbd_get_tls_creds(const char *id, Error **errp) +{ + Object *obj; + QCryptoTLSCreds *creds; + + obj = object_resolve_path_component( + object_get_objects_root(), id); + if (!obj) { + error_setg(errp, "No TLS credentials with id '%s'", + id); + return NULL; + } + creds = (QCryptoTLSCreds *) + object_dynamic_cast(obj, TYPE_QCRYPTO_TLS_CREDS); + if (!creds) { + error_setg(errp, "Object with id '%s' is not TLS credentials", + id); + return NULL; + } + + if (creds->endpoint != QCRYPTO_TLS_CREDS_ENDPOINT_CLIENT) { + error_setg(errp, + "Expecting TLS credentials with a client endpoint"); + return NULL; + } + object_ref(obj); + return creds; +} + + +static QemuOptsList nbd_runtime_opts = { + .name = "nbd", + .head = QTAILQ_HEAD_INITIALIZER(nbd_runtime_opts.head), + .desc = { + { + .name = "host", + .type = QEMU_OPT_STRING, + .help = "TCP host to connect to", + }, + { + .name = "port", + .type = QEMU_OPT_STRING, + .help = "TCP port to connect to", + }, + { + .name = "path", + .type = QEMU_OPT_STRING, + .help = "Unix socket path to connect to", + }, + { + .name = "export", + .type = QEMU_OPT_STRING, + .help = "Name of the NBD export to open", + }, + { + .name = "tls-creds", + .type = QEMU_OPT_STRING, + .help = "ID of the TLS credentials to use", + }, + { + .name = "x-dirty-bitmap", + .type = QEMU_OPT_STRING, + .help = "experimental: expose named dirty bitmap in place of " + "block status", + }, + { + .name = "reconnect-delay", + .type = QEMU_OPT_NUMBER, + .help = "On an unexpected disconnect, the nbd client tries to " + "connect again until succeeding or encountering a serious " + "error. During the first @reconnect-delay seconds, all " + "requests are paused and will be rerun on a successful " + "reconnect. After that time, any delayed requests and all " + "future requests before a successful reconnect will " + "immediately fail. Default 0", + }, + { /* end of list */ } + }, +}; + +static int nbd_process_options(BlockDriverState *bs, QDict *options, + Error **errp) +{ + BDRVNBDState *s = bs->opaque; + QemuOpts *opts; + int ret = -EINVAL; + + opts = qemu_opts_create(&nbd_runtime_opts, NULL, 0, &error_abort); + if (!qemu_opts_absorb_qdict(opts, options, errp)) { + goto error; + } + + /* Translate @host, @port, and @path to a SocketAddress */ + if (!nbd_process_legacy_socket_options(options, opts, errp)) { + goto error; + } + + /* Pop the config into our state object. Exit if invalid. */ + s->saddr = nbd_config(s, options, errp); + if (!s->saddr) { + goto error; + } + + s->export = g_strdup(qemu_opt_get(opts, "export")); + if (s->export && strlen(s->export) > NBD_MAX_STRING_SIZE) { + error_setg(errp, "export name too long to send to server"); + goto error; + } + + s->tlscredsid = g_strdup(qemu_opt_get(opts, "tls-creds")); + if (s->tlscredsid) { + s->tlscreds = nbd_get_tls_creds(s->tlscredsid, errp); + if (!s->tlscreds) { + goto error; + } + + /* TODO SOCKET_ADDRESS_KIND_FD where fd has AF_INET or AF_INET6 */ + if (s->saddr->type != SOCKET_ADDRESS_TYPE_INET) { + error_setg(errp, "TLS only supported over IP sockets"); + goto error; + } + s->hostname = s->saddr->u.inet.host; + } + + s->x_dirty_bitmap = g_strdup(qemu_opt_get(opts, "x-dirty-bitmap")); + if (s->x_dirty_bitmap && strlen(s->x_dirty_bitmap) > NBD_MAX_STRING_SIZE) { + error_setg(errp, "x-dirty-bitmap query too long to send to server"); + goto error; + } + + s->reconnect_delay = qemu_opt_get_number(opts, "reconnect-delay", 0); + + ret = 0; + + error: + if (ret < 0) { + nbd_clear_bdrvstate(s); + } + qemu_opts_del(opts); + return ret; +} + +static int nbd_open(BlockDriverState *bs, QDict *options, int flags, + Error **errp) +{ + int ret; + BDRVNBDState *s = (BDRVNBDState *)bs->opaque; + QIOChannelSocket *sioc; + + ret = nbd_process_options(bs, options, errp); + if (ret < 0) { + return ret; + } + + s->bs = bs; + qemu_co_mutex_init(&s->send_mutex); + qemu_co_queue_init(&s->free_sema); + + /* + * establish TCP connection, return error if it fails + * TODO: Configurable retry-until-timeout behaviour. + */ + sioc = nbd_establish_connection(s->saddr, errp); + if (!sioc) { + return -ECONNREFUSED; + } + + ret = nbd_client_handshake(bs, sioc, errp); + if (ret < 0) { + nbd_clear_bdrvstate(s); + return ret; + } + /* successfully connected */ + s->state = NBD_CLIENT_CONNECTED; + + nbd_init_connect_thread(s); + + s->connection_co = qemu_coroutine_create(nbd_connection_entry, s); + bdrv_inc_in_flight(bs); + aio_co_schedule(bdrv_get_aio_context(bs), s->connection_co); + + return 0; +} + +static int nbd_co_flush(BlockDriverState *bs) +{ + return nbd_client_co_flush(bs); +} + +static void nbd_refresh_limits(BlockDriverState *bs, Error **errp) +{ + BDRVNBDState *s = (BDRVNBDState *)bs->opaque; + uint32_t min = s->info.min_block; + uint32_t max = MIN_NON_ZERO(NBD_MAX_BUFFER_SIZE, s->info.max_block); + + /* + * If the server did not advertise an alignment: + * - a size that is not sector-aligned implies that an alignment + * of 1 can be used to access those tail bytes + * - advertisement of block status requires an alignment of 1, so + * that we don't violate block layer constraints that block + * status is always aligned (as we can't control whether the + * server will report sub-sector extents, such as a hole at EOF + * on an unaligned POSIX file) + * - otherwise, assume the server is so old that we are safer avoiding + * sub-sector requests + */ + if (!min) { + min = (!QEMU_IS_ALIGNED(s->info.size, BDRV_SECTOR_SIZE) || + s->info.base_allocation) ? 1 : BDRV_SECTOR_SIZE; + } + + bs->bl.request_alignment = min; + bs->bl.max_pdiscard = QEMU_ALIGN_DOWN(INT_MAX, min); + bs->bl.max_pwrite_zeroes = max; + bs->bl.max_transfer = max; + + if (s->info.opt_block && + s->info.opt_block > bs->bl.opt_transfer) { + bs->bl.opt_transfer = s->info.opt_block; + } +} + +static void nbd_close(BlockDriverState *bs) +{ + BDRVNBDState *s = bs->opaque; + + nbd_client_close(bs); + nbd_clear_bdrvstate(s); +} + +/* + * NBD cannot truncate, but if the caller asks to truncate to the same size, or + * to a smaller size with exact=false, there is no reason to fail the + * operation. + * + * Preallocation mode is ignored since it does not seems useful to fail when + * we never change anything. + */ +static int coroutine_fn nbd_co_truncate(BlockDriverState *bs, int64_t offset, + bool exact, PreallocMode prealloc, + BdrvRequestFlags flags, Error **errp) +{ + BDRVNBDState *s = bs->opaque; + + if (offset != s->info.size && exact) { + error_setg(errp, "Cannot resize NBD nodes"); + return -ENOTSUP; + } + + if (offset > s->info.size) { + error_setg(errp, "Cannot grow NBD nodes"); + return -EINVAL; + } + + return 0; +} + +static int64_t nbd_getlength(BlockDriverState *bs) +{ + BDRVNBDState *s = bs->opaque; + + return s->info.size; +} + +static void nbd_refresh_filename(BlockDriverState *bs) +{ + BDRVNBDState *s = bs->opaque; + const char *host = NULL, *port = NULL, *path = NULL; + size_t len = 0; + + if (s->saddr->type == SOCKET_ADDRESS_TYPE_INET) { + const InetSocketAddress *inet = &s->saddr->u.inet; + if (!inet->has_ipv4 && !inet->has_ipv6 && !inet->has_to) { + host = inet->host; + port = inet->port; + } + } else if (s->saddr->type == SOCKET_ADDRESS_TYPE_UNIX) { + path = s->saddr->u.q_unix.path; + } /* else can't represent as pseudo-filename */ + + if (path && s->export) { + len = snprintf(bs->exact_filename, sizeof(bs->exact_filename), + "nbd+unix:///%s?socket=%s", s->export, path); + } else if (path && !s->export) { + len = snprintf(bs->exact_filename, sizeof(bs->exact_filename), + "nbd+unix://?socket=%s", path); + } else if (host && s->export) { + len = snprintf(bs->exact_filename, sizeof(bs->exact_filename), + "nbd://%s:%s/%s", host, port, s->export); + } else if (host && !s->export) { + len = snprintf(bs->exact_filename, sizeof(bs->exact_filename), + "nbd://%s:%s", host, port); + } + if (len >= sizeof(bs->exact_filename)) { + /* Name is too long to represent exactly, so leave it empty. */ + bs->exact_filename[0] = '\0'; + } +} + +static char *nbd_dirname(BlockDriverState *bs, Error **errp) +{ + /* The generic bdrv_dirname() implementation is able to work out some + * directory name for NBD nodes, but that would be wrong. So far there is no + * specification for how "export paths" would work, so NBD does not have + * directory names. */ + error_setg(errp, "Cannot generate a base directory for NBD nodes"); + return NULL; +} + +static const char *const nbd_strong_runtime_opts[] = { + "path", + "host", + "port", + "export", + "tls-creds", + "server.", + + NULL +}; + +static BlockDriver bdrv_nbd = { + .format_name = "nbd", + .protocol_name = "nbd", + .instance_size = sizeof(BDRVNBDState), + .bdrv_parse_filename = nbd_parse_filename, + .bdrv_co_create_opts = bdrv_co_create_opts_simple, + .create_opts = &bdrv_create_opts_simple, + .bdrv_file_open = nbd_open, + .bdrv_reopen_prepare = nbd_client_reopen_prepare, + .bdrv_co_preadv = nbd_client_co_preadv, + .bdrv_co_pwritev = nbd_client_co_pwritev, + .bdrv_co_pwrite_zeroes = nbd_client_co_pwrite_zeroes, + .bdrv_close = nbd_close, + .bdrv_co_flush_to_os = nbd_co_flush, + .bdrv_co_pdiscard = nbd_client_co_pdiscard, + .bdrv_refresh_limits = nbd_refresh_limits, + .bdrv_co_truncate = nbd_co_truncate, + .bdrv_getlength = nbd_getlength, + .bdrv_detach_aio_context = nbd_client_detach_aio_context, + .bdrv_attach_aio_context = nbd_client_attach_aio_context, + .bdrv_co_drain_begin = nbd_client_co_drain_begin, + .bdrv_co_drain_end = nbd_client_co_drain_end, + .bdrv_refresh_filename = nbd_refresh_filename, + .bdrv_co_block_status = nbd_client_co_block_status, + .bdrv_dirname = nbd_dirname, + .strong_runtime_opts = nbd_strong_runtime_opts, +}; + +static BlockDriver bdrv_nbd_tcp = { + .format_name = "nbd", + .protocol_name = "nbd+tcp", + .instance_size = sizeof(BDRVNBDState), + .bdrv_parse_filename = nbd_parse_filename, + .bdrv_co_create_opts = bdrv_co_create_opts_simple, + .create_opts = &bdrv_create_opts_simple, + .bdrv_file_open = nbd_open, + .bdrv_reopen_prepare = nbd_client_reopen_prepare, + .bdrv_co_preadv = nbd_client_co_preadv, + .bdrv_co_pwritev = nbd_client_co_pwritev, + .bdrv_co_pwrite_zeroes = nbd_client_co_pwrite_zeroes, + .bdrv_close = nbd_close, + .bdrv_co_flush_to_os = nbd_co_flush, + .bdrv_co_pdiscard = nbd_client_co_pdiscard, + .bdrv_refresh_limits = nbd_refresh_limits, + .bdrv_co_truncate = nbd_co_truncate, + .bdrv_getlength = nbd_getlength, + .bdrv_detach_aio_context = nbd_client_detach_aio_context, + .bdrv_attach_aio_context = nbd_client_attach_aio_context, + .bdrv_co_drain_begin = nbd_client_co_drain_begin, + .bdrv_co_drain_end = nbd_client_co_drain_end, + .bdrv_refresh_filename = nbd_refresh_filename, + .bdrv_co_block_status = nbd_client_co_block_status, + .bdrv_dirname = nbd_dirname, + .strong_runtime_opts = nbd_strong_runtime_opts, +}; + +static BlockDriver bdrv_nbd_unix = { + .format_name = "nbd", + .protocol_name = "nbd+unix", + .instance_size = sizeof(BDRVNBDState), + .bdrv_parse_filename = nbd_parse_filename, + .bdrv_co_create_opts = bdrv_co_create_opts_simple, + .create_opts = &bdrv_create_opts_simple, + .bdrv_file_open = nbd_open, + .bdrv_reopen_prepare = nbd_client_reopen_prepare, + .bdrv_co_preadv = nbd_client_co_preadv, + .bdrv_co_pwritev = nbd_client_co_pwritev, + .bdrv_co_pwrite_zeroes = nbd_client_co_pwrite_zeroes, + .bdrv_close = nbd_close, + .bdrv_co_flush_to_os = nbd_co_flush, + .bdrv_co_pdiscard = nbd_client_co_pdiscard, + .bdrv_refresh_limits = nbd_refresh_limits, + .bdrv_co_truncate = nbd_co_truncate, + .bdrv_getlength = nbd_getlength, + .bdrv_detach_aio_context = nbd_client_detach_aio_context, + .bdrv_attach_aio_context = nbd_client_attach_aio_context, + .bdrv_co_drain_begin = nbd_client_co_drain_begin, + .bdrv_co_drain_end = nbd_client_co_drain_end, + .bdrv_refresh_filename = nbd_refresh_filename, + .bdrv_co_block_status = nbd_client_co_block_status, + .bdrv_dirname = nbd_dirname, + .strong_runtime_opts = nbd_strong_runtime_opts, +}; + +static void bdrv_nbd_init(void) +{ + bdrv_register(&bdrv_nbd); + bdrv_register(&bdrv_nbd_tcp); + bdrv_register(&bdrv_nbd_unix); +} + +block_init(bdrv_nbd_init); diff --git a/block/nfs.c b/block/nfs.c new file mode 100644 index 000000000..8c1968bb4 --- /dev/null +++ b/block/nfs.c @@ -0,0 +1,913 @@ +/* + * QEMU Block driver for native access to files on NFS shares + * + * Copyright (c) 2014-2017 Peter Lieven + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ + +#include "qemu/osdep.h" + +#if !defined(_WIN32) +#include +#endif +#include "qemu/config-file.h" +#include "qemu/error-report.h" +#include "qapi/error.h" +#include "block/block_int.h" +#include "block/qdict.h" +#include "trace.h" +#include "qemu/iov.h" +#include "qemu/main-loop.h" +#include "qemu/module.h" +#include "qemu/option.h" +#include "qemu/uri.h" +#include "qemu/cutils.h" +#include "sysemu/sysemu.h" +#include "sysemu/replay.h" +#include "qapi/qapi-visit-block-core.h" +#include "qapi/qmp/qdict.h" +#include "qapi/qmp/qstring.h" +#include "qapi/qobject-input-visitor.h" +#include "qapi/qobject-output-visitor.h" +#include + + +#define QEMU_NFS_MAX_READAHEAD_SIZE 1048576 +#define QEMU_NFS_MAX_PAGECACHE_SIZE (8388608 / NFS_BLKSIZE) +#define QEMU_NFS_MAX_DEBUG_LEVEL 2 + +typedef struct NFSClient { + struct nfs_context *context; + struct nfsfh *fh; + int events; + bool has_zero_init; + AioContext *aio_context; + QemuMutex mutex; + uint64_t st_blocks; + bool cache_used; + NFSServer *server; + char *path; + int64_t uid, gid, tcp_syncnt, readahead, pagecache, debug; +} NFSClient; + +typedef struct NFSRPC { + BlockDriverState *bs; + int ret; + int complete; + QEMUIOVector *iov; + struct stat *st; + Coroutine *co; + NFSClient *client; +} NFSRPC; + +static int nfs_parse_uri(const char *filename, QDict *options, Error **errp) +{ + URI *uri = NULL; + QueryParams *qp = NULL; + int ret = -EINVAL, i; + + uri = uri_parse(filename); + if (!uri) { + error_setg(errp, "Invalid URI specified"); + goto out; + } + if (g_strcmp0(uri->scheme, "nfs") != 0) { + error_setg(errp, "URI scheme must be 'nfs'"); + goto out; + } + + if (!uri->server) { + error_setg(errp, "missing hostname in URI"); + goto out; + } + + if (!uri->path) { + error_setg(errp, "missing file path in URI"); + goto out; + } + + qp = query_params_parse(uri->query); + if (!qp) { + error_setg(errp, "could not parse query parameters"); + goto out; + } + + qdict_put_str(options, "server.host", uri->server); + qdict_put_str(options, "server.type", "inet"); + qdict_put_str(options, "path", uri->path); + + for (i = 0; i < qp->n; i++) { + unsigned long long val; + if (!qp->p[i].value) { + error_setg(errp, "Value for NFS parameter expected: %s", + qp->p[i].name); + goto out; + } + if (parse_uint_full(qp->p[i].value, &val, 0)) { + error_setg(errp, "Illegal value for NFS parameter: %s", + qp->p[i].name); + goto out; + } + if (!strcmp(qp->p[i].name, "uid")) { + qdict_put_str(options, "user", qp->p[i].value); + } else if (!strcmp(qp->p[i].name, "gid")) { + qdict_put_str(options, "group", qp->p[i].value); + } else if (!strcmp(qp->p[i].name, "tcp-syncnt")) { + qdict_put_str(options, "tcp-syn-count", qp->p[i].value); + } else if (!strcmp(qp->p[i].name, "readahead")) { + qdict_put_str(options, "readahead-size", qp->p[i].value); + } else if (!strcmp(qp->p[i].name, "pagecache")) { + qdict_put_str(options, "page-cache-size", qp->p[i].value); + } else if (!strcmp(qp->p[i].name, "debug")) { + qdict_put_str(options, "debug", qp->p[i].value); + } else { + error_setg(errp, "Unknown NFS parameter name: %s", + qp->p[i].name); + goto out; + } + } + ret = 0; +out: + if (qp) { + query_params_free(qp); + } + if (uri) { + uri_free(uri); + } + return ret; +} + +static bool nfs_has_filename_options_conflict(QDict *options, Error **errp) +{ + const QDictEntry *qe; + + for (qe = qdict_first(options); qe; qe = qdict_next(options, qe)) { + if (!strcmp(qe->key, "host") || + !strcmp(qe->key, "path") || + !strcmp(qe->key, "user") || + !strcmp(qe->key, "group") || + !strcmp(qe->key, "tcp-syn-count") || + !strcmp(qe->key, "readahead-size") || + !strcmp(qe->key, "page-cache-size") || + !strcmp(qe->key, "debug") || + strstart(qe->key, "server.", NULL)) + { + error_setg(errp, "Option %s cannot be used with a filename", + qe->key); + return true; + } + } + + return false; +} + +static void nfs_parse_filename(const char *filename, QDict *options, + Error **errp) +{ + if (nfs_has_filename_options_conflict(options, errp)) { + return; + } + + nfs_parse_uri(filename, options, errp); +} + +static void nfs_process_read(void *arg); +static void nfs_process_write(void *arg); + +/* Called with QemuMutex held. */ +static void nfs_set_events(NFSClient *client) +{ + int ev = nfs_which_events(client->context); + if (ev != client->events) { + aio_set_fd_handler(client->aio_context, nfs_get_fd(client->context), + false, + (ev & POLLIN) ? nfs_process_read : NULL, + (ev & POLLOUT) ? nfs_process_write : NULL, + NULL, client); + + } + client->events = ev; +} + +static void nfs_process_read(void *arg) +{ + NFSClient *client = arg; + + qemu_mutex_lock(&client->mutex); + nfs_service(client->context, POLLIN); + nfs_set_events(client); + qemu_mutex_unlock(&client->mutex); +} + +static void nfs_process_write(void *arg) +{ + NFSClient *client = arg; + + qemu_mutex_lock(&client->mutex); + nfs_service(client->context, POLLOUT); + nfs_set_events(client); + qemu_mutex_unlock(&client->mutex); +} + +static void nfs_co_init_task(BlockDriverState *bs, NFSRPC *task) +{ + *task = (NFSRPC) { + .co = qemu_coroutine_self(), + .bs = bs, + .client = bs->opaque, + }; +} + +static void nfs_co_generic_bh_cb(void *opaque) +{ + NFSRPC *task = opaque; + + task->complete = 1; + aio_co_wake(task->co); +} + +/* Called (via nfs_service) with QemuMutex held. */ +static void +nfs_co_generic_cb(int ret, struct nfs_context *nfs, void *data, + void *private_data) +{ + NFSRPC *task = private_data; + task->ret = ret; + assert(!task->st); + if (task->ret > 0 && task->iov) { + if (task->ret <= task->iov->size) { + qemu_iovec_from_buf(task->iov, 0, data, task->ret); + } else { + task->ret = -EIO; + } + } + if (task->ret < 0) { + error_report("NFS Error: %s", nfs_get_error(nfs)); + } + replay_bh_schedule_oneshot_event(task->client->aio_context, + nfs_co_generic_bh_cb, task); +} + +static int coroutine_fn nfs_co_preadv(BlockDriverState *bs, uint64_t offset, + uint64_t bytes, QEMUIOVector *iov, + int flags) +{ + NFSClient *client = bs->opaque; + NFSRPC task; + + nfs_co_init_task(bs, &task); + task.iov = iov; + + WITH_QEMU_LOCK_GUARD(&client->mutex) { + if (nfs_pread_async(client->context, client->fh, + offset, bytes, nfs_co_generic_cb, &task) != 0) { + return -ENOMEM; + } + + nfs_set_events(client); + } + while (!task.complete) { + qemu_coroutine_yield(); + } + + if (task.ret < 0) { + return task.ret; + } + + /* zero pad short reads */ + if (task.ret < iov->size) { + qemu_iovec_memset(iov, task.ret, 0, iov->size - task.ret); + } + + return 0; +} + +static int coroutine_fn nfs_co_pwritev(BlockDriverState *bs, uint64_t offset, + uint64_t bytes, QEMUIOVector *iov, + int flags) +{ + NFSClient *client = bs->opaque; + NFSRPC task; + char *buf = NULL; + bool my_buffer = false; + + nfs_co_init_task(bs, &task); + + if (iov->niov != 1) { + buf = g_try_malloc(bytes); + if (bytes && buf == NULL) { + return -ENOMEM; + } + qemu_iovec_to_buf(iov, 0, buf, bytes); + my_buffer = true; + } else { + buf = iov->iov[0].iov_base; + } + + WITH_QEMU_LOCK_GUARD(&client->mutex) { + if (nfs_pwrite_async(client->context, client->fh, + offset, bytes, buf, + nfs_co_generic_cb, &task) != 0) { + if (my_buffer) { + g_free(buf); + } + return -ENOMEM; + } + + nfs_set_events(client); + } + while (!task.complete) { + qemu_coroutine_yield(); + } + + if (my_buffer) { + g_free(buf); + } + + if (task.ret != bytes) { + return task.ret < 0 ? task.ret : -EIO; + } + + return 0; +} + +static int coroutine_fn nfs_co_flush(BlockDriverState *bs) +{ + NFSClient *client = bs->opaque; + NFSRPC task; + + nfs_co_init_task(bs, &task); + + WITH_QEMU_LOCK_GUARD(&client->mutex) { + if (nfs_fsync_async(client->context, client->fh, nfs_co_generic_cb, + &task) != 0) { + return -ENOMEM; + } + + nfs_set_events(client); + } + while (!task.complete) { + qemu_coroutine_yield(); + } + + return task.ret; +} + +static void nfs_detach_aio_context(BlockDriverState *bs) +{ + NFSClient *client = bs->opaque; + + aio_set_fd_handler(client->aio_context, nfs_get_fd(client->context), + false, NULL, NULL, NULL, NULL); + client->events = 0; +} + +static void nfs_attach_aio_context(BlockDriverState *bs, + AioContext *new_context) +{ + NFSClient *client = bs->opaque; + + client->aio_context = new_context; + nfs_set_events(client); +} + +static void nfs_client_close(NFSClient *client) +{ + if (client->context) { + qemu_mutex_lock(&client->mutex); + aio_set_fd_handler(client->aio_context, nfs_get_fd(client->context), + false, NULL, NULL, NULL, NULL); + qemu_mutex_unlock(&client->mutex); + if (client->fh) { + nfs_close(client->context, client->fh); + client->fh = NULL; + } +#ifdef LIBNFS_FEATURE_UMOUNT + nfs_umount(client->context); +#endif + nfs_destroy_context(client->context); + client->context = NULL; + } + g_free(client->path); + qemu_mutex_destroy(&client->mutex); + qapi_free_NFSServer(client->server); + client->server = NULL; +} + +static void nfs_file_close(BlockDriverState *bs) +{ + NFSClient *client = bs->opaque; + nfs_client_close(client); +} + +static int64_t nfs_client_open(NFSClient *client, BlockdevOptionsNfs *opts, + int flags, int open_flags, Error **errp) +{ + int64_t ret = -EINVAL; + struct stat st; + char *file = NULL, *strp = NULL; + + qemu_mutex_init(&client->mutex); + + client->path = g_strdup(opts->path); + + strp = strrchr(client->path, '/'); + if (strp == NULL) { + error_setg(errp, "Invalid URL specified"); + goto fail; + } + file = g_strdup(strp); + *strp = 0; + + /* Steal the NFSServer object from opts; set the original pointer to NULL + * to avoid use after free and double free. */ + client->server = opts->server; + opts->server = NULL; + + client->context = nfs_init_context(); + if (client->context == NULL) { + error_setg(errp, "Failed to init NFS context"); + goto fail; + } + + if (opts->has_user) { + client->uid = opts->user; + nfs_set_uid(client->context, client->uid); + } + + if (opts->has_group) { + client->gid = opts->group; + nfs_set_gid(client->context, client->gid); + } + + if (opts->has_tcp_syn_count) { + client->tcp_syncnt = opts->tcp_syn_count; + nfs_set_tcp_syncnt(client->context, client->tcp_syncnt); + } + +#ifdef LIBNFS_FEATURE_READAHEAD + if (opts->has_readahead_size) { + if (open_flags & BDRV_O_NOCACHE) { + error_setg(errp, "Cannot enable NFS readahead " + "if cache.direct = on"); + goto fail; + } + client->readahead = opts->readahead_size; + if (client->readahead > QEMU_NFS_MAX_READAHEAD_SIZE) { + warn_report("Truncating NFS readahead size to %d", + QEMU_NFS_MAX_READAHEAD_SIZE); + client->readahead = QEMU_NFS_MAX_READAHEAD_SIZE; + } + nfs_set_readahead(client->context, client->readahead); +#ifdef LIBNFS_FEATURE_PAGECACHE + nfs_set_pagecache_ttl(client->context, 0); +#endif + client->cache_used = true; + } +#endif + +#ifdef LIBNFS_FEATURE_PAGECACHE + if (opts->has_page_cache_size) { + if (open_flags & BDRV_O_NOCACHE) { + error_setg(errp, "Cannot enable NFS pagecache " + "if cache.direct = on"); + goto fail; + } + client->pagecache = opts->page_cache_size; + if (client->pagecache > QEMU_NFS_MAX_PAGECACHE_SIZE) { + warn_report("Truncating NFS pagecache size to %d pages", + QEMU_NFS_MAX_PAGECACHE_SIZE); + client->pagecache = QEMU_NFS_MAX_PAGECACHE_SIZE; + } + nfs_set_pagecache(client->context, client->pagecache); + nfs_set_pagecache_ttl(client->context, 0); + client->cache_used = true; + } +#endif + +#ifdef LIBNFS_FEATURE_DEBUG + if (opts->has_debug) { + client->debug = opts->debug; + /* limit the maximum debug level to avoid potential flooding + * of our log files. */ + if (client->debug > QEMU_NFS_MAX_DEBUG_LEVEL) { + warn_report("Limiting NFS debug level to %d", + QEMU_NFS_MAX_DEBUG_LEVEL); + client->debug = QEMU_NFS_MAX_DEBUG_LEVEL; + } + nfs_set_debug(client->context, client->debug); + } +#endif + + ret = nfs_mount(client->context, client->server->host, client->path); + if (ret < 0) { + error_setg(errp, "Failed to mount nfs share: %s", + nfs_get_error(client->context)); + goto fail; + } + + if (flags & O_CREAT) { + ret = nfs_creat(client->context, file, 0600, &client->fh); + if (ret < 0) { + error_setg(errp, "Failed to create file: %s", + nfs_get_error(client->context)); + goto fail; + } + } else { + ret = nfs_open(client->context, file, flags, &client->fh); + if (ret < 0) { + error_setg(errp, "Failed to open file : %s", + nfs_get_error(client->context)); + goto fail; + } + } + + ret = nfs_fstat(client->context, client->fh, &st); + if (ret < 0) { + error_setg(errp, "Failed to fstat file: %s", + nfs_get_error(client->context)); + goto fail; + } + + ret = DIV_ROUND_UP(st.st_size, BDRV_SECTOR_SIZE); +#if !defined(_WIN32) + client->st_blocks = st.st_blocks; +#endif + client->has_zero_init = S_ISREG(st.st_mode); + *strp = '/'; + goto out; + +fail: + nfs_client_close(client); +out: + g_free(file); + return ret; +} + +static BlockdevOptionsNfs *nfs_options_qdict_to_qapi(QDict *options, + Error **errp) +{ + BlockdevOptionsNfs *opts = NULL; + Visitor *v; + const QDictEntry *e; + + v = qobject_input_visitor_new_flat_confused(options, errp); + if (!v) { + return NULL; + } + + visit_type_BlockdevOptionsNfs(v, NULL, &opts, errp); + visit_free(v); + if (!opts) { + return NULL; + } + + /* Remove the processed options from the QDict (the visitor processes + * _all_ options in the QDict) */ + while ((e = qdict_first(options))) { + qdict_del(options, e->key); + } + + return opts; +} + +static int64_t nfs_client_open_qdict(NFSClient *client, QDict *options, + int flags, int open_flags, Error **errp) +{ + BlockdevOptionsNfs *opts; + int64_t ret; + + opts = nfs_options_qdict_to_qapi(options, errp); + if (opts == NULL) { + ret = -EINVAL; + goto fail; + } + + ret = nfs_client_open(client, opts, flags, open_flags, errp); +fail: + qapi_free_BlockdevOptionsNfs(opts); + return ret; +} + +static int nfs_file_open(BlockDriverState *bs, QDict *options, int flags, + Error **errp) { + NFSClient *client = bs->opaque; + int64_t ret; + + client->aio_context = bdrv_get_aio_context(bs); + + ret = nfs_client_open_qdict(client, options, + (flags & BDRV_O_RDWR) ? O_RDWR : O_RDONLY, + bs->open_flags, errp); + if (ret < 0) { + return ret; + } + + bs->total_sectors = ret; + if (client->has_zero_init) { + bs->supported_truncate_flags = BDRV_REQ_ZERO_WRITE; + } + return 0; +} + +static QemuOptsList nfs_create_opts = { + .name = "nfs-create-opts", + .head = QTAILQ_HEAD_INITIALIZER(nfs_create_opts.head), + .desc = { + { + .name = BLOCK_OPT_SIZE, + .type = QEMU_OPT_SIZE, + .help = "Virtual disk size" + }, + { /* end of list */ } + } +}; + +static int nfs_file_co_create(BlockdevCreateOptions *options, Error **errp) +{ + BlockdevCreateOptionsNfs *opts = &options->u.nfs; + NFSClient *client = g_new0(NFSClient, 1); + int ret; + + assert(options->driver == BLOCKDEV_DRIVER_NFS); + + client->aio_context = qemu_get_aio_context(); + + ret = nfs_client_open(client, opts->location, O_CREAT, 0, errp); + if (ret < 0) { + goto out; + } + ret = nfs_ftruncate(client->context, client->fh, opts->size); + nfs_client_close(client); + +out: + g_free(client); + return ret; +} + +static int coroutine_fn nfs_file_co_create_opts(BlockDriver *drv, + const char *url, + QemuOpts *opts, + Error **errp) +{ + BlockdevCreateOptions *create_options; + BlockdevCreateOptionsNfs *nfs_opts; + QDict *options; + int ret; + + create_options = g_new0(BlockdevCreateOptions, 1); + create_options->driver = BLOCKDEV_DRIVER_NFS; + nfs_opts = &create_options->u.nfs; + + /* Read out options */ + nfs_opts->size = ROUND_UP(qemu_opt_get_size_del(opts, BLOCK_OPT_SIZE, 0), + BDRV_SECTOR_SIZE); + + options = qdict_new(); + ret = nfs_parse_uri(url, options, errp); + if (ret < 0) { + goto out; + } + + nfs_opts->location = nfs_options_qdict_to_qapi(options, errp); + if (nfs_opts->location == NULL) { + ret = -EINVAL; + goto out; + } + + ret = nfs_file_co_create(create_options, errp); + if (ret < 0) { + goto out; + } + + ret = 0; +out: + qobject_unref(options); + qapi_free_BlockdevCreateOptions(create_options); + return ret; +} + +static int nfs_has_zero_init(BlockDriverState *bs) +{ + NFSClient *client = bs->opaque; + return client->has_zero_init; +} + +#if !defined(_WIN32) +/* Called (via nfs_service) with QemuMutex held. */ +static void +nfs_get_allocated_file_size_cb(int ret, struct nfs_context *nfs, void *data, + void *private_data) +{ + NFSRPC *task = private_data; + task->ret = ret; + if (task->ret == 0) { + memcpy(task->st, data, sizeof(struct stat)); + } + if (task->ret < 0) { + error_report("NFS Error: %s", nfs_get_error(nfs)); + } + + /* Set task->complete before reading bs->wakeup. */ + qatomic_mb_set(&task->complete, 1); + bdrv_wakeup(task->bs); +} + +static int64_t nfs_get_allocated_file_size(BlockDriverState *bs) +{ + NFSClient *client = bs->opaque; + NFSRPC task = {0}; + struct stat st; + + if (bdrv_is_read_only(bs) && + !(bs->open_flags & BDRV_O_NOCACHE)) { + return client->st_blocks * 512; + } + + task.bs = bs; + task.st = &st; + if (nfs_fstat_async(client->context, client->fh, nfs_get_allocated_file_size_cb, + &task) != 0) { + return -ENOMEM; + } + + nfs_set_events(client); + BDRV_POLL_WHILE(bs, !task.complete); + + return (task.ret < 0 ? task.ret : st.st_blocks * 512); +} +#endif + +static int coroutine_fn +nfs_file_co_truncate(BlockDriverState *bs, int64_t offset, bool exact, + PreallocMode prealloc, BdrvRequestFlags flags, + Error **errp) +{ + NFSClient *client = bs->opaque; + int ret; + + if (prealloc != PREALLOC_MODE_OFF) { + error_setg(errp, "Unsupported preallocation mode '%s'", + PreallocMode_str(prealloc)); + return -ENOTSUP; + } + + ret = nfs_ftruncate(client->context, client->fh, offset); + if (ret < 0) { + error_setg_errno(errp, -ret, "Failed to truncate file"); + return ret; + } + + return 0; +} + +/* Note that this will not re-establish a connection with the NFS server + * - it is effectively a NOP. */ +static int nfs_reopen_prepare(BDRVReopenState *state, + BlockReopenQueue *queue, Error **errp) +{ + NFSClient *client = state->bs->opaque; + struct stat st; + int ret = 0; + + if (state->flags & BDRV_O_RDWR && bdrv_is_read_only(state->bs)) { + error_setg(errp, "Cannot open a read-only mount as read-write"); + return -EACCES; + } + + if ((state->flags & BDRV_O_NOCACHE) && client->cache_used) { + error_setg(errp, "Cannot disable cache if libnfs readahead or" + " pagecache is enabled"); + return -EINVAL; + } + + /* Update cache for read-only reopens */ + if (!(state->flags & BDRV_O_RDWR)) { + ret = nfs_fstat(client->context, client->fh, &st); + if (ret < 0) { + error_setg(errp, "Failed to fstat file: %s", + nfs_get_error(client->context)); + return ret; + } +#if !defined(_WIN32) + client->st_blocks = st.st_blocks; +#endif + } + + return 0; +} + +static void nfs_refresh_filename(BlockDriverState *bs) +{ + NFSClient *client = bs->opaque; + + if (client->uid && !client->gid) { + snprintf(bs->exact_filename, sizeof(bs->exact_filename), + "nfs://%s%s?uid=%" PRId64, client->server->host, client->path, + client->uid); + } else if (!client->uid && client->gid) { + snprintf(bs->exact_filename, sizeof(bs->exact_filename), + "nfs://%s%s?gid=%" PRId64, client->server->host, client->path, + client->gid); + } else if (client->uid && client->gid) { + snprintf(bs->exact_filename, sizeof(bs->exact_filename), + "nfs://%s%s?uid=%" PRId64 "&gid=%" PRId64, + client->server->host, client->path, client->uid, client->gid); + } else { + snprintf(bs->exact_filename, sizeof(bs->exact_filename), + "nfs://%s%s", client->server->host, client->path); + } +} + +static char *nfs_dirname(BlockDriverState *bs, Error **errp) +{ + NFSClient *client = bs->opaque; + + if (client->uid || client->gid) { + bdrv_refresh_filename(bs); + error_setg(errp, "Cannot generate a base directory for NFS node '%s'", + bs->filename); + return NULL; + } + + return g_strdup_printf("nfs://%s%s/", client->server->host, client->path); +} + +#ifdef LIBNFS_FEATURE_PAGECACHE +static void coroutine_fn nfs_co_invalidate_cache(BlockDriverState *bs, + Error **errp) +{ + NFSClient *client = bs->opaque; + nfs_pagecache_invalidate(client->context, client->fh); +} +#endif + +static const char *nfs_strong_runtime_opts[] = { + "path", + "user", + "group", + "server.", + + NULL +}; + +static BlockDriver bdrv_nfs = { + .format_name = "nfs", + .protocol_name = "nfs", + + .instance_size = sizeof(NFSClient), + .bdrv_parse_filename = nfs_parse_filename, + .create_opts = &nfs_create_opts, + + .bdrv_has_zero_init = nfs_has_zero_init, +/* libnfs does not provide the allocated filesize of a file on win32. */ +#if !defined(_WIN32) + .bdrv_get_allocated_file_size = nfs_get_allocated_file_size, +#endif + .bdrv_co_truncate = nfs_file_co_truncate, + + .bdrv_file_open = nfs_file_open, + .bdrv_close = nfs_file_close, + .bdrv_co_create = nfs_file_co_create, + .bdrv_co_create_opts = nfs_file_co_create_opts, + .bdrv_reopen_prepare = nfs_reopen_prepare, + + .bdrv_co_preadv = nfs_co_preadv, + .bdrv_co_pwritev = nfs_co_pwritev, + .bdrv_co_flush_to_disk = nfs_co_flush, + + .bdrv_detach_aio_context = nfs_detach_aio_context, + .bdrv_attach_aio_context = nfs_attach_aio_context, + .bdrv_refresh_filename = nfs_refresh_filename, + .bdrv_dirname = nfs_dirname, + + .strong_runtime_opts = nfs_strong_runtime_opts, + +#ifdef LIBNFS_FEATURE_PAGECACHE + .bdrv_co_invalidate_cache = nfs_co_invalidate_cache, +#endif +}; + +static void nfs_block_init(void) +{ + bdrv_register(&bdrv_nfs); +} + +block_init(nfs_block_init); diff --git a/block/null.c b/block/null.c new file mode 100644 index 000000000..cc9b1d4ea --- /dev/null +++ b/block/null.c @@ -0,0 +1,325 @@ +/* + * Null block driver + * + * Authors: + * Fam Zheng + * + * Copyright (C) 2014 Red Hat, Inc. + * + * This work is licensed under the terms of the GNU GPL, version 2 or later. + * See the COPYING file in the top-level directory. + */ + +#include "qemu/osdep.h" +#include "qapi/error.h" +#include "qapi/qmp/qdict.h" +#include "qapi/qmp/qstring.h" +#include "qemu/module.h" +#include "qemu/option.h" +#include "block/block_int.h" +#include "sysemu/replay.h" + +#define NULL_OPT_LATENCY "latency-ns" +#define NULL_OPT_ZEROES "read-zeroes" + +typedef struct { + int64_t length; + int64_t latency_ns; + bool read_zeroes; +} BDRVNullState; + +static QemuOptsList runtime_opts = { + .name = "null", + .head = QTAILQ_HEAD_INITIALIZER(runtime_opts.head), + .desc = { + { + .name = BLOCK_OPT_SIZE, + .type = QEMU_OPT_SIZE, + .help = "size of the null block", + }, + { + .name = NULL_OPT_LATENCY, + .type = QEMU_OPT_NUMBER, + .help = "nanoseconds (approximated) to wait " + "before completing request", + }, + { + .name = NULL_OPT_ZEROES, + .type = QEMU_OPT_BOOL, + .help = "return zeroes when read", + }, + { /* end of list */ } + }, +}; + +static void null_co_parse_filename(const char *filename, QDict *options, + Error **errp) +{ + /* This functions only exists so that a null-co:// filename is accepted + * with the null-co driver. */ + if (strcmp(filename, "null-co://")) { + error_setg(errp, "The only allowed filename for this driver is " + "'null-co://'"); + return; + } +} + +static void null_aio_parse_filename(const char *filename, QDict *options, + Error **errp) +{ + /* This functions only exists so that a null-aio:// filename is accepted + * with the null-aio driver. */ + if (strcmp(filename, "null-aio://")) { + error_setg(errp, "The only allowed filename for this driver is " + "'null-aio://'"); + return; + } +} + +static int null_file_open(BlockDriverState *bs, QDict *options, int flags, + Error **errp) +{ + QemuOpts *opts; + BDRVNullState *s = bs->opaque; + int ret = 0; + + opts = qemu_opts_create(&runtime_opts, NULL, 0, &error_abort); + qemu_opts_absorb_qdict(opts, options, &error_abort); + s->length = + qemu_opt_get_size(opts, BLOCK_OPT_SIZE, 1 << 30); + s->latency_ns = + qemu_opt_get_number(opts, NULL_OPT_LATENCY, 0); + if (s->latency_ns < 0) { + error_setg(errp, "latency-ns is invalid"); + ret = -EINVAL; + } + s->read_zeroes = qemu_opt_get_bool(opts, NULL_OPT_ZEROES, false); + qemu_opts_del(opts); + bs->supported_write_flags = BDRV_REQ_FUA; + return ret; +} + +static int64_t null_getlength(BlockDriverState *bs) +{ + BDRVNullState *s = bs->opaque; + return s->length; +} + +static coroutine_fn int null_co_common(BlockDriverState *bs) +{ + BDRVNullState *s = bs->opaque; + + if (s->latency_ns) { + qemu_co_sleep_ns(QEMU_CLOCK_REALTIME, s->latency_ns); + } + return 0; +} + +static coroutine_fn int null_co_preadv(BlockDriverState *bs, + uint64_t offset, uint64_t bytes, + QEMUIOVector *qiov, int flags) +{ + BDRVNullState *s = bs->opaque; + + if (s->read_zeroes) { + qemu_iovec_memset(qiov, 0, 0, bytes); + } + + return null_co_common(bs); +} + +static coroutine_fn int null_co_pwritev(BlockDriverState *bs, + uint64_t offset, uint64_t bytes, + QEMUIOVector *qiov, int flags) +{ + return null_co_common(bs); +} + +static coroutine_fn int null_co_flush(BlockDriverState *bs) +{ + return null_co_common(bs); +} + +typedef struct { + BlockAIOCB common; + QEMUTimer timer; +} NullAIOCB; + +static const AIOCBInfo null_aiocb_info = { + .aiocb_size = sizeof(NullAIOCB), +}; + +static void null_bh_cb(void *opaque) +{ + NullAIOCB *acb = opaque; + acb->common.cb(acb->common.opaque, 0); + qemu_aio_unref(acb); +} + +static void null_timer_cb(void *opaque) +{ + NullAIOCB *acb = opaque; + acb->common.cb(acb->common.opaque, 0); + timer_deinit(&acb->timer); + qemu_aio_unref(acb); +} + +static inline BlockAIOCB *null_aio_common(BlockDriverState *bs, + BlockCompletionFunc *cb, + void *opaque) +{ + NullAIOCB *acb; + BDRVNullState *s = bs->opaque; + + acb = qemu_aio_get(&null_aiocb_info, bs, cb, opaque); + /* Only emulate latency after vcpu is running. */ + if (s->latency_ns) { + aio_timer_init(bdrv_get_aio_context(bs), &acb->timer, + QEMU_CLOCK_REALTIME, SCALE_NS, + null_timer_cb, acb); + timer_mod_ns(&acb->timer, + qemu_clock_get_ns(QEMU_CLOCK_REALTIME) + s->latency_ns); + } else { + replay_bh_schedule_oneshot_event(bdrv_get_aio_context(bs), + null_bh_cb, acb); + } + return &acb->common; +} + +static BlockAIOCB *null_aio_preadv(BlockDriverState *bs, + uint64_t offset, uint64_t bytes, + QEMUIOVector *qiov, int flags, + BlockCompletionFunc *cb, + void *opaque) +{ + BDRVNullState *s = bs->opaque; + + if (s->read_zeroes) { + qemu_iovec_memset(qiov, 0, 0, bytes); + } + + return null_aio_common(bs, cb, opaque); +} + +static BlockAIOCB *null_aio_pwritev(BlockDriverState *bs, + uint64_t offset, uint64_t bytes, + QEMUIOVector *qiov, int flags, + BlockCompletionFunc *cb, + void *opaque) +{ + return null_aio_common(bs, cb, opaque); +} + +static BlockAIOCB *null_aio_flush(BlockDriverState *bs, + BlockCompletionFunc *cb, + void *opaque) +{ + return null_aio_common(bs, cb, opaque); +} + +static int null_reopen_prepare(BDRVReopenState *reopen_state, + BlockReopenQueue *queue, Error **errp) +{ + return 0; +} + +static int coroutine_fn null_co_block_status(BlockDriverState *bs, + bool want_zero, int64_t offset, + int64_t bytes, int64_t *pnum, + int64_t *map, + BlockDriverState **file) +{ + BDRVNullState *s = bs->opaque; + int ret = BDRV_BLOCK_OFFSET_VALID; + + *pnum = bytes; + *map = offset; + *file = bs; + + if (s->read_zeroes) { + ret |= BDRV_BLOCK_ZERO; + } + return ret; +} + +static void null_refresh_filename(BlockDriverState *bs) +{ + const QDictEntry *e; + + for (e = qdict_first(bs->full_open_options); e; + e = qdict_next(bs->full_open_options, e)) + { + /* These options can be ignored */ + if (strcmp(qdict_entry_key(e), "filename") && + strcmp(qdict_entry_key(e), "driver") && + strcmp(qdict_entry_key(e), NULL_OPT_LATENCY)) + { + return; + } + } + + snprintf(bs->exact_filename, sizeof(bs->exact_filename), "%s://", + bs->drv->format_name); +} + +static int64_t null_allocated_file_size(BlockDriverState *bs) +{ + return 0; +} + +static const char *const null_strong_runtime_opts[] = { + BLOCK_OPT_SIZE, + NULL_OPT_ZEROES, + + NULL +}; + +static BlockDriver bdrv_null_co = { + .format_name = "null-co", + .protocol_name = "null-co", + .instance_size = sizeof(BDRVNullState), + + .bdrv_file_open = null_file_open, + .bdrv_parse_filename = null_co_parse_filename, + .bdrv_getlength = null_getlength, + .bdrv_get_allocated_file_size = null_allocated_file_size, + + .bdrv_co_preadv = null_co_preadv, + .bdrv_co_pwritev = null_co_pwritev, + .bdrv_co_flush_to_disk = null_co_flush, + .bdrv_reopen_prepare = null_reopen_prepare, + + .bdrv_co_block_status = null_co_block_status, + + .bdrv_refresh_filename = null_refresh_filename, + .strong_runtime_opts = null_strong_runtime_opts, +}; + +static BlockDriver bdrv_null_aio = { + .format_name = "null-aio", + .protocol_name = "null-aio", + .instance_size = sizeof(BDRVNullState), + + .bdrv_file_open = null_file_open, + .bdrv_parse_filename = null_aio_parse_filename, + .bdrv_getlength = null_getlength, + .bdrv_get_allocated_file_size = null_allocated_file_size, + + .bdrv_aio_preadv = null_aio_preadv, + .bdrv_aio_pwritev = null_aio_pwritev, + .bdrv_aio_flush = null_aio_flush, + .bdrv_reopen_prepare = null_reopen_prepare, + + .bdrv_co_block_status = null_co_block_status, + + .bdrv_refresh_filename = null_refresh_filename, + .strong_runtime_opts = null_strong_runtime_opts, +}; + +static void bdrv_null_init(void) +{ + bdrv_register(&bdrv_null_co); + bdrv_register(&bdrv_null_aio); +} + +block_init(bdrv_null_init); diff --git a/block/nvme.c b/block/nvme.c new file mode 100644 index 000000000..a06a188d5 --- /dev/null +++ b/block/nvme.c @@ -0,0 +1,1556 @@ +/* + * NVMe block driver based on vfio + * + * Copyright 2016 - 2018 Red Hat, Inc. + * + * Authors: + * Fam Zheng + * Paolo Bonzini + * + * This work is licensed under the terms of the GNU GPL, version 2 or later. + * See the COPYING file in the top-level directory. + */ + +#include "qemu/osdep.h" +#include +#include "qapi/error.h" +#include "qapi/qmp/qdict.h" +#include "qapi/qmp/qstring.h" +#include "qemu/error-report.h" +#include "qemu/main-loop.h" +#include "qemu/module.h" +#include "qemu/cutils.h" +#include "qemu/option.h" +#include "qemu/vfio-helpers.h" +#include "block/block_int.h" +#include "sysemu/replay.h" +#include "trace.h" + +#include "block/nvme.h" + +#define NVME_SQ_ENTRY_BYTES 64 +#define NVME_CQ_ENTRY_BYTES 16 +#define NVME_QUEUE_SIZE 128 +#define NVME_DOORBELL_SIZE 4096 + +/* + * We have to leave one slot empty as that is the full queue case where + * head == tail + 1. + */ +#define NVME_NUM_REQS (NVME_QUEUE_SIZE - 1) + +typedef struct BDRVNVMeState BDRVNVMeState; + +/* Same index is used for queues and IRQs */ +#define INDEX_ADMIN 0 +#define INDEX_IO(n) (1 + n) + +/* This driver shares a single MSIX IRQ for the admin and I/O queues */ +enum { + MSIX_SHARED_IRQ_IDX = 0, + MSIX_IRQ_COUNT = 1 +}; + +typedef struct { + int32_t head, tail; + uint8_t *queue; + uint64_t iova; + /* Hardware MMIO register */ + volatile uint32_t *doorbell; +} NVMeQueue; + +typedef struct { + BlockCompletionFunc *cb; + void *opaque; + int cid; + void *prp_list_page; + uint64_t prp_list_iova; + int free_req_next; /* q->reqs[] index of next free req */ +} NVMeRequest; + +typedef struct { + QemuMutex lock; + + /* Read from I/O code path, initialized under BQL */ + BDRVNVMeState *s; + int index; + + /* Fields protected by BQL */ + uint8_t *prp_list_pages; + + /* Fields protected by @lock */ + CoQueue free_req_queue; + NVMeQueue sq, cq; + int cq_phase; + int free_req_head; + NVMeRequest reqs[NVME_NUM_REQS]; + int need_kick; + int inflight; + + /* Thread-safe, no lock necessary */ + QEMUBH *completion_bh; +} NVMeQueuePair; + +struct BDRVNVMeState { + AioContext *aio_context; + QEMUVFIOState *vfio; + void *bar0_wo_map; + /* Memory mapped registers */ + volatile struct { + uint32_t sq_tail; + uint32_t cq_head; + } *doorbells; + /* The submission/completion queue pairs. + * [0]: admin queue. + * [1..]: io queues. + */ + NVMeQueuePair **queues; + unsigned queue_count; + size_t page_size; + /* How many uint32_t elements does each doorbell entry take. */ + size_t doorbell_scale; + bool write_cache_supported; + EventNotifier irq_notifier[MSIX_IRQ_COUNT]; + + uint64_t nsze; /* Namespace size reported by identify command */ + int nsid; /* The namespace id to read/write data. */ + int blkshift; + + uint64_t max_transfer; + bool plugged; + + bool supports_write_zeroes; + bool supports_discard; + + CoMutex dma_map_lock; + CoQueue dma_flush_queue; + + /* Total size of mapped qiov, accessed under dma_map_lock */ + int dma_map_count; + + /* PCI address (required for nvme_refresh_filename()) */ + char *device; + + struct { + uint64_t completion_errors; + uint64_t aligned_accesses; + uint64_t unaligned_accesses; + } stats; +}; + +#define NVME_BLOCK_OPT_DEVICE "device" +#define NVME_BLOCK_OPT_NAMESPACE "namespace" + +static void nvme_process_completion_bh(void *opaque); + +static QemuOptsList runtime_opts = { + .name = "nvme", + .head = QTAILQ_HEAD_INITIALIZER(runtime_opts.head), + .desc = { + { + .name = NVME_BLOCK_OPT_DEVICE, + .type = QEMU_OPT_STRING, + .help = "NVMe PCI device address", + }, + { + .name = NVME_BLOCK_OPT_NAMESPACE, + .type = QEMU_OPT_NUMBER, + .help = "NVMe namespace", + }, + { /* end of list */ } + }, +}; + +/* Returns true on success, false on failure. */ +static bool nvme_init_queue(BDRVNVMeState *s, NVMeQueue *q, + unsigned nentries, size_t entry_bytes, Error **errp) +{ + size_t bytes; + int r; + + bytes = ROUND_UP(nentries * entry_bytes, qemu_real_host_page_size); + q->head = q->tail = 0; + q->queue = qemu_try_memalign(qemu_real_host_page_size, bytes); + if (!q->queue) { + error_setg(errp, "Cannot allocate queue"); + return false; + } + memset(q->queue, 0, bytes); + r = qemu_vfio_dma_map(s->vfio, q->queue, bytes, false, &q->iova); + if (r) { + error_setg(errp, "Cannot map queue"); + return false; + } + return true; +} + +static void nvme_free_queue_pair(NVMeQueuePair *q) +{ + trace_nvme_free_queue_pair(q->index, q); + if (q->completion_bh) { + qemu_bh_delete(q->completion_bh); + } + qemu_vfree(q->prp_list_pages); + qemu_vfree(q->sq.queue); + qemu_vfree(q->cq.queue); + qemu_mutex_destroy(&q->lock); + g_free(q); +} + +static void nvme_free_req_queue_cb(void *opaque) +{ + NVMeQueuePair *q = opaque; + + qemu_mutex_lock(&q->lock); + while (qemu_co_enter_next(&q->free_req_queue, &q->lock)) { + /* Retry all pending requests */ + } + qemu_mutex_unlock(&q->lock); +} + +static NVMeQueuePair *nvme_create_queue_pair(BDRVNVMeState *s, + AioContext *aio_context, + unsigned idx, size_t size, + Error **errp) +{ + int i, r; + NVMeQueuePair *q; + uint64_t prp_list_iova; + size_t bytes; + + q = g_try_new0(NVMeQueuePair, 1); + if (!q) { + return NULL; + } + trace_nvme_create_queue_pair(idx, q, size, aio_context, + event_notifier_get_fd(s->irq_notifier)); + bytes = QEMU_ALIGN_UP(s->page_size * NVME_NUM_REQS, + qemu_real_host_page_size); + q->prp_list_pages = qemu_try_memalign(qemu_real_host_page_size, bytes); + if (!q->prp_list_pages) { + goto fail; + } + memset(q->prp_list_pages, 0, bytes); + qemu_mutex_init(&q->lock); + q->s = s; + q->index = idx; + qemu_co_queue_init(&q->free_req_queue); + q->completion_bh = aio_bh_new(aio_context, nvme_process_completion_bh, q); + r = qemu_vfio_dma_map(s->vfio, q->prp_list_pages, bytes, + false, &prp_list_iova); + if (r) { + goto fail; + } + q->free_req_head = -1; + for (i = 0; i < NVME_NUM_REQS; i++) { + NVMeRequest *req = &q->reqs[i]; + req->cid = i + 1; + req->free_req_next = q->free_req_head; + q->free_req_head = i; + req->prp_list_page = q->prp_list_pages + i * s->page_size; + req->prp_list_iova = prp_list_iova + i * s->page_size; + } + + if (!nvme_init_queue(s, &q->sq, size, NVME_SQ_ENTRY_BYTES, errp)) { + goto fail; + } + q->sq.doorbell = &s->doorbells[idx * s->doorbell_scale].sq_tail; + + if (!nvme_init_queue(s, &q->cq, size, NVME_CQ_ENTRY_BYTES, errp)) { + goto fail; + } + q->cq.doorbell = &s->doorbells[idx * s->doorbell_scale].cq_head; + + return q; +fail: + nvme_free_queue_pair(q); + return NULL; +} + +/* With q->lock */ +static void nvme_kick(NVMeQueuePair *q) +{ + BDRVNVMeState *s = q->s; + + if (s->plugged || !q->need_kick) { + return; + } + trace_nvme_kick(s, q->index); + assert(!(q->sq.tail & 0xFF00)); + /* Fence the write to submission queue entry before notifying the device. */ + smp_wmb(); + *q->sq.doorbell = cpu_to_le32(q->sq.tail); + q->inflight += q->need_kick; + q->need_kick = 0; +} + +/* Find a free request element if any, otherwise: + * a) if in coroutine context, try to wait for one to become available; + * b) if not in coroutine, return NULL; + */ +static NVMeRequest *nvme_get_free_req(NVMeQueuePair *q) +{ + NVMeRequest *req; + + qemu_mutex_lock(&q->lock); + + while (q->free_req_head == -1) { + if (qemu_in_coroutine()) { + trace_nvme_free_req_queue_wait(q->s, q->index); + qemu_co_queue_wait(&q->free_req_queue, &q->lock); + } else { + qemu_mutex_unlock(&q->lock); + return NULL; + } + } + + req = &q->reqs[q->free_req_head]; + q->free_req_head = req->free_req_next; + req->free_req_next = -1; + + qemu_mutex_unlock(&q->lock); + return req; +} + +/* With q->lock */ +static void nvme_put_free_req_locked(NVMeQueuePair *q, NVMeRequest *req) +{ + req->free_req_next = q->free_req_head; + q->free_req_head = req - q->reqs; +} + +/* With q->lock */ +static void nvme_wake_free_req_locked(NVMeQueuePair *q) +{ + if (!qemu_co_queue_empty(&q->free_req_queue)) { + replay_bh_schedule_oneshot_event(q->s->aio_context, + nvme_free_req_queue_cb, q); + } +} + +/* Insert a request in the freelist and wake waiters */ +static void nvme_put_free_req_and_wake(NVMeQueuePair *q, NVMeRequest *req) +{ + qemu_mutex_lock(&q->lock); + nvme_put_free_req_locked(q, req); + nvme_wake_free_req_locked(q); + qemu_mutex_unlock(&q->lock); +} + +static inline int nvme_translate_error(const NvmeCqe *c) +{ + uint16_t status = (le16_to_cpu(c->status) >> 1) & 0xFF; + if (status) { + trace_nvme_error(le32_to_cpu(c->result), + le16_to_cpu(c->sq_head), + le16_to_cpu(c->sq_id), + le16_to_cpu(c->cid), + le16_to_cpu(status)); + } + switch (status) { + case 0: + return 0; + case 1: + return -ENOSYS; + case 2: + return -EINVAL; + default: + return -EIO; + } +} + +/* With q->lock */ +static bool nvme_process_completion(NVMeQueuePair *q) +{ + BDRVNVMeState *s = q->s; + bool progress = false; + NVMeRequest *preq; + NVMeRequest req; + NvmeCqe *c; + + trace_nvme_process_completion(s, q->index, q->inflight); + if (s->plugged) { + trace_nvme_process_completion_queue_plugged(s, q->index); + return false; + } + + /* + * Support re-entrancy when a request cb() function invokes aio_poll(). + * Pending completions must be visible to aio_poll() so that a cb() + * function can wait for the completion of another request. + * + * The aio_poll() loop will execute our BH and we'll resume completion + * processing there. + */ + qemu_bh_schedule(q->completion_bh); + + assert(q->inflight >= 0); + while (q->inflight) { + int ret; + int16_t cid; + + c = (NvmeCqe *)&q->cq.queue[q->cq.head * NVME_CQ_ENTRY_BYTES]; + if ((le16_to_cpu(c->status) & 0x1) == q->cq_phase) { + break; + } + ret = nvme_translate_error(c); + if (ret) { + s->stats.completion_errors++; + } + q->cq.head = (q->cq.head + 1) % NVME_QUEUE_SIZE; + if (!q->cq.head) { + q->cq_phase = !q->cq_phase; + } + cid = le16_to_cpu(c->cid); + if (cid == 0 || cid > NVME_QUEUE_SIZE) { + warn_report("NVMe: Unexpected CID in completion queue: %"PRIu32", " + "queue size: %u", cid, NVME_QUEUE_SIZE); + continue; + } + trace_nvme_complete_command(s, q->index, cid); + preq = &q->reqs[cid - 1]; + req = *preq; + assert(req.cid == cid); + assert(req.cb); + nvme_put_free_req_locked(q, preq); + preq->cb = preq->opaque = NULL; + q->inflight--; + qemu_mutex_unlock(&q->lock); + req.cb(req.opaque, ret); + qemu_mutex_lock(&q->lock); + progress = true; + } + if (progress) { + /* Notify the device so it can post more completions. */ + smp_mb_release(); + *q->cq.doorbell = cpu_to_le32(q->cq.head); + nvme_wake_free_req_locked(q); + } + + qemu_bh_cancel(q->completion_bh); + + return progress; +} + +static void nvme_process_completion_bh(void *opaque) +{ + NVMeQueuePair *q = opaque; + + /* + * We're being invoked because a nvme_process_completion() cb() function + * called aio_poll(). The callback may be waiting for further completions + * so notify the device that it has space to fill in more completions now. + */ + smp_mb_release(); + *q->cq.doorbell = cpu_to_le32(q->cq.head); + nvme_wake_free_req_locked(q); + + nvme_process_completion(q); +} + +static void nvme_trace_command(const NvmeCmd *cmd) +{ + int i; + + if (!trace_event_get_state_backends(TRACE_NVME_SUBMIT_COMMAND_RAW)) { + return; + } + for (i = 0; i < 8; ++i) { + uint8_t *cmdp = (uint8_t *)cmd + i * 8; + trace_nvme_submit_command_raw(cmdp[0], cmdp[1], cmdp[2], cmdp[3], + cmdp[4], cmdp[5], cmdp[6], cmdp[7]); + } +} + +static void nvme_submit_command(NVMeQueuePair *q, NVMeRequest *req, + NvmeCmd *cmd, BlockCompletionFunc cb, + void *opaque) +{ + assert(!req->cb); + req->cb = cb; + req->opaque = opaque; + cmd->cid = cpu_to_le16(req->cid); + + trace_nvme_submit_command(q->s, q->index, req->cid); + nvme_trace_command(cmd); + qemu_mutex_lock(&q->lock); + memcpy((uint8_t *)q->sq.queue + + q->sq.tail * NVME_SQ_ENTRY_BYTES, cmd, sizeof(*cmd)); + q->sq.tail = (q->sq.tail + 1) % NVME_QUEUE_SIZE; + q->need_kick++; + nvme_kick(q); + nvme_process_completion(q); + qemu_mutex_unlock(&q->lock); +} + +static void nvme_admin_cmd_sync_cb(void *opaque, int ret) +{ + int *pret = opaque; + *pret = ret; + aio_wait_kick(); +} + +static int nvme_admin_cmd_sync(BlockDriverState *bs, NvmeCmd *cmd) +{ + BDRVNVMeState *s = bs->opaque; + NVMeQueuePair *q = s->queues[INDEX_ADMIN]; + AioContext *aio_context = bdrv_get_aio_context(bs); + NVMeRequest *req; + int ret = -EINPROGRESS; + req = nvme_get_free_req(q); + if (!req) { + return -EBUSY; + } + nvme_submit_command(q, req, cmd, nvme_admin_cmd_sync_cb, &ret); + + AIO_WAIT_WHILE(aio_context, ret == -EINPROGRESS); + return ret; +} + +/* Returns true on success, false on failure. */ +static bool nvme_identify(BlockDriverState *bs, int namespace, Error **errp) +{ + BDRVNVMeState *s = bs->opaque; + bool ret = false; + union { + NvmeIdCtrl ctrl; + NvmeIdNs ns; + } *id; + NvmeLBAF *lbaf; + uint16_t oncs; + int r; + uint64_t iova; + NvmeCmd cmd = { + .opcode = NVME_ADM_CMD_IDENTIFY, + .cdw10 = cpu_to_le32(0x1), + }; + size_t id_size = QEMU_ALIGN_UP(sizeof(*id), qemu_real_host_page_size); + + id = qemu_try_memalign(qemu_real_host_page_size, id_size); + if (!id) { + error_setg(errp, "Cannot allocate buffer for identify response"); + goto out; + } + r = qemu_vfio_dma_map(s->vfio, id, id_size, true, &iova); + if (r) { + error_setg(errp, "Cannot map buffer for DMA"); + goto out; + } + + memset(id, 0, id_size); + cmd.dptr.prp1 = cpu_to_le64(iova); + if (nvme_admin_cmd_sync(bs, &cmd)) { + error_setg(errp, "Failed to identify controller"); + goto out; + } + + if (le32_to_cpu(id->ctrl.nn) < namespace) { + error_setg(errp, "Invalid namespace"); + goto out; + } + s->write_cache_supported = le32_to_cpu(id->ctrl.vwc) & 0x1; + s->max_transfer = (id->ctrl.mdts ? 1 << id->ctrl.mdts : 0) * s->page_size; + /* For now the page list buffer per command is one page, to hold at most + * s->page_size / sizeof(uint64_t) entries. */ + s->max_transfer = MIN_NON_ZERO(s->max_transfer, + s->page_size / sizeof(uint64_t) * s->page_size); + + oncs = le16_to_cpu(id->ctrl.oncs); + s->supports_write_zeroes = !!(oncs & NVME_ONCS_WRITE_ZEROES); + s->supports_discard = !!(oncs & NVME_ONCS_DSM); + + memset(id, 0, id_size); + cmd.cdw10 = 0; + cmd.nsid = cpu_to_le32(namespace); + if (nvme_admin_cmd_sync(bs, &cmd)) { + error_setg(errp, "Failed to identify namespace"); + goto out; + } + + s->nsze = le64_to_cpu(id->ns.nsze); + lbaf = &id->ns.lbaf[NVME_ID_NS_FLBAS_INDEX(id->ns.flbas)]; + + if (NVME_ID_NS_DLFEAT_WRITE_ZEROES(id->ns.dlfeat) && + NVME_ID_NS_DLFEAT_READ_BEHAVIOR(id->ns.dlfeat) == + NVME_ID_NS_DLFEAT_READ_BEHAVIOR_ZEROES) { + bs->supported_write_flags |= BDRV_REQ_MAY_UNMAP; + } + + if (lbaf->ms) { + error_setg(errp, "Namespaces with metadata are not yet supported"); + goto out; + } + + if (lbaf->ds < BDRV_SECTOR_BITS || lbaf->ds > 12 || + (1 << lbaf->ds) > s->page_size) + { + error_setg(errp, "Namespace has unsupported block size (2^%d)", + lbaf->ds); + goto out; + } + + ret = true; + s->blkshift = lbaf->ds; +out: + qemu_vfio_dma_unmap(s->vfio, id); + qemu_vfree(id); + + return ret; +} + +static bool nvme_poll_queue(NVMeQueuePair *q) +{ + bool progress = false; + + const size_t cqe_offset = q->cq.head * NVME_CQ_ENTRY_BYTES; + NvmeCqe *cqe = (NvmeCqe *)&q->cq.queue[cqe_offset]; + + trace_nvme_poll_queue(q->s, q->index); + /* + * Do an early check for completions. q->lock isn't needed because + * nvme_process_completion() only runs in the event loop thread and + * cannot race with itself. + */ + if ((le16_to_cpu(cqe->status) & 0x1) == q->cq_phase) { + return false; + } + + qemu_mutex_lock(&q->lock); + while (nvme_process_completion(q)) { + /* Keep polling */ + progress = true; + } + qemu_mutex_unlock(&q->lock); + + return progress; +} + +static bool nvme_poll_queues(BDRVNVMeState *s) +{ + bool progress = false; + int i; + + for (i = 0; i < s->queue_count; i++) { + if (nvme_poll_queue(s->queues[i])) { + progress = true; + } + } + return progress; +} + +static void nvme_handle_event(EventNotifier *n) +{ + BDRVNVMeState *s = container_of(n, BDRVNVMeState, + irq_notifier[MSIX_SHARED_IRQ_IDX]); + + trace_nvme_handle_event(s); + event_notifier_test_and_clear(n); + nvme_poll_queues(s); +} + +static bool nvme_add_io_queue(BlockDriverState *bs, Error **errp) +{ + BDRVNVMeState *s = bs->opaque; + unsigned n = s->queue_count; + NVMeQueuePair *q; + NvmeCmd cmd; + unsigned queue_size = NVME_QUEUE_SIZE; + + assert(n <= UINT16_MAX); + q = nvme_create_queue_pair(s, bdrv_get_aio_context(bs), + n, queue_size, errp); + if (!q) { + return false; + } + cmd = (NvmeCmd) { + .opcode = NVME_ADM_CMD_CREATE_CQ, + .dptr.prp1 = cpu_to_le64(q->cq.iova), + .cdw10 = cpu_to_le32(((queue_size - 1) << 16) | n), + .cdw11 = cpu_to_le32(NVME_CQ_IEN | NVME_CQ_PC), + }; + if (nvme_admin_cmd_sync(bs, &cmd)) { + error_setg(errp, "Failed to create CQ io queue [%u]", n); + goto out_error; + } + cmd = (NvmeCmd) { + .opcode = NVME_ADM_CMD_CREATE_SQ, + .dptr.prp1 = cpu_to_le64(q->sq.iova), + .cdw10 = cpu_to_le32(((queue_size - 1) << 16) | n), + .cdw11 = cpu_to_le32(NVME_SQ_PC | (n << 16)), + }; + if (nvme_admin_cmd_sync(bs, &cmd)) { + error_setg(errp, "Failed to create SQ io queue [%u]", n); + goto out_error; + } + s->queues = g_renew(NVMeQueuePair *, s->queues, n + 1); + s->queues[n] = q; + s->queue_count++; + return true; +out_error: + nvme_free_queue_pair(q); + return false; +} + +static bool nvme_poll_cb(void *opaque) +{ + EventNotifier *e = opaque; + BDRVNVMeState *s = container_of(e, BDRVNVMeState, + irq_notifier[MSIX_SHARED_IRQ_IDX]); + + return nvme_poll_queues(s); +} + +static int nvme_init(BlockDriverState *bs, const char *device, int namespace, + Error **errp) +{ + BDRVNVMeState *s = bs->opaque; + NVMeQueuePair *q; + AioContext *aio_context = bdrv_get_aio_context(bs); + int ret; + uint64_t cap; + uint64_t timeout_ms; + uint64_t deadline, now; + volatile NvmeBar *regs = NULL; + + qemu_co_mutex_init(&s->dma_map_lock); + qemu_co_queue_init(&s->dma_flush_queue); + s->device = g_strdup(device); + s->nsid = namespace; + s->aio_context = bdrv_get_aio_context(bs); + ret = event_notifier_init(&s->irq_notifier[MSIX_SHARED_IRQ_IDX], 0); + if (ret) { + error_setg(errp, "Failed to init event notifier"); + return ret; + } + + s->vfio = qemu_vfio_open_pci(device, errp); + if (!s->vfio) { + ret = -EINVAL; + goto out; + } + + regs = qemu_vfio_pci_map_bar(s->vfio, 0, 0, sizeof(NvmeBar), + PROT_READ | PROT_WRITE, errp); + if (!regs) { + ret = -EINVAL; + goto out; + } + /* Perform initialize sequence as described in NVMe spec "7.6.1 + * Initialization". */ + + cap = le64_to_cpu(regs->cap); + trace_nvme_controller_capability_raw(cap); + trace_nvme_controller_capability("Maximum Queue Entries Supported", + 1 + NVME_CAP_MQES(cap)); + trace_nvme_controller_capability("Contiguous Queues Required", + NVME_CAP_CQR(cap)); + trace_nvme_controller_capability("Doorbell Stride", + 2 << (2 + NVME_CAP_DSTRD(cap))); + trace_nvme_controller_capability("Subsystem Reset Supported", + NVME_CAP_NSSRS(cap)); + trace_nvme_controller_capability("Memory Page Size Minimum", + 1 << (12 + NVME_CAP_MPSMIN(cap))); + trace_nvme_controller_capability("Memory Page Size Maximum", + 1 << (12 + NVME_CAP_MPSMAX(cap))); + if (!NVME_CAP_CSS(cap)) { + error_setg(errp, "Device doesn't support NVMe command set"); + ret = -EINVAL; + goto out; + } + + s->page_size = 1u << (12 + NVME_CAP_MPSMIN(cap)); + s->doorbell_scale = (4 << NVME_CAP_DSTRD(cap)) / sizeof(uint32_t); + bs->bl.opt_mem_alignment = s->page_size; + bs->bl.request_alignment = s->page_size; + timeout_ms = MIN(500 * NVME_CAP_TO(cap), 30000); + + /* Reset device to get a clean state. */ + regs->cc = cpu_to_le32(le32_to_cpu(regs->cc) & 0xFE); + /* Wait for CSTS.RDY = 0. */ + deadline = qemu_clock_get_ns(QEMU_CLOCK_REALTIME) + timeout_ms * SCALE_MS; + while (NVME_CSTS_RDY(le32_to_cpu(regs->csts))) { + if (qemu_clock_get_ns(QEMU_CLOCK_REALTIME) > deadline) { + error_setg(errp, "Timeout while waiting for device to reset (%" + PRId64 " ms)", + timeout_ms); + ret = -ETIMEDOUT; + goto out; + } + } + + s->bar0_wo_map = qemu_vfio_pci_map_bar(s->vfio, 0, 0, + sizeof(NvmeBar) + NVME_DOORBELL_SIZE, + PROT_WRITE, errp); + s->doorbells = (void *)((uintptr_t)s->bar0_wo_map + sizeof(NvmeBar)); + if (!s->doorbells) { + ret = -EINVAL; + goto out; + } + + /* Set up admin queue. */ + s->queues = g_new(NVMeQueuePair *, 1); + q = nvme_create_queue_pair(s, aio_context, 0, NVME_QUEUE_SIZE, errp); + if (!q) { + ret = -EINVAL; + goto out; + } + s->queues[INDEX_ADMIN] = q; + s->queue_count = 1; + QEMU_BUILD_BUG_ON((NVME_QUEUE_SIZE - 1) & 0xF000); + regs->aqa = cpu_to_le32(((NVME_QUEUE_SIZE - 1) << AQA_ACQS_SHIFT) | + ((NVME_QUEUE_SIZE - 1) << AQA_ASQS_SHIFT)); + regs->asq = cpu_to_le64(q->sq.iova); + regs->acq = cpu_to_le64(q->cq.iova); + + /* After setting up all control registers we can enable device now. */ + regs->cc = cpu_to_le32((ctz32(NVME_CQ_ENTRY_BYTES) << CC_IOCQES_SHIFT) | + (ctz32(NVME_SQ_ENTRY_BYTES) << CC_IOSQES_SHIFT) | + CC_EN_MASK); + /* Wait for CSTS.RDY = 1. */ + now = qemu_clock_get_ns(QEMU_CLOCK_REALTIME); + deadline = now + timeout_ms * SCALE_MS; + while (!NVME_CSTS_RDY(le32_to_cpu(regs->csts))) { + if (qemu_clock_get_ns(QEMU_CLOCK_REALTIME) > deadline) { + error_setg(errp, "Timeout while waiting for device to start (%" + PRId64 " ms)", + timeout_ms); + ret = -ETIMEDOUT; + goto out; + } + } + + ret = qemu_vfio_pci_init_irq(s->vfio, s->irq_notifier, + VFIO_PCI_MSIX_IRQ_INDEX, errp); + if (ret) { + goto out; + } + aio_set_event_notifier(bdrv_get_aio_context(bs), + &s->irq_notifier[MSIX_SHARED_IRQ_IDX], + false, nvme_handle_event, nvme_poll_cb); + + if (!nvme_identify(bs, namespace, errp)) { + ret = -EIO; + goto out; + } + + /* Set up command queues. */ + if (!nvme_add_io_queue(bs, errp)) { + ret = -EIO; + } +out: + if (regs) { + qemu_vfio_pci_unmap_bar(s->vfio, 0, (void *)regs, 0, sizeof(NvmeBar)); + } + + /* Cleaning up is done in nvme_file_open() upon error. */ + return ret; +} + +/* Parse a filename in the format of nvme://XXXX:XX:XX.X/X. Example: + * + * nvme://0000:44:00.0/1 + * + * where the "nvme://" is a fixed form of the protocol prefix, the middle part + * is the PCI address, and the last part is the namespace number starting from + * 1 according to the NVMe spec. */ +static void nvme_parse_filename(const char *filename, QDict *options, + Error **errp) +{ + int pref = strlen("nvme://"); + + if (strlen(filename) > pref && !strncmp(filename, "nvme://", pref)) { + const char *tmp = filename + pref; + char *device; + const char *namespace; + unsigned long ns; + const char *slash = strchr(tmp, '/'); + if (!slash) { + qdict_put_str(options, NVME_BLOCK_OPT_DEVICE, tmp); + return; + } + device = g_strndup(tmp, slash - tmp); + qdict_put_str(options, NVME_BLOCK_OPT_DEVICE, device); + g_free(device); + namespace = slash + 1; + if (*namespace && qemu_strtoul(namespace, NULL, 10, &ns)) { + error_setg(errp, "Invalid namespace '%s', positive number expected", + namespace); + return; + } + qdict_put_str(options, NVME_BLOCK_OPT_NAMESPACE, + *namespace ? namespace : "1"); + } +} + +static int nvme_enable_disable_write_cache(BlockDriverState *bs, bool enable, + Error **errp) +{ + int ret; + BDRVNVMeState *s = bs->opaque; + NvmeCmd cmd = { + .opcode = NVME_ADM_CMD_SET_FEATURES, + .nsid = cpu_to_le32(s->nsid), + .cdw10 = cpu_to_le32(0x06), + .cdw11 = cpu_to_le32(enable ? 0x01 : 0x00), + }; + + ret = nvme_admin_cmd_sync(bs, &cmd); + if (ret) { + error_setg(errp, "Failed to configure NVMe write cache"); + } + return ret; +} + +static void nvme_close(BlockDriverState *bs) +{ + BDRVNVMeState *s = bs->opaque; + + for (unsigned i = 0; i < s->queue_count; ++i) { + nvme_free_queue_pair(s->queues[i]); + } + g_free(s->queues); + aio_set_event_notifier(bdrv_get_aio_context(bs), + &s->irq_notifier[MSIX_SHARED_IRQ_IDX], + false, NULL, NULL); + event_notifier_cleanup(&s->irq_notifier[MSIX_SHARED_IRQ_IDX]); + qemu_vfio_pci_unmap_bar(s->vfio, 0, s->bar0_wo_map, + 0, sizeof(NvmeBar) + NVME_DOORBELL_SIZE); + qemu_vfio_close(s->vfio); + + g_free(s->device); +} + +static int nvme_file_open(BlockDriverState *bs, QDict *options, int flags, + Error **errp) +{ + const char *device; + QemuOpts *opts; + int namespace; + int ret; + BDRVNVMeState *s = bs->opaque; + + bs->supported_write_flags = BDRV_REQ_FUA; + + opts = qemu_opts_create(&runtime_opts, NULL, 0, &error_abort); + qemu_opts_absorb_qdict(opts, options, &error_abort); + device = qemu_opt_get(opts, NVME_BLOCK_OPT_DEVICE); + if (!device) { + error_setg(errp, "'" NVME_BLOCK_OPT_DEVICE "' option is required"); + qemu_opts_del(opts); + return -EINVAL; + } + + namespace = qemu_opt_get_number(opts, NVME_BLOCK_OPT_NAMESPACE, 1); + ret = nvme_init(bs, device, namespace, errp); + qemu_opts_del(opts); + if (ret) { + goto fail; + } + if (flags & BDRV_O_NOCACHE) { + if (!s->write_cache_supported) { + error_setg(errp, + "NVMe controller doesn't support write cache configuration"); + ret = -EINVAL; + } else { + ret = nvme_enable_disable_write_cache(bs, !(flags & BDRV_O_NOCACHE), + errp); + } + if (ret) { + goto fail; + } + } + return 0; +fail: + nvme_close(bs); + return ret; +} + +static int64_t nvme_getlength(BlockDriverState *bs) +{ + BDRVNVMeState *s = bs->opaque; + return s->nsze << s->blkshift; +} + +static uint32_t nvme_get_blocksize(BlockDriverState *bs) +{ + BDRVNVMeState *s = bs->opaque; + assert(s->blkshift >= BDRV_SECTOR_BITS && s->blkshift <= 12); + return UINT32_C(1) << s->blkshift; +} + +static int nvme_probe_blocksizes(BlockDriverState *bs, BlockSizes *bsz) +{ + uint32_t blocksize = nvme_get_blocksize(bs); + bsz->phys = blocksize; + bsz->log = blocksize; + return 0; +} + +/* Called with s->dma_map_lock */ +static coroutine_fn int nvme_cmd_unmap_qiov(BlockDriverState *bs, + QEMUIOVector *qiov) +{ + int r = 0; + BDRVNVMeState *s = bs->opaque; + + s->dma_map_count -= qiov->size; + if (!s->dma_map_count && !qemu_co_queue_empty(&s->dma_flush_queue)) { + r = qemu_vfio_dma_reset_temporary(s->vfio); + if (!r) { + qemu_co_queue_restart_all(&s->dma_flush_queue); + } + } + return r; +} + +/* Called with s->dma_map_lock */ +static coroutine_fn int nvme_cmd_map_qiov(BlockDriverState *bs, NvmeCmd *cmd, + NVMeRequest *req, QEMUIOVector *qiov) +{ + BDRVNVMeState *s = bs->opaque; + uint64_t *pagelist = req->prp_list_page; + int i, j, r; + int entries = 0; + + assert(qiov->size); + assert(QEMU_IS_ALIGNED(qiov->size, s->page_size)); + assert(qiov->size / s->page_size <= s->page_size / sizeof(uint64_t)); + for (i = 0; i < qiov->niov; ++i) { + bool retry = true; + uint64_t iova; + size_t len = QEMU_ALIGN_UP(qiov->iov[i].iov_len, + qemu_real_host_page_size); +try_map: + r = qemu_vfio_dma_map(s->vfio, + qiov->iov[i].iov_base, + len, true, &iova); + if (r == -ENOMEM && retry) { + retry = false; + trace_nvme_dma_flush_queue_wait(s); + if (s->dma_map_count) { + trace_nvme_dma_map_flush(s); + qemu_co_queue_wait(&s->dma_flush_queue, &s->dma_map_lock); + } else { + r = qemu_vfio_dma_reset_temporary(s->vfio); + if (r) { + goto fail; + } + } + goto try_map; + } + if (r) { + goto fail; + } + + for (j = 0; j < qiov->iov[i].iov_len / s->page_size; j++) { + pagelist[entries++] = cpu_to_le64(iova + j * s->page_size); + } + trace_nvme_cmd_map_qiov_iov(s, i, qiov->iov[i].iov_base, + qiov->iov[i].iov_len / s->page_size); + } + + s->dma_map_count += qiov->size; + + assert(entries <= s->page_size / sizeof(uint64_t)); + switch (entries) { + case 0: + abort(); + case 1: + cmd->dptr.prp1 = pagelist[0]; + cmd->dptr.prp2 = 0; + break; + case 2: + cmd->dptr.prp1 = pagelist[0]; + cmd->dptr.prp2 = pagelist[1]; + break; + default: + cmd->dptr.prp1 = pagelist[0]; + cmd->dptr.prp2 = cpu_to_le64(req->prp_list_iova + sizeof(uint64_t)); + break; + } + trace_nvme_cmd_map_qiov(s, cmd, req, qiov, entries); + for (i = 0; i < entries; ++i) { + trace_nvme_cmd_map_qiov_pages(s, i, pagelist[i]); + } + return 0; +fail: + /* No need to unmap [0 - i) iovs even if we've failed, since we don't + * increment s->dma_map_count. This is okay for fixed mapping memory areas + * because they are already mapped before calling this function; for + * temporary mappings, a later nvme_cmd_(un)map_qiov will reclaim by + * calling qemu_vfio_dma_reset_temporary when necessary. */ + return r; +} + +typedef struct { + Coroutine *co; + int ret; + AioContext *ctx; +} NVMeCoData; + +static void nvme_rw_cb_bh(void *opaque) +{ + NVMeCoData *data = opaque; + qemu_coroutine_enter(data->co); +} + +static void nvme_rw_cb(void *opaque, int ret) +{ + NVMeCoData *data = opaque; + data->ret = ret; + if (!data->co) { + /* The rw coroutine hasn't yielded, don't try to enter. */ + return; + } + replay_bh_schedule_oneshot_event(data->ctx, nvme_rw_cb_bh, data); +} + +static coroutine_fn int nvme_co_prw_aligned(BlockDriverState *bs, + uint64_t offset, uint64_t bytes, + QEMUIOVector *qiov, + bool is_write, + int flags) +{ + int r; + BDRVNVMeState *s = bs->opaque; + NVMeQueuePair *ioq = s->queues[INDEX_IO(0)]; + NVMeRequest *req; + + uint32_t cdw12 = (((bytes >> s->blkshift) - 1) & 0xFFFF) | + (flags & BDRV_REQ_FUA ? 1 << 30 : 0); + NvmeCmd cmd = { + .opcode = is_write ? NVME_CMD_WRITE : NVME_CMD_READ, + .nsid = cpu_to_le32(s->nsid), + .cdw10 = cpu_to_le32((offset >> s->blkshift) & 0xFFFFFFFF), + .cdw11 = cpu_to_le32(((offset >> s->blkshift) >> 32) & 0xFFFFFFFF), + .cdw12 = cpu_to_le32(cdw12), + }; + NVMeCoData data = { + .ctx = bdrv_get_aio_context(bs), + .ret = -EINPROGRESS, + }; + + trace_nvme_prw_aligned(s, is_write, offset, bytes, flags, qiov->niov); + assert(s->queue_count > 1); + req = nvme_get_free_req(ioq); + assert(req); + + qemu_co_mutex_lock(&s->dma_map_lock); + r = nvme_cmd_map_qiov(bs, &cmd, req, qiov); + qemu_co_mutex_unlock(&s->dma_map_lock); + if (r) { + nvme_put_free_req_and_wake(ioq, req); + return r; + } + nvme_submit_command(ioq, req, &cmd, nvme_rw_cb, &data); + + data.co = qemu_coroutine_self(); + while (data.ret == -EINPROGRESS) { + qemu_coroutine_yield(); + } + + qemu_co_mutex_lock(&s->dma_map_lock); + r = nvme_cmd_unmap_qiov(bs, qiov); + qemu_co_mutex_unlock(&s->dma_map_lock); + if (r) { + return r; + } + + trace_nvme_rw_done(s, is_write, offset, bytes, data.ret); + return data.ret; +} + +static inline bool nvme_qiov_aligned(BlockDriverState *bs, + const QEMUIOVector *qiov) +{ + int i; + BDRVNVMeState *s = bs->opaque; + + for (i = 0; i < qiov->niov; ++i) { + if (!QEMU_PTR_IS_ALIGNED(qiov->iov[i].iov_base, + qemu_real_host_page_size) || + !QEMU_IS_ALIGNED(qiov->iov[i].iov_len, qemu_real_host_page_size)) { + trace_nvme_qiov_unaligned(qiov, i, qiov->iov[i].iov_base, + qiov->iov[i].iov_len, s->page_size); + return false; + } + } + return true; +} + +static int nvme_co_prw(BlockDriverState *bs, uint64_t offset, uint64_t bytes, + QEMUIOVector *qiov, bool is_write, int flags) +{ + BDRVNVMeState *s = bs->opaque; + int r; + uint8_t *buf = NULL; + QEMUIOVector local_qiov; + size_t len = QEMU_ALIGN_UP(bytes, qemu_real_host_page_size); + assert(QEMU_IS_ALIGNED(offset, s->page_size)); + assert(QEMU_IS_ALIGNED(bytes, s->page_size)); + assert(bytes <= s->max_transfer); + if (nvme_qiov_aligned(bs, qiov)) { + s->stats.aligned_accesses++; + return nvme_co_prw_aligned(bs, offset, bytes, qiov, is_write, flags); + } + s->stats.unaligned_accesses++; + trace_nvme_prw_buffered(s, offset, bytes, qiov->niov, is_write); + buf = qemu_try_memalign(qemu_real_host_page_size, len); + + if (!buf) { + return -ENOMEM; + } + qemu_iovec_init(&local_qiov, 1); + if (is_write) { + qemu_iovec_to_buf(qiov, 0, buf, bytes); + } + qemu_iovec_add(&local_qiov, buf, bytes); + r = nvme_co_prw_aligned(bs, offset, bytes, &local_qiov, is_write, flags); + qemu_iovec_destroy(&local_qiov); + if (!r && !is_write) { + qemu_iovec_from_buf(qiov, 0, buf, bytes); + } + qemu_vfree(buf); + return r; +} + +static coroutine_fn int nvme_co_preadv(BlockDriverState *bs, + uint64_t offset, uint64_t bytes, + QEMUIOVector *qiov, int flags) +{ + return nvme_co_prw(bs, offset, bytes, qiov, false, flags); +} + +static coroutine_fn int nvme_co_pwritev(BlockDriverState *bs, + uint64_t offset, uint64_t bytes, + QEMUIOVector *qiov, int flags) +{ + return nvme_co_prw(bs, offset, bytes, qiov, true, flags); +} + +static coroutine_fn int nvme_co_flush(BlockDriverState *bs) +{ + BDRVNVMeState *s = bs->opaque; + NVMeQueuePair *ioq = s->queues[INDEX_IO(0)]; + NVMeRequest *req; + NvmeCmd cmd = { + .opcode = NVME_CMD_FLUSH, + .nsid = cpu_to_le32(s->nsid), + }; + NVMeCoData data = { + .ctx = bdrv_get_aio_context(bs), + .ret = -EINPROGRESS, + }; + + assert(s->queue_count > 1); + req = nvme_get_free_req(ioq); + assert(req); + nvme_submit_command(ioq, req, &cmd, nvme_rw_cb, &data); + + data.co = qemu_coroutine_self(); + if (data.ret == -EINPROGRESS) { + qemu_coroutine_yield(); + } + + return data.ret; +} + + +static coroutine_fn int nvme_co_pwrite_zeroes(BlockDriverState *bs, + int64_t offset, + int bytes, + BdrvRequestFlags flags) +{ + BDRVNVMeState *s = bs->opaque; + NVMeQueuePair *ioq = s->queues[INDEX_IO(0)]; + NVMeRequest *req; + + uint32_t cdw12 = ((bytes >> s->blkshift) - 1) & 0xFFFF; + + if (!s->supports_write_zeroes) { + return -ENOTSUP; + } + + NvmeCmd cmd = { + .opcode = NVME_CMD_WRITE_ZEROES, + .nsid = cpu_to_le32(s->nsid), + .cdw10 = cpu_to_le32((offset >> s->blkshift) & 0xFFFFFFFF), + .cdw11 = cpu_to_le32(((offset >> s->blkshift) >> 32) & 0xFFFFFFFF), + }; + + NVMeCoData data = { + .ctx = bdrv_get_aio_context(bs), + .ret = -EINPROGRESS, + }; + + if (flags & BDRV_REQ_MAY_UNMAP) { + cdw12 |= (1 << 25); + } + + if (flags & BDRV_REQ_FUA) { + cdw12 |= (1 << 30); + } + + cmd.cdw12 = cpu_to_le32(cdw12); + + trace_nvme_write_zeroes(s, offset, bytes, flags); + assert(s->queue_count > 1); + req = nvme_get_free_req(ioq); + assert(req); + + nvme_submit_command(ioq, req, &cmd, nvme_rw_cb, &data); + + data.co = qemu_coroutine_self(); + while (data.ret == -EINPROGRESS) { + qemu_coroutine_yield(); + } + + trace_nvme_rw_done(s, true, offset, bytes, data.ret); + return data.ret; +} + + +static int coroutine_fn nvme_co_pdiscard(BlockDriverState *bs, + int64_t offset, + int bytes) +{ + BDRVNVMeState *s = bs->opaque; + NVMeQueuePair *ioq = s->queues[INDEX_IO(0)]; + NVMeRequest *req; + NvmeDsmRange *buf; + QEMUIOVector local_qiov; + int ret; + + NvmeCmd cmd = { + .opcode = NVME_CMD_DSM, + .nsid = cpu_to_le32(s->nsid), + .cdw10 = cpu_to_le32(0), /*number of ranges - 0 based*/ + .cdw11 = cpu_to_le32(1 << 2), /*deallocate bit*/ + }; + + NVMeCoData data = { + .ctx = bdrv_get_aio_context(bs), + .ret = -EINPROGRESS, + }; + + if (!s->supports_discard) { + return -ENOTSUP; + } + + assert(s->queue_count > 1); + + buf = qemu_try_memalign(s->page_size, s->page_size); + if (!buf) { + return -ENOMEM; + } + memset(buf, 0, s->page_size); + buf->nlb = cpu_to_le32(bytes >> s->blkshift); + buf->slba = cpu_to_le64(offset >> s->blkshift); + buf->cattr = 0; + + qemu_iovec_init(&local_qiov, 1); + qemu_iovec_add(&local_qiov, buf, 4096); + + req = nvme_get_free_req(ioq); + assert(req); + + qemu_co_mutex_lock(&s->dma_map_lock); + ret = nvme_cmd_map_qiov(bs, &cmd, req, &local_qiov); + qemu_co_mutex_unlock(&s->dma_map_lock); + + if (ret) { + nvme_put_free_req_and_wake(ioq, req); + goto out; + } + + trace_nvme_dsm(s, offset, bytes); + + nvme_submit_command(ioq, req, &cmd, nvme_rw_cb, &data); + + data.co = qemu_coroutine_self(); + while (data.ret == -EINPROGRESS) { + qemu_coroutine_yield(); + } + + qemu_co_mutex_lock(&s->dma_map_lock); + ret = nvme_cmd_unmap_qiov(bs, &local_qiov); + qemu_co_mutex_unlock(&s->dma_map_lock); + + if (ret) { + goto out; + } + + ret = data.ret; + trace_nvme_dsm_done(s, offset, bytes, ret); +out: + qemu_iovec_destroy(&local_qiov); + qemu_vfree(buf); + return ret; + +} + + +static int nvme_reopen_prepare(BDRVReopenState *reopen_state, + BlockReopenQueue *queue, Error **errp) +{ + return 0; +} + +static void nvme_refresh_filename(BlockDriverState *bs) +{ + BDRVNVMeState *s = bs->opaque; + + snprintf(bs->exact_filename, sizeof(bs->exact_filename), "nvme://%s/%i", + s->device, s->nsid); +} + +static void nvme_refresh_limits(BlockDriverState *bs, Error **errp) +{ + BDRVNVMeState *s = bs->opaque; + + bs->bl.opt_mem_alignment = s->page_size; + bs->bl.request_alignment = s->page_size; + bs->bl.max_transfer = s->max_transfer; +} + +static void nvme_detach_aio_context(BlockDriverState *bs) +{ + BDRVNVMeState *s = bs->opaque; + + for (unsigned i = 0; i < s->queue_count; i++) { + NVMeQueuePair *q = s->queues[i]; + + qemu_bh_delete(q->completion_bh); + q->completion_bh = NULL; + } + + aio_set_event_notifier(bdrv_get_aio_context(bs), + &s->irq_notifier[MSIX_SHARED_IRQ_IDX], + false, NULL, NULL); +} + +static void nvme_attach_aio_context(BlockDriverState *bs, + AioContext *new_context) +{ + BDRVNVMeState *s = bs->opaque; + + s->aio_context = new_context; + aio_set_event_notifier(new_context, &s->irq_notifier[MSIX_SHARED_IRQ_IDX], + false, nvme_handle_event, nvme_poll_cb); + + for (unsigned i = 0; i < s->queue_count; i++) { + NVMeQueuePair *q = s->queues[i]; + + q->completion_bh = + aio_bh_new(new_context, nvme_process_completion_bh, q); + } +} + +static void nvme_aio_plug(BlockDriverState *bs) +{ + BDRVNVMeState *s = bs->opaque; + assert(!s->plugged); + s->plugged = true; +} + +static void nvme_aio_unplug(BlockDriverState *bs) +{ + BDRVNVMeState *s = bs->opaque; + assert(s->plugged); + s->plugged = false; + for (unsigned i = INDEX_IO(0); i < s->queue_count; i++) { + NVMeQueuePair *q = s->queues[i]; + qemu_mutex_lock(&q->lock); + nvme_kick(q); + nvme_process_completion(q); + qemu_mutex_unlock(&q->lock); + } +} + +static void nvme_register_buf(BlockDriverState *bs, void *host, size_t size) +{ + int ret; + BDRVNVMeState *s = bs->opaque; + + ret = qemu_vfio_dma_map(s->vfio, host, size, false, NULL); + if (ret) { + /* FIXME: we may run out of IOVA addresses after repeated + * bdrv_register_buf/bdrv_unregister_buf, because nvme_vfio_dma_unmap + * doesn't reclaim addresses for fixed mappings. */ + error_report("nvme_register_buf failed: %s", strerror(-ret)); + } +} + +static void nvme_unregister_buf(BlockDriverState *bs, void *host) +{ + BDRVNVMeState *s = bs->opaque; + + qemu_vfio_dma_unmap(s->vfio, host); +} + +static BlockStatsSpecific *nvme_get_specific_stats(BlockDriverState *bs) +{ + BlockStatsSpecific *stats = g_new(BlockStatsSpecific, 1); + BDRVNVMeState *s = bs->opaque; + + stats->driver = BLOCKDEV_DRIVER_NVME; + stats->u.nvme = (BlockStatsSpecificNvme) { + .completion_errors = s->stats.completion_errors, + .aligned_accesses = s->stats.aligned_accesses, + .unaligned_accesses = s->stats.unaligned_accesses, + }; + + return stats; +} + +static const char *const nvme_strong_runtime_opts[] = { + NVME_BLOCK_OPT_DEVICE, + NVME_BLOCK_OPT_NAMESPACE, + + NULL +}; + +static BlockDriver bdrv_nvme = { + .format_name = "nvme", + .protocol_name = "nvme", + .instance_size = sizeof(BDRVNVMeState), + + .bdrv_co_create_opts = bdrv_co_create_opts_simple, + .create_opts = &bdrv_create_opts_simple, + + .bdrv_parse_filename = nvme_parse_filename, + .bdrv_file_open = nvme_file_open, + .bdrv_close = nvme_close, + .bdrv_getlength = nvme_getlength, + .bdrv_probe_blocksizes = nvme_probe_blocksizes, + + .bdrv_co_preadv = nvme_co_preadv, + .bdrv_co_pwritev = nvme_co_pwritev, + + .bdrv_co_pwrite_zeroes = nvme_co_pwrite_zeroes, + .bdrv_co_pdiscard = nvme_co_pdiscard, + + .bdrv_co_flush_to_disk = nvme_co_flush, + .bdrv_reopen_prepare = nvme_reopen_prepare, + + .bdrv_refresh_filename = nvme_refresh_filename, + .bdrv_refresh_limits = nvme_refresh_limits, + .strong_runtime_opts = nvme_strong_runtime_opts, + .bdrv_get_specific_stats = nvme_get_specific_stats, + + .bdrv_detach_aio_context = nvme_detach_aio_context, + .bdrv_attach_aio_context = nvme_attach_aio_context, + + .bdrv_io_plug = nvme_aio_plug, + .bdrv_io_unplug = nvme_aio_unplug, + + .bdrv_register_buf = nvme_register_buf, + .bdrv_unregister_buf = nvme_unregister_buf, +}; + +static void bdrv_nvme_init(void) +{ + bdrv_register(&bdrv_nvme); +} + +block_init(bdrv_nvme_init); diff --git a/block/parallels.c b/block/parallels.c new file mode 100644 index 000000000..3c22dfdc9 --- /dev/null +++ b/block/parallels.c @@ -0,0 +1,925 @@ +/* + * Block driver for Parallels disk image format + * + * Copyright (c) 2007 Alex Beregszaszi + * Copyright (c) 2015 Denis V. Lunev + * + * This code was originally based on comparing different disk images created + * by Parallels. Currently it is based on opened OpenVZ sources + * available at + * http://git.openvz.org/?p=ploop;a=summary + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ + +#include "qemu/osdep.h" +#include "qapi/error.h" +#include "block/block_int.h" +#include "block/qdict.h" +#include "sysemu/block-backend.h" +#include "qemu/module.h" +#include "qemu/option.h" +#include "qapi/qmp/qdict.h" +#include "qapi/qobject-input-visitor.h" +#include "qapi/qapi-visit-block-core.h" +#include "qemu/bswap.h" +#include "qemu/bitmap.h" +#include "migration/blocker.h" +#include "parallels.h" + +/**************************************************************/ + +#define HEADER_MAGIC "WithoutFreeSpace" +#define HEADER_MAGIC2 "WithouFreSpacExt" +#define HEADER_VERSION 2 +#define HEADER_INUSE_MAGIC (0x746F6E59) +#define MAX_PARALLELS_IMAGE_FACTOR (1ull << 32) + +static QEnumLookup prealloc_mode_lookup = { + .array = (const char *const[]) { + "falloc", + "truncate", + }, + .size = PRL_PREALLOC_MODE__MAX +}; + +#define PARALLELS_OPT_PREALLOC_MODE "prealloc-mode" +#define PARALLELS_OPT_PREALLOC_SIZE "prealloc-size" + +static QemuOptsList parallels_runtime_opts = { + .name = "parallels", + .head = QTAILQ_HEAD_INITIALIZER(parallels_runtime_opts.head), + .desc = { + { + .name = PARALLELS_OPT_PREALLOC_SIZE, + .type = QEMU_OPT_SIZE, + .help = "Preallocation size on image expansion", + .def_value_str = "128M", + }, + { + .name = PARALLELS_OPT_PREALLOC_MODE, + .type = QEMU_OPT_STRING, + .help = "Preallocation mode on image expansion " + "(allowed values: falloc, truncate)", + .def_value_str = "falloc", + }, + { /* end of list */ }, + }, +}; + +static QemuOptsList parallels_create_opts = { + .name = "parallels-create-opts", + .head = QTAILQ_HEAD_INITIALIZER(parallels_create_opts.head), + .desc = { + { + .name = BLOCK_OPT_SIZE, + .type = QEMU_OPT_SIZE, + .help = "Virtual disk size", + }, + { + .name = BLOCK_OPT_CLUSTER_SIZE, + .type = QEMU_OPT_SIZE, + .help = "Parallels image cluster size", + .def_value_str = stringify(DEFAULT_CLUSTER_SIZE), + }, + { /* end of list */ } + } +}; + + +static int64_t bat2sect(BDRVParallelsState *s, uint32_t idx) +{ + return (uint64_t)le32_to_cpu(s->bat_bitmap[idx]) * s->off_multiplier; +} + +static uint32_t bat_entry_off(uint32_t idx) +{ + return sizeof(ParallelsHeader) + sizeof(uint32_t) * idx; +} + +static int64_t seek_to_sector(BDRVParallelsState *s, int64_t sector_num) +{ + uint32_t index, offset; + + index = sector_num / s->tracks; + offset = sector_num % s->tracks; + + /* not allocated */ + if ((index >= s->bat_size) || (s->bat_bitmap[index] == 0)) { + return -1; + } + return bat2sect(s, index) + offset; +} + +static int cluster_remainder(BDRVParallelsState *s, int64_t sector_num, + int nb_sectors) +{ + int ret = s->tracks - sector_num % s->tracks; + return MIN(nb_sectors, ret); +} + +static int64_t block_status(BDRVParallelsState *s, int64_t sector_num, + int nb_sectors, int *pnum) +{ + int64_t start_off = -2, prev_end_off = -2; + + *pnum = 0; + while (nb_sectors > 0 || start_off == -2) { + int64_t offset = seek_to_sector(s, sector_num); + int to_end; + + if (start_off == -2) { + start_off = offset; + prev_end_off = offset; + } else if (offset != prev_end_off) { + break; + } + + to_end = cluster_remainder(s, sector_num, nb_sectors); + nb_sectors -= to_end; + sector_num += to_end; + *pnum += to_end; + + if (offset > 0) { + prev_end_off += to_end; + } + } + return start_off; +} + +static int64_t allocate_clusters(BlockDriverState *bs, int64_t sector_num, + int nb_sectors, int *pnum) +{ + int ret = 0; + BDRVParallelsState *s = bs->opaque; + int64_t pos, space, idx, to_allocate, i, len; + + pos = block_status(s, sector_num, nb_sectors, pnum); + if (pos > 0) { + return pos; + } + + idx = sector_num / s->tracks; + to_allocate = DIV_ROUND_UP(sector_num + *pnum, s->tracks) - idx; + + /* This function is called only by parallels_co_writev(), which will never + * pass a sector_num at or beyond the end of the image (because the block + * layer never passes such a sector_num to that function). Therefore, idx + * is always below s->bat_size. + * block_status() will limit *pnum so that sector_num + *pnum will not + * exceed the image end. Therefore, idx + to_allocate cannot exceed + * s->bat_size. + * Note that s->bat_size is an unsigned int, therefore idx + to_allocate + * will always fit into a uint32_t. */ + assert(idx < s->bat_size && idx + to_allocate <= s->bat_size); + + space = to_allocate * s->tracks; + len = bdrv_getlength(bs->file->bs); + if (len < 0) { + return len; + } + if (s->data_end + space > (len >> BDRV_SECTOR_BITS)) { + space += s->prealloc_size; + /* + * We require the expanded size to read back as zero. If the + * user permitted truncation, we try that; but if it fails, we + * force the safer-but-slower fallocate. + */ + if (s->prealloc_mode == PRL_PREALLOC_MODE_TRUNCATE) { + ret = bdrv_truncate(bs->file, + (s->data_end + space) << BDRV_SECTOR_BITS, + false, PREALLOC_MODE_OFF, BDRV_REQ_ZERO_WRITE, + NULL); + if (ret == -ENOTSUP) { + s->prealloc_mode = PRL_PREALLOC_MODE_FALLOCATE; + } + } + if (s->prealloc_mode == PRL_PREALLOC_MODE_FALLOCATE) { + ret = bdrv_pwrite_zeroes(bs->file, + s->data_end << BDRV_SECTOR_BITS, + space << BDRV_SECTOR_BITS, 0); + } + if (ret < 0) { + return ret; + } + } + + /* Try to read from backing to fill empty clusters + * FIXME: 1. previous write_zeroes may be redundant + * 2. most of data we read from backing will be rewritten by + * parallels_co_writev. On aligned-to-cluster write we do not need + * this read at all. + * 3. it would be good to combine write of data from backing and new + * data into one write call */ + if (bs->backing) { + int64_t nb_cow_sectors = to_allocate * s->tracks; + int64_t nb_cow_bytes = nb_cow_sectors << BDRV_SECTOR_BITS; + void *buf = qemu_blockalign(bs, nb_cow_bytes); + + ret = bdrv_co_pread(bs->backing, idx * s->tracks * BDRV_SECTOR_SIZE, + nb_cow_bytes, buf, 0); + if (ret < 0) { + qemu_vfree(buf); + return ret; + } + + ret = bdrv_co_pwritev(bs->file, s->data_end * BDRV_SECTOR_SIZE, + nb_cow_bytes, buf, 0); + qemu_vfree(buf); + if (ret < 0) { + return ret; + } + } + + for (i = 0; i < to_allocate; i++) { + s->bat_bitmap[idx + i] = cpu_to_le32(s->data_end / s->off_multiplier); + s->data_end += s->tracks; + bitmap_set(s->bat_dirty_bmap, + bat_entry_off(idx + i) / s->bat_dirty_block, 1); + } + + return bat2sect(s, idx) + sector_num % s->tracks; +} + + +static coroutine_fn int parallels_co_flush_to_os(BlockDriverState *bs) +{ + BDRVParallelsState *s = bs->opaque; + unsigned long size = DIV_ROUND_UP(s->header_size, s->bat_dirty_block); + unsigned long bit; + + qemu_co_mutex_lock(&s->lock); + + bit = find_first_bit(s->bat_dirty_bmap, size); + while (bit < size) { + uint32_t off = bit * s->bat_dirty_block; + uint32_t to_write = s->bat_dirty_block; + int ret; + + if (off + to_write > s->header_size) { + to_write = s->header_size - off; + } + ret = bdrv_pwrite(bs->file, off, (uint8_t *)s->header + off, + to_write); + if (ret < 0) { + qemu_co_mutex_unlock(&s->lock); + return ret; + } + bit = find_next_bit(s->bat_dirty_bmap, size, bit + 1); + } + bitmap_zero(s->bat_dirty_bmap, size); + + qemu_co_mutex_unlock(&s->lock); + return 0; +} + + +static int coroutine_fn parallels_co_block_status(BlockDriverState *bs, + bool want_zero, + int64_t offset, + int64_t bytes, + int64_t *pnum, + int64_t *map, + BlockDriverState **file) +{ + BDRVParallelsState *s = bs->opaque; + int count; + + assert(QEMU_IS_ALIGNED(offset | bytes, BDRV_SECTOR_SIZE)); + qemu_co_mutex_lock(&s->lock); + offset = block_status(s, offset >> BDRV_SECTOR_BITS, + bytes >> BDRV_SECTOR_BITS, &count); + qemu_co_mutex_unlock(&s->lock); + + *pnum = count * BDRV_SECTOR_SIZE; + if (offset < 0) { + return 0; + } + + *map = offset * BDRV_SECTOR_SIZE; + *file = bs->file->bs; + return BDRV_BLOCK_DATA | BDRV_BLOCK_OFFSET_VALID; +} + +static coroutine_fn int parallels_co_writev(BlockDriverState *bs, + int64_t sector_num, int nb_sectors, + QEMUIOVector *qiov, int flags) +{ + BDRVParallelsState *s = bs->opaque; + uint64_t bytes_done = 0; + QEMUIOVector hd_qiov; + int ret = 0; + + assert(!flags); + qemu_iovec_init(&hd_qiov, qiov->niov); + + while (nb_sectors > 0) { + int64_t position; + int n, nbytes; + + qemu_co_mutex_lock(&s->lock); + position = allocate_clusters(bs, sector_num, nb_sectors, &n); + qemu_co_mutex_unlock(&s->lock); + if (position < 0) { + ret = (int)position; + break; + } + + nbytes = n << BDRV_SECTOR_BITS; + + qemu_iovec_reset(&hd_qiov); + qemu_iovec_concat(&hd_qiov, qiov, bytes_done, nbytes); + + ret = bdrv_co_pwritev(bs->file, position * BDRV_SECTOR_SIZE, nbytes, + &hd_qiov, 0); + if (ret < 0) { + break; + } + + nb_sectors -= n; + sector_num += n; + bytes_done += nbytes; + } + + qemu_iovec_destroy(&hd_qiov); + return ret; +} + +static coroutine_fn int parallels_co_readv(BlockDriverState *bs, + int64_t sector_num, int nb_sectors, QEMUIOVector *qiov) +{ + BDRVParallelsState *s = bs->opaque; + uint64_t bytes_done = 0; + QEMUIOVector hd_qiov; + int ret = 0; + + qemu_iovec_init(&hd_qiov, qiov->niov); + + while (nb_sectors > 0) { + int64_t position; + int n, nbytes; + + qemu_co_mutex_lock(&s->lock); + position = block_status(s, sector_num, nb_sectors, &n); + qemu_co_mutex_unlock(&s->lock); + + nbytes = n << BDRV_SECTOR_BITS; + + qemu_iovec_reset(&hd_qiov); + qemu_iovec_concat(&hd_qiov, qiov, bytes_done, nbytes); + + if (position < 0) { + if (bs->backing) { + ret = bdrv_co_preadv(bs->backing, sector_num * BDRV_SECTOR_SIZE, + nbytes, &hd_qiov, 0); + if (ret < 0) { + break; + } + } else { + qemu_iovec_memset(&hd_qiov, 0, 0, nbytes); + } + } else { + ret = bdrv_co_preadv(bs->file, position * BDRV_SECTOR_SIZE, nbytes, + &hd_qiov, 0); + if (ret < 0) { + break; + } + } + + nb_sectors -= n; + sector_num += n; + bytes_done += nbytes; + } + + qemu_iovec_destroy(&hd_qiov); + return ret; +} + + +static int coroutine_fn parallels_co_check(BlockDriverState *bs, + BdrvCheckResult *res, + BdrvCheckMode fix) +{ + BDRVParallelsState *s = bs->opaque; + int64_t size, prev_off, high_off; + int ret; + uint32_t i; + bool flush_bat = false; + int cluster_size = s->tracks << BDRV_SECTOR_BITS; + + size = bdrv_getlength(bs->file->bs); + if (size < 0) { + res->check_errors++; + return size; + } + + qemu_co_mutex_lock(&s->lock); + if (s->header_unclean) { + fprintf(stderr, "%s image was not closed correctly\n", + fix & BDRV_FIX_ERRORS ? "Repairing" : "ERROR"); + res->corruptions++; + if (fix & BDRV_FIX_ERRORS) { + /* parallels_close will do the job right */ + res->corruptions_fixed++; + s->header_unclean = false; + } + } + + res->bfi.total_clusters = s->bat_size; + res->bfi.compressed_clusters = 0; /* compression is not supported */ + + high_off = 0; + prev_off = 0; + for (i = 0; i < s->bat_size; i++) { + int64_t off = bat2sect(s, i) << BDRV_SECTOR_BITS; + if (off == 0) { + prev_off = 0; + continue; + } + + /* cluster outside the image */ + if (off > size) { + fprintf(stderr, "%s cluster %u is outside image\n", + fix & BDRV_FIX_ERRORS ? "Repairing" : "ERROR", i); + res->corruptions++; + if (fix & BDRV_FIX_ERRORS) { + prev_off = 0; + s->bat_bitmap[i] = 0; + res->corruptions_fixed++; + flush_bat = true; + continue; + } + } + + res->bfi.allocated_clusters++; + if (off > high_off) { + high_off = off; + } + + if (prev_off != 0 && (prev_off + cluster_size) != off) { + res->bfi.fragmented_clusters++; + } + prev_off = off; + } + + ret = 0; + if (flush_bat) { + ret = bdrv_pwrite_sync(bs->file, 0, s->header, s->header_size); + if (ret < 0) { + res->check_errors++; + goto out; + } + } + + res->image_end_offset = high_off + cluster_size; + if (size > res->image_end_offset) { + int64_t count; + count = DIV_ROUND_UP(size - res->image_end_offset, cluster_size); + fprintf(stderr, "%s space leaked at the end of the image %" PRId64 "\n", + fix & BDRV_FIX_LEAKS ? "Repairing" : "ERROR", + size - res->image_end_offset); + res->leaks += count; + if (fix & BDRV_FIX_LEAKS) { + Error *local_err = NULL; + + /* + * In order to really repair the image, we must shrink it. + * That means we have to pass exact=true. + */ + ret = bdrv_truncate(bs->file, res->image_end_offset, true, + PREALLOC_MODE_OFF, 0, &local_err); + if (ret < 0) { + error_report_err(local_err); + res->check_errors++; + goto out; + } + res->leaks_fixed += count; + } + } + +out: + qemu_co_mutex_unlock(&s->lock); + return ret; +} + + +static int coroutine_fn parallels_co_create(BlockdevCreateOptions* opts, + Error **errp) +{ + BlockdevCreateOptionsParallels *parallels_opts; + BlockDriverState *bs; + BlockBackend *blk; + int64_t total_size, cl_size; + uint32_t bat_entries, bat_sectors; + ParallelsHeader header; + uint8_t tmp[BDRV_SECTOR_SIZE]; + int ret; + + assert(opts->driver == BLOCKDEV_DRIVER_PARALLELS); + parallels_opts = &opts->u.parallels; + + /* Sanity checks */ + total_size = parallels_opts->size; + + if (parallels_opts->has_cluster_size) { + cl_size = parallels_opts->cluster_size; + } else { + cl_size = DEFAULT_CLUSTER_SIZE; + } + + /* XXX What is the real limit here? This is an insanely large maximum. */ + if (cl_size >= INT64_MAX / MAX_PARALLELS_IMAGE_FACTOR) { + error_setg(errp, "Cluster size is too large"); + return -EINVAL; + } + if (total_size >= MAX_PARALLELS_IMAGE_FACTOR * cl_size) { + error_setg(errp, "Image size is too large for this cluster size"); + return -E2BIG; + } + + if (!QEMU_IS_ALIGNED(total_size, BDRV_SECTOR_SIZE)) { + error_setg(errp, "Image size must be a multiple of 512 bytes"); + return -EINVAL; + } + + if (!QEMU_IS_ALIGNED(cl_size, BDRV_SECTOR_SIZE)) { + error_setg(errp, "Cluster size must be a multiple of 512 bytes"); + return -EINVAL; + } + + /* Create BlockBackend to write to the image */ + bs = bdrv_open_blockdev_ref(parallels_opts->file, errp); + if (bs == NULL) { + return -EIO; + } + + blk = blk_new_with_bs(bs, BLK_PERM_WRITE | BLK_PERM_RESIZE, BLK_PERM_ALL, + errp); + if (!blk) { + ret = -EPERM; + goto out; + } + blk_set_allow_write_beyond_eof(blk, true); + + /* Create image format */ + bat_entries = DIV_ROUND_UP(total_size, cl_size); + bat_sectors = DIV_ROUND_UP(bat_entry_off(bat_entries), cl_size); + bat_sectors = (bat_sectors * cl_size) >> BDRV_SECTOR_BITS; + + memset(&header, 0, sizeof(header)); + memcpy(header.magic, HEADER_MAGIC2, sizeof(header.magic)); + header.version = cpu_to_le32(HEADER_VERSION); + /* don't care much about geometry, it is not used on image level */ + header.heads = cpu_to_le32(HEADS_NUMBER); + header.cylinders = cpu_to_le32(total_size / BDRV_SECTOR_SIZE + / HEADS_NUMBER / SEC_IN_CYL); + header.tracks = cpu_to_le32(cl_size >> BDRV_SECTOR_BITS); + header.bat_entries = cpu_to_le32(bat_entries); + header.nb_sectors = cpu_to_le64(DIV_ROUND_UP(total_size, BDRV_SECTOR_SIZE)); + header.data_off = cpu_to_le32(bat_sectors); + + /* write all the data */ + memset(tmp, 0, sizeof(tmp)); + memcpy(tmp, &header, sizeof(header)); + + ret = blk_pwrite(blk, 0, tmp, BDRV_SECTOR_SIZE, 0); + if (ret < 0) { + goto exit; + } + ret = blk_pwrite_zeroes(blk, BDRV_SECTOR_SIZE, + (bat_sectors - 1) << BDRV_SECTOR_BITS, 0); + if (ret < 0) { + goto exit; + } + + ret = 0; +out: + blk_unref(blk); + bdrv_unref(bs); + return ret; + +exit: + error_setg_errno(errp, -ret, "Failed to create Parallels image"); + goto out; +} + +static int coroutine_fn parallels_co_create_opts(BlockDriver *drv, + const char *filename, + QemuOpts *opts, + Error **errp) +{ + BlockdevCreateOptions *create_options = NULL; + BlockDriverState *bs = NULL; + QDict *qdict; + Visitor *v; + int ret; + + static const QDictRenames opt_renames[] = { + { BLOCK_OPT_CLUSTER_SIZE, "cluster-size" }, + { NULL, NULL }, + }; + + /* Parse options and convert legacy syntax */ + qdict = qemu_opts_to_qdict_filtered(opts, NULL, ¶llels_create_opts, + true); + + if (!qdict_rename_keys(qdict, opt_renames, errp)) { + ret = -EINVAL; + goto done; + } + + /* Create and open the file (protocol layer) */ + ret = bdrv_create_file(filename, opts, errp); + if (ret < 0) { + goto done; + } + + bs = bdrv_open(filename, NULL, NULL, + BDRV_O_RDWR | BDRV_O_RESIZE | BDRV_O_PROTOCOL, errp); + if (bs == NULL) { + ret = -EIO; + goto done; + } + + /* Now get the QAPI type BlockdevCreateOptions */ + qdict_put_str(qdict, "driver", "parallels"); + qdict_put_str(qdict, "file", bs->node_name); + + v = qobject_input_visitor_new_flat_confused(qdict, errp); + if (!v) { + ret = -EINVAL; + goto done; + } + + visit_type_BlockdevCreateOptions(v, NULL, &create_options, errp); + visit_free(v); + if (!create_options) { + ret = -EINVAL; + goto done; + } + + /* Silently round up sizes */ + create_options->u.parallels.size = + ROUND_UP(create_options->u.parallels.size, BDRV_SECTOR_SIZE); + create_options->u.parallels.cluster_size = + ROUND_UP(create_options->u.parallels.cluster_size, BDRV_SECTOR_SIZE); + + /* Create the Parallels image (format layer) */ + ret = parallels_co_create(create_options, errp); + if (ret < 0) { + goto done; + } + ret = 0; + +done: + qobject_unref(qdict); + bdrv_unref(bs); + qapi_free_BlockdevCreateOptions(create_options); + return ret; +} + + +static int parallels_probe(const uint8_t *buf, int buf_size, + const char *filename) +{ + const ParallelsHeader *ph = (const void *)buf; + + if (buf_size < sizeof(ParallelsHeader)) { + return 0; + } + + if ((!memcmp(ph->magic, HEADER_MAGIC, 16) || + !memcmp(ph->magic, HEADER_MAGIC2, 16)) && + (le32_to_cpu(ph->version) == HEADER_VERSION)) { + return 100; + } + + return 0; +} + +static int parallels_update_header(BlockDriverState *bs) +{ + BDRVParallelsState *s = bs->opaque; + unsigned size = MAX(bdrv_opt_mem_align(bs->file->bs), + sizeof(ParallelsHeader)); + + if (size > s->header_size) { + size = s->header_size; + } + return bdrv_pwrite_sync(bs->file, 0, s->header, size); +} + +static int parallels_open(BlockDriverState *bs, QDict *options, int flags, + Error **errp) +{ + BDRVParallelsState *s = bs->opaque; + ParallelsHeader ph; + int ret, size, i; + QemuOpts *opts = NULL; + Error *local_err = NULL; + char *buf; + + bs->file = bdrv_open_child(NULL, options, "file", bs, &child_of_bds, + BDRV_CHILD_IMAGE, false, errp); + if (!bs->file) { + return -EINVAL; + } + + ret = bdrv_pread(bs->file, 0, &ph, sizeof(ph)); + if (ret < 0) { + goto fail; + } + + bs->total_sectors = le64_to_cpu(ph.nb_sectors); + + if (le32_to_cpu(ph.version) != HEADER_VERSION) { + goto fail_format; + } + if (!memcmp(ph.magic, HEADER_MAGIC, 16)) { + s->off_multiplier = 1; + bs->total_sectors = 0xffffffff & bs->total_sectors; + } else if (!memcmp(ph.magic, HEADER_MAGIC2, 16)) { + s->off_multiplier = le32_to_cpu(ph.tracks); + } else { + goto fail_format; + } + + s->tracks = le32_to_cpu(ph.tracks); + if (s->tracks == 0) { + error_setg(errp, "Invalid image: Zero sectors per track"); + ret = -EINVAL; + goto fail; + } + if (s->tracks > INT32_MAX/513) { + error_setg(errp, "Invalid image: Too big cluster"); + ret = -EFBIG; + goto fail; + } + + s->bat_size = le32_to_cpu(ph.bat_entries); + if (s->bat_size > INT_MAX / sizeof(uint32_t)) { + error_setg(errp, "Catalog too large"); + ret = -EFBIG; + goto fail; + } + + size = bat_entry_off(s->bat_size); + s->header_size = ROUND_UP(size, bdrv_opt_mem_align(bs->file->bs)); + s->header = qemu_try_blockalign(bs->file->bs, s->header_size); + if (s->header == NULL) { + ret = -ENOMEM; + goto fail; + } + s->data_end = le32_to_cpu(ph.data_off); + if (s->data_end == 0) { + s->data_end = ROUND_UP(bat_entry_off(s->bat_size), BDRV_SECTOR_SIZE); + } + if (s->data_end < s->header_size) { + /* there is not enough unused space to fit to block align between BAT + and actual data. We can't avoid read-modify-write... */ + s->header_size = size; + } + + ret = bdrv_pread(bs->file, 0, s->header, s->header_size); + if (ret < 0) { + goto fail; + } + s->bat_bitmap = (uint32_t *)(s->header + 1); + + for (i = 0; i < s->bat_size; i++) { + int64_t off = bat2sect(s, i); + if (off >= s->data_end) { + s->data_end = off + s->tracks; + } + } + + if (le32_to_cpu(ph.inuse) == HEADER_INUSE_MAGIC) { + /* Image was not closed correctly. The check is mandatory */ + s->header_unclean = true; + if ((flags & BDRV_O_RDWR) && !(flags & BDRV_O_CHECK)) { + error_setg(errp, "parallels: Image was not closed correctly; " + "cannot be opened read/write"); + ret = -EACCES; + goto fail; + } + } + + opts = qemu_opts_create(¶llels_runtime_opts, NULL, 0, errp); + if (!opts) { + goto fail_options; + } + + if (!qemu_opts_absorb_qdict(opts, options, errp)) { + goto fail_options; + } + + s->prealloc_size = + qemu_opt_get_size_del(opts, PARALLELS_OPT_PREALLOC_SIZE, 0); + s->prealloc_size = MAX(s->tracks, s->prealloc_size >> BDRV_SECTOR_BITS); + buf = qemu_opt_get_del(opts, PARALLELS_OPT_PREALLOC_MODE); + /* prealloc_mode can be downgraded later during allocate_clusters */ + s->prealloc_mode = qapi_enum_parse(&prealloc_mode_lookup, buf, + PRL_PREALLOC_MODE_FALLOCATE, + &local_err); + g_free(buf); + if (local_err != NULL) { + error_propagate(errp, local_err); + goto fail_options; + } + + if ((flags & BDRV_O_RDWR) && !(flags & BDRV_O_INACTIVE)) { + s->header->inuse = cpu_to_le32(HEADER_INUSE_MAGIC); + ret = parallels_update_header(bs); + if (ret < 0) { + goto fail; + } + } + + s->bat_dirty_block = 4 * qemu_real_host_page_size; + s->bat_dirty_bmap = + bitmap_new(DIV_ROUND_UP(s->header_size, s->bat_dirty_block)); + + /* Disable migration until bdrv_invalidate_cache method is added */ + error_setg(&s->migration_blocker, "The Parallels format used by node '%s' " + "does not support live migration", + bdrv_get_device_or_node_name(bs)); + ret = migrate_add_blocker(s->migration_blocker, errp); + if (ret < 0) { + error_free(s->migration_blocker); + goto fail; + } + qemu_co_mutex_init(&s->lock); + return 0; + +fail_format: + error_setg(errp, "Image not in Parallels format"); +fail_options: + ret = -EINVAL; +fail: + qemu_vfree(s->header); + return ret; +} + + +static void parallels_close(BlockDriverState *bs) +{ + BDRVParallelsState *s = bs->opaque; + + if ((bs->open_flags & BDRV_O_RDWR) && !(bs->open_flags & BDRV_O_INACTIVE)) { + s->header->inuse = 0; + parallels_update_header(bs); + + /* errors are ignored, so we might as well pass exact=true */ + bdrv_truncate(bs->file, s->data_end << BDRV_SECTOR_BITS, true, + PREALLOC_MODE_OFF, 0, NULL); + } + + g_free(s->bat_dirty_bmap); + qemu_vfree(s->header); + + migrate_del_blocker(s->migration_blocker); + error_free(s->migration_blocker); +} + +static BlockDriver bdrv_parallels = { + .format_name = "parallels", + .instance_size = sizeof(BDRVParallelsState), + .bdrv_probe = parallels_probe, + .bdrv_open = parallels_open, + .bdrv_close = parallels_close, + .bdrv_child_perm = bdrv_default_perms, + .bdrv_co_block_status = parallels_co_block_status, + .bdrv_has_zero_init = bdrv_has_zero_init_1, + .bdrv_co_flush_to_os = parallels_co_flush_to_os, + .bdrv_co_readv = parallels_co_readv, + .bdrv_co_writev = parallels_co_writev, + .is_format = true, + .supports_backing = true, + .bdrv_co_create = parallels_co_create, + .bdrv_co_create_opts = parallels_co_create_opts, + .bdrv_co_check = parallels_co_check, + .create_opts = ¶llels_create_opts, +}; + +static void bdrv_parallels_init(void) +{ + bdrv_register(&bdrv_parallels); +} + +block_init(bdrv_parallels_init); diff --git a/block/parallels.h b/block/parallels.h new file mode 100644 index 000000000..5aa101cfc --- /dev/null +++ b/block/parallels.h @@ -0,0 +1,87 @@ +/* +* Block driver for Parallels disk image format +* +* Copyright (c) 2015-2017 Virtuozzo, Inc. +* Authors: +* 2016-2017 Klim S. Kireev +* 2015 Denis V. Lunev +* +* This code was originally based on comparing different disk images created +* by Parallels. Currently it is based on opened OpenVZ sources +* available at +* https://github.com/OpenVZ/ploop +* +* Permission is hereby granted, free of charge, to any person obtaining a copy +* of this software and associated documentation files (the "Software"), to deal +* in the Software without restriction, including without limitation the rights +* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +* copies of the Software, and to permit persons to whom the Software is +* furnished to do so, subject to the following conditions: +* +* The above copyright notice and this permission notice shall be included in +* all copies or substantial portions of the Software. +* +* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL +* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +* THE SOFTWARE. +*/ +#ifndef BLOCK_PARALLELS_H +#define BLOCK_PARALLELS_H +#include "qemu/coroutine.h" + +#define HEADS_NUMBER 16 +#define SEC_IN_CYL 32 +#define DEFAULT_CLUSTER_SIZE 1048576 /* 1 MiB */ + +/* always little-endian */ +typedef struct ParallelsHeader { + char magic[16]; /* "WithoutFreeSpace" */ + uint32_t version; + uint32_t heads; + uint32_t cylinders; + uint32_t tracks; + uint32_t bat_entries; + uint64_t nb_sectors; + uint32_t inuse; + uint32_t data_off; + char padding[12]; +} QEMU_PACKED ParallelsHeader; + +typedef enum ParallelsPreallocMode { + PRL_PREALLOC_MODE_FALLOCATE = 0, + PRL_PREALLOC_MODE_TRUNCATE = 1, + PRL_PREALLOC_MODE__MAX = 2, +} ParallelsPreallocMode; + +typedef struct BDRVParallelsState { + /** Locking is conservative, the lock protects + * - image file extending (truncate, fallocate) + * - any access to block allocation table + */ + CoMutex lock; + + ParallelsHeader *header; + uint32_t header_size; + bool header_unclean; + + unsigned long *bat_dirty_bmap; + unsigned int bat_dirty_block; + + uint32_t *bat_bitmap; + unsigned int bat_size; + + int64_t data_end; + uint64_t prealloc_size; + ParallelsPreallocMode prealloc_mode; + + unsigned int tracks; + + unsigned int off_multiplier; + Error *migration_blocker; +} BDRVParallelsState; + +#endif diff --git a/block/qapi-sysemu.c b/block/qapi-sysemu.c new file mode 100644 index 000000000..8498402ad --- /dev/null +++ b/block/qapi-sysemu.c @@ -0,0 +1,590 @@ +/* + * QMP command handlers specific to the system emulators + * + * Copyright (c) 2003-2008 Fabrice Bellard + * + * This work is licensed under the terms of the GNU GPL, version 2 or + * later. See the COPYING file in the top-level directory. + * + * This file incorporates work covered by the following copyright and + * permission notice: + * + * Copyright (c) 2003-2008 Fabrice Bellard + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ + +#include "qemu/osdep.h" + +#include "qapi/error.h" +#include "qapi/qapi-commands-block.h" +#include "qapi/qmp/qdict.h" +#include "sysemu/block-backend.h" +#include "sysemu/blockdev.h" + +static BlockBackend *qmp_get_blk(const char *blk_name, const char *qdev_id, + Error **errp) +{ + BlockBackend *blk; + + if (!blk_name == !qdev_id) { + error_setg(errp, "Need exactly one of 'device' and 'id'"); + return NULL; + } + + if (qdev_id) { + blk = blk_by_qdev_id(qdev_id, errp); + } else { + blk = blk_by_name(blk_name); + if (blk == NULL) { + error_set(errp, ERROR_CLASS_DEVICE_NOT_FOUND, + "Device '%s' not found", blk_name); + } + } + + return blk; +} + +/* + * Attempt to open the tray of @device. + * If @force, ignore its tray lock. + * Else, if the tray is locked, don't open it, but ask the guest to open it. + * On error, store an error through @errp and return -errno. + * If @device does not exist, return -ENODEV. + * If it has no removable media, return -ENOTSUP. + * If it has no tray, return -ENOSYS. + * If the guest was asked to open the tray, return -EINPROGRESS. + * Else, return 0. + */ +static int do_open_tray(const char *blk_name, const char *qdev_id, + bool force, Error **errp) +{ + BlockBackend *blk; + const char *device = qdev_id ?: blk_name; + bool locked; + + blk = qmp_get_blk(blk_name, qdev_id, errp); + if (!blk) { + return -ENODEV; + } + + if (!blk_dev_has_removable_media(blk)) { + error_setg(errp, "Device '%s' is not removable", device); + return -ENOTSUP; + } + + if (!blk_dev_has_tray(blk)) { + error_setg(errp, "Device '%s' does not have a tray", device); + return -ENOSYS; + } + + if (blk_dev_is_tray_open(blk)) { + return 0; + } + + locked = blk_dev_is_medium_locked(blk); + if (locked) { + blk_dev_eject_request(blk, force); + } + + if (!locked || force) { + blk_dev_change_media_cb(blk, false, &error_abort); + } + + if (locked && !force) { + error_setg(errp, "Device '%s' is locked and force was not specified, " + "wait for tray to open and try again", device); + return -EINPROGRESS; + } + + return 0; +} + +void qmp_blockdev_open_tray(bool has_device, const char *device, + bool has_id, const char *id, + bool has_force, bool force, + Error **errp) +{ + Error *local_err = NULL; + int rc; + + if (!has_force) { + force = false; + } + rc = do_open_tray(has_device ? device : NULL, + has_id ? id : NULL, + force, &local_err); + if (rc && rc != -ENOSYS && rc != -EINPROGRESS) { + error_propagate(errp, local_err); + return; + } + error_free(local_err); +} + +void qmp_blockdev_close_tray(bool has_device, const char *device, + bool has_id, const char *id, + Error **errp) +{ + BlockBackend *blk; + Error *local_err = NULL; + + device = has_device ? device : NULL; + id = has_id ? id : NULL; + + blk = qmp_get_blk(device, id, errp); + if (!blk) { + return; + } + + if (!blk_dev_has_removable_media(blk)) { + error_setg(errp, "Device '%s' is not removable", device ?: id); + return; + } + + if (!blk_dev_has_tray(blk)) { + /* Ignore this command on tray-less devices */ + return; + } + + if (!blk_dev_is_tray_open(blk)) { + return; + } + + blk_dev_change_media_cb(blk, true, &local_err); + if (local_err) { + error_propagate(errp, local_err); + return; + } +} + +static void blockdev_remove_medium(bool has_device, const char *device, + bool has_id, const char *id, Error **errp) +{ + BlockBackend *blk; + BlockDriverState *bs; + AioContext *aio_context; + bool has_attached_device; + + device = has_device ? device : NULL; + id = has_id ? id : NULL; + + blk = qmp_get_blk(device, id, errp); + if (!blk) { + return; + } + + /* For BBs without a device, we can exchange the BDS tree at will */ + has_attached_device = blk_get_attached_dev(blk); + + if (has_attached_device && !blk_dev_has_removable_media(blk)) { + error_setg(errp, "Device '%s' is not removable", device ?: id); + return; + } + + if (has_attached_device && blk_dev_has_tray(blk) && + !blk_dev_is_tray_open(blk)) + { + error_setg(errp, "Tray of device '%s' is not open", device ?: id); + return; + } + + bs = blk_bs(blk); + if (!bs) { + return; + } + + aio_context = bdrv_get_aio_context(bs); + aio_context_acquire(aio_context); + + if (bdrv_op_is_blocked(bs, BLOCK_OP_TYPE_EJECT, errp)) { + goto out; + } + + blk_remove_bs(blk); + + if (!blk_dev_has_tray(blk)) { + /* For tray-less devices, blockdev-open-tray is a no-op (or may not be + * called at all); therefore, the medium needs to be ejected here. + * Do it after blk_remove_bs() so blk_is_inserted(blk) returns the @load + * value passed here (i.e. false). */ + blk_dev_change_media_cb(blk, false, &error_abort); + } + +out: + aio_context_release(aio_context); +} + +void qmp_blockdev_remove_medium(const char *id, Error **errp) +{ + blockdev_remove_medium(false, NULL, true, id, errp); +} + +static void qmp_blockdev_insert_anon_medium(BlockBackend *blk, + BlockDriverState *bs, Error **errp) +{ + Error *local_err = NULL; + bool has_device; + int ret; + + /* For BBs without a device, we can exchange the BDS tree at will */ + has_device = blk_get_attached_dev(blk); + + if (has_device && !blk_dev_has_removable_media(blk)) { + error_setg(errp, "Device is not removable"); + return; + } + + if (has_device && blk_dev_has_tray(blk) && !blk_dev_is_tray_open(blk)) { + error_setg(errp, "Tray of the device is not open"); + return; + } + + if (blk_bs(blk)) { + error_setg(errp, "There already is a medium in the device"); + return; + } + + ret = blk_insert_bs(blk, bs, errp); + if (ret < 0) { + return; + } + + if (!blk_dev_has_tray(blk)) { + /* For tray-less devices, blockdev-close-tray is a no-op (or may not be + * called at all); therefore, the medium needs to be pushed into the + * slot here. + * Do it after blk_insert_bs() so blk_is_inserted(blk) returns the @load + * value passed here (i.e. true). */ + blk_dev_change_media_cb(blk, true, &local_err); + if (local_err) { + error_propagate(errp, local_err); + blk_remove_bs(blk); + return; + } + } +} + +static void blockdev_insert_medium(bool has_device, const char *device, + bool has_id, const char *id, + const char *node_name, Error **errp) +{ + BlockBackend *blk; + BlockDriverState *bs; + + blk = qmp_get_blk(has_device ? device : NULL, + has_id ? id : NULL, + errp); + if (!blk) { + return; + } + + bs = bdrv_find_node(node_name); + if (!bs) { + error_setg(errp, "Node '%s' not found", node_name); + return; + } + + if (bdrv_has_blk(bs)) { + error_setg(errp, "Node '%s' is already in use", node_name); + return; + } + + qmp_blockdev_insert_anon_medium(blk, bs, errp); +} + +void qmp_blockdev_insert_medium(const char *id, const char *node_name, + Error **errp) +{ + blockdev_insert_medium(false, NULL, true, id, node_name, errp); +} + +void qmp_blockdev_change_medium(bool has_device, const char *device, + bool has_id, const char *id, + const char *filename, + bool has_format, const char *format, + bool has_read_only, + BlockdevChangeReadOnlyMode read_only, + Error **errp) +{ + BlockBackend *blk; + BlockDriverState *medium_bs = NULL; + int bdrv_flags; + bool detect_zeroes; + int rc; + QDict *options = NULL; + Error *err = NULL; + + blk = qmp_get_blk(has_device ? device : NULL, + has_id ? id : NULL, + errp); + if (!blk) { + goto fail; + } + + if (blk_bs(blk)) { + blk_update_root_state(blk); + } + + bdrv_flags = blk_get_open_flags_from_root_state(blk); + bdrv_flags &= ~(BDRV_O_TEMPORARY | BDRV_O_SNAPSHOT | BDRV_O_NO_BACKING | + BDRV_O_PROTOCOL | BDRV_O_AUTO_RDONLY); + + if (!has_read_only) { + read_only = BLOCKDEV_CHANGE_READ_ONLY_MODE_RETAIN; + } + + switch (read_only) { + case BLOCKDEV_CHANGE_READ_ONLY_MODE_RETAIN: + break; + + case BLOCKDEV_CHANGE_READ_ONLY_MODE_READ_ONLY: + bdrv_flags &= ~BDRV_O_RDWR; + break; + + case BLOCKDEV_CHANGE_READ_ONLY_MODE_READ_WRITE: + bdrv_flags |= BDRV_O_RDWR; + break; + + default: + abort(); + } + + options = qdict_new(); + detect_zeroes = blk_get_detect_zeroes_from_root_state(blk); + qdict_put_str(options, "detect-zeroes", detect_zeroes ? "on" : "off"); + + if (has_format) { + qdict_put_str(options, "driver", format); + } + + medium_bs = bdrv_open(filename, NULL, options, bdrv_flags, errp); + if (!medium_bs) { + goto fail; + } + + rc = do_open_tray(has_device ? device : NULL, + has_id ? id : NULL, + false, &err); + if (rc && rc != -ENOSYS) { + error_propagate(errp, err); + goto fail; + } + error_free(err); + err = NULL; + + blockdev_remove_medium(has_device, device, has_id, id, &err); + if (err) { + error_propagate(errp, err); + goto fail; + } + + qmp_blockdev_insert_anon_medium(blk, medium_bs, &err); + if (err) { + error_propagate(errp, err); + goto fail; + } + + qmp_blockdev_close_tray(has_device, device, has_id, id, errp); + +fail: + /* If the medium has been inserted, the device has its own reference, so + * ours must be relinquished; and if it has not been inserted successfully, + * the reference must be relinquished anyway */ + bdrv_unref(medium_bs); +} + +void qmp_eject(bool has_device, const char *device, + bool has_id, const char *id, + bool has_force, bool force, Error **errp) +{ + Error *local_err = NULL; + int rc; + + if (!has_force) { + force = false; + } + + rc = do_open_tray(has_device ? device : NULL, + has_id ? id : NULL, + force, &local_err); + if (rc && rc != -ENOSYS) { + error_propagate(errp, local_err); + return; + } + error_free(local_err); + + blockdev_remove_medium(has_device, device, has_id, id, errp); +} + +/* throttling disk I/O limits */ +void qmp_block_set_io_throttle(BlockIOThrottle *arg, Error **errp) +{ + ThrottleConfig cfg; + BlockDriverState *bs; + BlockBackend *blk; + AioContext *aio_context; + + blk = qmp_get_blk(arg->has_device ? arg->device : NULL, + arg->has_id ? arg->id : NULL, + errp); + if (!blk) { + return; + } + + aio_context = blk_get_aio_context(blk); + aio_context_acquire(aio_context); + + bs = blk_bs(blk); + if (!bs) { + error_setg(errp, "Device has no medium"); + goto out; + } + + throttle_config_init(&cfg); + cfg.buckets[THROTTLE_BPS_TOTAL].avg = arg->bps; + cfg.buckets[THROTTLE_BPS_READ].avg = arg->bps_rd; + cfg.buckets[THROTTLE_BPS_WRITE].avg = arg->bps_wr; + + cfg.buckets[THROTTLE_OPS_TOTAL].avg = arg->iops; + cfg.buckets[THROTTLE_OPS_READ].avg = arg->iops_rd; + cfg.buckets[THROTTLE_OPS_WRITE].avg = arg->iops_wr; + + if (arg->has_bps_max) { + cfg.buckets[THROTTLE_BPS_TOTAL].max = arg->bps_max; + } + if (arg->has_bps_rd_max) { + cfg.buckets[THROTTLE_BPS_READ].max = arg->bps_rd_max; + } + if (arg->has_bps_wr_max) { + cfg.buckets[THROTTLE_BPS_WRITE].max = arg->bps_wr_max; + } + if (arg->has_iops_max) { + cfg.buckets[THROTTLE_OPS_TOTAL].max = arg->iops_max; + } + if (arg->has_iops_rd_max) { + cfg.buckets[THROTTLE_OPS_READ].max = arg->iops_rd_max; + } + if (arg->has_iops_wr_max) { + cfg.buckets[THROTTLE_OPS_WRITE].max = arg->iops_wr_max; + } + + if (arg->has_bps_max_length) { + cfg.buckets[THROTTLE_BPS_TOTAL].burst_length = arg->bps_max_length; + } + if (arg->has_bps_rd_max_length) { + cfg.buckets[THROTTLE_BPS_READ].burst_length = arg->bps_rd_max_length; + } + if (arg->has_bps_wr_max_length) { + cfg.buckets[THROTTLE_BPS_WRITE].burst_length = arg->bps_wr_max_length; + } + if (arg->has_iops_max_length) { + cfg.buckets[THROTTLE_OPS_TOTAL].burst_length = arg->iops_max_length; + } + if (arg->has_iops_rd_max_length) { + cfg.buckets[THROTTLE_OPS_READ].burst_length = arg->iops_rd_max_length; + } + if (arg->has_iops_wr_max_length) { + cfg.buckets[THROTTLE_OPS_WRITE].burst_length = arg->iops_wr_max_length; + } + + if (arg->has_iops_size) { + cfg.op_size = arg->iops_size; + } + + if (!throttle_is_valid(&cfg, errp)) { + goto out; + } + + if (throttle_enabled(&cfg)) { + /* Enable I/O limits if they're not enabled yet, otherwise + * just update the throttling group. */ + if (!blk_get_public(blk)->throttle_group_member.throttle_state) { + blk_io_limits_enable(blk, + arg->has_group ? arg->group : + arg->has_device ? arg->device : + arg->id); + } else if (arg->has_group) { + blk_io_limits_update_group(blk, arg->group); + } + /* Set the new throttling configuration */ + blk_set_io_limits(blk, &cfg); + } else if (blk_get_public(blk)->throttle_group_member.throttle_state) { + /* If all throttling settings are set to 0, disable I/O limits */ + blk_io_limits_disable(blk); + } + +out: + aio_context_release(aio_context); +} + +void qmp_block_latency_histogram_set( + const char *id, + bool has_boundaries, uint64List *boundaries, + bool has_boundaries_read, uint64List *boundaries_read, + bool has_boundaries_write, uint64List *boundaries_write, + bool has_boundaries_flush, uint64List *boundaries_flush, + Error **errp) +{ + BlockBackend *blk = qmp_get_blk(NULL, id, errp); + BlockAcctStats *stats; + int ret; + + if (!blk) { + return; + } + + stats = blk_get_stats(blk); + + if (!has_boundaries && !has_boundaries_read && !has_boundaries_write && + !has_boundaries_flush) + { + block_latency_histograms_clear(stats); + return; + } + + if (has_boundaries || has_boundaries_read) { + ret = block_latency_histogram_set( + stats, BLOCK_ACCT_READ, + has_boundaries_read ? boundaries_read : boundaries); + if (ret) { + error_setg(errp, "Device '%s' set read boundaries fail", id); + return; + } + } + + if (has_boundaries || has_boundaries_write) { + ret = block_latency_histogram_set( + stats, BLOCK_ACCT_WRITE, + has_boundaries_write ? boundaries_write : boundaries); + if (ret) { + error_setg(errp, "Device '%s' set write boundaries fail", id); + return; + } + } + + if (has_boundaries || has_boundaries_flush) { + ret = block_latency_histogram_set( + stats, BLOCK_ACCT_FLUSH, + has_boundaries_flush ? boundaries_flush : boundaries); + if (ret) { + error_setg(errp, "Device '%s' set flush boundaries fail", id); + return; + } + } +} diff --git a/block/qapi.c b/block/qapi.c new file mode 100644 index 000000000..036da085e --- /dev/null +++ b/block/qapi.c @@ -0,0 +1,907 @@ +/* + * Block layer qmp and info dump related functions + * + * Copyright (c) 2003-2008 Fabrice Bellard + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ + +#include "qemu/osdep.h" +#include "qemu/cutils.h" +#include "block/qapi.h" +#include "block/block_int.h" +#include "block/throttle-groups.h" +#include "block/write-threshold.h" +#include "qapi/error.h" +#include "qapi/qapi-commands-block-core.h" +#include "qapi/qobject-output-visitor.h" +#include "qapi/qapi-visit-block-core.h" +#include "qapi/qmp/qbool.h" +#include "qapi/qmp/qdict.h" +#include "qapi/qmp/qlist.h" +#include "qapi/qmp/qnum.h" +#include "qapi/qmp/qstring.h" +#include "qemu/qemu-print.h" +#include "sysemu/block-backend.h" +#include "qemu/cutils.h" + +BlockDeviceInfo *bdrv_block_device_info(BlockBackend *blk, + BlockDriverState *bs, + bool flat, + Error **errp) +{ + ImageInfo **p_image_info; + BlockDriverState *bs0, *backing; + BlockDeviceInfo *info; + + if (!bs->drv) { + error_setg(errp, "Block device %s is ejected", bs->node_name); + return NULL; + } + + bdrv_refresh_filename(bs); + + info = g_malloc0(sizeof(*info)); + info->file = g_strdup(bs->filename); + info->ro = bs->read_only; + info->drv = g_strdup(bs->drv->format_name); + info->encrypted = bs->encrypted; + info->encryption_key_missing = false; + + info->cache = g_new(BlockdevCacheInfo, 1); + *info->cache = (BlockdevCacheInfo) { + .writeback = blk ? blk_enable_write_cache(blk) : true, + .direct = !!(bs->open_flags & BDRV_O_NOCACHE), + .no_flush = !!(bs->open_flags & BDRV_O_NO_FLUSH), + }; + + if (bs->node_name[0]) { + info->has_node_name = true; + info->node_name = g_strdup(bs->node_name); + } + + backing = bdrv_cow_bs(bs); + if (backing) { + info->has_backing_file = true; + info->backing_file = g_strdup(backing->filename); + } + + if (!QLIST_EMPTY(&bs->dirty_bitmaps)) { + info->has_dirty_bitmaps = true; + info->dirty_bitmaps = bdrv_query_dirty_bitmaps(bs); + } + + info->detect_zeroes = bs->detect_zeroes; + + if (blk && blk_get_public(blk)->throttle_group_member.throttle_state) { + ThrottleConfig cfg; + BlockBackendPublic *blkp = blk_get_public(blk); + + throttle_group_get_config(&blkp->throttle_group_member, &cfg); + + info->bps = cfg.buckets[THROTTLE_BPS_TOTAL].avg; + info->bps_rd = cfg.buckets[THROTTLE_BPS_READ].avg; + info->bps_wr = cfg.buckets[THROTTLE_BPS_WRITE].avg; + + info->iops = cfg.buckets[THROTTLE_OPS_TOTAL].avg; + info->iops_rd = cfg.buckets[THROTTLE_OPS_READ].avg; + info->iops_wr = cfg.buckets[THROTTLE_OPS_WRITE].avg; + + info->has_bps_max = cfg.buckets[THROTTLE_BPS_TOTAL].max; + info->bps_max = cfg.buckets[THROTTLE_BPS_TOTAL].max; + info->has_bps_rd_max = cfg.buckets[THROTTLE_BPS_READ].max; + info->bps_rd_max = cfg.buckets[THROTTLE_BPS_READ].max; + info->has_bps_wr_max = cfg.buckets[THROTTLE_BPS_WRITE].max; + info->bps_wr_max = cfg.buckets[THROTTLE_BPS_WRITE].max; + + info->has_iops_max = cfg.buckets[THROTTLE_OPS_TOTAL].max; + info->iops_max = cfg.buckets[THROTTLE_OPS_TOTAL].max; + info->has_iops_rd_max = cfg.buckets[THROTTLE_OPS_READ].max; + info->iops_rd_max = cfg.buckets[THROTTLE_OPS_READ].max; + info->has_iops_wr_max = cfg.buckets[THROTTLE_OPS_WRITE].max; + info->iops_wr_max = cfg.buckets[THROTTLE_OPS_WRITE].max; + + info->has_bps_max_length = info->has_bps_max; + info->bps_max_length = + cfg.buckets[THROTTLE_BPS_TOTAL].burst_length; + info->has_bps_rd_max_length = info->has_bps_rd_max; + info->bps_rd_max_length = + cfg.buckets[THROTTLE_BPS_READ].burst_length; + info->has_bps_wr_max_length = info->has_bps_wr_max; + info->bps_wr_max_length = + cfg.buckets[THROTTLE_BPS_WRITE].burst_length; + + info->has_iops_max_length = info->has_iops_max; + info->iops_max_length = + cfg.buckets[THROTTLE_OPS_TOTAL].burst_length; + info->has_iops_rd_max_length = info->has_iops_rd_max; + info->iops_rd_max_length = + cfg.buckets[THROTTLE_OPS_READ].burst_length; + info->has_iops_wr_max_length = info->has_iops_wr_max; + info->iops_wr_max_length = + cfg.buckets[THROTTLE_OPS_WRITE].burst_length; + + info->has_iops_size = cfg.op_size; + info->iops_size = cfg.op_size; + + info->has_group = true; + info->group = + g_strdup(throttle_group_get_name(&blkp->throttle_group_member)); + } + + info->write_threshold = bdrv_write_threshold_get(bs); + + bs0 = bs; + p_image_info = &info->image; + info->backing_file_depth = 0; + while (1) { + Error *local_err = NULL; + bdrv_query_image_info(bs0, p_image_info, &local_err); + if (local_err) { + error_propagate(errp, local_err); + qapi_free_BlockDeviceInfo(info); + return NULL; + } + + /* stop gathering data for flat output */ + if (flat) { + break; + } + + if (bs0->drv && bdrv_filter_or_cow_child(bs0)) { + /* + * Put any filtered child here (for backwards compatibility to when + * we put bs0->backing here, which might be any filtered child). + */ + info->backing_file_depth++; + bs0 = bdrv_filter_or_cow_bs(bs0); + (*p_image_info)->has_backing_image = true; + p_image_info = &((*p_image_info)->backing_image); + } else { + break; + } + + /* Skip automatically inserted nodes that the user isn't aware of for + * query-block (blk != NULL), but not for query-named-block-nodes */ + if (blk) { + bs0 = bdrv_skip_implicit_filters(bs0); + } + } + + return info; +} + +/* + * Returns 0 on success, with *p_list either set to describe snapshot + * information, or NULL because there are no snapshots. Returns -errno on + * error, with *p_list untouched. + */ +int bdrv_query_snapshot_info_list(BlockDriverState *bs, + SnapshotInfoList **p_list, + Error **errp) +{ + int i, sn_count; + QEMUSnapshotInfo *sn_tab = NULL; + SnapshotInfoList *info_list, *cur_item = NULL, *head = NULL; + SnapshotInfo *info; + + sn_count = bdrv_snapshot_list(bs, &sn_tab); + if (sn_count < 0) { + const char *dev = bdrv_get_device_name(bs); + switch (sn_count) { + case -ENOMEDIUM: + error_setg(errp, "Device '%s' is not inserted", dev); + break; + case -ENOTSUP: + error_setg(errp, + "Device '%s' does not support internal snapshots", + dev); + break; + default: + error_setg_errno(errp, -sn_count, + "Can't list snapshots of device '%s'", dev); + break; + } + return sn_count; + } + + for (i = 0; i < sn_count; i++) { + info = g_new0(SnapshotInfo, 1); + info->id = g_strdup(sn_tab[i].id_str); + info->name = g_strdup(sn_tab[i].name); + info->vm_state_size = sn_tab[i].vm_state_size; + info->date_sec = sn_tab[i].date_sec; + info->date_nsec = sn_tab[i].date_nsec; + info->vm_clock_sec = sn_tab[i].vm_clock_nsec / 1000000000; + info->vm_clock_nsec = sn_tab[i].vm_clock_nsec % 1000000000; + info->icount = sn_tab[i].icount; + info->has_icount = sn_tab[i].icount != -1ULL; + + info_list = g_new0(SnapshotInfoList, 1); + info_list->value = info; + + /* XXX: waiting for the qapi to support qemu-queue.h types */ + if (!cur_item) { + head = cur_item = info_list; + } else { + cur_item->next = info_list; + cur_item = info_list; + } + + } + + g_free(sn_tab); + *p_list = head; + return 0; +} + +/** + * bdrv_query_image_info: + * @bs: block device to examine + * @p_info: location to store image information + * @errp: location to store error information + * + * Store "flat" image information in @p_info. + * + * "Flat" means it does *not* query backing image information, + * i.e. (*pinfo)->has_backing_image will be set to false and + * (*pinfo)->backing_image to NULL even when the image does in fact have + * a backing image. + * + * @p_info will be set only on success. On error, store error in @errp. + */ +void bdrv_query_image_info(BlockDriverState *bs, + ImageInfo **p_info, + Error **errp) +{ + int64_t size; + const char *backing_filename; + BlockDriverInfo bdi; + int ret; + Error *err = NULL; + ImageInfo *info; + + aio_context_acquire(bdrv_get_aio_context(bs)); + + size = bdrv_getlength(bs); + if (size < 0) { + error_setg_errno(errp, -size, "Can't get image size '%s'", + bs->exact_filename); + goto out; + } + + bdrv_refresh_filename(bs); + + info = g_new0(ImageInfo, 1); + info->filename = g_strdup(bs->filename); + info->format = g_strdup(bdrv_get_format_name(bs)); + info->virtual_size = size; + info->actual_size = bdrv_get_allocated_file_size(bs); + info->has_actual_size = info->actual_size >= 0; + if (bs->encrypted) { + info->encrypted = true; + info->has_encrypted = true; + } + if (bdrv_get_info(bs, &bdi) >= 0) { + if (bdi.cluster_size != 0) { + info->cluster_size = bdi.cluster_size; + info->has_cluster_size = true; + } + info->dirty_flag = bdi.is_dirty; + info->has_dirty_flag = true; + } + info->format_specific = bdrv_get_specific_info(bs, &err); + if (err) { + error_propagate(errp, err); + qapi_free_ImageInfo(info); + goto out; + } + info->has_format_specific = info->format_specific != NULL; + + backing_filename = bs->backing_file; + if (backing_filename[0] != '\0') { + char *backing_filename2; + + info->backing_filename = g_strdup(backing_filename); + info->has_backing_filename = true; + backing_filename2 = bdrv_get_full_backing_filename(bs, NULL); + + /* Always report the full_backing_filename if present, even if it's the + * same as backing_filename. That they are same is useful info. */ + if (backing_filename2) { + info->full_backing_filename = g_strdup(backing_filename2); + info->has_full_backing_filename = true; + } + + if (bs->backing_format[0]) { + info->backing_filename_format = g_strdup(bs->backing_format); + info->has_backing_filename_format = true; + } + g_free(backing_filename2); + } + + ret = bdrv_query_snapshot_info_list(bs, &info->snapshots, &err); + switch (ret) { + case 0: + if (info->snapshots) { + info->has_snapshots = true; + } + break; + /* recoverable error */ + case -ENOMEDIUM: + case -ENOTSUP: + error_free(err); + break; + default: + error_propagate(errp, err); + qapi_free_ImageInfo(info); + goto out; + } + + *p_info = info; + +out: + aio_context_release(bdrv_get_aio_context(bs)); +} + +/* @p_info will be set only on success. */ +static void bdrv_query_info(BlockBackend *blk, BlockInfo **p_info, + Error **errp) +{ + BlockInfo *info = g_malloc0(sizeof(*info)); + BlockDriverState *bs = blk_bs(blk); + char *qdev; + + /* Skip automatically inserted nodes that the user isn't aware of */ + bs = bdrv_skip_implicit_filters(bs); + + info->device = g_strdup(blk_name(blk)); + info->type = g_strdup("unknown"); + info->locked = blk_dev_is_medium_locked(blk); + info->removable = blk_dev_has_removable_media(blk); + + qdev = blk_get_attached_dev_id(blk); + if (qdev && *qdev) { + info->has_qdev = true; + info->qdev = qdev; + } else { + g_free(qdev); + } + + if (blk_dev_has_tray(blk)) { + info->has_tray_open = true; + info->tray_open = blk_dev_is_tray_open(blk); + } + + if (blk_iostatus_is_enabled(blk)) { + info->has_io_status = true; + info->io_status = blk_iostatus(blk); + } + + if (bs && !QLIST_EMPTY(&bs->dirty_bitmaps)) { + info->has_dirty_bitmaps = true; + info->dirty_bitmaps = bdrv_query_dirty_bitmaps(bs); + } + + if (bs && bs->drv) { + info->has_inserted = true; + info->inserted = bdrv_block_device_info(blk, bs, false, errp); + if (info->inserted == NULL) { + goto err; + } + } + + *p_info = info; + return; + + err: + qapi_free_BlockInfo(info); +} + +static uint64List *uint64_list(uint64_t *list, int size) +{ + int i; + uint64List *out_list = NULL; + uint64List **pout_list = &out_list; + + for (i = 0; i < size; i++) { + uint64List *entry = g_new(uint64List, 1); + entry->value = list[i]; + *pout_list = entry; + pout_list = &entry->next; + } + + *pout_list = NULL; + + return out_list; +} + +static void bdrv_latency_histogram_stats(BlockLatencyHistogram *hist, + bool *not_null, + BlockLatencyHistogramInfo **info) +{ + *not_null = hist->bins != NULL; + if (*not_null) { + *info = g_new0(BlockLatencyHistogramInfo, 1); + + (*info)->boundaries = uint64_list(hist->boundaries, hist->nbins - 1); + (*info)->bins = uint64_list(hist->bins, hist->nbins); + } +} + +static void bdrv_query_blk_stats(BlockDeviceStats *ds, BlockBackend *blk) +{ + BlockAcctStats *stats = blk_get_stats(blk); + BlockAcctTimedStats *ts = NULL; + + ds->rd_bytes = stats->nr_bytes[BLOCK_ACCT_READ]; + ds->wr_bytes = stats->nr_bytes[BLOCK_ACCT_WRITE]; + ds->unmap_bytes = stats->nr_bytes[BLOCK_ACCT_UNMAP]; + ds->rd_operations = stats->nr_ops[BLOCK_ACCT_READ]; + ds->wr_operations = stats->nr_ops[BLOCK_ACCT_WRITE]; + ds->unmap_operations = stats->nr_ops[BLOCK_ACCT_UNMAP]; + + ds->failed_rd_operations = stats->failed_ops[BLOCK_ACCT_READ]; + ds->failed_wr_operations = stats->failed_ops[BLOCK_ACCT_WRITE]; + ds->failed_flush_operations = stats->failed_ops[BLOCK_ACCT_FLUSH]; + ds->failed_unmap_operations = stats->failed_ops[BLOCK_ACCT_UNMAP]; + + ds->invalid_rd_operations = stats->invalid_ops[BLOCK_ACCT_READ]; + ds->invalid_wr_operations = stats->invalid_ops[BLOCK_ACCT_WRITE]; + ds->invalid_flush_operations = + stats->invalid_ops[BLOCK_ACCT_FLUSH]; + ds->invalid_unmap_operations = stats->invalid_ops[BLOCK_ACCT_UNMAP]; + + ds->rd_merged = stats->merged[BLOCK_ACCT_READ]; + ds->wr_merged = stats->merged[BLOCK_ACCT_WRITE]; + ds->unmap_merged = stats->merged[BLOCK_ACCT_UNMAP]; + ds->flush_operations = stats->nr_ops[BLOCK_ACCT_FLUSH]; + ds->wr_total_time_ns = stats->total_time_ns[BLOCK_ACCT_WRITE]; + ds->rd_total_time_ns = stats->total_time_ns[BLOCK_ACCT_READ]; + ds->flush_total_time_ns = stats->total_time_ns[BLOCK_ACCT_FLUSH]; + ds->unmap_total_time_ns = stats->total_time_ns[BLOCK_ACCT_UNMAP]; + + ds->has_idle_time_ns = stats->last_access_time_ns > 0; + if (ds->has_idle_time_ns) { + ds->idle_time_ns = block_acct_idle_time_ns(stats); + } + + ds->account_invalid = stats->account_invalid; + ds->account_failed = stats->account_failed; + + while ((ts = block_acct_interval_next(stats, ts))) { + BlockDeviceTimedStatsList *timed_stats = + g_malloc0(sizeof(*timed_stats)); + BlockDeviceTimedStats *dev_stats = g_malloc0(sizeof(*dev_stats)); + timed_stats->next = ds->timed_stats; + timed_stats->value = dev_stats; + ds->timed_stats = timed_stats; + + TimedAverage *rd = &ts->latency[BLOCK_ACCT_READ]; + TimedAverage *wr = &ts->latency[BLOCK_ACCT_WRITE]; + TimedAverage *fl = &ts->latency[BLOCK_ACCT_FLUSH]; + + dev_stats->interval_length = ts->interval_length; + + dev_stats->min_rd_latency_ns = timed_average_min(rd); + dev_stats->max_rd_latency_ns = timed_average_max(rd); + dev_stats->avg_rd_latency_ns = timed_average_avg(rd); + + dev_stats->min_wr_latency_ns = timed_average_min(wr); + dev_stats->max_wr_latency_ns = timed_average_max(wr); + dev_stats->avg_wr_latency_ns = timed_average_avg(wr); + + dev_stats->min_flush_latency_ns = timed_average_min(fl); + dev_stats->max_flush_latency_ns = timed_average_max(fl); + dev_stats->avg_flush_latency_ns = timed_average_avg(fl); + + dev_stats->avg_rd_queue_depth = + block_acct_queue_depth(ts, BLOCK_ACCT_READ); + dev_stats->avg_wr_queue_depth = + block_acct_queue_depth(ts, BLOCK_ACCT_WRITE); + } + + bdrv_latency_histogram_stats(&stats->latency_histogram[BLOCK_ACCT_READ], + &ds->has_rd_latency_histogram, + &ds->rd_latency_histogram); + bdrv_latency_histogram_stats(&stats->latency_histogram[BLOCK_ACCT_WRITE], + &ds->has_wr_latency_histogram, + &ds->wr_latency_histogram); + bdrv_latency_histogram_stats(&stats->latency_histogram[BLOCK_ACCT_FLUSH], + &ds->has_flush_latency_histogram, + &ds->flush_latency_histogram); +} + +static BlockStats *bdrv_query_bds_stats(BlockDriverState *bs, + bool blk_level) +{ + BdrvChild *parent_child; + BlockDriverState *filter_or_cow_bs; + BlockStats *s = NULL; + + s = g_malloc0(sizeof(*s)); + s->stats = g_malloc0(sizeof(*s->stats)); + + if (!bs) { + return s; + } + + /* Skip automatically inserted nodes that the user isn't aware of in + * a BlockBackend-level command. Stay at the exact node for a node-level + * command. */ + if (blk_level) { + bs = bdrv_skip_implicit_filters(bs); + } + + if (bdrv_get_node_name(bs)[0]) { + s->has_node_name = true; + s->node_name = g_strdup(bdrv_get_node_name(bs)); + } + + s->stats->wr_highest_offset = stat64_get(&bs->wr_highest_offset); + + s->driver_specific = bdrv_get_specific_stats(bs); + if (s->driver_specific) { + s->has_driver_specific = true; + } + + parent_child = bdrv_primary_child(bs); + if (!parent_child || + !(parent_child->role & (BDRV_CHILD_DATA | BDRV_CHILD_FILTERED))) + { + BdrvChild *c; + + /* + * Look for a unique data-storing child. We do not need to look for + * filtered children, as there would be only one and it would have been + * the primary child. + */ + parent_child = NULL; + QLIST_FOREACH(c, &bs->children, next) { + if (c->role & BDRV_CHILD_DATA) { + if (parent_child) { + /* + * There are multiple data-storing children and we cannot + * choose between them. + */ + parent_child = NULL; + break; + } + parent_child = c; + } + } + } + if (parent_child) { + s->has_parent = true; + s->parent = bdrv_query_bds_stats(parent_child->bs, blk_level); + } + + filter_or_cow_bs = bdrv_filter_or_cow_bs(bs); + if (blk_level && filter_or_cow_bs) { + /* + * Put any filtered or COW child here (for backwards + * compatibility to when we put bs0->backing here, which might + * be either) + */ + s->has_backing = true; + s->backing = bdrv_query_bds_stats(filter_or_cow_bs, blk_level); + } + + return s; +} + +BlockInfoList *qmp_query_block(Error **errp) +{ + BlockInfoList *head = NULL, **p_next = &head; + BlockBackend *blk; + Error *local_err = NULL; + + for (blk = blk_all_next(NULL); blk; blk = blk_all_next(blk)) { + BlockInfoList *info; + + if (!*blk_name(blk) && !blk_get_attached_dev(blk)) { + continue; + } + + info = g_malloc0(sizeof(*info)); + bdrv_query_info(blk, &info->value, &local_err); + if (local_err) { + error_propagate(errp, local_err); + g_free(info); + qapi_free_BlockInfoList(head); + return NULL; + } + + *p_next = info; + p_next = &info->next; + } + + return head; +} + +BlockStatsList *qmp_query_blockstats(bool has_query_nodes, + bool query_nodes, + Error **errp) +{ + BlockStatsList *head = NULL, **p_next = &head; + BlockBackend *blk; + BlockDriverState *bs; + + /* Just to be safe if query_nodes is not always initialized */ + if (has_query_nodes && query_nodes) { + for (bs = bdrv_next_node(NULL); bs; bs = bdrv_next_node(bs)) { + BlockStatsList *info = g_malloc0(sizeof(*info)); + AioContext *ctx = bdrv_get_aio_context(bs); + + aio_context_acquire(ctx); + info->value = bdrv_query_bds_stats(bs, false); + aio_context_release(ctx); + + *p_next = info; + p_next = &info->next; + } + } else { + for (blk = blk_all_next(NULL); blk; blk = blk_all_next(blk)) { + BlockStatsList *info; + AioContext *ctx = blk_get_aio_context(blk); + BlockStats *s; + char *qdev; + + if (!*blk_name(blk) && !blk_get_attached_dev(blk)) { + continue; + } + + aio_context_acquire(ctx); + s = bdrv_query_bds_stats(blk_bs(blk), true); + s->has_device = true; + s->device = g_strdup(blk_name(blk)); + + qdev = blk_get_attached_dev_id(blk); + if (qdev && *qdev) { + s->has_qdev = true; + s->qdev = qdev; + } else { + g_free(qdev); + } + + bdrv_query_blk_stats(s->stats, blk); + aio_context_release(ctx); + + info = g_malloc0(sizeof(*info)); + info->value = s; + *p_next = info; + p_next = &info->next; + } + } + + return head; +} + +void bdrv_snapshot_dump(QEMUSnapshotInfo *sn) +{ + char date_buf[128], clock_buf[128]; + char icount_buf[128] = {0}; + struct tm tm; + time_t ti; + int64_t secs; + char *sizing = NULL; + + if (!sn) { + qemu_printf("%-10s%-18s%7s%20s%13s%11s", + "ID", "TAG", "VM SIZE", "DATE", "VM CLOCK", "ICOUNT"); + } else { + ti = sn->date_sec; + localtime_r(&ti, &tm); + strftime(date_buf, sizeof(date_buf), + "%Y-%m-%d %H:%M:%S", &tm); + secs = sn->vm_clock_nsec / 1000000000; + snprintf(clock_buf, sizeof(clock_buf), + "%02d:%02d:%02d.%03d", + (int)(secs / 3600), + (int)((secs / 60) % 60), + (int)(secs % 60), + (int)((sn->vm_clock_nsec / 1000000) % 1000)); + sizing = size_to_str(sn->vm_state_size); + if (sn->icount != -1ULL) { + snprintf(icount_buf, sizeof(icount_buf), + "%"PRId64, sn->icount); + } + qemu_printf("%-9s %-17s %7s%20s%13s%11s", + sn->id_str, sn->name, + sizing, + date_buf, + clock_buf, + icount_buf); + } + g_free(sizing); +} + +static void dump_qdict(int indentation, QDict *dict); +static void dump_qlist(int indentation, QList *list); + +static void dump_qobject(int comp_indent, QObject *obj) +{ + switch (qobject_type(obj)) { + case QTYPE_QNUM: { + QNum *value = qobject_to(QNum, obj); + char *tmp = qnum_to_string(value); + qemu_printf("%s", tmp); + g_free(tmp); + break; + } + case QTYPE_QSTRING: { + QString *value = qobject_to(QString, obj); + qemu_printf("%s", qstring_get_str(value)); + break; + } + case QTYPE_QDICT: { + QDict *value = qobject_to(QDict, obj); + dump_qdict(comp_indent, value); + break; + } + case QTYPE_QLIST: { + QList *value = qobject_to(QList, obj); + dump_qlist(comp_indent, value); + break; + } + case QTYPE_QBOOL: { + QBool *value = qobject_to(QBool, obj); + qemu_printf("%s", qbool_get_bool(value) ? "true" : "false"); + break; + } + default: + abort(); + } +} + +static void dump_qlist(int indentation, QList *list) +{ + const QListEntry *entry; + int i = 0; + + for (entry = qlist_first(list); entry; entry = qlist_next(entry), i++) { + QType type = qobject_type(entry->value); + bool composite = (type == QTYPE_QDICT || type == QTYPE_QLIST); + qemu_printf("%*s[%i]:%c", indentation * 4, "", i, + composite ? '\n' : ' '); + dump_qobject(indentation + 1, entry->value); + if (!composite) { + qemu_printf("\n"); + } + } +} + +static void dump_qdict(int indentation, QDict *dict) +{ + const QDictEntry *entry; + + for (entry = qdict_first(dict); entry; entry = qdict_next(dict, entry)) { + QType type = qobject_type(entry->value); + bool composite = (type == QTYPE_QDICT || type == QTYPE_QLIST); + char *key = g_malloc(strlen(entry->key) + 1); + int i; + + /* replace dashes with spaces in key (variable) names */ + for (i = 0; entry->key[i]; i++) { + key[i] = entry->key[i] == '-' ? ' ' : entry->key[i]; + } + key[i] = 0; + qemu_printf("%*s%s:%c", indentation * 4, "", key, + composite ? '\n' : ' '); + dump_qobject(indentation + 1, entry->value); + if (!composite) { + qemu_printf("\n"); + } + g_free(key); + } +} + +void bdrv_image_info_specific_dump(ImageInfoSpecific *info_spec) +{ + QObject *obj, *data; + Visitor *v = qobject_output_visitor_new(&obj); + + visit_type_ImageInfoSpecific(v, NULL, &info_spec, &error_abort); + visit_complete(v, &obj); + data = qdict_get(qobject_to(QDict, obj), "data"); + dump_qobject(1, data); + qobject_unref(obj); + visit_free(v); +} + +void bdrv_image_info_dump(ImageInfo *info) +{ + char *size_buf, *dsize_buf; + if (!info->has_actual_size) { + dsize_buf = g_strdup("unavailable"); + } else { + dsize_buf = size_to_str(info->actual_size); + } + size_buf = size_to_str(info->virtual_size); + qemu_printf("image: %s\n" + "file format: %s\n" + "virtual size: %s (%" PRId64 " bytes)\n" + "disk size: %s\n", + info->filename, info->format, size_buf, + info->virtual_size, + dsize_buf); + g_free(size_buf); + g_free(dsize_buf); + + if (info->has_encrypted && info->encrypted) { + qemu_printf("encrypted: yes\n"); + } + + if (info->has_cluster_size) { + qemu_printf("cluster_size: %" PRId64 "\n", + info->cluster_size); + } + + if (info->has_dirty_flag && info->dirty_flag) { + qemu_printf("cleanly shut down: no\n"); + } + + if (info->has_backing_filename) { + qemu_printf("backing file: %s", info->backing_filename); + if (!info->has_full_backing_filename) { + qemu_printf(" (cannot determine actual path)"); + } else if (strcmp(info->backing_filename, + info->full_backing_filename) != 0) { + qemu_printf(" (actual path: %s)", info->full_backing_filename); + } + qemu_printf("\n"); + if (info->has_backing_filename_format) { + qemu_printf("backing file format: %s\n", + info->backing_filename_format); + } + } + + if (info->has_snapshots) { + SnapshotInfoList *elem; + + qemu_printf("Snapshot list:\n"); + bdrv_snapshot_dump(NULL); + qemu_printf("\n"); + + /* Ideally bdrv_snapshot_dump() would operate on SnapshotInfoList but + * we convert to the block layer's native QEMUSnapshotInfo for now. + */ + for (elem = info->snapshots; elem; elem = elem->next) { + QEMUSnapshotInfo sn = { + .vm_state_size = elem->value->vm_state_size, + .date_sec = elem->value->date_sec, + .date_nsec = elem->value->date_nsec, + .vm_clock_nsec = elem->value->vm_clock_sec * 1000000000ULL + + elem->value->vm_clock_nsec, + .icount = elem->value->has_icount ? + elem->value->icount : -1ULL, + }; + + pstrcpy(sn.id_str, sizeof(sn.id_str), elem->value->id); + pstrcpy(sn.name, sizeof(sn.name), elem->value->name); + bdrv_snapshot_dump(&sn); + qemu_printf("\n"); + } + } + + if (info->has_format_specific) { + qemu_printf("Format specific information:\n"); + bdrv_image_info_specific_dump(info->format_specific); + } +} diff --git a/block/qcow.c b/block/qcow.c new file mode 100644 index 000000000..f8919a44d --- /dev/null +++ b/block/qcow.c @@ -0,0 +1,1212 @@ +/* + * Block driver for the QCOW format + * + * Copyright (c) 2004-2006 Fabrice Bellard + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ + +#include "qemu/osdep.h" +#include "qapi/error.h" +#include "qemu/error-report.h" +#include "block/block_int.h" +#include "block/qdict.h" +#include "sysemu/block-backend.h" +#include "qemu/module.h" +#include "qemu/option.h" +#include "qemu/bswap.h" +#include "qemu/cutils.h" +#include +#include "qapi/qmp/qdict.h" +#include "qapi/qmp/qstring.h" +#include "qapi/qobject-input-visitor.h" +#include "qapi/qapi-visit-block-core.h" +#include "crypto/block.h" +#include "migration/blocker.h" +#include "crypto.h" + +/**************************************************************/ +/* QEMU COW block driver with compression and encryption support */ + +#define QCOW_MAGIC (('Q' << 24) | ('F' << 16) | ('I' << 8) | 0xfb) +#define QCOW_VERSION 1 + +#define QCOW_CRYPT_NONE 0 +#define QCOW_CRYPT_AES 1 + +#define QCOW_OFLAG_COMPRESSED (1LL << 63) + +typedef struct QCowHeader { + uint32_t magic; + uint32_t version; + uint64_t backing_file_offset; + uint32_t backing_file_size; + uint32_t mtime; + uint64_t size; /* in bytes */ + uint8_t cluster_bits; + uint8_t l2_bits; + uint16_t padding; + uint32_t crypt_method; + uint64_t l1_table_offset; +} QEMU_PACKED QCowHeader; + +#define L2_CACHE_SIZE 16 + +typedef struct BDRVQcowState { + int cluster_bits; + int cluster_size; + int l2_bits; + int l2_size; + unsigned int l1_size; + uint64_t cluster_offset_mask; + uint64_t l1_table_offset; + uint64_t *l1_table; + uint64_t *l2_cache; + uint64_t l2_cache_offsets[L2_CACHE_SIZE]; + uint32_t l2_cache_counts[L2_CACHE_SIZE]; + uint8_t *cluster_cache; + uint8_t *cluster_data; + uint64_t cluster_cache_offset; + QCryptoBlock *crypto; /* Disk encryption format driver */ + uint32_t crypt_method_header; + CoMutex lock; + Error *migration_blocker; +} BDRVQcowState; + +static QemuOptsList qcow_create_opts; + +static int decompress_cluster(BlockDriverState *bs, uint64_t cluster_offset); + +static int qcow_probe(const uint8_t *buf, int buf_size, const char *filename) +{ + const QCowHeader *cow_header = (const void *)buf; + + if (buf_size >= sizeof(QCowHeader) && + be32_to_cpu(cow_header->magic) == QCOW_MAGIC && + be32_to_cpu(cow_header->version) == QCOW_VERSION) + return 100; + else + return 0; +} + +static int qcow_open(BlockDriverState *bs, QDict *options, int flags, + Error **errp) +{ + BDRVQcowState *s = bs->opaque; + unsigned int len, i, shift; + int ret; + QCowHeader header; + QCryptoBlockOpenOptions *crypto_opts = NULL; + unsigned int cflags = 0; + QDict *encryptopts = NULL; + const char *encryptfmt; + + qdict_extract_subqdict(options, &encryptopts, "encrypt."); + encryptfmt = qdict_get_try_str(encryptopts, "format"); + + bs->file = bdrv_open_child(NULL, options, "file", bs, &child_of_bds, + BDRV_CHILD_IMAGE, false, errp); + if (!bs->file) { + ret = -EINVAL; + goto fail; + } + + ret = bdrv_pread(bs->file, 0, &header, sizeof(header)); + if (ret < 0) { + goto fail; + } + header.magic = be32_to_cpu(header.magic); + header.version = be32_to_cpu(header.version); + header.backing_file_offset = be64_to_cpu(header.backing_file_offset); + header.backing_file_size = be32_to_cpu(header.backing_file_size); + header.mtime = be32_to_cpu(header.mtime); + header.size = be64_to_cpu(header.size); + header.crypt_method = be32_to_cpu(header.crypt_method); + header.l1_table_offset = be64_to_cpu(header.l1_table_offset); + + if (header.magic != QCOW_MAGIC) { + error_setg(errp, "Image not in qcow format"); + ret = -EINVAL; + goto fail; + } + if (header.version != QCOW_VERSION) { + error_setg(errp, "qcow (v%d) does not support qcow version %" PRIu32, + QCOW_VERSION, header.version); + if (header.version == 2 || header.version == 3) { + error_append_hint(errp, "Try the 'qcow2' driver instead.\n"); + } + + ret = -ENOTSUP; + goto fail; + } + + if (header.size <= 1) { + error_setg(errp, "Image size is too small (must be at least 2 bytes)"); + ret = -EINVAL; + goto fail; + } + if (header.cluster_bits < 9 || header.cluster_bits > 16) { + error_setg(errp, "Cluster size must be between 512 and 64k"); + ret = -EINVAL; + goto fail; + } + + /* l2_bits specifies number of entries; storing a uint64_t in each entry, + * so bytes = num_entries << 3. */ + if (header.l2_bits < 9 - 3 || header.l2_bits > 16 - 3) { + error_setg(errp, "L2 table size must be between 512 and 64k"); + ret = -EINVAL; + goto fail; + } + + s->crypt_method_header = header.crypt_method; + if (s->crypt_method_header) { + if (bdrv_uses_whitelist() && + s->crypt_method_header == QCOW_CRYPT_AES) { + error_setg(errp, + "Use of AES-CBC encrypted qcow images is no longer " + "supported in system emulators"); + error_append_hint(errp, + "You can use 'qemu-img convert' to convert your " + "image to an alternative supported format, such " + "as unencrypted qcow, or raw with the LUKS " + "format instead.\n"); + ret = -ENOSYS; + goto fail; + } + if (s->crypt_method_header == QCOW_CRYPT_AES) { + if (encryptfmt && !g_str_equal(encryptfmt, "aes")) { + error_setg(errp, + "Header reported 'aes' encryption format but " + "options specify '%s'", encryptfmt); + ret = -EINVAL; + goto fail; + } + qdict_put_str(encryptopts, "format", "qcow"); + crypto_opts = block_crypto_open_opts_init(encryptopts, errp); + if (!crypto_opts) { + ret = -EINVAL; + goto fail; + } + + if (flags & BDRV_O_NO_IO) { + cflags |= QCRYPTO_BLOCK_OPEN_NO_IO; + } + s->crypto = qcrypto_block_open(crypto_opts, "encrypt.", + NULL, NULL, cflags, 1, errp); + if (!s->crypto) { + ret = -EINVAL; + goto fail; + } + } else { + error_setg(errp, "invalid encryption method in qcow header"); + ret = -EINVAL; + goto fail; + } + bs->encrypted = true; + } else { + if (encryptfmt) { + error_setg(errp, "No encryption in image header, but options " + "specified format '%s'", encryptfmt); + ret = -EINVAL; + goto fail; + } + } + s->cluster_bits = header.cluster_bits; + s->cluster_size = 1 << s->cluster_bits; + s->l2_bits = header.l2_bits; + s->l2_size = 1 << s->l2_bits; + bs->total_sectors = header.size / 512; + s->cluster_offset_mask = (1LL << (63 - s->cluster_bits)) - 1; + + /* read the level 1 table */ + shift = s->cluster_bits + s->l2_bits; + if (header.size > UINT64_MAX - (1LL << shift)) { + error_setg(errp, "Image too large"); + ret = -EINVAL; + goto fail; + } else { + uint64_t l1_size = (header.size + (1LL << shift) - 1) >> shift; + if (l1_size > INT_MAX / sizeof(uint64_t)) { + error_setg(errp, "Image too large"); + ret = -EINVAL; + goto fail; + } + s->l1_size = l1_size; + } + + s->l1_table_offset = header.l1_table_offset; + s->l1_table = g_try_new(uint64_t, s->l1_size); + if (s->l1_table == NULL) { + error_setg(errp, "Could not allocate memory for L1 table"); + ret = -ENOMEM; + goto fail; + } + + ret = bdrv_pread(bs->file, s->l1_table_offset, s->l1_table, + s->l1_size * sizeof(uint64_t)); + if (ret < 0) { + goto fail; + } + + for(i = 0;i < s->l1_size; i++) { + s->l1_table[i] = be64_to_cpu(s->l1_table[i]); + } + + /* alloc L2 cache (max. 64k * 16 * 8 = 8 MB) */ + s->l2_cache = + qemu_try_blockalign(bs->file->bs, + s->l2_size * L2_CACHE_SIZE * sizeof(uint64_t)); + if (s->l2_cache == NULL) { + error_setg(errp, "Could not allocate L2 table cache"); + ret = -ENOMEM; + goto fail; + } + s->cluster_cache = g_malloc(s->cluster_size); + s->cluster_data = g_malloc(s->cluster_size); + s->cluster_cache_offset = -1; + + /* read the backing file name */ + if (header.backing_file_offset != 0) { + len = header.backing_file_size; + if (len > 1023 || len >= sizeof(bs->backing_file)) { + error_setg(errp, "Backing file name too long"); + ret = -EINVAL; + goto fail; + } + ret = bdrv_pread(bs->file, header.backing_file_offset, + bs->auto_backing_file, len); + if (ret < 0) { + goto fail; + } + bs->auto_backing_file[len] = '\0'; + pstrcpy(bs->backing_file, sizeof(bs->backing_file), + bs->auto_backing_file); + } + + /* Disable migration when qcow images are used */ + error_setg(&s->migration_blocker, "The qcow format used by node '%s' " + "does not support live migration", + bdrv_get_device_or_node_name(bs)); + ret = migrate_add_blocker(s->migration_blocker, errp); + if (ret < 0) { + error_free(s->migration_blocker); + goto fail; + } + + qobject_unref(encryptopts); + qapi_free_QCryptoBlockOpenOptions(crypto_opts); + qemu_co_mutex_init(&s->lock); + return 0; + + fail: + g_free(s->l1_table); + qemu_vfree(s->l2_cache); + g_free(s->cluster_cache); + g_free(s->cluster_data); + qcrypto_block_free(s->crypto); + qobject_unref(encryptopts); + qapi_free_QCryptoBlockOpenOptions(crypto_opts); + return ret; +} + + +/* We have nothing to do for QCOW reopen, stubs just return + * success */ +static int qcow_reopen_prepare(BDRVReopenState *state, + BlockReopenQueue *queue, Error **errp) +{ + return 0; +} + + +/* 'allocate' is: + * + * 0 to not allocate. + * + * 1 to allocate a normal cluster (for sector-aligned byte offsets 'n_start' + * to 'n_end' within the cluster) + * + * 2 to allocate a compressed cluster of size + * 'compressed_size'. 'compressed_size' must be > 0 and < + * cluster_size + * + * return 0 if not allocated, 1 if *result is assigned, and negative + * errno on failure. + */ +static int get_cluster_offset(BlockDriverState *bs, + uint64_t offset, int allocate, + int compressed_size, + int n_start, int n_end, uint64_t *result) +{ + BDRVQcowState *s = bs->opaque; + int min_index, i, j, l1_index, l2_index, ret; + int64_t l2_offset; + uint64_t *l2_table, cluster_offset, tmp; + uint32_t min_count; + int new_l2_table; + + *result = 0; + l1_index = offset >> (s->l2_bits + s->cluster_bits); + l2_offset = s->l1_table[l1_index]; + new_l2_table = 0; + if (!l2_offset) { + if (!allocate) + return 0; + /* allocate a new l2 entry */ + l2_offset = bdrv_getlength(bs->file->bs); + if (l2_offset < 0) { + return l2_offset; + } + /* round to cluster size */ + l2_offset = QEMU_ALIGN_UP(l2_offset, s->cluster_size); + /* update the L1 entry */ + s->l1_table[l1_index] = l2_offset; + tmp = cpu_to_be64(l2_offset); + BLKDBG_EVENT(bs->file, BLKDBG_L1_UPDATE); + ret = bdrv_pwrite_sync(bs->file, + s->l1_table_offset + l1_index * sizeof(tmp), + &tmp, sizeof(tmp)); + if (ret < 0) { + return ret; + } + new_l2_table = 1; + } + for(i = 0; i < L2_CACHE_SIZE; i++) { + if (l2_offset == s->l2_cache_offsets[i]) { + /* increment the hit count */ + if (++s->l2_cache_counts[i] == 0xffffffff) { + for(j = 0; j < L2_CACHE_SIZE; j++) { + s->l2_cache_counts[j] >>= 1; + } + } + l2_table = s->l2_cache + (i << s->l2_bits); + goto found; + } + } + /* not found: load a new entry in the least used one */ + min_index = 0; + min_count = 0xffffffff; + for(i = 0; i < L2_CACHE_SIZE; i++) { + if (s->l2_cache_counts[i] < min_count) { + min_count = s->l2_cache_counts[i]; + min_index = i; + } + } + l2_table = s->l2_cache + (min_index << s->l2_bits); + BLKDBG_EVENT(bs->file, BLKDBG_L2_LOAD); + if (new_l2_table) { + memset(l2_table, 0, s->l2_size * sizeof(uint64_t)); + ret = bdrv_pwrite_sync(bs->file, l2_offset, l2_table, + s->l2_size * sizeof(uint64_t)); + if (ret < 0) { + return ret; + } + } else { + ret = bdrv_pread(bs->file, l2_offset, l2_table, + s->l2_size * sizeof(uint64_t)); + if (ret < 0) { + return ret; + } + } + s->l2_cache_offsets[min_index] = l2_offset; + s->l2_cache_counts[min_index] = 1; + found: + l2_index = (offset >> s->cluster_bits) & (s->l2_size - 1); + cluster_offset = be64_to_cpu(l2_table[l2_index]); + if (!cluster_offset || + ((cluster_offset & QCOW_OFLAG_COMPRESSED) && allocate == 1)) { + if (!allocate) + return 0; + BLKDBG_EVENT(bs->file, BLKDBG_CLUSTER_ALLOC); + assert(QEMU_IS_ALIGNED(n_start | n_end, BDRV_SECTOR_SIZE)); + /* allocate a new cluster */ + if ((cluster_offset & QCOW_OFLAG_COMPRESSED) && + (n_end - n_start) < s->cluster_size) { + /* if the cluster is already compressed, we must + decompress it in the case it is not completely + overwritten */ + if (decompress_cluster(bs, cluster_offset) < 0) { + return -EIO; + } + cluster_offset = bdrv_getlength(bs->file->bs); + if ((int64_t) cluster_offset < 0) { + return cluster_offset; + } + cluster_offset = QEMU_ALIGN_UP(cluster_offset, s->cluster_size); + /* write the cluster content */ + BLKDBG_EVENT(bs->file, BLKDBG_WRITE_AIO); + ret = bdrv_pwrite(bs->file, cluster_offset, s->cluster_cache, + s->cluster_size); + if (ret < 0) { + return ret; + } + } else { + cluster_offset = bdrv_getlength(bs->file->bs); + if ((int64_t) cluster_offset < 0) { + return cluster_offset; + } + if (allocate == 1) { + /* round to cluster size */ + cluster_offset = QEMU_ALIGN_UP(cluster_offset, s->cluster_size); + if (cluster_offset + s->cluster_size > INT64_MAX) { + return -E2BIG; + } + ret = bdrv_truncate(bs->file, cluster_offset + s->cluster_size, + false, PREALLOC_MODE_OFF, 0, NULL); + if (ret < 0) { + return ret; + } + /* if encrypted, we must initialize the cluster + content which won't be written */ + if (bs->encrypted && + (n_end - n_start) < s->cluster_size) { + uint64_t start_offset; + assert(s->crypto); + start_offset = offset & ~(s->cluster_size - 1); + for (i = 0; i < s->cluster_size; i += BDRV_SECTOR_SIZE) { + if (i < n_start || i >= n_end) { + memset(s->cluster_data, 0x00, BDRV_SECTOR_SIZE); + if (qcrypto_block_encrypt(s->crypto, + start_offset + i, + s->cluster_data, + BDRV_SECTOR_SIZE, + NULL) < 0) { + return -EIO; + } + BLKDBG_EVENT(bs->file, BLKDBG_WRITE_AIO); + ret = bdrv_pwrite(bs->file, + cluster_offset + i, + s->cluster_data, + BDRV_SECTOR_SIZE); + if (ret < 0) { + return ret; + } + } + } + } + } else if (allocate == 2) { + cluster_offset |= QCOW_OFLAG_COMPRESSED | + (uint64_t)compressed_size << (63 - s->cluster_bits); + } + } + /* update L2 table */ + tmp = cpu_to_be64(cluster_offset); + l2_table[l2_index] = tmp; + if (allocate == 2) { + BLKDBG_EVENT(bs->file, BLKDBG_L2_UPDATE_COMPRESSED); + } else { + BLKDBG_EVENT(bs->file, BLKDBG_L2_UPDATE); + } + ret = bdrv_pwrite_sync(bs->file, l2_offset + l2_index * sizeof(tmp), + &tmp, sizeof(tmp)); + if (ret < 0) { + return ret; + } + } + *result = cluster_offset; + return 1; +} + +static int coroutine_fn qcow_co_block_status(BlockDriverState *bs, + bool want_zero, + int64_t offset, int64_t bytes, + int64_t *pnum, int64_t *map, + BlockDriverState **file) +{ + BDRVQcowState *s = bs->opaque; + int index_in_cluster, ret; + int64_t n; + uint64_t cluster_offset; + + qemu_co_mutex_lock(&s->lock); + ret = get_cluster_offset(bs, offset, 0, 0, 0, 0, &cluster_offset); + qemu_co_mutex_unlock(&s->lock); + if (ret < 0) { + return ret; + } + index_in_cluster = offset & (s->cluster_size - 1); + n = s->cluster_size - index_in_cluster; + if (n > bytes) { + n = bytes; + } + *pnum = n; + if (!cluster_offset) { + return 0; + } + if ((cluster_offset & QCOW_OFLAG_COMPRESSED) || s->crypto) { + return BDRV_BLOCK_DATA; + } + *map = cluster_offset | index_in_cluster; + *file = bs->file->bs; + return BDRV_BLOCK_DATA | BDRV_BLOCK_OFFSET_VALID; +} + +static int decompress_buffer(uint8_t *out_buf, int out_buf_size, + const uint8_t *buf, int buf_size) +{ + z_stream strm1, *strm = &strm1; + int ret, out_len; + + memset(strm, 0, sizeof(*strm)); + + strm->next_in = (uint8_t *)buf; + strm->avail_in = buf_size; + strm->next_out = out_buf; + strm->avail_out = out_buf_size; + + ret = inflateInit2(strm, -12); + if (ret != Z_OK) + return -1; + ret = inflate(strm, Z_FINISH); + out_len = strm->next_out - out_buf; + if ((ret != Z_STREAM_END && ret != Z_BUF_ERROR) || + out_len != out_buf_size) { + inflateEnd(strm); + return -1; + } + inflateEnd(strm); + return 0; +} + +static int decompress_cluster(BlockDriverState *bs, uint64_t cluster_offset) +{ + BDRVQcowState *s = bs->opaque; + int ret, csize; + uint64_t coffset; + + coffset = cluster_offset & s->cluster_offset_mask; + if (s->cluster_cache_offset != coffset) { + csize = cluster_offset >> (63 - s->cluster_bits); + csize &= (s->cluster_size - 1); + BLKDBG_EVENT(bs->file, BLKDBG_READ_COMPRESSED); + ret = bdrv_pread(bs->file, coffset, s->cluster_data, csize); + if (ret != csize) + return -1; + if (decompress_buffer(s->cluster_cache, s->cluster_size, + s->cluster_data, csize) < 0) { + return -1; + } + s->cluster_cache_offset = coffset; + } + return 0; +} + +static void qcow_refresh_limits(BlockDriverState *bs, Error **errp) +{ + /* At least encrypted images require 512-byte alignment. Apply the + * limit universally, rather than just on encrypted images, as + * it's easier to let the block layer handle rounding than to + * audit this code further. */ + bs->bl.request_alignment = BDRV_SECTOR_SIZE; +} + +static coroutine_fn int qcow_co_preadv(BlockDriverState *bs, uint64_t offset, + uint64_t bytes, QEMUIOVector *qiov, + int flags) +{ + BDRVQcowState *s = bs->opaque; + int offset_in_cluster; + int ret = 0, n; + uint64_t cluster_offset; + uint8_t *buf; + void *orig_buf; + + assert(!flags); + if (qiov->niov > 1) { + buf = orig_buf = qemu_try_blockalign(bs, qiov->size); + if (buf == NULL) { + return -ENOMEM; + } + } else { + orig_buf = NULL; + buf = (uint8_t *)qiov->iov->iov_base; + } + + qemu_co_mutex_lock(&s->lock); + + while (bytes != 0) { + /* prepare next request */ + ret = get_cluster_offset(bs, offset, 0, 0, 0, 0, &cluster_offset); + if (ret < 0) { + break; + } + offset_in_cluster = offset & (s->cluster_size - 1); + n = s->cluster_size - offset_in_cluster; + if (n > bytes) { + n = bytes; + } + + if (!cluster_offset) { + if (bs->backing) { + /* read from the base image */ + qemu_co_mutex_unlock(&s->lock); + /* qcow2 emits this on bs->file instead of bs->backing */ + BLKDBG_EVENT(bs->file, BLKDBG_READ_BACKING_AIO); + ret = bdrv_co_pread(bs->backing, offset, n, buf, 0); + qemu_co_mutex_lock(&s->lock); + if (ret < 0) { + break; + } + } else { + /* Note: in this case, no need to wait */ + memset(buf, 0, n); + } + } else if (cluster_offset & QCOW_OFLAG_COMPRESSED) { + /* add AIO support for compressed blocks ? */ + if (decompress_cluster(bs, cluster_offset) < 0) { + ret = -EIO; + break; + } + memcpy(buf, s->cluster_cache + offset_in_cluster, n); + } else { + if ((cluster_offset & 511) != 0) { + ret = -EIO; + break; + } + qemu_co_mutex_unlock(&s->lock); + BLKDBG_EVENT(bs->file, BLKDBG_READ_AIO); + ret = bdrv_co_pread(bs->file, cluster_offset + offset_in_cluster, + n, buf, 0); + qemu_co_mutex_lock(&s->lock); + if (ret < 0) { + break; + } + if (bs->encrypted) { + assert(s->crypto); + if (qcrypto_block_decrypt(s->crypto, + offset, buf, n, NULL) < 0) { + ret = -EIO; + break; + } + } + } + ret = 0; + + bytes -= n; + offset += n; + buf += n; + } + + qemu_co_mutex_unlock(&s->lock); + + if (qiov->niov > 1) { + qemu_iovec_from_buf(qiov, 0, orig_buf, qiov->size); + qemu_vfree(orig_buf); + } + + return ret; +} + +static coroutine_fn int qcow_co_pwritev(BlockDriverState *bs, uint64_t offset, + uint64_t bytes, QEMUIOVector *qiov, + int flags) +{ + BDRVQcowState *s = bs->opaque; + int offset_in_cluster; + uint64_t cluster_offset; + int ret = 0, n; + uint8_t *buf; + void *orig_buf; + + assert(!flags); + s->cluster_cache_offset = -1; /* disable compressed cache */ + + /* We must always copy the iov when encrypting, so we + * don't modify the original data buffer during encryption */ + if (bs->encrypted || qiov->niov > 1) { + buf = orig_buf = qemu_try_blockalign(bs, qiov->size); + if (buf == NULL) { + return -ENOMEM; + } + qemu_iovec_to_buf(qiov, 0, buf, qiov->size); + } else { + orig_buf = NULL; + buf = (uint8_t *)qiov->iov->iov_base; + } + + qemu_co_mutex_lock(&s->lock); + + while (bytes != 0) { + offset_in_cluster = offset & (s->cluster_size - 1); + n = s->cluster_size - offset_in_cluster; + if (n > bytes) { + n = bytes; + } + ret = get_cluster_offset(bs, offset, 1, 0, offset_in_cluster, + offset_in_cluster + n, &cluster_offset); + if (ret < 0) { + break; + } + if (!cluster_offset || (cluster_offset & 511) != 0) { + ret = -EIO; + break; + } + if (bs->encrypted) { + assert(s->crypto); + if (qcrypto_block_encrypt(s->crypto, offset, buf, n, NULL) < 0) { + ret = -EIO; + break; + } + } + + qemu_co_mutex_unlock(&s->lock); + BLKDBG_EVENT(bs->file, BLKDBG_WRITE_AIO); + ret = bdrv_co_pwrite(bs->file, cluster_offset + offset_in_cluster, + n, buf, 0); + qemu_co_mutex_lock(&s->lock); + if (ret < 0) { + break; + } + ret = 0; + + bytes -= n; + offset += n; + buf += n; + } + qemu_co_mutex_unlock(&s->lock); + + qemu_vfree(orig_buf); + + return ret; +} + +static void qcow_close(BlockDriverState *bs) +{ + BDRVQcowState *s = bs->opaque; + + qcrypto_block_free(s->crypto); + s->crypto = NULL; + g_free(s->l1_table); + qemu_vfree(s->l2_cache); + g_free(s->cluster_cache); + g_free(s->cluster_data); + + migrate_del_blocker(s->migration_blocker); + error_free(s->migration_blocker); +} + +static int coroutine_fn qcow_co_create(BlockdevCreateOptions *opts, + Error **errp) +{ + BlockdevCreateOptionsQcow *qcow_opts; + int header_size, backing_filename_len, l1_size, shift, i; + QCowHeader header; + uint8_t *tmp; + int64_t total_size = 0; + int ret; + BlockDriverState *bs; + BlockBackend *qcow_blk; + QCryptoBlock *crypto = NULL; + + assert(opts->driver == BLOCKDEV_DRIVER_QCOW); + qcow_opts = &opts->u.qcow; + + /* Sanity checks */ + total_size = qcow_opts->size; + if (total_size == 0) { + error_setg(errp, "Image size is too small, cannot be zero length"); + return -EINVAL; + } + + if (qcow_opts->has_encrypt && + qcow_opts->encrypt->format != Q_CRYPTO_BLOCK_FORMAT_QCOW) + { + error_setg(errp, "Unsupported encryption format"); + return -EINVAL; + } + + /* Create BlockBackend to write to the image */ + bs = bdrv_open_blockdev_ref(qcow_opts->file, errp); + if (bs == NULL) { + return -EIO; + } + + qcow_blk = blk_new_with_bs(bs, BLK_PERM_WRITE | BLK_PERM_RESIZE, + BLK_PERM_ALL, errp); + if (!qcow_blk) { + ret = -EPERM; + goto exit; + } + blk_set_allow_write_beyond_eof(qcow_blk, true); + + /* Create image format */ + memset(&header, 0, sizeof(header)); + header.magic = cpu_to_be32(QCOW_MAGIC); + header.version = cpu_to_be32(QCOW_VERSION); + header.size = cpu_to_be64(total_size); + header_size = sizeof(header); + backing_filename_len = 0; + if (qcow_opts->has_backing_file) { + if (strcmp(qcow_opts->backing_file, "fat:")) { + header.backing_file_offset = cpu_to_be64(header_size); + backing_filename_len = strlen(qcow_opts->backing_file); + header.backing_file_size = cpu_to_be32(backing_filename_len); + header_size += backing_filename_len; + } else { + /* special backing file for vvfat */ + qcow_opts->has_backing_file = false; + } + header.cluster_bits = 9; /* 512 byte cluster to avoid copying + unmodified sectors */ + header.l2_bits = 12; /* 32 KB L2 tables */ + } else { + header.cluster_bits = 12; /* 4 KB clusters */ + header.l2_bits = 9; /* 4 KB L2 tables */ + } + header_size = (header_size + 7) & ~7; + shift = header.cluster_bits + header.l2_bits; + l1_size = (total_size + (1LL << shift) - 1) >> shift; + + header.l1_table_offset = cpu_to_be64(header_size); + + if (qcow_opts->has_encrypt) { + header.crypt_method = cpu_to_be32(QCOW_CRYPT_AES); + + crypto = qcrypto_block_create(qcow_opts->encrypt, "encrypt.", + NULL, NULL, NULL, errp); + if (!crypto) { + ret = -EINVAL; + goto exit; + } + } else { + header.crypt_method = cpu_to_be32(QCOW_CRYPT_NONE); + } + + /* write all the data */ + ret = blk_pwrite(qcow_blk, 0, &header, sizeof(header), 0); + if (ret != sizeof(header)) { + goto exit; + } + + if (qcow_opts->has_backing_file) { + ret = blk_pwrite(qcow_blk, sizeof(header), + qcow_opts->backing_file, backing_filename_len, 0); + if (ret != backing_filename_len) { + goto exit; + } + } + + tmp = g_malloc0(BDRV_SECTOR_SIZE); + for (i = 0; i < DIV_ROUND_UP(sizeof(uint64_t) * l1_size, BDRV_SECTOR_SIZE); + i++) { + ret = blk_pwrite(qcow_blk, header_size + BDRV_SECTOR_SIZE * i, + tmp, BDRV_SECTOR_SIZE, 0); + if (ret != BDRV_SECTOR_SIZE) { + g_free(tmp); + goto exit; + } + } + + g_free(tmp); + ret = 0; +exit: + blk_unref(qcow_blk); + bdrv_unref(bs); + qcrypto_block_free(crypto); + return ret; +} + +static int coroutine_fn qcow_co_create_opts(BlockDriver *drv, + const char *filename, + QemuOpts *opts, Error **errp) +{ + BlockdevCreateOptions *create_options = NULL; + BlockDriverState *bs = NULL; + QDict *qdict = NULL; + Visitor *v; + const char *val; + int ret; + char *backing_fmt; + + static const QDictRenames opt_renames[] = { + { BLOCK_OPT_BACKING_FILE, "backing-file" }, + { BLOCK_OPT_ENCRYPT, BLOCK_OPT_ENCRYPT_FORMAT }, + { NULL, NULL }, + }; + + /* + * We can't actually store a backing format, but can check that + * the user's request made sense. + */ + backing_fmt = qemu_opt_get_del(opts, BLOCK_OPT_BACKING_FMT); + if (backing_fmt && !bdrv_find_format(backing_fmt)) { + error_setg(errp, "unrecognized backing format '%s'", backing_fmt); + ret = -EINVAL; + goto fail; + } + + /* Parse options and convert legacy syntax */ + qdict = qemu_opts_to_qdict_filtered(opts, NULL, &qcow_create_opts, true); + + val = qdict_get_try_str(qdict, BLOCK_OPT_ENCRYPT); + if (val && !strcmp(val, "on")) { + qdict_put_str(qdict, BLOCK_OPT_ENCRYPT, "qcow"); + } else if (val && !strcmp(val, "off")) { + qdict_del(qdict, BLOCK_OPT_ENCRYPT); + } + + val = qdict_get_try_str(qdict, BLOCK_OPT_ENCRYPT_FORMAT); + if (val && !strcmp(val, "aes")) { + qdict_put_str(qdict, BLOCK_OPT_ENCRYPT_FORMAT, "qcow"); + } + + if (!qdict_rename_keys(qdict, opt_renames, errp)) { + ret = -EINVAL; + goto fail; + } + + /* Create and open the file (protocol layer) */ + ret = bdrv_create_file(filename, opts, errp); + if (ret < 0) { + goto fail; + } + + bs = bdrv_open(filename, NULL, NULL, + BDRV_O_RDWR | BDRV_O_RESIZE | BDRV_O_PROTOCOL, errp); + if (bs == NULL) { + ret = -EIO; + goto fail; + } + + /* Now get the QAPI type BlockdevCreateOptions */ + qdict_put_str(qdict, "driver", "qcow"); + qdict_put_str(qdict, "file", bs->node_name); + + v = qobject_input_visitor_new_flat_confused(qdict, errp); + if (!v) { + ret = -EINVAL; + goto fail; + } + + visit_type_BlockdevCreateOptions(v, NULL, &create_options, errp); + visit_free(v); + if (!create_options) { + ret = -EINVAL; + goto fail; + } + + /* Silently round up size */ + assert(create_options->driver == BLOCKDEV_DRIVER_QCOW); + create_options->u.qcow.size = + ROUND_UP(create_options->u.qcow.size, BDRV_SECTOR_SIZE); + + /* Create the qcow image (format layer) */ + ret = qcow_co_create(create_options, errp); + if (ret < 0) { + goto fail; + } + + ret = 0; +fail: + g_free(backing_fmt); + qobject_unref(qdict); + bdrv_unref(bs); + qapi_free_BlockdevCreateOptions(create_options); + return ret; +} + +static int qcow_make_empty(BlockDriverState *bs) +{ + BDRVQcowState *s = bs->opaque; + uint32_t l1_length = s->l1_size * sizeof(uint64_t); + int ret; + + memset(s->l1_table, 0, l1_length); + if (bdrv_pwrite_sync(bs->file, s->l1_table_offset, s->l1_table, + l1_length) < 0) + return -1; + ret = bdrv_truncate(bs->file, s->l1_table_offset + l1_length, false, + PREALLOC_MODE_OFF, 0, NULL); + if (ret < 0) + return ret; + + memset(s->l2_cache, 0, s->l2_size * L2_CACHE_SIZE * sizeof(uint64_t)); + memset(s->l2_cache_offsets, 0, L2_CACHE_SIZE * sizeof(uint64_t)); + memset(s->l2_cache_counts, 0, L2_CACHE_SIZE * sizeof(uint32_t)); + + return 0; +} + +/* XXX: put compressed sectors first, then all the cluster aligned + tables to avoid losing bytes in alignment */ +static coroutine_fn int +qcow_co_pwritev_compressed(BlockDriverState *bs, uint64_t offset, + uint64_t bytes, QEMUIOVector *qiov) +{ + BDRVQcowState *s = bs->opaque; + z_stream strm; + int ret, out_len; + uint8_t *buf, *out_buf; + uint64_t cluster_offset; + + buf = qemu_blockalign(bs, s->cluster_size); + if (bytes != s->cluster_size) { + if (bytes > s->cluster_size || + offset + bytes != bs->total_sectors << BDRV_SECTOR_BITS) + { + qemu_vfree(buf); + return -EINVAL; + } + /* Zero-pad last write if image size is not cluster aligned */ + memset(buf + bytes, 0, s->cluster_size - bytes); + } + qemu_iovec_to_buf(qiov, 0, buf, qiov->size); + + out_buf = g_malloc(s->cluster_size); + + /* best compression, small window, no zlib header */ + memset(&strm, 0, sizeof(strm)); + ret = deflateInit2(&strm, Z_DEFAULT_COMPRESSION, + Z_DEFLATED, -12, + 9, Z_DEFAULT_STRATEGY); + if (ret != 0) { + ret = -EINVAL; + goto fail; + } + + strm.avail_in = s->cluster_size; + strm.next_in = (uint8_t *)buf; + strm.avail_out = s->cluster_size; + strm.next_out = out_buf; + + ret = deflate(&strm, Z_FINISH); + if (ret != Z_STREAM_END && ret != Z_OK) { + deflateEnd(&strm); + ret = -EINVAL; + goto fail; + } + out_len = strm.next_out - out_buf; + + deflateEnd(&strm); + + if (ret != Z_STREAM_END || out_len >= s->cluster_size) { + /* could not compress: write normal cluster */ + ret = qcow_co_pwritev(bs, offset, bytes, qiov, 0); + if (ret < 0) { + goto fail; + } + goto success; + } + qemu_co_mutex_lock(&s->lock); + ret = get_cluster_offset(bs, offset, 2, out_len, 0, 0, &cluster_offset); + qemu_co_mutex_unlock(&s->lock); + if (ret < 0) { + goto fail; + } + if (cluster_offset == 0) { + ret = -EIO; + goto fail; + } + cluster_offset &= s->cluster_offset_mask; + + BLKDBG_EVENT(bs->file, BLKDBG_WRITE_COMPRESSED); + ret = bdrv_co_pwrite(bs->file, cluster_offset, out_len, out_buf, 0); + if (ret < 0) { + goto fail; + } +success: + ret = 0; +fail: + qemu_vfree(buf); + g_free(out_buf); + return ret; +} + +static int qcow_get_info(BlockDriverState *bs, BlockDriverInfo *bdi) +{ + BDRVQcowState *s = bs->opaque; + bdi->cluster_size = s->cluster_size; + return 0; +} + +static QemuOptsList qcow_create_opts = { + .name = "qcow-create-opts", + .head = QTAILQ_HEAD_INITIALIZER(qcow_create_opts.head), + .desc = { + { + .name = BLOCK_OPT_SIZE, + .type = QEMU_OPT_SIZE, + .help = "Virtual disk size" + }, + { + .name = BLOCK_OPT_BACKING_FILE, + .type = QEMU_OPT_STRING, + .help = "File name of a base image" + }, + { + .name = BLOCK_OPT_BACKING_FMT, + .type = QEMU_OPT_STRING, + .help = "Format of the backing image", + }, + { + .name = BLOCK_OPT_ENCRYPT, + .type = QEMU_OPT_BOOL, + .help = "Encrypt the image with format 'aes'. (Deprecated " + "in favor of " BLOCK_OPT_ENCRYPT_FORMAT "=aes)", + }, + { + .name = BLOCK_OPT_ENCRYPT_FORMAT, + .type = QEMU_OPT_STRING, + .help = "Encrypt the image, format choices: 'aes'", + }, + BLOCK_CRYPTO_OPT_DEF_QCOW_KEY_SECRET("encrypt."), + { /* end of list */ } + } +}; + +static const char *const qcow_strong_runtime_opts[] = { + "encrypt." BLOCK_CRYPTO_OPT_QCOW_KEY_SECRET, + + NULL +}; + +static BlockDriver bdrv_qcow = { + .format_name = "qcow", + .instance_size = sizeof(BDRVQcowState), + .bdrv_probe = qcow_probe, + .bdrv_open = qcow_open, + .bdrv_close = qcow_close, + .bdrv_child_perm = bdrv_default_perms, + .bdrv_reopen_prepare = qcow_reopen_prepare, + .bdrv_co_create = qcow_co_create, + .bdrv_co_create_opts = qcow_co_create_opts, + .bdrv_has_zero_init = bdrv_has_zero_init_1, + .is_format = true, + .supports_backing = true, + .bdrv_refresh_limits = qcow_refresh_limits, + + .bdrv_co_preadv = qcow_co_preadv, + .bdrv_co_pwritev = qcow_co_pwritev, + .bdrv_co_block_status = qcow_co_block_status, + + .bdrv_make_empty = qcow_make_empty, + .bdrv_co_pwritev_compressed = qcow_co_pwritev_compressed, + .bdrv_get_info = qcow_get_info, + + .create_opts = &qcow_create_opts, + .strong_runtime_opts = qcow_strong_runtime_opts, +}; + +static void bdrv_qcow_init(void) +{ + bdrv_register(&bdrv_qcow); +} + +block_init(bdrv_qcow_init); diff --git a/block/qcow2-bitmap.c b/block/qcow2-bitmap.c new file mode 100644 index 000000000..d7a31a8dd --- /dev/null +++ b/block/qcow2-bitmap.c @@ -0,0 +1,1812 @@ +/* + * Bitmaps for the QCOW version 2 format + * + * Copyright (c) 2014-2017 Vladimir Sementsov-Ogievskiy + * + * This file is derived from qcow2-snapshot.c, original copyright: + * Copyright (c) 2004-2006 Fabrice Bellard + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ + +#include "qemu/osdep.h" +#include "qapi/error.h" +#include "qemu/cutils.h" + +#include "qcow2.h" + +/* NOTICE: BME here means Bitmaps Extension and used as a namespace for + * _internal_ constants. Please do not use this _internal_ abbreviation for + * other needs and/or outside of this file. */ + +/* Bitmap directory entry constraints */ +#define BME_MAX_TABLE_SIZE 0x8000000 +#define BME_MAX_PHYS_SIZE 0x20000000 /* restrict BdrvDirtyBitmap size in RAM */ +#define BME_MAX_GRANULARITY_BITS 31 +#define BME_MIN_GRANULARITY_BITS 9 +#define BME_MAX_NAME_SIZE 1023 + +/* Size of bitmap table entries */ +#define BME_TABLE_ENTRY_SIZE (sizeof(uint64_t)) + +QEMU_BUILD_BUG_ON(BME_MAX_NAME_SIZE != BDRV_BITMAP_MAX_NAME_SIZE); + +#if BME_MAX_TABLE_SIZE * 8ULL > INT_MAX +#error In the code bitmap table physical size assumed to fit into int +#endif + +/* Bitmap directory entry flags */ +#define BME_RESERVED_FLAGS 0xfffffffcU +#define BME_FLAG_IN_USE (1U << 0) +#define BME_FLAG_AUTO (1U << 1) + +/* bits [1, 8] U [56, 63] are reserved */ +#define BME_TABLE_ENTRY_RESERVED_MASK 0xff000000000001feULL +#define BME_TABLE_ENTRY_OFFSET_MASK 0x00fffffffffffe00ULL +#define BME_TABLE_ENTRY_FLAG_ALL_ONES (1ULL << 0) + +typedef struct QEMU_PACKED Qcow2BitmapDirEntry { + /* header is 8 byte aligned */ + uint64_t bitmap_table_offset; + + uint32_t bitmap_table_size; + uint32_t flags; + + uint8_t type; + uint8_t granularity_bits; + uint16_t name_size; + uint32_t extra_data_size; + /* extra data follows */ + /* name follows */ +} Qcow2BitmapDirEntry; + +typedef struct Qcow2BitmapTable { + uint64_t offset; + uint32_t size; /* number of 64bit entries */ + QSIMPLEQ_ENTRY(Qcow2BitmapTable) entry; +} Qcow2BitmapTable; + +typedef struct Qcow2Bitmap { + Qcow2BitmapTable table; + uint32_t flags; + uint8_t granularity_bits; + char *name; + + BdrvDirtyBitmap *dirty_bitmap; + + QSIMPLEQ_ENTRY(Qcow2Bitmap) entry; +} Qcow2Bitmap; +typedef QSIMPLEQ_HEAD(Qcow2BitmapList, Qcow2Bitmap) Qcow2BitmapList; + +typedef enum BitmapType { + BT_DIRTY_TRACKING_BITMAP = 1 +} BitmapType; + +static inline bool can_write(BlockDriverState *bs) +{ + return !bdrv_is_read_only(bs) && !(bdrv_get_flags(bs) & BDRV_O_INACTIVE); +} + +static int update_header_sync(BlockDriverState *bs) +{ + int ret; + + ret = qcow2_update_header(bs); + if (ret < 0) { + return ret; + } + + return bdrv_flush(bs->file->bs); +} + +static inline void bitmap_table_to_be(uint64_t *bitmap_table, size_t size) +{ + size_t i; + + for (i = 0; i < size; ++i) { + bitmap_table[i] = cpu_to_be64(bitmap_table[i]); + } +} + +static int check_table_entry(uint64_t entry, int cluster_size) +{ + uint64_t offset; + + if (entry & BME_TABLE_ENTRY_RESERVED_MASK) { + return -EINVAL; + } + + offset = entry & BME_TABLE_ENTRY_OFFSET_MASK; + if (offset != 0) { + /* if offset specified, bit 0 is reserved */ + if (entry & BME_TABLE_ENTRY_FLAG_ALL_ONES) { + return -EINVAL; + } + + if (offset % cluster_size != 0) { + return -EINVAL; + } + } + + return 0; +} + +static int64_t get_bitmap_bytes_needed(int64_t len, uint32_t granularity) +{ + int64_t num_bits = DIV_ROUND_UP(len, granularity); + + return DIV_ROUND_UP(num_bits, 8); +} + +static int check_constraints_on_bitmap(BlockDriverState *bs, + const char *name, + uint32_t granularity, + Error **errp) +{ + BDRVQcow2State *s = bs->opaque; + int granularity_bits = ctz32(granularity); + int64_t len = bdrv_getlength(bs); + int64_t bitmap_bytes; + + assert(granularity > 0); + assert((granularity & (granularity - 1)) == 0); + + if (len < 0) { + error_setg_errno(errp, -len, "Failed to get size of '%s'", + bdrv_get_device_or_node_name(bs)); + return len; + } + + if (granularity_bits > BME_MAX_GRANULARITY_BITS) { + error_setg(errp, "Granularity exceeds maximum (%llu bytes)", + 1ULL << BME_MAX_GRANULARITY_BITS); + return -EINVAL; + } + if (granularity_bits < BME_MIN_GRANULARITY_BITS) { + error_setg(errp, "Granularity is under minimum (%llu bytes)", + 1ULL << BME_MIN_GRANULARITY_BITS); + return -EINVAL; + } + + bitmap_bytes = get_bitmap_bytes_needed(len, granularity); + if ((bitmap_bytes > (uint64_t)BME_MAX_PHYS_SIZE) || + (bitmap_bytes > (uint64_t)BME_MAX_TABLE_SIZE * s->cluster_size)) + { + error_setg(errp, "Too much space will be occupied by the bitmap. " + "Use larger granularity"); + return -EINVAL; + } + + if (strlen(name) > BME_MAX_NAME_SIZE) { + error_setg(errp, "Name length exceeds maximum (%u characters)", + BME_MAX_NAME_SIZE); + return -EINVAL; + } + + return 0; +} + +static void clear_bitmap_table(BlockDriverState *bs, uint64_t *bitmap_table, + uint32_t bitmap_table_size) +{ + BDRVQcow2State *s = bs->opaque; + int i; + + for (i = 0; i < bitmap_table_size; ++i) { + uint64_t addr = bitmap_table[i] & BME_TABLE_ENTRY_OFFSET_MASK; + if (!addr) { + continue; + } + + qcow2_free_clusters(bs, addr, s->cluster_size, QCOW2_DISCARD_ALWAYS); + bitmap_table[i] = 0; + } +} + +static int bitmap_table_load(BlockDriverState *bs, Qcow2BitmapTable *tb, + uint64_t **bitmap_table) +{ + int ret; + BDRVQcow2State *s = bs->opaque; + uint32_t i; + uint64_t *table; + + assert(tb->size != 0); + table = g_try_new(uint64_t, tb->size); + if (table == NULL) { + return -ENOMEM; + } + + assert(tb->size <= BME_MAX_TABLE_SIZE); + ret = bdrv_pread(bs->file, tb->offset, + table, tb->size * BME_TABLE_ENTRY_SIZE); + if (ret < 0) { + goto fail; + } + + for (i = 0; i < tb->size; ++i) { + table[i] = be64_to_cpu(table[i]); + ret = check_table_entry(table[i], s->cluster_size); + if (ret < 0) { + goto fail; + } + } + + *bitmap_table = table; + return 0; + +fail: + g_free(table); + + return ret; +} + +static int free_bitmap_clusters(BlockDriverState *bs, Qcow2BitmapTable *tb) +{ + int ret; + uint64_t *bitmap_table; + + ret = bitmap_table_load(bs, tb, &bitmap_table); + if (ret < 0) { + return ret; + } + + clear_bitmap_table(bs, bitmap_table, tb->size); + qcow2_free_clusters(bs, tb->offset, tb->size * BME_TABLE_ENTRY_SIZE, + QCOW2_DISCARD_OTHER); + g_free(bitmap_table); + + tb->offset = 0; + tb->size = 0; + + return 0; +} + +/* Return the disk size covered by a single qcow2 cluster of bitmap data. */ +static uint64_t bytes_covered_by_bitmap_cluster(const BDRVQcow2State *s, + const BdrvDirtyBitmap *bitmap) +{ + uint64_t granularity = bdrv_dirty_bitmap_granularity(bitmap); + uint64_t limit = granularity * (s->cluster_size << 3); + + assert(QEMU_IS_ALIGNED(limit, + bdrv_dirty_bitmap_serialization_align(bitmap))); + return limit; +} + +/* load_bitmap_data + * @bitmap_table entries must satisfy specification constraints. + * @bitmap must be cleared */ +static int load_bitmap_data(BlockDriverState *bs, + const uint64_t *bitmap_table, + uint32_t bitmap_table_size, + BdrvDirtyBitmap *bitmap) +{ + int ret = 0; + BDRVQcow2State *s = bs->opaque; + uint64_t offset, limit; + uint64_t bm_size = bdrv_dirty_bitmap_size(bitmap); + uint8_t *buf = NULL; + uint64_t i, tab_size = + size_to_clusters(s, + bdrv_dirty_bitmap_serialization_size(bitmap, 0, bm_size)); + + if (tab_size != bitmap_table_size || tab_size > BME_MAX_TABLE_SIZE) { + return -EINVAL; + } + + buf = g_malloc(s->cluster_size); + limit = bytes_covered_by_bitmap_cluster(s, bitmap); + for (i = 0, offset = 0; i < tab_size; ++i, offset += limit) { + uint64_t count = MIN(bm_size - offset, limit); + uint64_t entry = bitmap_table[i]; + uint64_t data_offset = entry & BME_TABLE_ENTRY_OFFSET_MASK; + + assert(check_table_entry(entry, s->cluster_size) == 0); + + if (data_offset == 0) { + if (entry & BME_TABLE_ENTRY_FLAG_ALL_ONES) { + bdrv_dirty_bitmap_deserialize_ones(bitmap, offset, count, + false); + } else { + /* No need to deserialize zeros because the dirty bitmap is + * already cleared */ + } + } else { + ret = bdrv_pread(bs->file, data_offset, buf, s->cluster_size); + if (ret < 0) { + goto finish; + } + bdrv_dirty_bitmap_deserialize_part(bitmap, buf, offset, count, + false); + } + } + ret = 0; + + bdrv_dirty_bitmap_deserialize_finish(bitmap); + +finish: + g_free(buf); + + return ret; +} + +static BdrvDirtyBitmap *load_bitmap(BlockDriverState *bs, + Qcow2Bitmap *bm, Error **errp) +{ + int ret; + uint64_t *bitmap_table = NULL; + uint32_t granularity; + BdrvDirtyBitmap *bitmap = NULL; + + granularity = 1U << bm->granularity_bits; + bitmap = bdrv_create_dirty_bitmap(bs, granularity, bm->name, errp); + if (bitmap == NULL) { + goto fail; + } + + if (bm->flags & BME_FLAG_IN_USE) { + /* Data is unusable, skip loading it */ + return bitmap; + } + + ret = bitmap_table_load(bs, &bm->table, &bitmap_table); + if (ret < 0) { + error_setg_errno(errp, -ret, + "Could not read bitmap_table table from image for " + "bitmap '%s'", bm->name); + goto fail; + } + + ret = load_bitmap_data(bs, bitmap_table, bm->table.size, bitmap); + if (ret < 0) { + error_setg_errno(errp, -ret, "Could not read bitmap '%s' from image", + bm->name); + goto fail; + } + + g_free(bitmap_table); + return bitmap; + +fail: + g_free(bitmap_table); + if (bitmap != NULL) { + bdrv_release_dirty_bitmap(bitmap); + } + + return NULL; +} + +/* + * Bitmap List + */ + +/* + * Bitmap List private functions + * Only Bitmap List knows about bitmap directory structure in Qcow2. + */ + +static inline void bitmap_dir_entry_to_cpu(Qcow2BitmapDirEntry *entry) +{ + entry->bitmap_table_offset = be64_to_cpu(entry->bitmap_table_offset); + entry->bitmap_table_size = be32_to_cpu(entry->bitmap_table_size); + entry->flags = be32_to_cpu(entry->flags); + entry->name_size = be16_to_cpu(entry->name_size); + entry->extra_data_size = be32_to_cpu(entry->extra_data_size); +} + +static inline void bitmap_dir_entry_to_be(Qcow2BitmapDirEntry *entry) +{ + entry->bitmap_table_offset = cpu_to_be64(entry->bitmap_table_offset); + entry->bitmap_table_size = cpu_to_be32(entry->bitmap_table_size); + entry->flags = cpu_to_be32(entry->flags); + entry->name_size = cpu_to_be16(entry->name_size); + entry->extra_data_size = cpu_to_be32(entry->extra_data_size); +} + +static inline int calc_dir_entry_size(size_t name_size, size_t extra_data_size) +{ + int size = sizeof(Qcow2BitmapDirEntry) + name_size + extra_data_size; + return ROUND_UP(size, 8); +} + +static inline int dir_entry_size(Qcow2BitmapDirEntry *entry) +{ + return calc_dir_entry_size(entry->name_size, entry->extra_data_size); +} + +static inline const char *dir_entry_name_field(Qcow2BitmapDirEntry *entry) +{ + return (const char *)(entry + 1) + entry->extra_data_size; +} + +static inline char *dir_entry_copy_name(Qcow2BitmapDirEntry *entry) +{ + const char *name_field = dir_entry_name_field(entry); + return g_strndup(name_field, entry->name_size); +} + +static inline Qcow2BitmapDirEntry *next_dir_entry(Qcow2BitmapDirEntry *entry) +{ + return (Qcow2BitmapDirEntry *)((uint8_t *)entry + dir_entry_size(entry)); +} + +static int check_dir_entry(BlockDriverState *bs, Qcow2BitmapDirEntry *entry) +{ + BDRVQcow2State *s = bs->opaque; + uint64_t phys_bitmap_bytes; + int64_t len; + + bool fail = (entry->bitmap_table_size == 0) || + (entry->bitmap_table_offset == 0) || + (entry->bitmap_table_offset % s->cluster_size) || + (entry->bitmap_table_size > BME_MAX_TABLE_SIZE) || + (entry->granularity_bits > BME_MAX_GRANULARITY_BITS) || + (entry->granularity_bits < BME_MIN_GRANULARITY_BITS) || + (entry->flags & BME_RESERVED_FLAGS) || + (entry->name_size > BME_MAX_NAME_SIZE) || + (entry->type != BT_DIRTY_TRACKING_BITMAP); + + if (fail) { + return -EINVAL; + } + + phys_bitmap_bytes = (uint64_t)entry->bitmap_table_size * s->cluster_size; + len = bdrv_getlength(bs); + + if (len < 0) { + return len; + } + + if (phys_bitmap_bytes > BME_MAX_PHYS_SIZE) { + return -EINVAL; + } + + if (!(entry->flags & BME_FLAG_IN_USE) && + (len > ((phys_bitmap_bytes * 8) << entry->granularity_bits))) + { + /* + * We've loaded a valid bitmap (IN_USE not set) or we are going to + * store a valid bitmap, but the allocated bitmap table size is not + * enough to store this bitmap. + * + * Note, that it's OK to have an invalid bitmap with invalid size due + * to a bitmap that was not correctly saved after image resize. + */ + return -EINVAL; + } + + return 0; +} + +static inline void bitmap_directory_to_be(uint8_t *dir, size_t size) +{ + uint8_t *end = dir + size; + while (dir < end) { + Qcow2BitmapDirEntry *e = (Qcow2BitmapDirEntry *)dir; + dir += dir_entry_size(e); + + bitmap_dir_entry_to_be(e); + } +} + +/* + * Bitmap List public functions + */ + +static void bitmap_free(Qcow2Bitmap *bm) +{ + if (bm == NULL) { + return; + } + + g_free(bm->name); + g_free(bm); +} + +static void bitmap_list_free(Qcow2BitmapList *bm_list) +{ + Qcow2Bitmap *bm; + + if (bm_list == NULL) { + return; + } + + while ((bm = QSIMPLEQ_FIRST(bm_list)) != NULL) { + QSIMPLEQ_REMOVE_HEAD(bm_list, entry); + bitmap_free(bm); + } + + g_free(bm_list); +} + +static Qcow2BitmapList *bitmap_list_new(void) +{ + Qcow2BitmapList *bm_list = g_new(Qcow2BitmapList, 1); + QSIMPLEQ_INIT(bm_list); + + return bm_list; +} + +static uint32_t bitmap_list_count(Qcow2BitmapList *bm_list) +{ + Qcow2Bitmap *bm; + uint32_t nb_bitmaps = 0; + + QSIMPLEQ_FOREACH(bm, bm_list, entry) { + nb_bitmaps++; + } + + return nb_bitmaps; +} + +/* bitmap_list_load + * Get bitmap list from qcow2 image. Actually reads bitmap directory, + * checks it and convert to bitmap list. + */ +static Qcow2BitmapList *bitmap_list_load(BlockDriverState *bs, uint64_t offset, + uint64_t size, Error **errp) +{ + int ret; + BDRVQcow2State *s = bs->opaque; + uint8_t *dir, *dir_end; + Qcow2BitmapDirEntry *e; + uint32_t nb_dir_entries = 0; + Qcow2BitmapList *bm_list = NULL; + + if (size == 0) { + error_setg(errp, "Requested bitmap directory size is zero"); + return NULL; + } + + if (size > QCOW2_MAX_BITMAP_DIRECTORY_SIZE) { + error_setg(errp, "Requested bitmap directory size is too big"); + return NULL; + } + + dir = g_try_malloc(size); + if (dir == NULL) { + error_setg(errp, "Failed to allocate space for bitmap directory"); + return NULL; + } + dir_end = dir + size; + + ret = bdrv_pread(bs->file, offset, dir, size); + if (ret < 0) { + error_setg_errno(errp, -ret, "Failed to read bitmap directory"); + goto fail; + } + + bm_list = bitmap_list_new(); + for (e = (Qcow2BitmapDirEntry *)dir; + e < (Qcow2BitmapDirEntry *)dir_end; + e = next_dir_entry(e)) + { + Qcow2Bitmap *bm; + + if ((uint8_t *)(e + 1) > dir_end) { + goto broken_dir; + } + + if (++nb_dir_entries > s->nb_bitmaps) { + error_setg(errp, "More bitmaps found than specified in header" + " extension"); + goto fail; + } + bitmap_dir_entry_to_cpu(e); + + if ((uint8_t *)next_dir_entry(e) > dir_end) { + goto broken_dir; + } + + if (e->extra_data_size != 0) { + error_setg(errp, "Bitmap extra data is not supported"); + goto fail; + } + + ret = check_dir_entry(bs, e); + if (ret < 0) { + error_setg(errp, "Bitmap '%.*s' doesn't satisfy the constraints", + e->name_size, dir_entry_name_field(e)); + goto fail; + } + + bm = g_new0(Qcow2Bitmap, 1); + bm->table.offset = e->bitmap_table_offset; + bm->table.size = e->bitmap_table_size; + bm->flags = e->flags; + bm->granularity_bits = e->granularity_bits; + bm->name = dir_entry_copy_name(e); + QSIMPLEQ_INSERT_TAIL(bm_list, bm, entry); + } + + if (nb_dir_entries != s->nb_bitmaps) { + error_setg(errp, "Less bitmaps found than specified in header" + " extension"); + goto fail; + } + + if ((uint8_t *)e != dir_end) { + goto broken_dir; + } + + g_free(dir); + return bm_list; + +broken_dir: + error_setg(errp, "Broken bitmap directory"); + +fail: + g_free(dir); + bitmap_list_free(bm_list); + + return NULL; +} + +int qcow2_check_bitmaps_refcounts(BlockDriverState *bs, BdrvCheckResult *res, + void **refcount_table, + int64_t *refcount_table_size) +{ + int ret; + BDRVQcow2State *s = bs->opaque; + Qcow2BitmapList *bm_list; + Qcow2Bitmap *bm; + + if (s->nb_bitmaps == 0) { + return 0; + } + + ret = qcow2_inc_refcounts_imrt(bs, res, refcount_table, refcount_table_size, + s->bitmap_directory_offset, + s->bitmap_directory_size); + if (ret < 0) { + return ret; + } + + bm_list = bitmap_list_load(bs, s->bitmap_directory_offset, + s->bitmap_directory_size, NULL); + if (bm_list == NULL) { + res->corruptions++; + return -EINVAL; + } + + QSIMPLEQ_FOREACH(bm, bm_list, entry) { + uint64_t *bitmap_table = NULL; + int i; + + ret = qcow2_inc_refcounts_imrt(bs, res, + refcount_table, refcount_table_size, + bm->table.offset, + bm->table.size * BME_TABLE_ENTRY_SIZE); + if (ret < 0) { + goto out; + } + + ret = bitmap_table_load(bs, &bm->table, &bitmap_table); + if (ret < 0) { + res->corruptions++; + goto out; + } + + for (i = 0; i < bm->table.size; ++i) { + uint64_t entry = bitmap_table[i]; + uint64_t offset = entry & BME_TABLE_ENTRY_OFFSET_MASK; + + if (check_table_entry(entry, s->cluster_size) < 0) { + res->corruptions++; + continue; + } + + if (offset == 0) { + continue; + } + + ret = qcow2_inc_refcounts_imrt(bs, res, + refcount_table, refcount_table_size, + offset, s->cluster_size); + if (ret < 0) { + g_free(bitmap_table); + goto out; + } + } + + g_free(bitmap_table); + } + +out: + bitmap_list_free(bm_list); + + return ret; +} + +/* bitmap_list_store + * Store bitmap list to qcow2 image as a bitmap directory. + * Everything is checked. + */ +static int bitmap_list_store(BlockDriverState *bs, Qcow2BitmapList *bm_list, + uint64_t *offset, uint64_t *size, bool in_place) +{ + int ret; + uint8_t *dir; + int64_t dir_offset = 0; + uint64_t dir_size = 0; + Qcow2Bitmap *bm; + Qcow2BitmapDirEntry *e; + + QSIMPLEQ_FOREACH(bm, bm_list, entry) { + dir_size += calc_dir_entry_size(strlen(bm->name), 0); + } + + if (dir_size == 0 || dir_size > QCOW2_MAX_BITMAP_DIRECTORY_SIZE) { + return -EINVAL; + } + + if (in_place) { + if (*size != dir_size || *offset == 0) { + return -EINVAL; + } + + dir_offset = *offset; + } + + dir = g_try_malloc0(dir_size); + if (dir == NULL) { + return -ENOMEM; + } + + e = (Qcow2BitmapDirEntry *)dir; + QSIMPLEQ_FOREACH(bm, bm_list, entry) { + e->bitmap_table_offset = bm->table.offset; + e->bitmap_table_size = bm->table.size; + e->flags = bm->flags; + e->type = BT_DIRTY_TRACKING_BITMAP; + e->granularity_bits = bm->granularity_bits; + e->name_size = strlen(bm->name); + e->extra_data_size = 0; + memcpy(e + 1, bm->name, e->name_size); + + if (check_dir_entry(bs, e) < 0) { + ret = -EINVAL; + goto fail; + } + + e = next_dir_entry(e); + } + + bitmap_directory_to_be(dir, dir_size); + + if (!in_place) { + dir_offset = qcow2_alloc_clusters(bs, dir_size); + if (dir_offset < 0) { + ret = dir_offset; + goto fail; + } + } + + /* Actually, even in in-place case ignoring QCOW2_OL_BITMAP_DIRECTORY is not + * necessary, because we drop QCOW2_AUTOCLEAR_BITMAPS when updating bitmap + * directory in-place (actually, turn-off the extension), which is checked + * in qcow2_check_metadata_overlap() */ + ret = qcow2_pre_write_overlap_check( + bs, in_place ? QCOW2_OL_BITMAP_DIRECTORY : 0, dir_offset, dir_size, + false); + if (ret < 0) { + goto fail; + } + + ret = bdrv_pwrite(bs->file, dir_offset, dir, dir_size); + if (ret < 0) { + goto fail; + } + + g_free(dir); + + if (!in_place) { + *size = dir_size; + *offset = dir_offset; + } + + return 0; + +fail: + g_free(dir); + + if (!in_place && dir_offset > 0) { + qcow2_free_clusters(bs, dir_offset, dir_size, QCOW2_DISCARD_OTHER); + } + + return ret; +} + +/* + * Bitmap List end + */ + +static int update_ext_header_and_dir_in_place(BlockDriverState *bs, + Qcow2BitmapList *bm_list) +{ + BDRVQcow2State *s = bs->opaque; + int ret; + + if (!(s->autoclear_features & QCOW2_AUTOCLEAR_BITMAPS) || + bm_list == NULL || QSIMPLEQ_EMPTY(bm_list) || + bitmap_list_count(bm_list) != s->nb_bitmaps) + { + return -EINVAL; + } + + s->autoclear_features &= ~(uint64_t)QCOW2_AUTOCLEAR_BITMAPS; + ret = update_header_sync(bs); + if (ret < 0) { + /* Two variants are possible here: + * 1. Autoclear flag is dropped, all bitmaps will be lost. + * 2. Autoclear flag is not dropped, old state is left. + */ + return ret; + } + + /* autoclear bit is not set, so we can safely update bitmap directory */ + + ret = bitmap_list_store(bs, bm_list, &s->bitmap_directory_offset, + &s->bitmap_directory_size, true); + if (ret < 0) { + /* autoclear bit is cleared, so all leaked clusters would be removed on + * qemu-img check */ + return ret; + } + + ret = update_header_sync(bs); + if (ret < 0) { + /* autoclear bit is cleared, so all leaked clusters would be removed on + * qemu-img check */ + return ret; + } + + s->autoclear_features |= QCOW2_AUTOCLEAR_BITMAPS; + return update_header_sync(bs); + /* If final update_header_sync() fails, two variants are possible: + * 1. Autoclear flag is not set, all bitmaps will be lost. + * 2. Autoclear flag is set, header and directory are successfully updated. + */ +} + +static int update_ext_header_and_dir(BlockDriverState *bs, + Qcow2BitmapList *bm_list) +{ + BDRVQcow2State *s = bs->opaque; + int ret; + uint64_t new_offset = 0; + uint64_t new_size = 0; + uint32_t new_nb_bitmaps = 0; + uint64_t old_offset = s->bitmap_directory_offset; + uint64_t old_size = s->bitmap_directory_size; + uint32_t old_nb_bitmaps = s->nb_bitmaps; + uint64_t old_autocl = s->autoclear_features; + + if (bm_list != NULL && !QSIMPLEQ_EMPTY(bm_list)) { + new_nb_bitmaps = bitmap_list_count(bm_list); + + if (new_nb_bitmaps > QCOW2_MAX_BITMAPS) { + return -EINVAL; + } + + ret = bitmap_list_store(bs, bm_list, &new_offset, &new_size, false); + if (ret < 0) { + return ret; + } + + ret = qcow2_flush_caches(bs); + if (ret < 0) { + goto fail; + } + + s->autoclear_features |= QCOW2_AUTOCLEAR_BITMAPS; + } else { + s->autoclear_features &= ~(uint64_t)QCOW2_AUTOCLEAR_BITMAPS; + } + + s->bitmap_directory_offset = new_offset; + s->bitmap_directory_size = new_size; + s->nb_bitmaps = new_nb_bitmaps; + + ret = update_header_sync(bs); + if (ret < 0) { + goto fail; + } + + if (old_size > 0) { + qcow2_free_clusters(bs, old_offset, old_size, QCOW2_DISCARD_OTHER); + } + + return 0; + +fail: + if (new_offset > 0) { + qcow2_free_clusters(bs, new_offset, new_size, QCOW2_DISCARD_OTHER); + } + + s->bitmap_directory_offset = old_offset; + s->bitmap_directory_size = old_size; + s->nb_bitmaps = old_nb_bitmaps; + s->autoclear_features = old_autocl; + + return ret; +} + +/* for g_slist_foreach for GSList of BdrvDirtyBitmap* elements */ +static void release_dirty_bitmap_helper(gpointer bitmap, + gpointer bs) +{ + bdrv_release_dirty_bitmap(bitmap); +} + +/* for g_slist_foreach for GSList of BdrvDirtyBitmap* elements */ +static void set_readonly_helper(gpointer bitmap, gpointer value) +{ + bdrv_dirty_bitmap_set_readonly(bitmap, (bool)value); +} + +/* qcow2_load_dirty_bitmaps() + * Return value is a hint for caller: true means that the Qcow2 header was + * updated. (false doesn't mean that the header should be updated by the + * caller, it just means that updating was not needed or the image cannot be + * written to). + * On failure the function returns false. + */ +bool qcow2_load_dirty_bitmaps(BlockDriverState *bs, Error **errp) +{ + BDRVQcow2State *s = bs->opaque; + Qcow2BitmapList *bm_list; + Qcow2Bitmap *bm; + GSList *created_dirty_bitmaps = NULL; + bool header_updated = false; + bool needs_update = false; + + if (s->nb_bitmaps == 0) { + /* No bitmaps - nothing to do */ + return false; + } + + bm_list = bitmap_list_load(bs, s->bitmap_directory_offset, + s->bitmap_directory_size, errp); + if (bm_list == NULL) { + return false; + } + + QSIMPLEQ_FOREACH(bm, bm_list, entry) { + BdrvDirtyBitmap *bitmap; + + if ((bm->flags & BME_FLAG_IN_USE) && + bdrv_find_dirty_bitmap(bs, bm->name)) + { + /* + * We already have corresponding BdrvDirtyBitmap, and bitmap in the + * image is marked IN_USE. Firstly, this state is valid, no reason + * to consider existing BdrvDirtyBitmap to be bad. Secondly it's + * absolutely possible, when we do migration with shared storage + * with dirty-bitmaps capability enabled: if the bitmap was loaded + * from this storage before migration start, the storage will + * of-course contain IN_USE outdated version of the bitmap, and we + * should not load it on migration target, as we already have this + * bitmap, being migrated. + */ + continue; + } + + bitmap = load_bitmap(bs, bm, errp); + if (bitmap == NULL) { + goto fail; + } + + bdrv_dirty_bitmap_set_persistence(bitmap, true); + if (bm->flags & BME_FLAG_IN_USE) { + bdrv_dirty_bitmap_set_inconsistent(bitmap); + } else { + /* NB: updated flags only get written if can_write(bs) is true. */ + bm->flags |= BME_FLAG_IN_USE; + needs_update = true; + } + if (!(bm->flags & BME_FLAG_AUTO)) { + bdrv_disable_dirty_bitmap(bitmap); + } + created_dirty_bitmaps = + g_slist_append(created_dirty_bitmaps, bitmap); + } + + if (needs_update && can_write(bs)) { + /* in_use flags must be updated */ + int ret = update_ext_header_and_dir_in_place(bs, bm_list); + if (ret < 0) { + error_setg_errno(errp, -ret, "Can't update bitmap directory"); + goto fail; + } + header_updated = true; + } + + if (!can_write(bs)) { + g_slist_foreach(created_dirty_bitmaps, set_readonly_helper, + (gpointer)true); + } + + g_slist_free(created_dirty_bitmaps); + bitmap_list_free(bm_list); + + return header_updated; + +fail: + g_slist_foreach(created_dirty_bitmaps, release_dirty_bitmap_helper, bs); + g_slist_free(created_dirty_bitmaps); + bitmap_list_free(bm_list); + + return false; +} + + +static Qcow2BitmapInfoFlagsList *get_bitmap_info_flags(uint32_t flags) +{ + Qcow2BitmapInfoFlagsList *list = NULL; + Qcow2BitmapInfoFlagsList **plist = &list; + int i; + + static const struct { + int bme; /* Bitmap directory entry flags */ + int info; /* The flags to report to the user */ + } map[] = { + { BME_FLAG_IN_USE, QCOW2_BITMAP_INFO_FLAGS_IN_USE }, + { BME_FLAG_AUTO, QCOW2_BITMAP_INFO_FLAGS_AUTO }, + }; + + int map_size = ARRAY_SIZE(map); + + for (i = 0; i < map_size; ++i) { + if (flags & map[i].bme) { + Qcow2BitmapInfoFlagsList *entry = + g_new0(Qcow2BitmapInfoFlagsList, 1); + entry->value = map[i].info; + *plist = entry; + plist = &entry->next; + flags &= ~map[i].bme; + } + } + /* Check if the BME_* mapping above is complete */ + assert(!flags); + + return list; +} + +/* + * qcow2_get_bitmap_info_list() + * Returns a list of QCOW2 bitmap details. + * In case of no bitmaps, the function returns NULL and + * the @errp parameter is not set. + * When bitmap information can not be obtained, the function returns + * NULL and the @errp parameter is set. + */ +Qcow2BitmapInfoList *qcow2_get_bitmap_info_list(BlockDriverState *bs, + Error **errp) +{ + BDRVQcow2State *s = bs->opaque; + Qcow2BitmapList *bm_list; + Qcow2Bitmap *bm; + Qcow2BitmapInfoList *list = NULL; + Qcow2BitmapInfoList **plist = &list; + + if (s->nb_bitmaps == 0) { + return NULL; + } + + bm_list = bitmap_list_load(bs, s->bitmap_directory_offset, + s->bitmap_directory_size, errp); + if (bm_list == NULL) { + return NULL; + } + + QSIMPLEQ_FOREACH(bm, bm_list, entry) { + Qcow2BitmapInfo *info = g_new0(Qcow2BitmapInfo, 1); + Qcow2BitmapInfoList *obj = g_new0(Qcow2BitmapInfoList, 1); + info->granularity = 1U << bm->granularity_bits; + info->name = g_strdup(bm->name); + info->flags = get_bitmap_info_flags(bm->flags & ~BME_RESERVED_FLAGS); + obj->value = info; + *plist = obj; + plist = &obj->next; + } + + bitmap_list_free(bm_list); + + return list; +} + +int qcow2_reopen_bitmaps_rw(BlockDriverState *bs, Error **errp) +{ + BDRVQcow2State *s = bs->opaque; + Qcow2BitmapList *bm_list; + Qcow2Bitmap *bm; + GSList *ro_dirty_bitmaps = NULL; + int ret = -EINVAL; + bool need_header_update = false; + + if (s->nb_bitmaps == 0) { + /* No bitmaps - nothing to do */ + return 0; + } + + bm_list = bitmap_list_load(bs, s->bitmap_directory_offset, + s->bitmap_directory_size, errp); + if (bm_list == NULL) { + return -EINVAL; + } + + QSIMPLEQ_FOREACH(bm, bm_list, entry) { + BdrvDirtyBitmap *bitmap = bdrv_find_dirty_bitmap(bs, bm->name); + + if (!bitmap) { + error_setg(errp, "Unexpected bitmap '%s' in image '%s'", + bm->name, bs->filename); + goto out; + } + + if (!(bm->flags & BME_FLAG_IN_USE)) { + if (!bdrv_dirty_bitmap_readonly(bitmap)) { + error_setg(errp, "Corruption: bitmap '%s' is not marked IN_USE " + "in the image '%s' and not marked readonly in RAM", + bm->name, bs->filename); + goto out; + } + if (bdrv_dirty_bitmap_inconsistent(bitmap)) { + error_setg(errp, "Corruption: bitmap '%s' is inconsistent but " + "is not marked IN_USE in the image '%s'", bm->name, + bs->filename); + goto out; + } + + bm->flags |= BME_FLAG_IN_USE; + need_header_update = true; + } else { + /* + * What if flags already has BME_FLAG_IN_USE ? + * + * 1. if we are reopening RW -> RW it's OK, of course. + * 2. if we are reopening RO -> RW: + * 2.1 if @bitmap is inconsistent, it's OK. It means that it was + * inconsistent (IN_USE) when we loaded it + * 2.2 if @bitmap is not inconsistent. This seems to be impossible + * and implies third party interaction. Let's error-out for + * safety. + */ + if (bdrv_dirty_bitmap_readonly(bitmap) && + !bdrv_dirty_bitmap_inconsistent(bitmap)) + { + error_setg(errp, "Corruption: bitmap '%s' is marked IN_USE " + "in the image '%s' but it is readonly and " + "consistent in RAM", + bm->name, bs->filename); + goto out; + } + } + + if (bdrv_dirty_bitmap_readonly(bitmap)) { + ro_dirty_bitmaps = g_slist_append(ro_dirty_bitmaps, bitmap); + } + } + + if (need_header_update) { + if (!can_write(bs->file->bs) || !(bs->file->perm & BLK_PERM_WRITE)) { + error_setg(errp, "Failed to reopen bitmaps rw: no write access " + "the protocol file"); + goto out; + } + + /* in_use flags must be updated */ + ret = update_ext_header_and_dir_in_place(bs, bm_list); + if (ret < 0) { + error_setg_errno(errp, -ret, "Cannot update bitmap directory"); + goto out; + } + } + + g_slist_foreach(ro_dirty_bitmaps, set_readonly_helper, false); + ret = 0; + +out: + g_slist_free(ro_dirty_bitmaps); + bitmap_list_free(bm_list); + + return ret; +} + +/* Checks to see if it's safe to resize bitmaps */ +int qcow2_truncate_bitmaps_check(BlockDriverState *bs, Error **errp) +{ + BDRVQcow2State *s = bs->opaque; + Qcow2BitmapList *bm_list; + Qcow2Bitmap *bm; + int ret = 0; + + if (s->nb_bitmaps == 0) { + return 0; + } + + bm_list = bitmap_list_load(bs, s->bitmap_directory_offset, + s->bitmap_directory_size, errp); + if (bm_list == NULL) { + return -EINVAL; + } + + QSIMPLEQ_FOREACH(bm, bm_list, entry) { + BdrvDirtyBitmap *bitmap = bdrv_find_dirty_bitmap(bs, bm->name); + if (bitmap == NULL) { + /* + * We rely on all bitmaps being in-memory to be able to resize them, + * Otherwise, we'd need to resize them on disk explicitly + */ + error_setg(errp, "Cannot resize qcow2 with persistent bitmaps that " + "were not loaded into memory"); + ret = -ENOTSUP; + goto out; + } + + /* + * The checks against readonly and busy are redundant, but certainly + * do no harm. checks against inconsistent are crucial: + */ + if (bdrv_dirty_bitmap_check(bitmap, BDRV_BITMAP_DEFAULT, errp)) { + ret = -ENOTSUP; + goto out; + } + } + +out: + bitmap_list_free(bm_list); + return ret; +} + +/* store_bitmap_data() + * Store bitmap to image, filling bitmap table accordingly. + */ +static uint64_t *store_bitmap_data(BlockDriverState *bs, + BdrvDirtyBitmap *bitmap, + uint32_t *bitmap_table_size, Error **errp) +{ + int ret; + BDRVQcow2State *s = bs->opaque; + int64_t offset; + uint64_t limit; + uint64_t bm_size = bdrv_dirty_bitmap_size(bitmap); + const char *bm_name = bdrv_dirty_bitmap_name(bitmap); + uint8_t *buf = NULL; + uint64_t *tb; + uint64_t tb_size = + size_to_clusters(s, + bdrv_dirty_bitmap_serialization_size(bitmap, 0, bm_size)); + + if (tb_size > BME_MAX_TABLE_SIZE || + tb_size * s->cluster_size > BME_MAX_PHYS_SIZE) + { + error_setg(errp, "Bitmap '%s' is too big", bm_name); + return NULL; + } + + tb = g_try_new0(uint64_t, tb_size); + if (tb == NULL) { + error_setg(errp, "No memory"); + return NULL; + } + + buf = g_malloc(s->cluster_size); + limit = bytes_covered_by_bitmap_cluster(s, bitmap); + assert(DIV_ROUND_UP(bm_size, limit) == tb_size); + + offset = 0; + while ((offset = bdrv_dirty_bitmap_next_dirty(bitmap, offset, INT64_MAX)) + >= 0) + { + uint64_t cluster = offset / limit; + uint64_t end, write_size; + int64_t off; + + /* + * We found the first dirty offset, but want to write out the + * entire cluster of the bitmap that includes that offset, + * including any leading zero bits. + */ + offset = QEMU_ALIGN_DOWN(offset, limit); + end = MIN(bm_size, offset + limit); + write_size = bdrv_dirty_bitmap_serialization_size(bitmap, offset, + end - offset); + assert(write_size <= s->cluster_size); + + off = qcow2_alloc_clusters(bs, s->cluster_size); + if (off < 0) { + error_setg_errno(errp, -off, + "Failed to allocate clusters for bitmap '%s'", + bm_name); + goto fail; + } + tb[cluster] = off; + + bdrv_dirty_bitmap_serialize_part(bitmap, buf, offset, end - offset); + if (write_size < s->cluster_size) { + memset(buf + write_size, 0, s->cluster_size - write_size); + } + + ret = qcow2_pre_write_overlap_check(bs, 0, off, s->cluster_size, false); + if (ret < 0) { + error_setg_errno(errp, -ret, "Qcow2 overlap check failed"); + goto fail; + } + + ret = bdrv_pwrite(bs->file, off, buf, s->cluster_size); + if (ret < 0) { + error_setg_errno(errp, -ret, "Failed to write bitmap '%s' to file", + bm_name); + goto fail; + } + + offset = end; + } + + *bitmap_table_size = tb_size; + g_free(buf); + + return tb; + +fail: + clear_bitmap_table(bs, tb, tb_size); + g_free(buf); + g_free(tb); + + return NULL; +} + +/* store_bitmap() + * Store bm->dirty_bitmap to qcow2. + * Set bm->table_offset and bm->table_size accordingly. + */ +static int store_bitmap(BlockDriverState *bs, Qcow2Bitmap *bm, Error **errp) +{ + int ret; + uint64_t *tb; + int64_t tb_offset; + uint32_t tb_size; + BdrvDirtyBitmap *bitmap = bm->dirty_bitmap; + const char *bm_name; + + assert(bitmap != NULL); + + bm_name = bdrv_dirty_bitmap_name(bitmap); + + tb = store_bitmap_data(bs, bitmap, &tb_size, errp); + if (tb == NULL) { + return -EINVAL; + } + + assert(tb_size <= BME_MAX_TABLE_SIZE); + tb_offset = qcow2_alloc_clusters(bs, tb_size * sizeof(tb[0])); + if (tb_offset < 0) { + error_setg_errno(errp, -tb_offset, + "Failed to allocate clusters for bitmap '%s'", + bm_name); + ret = tb_offset; + goto fail; + } + + ret = qcow2_pre_write_overlap_check(bs, 0, tb_offset, + tb_size * sizeof(tb[0]), false); + if (ret < 0) { + error_setg_errno(errp, -ret, "Qcow2 overlap check failed"); + goto fail; + } + + bitmap_table_to_be(tb, tb_size); + ret = bdrv_pwrite(bs->file, tb_offset, tb, tb_size * sizeof(tb[0])); + if (ret < 0) { + error_setg_errno(errp, -ret, "Failed to write bitmap '%s' to file", + bm_name); + goto fail; + } + + g_free(tb); + + bm->table.offset = tb_offset; + bm->table.size = tb_size; + + return 0; + +fail: + clear_bitmap_table(bs, tb, tb_size); + + if (tb_offset > 0) { + qcow2_free_clusters(bs, tb_offset, tb_size * sizeof(tb[0]), + QCOW2_DISCARD_OTHER); + } + + g_free(tb); + + return ret; +} + +static Qcow2Bitmap *find_bitmap_by_name(Qcow2BitmapList *bm_list, + const char *name) +{ + Qcow2Bitmap *bm; + + QSIMPLEQ_FOREACH(bm, bm_list, entry) { + if (strcmp(name, bm->name) == 0) { + return bm; + } + } + + return NULL; +} + +int coroutine_fn qcow2_co_remove_persistent_dirty_bitmap(BlockDriverState *bs, + const char *name, + Error **errp) +{ + int ret; + BDRVQcow2State *s = bs->opaque; + Qcow2Bitmap *bm = NULL; + Qcow2BitmapList *bm_list; + + if (s->nb_bitmaps == 0) { + /* + * Absence of the bitmap is not an error: see explanation above + * bdrv_co_remove_persistent_dirty_bitmap() definition. + */ + return 0; + } + + qemu_co_mutex_lock(&s->lock); + + bm_list = bitmap_list_load(bs, s->bitmap_directory_offset, + s->bitmap_directory_size, errp); + if (bm_list == NULL) { + ret = -EIO; + goto out; + } + + bm = find_bitmap_by_name(bm_list, name); + if (bm == NULL) { + /* Absence of the bitmap is not an error, see above. */ + ret = 0; + goto out; + } + + QSIMPLEQ_REMOVE(bm_list, bm, Qcow2Bitmap, entry); + + ret = update_ext_header_and_dir(bs, bm_list); + if (ret < 0) { + error_setg_errno(errp, -ret, "Failed to update bitmap extension"); + goto out; + } + + free_bitmap_clusters(bs, &bm->table); + +out: + qemu_co_mutex_unlock(&s->lock); + + bitmap_free(bm); + bitmap_list_free(bm_list); + + return ret; +} + +/* + * qcow2_store_persistent_dirty_bitmaps + * + * Stores persistent BdrvDirtyBitmap objects. + * + * @release_stored: if true, release BdrvDirtyBitmap's after storing to the + * image. This is used in two cases, both via qcow2_inactivate: + * 1. bdrv_close: It's correct to remove bitmaps on close. + * 2. migration: If bitmaps are migrated through migration channel via + * 'dirty-bitmaps' migration capability they are not handled by this code. + * Otherwise, it's OK to drop BdrvDirtyBitmap's and reload them on + * invalidation. + * + * Anyway, it's correct to remove BdrvDirtyBitmap's on inactivation, as + * inactivation means that we lose control on disk, and therefore on bitmaps, + * we should sync them and do not touch more. + * + * Contrariwise, we don't want to release any bitmaps on just reopen-to-ro, + * when we need to store them, as image is still under our control, and it's + * good to keep all the bitmaps in read-only mode. Moreover, keeping them + * read-only is correct because this is what would happen if we opened the node + * readonly to begin with, and whether we opened directly or reopened to that + * state shouldn't matter for the state we get afterward. + */ +void qcow2_store_persistent_dirty_bitmaps(BlockDriverState *bs, + bool release_stored, Error **errp) +{ + BdrvDirtyBitmap *bitmap; + BDRVQcow2State *s = bs->opaque; + uint32_t new_nb_bitmaps = s->nb_bitmaps; + uint64_t new_dir_size = s->bitmap_directory_size; + int ret; + Qcow2BitmapList *bm_list; + Qcow2Bitmap *bm; + QSIMPLEQ_HEAD(, Qcow2BitmapTable) drop_tables; + Qcow2BitmapTable *tb, *tb_next; + bool need_write = false; + + QSIMPLEQ_INIT(&drop_tables); + + if (s->nb_bitmaps == 0) { + bm_list = bitmap_list_new(); + } else { + bm_list = bitmap_list_load(bs, s->bitmap_directory_offset, + s->bitmap_directory_size, errp); + if (bm_list == NULL) { + return; + } + } + + /* check constraints and names */ + FOR_EACH_DIRTY_BITMAP(bs, bitmap) { + const char *name = bdrv_dirty_bitmap_name(bitmap); + uint32_t granularity = bdrv_dirty_bitmap_granularity(bitmap); + Qcow2Bitmap *bm; + + if (!bdrv_dirty_bitmap_get_persistence(bitmap) || + bdrv_dirty_bitmap_inconsistent(bitmap)) { + continue; + } + + if (bdrv_dirty_bitmap_readonly(bitmap)) { + /* + * Store the bitmap in the associated Qcow2Bitmap so it + * can be released later + */ + bm = find_bitmap_by_name(bm_list, name); + if (bm) { + bm->dirty_bitmap = bitmap; + } + continue; + } + + need_write = true; + + if (check_constraints_on_bitmap(bs, name, granularity, errp) < 0) { + error_prepend(errp, "Bitmap '%s' doesn't satisfy the constraints: ", + name); + goto fail; + } + + bm = find_bitmap_by_name(bm_list, name); + if (bm == NULL) { + if (++new_nb_bitmaps > QCOW2_MAX_BITMAPS) { + error_setg(errp, "Too many persistent bitmaps"); + goto fail; + } + + new_dir_size += calc_dir_entry_size(strlen(name), 0); + if (new_dir_size > QCOW2_MAX_BITMAP_DIRECTORY_SIZE) { + error_setg(errp, "Bitmap directory is too large"); + goto fail; + } + + bm = g_new0(Qcow2Bitmap, 1); + bm->name = g_strdup(name); + QSIMPLEQ_INSERT_TAIL(bm_list, bm, entry); + } else { + if (!(bm->flags & BME_FLAG_IN_USE)) { + error_setg(errp, "Bitmap '%s' already exists in the image", + name); + goto fail; + } + tb = g_memdup(&bm->table, sizeof(bm->table)); + bm->table.offset = 0; + bm->table.size = 0; + QSIMPLEQ_INSERT_TAIL(&drop_tables, tb, entry); + } + bm->flags = bdrv_dirty_bitmap_enabled(bitmap) ? BME_FLAG_AUTO : 0; + bm->granularity_bits = ctz32(bdrv_dirty_bitmap_granularity(bitmap)); + bm->dirty_bitmap = bitmap; + } + + if (!need_write) { + goto success; + } + + if (!can_write(bs)) { + error_setg(errp, "No write access"); + goto fail; + } + + /* allocate clusters and store bitmaps */ + QSIMPLEQ_FOREACH(bm, bm_list, entry) { + BdrvDirtyBitmap *bitmap = bm->dirty_bitmap; + + if (bitmap == NULL || bdrv_dirty_bitmap_readonly(bitmap)) { + continue; + } + + ret = store_bitmap(bs, bm, errp); + if (ret < 0) { + goto fail; + } + } + + ret = update_ext_header_and_dir(bs, bm_list); + if (ret < 0) { + error_setg_errno(errp, -ret, "Failed to update bitmap extension"); + goto fail; + } + + /* Bitmap directory was successfully updated, so, old data can be dropped. + * TODO it is better to reuse these clusters */ + QSIMPLEQ_FOREACH_SAFE(tb, &drop_tables, entry, tb_next) { + free_bitmap_clusters(bs, tb); + g_free(tb); + } + +success: + if (release_stored) { + QSIMPLEQ_FOREACH(bm, bm_list, entry) { + if (bm->dirty_bitmap == NULL) { + continue; + } + + bdrv_release_dirty_bitmap(bm->dirty_bitmap); + } + } + + bitmap_list_free(bm_list); + return; + +fail: + QSIMPLEQ_FOREACH(bm, bm_list, entry) { + if (bm->dirty_bitmap == NULL || bm->table.offset == 0 || + bdrv_dirty_bitmap_readonly(bm->dirty_bitmap)) + { + continue; + } + + free_bitmap_clusters(bs, &bm->table); + } + + QSIMPLEQ_FOREACH_SAFE(tb, &drop_tables, entry, tb_next) { + g_free(tb); + } + + bitmap_list_free(bm_list); +} + +int qcow2_reopen_bitmaps_ro(BlockDriverState *bs, Error **errp) +{ + BdrvDirtyBitmap *bitmap; + Error *local_err = NULL; + + qcow2_store_persistent_dirty_bitmaps(bs, false, &local_err); + if (local_err != NULL) { + error_propagate(errp, local_err); + return -EINVAL; + } + + FOR_EACH_DIRTY_BITMAP(bs, bitmap) { + if (bdrv_dirty_bitmap_get_persistence(bitmap)) { + bdrv_dirty_bitmap_set_readonly(bitmap, true); + } + } + + return 0; +} + +bool coroutine_fn qcow2_co_can_store_new_dirty_bitmap(BlockDriverState *bs, + const char *name, + uint32_t granularity, + Error **errp) +{ + BDRVQcow2State *s = bs->opaque; + BdrvDirtyBitmap *bitmap; + uint64_t bitmap_directory_size = 0; + uint32_t nb_bitmaps = 0; + + if (bdrv_find_dirty_bitmap(bs, name)) { + error_setg(errp, "Bitmap already exists: %s", name); + return false; + } + + if (s->qcow_version < 3) { + /* Without autoclear_features, we would always have to assume + * that a program without persistent dirty bitmap support has + * accessed this qcow2 file when opening it, and would thus + * have to drop all dirty bitmaps (defeating their purpose). + */ + error_setg(errp, "Cannot store dirty bitmaps in qcow2 v2 files"); + goto fail; + } + + if (check_constraints_on_bitmap(bs, name, granularity, errp) != 0) { + goto fail; + } + + FOR_EACH_DIRTY_BITMAP(bs, bitmap) { + if (bdrv_dirty_bitmap_get_persistence(bitmap)) { + nb_bitmaps++; + bitmap_directory_size += + calc_dir_entry_size(strlen(bdrv_dirty_bitmap_name(bitmap)), 0); + } + } + nb_bitmaps++; + bitmap_directory_size += calc_dir_entry_size(strlen(name), 0); + + if (nb_bitmaps > QCOW2_MAX_BITMAPS) { + error_setg(errp, + "Maximum number of persistent bitmaps is already reached"); + goto fail; + } + + if (bitmap_directory_size > QCOW2_MAX_BITMAP_DIRECTORY_SIZE) { + error_setg(errp, "Not enough space in the bitmap directory"); + goto fail; + } + + return true; + +fail: + error_prepend(errp, "Can't make bitmap '%s' persistent in '%s': ", + name, bdrv_get_device_or_node_name(bs)); + return false; +} + +bool qcow2_supports_persistent_dirty_bitmap(BlockDriverState *bs) +{ + BDRVQcow2State *s = bs->opaque; + + return s->qcow_version >= 3; +} + +/* + * Compute the space required to copy bitmaps from @in_bs. + * + * The computation is based as if copying to a new image with the + * given @cluster_size, which may differ from the cluster size in + * @in_bs; in fact, @in_bs might be something other than qcow2. + */ +uint64_t qcow2_get_persistent_dirty_bitmap_size(BlockDriverState *in_bs, + uint32_t cluster_size) +{ + uint64_t bitmaps_size = 0; + BdrvDirtyBitmap *bm; + size_t bitmap_dir_size = 0; + + FOR_EACH_DIRTY_BITMAP(in_bs, bm) { + if (bdrv_dirty_bitmap_get_persistence(bm)) { + const char *name = bdrv_dirty_bitmap_name(bm); + uint32_t granularity = bdrv_dirty_bitmap_granularity(bm); + uint64_t bmbytes = + get_bitmap_bytes_needed(bdrv_dirty_bitmap_size(bm), + granularity); + uint64_t bmclusters = DIV_ROUND_UP(bmbytes, cluster_size); + + /* Assume the entire bitmap is allocated */ + bitmaps_size += bmclusters * cluster_size; + /* Also reserve space for the bitmap table entries */ + bitmaps_size += ROUND_UP(bmclusters * BME_TABLE_ENTRY_SIZE, + cluster_size); + /* And space for contribution to bitmap directory size */ + bitmap_dir_size += calc_dir_entry_size(strlen(name), 0); + } + } + bitmaps_size += ROUND_UP(bitmap_dir_size, cluster_size); + + return bitmaps_size; +} diff --git a/block/qcow2-cache.c b/block/qcow2-cache.c new file mode 100644 index 000000000..7444b9c4a --- /dev/null +++ b/block/qcow2-cache.c @@ -0,0 +1,458 @@ +/* + * L2/refcount table cache for the QCOW2 format + * + * Copyright (c) 2010 Kevin Wolf + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ + +#include "qemu/osdep.h" +#include "qcow2.h" +#include "trace.h" + +typedef struct Qcow2CachedTable { + int64_t offset; + uint64_t lru_counter; + int ref; + bool dirty; +} Qcow2CachedTable; + +struct Qcow2Cache { + Qcow2CachedTable *entries; + struct Qcow2Cache *depends; + int size; + int table_size; + bool depends_on_flush; + void *table_array; + uint64_t lru_counter; + uint64_t cache_clean_lru_counter; +}; + +static inline void *qcow2_cache_get_table_addr(Qcow2Cache *c, int table) +{ + return (uint8_t *) c->table_array + (size_t) table * c->table_size; +} + +static inline int qcow2_cache_get_table_idx(Qcow2Cache *c, void *table) +{ + ptrdiff_t table_offset = (uint8_t *) table - (uint8_t *) c->table_array; + int idx = table_offset / c->table_size; + assert(idx >= 0 && idx < c->size && table_offset % c->table_size == 0); + return idx; +} + +static inline const char *qcow2_cache_get_name(BDRVQcow2State *s, Qcow2Cache *c) +{ + if (c == s->refcount_block_cache) { + return "refcount block"; + } else if (c == s->l2_table_cache) { + return "L2 table"; + } else { + /* Do not abort, because this is not critical */ + return "unknown"; + } +} + +static void qcow2_cache_table_release(Qcow2Cache *c, int i, int num_tables) +{ +/* Using MADV_DONTNEED to discard memory is a Linux-specific feature */ +#ifdef CONFIG_LINUX + void *t = qcow2_cache_get_table_addr(c, i); + int align = qemu_real_host_page_size; + size_t mem_size = (size_t) c->table_size * num_tables; + size_t offset = QEMU_ALIGN_UP((uintptr_t) t, align) - (uintptr_t) t; + size_t length = QEMU_ALIGN_DOWN(mem_size - offset, align); + if (mem_size > offset && length > 0) { + madvise((uint8_t *) t + offset, length, MADV_DONTNEED); + } +#endif +} + +static inline bool can_clean_entry(Qcow2Cache *c, int i) +{ + Qcow2CachedTable *t = &c->entries[i]; + return t->ref == 0 && !t->dirty && t->offset != 0 && + t->lru_counter <= c->cache_clean_lru_counter; +} + +void qcow2_cache_clean_unused(Qcow2Cache *c) +{ + int i = 0; + while (i < c->size) { + int to_clean = 0; + + /* Skip the entries that we don't need to clean */ + while (i < c->size && !can_clean_entry(c, i)) { + i++; + } + + /* And count how many we can clean in a row */ + while (i < c->size && can_clean_entry(c, i)) { + c->entries[i].offset = 0; + c->entries[i].lru_counter = 0; + i++; + to_clean++; + } + + if (to_clean > 0) { + qcow2_cache_table_release(c, i - to_clean, to_clean); + } + } + + c->cache_clean_lru_counter = c->lru_counter; +} + +Qcow2Cache *qcow2_cache_create(BlockDriverState *bs, int num_tables, + unsigned table_size) +{ + BDRVQcow2State *s = bs->opaque; + Qcow2Cache *c; + + assert(num_tables > 0); + assert(is_power_of_2(table_size)); + assert(table_size >= (1 << MIN_CLUSTER_BITS)); + assert(table_size <= s->cluster_size); + + c = g_new0(Qcow2Cache, 1); + c->size = num_tables; + c->table_size = table_size; + c->entries = g_try_new0(Qcow2CachedTable, num_tables); + c->table_array = qemu_try_blockalign(bs->file->bs, + (size_t) num_tables * c->table_size); + + if (!c->entries || !c->table_array) { + qemu_vfree(c->table_array); + g_free(c->entries); + g_free(c); + c = NULL; + } + + return c; +} + +int qcow2_cache_destroy(Qcow2Cache *c) +{ + int i; + + for (i = 0; i < c->size; i++) { + assert(c->entries[i].ref == 0); + } + + qemu_vfree(c->table_array); + g_free(c->entries); + g_free(c); + + return 0; +} + +static int qcow2_cache_flush_dependency(BlockDriverState *bs, Qcow2Cache *c) +{ + int ret; + + ret = qcow2_cache_flush(bs, c->depends); + if (ret < 0) { + return ret; + } + + c->depends = NULL; + c->depends_on_flush = false; + + return 0; +} + +static int qcow2_cache_entry_flush(BlockDriverState *bs, Qcow2Cache *c, int i) +{ + BDRVQcow2State *s = bs->opaque; + int ret = 0; + + if (!c->entries[i].dirty || !c->entries[i].offset) { + return 0; + } + + trace_qcow2_cache_entry_flush(qemu_coroutine_self(), + c == s->l2_table_cache, i); + + if (c->depends) { + ret = qcow2_cache_flush_dependency(bs, c); + } else if (c->depends_on_flush) { + ret = bdrv_flush(bs->file->bs); + if (ret >= 0) { + c->depends_on_flush = false; + } + } + + if (ret < 0) { + return ret; + } + + if (c == s->refcount_block_cache) { + ret = qcow2_pre_write_overlap_check(bs, QCOW2_OL_REFCOUNT_BLOCK, + c->entries[i].offset, c->table_size, false); + } else if (c == s->l2_table_cache) { + ret = qcow2_pre_write_overlap_check(bs, QCOW2_OL_ACTIVE_L2, + c->entries[i].offset, c->table_size, false); + } else { + ret = qcow2_pre_write_overlap_check(bs, 0, + c->entries[i].offset, c->table_size, false); + } + + if (ret < 0) { + return ret; + } + + if (c == s->refcount_block_cache) { + BLKDBG_EVENT(bs->file, BLKDBG_REFBLOCK_UPDATE_PART); + } else if (c == s->l2_table_cache) { + BLKDBG_EVENT(bs->file, BLKDBG_L2_UPDATE); + } + + ret = bdrv_pwrite(bs->file, c->entries[i].offset, + qcow2_cache_get_table_addr(c, i), c->table_size); + if (ret < 0) { + return ret; + } + + c->entries[i].dirty = false; + + return 0; +} + +int qcow2_cache_write(BlockDriverState *bs, Qcow2Cache *c) +{ + BDRVQcow2State *s = bs->opaque; + int result = 0; + int ret; + int i; + + trace_qcow2_cache_flush(qemu_coroutine_self(), c == s->l2_table_cache); + + for (i = 0; i < c->size; i++) { + ret = qcow2_cache_entry_flush(bs, c, i); + if (ret < 0 && result != -ENOSPC) { + result = ret; + } + } + + return result; +} + +int qcow2_cache_flush(BlockDriverState *bs, Qcow2Cache *c) +{ + int result = qcow2_cache_write(bs, c); + + if (result == 0) { + int ret = bdrv_flush(bs->file->bs); + if (ret < 0) { + result = ret; + } + } + + return result; +} + +int qcow2_cache_set_dependency(BlockDriverState *bs, Qcow2Cache *c, + Qcow2Cache *dependency) +{ + int ret; + + if (dependency->depends) { + ret = qcow2_cache_flush_dependency(bs, dependency); + if (ret < 0) { + return ret; + } + } + + if (c->depends && (c->depends != dependency)) { + ret = qcow2_cache_flush_dependency(bs, c); + if (ret < 0) { + return ret; + } + } + + c->depends = dependency; + return 0; +} + +void qcow2_cache_depends_on_flush(Qcow2Cache *c) +{ + c->depends_on_flush = true; +} + +int qcow2_cache_empty(BlockDriverState *bs, Qcow2Cache *c) +{ + int ret, i; + + ret = qcow2_cache_flush(bs, c); + if (ret < 0) { + return ret; + } + + for (i = 0; i < c->size; i++) { + assert(c->entries[i].ref == 0); + c->entries[i].offset = 0; + c->entries[i].lru_counter = 0; + } + + qcow2_cache_table_release(c, 0, c->size); + + c->lru_counter = 0; + + return 0; +} + +static int qcow2_cache_do_get(BlockDriverState *bs, Qcow2Cache *c, + uint64_t offset, void **table, bool read_from_disk) +{ + BDRVQcow2State *s = bs->opaque; + int i; + int ret; + int lookup_index; + uint64_t min_lru_counter = UINT64_MAX; + int min_lru_index = -1; + + assert(offset != 0); + + trace_qcow2_cache_get(qemu_coroutine_self(), c == s->l2_table_cache, + offset, read_from_disk); + + if (!QEMU_IS_ALIGNED(offset, c->table_size)) { + qcow2_signal_corruption(bs, true, -1, -1, "Cannot get entry from %s " + "cache: Offset %#" PRIx64 " is unaligned", + qcow2_cache_get_name(s, c), offset); + return -EIO; + } + + /* Check if the table is already cached */ + i = lookup_index = (offset / c->table_size * 4) % c->size; + do { + const Qcow2CachedTable *t = &c->entries[i]; + if (t->offset == offset) { + goto found; + } + if (t->ref == 0 && t->lru_counter < min_lru_counter) { + min_lru_counter = t->lru_counter; + min_lru_index = i; + } + if (++i == c->size) { + i = 0; + } + } while (i != lookup_index); + + if (min_lru_index == -1) { + /* This can't happen in current synchronous code, but leave the check + * here as a reminder for whoever starts using AIO with the cache */ + abort(); + } + + /* Cache miss: write a table back and replace it */ + i = min_lru_index; + trace_qcow2_cache_get_replace_entry(qemu_coroutine_self(), + c == s->l2_table_cache, i); + + ret = qcow2_cache_entry_flush(bs, c, i); + if (ret < 0) { + return ret; + } + + trace_qcow2_cache_get_read(qemu_coroutine_self(), + c == s->l2_table_cache, i); + c->entries[i].offset = 0; + if (read_from_disk) { + if (c == s->l2_table_cache) { + BLKDBG_EVENT(bs->file, BLKDBG_L2_LOAD); + } + + ret = bdrv_pread(bs->file, offset, + qcow2_cache_get_table_addr(c, i), + c->table_size); + if (ret < 0) { + return ret; + } + } + + c->entries[i].offset = offset; + + /* And return the right table */ +found: + c->entries[i].ref++; + *table = qcow2_cache_get_table_addr(c, i); + + trace_qcow2_cache_get_done(qemu_coroutine_self(), + c == s->l2_table_cache, i); + + return 0; +} + +int qcow2_cache_get(BlockDriverState *bs, Qcow2Cache *c, uint64_t offset, + void **table) +{ + return qcow2_cache_do_get(bs, c, offset, table, true); +} + +int qcow2_cache_get_empty(BlockDriverState *bs, Qcow2Cache *c, uint64_t offset, + void **table) +{ + return qcow2_cache_do_get(bs, c, offset, table, false); +} + +void qcow2_cache_put(Qcow2Cache *c, void **table) +{ + int i = qcow2_cache_get_table_idx(c, *table); + + c->entries[i].ref--; + *table = NULL; + + if (c->entries[i].ref == 0) { + c->entries[i].lru_counter = ++c->lru_counter; + } + + assert(c->entries[i].ref >= 0); +} + +void qcow2_cache_entry_mark_dirty(Qcow2Cache *c, void *table) +{ + int i = qcow2_cache_get_table_idx(c, table); + assert(c->entries[i].offset != 0); + c->entries[i].dirty = true; +} + +void *qcow2_cache_is_table_offset(Qcow2Cache *c, uint64_t offset) +{ + int i; + + for (i = 0; i < c->size; i++) { + if (c->entries[i].offset == offset) { + return qcow2_cache_get_table_addr(c, i); + } + } + return NULL; +} + +void qcow2_cache_discard(Qcow2Cache *c, void *table) +{ + int i = qcow2_cache_get_table_idx(c, table); + + assert(c->entries[i].ref == 0); + + c->entries[i].offset = 0; + c->entries[i].lru_counter = 0; + c->entries[i].dirty = false; + + qcow2_cache_table_release(c, i, 1); +} diff --git a/block/qcow2-cluster.c b/block/qcow2-cluster.c new file mode 100644 index 000000000..bd0597842 --- /dev/null +++ b/block/qcow2-cluster.c @@ -0,0 +1,2465 @@ +/* + * Block driver for the QCOW version 2 format + * + * Copyright (c) 2004-2006 Fabrice Bellard + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ + +#include "qemu/osdep.h" +#include + +#include "qapi/error.h" +#include "qcow2.h" +#include "qemu/bswap.h" +#include "trace.h" + +int qcow2_shrink_l1_table(BlockDriverState *bs, uint64_t exact_size) +{ + BDRVQcow2State *s = bs->opaque; + int new_l1_size, i, ret; + + if (exact_size >= s->l1_size) { + return 0; + } + + new_l1_size = exact_size; + +#ifdef DEBUG_ALLOC2 + fprintf(stderr, "shrink l1_table from %d to %d\n", s->l1_size, new_l1_size); +#endif + + BLKDBG_EVENT(bs->file, BLKDBG_L1_SHRINK_WRITE_TABLE); + ret = bdrv_pwrite_zeroes(bs->file, s->l1_table_offset + + new_l1_size * L1E_SIZE, + (s->l1_size - new_l1_size) * L1E_SIZE, 0); + if (ret < 0) { + goto fail; + } + + ret = bdrv_flush(bs->file->bs); + if (ret < 0) { + goto fail; + } + + BLKDBG_EVENT(bs->file, BLKDBG_L1_SHRINK_FREE_L2_CLUSTERS); + for (i = s->l1_size - 1; i > new_l1_size - 1; i--) { + if ((s->l1_table[i] & L1E_OFFSET_MASK) == 0) { + continue; + } + qcow2_free_clusters(bs, s->l1_table[i] & L1E_OFFSET_MASK, + s->cluster_size, QCOW2_DISCARD_ALWAYS); + s->l1_table[i] = 0; + } + return 0; + +fail: + /* + * If the write in the l1_table failed the image may contain a partially + * overwritten l1_table. In this case it would be better to clear the + * l1_table in memory to avoid possible image corruption. + */ + memset(s->l1_table + new_l1_size, 0, + (s->l1_size - new_l1_size) * L1E_SIZE); + return ret; +} + +int qcow2_grow_l1_table(BlockDriverState *bs, uint64_t min_size, + bool exact_size) +{ + BDRVQcow2State *s = bs->opaque; + int new_l1_size2, ret, i; + uint64_t *new_l1_table; + int64_t old_l1_table_offset, old_l1_size; + int64_t new_l1_table_offset, new_l1_size; + uint8_t data[12]; + + if (min_size <= s->l1_size) + return 0; + + /* Do a sanity check on min_size before trying to calculate new_l1_size + * (this prevents overflows during the while loop for the calculation of + * new_l1_size) */ + if (min_size > INT_MAX / L1E_SIZE) { + return -EFBIG; + } + + if (exact_size) { + new_l1_size = min_size; + } else { + /* Bump size up to reduce the number of times we have to grow */ + new_l1_size = s->l1_size; + if (new_l1_size == 0) { + new_l1_size = 1; + } + while (min_size > new_l1_size) { + new_l1_size = DIV_ROUND_UP(new_l1_size * 3, 2); + } + } + + QEMU_BUILD_BUG_ON(QCOW_MAX_L1_SIZE > INT_MAX); + if (new_l1_size > QCOW_MAX_L1_SIZE / L1E_SIZE) { + return -EFBIG; + } + +#ifdef DEBUG_ALLOC2 + fprintf(stderr, "grow l1_table from %d to %" PRId64 "\n", + s->l1_size, new_l1_size); +#endif + + new_l1_size2 = L1E_SIZE * new_l1_size; + new_l1_table = qemu_try_blockalign(bs->file->bs, new_l1_size2); + if (new_l1_table == NULL) { + return -ENOMEM; + } + memset(new_l1_table, 0, new_l1_size2); + + if (s->l1_size) { + memcpy(new_l1_table, s->l1_table, s->l1_size * L1E_SIZE); + } + + /* write new table (align to cluster) */ + BLKDBG_EVENT(bs->file, BLKDBG_L1_GROW_ALLOC_TABLE); + new_l1_table_offset = qcow2_alloc_clusters(bs, new_l1_size2); + if (new_l1_table_offset < 0) { + qemu_vfree(new_l1_table); + return new_l1_table_offset; + } + + ret = qcow2_cache_flush(bs, s->refcount_block_cache); + if (ret < 0) { + goto fail; + } + + /* the L1 position has not yet been updated, so these clusters must + * indeed be completely free */ + ret = qcow2_pre_write_overlap_check(bs, 0, new_l1_table_offset, + new_l1_size2, false); + if (ret < 0) { + goto fail; + } + + BLKDBG_EVENT(bs->file, BLKDBG_L1_GROW_WRITE_TABLE); + for(i = 0; i < s->l1_size; i++) + new_l1_table[i] = cpu_to_be64(new_l1_table[i]); + ret = bdrv_pwrite_sync(bs->file, new_l1_table_offset, + new_l1_table, new_l1_size2); + if (ret < 0) + goto fail; + for(i = 0; i < s->l1_size; i++) + new_l1_table[i] = be64_to_cpu(new_l1_table[i]); + + /* set new table */ + BLKDBG_EVENT(bs->file, BLKDBG_L1_GROW_ACTIVATE_TABLE); + stl_be_p(data, new_l1_size); + stq_be_p(data + 4, new_l1_table_offset); + ret = bdrv_pwrite_sync(bs->file, offsetof(QCowHeader, l1_size), + data, sizeof(data)); + if (ret < 0) { + goto fail; + } + qemu_vfree(s->l1_table); + old_l1_table_offset = s->l1_table_offset; + s->l1_table_offset = new_l1_table_offset; + s->l1_table = new_l1_table; + old_l1_size = s->l1_size; + s->l1_size = new_l1_size; + qcow2_free_clusters(bs, old_l1_table_offset, old_l1_size * L1E_SIZE, + QCOW2_DISCARD_OTHER); + return 0; + fail: + qemu_vfree(new_l1_table); + qcow2_free_clusters(bs, new_l1_table_offset, new_l1_size2, + QCOW2_DISCARD_OTHER); + return ret; +} + +/* + * l2_load + * + * @bs: The BlockDriverState + * @offset: A guest offset, used to calculate what slice of the L2 + * table to load. + * @l2_offset: Offset to the L2 table in the image file. + * @l2_slice: Location to store the pointer to the L2 slice. + * + * Loads a L2 slice into memory (L2 slices are the parts of L2 tables + * that are loaded by the qcow2 cache). If the slice is in the cache, + * the cache is used; otherwise the L2 slice is loaded from the image + * file. + */ +static int l2_load(BlockDriverState *bs, uint64_t offset, + uint64_t l2_offset, uint64_t **l2_slice) +{ + BDRVQcow2State *s = bs->opaque; + int start_of_slice = l2_entry_size(s) * + (offset_to_l2_index(s, offset) - offset_to_l2_slice_index(s, offset)); + + return qcow2_cache_get(bs, s->l2_table_cache, l2_offset + start_of_slice, + (void **)l2_slice); +} + +/* + * Writes an L1 entry to disk (note that depending on the alignment + * requirements this function may write more that just one entry in + * order to prevent bdrv_pwrite from performing a read-modify-write) + */ +int qcow2_write_l1_entry(BlockDriverState *bs, int l1_index) +{ + BDRVQcow2State *s = bs->opaque; + int l1_start_index; + int i, ret; + int bufsize = MAX(L1E_SIZE, + MIN(bs->file->bs->bl.request_alignment, s->cluster_size)); + int nentries = bufsize / L1E_SIZE; + g_autofree uint64_t *buf = g_try_new0(uint64_t, nentries); + + if (buf == NULL) { + return -ENOMEM; + } + + l1_start_index = QEMU_ALIGN_DOWN(l1_index, nentries); + for (i = 0; i < MIN(nentries, s->l1_size - l1_start_index); i++) { + buf[i] = cpu_to_be64(s->l1_table[l1_start_index + i]); + } + + ret = qcow2_pre_write_overlap_check(bs, QCOW2_OL_ACTIVE_L1, + s->l1_table_offset + L1E_SIZE * l1_start_index, bufsize, false); + if (ret < 0) { + return ret; + } + + BLKDBG_EVENT(bs->file, BLKDBG_L1_UPDATE); + ret = bdrv_pwrite_sync(bs->file, + s->l1_table_offset + L1E_SIZE * l1_start_index, + buf, bufsize); + if (ret < 0) { + return ret; + } + + return 0; +} + +/* + * l2_allocate + * + * Allocate a new l2 entry in the file. If l1_index points to an already + * used entry in the L2 table (i.e. we are doing a copy on write for the L2 + * table) copy the contents of the old L2 table into the newly allocated one. + * Otherwise the new table is initialized with zeros. + * + */ + +static int l2_allocate(BlockDriverState *bs, int l1_index) +{ + BDRVQcow2State *s = bs->opaque; + uint64_t old_l2_offset; + uint64_t *l2_slice = NULL; + unsigned slice, slice_size2, n_slices; + int64_t l2_offset; + int ret; + + old_l2_offset = s->l1_table[l1_index]; + + trace_qcow2_l2_allocate(bs, l1_index); + + /* allocate a new l2 entry */ + + l2_offset = qcow2_alloc_clusters(bs, s->l2_size * l2_entry_size(s)); + if (l2_offset < 0) { + ret = l2_offset; + goto fail; + } + + /* The offset must fit in the offset field of the L1 table entry */ + assert((l2_offset & L1E_OFFSET_MASK) == l2_offset); + + /* If we're allocating the table at offset 0 then something is wrong */ + if (l2_offset == 0) { + qcow2_signal_corruption(bs, true, -1, -1, "Preventing invalid " + "allocation of L2 table at offset 0"); + ret = -EIO; + goto fail; + } + + ret = qcow2_cache_flush(bs, s->refcount_block_cache); + if (ret < 0) { + goto fail; + } + + /* allocate a new entry in the l2 cache */ + + slice_size2 = s->l2_slice_size * l2_entry_size(s); + n_slices = s->cluster_size / slice_size2; + + trace_qcow2_l2_allocate_get_empty(bs, l1_index); + for (slice = 0; slice < n_slices; slice++) { + ret = qcow2_cache_get_empty(bs, s->l2_table_cache, + l2_offset + slice * slice_size2, + (void **) &l2_slice); + if (ret < 0) { + goto fail; + } + + if ((old_l2_offset & L1E_OFFSET_MASK) == 0) { + /* if there was no old l2 table, clear the new slice */ + memset(l2_slice, 0, slice_size2); + } else { + uint64_t *old_slice; + uint64_t old_l2_slice_offset = + (old_l2_offset & L1E_OFFSET_MASK) + slice * slice_size2; + + /* if there was an old l2 table, read a slice from the disk */ + BLKDBG_EVENT(bs->file, BLKDBG_L2_ALLOC_COW_READ); + ret = qcow2_cache_get(bs, s->l2_table_cache, old_l2_slice_offset, + (void **) &old_slice); + if (ret < 0) { + goto fail; + } + + memcpy(l2_slice, old_slice, slice_size2); + + qcow2_cache_put(s->l2_table_cache, (void **) &old_slice); + } + + /* write the l2 slice to the file */ + BLKDBG_EVENT(bs->file, BLKDBG_L2_ALLOC_WRITE); + + trace_qcow2_l2_allocate_write_l2(bs, l1_index); + qcow2_cache_entry_mark_dirty(s->l2_table_cache, l2_slice); + qcow2_cache_put(s->l2_table_cache, (void **) &l2_slice); + } + + ret = qcow2_cache_flush(bs, s->l2_table_cache); + if (ret < 0) { + goto fail; + } + + /* update the L1 entry */ + trace_qcow2_l2_allocate_write_l1(bs, l1_index); + s->l1_table[l1_index] = l2_offset | QCOW_OFLAG_COPIED; + ret = qcow2_write_l1_entry(bs, l1_index); + if (ret < 0) { + goto fail; + } + + trace_qcow2_l2_allocate_done(bs, l1_index, 0); + return 0; + +fail: + trace_qcow2_l2_allocate_done(bs, l1_index, ret); + if (l2_slice != NULL) { + qcow2_cache_put(s->l2_table_cache, (void **) &l2_slice); + } + s->l1_table[l1_index] = old_l2_offset; + if (l2_offset > 0) { + qcow2_free_clusters(bs, l2_offset, s->l2_size * l2_entry_size(s), + QCOW2_DISCARD_ALWAYS); + } + return ret; +} + +/* + * For a given L2 entry, count the number of contiguous subclusters of + * the same type starting from @sc_from. Compressed clusters are + * treated as if they were divided into subclusters of size + * s->subcluster_size. + * + * Return the number of contiguous subclusters and set @type to the + * subcluster type. + * + * If the L2 entry is invalid return -errno and set @type to + * QCOW2_SUBCLUSTER_INVALID. + */ +static int qcow2_get_subcluster_range_type(BlockDriverState *bs, + uint64_t l2_entry, + uint64_t l2_bitmap, + unsigned sc_from, + QCow2SubclusterType *type) +{ + BDRVQcow2State *s = bs->opaque; + uint32_t val; + + *type = qcow2_get_subcluster_type(bs, l2_entry, l2_bitmap, sc_from); + + if (*type == QCOW2_SUBCLUSTER_INVALID) { + return -EINVAL; + } else if (!has_subclusters(s) || *type == QCOW2_SUBCLUSTER_COMPRESSED) { + return s->subclusters_per_cluster - sc_from; + } + + switch (*type) { + case QCOW2_SUBCLUSTER_NORMAL: + val = l2_bitmap | QCOW_OFLAG_SUB_ALLOC_RANGE(0, sc_from); + return cto32(val) - sc_from; + + case QCOW2_SUBCLUSTER_ZERO_PLAIN: + case QCOW2_SUBCLUSTER_ZERO_ALLOC: + val = (l2_bitmap | QCOW_OFLAG_SUB_ZERO_RANGE(0, sc_from)) >> 32; + return cto32(val) - sc_from; + + case QCOW2_SUBCLUSTER_UNALLOCATED_PLAIN: + case QCOW2_SUBCLUSTER_UNALLOCATED_ALLOC: + val = ((l2_bitmap >> 32) | l2_bitmap) + & ~QCOW_OFLAG_SUB_ALLOC_RANGE(0, sc_from); + return ctz32(val) - sc_from; + + default: + g_assert_not_reached(); + } +} + +/* + * Return the number of contiguous subclusters of the exact same type + * in a given L2 slice, starting from cluster @l2_index, subcluster + * @sc_index. Allocated subclusters are required to be contiguous in + * the image file. + * At most @nb_clusters are checked (note that this means clusters, + * not subclusters). + * Compressed clusters are always processed one by one but for the + * purpose of this count they are treated as if they were divided into + * subclusters of size s->subcluster_size. + * On failure return -errno and update @l2_index to point to the + * invalid entry. + */ +static int count_contiguous_subclusters(BlockDriverState *bs, int nb_clusters, + unsigned sc_index, uint64_t *l2_slice, + unsigned *l2_index) +{ + BDRVQcow2State *s = bs->opaque; + int i, count = 0; + bool check_offset = false; + uint64_t expected_offset = 0; + QCow2SubclusterType expected_type = QCOW2_SUBCLUSTER_NORMAL, type; + + assert(*l2_index + nb_clusters <= s->l2_slice_size); + + for (i = 0; i < nb_clusters; i++) { + unsigned first_sc = (i == 0) ? sc_index : 0; + uint64_t l2_entry = get_l2_entry(s, l2_slice, *l2_index + i); + uint64_t l2_bitmap = get_l2_bitmap(s, l2_slice, *l2_index + i); + int ret = qcow2_get_subcluster_range_type(bs, l2_entry, l2_bitmap, + first_sc, &type); + if (ret < 0) { + *l2_index += i; /* Point to the invalid entry */ + return -EIO; + } + if (i == 0) { + if (type == QCOW2_SUBCLUSTER_COMPRESSED) { + /* Compressed clusters are always processed one by one */ + return ret; + } + expected_type = type; + expected_offset = l2_entry & L2E_OFFSET_MASK; + check_offset = (type == QCOW2_SUBCLUSTER_NORMAL || + type == QCOW2_SUBCLUSTER_ZERO_ALLOC || + type == QCOW2_SUBCLUSTER_UNALLOCATED_ALLOC); + } else if (type != expected_type) { + break; + } else if (check_offset) { + expected_offset += s->cluster_size; + if (expected_offset != (l2_entry & L2E_OFFSET_MASK)) { + break; + } + } + count += ret; + /* Stop if there are type changes before the end of the cluster */ + if (first_sc + ret < s->subclusters_per_cluster) { + break; + } + } + + return count; +} + +static int coroutine_fn do_perform_cow_read(BlockDriverState *bs, + uint64_t src_cluster_offset, + unsigned offset_in_cluster, + QEMUIOVector *qiov) +{ + int ret; + + if (qiov->size == 0) { + return 0; + } + + BLKDBG_EVENT(bs->file, BLKDBG_COW_READ); + + if (!bs->drv) { + return -ENOMEDIUM; + } + + /* Call .bdrv_co_readv() directly instead of using the public block-layer + * interface. This avoids double I/O throttling and request tracking, + * which can lead to deadlock when block layer copy-on-read is enabled. + */ + ret = bs->drv->bdrv_co_preadv_part(bs, + src_cluster_offset + offset_in_cluster, + qiov->size, qiov, 0, 0); + if (ret < 0) { + return ret; + } + + return 0; +} + +static int coroutine_fn do_perform_cow_write(BlockDriverState *bs, + uint64_t cluster_offset, + unsigned offset_in_cluster, + QEMUIOVector *qiov) +{ + BDRVQcow2State *s = bs->opaque; + int ret; + + if (qiov->size == 0) { + return 0; + } + + ret = qcow2_pre_write_overlap_check(bs, 0, + cluster_offset + offset_in_cluster, qiov->size, true); + if (ret < 0) { + return ret; + } + + BLKDBG_EVENT(bs->file, BLKDBG_COW_WRITE); + ret = bdrv_co_pwritev(s->data_file, cluster_offset + offset_in_cluster, + qiov->size, qiov, 0); + if (ret < 0) { + return ret; + } + + return 0; +} + + +/* + * get_host_offset + * + * For a given offset of the virtual disk find the equivalent host + * offset in the qcow2 file and store it in *host_offset. Neither + * offset needs to be aligned to a cluster boundary. + * + * If the cluster is unallocated then *host_offset will be 0. + * If the cluster is compressed then *host_offset will contain the + * complete compressed cluster descriptor. + * + * On entry, *bytes is the maximum number of contiguous bytes starting at + * offset that we are interested in. + * + * On exit, *bytes is the number of bytes starting at offset that have the same + * subcluster type and (if applicable) are stored contiguously in the image + * file. The subcluster type is stored in *subcluster_type. + * Compressed clusters are always processed one by one. + * + * Returns 0 on success, -errno in error cases. + */ +int qcow2_get_host_offset(BlockDriverState *bs, uint64_t offset, + unsigned int *bytes, uint64_t *host_offset, + QCow2SubclusterType *subcluster_type) +{ + BDRVQcow2State *s = bs->opaque; + unsigned int l2_index, sc_index; + uint64_t l1_index, l2_offset, *l2_slice, l2_entry, l2_bitmap; + int sc; + unsigned int offset_in_cluster; + uint64_t bytes_available, bytes_needed, nb_clusters; + QCow2SubclusterType type; + int ret; + + offset_in_cluster = offset_into_cluster(s, offset); + bytes_needed = (uint64_t) *bytes + offset_in_cluster; + + /* compute how many bytes there are between the start of the cluster + * containing offset and the end of the l2 slice that contains + * the entry pointing to it */ + bytes_available = + ((uint64_t) (s->l2_slice_size - offset_to_l2_slice_index(s, offset))) + << s->cluster_bits; + + if (bytes_needed > bytes_available) { + bytes_needed = bytes_available; + } + + *host_offset = 0; + + /* seek to the l2 offset in the l1 table */ + + l1_index = offset_to_l1_index(s, offset); + if (l1_index >= s->l1_size) { + type = QCOW2_SUBCLUSTER_UNALLOCATED_PLAIN; + goto out; + } + + l2_offset = s->l1_table[l1_index] & L1E_OFFSET_MASK; + if (!l2_offset) { + type = QCOW2_SUBCLUSTER_UNALLOCATED_PLAIN; + goto out; + } + + if (offset_into_cluster(s, l2_offset)) { + qcow2_signal_corruption(bs, true, -1, -1, "L2 table offset %#" PRIx64 + " unaligned (L1 index: %#" PRIx64 ")", + l2_offset, l1_index); + return -EIO; + } + + /* load the l2 slice in memory */ + + ret = l2_load(bs, offset, l2_offset, &l2_slice); + if (ret < 0) { + return ret; + } + + /* find the cluster offset for the given disk offset */ + + l2_index = offset_to_l2_slice_index(s, offset); + sc_index = offset_to_sc_index(s, offset); + l2_entry = get_l2_entry(s, l2_slice, l2_index); + l2_bitmap = get_l2_bitmap(s, l2_slice, l2_index); + + nb_clusters = size_to_clusters(s, bytes_needed); + /* bytes_needed <= *bytes + offset_in_cluster, both of which are unsigned + * integers; the minimum cluster size is 512, so this assertion is always + * true */ + assert(nb_clusters <= INT_MAX); + + type = qcow2_get_subcluster_type(bs, l2_entry, l2_bitmap, sc_index); + if (s->qcow_version < 3 && (type == QCOW2_SUBCLUSTER_ZERO_PLAIN || + type == QCOW2_SUBCLUSTER_ZERO_ALLOC)) { + qcow2_signal_corruption(bs, true, -1, -1, "Zero cluster entry found" + " in pre-v3 image (L2 offset: %#" PRIx64 + ", L2 index: %#x)", l2_offset, l2_index); + ret = -EIO; + goto fail; + } + switch (type) { + case QCOW2_SUBCLUSTER_INVALID: + break; /* This is handled by count_contiguous_subclusters() below */ + case QCOW2_SUBCLUSTER_COMPRESSED: + if (has_data_file(bs)) { + qcow2_signal_corruption(bs, true, -1, -1, "Compressed cluster " + "entry found in image with external data " + "file (L2 offset: %#" PRIx64 ", L2 index: " + "%#x)", l2_offset, l2_index); + ret = -EIO; + goto fail; + } + *host_offset = l2_entry & L2E_COMPRESSED_OFFSET_SIZE_MASK; + break; + case QCOW2_SUBCLUSTER_ZERO_PLAIN: + case QCOW2_SUBCLUSTER_UNALLOCATED_PLAIN: + break; + case QCOW2_SUBCLUSTER_ZERO_ALLOC: + case QCOW2_SUBCLUSTER_NORMAL: + case QCOW2_SUBCLUSTER_UNALLOCATED_ALLOC: { + uint64_t host_cluster_offset = l2_entry & L2E_OFFSET_MASK; + *host_offset = host_cluster_offset + offset_in_cluster; + if (offset_into_cluster(s, host_cluster_offset)) { + qcow2_signal_corruption(bs, true, -1, -1, + "Cluster allocation offset %#" + PRIx64 " unaligned (L2 offset: %#" PRIx64 + ", L2 index: %#x)", host_cluster_offset, + l2_offset, l2_index); + ret = -EIO; + goto fail; + } + if (has_data_file(bs) && *host_offset != offset) { + qcow2_signal_corruption(bs, true, -1, -1, + "External data file host cluster offset %#" + PRIx64 " does not match guest cluster " + "offset: %#" PRIx64 + ", L2 index: %#x)", host_cluster_offset, + offset - offset_in_cluster, l2_index); + ret = -EIO; + goto fail; + } + break; + } + default: + abort(); + } + + sc = count_contiguous_subclusters(bs, nb_clusters, sc_index, + l2_slice, &l2_index); + if (sc < 0) { + qcow2_signal_corruption(bs, true, -1, -1, "Invalid cluster entry found " + " (L2 offset: %#" PRIx64 ", L2 index: %#x)", + l2_offset, l2_index); + ret = -EIO; + goto fail; + } + qcow2_cache_put(s->l2_table_cache, (void **) &l2_slice); + + bytes_available = ((int64_t)sc + sc_index) << s->subcluster_bits; + +out: + if (bytes_available > bytes_needed) { + bytes_available = bytes_needed; + } + + /* bytes_available <= bytes_needed <= *bytes + offset_in_cluster; + * subtracting offset_in_cluster will therefore definitely yield something + * not exceeding UINT_MAX */ + assert(bytes_available - offset_in_cluster <= UINT_MAX); + *bytes = bytes_available - offset_in_cluster; + + *subcluster_type = type; + + return 0; + +fail: + qcow2_cache_put(s->l2_table_cache, (void **)&l2_slice); + return ret; +} + +/* + * get_cluster_table + * + * for a given disk offset, load (and allocate if needed) + * the appropriate slice of its l2 table. + * + * the cluster index in the l2 slice is given to the caller. + * + * Returns 0 on success, -errno in failure case + */ +static int get_cluster_table(BlockDriverState *bs, uint64_t offset, + uint64_t **new_l2_slice, + int *new_l2_index) +{ + BDRVQcow2State *s = bs->opaque; + unsigned int l2_index; + uint64_t l1_index, l2_offset; + uint64_t *l2_slice = NULL; + int ret; + + /* seek to the l2 offset in the l1 table */ + + l1_index = offset_to_l1_index(s, offset); + if (l1_index >= s->l1_size) { + ret = qcow2_grow_l1_table(bs, l1_index + 1, false); + if (ret < 0) { + return ret; + } + } + + assert(l1_index < s->l1_size); + l2_offset = s->l1_table[l1_index] & L1E_OFFSET_MASK; + if (offset_into_cluster(s, l2_offset)) { + qcow2_signal_corruption(bs, true, -1, -1, "L2 table offset %#" PRIx64 + " unaligned (L1 index: %#" PRIx64 ")", + l2_offset, l1_index); + return -EIO; + } + + if (!(s->l1_table[l1_index] & QCOW_OFLAG_COPIED)) { + /* First allocate a new L2 table (and do COW if needed) */ + ret = l2_allocate(bs, l1_index); + if (ret < 0) { + return ret; + } + + /* Then decrease the refcount of the old table */ + if (l2_offset) { + qcow2_free_clusters(bs, l2_offset, s->l2_size * l2_entry_size(s), + QCOW2_DISCARD_OTHER); + } + + /* Get the offset of the newly-allocated l2 table */ + l2_offset = s->l1_table[l1_index] & L1E_OFFSET_MASK; + assert(offset_into_cluster(s, l2_offset) == 0); + } + + /* load the l2 slice in memory */ + ret = l2_load(bs, offset, l2_offset, &l2_slice); + if (ret < 0) { + return ret; + } + + /* find the cluster offset for the given disk offset */ + + l2_index = offset_to_l2_slice_index(s, offset); + + *new_l2_slice = l2_slice; + *new_l2_index = l2_index; + + return 0; +} + +/* + * alloc_compressed_cluster_offset + * + * For a given offset on the virtual disk, allocate a new compressed cluster + * and put the host offset of the cluster into *host_offset. If a cluster is + * already allocated at the offset, return an error. + * + * Return 0 on success and -errno in error cases + */ +int qcow2_alloc_compressed_cluster_offset(BlockDriverState *bs, + uint64_t offset, + int compressed_size, + uint64_t *host_offset) +{ + BDRVQcow2State *s = bs->opaque; + int l2_index, ret; + uint64_t *l2_slice; + int64_t cluster_offset; + int nb_csectors; + + if (has_data_file(bs)) { + return 0; + } + + ret = get_cluster_table(bs, offset, &l2_slice, &l2_index); + if (ret < 0) { + return ret; + } + + /* Compression can't overwrite anything. Fail if the cluster was already + * allocated. */ + cluster_offset = get_l2_entry(s, l2_slice, l2_index); + if (cluster_offset & L2E_OFFSET_MASK) { + qcow2_cache_put(s->l2_table_cache, (void **) &l2_slice); + return -EIO; + } + + cluster_offset = qcow2_alloc_bytes(bs, compressed_size); + if (cluster_offset < 0) { + qcow2_cache_put(s->l2_table_cache, (void **) &l2_slice); + return cluster_offset; + } + + nb_csectors = + (cluster_offset + compressed_size - 1) / QCOW2_COMPRESSED_SECTOR_SIZE - + (cluster_offset / QCOW2_COMPRESSED_SECTOR_SIZE); + + /* The offset and size must fit in their fields of the L2 table entry */ + assert((cluster_offset & s->cluster_offset_mask) == cluster_offset); + assert((nb_csectors & s->csize_mask) == nb_csectors); + + cluster_offset |= QCOW_OFLAG_COMPRESSED | + ((uint64_t)nb_csectors << s->csize_shift); + + /* update L2 table */ + + /* compressed clusters never have the copied flag */ + + BLKDBG_EVENT(bs->file, BLKDBG_L2_UPDATE_COMPRESSED); + qcow2_cache_entry_mark_dirty(s->l2_table_cache, l2_slice); + set_l2_entry(s, l2_slice, l2_index, cluster_offset); + if (has_subclusters(s)) { + set_l2_bitmap(s, l2_slice, l2_index, 0); + } + qcow2_cache_put(s->l2_table_cache, (void **) &l2_slice); + + *host_offset = cluster_offset & s->cluster_offset_mask; + return 0; +} + +static int perform_cow(BlockDriverState *bs, QCowL2Meta *m) +{ + BDRVQcow2State *s = bs->opaque; + Qcow2COWRegion *start = &m->cow_start; + Qcow2COWRegion *end = &m->cow_end; + unsigned buffer_size; + unsigned data_bytes = end->offset - (start->offset + start->nb_bytes); + bool merge_reads; + uint8_t *start_buffer, *end_buffer; + QEMUIOVector qiov; + int ret; + + assert(start->nb_bytes <= UINT_MAX - end->nb_bytes); + assert(start->nb_bytes + end->nb_bytes <= UINT_MAX - data_bytes); + assert(start->offset + start->nb_bytes <= end->offset); + + if ((start->nb_bytes == 0 && end->nb_bytes == 0) || m->skip_cow) { + return 0; + } + + /* If we have to read both the start and end COW regions and the + * middle region is not too large then perform just one read + * operation */ + merge_reads = start->nb_bytes && end->nb_bytes && data_bytes <= 16384; + if (merge_reads) { + buffer_size = start->nb_bytes + data_bytes + end->nb_bytes; + } else { + /* If we have to do two reads, add some padding in the middle + * if necessary to make sure that the end region is optimally + * aligned. */ + size_t align = bdrv_opt_mem_align(bs); + assert(align > 0 && align <= UINT_MAX); + assert(QEMU_ALIGN_UP(start->nb_bytes, align) <= + UINT_MAX - end->nb_bytes); + buffer_size = QEMU_ALIGN_UP(start->nb_bytes, align) + end->nb_bytes; + } + + /* Reserve a buffer large enough to store all the data that we're + * going to read */ + start_buffer = qemu_try_blockalign(bs, buffer_size); + if (start_buffer == NULL) { + return -ENOMEM; + } + /* The part of the buffer where the end region is located */ + end_buffer = start_buffer + buffer_size - end->nb_bytes; + + qemu_iovec_init(&qiov, 2 + (m->data_qiov ? + qemu_iovec_subvec_niov(m->data_qiov, + m->data_qiov_offset, + data_bytes) + : 0)); + + qemu_co_mutex_unlock(&s->lock); + /* First we read the existing data from both COW regions. We + * either read the whole region in one go, or the start and end + * regions separately. */ + if (merge_reads) { + qemu_iovec_add(&qiov, start_buffer, buffer_size); + ret = do_perform_cow_read(bs, m->offset, start->offset, &qiov); + } else { + qemu_iovec_add(&qiov, start_buffer, start->nb_bytes); + ret = do_perform_cow_read(bs, m->offset, start->offset, &qiov); + if (ret < 0) { + goto fail; + } + + qemu_iovec_reset(&qiov); + qemu_iovec_add(&qiov, end_buffer, end->nb_bytes); + ret = do_perform_cow_read(bs, m->offset, end->offset, &qiov); + } + if (ret < 0) { + goto fail; + } + + /* Encrypt the data if necessary before writing it */ + if (bs->encrypted) { + ret = qcow2_co_encrypt(bs, + m->alloc_offset + start->offset, + m->offset + start->offset, + start_buffer, start->nb_bytes); + if (ret < 0) { + goto fail; + } + + ret = qcow2_co_encrypt(bs, + m->alloc_offset + end->offset, + m->offset + end->offset, + end_buffer, end->nb_bytes); + if (ret < 0) { + goto fail; + } + } + + /* And now we can write everything. If we have the guest data we + * can write everything in one single operation */ + if (m->data_qiov) { + qemu_iovec_reset(&qiov); + if (start->nb_bytes) { + qemu_iovec_add(&qiov, start_buffer, start->nb_bytes); + } + qemu_iovec_concat(&qiov, m->data_qiov, m->data_qiov_offset, data_bytes); + if (end->nb_bytes) { + qemu_iovec_add(&qiov, end_buffer, end->nb_bytes); + } + /* NOTE: we have a write_aio blkdebug event here followed by + * a cow_write one in do_perform_cow_write(), but there's only + * one single I/O operation */ + BLKDBG_EVENT(bs->file, BLKDBG_WRITE_AIO); + ret = do_perform_cow_write(bs, m->alloc_offset, start->offset, &qiov); + } else { + /* If there's no guest data then write both COW regions separately */ + qemu_iovec_reset(&qiov); + qemu_iovec_add(&qiov, start_buffer, start->nb_bytes); + ret = do_perform_cow_write(bs, m->alloc_offset, start->offset, &qiov); + if (ret < 0) { + goto fail; + } + + qemu_iovec_reset(&qiov); + qemu_iovec_add(&qiov, end_buffer, end->nb_bytes); + ret = do_perform_cow_write(bs, m->alloc_offset, end->offset, &qiov); + } + +fail: + qemu_co_mutex_lock(&s->lock); + + /* + * Before we update the L2 table to actually point to the new cluster, we + * need to be sure that the refcounts have been increased and COW was + * handled. + */ + if (ret == 0) { + qcow2_cache_depends_on_flush(s->l2_table_cache); + } + + qemu_vfree(start_buffer); + qemu_iovec_destroy(&qiov); + return ret; +} + +int qcow2_alloc_cluster_link_l2(BlockDriverState *bs, QCowL2Meta *m) +{ + BDRVQcow2State *s = bs->opaque; + int i, j = 0, l2_index, ret; + uint64_t *old_cluster, *l2_slice; + uint64_t cluster_offset = m->alloc_offset; + + trace_qcow2_cluster_link_l2(qemu_coroutine_self(), m->nb_clusters); + assert(m->nb_clusters > 0); + + old_cluster = g_try_new(uint64_t, m->nb_clusters); + if (old_cluster == NULL) { + ret = -ENOMEM; + goto err; + } + + /* copy content of unmodified sectors */ + ret = perform_cow(bs, m); + if (ret < 0) { + goto err; + } + + /* Update L2 table. */ + if (s->use_lazy_refcounts) { + qcow2_mark_dirty(bs); + } + if (qcow2_need_accurate_refcounts(s)) { + qcow2_cache_set_dependency(bs, s->l2_table_cache, + s->refcount_block_cache); + } + + ret = get_cluster_table(bs, m->offset, &l2_slice, &l2_index); + if (ret < 0) { + goto err; + } + qcow2_cache_entry_mark_dirty(s->l2_table_cache, l2_slice); + + assert(l2_index + m->nb_clusters <= s->l2_slice_size); + assert(m->cow_end.offset + m->cow_end.nb_bytes <= + m->nb_clusters << s->cluster_bits); + for (i = 0; i < m->nb_clusters; i++) { + uint64_t offset = cluster_offset + ((uint64_t)i << s->cluster_bits); + /* if two concurrent writes happen to the same unallocated cluster + * each write allocates separate cluster and writes data concurrently. + * The first one to complete updates l2 table with pointer to its + * cluster the second one has to do RMW (which is done above by + * perform_cow()), update l2 table with its cluster pointer and free + * old cluster. This is what this loop does */ + if (get_l2_entry(s, l2_slice, l2_index + i) != 0) { + old_cluster[j++] = get_l2_entry(s, l2_slice, l2_index + i); + } + + /* The offset must fit in the offset field of the L2 table entry */ + assert((offset & L2E_OFFSET_MASK) == offset); + + set_l2_entry(s, l2_slice, l2_index + i, offset | QCOW_OFLAG_COPIED); + + /* Update bitmap with the subclusters that were just written */ + if (has_subclusters(s) && !m->prealloc) { + uint64_t l2_bitmap = get_l2_bitmap(s, l2_slice, l2_index + i); + unsigned written_from = m->cow_start.offset; + unsigned written_to = m->cow_end.offset + m->cow_end.nb_bytes; + int first_sc, last_sc; + /* Narrow written_from and written_to down to the current cluster */ + written_from = MAX(written_from, i << s->cluster_bits); + written_to = MIN(written_to, (i + 1) << s->cluster_bits); + assert(written_from < written_to); + first_sc = offset_to_sc_index(s, written_from); + last_sc = offset_to_sc_index(s, written_to - 1); + l2_bitmap |= QCOW_OFLAG_SUB_ALLOC_RANGE(first_sc, last_sc + 1); + l2_bitmap &= ~QCOW_OFLAG_SUB_ZERO_RANGE(first_sc, last_sc + 1); + set_l2_bitmap(s, l2_slice, l2_index + i, l2_bitmap); + } + } + + + qcow2_cache_put(s->l2_table_cache, (void **) &l2_slice); + + /* + * If this was a COW, we need to decrease the refcount of the old cluster. + * + * Don't discard clusters that reach a refcount of 0 (e.g. compressed + * clusters), the next write will reuse them anyway. + */ + if (!m->keep_old_clusters && j != 0) { + for (i = 0; i < j; i++) { + qcow2_free_any_cluster(bs, old_cluster[i], QCOW2_DISCARD_NEVER); + } + } + + ret = 0; +err: + g_free(old_cluster); + return ret; + } + +/** + * Frees the allocated clusters because the request failed and they won't + * actually be linked. + */ +void qcow2_alloc_cluster_abort(BlockDriverState *bs, QCowL2Meta *m) +{ + BDRVQcow2State *s = bs->opaque; + if (!has_data_file(bs) && !m->keep_old_clusters) { + qcow2_free_clusters(bs, m->alloc_offset, + m->nb_clusters << s->cluster_bits, + QCOW2_DISCARD_NEVER); + } +} + +/* + * For a given write request, create a new QCowL2Meta structure, add + * it to @m and the BDRVQcow2State.cluster_allocs list. If the write + * request does not need copy-on-write or changes to the L2 metadata + * then this function does nothing. + * + * @host_cluster_offset points to the beginning of the first cluster. + * + * @guest_offset and @bytes indicate the offset and length of the + * request. + * + * @l2_slice contains the L2 entries of all clusters involved in this + * write request. + * + * If @keep_old is true it means that the clusters were already + * allocated and will be overwritten. If false then the clusters are + * new and we have to decrease the reference count of the old ones. + * + * Returns 0 on success, -errno on failure. + */ +static int calculate_l2_meta(BlockDriverState *bs, uint64_t host_cluster_offset, + uint64_t guest_offset, unsigned bytes, + uint64_t *l2_slice, QCowL2Meta **m, bool keep_old) +{ + BDRVQcow2State *s = bs->opaque; + int sc_index, l2_index = offset_to_l2_slice_index(s, guest_offset); + uint64_t l2_entry, l2_bitmap; + unsigned cow_start_from, cow_end_to; + unsigned cow_start_to = offset_into_cluster(s, guest_offset); + unsigned cow_end_from = cow_start_to + bytes; + unsigned nb_clusters = size_to_clusters(s, cow_end_from); + QCowL2Meta *old_m = *m; + QCow2SubclusterType type; + int i; + bool skip_cow = keep_old; + + assert(nb_clusters <= s->l2_slice_size - l2_index); + + /* Check the type of all affected subclusters */ + for (i = 0; i < nb_clusters; i++) { + l2_entry = get_l2_entry(s, l2_slice, l2_index + i); + l2_bitmap = get_l2_bitmap(s, l2_slice, l2_index + i); + if (skip_cow) { + unsigned write_from = MAX(cow_start_to, i << s->cluster_bits); + unsigned write_to = MIN(cow_end_from, (i + 1) << s->cluster_bits); + int first_sc = offset_to_sc_index(s, write_from); + int last_sc = offset_to_sc_index(s, write_to - 1); + int cnt = qcow2_get_subcluster_range_type(bs, l2_entry, l2_bitmap, + first_sc, &type); + /* Is any of the subclusters of type != QCOW2_SUBCLUSTER_NORMAL ? */ + if (type != QCOW2_SUBCLUSTER_NORMAL || first_sc + cnt <= last_sc) { + skip_cow = false; + } + } else { + /* If we can't skip the cow we can still look for invalid entries */ + type = qcow2_get_subcluster_type(bs, l2_entry, l2_bitmap, 0); + } + if (type == QCOW2_SUBCLUSTER_INVALID) { + int l1_index = offset_to_l1_index(s, guest_offset); + uint64_t l2_offset = s->l1_table[l1_index] & L1E_OFFSET_MASK; + qcow2_signal_corruption(bs, true, -1, -1, "Invalid cluster " + "entry found (L2 offset: %#" PRIx64 + ", L2 index: %#x)", + l2_offset, l2_index + i); + return -EIO; + } + } + + if (skip_cow) { + return 0; + } + + /* Get the L2 entry of the first cluster */ + l2_entry = get_l2_entry(s, l2_slice, l2_index); + l2_bitmap = get_l2_bitmap(s, l2_slice, l2_index); + sc_index = offset_to_sc_index(s, guest_offset); + type = qcow2_get_subcluster_type(bs, l2_entry, l2_bitmap, sc_index); + + if (!keep_old) { + switch (type) { + case QCOW2_SUBCLUSTER_COMPRESSED: + cow_start_from = 0; + break; + case QCOW2_SUBCLUSTER_NORMAL: + case QCOW2_SUBCLUSTER_ZERO_ALLOC: + case QCOW2_SUBCLUSTER_UNALLOCATED_ALLOC: + if (has_subclusters(s)) { + /* Skip all leading zero and unallocated subclusters */ + uint32_t alloc_bitmap = l2_bitmap & QCOW_L2_BITMAP_ALL_ALLOC; + cow_start_from = + MIN(sc_index, ctz32(alloc_bitmap)) << s->subcluster_bits; + } else { + cow_start_from = 0; + } + break; + case QCOW2_SUBCLUSTER_ZERO_PLAIN: + case QCOW2_SUBCLUSTER_UNALLOCATED_PLAIN: + cow_start_from = sc_index << s->subcluster_bits; + break; + default: + g_assert_not_reached(); + } + } else { + switch (type) { + case QCOW2_SUBCLUSTER_NORMAL: + cow_start_from = cow_start_to; + break; + case QCOW2_SUBCLUSTER_ZERO_ALLOC: + case QCOW2_SUBCLUSTER_UNALLOCATED_ALLOC: + cow_start_from = sc_index << s->subcluster_bits; + break; + default: + g_assert_not_reached(); + } + } + + /* Get the L2 entry of the last cluster */ + l2_index += nb_clusters - 1; + l2_entry = get_l2_entry(s, l2_slice, l2_index); + l2_bitmap = get_l2_bitmap(s, l2_slice, l2_index); + sc_index = offset_to_sc_index(s, guest_offset + bytes - 1); + type = qcow2_get_subcluster_type(bs, l2_entry, l2_bitmap, sc_index); + + if (!keep_old) { + switch (type) { + case QCOW2_SUBCLUSTER_COMPRESSED: + cow_end_to = ROUND_UP(cow_end_from, s->cluster_size); + break; + case QCOW2_SUBCLUSTER_NORMAL: + case QCOW2_SUBCLUSTER_ZERO_ALLOC: + case QCOW2_SUBCLUSTER_UNALLOCATED_ALLOC: + cow_end_to = ROUND_UP(cow_end_from, s->cluster_size); + if (has_subclusters(s)) { + /* Skip all trailing zero and unallocated subclusters */ + uint32_t alloc_bitmap = l2_bitmap & QCOW_L2_BITMAP_ALL_ALLOC; + cow_end_to -= + MIN(s->subclusters_per_cluster - sc_index - 1, + clz32(alloc_bitmap)) << s->subcluster_bits; + } + break; + case QCOW2_SUBCLUSTER_ZERO_PLAIN: + case QCOW2_SUBCLUSTER_UNALLOCATED_PLAIN: + cow_end_to = ROUND_UP(cow_end_from, s->subcluster_size); + break; + default: + g_assert_not_reached(); + } + } else { + switch (type) { + case QCOW2_SUBCLUSTER_NORMAL: + cow_end_to = cow_end_from; + break; + case QCOW2_SUBCLUSTER_ZERO_ALLOC: + case QCOW2_SUBCLUSTER_UNALLOCATED_ALLOC: + cow_end_to = ROUND_UP(cow_end_from, s->subcluster_size); + break; + default: + g_assert_not_reached(); + } + } + + *m = g_malloc0(sizeof(**m)); + **m = (QCowL2Meta) { + .next = old_m, + + .alloc_offset = host_cluster_offset, + .offset = start_of_cluster(s, guest_offset), + .nb_clusters = nb_clusters, + + .keep_old_clusters = keep_old, + + .cow_start = { + .offset = cow_start_from, + .nb_bytes = cow_start_to - cow_start_from, + }, + .cow_end = { + .offset = cow_end_from, + .nb_bytes = cow_end_to - cow_end_from, + }, + }; + + qemu_co_queue_init(&(*m)->dependent_requests); + QLIST_INSERT_HEAD(&s->cluster_allocs, *m, next_in_flight); + + return 0; +} + +/* + * Returns true if writing to the cluster pointed to by @l2_entry + * requires a new allocation (that is, if the cluster is unallocated + * or has refcount > 1 and therefore cannot be written in-place). + */ +static bool cluster_needs_new_alloc(BlockDriverState *bs, uint64_t l2_entry) +{ + switch (qcow2_get_cluster_type(bs, l2_entry)) { + case QCOW2_CLUSTER_NORMAL: + case QCOW2_CLUSTER_ZERO_ALLOC: + if (l2_entry & QCOW_OFLAG_COPIED) { + return false; + } + /* fallthrough */ + case QCOW2_CLUSTER_UNALLOCATED: + case QCOW2_CLUSTER_COMPRESSED: + case QCOW2_CLUSTER_ZERO_PLAIN: + return true; + default: + abort(); + } +} + +/* + * Returns the number of contiguous clusters that can be written to + * using one single write request, starting from @l2_index. + * At most @nb_clusters are checked. + * + * If @new_alloc is true this counts clusters that are either + * unallocated, or allocated but with refcount > 1 (so they need to be + * newly allocated and COWed). + * + * If @new_alloc is false this counts clusters that are already + * allocated and can be overwritten in-place (this includes clusters + * of type QCOW2_CLUSTER_ZERO_ALLOC). + */ +static int count_single_write_clusters(BlockDriverState *bs, int nb_clusters, + uint64_t *l2_slice, int l2_index, + bool new_alloc) +{ + BDRVQcow2State *s = bs->opaque; + uint64_t l2_entry = get_l2_entry(s, l2_slice, l2_index); + uint64_t expected_offset = l2_entry & L2E_OFFSET_MASK; + int i; + + for (i = 0; i < nb_clusters; i++) { + l2_entry = get_l2_entry(s, l2_slice, l2_index + i); + if (cluster_needs_new_alloc(bs, l2_entry) != new_alloc) { + break; + } + if (!new_alloc) { + if (expected_offset != (l2_entry & L2E_OFFSET_MASK)) { + break; + } + expected_offset += s->cluster_size; + } + } + + assert(i <= nb_clusters); + return i; +} + +/* + * Check if there already is an AIO write request in flight which allocates + * the same cluster. In this case we need to wait until the previous + * request has completed and updated the L2 table accordingly. + * + * Returns: + * 0 if there was no dependency. *cur_bytes indicates the number of + * bytes from guest_offset that can be read before the next + * dependency must be processed (or the request is complete) + * + * -EAGAIN if we had to wait for another request, previously gathered + * information on cluster allocation may be invalid now. The caller + * must start over anyway, so consider *cur_bytes undefined. + */ +static int handle_dependencies(BlockDriverState *bs, uint64_t guest_offset, + uint64_t *cur_bytes, QCowL2Meta **m) +{ + BDRVQcow2State *s = bs->opaque; + QCowL2Meta *old_alloc; + uint64_t bytes = *cur_bytes; + + QLIST_FOREACH(old_alloc, &s->cluster_allocs, next_in_flight) { + + uint64_t start = guest_offset; + uint64_t end = start + bytes; + uint64_t old_start = start_of_cluster(s, l2meta_cow_start(old_alloc)); + uint64_t old_end = ROUND_UP(l2meta_cow_end(old_alloc), s->cluster_size); + + if (end <= old_start || start >= old_end) { + /* No intersection */ + } else { + if (start < old_start) { + /* Stop at the start of a running allocation */ + bytes = old_start - start; + } else { + bytes = 0; + } + + /* Stop if already an l2meta exists. After yielding, it wouldn't + * be valid any more, so we'd have to clean up the old L2Metas + * and deal with requests depending on them before starting to + * gather new ones. Not worth the trouble. */ + if (bytes == 0 && *m) { + *cur_bytes = 0; + return 0; + } + + if (bytes == 0) { + /* Wait for the dependency to complete. We need to recheck + * the free/allocated clusters when we continue. */ + qemu_co_queue_wait(&old_alloc->dependent_requests, &s->lock); + return -EAGAIN; + } + } + } + + /* Make sure that existing clusters and new allocations are only used up to + * the next dependency if we shortened the request above */ + *cur_bytes = bytes; + + return 0; +} + +/* + * Checks how many already allocated clusters that don't require a new + * allocation there are at the given guest_offset (up to *bytes). + * If *host_offset is not INV_OFFSET, only physically contiguous clusters + * beginning at this host offset are counted. + * + * Note that guest_offset may not be cluster aligned. In this case, the + * returned *host_offset points to exact byte referenced by guest_offset and + * therefore isn't cluster aligned as well. + * + * Returns: + * 0: if no allocated clusters are available at the given offset. + * *bytes is normally unchanged. It is set to 0 if the cluster + * is allocated and can be overwritten in-place but doesn't have + * the right physical offset. + * + * 1: if allocated clusters that can be overwritten in place are + * available at the requested offset. *bytes may have decreased + * and describes the length of the area that can be written to. + * + * -errno: in error cases + */ +static int handle_copied(BlockDriverState *bs, uint64_t guest_offset, + uint64_t *host_offset, uint64_t *bytes, QCowL2Meta **m) +{ + BDRVQcow2State *s = bs->opaque; + int l2_index; + uint64_t l2_entry, cluster_offset; + uint64_t *l2_slice; + uint64_t nb_clusters; + unsigned int keep_clusters; + int ret; + + trace_qcow2_handle_copied(qemu_coroutine_self(), guest_offset, *host_offset, + *bytes); + + assert(*host_offset == INV_OFFSET || offset_into_cluster(s, guest_offset) + == offset_into_cluster(s, *host_offset)); + + /* + * Calculate the number of clusters to look for. We stop at L2 slice + * boundaries to keep things simple. + */ + nb_clusters = + size_to_clusters(s, offset_into_cluster(s, guest_offset) + *bytes); + + l2_index = offset_to_l2_slice_index(s, guest_offset); + nb_clusters = MIN(nb_clusters, s->l2_slice_size - l2_index); + /* Limit total byte count to BDRV_REQUEST_MAX_BYTES */ + nb_clusters = MIN(nb_clusters, BDRV_REQUEST_MAX_BYTES >> s->cluster_bits); + + /* Find L2 entry for the first involved cluster */ + ret = get_cluster_table(bs, guest_offset, &l2_slice, &l2_index); + if (ret < 0) { + return ret; + } + + l2_entry = get_l2_entry(s, l2_slice, l2_index); + cluster_offset = l2_entry & L2E_OFFSET_MASK; + + if (!cluster_needs_new_alloc(bs, l2_entry)) { + if (offset_into_cluster(s, cluster_offset)) { + qcow2_signal_corruption(bs, true, -1, -1, "%s cluster offset " + "%#" PRIx64 " unaligned (guest offset: %#" + PRIx64 ")", l2_entry & QCOW_OFLAG_ZERO ? + "Preallocated zero" : "Data", + cluster_offset, guest_offset); + ret = -EIO; + goto out; + } + + /* If a specific host_offset is required, check it */ + if (*host_offset != INV_OFFSET && cluster_offset != *host_offset) { + *bytes = 0; + ret = 0; + goto out; + } + + /* We keep all QCOW_OFLAG_COPIED clusters */ + keep_clusters = count_single_write_clusters(bs, nb_clusters, l2_slice, + l2_index, false); + assert(keep_clusters <= nb_clusters); + + *bytes = MIN(*bytes, + keep_clusters * s->cluster_size + - offset_into_cluster(s, guest_offset)); + assert(*bytes != 0); + + ret = calculate_l2_meta(bs, cluster_offset, guest_offset, + *bytes, l2_slice, m, true); + if (ret < 0) { + goto out; + } + + ret = 1; + } else { + ret = 0; + } + + /* Cleanup */ +out: + qcow2_cache_put(s->l2_table_cache, (void **) &l2_slice); + + /* Only return a host offset if we actually made progress. Otherwise we + * would make requirements for handle_alloc() that it can't fulfill */ + if (ret > 0) { + *host_offset = cluster_offset + offset_into_cluster(s, guest_offset); + } + + return ret; +} + +/* + * Allocates new clusters for the given guest_offset. + * + * At most *nb_clusters are allocated, and on return *nb_clusters is updated to + * contain the number of clusters that have been allocated and are contiguous + * in the image file. + * + * If *host_offset is not INV_OFFSET, it specifies the offset in the image file + * at which the new clusters must start. *nb_clusters can be 0 on return in + * this case if the cluster at host_offset is already in use. If *host_offset + * is INV_OFFSET, the clusters can be allocated anywhere in the image file. + * + * *host_offset is updated to contain the offset into the image file at which + * the first allocated cluster starts. + * + * Return 0 on success and -errno in error cases. -EAGAIN means that the + * function has been waiting for another request and the allocation must be + * restarted, but the whole request should not be failed. + */ +static int do_alloc_cluster_offset(BlockDriverState *bs, uint64_t guest_offset, + uint64_t *host_offset, uint64_t *nb_clusters) +{ + BDRVQcow2State *s = bs->opaque; + + trace_qcow2_do_alloc_clusters_offset(qemu_coroutine_self(), guest_offset, + *host_offset, *nb_clusters); + + if (has_data_file(bs)) { + assert(*host_offset == INV_OFFSET || + *host_offset == start_of_cluster(s, guest_offset)); + *host_offset = start_of_cluster(s, guest_offset); + return 0; + } + + /* Allocate new clusters */ + trace_qcow2_cluster_alloc_phys(qemu_coroutine_self()); + if (*host_offset == INV_OFFSET) { + int64_t cluster_offset = + qcow2_alloc_clusters(bs, *nb_clusters * s->cluster_size); + if (cluster_offset < 0) { + return cluster_offset; + } + *host_offset = cluster_offset; + return 0; + } else { + int64_t ret = qcow2_alloc_clusters_at(bs, *host_offset, *nb_clusters); + if (ret < 0) { + return ret; + } + *nb_clusters = ret; + return 0; + } +} + +/* + * Allocates new clusters for an area that is either still unallocated or + * cannot be overwritten in-place. If *host_offset is not INV_OFFSET, + * clusters are only allocated if the new allocation can match the specified + * host offset. + * + * Note that guest_offset may not be cluster aligned. In this case, the + * returned *host_offset points to exact byte referenced by guest_offset and + * therefore isn't cluster aligned as well. + * + * Returns: + * 0: if no clusters could be allocated. *bytes is set to 0, + * *host_offset is left unchanged. + * + * 1: if new clusters were allocated. *bytes may be decreased if the + * new allocation doesn't cover all of the requested area. + * *host_offset is updated to contain the host offset of the first + * newly allocated cluster. + * + * -errno: in error cases + */ +static int handle_alloc(BlockDriverState *bs, uint64_t guest_offset, + uint64_t *host_offset, uint64_t *bytes, QCowL2Meta **m) +{ + BDRVQcow2State *s = bs->opaque; + int l2_index; + uint64_t *l2_slice; + uint64_t nb_clusters; + int ret; + + uint64_t alloc_cluster_offset; + + trace_qcow2_handle_alloc(qemu_coroutine_self(), guest_offset, *host_offset, + *bytes); + assert(*bytes > 0); + + /* + * Calculate the number of clusters to look for. We stop at L2 slice + * boundaries to keep things simple. + */ + nb_clusters = + size_to_clusters(s, offset_into_cluster(s, guest_offset) + *bytes); + + l2_index = offset_to_l2_slice_index(s, guest_offset); + nb_clusters = MIN(nb_clusters, s->l2_slice_size - l2_index); + /* Limit total allocation byte count to BDRV_REQUEST_MAX_BYTES */ + nb_clusters = MIN(nb_clusters, BDRV_REQUEST_MAX_BYTES >> s->cluster_bits); + + /* Find L2 entry for the first involved cluster */ + ret = get_cluster_table(bs, guest_offset, &l2_slice, &l2_index); + if (ret < 0) { + return ret; + } + + nb_clusters = count_single_write_clusters(bs, nb_clusters, + l2_slice, l2_index, true); + + /* This function is only called when there were no non-COW clusters, so if + * we can't find any unallocated or COW clusters either, something is + * wrong with our code. */ + assert(nb_clusters > 0); + + /* Allocate at a given offset in the image file */ + alloc_cluster_offset = *host_offset == INV_OFFSET ? INV_OFFSET : + start_of_cluster(s, *host_offset); + ret = do_alloc_cluster_offset(bs, guest_offset, &alloc_cluster_offset, + &nb_clusters); + if (ret < 0) { + goto out; + } + + /* Can't extend contiguous allocation */ + if (nb_clusters == 0) { + *bytes = 0; + ret = 0; + goto out; + } + + assert(alloc_cluster_offset != INV_OFFSET); + + /* + * Save info needed for meta data update. + * + * requested_bytes: Number of bytes from the start of the first + * newly allocated cluster to the end of the (possibly shortened + * before) write request. + * + * avail_bytes: Number of bytes from the start of the first + * newly allocated to the end of the last newly allocated cluster. + * + * nb_bytes: The number of bytes from the start of the first + * newly allocated cluster to the end of the area that the write + * request actually writes to (excluding COW at the end) + */ + uint64_t requested_bytes = *bytes + offset_into_cluster(s, guest_offset); + int avail_bytes = nb_clusters << s->cluster_bits; + int nb_bytes = MIN(requested_bytes, avail_bytes); + + *host_offset = alloc_cluster_offset + offset_into_cluster(s, guest_offset); + *bytes = MIN(*bytes, nb_bytes - offset_into_cluster(s, guest_offset)); + assert(*bytes != 0); + + ret = calculate_l2_meta(bs, alloc_cluster_offset, guest_offset, *bytes, + l2_slice, m, false); + if (ret < 0) { + goto out; + } + + ret = 1; + +out: + qcow2_cache_put(s->l2_table_cache, (void **) &l2_slice); + return ret; +} + +/* + * For a given area on the virtual disk defined by @offset and @bytes, + * find the corresponding area on the qcow2 image, allocating new + * clusters (or subclusters) if necessary. The result can span a + * combination of allocated and previously unallocated clusters. + * + * Note that offset may not be cluster aligned. In this case, the returned + * *host_offset points to exact byte referenced by offset and therefore + * isn't cluster aligned as well. + * + * On return, @host_offset is set to the beginning of the requested + * area. This area is guaranteed to be contiguous on the qcow2 file + * but it can be smaller than initially requested. In this case @bytes + * is updated with the actual size. + * + * If any clusters or subclusters were allocated then @m contains a + * list with the information of all the affected regions. Note that + * this can happen regardless of whether this function succeeds or + * not. The caller is responsible for updating the L2 metadata of the + * allocated clusters (on success) or freeing them (on failure), and + * for clearing the contents of @m afterwards in both cases. + * + * If the request conflicts with another write request in flight, the coroutine + * is queued and will be reentered when the dependency has completed. + * + * Return 0 on success and -errno in error cases + */ +int qcow2_alloc_host_offset(BlockDriverState *bs, uint64_t offset, + unsigned int *bytes, uint64_t *host_offset, + QCowL2Meta **m) +{ + BDRVQcow2State *s = bs->opaque; + uint64_t start, remaining; + uint64_t cluster_offset; + uint64_t cur_bytes; + int ret; + + trace_qcow2_alloc_clusters_offset(qemu_coroutine_self(), offset, *bytes); + +again: + start = offset; + remaining = *bytes; + cluster_offset = INV_OFFSET; + *host_offset = INV_OFFSET; + cur_bytes = 0; + *m = NULL; + + while (true) { + + if (*host_offset == INV_OFFSET && cluster_offset != INV_OFFSET) { + *host_offset = cluster_offset; + } + + assert(remaining >= cur_bytes); + + start += cur_bytes; + remaining -= cur_bytes; + + if (cluster_offset != INV_OFFSET) { + cluster_offset += cur_bytes; + } + + if (remaining == 0) { + break; + } + + cur_bytes = remaining; + + /* + * Now start gathering as many contiguous clusters as possible: + * + * 1. Check for overlaps with in-flight allocations + * + * a) Overlap not in the first cluster -> shorten this request and + * let the caller handle the rest in its next loop iteration. + * + * b) Real overlaps of two requests. Yield and restart the search + * for contiguous clusters (the situation could have changed + * while we were sleeping) + * + * c) TODO: Request starts in the same cluster as the in-flight + * allocation ends. Shorten the COW of the in-fight allocation, + * set cluster_offset to write to the same cluster and set up + * the right synchronisation between the in-flight request and + * the new one. + */ + ret = handle_dependencies(bs, start, &cur_bytes, m); + if (ret == -EAGAIN) { + /* Currently handle_dependencies() doesn't yield if we already had + * an allocation. If it did, we would have to clean up the L2Meta + * structs before starting over. */ + assert(*m == NULL); + goto again; + } else if (ret < 0) { + return ret; + } else if (cur_bytes == 0) { + break; + } else { + /* handle_dependencies() may have decreased cur_bytes (shortened + * the allocations below) so that the next dependency is processed + * correctly during the next loop iteration. */ + } + + /* + * 2. Count contiguous COPIED clusters. + */ + ret = handle_copied(bs, start, &cluster_offset, &cur_bytes, m); + if (ret < 0) { + return ret; + } else if (ret) { + continue; + } else if (cur_bytes == 0) { + break; + } + + /* + * 3. If the request still hasn't completed, allocate new clusters, + * considering any cluster_offset of steps 1c or 2. + */ + ret = handle_alloc(bs, start, &cluster_offset, &cur_bytes, m); + if (ret < 0) { + return ret; + } else if (ret) { + continue; + } else { + assert(cur_bytes == 0); + break; + } + } + + *bytes -= remaining; + assert(*bytes > 0); + assert(*host_offset != INV_OFFSET); + assert(offset_into_cluster(s, *host_offset) == + offset_into_cluster(s, offset)); + + return 0; +} + +/* + * This discards as many clusters of nb_clusters as possible at once (i.e. + * all clusters in the same L2 slice) and returns the number of discarded + * clusters. + */ +static int discard_in_l2_slice(BlockDriverState *bs, uint64_t offset, + uint64_t nb_clusters, + enum qcow2_discard_type type, bool full_discard) +{ + BDRVQcow2State *s = bs->opaque; + uint64_t *l2_slice; + int l2_index; + int ret; + int i; + + ret = get_cluster_table(bs, offset, &l2_slice, &l2_index); + if (ret < 0) { + return ret; + } + + /* Limit nb_clusters to one L2 slice */ + nb_clusters = MIN(nb_clusters, s->l2_slice_size - l2_index); + assert(nb_clusters <= INT_MAX); + + for (i = 0; i < nb_clusters; i++) { + uint64_t old_l2_entry = get_l2_entry(s, l2_slice, l2_index + i); + uint64_t old_l2_bitmap = get_l2_bitmap(s, l2_slice, l2_index + i); + uint64_t new_l2_entry = old_l2_entry; + uint64_t new_l2_bitmap = old_l2_bitmap; + QCow2ClusterType cluster_type = + qcow2_get_cluster_type(bs, old_l2_entry); + + /* + * If full_discard is true, the cluster should not read back as zeroes, + * but rather fall through to the backing file. + * + * If full_discard is false, make sure that a discarded area reads back + * as zeroes for v3 images (we cannot do it for v2 without actually + * writing a zero-filled buffer). We can skip the operation if the + * cluster is already marked as zero, or if it's unallocated and we + * don't have a backing file. + * + * TODO We might want to use bdrv_block_status(bs) here, but we're + * holding s->lock, so that doesn't work today. + */ + if (full_discard) { + new_l2_entry = new_l2_bitmap = 0; + } else if (bs->backing || qcow2_cluster_is_allocated(cluster_type)) { + if (has_subclusters(s)) { + new_l2_entry = 0; + new_l2_bitmap = QCOW_L2_BITMAP_ALL_ZEROES; + } else { + new_l2_entry = s->qcow_version >= 3 ? QCOW_OFLAG_ZERO : 0; + } + } + + if (old_l2_entry == new_l2_entry && old_l2_bitmap == new_l2_bitmap) { + continue; + } + + /* First remove L2 entries */ + qcow2_cache_entry_mark_dirty(s->l2_table_cache, l2_slice); + set_l2_entry(s, l2_slice, l2_index + i, new_l2_entry); + if (has_subclusters(s)) { + set_l2_bitmap(s, l2_slice, l2_index + i, new_l2_bitmap); + } + /* Then decrease the refcount */ + qcow2_free_any_cluster(bs, old_l2_entry, type); + } + + qcow2_cache_put(s->l2_table_cache, (void **) &l2_slice); + + return nb_clusters; +} + +int qcow2_cluster_discard(BlockDriverState *bs, uint64_t offset, + uint64_t bytes, enum qcow2_discard_type type, + bool full_discard) +{ + BDRVQcow2State *s = bs->opaque; + uint64_t end_offset = offset + bytes; + uint64_t nb_clusters; + int64_t cleared; + int ret; + + /* Caller must pass aligned values, except at image end */ + assert(QEMU_IS_ALIGNED(offset, s->cluster_size)); + assert(QEMU_IS_ALIGNED(end_offset, s->cluster_size) || + end_offset == bs->total_sectors << BDRV_SECTOR_BITS); + + nb_clusters = size_to_clusters(s, bytes); + + s->cache_discards = true; + + /* Each L2 slice is handled by its own loop iteration */ + while (nb_clusters > 0) { + cleared = discard_in_l2_slice(bs, offset, nb_clusters, type, + full_discard); + if (cleared < 0) { + ret = cleared; + goto fail; + } + + nb_clusters -= cleared; + offset += (cleared * s->cluster_size); + } + + ret = 0; +fail: + s->cache_discards = false; + qcow2_process_discards(bs, ret); + + return ret; +} + +/* + * This zeroes as many clusters of nb_clusters as possible at once (i.e. + * all clusters in the same L2 slice) and returns the number of zeroed + * clusters. + */ +static int zero_in_l2_slice(BlockDriverState *bs, uint64_t offset, + uint64_t nb_clusters, int flags) +{ + BDRVQcow2State *s = bs->opaque; + uint64_t *l2_slice; + int l2_index; + int ret; + int i; + + ret = get_cluster_table(bs, offset, &l2_slice, &l2_index); + if (ret < 0) { + return ret; + } + + /* Limit nb_clusters to one L2 slice */ + nb_clusters = MIN(nb_clusters, s->l2_slice_size - l2_index); + assert(nb_clusters <= INT_MAX); + + for (i = 0; i < nb_clusters; i++) { + uint64_t old_l2_entry = get_l2_entry(s, l2_slice, l2_index + i); + uint64_t old_l2_bitmap = get_l2_bitmap(s, l2_slice, l2_index + i); + QCow2ClusterType type = qcow2_get_cluster_type(bs, old_l2_entry); + bool unmap = (type == QCOW2_CLUSTER_COMPRESSED) || + ((flags & BDRV_REQ_MAY_UNMAP) && qcow2_cluster_is_allocated(type)); + uint64_t new_l2_entry = unmap ? 0 : old_l2_entry; + uint64_t new_l2_bitmap = old_l2_bitmap; + + if (has_subclusters(s)) { + new_l2_bitmap = QCOW_L2_BITMAP_ALL_ZEROES; + } else { + new_l2_entry |= QCOW_OFLAG_ZERO; + } + + if (old_l2_entry == new_l2_entry && old_l2_bitmap == new_l2_bitmap) { + continue; + } + + /* First update L2 entries */ + qcow2_cache_entry_mark_dirty(s->l2_table_cache, l2_slice); + set_l2_entry(s, l2_slice, l2_index + i, new_l2_entry); + if (has_subclusters(s)) { + set_l2_bitmap(s, l2_slice, l2_index + i, new_l2_bitmap); + } + + /* Then decrease the refcount */ + if (unmap) { + qcow2_free_any_cluster(bs, old_l2_entry, QCOW2_DISCARD_REQUEST); + } + } + + qcow2_cache_put(s->l2_table_cache, (void **) &l2_slice); + + return nb_clusters; +} + +static int zero_l2_subclusters(BlockDriverState *bs, uint64_t offset, + unsigned nb_subclusters) +{ + BDRVQcow2State *s = bs->opaque; + uint64_t *l2_slice; + uint64_t old_l2_bitmap, l2_bitmap; + int l2_index, ret, sc = offset_to_sc_index(s, offset); + + /* For full clusters use zero_in_l2_slice() instead */ + assert(nb_subclusters > 0 && nb_subclusters < s->subclusters_per_cluster); + assert(sc + nb_subclusters <= s->subclusters_per_cluster); + assert(offset_into_subcluster(s, offset) == 0); + + ret = get_cluster_table(bs, offset, &l2_slice, &l2_index); + if (ret < 0) { + return ret; + } + + switch (qcow2_get_cluster_type(bs, get_l2_entry(s, l2_slice, l2_index))) { + case QCOW2_CLUSTER_COMPRESSED: + ret = -ENOTSUP; /* We cannot partially zeroize compressed clusters */ + goto out; + case QCOW2_CLUSTER_NORMAL: + case QCOW2_CLUSTER_UNALLOCATED: + break; + default: + g_assert_not_reached(); + } + + old_l2_bitmap = l2_bitmap = get_l2_bitmap(s, l2_slice, l2_index); + + l2_bitmap |= QCOW_OFLAG_SUB_ZERO_RANGE(sc, sc + nb_subclusters); + l2_bitmap &= ~QCOW_OFLAG_SUB_ALLOC_RANGE(sc, sc + nb_subclusters); + + if (old_l2_bitmap != l2_bitmap) { + set_l2_bitmap(s, l2_slice, l2_index, l2_bitmap); + qcow2_cache_entry_mark_dirty(s->l2_table_cache, l2_slice); + } + + ret = 0; +out: + qcow2_cache_put(s->l2_table_cache, (void **) &l2_slice); + + return ret; +} + +int qcow2_subcluster_zeroize(BlockDriverState *bs, uint64_t offset, + uint64_t bytes, int flags) +{ + BDRVQcow2State *s = bs->opaque; + uint64_t end_offset = offset + bytes; + uint64_t nb_clusters; + unsigned head, tail; + int64_t cleared; + int ret; + + /* If we have to stay in sync with an external data file, zero out + * s->data_file first. */ + if (data_file_is_raw(bs)) { + assert(has_data_file(bs)); + ret = bdrv_co_pwrite_zeroes(s->data_file, offset, bytes, flags); + if (ret < 0) { + return ret; + } + } + + /* Caller must pass aligned values, except at image end */ + assert(offset_into_subcluster(s, offset) == 0); + assert(offset_into_subcluster(s, end_offset) == 0 || + end_offset >= bs->total_sectors << BDRV_SECTOR_BITS); + + /* + * The zero flag is only supported by version 3 and newer. However, if we + * have no backing file, we can resort to discard in version 2. + */ + if (s->qcow_version < 3) { + if (!bs->backing) { + return qcow2_cluster_discard(bs, offset, bytes, + QCOW2_DISCARD_REQUEST, false); + } + return -ENOTSUP; + } + + head = MIN(end_offset, ROUND_UP(offset, s->cluster_size)) - offset; + offset += head; + + tail = (end_offset >= bs->total_sectors << BDRV_SECTOR_BITS) ? 0 : + end_offset - MAX(offset, start_of_cluster(s, end_offset)); + end_offset -= tail; + + s->cache_discards = true; + + if (head) { + ret = zero_l2_subclusters(bs, offset - head, + size_to_subclusters(s, head)); + if (ret < 0) { + goto fail; + } + } + + /* Each L2 slice is handled by its own loop iteration */ + nb_clusters = size_to_clusters(s, end_offset - offset); + + while (nb_clusters > 0) { + cleared = zero_in_l2_slice(bs, offset, nb_clusters, flags); + if (cleared < 0) { + ret = cleared; + goto fail; + } + + nb_clusters -= cleared; + offset += (cleared * s->cluster_size); + } + + if (tail) { + ret = zero_l2_subclusters(bs, end_offset, size_to_subclusters(s, tail)); + if (ret < 0) { + goto fail; + } + } + + ret = 0; +fail: + s->cache_discards = false; + qcow2_process_discards(bs, ret); + + return ret; +} + +/* + * Expands all zero clusters in a specific L1 table (or deallocates them, for + * non-backed non-pre-allocated zero clusters). + * + * l1_entries and *visited_l1_entries are used to keep track of progress for + * status_cb(). l1_entries contains the total number of L1 entries and + * *visited_l1_entries counts all visited L1 entries. + */ +static int expand_zero_clusters_in_l1(BlockDriverState *bs, uint64_t *l1_table, + int l1_size, int64_t *visited_l1_entries, + int64_t l1_entries, + BlockDriverAmendStatusCB *status_cb, + void *cb_opaque) +{ + BDRVQcow2State *s = bs->opaque; + bool is_active_l1 = (l1_table == s->l1_table); + uint64_t *l2_slice = NULL; + unsigned slice, slice_size2, n_slices; + int ret; + int i, j; + + /* qcow2_downgrade() is not allowed in images with subclusters */ + assert(!has_subclusters(s)); + + slice_size2 = s->l2_slice_size * l2_entry_size(s); + n_slices = s->cluster_size / slice_size2; + + if (!is_active_l1) { + /* inactive L2 tables require a buffer to be stored in when loading + * them from disk */ + l2_slice = qemu_try_blockalign(bs->file->bs, slice_size2); + if (l2_slice == NULL) { + return -ENOMEM; + } + } + + for (i = 0; i < l1_size; i++) { + uint64_t l2_offset = l1_table[i] & L1E_OFFSET_MASK; + uint64_t l2_refcount; + + if (!l2_offset) { + /* unallocated */ + (*visited_l1_entries)++; + if (status_cb) { + status_cb(bs, *visited_l1_entries, l1_entries, cb_opaque); + } + continue; + } + + if (offset_into_cluster(s, l2_offset)) { + qcow2_signal_corruption(bs, true, -1, -1, "L2 table offset %#" + PRIx64 " unaligned (L1 index: %#x)", + l2_offset, i); + ret = -EIO; + goto fail; + } + + ret = qcow2_get_refcount(bs, l2_offset >> s->cluster_bits, + &l2_refcount); + if (ret < 0) { + goto fail; + } + + for (slice = 0; slice < n_slices; slice++) { + uint64_t slice_offset = l2_offset + slice * slice_size2; + bool l2_dirty = false; + if (is_active_l1) { + /* get active L2 tables from cache */ + ret = qcow2_cache_get(bs, s->l2_table_cache, slice_offset, + (void **)&l2_slice); + } else { + /* load inactive L2 tables from disk */ + ret = bdrv_pread(bs->file, slice_offset, l2_slice, slice_size2); + } + if (ret < 0) { + goto fail; + } + + for (j = 0; j < s->l2_slice_size; j++) { + uint64_t l2_entry = get_l2_entry(s, l2_slice, j); + int64_t offset = l2_entry & L2E_OFFSET_MASK; + QCow2ClusterType cluster_type = + qcow2_get_cluster_type(bs, l2_entry); + + if (cluster_type != QCOW2_CLUSTER_ZERO_PLAIN && + cluster_type != QCOW2_CLUSTER_ZERO_ALLOC) { + continue; + } + + if (cluster_type == QCOW2_CLUSTER_ZERO_PLAIN) { + if (!bs->backing) { + /* + * not backed; therefore we can simply deallocate the + * cluster. No need to call set_l2_bitmap(), this + * function doesn't support images with subclusters. + */ + set_l2_entry(s, l2_slice, j, 0); + l2_dirty = true; + continue; + } + + offset = qcow2_alloc_clusters(bs, s->cluster_size); + if (offset < 0) { + ret = offset; + goto fail; + } + + /* The offset must fit in the offset field */ + assert((offset & L2E_OFFSET_MASK) == offset); + + if (l2_refcount > 1) { + /* For shared L2 tables, set the refcount accordingly + * (it is already 1 and needs to be l2_refcount) */ + ret = qcow2_update_cluster_refcount( + bs, offset >> s->cluster_bits, + refcount_diff(1, l2_refcount), false, + QCOW2_DISCARD_OTHER); + if (ret < 0) { + qcow2_free_clusters(bs, offset, s->cluster_size, + QCOW2_DISCARD_OTHER); + goto fail; + } + } + } + + if (offset_into_cluster(s, offset)) { + int l2_index = slice * s->l2_slice_size + j; + qcow2_signal_corruption( + bs, true, -1, -1, + "Cluster allocation offset " + "%#" PRIx64 " unaligned (L2 offset: %#" + PRIx64 ", L2 index: %#x)", offset, + l2_offset, l2_index); + if (cluster_type == QCOW2_CLUSTER_ZERO_PLAIN) { + qcow2_free_clusters(bs, offset, s->cluster_size, + QCOW2_DISCARD_ALWAYS); + } + ret = -EIO; + goto fail; + } + + ret = qcow2_pre_write_overlap_check(bs, 0, offset, + s->cluster_size, true); + if (ret < 0) { + if (cluster_type == QCOW2_CLUSTER_ZERO_PLAIN) { + qcow2_free_clusters(bs, offset, s->cluster_size, + QCOW2_DISCARD_ALWAYS); + } + goto fail; + } + + ret = bdrv_pwrite_zeroes(s->data_file, offset, + s->cluster_size, 0); + if (ret < 0) { + if (cluster_type == QCOW2_CLUSTER_ZERO_PLAIN) { + qcow2_free_clusters(bs, offset, s->cluster_size, + QCOW2_DISCARD_ALWAYS); + } + goto fail; + } + + if (l2_refcount == 1) { + set_l2_entry(s, l2_slice, j, offset | QCOW_OFLAG_COPIED); + } else { + set_l2_entry(s, l2_slice, j, offset); + } + /* + * No need to call set_l2_bitmap() after set_l2_entry() because + * this function doesn't support images with subclusters. + */ + l2_dirty = true; + } + + if (is_active_l1) { + if (l2_dirty) { + qcow2_cache_entry_mark_dirty(s->l2_table_cache, l2_slice); + qcow2_cache_depends_on_flush(s->l2_table_cache); + } + qcow2_cache_put(s->l2_table_cache, (void **) &l2_slice); + } else { + if (l2_dirty) { + ret = qcow2_pre_write_overlap_check( + bs, QCOW2_OL_INACTIVE_L2 | QCOW2_OL_ACTIVE_L2, + slice_offset, slice_size2, false); + if (ret < 0) { + goto fail; + } + + ret = bdrv_pwrite(bs->file, slice_offset, + l2_slice, slice_size2); + if (ret < 0) { + goto fail; + } + } + } + } + + (*visited_l1_entries)++; + if (status_cb) { + status_cb(bs, *visited_l1_entries, l1_entries, cb_opaque); + } + } + + ret = 0; + +fail: + if (l2_slice) { + if (!is_active_l1) { + qemu_vfree(l2_slice); + } else { + qcow2_cache_put(s->l2_table_cache, (void **) &l2_slice); + } + } + return ret; +} + +/* + * For backed images, expands all zero clusters on the image. For non-backed + * images, deallocates all non-pre-allocated zero clusters (and claims the + * allocation for pre-allocated ones). This is important for downgrading to a + * qcow2 version which doesn't yet support metadata zero clusters. + */ +int qcow2_expand_zero_clusters(BlockDriverState *bs, + BlockDriverAmendStatusCB *status_cb, + void *cb_opaque) +{ + BDRVQcow2State *s = bs->opaque; + uint64_t *l1_table = NULL; + int64_t l1_entries = 0, visited_l1_entries = 0; + int ret; + int i, j; + + if (status_cb) { + l1_entries = s->l1_size; + for (i = 0; i < s->nb_snapshots; i++) { + l1_entries += s->snapshots[i].l1_size; + } + } + + ret = expand_zero_clusters_in_l1(bs, s->l1_table, s->l1_size, + &visited_l1_entries, l1_entries, + status_cb, cb_opaque); + if (ret < 0) { + goto fail; + } + + /* Inactive L1 tables may point to active L2 tables - therefore it is + * necessary to flush the L2 table cache before trying to access the L2 + * tables pointed to by inactive L1 entries (else we might try to expand + * zero clusters that have already been expanded); furthermore, it is also + * necessary to empty the L2 table cache, since it may contain tables which + * are now going to be modified directly on disk, bypassing the cache. + * qcow2_cache_empty() does both for us. */ + ret = qcow2_cache_empty(bs, s->l2_table_cache); + if (ret < 0) { + goto fail; + } + + for (i = 0; i < s->nb_snapshots; i++) { + int l1_size2; + uint64_t *new_l1_table; + Error *local_err = NULL; + + ret = qcow2_validate_table(bs, s->snapshots[i].l1_table_offset, + s->snapshots[i].l1_size, L1E_SIZE, + QCOW_MAX_L1_SIZE, "Snapshot L1 table", + &local_err); + if (ret < 0) { + error_report_err(local_err); + goto fail; + } + + l1_size2 = s->snapshots[i].l1_size * L1E_SIZE; + new_l1_table = g_try_realloc(l1_table, l1_size2); + + if (!new_l1_table) { + ret = -ENOMEM; + goto fail; + } + + l1_table = new_l1_table; + + ret = bdrv_pread(bs->file, s->snapshots[i].l1_table_offset, + l1_table, l1_size2); + if (ret < 0) { + goto fail; + } + + for (j = 0; j < s->snapshots[i].l1_size; j++) { + be64_to_cpus(&l1_table[j]); + } + + ret = expand_zero_clusters_in_l1(bs, l1_table, s->snapshots[i].l1_size, + &visited_l1_entries, l1_entries, + status_cb, cb_opaque); + if (ret < 0) { + goto fail; + } + } + + ret = 0; + +fail: + g_free(l1_table); + return ret; +} diff --git a/block/qcow2-refcount.c b/block/qcow2-refcount.c new file mode 100644 index 000000000..8e649b008 --- /dev/null +++ b/block/qcow2-refcount.c @@ -0,0 +1,3497 @@ +/* + * Block driver for the QCOW version 2 format + * + * Copyright (c) 2004-2006 Fabrice Bellard + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ + +#include "qemu/osdep.h" +#include "qapi/error.h" +#include "qcow2.h" +#include "qemu/range.h" +#include "qemu/bswap.h" +#include "qemu/cutils.h" +#include "trace.h" + +static int64_t alloc_clusters_noref(BlockDriverState *bs, uint64_t size, + uint64_t max); +static int QEMU_WARN_UNUSED_RESULT update_refcount(BlockDriverState *bs, + int64_t offset, int64_t length, uint64_t addend, + bool decrease, enum qcow2_discard_type type); + +static uint64_t get_refcount_ro0(const void *refcount_array, uint64_t index); +static uint64_t get_refcount_ro1(const void *refcount_array, uint64_t index); +static uint64_t get_refcount_ro2(const void *refcount_array, uint64_t index); +static uint64_t get_refcount_ro3(const void *refcount_array, uint64_t index); +static uint64_t get_refcount_ro4(const void *refcount_array, uint64_t index); +static uint64_t get_refcount_ro5(const void *refcount_array, uint64_t index); +static uint64_t get_refcount_ro6(const void *refcount_array, uint64_t index); + +static void set_refcount_ro0(void *refcount_array, uint64_t index, + uint64_t value); +static void set_refcount_ro1(void *refcount_array, uint64_t index, + uint64_t value); +static void set_refcount_ro2(void *refcount_array, uint64_t index, + uint64_t value); +static void set_refcount_ro3(void *refcount_array, uint64_t index, + uint64_t value); +static void set_refcount_ro4(void *refcount_array, uint64_t index, + uint64_t value); +static void set_refcount_ro5(void *refcount_array, uint64_t index, + uint64_t value); +static void set_refcount_ro6(void *refcount_array, uint64_t index, + uint64_t value); + + +static Qcow2GetRefcountFunc *const get_refcount_funcs[] = { + &get_refcount_ro0, + &get_refcount_ro1, + &get_refcount_ro2, + &get_refcount_ro3, + &get_refcount_ro4, + &get_refcount_ro5, + &get_refcount_ro6 +}; + +static Qcow2SetRefcountFunc *const set_refcount_funcs[] = { + &set_refcount_ro0, + &set_refcount_ro1, + &set_refcount_ro2, + &set_refcount_ro3, + &set_refcount_ro4, + &set_refcount_ro5, + &set_refcount_ro6 +}; + + +/*********************************************************/ +/* refcount handling */ + +static void update_max_refcount_table_index(BDRVQcow2State *s) +{ + unsigned i = s->refcount_table_size - 1; + while (i > 0 && (s->refcount_table[i] & REFT_OFFSET_MASK) == 0) { + i--; + } + /* Set s->max_refcount_table_index to the index of the last used entry */ + s->max_refcount_table_index = i; +} + +int qcow2_refcount_init(BlockDriverState *bs) +{ + BDRVQcow2State *s = bs->opaque; + unsigned int refcount_table_size2, i; + int ret; + + assert(s->refcount_order >= 0 && s->refcount_order <= 6); + + s->get_refcount = get_refcount_funcs[s->refcount_order]; + s->set_refcount = set_refcount_funcs[s->refcount_order]; + + assert(s->refcount_table_size <= INT_MAX / REFTABLE_ENTRY_SIZE); + refcount_table_size2 = s->refcount_table_size * REFTABLE_ENTRY_SIZE; + s->refcount_table = g_try_malloc(refcount_table_size2); + + if (s->refcount_table_size > 0) { + if (s->refcount_table == NULL) { + ret = -ENOMEM; + goto fail; + } + BLKDBG_EVENT(bs->file, BLKDBG_REFTABLE_LOAD); + ret = bdrv_pread(bs->file, s->refcount_table_offset, + s->refcount_table, refcount_table_size2); + if (ret < 0) { + goto fail; + } + for(i = 0; i < s->refcount_table_size; i++) + be64_to_cpus(&s->refcount_table[i]); + update_max_refcount_table_index(s); + } + return 0; + fail: + return ret; +} + +void qcow2_refcount_close(BlockDriverState *bs) +{ + BDRVQcow2State *s = bs->opaque; + g_free(s->refcount_table); +} + + +static uint64_t get_refcount_ro0(const void *refcount_array, uint64_t index) +{ + return (((const uint8_t *)refcount_array)[index / 8] >> (index % 8)) & 0x1; +} + +static void set_refcount_ro0(void *refcount_array, uint64_t index, + uint64_t value) +{ + assert(!(value >> 1)); + ((uint8_t *)refcount_array)[index / 8] &= ~(0x1 << (index % 8)); + ((uint8_t *)refcount_array)[index / 8] |= value << (index % 8); +} + +static uint64_t get_refcount_ro1(const void *refcount_array, uint64_t index) +{ + return (((const uint8_t *)refcount_array)[index / 4] >> (2 * (index % 4))) + & 0x3; +} + +static void set_refcount_ro1(void *refcount_array, uint64_t index, + uint64_t value) +{ + assert(!(value >> 2)); + ((uint8_t *)refcount_array)[index / 4] &= ~(0x3 << (2 * (index % 4))); + ((uint8_t *)refcount_array)[index / 4] |= value << (2 * (index % 4)); +} + +static uint64_t get_refcount_ro2(const void *refcount_array, uint64_t index) +{ + return (((const uint8_t *)refcount_array)[index / 2] >> (4 * (index % 2))) + & 0xf; +} + +static void set_refcount_ro2(void *refcount_array, uint64_t index, + uint64_t value) +{ + assert(!(value >> 4)); + ((uint8_t *)refcount_array)[index / 2] &= ~(0xf << (4 * (index % 2))); + ((uint8_t *)refcount_array)[index / 2] |= value << (4 * (index % 2)); +} + +static uint64_t get_refcount_ro3(const void *refcount_array, uint64_t index) +{ + return ((const uint8_t *)refcount_array)[index]; +} + +static void set_refcount_ro3(void *refcount_array, uint64_t index, + uint64_t value) +{ + assert(!(value >> 8)); + ((uint8_t *)refcount_array)[index] = value; +} + +static uint64_t get_refcount_ro4(const void *refcount_array, uint64_t index) +{ + return be16_to_cpu(((const uint16_t *)refcount_array)[index]); +} + +static void set_refcount_ro4(void *refcount_array, uint64_t index, + uint64_t value) +{ + assert(!(value >> 16)); + ((uint16_t *)refcount_array)[index] = cpu_to_be16(value); +} + +static uint64_t get_refcount_ro5(const void *refcount_array, uint64_t index) +{ + return be32_to_cpu(((const uint32_t *)refcount_array)[index]); +} + +static void set_refcount_ro5(void *refcount_array, uint64_t index, + uint64_t value) +{ + assert(!(value >> 32)); + ((uint32_t *)refcount_array)[index] = cpu_to_be32(value); +} + +static uint64_t get_refcount_ro6(const void *refcount_array, uint64_t index) +{ + return be64_to_cpu(((const uint64_t *)refcount_array)[index]); +} + +static void set_refcount_ro6(void *refcount_array, uint64_t index, + uint64_t value) +{ + ((uint64_t *)refcount_array)[index] = cpu_to_be64(value); +} + + +static int load_refcount_block(BlockDriverState *bs, + int64_t refcount_block_offset, + void **refcount_block) +{ + BDRVQcow2State *s = bs->opaque; + + BLKDBG_EVENT(bs->file, BLKDBG_REFBLOCK_LOAD); + return qcow2_cache_get(bs, s->refcount_block_cache, refcount_block_offset, + refcount_block); +} + +/* + * Retrieves the refcount of the cluster given by its index and stores it in + * *refcount. Returns 0 on success and -errno on failure. + */ +int qcow2_get_refcount(BlockDriverState *bs, int64_t cluster_index, + uint64_t *refcount) +{ + BDRVQcow2State *s = bs->opaque; + uint64_t refcount_table_index, block_index; + int64_t refcount_block_offset; + int ret; + void *refcount_block; + + refcount_table_index = cluster_index >> s->refcount_block_bits; + if (refcount_table_index >= s->refcount_table_size) { + *refcount = 0; + return 0; + } + refcount_block_offset = + s->refcount_table[refcount_table_index] & REFT_OFFSET_MASK; + if (!refcount_block_offset) { + *refcount = 0; + return 0; + } + + if (offset_into_cluster(s, refcount_block_offset)) { + qcow2_signal_corruption(bs, true, -1, -1, "Refblock offset %#" PRIx64 + " unaligned (reftable index: %#" PRIx64 ")", + refcount_block_offset, refcount_table_index); + return -EIO; + } + + ret = qcow2_cache_get(bs, s->refcount_block_cache, refcount_block_offset, + &refcount_block); + if (ret < 0) { + return ret; + } + + block_index = cluster_index & (s->refcount_block_size - 1); + *refcount = s->get_refcount(refcount_block, block_index); + + qcow2_cache_put(s->refcount_block_cache, &refcount_block); + + return 0; +} + +/* Checks if two offsets are described by the same refcount block */ +static int in_same_refcount_block(BDRVQcow2State *s, uint64_t offset_a, + uint64_t offset_b) +{ + uint64_t block_a = offset_a >> (s->cluster_bits + s->refcount_block_bits); + uint64_t block_b = offset_b >> (s->cluster_bits + s->refcount_block_bits); + + return (block_a == block_b); +} + +/* + * Loads a refcount block. If it doesn't exist yet, it is allocated first + * (including growing the refcount table if needed). + * + * Returns 0 on success or -errno in error case + */ +static int alloc_refcount_block(BlockDriverState *bs, + int64_t cluster_index, void **refcount_block) +{ + BDRVQcow2State *s = bs->opaque; + unsigned int refcount_table_index; + int64_t ret; + + BLKDBG_EVENT(bs->file, BLKDBG_REFBLOCK_ALLOC); + + /* Find the refcount block for the given cluster */ + refcount_table_index = cluster_index >> s->refcount_block_bits; + + if (refcount_table_index < s->refcount_table_size) { + + uint64_t refcount_block_offset = + s->refcount_table[refcount_table_index] & REFT_OFFSET_MASK; + + /* If it's already there, we're done */ + if (refcount_block_offset) { + if (offset_into_cluster(s, refcount_block_offset)) { + qcow2_signal_corruption(bs, true, -1, -1, "Refblock offset %#" + PRIx64 " unaligned (reftable index: " + "%#x)", refcount_block_offset, + refcount_table_index); + return -EIO; + } + + return load_refcount_block(bs, refcount_block_offset, + refcount_block); + } + } + + /* + * If we came here, we need to allocate something. Something is at least + * a cluster for the new refcount block. It may also include a new refcount + * table if the old refcount table is too small. + * + * Note that allocating clusters here needs some special care: + * + * - We can't use the normal qcow2_alloc_clusters(), it would try to + * increase the refcount and very likely we would end up with an endless + * recursion. Instead we must place the refcount blocks in a way that + * they can describe them themselves. + * + * - We need to consider that at this point we are inside update_refcounts + * and potentially doing an initial refcount increase. This means that + * some clusters have already been allocated by the caller, but their + * refcount isn't accurate yet. If we allocate clusters for metadata, we + * need to return -EAGAIN to signal the caller that it needs to restart + * the search for free clusters. + * + * - alloc_clusters_noref and qcow2_free_clusters may load a different + * refcount block into the cache + */ + + *refcount_block = NULL; + + /* We write to the refcount table, so we might depend on L2 tables */ + ret = qcow2_cache_flush(bs, s->l2_table_cache); + if (ret < 0) { + return ret; + } + + /* Allocate the refcount block itself and mark it as used */ + int64_t new_block = alloc_clusters_noref(bs, s->cluster_size, INT64_MAX); + if (new_block < 0) { + return new_block; + } + + /* The offset must fit in the offset field of the refcount table entry */ + assert((new_block & REFT_OFFSET_MASK) == new_block); + + /* If we're allocating the block at offset 0 then something is wrong */ + if (new_block == 0) { + qcow2_signal_corruption(bs, true, -1, -1, "Preventing invalid " + "allocation of refcount block at offset 0"); + return -EIO; + } + +#ifdef DEBUG_ALLOC2 + fprintf(stderr, "qcow2: Allocate refcount block %d for %" PRIx64 + " at %" PRIx64 "\n", + refcount_table_index, cluster_index << s->cluster_bits, new_block); +#endif + + if (in_same_refcount_block(s, new_block, cluster_index << s->cluster_bits)) { + /* Zero the new refcount block before updating it */ + ret = qcow2_cache_get_empty(bs, s->refcount_block_cache, new_block, + refcount_block); + if (ret < 0) { + goto fail; + } + + memset(*refcount_block, 0, s->cluster_size); + + /* The block describes itself, need to update the cache */ + int block_index = (new_block >> s->cluster_bits) & + (s->refcount_block_size - 1); + s->set_refcount(*refcount_block, block_index, 1); + } else { + /* Described somewhere else. This can recurse at most twice before we + * arrive at a block that describes itself. */ + ret = update_refcount(bs, new_block, s->cluster_size, 1, false, + QCOW2_DISCARD_NEVER); + if (ret < 0) { + goto fail; + } + + ret = qcow2_cache_flush(bs, s->refcount_block_cache); + if (ret < 0) { + goto fail; + } + + /* Initialize the new refcount block only after updating its refcount, + * update_refcount uses the refcount cache itself */ + ret = qcow2_cache_get_empty(bs, s->refcount_block_cache, new_block, + refcount_block); + if (ret < 0) { + goto fail; + } + + memset(*refcount_block, 0, s->cluster_size); + } + + /* Now the new refcount block needs to be written to disk */ + BLKDBG_EVENT(bs->file, BLKDBG_REFBLOCK_ALLOC_WRITE); + qcow2_cache_entry_mark_dirty(s->refcount_block_cache, *refcount_block); + ret = qcow2_cache_flush(bs, s->refcount_block_cache); + if (ret < 0) { + goto fail; + } + + /* If the refcount table is big enough, just hook the block up there */ + if (refcount_table_index < s->refcount_table_size) { + uint64_t data64 = cpu_to_be64(new_block); + BLKDBG_EVENT(bs->file, BLKDBG_REFBLOCK_ALLOC_HOOKUP); + ret = bdrv_pwrite_sync(bs->file, s->refcount_table_offset + + refcount_table_index * REFTABLE_ENTRY_SIZE, + &data64, sizeof(data64)); + if (ret < 0) { + goto fail; + } + + s->refcount_table[refcount_table_index] = new_block; + /* If there's a hole in s->refcount_table then it can happen + * that refcount_table_index < s->max_refcount_table_index */ + s->max_refcount_table_index = + MAX(s->max_refcount_table_index, refcount_table_index); + + /* The new refcount block may be where the caller intended to put its + * data, so let it restart the search. */ + return -EAGAIN; + } + + qcow2_cache_put(s->refcount_block_cache, refcount_block); + + /* + * If we come here, we need to grow the refcount table. Again, a new + * refcount table needs some space and we can't simply allocate to avoid + * endless recursion. + * + * Therefore let's grab new refcount blocks at the end of the image, which + * will describe themselves and the new refcount table. This way we can + * reference them only in the new table and do the switch to the new + * refcount table at once without producing an inconsistent state in + * between. + */ + BLKDBG_EVENT(bs->file, BLKDBG_REFTABLE_GROW); + + /* Calculate the number of refcount blocks needed so far; this will be the + * basis for calculating the index of the first cluster used for the + * self-describing refcount structures which we are about to create. + * + * Because we reached this point, there cannot be any refcount entries for + * cluster_index or higher indices yet. However, because new_block has been + * allocated to describe that cluster (and it will assume this role later + * on), we cannot use that index; also, new_block may actually have a higher + * cluster index than cluster_index, so it needs to be taken into account + * here (and 1 needs to be added to its value because that cluster is used). + */ + uint64_t blocks_used = DIV_ROUND_UP(MAX(cluster_index + 1, + (new_block >> s->cluster_bits) + 1), + s->refcount_block_size); + + /* Create the new refcount table and blocks */ + uint64_t meta_offset = (blocks_used * s->refcount_block_size) * + s->cluster_size; + + ret = qcow2_refcount_area(bs, meta_offset, 0, false, + refcount_table_index, new_block); + if (ret < 0) { + return ret; + } + + ret = load_refcount_block(bs, new_block, refcount_block); + if (ret < 0) { + return ret; + } + + /* If we were trying to do the initial refcount update for some cluster + * allocation, we might have used the same clusters to store newly + * allocated metadata. Make the caller search some new space. */ + return -EAGAIN; + +fail: + if (*refcount_block != NULL) { + qcow2_cache_put(s->refcount_block_cache, refcount_block); + } + return ret; +} + +/* + * Starting at @start_offset, this function creates new self-covering refcount + * structures: A new refcount table and refcount blocks which cover all of + * themselves, and a number of @additional_clusters beyond their end. + * @start_offset must be at the end of the image file, that is, there must be + * only empty space beyond it. + * If @exact_size is false, the refcount table will have 50 % more entries than + * necessary so it will not need to grow again soon. + * If @new_refblock_offset is not zero, it contains the offset of a refcount + * block that should be entered into the new refcount table at index + * @new_refblock_index. + * + * Returns: The offset after the new refcount structures (i.e. where the + * @additional_clusters may be placed) on success, -errno on error. + */ +int64_t qcow2_refcount_area(BlockDriverState *bs, uint64_t start_offset, + uint64_t additional_clusters, bool exact_size, + int new_refblock_index, + uint64_t new_refblock_offset) +{ + BDRVQcow2State *s = bs->opaque; + uint64_t total_refblock_count_u64, additional_refblock_count; + int total_refblock_count, table_size, area_reftable_index, table_clusters; + int i; + uint64_t table_offset, block_offset, end_offset; + int ret; + uint64_t *new_table; + + assert(!(start_offset % s->cluster_size)); + + qcow2_refcount_metadata_size(start_offset / s->cluster_size + + additional_clusters, + s->cluster_size, s->refcount_order, + !exact_size, &total_refblock_count_u64); + if (total_refblock_count_u64 > QCOW_MAX_REFTABLE_SIZE) { + return -EFBIG; + } + total_refblock_count = total_refblock_count_u64; + + /* Index in the refcount table of the first refcount block to cover the area + * of refcount structures we are about to create; we know that + * @total_refblock_count can cover @start_offset, so this will definitely + * fit into an int. */ + area_reftable_index = (start_offset / s->cluster_size) / + s->refcount_block_size; + + if (exact_size) { + table_size = total_refblock_count; + } else { + table_size = total_refblock_count + + DIV_ROUND_UP(total_refblock_count, 2); + } + /* The qcow2 file can only store the reftable size in number of clusters */ + table_size = ROUND_UP(table_size, s->cluster_size / REFTABLE_ENTRY_SIZE); + table_clusters = (table_size * REFTABLE_ENTRY_SIZE) / s->cluster_size; + + if (table_size > QCOW_MAX_REFTABLE_SIZE) { + return -EFBIG; + } + + new_table = g_try_new0(uint64_t, table_size); + + assert(table_size > 0); + if (new_table == NULL) { + ret = -ENOMEM; + goto fail; + } + + /* Fill the new refcount table */ + if (table_size > s->max_refcount_table_index) { + /* We're actually growing the reftable */ + memcpy(new_table, s->refcount_table, + (s->max_refcount_table_index + 1) * REFTABLE_ENTRY_SIZE); + } else { + /* Improbable case: We're shrinking the reftable. However, the caller + * has assured us that there is only empty space beyond @start_offset, + * so we can simply drop all of the refblocks that won't fit into the + * new reftable. */ + memcpy(new_table, s->refcount_table, table_size * REFTABLE_ENTRY_SIZE); + } + + if (new_refblock_offset) { + assert(new_refblock_index < total_refblock_count); + new_table[new_refblock_index] = new_refblock_offset; + } + + /* Count how many new refblocks we have to create */ + additional_refblock_count = 0; + for (i = area_reftable_index; i < total_refblock_count; i++) { + if (!new_table[i]) { + additional_refblock_count++; + } + } + + table_offset = start_offset + additional_refblock_count * s->cluster_size; + end_offset = table_offset + table_clusters * s->cluster_size; + + /* Fill the refcount blocks, and create new ones, if necessary */ + block_offset = start_offset; + for (i = area_reftable_index; i < total_refblock_count; i++) { + void *refblock_data; + uint64_t first_offset_covered; + + /* Reuse an existing refblock if possible, create a new one otherwise */ + if (new_table[i]) { + ret = qcow2_cache_get(bs, s->refcount_block_cache, new_table[i], + &refblock_data); + if (ret < 0) { + goto fail; + } + } else { + ret = qcow2_cache_get_empty(bs, s->refcount_block_cache, + block_offset, &refblock_data); + if (ret < 0) { + goto fail; + } + memset(refblock_data, 0, s->cluster_size); + qcow2_cache_entry_mark_dirty(s->refcount_block_cache, + refblock_data); + + new_table[i] = block_offset; + block_offset += s->cluster_size; + } + + /* First host offset covered by this refblock */ + first_offset_covered = (uint64_t)i * s->refcount_block_size * + s->cluster_size; + if (first_offset_covered < end_offset) { + int j, end_index; + + /* Set the refcount of all of the new refcount structures to 1 */ + + if (first_offset_covered < start_offset) { + assert(i == area_reftable_index); + j = (start_offset - first_offset_covered) / s->cluster_size; + assert(j < s->refcount_block_size); + } else { + j = 0; + } + + end_index = MIN((end_offset - first_offset_covered) / + s->cluster_size, + s->refcount_block_size); + + for (; j < end_index; j++) { + /* The caller guaranteed us this space would be empty */ + assert(s->get_refcount(refblock_data, j) == 0); + s->set_refcount(refblock_data, j, 1); + } + + qcow2_cache_entry_mark_dirty(s->refcount_block_cache, + refblock_data); + } + + qcow2_cache_put(s->refcount_block_cache, &refblock_data); + } + + assert(block_offset == table_offset); + + /* Write refcount blocks to disk */ + BLKDBG_EVENT(bs->file, BLKDBG_REFBLOCK_ALLOC_WRITE_BLOCKS); + ret = qcow2_cache_flush(bs, s->refcount_block_cache); + if (ret < 0) { + goto fail; + } + + /* Write refcount table to disk */ + for (i = 0; i < total_refblock_count; i++) { + cpu_to_be64s(&new_table[i]); + } + + BLKDBG_EVENT(bs->file, BLKDBG_REFBLOCK_ALLOC_WRITE_TABLE); + ret = bdrv_pwrite_sync(bs->file, table_offset, new_table, + table_size * REFTABLE_ENTRY_SIZE); + if (ret < 0) { + goto fail; + } + + for (i = 0; i < total_refblock_count; i++) { + be64_to_cpus(&new_table[i]); + } + + /* Hook up the new refcount table in the qcow2 header */ + struct QEMU_PACKED { + uint64_t d64; + uint32_t d32; + } data; + data.d64 = cpu_to_be64(table_offset); + data.d32 = cpu_to_be32(table_clusters); + BLKDBG_EVENT(bs->file, BLKDBG_REFBLOCK_ALLOC_SWITCH_TABLE); + ret = bdrv_pwrite_sync(bs->file, + offsetof(QCowHeader, refcount_table_offset), + &data, sizeof(data)); + if (ret < 0) { + goto fail; + } + + /* And switch it in memory */ + uint64_t old_table_offset = s->refcount_table_offset; + uint64_t old_table_size = s->refcount_table_size; + + g_free(s->refcount_table); + s->refcount_table = new_table; + s->refcount_table_size = table_size; + s->refcount_table_offset = table_offset; + update_max_refcount_table_index(s); + + /* Free old table. */ + qcow2_free_clusters(bs, old_table_offset, + old_table_size * REFTABLE_ENTRY_SIZE, + QCOW2_DISCARD_OTHER); + + return end_offset; + +fail: + g_free(new_table); + return ret; +} + +void qcow2_process_discards(BlockDriverState *bs, int ret) +{ + BDRVQcow2State *s = bs->opaque; + Qcow2DiscardRegion *d, *next; + + QTAILQ_FOREACH_SAFE(d, &s->discards, next, next) { + QTAILQ_REMOVE(&s->discards, d, next); + + /* Discard is optional, ignore the return value */ + if (ret >= 0) { + int r2 = bdrv_pdiscard(bs->file, d->offset, d->bytes); + if (r2 < 0) { + trace_qcow2_process_discards_failed_region(d->offset, d->bytes, + r2); + } + } + + g_free(d); + } +} + +static void update_refcount_discard(BlockDriverState *bs, + uint64_t offset, uint64_t length) +{ + BDRVQcow2State *s = bs->opaque; + Qcow2DiscardRegion *d, *p, *next; + + QTAILQ_FOREACH(d, &s->discards, next) { + uint64_t new_start = MIN(offset, d->offset); + uint64_t new_end = MAX(offset + length, d->offset + d->bytes); + + if (new_end - new_start <= length + d->bytes) { + /* There can't be any overlap, areas ending up here have no + * references any more and therefore shouldn't get freed another + * time. */ + assert(d->bytes + length == new_end - new_start); + d->offset = new_start; + d->bytes = new_end - new_start; + goto found; + } + } + + d = g_malloc(sizeof(*d)); + *d = (Qcow2DiscardRegion) { + .bs = bs, + .offset = offset, + .bytes = length, + }; + QTAILQ_INSERT_TAIL(&s->discards, d, next); + +found: + /* Merge discard requests if they are adjacent now */ + QTAILQ_FOREACH_SAFE(p, &s->discards, next, next) { + if (p == d + || p->offset > d->offset + d->bytes + || d->offset > p->offset + p->bytes) + { + continue; + } + + /* Still no overlap possible */ + assert(p->offset == d->offset + d->bytes + || d->offset == p->offset + p->bytes); + + QTAILQ_REMOVE(&s->discards, p, next); + d->offset = MIN(d->offset, p->offset); + d->bytes += p->bytes; + g_free(p); + } +} + +/* XXX: cache several refcount block clusters ? */ +/* @addend is the absolute value of the addend; if @decrease is set, @addend + * will be subtracted from the current refcount, otherwise it will be added */ +static int QEMU_WARN_UNUSED_RESULT update_refcount(BlockDriverState *bs, + int64_t offset, + int64_t length, + uint64_t addend, + bool decrease, + enum qcow2_discard_type type) +{ + BDRVQcow2State *s = bs->opaque; + int64_t start, last, cluster_offset; + void *refcount_block = NULL; + int64_t old_table_index = -1; + int ret; + +#ifdef DEBUG_ALLOC2 + fprintf(stderr, "update_refcount: offset=%" PRId64 " size=%" PRId64 + " addend=%s%" PRIu64 "\n", offset, length, decrease ? "-" : "", + addend); +#endif + if (length < 0) { + return -EINVAL; + } else if (length == 0) { + return 0; + } + + if (decrease) { + qcow2_cache_set_dependency(bs, s->refcount_block_cache, + s->l2_table_cache); + } + + start = start_of_cluster(s, offset); + last = start_of_cluster(s, offset + length - 1); + for(cluster_offset = start; cluster_offset <= last; + cluster_offset += s->cluster_size) + { + int block_index; + uint64_t refcount; + int64_t cluster_index = cluster_offset >> s->cluster_bits; + int64_t table_index = cluster_index >> s->refcount_block_bits; + + /* Load the refcount block and allocate it if needed */ + if (table_index != old_table_index) { + if (refcount_block) { + qcow2_cache_put(s->refcount_block_cache, &refcount_block); + } + ret = alloc_refcount_block(bs, cluster_index, &refcount_block); + /* If the caller needs to restart the search for free clusters, + * try the same ones first to see if they're still free. */ + if (ret == -EAGAIN) { + if (s->free_cluster_index > (start >> s->cluster_bits)) { + s->free_cluster_index = (start >> s->cluster_bits); + } + } + if (ret < 0) { + goto fail; + } + } + old_table_index = table_index; + + qcow2_cache_entry_mark_dirty(s->refcount_block_cache, refcount_block); + + /* we can update the count and save it */ + block_index = cluster_index & (s->refcount_block_size - 1); + + refcount = s->get_refcount(refcount_block, block_index); + if (decrease ? (refcount - addend > refcount) + : (refcount + addend < refcount || + refcount + addend > s->refcount_max)) + { + ret = -EINVAL; + goto fail; + } + if (decrease) { + refcount -= addend; + } else { + refcount += addend; + } + if (refcount == 0 && cluster_index < s->free_cluster_index) { + s->free_cluster_index = cluster_index; + } + s->set_refcount(refcount_block, block_index, refcount); + + if (refcount == 0) { + void *table; + + table = qcow2_cache_is_table_offset(s->refcount_block_cache, + offset); + if (table != NULL) { + qcow2_cache_put(s->refcount_block_cache, &refcount_block); + old_table_index = -1; + qcow2_cache_discard(s->refcount_block_cache, table); + } + + table = qcow2_cache_is_table_offset(s->l2_table_cache, offset); + if (table != NULL) { + qcow2_cache_discard(s->l2_table_cache, table); + } + + if (s->discard_passthrough[type]) { + update_refcount_discard(bs, cluster_offset, s->cluster_size); + } + } + } + + ret = 0; +fail: + if (!s->cache_discards) { + qcow2_process_discards(bs, ret); + } + + /* Write last changed block to disk */ + if (refcount_block) { + qcow2_cache_put(s->refcount_block_cache, &refcount_block); + } + + /* + * Try do undo any updates if an error is returned (This may succeed in + * some cases like ENOSPC for allocating a new refcount block) + */ + if (ret < 0) { + int dummy; + dummy = update_refcount(bs, offset, cluster_offset - offset, addend, + !decrease, QCOW2_DISCARD_NEVER); + (void)dummy; + } + + return ret; +} + +/* + * Increases or decreases the refcount of a given cluster. + * + * @addend is the absolute value of the addend; if @decrease is set, @addend + * will be subtracted from the current refcount, otherwise it will be added. + * + * On success 0 is returned; on failure -errno is returned. + */ +int qcow2_update_cluster_refcount(BlockDriverState *bs, + int64_t cluster_index, + uint64_t addend, bool decrease, + enum qcow2_discard_type type) +{ + BDRVQcow2State *s = bs->opaque; + int ret; + + ret = update_refcount(bs, cluster_index << s->cluster_bits, 1, addend, + decrease, type); + if (ret < 0) { + return ret; + } + + return 0; +} + + + +/*********************************************************/ +/* cluster allocation functions */ + + + +/* return < 0 if error */ +static int64_t alloc_clusters_noref(BlockDriverState *bs, uint64_t size, + uint64_t max) +{ + BDRVQcow2State *s = bs->opaque; + uint64_t i, nb_clusters, refcount; + int ret; + + /* We can't allocate clusters if they may still be queued for discard. */ + if (s->cache_discards) { + qcow2_process_discards(bs, 0); + } + + nb_clusters = size_to_clusters(s, size); +retry: + for(i = 0; i < nb_clusters; i++) { + uint64_t next_cluster_index = s->free_cluster_index++; + ret = qcow2_get_refcount(bs, next_cluster_index, &refcount); + + if (ret < 0) { + return ret; + } else if (refcount != 0) { + goto retry; + } + } + + /* Make sure that all offsets in the "allocated" range are representable + * in the requested max */ + if (s->free_cluster_index > 0 && + s->free_cluster_index - 1 > (max >> s->cluster_bits)) + { + return -EFBIG; + } + +#ifdef DEBUG_ALLOC2 + fprintf(stderr, "alloc_clusters: size=%" PRId64 " -> %" PRId64 "\n", + size, + (s->free_cluster_index - nb_clusters) << s->cluster_bits); +#endif + return (s->free_cluster_index - nb_clusters) << s->cluster_bits; +} + +int64_t qcow2_alloc_clusters(BlockDriverState *bs, uint64_t size) +{ + int64_t offset; + int ret; + + BLKDBG_EVENT(bs->file, BLKDBG_CLUSTER_ALLOC); + do { + offset = alloc_clusters_noref(bs, size, QCOW_MAX_CLUSTER_OFFSET); + if (offset < 0) { + return offset; + } + + ret = update_refcount(bs, offset, size, 1, false, QCOW2_DISCARD_NEVER); + } while (ret == -EAGAIN); + + if (ret < 0) { + return ret; + } + + return offset; +} + +int64_t qcow2_alloc_clusters_at(BlockDriverState *bs, uint64_t offset, + int64_t nb_clusters) +{ + BDRVQcow2State *s = bs->opaque; + uint64_t cluster_index, refcount; + uint64_t i; + int ret; + + assert(nb_clusters >= 0); + if (nb_clusters == 0) { + return 0; + } + + do { + /* Check how many clusters there are free */ + cluster_index = offset >> s->cluster_bits; + for(i = 0; i < nb_clusters; i++) { + ret = qcow2_get_refcount(bs, cluster_index++, &refcount); + if (ret < 0) { + return ret; + } else if (refcount != 0) { + break; + } + } + + /* And then allocate them */ + ret = update_refcount(bs, offset, i << s->cluster_bits, 1, false, + QCOW2_DISCARD_NEVER); + } while (ret == -EAGAIN); + + if (ret < 0) { + return ret; + } + + return i; +} + +/* only used to allocate compressed sectors. We try to allocate + contiguous sectors. size must be <= cluster_size */ +int64_t qcow2_alloc_bytes(BlockDriverState *bs, int size) +{ + BDRVQcow2State *s = bs->opaque; + int64_t offset; + size_t free_in_cluster; + int ret; + + BLKDBG_EVENT(bs->file, BLKDBG_CLUSTER_ALLOC_BYTES); + assert(size > 0 && size <= s->cluster_size); + assert(!s->free_byte_offset || offset_into_cluster(s, s->free_byte_offset)); + + offset = s->free_byte_offset; + + if (offset) { + uint64_t refcount; + ret = qcow2_get_refcount(bs, offset >> s->cluster_bits, &refcount); + if (ret < 0) { + return ret; + } + + if (refcount == s->refcount_max) { + offset = 0; + } + } + + free_in_cluster = s->cluster_size - offset_into_cluster(s, offset); + do { + if (!offset || free_in_cluster < size) { + int64_t new_cluster; + + new_cluster = alloc_clusters_noref(bs, s->cluster_size, + MIN(s->cluster_offset_mask, + QCOW_MAX_CLUSTER_OFFSET)); + if (new_cluster < 0) { + return new_cluster; + } + + if (new_cluster == 0) { + qcow2_signal_corruption(bs, true, -1, -1, "Preventing invalid " + "allocation of compressed cluster " + "at offset 0"); + return -EIO; + } + + if (!offset || ROUND_UP(offset, s->cluster_size) != new_cluster) { + offset = new_cluster; + free_in_cluster = s->cluster_size; + } else { + free_in_cluster += s->cluster_size; + } + } + + assert(offset); + ret = update_refcount(bs, offset, size, 1, false, QCOW2_DISCARD_NEVER); + if (ret < 0) { + offset = 0; + } + } while (ret == -EAGAIN); + if (ret < 0) { + return ret; + } + + /* The cluster refcount was incremented; refcount blocks must be flushed + * before the caller's L2 table updates. */ + qcow2_cache_set_dependency(bs, s->l2_table_cache, s->refcount_block_cache); + + s->free_byte_offset = offset + size; + if (!offset_into_cluster(s, s->free_byte_offset)) { + s->free_byte_offset = 0; + } + + return offset; +} + +void qcow2_free_clusters(BlockDriverState *bs, + int64_t offset, int64_t size, + enum qcow2_discard_type type) +{ + int ret; + + BLKDBG_EVENT(bs->file, BLKDBG_CLUSTER_FREE); + ret = update_refcount(bs, offset, size, 1, true, type); + if (ret < 0) { + fprintf(stderr, "qcow2_free_clusters failed: %s\n", strerror(-ret)); + /* TODO Remember the clusters to free them later and avoid leaking */ + } +} + +/* + * Free a cluster using its L2 entry (handles clusters of all types, e.g. + * normal cluster, compressed cluster, etc.) + */ +void qcow2_free_any_cluster(BlockDriverState *bs, uint64_t l2_entry, + enum qcow2_discard_type type) +{ + BDRVQcow2State *s = bs->opaque; + QCow2ClusterType ctype = qcow2_get_cluster_type(bs, l2_entry); + + if (has_data_file(bs)) { + if (s->discard_passthrough[type] && + (ctype == QCOW2_CLUSTER_NORMAL || + ctype == QCOW2_CLUSTER_ZERO_ALLOC)) + { + bdrv_pdiscard(s->data_file, l2_entry & L2E_OFFSET_MASK, + s->cluster_size); + } + return; + } + + switch (ctype) { + case QCOW2_CLUSTER_COMPRESSED: + { + int64_t offset = (l2_entry & s->cluster_offset_mask) + & QCOW2_COMPRESSED_SECTOR_MASK; + int size = QCOW2_COMPRESSED_SECTOR_SIZE * + (((l2_entry >> s->csize_shift) & s->csize_mask) + 1); + qcow2_free_clusters(bs, offset, size, type); + } + break; + case QCOW2_CLUSTER_NORMAL: + case QCOW2_CLUSTER_ZERO_ALLOC: + if (offset_into_cluster(s, l2_entry & L2E_OFFSET_MASK)) { + qcow2_signal_corruption(bs, false, -1, -1, + "Cannot free unaligned cluster %#llx", + l2_entry & L2E_OFFSET_MASK); + } else { + qcow2_free_clusters(bs, l2_entry & L2E_OFFSET_MASK, + s->cluster_size, type); + } + break; + case QCOW2_CLUSTER_ZERO_PLAIN: + case QCOW2_CLUSTER_UNALLOCATED: + break; + default: + abort(); + } +} + +int coroutine_fn qcow2_write_caches(BlockDriverState *bs) +{ + BDRVQcow2State *s = bs->opaque; + int ret; + + ret = qcow2_cache_write(bs, s->l2_table_cache); + if (ret < 0) { + return ret; + } + + if (qcow2_need_accurate_refcounts(s)) { + ret = qcow2_cache_write(bs, s->refcount_block_cache); + if (ret < 0) { + return ret; + } + } + + return 0; +} + +int coroutine_fn qcow2_flush_caches(BlockDriverState *bs) +{ + int ret = qcow2_write_caches(bs); + if (ret < 0) { + return ret; + } + + return bdrv_flush(bs->file->bs); +} + +/*********************************************************/ +/* snapshots and image creation */ + + + +/* update the refcounts of snapshots and the copied flag */ +int qcow2_update_snapshot_refcount(BlockDriverState *bs, + int64_t l1_table_offset, int l1_size, int addend) +{ + BDRVQcow2State *s = bs->opaque; + uint64_t *l1_table, *l2_slice, l2_offset, entry, l1_size2, refcount; + bool l1_allocated = false; + int64_t old_entry, old_l2_offset; + unsigned slice, slice_size2, n_slices; + int i, j, l1_modified = 0, nb_csectors; + int ret; + + assert(addend >= -1 && addend <= 1); + + l2_slice = NULL; + l1_table = NULL; + l1_size2 = l1_size * L1E_SIZE; + slice_size2 = s->l2_slice_size * l2_entry_size(s); + n_slices = s->cluster_size / slice_size2; + + s->cache_discards = true; + + /* WARNING: qcow2_snapshot_goto relies on this function not using the + * l1_table_offset when it is the current s->l1_table_offset! Be careful + * when changing this! */ + if (l1_table_offset != s->l1_table_offset) { + l1_table = g_try_malloc0(l1_size2); + if (l1_size2 && l1_table == NULL) { + ret = -ENOMEM; + goto fail; + } + l1_allocated = true; + + ret = bdrv_pread(bs->file, l1_table_offset, l1_table, l1_size2); + if (ret < 0) { + goto fail; + } + + for (i = 0; i < l1_size; i++) { + be64_to_cpus(&l1_table[i]); + } + } else { + assert(l1_size == s->l1_size); + l1_table = s->l1_table; + l1_allocated = false; + } + + for (i = 0; i < l1_size; i++) { + l2_offset = l1_table[i]; + if (l2_offset) { + old_l2_offset = l2_offset; + l2_offset &= L1E_OFFSET_MASK; + + if (offset_into_cluster(s, l2_offset)) { + qcow2_signal_corruption(bs, true, -1, -1, "L2 table offset %#" + PRIx64 " unaligned (L1 index: %#x)", + l2_offset, i); + ret = -EIO; + goto fail; + } + + for (slice = 0; slice < n_slices; slice++) { + ret = qcow2_cache_get(bs, s->l2_table_cache, + l2_offset + slice * slice_size2, + (void **) &l2_slice); + if (ret < 0) { + goto fail; + } + + for (j = 0; j < s->l2_slice_size; j++) { + uint64_t cluster_index; + uint64_t offset; + + entry = get_l2_entry(s, l2_slice, j); + old_entry = entry; + entry &= ~QCOW_OFLAG_COPIED; + offset = entry & L2E_OFFSET_MASK; + + switch (qcow2_get_cluster_type(bs, entry)) { + case QCOW2_CLUSTER_COMPRESSED: + nb_csectors = ((entry >> s->csize_shift) & + s->csize_mask) + 1; + if (addend != 0) { + uint64_t coffset = (entry & s->cluster_offset_mask) + & QCOW2_COMPRESSED_SECTOR_MASK; + ret = update_refcount( + bs, coffset, + nb_csectors * QCOW2_COMPRESSED_SECTOR_SIZE, + abs(addend), addend < 0, + QCOW2_DISCARD_SNAPSHOT); + if (ret < 0) { + goto fail; + } + } + /* compressed clusters are never modified */ + refcount = 2; + break; + + case QCOW2_CLUSTER_NORMAL: + case QCOW2_CLUSTER_ZERO_ALLOC: + if (offset_into_cluster(s, offset)) { + /* Here l2_index means table (not slice) index */ + int l2_index = slice * s->l2_slice_size + j; + qcow2_signal_corruption( + bs, true, -1, -1, "Cluster " + "allocation offset %#" PRIx64 + " unaligned (L2 offset: %#" + PRIx64 ", L2 index: %#x)", + offset, l2_offset, l2_index); + ret = -EIO; + goto fail; + } + + cluster_index = offset >> s->cluster_bits; + assert(cluster_index); + if (addend != 0) { + ret = qcow2_update_cluster_refcount( + bs, cluster_index, abs(addend), addend < 0, + QCOW2_DISCARD_SNAPSHOT); + if (ret < 0) { + goto fail; + } + } + + ret = qcow2_get_refcount(bs, cluster_index, &refcount); + if (ret < 0) { + goto fail; + } + break; + + case QCOW2_CLUSTER_ZERO_PLAIN: + case QCOW2_CLUSTER_UNALLOCATED: + refcount = 0; + break; + + default: + abort(); + } + + if (refcount == 1) { + entry |= QCOW_OFLAG_COPIED; + } + if (entry != old_entry) { + if (addend > 0) { + qcow2_cache_set_dependency(bs, s->l2_table_cache, + s->refcount_block_cache); + } + set_l2_entry(s, l2_slice, j, entry); + qcow2_cache_entry_mark_dirty(s->l2_table_cache, + l2_slice); + } + } + + qcow2_cache_put(s->l2_table_cache, (void **) &l2_slice); + } + + if (addend != 0) { + ret = qcow2_update_cluster_refcount(bs, l2_offset >> + s->cluster_bits, + abs(addend), addend < 0, + QCOW2_DISCARD_SNAPSHOT); + if (ret < 0) { + goto fail; + } + } + ret = qcow2_get_refcount(bs, l2_offset >> s->cluster_bits, + &refcount); + if (ret < 0) { + goto fail; + } else if (refcount == 1) { + l2_offset |= QCOW_OFLAG_COPIED; + } + if (l2_offset != old_l2_offset) { + l1_table[i] = l2_offset; + l1_modified = 1; + } + } + } + + ret = bdrv_flush(bs); +fail: + if (l2_slice) { + qcow2_cache_put(s->l2_table_cache, (void **) &l2_slice); + } + + s->cache_discards = false; + qcow2_process_discards(bs, ret); + + /* Update L1 only if it isn't deleted anyway (addend = -1) */ + if (ret == 0 && addend >= 0 && l1_modified) { + for (i = 0; i < l1_size; i++) { + cpu_to_be64s(&l1_table[i]); + } + + ret = bdrv_pwrite_sync(bs->file, l1_table_offset, + l1_table, l1_size2); + + for (i = 0; i < l1_size; i++) { + be64_to_cpus(&l1_table[i]); + } + } + if (l1_allocated) + g_free(l1_table); + return ret; +} + + + + +/*********************************************************/ +/* refcount checking functions */ + + +static uint64_t refcount_array_byte_size(BDRVQcow2State *s, uint64_t entries) +{ + /* This assertion holds because there is no way we can address more than + * 2^(64 - 9) clusters at once (with cluster size 512 = 2^9, and because + * offsets have to be representable in bytes); due to every cluster + * corresponding to one refcount entry, we are well below that limit */ + assert(entries < (UINT64_C(1) << (64 - 9))); + + /* Thanks to the assertion this will not overflow, because + * s->refcount_order < 7. + * (note: x << s->refcount_order == x * s->refcount_bits) */ + return DIV_ROUND_UP(entries << s->refcount_order, 8); +} + +/** + * Reallocates *array so that it can hold new_size entries. *size must contain + * the current number of entries in *array. If the reallocation fails, *array + * and *size will not be modified and -errno will be returned. If the + * reallocation is successful, *array will be set to the new buffer, *size + * will be set to new_size and 0 will be returned. The size of the reallocated + * refcount array buffer will be aligned to a cluster boundary, and the newly + * allocated area will be zeroed. + */ +static int realloc_refcount_array(BDRVQcow2State *s, void **array, + int64_t *size, int64_t new_size) +{ + int64_t old_byte_size, new_byte_size; + void *new_ptr; + + /* Round to clusters so the array can be directly written to disk */ + old_byte_size = size_to_clusters(s, refcount_array_byte_size(s, *size)) + * s->cluster_size; + new_byte_size = size_to_clusters(s, refcount_array_byte_size(s, new_size)) + * s->cluster_size; + + if (new_byte_size == old_byte_size) { + *size = new_size; + return 0; + } + + assert(new_byte_size > 0); + + if (new_byte_size > SIZE_MAX) { + return -ENOMEM; + } + + new_ptr = g_try_realloc(*array, new_byte_size); + if (!new_ptr) { + return -ENOMEM; + } + + if (new_byte_size > old_byte_size) { + memset((char *)new_ptr + old_byte_size, 0, + new_byte_size - old_byte_size); + } + + *array = new_ptr; + *size = new_size; + + return 0; +} + +/* + * Increases the refcount for a range of clusters in a given refcount table. + * This is used to construct a temporary refcount table out of L1 and L2 tables + * which can be compared to the refcount table saved in the image. + * + * Modifies the number of errors in res. + */ +int qcow2_inc_refcounts_imrt(BlockDriverState *bs, BdrvCheckResult *res, + void **refcount_table, + int64_t *refcount_table_size, + int64_t offset, int64_t size) +{ + BDRVQcow2State *s = bs->opaque; + uint64_t start, last, cluster_offset, k, refcount; + int64_t file_len; + int ret; + + if (size <= 0) { + return 0; + } + + file_len = bdrv_getlength(bs->file->bs); + if (file_len < 0) { + return file_len; + } + + /* + * Last cluster of qcow2 image may be semi-allocated, so it may be OK to + * reference some space after file end but it should be less than one + * cluster. + */ + if (offset + size - file_len >= s->cluster_size) { + fprintf(stderr, "ERROR: counting reference for region exceeding the " + "end of the file by one cluster or more: offset 0x%" PRIx64 + " size 0x%" PRIx64 "\n", offset, size); + res->corruptions++; + return 0; + } + + start = start_of_cluster(s, offset); + last = start_of_cluster(s, offset + size - 1); + for(cluster_offset = start; cluster_offset <= last; + cluster_offset += s->cluster_size) { + k = cluster_offset >> s->cluster_bits; + if (k >= *refcount_table_size) { + ret = realloc_refcount_array(s, refcount_table, + refcount_table_size, k + 1); + if (ret < 0) { + res->check_errors++; + return ret; + } + } + + refcount = s->get_refcount(*refcount_table, k); + if (refcount == s->refcount_max) { + fprintf(stderr, "ERROR: overflow cluster offset=0x%" PRIx64 + "\n", cluster_offset); + fprintf(stderr, "Use qemu-img amend to increase the refcount entry " + "width or qemu-img convert to create a clean copy if the " + "image cannot be opened for writing\n"); + res->corruptions++; + continue; + } + s->set_refcount(*refcount_table, k, refcount + 1); + } + + return 0; +} + +/* Flags for check_refcounts_l1() and check_refcounts_l2() */ +enum { + CHECK_FRAG_INFO = 0x2, /* update BlockFragInfo counters */ +}; + +/* + * Increases the refcount in the given refcount table for the all clusters + * referenced in the L2 table. While doing so, performs some checks on L2 + * entries. + * + * Returns the number of errors found by the checks or -errno if an internal + * error occurred. + */ +static int check_refcounts_l2(BlockDriverState *bs, BdrvCheckResult *res, + void **refcount_table, + int64_t *refcount_table_size, int64_t l2_offset, + int flags, BdrvCheckMode fix, bool active) +{ + BDRVQcow2State *s = bs->opaque; + uint64_t *l2_table, l2_entry; + uint64_t next_contiguous_offset = 0; + int i, l2_size, nb_csectors, ret; + + /* Read L2 table from disk */ + l2_size = s->l2_size * l2_entry_size(s); + l2_table = g_malloc(l2_size); + + ret = bdrv_pread(bs->file, l2_offset, l2_table, l2_size); + if (ret < 0) { + fprintf(stderr, "ERROR: I/O error in check_refcounts_l2\n"); + res->check_errors++; + goto fail; + } + + /* Do the actual checks */ + for(i = 0; i < s->l2_size; i++) { + l2_entry = get_l2_entry(s, l2_table, i); + + switch (qcow2_get_cluster_type(bs, l2_entry)) { + case QCOW2_CLUSTER_COMPRESSED: + /* Compressed clusters don't have QCOW_OFLAG_COPIED */ + if (l2_entry & QCOW_OFLAG_COPIED) { + fprintf(stderr, "ERROR: coffset=0x%" PRIx64 ": " + "copied flag must never be set for compressed " + "clusters\n", l2_entry & s->cluster_offset_mask); + l2_entry &= ~QCOW_OFLAG_COPIED; + res->corruptions++; + } + + if (has_data_file(bs)) { + fprintf(stderr, "ERROR compressed cluster %d with data file, " + "entry=0x%" PRIx64 "\n", i, l2_entry); + res->corruptions++; + break; + } + + /* Mark cluster as used */ + nb_csectors = ((l2_entry >> s->csize_shift) & + s->csize_mask) + 1; + l2_entry &= s->cluster_offset_mask; + ret = qcow2_inc_refcounts_imrt( + bs, res, refcount_table, refcount_table_size, + l2_entry & QCOW2_COMPRESSED_SECTOR_MASK, + nb_csectors * QCOW2_COMPRESSED_SECTOR_SIZE); + if (ret < 0) { + goto fail; + } + + if (flags & CHECK_FRAG_INFO) { + res->bfi.allocated_clusters++; + res->bfi.compressed_clusters++; + + /* Compressed clusters are fragmented by nature. Since they + * take up sub-sector space but we only have sector granularity + * I/O we need to re-read the same sectors even for adjacent + * compressed clusters. + */ + res->bfi.fragmented_clusters++; + } + break; + + case QCOW2_CLUSTER_ZERO_ALLOC: + case QCOW2_CLUSTER_NORMAL: + { + uint64_t offset = l2_entry & L2E_OFFSET_MASK; + + /* Correct offsets are cluster aligned */ + if (offset_into_cluster(s, offset)) { + bool contains_data; + res->corruptions++; + + if (has_subclusters(s)) { + uint64_t l2_bitmap = get_l2_bitmap(s, l2_table, i); + contains_data = (l2_bitmap & QCOW_L2_BITMAP_ALL_ALLOC); + } else { + contains_data = !(l2_entry & QCOW_OFLAG_ZERO); + } + + if (!contains_data) { + fprintf(stderr, "%s offset=%" PRIx64 ": Preallocated " + "cluster is not properly aligned; L2 entry " + "corrupted.\n", + fix & BDRV_FIX_ERRORS ? "Repairing" : "ERROR", + offset); + if (fix & BDRV_FIX_ERRORS) { + int idx = i * (l2_entry_size(s) / sizeof(uint64_t)); + uint64_t l2e_offset = + l2_offset + (uint64_t)i * l2_entry_size(s); + int ign = active ? QCOW2_OL_ACTIVE_L2 : + QCOW2_OL_INACTIVE_L2; + + l2_entry = has_subclusters(s) ? 0 : QCOW_OFLAG_ZERO; + set_l2_entry(s, l2_table, i, l2_entry); + ret = qcow2_pre_write_overlap_check(bs, ign, + l2e_offset, l2_entry_size(s), false); + if (ret < 0) { + fprintf(stderr, "ERROR: Overlap check failed\n"); + res->check_errors++; + /* Something is seriously wrong, so abort checking + * this L2 table */ + goto fail; + } + + ret = bdrv_pwrite_sync(bs->file, l2e_offset, + &l2_table[idx], + l2_entry_size(s)); + if (ret < 0) { + fprintf(stderr, "ERROR: Failed to overwrite L2 " + "table entry: %s\n", strerror(-ret)); + res->check_errors++; + /* Do not abort, continue checking the rest of this + * L2 table's entries */ + } else { + res->corruptions--; + res->corruptions_fixed++; + /* Skip marking the cluster as used + * (it is unused now) */ + continue; + } + } + } else { + fprintf(stderr, "ERROR offset=%" PRIx64 ": Data cluster is " + "not properly aligned; L2 entry corrupted.\n", offset); + } + } + + if (flags & CHECK_FRAG_INFO) { + res->bfi.allocated_clusters++; + if (next_contiguous_offset && + offset != next_contiguous_offset) { + res->bfi.fragmented_clusters++; + } + next_contiguous_offset = offset + s->cluster_size; + } + + /* Mark cluster as used */ + if (!has_data_file(bs)) { + ret = qcow2_inc_refcounts_imrt(bs, res, refcount_table, + refcount_table_size, + offset, s->cluster_size); + if (ret < 0) { + goto fail; + } + } + break; + } + + case QCOW2_CLUSTER_ZERO_PLAIN: + case QCOW2_CLUSTER_UNALLOCATED: + break; + + default: + abort(); + } + } + + g_free(l2_table); + return 0; + +fail: + g_free(l2_table); + return ret; +} + +/* + * Increases the refcount for the L1 table, its L2 tables and all referenced + * clusters in the given refcount table. While doing so, performs some checks + * on L1 and L2 entries. + * + * Returns the number of errors found by the checks or -errno if an internal + * error occurred. + */ +static int check_refcounts_l1(BlockDriverState *bs, + BdrvCheckResult *res, + void **refcount_table, + int64_t *refcount_table_size, + int64_t l1_table_offset, int l1_size, + int flags, BdrvCheckMode fix, bool active) +{ + BDRVQcow2State *s = bs->opaque; + uint64_t *l1_table = NULL, l2_offset, l1_size2; + int i, ret; + + l1_size2 = l1_size * L1E_SIZE; + + /* Mark L1 table as used */ + ret = qcow2_inc_refcounts_imrt(bs, res, refcount_table, refcount_table_size, + l1_table_offset, l1_size2); + if (ret < 0) { + goto fail; + } + + /* Read L1 table entries from disk */ + if (l1_size2 > 0) { + l1_table = g_try_malloc(l1_size2); + if (l1_table == NULL) { + ret = -ENOMEM; + res->check_errors++; + goto fail; + } + ret = bdrv_pread(bs->file, l1_table_offset, l1_table, l1_size2); + if (ret < 0) { + fprintf(stderr, "ERROR: I/O error in check_refcounts_l1\n"); + res->check_errors++; + goto fail; + } + for(i = 0;i < l1_size; i++) + be64_to_cpus(&l1_table[i]); + } + + /* Do the actual checks */ + for(i = 0; i < l1_size; i++) { + l2_offset = l1_table[i]; + if (l2_offset) { + /* Mark L2 table as used */ + l2_offset &= L1E_OFFSET_MASK; + ret = qcow2_inc_refcounts_imrt(bs, res, + refcount_table, refcount_table_size, + l2_offset, s->cluster_size); + if (ret < 0) { + goto fail; + } + + /* L2 tables are cluster aligned */ + if (offset_into_cluster(s, l2_offset)) { + fprintf(stderr, "ERROR l2_offset=%" PRIx64 ": Table is not " + "cluster aligned; L1 entry corrupted\n", l2_offset); + res->corruptions++; + } + + /* Process and check L2 entries */ + ret = check_refcounts_l2(bs, res, refcount_table, + refcount_table_size, l2_offset, flags, + fix, active); + if (ret < 0) { + goto fail; + } + } + } + g_free(l1_table); + return 0; + +fail: + g_free(l1_table); + return ret; +} + +/* + * Checks the OFLAG_COPIED flag for all L1 and L2 entries. + * + * This function does not print an error message nor does it increment + * check_errors if qcow2_get_refcount fails (this is because such an error will + * have been already detected and sufficiently signaled by the calling function + * (qcow2_check_refcounts) by the time this function is called). + */ +static int check_oflag_copied(BlockDriverState *bs, BdrvCheckResult *res, + BdrvCheckMode fix) +{ + BDRVQcow2State *s = bs->opaque; + uint64_t *l2_table = qemu_blockalign(bs, s->cluster_size); + int ret; + uint64_t refcount; + int i, j; + bool repair; + + if (fix & BDRV_FIX_ERRORS) { + /* Always repair */ + repair = true; + } else if (fix & BDRV_FIX_LEAKS) { + /* Repair only if that seems safe: This function is always + * called after the refcounts have been fixed, so the refcount + * is accurate if that repair was successful */ + repair = !res->check_errors && !res->corruptions && !res->leaks; + } else { + repair = false; + } + + for (i = 0; i < s->l1_size; i++) { + uint64_t l1_entry = s->l1_table[i]; + uint64_t l2_offset = l1_entry & L1E_OFFSET_MASK; + int l2_dirty = 0; + + if (!l2_offset) { + continue; + } + + ret = qcow2_get_refcount(bs, l2_offset >> s->cluster_bits, + &refcount); + if (ret < 0) { + /* don't print message nor increment check_errors */ + continue; + } + if ((refcount == 1) != ((l1_entry & QCOW_OFLAG_COPIED) != 0)) { + res->corruptions++; + fprintf(stderr, "%s OFLAG_COPIED L2 cluster: l1_index=%d " + "l1_entry=%" PRIx64 " refcount=%" PRIu64 "\n", + repair ? "Repairing" : "ERROR", i, l1_entry, refcount); + if (repair) { + s->l1_table[i] = refcount == 1 + ? l1_entry | QCOW_OFLAG_COPIED + : l1_entry & ~QCOW_OFLAG_COPIED; + ret = qcow2_write_l1_entry(bs, i); + if (ret < 0) { + res->check_errors++; + goto fail; + } + res->corruptions--; + res->corruptions_fixed++; + } + } + + ret = bdrv_pread(bs->file, l2_offset, l2_table, + s->l2_size * l2_entry_size(s)); + if (ret < 0) { + fprintf(stderr, "ERROR: Could not read L2 table: %s\n", + strerror(-ret)); + res->check_errors++; + goto fail; + } + + for (j = 0; j < s->l2_size; j++) { + uint64_t l2_entry = get_l2_entry(s, l2_table, j); + uint64_t data_offset = l2_entry & L2E_OFFSET_MASK; + QCow2ClusterType cluster_type = qcow2_get_cluster_type(bs, l2_entry); + + if (cluster_type == QCOW2_CLUSTER_NORMAL || + cluster_type == QCOW2_CLUSTER_ZERO_ALLOC) { + if (has_data_file(bs)) { + refcount = 1; + } else { + ret = qcow2_get_refcount(bs, + data_offset >> s->cluster_bits, + &refcount); + if (ret < 0) { + /* don't print message nor increment check_errors */ + continue; + } + } + if ((refcount == 1) != ((l2_entry & QCOW_OFLAG_COPIED) != 0)) { + res->corruptions++; + fprintf(stderr, "%s OFLAG_COPIED data cluster: " + "l2_entry=%" PRIx64 " refcount=%" PRIu64 "\n", + repair ? "Repairing" : "ERROR", l2_entry, refcount); + if (repair) { + set_l2_entry(s, l2_table, j, + refcount == 1 ? + l2_entry | QCOW_OFLAG_COPIED : + l2_entry & ~QCOW_OFLAG_COPIED); + l2_dirty++; + } + } + } + } + + if (l2_dirty > 0) { + ret = qcow2_pre_write_overlap_check(bs, QCOW2_OL_ACTIVE_L2, + l2_offset, s->cluster_size, + false); + if (ret < 0) { + fprintf(stderr, "ERROR: Could not write L2 table; metadata " + "overlap check failed: %s\n", strerror(-ret)); + res->check_errors++; + goto fail; + } + + ret = bdrv_pwrite(bs->file, l2_offset, l2_table, + s->cluster_size); + if (ret < 0) { + fprintf(stderr, "ERROR: Could not write L2 table: %s\n", + strerror(-ret)); + res->check_errors++; + goto fail; + } + res->corruptions -= l2_dirty; + res->corruptions_fixed += l2_dirty; + } + } + + ret = 0; + +fail: + qemu_vfree(l2_table); + return ret; +} + +/* + * Checks consistency of refblocks and accounts for each refblock in + * *refcount_table. + */ +static int check_refblocks(BlockDriverState *bs, BdrvCheckResult *res, + BdrvCheckMode fix, bool *rebuild, + void **refcount_table, int64_t *nb_clusters) +{ + BDRVQcow2State *s = bs->opaque; + int64_t i, size; + int ret; + + for(i = 0; i < s->refcount_table_size; i++) { + uint64_t offset, cluster; + offset = s->refcount_table[i]; + cluster = offset >> s->cluster_bits; + + /* Refcount blocks are cluster aligned */ + if (offset_into_cluster(s, offset)) { + fprintf(stderr, "ERROR refcount block %" PRId64 " is not " + "cluster aligned; refcount table entry corrupted\n", i); + res->corruptions++; + *rebuild = true; + continue; + } + + if (cluster >= *nb_clusters) { + res->corruptions++; + fprintf(stderr, "%s refcount block %" PRId64 " is outside image\n", + fix & BDRV_FIX_ERRORS ? "Repairing" : "ERROR", i); + + if (fix & BDRV_FIX_ERRORS) { + int64_t new_nb_clusters; + Error *local_err = NULL; + + if (offset > INT64_MAX - s->cluster_size) { + ret = -EINVAL; + goto resize_fail; + } + + ret = bdrv_truncate(bs->file, offset + s->cluster_size, false, + PREALLOC_MODE_OFF, 0, &local_err); + if (ret < 0) { + error_report_err(local_err); + goto resize_fail; + } + size = bdrv_getlength(bs->file->bs); + if (size < 0) { + ret = size; + goto resize_fail; + } + + new_nb_clusters = size_to_clusters(s, size); + assert(new_nb_clusters >= *nb_clusters); + + ret = realloc_refcount_array(s, refcount_table, + nb_clusters, new_nb_clusters); + if (ret < 0) { + res->check_errors++; + return ret; + } + + if (cluster >= *nb_clusters) { + ret = -EINVAL; + goto resize_fail; + } + + res->corruptions--; + res->corruptions_fixed++; + ret = qcow2_inc_refcounts_imrt(bs, res, + refcount_table, nb_clusters, + offset, s->cluster_size); + if (ret < 0) { + return ret; + } + /* No need to check whether the refcount is now greater than 1: + * This area was just allocated and zeroed, so it can only be + * exactly 1 after qcow2_inc_refcounts_imrt() */ + continue; + +resize_fail: + *rebuild = true; + fprintf(stderr, "ERROR could not resize image: %s\n", + strerror(-ret)); + } + continue; + } + + if (offset != 0) { + ret = qcow2_inc_refcounts_imrt(bs, res, refcount_table, nb_clusters, + offset, s->cluster_size); + if (ret < 0) { + return ret; + } + if (s->get_refcount(*refcount_table, cluster) != 1) { + fprintf(stderr, "ERROR refcount block %" PRId64 + " refcount=%" PRIu64 "\n", i, + s->get_refcount(*refcount_table, cluster)); + res->corruptions++; + *rebuild = true; + } + } + } + + return 0; +} + +/* + * Calculates an in-memory refcount table. + */ +static int calculate_refcounts(BlockDriverState *bs, BdrvCheckResult *res, + BdrvCheckMode fix, bool *rebuild, + void **refcount_table, int64_t *nb_clusters) +{ + BDRVQcow2State *s = bs->opaque; + int64_t i; + QCowSnapshot *sn; + int ret; + + if (!*refcount_table) { + int64_t old_size = 0; + ret = realloc_refcount_array(s, refcount_table, + &old_size, *nb_clusters); + if (ret < 0) { + res->check_errors++; + return ret; + } + } + + /* header */ + ret = qcow2_inc_refcounts_imrt(bs, res, refcount_table, nb_clusters, + 0, s->cluster_size); + if (ret < 0) { + return ret; + } + + /* current L1 table */ + ret = check_refcounts_l1(bs, res, refcount_table, nb_clusters, + s->l1_table_offset, s->l1_size, CHECK_FRAG_INFO, + fix, true); + if (ret < 0) { + return ret; + } + + /* snapshots */ + if (has_data_file(bs) && s->nb_snapshots) { + fprintf(stderr, "ERROR %d snapshots in image with data file\n", + s->nb_snapshots); + res->corruptions++; + } + + for (i = 0; i < s->nb_snapshots; i++) { + sn = s->snapshots + i; + if (offset_into_cluster(s, sn->l1_table_offset)) { + fprintf(stderr, "ERROR snapshot %s (%s) l1_offset=%#" PRIx64 ": " + "L1 table is not cluster aligned; snapshot table entry " + "corrupted\n", sn->id_str, sn->name, sn->l1_table_offset); + res->corruptions++; + continue; + } + if (sn->l1_size > QCOW_MAX_L1_SIZE / L1E_SIZE) { + fprintf(stderr, "ERROR snapshot %s (%s) l1_size=%#" PRIx32 ": " + "L1 table is too large; snapshot table entry corrupted\n", + sn->id_str, sn->name, sn->l1_size); + res->corruptions++; + continue; + } + ret = check_refcounts_l1(bs, res, refcount_table, nb_clusters, + sn->l1_table_offset, sn->l1_size, 0, fix, + false); + if (ret < 0) { + return ret; + } + } + ret = qcow2_inc_refcounts_imrt(bs, res, refcount_table, nb_clusters, + s->snapshots_offset, s->snapshots_size); + if (ret < 0) { + return ret; + } + + /* refcount data */ + ret = qcow2_inc_refcounts_imrt(bs, res, refcount_table, nb_clusters, + s->refcount_table_offset, + s->refcount_table_size * + REFTABLE_ENTRY_SIZE); + if (ret < 0) { + return ret; + } + + /* encryption */ + if (s->crypto_header.length) { + ret = qcow2_inc_refcounts_imrt(bs, res, refcount_table, nb_clusters, + s->crypto_header.offset, + s->crypto_header.length); + if (ret < 0) { + return ret; + } + } + + /* bitmaps */ + ret = qcow2_check_bitmaps_refcounts(bs, res, refcount_table, nb_clusters); + if (ret < 0) { + return ret; + } + + return check_refblocks(bs, res, fix, rebuild, refcount_table, nb_clusters); +} + +/* + * Compares the actual reference count for each cluster in the image against the + * refcount as reported by the refcount structures on-disk. + */ +static void compare_refcounts(BlockDriverState *bs, BdrvCheckResult *res, + BdrvCheckMode fix, bool *rebuild, + int64_t *highest_cluster, + void *refcount_table, int64_t nb_clusters) +{ + BDRVQcow2State *s = bs->opaque; + int64_t i; + uint64_t refcount1, refcount2; + int ret; + + for (i = 0, *highest_cluster = 0; i < nb_clusters; i++) { + ret = qcow2_get_refcount(bs, i, &refcount1); + if (ret < 0) { + fprintf(stderr, "Can't get refcount for cluster %" PRId64 ": %s\n", + i, strerror(-ret)); + res->check_errors++; + continue; + } + + refcount2 = s->get_refcount(refcount_table, i); + + if (refcount1 > 0 || refcount2 > 0) { + *highest_cluster = i; + } + + if (refcount1 != refcount2) { + /* Check if we're allowed to fix the mismatch */ + int *num_fixed = NULL; + if (refcount1 == 0) { + *rebuild = true; + } else if (refcount1 > refcount2 && (fix & BDRV_FIX_LEAKS)) { + num_fixed = &res->leaks_fixed; + } else if (refcount1 < refcount2 && (fix & BDRV_FIX_ERRORS)) { + num_fixed = &res->corruptions_fixed; + } + + fprintf(stderr, "%s cluster %" PRId64 " refcount=%" PRIu64 + " reference=%" PRIu64 "\n", + num_fixed != NULL ? "Repairing" : + refcount1 < refcount2 ? "ERROR" : + "Leaked", + i, refcount1, refcount2); + + if (num_fixed) { + ret = update_refcount(bs, i << s->cluster_bits, 1, + refcount_diff(refcount1, refcount2), + refcount1 > refcount2, + QCOW2_DISCARD_ALWAYS); + if (ret >= 0) { + (*num_fixed)++; + continue; + } + } + + /* And if we couldn't, print an error */ + if (refcount1 < refcount2) { + res->corruptions++; + } else { + res->leaks++; + } + } + } +} + +/* + * Allocates clusters using an in-memory refcount table (IMRT) in contrast to + * the on-disk refcount structures. + * + * On input, *first_free_cluster tells where to start looking, and need not + * actually be a free cluster; the returned offset will not be before that + * cluster. On output, *first_free_cluster points to the first gap found, even + * if that gap was too small to be used as the returned offset. + * + * Note that *first_free_cluster is a cluster index whereas the return value is + * an offset. + */ +static int64_t alloc_clusters_imrt(BlockDriverState *bs, + int cluster_count, + void **refcount_table, + int64_t *imrt_nb_clusters, + int64_t *first_free_cluster) +{ + BDRVQcow2State *s = bs->opaque; + int64_t cluster = *first_free_cluster, i; + bool first_gap = true; + int contiguous_free_clusters; + int ret; + + /* Starting at *first_free_cluster, find a range of at least cluster_count + * continuously free clusters */ + for (contiguous_free_clusters = 0; + cluster < *imrt_nb_clusters && + contiguous_free_clusters < cluster_count; + cluster++) + { + if (!s->get_refcount(*refcount_table, cluster)) { + contiguous_free_clusters++; + if (first_gap) { + /* If this is the first free cluster found, update + * *first_free_cluster accordingly */ + *first_free_cluster = cluster; + first_gap = false; + } + } else if (contiguous_free_clusters) { + contiguous_free_clusters = 0; + } + } + + /* If contiguous_free_clusters is greater than zero, it contains the number + * of continuously free clusters until the current cluster; the first free + * cluster in the current "gap" is therefore + * cluster - contiguous_free_clusters */ + + /* If no such range could be found, grow the in-memory refcount table + * accordingly to append free clusters at the end of the image */ + if (contiguous_free_clusters < cluster_count) { + /* contiguous_free_clusters clusters are already empty at the image end; + * we need cluster_count clusters; therefore, we have to allocate + * cluster_count - contiguous_free_clusters new clusters at the end of + * the image (which is the current value of cluster; note that cluster + * may exceed old_imrt_nb_clusters if *first_free_cluster pointed beyond + * the image end) */ + ret = realloc_refcount_array(s, refcount_table, imrt_nb_clusters, + cluster + cluster_count + - contiguous_free_clusters); + if (ret < 0) { + return ret; + } + } + + /* Go back to the first free cluster */ + cluster -= contiguous_free_clusters; + for (i = 0; i < cluster_count; i++) { + s->set_refcount(*refcount_table, cluster + i, 1); + } + + return cluster << s->cluster_bits; +} + +/* + * Creates a new refcount structure based solely on the in-memory information + * given through *refcount_table. All necessary allocations will be reflected + * in that array. + * + * On success, the old refcount structure is leaked (it will be covered by the + * new refcount structure). + */ +static int rebuild_refcount_structure(BlockDriverState *bs, + BdrvCheckResult *res, + void **refcount_table, + int64_t *nb_clusters) +{ + BDRVQcow2State *s = bs->opaque; + int64_t first_free_cluster = 0, reftable_offset = -1, cluster = 0; + int64_t refblock_offset, refblock_start, refblock_index; + uint32_t reftable_size = 0; + uint64_t *on_disk_reftable = NULL; + void *on_disk_refblock; + int ret = 0; + struct { + uint64_t reftable_offset; + uint32_t reftable_clusters; + } QEMU_PACKED reftable_offset_and_clusters; + + qcow2_cache_empty(bs, s->refcount_block_cache); + +write_refblocks: + for (; cluster < *nb_clusters; cluster++) { + if (!s->get_refcount(*refcount_table, cluster)) { + continue; + } + + refblock_index = cluster >> s->refcount_block_bits; + refblock_start = refblock_index << s->refcount_block_bits; + + /* Don't allocate a cluster in a refblock already written to disk */ + if (first_free_cluster < refblock_start) { + first_free_cluster = refblock_start; + } + refblock_offset = alloc_clusters_imrt(bs, 1, refcount_table, + nb_clusters, &first_free_cluster); + if (refblock_offset < 0) { + fprintf(stderr, "ERROR allocating refblock: %s\n", + strerror(-refblock_offset)); + res->check_errors++; + ret = refblock_offset; + goto fail; + } + + if (reftable_size <= refblock_index) { + uint32_t old_reftable_size = reftable_size; + uint64_t *new_on_disk_reftable; + + reftable_size = ROUND_UP((refblock_index + 1) * REFTABLE_ENTRY_SIZE, + s->cluster_size) / REFTABLE_ENTRY_SIZE; + new_on_disk_reftable = g_try_realloc(on_disk_reftable, + reftable_size * + REFTABLE_ENTRY_SIZE); + if (!new_on_disk_reftable) { + res->check_errors++; + ret = -ENOMEM; + goto fail; + } + on_disk_reftable = new_on_disk_reftable; + + memset(on_disk_reftable + old_reftable_size, 0, + (reftable_size - old_reftable_size) * REFTABLE_ENTRY_SIZE); + + /* The offset we have for the reftable is now no longer valid; + * this will leak that range, but we can easily fix that by running + * a leak-fixing check after this rebuild operation */ + reftable_offset = -1; + } else { + assert(on_disk_reftable); + } + on_disk_reftable[refblock_index] = refblock_offset; + + /* If this is apparently the last refblock (for now), try to squeeze the + * reftable in */ + if (refblock_index == (*nb_clusters - 1) >> s->refcount_block_bits && + reftable_offset < 0) + { + uint64_t reftable_clusters = size_to_clusters(s, reftable_size * + REFTABLE_ENTRY_SIZE); + reftable_offset = alloc_clusters_imrt(bs, reftable_clusters, + refcount_table, nb_clusters, + &first_free_cluster); + if (reftable_offset < 0) { + fprintf(stderr, "ERROR allocating reftable: %s\n", + strerror(-reftable_offset)); + res->check_errors++; + ret = reftable_offset; + goto fail; + } + } + + ret = qcow2_pre_write_overlap_check(bs, 0, refblock_offset, + s->cluster_size, false); + if (ret < 0) { + fprintf(stderr, "ERROR writing refblock: %s\n", strerror(-ret)); + goto fail; + } + + /* The size of *refcount_table is always cluster-aligned, therefore the + * write operation will not overflow */ + on_disk_refblock = (void *)((char *) *refcount_table + + refblock_index * s->cluster_size); + + ret = bdrv_pwrite(bs->file, refblock_offset, on_disk_refblock, + s->cluster_size); + if (ret < 0) { + fprintf(stderr, "ERROR writing refblock: %s\n", strerror(-ret)); + goto fail; + } + + /* Go to the end of this refblock */ + cluster = refblock_start + s->refcount_block_size - 1; + } + + if (reftable_offset < 0) { + uint64_t post_refblock_start, reftable_clusters; + + post_refblock_start = ROUND_UP(*nb_clusters, s->refcount_block_size); + reftable_clusters = + size_to_clusters(s, reftable_size * REFTABLE_ENTRY_SIZE); + /* Not pretty but simple */ + if (first_free_cluster < post_refblock_start) { + first_free_cluster = post_refblock_start; + } + reftable_offset = alloc_clusters_imrt(bs, reftable_clusters, + refcount_table, nb_clusters, + &first_free_cluster); + if (reftable_offset < 0) { + fprintf(stderr, "ERROR allocating reftable: %s\n", + strerror(-reftable_offset)); + res->check_errors++; + ret = reftable_offset; + goto fail; + } + + goto write_refblocks; + } + + for (refblock_index = 0; refblock_index < reftable_size; refblock_index++) { + cpu_to_be64s(&on_disk_reftable[refblock_index]); + } + + ret = qcow2_pre_write_overlap_check(bs, 0, reftable_offset, + reftable_size * REFTABLE_ENTRY_SIZE, + false); + if (ret < 0) { + fprintf(stderr, "ERROR writing reftable: %s\n", strerror(-ret)); + goto fail; + } + + assert(reftable_size < INT_MAX / REFTABLE_ENTRY_SIZE); + ret = bdrv_pwrite(bs->file, reftable_offset, on_disk_reftable, + reftable_size * REFTABLE_ENTRY_SIZE); + if (ret < 0) { + fprintf(stderr, "ERROR writing reftable: %s\n", strerror(-ret)); + goto fail; + } + + /* Enter new reftable into the image header */ + reftable_offset_and_clusters.reftable_offset = cpu_to_be64(reftable_offset); + reftable_offset_and_clusters.reftable_clusters = + cpu_to_be32(size_to_clusters(s, reftable_size * REFTABLE_ENTRY_SIZE)); + ret = bdrv_pwrite_sync(bs->file, + offsetof(QCowHeader, refcount_table_offset), + &reftable_offset_and_clusters, + sizeof(reftable_offset_and_clusters)); + if (ret < 0) { + fprintf(stderr, "ERROR setting reftable: %s\n", strerror(-ret)); + goto fail; + } + + for (refblock_index = 0; refblock_index < reftable_size; refblock_index++) { + be64_to_cpus(&on_disk_reftable[refblock_index]); + } + s->refcount_table = on_disk_reftable; + s->refcount_table_offset = reftable_offset; + s->refcount_table_size = reftable_size; + update_max_refcount_table_index(s); + + return 0; + +fail: + g_free(on_disk_reftable); + return ret; +} + +/* + * Checks an image for refcount consistency. + * + * Returns 0 if no errors are found, the number of errors in case the image is + * detected as corrupted, and -errno when an internal error occurred. + */ +int qcow2_check_refcounts(BlockDriverState *bs, BdrvCheckResult *res, + BdrvCheckMode fix) +{ + BDRVQcow2State *s = bs->opaque; + BdrvCheckResult pre_compare_res; + int64_t size, highest_cluster, nb_clusters; + void *refcount_table = NULL; + bool rebuild = false; + int ret; + + size = bdrv_getlength(bs->file->bs); + if (size < 0) { + res->check_errors++; + return size; + } + + nb_clusters = size_to_clusters(s, size); + if (nb_clusters > INT_MAX) { + res->check_errors++; + return -EFBIG; + } + + res->bfi.total_clusters = + size_to_clusters(s, bs->total_sectors * BDRV_SECTOR_SIZE); + + ret = calculate_refcounts(bs, res, fix, &rebuild, &refcount_table, + &nb_clusters); + if (ret < 0) { + goto fail; + } + + /* In case we don't need to rebuild the refcount structure (but want to fix + * something), this function is immediately called again, in which case the + * result should be ignored */ + pre_compare_res = *res; + compare_refcounts(bs, res, 0, &rebuild, &highest_cluster, refcount_table, + nb_clusters); + + if (rebuild && (fix & BDRV_FIX_ERRORS)) { + BdrvCheckResult old_res = *res; + int fresh_leaks = 0; + + fprintf(stderr, "Rebuilding refcount structure\n"); + ret = rebuild_refcount_structure(bs, res, &refcount_table, + &nb_clusters); + if (ret < 0) { + goto fail; + } + + res->corruptions = 0; + res->leaks = 0; + + /* Because the old reftable has been exchanged for a new one the + * references have to be recalculated */ + rebuild = false; + memset(refcount_table, 0, refcount_array_byte_size(s, nb_clusters)); + ret = calculate_refcounts(bs, res, 0, &rebuild, &refcount_table, + &nb_clusters); + if (ret < 0) { + goto fail; + } + + if (fix & BDRV_FIX_LEAKS) { + /* The old refcount structures are now leaked, fix it; the result + * can be ignored, aside from leaks which were introduced by + * rebuild_refcount_structure() that could not be fixed */ + BdrvCheckResult saved_res = *res; + *res = (BdrvCheckResult){ 0 }; + + compare_refcounts(bs, res, BDRV_FIX_LEAKS, &rebuild, + &highest_cluster, refcount_table, nb_clusters); + if (rebuild) { + fprintf(stderr, "ERROR rebuilt refcount structure is still " + "broken\n"); + } + + /* Any leaks accounted for here were introduced by + * rebuild_refcount_structure() because that function has created a + * new refcount structure from scratch */ + fresh_leaks = res->leaks; + *res = saved_res; + } + + if (res->corruptions < old_res.corruptions) { + res->corruptions_fixed += old_res.corruptions - res->corruptions; + } + if (res->leaks < old_res.leaks) { + res->leaks_fixed += old_res.leaks - res->leaks; + } + res->leaks += fresh_leaks; + } else if (fix) { + if (rebuild) { + fprintf(stderr, "ERROR need to rebuild refcount structures\n"); + res->check_errors++; + ret = -EIO; + goto fail; + } + + if (res->leaks || res->corruptions) { + *res = pre_compare_res; + compare_refcounts(bs, res, fix, &rebuild, &highest_cluster, + refcount_table, nb_clusters); + } + } + + /* check OFLAG_COPIED */ + ret = check_oflag_copied(bs, res, fix); + if (ret < 0) { + goto fail; + } + + res->image_end_offset = (highest_cluster + 1) * s->cluster_size; + ret = 0; + +fail: + g_free(refcount_table); + + return ret; +} + +#define overlaps_with(ofs, sz) \ + ranges_overlap(offset, size, ofs, sz) + +/* + * Checks if the given offset into the image file is actually free to use by + * looking for overlaps with important metadata sections (L1/L2 tables etc.), + * i.e. a sanity check without relying on the refcount tables. + * + * The ign parameter specifies what checks not to perform (being a bitmask of + * QCow2MetadataOverlap values), i.e., what sections to ignore. + * + * Returns: + * - 0 if writing to this offset will not affect the mentioned metadata + * - a positive QCow2MetadataOverlap value indicating one overlapping section + * - a negative value (-errno) indicating an error while performing a check, + * e.g. when bdrv_pread failed on QCOW2_OL_INACTIVE_L2 + */ +int qcow2_check_metadata_overlap(BlockDriverState *bs, int ign, int64_t offset, + int64_t size) +{ + BDRVQcow2State *s = bs->opaque; + int chk = s->overlap_check & ~ign; + int i, j; + + if (!size) { + return 0; + } + + if (chk & QCOW2_OL_MAIN_HEADER) { + if (offset < s->cluster_size) { + return QCOW2_OL_MAIN_HEADER; + } + } + + /* align range to test to cluster boundaries */ + size = ROUND_UP(offset_into_cluster(s, offset) + size, s->cluster_size); + offset = start_of_cluster(s, offset); + + if ((chk & QCOW2_OL_ACTIVE_L1) && s->l1_size) { + if (overlaps_with(s->l1_table_offset, s->l1_size * L1E_SIZE)) { + return QCOW2_OL_ACTIVE_L1; + } + } + + if ((chk & QCOW2_OL_REFCOUNT_TABLE) && s->refcount_table_size) { + if (overlaps_with(s->refcount_table_offset, + s->refcount_table_size * REFTABLE_ENTRY_SIZE)) { + return QCOW2_OL_REFCOUNT_TABLE; + } + } + + if ((chk & QCOW2_OL_SNAPSHOT_TABLE) && s->snapshots_size) { + if (overlaps_with(s->snapshots_offset, s->snapshots_size)) { + return QCOW2_OL_SNAPSHOT_TABLE; + } + } + + if ((chk & QCOW2_OL_INACTIVE_L1) && s->snapshots) { + for (i = 0; i < s->nb_snapshots; i++) { + if (s->snapshots[i].l1_size && + overlaps_with(s->snapshots[i].l1_table_offset, + s->snapshots[i].l1_size * L1E_SIZE)) { + return QCOW2_OL_INACTIVE_L1; + } + } + } + + if ((chk & QCOW2_OL_ACTIVE_L2) && s->l1_table) { + for (i = 0; i < s->l1_size; i++) { + if ((s->l1_table[i] & L1E_OFFSET_MASK) && + overlaps_with(s->l1_table[i] & L1E_OFFSET_MASK, + s->cluster_size)) { + return QCOW2_OL_ACTIVE_L2; + } + } + } + + if ((chk & QCOW2_OL_REFCOUNT_BLOCK) && s->refcount_table) { + unsigned last_entry = s->max_refcount_table_index; + assert(last_entry < s->refcount_table_size); + assert(last_entry + 1 == s->refcount_table_size || + (s->refcount_table[last_entry + 1] & REFT_OFFSET_MASK) == 0); + for (i = 0; i <= last_entry; i++) { + if ((s->refcount_table[i] & REFT_OFFSET_MASK) && + overlaps_with(s->refcount_table[i] & REFT_OFFSET_MASK, + s->cluster_size)) { + return QCOW2_OL_REFCOUNT_BLOCK; + } + } + } + + if ((chk & QCOW2_OL_INACTIVE_L2) && s->snapshots) { + for (i = 0; i < s->nb_snapshots; i++) { + uint64_t l1_ofs = s->snapshots[i].l1_table_offset; + uint32_t l1_sz = s->snapshots[i].l1_size; + uint64_t l1_sz2 = l1_sz * L1E_SIZE; + uint64_t *l1; + int ret; + + ret = qcow2_validate_table(bs, l1_ofs, l1_sz, L1E_SIZE, + QCOW_MAX_L1_SIZE, "", NULL); + if (ret < 0) { + return ret; + } + + l1 = g_try_malloc(l1_sz2); + + if (l1_sz2 && l1 == NULL) { + return -ENOMEM; + } + + ret = bdrv_pread(bs->file, l1_ofs, l1, l1_sz2); + if (ret < 0) { + g_free(l1); + return ret; + } + + for (j = 0; j < l1_sz; j++) { + uint64_t l2_ofs = be64_to_cpu(l1[j]) & L1E_OFFSET_MASK; + if (l2_ofs && overlaps_with(l2_ofs, s->cluster_size)) { + g_free(l1); + return QCOW2_OL_INACTIVE_L2; + } + } + + g_free(l1); + } + } + + if ((chk & QCOW2_OL_BITMAP_DIRECTORY) && + (s->autoclear_features & QCOW2_AUTOCLEAR_BITMAPS)) + { + if (overlaps_with(s->bitmap_directory_offset, + s->bitmap_directory_size)) + { + return QCOW2_OL_BITMAP_DIRECTORY; + } + } + + return 0; +} + +static const char *metadata_ol_names[] = { + [QCOW2_OL_MAIN_HEADER_BITNR] = "qcow2_header", + [QCOW2_OL_ACTIVE_L1_BITNR] = "active L1 table", + [QCOW2_OL_ACTIVE_L2_BITNR] = "active L2 table", + [QCOW2_OL_REFCOUNT_TABLE_BITNR] = "refcount table", + [QCOW2_OL_REFCOUNT_BLOCK_BITNR] = "refcount block", + [QCOW2_OL_SNAPSHOT_TABLE_BITNR] = "snapshot table", + [QCOW2_OL_INACTIVE_L1_BITNR] = "inactive L1 table", + [QCOW2_OL_INACTIVE_L2_BITNR] = "inactive L2 table", + [QCOW2_OL_BITMAP_DIRECTORY_BITNR] = "bitmap directory", +}; +QEMU_BUILD_BUG_ON(QCOW2_OL_MAX_BITNR != ARRAY_SIZE(metadata_ol_names)); + +/* + * First performs a check for metadata overlaps (through + * qcow2_check_metadata_overlap); if that fails with a negative value (error + * while performing a check), that value is returned. If an impending overlap + * is detected, the BDS will be made unusable, the qcow2 file marked corrupt + * and -EIO returned. + * + * Returns 0 if there were neither overlaps nor errors while checking for + * overlaps; or a negative value (-errno) on error. + */ +int qcow2_pre_write_overlap_check(BlockDriverState *bs, int ign, int64_t offset, + int64_t size, bool data_file) +{ + int ret; + + if (data_file && has_data_file(bs)) { + return 0; + } + + ret = qcow2_check_metadata_overlap(bs, ign, offset, size); + if (ret < 0) { + return ret; + } else if (ret > 0) { + int metadata_ol_bitnr = ctz32(ret); + assert(metadata_ol_bitnr < QCOW2_OL_MAX_BITNR); + + qcow2_signal_corruption(bs, true, offset, size, "Preventing invalid " + "write on metadata (overlaps with %s)", + metadata_ol_names[metadata_ol_bitnr]); + return -EIO; + } + + return 0; +} + +/* A pointer to a function of this type is given to walk_over_reftable(). That + * function will create refblocks and pass them to a RefblockFinishOp once they + * are completed (@refblock). @refblock_empty is set if the refblock is + * completely empty. + * + * Along with the refblock, a corresponding reftable entry is passed, in the + * reftable @reftable (which may be reallocated) at @reftable_index. + * + * @allocated should be set to true if a new cluster has been allocated. + */ +typedef int (RefblockFinishOp)(BlockDriverState *bs, uint64_t **reftable, + uint64_t reftable_index, uint64_t *reftable_size, + void *refblock, bool refblock_empty, + bool *allocated, Error **errp); + +/** + * This "operation" for walk_over_reftable() allocates the refblock on disk (if + * it is not empty) and inserts its offset into the new reftable. The size of + * this new reftable is increased as required. + */ +static int alloc_refblock(BlockDriverState *bs, uint64_t **reftable, + uint64_t reftable_index, uint64_t *reftable_size, + void *refblock, bool refblock_empty, bool *allocated, + Error **errp) +{ + BDRVQcow2State *s = bs->opaque; + int64_t offset; + + if (!refblock_empty && reftable_index >= *reftable_size) { + uint64_t *new_reftable; + uint64_t new_reftable_size; + + new_reftable_size = ROUND_UP(reftable_index + 1, + s->cluster_size / REFTABLE_ENTRY_SIZE); + if (new_reftable_size > QCOW_MAX_REFTABLE_SIZE / REFTABLE_ENTRY_SIZE) { + error_setg(errp, + "This operation would make the refcount table grow " + "beyond the maximum size supported by QEMU, aborting"); + return -ENOTSUP; + } + + new_reftable = g_try_realloc(*reftable, new_reftable_size * + REFTABLE_ENTRY_SIZE); + if (!new_reftable) { + error_setg(errp, "Failed to increase reftable buffer size"); + return -ENOMEM; + } + + memset(new_reftable + *reftable_size, 0, + (new_reftable_size - *reftable_size) * REFTABLE_ENTRY_SIZE); + + *reftable = new_reftable; + *reftable_size = new_reftable_size; + } + + if (!refblock_empty && !(*reftable)[reftable_index]) { + offset = qcow2_alloc_clusters(bs, s->cluster_size); + if (offset < 0) { + error_setg_errno(errp, -offset, "Failed to allocate refblock"); + return offset; + } + (*reftable)[reftable_index] = offset; + *allocated = true; + } + + return 0; +} + +/** + * This "operation" for walk_over_reftable() writes the refblock to disk at the + * offset specified by the new reftable's entry. It does not modify the new + * reftable or change any refcounts. + */ +static int flush_refblock(BlockDriverState *bs, uint64_t **reftable, + uint64_t reftable_index, uint64_t *reftable_size, + void *refblock, bool refblock_empty, bool *allocated, + Error **errp) +{ + BDRVQcow2State *s = bs->opaque; + int64_t offset; + int ret; + + if (reftable_index < *reftable_size && (*reftable)[reftable_index]) { + offset = (*reftable)[reftable_index]; + + ret = qcow2_pre_write_overlap_check(bs, 0, offset, s->cluster_size, + false); + if (ret < 0) { + error_setg_errno(errp, -ret, "Overlap check failed"); + return ret; + } + + ret = bdrv_pwrite(bs->file, offset, refblock, s->cluster_size); + if (ret < 0) { + error_setg_errno(errp, -ret, "Failed to write refblock"); + return ret; + } + } else { + assert(refblock_empty); + } + + return 0; +} + +/** + * This function walks over the existing reftable and every referenced refblock; + * if @new_set_refcount is non-NULL, it is called for every refcount entry to + * create an equal new entry in the passed @new_refblock. Once that + * @new_refblock is completely filled, @operation will be called. + * + * @status_cb and @cb_opaque are used for the amend operation's status callback. + * @index is the index of the walk_over_reftable() calls and @total is the total + * number of walk_over_reftable() calls per amend operation. Both are used for + * calculating the parameters for the status callback. + * + * @allocated is set to true if a new cluster has been allocated. + */ +static int walk_over_reftable(BlockDriverState *bs, uint64_t **new_reftable, + uint64_t *new_reftable_index, + uint64_t *new_reftable_size, + void *new_refblock, int new_refblock_size, + int new_refcount_bits, + RefblockFinishOp *operation, bool *allocated, + Qcow2SetRefcountFunc *new_set_refcount, + BlockDriverAmendStatusCB *status_cb, + void *cb_opaque, int index, int total, + Error **errp) +{ + BDRVQcow2State *s = bs->opaque; + uint64_t reftable_index; + bool new_refblock_empty = true; + int refblock_index; + int new_refblock_index = 0; + int ret; + + for (reftable_index = 0; reftable_index < s->refcount_table_size; + reftable_index++) + { + uint64_t refblock_offset = s->refcount_table[reftable_index] + & REFT_OFFSET_MASK; + + status_cb(bs, (uint64_t)index * s->refcount_table_size + reftable_index, + (uint64_t)total * s->refcount_table_size, cb_opaque); + + if (refblock_offset) { + void *refblock; + + if (offset_into_cluster(s, refblock_offset)) { + qcow2_signal_corruption(bs, true, -1, -1, "Refblock offset %#" + PRIx64 " unaligned (reftable index: %#" + PRIx64 ")", refblock_offset, + reftable_index); + error_setg(errp, + "Image is corrupt (unaligned refblock offset)"); + return -EIO; + } + + ret = qcow2_cache_get(bs, s->refcount_block_cache, refblock_offset, + &refblock); + if (ret < 0) { + error_setg_errno(errp, -ret, "Failed to retrieve refblock"); + return ret; + } + + for (refblock_index = 0; refblock_index < s->refcount_block_size; + refblock_index++) + { + uint64_t refcount; + + if (new_refblock_index >= new_refblock_size) { + /* new_refblock is now complete */ + ret = operation(bs, new_reftable, *new_reftable_index, + new_reftable_size, new_refblock, + new_refblock_empty, allocated, errp); + if (ret < 0) { + qcow2_cache_put(s->refcount_block_cache, &refblock); + return ret; + } + + (*new_reftable_index)++; + new_refblock_index = 0; + new_refblock_empty = true; + } + + refcount = s->get_refcount(refblock, refblock_index); + if (new_refcount_bits < 64 && refcount >> new_refcount_bits) { + uint64_t offset; + + qcow2_cache_put(s->refcount_block_cache, &refblock); + + offset = ((reftable_index << s->refcount_block_bits) + + refblock_index) << s->cluster_bits; + + error_setg(errp, "Cannot decrease refcount entry width to " + "%i bits: Cluster at offset %#" PRIx64 " has a " + "refcount of %" PRIu64, new_refcount_bits, + offset, refcount); + return -EINVAL; + } + + if (new_set_refcount) { + new_set_refcount(new_refblock, new_refblock_index++, + refcount); + } else { + new_refblock_index++; + } + new_refblock_empty = new_refblock_empty && refcount == 0; + } + + qcow2_cache_put(s->refcount_block_cache, &refblock); + } else { + /* No refblock means every refcount is 0 */ + for (refblock_index = 0; refblock_index < s->refcount_block_size; + refblock_index++) + { + if (new_refblock_index >= new_refblock_size) { + /* new_refblock is now complete */ + ret = operation(bs, new_reftable, *new_reftable_index, + new_reftable_size, new_refblock, + new_refblock_empty, allocated, errp); + if (ret < 0) { + return ret; + } + + (*new_reftable_index)++; + new_refblock_index = 0; + new_refblock_empty = true; + } + + if (new_set_refcount) { + new_set_refcount(new_refblock, new_refblock_index++, 0); + } else { + new_refblock_index++; + } + } + } + } + + if (new_refblock_index > 0) { + /* Complete the potentially existing partially filled final refblock */ + if (new_set_refcount) { + for (; new_refblock_index < new_refblock_size; + new_refblock_index++) + { + new_set_refcount(new_refblock, new_refblock_index, 0); + } + } + + ret = operation(bs, new_reftable, *new_reftable_index, + new_reftable_size, new_refblock, new_refblock_empty, + allocated, errp); + if (ret < 0) { + return ret; + } + + (*new_reftable_index)++; + } + + status_cb(bs, (uint64_t)(index + 1) * s->refcount_table_size, + (uint64_t)total * s->refcount_table_size, cb_opaque); + + return 0; +} + +int qcow2_change_refcount_order(BlockDriverState *bs, int refcount_order, + BlockDriverAmendStatusCB *status_cb, + void *cb_opaque, Error **errp) +{ + BDRVQcow2State *s = bs->opaque; + Qcow2GetRefcountFunc *new_get_refcount; + Qcow2SetRefcountFunc *new_set_refcount; + void *new_refblock = qemu_blockalign(bs->file->bs, s->cluster_size); + uint64_t *new_reftable = NULL, new_reftable_size = 0; + uint64_t *old_reftable, old_reftable_size, old_reftable_offset; + uint64_t new_reftable_index = 0; + uint64_t i; + int64_t new_reftable_offset = 0, allocated_reftable_size = 0; + int new_refblock_size, new_refcount_bits = 1 << refcount_order; + int old_refcount_order; + int walk_index = 0; + int ret; + bool new_allocation; + + assert(s->qcow_version >= 3); + assert(refcount_order >= 0 && refcount_order <= 6); + + /* see qcow2_open() */ + new_refblock_size = 1 << (s->cluster_bits - (refcount_order - 3)); + + new_get_refcount = get_refcount_funcs[refcount_order]; + new_set_refcount = set_refcount_funcs[refcount_order]; + + + do { + int total_walks; + + new_allocation = false; + + /* At least we have to do this walk and the one which writes the + * refblocks; also, at least we have to do this loop here at least + * twice (normally), first to do the allocations, and second to + * determine that everything is correctly allocated, this then makes + * three walks in total */ + total_walks = MAX(walk_index + 2, 3); + + /* First, allocate the structures so they are present in the refcount + * structures */ + ret = walk_over_reftable(bs, &new_reftable, &new_reftable_index, + &new_reftable_size, NULL, new_refblock_size, + new_refcount_bits, &alloc_refblock, + &new_allocation, NULL, status_cb, cb_opaque, + walk_index++, total_walks, errp); + if (ret < 0) { + goto done; + } + + new_reftable_index = 0; + + if (new_allocation) { + if (new_reftable_offset) { + qcow2_free_clusters( + bs, new_reftable_offset, + allocated_reftable_size * REFTABLE_ENTRY_SIZE, + QCOW2_DISCARD_NEVER); + } + + new_reftable_offset = qcow2_alloc_clusters(bs, new_reftable_size * + REFTABLE_ENTRY_SIZE); + if (new_reftable_offset < 0) { + error_setg_errno(errp, -new_reftable_offset, + "Failed to allocate the new reftable"); + ret = new_reftable_offset; + goto done; + } + allocated_reftable_size = new_reftable_size; + } + } while (new_allocation); + + /* Second, write the new refblocks */ + ret = walk_over_reftable(bs, &new_reftable, &new_reftable_index, + &new_reftable_size, new_refblock, + new_refblock_size, new_refcount_bits, + &flush_refblock, &new_allocation, new_set_refcount, + status_cb, cb_opaque, walk_index, walk_index + 1, + errp); + if (ret < 0) { + goto done; + } + assert(!new_allocation); + + + /* Write the new reftable */ + ret = qcow2_pre_write_overlap_check(bs, 0, new_reftable_offset, + new_reftable_size * REFTABLE_ENTRY_SIZE, + false); + if (ret < 0) { + error_setg_errno(errp, -ret, "Overlap check failed"); + goto done; + } + + for (i = 0; i < new_reftable_size; i++) { + cpu_to_be64s(&new_reftable[i]); + } + + ret = bdrv_pwrite(bs->file, new_reftable_offset, new_reftable, + new_reftable_size * REFTABLE_ENTRY_SIZE); + + for (i = 0; i < new_reftable_size; i++) { + be64_to_cpus(&new_reftable[i]); + } + + if (ret < 0) { + error_setg_errno(errp, -ret, "Failed to write the new reftable"); + goto done; + } + + + /* Empty the refcount cache */ + ret = qcow2_cache_flush(bs, s->refcount_block_cache); + if (ret < 0) { + error_setg_errno(errp, -ret, "Failed to flush the refblock cache"); + goto done; + } + + /* Update the image header to point to the new reftable; this only updates + * the fields which are relevant to qcow2_update_header(); other fields + * such as s->refcount_table or s->refcount_bits stay stale for now + * (because we have to restore everything if qcow2_update_header() fails) */ + old_refcount_order = s->refcount_order; + old_reftable_size = s->refcount_table_size; + old_reftable_offset = s->refcount_table_offset; + + s->refcount_order = refcount_order; + s->refcount_table_size = new_reftable_size; + s->refcount_table_offset = new_reftable_offset; + + ret = qcow2_update_header(bs); + if (ret < 0) { + s->refcount_order = old_refcount_order; + s->refcount_table_size = old_reftable_size; + s->refcount_table_offset = old_reftable_offset; + error_setg_errno(errp, -ret, "Failed to update the qcow2 header"); + goto done; + } + + /* Now update the rest of the in-memory information */ + old_reftable = s->refcount_table; + s->refcount_table = new_reftable; + update_max_refcount_table_index(s); + + s->refcount_bits = 1 << refcount_order; + s->refcount_max = UINT64_C(1) << (s->refcount_bits - 1); + s->refcount_max += s->refcount_max - 1; + + s->refcount_block_bits = s->cluster_bits - (refcount_order - 3); + s->refcount_block_size = 1 << s->refcount_block_bits; + + s->get_refcount = new_get_refcount; + s->set_refcount = new_set_refcount; + + /* For cleaning up all old refblocks and the old reftable below the "done" + * label */ + new_reftable = old_reftable; + new_reftable_size = old_reftable_size; + new_reftable_offset = old_reftable_offset; + +done: + if (new_reftable) { + /* On success, new_reftable actually points to the old reftable (and + * new_reftable_size is the old reftable's size); but that is just + * fine */ + for (i = 0; i < new_reftable_size; i++) { + uint64_t offset = new_reftable[i] & REFT_OFFSET_MASK; + if (offset) { + qcow2_free_clusters(bs, offset, s->cluster_size, + QCOW2_DISCARD_OTHER); + } + } + g_free(new_reftable); + + if (new_reftable_offset > 0) { + qcow2_free_clusters(bs, new_reftable_offset, + new_reftable_size * REFTABLE_ENTRY_SIZE, + QCOW2_DISCARD_OTHER); + } + } + + qemu_vfree(new_refblock); + return ret; +} + +static int64_t get_refblock_offset(BlockDriverState *bs, uint64_t offset) +{ + BDRVQcow2State *s = bs->opaque; + uint32_t index = offset_to_reftable_index(s, offset); + int64_t covering_refblock_offset = 0; + + if (index < s->refcount_table_size) { + covering_refblock_offset = s->refcount_table[index] & REFT_OFFSET_MASK; + } + if (!covering_refblock_offset) { + qcow2_signal_corruption(bs, true, -1, -1, "Refblock at %#" PRIx64 " is " + "not covered by the refcount structures", + offset); + return -EIO; + } + + return covering_refblock_offset; +} + +static int qcow2_discard_refcount_block(BlockDriverState *bs, + uint64_t discard_block_offs) +{ + BDRVQcow2State *s = bs->opaque; + int64_t refblock_offs; + uint64_t cluster_index = discard_block_offs >> s->cluster_bits; + uint32_t block_index = cluster_index & (s->refcount_block_size - 1); + void *refblock; + int ret; + + refblock_offs = get_refblock_offset(bs, discard_block_offs); + if (refblock_offs < 0) { + return refblock_offs; + } + + assert(discard_block_offs != 0); + + ret = qcow2_cache_get(bs, s->refcount_block_cache, refblock_offs, + &refblock); + if (ret < 0) { + return ret; + } + + if (s->get_refcount(refblock, block_index) != 1) { + qcow2_signal_corruption(bs, true, -1, -1, "Invalid refcount:" + " refblock offset %#" PRIx64 + ", reftable index %u" + ", block offset %#" PRIx64 + ", refcount %#" PRIx64, + refblock_offs, + offset_to_reftable_index(s, discard_block_offs), + discard_block_offs, + s->get_refcount(refblock, block_index)); + qcow2_cache_put(s->refcount_block_cache, &refblock); + return -EINVAL; + } + s->set_refcount(refblock, block_index, 0); + + qcow2_cache_entry_mark_dirty(s->refcount_block_cache, refblock); + + qcow2_cache_put(s->refcount_block_cache, &refblock); + + if (cluster_index < s->free_cluster_index) { + s->free_cluster_index = cluster_index; + } + + refblock = qcow2_cache_is_table_offset(s->refcount_block_cache, + discard_block_offs); + if (refblock) { + /* discard refblock from the cache if refblock is cached */ + qcow2_cache_discard(s->refcount_block_cache, refblock); + } + update_refcount_discard(bs, discard_block_offs, s->cluster_size); + + return 0; +} + +int qcow2_shrink_reftable(BlockDriverState *bs) +{ + BDRVQcow2State *s = bs->opaque; + uint64_t *reftable_tmp = + g_malloc(s->refcount_table_size * REFTABLE_ENTRY_SIZE); + int i, ret; + + for (i = 0; i < s->refcount_table_size; i++) { + int64_t refblock_offs = s->refcount_table[i] & REFT_OFFSET_MASK; + void *refblock; + bool unused_block; + + if (refblock_offs == 0) { + reftable_tmp[i] = 0; + continue; + } + ret = qcow2_cache_get(bs, s->refcount_block_cache, refblock_offs, + &refblock); + if (ret < 0) { + goto out; + } + + /* the refblock has own reference */ + if (i == offset_to_reftable_index(s, refblock_offs)) { + uint64_t block_index = (refblock_offs >> s->cluster_bits) & + (s->refcount_block_size - 1); + uint64_t refcount = s->get_refcount(refblock, block_index); + + s->set_refcount(refblock, block_index, 0); + + unused_block = buffer_is_zero(refblock, s->cluster_size); + + s->set_refcount(refblock, block_index, refcount); + } else { + unused_block = buffer_is_zero(refblock, s->cluster_size); + } + qcow2_cache_put(s->refcount_block_cache, &refblock); + + reftable_tmp[i] = unused_block ? 0 : cpu_to_be64(s->refcount_table[i]); + } + + ret = bdrv_pwrite_sync(bs->file, s->refcount_table_offset, reftable_tmp, + s->refcount_table_size * REFTABLE_ENTRY_SIZE); + /* + * If the write in the reftable failed the image may contain a partially + * overwritten reftable. In this case it would be better to clear the + * reftable in memory to avoid possible image corruption. + */ + for (i = 0; i < s->refcount_table_size; i++) { + if (s->refcount_table[i] && !reftable_tmp[i]) { + if (ret == 0) { + ret = qcow2_discard_refcount_block(bs, s->refcount_table[i] & + REFT_OFFSET_MASK); + } + s->refcount_table[i] = 0; + } + } + + if (!s->cache_discards) { + qcow2_process_discards(bs, ret); + } + +out: + g_free(reftable_tmp); + return ret; +} + +int64_t qcow2_get_last_cluster(BlockDriverState *bs, int64_t size) +{ + BDRVQcow2State *s = bs->opaque; + int64_t i; + + for (i = size_to_clusters(s, size) - 1; i >= 0; i--) { + uint64_t refcount; + int ret = qcow2_get_refcount(bs, i, &refcount); + if (ret < 0) { + fprintf(stderr, "Can't get refcount for cluster %" PRId64 ": %s\n", + i, strerror(-ret)); + return ret; + } + if (refcount > 0) { + return i; + } + } + qcow2_signal_corruption(bs, true, -1, -1, + "There are no references in the refcount table."); + return -EIO; +} + +int qcow2_detect_metadata_preallocation(BlockDriverState *bs) +{ + BDRVQcow2State *s = bs->opaque; + int64_t i, end_cluster, cluster_count = 0, threshold; + int64_t file_length, real_allocation, real_clusters; + + qemu_co_mutex_assert_locked(&s->lock); + + file_length = bdrv_getlength(bs->file->bs); + if (file_length < 0) { + return file_length; + } + + real_allocation = bdrv_get_allocated_file_size(bs->file->bs); + if (real_allocation < 0) { + return real_allocation; + } + + real_clusters = real_allocation / s->cluster_size; + threshold = MAX(real_clusters * 10 / 9, real_clusters + 2); + + end_cluster = size_to_clusters(s, file_length); + for (i = 0; i < end_cluster && cluster_count < threshold; i++) { + uint64_t refcount; + int ret = qcow2_get_refcount(bs, i, &refcount); + if (ret < 0) { + return ret; + } + cluster_count += !!refcount; + } + + return cluster_count >= threshold; +} diff --git a/block/qcow2-snapshot.c b/block/qcow2-snapshot.c new file mode 100644 index 000000000..2e98c7f4b --- /dev/null +++ b/block/qcow2-snapshot.c @@ -0,0 +1,1073 @@ +/* + * Block driver for the QCOW version 2 format + * + * Copyright (c) 2004-2006 Fabrice Bellard + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ + +#include "qemu/osdep.h" +#include "sysemu/block-backend.h" +#include "qapi/error.h" +#include "qcow2.h" +#include "qemu/bswap.h" +#include "qemu/error-report.h" +#include "qemu/cutils.h" + +static void qcow2_free_single_snapshot(BlockDriverState *bs, int i) +{ + BDRVQcow2State *s = bs->opaque; + + assert(i >= 0 && i < s->nb_snapshots); + g_free(s->snapshots[i].name); + g_free(s->snapshots[i].id_str); + g_free(s->snapshots[i].unknown_extra_data); + memset(&s->snapshots[i], 0, sizeof(s->snapshots[i])); +} + +void qcow2_free_snapshots(BlockDriverState *bs) +{ + BDRVQcow2State *s = bs->opaque; + int i; + + for(i = 0; i < s->nb_snapshots; i++) { + qcow2_free_single_snapshot(bs, i); + } + g_free(s->snapshots); + s->snapshots = NULL; + s->nb_snapshots = 0; +} + +/* + * If @repair is true, try to repair a broken snapshot table instead + * of just returning an error: + * + * - If the snapshot table was too long, set *nb_clusters_reduced to + * the number of snapshots removed off the end. + * The caller will update the on-disk nb_snapshots accordingly; + * this leaks clusters, but is safe. + * (The on-disk information must be updated before + * qcow2_check_refcounts(), because that function relies on + * s->nb_snapshots to reflect the on-disk value.) + * + * - If there were snapshots with too much extra metadata, increment + * *extra_data_dropped for each. + * This requires the caller to eventually rewrite the whole snapshot + * table, which requires cluster allocation. Therefore, this should + * be done only after qcow2_check_refcounts() made sure the refcount + * structures are valid. + * (In the meantime, the image is still valid because + * qcow2_check_refcounts() does not do anything with snapshots' + * extra data.) + */ +static int qcow2_do_read_snapshots(BlockDriverState *bs, bool repair, + int *nb_clusters_reduced, + int *extra_data_dropped, + Error **errp) +{ + BDRVQcow2State *s = bs->opaque; + QCowSnapshotHeader h; + QCowSnapshotExtraData extra; + QCowSnapshot *sn; + int i, id_str_size, name_size; + int64_t offset, pre_sn_offset; + uint64_t table_length = 0; + int ret; + + if (!s->nb_snapshots) { + s->snapshots = NULL; + s->snapshots_size = 0; + return 0; + } + + offset = s->snapshots_offset; + s->snapshots = g_new0(QCowSnapshot, s->nb_snapshots); + + for(i = 0; i < s->nb_snapshots; i++) { + bool truncate_unknown_extra_data = false; + + pre_sn_offset = offset; + table_length = ROUND_UP(table_length, 8); + + /* Read statically sized part of the snapshot header */ + offset = ROUND_UP(offset, 8); + ret = bdrv_pread(bs->file, offset, &h, sizeof(h)); + if (ret < 0) { + error_setg_errno(errp, -ret, "Failed to read snapshot table"); + goto fail; + } + + offset += sizeof(h); + sn = s->snapshots + i; + sn->l1_table_offset = be64_to_cpu(h.l1_table_offset); + sn->l1_size = be32_to_cpu(h.l1_size); + sn->vm_state_size = be32_to_cpu(h.vm_state_size); + sn->date_sec = be32_to_cpu(h.date_sec); + sn->date_nsec = be32_to_cpu(h.date_nsec); + sn->vm_clock_nsec = be64_to_cpu(h.vm_clock_nsec); + sn->extra_data_size = be32_to_cpu(h.extra_data_size); + + id_str_size = be16_to_cpu(h.id_str_size); + name_size = be16_to_cpu(h.name_size); + + if (sn->extra_data_size > QCOW_MAX_SNAPSHOT_EXTRA_DATA) { + if (!repair) { + ret = -EFBIG; + error_setg(errp, "Too much extra metadata in snapshot table " + "entry %i", i); + error_append_hint(errp, "You can force-remove this extra " + "metadata with qemu-img check -r all\n"); + goto fail; + } + + fprintf(stderr, "Discarding too much extra metadata in snapshot " + "table entry %i (%" PRIu32 " > %u)\n", + i, sn->extra_data_size, QCOW_MAX_SNAPSHOT_EXTRA_DATA); + + (*extra_data_dropped)++; + truncate_unknown_extra_data = true; + } + + /* Read known extra data */ + ret = bdrv_pread(bs->file, offset, &extra, + MIN(sizeof(extra), sn->extra_data_size)); + if (ret < 0) { + error_setg_errno(errp, -ret, "Failed to read snapshot table"); + goto fail; + } + offset += MIN(sizeof(extra), sn->extra_data_size); + + if (sn->extra_data_size >= endof(QCowSnapshotExtraData, + vm_state_size_large)) { + sn->vm_state_size = be64_to_cpu(extra.vm_state_size_large); + } + + if (sn->extra_data_size >= endof(QCowSnapshotExtraData, disk_size)) { + sn->disk_size = be64_to_cpu(extra.disk_size); + } else { + sn->disk_size = bs->total_sectors * BDRV_SECTOR_SIZE; + } + + if (sn->extra_data_size >= endof(QCowSnapshotExtraData, icount)) { + sn->icount = be64_to_cpu(extra.icount); + } else { + sn->icount = -1ULL; + } + + if (sn->extra_data_size > sizeof(extra)) { + uint64_t extra_data_end; + size_t unknown_extra_data_size; + + extra_data_end = offset + sn->extra_data_size - sizeof(extra); + + if (truncate_unknown_extra_data) { + sn->extra_data_size = QCOW_MAX_SNAPSHOT_EXTRA_DATA; + } + + /* Store unknown extra data */ + unknown_extra_data_size = sn->extra_data_size - sizeof(extra); + sn->unknown_extra_data = g_malloc(unknown_extra_data_size); + ret = bdrv_pread(bs->file, offset, sn->unknown_extra_data, + unknown_extra_data_size); + if (ret < 0) { + error_setg_errno(errp, -ret, + "Failed to read snapshot table"); + goto fail; + } + offset = extra_data_end; + } + + /* Read snapshot ID */ + sn->id_str = g_malloc(id_str_size + 1); + ret = bdrv_pread(bs->file, offset, sn->id_str, id_str_size); + if (ret < 0) { + error_setg_errno(errp, -ret, "Failed to read snapshot table"); + goto fail; + } + offset += id_str_size; + sn->id_str[id_str_size] = '\0'; + + /* Read snapshot name */ + sn->name = g_malloc(name_size + 1); + ret = bdrv_pread(bs->file, offset, sn->name, name_size); + if (ret < 0) { + error_setg_errno(errp, -ret, "Failed to read snapshot table"); + goto fail; + } + offset += name_size; + sn->name[name_size] = '\0'; + + /* Note that the extra data may have been truncated */ + table_length += sizeof(h) + sn->extra_data_size + id_str_size + + name_size; + if (!repair) { + assert(table_length == offset - s->snapshots_offset); + } + + if (table_length > QCOW_MAX_SNAPSHOTS_SIZE || + offset - s->snapshots_offset > INT_MAX) + { + if (!repair) { + ret = -EFBIG; + error_setg(errp, "Snapshot table is too big"); + error_append_hint(errp, "You can force-remove all %u " + "overhanging snapshots with qemu-img check " + "-r all\n", s->nb_snapshots - i); + goto fail; + } + + fprintf(stderr, "Discarding %u overhanging snapshots (snapshot " + "table is too big)\n", s->nb_snapshots - i); + + *nb_clusters_reduced += (s->nb_snapshots - i); + + /* Discard current snapshot also */ + qcow2_free_single_snapshot(bs, i); + + /* + * This leaks all the rest of the snapshot table and the + * snapshots' clusters, but we run in check -r all mode, + * so qcow2_check_refcounts() will take care of it. + */ + s->nb_snapshots = i; + offset = pre_sn_offset; + break; + } + } + + assert(offset - s->snapshots_offset <= INT_MAX); + s->snapshots_size = offset - s->snapshots_offset; + return 0; + +fail: + qcow2_free_snapshots(bs); + return ret; +} + +int qcow2_read_snapshots(BlockDriverState *bs, Error **errp) +{ + return qcow2_do_read_snapshots(bs, false, NULL, NULL, errp); +} + +/* add at the end of the file a new list of snapshots */ +int qcow2_write_snapshots(BlockDriverState *bs) +{ + BDRVQcow2State *s = bs->opaque; + QCowSnapshot *sn; + QCowSnapshotHeader h; + QCowSnapshotExtraData extra; + int i, name_size, id_str_size, snapshots_size; + struct { + uint32_t nb_snapshots; + uint64_t snapshots_offset; + } QEMU_PACKED header_data; + int64_t offset, snapshots_offset = 0; + int ret; + + /* compute the size of the snapshots */ + offset = 0; + for(i = 0; i < s->nb_snapshots; i++) { + sn = s->snapshots + i; + offset = ROUND_UP(offset, 8); + offset += sizeof(h); + offset += MAX(sizeof(extra), sn->extra_data_size); + offset += strlen(sn->id_str); + offset += strlen(sn->name); + + if (offset > QCOW_MAX_SNAPSHOTS_SIZE) { + ret = -EFBIG; + goto fail; + } + } + + assert(offset <= INT_MAX); + snapshots_size = offset; + + /* Allocate space for the new snapshot list */ + snapshots_offset = qcow2_alloc_clusters(bs, snapshots_size); + offset = snapshots_offset; + if (offset < 0) { + ret = offset; + goto fail; + } + ret = bdrv_flush(bs); + if (ret < 0) { + goto fail; + } + + /* The snapshot list position has not yet been updated, so these clusters + * must indeed be completely free */ + ret = qcow2_pre_write_overlap_check(bs, 0, offset, snapshots_size, false); + if (ret < 0) { + goto fail; + } + + + /* Write all snapshots to the new list */ + for(i = 0; i < s->nb_snapshots; i++) { + sn = s->snapshots + i; + memset(&h, 0, sizeof(h)); + h.l1_table_offset = cpu_to_be64(sn->l1_table_offset); + h.l1_size = cpu_to_be32(sn->l1_size); + /* If it doesn't fit in 32 bit, older implementations should treat it + * as a disk-only snapshot rather than truncate the VM state */ + if (sn->vm_state_size <= 0xffffffff) { + h.vm_state_size = cpu_to_be32(sn->vm_state_size); + } + h.date_sec = cpu_to_be32(sn->date_sec); + h.date_nsec = cpu_to_be32(sn->date_nsec); + h.vm_clock_nsec = cpu_to_be64(sn->vm_clock_nsec); + h.extra_data_size = cpu_to_be32(MAX(sizeof(extra), + sn->extra_data_size)); + + memset(&extra, 0, sizeof(extra)); + extra.vm_state_size_large = cpu_to_be64(sn->vm_state_size); + extra.disk_size = cpu_to_be64(sn->disk_size); + extra.icount = cpu_to_be64(sn->icount); + + id_str_size = strlen(sn->id_str); + name_size = strlen(sn->name); + assert(id_str_size <= UINT16_MAX && name_size <= UINT16_MAX); + h.id_str_size = cpu_to_be16(id_str_size); + h.name_size = cpu_to_be16(name_size); + offset = ROUND_UP(offset, 8); + + ret = bdrv_pwrite(bs->file, offset, &h, sizeof(h)); + if (ret < 0) { + goto fail; + } + offset += sizeof(h); + + ret = bdrv_pwrite(bs->file, offset, &extra, sizeof(extra)); + if (ret < 0) { + goto fail; + } + offset += sizeof(extra); + + if (sn->extra_data_size > sizeof(extra)) { + size_t unknown_extra_data_size = + sn->extra_data_size - sizeof(extra); + + /* qcow2_read_snapshots() ensures no unbounded allocation */ + assert(unknown_extra_data_size <= BDRV_REQUEST_MAX_BYTES); + assert(sn->unknown_extra_data); + + ret = bdrv_pwrite(bs->file, offset, sn->unknown_extra_data, + unknown_extra_data_size); + if (ret < 0) { + goto fail; + } + offset += unknown_extra_data_size; + } + + ret = bdrv_pwrite(bs->file, offset, sn->id_str, id_str_size); + if (ret < 0) { + goto fail; + } + offset += id_str_size; + + ret = bdrv_pwrite(bs->file, offset, sn->name, name_size); + if (ret < 0) { + goto fail; + } + offset += name_size; + } + + /* + * Update the header to point to the new snapshot table. This requires the + * new table and its refcounts to be stable on disk. + */ + ret = bdrv_flush(bs); + if (ret < 0) { + goto fail; + } + + QEMU_BUILD_BUG_ON(offsetof(QCowHeader, snapshots_offset) != + endof(QCowHeader, nb_snapshots)); + + header_data.nb_snapshots = cpu_to_be32(s->nb_snapshots); + header_data.snapshots_offset = cpu_to_be64(snapshots_offset); + + ret = bdrv_pwrite_sync(bs->file, offsetof(QCowHeader, nb_snapshots), + &header_data, sizeof(header_data)); + if (ret < 0) { + goto fail; + } + + /* free the old snapshot table */ + qcow2_free_clusters(bs, s->snapshots_offset, s->snapshots_size, + QCOW2_DISCARD_SNAPSHOT); + s->snapshots_offset = snapshots_offset; + s->snapshots_size = snapshots_size; + return 0; + +fail: + if (snapshots_offset > 0) { + qcow2_free_clusters(bs, snapshots_offset, snapshots_size, + QCOW2_DISCARD_ALWAYS); + } + return ret; +} + +int coroutine_fn qcow2_check_read_snapshot_table(BlockDriverState *bs, + BdrvCheckResult *result, + BdrvCheckMode fix) +{ + BDRVQcow2State *s = bs->opaque; + Error *local_err = NULL; + int nb_clusters_reduced = 0; + int extra_data_dropped = 0; + int ret; + struct { + uint32_t nb_snapshots; + uint64_t snapshots_offset; + } QEMU_PACKED snapshot_table_pointer; + + /* qcow2_do_open() discards this information in check mode */ + ret = bdrv_pread(bs->file, offsetof(QCowHeader, nb_snapshots), + &snapshot_table_pointer, sizeof(snapshot_table_pointer)); + if (ret < 0) { + result->check_errors++; + fprintf(stderr, "ERROR failed to read the snapshot table pointer from " + "the image header: %s\n", strerror(-ret)); + return ret; + } + + s->snapshots_offset = be64_to_cpu(snapshot_table_pointer.snapshots_offset); + s->nb_snapshots = be32_to_cpu(snapshot_table_pointer.nb_snapshots); + + if (s->nb_snapshots > QCOW_MAX_SNAPSHOTS && (fix & BDRV_FIX_ERRORS)) { + fprintf(stderr, "Discarding %u overhanging snapshots\n", + s->nb_snapshots - QCOW_MAX_SNAPSHOTS); + + nb_clusters_reduced += s->nb_snapshots - QCOW_MAX_SNAPSHOTS; + s->nb_snapshots = QCOW_MAX_SNAPSHOTS; + } + + ret = qcow2_validate_table(bs, s->snapshots_offset, s->nb_snapshots, + sizeof(QCowSnapshotHeader), + sizeof(QCowSnapshotHeader) * QCOW_MAX_SNAPSHOTS, + "snapshot table", &local_err); + if (ret < 0) { + result->check_errors++; + error_reportf_err(local_err, "ERROR "); + + if (s->nb_snapshots > QCOW_MAX_SNAPSHOTS) { + fprintf(stderr, "You can force-remove all %u overhanging snapshots " + "with qemu-img check -r all\n", + s->nb_snapshots - QCOW_MAX_SNAPSHOTS); + } + + /* We did not read the snapshot table, so invalidate this information */ + s->snapshots_offset = 0; + s->nb_snapshots = 0; + + return ret; + } + + qemu_co_mutex_unlock(&s->lock); + ret = qcow2_do_read_snapshots(bs, fix & BDRV_FIX_ERRORS, + &nb_clusters_reduced, &extra_data_dropped, + &local_err); + qemu_co_mutex_lock(&s->lock); + if (ret < 0) { + result->check_errors++; + error_reportf_err(local_err, + "ERROR failed to read the snapshot table: "); + + /* We did not read the snapshot table, so invalidate this information */ + s->snapshots_offset = 0; + s->nb_snapshots = 0; + + return ret; + } + result->corruptions += nb_clusters_reduced + extra_data_dropped; + + if (nb_clusters_reduced) { + /* + * Update image header now, because: + * (1) qcow2_check_refcounts() relies on s->nb_snapshots to be + * the same as what the image header says, + * (2) this leaks clusters, but qcow2_check_refcounts() will + * fix that. + */ + assert(fix & BDRV_FIX_ERRORS); + + snapshot_table_pointer.nb_snapshots = cpu_to_be32(s->nb_snapshots); + ret = bdrv_pwrite_sync(bs->file, offsetof(QCowHeader, nb_snapshots), + &snapshot_table_pointer.nb_snapshots, + sizeof(snapshot_table_pointer.nb_snapshots)); + if (ret < 0) { + result->check_errors++; + fprintf(stderr, "ERROR failed to update the snapshot count in the " + "image header: %s\n", strerror(-ret)); + return ret; + } + + result->corruptions_fixed += nb_clusters_reduced; + result->corruptions -= nb_clusters_reduced; + } + + /* + * All of v3 images' snapshot table entries need to have at least + * 16 bytes of extra data. + */ + if (s->qcow_version >= 3) { + int i; + for (i = 0; i < s->nb_snapshots; i++) { + if (s->snapshots[i].extra_data_size < + sizeof_field(QCowSnapshotExtraData, vm_state_size_large) + + sizeof_field(QCowSnapshotExtraData, disk_size)) + { + result->corruptions++; + fprintf(stderr, "%s snapshot table entry %i is incomplete\n", + fix & BDRV_FIX_ERRORS ? "Repairing" : "ERROR", i); + } + } + } + + return 0; +} + +int coroutine_fn qcow2_check_fix_snapshot_table(BlockDriverState *bs, + BdrvCheckResult *result, + BdrvCheckMode fix) +{ + BDRVQcow2State *s = bs->opaque; + int ret; + + if (result->corruptions && (fix & BDRV_FIX_ERRORS)) { + qemu_co_mutex_unlock(&s->lock); + ret = qcow2_write_snapshots(bs); + qemu_co_mutex_lock(&s->lock); + if (ret < 0) { + result->check_errors++; + fprintf(stderr, "ERROR failed to update snapshot table: %s\n", + strerror(-ret)); + return ret; + } + + result->corruptions_fixed += result->corruptions; + result->corruptions = 0; + } + + return 0; +} + +static void find_new_snapshot_id(BlockDriverState *bs, + char *id_str, int id_str_size) +{ + BDRVQcow2State *s = bs->opaque; + QCowSnapshot *sn; + int i; + unsigned long id, id_max = 0; + + for(i = 0; i < s->nb_snapshots; i++) { + sn = s->snapshots + i; + id = strtoul(sn->id_str, NULL, 10); + if (id > id_max) + id_max = id; + } + snprintf(id_str, id_str_size, "%lu", id_max + 1); +} + +static int find_snapshot_by_id_and_name(BlockDriverState *bs, + const char *id, + const char *name) +{ + BDRVQcow2State *s = bs->opaque; + int i; + + if (id && name) { + for (i = 0; i < s->nb_snapshots; i++) { + if (!strcmp(s->snapshots[i].id_str, id) && + !strcmp(s->snapshots[i].name, name)) { + return i; + } + } + } else if (id) { + for (i = 0; i < s->nb_snapshots; i++) { + if (!strcmp(s->snapshots[i].id_str, id)) { + return i; + } + } + } else if (name) { + for (i = 0; i < s->nb_snapshots; i++) { + if (!strcmp(s->snapshots[i].name, name)) { + return i; + } + } + } + + return -1; +} + +static int find_snapshot_by_id_or_name(BlockDriverState *bs, + const char *id_or_name) +{ + int ret; + + ret = find_snapshot_by_id_and_name(bs, id_or_name, NULL); + if (ret >= 0) { + return ret; + } + return find_snapshot_by_id_and_name(bs, NULL, id_or_name); +} + +/* if no id is provided, a new one is constructed */ +int qcow2_snapshot_create(BlockDriverState *bs, QEMUSnapshotInfo *sn_info) +{ + BDRVQcow2State *s = bs->opaque; + QCowSnapshot *new_snapshot_list = NULL; + QCowSnapshot *old_snapshot_list = NULL; + QCowSnapshot sn1, *sn = &sn1; + int i, ret; + uint64_t *l1_table = NULL; + int64_t l1_table_offset; + + if (s->nb_snapshots >= QCOW_MAX_SNAPSHOTS) { + return -EFBIG; + } + + if (has_data_file(bs)) { + return -ENOTSUP; + } + + memset(sn, 0, sizeof(*sn)); + + /* Generate an ID */ + find_new_snapshot_id(bs, sn_info->id_str, sizeof(sn_info->id_str)); + + /* Populate sn with passed data */ + sn->id_str = g_strdup(sn_info->id_str); + sn->name = g_strdup(sn_info->name); + + sn->disk_size = bs->total_sectors * BDRV_SECTOR_SIZE; + sn->vm_state_size = sn_info->vm_state_size; + sn->date_sec = sn_info->date_sec; + sn->date_nsec = sn_info->date_nsec; + sn->vm_clock_nsec = sn_info->vm_clock_nsec; + sn->icount = sn_info->icount; + sn->extra_data_size = sizeof(QCowSnapshotExtraData); + + /* Allocate the L1 table of the snapshot and copy the current one there. */ + l1_table_offset = qcow2_alloc_clusters(bs, s->l1_size * L1E_SIZE); + if (l1_table_offset < 0) { + ret = l1_table_offset; + goto fail; + } + + sn->l1_table_offset = l1_table_offset; + sn->l1_size = s->l1_size; + + l1_table = g_try_new(uint64_t, s->l1_size); + if (s->l1_size && l1_table == NULL) { + ret = -ENOMEM; + goto fail; + } + + for(i = 0; i < s->l1_size; i++) { + l1_table[i] = cpu_to_be64(s->l1_table[i]); + } + + ret = qcow2_pre_write_overlap_check(bs, 0, sn->l1_table_offset, + s->l1_size * L1E_SIZE, false); + if (ret < 0) { + goto fail; + } + + ret = bdrv_pwrite(bs->file, sn->l1_table_offset, l1_table, + s->l1_size * L1E_SIZE); + if (ret < 0) { + goto fail; + } + + g_free(l1_table); + l1_table = NULL; + + /* + * Increase the refcounts of all clusters and make sure everything is + * stable on disk before updating the snapshot table to contain a pointer + * to the new L1 table. + */ + ret = qcow2_update_snapshot_refcount(bs, s->l1_table_offset, s->l1_size, 1); + if (ret < 0) { + goto fail; + } + + /* Append the new snapshot to the snapshot list */ + new_snapshot_list = g_new(QCowSnapshot, s->nb_snapshots + 1); + if (s->snapshots) { + memcpy(new_snapshot_list, s->snapshots, + s->nb_snapshots * sizeof(QCowSnapshot)); + old_snapshot_list = s->snapshots; + } + s->snapshots = new_snapshot_list; + s->snapshots[s->nb_snapshots++] = *sn; + + ret = qcow2_write_snapshots(bs); + if (ret < 0) { + g_free(s->snapshots); + s->snapshots = old_snapshot_list; + s->nb_snapshots--; + goto fail; + } + + g_free(old_snapshot_list); + + /* The VM state isn't needed any more in the active L1 table; in fact, it + * hurts by causing expensive COW for the next snapshot. */ + qcow2_cluster_discard(bs, qcow2_vm_state_offset(s), + ROUND_UP(sn->vm_state_size, s->cluster_size), + QCOW2_DISCARD_NEVER, false); + +#ifdef DEBUG_ALLOC + { + BdrvCheckResult result = {0}; + qcow2_check_refcounts(bs, &result, 0); + } +#endif + return 0; + +fail: + g_free(sn->id_str); + g_free(sn->name); + g_free(l1_table); + + return ret; +} + +/* copy the snapshot 'snapshot_name' into the current disk image */ +int qcow2_snapshot_goto(BlockDriverState *bs, const char *snapshot_id) +{ + BDRVQcow2State *s = bs->opaque; + QCowSnapshot *sn; + Error *local_err = NULL; + int i, snapshot_index; + int cur_l1_bytes, sn_l1_bytes; + int ret; + uint64_t *sn_l1_table = NULL; + + if (has_data_file(bs)) { + return -ENOTSUP; + } + + /* Search the snapshot */ + snapshot_index = find_snapshot_by_id_or_name(bs, snapshot_id); + if (snapshot_index < 0) { + return -ENOENT; + } + sn = &s->snapshots[snapshot_index]; + + ret = qcow2_validate_table(bs, sn->l1_table_offset, sn->l1_size, + L1E_SIZE, QCOW_MAX_L1_SIZE, + "Snapshot L1 table", &local_err); + if (ret < 0) { + error_report_err(local_err); + goto fail; + } + + if (sn->disk_size != bs->total_sectors * BDRV_SECTOR_SIZE) { + BlockBackend *blk = blk_new_with_bs(bs, BLK_PERM_RESIZE, BLK_PERM_ALL, + &local_err); + if (!blk) { + error_report_err(local_err); + ret = -ENOTSUP; + goto fail; + } + + ret = blk_truncate(blk, sn->disk_size, true, PREALLOC_MODE_OFF, 0, + &local_err); + blk_unref(blk); + if (ret < 0) { + error_report_err(local_err); + goto fail; + } + } + + /* + * Make sure that the current L1 table is big enough to contain the whole + * L1 table of the snapshot. If the snapshot L1 table is smaller, the + * current one must be padded with zeros. + */ + ret = qcow2_grow_l1_table(bs, sn->l1_size, true); + if (ret < 0) { + goto fail; + } + + cur_l1_bytes = s->l1_size * L1E_SIZE; + sn_l1_bytes = sn->l1_size * L1E_SIZE; + + /* + * Copy the snapshot L1 table to the current L1 table. + * + * Before overwriting the old current L1 table on disk, make sure to + * increase all refcounts for the clusters referenced by the new one. + * Decrease the refcount referenced by the old one only when the L1 + * table is overwritten. + */ + sn_l1_table = g_try_malloc0(cur_l1_bytes); + if (cur_l1_bytes && sn_l1_table == NULL) { + ret = -ENOMEM; + goto fail; + } + + ret = bdrv_pread(bs->file, sn->l1_table_offset, + sn_l1_table, sn_l1_bytes); + if (ret < 0) { + goto fail; + } + + ret = qcow2_update_snapshot_refcount(bs, sn->l1_table_offset, + sn->l1_size, 1); + if (ret < 0) { + goto fail; + } + + ret = qcow2_pre_write_overlap_check(bs, QCOW2_OL_ACTIVE_L1, + s->l1_table_offset, cur_l1_bytes, + false); + if (ret < 0) { + goto fail; + } + + ret = bdrv_pwrite_sync(bs->file, s->l1_table_offset, sn_l1_table, + cur_l1_bytes); + if (ret < 0) { + goto fail; + } + + /* + * Decrease refcount of clusters of current L1 table. + * + * At this point, the in-memory s->l1_table points to the old L1 table, + * whereas on disk we already have the new one. + * + * qcow2_update_snapshot_refcount special cases the current L1 table to use + * the in-memory data instead of really using the offset to load a new one, + * which is why this works. + */ + ret = qcow2_update_snapshot_refcount(bs, s->l1_table_offset, + s->l1_size, -1); + + /* + * Now update the in-memory L1 table to be in sync with the on-disk one. We + * need to do this even if updating refcounts failed. + */ + for(i = 0;i < s->l1_size; i++) { + s->l1_table[i] = be64_to_cpu(sn_l1_table[i]); + } + + if (ret < 0) { + goto fail; + } + + g_free(sn_l1_table); + sn_l1_table = NULL; + + /* + * Update QCOW_OFLAG_COPIED in the active L1 table (it may have changed + * when we decreased the refcount of the old snapshot. + */ + ret = qcow2_update_snapshot_refcount(bs, s->l1_table_offset, s->l1_size, 0); + if (ret < 0) { + goto fail; + } + +#ifdef DEBUG_ALLOC + { + BdrvCheckResult result = {0}; + qcow2_check_refcounts(bs, &result, 0); + } +#endif + return 0; + +fail: + g_free(sn_l1_table); + return ret; +} + +int qcow2_snapshot_delete(BlockDriverState *bs, + const char *snapshot_id, + const char *name, + Error **errp) +{ + BDRVQcow2State *s = bs->opaque; + QCowSnapshot sn; + int snapshot_index, ret; + + if (has_data_file(bs)) { + return -ENOTSUP; + } + + /* Search the snapshot */ + snapshot_index = find_snapshot_by_id_and_name(bs, snapshot_id, name); + if (snapshot_index < 0) { + error_setg(errp, "Can't find the snapshot"); + return -ENOENT; + } + sn = s->snapshots[snapshot_index]; + + ret = qcow2_validate_table(bs, sn.l1_table_offset, sn.l1_size, + L1E_SIZE, QCOW_MAX_L1_SIZE, + "Snapshot L1 table", errp); + if (ret < 0) { + return ret; + } + + /* Remove it from the snapshot list */ + memmove(s->snapshots + snapshot_index, + s->snapshots + snapshot_index + 1, + (s->nb_snapshots - snapshot_index - 1) * sizeof(sn)); + s->nb_snapshots--; + ret = qcow2_write_snapshots(bs); + if (ret < 0) { + error_setg_errno(errp, -ret, + "Failed to remove snapshot from snapshot list"); + return ret; + } + + /* + * The snapshot is now unused, clean up. If we fail after this point, we + * won't recover but just leak clusters. + */ + g_free(sn.unknown_extra_data); + g_free(sn.id_str); + g_free(sn.name); + + /* + * Now decrease the refcounts of clusters referenced by the snapshot and + * free the L1 table. + */ + ret = qcow2_update_snapshot_refcount(bs, sn.l1_table_offset, + sn.l1_size, -1); + if (ret < 0) { + error_setg_errno(errp, -ret, "Failed to free the cluster and L1 table"); + return ret; + } + qcow2_free_clusters(bs, sn.l1_table_offset, sn.l1_size * L1E_SIZE, + QCOW2_DISCARD_SNAPSHOT); + + /* must update the copied flag on the current cluster offsets */ + ret = qcow2_update_snapshot_refcount(bs, s->l1_table_offset, s->l1_size, 0); + if (ret < 0) { + error_setg_errno(errp, -ret, + "Failed to update snapshot status in disk"); + return ret; + } + +#ifdef DEBUG_ALLOC + { + BdrvCheckResult result = {0}; + qcow2_check_refcounts(bs, &result, 0); + } +#endif + return 0; +} + +int qcow2_snapshot_list(BlockDriverState *bs, QEMUSnapshotInfo **psn_tab) +{ + BDRVQcow2State *s = bs->opaque; + QEMUSnapshotInfo *sn_tab, *sn_info; + QCowSnapshot *sn; + int i; + + if (has_data_file(bs)) { + return -ENOTSUP; + } + if (!s->nb_snapshots) { + *psn_tab = NULL; + return s->nb_snapshots; + } + + sn_tab = g_new0(QEMUSnapshotInfo, s->nb_snapshots); + for(i = 0; i < s->nb_snapshots; i++) { + sn_info = sn_tab + i; + sn = s->snapshots + i; + pstrcpy(sn_info->id_str, sizeof(sn_info->id_str), + sn->id_str); + pstrcpy(sn_info->name, sizeof(sn_info->name), + sn->name); + sn_info->vm_state_size = sn->vm_state_size; + sn_info->date_sec = sn->date_sec; + sn_info->date_nsec = sn->date_nsec; + sn_info->vm_clock_nsec = sn->vm_clock_nsec; + sn_info->icount = sn->icount; + } + *psn_tab = sn_tab; + return s->nb_snapshots; +} + +int qcow2_snapshot_load_tmp(BlockDriverState *bs, + const char *snapshot_id, + const char *name, + Error **errp) +{ + int i, snapshot_index; + BDRVQcow2State *s = bs->opaque; + QCowSnapshot *sn; + uint64_t *new_l1_table; + int new_l1_bytes; + int ret; + + assert(bs->read_only); + + /* Search the snapshot */ + snapshot_index = find_snapshot_by_id_and_name(bs, snapshot_id, name); + if (snapshot_index < 0) { + error_setg(errp, + "Can't find snapshot"); + return -ENOENT; + } + sn = &s->snapshots[snapshot_index]; + + /* Allocate and read in the snapshot's L1 table */ + ret = qcow2_validate_table(bs, sn->l1_table_offset, sn->l1_size, + L1E_SIZE, QCOW_MAX_L1_SIZE, + "Snapshot L1 table", errp); + if (ret < 0) { + return ret; + } + new_l1_bytes = sn->l1_size * L1E_SIZE; + new_l1_table = qemu_try_blockalign(bs->file->bs, new_l1_bytes); + if (new_l1_table == NULL) { + return -ENOMEM; + } + + ret = bdrv_pread(bs->file, sn->l1_table_offset, + new_l1_table, new_l1_bytes); + if (ret < 0) { + error_setg(errp, "Failed to read l1 table for snapshot"); + qemu_vfree(new_l1_table); + return ret; + } + + /* Switch the L1 table */ + qemu_vfree(s->l1_table); + + s->l1_size = sn->l1_size; + s->l1_table_offset = sn->l1_table_offset; + s->l1_table = new_l1_table; + + for(i = 0;i < s->l1_size; i++) { + be64_to_cpus(&s->l1_table[i]); + } + + return 0; +} diff --git a/block/qcow2-threads.c b/block/qcow2-threads.c new file mode 100644 index 000000000..1914baf45 --- /dev/null +++ b/block/qcow2-threads.c @@ -0,0 +1,527 @@ +/* + * Threaded data processing for Qcow2: compression, encryption + * + * Copyright (c) 2004-2006 Fabrice Bellard + * Copyright (c) 2018 Virtuozzo International GmbH. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ + +#include "qemu/osdep.h" + +#define ZLIB_CONST +#include + +#ifdef CONFIG_ZSTD +#include +#include +#endif + +#include "qcow2.h" +#include "block/thread-pool.h" +#include "crypto.h" + +static int coroutine_fn +qcow2_co_process(BlockDriverState *bs, ThreadPoolFunc *func, void *arg) +{ + int ret; + BDRVQcow2State *s = bs->opaque; + ThreadPool *pool = aio_get_thread_pool(bdrv_get_aio_context(bs)); + + qemu_co_mutex_lock(&s->lock); + while (s->nb_threads >= QCOW2_MAX_THREADS) { + qemu_co_queue_wait(&s->thread_task_queue, &s->lock); + } + s->nb_threads++; + qemu_co_mutex_unlock(&s->lock); + + ret = thread_pool_submit_co(pool, func, arg); + + qemu_co_mutex_lock(&s->lock); + s->nb_threads--; + qemu_co_queue_next(&s->thread_task_queue); + qemu_co_mutex_unlock(&s->lock); + + return ret; +} + + +/* + * Compression + */ + +typedef ssize_t (*Qcow2CompressFunc)(void *dest, size_t dest_size, + const void *src, size_t src_size); +typedef struct Qcow2CompressData { + void *dest; + size_t dest_size; + const void *src; + size_t src_size; + ssize_t ret; + + Qcow2CompressFunc func; +} Qcow2CompressData; + +/* + * qcow2_zlib_compress() + * + * Compress @src_size bytes of data using zlib compression method + * + * @dest - destination buffer, @dest_size bytes + * @src - source buffer, @src_size bytes + * + * Returns: compressed size on success + * -ENOMEM destination buffer is not enough to store compressed data + * -EIO on any other error + */ +static ssize_t qcow2_zlib_compress(void *dest, size_t dest_size, + const void *src, size_t src_size) +{ + ssize_t ret; + z_stream strm; + + /* best compression, small window, no zlib header */ + memset(&strm, 0, sizeof(strm)); + ret = deflateInit2(&strm, Z_DEFAULT_COMPRESSION, Z_DEFLATED, + -12, 9, Z_DEFAULT_STRATEGY); + if (ret != Z_OK) { + return -EIO; + } + + /* + * strm.next_in is not const in old zlib versions, such as those used on + * OpenBSD/NetBSD, so cast the const away + */ + strm.avail_in = src_size; + strm.next_in = (void *) src; + strm.avail_out = dest_size; + strm.next_out = dest; + + ret = deflate(&strm, Z_FINISH); + if (ret == Z_STREAM_END) { + ret = dest_size - strm.avail_out; + } else { + ret = (ret == Z_OK ? -ENOMEM : -EIO); + } + + deflateEnd(&strm); + + return ret; +} + +/* + * qcow2_zlib_decompress() + * + * Decompress some data (not more than @src_size bytes) to produce exactly + * @dest_size bytes using zlib compression method + * + * @dest - destination buffer, @dest_size bytes + * @src - source buffer, @src_size bytes + * + * Returns: 0 on success + * -EIO on fail + */ +static ssize_t qcow2_zlib_decompress(void *dest, size_t dest_size, + const void *src, size_t src_size) +{ + int ret; + z_stream strm; + + memset(&strm, 0, sizeof(strm)); + strm.avail_in = src_size; + strm.next_in = (void *) src; + strm.avail_out = dest_size; + strm.next_out = dest; + + ret = inflateInit2(&strm, -12); + if (ret != Z_OK) { + return -EIO; + } + + ret = inflate(&strm, Z_FINISH); + if ((ret == Z_STREAM_END || ret == Z_BUF_ERROR) && strm.avail_out == 0) { + /* + * We approve Z_BUF_ERROR because we need @dest buffer to be filled, but + * @src buffer may be processed partly (because in qcow2 we know size of + * compressed data with precision of one sector) + */ + ret = 0; + } else { + ret = -EIO; + } + + inflateEnd(&strm); + + return ret; +} + +#ifdef CONFIG_ZSTD + +/* + * qcow2_zstd_compress() + * + * Compress @src_size bytes of data using zstd compression method + * + * @dest - destination buffer, @dest_size bytes + * @src - source buffer, @src_size bytes + * + * Returns: compressed size on success + * -ENOMEM destination buffer is not enough to store compressed data + * -EIO on any other error + */ +static ssize_t qcow2_zstd_compress(void *dest, size_t dest_size, + const void *src, size_t src_size) +{ + ssize_t ret; + size_t zstd_ret; + ZSTD_outBuffer output = { + .dst = dest, + .size = dest_size, + .pos = 0 + }; + ZSTD_inBuffer input = { + .src = src, + .size = src_size, + .pos = 0 + }; + ZSTD_CCtx *cctx = ZSTD_createCCtx(); + + if (!cctx) { + return -EIO; + } + /* + * Use the zstd streamed interface for symmetry with decompression, + * where streaming is essential since we don't record the exact + * compressed size. + * + * ZSTD_compressStream2() tries to compress everything it could + * with a single call. Although, ZSTD docs says that: + * "You must continue calling ZSTD_compressStream2() with ZSTD_e_end + * until it returns 0, at which point you are free to start a new frame", + * in out tests we saw the only case when it returned with >0 - + * when the output buffer was too small. In that case, + * ZSTD_compressStream2() expects a bigger buffer on the next call. + * We can't provide a bigger buffer because we are limited with dest_size + * which we pass to the ZSTD_compressStream2() at once. + * So, we don't need any loops and just abort the compression when we + * don't get 0 result on the first call. + */ + zstd_ret = ZSTD_compressStream2(cctx, &output, &input, ZSTD_e_end); + + if (zstd_ret) { + if (zstd_ret > output.size - output.pos) { + ret = -ENOMEM; + } else { + ret = -EIO; + } + goto out; + } + + /* make sure that zstd didn't overflow the dest buffer */ + assert(output.pos <= dest_size); + ret = output.pos; +out: + ZSTD_freeCCtx(cctx); + return ret; +} + +/* + * qcow2_zstd_decompress() + * + * Decompress some data (not more than @src_size bytes) to produce exactly + * @dest_size bytes using zstd compression method + * + * @dest - destination buffer, @dest_size bytes + * @src - source buffer, @src_size bytes + * + * Returns: 0 on success + * -EIO on any error + */ +static ssize_t qcow2_zstd_decompress(void *dest, size_t dest_size, + const void *src, size_t src_size) +{ + size_t zstd_ret = 0; + ssize_t ret = 0; + ZSTD_outBuffer output = { + .dst = dest, + .size = dest_size, + .pos = 0 + }; + ZSTD_inBuffer input = { + .src = src, + .size = src_size, + .pos = 0 + }; + ZSTD_DCtx *dctx = ZSTD_createDCtx(); + + if (!dctx) { + return -EIO; + } + + /* + * The compressed stream from the input buffer may consist of more + * than one zstd frame. So we iterate until we get a fully + * uncompressed cluster. + * From zstd docs related to ZSTD_decompressStream: + * "return : 0 when a frame is completely decoded and fully flushed" + * We suppose that this means: each time ZSTD_decompressStream reads + * only ONE full frame and returns 0 if and only if that frame + * is completely decoded and flushed. Only after returning 0, + * ZSTD_decompressStream reads another ONE full frame. + */ + while (output.pos < output.size) { + size_t last_in_pos = input.pos; + size_t last_out_pos = output.pos; + zstd_ret = ZSTD_decompressStream(dctx, &output, &input); + + if (ZSTD_isError(zstd_ret)) { + ret = -EIO; + break; + } + + /* + * The ZSTD manual is vague about what to do if it reads + * the buffer partially, and we don't want to get stuck + * in an infinite loop where ZSTD_decompressStream + * returns > 0 waiting for another input chunk. So, we add + * a check which ensures that the loop makes some progress + * on each step. + */ + if (last_in_pos >= input.pos && + last_out_pos >= output.pos) { + ret = -EIO; + break; + } + } + /* + * Make sure that we have the frame fully flushed here + * if not, we somehow managed to get uncompressed cluster + * greater then the cluster size, possibly because of its + * damage. + */ + if (zstd_ret > 0) { + ret = -EIO; + } + + ZSTD_freeDCtx(dctx); + assert(ret == 0 || ret == -EIO); + return ret; +} +#endif + +static int qcow2_compress_pool_func(void *opaque) +{ + Qcow2CompressData *data = opaque; + + data->ret = data->func(data->dest, data->dest_size, + data->src, data->src_size); + + return 0; +} + +static ssize_t coroutine_fn +qcow2_co_do_compress(BlockDriverState *bs, void *dest, size_t dest_size, + const void *src, size_t src_size, Qcow2CompressFunc func) +{ + Qcow2CompressData arg = { + .dest = dest, + .dest_size = dest_size, + .src = src, + .src_size = src_size, + .func = func, + }; + + qcow2_co_process(bs, qcow2_compress_pool_func, &arg); + + return arg.ret; +} + +/* + * qcow2_co_compress() + * + * Compress @src_size bytes of data using the compression + * method defined by the image compression type + * + * @dest - destination buffer, @dest_size bytes + * @src - source buffer, @src_size bytes + * + * Returns: compressed size on success + * a negative error code on failure + */ +ssize_t coroutine_fn +qcow2_co_compress(BlockDriverState *bs, void *dest, size_t dest_size, + const void *src, size_t src_size) +{ + BDRVQcow2State *s = bs->opaque; + Qcow2CompressFunc fn; + + switch (s->compression_type) { + case QCOW2_COMPRESSION_TYPE_ZLIB: + fn = qcow2_zlib_compress; + break; + +#ifdef CONFIG_ZSTD + case QCOW2_COMPRESSION_TYPE_ZSTD: + fn = qcow2_zstd_compress; + break; +#endif + default: + abort(); + } + + return qcow2_co_do_compress(bs, dest, dest_size, src, src_size, fn); +} + +/* + * qcow2_co_decompress() + * + * Decompress some data (not more than @src_size bytes) to produce exactly + * @dest_size bytes using the compression method defined by the image + * compression type + * + * @dest - destination buffer, @dest_size bytes + * @src - source buffer, @src_size bytes + * + * Returns: 0 on success + * a negative error code on failure + */ +ssize_t coroutine_fn +qcow2_co_decompress(BlockDriverState *bs, void *dest, size_t dest_size, + const void *src, size_t src_size) +{ + BDRVQcow2State *s = bs->opaque; + Qcow2CompressFunc fn; + + switch (s->compression_type) { + case QCOW2_COMPRESSION_TYPE_ZLIB: + fn = qcow2_zlib_decompress; + break; + +#ifdef CONFIG_ZSTD + case QCOW2_COMPRESSION_TYPE_ZSTD: + fn = qcow2_zstd_decompress; + break; +#endif + default: + abort(); + } + + return qcow2_co_do_compress(bs, dest, dest_size, src, src_size, fn); +} + + +/* + * Cryptography + */ + +/* + * Qcow2EncDecFunc: common prototype of qcrypto_block_encrypt() and + * qcrypto_block_decrypt() functions. + */ +typedef int (*Qcow2EncDecFunc)(QCryptoBlock *block, uint64_t offset, + uint8_t *buf, size_t len, Error **errp); + +typedef struct Qcow2EncDecData { + QCryptoBlock *block; + uint64_t offset; + uint8_t *buf; + size_t len; + + Qcow2EncDecFunc func; +} Qcow2EncDecData; + +static int qcow2_encdec_pool_func(void *opaque) +{ + Qcow2EncDecData *data = opaque; + + return data->func(data->block, data->offset, data->buf, data->len, NULL); +} + +static int coroutine_fn +qcow2_co_encdec(BlockDriverState *bs, uint64_t host_offset, + uint64_t guest_offset, void *buf, size_t len, + Qcow2EncDecFunc func) +{ + BDRVQcow2State *s = bs->opaque; + Qcow2EncDecData arg = { + .block = s->crypto, + .offset = s->crypt_physical_offset ? host_offset : guest_offset, + .buf = buf, + .len = len, + .func = func, + }; + uint64_t sector_size; + + assert(s->crypto); + + sector_size = qcrypto_block_get_sector_size(s->crypto); + assert(QEMU_IS_ALIGNED(guest_offset, sector_size)); + assert(QEMU_IS_ALIGNED(host_offset, sector_size)); + assert(QEMU_IS_ALIGNED(len, sector_size)); + + return len == 0 ? 0 : qcow2_co_process(bs, qcow2_encdec_pool_func, &arg); +} + +/* + * qcow2_co_encrypt() + * + * Encrypts one or more contiguous aligned sectors + * + * @host_offset - underlying storage offset of the first sector of the + * data to be encrypted + * + * @guest_offset - guest (virtual) offset of the first sector of the + * data to be encrypted + * + * @buf - buffer with the data to encrypt, that after encryption + * will be written to the underlying storage device at + * @host_offset + * + * @len - length of the buffer (must be a multiple of the encryption + * sector size) + * + * Depending on the encryption method, @host_offset and/or @guest_offset + * may be used for generating the initialization vector for + * encryption. + * + * Note that while the whole range must be aligned on sectors, it + * does not have to be aligned on clusters and can also cross cluster + * boundaries + */ +int coroutine_fn +qcow2_co_encrypt(BlockDriverState *bs, uint64_t host_offset, + uint64_t guest_offset, void *buf, size_t len) +{ + return qcow2_co_encdec(bs, host_offset, guest_offset, buf, len, + qcrypto_block_encrypt); +} + +/* + * qcow2_co_decrypt() + * + * Decrypts one or more contiguous aligned sectors + * Similar to qcow2_co_encrypt + */ +int coroutine_fn +qcow2_co_decrypt(BlockDriverState *bs, uint64_t host_offset, + uint64_t guest_offset, void *buf, size_t len) +{ + return qcow2_co_encdec(bs, host_offset, guest_offset, buf, len, + qcrypto_block_decrypt); +} diff --git a/block/qcow2.c b/block/qcow2.c new file mode 100644 index 000000000..3a90ef278 --- /dev/null +++ b/block/qcow2.c @@ -0,0 +1,5950 @@ +/* + * Block driver for the QCOW version 2 format + * + * Copyright (c) 2004-2006 Fabrice Bellard + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ + +#include "qemu/osdep.h" + +#include "block/qdict.h" +#include "sysemu/block-backend.h" +#include "qemu/main-loop.h" +#include "qemu/module.h" +#include "qcow2.h" +#include "qemu/error-report.h" +#include "qapi/error.h" +#include "qapi/qapi-events-block-core.h" +#include "qapi/qmp/qdict.h" +#include "qapi/qmp/qstring.h" +#include "trace.h" +#include "qemu/option_int.h" +#include "qemu/cutils.h" +#include "qemu/bswap.h" +#include "qapi/qobject-input-visitor.h" +#include "qapi/qapi-visit-block-core.h" +#include "crypto.h" +#include "block/aio_task.h" + +/* + Differences with QCOW: + + - Support for multiple incremental snapshots. + - Memory management by reference counts. + - Clusters which have a reference count of one have the bit + QCOW_OFLAG_COPIED to optimize write performance. + - Size of compressed clusters is stored in sectors to reduce bit usage + in the cluster offsets. + - Support for storing additional data (such as the VM state) in the + snapshots. + - If a backing store is used, the cluster size is not constrained + (could be backported to QCOW). + - L2 tables have always a size of one cluster. +*/ + + +typedef struct { + uint32_t magic; + uint32_t len; +} QEMU_PACKED QCowExtension; + +#define QCOW2_EXT_MAGIC_END 0 +#define QCOW2_EXT_MAGIC_BACKING_FORMAT 0xe2792aca +#define QCOW2_EXT_MAGIC_FEATURE_TABLE 0x6803f857 +#define QCOW2_EXT_MAGIC_CRYPTO_HEADER 0x0537be77 +#define QCOW2_EXT_MAGIC_BITMAPS 0x23852875 +#define QCOW2_EXT_MAGIC_DATA_FILE 0x44415441 + +static int coroutine_fn +qcow2_co_preadv_compressed(BlockDriverState *bs, + uint64_t cluster_descriptor, + uint64_t offset, + uint64_t bytes, + QEMUIOVector *qiov, + size_t qiov_offset); + +static int qcow2_probe(const uint8_t *buf, int buf_size, const char *filename) +{ + const QCowHeader *cow_header = (const void *)buf; + + if (buf_size >= sizeof(QCowHeader) && + be32_to_cpu(cow_header->magic) == QCOW_MAGIC && + be32_to_cpu(cow_header->version) >= 2) + return 100; + else + return 0; +} + + +static ssize_t qcow2_crypto_hdr_read_func(QCryptoBlock *block, size_t offset, + uint8_t *buf, size_t buflen, + void *opaque, Error **errp) +{ + BlockDriverState *bs = opaque; + BDRVQcow2State *s = bs->opaque; + ssize_t ret; + + if ((offset + buflen) > s->crypto_header.length) { + error_setg(errp, "Request for data outside of extension header"); + return -1; + } + + ret = bdrv_pread(bs->file, + s->crypto_header.offset + offset, buf, buflen); + if (ret < 0) { + error_setg_errno(errp, -ret, "Could not read encryption header"); + return -1; + } + return ret; +} + + +static ssize_t qcow2_crypto_hdr_init_func(QCryptoBlock *block, size_t headerlen, + void *opaque, Error **errp) +{ + BlockDriverState *bs = opaque; + BDRVQcow2State *s = bs->opaque; + int64_t ret; + int64_t clusterlen; + + ret = qcow2_alloc_clusters(bs, headerlen); + if (ret < 0) { + error_setg_errno(errp, -ret, + "Cannot allocate cluster for LUKS header size %zu", + headerlen); + return -1; + } + + s->crypto_header.length = headerlen; + s->crypto_header.offset = ret; + + /* + * Zero fill all space in cluster so it has predictable + * content, as we may not initialize some regions of the + * header (eg only 1 out of 8 key slots will be initialized) + */ + clusterlen = size_to_clusters(s, headerlen) * s->cluster_size; + assert(qcow2_pre_write_overlap_check(bs, 0, ret, clusterlen, false) == 0); + ret = bdrv_pwrite_zeroes(bs->file, + ret, + clusterlen, 0); + if (ret < 0) { + error_setg_errno(errp, -ret, "Could not zero fill encryption header"); + return -1; + } + + return ret; +} + + +static ssize_t qcow2_crypto_hdr_write_func(QCryptoBlock *block, size_t offset, + const uint8_t *buf, size_t buflen, + void *opaque, Error **errp) +{ + BlockDriverState *bs = opaque; + BDRVQcow2State *s = bs->opaque; + ssize_t ret; + + if ((offset + buflen) > s->crypto_header.length) { + error_setg(errp, "Request for data outside of extension header"); + return -1; + } + + ret = bdrv_pwrite(bs->file, + s->crypto_header.offset + offset, buf, buflen); + if (ret < 0) { + error_setg_errno(errp, -ret, "Could not read encryption header"); + return -1; + } + return ret; +} + +static QDict* +qcow2_extract_crypto_opts(QemuOpts *opts, const char *fmt, Error **errp) +{ + QDict *cryptoopts_qdict; + QDict *opts_qdict; + + /* Extract "encrypt." options into a qdict */ + opts_qdict = qemu_opts_to_qdict(opts, NULL); + qdict_extract_subqdict(opts_qdict, &cryptoopts_qdict, "encrypt."); + qobject_unref(opts_qdict); + qdict_put_str(cryptoopts_qdict, "format", fmt); + return cryptoopts_qdict; +} + +/* + * read qcow2 extension and fill bs + * start reading from start_offset + * finish reading upon magic of value 0 or when end_offset reached + * unknown magic is skipped (future extension this version knows nothing about) + * return 0 upon success, non-0 otherwise + */ +static int qcow2_read_extensions(BlockDriverState *bs, uint64_t start_offset, + uint64_t end_offset, void **p_feature_table, + int flags, bool *need_update_header, + Error **errp) +{ + BDRVQcow2State *s = bs->opaque; + QCowExtension ext; + uint64_t offset; + int ret; + Qcow2BitmapHeaderExt bitmaps_ext; + + if (need_update_header != NULL) { + *need_update_header = false; + } + +#ifdef DEBUG_EXT + printf("qcow2_read_extensions: start=%ld end=%ld\n", start_offset, end_offset); +#endif + offset = start_offset; + while (offset < end_offset) { + +#ifdef DEBUG_EXT + /* Sanity check */ + if (offset > s->cluster_size) + printf("qcow2_read_extension: suspicious offset %lu\n", offset); + + printf("attempting to read extended header in offset %lu\n", offset); +#endif + + ret = bdrv_pread(bs->file, offset, &ext, sizeof(ext)); + if (ret < 0) { + error_setg_errno(errp, -ret, "qcow2_read_extension: ERROR: " + "pread fail from offset %" PRIu64, offset); + return 1; + } + ext.magic = be32_to_cpu(ext.magic); + ext.len = be32_to_cpu(ext.len); + offset += sizeof(ext); +#ifdef DEBUG_EXT + printf("ext.magic = 0x%x\n", ext.magic); +#endif + if (offset > end_offset || ext.len > end_offset - offset) { + error_setg(errp, "Header extension too large"); + return -EINVAL; + } + + switch (ext.magic) { + case QCOW2_EXT_MAGIC_END: + return 0; + + case QCOW2_EXT_MAGIC_BACKING_FORMAT: + if (ext.len >= sizeof(bs->backing_format)) { + error_setg(errp, "ERROR: ext_backing_format: len=%" PRIu32 + " too large (>=%zu)", ext.len, + sizeof(bs->backing_format)); + return 2; + } + ret = bdrv_pread(bs->file, offset, bs->backing_format, ext.len); + if (ret < 0) { + error_setg_errno(errp, -ret, "ERROR: ext_backing_format: " + "Could not read format name"); + return 3; + } + bs->backing_format[ext.len] = '\0'; + s->image_backing_format = g_strdup(bs->backing_format); +#ifdef DEBUG_EXT + printf("Qcow2: Got format extension %s\n", bs->backing_format); +#endif + break; + + case QCOW2_EXT_MAGIC_FEATURE_TABLE: + if (p_feature_table != NULL) { + void *feature_table = g_malloc0(ext.len + 2 * sizeof(Qcow2Feature)); + ret = bdrv_pread(bs->file, offset , feature_table, ext.len); + if (ret < 0) { + error_setg_errno(errp, -ret, "ERROR: ext_feature_table: " + "Could not read table"); + return ret; + } + + *p_feature_table = feature_table; + } + break; + + case QCOW2_EXT_MAGIC_CRYPTO_HEADER: { + unsigned int cflags = 0; + if (s->crypt_method_header != QCOW_CRYPT_LUKS) { + error_setg(errp, "CRYPTO header extension only " + "expected with LUKS encryption method"); + return -EINVAL; + } + if (ext.len != sizeof(Qcow2CryptoHeaderExtension)) { + error_setg(errp, "CRYPTO header extension size %u, " + "but expected size %zu", ext.len, + sizeof(Qcow2CryptoHeaderExtension)); + return -EINVAL; + } + + ret = bdrv_pread(bs->file, offset, &s->crypto_header, ext.len); + if (ret < 0) { + error_setg_errno(errp, -ret, + "Unable to read CRYPTO header extension"); + return ret; + } + s->crypto_header.offset = be64_to_cpu(s->crypto_header.offset); + s->crypto_header.length = be64_to_cpu(s->crypto_header.length); + + if ((s->crypto_header.offset % s->cluster_size) != 0) { + error_setg(errp, "Encryption header offset '%" PRIu64 "' is " + "not a multiple of cluster size '%u'", + s->crypto_header.offset, s->cluster_size); + return -EINVAL; + } + + if (flags & BDRV_O_NO_IO) { + cflags |= QCRYPTO_BLOCK_OPEN_NO_IO; + } + s->crypto = qcrypto_block_open(s->crypto_opts, "encrypt.", + qcow2_crypto_hdr_read_func, + bs, cflags, QCOW2_MAX_THREADS, errp); + if (!s->crypto) { + return -EINVAL; + } + } break; + + case QCOW2_EXT_MAGIC_BITMAPS: + if (ext.len != sizeof(bitmaps_ext)) { + error_setg_errno(errp, -ret, "bitmaps_ext: " + "Invalid extension length"); + return -EINVAL; + } + + if (!(s->autoclear_features & QCOW2_AUTOCLEAR_BITMAPS)) { + if (s->qcow_version < 3) { + /* Let's be a bit more specific */ + warn_report("This qcow2 v2 image contains bitmaps, but " + "they may have been modified by a program " + "without persistent bitmap support; so now " + "they must all be considered inconsistent"); + } else { + warn_report("a program lacking bitmap support " + "modified this file, so all bitmaps are now " + "considered inconsistent"); + } + error_printf("Some clusters may be leaked, " + "run 'qemu-img check -r' on the image " + "file to fix."); + if (need_update_header != NULL) { + /* Updating is needed to drop invalid bitmap extension. */ + *need_update_header = true; + } + break; + } + + ret = bdrv_pread(bs->file, offset, &bitmaps_ext, ext.len); + if (ret < 0) { + error_setg_errno(errp, -ret, "bitmaps_ext: " + "Could not read ext header"); + return ret; + } + + if (bitmaps_ext.reserved32 != 0) { + error_setg_errno(errp, -ret, "bitmaps_ext: " + "Reserved field is not zero"); + return -EINVAL; + } + + bitmaps_ext.nb_bitmaps = be32_to_cpu(bitmaps_ext.nb_bitmaps); + bitmaps_ext.bitmap_directory_size = + be64_to_cpu(bitmaps_ext.bitmap_directory_size); + bitmaps_ext.bitmap_directory_offset = + be64_to_cpu(bitmaps_ext.bitmap_directory_offset); + + if (bitmaps_ext.nb_bitmaps > QCOW2_MAX_BITMAPS) { + error_setg(errp, + "bitmaps_ext: Image has %" PRIu32 " bitmaps, " + "exceeding the QEMU supported maximum of %d", + bitmaps_ext.nb_bitmaps, QCOW2_MAX_BITMAPS); + return -EINVAL; + } + + if (bitmaps_ext.nb_bitmaps == 0) { + error_setg(errp, "found bitmaps extension with zero bitmaps"); + return -EINVAL; + } + + if (offset_into_cluster(s, bitmaps_ext.bitmap_directory_offset)) { + error_setg(errp, "bitmaps_ext: " + "invalid bitmap directory offset"); + return -EINVAL; + } + + if (bitmaps_ext.bitmap_directory_size > + QCOW2_MAX_BITMAP_DIRECTORY_SIZE) { + error_setg(errp, "bitmaps_ext: " + "bitmap directory size (%" PRIu64 ") exceeds " + "the maximum supported size (%d)", + bitmaps_ext.bitmap_directory_size, + QCOW2_MAX_BITMAP_DIRECTORY_SIZE); + return -EINVAL; + } + + s->nb_bitmaps = bitmaps_ext.nb_bitmaps; + s->bitmap_directory_offset = + bitmaps_ext.bitmap_directory_offset; + s->bitmap_directory_size = + bitmaps_ext.bitmap_directory_size; + +#ifdef DEBUG_EXT + printf("Qcow2: Got bitmaps extension: " + "offset=%" PRIu64 " nb_bitmaps=%" PRIu32 "\n", + s->bitmap_directory_offset, s->nb_bitmaps); +#endif + break; + + case QCOW2_EXT_MAGIC_DATA_FILE: + { + s->image_data_file = g_malloc0(ext.len + 1); + ret = bdrv_pread(bs->file, offset, s->image_data_file, ext.len); + if (ret < 0) { + error_setg_errno(errp, -ret, + "ERROR: Could not read data file name"); + return ret; + } +#ifdef DEBUG_EXT + printf("Qcow2: Got external data file %s\n", s->image_data_file); +#endif + break; + } + + default: + /* unknown magic - save it in case we need to rewrite the header */ + /* If you add a new feature, make sure to also update the fast + * path of qcow2_make_empty() to deal with it. */ + { + Qcow2UnknownHeaderExtension *uext; + + uext = g_malloc0(sizeof(*uext) + ext.len); + uext->magic = ext.magic; + uext->len = ext.len; + QLIST_INSERT_HEAD(&s->unknown_header_ext, uext, next); + + ret = bdrv_pread(bs->file, offset , uext->data, uext->len); + if (ret < 0) { + error_setg_errno(errp, -ret, "ERROR: unknown extension: " + "Could not read data"); + return ret; + } + } + break; + } + + offset += ((ext.len + 7) & ~7); + } + + return 0; +} + +static void cleanup_unknown_header_ext(BlockDriverState *bs) +{ + BDRVQcow2State *s = bs->opaque; + Qcow2UnknownHeaderExtension *uext, *next; + + QLIST_FOREACH_SAFE(uext, &s->unknown_header_ext, next, next) { + QLIST_REMOVE(uext, next); + g_free(uext); + } +} + +static void report_unsupported_feature(Error **errp, Qcow2Feature *table, + uint64_t mask) +{ + g_autoptr(GString) features = g_string_sized_new(60); + + while (table && table->name[0] != '\0') { + if (table->type == QCOW2_FEAT_TYPE_INCOMPATIBLE) { + if (mask & (1ULL << table->bit)) { + if (features->len > 0) { + g_string_append(features, ", "); + } + g_string_append_printf(features, "%.46s", table->name); + mask &= ~(1ULL << table->bit); + } + } + table++; + } + + if (mask) { + if (features->len > 0) { + g_string_append(features, ", "); + } + g_string_append_printf(features, + "Unknown incompatible feature: %" PRIx64, mask); + } + + error_setg(errp, "Unsupported qcow2 feature(s): %s", features->str); +} + +/* + * Sets the dirty bit and flushes afterwards if necessary. + * + * The incompatible_features bit is only set if the image file header was + * updated successfully. Therefore it is not required to check the return + * value of this function. + */ +int qcow2_mark_dirty(BlockDriverState *bs) +{ + BDRVQcow2State *s = bs->opaque; + uint64_t val; + int ret; + + assert(s->qcow_version >= 3); + + if (s->incompatible_features & QCOW2_INCOMPAT_DIRTY) { + return 0; /* already dirty */ + } + + val = cpu_to_be64(s->incompatible_features | QCOW2_INCOMPAT_DIRTY); + ret = bdrv_pwrite(bs->file, offsetof(QCowHeader, incompatible_features), + &val, sizeof(val)); + if (ret < 0) { + return ret; + } + ret = bdrv_flush(bs->file->bs); + if (ret < 0) { + return ret; + } + + /* Only treat image as dirty if the header was updated successfully */ + s->incompatible_features |= QCOW2_INCOMPAT_DIRTY; + return 0; +} + +/* + * Clears the dirty bit and flushes before if necessary. Only call this + * function when there are no pending requests, it does not guard against + * concurrent requests dirtying the image. + */ +static int qcow2_mark_clean(BlockDriverState *bs) +{ + BDRVQcow2State *s = bs->opaque; + + if (s->incompatible_features & QCOW2_INCOMPAT_DIRTY) { + int ret; + + s->incompatible_features &= ~QCOW2_INCOMPAT_DIRTY; + + ret = qcow2_flush_caches(bs); + if (ret < 0) { + return ret; + } + + return qcow2_update_header(bs); + } + return 0; +} + +/* + * Marks the image as corrupt. + */ +int qcow2_mark_corrupt(BlockDriverState *bs) +{ + BDRVQcow2State *s = bs->opaque; + + s->incompatible_features |= QCOW2_INCOMPAT_CORRUPT; + return qcow2_update_header(bs); +} + +/* + * Marks the image as consistent, i.e., unsets the corrupt bit, and flushes + * before if necessary. + */ +int qcow2_mark_consistent(BlockDriverState *bs) +{ + BDRVQcow2State *s = bs->opaque; + + if (s->incompatible_features & QCOW2_INCOMPAT_CORRUPT) { + int ret = qcow2_flush_caches(bs); + if (ret < 0) { + return ret; + } + + s->incompatible_features &= ~QCOW2_INCOMPAT_CORRUPT; + return qcow2_update_header(bs); + } + return 0; +} + +static void qcow2_add_check_result(BdrvCheckResult *out, + const BdrvCheckResult *src, + bool set_allocation_info) +{ + out->corruptions += src->corruptions; + out->leaks += src->leaks; + out->check_errors += src->check_errors; + out->corruptions_fixed += src->corruptions_fixed; + out->leaks_fixed += src->leaks_fixed; + + if (set_allocation_info) { + out->image_end_offset = src->image_end_offset; + out->bfi = src->bfi; + } +} + +static int coroutine_fn qcow2_co_check_locked(BlockDriverState *bs, + BdrvCheckResult *result, + BdrvCheckMode fix) +{ + BdrvCheckResult snapshot_res = {}; + BdrvCheckResult refcount_res = {}; + int ret; + + memset(result, 0, sizeof(*result)); + + ret = qcow2_check_read_snapshot_table(bs, &snapshot_res, fix); + if (ret < 0) { + qcow2_add_check_result(result, &snapshot_res, false); + return ret; + } + + ret = qcow2_check_refcounts(bs, &refcount_res, fix); + qcow2_add_check_result(result, &refcount_res, true); + if (ret < 0) { + qcow2_add_check_result(result, &snapshot_res, false); + return ret; + } + + ret = qcow2_check_fix_snapshot_table(bs, &snapshot_res, fix); + qcow2_add_check_result(result, &snapshot_res, false); + if (ret < 0) { + return ret; + } + + if (fix && result->check_errors == 0 && result->corruptions == 0) { + ret = qcow2_mark_clean(bs); + if (ret < 0) { + return ret; + } + return qcow2_mark_consistent(bs); + } + return ret; +} + +static int coroutine_fn qcow2_co_check(BlockDriverState *bs, + BdrvCheckResult *result, + BdrvCheckMode fix) +{ + BDRVQcow2State *s = bs->opaque; + int ret; + + qemu_co_mutex_lock(&s->lock); + ret = qcow2_co_check_locked(bs, result, fix); + qemu_co_mutex_unlock(&s->lock); + return ret; +} + +int qcow2_validate_table(BlockDriverState *bs, uint64_t offset, + uint64_t entries, size_t entry_len, + int64_t max_size_bytes, const char *table_name, + Error **errp) +{ + BDRVQcow2State *s = bs->opaque; + + if (entries > max_size_bytes / entry_len) { + error_setg(errp, "%s too large", table_name); + return -EFBIG; + } + + /* Use signed INT64_MAX as the maximum even for uint64_t header fields, + * because values will be passed to qemu functions taking int64_t. */ + if ((INT64_MAX - entries * entry_len < offset) || + (offset_into_cluster(s, offset) != 0)) { + error_setg(errp, "%s offset invalid", table_name); + return -EINVAL; + } + + return 0; +} + +static const char *const mutable_opts[] = { + QCOW2_OPT_LAZY_REFCOUNTS, + QCOW2_OPT_DISCARD_REQUEST, + QCOW2_OPT_DISCARD_SNAPSHOT, + QCOW2_OPT_DISCARD_OTHER, + QCOW2_OPT_OVERLAP, + QCOW2_OPT_OVERLAP_TEMPLATE, + QCOW2_OPT_OVERLAP_MAIN_HEADER, + QCOW2_OPT_OVERLAP_ACTIVE_L1, + QCOW2_OPT_OVERLAP_ACTIVE_L2, + QCOW2_OPT_OVERLAP_REFCOUNT_TABLE, + QCOW2_OPT_OVERLAP_REFCOUNT_BLOCK, + QCOW2_OPT_OVERLAP_SNAPSHOT_TABLE, + QCOW2_OPT_OVERLAP_INACTIVE_L1, + QCOW2_OPT_OVERLAP_INACTIVE_L2, + QCOW2_OPT_OVERLAP_BITMAP_DIRECTORY, + QCOW2_OPT_CACHE_SIZE, + QCOW2_OPT_L2_CACHE_SIZE, + QCOW2_OPT_L2_CACHE_ENTRY_SIZE, + QCOW2_OPT_REFCOUNT_CACHE_SIZE, + QCOW2_OPT_CACHE_CLEAN_INTERVAL, + NULL +}; + +static QemuOptsList qcow2_runtime_opts = { + .name = "qcow2", + .head = QTAILQ_HEAD_INITIALIZER(qcow2_runtime_opts.head), + .desc = { + { + .name = QCOW2_OPT_LAZY_REFCOUNTS, + .type = QEMU_OPT_BOOL, + .help = "Postpone refcount updates", + }, + { + .name = QCOW2_OPT_DISCARD_REQUEST, + .type = QEMU_OPT_BOOL, + .help = "Pass guest discard requests to the layer below", + }, + { + .name = QCOW2_OPT_DISCARD_SNAPSHOT, + .type = QEMU_OPT_BOOL, + .help = "Generate discard requests when snapshot related space " + "is freed", + }, + { + .name = QCOW2_OPT_DISCARD_OTHER, + .type = QEMU_OPT_BOOL, + .help = "Generate discard requests when other clusters are freed", + }, + { + .name = QCOW2_OPT_OVERLAP, + .type = QEMU_OPT_STRING, + .help = "Selects which overlap checks to perform from a range of " + "templates (none, constant, cached, all)", + }, + { + .name = QCOW2_OPT_OVERLAP_TEMPLATE, + .type = QEMU_OPT_STRING, + .help = "Selects which overlap checks to perform from a range of " + "templates (none, constant, cached, all)", + }, + { + .name = QCOW2_OPT_OVERLAP_MAIN_HEADER, + .type = QEMU_OPT_BOOL, + .help = "Check for unintended writes into the main qcow2 header", + }, + { + .name = QCOW2_OPT_OVERLAP_ACTIVE_L1, + .type = QEMU_OPT_BOOL, + .help = "Check for unintended writes into the active L1 table", + }, + { + .name = QCOW2_OPT_OVERLAP_ACTIVE_L2, + .type = QEMU_OPT_BOOL, + .help = "Check for unintended writes into an active L2 table", + }, + { + .name = QCOW2_OPT_OVERLAP_REFCOUNT_TABLE, + .type = QEMU_OPT_BOOL, + .help = "Check for unintended writes into the refcount table", + }, + { + .name = QCOW2_OPT_OVERLAP_REFCOUNT_BLOCK, + .type = QEMU_OPT_BOOL, + .help = "Check for unintended writes into a refcount block", + }, + { + .name = QCOW2_OPT_OVERLAP_SNAPSHOT_TABLE, + .type = QEMU_OPT_BOOL, + .help = "Check for unintended writes into the snapshot table", + }, + { + .name = QCOW2_OPT_OVERLAP_INACTIVE_L1, + .type = QEMU_OPT_BOOL, + .help = "Check for unintended writes into an inactive L1 table", + }, + { + .name = QCOW2_OPT_OVERLAP_INACTIVE_L2, + .type = QEMU_OPT_BOOL, + .help = "Check for unintended writes into an inactive L2 table", + }, + { + .name = QCOW2_OPT_OVERLAP_BITMAP_DIRECTORY, + .type = QEMU_OPT_BOOL, + .help = "Check for unintended writes into the bitmap directory", + }, + { + .name = QCOW2_OPT_CACHE_SIZE, + .type = QEMU_OPT_SIZE, + .help = "Maximum combined metadata (L2 tables and refcount blocks) " + "cache size", + }, + { + .name = QCOW2_OPT_L2_CACHE_SIZE, + .type = QEMU_OPT_SIZE, + .help = "Maximum L2 table cache size", + }, + { + .name = QCOW2_OPT_L2_CACHE_ENTRY_SIZE, + .type = QEMU_OPT_SIZE, + .help = "Size of each entry in the L2 cache", + }, + { + .name = QCOW2_OPT_REFCOUNT_CACHE_SIZE, + .type = QEMU_OPT_SIZE, + .help = "Maximum refcount block cache size", + }, + { + .name = QCOW2_OPT_CACHE_CLEAN_INTERVAL, + .type = QEMU_OPT_NUMBER, + .help = "Clean unused cache entries after this time (in seconds)", + }, + BLOCK_CRYPTO_OPT_DEF_KEY_SECRET("encrypt.", + "ID of secret providing qcow2 AES key or LUKS passphrase"), + { /* end of list */ } + }, +}; + +static const char *overlap_bool_option_names[QCOW2_OL_MAX_BITNR] = { + [QCOW2_OL_MAIN_HEADER_BITNR] = QCOW2_OPT_OVERLAP_MAIN_HEADER, + [QCOW2_OL_ACTIVE_L1_BITNR] = QCOW2_OPT_OVERLAP_ACTIVE_L1, + [QCOW2_OL_ACTIVE_L2_BITNR] = QCOW2_OPT_OVERLAP_ACTIVE_L2, + [QCOW2_OL_REFCOUNT_TABLE_BITNR] = QCOW2_OPT_OVERLAP_REFCOUNT_TABLE, + [QCOW2_OL_REFCOUNT_BLOCK_BITNR] = QCOW2_OPT_OVERLAP_REFCOUNT_BLOCK, + [QCOW2_OL_SNAPSHOT_TABLE_BITNR] = QCOW2_OPT_OVERLAP_SNAPSHOT_TABLE, + [QCOW2_OL_INACTIVE_L1_BITNR] = QCOW2_OPT_OVERLAP_INACTIVE_L1, + [QCOW2_OL_INACTIVE_L2_BITNR] = QCOW2_OPT_OVERLAP_INACTIVE_L2, + [QCOW2_OL_BITMAP_DIRECTORY_BITNR] = QCOW2_OPT_OVERLAP_BITMAP_DIRECTORY, +}; + +static void cache_clean_timer_cb(void *opaque) +{ + BlockDriverState *bs = opaque; + BDRVQcow2State *s = bs->opaque; + qcow2_cache_clean_unused(s->l2_table_cache); + qcow2_cache_clean_unused(s->refcount_block_cache); + timer_mod(s->cache_clean_timer, qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL) + + (int64_t) s->cache_clean_interval * 1000); +} + +static void cache_clean_timer_init(BlockDriverState *bs, AioContext *context) +{ + BDRVQcow2State *s = bs->opaque; + if (s->cache_clean_interval > 0) { + s->cache_clean_timer = aio_timer_new(context, QEMU_CLOCK_VIRTUAL, + SCALE_MS, cache_clean_timer_cb, + bs); + timer_mod(s->cache_clean_timer, qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL) + + (int64_t) s->cache_clean_interval * 1000); + } +} + +static void cache_clean_timer_del(BlockDriverState *bs) +{ + BDRVQcow2State *s = bs->opaque; + if (s->cache_clean_timer) { + timer_del(s->cache_clean_timer); + timer_free(s->cache_clean_timer); + s->cache_clean_timer = NULL; + } +} + +static void qcow2_detach_aio_context(BlockDriverState *bs) +{ + cache_clean_timer_del(bs); +} + +static void qcow2_attach_aio_context(BlockDriverState *bs, + AioContext *new_context) +{ + cache_clean_timer_init(bs, new_context); +} + +static void read_cache_sizes(BlockDriverState *bs, QemuOpts *opts, + uint64_t *l2_cache_size, + uint64_t *l2_cache_entry_size, + uint64_t *refcount_cache_size, Error **errp) +{ + BDRVQcow2State *s = bs->opaque; + uint64_t combined_cache_size, l2_cache_max_setting; + bool l2_cache_size_set, refcount_cache_size_set, combined_cache_size_set; + bool l2_cache_entry_size_set; + int min_refcount_cache = MIN_REFCOUNT_CACHE_SIZE * s->cluster_size; + uint64_t virtual_disk_size = bs->total_sectors * BDRV_SECTOR_SIZE; + uint64_t max_l2_entries = DIV_ROUND_UP(virtual_disk_size, s->cluster_size); + /* An L2 table is always one cluster in size so the max cache size + * should be a multiple of the cluster size. */ + uint64_t max_l2_cache = ROUND_UP(max_l2_entries * l2_entry_size(s), + s->cluster_size); + + combined_cache_size_set = qemu_opt_get(opts, QCOW2_OPT_CACHE_SIZE); + l2_cache_size_set = qemu_opt_get(opts, QCOW2_OPT_L2_CACHE_SIZE); + refcount_cache_size_set = qemu_opt_get(opts, QCOW2_OPT_REFCOUNT_CACHE_SIZE); + l2_cache_entry_size_set = qemu_opt_get(opts, QCOW2_OPT_L2_CACHE_ENTRY_SIZE); + + combined_cache_size = qemu_opt_get_size(opts, QCOW2_OPT_CACHE_SIZE, 0); + l2_cache_max_setting = qemu_opt_get_size(opts, QCOW2_OPT_L2_CACHE_SIZE, + DEFAULT_L2_CACHE_MAX_SIZE); + *refcount_cache_size = qemu_opt_get_size(opts, + QCOW2_OPT_REFCOUNT_CACHE_SIZE, 0); + + *l2_cache_entry_size = qemu_opt_get_size( + opts, QCOW2_OPT_L2_CACHE_ENTRY_SIZE, s->cluster_size); + + *l2_cache_size = MIN(max_l2_cache, l2_cache_max_setting); + + if (combined_cache_size_set) { + if (l2_cache_size_set && refcount_cache_size_set) { + error_setg(errp, QCOW2_OPT_CACHE_SIZE ", " QCOW2_OPT_L2_CACHE_SIZE + " and " QCOW2_OPT_REFCOUNT_CACHE_SIZE " may not be set " + "at the same time"); + return; + } else if (l2_cache_size_set && + (l2_cache_max_setting > combined_cache_size)) { + error_setg(errp, QCOW2_OPT_L2_CACHE_SIZE " may not exceed " + QCOW2_OPT_CACHE_SIZE); + return; + } else if (*refcount_cache_size > combined_cache_size) { + error_setg(errp, QCOW2_OPT_REFCOUNT_CACHE_SIZE " may not exceed " + QCOW2_OPT_CACHE_SIZE); + return; + } + + if (l2_cache_size_set) { + *refcount_cache_size = combined_cache_size - *l2_cache_size; + } else if (refcount_cache_size_set) { + *l2_cache_size = combined_cache_size - *refcount_cache_size; + } else { + /* Assign as much memory as possible to the L2 cache, and + * use the remainder for the refcount cache */ + if (combined_cache_size >= max_l2_cache + min_refcount_cache) { + *l2_cache_size = max_l2_cache; + *refcount_cache_size = combined_cache_size - *l2_cache_size; + } else { + *refcount_cache_size = + MIN(combined_cache_size, min_refcount_cache); + *l2_cache_size = combined_cache_size - *refcount_cache_size; + } + } + } + + /* + * If the L2 cache is not enough to cover the whole disk then + * default to 4KB entries. Smaller entries reduce the cost of + * loads and evictions and increase I/O performance. + */ + if (*l2_cache_size < max_l2_cache && !l2_cache_entry_size_set) { + *l2_cache_entry_size = MIN(s->cluster_size, 4096); + } + + /* l2_cache_size and refcount_cache_size are ensured to have at least + * their minimum values in qcow2_update_options_prepare() */ + + if (*l2_cache_entry_size < (1 << MIN_CLUSTER_BITS) || + *l2_cache_entry_size > s->cluster_size || + !is_power_of_2(*l2_cache_entry_size)) { + error_setg(errp, "L2 cache entry size must be a power of two " + "between %d and the cluster size (%d)", + 1 << MIN_CLUSTER_BITS, s->cluster_size); + return; + } +} + +typedef struct Qcow2ReopenState { + Qcow2Cache *l2_table_cache; + Qcow2Cache *refcount_block_cache; + int l2_slice_size; /* Number of entries in a slice of the L2 table */ + bool use_lazy_refcounts; + int overlap_check; + bool discard_passthrough[QCOW2_DISCARD_MAX]; + uint64_t cache_clean_interval; + QCryptoBlockOpenOptions *crypto_opts; /* Disk encryption runtime options */ +} Qcow2ReopenState; + +static int qcow2_update_options_prepare(BlockDriverState *bs, + Qcow2ReopenState *r, + QDict *options, int flags, + Error **errp) +{ + BDRVQcow2State *s = bs->opaque; + QemuOpts *opts = NULL; + const char *opt_overlap_check, *opt_overlap_check_template; + int overlap_check_template = 0; + uint64_t l2_cache_size, l2_cache_entry_size, refcount_cache_size; + int i; + const char *encryptfmt; + QDict *encryptopts = NULL; + Error *local_err = NULL; + int ret; + + qdict_extract_subqdict(options, &encryptopts, "encrypt."); + encryptfmt = qdict_get_try_str(encryptopts, "format"); + + opts = qemu_opts_create(&qcow2_runtime_opts, NULL, 0, &error_abort); + if (!qemu_opts_absorb_qdict(opts, options, errp)) { + ret = -EINVAL; + goto fail; + } + + /* get L2 table/refcount block cache size from command line options */ + read_cache_sizes(bs, opts, &l2_cache_size, &l2_cache_entry_size, + &refcount_cache_size, &local_err); + if (local_err) { + error_propagate(errp, local_err); + ret = -EINVAL; + goto fail; + } + + l2_cache_size /= l2_cache_entry_size; + if (l2_cache_size < MIN_L2_CACHE_SIZE) { + l2_cache_size = MIN_L2_CACHE_SIZE; + } + if (l2_cache_size > INT_MAX) { + error_setg(errp, "L2 cache size too big"); + ret = -EINVAL; + goto fail; + } + + refcount_cache_size /= s->cluster_size; + if (refcount_cache_size < MIN_REFCOUNT_CACHE_SIZE) { + refcount_cache_size = MIN_REFCOUNT_CACHE_SIZE; + } + if (refcount_cache_size > INT_MAX) { + error_setg(errp, "Refcount cache size too big"); + ret = -EINVAL; + goto fail; + } + + /* alloc new L2 table/refcount block cache, flush old one */ + if (s->l2_table_cache) { + ret = qcow2_cache_flush(bs, s->l2_table_cache); + if (ret) { + error_setg_errno(errp, -ret, "Failed to flush the L2 table cache"); + goto fail; + } + } + + if (s->refcount_block_cache) { + ret = qcow2_cache_flush(bs, s->refcount_block_cache); + if (ret) { + error_setg_errno(errp, -ret, + "Failed to flush the refcount block cache"); + goto fail; + } + } + + r->l2_slice_size = l2_cache_entry_size / l2_entry_size(s); + r->l2_table_cache = qcow2_cache_create(bs, l2_cache_size, + l2_cache_entry_size); + r->refcount_block_cache = qcow2_cache_create(bs, refcount_cache_size, + s->cluster_size); + if (r->l2_table_cache == NULL || r->refcount_block_cache == NULL) { + error_setg(errp, "Could not allocate metadata caches"); + ret = -ENOMEM; + goto fail; + } + + /* New interval for cache cleanup timer */ + r->cache_clean_interval = + qemu_opt_get_number(opts, QCOW2_OPT_CACHE_CLEAN_INTERVAL, + DEFAULT_CACHE_CLEAN_INTERVAL); +#ifndef CONFIG_LINUX + if (r->cache_clean_interval != 0) { + error_setg(errp, QCOW2_OPT_CACHE_CLEAN_INTERVAL + " not supported on this host"); + ret = -EINVAL; + goto fail; + } +#endif + if (r->cache_clean_interval > UINT_MAX) { + error_setg(errp, "Cache clean interval too big"); + ret = -EINVAL; + goto fail; + } + + /* lazy-refcounts; flush if going from enabled to disabled */ + r->use_lazy_refcounts = qemu_opt_get_bool(opts, QCOW2_OPT_LAZY_REFCOUNTS, + (s->compatible_features & QCOW2_COMPAT_LAZY_REFCOUNTS)); + if (r->use_lazy_refcounts && s->qcow_version < 3) { + error_setg(errp, "Lazy refcounts require a qcow2 image with at least " + "qemu 1.1 compatibility level"); + ret = -EINVAL; + goto fail; + } + + if (s->use_lazy_refcounts && !r->use_lazy_refcounts) { + ret = qcow2_mark_clean(bs); + if (ret < 0) { + error_setg_errno(errp, -ret, "Failed to disable lazy refcounts"); + goto fail; + } + } + + /* Overlap check options */ + opt_overlap_check = qemu_opt_get(opts, QCOW2_OPT_OVERLAP); + opt_overlap_check_template = qemu_opt_get(opts, QCOW2_OPT_OVERLAP_TEMPLATE); + if (opt_overlap_check_template && opt_overlap_check && + strcmp(opt_overlap_check_template, opt_overlap_check)) + { + error_setg(errp, "Conflicting values for qcow2 options '" + QCOW2_OPT_OVERLAP "' ('%s') and '" QCOW2_OPT_OVERLAP_TEMPLATE + "' ('%s')", opt_overlap_check, opt_overlap_check_template); + ret = -EINVAL; + goto fail; + } + if (!opt_overlap_check) { + opt_overlap_check = opt_overlap_check_template ?: "cached"; + } + + if (!strcmp(opt_overlap_check, "none")) { + overlap_check_template = 0; + } else if (!strcmp(opt_overlap_check, "constant")) { + overlap_check_template = QCOW2_OL_CONSTANT; + } else if (!strcmp(opt_overlap_check, "cached")) { + overlap_check_template = QCOW2_OL_CACHED; + } else if (!strcmp(opt_overlap_check, "all")) { + overlap_check_template = QCOW2_OL_ALL; + } else { + error_setg(errp, "Unsupported value '%s' for qcow2 option " + "'overlap-check'. Allowed are any of the following: " + "none, constant, cached, all", opt_overlap_check); + ret = -EINVAL; + goto fail; + } + + r->overlap_check = 0; + for (i = 0; i < QCOW2_OL_MAX_BITNR; i++) { + /* overlap-check defines a template bitmask, but every flag may be + * overwritten through the associated boolean option */ + r->overlap_check |= + qemu_opt_get_bool(opts, overlap_bool_option_names[i], + overlap_check_template & (1 << i)) << i; + } + + r->discard_passthrough[QCOW2_DISCARD_NEVER] = false; + r->discard_passthrough[QCOW2_DISCARD_ALWAYS] = true; + r->discard_passthrough[QCOW2_DISCARD_REQUEST] = + qemu_opt_get_bool(opts, QCOW2_OPT_DISCARD_REQUEST, + flags & BDRV_O_UNMAP); + r->discard_passthrough[QCOW2_DISCARD_SNAPSHOT] = + qemu_opt_get_bool(opts, QCOW2_OPT_DISCARD_SNAPSHOT, true); + r->discard_passthrough[QCOW2_DISCARD_OTHER] = + qemu_opt_get_bool(opts, QCOW2_OPT_DISCARD_OTHER, false); + + switch (s->crypt_method_header) { + case QCOW_CRYPT_NONE: + if (encryptfmt) { + error_setg(errp, "No encryption in image header, but options " + "specified format '%s'", encryptfmt); + ret = -EINVAL; + goto fail; + } + break; + + case QCOW_CRYPT_AES: + if (encryptfmt && !g_str_equal(encryptfmt, "aes")) { + error_setg(errp, + "Header reported 'aes' encryption format but " + "options specify '%s'", encryptfmt); + ret = -EINVAL; + goto fail; + } + qdict_put_str(encryptopts, "format", "qcow"); + r->crypto_opts = block_crypto_open_opts_init(encryptopts, errp); + break; + + case QCOW_CRYPT_LUKS: + if (encryptfmt && !g_str_equal(encryptfmt, "luks")) { + error_setg(errp, + "Header reported 'luks' encryption format but " + "options specify '%s'", encryptfmt); + ret = -EINVAL; + goto fail; + } + qdict_put_str(encryptopts, "format", "luks"); + r->crypto_opts = block_crypto_open_opts_init(encryptopts, errp); + break; + + default: + error_setg(errp, "Unsupported encryption method %d", + s->crypt_method_header); + break; + } + if (s->crypt_method_header != QCOW_CRYPT_NONE && !r->crypto_opts) { + ret = -EINVAL; + goto fail; + } + + ret = 0; +fail: + qobject_unref(encryptopts); + qemu_opts_del(opts); + opts = NULL; + return ret; +} + +static void qcow2_update_options_commit(BlockDriverState *bs, + Qcow2ReopenState *r) +{ + BDRVQcow2State *s = bs->opaque; + int i; + + if (s->l2_table_cache) { + qcow2_cache_destroy(s->l2_table_cache); + } + if (s->refcount_block_cache) { + qcow2_cache_destroy(s->refcount_block_cache); + } + s->l2_table_cache = r->l2_table_cache; + s->refcount_block_cache = r->refcount_block_cache; + s->l2_slice_size = r->l2_slice_size; + + s->overlap_check = r->overlap_check; + s->use_lazy_refcounts = r->use_lazy_refcounts; + + for (i = 0; i < QCOW2_DISCARD_MAX; i++) { + s->discard_passthrough[i] = r->discard_passthrough[i]; + } + + if (s->cache_clean_interval != r->cache_clean_interval) { + cache_clean_timer_del(bs); + s->cache_clean_interval = r->cache_clean_interval; + cache_clean_timer_init(bs, bdrv_get_aio_context(bs)); + } + + qapi_free_QCryptoBlockOpenOptions(s->crypto_opts); + s->crypto_opts = r->crypto_opts; +} + +static void qcow2_update_options_abort(BlockDriverState *bs, + Qcow2ReopenState *r) +{ + if (r->l2_table_cache) { + qcow2_cache_destroy(r->l2_table_cache); + } + if (r->refcount_block_cache) { + qcow2_cache_destroy(r->refcount_block_cache); + } + qapi_free_QCryptoBlockOpenOptions(r->crypto_opts); +} + +static int qcow2_update_options(BlockDriverState *bs, QDict *options, + int flags, Error **errp) +{ + Qcow2ReopenState r = {}; + int ret; + + ret = qcow2_update_options_prepare(bs, &r, options, flags, errp); + if (ret >= 0) { + qcow2_update_options_commit(bs, &r); + } else { + qcow2_update_options_abort(bs, &r); + } + + return ret; +} + +static int validate_compression_type(BDRVQcow2State *s, Error **errp) +{ + switch (s->compression_type) { + case QCOW2_COMPRESSION_TYPE_ZLIB: +#ifdef CONFIG_ZSTD + case QCOW2_COMPRESSION_TYPE_ZSTD: +#endif + break; + + default: + error_setg(errp, "qcow2: unknown compression type: %u", + s->compression_type); + return -ENOTSUP; + } + + /* + * if the compression type differs from QCOW2_COMPRESSION_TYPE_ZLIB + * the incompatible feature flag must be set + */ + if (s->compression_type == QCOW2_COMPRESSION_TYPE_ZLIB) { + if (s->incompatible_features & QCOW2_INCOMPAT_COMPRESSION) { + error_setg(errp, "qcow2: Compression type incompatible feature " + "bit must not be set"); + return -EINVAL; + } + } else { + if (!(s->incompatible_features & QCOW2_INCOMPAT_COMPRESSION)) { + error_setg(errp, "qcow2: Compression type incompatible feature " + "bit must be set"); + return -EINVAL; + } + } + + return 0; +} + +/* Called with s->lock held. */ +static int coroutine_fn qcow2_do_open(BlockDriverState *bs, QDict *options, + int flags, Error **errp) +{ + BDRVQcow2State *s = bs->opaque; + unsigned int len, i; + int ret = 0; + QCowHeader header; + Error *local_err = NULL; + uint64_t ext_end; + uint64_t l1_vm_state_index; + bool update_header = false; + + ret = bdrv_pread(bs->file, 0, &header, sizeof(header)); + if (ret < 0) { + error_setg_errno(errp, -ret, "Could not read qcow2 header"); + goto fail; + } + header.magic = be32_to_cpu(header.magic); + header.version = be32_to_cpu(header.version); + header.backing_file_offset = be64_to_cpu(header.backing_file_offset); + header.backing_file_size = be32_to_cpu(header.backing_file_size); + header.size = be64_to_cpu(header.size); + header.cluster_bits = be32_to_cpu(header.cluster_bits); + header.crypt_method = be32_to_cpu(header.crypt_method); + header.l1_table_offset = be64_to_cpu(header.l1_table_offset); + header.l1_size = be32_to_cpu(header.l1_size); + header.refcount_table_offset = be64_to_cpu(header.refcount_table_offset); + header.refcount_table_clusters = + be32_to_cpu(header.refcount_table_clusters); + header.snapshots_offset = be64_to_cpu(header.snapshots_offset); + header.nb_snapshots = be32_to_cpu(header.nb_snapshots); + + if (header.magic != QCOW_MAGIC) { + error_setg(errp, "Image is not in qcow2 format"); + ret = -EINVAL; + goto fail; + } + if (header.version < 2 || header.version > 3) { + error_setg(errp, "Unsupported qcow2 version %" PRIu32, header.version); + ret = -ENOTSUP; + goto fail; + } + + s->qcow_version = header.version; + + /* Initialise cluster size */ + if (header.cluster_bits < MIN_CLUSTER_BITS || + header.cluster_bits > MAX_CLUSTER_BITS) { + error_setg(errp, "Unsupported cluster size: 2^%" PRIu32, + header.cluster_bits); + ret = -EINVAL; + goto fail; + } + + s->cluster_bits = header.cluster_bits; + s->cluster_size = 1 << s->cluster_bits; + + /* Initialise version 3 header fields */ + if (header.version == 2) { + header.incompatible_features = 0; + header.compatible_features = 0; + header.autoclear_features = 0; + header.refcount_order = 4; + header.header_length = 72; + } else { + header.incompatible_features = + be64_to_cpu(header.incompatible_features); + header.compatible_features = be64_to_cpu(header.compatible_features); + header.autoclear_features = be64_to_cpu(header.autoclear_features); + header.refcount_order = be32_to_cpu(header.refcount_order); + header.header_length = be32_to_cpu(header.header_length); + + if (header.header_length < 104) { + error_setg(errp, "qcow2 header too short"); + ret = -EINVAL; + goto fail; + } + } + + if (header.header_length > s->cluster_size) { + error_setg(errp, "qcow2 header exceeds cluster size"); + ret = -EINVAL; + goto fail; + } + + if (header.header_length > sizeof(header)) { + s->unknown_header_fields_size = header.header_length - sizeof(header); + s->unknown_header_fields = g_malloc(s->unknown_header_fields_size); + ret = bdrv_pread(bs->file, sizeof(header), s->unknown_header_fields, + s->unknown_header_fields_size); + if (ret < 0) { + error_setg_errno(errp, -ret, "Could not read unknown qcow2 header " + "fields"); + goto fail; + } + } + + if (header.backing_file_offset > s->cluster_size) { + error_setg(errp, "Invalid backing file offset"); + ret = -EINVAL; + goto fail; + } + + if (header.backing_file_offset) { + ext_end = header.backing_file_offset; + } else { + ext_end = 1 << header.cluster_bits; + } + + /* Handle feature bits */ + s->incompatible_features = header.incompatible_features; + s->compatible_features = header.compatible_features; + s->autoclear_features = header.autoclear_features; + + /* + * Handle compression type + * Older qcow2 images don't contain the compression type header. + * Distinguish them by the header length and use + * the only valid (default) compression type in that case + */ + if (header.header_length > offsetof(QCowHeader, compression_type)) { + s->compression_type = header.compression_type; + } else { + s->compression_type = QCOW2_COMPRESSION_TYPE_ZLIB; + } + + ret = validate_compression_type(s, errp); + if (ret) { + goto fail; + } + + if (s->incompatible_features & ~QCOW2_INCOMPAT_MASK) { + void *feature_table = NULL; + qcow2_read_extensions(bs, header.header_length, ext_end, + &feature_table, flags, NULL, NULL); + report_unsupported_feature(errp, feature_table, + s->incompatible_features & + ~QCOW2_INCOMPAT_MASK); + ret = -ENOTSUP; + g_free(feature_table); + goto fail; + } + + if (s->incompatible_features & QCOW2_INCOMPAT_CORRUPT) { + /* Corrupt images may not be written to unless they are being repaired + */ + if ((flags & BDRV_O_RDWR) && !(flags & BDRV_O_CHECK)) { + error_setg(errp, "qcow2: Image is corrupt; cannot be opened " + "read/write"); + ret = -EACCES; + goto fail; + } + } + + s->subclusters_per_cluster = + has_subclusters(s) ? QCOW_EXTL2_SUBCLUSTERS_PER_CLUSTER : 1; + s->subcluster_size = s->cluster_size / s->subclusters_per_cluster; + s->subcluster_bits = ctz32(s->subcluster_size); + + if (s->subcluster_size < (1 << MIN_CLUSTER_BITS)) { + error_setg(errp, "Unsupported subcluster size: %d", s->subcluster_size); + ret = -EINVAL; + goto fail; + } + + /* Check support for various header values */ + if (header.refcount_order > 6) { + error_setg(errp, "Reference count entry width too large; may not " + "exceed 64 bits"); + ret = -EINVAL; + goto fail; + } + s->refcount_order = header.refcount_order; + s->refcount_bits = 1 << s->refcount_order; + s->refcount_max = UINT64_C(1) << (s->refcount_bits - 1); + s->refcount_max += s->refcount_max - 1; + + s->crypt_method_header = header.crypt_method; + if (s->crypt_method_header) { + if (bdrv_uses_whitelist() && + s->crypt_method_header == QCOW_CRYPT_AES) { + error_setg(errp, + "Use of AES-CBC encrypted qcow2 images is no longer " + "supported in system emulators"); + error_append_hint(errp, + "You can use 'qemu-img convert' to convert your " + "image to an alternative supported format, such " + "as unencrypted qcow2, or raw with the LUKS " + "format instead.\n"); + ret = -ENOSYS; + goto fail; + } + + if (s->crypt_method_header == QCOW_CRYPT_AES) { + s->crypt_physical_offset = false; + } else { + /* Assuming LUKS and any future crypt methods we + * add will all use physical offsets, due to the + * fact that the alternative is insecure... */ + s->crypt_physical_offset = true; + } + + bs->encrypted = true; + } + + s->l2_bits = s->cluster_bits - ctz32(l2_entry_size(s)); + s->l2_size = 1 << s->l2_bits; + /* 2^(s->refcount_order - 3) is the refcount width in bytes */ + s->refcount_block_bits = s->cluster_bits - (s->refcount_order - 3); + s->refcount_block_size = 1 << s->refcount_block_bits; + bs->total_sectors = header.size / BDRV_SECTOR_SIZE; + s->csize_shift = (62 - (s->cluster_bits - 8)); + s->csize_mask = (1 << (s->cluster_bits - 8)) - 1; + s->cluster_offset_mask = (1LL << s->csize_shift) - 1; + + s->refcount_table_offset = header.refcount_table_offset; + s->refcount_table_size = + header.refcount_table_clusters << (s->cluster_bits - 3); + + if (header.refcount_table_clusters == 0 && !(flags & BDRV_O_CHECK)) { + error_setg(errp, "Image does not contain a reference count table"); + ret = -EINVAL; + goto fail; + } + + ret = qcow2_validate_table(bs, s->refcount_table_offset, + header.refcount_table_clusters, + s->cluster_size, QCOW_MAX_REFTABLE_SIZE, + "Reference count table", errp); + if (ret < 0) { + goto fail; + } + + if (!(flags & BDRV_O_CHECK)) { + /* + * The total size in bytes of the snapshot table is checked in + * qcow2_read_snapshots() because the size of each snapshot is + * variable and we don't know it yet. + * Here we only check the offset and number of snapshots. + */ + ret = qcow2_validate_table(bs, header.snapshots_offset, + header.nb_snapshots, + sizeof(QCowSnapshotHeader), + sizeof(QCowSnapshotHeader) * + QCOW_MAX_SNAPSHOTS, + "Snapshot table", errp); + if (ret < 0) { + goto fail; + } + } + + /* read the level 1 table */ + ret = qcow2_validate_table(bs, header.l1_table_offset, + header.l1_size, L1E_SIZE, + QCOW_MAX_L1_SIZE, "Active L1 table", errp); + if (ret < 0) { + goto fail; + } + s->l1_size = header.l1_size; + s->l1_table_offset = header.l1_table_offset; + + l1_vm_state_index = size_to_l1(s, header.size); + if (l1_vm_state_index > INT_MAX) { + error_setg(errp, "Image is too big"); + ret = -EFBIG; + goto fail; + } + s->l1_vm_state_index = l1_vm_state_index; + + /* the L1 table must contain at least enough entries to put + header.size bytes */ + if (s->l1_size < s->l1_vm_state_index) { + error_setg(errp, "L1 table is too small"); + ret = -EINVAL; + goto fail; + } + + if (s->l1_size > 0) { + s->l1_table = qemu_try_blockalign(bs->file->bs, s->l1_size * L1E_SIZE); + if (s->l1_table == NULL) { + error_setg(errp, "Could not allocate L1 table"); + ret = -ENOMEM; + goto fail; + } + ret = bdrv_pread(bs->file, s->l1_table_offset, s->l1_table, + s->l1_size * L1E_SIZE); + if (ret < 0) { + error_setg_errno(errp, -ret, "Could not read L1 table"); + goto fail; + } + for(i = 0;i < s->l1_size; i++) { + s->l1_table[i] = be64_to_cpu(s->l1_table[i]); + } + } + + /* Parse driver-specific options */ + ret = qcow2_update_options(bs, options, flags, errp); + if (ret < 0) { + goto fail; + } + + s->flags = flags; + + ret = qcow2_refcount_init(bs); + if (ret != 0) { + error_setg_errno(errp, -ret, "Could not initialize refcount handling"); + goto fail; + } + + QLIST_INIT(&s->cluster_allocs); + QTAILQ_INIT(&s->discards); + + /* read qcow2 extensions */ + if (qcow2_read_extensions(bs, header.header_length, ext_end, NULL, + flags, &update_header, errp)) { + ret = -EINVAL; + goto fail; + } + + /* Open external data file */ + s->data_file = bdrv_open_child(NULL, options, "data-file", bs, + &child_of_bds, BDRV_CHILD_DATA, + true, &local_err); + if (local_err) { + error_propagate(errp, local_err); + ret = -EINVAL; + goto fail; + } + + if (s->incompatible_features & QCOW2_INCOMPAT_DATA_FILE) { + if (!s->data_file && s->image_data_file) { + s->data_file = bdrv_open_child(s->image_data_file, options, + "data-file", bs, &child_of_bds, + BDRV_CHILD_DATA, false, errp); + if (!s->data_file) { + ret = -EINVAL; + goto fail; + } + } + if (!s->data_file) { + error_setg(errp, "'data-file' is required for this image"); + ret = -EINVAL; + goto fail; + } + + /* No data here */ + bs->file->role &= ~BDRV_CHILD_DATA; + + /* Must succeed because we have given up permissions if anything */ + bdrv_child_refresh_perms(bs, bs->file, &error_abort); + } else { + if (s->data_file) { + error_setg(errp, "'data-file' can only be set for images with an " + "external data file"); + ret = -EINVAL; + goto fail; + } + + s->data_file = bs->file; + + if (data_file_is_raw(bs)) { + error_setg(errp, "data-file-raw requires a data file"); + ret = -EINVAL; + goto fail; + } + } + + /* qcow2_read_extension may have set up the crypto context + * if the crypt method needs a header region, some methods + * don't need header extensions, so must check here + */ + if (s->crypt_method_header && !s->crypto) { + if (s->crypt_method_header == QCOW_CRYPT_AES) { + unsigned int cflags = 0; + if (flags & BDRV_O_NO_IO) { + cflags |= QCRYPTO_BLOCK_OPEN_NO_IO; + } + s->crypto = qcrypto_block_open(s->crypto_opts, "encrypt.", + NULL, NULL, cflags, + QCOW2_MAX_THREADS, errp); + if (!s->crypto) { + ret = -EINVAL; + goto fail; + } + } else if (!(flags & BDRV_O_NO_IO)) { + error_setg(errp, "Missing CRYPTO header for crypt method %d", + s->crypt_method_header); + ret = -EINVAL; + goto fail; + } + } + + /* read the backing file name */ + if (header.backing_file_offset != 0) { + len = header.backing_file_size; + if (len > MIN(1023, s->cluster_size - header.backing_file_offset) || + len >= sizeof(bs->backing_file)) { + error_setg(errp, "Backing file name too long"); + ret = -EINVAL; + goto fail; + } + ret = bdrv_pread(bs->file, header.backing_file_offset, + bs->auto_backing_file, len); + if (ret < 0) { + error_setg_errno(errp, -ret, "Could not read backing file name"); + goto fail; + } + bs->auto_backing_file[len] = '\0'; + pstrcpy(bs->backing_file, sizeof(bs->backing_file), + bs->auto_backing_file); + s->image_backing_file = g_strdup(bs->auto_backing_file); + } + + /* + * Internal snapshots; skip reading them in check mode, because + * we do not need them then, and we do not want to abort because + * of a broken table. + */ + if (!(flags & BDRV_O_CHECK)) { + s->snapshots_offset = header.snapshots_offset; + s->nb_snapshots = header.nb_snapshots; + + ret = qcow2_read_snapshots(bs, errp); + if (ret < 0) { + goto fail; + } + } + + /* Clear unknown autoclear feature bits */ + update_header |= s->autoclear_features & ~QCOW2_AUTOCLEAR_MASK; + update_header = + update_header && !bs->read_only && !(flags & BDRV_O_INACTIVE); + if (update_header) { + s->autoclear_features &= QCOW2_AUTOCLEAR_MASK; + } + + /* == Handle persistent dirty bitmaps == + * + * We want load dirty bitmaps in three cases: + * + * 1. Normal open of the disk in active mode, not related to invalidation + * after migration. + * + * 2. Invalidation of the target vm after pre-copy phase of migration, if + * bitmaps are _not_ migrating through migration channel, i.e. + * 'dirty-bitmaps' capability is disabled. + * + * 3. Invalidation of source vm after failed or canceled migration. + * This is a very interesting case. There are two possible types of + * bitmaps: + * + * A. Stored on inactivation and removed. They should be loaded from the + * image. + * + * B. Not stored: not-persistent bitmaps and bitmaps, migrated through + * the migration channel (with dirty-bitmaps capability). + * + * On the other hand, there are two possible sub-cases: + * + * 3.1 disk was changed by somebody else while were inactive. In this + * case all in-RAM dirty bitmaps (both persistent and not) are + * definitely invalid. And we don't have any method to determine + * this. + * + * Simple and safe thing is to just drop all the bitmaps of type B on + * inactivation. But in this case we lose bitmaps in valid 4.2 case. + * + * On the other hand, resuming source vm, if disk was already changed + * is a bad thing anyway: not only bitmaps, the whole vm state is + * out of sync with disk. + * + * This means, that user or management tool, who for some reason + * decided to resume source vm, after disk was already changed by + * target vm, should at least drop all dirty bitmaps by hand. + * + * So, we can ignore this case for now, but TODO: "generation" + * extension for qcow2, to determine, that image was changed after + * last inactivation. And if it is changed, we will drop (or at least + * mark as 'invalid' all the bitmaps of type B, both persistent + * and not). + * + * 3.2 disk was _not_ changed while were inactive. Bitmaps may be saved + * to disk ('dirty-bitmaps' capability disabled), or not saved + * ('dirty-bitmaps' capability enabled), but we don't need to care + * of: let's load bitmaps as always: stored bitmaps will be loaded, + * and not stored has flag IN_USE=1 in the image and will be skipped + * on loading. + * + * One remaining possible case when we don't want load bitmaps: + * + * 4. Open disk in inactive mode in target vm (bitmaps are migrating or + * will be loaded on invalidation, no needs try loading them before) + */ + + if (!(bdrv_get_flags(bs) & BDRV_O_INACTIVE)) { + /* It's case 1, 2 or 3.2. Or 3.1 which is BUG in management layer. */ + bool header_updated = qcow2_load_dirty_bitmaps(bs, &local_err); + if (local_err != NULL) { + error_propagate(errp, local_err); + ret = -EINVAL; + goto fail; + } + + update_header = update_header && !header_updated; + } + + if (update_header) { + ret = qcow2_update_header(bs); + if (ret < 0) { + error_setg_errno(errp, -ret, "Could not update qcow2 header"); + goto fail; + } + } + + bs->supported_zero_flags = header.version >= 3 ? + BDRV_REQ_MAY_UNMAP | BDRV_REQ_NO_FALLBACK : 0; + bs->supported_truncate_flags = BDRV_REQ_ZERO_WRITE; + + /* Repair image if dirty */ + if (!(flags & (BDRV_O_CHECK | BDRV_O_INACTIVE)) && !bs->read_only && + (s->incompatible_features & QCOW2_INCOMPAT_DIRTY)) { + BdrvCheckResult result = {0}; + + ret = qcow2_co_check_locked(bs, &result, + BDRV_FIX_ERRORS | BDRV_FIX_LEAKS); + if (ret < 0 || result.check_errors) { + if (ret >= 0) { + ret = -EIO; + } + error_setg_errno(errp, -ret, "Could not repair dirty image"); + goto fail; + } + } + +#ifdef DEBUG_ALLOC + { + BdrvCheckResult result = {0}; + qcow2_check_refcounts(bs, &result, 0); + } +#endif + + qemu_co_queue_init(&s->thread_task_queue); + + return ret; + + fail: + g_free(s->image_data_file); + if (has_data_file(bs)) { + bdrv_unref_child(bs, s->data_file); + s->data_file = NULL; + } + g_free(s->unknown_header_fields); + cleanup_unknown_header_ext(bs); + qcow2_free_snapshots(bs); + qcow2_refcount_close(bs); + qemu_vfree(s->l1_table); + /* else pre-write overlap checks in cache_destroy may crash */ + s->l1_table = NULL; + cache_clean_timer_del(bs); + if (s->l2_table_cache) { + qcow2_cache_destroy(s->l2_table_cache); + } + if (s->refcount_block_cache) { + qcow2_cache_destroy(s->refcount_block_cache); + } + qcrypto_block_free(s->crypto); + qapi_free_QCryptoBlockOpenOptions(s->crypto_opts); + return ret; +} + +typedef struct QCow2OpenCo { + BlockDriverState *bs; + QDict *options; + int flags; + Error **errp; + int ret; +} QCow2OpenCo; + +static void coroutine_fn qcow2_open_entry(void *opaque) +{ + QCow2OpenCo *qoc = opaque; + BDRVQcow2State *s = qoc->bs->opaque; + + qemu_co_mutex_lock(&s->lock); + qoc->ret = qcow2_do_open(qoc->bs, qoc->options, qoc->flags, qoc->errp); + qemu_co_mutex_unlock(&s->lock); +} + +static int qcow2_open(BlockDriverState *bs, QDict *options, int flags, + Error **errp) +{ + BDRVQcow2State *s = bs->opaque; + QCow2OpenCo qoc = { + .bs = bs, + .options = options, + .flags = flags, + .errp = errp, + .ret = -EINPROGRESS + }; + + bs->file = bdrv_open_child(NULL, options, "file", bs, &child_of_bds, + BDRV_CHILD_IMAGE, false, errp); + if (!bs->file) { + return -EINVAL; + } + + /* Initialise locks */ + qemu_co_mutex_init(&s->lock); + + if (qemu_in_coroutine()) { + /* From bdrv_co_create. */ + qcow2_open_entry(&qoc); + } else { + assert(qemu_get_current_aio_context() == qemu_get_aio_context()); + qemu_coroutine_enter(qemu_coroutine_create(qcow2_open_entry, &qoc)); + BDRV_POLL_WHILE(bs, qoc.ret == -EINPROGRESS); + } + return qoc.ret; +} + +static void qcow2_refresh_limits(BlockDriverState *bs, Error **errp) +{ + BDRVQcow2State *s = bs->opaque; + + if (bs->encrypted) { + /* Encryption works on a sector granularity */ + bs->bl.request_alignment = qcrypto_block_get_sector_size(s->crypto); + } + bs->bl.pwrite_zeroes_alignment = s->subcluster_size; + bs->bl.pdiscard_alignment = s->cluster_size; +} + +static int qcow2_reopen_prepare(BDRVReopenState *state, + BlockReopenQueue *queue, Error **errp) +{ + Qcow2ReopenState *r; + int ret; + + r = g_new0(Qcow2ReopenState, 1); + state->opaque = r; + + ret = qcow2_update_options_prepare(state->bs, r, state->options, + state->flags, errp); + if (ret < 0) { + goto fail; + } + + /* We need to write out any unwritten data if we reopen read-only. */ + if ((state->flags & BDRV_O_RDWR) == 0) { + ret = qcow2_reopen_bitmaps_ro(state->bs, errp); + if (ret < 0) { + goto fail; + } + + ret = bdrv_flush(state->bs); + if (ret < 0) { + goto fail; + } + + ret = qcow2_mark_clean(state->bs); + if (ret < 0) { + goto fail; + } + } + + return 0; + +fail: + qcow2_update_options_abort(state->bs, r); + g_free(r); + return ret; +} + +static void qcow2_reopen_commit(BDRVReopenState *state) +{ + qcow2_update_options_commit(state->bs, state->opaque); + g_free(state->opaque); +} + +static void qcow2_reopen_commit_post(BDRVReopenState *state) +{ + if (state->flags & BDRV_O_RDWR) { + Error *local_err = NULL; + + if (qcow2_reopen_bitmaps_rw(state->bs, &local_err) < 0) { + /* + * This is not fatal, bitmaps just left read-only, so all following + * writes will fail. User can remove read-only bitmaps to unblock + * writes or retry reopen. + */ + error_reportf_err(local_err, + "%s: Failed to make dirty bitmaps writable: ", + bdrv_get_node_name(state->bs)); + } + } +} + +static void qcow2_reopen_abort(BDRVReopenState *state) +{ + qcow2_update_options_abort(state->bs, state->opaque); + g_free(state->opaque); +} + +static void qcow2_join_options(QDict *options, QDict *old_options) +{ + bool has_new_overlap_template = + qdict_haskey(options, QCOW2_OPT_OVERLAP) || + qdict_haskey(options, QCOW2_OPT_OVERLAP_TEMPLATE); + bool has_new_total_cache_size = + qdict_haskey(options, QCOW2_OPT_CACHE_SIZE); + bool has_all_cache_options; + + /* New overlap template overrides all old overlap options */ + if (has_new_overlap_template) { + qdict_del(old_options, QCOW2_OPT_OVERLAP); + qdict_del(old_options, QCOW2_OPT_OVERLAP_TEMPLATE); + qdict_del(old_options, QCOW2_OPT_OVERLAP_MAIN_HEADER); + qdict_del(old_options, QCOW2_OPT_OVERLAP_ACTIVE_L1); + qdict_del(old_options, QCOW2_OPT_OVERLAP_ACTIVE_L2); + qdict_del(old_options, QCOW2_OPT_OVERLAP_REFCOUNT_TABLE); + qdict_del(old_options, QCOW2_OPT_OVERLAP_REFCOUNT_BLOCK); + qdict_del(old_options, QCOW2_OPT_OVERLAP_SNAPSHOT_TABLE); + qdict_del(old_options, QCOW2_OPT_OVERLAP_INACTIVE_L1); + qdict_del(old_options, QCOW2_OPT_OVERLAP_INACTIVE_L2); + } + + /* New total cache size overrides all old options */ + if (qdict_haskey(options, QCOW2_OPT_CACHE_SIZE)) { + qdict_del(old_options, QCOW2_OPT_L2_CACHE_SIZE); + qdict_del(old_options, QCOW2_OPT_REFCOUNT_CACHE_SIZE); + } + + qdict_join(options, old_options, false); + + /* + * If after merging all cache size options are set, an old total size is + * overwritten. Do keep all options, however, if all three are new. The + * resulting error message is what we want to happen. + */ + has_all_cache_options = + qdict_haskey(options, QCOW2_OPT_CACHE_SIZE) || + qdict_haskey(options, QCOW2_OPT_L2_CACHE_SIZE) || + qdict_haskey(options, QCOW2_OPT_REFCOUNT_CACHE_SIZE); + + if (has_all_cache_options && !has_new_total_cache_size) { + qdict_del(options, QCOW2_OPT_CACHE_SIZE); + } +} + +static int coroutine_fn qcow2_co_block_status(BlockDriverState *bs, + bool want_zero, + int64_t offset, int64_t count, + int64_t *pnum, int64_t *map, + BlockDriverState **file) +{ + BDRVQcow2State *s = bs->opaque; + uint64_t host_offset; + unsigned int bytes; + QCow2SubclusterType type; + int ret, status = 0; + + qemu_co_mutex_lock(&s->lock); + + if (!s->metadata_preallocation_checked) { + ret = qcow2_detect_metadata_preallocation(bs); + s->metadata_preallocation = (ret == 1); + s->metadata_preallocation_checked = true; + } + + bytes = MIN(INT_MAX, count); + ret = qcow2_get_host_offset(bs, offset, &bytes, &host_offset, &type); + qemu_co_mutex_unlock(&s->lock); + if (ret < 0) { + return ret; + } + + *pnum = bytes; + + if ((type == QCOW2_SUBCLUSTER_NORMAL || + type == QCOW2_SUBCLUSTER_ZERO_ALLOC || + type == QCOW2_SUBCLUSTER_UNALLOCATED_ALLOC) && !s->crypto) { + *map = host_offset; + *file = s->data_file->bs; + status |= BDRV_BLOCK_OFFSET_VALID; + } + if (type == QCOW2_SUBCLUSTER_ZERO_PLAIN || + type == QCOW2_SUBCLUSTER_ZERO_ALLOC) { + status |= BDRV_BLOCK_ZERO; + } else if (type != QCOW2_SUBCLUSTER_UNALLOCATED_PLAIN && + type != QCOW2_SUBCLUSTER_UNALLOCATED_ALLOC) { + status |= BDRV_BLOCK_DATA; + } + if (s->metadata_preallocation && (status & BDRV_BLOCK_DATA) && + (status & BDRV_BLOCK_OFFSET_VALID)) + { + status |= BDRV_BLOCK_RECURSE; + } + return status; +} + +static coroutine_fn int qcow2_handle_l2meta(BlockDriverState *bs, + QCowL2Meta **pl2meta, + bool link_l2) +{ + int ret = 0; + QCowL2Meta *l2meta = *pl2meta; + + while (l2meta != NULL) { + QCowL2Meta *next; + + if (link_l2) { + ret = qcow2_alloc_cluster_link_l2(bs, l2meta); + if (ret) { + goto out; + } + } else { + qcow2_alloc_cluster_abort(bs, l2meta); + } + + /* Take the request off the list of running requests */ + QLIST_REMOVE(l2meta, next_in_flight); + + qemu_co_queue_restart_all(&l2meta->dependent_requests); + + next = l2meta->next; + g_free(l2meta); + l2meta = next; + } +out: + *pl2meta = l2meta; + return ret; +} + +static coroutine_fn int +qcow2_co_preadv_encrypted(BlockDriverState *bs, + uint64_t host_offset, + uint64_t offset, + uint64_t bytes, + QEMUIOVector *qiov, + uint64_t qiov_offset) +{ + int ret; + BDRVQcow2State *s = bs->opaque; + uint8_t *buf; + + assert(bs->encrypted && s->crypto); + assert(bytes <= QCOW_MAX_CRYPT_CLUSTERS * s->cluster_size); + + /* + * For encrypted images, read everything into a temporary + * contiguous buffer on which the AES functions can work. + * Also, decryption in a separate buffer is better as it + * prevents the guest from learning information about the + * encrypted nature of the virtual disk. + */ + + buf = qemu_try_blockalign(s->data_file->bs, bytes); + if (buf == NULL) { + return -ENOMEM; + } + + BLKDBG_EVENT(bs->file, BLKDBG_READ_AIO); + ret = bdrv_co_pread(s->data_file, host_offset, bytes, buf, 0); + if (ret < 0) { + goto fail; + } + + if (qcow2_co_decrypt(bs, host_offset, offset, buf, bytes) < 0) + { + ret = -EIO; + goto fail; + } + qemu_iovec_from_buf(qiov, qiov_offset, buf, bytes); + +fail: + qemu_vfree(buf); + + return ret; +} + +typedef struct Qcow2AioTask { + AioTask task; + + BlockDriverState *bs; + QCow2SubclusterType subcluster_type; /* only for read */ + uint64_t host_offset; /* or full descriptor in compressed clusters */ + uint64_t offset; + uint64_t bytes; + QEMUIOVector *qiov; + uint64_t qiov_offset; + QCowL2Meta *l2meta; /* only for write */ +} Qcow2AioTask; + +static coroutine_fn int qcow2_co_preadv_task_entry(AioTask *task); +static coroutine_fn int qcow2_add_task(BlockDriverState *bs, + AioTaskPool *pool, + AioTaskFunc func, + QCow2SubclusterType subcluster_type, + uint64_t host_offset, + uint64_t offset, + uint64_t bytes, + QEMUIOVector *qiov, + size_t qiov_offset, + QCowL2Meta *l2meta) +{ + Qcow2AioTask local_task; + Qcow2AioTask *task = pool ? g_new(Qcow2AioTask, 1) : &local_task; + + *task = (Qcow2AioTask) { + .task.func = func, + .bs = bs, + .subcluster_type = subcluster_type, + .qiov = qiov, + .host_offset = host_offset, + .offset = offset, + .bytes = bytes, + .qiov_offset = qiov_offset, + .l2meta = l2meta, + }; + + trace_qcow2_add_task(qemu_coroutine_self(), bs, pool, + func == qcow2_co_preadv_task_entry ? "read" : "write", + subcluster_type, host_offset, offset, bytes, + qiov, qiov_offset); + + if (!pool) { + return func(&task->task); + } + + aio_task_pool_start_task(pool, &task->task); + + return 0; +} + +static coroutine_fn int qcow2_co_preadv_task(BlockDriverState *bs, + QCow2SubclusterType subc_type, + uint64_t host_offset, + uint64_t offset, uint64_t bytes, + QEMUIOVector *qiov, + size_t qiov_offset) +{ + BDRVQcow2State *s = bs->opaque; + + switch (subc_type) { + case QCOW2_SUBCLUSTER_ZERO_PLAIN: + case QCOW2_SUBCLUSTER_ZERO_ALLOC: + /* Both zero types are handled in qcow2_co_preadv_part */ + g_assert_not_reached(); + + case QCOW2_SUBCLUSTER_UNALLOCATED_PLAIN: + case QCOW2_SUBCLUSTER_UNALLOCATED_ALLOC: + assert(bs->backing); /* otherwise handled in qcow2_co_preadv_part */ + + BLKDBG_EVENT(bs->file, BLKDBG_READ_BACKING_AIO); + return bdrv_co_preadv_part(bs->backing, offset, bytes, + qiov, qiov_offset, 0); + + case QCOW2_SUBCLUSTER_COMPRESSED: + return qcow2_co_preadv_compressed(bs, host_offset, + offset, bytes, qiov, qiov_offset); + + case QCOW2_SUBCLUSTER_NORMAL: + if (bs->encrypted) { + return qcow2_co_preadv_encrypted(bs, host_offset, + offset, bytes, qiov, qiov_offset); + } + + BLKDBG_EVENT(bs->file, BLKDBG_READ_AIO); + return bdrv_co_preadv_part(s->data_file, host_offset, + bytes, qiov, qiov_offset, 0); + + default: + g_assert_not_reached(); + } + + g_assert_not_reached(); +} + +static coroutine_fn int qcow2_co_preadv_task_entry(AioTask *task) +{ + Qcow2AioTask *t = container_of(task, Qcow2AioTask, task); + + assert(!t->l2meta); + + return qcow2_co_preadv_task(t->bs, t->subcluster_type, + t->host_offset, t->offset, t->bytes, + t->qiov, t->qiov_offset); +} + +static coroutine_fn int qcow2_co_preadv_part(BlockDriverState *bs, + uint64_t offset, uint64_t bytes, + QEMUIOVector *qiov, + size_t qiov_offset, int flags) +{ + BDRVQcow2State *s = bs->opaque; + int ret = 0; + unsigned int cur_bytes; /* number of bytes in current iteration */ + uint64_t host_offset = 0; + QCow2SubclusterType type; + AioTaskPool *aio = NULL; + + while (bytes != 0 && aio_task_pool_status(aio) == 0) { + /* prepare next request */ + cur_bytes = MIN(bytes, INT_MAX); + if (s->crypto) { + cur_bytes = MIN(cur_bytes, + QCOW_MAX_CRYPT_CLUSTERS * s->cluster_size); + } + + qemu_co_mutex_lock(&s->lock); + ret = qcow2_get_host_offset(bs, offset, &cur_bytes, + &host_offset, &type); + qemu_co_mutex_unlock(&s->lock); + if (ret < 0) { + goto out; + } + + if (type == QCOW2_SUBCLUSTER_ZERO_PLAIN || + type == QCOW2_SUBCLUSTER_ZERO_ALLOC || + (type == QCOW2_SUBCLUSTER_UNALLOCATED_PLAIN && !bs->backing) || + (type == QCOW2_SUBCLUSTER_UNALLOCATED_ALLOC && !bs->backing)) + { + qemu_iovec_memset(qiov, qiov_offset, 0, cur_bytes); + } else { + if (!aio && cur_bytes != bytes) { + aio = aio_task_pool_new(QCOW2_MAX_WORKERS); + } + ret = qcow2_add_task(bs, aio, qcow2_co_preadv_task_entry, type, + host_offset, offset, cur_bytes, + qiov, qiov_offset, NULL); + if (ret < 0) { + goto out; + } + } + + bytes -= cur_bytes; + offset += cur_bytes; + qiov_offset += cur_bytes; + } + +out: + if (aio) { + aio_task_pool_wait_all(aio); + if (ret == 0) { + ret = aio_task_pool_status(aio); + } + g_free(aio); + } + + return ret; +} + +/* Check if it's possible to merge a write request with the writing of + * the data from the COW regions */ +static bool merge_cow(uint64_t offset, unsigned bytes, + QEMUIOVector *qiov, size_t qiov_offset, + QCowL2Meta *l2meta) +{ + QCowL2Meta *m; + + for (m = l2meta; m != NULL; m = m->next) { + /* If both COW regions are empty then there's nothing to merge */ + if (m->cow_start.nb_bytes == 0 && m->cow_end.nb_bytes == 0) { + continue; + } + + /* If COW regions are handled already, skip this too */ + if (m->skip_cow) { + continue; + } + + /* + * The write request should start immediately after the first + * COW region. This does not always happen because the area + * touched by the request can be larger than the one defined + * by @m (a single request can span an area consisting of a + * mix of previously unallocated and allocated clusters, that + * is why @l2meta is a list). + */ + if (l2meta_cow_start(m) + m->cow_start.nb_bytes != offset) { + /* In this case the request starts before this region */ + assert(offset < l2meta_cow_start(m)); + assert(m->cow_start.nb_bytes == 0); + continue; + } + + /* The write request should end immediately before the second + * COW region (see above for why it does not always happen) */ + if (m->offset + m->cow_end.offset != offset + bytes) { + assert(offset + bytes > m->offset + m->cow_end.offset); + assert(m->cow_end.nb_bytes == 0); + continue; + } + + /* Make sure that adding both COW regions to the QEMUIOVector + * does not exceed IOV_MAX */ + if (qemu_iovec_subvec_niov(qiov, qiov_offset, bytes) > IOV_MAX - 2) { + continue; + } + + m->data_qiov = qiov; + m->data_qiov_offset = qiov_offset; + return true; + } + + return false; +} + +/* + * Return 1 if the COW regions read as zeroes, 0 if not, < 0 on error. + * Note that returning 0 does not guarantee non-zero data. + */ +static int is_zero_cow(BlockDriverState *bs, QCowL2Meta *m) +{ + /* + * This check is designed for optimization shortcut so it must be + * efficient. + * Instead of is_zero(), use bdrv_co_is_zero_fast() as it is + * faster (but not as accurate and can result in false negatives). + */ + int ret = bdrv_co_is_zero_fast(bs, m->offset + m->cow_start.offset, + m->cow_start.nb_bytes); + if (ret <= 0) { + return ret; + } + + return bdrv_co_is_zero_fast(bs, m->offset + m->cow_end.offset, + m->cow_end.nb_bytes); +} + +static int handle_alloc_space(BlockDriverState *bs, QCowL2Meta *l2meta) +{ + BDRVQcow2State *s = bs->opaque; + QCowL2Meta *m; + + if (!(s->data_file->bs->supported_zero_flags & BDRV_REQ_NO_FALLBACK)) { + return 0; + } + + if (bs->encrypted) { + return 0; + } + + for (m = l2meta; m != NULL; m = m->next) { + int ret; + uint64_t start_offset = m->alloc_offset + m->cow_start.offset; + unsigned nb_bytes = m->cow_end.offset + m->cow_end.nb_bytes - + m->cow_start.offset; + + if (!m->cow_start.nb_bytes && !m->cow_end.nb_bytes) { + continue; + } + + ret = is_zero_cow(bs, m); + if (ret < 0) { + return ret; + } else if (ret == 0) { + continue; + } + + /* + * instead of writing zero COW buffers, + * efficiently zero out the whole clusters + */ + + ret = qcow2_pre_write_overlap_check(bs, 0, start_offset, nb_bytes, + true); + if (ret < 0) { + return ret; + } + + BLKDBG_EVENT(bs->file, BLKDBG_CLUSTER_ALLOC_SPACE); + ret = bdrv_co_pwrite_zeroes(s->data_file, start_offset, nb_bytes, + BDRV_REQ_NO_FALLBACK); + if (ret < 0) { + if (ret != -ENOTSUP && ret != -EAGAIN) { + return ret; + } + continue; + } + + trace_qcow2_skip_cow(qemu_coroutine_self(), m->offset, m->nb_clusters); + m->skip_cow = true; + } + return 0; +} + +/* + * qcow2_co_pwritev_task + * Called with s->lock unlocked + * l2meta - if not NULL, qcow2_co_pwritev_task() will consume it. Caller must + * not use it somehow after qcow2_co_pwritev_task() call + */ +static coroutine_fn int qcow2_co_pwritev_task(BlockDriverState *bs, + uint64_t host_offset, + uint64_t offset, uint64_t bytes, + QEMUIOVector *qiov, + uint64_t qiov_offset, + QCowL2Meta *l2meta) +{ + int ret; + BDRVQcow2State *s = bs->opaque; + void *crypt_buf = NULL; + QEMUIOVector encrypted_qiov; + + if (bs->encrypted) { + assert(s->crypto); + assert(bytes <= QCOW_MAX_CRYPT_CLUSTERS * s->cluster_size); + crypt_buf = qemu_try_blockalign(bs->file->bs, bytes); + if (crypt_buf == NULL) { + ret = -ENOMEM; + goto out_unlocked; + } + qemu_iovec_to_buf(qiov, qiov_offset, crypt_buf, bytes); + + if (qcow2_co_encrypt(bs, host_offset, offset, crypt_buf, bytes) < 0) { + ret = -EIO; + goto out_unlocked; + } + + qemu_iovec_init_buf(&encrypted_qiov, crypt_buf, bytes); + qiov = &encrypted_qiov; + qiov_offset = 0; + } + + /* Try to efficiently initialize the physical space with zeroes */ + ret = handle_alloc_space(bs, l2meta); + if (ret < 0) { + goto out_unlocked; + } + + /* + * If we need to do COW, check if it's possible to merge the + * writing of the guest data together with that of the COW regions. + * If it's not possible (or not necessary) then write the + * guest data now. + */ + if (!merge_cow(offset, bytes, qiov, qiov_offset, l2meta)) { + BLKDBG_EVENT(bs->file, BLKDBG_WRITE_AIO); + trace_qcow2_writev_data(qemu_coroutine_self(), host_offset); + ret = bdrv_co_pwritev_part(s->data_file, host_offset, + bytes, qiov, qiov_offset, 0); + if (ret < 0) { + goto out_unlocked; + } + } + + qemu_co_mutex_lock(&s->lock); + + ret = qcow2_handle_l2meta(bs, &l2meta, true); + goto out_locked; + +out_unlocked: + qemu_co_mutex_lock(&s->lock); + +out_locked: + qcow2_handle_l2meta(bs, &l2meta, false); + qemu_co_mutex_unlock(&s->lock); + + qemu_vfree(crypt_buf); + + return ret; +} + +static coroutine_fn int qcow2_co_pwritev_task_entry(AioTask *task) +{ + Qcow2AioTask *t = container_of(task, Qcow2AioTask, task); + + assert(!t->subcluster_type); + + return qcow2_co_pwritev_task(t->bs, t->host_offset, + t->offset, t->bytes, t->qiov, t->qiov_offset, + t->l2meta); +} + +static coroutine_fn int qcow2_co_pwritev_part( + BlockDriverState *bs, uint64_t offset, uint64_t bytes, + QEMUIOVector *qiov, size_t qiov_offset, int flags) +{ + BDRVQcow2State *s = bs->opaque; + int offset_in_cluster; + int ret; + unsigned int cur_bytes; /* number of sectors in current iteration */ + uint64_t host_offset; + QCowL2Meta *l2meta = NULL; + AioTaskPool *aio = NULL; + + trace_qcow2_writev_start_req(qemu_coroutine_self(), offset, bytes); + + while (bytes != 0 && aio_task_pool_status(aio) == 0) { + + l2meta = NULL; + + trace_qcow2_writev_start_part(qemu_coroutine_self()); + offset_in_cluster = offset_into_cluster(s, offset); + cur_bytes = MIN(bytes, INT_MAX); + if (bs->encrypted) { + cur_bytes = MIN(cur_bytes, + QCOW_MAX_CRYPT_CLUSTERS * s->cluster_size + - offset_in_cluster); + } + + qemu_co_mutex_lock(&s->lock); + + ret = qcow2_alloc_host_offset(bs, offset, &cur_bytes, + &host_offset, &l2meta); + if (ret < 0) { + goto out_locked; + } + + ret = qcow2_pre_write_overlap_check(bs, 0, host_offset, + cur_bytes, true); + if (ret < 0) { + goto out_locked; + } + + qemu_co_mutex_unlock(&s->lock); + + if (!aio && cur_bytes != bytes) { + aio = aio_task_pool_new(QCOW2_MAX_WORKERS); + } + ret = qcow2_add_task(bs, aio, qcow2_co_pwritev_task_entry, 0, + host_offset, offset, + cur_bytes, qiov, qiov_offset, l2meta); + l2meta = NULL; /* l2meta is consumed by qcow2_co_pwritev_task() */ + if (ret < 0) { + goto fail_nometa; + } + + bytes -= cur_bytes; + offset += cur_bytes; + qiov_offset += cur_bytes; + trace_qcow2_writev_done_part(qemu_coroutine_self(), cur_bytes); + } + ret = 0; + + qemu_co_mutex_lock(&s->lock); + +out_locked: + qcow2_handle_l2meta(bs, &l2meta, false); + + qemu_co_mutex_unlock(&s->lock); + +fail_nometa: + if (aio) { + aio_task_pool_wait_all(aio); + if (ret == 0) { + ret = aio_task_pool_status(aio); + } + g_free(aio); + } + + trace_qcow2_writev_done_req(qemu_coroutine_self(), ret); + + return ret; +} + +static int qcow2_inactivate(BlockDriverState *bs) +{ + BDRVQcow2State *s = bs->opaque; + int ret, result = 0; + Error *local_err = NULL; + + qcow2_store_persistent_dirty_bitmaps(bs, true, &local_err); + if (local_err != NULL) { + result = -EINVAL; + error_reportf_err(local_err, "Lost persistent bitmaps during " + "inactivation of node '%s': ", + bdrv_get_device_or_node_name(bs)); + } + + ret = qcow2_cache_flush(bs, s->l2_table_cache); + if (ret) { + result = ret; + error_report("Failed to flush the L2 table cache: %s", + strerror(-ret)); + } + + ret = qcow2_cache_flush(bs, s->refcount_block_cache); + if (ret) { + result = ret; + error_report("Failed to flush the refcount block cache: %s", + strerror(-ret)); + } + + if (result == 0) { + qcow2_mark_clean(bs); + } + + return result; +} + +static void qcow2_close(BlockDriverState *bs) +{ + BDRVQcow2State *s = bs->opaque; + qemu_vfree(s->l1_table); + /* else pre-write overlap checks in cache_destroy may crash */ + s->l1_table = NULL; + + if (!(s->flags & BDRV_O_INACTIVE)) { + qcow2_inactivate(bs); + } + + cache_clean_timer_del(bs); + qcow2_cache_destroy(s->l2_table_cache); + qcow2_cache_destroy(s->refcount_block_cache); + + qcrypto_block_free(s->crypto); + s->crypto = NULL; + qapi_free_QCryptoBlockOpenOptions(s->crypto_opts); + + g_free(s->unknown_header_fields); + cleanup_unknown_header_ext(bs); + + g_free(s->image_data_file); + g_free(s->image_backing_file); + g_free(s->image_backing_format); + + if (has_data_file(bs)) { + bdrv_unref_child(bs, s->data_file); + s->data_file = NULL; + } + + qcow2_refcount_close(bs); + qcow2_free_snapshots(bs); +} + +static void coroutine_fn qcow2_co_invalidate_cache(BlockDriverState *bs, + Error **errp) +{ + BDRVQcow2State *s = bs->opaque; + int flags = s->flags; + QCryptoBlock *crypto = NULL; + QDict *options; + Error *local_err = NULL; + int ret; + + /* + * Backing files are read-only which makes all of their metadata immutable, + * that means we don't have to worry about reopening them here. + */ + + crypto = s->crypto; + s->crypto = NULL; + + qcow2_close(bs); + + memset(s, 0, sizeof(BDRVQcow2State)); + options = qdict_clone_shallow(bs->options); + + flags &= ~BDRV_O_INACTIVE; + qemu_co_mutex_lock(&s->lock); + ret = qcow2_do_open(bs, options, flags, &local_err); + qemu_co_mutex_unlock(&s->lock); + qobject_unref(options); + if (local_err) { + error_propagate_prepend(errp, local_err, + "Could not reopen qcow2 layer: "); + bs->drv = NULL; + return; + } else if (ret < 0) { + error_setg_errno(errp, -ret, "Could not reopen qcow2 layer"); + bs->drv = NULL; + return; + } + + s->crypto = crypto; +} + +static size_t header_ext_add(char *buf, uint32_t magic, const void *s, + size_t len, size_t buflen) +{ + QCowExtension *ext_backing_fmt = (QCowExtension*) buf; + size_t ext_len = sizeof(QCowExtension) + ((len + 7) & ~7); + + if (buflen < ext_len) { + return -ENOSPC; + } + + *ext_backing_fmt = (QCowExtension) { + .magic = cpu_to_be32(magic), + .len = cpu_to_be32(len), + }; + + if (len) { + memcpy(buf + sizeof(QCowExtension), s, len); + } + + return ext_len; +} + +/* + * Updates the qcow2 header, including the variable length parts of it, i.e. + * the backing file name and all extensions. qcow2 was not designed to allow + * such changes, so if we run out of space (we can only use the first cluster) + * this function may fail. + * + * Returns 0 on success, -errno in error cases. + */ +int qcow2_update_header(BlockDriverState *bs) +{ + BDRVQcow2State *s = bs->opaque; + QCowHeader *header; + char *buf; + size_t buflen = s->cluster_size; + int ret; + uint64_t total_size; + uint32_t refcount_table_clusters; + size_t header_length; + Qcow2UnknownHeaderExtension *uext; + + buf = qemu_blockalign(bs, buflen); + + /* Header structure */ + header = (QCowHeader*) buf; + + if (buflen < sizeof(*header)) { + ret = -ENOSPC; + goto fail; + } + + header_length = sizeof(*header) + s->unknown_header_fields_size; + total_size = bs->total_sectors * BDRV_SECTOR_SIZE; + refcount_table_clusters = s->refcount_table_size >> (s->cluster_bits - 3); + + ret = validate_compression_type(s, NULL); + if (ret) { + goto fail; + } + + *header = (QCowHeader) { + /* Version 2 fields */ + .magic = cpu_to_be32(QCOW_MAGIC), + .version = cpu_to_be32(s->qcow_version), + .backing_file_offset = 0, + .backing_file_size = 0, + .cluster_bits = cpu_to_be32(s->cluster_bits), + .size = cpu_to_be64(total_size), + .crypt_method = cpu_to_be32(s->crypt_method_header), + .l1_size = cpu_to_be32(s->l1_size), + .l1_table_offset = cpu_to_be64(s->l1_table_offset), + .refcount_table_offset = cpu_to_be64(s->refcount_table_offset), + .refcount_table_clusters = cpu_to_be32(refcount_table_clusters), + .nb_snapshots = cpu_to_be32(s->nb_snapshots), + .snapshots_offset = cpu_to_be64(s->snapshots_offset), + + /* Version 3 fields */ + .incompatible_features = cpu_to_be64(s->incompatible_features), + .compatible_features = cpu_to_be64(s->compatible_features), + .autoclear_features = cpu_to_be64(s->autoclear_features), + .refcount_order = cpu_to_be32(s->refcount_order), + .header_length = cpu_to_be32(header_length), + .compression_type = s->compression_type, + }; + + /* For older versions, write a shorter header */ + switch (s->qcow_version) { + case 2: + ret = offsetof(QCowHeader, incompatible_features); + break; + case 3: + ret = sizeof(*header); + break; + default: + ret = -EINVAL; + goto fail; + } + + buf += ret; + buflen -= ret; + memset(buf, 0, buflen); + + /* Preserve any unknown field in the header */ + if (s->unknown_header_fields_size) { + if (buflen < s->unknown_header_fields_size) { + ret = -ENOSPC; + goto fail; + } + + memcpy(buf, s->unknown_header_fields, s->unknown_header_fields_size); + buf += s->unknown_header_fields_size; + buflen -= s->unknown_header_fields_size; + } + + /* Backing file format header extension */ + if (s->image_backing_format) { + ret = header_ext_add(buf, QCOW2_EXT_MAGIC_BACKING_FORMAT, + s->image_backing_format, + strlen(s->image_backing_format), + buflen); + if (ret < 0) { + goto fail; + } + + buf += ret; + buflen -= ret; + } + + /* External data file header extension */ + if (has_data_file(bs) && s->image_data_file) { + ret = header_ext_add(buf, QCOW2_EXT_MAGIC_DATA_FILE, + s->image_data_file, strlen(s->image_data_file), + buflen); + if (ret < 0) { + goto fail; + } + + buf += ret; + buflen -= ret; + } + + /* Full disk encryption header pointer extension */ + if (s->crypto_header.offset != 0) { + s->crypto_header.offset = cpu_to_be64(s->crypto_header.offset); + s->crypto_header.length = cpu_to_be64(s->crypto_header.length); + ret = header_ext_add(buf, QCOW2_EXT_MAGIC_CRYPTO_HEADER, + &s->crypto_header, sizeof(s->crypto_header), + buflen); + s->crypto_header.offset = be64_to_cpu(s->crypto_header.offset); + s->crypto_header.length = be64_to_cpu(s->crypto_header.length); + if (ret < 0) { + goto fail; + } + buf += ret; + buflen -= ret; + } + + /* + * Feature table. A mere 8 feature names occupies 392 bytes, and + * when coupled with the v3 minimum header of 104 bytes plus the + * 8-byte end-of-extension marker, that would leave only 8 bytes + * for a backing file name in an image with 512-byte clusters. + * Thus, we choose to omit this header for cluster sizes 4k and + * smaller. + */ + if (s->qcow_version >= 3 && s->cluster_size > 4096) { + static const Qcow2Feature features[] = { + { + .type = QCOW2_FEAT_TYPE_INCOMPATIBLE, + .bit = QCOW2_INCOMPAT_DIRTY_BITNR, + .name = "dirty bit", + }, + { + .type = QCOW2_FEAT_TYPE_INCOMPATIBLE, + .bit = QCOW2_INCOMPAT_CORRUPT_BITNR, + .name = "corrupt bit", + }, + { + .type = QCOW2_FEAT_TYPE_INCOMPATIBLE, + .bit = QCOW2_INCOMPAT_DATA_FILE_BITNR, + .name = "external data file", + }, + { + .type = QCOW2_FEAT_TYPE_INCOMPATIBLE, + .bit = QCOW2_INCOMPAT_COMPRESSION_BITNR, + .name = "compression type", + }, + { + .type = QCOW2_FEAT_TYPE_INCOMPATIBLE, + .bit = QCOW2_INCOMPAT_EXTL2_BITNR, + .name = "extended L2 entries", + }, + { + .type = QCOW2_FEAT_TYPE_COMPATIBLE, + .bit = QCOW2_COMPAT_LAZY_REFCOUNTS_BITNR, + .name = "lazy refcounts", + }, + { + .type = QCOW2_FEAT_TYPE_AUTOCLEAR, + .bit = QCOW2_AUTOCLEAR_BITMAPS_BITNR, + .name = "bitmaps", + }, + { + .type = QCOW2_FEAT_TYPE_AUTOCLEAR, + .bit = QCOW2_AUTOCLEAR_DATA_FILE_RAW_BITNR, + .name = "raw external data", + }, + }; + + ret = header_ext_add(buf, QCOW2_EXT_MAGIC_FEATURE_TABLE, + features, sizeof(features), buflen); + if (ret < 0) { + goto fail; + } + buf += ret; + buflen -= ret; + } + + /* Bitmap extension */ + if (s->nb_bitmaps > 0) { + Qcow2BitmapHeaderExt bitmaps_header = { + .nb_bitmaps = cpu_to_be32(s->nb_bitmaps), + .bitmap_directory_size = + cpu_to_be64(s->bitmap_directory_size), + .bitmap_directory_offset = + cpu_to_be64(s->bitmap_directory_offset) + }; + ret = header_ext_add(buf, QCOW2_EXT_MAGIC_BITMAPS, + &bitmaps_header, sizeof(bitmaps_header), + buflen); + if (ret < 0) { + goto fail; + } + buf += ret; + buflen -= ret; + } + + /* Keep unknown header extensions */ + QLIST_FOREACH(uext, &s->unknown_header_ext, next) { + ret = header_ext_add(buf, uext->magic, uext->data, uext->len, buflen); + if (ret < 0) { + goto fail; + } + + buf += ret; + buflen -= ret; + } + + /* End of header extensions */ + ret = header_ext_add(buf, QCOW2_EXT_MAGIC_END, NULL, 0, buflen); + if (ret < 0) { + goto fail; + } + + buf += ret; + buflen -= ret; + + /* Backing file name */ + if (s->image_backing_file) { + size_t backing_file_len = strlen(s->image_backing_file); + + if (buflen < backing_file_len) { + ret = -ENOSPC; + goto fail; + } + + /* Using strncpy is ok here, since buf is not NUL-terminated. */ + strncpy(buf, s->image_backing_file, buflen); + + header->backing_file_offset = cpu_to_be64(buf - ((char*) header)); + header->backing_file_size = cpu_to_be32(backing_file_len); + } + + /* Write the new header */ + ret = bdrv_pwrite(bs->file, 0, header, s->cluster_size); + if (ret < 0) { + goto fail; + } + + ret = 0; +fail: + qemu_vfree(header); + return ret; +} + +static int qcow2_change_backing_file(BlockDriverState *bs, + const char *backing_file, const char *backing_fmt) +{ + BDRVQcow2State *s = bs->opaque; + + /* Adding a backing file means that the external data file alone won't be + * enough to make sense of the content */ + if (backing_file && data_file_is_raw(bs)) { + return -EINVAL; + } + + if (backing_file && strlen(backing_file) > 1023) { + return -EINVAL; + } + + pstrcpy(bs->auto_backing_file, sizeof(bs->auto_backing_file), + backing_file ?: ""); + pstrcpy(bs->backing_file, sizeof(bs->backing_file), backing_file ?: ""); + pstrcpy(bs->backing_format, sizeof(bs->backing_format), backing_fmt ?: ""); + + g_free(s->image_backing_file); + g_free(s->image_backing_format); + + s->image_backing_file = backing_file ? g_strdup(bs->backing_file) : NULL; + s->image_backing_format = backing_fmt ? g_strdup(bs->backing_format) : NULL; + + return qcow2_update_header(bs); +} + +static int qcow2_set_up_encryption(BlockDriverState *bs, + QCryptoBlockCreateOptions *cryptoopts, + Error **errp) +{ + BDRVQcow2State *s = bs->opaque; + QCryptoBlock *crypto = NULL; + int fmt, ret; + + switch (cryptoopts->format) { + case Q_CRYPTO_BLOCK_FORMAT_LUKS: + fmt = QCOW_CRYPT_LUKS; + break; + case Q_CRYPTO_BLOCK_FORMAT_QCOW: + fmt = QCOW_CRYPT_AES; + break; + default: + error_setg(errp, "Crypto format not supported in qcow2"); + return -EINVAL; + } + + s->crypt_method_header = fmt; + + crypto = qcrypto_block_create(cryptoopts, "encrypt.", + qcow2_crypto_hdr_init_func, + qcow2_crypto_hdr_write_func, + bs, errp); + if (!crypto) { + return -EINVAL; + } + + ret = qcow2_update_header(bs); + if (ret < 0) { + error_setg_errno(errp, -ret, "Could not write encryption header"); + goto out; + } + + ret = 0; + out: + qcrypto_block_free(crypto); + return ret; +} + +/** + * Preallocates metadata structures for data clusters between @offset (in the + * guest disk) and @new_length (which is thus generally the new guest disk + * size). + * + * Returns: 0 on success, -errno on failure. + */ +static int coroutine_fn preallocate_co(BlockDriverState *bs, uint64_t offset, + uint64_t new_length, PreallocMode mode, + Error **errp) +{ + BDRVQcow2State *s = bs->opaque; + uint64_t bytes; + uint64_t host_offset = 0; + int64_t file_length; + unsigned int cur_bytes; + int ret; + QCowL2Meta *meta = NULL, *m; + + assert(offset <= new_length); + bytes = new_length - offset; + + while (bytes) { + cur_bytes = MIN(bytes, QEMU_ALIGN_DOWN(INT_MAX, s->cluster_size)); + ret = qcow2_alloc_host_offset(bs, offset, &cur_bytes, + &host_offset, &meta); + if (ret < 0) { + error_setg_errno(errp, -ret, "Allocating clusters failed"); + goto out; + } + + for (m = meta; m != NULL; m = m->next) { + m->prealloc = true; + } + + ret = qcow2_handle_l2meta(bs, &meta, true); + if (ret < 0) { + error_setg_errno(errp, -ret, "Mapping clusters failed"); + goto out; + } + + /* TODO Preallocate data if requested */ + + bytes -= cur_bytes; + offset += cur_bytes; + } + + /* + * It is expected that the image file is large enough to actually contain + * all of the allocated clusters (otherwise we get failing reads after + * EOF). Extend the image to the last allocated sector. + */ + file_length = bdrv_getlength(s->data_file->bs); + if (file_length < 0) { + error_setg_errno(errp, -file_length, "Could not get file size"); + ret = file_length; + goto out; + } + + if (host_offset + cur_bytes > file_length) { + if (mode == PREALLOC_MODE_METADATA) { + mode = PREALLOC_MODE_OFF; + } + ret = bdrv_co_truncate(s->data_file, host_offset + cur_bytes, false, + mode, 0, errp); + if (ret < 0) { + goto out; + } + } + + ret = 0; + +out: + qcow2_handle_l2meta(bs, &meta, false); + return ret; +} + +/* qcow2_refcount_metadata_size: + * @clusters: number of clusters to refcount (including data and L1/L2 tables) + * @cluster_size: size of a cluster, in bytes + * @refcount_order: refcount bits power-of-2 exponent + * @generous_increase: allow for the refcount table to be 1.5x as large as it + * needs to be + * + * Returns: Number of bytes required for refcount blocks and table metadata. + */ +int64_t qcow2_refcount_metadata_size(int64_t clusters, size_t cluster_size, + int refcount_order, bool generous_increase, + uint64_t *refblock_count) +{ + /* + * Every host cluster is reference-counted, including metadata (even + * refcount metadata is recursively included). + * + * An accurate formula for the size of refcount metadata size is difficult + * to derive. An easier method of calculation is finding the fixed point + * where no further refcount blocks or table clusters are required to + * reference count every cluster. + */ + int64_t blocks_per_table_cluster = cluster_size / REFTABLE_ENTRY_SIZE; + int64_t refcounts_per_block = cluster_size * 8 / (1 << refcount_order); + int64_t table = 0; /* number of refcount table clusters */ + int64_t blocks = 0; /* number of refcount block clusters */ + int64_t last; + int64_t n = 0; + + do { + last = n; + blocks = DIV_ROUND_UP(clusters + table + blocks, refcounts_per_block); + table = DIV_ROUND_UP(blocks, blocks_per_table_cluster); + n = clusters + blocks + table; + + if (n == last && generous_increase) { + clusters += DIV_ROUND_UP(table, 2); + n = 0; /* force another loop */ + generous_increase = false; + } + } while (n != last); + + if (refblock_count) { + *refblock_count = blocks; + } + + return (blocks + table) * cluster_size; +} + +/** + * qcow2_calc_prealloc_size: + * @total_size: virtual disk size in bytes + * @cluster_size: cluster size in bytes + * @refcount_order: refcount bits power-of-2 exponent + * @extended_l2: true if the image has extended L2 entries + * + * Returns: Total number of bytes required for the fully allocated image + * (including metadata). + */ +static int64_t qcow2_calc_prealloc_size(int64_t total_size, + size_t cluster_size, + int refcount_order, + bool extended_l2) +{ + int64_t meta_size = 0; + uint64_t nl1e, nl2e; + int64_t aligned_total_size = ROUND_UP(total_size, cluster_size); + size_t l2e_size = extended_l2 ? L2E_SIZE_EXTENDED : L2E_SIZE_NORMAL; + + /* header: 1 cluster */ + meta_size += cluster_size; + + /* total size of L2 tables */ + nl2e = aligned_total_size / cluster_size; + nl2e = ROUND_UP(nl2e, cluster_size / l2e_size); + meta_size += nl2e * l2e_size; + + /* total size of L1 tables */ + nl1e = nl2e * l2e_size / cluster_size; + nl1e = ROUND_UP(nl1e, cluster_size / L1E_SIZE); + meta_size += nl1e * L1E_SIZE; + + /* total size of refcount table and blocks */ + meta_size += qcow2_refcount_metadata_size( + (meta_size + aligned_total_size) / cluster_size, + cluster_size, refcount_order, false, NULL); + + return meta_size + aligned_total_size; +} + +static bool validate_cluster_size(size_t cluster_size, bool extended_l2, + Error **errp) +{ + int cluster_bits = ctz32(cluster_size); + if (cluster_bits < MIN_CLUSTER_BITS || cluster_bits > MAX_CLUSTER_BITS || + (1 << cluster_bits) != cluster_size) + { + error_setg(errp, "Cluster size must be a power of two between %d and " + "%dk", 1 << MIN_CLUSTER_BITS, 1 << (MAX_CLUSTER_BITS - 10)); + return false; + } + + if (extended_l2) { + unsigned min_cluster_size = + (1 << MIN_CLUSTER_BITS) * QCOW_EXTL2_SUBCLUSTERS_PER_CLUSTER; + if (cluster_size < min_cluster_size) { + error_setg(errp, "Extended L2 entries are only supported with " + "cluster sizes of at least %u bytes", min_cluster_size); + return false; + } + } + + return true; +} + +static size_t qcow2_opt_get_cluster_size_del(QemuOpts *opts, bool extended_l2, + Error **errp) +{ + size_t cluster_size; + + cluster_size = qemu_opt_get_size_del(opts, BLOCK_OPT_CLUSTER_SIZE, + DEFAULT_CLUSTER_SIZE); + if (!validate_cluster_size(cluster_size, extended_l2, errp)) { + return 0; + } + return cluster_size; +} + +static int qcow2_opt_get_version_del(QemuOpts *opts, Error **errp) +{ + char *buf; + int ret; + + buf = qemu_opt_get_del(opts, BLOCK_OPT_COMPAT_LEVEL); + if (!buf) { + ret = 3; /* default */ + } else if (!strcmp(buf, "0.10")) { + ret = 2; + } else if (!strcmp(buf, "1.1")) { + ret = 3; + } else { + error_setg(errp, "Invalid compatibility level: '%s'", buf); + ret = -EINVAL; + } + g_free(buf); + return ret; +} + +static uint64_t qcow2_opt_get_refcount_bits_del(QemuOpts *opts, int version, + Error **errp) +{ + uint64_t refcount_bits; + + refcount_bits = qemu_opt_get_number_del(opts, BLOCK_OPT_REFCOUNT_BITS, 16); + if (refcount_bits > 64 || !is_power_of_2(refcount_bits)) { + error_setg(errp, "Refcount width must be a power of two and may not " + "exceed 64 bits"); + return 0; + } + + if (version < 3 && refcount_bits != 16) { + error_setg(errp, "Different refcount widths than 16 bits require " + "compatibility level 1.1 or above (use compat=1.1 or " + "greater)"); + return 0; + } + + return refcount_bits; +} + +static int coroutine_fn +qcow2_co_create(BlockdevCreateOptions *create_options, Error **errp) +{ + BlockdevCreateOptionsQcow2 *qcow2_opts; + QDict *options; + + /* + * Open the image file and write a minimal qcow2 header. + * + * We keep things simple and start with a zero-sized image. We also + * do without refcount blocks or a L1 table for now. We'll fix the + * inconsistency later. + * + * We do need a refcount table because growing the refcount table means + * allocating two new refcount blocks - the second of which would be at + * 2 GB for 64k clusters, and we don't want to have a 2 GB initial file + * size for any qcow2 image. + */ + BlockBackend *blk = NULL; + BlockDriverState *bs = NULL; + BlockDriverState *data_bs = NULL; + QCowHeader *header; + size_t cluster_size; + int version; + int refcount_order; + uint64_t *refcount_table; + int ret; + uint8_t compression_type = QCOW2_COMPRESSION_TYPE_ZLIB; + + assert(create_options->driver == BLOCKDEV_DRIVER_QCOW2); + qcow2_opts = &create_options->u.qcow2; + + bs = bdrv_open_blockdev_ref(qcow2_opts->file, errp); + if (bs == NULL) { + return -EIO; + } + + /* Validate options and set default values */ + if (!QEMU_IS_ALIGNED(qcow2_opts->size, BDRV_SECTOR_SIZE)) { + error_setg(errp, "Image size must be a multiple of %u bytes", + (unsigned) BDRV_SECTOR_SIZE); + ret = -EINVAL; + goto out; + } + + if (qcow2_opts->has_version) { + switch (qcow2_opts->version) { + case BLOCKDEV_QCOW2_VERSION_V2: + version = 2; + break; + case BLOCKDEV_QCOW2_VERSION_V3: + version = 3; + break; + default: + g_assert_not_reached(); + } + } else { + version = 3; + } + + if (qcow2_opts->has_cluster_size) { + cluster_size = qcow2_opts->cluster_size; + } else { + cluster_size = DEFAULT_CLUSTER_SIZE; + } + + if (!qcow2_opts->has_extended_l2) { + qcow2_opts->extended_l2 = false; + } + if (qcow2_opts->extended_l2) { + if (version < 3) { + error_setg(errp, "Extended L2 entries are only supported with " + "compatibility level 1.1 and above (use version=v3 or " + "greater)"); + ret = -EINVAL; + goto out; + } + } + + if (!validate_cluster_size(cluster_size, qcow2_opts->extended_l2, errp)) { + ret = -EINVAL; + goto out; + } + + if (!qcow2_opts->has_preallocation) { + qcow2_opts->preallocation = PREALLOC_MODE_OFF; + } + if (qcow2_opts->has_backing_file && + qcow2_opts->preallocation != PREALLOC_MODE_OFF && + !qcow2_opts->extended_l2) + { + error_setg(errp, "Backing file and preallocation can only be used at " + "the same time if extended_l2 is on"); + ret = -EINVAL; + goto out; + } + if (qcow2_opts->has_backing_fmt && !qcow2_opts->has_backing_file) { + error_setg(errp, "Backing format cannot be used without backing file"); + ret = -EINVAL; + goto out; + } + + if (!qcow2_opts->has_lazy_refcounts) { + qcow2_opts->lazy_refcounts = false; + } + if (version < 3 && qcow2_opts->lazy_refcounts) { + error_setg(errp, "Lazy refcounts only supported with compatibility " + "level 1.1 and above (use version=v3 or greater)"); + ret = -EINVAL; + goto out; + } + + if (!qcow2_opts->has_refcount_bits) { + qcow2_opts->refcount_bits = 16; + } + if (qcow2_opts->refcount_bits > 64 || + !is_power_of_2(qcow2_opts->refcount_bits)) + { + error_setg(errp, "Refcount width must be a power of two and may not " + "exceed 64 bits"); + ret = -EINVAL; + goto out; + } + if (version < 3 && qcow2_opts->refcount_bits != 16) { + error_setg(errp, "Different refcount widths than 16 bits require " + "compatibility level 1.1 or above (use version=v3 or " + "greater)"); + ret = -EINVAL; + goto out; + } + refcount_order = ctz32(qcow2_opts->refcount_bits); + + if (qcow2_opts->data_file_raw && !qcow2_opts->data_file) { + error_setg(errp, "data-file-raw requires data-file"); + ret = -EINVAL; + goto out; + } + if (qcow2_opts->data_file_raw && qcow2_opts->has_backing_file) { + error_setg(errp, "Backing file and data-file-raw cannot be used at " + "the same time"); + ret = -EINVAL; + goto out; + } + + if (qcow2_opts->data_file) { + if (version < 3) { + error_setg(errp, "External data files are only supported with " + "compatibility level 1.1 and above (use version=v3 or " + "greater)"); + ret = -EINVAL; + goto out; + } + data_bs = bdrv_open_blockdev_ref(qcow2_opts->data_file, errp); + if (data_bs == NULL) { + ret = -EIO; + goto out; + } + } + + if (qcow2_opts->has_compression_type && + qcow2_opts->compression_type != QCOW2_COMPRESSION_TYPE_ZLIB) { + + ret = -EINVAL; + + if (version < 3) { + error_setg(errp, "Non-zlib compression type is only supported with " + "compatibility level 1.1 and above (use version=v3 or " + "greater)"); + goto out; + } + + switch (qcow2_opts->compression_type) { +#ifdef CONFIG_ZSTD + case QCOW2_COMPRESSION_TYPE_ZSTD: + break; +#endif + default: + error_setg(errp, "Unknown compression type"); + goto out; + } + + compression_type = qcow2_opts->compression_type; + } + + /* Create BlockBackend to write to the image */ + blk = blk_new_with_bs(bs, BLK_PERM_WRITE | BLK_PERM_RESIZE, BLK_PERM_ALL, + errp); + if (!blk) { + ret = -EPERM; + goto out; + } + blk_set_allow_write_beyond_eof(blk, true); + + /* Write the header */ + QEMU_BUILD_BUG_ON((1 << MIN_CLUSTER_BITS) < sizeof(*header)); + header = g_malloc0(cluster_size); + *header = (QCowHeader) { + .magic = cpu_to_be32(QCOW_MAGIC), + .version = cpu_to_be32(version), + .cluster_bits = cpu_to_be32(ctz32(cluster_size)), + .size = cpu_to_be64(0), + .l1_table_offset = cpu_to_be64(0), + .l1_size = cpu_to_be32(0), + .refcount_table_offset = cpu_to_be64(cluster_size), + .refcount_table_clusters = cpu_to_be32(1), + .refcount_order = cpu_to_be32(refcount_order), + /* don't deal with endianness since compression_type is 1 byte long */ + .compression_type = compression_type, + .header_length = cpu_to_be32(sizeof(*header)), + }; + + /* We'll update this to correct value later */ + header->crypt_method = cpu_to_be32(QCOW_CRYPT_NONE); + + if (qcow2_opts->lazy_refcounts) { + header->compatible_features |= + cpu_to_be64(QCOW2_COMPAT_LAZY_REFCOUNTS); + } + if (data_bs) { + header->incompatible_features |= + cpu_to_be64(QCOW2_INCOMPAT_DATA_FILE); + } + if (qcow2_opts->data_file_raw) { + header->autoclear_features |= + cpu_to_be64(QCOW2_AUTOCLEAR_DATA_FILE_RAW); + } + if (compression_type != QCOW2_COMPRESSION_TYPE_ZLIB) { + header->incompatible_features |= + cpu_to_be64(QCOW2_INCOMPAT_COMPRESSION); + } + + if (qcow2_opts->extended_l2) { + header->incompatible_features |= + cpu_to_be64(QCOW2_INCOMPAT_EXTL2); + } + + ret = blk_pwrite(blk, 0, header, cluster_size, 0); + g_free(header); + if (ret < 0) { + error_setg_errno(errp, -ret, "Could not write qcow2 header"); + goto out; + } + + /* Write a refcount table with one refcount block */ + refcount_table = g_malloc0(2 * cluster_size); + refcount_table[0] = cpu_to_be64(2 * cluster_size); + ret = blk_pwrite(blk, cluster_size, refcount_table, 2 * cluster_size, 0); + g_free(refcount_table); + + if (ret < 0) { + error_setg_errno(errp, -ret, "Could not write refcount table"); + goto out; + } + + blk_unref(blk); + blk = NULL; + + /* + * And now open the image and make it consistent first (i.e. increase the + * refcount of the cluster that is occupied by the header and the refcount + * table) + */ + options = qdict_new(); + qdict_put_str(options, "driver", "qcow2"); + qdict_put_str(options, "file", bs->node_name); + if (data_bs) { + qdict_put_str(options, "data-file", data_bs->node_name); + } + blk = blk_new_open(NULL, NULL, options, + BDRV_O_RDWR | BDRV_O_RESIZE | BDRV_O_NO_FLUSH, + errp); + if (blk == NULL) { + ret = -EIO; + goto out; + } + + ret = qcow2_alloc_clusters(blk_bs(blk), 3 * cluster_size); + if (ret < 0) { + error_setg_errno(errp, -ret, "Could not allocate clusters for qcow2 " + "header and refcount table"); + goto out; + + } else if (ret != 0) { + error_report("Huh, first cluster in empty image is already in use?"); + abort(); + } + + /* Set the external data file if necessary */ + if (data_bs) { + BDRVQcow2State *s = blk_bs(blk)->opaque; + s->image_data_file = g_strdup(data_bs->filename); + } + + /* Create a full header (including things like feature table) */ + ret = qcow2_update_header(blk_bs(blk)); + if (ret < 0) { + error_setg_errno(errp, -ret, "Could not update qcow2 header"); + goto out; + } + + /* Okay, now that we have a valid image, let's give it the right size */ + ret = blk_truncate(blk, qcow2_opts->size, false, qcow2_opts->preallocation, + 0, errp); + if (ret < 0) { + error_prepend(errp, "Could not resize image: "); + goto out; + } + + /* Want a backing file? There you go. */ + if (qcow2_opts->has_backing_file) { + const char *backing_format = NULL; + + if (qcow2_opts->has_backing_fmt) { + backing_format = BlockdevDriver_str(qcow2_opts->backing_fmt); + } + + ret = bdrv_change_backing_file(blk_bs(blk), qcow2_opts->backing_file, + backing_format, false); + if (ret < 0) { + error_setg_errno(errp, -ret, "Could not assign backing file '%s' " + "with format '%s'", qcow2_opts->backing_file, + backing_format); + goto out; + } + } + + /* Want encryption? There you go. */ + if (qcow2_opts->has_encrypt) { + ret = qcow2_set_up_encryption(blk_bs(blk), qcow2_opts->encrypt, errp); + if (ret < 0) { + goto out; + } + } + + blk_unref(blk); + blk = NULL; + + /* Reopen the image without BDRV_O_NO_FLUSH to flush it before returning. + * Using BDRV_O_NO_IO, since encryption is now setup we don't want to + * have to setup decryption context. We're not doing any I/O on the top + * level BlockDriverState, only lower layers, where BDRV_O_NO_IO does + * not have effect. + */ + options = qdict_new(); + qdict_put_str(options, "driver", "qcow2"); + qdict_put_str(options, "file", bs->node_name); + if (data_bs) { + qdict_put_str(options, "data-file", data_bs->node_name); + } + blk = blk_new_open(NULL, NULL, options, + BDRV_O_RDWR | BDRV_O_NO_BACKING | BDRV_O_NO_IO, + errp); + if (blk == NULL) { + ret = -EIO; + goto out; + } + + ret = 0; +out: + blk_unref(blk); + bdrv_unref(bs); + bdrv_unref(data_bs); + return ret; +} + +static int coroutine_fn qcow2_co_create_opts(BlockDriver *drv, + const char *filename, + QemuOpts *opts, + Error **errp) +{ + BlockdevCreateOptions *create_options = NULL; + QDict *qdict; + Visitor *v; + BlockDriverState *bs = NULL; + BlockDriverState *data_bs = NULL; + const char *val; + int ret; + + /* Only the keyval visitor supports the dotted syntax needed for + * encryption, so go through a QDict before getting a QAPI type. Ignore + * options meant for the protocol layer so that the visitor doesn't + * complain. */ + qdict = qemu_opts_to_qdict_filtered(opts, NULL, bdrv_qcow2.create_opts, + true); + + /* Handle encryption options */ + val = qdict_get_try_str(qdict, BLOCK_OPT_ENCRYPT); + if (val && !strcmp(val, "on")) { + qdict_put_str(qdict, BLOCK_OPT_ENCRYPT, "qcow"); + } else if (val && !strcmp(val, "off")) { + qdict_del(qdict, BLOCK_OPT_ENCRYPT); + } + + val = qdict_get_try_str(qdict, BLOCK_OPT_ENCRYPT_FORMAT); + if (val && !strcmp(val, "aes")) { + qdict_put_str(qdict, BLOCK_OPT_ENCRYPT_FORMAT, "qcow"); + } + + /* Convert compat=0.10/1.1 into compat=v2/v3, to be renamed into + * version=v2/v3 below. */ + val = qdict_get_try_str(qdict, BLOCK_OPT_COMPAT_LEVEL); + if (val && !strcmp(val, "0.10")) { + qdict_put_str(qdict, BLOCK_OPT_COMPAT_LEVEL, "v2"); + } else if (val && !strcmp(val, "1.1")) { + qdict_put_str(qdict, BLOCK_OPT_COMPAT_LEVEL, "v3"); + } + + /* Change legacy command line options into QMP ones */ + static const QDictRenames opt_renames[] = { + { BLOCK_OPT_BACKING_FILE, "backing-file" }, + { BLOCK_OPT_BACKING_FMT, "backing-fmt" }, + { BLOCK_OPT_CLUSTER_SIZE, "cluster-size" }, + { BLOCK_OPT_LAZY_REFCOUNTS, "lazy-refcounts" }, + { BLOCK_OPT_EXTL2, "extended-l2" }, + { BLOCK_OPT_REFCOUNT_BITS, "refcount-bits" }, + { BLOCK_OPT_ENCRYPT, BLOCK_OPT_ENCRYPT_FORMAT }, + { BLOCK_OPT_COMPAT_LEVEL, "version" }, + { BLOCK_OPT_DATA_FILE_RAW, "data-file-raw" }, + { BLOCK_OPT_COMPRESSION_TYPE, "compression-type" }, + { NULL, NULL }, + }; + + if (!qdict_rename_keys(qdict, opt_renames, errp)) { + ret = -EINVAL; + goto finish; + } + + /* Create and open the file (protocol layer) */ + ret = bdrv_create_file(filename, opts, errp); + if (ret < 0) { + goto finish; + } + + bs = bdrv_open(filename, NULL, NULL, + BDRV_O_RDWR | BDRV_O_RESIZE | BDRV_O_PROTOCOL, errp); + if (bs == NULL) { + ret = -EIO; + goto finish; + } + + /* Create and open an external data file (protocol layer) */ + val = qdict_get_try_str(qdict, BLOCK_OPT_DATA_FILE); + if (val) { + ret = bdrv_create_file(val, opts, errp); + if (ret < 0) { + goto finish; + } + + data_bs = bdrv_open(val, NULL, NULL, + BDRV_O_RDWR | BDRV_O_RESIZE | BDRV_O_PROTOCOL, + errp); + if (data_bs == NULL) { + ret = -EIO; + goto finish; + } + + qdict_del(qdict, BLOCK_OPT_DATA_FILE); + qdict_put_str(qdict, "data-file", data_bs->node_name); + } + + /* Set 'driver' and 'node' options */ + qdict_put_str(qdict, "driver", "qcow2"); + qdict_put_str(qdict, "file", bs->node_name); + + /* Now get the QAPI type BlockdevCreateOptions */ + v = qobject_input_visitor_new_flat_confused(qdict, errp); + if (!v) { + ret = -EINVAL; + goto finish; + } + + visit_type_BlockdevCreateOptions(v, NULL, &create_options, errp); + visit_free(v); + if (!create_options) { + ret = -EINVAL; + goto finish; + } + + /* Silently round up size */ + create_options->u.qcow2.size = ROUND_UP(create_options->u.qcow2.size, + BDRV_SECTOR_SIZE); + + /* Create the qcow2 image (format layer) */ + ret = qcow2_co_create(create_options, errp); + if (ret < 0) { + goto finish; + } + + ret = 0; +finish: + qobject_unref(qdict); + bdrv_unref(bs); + bdrv_unref(data_bs); + qapi_free_BlockdevCreateOptions(create_options); + return ret; +} + + +static bool is_zero(BlockDriverState *bs, int64_t offset, int64_t bytes) +{ + int64_t nr; + int res; + + /* Clamp to image length, before checking status of underlying sectors */ + if (offset + bytes > bs->total_sectors * BDRV_SECTOR_SIZE) { + bytes = bs->total_sectors * BDRV_SECTOR_SIZE - offset; + } + + if (!bytes) { + return true; + } + + /* + * bdrv_block_status_above doesn't merge different types of zeros, for + * example, zeros which come from the region which is unallocated in + * the whole backing chain, and zeros which come because of a short + * backing file. So, we need a loop. + */ + do { + res = bdrv_block_status_above(bs, NULL, offset, bytes, &nr, NULL, NULL); + offset += nr; + bytes -= nr; + } while (res >= 0 && (res & BDRV_BLOCK_ZERO) && nr && bytes); + + return res >= 0 && (res & BDRV_BLOCK_ZERO) && bytes == 0; +} + +static coroutine_fn int qcow2_co_pwrite_zeroes(BlockDriverState *bs, + int64_t offset, int bytes, BdrvRequestFlags flags) +{ + int ret; + BDRVQcow2State *s = bs->opaque; + + uint32_t head = offset_into_subcluster(s, offset); + uint32_t tail = ROUND_UP(offset + bytes, s->subcluster_size) - + (offset + bytes); + + trace_qcow2_pwrite_zeroes_start_req(qemu_coroutine_self(), offset, bytes); + if (offset + bytes == bs->total_sectors * BDRV_SECTOR_SIZE) { + tail = 0; + } + + if (head || tail) { + uint64_t off; + unsigned int nr; + QCow2SubclusterType type; + + assert(head + bytes + tail <= s->subcluster_size); + + /* check whether remainder of cluster already reads as zero */ + if (!(is_zero(bs, offset - head, head) && + is_zero(bs, offset + bytes, tail))) { + return -ENOTSUP; + } + + qemu_co_mutex_lock(&s->lock); + /* We can have new write after previous check */ + offset -= head; + bytes = s->subcluster_size; + nr = s->subcluster_size; + ret = qcow2_get_host_offset(bs, offset, &nr, &off, &type); + if (ret < 0 || + (type != QCOW2_SUBCLUSTER_UNALLOCATED_PLAIN && + type != QCOW2_SUBCLUSTER_UNALLOCATED_ALLOC && + type != QCOW2_SUBCLUSTER_ZERO_PLAIN && + type != QCOW2_SUBCLUSTER_ZERO_ALLOC)) { + qemu_co_mutex_unlock(&s->lock); + return ret < 0 ? ret : -ENOTSUP; + } + } else { + qemu_co_mutex_lock(&s->lock); + } + + trace_qcow2_pwrite_zeroes(qemu_coroutine_self(), offset, bytes); + + /* Whatever is left can use real zero subclusters */ + ret = qcow2_subcluster_zeroize(bs, offset, bytes, flags); + qemu_co_mutex_unlock(&s->lock); + + return ret; +} + +static coroutine_fn int qcow2_co_pdiscard(BlockDriverState *bs, + int64_t offset, int bytes) +{ + int ret; + BDRVQcow2State *s = bs->opaque; + + /* If the image does not support QCOW_OFLAG_ZERO then discarding + * clusters could expose stale data from the backing file. */ + if (s->qcow_version < 3 && bs->backing) { + return -ENOTSUP; + } + + if (!QEMU_IS_ALIGNED(offset | bytes, s->cluster_size)) { + assert(bytes < s->cluster_size); + /* Ignore partial clusters, except for the special case of the + * complete partial cluster at the end of an unaligned file */ + if (!QEMU_IS_ALIGNED(offset, s->cluster_size) || + offset + bytes != bs->total_sectors * BDRV_SECTOR_SIZE) { + return -ENOTSUP; + } + } + + qemu_co_mutex_lock(&s->lock); + ret = qcow2_cluster_discard(bs, offset, bytes, QCOW2_DISCARD_REQUEST, + false); + qemu_co_mutex_unlock(&s->lock); + return ret; +} + +static int coroutine_fn +qcow2_co_copy_range_from(BlockDriverState *bs, + BdrvChild *src, uint64_t src_offset, + BdrvChild *dst, uint64_t dst_offset, + uint64_t bytes, BdrvRequestFlags read_flags, + BdrvRequestFlags write_flags) +{ + BDRVQcow2State *s = bs->opaque; + int ret; + unsigned int cur_bytes; /* number of bytes in current iteration */ + BdrvChild *child = NULL; + BdrvRequestFlags cur_write_flags; + + assert(!bs->encrypted); + qemu_co_mutex_lock(&s->lock); + + while (bytes != 0) { + uint64_t copy_offset = 0; + QCow2SubclusterType type; + /* prepare next request */ + cur_bytes = MIN(bytes, INT_MAX); + cur_write_flags = write_flags; + + ret = qcow2_get_host_offset(bs, src_offset, &cur_bytes, + ©_offset, &type); + if (ret < 0) { + goto out; + } + + switch (type) { + case QCOW2_SUBCLUSTER_UNALLOCATED_PLAIN: + case QCOW2_SUBCLUSTER_UNALLOCATED_ALLOC: + if (bs->backing && bs->backing->bs) { + int64_t backing_length = bdrv_getlength(bs->backing->bs); + if (src_offset >= backing_length) { + cur_write_flags |= BDRV_REQ_ZERO_WRITE; + } else { + child = bs->backing; + cur_bytes = MIN(cur_bytes, backing_length - src_offset); + copy_offset = src_offset; + } + } else { + cur_write_flags |= BDRV_REQ_ZERO_WRITE; + } + break; + + case QCOW2_SUBCLUSTER_ZERO_PLAIN: + case QCOW2_SUBCLUSTER_ZERO_ALLOC: + cur_write_flags |= BDRV_REQ_ZERO_WRITE; + break; + + case QCOW2_SUBCLUSTER_COMPRESSED: + ret = -ENOTSUP; + goto out; + + case QCOW2_SUBCLUSTER_NORMAL: + child = s->data_file; + break; + + default: + abort(); + } + qemu_co_mutex_unlock(&s->lock); + ret = bdrv_co_copy_range_from(child, + copy_offset, + dst, dst_offset, + cur_bytes, read_flags, cur_write_flags); + qemu_co_mutex_lock(&s->lock); + if (ret < 0) { + goto out; + } + + bytes -= cur_bytes; + src_offset += cur_bytes; + dst_offset += cur_bytes; + } + ret = 0; + +out: + qemu_co_mutex_unlock(&s->lock); + return ret; +} + +static int coroutine_fn +qcow2_co_copy_range_to(BlockDriverState *bs, + BdrvChild *src, uint64_t src_offset, + BdrvChild *dst, uint64_t dst_offset, + uint64_t bytes, BdrvRequestFlags read_flags, + BdrvRequestFlags write_flags) +{ + BDRVQcow2State *s = bs->opaque; + int ret; + unsigned int cur_bytes; /* number of sectors in current iteration */ + uint64_t host_offset; + QCowL2Meta *l2meta = NULL; + + assert(!bs->encrypted); + + qemu_co_mutex_lock(&s->lock); + + while (bytes != 0) { + + l2meta = NULL; + + cur_bytes = MIN(bytes, INT_MAX); + + /* TODO: + * If src->bs == dst->bs, we could simply copy by incrementing + * the refcnt, without copying user data. + * Or if src->bs == dst->bs->backing->bs, we could copy by discarding. */ + ret = qcow2_alloc_host_offset(bs, dst_offset, &cur_bytes, + &host_offset, &l2meta); + if (ret < 0) { + goto fail; + } + + ret = qcow2_pre_write_overlap_check(bs, 0, host_offset, cur_bytes, + true); + if (ret < 0) { + goto fail; + } + + qemu_co_mutex_unlock(&s->lock); + ret = bdrv_co_copy_range_to(src, src_offset, s->data_file, host_offset, + cur_bytes, read_flags, write_flags); + qemu_co_mutex_lock(&s->lock); + if (ret < 0) { + goto fail; + } + + ret = qcow2_handle_l2meta(bs, &l2meta, true); + if (ret) { + goto fail; + } + + bytes -= cur_bytes; + src_offset += cur_bytes; + dst_offset += cur_bytes; + } + ret = 0; + +fail: + qcow2_handle_l2meta(bs, &l2meta, false); + + qemu_co_mutex_unlock(&s->lock); + + trace_qcow2_writev_done_req(qemu_coroutine_self(), ret); + + return ret; +} + +static int coroutine_fn qcow2_co_truncate(BlockDriverState *bs, int64_t offset, + bool exact, PreallocMode prealloc, + BdrvRequestFlags flags, Error **errp) +{ + BDRVQcow2State *s = bs->opaque; + uint64_t old_length; + int64_t new_l1_size; + int ret; + QDict *options; + + if (prealloc != PREALLOC_MODE_OFF && prealloc != PREALLOC_MODE_METADATA && + prealloc != PREALLOC_MODE_FALLOC && prealloc != PREALLOC_MODE_FULL) + { + error_setg(errp, "Unsupported preallocation mode '%s'", + PreallocMode_str(prealloc)); + return -ENOTSUP; + } + + if (!QEMU_IS_ALIGNED(offset, BDRV_SECTOR_SIZE)) { + error_setg(errp, "The new size must be a multiple of %u", + (unsigned) BDRV_SECTOR_SIZE); + return -EINVAL; + } + + qemu_co_mutex_lock(&s->lock); + + /* + * Even though we store snapshot size for all images, it was not + * required until v3, so it is not safe to proceed for v2. + */ + if (s->nb_snapshots && s->qcow_version < 3) { + error_setg(errp, "Can't resize a v2 image which has snapshots"); + ret = -ENOTSUP; + goto fail; + } + + /* See qcow2-bitmap.c for which bitmap scenarios prevent a resize. */ + if (qcow2_truncate_bitmaps_check(bs, errp)) { + ret = -ENOTSUP; + goto fail; + } + + old_length = bs->total_sectors * BDRV_SECTOR_SIZE; + new_l1_size = size_to_l1(s, offset); + + if (offset < old_length) { + int64_t last_cluster, old_file_size; + if (prealloc != PREALLOC_MODE_OFF) { + error_setg(errp, + "Preallocation can't be used for shrinking an image"); + ret = -EINVAL; + goto fail; + } + + ret = qcow2_cluster_discard(bs, ROUND_UP(offset, s->cluster_size), + old_length - ROUND_UP(offset, + s->cluster_size), + QCOW2_DISCARD_ALWAYS, true); + if (ret < 0) { + error_setg_errno(errp, -ret, "Failed to discard cropped clusters"); + goto fail; + } + + ret = qcow2_shrink_l1_table(bs, new_l1_size); + if (ret < 0) { + error_setg_errno(errp, -ret, + "Failed to reduce the number of L2 tables"); + goto fail; + } + + ret = qcow2_shrink_reftable(bs); + if (ret < 0) { + error_setg_errno(errp, -ret, + "Failed to discard unused refblocks"); + goto fail; + } + + old_file_size = bdrv_getlength(bs->file->bs); + if (old_file_size < 0) { + error_setg_errno(errp, -old_file_size, + "Failed to inquire current file length"); + ret = old_file_size; + goto fail; + } + last_cluster = qcow2_get_last_cluster(bs, old_file_size); + if (last_cluster < 0) { + error_setg_errno(errp, -last_cluster, + "Failed to find the last cluster"); + ret = last_cluster; + goto fail; + } + if ((last_cluster + 1) * s->cluster_size < old_file_size) { + Error *local_err = NULL; + + /* + * Do not pass @exact here: It will not help the user if + * we get an error here just because they wanted to shrink + * their qcow2 image (on a block device) with qemu-img. + * (And on the qcow2 layer, the @exact requirement is + * always fulfilled, so there is no need to pass it on.) + */ + bdrv_co_truncate(bs->file, (last_cluster + 1) * s->cluster_size, + false, PREALLOC_MODE_OFF, 0, &local_err); + if (local_err) { + warn_reportf_err(local_err, + "Failed to truncate the tail of the image: "); + } + } + } else { + ret = qcow2_grow_l1_table(bs, new_l1_size, true); + if (ret < 0) { + error_setg_errno(errp, -ret, "Failed to grow the L1 table"); + goto fail; + } + } + + switch (prealloc) { + case PREALLOC_MODE_OFF: + if (has_data_file(bs)) { + /* + * If the caller wants an exact resize, the external data + * file should be resized to the exact target size, too, + * so we pass @exact here. + */ + ret = bdrv_co_truncate(s->data_file, offset, exact, prealloc, 0, + errp); + if (ret < 0) { + goto fail; + } + } + break; + + case PREALLOC_MODE_METADATA: + ret = preallocate_co(bs, old_length, offset, prealloc, errp); + if (ret < 0) { + goto fail; + } + break; + + case PREALLOC_MODE_FALLOC: + case PREALLOC_MODE_FULL: + { + int64_t allocation_start, host_offset, guest_offset; + int64_t clusters_allocated; + int64_t old_file_size, last_cluster, new_file_size; + uint64_t nb_new_data_clusters, nb_new_l2_tables; + bool subclusters_need_allocation = false; + + /* With a data file, preallocation means just allocating the metadata + * and forwarding the truncate request to the data file */ + if (has_data_file(bs)) { + ret = preallocate_co(bs, old_length, offset, prealloc, errp); + if (ret < 0) { + goto fail; + } + break; + } + + old_file_size = bdrv_getlength(bs->file->bs); + if (old_file_size < 0) { + error_setg_errno(errp, -old_file_size, + "Failed to inquire current file length"); + ret = old_file_size; + goto fail; + } + + last_cluster = qcow2_get_last_cluster(bs, old_file_size); + if (last_cluster >= 0) { + old_file_size = (last_cluster + 1) * s->cluster_size; + } else { + old_file_size = ROUND_UP(old_file_size, s->cluster_size); + } + + nb_new_data_clusters = (ROUND_UP(offset, s->cluster_size) - + start_of_cluster(s, old_length)) >> s->cluster_bits; + + /* This is an overestimation; we will not actually allocate space for + * these in the file but just make sure the new refcount structures are + * able to cover them so we will not have to allocate new refblocks + * while entering the data blocks in the potentially new L2 tables. + * (We do not actually care where the L2 tables are placed. Maybe they + * are already allocated or they can be placed somewhere before + * @old_file_size. It does not matter because they will be fully + * allocated automatically, so they do not need to be covered by the + * preallocation. All that matters is that we will not have to allocate + * new refcount structures for them.) */ + nb_new_l2_tables = DIV_ROUND_UP(nb_new_data_clusters, + s->cluster_size / l2_entry_size(s)); + /* The cluster range may not be aligned to L2 boundaries, so add one L2 + * table for a potential head/tail */ + nb_new_l2_tables++; + + allocation_start = qcow2_refcount_area(bs, old_file_size, + nb_new_data_clusters + + nb_new_l2_tables, + true, 0, 0); + if (allocation_start < 0) { + error_setg_errno(errp, -allocation_start, + "Failed to resize refcount structures"); + ret = allocation_start; + goto fail; + } + + clusters_allocated = qcow2_alloc_clusters_at(bs, allocation_start, + nb_new_data_clusters); + if (clusters_allocated < 0) { + error_setg_errno(errp, -clusters_allocated, + "Failed to allocate data clusters"); + ret = clusters_allocated; + goto fail; + } + + assert(clusters_allocated == nb_new_data_clusters); + + /* Allocate the data area */ + new_file_size = allocation_start + + nb_new_data_clusters * s->cluster_size; + /* + * Image file grows, so @exact does not matter. + * + * If we need to zero out the new area, try first whether the protocol + * driver can already take care of this. + */ + if (flags & BDRV_REQ_ZERO_WRITE) { + ret = bdrv_co_truncate(bs->file, new_file_size, false, prealloc, + BDRV_REQ_ZERO_WRITE, NULL); + if (ret >= 0) { + flags &= ~BDRV_REQ_ZERO_WRITE; + /* Ensure that we read zeroes and not backing file data */ + subclusters_need_allocation = true; + } + } else { + ret = -1; + } + if (ret < 0) { + ret = bdrv_co_truncate(bs->file, new_file_size, false, prealloc, 0, + errp); + } + if (ret < 0) { + error_prepend(errp, "Failed to resize underlying file: "); + qcow2_free_clusters(bs, allocation_start, + nb_new_data_clusters * s->cluster_size, + QCOW2_DISCARD_OTHER); + goto fail; + } + + /* Create the necessary L2 entries */ + host_offset = allocation_start; + guest_offset = old_length; + while (nb_new_data_clusters) { + int64_t nb_clusters = MIN( + nb_new_data_clusters, + s->l2_slice_size - offset_to_l2_slice_index(s, guest_offset)); + unsigned cow_start_length = offset_into_cluster(s, guest_offset); + QCowL2Meta allocation; + guest_offset = start_of_cluster(s, guest_offset); + allocation = (QCowL2Meta) { + .offset = guest_offset, + .alloc_offset = host_offset, + .nb_clusters = nb_clusters, + .cow_start = { + .offset = 0, + .nb_bytes = cow_start_length, + }, + .cow_end = { + .offset = nb_clusters << s->cluster_bits, + .nb_bytes = 0, + }, + .prealloc = !subclusters_need_allocation, + }; + qemu_co_queue_init(&allocation.dependent_requests); + + ret = qcow2_alloc_cluster_link_l2(bs, &allocation); + if (ret < 0) { + error_setg_errno(errp, -ret, "Failed to update L2 tables"); + qcow2_free_clusters(bs, host_offset, + nb_new_data_clusters * s->cluster_size, + QCOW2_DISCARD_OTHER); + goto fail; + } + + guest_offset += nb_clusters * s->cluster_size; + host_offset += nb_clusters * s->cluster_size; + nb_new_data_clusters -= nb_clusters; + } + break; + } + + default: + g_assert_not_reached(); + } + + if ((flags & BDRV_REQ_ZERO_WRITE) && offset > old_length) { + uint64_t zero_start = QEMU_ALIGN_UP(old_length, s->subcluster_size); + + /* + * Use zero clusters as much as we can. qcow2_subcluster_zeroize() + * requires a subcluster-aligned start. The end may be unaligned if + * it is at the end of the image (which it is here). + */ + if (offset > zero_start) { + ret = qcow2_subcluster_zeroize(bs, zero_start, offset - zero_start, + 0); + if (ret < 0) { + error_setg_errno(errp, -ret, "Failed to zero out new clusters"); + goto fail; + } + } + + /* Write explicit zeros for the unaligned head */ + if (zero_start > old_length) { + uint64_t len = MIN(zero_start, offset) - old_length; + uint8_t *buf = qemu_blockalign0(bs, len); + QEMUIOVector qiov; + qemu_iovec_init_buf(&qiov, buf, len); + + qemu_co_mutex_unlock(&s->lock); + ret = qcow2_co_pwritev_part(bs, old_length, len, &qiov, 0, 0); + qemu_co_mutex_lock(&s->lock); + + qemu_vfree(buf); + if (ret < 0) { + error_setg_errno(errp, -ret, "Failed to zero out the new area"); + goto fail; + } + } + } + + if (prealloc != PREALLOC_MODE_OFF) { + /* Flush metadata before actually changing the image size */ + ret = qcow2_write_caches(bs); + if (ret < 0) { + error_setg_errno(errp, -ret, + "Failed to flush the preallocated area to disk"); + goto fail; + } + } + + bs->total_sectors = offset / BDRV_SECTOR_SIZE; + + /* write updated header.size */ + offset = cpu_to_be64(offset); + ret = bdrv_pwrite_sync(bs->file, offsetof(QCowHeader, size), + &offset, sizeof(offset)); + if (ret < 0) { + error_setg_errno(errp, -ret, "Failed to update the image size"); + goto fail; + } + + s->l1_vm_state_index = new_l1_size; + + /* Update cache sizes */ + options = qdict_clone_shallow(bs->options); + ret = qcow2_update_options(bs, options, s->flags, errp); + qobject_unref(options); + if (ret < 0) { + goto fail; + } + ret = 0; +fail: + qemu_co_mutex_unlock(&s->lock); + return ret; +} + +static coroutine_fn int +qcow2_co_pwritev_compressed_task(BlockDriverState *bs, + uint64_t offset, uint64_t bytes, + QEMUIOVector *qiov, size_t qiov_offset) +{ + BDRVQcow2State *s = bs->opaque; + int ret; + ssize_t out_len; + uint8_t *buf, *out_buf; + uint64_t cluster_offset; + + assert(bytes == s->cluster_size || (bytes < s->cluster_size && + (offset + bytes == bs->total_sectors << BDRV_SECTOR_BITS))); + + buf = qemu_blockalign(bs, s->cluster_size); + if (bytes < s->cluster_size) { + /* Zero-pad last write if image size is not cluster aligned */ + memset(buf + bytes, 0, s->cluster_size - bytes); + } + qemu_iovec_to_buf(qiov, qiov_offset, buf, bytes); + + out_buf = g_malloc(s->cluster_size); + + out_len = qcow2_co_compress(bs, out_buf, s->cluster_size - 1, + buf, s->cluster_size); + if (out_len == -ENOMEM) { + /* could not compress: write normal cluster */ + ret = qcow2_co_pwritev_part(bs, offset, bytes, qiov, qiov_offset, 0); + if (ret < 0) { + goto fail; + } + goto success; + } else if (out_len < 0) { + ret = -EINVAL; + goto fail; + } + + qemu_co_mutex_lock(&s->lock); + ret = qcow2_alloc_compressed_cluster_offset(bs, offset, out_len, + &cluster_offset); + if (ret < 0) { + qemu_co_mutex_unlock(&s->lock); + goto fail; + } + + ret = qcow2_pre_write_overlap_check(bs, 0, cluster_offset, out_len, true); + qemu_co_mutex_unlock(&s->lock); + if (ret < 0) { + goto fail; + } + + BLKDBG_EVENT(s->data_file, BLKDBG_WRITE_COMPRESSED); + ret = bdrv_co_pwrite(s->data_file, cluster_offset, out_len, out_buf, 0); + if (ret < 0) { + goto fail; + } +success: + ret = 0; +fail: + qemu_vfree(buf); + g_free(out_buf); + return ret; +} + +static coroutine_fn int qcow2_co_pwritev_compressed_task_entry(AioTask *task) +{ + Qcow2AioTask *t = container_of(task, Qcow2AioTask, task); + + assert(!t->subcluster_type && !t->l2meta); + + return qcow2_co_pwritev_compressed_task(t->bs, t->offset, t->bytes, t->qiov, + t->qiov_offset); +} + +/* + * XXX: put compressed sectors first, then all the cluster aligned + * tables to avoid losing bytes in alignment + */ +static coroutine_fn int +qcow2_co_pwritev_compressed_part(BlockDriverState *bs, + uint64_t offset, uint64_t bytes, + QEMUIOVector *qiov, size_t qiov_offset) +{ + BDRVQcow2State *s = bs->opaque; + AioTaskPool *aio = NULL; + int ret = 0; + + if (has_data_file(bs)) { + return -ENOTSUP; + } + + if (bytes == 0) { + /* + * align end of file to a sector boundary to ease reading with + * sector based I/Os + */ + int64_t len = bdrv_getlength(bs->file->bs); + if (len < 0) { + return len; + } + return bdrv_co_truncate(bs->file, len, false, PREALLOC_MODE_OFF, 0, + NULL); + } + + if (offset_into_cluster(s, offset)) { + return -EINVAL; + } + + if (offset_into_cluster(s, bytes) && + (offset + bytes) != (bs->total_sectors << BDRV_SECTOR_BITS)) { + return -EINVAL; + } + + while (bytes && aio_task_pool_status(aio) == 0) { + uint64_t chunk_size = MIN(bytes, s->cluster_size); + + if (!aio && chunk_size != bytes) { + aio = aio_task_pool_new(QCOW2_MAX_WORKERS); + } + + ret = qcow2_add_task(bs, aio, qcow2_co_pwritev_compressed_task_entry, + 0, 0, offset, chunk_size, qiov, qiov_offset, NULL); + if (ret < 0) { + break; + } + qiov_offset += chunk_size; + offset += chunk_size; + bytes -= chunk_size; + } + + if (aio) { + aio_task_pool_wait_all(aio); + if (ret == 0) { + ret = aio_task_pool_status(aio); + } + g_free(aio); + } + + return ret; +} + +static int coroutine_fn +qcow2_co_preadv_compressed(BlockDriverState *bs, + uint64_t cluster_descriptor, + uint64_t offset, + uint64_t bytes, + QEMUIOVector *qiov, + size_t qiov_offset) +{ + BDRVQcow2State *s = bs->opaque; + int ret = 0, csize, nb_csectors; + uint64_t coffset; + uint8_t *buf, *out_buf; + int offset_in_cluster = offset_into_cluster(s, offset); + + coffset = cluster_descriptor & s->cluster_offset_mask; + nb_csectors = ((cluster_descriptor >> s->csize_shift) & s->csize_mask) + 1; + csize = nb_csectors * QCOW2_COMPRESSED_SECTOR_SIZE - + (coffset & ~QCOW2_COMPRESSED_SECTOR_MASK); + + buf = g_try_malloc(csize); + if (!buf) { + return -ENOMEM; + } + + out_buf = qemu_blockalign(bs, s->cluster_size); + + BLKDBG_EVENT(bs->file, BLKDBG_READ_COMPRESSED); + ret = bdrv_co_pread(bs->file, coffset, csize, buf, 0); + if (ret < 0) { + goto fail; + } + + if (qcow2_co_decompress(bs, out_buf, s->cluster_size, buf, csize) < 0) { + ret = -EIO; + goto fail; + } + + qemu_iovec_from_buf(qiov, qiov_offset, out_buf + offset_in_cluster, bytes); + +fail: + qemu_vfree(out_buf); + g_free(buf); + + return ret; +} + +static int make_completely_empty(BlockDriverState *bs) +{ + BDRVQcow2State *s = bs->opaque; + Error *local_err = NULL; + int ret, l1_clusters; + int64_t offset; + uint64_t *new_reftable = NULL; + uint64_t rt_entry, l1_size2; + struct { + uint64_t l1_offset; + uint64_t reftable_offset; + uint32_t reftable_clusters; + } QEMU_PACKED l1_ofs_rt_ofs_cls; + + ret = qcow2_cache_empty(bs, s->l2_table_cache); + if (ret < 0) { + goto fail; + } + + ret = qcow2_cache_empty(bs, s->refcount_block_cache); + if (ret < 0) { + goto fail; + } + + /* Refcounts will be broken utterly */ + ret = qcow2_mark_dirty(bs); + if (ret < 0) { + goto fail; + } + + BLKDBG_EVENT(bs->file, BLKDBG_L1_UPDATE); + + l1_clusters = DIV_ROUND_UP(s->l1_size, s->cluster_size / L1E_SIZE); + l1_size2 = (uint64_t)s->l1_size * L1E_SIZE; + + /* After this call, neither the in-memory nor the on-disk refcount + * information accurately describe the actual references */ + + ret = bdrv_pwrite_zeroes(bs->file, s->l1_table_offset, + l1_clusters * s->cluster_size, 0); + if (ret < 0) { + goto fail_broken_refcounts; + } + memset(s->l1_table, 0, l1_size2); + + BLKDBG_EVENT(bs->file, BLKDBG_EMPTY_IMAGE_PREPARE); + + /* Overwrite enough clusters at the beginning of the sectors to place + * the refcount table, a refcount block and the L1 table in; this may + * overwrite parts of the existing refcount and L1 table, which is not + * an issue because the dirty flag is set, complete data loss is in fact + * desired and partial data loss is consequently fine as well */ + ret = bdrv_pwrite_zeroes(bs->file, s->cluster_size, + (2 + l1_clusters) * s->cluster_size, 0); + /* This call (even if it failed overall) may have overwritten on-disk + * refcount structures; in that case, the in-memory refcount information + * will probably differ from the on-disk information which makes the BDS + * unusable */ + if (ret < 0) { + goto fail_broken_refcounts; + } + + BLKDBG_EVENT(bs->file, BLKDBG_L1_UPDATE); + BLKDBG_EVENT(bs->file, BLKDBG_REFTABLE_UPDATE); + + /* "Create" an empty reftable (one cluster) directly after the image + * header and an empty L1 table three clusters after the image header; + * the cluster between those two will be used as the first refblock */ + l1_ofs_rt_ofs_cls.l1_offset = cpu_to_be64(3 * s->cluster_size); + l1_ofs_rt_ofs_cls.reftable_offset = cpu_to_be64(s->cluster_size); + l1_ofs_rt_ofs_cls.reftable_clusters = cpu_to_be32(1); + ret = bdrv_pwrite_sync(bs->file, offsetof(QCowHeader, l1_table_offset), + &l1_ofs_rt_ofs_cls, sizeof(l1_ofs_rt_ofs_cls)); + if (ret < 0) { + goto fail_broken_refcounts; + } + + s->l1_table_offset = 3 * s->cluster_size; + + new_reftable = g_try_new0(uint64_t, s->cluster_size / REFTABLE_ENTRY_SIZE); + if (!new_reftable) { + ret = -ENOMEM; + goto fail_broken_refcounts; + } + + s->refcount_table_offset = s->cluster_size; + s->refcount_table_size = s->cluster_size / REFTABLE_ENTRY_SIZE; + s->max_refcount_table_index = 0; + + g_free(s->refcount_table); + s->refcount_table = new_reftable; + new_reftable = NULL; + + /* Now the in-memory refcount information again corresponds to the on-disk + * information (reftable is empty and no refblocks (the refblock cache is + * empty)); however, this means some clusters (e.g. the image header) are + * referenced, but not refcounted, but the normal qcow2 code assumes that + * the in-memory information is always correct */ + + BLKDBG_EVENT(bs->file, BLKDBG_REFBLOCK_ALLOC); + + /* Enter the first refblock into the reftable */ + rt_entry = cpu_to_be64(2 * s->cluster_size); + ret = bdrv_pwrite_sync(bs->file, s->cluster_size, + &rt_entry, sizeof(rt_entry)); + if (ret < 0) { + goto fail_broken_refcounts; + } + s->refcount_table[0] = 2 * s->cluster_size; + + s->free_cluster_index = 0; + assert(3 + l1_clusters <= s->refcount_block_size); + offset = qcow2_alloc_clusters(bs, 3 * s->cluster_size + l1_size2); + if (offset < 0) { + ret = offset; + goto fail_broken_refcounts; + } else if (offset > 0) { + error_report("First cluster in emptied image is in use"); + abort(); + } + + /* Now finally the in-memory information corresponds to the on-disk + * structures and is correct */ + ret = qcow2_mark_clean(bs); + if (ret < 0) { + goto fail; + } + + ret = bdrv_truncate(bs->file, (3 + l1_clusters) * s->cluster_size, false, + PREALLOC_MODE_OFF, 0, &local_err); + if (ret < 0) { + error_report_err(local_err); + goto fail; + } + + return 0; + +fail_broken_refcounts: + /* The BDS is unusable at this point. If we wanted to make it usable, we + * would have to call qcow2_refcount_close(), qcow2_refcount_init(), + * qcow2_check_refcounts(), qcow2_refcount_close() and qcow2_refcount_init() + * again. However, because the functions which could have caused this error + * path to be taken are used by those functions as well, it's very likely + * that that sequence will fail as well. Therefore, just eject the BDS. */ + bs->drv = NULL; + +fail: + g_free(new_reftable); + return ret; +} + +static int qcow2_make_empty(BlockDriverState *bs) +{ + BDRVQcow2State *s = bs->opaque; + uint64_t offset, end_offset; + int step = QEMU_ALIGN_DOWN(INT_MAX, s->cluster_size); + int l1_clusters, ret = 0; + + l1_clusters = DIV_ROUND_UP(s->l1_size, s->cluster_size / L1E_SIZE); + + if (s->qcow_version >= 3 && !s->snapshots && !s->nb_bitmaps && + 3 + l1_clusters <= s->refcount_block_size && + s->crypt_method_header != QCOW_CRYPT_LUKS && + !has_data_file(bs)) { + /* The following function only works for qcow2 v3 images (it + * requires the dirty flag) and only as long as there are no + * features that reserve extra clusters (such as snapshots, + * LUKS header, or persistent bitmaps), because it completely + * empties the image. Furthermore, the L1 table and three + * additional clusters (image header, refcount table, one + * refcount block) have to fit inside one refcount block. It + * only resets the image file, i.e. does not work with an + * external data file. */ + return make_completely_empty(bs); + } + + /* This fallback code simply discards every active cluster; this is slow, + * but works in all cases */ + end_offset = bs->total_sectors * BDRV_SECTOR_SIZE; + for (offset = 0; offset < end_offset; offset += step) { + /* As this function is generally used after committing an external + * snapshot, QCOW2_DISCARD_SNAPSHOT seems appropriate. Also, the + * default action for this kind of discard is to pass the discard, + * which will ideally result in an actually smaller image file, as + * is probably desired. */ + ret = qcow2_cluster_discard(bs, offset, MIN(step, end_offset - offset), + QCOW2_DISCARD_SNAPSHOT, true); + if (ret < 0) { + break; + } + } + + return ret; +} + +static coroutine_fn int qcow2_co_flush_to_os(BlockDriverState *bs) +{ + BDRVQcow2State *s = bs->opaque; + int ret; + + qemu_co_mutex_lock(&s->lock); + ret = qcow2_write_caches(bs); + qemu_co_mutex_unlock(&s->lock); + + return ret; +} + +static BlockMeasureInfo *qcow2_measure(QemuOpts *opts, BlockDriverState *in_bs, + Error **errp) +{ + Error *local_err = NULL; + BlockMeasureInfo *info; + uint64_t required = 0; /* bytes that contribute to required size */ + uint64_t virtual_size; /* disk size as seen by guest */ + uint64_t refcount_bits; + uint64_t l2_tables; + uint64_t luks_payload_size = 0; + size_t cluster_size; + int version; + char *optstr; + PreallocMode prealloc; + bool has_backing_file; + bool has_luks; + bool extended_l2; + size_t l2e_size; + + /* Parse image creation options */ + extended_l2 = qemu_opt_get_bool_del(opts, BLOCK_OPT_EXTL2, false); + + cluster_size = qcow2_opt_get_cluster_size_del(opts, extended_l2, + &local_err); + if (local_err) { + goto err; + } + + version = qcow2_opt_get_version_del(opts, &local_err); + if (local_err) { + goto err; + } + + refcount_bits = qcow2_opt_get_refcount_bits_del(opts, version, &local_err); + if (local_err) { + goto err; + } + + optstr = qemu_opt_get_del(opts, BLOCK_OPT_PREALLOC); + prealloc = qapi_enum_parse(&PreallocMode_lookup, optstr, + PREALLOC_MODE_OFF, &local_err); + g_free(optstr); + if (local_err) { + goto err; + } + + optstr = qemu_opt_get_del(opts, BLOCK_OPT_BACKING_FILE); + has_backing_file = !!optstr; + g_free(optstr); + + optstr = qemu_opt_get_del(opts, BLOCK_OPT_ENCRYPT_FORMAT); + has_luks = optstr && strcmp(optstr, "luks") == 0; + g_free(optstr); + + if (has_luks) { + g_autoptr(QCryptoBlockCreateOptions) create_opts = NULL; + QDict *cryptoopts = qcow2_extract_crypto_opts(opts, "luks", errp); + size_t headerlen; + + create_opts = block_crypto_create_opts_init(cryptoopts, errp); + qobject_unref(cryptoopts); + if (!create_opts) { + goto err; + } + + if (!qcrypto_block_calculate_payload_offset(create_opts, + "encrypt.", + &headerlen, + &local_err)) { + goto err; + } + + luks_payload_size = ROUND_UP(headerlen, cluster_size); + } + + virtual_size = qemu_opt_get_size_del(opts, BLOCK_OPT_SIZE, 0); + virtual_size = ROUND_UP(virtual_size, cluster_size); + + /* Check that virtual disk size is valid */ + l2e_size = extended_l2 ? L2E_SIZE_EXTENDED : L2E_SIZE_NORMAL; + l2_tables = DIV_ROUND_UP(virtual_size / cluster_size, + cluster_size / l2e_size); + if (l2_tables * L1E_SIZE > QCOW_MAX_L1_SIZE) { + error_setg(&local_err, "The image size is too large " + "(try using a larger cluster size)"); + goto err; + } + + /* Account for input image */ + if (in_bs) { + int64_t ssize = bdrv_getlength(in_bs); + if (ssize < 0) { + error_setg_errno(&local_err, -ssize, + "Unable to get image virtual_size"); + goto err; + } + + virtual_size = ROUND_UP(ssize, cluster_size); + + if (has_backing_file) { + /* We don't how much of the backing chain is shared by the input + * image and the new image file. In the worst case the new image's + * backing file has nothing in common with the input image. Be + * conservative and assume all clusters need to be written. + */ + required = virtual_size; + } else { + int64_t offset; + int64_t pnum = 0; + + for (offset = 0; offset < ssize; offset += pnum) { + int ret; + + ret = bdrv_block_status_above(in_bs, NULL, offset, + ssize - offset, &pnum, NULL, + NULL); + if (ret < 0) { + error_setg_errno(&local_err, -ret, + "Unable to get block status"); + goto err; + } + + if (ret & BDRV_BLOCK_ZERO) { + /* Skip zero regions (safe with no backing file) */ + } else if ((ret & (BDRV_BLOCK_DATA | BDRV_BLOCK_ALLOCATED)) == + (BDRV_BLOCK_DATA | BDRV_BLOCK_ALLOCATED)) { + /* Extend pnum to end of cluster for next iteration */ + pnum = ROUND_UP(offset + pnum, cluster_size) - offset; + + /* Count clusters we've seen */ + required += offset % cluster_size + pnum; + } + } + } + } + + /* Take into account preallocation. Nothing special is needed for + * PREALLOC_MODE_METADATA since metadata is always counted. + */ + if (prealloc == PREALLOC_MODE_FULL || prealloc == PREALLOC_MODE_FALLOC) { + required = virtual_size; + } + + info = g_new0(BlockMeasureInfo, 1); + info->fully_allocated = luks_payload_size + + qcow2_calc_prealloc_size(virtual_size, cluster_size, + ctz32(refcount_bits), extended_l2); + + /* + * Remove data clusters that are not required. This overestimates the + * required size because metadata needed for the fully allocated file is + * still counted. Show bitmaps only if both source and destination + * would support them. + */ + info->required = info->fully_allocated - virtual_size + required; + info->has_bitmaps = version >= 3 && in_bs && + bdrv_supports_persistent_dirty_bitmap(in_bs); + if (info->has_bitmaps) { + info->bitmaps = qcow2_get_persistent_dirty_bitmap_size(in_bs, + cluster_size); + } + return info; + +err: + error_propagate(errp, local_err); + return NULL; +} + +static int qcow2_get_info(BlockDriverState *bs, BlockDriverInfo *bdi) +{ + BDRVQcow2State *s = bs->opaque; + bdi->cluster_size = s->cluster_size; + bdi->vm_state_offset = qcow2_vm_state_offset(s); + return 0; +} + +static ImageInfoSpecific *qcow2_get_specific_info(BlockDriverState *bs, + Error **errp) +{ + BDRVQcow2State *s = bs->opaque; + ImageInfoSpecific *spec_info; + QCryptoBlockInfo *encrypt_info = NULL; + Error *local_err = NULL; + + if (s->crypto != NULL) { + encrypt_info = qcrypto_block_get_info(s->crypto, &local_err); + if (local_err) { + error_propagate(errp, local_err); + return NULL; + } + } + + spec_info = g_new(ImageInfoSpecific, 1); + *spec_info = (ImageInfoSpecific){ + .type = IMAGE_INFO_SPECIFIC_KIND_QCOW2, + .u.qcow2.data = g_new0(ImageInfoSpecificQCow2, 1), + }; + if (s->qcow_version == 2) { + *spec_info->u.qcow2.data = (ImageInfoSpecificQCow2){ + .compat = g_strdup("0.10"), + .refcount_bits = s->refcount_bits, + }; + } else if (s->qcow_version == 3) { + Qcow2BitmapInfoList *bitmaps; + bitmaps = qcow2_get_bitmap_info_list(bs, &local_err); + if (local_err) { + error_propagate(errp, local_err); + qapi_free_ImageInfoSpecific(spec_info); + qapi_free_QCryptoBlockInfo(encrypt_info); + return NULL; + } + *spec_info->u.qcow2.data = (ImageInfoSpecificQCow2){ + .compat = g_strdup("1.1"), + .lazy_refcounts = s->compatible_features & + QCOW2_COMPAT_LAZY_REFCOUNTS, + .has_lazy_refcounts = true, + .corrupt = s->incompatible_features & + QCOW2_INCOMPAT_CORRUPT, + .has_corrupt = true, + .has_extended_l2 = true, + .extended_l2 = has_subclusters(s), + .refcount_bits = s->refcount_bits, + .has_bitmaps = !!bitmaps, + .bitmaps = bitmaps, + .has_data_file = !!s->image_data_file, + .data_file = g_strdup(s->image_data_file), + .has_data_file_raw = has_data_file(bs), + .data_file_raw = data_file_is_raw(bs), + .compression_type = s->compression_type, + }; + } else { + /* if this assertion fails, this probably means a new version was + * added without having it covered here */ + assert(false); + } + + if (encrypt_info) { + ImageInfoSpecificQCow2Encryption *qencrypt = + g_new(ImageInfoSpecificQCow2Encryption, 1); + switch (encrypt_info->format) { + case Q_CRYPTO_BLOCK_FORMAT_QCOW: + qencrypt->format = BLOCKDEV_QCOW2_ENCRYPTION_FORMAT_AES; + break; + case Q_CRYPTO_BLOCK_FORMAT_LUKS: + qencrypt->format = BLOCKDEV_QCOW2_ENCRYPTION_FORMAT_LUKS; + qencrypt->u.luks = encrypt_info->u.luks; + break; + default: + abort(); + } + /* Since we did shallow copy above, erase any pointers + * in the original info */ + memset(&encrypt_info->u, 0, sizeof(encrypt_info->u)); + qapi_free_QCryptoBlockInfo(encrypt_info); + + spec_info->u.qcow2.data->has_encrypt = true; + spec_info->u.qcow2.data->encrypt = qencrypt; + } + + return spec_info; +} + +static int qcow2_has_zero_init(BlockDriverState *bs) +{ + BDRVQcow2State *s = bs->opaque; + bool preallocated; + + if (qemu_in_coroutine()) { + qemu_co_mutex_lock(&s->lock); + } + /* + * Check preallocation status: Preallocated images have all L2 + * tables allocated, nonpreallocated images have none. It is + * therefore enough to check the first one. + */ + preallocated = s->l1_size > 0 && s->l1_table[0] != 0; + if (qemu_in_coroutine()) { + qemu_co_mutex_unlock(&s->lock); + } + + if (!preallocated) { + return 1; + } else if (bs->encrypted) { + return 0; + } else { + return bdrv_has_zero_init(s->data_file->bs); + } +} + +static int qcow2_save_vmstate(BlockDriverState *bs, QEMUIOVector *qiov, + int64_t pos) +{ + BDRVQcow2State *s = bs->opaque; + + BLKDBG_EVENT(bs->file, BLKDBG_VMSTATE_SAVE); + return bs->drv->bdrv_co_pwritev_part(bs, qcow2_vm_state_offset(s) + pos, + qiov->size, qiov, 0, 0); +} + +static int qcow2_load_vmstate(BlockDriverState *bs, QEMUIOVector *qiov, + int64_t pos) +{ + BDRVQcow2State *s = bs->opaque; + + BLKDBG_EVENT(bs->file, BLKDBG_VMSTATE_LOAD); + return bs->drv->bdrv_co_preadv_part(bs, qcow2_vm_state_offset(s) + pos, + qiov->size, qiov, 0, 0); +} + +/* + * Downgrades an image's version. To achieve this, any incompatible features + * have to be removed. + */ +static int qcow2_downgrade(BlockDriverState *bs, int target_version, + BlockDriverAmendStatusCB *status_cb, void *cb_opaque, + Error **errp) +{ + BDRVQcow2State *s = bs->opaque; + int current_version = s->qcow_version; + int ret; + int i; + + /* This is qcow2_downgrade(), not qcow2_upgrade() */ + assert(target_version < current_version); + + /* There are no other versions (now) that you can downgrade to */ + assert(target_version == 2); + + if (s->refcount_order != 4) { + error_setg(errp, "compat=0.10 requires refcount_bits=16"); + return -ENOTSUP; + } + + if (has_data_file(bs)) { + error_setg(errp, "Cannot downgrade an image with a data file"); + return -ENOTSUP; + } + + /* + * If any internal snapshot has a different size than the current + * image size, or VM state size that exceeds 32 bits, downgrading + * is unsafe. Even though we would still use v3-compliant output + * to preserve that data, other v2 programs might not realize + * those optional fields are important. + */ + for (i = 0; i < s->nb_snapshots; i++) { + if (s->snapshots[i].vm_state_size > UINT32_MAX || + s->snapshots[i].disk_size != bs->total_sectors * BDRV_SECTOR_SIZE) { + error_setg(errp, "Internal snapshots prevent downgrade of image"); + return -ENOTSUP; + } + } + + /* clear incompatible features */ + if (s->incompatible_features & QCOW2_INCOMPAT_DIRTY) { + ret = qcow2_mark_clean(bs); + if (ret < 0) { + error_setg_errno(errp, -ret, "Failed to make the image clean"); + return ret; + } + } + + /* with QCOW2_INCOMPAT_CORRUPT, it is pretty much impossible to get here in + * the first place; if that happens nonetheless, returning -ENOTSUP is the + * best thing to do anyway */ + + if (s->incompatible_features) { + error_setg(errp, "Cannot downgrade an image with incompatible features " + "%#" PRIx64 " set", s->incompatible_features); + return -ENOTSUP; + } + + /* since we can ignore compatible features, we can set them to 0 as well */ + s->compatible_features = 0; + /* if lazy refcounts have been used, they have already been fixed through + * clearing the dirty flag */ + + /* clearing autoclear features is trivial */ + s->autoclear_features = 0; + + ret = qcow2_expand_zero_clusters(bs, status_cb, cb_opaque); + if (ret < 0) { + error_setg_errno(errp, -ret, "Failed to turn zero into data clusters"); + return ret; + } + + s->qcow_version = target_version; + ret = qcow2_update_header(bs); + if (ret < 0) { + s->qcow_version = current_version; + error_setg_errno(errp, -ret, "Failed to update the image header"); + return ret; + } + return 0; +} + +/* + * Upgrades an image's version. While newer versions encompass all + * features of older versions, some things may have to be presented + * differently. + */ +static int qcow2_upgrade(BlockDriverState *bs, int target_version, + BlockDriverAmendStatusCB *status_cb, void *cb_opaque, + Error **errp) +{ + BDRVQcow2State *s = bs->opaque; + bool need_snapshot_update; + int current_version = s->qcow_version; + int i; + int ret; + + /* This is qcow2_upgrade(), not qcow2_downgrade() */ + assert(target_version > current_version); + + /* There are no other versions (yet) that you can upgrade to */ + assert(target_version == 3); + + status_cb(bs, 0, 2, cb_opaque); + + /* + * In v2, snapshots do not need to have extra data. v3 requires + * the 64-bit VM state size and the virtual disk size to be + * present. + * qcow2_write_snapshots() will always write the list in the + * v3-compliant format. + */ + need_snapshot_update = false; + for (i = 0; i < s->nb_snapshots; i++) { + if (s->snapshots[i].extra_data_size < + sizeof_field(QCowSnapshotExtraData, vm_state_size_large) + + sizeof_field(QCowSnapshotExtraData, disk_size)) + { + need_snapshot_update = true; + break; + } + } + if (need_snapshot_update) { + ret = qcow2_write_snapshots(bs); + if (ret < 0) { + error_setg_errno(errp, -ret, "Failed to update the snapshot table"); + return ret; + } + } + status_cb(bs, 1, 2, cb_opaque); + + s->qcow_version = target_version; + ret = qcow2_update_header(bs); + if (ret < 0) { + s->qcow_version = current_version; + error_setg_errno(errp, -ret, "Failed to update the image header"); + return ret; + } + status_cb(bs, 2, 2, cb_opaque); + + return 0; +} + +typedef enum Qcow2AmendOperation { + /* This is the value Qcow2AmendHelperCBInfo::last_operation will be + * statically initialized to so that the helper CB can discern the first + * invocation from an operation change */ + QCOW2_NO_OPERATION = 0, + + QCOW2_UPGRADING, + QCOW2_UPDATING_ENCRYPTION, + QCOW2_CHANGING_REFCOUNT_ORDER, + QCOW2_DOWNGRADING, +} Qcow2AmendOperation; + +typedef struct Qcow2AmendHelperCBInfo { + /* The code coordinating the amend operations should only modify + * these four fields; the rest will be managed by the CB */ + BlockDriverAmendStatusCB *original_status_cb; + void *original_cb_opaque; + + Qcow2AmendOperation current_operation; + + /* Total number of operations to perform (only set once) */ + int total_operations; + + /* The following fields are managed by the CB */ + + /* Number of operations completed */ + int operations_completed; + + /* Cumulative offset of all completed operations */ + int64_t offset_completed; + + Qcow2AmendOperation last_operation; + int64_t last_work_size; +} Qcow2AmendHelperCBInfo; + +static void qcow2_amend_helper_cb(BlockDriverState *bs, + int64_t operation_offset, + int64_t operation_work_size, void *opaque) +{ + Qcow2AmendHelperCBInfo *info = opaque; + int64_t current_work_size; + int64_t projected_work_size; + + if (info->current_operation != info->last_operation) { + if (info->last_operation != QCOW2_NO_OPERATION) { + info->offset_completed += info->last_work_size; + info->operations_completed++; + } + + info->last_operation = info->current_operation; + } + + assert(info->total_operations > 0); + assert(info->operations_completed < info->total_operations); + + info->last_work_size = operation_work_size; + + current_work_size = info->offset_completed + operation_work_size; + + /* current_work_size is the total work size for (operations_completed + 1) + * operations (which includes this one), so multiply it by the number of + * operations not covered and divide it by the number of operations + * covered to get a projection for the operations not covered */ + projected_work_size = current_work_size * (info->total_operations - + info->operations_completed - 1) + / (info->operations_completed + 1); + + info->original_status_cb(bs, info->offset_completed + operation_offset, + current_work_size + projected_work_size, + info->original_cb_opaque); +} + +static int qcow2_amend_options(BlockDriverState *bs, QemuOpts *opts, + BlockDriverAmendStatusCB *status_cb, + void *cb_opaque, + bool force, + Error **errp) +{ + BDRVQcow2State *s = bs->opaque; + int old_version = s->qcow_version, new_version = old_version; + uint64_t new_size = 0; + const char *backing_file = NULL, *backing_format = NULL, *data_file = NULL; + bool lazy_refcounts = s->use_lazy_refcounts; + bool data_file_raw = data_file_is_raw(bs); + const char *compat = NULL; + int refcount_bits = s->refcount_bits; + int ret; + QemuOptDesc *desc = opts->list->desc; + Qcow2AmendHelperCBInfo helper_cb_info; + bool encryption_update = false; + + while (desc && desc->name) { + if (!qemu_opt_find(opts, desc->name)) { + /* only change explicitly defined options */ + desc++; + continue; + } + + if (!strcmp(desc->name, BLOCK_OPT_COMPAT_LEVEL)) { + compat = qemu_opt_get(opts, BLOCK_OPT_COMPAT_LEVEL); + if (!compat) { + /* preserve default */ + } else if (!strcmp(compat, "0.10") || !strcmp(compat, "v2")) { + new_version = 2; + } else if (!strcmp(compat, "1.1") || !strcmp(compat, "v3")) { + new_version = 3; + } else { + error_setg(errp, "Unknown compatibility level %s", compat); + return -EINVAL; + } + } else if (!strcmp(desc->name, BLOCK_OPT_SIZE)) { + new_size = qemu_opt_get_size(opts, BLOCK_OPT_SIZE, 0); + } else if (!strcmp(desc->name, BLOCK_OPT_BACKING_FILE)) { + backing_file = qemu_opt_get(opts, BLOCK_OPT_BACKING_FILE); + } else if (!strcmp(desc->name, BLOCK_OPT_BACKING_FMT)) { + backing_format = qemu_opt_get(opts, BLOCK_OPT_BACKING_FMT); + } else if (g_str_has_prefix(desc->name, "encrypt.")) { + if (!s->crypto) { + error_setg(errp, + "Can't amend encryption options - encryption not present"); + return -EINVAL; + } + if (s->crypt_method_header != QCOW_CRYPT_LUKS) { + error_setg(errp, + "Only LUKS encryption options can be amended"); + return -ENOTSUP; + } + encryption_update = true; + } else if (!strcmp(desc->name, BLOCK_OPT_LAZY_REFCOUNTS)) { + lazy_refcounts = qemu_opt_get_bool(opts, BLOCK_OPT_LAZY_REFCOUNTS, + lazy_refcounts); + } else if (!strcmp(desc->name, BLOCK_OPT_REFCOUNT_BITS)) { + refcount_bits = qemu_opt_get_number(opts, BLOCK_OPT_REFCOUNT_BITS, + refcount_bits); + + if (refcount_bits <= 0 || refcount_bits > 64 || + !is_power_of_2(refcount_bits)) + { + error_setg(errp, "Refcount width must be a power of two and " + "may not exceed 64 bits"); + return -EINVAL; + } + } else if (!strcmp(desc->name, BLOCK_OPT_DATA_FILE)) { + data_file = qemu_opt_get(opts, BLOCK_OPT_DATA_FILE); + if (data_file && !has_data_file(bs)) { + error_setg(errp, "data-file can only be set for images that " + "use an external data file"); + return -EINVAL; + } + } else if (!strcmp(desc->name, BLOCK_OPT_DATA_FILE_RAW)) { + data_file_raw = qemu_opt_get_bool(opts, BLOCK_OPT_DATA_FILE_RAW, + data_file_raw); + if (data_file_raw && !data_file_is_raw(bs)) { + error_setg(errp, "data-file-raw cannot be set on existing " + "images"); + return -EINVAL; + } + } else { + /* if this point is reached, this probably means a new option was + * added without having it covered here */ + abort(); + } + + desc++; + } + + helper_cb_info = (Qcow2AmendHelperCBInfo){ + .original_status_cb = status_cb, + .original_cb_opaque = cb_opaque, + .total_operations = (new_version != old_version) + + (s->refcount_bits != refcount_bits) + + (encryption_update == true) + }; + + /* Upgrade first (some features may require compat=1.1) */ + if (new_version > old_version) { + helper_cb_info.current_operation = QCOW2_UPGRADING; + ret = qcow2_upgrade(bs, new_version, &qcow2_amend_helper_cb, + &helper_cb_info, errp); + if (ret < 0) { + return ret; + } + } + + if (encryption_update) { + QDict *amend_opts_dict; + QCryptoBlockAmendOptions *amend_opts; + + helper_cb_info.current_operation = QCOW2_UPDATING_ENCRYPTION; + amend_opts_dict = qcow2_extract_crypto_opts(opts, "luks", errp); + if (!amend_opts_dict) { + return -EINVAL; + } + amend_opts = block_crypto_amend_opts_init(amend_opts_dict, errp); + qobject_unref(amend_opts_dict); + if (!amend_opts) { + return -EINVAL; + } + ret = qcrypto_block_amend_options(s->crypto, + qcow2_crypto_hdr_read_func, + qcow2_crypto_hdr_write_func, + bs, + amend_opts, + force, + errp); + qapi_free_QCryptoBlockAmendOptions(amend_opts); + if (ret < 0) { + return ret; + } + } + + if (s->refcount_bits != refcount_bits) { + int refcount_order = ctz32(refcount_bits); + + if (new_version < 3 && refcount_bits != 16) { + error_setg(errp, "Refcount widths other than 16 bits require " + "compatibility level 1.1 or above (use compat=1.1 or " + "greater)"); + return -EINVAL; + } + + helper_cb_info.current_operation = QCOW2_CHANGING_REFCOUNT_ORDER; + ret = qcow2_change_refcount_order(bs, refcount_order, + &qcow2_amend_helper_cb, + &helper_cb_info, errp); + if (ret < 0) { + return ret; + } + } + + /* data-file-raw blocks backing files, so clear it first if requested */ + if (data_file_raw) { + s->autoclear_features |= QCOW2_AUTOCLEAR_DATA_FILE_RAW; + } else { + s->autoclear_features &= ~QCOW2_AUTOCLEAR_DATA_FILE_RAW; + } + + if (data_file) { + g_free(s->image_data_file); + s->image_data_file = *data_file ? g_strdup(data_file) : NULL; + } + + ret = qcow2_update_header(bs); + if (ret < 0) { + error_setg_errno(errp, -ret, "Failed to update the image header"); + return ret; + } + + if (backing_file || backing_format) { + if (g_strcmp0(backing_file, s->image_backing_file) || + g_strcmp0(backing_format, s->image_backing_format)) { + warn_report("Deprecated use of amend to alter the backing file; " + "use qemu-img rebase instead"); + } + ret = qcow2_change_backing_file(bs, + backing_file ?: s->image_backing_file, + backing_format ?: s->image_backing_format); + if (ret < 0) { + error_setg_errno(errp, -ret, "Failed to change the backing file"); + return ret; + } + } + + if (s->use_lazy_refcounts != lazy_refcounts) { + if (lazy_refcounts) { + if (new_version < 3) { + error_setg(errp, "Lazy refcounts only supported with " + "compatibility level 1.1 and above (use compat=1.1 " + "or greater)"); + return -EINVAL; + } + s->compatible_features |= QCOW2_COMPAT_LAZY_REFCOUNTS; + ret = qcow2_update_header(bs); + if (ret < 0) { + s->compatible_features &= ~QCOW2_COMPAT_LAZY_REFCOUNTS; + error_setg_errno(errp, -ret, "Failed to update the image header"); + return ret; + } + s->use_lazy_refcounts = true; + } else { + /* make image clean first */ + ret = qcow2_mark_clean(bs); + if (ret < 0) { + error_setg_errno(errp, -ret, "Failed to make the image clean"); + return ret; + } + /* now disallow lazy refcounts */ + s->compatible_features &= ~QCOW2_COMPAT_LAZY_REFCOUNTS; + ret = qcow2_update_header(bs); + if (ret < 0) { + s->compatible_features |= QCOW2_COMPAT_LAZY_REFCOUNTS; + error_setg_errno(errp, -ret, "Failed to update the image header"); + return ret; + } + s->use_lazy_refcounts = false; + } + } + + if (new_size) { + BlockBackend *blk = blk_new_with_bs(bs, BLK_PERM_RESIZE, BLK_PERM_ALL, + errp); + if (!blk) { + return -EPERM; + } + + /* + * Amending image options should ensure that the image has + * exactly the given new values, so pass exact=true here. + */ + ret = blk_truncate(blk, new_size, true, PREALLOC_MODE_OFF, 0, errp); + blk_unref(blk); + if (ret < 0) { + return ret; + } + } + + /* Downgrade last (so unsupported features can be removed before) */ + if (new_version < old_version) { + helper_cb_info.current_operation = QCOW2_DOWNGRADING; + ret = qcow2_downgrade(bs, new_version, &qcow2_amend_helper_cb, + &helper_cb_info, errp); + if (ret < 0) { + return ret; + } + } + + return 0; +} + +static int coroutine_fn qcow2_co_amend(BlockDriverState *bs, + BlockdevAmendOptions *opts, + bool force, + Error **errp) +{ + BlockdevAmendOptionsQcow2 *qopts = &opts->u.qcow2; + BDRVQcow2State *s = bs->opaque; + int ret = 0; + + if (qopts->has_encrypt) { + if (!s->crypto) { + error_setg(errp, "image is not encrypted, can't amend"); + return -EOPNOTSUPP; + } + + if (qopts->encrypt->format != Q_CRYPTO_BLOCK_FORMAT_LUKS) { + error_setg(errp, + "Amend can't be used to change the qcow2 encryption format"); + return -EOPNOTSUPP; + } + + if (s->crypt_method_header != QCOW_CRYPT_LUKS) { + error_setg(errp, + "Only LUKS encryption options can be amended for qcow2 with blockdev-amend"); + return -EOPNOTSUPP; + } + + ret = qcrypto_block_amend_options(s->crypto, + qcow2_crypto_hdr_read_func, + qcow2_crypto_hdr_write_func, + bs, + qopts->encrypt, + force, + errp); + } + return ret; +} + +/* + * If offset or size are negative, respectively, they will not be included in + * the BLOCK_IMAGE_CORRUPTED event emitted. + * fatal will be ignored for read-only BDS; corruptions found there will always + * be considered non-fatal. + */ +void qcow2_signal_corruption(BlockDriverState *bs, bool fatal, int64_t offset, + int64_t size, const char *message_format, ...) +{ + BDRVQcow2State *s = bs->opaque; + const char *node_name; + char *message; + va_list ap; + + fatal = fatal && bdrv_is_writable(bs); + + if (s->signaled_corruption && + (!fatal || (s->incompatible_features & QCOW2_INCOMPAT_CORRUPT))) + { + return; + } + + va_start(ap, message_format); + message = g_strdup_vprintf(message_format, ap); + va_end(ap); + + if (fatal) { + fprintf(stderr, "qcow2: Marking image as corrupt: %s; further " + "corruption events will be suppressed\n", message); + } else { + fprintf(stderr, "qcow2: Image is corrupt: %s; further non-fatal " + "corruption events will be suppressed\n", message); + } + + node_name = bdrv_get_node_name(bs); + qapi_event_send_block_image_corrupted(bdrv_get_device_name(bs), + *node_name != '\0', node_name, + message, offset >= 0, offset, + size >= 0, size, + fatal); + g_free(message); + + if (fatal) { + qcow2_mark_corrupt(bs); + bs->drv = NULL; /* make BDS unusable */ + } + + s->signaled_corruption = true; +} + +#define QCOW_COMMON_OPTIONS \ + { \ + .name = BLOCK_OPT_SIZE, \ + .type = QEMU_OPT_SIZE, \ + .help = "Virtual disk size" \ + }, \ + { \ + .name = BLOCK_OPT_COMPAT_LEVEL, \ + .type = QEMU_OPT_STRING, \ + .help = "Compatibility level (v2 [0.10] or v3 [1.1])" \ + }, \ + { \ + .name = BLOCK_OPT_BACKING_FILE, \ + .type = QEMU_OPT_STRING, \ + .help = "File name of a base image" \ + }, \ + { \ + .name = BLOCK_OPT_BACKING_FMT, \ + .type = QEMU_OPT_STRING, \ + .help = "Image format of the base image" \ + }, \ + { \ + .name = BLOCK_OPT_DATA_FILE, \ + .type = QEMU_OPT_STRING, \ + .help = "File name of an external data file" \ + }, \ + { \ + .name = BLOCK_OPT_DATA_FILE_RAW, \ + .type = QEMU_OPT_BOOL, \ + .help = "The external data file must stay valid " \ + "as a raw image" \ + }, \ + { \ + .name = BLOCK_OPT_LAZY_REFCOUNTS, \ + .type = QEMU_OPT_BOOL, \ + .help = "Postpone refcount updates", \ + .def_value_str = "off" \ + }, \ + { \ + .name = BLOCK_OPT_REFCOUNT_BITS, \ + .type = QEMU_OPT_NUMBER, \ + .help = "Width of a reference count entry in bits", \ + .def_value_str = "16" \ + } + +static QemuOptsList qcow2_create_opts = { + .name = "qcow2-create-opts", + .head = QTAILQ_HEAD_INITIALIZER(qcow2_create_opts.head), + .desc = { + { \ + .name = BLOCK_OPT_ENCRYPT, \ + .type = QEMU_OPT_BOOL, \ + .help = "Encrypt the image with format 'aes'. (Deprecated " \ + "in favor of " BLOCK_OPT_ENCRYPT_FORMAT "=aes)", \ + }, \ + { \ + .name = BLOCK_OPT_ENCRYPT_FORMAT, \ + .type = QEMU_OPT_STRING, \ + .help = "Encrypt the image, format choices: 'aes', 'luks'", \ + }, \ + BLOCK_CRYPTO_OPT_DEF_KEY_SECRET("encrypt.", \ + "ID of secret providing qcow AES key or LUKS passphrase"), \ + BLOCK_CRYPTO_OPT_DEF_LUKS_CIPHER_ALG("encrypt."), \ + BLOCK_CRYPTO_OPT_DEF_LUKS_CIPHER_MODE("encrypt."), \ + BLOCK_CRYPTO_OPT_DEF_LUKS_IVGEN_ALG("encrypt."), \ + BLOCK_CRYPTO_OPT_DEF_LUKS_IVGEN_HASH_ALG("encrypt."), \ + BLOCK_CRYPTO_OPT_DEF_LUKS_HASH_ALG("encrypt."), \ + BLOCK_CRYPTO_OPT_DEF_LUKS_ITER_TIME("encrypt."), \ + { \ + .name = BLOCK_OPT_CLUSTER_SIZE, \ + .type = QEMU_OPT_SIZE, \ + .help = "qcow2 cluster size", \ + .def_value_str = stringify(DEFAULT_CLUSTER_SIZE) \ + }, \ + { \ + .name = BLOCK_OPT_EXTL2, \ + .type = QEMU_OPT_BOOL, \ + .help = "Extended L2 tables", \ + .def_value_str = "off" \ + }, \ + { \ + .name = BLOCK_OPT_PREALLOC, \ + .type = QEMU_OPT_STRING, \ + .help = "Preallocation mode (allowed values: off, " \ + "metadata, falloc, full)" \ + }, \ + { \ + .name = BLOCK_OPT_COMPRESSION_TYPE, \ + .type = QEMU_OPT_STRING, \ + .help = "Compression method used for image cluster " \ + "compression", \ + .def_value_str = "zlib" \ + }, + QCOW_COMMON_OPTIONS, + { /* end of list */ } + } +}; + +static QemuOptsList qcow2_amend_opts = { + .name = "qcow2-amend-opts", + .head = QTAILQ_HEAD_INITIALIZER(qcow2_amend_opts.head), + .desc = { + BLOCK_CRYPTO_OPT_DEF_LUKS_STATE("encrypt."), + BLOCK_CRYPTO_OPT_DEF_LUKS_KEYSLOT("encrypt."), + BLOCK_CRYPTO_OPT_DEF_LUKS_OLD_SECRET("encrypt."), + BLOCK_CRYPTO_OPT_DEF_LUKS_NEW_SECRET("encrypt."), + BLOCK_CRYPTO_OPT_DEF_LUKS_ITER_TIME("encrypt."), + QCOW_COMMON_OPTIONS, + { /* end of list */ } + } +}; + +static const char *const qcow2_strong_runtime_opts[] = { + "encrypt." BLOCK_CRYPTO_OPT_QCOW_KEY_SECRET, + + NULL +}; + +BlockDriver bdrv_qcow2 = { + .format_name = "qcow2", + .instance_size = sizeof(BDRVQcow2State), + .bdrv_probe = qcow2_probe, + .bdrv_open = qcow2_open, + .bdrv_close = qcow2_close, + .bdrv_reopen_prepare = qcow2_reopen_prepare, + .bdrv_reopen_commit = qcow2_reopen_commit, + .bdrv_reopen_commit_post = qcow2_reopen_commit_post, + .bdrv_reopen_abort = qcow2_reopen_abort, + .bdrv_join_options = qcow2_join_options, + .bdrv_child_perm = bdrv_default_perms, + .bdrv_co_create_opts = qcow2_co_create_opts, + .bdrv_co_create = qcow2_co_create, + .bdrv_has_zero_init = qcow2_has_zero_init, + .bdrv_co_block_status = qcow2_co_block_status, + + .bdrv_co_preadv_part = qcow2_co_preadv_part, + .bdrv_co_pwritev_part = qcow2_co_pwritev_part, + .bdrv_co_flush_to_os = qcow2_co_flush_to_os, + + .bdrv_co_pwrite_zeroes = qcow2_co_pwrite_zeroes, + .bdrv_co_pdiscard = qcow2_co_pdiscard, + .bdrv_co_copy_range_from = qcow2_co_copy_range_from, + .bdrv_co_copy_range_to = qcow2_co_copy_range_to, + .bdrv_co_truncate = qcow2_co_truncate, + .bdrv_co_pwritev_compressed_part = qcow2_co_pwritev_compressed_part, + .bdrv_make_empty = qcow2_make_empty, + + .bdrv_snapshot_create = qcow2_snapshot_create, + .bdrv_snapshot_goto = qcow2_snapshot_goto, + .bdrv_snapshot_delete = qcow2_snapshot_delete, + .bdrv_snapshot_list = qcow2_snapshot_list, + .bdrv_snapshot_load_tmp = qcow2_snapshot_load_tmp, + .bdrv_measure = qcow2_measure, + .bdrv_get_info = qcow2_get_info, + .bdrv_get_specific_info = qcow2_get_specific_info, + + .bdrv_save_vmstate = qcow2_save_vmstate, + .bdrv_load_vmstate = qcow2_load_vmstate, + + .is_format = true, + .supports_backing = true, + .bdrv_change_backing_file = qcow2_change_backing_file, + + .bdrv_refresh_limits = qcow2_refresh_limits, + .bdrv_co_invalidate_cache = qcow2_co_invalidate_cache, + .bdrv_inactivate = qcow2_inactivate, + + .create_opts = &qcow2_create_opts, + .amend_opts = &qcow2_amend_opts, + .strong_runtime_opts = qcow2_strong_runtime_opts, + .mutable_opts = mutable_opts, + .bdrv_co_check = qcow2_co_check, + .bdrv_amend_options = qcow2_amend_options, + .bdrv_co_amend = qcow2_co_amend, + + .bdrv_detach_aio_context = qcow2_detach_aio_context, + .bdrv_attach_aio_context = qcow2_attach_aio_context, + + .bdrv_supports_persistent_dirty_bitmap = + qcow2_supports_persistent_dirty_bitmap, + .bdrv_co_can_store_new_dirty_bitmap = qcow2_co_can_store_new_dirty_bitmap, + .bdrv_co_remove_persistent_dirty_bitmap = + qcow2_co_remove_persistent_dirty_bitmap, +}; + +static void bdrv_qcow2_init(void) +{ + bdrv_register(&bdrv_qcow2); +} + +block_init(bdrv_qcow2_init); diff --git a/block/qcow2.h b/block/qcow2.h new file mode 100644 index 000000000..0678073b7 --- /dev/null +++ b/block/qcow2.h @@ -0,0 +1,1013 @@ +/* + * Block driver for the QCOW version 2 format + * + * Copyright (c) 2004-2006 Fabrice Bellard + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ + +#ifndef BLOCK_QCOW2_H +#define BLOCK_QCOW2_H + +#include "crypto/block.h" +#include "qemu/coroutine.h" +#include "qemu/units.h" +#include "block/block_int.h" + +//#define DEBUG_ALLOC +//#define DEBUG_ALLOC2 +//#define DEBUG_EXT + +#define QCOW_MAGIC (('Q' << 24) | ('F' << 16) | ('I' << 8) | 0xfb) + +#define QCOW_CRYPT_NONE 0 +#define QCOW_CRYPT_AES 1 +#define QCOW_CRYPT_LUKS 2 + +#define QCOW_MAX_CRYPT_CLUSTERS 32 +#define QCOW_MAX_SNAPSHOTS 65536 + +/* Field widths in qcow2 mean normal cluster offsets cannot reach + * 64PB; depending on cluster size, compressed clusters can have a + * smaller limit (64PB for up to 16k clusters, then ramps down to + * 512TB for 2M clusters). */ +#define QCOW_MAX_CLUSTER_OFFSET ((1ULL << 56) - 1) + +/* 8 MB refcount table is enough for 2 PB images at 64k cluster size + * (128 GB for 512 byte clusters, 2 EB for 2 MB clusters) */ +#define QCOW_MAX_REFTABLE_SIZE (8 * MiB) + +/* 32 MB L1 table is enough for 2 PB images at 64k cluster size + * (128 GB for 512 byte clusters, 2 EB for 2 MB clusters) */ +#define QCOW_MAX_L1_SIZE (32 * MiB) + +/* Allow for an average of 1k per snapshot table entry, should be plenty of + * space for snapshot names and IDs */ +#define QCOW_MAX_SNAPSHOTS_SIZE (1024 * QCOW_MAX_SNAPSHOTS) + +/* Maximum amount of extra data per snapshot table entry to accept */ +#define QCOW_MAX_SNAPSHOT_EXTRA_DATA 1024 + +/* Bitmap header extension constraints */ +#define QCOW2_MAX_BITMAPS 65535 +#define QCOW2_MAX_BITMAP_DIRECTORY_SIZE (1024 * QCOW2_MAX_BITMAPS) + +/* Maximum of parallel sub-request per guest request */ +#define QCOW2_MAX_WORKERS 8 + +/* indicate that the refcount of the referenced cluster is exactly one. */ +#define QCOW_OFLAG_COPIED (1ULL << 63) +/* indicate that the cluster is compressed (they never have the copied flag) */ +#define QCOW_OFLAG_COMPRESSED (1ULL << 62) +/* The cluster reads as all zeros */ +#define QCOW_OFLAG_ZERO (1ULL << 0) + +#define QCOW_EXTL2_SUBCLUSTERS_PER_CLUSTER 32 + +/* The subcluster X [0..31] is allocated */ +#define QCOW_OFLAG_SUB_ALLOC(X) (1ULL << (X)) +/* The subcluster X [0..31] reads as zeroes */ +#define QCOW_OFLAG_SUB_ZERO(X) (QCOW_OFLAG_SUB_ALLOC(X) << 32) +/* Subclusters [X, Y) (0 <= X <= Y <= 32) are allocated */ +#define QCOW_OFLAG_SUB_ALLOC_RANGE(X, Y) \ + (QCOW_OFLAG_SUB_ALLOC(Y) - QCOW_OFLAG_SUB_ALLOC(X)) +/* Subclusters [X, Y) (0 <= X <= Y <= 32) read as zeroes */ +#define QCOW_OFLAG_SUB_ZERO_RANGE(X, Y) \ + (QCOW_OFLAG_SUB_ALLOC_RANGE(X, Y) << 32) +/* L2 entry bitmap with all allocation bits set */ +#define QCOW_L2_BITMAP_ALL_ALLOC (QCOW_OFLAG_SUB_ALLOC_RANGE(0, 32)) +/* L2 entry bitmap with all "read as zeroes" bits set */ +#define QCOW_L2_BITMAP_ALL_ZEROES (QCOW_OFLAG_SUB_ZERO_RANGE(0, 32)) + +/* Size of normal and extended L2 entries */ +#define L2E_SIZE_NORMAL (sizeof(uint64_t)) +#define L2E_SIZE_EXTENDED (sizeof(uint64_t) * 2) + +/* Size of L1 table entries */ +#define L1E_SIZE (sizeof(uint64_t)) + +/* Size of reftable entries */ +#define REFTABLE_ENTRY_SIZE (sizeof(uint64_t)) + +#define MIN_CLUSTER_BITS 9 +#define MAX_CLUSTER_BITS 21 + +/* Defined in the qcow2 spec (compressed cluster descriptor) */ +#define QCOW2_COMPRESSED_SECTOR_SIZE 512U +#define QCOW2_COMPRESSED_SECTOR_MASK (~(QCOW2_COMPRESSED_SECTOR_SIZE - 1ULL)) + +/* Must be at least 2 to cover COW */ +#define MIN_L2_CACHE_SIZE 2 /* cache entries */ + +/* Must be at least 4 to cover all cases of refcount table growth */ +#define MIN_REFCOUNT_CACHE_SIZE 4 /* clusters */ + +#ifdef CONFIG_LINUX +#define DEFAULT_L2_CACHE_MAX_SIZE (32 * MiB) +#define DEFAULT_CACHE_CLEAN_INTERVAL 600 /* seconds */ +#else +#define DEFAULT_L2_CACHE_MAX_SIZE (8 * MiB) +/* Cache clean interval is currently available only on Linux, so must be 0 */ +#define DEFAULT_CACHE_CLEAN_INTERVAL 0 +#endif + +#define DEFAULT_CLUSTER_SIZE 65536 + +#define QCOW2_OPT_DATA_FILE "data-file" +#define QCOW2_OPT_LAZY_REFCOUNTS "lazy-refcounts" +#define QCOW2_OPT_DISCARD_REQUEST "pass-discard-request" +#define QCOW2_OPT_DISCARD_SNAPSHOT "pass-discard-snapshot" +#define QCOW2_OPT_DISCARD_OTHER "pass-discard-other" +#define QCOW2_OPT_OVERLAP "overlap-check" +#define QCOW2_OPT_OVERLAP_TEMPLATE "overlap-check.template" +#define QCOW2_OPT_OVERLAP_MAIN_HEADER "overlap-check.main-header" +#define QCOW2_OPT_OVERLAP_ACTIVE_L1 "overlap-check.active-l1" +#define QCOW2_OPT_OVERLAP_ACTIVE_L2 "overlap-check.active-l2" +#define QCOW2_OPT_OVERLAP_REFCOUNT_TABLE "overlap-check.refcount-table" +#define QCOW2_OPT_OVERLAP_REFCOUNT_BLOCK "overlap-check.refcount-block" +#define QCOW2_OPT_OVERLAP_SNAPSHOT_TABLE "overlap-check.snapshot-table" +#define QCOW2_OPT_OVERLAP_INACTIVE_L1 "overlap-check.inactive-l1" +#define QCOW2_OPT_OVERLAP_INACTIVE_L2 "overlap-check.inactive-l2" +#define QCOW2_OPT_OVERLAP_BITMAP_DIRECTORY "overlap-check.bitmap-directory" +#define QCOW2_OPT_CACHE_SIZE "cache-size" +#define QCOW2_OPT_L2_CACHE_SIZE "l2-cache-size" +#define QCOW2_OPT_L2_CACHE_ENTRY_SIZE "l2-cache-entry-size" +#define QCOW2_OPT_REFCOUNT_CACHE_SIZE "refcount-cache-size" +#define QCOW2_OPT_CACHE_CLEAN_INTERVAL "cache-clean-interval" + +typedef struct QCowHeader { + uint32_t magic; + uint32_t version; + uint64_t backing_file_offset; + uint32_t backing_file_size; + uint32_t cluster_bits; + uint64_t size; /* in bytes */ + uint32_t crypt_method; + uint32_t l1_size; /* XXX: save number of clusters instead ? */ + uint64_t l1_table_offset; + uint64_t refcount_table_offset; + uint32_t refcount_table_clusters; + uint32_t nb_snapshots; + uint64_t snapshots_offset; + + /* The following fields are only valid for version >= 3 */ + uint64_t incompatible_features; + uint64_t compatible_features; + uint64_t autoclear_features; + + uint32_t refcount_order; + uint32_t header_length; + + /* Additional fields */ + uint8_t compression_type; + + /* header must be a multiple of 8 */ + uint8_t padding[7]; +} QEMU_PACKED QCowHeader; + +QEMU_BUILD_BUG_ON(!QEMU_IS_ALIGNED(sizeof(QCowHeader), 8)); + +typedef struct QEMU_PACKED QCowSnapshotHeader { + /* header is 8 byte aligned */ + uint64_t l1_table_offset; + + uint32_t l1_size; + uint16_t id_str_size; + uint16_t name_size; + + uint32_t date_sec; + uint32_t date_nsec; + + uint64_t vm_clock_nsec; + + uint32_t vm_state_size; + uint32_t extra_data_size; /* for extension */ + /* extra data follows */ + /* id_str follows */ + /* name follows */ +} QCowSnapshotHeader; + +typedef struct QEMU_PACKED QCowSnapshotExtraData { + uint64_t vm_state_size_large; + uint64_t disk_size; + uint64_t icount; +} QCowSnapshotExtraData; + + +typedef struct QCowSnapshot { + uint64_t l1_table_offset; + uint32_t l1_size; + char *id_str; + char *name; + uint64_t disk_size; + uint64_t vm_state_size; + uint32_t date_sec; + uint32_t date_nsec; + uint64_t vm_clock_nsec; + /* icount value for the moment when snapshot was taken */ + uint64_t icount; + /* Size of all extra data, including QCowSnapshotExtraData if available */ + uint32_t extra_data_size; + /* Data beyond QCowSnapshotExtraData, if any */ + void *unknown_extra_data; +} QCowSnapshot; + +struct Qcow2Cache; +typedef struct Qcow2Cache Qcow2Cache; + +typedef struct Qcow2CryptoHeaderExtension { + uint64_t offset; + uint64_t length; +} QEMU_PACKED Qcow2CryptoHeaderExtension; + +typedef struct Qcow2UnknownHeaderExtension { + uint32_t magic; + uint32_t len; + QLIST_ENTRY(Qcow2UnknownHeaderExtension) next; + uint8_t data[]; +} Qcow2UnknownHeaderExtension; + +enum { + QCOW2_FEAT_TYPE_INCOMPATIBLE = 0, + QCOW2_FEAT_TYPE_COMPATIBLE = 1, + QCOW2_FEAT_TYPE_AUTOCLEAR = 2, +}; + +/* Incompatible feature bits */ +enum { + QCOW2_INCOMPAT_DIRTY_BITNR = 0, + QCOW2_INCOMPAT_CORRUPT_BITNR = 1, + QCOW2_INCOMPAT_DATA_FILE_BITNR = 2, + QCOW2_INCOMPAT_COMPRESSION_BITNR = 3, + QCOW2_INCOMPAT_EXTL2_BITNR = 4, + QCOW2_INCOMPAT_DIRTY = 1 << QCOW2_INCOMPAT_DIRTY_BITNR, + QCOW2_INCOMPAT_CORRUPT = 1 << QCOW2_INCOMPAT_CORRUPT_BITNR, + QCOW2_INCOMPAT_DATA_FILE = 1 << QCOW2_INCOMPAT_DATA_FILE_BITNR, + QCOW2_INCOMPAT_COMPRESSION = 1 << QCOW2_INCOMPAT_COMPRESSION_BITNR, + QCOW2_INCOMPAT_EXTL2 = 1 << QCOW2_INCOMPAT_EXTL2_BITNR, + + QCOW2_INCOMPAT_MASK = QCOW2_INCOMPAT_DIRTY + | QCOW2_INCOMPAT_CORRUPT + | QCOW2_INCOMPAT_DATA_FILE + | QCOW2_INCOMPAT_COMPRESSION + | QCOW2_INCOMPAT_EXTL2, +}; + +/* Compatible feature bits */ +enum { + QCOW2_COMPAT_LAZY_REFCOUNTS_BITNR = 0, + QCOW2_COMPAT_LAZY_REFCOUNTS = 1 << QCOW2_COMPAT_LAZY_REFCOUNTS_BITNR, + + QCOW2_COMPAT_FEAT_MASK = QCOW2_COMPAT_LAZY_REFCOUNTS, +}; + +/* Autoclear feature bits */ +enum { + QCOW2_AUTOCLEAR_BITMAPS_BITNR = 0, + QCOW2_AUTOCLEAR_DATA_FILE_RAW_BITNR = 1, + QCOW2_AUTOCLEAR_BITMAPS = 1 << QCOW2_AUTOCLEAR_BITMAPS_BITNR, + QCOW2_AUTOCLEAR_DATA_FILE_RAW = 1 << QCOW2_AUTOCLEAR_DATA_FILE_RAW_BITNR, + + QCOW2_AUTOCLEAR_MASK = QCOW2_AUTOCLEAR_BITMAPS + | QCOW2_AUTOCLEAR_DATA_FILE_RAW, +}; + +enum qcow2_discard_type { + QCOW2_DISCARD_NEVER = 0, + QCOW2_DISCARD_ALWAYS, + QCOW2_DISCARD_REQUEST, + QCOW2_DISCARD_SNAPSHOT, + QCOW2_DISCARD_OTHER, + QCOW2_DISCARD_MAX +}; + +typedef struct Qcow2Feature { + uint8_t type; + uint8_t bit; + char name[46]; +} QEMU_PACKED Qcow2Feature; + +typedef struct Qcow2DiscardRegion { + BlockDriverState *bs; + uint64_t offset; + uint64_t bytes; + QTAILQ_ENTRY(Qcow2DiscardRegion) next; +} Qcow2DiscardRegion; + +typedef uint64_t Qcow2GetRefcountFunc(const void *refcount_array, + uint64_t index); +typedef void Qcow2SetRefcountFunc(void *refcount_array, + uint64_t index, uint64_t value); + +typedef struct Qcow2BitmapHeaderExt { + uint32_t nb_bitmaps; + uint32_t reserved32; + uint64_t bitmap_directory_size; + uint64_t bitmap_directory_offset; +} QEMU_PACKED Qcow2BitmapHeaderExt; + +#define QCOW2_MAX_THREADS 4 + +typedef struct BDRVQcow2State { + int cluster_bits; + int cluster_size; + int l2_slice_size; + int subcluster_bits; + int subcluster_size; + int subclusters_per_cluster; + int l2_bits; + int l2_size; + int l1_size; + int l1_vm_state_index; + int refcount_block_bits; + int refcount_block_size; + int csize_shift; + int csize_mask; + uint64_t cluster_offset_mask; + uint64_t l1_table_offset; + uint64_t *l1_table; + + Qcow2Cache *l2_table_cache; + Qcow2Cache *refcount_block_cache; + QEMUTimer *cache_clean_timer; + unsigned cache_clean_interval; + + QLIST_HEAD(, QCowL2Meta) cluster_allocs; + + uint64_t *refcount_table; + uint64_t refcount_table_offset; + uint32_t refcount_table_size; + uint32_t max_refcount_table_index; /* Last used entry in refcount_table */ + uint64_t free_cluster_index; + uint64_t free_byte_offset; + + CoMutex lock; + + Qcow2CryptoHeaderExtension crypto_header; /* QCow2 header extension */ + QCryptoBlockOpenOptions *crypto_opts; /* Disk encryption runtime options */ + QCryptoBlock *crypto; /* Disk encryption format driver */ + bool crypt_physical_offset; /* Whether to use virtual or physical offset + for encryption initialization vector tweak */ + uint32_t crypt_method_header; + uint64_t snapshots_offset; + int snapshots_size; + unsigned int nb_snapshots; + QCowSnapshot *snapshots; + + uint32_t nb_bitmaps; + uint64_t bitmap_directory_size; + uint64_t bitmap_directory_offset; + + int flags; + int qcow_version; + bool use_lazy_refcounts; + int refcount_order; + int refcount_bits; + uint64_t refcount_max; + + Qcow2GetRefcountFunc *get_refcount; + Qcow2SetRefcountFunc *set_refcount; + + bool discard_passthrough[QCOW2_DISCARD_MAX]; + + int overlap_check; /* bitmask of Qcow2MetadataOverlap values */ + bool signaled_corruption; + + uint64_t incompatible_features; + uint64_t compatible_features; + uint64_t autoclear_features; + + size_t unknown_header_fields_size; + void *unknown_header_fields; + QLIST_HEAD(, Qcow2UnknownHeaderExtension) unknown_header_ext; + QTAILQ_HEAD (, Qcow2DiscardRegion) discards; + bool cache_discards; + + /* Backing file path and format as stored in the image (this is not the + * effective path/format, which may be the result of a runtime option + * override) */ + char *image_backing_file; + char *image_backing_format; + char *image_data_file; + + CoQueue thread_task_queue; + int nb_threads; + + BdrvChild *data_file; + + bool metadata_preallocation_checked; + bool metadata_preallocation; + /* + * Compression type used for the image. Default: 0 - ZLIB + * The image compression type is set on image creation. + * For now, the only way to change the compression type + * is to convert the image with the desired compression type set. + */ + Qcow2CompressionType compression_type; +} BDRVQcow2State; + +typedef struct Qcow2COWRegion { + /** + * Offset of the COW region in bytes from the start of the first cluster + * touched by the request. + */ + unsigned offset; + + /** Number of bytes to copy */ + unsigned nb_bytes; +} Qcow2COWRegion; + +/** + * Describes an in-flight (part of a) write request that writes to clusters + * that need to have their L2 table entries updated (because they are + * newly allocated or need changes in their L2 bitmaps) + */ +typedef struct QCowL2Meta +{ + /** Guest offset of the first updated cluster */ + uint64_t offset; + + /** Host offset of the first updated cluster */ + uint64_t alloc_offset; + + /** Number of updated clusters */ + int nb_clusters; + + /** Do not free the old clusters */ + bool keep_old_clusters; + + /** + * Requests that overlap with this allocation and wait to be restarted + * when the allocating request has completed. + */ + CoQueue dependent_requests; + + /** + * The COW Region immediately before the area the guest actually + * writes to. This (part of the) write request starts at + * cow_start.offset + cow_start.nb_bytes. + */ + Qcow2COWRegion cow_start; + + /** + * The COW Region immediately after the area the guest actually + * writes to. This (part of the) write request ends at cow_end.offset + * (which must always be set even when cow_end.nb_bytes is 0). + */ + Qcow2COWRegion cow_end; + + /* + * Indicates that COW regions are already handled and do not require + * any more processing. + */ + bool skip_cow; + + /** + * Indicates that this is not a normal write request but a preallocation. + * If the image has extended L2 entries this means that no new individual + * subclusters will be marked as allocated in the L2 bitmap (but any + * existing contents of that bitmap will be kept). + */ + bool prealloc; + + /** + * The I/O vector with the data from the actual guest write request. + * If non-NULL, this is meant to be merged together with the data + * from @cow_start and @cow_end into one single write operation. + */ + QEMUIOVector *data_qiov; + size_t data_qiov_offset; + + /** Pointer to next L2Meta of the same write request */ + struct QCowL2Meta *next; + + QLIST_ENTRY(QCowL2Meta) next_in_flight; +} QCowL2Meta; + +/* + * In images with standard L2 entries all clusters are treated as if + * they had one subcluster so QCow2ClusterType and QCow2SubclusterType + * can be mapped to each other and have the exact same meaning + * (QCOW2_SUBCLUSTER_UNALLOCATED_ALLOC cannot happen in these images). + * + * In images with extended L2 entries QCow2ClusterType refers to the + * complete cluster and QCow2SubclusterType to each of the individual + * subclusters, so there are several possible combinations: + * + * |--------------+---------------------------| + * | Cluster type | Possible subcluster types | + * |--------------+---------------------------| + * | UNALLOCATED | UNALLOCATED_PLAIN | + * | | ZERO_PLAIN | + * |--------------+---------------------------| + * | NORMAL | UNALLOCATED_ALLOC | + * | | ZERO_ALLOC | + * | | NORMAL | + * |--------------+---------------------------| + * | COMPRESSED | COMPRESSED | + * |--------------+---------------------------| + * + * QCOW2_SUBCLUSTER_INVALID means that the L2 entry is incorrect and + * the image should be marked corrupt. + */ + +typedef enum QCow2ClusterType { + QCOW2_CLUSTER_UNALLOCATED, + QCOW2_CLUSTER_ZERO_PLAIN, + QCOW2_CLUSTER_ZERO_ALLOC, + QCOW2_CLUSTER_NORMAL, + QCOW2_CLUSTER_COMPRESSED, +} QCow2ClusterType; + +typedef enum QCow2SubclusterType { + QCOW2_SUBCLUSTER_UNALLOCATED_PLAIN, + QCOW2_SUBCLUSTER_UNALLOCATED_ALLOC, + QCOW2_SUBCLUSTER_ZERO_PLAIN, + QCOW2_SUBCLUSTER_ZERO_ALLOC, + QCOW2_SUBCLUSTER_NORMAL, + QCOW2_SUBCLUSTER_COMPRESSED, + QCOW2_SUBCLUSTER_INVALID, +} QCow2SubclusterType; + +typedef enum QCow2MetadataOverlap { + QCOW2_OL_MAIN_HEADER_BITNR = 0, + QCOW2_OL_ACTIVE_L1_BITNR = 1, + QCOW2_OL_ACTIVE_L2_BITNR = 2, + QCOW2_OL_REFCOUNT_TABLE_BITNR = 3, + QCOW2_OL_REFCOUNT_BLOCK_BITNR = 4, + QCOW2_OL_SNAPSHOT_TABLE_BITNR = 5, + QCOW2_OL_INACTIVE_L1_BITNR = 6, + QCOW2_OL_INACTIVE_L2_BITNR = 7, + QCOW2_OL_BITMAP_DIRECTORY_BITNR = 8, + + QCOW2_OL_MAX_BITNR = 9, + + QCOW2_OL_NONE = 0, + QCOW2_OL_MAIN_HEADER = (1 << QCOW2_OL_MAIN_HEADER_BITNR), + QCOW2_OL_ACTIVE_L1 = (1 << QCOW2_OL_ACTIVE_L1_BITNR), + QCOW2_OL_ACTIVE_L2 = (1 << QCOW2_OL_ACTIVE_L2_BITNR), + QCOW2_OL_REFCOUNT_TABLE = (1 << QCOW2_OL_REFCOUNT_TABLE_BITNR), + QCOW2_OL_REFCOUNT_BLOCK = (1 << QCOW2_OL_REFCOUNT_BLOCK_BITNR), + QCOW2_OL_SNAPSHOT_TABLE = (1 << QCOW2_OL_SNAPSHOT_TABLE_BITNR), + QCOW2_OL_INACTIVE_L1 = (1 << QCOW2_OL_INACTIVE_L1_BITNR), + /* NOTE: Checking overlaps with inactive L2 tables will result in bdrv + * reads. */ + QCOW2_OL_INACTIVE_L2 = (1 << QCOW2_OL_INACTIVE_L2_BITNR), + QCOW2_OL_BITMAP_DIRECTORY = (1 << QCOW2_OL_BITMAP_DIRECTORY_BITNR), +} QCow2MetadataOverlap; + +/* Perform all overlap checks which can be done in constant time */ +#define QCOW2_OL_CONSTANT \ + (QCOW2_OL_MAIN_HEADER | QCOW2_OL_ACTIVE_L1 | QCOW2_OL_REFCOUNT_TABLE | \ + QCOW2_OL_SNAPSHOT_TABLE | QCOW2_OL_BITMAP_DIRECTORY) + +/* Perform all overlap checks which don't require disk access */ +#define QCOW2_OL_CACHED \ + (QCOW2_OL_CONSTANT | QCOW2_OL_ACTIVE_L2 | QCOW2_OL_REFCOUNT_BLOCK | \ + QCOW2_OL_INACTIVE_L1) + +/* Perform all overlap checks */ +#define QCOW2_OL_ALL \ + (QCOW2_OL_CACHED | QCOW2_OL_INACTIVE_L2) + +#define L1E_OFFSET_MASK 0x00fffffffffffe00ULL +#define L2E_OFFSET_MASK 0x00fffffffffffe00ULL +#define L2E_COMPRESSED_OFFSET_SIZE_MASK 0x3fffffffffffffffULL + +#define REFT_OFFSET_MASK 0xfffffffffffffe00ULL + +#define INV_OFFSET (-1ULL) + +static inline bool has_subclusters(BDRVQcow2State *s) +{ + return s->incompatible_features & QCOW2_INCOMPAT_EXTL2; +} + +static inline size_t l2_entry_size(BDRVQcow2State *s) +{ + return has_subclusters(s) ? L2E_SIZE_EXTENDED : L2E_SIZE_NORMAL; +} + +static inline uint64_t get_l2_entry(BDRVQcow2State *s, uint64_t *l2_slice, + int idx) +{ + idx *= l2_entry_size(s) / sizeof(uint64_t); + return be64_to_cpu(l2_slice[idx]); +} + +static inline uint64_t get_l2_bitmap(BDRVQcow2State *s, uint64_t *l2_slice, + int idx) +{ + if (has_subclusters(s)) { + idx *= l2_entry_size(s) / sizeof(uint64_t); + return be64_to_cpu(l2_slice[idx + 1]); + } else { + return 0; /* For convenience only; this value has no meaning. */ + } +} + +static inline void set_l2_entry(BDRVQcow2State *s, uint64_t *l2_slice, + int idx, uint64_t entry) +{ + idx *= l2_entry_size(s) / sizeof(uint64_t); + l2_slice[idx] = cpu_to_be64(entry); +} + +static inline void set_l2_bitmap(BDRVQcow2State *s, uint64_t *l2_slice, + int idx, uint64_t bitmap) +{ + assert(has_subclusters(s)); + idx *= l2_entry_size(s) / sizeof(uint64_t); + l2_slice[idx + 1] = cpu_to_be64(bitmap); +} + +static inline bool has_data_file(BlockDriverState *bs) +{ + BDRVQcow2State *s = bs->opaque; + return (s->data_file != bs->file); +} + +static inline bool data_file_is_raw(BlockDriverState *bs) +{ + BDRVQcow2State *s = bs->opaque; + return !!(s->autoclear_features & QCOW2_AUTOCLEAR_DATA_FILE_RAW); +} + +static inline int64_t start_of_cluster(BDRVQcow2State *s, int64_t offset) +{ + return offset & ~(s->cluster_size - 1); +} + +static inline int64_t offset_into_cluster(BDRVQcow2State *s, int64_t offset) +{ + return offset & (s->cluster_size - 1); +} + +static inline int64_t offset_into_subcluster(BDRVQcow2State *s, int64_t offset) +{ + return offset & (s->subcluster_size - 1); +} + +static inline uint64_t size_to_clusters(BDRVQcow2State *s, uint64_t size) +{ + return (size + (s->cluster_size - 1)) >> s->cluster_bits; +} + +static inline uint64_t size_to_subclusters(BDRVQcow2State *s, uint64_t size) +{ + return (size + (s->subcluster_size - 1)) >> s->subcluster_bits; +} + +static inline int64_t size_to_l1(BDRVQcow2State *s, int64_t size) +{ + int shift = s->cluster_bits + s->l2_bits; + return (size + (1ULL << shift) - 1) >> shift; +} + +static inline int offset_to_l1_index(BDRVQcow2State *s, uint64_t offset) +{ + return offset >> (s->l2_bits + s->cluster_bits); +} + +static inline int offset_to_l2_index(BDRVQcow2State *s, int64_t offset) +{ + return (offset >> s->cluster_bits) & (s->l2_size - 1); +} + +static inline int offset_to_l2_slice_index(BDRVQcow2State *s, int64_t offset) +{ + return (offset >> s->cluster_bits) & (s->l2_slice_size - 1); +} + +static inline int offset_to_sc_index(BDRVQcow2State *s, int64_t offset) +{ + return (offset >> s->subcluster_bits) & (s->subclusters_per_cluster - 1); +} + +static inline int64_t qcow2_vm_state_offset(BDRVQcow2State *s) +{ + return (int64_t)s->l1_vm_state_index << (s->cluster_bits + s->l2_bits); +} + +static inline QCow2ClusterType qcow2_get_cluster_type(BlockDriverState *bs, + uint64_t l2_entry) +{ + BDRVQcow2State *s = bs->opaque; + + if (l2_entry & QCOW_OFLAG_COMPRESSED) { + return QCOW2_CLUSTER_COMPRESSED; + } else if ((l2_entry & QCOW_OFLAG_ZERO) && !has_subclusters(s)) { + if (l2_entry & L2E_OFFSET_MASK) { + return QCOW2_CLUSTER_ZERO_ALLOC; + } + return QCOW2_CLUSTER_ZERO_PLAIN; + } else if (!(l2_entry & L2E_OFFSET_MASK)) { + /* Offset 0 generally means unallocated, but it is ambiguous with + * external data files because 0 is a valid offset there. However, all + * clusters in external data files always have refcount 1, so we can + * rely on QCOW_OFLAG_COPIED to disambiguate. */ + if (has_data_file(bs) && (l2_entry & QCOW_OFLAG_COPIED)) { + return QCOW2_CLUSTER_NORMAL; + } else { + return QCOW2_CLUSTER_UNALLOCATED; + } + } else { + return QCOW2_CLUSTER_NORMAL; + } +} + +/* + * In an image without subsclusters @l2_bitmap is ignored and + * @sc_index must be 0. + * Return QCOW2_SUBCLUSTER_INVALID if an invalid l2 entry is detected + * (this checks the whole entry and bitmap, not only the bits related + * to subcluster @sc_index). + */ +static inline +QCow2SubclusterType qcow2_get_subcluster_type(BlockDriverState *bs, + uint64_t l2_entry, + uint64_t l2_bitmap, + unsigned sc_index) +{ + BDRVQcow2State *s = bs->opaque; + QCow2ClusterType type = qcow2_get_cluster_type(bs, l2_entry); + assert(sc_index < s->subclusters_per_cluster); + + if (has_subclusters(s)) { + switch (type) { + case QCOW2_CLUSTER_COMPRESSED: + return QCOW2_SUBCLUSTER_COMPRESSED; + case QCOW2_CLUSTER_NORMAL: + if ((l2_bitmap >> 32) & l2_bitmap) { + return QCOW2_SUBCLUSTER_INVALID; + } else if (l2_bitmap & QCOW_OFLAG_SUB_ZERO(sc_index)) { + return QCOW2_SUBCLUSTER_ZERO_ALLOC; + } else if (l2_bitmap & QCOW_OFLAG_SUB_ALLOC(sc_index)) { + return QCOW2_SUBCLUSTER_NORMAL; + } else { + return QCOW2_SUBCLUSTER_UNALLOCATED_ALLOC; + } + case QCOW2_CLUSTER_UNALLOCATED: + if (l2_bitmap & QCOW_L2_BITMAP_ALL_ALLOC) { + return QCOW2_SUBCLUSTER_INVALID; + } else if (l2_bitmap & QCOW_OFLAG_SUB_ZERO(sc_index)) { + return QCOW2_SUBCLUSTER_ZERO_PLAIN; + } else { + return QCOW2_SUBCLUSTER_UNALLOCATED_PLAIN; + } + default: + g_assert_not_reached(); + } + } else { + switch (type) { + case QCOW2_CLUSTER_COMPRESSED: + return QCOW2_SUBCLUSTER_COMPRESSED; + case QCOW2_CLUSTER_ZERO_PLAIN: + return QCOW2_SUBCLUSTER_ZERO_PLAIN; + case QCOW2_CLUSTER_ZERO_ALLOC: + return QCOW2_SUBCLUSTER_ZERO_ALLOC; + case QCOW2_CLUSTER_NORMAL: + return QCOW2_SUBCLUSTER_NORMAL; + case QCOW2_CLUSTER_UNALLOCATED: + return QCOW2_SUBCLUSTER_UNALLOCATED_PLAIN; + default: + g_assert_not_reached(); + } + } +} + +static inline bool qcow2_cluster_is_allocated(QCow2ClusterType type) +{ + return (type == QCOW2_CLUSTER_COMPRESSED || type == QCOW2_CLUSTER_NORMAL || + type == QCOW2_CLUSTER_ZERO_ALLOC); +} + +/* Check whether refcounts are eager or lazy */ +static inline bool qcow2_need_accurate_refcounts(BDRVQcow2State *s) +{ + return !(s->incompatible_features & QCOW2_INCOMPAT_DIRTY); +} + +static inline uint64_t l2meta_cow_start(QCowL2Meta *m) +{ + return m->offset + m->cow_start.offset; +} + +static inline uint64_t l2meta_cow_end(QCowL2Meta *m) +{ + return m->offset + m->cow_end.offset + m->cow_end.nb_bytes; +} + +static inline uint64_t refcount_diff(uint64_t r1, uint64_t r2) +{ + return r1 > r2 ? r1 - r2 : r2 - r1; +} + +static inline +uint32_t offset_to_reftable_index(BDRVQcow2State *s, uint64_t offset) +{ + return offset >> (s->refcount_block_bits + s->cluster_bits); +} + +/* qcow2.c functions */ +int64_t qcow2_refcount_metadata_size(int64_t clusters, size_t cluster_size, + int refcount_order, bool generous_increase, + uint64_t *refblock_count); + +int qcow2_mark_dirty(BlockDriverState *bs); +int qcow2_mark_corrupt(BlockDriverState *bs); +int qcow2_mark_consistent(BlockDriverState *bs); +int qcow2_update_header(BlockDriverState *bs); + +void qcow2_signal_corruption(BlockDriverState *bs, bool fatal, int64_t offset, + int64_t size, const char *message_format, ...) + GCC_FMT_ATTR(5, 6); + +int qcow2_validate_table(BlockDriverState *bs, uint64_t offset, + uint64_t entries, size_t entry_len, + int64_t max_size_bytes, const char *table_name, + Error **errp); + +/* qcow2-refcount.c functions */ +int qcow2_refcount_init(BlockDriverState *bs); +void qcow2_refcount_close(BlockDriverState *bs); + +int qcow2_get_refcount(BlockDriverState *bs, int64_t cluster_index, + uint64_t *refcount); + +int qcow2_update_cluster_refcount(BlockDriverState *bs, int64_t cluster_index, + uint64_t addend, bool decrease, + enum qcow2_discard_type type); + +int64_t qcow2_refcount_area(BlockDriverState *bs, uint64_t offset, + uint64_t additional_clusters, bool exact_size, + int new_refblock_index, + uint64_t new_refblock_offset); + +int64_t qcow2_alloc_clusters(BlockDriverState *bs, uint64_t size); +int64_t qcow2_alloc_clusters_at(BlockDriverState *bs, uint64_t offset, + int64_t nb_clusters); +int64_t qcow2_alloc_bytes(BlockDriverState *bs, int size); +void qcow2_free_clusters(BlockDriverState *bs, + int64_t offset, int64_t size, + enum qcow2_discard_type type); +void qcow2_free_any_cluster(BlockDriverState *bs, uint64_t l2_entry, + enum qcow2_discard_type type); + +int qcow2_update_snapshot_refcount(BlockDriverState *bs, + int64_t l1_table_offset, int l1_size, int addend); + +int coroutine_fn qcow2_flush_caches(BlockDriverState *bs); +int coroutine_fn qcow2_write_caches(BlockDriverState *bs); +int qcow2_check_refcounts(BlockDriverState *bs, BdrvCheckResult *res, + BdrvCheckMode fix); + +void qcow2_process_discards(BlockDriverState *bs, int ret); + +int qcow2_check_metadata_overlap(BlockDriverState *bs, int ign, int64_t offset, + int64_t size); +int qcow2_pre_write_overlap_check(BlockDriverState *bs, int ign, int64_t offset, + int64_t size, bool data_file); +int qcow2_inc_refcounts_imrt(BlockDriverState *bs, BdrvCheckResult *res, + void **refcount_table, + int64_t *refcount_table_size, + int64_t offset, int64_t size); + +int qcow2_change_refcount_order(BlockDriverState *bs, int refcount_order, + BlockDriverAmendStatusCB *status_cb, + void *cb_opaque, Error **errp); +int qcow2_shrink_reftable(BlockDriverState *bs); +int64_t qcow2_get_last_cluster(BlockDriverState *bs, int64_t size); +int qcow2_detect_metadata_preallocation(BlockDriverState *bs); + +/* qcow2-cluster.c functions */ +int qcow2_grow_l1_table(BlockDriverState *bs, uint64_t min_size, + bool exact_size); +int qcow2_shrink_l1_table(BlockDriverState *bs, uint64_t max_size); +int qcow2_write_l1_entry(BlockDriverState *bs, int l1_index); +int qcow2_encrypt_sectors(BDRVQcow2State *s, int64_t sector_num, + uint8_t *buf, int nb_sectors, bool enc, Error **errp); + +int qcow2_get_host_offset(BlockDriverState *bs, uint64_t offset, + unsigned int *bytes, uint64_t *host_offset, + QCow2SubclusterType *subcluster_type); +int qcow2_alloc_host_offset(BlockDriverState *bs, uint64_t offset, + unsigned int *bytes, uint64_t *host_offset, + QCowL2Meta **m); +int qcow2_alloc_compressed_cluster_offset(BlockDriverState *bs, + uint64_t offset, + int compressed_size, + uint64_t *host_offset); + +int qcow2_alloc_cluster_link_l2(BlockDriverState *bs, QCowL2Meta *m); +void qcow2_alloc_cluster_abort(BlockDriverState *bs, QCowL2Meta *m); +int qcow2_cluster_discard(BlockDriverState *bs, uint64_t offset, + uint64_t bytes, enum qcow2_discard_type type, + bool full_discard); +int qcow2_subcluster_zeroize(BlockDriverState *bs, uint64_t offset, + uint64_t bytes, int flags); + +int qcow2_expand_zero_clusters(BlockDriverState *bs, + BlockDriverAmendStatusCB *status_cb, + void *cb_opaque); + +/* qcow2-snapshot.c functions */ +int qcow2_snapshot_create(BlockDriverState *bs, QEMUSnapshotInfo *sn_info); +int qcow2_snapshot_goto(BlockDriverState *bs, const char *snapshot_id); +int qcow2_snapshot_delete(BlockDriverState *bs, + const char *snapshot_id, + const char *name, + Error **errp); +int qcow2_snapshot_list(BlockDriverState *bs, QEMUSnapshotInfo **psn_tab); +int qcow2_snapshot_load_tmp(BlockDriverState *bs, + const char *snapshot_id, + const char *name, + Error **errp); + +void qcow2_free_snapshots(BlockDriverState *bs); +int qcow2_read_snapshots(BlockDriverState *bs, Error **errp); +int qcow2_write_snapshots(BlockDriverState *bs); + +int coroutine_fn qcow2_check_read_snapshot_table(BlockDriverState *bs, + BdrvCheckResult *result, + BdrvCheckMode fix); +int coroutine_fn qcow2_check_fix_snapshot_table(BlockDriverState *bs, + BdrvCheckResult *result, + BdrvCheckMode fix); + +/* qcow2-cache.c functions */ +Qcow2Cache *qcow2_cache_create(BlockDriverState *bs, int num_tables, + unsigned table_size); +int qcow2_cache_destroy(Qcow2Cache *c); + +void qcow2_cache_entry_mark_dirty(Qcow2Cache *c, void *table); +int qcow2_cache_flush(BlockDriverState *bs, Qcow2Cache *c); +int qcow2_cache_write(BlockDriverState *bs, Qcow2Cache *c); +int qcow2_cache_set_dependency(BlockDriverState *bs, Qcow2Cache *c, + Qcow2Cache *dependency); +void qcow2_cache_depends_on_flush(Qcow2Cache *c); + +void qcow2_cache_clean_unused(Qcow2Cache *c); +int qcow2_cache_empty(BlockDriverState *bs, Qcow2Cache *c); + +int qcow2_cache_get(BlockDriverState *bs, Qcow2Cache *c, uint64_t offset, + void **table); +int qcow2_cache_get_empty(BlockDriverState *bs, Qcow2Cache *c, uint64_t offset, + void **table); +void qcow2_cache_put(Qcow2Cache *c, void **table); +void *qcow2_cache_is_table_offset(Qcow2Cache *c, uint64_t offset); +void qcow2_cache_discard(Qcow2Cache *c, void *table); + +/* qcow2-bitmap.c functions */ +int qcow2_check_bitmaps_refcounts(BlockDriverState *bs, BdrvCheckResult *res, + void **refcount_table, + int64_t *refcount_table_size); +bool qcow2_load_dirty_bitmaps(BlockDriverState *bs, Error **errp); +Qcow2BitmapInfoList *qcow2_get_bitmap_info_list(BlockDriverState *bs, + Error **errp); +int qcow2_reopen_bitmaps_rw(BlockDriverState *bs, Error **errp); +int qcow2_truncate_bitmaps_check(BlockDriverState *bs, Error **errp); +void qcow2_store_persistent_dirty_bitmaps(BlockDriverState *bs, + bool release_stored, Error **errp); +int qcow2_reopen_bitmaps_ro(BlockDriverState *bs, Error **errp); +bool qcow2_co_can_store_new_dirty_bitmap(BlockDriverState *bs, + const char *name, + uint32_t granularity, + Error **errp); +int qcow2_co_remove_persistent_dirty_bitmap(BlockDriverState *bs, + const char *name, + Error **errp); +bool qcow2_supports_persistent_dirty_bitmap(BlockDriverState *bs); +uint64_t qcow2_get_persistent_dirty_bitmap_size(BlockDriverState *bs, + uint32_t cluster_size); + +ssize_t coroutine_fn +qcow2_co_compress(BlockDriverState *bs, void *dest, size_t dest_size, + const void *src, size_t src_size); +ssize_t coroutine_fn +qcow2_co_decompress(BlockDriverState *bs, void *dest, size_t dest_size, + const void *src, size_t src_size); +int coroutine_fn +qcow2_co_encrypt(BlockDriverState *bs, uint64_t host_offset, + uint64_t guest_offset, void *buf, size_t len); +int coroutine_fn +qcow2_co_decrypt(BlockDriverState *bs, uint64_t host_offset, + uint64_t guest_offset, void *buf, size_t len); + +#endif diff --git a/block/qed-check.c b/block/qed-check.c new file mode 100644 index 000000000..418033ee2 --- /dev/null +++ b/block/qed-check.c @@ -0,0 +1,251 @@ +/* + * QEMU Enhanced Disk Format Consistency Check + * + * Copyright IBM, Corp. 2010 + * + * Authors: + * Stefan Hajnoczi + * + * This work is licensed under the terms of the GNU LGPL, version 2 or later. + * See the COPYING.LIB file in the top-level directory. + * + */ + +#include "qemu/osdep.h" +#include "qed.h" + +typedef struct { + BDRVQEDState *s; + BdrvCheckResult *result; + bool fix; /* whether to fix invalid offsets */ + + uint64_t nclusters; + uint32_t *used_clusters; /* referenced cluster bitmap */ + + QEDRequest request; +} QEDCheck; + +static bool qed_test_bit(uint32_t *bitmap, uint64_t n) { + return !!(bitmap[n / 32] & (1 << (n % 32))); +} + +static void qed_set_bit(uint32_t *bitmap, uint64_t n) { + bitmap[n / 32] |= 1 << (n % 32); +} + +/** + * Set bitmap bits for clusters + * + * @check: Check structure + * @offset: Starting offset in bytes + * @n: Number of clusters + */ +static bool qed_set_used_clusters(QEDCheck *check, uint64_t offset, + unsigned int n) +{ + uint64_t cluster = qed_bytes_to_clusters(check->s, offset); + unsigned int corruptions = 0; + + while (n-- != 0) { + /* Clusters should only be referenced once */ + if (qed_test_bit(check->used_clusters, cluster)) { + corruptions++; + } + + qed_set_bit(check->used_clusters, cluster); + cluster++; + } + + check->result->corruptions += corruptions; + return corruptions == 0; +} + +/** + * Check an L2 table + * + * @ret: Number of invalid cluster offsets + */ +static unsigned int qed_check_l2_table(QEDCheck *check, QEDTable *table) +{ + BDRVQEDState *s = check->s; + unsigned int i, num_invalid = 0; + uint64_t last_offset = 0; + + for (i = 0; i < s->table_nelems; i++) { + uint64_t offset = table->offsets[i]; + + if (qed_offset_is_unalloc_cluster(offset) || + qed_offset_is_zero_cluster(offset)) { + continue; + } + check->result->bfi.allocated_clusters++; + if (last_offset && (last_offset + s->header.cluster_size != offset)) { + check->result->bfi.fragmented_clusters++; + } + last_offset = offset; + + /* Detect invalid cluster offset */ + if (!qed_check_cluster_offset(s, offset)) { + if (check->fix) { + table->offsets[i] = 0; + check->result->corruptions_fixed++; + } else { + check->result->corruptions++; + } + + num_invalid++; + continue; + } + + qed_set_used_clusters(check, offset, 1); + } + + return num_invalid; +} + +/** + * Descend tables and check each cluster is referenced once only + */ +static int coroutine_fn qed_check_l1_table(QEDCheck *check, QEDTable *table) +{ + BDRVQEDState *s = check->s; + unsigned int i, num_invalid_l1 = 0; + int ret, last_error = 0; + + /* Mark L1 table clusters used */ + qed_set_used_clusters(check, s->header.l1_table_offset, + s->header.table_size); + + for (i = 0; i < s->table_nelems; i++) { + unsigned int num_invalid_l2; + uint64_t offset = table->offsets[i]; + + if (qed_offset_is_unalloc_cluster(offset)) { + continue; + } + + /* Detect invalid L2 offset */ + if (!qed_check_table_offset(s, offset)) { + /* Clear invalid offset */ + if (check->fix) { + table->offsets[i] = 0; + check->result->corruptions_fixed++; + } else { + check->result->corruptions++; + } + + num_invalid_l1++; + continue; + } + + if (!qed_set_used_clusters(check, offset, s->header.table_size)) { + continue; /* skip an invalid table */ + } + + ret = qed_read_l2_table_sync(s, &check->request, offset); + if (ret) { + check->result->check_errors++; + last_error = ret; + continue; + } + + num_invalid_l2 = qed_check_l2_table(check, + check->request.l2_table->table); + + /* Write out fixed L2 table */ + if (num_invalid_l2 > 0 && check->fix) { + ret = qed_write_l2_table_sync(s, &check->request, 0, + s->table_nelems, false); + if (ret) { + check->result->check_errors++; + last_error = ret; + continue; + } + } + } + + /* Drop reference to final table */ + qed_unref_l2_cache_entry(check->request.l2_table); + check->request.l2_table = NULL; + + /* Write out fixed L1 table */ + if (num_invalid_l1 > 0 && check->fix) { + ret = qed_write_l1_table_sync(s, 0, s->table_nelems); + if (ret) { + check->result->check_errors++; + last_error = ret; + } + } + + return last_error; +} + +/** + * Check for unreferenced (leaked) clusters + */ +static void qed_check_for_leaks(QEDCheck *check) +{ + BDRVQEDState *s = check->s; + uint64_t i; + + for (i = s->header.header_size; i < check->nclusters; i++) { + if (!qed_test_bit(check->used_clusters, i)) { + check->result->leaks++; + } + } +} + +/** + * Mark an image clean once it passes check or has been repaired + */ +static void qed_check_mark_clean(BDRVQEDState *s, BdrvCheckResult *result) +{ + /* Skip if there were unfixable corruptions or I/O errors */ + if (result->corruptions > 0 || result->check_errors > 0) { + return; + } + + /* Skip if image is already marked clean */ + if (!(s->header.features & QED_F_NEED_CHECK)) { + return; + } + + /* Ensure fixes reach storage before clearing check bit */ + bdrv_flush(s->bs); + + s->header.features &= ~QED_F_NEED_CHECK; + qed_write_header_sync(s); +} + +/* Called with table_lock held. */ +int coroutine_fn qed_check(BDRVQEDState *s, BdrvCheckResult *result, bool fix) +{ + QEDCheck check = { + .s = s, + .result = result, + .nclusters = qed_bytes_to_clusters(s, s->file_size), + .request = { .l2_table = NULL }, + .fix = fix, + }; + int ret; + + check.used_clusters = g_try_new0(uint32_t, (check.nclusters + 31) / 32); + if (check.nclusters && check.used_clusters == NULL) { + return -ENOMEM; + } + + check.result->bfi.total_clusters = + DIV_ROUND_UP(s->header.image_size, s->header.cluster_size); + ret = qed_check_l1_table(&check, s->l1_table); + if (ret == 0) { + /* Only check for leaks if entire image was scanned successfully */ + qed_check_for_leaks(&check); + + if (fix) { + qed_check_mark_clean(s, result); + } + } + + g_free(check.used_clusters); + return ret; +} diff --git a/block/qed-cluster.c b/block/qed-cluster.c new file mode 100644 index 000000000..672e2e654 --- /dev/null +++ b/block/qed-cluster.c @@ -0,0 +1,142 @@ +/* + * QEMU Enhanced Disk Format Cluster functions + * + * Copyright IBM, Corp. 2010 + * + * Authors: + * Stefan Hajnoczi + * Anthony Liguori + * + * This work is licensed under the terms of the GNU LGPL, version 2 or later. + * See the COPYING.LIB file in the top-level directory. + * + */ + +#include "qemu/osdep.h" +#include "qed.h" + +/** + * Count the number of contiguous data clusters + * + * @s: QED state + * @table: L2 table + * @index: First cluster index + * @n: Maximum number of clusters + * @offset: Set to first cluster offset + * + * This function scans tables for contiguous clusters. A contiguous run of + * clusters may be allocated, unallocated, or zero. + */ +static unsigned int qed_count_contiguous_clusters(BDRVQEDState *s, + QEDTable *table, + unsigned int index, + unsigned int n, + uint64_t *offset) +{ + unsigned int end = MIN(index + n, s->table_nelems); + uint64_t last = table->offsets[index]; + unsigned int i; + + *offset = last; + + for (i = index + 1; i < end; i++) { + if (qed_offset_is_unalloc_cluster(last)) { + /* Counting unallocated clusters */ + if (!qed_offset_is_unalloc_cluster(table->offsets[i])) { + break; + } + } else if (qed_offset_is_zero_cluster(last)) { + /* Counting zero clusters */ + if (!qed_offset_is_zero_cluster(table->offsets[i])) { + break; + } + } else { + /* Counting allocated clusters */ + if (table->offsets[i] != last + s->header.cluster_size) { + break; + } + last = table->offsets[i]; + } + } + return i - index; +} + +/** + * Find the offset of a data cluster + * + * @s: QED state + * @request: L2 cache entry + * @pos: Byte position in device + * @len: Number of bytes (may be shortened on return) + * @img_offset: Contains offset in the image file on success + * + * This function translates a position in the block device to an offset in the + * image file. The translated offset or unallocated range in the image file is + * reported back in *img_offset and *len. + * + * If the L2 table exists, request->l2_table points to the L2 table cache entry + * and the caller must free the reference when they are finished. The cache + * entry is exposed in this way to avoid callers having to read the L2 table + * again later during request processing. If request->l2_table is non-NULL it + * will be unreferenced before taking on the new cache entry. + * + * On success QED_CLUSTER_FOUND is returned and img_offset/len are a contiguous + * range in the image file. + * + * On failure QED_CLUSTER_L2 or QED_CLUSTER_L1 is returned for missing L2 or L1 + * table offset, respectively. len is number of contiguous unallocated bytes. + * + * Called with table_lock held. + */ +int coroutine_fn qed_find_cluster(BDRVQEDState *s, QEDRequest *request, + uint64_t pos, size_t *len, + uint64_t *img_offset) +{ + uint64_t l2_offset; + uint64_t offset = 0; + unsigned int index; + unsigned int n; + int ret; + + /* Limit length to L2 boundary. Requests are broken up at the L2 boundary + * so that a request acts on one L2 table at a time. + */ + *len = MIN(*len, (((pos >> s->l1_shift) + 1) << s->l1_shift) - pos); + + l2_offset = s->l1_table->offsets[qed_l1_index(s, pos)]; + if (qed_offset_is_unalloc_cluster(l2_offset)) { + *img_offset = 0; + return QED_CLUSTER_L1; + } + if (!qed_check_table_offset(s, l2_offset)) { + *img_offset = *len = 0; + return -EINVAL; + } + + ret = qed_read_l2_table(s, request, l2_offset); + if (ret) { + goto out; + } + + index = qed_l2_index(s, pos); + n = qed_bytes_to_clusters(s, qed_offset_into_cluster(s, pos) + *len); + n = qed_count_contiguous_clusters(s, request->l2_table->table, + index, n, &offset); + + if (qed_offset_is_unalloc_cluster(offset)) { + ret = QED_CLUSTER_L2; + } else if (qed_offset_is_zero_cluster(offset)) { + ret = QED_CLUSTER_ZERO; + } else if (qed_check_cluster_offset(s, offset)) { + ret = QED_CLUSTER_FOUND; + } else { + ret = -EINVAL; + } + + *len = MIN(*len, + n * s->header.cluster_size - qed_offset_into_cluster(s, pos)); + +out: + *img_offset = offset; + return ret; +} diff --git a/block/qed-l2-cache.c b/block/qed-l2-cache.c new file mode 100644 index 000000000..b54836239 --- /dev/null +++ b/block/qed-l2-cache.c @@ -0,0 +1,194 @@ +/* + * QEMU Enhanced Disk Format L2 Cache + * + * Copyright IBM, Corp. 2010 + * + * Authors: + * Anthony Liguori + * + * This work is licensed under the terms of the GNU LGPL, version 2 or later. + * See the COPYING.LIB file in the top-level directory. + * + */ + +/* + * L2 table cache usage is as follows: + * + * An open image has one L2 table cache that is used to avoid accessing the + * image file for recently referenced L2 tables. + * + * Cluster offset lookup translates the logical offset within the block device + * to a cluster offset within the image file. This is done by indexing into + * the L1 and L2 tables which store cluster offsets. It is here where the L2 + * table cache serves up recently referenced L2 tables. + * + * If there is a cache miss, that L2 table is read from the image file and + * committed to the cache. Subsequent accesses to that L2 table will be served + * from the cache until the table is evicted from the cache. + * + * L2 tables are also committed to the cache when new L2 tables are allocated + * in the image file. Since the L2 table cache is write-through, the new L2 + * table is first written out to the image file and then committed to the + * cache. + * + * Multiple I/O requests may be using an L2 table cache entry at any given + * time. That means an entry may be in use across several requests and + * reference counting is needed to free the entry at the correct time. In + * particular, an entry evicted from the cache will only be freed once all + * references are dropped. + * + * An in-flight I/O request will hold a reference to a L2 table cache entry for + * the period during which it needs to access the L2 table. This includes + * cluster offset lookup, L2 table allocation, and L2 table update when a new + * data cluster has been allocated. + * + * An interesting case occurs when two requests need to access an L2 table that + * is not in the cache. Since the operation to read the table from the image + * file takes some time to complete, both requests may see a cache miss and + * start reading the L2 table from the image file. The first to finish will + * commit its L2 table into the cache. When the second tries to commit its + * table will be deleted in favor of the existing cache entry. + */ + +#include "qemu/osdep.h" +#include "trace.h" +#include "qed.h" + +/* Each L2 holds 2GB so this let's us fully cache a 100GB disk */ +#define MAX_L2_CACHE_SIZE 50 + +/** + * Initialize the L2 cache + */ +void qed_init_l2_cache(L2TableCache *l2_cache) +{ + QTAILQ_INIT(&l2_cache->entries); + l2_cache->n_entries = 0; +} + +/** + * Free the L2 cache + */ +void qed_free_l2_cache(L2TableCache *l2_cache) +{ + CachedL2Table *entry, *next_entry; + + QTAILQ_FOREACH_SAFE(entry, &l2_cache->entries, node, next_entry) { + qemu_vfree(entry->table); + g_free(entry); + } +} + +/** + * Allocate an uninitialized entry from the cache + * + * The returned entry has a reference count of 1 and is owned by the caller. + * The caller must allocate the actual table field for this entry and it must + * be freeable using qemu_vfree(). + */ +CachedL2Table *qed_alloc_l2_cache_entry(L2TableCache *l2_cache) +{ + CachedL2Table *entry; + + entry = g_malloc0(sizeof(*entry)); + entry->ref++; + + trace_qed_alloc_l2_cache_entry(l2_cache, entry); + + return entry; +} + +/** + * Decrease an entry's reference count and free if necessary when the reference + * count drops to zero. + * + * Called with table_lock held. + */ +void qed_unref_l2_cache_entry(CachedL2Table *entry) +{ + if (!entry) { + return; + } + + entry->ref--; + trace_qed_unref_l2_cache_entry(entry, entry->ref); + if (entry->ref == 0) { + qemu_vfree(entry->table); + g_free(entry); + } +} + +/** + * Find an entry in the L2 cache. This may return NULL and it's up to the + * caller to satisfy the cache miss. + * + * For a cached entry, this function increases the reference count and returns + * the entry. + * + * Called with table_lock held. + */ +CachedL2Table *qed_find_l2_cache_entry(L2TableCache *l2_cache, uint64_t offset) +{ + CachedL2Table *entry; + + QTAILQ_FOREACH(entry, &l2_cache->entries, node) { + if (entry->offset == offset) { + trace_qed_find_l2_cache_entry(l2_cache, entry, offset, entry->ref); + entry->ref++; + return entry; + } + } + return NULL; +} + +/** + * Commit an L2 cache entry into the cache. This is meant to be used as part of + * the process to satisfy a cache miss. A caller would allocate an entry which + * is not actually in the L2 cache and then once the entry was valid and + * present on disk, the entry can be committed into the cache. + * + * Since the cache is write-through, it's important that this function is not + * called until the entry is present on disk and the L1 has been updated to + * point to the entry. + * + * N.B. This function steals a reference to the l2_table from the caller so the + * caller must obtain a new reference by issuing a call to + * qed_find_l2_cache_entry(). + * + * Called with table_lock held. + */ +void qed_commit_l2_cache_entry(L2TableCache *l2_cache, CachedL2Table *l2_table) +{ + CachedL2Table *entry; + + entry = qed_find_l2_cache_entry(l2_cache, l2_table->offset); + if (entry) { + qed_unref_l2_cache_entry(entry); + qed_unref_l2_cache_entry(l2_table); + return; + } + + /* Evict an unused cache entry so we have space. If all entries are in use + * we can grow the cache temporarily and we try to shrink back down later. + */ + if (l2_cache->n_entries >= MAX_L2_CACHE_SIZE) { + CachedL2Table *next; + QTAILQ_FOREACH_SAFE(entry, &l2_cache->entries, node, next) { + if (entry->ref > 1) { + continue; + } + + QTAILQ_REMOVE(&l2_cache->entries, entry, node); + l2_cache->n_entries--; + qed_unref_l2_cache_entry(entry); + + /* Stop evicting when we've shrunk back to max size */ + if (l2_cache->n_entries < MAX_L2_CACHE_SIZE) { + break; + } + } + } + + l2_cache->n_entries++; + QTAILQ_INSERT_TAIL(&l2_cache->entries, l2_table, node); +} diff --git a/block/qed-table.c b/block/qed-table.c new file mode 100644 index 000000000..405d446cb --- /dev/null +++ b/block/qed-table.c @@ -0,0 +1,194 @@ +/* + * QEMU Enhanced Disk Format Table I/O + * + * Copyright IBM, Corp. 2010 + * + * Authors: + * Stefan Hajnoczi + * Anthony Liguori + * + * This work is licensed under the terms of the GNU LGPL, version 2 or later. + * See the COPYING.LIB file in the top-level directory. + * + */ + +#include "qemu/osdep.h" +#include "trace.h" +#include "qemu/sockets.h" /* for EINPROGRESS on Windows */ +#include "qed.h" +#include "qemu/bswap.h" + +/* Called with table_lock held. */ +static int coroutine_fn qed_read_table(BDRVQEDState *s, uint64_t offset, + QEDTable *table) +{ + unsigned int bytes = s->header.cluster_size * s->header.table_size; + + int noffsets; + int i, ret; + + trace_qed_read_table(s, offset, table); + + qemu_co_mutex_unlock(&s->table_lock); + ret = bdrv_co_pread(s->bs->file, offset, bytes, table->offsets, 0); + qemu_co_mutex_lock(&s->table_lock); + if (ret < 0) { + goto out; + } + + /* Byteswap offsets */ + noffsets = bytes / sizeof(uint64_t); + for (i = 0; i < noffsets; i++) { + table->offsets[i] = le64_to_cpu(table->offsets[i]); + } + + ret = 0; +out: + /* Completion */ + trace_qed_read_table_cb(s, table, ret); + return ret; +} + +/** + * Write out an updated part or all of a table + * + * @s: QED state + * @offset: Offset of table in image file, in bytes + * @table: Table + * @index: Index of first element + * @n: Number of elements + * @flush: Whether or not to sync to disk + * + * Called with table_lock held. + */ +static int coroutine_fn qed_write_table(BDRVQEDState *s, uint64_t offset, + QEDTable *table, unsigned int index, + unsigned int n, bool flush) +{ + unsigned int sector_mask = BDRV_SECTOR_SIZE / sizeof(uint64_t) - 1; + unsigned int start, end, i; + QEDTable *new_table; + size_t len_bytes; + int ret; + + trace_qed_write_table(s, offset, table, index, n); + + /* Calculate indices of the first and one after last elements */ + start = index & ~sector_mask; + end = (index + n + sector_mask) & ~sector_mask; + + len_bytes = (end - start) * sizeof(uint64_t); + + new_table = qemu_blockalign(s->bs, len_bytes); + + /* Byteswap table */ + for (i = start; i < end; i++) { + uint64_t le_offset = cpu_to_le64(table->offsets[i]); + new_table->offsets[i - start] = le_offset; + } + + /* Adjust for offset into table */ + offset += start * sizeof(uint64_t); + + qemu_co_mutex_unlock(&s->table_lock); + ret = bdrv_co_pwrite(s->bs->file, offset, len_bytes, new_table->offsets, 0); + qemu_co_mutex_lock(&s->table_lock); + trace_qed_write_table_cb(s, table, flush, ret); + if (ret < 0) { + goto out; + } + + if (flush) { + ret = bdrv_flush(s->bs); + if (ret < 0) { + goto out; + } + } + + ret = 0; +out: + qemu_vfree(new_table); + return ret; +} + +int coroutine_fn qed_read_l1_table_sync(BDRVQEDState *s) +{ + return qed_read_table(s, s->header.l1_table_offset, s->l1_table); +} + +/* Called with table_lock held. */ +int coroutine_fn qed_write_l1_table(BDRVQEDState *s, unsigned int index, + unsigned int n) +{ + BLKDBG_EVENT(s->bs->file, BLKDBG_L1_UPDATE); + return qed_write_table(s, s->header.l1_table_offset, + s->l1_table, index, n, false); +} + +int coroutine_fn qed_write_l1_table_sync(BDRVQEDState *s, unsigned int index, + unsigned int n) +{ + return qed_write_l1_table(s, index, n); +} + +/* Called with table_lock held. */ +int coroutine_fn qed_read_l2_table(BDRVQEDState *s, QEDRequest *request, + uint64_t offset) +{ + int ret; + + qed_unref_l2_cache_entry(request->l2_table); + + /* Check for cached L2 entry */ + request->l2_table = qed_find_l2_cache_entry(&s->l2_cache, offset); + if (request->l2_table) { + return 0; + } + + request->l2_table = qed_alloc_l2_cache_entry(&s->l2_cache); + request->l2_table->table = qed_alloc_table(s); + + BLKDBG_EVENT(s->bs->file, BLKDBG_L2_LOAD); + ret = qed_read_table(s, offset, request->l2_table->table); + + if (ret) { + /* can't trust loaded L2 table anymore */ + qed_unref_l2_cache_entry(request->l2_table); + request->l2_table = NULL; + } else { + request->l2_table->offset = offset; + + qed_commit_l2_cache_entry(&s->l2_cache, request->l2_table); + + /* This is guaranteed to succeed because we just committed the entry + * to the cache. + */ + request->l2_table = qed_find_l2_cache_entry(&s->l2_cache, offset); + assert(request->l2_table != NULL); + } + + return ret; +} + +int coroutine_fn qed_read_l2_table_sync(BDRVQEDState *s, QEDRequest *request, + uint64_t offset) +{ + return qed_read_l2_table(s, request, offset); +} + +/* Called with table_lock held. */ +int coroutine_fn qed_write_l2_table(BDRVQEDState *s, QEDRequest *request, + unsigned int index, unsigned int n, + bool flush) +{ + BLKDBG_EVENT(s->bs->file, BLKDBG_L2_UPDATE); + return qed_write_table(s, request->l2_table->offset, + request->l2_table->table, index, n, flush); +} + +int coroutine_fn qed_write_l2_table_sync(BDRVQEDState *s, QEDRequest *request, + unsigned int index, unsigned int n, + bool flush) +{ + return qed_write_l2_table(s, request, index, n, flush); +} diff --git a/block/qed.c b/block/qed.c new file mode 100644 index 000000000..b27e7546c --- /dev/null +++ b/block/qed.c @@ -0,0 +1,1643 @@ +/* + * QEMU Enhanced Disk Format + * + * Copyright IBM, Corp. 2010 + * + * Authors: + * Stefan Hajnoczi + * Anthony Liguori + * + * This work is licensed under the terms of the GNU LGPL, version 2 or later. + * See the COPYING.LIB file in the top-level directory. + * + */ + +#include "qemu/osdep.h" +#include "block/qdict.h" +#include "qapi/error.h" +#include "qemu/timer.h" +#include "qemu/bswap.h" +#include "qemu/main-loop.h" +#include "qemu/module.h" +#include "qemu/option.h" +#include "trace.h" +#include "qed.h" +#include "sysemu/block-backend.h" +#include "qapi/qmp/qdict.h" +#include "qapi/qobject-input-visitor.h" +#include "qapi/qapi-visit-block-core.h" + +static QemuOptsList qed_create_opts; + +static int bdrv_qed_probe(const uint8_t *buf, int buf_size, + const char *filename) +{ + const QEDHeader *header = (const QEDHeader *)buf; + + if (buf_size < sizeof(*header)) { + return 0; + } + if (le32_to_cpu(header->magic) != QED_MAGIC) { + return 0; + } + return 100; +} + +/** + * Check whether an image format is raw + * + * @fmt: Backing file format, may be NULL + */ +static bool qed_fmt_is_raw(const char *fmt) +{ + return fmt && strcmp(fmt, "raw") == 0; +} + +static void qed_header_le_to_cpu(const QEDHeader *le, QEDHeader *cpu) +{ + cpu->magic = le32_to_cpu(le->magic); + cpu->cluster_size = le32_to_cpu(le->cluster_size); + cpu->table_size = le32_to_cpu(le->table_size); + cpu->header_size = le32_to_cpu(le->header_size); + cpu->features = le64_to_cpu(le->features); + cpu->compat_features = le64_to_cpu(le->compat_features); + cpu->autoclear_features = le64_to_cpu(le->autoclear_features); + cpu->l1_table_offset = le64_to_cpu(le->l1_table_offset); + cpu->image_size = le64_to_cpu(le->image_size); + cpu->backing_filename_offset = le32_to_cpu(le->backing_filename_offset); + cpu->backing_filename_size = le32_to_cpu(le->backing_filename_size); +} + +static void qed_header_cpu_to_le(const QEDHeader *cpu, QEDHeader *le) +{ + le->magic = cpu_to_le32(cpu->magic); + le->cluster_size = cpu_to_le32(cpu->cluster_size); + le->table_size = cpu_to_le32(cpu->table_size); + le->header_size = cpu_to_le32(cpu->header_size); + le->features = cpu_to_le64(cpu->features); + le->compat_features = cpu_to_le64(cpu->compat_features); + le->autoclear_features = cpu_to_le64(cpu->autoclear_features); + le->l1_table_offset = cpu_to_le64(cpu->l1_table_offset); + le->image_size = cpu_to_le64(cpu->image_size); + le->backing_filename_offset = cpu_to_le32(cpu->backing_filename_offset); + le->backing_filename_size = cpu_to_le32(cpu->backing_filename_size); +} + +int qed_write_header_sync(BDRVQEDState *s) +{ + QEDHeader le; + int ret; + + qed_header_cpu_to_le(&s->header, &le); + ret = bdrv_pwrite(s->bs->file, 0, &le, sizeof(le)); + if (ret != sizeof(le)) { + return ret; + } + return 0; +} + +/** + * Update header in-place (does not rewrite backing filename or other strings) + * + * This function only updates known header fields in-place and does not affect + * extra data after the QED header. + * + * No new allocating reqs can start while this function runs. + */ +static int coroutine_fn qed_write_header(BDRVQEDState *s) +{ + /* We must write full sectors for O_DIRECT but cannot necessarily generate + * the data following the header if an unrecognized compat feature is + * active. Therefore, first read the sectors containing the header, update + * them, and write back. + */ + + int nsectors = DIV_ROUND_UP(sizeof(QEDHeader), BDRV_SECTOR_SIZE); + size_t len = nsectors * BDRV_SECTOR_SIZE; + uint8_t *buf; + int ret; + + assert(s->allocating_acb || s->allocating_write_reqs_plugged); + + buf = qemu_blockalign(s->bs, len); + + ret = bdrv_co_pread(s->bs->file, 0, len, buf, 0); + if (ret < 0) { + goto out; + } + + /* Update header */ + qed_header_cpu_to_le(&s->header, (QEDHeader *) buf); + + ret = bdrv_co_pwrite(s->bs->file, 0, len, buf, 0); + if (ret < 0) { + goto out; + } + + ret = 0; +out: + qemu_vfree(buf); + return ret; +} + +static uint64_t qed_max_image_size(uint32_t cluster_size, uint32_t table_size) +{ + uint64_t table_entries; + uint64_t l2_size; + + table_entries = (table_size * cluster_size) / sizeof(uint64_t); + l2_size = table_entries * cluster_size; + + return l2_size * table_entries; +} + +static bool qed_is_cluster_size_valid(uint32_t cluster_size) +{ + if (cluster_size < QED_MIN_CLUSTER_SIZE || + cluster_size > QED_MAX_CLUSTER_SIZE) { + return false; + } + if (cluster_size & (cluster_size - 1)) { + return false; /* not power of 2 */ + } + return true; +} + +static bool qed_is_table_size_valid(uint32_t table_size) +{ + if (table_size < QED_MIN_TABLE_SIZE || + table_size > QED_MAX_TABLE_SIZE) { + return false; + } + if (table_size & (table_size - 1)) { + return false; /* not power of 2 */ + } + return true; +} + +static bool qed_is_image_size_valid(uint64_t image_size, uint32_t cluster_size, + uint32_t table_size) +{ + if (image_size % BDRV_SECTOR_SIZE != 0) { + return false; /* not multiple of sector size */ + } + if (image_size > qed_max_image_size(cluster_size, table_size)) { + return false; /* image is too large */ + } + return true; +} + +/** + * Read a string of known length from the image file + * + * @file: Image file + * @offset: File offset to start of string, in bytes + * @n: String length in bytes + * @buf: Destination buffer + * @buflen: Destination buffer length in bytes + * @ret: 0 on success, -errno on failure + * + * The string is NUL-terminated. + */ +static int qed_read_string(BdrvChild *file, uint64_t offset, size_t n, + char *buf, size_t buflen) +{ + int ret; + if (n >= buflen) { + return -EINVAL; + } + ret = bdrv_pread(file, offset, buf, n); + if (ret < 0) { + return ret; + } + buf[n] = '\0'; + return 0; +} + +/** + * Allocate new clusters + * + * @s: QED state + * @n: Number of contiguous clusters to allocate + * @ret: Offset of first allocated cluster + * + * This function only produces the offset where the new clusters should be + * written. It updates BDRVQEDState but does not make any changes to the image + * file. + * + * Called with table_lock held. + */ +static uint64_t qed_alloc_clusters(BDRVQEDState *s, unsigned int n) +{ + uint64_t offset = s->file_size; + s->file_size += n * s->header.cluster_size; + return offset; +} + +QEDTable *qed_alloc_table(BDRVQEDState *s) +{ + /* Honor O_DIRECT memory alignment requirements */ + return qemu_blockalign(s->bs, + s->header.cluster_size * s->header.table_size); +} + +/** + * Allocate a new zeroed L2 table + * + * Called with table_lock held. + */ +static CachedL2Table *qed_new_l2_table(BDRVQEDState *s) +{ + CachedL2Table *l2_table = qed_alloc_l2_cache_entry(&s->l2_cache); + + l2_table->table = qed_alloc_table(s); + l2_table->offset = qed_alloc_clusters(s, s->header.table_size); + + memset(l2_table->table->offsets, 0, + s->header.cluster_size * s->header.table_size); + return l2_table; +} + +static bool qed_plug_allocating_write_reqs(BDRVQEDState *s) +{ + qemu_co_mutex_lock(&s->table_lock); + + /* No reentrancy is allowed. */ + assert(!s->allocating_write_reqs_plugged); + if (s->allocating_acb != NULL) { + /* Another allocating write came concurrently. This cannot happen + * from bdrv_qed_co_drain_begin, but it can happen when the timer runs. + */ + qemu_co_mutex_unlock(&s->table_lock); + return false; + } + + s->allocating_write_reqs_plugged = true; + qemu_co_mutex_unlock(&s->table_lock); + return true; +} + +static void qed_unplug_allocating_write_reqs(BDRVQEDState *s) +{ + qemu_co_mutex_lock(&s->table_lock); + assert(s->allocating_write_reqs_plugged); + s->allocating_write_reqs_plugged = false; + qemu_co_queue_next(&s->allocating_write_reqs); + qemu_co_mutex_unlock(&s->table_lock); +} + +static void coroutine_fn qed_need_check_timer_entry(void *opaque) +{ + BDRVQEDState *s = opaque; + int ret; + + trace_qed_need_check_timer_cb(s); + + if (!qed_plug_allocating_write_reqs(s)) { + return; + } + + /* Ensure writes are on disk before clearing flag */ + ret = bdrv_co_flush(s->bs->file->bs); + if (ret < 0) { + qed_unplug_allocating_write_reqs(s); + return; + } + + s->header.features &= ~QED_F_NEED_CHECK; + ret = qed_write_header(s); + (void) ret; + + qed_unplug_allocating_write_reqs(s); + + ret = bdrv_co_flush(s->bs); + (void) ret; +} + +static void qed_need_check_timer_cb(void *opaque) +{ + Coroutine *co = qemu_coroutine_create(qed_need_check_timer_entry, opaque); + qemu_coroutine_enter(co); +} + +static void qed_start_need_check_timer(BDRVQEDState *s) +{ + trace_qed_start_need_check_timer(s); + + /* Use QEMU_CLOCK_VIRTUAL so we don't alter the image file while suspended for + * migration. + */ + timer_mod(s->need_check_timer, qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) + + NANOSECONDS_PER_SECOND * QED_NEED_CHECK_TIMEOUT); +} + +/* It's okay to call this multiple times or when no timer is started */ +static void qed_cancel_need_check_timer(BDRVQEDState *s) +{ + trace_qed_cancel_need_check_timer(s); + timer_del(s->need_check_timer); +} + +static void bdrv_qed_detach_aio_context(BlockDriverState *bs) +{ + BDRVQEDState *s = bs->opaque; + + qed_cancel_need_check_timer(s); + timer_free(s->need_check_timer); +} + +static void bdrv_qed_attach_aio_context(BlockDriverState *bs, + AioContext *new_context) +{ + BDRVQEDState *s = bs->opaque; + + s->need_check_timer = aio_timer_new(new_context, + QEMU_CLOCK_VIRTUAL, SCALE_NS, + qed_need_check_timer_cb, s); + if (s->header.features & QED_F_NEED_CHECK) { + qed_start_need_check_timer(s); + } +} + +static void coroutine_fn bdrv_qed_co_drain_begin(BlockDriverState *bs) +{ + BDRVQEDState *s = bs->opaque; + + /* Fire the timer immediately in order to start doing I/O as soon as the + * header is flushed. + */ + if (s->need_check_timer && timer_pending(s->need_check_timer)) { + qed_cancel_need_check_timer(s); + qed_need_check_timer_entry(s); + } +} + +static void bdrv_qed_init_state(BlockDriverState *bs) +{ + BDRVQEDState *s = bs->opaque; + + memset(s, 0, sizeof(BDRVQEDState)); + s->bs = bs; + qemu_co_mutex_init(&s->table_lock); + qemu_co_queue_init(&s->allocating_write_reqs); +} + +/* Called with table_lock held. */ +static int coroutine_fn bdrv_qed_do_open(BlockDriverState *bs, QDict *options, + int flags, Error **errp) +{ + BDRVQEDState *s = bs->opaque; + QEDHeader le_header; + int64_t file_size; + int ret; + + ret = bdrv_pread(bs->file, 0, &le_header, sizeof(le_header)); + if (ret < 0) { + return ret; + } + qed_header_le_to_cpu(&le_header, &s->header); + + if (s->header.magic != QED_MAGIC) { + error_setg(errp, "Image not in QED format"); + return -EINVAL; + } + if (s->header.features & ~QED_FEATURE_MASK) { + /* image uses unsupported feature bits */ + error_setg(errp, "Unsupported QED features: %" PRIx64, + s->header.features & ~QED_FEATURE_MASK); + return -ENOTSUP; + } + if (!qed_is_cluster_size_valid(s->header.cluster_size)) { + return -EINVAL; + } + + /* Round down file size to the last cluster */ + file_size = bdrv_getlength(bs->file->bs); + if (file_size < 0) { + return file_size; + } + s->file_size = qed_start_of_cluster(s, file_size); + + if (!qed_is_table_size_valid(s->header.table_size)) { + return -EINVAL; + } + if (!qed_is_image_size_valid(s->header.image_size, + s->header.cluster_size, + s->header.table_size)) { + return -EINVAL; + } + if (!qed_check_table_offset(s, s->header.l1_table_offset)) { + return -EINVAL; + } + + s->table_nelems = (s->header.cluster_size * s->header.table_size) / + sizeof(uint64_t); + s->l2_shift = ctz32(s->header.cluster_size); + s->l2_mask = s->table_nelems - 1; + s->l1_shift = s->l2_shift + ctz32(s->table_nelems); + + /* Header size calculation must not overflow uint32_t */ + if (s->header.header_size > UINT32_MAX / s->header.cluster_size) { + return -EINVAL; + } + + if ((s->header.features & QED_F_BACKING_FILE)) { + if ((uint64_t)s->header.backing_filename_offset + + s->header.backing_filename_size > + s->header.cluster_size * s->header.header_size) { + return -EINVAL; + } + + ret = qed_read_string(bs->file, s->header.backing_filename_offset, + s->header.backing_filename_size, + bs->auto_backing_file, + sizeof(bs->auto_backing_file)); + if (ret < 0) { + return ret; + } + pstrcpy(bs->backing_file, sizeof(bs->backing_file), + bs->auto_backing_file); + + if (s->header.features & QED_F_BACKING_FORMAT_NO_PROBE) { + pstrcpy(bs->backing_format, sizeof(bs->backing_format), "raw"); + } + } + + /* Reset unknown autoclear feature bits. This is a backwards + * compatibility mechanism that allows images to be opened by older + * programs, which "knock out" unknown feature bits. When an image is + * opened by a newer program again it can detect that the autoclear + * feature is no longer valid. + */ + if ((s->header.autoclear_features & ~QED_AUTOCLEAR_FEATURE_MASK) != 0 && + !bdrv_is_read_only(bs->file->bs) && !(flags & BDRV_O_INACTIVE)) { + s->header.autoclear_features &= QED_AUTOCLEAR_FEATURE_MASK; + + ret = qed_write_header_sync(s); + if (ret) { + return ret; + } + + /* From here on only known autoclear feature bits are valid */ + bdrv_flush(bs->file->bs); + } + + s->l1_table = qed_alloc_table(s); + qed_init_l2_cache(&s->l2_cache); + + ret = qed_read_l1_table_sync(s); + if (ret) { + goto out; + } + + /* If image was not closed cleanly, check consistency */ + if (!(flags & BDRV_O_CHECK) && (s->header.features & QED_F_NEED_CHECK)) { + /* Read-only images cannot be fixed. There is no risk of corruption + * since write operations are not possible. Therefore, allow + * potentially inconsistent images to be opened read-only. This can + * aid data recovery from an otherwise inconsistent image. + */ + if (!bdrv_is_read_only(bs->file->bs) && + !(flags & BDRV_O_INACTIVE)) { + BdrvCheckResult result = {0}; + + ret = qed_check(s, &result, true); + if (ret) { + goto out; + } + } + } + + bdrv_qed_attach_aio_context(bs, bdrv_get_aio_context(bs)); + +out: + if (ret) { + qed_free_l2_cache(&s->l2_cache); + qemu_vfree(s->l1_table); + } + return ret; +} + +typedef struct QEDOpenCo { + BlockDriverState *bs; + QDict *options; + int flags; + Error **errp; + int ret; +} QEDOpenCo; + +static void coroutine_fn bdrv_qed_open_entry(void *opaque) +{ + QEDOpenCo *qoc = opaque; + BDRVQEDState *s = qoc->bs->opaque; + + qemu_co_mutex_lock(&s->table_lock); + qoc->ret = bdrv_qed_do_open(qoc->bs, qoc->options, qoc->flags, qoc->errp); + qemu_co_mutex_unlock(&s->table_lock); +} + +static int bdrv_qed_open(BlockDriverState *bs, QDict *options, int flags, + Error **errp) +{ + QEDOpenCo qoc = { + .bs = bs, + .options = options, + .flags = flags, + .errp = errp, + .ret = -EINPROGRESS + }; + + bs->file = bdrv_open_child(NULL, options, "file", bs, &child_of_bds, + BDRV_CHILD_IMAGE, false, errp); + if (!bs->file) { + return -EINVAL; + } + + bdrv_qed_init_state(bs); + if (qemu_in_coroutine()) { + bdrv_qed_open_entry(&qoc); + } else { + assert(qemu_get_current_aio_context() == qemu_get_aio_context()); + qemu_coroutine_enter(qemu_coroutine_create(bdrv_qed_open_entry, &qoc)); + BDRV_POLL_WHILE(bs, qoc.ret == -EINPROGRESS); + } + BDRV_POLL_WHILE(bs, qoc.ret == -EINPROGRESS); + return qoc.ret; +} + +static void bdrv_qed_refresh_limits(BlockDriverState *bs, Error **errp) +{ + BDRVQEDState *s = bs->opaque; + + bs->bl.pwrite_zeroes_alignment = s->header.cluster_size; +} + +/* We have nothing to do for QED reopen, stubs just return + * success */ +static int bdrv_qed_reopen_prepare(BDRVReopenState *state, + BlockReopenQueue *queue, Error **errp) +{ + return 0; +} + +static void bdrv_qed_close(BlockDriverState *bs) +{ + BDRVQEDState *s = bs->opaque; + + bdrv_qed_detach_aio_context(bs); + + /* Ensure writes reach stable storage */ + bdrv_flush(bs->file->bs); + + /* Clean shutdown, no check required on next open */ + if (s->header.features & QED_F_NEED_CHECK) { + s->header.features &= ~QED_F_NEED_CHECK; + qed_write_header_sync(s); + } + + qed_free_l2_cache(&s->l2_cache); + qemu_vfree(s->l1_table); +} + +static int coroutine_fn bdrv_qed_co_create(BlockdevCreateOptions *opts, + Error **errp) +{ + BlockdevCreateOptionsQed *qed_opts; + BlockBackend *blk = NULL; + BlockDriverState *bs = NULL; + + QEDHeader header; + QEDHeader le_header; + uint8_t *l1_table = NULL; + size_t l1_size; + int ret = 0; + + assert(opts->driver == BLOCKDEV_DRIVER_QED); + qed_opts = &opts->u.qed; + + /* Validate options and set default values */ + if (!qed_opts->has_cluster_size) { + qed_opts->cluster_size = QED_DEFAULT_CLUSTER_SIZE; + } + if (!qed_opts->has_table_size) { + qed_opts->table_size = QED_DEFAULT_TABLE_SIZE; + } + + if (!qed_is_cluster_size_valid(qed_opts->cluster_size)) { + error_setg(errp, "QED cluster size must be within range [%u, %u] " + "and power of 2", + QED_MIN_CLUSTER_SIZE, QED_MAX_CLUSTER_SIZE); + return -EINVAL; + } + if (!qed_is_table_size_valid(qed_opts->table_size)) { + error_setg(errp, "QED table size must be within range [%u, %u] " + "and power of 2", + QED_MIN_TABLE_SIZE, QED_MAX_TABLE_SIZE); + return -EINVAL; + } + if (!qed_is_image_size_valid(qed_opts->size, qed_opts->cluster_size, + qed_opts->table_size)) + { + error_setg(errp, "QED image size must be a non-zero multiple of " + "cluster size and less than %" PRIu64 " bytes", + qed_max_image_size(qed_opts->cluster_size, + qed_opts->table_size)); + return -EINVAL; + } + + /* Create BlockBackend to write to the image */ + bs = bdrv_open_blockdev_ref(qed_opts->file, errp); + if (bs == NULL) { + return -EIO; + } + + blk = blk_new_with_bs(bs, BLK_PERM_WRITE | BLK_PERM_RESIZE, BLK_PERM_ALL, + errp); + if (!blk) { + ret = -EPERM; + goto out; + } + blk_set_allow_write_beyond_eof(blk, true); + + /* Prepare image format */ + header = (QEDHeader) { + .magic = QED_MAGIC, + .cluster_size = qed_opts->cluster_size, + .table_size = qed_opts->table_size, + .header_size = 1, + .features = 0, + .compat_features = 0, + .l1_table_offset = qed_opts->cluster_size, + .image_size = qed_opts->size, + }; + + l1_size = header.cluster_size * header.table_size; + + /* + * The QED format associates file length with allocation status, + * so a new file (which is empty) must have a length of 0. + */ + ret = blk_truncate(blk, 0, true, PREALLOC_MODE_OFF, 0, errp); + if (ret < 0) { + goto out; + } + + if (qed_opts->has_backing_file) { + header.features |= QED_F_BACKING_FILE; + header.backing_filename_offset = sizeof(le_header); + header.backing_filename_size = strlen(qed_opts->backing_file); + + if (qed_opts->has_backing_fmt) { + const char *backing_fmt = BlockdevDriver_str(qed_opts->backing_fmt); + if (qed_fmt_is_raw(backing_fmt)) { + header.features |= QED_F_BACKING_FORMAT_NO_PROBE; + } + } + } + + qed_header_cpu_to_le(&header, &le_header); + ret = blk_pwrite(blk, 0, &le_header, sizeof(le_header), 0); + if (ret < 0) { + goto out; + } + ret = blk_pwrite(blk, sizeof(le_header), qed_opts->backing_file, + header.backing_filename_size, 0); + if (ret < 0) { + goto out; + } + + l1_table = g_malloc0(l1_size); + ret = blk_pwrite(blk, header.l1_table_offset, l1_table, l1_size, 0); + if (ret < 0) { + goto out; + } + + ret = 0; /* success */ +out: + g_free(l1_table); + blk_unref(blk); + bdrv_unref(bs); + return ret; +} + +static int coroutine_fn bdrv_qed_co_create_opts(BlockDriver *drv, + const char *filename, + QemuOpts *opts, + Error **errp) +{ + BlockdevCreateOptions *create_options = NULL; + QDict *qdict; + Visitor *v; + BlockDriverState *bs = NULL; + int ret; + + static const QDictRenames opt_renames[] = { + { BLOCK_OPT_BACKING_FILE, "backing-file" }, + { BLOCK_OPT_BACKING_FMT, "backing-fmt" }, + { BLOCK_OPT_CLUSTER_SIZE, "cluster-size" }, + { BLOCK_OPT_TABLE_SIZE, "table-size" }, + { NULL, NULL }, + }; + + /* Parse options and convert legacy syntax */ + qdict = qemu_opts_to_qdict_filtered(opts, NULL, &qed_create_opts, true); + + if (!qdict_rename_keys(qdict, opt_renames, errp)) { + ret = -EINVAL; + goto fail; + } + + /* Create and open the file (protocol layer) */ + ret = bdrv_create_file(filename, opts, errp); + if (ret < 0) { + goto fail; + } + + bs = bdrv_open(filename, NULL, NULL, + BDRV_O_RDWR | BDRV_O_RESIZE | BDRV_O_PROTOCOL, errp); + if (bs == NULL) { + ret = -EIO; + goto fail; + } + + /* Now get the QAPI type BlockdevCreateOptions */ + qdict_put_str(qdict, "driver", "qed"); + qdict_put_str(qdict, "file", bs->node_name); + + v = qobject_input_visitor_new_flat_confused(qdict, errp); + if (!v) { + ret = -EINVAL; + goto fail; + } + + visit_type_BlockdevCreateOptions(v, NULL, &create_options, errp); + visit_free(v); + if (!create_options) { + ret = -EINVAL; + goto fail; + } + + /* Silently round up size */ + assert(create_options->driver == BLOCKDEV_DRIVER_QED); + create_options->u.qed.size = + ROUND_UP(create_options->u.qed.size, BDRV_SECTOR_SIZE); + + /* Create the qed image (format layer) */ + ret = bdrv_qed_co_create(create_options, errp); + +fail: + qobject_unref(qdict); + bdrv_unref(bs); + qapi_free_BlockdevCreateOptions(create_options); + return ret; +} + +static int coroutine_fn bdrv_qed_co_block_status(BlockDriverState *bs, + bool want_zero, + int64_t pos, int64_t bytes, + int64_t *pnum, int64_t *map, + BlockDriverState **file) +{ + BDRVQEDState *s = bs->opaque; + size_t len = MIN(bytes, SIZE_MAX); + int status; + QEDRequest request = { .l2_table = NULL }; + uint64_t offset; + int ret; + + qemu_co_mutex_lock(&s->table_lock); + ret = qed_find_cluster(s, &request, pos, &len, &offset); + + *pnum = len; + switch (ret) { + case QED_CLUSTER_FOUND: + *map = offset | qed_offset_into_cluster(s, pos); + status = BDRV_BLOCK_DATA | BDRV_BLOCK_OFFSET_VALID; + *file = bs->file->bs; + break; + case QED_CLUSTER_ZERO: + status = BDRV_BLOCK_ZERO; + break; + case QED_CLUSTER_L2: + case QED_CLUSTER_L1: + status = 0; + break; + default: + assert(ret < 0); + status = ret; + break; + } + + qed_unref_l2_cache_entry(request.l2_table); + qemu_co_mutex_unlock(&s->table_lock); + + return status; +} + +static BDRVQEDState *acb_to_s(QEDAIOCB *acb) +{ + return acb->bs->opaque; +} + +/** + * Read from the backing file or zero-fill if no backing file + * + * @s: QED state + * @pos: Byte position in device + * @qiov: Destination I/O vector + * + * This function reads qiov->size bytes starting at pos from the backing file. + * If there is no backing file then zeroes are read. + */ +static int coroutine_fn qed_read_backing_file(BDRVQEDState *s, uint64_t pos, + QEMUIOVector *qiov) +{ + if (s->bs->backing) { + BLKDBG_EVENT(s->bs->file, BLKDBG_READ_BACKING_AIO); + return bdrv_co_preadv(s->bs->backing, pos, qiov->size, qiov, 0); + } + qemu_iovec_memset(qiov, 0, 0, qiov->size); + return 0; +} + +/** + * Copy data from backing file into the image + * + * @s: QED state + * @pos: Byte position in device + * @len: Number of bytes + * @offset: Byte offset in image file + */ +static int coroutine_fn qed_copy_from_backing_file(BDRVQEDState *s, + uint64_t pos, uint64_t len, + uint64_t offset) +{ + QEMUIOVector qiov; + int ret; + + /* Skip copy entirely if there is no work to do */ + if (len == 0) { + return 0; + } + + qemu_iovec_init_buf(&qiov, qemu_blockalign(s->bs, len), len); + + ret = qed_read_backing_file(s, pos, &qiov); + + if (ret) { + goto out; + } + + BLKDBG_EVENT(s->bs->file, BLKDBG_COW_WRITE); + ret = bdrv_co_pwritev(s->bs->file, offset, qiov.size, &qiov, 0); + if (ret < 0) { + goto out; + } + ret = 0; +out: + qemu_vfree(qemu_iovec_buf(&qiov)); + return ret; +} + +/** + * Link one or more contiguous clusters into a table + * + * @s: QED state + * @table: L2 table + * @index: First cluster index + * @n: Number of contiguous clusters + * @cluster: First cluster offset + * + * The cluster offset may be an allocated byte offset in the image file, the + * zero cluster marker, or the unallocated cluster marker. + * + * Called with table_lock held. + */ +static void coroutine_fn qed_update_l2_table(BDRVQEDState *s, QEDTable *table, + int index, unsigned int n, + uint64_t cluster) +{ + int i; + for (i = index; i < index + n; i++) { + table->offsets[i] = cluster; + if (!qed_offset_is_unalloc_cluster(cluster) && + !qed_offset_is_zero_cluster(cluster)) { + cluster += s->header.cluster_size; + } + } +} + +/* Called with table_lock held. */ +static void coroutine_fn qed_aio_complete(QEDAIOCB *acb) +{ + BDRVQEDState *s = acb_to_s(acb); + + /* Free resources */ + qemu_iovec_destroy(&acb->cur_qiov); + qed_unref_l2_cache_entry(acb->request.l2_table); + + /* Free the buffer we may have allocated for zero writes */ + if (acb->flags & QED_AIOCB_ZERO) { + qemu_vfree(acb->qiov->iov[0].iov_base); + acb->qiov->iov[0].iov_base = NULL; + } + + /* Start next allocating write request waiting behind this one. Note that + * requests enqueue themselves when they first hit an unallocated cluster + * but they wait until the entire request is finished before waking up the + * next request in the queue. This ensures that we don't cycle through + * requests multiple times but rather finish one at a time completely. + */ + if (acb == s->allocating_acb) { + s->allocating_acb = NULL; + if (!qemu_co_queue_empty(&s->allocating_write_reqs)) { + qemu_co_queue_next(&s->allocating_write_reqs); + } else if (s->header.features & QED_F_NEED_CHECK) { + qed_start_need_check_timer(s); + } + } +} + +/** + * Update L1 table with new L2 table offset and write it out + * + * Called with table_lock held. + */ +static int coroutine_fn qed_aio_write_l1_update(QEDAIOCB *acb) +{ + BDRVQEDState *s = acb_to_s(acb); + CachedL2Table *l2_table = acb->request.l2_table; + uint64_t l2_offset = l2_table->offset; + int index, ret; + + index = qed_l1_index(s, acb->cur_pos); + s->l1_table->offsets[index] = l2_table->offset; + + ret = qed_write_l1_table(s, index, 1); + + /* Commit the current L2 table to the cache */ + qed_commit_l2_cache_entry(&s->l2_cache, l2_table); + + /* This is guaranteed to succeed because we just committed the entry to the + * cache. + */ + acb->request.l2_table = qed_find_l2_cache_entry(&s->l2_cache, l2_offset); + assert(acb->request.l2_table != NULL); + + return ret; +} + + +/** + * Update L2 table with new cluster offsets and write them out + * + * Called with table_lock held. + */ +static int coroutine_fn qed_aio_write_l2_update(QEDAIOCB *acb, uint64_t offset) +{ + BDRVQEDState *s = acb_to_s(acb); + bool need_alloc = acb->find_cluster_ret == QED_CLUSTER_L1; + int index, ret; + + if (need_alloc) { + qed_unref_l2_cache_entry(acb->request.l2_table); + acb->request.l2_table = qed_new_l2_table(s); + } + + index = qed_l2_index(s, acb->cur_pos); + qed_update_l2_table(s, acb->request.l2_table->table, index, acb->cur_nclusters, + offset); + + if (need_alloc) { + /* Write out the whole new L2 table */ + ret = qed_write_l2_table(s, &acb->request, 0, s->table_nelems, true); + if (ret) { + return ret; + } + return qed_aio_write_l1_update(acb); + } else { + /* Write out only the updated part of the L2 table */ + ret = qed_write_l2_table(s, &acb->request, index, acb->cur_nclusters, + false); + if (ret) { + return ret; + } + } + return 0; +} + +/** + * Write data to the image file + * + * Called with table_lock *not* held. + */ +static int coroutine_fn qed_aio_write_main(QEDAIOCB *acb) +{ + BDRVQEDState *s = acb_to_s(acb); + uint64_t offset = acb->cur_cluster + + qed_offset_into_cluster(s, acb->cur_pos); + + trace_qed_aio_write_main(s, acb, 0, offset, acb->cur_qiov.size); + + BLKDBG_EVENT(s->bs->file, BLKDBG_WRITE_AIO); + return bdrv_co_pwritev(s->bs->file, offset, acb->cur_qiov.size, + &acb->cur_qiov, 0); +} + +/** + * Populate untouched regions of new data cluster + * + * Called with table_lock held. + */ +static int coroutine_fn qed_aio_write_cow(QEDAIOCB *acb) +{ + BDRVQEDState *s = acb_to_s(acb); + uint64_t start, len, offset; + int ret; + + qemu_co_mutex_unlock(&s->table_lock); + + /* Populate front untouched region of new data cluster */ + start = qed_start_of_cluster(s, acb->cur_pos); + len = qed_offset_into_cluster(s, acb->cur_pos); + + trace_qed_aio_write_prefill(s, acb, start, len, acb->cur_cluster); + ret = qed_copy_from_backing_file(s, start, len, acb->cur_cluster); + if (ret < 0) { + goto out; + } + + /* Populate back untouched region of new data cluster */ + start = acb->cur_pos + acb->cur_qiov.size; + len = qed_start_of_cluster(s, start + s->header.cluster_size - 1) - start; + offset = acb->cur_cluster + + qed_offset_into_cluster(s, acb->cur_pos) + + acb->cur_qiov.size; + + trace_qed_aio_write_postfill(s, acb, start, len, offset); + ret = qed_copy_from_backing_file(s, start, len, offset); + if (ret < 0) { + goto out; + } + + ret = qed_aio_write_main(acb); + if (ret < 0) { + goto out; + } + + if (s->bs->backing) { + /* + * Flush new data clusters before updating the L2 table + * + * This flush is necessary when a backing file is in use. A crash + * during an allocating write could result in empty clusters in the + * image. If the write only touched a subregion of the cluster, + * then backing image sectors have been lost in the untouched + * region. The solution is to flush after writing a new data + * cluster and before updating the L2 table. + */ + ret = bdrv_co_flush(s->bs->file->bs); + } + +out: + qemu_co_mutex_lock(&s->table_lock); + return ret; +} + +/** + * Check if the QED_F_NEED_CHECK bit should be set during allocating write + */ +static bool qed_should_set_need_check(BDRVQEDState *s) +{ + /* The flush before L2 update path ensures consistency */ + if (s->bs->backing) { + return false; + } + + return !(s->header.features & QED_F_NEED_CHECK); +} + +/** + * Write new data cluster + * + * @acb: Write request + * @len: Length in bytes + * + * This path is taken when writing to previously unallocated clusters. + * + * Called with table_lock held. + */ +static int coroutine_fn qed_aio_write_alloc(QEDAIOCB *acb, size_t len) +{ + BDRVQEDState *s = acb_to_s(acb); + int ret; + + /* Cancel timer when the first allocating request comes in */ + if (s->allocating_acb == NULL) { + qed_cancel_need_check_timer(s); + } + + /* Freeze this request if another allocating write is in progress */ + if (s->allocating_acb != acb || s->allocating_write_reqs_plugged) { + if (s->allocating_acb != NULL) { + qemu_co_queue_wait(&s->allocating_write_reqs, &s->table_lock); + assert(s->allocating_acb == NULL); + } + s->allocating_acb = acb; + return -EAGAIN; /* start over with looking up table entries */ + } + + acb->cur_nclusters = qed_bytes_to_clusters(s, + qed_offset_into_cluster(s, acb->cur_pos) + len); + qemu_iovec_concat(&acb->cur_qiov, acb->qiov, acb->qiov_offset, len); + + if (acb->flags & QED_AIOCB_ZERO) { + /* Skip ahead if the clusters are already zero */ + if (acb->find_cluster_ret == QED_CLUSTER_ZERO) { + return 0; + } + acb->cur_cluster = 1; + } else { + acb->cur_cluster = qed_alloc_clusters(s, acb->cur_nclusters); + } + + if (qed_should_set_need_check(s)) { + s->header.features |= QED_F_NEED_CHECK; + ret = qed_write_header(s); + if (ret < 0) { + return ret; + } + } + + if (!(acb->flags & QED_AIOCB_ZERO)) { + ret = qed_aio_write_cow(acb); + if (ret < 0) { + return ret; + } + } + + return qed_aio_write_l2_update(acb, acb->cur_cluster); +} + +/** + * Write data cluster in place + * + * @acb: Write request + * @offset: Cluster offset in bytes + * @len: Length in bytes + * + * This path is taken when writing to already allocated clusters. + * + * Called with table_lock held. + */ +static int coroutine_fn qed_aio_write_inplace(QEDAIOCB *acb, uint64_t offset, + size_t len) +{ + BDRVQEDState *s = acb_to_s(acb); + int r; + + qemu_co_mutex_unlock(&s->table_lock); + + /* Allocate buffer for zero writes */ + if (acb->flags & QED_AIOCB_ZERO) { + struct iovec *iov = acb->qiov->iov; + + if (!iov->iov_base) { + iov->iov_base = qemu_try_blockalign(acb->bs, iov->iov_len); + if (iov->iov_base == NULL) { + r = -ENOMEM; + goto out; + } + memset(iov->iov_base, 0, iov->iov_len); + } + } + + /* Calculate the I/O vector */ + acb->cur_cluster = offset; + qemu_iovec_concat(&acb->cur_qiov, acb->qiov, acb->qiov_offset, len); + + /* Do the actual write. */ + r = qed_aio_write_main(acb); +out: + qemu_co_mutex_lock(&s->table_lock); + return r; +} + +/** + * Write data cluster + * + * @opaque: Write request + * @ret: QED_CLUSTER_FOUND, QED_CLUSTER_L2 or QED_CLUSTER_L1 + * @offset: Cluster offset in bytes + * @len: Length in bytes + * + * Called with table_lock held. + */ +static int coroutine_fn qed_aio_write_data(void *opaque, int ret, + uint64_t offset, size_t len) +{ + QEDAIOCB *acb = opaque; + + trace_qed_aio_write_data(acb_to_s(acb), acb, ret, offset, len); + + acb->find_cluster_ret = ret; + + switch (ret) { + case QED_CLUSTER_FOUND: + return qed_aio_write_inplace(acb, offset, len); + + case QED_CLUSTER_L2: + case QED_CLUSTER_L1: + case QED_CLUSTER_ZERO: + return qed_aio_write_alloc(acb, len); + + default: + g_assert_not_reached(); + } +} + +/** + * Read data cluster + * + * @opaque: Read request + * @ret: QED_CLUSTER_FOUND, QED_CLUSTER_L2 or QED_CLUSTER_L1 + * @offset: Cluster offset in bytes + * @len: Length in bytes + * + * Called with table_lock held. + */ +static int coroutine_fn qed_aio_read_data(void *opaque, int ret, + uint64_t offset, size_t len) +{ + QEDAIOCB *acb = opaque; + BDRVQEDState *s = acb_to_s(acb); + BlockDriverState *bs = acb->bs; + int r; + + qemu_co_mutex_unlock(&s->table_lock); + + /* Adjust offset into cluster */ + offset += qed_offset_into_cluster(s, acb->cur_pos); + + trace_qed_aio_read_data(s, acb, ret, offset, len); + + qemu_iovec_concat(&acb->cur_qiov, acb->qiov, acb->qiov_offset, len); + + /* Handle zero cluster and backing file reads, otherwise read + * data cluster directly. + */ + if (ret == QED_CLUSTER_ZERO) { + qemu_iovec_memset(&acb->cur_qiov, 0, 0, acb->cur_qiov.size); + r = 0; + } else if (ret != QED_CLUSTER_FOUND) { + r = qed_read_backing_file(s, acb->cur_pos, &acb->cur_qiov); + } else { + BLKDBG_EVENT(bs->file, BLKDBG_READ_AIO); + r = bdrv_co_preadv(bs->file, offset, acb->cur_qiov.size, + &acb->cur_qiov, 0); + } + + qemu_co_mutex_lock(&s->table_lock); + return r; +} + +/** + * Begin next I/O or complete the request + */ +static int coroutine_fn qed_aio_next_io(QEDAIOCB *acb) +{ + BDRVQEDState *s = acb_to_s(acb); + uint64_t offset; + size_t len; + int ret; + + qemu_co_mutex_lock(&s->table_lock); + while (1) { + trace_qed_aio_next_io(s, acb, 0, acb->cur_pos + acb->cur_qiov.size); + + acb->qiov_offset += acb->cur_qiov.size; + acb->cur_pos += acb->cur_qiov.size; + qemu_iovec_reset(&acb->cur_qiov); + + /* Complete request */ + if (acb->cur_pos >= acb->end_pos) { + ret = 0; + break; + } + + /* Find next cluster and start I/O */ + len = acb->end_pos - acb->cur_pos; + ret = qed_find_cluster(s, &acb->request, acb->cur_pos, &len, &offset); + if (ret < 0) { + break; + } + + if (acb->flags & QED_AIOCB_WRITE) { + ret = qed_aio_write_data(acb, ret, offset, len); + } else { + ret = qed_aio_read_data(acb, ret, offset, len); + } + + if (ret < 0 && ret != -EAGAIN) { + break; + } + } + + trace_qed_aio_complete(s, acb, ret); + qed_aio_complete(acb); + qemu_co_mutex_unlock(&s->table_lock); + return ret; +} + +static int coroutine_fn qed_co_request(BlockDriverState *bs, int64_t sector_num, + QEMUIOVector *qiov, int nb_sectors, + int flags) +{ + QEDAIOCB acb = { + .bs = bs, + .cur_pos = (uint64_t) sector_num * BDRV_SECTOR_SIZE, + .end_pos = (sector_num + nb_sectors) * BDRV_SECTOR_SIZE, + .qiov = qiov, + .flags = flags, + }; + qemu_iovec_init(&acb.cur_qiov, qiov->niov); + + trace_qed_aio_setup(bs->opaque, &acb, sector_num, nb_sectors, NULL, flags); + + /* Start request */ + return qed_aio_next_io(&acb); +} + +static int coroutine_fn bdrv_qed_co_readv(BlockDriverState *bs, + int64_t sector_num, int nb_sectors, + QEMUIOVector *qiov) +{ + return qed_co_request(bs, sector_num, qiov, nb_sectors, 0); +} + +static int coroutine_fn bdrv_qed_co_writev(BlockDriverState *bs, + int64_t sector_num, int nb_sectors, + QEMUIOVector *qiov, int flags) +{ + assert(!flags); + return qed_co_request(bs, sector_num, qiov, nb_sectors, QED_AIOCB_WRITE); +} + +static int coroutine_fn bdrv_qed_co_pwrite_zeroes(BlockDriverState *bs, + int64_t offset, + int bytes, + BdrvRequestFlags flags) +{ + BDRVQEDState *s = bs->opaque; + + /* + * Zero writes start without an I/O buffer. If a buffer becomes necessary + * then it will be allocated during request processing. + */ + QEMUIOVector qiov = QEMU_IOVEC_INIT_BUF(qiov, NULL, bytes); + + /* Fall back if the request is not aligned */ + if (qed_offset_into_cluster(s, offset) || + qed_offset_into_cluster(s, bytes)) { + return -ENOTSUP; + } + + return qed_co_request(bs, offset >> BDRV_SECTOR_BITS, &qiov, + bytes >> BDRV_SECTOR_BITS, + QED_AIOCB_WRITE | QED_AIOCB_ZERO); +} + +static int coroutine_fn bdrv_qed_co_truncate(BlockDriverState *bs, + int64_t offset, + bool exact, + PreallocMode prealloc, + BdrvRequestFlags flags, + Error **errp) +{ + BDRVQEDState *s = bs->opaque; + uint64_t old_image_size; + int ret; + + if (prealloc != PREALLOC_MODE_OFF) { + error_setg(errp, "Unsupported preallocation mode '%s'", + PreallocMode_str(prealloc)); + return -ENOTSUP; + } + + if (!qed_is_image_size_valid(offset, s->header.cluster_size, + s->header.table_size)) { + error_setg(errp, "Invalid image size specified"); + return -EINVAL; + } + + if ((uint64_t)offset < s->header.image_size) { + error_setg(errp, "Shrinking images is currently not supported"); + return -ENOTSUP; + } + + old_image_size = s->header.image_size; + s->header.image_size = offset; + ret = qed_write_header_sync(s); + if (ret < 0) { + s->header.image_size = old_image_size; + error_setg_errno(errp, -ret, "Failed to update the image size"); + } + return ret; +} + +static int64_t bdrv_qed_getlength(BlockDriverState *bs) +{ + BDRVQEDState *s = bs->opaque; + return s->header.image_size; +} + +static int bdrv_qed_get_info(BlockDriverState *bs, BlockDriverInfo *bdi) +{ + BDRVQEDState *s = bs->opaque; + + memset(bdi, 0, sizeof(*bdi)); + bdi->cluster_size = s->header.cluster_size; + bdi->is_dirty = s->header.features & QED_F_NEED_CHECK; + return 0; +} + +static int bdrv_qed_change_backing_file(BlockDriverState *bs, + const char *backing_file, + const char *backing_fmt) +{ + BDRVQEDState *s = bs->opaque; + QEDHeader new_header, le_header; + void *buffer; + size_t buffer_len, backing_file_len; + int ret; + + /* Refuse to set backing filename if unknown compat feature bits are + * active. If the image uses an unknown compat feature then we may not + * know the layout of data following the header structure and cannot safely + * add a new string. + */ + if (backing_file && (s->header.compat_features & + ~QED_COMPAT_FEATURE_MASK)) { + return -ENOTSUP; + } + + memcpy(&new_header, &s->header, sizeof(new_header)); + + new_header.features &= ~(QED_F_BACKING_FILE | + QED_F_BACKING_FORMAT_NO_PROBE); + + /* Adjust feature flags */ + if (backing_file) { + new_header.features |= QED_F_BACKING_FILE; + + if (qed_fmt_is_raw(backing_fmt)) { + new_header.features |= QED_F_BACKING_FORMAT_NO_PROBE; + } + } + + /* Calculate new header size */ + backing_file_len = 0; + + if (backing_file) { + backing_file_len = strlen(backing_file); + } + + buffer_len = sizeof(new_header); + new_header.backing_filename_offset = buffer_len; + new_header.backing_filename_size = backing_file_len; + buffer_len += backing_file_len; + + /* Make sure we can rewrite header without failing */ + if (buffer_len > new_header.header_size * new_header.cluster_size) { + return -ENOSPC; + } + + /* Prepare new header */ + buffer = g_malloc(buffer_len); + + qed_header_cpu_to_le(&new_header, &le_header); + memcpy(buffer, &le_header, sizeof(le_header)); + buffer_len = sizeof(le_header); + + if (backing_file) { + memcpy(buffer + buffer_len, backing_file, backing_file_len); + buffer_len += backing_file_len; + } + + /* Write new header */ + ret = bdrv_pwrite_sync(bs->file, 0, buffer, buffer_len); + g_free(buffer); + if (ret == 0) { + memcpy(&s->header, &new_header, sizeof(new_header)); + } + return ret; +} + +static void coroutine_fn bdrv_qed_co_invalidate_cache(BlockDriverState *bs, + Error **errp) +{ + BDRVQEDState *s = bs->opaque; + Error *local_err = NULL; + int ret; + + bdrv_qed_close(bs); + + bdrv_qed_init_state(bs); + qemu_co_mutex_lock(&s->table_lock); + ret = bdrv_qed_do_open(bs, NULL, bs->open_flags, &local_err); + qemu_co_mutex_unlock(&s->table_lock); + if (local_err) { + error_propagate_prepend(errp, local_err, + "Could not reopen qed layer: "); + return; + } else if (ret < 0) { + error_setg_errno(errp, -ret, "Could not reopen qed layer"); + return; + } +} + +static int coroutine_fn bdrv_qed_co_check(BlockDriverState *bs, + BdrvCheckResult *result, + BdrvCheckMode fix) +{ + BDRVQEDState *s = bs->opaque; + int ret; + + qemu_co_mutex_lock(&s->table_lock); + ret = qed_check(s, result, !!fix); + qemu_co_mutex_unlock(&s->table_lock); + + return ret; +} + +static QemuOptsList qed_create_opts = { + .name = "qed-create-opts", + .head = QTAILQ_HEAD_INITIALIZER(qed_create_opts.head), + .desc = { + { + .name = BLOCK_OPT_SIZE, + .type = QEMU_OPT_SIZE, + .help = "Virtual disk size" + }, + { + .name = BLOCK_OPT_BACKING_FILE, + .type = QEMU_OPT_STRING, + .help = "File name of a base image" + }, + { + .name = BLOCK_OPT_BACKING_FMT, + .type = QEMU_OPT_STRING, + .help = "Image format of the base image" + }, + { + .name = BLOCK_OPT_CLUSTER_SIZE, + .type = QEMU_OPT_SIZE, + .help = "Cluster size (in bytes)", + .def_value_str = stringify(QED_DEFAULT_CLUSTER_SIZE) + }, + { + .name = BLOCK_OPT_TABLE_SIZE, + .type = QEMU_OPT_SIZE, + .help = "L1/L2 table size (in clusters)" + }, + { /* end of list */ } + } +}; + +static BlockDriver bdrv_qed = { + .format_name = "qed", + .instance_size = sizeof(BDRVQEDState), + .create_opts = &qed_create_opts, + .is_format = true, + .supports_backing = true, + + .bdrv_probe = bdrv_qed_probe, + .bdrv_open = bdrv_qed_open, + .bdrv_close = bdrv_qed_close, + .bdrv_reopen_prepare = bdrv_qed_reopen_prepare, + .bdrv_child_perm = bdrv_default_perms, + .bdrv_co_create = bdrv_qed_co_create, + .bdrv_co_create_opts = bdrv_qed_co_create_opts, + .bdrv_has_zero_init = bdrv_has_zero_init_1, + .bdrv_co_block_status = bdrv_qed_co_block_status, + .bdrv_co_readv = bdrv_qed_co_readv, + .bdrv_co_writev = bdrv_qed_co_writev, + .bdrv_co_pwrite_zeroes = bdrv_qed_co_pwrite_zeroes, + .bdrv_co_truncate = bdrv_qed_co_truncate, + .bdrv_getlength = bdrv_qed_getlength, + .bdrv_get_info = bdrv_qed_get_info, + .bdrv_refresh_limits = bdrv_qed_refresh_limits, + .bdrv_change_backing_file = bdrv_qed_change_backing_file, + .bdrv_co_invalidate_cache = bdrv_qed_co_invalidate_cache, + .bdrv_co_check = bdrv_qed_co_check, + .bdrv_detach_aio_context = bdrv_qed_detach_aio_context, + .bdrv_attach_aio_context = bdrv_qed_attach_aio_context, + .bdrv_co_drain_begin = bdrv_qed_co_drain_begin, +}; + +static void bdrv_qed_init(void) +{ + bdrv_register(&bdrv_qed); +} + +block_init(bdrv_qed_init); diff --git a/block/qed.h b/block/qed.h new file mode 100644 index 000000000..3d12bf78d --- /dev/null +++ b/block/qed.h @@ -0,0 +1,318 @@ +/* + * QEMU Enhanced Disk Format + * + * Copyright IBM, Corp. 2010 + * + * Authors: + * Stefan Hajnoczi + * Anthony Liguori + * + * This work is licensed under the terms of the GNU LGPL, version 2 or later. + * See the COPYING.LIB file in the top-level directory. + * + */ + +#ifndef BLOCK_QED_H +#define BLOCK_QED_H + +#include "block/block_int.h" +#include "qemu/cutils.h" + +/* The layout of a QED file is as follows: + * + * +--------+----------+----------+----------+-----+ + * | header | L1 table | cluster0 | cluster1 | ... | + * +--------+----------+----------+----------+-----+ + * + * There is a 2-level pagetable for cluster allocation: + * + * +----------+ + * | L1 table | + * +----------+ + * ,------' | '------. + * +----------+ | +----------+ + * | L2 table | ... | L2 table | + * +----------+ +----------+ + * ,------' | '------. + * +----------+ | +----------+ + * | Data | ... | Data | + * +----------+ +----------+ + * + * The L1 table is fixed size and always present. L2 tables are allocated on + * demand. The L1 table size determines the maximum possible image size; it + * can be influenced using the cluster_size and table_size values. + * + * All fields are little-endian on disk. + */ +#define QED_DEFAULT_CLUSTER_SIZE 65536 +enum { + QED_MAGIC = 'Q' | 'E' << 8 | 'D' << 16 | '\0' << 24, + + /* The image supports a backing file */ + QED_F_BACKING_FILE = 0x01, + + /* The image needs a consistency check before use */ + QED_F_NEED_CHECK = 0x02, + + /* The backing file format must not be probed, treat as raw image */ + QED_F_BACKING_FORMAT_NO_PROBE = 0x04, + + /* Feature bits must be used when the on-disk format changes */ + QED_FEATURE_MASK = QED_F_BACKING_FILE | /* supported feature bits */ + QED_F_NEED_CHECK | + QED_F_BACKING_FORMAT_NO_PROBE, + QED_COMPAT_FEATURE_MASK = 0, /* supported compat feature bits */ + QED_AUTOCLEAR_FEATURE_MASK = 0, /* supported autoclear feature bits */ + + /* Data is stored in groups of sectors called clusters. Cluster size must + * be large to avoid keeping too much metadata. I/O requests that have + * sub-cluster size will require read-modify-write. + */ + QED_MIN_CLUSTER_SIZE = 4 * 1024, /* in bytes */ + QED_MAX_CLUSTER_SIZE = 64 * 1024 * 1024, + + /* Allocated clusters are tracked using a 2-level pagetable. Table size is + * a multiple of clusters so large maximum image sizes can be supported + * without jacking up the cluster size too much. + */ + QED_MIN_TABLE_SIZE = 1, /* in clusters */ + QED_MAX_TABLE_SIZE = 16, + QED_DEFAULT_TABLE_SIZE = 4, + + /* Delay to flush and clean image after last allocating write completes */ + QED_NEED_CHECK_TIMEOUT = 5, /* in seconds */ +}; + +typedef struct { + uint32_t magic; /* QED\0 */ + + uint32_t cluster_size; /* in bytes */ + uint32_t table_size; /* for L1 and L2 tables, in clusters */ + uint32_t header_size; /* in clusters */ + + uint64_t features; /* format feature bits */ + uint64_t compat_features; /* compatible feature bits */ + uint64_t autoclear_features; /* self-resetting feature bits */ + + uint64_t l1_table_offset; /* in bytes */ + uint64_t image_size; /* total logical image size, in bytes */ + + /* if (features & QED_F_BACKING_FILE) */ + uint32_t backing_filename_offset; /* in bytes from start of header */ + uint32_t backing_filename_size; /* in bytes */ +} QEMU_PACKED QEDHeader; + +typedef struct { + uint64_t offsets[0]; /* in bytes */ +} QEDTable; + +/* The L2 cache is a simple write-through cache for L2 structures */ +typedef struct CachedL2Table { + QEDTable *table; + uint64_t offset; /* offset=0 indicates an invalidate entry */ + QTAILQ_ENTRY(CachedL2Table) node; + int ref; +} CachedL2Table; + +typedef struct { + QTAILQ_HEAD(, CachedL2Table) entries; + unsigned int n_entries; +} L2TableCache; + +typedef struct QEDRequest { + CachedL2Table *l2_table; +} QEDRequest; + +enum { + QED_AIOCB_WRITE = 0x0001, /* read or write? */ + QED_AIOCB_ZERO = 0x0002, /* zero write, used with QED_AIOCB_WRITE */ +}; + +typedef struct QEDAIOCB { + BlockDriverState *bs; + QSIMPLEQ_ENTRY(QEDAIOCB) next; /* next request */ + int flags; /* QED_AIOCB_* bits ORed together */ + uint64_t end_pos; /* request end on block device, in bytes */ + + /* User scatter-gather list */ + QEMUIOVector *qiov; + size_t qiov_offset; /* byte count already processed */ + + /* Current cluster scatter-gather list */ + QEMUIOVector cur_qiov; + uint64_t cur_pos; /* position on block device, in bytes */ + uint64_t cur_cluster; /* cluster offset in image file */ + unsigned int cur_nclusters; /* number of clusters being accessed */ + int find_cluster_ret; /* used for L1/L2 update */ + + QEDRequest request; +} QEDAIOCB; + +typedef struct { + BlockDriverState *bs; /* device */ + + /* Written only by an allocating write or the timer handler (the latter + * while allocating reqs are plugged). + */ + QEDHeader header; /* always cpu-endian */ + + /* Protected by table_lock. */ + CoMutex table_lock; + QEDTable *l1_table; + L2TableCache l2_cache; /* l2 table cache */ + uint32_t table_nelems; + uint32_t l1_shift; + uint32_t l2_shift; + uint32_t l2_mask; + uint64_t file_size; /* length of image file, in bytes */ + + /* Allocating write request queue */ + QEDAIOCB *allocating_acb; + CoQueue allocating_write_reqs; + bool allocating_write_reqs_plugged; + + /* Periodic flush and clear need check flag */ + QEMUTimer *need_check_timer; +} BDRVQEDState; + +enum { + QED_CLUSTER_FOUND, /* cluster found */ + QED_CLUSTER_ZERO, /* zero cluster found */ + QED_CLUSTER_L2, /* cluster missing in L2 */ + QED_CLUSTER_L1, /* cluster missing in L1 */ +}; + +/** + * Header functions + */ +int qed_write_header_sync(BDRVQEDState *s); + +/** + * L2 cache functions + */ +void qed_init_l2_cache(L2TableCache *l2_cache); +void qed_free_l2_cache(L2TableCache *l2_cache); +CachedL2Table *qed_alloc_l2_cache_entry(L2TableCache *l2_cache); +void qed_unref_l2_cache_entry(CachedL2Table *entry); +CachedL2Table *qed_find_l2_cache_entry(L2TableCache *l2_cache, uint64_t offset); +void qed_commit_l2_cache_entry(L2TableCache *l2_cache, CachedL2Table *l2_table); + +/** + * Table I/O functions + */ +int coroutine_fn qed_read_l1_table_sync(BDRVQEDState *s); +int coroutine_fn qed_write_l1_table(BDRVQEDState *s, unsigned int index, + unsigned int n); +int coroutine_fn qed_write_l1_table_sync(BDRVQEDState *s, unsigned int index, + unsigned int n); +int coroutine_fn qed_read_l2_table_sync(BDRVQEDState *s, QEDRequest *request, + uint64_t offset); +int coroutine_fn qed_read_l2_table(BDRVQEDState *s, QEDRequest *request, + uint64_t offset); +int coroutine_fn qed_write_l2_table(BDRVQEDState *s, QEDRequest *request, + unsigned int index, unsigned int n, + bool flush); +int coroutine_fn qed_write_l2_table_sync(BDRVQEDState *s, QEDRequest *request, + unsigned int index, unsigned int n, + bool flush); + +/** + * Cluster functions + */ +int coroutine_fn qed_find_cluster(BDRVQEDState *s, QEDRequest *request, + uint64_t pos, size_t *len, + uint64_t *img_offset); + +/** + * Consistency check + */ +int coroutine_fn qed_check(BDRVQEDState *s, BdrvCheckResult *result, bool fix); + +QEDTable *qed_alloc_table(BDRVQEDState *s); + +/** + * Round down to the start of a cluster + */ +static inline uint64_t qed_start_of_cluster(BDRVQEDState *s, uint64_t offset) +{ + return offset & ~(uint64_t)(s->header.cluster_size - 1); +} + +static inline uint64_t qed_offset_into_cluster(BDRVQEDState *s, uint64_t offset) +{ + return offset & (s->header.cluster_size - 1); +} + +static inline uint64_t qed_bytes_to_clusters(BDRVQEDState *s, uint64_t bytes) +{ + return qed_start_of_cluster(s, bytes + (s->header.cluster_size - 1)) / + (s->header.cluster_size - 1); +} + +static inline unsigned int qed_l1_index(BDRVQEDState *s, uint64_t pos) +{ + return pos >> s->l1_shift; +} + +static inline unsigned int qed_l2_index(BDRVQEDState *s, uint64_t pos) +{ + return (pos >> s->l2_shift) & s->l2_mask; +} + +/** + * Test if a cluster offset is valid + */ +static inline bool qed_check_cluster_offset(BDRVQEDState *s, uint64_t offset) +{ + uint64_t header_size = (uint64_t)s->header.header_size * + s->header.cluster_size; + + if (offset & (s->header.cluster_size - 1)) { + return false; + } + return offset >= header_size && offset < s->file_size; +} + +/** + * Test if a table offset is valid + */ +static inline bool qed_check_table_offset(BDRVQEDState *s, uint64_t offset) +{ + uint64_t end_offset = offset + (s->header.table_size - 1) * + s->header.cluster_size; + + /* Overflow check */ + if (end_offset <= offset) { + return false; + } + + return qed_check_cluster_offset(s, offset) && + qed_check_cluster_offset(s, end_offset); +} + +static inline bool qed_offset_is_cluster_aligned(BDRVQEDState *s, + uint64_t offset) +{ + if (qed_offset_into_cluster(s, offset)) { + return false; + } + return true; +} + +static inline bool qed_offset_is_unalloc_cluster(uint64_t offset) +{ + if (offset == 0) { + return true; + } + return false; +} + +static inline bool qed_offset_is_zero_cluster(uint64_t offset) +{ + if (offset == 1) { + return true; + } + return false; +} + +#endif /* BLOCK_QED_H */ diff --git a/block/quorum.c b/block/quorum.c new file mode 100644 index 000000000..b10fc2089 --- /dev/null +++ b/block/quorum.c @@ -0,0 +1,1227 @@ +/* + * Quorum Block filter + * + * Copyright (C) 2012-2014 Nodalink, EURL. + * + * Author: + * Benoît Canet + * + * Based on the design and code of blkverify.c (Copyright (C) 2010 IBM, Corp) + * and blkmirror.c (Copyright (C) 2011 Red Hat, Inc). + * + * This work is licensed under the terms of the GNU GPL, version 2 or later. + * See the COPYING file in the top-level directory. + */ + +#include "qemu/osdep.h" +#include "qemu/cutils.h" +#include "qemu/module.h" +#include "qemu/option.h" +#include "block/block_int.h" +#include "block/qdict.h" +#include "qapi/error.h" +#include "qapi/qapi-events-block.h" +#include "qapi/qmp/qdict.h" +#include "qapi/qmp/qerror.h" +#include "qapi/qmp/qlist.h" +#include "qapi/qmp/qstring.h" +#include "crypto/hash.h" + +#define HASH_LENGTH 32 + +#define INDEXSTR_LEN 32 + +#define QUORUM_OPT_VOTE_THRESHOLD "vote-threshold" +#define QUORUM_OPT_BLKVERIFY "blkverify" +#define QUORUM_OPT_REWRITE "rewrite-corrupted" +#define QUORUM_OPT_READ_PATTERN "read-pattern" + +/* This union holds a vote hash value */ +typedef union QuorumVoteValue { + uint8_t h[HASH_LENGTH]; /* SHA-256 hash */ + int64_t l; /* simpler 64 bits hash */ +} QuorumVoteValue; + +/* A vote item */ +typedef struct QuorumVoteItem { + int index; + QLIST_ENTRY(QuorumVoteItem) next; +} QuorumVoteItem; + +/* this structure is a vote version. A version is the set of votes sharing the + * same vote value. + * The set of votes will be tracked with the items field and its cardinality is + * vote_count. + */ +typedef struct QuorumVoteVersion { + QuorumVoteValue value; + int index; + int vote_count; + QLIST_HEAD(, QuorumVoteItem) items; + QLIST_ENTRY(QuorumVoteVersion) next; +} QuorumVoteVersion; + +/* this structure holds a group of vote versions together */ +typedef struct QuorumVotes { + QLIST_HEAD(, QuorumVoteVersion) vote_list; + bool (*compare)(QuorumVoteValue *a, QuorumVoteValue *b); +} QuorumVotes; + +/* the following structure holds the state of one quorum instance */ +typedef struct BDRVQuorumState { + BdrvChild **children; /* children BlockDriverStates */ + int num_children; /* children count */ + unsigned next_child_index; /* the index of the next child that should + * be added + */ + int threshold; /* if less than threshold children reads gave the + * same result a quorum error occurs. + */ + bool is_blkverify; /* true if the driver is in blkverify mode + * Writes are mirrored on two children devices. + * On reads the two children devices' contents are + * compared and if a difference is spotted its + * location is printed and the code aborts. + * It is useful to debug other block drivers by + * comparing them with a reference one. + */ + bool rewrite_corrupted;/* true if the driver must rewrite-on-read corrupted + * block if Quorum is reached. + */ + + QuorumReadPattern read_pattern; +} BDRVQuorumState; + +typedef struct QuorumAIOCB QuorumAIOCB; + +/* Quorum will create one instance of the following structure per operation it + * performs on its children. + * So for each read/write operation coming from the upper layer there will be + * $children_count QuorumChildRequest. + */ +typedef struct QuorumChildRequest { + BlockDriverState *bs; + QEMUIOVector qiov; + uint8_t *buf; + int ret; + QuorumAIOCB *parent; +} QuorumChildRequest; + +/* Quorum will use the following structure to track progress of each read/write + * operation received by the upper layer. + * This structure hold pointers to the QuorumChildRequest structures instances + * used to do operations on each children and track overall progress. + */ +struct QuorumAIOCB { + BlockDriverState *bs; + Coroutine *co; + + /* Request metadata */ + uint64_t offset; + uint64_t bytes; + int flags; + + QEMUIOVector *qiov; /* calling IOV */ + + QuorumChildRequest *qcrs; /* individual child requests */ + int count; /* number of completed AIOCB */ + int success_count; /* number of successfully completed AIOCB */ + + int rewrite_count; /* number of replica to rewrite: count down to + * zero once writes are fired + */ + + QuorumVotes votes; + + bool is_read; + int vote_ret; + int children_read; /* how many children have been read from */ +}; + +typedef struct QuorumCo { + QuorumAIOCB *acb; + int idx; +} QuorumCo; + +static void quorum_aio_finalize(QuorumAIOCB *acb) +{ + g_free(acb->qcrs); + g_free(acb); +} + +static bool quorum_sha256_compare(QuorumVoteValue *a, QuorumVoteValue *b) +{ + return !memcmp(a->h, b->h, HASH_LENGTH); +} + +static bool quorum_64bits_compare(QuorumVoteValue *a, QuorumVoteValue *b) +{ + return a->l == b->l; +} + +static QuorumAIOCB *quorum_aio_get(BlockDriverState *bs, + QEMUIOVector *qiov, + uint64_t offset, + uint64_t bytes, + int flags) +{ + BDRVQuorumState *s = bs->opaque; + QuorumAIOCB *acb = g_new(QuorumAIOCB, 1); + int i; + + *acb = (QuorumAIOCB) { + .co = qemu_coroutine_self(), + .bs = bs, + .offset = offset, + .bytes = bytes, + .flags = flags, + .qiov = qiov, + .votes.compare = quorum_sha256_compare, + .votes.vote_list = QLIST_HEAD_INITIALIZER(acb.votes.vote_list), + }; + + acb->qcrs = g_new0(QuorumChildRequest, s->num_children); + for (i = 0; i < s->num_children; i++) { + acb->qcrs[i].buf = NULL; + acb->qcrs[i].ret = 0; + acb->qcrs[i].parent = acb; + } + + return acb; +} + +static void quorum_report_bad(QuorumOpType type, uint64_t offset, + uint64_t bytes, char *node_name, int ret) +{ + const char *msg = NULL; + int64_t start_sector = offset / BDRV_SECTOR_SIZE; + int64_t end_sector = DIV_ROUND_UP(offset + bytes, BDRV_SECTOR_SIZE); + + if (ret < 0) { + msg = strerror(-ret); + } + + qapi_event_send_quorum_report_bad(type, !!msg, msg, node_name, start_sector, + end_sector - start_sector); +} + +static void quorum_report_failure(QuorumAIOCB *acb) +{ + const char *reference = bdrv_get_device_or_node_name(acb->bs); + int64_t start_sector = acb->offset / BDRV_SECTOR_SIZE; + int64_t end_sector = DIV_ROUND_UP(acb->offset + acb->bytes, + BDRV_SECTOR_SIZE); + + qapi_event_send_quorum_failure(reference, start_sector, + end_sector - start_sector); +} + +static int quorum_vote_error(QuorumAIOCB *acb); + +static bool quorum_has_too_much_io_failed(QuorumAIOCB *acb) +{ + BDRVQuorumState *s = acb->bs->opaque; + + if (acb->success_count < s->threshold) { + acb->vote_ret = quorum_vote_error(acb); + quorum_report_failure(acb); + return true; + } + + return false; +} + +static int read_fifo_child(QuorumAIOCB *acb); + +static void quorum_copy_qiov(QEMUIOVector *dest, QEMUIOVector *source) +{ + int i; + assert(dest->niov == source->niov); + assert(dest->size == source->size); + for (i = 0; i < source->niov; i++) { + assert(dest->iov[i].iov_len == source->iov[i].iov_len); + memcpy(dest->iov[i].iov_base, + source->iov[i].iov_base, + source->iov[i].iov_len); + } +} + +static void quorum_report_bad_acb(QuorumChildRequest *sacb, int ret) +{ + QuorumAIOCB *acb = sacb->parent; + QuorumOpType type = acb->is_read ? QUORUM_OP_TYPE_READ : QUORUM_OP_TYPE_WRITE; + quorum_report_bad(type, acb->offset, acb->bytes, sacb->bs->node_name, ret); +} + +static void quorum_report_bad_versions(BDRVQuorumState *s, + QuorumAIOCB *acb, + QuorumVoteValue *value) +{ + QuorumVoteVersion *version; + QuorumVoteItem *item; + + QLIST_FOREACH(version, &acb->votes.vote_list, next) { + if (acb->votes.compare(&version->value, value)) { + continue; + } + QLIST_FOREACH(item, &version->items, next) { + quorum_report_bad(QUORUM_OP_TYPE_READ, acb->offset, acb->bytes, + s->children[item->index]->bs->node_name, 0); + } + } +} + +static void quorum_rewrite_entry(void *opaque) +{ + QuorumCo *co = opaque; + QuorumAIOCB *acb = co->acb; + BDRVQuorumState *s = acb->bs->opaque; + + /* Ignore any errors, it's just a correction attempt for already + * corrupted data. + * Mask out BDRV_REQ_WRITE_UNCHANGED because this overwrites the + * area with different data from the other children. */ + bdrv_co_pwritev(s->children[co->idx], acb->offset, acb->bytes, + acb->qiov, acb->flags & ~BDRV_REQ_WRITE_UNCHANGED); + + /* Wake up the caller after the last rewrite */ + acb->rewrite_count--; + if (!acb->rewrite_count) { + qemu_coroutine_enter_if_inactive(acb->co); + } +} + +static bool quorum_rewrite_bad_versions(QuorumAIOCB *acb, + QuorumVoteValue *value) +{ + QuorumVoteVersion *version; + QuorumVoteItem *item; + int count = 0; + + /* first count the number of bad versions: done first to avoid concurrency + * issues. + */ + QLIST_FOREACH(version, &acb->votes.vote_list, next) { + if (acb->votes.compare(&version->value, value)) { + continue; + } + QLIST_FOREACH(item, &version->items, next) { + count++; + } + } + + /* quorum_rewrite_entry will count down this to zero */ + acb->rewrite_count = count; + + /* now fire the correcting rewrites */ + QLIST_FOREACH(version, &acb->votes.vote_list, next) { + if (acb->votes.compare(&version->value, value)) { + continue; + } + QLIST_FOREACH(item, &version->items, next) { + Coroutine *co; + QuorumCo data = { + .acb = acb, + .idx = item->index, + }; + + co = qemu_coroutine_create(quorum_rewrite_entry, &data); + qemu_coroutine_enter(co); + } + } + + /* return true if any rewrite is done else false */ + return count; +} + +static void quorum_count_vote(QuorumVotes *votes, + QuorumVoteValue *value, + int index) +{ + QuorumVoteVersion *v = NULL, *version = NULL; + QuorumVoteItem *item; + + /* look if we have something with this hash */ + QLIST_FOREACH(v, &votes->vote_list, next) { + if (votes->compare(&v->value, value)) { + version = v; + break; + } + } + + /* It's a version not yet in the list add it */ + if (!version) { + version = g_new0(QuorumVoteVersion, 1); + QLIST_INIT(&version->items); + memcpy(&version->value, value, sizeof(version->value)); + version->index = index; + version->vote_count = 0; + QLIST_INSERT_HEAD(&votes->vote_list, version, next); + } + + version->vote_count++; + + item = g_new0(QuorumVoteItem, 1); + item->index = index; + QLIST_INSERT_HEAD(&version->items, item, next); +} + +static void quorum_free_vote_list(QuorumVotes *votes) +{ + QuorumVoteVersion *version, *next_version; + QuorumVoteItem *item, *next_item; + + QLIST_FOREACH_SAFE(version, &votes->vote_list, next, next_version) { + QLIST_REMOVE(version, next); + QLIST_FOREACH_SAFE(item, &version->items, next, next_item) { + QLIST_REMOVE(item, next); + g_free(item); + } + g_free(version); + } +} + +static int quorum_compute_hash(QuorumAIOCB *acb, int i, QuorumVoteValue *hash) +{ + QEMUIOVector *qiov = &acb->qcrs[i].qiov; + size_t len = sizeof(hash->h); + uint8_t *data = hash->h; + + /* XXX - would be nice if we could pass in the Error ** + * and propagate that back, but this quorum code is + * restricted to just errno values currently */ + if (qcrypto_hash_bytesv(QCRYPTO_HASH_ALG_SHA256, + qiov->iov, qiov->niov, + &data, &len, + NULL) < 0) { + return -EINVAL; + } + + return 0; +} + +static QuorumVoteVersion *quorum_get_vote_winner(QuorumVotes *votes) +{ + int max = 0; + QuorumVoteVersion *candidate, *winner = NULL; + + QLIST_FOREACH(candidate, &votes->vote_list, next) { + if (candidate->vote_count > max) { + max = candidate->vote_count; + winner = candidate; + } + } + + return winner; +} + +/* qemu_iovec_compare is handy for blkverify mode because it returns the first + * differing byte location. Yet it is handcoded to compare vectors one byte + * after another so it does not benefit from the libc SIMD optimizations. + * quorum_iovec_compare is written for speed and should be used in the non + * blkverify mode of quorum. + */ +static bool quorum_iovec_compare(QEMUIOVector *a, QEMUIOVector *b) +{ + int i; + int result; + + assert(a->niov == b->niov); + for (i = 0; i < a->niov; i++) { + assert(a->iov[i].iov_len == b->iov[i].iov_len); + result = memcmp(a->iov[i].iov_base, + b->iov[i].iov_base, + a->iov[i].iov_len); + if (result) { + return false; + } + } + + return true; +} + +static bool quorum_compare(QuorumAIOCB *acb, QEMUIOVector *a, QEMUIOVector *b) +{ + BDRVQuorumState *s = acb->bs->opaque; + ssize_t offset; + + /* This driver will replace blkverify in this particular case */ + if (s->is_blkverify) { + offset = qemu_iovec_compare(a, b); + if (offset != -1) { + fprintf(stderr, "quorum: offset=%" PRIu64 " bytes=%" PRIu64 + " contents mismatch at offset %" PRIu64 "\n", + acb->offset, acb->bytes, acb->offset + offset); + exit(1); + } + return true; + } + + return quorum_iovec_compare(a, b); +} + +/* Do a vote to get the error code */ +static int quorum_vote_error(QuorumAIOCB *acb) +{ + BDRVQuorumState *s = acb->bs->opaque; + QuorumVoteVersion *winner = NULL; + QuorumVotes error_votes; + QuorumVoteValue result_value; + int i, ret = 0; + bool error = false; + + QLIST_INIT(&error_votes.vote_list); + error_votes.compare = quorum_64bits_compare; + + for (i = 0; i < s->num_children; i++) { + ret = acb->qcrs[i].ret; + if (ret) { + error = true; + result_value.l = ret; + quorum_count_vote(&error_votes, &result_value, i); + } + } + + if (error) { + winner = quorum_get_vote_winner(&error_votes); + ret = winner->value.l; + } + + quorum_free_vote_list(&error_votes); + + return ret; +} + +static void quorum_vote(QuorumAIOCB *acb) +{ + bool quorum = true; + int i, j, ret; + QuorumVoteValue hash; + BDRVQuorumState *s = acb->bs->opaque; + QuorumVoteVersion *winner; + + if (quorum_has_too_much_io_failed(acb)) { + return; + } + + /* get the index of the first successful read */ + for (i = 0; i < s->num_children; i++) { + if (!acb->qcrs[i].ret) { + break; + } + } + + assert(i < s->num_children); + + /* compare this read with all other successful reads stopping at quorum + * failure + */ + for (j = i + 1; j < s->num_children; j++) { + if (acb->qcrs[j].ret) { + continue; + } + quorum = quorum_compare(acb, &acb->qcrs[i].qiov, &acb->qcrs[j].qiov); + if (!quorum) { + break; + } + } + + /* Every successful read agrees */ + if (quorum) { + quorum_copy_qiov(acb->qiov, &acb->qcrs[i].qiov); + return; + } + + /* compute hashes for each successful read, also store indexes */ + for (i = 0; i < s->num_children; i++) { + if (acb->qcrs[i].ret) { + continue; + } + ret = quorum_compute_hash(acb, i, &hash); + /* if ever the hash computation failed */ + if (ret < 0) { + acb->vote_ret = ret; + goto free_exit; + } + quorum_count_vote(&acb->votes, &hash, i); + } + + /* vote to select the most represented version */ + winner = quorum_get_vote_winner(&acb->votes); + + /* if the winner count is smaller than threshold the read fails */ + if (winner->vote_count < s->threshold) { + quorum_report_failure(acb); + acb->vote_ret = -EIO; + goto free_exit; + } + + /* we have a winner: copy it */ + quorum_copy_qiov(acb->qiov, &acb->qcrs[winner->index].qiov); + + /* some versions are bad print them */ + quorum_report_bad_versions(s, acb, &winner->value); + + /* corruption correction is enabled */ + if (s->rewrite_corrupted) { + quorum_rewrite_bad_versions(acb, &winner->value); + } + +free_exit: + /* free lists */ + quorum_free_vote_list(&acb->votes); +} + +static void read_quorum_children_entry(void *opaque) +{ + QuorumCo *co = opaque; + QuorumAIOCB *acb = co->acb; + BDRVQuorumState *s = acb->bs->opaque; + int i = co->idx; + QuorumChildRequest *sacb = &acb->qcrs[i]; + + sacb->bs = s->children[i]->bs; + sacb->ret = bdrv_co_preadv(s->children[i], acb->offset, acb->bytes, + &acb->qcrs[i].qiov, 0); + + if (sacb->ret == 0) { + acb->success_count++; + } else { + quorum_report_bad_acb(sacb, sacb->ret); + } + + acb->count++; + assert(acb->count <= s->num_children); + assert(acb->success_count <= s->num_children); + + /* Wake up the caller after the last read */ + if (acb->count == s->num_children) { + qemu_coroutine_enter_if_inactive(acb->co); + } +} + +static int read_quorum_children(QuorumAIOCB *acb) +{ + BDRVQuorumState *s = acb->bs->opaque; + int i; + + acb->children_read = s->num_children; + for (i = 0; i < s->num_children; i++) { + acb->qcrs[i].buf = qemu_blockalign(s->children[i]->bs, acb->qiov->size); + qemu_iovec_init(&acb->qcrs[i].qiov, acb->qiov->niov); + qemu_iovec_clone(&acb->qcrs[i].qiov, acb->qiov, acb->qcrs[i].buf); + } + + for (i = 0; i < s->num_children; i++) { + Coroutine *co; + QuorumCo data = { + .acb = acb, + .idx = i, + }; + + co = qemu_coroutine_create(read_quorum_children_entry, &data); + qemu_coroutine_enter(co); + } + + while (acb->count < s->num_children) { + qemu_coroutine_yield(); + } + + /* Do the vote on read */ + quorum_vote(acb); + for (i = 0; i < s->num_children; i++) { + qemu_vfree(acb->qcrs[i].buf); + qemu_iovec_destroy(&acb->qcrs[i].qiov); + } + + while (acb->rewrite_count) { + qemu_coroutine_yield(); + } + + return acb->vote_ret; +} + +static int read_fifo_child(QuorumAIOCB *acb) +{ + BDRVQuorumState *s = acb->bs->opaque; + int n, ret; + + /* We try to read the next child in FIFO order if we failed to read */ + do { + n = acb->children_read++; + acb->qcrs[n].bs = s->children[n]->bs; + ret = bdrv_co_preadv(s->children[n], acb->offset, acb->bytes, + acb->qiov, 0); + if (ret < 0) { + quorum_report_bad_acb(&acb->qcrs[n], ret); + } + } while (ret < 0 && acb->children_read < s->num_children); + + /* FIXME: rewrite failed children if acb->children_read > 1? */ + + return ret; +} + +static int quorum_co_preadv(BlockDriverState *bs, uint64_t offset, + uint64_t bytes, QEMUIOVector *qiov, int flags) +{ + BDRVQuorumState *s = bs->opaque; + QuorumAIOCB *acb = quorum_aio_get(bs, qiov, offset, bytes, flags); + int ret; + + acb->is_read = true; + acb->children_read = 0; + + if (s->read_pattern == QUORUM_READ_PATTERN_QUORUM) { + ret = read_quorum_children(acb); + } else { + ret = read_fifo_child(acb); + } + quorum_aio_finalize(acb); + + return ret; +} + +static void write_quorum_entry(void *opaque) +{ + QuorumCo *co = opaque; + QuorumAIOCB *acb = co->acb; + BDRVQuorumState *s = acb->bs->opaque; + int i = co->idx; + QuorumChildRequest *sacb = &acb->qcrs[i]; + + sacb->bs = s->children[i]->bs; + sacb->ret = bdrv_co_pwritev(s->children[i], acb->offset, acb->bytes, + acb->qiov, acb->flags); + if (sacb->ret == 0) { + acb->success_count++; + } else { + quorum_report_bad_acb(sacb, sacb->ret); + } + acb->count++; + assert(acb->count <= s->num_children); + assert(acb->success_count <= s->num_children); + + /* Wake up the caller after the last write */ + if (acb->count == s->num_children) { + qemu_coroutine_enter_if_inactive(acb->co); + } +} + +static int quorum_co_pwritev(BlockDriverState *bs, uint64_t offset, + uint64_t bytes, QEMUIOVector *qiov, int flags) +{ + BDRVQuorumState *s = bs->opaque; + QuorumAIOCB *acb = quorum_aio_get(bs, qiov, offset, bytes, flags); + int i, ret; + + for (i = 0; i < s->num_children; i++) { + Coroutine *co; + QuorumCo data = { + .acb = acb, + .idx = i, + }; + + co = qemu_coroutine_create(write_quorum_entry, &data); + qemu_coroutine_enter(co); + } + + while (acb->count < s->num_children) { + qemu_coroutine_yield(); + } + + quorum_has_too_much_io_failed(acb); + + ret = acb->vote_ret; + quorum_aio_finalize(acb); + + return ret; +} + +static int64_t quorum_getlength(BlockDriverState *bs) +{ + BDRVQuorumState *s = bs->opaque; + int64_t result; + int i; + + /* check that all file have the same length */ + result = bdrv_getlength(s->children[0]->bs); + if (result < 0) { + return result; + } + for (i = 1; i < s->num_children; i++) { + int64_t value = bdrv_getlength(s->children[i]->bs); + if (value < 0) { + return value; + } + if (value != result) { + return -EIO; + } + } + + return result; +} + +static coroutine_fn int quorum_co_flush(BlockDriverState *bs) +{ + BDRVQuorumState *s = bs->opaque; + QuorumVoteVersion *winner = NULL; + QuorumVotes error_votes; + QuorumVoteValue result_value; + int i; + int result = 0; + int success_count = 0; + + QLIST_INIT(&error_votes.vote_list); + error_votes.compare = quorum_64bits_compare; + + for (i = 0; i < s->num_children; i++) { + result = bdrv_co_flush(s->children[i]->bs); + if (result) { + quorum_report_bad(QUORUM_OP_TYPE_FLUSH, 0, 0, + s->children[i]->bs->node_name, result); + result_value.l = result; + quorum_count_vote(&error_votes, &result_value, i); + } else { + success_count++; + } + } + + if (success_count >= s->threshold) { + result = 0; + } else { + winner = quorum_get_vote_winner(&error_votes); + result = winner->value.l; + } + quorum_free_vote_list(&error_votes); + + return result; +} + +static bool quorum_recurse_can_replace(BlockDriverState *bs, + BlockDriverState *to_replace) +{ + BDRVQuorumState *s = bs->opaque; + int i; + + for (i = 0; i < s->num_children; i++) { + /* + * We have no idea whether our children show the same data as + * this node (@bs). It is actually highly likely that + * @to_replace does not, because replacing a broken child is + * one of the main use cases here. + * + * We do know that the new BDS will match @bs, so replacing + * any of our children by it will be safe. It cannot change + * the data this quorum node presents to its parents. + * + * However, replacing @to_replace by @bs in any of our + * children's chains may change visible data somewhere in + * there. We therefore cannot recurse down those chains with + * bdrv_recurse_can_replace(). + * (More formally, bdrv_recurse_can_replace() requires that + * @to_replace will be replaced by something matching the @bs + * passed to it. We cannot guarantee that.) + * + * Thus, we can only check whether any of our immediate + * children matches @to_replace. + * + * (In the future, we might add a function to recurse down a + * chain that checks that nothing there cares about a change + * in data from the respective child in question. For + * example, most filters do not care when their child's data + * suddenly changes, as long as their parents do not care.) + */ + if (s->children[i]->bs == to_replace) { + /* + * We now have to ensure that there is no other parent + * that cares about replacing this child by a node with + * potentially different data. + * We do so by checking whether there are any other parents + * at all, which is stricter than necessary, but also very + * simple. (We may decide to implement something more + * complex and permissive when there is an actual need for + * it.) + */ + return QLIST_FIRST(&to_replace->parents) == s->children[i] && + QLIST_NEXT(s->children[i], next_parent) == NULL; + } + } + + return false; +} + +static int quorum_valid_threshold(int threshold, int num_children, Error **errp) +{ + + if (threshold < 1) { + error_setg(errp, QERR_INVALID_PARAMETER_VALUE, + "vote-threshold", "value >= 1"); + return -ERANGE; + } + + if (threshold > num_children) { + error_setg(errp, "threshold may not exceed children count"); + return -ERANGE; + } + + return 0; +} + +static QemuOptsList quorum_runtime_opts = { + .name = "quorum", + .head = QTAILQ_HEAD_INITIALIZER(quorum_runtime_opts.head), + .desc = { + { + .name = QUORUM_OPT_VOTE_THRESHOLD, + .type = QEMU_OPT_NUMBER, + .help = "The number of vote needed for reaching quorum", + }, + { + .name = QUORUM_OPT_BLKVERIFY, + .type = QEMU_OPT_BOOL, + .help = "Trigger block verify mode if set", + }, + { + .name = QUORUM_OPT_REWRITE, + .type = QEMU_OPT_BOOL, + .help = "Rewrite corrupted block on read quorum", + }, + { + .name = QUORUM_OPT_READ_PATTERN, + .type = QEMU_OPT_STRING, + .help = "Allowed pattern: quorum, fifo. Quorum is default", + }, + { /* end of list */ } + }, +}; + +static int quorum_open(BlockDriverState *bs, QDict *options, int flags, + Error **errp) +{ + BDRVQuorumState *s = bs->opaque; + Error *local_err = NULL; + QemuOpts *opts = NULL; + const char *pattern_str; + bool *opened; + int i; + int ret = 0; + + qdict_flatten(options); + + /* count how many different children are present */ + s->num_children = qdict_array_entries(options, "children."); + if (s->num_children < 0) { + error_setg(errp, "Option children is not a valid array"); + ret = -EINVAL; + goto exit; + } + if (s->num_children < 1) { + error_setg(errp, "Number of provided children must be 1 or more"); + ret = -EINVAL; + goto exit; + } + + opts = qemu_opts_create(&quorum_runtime_opts, NULL, 0, &error_abort); + if (!qemu_opts_absorb_qdict(opts, options, errp)) { + ret = -EINVAL; + goto exit; + } + + s->threshold = qemu_opt_get_number(opts, QUORUM_OPT_VOTE_THRESHOLD, 0); + /* and validate it against s->num_children */ + ret = quorum_valid_threshold(s->threshold, s->num_children, errp); + if (ret < 0) { + goto exit; + } + + pattern_str = qemu_opt_get(opts, QUORUM_OPT_READ_PATTERN); + if (!pattern_str) { + ret = QUORUM_READ_PATTERN_QUORUM; + } else { + ret = qapi_enum_parse(&QuorumReadPattern_lookup, pattern_str, + -EINVAL, NULL); + } + if (ret < 0) { + error_setg(errp, "Please set read-pattern as fifo or quorum"); + goto exit; + } + s->read_pattern = ret; + + if (s->read_pattern == QUORUM_READ_PATTERN_QUORUM) { + s->is_blkverify = qemu_opt_get_bool(opts, QUORUM_OPT_BLKVERIFY, false); + if (s->is_blkverify && (s->num_children != 2 || s->threshold != 2)) { + error_setg(errp, "blkverify=on can only be set if there are " + "exactly two files and vote-threshold is 2"); + ret = -EINVAL; + goto exit; + } + + s->rewrite_corrupted = qemu_opt_get_bool(opts, QUORUM_OPT_REWRITE, + false); + if (s->rewrite_corrupted && s->is_blkverify) { + error_setg(errp, + "rewrite-corrupted=on cannot be used with blkverify=on"); + ret = -EINVAL; + goto exit; + } + } + + /* allocate the children array */ + s->children = g_new0(BdrvChild *, s->num_children); + opened = g_new0(bool, s->num_children); + + for (i = 0; i < s->num_children; i++) { + char indexstr[INDEXSTR_LEN]; + ret = snprintf(indexstr, INDEXSTR_LEN, "children.%d", i); + assert(ret < INDEXSTR_LEN); + + s->children[i] = bdrv_open_child(NULL, options, indexstr, bs, + &child_of_bds, BDRV_CHILD_DATA, false, + &local_err); + if (local_err) { + error_propagate(errp, local_err); + ret = -EINVAL; + goto close_exit; + } + + opened[i] = true; + } + s->next_child_index = s->num_children; + + bs->supported_write_flags = BDRV_REQ_WRITE_UNCHANGED; + + g_free(opened); + goto exit; + +close_exit: + /* cleanup on error */ + for (i = 0; i < s->num_children; i++) { + if (!opened[i]) { + continue; + } + bdrv_unref_child(bs, s->children[i]); + } + g_free(s->children); + g_free(opened); +exit: + qemu_opts_del(opts); + return ret; +} + +static void quorum_close(BlockDriverState *bs) +{ + BDRVQuorumState *s = bs->opaque; + int i; + + for (i = 0; i < s->num_children; i++) { + bdrv_unref_child(bs, s->children[i]); + } + + g_free(s->children); +} + +static void quorum_add_child(BlockDriverState *bs, BlockDriverState *child_bs, + Error **errp) +{ + BDRVQuorumState *s = bs->opaque; + BdrvChild *child; + char indexstr[INDEXSTR_LEN]; + int ret; + + if (s->is_blkverify) { + error_setg(errp, "Cannot add a child to a quorum in blkverify mode"); + return; + } + + assert(s->num_children <= INT_MAX / sizeof(BdrvChild *)); + if (s->num_children == INT_MAX / sizeof(BdrvChild *) || + s->next_child_index == UINT_MAX) { + error_setg(errp, "Too many children"); + return; + } + + ret = snprintf(indexstr, INDEXSTR_LEN, "children.%u", s->next_child_index); + if (ret < 0 || ret >= INDEXSTR_LEN) { + error_setg(errp, "cannot generate child name"); + return; + } + s->next_child_index++; + + bdrv_drained_begin(bs); + + /* We can safely add the child now */ + bdrv_ref(child_bs); + + child = bdrv_attach_child(bs, child_bs, indexstr, &child_of_bds, + BDRV_CHILD_DATA, errp); + if (child == NULL) { + s->next_child_index--; + goto out; + } + s->children = g_renew(BdrvChild *, s->children, s->num_children + 1); + s->children[s->num_children++] = child; + +out: + bdrv_drained_end(bs); +} + +static void quorum_del_child(BlockDriverState *bs, BdrvChild *child, + Error **errp) +{ + BDRVQuorumState *s = bs->opaque; + char indexstr[INDEXSTR_LEN]; + int i; + + for (i = 0; i < s->num_children; i++) { + if (s->children[i] == child) { + break; + } + } + + /* we have checked it in bdrv_del_child() */ + assert(i < s->num_children); + + if (s->num_children <= s->threshold) { + error_setg(errp, + "The number of children cannot be lower than the vote threshold %d", + s->threshold); + return; + } + + /* We know now that num_children > threshold, so blkverify must be false */ + assert(!s->is_blkverify); + + snprintf(indexstr, INDEXSTR_LEN, "children.%u", s->next_child_index - 1); + if (!strncmp(child->name, indexstr, INDEXSTR_LEN)) { + s->next_child_index--; + } + + bdrv_drained_begin(bs); + + /* We can safely remove this child now */ + memmove(&s->children[i], &s->children[i + 1], + (s->num_children - i - 1) * sizeof(BdrvChild *)); + s->children = g_renew(BdrvChild *, s->children, --s->num_children); + bdrv_unref_child(bs, child); + + bdrv_drained_end(bs); +} + +static void quorum_gather_child_options(BlockDriverState *bs, QDict *target, + bool backing_overridden) +{ + BDRVQuorumState *s = bs->opaque; + QList *children_list; + int i; + + /* + * The generic implementation for gathering child options in + * bdrv_refresh_filename() would use the names of the children + * as specified for bdrv_open_child() or bdrv_attach_child(), + * which is "children.%u" with %u being a value + * (s->next_child_index) that is incremented each time a new child + * is added (and never decremented). Since children can be + * deleted at runtime, there may be gaps in that enumeration. + * When creating a new quorum BDS and specifying the children for + * it through runtime options, the enumeration used there may not + * have any gaps, though. + * + * Therefore, we have to create a new gap-less enumeration here + * (which we can achieve by simply putting all of the children's + * full_open_options into a QList). + * + * XXX: Note that there are issues with the current child option + * structure quorum uses (such as the fact that children do + * not really have unique permanent names). Therefore, this + * is going to have to change in the future and ideally we + * want quorum to be covered by the generic implementation. + */ + + children_list = qlist_new(); + qdict_put(target, "children", children_list); + + for (i = 0; i < s->num_children; i++) { + qlist_append(children_list, + qobject_ref(s->children[i]->bs->full_open_options)); + } +} + +static char *quorum_dirname(BlockDriverState *bs, Error **errp) +{ + /* In general, there are multiple BDSs with different dirnames below this + * one; so there is no unique dirname we could return (unless all are equal + * by chance, or there is only one). Therefore, to be consistent, just + * always return NULL. */ + error_setg(errp, "Cannot generate a base directory for quorum nodes"); + return NULL; +} + +static void quorum_child_perm(BlockDriverState *bs, BdrvChild *c, + BdrvChildRole role, + BlockReopenQueue *reopen_queue, + uint64_t perm, uint64_t shared, + uint64_t *nperm, uint64_t *nshared) +{ + BDRVQuorumState *s = bs->opaque; + + *nperm = perm & DEFAULT_PERM_PASSTHROUGH; + if (s->rewrite_corrupted) { + *nperm |= BLK_PERM_WRITE; + } + + /* + * We cannot share RESIZE or WRITE, as this would make the + * children differ from each other. + */ + *nshared = (shared & (BLK_PERM_CONSISTENT_READ | + BLK_PERM_WRITE_UNCHANGED)) + | DEFAULT_PERM_UNCHANGED; +} + +static const char *const quorum_strong_runtime_opts[] = { + QUORUM_OPT_VOTE_THRESHOLD, + QUORUM_OPT_BLKVERIFY, + QUORUM_OPT_REWRITE, + QUORUM_OPT_READ_PATTERN, + + NULL +}; + +static BlockDriver bdrv_quorum = { + .format_name = "quorum", + + .instance_size = sizeof(BDRVQuorumState), + + .bdrv_open = quorum_open, + .bdrv_close = quorum_close, + .bdrv_gather_child_options = quorum_gather_child_options, + .bdrv_dirname = quorum_dirname, + + .bdrv_co_flush_to_disk = quorum_co_flush, + + .bdrv_getlength = quorum_getlength, + + .bdrv_co_preadv = quorum_co_preadv, + .bdrv_co_pwritev = quorum_co_pwritev, + + .bdrv_add_child = quorum_add_child, + .bdrv_del_child = quorum_del_child, + + .bdrv_child_perm = quorum_child_perm, + + .bdrv_recurse_can_replace = quorum_recurse_can_replace, + + .strong_runtime_opts = quorum_strong_runtime_opts, +}; + +static void bdrv_quorum_init(void) +{ + if (!qcrypto_hash_supports(QCRYPTO_HASH_ALG_SHA256)) { + /* SHA256 hash support is required for quorum device */ + return; + } + bdrv_register(&bdrv_quorum); +} + +block_init(bdrv_quorum_init); diff --git a/block/raw-format.c b/block/raw-format.c new file mode 100644 index 000000000..42ec50802 --- /dev/null +++ b/block/raw-format.c @@ -0,0 +1,618 @@ +/* BlockDriver implementation for "raw" format driver + * + * Copyright (C) 2010-2016 Red Hat, Inc. + * Copyright (C) 2010, Blue Swirl + * Copyright (C) 2009, Anthony Liguori + * + * Author: + * Laszlo Ersek + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +#include "qemu/osdep.h" +#include "block/block_int.h" +#include "qapi/error.h" +#include "qemu/module.h" +#include "qemu/option.h" + +typedef struct BDRVRawState { + uint64_t offset; + uint64_t size; + bool has_size; +} BDRVRawState; + +static const char *const mutable_opts[] = { "offset", "size", NULL }; + +static QemuOptsList raw_runtime_opts = { + .name = "raw", + .head = QTAILQ_HEAD_INITIALIZER(raw_runtime_opts.head), + .desc = { + { + .name = "offset", + .type = QEMU_OPT_SIZE, + .help = "offset in the disk where the image starts", + }, + { + .name = "size", + .type = QEMU_OPT_SIZE, + .help = "virtual disk size", + }, + { /* end of list */ } + }, +}; + +static QemuOptsList raw_create_opts = { + .name = "raw-create-opts", + .head = QTAILQ_HEAD_INITIALIZER(raw_create_opts.head), + .desc = { + { + .name = BLOCK_OPT_SIZE, + .type = QEMU_OPT_SIZE, + .help = "Virtual disk size" + }, + { /* end of list */ } + } +}; + +static int raw_read_options(QDict *options, uint64_t *offset, bool *has_size, + uint64_t *size, Error **errp) +{ + QemuOpts *opts = NULL; + int ret; + + opts = qemu_opts_create(&raw_runtime_opts, NULL, 0, &error_abort); + if (!qemu_opts_absorb_qdict(opts, options, errp)) { + ret = -EINVAL; + goto end; + } + + *offset = qemu_opt_get_size(opts, "offset", 0); + *has_size = qemu_opt_find(opts, "size"); + *size = qemu_opt_get_size(opts, "size", 0); + + ret = 0; +end: + qemu_opts_del(opts); + return ret; +} + +static int raw_apply_options(BlockDriverState *bs, BDRVRawState *s, + uint64_t offset, bool has_size, uint64_t size, + Error **errp) +{ + int64_t real_size = 0; + + real_size = bdrv_getlength(bs->file->bs); + if (real_size < 0) { + error_setg_errno(errp, -real_size, "Could not get image size"); + return real_size; + } + + /* Check size and offset */ + if (offset > real_size) { + error_setg(errp, "Offset (%" PRIu64 ") cannot be greater than " + "size of the containing file (%" PRId64 ")", + s->offset, real_size); + return -EINVAL; + } + + if (has_size && (real_size - offset) < size) { + error_setg(errp, "The sum of offset (%" PRIu64 ") and size " + "(%" PRIu64 ") has to be smaller or equal to the " + " actual size of the containing file (%" PRId64 ")", + s->offset, s->size, real_size); + return -EINVAL; + } + + /* Make sure size is multiple of BDRV_SECTOR_SIZE to prevent rounding + * up and leaking out of the specified area. */ + if (has_size && !QEMU_IS_ALIGNED(size, BDRV_SECTOR_SIZE)) { + error_setg(errp, "Specified size is not multiple of %llu", + BDRV_SECTOR_SIZE); + return -EINVAL; + } + + s->offset = offset; + s->has_size = has_size; + s->size = has_size ? size : real_size - offset; + + return 0; +} + +static int raw_reopen_prepare(BDRVReopenState *reopen_state, + BlockReopenQueue *queue, Error **errp) +{ + bool has_size; + uint64_t offset, size; + int ret; + + assert(reopen_state != NULL); + assert(reopen_state->bs != NULL); + + reopen_state->opaque = g_new0(BDRVRawState, 1); + + ret = raw_read_options(reopen_state->options, &offset, &has_size, &size, + errp); + if (ret < 0) { + return ret; + } + + ret = raw_apply_options(reopen_state->bs, reopen_state->opaque, + offset, has_size, size, errp); + if (ret < 0) { + return ret; + } + + return 0; +} + +static void raw_reopen_commit(BDRVReopenState *state) +{ + BDRVRawState *new_s = state->opaque; + BDRVRawState *s = state->bs->opaque; + + memcpy(s, new_s, sizeof(BDRVRawState)); + + g_free(state->opaque); + state->opaque = NULL; +} + +static void raw_reopen_abort(BDRVReopenState *state) +{ + g_free(state->opaque); + state->opaque = NULL; +} + +/* Check and adjust the offset, against 'offset' and 'size' options. */ +static inline int raw_adjust_offset(BlockDriverState *bs, uint64_t *offset, + uint64_t bytes, bool is_write) +{ + BDRVRawState *s = bs->opaque; + + if (s->has_size && (*offset > s->size || bytes > (s->size - *offset))) { + /* There's not enough space for the write, or the read request is + * out-of-range. Don't read/write anything to prevent leaking out of + * the size specified in options. */ + return is_write ? -ENOSPC : -EINVAL; + } + + if (*offset > INT64_MAX - s->offset) { + return -EINVAL; + } + *offset += s->offset; + + return 0; +} + +static int coroutine_fn raw_co_preadv(BlockDriverState *bs, uint64_t offset, + uint64_t bytes, QEMUIOVector *qiov, + int flags) +{ + int ret; + + ret = raw_adjust_offset(bs, &offset, bytes, false); + if (ret) { + return ret; + } + + BLKDBG_EVENT(bs->file, BLKDBG_READ_AIO); + return bdrv_co_preadv(bs->file, offset, bytes, qiov, flags); +} + +static int coroutine_fn raw_co_pwritev(BlockDriverState *bs, uint64_t offset, + uint64_t bytes, QEMUIOVector *qiov, + int flags) +{ + void *buf = NULL; + BlockDriver *drv; + QEMUIOVector local_qiov; + int ret; + + if (bs->probed && offset < BLOCK_PROBE_BUF_SIZE && bytes) { + /* Handling partial writes would be a pain - so we just + * require that guests have 512-byte request alignment if + * probing occurred */ + QEMU_BUILD_BUG_ON(BLOCK_PROBE_BUF_SIZE != 512); + QEMU_BUILD_BUG_ON(BDRV_SECTOR_SIZE != 512); + assert(offset == 0 && bytes >= BLOCK_PROBE_BUF_SIZE); + + buf = qemu_try_blockalign(bs->file->bs, 512); + if (!buf) { + ret = -ENOMEM; + goto fail; + } + + ret = qemu_iovec_to_buf(qiov, 0, buf, 512); + if (ret != 512) { + ret = -EINVAL; + goto fail; + } + + drv = bdrv_probe_all(buf, 512, NULL); + if (drv != bs->drv) { + ret = -EPERM; + goto fail; + } + + /* Use the checked buffer, a malicious guest might be overwriting its + * original buffer in the background. */ + qemu_iovec_init(&local_qiov, qiov->niov + 1); + qemu_iovec_add(&local_qiov, buf, 512); + qemu_iovec_concat(&local_qiov, qiov, 512, qiov->size - 512); + qiov = &local_qiov; + } + + ret = raw_adjust_offset(bs, &offset, bytes, true); + if (ret) { + goto fail; + } + + BLKDBG_EVENT(bs->file, BLKDBG_WRITE_AIO); + ret = bdrv_co_pwritev(bs->file, offset, bytes, qiov, flags); + +fail: + if (qiov == &local_qiov) { + qemu_iovec_destroy(&local_qiov); + } + qemu_vfree(buf); + return ret; +} + +static int coroutine_fn raw_co_block_status(BlockDriverState *bs, + bool want_zero, int64_t offset, + int64_t bytes, int64_t *pnum, + int64_t *map, + BlockDriverState **file) +{ + BDRVRawState *s = bs->opaque; + *pnum = bytes; + *file = bs->file->bs; + *map = offset + s->offset; + return BDRV_BLOCK_RAW | BDRV_BLOCK_OFFSET_VALID; +} + +static int coroutine_fn raw_co_pwrite_zeroes(BlockDriverState *bs, + int64_t offset, int bytes, + BdrvRequestFlags flags) +{ + int ret; + + ret = raw_adjust_offset(bs, (uint64_t *)&offset, bytes, true); + if (ret) { + return ret; + } + return bdrv_co_pwrite_zeroes(bs->file, offset, bytes, flags); +} + +static int coroutine_fn raw_co_pdiscard(BlockDriverState *bs, + int64_t offset, int bytes) +{ + int ret; + + ret = raw_adjust_offset(bs, (uint64_t *)&offset, bytes, true); + if (ret) { + return ret; + } + return bdrv_co_pdiscard(bs->file, offset, bytes); +} + +static int64_t raw_getlength(BlockDriverState *bs) +{ + int64_t len; + BDRVRawState *s = bs->opaque; + + /* Update size. It should not change unless the file was externally + * modified. */ + len = bdrv_getlength(bs->file->bs); + if (len < 0) { + return len; + } + + if (len < s->offset) { + s->size = 0; + } else { + if (s->has_size) { + /* Try to honour the size */ + s->size = MIN(s->size, len - s->offset); + } else { + s->size = len - s->offset; + } + } + + return s->size; +} + +static BlockMeasureInfo *raw_measure(QemuOpts *opts, BlockDriverState *in_bs, + Error **errp) +{ + BlockMeasureInfo *info; + int64_t required; + + if (in_bs) { + required = bdrv_getlength(in_bs); + if (required < 0) { + error_setg_errno(errp, -required, "Unable to get image size"); + return NULL; + } + } else { + required = ROUND_UP(qemu_opt_get_size_del(opts, BLOCK_OPT_SIZE, 0), + BDRV_SECTOR_SIZE); + } + + info = g_new0(BlockMeasureInfo, 1); + info->required = required; + + /* Unallocated sectors count towards the file size in raw images */ + info->fully_allocated = info->required; + return info; +} + +static int raw_get_info(BlockDriverState *bs, BlockDriverInfo *bdi) +{ + return bdrv_get_info(bs->file->bs, bdi); +} + +static void raw_refresh_limits(BlockDriverState *bs, Error **errp) +{ + if (bs->probed) { + /* To make it easier to protect the first sector, any probed + * image is restricted to read-modify-write on sub-sector + * operations. */ + bs->bl.request_alignment = BDRV_SECTOR_SIZE; + } +} + +static int coroutine_fn raw_co_truncate(BlockDriverState *bs, int64_t offset, + bool exact, PreallocMode prealloc, + BdrvRequestFlags flags, Error **errp) +{ + BDRVRawState *s = bs->opaque; + + if (s->has_size) { + error_setg(errp, "Cannot resize fixed-size raw disks"); + return -ENOTSUP; + } + + if (INT64_MAX - offset < s->offset) { + error_setg(errp, "Disk size too large for the chosen offset"); + return -EINVAL; + } + + s->size = offset; + offset += s->offset; + return bdrv_co_truncate(bs->file, offset, exact, prealloc, flags, errp); +} + +static void raw_eject(BlockDriverState *bs, bool eject_flag) +{ + bdrv_eject(bs->file->bs, eject_flag); +} + +static void raw_lock_medium(BlockDriverState *bs, bool locked) +{ + bdrv_lock_medium(bs->file->bs, locked); +} + +static int raw_co_ioctl(BlockDriverState *bs, unsigned long int req, void *buf) +{ + BDRVRawState *s = bs->opaque; + if (s->offset || s->has_size) { + return -ENOTSUP; + } + return bdrv_co_ioctl(bs->file->bs, req, buf); +} + +static int raw_has_zero_init(BlockDriverState *bs) +{ + return bdrv_has_zero_init(bs->file->bs); +} + +static int coroutine_fn raw_co_create_opts(BlockDriver *drv, + const char *filename, + QemuOpts *opts, + Error **errp) +{ + return bdrv_create_file(filename, opts, errp); +} + +static int raw_open(BlockDriverState *bs, QDict *options, int flags, + Error **errp) +{ + BDRVRawState *s = bs->opaque; + bool has_size; + uint64_t offset, size; + BdrvChildRole file_role; + int ret; + + ret = raw_read_options(options, &offset, &has_size, &size, errp); + if (ret < 0) { + return ret; + } + + /* + * Without offset and a size limit, this driver behaves very much + * like a filter. With any such limit, it does not. + */ + if (offset || has_size) { + file_role = BDRV_CHILD_DATA | BDRV_CHILD_PRIMARY; + } else { + file_role = BDRV_CHILD_FILTERED | BDRV_CHILD_PRIMARY; + } + + bs->file = bdrv_open_child(NULL, options, "file", bs, &child_of_bds, + file_role, false, errp); + if (!bs->file) { + return -EINVAL; + } + + bs->sg = bs->file->bs->sg; + bs->supported_write_flags = BDRV_REQ_WRITE_UNCHANGED | + (BDRV_REQ_FUA & bs->file->bs->supported_write_flags); + bs->supported_zero_flags = BDRV_REQ_WRITE_UNCHANGED | + ((BDRV_REQ_FUA | BDRV_REQ_MAY_UNMAP | BDRV_REQ_NO_FALLBACK) & + bs->file->bs->supported_zero_flags); + bs->supported_truncate_flags = bs->file->bs->supported_truncate_flags & + BDRV_REQ_ZERO_WRITE; + + if (bs->probed && !bdrv_is_read_only(bs)) { + bdrv_refresh_filename(bs->file->bs); + fprintf(stderr, + "WARNING: Image format was not specified for '%s' and probing " + "guessed raw.\n" + " Automatically detecting the format is dangerous for " + "raw images, write operations on block 0 will be restricted.\n" + " Specify the 'raw' format explicitly to remove the " + "restrictions.\n", + bs->file->bs->filename); + } + + ret = raw_apply_options(bs, s, offset, has_size, size, errp); + if (ret < 0) { + return ret; + } + + if (bs->sg && (s->offset || s->has_size)) { + error_setg(errp, "Cannot use offset/size with SCSI generic devices"); + return -EINVAL; + } + + return 0; +} + +static int raw_probe(const uint8_t *buf, int buf_size, const char *filename) +{ + /* smallest possible positive score so that raw is used if and only if no + * other block driver works + */ + return 1; +} + +static int raw_probe_blocksizes(BlockDriverState *bs, BlockSizes *bsz) +{ + BDRVRawState *s = bs->opaque; + int ret; + + ret = bdrv_probe_blocksizes(bs->file->bs, bsz); + if (ret < 0) { + return ret; + } + + if (!QEMU_IS_ALIGNED(s->offset, MAX(bsz->log, bsz->phys))) { + return -ENOTSUP; + } + + return 0; +} + +static int raw_probe_geometry(BlockDriverState *bs, HDGeometry *geo) +{ + BDRVRawState *s = bs->opaque; + if (s->offset || s->has_size) { + return -ENOTSUP; + } + return bdrv_probe_geometry(bs->file->bs, geo); +} + +static int coroutine_fn raw_co_copy_range_from(BlockDriverState *bs, + BdrvChild *src, + uint64_t src_offset, + BdrvChild *dst, + uint64_t dst_offset, + uint64_t bytes, + BdrvRequestFlags read_flags, + BdrvRequestFlags write_flags) +{ + int ret; + + ret = raw_adjust_offset(bs, &src_offset, bytes, false); + if (ret) { + return ret; + } + return bdrv_co_copy_range_from(bs->file, src_offset, dst, dst_offset, + bytes, read_flags, write_flags); +} + +static int coroutine_fn raw_co_copy_range_to(BlockDriverState *bs, + BdrvChild *src, + uint64_t src_offset, + BdrvChild *dst, + uint64_t dst_offset, + uint64_t bytes, + BdrvRequestFlags read_flags, + BdrvRequestFlags write_flags) +{ + int ret; + + ret = raw_adjust_offset(bs, &dst_offset, bytes, true); + if (ret) { + return ret; + } + return bdrv_co_copy_range_to(src, src_offset, bs->file, dst_offset, bytes, + read_flags, write_flags); +} + +static const char *const raw_strong_runtime_opts[] = { + "offset", + "size", + + NULL +}; + +BlockDriver bdrv_raw = { + .format_name = "raw", + .instance_size = sizeof(BDRVRawState), + .bdrv_probe = &raw_probe, + .bdrv_reopen_prepare = &raw_reopen_prepare, + .bdrv_reopen_commit = &raw_reopen_commit, + .bdrv_reopen_abort = &raw_reopen_abort, + .bdrv_open = &raw_open, + .bdrv_child_perm = bdrv_default_perms, + .bdrv_co_create_opts = &raw_co_create_opts, + .bdrv_co_preadv = &raw_co_preadv, + .bdrv_co_pwritev = &raw_co_pwritev, + .bdrv_co_pwrite_zeroes = &raw_co_pwrite_zeroes, + .bdrv_co_pdiscard = &raw_co_pdiscard, + .bdrv_co_block_status = &raw_co_block_status, + .bdrv_co_copy_range_from = &raw_co_copy_range_from, + .bdrv_co_copy_range_to = &raw_co_copy_range_to, + .bdrv_co_truncate = &raw_co_truncate, + .bdrv_getlength = &raw_getlength, + .is_format = true, + .has_variable_length = true, + .bdrv_measure = &raw_measure, + .bdrv_get_info = &raw_get_info, + .bdrv_refresh_limits = &raw_refresh_limits, + .bdrv_probe_blocksizes = &raw_probe_blocksizes, + .bdrv_probe_geometry = &raw_probe_geometry, + .bdrv_eject = &raw_eject, + .bdrv_lock_medium = &raw_lock_medium, + .bdrv_co_ioctl = &raw_co_ioctl, + .create_opts = &raw_create_opts, + .bdrv_has_zero_init = &raw_has_zero_init, + .strong_runtime_opts = raw_strong_runtime_opts, + .mutable_opts = mutable_opts, +}; + +static void bdrv_raw_init(void) +{ + bdrv_register(&bdrv_raw); +} + +block_init(bdrv_raw_init); diff --git a/block/rbd.c b/block/rbd.c new file mode 100644 index 000000000..318e2826f --- /dev/null +++ b/block/rbd.c @@ -0,0 +1,1309 @@ +/* + * QEMU Block driver for RADOS (Ceph) + * + * Copyright (C) 2010-2011 Christian Brunner , + * Josh Durgin + * + * This work is licensed under the terms of the GNU GPL, version 2. See + * the COPYING file in the top-level directory. + * + * Contributions after 2012-01-13 are licensed under the terms of the + * GNU GPL, version 2 or (at your option) any later version. + */ + +#include "qemu/osdep.h" + +#include +#include "qapi/error.h" +#include "qemu/error-report.h" +#include "qemu/module.h" +#include "qemu/option.h" +#include "block/block_int.h" +#include "block/qdict.h" +#include "crypto/secret.h" +#include "qemu/cutils.h" +#include "sysemu/replay.h" +#include "qapi/qmp/qstring.h" +#include "qapi/qmp/qdict.h" +#include "qapi/qmp/qjson.h" +#include "qapi/qmp/qlist.h" +#include "qapi/qobject-input-visitor.h" +#include "qapi/qapi-visit-block-core.h" + +/* + * When specifying the image filename use: + * + * rbd:poolname/devicename[@snapshotname][:option1=value1[:option2=value2...]] + * + * poolname must be the name of an existing rados pool. + * + * devicename is the name of the rbd image. + * + * Each option given is used to configure rados, and may be any valid + * Ceph option, "id", or "conf". + * + * The "id" option indicates what user we should authenticate as to + * the Ceph cluster. If it is excluded we will use the Ceph default + * (normally 'admin'). + * + * The "conf" option specifies a Ceph configuration file to read. If + * it is not specified, we will read from the default Ceph locations + * (e.g., /etc/ceph/ceph.conf). To avoid reading _any_ configuration + * file, specify conf=/dev/null. + * + * Configuration values containing :, @, or = can be escaped with a + * leading "\". + */ + +/* rbd_aio_discard added in 0.1.2 */ +#if LIBRBD_VERSION_CODE >= LIBRBD_VERSION(0, 1, 2) +#define LIBRBD_SUPPORTS_DISCARD +#else +#undef LIBRBD_SUPPORTS_DISCARD +#endif + +#define OBJ_MAX_SIZE (1UL << OBJ_DEFAULT_OBJ_ORDER) + +#define RBD_MAX_SNAPS 100 + +/* The LIBRBD_SUPPORTS_IOVEC is defined in librbd.h */ +#ifdef LIBRBD_SUPPORTS_IOVEC +#define LIBRBD_USE_IOVEC 1 +#else +#define LIBRBD_USE_IOVEC 0 +#endif + +typedef enum { + RBD_AIO_READ, + RBD_AIO_WRITE, + RBD_AIO_DISCARD, + RBD_AIO_FLUSH +} RBDAIOCmd; + +typedef struct RBDAIOCB { + BlockAIOCB common; + int64_t ret; + QEMUIOVector *qiov; + char *bounce; + RBDAIOCmd cmd; + int error; + struct BDRVRBDState *s; +} RBDAIOCB; + +typedef struct RADOSCB { + RBDAIOCB *acb; + struct BDRVRBDState *s; + int64_t size; + char *buf; + int64_t ret; +} RADOSCB; + +typedef struct BDRVRBDState { + rados_t cluster; + rados_ioctx_t io_ctx; + rbd_image_t image; + char *image_name; + char *snap; + char *namespace; + uint64_t image_size; +} BDRVRBDState; + +static int qemu_rbd_connect(rados_t *cluster, rados_ioctx_t *io_ctx, + BlockdevOptionsRbd *opts, bool cache, + const char *keypairs, const char *secretid, + Error **errp); + +static char *qemu_rbd_next_tok(char *src, char delim, char **p) +{ + char *end; + + *p = NULL; + + for (end = src; *end; ++end) { + if (*end == delim) { + break; + } + if (*end == '\\' && end[1] != '\0') { + end++; + } + } + if (*end == delim) { + *p = end + 1; + *end = '\0'; + } + return src; +} + +static void qemu_rbd_unescape(char *src) +{ + char *p; + + for (p = src; *src; ++src, ++p) { + if (*src == '\\' && src[1] != '\0') { + src++; + } + *p = *src; + } + *p = '\0'; +} + +static void qemu_rbd_parse_filename(const char *filename, QDict *options, + Error **errp) +{ + const char *start; + char *p, *buf; + QList *keypairs = NULL; + char *found_str, *image_name; + + if (!strstart(filename, "rbd:", &start)) { + error_setg(errp, "File name must start with 'rbd:'"); + return; + } + + buf = g_strdup(start); + p = buf; + + found_str = qemu_rbd_next_tok(p, '/', &p); + if (!p) { + error_setg(errp, "Pool name is required"); + goto done; + } + qemu_rbd_unescape(found_str); + qdict_put_str(options, "pool", found_str); + + if (strchr(p, '@')) { + image_name = qemu_rbd_next_tok(p, '@', &p); + + found_str = qemu_rbd_next_tok(p, ':', &p); + qemu_rbd_unescape(found_str); + qdict_put_str(options, "snapshot", found_str); + } else { + image_name = qemu_rbd_next_tok(p, ':', &p); + } + /* Check for namespace in the image_name */ + if (strchr(image_name, '/')) { + found_str = qemu_rbd_next_tok(image_name, '/', &image_name); + qemu_rbd_unescape(found_str); + qdict_put_str(options, "namespace", found_str); + } else { + qdict_put_str(options, "namespace", ""); + } + qemu_rbd_unescape(image_name); + qdict_put_str(options, "image", image_name); + if (!p) { + goto done; + } + + /* The following are essentially all key/value pairs, and we treat + * 'id' and 'conf' a bit special. Key/value pairs may be in any order. */ + while (p) { + char *name, *value; + name = qemu_rbd_next_tok(p, '=', &p); + if (!p) { + error_setg(errp, "conf option %s has no value", name); + break; + } + + qemu_rbd_unescape(name); + + value = qemu_rbd_next_tok(p, ':', &p); + qemu_rbd_unescape(value); + + if (!strcmp(name, "conf")) { + qdict_put_str(options, "conf", value); + } else if (!strcmp(name, "id")) { + qdict_put_str(options, "user", value); + } else { + /* + * We pass these internally to qemu_rbd_set_keypairs(), so + * we can get away with the simpler list of [ "key1", + * "value1", "key2", "value2" ] rather than a raw dict + * { "key1": "value1", "key2": "value2" } where we can't + * guarantee order, or even a more correct but complex + * [ { "key1": "value1" }, { "key2": "value2" } ] + */ + if (!keypairs) { + keypairs = qlist_new(); + } + qlist_append_str(keypairs, name); + qlist_append_str(keypairs, value); + } + } + + if (keypairs) { + qdict_put(options, "=keyvalue-pairs", + qobject_to_json(QOBJECT(keypairs))); + } + +done: + g_free(buf); + qobject_unref(keypairs); + return; +} + + +static void qemu_rbd_refresh_limits(BlockDriverState *bs, Error **errp) +{ + /* XXX Does RBD support AIO on less than 512-byte alignment? */ + bs->bl.request_alignment = 512; +} + + +static int qemu_rbd_set_auth(rados_t cluster, BlockdevOptionsRbd *opts, + Error **errp) +{ + char *key, *acr; + int r; + GString *accu; + RbdAuthModeList *auth; + + if (opts->key_secret) { + key = qcrypto_secret_lookup_as_base64(opts->key_secret, errp); + if (!key) { + return -EIO; + } + r = rados_conf_set(cluster, "key", key); + g_free(key); + if (r < 0) { + error_setg_errno(errp, -r, "Could not set 'key'"); + return r; + } + } + + if (opts->has_auth_client_required) { + accu = g_string_new(""); + for (auth = opts->auth_client_required; auth; auth = auth->next) { + if (accu->str[0]) { + g_string_append_c(accu, ';'); + } + g_string_append(accu, RbdAuthMode_str(auth->value)); + } + acr = g_string_free(accu, FALSE); + r = rados_conf_set(cluster, "auth_client_required", acr); + g_free(acr); + if (r < 0) { + error_setg_errno(errp, -r, + "Could not set 'auth_client_required'"); + return r; + } + } + + return 0; +} + +static int qemu_rbd_set_keypairs(rados_t cluster, const char *keypairs_json, + Error **errp) +{ + QList *keypairs; + QString *name; + QString *value; + const char *key; + size_t remaining; + int ret = 0; + + if (!keypairs_json) { + return ret; + } + keypairs = qobject_to(QList, + qobject_from_json(keypairs_json, &error_abort)); + remaining = qlist_size(keypairs) / 2; + assert(remaining); + + while (remaining--) { + name = qobject_to(QString, qlist_pop(keypairs)); + value = qobject_to(QString, qlist_pop(keypairs)); + assert(name && value); + key = qstring_get_str(name); + + ret = rados_conf_set(cluster, key, qstring_get_str(value)); + qobject_unref(value); + if (ret < 0) { + error_setg_errno(errp, -ret, "invalid conf option %s", key); + qobject_unref(name); + ret = -EINVAL; + break; + } + qobject_unref(name); + } + + qobject_unref(keypairs); + return ret; +} + +static void qemu_rbd_memset(RADOSCB *rcb, int64_t offs) +{ + if (LIBRBD_USE_IOVEC) { + RBDAIOCB *acb = rcb->acb; + iov_memset(acb->qiov->iov, acb->qiov->niov, offs, 0, + acb->qiov->size - offs); + } else { + memset(rcb->buf + offs, 0, rcb->size - offs); + } +} + +/* FIXME Deprecate and remove keypairs or make it available in QMP. */ +static int qemu_rbd_do_create(BlockdevCreateOptions *options, + const char *keypairs, const char *password_secret, + Error **errp) +{ + BlockdevCreateOptionsRbd *opts = &options->u.rbd; + rados_t cluster; + rados_ioctx_t io_ctx; + int obj_order = 0; + int ret; + + assert(options->driver == BLOCKDEV_DRIVER_RBD); + if (opts->location->has_snapshot) { + error_setg(errp, "Can't use snapshot name for image creation"); + return -EINVAL; + } + + if (opts->has_cluster_size) { + int64_t objsize = opts->cluster_size; + if ((objsize - 1) & objsize) { /* not a power of 2? */ + error_setg(errp, "obj size needs to be power of 2"); + return -EINVAL; + } + if (objsize < 4096) { + error_setg(errp, "obj size too small"); + return -EINVAL; + } + obj_order = ctz32(objsize); + } + + ret = qemu_rbd_connect(&cluster, &io_ctx, opts->location, false, keypairs, + password_secret, errp); + if (ret < 0) { + return ret; + } + + ret = rbd_create(io_ctx, opts->location->image, opts->size, &obj_order); + if (ret < 0) { + error_setg_errno(errp, -ret, "error rbd create"); + goto out; + } + + ret = 0; +out: + rados_ioctx_destroy(io_ctx); + rados_shutdown(cluster); + return ret; +} + +static int qemu_rbd_co_create(BlockdevCreateOptions *options, Error **errp) +{ + return qemu_rbd_do_create(options, NULL, NULL, errp); +} + +static int coroutine_fn qemu_rbd_co_create_opts(BlockDriver *drv, + const char *filename, + QemuOpts *opts, + Error **errp) +{ + BlockdevCreateOptions *create_options; + BlockdevCreateOptionsRbd *rbd_opts; + BlockdevOptionsRbd *loc; + Error *local_err = NULL; + const char *keypairs, *password_secret; + QDict *options = NULL; + int ret = 0; + + create_options = g_new0(BlockdevCreateOptions, 1); + create_options->driver = BLOCKDEV_DRIVER_RBD; + rbd_opts = &create_options->u.rbd; + + rbd_opts->location = g_new0(BlockdevOptionsRbd, 1); + + password_secret = qemu_opt_get(opts, "password-secret"); + + /* Read out options */ + rbd_opts->size = ROUND_UP(qemu_opt_get_size_del(opts, BLOCK_OPT_SIZE, 0), + BDRV_SECTOR_SIZE); + rbd_opts->cluster_size = qemu_opt_get_size_del(opts, + BLOCK_OPT_CLUSTER_SIZE, 0); + rbd_opts->has_cluster_size = (rbd_opts->cluster_size != 0); + + options = qdict_new(); + qemu_rbd_parse_filename(filename, options, &local_err); + if (local_err) { + ret = -EINVAL; + error_propagate(errp, local_err); + goto exit; + } + + /* + * Caution: while qdict_get_try_str() is fine, getting non-string + * types would require more care. When @options come from -blockdev + * or blockdev_add, its members are typed according to the QAPI + * schema, but when they come from -drive, they're all QString. + */ + loc = rbd_opts->location; + loc->pool = g_strdup(qdict_get_try_str(options, "pool")); + loc->conf = g_strdup(qdict_get_try_str(options, "conf")); + loc->has_conf = !!loc->conf; + loc->user = g_strdup(qdict_get_try_str(options, "user")); + loc->has_user = !!loc->user; + loc->q_namespace = g_strdup(qdict_get_try_str(options, "namespace")); + loc->has_q_namespace = !!loc->q_namespace; + loc->image = g_strdup(qdict_get_try_str(options, "image")); + keypairs = qdict_get_try_str(options, "=keyvalue-pairs"); + + ret = qemu_rbd_do_create(create_options, keypairs, password_secret, errp); + if (ret < 0) { + goto exit; + } + +exit: + qobject_unref(options); + qapi_free_BlockdevCreateOptions(create_options); + return ret; +} + +/* + * This aio completion is being called from rbd_finish_bh() and runs in qemu + * BH context. + */ +static void qemu_rbd_complete_aio(RADOSCB *rcb) +{ + RBDAIOCB *acb = rcb->acb; + int64_t r; + + r = rcb->ret; + + if (acb->cmd != RBD_AIO_READ) { + if (r < 0) { + acb->ret = r; + acb->error = 1; + } else if (!acb->error) { + acb->ret = rcb->size; + } + } else { + if (r < 0) { + qemu_rbd_memset(rcb, 0); + acb->ret = r; + acb->error = 1; + } else if (r < rcb->size) { + qemu_rbd_memset(rcb, r); + if (!acb->error) { + acb->ret = rcb->size; + } + } else if (!acb->error) { + acb->ret = r; + } + } + + g_free(rcb); + + if (!LIBRBD_USE_IOVEC) { + if (acb->cmd == RBD_AIO_READ) { + qemu_iovec_from_buf(acb->qiov, 0, acb->bounce, acb->qiov->size); + } + qemu_vfree(acb->bounce); + } + + acb->common.cb(acb->common.opaque, (acb->ret > 0 ? 0 : acb->ret)); + + qemu_aio_unref(acb); +} + +static char *qemu_rbd_mon_host(BlockdevOptionsRbd *opts, Error **errp) +{ + const char **vals; + const char *host, *port; + char *rados_str; + InetSocketAddressBaseList *p; + int i, cnt; + + if (!opts->has_server) { + return NULL; + } + + for (cnt = 0, p = opts->server; p; p = p->next) { + cnt++; + } + + vals = g_new(const char *, cnt + 1); + + for (i = 0, p = opts->server; p; p = p->next, i++) { + host = p->value->host; + port = p->value->port; + + if (strchr(host, ':')) { + vals[i] = g_strdup_printf("[%s]:%s", host, port); + } else { + vals[i] = g_strdup_printf("%s:%s", host, port); + } + } + vals[i] = NULL; + + rados_str = i ? g_strjoinv(";", (char **)vals) : NULL; + g_strfreev((char **)vals); + return rados_str; +} + +static int qemu_rbd_connect(rados_t *cluster, rados_ioctx_t *io_ctx, + BlockdevOptionsRbd *opts, bool cache, + const char *keypairs, const char *secretid, + Error **errp) +{ + char *mon_host = NULL; + Error *local_err = NULL; + int r; + + if (secretid) { + if (opts->key_secret) { + error_setg(errp, + "Legacy 'password-secret' clashes with 'key-secret'"); + return -EINVAL; + } + opts->key_secret = g_strdup(secretid); + opts->has_key_secret = true; + } + + mon_host = qemu_rbd_mon_host(opts, &local_err); + if (local_err) { + error_propagate(errp, local_err); + r = -EINVAL; + goto out; + } + + r = rados_create(cluster, opts->user); + if (r < 0) { + error_setg_errno(errp, -r, "error initializing"); + goto out; + } + + /* try default location when conf=NULL, but ignore failure */ + r = rados_conf_read_file(*cluster, opts->conf); + if (opts->has_conf && r < 0) { + error_setg_errno(errp, -r, "error reading conf file %s", opts->conf); + goto failed_shutdown; + } + + r = qemu_rbd_set_keypairs(*cluster, keypairs, errp); + if (r < 0) { + goto failed_shutdown; + } + + if (mon_host) { + r = rados_conf_set(*cluster, "mon_host", mon_host); + if (r < 0) { + goto failed_shutdown; + } + } + + r = qemu_rbd_set_auth(*cluster, opts, errp); + if (r < 0) { + goto failed_shutdown; + } + + /* + * Fallback to more conservative semantics if setting cache + * options fails. Ignore errors from setting rbd_cache because the + * only possible error is that the option does not exist, and + * librbd defaults to no caching. If write through caching cannot + * be set up, fall back to no caching. + */ + if (cache) { + rados_conf_set(*cluster, "rbd_cache", "true"); + } else { + rados_conf_set(*cluster, "rbd_cache", "false"); + } + + r = rados_connect(*cluster); + if (r < 0) { + error_setg_errno(errp, -r, "error connecting"); + goto failed_shutdown; + } + + r = rados_ioctx_create(*cluster, opts->pool, io_ctx); + if (r < 0) { + error_setg_errno(errp, -r, "error opening pool %s", opts->pool); + goto failed_shutdown; + } + /* + * Set the namespace after opening the io context on the pool, + * if nspace == NULL or if nspace == "", it is just as we did nothing + */ + rados_ioctx_set_namespace(*io_ctx, opts->q_namespace); + + r = 0; + goto out; + +failed_shutdown: + rados_shutdown(*cluster); +out: + g_free(mon_host); + return r; +} + +static int qemu_rbd_convert_options(QDict *options, BlockdevOptionsRbd **opts, + Error **errp) +{ + Visitor *v; + + /* Convert the remaining options into a QAPI object */ + v = qobject_input_visitor_new_flat_confused(options, errp); + if (!v) { + return -EINVAL; + } + + visit_type_BlockdevOptionsRbd(v, NULL, opts, errp); + visit_free(v); + if (!opts) { + return -EINVAL; + } + + return 0; +} + +static int qemu_rbd_attempt_legacy_options(QDict *options, + BlockdevOptionsRbd **opts, + char **keypairs) +{ + char *filename; + int r; + + filename = g_strdup(qdict_get_try_str(options, "filename")); + if (!filename) { + return -EINVAL; + } + qdict_del(options, "filename"); + + qemu_rbd_parse_filename(filename, options, NULL); + + /* keypairs freed by caller */ + *keypairs = g_strdup(qdict_get_try_str(options, "=keyvalue-pairs")); + if (*keypairs) { + qdict_del(options, "=keyvalue-pairs"); + } + + r = qemu_rbd_convert_options(options, opts, NULL); + + g_free(filename); + return r; +} + +static int qemu_rbd_open(BlockDriverState *bs, QDict *options, int flags, + Error **errp) +{ + BDRVRBDState *s = bs->opaque; + BlockdevOptionsRbd *opts = NULL; + const QDictEntry *e; + Error *local_err = NULL; + char *keypairs, *secretid; + int r; + + keypairs = g_strdup(qdict_get_try_str(options, "=keyvalue-pairs")); + if (keypairs) { + qdict_del(options, "=keyvalue-pairs"); + } + + secretid = g_strdup(qdict_get_try_str(options, "password-secret")); + if (secretid) { + qdict_del(options, "password-secret"); + } + + r = qemu_rbd_convert_options(options, &opts, &local_err); + if (local_err) { + /* If keypairs are present, that means some options are present in + * the modern option format. Don't attempt to parse legacy option + * formats, as we won't support mixed usage. */ + if (keypairs) { + error_propagate(errp, local_err); + goto out; + } + + /* If the initial attempt to convert and process the options failed, + * we may be attempting to open an image file that has the rbd options + * specified in the older format consisting of all key/value pairs + * encoded in the filename. Go ahead and attempt to parse the + * filename, and see if we can pull out the required options. */ + r = qemu_rbd_attempt_legacy_options(options, &opts, &keypairs); + if (r < 0) { + /* Propagate the original error, not the legacy parsing fallback + * error, as the latter was just a best-effort attempt. */ + error_propagate(errp, local_err); + goto out; + } + /* Take care whenever deciding to actually deprecate; once this ability + * is removed, we will not be able to open any images with legacy-styled + * backing image strings. */ + warn_report("RBD options encoded in the filename as keyvalue pairs " + "is deprecated"); + } + + /* Remove the processed options from the QDict (the visitor processes + * _all_ options in the QDict) */ + while ((e = qdict_first(options))) { + qdict_del(options, e->key); + } + + r = qemu_rbd_connect(&s->cluster, &s->io_ctx, opts, + !(flags & BDRV_O_NOCACHE), keypairs, secretid, errp); + if (r < 0) { + goto out; + } + + s->snap = g_strdup(opts->snapshot); + s->image_name = g_strdup(opts->image); + + /* rbd_open is always r/w */ + r = rbd_open(s->io_ctx, s->image_name, &s->image, s->snap); + if (r < 0) { + error_setg_errno(errp, -r, "error reading header from %s", + s->image_name); + goto failed_open; + } + + r = rbd_get_size(s->image, &s->image_size); + if (r < 0) { + error_setg_errno(errp, -r, "error getting image size from %s", + s->image_name); + rbd_close(s->image); + goto failed_open; + } + + /* If we are using an rbd snapshot, we must be r/o, otherwise + * leave as-is */ + if (s->snap != NULL) { + r = bdrv_apply_auto_read_only(bs, "rbd snapshots are read-only", errp); + if (r < 0) { + rbd_close(s->image); + goto failed_open; + } + } + + /* When extending regular files, we get zeros from the OS */ + bs->supported_truncate_flags = BDRV_REQ_ZERO_WRITE; + + r = 0; + goto out; + +failed_open: + rados_ioctx_destroy(s->io_ctx); + g_free(s->snap); + g_free(s->image_name); + rados_shutdown(s->cluster); +out: + qapi_free_BlockdevOptionsRbd(opts); + g_free(keypairs); + g_free(secretid); + return r; +} + + +/* Since RBD is currently always opened R/W via the API, + * we just need to check if we are using a snapshot or not, in + * order to determine if we will allow it to be R/W */ +static int qemu_rbd_reopen_prepare(BDRVReopenState *state, + BlockReopenQueue *queue, Error **errp) +{ + BDRVRBDState *s = state->bs->opaque; + int ret = 0; + + if (s->snap && state->flags & BDRV_O_RDWR) { + error_setg(errp, + "Cannot change node '%s' to r/w when using RBD snapshot", + bdrv_get_device_or_node_name(state->bs)); + ret = -EINVAL; + } + + return ret; +} + +static void qemu_rbd_close(BlockDriverState *bs) +{ + BDRVRBDState *s = bs->opaque; + + rbd_close(s->image); + rados_ioctx_destroy(s->io_ctx); + g_free(s->snap); + g_free(s->image_name); + rados_shutdown(s->cluster); +} + +/* Resize the RBD image and update the 'image_size' with the current size */ +static int qemu_rbd_resize(BlockDriverState *bs, uint64_t size) +{ + BDRVRBDState *s = bs->opaque; + int r; + + r = rbd_resize(s->image, size); + if (r < 0) { + return r; + } + + s->image_size = size; + + return 0; +} + +static const AIOCBInfo rbd_aiocb_info = { + .aiocb_size = sizeof(RBDAIOCB), +}; + +static void rbd_finish_bh(void *opaque) +{ + RADOSCB *rcb = opaque; + qemu_rbd_complete_aio(rcb); +} + +/* + * This is the callback function for rbd_aio_read and _write + * + * Note: this function is being called from a non qemu thread so + * we need to be careful about what we do here. Generally we only + * schedule a BH, and do the rest of the io completion handling + * from rbd_finish_bh() which runs in a qemu context. + */ +static void rbd_finish_aiocb(rbd_completion_t c, RADOSCB *rcb) +{ + RBDAIOCB *acb = rcb->acb; + + rcb->ret = rbd_aio_get_return_value(c); + rbd_aio_release(c); + + replay_bh_schedule_oneshot_event(bdrv_get_aio_context(acb->common.bs), + rbd_finish_bh, rcb); +} + +static int rbd_aio_discard_wrapper(rbd_image_t image, + uint64_t off, + uint64_t len, + rbd_completion_t comp) +{ +#ifdef LIBRBD_SUPPORTS_DISCARD + return rbd_aio_discard(image, off, len, comp); +#else + return -ENOTSUP; +#endif +} + +static int rbd_aio_flush_wrapper(rbd_image_t image, + rbd_completion_t comp) +{ +#ifdef LIBRBD_SUPPORTS_AIO_FLUSH + return rbd_aio_flush(image, comp); +#else + return -ENOTSUP; +#endif +} + +static BlockAIOCB *rbd_start_aio(BlockDriverState *bs, + int64_t off, + QEMUIOVector *qiov, + int64_t size, + BlockCompletionFunc *cb, + void *opaque, + RBDAIOCmd cmd) +{ + RBDAIOCB *acb; + RADOSCB *rcb = NULL; + rbd_completion_t c; + int r; + + BDRVRBDState *s = bs->opaque; + + acb = qemu_aio_get(&rbd_aiocb_info, bs, cb, opaque); + acb->cmd = cmd; + acb->qiov = qiov; + assert(!qiov || qiov->size == size); + + rcb = g_new(RADOSCB, 1); + + if (!LIBRBD_USE_IOVEC) { + if (cmd == RBD_AIO_DISCARD || cmd == RBD_AIO_FLUSH) { + acb->bounce = NULL; + } else { + acb->bounce = qemu_try_blockalign(bs, qiov->size); + if (acb->bounce == NULL) { + goto failed; + } + } + if (cmd == RBD_AIO_WRITE) { + qemu_iovec_to_buf(acb->qiov, 0, acb->bounce, qiov->size); + } + rcb->buf = acb->bounce; + } + + acb->ret = 0; + acb->error = 0; + acb->s = s; + + rcb->acb = acb; + rcb->s = acb->s; + rcb->size = size; + r = rbd_aio_create_completion(rcb, (rbd_callback_t) rbd_finish_aiocb, &c); + if (r < 0) { + goto failed; + } + + switch (cmd) { + case RBD_AIO_WRITE: { + /* + * RBD APIs don't allow us to write more than actual size, so in order + * to support growing images, we resize the image before write + * operations that exceed the current size. + */ + if (off + size > s->image_size) { + r = qemu_rbd_resize(bs, off + size); + if (r < 0) { + goto failed_completion; + } + } +#ifdef LIBRBD_SUPPORTS_IOVEC + r = rbd_aio_writev(s->image, qiov->iov, qiov->niov, off, c); +#else + r = rbd_aio_write(s->image, off, size, rcb->buf, c); +#endif + break; + } + case RBD_AIO_READ: +#ifdef LIBRBD_SUPPORTS_IOVEC + r = rbd_aio_readv(s->image, qiov->iov, qiov->niov, off, c); +#else + r = rbd_aio_read(s->image, off, size, rcb->buf, c); +#endif + break; + case RBD_AIO_DISCARD: + r = rbd_aio_discard_wrapper(s->image, off, size, c); + break; + case RBD_AIO_FLUSH: + r = rbd_aio_flush_wrapper(s->image, c); + break; + default: + r = -EINVAL; + } + + if (r < 0) { + goto failed_completion; + } + return &acb->common; + +failed_completion: + rbd_aio_release(c); +failed: + g_free(rcb); + if (!LIBRBD_USE_IOVEC) { + qemu_vfree(acb->bounce); + } + + qemu_aio_unref(acb); + return NULL; +} + +static BlockAIOCB *qemu_rbd_aio_preadv(BlockDriverState *bs, + uint64_t offset, uint64_t bytes, + QEMUIOVector *qiov, int flags, + BlockCompletionFunc *cb, + void *opaque) +{ + return rbd_start_aio(bs, offset, qiov, bytes, cb, opaque, + RBD_AIO_READ); +} + +static BlockAIOCB *qemu_rbd_aio_pwritev(BlockDriverState *bs, + uint64_t offset, uint64_t bytes, + QEMUIOVector *qiov, int flags, + BlockCompletionFunc *cb, + void *opaque) +{ + return rbd_start_aio(bs, offset, qiov, bytes, cb, opaque, + RBD_AIO_WRITE); +} + +#ifdef LIBRBD_SUPPORTS_AIO_FLUSH +static BlockAIOCB *qemu_rbd_aio_flush(BlockDriverState *bs, + BlockCompletionFunc *cb, + void *opaque) +{ + return rbd_start_aio(bs, 0, NULL, 0, cb, opaque, RBD_AIO_FLUSH); +} + +#else + +static int qemu_rbd_co_flush(BlockDriverState *bs) +{ +#if LIBRBD_VERSION_CODE >= LIBRBD_VERSION(0, 1, 1) + /* rbd_flush added in 0.1.1 */ + BDRVRBDState *s = bs->opaque; + return rbd_flush(s->image); +#else + return 0; +#endif +} +#endif + +static int qemu_rbd_getinfo(BlockDriverState *bs, BlockDriverInfo *bdi) +{ + BDRVRBDState *s = bs->opaque; + rbd_image_info_t info; + int r; + + r = rbd_stat(s->image, &info, sizeof(info)); + if (r < 0) { + return r; + } + + bdi->cluster_size = info.obj_size; + return 0; +} + +static int64_t qemu_rbd_getlength(BlockDriverState *bs) +{ + BDRVRBDState *s = bs->opaque; + rbd_image_info_t info; + int r; + + r = rbd_stat(s->image, &info, sizeof(info)); + if (r < 0) { + return r; + } + + return info.size; +} + +static int coroutine_fn qemu_rbd_co_truncate(BlockDriverState *bs, + int64_t offset, + bool exact, + PreallocMode prealloc, + BdrvRequestFlags flags, + Error **errp) +{ + int r; + + if (prealloc != PREALLOC_MODE_OFF) { + error_setg(errp, "Unsupported preallocation mode '%s'", + PreallocMode_str(prealloc)); + return -ENOTSUP; + } + + r = qemu_rbd_resize(bs, offset); + if (r < 0) { + error_setg_errno(errp, -r, "Failed to resize file"); + return r; + } + + return 0; +} + +static int qemu_rbd_snap_create(BlockDriverState *bs, + QEMUSnapshotInfo *sn_info) +{ + BDRVRBDState *s = bs->opaque; + int r; + + if (sn_info->name[0] == '\0') { + return -EINVAL; /* we need a name for rbd snapshots */ + } + + /* + * rbd snapshots are using the name as the user controlled unique identifier + * we can't use the rbd snapid for that purpose, as it can't be set + */ + if (sn_info->id_str[0] != '\0' && + strcmp(sn_info->id_str, sn_info->name) != 0) { + return -EINVAL; + } + + if (strlen(sn_info->name) >= sizeof(sn_info->id_str)) { + return -ERANGE; + } + + r = rbd_snap_create(s->image, sn_info->name); + if (r < 0) { + error_report("failed to create snap: %s", strerror(-r)); + return r; + } + + return 0; +} + +static int qemu_rbd_snap_remove(BlockDriverState *bs, + const char *snapshot_id, + const char *snapshot_name, + Error **errp) +{ + BDRVRBDState *s = bs->opaque; + int r; + + if (!snapshot_name) { + error_setg(errp, "rbd need a valid snapshot name"); + return -EINVAL; + } + + /* If snapshot_id is specified, it must be equal to name, see + qemu_rbd_snap_list() */ + if (snapshot_id && strcmp(snapshot_id, snapshot_name)) { + error_setg(errp, + "rbd do not support snapshot id, it should be NULL or " + "equal to snapshot name"); + return -EINVAL; + } + + r = rbd_snap_remove(s->image, snapshot_name); + if (r < 0) { + error_setg_errno(errp, -r, "Failed to remove the snapshot"); + } + return r; +} + +static int qemu_rbd_snap_rollback(BlockDriverState *bs, + const char *snapshot_name) +{ + BDRVRBDState *s = bs->opaque; + + return rbd_snap_rollback(s->image, snapshot_name); +} + +static int qemu_rbd_snap_list(BlockDriverState *bs, + QEMUSnapshotInfo **psn_tab) +{ + BDRVRBDState *s = bs->opaque; + QEMUSnapshotInfo *sn_info, *sn_tab = NULL; + int i, snap_count; + rbd_snap_info_t *snaps; + int max_snaps = RBD_MAX_SNAPS; + + do { + snaps = g_new(rbd_snap_info_t, max_snaps); + snap_count = rbd_snap_list(s->image, snaps, &max_snaps); + if (snap_count <= 0) { + g_free(snaps); + } + } while (snap_count == -ERANGE); + + if (snap_count <= 0) { + goto done; + } + + sn_tab = g_new0(QEMUSnapshotInfo, snap_count); + + for (i = 0; i < snap_count; i++) { + const char *snap_name = snaps[i].name; + + sn_info = sn_tab + i; + pstrcpy(sn_info->id_str, sizeof(sn_info->id_str), snap_name); + pstrcpy(sn_info->name, sizeof(sn_info->name), snap_name); + + sn_info->vm_state_size = snaps[i].size; + sn_info->date_sec = 0; + sn_info->date_nsec = 0; + sn_info->vm_clock_nsec = 0; + } + rbd_snap_list_end(snaps); + g_free(snaps); + + done: + *psn_tab = sn_tab; + return snap_count; +} + +#ifdef LIBRBD_SUPPORTS_DISCARD +static BlockAIOCB *qemu_rbd_aio_pdiscard(BlockDriverState *bs, + int64_t offset, + int bytes, + BlockCompletionFunc *cb, + void *opaque) +{ + return rbd_start_aio(bs, offset, NULL, bytes, cb, opaque, + RBD_AIO_DISCARD); +} +#endif + +#ifdef LIBRBD_SUPPORTS_INVALIDATE +static void coroutine_fn qemu_rbd_co_invalidate_cache(BlockDriverState *bs, + Error **errp) +{ + BDRVRBDState *s = bs->opaque; + int r = rbd_invalidate_cache(s->image); + if (r < 0) { + error_setg_errno(errp, -r, "Failed to invalidate the cache"); + } +} +#endif + +static QemuOptsList qemu_rbd_create_opts = { + .name = "rbd-create-opts", + .head = QTAILQ_HEAD_INITIALIZER(qemu_rbd_create_opts.head), + .desc = { + { + .name = BLOCK_OPT_SIZE, + .type = QEMU_OPT_SIZE, + .help = "Virtual disk size" + }, + { + .name = BLOCK_OPT_CLUSTER_SIZE, + .type = QEMU_OPT_SIZE, + .help = "RBD object size" + }, + { + .name = "password-secret", + .type = QEMU_OPT_STRING, + .help = "ID of secret providing the password", + }, + { /* end of list */ } + } +}; + +static const char *const qemu_rbd_strong_runtime_opts[] = { + "pool", + "namespace", + "image", + "conf", + "snapshot", + "user", + "server.", + "password-secret", + + NULL +}; + +static BlockDriver bdrv_rbd = { + .format_name = "rbd", + .instance_size = sizeof(BDRVRBDState), + .bdrv_parse_filename = qemu_rbd_parse_filename, + .bdrv_refresh_limits = qemu_rbd_refresh_limits, + .bdrv_file_open = qemu_rbd_open, + .bdrv_close = qemu_rbd_close, + .bdrv_reopen_prepare = qemu_rbd_reopen_prepare, + .bdrv_co_create = qemu_rbd_co_create, + .bdrv_co_create_opts = qemu_rbd_co_create_opts, + .bdrv_has_zero_init = bdrv_has_zero_init_1, + .bdrv_get_info = qemu_rbd_getinfo, + .create_opts = &qemu_rbd_create_opts, + .bdrv_getlength = qemu_rbd_getlength, + .bdrv_co_truncate = qemu_rbd_co_truncate, + .protocol_name = "rbd", + + .bdrv_aio_preadv = qemu_rbd_aio_preadv, + .bdrv_aio_pwritev = qemu_rbd_aio_pwritev, + +#ifdef LIBRBD_SUPPORTS_AIO_FLUSH + .bdrv_aio_flush = qemu_rbd_aio_flush, +#else + .bdrv_co_flush_to_disk = qemu_rbd_co_flush, +#endif + +#ifdef LIBRBD_SUPPORTS_DISCARD + .bdrv_aio_pdiscard = qemu_rbd_aio_pdiscard, +#endif + + .bdrv_snapshot_create = qemu_rbd_snap_create, + .bdrv_snapshot_delete = qemu_rbd_snap_remove, + .bdrv_snapshot_list = qemu_rbd_snap_list, + .bdrv_snapshot_goto = qemu_rbd_snap_rollback, +#ifdef LIBRBD_SUPPORTS_INVALIDATE + .bdrv_co_invalidate_cache = qemu_rbd_co_invalidate_cache, +#endif + + .strong_runtime_opts = qemu_rbd_strong_runtime_opts, +}; + +static void bdrv_rbd_init(void) +{ + bdrv_register(&bdrv_rbd); +} + +block_init(bdrv_rbd_init); diff --git a/block/replication.c b/block/replication.c new file mode 100644 index 000000000..0c7021578 --- /dev/null +++ b/block/replication.c @@ -0,0 +1,748 @@ +/* + * Replication Block filter + * + * Copyright (c) 2016 HUAWEI TECHNOLOGIES CO., LTD. + * Copyright (c) 2016 Intel Corporation + * Copyright (c) 2016 FUJITSU LIMITED + * + * Author: + * Wen Congyang + * + * This work is licensed under the terms of the GNU GPL, version 2 or later. + * See the COPYING file in the top-level directory. + */ + +#include "qemu/osdep.h" +#include "qemu/module.h" +#include "qemu/option.h" +#include "block/nbd.h" +#include "block/blockjob.h" +#include "block/block_int.h" +#include "block/block_backup.h" +#include "sysemu/block-backend.h" +#include "qapi/error.h" +#include "qapi/qmp/qdict.h" +#include "replication.h" + +typedef enum { + BLOCK_REPLICATION_NONE, /* block replication is not started */ + BLOCK_REPLICATION_RUNNING, /* block replication is running */ + BLOCK_REPLICATION_FAILOVER, /* failover is running in background */ + BLOCK_REPLICATION_FAILOVER_FAILED, /* failover failed */ + BLOCK_REPLICATION_DONE, /* block replication is done */ +} ReplicationStage; + +typedef struct BDRVReplicationState { + ReplicationMode mode; + ReplicationStage stage; + BdrvChild *active_disk; + BlockJob *commit_job; + BdrvChild *hidden_disk; + BdrvChild *secondary_disk; + BlockJob *backup_job; + char *top_id; + ReplicationState *rs; + Error *blocker; + bool orig_hidden_read_only; + bool orig_secondary_read_only; + int error; +} BDRVReplicationState; + +static void replication_start(ReplicationState *rs, ReplicationMode mode, + Error **errp); +static void replication_do_checkpoint(ReplicationState *rs, Error **errp); +static void replication_get_error(ReplicationState *rs, Error **errp); +static void replication_stop(ReplicationState *rs, bool failover, + Error **errp); + +#define REPLICATION_MODE "mode" +#define REPLICATION_TOP_ID "top-id" +static QemuOptsList replication_runtime_opts = { + .name = "replication", + .head = QTAILQ_HEAD_INITIALIZER(replication_runtime_opts.head), + .desc = { + { + .name = REPLICATION_MODE, + .type = QEMU_OPT_STRING, + }, + { + .name = REPLICATION_TOP_ID, + .type = QEMU_OPT_STRING, + }, + { /* end of list */ } + }, +}; + +static ReplicationOps replication_ops = { + .start = replication_start, + .checkpoint = replication_do_checkpoint, + .get_error = replication_get_error, + .stop = replication_stop, +}; + +static int replication_open(BlockDriverState *bs, QDict *options, + int flags, Error **errp) +{ + int ret; + BDRVReplicationState *s = bs->opaque; + QemuOpts *opts = NULL; + const char *mode; + const char *top_id; + + bs->file = bdrv_open_child(NULL, options, "file", bs, &child_of_bds, + BDRV_CHILD_FILTERED | BDRV_CHILD_PRIMARY, + false, errp); + if (!bs->file) { + return -EINVAL; + } + + ret = -EINVAL; + opts = qemu_opts_create(&replication_runtime_opts, NULL, 0, &error_abort); + if (!qemu_opts_absorb_qdict(opts, options, errp)) { + goto fail; + } + + mode = qemu_opt_get(opts, REPLICATION_MODE); + if (!mode) { + error_setg(errp, "Missing the option mode"); + goto fail; + } + + if (!strcmp(mode, "primary")) { + s->mode = REPLICATION_MODE_PRIMARY; + top_id = qemu_opt_get(opts, REPLICATION_TOP_ID); + if (top_id) { + error_setg(errp, + "The primary side does not support option top-id"); + goto fail; + } + } else if (!strcmp(mode, "secondary")) { + s->mode = REPLICATION_MODE_SECONDARY; + top_id = qemu_opt_get(opts, REPLICATION_TOP_ID); + s->top_id = g_strdup(top_id); + if (!s->top_id) { + error_setg(errp, "Missing the option top-id"); + goto fail; + } + } else { + error_setg(errp, + "The option mode's value should be primary or secondary"); + goto fail; + } + + s->rs = replication_new(bs, &replication_ops); + + ret = 0; + +fail: + qemu_opts_del(opts); + return ret; +} + +static void replication_close(BlockDriverState *bs) +{ + BDRVReplicationState *s = bs->opaque; + Job *commit_job; + + if (s->stage == BLOCK_REPLICATION_RUNNING) { + replication_stop(s->rs, false, NULL); + } + if (s->stage == BLOCK_REPLICATION_FAILOVER) { + commit_job = &s->commit_job->job; + assert(commit_job->aio_context == qemu_get_current_aio_context()); + job_cancel_sync(commit_job); + } + + if (s->mode == REPLICATION_MODE_SECONDARY) { + g_free(s->top_id); + } + + replication_remove(s->rs); +} + +static void replication_child_perm(BlockDriverState *bs, BdrvChild *c, + BdrvChildRole role, + BlockReopenQueue *reopen_queue, + uint64_t perm, uint64_t shared, + uint64_t *nperm, uint64_t *nshared) +{ + *nperm = BLK_PERM_CONSISTENT_READ; + if ((bs->open_flags & (BDRV_O_INACTIVE | BDRV_O_RDWR)) == BDRV_O_RDWR) { + *nperm |= BLK_PERM_WRITE; + } + *nshared = BLK_PERM_CONSISTENT_READ + | BLK_PERM_WRITE + | BLK_PERM_WRITE_UNCHANGED; + return; +} + +static int64_t replication_getlength(BlockDriverState *bs) +{ + return bdrv_getlength(bs->file->bs); +} + +static int replication_get_io_status(BDRVReplicationState *s) +{ + switch (s->stage) { + case BLOCK_REPLICATION_NONE: + return -EIO; + case BLOCK_REPLICATION_RUNNING: + return 0; + case BLOCK_REPLICATION_FAILOVER: + return s->mode == REPLICATION_MODE_PRIMARY ? -EIO : 0; + case BLOCK_REPLICATION_FAILOVER_FAILED: + return s->mode == REPLICATION_MODE_PRIMARY ? -EIO : 1; + case BLOCK_REPLICATION_DONE: + /* + * active commit job completes, and active disk and secondary_disk + * is swapped, so we can operate bs->file directly + */ + return s->mode == REPLICATION_MODE_PRIMARY ? -EIO : 0; + default: + abort(); + } +} + +static int replication_return_value(BDRVReplicationState *s, int ret) +{ + if (s->mode == REPLICATION_MODE_SECONDARY) { + return ret; + } + + if (ret < 0) { + s->error = ret; + ret = 0; + } + + return ret; +} + +static coroutine_fn int replication_co_readv(BlockDriverState *bs, + int64_t sector_num, + int remaining_sectors, + QEMUIOVector *qiov) +{ + BDRVReplicationState *s = bs->opaque; + int ret; + + if (s->mode == REPLICATION_MODE_PRIMARY) { + /* We only use it to forward primary write requests */ + return -EIO; + } + + ret = replication_get_io_status(s); + if (ret < 0) { + return ret; + } + + ret = bdrv_co_preadv(bs->file, sector_num * BDRV_SECTOR_SIZE, + remaining_sectors * BDRV_SECTOR_SIZE, qiov, 0); + + return replication_return_value(s, ret); +} + +static coroutine_fn int replication_co_writev(BlockDriverState *bs, + int64_t sector_num, + int remaining_sectors, + QEMUIOVector *qiov, + int flags) +{ + BDRVReplicationState *s = bs->opaque; + QEMUIOVector hd_qiov; + uint64_t bytes_done = 0; + BdrvChild *top = bs->file; + BdrvChild *base = s->secondary_disk; + BdrvChild *target; + int ret; + int64_t n; + + assert(!flags); + ret = replication_get_io_status(s); + if (ret < 0) { + goto out; + } + + if (ret == 0) { + ret = bdrv_co_pwritev(top, sector_num * BDRV_SECTOR_SIZE, + remaining_sectors * BDRV_SECTOR_SIZE, qiov, 0); + return replication_return_value(s, ret); + } + + /* + * Failover failed, only write to active disk if the sectors + * have already been allocated in active disk/hidden disk. + */ + qemu_iovec_init(&hd_qiov, qiov->niov); + while (remaining_sectors > 0) { + int64_t count; + + ret = bdrv_is_allocated_above(top->bs, base->bs, false, + sector_num * BDRV_SECTOR_SIZE, + remaining_sectors * BDRV_SECTOR_SIZE, + &count); + if (ret < 0) { + goto out1; + } + + assert(QEMU_IS_ALIGNED(count, BDRV_SECTOR_SIZE)); + n = count >> BDRV_SECTOR_BITS; + qemu_iovec_reset(&hd_qiov); + qemu_iovec_concat(&hd_qiov, qiov, bytes_done, count); + + target = ret ? top : base; + ret = bdrv_co_pwritev(target, sector_num * BDRV_SECTOR_SIZE, + n * BDRV_SECTOR_SIZE, &hd_qiov, 0); + if (ret < 0) { + goto out1; + } + + remaining_sectors -= n; + sector_num += n; + bytes_done += count; + } + +out1: + qemu_iovec_destroy(&hd_qiov); +out: + return ret; +} + +static void secondary_do_checkpoint(BDRVReplicationState *s, Error **errp) +{ + Error *local_err = NULL; + int ret; + + if (!s->backup_job) { + error_setg(errp, "Backup job was cancelled unexpectedly"); + return; + } + + backup_do_checkpoint(s->backup_job, &local_err); + if (local_err) { + error_propagate(errp, local_err); + return; + } + + if (!s->active_disk->bs->drv) { + error_setg(errp, "Active disk %s is ejected", + s->active_disk->bs->node_name); + return; + } + + ret = bdrv_make_empty(s->active_disk, errp); + if (ret < 0) { + return; + } + + if (!s->hidden_disk->bs->drv) { + error_setg(errp, "Hidden disk %s is ejected", + s->hidden_disk->bs->node_name); + return; + } + + BlockBackend *blk = blk_new(qemu_get_current_aio_context(), + BLK_PERM_WRITE, BLK_PERM_ALL); + blk_insert_bs(blk, s->hidden_disk->bs, &local_err); + if (local_err) { + error_propagate(errp, local_err); + blk_unref(blk); + return; + } + + ret = blk_make_empty(blk, errp); + blk_unref(blk); + if (ret < 0) { + return; + } +} + +/* This function is supposed to be called twice: + * first with writable = true, then with writable = false. + * The first call puts s->hidden_disk and s->secondary_disk in + * r/w mode, and the second puts them back in their original state. + */ +static void reopen_backing_file(BlockDriverState *bs, bool writable, + Error **errp) +{ + BDRVReplicationState *s = bs->opaque; + BlockReopenQueue *reopen_queue = NULL; + + if (writable) { + s->orig_hidden_read_only = bdrv_is_read_only(s->hidden_disk->bs); + s->orig_secondary_read_only = bdrv_is_read_only(s->secondary_disk->bs); + } + + bdrv_subtree_drained_begin(s->hidden_disk->bs); + bdrv_subtree_drained_begin(s->secondary_disk->bs); + + if (s->orig_hidden_read_only) { + QDict *opts = qdict_new(); + qdict_put_bool(opts, BDRV_OPT_READ_ONLY, !writable); + reopen_queue = bdrv_reopen_queue(reopen_queue, s->hidden_disk->bs, + opts, true); + } + + if (s->orig_secondary_read_only) { + QDict *opts = qdict_new(); + qdict_put_bool(opts, BDRV_OPT_READ_ONLY, !writable); + reopen_queue = bdrv_reopen_queue(reopen_queue, s->secondary_disk->bs, + opts, true); + } + + if (reopen_queue) { + bdrv_reopen_multiple(reopen_queue, errp); + } + + bdrv_subtree_drained_end(s->hidden_disk->bs); + bdrv_subtree_drained_end(s->secondary_disk->bs); +} + +static void backup_job_cleanup(BlockDriverState *bs) +{ + BDRVReplicationState *s = bs->opaque; + BlockDriverState *top_bs; + + s->backup_job = NULL; + + top_bs = bdrv_lookup_bs(s->top_id, s->top_id, NULL); + if (!top_bs) { + return; + } + bdrv_op_unblock_all(top_bs, s->blocker); + error_free(s->blocker); + reopen_backing_file(bs, false, NULL); +} + +static void backup_job_completed(void *opaque, int ret) +{ + BlockDriverState *bs = opaque; + BDRVReplicationState *s = bs->opaque; + + if (s->stage != BLOCK_REPLICATION_FAILOVER) { + /* The backup job is cancelled unexpectedly */ + s->error = -EIO; + } + + backup_job_cleanup(bs); +} + +static bool check_top_bs(BlockDriverState *top_bs, BlockDriverState *bs) +{ + BdrvChild *child; + + /* The bs itself is the top_bs */ + if (top_bs == bs) { + return true; + } + + /* Iterate over top_bs's children */ + QLIST_FOREACH(child, &top_bs->children, next) { + if (child->bs == bs || check_top_bs(child->bs, bs)) { + return true; + } + } + + return false; +} + +static void replication_start(ReplicationState *rs, ReplicationMode mode, + Error **errp) +{ + BlockDriverState *bs = rs->opaque; + BDRVReplicationState *s; + BlockDriverState *top_bs; + int64_t active_length, hidden_length, disk_length; + AioContext *aio_context; + Error *local_err = NULL; + + aio_context = bdrv_get_aio_context(bs); + aio_context_acquire(aio_context); + s = bs->opaque; + + if (s->stage == BLOCK_REPLICATION_DONE || + s->stage == BLOCK_REPLICATION_FAILOVER) { + /* + * This case happens when a secondary is promoted to primary. + * Ignore the request because the secondary side of replication + * doesn't have to do anything anymore. + */ + aio_context_release(aio_context); + return; + } + + if (s->stage != BLOCK_REPLICATION_NONE) { + error_setg(errp, "Block replication is running or done"); + aio_context_release(aio_context); + return; + } + + if (s->mode != mode) { + error_setg(errp, "The parameter mode's value is invalid, needs %d," + " but got %d", s->mode, mode); + aio_context_release(aio_context); + return; + } + + switch (s->mode) { + case REPLICATION_MODE_PRIMARY: + break; + case REPLICATION_MODE_SECONDARY: + s->active_disk = bs->file; + if (!s->active_disk || !s->active_disk->bs || + !s->active_disk->bs->backing) { + error_setg(errp, "Active disk doesn't have backing file"); + aio_context_release(aio_context); + return; + } + + s->hidden_disk = s->active_disk->bs->backing; + if (!s->hidden_disk->bs || !s->hidden_disk->bs->backing) { + error_setg(errp, "Hidden disk doesn't have backing file"); + aio_context_release(aio_context); + return; + } + + s->secondary_disk = s->hidden_disk->bs->backing; + if (!s->secondary_disk->bs || !bdrv_has_blk(s->secondary_disk->bs)) { + error_setg(errp, "The secondary disk doesn't have block backend"); + aio_context_release(aio_context); + return; + } + + /* verify the length */ + active_length = bdrv_getlength(s->active_disk->bs); + hidden_length = bdrv_getlength(s->hidden_disk->bs); + disk_length = bdrv_getlength(s->secondary_disk->bs); + if (active_length < 0 || hidden_length < 0 || disk_length < 0 || + active_length != hidden_length || hidden_length != disk_length) { + error_setg(errp, "Active disk, hidden disk, secondary disk's length" + " are not the same"); + aio_context_release(aio_context); + return; + } + + /* Must be true, or the bdrv_getlength() calls would have failed */ + assert(s->active_disk->bs->drv && s->hidden_disk->bs->drv); + + if (!s->active_disk->bs->drv->bdrv_make_empty || + !s->hidden_disk->bs->drv->bdrv_make_empty) { + error_setg(errp, + "Active disk or hidden disk doesn't support make_empty"); + aio_context_release(aio_context); + return; + } + + /* reopen the backing file in r/w mode */ + reopen_backing_file(bs, true, &local_err); + if (local_err) { + error_propagate(errp, local_err); + aio_context_release(aio_context); + return; + } + + /* start backup job now */ + error_setg(&s->blocker, + "Block device is in use by internal backup job"); + + top_bs = bdrv_lookup_bs(s->top_id, s->top_id, NULL); + if (!top_bs || !bdrv_is_root_node(top_bs) || + !check_top_bs(top_bs, bs)) { + error_setg(errp, "No top_bs or it is invalid"); + reopen_backing_file(bs, false, NULL); + aio_context_release(aio_context); + return; + } + bdrv_op_block_all(top_bs, s->blocker); + bdrv_op_unblock(top_bs, BLOCK_OP_TYPE_DATAPLANE, s->blocker); + + s->backup_job = backup_job_create( + NULL, s->secondary_disk->bs, s->hidden_disk->bs, + 0, MIRROR_SYNC_MODE_NONE, NULL, 0, false, NULL, + BLOCKDEV_ON_ERROR_REPORT, + BLOCKDEV_ON_ERROR_REPORT, JOB_INTERNAL, + backup_job_completed, bs, NULL, &local_err); + if (local_err) { + error_propagate(errp, local_err); + backup_job_cleanup(bs); + aio_context_release(aio_context); + return; + } + job_start(&s->backup_job->job); + break; + default: + aio_context_release(aio_context); + abort(); + } + + s->stage = BLOCK_REPLICATION_RUNNING; + + if (s->mode == REPLICATION_MODE_SECONDARY) { + secondary_do_checkpoint(s, errp); + } + + s->error = 0; + aio_context_release(aio_context); +} + +static void replication_do_checkpoint(ReplicationState *rs, Error **errp) +{ + BlockDriverState *bs = rs->opaque; + BDRVReplicationState *s; + AioContext *aio_context; + + aio_context = bdrv_get_aio_context(bs); + aio_context_acquire(aio_context); + s = bs->opaque; + + if (s->stage == BLOCK_REPLICATION_DONE || + s->stage == BLOCK_REPLICATION_FAILOVER) { + /* + * This case happens when a secondary was promoted to primary. + * Ignore the request because the secondary side of replication + * doesn't have to do anything anymore. + */ + aio_context_release(aio_context); + return; + } + + if (s->mode == REPLICATION_MODE_SECONDARY) { + secondary_do_checkpoint(s, errp); + } + aio_context_release(aio_context); +} + +static void replication_get_error(ReplicationState *rs, Error **errp) +{ + BlockDriverState *bs = rs->opaque; + BDRVReplicationState *s; + AioContext *aio_context; + + aio_context = bdrv_get_aio_context(bs); + aio_context_acquire(aio_context); + s = bs->opaque; + + if (s->stage == BLOCK_REPLICATION_NONE) { + error_setg(errp, "Block replication is not running"); + aio_context_release(aio_context); + return; + } + + if (s->error) { + error_setg(errp, "I/O error occurred"); + aio_context_release(aio_context); + return; + } + aio_context_release(aio_context); +} + +static void replication_done(void *opaque, int ret) +{ + BlockDriverState *bs = opaque; + BDRVReplicationState *s = bs->opaque; + + if (ret == 0) { + s->stage = BLOCK_REPLICATION_DONE; + + s->active_disk = NULL; + s->secondary_disk = NULL; + s->hidden_disk = NULL; + s->error = 0; + } else { + s->stage = BLOCK_REPLICATION_FAILOVER_FAILED; + s->error = -EIO; + } +} + +static void replication_stop(ReplicationState *rs, bool failover, Error **errp) +{ + BlockDriverState *bs = rs->opaque; + BDRVReplicationState *s; + AioContext *aio_context; + + aio_context = bdrv_get_aio_context(bs); + aio_context_acquire(aio_context); + s = bs->opaque; + + if (s->stage == BLOCK_REPLICATION_DONE || + s->stage == BLOCK_REPLICATION_FAILOVER) { + /* + * This case happens when a secondary was promoted to primary. + * Ignore the request because the secondary side of replication + * doesn't have to do anything anymore. + */ + aio_context_release(aio_context); + return; + } + + if (s->stage != BLOCK_REPLICATION_RUNNING) { + error_setg(errp, "Block replication is not running"); + aio_context_release(aio_context); + return; + } + + switch (s->mode) { + case REPLICATION_MODE_PRIMARY: + s->stage = BLOCK_REPLICATION_DONE; + s->error = 0; + break; + case REPLICATION_MODE_SECONDARY: + /* + * This BDS will be closed, and the job should be completed + * before the BDS is closed, because we will access hidden + * disk, secondary disk in backup_job_completed(). + */ + if (s->backup_job) { + job_cancel_sync(&s->backup_job->job); + } + + if (!failover) { + secondary_do_checkpoint(s, errp); + s->stage = BLOCK_REPLICATION_DONE; + aio_context_release(aio_context); + return; + } + + s->stage = BLOCK_REPLICATION_FAILOVER; + s->commit_job = commit_active_start( + NULL, s->active_disk->bs, s->secondary_disk->bs, + JOB_INTERNAL, 0, BLOCKDEV_ON_ERROR_REPORT, + NULL, replication_done, bs, true, errp); + break; + default: + aio_context_release(aio_context); + abort(); + } + aio_context_release(aio_context); +} + +static const char *const replication_strong_runtime_opts[] = { + REPLICATION_MODE, + REPLICATION_TOP_ID, + + NULL +}; + +static BlockDriver bdrv_replication = { + .format_name = "replication", + .instance_size = sizeof(BDRVReplicationState), + + .bdrv_open = replication_open, + .bdrv_close = replication_close, + .bdrv_child_perm = replication_child_perm, + + .bdrv_getlength = replication_getlength, + .bdrv_co_readv = replication_co_readv, + .bdrv_co_writev = replication_co_writev, + + .is_filter = true, + + .has_variable_length = true, + .strong_runtime_opts = replication_strong_runtime_opts, +}; + +static void bdrv_replication_init(void) +{ + bdrv_register(&bdrv_replication); +} + +block_init(bdrv_replication_init); diff --git a/block/sheepdog.c b/block/sheepdog.c new file mode 100644 index 000000000..a45c73826 --- /dev/null +++ b/block/sheepdog.c @@ -0,0 +1,3356 @@ +/* + * Copyright (C) 2009-2010 Nippon Telegraph and Telephone Corporation. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License version + * 2 as published by the Free Software Foundation. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + * + * Contributions after 2012-01-13 are licensed under the terms of the + * GNU GPL, version 2 or (at your option) any later version. + */ + +#include "qemu/osdep.h" +#include "qemu-common.h" +#include "qapi/error.h" +#include "qapi/qapi-visit-sockets.h" +#include "qapi/qapi-visit-block-core.h" +#include "qapi/qmp/qdict.h" +#include "qapi/qobject-input-visitor.h" +#include "qapi/qobject-output-visitor.h" +#include "qemu/uri.h" +#include "qemu/error-report.h" +#include "qemu/main-loop.h" +#include "qemu/module.h" +#include "qemu/option.h" +#include "qemu/sockets.h" +#include "block/block_int.h" +#include "block/qdict.h" +#include "sysemu/block-backend.h" +#include "qemu/bitops.h" +#include "qemu/cutils.h" +#include "trace.h" + +#define SD_PROTO_VER 0x01 + +#define SD_DEFAULT_ADDR "localhost" +#define SD_DEFAULT_PORT 7000 + +#define SD_OP_CREATE_AND_WRITE_OBJ 0x01 +#define SD_OP_READ_OBJ 0x02 +#define SD_OP_WRITE_OBJ 0x03 +/* 0x04 is used internally by Sheepdog */ + +#define SD_OP_NEW_VDI 0x11 +#define SD_OP_LOCK_VDI 0x12 +#define SD_OP_RELEASE_VDI 0x13 +#define SD_OP_GET_VDI_INFO 0x14 +#define SD_OP_READ_VDIS 0x15 +#define SD_OP_FLUSH_VDI 0x16 +#define SD_OP_DEL_VDI 0x17 +#define SD_OP_GET_CLUSTER_DEFAULT 0x18 + +#define SD_FLAG_CMD_WRITE 0x01 +#define SD_FLAG_CMD_COW 0x02 +#define SD_FLAG_CMD_CACHE 0x04 /* Writeback mode for cache */ +#define SD_FLAG_CMD_DIRECT 0x08 /* Don't use cache */ + +#define SD_RES_SUCCESS 0x00 /* Success */ +#define SD_RES_UNKNOWN 0x01 /* Unknown error */ +#define SD_RES_NO_OBJ 0x02 /* No object found */ +#define SD_RES_EIO 0x03 /* I/O error */ +#define SD_RES_VDI_EXIST 0x04 /* Vdi exists already */ +#define SD_RES_INVALID_PARMS 0x05 /* Invalid parameters */ +#define SD_RES_SYSTEM_ERROR 0x06 /* System error */ +#define SD_RES_VDI_LOCKED 0x07 /* Vdi is locked */ +#define SD_RES_NO_VDI 0x08 /* No vdi found */ +#define SD_RES_NO_BASE_VDI 0x09 /* No base vdi found */ +#define SD_RES_VDI_READ 0x0A /* Cannot read requested vdi */ +#define SD_RES_VDI_WRITE 0x0B /* Cannot write requested vdi */ +#define SD_RES_BASE_VDI_READ 0x0C /* Cannot read base vdi */ +#define SD_RES_BASE_VDI_WRITE 0x0D /* Cannot write base vdi */ +#define SD_RES_NO_TAG 0x0E /* Requested tag is not found */ +#define SD_RES_STARTUP 0x0F /* Sheepdog is on starting up */ +#define SD_RES_VDI_NOT_LOCKED 0x10 /* Vdi is not locked */ +#define SD_RES_SHUTDOWN 0x11 /* Sheepdog is shutting down */ +#define SD_RES_NO_MEM 0x12 /* Cannot allocate memory */ +#define SD_RES_FULL_VDI 0x13 /* we already have the maximum vdis */ +#define SD_RES_VER_MISMATCH 0x14 /* Protocol version mismatch */ +#define SD_RES_NO_SPACE 0x15 /* Server has no room for new objects */ +#define SD_RES_WAIT_FOR_FORMAT 0x16 /* Waiting for a format operation */ +#define SD_RES_WAIT_FOR_JOIN 0x17 /* Waiting for other nodes joining */ +#define SD_RES_JOIN_FAILED 0x18 /* Target node had failed to join sheepdog */ +#define SD_RES_HALT 0x19 /* Sheepdog is stopped serving IO request */ +#define SD_RES_READONLY 0x1A /* Object is read-only */ + +/* + * Object ID rules + * + * 0 - 19 (20 bits): data object space + * 20 - 31 (12 bits): reserved data object space + * 32 - 55 (24 bits): vdi object space + * 56 - 59 ( 4 bits): reserved vdi object space + * 60 - 63 ( 4 bits): object type identifier space + */ + +#define VDI_SPACE_SHIFT 32 +#define VDI_BIT (UINT64_C(1) << 63) +#define VMSTATE_BIT (UINT64_C(1) << 62) +#define MAX_DATA_OBJS (UINT64_C(1) << 20) +#define MAX_CHILDREN 1024 +#define SD_MAX_VDI_LEN 256 +#define SD_MAX_VDI_TAG_LEN 256 +#define SD_NR_VDIS (1U << 24) +#define SD_DATA_OBJ_SIZE (UINT64_C(1) << 22) +#define SD_MAX_VDI_SIZE (SD_DATA_OBJ_SIZE * MAX_DATA_OBJS) +#define SD_DEFAULT_BLOCK_SIZE_SHIFT 22 +/* + * For erasure coding, we use at most SD_EC_MAX_STRIP for data strips and + * (SD_EC_MAX_STRIP - 1) for parity strips + * + * SD_MAX_COPIES is sum of number of data strips and parity strips. + */ +#define SD_EC_MAX_STRIP 16 +#define SD_MAX_COPIES (SD_EC_MAX_STRIP * 2 - 1) + +#define SD_INODE_SIZE (sizeof(SheepdogInode)) +#define CURRENT_VDI_ID 0 + +#define LOCK_TYPE_NORMAL 0 +#define LOCK_TYPE_SHARED 1 /* for iSCSI multipath */ + +typedef struct SheepdogReq { + uint8_t proto_ver; + uint8_t opcode; + uint16_t flags; + uint32_t epoch; + uint32_t id; + uint32_t data_length; + uint32_t opcode_specific[8]; +} SheepdogReq; + +typedef struct SheepdogRsp { + uint8_t proto_ver; + uint8_t opcode; + uint16_t flags; + uint32_t epoch; + uint32_t id; + uint32_t data_length; + uint32_t result; + uint32_t opcode_specific[7]; +} SheepdogRsp; + +typedef struct SheepdogObjReq { + uint8_t proto_ver; + uint8_t opcode; + uint16_t flags; + uint32_t epoch; + uint32_t id; + uint32_t data_length; + uint64_t oid; + uint64_t cow_oid; + uint8_t copies; + uint8_t copy_policy; + uint8_t reserved[6]; + uint64_t offset; +} SheepdogObjReq; + +typedef struct SheepdogObjRsp { + uint8_t proto_ver; + uint8_t opcode; + uint16_t flags; + uint32_t epoch; + uint32_t id; + uint32_t data_length; + uint32_t result; + uint8_t copies; + uint8_t copy_policy; + uint8_t reserved[2]; + uint32_t pad[6]; +} SheepdogObjRsp; + +typedef struct SheepdogVdiReq { + uint8_t proto_ver; + uint8_t opcode; + uint16_t flags; + uint32_t epoch; + uint32_t id; + uint32_t data_length; + uint64_t vdi_size; + uint32_t base_vdi_id; + uint8_t copies; + uint8_t copy_policy; + uint8_t store_policy; + uint8_t block_size_shift; + uint32_t snapid; + uint32_t type; + uint32_t pad[2]; +} SheepdogVdiReq; + +typedef struct SheepdogVdiRsp { + uint8_t proto_ver; + uint8_t opcode; + uint16_t flags; + uint32_t epoch; + uint32_t id; + uint32_t data_length; + uint32_t result; + uint32_t rsvd; + uint32_t vdi_id; + uint32_t pad[5]; +} SheepdogVdiRsp; + +typedef struct SheepdogClusterRsp { + uint8_t proto_ver; + uint8_t opcode; + uint16_t flags; + uint32_t epoch; + uint32_t id; + uint32_t data_length; + uint32_t result; + uint8_t nr_copies; + uint8_t copy_policy; + uint8_t block_size_shift; + uint8_t __pad1; + uint32_t __pad2[6]; +} SheepdogClusterRsp; + +typedef struct SheepdogInode { + char name[SD_MAX_VDI_LEN]; + char tag[SD_MAX_VDI_TAG_LEN]; + uint64_t ctime; + uint64_t snap_ctime; + uint64_t vm_clock_nsec; + uint64_t vdi_size; + uint64_t vm_state_size; + uint16_t copy_policy; + uint8_t nr_copies; + uint8_t block_size_shift; + uint32_t snap_id; + uint32_t vdi_id; + uint32_t parent_vdi_id; + uint32_t child_vdi_id[MAX_CHILDREN]; + uint32_t data_vdi_id[MAX_DATA_OBJS]; +} SheepdogInode; + +#define SD_INODE_HEADER_SIZE offsetof(SheepdogInode, data_vdi_id) + +/* + * 64 bit FNV-1a non-zero initial basis + */ +#define FNV1A_64_INIT ((uint64_t)0xcbf29ce484222325ULL) + +static void deprecation_warning(void) +{ + static bool warned; + + if (!warned) { + warn_report("the sheepdog block driver is deprecated"); + warned = true; + } +} + +/* + * 64 bit Fowler/Noll/Vo FNV-1a hash code + */ +static inline uint64_t fnv_64a_buf(void *buf, size_t len, uint64_t hval) +{ + unsigned char *bp = buf; + unsigned char *be = bp + len; + while (bp < be) { + hval ^= (uint64_t) *bp++; + hval += (hval << 1) + (hval << 4) + (hval << 5) + + (hval << 7) + (hval << 8) + (hval << 40); + } + return hval; +} + +static inline bool is_data_obj_writable(SheepdogInode *inode, unsigned int idx) +{ + return inode->vdi_id == inode->data_vdi_id[idx]; +} + +static inline bool is_data_obj(uint64_t oid) +{ + return !(VDI_BIT & oid); +} + +static inline uint64_t data_oid_to_idx(uint64_t oid) +{ + return oid & (MAX_DATA_OBJS - 1); +} + +static inline uint32_t oid_to_vid(uint64_t oid) +{ + return (oid & ~VDI_BIT) >> VDI_SPACE_SHIFT; +} + +static inline uint64_t vid_to_vdi_oid(uint32_t vid) +{ + return VDI_BIT | ((uint64_t)vid << VDI_SPACE_SHIFT); +} + +static inline uint64_t vid_to_vmstate_oid(uint32_t vid, uint32_t idx) +{ + return VMSTATE_BIT | ((uint64_t)vid << VDI_SPACE_SHIFT) | idx; +} + +static inline uint64_t vid_to_data_oid(uint32_t vid, uint32_t idx) +{ + return ((uint64_t)vid << VDI_SPACE_SHIFT) | idx; +} + +static inline bool is_snapshot(struct SheepdogInode *inode) +{ + return !!inode->snap_ctime; +} + +static inline size_t count_data_objs(const struct SheepdogInode *inode) +{ + return DIV_ROUND_UP(inode->vdi_size, + (1UL << inode->block_size_shift)); +} + +typedef struct SheepdogAIOCB SheepdogAIOCB; +typedef struct BDRVSheepdogState BDRVSheepdogState; + +typedef struct AIOReq { + SheepdogAIOCB *aiocb; + unsigned int iov_offset; + + uint64_t oid; + uint64_t base_oid; + uint64_t offset; + unsigned int data_len; + uint8_t flags; + uint32_t id; + bool create; + + QLIST_ENTRY(AIOReq) aio_siblings; +} AIOReq; + +enum AIOCBState { + AIOCB_WRITE_UDATA, + AIOCB_READ_UDATA, + AIOCB_FLUSH_CACHE, + AIOCB_DISCARD_OBJ, +}; + +#define AIOCBOverlapping(x, y) \ + (!(x->max_affect_data_idx < y->min_affect_data_idx \ + || y->max_affect_data_idx < x->min_affect_data_idx)) + +struct SheepdogAIOCB { + BDRVSheepdogState *s; + + QEMUIOVector *qiov; + + int64_t sector_num; + int nb_sectors; + + int ret; + enum AIOCBState aiocb_type; + + Coroutine *coroutine; + int nr_pending; + + uint32_t min_affect_data_idx; + uint32_t max_affect_data_idx; + + /* + * The difference between affect_data_idx and dirty_data_idx: + * affect_data_idx represents range of index of all request types. + * dirty_data_idx represents range of index updated by COW requests. + * dirty_data_idx is used for updating an inode object. + */ + uint32_t min_dirty_data_idx; + uint32_t max_dirty_data_idx; + + QLIST_ENTRY(SheepdogAIOCB) aiocb_siblings; +}; + +struct BDRVSheepdogState { + BlockDriverState *bs; + AioContext *aio_context; + + SheepdogInode inode; + + char name[SD_MAX_VDI_LEN]; + bool is_snapshot; + uint32_t cache_flags; + bool discard_supported; + + SocketAddress *addr; + int fd; + + CoMutex lock; + Coroutine *co_send; + Coroutine *co_recv; + + uint32_t aioreq_seq_num; + + /* Every aio request must be linked to either of these queues. */ + QLIST_HEAD(, AIOReq) inflight_aio_head; + QLIST_HEAD(, AIOReq) failed_aio_head; + + CoMutex queue_lock; + CoQueue overlapping_queue; + QLIST_HEAD(, SheepdogAIOCB) inflight_aiocb_head; +}; + +typedef struct BDRVSheepdogReopenState { + int fd; + int cache_flags; +} BDRVSheepdogReopenState; + +static const char *sd_strerror(int err) +{ + int i; + + static const struct { + int err; + const char *desc; + } errors[] = { + {SD_RES_SUCCESS, "Success"}, + {SD_RES_UNKNOWN, "Unknown error"}, + {SD_RES_NO_OBJ, "No object found"}, + {SD_RES_EIO, "I/O error"}, + {SD_RES_VDI_EXIST, "VDI exists already"}, + {SD_RES_INVALID_PARMS, "Invalid parameters"}, + {SD_RES_SYSTEM_ERROR, "System error"}, + {SD_RES_VDI_LOCKED, "VDI is already locked"}, + {SD_RES_NO_VDI, "No vdi found"}, + {SD_RES_NO_BASE_VDI, "No base VDI found"}, + {SD_RES_VDI_READ, "Failed read the requested VDI"}, + {SD_RES_VDI_WRITE, "Failed to write the requested VDI"}, + {SD_RES_BASE_VDI_READ, "Failed to read the base VDI"}, + {SD_RES_BASE_VDI_WRITE, "Failed to write the base VDI"}, + {SD_RES_NO_TAG, "Failed to find the requested tag"}, + {SD_RES_STARTUP, "The system is still booting"}, + {SD_RES_VDI_NOT_LOCKED, "VDI isn't locked"}, + {SD_RES_SHUTDOWN, "The system is shutting down"}, + {SD_RES_NO_MEM, "Out of memory on the server"}, + {SD_RES_FULL_VDI, "We already have the maximum vdis"}, + {SD_RES_VER_MISMATCH, "Protocol version mismatch"}, + {SD_RES_NO_SPACE, "Server has no space for new objects"}, + {SD_RES_WAIT_FOR_FORMAT, "Sheepdog is waiting for a format operation"}, + {SD_RES_WAIT_FOR_JOIN, "Sheepdog is waiting for other nodes joining"}, + {SD_RES_JOIN_FAILED, "Target node had failed to join sheepdog"}, + {SD_RES_HALT, "Sheepdog is stopped serving IO request"}, + {SD_RES_READONLY, "Object is read-only"}, + }; + + for (i = 0; i < ARRAY_SIZE(errors); ++i) { + if (errors[i].err == err) { + return errors[i].desc; + } + } + + return "Invalid error code"; +} + +/* + * Sheepdog I/O handling: + * + * 1. In sd_co_rw_vector, we send the I/O requests to the server and + * link the requests to the inflight_list in the + * BDRVSheepdogState. The function yields while waiting for + * receiving the response. + * + * 2. We receive the response in aio_read_response, the fd handler to + * the sheepdog connection. We switch back to sd_co_readv/sd_writev + * after all the requests belonging to the AIOCB are finished. If + * needed, sd_co_writev will send another requests for the vdi object. + */ + +static inline AIOReq *alloc_aio_req(BDRVSheepdogState *s, SheepdogAIOCB *acb, + uint64_t oid, unsigned int data_len, + uint64_t offset, uint8_t flags, bool create, + uint64_t base_oid, unsigned int iov_offset) +{ + AIOReq *aio_req; + + aio_req = g_malloc(sizeof(*aio_req)); + aio_req->aiocb = acb; + aio_req->iov_offset = iov_offset; + aio_req->oid = oid; + aio_req->base_oid = base_oid; + aio_req->offset = offset; + aio_req->data_len = data_len; + aio_req->flags = flags; + aio_req->id = s->aioreq_seq_num++; + aio_req->create = create; + + acb->nr_pending++; + return aio_req; +} + +static void wait_for_overlapping_aiocb(BDRVSheepdogState *s, SheepdogAIOCB *acb) +{ + SheepdogAIOCB *cb; + +retry: + QLIST_FOREACH(cb, &s->inflight_aiocb_head, aiocb_siblings) { + if (AIOCBOverlapping(acb, cb)) { + qemu_co_queue_wait(&s->overlapping_queue, &s->queue_lock); + goto retry; + } + } +} + +static void sd_aio_setup(SheepdogAIOCB *acb, BDRVSheepdogState *s, + QEMUIOVector *qiov, int64_t sector_num, int nb_sectors, + int type) +{ + uint32_t object_size; + + object_size = (UINT32_C(1) << s->inode.block_size_shift); + + acb->s = s; + + acb->qiov = qiov; + + acb->sector_num = sector_num; + acb->nb_sectors = nb_sectors; + + acb->coroutine = qemu_coroutine_self(); + acb->ret = 0; + acb->nr_pending = 0; + + acb->min_affect_data_idx = acb->sector_num * BDRV_SECTOR_SIZE / object_size; + acb->max_affect_data_idx = (acb->sector_num * BDRV_SECTOR_SIZE + + acb->nb_sectors * BDRV_SECTOR_SIZE) / object_size; + + acb->min_dirty_data_idx = UINT32_MAX; + acb->max_dirty_data_idx = 0; + acb->aiocb_type = type; + + if (type == AIOCB_FLUSH_CACHE) { + return; + } + + qemu_co_mutex_lock(&s->queue_lock); + wait_for_overlapping_aiocb(s, acb); + QLIST_INSERT_HEAD(&s->inflight_aiocb_head, acb, aiocb_siblings); + qemu_co_mutex_unlock(&s->queue_lock); +} + +static SocketAddress *sd_server_config(QDict *options, Error **errp) +{ + QDict *server = NULL; + Visitor *iv = NULL; + SocketAddress *saddr = NULL; + + qdict_extract_subqdict(options, &server, "server."); + + iv = qobject_input_visitor_new_flat_confused(server, errp); + if (!iv) { + goto done; + } + + if (!visit_type_SocketAddress(iv, NULL, &saddr, errp)) { + goto done; + } + +done: + visit_free(iv); + qobject_unref(server); + return saddr; +} + +/* Return -EIO in case of error, file descriptor on success */ +static int connect_to_sdog(BDRVSheepdogState *s, Error **errp) +{ + int fd; + + fd = socket_connect(s->addr, errp); + + if (s->addr->type == SOCKET_ADDRESS_TYPE_INET && fd >= 0) { + int ret = socket_set_nodelay(fd); + if (ret < 0) { + warn_report("can't set TCP_NODELAY: %s", strerror(errno)); + } + } + + if (fd >= 0) { + qemu_set_nonblock(fd); + } else { + fd = -EIO; + } + + return fd; +} + +/* Return 0 on success and -errno in case of error */ +static coroutine_fn int send_co_req(int sockfd, SheepdogReq *hdr, void *data, + unsigned int *wlen) +{ + int ret; + + ret = qemu_co_send(sockfd, hdr, sizeof(*hdr)); + if (ret != sizeof(*hdr)) { + error_report("failed to send a req, %s", strerror(errno)); + return -errno; + } + + ret = qemu_co_send(sockfd, data, *wlen); + if (ret != *wlen) { + error_report("failed to send a req, %s", strerror(errno)); + return -errno; + } + + return ret; +} + +typedef struct SheepdogReqCo { + int sockfd; + BlockDriverState *bs; + AioContext *aio_context; + SheepdogReq *hdr; + void *data; + unsigned int *wlen; + unsigned int *rlen; + int ret; + bool finished; + Coroutine *co; +} SheepdogReqCo; + +static void restart_co_req(void *opaque) +{ + SheepdogReqCo *srco = opaque; + + aio_co_wake(srco->co); +} + +static coroutine_fn void do_co_req(void *opaque) +{ + int ret; + SheepdogReqCo *srco = opaque; + int sockfd = srco->sockfd; + SheepdogReq *hdr = srco->hdr; + void *data = srco->data; + unsigned int *wlen = srco->wlen; + unsigned int *rlen = srco->rlen; + + srco->co = qemu_coroutine_self(); + aio_set_fd_handler(srco->aio_context, sockfd, false, + NULL, restart_co_req, NULL, srco); + + ret = send_co_req(sockfd, hdr, data, wlen); + if (ret < 0) { + goto out; + } + + aio_set_fd_handler(srco->aio_context, sockfd, false, + restart_co_req, NULL, NULL, srco); + + ret = qemu_co_recv(sockfd, hdr, sizeof(*hdr)); + if (ret != sizeof(*hdr)) { + error_report("failed to get a rsp, %s", strerror(errno)); + ret = -errno; + goto out; + } + + if (*rlen > hdr->data_length) { + *rlen = hdr->data_length; + } + + if (*rlen) { + ret = qemu_co_recv(sockfd, data, *rlen); + if (ret != *rlen) { + error_report("failed to get the data, %s", strerror(errno)); + ret = -errno; + goto out; + } + } + ret = 0; +out: + /* there is at most one request for this sockfd, so it is safe to + * set each handler to NULL. */ + aio_set_fd_handler(srco->aio_context, sockfd, false, + NULL, NULL, NULL, NULL); + + srco->co = NULL; + srco->ret = ret; + /* Set srco->finished before reading bs->wakeup. */ + qatomic_mb_set(&srco->finished, true); + if (srco->bs) { + bdrv_wakeup(srco->bs); + } +} + +/* + * Send the request to the sheep in a synchronous manner. + * + * Return 0 on success, -errno in case of error. + */ +static int do_req(int sockfd, BlockDriverState *bs, SheepdogReq *hdr, + void *data, unsigned int *wlen, unsigned int *rlen) +{ + Coroutine *co; + SheepdogReqCo srco = { + .sockfd = sockfd, + .aio_context = bs ? bdrv_get_aio_context(bs) : qemu_get_aio_context(), + .bs = bs, + .hdr = hdr, + .data = data, + .wlen = wlen, + .rlen = rlen, + .ret = 0, + .finished = false, + }; + + if (qemu_in_coroutine()) { + do_co_req(&srco); + } else { + co = qemu_coroutine_create(do_co_req, &srco); + if (bs) { + bdrv_coroutine_enter(bs, co); + BDRV_POLL_WHILE(bs, !srco.finished); + } else { + qemu_coroutine_enter(co); + while (!srco.finished) { + aio_poll(qemu_get_aio_context(), true); + } + } + } + + return srco.ret; +} + +static void coroutine_fn add_aio_request(BDRVSheepdogState *s, AIOReq *aio_req, + struct iovec *iov, int niov, + enum AIOCBState aiocb_type); +static void coroutine_fn resend_aioreq(BDRVSheepdogState *s, AIOReq *aio_req); +static int reload_inode(BDRVSheepdogState *s, uint32_t snapid, const char *tag); +static int get_sheep_fd(BDRVSheepdogState *s, Error **errp); +static void co_write_request(void *opaque); + +static coroutine_fn void reconnect_to_sdog(void *opaque) +{ + BDRVSheepdogState *s = opaque; + AIOReq *aio_req, *next; + + aio_set_fd_handler(s->aio_context, s->fd, false, NULL, + NULL, NULL, NULL); + close(s->fd); + s->fd = -1; + + /* Wait for outstanding write requests to be completed. */ + while (s->co_send != NULL) { + co_write_request(opaque); + } + + /* Try to reconnect the sheepdog server every one second. */ + while (s->fd < 0) { + Error *local_err = NULL; + s->fd = get_sheep_fd(s, &local_err); + if (s->fd < 0) { + trace_sheepdog_reconnect_to_sdog(); + error_report_err(local_err); + qemu_co_sleep_ns(QEMU_CLOCK_REALTIME, NANOSECONDS_PER_SECOND); + } + }; + + /* + * Now we have to resend all the request in the inflight queue. However, + * resend_aioreq() can yield and newly created requests can be added to the + * inflight queue before the coroutine is resumed. To avoid mixing them, we + * have to move all the inflight requests to the failed queue before + * resend_aioreq() is called. + */ + qemu_co_mutex_lock(&s->queue_lock); + QLIST_FOREACH_SAFE(aio_req, &s->inflight_aio_head, aio_siblings, next) { + QLIST_REMOVE(aio_req, aio_siblings); + QLIST_INSERT_HEAD(&s->failed_aio_head, aio_req, aio_siblings); + } + + /* Resend all the failed aio requests. */ + while (!QLIST_EMPTY(&s->failed_aio_head)) { + aio_req = QLIST_FIRST(&s->failed_aio_head); + QLIST_REMOVE(aio_req, aio_siblings); + qemu_co_mutex_unlock(&s->queue_lock); + resend_aioreq(s, aio_req); + qemu_co_mutex_lock(&s->queue_lock); + } + qemu_co_mutex_unlock(&s->queue_lock); +} + +/* + * Receive responses of the I/O requests. + * + * This function is registered as a fd handler, and called from the + * main loop when s->fd is ready for reading responses. + */ +static void coroutine_fn aio_read_response(void *opaque) +{ + SheepdogObjRsp rsp; + BDRVSheepdogState *s = opaque; + int fd = s->fd; + int ret; + AIOReq *aio_req = NULL; + SheepdogAIOCB *acb; + uint64_t idx; + + /* read a header */ + ret = qemu_co_recv(fd, &rsp, sizeof(rsp)); + if (ret != sizeof(rsp)) { + error_report("failed to get the header, %s", strerror(errno)); + goto err; + } + + /* find the right aio_req from the inflight aio list */ + QLIST_FOREACH(aio_req, &s->inflight_aio_head, aio_siblings) { + if (aio_req->id == rsp.id) { + break; + } + } + if (!aio_req) { + error_report("cannot find aio_req %x", rsp.id); + goto err; + } + + acb = aio_req->aiocb; + + switch (acb->aiocb_type) { + case AIOCB_WRITE_UDATA: + if (!is_data_obj(aio_req->oid)) { + break; + } + idx = data_oid_to_idx(aio_req->oid); + + if (aio_req->create) { + /* + * If the object is newly created one, we need to update + * the vdi object (metadata object). min_dirty_data_idx + * and max_dirty_data_idx are changed to include updated + * index between them. + */ + if (rsp.result == SD_RES_SUCCESS) { + s->inode.data_vdi_id[idx] = s->inode.vdi_id; + acb->max_dirty_data_idx = MAX(idx, acb->max_dirty_data_idx); + acb->min_dirty_data_idx = MIN(idx, acb->min_dirty_data_idx); + } + } + break; + case AIOCB_READ_UDATA: + ret = qemu_co_recvv(fd, acb->qiov->iov, acb->qiov->niov, + aio_req->iov_offset, rsp.data_length); + if (ret != rsp.data_length) { + error_report("failed to get the data, %s", strerror(errno)); + goto err; + } + break; + case AIOCB_FLUSH_CACHE: + if (rsp.result == SD_RES_INVALID_PARMS) { + trace_sheepdog_aio_read_response(); + s->cache_flags = SD_FLAG_CMD_DIRECT; + rsp.result = SD_RES_SUCCESS; + } + break; + case AIOCB_DISCARD_OBJ: + switch (rsp.result) { + case SD_RES_INVALID_PARMS: + error_report("server doesn't support discard command"); + rsp.result = SD_RES_SUCCESS; + s->discard_supported = false; + break; + default: + break; + } + } + + /* No more data for this aio_req (reload_inode below uses its own file + * descriptor handler which doesn't use co_recv). + */ + s->co_recv = NULL; + + qemu_co_mutex_lock(&s->queue_lock); + QLIST_REMOVE(aio_req, aio_siblings); + qemu_co_mutex_unlock(&s->queue_lock); + + switch (rsp.result) { + case SD_RES_SUCCESS: + break; + case SD_RES_READONLY: + if (s->inode.vdi_id == oid_to_vid(aio_req->oid)) { + ret = reload_inode(s, 0, ""); + if (ret < 0) { + goto err; + } + } + if (is_data_obj(aio_req->oid)) { + aio_req->oid = vid_to_data_oid(s->inode.vdi_id, + data_oid_to_idx(aio_req->oid)); + } else { + aio_req->oid = vid_to_vdi_oid(s->inode.vdi_id); + } + resend_aioreq(s, aio_req); + return; + default: + acb->ret = -EIO; + error_report("%s", sd_strerror(rsp.result)); + break; + } + + g_free(aio_req); + + if (!--acb->nr_pending) { + /* + * We've finished all requests which belong to the AIOCB, so + * we can switch back to sd_co_readv/writev now. + */ + aio_co_wake(acb->coroutine); + } + + return; + +err: + reconnect_to_sdog(opaque); +} + +static void co_read_response(void *opaque) +{ + BDRVSheepdogState *s = opaque; + + if (!s->co_recv) { + s->co_recv = qemu_coroutine_create(aio_read_response, opaque); + } + + aio_co_enter(s->aio_context, s->co_recv); +} + +static void co_write_request(void *opaque) +{ + BDRVSheepdogState *s = opaque; + + aio_co_wake(s->co_send); +} + +/* + * Return a socket descriptor to read/write objects. + * + * We cannot use this descriptor for other operations because + * the block driver may be on waiting response from the server. + */ +static int get_sheep_fd(BDRVSheepdogState *s, Error **errp) +{ + int fd; + + fd = connect_to_sdog(s, errp); + if (fd < 0) { + return fd; + } + + aio_set_fd_handler(s->aio_context, fd, false, + co_read_response, NULL, NULL, s); + return fd; +} + +/* + * Parse numeric snapshot ID in @str + * If @str can't be parsed as number, return false. + * Else, if the number is zero or too large, set *@snapid to zero and + * return true. + * Else, set *@snapid to the number and return true. + */ +static bool sd_parse_snapid(const char *str, uint32_t *snapid) +{ + unsigned long ul; + int ret; + + ret = qemu_strtoul(str, NULL, 10, &ul); + if (ret == -ERANGE) { + ul = ret = 0; + } + if (ret) { + return false; + } + if (ul > UINT32_MAX) { + ul = 0; + } + + *snapid = ul; + return true; +} + +static bool sd_parse_snapid_or_tag(const char *str, + uint32_t *snapid, char tag[]) +{ + if (!sd_parse_snapid(str, snapid)) { + *snapid = 0; + if (g_strlcpy(tag, str, SD_MAX_VDI_TAG_LEN) >= SD_MAX_VDI_TAG_LEN) { + return false; + } + } else if (!*snapid) { + return false; + } else { + tag[0] = 0; + } + return true; +} + +typedef struct { + const char *path; /* non-null iff transport is tcp */ + const char *host; /* valid when transport is tcp */ + int port; /* valid when transport is tcp */ + char vdi[SD_MAX_VDI_LEN]; + char tag[SD_MAX_VDI_TAG_LEN]; + uint32_t snap_id; + /* Remainder is only for sd_config_done() */ + URI *uri; + QueryParams *qp; +} SheepdogConfig; + +static void sd_config_done(SheepdogConfig *cfg) +{ + if (cfg->qp) { + query_params_free(cfg->qp); + } + uri_free(cfg->uri); +} + +static void sd_parse_uri(SheepdogConfig *cfg, const char *filename, + Error **errp) +{ + Error *err = NULL; + QueryParams *qp = NULL; + bool is_unix; + URI *uri; + + memset(cfg, 0, sizeof(*cfg)); + + cfg->uri = uri = uri_parse(filename); + if (!uri) { + error_setg(&err, "invalid URI '%s'", filename); + goto out; + } + + /* transport */ + if (!g_strcmp0(uri->scheme, "sheepdog")) { + is_unix = false; + } else if (!g_strcmp0(uri->scheme, "sheepdog+tcp")) { + is_unix = false; + } else if (!g_strcmp0(uri->scheme, "sheepdog+unix")) { + is_unix = true; + } else { + error_setg(&err, "URI scheme must be 'sheepdog', 'sheepdog+tcp'," + " or 'sheepdog+unix'"); + goto out; + } + + if (uri->path == NULL || !strcmp(uri->path, "/")) { + error_setg(&err, "missing file path in URI"); + goto out; + } + if (g_strlcpy(cfg->vdi, uri->path + 1, SD_MAX_VDI_LEN) + >= SD_MAX_VDI_LEN) { + error_setg(&err, "VDI name is too long"); + goto out; + } + + cfg->qp = qp = query_params_parse(uri->query); + + if (is_unix) { + /* sheepdog+unix:///vdiname?socket=path */ + if (uri->server || uri->port) { + error_setg(&err, "URI scheme %s doesn't accept a server address", + uri->scheme); + goto out; + } + if (!qp->n) { + error_setg(&err, + "URI scheme %s requires query parameter 'socket'", + uri->scheme); + goto out; + } + if (qp->n != 1 || strcmp(qp->p[0].name, "socket")) { + error_setg(&err, "unexpected query parameters"); + goto out; + } + cfg->path = qp->p[0].value; + } else { + /* sheepdog[+tcp]://[host:port]/vdiname */ + if (qp->n) { + error_setg(&err, "unexpected query parameters"); + goto out; + } + cfg->host = uri->server; + cfg->port = uri->port; + } + + /* snapshot tag */ + if (uri->fragment) { + if (!sd_parse_snapid_or_tag(uri->fragment, + &cfg->snap_id, cfg->tag)) { + error_setg(&err, "'%s' is not a valid snapshot ID", + uri->fragment); + goto out; + } + } else { + cfg->snap_id = CURRENT_VDI_ID; /* search current vdi */ + } + +out: + if (err) { + error_propagate(errp, err); + sd_config_done(cfg); + } +} + +/* + * Parse a filename (old syntax) + * + * filename must be one of the following formats: + * 1. [vdiname] + * 2. [vdiname]:[snapid] + * 3. [vdiname]:[tag] + * 4. [hostname]:[port]:[vdiname] + * 5. [hostname]:[port]:[vdiname]:[snapid] + * 6. [hostname]:[port]:[vdiname]:[tag] + * + * You can boot from the snapshot images by specifying `snapid` or + * `tag'. + * + * You can run VMs outside the Sheepdog cluster by specifying + * `hostname' and `port' (experimental). + */ +static void parse_vdiname(SheepdogConfig *cfg, const char *filename, + Error **errp) +{ + Error *err = NULL; + char *p, *q, *uri; + const char *host_spec, *vdi_spec; + int nr_sep; + + strstart(filename, "sheepdog:", &filename); + p = q = g_strdup(filename); + + /* count the number of separators */ + nr_sep = 0; + while (*p) { + if (*p == ':') { + nr_sep++; + } + p++; + } + p = q; + + /* use the first two tokens as host_spec. */ + if (nr_sep >= 2) { + host_spec = p; + p = strchr(p, ':'); + p++; + p = strchr(p, ':'); + *p++ = '\0'; + } else { + host_spec = ""; + } + + vdi_spec = p; + + p = strchr(vdi_spec, ':'); + if (p) { + *p++ = '#'; + } + + uri = g_strdup_printf("sheepdog://%s/%s", host_spec, vdi_spec); + + /* + * FIXME We to escape URI meta-characters, e.g. "x?y=z" + * produces "sheepdog://x?y=z". Because of that ... + */ + sd_parse_uri(cfg, uri, &err); + if (err) { + /* + * ... this can fail, but the error message is misleading. + * Replace it by the traditional useless one until the + * escaping is fixed. + */ + error_free(err); + error_setg(errp, "Can't parse filename"); + } + + g_free(q); + g_free(uri); +} + +static void sd_parse_filename(const char *filename, QDict *options, + Error **errp) +{ + Error *err = NULL; + SheepdogConfig cfg; + char buf[32]; + + if (strstr(filename, "://")) { + sd_parse_uri(&cfg, filename, &err); + } else { + parse_vdiname(&cfg, filename, &err); + } + if (err) { + error_propagate(errp, err); + return; + } + + if (cfg.path) { + qdict_set_default_str(options, "server.path", cfg.path); + qdict_set_default_str(options, "server.type", "unix"); + } else { + qdict_set_default_str(options, "server.type", "inet"); + qdict_set_default_str(options, "server.host", + cfg.host ?: SD_DEFAULT_ADDR); + snprintf(buf, sizeof(buf), "%d", cfg.port ?: SD_DEFAULT_PORT); + qdict_set_default_str(options, "server.port", buf); + } + qdict_set_default_str(options, "vdi", cfg.vdi); + qdict_set_default_str(options, "tag", cfg.tag); + if (cfg.snap_id) { + snprintf(buf, sizeof(buf), "%d", cfg.snap_id); + qdict_set_default_str(options, "snap-id", buf); + } + + sd_config_done(&cfg); +} + +static int find_vdi_name(BDRVSheepdogState *s, const char *filename, + uint32_t snapid, const char *tag, uint32_t *vid, + bool lock, Error **errp) +{ + int ret, fd; + SheepdogVdiReq hdr; + SheepdogVdiRsp *rsp = (SheepdogVdiRsp *)&hdr; + unsigned int wlen, rlen = 0; + char buf[SD_MAX_VDI_LEN + SD_MAX_VDI_TAG_LEN] QEMU_NONSTRING; + + fd = connect_to_sdog(s, errp); + if (fd < 0) { + return fd; + } + + /* This pair of strncpy calls ensures that the buffer is zero-filled, + * which is desirable since we'll soon be sending those bytes, and + * don't want the send_req to read uninitialized data. + */ + strncpy(buf, filename, SD_MAX_VDI_LEN); + strncpy(buf + SD_MAX_VDI_LEN, tag, SD_MAX_VDI_TAG_LEN); + + memset(&hdr, 0, sizeof(hdr)); + if (lock) { + hdr.opcode = SD_OP_LOCK_VDI; + hdr.type = LOCK_TYPE_NORMAL; + } else { + hdr.opcode = SD_OP_GET_VDI_INFO; + } + wlen = SD_MAX_VDI_LEN + SD_MAX_VDI_TAG_LEN; + hdr.proto_ver = SD_PROTO_VER; + hdr.data_length = wlen; + hdr.snapid = snapid; + hdr.flags = SD_FLAG_CMD_WRITE; + + ret = do_req(fd, s->bs, (SheepdogReq *)&hdr, buf, &wlen, &rlen); + if (ret) { + error_setg_errno(errp, -ret, "cannot get vdi info"); + goto out; + } + + if (rsp->result != SD_RES_SUCCESS) { + error_setg(errp, "cannot get vdi info, %s, %s %" PRIu32 " %s", + sd_strerror(rsp->result), filename, snapid, tag); + if (rsp->result == SD_RES_NO_VDI) { + ret = -ENOENT; + } else if (rsp->result == SD_RES_VDI_LOCKED) { + ret = -EBUSY; + } else { + ret = -EIO; + } + goto out; + } + *vid = rsp->vdi_id; + + ret = 0; +out: + closesocket(fd); + return ret; +} + +static void coroutine_fn add_aio_request(BDRVSheepdogState *s, AIOReq *aio_req, + struct iovec *iov, int niov, + enum AIOCBState aiocb_type) +{ + int nr_copies = s->inode.nr_copies; + SheepdogObjReq hdr; + unsigned int wlen = 0; + int ret; + uint64_t oid = aio_req->oid; + unsigned int datalen = aio_req->data_len; + uint64_t offset = aio_req->offset; + uint8_t flags = aio_req->flags; + uint64_t old_oid = aio_req->base_oid; + bool create = aio_req->create; + + qemu_co_mutex_lock(&s->queue_lock); + QLIST_INSERT_HEAD(&s->inflight_aio_head, aio_req, aio_siblings); + qemu_co_mutex_unlock(&s->queue_lock); + + if (!nr_copies) { + error_report("bug"); + } + + memset(&hdr, 0, sizeof(hdr)); + + switch (aiocb_type) { + case AIOCB_FLUSH_CACHE: + hdr.opcode = SD_OP_FLUSH_VDI; + break; + case AIOCB_READ_UDATA: + hdr.opcode = SD_OP_READ_OBJ; + hdr.flags = flags; + break; + case AIOCB_WRITE_UDATA: + if (create) { + hdr.opcode = SD_OP_CREATE_AND_WRITE_OBJ; + } else { + hdr.opcode = SD_OP_WRITE_OBJ; + } + wlen = datalen; + hdr.flags = SD_FLAG_CMD_WRITE | flags; + break; + case AIOCB_DISCARD_OBJ: + hdr.opcode = SD_OP_WRITE_OBJ; + hdr.flags = SD_FLAG_CMD_WRITE | flags; + s->inode.data_vdi_id[data_oid_to_idx(oid)] = 0; + offset = offsetof(SheepdogInode, + data_vdi_id[data_oid_to_idx(oid)]); + oid = vid_to_vdi_oid(s->inode.vdi_id); + wlen = datalen = sizeof(uint32_t); + break; + } + + if (s->cache_flags) { + hdr.flags |= s->cache_flags; + } + + hdr.oid = oid; + hdr.cow_oid = old_oid; + hdr.copies = s->inode.nr_copies; + + hdr.data_length = datalen; + hdr.offset = offset; + + hdr.id = aio_req->id; + + qemu_co_mutex_lock(&s->lock); + s->co_send = qemu_coroutine_self(); + aio_set_fd_handler(s->aio_context, s->fd, false, + co_read_response, co_write_request, NULL, s); + socket_set_cork(s->fd, 1); + + /* send a header */ + ret = qemu_co_send(s->fd, &hdr, sizeof(hdr)); + if (ret != sizeof(hdr)) { + error_report("failed to send a req, %s", strerror(errno)); + goto out; + } + + if (wlen) { + ret = qemu_co_sendv(s->fd, iov, niov, aio_req->iov_offset, wlen); + if (ret != wlen) { + error_report("failed to send a data, %s", strerror(errno)); + } + } +out: + socket_set_cork(s->fd, 0); + aio_set_fd_handler(s->aio_context, s->fd, false, + co_read_response, NULL, NULL, s); + s->co_send = NULL; + qemu_co_mutex_unlock(&s->lock); +} + +static int read_write_object(int fd, BlockDriverState *bs, char *buf, + uint64_t oid, uint8_t copies, + unsigned int datalen, uint64_t offset, + bool write, bool create, uint32_t cache_flags) +{ + SheepdogObjReq hdr; + SheepdogObjRsp *rsp = (SheepdogObjRsp *)&hdr; + unsigned int wlen, rlen; + int ret; + + memset(&hdr, 0, sizeof(hdr)); + + if (write) { + wlen = datalen; + rlen = 0; + hdr.flags = SD_FLAG_CMD_WRITE; + if (create) { + hdr.opcode = SD_OP_CREATE_AND_WRITE_OBJ; + } else { + hdr.opcode = SD_OP_WRITE_OBJ; + } + } else { + wlen = 0; + rlen = datalen; + hdr.opcode = SD_OP_READ_OBJ; + } + + hdr.flags |= cache_flags; + + hdr.oid = oid; + hdr.data_length = datalen; + hdr.offset = offset; + hdr.copies = copies; + + ret = do_req(fd, bs, (SheepdogReq *)&hdr, buf, &wlen, &rlen); + if (ret) { + error_report("failed to send a request to the sheep"); + return ret; + } + + switch (rsp->result) { + case SD_RES_SUCCESS: + return 0; + default: + error_report("%s", sd_strerror(rsp->result)); + return -EIO; + } +} + +static int read_object(int fd, BlockDriverState *bs, char *buf, + uint64_t oid, uint8_t copies, + unsigned int datalen, uint64_t offset, + uint32_t cache_flags) +{ + return read_write_object(fd, bs, buf, oid, copies, + datalen, offset, false, + false, cache_flags); +} + +static int write_object(int fd, BlockDriverState *bs, char *buf, + uint64_t oid, uint8_t copies, + unsigned int datalen, uint64_t offset, bool create, + uint32_t cache_flags) +{ + return read_write_object(fd, bs, buf, oid, copies, + datalen, offset, true, + create, cache_flags); +} + +/* update inode with the latest state */ +static int reload_inode(BDRVSheepdogState *s, uint32_t snapid, const char *tag) +{ + Error *local_err = NULL; + SheepdogInode *inode; + int ret = 0, fd; + uint32_t vid = 0; + + fd = connect_to_sdog(s, &local_err); + if (fd < 0) { + error_report_err(local_err); + return -EIO; + } + + inode = g_malloc(SD_INODE_HEADER_SIZE); + + ret = find_vdi_name(s, s->name, snapid, tag, &vid, false, &local_err); + if (ret) { + error_report_err(local_err); + goto out; + } + + ret = read_object(fd, s->bs, (char *)inode, vid_to_vdi_oid(vid), + s->inode.nr_copies, SD_INODE_HEADER_SIZE, 0, + s->cache_flags); + if (ret < 0) { + goto out; + } + + if (inode->vdi_id != s->inode.vdi_id) { + memcpy(&s->inode, inode, SD_INODE_HEADER_SIZE); + } + +out: + g_free(inode); + closesocket(fd); + + return ret; +} + +static void coroutine_fn resend_aioreq(BDRVSheepdogState *s, AIOReq *aio_req) +{ + SheepdogAIOCB *acb = aio_req->aiocb; + + aio_req->create = false; + + /* check whether this request becomes a CoW one */ + if (acb->aiocb_type == AIOCB_WRITE_UDATA && is_data_obj(aio_req->oid)) { + int idx = data_oid_to_idx(aio_req->oid); + + if (is_data_obj_writable(&s->inode, idx)) { + goto out; + } + + if (s->inode.data_vdi_id[idx]) { + aio_req->base_oid = vid_to_data_oid(s->inode.data_vdi_id[idx], idx); + aio_req->flags |= SD_FLAG_CMD_COW; + } + aio_req->create = true; + } +out: + if (is_data_obj(aio_req->oid)) { + add_aio_request(s, aio_req, acb->qiov->iov, acb->qiov->niov, + acb->aiocb_type); + } else { + struct iovec iov; + iov.iov_base = &s->inode; + iov.iov_len = sizeof(s->inode); + add_aio_request(s, aio_req, &iov, 1, AIOCB_WRITE_UDATA); + } +} + +static void sd_detach_aio_context(BlockDriverState *bs) +{ + BDRVSheepdogState *s = bs->opaque; + + aio_set_fd_handler(s->aio_context, s->fd, false, NULL, + NULL, NULL, NULL); +} + +static void sd_attach_aio_context(BlockDriverState *bs, + AioContext *new_context) +{ + BDRVSheepdogState *s = bs->opaque; + + s->aio_context = new_context; + aio_set_fd_handler(new_context, s->fd, false, + co_read_response, NULL, NULL, s); +} + +static QemuOptsList runtime_opts = { + .name = "sheepdog", + .head = QTAILQ_HEAD_INITIALIZER(runtime_opts.head), + .desc = { + { + .name = "vdi", + .type = QEMU_OPT_STRING, + }, + { + .name = "snap-id", + .type = QEMU_OPT_NUMBER, + }, + { + .name = "tag", + .type = QEMU_OPT_STRING, + }, + { /* end of list */ } + }, +}; + +static int sd_open(BlockDriverState *bs, QDict *options, int flags, + Error **errp) +{ + int ret, fd; + uint32_t vid = 0; + BDRVSheepdogState *s = bs->opaque; + const char *vdi, *snap_id_str, *tag; + uint64_t snap_id; + char *buf = NULL; + QemuOpts *opts; + + deprecation_warning(); + + s->bs = bs; + s->aio_context = bdrv_get_aio_context(bs); + + opts = qemu_opts_create(&runtime_opts, NULL, 0, &error_abort); + if (!qemu_opts_absorb_qdict(opts, options, errp)) { + ret = -EINVAL; + goto err_no_fd; + } + + s->addr = sd_server_config(options, errp); + if (!s->addr) { + ret = -EINVAL; + goto err_no_fd; + } + + vdi = qemu_opt_get(opts, "vdi"); + snap_id_str = qemu_opt_get(opts, "snap-id"); + snap_id = qemu_opt_get_number(opts, "snap-id", CURRENT_VDI_ID); + tag = qemu_opt_get(opts, "tag"); + + if (!vdi) { + error_setg(errp, "parameter 'vdi' is missing"); + ret = -EINVAL; + goto err_no_fd; + } + if (strlen(vdi) >= SD_MAX_VDI_LEN) { + error_setg(errp, "value of parameter 'vdi' is too long"); + ret = -EINVAL; + goto err_no_fd; + } + + if (snap_id > UINT32_MAX) { + snap_id = 0; + } + if (snap_id_str && !snap_id) { + error_setg(errp, "'snap-id=%s' is not a valid snapshot ID", + snap_id_str); + ret = -EINVAL; + goto err_no_fd; + } + + if (!tag) { + tag = ""; + } + if (strlen(tag) >= SD_MAX_VDI_TAG_LEN) { + error_setg(errp, "value of parameter 'tag' is too long"); + ret = -EINVAL; + goto err_no_fd; + } + + QLIST_INIT(&s->inflight_aio_head); + QLIST_INIT(&s->failed_aio_head); + QLIST_INIT(&s->inflight_aiocb_head); + + s->fd = get_sheep_fd(s, errp); + if (s->fd < 0) { + ret = s->fd; + goto err_no_fd; + } + + ret = find_vdi_name(s, vdi, (uint32_t)snap_id, tag, &vid, true, errp); + if (ret) { + goto err; + } + + /* + * QEMU block layer emulates writethrough cache as 'writeback + flush', so + * we always set SD_FLAG_CMD_CACHE (writeback cache) as default. + */ + s->cache_flags = SD_FLAG_CMD_CACHE; + if (flags & BDRV_O_NOCACHE) { + s->cache_flags = SD_FLAG_CMD_DIRECT; + } + s->discard_supported = true; + + if (snap_id || tag[0]) { + trace_sheepdog_open(vid); + s->is_snapshot = true; + } + + fd = connect_to_sdog(s, errp); + if (fd < 0) { + ret = fd; + goto err; + } + + buf = g_malloc(SD_INODE_SIZE); + ret = read_object(fd, s->bs, buf, vid_to_vdi_oid(vid), + 0, SD_INODE_SIZE, 0, s->cache_flags); + + closesocket(fd); + + if (ret) { + error_setg(errp, "Can't read snapshot inode"); + goto err; + } + + memcpy(&s->inode, buf, sizeof(s->inode)); + + bs->total_sectors = s->inode.vdi_size / BDRV_SECTOR_SIZE; + bs->supported_truncate_flags = BDRV_REQ_ZERO_WRITE; + pstrcpy(s->name, sizeof(s->name), vdi); + qemu_co_mutex_init(&s->lock); + qemu_co_mutex_init(&s->queue_lock); + qemu_co_queue_init(&s->overlapping_queue); + qemu_opts_del(opts); + g_free(buf); + return 0; + +err: + aio_set_fd_handler(bdrv_get_aio_context(bs), s->fd, + false, NULL, NULL, NULL, NULL); + closesocket(s->fd); +err_no_fd: + qemu_opts_del(opts); + g_free(buf); + return ret; +} + +static int sd_reopen_prepare(BDRVReopenState *state, BlockReopenQueue *queue, + Error **errp) +{ + BDRVSheepdogState *s = state->bs->opaque; + BDRVSheepdogReopenState *re_s; + int ret = 0; + + re_s = state->opaque = g_new0(BDRVSheepdogReopenState, 1); + + re_s->cache_flags = SD_FLAG_CMD_CACHE; + if (state->flags & BDRV_O_NOCACHE) { + re_s->cache_flags = SD_FLAG_CMD_DIRECT; + } + + re_s->fd = get_sheep_fd(s, errp); + if (re_s->fd < 0) { + ret = re_s->fd; + return ret; + } + + return ret; +} + +static void sd_reopen_commit(BDRVReopenState *state) +{ + BDRVSheepdogReopenState *re_s = state->opaque; + BDRVSheepdogState *s = state->bs->opaque; + + if (s->fd) { + aio_set_fd_handler(s->aio_context, s->fd, false, + NULL, NULL, NULL, NULL); + closesocket(s->fd); + } + + s->fd = re_s->fd; + s->cache_flags = re_s->cache_flags; + + g_free(state->opaque); + state->opaque = NULL; + + return; +} + +static void sd_reopen_abort(BDRVReopenState *state) +{ + BDRVSheepdogReopenState *re_s = state->opaque; + BDRVSheepdogState *s = state->bs->opaque; + + if (re_s == NULL) { + return; + } + + if (re_s->fd) { + aio_set_fd_handler(s->aio_context, re_s->fd, false, + NULL, NULL, NULL, NULL); + closesocket(re_s->fd); + } + + g_free(state->opaque); + state->opaque = NULL; + + return; +} + +static int do_sd_create(BDRVSheepdogState *s, uint32_t *vdi_id, int snapshot, + Error **errp) +{ + SheepdogVdiReq hdr; + SheepdogVdiRsp *rsp = (SheepdogVdiRsp *)&hdr; + int fd, ret; + unsigned int wlen, rlen = 0; + char buf[SD_MAX_VDI_LEN]; + + fd = connect_to_sdog(s, errp); + if (fd < 0) { + return fd; + } + + /* FIXME: would it be better to fail (e.g., return -EIO) when filename + * does not fit in buf? For now, just truncate and avoid buffer overrun. + */ + memset(buf, 0, sizeof(buf)); + pstrcpy(buf, sizeof(buf), s->name); + + memset(&hdr, 0, sizeof(hdr)); + hdr.opcode = SD_OP_NEW_VDI; + hdr.base_vdi_id = s->inode.vdi_id; + + wlen = SD_MAX_VDI_LEN; + + hdr.flags = SD_FLAG_CMD_WRITE; + hdr.snapid = snapshot; + + hdr.data_length = wlen; + hdr.vdi_size = s->inode.vdi_size; + hdr.copy_policy = s->inode.copy_policy; + hdr.copies = s->inode.nr_copies; + hdr.block_size_shift = s->inode.block_size_shift; + + ret = do_req(fd, NULL, (SheepdogReq *)&hdr, buf, &wlen, &rlen); + + closesocket(fd); + + if (ret) { + error_setg_errno(errp, -ret, "create failed"); + return ret; + } + + if (rsp->result != SD_RES_SUCCESS) { + error_setg(errp, "%s, %s", sd_strerror(rsp->result), s->inode.name); + return -EIO; + } + + if (vdi_id) { + *vdi_id = rsp->vdi_id; + } + + return 0; +} + +static int sd_prealloc(BlockDriverState *bs, int64_t old_size, int64_t new_size, + Error **errp) +{ + BlockBackend *blk = NULL; + BDRVSheepdogState *base = bs->opaque; + unsigned long buf_size; + uint32_t idx, max_idx; + uint32_t object_size; + void *buf = NULL; + int ret; + + blk = blk_new_with_bs(bs, + BLK_PERM_CONSISTENT_READ | BLK_PERM_WRITE | BLK_PERM_RESIZE, + BLK_PERM_ALL, errp); + + if (!blk) { + ret = -EPERM; + goto out_with_err_set; + } + + blk_set_allow_write_beyond_eof(blk, true); + + object_size = (UINT32_C(1) << base->inode.block_size_shift); + buf_size = MIN(object_size, SD_DATA_OBJ_SIZE); + buf = g_malloc0(buf_size); + + max_idx = DIV_ROUND_UP(new_size, buf_size); + + for (idx = old_size / buf_size; idx < max_idx; idx++) { + /* + * The created image can be a cloned image, so we need to read + * a data from the source image. + */ + ret = blk_pread(blk, idx * buf_size, buf, buf_size); + if (ret < 0) { + goto out; + } + ret = blk_pwrite(blk, idx * buf_size, buf, buf_size, 0); + if (ret < 0) { + goto out; + } + } + + ret = 0; +out: + if (ret < 0) { + error_setg_errno(errp, -ret, "Can't pre-allocate"); + } +out_with_err_set: + blk_unref(blk); + g_free(buf); + + return ret; +} + +static int sd_create_prealloc(BlockdevOptionsSheepdog *location, int64_t size, + Error **errp) +{ + BlockDriverState *bs; + Visitor *v; + QObject *obj = NULL; + QDict *qdict; + int ret; + + v = qobject_output_visitor_new(&obj); + visit_type_BlockdevOptionsSheepdog(v, NULL, &location, &error_abort); + visit_free(v); + + qdict = qobject_to(QDict, obj); + qdict_flatten(qdict); + + qdict_put_str(qdict, "driver", "sheepdog"); + + bs = bdrv_open(NULL, NULL, qdict, BDRV_O_PROTOCOL | BDRV_O_RDWR, errp); + if (bs == NULL) { + ret = -EIO; + goto fail; + } + + ret = sd_prealloc(bs, 0, size, errp); +fail: + bdrv_unref(bs); + qobject_unref(qdict); + return ret; +} + +static int parse_redundancy(BDRVSheepdogState *s, SheepdogRedundancy *opt) +{ + struct SheepdogInode *inode = &s->inode; + + switch (opt->type) { + case SHEEPDOG_REDUNDANCY_TYPE_FULL: + if (opt->u.full.copies > SD_MAX_COPIES || opt->u.full.copies < 1) { + return -EINVAL; + } + inode->copy_policy = 0; + inode->nr_copies = opt->u.full.copies; + return 0; + + case SHEEPDOG_REDUNDANCY_TYPE_ERASURE_CODED: + { + int64_t copy = opt->u.erasure_coded.data_strips; + int64_t parity = opt->u.erasure_coded.parity_strips; + + if (copy != 2 && copy != 4 && copy != 8 && copy != 16) { + return -EINVAL; + } + + if (parity >= SD_EC_MAX_STRIP || parity < 1) { + return -EINVAL; + } + + /* + * 4 bits for parity and 4 bits for data. + * We have to compress upper data bits because it can't represent 16 + */ + inode->copy_policy = ((copy / 2) << 4) + parity; + inode->nr_copies = copy + parity; + return 0; + } + + default: + g_assert_not_reached(); + } + + return -EINVAL; +} + +/* + * Sheepdog support two kinds of redundancy, full replication and erasure + * coding. + * + * # create a fully replicated vdi with x copies + * -o redundancy=x (1 <= x <= SD_MAX_COPIES) + * + * # create a erasure coded vdi with x data strips and y parity strips + * -o redundancy=x:y (x must be one of {2,4,8,16} and 1 <= y < SD_EC_MAX_STRIP) + */ +static SheepdogRedundancy *parse_redundancy_str(const char *opt) +{ + SheepdogRedundancy *redundancy; + const char *n1, *n2; + long copy, parity; + char p[10]; + int ret; + + pstrcpy(p, sizeof(p), opt); + n1 = strtok(p, ":"); + n2 = strtok(NULL, ":"); + + if (!n1) { + return NULL; + } + + ret = qemu_strtol(n1, NULL, 10, ©); + if (ret < 0) { + return NULL; + } + + redundancy = g_new0(SheepdogRedundancy, 1); + if (!n2) { + *redundancy = (SheepdogRedundancy) { + .type = SHEEPDOG_REDUNDANCY_TYPE_FULL, + .u.full.copies = copy, + }; + } else { + ret = qemu_strtol(n2, NULL, 10, &parity); + if (ret < 0) { + g_free(redundancy); + return NULL; + } + + *redundancy = (SheepdogRedundancy) { + .type = SHEEPDOG_REDUNDANCY_TYPE_ERASURE_CODED, + .u.erasure_coded = { + .data_strips = copy, + .parity_strips = parity, + }, + }; + } + + return redundancy; +} + +static int parse_block_size_shift(BDRVSheepdogState *s, + BlockdevCreateOptionsSheepdog *opts) +{ + struct SheepdogInode *inode = &s->inode; + uint64_t object_size; + int obj_order; + + if (opts->has_object_size) { + object_size = opts->object_size; + + if ((object_size - 1) & object_size) { /* not a power of 2? */ + return -EINVAL; + } + obj_order = ctz32(object_size); + if (obj_order < 20 || obj_order > 31) { + return -EINVAL; + } + inode->block_size_shift = (uint8_t)obj_order; + } + + return 0; +} + +static int sd_co_create(BlockdevCreateOptions *options, Error **errp) +{ + BlockdevCreateOptionsSheepdog *opts = &options->u.sheepdog; + int ret = 0; + uint32_t vid = 0; + char *backing_file = NULL; + char *buf = NULL; + BDRVSheepdogState *s; + uint64_t max_vdi_size; + bool prealloc = false; + + assert(options->driver == BLOCKDEV_DRIVER_SHEEPDOG); + + deprecation_warning(); + + s = g_new0(BDRVSheepdogState, 1); + + /* Steal SocketAddress from QAPI, set NULL to prevent double free */ + s->addr = opts->location->server; + opts->location->server = NULL; + + if (strlen(opts->location->vdi) >= sizeof(s->name)) { + error_setg(errp, "'vdi' string too long"); + ret = -EINVAL; + goto out; + } + pstrcpy(s->name, sizeof(s->name), opts->location->vdi); + + s->inode.vdi_size = opts->size; + backing_file = opts->backing_file; + + if (!opts->has_preallocation) { + opts->preallocation = PREALLOC_MODE_OFF; + } + switch (opts->preallocation) { + case PREALLOC_MODE_OFF: + prealloc = false; + break; + case PREALLOC_MODE_FULL: + prealloc = true; + break; + default: + error_setg(errp, "Preallocation mode not supported for Sheepdog"); + ret = -EINVAL; + goto out; + } + + if (opts->has_redundancy) { + ret = parse_redundancy(s, opts->redundancy); + if (ret < 0) { + error_setg(errp, "Invalid redundancy mode"); + goto out; + } + } + ret = parse_block_size_shift(s, opts); + if (ret < 0) { + error_setg(errp, "Invalid object_size." + " obect_size needs to be power of 2" + " and be limited from 2^20 to 2^31"); + goto out; + } + + if (opts->has_backing_file) { + BlockBackend *blk; + BDRVSheepdogState *base; + BlockDriver *drv; + + /* Currently, only Sheepdog backing image is supported. */ + drv = bdrv_find_protocol(opts->backing_file, true, NULL); + if (!drv || strcmp(drv->protocol_name, "sheepdog") != 0) { + error_setg(errp, "backing_file must be a sheepdog image"); + ret = -EINVAL; + goto out; + } + + blk = blk_new_open(opts->backing_file, NULL, NULL, + BDRV_O_PROTOCOL, errp); + if (blk == NULL) { + ret = -EIO; + goto out; + } + + base = blk_bs(blk)->opaque; + + if (!is_snapshot(&base->inode)) { + error_setg(errp, "cannot clone from a non snapshot vdi"); + blk_unref(blk); + ret = -EINVAL; + goto out; + } + s->inode.vdi_id = base->inode.vdi_id; + blk_unref(blk); + } + + s->aio_context = qemu_get_aio_context(); + + /* if block_size_shift is not specified, get cluster default value */ + if (s->inode.block_size_shift == 0) { + SheepdogVdiReq hdr; + SheepdogClusterRsp *rsp = (SheepdogClusterRsp *)&hdr; + int fd; + unsigned int wlen = 0, rlen = 0; + + fd = connect_to_sdog(s, errp); + if (fd < 0) { + ret = fd; + goto out; + } + + memset(&hdr, 0, sizeof(hdr)); + hdr.opcode = SD_OP_GET_CLUSTER_DEFAULT; + hdr.proto_ver = SD_PROTO_VER; + + ret = do_req(fd, NULL, (SheepdogReq *)&hdr, + NULL, &wlen, &rlen); + closesocket(fd); + if (ret) { + error_setg_errno(errp, -ret, "failed to get cluster default"); + goto out; + } + if (rsp->result == SD_RES_SUCCESS) { + s->inode.block_size_shift = rsp->block_size_shift; + } else { + s->inode.block_size_shift = SD_DEFAULT_BLOCK_SIZE_SHIFT; + } + } + + max_vdi_size = (UINT64_C(1) << s->inode.block_size_shift) * MAX_DATA_OBJS; + + if (s->inode.vdi_size > max_vdi_size) { + error_setg(errp, "An image is too large." + " The maximum image size is %"PRIu64 "GB", + max_vdi_size / 1024 / 1024 / 1024); + ret = -EINVAL; + goto out; + } + + ret = do_sd_create(s, &vid, 0, errp); + if (ret) { + goto out; + } + + if (prealloc) { + ret = sd_create_prealloc(opts->location, opts->size, errp); + } +out: + g_free(backing_file); + g_free(buf); + g_free(s->addr); + g_free(s); + return ret; +} + +static int coroutine_fn sd_co_create_opts(BlockDriver *drv, + const char *filename, + QemuOpts *opts, + Error **errp) +{ + BlockdevCreateOptions *create_options = NULL; + QDict *qdict = NULL, *location_qdict; + Visitor *v; + char *redundancy = NULL; + Error *local_err = NULL; + int ret; + char *backing_fmt = NULL; + + redundancy = qemu_opt_get_del(opts, BLOCK_OPT_REDUNDANCY); + backing_fmt = qemu_opt_get_del(opts, BLOCK_OPT_BACKING_FMT); + + if (backing_fmt && strcmp(backing_fmt, "sheepdog") != 0) { + error_setg(errp, "backing_file must be a sheepdog image"); + ret = -EINVAL; + goto fail; + } + + qdict = qemu_opts_to_qdict(opts, NULL); + qdict_put_str(qdict, "driver", "sheepdog"); + + location_qdict = qdict_new(); + qdict_put(qdict, "location", location_qdict); + + sd_parse_filename(filename, location_qdict, &local_err); + if (local_err) { + error_propagate(errp, local_err); + ret = -EINVAL; + goto fail; + } + + qdict_flatten(qdict); + + /* Change legacy command line options into QMP ones */ + static const QDictRenames opt_renames[] = { + { BLOCK_OPT_BACKING_FILE, "backing-file" }, + { BLOCK_OPT_OBJECT_SIZE, "object-size" }, + { NULL, NULL }, + }; + + if (!qdict_rename_keys(qdict, opt_renames, errp)) { + ret = -EINVAL; + goto fail; + } + + /* Get the QAPI object */ + v = qobject_input_visitor_new_flat_confused(qdict, errp); + if (!v) { + ret = -EINVAL; + goto fail; + } + + visit_type_BlockdevCreateOptions(v, NULL, &create_options, errp); + visit_free(v); + if (!create_options) { + ret = -EINVAL; + goto fail; + } + + assert(create_options->driver == BLOCKDEV_DRIVER_SHEEPDOG); + create_options->u.sheepdog.size = + ROUND_UP(create_options->u.sheepdog.size, BDRV_SECTOR_SIZE); + + if (redundancy) { + create_options->u.sheepdog.has_redundancy = true; + create_options->u.sheepdog.redundancy = + parse_redundancy_str(redundancy); + if (create_options->u.sheepdog.redundancy == NULL) { + error_setg(errp, "Invalid redundancy mode"); + ret = -EINVAL; + goto fail; + } + } + + ret = sd_co_create(create_options, errp); +fail: + qapi_free_BlockdevCreateOptions(create_options); + qobject_unref(qdict); + g_free(redundancy); + g_free(backing_fmt); + return ret; +} + +static void sd_close(BlockDriverState *bs) +{ + Error *local_err = NULL; + BDRVSheepdogState *s = bs->opaque; + SheepdogVdiReq hdr; + SheepdogVdiRsp *rsp = (SheepdogVdiRsp *)&hdr; + unsigned int wlen, rlen = 0; + int fd, ret; + + trace_sheepdog_close(s->name); + + fd = connect_to_sdog(s, &local_err); + if (fd < 0) { + error_report_err(local_err); + return; + } + + memset(&hdr, 0, sizeof(hdr)); + + hdr.opcode = SD_OP_RELEASE_VDI; + hdr.type = LOCK_TYPE_NORMAL; + hdr.base_vdi_id = s->inode.vdi_id; + wlen = strlen(s->name) + 1; + hdr.data_length = wlen; + hdr.flags = SD_FLAG_CMD_WRITE; + + ret = do_req(fd, s->bs, (SheepdogReq *)&hdr, + s->name, &wlen, &rlen); + + closesocket(fd); + + if (!ret && rsp->result != SD_RES_SUCCESS && + rsp->result != SD_RES_VDI_NOT_LOCKED) { + error_report("%s, %s", sd_strerror(rsp->result), s->name); + } + + aio_set_fd_handler(bdrv_get_aio_context(bs), s->fd, + false, NULL, NULL, NULL, NULL); + closesocket(s->fd); + qapi_free_SocketAddress(s->addr); +} + +static int64_t sd_getlength(BlockDriverState *bs) +{ + BDRVSheepdogState *s = bs->opaque; + + return s->inode.vdi_size; +} + +static int coroutine_fn sd_co_truncate(BlockDriverState *bs, int64_t offset, + bool exact, PreallocMode prealloc, + BdrvRequestFlags flags, Error **errp) +{ + BDRVSheepdogState *s = bs->opaque; + int ret, fd; + unsigned int datalen; + uint64_t max_vdi_size; + int64_t old_size = s->inode.vdi_size; + + if (prealloc != PREALLOC_MODE_OFF && prealloc != PREALLOC_MODE_FULL) { + error_setg(errp, "Unsupported preallocation mode '%s'", + PreallocMode_str(prealloc)); + return -ENOTSUP; + } + + max_vdi_size = (UINT64_C(1) << s->inode.block_size_shift) * MAX_DATA_OBJS; + if (offset < old_size) { + error_setg(errp, "shrinking is not supported"); + return -EINVAL; + } else if (offset > max_vdi_size) { + error_setg(errp, "too big image size"); + return -EINVAL; + } + + fd = connect_to_sdog(s, errp); + if (fd < 0) { + return fd; + } + + /* we don't need to update entire object */ + datalen = SD_INODE_HEADER_SIZE; + s->inode.vdi_size = offset; + ret = write_object(fd, s->bs, (char *)&s->inode, + vid_to_vdi_oid(s->inode.vdi_id), s->inode.nr_copies, + datalen, 0, false, s->cache_flags); + close(fd); + + if (ret < 0) { + error_setg_errno(errp, -ret, "failed to update an inode"); + return ret; + } + + if (prealloc == PREALLOC_MODE_FULL) { + ret = sd_prealloc(bs, old_size, offset, errp); + if (ret < 0) { + return ret; + } + } + + return 0; +} + +/* + * This function is called after writing data objects. If we need to + * update metadata, this sends a write request to the vdi object. + */ +static void coroutine_fn sd_write_done(SheepdogAIOCB *acb) +{ + BDRVSheepdogState *s = acb->s; + struct iovec iov; + AIOReq *aio_req; + uint32_t offset, data_len, mn, mx; + + mn = acb->min_dirty_data_idx; + mx = acb->max_dirty_data_idx; + if (mn <= mx) { + /* we need to update the vdi object. */ + ++acb->nr_pending; + offset = sizeof(s->inode) - sizeof(s->inode.data_vdi_id) + + mn * sizeof(s->inode.data_vdi_id[0]); + data_len = (mx - mn + 1) * sizeof(s->inode.data_vdi_id[0]); + + acb->min_dirty_data_idx = UINT32_MAX; + acb->max_dirty_data_idx = 0; + + iov.iov_base = &s->inode; + iov.iov_len = sizeof(s->inode); + aio_req = alloc_aio_req(s, acb, vid_to_vdi_oid(s->inode.vdi_id), + data_len, offset, 0, false, 0, offset); + add_aio_request(s, aio_req, &iov, 1, AIOCB_WRITE_UDATA); + if (--acb->nr_pending) { + qemu_coroutine_yield(); + } + } +} + +/* Delete current working VDI on the snapshot chain */ +static bool sd_delete(BDRVSheepdogState *s) +{ + Error *local_err = NULL; + unsigned int wlen = SD_MAX_VDI_LEN, rlen = 0; + SheepdogVdiReq hdr = { + .opcode = SD_OP_DEL_VDI, + .base_vdi_id = s->inode.vdi_id, + .data_length = wlen, + .flags = SD_FLAG_CMD_WRITE, + }; + SheepdogVdiRsp *rsp = (SheepdogVdiRsp *)&hdr; + int fd, ret; + + fd = connect_to_sdog(s, &local_err); + if (fd < 0) { + error_report_err(local_err); + return false; + } + + ret = do_req(fd, s->bs, (SheepdogReq *)&hdr, + s->name, &wlen, &rlen); + closesocket(fd); + if (ret) { + return false; + } + switch (rsp->result) { + case SD_RES_NO_VDI: + error_report("%s was already deleted", s->name); + /* fall through */ + case SD_RES_SUCCESS: + break; + default: + error_report("%s, %s", sd_strerror(rsp->result), s->name); + return false; + } + + return true; +} + +/* + * Create a writable VDI from a snapshot + */ +static int sd_create_branch(BDRVSheepdogState *s) +{ + Error *local_err = NULL; + int ret, fd; + uint32_t vid; + char *buf; + bool deleted; + + trace_sheepdog_create_branch_snapshot(s->inode.vdi_id); + + buf = g_malloc(SD_INODE_SIZE); + + /* + * Even If deletion fails, we will just create extra snapshot based on + * the working VDI which was supposed to be deleted. So no need to + * false bail out. + */ + deleted = sd_delete(s); + ret = do_sd_create(s, &vid, !deleted, &local_err); + if (ret) { + error_report_err(local_err); + goto out; + } + + trace_sheepdog_create_branch_created(vid); + + fd = connect_to_sdog(s, &local_err); + if (fd < 0) { + error_report_err(local_err); + ret = fd; + goto out; + } + + ret = read_object(fd, s->bs, buf, vid_to_vdi_oid(vid), + s->inode.nr_copies, SD_INODE_SIZE, 0, s->cache_flags); + + closesocket(fd); + + if (ret < 0) { + goto out; + } + + memcpy(&s->inode, buf, sizeof(s->inode)); + + s->is_snapshot = false; + ret = 0; + trace_sheepdog_create_branch_new(s->inode.vdi_id); + +out: + g_free(buf); + + return ret; +} + +/* + * Send I/O requests to the server. + * + * This function sends requests to the server, links the requests to + * the inflight_list in BDRVSheepdogState, and exits without + * waiting the response. The responses are received in the + * `aio_read_response' function which is called from the main loop as + * a fd handler. + * + * Returns 1 when we need to wait a response, 0 when there is no sent + * request and -errno in error cases. + */ +static void coroutine_fn sd_co_rw_vector(SheepdogAIOCB *acb) +{ + int ret = 0; + unsigned long len, done = 0, total = acb->nb_sectors * BDRV_SECTOR_SIZE; + unsigned long idx; + uint32_t object_size; + uint64_t oid; + uint64_t offset; + BDRVSheepdogState *s = acb->s; + SheepdogInode *inode = &s->inode; + AIOReq *aio_req; + + if (acb->aiocb_type == AIOCB_WRITE_UDATA && s->is_snapshot) { + /* + * In the case we open the snapshot VDI, Sheepdog creates the + * writable VDI when we do a write operation first. + */ + ret = sd_create_branch(s); + if (ret) { + acb->ret = -EIO; + return; + } + } + + object_size = (UINT32_C(1) << inode->block_size_shift); + idx = acb->sector_num * BDRV_SECTOR_SIZE / object_size; + offset = (acb->sector_num * BDRV_SECTOR_SIZE) % object_size; + + /* + * Make sure we don't free the aiocb before we are done with all requests. + * This additional reference is dropped at the end of this function. + */ + acb->nr_pending++; + + while (done != total) { + uint8_t flags = 0; + uint64_t old_oid = 0; + bool create = false; + + oid = vid_to_data_oid(inode->data_vdi_id[idx], idx); + + len = MIN(total - done, object_size - offset); + + switch (acb->aiocb_type) { + case AIOCB_READ_UDATA: + if (!inode->data_vdi_id[idx]) { + qemu_iovec_memset(acb->qiov, done, 0, len); + goto done; + } + break; + case AIOCB_WRITE_UDATA: + if (!inode->data_vdi_id[idx]) { + create = true; + } else if (!is_data_obj_writable(inode, idx)) { + /* Copy-On-Write */ + create = true; + old_oid = oid; + flags = SD_FLAG_CMD_COW; + } + break; + case AIOCB_DISCARD_OBJ: + /* + * We discard the object only when the whole object is + * 1) allocated 2) trimmed. Otherwise, simply skip it. + */ + if (len != object_size || inode->data_vdi_id[idx] == 0) { + goto done; + } + break; + default: + break; + } + + if (create) { + trace_sheepdog_co_rw_vector_update(inode->vdi_id, oid, + vid_to_data_oid(inode->data_vdi_id[idx], idx), + idx); + oid = vid_to_data_oid(inode->vdi_id, idx); + trace_sheepdog_co_rw_vector_new(oid); + } + + aio_req = alloc_aio_req(s, acb, oid, len, offset, flags, create, + old_oid, + acb->aiocb_type == AIOCB_DISCARD_OBJ ? + 0 : done); + add_aio_request(s, aio_req, acb->qiov->iov, acb->qiov->niov, + acb->aiocb_type); + done: + offset = 0; + idx++; + done += len; + } + if (--acb->nr_pending) { + qemu_coroutine_yield(); + } +} + +static void sd_aio_complete(SheepdogAIOCB *acb) +{ + BDRVSheepdogState *s; + if (acb->aiocb_type == AIOCB_FLUSH_CACHE) { + return; + } + + s = acb->s; + qemu_co_mutex_lock(&s->queue_lock); + QLIST_REMOVE(acb, aiocb_siblings); + qemu_co_queue_restart_all(&s->overlapping_queue); + qemu_co_mutex_unlock(&s->queue_lock); +} + +static coroutine_fn int sd_co_writev(BlockDriverState *bs, int64_t sector_num, + int nb_sectors, QEMUIOVector *qiov, + int flags) +{ + SheepdogAIOCB acb; + int ret; + int64_t offset = (sector_num + nb_sectors) * BDRV_SECTOR_SIZE; + BDRVSheepdogState *s = bs->opaque; + + assert(!flags); + if (offset > s->inode.vdi_size) { + ret = sd_co_truncate(bs, offset, false, PREALLOC_MODE_OFF, 0, NULL); + if (ret < 0) { + return ret; + } + } + + sd_aio_setup(&acb, s, qiov, sector_num, nb_sectors, AIOCB_WRITE_UDATA); + sd_co_rw_vector(&acb); + sd_write_done(&acb); + sd_aio_complete(&acb); + + return acb.ret; +} + +static coroutine_fn int sd_co_readv(BlockDriverState *bs, int64_t sector_num, + int nb_sectors, QEMUIOVector *qiov) +{ + SheepdogAIOCB acb; + BDRVSheepdogState *s = bs->opaque; + + sd_aio_setup(&acb, s, qiov, sector_num, nb_sectors, AIOCB_READ_UDATA); + sd_co_rw_vector(&acb); + sd_aio_complete(&acb); + + return acb.ret; +} + +static int coroutine_fn sd_co_flush_to_disk(BlockDriverState *bs) +{ + BDRVSheepdogState *s = bs->opaque; + SheepdogAIOCB acb; + AIOReq *aio_req; + + if (s->cache_flags != SD_FLAG_CMD_CACHE) { + return 0; + } + + sd_aio_setup(&acb, s, NULL, 0, 0, AIOCB_FLUSH_CACHE); + + acb.nr_pending++; + aio_req = alloc_aio_req(s, &acb, vid_to_vdi_oid(s->inode.vdi_id), + 0, 0, 0, false, 0, 0); + add_aio_request(s, aio_req, NULL, 0, acb.aiocb_type); + + if (--acb.nr_pending) { + qemu_coroutine_yield(); + } + + sd_aio_complete(&acb); + return acb.ret; +} + +static int sd_snapshot_create(BlockDriverState *bs, QEMUSnapshotInfo *sn_info) +{ + Error *local_err = NULL; + BDRVSheepdogState *s = bs->opaque; + int ret, fd; + uint32_t new_vid; + SheepdogInode *inode; + unsigned int datalen; + + trace_sheepdog_snapshot_create_info(sn_info->name, sn_info->id_str, s->name, + sn_info->vm_state_size, s->is_snapshot); + + if (s->is_snapshot) { + error_report("You can't create a snapshot of a snapshot VDI, " + "%s (%" PRIu32 ").", s->name, s->inode.vdi_id); + + return -EINVAL; + } + + trace_sheepdog_snapshot_create(sn_info->name, sn_info->id_str); + + s->inode.vm_state_size = sn_info->vm_state_size; + s->inode.vm_clock_nsec = sn_info->vm_clock_nsec; + /* It appears that inode.tag does not require a NUL terminator, + * which means this use of strncpy is ok. + */ + strncpy(s->inode.tag, sn_info->name, sizeof(s->inode.tag)); + /* we don't need to update entire object */ + datalen = SD_INODE_HEADER_SIZE; + inode = g_malloc(datalen); + + /* refresh inode. */ + fd = connect_to_sdog(s, &local_err); + if (fd < 0) { + error_report_err(local_err); + ret = fd; + goto cleanup; + } + + ret = write_object(fd, s->bs, (char *)&s->inode, + vid_to_vdi_oid(s->inode.vdi_id), s->inode.nr_copies, + datalen, 0, false, s->cache_flags); + if (ret < 0) { + error_report("failed to write snapshot's inode."); + goto cleanup; + } + + ret = do_sd_create(s, &new_vid, 1, &local_err); + if (ret < 0) { + error_reportf_err(local_err, + "failed to create inode for snapshot: "); + goto cleanup; + } + + ret = read_object(fd, s->bs, (char *)inode, + vid_to_vdi_oid(new_vid), s->inode.nr_copies, datalen, 0, + s->cache_flags); + + if (ret < 0) { + error_report("failed to read new inode info. %s", strerror(errno)); + goto cleanup; + } + + memcpy(&s->inode, inode, datalen); + trace_sheepdog_snapshot_create_inode(s->inode.name, s->inode.snap_id, + s->inode.vdi_id); + +cleanup: + g_free(inode); + closesocket(fd); + return ret; +} + +/* + * We implement rollback(loadvm) operation to the specified snapshot by + * 1) switch to the snapshot + * 2) rely on sd_create_branch to delete working VDI and + * 3) create a new working VDI based on the specified snapshot + */ +static int sd_snapshot_goto(BlockDriverState *bs, const char *snapshot_id) +{ + BDRVSheepdogState *s = bs->opaque; + BDRVSheepdogState *old_s; + char tag[SD_MAX_VDI_TAG_LEN]; + uint32_t snapid = 0; + int ret; + + if (!sd_parse_snapid_or_tag(snapshot_id, &snapid, tag)) { + return -EINVAL; + } + + old_s = g_new(BDRVSheepdogState, 1); + + memcpy(old_s, s, sizeof(BDRVSheepdogState)); + + ret = reload_inode(s, snapid, tag); + if (ret) { + goto out; + } + + ret = sd_create_branch(s); + if (ret) { + goto out; + } + + g_free(old_s); + + return 0; +out: + /* recover bdrv_sd_state */ + memcpy(s, old_s, sizeof(BDRVSheepdogState)); + g_free(old_s); + + error_report("failed to open. recover old bdrv_sd_state."); + + return ret; +} + +#define NR_BATCHED_DISCARD 128 + +static int remove_objects(BDRVSheepdogState *s, Error **errp) +{ + int fd, i = 0, nr_objs = 0; + int ret; + SheepdogInode *inode = &s->inode; + + fd = connect_to_sdog(s, errp); + if (fd < 0) { + return fd; + } + + nr_objs = count_data_objs(inode); + while (i < nr_objs) { + int start_idx, nr_filled_idx; + + while (i < nr_objs && !inode->data_vdi_id[i]) { + i++; + } + start_idx = i; + + nr_filled_idx = 0; + while (i < nr_objs && nr_filled_idx < NR_BATCHED_DISCARD) { + if (inode->data_vdi_id[i]) { + inode->data_vdi_id[i] = 0; + nr_filled_idx++; + } + + i++; + } + + ret = write_object(fd, s->bs, + (char *)&inode->data_vdi_id[start_idx], + vid_to_vdi_oid(s->inode.vdi_id), inode->nr_copies, + (i - start_idx) * sizeof(uint32_t), + offsetof(struct SheepdogInode, + data_vdi_id[start_idx]), + false, s->cache_flags); + if (ret < 0) { + error_setg(errp, "Failed to discard snapshot inode"); + goto out; + } + } + + ret = 0; +out: + closesocket(fd); + return ret; +} + +static int sd_snapshot_delete(BlockDriverState *bs, + const char *snapshot_id, + const char *name, + Error **errp) +{ + /* + * FIXME should delete the snapshot matching both @snapshot_id and + * @name, but @name not used here + */ + unsigned long snap_id = 0; + char snap_tag[SD_MAX_VDI_TAG_LEN]; + int fd, ret; + char buf[SD_MAX_VDI_LEN + SD_MAX_VDI_TAG_LEN]; + BDRVSheepdogState *s = bs->opaque; + unsigned int wlen = SD_MAX_VDI_LEN + SD_MAX_VDI_TAG_LEN, rlen = 0; + uint32_t vid; + SheepdogVdiReq hdr = { + .opcode = SD_OP_DEL_VDI, + .data_length = wlen, + .flags = SD_FLAG_CMD_WRITE, + }; + SheepdogVdiRsp *rsp = (SheepdogVdiRsp *)&hdr; + + ret = remove_objects(s, errp); + if (ret) { + return ret; + } + + memset(buf, 0, sizeof(buf)); + memset(snap_tag, 0, sizeof(snap_tag)); + pstrcpy(buf, SD_MAX_VDI_LEN, s->name); + /* TODO Use sd_parse_snapid() once this mess is cleaned up */ + ret = qemu_strtoul(snapshot_id, NULL, 10, &snap_id); + if (ret || snap_id > UINT32_MAX) { + /* + * FIXME Since qemu_strtoul() returns -EINVAL when + * @snapshot_id is null, @snapshot_id is mandatory. Correct + * would be to require at least one of @snapshot_id and @name. + */ + error_setg(errp, "Invalid snapshot ID: %s", + snapshot_id ? snapshot_id : ""); + return -EINVAL; + } + + if (snap_id) { + hdr.snapid = (uint32_t) snap_id; + } else { + /* FIXME I suspect we should use @name here */ + /* FIXME don't truncate silently */ + pstrcpy(snap_tag, sizeof(snap_tag), snapshot_id); + pstrcpy(buf + SD_MAX_VDI_LEN, SD_MAX_VDI_TAG_LEN, snap_tag); + } + + ret = find_vdi_name(s, s->name, snap_id, snap_tag, &vid, true, errp); + if (ret) { + return ret; + } + + fd = connect_to_sdog(s, errp); + if (fd < 0) { + return fd; + } + + ret = do_req(fd, s->bs, (SheepdogReq *)&hdr, + buf, &wlen, &rlen); + closesocket(fd); + if (ret) { + error_setg_errno(errp, -ret, "Couldn't send request to server"); + return ret; + } + + switch (rsp->result) { + case SD_RES_NO_VDI: + error_setg(errp, "Can't find the snapshot"); + return -ENOENT; + case SD_RES_SUCCESS: + break; + default: + error_setg(errp, "%s", sd_strerror(rsp->result)); + return -EIO; + } + + return 0; +} + +static int sd_snapshot_list(BlockDriverState *bs, QEMUSnapshotInfo **psn_tab) +{ + Error *local_err = NULL; + BDRVSheepdogState *s = bs->opaque; + SheepdogReq req; + int fd, nr = 1024, ret, max = BITS_TO_LONGS(SD_NR_VDIS) * sizeof(long); + QEMUSnapshotInfo *sn_tab = NULL; + unsigned wlen, rlen; + int found = 0; + SheepdogInode *inode; + unsigned long *vdi_inuse; + unsigned int start_nr; + uint64_t hval; + uint32_t vid; + + vdi_inuse = g_malloc(max); + inode = g_malloc(SD_INODE_HEADER_SIZE); + + fd = connect_to_sdog(s, &local_err); + if (fd < 0) { + error_report_err(local_err); + ret = fd; + goto out; + } + + rlen = max; + wlen = 0; + + memset(&req, 0, sizeof(req)); + + req.opcode = SD_OP_READ_VDIS; + req.data_length = max; + + ret = do_req(fd, s->bs, &req, vdi_inuse, &wlen, &rlen); + + closesocket(fd); + if (ret) { + goto out; + } + + sn_tab = g_new0(QEMUSnapshotInfo, nr); + + /* calculate a vdi id with hash function */ + hval = fnv_64a_buf(s->name, strlen(s->name), FNV1A_64_INIT); + start_nr = hval & (SD_NR_VDIS - 1); + + fd = connect_to_sdog(s, &local_err); + if (fd < 0) { + error_report_err(local_err); + ret = fd; + goto out; + } + + for (vid = start_nr; found < nr; vid = (vid + 1) % SD_NR_VDIS) { + if (!test_bit(vid, vdi_inuse)) { + break; + } + + /* we don't need to read entire object */ + ret = read_object(fd, s->bs, (char *)inode, + vid_to_vdi_oid(vid), + 0, SD_INODE_HEADER_SIZE, 0, + s->cache_flags); + + if (ret) { + continue; + } + + if (!strcmp(inode->name, s->name) && is_snapshot(inode)) { + sn_tab[found].date_sec = inode->snap_ctime >> 32; + sn_tab[found].date_nsec = inode->snap_ctime & 0xffffffff; + sn_tab[found].vm_state_size = inode->vm_state_size; + sn_tab[found].vm_clock_nsec = inode->vm_clock_nsec; + + snprintf(sn_tab[found].id_str, sizeof(sn_tab[found].id_str), + "%" PRIu32, inode->snap_id); + pstrcpy(sn_tab[found].name, + MIN(sizeof(sn_tab[found].name), sizeof(inode->tag)), + inode->tag); + found++; + } + } + + closesocket(fd); +out: + *psn_tab = sn_tab; + + g_free(vdi_inuse); + g_free(inode); + + if (ret < 0) { + return ret; + } + + return found; +} + +static int do_load_save_vmstate(BDRVSheepdogState *s, uint8_t *data, + int64_t pos, int size, int load) +{ + Error *local_err = NULL; + bool create; + int fd, ret = 0, remaining = size; + unsigned int data_len; + uint64_t vmstate_oid; + uint64_t offset; + uint32_t vdi_index; + uint32_t vdi_id = load ? s->inode.parent_vdi_id : s->inode.vdi_id; + uint32_t object_size = (UINT32_C(1) << s->inode.block_size_shift); + + fd = connect_to_sdog(s, &local_err); + if (fd < 0) { + error_report_err(local_err); + return fd; + } + + while (remaining) { + vdi_index = pos / object_size; + offset = pos % object_size; + + data_len = MIN(remaining, object_size - offset); + + vmstate_oid = vid_to_vmstate_oid(vdi_id, vdi_index); + + create = (offset == 0); + if (load) { + ret = read_object(fd, s->bs, (char *)data, vmstate_oid, + s->inode.nr_copies, data_len, offset, + s->cache_flags); + } else { + ret = write_object(fd, s->bs, (char *)data, vmstate_oid, + s->inode.nr_copies, data_len, offset, create, + s->cache_flags); + } + + if (ret < 0) { + error_report("failed to save vmstate %s", strerror(errno)); + goto cleanup; + } + + pos += data_len; + data += data_len; + remaining -= data_len; + } + ret = size; +cleanup: + closesocket(fd); + return ret; +} + +static int sd_save_vmstate(BlockDriverState *bs, QEMUIOVector *qiov, + int64_t pos) +{ + BDRVSheepdogState *s = bs->opaque; + void *buf; + int ret; + + buf = qemu_blockalign(bs, qiov->size); + qemu_iovec_to_buf(qiov, 0, buf, qiov->size); + ret = do_load_save_vmstate(s, (uint8_t *) buf, pos, qiov->size, 0); + qemu_vfree(buf); + + return ret; +} + +static int sd_load_vmstate(BlockDriverState *bs, QEMUIOVector *qiov, + int64_t pos) +{ + BDRVSheepdogState *s = bs->opaque; + void *buf; + int ret; + + buf = qemu_blockalign(bs, qiov->size); + ret = do_load_save_vmstate(s, buf, pos, qiov->size, 1); + qemu_iovec_from_buf(qiov, 0, buf, qiov->size); + qemu_vfree(buf); + + return ret; +} + + +static coroutine_fn int sd_co_pdiscard(BlockDriverState *bs, int64_t offset, + int bytes) +{ + SheepdogAIOCB acb; + BDRVSheepdogState *s = bs->opaque; + QEMUIOVector discard_iov; + struct iovec iov; + uint32_t zero = 0; + + if (!s->discard_supported) { + return 0; + } + + memset(&discard_iov, 0, sizeof(discard_iov)); + memset(&iov, 0, sizeof(iov)); + iov.iov_base = &zero; + iov.iov_len = sizeof(zero); + discard_iov.iov = &iov; + discard_iov.niov = 1; + if (!QEMU_IS_ALIGNED(offset | bytes, BDRV_SECTOR_SIZE)) { + return -ENOTSUP; + } + sd_aio_setup(&acb, s, &discard_iov, offset >> BDRV_SECTOR_BITS, + bytes >> BDRV_SECTOR_BITS, AIOCB_DISCARD_OBJ); + sd_co_rw_vector(&acb); + sd_aio_complete(&acb); + + return acb.ret; +} + +static coroutine_fn int +sd_co_block_status(BlockDriverState *bs, bool want_zero, int64_t offset, + int64_t bytes, int64_t *pnum, int64_t *map, + BlockDriverState **file) +{ + BDRVSheepdogState *s = bs->opaque; + SheepdogInode *inode = &s->inode; + uint32_t object_size = (UINT32_C(1) << inode->block_size_shift); + unsigned long start = offset / object_size, + end = DIV_ROUND_UP(offset + bytes, object_size); + unsigned long idx; + *map = offset; + int ret = BDRV_BLOCK_DATA | BDRV_BLOCK_OFFSET_VALID; + + for (idx = start; idx < end; idx++) { + if (inode->data_vdi_id[idx] == 0) { + break; + } + } + if (idx == start) { + /* Get the longest length of unallocated sectors */ + ret = 0; + for (idx = start + 1; idx < end; idx++) { + if (inode->data_vdi_id[idx] != 0) { + break; + } + } + } + + *pnum = (idx - start) * object_size; + if (*pnum > bytes) { + *pnum = bytes; + } + if (ret > 0 && ret & BDRV_BLOCK_OFFSET_VALID) { + *file = bs; + } + return ret; +} + +static int64_t sd_get_allocated_file_size(BlockDriverState *bs) +{ + BDRVSheepdogState *s = bs->opaque; + SheepdogInode *inode = &s->inode; + uint32_t object_size = (UINT32_C(1) << inode->block_size_shift); + unsigned long i, last = DIV_ROUND_UP(inode->vdi_size, object_size); + uint64_t size = 0; + + for (i = 0; i < last; i++) { + if (inode->data_vdi_id[i] == 0) { + continue; + } + size += object_size; + } + return size; +} + +static QemuOptsList sd_create_opts = { + .name = "sheepdog-create-opts", + .head = QTAILQ_HEAD_INITIALIZER(sd_create_opts.head), + .desc = { + { + .name = BLOCK_OPT_SIZE, + .type = QEMU_OPT_SIZE, + .help = "Virtual disk size" + }, + { + .name = BLOCK_OPT_BACKING_FILE, + .type = QEMU_OPT_STRING, + .help = "File name of a base image" + }, + { + .name = BLOCK_OPT_BACKING_FMT, + .type = QEMU_OPT_STRING, + .help = "Must be 'sheepdog' if present", + }, + { + .name = BLOCK_OPT_PREALLOC, + .type = QEMU_OPT_STRING, + .help = "Preallocation mode (allowed values: off, full)" + }, + { + .name = BLOCK_OPT_REDUNDANCY, + .type = QEMU_OPT_STRING, + .help = "Redundancy of the image" + }, + { + .name = BLOCK_OPT_OBJECT_SIZE, + .type = QEMU_OPT_SIZE, + .help = "Object size of the image" + }, + { /* end of list */ } + } +}; + +static const char *const sd_strong_runtime_opts[] = { + "vdi", + "snap-id", + "tag", + "server.", + + NULL +}; + +static BlockDriver bdrv_sheepdog = { + .format_name = "sheepdog", + .protocol_name = "sheepdog", + .instance_size = sizeof(BDRVSheepdogState), + .bdrv_parse_filename = sd_parse_filename, + .bdrv_file_open = sd_open, + .bdrv_reopen_prepare = sd_reopen_prepare, + .bdrv_reopen_commit = sd_reopen_commit, + .bdrv_reopen_abort = sd_reopen_abort, + .bdrv_close = sd_close, + .bdrv_co_create = sd_co_create, + .bdrv_co_create_opts = sd_co_create_opts, + .bdrv_has_zero_init = bdrv_has_zero_init_1, + .bdrv_getlength = sd_getlength, + .bdrv_get_allocated_file_size = sd_get_allocated_file_size, + .bdrv_co_truncate = sd_co_truncate, + + .bdrv_co_readv = sd_co_readv, + .bdrv_co_writev = sd_co_writev, + .bdrv_co_flush_to_disk = sd_co_flush_to_disk, + .bdrv_co_pdiscard = sd_co_pdiscard, + .bdrv_co_block_status = sd_co_block_status, + + .bdrv_snapshot_create = sd_snapshot_create, + .bdrv_snapshot_goto = sd_snapshot_goto, + .bdrv_snapshot_delete = sd_snapshot_delete, + .bdrv_snapshot_list = sd_snapshot_list, + + .bdrv_save_vmstate = sd_save_vmstate, + .bdrv_load_vmstate = sd_load_vmstate, + + .bdrv_detach_aio_context = sd_detach_aio_context, + .bdrv_attach_aio_context = sd_attach_aio_context, + + .create_opts = &sd_create_opts, + .strong_runtime_opts = sd_strong_runtime_opts, +}; + +static BlockDriver bdrv_sheepdog_tcp = { + .format_name = "sheepdog", + .protocol_name = "sheepdog+tcp", + .instance_size = sizeof(BDRVSheepdogState), + .bdrv_parse_filename = sd_parse_filename, + .bdrv_file_open = sd_open, + .bdrv_reopen_prepare = sd_reopen_prepare, + .bdrv_reopen_commit = sd_reopen_commit, + .bdrv_reopen_abort = sd_reopen_abort, + .bdrv_close = sd_close, + .bdrv_co_create = sd_co_create, + .bdrv_co_create_opts = sd_co_create_opts, + .bdrv_has_zero_init = bdrv_has_zero_init_1, + .bdrv_getlength = sd_getlength, + .bdrv_get_allocated_file_size = sd_get_allocated_file_size, + .bdrv_co_truncate = sd_co_truncate, + + .bdrv_co_readv = sd_co_readv, + .bdrv_co_writev = sd_co_writev, + .bdrv_co_flush_to_disk = sd_co_flush_to_disk, + .bdrv_co_pdiscard = sd_co_pdiscard, + .bdrv_co_block_status = sd_co_block_status, + + .bdrv_snapshot_create = sd_snapshot_create, + .bdrv_snapshot_goto = sd_snapshot_goto, + .bdrv_snapshot_delete = sd_snapshot_delete, + .bdrv_snapshot_list = sd_snapshot_list, + + .bdrv_save_vmstate = sd_save_vmstate, + .bdrv_load_vmstate = sd_load_vmstate, + + .bdrv_detach_aio_context = sd_detach_aio_context, + .bdrv_attach_aio_context = sd_attach_aio_context, + + .create_opts = &sd_create_opts, + .strong_runtime_opts = sd_strong_runtime_opts, +}; + +static BlockDriver bdrv_sheepdog_unix = { + .format_name = "sheepdog", + .protocol_name = "sheepdog+unix", + .instance_size = sizeof(BDRVSheepdogState), + .bdrv_parse_filename = sd_parse_filename, + .bdrv_file_open = sd_open, + .bdrv_reopen_prepare = sd_reopen_prepare, + .bdrv_reopen_commit = sd_reopen_commit, + .bdrv_reopen_abort = sd_reopen_abort, + .bdrv_close = sd_close, + .bdrv_co_create = sd_co_create, + .bdrv_co_create_opts = sd_co_create_opts, + .bdrv_has_zero_init = bdrv_has_zero_init_1, + .bdrv_getlength = sd_getlength, + .bdrv_get_allocated_file_size = sd_get_allocated_file_size, + .bdrv_co_truncate = sd_co_truncate, + + .bdrv_co_readv = sd_co_readv, + .bdrv_co_writev = sd_co_writev, + .bdrv_co_flush_to_disk = sd_co_flush_to_disk, + .bdrv_co_pdiscard = sd_co_pdiscard, + .bdrv_co_block_status = sd_co_block_status, + + .bdrv_snapshot_create = sd_snapshot_create, + .bdrv_snapshot_goto = sd_snapshot_goto, + .bdrv_snapshot_delete = sd_snapshot_delete, + .bdrv_snapshot_list = sd_snapshot_list, + + .bdrv_save_vmstate = sd_save_vmstate, + .bdrv_load_vmstate = sd_load_vmstate, + + .bdrv_detach_aio_context = sd_detach_aio_context, + .bdrv_attach_aio_context = sd_attach_aio_context, + + .create_opts = &sd_create_opts, + .strong_runtime_opts = sd_strong_runtime_opts, +}; + +static void bdrv_sheepdog_init(void) +{ + bdrv_register(&bdrv_sheepdog); + bdrv_register(&bdrv_sheepdog_tcp); + bdrv_register(&bdrv_sheepdog_unix); +} +block_init(bdrv_sheepdog_init); diff --git a/block/snapshot.c b/block/snapshot.c new file mode 100644 index 000000000..a2bf3a54e --- /dev/null +++ b/block/snapshot.c @@ -0,0 +1,624 @@ +/* + * Block layer snapshot related functions + * + * Copyright (c) 2003-2008 Fabrice Bellard + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ + +#include "qemu/osdep.h" +#include "block/snapshot.h" +#include "block/block_int.h" +#include "block/qdict.h" +#include "qapi/error.h" +#include "qapi/qmp/qdict.h" +#include "qapi/qmp/qerror.h" +#include "qapi/qmp/qstring.h" +#include "qemu/option.h" +#include "sysemu/block-backend.h" + +QemuOptsList internal_snapshot_opts = { + .name = "snapshot", + .head = QTAILQ_HEAD_INITIALIZER(internal_snapshot_opts.head), + .desc = { + { + .name = SNAPSHOT_OPT_ID, + .type = QEMU_OPT_STRING, + .help = "snapshot id" + },{ + .name = SNAPSHOT_OPT_NAME, + .type = QEMU_OPT_STRING, + .help = "snapshot name" + },{ + /* end of list */ + } + }, +}; + +int bdrv_snapshot_find(BlockDriverState *bs, QEMUSnapshotInfo *sn_info, + const char *name) +{ + QEMUSnapshotInfo *sn_tab, *sn; + int nb_sns, i, ret; + + ret = -ENOENT; + nb_sns = bdrv_snapshot_list(bs, &sn_tab); + if (nb_sns < 0) { + return ret; + } + for (i = 0; i < nb_sns; i++) { + sn = &sn_tab[i]; + if (!strcmp(sn->name, name)) { + *sn_info = *sn; + ret = 0; + break; + } + } + g_free(sn_tab); + return ret; +} + +/** + * Look up an internal snapshot by @id and @name. + * @bs: block device to search + * @id: unique snapshot ID, or NULL + * @name: snapshot name, or NULL + * @sn_info: location to store information on the snapshot found + * @errp: location to store error, will be set only for exception + * + * This function will traverse snapshot list in @bs to search the matching + * one, @id and @name are the matching condition: + * If both @id and @name are specified, find the first one with id @id and + * name @name. + * If only @id is specified, find the first one with id @id. + * If only @name is specified, find the first one with name @name. + * if none is specified, abort(). + * + * Returns: true when a snapshot is found and @sn_info will be filled, false + * when error or not found. If all operation succeed but no matching one is + * found, @errp will NOT be set. + */ +bool bdrv_snapshot_find_by_id_and_name(BlockDriverState *bs, + const char *id, + const char *name, + QEMUSnapshotInfo *sn_info, + Error **errp) +{ + QEMUSnapshotInfo *sn_tab, *sn; + int nb_sns, i; + bool ret = false; + + assert(id || name); + + nb_sns = bdrv_snapshot_list(bs, &sn_tab); + if (nb_sns < 0) { + error_setg_errno(errp, -nb_sns, "Failed to get a snapshot list"); + return false; + } else if (nb_sns == 0) { + return false; + } + + if (id && name) { + for (i = 0; i < nb_sns; i++) { + sn = &sn_tab[i]; + if (!strcmp(sn->id_str, id) && !strcmp(sn->name, name)) { + *sn_info = *sn; + ret = true; + break; + } + } + } else if (id) { + for (i = 0; i < nb_sns; i++) { + sn = &sn_tab[i]; + if (!strcmp(sn->id_str, id)) { + *sn_info = *sn; + ret = true; + break; + } + } + } else if (name) { + for (i = 0; i < nb_sns; i++) { + sn = &sn_tab[i]; + if (!strcmp(sn->name, name)) { + *sn_info = *sn; + ret = true; + break; + } + } + } + + g_free(sn_tab); + return ret; +} + +/** + * Return a pointer to the child BDS pointer to which we can fall + * back if the given BDS does not support snapshots. + * Return NULL if there is no BDS to (safely) fall back to. + * + * We need to return an indirect pointer because bdrv_snapshot_goto() + * has to modify the BdrvChild pointer. + */ +static BdrvChild **bdrv_snapshot_fallback_ptr(BlockDriverState *bs) +{ + BdrvChild **fallback; + BdrvChild *child; + + /* + * The only BdrvChild pointers that are safe to modify (and which + * we can thus return a reference to) are bs->file and + * bs->backing. + */ + fallback = &bs->file; + if (!*fallback && bs->drv && bs->drv->is_filter) { + fallback = &bs->backing; + } + + if (!*fallback) { + return NULL; + } + + /* + * Check that there are no other children that would need to be + * snapshotted. If there are, it is not safe to fall back to + * *fallback. + */ + QLIST_FOREACH(child, &bs->children, next) { + if (child->role & (BDRV_CHILD_DATA | BDRV_CHILD_METADATA | + BDRV_CHILD_FILTERED) && + child != *fallback) + { + return NULL; + } + } + + return fallback; +} + +static BlockDriverState *bdrv_snapshot_fallback(BlockDriverState *bs) +{ + BdrvChild **child_ptr = bdrv_snapshot_fallback_ptr(bs); + return child_ptr ? (*child_ptr)->bs : NULL; +} + +int bdrv_can_snapshot(BlockDriverState *bs) +{ + BlockDriver *drv = bs->drv; + if (!drv || !bdrv_is_inserted(bs) || bdrv_is_read_only(bs)) { + return 0; + } + + if (!drv->bdrv_snapshot_create) { + BlockDriverState *fallback_bs = bdrv_snapshot_fallback(bs); + if (fallback_bs) { + return bdrv_can_snapshot(fallback_bs); + } + return 0; + } + + return 1; +} + +int bdrv_snapshot_create(BlockDriverState *bs, + QEMUSnapshotInfo *sn_info) +{ + BlockDriver *drv = bs->drv; + BlockDriverState *fallback_bs = bdrv_snapshot_fallback(bs); + if (!drv) { + return -ENOMEDIUM; + } + if (drv->bdrv_snapshot_create) { + return drv->bdrv_snapshot_create(bs, sn_info); + } + if (fallback_bs) { + return bdrv_snapshot_create(fallback_bs, sn_info); + } + return -ENOTSUP; +} + +int bdrv_snapshot_goto(BlockDriverState *bs, + const char *snapshot_id, + Error **errp) +{ + BlockDriver *drv = bs->drv; + BdrvChild **fallback_ptr; + int ret, open_ret; + + if (!drv) { + error_setg(errp, "Block driver is closed"); + return -ENOMEDIUM; + } + + if (!QLIST_EMPTY(&bs->dirty_bitmaps)) { + error_setg(errp, "Device has active dirty bitmaps"); + return -EBUSY; + } + + if (drv->bdrv_snapshot_goto) { + ret = drv->bdrv_snapshot_goto(bs, snapshot_id); + if (ret < 0) { + error_setg_errno(errp, -ret, "Failed to load snapshot"); + } + return ret; + } + + fallback_ptr = bdrv_snapshot_fallback_ptr(bs); + if (fallback_ptr) { + QDict *options; + QDict *file_options; + Error *local_err = NULL; + BlockDriverState *fallback_bs = (*fallback_ptr)->bs; + char *subqdict_prefix = g_strdup_printf("%s.", (*fallback_ptr)->name); + + options = qdict_clone_shallow(bs->options); + + /* Prevent it from getting deleted when detached from bs */ + bdrv_ref(fallback_bs); + + qdict_extract_subqdict(options, &file_options, subqdict_prefix); + qobject_unref(file_options); + g_free(subqdict_prefix); + + qdict_put_str(options, (*fallback_ptr)->name, + bdrv_get_node_name(fallback_bs)); + + if (drv->bdrv_close) { + drv->bdrv_close(bs); + } + + bdrv_unref_child(bs, *fallback_ptr); + *fallback_ptr = NULL; + + ret = bdrv_snapshot_goto(fallback_bs, snapshot_id, errp); + open_ret = drv->bdrv_open(bs, options, bs->open_flags, &local_err); + qobject_unref(options); + if (open_ret < 0) { + bdrv_unref(fallback_bs); + bs->drv = NULL; + /* A bdrv_snapshot_goto() error takes precedence */ + error_propagate(errp, local_err); + return ret < 0 ? ret : open_ret; + } + + assert(fallback_bs == (*fallback_ptr)->bs); + bdrv_unref(fallback_bs); + return ret; + } + + error_setg(errp, "Block driver does not support snapshots"); + return -ENOTSUP; +} + +/** + * Delete an internal snapshot by @snapshot_id and @name. + * @bs: block device used in the operation + * @snapshot_id: unique snapshot ID, or NULL + * @name: snapshot name, or NULL + * @errp: location to store error + * + * If both @snapshot_id and @name are specified, delete the first one with + * id @snapshot_id and name @name. + * If only @snapshot_id is specified, delete the first one with id + * @snapshot_id. + * If only @name is specified, delete the first one with name @name. + * if none is specified, return -EINVAL. + * + * Returns: 0 on success, -errno on failure. If @bs is not inserted, return + * -ENOMEDIUM. If @snapshot_id and @name are both NULL, return -EINVAL. If @bs + * does not support internal snapshot deletion, return -ENOTSUP. If @bs does + * not support parameter @snapshot_id or @name, or one of them is not correctly + * specified, return -EINVAL. If @bs can't find one matching @id and @name, + * return -ENOENT. If @errp != NULL, it will always be filled with error + * message on failure. + */ +int bdrv_snapshot_delete(BlockDriverState *bs, + const char *snapshot_id, + const char *name, + Error **errp) +{ + BlockDriver *drv = bs->drv; + BlockDriverState *fallback_bs = bdrv_snapshot_fallback(bs); + int ret; + + if (!drv) { + error_setg(errp, QERR_DEVICE_HAS_NO_MEDIUM, bdrv_get_device_name(bs)); + return -ENOMEDIUM; + } + if (!snapshot_id && !name) { + error_setg(errp, "snapshot_id and name are both NULL"); + return -EINVAL; + } + + /* drain all pending i/o before deleting snapshot */ + bdrv_drained_begin(bs); + + if (drv->bdrv_snapshot_delete) { + ret = drv->bdrv_snapshot_delete(bs, snapshot_id, name, errp); + } else if (fallback_bs) { + ret = bdrv_snapshot_delete(fallback_bs, snapshot_id, name, errp); + } else { + error_setg(errp, "Block format '%s' used by device '%s' " + "does not support internal snapshot deletion", + drv->format_name, bdrv_get_device_name(bs)); + ret = -ENOTSUP; + } + + bdrv_drained_end(bs); + return ret; +} + +int bdrv_snapshot_list(BlockDriverState *bs, + QEMUSnapshotInfo **psn_info) +{ + BlockDriver *drv = bs->drv; + BlockDriverState *fallback_bs = bdrv_snapshot_fallback(bs); + if (!drv) { + return -ENOMEDIUM; + } + if (drv->bdrv_snapshot_list) { + return drv->bdrv_snapshot_list(bs, psn_info); + } + if (fallback_bs) { + return bdrv_snapshot_list(fallback_bs, psn_info); + } + return -ENOTSUP; +} + +/** + * Temporarily load an internal snapshot by @snapshot_id and @name. + * @bs: block device used in the operation + * @snapshot_id: unique snapshot ID, or NULL + * @name: snapshot name, or NULL + * @errp: location to store error + * + * If both @snapshot_id and @name are specified, load the first one with + * id @snapshot_id and name @name. + * If only @snapshot_id is specified, load the first one with id + * @snapshot_id. + * If only @name is specified, load the first one with name @name. + * if none is specified, return -EINVAL. + * + * Returns: 0 on success, -errno on fail. If @bs is not inserted, return + * -ENOMEDIUM. If @bs is not readonly, return -EINVAL. If @bs did not support + * internal snapshot, return -ENOTSUP. If qemu can't find a matching @id and + * @name, return -ENOENT. If @errp != NULL, it will always be filled on + * failure. + */ +int bdrv_snapshot_load_tmp(BlockDriverState *bs, + const char *snapshot_id, + const char *name, + Error **errp) +{ + BlockDriver *drv = bs->drv; + + if (!drv) { + error_setg(errp, QERR_DEVICE_HAS_NO_MEDIUM, bdrv_get_device_name(bs)); + return -ENOMEDIUM; + } + if (!snapshot_id && !name) { + error_setg(errp, "snapshot_id and name are both NULL"); + return -EINVAL; + } + if (!bs->read_only) { + error_setg(errp, "Device is not readonly"); + return -EINVAL; + } + if (drv->bdrv_snapshot_load_tmp) { + return drv->bdrv_snapshot_load_tmp(bs, snapshot_id, name, errp); + } + error_setg(errp, "Block format '%s' used by device '%s' " + "does not support temporarily loading internal snapshots", + drv->format_name, bdrv_get_device_name(bs)); + return -ENOTSUP; +} + +int bdrv_snapshot_load_tmp_by_id_or_name(BlockDriverState *bs, + const char *id_or_name, + Error **errp) +{ + int ret; + Error *local_err = NULL; + + ret = bdrv_snapshot_load_tmp(bs, id_or_name, NULL, &local_err); + if (ret == -ENOENT || ret == -EINVAL) { + error_free(local_err); + local_err = NULL; + ret = bdrv_snapshot_load_tmp(bs, NULL, id_or_name, &local_err); + } + + error_propagate(errp, local_err); + + return ret; +} + +static bool bdrv_all_snapshots_includes_bs(BlockDriverState *bs) +{ + if (!bdrv_is_inserted(bs) || bdrv_is_read_only(bs)) { + return false; + } + + /* Include all nodes that are either in use by a BlockBackend, or that + * aren't attached to any node, but owned by the monitor. */ + return bdrv_has_blk(bs) || QLIST_EMPTY(&bs->parents); +} + +/* Group operations. All block drivers are involved. + * These functions will properly handle dataplane (take aio_context_acquire + * when appropriate for appropriate block drivers) */ + +bool bdrv_all_can_snapshot(BlockDriverState **first_bad_bs) +{ + bool ok = true; + BlockDriverState *bs; + BdrvNextIterator it; + + for (bs = bdrv_first(&it); bs; bs = bdrv_next(&it)) { + AioContext *ctx = bdrv_get_aio_context(bs); + + aio_context_acquire(ctx); + if (bdrv_all_snapshots_includes_bs(bs)) { + ok = bdrv_can_snapshot(bs); + } + aio_context_release(ctx); + if (!ok) { + bdrv_next_cleanup(&it); + goto fail; + } + } + +fail: + *first_bad_bs = bs; + return ok; +} + +int bdrv_all_delete_snapshot(const char *name, BlockDriverState **first_bad_bs, + Error **errp) +{ + int ret = 0; + BlockDriverState *bs; + BdrvNextIterator it; + QEMUSnapshotInfo sn1, *snapshot = &sn1; + + for (bs = bdrv_first(&it); bs; bs = bdrv_next(&it)) { + AioContext *ctx = bdrv_get_aio_context(bs); + + aio_context_acquire(ctx); + if (bdrv_all_snapshots_includes_bs(bs) && + bdrv_snapshot_find(bs, snapshot, name) >= 0) + { + ret = bdrv_snapshot_delete(bs, snapshot->id_str, + snapshot->name, errp); + } + aio_context_release(ctx); + if (ret < 0) { + bdrv_next_cleanup(&it); + goto fail; + } + } + +fail: + *first_bad_bs = bs; + return ret; +} + + +int bdrv_all_goto_snapshot(const char *name, BlockDriverState **first_bad_bs, + Error **errp) +{ + int ret = 0; + BlockDriverState *bs; + BdrvNextIterator it; + + for (bs = bdrv_first(&it); bs; bs = bdrv_next(&it)) { + AioContext *ctx = bdrv_get_aio_context(bs); + + aio_context_acquire(ctx); + if (bdrv_all_snapshots_includes_bs(bs)) { + ret = bdrv_snapshot_goto(bs, name, errp); + } + aio_context_release(ctx); + if (ret < 0) { + bdrv_next_cleanup(&it); + goto fail; + } + } + +fail: + *first_bad_bs = bs; + return ret; +} + +int bdrv_all_find_snapshot(const char *name, BlockDriverState **first_bad_bs) +{ + QEMUSnapshotInfo sn; + int err = 0; + BlockDriverState *bs; + BdrvNextIterator it; + + for (bs = bdrv_first(&it); bs; bs = bdrv_next(&it)) { + AioContext *ctx = bdrv_get_aio_context(bs); + + aio_context_acquire(ctx); + if (bdrv_all_snapshots_includes_bs(bs)) { + err = bdrv_snapshot_find(bs, &sn, name); + } + aio_context_release(ctx); + if (err < 0) { + bdrv_next_cleanup(&it); + goto fail; + } + } + +fail: + *first_bad_bs = bs; + return err; +} + +int bdrv_all_create_snapshot(QEMUSnapshotInfo *sn, + BlockDriverState *vm_state_bs, + uint64_t vm_state_size, + BlockDriverState **first_bad_bs) +{ + int err = 0; + BlockDriverState *bs; + BdrvNextIterator it; + + for (bs = bdrv_first(&it); bs; bs = bdrv_next(&it)) { + AioContext *ctx = bdrv_get_aio_context(bs); + + aio_context_acquire(ctx); + if (bs == vm_state_bs) { + sn->vm_state_size = vm_state_size; + err = bdrv_snapshot_create(bs, sn); + } else if (bdrv_all_snapshots_includes_bs(bs)) { + sn->vm_state_size = 0; + err = bdrv_snapshot_create(bs, sn); + } + aio_context_release(ctx); + if (err < 0) { + bdrv_next_cleanup(&it); + goto fail; + } + } + +fail: + *first_bad_bs = bs; + return err; +} + +BlockDriverState *bdrv_all_find_vmstate_bs(void) +{ + BlockDriverState *bs; + BdrvNextIterator it; + + for (bs = bdrv_first(&it); bs; bs = bdrv_next(&it)) { + AioContext *ctx = bdrv_get_aio_context(bs); + bool found; + + aio_context_acquire(ctx); + found = bdrv_all_snapshots_includes_bs(bs) && bdrv_can_snapshot(bs); + aio_context_release(ctx); + + if (found) { + bdrv_next_cleanup(&it); + break; + } + } + return bs; +} diff --git a/block/ssh.c b/block/ssh.c new file mode 100644 index 000000000..ebe3d8b63 --- /dev/null +++ b/block/ssh.c @@ -0,0 +1,1423 @@ +/* + * Secure Shell (ssh) backend for QEMU. + * + * Copyright (C) 2013 Red Hat Inc., Richard W.M. Jones + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ + +#include "qemu/osdep.h" + +#include +#include + +#include "block/block_int.h" +#include "block/qdict.h" +#include "qapi/error.h" +#include "qemu/error-report.h" +#include "qemu/module.h" +#include "qemu/option.h" +#include "qemu/ctype.h" +#include "qemu/cutils.h" +#include "qemu/sockets.h" +#include "qemu/uri.h" +#include "qapi/qapi-visit-sockets.h" +#include "qapi/qapi-visit-block-core.h" +#include "qapi/qmp/qdict.h" +#include "qapi/qmp/qstring.h" +#include "qapi/qobject-input-visitor.h" +#include "qapi/qobject-output-visitor.h" +#include "trace.h" + +/* + * TRACE_LIBSSH= enables tracing in libssh itself. + * The meaning of is described here: + * http://api.libssh.org/master/group__libssh__log.html + */ +#define TRACE_LIBSSH 0 /* see: SSH_LOG_* */ + +typedef struct BDRVSSHState { + /* Coroutine. */ + CoMutex lock; + + /* SSH connection. */ + int sock; /* socket */ + ssh_session session; /* ssh session */ + sftp_session sftp; /* sftp session */ + sftp_file sftp_handle; /* sftp remote file handle */ + + /* + * File attributes at open. We try to keep the .size field + * updated if it changes (eg by writing at the end of the file). + */ + sftp_attributes attrs; + + InetSocketAddress *inet; + + /* Used to warn if 'flush' is not supported. */ + bool unsafe_flush_warning; + + /* + * Store the user name for ssh_refresh_filename() because the + * default depends on the system you are on -- therefore, when we + * generate a filename, it should always contain the user name we + * are actually using. + */ + char *user; +} BDRVSSHState; + +static void ssh_state_init(BDRVSSHState *s) +{ + memset(s, 0, sizeof *s); + s->sock = -1; + qemu_co_mutex_init(&s->lock); +} + +static void ssh_state_free(BDRVSSHState *s) +{ + g_free(s->user); + + if (s->attrs) { + sftp_attributes_free(s->attrs); + } + if (s->sftp_handle) { + sftp_close(s->sftp_handle); + } + if (s->sftp) { + sftp_free(s->sftp); + } + if (s->session) { + ssh_disconnect(s->session); + ssh_free(s->session); /* This frees s->sock */ + } +} + +static void GCC_FMT_ATTR(3, 4) +session_error_setg(Error **errp, BDRVSSHState *s, const char *fs, ...) +{ + va_list args; + char *msg; + + va_start(args, fs); + msg = g_strdup_vprintf(fs, args); + va_end(args); + + if (s->session) { + const char *ssh_err; + int ssh_err_code; + + /* This is not an errno. See . */ + ssh_err = ssh_get_error(s->session); + ssh_err_code = ssh_get_error_code(s->session); + error_setg(errp, "%s: %s (libssh error code: %d)", + msg, ssh_err, ssh_err_code); + } else { + error_setg(errp, "%s", msg); + } + g_free(msg); +} + +static void GCC_FMT_ATTR(3, 4) +sftp_error_setg(Error **errp, BDRVSSHState *s, const char *fs, ...) +{ + va_list args; + char *msg; + + va_start(args, fs); + msg = g_strdup_vprintf(fs, args); + va_end(args); + + if (s->sftp) { + const char *ssh_err; + int ssh_err_code; + int sftp_err_code; + + /* This is not an errno. See . */ + ssh_err = ssh_get_error(s->session); + ssh_err_code = ssh_get_error_code(s->session); + /* See . */ + sftp_err_code = sftp_get_error(s->sftp); + + error_setg(errp, + "%s: %s (libssh error code: %d, sftp error code: %d)", + msg, ssh_err, ssh_err_code, sftp_err_code); + } else { + error_setg(errp, "%s", msg); + } + g_free(msg); +} + +static void sftp_error_trace(BDRVSSHState *s, const char *op) +{ + const char *ssh_err; + int ssh_err_code; + int sftp_err_code; + + /* This is not an errno. See . */ + ssh_err = ssh_get_error(s->session); + ssh_err_code = ssh_get_error_code(s->session); + /* See . */ + sftp_err_code = sftp_get_error(s->sftp); + + trace_sftp_error(op, ssh_err, ssh_err_code, sftp_err_code); +} + +static int parse_uri(const char *filename, QDict *options, Error **errp) +{ + URI *uri = NULL; + QueryParams *qp; + char *port_str; + int i; + + uri = uri_parse(filename); + if (!uri) { + return -EINVAL; + } + + if (g_strcmp0(uri->scheme, "ssh") != 0) { + error_setg(errp, "URI scheme must be 'ssh'"); + goto err; + } + + if (!uri->server || strcmp(uri->server, "") == 0) { + error_setg(errp, "missing hostname in URI"); + goto err; + } + + if (!uri->path || strcmp(uri->path, "") == 0) { + error_setg(errp, "missing remote path in URI"); + goto err; + } + + qp = query_params_parse(uri->query); + if (!qp) { + error_setg(errp, "could not parse query parameters"); + goto err; + } + + if(uri->user && strcmp(uri->user, "") != 0) { + qdict_put_str(options, "user", uri->user); + } + + qdict_put_str(options, "server.host", uri->server); + + port_str = g_strdup_printf("%d", uri->port ?: 22); + qdict_put_str(options, "server.port", port_str); + g_free(port_str); + + qdict_put_str(options, "path", uri->path); + + /* Pick out any query parameters that we understand, and ignore + * the rest. + */ + for (i = 0; i < qp->n; ++i) { + if (strcmp(qp->p[i].name, "host_key_check") == 0) { + qdict_put_str(options, "host_key_check", qp->p[i].value); + } + } + + query_params_free(qp); + uri_free(uri); + return 0; + + err: + if (uri) { + uri_free(uri); + } + return -EINVAL; +} + +static bool ssh_has_filename_options_conflict(QDict *options, Error **errp) +{ + const QDictEntry *qe; + + for (qe = qdict_first(options); qe; qe = qdict_next(options, qe)) { + if (!strcmp(qe->key, "host") || + !strcmp(qe->key, "port") || + !strcmp(qe->key, "path") || + !strcmp(qe->key, "user") || + !strcmp(qe->key, "host_key_check") || + strstart(qe->key, "server.", NULL)) + { + error_setg(errp, "Option '%s' cannot be used with a file name", + qe->key); + return true; + } + } + + return false; +} + +static void ssh_parse_filename(const char *filename, QDict *options, + Error **errp) +{ + if (ssh_has_filename_options_conflict(options, errp)) { + return; + } + + parse_uri(filename, options, errp); +} + +static int check_host_key_knownhosts(BDRVSSHState *s, Error **errp) +{ + int ret; +#ifdef HAVE_LIBSSH_0_8 + enum ssh_known_hosts_e state; + int r; + ssh_key pubkey; + enum ssh_keytypes_e pubkey_type; + unsigned char *server_hash = NULL; + size_t server_hash_len; + char *fingerprint = NULL; + + state = ssh_session_is_known_server(s->session); + trace_ssh_server_status(state); + + switch (state) { + case SSH_KNOWN_HOSTS_OK: + /* OK */ + trace_ssh_check_host_key_knownhosts(); + break; + case SSH_KNOWN_HOSTS_CHANGED: + ret = -EINVAL; + r = ssh_get_server_publickey(s->session, &pubkey); + if (r == 0) { + r = ssh_get_publickey_hash(pubkey, SSH_PUBLICKEY_HASH_SHA256, + &server_hash, &server_hash_len); + pubkey_type = ssh_key_type(pubkey); + ssh_key_free(pubkey); + } + if (r == 0) { + fingerprint = ssh_get_fingerprint_hash(SSH_PUBLICKEY_HASH_SHA256, + server_hash, + server_hash_len); + ssh_clean_pubkey_hash(&server_hash); + } + if (fingerprint) { + error_setg(errp, + "host key (%s key with fingerprint %s) does not match " + "the one in known_hosts; this may be a possible attack", + ssh_key_type_to_char(pubkey_type), fingerprint); + ssh_string_free_char(fingerprint); + } else { + error_setg(errp, + "host key does not match the one in known_hosts; this " + "may be a possible attack"); + } + goto out; + case SSH_KNOWN_HOSTS_OTHER: + ret = -EINVAL; + error_setg(errp, + "host key for this server not found, another type exists"); + goto out; + case SSH_KNOWN_HOSTS_UNKNOWN: + ret = -EINVAL; + error_setg(errp, "no host key was found in known_hosts"); + goto out; + case SSH_KNOWN_HOSTS_NOT_FOUND: + ret = -ENOENT; + error_setg(errp, "known_hosts file not found"); + goto out; + case SSH_KNOWN_HOSTS_ERROR: + ret = -EINVAL; + error_setg(errp, "error while checking the host"); + goto out; + default: + ret = -EINVAL; + error_setg(errp, "error while checking for known server (%d)", state); + goto out; + } +#else /* !HAVE_LIBSSH_0_8 */ + int state; + + state = ssh_is_server_known(s->session); + trace_ssh_server_status(state); + + switch (state) { + case SSH_SERVER_KNOWN_OK: + /* OK */ + trace_ssh_check_host_key_knownhosts(); + break; + case SSH_SERVER_KNOWN_CHANGED: + ret = -EINVAL; + error_setg(errp, + "host key does not match the one in known_hosts; this " + "may be a possible attack"); + goto out; + case SSH_SERVER_FOUND_OTHER: + ret = -EINVAL; + error_setg(errp, + "host key for this server not found, another type exists"); + goto out; + case SSH_SERVER_FILE_NOT_FOUND: + ret = -ENOENT; + error_setg(errp, "known_hosts file not found"); + goto out; + case SSH_SERVER_NOT_KNOWN: + ret = -EINVAL; + error_setg(errp, "no host key was found in known_hosts"); + goto out; + case SSH_SERVER_ERROR: + ret = -EINVAL; + error_setg(errp, "server error"); + goto out; + default: + ret = -EINVAL; + error_setg(errp, "error while checking for known server (%d)", state); + goto out; + } +#endif /* !HAVE_LIBSSH_0_8 */ + + /* known_hosts checking successful. */ + ret = 0; + + out: + return ret; +} + +static unsigned hex2decimal(char ch) +{ + if (ch >= '0' && ch <= '9') { + return (ch - '0'); + } else if (ch >= 'a' && ch <= 'f') { + return 10 + (ch - 'a'); + } else if (ch >= 'A' && ch <= 'F') { + return 10 + (ch - 'A'); + } + + return -1; +} + +/* Compare the binary fingerprint (hash of host key) with the + * host_key_check parameter. + */ +static int compare_fingerprint(const unsigned char *fingerprint, size_t len, + const char *host_key_check) +{ + unsigned c; + + while (len > 0) { + while (*host_key_check == ':') + host_key_check++; + if (!qemu_isxdigit(host_key_check[0]) || + !qemu_isxdigit(host_key_check[1])) + return 1; + c = hex2decimal(host_key_check[0]) * 16 + + hex2decimal(host_key_check[1]); + if (c - *fingerprint != 0) + return c - *fingerprint; + fingerprint++; + len--; + host_key_check += 2; + } + return *host_key_check - '\0'; +} + +static int +check_host_key_hash(BDRVSSHState *s, const char *hash, + enum ssh_publickey_hash_type type, Error **errp) +{ + int r; + ssh_key pubkey; + unsigned char *server_hash; + size_t server_hash_len; + +#ifdef HAVE_LIBSSH_0_8 + r = ssh_get_server_publickey(s->session, &pubkey); +#else + r = ssh_get_publickey(s->session, &pubkey); +#endif + if (r != SSH_OK) { + session_error_setg(errp, s, "failed to read remote host key"); + return -EINVAL; + } + + r = ssh_get_publickey_hash(pubkey, type, &server_hash, &server_hash_len); + ssh_key_free(pubkey); + if (r != 0) { + session_error_setg(errp, s, + "failed reading the hash of the server SSH key"); + return -EINVAL; + } + + r = compare_fingerprint(server_hash, server_hash_len, hash); + ssh_clean_pubkey_hash(&server_hash); + if (r != 0) { + error_setg(errp, "remote host key does not match host_key_check '%s'", + hash); + return -EPERM; + } + + return 0; +} + +static int check_host_key(BDRVSSHState *s, SshHostKeyCheck *hkc, Error **errp) +{ + SshHostKeyCheckMode mode; + + if (hkc) { + mode = hkc->mode; + } else { + mode = SSH_HOST_KEY_CHECK_MODE_KNOWN_HOSTS; + } + + switch (mode) { + case SSH_HOST_KEY_CHECK_MODE_NONE: + return 0; + case SSH_HOST_KEY_CHECK_MODE_HASH: + if (hkc->u.hash.type == SSH_HOST_KEY_CHECK_HASH_TYPE_MD5) { + return check_host_key_hash(s, hkc->u.hash.hash, + SSH_PUBLICKEY_HASH_MD5, errp); + } else if (hkc->u.hash.type == SSH_HOST_KEY_CHECK_HASH_TYPE_SHA1) { + return check_host_key_hash(s, hkc->u.hash.hash, + SSH_PUBLICKEY_HASH_SHA1, errp); + } + g_assert_not_reached(); + break; + case SSH_HOST_KEY_CHECK_MODE_KNOWN_HOSTS: + return check_host_key_knownhosts(s, errp); + default: + g_assert_not_reached(); + } + + return -EINVAL; +} + +static int authenticate(BDRVSSHState *s, Error **errp) +{ + int r, ret; + int method; + + /* Try to authenticate with the "none" method. */ + r = ssh_userauth_none(s->session, NULL); + if (r == SSH_AUTH_ERROR) { + ret = -EPERM; + session_error_setg(errp, s, "failed to authenticate using none " + "authentication"); + goto out; + } else if (r == SSH_AUTH_SUCCESS) { + /* Authenticated! */ + ret = 0; + goto out; + } + + method = ssh_userauth_list(s->session, NULL); + trace_ssh_auth_methods(method); + + /* + * Try to authenticate with publickey, using the ssh-agent + * if available. + */ + if (method & SSH_AUTH_METHOD_PUBLICKEY) { + r = ssh_userauth_publickey_auto(s->session, NULL, NULL); + if (r == SSH_AUTH_ERROR) { + ret = -EINVAL; + session_error_setg(errp, s, "failed to authenticate using " + "publickey authentication"); + goto out; + } else if (r == SSH_AUTH_SUCCESS) { + /* Authenticated! */ + ret = 0; + goto out; + } + } + + ret = -EPERM; + error_setg(errp, "failed to authenticate using publickey authentication " + "and the identities held by your ssh-agent"); + + out: + return ret; +} + +static QemuOptsList ssh_runtime_opts = { + .name = "ssh", + .head = QTAILQ_HEAD_INITIALIZER(ssh_runtime_opts.head), + .desc = { + { + .name = "host", + .type = QEMU_OPT_STRING, + .help = "Host to connect to", + }, + { + .name = "port", + .type = QEMU_OPT_NUMBER, + .help = "Port to connect to", + }, + { + .name = "host_key_check", + .type = QEMU_OPT_STRING, + .help = "Defines how and what to check the host key against", + }, + { /* end of list */ } + }, +}; + +static bool ssh_process_legacy_options(QDict *output_opts, + QemuOpts *legacy_opts, + Error **errp) +{ + const char *host = qemu_opt_get(legacy_opts, "host"); + const char *port = qemu_opt_get(legacy_opts, "port"); + const char *host_key_check = qemu_opt_get(legacy_opts, "host_key_check"); + + if (!host && port) { + error_setg(errp, "port may not be used without host"); + return false; + } + + if (host) { + qdict_put_str(output_opts, "server.host", host); + qdict_put_str(output_opts, "server.port", port ?: stringify(22)); + } + + if (host_key_check) { + if (strcmp(host_key_check, "no") == 0) { + qdict_put_str(output_opts, "host-key-check.mode", "none"); + } else if (strncmp(host_key_check, "md5:", 4) == 0) { + qdict_put_str(output_opts, "host-key-check.mode", "hash"); + qdict_put_str(output_opts, "host-key-check.type", "md5"); + qdict_put_str(output_opts, "host-key-check.hash", + &host_key_check[4]); + } else if (strncmp(host_key_check, "sha1:", 5) == 0) { + qdict_put_str(output_opts, "host-key-check.mode", "hash"); + qdict_put_str(output_opts, "host-key-check.type", "sha1"); + qdict_put_str(output_opts, "host-key-check.hash", + &host_key_check[5]); + } else if (strcmp(host_key_check, "yes") == 0) { + qdict_put_str(output_opts, "host-key-check.mode", "known_hosts"); + } else { + error_setg(errp, "unknown host_key_check setting (%s)", + host_key_check); + return false; + } + } + + return true; +} + +static BlockdevOptionsSsh *ssh_parse_options(QDict *options, Error **errp) +{ + BlockdevOptionsSsh *result = NULL; + QemuOpts *opts = NULL; + const QDictEntry *e; + Visitor *v; + + /* Translate legacy options */ + opts = qemu_opts_create(&ssh_runtime_opts, NULL, 0, &error_abort); + if (!qemu_opts_absorb_qdict(opts, options, errp)) { + goto fail; + } + + if (!ssh_process_legacy_options(options, opts, errp)) { + goto fail; + } + + /* Create the QAPI object */ + v = qobject_input_visitor_new_flat_confused(options, errp); + if (!v) { + goto fail; + } + + visit_type_BlockdevOptionsSsh(v, NULL, &result, errp); + visit_free(v); + if (!result) { + goto fail; + } + + /* Remove the processed options from the QDict (the visitor processes + * _all_ options in the QDict) */ + while ((e = qdict_first(options))) { + qdict_del(options, e->key); + } + +fail: + qemu_opts_del(opts); + return result; +} + +static int connect_to_ssh(BDRVSSHState *s, BlockdevOptionsSsh *opts, + int ssh_flags, int creat_mode, Error **errp) +{ + int r, ret; + unsigned int port = 0; + int new_sock = -1; + + if (opts->has_user) { + s->user = g_strdup(opts->user); + } else { + s->user = g_strdup(g_get_user_name()); + if (!s->user) { + error_setg_errno(errp, errno, "Can't get user name"); + ret = -errno; + goto err; + } + } + + /* Pop the config into our state object, Exit if invalid */ + s->inet = opts->server; + opts->server = NULL; + + if (qemu_strtoui(s->inet->port, NULL, 10, &port) < 0) { + error_setg(errp, "Use only numeric port value"); + ret = -EINVAL; + goto err; + } + + /* Open the socket and connect. */ + new_sock = inet_connect_saddr(s->inet, errp); + if (new_sock < 0) { + ret = -EIO; + goto err; + } + + /* + * Try to disable the Nagle algorithm on TCP sockets to reduce latency, + * but do not fail if it cannot be disabled. + */ + r = socket_set_nodelay(new_sock); + if (r < 0) { + warn_report("can't set TCP_NODELAY for the ssh server %s: %s", + s->inet->host, strerror(errno)); + } + + /* Create SSH session. */ + s->session = ssh_new(); + if (!s->session) { + ret = -EINVAL; + session_error_setg(errp, s, "failed to initialize libssh session"); + goto err; + } + + /* + * Make sure we are in blocking mode during the connection and + * authentication phases. + */ + ssh_set_blocking(s->session, 1); + + r = ssh_options_set(s->session, SSH_OPTIONS_USER, s->user); + if (r < 0) { + ret = -EINVAL; + session_error_setg(errp, s, + "failed to set the user in the libssh session"); + goto err; + } + + r = ssh_options_set(s->session, SSH_OPTIONS_HOST, s->inet->host); + if (r < 0) { + ret = -EINVAL; + session_error_setg(errp, s, + "failed to set the host in the libssh session"); + goto err; + } + + if (port > 0) { + r = ssh_options_set(s->session, SSH_OPTIONS_PORT, &port); + if (r < 0) { + ret = -EINVAL; + session_error_setg(errp, s, + "failed to set the port in the libssh session"); + goto err; + } + } + + r = ssh_options_set(s->session, SSH_OPTIONS_COMPRESSION, "none"); + if (r < 0) { + ret = -EINVAL; + session_error_setg(errp, s, + "failed to disable the compression in the libssh " + "session"); + goto err; + } + + /* Read ~/.ssh/config. */ + r = ssh_options_parse_config(s->session, NULL); + if (r < 0) { + ret = -EINVAL; + session_error_setg(errp, s, "failed to parse ~/.ssh/config"); + goto err; + } + + r = ssh_options_set(s->session, SSH_OPTIONS_FD, &new_sock); + if (r < 0) { + ret = -EINVAL; + session_error_setg(errp, s, + "failed to set the socket in the libssh session"); + goto err; + } + /* libssh took ownership of the socket. */ + s->sock = new_sock; + new_sock = -1; + + /* Connect. */ + r = ssh_connect(s->session); + if (r != SSH_OK) { + ret = -EINVAL; + session_error_setg(errp, s, "failed to establish SSH session"); + goto err; + } + + /* Check the remote host's key against known_hosts. */ + ret = check_host_key(s, opts->host_key_check, errp); + if (ret < 0) { + goto err; + } + + /* Authenticate. */ + ret = authenticate(s, errp); + if (ret < 0) { + goto err; + } + + /* Start SFTP. */ + s->sftp = sftp_new(s->session); + if (!s->sftp) { + session_error_setg(errp, s, "failed to create sftp handle"); + ret = -EINVAL; + goto err; + } + + r = sftp_init(s->sftp); + if (r < 0) { + sftp_error_setg(errp, s, "failed to initialize sftp handle"); + ret = -EINVAL; + goto err; + } + + /* Open the remote file. */ + trace_ssh_connect_to_ssh(opts->path, ssh_flags, creat_mode); + s->sftp_handle = sftp_open(s->sftp, opts->path, ssh_flags, creat_mode); + if (!s->sftp_handle) { + sftp_error_setg(errp, s, "failed to open remote file '%s'", + opts->path); + ret = -EINVAL; + goto err; + } + + /* Make sure the SFTP file is handled in blocking mode. */ + sftp_file_set_blocking(s->sftp_handle); + + s->attrs = sftp_fstat(s->sftp_handle); + if (!s->attrs) { + sftp_error_setg(errp, s, "failed to read file attributes"); + return -EINVAL; + } + + return 0; + + err: + if (s->attrs) { + sftp_attributes_free(s->attrs); + } + s->attrs = NULL; + if (s->sftp_handle) { + sftp_close(s->sftp_handle); + } + s->sftp_handle = NULL; + if (s->sftp) { + sftp_free(s->sftp); + } + s->sftp = NULL; + if (s->session) { + ssh_disconnect(s->session); + ssh_free(s->session); + } + s->session = NULL; + s->sock = -1; + if (new_sock >= 0) { + close(new_sock); + } + + return ret; +} + +static int ssh_file_open(BlockDriverState *bs, QDict *options, int bdrv_flags, + Error **errp) +{ + BDRVSSHState *s = bs->opaque; + BlockdevOptionsSsh *opts; + int ret; + int ssh_flags; + + ssh_state_init(s); + + ssh_flags = 0; + if (bdrv_flags & BDRV_O_RDWR) { + ssh_flags |= O_RDWR; + } else { + ssh_flags |= O_RDONLY; + } + + opts = ssh_parse_options(options, errp); + if (opts == NULL) { + return -EINVAL; + } + + /* Start up SSH. */ + ret = connect_to_ssh(s, opts, ssh_flags, 0, errp); + if (ret < 0) { + goto err; + } + + /* Go non-blocking. */ + ssh_set_blocking(s->session, 0); + + if (s->attrs->type == SSH_FILEXFER_TYPE_REGULAR) { + bs->supported_truncate_flags = BDRV_REQ_ZERO_WRITE; + } + + qapi_free_BlockdevOptionsSsh(opts); + + return 0; + + err: + qapi_free_BlockdevOptionsSsh(opts); + + return ret; +} + +/* Note: This is a blocking operation */ +static int ssh_grow_file(BDRVSSHState *s, int64_t offset, Error **errp) +{ + ssize_t ret; + char c[1] = { '\0' }; + int was_blocking = ssh_is_blocking(s->session); + + /* offset must be strictly greater than the current size so we do + * not overwrite anything */ + assert(offset > 0 && offset > s->attrs->size); + + ssh_set_blocking(s->session, 1); + + sftp_seek64(s->sftp_handle, offset - 1); + ret = sftp_write(s->sftp_handle, c, 1); + + ssh_set_blocking(s->session, was_blocking); + + if (ret < 0) { + sftp_error_setg(errp, s, "Failed to grow file"); + return -EIO; + } + + s->attrs->size = offset; + return 0; +} + +static QemuOptsList ssh_create_opts = { + .name = "ssh-create-opts", + .head = QTAILQ_HEAD_INITIALIZER(ssh_create_opts.head), + .desc = { + { + .name = BLOCK_OPT_SIZE, + .type = QEMU_OPT_SIZE, + .help = "Virtual disk size" + }, + { /* end of list */ } + } +}; + +static int ssh_co_create(BlockdevCreateOptions *options, Error **errp) +{ + BlockdevCreateOptionsSsh *opts = &options->u.ssh; + BDRVSSHState s; + int ret; + + assert(options->driver == BLOCKDEV_DRIVER_SSH); + + ssh_state_init(&s); + + ret = connect_to_ssh(&s, opts->location, + O_RDWR | O_CREAT | O_TRUNC, + 0644, errp); + if (ret < 0) { + goto fail; + } + + if (opts->size > 0) { + ret = ssh_grow_file(&s, opts->size, errp); + if (ret < 0) { + goto fail; + } + } + + ret = 0; +fail: + ssh_state_free(&s); + return ret; +} + +static int coroutine_fn ssh_co_create_opts(BlockDriver *drv, + const char *filename, + QemuOpts *opts, + Error **errp) +{ + BlockdevCreateOptions *create_options; + BlockdevCreateOptionsSsh *ssh_opts; + int ret; + QDict *uri_options = NULL; + + create_options = g_new0(BlockdevCreateOptions, 1); + create_options->driver = BLOCKDEV_DRIVER_SSH; + ssh_opts = &create_options->u.ssh; + + /* Get desired file size. */ + ssh_opts->size = ROUND_UP(qemu_opt_get_size_del(opts, BLOCK_OPT_SIZE, 0), + BDRV_SECTOR_SIZE); + trace_ssh_co_create_opts(ssh_opts->size); + + uri_options = qdict_new(); + ret = parse_uri(filename, uri_options, errp); + if (ret < 0) { + goto out; + } + + ssh_opts->location = ssh_parse_options(uri_options, errp); + if (ssh_opts->location == NULL) { + ret = -EINVAL; + goto out; + } + + ret = ssh_co_create(create_options, errp); + + out: + qobject_unref(uri_options); + qapi_free_BlockdevCreateOptions(create_options); + return ret; +} + +static void ssh_close(BlockDriverState *bs) +{ + BDRVSSHState *s = bs->opaque; + + ssh_state_free(s); +} + +static int ssh_has_zero_init(BlockDriverState *bs) +{ + BDRVSSHState *s = bs->opaque; + /* Assume false, unless we can positively prove it's true. */ + int has_zero_init = 0; + + if (s->attrs->type == SSH_FILEXFER_TYPE_REGULAR) { + has_zero_init = 1; + } + + return has_zero_init; +} + +typedef struct BDRVSSHRestart { + BlockDriverState *bs; + Coroutine *co; +} BDRVSSHRestart; + +static void restart_coroutine(void *opaque) +{ + BDRVSSHRestart *restart = opaque; + BlockDriverState *bs = restart->bs; + BDRVSSHState *s = bs->opaque; + AioContext *ctx = bdrv_get_aio_context(bs); + + trace_ssh_restart_coroutine(restart->co); + aio_set_fd_handler(ctx, s->sock, false, NULL, NULL, NULL, NULL); + + aio_co_wake(restart->co); +} + +/* A non-blocking call returned EAGAIN, so yield, ensuring the + * handlers are set up so that we'll be rescheduled when there is an + * interesting event on the socket. + */ +static coroutine_fn void co_yield(BDRVSSHState *s, BlockDriverState *bs) +{ + int r; + IOHandler *rd_handler = NULL, *wr_handler = NULL; + BDRVSSHRestart restart = { + .bs = bs, + .co = qemu_coroutine_self() + }; + + r = ssh_get_poll_flags(s->session); + + if (r & SSH_READ_PENDING) { + rd_handler = restart_coroutine; + } + if (r & SSH_WRITE_PENDING) { + wr_handler = restart_coroutine; + } + + trace_ssh_co_yield(s->sock, rd_handler, wr_handler); + + aio_set_fd_handler(bdrv_get_aio_context(bs), s->sock, + false, rd_handler, wr_handler, NULL, &restart); + qemu_coroutine_yield(); + trace_ssh_co_yield_back(s->sock); +} + +static coroutine_fn int ssh_read(BDRVSSHState *s, BlockDriverState *bs, + int64_t offset, size_t size, + QEMUIOVector *qiov) +{ + ssize_t r; + size_t got; + char *buf, *end_of_vec; + struct iovec *i; + + trace_ssh_read(offset, size); + + trace_ssh_seek(offset); + sftp_seek64(s->sftp_handle, offset); + + /* This keeps track of the current iovec element ('i'), where we + * will write to next ('buf'), and the end of the current iovec + * ('end_of_vec'). + */ + i = &qiov->iov[0]; + buf = i->iov_base; + end_of_vec = i->iov_base + i->iov_len; + + for (got = 0; got < size; ) { + size_t request_read_size; + again: + /* + * The size of SFTP packets is limited to 32K bytes, so limit + * the amount of data requested to 16K, as libssh currently + * does not handle multiple requests on its own. + */ + request_read_size = MIN(end_of_vec - buf, 16384); + trace_ssh_read_buf(buf, end_of_vec - buf, request_read_size); + r = sftp_read(s->sftp_handle, buf, request_read_size); + trace_ssh_read_return(r, sftp_get_error(s->sftp)); + + if (r == SSH_AGAIN) { + co_yield(s, bs); + goto again; + } + if (r == SSH_EOF || (r == 0 && sftp_get_error(s->sftp) == SSH_FX_EOF)) { + /* EOF: Short read so pad the buffer with zeroes and return it. */ + qemu_iovec_memset(qiov, got, 0, size - got); + return 0; + } + if (r <= 0) { + sftp_error_trace(s, "read"); + return -EIO; + } + + got += r; + buf += r; + if (buf >= end_of_vec && got < size) { + i++; + buf = i->iov_base; + end_of_vec = i->iov_base + i->iov_len; + } + } + + return 0; +} + +static coroutine_fn int ssh_co_readv(BlockDriverState *bs, + int64_t sector_num, + int nb_sectors, QEMUIOVector *qiov) +{ + BDRVSSHState *s = bs->opaque; + int ret; + + qemu_co_mutex_lock(&s->lock); + ret = ssh_read(s, bs, sector_num * BDRV_SECTOR_SIZE, + nb_sectors * BDRV_SECTOR_SIZE, qiov); + qemu_co_mutex_unlock(&s->lock); + + return ret; +} + +static int ssh_write(BDRVSSHState *s, BlockDriverState *bs, + int64_t offset, size_t size, + QEMUIOVector *qiov) +{ + ssize_t r; + size_t written; + char *buf, *end_of_vec; + struct iovec *i; + + trace_ssh_write(offset, size); + + trace_ssh_seek(offset); + sftp_seek64(s->sftp_handle, offset); + + /* This keeps track of the current iovec element ('i'), where we + * will read from next ('buf'), and the end of the current iovec + * ('end_of_vec'). + */ + i = &qiov->iov[0]; + buf = i->iov_base; + end_of_vec = i->iov_base + i->iov_len; + + for (written = 0; written < size; ) { + size_t request_write_size; + again: + /* + * Avoid too large data packets, as libssh currently does not + * handle multiple requests on its own. + */ + request_write_size = MIN(end_of_vec - buf, 131072); + trace_ssh_write_buf(buf, end_of_vec - buf, request_write_size); + r = sftp_write(s->sftp_handle, buf, request_write_size); + trace_ssh_write_return(r, sftp_get_error(s->sftp)); + + if (r == SSH_AGAIN) { + co_yield(s, bs); + goto again; + } + if (r < 0) { + sftp_error_trace(s, "write"); + return -EIO; + } + + written += r; + buf += r; + if (buf >= end_of_vec && written < size) { + i++; + buf = i->iov_base; + end_of_vec = i->iov_base + i->iov_len; + } + + if (offset + written > s->attrs->size) { + s->attrs->size = offset + written; + } + } + + return 0; +} + +static coroutine_fn int ssh_co_writev(BlockDriverState *bs, + int64_t sector_num, + int nb_sectors, QEMUIOVector *qiov, + int flags) +{ + BDRVSSHState *s = bs->opaque; + int ret; + + assert(!flags); + qemu_co_mutex_lock(&s->lock); + ret = ssh_write(s, bs, sector_num * BDRV_SECTOR_SIZE, + nb_sectors * BDRV_SECTOR_SIZE, qiov); + qemu_co_mutex_unlock(&s->lock); + + return ret; +} + +static void unsafe_flush_warning(BDRVSSHState *s, const char *what) +{ + if (!s->unsafe_flush_warning) { + warn_report("ssh server %s does not support fsync", + s->inet->host); + if (what) { + error_report("to support fsync, you need %s", what); + } + s->unsafe_flush_warning = true; + } +} + +#ifdef HAVE_LIBSSH_0_8 + +static coroutine_fn int ssh_flush(BDRVSSHState *s, BlockDriverState *bs) +{ + int r; + + trace_ssh_flush(); + + if (!sftp_extension_supported(s->sftp, "fsync@openssh.com", "1")) { + unsafe_flush_warning(s, "OpenSSH >= 6.3"); + return 0; + } + again: + r = sftp_fsync(s->sftp_handle); + if (r == SSH_AGAIN) { + co_yield(s, bs); + goto again; + } + if (r < 0) { + sftp_error_trace(s, "fsync"); + return -EIO; + } + + return 0; +} + +static coroutine_fn int ssh_co_flush(BlockDriverState *bs) +{ + BDRVSSHState *s = bs->opaque; + int ret; + + qemu_co_mutex_lock(&s->lock); + ret = ssh_flush(s, bs); + qemu_co_mutex_unlock(&s->lock); + + return ret; +} + +#else /* !HAVE_LIBSSH_0_8 */ + +static coroutine_fn int ssh_co_flush(BlockDriverState *bs) +{ + BDRVSSHState *s = bs->opaque; + + unsafe_flush_warning(s, "libssh >= 0.8.0"); + return 0; +} + +#endif /* !HAVE_LIBSSH_0_8 */ + +static int64_t ssh_getlength(BlockDriverState *bs) +{ + BDRVSSHState *s = bs->opaque; + int64_t length; + + /* Note we cannot make a libssh call here. */ + length = (int64_t) s->attrs->size; + trace_ssh_getlength(length); + + return length; +} + +static int coroutine_fn ssh_co_truncate(BlockDriverState *bs, int64_t offset, + bool exact, PreallocMode prealloc, + BdrvRequestFlags flags, Error **errp) +{ + BDRVSSHState *s = bs->opaque; + + if (prealloc != PREALLOC_MODE_OFF) { + error_setg(errp, "Unsupported preallocation mode '%s'", + PreallocMode_str(prealloc)); + return -ENOTSUP; + } + + if (offset < s->attrs->size) { + error_setg(errp, "ssh driver does not support shrinking files"); + return -ENOTSUP; + } + + if (offset == s->attrs->size) { + return 0; + } + + return ssh_grow_file(s, offset, errp); +} + +static void ssh_refresh_filename(BlockDriverState *bs) +{ + BDRVSSHState *s = bs->opaque; + const char *path, *host_key_check; + int ret; + + /* + * None of these options can be represented in a plain "host:port" + * format, so if any was given, we have to abort. + */ + if (s->inet->has_ipv4 || s->inet->has_ipv6 || s->inet->has_to || + s->inet->has_numeric) + { + return; + } + + path = qdict_get_try_str(bs->full_open_options, "path"); + assert(path); /* mandatory option */ + + host_key_check = qdict_get_try_str(bs->full_open_options, "host_key_check"); + + ret = snprintf(bs->exact_filename, sizeof(bs->exact_filename), + "ssh://%s@%s:%s%s%s%s", + s->user, s->inet->host, s->inet->port, path, + host_key_check ? "?host_key_check=" : "", + host_key_check ?: ""); + if (ret >= sizeof(bs->exact_filename)) { + /* An overflow makes the filename unusable, so do not report any */ + bs->exact_filename[0] = '\0'; + } +} + +static char *ssh_bdrv_dirname(BlockDriverState *bs, Error **errp) +{ + if (qdict_haskey(bs->full_open_options, "host_key_check")) { + /* + * We cannot generate a simple prefix if we would have to + * append a query string. + */ + error_setg(errp, + "Cannot generate a base directory with host_key_check set"); + return NULL; + } + + if (bs->exact_filename[0] == '\0') { + error_setg(errp, "Cannot generate a base directory for this ssh node"); + return NULL; + } + + return path_combine(bs->exact_filename, ""); +} + +static const char *const ssh_strong_runtime_opts[] = { + "host", + "port", + "path", + "user", + "host_key_check", + "server.", + + NULL +}; + +static BlockDriver bdrv_ssh = { + .format_name = "ssh", + .protocol_name = "ssh", + .instance_size = sizeof(BDRVSSHState), + .bdrv_parse_filename = ssh_parse_filename, + .bdrv_file_open = ssh_file_open, + .bdrv_co_create = ssh_co_create, + .bdrv_co_create_opts = ssh_co_create_opts, + .bdrv_close = ssh_close, + .bdrv_has_zero_init = ssh_has_zero_init, + .bdrv_co_readv = ssh_co_readv, + .bdrv_co_writev = ssh_co_writev, + .bdrv_getlength = ssh_getlength, + .bdrv_co_truncate = ssh_co_truncate, + .bdrv_co_flush_to_disk = ssh_co_flush, + .bdrv_refresh_filename = ssh_refresh_filename, + .bdrv_dirname = ssh_bdrv_dirname, + .create_opts = &ssh_create_opts, + .strong_runtime_opts = ssh_strong_runtime_opts, +}; + +static void bdrv_ssh_init(void) +{ + int r; + + r = ssh_init(); + if (r != 0) { + fprintf(stderr, "libssh initialization failed, %d\n", r); + exit(EXIT_FAILURE); + } + +#if TRACE_LIBSSH != 0 + ssh_set_log_level(TRACE_LIBSSH); +#endif + + bdrv_register(&bdrv_ssh); +} + +block_init(bdrv_ssh_init); diff --git a/block/stream.c b/block/stream.c new file mode 100644 index 000000000..236384f2f --- /dev/null +++ b/block/stream.c @@ -0,0 +1,309 @@ +/* + * Image streaming + * + * Copyright IBM, Corp. 2011 + * + * Authors: + * Stefan Hajnoczi + * + * This work is licensed under the terms of the GNU LGPL, version 2 or later. + * See the COPYING.LIB file in the top-level directory. + * + */ + +#include "qemu/osdep.h" +#include "trace.h" +#include "block/block_int.h" +#include "block/blockjob_int.h" +#include "qapi/error.h" +#include "qapi/qmp/qerror.h" +#include "qemu/ratelimit.h" +#include "sysemu/block-backend.h" + +enum { + /* + * Maximum chunk size to feed to copy-on-read. This should be + * large enough to process multiple clusters in a single call, so + * that populating contiguous regions of the image is efficient. + */ + STREAM_CHUNK = 512 * 1024, /* in bytes */ +}; + +typedef struct StreamBlockJob { + BlockJob common; + BlockDriverState *base_overlay; /* COW overlay (stream from this) */ + BlockDriverState *above_base; /* Node directly above the base */ + BlockdevOnError on_error; + char *backing_file_str; + bool bs_read_only; + bool chain_frozen; +} StreamBlockJob; + +static int coroutine_fn stream_populate(BlockBackend *blk, + int64_t offset, uint64_t bytes) +{ + assert(bytes < SIZE_MAX); + + return blk_co_preadv(blk, offset, bytes, NULL, + BDRV_REQ_COPY_ON_READ | BDRV_REQ_PREFETCH); +} + +static void stream_abort(Job *job) +{ + StreamBlockJob *s = container_of(job, StreamBlockJob, common.job); + + if (s->chain_frozen) { + BlockJob *bjob = &s->common; + bdrv_unfreeze_backing_chain(blk_bs(bjob->blk), s->above_base); + } +} + +static int stream_prepare(Job *job) +{ + StreamBlockJob *s = container_of(job, StreamBlockJob, common.job); + BlockJob *bjob = &s->common; + BlockDriverState *bs = blk_bs(bjob->blk); + BlockDriverState *unfiltered_bs = bdrv_skip_filters(bs); + BlockDriverState *base = bdrv_filter_or_cow_bs(s->above_base); + Error *local_err = NULL; + int ret = 0; + + bdrv_unfreeze_backing_chain(bs, s->above_base); + s->chain_frozen = false; + + if (bdrv_cow_child(unfiltered_bs)) { + const char *base_id = NULL, *base_fmt = NULL; + if (base) { + base_id = s->backing_file_str; + if (base->drv) { + base_fmt = base->drv->format_name; + } + } + bdrv_set_backing_hd(unfiltered_bs, base, &local_err); + ret = bdrv_change_backing_file(unfiltered_bs, base_id, base_fmt, false); + if (local_err) { + error_report_err(local_err); + return -EPERM; + } + } + + return ret; +} + +static void stream_clean(Job *job) +{ + StreamBlockJob *s = container_of(job, StreamBlockJob, common.job); + BlockJob *bjob = &s->common; + BlockDriverState *bs = blk_bs(bjob->blk); + + /* Reopen the image back in read-only mode if necessary */ + if (s->bs_read_only) { + /* Give up write permissions before making it read-only */ + blk_set_perm(bjob->blk, 0, BLK_PERM_ALL, &error_abort); + bdrv_reopen_set_read_only(bs, true, NULL); + } + + g_free(s->backing_file_str); +} + +static int coroutine_fn stream_run(Job *job, Error **errp) +{ + StreamBlockJob *s = container_of(job, StreamBlockJob, common.job); + BlockBackend *blk = s->common.blk; + BlockDriverState *bs = blk_bs(blk); + BlockDriverState *unfiltered_bs = bdrv_skip_filters(bs); + bool enable_cor = !bdrv_cow_child(s->base_overlay); + int64_t len; + int64_t offset = 0; + uint64_t delay_ns = 0; + int error = 0; + int64_t n = 0; /* bytes */ + + if (unfiltered_bs == s->base_overlay) { + /* Nothing to stream */ + return 0; + } + + len = bdrv_getlength(bs); + if (len < 0) { + return len; + } + job_progress_set_remaining(&s->common.job, len); + + /* Turn on copy-on-read for the whole block device so that guest read + * requests help us make progress. Only do this when copying the entire + * backing chain since the copy-on-read operation does not take base into + * account. + */ + if (enable_cor) { + bdrv_enable_copy_on_read(bs); + } + + for ( ; offset < len; offset += n) { + bool copy; + int ret; + + /* Note that even when no rate limit is applied we need to yield + * with no pending I/O here so that bdrv_drain_all() returns. + */ + job_sleep_ns(&s->common.job, delay_ns); + if (job_is_cancelled(&s->common.job)) { + break; + } + + copy = false; + + ret = bdrv_is_allocated(unfiltered_bs, offset, STREAM_CHUNK, &n); + if (ret == 1) { + /* Allocated in the top, no need to copy. */ + } else if (ret >= 0) { + /* Copy if allocated in the intermediate images. Limit to the + * known-unallocated area [offset, offset+n*BDRV_SECTOR_SIZE). */ + ret = bdrv_is_allocated_above(bdrv_cow_bs(unfiltered_bs), + s->base_overlay, true, + offset, n, &n); + /* Finish early if end of backing file has been reached */ + if (ret == 0 && n == 0) { + n = len - offset; + } + + copy = (ret > 0); + } + trace_stream_one_iteration(s, offset, n, ret); + if (copy) { + ret = stream_populate(blk, offset, n); + } + if (ret < 0) { + BlockErrorAction action = + block_job_error_action(&s->common, s->on_error, true, -ret); + if (action == BLOCK_ERROR_ACTION_STOP) { + n = 0; + continue; + } + if (error == 0) { + error = ret; + } + if (action == BLOCK_ERROR_ACTION_REPORT) { + break; + } + } + + /* Publish progress */ + job_progress_update(&s->common.job, n); + if (copy) { + delay_ns = block_job_ratelimit_get_delay(&s->common, n); + } else { + delay_ns = 0; + } + } + + if (enable_cor) { + bdrv_disable_copy_on_read(bs); + } + + /* Do not remove the backing file if an error was there but ignored. */ + return error; +} + +static const BlockJobDriver stream_job_driver = { + .job_driver = { + .instance_size = sizeof(StreamBlockJob), + .job_type = JOB_TYPE_STREAM, + .free = block_job_free, + .run = stream_run, + .prepare = stream_prepare, + .abort = stream_abort, + .clean = stream_clean, + .user_resume = block_job_user_resume, + }, +}; + +void stream_start(const char *job_id, BlockDriverState *bs, + BlockDriverState *base, const char *backing_file_str, + int creation_flags, int64_t speed, + BlockdevOnError on_error, Error **errp) +{ + StreamBlockJob *s; + BlockDriverState *iter; + bool bs_read_only; + int basic_flags = BLK_PERM_CONSISTENT_READ | BLK_PERM_WRITE_UNCHANGED; + BlockDriverState *base_overlay = bdrv_find_overlay(bs, base); + BlockDriverState *above_base; + + if (!base_overlay) { + error_setg(errp, "'%s' is not in the backing chain of '%s'", + base->node_name, bs->node_name); + return; + } + + /* + * Find the node directly above @base. @base_overlay is a COW overlay, so + * it must have a bdrv_cow_child(), but it is the immediate overlay of + * @base, so between the two there can only be filters. + */ + above_base = base_overlay; + if (bdrv_cow_bs(above_base) != base) { + above_base = bdrv_cow_bs(above_base); + while (bdrv_filter_bs(above_base) != base) { + above_base = bdrv_filter_bs(above_base); + } + } + + if (bdrv_freeze_backing_chain(bs, above_base, errp) < 0) { + return; + } + + /* Make sure that the image is opened in read-write mode */ + bs_read_only = bdrv_is_read_only(bs); + if (bs_read_only) { + if (bdrv_reopen_set_read_only(bs, false, errp) != 0) { + bs_read_only = false; + goto fail; + } + } + + /* Prevent concurrent jobs trying to modify the graph structure here, we + * already have our own plans. Also don't allow resize as the image size is + * queried only at the job start and then cached. */ + s = block_job_create(job_id, &stream_job_driver, NULL, bs, + basic_flags | BLK_PERM_GRAPH_MOD, + basic_flags | BLK_PERM_WRITE, + speed, creation_flags, NULL, NULL, errp); + if (!s) { + goto fail; + } + + /* Block all intermediate nodes between bs and base, because they will + * disappear from the chain after this operation. The streaming job reads + * every block only once, assuming that it doesn't change, so forbid writes + * and resizes. Reassign the base node pointer because the backing BS of the + * bottom node might change after the call to bdrv_reopen_set_read_only() + * due to parallel block jobs running. + * above_base node might change after the call to + * bdrv_reopen_set_read_only() due to parallel block jobs running. + */ + base = bdrv_filter_or_cow_bs(above_base); + for (iter = bdrv_filter_or_cow_bs(bs); iter != base; + iter = bdrv_filter_or_cow_bs(iter)) + { + block_job_add_bdrv(&s->common, "intermediate node", iter, 0, + basic_flags, &error_abort); + } + + s->base_overlay = base_overlay; + s->above_base = above_base; + s->backing_file_str = g_strdup(backing_file_str); + s->bs_read_only = bs_read_only; + s->chain_frozen = true; + + s->on_error = on_error; + trace_stream_start(bs, base, s); + job_start(&s->common.job); + return; + +fail: + if (bs_read_only) { + bdrv_reopen_set_read_only(bs, true, NULL); + } + bdrv_unfreeze_backing_chain(bs, above_base); +} diff --git a/block/throttle-groups.c b/block/throttle-groups.c new file mode 100644 index 000000000..e2f2813c0 --- /dev/null +++ b/block/throttle-groups.c @@ -0,0 +1,977 @@ +/* + * QEMU block throttling group infrastructure + * + * Copyright (C) Nodalink, EURL. 2014 + * Copyright (C) Igalia, S.L. 2015 + * + * Authors: + * Benoît Canet + * Alberto Garcia + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation; either version 2 or + * (at your option) version 3 of the License. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, see . + */ + +#include "qemu/osdep.h" +#include "sysemu/block-backend.h" +#include "block/throttle-groups.h" +#include "qemu/throttle-options.h" +#include "qemu/main-loop.h" +#include "qemu/queue.h" +#include "qemu/thread.h" +#include "sysemu/qtest.h" +#include "qapi/error.h" +#include "qapi/qapi-visit-block-core.h" +#include "qom/object.h" +#include "qom/object_interfaces.h" + +static void throttle_group_obj_init(Object *obj); +static void throttle_group_obj_complete(UserCreatable *obj, Error **errp); +static void timer_cb(ThrottleGroupMember *tgm, bool is_write); + +/* The ThrottleGroup structure (with its ThrottleState) is shared + * among different ThrottleGroupMembers and it's independent from + * AioContext, so in order to use it from different threads it needs + * its own locking. + * + * This locking is however handled internally in this file, so it's + * transparent to outside users. + * + * The whole ThrottleGroup structure is private and invisible to + * outside users, that only use it through its ThrottleState. + * + * In addition to the ThrottleGroup structure, ThrottleGroupMember has + * fields that need to be accessed by other members of the group and + * therefore also need to be protected by this lock. Once a + * ThrottleGroupMember is registered in a group those fields can be accessed + * by other threads any time. + * + * Again, all this is handled internally and is mostly transparent to + * the outside. The 'throttle_timers' field however has an additional + * constraint because it may be temporarily invalid (see for example + * blk_set_aio_context()). Therefore in this file a thread will + * access some other ThrottleGroupMember's timers only after verifying that + * that ThrottleGroupMember has throttled requests in the queue. + */ +struct ThrottleGroup { + Object parent_obj; + + /* refuse individual property change if initialization is complete */ + bool is_initialized; + char *name; /* This is constant during the lifetime of the group */ + + QemuMutex lock; /* This lock protects the following four fields */ + ThrottleState ts; + QLIST_HEAD(, ThrottleGroupMember) head; + ThrottleGroupMember *tokens[2]; + bool any_timer_armed[2]; + QEMUClockType clock_type; + + /* This field is protected by the global QEMU mutex */ + QTAILQ_ENTRY(ThrottleGroup) list; +}; + +/* This is protected by the global QEMU mutex */ +static QTAILQ_HEAD(, ThrottleGroup) throttle_groups = + QTAILQ_HEAD_INITIALIZER(throttle_groups); + + +/* This function reads throttle_groups and must be called under the global + * mutex. + */ +static ThrottleGroup *throttle_group_by_name(const char *name) +{ + ThrottleGroup *iter; + + /* Look for an existing group with that name */ + QTAILQ_FOREACH(iter, &throttle_groups, list) { + if (!g_strcmp0(name, iter->name)) { + return iter; + } + } + + return NULL; +} + +/* This function reads throttle_groups and must be called under the global + * mutex. + */ +bool throttle_group_exists(const char *name) +{ + return throttle_group_by_name(name) != NULL; +} + +/* Increments the reference count of a ThrottleGroup given its name. + * + * If no ThrottleGroup is found with the given name a new one is + * created. + * + * This function edits throttle_groups and must be called under the global + * mutex. + * + * @name: the name of the ThrottleGroup + * @ret: the ThrottleState member of the ThrottleGroup + */ +ThrottleState *throttle_group_incref(const char *name) +{ + ThrottleGroup *tg = NULL; + + /* Look for an existing group with that name */ + tg = throttle_group_by_name(name); + + if (tg) { + object_ref(OBJECT(tg)); + } else { + /* Create a new one if not found */ + /* new ThrottleGroup obj will have a refcnt = 1 */ + tg = THROTTLE_GROUP(object_new(TYPE_THROTTLE_GROUP)); + tg->name = g_strdup(name); + throttle_group_obj_complete(USER_CREATABLE(tg), &error_abort); + } + + return &tg->ts; +} + +/* Decrease the reference count of a ThrottleGroup. + * + * When the reference count reaches zero the ThrottleGroup is + * destroyed. + * + * This function edits throttle_groups and must be called under the global + * mutex. + * + * @ts: The ThrottleGroup to unref, given by its ThrottleState member + */ +void throttle_group_unref(ThrottleState *ts) +{ + ThrottleGroup *tg = container_of(ts, ThrottleGroup, ts); + object_unref(OBJECT(tg)); +} + +/* Get the name from a ThrottleGroupMember's group. The name (and the pointer) + * is guaranteed to remain constant during the lifetime of the group. + * + * @tgm: a ThrottleGroupMember + * @ret: the name of the group. + */ +const char *throttle_group_get_name(ThrottleGroupMember *tgm) +{ + ThrottleGroup *tg = container_of(tgm->throttle_state, ThrottleGroup, ts); + return tg->name; +} + +/* Return the next ThrottleGroupMember in the round-robin sequence, simulating + * a circular list. + * + * This assumes that tg->lock is held. + * + * @tgm: the current ThrottleGroupMember + * @ret: the next ThrottleGroupMember in the sequence + */ +static ThrottleGroupMember *throttle_group_next_tgm(ThrottleGroupMember *tgm) +{ + ThrottleState *ts = tgm->throttle_state; + ThrottleGroup *tg = container_of(ts, ThrottleGroup, ts); + ThrottleGroupMember *next = QLIST_NEXT(tgm, round_robin); + + if (!next) { + next = QLIST_FIRST(&tg->head); + } + + return next; +} + +/* + * Return whether a ThrottleGroupMember has pending requests. + * + * This assumes that tg->lock is held. + * + * @tgm: the ThrottleGroupMember + * @is_write: the type of operation (read/write) + * @ret: whether the ThrottleGroupMember has pending requests. + */ +static inline bool tgm_has_pending_reqs(ThrottleGroupMember *tgm, + bool is_write) +{ + return tgm->pending_reqs[is_write]; +} + +/* Return the next ThrottleGroupMember in the round-robin sequence with pending + * I/O requests. + * + * This assumes that tg->lock is held. + * + * @tgm: the current ThrottleGroupMember + * @is_write: the type of operation (read/write) + * @ret: the next ThrottleGroupMember with pending requests, or tgm if + * there is none. + */ +static ThrottleGroupMember *next_throttle_token(ThrottleGroupMember *tgm, + bool is_write) +{ + ThrottleState *ts = tgm->throttle_state; + ThrottleGroup *tg = container_of(ts, ThrottleGroup, ts); + ThrottleGroupMember *token, *start; + + /* If this member has its I/O limits disabled then it means that + * it's being drained. Skip the round-robin search and return tgm + * immediately if it has pending requests. Otherwise we could be + * forcing it to wait for other member's throttled requests. */ + if (tgm_has_pending_reqs(tgm, is_write) && + qatomic_read(&tgm->io_limits_disabled)) { + return tgm; + } + + start = token = tg->tokens[is_write]; + + /* get next bs round in round robin style */ + token = throttle_group_next_tgm(token); + while (token != start && !tgm_has_pending_reqs(token, is_write)) { + token = throttle_group_next_tgm(token); + } + + /* If no IO are queued for scheduling on the next round robin token + * then decide the token is the current tgm because chances are + * the current tgm got the current request queued. + */ + if (token == start && !tgm_has_pending_reqs(token, is_write)) { + token = tgm; + } + + /* Either we return the original TGM, or one with pending requests */ + assert(token == tgm || tgm_has_pending_reqs(token, is_write)); + + return token; +} + +/* Check if the next I/O request for a ThrottleGroupMember needs to be + * throttled or not. If there's no timer set in this group, set one and update + * the token accordingly. + * + * This assumes that tg->lock is held. + * + * @tgm: the current ThrottleGroupMember + * @is_write: the type of operation (read/write) + * @ret: whether the I/O request needs to be throttled or not + */ +static bool throttle_group_schedule_timer(ThrottleGroupMember *tgm, + bool is_write) +{ + ThrottleState *ts = tgm->throttle_state; + ThrottleGroup *tg = container_of(ts, ThrottleGroup, ts); + ThrottleTimers *tt = &tgm->throttle_timers; + bool must_wait; + + if (qatomic_read(&tgm->io_limits_disabled)) { + return false; + } + + /* Check if any of the timers in this group is already armed */ + if (tg->any_timer_armed[is_write]) { + return true; + } + + must_wait = throttle_schedule_timer(ts, tt, is_write); + + /* If a timer just got armed, set tgm as the current token */ + if (must_wait) { + tg->tokens[is_write] = tgm; + tg->any_timer_armed[is_write] = true; + } + + return must_wait; +} + +/* Start the next pending I/O request for a ThrottleGroupMember. Return whether + * any request was actually pending. + * + * @tgm: the current ThrottleGroupMember + * @is_write: the type of operation (read/write) + */ +static bool coroutine_fn throttle_group_co_restart_queue(ThrottleGroupMember *tgm, + bool is_write) +{ + bool ret; + + qemu_co_mutex_lock(&tgm->throttled_reqs_lock); + ret = qemu_co_queue_next(&tgm->throttled_reqs[is_write]); + qemu_co_mutex_unlock(&tgm->throttled_reqs_lock); + + return ret; +} + +/* Look for the next pending I/O request and schedule it. + * + * This assumes that tg->lock is held. + * + * @tgm: the current ThrottleGroupMember + * @is_write: the type of operation (read/write) + */ +static void schedule_next_request(ThrottleGroupMember *tgm, bool is_write) +{ + ThrottleState *ts = tgm->throttle_state; + ThrottleGroup *tg = container_of(ts, ThrottleGroup, ts); + bool must_wait; + ThrottleGroupMember *token; + + /* Check if there's any pending request to schedule next */ + token = next_throttle_token(tgm, is_write); + if (!tgm_has_pending_reqs(token, is_write)) { + return; + } + + /* Set a timer for the request if it needs to be throttled */ + must_wait = throttle_group_schedule_timer(token, is_write); + + /* If it doesn't have to wait, queue it for immediate execution */ + if (!must_wait) { + /* Give preference to requests from the current tgm */ + if (qemu_in_coroutine() && + throttle_group_co_restart_queue(tgm, is_write)) { + token = tgm; + } else { + ThrottleTimers *tt = &token->throttle_timers; + int64_t now = qemu_clock_get_ns(tg->clock_type); + timer_mod(tt->timers[is_write], now); + tg->any_timer_armed[is_write] = true; + } + tg->tokens[is_write] = token; + } +} + +/* Check if an I/O request needs to be throttled, wait and set a timer + * if necessary, and schedule the next request using a round robin + * algorithm. + * + * @tgm: the current ThrottleGroupMember + * @bytes: the number of bytes for this I/O + * @is_write: the type of operation (read/write) + */ +void coroutine_fn throttle_group_co_io_limits_intercept(ThrottleGroupMember *tgm, + unsigned int bytes, + bool is_write) +{ + bool must_wait; + ThrottleGroupMember *token; + ThrottleGroup *tg = container_of(tgm->throttle_state, ThrottleGroup, ts); + qemu_mutex_lock(&tg->lock); + + /* First we check if this I/O has to be throttled. */ + token = next_throttle_token(tgm, is_write); + must_wait = throttle_group_schedule_timer(token, is_write); + + /* Wait if there's a timer set or queued requests of this type */ + if (must_wait || tgm->pending_reqs[is_write]) { + tgm->pending_reqs[is_write]++; + qemu_mutex_unlock(&tg->lock); + qemu_co_mutex_lock(&tgm->throttled_reqs_lock); + qemu_co_queue_wait(&tgm->throttled_reqs[is_write], + &tgm->throttled_reqs_lock); + qemu_co_mutex_unlock(&tgm->throttled_reqs_lock); + qemu_mutex_lock(&tg->lock); + tgm->pending_reqs[is_write]--; + } + + /* The I/O will be executed, so do the accounting */ + throttle_account(tgm->throttle_state, is_write, bytes); + + /* Schedule the next request */ + schedule_next_request(tgm, is_write); + + qemu_mutex_unlock(&tg->lock); +} + +typedef struct { + ThrottleGroupMember *tgm; + bool is_write; +} RestartData; + +static void coroutine_fn throttle_group_restart_queue_entry(void *opaque) +{ + RestartData *data = opaque; + ThrottleGroupMember *tgm = data->tgm; + ThrottleState *ts = tgm->throttle_state; + ThrottleGroup *tg = container_of(ts, ThrottleGroup, ts); + bool is_write = data->is_write; + bool empty_queue; + + empty_queue = !throttle_group_co_restart_queue(tgm, is_write); + + /* If the request queue was empty then we have to take care of + * scheduling the next one */ + if (empty_queue) { + qemu_mutex_lock(&tg->lock); + schedule_next_request(tgm, is_write); + qemu_mutex_unlock(&tg->lock); + } + + g_free(data); + + qatomic_dec(&tgm->restart_pending); + aio_wait_kick(); +} + +static void throttle_group_restart_queue(ThrottleGroupMember *tgm, bool is_write) +{ + Coroutine *co; + RestartData *rd = g_new0(RestartData, 1); + + rd->tgm = tgm; + rd->is_write = is_write; + + /* This function is called when a timer is fired or when + * throttle_group_restart_tgm() is called. Either way, there can + * be no timer pending on this tgm at this point */ + assert(!timer_pending(tgm->throttle_timers.timers[is_write])); + + qatomic_inc(&tgm->restart_pending); + + co = qemu_coroutine_create(throttle_group_restart_queue_entry, rd); + aio_co_enter(tgm->aio_context, co); +} + +void throttle_group_restart_tgm(ThrottleGroupMember *tgm) +{ + int i; + + if (tgm->throttle_state) { + for (i = 0; i < 2; i++) { + QEMUTimer *t = tgm->throttle_timers.timers[i]; + if (timer_pending(t)) { + /* If there's a pending timer on this tgm, fire it now */ + timer_del(t); + timer_cb(tgm, i); + } else { + /* Else run the next request from the queue manually */ + throttle_group_restart_queue(tgm, i); + } + } + } +} + +/* Update the throttle configuration for a particular group. Similar + * to throttle_config(), but guarantees atomicity within the + * throttling group. + * + * @tgm: a ThrottleGroupMember that is a member of the group + * @cfg: the configuration to set + */ +void throttle_group_config(ThrottleGroupMember *tgm, ThrottleConfig *cfg) +{ + ThrottleState *ts = tgm->throttle_state; + ThrottleGroup *tg = container_of(ts, ThrottleGroup, ts); + qemu_mutex_lock(&tg->lock); + throttle_config(ts, tg->clock_type, cfg); + qemu_mutex_unlock(&tg->lock); + + throttle_group_restart_tgm(tgm); +} + +/* Get the throttle configuration from a particular group. Similar to + * throttle_get_config(), but guarantees atomicity within the + * throttling group. + * + * @tgm: a ThrottleGroupMember that is a member of the group + * @cfg: the configuration will be written here + */ +void throttle_group_get_config(ThrottleGroupMember *tgm, ThrottleConfig *cfg) +{ + ThrottleState *ts = tgm->throttle_state; + ThrottleGroup *tg = container_of(ts, ThrottleGroup, ts); + qemu_mutex_lock(&tg->lock); + throttle_get_config(ts, cfg); + qemu_mutex_unlock(&tg->lock); +} + +/* ThrottleTimers callback. This wakes up a request that was waiting + * because it had been throttled. + * + * @tgm: the ThrottleGroupMember whose request had been throttled + * @is_write: the type of operation (read/write) + */ +static void timer_cb(ThrottleGroupMember *tgm, bool is_write) +{ + ThrottleState *ts = tgm->throttle_state; + ThrottleGroup *tg = container_of(ts, ThrottleGroup, ts); + + /* The timer has just been fired, so we can update the flag */ + qemu_mutex_lock(&tg->lock); + tg->any_timer_armed[is_write] = false; + qemu_mutex_unlock(&tg->lock); + + /* Run the request that was waiting for this timer */ + throttle_group_restart_queue(tgm, is_write); +} + +static void read_timer_cb(void *opaque) +{ + timer_cb(opaque, false); +} + +static void write_timer_cb(void *opaque) +{ + timer_cb(opaque, true); +} + +/* Register a ThrottleGroupMember from the throttling group, also initializing + * its timers and updating its throttle_state pointer to point to it. If a + * throttling group with that name does not exist yet, it will be created. + * + * This function edits throttle_groups and must be called under the global + * mutex. + * + * @tgm: the ThrottleGroupMember to insert + * @groupname: the name of the group + * @ctx: the AioContext to use + */ +void throttle_group_register_tgm(ThrottleGroupMember *tgm, + const char *groupname, + AioContext *ctx) +{ + int i; + ThrottleState *ts = throttle_group_incref(groupname); + ThrottleGroup *tg = container_of(ts, ThrottleGroup, ts); + + tgm->throttle_state = ts; + tgm->aio_context = ctx; + qatomic_set(&tgm->restart_pending, 0); + + qemu_mutex_lock(&tg->lock); + /* If the ThrottleGroup is new set this ThrottleGroupMember as the token */ + for (i = 0; i < 2; i++) { + if (!tg->tokens[i]) { + tg->tokens[i] = tgm; + } + } + + QLIST_INSERT_HEAD(&tg->head, tgm, round_robin); + + throttle_timers_init(&tgm->throttle_timers, + tgm->aio_context, + tg->clock_type, + read_timer_cb, + write_timer_cb, + tgm); + qemu_co_mutex_init(&tgm->throttled_reqs_lock); + qemu_co_queue_init(&tgm->throttled_reqs[0]); + qemu_co_queue_init(&tgm->throttled_reqs[1]); + + qemu_mutex_unlock(&tg->lock); +} + +/* Unregister a ThrottleGroupMember from its group, removing it from the list, + * destroying the timers and setting the throttle_state pointer to NULL. + * + * The ThrottleGroupMember must not have pending throttled requests, so the + * caller has to drain them first. + * + * The group will be destroyed if it's empty after this operation. + * + * @tgm the ThrottleGroupMember to remove + */ +void throttle_group_unregister_tgm(ThrottleGroupMember *tgm) +{ + ThrottleState *ts = tgm->throttle_state; + ThrottleGroup *tg = container_of(ts, ThrottleGroup, ts); + ThrottleGroupMember *token; + int i; + + if (!ts) { + /* Discard already unregistered tgm */ + return; + } + + /* Wait for throttle_group_restart_queue_entry() coroutines to finish */ + AIO_WAIT_WHILE(tgm->aio_context, qatomic_read(&tgm->restart_pending) > 0); + + qemu_mutex_lock(&tg->lock); + for (i = 0; i < 2; i++) { + assert(tgm->pending_reqs[i] == 0); + assert(qemu_co_queue_empty(&tgm->throttled_reqs[i])); + assert(!timer_pending(tgm->throttle_timers.timers[i])); + if (tg->tokens[i] == tgm) { + token = throttle_group_next_tgm(tgm); + /* Take care of the case where this is the last tgm in the group */ + if (token == tgm) { + token = NULL; + } + tg->tokens[i] = token; + } + } + + /* remove the current tgm from the list */ + QLIST_REMOVE(tgm, round_robin); + throttle_timers_destroy(&tgm->throttle_timers); + qemu_mutex_unlock(&tg->lock); + + throttle_group_unref(&tg->ts); + tgm->throttle_state = NULL; +} + +void throttle_group_attach_aio_context(ThrottleGroupMember *tgm, + AioContext *new_context) +{ + ThrottleTimers *tt = &tgm->throttle_timers; + throttle_timers_attach_aio_context(tt, new_context); + tgm->aio_context = new_context; +} + +void throttle_group_detach_aio_context(ThrottleGroupMember *tgm) +{ + ThrottleGroup *tg = container_of(tgm->throttle_state, ThrottleGroup, ts); + ThrottleTimers *tt = &tgm->throttle_timers; + int i; + + /* Requests must have been drained */ + assert(tgm->pending_reqs[0] == 0 && tgm->pending_reqs[1] == 0); + assert(qemu_co_queue_empty(&tgm->throttled_reqs[0])); + assert(qemu_co_queue_empty(&tgm->throttled_reqs[1])); + + /* Kick off next ThrottleGroupMember, if necessary */ + qemu_mutex_lock(&tg->lock); + for (i = 0; i < 2; i++) { + if (timer_pending(tt->timers[i])) { + tg->any_timer_armed[i] = false; + schedule_next_request(tgm, i); + } + } + qemu_mutex_unlock(&tg->lock); + + throttle_timers_detach_aio_context(tt); + tgm->aio_context = NULL; +} + +#undef THROTTLE_OPT_PREFIX +#define THROTTLE_OPT_PREFIX "x-" + +/* Helper struct and array for QOM property setter/getter */ +typedef struct { + const char *name; + BucketType type; + enum { + AVG, + MAX, + BURST_LENGTH, + IOPS_SIZE, + } category; +} ThrottleParamInfo; + +static ThrottleParamInfo properties[] = { + { + THROTTLE_OPT_PREFIX QEMU_OPT_IOPS_TOTAL, + THROTTLE_OPS_TOTAL, AVG, + }, + { + THROTTLE_OPT_PREFIX QEMU_OPT_IOPS_TOTAL_MAX, + THROTTLE_OPS_TOTAL, MAX, + }, + { + THROTTLE_OPT_PREFIX QEMU_OPT_IOPS_TOTAL_MAX_LENGTH, + THROTTLE_OPS_TOTAL, BURST_LENGTH, + }, + { + THROTTLE_OPT_PREFIX QEMU_OPT_IOPS_READ, + THROTTLE_OPS_READ, AVG, + }, + { + THROTTLE_OPT_PREFIX QEMU_OPT_IOPS_READ_MAX, + THROTTLE_OPS_READ, MAX, + }, + { + THROTTLE_OPT_PREFIX QEMU_OPT_IOPS_READ_MAX_LENGTH, + THROTTLE_OPS_READ, BURST_LENGTH, + }, + { + THROTTLE_OPT_PREFIX QEMU_OPT_IOPS_WRITE, + THROTTLE_OPS_WRITE, AVG, + }, + { + THROTTLE_OPT_PREFIX QEMU_OPT_IOPS_WRITE_MAX, + THROTTLE_OPS_WRITE, MAX, + }, + { + THROTTLE_OPT_PREFIX QEMU_OPT_IOPS_WRITE_MAX_LENGTH, + THROTTLE_OPS_WRITE, BURST_LENGTH, + }, + { + THROTTLE_OPT_PREFIX QEMU_OPT_BPS_TOTAL, + THROTTLE_BPS_TOTAL, AVG, + }, + { + THROTTLE_OPT_PREFIX QEMU_OPT_BPS_TOTAL_MAX, + THROTTLE_BPS_TOTAL, MAX, + }, + { + THROTTLE_OPT_PREFIX QEMU_OPT_BPS_TOTAL_MAX_LENGTH, + THROTTLE_BPS_TOTAL, BURST_LENGTH, + }, + { + THROTTLE_OPT_PREFIX QEMU_OPT_BPS_READ, + THROTTLE_BPS_READ, AVG, + }, + { + THROTTLE_OPT_PREFIX QEMU_OPT_BPS_READ_MAX, + THROTTLE_BPS_READ, MAX, + }, + { + THROTTLE_OPT_PREFIX QEMU_OPT_BPS_READ_MAX_LENGTH, + THROTTLE_BPS_READ, BURST_LENGTH, + }, + { + THROTTLE_OPT_PREFIX QEMU_OPT_BPS_WRITE, + THROTTLE_BPS_WRITE, AVG, + }, + { + THROTTLE_OPT_PREFIX QEMU_OPT_BPS_WRITE_MAX, + THROTTLE_BPS_WRITE, MAX, + }, + { + THROTTLE_OPT_PREFIX QEMU_OPT_BPS_WRITE_MAX_LENGTH, + THROTTLE_BPS_WRITE, BURST_LENGTH, + }, + { + THROTTLE_OPT_PREFIX QEMU_OPT_IOPS_SIZE, + 0, IOPS_SIZE, + } +}; + +/* This function edits throttle_groups and must be called under the global + * mutex */ +static void throttle_group_obj_init(Object *obj) +{ + ThrottleGroup *tg = THROTTLE_GROUP(obj); + + tg->clock_type = QEMU_CLOCK_REALTIME; + if (qtest_enabled()) { + /* For testing block IO throttling only */ + tg->clock_type = QEMU_CLOCK_VIRTUAL; + } + tg->is_initialized = false; + qemu_mutex_init(&tg->lock); + throttle_init(&tg->ts); + QLIST_INIT(&tg->head); +} + +/* This function edits throttle_groups and must be called under the global + * mutex */ +static void throttle_group_obj_complete(UserCreatable *obj, Error **errp) +{ + ThrottleGroup *tg = THROTTLE_GROUP(obj); + ThrottleConfig cfg; + + /* set group name to object id if it exists */ + if (!tg->name && tg->parent_obj.parent) { + tg->name = g_strdup(object_get_canonical_path_component(OBJECT(obj))); + } + /* We must have a group name at this point */ + assert(tg->name); + + /* error if name is duplicate */ + if (throttle_group_exists(tg->name)) { + error_setg(errp, "A group with this name already exists"); + return; + } + + /* check validity */ + throttle_get_config(&tg->ts, &cfg); + if (!throttle_is_valid(&cfg, errp)) { + return; + } + throttle_config(&tg->ts, tg->clock_type, &cfg); + QTAILQ_INSERT_TAIL(&throttle_groups, tg, list); + tg->is_initialized = true; +} + +/* This function edits throttle_groups and must be called under the global + * mutex */ +static void throttle_group_obj_finalize(Object *obj) +{ + ThrottleGroup *tg = THROTTLE_GROUP(obj); + if (tg->is_initialized) { + QTAILQ_REMOVE(&throttle_groups, tg, list); + } + qemu_mutex_destroy(&tg->lock); + g_free(tg->name); +} + +static void throttle_group_set(Object *obj, Visitor *v, const char * name, + void *opaque, Error **errp) + +{ + ThrottleGroup *tg = THROTTLE_GROUP(obj); + ThrottleConfig *cfg; + ThrottleParamInfo *info = opaque; + int64_t value; + + /* If we have finished initialization, don't accept individual property + * changes through QOM. Throttle configuration limits must be set in one + * transaction, as certain combinations are invalid. + */ + if (tg->is_initialized) { + error_setg(errp, "Property cannot be set after initialization"); + return; + } + + if (!visit_type_int64(v, name, &value, errp)) { + return; + } + if (value < 0) { + error_setg(errp, "Property values cannot be negative"); + return; + } + + cfg = &tg->ts.cfg; + switch (info->category) { + case AVG: + cfg->buckets[info->type].avg = value; + break; + case MAX: + cfg->buckets[info->type].max = value; + break; + case BURST_LENGTH: + if (value > UINT_MAX) { + error_setg(errp, "%s value must be in the" "range [0, %u]", + info->name, UINT_MAX); + return; + } + cfg->buckets[info->type].burst_length = value; + break; + case IOPS_SIZE: + cfg->op_size = value; + break; + } +} + +static void throttle_group_get(Object *obj, Visitor *v, const char *name, + void *opaque, Error **errp) +{ + ThrottleGroup *tg = THROTTLE_GROUP(obj); + ThrottleConfig cfg; + ThrottleParamInfo *info = opaque; + int64_t value; + + throttle_get_config(&tg->ts, &cfg); + switch (info->category) { + case AVG: + value = cfg.buckets[info->type].avg; + break; + case MAX: + value = cfg.buckets[info->type].max; + break; + case BURST_LENGTH: + value = cfg.buckets[info->type].burst_length; + break; + case IOPS_SIZE: + value = cfg.op_size; + break; + } + + visit_type_int64(v, name, &value, errp); +} + +static void throttle_group_set_limits(Object *obj, Visitor *v, + const char *name, void *opaque, + Error **errp) + +{ + ThrottleGroup *tg = THROTTLE_GROUP(obj); + ThrottleConfig cfg; + ThrottleLimits *argp; + Error *local_err = NULL; + + if (!visit_type_ThrottleLimits(v, name, &argp, errp)) { + return; + } + qemu_mutex_lock(&tg->lock); + throttle_get_config(&tg->ts, &cfg); + throttle_limits_to_config(argp, &cfg, &local_err); + if (local_err) { + goto unlock; + } + throttle_config(&tg->ts, tg->clock_type, &cfg); + +unlock: + qemu_mutex_unlock(&tg->lock); + qapi_free_ThrottleLimits(argp); + error_propagate(errp, local_err); + return; +} + +static void throttle_group_get_limits(Object *obj, Visitor *v, + const char *name, void *opaque, + Error **errp) +{ + ThrottleGroup *tg = THROTTLE_GROUP(obj); + ThrottleConfig cfg; + ThrottleLimits arg = { 0 }; + ThrottleLimits *argp = &arg; + + qemu_mutex_lock(&tg->lock); + throttle_get_config(&tg->ts, &cfg); + qemu_mutex_unlock(&tg->lock); + + throttle_config_to_limits(&cfg, argp); + + visit_type_ThrottleLimits(v, name, &argp, errp); +} + +static bool throttle_group_can_be_deleted(UserCreatable *uc) +{ + return OBJECT(uc)->ref == 1; +} + +static void throttle_group_obj_class_init(ObjectClass *klass, void *class_data) +{ + size_t i = 0; + UserCreatableClass *ucc = USER_CREATABLE_CLASS(klass); + + ucc->complete = throttle_group_obj_complete; + ucc->can_be_deleted = throttle_group_can_be_deleted; + + /* individual properties */ + for (i = 0; i < sizeof(properties) / sizeof(ThrottleParamInfo); i++) { + object_class_property_add(klass, + properties[i].name, + "int", + throttle_group_get, + throttle_group_set, + NULL, &properties[i]); + } + + /* ThrottleLimits */ + object_class_property_add(klass, + "limits", "ThrottleLimits", + throttle_group_get_limits, + throttle_group_set_limits, + NULL, NULL); +} + +static const TypeInfo throttle_group_info = { + .name = TYPE_THROTTLE_GROUP, + .parent = TYPE_OBJECT, + .class_init = throttle_group_obj_class_init, + .instance_size = sizeof(ThrottleGroup), + .instance_init = throttle_group_obj_init, + .instance_finalize = throttle_group_obj_finalize, + .interfaces = (InterfaceInfo[]) { + { TYPE_USER_CREATABLE }, + { } + }, +}; + +static void throttle_groups_init(void) +{ + type_register_static(&throttle_group_info); +} + +type_init(throttle_groups_init); diff --git a/block/throttle.c b/block/throttle.c new file mode 100644 index 000000000..b685166ad --- /dev/null +++ b/block/throttle.c @@ -0,0 +1,276 @@ +/* + * QEMU block throttling filter driver infrastructure + * + * Copyright (c) 2017 Manos Pitsidianakis + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation; either version 2 or + * (at your option) version 3 of the License. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, see . + */ + +#include "qemu/osdep.h" +#include "block/throttle-groups.h" +#include "qemu/module.h" +#include "qemu/option.h" +#include "qemu/throttle-options.h" +#include "qapi/error.h" + +static QemuOptsList throttle_opts = { + .name = "throttle", + .head = QTAILQ_HEAD_INITIALIZER(throttle_opts.head), + .desc = { + { + .name = QEMU_OPT_THROTTLE_GROUP_NAME, + .type = QEMU_OPT_STRING, + .help = "Name of the throttle group", + }, + { /* end of list */ } + }, +}; + +/* + * If this function succeeds then the throttle group name is stored in + * @group and must be freed by the caller. + * If there's an error then @group remains unmodified. + */ +static int throttle_parse_options(QDict *options, char **group, Error **errp) +{ + int ret; + const char *group_name; + QemuOpts *opts = qemu_opts_create(&throttle_opts, NULL, 0, &error_abort); + + if (!qemu_opts_absorb_qdict(opts, options, errp)) { + ret = -EINVAL; + goto fin; + } + + group_name = qemu_opt_get(opts, QEMU_OPT_THROTTLE_GROUP_NAME); + if (!group_name) { + error_setg(errp, "Please specify a throttle group"); + ret = -EINVAL; + goto fin; + } else if (!throttle_group_exists(group_name)) { + error_setg(errp, "Throttle group '%s' does not exist", group_name); + ret = -EINVAL; + goto fin; + } + + *group = g_strdup(group_name); + ret = 0; +fin: + qemu_opts_del(opts); + return ret; +} + +static int throttle_open(BlockDriverState *bs, QDict *options, + int flags, Error **errp) +{ + ThrottleGroupMember *tgm = bs->opaque; + char *group; + int ret; + + bs->file = bdrv_open_child(NULL, options, "file", bs, &child_of_bds, + BDRV_CHILD_FILTERED | BDRV_CHILD_PRIMARY, + false, errp); + if (!bs->file) { + return -EINVAL; + } + bs->supported_write_flags = bs->file->bs->supported_write_flags | + BDRV_REQ_WRITE_UNCHANGED; + bs->supported_zero_flags = bs->file->bs->supported_zero_flags | + BDRV_REQ_WRITE_UNCHANGED; + + ret = throttle_parse_options(options, &group, errp); + if (ret == 0) { + /* Register membership to group with name group_name */ + throttle_group_register_tgm(tgm, group, bdrv_get_aio_context(bs)); + g_free(group); + } + + return ret; +} + +static void throttle_close(BlockDriverState *bs) +{ + ThrottleGroupMember *tgm = bs->opaque; + throttle_group_unregister_tgm(tgm); +} + + +static int64_t throttle_getlength(BlockDriverState *bs) +{ + return bdrv_getlength(bs->file->bs); +} + +static int coroutine_fn throttle_co_preadv(BlockDriverState *bs, + uint64_t offset, uint64_t bytes, + QEMUIOVector *qiov, int flags) +{ + + ThrottleGroupMember *tgm = bs->opaque; + throttle_group_co_io_limits_intercept(tgm, bytes, false); + + return bdrv_co_preadv(bs->file, offset, bytes, qiov, flags); +} + +static int coroutine_fn throttle_co_pwritev(BlockDriverState *bs, + uint64_t offset, uint64_t bytes, + QEMUIOVector *qiov, int flags) +{ + ThrottleGroupMember *tgm = bs->opaque; + throttle_group_co_io_limits_intercept(tgm, bytes, true); + + return bdrv_co_pwritev(bs->file, offset, bytes, qiov, flags); +} + +static int coroutine_fn throttle_co_pwrite_zeroes(BlockDriverState *bs, + int64_t offset, int bytes, + BdrvRequestFlags flags) +{ + ThrottleGroupMember *tgm = bs->opaque; + throttle_group_co_io_limits_intercept(tgm, bytes, true); + + return bdrv_co_pwrite_zeroes(bs->file, offset, bytes, flags); +} + +static int coroutine_fn throttle_co_pdiscard(BlockDriverState *bs, + int64_t offset, int bytes) +{ + ThrottleGroupMember *tgm = bs->opaque; + throttle_group_co_io_limits_intercept(tgm, bytes, true); + + return bdrv_co_pdiscard(bs->file, offset, bytes); +} + +static int coroutine_fn throttle_co_pwritev_compressed(BlockDriverState *bs, + uint64_t offset, + uint64_t bytes, + QEMUIOVector *qiov) +{ + return throttle_co_pwritev(bs, offset, bytes, qiov, + BDRV_REQ_WRITE_COMPRESSED); +} + +static int throttle_co_flush(BlockDriverState *bs) +{ + return bdrv_co_flush(bs->file->bs); +} + +static void throttle_detach_aio_context(BlockDriverState *bs) +{ + ThrottleGroupMember *tgm = bs->opaque; + throttle_group_detach_aio_context(tgm); +} + +static void throttle_attach_aio_context(BlockDriverState *bs, + AioContext *new_context) +{ + ThrottleGroupMember *tgm = bs->opaque; + throttle_group_attach_aio_context(tgm, new_context); +} + +static int throttle_reopen_prepare(BDRVReopenState *reopen_state, + BlockReopenQueue *queue, Error **errp) +{ + int ret; + char *group = NULL; + + assert(reopen_state != NULL); + assert(reopen_state->bs != NULL); + + ret = throttle_parse_options(reopen_state->options, &group, errp); + reopen_state->opaque = group; + return ret; +} + +static void throttle_reopen_commit(BDRVReopenState *reopen_state) +{ + BlockDriverState *bs = reopen_state->bs; + ThrottleGroupMember *tgm = bs->opaque; + char *group = reopen_state->opaque; + + assert(group); + + if (strcmp(group, throttle_group_get_name(tgm))) { + throttle_group_unregister_tgm(tgm); + throttle_group_register_tgm(tgm, group, bdrv_get_aio_context(bs)); + } + g_free(reopen_state->opaque); + reopen_state->opaque = NULL; +} + +static void throttle_reopen_abort(BDRVReopenState *reopen_state) +{ + g_free(reopen_state->opaque); + reopen_state->opaque = NULL; +} + +static void coroutine_fn throttle_co_drain_begin(BlockDriverState *bs) +{ + ThrottleGroupMember *tgm = bs->opaque; + if (qatomic_fetch_inc(&tgm->io_limits_disabled) == 0) { + throttle_group_restart_tgm(tgm); + } +} + +static void coroutine_fn throttle_co_drain_end(BlockDriverState *bs) +{ + ThrottleGroupMember *tgm = bs->opaque; + assert(tgm->io_limits_disabled); + qatomic_dec(&tgm->io_limits_disabled); +} + +static const char *const throttle_strong_runtime_opts[] = { + QEMU_OPT_THROTTLE_GROUP_NAME, + + NULL +}; + +static BlockDriver bdrv_throttle = { + .format_name = "throttle", + .instance_size = sizeof(ThrottleGroupMember), + + .bdrv_open = throttle_open, + .bdrv_close = throttle_close, + .bdrv_co_flush = throttle_co_flush, + + .bdrv_child_perm = bdrv_default_perms, + + .bdrv_getlength = throttle_getlength, + + .bdrv_co_preadv = throttle_co_preadv, + .bdrv_co_pwritev = throttle_co_pwritev, + + .bdrv_co_pwrite_zeroes = throttle_co_pwrite_zeroes, + .bdrv_co_pdiscard = throttle_co_pdiscard, + .bdrv_co_pwritev_compressed = throttle_co_pwritev_compressed, + + .bdrv_attach_aio_context = throttle_attach_aio_context, + .bdrv_detach_aio_context = throttle_detach_aio_context, + + .bdrv_reopen_prepare = throttle_reopen_prepare, + .bdrv_reopen_commit = throttle_reopen_commit, + .bdrv_reopen_abort = throttle_reopen_abort, + + .bdrv_co_drain_begin = throttle_co_drain_begin, + .bdrv_co_drain_end = throttle_co_drain_end, + + .is_filter = true, + .strong_runtime_opts = throttle_strong_runtime_opts, +}; + +static void bdrv_throttle_init(void) +{ + bdrv_register(&bdrv_throttle); +} + +block_init(bdrv_throttle_init); diff --git a/block/trace-events b/block/trace-events new file mode 100644 index 000000000..8368f4acb --- /dev/null +++ b/block/trace-events @@ -0,0 +1,224 @@ +# See docs/devel/tracing.txt for syntax documentation. + +# ../block.c +bdrv_open_common(void *bs, const char *filename, int flags, const char *format_name) "bs %p filename \"%s\" flags 0x%x format_name \"%s\"" +bdrv_lock_medium(void *bs, bool locked) "bs %p locked %d" + +# block-backend.c +blk_co_preadv(void *blk, void *bs, int64_t offset, unsigned int bytes, int flags) "blk %p bs %p offset %"PRId64" bytes %u flags 0x%x" +blk_co_pwritev(void *blk, void *bs, int64_t offset, unsigned int bytes, int flags) "blk %p bs %p offset %"PRId64" bytes %u flags 0x%x" +blk_root_attach(void *child, void *blk, void *bs) "child %p blk %p bs %p" +blk_root_detach(void *child, void *blk, void *bs) "child %p blk %p bs %p" + +# io.c +bdrv_co_preadv(void *bs, int64_t offset, int64_t nbytes, unsigned int flags) "bs %p offset %"PRId64" nbytes %"PRId64" flags 0x%x" +bdrv_co_pwritev(void *bs, int64_t offset, int64_t nbytes, unsigned int flags) "bs %p offset %"PRId64" nbytes %"PRId64" flags 0x%x" +bdrv_co_pwrite_zeroes(void *bs, int64_t offset, int count, int flags) "bs %p offset %"PRId64" count %d flags 0x%x" +bdrv_co_do_copy_on_readv(void *bs, int64_t offset, unsigned int bytes, int64_t cluster_offset, int64_t cluster_bytes) "bs %p offset %"PRId64" bytes %u cluster_offset %"PRId64" cluster_bytes %"PRId64 +bdrv_co_copy_range_from(void *src, uint64_t src_offset, void *dst, uint64_t dst_offset, uint64_t bytes, int read_flags, int write_flags) "src %p offset %"PRIu64" dst %p offset %"PRIu64" bytes %"PRIu64" rw flags 0x%x 0x%x" +bdrv_co_copy_range_to(void *src, uint64_t src_offset, void *dst, uint64_t dst_offset, uint64_t bytes, int read_flags, int write_flags) "src %p offset %"PRIu64" dst %p offset %"PRIu64" bytes %"PRIu64" rw flags 0x%x 0x%x" + +# stream.c +stream_one_iteration(void *s, int64_t offset, uint64_t bytes, int is_allocated) "s %p offset %" PRId64 " bytes %" PRIu64 " is_allocated %d" +stream_start(void *bs, void *base, void *s) "bs %p base %p s %p" + +# commit.c +commit_one_iteration(void *s, int64_t offset, uint64_t bytes, int is_allocated) "s %p offset %" PRId64 " bytes %" PRIu64 " is_allocated %d" +commit_start(void *bs, void *base, void *top, void *s) "bs %p base %p top %p s %p" + +# mirror.c +mirror_start(void *bs, void *s, void *opaque) "bs %p s %p opaque %p" +mirror_restart_iter(void *s, int64_t cnt) "s %p dirty count %"PRId64 +mirror_before_flush(void *s) "s %p" +mirror_before_drain(void *s, int64_t cnt) "s %p dirty count %"PRId64 +mirror_before_sleep(void *s, int64_t cnt, int synced, uint64_t delay_ns) "s %p dirty count %"PRId64" synced %d delay %"PRIu64"ns" +mirror_one_iteration(void *s, int64_t offset, uint64_t bytes) "s %p offset %" PRId64 " bytes %" PRIu64 +mirror_iteration_done(void *s, int64_t offset, uint64_t bytes, int ret) "s %p offset %" PRId64 " bytes %" PRIu64 " ret %d" +mirror_yield(void *s, int64_t cnt, int buf_free_count, int in_flight) "s %p dirty count %"PRId64" free buffers %d in_flight %d" +mirror_yield_in_flight(void *s, int64_t offset, int in_flight) "s %p offset %" PRId64 " in_flight %d" + +# backup.c +backup_do_cow_enter(void *job, int64_t start, int64_t offset, uint64_t bytes) "job %p start %" PRId64 " offset %" PRId64 " bytes %" PRIu64 +backup_do_cow_return(void *job, int64_t offset, uint64_t bytes, int ret) "job %p offset %" PRId64 " bytes %" PRIu64 " ret %d" + +# block-copy.c +block_copy_skip_range(void *bcs, int64_t start, uint64_t bytes) "bcs %p start %"PRId64" bytes %"PRId64 +block_copy_process(void *bcs, int64_t start) "bcs %p start %"PRId64 +block_copy_copy_range_fail(void *bcs, int64_t start, int ret) "bcs %p start %"PRId64" ret %d" +block_copy_read_fail(void *bcs, int64_t start, int ret) "bcs %p start %"PRId64" ret %d" +block_copy_write_fail(void *bcs, int64_t start, int ret) "bcs %p start %"PRId64" ret %d" +block_copy_write_zeroes_fail(void *bcs, int64_t start, int ret) "bcs %p start %"PRId64" ret %d" + +# ../blockdev.c +qmp_block_job_cancel(void *job) "job %p" +qmp_block_job_pause(void *job) "job %p" +qmp_block_job_resume(void *job) "job %p" +qmp_block_job_complete(void *job) "job %p" +qmp_block_job_finalize(void *job) "job %p" +qmp_block_job_dismiss(void *job) "job %p" +qmp_block_stream(void *bs) "bs %p" + +# file-win32.c +file_paio_submit(void *acb, void *opaque, int64_t offset, int count, int type) "acb %p opaque %p offset %"PRId64" count %d type %d" + +# io_uring.c +luring_init_state(void *s, size_t size) "s %p size %zu" +luring_cleanup_state(void *s) "%p freed" +luring_io_plug(void *s) "LuringState %p plug" +luring_io_unplug(void *s, int blocked, int plugged, int queued, int inflight) "LuringState %p blocked %d plugged %d queued %d inflight %d" +luring_do_submit(void *s, int blocked, int plugged, int queued, int inflight) "LuringState %p blocked %d plugged %d queued %d inflight %d" +luring_do_submit_done(void *s, int ret) "LuringState %p submitted to kernel %d" +luring_co_submit(void *bs, void *s, void *luringcb, int fd, uint64_t offset, size_t nbytes, int type) "bs %p s %p luringcb %p fd %d offset %" PRId64 " nbytes %zd type %d" +luring_process_completion(void *s, void *aiocb, int ret) "LuringState %p luringcb %p ret %d" +luring_io_uring_submit(void *s, int ret) "LuringState %p ret %d" +luring_resubmit_short_read(void *s, void *luringcb, int nread) "LuringState %p luringcb %p nread %d" + +# qcow2.c +qcow2_add_task(void *co, void *bs, void *pool, const char *action, int cluster_type, uint64_t host_offset, uint64_t offset, uint64_t bytes, void *qiov, size_t qiov_offset) "co %p bs %p pool %p: %s: cluster_type %d file_cluster_offset %" PRIu64 " offset %" PRIu64 " bytes %" PRIu64 " qiov %p qiov_offset %zu" +qcow2_writev_start_req(void *co, int64_t offset, int bytes) "co %p offset 0x%" PRIx64 " bytes %d" +qcow2_writev_done_req(void *co, int ret) "co %p ret %d" +qcow2_writev_start_part(void *co) "co %p" +qcow2_writev_done_part(void *co, int cur_bytes) "co %p cur_bytes %d" +qcow2_writev_data(void *co, uint64_t offset) "co %p offset 0x%" PRIx64 +qcow2_pwrite_zeroes_start_req(void *co, int64_t offset, int count) "co %p offset 0x%" PRIx64 " count %d" +qcow2_pwrite_zeroes(void *co, int64_t offset, int count) "co %p offset 0x%" PRIx64 " count %d" +qcow2_skip_cow(void *co, uint64_t offset, int nb_clusters) "co %p offset 0x%" PRIx64 " nb_clusters %d" + +# qcow2-cluster.c +qcow2_alloc_clusters_offset(void *co, uint64_t offset, int bytes) "co %p offset 0x%" PRIx64 " bytes %d" +qcow2_handle_copied(void *co, uint64_t guest_offset, uint64_t host_offset, uint64_t bytes) "co %p guest_offset 0x%" PRIx64 " host_offset 0x%" PRIx64 " bytes 0x%" PRIx64 +qcow2_handle_alloc(void *co, uint64_t guest_offset, uint64_t host_offset, uint64_t bytes) "co %p guest_offset 0x%" PRIx64 " host_offset 0x%" PRIx64 " bytes 0x%" PRIx64 +qcow2_do_alloc_clusters_offset(void *co, uint64_t guest_offset, uint64_t host_offset, int nb_clusters) "co %p guest_offset 0x%" PRIx64 " host_offset 0x%" PRIx64 " nb_clusters %d" +qcow2_cluster_alloc_phys(void *co) "co %p" +qcow2_cluster_link_l2(void *co, int nb_clusters) "co %p nb_clusters %d" + +qcow2_l2_allocate(void *bs, int l1_index) "bs %p l1_index %d" +qcow2_l2_allocate_get_empty(void *bs, int l1_index) "bs %p l1_index %d" +qcow2_l2_allocate_write_l2(void *bs, int l1_index) "bs %p l1_index %d" +qcow2_l2_allocate_write_l1(void *bs, int l1_index) "bs %p l1_index %d" +qcow2_l2_allocate_done(void *bs, int l1_index, int ret) "bs %p l1_index %d ret %d" + +# qcow2-cache.c +qcow2_cache_get(void *co, int c, uint64_t offset, bool read_from_disk) "co %p is_l2_cache %d offset 0x%" PRIx64 " read_from_disk %d" +qcow2_cache_get_replace_entry(void *co, int c, int i) "co %p is_l2_cache %d index %d" +qcow2_cache_get_read(void *co, int c, int i) "co %p is_l2_cache %d index %d" +qcow2_cache_get_done(void *co, int c, int i) "co %p is_l2_cache %d index %d" +qcow2_cache_flush(void *co, int c) "co %p is_l2_cache %d" +qcow2_cache_entry_flush(void *co, int c, int i) "co %p is_l2_cache %d index %d" + +# qcow2-refcount.c +qcow2_process_discards_failed_region(uint64_t offset, uint64_t bytes, int ret) "offset 0x%" PRIx64 " bytes 0x%" PRIx64 " ret %d" + +# qed-l2-cache.c +qed_alloc_l2_cache_entry(void *l2_cache, void *entry) "l2_cache %p entry %p" +qed_unref_l2_cache_entry(void *entry, int ref) "entry %p ref %d" +qed_find_l2_cache_entry(void *l2_cache, void *entry, uint64_t offset, int ref) "l2_cache %p entry %p offset %"PRIu64" ref %d" + +# qed-table.c +qed_read_table(void *s, uint64_t offset, void *table) "s %p offset %"PRIu64" table %p" +qed_read_table_cb(void *s, void *table, int ret) "s %p table %p ret %d" +qed_write_table(void *s, uint64_t offset, void *table, unsigned int index, unsigned int n) "s %p offset %"PRIu64" table %p index %u n %u" +qed_write_table_cb(void *s, void *table, int flush, int ret) "s %p table %p flush %d ret %d" + +# qed.c +qed_need_check_timer_cb(void *s) "s %p" +qed_start_need_check_timer(void *s) "s %p" +qed_cancel_need_check_timer(void *s) "s %p" +qed_aio_complete(void *s, void *acb, int ret) "s %p acb %p ret %d" +qed_aio_setup(void *s, void *acb, int64_t sector_num, int nb_sectors, void *opaque, int flags) "s %p acb %p sector_num %"PRId64" nb_sectors %d opaque %p flags 0x%x" +qed_aio_next_io(void *s, void *acb, int ret, uint64_t cur_pos) "s %p acb %p ret %d cur_pos %"PRIu64 +qed_aio_read_data(void *s, void *acb, int ret, uint64_t offset, size_t len) "s %p acb %p ret %d offset %"PRIu64" len %zu" +qed_aio_write_data(void *s, void *acb, int ret, uint64_t offset, size_t len) "s %p acb %p ret %d offset %"PRIu64" len %zu" +qed_aio_write_prefill(void *s, void *acb, uint64_t start, size_t len, uint64_t offset) "s %p acb %p start %"PRIu64" len %zu offset %"PRIu64 +qed_aio_write_postfill(void *s, void *acb, uint64_t start, size_t len, uint64_t offset) "s %p acb %p start %"PRIu64" len %zu offset %"PRIu64 +qed_aio_write_main(void *s, void *acb, int ret, uint64_t offset, size_t len) "s %p acb %p ret %d offset %"PRIu64" len %zu" + +# nvme.c +nvme_controller_capability_raw(uint64_t value) "0x%08"PRIx64 +nvme_controller_capability(const char *desc, uint64_t value) "%s: %"PRIu64 +nvme_kick(void *s, unsigned q_index) "s %p q #%u" +nvme_dma_flush_queue_wait(void *s) "s %p" +nvme_error(int cmd_specific, int sq_head, int sqid, int cid, int status) "cmd_specific %d sq_head %d sqid %d cid %d status 0x%x" +nvme_process_completion(void *s, unsigned q_index, int inflight) "s %p q #%u inflight %d" +nvme_process_completion_queue_plugged(void *s, unsigned q_index) "s %p q #%u" +nvme_complete_command(void *s, unsigned q_index, int cid) "s %p q #%u cid %d" +nvme_submit_command(void *s, unsigned q_index, int cid) "s %p q #%u cid %d" +nvme_submit_command_raw(int c0, int c1, int c2, int c3, int c4, int c5, int c6, int c7) "%02x %02x %02x %02x %02x %02x %02x %02x" +nvme_handle_event(void *s) "s %p" +nvme_poll_queue(void *s, unsigned q_index) "s %p q #%u" +nvme_prw_aligned(void *s, int is_write, uint64_t offset, uint64_t bytes, int flags, int niov) "s %p is_write %d offset 0x%"PRIx64" bytes %"PRId64" flags %d niov %d" +nvme_write_zeroes(void *s, uint64_t offset, uint64_t bytes, int flags) "s %p offset 0x%"PRIx64" bytes %"PRId64" flags %d" +nvme_qiov_unaligned(const void *qiov, int n, void *base, size_t size, int align) "qiov %p n %d base %p size 0x%zx align 0x%x" +nvme_prw_buffered(void *s, uint64_t offset, uint64_t bytes, int niov, int is_write) "s %p offset 0x%"PRIx64" bytes %"PRId64" niov %d is_write %d" +nvme_rw_done(void *s, int is_write, uint64_t offset, uint64_t bytes, int ret) "s %p is_write %d offset 0x%"PRIx64" bytes %"PRId64" ret %d" +nvme_dsm(void *s, uint64_t offset, uint64_t bytes) "s %p offset 0x%"PRIx64" bytes %"PRId64"" +nvme_dsm_done(void *s, uint64_t offset, uint64_t bytes, int ret) "s %p offset 0x%"PRIx64" bytes %"PRId64" ret %d" +nvme_dma_map_flush(void *s) "s %p" +nvme_free_req_queue_wait(void *s, unsigned q_index) "s %p q #%u" +nvme_create_queue_pair(unsigned q_index, void *q, unsigned size, void *aio_context, int fd) "index %u q %p size %u aioctx %p fd %d" +nvme_free_queue_pair(unsigned q_index, void *q) "index %u q %p" +nvme_cmd_map_qiov(void *s, void *cmd, void *req, void *qiov, int entries) "s %p cmd %p req %p qiov %p entries %d" +nvme_cmd_map_qiov_pages(void *s, int i, uint64_t page) "s %p page[%d] 0x%"PRIx64 +nvme_cmd_map_qiov_iov(void *s, int i, void *page, int pages) "s %p iov[%d] %p pages %d" + +# iscsi.c +iscsi_xcopy(void *src_lun, uint64_t src_off, void *dst_lun, uint64_t dst_off, uint64_t bytes, int ret) "src_lun %p offset %"PRIu64" dst_lun %p offset %"PRIu64" bytes %"PRIu64" ret %d" + +# nbd.c +nbd_parse_blockstatus_compliance(const char *err) "ignoring extra data from non-compliant server: %s" +nbd_structured_read_compliance(const char *type) "server sent non-compliant unaligned read %s chunk" +nbd_read_reply_entry_fail(int ret, const char *err) "ret = %d, err: %s" +nbd_co_request_fail(uint64_t from, uint32_t len, uint64_t handle, uint16_t flags, uint16_t type, const char *name, int ret, const char *err) "Request failed { .from = %" PRIu64", .len = %" PRIu32 ", .handle = %" PRIu64 ", .flags = 0x%" PRIx16 ", .type = %" PRIu16 " (%s) } ret = %d, err: %s" +nbd_client_handshake(const char *export_name) "export '%s'" +nbd_client_handshake_success(const char *export_name) "export '%s'" + +# ssh.c +ssh_restart_coroutine(void *co) "co=%p" +ssh_flush(void) "fsync" +ssh_check_host_key_knownhosts(void) "host key OK" +ssh_connect_to_ssh(char *path, int flags, int mode) "opening file %s flags=0x%x creat_mode=0%o" +ssh_co_yield(int sock, void *rd_handler, void *wr_handler) "s->sock=%d rd_handler=%p wr_handler=%p" +ssh_co_yield_back(int sock) "s->sock=%d - back" +ssh_getlength(int64_t length) "length=%" PRIi64 +ssh_co_create_opts(uint64_t size) "total_size=%" PRIu64 +ssh_read(int64_t offset, size_t size) "offset=%" PRIi64 " size=%zu" +ssh_read_buf(void *buf, size_t size, size_t actual_size) "sftp_read buf=%p size=%zu (actual size=%zu)" +ssh_read_return(ssize_t ret, int sftp_err) "sftp_read returned %zd (sftp error=%d)" +ssh_write(int64_t offset, size_t size) "offset=%" PRIi64 " size=%zu" +ssh_write_buf(void *buf, size_t size, size_t actual_size) "sftp_write buf=%p size=%zu (actual size=%zu)" +ssh_write_return(ssize_t ret, int sftp_err) "sftp_write returned %zd (sftp error=%d)" +ssh_seek(int64_t offset) "seeking to offset=%" PRIi64 +ssh_auth_methods(int methods) "auth methods=0x%x" +ssh_server_status(int status) "server status=%d" + +# curl.c +curl_timer_cb(long timeout_ms) "timer callback timeout_ms %ld" +curl_sock_cb(int action, int fd) "sock action %d on fd %d" +curl_read_cb(size_t realsize) "just reading %zu bytes" +curl_open(const char *file) "opening %s" +curl_open_size(uint64_t size) "size = %" PRIu64 +curl_setup_preadv(uint64_t bytes, uint64_t start, const char *range) "reading %" PRIu64 " at %" PRIu64 " (%s)" +curl_close(void) "close" + +# file-posix.c +file_copy_file_range(void *bs, int src, int64_t src_off, int dst, int64_t dst_off, int64_t bytes, int flags, int64_t ret) "bs %p src_fd %d offset %"PRIu64" dst_fd %d offset %"PRIu64" bytes %"PRIu64" flags %d ret %"PRId64 +file_FindEjectableOpticalMedia(const char *media) "Matching using %s" +file_setup_cdrom(const char *partition) "Using %s as optical disc" +file_hdev_is_sg(int type, int version) "SG device found: type=%d, version=%d" + +# sheepdog.c +sheepdog_reconnect_to_sdog(void) "Wait for connection to be established" +sheepdog_aio_read_response(void) "disable cache since the server doesn't support it" +sheepdog_open(uint32_t vid) "0x%" PRIx32 " snapshot inode was open" +sheepdog_close(const char *name) "%s" +sheepdog_create_branch_snapshot(uint32_t vdi) "0x%" PRIx32 " is snapshot" +sheepdog_create_branch_created(uint32_t vdi) "0x%" PRIx32 " is created" +sheepdog_create_branch_new(uint32_t vdi) "0x%" PRIx32 " was newly created" +sheepdog_co_rw_vector_update(uint32_t vdi, uint64_t oid, uint64_t data, long idx) "update ino (%" PRIu32 ") %" PRIu64 " %" PRIu64 " %ld" +sheepdog_co_rw_vector_new(uint64_t oid) "new oid 0x%" PRIx64 +sheepdog_snapshot_create_info(const char *sn_name, const char *id, const char *name, int64_t size, int is_snapshot) "sn_info: name %s id_str %s s: name %s vm_state_size %" PRId64 " " "is_snapshot %d" +sheepdog_snapshot_create(const char *sn_name, const char *id) "%s %s" +sheepdog_snapshot_create_inode(const char *name, uint32_t snap, uint32_t vdi) "s->inode: name %s snap_id 0x%" PRIx32 " vdi 0x%" PRIx32 + +# ssh.c +sftp_error(const char *op, const char *ssh_err, int ssh_err_code, int sftp_err_code) "%s failed: %s (libssh error code: %d, sftp error code: %d)" diff --git a/block/trace.h b/block/trace.h new file mode 100644 index 000000000..3a436e6c7 --- /dev/null +++ b/block/trace.h @@ -0,0 +1 @@ +#include "trace/trace-block.h" diff --git a/block/vdi.c b/block/vdi.c new file mode 100644 index 000000000..5627e7d76 --- /dev/null +++ b/block/vdi.c @@ -0,0 +1,1061 @@ +/* + * Block driver for the Virtual Disk Image (VDI) format + * + * Copyright (c) 2009, 2012 Stefan Weil + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 2 of the License, or + * (at your option) version 3 or any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + * + * Reference: + * http://forums.virtualbox.org/viewtopic.php?t=8046 + * + * This driver supports create / read / write operations on VDI images. + * + * Todo (see also TODO in code): + * + * Some features like snapshots are still missing. + * + * Deallocation of zero-filled blocks and shrinking images are missing, too + * (might be added to common block layer). + * + * Allocation of blocks could be optimized (less writes to block map and + * header). + * + * Read and write of adjacent blocks could be done in one operation + * (current code uses one operation per block (1 MiB). + * + * The code is not thread safe (missing locks for changes in header and + * block table, no problem with current QEMU). + * + * Hints: + * + * Blocks (VDI documentation) correspond to clusters (QEMU). + * QEMU's backing files could be implemented using VDI snapshot files (TODO). + * VDI snapshot files may also contain the complete machine state. + * Maybe this machine state can be converted to QEMU PC machine snapshot data. + * + * The driver keeps a block cache (little endian entries) in memory. + * For the standard block size (1 MiB), a 1 TiB disk will use 4 MiB RAM, + * so this seems to be reasonable. + */ + +#include "qemu/osdep.h" +#include "qemu/units.h" +#include "qapi/error.h" +#include "qapi/qobject-input-visitor.h" +#include "qapi/qapi-visit-block-core.h" +#include "block/block_int.h" +#include "block/qdict.h" +#include "sysemu/block-backend.h" +#include "qemu/module.h" +#include "qemu/option.h" +#include "qemu/bswap.h" +#include "migration/blocker.h" +#include "qemu/coroutine.h" +#include "qemu/cutils.h" +#include "qemu/uuid.h" + +/* Code configuration options. */ + +/* Enable debug messages. */ +//~ #define CONFIG_VDI_DEBUG + +/* Support write operations on VDI images. */ +#define CONFIG_VDI_WRITE + +/* Support non-standard block (cluster) size. This is untested. + * Maybe it will be needed for very large images. + */ +//~ #define CONFIG_VDI_BLOCK_SIZE + +/* Support static (fixed, pre-allocated) images. */ +#define CONFIG_VDI_STATIC_IMAGE + +/* Command line option for static images. */ +#define BLOCK_OPT_STATIC "static" + +#define SECTOR_SIZE 512 +#define DEFAULT_CLUSTER_SIZE 1048576 +/* Note: can't use 1 * MiB, because it's passed to stringify() */ + +#if defined(CONFIG_VDI_DEBUG) +#define VDI_DEBUG 1 +#else +#define VDI_DEBUG 0 +#endif + +#define logout(fmt, ...) \ + do { \ + if (VDI_DEBUG) { \ + fprintf(stderr, "vdi\t%-24s" fmt, __func__, ##__VA_ARGS__); \ + } \ + } while (0) + +/* Image signature. */ +#define VDI_SIGNATURE 0xbeda107f + +/* Image version. */ +#define VDI_VERSION_1_1 0x00010001 + +/* Image type. */ +#define VDI_TYPE_DYNAMIC 1 +#define VDI_TYPE_STATIC 2 + +/* Innotek / SUN images use these strings in header.text: + * "<<< innotek VirtualBox Disk Image >>>\n" + * "<<< Sun xVM VirtualBox Disk Image >>>\n" + * "<<< Sun VirtualBox Disk Image >>>\n" + * The value does not matter, so QEMU created images use a different text. + */ +#define VDI_TEXT "<<< QEMU VM Virtual Disk Image >>>\n" + +/* A never-allocated block; semantically arbitrary content. */ +#define VDI_UNALLOCATED 0xffffffffU + +/* A discarded (no longer allocated) block; semantically zero-filled. */ +#define VDI_DISCARDED 0xfffffffeU + +#define VDI_IS_ALLOCATED(X) ((X) < VDI_DISCARDED) + +/* The bmap will take up VDI_BLOCKS_IN_IMAGE_MAX * sizeof(uint32_t) bytes; since + * the bmap is read and written in a single operation, its size needs to be + * limited to INT_MAX; furthermore, when opening an image, the bmap size is + * rounded up to be aligned on BDRV_SECTOR_SIZE. + * Therefore this should satisfy the following: + * VDI_BLOCKS_IN_IMAGE_MAX * sizeof(uint32_t) + BDRV_SECTOR_SIZE == INT_MAX + 1 + * (INT_MAX + 1 is the first value not representable as an int) + * This guarantees that any value below or equal to the constant will, when + * multiplied by sizeof(uint32_t) and rounded up to a BDRV_SECTOR_SIZE boundary, + * still be below or equal to INT_MAX. */ +#define VDI_BLOCKS_IN_IMAGE_MAX \ + ((unsigned)((INT_MAX + 1u - BDRV_SECTOR_SIZE) / sizeof(uint32_t))) +#define VDI_DISK_SIZE_MAX ((uint64_t)VDI_BLOCKS_IN_IMAGE_MAX * \ + (uint64_t)DEFAULT_CLUSTER_SIZE) + +static QemuOptsList vdi_create_opts; + +typedef struct { + char text[0x40]; + uint32_t signature; + uint32_t version; + uint32_t header_size; + uint32_t image_type; + uint32_t image_flags; + char description[256]; + uint32_t offset_bmap; + uint32_t offset_data; + uint32_t cylinders; /* disk geometry, unused here */ + uint32_t heads; /* disk geometry, unused here */ + uint32_t sectors; /* disk geometry, unused here */ + uint32_t sector_size; + uint32_t unused1; + uint64_t disk_size; + uint32_t block_size; + uint32_t block_extra; /* unused here */ + uint32_t blocks_in_image; + uint32_t blocks_allocated; + QemuUUID uuid_image; + QemuUUID uuid_last_snap; + QemuUUID uuid_link; + QemuUUID uuid_parent; + uint64_t unused2[7]; +} QEMU_PACKED VdiHeader; + +QEMU_BUILD_BUG_ON(sizeof(VdiHeader) != 512); + +typedef struct { + /* The block map entries are little endian (even in memory). */ + uint32_t *bmap; + /* Size of block (bytes). */ + uint32_t block_size; + /* First sector of block map. */ + uint32_t bmap_sector; + /* VDI header (converted to host endianness). */ + VdiHeader header; + + CoRwlock bmap_lock; + + Error *migration_blocker; +} BDRVVdiState; + +static void vdi_header_to_cpu(VdiHeader *header) +{ + header->signature = le32_to_cpu(header->signature); + header->version = le32_to_cpu(header->version); + header->header_size = le32_to_cpu(header->header_size); + header->image_type = le32_to_cpu(header->image_type); + header->image_flags = le32_to_cpu(header->image_flags); + header->offset_bmap = le32_to_cpu(header->offset_bmap); + header->offset_data = le32_to_cpu(header->offset_data); + header->cylinders = le32_to_cpu(header->cylinders); + header->heads = le32_to_cpu(header->heads); + header->sectors = le32_to_cpu(header->sectors); + header->sector_size = le32_to_cpu(header->sector_size); + header->disk_size = le64_to_cpu(header->disk_size); + header->block_size = le32_to_cpu(header->block_size); + header->block_extra = le32_to_cpu(header->block_extra); + header->blocks_in_image = le32_to_cpu(header->blocks_in_image); + header->blocks_allocated = le32_to_cpu(header->blocks_allocated); + header->uuid_image = qemu_uuid_bswap(header->uuid_image); + header->uuid_last_snap = qemu_uuid_bswap(header->uuid_last_snap); + header->uuid_link = qemu_uuid_bswap(header->uuid_link); + header->uuid_parent = qemu_uuid_bswap(header->uuid_parent); +} + +static void vdi_header_to_le(VdiHeader *header) +{ + header->signature = cpu_to_le32(header->signature); + header->version = cpu_to_le32(header->version); + header->header_size = cpu_to_le32(header->header_size); + header->image_type = cpu_to_le32(header->image_type); + header->image_flags = cpu_to_le32(header->image_flags); + header->offset_bmap = cpu_to_le32(header->offset_bmap); + header->offset_data = cpu_to_le32(header->offset_data); + header->cylinders = cpu_to_le32(header->cylinders); + header->heads = cpu_to_le32(header->heads); + header->sectors = cpu_to_le32(header->sectors); + header->sector_size = cpu_to_le32(header->sector_size); + header->disk_size = cpu_to_le64(header->disk_size); + header->block_size = cpu_to_le32(header->block_size); + header->block_extra = cpu_to_le32(header->block_extra); + header->blocks_in_image = cpu_to_le32(header->blocks_in_image); + header->blocks_allocated = cpu_to_le32(header->blocks_allocated); + header->uuid_image = qemu_uuid_bswap(header->uuid_image); + header->uuid_last_snap = qemu_uuid_bswap(header->uuid_last_snap); + header->uuid_link = qemu_uuid_bswap(header->uuid_link); + header->uuid_parent = qemu_uuid_bswap(header->uuid_parent); +} + +static void vdi_header_print(VdiHeader *header) +{ + char uuidstr[37]; + QemuUUID uuid; + logout("text %s", header->text); + logout("signature 0x%08x\n", header->signature); + logout("header size 0x%04x\n", header->header_size); + logout("image type 0x%04x\n", header->image_type); + logout("image flags 0x%04x\n", header->image_flags); + logout("description %s\n", header->description); + logout("offset bmap 0x%04x\n", header->offset_bmap); + logout("offset data 0x%04x\n", header->offset_data); + logout("cylinders 0x%04x\n", header->cylinders); + logout("heads 0x%04x\n", header->heads); + logout("sectors 0x%04x\n", header->sectors); + logout("sector size 0x%04x\n", header->sector_size); + logout("image size 0x%" PRIx64 " B (%" PRIu64 " MiB)\n", + header->disk_size, header->disk_size / MiB); + logout("block size 0x%04x\n", header->block_size); + logout("block extra 0x%04x\n", header->block_extra); + logout("blocks tot. 0x%04x\n", header->blocks_in_image); + logout("blocks all. 0x%04x\n", header->blocks_allocated); + uuid = header->uuid_image; + qemu_uuid_unparse(&uuid, uuidstr); + logout("uuid image %s\n", uuidstr); + uuid = header->uuid_last_snap; + qemu_uuid_unparse(&uuid, uuidstr); + logout("uuid snap %s\n", uuidstr); + uuid = header->uuid_link; + qemu_uuid_unparse(&uuid, uuidstr); + logout("uuid link %s\n", uuidstr); + uuid = header->uuid_parent; + qemu_uuid_unparse(&uuid, uuidstr); + logout("uuid parent %s\n", uuidstr); +} + +static int coroutine_fn vdi_co_check(BlockDriverState *bs, BdrvCheckResult *res, + BdrvCheckMode fix) +{ + /* TODO: additional checks possible. */ + BDRVVdiState *s = (BDRVVdiState *)bs->opaque; + uint32_t blocks_allocated = 0; + uint32_t block; + uint32_t *bmap; + logout("\n"); + + if (fix) { + return -ENOTSUP; + } + + bmap = g_try_new(uint32_t, s->header.blocks_in_image); + if (s->header.blocks_in_image && bmap == NULL) { + res->check_errors++; + return -ENOMEM; + } + + memset(bmap, 0xff, s->header.blocks_in_image * sizeof(uint32_t)); + + /* Check block map and value of blocks_allocated. */ + for (block = 0; block < s->header.blocks_in_image; block++) { + uint32_t bmap_entry = le32_to_cpu(s->bmap[block]); + if (VDI_IS_ALLOCATED(bmap_entry)) { + if (bmap_entry < s->header.blocks_in_image) { + blocks_allocated++; + if (!VDI_IS_ALLOCATED(bmap[bmap_entry])) { + bmap[bmap_entry] = bmap_entry; + } else { + fprintf(stderr, "ERROR: block index %" PRIu32 + " also used by %" PRIu32 "\n", bmap[bmap_entry], bmap_entry); + res->corruptions++; + } + } else { + fprintf(stderr, "ERROR: block index %" PRIu32 + " too large, is %" PRIu32 "\n", block, bmap_entry); + res->corruptions++; + } + } + } + if (blocks_allocated != s->header.blocks_allocated) { + fprintf(stderr, "ERROR: allocated blocks mismatch, is %" PRIu32 + ", should be %" PRIu32 "\n", + blocks_allocated, s->header.blocks_allocated); + res->corruptions++; + } + + g_free(bmap); + + return 0; +} + +static int vdi_get_info(BlockDriverState *bs, BlockDriverInfo *bdi) +{ + /* TODO: vdi_get_info would be needed for machine snapshots. + vm_state_offset is still missing. */ + BDRVVdiState *s = (BDRVVdiState *)bs->opaque; + logout("\n"); + bdi->cluster_size = s->block_size; + bdi->vm_state_offset = 0; + return 0; +} + +static int vdi_make_empty(BlockDriverState *bs) +{ + /* TODO: missing code. */ + logout("\n"); + /* The return value for missing code must be 0, see block.c. */ + return 0; +} + +static int vdi_probe(const uint8_t *buf, int buf_size, const char *filename) +{ + const VdiHeader *header = (const VdiHeader *)buf; + int ret = 0; + + logout("\n"); + + if (buf_size < sizeof(*header)) { + /* Header too small, no VDI. */ + } else if (le32_to_cpu(header->signature) == VDI_SIGNATURE) { + ret = 100; + } + + if (ret == 0) { + logout("no vdi image\n"); + } else { + logout("%s", header->text); + } + + return ret; +} + +static int vdi_open(BlockDriverState *bs, QDict *options, int flags, + Error **errp) +{ + BDRVVdiState *s = bs->opaque; + VdiHeader header; + size_t bmap_size; + int ret; + QemuUUID uuid_link, uuid_parent; + + bs->file = bdrv_open_child(NULL, options, "file", bs, &child_of_bds, + BDRV_CHILD_IMAGE, false, errp); + if (!bs->file) { + return -EINVAL; + } + + logout("\n"); + + ret = bdrv_pread(bs->file, 0, &header, sizeof(header)); + if (ret < 0) { + goto fail; + } + + vdi_header_to_cpu(&header); + if (VDI_DEBUG) { + vdi_header_print(&header); + } + + if (header.disk_size > VDI_DISK_SIZE_MAX) { + error_setg(errp, "Unsupported VDI image size (size is 0x%" PRIx64 + ", max supported is 0x%" PRIx64 ")", + header.disk_size, VDI_DISK_SIZE_MAX); + ret = -ENOTSUP; + goto fail; + } + + uuid_link = header.uuid_link; + uuid_parent = header.uuid_parent; + + if (header.disk_size % SECTOR_SIZE != 0) { + /* 'VBoxManage convertfromraw' can create images with odd disk sizes. + We accept them but round the disk size to the next multiple of + SECTOR_SIZE. */ + logout("odd disk size %" PRIu64 " B, round up\n", header.disk_size); + header.disk_size = ROUND_UP(header.disk_size, SECTOR_SIZE); + } + + if (header.signature != VDI_SIGNATURE) { + error_setg(errp, "Image not in VDI format (bad signature %08" PRIx32 + ")", header.signature); + ret = -EINVAL; + goto fail; + } else if (header.version != VDI_VERSION_1_1) { + error_setg(errp, "unsupported VDI image (version %" PRIu32 ".%" PRIu32 + ")", header.version >> 16, header.version & 0xffff); + ret = -ENOTSUP; + goto fail; + } else if (header.offset_bmap % SECTOR_SIZE != 0) { + /* We only support block maps which start on a sector boundary. */ + error_setg(errp, "unsupported VDI image (unaligned block map offset " + "0x%" PRIx32 ")", header.offset_bmap); + ret = -ENOTSUP; + goto fail; + } else if (header.offset_data % SECTOR_SIZE != 0) { + /* We only support data blocks which start on a sector boundary. */ + error_setg(errp, "unsupported VDI image (unaligned data offset 0x%" + PRIx32 ")", header.offset_data); + ret = -ENOTSUP; + goto fail; + } else if (header.sector_size != SECTOR_SIZE) { + error_setg(errp, "unsupported VDI image (sector size %" PRIu32 + " is not %u)", header.sector_size, SECTOR_SIZE); + ret = -ENOTSUP; + goto fail; + } else if (header.block_size != DEFAULT_CLUSTER_SIZE) { + error_setg(errp, "unsupported VDI image (block size %" PRIu32 + " is not %" PRIu32 ")", + header.block_size, DEFAULT_CLUSTER_SIZE); + ret = -ENOTSUP; + goto fail; + } else if (header.disk_size > + (uint64_t)header.blocks_in_image * header.block_size) { + error_setg(errp, "unsupported VDI image (disk size %" PRIu64 ", " + "image bitmap has room for %" PRIu64 ")", + header.disk_size, + (uint64_t)header.blocks_in_image * header.block_size); + ret = -ENOTSUP; + goto fail; + } else if (!qemu_uuid_is_null(&uuid_link)) { + error_setg(errp, "unsupported VDI image (non-NULL link UUID)"); + ret = -ENOTSUP; + goto fail; + } else if (!qemu_uuid_is_null(&uuid_parent)) { + error_setg(errp, "unsupported VDI image (non-NULL parent UUID)"); + ret = -ENOTSUP; + goto fail; + } else if (header.blocks_in_image > VDI_BLOCKS_IN_IMAGE_MAX) { + error_setg(errp, "unsupported VDI image " + "(too many blocks %u, max is %u)", + header.blocks_in_image, VDI_BLOCKS_IN_IMAGE_MAX); + ret = -ENOTSUP; + goto fail; + } + + bs->total_sectors = header.disk_size / SECTOR_SIZE; + + s->block_size = header.block_size; + s->bmap_sector = header.offset_bmap / SECTOR_SIZE; + s->header = header; + + bmap_size = header.blocks_in_image * sizeof(uint32_t); + bmap_size = DIV_ROUND_UP(bmap_size, SECTOR_SIZE); + s->bmap = qemu_try_blockalign(bs->file->bs, bmap_size * SECTOR_SIZE); + if (s->bmap == NULL) { + ret = -ENOMEM; + goto fail; + } + + ret = bdrv_pread(bs->file, header.offset_bmap, s->bmap, + bmap_size * SECTOR_SIZE); + if (ret < 0) { + goto fail_free_bmap; + } + + /* Disable migration when vdi images are used */ + error_setg(&s->migration_blocker, "The vdi format used by node '%s' " + "does not support live migration", + bdrv_get_device_or_node_name(bs)); + ret = migrate_add_blocker(s->migration_blocker, errp); + if (ret < 0) { + error_free(s->migration_blocker); + goto fail_free_bmap; + } + + qemu_co_rwlock_init(&s->bmap_lock); + + return 0; + + fail_free_bmap: + qemu_vfree(s->bmap); + + fail: + return ret; +} + +static int vdi_reopen_prepare(BDRVReopenState *state, + BlockReopenQueue *queue, Error **errp) +{ + return 0; +} + +static int coroutine_fn vdi_co_block_status(BlockDriverState *bs, + bool want_zero, + int64_t offset, int64_t bytes, + int64_t *pnum, int64_t *map, + BlockDriverState **file) +{ + BDRVVdiState *s = (BDRVVdiState *)bs->opaque; + size_t bmap_index = offset / s->block_size; + size_t index_in_block = offset % s->block_size; + uint32_t bmap_entry = le32_to_cpu(s->bmap[bmap_index]); + int result; + + logout("%p, %" PRId64 ", %" PRId64 ", %p\n", bs, offset, bytes, pnum); + *pnum = MIN(s->block_size - index_in_block, bytes); + result = VDI_IS_ALLOCATED(bmap_entry); + if (!result) { + return BDRV_BLOCK_ZERO; + } + + *map = s->header.offset_data + (uint64_t)bmap_entry * s->block_size + + index_in_block; + *file = bs->file->bs; + return BDRV_BLOCK_DATA | BDRV_BLOCK_OFFSET_VALID | + (s->header.image_type == VDI_TYPE_STATIC ? BDRV_BLOCK_RECURSE : 0); +} + +static int coroutine_fn +vdi_co_preadv(BlockDriverState *bs, uint64_t offset, uint64_t bytes, + QEMUIOVector *qiov, int flags) +{ + BDRVVdiState *s = bs->opaque; + QEMUIOVector local_qiov; + uint32_t bmap_entry; + uint32_t block_index; + uint32_t offset_in_block; + uint32_t n_bytes; + uint64_t bytes_done = 0; + int ret = 0; + + logout("\n"); + + qemu_iovec_init(&local_qiov, qiov->niov); + + while (ret >= 0 && bytes > 0) { + block_index = offset / s->block_size; + offset_in_block = offset % s->block_size; + n_bytes = MIN(bytes, s->block_size - offset_in_block); + + logout("will read %u bytes starting at offset %" PRIu64 "\n", + n_bytes, offset); + + /* prepare next AIO request */ + qemu_co_rwlock_rdlock(&s->bmap_lock); + bmap_entry = le32_to_cpu(s->bmap[block_index]); + qemu_co_rwlock_unlock(&s->bmap_lock); + if (!VDI_IS_ALLOCATED(bmap_entry)) { + /* Block not allocated, return zeros, no need to wait. */ + qemu_iovec_memset(qiov, bytes_done, 0, n_bytes); + ret = 0; + } else { + uint64_t data_offset = s->header.offset_data + + (uint64_t)bmap_entry * s->block_size + + offset_in_block; + + qemu_iovec_reset(&local_qiov); + qemu_iovec_concat(&local_qiov, qiov, bytes_done, n_bytes); + + ret = bdrv_co_preadv(bs->file, data_offset, n_bytes, + &local_qiov, 0); + } + logout("%u bytes read\n", n_bytes); + + bytes -= n_bytes; + offset += n_bytes; + bytes_done += n_bytes; + } + + qemu_iovec_destroy(&local_qiov); + + return ret; +} + +static int coroutine_fn +vdi_co_pwritev(BlockDriverState *bs, uint64_t offset, uint64_t bytes, + QEMUIOVector *qiov, int flags) +{ + BDRVVdiState *s = bs->opaque; + QEMUIOVector local_qiov; + uint32_t bmap_entry; + uint32_t block_index; + uint32_t offset_in_block; + uint32_t n_bytes; + uint64_t data_offset; + uint32_t bmap_first = VDI_UNALLOCATED; + uint32_t bmap_last = VDI_UNALLOCATED; + uint8_t *block = NULL; + uint64_t bytes_done = 0; + int ret = 0; + + logout("\n"); + + qemu_iovec_init(&local_qiov, qiov->niov); + + while (ret >= 0 && bytes > 0) { + block_index = offset / s->block_size; + offset_in_block = offset % s->block_size; + n_bytes = MIN(bytes, s->block_size - offset_in_block); + + logout("will write %u bytes starting at offset %" PRIu64 "\n", + n_bytes, offset); + + /* prepare next AIO request */ + qemu_co_rwlock_rdlock(&s->bmap_lock); + bmap_entry = le32_to_cpu(s->bmap[block_index]); + if (!VDI_IS_ALLOCATED(bmap_entry)) { + /* Allocate new block and write to it. */ + uint64_t data_offset; + qemu_co_rwlock_upgrade(&s->bmap_lock); + bmap_entry = le32_to_cpu(s->bmap[block_index]); + if (VDI_IS_ALLOCATED(bmap_entry)) { + /* A concurrent allocation did the work for us. */ + qemu_co_rwlock_downgrade(&s->bmap_lock); + goto nonallocating_write; + } + + bmap_entry = s->header.blocks_allocated; + s->bmap[block_index] = cpu_to_le32(bmap_entry); + s->header.blocks_allocated++; + data_offset = s->header.offset_data + + (uint64_t)bmap_entry * s->block_size; + if (block == NULL) { + block = g_malloc(s->block_size); + bmap_first = block_index; + } + bmap_last = block_index; + /* Copy data to be written to new block and zero unused parts. */ + memset(block, 0, offset_in_block); + qemu_iovec_to_buf(qiov, bytes_done, block + offset_in_block, + n_bytes); + memset(block + offset_in_block + n_bytes, 0, + s->block_size - n_bytes - offset_in_block); + + /* Write the new block under CoRwLock write-side protection, + * so this full-cluster write does not overlap a partial write + * of the same cluster, issued from the "else" branch. + */ + ret = bdrv_pwrite(bs->file, data_offset, block, s->block_size); + qemu_co_rwlock_unlock(&s->bmap_lock); + } else { +nonallocating_write: + data_offset = s->header.offset_data + + (uint64_t)bmap_entry * s->block_size + + offset_in_block; + qemu_co_rwlock_unlock(&s->bmap_lock); + + qemu_iovec_reset(&local_qiov); + qemu_iovec_concat(&local_qiov, qiov, bytes_done, n_bytes); + + ret = bdrv_co_pwritev(bs->file, data_offset, n_bytes, + &local_qiov, 0); + } + + bytes -= n_bytes; + offset += n_bytes; + bytes_done += n_bytes; + + logout("%u bytes written\n", n_bytes); + } + + qemu_iovec_destroy(&local_qiov); + + logout("finished data write\n"); + if (ret < 0) { + return ret; + } + + if (block) { + /* One or more new blocks were allocated. */ + VdiHeader *header = (VdiHeader *) block; + uint8_t *base; + uint64_t offset; + uint32_t n_sectors; + + logout("now writing modified header\n"); + assert(VDI_IS_ALLOCATED(bmap_first)); + *header = s->header; + vdi_header_to_le(header); + ret = bdrv_pwrite(bs->file, 0, block, sizeof(VdiHeader)); + g_free(block); + block = NULL; + + if (ret < 0) { + return ret; + } + + logout("now writing modified block map entry %u...%u\n", + bmap_first, bmap_last); + /* Write modified sectors from block map. */ + bmap_first /= (SECTOR_SIZE / sizeof(uint32_t)); + bmap_last /= (SECTOR_SIZE / sizeof(uint32_t)); + n_sectors = bmap_last - bmap_first + 1; + offset = s->bmap_sector + bmap_first; + base = ((uint8_t *)&s->bmap[0]) + bmap_first * SECTOR_SIZE; + logout("will write %u block map sectors starting from entry %u\n", + n_sectors, bmap_first); + ret = bdrv_pwrite(bs->file, offset * SECTOR_SIZE, base, + n_sectors * SECTOR_SIZE); + } + + return ret < 0 ? ret : 0; +} + +static int coroutine_fn vdi_co_do_create(BlockdevCreateOptions *create_options, + size_t block_size, Error **errp) +{ + BlockdevCreateOptionsVdi *vdi_opts; + int ret = 0; + uint64_t bytes = 0; + uint32_t blocks; + uint32_t image_type; + VdiHeader header; + size_t i; + size_t bmap_size; + int64_t offset = 0; + BlockDriverState *bs_file = NULL; + BlockBackend *blk = NULL; + uint32_t *bmap = NULL; + QemuUUID uuid; + + assert(create_options->driver == BLOCKDEV_DRIVER_VDI); + vdi_opts = &create_options->u.vdi; + + logout("\n"); + + /* Validate options and set default values */ + bytes = vdi_opts->size; + + if (!vdi_opts->has_preallocation) { + vdi_opts->preallocation = PREALLOC_MODE_OFF; + } + switch (vdi_opts->preallocation) { + case PREALLOC_MODE_OFF: + image_type = VDI_TYPE_DYNAMIC; + break; + case PREALLOC_MODE_METADATA: + image_type = VDI_TYPE_STATIC; + break; + default: + error_setg(errp, "Preallocation mode not supported for vdi"); + return -EINVAL; + } + +#ifndef CONFIG_VDI_STATIC_IMAGE + if (image_type == VDI_TYPE_STATIC) { + ret = -ENOTSUP; + error_setg(errp, "Statically allocated images cannot be created in " + "this build"); + goto exit; + } +#endif +#ifndef CONFIG_VDI_BLOCK_SIZE + if (block_size != DEFAULT_CLUSTER_SIZE) { + ret = -ENOTSUP; + error_setg(errp, + "A non-default cluster size is not supported in this build"); + goto exit; + } +#endif + + if (bytes > VDI_DISK_SIZE_MAX) { + ret = -ENOTSUP; + error_setg(errp, "Unsupported VDI image size (size is 0x%" PRIx64 + ", max supported is 0x%" PRIx64 ")", + bytes, VDI_DISK_SIZE_MAX); + goto exit; + } + + /* Create BlockBackend to write to the image */ + bs_file = bdrv_open_blockdev_ref(vdi_opts->file, errp); + if (!bs_file) { + ret = -EIO; + goto exit; + } + + blk = blk_new_with_bs(bs_file, BLK_PERM_WRITE | BLK_PERM_RESIZE, + BLK_PERM_ALL, errp); + if (!blk) { + ret = -EPERM; + goto exit; + } + + blk_set_allow_write_beyond_eof(blk, true); + + /* We need enough blocks to store the given disk size, + so always round up. */ + blocks = DIV_ROUND_UP(bytes, block_size); + + bmap_size = blocks * sizeof(uint32_t); + bmap_size = ROUND_UP(bmap_size, SECTOR_SIZE); + + memset(&header, 0, sizeof(header)); + pstrcpy(header.text, sizeof(header.text), VDI_TEXT); + header.signature = VDI_SIGNATURE; + header.version = VDI_VERSION_1_1; + header.header_size = 0x180; + header.image_type = image_type; + header.offset_bmap = 0x200; + header.offset_data = 0x200 + bmap_size; + header.sector_size = SECTOR_SIZE; + header.disk_size = bytes; + header.block_size = block_size; + header.blocks_in_image = blocks; + if (image_type == VDI_TYPE_STATIC) { + header.blocks_allocated = blocks; + } + qemu_uuid_generate(&uuid); + header.uuid_image = uuid; + qemu_uuid_generate(&uuid); + header.uuid_last_snap = uuid; + /* There is no need to set header.uuid_link or header.uuid_parent here. */ + if (VDI_DEBUG) { + vdi_header_print(&header); + } + vdi_header_to_le(&header); + ret = blk_pwrite(blk, offset, &header, sizeof(header), 0); + if (ret < 0) { + error_setg(errp, "Error writing header"); + goto exit; + } + offset += sizeof(header); + + if (bmap_size > 0) { + bmap = g_try_malloc0(bmap_size); + if (bmap == NULL) { + ret = -ENOMEM; + error_setg(errp, "Could not allocate bmap"); + goto exit; + } + for (i = 0; i < blocks; i++) { + if (image_type == VDI_TYPE_STATIC) { + bmap[i] = i; + } else { + bmap[i] = VDI_UNALLOCATED; + } + } + ret = blk_pwrite(blk, offset, bmap, bmap_size, 0); + if (ret < 0) { + error_setg(errp, "Error writing bmap"); + goto exit; + } + offset += bmap_size; + } + + if (image_type == VDI_TYPE_STATIC) { + ret = blk_truncate(blk, offset + blocks * block_size, false, + PREALLOC_MODE_OFF, 0, errp); + if (ret < 0) { + error_prepend(errp, "Failed to statically allocate file"); + goto exit; + } + } + + ret = 0; +exit: + blk_unref(blk); + bdrv_unref(bs_file); + g_free(bmap); + return ret; +} + +static int coroutine_fn vdi_co_create(BlockdevCreateOptions *create_options, + Error **errp) +{ + return vdi_co_do_create(create_options, DEFAULT_CLUSTER_SIZE, errp); +} + +static int coroutine_fn vdi_co_create_opts(BlockDriver *drv, + const char *filename, + QemuOpts *opts, + Error **errp) +{ + QDict *qdict = NULL; + BlockdevCreateOptions *create_options = NULL; + BlockDriverState *bs_file = NULL; + uint64_t block_size = DEFAULT_CLUSTER_SIZE; + bool is_static = false; + Visitor *v; + int ret; + + /* Parse options and convert legacy syntax. + * + * Since CONFIG_VDI_BLOCK_SIZE is disabled by default, + * cluster-size is not part of the QAPI schema; therefore we have + * to parse it before creating the QAPI object. */ +#if defined(CONFIG_VDI_BLOCK_SIZE) + block_size = qemu_opt_get_size_del(opts, + BLOCK_OPT_CLUSTER_SIZE, + DEFAULT_CLUSTER_SIZE); + if (block_size < BDRV_SECTOR_SIZE || block_size > UINT32_MAX || + !is_power_of_2(block_size)) + { + error_setg(errp, "Invalid cluster size"); + ret = -EINVAL; + goto done; + } +#endif + if (qemu_opt_get_bool_del(opts, BLOCK_OPT_STATIC, false)) { + is_static = true; + } + + qdict = qemu_opts_to_qdict_filtered(opts, NULL, &vdi_create_opts, true); + + /* Create and open the file (protocol layer) */ + ret = bdrv_create_file(filename, opts, errp); + if (ret < 0) { + goto done; + } + + bs_file = bdrv_open(filename, NULL, NULL, + BDRV_O_RDWR | BDRV_O_RESIZE | BDRV_O_PROTOCOL, errp); + if (!bs_file) { + ret = -EIO; + goto done; + } + + qdict_put_str(qdict, "driver", "vdi"); + qdict_put_str(qdict, "file", bs_file->node_name); + if (is_static) { + qdict_put_str(qdict, "preallocation", "metadata"); + } + + /* Get the QAPI object */ + v = qobject_input_visitor_new_flat_confused(qdict, errp); + if (!v) { + ret = -EINVAL; + goto done; + } + visit_type_BlockdevCreateOptions(v, NULL, &create_options, errp); + visit_free(v); + if (!create_options) { + ret = -EINVAL; + goto done; + } + + /* Silently round up size */ + assert(create_options->driver == BLOCKDEV_DRIVER_VDI); + create_options->u.vdi.size = ROUND_UP(create_options->u.vdi.size, + BDRV_SECTOR_SIZE); + + /* Create the vdi image (format layer) */ + ret = vdi_co_do_create(create_options, block_size, errp); +done: + qobject_unref(qdict); + qapi_free_BlockdevCreateOptions(create_options); + bdrv_unref(bs_file); + return ret; +} + +static void vdi_close(BlockDriverState *bs) +{ + BDRVVdiState *s = bs->opaque; + + qemu_vfree(s->bmap); + + migrate_del_blocker(s->migration_blocker); + error_free(s->migration_blocker); +} + +static int vdi_has_zero_init(BlockDriverState *bs) +{ + BDRVVdiState *s = bs->opaque; + + if (s->header.image_type == VDI_TYPE_STATIC) { + return bdrv_has_zero_init(bs->file->bs); + } else { + return 1; + } +} + +static QemuOptsList vdi_create_opts = { + .name = "vdi-create-opts", + .head = QTAILQ_HEAD_INITIALIZER(vdi_create_opts.head), + .desc = { + { + .name = BLOCK_OPT_SIZE, + .type = QEMU_OPT_SIZE, + .help = "Virtual disk size" + }, +#if defined(CONFIG_VDI_BLOCK_SIZE) + { + .name = BLOCK_OPT_CLUSTER_SIZE, + .type = QEMU_OPT_SIZE, + .help = "VDI cluster (block) size", + .def_value_str = stringify(DEFAULT_CLUSTER_SIZE) + }, +#endif +#if defined(CONFIG_VDI_STATIC_IMAGE) + { + .name = BLOCK_OPT_STATIC, + .type = QEMU_OPT_BOOL, + .help = "VDI static (pre-allocated) image", + .def_value_str = "off" + }, +#endif + /* TODO: An additional option to set UUID values might be useful. */ + { /* end of list */ } + } +}; + +static BlockDriver bdrv_vdi = { + .format_name = "vdi", + .instance_size = sizeof(BDRVVdiState), + .bdrv_probe = vdi_probe, + .bdrv_open = vdi_open, + .bdrv_close = vdi_close, + .bdrv_reopen_prepare = vdi_reopen_prepare, + .bdrv_child_perm = bdrv_default_perms, + .bdrv_co_create = vdi_co_create, + .bdrv_co_create_opts = vdi_co_create_opts, + .bdrv_has_zero_init = vdi_has_zero_init, + .bdrv_co_block_status = vdi_co_block_status, + .bdrv_make_empty = vdi_make_empty, + + .bdrv_co_preadv = vdi_co_preadv, +#if defined(CONFIG_VDI_WRITE) + .bdrv_co_pwritev = vdi_co_pwritev, +#endif + + .bdrv_get_info = vdi_get_info, + + .is_format = true, + .create_opts = &vdi_create_opts, + .bdrv_co_check = vdi_co_check, +}; + +static void bdrv_vdi_init(void) +{ + logout("\n"); + bdrv_register(&bdrv_vdi); +} + +block_init(bdrv_vdi_init); diff --git a/block/vhdx-endian.c b/block/vhdx-endian.c new file mode 100644 index 000000000..52c8027d9 --- /dev/null +++ b/block/vhdx-endian.c @@ -0,0 +1,221 @@ +/* + * Block driver for Hyper-V VHDX Images + * + * Copyright (c) 2013 Red Hat, Inc., + * + * Authors: + * Jeff Cody + * + * This is based on the "VHDX Format Specification v1.00", published 8/25/2012 + * by Microsoft: + * https://www.microsoft.com/en-us/download/details.aspx?id=34750 + * + * This work is licensed under the terms of the GNU LGPL, version 2 or later. + * See the COPYING.LIB file in the top-level directory. + * + */ + +#include "qemu/osdep.h" +#include "block/block_int.h" +#include "qemu/bswap.h" +#include "vhdx.h" + +/* + * All the VHDX formats on disk are little endian - the following + * are helper import/export functions to correctly convert + * endianness from disk read to native cpu format, and back again. + */ + + +/* VHDX File Header */ + + +void vhdx_header_le_import(VHDXHeader *h) +{ + assert(h != NULL); + + h->signature = le32_to_cpu(h->signature); + h->checksum = le32_to_cpu(h->checksum); + h->sequence_number = le64_to_cpu(h->sequence_number); + + leguid_to_cpus(&h->file_write_guid); + leguid_to_cpus(&h->data_write_guid); + leguid_to_cpus(&h->log_guid); + + h->log_version = le16_to_cpu(h->log_version); + h->version = le16_to_cpu(h->version); + h->log_length = le32_to_cpu(h->log_length); + h->log_offset = le64_to_cpu(h->log_offset); +} + +void vhdx_header_le_export(VHDXHeader *orig_h, VHDXHeader *new_h) +{ + assert(orig_h != NULL); + assert(new_h != NULL); + + new_h->signature = cpu_to_le32(orig_h->signature); + new_h->checksum = cpu_to_le32(orig_h->checksum); + new_h->sequence_number = cpu_to_le64(orig_h->sequence_number); + + new_h->file_write_guid = orig_h->file_write_guid; + new_h->data_write_guid = orig_h->data_write_guid; + new_h->log_guid = orig_h->log_guid; + + cpu_to_leguids(&new_h->file_write_guid); + cpu_to_leguids(&new_h->data_write_guid); + cpu_to_leguids(&new_h->log_guid); + + new_h->log_version = cpu_to_le16(orig_h->log_version); + new_h->version = cpu_to_le16(orig_h->version); + new_h->log_length = cpu_to_le32(orig_h->log_length); + new_h->log_offset = cpu_to_le64(orig_h->log_offset); +} + + +/* VHDX Log Headers */ + + +void vhdx_log_desc_le_import(VHDXLogDescriptor *d) +{ + assert(d != NULL); + + d->signature = le32_to_cpu(d->signature); + d->file_offset = le64_to_cpu(d->file_offset); + d->sequence_number = le64_to_cpu(d->sequence_number); +} + +void vhdx_log_desc_le_export(VHDXLogDescriptor *d) +{ + assert(d != NULL); + + d->signature = cpu_to_le32(d->signature); + d->trailing_bytes = cpu_to_le32(d->trailing_bytes); + d->leading_bytes = cpu_to_le64(d->leading_bytes); + d->file_offset = cpu_to_le64(d->file_offset); + d->sequence_number = cpu_to_le64(d->sequence_number); +} + +void vhdx_log_data_le_import(VHDXLogDataSector *d) +{ + assert(d != NULL); + + d->data_signature = le32_to_cpu(d->data_signature); + d->sequence_high = le32_to_cpu(d->sequence_high); + d->sequence_low = le32_to_cpu(d->sequence_low); +} + +void vhdx_log_data_le_export(VHDXLogDataSector *d) +{ + assert(d != NULL); + + d->data_signature = cpu_to_le32(d->data_signature); + d->sequence_high = cpu_to_le32(d->sequence_high); + d->sequence_low = cpu_to_le32(d->sequence_low); +} + +void vhdx_log_entry_hdr_le_import(VHDXLogEntryHeader *hdr) +{ + assert(hdr != NULL); + + hdr->signature = le32_to_cpu(hdr->signature); + hdr->checksum = le32_to_cpu(hdr->checksum); + hdr->entry_length = le32_to_cpu(hdr->entry_length); + hdr->tail = le32_to_cpu(hdr->tail); + hdr->sequence_number = le64_to_cpu(hdr->sequence_number); + hdr->descriptor_count = le32_to_cpu(hdr->descriptor_count); + leguid_to_cpus(&hdr->log_guid); + hdr->flushed_file_offset = le64_to_cpu(hdr->flushed_file_offset); + hdr->last_file_offset = le64_to_cpu(hdr->last_file_offset); +} + +void vhdx_log_entry_hdr_le_export(VHDXLogEntryHeader *hdr) +{ + assert(hdr != NULL); + + hdr->signature = cpu_to_le32(hdr->signature); + hdr->checksum = cpu_to_le32(hdr->checksum); + hdr->entry_length = cpu_to_le32(hdr->entry_length); + hdr->tail = cpu_to_le32(hdr->tail); + hdr->sequence_number = cpu_to_le64(hdr->sequence_number); + hdr->descriptor_count = cpu_to_le32(hdr->descriptor_count); + cpu_to_leguids(&hdr->log_guid); + hdr->flushed_file_offset = cpu_to_le64(hdr->flushed_file_offset); + hdr->last_file_offset = cpu_to_le64(hdr->last_file_offset); +} + + +/* Region table entries */ +void vhdx_region_header_le_import(VHDXRegionTableHeader *hdr) +{ + assert(hdr != NULL); + + hdr->signature = le32_to_cpu(hdr->signature); + hdr->checksum = le32_to_cpu(hdr->checksum); + hdr->entry_count = le32_to_cpu(hdr->entry_count); +} + +void vhdx_region_header_le_export(VHDXRegionTableHeader *hdr) +{ + assert(hdr != NULL); + + hdr->signature = cpu_to_le32(hdr->signature); + hdr->checksum = cpu_to_le32(hdr->checksum); + hdr->entry_count = cpu_to_le32(hdr->entry_count); +} + +void vhdx_region_entry_le_import(VHDXRegionTableEntry *e) +{ + assert(e != NULL); + + leguid_to_cpus(&e->guid); + e->file_offset = le64_to_cpu(e->file_offset); + e->length = le32_to_cpu(e->length); + e->data_bits = le32_to_cpu(e->data_bits); +} + +void vhdx_region_entry_le_export(VHDXRegionTableEntry *e) +{ + assert(e != NULL); + + cpu_to_leguids(&e->guid); + e->file_offset = cpu_to_le64(e->file_offset); + e->length = cpu_to_le32(e->length); + e->data_bits = cpu_to_le32(e->data_bits); +} + + +/* Metadata headers & table */ +void vhdx_metadata_header_le_import(VHDXMetadataTableHeader *hdr) +{ + assert(hdr != NULL); + + hdr->signature = le64_to_cpu(hdr->signature); + hdr->entry_count = le16_to_cpu(hdr->entry_count); +} + +void vhdx_metadata_header_le_export(VHDXMetadataTableHeader *hdr) +{ + assert(hdr != NULL); + + hdr->signature = cpu_to_le64(hdr->signature); + hdr->entry_count = cpu_to_le16(hdr->entry_count); +} + +void vhdx_metadata_entry_le_import(VHDXMetadataTableEntry *e) +{ + assert(e != NULL); + + leguid_to_cpus(&e->item_id); + e->offset = le32_to_cpu(e->offset); + e->length = le32_to_cpu(e->length); + e->data_bits = le32_to_cpu(e->data_bits); +} +void vhdx_metadata_entry_le_export(VHDXMetadataTableEntry *e) +{ + assert(e != NULL); + + cpu_to_leguids(&e->item_id); + e->offset = cpu_to_le32(e->offset); + e->length = cpu_to_le32(e->length); + e->data_bits = cpu_to_le32(e->data_bits); +} diff --git a/block/vhdx-log.c b/block/vhdx-log.c new file mode 100644 index 000000000..404fb5f3c --- /dev/null +++ b/block/vhdx-log.c @@ -0,0 +1,1076 @@ +/* + * Block driver for Hyper-V VHDX Images + * + * Copyright (c) 2013 Red Hat, Inc., + * + * Authors: + * Jeff Cody + * + * This is based on the "VHDX Format Specification v1.00", published 8/25/2012 + * by Microsoft: + * https://www.microsoft.com/en-us/download/details.aspx?id=34750 + * + * This file covers the functionality of the metadata log writing, parsing, and + * replay. + * + * This work is licensed under the terms of the GNU LGPL, version 2 or later. + * See the COPYING.LIB file in the top-level directory. + * + */ + +#include "qemu/osdep.h" +#include "qapi/error.h" +#include "block/block_int.h" +#include "qemu/error-report.h" +#include "qemu/bswap.h" +#include "vhdx.h" + + +typedef struct VHDXLogSequence { + bool valid; + uint32_t count; + VHDXLogEntries log; + VHDXLogEntryHeader hdr; +} VHDXLogSequence; + +typedef struct VHDXLogDescEntries { + VHDXLogEntryHeader hdr; + VHDXLogDescriptor desc[]; +} VHDXLogDescEntries; + +static const MSGUID zero_guid = { 0 }; + +/* The log located on the disk is circular buffer containing + * sectors of 4096 bytes each. + * + * It is assumed for the read/write functions below that the + * circular buffer scheme uses a 'one sector open' to indicate + * the buffer is full. Given the validation methods used for each + * sector, this method should be compatible with other methods that + * do not waste a sector. + */ + + +/* Allow peeking at the hdr entry at the beginning of the current + * read index, without advancing the read index */ +static int vhdx_log_peek_hdr(BlockDriverState *bs, VHDXLogEntries *log, + VHDXLogEntryHeader *hdr) +{ + int ret = 0; + uint64_t offset; + uint32_t read; + + assert(hdr != NULL); + + /* peek is only supported on sector boundaries */ + if (log->read % VHDX_LOG_SECTOR_SIZE) { + ret = -EFAULT; + goto exit; + } + + read = log->read; + /* we are guaranteed that a) log sectors are 4096 bytes, + * and b) the log length is a multiple of 1MB. So, there + * is always a round number of sectors in the buffer */ + if ((read + sizeof(VHDXLogEntryHeader)) > log->length) { + read = 0; + } + + if (read == log->write) { + ret = -EINVAL; + goto exit; + } + + offset = log->offset + read; + + ret = bdrv_pread(bs->file, offset, hdr, sizeof(VHDXLogEntryHeader)); + if (ret < 0) { + goto exit; + } + vhdx_log_entry_hdr_le_import(hdr); + +exit: + return ret; +} + +/* Index increment for log, based on sector boundaries */ +static int vhdx_log_inc_idx(uint32_t idx, uint64_t length) +{ + idx += VHDX_LOG_SECTOR_SIZE; + /* we are guaranteed that a) log sectors are 4096 bytes, + * and b) the log length is a multiple of 1MB. So, there + * is always a round number of sectors in the buffer */ + return idx >= length ? 0 : idx; +} + + +/* Reset the log to empty */ +static void vhdx_log_reset(BlockDriverState *bs, BDRVVHDXState *s) +{ + MSGUID guid = { 0 }; + s->log.read = s->log.write = 0; + /* a log guid of 0 indicates an empty log to any parser of v0 + * VHDX logs */ + vhdx_update_headers(bs, s, false, &guid); +} + +/* Reads num_sectors from the log (all log sectors are 4096 bytes), + * into buffer 'buffer'. Upon return, *sectors_read will contain + * the number of sectors successfully read. + * + * It is assumed that 'buffer' is already allocated, and of sufficient + * size (i.e. >= 4096*num_sectors). + * + * If 'peek' is true, then the tail (read) pointer for the circular buffer is + * not modified. + * + * 0 is returned on success, -errno otherwise. */ +static int vhdx_log_read_sectors(BlockDriverState *bs, VHDXLogEntries *log, + uint32_t *sectors_read, void *buffer, + uint32_t num_sectors, bool peek) +{ + int ret = 0; + uint64_t offset; + uint32_t read; + + read = log->read; + + *sectors_read = 0; + while (num_sectors) { + if (read == log->write) { + /* empty */ + break; + } + offset = log->offset + read; + + ret = bdrv_pread(bs->file, offset, buffer, VHDX_LOG_SECTOR_SIZE); + if (ret < 0) { + goto exit; + } + read = vhdx_log_inc_idx(read, log->length); + + *sectors_read = *sectors_read + 1; + num_sectors--; + } + +exit: + if (!peek) { + log->read = read; + } + return ret; +} + +/* Writes num_sectors to the log (all log sectors are 4096 bytes), + * from buffer 'buffer'. Upon return, *sectors_written will contain + * the number of sectors successfully written. + * + * It is assumed that 'buffer' is at least 4096*num_sectors large. + * + * 0 is returned on success, -errno otherwise */ +static int vhdx_log_write_sectors(BlockDriverState *bs, VHDXLogEntries *log, + uint32_t *sectors_written, void *buffer, + uint32_t num_sectors) +{ + int ret = 0; + uint64_t offset; + uint32_t write; + void *buffer_tmp; + BDRVVHDXState *s = bs->opaque; + + ret = vhdx_user_visible_write(bs, s); + if (ret < 0) { + goto exit; + } + + write = log->write; + + buffer_tmp = buffer; + while (num_sectors) { + + offset = log->offset + write; + write = vhdx_log_inc_idx(write, log->length); + if (write == log->read) { + /* full */ + break; + } + ret = bdrv_pwrite(bs->file, offset, buffer_tmp, + VHDX_LOG_SECTOR_SIZE); + if (ret < 0) { + goto exit; + } + buffer_tmp += VHDX_LOG_SECTOR_SIZE; + + log->write = write; + *sectors_written = *sectors_written + 1; + num_sectors--; + } + +exit: + return ret; +} + + +/* Validates a log entry header */ +static bool vhdx_log_hdr_is_valid(VHDXLogEntries *log, VHDXLogEntryHeader *hdr, + BDRVVHDXState *s) +{ + int valid = false; + + if (hdr->signature != VHDX_LOG_SIGNATURE) { + goto exit; + } + + /* if the individual entry length is larger than the whole log + * buffer, that is obviously invalid */ + if (log->length < hdr->entry_length) { + goto exit; + } + + /* length of entire entry must be in units of 4KB (log sector size) */ + if (hdr->entry_length % (VHDX_LOG_SECTOR_SIZE)) { + goto exit; + } + + /* per spec, sequence # must be > 0 */ + if (hdr->sequence_number == 0) { + goto exit; + } + + /* log entries are only valid if they match the file-wide log guid + * found in the active header */ + if (!guid_eq(hdr->log_guid, s->headers[s->curr_header]->log_guid)) { + goto exit; + } + + if (hdr->descriptor_count * sizeof(VHDXLogDescriptor) > hdr->entry_length) { + goto exit; + } + + valid = true; + +exit: + return valid; +} + +/* + * Given a log header, this will validate that the descriptors and the + * corresponding data sectors (if applicable) + * + * Validation consists of: + * 1. Making sure the sequence numbers matches the entry header + * 2. Verifying a valid signature ('zero' or 'desc' for descriptors) + * 3. File offset field is a multiple of 4KB + * 4. If a data descriptor, the corresponding data sector + * has its signature ('data') and matching sequence number + * + * @desc: the data buffer containing the descriptor + * @hdr: the log entry header + * + * Returns true if valid + */ +static bool vhdx_log_desc_is_valid(VHDXLogDescriptor *desc, + VHDXLogEntryHeader *hdr) +{ + bool ret = false; + + if (desc->sequence_number != hdr->sequence_number) { + goto exit; + } + if (desc->file_offset % VHDX_LOG_SECTOR_SIZE) { + goto exit; + } + + if (desc->signature == VHDX_LOG_ZERO_SIGNATURE) { + if (desc->zero_length % VHDX_LOG_SECTOR_SIZE == 0) { + /* valid */ + ret = true; + } + } else if (desc->signature == VHDX_LOG_DESC_SIGNATURE) { + /* valid */ + ret = true; + } + +exit: + return ret; +} + + +/* Prior to sector data for a log entry, there is the header + * and the descriptors referenced in the header: + * + * [] = 4KB sector + * + * [ hdr, desc ][ desc ][ ... ][ data ][ ... ] + * + * The first sector in a log entry has a 64 byte header, and + * up to 126 32-byte descriptors. If more descriptors than + * 126 are required, then subsequent sectors can have up to 128 + * descriptors. Each sector is 4KB. Data follows the descriptor + * sectors. + * + * This will return the number of sectors needed to encompass + * the passed number of descriptors in desc_cnt. + * + * This will never return 0, even if desc_cnt is 0. + */ +static int vhdx_compute_desc_sectors(uint32_t desc_cnt) +{ + uint32_t desc_sectors; + + desc_cnt += 2; /* account for header in first sector */ + desc_sectors = desc_cnt / 128; + if (desc_cnt % 128) { + desc_sectors++; + } + + return desc_sectors; +} + + +/* Reads the log header, and subsequent descriptors (if any). This + * will allocate all the space for buffer, which must be NULL when + * passed into this function. Each descriptor will also be validated, + * and error returned if any are invalid. */ +static int vhdx_log_read_desc(BlockDriverState *bs, BDRVVHDXState *s, + VHDXLogEntries *log, VHDXLogDescEntries **buffer, + bool convert_endian) +{ + int ret = 0; + uint32_t desc_sectors; + uint32_t sectors_read; + VHDXLogEntryHeader hdr; + VHDXLogDescEntries *desc_entries = NULL; + VHDXLogDescriptor desc; + int i; + + assert(*buffer == NULL); + + ret = vhdx_log_peek_hdr(bs, log, &hdr); + if (ret < 0) { + goto exit; + } + + if (vhdx_log_hdr_is_valid(log, &hdr, s) == false) { + ret = -EINVAL; + goto exit; + } + + desc_sectors = vhdx_compute_desc_sectors(hdr.descriptor_count); + desc_entries = qemu_try_blockalign(bs->file->bs, + desc_sectors * VHDX_LOG_SECTOR_SIZE); + if (desc_entries == NULL) { + ret = -ENOMEM; + goto exit; + } + + ret = vhdx_log_read_sectors(bs, log, §ors_read, desc_entries, + desc_sectors, false); + if (ret < 0) { + goto free_and_exit; + } + if (sectors_read != desc_sectors) { + ret = -EINVAL; + goto free_and_exit; + } + + /* put in proper endianness, and validate each desc */ + for (i = 0; i < hdr.descriptor_count; i++) { + desc = desc_entries->desc[i]; + vhdx_log_desc_le_import(&desc); + if (convert_endian) { + desc_entries->desc[i] = desc; + } + if (vhdx_log_desc_is_valid(&desc, &hdr) == false) { + ret = -EINVAL; + goto free_and_exit; + } + } + if (convert_endian) { + desc_entries->hdr = hdr; + } + + *buffer = desc_entries; + goto exit; + +free_and_exit: + qemu_vfree(desc_entries); +exit: + return ret; +} + + +/* Flushes the descriptor described by desc to the VHDX image file. + * If the descriptor is a data descriptor, than 'data' must be non-NULL, + * and >= 4096 bytes (VHDX_LOG_SECTOR_SIZE), containing the data to be + * written. + * + * Verification is performed to make sure the sequence numbers of a data + * descriptor match the sequence number in the desc. + * + * For a zero descriptor, it may describe multiple sectors to fill with zeroes. + * In this case, it should be noted that zeroes are written to disk, and the + * image file is not extended as a sparse file. */ +static int vhdx_log_flush_desc(BlockDriverState *bs, VHDXLogDescriptor *desc, + VHDXLogDataSector *data) +{ + int ret = 0; + uint64_t seq, file_offset; + uint32_t offset = 0; + void *buffer = NULL; + uint64_t count = 1; + int i; + + buffer = qemu_blockalign(bs, VHDX_LOG_SECTOR_SIZE); + + if (desc->signature == VHDX_LOG_DESC_SIGNATURE) { + /* data sector */ + if (data == NULL) { + ret = -EFAULT; + goto exit; + } + + /* The sequence number of the data sector must match that + * in the descriptor */ + seq = data->sequence_high; + seq <<= 32; + seq |= data->sequence_low & 0xffffffff; + + if (seq != desc->sequence_number) { + ret = -EINVAL; + goto exit; + } + + /* Each data sector is in total 4096 bytes, however the first + * 8 bytes, and last 4 bytes, are located in the descriptor */ + memcpy(buffer, &desc->leading_bytes, 8); + offset += 8; + + memcpy(buffer+offset, data->data, 4084); + offset += 4084; + + memcpy(buffer+offset, &desc->trailing_bytes, 4); + + } else if (desc->signature == VHDX_LOG_ZERO_SIGNATURE) { + /* write 'count' sectors of sector */ + memset(buffer, 0, VHDX_LOG_SECTOR_SIZE); + count = desc->zero_length / VHDX_LOG_SECTOR_SIZE; + } else { + error_report("Invalid VHDX log descriptor entry signature 0x%" PRIx32, + desc->signature); + ret = -EINVAL; + goto exit; + } + + file_offset = desc->file_offset; + + /* count is only > 1 if we are writing zeroes */ + for (i = 0; i < count; i++) { + ret = bdrv_pwrite_sync(bs->file, file_offset, buffer, + VHDX_LOG_SECTOR_SIZE); + if (ret < 0) { + goto exit; + } + file_offset += VHDX_LOG_SECTOR_SIZE; + } + +exit: + qemu_vfree(buffer); + return ret; +} + +/* Flush the entire log (as described by 'logs') to the VHDX image + * file, and then set the log to 'empty' status once complete. + * + * The log entries should be validate prior to flushing */ +static int vhdx_log_flush(BlockDriverState *bs, BDRVVHDXState *s, + VHDXLogSequence *logs) +{ + int ret = 0; + int i; + uint32_t cnt, sectors_read; + uint64_t new_file_size; + void *data = NULL; + int64_t file_length; + VHDXLogDescEntries *desc_entries = NULL; + VHDXLogEntryHeader hdr_tmp = { 0 }; + + cnt = logs->count; + + data = qemu_blockalign(bs, VHDX_LOG_SECTOR_SIZE); + + ret = vhdx_user_visible_write(bs, s); + if (ret < 0) { + goto exit; + } + + /* each iteration represents one log sequence, which may span multiple + * sectors */ + while (cnt--) { + ret = vhdx_log_peek_hdr(bs, &logs->log, &hdr_tmp); + if (ret < 0) { + goto exit; + } + file_length = bdrv_getlength(bs->file->bs); + if (file_length < 0) { + ret = file_length; + goto exit; + } + /* if the log shows a FlushedFileOffset larger than our current file + * size, then that means the file has been truncated / corrupted, and + * we must refused to open it / use it */ + if (hdr_tmp.flushed_file_offset > file_length) { + ret = -EINVAL; + goto exit; + } + + ret = vhdx_log_read_desc(bs, s, &logs->log, &desc_entries, true); + if (ret < 0) { + goto exit; + } + + for (i = 0; i < desc_entries->hdr.descriptor_count; i++) { + if (desc_entries->desc[i].signature == VHDX_LOG_DESC_SIGNATURE) { + /* data sector, so read a sector to flush */ + ret = vhdx_log_read_sectors(bs, &logs->log, §ors_read, + data, 1, false); + if (ret < 0) { + goto exit; + } + if (sectors_read != 1) { + ret = -EINVAL; + goto exit; + } + vhdx_log_data_le_import(data); + } + + ret = vhdx_log_flush_desc(bs, &desc_entries->desc[i], data); + if (ret < 0) { + goto exit; + } + } + if (file_length < desc_entries->hdr.last_file_offset) { + new_file_size = desc_entries->hdr.last_file_offset; + if (new_file_size % (1 * MiB)) { + /* round up to nearest 1MB boundary */ + new_file_size = QEMU_ALIGN_UP(new_file_size, MiB); + if (new_file_size > INT64_MAX) { + ret = -EINVAL; + goto exit; + } + ret = bdrv_truncate(bs->file, new_file_size, false, + PREALLOC_MODE_OFF, 0, NULL); + if (ret < 0) { + goto exit; + } + } + } + qemu_vfree(desc_entries); + desc_entries = NULL; + } + + ret = bdrv_flush(bs); + if (ret < 0) { + goto exit; + } + /* once the log is fully flushed, indicate that we have an empty log + * now. This also sets the log guid to 0, to indicate an empty log */ + vhdx_log_reset(bs, s); + +exit: + qemu_vfree(data); + qemu_vfree(desc_entries); + return ret; +} + +static int vhdx_validate_log_entry(BlockDriverState *bs, BDRVVHDXState *s, + VHDXLogEntries *log, uint64_t seq, + bool *valid, VHDXLogEntryHeader *entry) +{ + int ret = 0; + VHDXLogEntryHeader hdr; + void *buffer = NULL; + uint32_t i, desc_sectors, total_sectors, crc; + uint32_t sectors_read = 0; + VHDXLogDescEntries *desc_buffer = NULL; + + *valid = false; + + ret = vhdx_log_peek_hdr(bs, log, &hdr); + if (ret < 0) { + goto inc_and_exit; + } + + if (vhdx_log_hdr_is_valid(log, &hdr, s) == false) { + goto inc_and_exit; + } + + if (seq > 0) { + if (hdr.sequence_number != seq + 1) { + goto inc_and_exit; + } + } + + desc_sectors = vhdx_compute_desc_sectors(hdr.descriptor_count); + + /* Read all log sectors, and calculate log checksum */ + + total_sectors = hdr.entry_length / VHDX_LOG_SECTOR_SIZE; + + + /* read_desc() will increment the read idx */ + ret = vhdx_log_read_desc(bs, s, log, &desc_buffer, false); + if (ret < 0) { + goto free_and_exit; + } + + crc = vhdx_checksum_calc(0xffffffff, (void *)desc_buffer, + desc_sectors * VHDX_LOG_SECTOR_SIZE, 4); + crc ^= 0xffffffff; + + buffer = qemu_blockalign(bs, VHDX_LOG_SECTOR_SIZE); + if (total_sectors > desc_sectors) { + for (i = 0; i < total_sectors - desc_sectors; i++) { + sectors_read = 0; + ret = vhdx_log_read_sectors(bs, log, §ors_read, buffer, + 1, false); + if (ret < 0 || sectors_read != 1) { + goto free_and_exit; + } + crc = vhdx_checksum_calc(crc, buffer, VHDX_LOG_SECTOR_SIZE, -1); + crc ^= 0xffffffff; + } + } + crc ^= 0xffffffff; + if (crc != hdr.checksum) { + goto free_and_exit; + } + + *valid = true; + *entry = hdr; + goto free_and_exit; + +inc_and_exit: + log->read = vhdx_log_inc_idx(log->read, log->length); + +free_and_exit: + qemu_vfree(buffer); + qemu_vfree(desc_buffer); + return ret; +} + +/* Search through the log circular buffer, and find the valid, active + * log sequence, if any exists + * */ +static int vhdx_log_search(BlockDriverState *bs, BDRVVHDXState *s, + VHDXLogSequence *logs) +{ + int ret = 0; + uint32_t tail; + bool seq_valid = false; + VHDXLogSequence candidate = { 0 }; + VHDXLogEntryHeader hdr = { 0 }; + VHDXLogEntries curr_log; + + memcpy(&curr_log, &s->log, sizeof(VHDXLogEntries)); + curr_log.write = curr_log.length; /* assume log is full */ + curr_log.read = 0; + + + /* now we will go through the whole log sector by sector, until + * we find a valid, active log sequence, or reach the end of the + * log buffer */ + for (;;) { + uint64_t curr_seq = 0; + VHDXLogSequence current = { 0 }; + + tail = curr_log.read; + + ret = vhdx_validate_log_entry(bs, s, &curr_log, curr_seq, + &seq_valid, &hdr); + if (ret < 0) { + goto exit; + } + + if (seq_valid) { + current.valid = true; + current.log = curr_log; + current.log.read = tail; + current.log.write = curr_log.read; + current.count = 1; + current.hdr = hdr; + + + for (;;) { + ret = vhdx_validate_log_entry(bs, s, &curr_log, curr_seq, + &seq_valid, &hdr); + if (ret < 0) { + goto exit; + } + if (seq_valid == false) { + break; + } + current.log.write = curr_log.read; + current.count++; + + curr_seq = hdr.sequence_number; + } + } + + if (current.valid) { + if (candidate.valid == false || + current.hdr.sequence_number > candidate.hdr.sequence_number) { + candidate = current; + } + } + + if (curr_log.read < tail) { + break; + } + } + + *logs = candidate; + + if (candidate.valid) { + /* this is the next sequence number, for writes */ + s->log.sequence = candidate.hdr.sequence_number + 1; + } + + +exit: + return ret; +} + +/* Parse the replay log. Per the VHDX spec, if the log is present + * it must be replayed prior to opening the file, even read-only. + * + * If read-only, we must replay the log in RAM (or refuse to open + * a dirty VHDX file read-only) */ +int vhdx_parse_log(BlockDriverState *bs, BDRVVHDXState *s, bool *flushed, + Error **errp) +{ + int ret = 0; + VHDXHeader *hdr; + VHDXLogSequence logs = { 0 }; + + hdr = s->headers[s->curr_header]; + + *flushed = false; + + /* s->log.hdr is freed in vhdx_close() */ + if (s->log.hdr == NULL) { + s->log.hdr = qemu_blockalign(bs, sizeof(VHDXLogEntryHeader)); + } + + s->log.offset = hdr->log_offset; + s->log.length = hdr->log_length; + + if (s->log.offset < VHDX_LOG_MIN_SIZE || + s->log.offset % VHDX_LOG_MIN_SIZE) { + ret = -EINVAL; + goto exit; + } + + /* per spec, only log version of 0 is supported */ + if (hdr->log_version != 0) { + ret = -EINVAL; + goto exit; + } + + /* If either the log guid, or log length is zero, + * then a replay log is not present */ + if (guid_eq(hdr->log_guid, zero_guid)) { + goto exit; + } + + if (hdr->log_length == 0) { + goto exit; + } + + if (hdr->log_length % VHDX_LOG_MIN_SIZE) { + ret = -EINVAL; + goto exit; + } + + + /* The log is present, we need to find if and where there is an active + * sequence of valid entries present in the log. */ + + ret = vhdx_log_search(bs, s, &logs); + if (ret < 0) { + goto exit; + } + + if (logs.valid) { + if (bs->read_only) { + bdrv_refresh_filename(bs); + ret = -EPERM; + error_setg(errp, + "VHDX image file '%s' opened read-only, but " + "contains a log that needs to be replayed", + bs->filename); + error_append_hint(errp, "To replay the log, run:\n" + "qemu-img check -r all '%s'\n", + bs->filename); + goto exit; + } + /* now flush the log */ + ret = vhdx_log_flush(bs, s, &logs); + if (ret < 0) { + goto exit; + } + *flushed = true; + } + + +exit: + return ret; +} + + + +static void vhdx_log_raw_to_le_sector(VHDXLogDescriptor *desc, + VHDXLogDataSector *sector, void *data, + uint64_t seq) +{ + /* 8 + 4084 + 4 = 4096, 1 log sector */ + memcpy(&desc->leading_bytes, data, 8); + data += 8; + desc->leading_bytes = cpu_to_le64(desc->leading_bytes); + memcpy(sector->data, data, 4084); + data += 4084; + memcpy(&desc->trailing_bytes, data, 4); + desc->trailing_bytes = cpu_to_le32(desc->trailing_bytes); + data += 4; + + sector->sequence_high = (uint32_t) (seq >> 32); + sector->sequence_low = (uint32_t) (seq & 0xffffffff); + sector->data_signature = VHDX_LOG_DATA_SIGNATURE; + + vhdx_log_desc_le_export(desc); + vhdx_log_data_le_export(sector); +} + + +static int vhdx_log_write(BlockDriverState *bs, BDRVVHDXState *s, + void *data, uint32_t length, uint64_t offset) +{ + int ret = 0; + void *buffer = NULL; + void *merged_sector = NULL; + void *data_tmp, *sector_write; + unsigned int i; + int sector_offset; + uint32_t desc_sectors, sectors, total_length; + uint32_t sectors_written = 0; + uint32_t aligned_length; + uint32_t leading_length = 0; + uint32_t trailing_length = 0; + uint32_t partial_sectors = 0; + uint32_t bytes_written = 0; + uint64_t file_offset; + int64_t file_length; + VHDXHeader *header; + VHDXLogEntryHeader new_hdr; + VHDXLogDescriptor *new_desc = NULL; + VHDXLogDataSector *data_sector = NULL; + MSGUID new_guid = { 0 }; + + header = s->headers[s->curr_header]; + + /* need to have offset read data, and be on 4096 byte boundary */ + + if (length > header->log_length) { + /* no log present. we could create a log here instead of failing */ + ret = -EINVAL; + goto exit; + } + + if (guid_eq(header->log_guid, zero_guid)) { + vhdx_guid_generate(&new_guid); + vhdx_update_headers(bs, s, false, &new_guid); + } else { + /* currently, we require that the log be flushed after + * every write. */ + ret = -ENOTSUP; + goto exit; + } + + /* 0 is an invalid sequence number, but may also represent the first + * log write (or a wrapped seq) */ + if (s->log.sequence == 0) { + s->log.sequence = 1; + } + + sector_offset = offset % VHDX_LOG_SECTOR_SIZE; + file_offset = QEMU_ALIGN_DOWN(offset, VHDX_LOG_SECTOR_SIZE); + + aligned_length = length; + + /* add in the unaligned head and tail bytes */ + if (sector_offset) { + leading_length = (VHDX_LOG_SECTOR_SIZE - sector_offset); + leading_length = leading_length > length ? length : leading_length; + aligned_length -= leading_length; + partial_sectors++; + } + + sectors = aligned_length / VHDX_LOG_SECTOR_SIZE; + trailing_length = aligned_length - (sectors * VHDX_LOG_SECTOR_SIZE); + if (trailing_length) { + partial_sectors++; + } + + sectors += partial_sectors; + + file_length = bdrv_getlength(bs->file->bs); + if (file_length < 0) { + ret = file_length; + goto exit; + } + + /* sectors is now how many sectors the data itself takes, not + * including the header and descriptor metadata */ + + new_hdr = (VHDXLogEntryHeader) { + .signature = VHDX_LOG_SIGNATURE, + .tail = s->log.tail, + .sequence_number = s->log.sequence, + .descriptor_count = sectors, + .reserved = 0, + .flushed_file_offset = file_length, + .last_file_offset = file_length, + .log_guid = header->log_guid, + }; + + + desc_sectors = vhdx_compute_desc_sectors(new_hdr.descriptor_count); + + total_length = (desc_sectors + sectors) * VHDX_LOG_SECTOR_SIZE; + new_hdr.entry_length = total_length; + + vhdx_log_entry_hdr_le_export(&new_hdr); + + buffer = qemu_blockalign(bs, total_length); + memcpy(buffer, &new_hdr, sizeof(new_hdr)); + + new_desc = buffer + sizeof(new_hdr); + data_sector = buffer + (desc_sectors * VHDX_LOG_SECTOR_SIZE); + data_tmp = data; + + /* All log sectors are 4KB, so for any partial sectors we must + * merge the data with preexisting data from the final file + * destination */ + merged_sector = qemu_blockalign(bs, VHDX_LOG_SECTOR_SIZE); + + for (i = 0; i < sectors; i++) { + new_desc->signature = VHDX_LOG_DESC_SIGNATURE; + new_desc->sequence_number = s->log.sequence; + new_desc->file_offset = file_offset; + + if (i == 0 && leading_length) { + /* partial sector at the front of the buffer */ + ret = bdrv_pread(bs->file, file_offset, merged_sector, + VHDX_LOG_SECTOR_SIZE); + if (ret < 0) { + goto exit; + } + memcpy(merged_sector + sector_offset, data_tmp, leading_length); + bytes_written = leading_length; + sector_write = merged_sector; + } else if (i == sectors - 1 && trailing_length) { + /* partial sector at the end of the buffer */ + ret = bdrv_pread(bs->file, + file_offset, + merged_sector + trailing_length, + VHDX_LOG_SECTOR_SIZE - trailing_length); + if (ret < 0) { + goto exit; + } + memcpy(merged_sector, data_tmp, trailing_length); + bytes_written = trailing_length; + sector_write = merged_sector; + } else { + bytes_written = VHDX_LOG_SECTOR_SIZE; + sector_write = data_tmp; + } + + /* populate the raw sector data into the proper structures, + * as well as update the descriptor, and convert to proper + * endianness */ + vhdx_log_raw_to_le_sector(new_desc, data_sector, sector_write, + s->log.sequence); + + data_tmp += bytes_written; + data_sector++; + new_desc++; + file_offset += VHDX_LOG_SECTOR_SIZE; + } + + /* checksum covers entire entry, from the log header through the + * last data sector */ + vhdx_update_checksum(buffer, total_length, + offsetof(VHDXLogEntryHeader, checksum)); + + /* now write to the log */ + ret = vhdx_log_write_sectors(bs, &s->log, §ors_written, buffer, + desc_sectors + sectors); + if (ret < 0) { + goto exit; + } + + if (sectors_written != desc_sectors + sectors) { + /* instead of failing, we could flush the log here */ + ret = -EINVAL; + goto exit; + } + + s->log.sequence++; + /* write new tail */ + s->log.tail = s->log.write; + +exit: + qemu_vfree(buffer); + qemu_vfree(merged_sector); + return ret; +} + +/* Perform a log write, and then immediately flush the entire log */ +int vhdx_log_write_and_flush(BlockDriverState *bs, BDRVVHDXState *s, + void *data, uint32_t length, uint64_t offset) +{ + int ret = 0; + VHDXLogSequence logs = { .valid = true, + .count = 1, + .hdr = { 0 } }; + + + /* Make sure data written (new and/or changed blocks) is stable + * on disk, before creating log entry */ + ret = bdrv_flush(bs); + if (ret < 0) { + goto exit; + } + + ret = vhdx_log_write(bs, s, data, length, offset); + if (ret < 0) { + goto exit; + } + logs.log = s->log; + + /* Make sure log is stable on disk */ + ret = bdrv_flush(bs); + if (ret < 0) { + goto exit; + } + + ret = vhdx_log_flush(bs, s, &logs); + if (ret < 0) { + goto exit; + } + + s->log = logs.log; + +exit: + return ret; +} + diff --git a/block/vhdx.c b/block/vhdx.c new file mode 100644 index 000000000..356ec4c45 --- /dev/null +++ b/block/vhdx.c @@ -0,0 +1,2258 @@ +/* + * Block driver for Hyper-V VHDX Images + * + * Copyright (c) 2013 Red Hat, Inc., + * + * Authors: + * Jeff Cody + * + * This is based on the "VHDX Format Specification v1.00", published 8/25/2012 + * by Microsoft: + * https://www.microsoft.com/en-us/download/details.aspx?id=34750 + * + * This work is licensed under the terms of the GNU LGPL, version 2 or later. + * See the COPYING.LIB file in the top-level directory. + * + */ + +#include "qemu/osdep.h" +#include "qapi/error.h" +#include "block/block_int.h" +#include "block/qdict.h" +#include "sysemu/block-backend.h" +#include "qemu/module.h" +#include "qemu/option.h" +#include "qemu/crc32c.h" +#include "qemu/bswap.h" +#include "qemu/error-report.h" +#include "vhdx.h" +#include "migration/blocker.h" +#include "qemu/uuid.h" +#include "qapi/qmp/qdict.h" +#include "qapi/qobject-input-visitor.h" +#include "qapi/qapi-visit-block-core.h" + +/* Options for VHDX creation */ + +#define VHDX_BLOCK_OPT_LOG_SIZE "log_size" +#define VHDX_BLOCK_OPT_BLOCK_SIZE "block_size" +#define VHDX_BLOCK_OPT_ZERO "block_state_zero" + +typedef enum VHDXImageType { + VHDX_TYPE_DYNAMIC = 0, + VHDX_TYPE_FIXED, + VHDX_TYPE_DIFFERENCING, /* Currently unsupported */ +} VHDXImageType; + +static QemuOptsList vhdx_create_opts; + +/* Several metadata and region table data entries are identified by + * guids in a MS-specific GUID format. */ + + +/* ------- Known Region Table GUIDs ---------------------- */ +static const MSGUID bat_guid = { .data1 = 0x2dc27766, + .data2 = 0xf623, + .data3 = 0x4200, + .data4 = { 0x9d, 0x64, 0x11, 0x5e, + 0x9b, 0xfd, 0x4a, 0x08} }; + +static const MSGUID metadata_guid = { .data1 = 0x8b7ca206, + .data2 = 0x4790, + .data3 = 0x4b9a, + .data4 = { 0xb8, 0xfe, 0x57, 0x5f, + 0x05, 0x0f, 0x88, 0x6e} }; + + + +/* ------- Known Metadata Entry GUIDs ---------------------- */ +static const MSGUID file_param_guid = { .data1 = 0xcaa16737, + .data2 = 0xfa36, + .data3 = 0x4d43, + .data4 = { 0xb3, 0xb6, 0x33, 0xf0, + 0xaa, 0x44, 0xe7, 0x6b} }; + +static const MSGUID virtual_size_guid = { .data1 = 0x2FA54224, + .data2 = 0xcd1b, + .data3 = 0x4876, + .data4 = { 0xb2, 0x11, 0x5d, 0xbe, + 0xd8, 0x3b, 0xf4, 0xb8} }; + +static const MSGUID page83_guid = { .data1 = 0xbeca12ab, + .data2 = 0xb2e6, + .data3 = 0x4523, + .data4 = { 0x93, 0xef, 0xc3, 0x09, + 0xe0, 0x00, 0xc7, 0x46} }; + + +static const MSGUID phys_sector_guid = { .data1 = 0xcda348c7, + .data2 = 0x445d, + .data3 = 0x4471, + .data4 = { 0x9c, 0xc9, 0xe9, 0x88, + 0x52, 0x51, 0xc5, 0x56} }; + +static const MSGUID parent_locator_guid = { .data1 = 0xa8d35f2d, + .data2 = 0xb30b, + .data3 = 0x454d, + .data4 = { 0xab, 0xf7, 0xd3, + 0xd8, 0x48, 0x34, + 0xab, 0x0c} }; + +static const MSGUID logical_sector_guid = { .data1 = 0x8141bf1d, + .data2 = 0xa96f, + .data3 = 0x4709, + .data4 = { 0xba, 0x47, 0xf2, + 0x33, 0xa8, 0xfa, + 0xab, 0x5f} }; + +/* Each parent type must have a valid GUID; this is for parent images + * of type 'VHDX'. If we were to allow e.g. a QCOW2 parent, we would + * need to make up our own QCOW2 GUID type */ +static const MSGUID parent_vhdx_guid __attribute__((unused)) + = { .data1 = 0xb04aefb7, + .data2 = 0xd19e, + .data3 = 0x4a81, + .data4 = { 0xb7, 0x89, 0x25, 0xb8, + 0xe9, 0x44, 0x59, 0x13} }; + + +#define META_FILE_PARAMETER_PRESENT 0x01 +#define META_VIRTUAL_DISK_SIZE_PRESENT 0x02 +#define META_PAGE_83_PRESENT 0x04 +#define META_LOGICAL_SECTOR_SIZE_PRESENT 0x08 +#define META_PHYS_SECTOR_SIZE_PRESENT 0x10 +#define META_PARENT_LOCATOR_PRESENT 0x20 + +#define META_ALL_PRESENT \ + (META_FILE_PARAMETER_PRESENT | META_VIRTUAL_DISK_SIZE_PRESENT | \ + META_PAGE_83_PRESENT | META_LOGICAL_SECTOR_SIZE_PRESENT | \ + META_PHYS_SECTOR_SIZE_PRESENT) + + +typedef struct VHDXSectorInfo { + uint32_t bat_idx; /* BAT entry index */ + uint32_t sectors_avail; /* sectors available in payload block */ + uint32_t bytes_left; /* bytes left in the block after data to r/w */ + uint32_t bytes_avail; /* bytes available in payload block */ + uint64_t file_offset; /* absolute offset in bytes, in file */ + uint64_t block_offset; /* block offset, in bytes */ +} VHDXSectorInfo; + +/* Calculates new checksum. + * + * Zero is substituted during crc calculation for the original crc field + * crc_offset: byte offset in buf of the buffer crc + * buf: buffer pointer + * size: size of buffer (must be > crc_offset+4) + * + * Note: The buffer should have all multi-byte data in little-endian format, + * and the resulting checksum is in little endian format. + */ +uint32_t vhdx_update_checksum(uint8_t *buf, size_t size, int crc_offset) +{ + uint32_t crc; + + assert(buf != NULL); + assert(size > (crc_offset + sizeof(crc))); + + memset(buf + crc_offset, 0, sizeof(crc)); + crc = crc32c(0xffffffff, buf, size); + crc = cpu_to_le32(crc); + memcpy(buf + crc_offset, &crc, sizeof(crc)); + + return crc; +} + +uint32_t vhdx_checksum_calc(uint32_t crc, uint8_t *buf, size_t size, + int crc_offset) +{ + uint32_t crc_new; + uint32_t crc_orig; + assert(buf != NULL); + + if (crc_offset > 0) { + memcpy(&crc_orig, buf + crc_offset, sizeof(crc_orig)); + memset(buf + crc_offset, 0, sizeof(crc_orig)); + } + + crc_new = crc32c(crc, buf, size); + if (crc_offset > 0) { + memcpy(buf + crc_offset, &crc_orig, sizeof(crc_orig)); + } + + return crc_new; +} + +/* Validates the checksum of the buffer, with an in-place CRC. + * + * Zero is substituted during crc calculation for the original crc field, + * and the crc field is restored afterwards. But the buffer will be modified + * during the calculation, so this may not be not suitable for multi-threaded + * use. + * + * crc_offset: byte offset in buf of the buffer crc + * buf: buffer pointer + * size: size of buffer (must be > crc_offset+4) + * + * returns true if checksum is valid, false otherwise + */ +bool vhdx_checksum_is_valid(uint8_t *buf, size_t size, int crc_offset) +{ + uint32_t crc_orig; + uint32_t crc; + + assert(buf != NULL); + assert(size > (crc_offset + 4)); + + memcpy(&crc_orig, buf + crc_offset, sizeof(crc_orig)); + crc_orig = le32_to_cpu(crc_orig); + + crc = vhdx_checksum_calc(0xffffffff, buf, size, crc_offset); + + return crc == crc_orig; +} + + +/* + * This generates a UUID that is compliant with the MS GUIDs used + * in the VHDX spec (and elsewhere). + */ +void vhdx_guid_generate(MSGUID *guid) +{ + QemuUUID uuid; + assert(guid != NULL); + + qemu_uuid_generate(&uuid); + memcpy(guid, &uuid, sizeof(MSGUID)); +} + +/* Check for region overlaps inside the VHDX image */ +static int vhdx_region_check(BDRVVHDXState *s, uint64_t start, uint64_t length) +{ + int ret = 0; + uint64_t end; + VHDXRegionEntry *r; + + end = start + length; + QLIST_FOREACH(r, &s->regions, entries) { + if (!((start >= r->end) || (end <= r->start))) { + error_report("VHDX region %" PRIu64 "-%" PRIu64 " overlaps with " + "region %" PRIu64 "-%." PRIu64, start, end, r->start, + r->end); + ret = -EINVAL; + goto exit; + } + } + +exit: + return ret; +} + +/* Register a region for future checks */ +static void vhdx_region_register(BDRVVHDXState *s, + uint64_t start, uint64_t length) +{ + VHDXRegionEntry *r; + + r = g_malloc0(sizeof(*r)); + + r->start = start; + r->end = start + length; + + QLIST_INSERT_HEAD(&s->regions, r, entries); +} + +/* Free all registered regions */ +static void vhdx_region_unregister_all(BDRVVHDXState *s) +{ + VHDXRegionEntry *r, *r_next; + + QLIST_FOREACH_SAFE(r, &s->regions, entries, r_next) { + QLIST_REMOVE(r, entries); + g_free(r); + } +} + +static void vhdx_set_shift_bits(BDRVVHDXState *s) +{ + s->logical_sector_size_bits = ctz32(s->logical_sector_size); + s->sectors_per_block_bits = ctz32(s->sectors_per_block); + s->chunk_ratio_bits = ctz64(s->chunk_ratio); + s->block_size_bits = ctz32(s->block_size); +} + +/* + * Per the MS VHDX Specification, for every VHDX file: + * - The header section is fixed size - 1 MB + * - The header section is always the first "object" + * - The first 64KB of the header is the File Identifier + * - The first uint64 (8 bytes) is the VHDX Signature ("vhdxfile") + * - The following 512 bytes constitute a UTF-16 string identifiying the + * software that created the file, and is optional and diagnostic only. + * + * Therefore, we probe by looking for the vhdxfile signature "vhdxfile" + */ +static int vhdx_probe(const uint8_t *buf, int buf_size, const char *filename) +{ + if (buf_size >= 8 && !memcmp(buf, "vhdxfile", 8)) { + return 100; + } + return 0; +} + +/* + * Writes the header to the specified offset. + * + * This will optionally read in buffer data from disk (otherwise zero-fill), + * and then update the header checksum. Header is converted to proper + * endianness before being written to the specified file offset + */ +static int vhdx_write_header(BdrvChild *file, VHDXHeader *hdr, + uint64_t offset, bool read) +{ + BlockDriverState *bs_file = file->bs; + uint8_t *buffer = NULL; + int ret; + VHDXHeader *header_le; + + assert(bs_file != NULL); + assert(hdr != NULL); + + /* the header checksum is not over just the packed size of VHDXHeader, + * but rather over the entire 'reserved' range for the header, which is + * 4KB (VHDX_HEADER_SIZE). */ + + buffer = qemu_blockalign(bs_file, VHDX_HEADER_SIZE); + if (read) { + /* if true, we can't assume the extra reserved bytes are 0 */ + ret = bdrv_pread(file, offset, buffer, VHDX_HEADER_SIZE); + if (ret < 0) { + goto exit; + } + } else { + memset(buffer, 0, VHDX_HEADER_SIZE); + } + + /* overwrite the actual VHDXHeader portion */ + header_le = (VHDXHeader *)buffer; + memcpy(header_le, hdr, sizeof(VHDXHeader)); + vhdx_header_le_export(hdr, header_le); + vhdx_update_checksum(buffer, VHDX_HEADER_SIZE, + offsetof(VHDXHeader, checksum)); + ret = bdrv_pwrite_sync(file, offset, header_le, sizeof(VHDXHeader)); + +exit: + qemu_vfree(buffer); + return ret; +} + +/* Update the VHDX headers + * + * This follows the VHDX spec procedures for header updates. + * + * - non-current header is updated with largest sequence number + */ +static int vhdx_update_header(BlockDriverState *bs, BDRVVHDXState *s, + bool generate_data_write_guid, MSGUID *log_guid) +{ + int ret = 0; + int hdr_idx = 0; + uint64_t header_offset = VHDX_HEADER1_OFFSET; + + VHDXHeader *active_header; + VHDXHeader *inactive_header; + + /* operate on the non-current header */ + if (s->curr_header == 0) { + hdr_idx = 1; + header_offset = VHDX_HEADER2_OFFSET; + } + + active_header = s->headers[s->curr_header]; + inactive_header = s->headers[hdr_idx]; + + inactive_header->sequence_number = active_header->sequence_number + 1; + + /* a new file guid must be generated before any file write, including + * headers */ + inactive_header->file_write_guid = s->session_guid; + + /* a new data guid only needs to be generated before any guest-visible + * writes (i.e. something observable via virtual disk read) */ + if (generate_data_write_guid) { + vhdx_guid_generate(&inactive_header->data_write_guid); + } + + /* update the log guid if present */ + if (log_guid) { + inactive_header->log_guid = *log_guid; + } + + ret = vhdx_write_header(bs->file, inactive_header, header_offset, true); + if (ret < 0) { + goto exit; + } + s->curr_header = hdr_idx; + +exit: + return ret; +} + +/* + * The VHDX spec calls for header updates to be performed twice, so that both + * the current and non-current header have valid info + */ +int vhdx_update_headers(BlockDriverState *bs, BDRVVHDXState *s, + bool generate_data_write_guid, MSGUID *log_guid) +{ + int ret; + + ret = vhdx_update_header(bs, s, generate_data_write_guid, log_guid); + if (ret < 0) { + return ret; + } + return vhdx_update_header(bs, s, generate_data_write_guid, log_guid); +} + +/* opens the specified header block from the VHDX file header section */ +static void vhdx_parse_header(BlockDriverState *bs, BDRVVHDXState *s, + Error **errp) +{ + int ret; + VHDXHeader *header1; + VHDXHeader *header2; + bool h1_valid = false; + bool h2_valid = false; + uint64_t h1_seq = 0; + uint64_t h2_seq = 0; + uint8_t *buffer; + + /* header1 & header2 are freed in vhdx_close() */ + header1 = qemu_blockalign(bs, sizeof(VHDXHeader)); + header2 = qemu_blockalign(bs, sizeof(VHDXHeader)); + + buffer = qemu_blockalign(bs, VHDX_HEADER_SIZE); + + s->headers[0] = header1; + s->headers[1] = header2; + + /* We have to read the whole VHDX_HEADER_SIZE instead of + * sizeof(VHDXHeader), because the checksum is over the whole + * region */ + ret = bdrv_pread(bs->file, VHDX_HEADER1_OFFSET, buffer, + VHDX_HEADER_SIZE); + if (ret < 0) { + goto fail; + } + /* copy over just the relevant portion that we need */ + memcpy(header1, buffer, sizeof(VHDXHeader)); + + if (vhdx_checksum_is_valid(buffer, VHDX_HEADER_SIZE, 4)) { + vhdx_header_le_import(header1); + if (header1->signature == VHDX_HEADER_SIGNATURE && + header1->version == 1) { + h1_seq = header1->sequence_number; + h1_valid = true; + } + } + + ret = bdrv_pread(bs->file, VHDX_HEADER2_OFFSET, buffer, + VHDX_HEADER_SIZE); + if (ret < 0) { + goto fail; + } + /* copy over just the relevant portion that we need */ + memcpy(header2, buffer, sizeof(VHDXHeader)); + + if (vhdx_checksum_is_valid(buffer, VHDX_HEADER_SIZE, 4)) { + vhdx_header_le_import(header2); + if (header2->signature == VHDX_HEADER_SIGNATURE && + header2->version == 1) { + h2_seq = header2->sequence_number; + h2_valid = true; + } + } + + /* If there is only 1 valid header (or no valid headers), we + * don't care what the sequence numbers are */ + if (h1_valid && !h2_valid) { + s->curr_header = 0; + } else if (!h1_valid && h2_valid) { + s->curr_header = 1; + } else if (!h1_valid && !h2_valid) { + goto fail; + } else { + /* If both headers are valid, then we choose the active one by the + * highest sequence number. If the sequence numbers are equal, that is + * invalid */ + if (h1_seq > h2_seq) { + s->curr_header = 0; + } else if (h2_seq > h1_seq) { + s->curr_header = 1; + } else { + /* The Microsoft Disk2VHD tool will create 2 identical + * headers, with identical sequence numbers. If the headers are + * identical, don't consider the file corrupt */ + if (!memcmp(header1, header2, sizeof(VHDXHeader))) { + s->curr_header = 0; + } else { + goto fail; + } + } + } + + vhdx_region_register(s, s->headers[s->curr_header]->log_offset, + s->headers[s->curr_header]->log_length); + goto exit; + +fail: + error_setg_errno(errp, -ret, "No valid VHDX header found"); + qemu_vfree(header1); + qemu_vfree(header2); + s->headers[0] = NULL; + s->headers[1] = NULL; +exit: + qemu_vfree(buffer); +} + + +static int vhdx_open_region_tables(BlockDriverState *bs, BDRVVHDXState *s) +{ + int ret = 0; + uint8_t *buffer; + int offset = 0; + VHDXRegionTableEntry rt_entry; + uint32_t i; + bool bat_rt_found = false; + bool metadata_rt_found = false; + + /* We have to read the whole 64KB block, because the crc32 is over the + * whole block */ + buffer = qemu_blockalign(bs, VHDX_HEADER_BLOCK_SIZE); + + ret = bdrv_pread(bs->file, VHDX_REGION_TABLE_OFFSET, buffer, + VHDX_HEADER_BLOCK_SIZE); + if (ret < 0) { + goto fail; + } + memcpy(&s->rt, buffer, sizeof(s->rt)); + offset += sizeof(s->rt); + + if (!vhdx_checksum_is_valid(buffer, VHDX_HEADER_BLOCK_SIZE, 4)) { + ret = -EINVAL; + goto fail; + } + + vhdx_region_header_le_import(&s->rt); + + if (s->rt.signature != VHDX_REGION_SIGNATURE) { + ret = -EINVAL; + goto fail; + } + + + /* Per spec, maximum region table entry count is 2047 */ + if (s->rt.entry_count > 2047) { + ret = -EINVAL; + goto fail; + } + + for (i = 0; i < s->rt.entry_count; i++) { + memcpy(&rt_entry, buffer + offset, sizeof(rt_entry)); + offset += sizeof(rt_entry); + + vhdx_region_entry_le_import(&rt_entry); + + /* check for region overlap between these entries, and any + * other memory regions in the file */ + ret = vhdx_region_check(s, rt_entry.file_offset, rt_entry.length); + if (ret < 0) { + goto fail; + } + + vhdx_region_register(s, rt_entry.file_offset, rt_entry.length); + + /* see if we recognize the entry */ + if (guid_eq(rt_entry.guid, bat_guid)) { + /* must be unique; if we have already found it this is invalid */ + if (bat_rt_found) { + ret = -EINVAL; + goto fail; + } + bat_rt_found = true; + s->bat_rt = rt_entry; + continue; + } + + if (guid_eq(rt_entry.guid, metadata_guid)) { + /* must be unique; if we have already found it this is invalid */ + if (metadata_rt_found) { + ret = -EINVAL; + goto fail; + } + metadata_rt_found = true; + s->metadata_rt = rt_entry; + continue; + } + + if (rt_entry.data_bits & VHDX_REGION_ENTRY_REQUIRED) { + /* cannot read vhdx file - required region table entry that + * we do not understand. per spec, we must fail to open */ + ret = -ENOTSUP; + goto fail; + } + } + + if (!bat_rt_found || !metadata_rt_found) { + ret = -EINVAL; + goto fail; + } + + ret = 0; + +fail: + qemu_vfree(buffer); + return ret; +} + + + +/* Metadata initial parser + * + * This loads all the metadata entry fields. This may cause additional + * fields to be processed (e.g. parent locator, etc..). + * + * There are 5 Metadata items that are always required: + * - File Parameters (block size, has a parent) + * - Virtual Disk Size (size, in bytes, of the virtual drive) + * - Page 83 Data (scsi page 83 guid) + * - Logical Sector Size (logical sector size in bytes, either 512 or + * 4096. We only support 512 currently) + * - Physical Sector Size (512 or 4096) + * + * Also, if the File Parameters indicate this is a differencing file, + * we must also look for the Parent Locator metadata item. + */ +static int vhdx_parse_metadata(BlockDriverState *bs, BDRVVHDXState *s) +{ + int ret = 0; + uint8_t *buffer; + int offset = 0; + uint32_t i = 0; + VHDXMetadataTableEntry md_entry; + + buffer = qemu_blockalign(bs, VHDX_METADATA_TABLE_MAX_SIZE); + + ret = bdrv_pread(bs->file, s->metadata_rt.file_offset, buffer, + VHDX_METADATA_TABLE_MAX_SIZE); + if (ret < 0) { + goto exit; + } + memcpy(&s->metadata_hdr, buffer, sizeof(s->metadata_hdr)); + offset += sizeof(s->metadata_hdr); + + vhdx_metadata_header_le_import(&s->metadata_hdr); + + if (s->metadata_hdr.signature != VHDX_METADATA_SIGNATURE) { + ret = -EINVAL; + goto exit; + } + + s->metadata_entries.present = 0; + + if ((s->metadata_hdr.entry_count * sizeof(md_entry)) > + (VHDX_METADATA_TABLE_MAX_SIZE - offset)) { + ret = -EINVAL; + goto exit; + } + + for (i = 0; i < s->metadata_hdr.entry_count; i++) { + memcpy(&md_entry, buffer + offset, sizeof(md_entry)); + offset += sizeof(md_entry); + + vhdx_metadata_entry_le_import(&md_entry); + + if (guid_eq(md_entry.item_id, file_param_guid)) { + if (s->metadata_entries.present & META_FILE_PARAMETER_PRESENT) { + ret = -EINVAL; + goto exit; + } + s->metadata_entries.file_parameters_entry = md_entry; + s->metadata_entries.present |= META_FILE_PARAMETER_PRESENT; + continue; + } + + if (guid_eq(md_entry.item_id, virtual_size_guid)) { + if (s->metadata_entries.present & META_VIRTUAL_DISK_SIZE_PRESENT) { + ret = -EINVAL; + goto exit; + } + s->metadata_entries.virtual_disk_size_entry = md_entry; + s->metadata_entries.present |= META_VIRTUAL_DISK_SIZE_PRESENT; + continue; + } + + if (guid_eq(md_entry.item_id, page83_guid)) { + if (s->metadata_entries.present & META_PAGE_83_PRESENT) { + ret = -EINVAL; + goto exit; + } + s->metadata_entries.page83_data_entry = md_entry; + s->metadata_entries.present |= META_PAGE_83_PRESENT; + continue; + } + + if (guid_eq(md_entry.item_id, logical_sector_guid)) { + if (s->metadata_entries.present & + META_LOGICAL_SECTOR_SIZE_PRESENT) { + ret = -EINVAL; + goto exit; + } + s->metadata_entries.logical_sector_size_entry = md_entry; + s->metadata_entries.present |= META_LOGICAL_SECTOR_SIZE_PRESENT; + continue; + } + + if (guid_eq(md_entry.item_id, phys_sector_guid)) { + if (s->metadata_entries.present & META_PHYS_SECTOR_SIZE_PRESENT) { + ret = -EINVAL; + goto exit; + } + s->metadata_entries.phys_sector_size_entry = md_entry; + s->metadata_entries.present |= META_PHYS_SECTOR_SIZE_PRESENT; + continue; + } + + if (guid_eq(md_entry.item_id, parent_locator_guid)) { + if (s->metadata_entries.present & META_PARENT_LOCATOR_PRESENT) { + ret = -EINVAL; + goto exit; + } + s->metadata_entries.parent_locator_entry = md_entry; + s->metadata_entries.present |= META_PARENT_LOCATOR_PRESENT; + continue; + } + + if (md_entry.data_bits & VHDX_META_FLAGS_IS_REQUIRED) { + /* cannot read vhdx file - required region table entry that + * we do not understand. per spec, we must fail to open */ + ret = -ENOTSUP; + goto exit; + } + } + + if (s->metadata_entries.present != META_ALL_PRESENT) { + ret = -ENOTSUP; + goto exit; + } + + ret = bdrv_pread(bs->file, + s->metadata_entries.file_parameters_entry.offset + + s->metadata_rt.file_offset, + &s->params, + sizeof(s->params)); + + if (ret < 0) { + goto exit; + } + + s->params.block_size = le32_to_cpu(s->params.block_size); + s->params.data_bits = le32_to_cpu(s->params.data_bits); + + + /* We now have the file parameters, so we can tell if this is a + * differencing file (i.e.. has_parent), is dynamic or fixed + * sized (leave_blocks_allocated), and the block size */ + + /* The parent locator required iff the file parameters has_parent set */ + if (s->params.data_bits & VHDX_PARAMS_HAS_PARENT) { + if (s->metadata_entries.present & META_PARENT_LOCATOR_PRESENT) { + /* TODO: parse parent locator fields */ + ret = -ENOTSUP; /* temp, until differencing files are supported */ + goto exit; + } else { + /* if has_parent is set, but there is not parent locator present, + * then that is an invalid combination */ + ret = -EINVAL; + goto exit; + } + } + + /* determine virtual disk size, logical sector size, + * and phys sector size */ + + ret = bdrv_pread(bs->file, + s->metadata_entries.virtual_disk_size_entry.offset + + s->metadata_rt.file_offset, + &s->virtual_disk_size, + sizeof(uint64_t)); + if (ret < 0) { + goto exit; + } + ret = bdrv_pread(bs->file, + s->metadata_entries.logical_sector_size_entry.offset + + s->metadata_rt.file_offset, + &s->logical_sector_size, + sizeof(uint32_t)); + if (ret < 0) { + goto exit; + } + ret = bdrv_pread(bs->file, + s->metadata_entries.phys_sector_size_entry.offset + + s->metadata_rt.file_offset, + &s->physical_sector_size, + sizeof(uint32_t)); + if (ret < 0) { + goto exit; + } + + s->virtual_disk_size = le64_to_cpu(s->virtual_disk_size); + s->logical_sector_size = le32_to_cpu(s->logical_sector_size); + s->physical_sector_size = le32_to_cpu(s->physical_sector_size); + + if (s->params.block_size < VHDX_BLOCK_SIZE_MIN || + s->params.block_size > VHDX_BLOCK_SIZE_MAX) { + ret = -EINVAL; + goto exit; + } + + /* Currently we only support 512 */ + if (s->logical_sector_size != 512) { + ret = -ENOTSUP; + goto exit; + } + + /* Both block_size and sector_size are guaranteed powers of 2, below. + Due to range checks above, s->sectors_per_block can never be < 256 */ + s->sectors_per_block = s->params.block_size / s->logical_sector_size; + s->chunk_ratio = (VHDX_MAX_SECTORS_PER_BLOCK) * + (uint64_t)s->logical_sector_size / + (uint64_t)s->params.block_size; + + /* These values are ones we will want to use for division / multiplication + * later on, and they are all guaranteed (per the spec) to be powers of 2, + * so we can take advantage of that for shift operations during + * reads/writes */ + if (s->logical_sector_size & (s->logical_sector_size - 1)) { + ret = -EINVAL; + goto exit; + } + if (s->sectors_per_block & (s->sectors_per_block - 1)) { + ret = -EINVAL; + goto exit; + } + if (s->chunk_ratio & (s->chunk_ratio - 1)) { + ret = -EINVAL; + goto exit; + } + s->block_size = s->params.block_size; + if (s->block_size & (s->block_size - 1)) { + ret = -EINVAL; + goto exit; + } + + vhdx_set_shift_bits(s); + + ret = 0; + +exit: + qemu_vfree(buffer); + return ret; +} + +/* + * Calculate the number of BAT entries, including sector + * bitmap entries. + */ +static void vhdx_calc_bat_entries(BDRVVHDXState *s) +{ + uint32_t data_blocks_cnt, bitmap_blocks_cnt; + + data_blocks_cnt = DIV_ROUND_UP(s->virtual_disk_size, s->block_size); + bitmap_blocks_cnt = DIV_ROUND_UP(data_blocks_cnt, s->chunk_ratio); + + if (s->parent_entries) { + s->bat_entries = bitmap_blocks_cnt * (s->chunk_ratio + 1); + } else { + s->bat_entries = data_blocks_cnt + + ((data_blocks_cnt - 1) >> s->chunk_ratio_bits); + } + +} + +static int vhdx_check_bat_entries(BlockDriverState *bs, int *errcnt) +{ + BDRVVHDXState *s = bs->opaque; + int64_t image_file_size = bdrv_getlength(bs->file->bs); + uint64_t payblocks = s->chunk_ratio; + uint64_t i; + int ret = 0; + + if (image_file_size < 0) { + error_report("Could not determinate VHDX image file size."); + return image_file_size; + } + + for (i = 0; i < s->bat_entries; i++) { + if ((s->bat[i] & VHDX_BAT_STATE_BIT_MASK) == + PAYLOAD_BLOCK_FULLY_PRESENT) { + uint64_t offset = s->bat[i] & VHDX_BAT_FILE_OFF_MASK; + /* + * Allow that the last block exists only partially. The VHDX spec + * states that the image file can only grow in blocksize increments, + * but QEMU created images with partial last blocks in the past. + */ + uint32_t block_length = MIN(s->block_size, + bs->total_sectors * BDRV_SECTOR_SIZE - i * s->block_size); + /* + * Check for BAT entry overflow. + */ + if (offset > INT64_MAX - s->block_size) { + error_report("VHDX BAT entry %" PRIu64 " offset overflow.", i); + ret = -EINVAL; + if (!errcnt) { + break; + } + (*errcnt)++; + } + /* + * Check if fully allocated BAT entries do not reside after + * end of the image file. + */ + if (offset >= image_file_size) { + error_report("VHDX BAT entry %" PRIu64 " start offset %" PRIu64 + " points after end of file (%" PRIi64 "). Image" + " has probably been truncated.", + i, offset, image_file_size); + ret = -EINVAL; + if (!errcnt) { + break; + } + (*errcnt)++; + } else if (offset + block_length > image_file_size) { + error_report("VHDX BAT entry %" PRIu64 " end offset %" PRIu64 + " points after end of file (%" PRIi64 "). Image" + " has probably been truncated.", + i, offset + block_length - 1, image_file_size); + ret = -EINVAL; + if (!errcnt) { + break; + } + (*errcnt)++; + } + + /* + * verify populated BAT field file offsets against + * region table and log entries + */ + if (payblocks--) { + /* payload bat entries */ + int ret2; + ret2 = vhdx_region_check(s, offset, s->block_size); + if (ret2 < 0) { + ret = -EINVAL; + if (!errcnt) { + break; + } + (*errcnt)++; + } + } else { + payblocks = s->chunk_ratio; + /* + * Once differencing files are supported, verify sector bitmap + * blocks here + */ + } + } + } + + return ret; +} + +static void vhdx_close(BlockDriverState *bs) +{ + BDRVVHDXState *s = bs->opaque; + qemu_vfree(s->headers[0]); + s->headers[0] = NULL; + qemu_vfree(s->headers[1]); + s->headers[1] = NULL; + qemu_vfree(s->bat); + s->bat = NULL; + qemu_vfree(s->parent_entries); + s->parent_entries = NULL; + migrate_del_blocker(s->migration_blocker); + error_free(s->migration_blocker); + qemu_vfree(s->log.hdr); + s->log.hdr = NULL; + vhdx_region_unregister_all(s); +} + +static int vhdx_open(BlockDriverState *bs, QDict *options, int flags, + Error **errp) +{ + BDRVVHDXState *s = bs->opaque; + int ret = 0; + uint32_t i; + uint64_t signature; + Error *local_err = NULL; + + bs->file = bdrv_open_child(NULL, options, "file", bs, &child_of_bds, + BDRV_CHILD_IMAGE, false, errp); + if (!bs->file) { + return -EINVAL; + } + + s->bat = NULL; + s->first_visible_write = true; + + qemu_co_mutex_init(&s->lock); + QLIST_INIT(&s->regions); + + /* validate the file signature */ + ret = bdrv_pread(bs->file, 0, &signature, sizeof(uint64_t)); + if (ret < 0) { + goto fail; + } + if (memcmp(&signature, "vhdxfile", 8)) { + ret = -EINVAL; + goto fail; + } + + /* This is used for any header updates, for the file_write_guid. + * The spec dictates that a new value should be used for the first + * header update */ + vhdx_guid_generate(&s->session_guid); + + vhdx_parse_header(bs, s, &local_err); + if (local_err != NULL) { + error_propagate(errp, local_err); + ret = -EINVAL; + goto fail; + } + + ret = vhdx_parse_log(bs, s, &s->log_replayed_on_open, errp); + if (ret < 0) { + goto fail; + } + + ret = vhdx_open_region_tables(bs, s); + if (ret < 0) { + goto fail; + } + + ret = vhdx_parse_metadata(bs, s); + if (ret < 0) { + goto fail; + } + + s->block_size = s->params.block_size; + + /* the VHDX spec dictates that virtual_disk_size is always a multiple of + * logical_sector_size */ + bs->total_sectors = s->virtual_disk_size >> s->logical_sector_size_bits; + + vhdx_calc_bat_entries(s); + + s->bat_offset = s->bat_rt.file_offset; + + if (s->bat_entries > s->bat_rt.length / sizeof(VHDXBatEntry)) { + /* BAT allocation is not large enough for all entries */ + ret = -EINVAL; + goto fail; + } + + /* s->bat is freed in vhdx_close() */ + s->bat = qemu_try_blockalign(bs->file->bs, s->bat_rt.length); + if (s->bat == NULL) { + ret = -ENOMEM; + goto fail; + } + + ret = bdrv_pread(bs->file, s->bat_offset, s->bat, s->bat_rt.length); + if (ret < 0) { + goto fail; + } + + /* endian convert populated BAT field entires */ + for (i = 0; i < s->bat_entries; i++) { + s->bat[i] = le64_to_cpu(s->bat[i]); + } + + if (!(flags & BDRV_O_CHECK)) { + ret = vhdx_check_bat_entries(bs, NULL); + if (ret < 0) { + goto fail; + } + } + + /* Disable migration when VHDX images are used */ + error_setg(&s->migration_blocker, "The vhdx format used by node '%s' " + "does not support live migration", + bdrv_get_device_or_node_name(bs)); + ret = migrate_add_blocker(s->migration_blocker, errp); + if (ret < 0) { + error_free(s->migration_blocker); + goto fail; + } + + /* TODO: differencing files */ + + return 0; +fail: + vhdx_close(bs); + return ret; +} + +static int vhdx_reopen_prepare(BDRVReopenState *state, + BlockReopenQueue *queue, Error **errp) +{ + return 0; +} + + +/* + * Perform sector to block offset translations, to get various + * sector and file offsets into the image. See VHDXSectorInfo + */ +static void vhdx_block_translate(BDRVVHDXState *s, int64_t sector_num, + int nb_sectors, VHDXSectorInfo *sinfo) +{ + uint32_t block_offset; + + sinfo->bat_idx = sector_num >> s->sectors_per_block_bits; + /* effectively a modulo - this gives us the offset into the block + * (in sector sizes) for our sector number */ + block_offset = sector_num - (sinfo->bat_idx << s->sectors_per_block_bits); + /* the chunk ratio gives us the interleaving of the sector + * bitmaps, so we need to advance our page block index by the + * sector bitmaps entry number */ + sinfo->bat_idx += sinfo->bat_idx >> s->chunk_ratio_bits; + + /* the number of sectors we can read/write in this cycle */ + sinfo->sectors_avail = s->sectors_per_block - block_offset; + + sinfo->bytes_left = sinfo->sectors_avail << s->logical_sector_size_bits; + + if (sinfo->sectors_avail > nb_sectors) { + sinfo->sectors_avail = nb_sectors; + } + + sinfo->bytes_avail = sinfo->sectors_avail << s->logical_sector_size_bits; + + sinfo->file_offset = s->bat[sinfo->bat_idx] & VHDX_BAT_FILE_OFF_MASK; + + sinfo->block_offset = block_offset << s->logical_sector_size_bits; + + /* The file offset must be past the header section, so must be > 0 */ + if (sinfo->file_offset == 0) { + return; + } + + /* block offset is the offset in vhdx logical sectors, in + * the payload data block. Convert that to a byte offset + * in the block, and add in the payload data block offset + * in the file, in bytes, to get the final read address */ + + sinfo->file_offset += sinfo->block_offset; +} + + +static int vhdx_get_info(BlockDriverState *bs, BlockDriverInfo *bdi) +{ + BDRVVHDXState *s = bs->opaque; + + bdi->cluster_size = s->block_size; + + return 0; +} + + +static coroutine_fn int vhdx_co_readv(BlockDriverState *bs, int64_t sector_num, + int nb_sectors, QEMUIOVector *qiov) +{ + BDRVVHDXState *s = bs->opaque; + int ret = 0; + VHDXSectorInfo sinfo; + uint64_t bytes_done = 0; + QEMUIOVector hd_qiov; + + qemu_iovec_init(&hd_qiov, qiov->niov); + + qemu_co_mutex_lock(&s->lock); + + while (nb_sectors > 0) { + /* We are a differencing file, so we need to inspect the sector bitmap + * to see if we have the data or not */ + if (s->params.data_bits & VHDX_PARAMS_HAS_PARENT) { + /* not supported yet */ + ret = -ENOTSUP; + goto exit; + } else { + vhdx_block_translate(s, sector_num, nb_sectors, &sinfo); + + qemu_iovec_reset(&hd_qiov); + qemu_iovec_concat(&hd_qiov, qiov, bytes_done, sinfo.bytes_avail); + + /* check the payload block state */ + switch (s->bat[sinfo.bat_idx] & VHDX_BAT_STATE_BIT_MASK) { + case PAYLOAD_BLOCK_NOT_PRESENT: /* fall through */ + case PAYLOAD_BLOCK_UNDEFINED: + case PAYLOAD_BLOCK_UNMAPPED: + case PAYLOAD_BLOCK_UNMAPPED_v095: + case PAYLOAD_BLOCK_ZERO: + /* return zero */ + qemu_iovec_memset(&hd_qiov, 0, 0, sinfo.bytes_avail); + break; + case PAYLOAD_BLOCK_FULLY_PRESENT: + qemu_co_mutex_unlock(&s->lock); + ret = bdrv_co_preadv(bs->file, sinfo.file_offset, + sinfo.sectors_avail * BDRV_SECTOR_SIZE, + &hd_qiov, 0); + qemu_co_mutex_lock(&s->lock); + if (ret < 0) { + goto exit; + } + break; + case PAYLOAD_BLOCK_PARTIALLY_PRESENT: + /* we don't yet support difference files, fall through + * to error */ + default: + ret = -EIO; + goto exit; + break; + } + nb_sectors -= sinfo.sectors_avail; + sector_num += sinfo.sectors_avail; + bytes_done += sinfo.bytes_avail; + } + } + ret = 0; +exit: + qemu_co_mutex_unlock(&s->lock); + qemu_iovec_destroy(&hd_qiov); + return ret; +} + +/* + * Allocate a new payload block at the end of the file. + * + * Allocation will happen at 1MB alignment inside the file. + * + * If @need_zero is set on entry but not cleared on return, then truncation + * could not guarantee that the new portion reads as zero, and the caller + * will take care of it instead. + * + * Returns the file offset start of the new payload block + */ +static int vhdx_allocate_block(BlockDriverState *bs, BDRVVHDXState *s, + uint64_t *new_offset, bool *need_zero) +{ + int64_t current_len; + + current_len = bdrv_getlength(bs->file->bs); + if (current_len < 0) { + return current_len; + } + + *new_offset = current_len; + + /* per the spec, the address for a block is in units of 1MB */ + *new_offset = ROUND_UP(*new_offset, 1 * MiB); + if (*new_offset > INT64_MAX) { + return -EINVAL; + } + + if (*need_zero) { + int ret; + + ret = bdrv_truncate(bs->file, *new_offset + s->block_size, false, + PREALLOC_MODE_OFF, BDRV_REQ_ZERO_WRITE, NULL); + if (ret != -ENOTSUP) { + *need_zero = false; + return ret; + } + } + + return bdrv_truncate(bs->file, *new_offset + s->block_size, false, + PREALLOC_MODE_OFF, 0, NULL); +} + +/* + * Update the BAT table entry with the new file offset, and the new entry + * state */ +static void vhdx_update_bat_table_entry(BlockDriverState *bs, BDRVVHDXState *s, + VHDXSectorInfo *sinfo, + uint64_t *bat_entry_le, + uint64_t *bat_offset, int state) +{ + /* The BAT entry is a uint64, with 44 bits for the file offset in units of + * 1MB, and 3 bits for the block state. */ + if ((state == PAYLOAD_BLOCK_ZERO) || + (state == PAYLOAD_BLOCK_UNDEFINED) || + (state == PAYLOAD_BLOCK_NOT_PRESENT) || + (state == PAYLOAD_BLOCK_UNMAPPED)) { + s->bat[sinfo->bat_idx] = 0; /* For PAYLOAD_BLOCK_ZERO, the + FileOffsetMB field is denoted as + 'reserved' in the v1.0 spec. If it is + non-zero, MS Hyper-V will fail to read + the disk image */ + } else { + s->bat[sinfo->bat_idx] = sinfo->file_offset; + } + + s->bat[sinfo->bat_idx] |= state & VHDX_BAT_STATE_BIT_MASK; + + *bat_entry_le = cpu_to_le64(s->bat[sinfo->bat_idx]); + *bat_offset = s->bat_offset + sinfo->bat_idx * sizeof(VHDXBatEntry); + +} + +/* Per the spec, on the first write of guest-visible data to the file the + * data write guid must be updated in the header */ +int vhdx_user_visible_write(BlockDriverState *bs, BDRVVHDXState *s) +{ + int ret = 0; + if (s->first_visible_write) { + s->first_visible_write = false; + ret = vhdx_update_headers(bs, s, true, NULL); + } + return ret; +} + +static coroutine_fn int vhdx_co_writev(BlockDriverState *bs, int64_t sector_num, + int nb_sectors, QEMUIOVector *qiov, + int flags) +{ + int ret = -ENOTSUP; + BDRVVHDXState *s = bs->opaque; + VHDXSectorInfo sinfo; + uint64_t bytes_done = 0; + uint64_t bat_entry = 0; + uint64_t bat_entry_offset = 0; + QEMUIOVector hd_qiov; + struct iovec iov1 = { 0 }; + struct iovec iov2 = { 0 }; + int sectors_to_write; + int bat_state; + uint64_t bat_prior_offset = 0; + bool bat_update = false; + + assert(!flags); + qemu_iovec_init(&hd_qiov, qiov->niov); + + qemu_co_mutex_lock(&s->lock); + + ret = vhdx_user_visible_write(bs, s); + if (ret < 0) { + goto exit; + } + + while (nb_sectors > 0) { + bool use_zero_buffers = false; + bat_update = false; + if (s->params.data_bits & VHDX_PARAMS_HAS_PARENT) { + /* not supported yet */ + ret = -ENOTSUP; + goto exit; + } else { + vhdx_block_translate(s, sector_num, nb_sectors, &sinfo); + sectors_to_write = sinfo.sectors_avail; + + qemu_iovec_reset(&hd_qiov); + /* check the payload block state */ + bat_state = s->bat[sinfo.bat_idx] & VHDX_BAT_STATE_BIT_MASK; + switch (bat_state) { + case PAYLOAD_BLOCK_ZERO: + /* in this case, we need to preserve zero writes for + * data that is not part of this write, so we must pad + * the rest of the buffer to zeroes */ + use_zero_buffers = true; + /* fall through */ + case PAYLOAD_BLOCK_NOT_PRESENT: /* fall through */ + case PAYLOAD_BLOCK_UNMAPPED: + case PAYLOAD_BLOCK_UNMAPPED_v095: + case PAYLOAD_BLOCK_UNDEFINED: + bat_prior_offset = sinfo.file_offset; + ret = vhdx_allocate_block(bs, s, &sinfo.file_offset, + &use_zero_buffers); + if (ret < 0) { + goto exit; + } + /* + * once we support differencing files, this may also be + * partially present + */ + /* update block state to the newly specified state */ + vhdx_update_bat_table_entry(bs, s, &sinfo, &bat_entry, + &bat_entry_offset, + PAYLOAD_BLOCK_FULLY_PRESENT); + bat_update = true; + /* + * Since we just allocated a block, file_offset is the + * beginning of the payload block. It needs to be the + * write address, which includes the offset into the + * block, unless the entire block needs to read as + * zeroes but truncation was not able to provide them, + * in which case we need to fill in the rest. + */ + if (!use_zero_buffers) { + sinfo.file_offset += sinfo.block_offset; + } else { + /* zero fill the front, if any */ + if (sinfo.block_offset) { + iov1.iov_len = sinfo.block_offset; + iov1.iov_base = qemu_blockalign(bs, iov1.iov_len); + memset(iov1.iov_base, 0, iov1.iov_len); + qemu_iovec_concat_iov(&hd_qiov, &iov1, 1, 0, + iov1.iov_len); + sectors_to_write += iov1.iov_len >> BDRV_SECTOR_BITS; + } + + /* our actual data */ + qemu_iovec_concat(&hd_qiov, qiov, bytes_done, + sinfo.bytes_avail); + + /* zero fill the back, if any */ + if ((sinfo.bytes_avail - sinfo.block_offset) < + s->block_size) { + iov2.iov_len = s->block_size - + (sinfo.bytes_avail + sinfo.block_offset); + iov2.iov_base = qemu_blockalign(bs, iov2.iov_len); + memset(iov2.iov_base, 0, iov2.iov_len); + qemu_iovec_concat_iov(&hd_qiov, &iov2, 1, 0, + iov2.iov_len); + sectors_to_write += iov2.iov_len >> BDRV_SECTOR_BITS; + } + } + + /* fall through */ + case PAYLOAD_BLOCK_FULLY_PRESENT: + /* if the file offset address is in the header zone, + * there is a problem */ + if (sinfo.file_offset < (1 * MiB)) { + ret = -EFAULT; + goto error_bat_restore; + } + + if (!use_zero_buffers) { + qemu_iovec_concat(&hd_qiov, qiov, bytes_done, + sinfo.bytes_avail); + } + /* block exists, so we can just overwrite it */ + qemu_co_mutex_unlock(&s->lock); + ret = bdrv_co_pwritev(bs->file, sinfo.file_offset, + sectors_to_write * BDRV_SECTOR_SIZE, + &hd_qiov, 0); + qemu_co_mutex_lock(&s->lock); + if (ret < 0) { + goto error_bat_restore; + } + break; + case PAYLOAD_BLOCK_PARTIALLY_PRESENT: + /* we don't yet support difference files, fall through + * to error */ + default: + ret = -EIO; + goto exit; + break; + } + + if (bat_update) { + /* this will update the BAT entry into the log journal, and + * then flush the log journal out to disk */ + ret = vhdx_log_write_and_flush(bs, s, &bat_entry, + sizeof(VHDXBatEntry), + bat_entry_offset); + if (ret < 0) { + goto exit; + } + } + + nb_sectors -= sinfo.sectors_avail; + sector_num += sinfo.sectors_avail; + bytes_done += sinfo.bytes_avail; + + } + } + + goto exit; + +error_bat_restore: + if (bat_update) { + /* keep metadata in sync, and restore the bat entry state + * if error. */ + sinfo.file_offset = bat_prior_offset; + vhdx_update_bat_table_entry(bs, s, &sinfo, &bat_entry, + &bat_entry_offset, bat_state); + } +exit: + qemu_vfree(iov1.iov_base); + qemu_vfree(iov2.iov_base); + qemu_co_mutex_unlock(&s->lock); + qemu_iovec_destroy(&hd_qiov); + return ret; +} + + + +/* + * Create VHDX Headers + * + * There are 2 headers, and the highest sequence number will represent + * the active header + */ +static int vhdx_create_new_headers(BlockBackend *blk, uint64_t image_size, + uint32_t log_size) +{ + BlockDriverState *bs = blk_bs(blk); + BdrvChild *child; + int ret = 0; + VHDXHeader *hdr = NULL; + + hdr = g_new0(VHDXHeader, 1); + + hdr->signature = VHDX_HEADER_SIGNATURE; + hdr->sequence_number = g_random_int(); + hdr->log_version = 0; + hdr->version = 1; + hdr->log_length = log_size; + hdr->log_offset = VHDX_HEADER_SECTION_END; + vhdx_guid_generate(&hdr->file_write_guid); + vhdx_guid_generate(&hdr->data_write_guid); + + /* XXX Ugly way to get blk->root, but that's a feature, not a bug. This + * hack makes it obvious that vhdx_write_header() bypasses the BlockBackend + * here, which it really shouldn't be doing. */ + child = QLIST_FIRST(&bs->parents); + assert(!QLIST_NEXT(child, next_parent)); + + ret = vhdx_write_header(child, hdr, VHDX_HEADER1_OFFSET, false); + if (ret < 0) { + goto exit; + } + hdr->sequence_number++; + ret = vhdx_write_header(child, hdr, VHDX_HEADER2_OFFSET, false); + if (ret < 0) { + goto exit; + } + +exit: + g_free(hdr); + return ret; +} + +#define VHDX_METADATA_ENTRY_BUFFER_SIZE \ + (sizeof(VHDXFileParameters) +\ + sizeof(VHDXVirtualDiskSize) +\ + sizeof(VHDXPage83Data) +\ + sizeof(VHDXVirtualDiskLogicalSectorSize) +\ + sizeof(VHDXVirtualDiskPhysicalSectorSize)) + +/* + * Create the Metadata entries. + * + * For more details on the entries, see section 3.5 (pg 29) in the + * VHDX 1.00 specification. + * + * We support 5 metadata entries (all required by spec): + * File Parameters, + * Virtual Disk Size, + * Page 83 Data, + * Logical Sector Size, + * Physical Sector Size + * + * The first 64KB of the Metadata section is reserved for the metadata + * header and entries; beyond that, the metadata items themselves reside. + */ +static int vhdx_create_new_metadata(BlockBackend *blk, + uint64_t image_size, + uint32_t block_size, + uint32_t sector_size, + uint64_t metadata_offset, + VHDXImageType type) +{ + int ret = 0; + uint32_t offset = 0; + void *buffer = NULL; + void *entry_buffer; + VHDXMetadataTableHeader *md_table; + VHDXMetadataTableEntry *md_table_entry; + + /* Metadata entries */ + VHDXFileParameters *mt_file_params; + VHDXVirtualDiskSize *mt_virtual_size; + VHDXPage83Data *mt_page83; + VHDXVirtualDiskLogicalSectorSize *mt_log_sector_size; + VHDXVirtualDiskPhysicalSectorSize *mt_phys_sector_size; + + entry_buffer = g_malloc0(VHDX_METADATA_ENTRY_BUFFER_SIZE); + + mt_file_params = entry_buffer; + offset += sizeof(VHDXFileParameters); + mt_virtual_size = entry_buffer + offset; + offset += sizeof(VHDXVirtualDiskSize); + mt_page83 = entry_buffer + offset; + offset += sizeof(VHDXPage83Data); + mt_log_sector_size = entry_buffer + offset; + offset += sizeof(VHDXVirtualDiskLogicalSectorSize); + mt_phys_sector_size = entry_buffer + offset; + + mt_file_params->block_size = cpu_to_le32(block_size); + if (type == VHDX_TYPE_FIXED) { + mt_file_params->data_bits |= VHDX_PARAMS_LEAVE_BLOCKS_ALLOCED; + mt_file_params->data_bits = cpu_to_le32(mt_file_params->data_bits); + } + + vhdx_guid_generate(&mt_page83->page_83_data); + cpu_to_leguids(&mt_page83->page_83_data); + mt_virtual_size->virtual_disk_size = cpu_to_le64(image_size); + mt_log_sector_size->logical_sector_size = cpu_to_le32(sector_size); + mt_phys_sector_size->physical_sector_size = cpu_to_le32(sector_size); + + buffer = g_malloc0(VHDX_HEADER_BLOCK_SIZE); + md_table = buffer; + + md_table->signature = VHDX_METADATA_SIGNATURE; + md_table->entry_count = 5; + vhdx_metadata_header_le_export(md_table); + + + /* This will reference beyond the reserved table portion */ + offset = 64 * KiB; + + md_table_entry = buffer + sizeof(VHDXMetadataTableHeader); + + md_table_entry[0].item_id = file_param_guid; + md_table_entry[0].offset = offset; + md_table_entry[0].length = sizeof(VHDXFileParameters); + md_table_entry[0].data_bits |= VHDX_META_FLAGS_IS_REQUIRED; + offset += md_table_entry[0].length; + vhdx_metadata_entry_le_export(&md_table_entry[0]); + + md_table_entry[1].item_id = virtual_size_guid; + md_table_entry[1].offset = offset; + md_table_entry[1].length = sizeof(VHDXVirtualDiskSize); + md_table_entry[1].data_bits |= VHDX_META_FLAGS_IS_REQUIRED | + VHDX_META_FLAGS_IS_VIRTUAL_DISK; + offset += md_table_entry[1].length; + vhdx_metadata_entry_le_export(&md_table_entry[1]); + + md_table_entry[2].item_id = page83_guid; + md_table_entry[2].offset = offset; + md_table_entry[2].length = sizeof(VHDXPage83Data); + md_table_entry[2].data_bits |= VHDX_META_FLAGS_IS_REQUIRED | + VHDX_META_FLAGS_IS_VIRTUAL_DISK; + offset += md_table_entry[2].length; + vhdx_metadata_entry_le_export(&md_table_entry[2]); + + md_table_entry[3].item_id = logical_sector_guid; + md_table_entry[3].offset = offset; + md_table_entry[3].length = sizeof(VHDXVirtualDiskLogicalSectorSize); + md_table_entry[3].data_bits |= VHDX_META_FLAGS_IS_REQUIRED | + VHDX_META_FLAGS_IS_VIRTUAL_DISK; + offset += md_table_entry[3].length; + vhdx_metadata_entry_le_export(&md_table_entry[3]); + + md_table_entry[4].item_id = phys_sector_guid; + md_table_entry[4].offset = offset; + md_table_entry[4].length = sizeof(VHDXVirtualDiskPhysicalSectorSize); + md_table_entry[4].data_bits |= VHDX_META_FLAGS_IS_REQUIRED | + VHDX_META_FLAGS_IS_VIRTUAL_DISK; + vhdx_metadata_entry_le_export(&md_table_entry[4]); + + ret = blk_pwrite(blk, metadata_offset, buffer, VHDX_HEADER_BLOCK_SIZE, 0); + if (ret < 0) { + goto exit; + } + + ret = blk_pwrite(blk, metadata_offset + (64 * KiB), entry_buffer, + VHDX_METADATA_ENTRY_BUFFER_SIZE, 0); + if (ret < 0) { + goto exit; + } + + +exit: + g_free(buffer); + g_free(entry_buffer); + return ret; +} + +/* This create the actual BAT itself. We currently only support + * 'Dynamic' and 'Fixed' image types. + * + * Dynamic images: default state of the BAT is all zeroes. + * + * Fixed images: default state of the BAT is fully populated, with + * file offsets and state PAYLOAD_BLOCK_FULLY_PRESENT. + */ +static int vhdx_create_bat(BlockBackend *blk, BDRVVHDXState *s, + uint64_t image_size, VHDXImageType type, + bool use_zero_blocks, uint64_t file_offset, + uint32_t length, Error **errp) +{ + int ret = 0; + uint64_t data_file_offset; + uint64_t total_sectors = 0; + uint64_t sector_num = 0; + uint64_t unused; + int block_state; + VHDXSectorInfo sinfo; + + assert(s->bat == NULL); + + /* this gives a data start after BAT/bitmap entries, and well + * past any metadata entries (with a 4 MB buffer for future + * expansion */ + data_file_offset = file_offset + length + 5 * MiB; + total_sectors = image_size >> s->logical_sector_size_bits; + + if (type == VHDX_TYPE_DYNAMIC) { + /* All zeroes, so we can just extend the file - the end of the BAT + * is the furthest thing we have written yet */ + ret = blk_truncate(blk, data_file_offset, false, PREALLOC_MODE_OFF, + 0, errp); + if (ret < 0) { + goto exit; + } + } else if (type == VHDX_TYPE_FIXED) { + ret = blk_truncate(blk, data_file_offset + image_size, false, + PREALLOC_MODE_OFF, 0, errp); + if (ret < 0) { + goto exit; + } + } else { + error_setg(errp, "Unsupported image type"); + ret = -ENOTSUP; + goto exit; + } + + if (type == VHDX_TYPE_FIXED || + use_zero_blocks || + bdrv_has_zero_init(blk_bs(blk)) == 0) { + /* for a fixed file, the default BAT entry is not zero */ + s->bat = g_try_malloc0(length); + if (length && s->bat == NULL) { + error_setg(errp, "Failed to allocate memory for the BAT"); + ret = -ENOMEM; + goto exit; + } + block_state = type == VHDX_TYPE_FIXED ? PAYLOAD_BLOCK_FULLY_PRESENT : + PAYLOAD_BLOCK_NOT_PRESENT; + block_state = use_zero_blocks ? PAYLOAD_BLOCK_ZERO : block_state; + /* fill the BAT by emulating sector writes of sectors_per_block size */ + while (sector_num < total_sectors) { + vhdx_block_translate(s, sector_num, s->sectors_per_block, &sinfo); + sinfo.file_offset = data_file_offset + + (sector_num << s->logical_sector_size_bits); + sinfo.file_offset = ROUND_UP(sinfo.file_offset, MiB); + vhdx_update_bat_table_entry(blk_bs(blk), s, &sinfo, &unused, &unused, + block_state); + s->bat[sinfo.bat_idx] = cpu_to_le64(s->bat[sinfo.bat_idx]); + sector_num += s->sectors_per_block; + } + ret = blk_pwrite(blk, file_offset, s->bat, length, 0); + if (ret < 0) { + error_setg_errno(errp, -ret, "Failed to write the BAT"); + goto exit; + } + } + + + +exit: + g_free(s->bat); + return ret; +} + +/* Creates the region table header, and region table entries. + * There are 2 supported region table entries: BAT, and Metadata/ + * + * As the calculations for the BAT region table are also needed + * to create the BAT itself, we will also cause the BAT to be + * created. + */ +static int vhdx_create_new_region_table(BlockBackend *blk, + uint64_t image_size, + uint32_t block_size, + uint32_t sector_size, + uint32_t log_size, + bool use_zero_blocks, + VHDXImageType type, + uint64_t *metadata_offset, + Error **errp) +{ + int ret = 0; + uint32_t offset = 0; + void *buffer = NULL; + uint64_t bat_file_offset; + uint32_t bat_length; + BDRVVHDXState *s = NULL; + VHDXRegionTableHeader *region_table; + VHDXRegionTableEntry *rt_bat; + VHDXRegionTableEntry *rt_metadata; + + assert(metadata_offset != NULL); + + /* Populate enough of the BDRVVHDXState to be able to use the + * pre-existing BAT calculation, translation, and update functions */ + s = g_new0(BDRVVHDXState, 1); + + s->chunk_ratio = (VHDX_MAX_SECTORS_PER_BLOCK) * + (uint64_t) sector_size / (uint64_t) block_size; + + s->sectors_per_block = block_size / sector_size; + s->virtual_disk_size = image_size; + s->block_size = block_size; + s->logical_sector_size = sector_size; + + vhdx_set_shift_bits(s); + + vhdx_calc_bat_entries(s); + + /* At this point the VHDX state is populated enough for creation */ + + /* a single buffer is used so we can calculate the checksum over the + * entire 64KB block */ + buffer = g_malloc0(VHDX_HEADER_BLOCK_SIZE); + region_table = buffer; + offset += sizeof(VHDXRegionTableHeader); + rt_bat = buffer + offset; + offset += sizeof(VHDXRegionTableEntry); + rt_metadata = buffer + offset; + + region_table->signature = VHDX_REGION_SIGNATURE; + region_table->entry_count = 2; /* BAT and Metadata */ + + rt_bat->guid = bat_guid; + rt_bat->length = ROUND_UP(s->bat_entries * sizeof(VHDXBatEntry), MiB); + rt_bat->file_offset = ROUND_UP(VHDX_HEADER_SECTION_END + log_size, MiB); + s->bat_offset = rt_bat->file_offset; + + rt_metadata->guid = metadata_guid; + rt_metadata->file_offset = ROUND_UP(rt_bat->file_offset + rt_bat->length, + MiB); + rt_metadata->length = 1 * MiB; /* min size, and more than enough */ + *metadata_offset = rt_metadata->file_offset; + + bat_file_offset = rt_bat->file_offset; + bat_length = rt_bat->length; + + vhdx_region_header_le_export(region_table); + vhdx_region_entry_le_export(rt_bat); + vhdx_region_entry_le_export(rt_metadata); + + vhdx_update_checksum(buffer, VHDX_HEADER_BLOCK_SIZE, + offsetof(VHDXRegionTableHeader, checksum)); + + + /* The region table gives us the data we need to create the BAT, + * so do that now */ + ret = vhdx_create_bat(blk, s, image_size, type, use_zero_blocks, + bat_file_offset, bat_length, errp); + if (ret < 0) { + goto exit; + } + + /* Now write out the region headers to disk */ + ret = blk_pwrite(blk, VHDX_REGION_TABLE_OFFSET, buffer, + VHDX_HEADER_BLOCK_SIZE, 0); + if (ret < 0) { + error_setg_errno(errp, -ret, "Failed to write first region table"); + goto exit; + } + + ret = blk_pwrite(blk, VHDX_REGION_TABLE2_OFFSET, buffer, + VHDX_HEADER_BLOCK_SIZE, 0); + if (ret < 0) { + error_setg_errno(errp, -ret, "Failed to write second region table"); + goto exit; + } + +exit: + g_free(s); + g_free(buffer); + return ret; +} + +/* We need to create the following elements: + * + * .-----------------------------------------------------------------. + * | (A) | (B) | (C) | (D) | (E) | + * | File ID | Header1 | Header 2 | Region Tbl 1 | Region Tbl 2 | + * | | | | | | + * .-----------------------------------------------------------------. + * 0 64KB 128KB 192KB 256KB 320KB + * + * + * .---- ~ ----------- ~ ------------ ~ ---------------- ~ -----------. + * | (F) | (G) | (H) | | + * | Journal Log | BAT / Bitmap | Metadata | .... data ...... | + * | | | | | + * .---- ~ ----------- ~ ------------ ~ ---------------- ~ -----------. + * 1MB + */ +static int coroutine_fn vhdx_co_create(BlockdevCreateOptions *opts, + Error **errp) +{ + BlockdevCreateOptionsVhdx *vhdx_opts; + BlockBackend *blk = NULL; + BlockDriverState *bs = NULL; + + int ret = 0; + uint64_t image_size; + uint32_t log_size; + uint32_t block_size; + uint64_t signature; + uint64_t metadata_offset; + bool use_zero_blocks = false; + + gunichar2 *creator = NULL; + glong creator_items; + VHDXImageType image_type; + + assert(opts->driver == BLOCKDEV_DRIVER_VHDX); + vhdx_opts = &opts->u.vhdx; + + /* Validate options and set default values */ + image_size = vhdx_opts->size; + if (image_size > VHDX_MAX_IMAGE_SIZE) { + error_setg(errp, "Image size too large; max of 64TB"); + return -EINVAL; + } + + if (!vhdx_opts->has_log_size) { + log_size = DEFAULT_LOG_SIZE; + } else { + if (vhdx_opts->log_size > UINT32_MAX) { + error_setg(errp, "Log size must be smaller than 4 GB"); + return -EINVAL; + } + log_size = vhdx_opts->log_size; + } + if (log_size < MiB || (log_size % MiB) != 0) { + error_setg(errp, "Log size must be a multiple of 1 MB"); + return -EINVAL; + } + + if (!vhdx_opts->has_block_state_zero) { + use_zero_blocks = true; + } else { + use_zero_blocks = vhdx_opts->block_state_zero; + } + + if (!vhdx_opts->has_subformat) { + vhdx_opts->subformat = BLOCKDEV_VHDX_SUBFORMAT_DYNAMIC; + } + + switch (vhdx_opts->subformat) { + case BLOCKDEV_VHDX_SUBFORMAT_DYNAMIC: + image_type = VHDX_TYPE_DYNAMIC; + break; + case BLOCKDEV_VHDX_SUBFORMAT_FIXED: + image_type = VHDX_TYPE_FIXED; + break; + default: + g_assert_not_reached(); + } + + /* These are pretty arbitrary, and mainly designed to keep the BAT + * size reasonable to load into RAM */ + if (vhdx_opts->has_block_size) { + block_size = vhdx_opts->block_size; + } else { + if (image_size > 32 * TiB) { + block_size = 64 * MiB; + } else if (image_size > (uint64_t) 100 * GiB) { + block_size = 32 * MiB; + } else if (image_size > 1 * GiB) { + block_size = 16 * MiB; + } else { + block_size = 8 * MiB; + } + } + + if (block_size < MiB || (block_size % MiB) != 0) { + error_setg(errp, "Block size must be a multiple of 1 MB"); + return -EINVAL; + } + if (!is_power_of_2(block_size)) { + error_setg(errp, "Block size must be a power of two"); + return -EINVAL; + } + if (block_size > VHDX_BLOCK_SIZE_MAX) { + error_setg(errp, "Block size must not exceed %" PRId64, + VHDX_BLOCK_SIZE_MAX); + return -EINVAL; + } + + /* Create BlockBackend to write to the image */ + bs = bdrv_open_blockdev_ref(vhdx_opts->file, errp); + if (bs == NULL) { + return -EIO; + } + + blk = blk_new_with_bs(bs, BLK_PERM_WRITE | BLK_PERM_RESIZE, BLK_PERM_ALL, + errp); + if (!blk) { + ret = -EPERM; + goto delete_and_exit; + } + blk_set_allow_write_beyond_eof(blk, true); + + /* Create (A) */ + + /* The creator field is optional, but may be useful for + * debugging / diagnostics */ + creator = g_utf8_to_utf16("QEMU v" QEMU_VERSION, -1, NULL, + &creator_items, NULL); + signature = cpu_to_le64(VHDX_FILE_SIGNATURE); + ret = blk_pwrite(blk, VHDX_FILE_ID_OFFSET, &signature, sizeof(signature), + 0); + if (ret < 0) { + error_setg_errno(errp, -ret, "Failed to write file signature"); + goto delete_and_exit; + } + if (creator) { + ret = blk_pwrite(blk, VHDX_FILE_ID_OFFSET + sizeof(signature), + creator, creator_items * sizeof(gunichar2), 0); + if (ret < 0) { + error_setg_errno(errp, -ret, "Failed to write creator field"); + goto delete_and_exit; + } + } + + + /* Creates (B),(C) */ + ret = vhdx_create_new_headers(blk, image_size, log_size); + if (ret < 0) { + error_setg_errno(errp, -ret, "Failed to write image headers"); + goto delete_and_exit; + } + + /* Creates (D),(E),(G) explicitly. (F) created as by-product */ + ret = vhdx_create_new_region_table(blk, image_size, block_size, 512, + log_size, use_zero_blocks, image_type, + &metadata_offset, errp); + if (ret < 0) { + goto delete_and_exit; + } + + /* Creates (H) */ + ret = vhdx_create_new_metadata(blk, image_size, block_size, 512, + metadata_offset, image_type); + if (ret < 0) { + error_setg_errno(errp, -ret, "Failed to initialize metadata"); + goto delete_and_exit; + } + + ret = 0; +delete_and_exit: + blk_unref(blk); + bdrv_unref(bs); + g_free(creator); + return ret; +} + +static int coroutine_fn vhdx_co_create_opts(BlockDriver *drv, + const char *filename, + QemuOpts *opts, + Error **errp) +{ + BlockdevCreateOptions *create_options = NULL; + QDict *qdict; + Visitor *v; + BlockDriverState *bs = NULL; + int ret; + + static const QDictRenames opt_renames[] = { + { VHDX_BLOCK_OPT_LOG_SIZE, "log-size" }, + { VHDX_BLOCK_OPT_BLOCK_SIZE, "block-size" }, + { VHDX_BLOCK_OPT_ZERO, "block-state-zero" }, + { NULL, NULL }, + }; + + /* Parse options and convert legacy syntax */ + qdict = qemu_opts_to_qdict_filtered(opts, NULL, &vhdx_create_opts, true); + + if (!qdict_rename_keys(qdict, opt_renames, errp)) { + ret = -EINVAL; + goto fail; + } + + /* Create and open the file (protocol layer) */ + ret = bdrv_create_file(filename, opts, errp); + if (ret < 0) { + goto fail; + } + + bs = bdrv_open(filename, NULL, NULL, + BDRV_O_RDWR | BDRV_O_RESIZE | BDRV_O_PROTOCOL, errp); + if (bs == NULL) { + ret = -EIO; + goto fail; + } + + /* Now get the QAPI type BlockdevCreateOptions */ + qdict_put_str(qdict, "driver", "vhdx"); + qdict_put_str(qdict, "file", bs->node_name); + + v = qobject_input_visitor_new_flat_confused(qdict, errp); + if (!v) { + ret = -EINVAL; + goto fail; + } + + visit_type_BlockdevCreateOptions(v, NULL, &create_options, errp); + visit_free(v); + if (!create_options) { + ret = -EINVAL; + goto fail; + } + + /* Silently round up sizes: + * The image size is rounded to 512 bytes. Make the block and log size + * close to what was specified, but must be at least 1MB, and a multiple of + * 1 MB. Also respect VHDX_BLOCK_SIZE_MAX for block sizes. block_size = 0 + * means auto, which is represented by a missing key in QAPI. */ + assert(create_options->driver == BLOCKDEV_DRIVER_VHDX); + create_options->u.vhdx.size = + ROUND_UP(create_options->u.vhdx.size, BDRV_SECTOR_SIZE); + + if (create_options->u.vhdx.has_log_size) { + create_options->u.vhdx.log_size = + ROUND_UP(create_options->u.vhdx.log_size, MiB); + } + if (create_options->u.vhdx.has_block_size) { + create_options->u.vhdx.block_size = + ROUND_UP(create_options->u.vhdx.block_size, MiB); + + if (create_options->u.vhdx.block_size == 0) { + create_options->u.vhdx.has_block_size = false; + } + if (create_options->u.vhdx.block_size > VHDX_BLOCK_SIZE_MAX) { + create_options->u.vhdx.block_size = VHDX_BLOCK_SIZE_MAX; + } + } + + /* Create the vhdx image (format layer) */ + ret = vhdx_co_create(create_options, errp); + +fail: + qobject_unref(qdict); + bdrv_unref(bs); + qapi_free_BlockdevCreateOptions(create_options); + return ret; +} + +/* If opened r/w, the VHDX driver will automatically replay the log, + * if one is present, inside the vhdx_open() call. + * + * If qemu-img check -r all is called, the image is automatically opened + * r/w and any log has already been replayed, so there is nothing (currently) + * for us to do here + */ +static int coroutine_fn vhdx_co_check(BlockDriverState *bs, + BdrvCheckResult *result, + BdrvCheckMode fix) +{ + BDRVVHDXState *s = bs->opaque; + + if (s->log_replayed_on_open) { + result->corruptions_fixed++; + } + + vhdx_check_bat_entries(bs, &result->corruptions); + + return 0; +} + +static int vhdx_has_zero_init(BlockDriverState *bs) +{ + BDRVVHDXState *s = bs->opaque; + int state; + + /* + * Check the subformat: Fixed images have all BAT entries present, + * dynamic images have none (right after creation). It is + * therefore enough to check the first BAT entry. + */ + if (!s->bat_entries) { + return 1; + } + + state = s->bat[0] & VHDX_BAT_STATE_BIT_MASK; + if (state == PAYLOAD_BLOCK_FULLY_PRESENT) { + /* Fixed subformat */ + return bdrv_has_zero_init(bs->file->bs); + } + + /* Dynamic subformat */ + return 1; +} + +static QemuOptsList vhdx_create_opts = { + .name = "vhdx-create-opts", + .head = QTAILQ_HEAD_INITIALIZER(vhdx_create_opts.head), + .desc = { + { + .name = BLOCK_OPT_SIZE, + .type = QEMU_OPT_SIZE, + .help = "Virtual disk size; max of 64TB." + }, + { + .name = VHDX_BLOCK_OPT_LOG_SIZE, + .type = QEMU_OPT_SIZE, + .def_value_str = stringify(DEFAULT_LOG_SIZE), + .help = "Log size; min 1MB." + }, + { + .name = VHDX_BLOCK_OPT_BLOCK_SIZE, + .type = QEMU_OPT_SIZE, + .def_value_str = stringify(0), + .help = "Block Size; min 1MB, max 256MB. " + "0 means auto-calculate based on image size." + }, + { + .name = BLOCK_OPT_SUBFMT, + .type = QEMU_OPT_STRING, + .help = "VHDX format type, can be either 'dynamic' or 'fixed'. " + "Default is 'dynamic'." + }, + { + .name = VHDX_BLOCK_OPT_ZERO, + .type = QEMU_OPT_BOOL, + .help = "Force use of payload blocks of type 'ZERO'. " + "Non-standard, but default. Do not set to 'off' when " + "using 'qemu-img convert' with subformat=dynamic." + }, + { NULL } + } +}; + +static BlockDriver bdrv_vhdx = { + .format_name = "vhdx", + .instance_size = sizeof(BDRVVHDXState), + .bdrv_probe = vhdx_probe, + .bdrv_open = vhdx_open, + .bdrv_close = vhdx_close, + .bdrv_reopen_prepare = vhdx_reopen_prepare, + .bdrv_child_perm = bdrv_default_perms, + .bdrv_co_readv = vhdx_co_readv, + .bdrv_co_writev = vhdx_co_writev, + .bdrv_co_create = vhdx_co_create, + .bdrv_co_create_opts = vhdx_co_create_opts, + .bdrv_get_info = vhdx_get_info, + .bdrv_co_check = vhdx_co_check, + .bdrv_has_zero_init = vhdx_has_zero_init, + + .is_format = true, + .create_opts = &vhdx_create_opts, +}; + +static void bdrv_vhdx_init(void) +{ + bdrv_register(&bdrv_vhdx); +} + +block_init(bdrv_vhdx_init); diff --git a/block/vhdx.h b/block/vhdx.h new file mode 100644 index 000000000..0b74924ce --- /dev/null +++ b/block/vhdx.h @@ -0,0 +1,451 @@ +/* + * Block driver for Hyper-V VHDX Images + * + * Copyright (c) 2013 Red Hat, Inc., + * + * Authors: + * Jeff Cody + * + * This is based on the "VHDX Format Specification v1.00", published 8/25/2012 + * by Microsoft: + * https://www.microsoft.com/en-us/download/details.aspx?id=34750 + * + * This work is licensed under the terms of the GNU LGPL, version 2 or later. + * See the COPYING.LIB file in the top-level directory. + * + */ + +#ifndef BLOCK_VHDX_H +#define BLOCK_VHDX_H +#include "qemu/units.h" + +#define DEFAULT_LOG_SIZE 1048576 /* 1MiB */ +/* Note: can't use 1 * MiB, because it's passed to stringify() */ + +/* Structures and fields present in the VHDX file */ + +/* The header section has the following blocks, + * each block is 64KB: + * + * _____________________________________________________________________________ + * | File Id. | Header 1 | Header 2 | Region Table | Reserved (768KB) | + * |----------|---------------|------------|--------------|--------------------| + * | | | | | | + * 0.........64KB...........128KB........192KB..........256KB................1MB + */ + +#define VHDX_HEADER_BLOCK_SIZE (64 * KiB) + +#define VHDX_FILE_ID_OFFSET 0 +#define VHDX_HEADER1_OFFSET (VHDX_HEADER_BLOCK_SIZE * 1) +#define VHDX_HEADER2_OFFSET (VHDX_HEADER_BLOCK_SIZE * 2) +#define VHDX_REGION_TABLE_OFFSET (VHDX_HEADER_BLOCK_SIZE * 3) +#define VHDX_REGION_TABLE2_OFFSET (VHDX_HEADER_BLOCK_SIZE * 4) + +#define VHDX_HEADER_SECTION_END (1 * MiB) +/* + * A note on the use of MS-GUID fields. For more details on the GUID, + * please see: https://en.wikipedia.org/wiki/Globally_unique_identifier. + * + * The VHDX specification only states that these are MS GUIDs, and which + * bytes are data1-data4. It makes no mention of what algorithm should be used + * to generate the GUID, nor what standard. However, looking at the specified + * known GUID fields, it appears the GUIDs are: + * Standard/DCE GUID type (noted by 10b in the MSB of byte 0 of .data4) + * Random algorithm (noted by 0x4XXX for .data3) + */ + +/* ---- HEADER SECTION STRUCTURES ---- */ + +/* These structures are ones that are defined in the VHDX specification + * document */ + +#define VHDX_FILE_SIGNATURE 0x656C696678646876ULL /* "vhdxfile" in ASCII */ +typedef struct VHDXFileIdentifier { + uint64_t signature; /* "vhdxfile" in ASCII */ + uint16_t creator[256]; /* optional; utf-16 string to identify + the vhdx file creator. Diagnostic + only */ +} VHDXFileIdentifier; + + +/* the guid is a 16 byte unique ID - the definition for this used by + * Microsoft is not just 16 bytes though - it is a structure that is defined, + * so we need to follow it here so that endianness does not trip us up */ + +typedef struct QEMU_PACKED MSGUID { + uint32_t data1; + uint16_t data2; + uint16_t data3; + uint8_t data4[8]; +} MSGUID; + +#define guid_eq(a, b) \ + (memcmp(&(a), &(b), sizeof(MSGUID)) == 0) + +#define VHDX_HEADER_SIZE (4 * KiB) /* although the vhdx_header struct in disk + is only 582 bytes, for purposes of crc + the header is the first 4KB of the 64KB + block */ + +/* The full header is 4KB, although the actual header data is much smaller. + * But for the checksum calculation, it is over the entire 4KB structure, + * not just the defined portion of it */ +#define VHDX_HEADER_SIGNATURE 0x64616568 +typedef struct QEMU_PACKED VHDXHeader { + uint32_t signature; /* "head" in ASCII */ + uint32_t checksum; /* CRC-32C hash of the whole header */ + uint64_t sequence_number; /* Seq number of this header. Each + VHDX file has 2 of these headers, + and only the header with the highest + sequence number is valid */ + MSGUID file_write_guid; /* 128 bit unique identifier. Must be + updated to new, unique value before + the first modification is made to + file */ + MSGUID data_write_guid; /* 128 bit unique identifier. Must be + updated to new, unique value before + the first modification is made to + visible data. Visbile data is + defined as: + - system & user metadata + - raw block data + - disk size + - any change that will + cause the virtual disk + sector read to differ + + This does not need to change if + blocks are re-arranged */ + MSGUID log_guid; /* 128 bit unique identifier. If zero, + there is no valid log. If non-zero, + log entries with this guid are + valid. */ + uint16_t log_version; /* version of the log format. Must be + set to zero */ + uint16_t version; /* version of the vhdx file. Currently, + only supported version is "1" */ + uint32_t log_length; /* length of the log. Must be multiple + of 1MB */ + uint64_t log_offset; /* byte offset in the file of the log. + Must also be a multiple of 1MB */ +} VHDXHeader; + +/* Header for the region table block */ +#define VHDX_REGION_SIGNATURE 0x69676572 /* "regi" in ASCII */ +typedef struct QEMU_PACKED VHDXRegionTableHeader { + uint32_t signature; /* "regi" in ASCII */ + uint32_t checksum; /* CRC-32C hash of the 64KB table */ + uint32_t entry_count; /* number of valid entries */ + uint32_t reserved; +} VHDXRegionTableHeader; + +/* Individual region table entry. There may be a maximum of 2047 of these + * + * There are two known region table properties. Both are required. + * BAT (block allocation table): 2DC27766F62342009D64115E9BFD4A08 + * Metadata: 8B7CA20647904B9AB8FE575F050F886E + */ +#define VHDX_REGION_ENTRY_REQUIRED 0x01 /* if set, parser must understand + this entry in order to open + file */ +typedef struct QEMU_PACKED VHDXRegionTableEntry { + MSGUID guid; /* 128-bit unique identifier */ + uint64_t file_offset; /* offset of the object in the file. + Must be multiple of 1MB */ + uint32_t length; /* length, in bytes, of the object */ + uint32_t data_bits; +} VHDXRegionTableEntry; + + +/* ---- LOG ENTRY STRUCTURES ---- */ +#define VHDX_LOG_MIN_SIZE (1 * MiB) +#define VHDX_LOG_SECTOR_SIZE (4 * KiB) +#define VHDX_LOG_HDR_SIZE 64 +#define VHDX_LOG_SIGNATURE 0x65676f6c +typedef struct QEMU_PACKED VHDXLogEntryHeader { + uint32_t signature; /* "loge" in ASCII */ + uint32_t checksum; /* CRC-32C hash of the 64KB table */ + uint32_t entry_length; /* length in bytes, multiple of 1MB */ + uint32_t tail; /* byte offset of first log entry of a + seq, where this entry is the last + entry */ + uint64_t sequence_number; /* incremented with each log entry. + May not be zero. */ + uint32_t descriptor_count; /* number of descriptors in this log + entry, must be >= 0 */ + uint32_t reserved; + MSGUID log_guid; /* value of the log_guid from + vhdx_header. If not found in + vhdx_header, it is invalid */ + uint64_t flushed_file_offset; /* see spec for full details - this + should be vhdx file size in bytes */ + uint64_t last_file_offset; /* size in bytes that all allocated + file structures fit into */ +} VHDXLogEntryHeader; + +#define VHDX_LOG_DESC_SIZE 32 +#define VHDX_LOG_DESC_SIGNATURE 0x63736564 +#define VHDX_LOG_ZERO_SIGNATURE 0x6f72657a +typedef struct QEMU_PACKED VHDXLogDescriptor { + uint32_t signature; /* "zero" or "desc" in ASCII */ + union { + uint32_t reserved; /* zero desc */ + uint32_t trailing_bytes; /* data desc: bytes 4092-4096 of the + data sector */ + }; + union { + uint64_t zero_length; /* zero desc: length of the section to + zero */ + uint64_t leading_bytes; /* data desc: bytes 0-7 of the data + sector */ + }; + uint64_t file_offset; /* file offset to write zeros - multiple + of 4kB */ + uint64_t sequence_number; /* must match same field in + vhdx_log_entry_header */ +} VHDXLogDescriptor; + +#define VHDX_LOG_DATA_SIGNATURE 0x61746164 +typedef struct QEMU_PACKED VHDXLogDataSector { + uint32_t data_signature; /* "data" in ASCII */ + uint32_t sequence_high; /* 4 MSB of 8 byte sequence_number */ + uint8_t data[4084]; /* raw data, bytes 8-4091 (inclusive). + see the data descriptor field for the + other mising bytes */ + uint32_t sequence_low; /* 4 LSB of 8 byte sequence_number */ +} VHDXLogDataSector; + + + +/* block states - different state values depending on whether it is a + * payload block, or a sector block. */ + +#define PAYLOAD_BLOCK_NOT_PRESENT 0 +#define PAYLOAD_BLOCK_UNDEFINED 1 +#define PAYLOAD_BLOCK_ZERO 2 +#define PAYLOAD_BLOCK_UNMAPPED 3 +#define PAYLOAD_BLOCK_UNMAPPED_v095 5 +#define PAYLOAD_BLOCK_FULLY_PRESENT 6 +#define PAYLOAD_BLOCK_PARTIALLY_PRESENT 7 + +#define SB_BLOCK_NOT_PRESENT 0 +#define SB_BLOCK_PRESENT 6 + +/* per the spec */ +#define VHDX_MAX_SECTORS_PER_BLOCK (1 << 23) + +/* upper 44 bits are the file offset in 1MB units lower 3 bits are the state + other bits are reserved */ +#define VHDX_BAT_STATE_BIT_MASK 0x07 +#define VHDX_BAT_FILE_OFF_MASK 0xFFFFFFFFFFF00000ULL /* upper 44 bits */ +typedef uint64_t VHDXBatEntry; + +/* ---- METADATA REGION STRUCTURES ---- */ + +#define VHDX_METADATA_ENTRY_SIZE 32 +#define VHDX_METADATA_MAX_ENTRIES 2047 /* not including the header */ +#define VHDX_METADATA_TABLE_MAX_SIZE \ + (VHDX_METADATA_ENTRY_SIZE * (VHDX_METADATA_MAX_ENTRIES+1)) +#define VHDX_METADATA_SIGNATURE 0x617461646174656DULL /* "metadata" in ASCII */ +typedef struct QEMU_PACKED VHDXMetadataTableHeader { + uint64_t signature; /* "metadata" in ASCII */ + uint16_t reserved; + uint16_t entry_count; /* number table entries. <= 2047 */ + uint32_t reserved2[5]; +} VHDXMetadataTableHeader; + +#define VHDX_META_FLAGS_IS_USER 0x01 /* max 1024 entries */ +#define VHDX_META_FLAGS_IS_VIRTUAL_DISK 0x02 /* virtual disk metadata if set, + otherwise file metdata */ +#define VHDX_META_FLAGS_IS_REQUIRED 0x04 /* parse must understand this + entry to open the file */ +typedef struct QEMU_PACKED VHDXMetadataTableEntry { + MSGUID item_id; /* 128-bit identifier for metadata */ + uint32_t offset; /* byte offset of the metadata. At + least 64kB. Relative to start of + metadata region */ + /* note: if length = 0, so is offset */ + uint32_t length; /* length of metadata. <= 1MB. */ + uint32_t data_bits; /* least-significant 3 bits are flags, + the rest are reserved (see above) */ + uint32_t reserved2; +} VHDXMetadataTableEntry; + +#define VHDX_PARAMS_LEAVE_BLOCKS_ALLOCED 0x01 /* Do not change any blocks to + be BLOCK_NOT_PRESENT. + If set indicates a fixed + size VHDX file */ +#define VHDX_PARAMS_HAS_PARENT 0x02 /* has parent / backing file */ +#define VHDX_BLOCK_SIZE_MIN (1 * MiB) +#define VHDX_BLOCK_SIZE_MAX (256 * MiB) +typedef struct QEMU_PACKED VHDXFileParameters { + uint32_t block_size; /* size of each payload block, always + power of 2, <= 256MB and >= 1MB. */ + uint32_t data_bits; /* least-significant 2 bits are flags, + the rest are reserved (see above) */ +} VHDXFileParameters; + +#define VHDX_MAX_IMAGE_SIZE ((uint64_t) 64 * TiB) +typedef struct QEMU_PACKED VHDXVirtualDiskSize { + uint64_t virtual_disk_size; /* Size of the virtual disk, in bytes. + Must be multiple of the sector size, + max of 64TB */ +} VHDXVirtualDiskSize; + +typedef struct QEMU_PACKED VHDXPage83Data { + MSGUID page_83_data; /* unique id for scsi devices that + support page 0x83 */ +} VHDXPage83Data; + +typedef struct QEMU_PACKED VHDXVirtualDiskLogicalSectorSize { + uint32_t logical_sector_size; /* virtual disk sector size (in bytes). + Can only be 512 or 4096 bytes */ +} VHDXVirtualDiskLogicalSectorSize; + +typedef struct QEMU_PACKED VHDXVirtualDiskPhysicalSectorSize { + uint32_t physical_sector_size; /* physical sector size (in bytes). + Can only be 512 or 4096 bytes */ +} VHDXVirtualDiskPhysicalSectorSize; + +typedef struct QEMU_PACKED VHDXParentLocatorHeader { + MSGUID locator_type; /* type of the parent virtual disk. */ + uint16_t reserved; + uint16_t key_value_count; /* number of key/value pairs for this + locator */ +} VHDXParentLocatorHeader; + +/* key and value strings are UNICODE strings, UTF-16 LE encoding, no NULs */ +typedef struct QEMU_PACKED VHDXParentLocatorEntry { + uint32_t key_offset; /* offset in metadata for key, > 0 */ + uint32_t value_offset; /* offset in metadata for value, >0 */ + uint16_t key_length; /* length of entry key, > 0 */ + uint16_t value_length; /* length of entry value, > 0 */ +} VHDXParentLocatorEntry; + + +/* ----- END VHDX SPECIFICATION STRUCTURES ---- */ + +typedef struct VHDXMetadataEntries { + VHDXMetadataTableEntry file_parameters_entry; + VHDXMetadataTableEntry virtual_disk_size_entry; + VHDXMetadataTableEntry page83_data_entry; + VHDXMetadataTableEntry logical_sector_size_entry; + VHDXMetadataTableEntry phys_sector_size_entry; + VHDXMetadataTableEntry parent_locator_entry; + uint16_t present; +} VHDXMetadataEntries; + +typedef struct VHDXLogEntries { + uint64_t offset; + uint64_t length; + uint32_t write; + uint32_t read; + VHDXLogEntryHeader *hdr; + void *desc_buffer; + uint64_t sequence; + uint32_t tail; +} VHDXLogEntries; + +typedef struct VHDXRegionEntry { + uint64_t start; + uint64_t end; + QLIST_ENTRY(VHDXRegionEntry) entries; +} VHDXRegionEntry; + +typedef struct BDRVVHDXState { + CoMutex lock; + + int curr_header; + VHDXHeader *headers[2]; + + VHDXRegionTableHeader rt; + VHDXRegionTableEntry bat_rt; /* region table for the BAT */ + VHDXRegionTableEntry metadata_rt; /* region table for the metadata */ + + VHDXMetadataTableHeader metadata_hdr; + VHDXMetadataEntries metadata_entries; + + VHDXFileParameters params; + uint32_t block_size; + uint32_t block_size_bits; + uint32_t sectors_per_block; + uint32_t sectors_per_block_bits; + + uint64_t virtual_disk_size; + uint32_t logical_sector_size; + uint32_t physical_sector_size; + + uint64_t chunk_ratio; + uint32_t chunk_ratio_bits; + uint32_t logical_sector_size_bits; + + uint32_t bat_entries; + VHDXBatEntry *bat; + uint64_t bat_offset; + + bool first_visible_write; + MSGUID session_guid; + + VHDXLogEntries log; + + VHDXParentLocatorHeader parent_header; + VHDXParentLocatorEntry *parent_entries; + + Error *migration_blocker; + + bool log_replayed_on_open; + + QLIST_HEAD(, VHDXRegionEntry) regions; +} BDRVVHDXState; + +void vhdx_guid_generate(MSGUID *guid); + +int vhdx_update_headers(BlockDriverState *bs, BDRVVHDXState *s, bool rw, + MSGUID *log_guid); + +uint32_t vhdx_update_checksum(uint8_t *buf, size_t size, int crc_offset); +uint32_t vhdx_checksum_calc(uint32_t crc, uint8_t *buf, size_t size, + int crc_offset); + +bool vhdx_checksum_is_valid(uint8_t *buf, size_t size, int crc_offset); + +int vhdx_parse_log(BlockDriverState *bs, BDRVVHDXState *s, bool *flushed, + Error **errp); + +int vhdx_log_write_and_flush(BlockDriverState *bs, BDRVVHDXState *s, + void *data, uint32_t length, uint64_t offset); + +static inline void leguid_to_cpus(MSGUID *guid) +{ + guid->data1 = le32_to_cpu(guid->data1); + guid->data2 = le16_to_cpu(guid->data2); + guid->data3 = le16_to_cpu(guid->data3); +} + +static inline void cpu_to_leguids(MSGUID *guid) +{ + guid->data1 = cpu_to_le32(guid->data1); + guid->data2 = cpu_to_le16(guid->data2); + guid->data3 = cpu_to_le16(guid->data3); +} + +void vhdx_header_le_import(VHDXHeader *h); +void vhdx_header_le_export(VHDXHeader *orig_h, VHDXHeader *new_h); +void vhdx_log_desc_le_import(VHDXLogDescriptor *d); +void vhdx_log_desc_le_export(VHDXLogDescriptor *d); +void vhdx_log_data_le_import(VHDXLogDataSector *d); +void vhdx_log_data_le_export(VHDXLogDataSector *d); +void vhdx_log_entry_hdr_le_import(VHDXLogEntryHeader *hdr); +void vhdx_log_entry_hdr_le_export(VHDXLogEntryHeader *hdr); +void vhdx_region_header_le_import(VHDXRegionTableHeader *hdr); +void vhdx_region_header_le_export(VHDXRegionTableHeader *hdr); +void vhdx_region_entry_le_import(VHDXRegionTableEntry *e); +void vhdx_region_entry_le_export(VHDXRegionTableEntry *e); +void vhdx_metadata_header_le_import(VHDXMetadataTableHeader *hdr); +void vhdx_metadata_header_le_export(VHDXMetadataTableHeader *hdr); +void vhdx_metadata_entry_le_import(VHDXMetadataTableEntry *e); +void vhdx_metadata_entry_le_export(VHDXMetadataTableEntry *e); +int vhdx_user_visible_write(BlockDriverState *bs, BDRVVHDXState *s); + +#endif diff --git a/block/vmdk.c b/block/vmdk.c new file mode 100644 index 000000000..a00dc00eb --- /dev/null +++ b/block/vmdk.c @@ -0,0 +1,3087 @@ +/* + * Block driver for the VMDK format + * + * Copyright (c) 2004 Fabrice Bellard + * Copyright (c) 2005 Filip Navara + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ + +#include "qemu/osdep.h" +#include "qapi/error.h" +#include "block/block_int.h" +#include "sysemu/block-backend.h" +#include "qapi/qmp/qdict.h" +#include "qapi/qmp/qerror.h" +#include "qemu/error-report.h" +#include "qemu/module.h" +#include "qemu/option.h" +#include "qemu/bswap.h" +#include "migration/blocker.h" +#include "qemu/cutils.h" +#include + +#define VMDK3_MAGIC (('C' << 24) | ('O' << 16) | ('W' << 8) | 'D') +#define VMDK4_MAGIC (('K' << 24) | ('D' << 16) | ('M' << 8) | 'V') +#define VMDK4_COMPRESSION_DEFLATE 1 +#define VMDK4_FLAG_NL_DETECT (1 << 0) +#define VMDK4_FLAG_RGD (1 << 1) +/* Zeroed-grain enable bit */ +#define VMDK4_FLAG_ZERO_GRAIN (1 << 2) +#define VMDK4_FLAG_COMPRESS (1 << 16) +#define VMDK4_FLAG_MARKER (1 << 17) +#define VMDK4_GD_AT_END 0xffffffffffffffffULL + +#define VMDK_EXTENT_MAX_SECTORS (1ULL << 32) + +#define VMDK_GTE_ZEROED 0x1 + +/* VMDK internal error codes */ +#define VMDK_OK 0 +#define VMDK_ERROR (-1) +/* Cluster not allocated */ +#define VMDK_UNALLOC (-2) +#define VMDK_ZEROED (-3) + +#define BLOCK_OPT_ZEROED_GRAIN "zeroed_grain" + +typedef struct { + uint32_t version; + uint32_t flags; + uint32_t disk_sectors; + uint32_t granularity; + uint32_t l1dir_offset; + uint32_t l1dir_size; + uint32_t file_sectors; + uint32_t cylinders; + uint32_t heads; + uint32_t sectors_per_track; +} QEMU_PACKED VMDK3Header; + +typedef struct { + uint32_t version; + uint32_t flags; + uint64_t capacity; + uint64_t granularity; + uint64_t desc_offset; + uint64_t desc_size; + /* Number of GrainTableEntries per GrainTable */ + uint32_t num_gtes_per_gt; + uint64_t rgd_offset; + uint64_t gd_offset; + uint64_t grain_offset; + char filler[1]; + char check_bytes[4]; + uint16_t compressAlgorithm; +} QEMU_PACKED VMDK4Header; + +typedef struct VMDKSESparseConstHeader { + uint64_t magic; + uint64_t version; + uint64_t capacity; + uint64_t grain_size; + uint64_t grain_table_size; + uint64_t flags; + uint64_t reserved1; + uint64_t reserved2; + uint64_t reserved3; + uint64_t reserved4; + uint64_t volatile_header_offset; + uint64_t volatile_header_size; + uint64_t journal_header_offset; + uint64_t journal_header_size; + uint64_t journal_offset; + uint64_t journal_size; + uint64_t grain_dir_offset; + uint64_t grain_dir_size; + uint64_t grain_tables_offset; + uint64_t grain_tables_size; + uint64_t free_bitmap_offset; + uint64_t free_bitmap_size; + uint64_t backmap_offset; + uint64_t backmap_size; + uint64_t grains_offset; + uint64_t grains_size; + uint8_t pad[304]; +} QEMU_PACKED VMDKSESparseConstHeader; + +typedef struct VMDKSESparseVolatileHeader { + uint64_t magic; + uint64_t free_gt_number; + uint64_t next_txn_seq_number; + uint64_t replay_journal; + uint8_t pad[480]; +} QEMU_PACKED VMDKSESparseVolatileHeader; + +#define L2_CACHE_SIZE 16 + +typedef struct VmdkExtent { + BdrvChild *file; + bool flat; + bool compressed; + bool has_marker; + bool has_zero_grain; + bool sesparse; + uint64_t sesparse_l2_tables_offset; + uint64_t sesparse_clusters_offset; + int32_t entry_size; + int version; + int64_t sectors; + int64_t end_sector; + int64_t flat_start_offset; + int64_t l1_table_offset; + int64_t l1_backup_table_offset; + void *l1_table; + uint32_t *l1_backup_table; + unsigned int l1_size; + uint32_t l1_entry_sectors; + + unsigned int l2_size; + void *l2_cache; + uint32_t l2_cache_offsets[L2_CACHE_SIZE]; + uint32_t l2_cache_counts[L2_CACHE_SIZE]; + + int64_t cluster_sectors; + int64_t next_cluster_sector; + char *type; +} VmdkExtent; + +typedef struct BDRVVmdkState { + CoMutex lock; + uint64_t desc_offset; + bool cid_updated; + bool cid_checked; + uint32_t cid; + uint32_t parent_cid; + int num_extents; + /* Extent array with num_extents entries, ascend ordered by address */ + VmdkExtent *extents; + Error *migration_blocker; + char *create_type; +} BDRVVmdkState; + +typedef struct VmdkMetaData { + unsigned int l1_index; + unsigned int l2_index; + unsigned int l2_offset; + bool new_allocation; + uint32_t *l2_cache_entry; +} VmdkMetaData; + +typedef struct VmdkGrainMarker { + uint64_t lba; + uint32_t size; + uint8_t data[]; +} QEMU_PACKED VmdkGrainMarker; + +enum { + MARKER_END_OF_STREAM = 0, + MARKER_GRAIN_TABLE = 1, + MARKER_GRAIN_DIRECTORY = 2, + MARKER_FOOTER = 3, +}; + +static int vmdk_probe(const uint8_t *buf, int buf_size, const char *filename) +{ + uint32_t magic; + + if (buf_size < 4) { + return 0; + } + magic = be32_to_cpu(*(uint32_t *)buf); + if (magic == VMDK3_MAGIC || + magic == VMDK4_MAGIC) { + return 100; + } else { + const char *p = (const char *)buf; + const char *end = p + buf_size; + while (p < end) { + if (*p == '#') { + /* skip comment line */ + while (p < end && *p != '\n') { + p++; + } + p++; + continue; + } + if (*p == ' ') { + while (p < end && *p == ' ') { + p++; + } + /* skip '\r' if windows line endings used. */ + if (p < end && *p == '\r') { + p++; + } + /* only accept blank lines before 'version=' line */ + if (p == end || *p != '\n') { + return 0; + } + p++; + continue; + } + if (end - p >= strlen("version=X\n")) { + if (strncmp("version=1\n", p, strlen("version=1\n")) == 0 || + strncmp("version=2\n", p, strlen("version=2\n")) == 0 || + strncmp("version=3\n", p, strlen("version=3\n")) == 0) { + return 100; + } + } + if (end - p >= strlen("version=X\r\n")) { + if (strncmp("version=1\r\n", p, strlen("version=1\r\n")) == 0 || + strncmp("version=2\r\n", p, strlen("version=2\r\n")) == 0 || + strncmp("version=3\r\n", p, strlen("version=3\r\n")) == 0) { + return 100; + } + } + return 0; + } + return 0; + } +} + +#define SECTOR_SIZE 512 +#define DESC_SIZE (20 * SECTOR_SIZE) /* 20 sectors of 512 bytes each */ +#define BUF_SIZE 4096 +#define HEADER_SIZE 512 /* first sector of 512 bytes */ + +static void vmdk_free_extents(BlockDriverState *bs) +{ + int i; + BDRVVmdkState *s = bs->opaque; + VmdkExtent *e; + + for (i = 0; i < s->num_extents; i++) { + e = &s->extents[i]; + g_free(e->l1_table); + g_free(e->l2_cache); + g_free(e->l1_backup_table); + g_free(e->type); + if (e->file != bs->file) { + bdrv_unref_child(bs, e->file); + } + } + g_free(s->extents); +} + +static void vmdk_free_last_extent(BlockDriverState *bs) +{ + BDRVVmdkState *s = bs->opaque; + + if (s->num_extents == 0) { + return; + } + s->num_extents--; + s->extents = g_renew(VmdkExtent, s->extents, s->num_extents); +} + +/* Return -ve errno, or 0 on success and write CID into *pcid. */ +static int vmdk_read_cid(BlockDriverState *bs, int parent, uint32_t *pcid) +{ + char *desc; + uint32_t cid; + const char *p_name, *cid_str; + size_t cid_str_size; + BDRVVmdkState *s = bs->opaque; + int ret; + + desc = g_malloc0(DESC_SIZE); + ret = bdrv_pread(bs->file, s->desc_offset, desc, DESC_SIZE); + if (ret < 0) { + goto out; + } + + if (parent) { + cid_str = "parentCID"; + cid_str_size = sizeof("parentCID"); + } else { + cid_str = "CID"; + cid_str_size = sizeof("CID"); + } + + desc[DESC_SIZE - 1] = '\0'; + p_name = strstr(desc, cid_str); + if (p_name == NULL) { + ret = -EINVAL; + goto out; + } + p_name += cid_str_size; + if (sscanf(p_name, "%" SCNx32, &cid) != 1) { + ret = -EINVAL; + goto out; + } + *pcid = cid; + ret = 0; + +out: + g_free(desc); + return ret; +} + +static int vmdk_write_cid(BlockDriverState *bs, uint32_t cid) +{ + char *desc, *tmp_desc; + char *p_name, *tmp_str; + BDRVVmdkState *s = bs->opaque; + int ret = 0; + + desc = g_malloc0(DESC_SIZE); + tmp_desc = g_malloc0(DESC_SIZE); + ret = bdrv_pread(bs->file, s->desc_offset, desc, DESC_SIZE); + if (ret < 0) { + goto out; + } + + desc[DESC_SIZE - 1] = '\0'; + tmp_str = strstr(desc, "parentCID"); + if (tmp_str == NULL) { + ret = -EINVAL; + goto out; + } + + pstrcpy(tmp_desc, DESC_SIZE, tmp_str); + p_name = strstr(desc, "CID"); + if (p_name != NULL) { + p_name += sizeof("CID"); + snprintf(p_name, DESC_SIZE - (p_name - desc), "%" PRIx32 "\n", cid); + pstrcat(desc, DESC_SIZE, tmp_desc); + } + + ret = bdrv_pwrite_sync(bs->file, s->desc_offset, desc, DESC_SIZE); + +out: + g_free(desc); + g_free(tmp_desc); + return ret; +} + +static int vmdk_is_cid_valid(BlockDriverState *bs) +{ + BDRVVmdkState *s = bs->opaque; + uint32_t cur_pcid; + + if (!s->cid_checked && bs->backing) { + BlockDriverState *p_bs = bs->backing->bs; + + if (strcmp(p_bs->drv->format_name, "vmdk")) { + /* Backing file is not in vmdk format, so it does not have + * a CID, which makes the overlay's parent CID invalid */ + return 0; + } + + if (vmdk_read_cid(p_bs, 0, &cur_pcid) != 0) { + /* read failure: report as not valid */ + return 0; + } + if (s->parent_cid != cur_pcid) { + /* CID not valid */ + return 0; + } + } + s->cid_checked = true; + /* CID valid */ + return 1; +} + +/* We have nothing to do for VMDK reopen, stubs just return success */ +static int vmdk_reopen_prepare(BDRVReopenState *state, + BlockReopenQueue *queue, Error **errp) +{ + assert(state != NULL); + assert(state->bs != NULL); + return 0; +} + +static int vmdk_parent_open(BlockDriverState *bs) +{ + char *p_name; + char *desc; + BDRVVmdkState *s = bs->opaque; + int ret; + + desc = g_malloc0(DESC_SIZE + 1); + ret = bdrv_pread(bs->file, s->desc_offset, desc, DESC_SIZE); + if (ret < 0) { + goto out; + } + ret = 0; + + p_name = strstr(desc, "parentFileNameHint"); + if (p_name != NULL) { + char *end_name; + + p_name += sizeof("parentFileNameHint") + 1; + end_name = strchr(p_name, '\"'); + if (end_name == NULL) { + ret = -EINVAL; + goto out; + } + if ((end_name - p_name) > sizeof(bs->auto_backing_file) - 1) { + ret = -EINVAL; + goto out; + } + + pstrcpy(bs->auto_backing_file, end_name - p_name + 1, p_name); + pstrcpy(bs->backing_file, sizeof(bs->backing_file), + bs->auto_backing_file); + pstrcpy(bs->backing_format, sizeof(bs->backing_format), + "vmdk"); + } + +out: + g_free(desc); + return ret; +} + +/* Create and append extent to the extent array. Return the added VmdkExtent + * address. return NULL if allocation failed. */ +static int vmdk_add_extent(BlockDriverState *bs, + BdrvChild *file, bool flat, int64_t sectors, + int64_t l1_offset, int64_t l1_backup_offset, + uint32_t l1_size, + int l2_size, uint64_t cluster_sectors, + VmdkExtent **new_extent, + Error **errp) +{ + VmdkExtent *extent; + BDRVVmdkState *s = bs->opaque; + int64_t nb_sectors; + + if (cluster_sectors > 0x200000) { + /* 0x200000 * 512Bytes = 1GB for one cluster is unrealistic */ + error_setg(errp, "Invalid granularity, image may be corrupt"); + return -EFBIG; + } + if (l1_size > 32 * 1024 * 1024) { + /* + * Although with big capacity and small l1_entry_sectors, we can get a + * big l1_size, we don't want unbounded value to allocate the table. + * Limit it to 32M, which is enough to store: + * 8TB - for both VMDK3 & VMDK4 with + * minimal cluster size: 512B + * minimal L2 table size: 512 entries + * 8 TB is still more than the maximal value supported for + * VMDK3 & VMDK4 which is 2TB. + * 64TB - for "ESXi seSparse Extent" + * minimal cluster size: 512B (default is 4KB) + * L2 table size: 4096 entries (const). + * 64TB is more than the maximal value supported for + * seSparse VMDKs (which is slightly less than 64TB) + */ + error_setg(errp, "L1 size too big"); + return -EFBIG; + } + + nb_sectors = bdrv_nb_sectors(file->bs); + if (nb_sectors < 0) { + return nb_sectors; + } + + s->extents = g_renew(VmdkExtent, s->extents, s->num_extents + 1); + extent = &s->extents[s->num_extents]; + s->num_extents++; + + memset(extent, 0, sizeof(VmdkExtent)); + extent->file = file; + extent->flat = flat; + extent->sectors = sectors; + extent->l1_table_offset = l1_offset; + extent->l1_backup_table_offset = l1_backup_offset; + extent->l1_size = l1_size; + extent->l1_entry_sectors = l2_size * cluster_sectors; + extent->l2_size = l2_size; + extent->cluster_sectors = flat ? sectors : cluster_sectors; + extent->next_cluster_sector = ROUND_UP(nb_sectors, cluster_sectors); + extent->entry_size = sizeof(uint32_t); + + if (s->num_extents > 1) { + extent->end_sector = (*(extent - 1)).end_sector + extent->sectors; + } else { + extent->end_sector = extent->sectors; + } + bs->total_sectors = extent->end_sector; + if (new_extent) { + *new_extent = extent; + } + return 0; +} + +static int vmdk_init_tables(BlockDriverState *bs, VmdkExtent *extent, + Error **errp) +{ + int ret; + size_t l1_size; + int i; + + /* read the L1 table */ + l1_size = extent->l1_size * extent->entry_size; + extent->l1_table = g_try_malloc(l1_size); + if (l1_size && extent->l1_table == NULL) { + return -ENOMEM; + } + + ret = bdrv_pread(extent->file, + extent->l1_table_offset, + extent->l1_table, + l1_size); + if (ret < 0) { + bdrv_refresh_filename(extent->file->bs); + error_setg_errno(errp, -ret, + "Could not read l1 table from extent '%s'", + extent->file->bs->filename); + goto fail_l1; + } + for (i = 0; i < extent->l1_size; i++) { + if (extent->entry_size == sizeof(uint64_t)) { + le64_to_cpus((uint64_t *)extent->l1_table + i); + } else { + assert(extent->entry_size == sizeof(uint32_t)); + le32_to_cpus((uint32_t *)extent->l1_table + i); + } + } + + if (extent->l1_backup_table_offset) { + assert(!extent->sesparse); + extent->l1_backup_table = g_try_malloc(l1_size); + if (l1_size && extent->l1_backup_table == NULL) { + ret = -ENOMEM; + goto fail_l1; + } + ret = bdrv_pread(extent->file, + extent->l1_backup_table_offset, + extent->l1_backup_table, + l1_size); + if (ret < 0) { + bdrv_refresh_filename(extent->file->bs); + error_setg_errno(errp, -ret, + "Could not read l1 backup table from extent '%s'", + extent->file->bs->filename); + goto fail_l1b; + } + for (i = 0; i < extent->l1_size; i++) { + le32_to_cpus(&extent->l1_backup_table[i]); + } + } + + extent->l2_cache = + g_malloc(extent->entry_size * extent->l2_size * L2_CACHE_SIZE); + return 0; + fail_l1b: + g_free(extent->l1_backup_table); + fail_l1: + g_free(extent->l1_table); + return ret; +} + +static int vmdk_open_vmfs_sparse(BlockDriverState *bs, + BdrvChild *file, + int flags, Error **errp) +{ + int ret; + uint32_t magic; + VMDK3Header header; + VmdkExtent *extent = NULL; + + ret = bdrv_pread(file, sizeof(magic), &header, sizeof(header)); + if (ret < 0) { + bdrv_refresh_filename(file->bs); + error_setg_errno(errp, -ret, + "Could not read header from file '%s'", + file->bs->filename); + return ret; + } + ret = vmdk_add_extent(bs, file, false, + le32_to_cpu(header.disk_sectors), + (int64_t)le32_to_cpu(header.l1dir_offset) << 9, + 0, + le32_to_cpu(header.l1dir_size), + 4096, + le32_to_cpu(header.granularity), + &extent, + errp); + if (ret < 0) { + return ret; + } + ret = vmdk_init_tables(bs, extent, errp); + if (ret) { + /* free extent allocated by vmdk_add_extent */ + vmdk_free_last_extent(bs); + } + return ret; +} + +#define SESPARSE_CONST_HEADER_MAGIC UINT64_C(0x00000000cafebabe) +#define SESPARSE_VOLATILE_HEADER_MAGIC UINT64_C(0x00000000cafecafe) + +/* Strict checks - format not officially documented */ +static int check_se_sparse_const_header(VMDKSESparseConstHeader *header, + Error **errp) +{ + header->magic = le64_to_cpu(header->magic); + header->version = le64_to_cpu(header->version); + header->grain_size = le64_to_cpu(header->grain_size); + header->grain_table_size = le64_to_cpu(header->grain_table_size); + header->flags = le64_to_cpu(header->flags); + header->reserved1 = le64_to_cpu(header->reserved1); + header->reserved2 = le64_to_cpu(header->reserved2); + header->reserved3 = le64_to_cpu(header->reserved3); + header->reserved4 = le64_to_cpu(header->reserved4); + + header->volatile_header_offset = + le64_to_cpu(header->volatile_header_offset); + header->volatile_header_size = le64_to_cpu(header->volatile_header_size); + + header->journal_header_offset = le64_to_cpu(header->journal_header_offset); + header->journal_header_size = le64_to_cpu(header->journal_header_size); + + header->journal_offset = le64_to_cpu(header->journal_offset); + header->journal_size = le64_to_cpu(header->journal_size); + + header->grain_dir_offset = le64_to_cpu(header->grain_dir_offset); + header->grain_dir_size = le64_to_cpu(header->grain_dir_size); + + header->grain_tables_offset = le64_to_cpu(header->grain_tables_offset); + header->grain_tables_size = le64_to_cpu(header->grain_tables_size); + + header->free_bitmap_offset = le64_to_cpu(header->free_bitmap_offset); + header->free_bitmap_size = le64_to_cpu(header->free_bitmap_size); + + header->backmap_offset = le64_to_cpu(header->backmap_offset); + header->backmap_size = le64_to_cpu(header->backmap_size); + + header->grains_offset = le64_to_cpu(header->grains_offset); + header->grains_size = le64_to_cpu(header->grains_size); + + if (header->magic != SESPARSE_CONST_HEADER_MAGIC) { + error_setg(errp, "Bad const header magic: 0x%016" PRIx64, + header->magic); + return -EINVAL; + } + + if (header->version != 0x0000000200000001) { + error_setg(errp, "Unsupported version: 0x%016" PRIx64, + header->version); + return -ENOTSUP; + } + + if (header->grain_size != 8) { + error_setg(errp, "Unsupported grain size: %" PRIu64, + header->grain_size); + return -ENOTSUP; + } + + if (header->grain_table_size != 64) { + error_setg(errp, "Unsupported grain table size: %" PRIu64, + header->grain_table_size); + return -ENOTSUP; + } + + if (header->flags != 0) { + error_setg(errp, "Unsupported flags: 0x%016" PRIx64, + header->flags); + return -ENOTSUP; + } + + if (header->reserved1 != 0 || header->reserved2 != 0 || + header->reserved3 != 0 || header->reserved4 != 0) { + error_setg(errp, "Unsupported reserved bits:" + " 0x%016" PRIx64 " 0x%016" PRIx64 + " 0x%016" PRIx64 " 0x%016" PRIx64, + header->reserved1, header->reserved2, + header->reserved3, header->reserved4); + return -ENOTSUP; + } + + /* check that padding is 0 */ + if (!buffer_is_zero(header->pad, sizeof(header->pad))) { + error_setg(errp, "Unsupported non-zero const header padding"); + return -ENOTSUP; + } + + return 0; +} + +static int check_se_sparse_volatile_header(VMDKSESparseVolatileHeader *header, + Error **errp) +{ + header->magic = le64_to_cpu(header->magic); + header->free_gt_number = le64_to_cpu(header->free_gt_number); + header->next_txn_seq_number = le64_to_cpu(header->next_txn_seq_number); + header->replay_journal = le64_to_cpu(header->replay_journal); + + if (header->magic != SESPARSE_VOLATILE_HEADER_MAGIC) { + error_setg(errp, "Bad volatile header magic: 0x%016" PRIx64, + header->magic); + return -EINVAL; + } + + if (header->replay_journal) { + error_setg(errp, "Image is dirty, Replaying journal not supported"); + return -ENOTSUP; + } + + /* check that padding is 0 */ + if (!buffer_is_zero(header->pad, sizeof(header->pad))) { + error_setg(errp, "Unsupported non-zero volatile header padding"); + return -ENOTSUP; + } + + return 0; +} + +static int vmdk_open_se_sparse(BlockDriverState *bs, + BdrvChild *file, + int flags, Error **errp) +{ + int ret; + VMDKSESparseConstHeader const_header; + VMDKSESparseVolatileHeader volatile_header; + VmdkExtent *extent = NULL; + + ret = bdrv_apply_auto_read_only(bs, + "No write support for seSparse images available", errp); + if (ret < 0) { + return ret; + } + + assert(sizeof(const_header) == SECTOR_SIZE); + + ret = bdrv_pread(file, 0, &const_header, sizeof(const_header)); + if (ret < 0) { + bdrv_refresh_filename(file->bs); + error_setg_errno(errp, -ret, + "Could not read const header from file '%s'", + file->bs->filename); + return ret; + } + + /* check const header */ + ret = check_se_sparse_const_header(&const_header, errp); + if (ret < 0) { + return ret; + } + + assert(sizeof(volatile_header) == SECTOR_SIZE); + + ret = bdrv_pread(file, + const_header.volatile_header_offset * SECTOR_SIZE, + &volatile_header, sizeof(volatile_header)); + if (ret < 0) { + bdrv_refresh_filename(file->bs); + error_setg_errno(errp, -ret, + "Could not read volatile header from file '%s'", + file->bs->filename); + return ret; + } + + /* check volatile header */ + ret = check_se_sparse_volatile_header(&volatile_header, errp); + if (ret < 0) { + return ret; + } + + ret = vmdk_add_extent(bs, file, false, + const_header.capacity, + const_header.grain_dir_offset * SECTOR_SIZE, + 0, + const_header.grain_dir_size * + SECTOR_SIZE / sizeof(uint64_t), + const_header.grain_table_size * + SECTOR_SIZE / sizeof(uint64_t), + const_header.grain_size, + &extent, + errp); + if (ret < 0) { + return ret; + } + + extent->sesparse = true; + extent->sesparse_l2_tables_offset = const_header.grain_tables_offset; + extent->sesparse_clusters_offset = const_header.grains_offset; + extent->entry_size = sizeof(uint64_t); + + ret = vmdk_init_tables(bs, extent, errp); + if (ret) { + /* free extent allocated by vmdk_add_extent */ + vmdk_free_last_extent(bs); + } + + return ret; +} + +static int vmdk_open_desc_file(BlockDriverState *bs, int flags, char *buf, + QDict *options, Error **errp); + +static char *vmdk_read_desc(BdrvChild *file, uint64_t desc_offset, Error **errp) +{ + int64_t size; + char *buf; + int ret; + + size = bdrv_getlength(file->bs); + if (size < 0) { + error_setg_errno(errp, -size, "Could not access file"); + return NULL; + } + + if (size < 4) { + /* Both descriptor file and sparse image must be much larger than 4 + * bytes, also callers of vmdk_read_desc want to compare the first 4 + * bytes with VMDK4_MAGIC, let's error out if less is read. */ + error_setg(errp, "File is too small, not a valid image"); + return NULL; + } + + size = MIN(size, (1 << 20) - 1); /* avoid unbounded allocation */ + buf = g_malloc(size + 1); + + ret = bdrv_pread(file, desc_offset, buf, size); + if (ret < 0) { + error_setg_errno(errp, -ret, "Could not read from file"); + g_free(buf); + return NULL; + } + buf[ret] = 0; + + return buf; +} + +static int vmdk_open_vmdk4(BlockDriverState *bs, + BdrvChild *file, + int flags, QDict *options, Error **errp) +{ + int ret; + uint32_t magic; + uint32_t l1_size, l1_entry_sectors; + VMDK4Header header; + VmdkExtent *extent = NULL; + BDRVVmdkState *s = bs->opaque; + int64_t l1_backup_offset = 0; + bool compressed; + + ret = bdrv_pread(file, sizeof(magic), &header, sizeof(header)); + if (ret < 0) { + bdrv_refresh_filename(file->bs); + error_setg_errno(errp, -ret, + "Could not read header from file '%s'", + file->bs->filename); + return -EINVAL; + } + if (header.capacity == 0) { + uint64_t desc_offset = le64_to_cpu(header.desc_offset); + if (desc_offset) { + char *buf = vmdk_read_desc(file, desc_offset << 9, errp); + if (!buf) { + return -EINVAL; + } + ret = vmdk_open_desc_file(bs, flags, buf, options, errp); + g_free(buf); + return ret; + } + } + + if (!s->create_type) { + s->create_type = g_strdup("monolithicSparse"); + } + + if (le64_to_cpu(header.gd_offset) == VMDK4_GD_AT_END) { + /* + * The footer takes precedence over the header, so read it in. The + * footer starts at offset -1024 from the end: One sector for the + * footer, and another one for the end-of-stream marker. + */ + struct { + struct { + uint64_t val; + uint32_t size; + uint32_t type; + uint8_t pad[512 - 16]; + } QEMU_PACKED footer_marker; + + uint32_t magic; + VMDK4Header header; + uint8_t pad[512 - 4 - sizeof(VMDK4Header)]; + + struct { + uint64_t val; + uint32_t size; + uint32_t type; + uint8_t pad[512 - 16]; + } QEMU_PACKED eos_marker; + } QEMU_PACKED footer; + + ret = bdrv_pread(file, + bs->file->bs->total_sectors * 512 - 1536, + &footer, sizeof(footer)); + if (ret < 0) { + error_setg_errno(errp, -ret, "Failed to read footer"); + return ret; + } + + /* Some sanity checks for the footer */ + if (be32_to_cpu(footer.magic) != VMDK4_MAGIC || + le32_to_cpu(footer.footer_marker.size) != 0 || + le32_to_cpu(footer.footer_marker.type) != MARKER_FOOTER || + le64_to_cpu(footer.eos_marker.val) != 0 || + le32_to_cpu(footer.eos_marker.size) != 0 || + le32_to_cpu(footer.eos_marker.type) != MARKER_END_OF_STREAM) + { + error_setg(errp, "Invalid footer"); + return -EINVAL; + } + + header = footer.header; + } + + compressed = + le16_to_cpu(header.compressAlgorithm) == VMDK4_COMPRESSION_DEFLATE; + if (le32_to_cpu(header.version) > 3) { + error_setg(errp, "Unsupported VMDK version %" PRIu32, + le32_to_cpu(header.version)); + return -ENOTSUP; + } else if (le32_to_cpu(header.version) == 3 && (flags & BDRV_O_RDWR) && + !compressed) { + /* VMware KB 2064959 explains that version 3 added support for + * persistent changed block tracking (CBT), and backup software can + * read it as version=1 if it doesn't care about the changed area + * information. So we are safe to enable read only. */ + error_setg(errp, "VMDK version 3 must be read only"); + return -EINVAL; + } + + if (le32_to_cpu(header.num_gtes_per_gt) > 512) { + error_setg(errp, "L2 table size too big"); + return -EINVAL; + } + + l1_entry_sectors = le32_to_cpu(header.num_gtes_per_gt) + * le64_to_cpu(header.granularity); + if (l1_entry_sectors == 0) { + error_setg(errp, "L1 entry size is invalid"); + return -EINVAL; + } + l1_size = (le64_to_cpu(header.capacity) + l1_entry_sectors - 1) + / l1_entry_sectors; + if (le32_to_cpu(header.flags) & VMDK4_FLAG_RGD) { + l1_backup_offset = le64_to_cpu(header.rgd_offset) << 9; + } + if (bdrv_nb_sectors(file->bs) < le64_to_cpu(header.grain_offset)) { + error_setg(errp, "File truncated, expecting at least %" PRId64 " bytes", + (int64_t)(le64_to_cpu(header.grain_offset) + * BDRV_SECTOR_SIZE)); + return -EINVAL; + } + + ret = vmdk_add_extent(bs, file, false, + le64_to_cpu(header.capacity), + le64_to_cpu(header.gd_offset) << 9, + l1_backup_offset, + l1_size, + le32_to_cpu(header.num_gtes_per_gt), + le64_to_cpu(header.granularity), + &extent, + errp); + if (ret < 0) { + return ret; + } + extent->compressed = + le16_to_cpu(header.compressAlgorithm) == VMDK4_COMPRESSION_DEFLATE; + if (extent->compressed) { + g_free(s->create_type); + s->create_type = g_strdup("streamOptimized"); + } + extent->has_marker = le32_to_cpu(header.flags) & VMDK4_FLAG_MARKER; + extent->version = le32_to_cpu(header.version); + extent->has_zero_grain = le32_to_cpu(header.flags) & VMDK4_FLAG_ZERO_GRAIN; + ret = vmdk_init_tables(bs, extent, errp); + if (ret) { + /* free extent allocated by vmdk_add_extent */ + vmdk_free_last_extent(bs); + } + return ret; +} + +/* find an option value out of descriptor file */ +static int vmdk_parse_description(const char *desc, const char *opt_name, + char *buf, int buf_size) +{ + char *opt_pos, *opt_end; + const char *end = desc + strlen(desc); + + opt_pos = strstr(desc, opt_name); + if (!opt_pos) { + return VMDK_ERROR; + } + /* Skip "=\"" following opt_name */ + opt_pos += strlen(opt_name) + 2; + if (opt_pos >= end) { + return VMDK_ERROR; + } + opt_end = opt_pos; + while (opt_end < end && *opt_end != '"') { + opt_end++; + } + if (opt_end == end || buf_size < opt_end - opt_pos + 1) { + return VMDK_ERROR; + } + pstrcpy(buf, opt_end - opt_pos + 1, opt_pos); + return VMDK_OK; +} + +/* Open an extent file and append to bs array */ +static int vmdk_open_sparse(BlockDriverState *bs, BdrvChild *file, int flags, + char *buf, QDict *options, Error **errp) +{ + uint32_t magic; + + magic = ldl_be_p(buf); + switch (magic) { + case VMDK3_MAGIC: + return vmdk_open_vmfs_sparse(bs, file, flags, errp); + case VMDK4_MAGIC: + return vmdk_open_vmdk4(bs, file, flags, options, errp); + default: + error_setg(errp, "Image not in VMDK format"); + return -EINVAL; + } +} + +static const char *next_line(const char *s) +{ + while (*s) { + if (*s == '\n') { + return s + 1; + } + s++; + } + return s; +} + +static int vmdk_parse_extents(const char *desc, BlockDriverState *bs, + QDict *options, Error **errp) +{ + int ret; + int matches; + char access[11]; + char type[11]; + char fname[512]; + const char *p, *np; + int64_t sectors = 0; + int64_t flat_offset; + char *desc_file_dir = NULL; + char *extent_path; + BdrvChild *extent_file; + BdrvChildRole extent_role; + BDRVVmdkState *s = bs->opaque; + VmdkExtent *extent = NULL; + char extent_opt_prefix[32]; + Error *local_err = NULL; + + for (p = desc; *p; p = next_line(p)) { + /* parse extent line in one of below formats: + * + * RW [size in sectors] FLAT "file-name.vmdk" OFFSET + * RW [size in sectors] SPARSE "file-name.vmdk" + * RW [size in sectors] VMFS "file-name.vmdk" + * RW [size in sectors] VMFSSPARSE "file-name.vmdk" + * RW [size in sectors] SESPARSE "file-name.vmdk" + */ + flat_offset = -1; + matches = sscanf(p, "%10s %" SCNd64 " %10s \"%511[^\n\r\"]\" %" SCNd64, + access, §ors, type, fname, &flat_offset); + if (matches < 4 || strcmp(access, "RW")) { + continue; + } else if (!strcmp(type, "FLAT")) { + if (matches != 5 || flat_offset < 0) { + goto invalid; + } + } else if (!strcmp(type, "VMFS")) { + if (matches == 4) { + flat_offset = 0; + } else { + goto invalid; + } + } else if (matches != 4) { + goto invalid; + } + + if (sectors <= 0 || + (strcmp(type, "FLAT") && strcmp(type, "SPARSE") && + strcmp(type, "VMFS") && strcmp(type, "VMFSSPARSE") && + strcmp(type, "SESPARSE")) || + (strcmp(access, "RW"))) { + continue; + } + + if (path_is_absolute(fname)) { + extent_path = g_strdup(fname); + } else { + if (!desc_file_dir) { + desc_file_dir = bdrv_dirname(bs->file->bs, errp); + if (!desc_file_dir) { + bdrv_refresh_filename(bs->file->bs); + error_prepend(errp, "Cannot use relative paths with VMDK " + "descriptor file '%s': ", + bs->file->bs->filename); + ret = -EINVAL; + goto out; + } + } + + extent_path = g_strconcat(desc_file_dir, fname, NULL); + } + + ret = snprintf(extent_opt_prefix, 32, "extents.%d", s->num_extents); + assert(ret < 32); + + extent_role = BDRV_CHILD_DATA; + if (strcmp(type, "FLAT") != 0 && strcmp(type, "VMFS") != 0) { + /* non-flat extents have metadata */ + extent_role |= BDRV_CHILD_METADATA; + } + + extent_file = bdrv_open_child(extent_path, options, extent_opt_prefix, + bs, &child_of_bds, extent_role, false, + &local_err); + g_free(extent_path); + if (local_err) { + error_propagate(errp, local_err); + ret = -EINVAL; + goto out; + } + + /* save to extents array */ + if (!strcmp(type, "FLAT") || !strcmp(type, "VMFS")) { + /* FLAT extent */ + + ret = vmdk_add_extent(bs, extent_file, true, sectors, + 0, 0, 0, 0, 0, &extent, errp); + if (ret < 0) { + bdrv_unref_child(bs, extent_file); + goto out; + } + extent->flat_start_offset = flat_offset << 9; + } else if (!strcmp(type, "SPARSE") || !strcmp(type, "VMFSSPARSE")) { + /* SPARSE extent and VMFSSPARSE extent are both "COWD" sparse file*/ + char *buf = vmdk_read_desc(extent_file, 0, errp); + if (!buf) { + ret = -EINVAL; + } else { + ret = vmdk_open_sparse(bs, extent_file, bs->open_flags, buf, + options, errp); + } + g_free(buf); + if (ret) { + bdrv_unref_child(bs, extent_file); + goto out; + } + extent = &s->extents[s->num_extents - 1]; + } else if (!strcmp(type, "SESPARSE")) { + ret = vmdk_open_se_sparse(bs, extent_file, bs->open_flags, errp); + if (ret) { + bdrv_unref_child(bs, extent_file); + goto out; + } + extent = &s->extents[s->num_extents - 1]; + } else { + error_setg(errp, "Unsupported extent type '%s'", type); + bdrv_unref_child(bs, extent_file); + ret = -ENOTSUP; + goto out; + } + extent->type = g_strdup(type); + } + + ret = 0; + goto out; + +invalid: + np = next_line(p); + assert(np != p); + if (np[-1] == '\n') { + np--; + } + error_setg(errp, "Invalid extent line: %.*s", (int)(np - p), p); + ret = -EINVAL; + +out: + g_free(desc_file_dir); + return ret; +} + +static int vmdk_open_desc_file(BlockDriverState *bs, int flags, char *buf, + QDict *options, Error **errp) +{ + int ret; + char ct[128]; + BDRVVmdkState *s = bs->opaque; + + if (vmdk_parse_description(buf, "createType", ct, sizeof(ct))) { + error_setg(errp, "invalid VMDK image descriptor"); + ret = -EINVAL; + goto exit; + } + if (strcmp(ct, "monolithicFlat") && + strcmp(ct, "vmfs") && + strcmp(ct, "vmfsSparse") && + strcmp(ct, "seSparse") && + strcmp(ct, "twoGbMaxExtentSparse") && + strcmp(ct, "twoGbMaxExtentFlat")) { + error_setg(errp, "Unsupported image type '%s'", ct); + ret = -ENOTSUP; + goto exit; + } + s->create_type = g_strdup(ct); + s->desc_offset = 0; + ret = vmdk_parse_extents(buf, bs, options, errp); +exit: + return ret; +} + +static int vmdk_open(BlockDriverState *bs, QDict *options, int flags, + Error **errp) +{ + char *buf; + int ret; + BDRVVmdkState *s = bs->opaque; + uint32_t magic; + + bs->file = bdrv_open_child(NULL, options, "file", bs, &child_of_bds, + BDRV_CHILD_IMAGE, false, errp); + if (!bs->file) { + return -EINVAL; + } + + buf = vmdk_read_desc(bs->file, 0, errp); + if (!buf) { + return -EINVAL; + } + + magic = ldl_be_p(buf); + switch (magic) { + case VMDK3_MAGIC: + case VMDK4_MAGIC: + ret = vmdk_open_sparse(bs, bs->file, flags, buf, options, + errp); + s->desc_offset = 0x200; + break; + default: + /* No data in the descriptor file */ + bs->file->role &= ~BDRV_CHILD_DATA; + + /* Must succeed because we have given up permissions if anything */ + bdrv_child_refresh_perms(bs, bs->file, &error_abort); + + ret = vmdk_open_desc_file(bs, flags, buf, options, errp); + break; + } + if (ret) { + goto fail; + } + + /* try to open parent images, if exist */ + ret = vmdk_parent_open(bs); + if (ret) { + goto fail; + } + ret = vmdk_read_cid(bs, 0, &s->cid); + if (ret) { + goto fail; + } + ret = vmdk_read_cid(bs, 1, &s->parent_cid); + if (ret) { + goto fail; + } + qemu_co_mutex_init(&s->lock); + + /* Disable migration when VMDK images are used */ + error_setg(&s->migration_blocker, "The vmdk format used by node '%s' " + "does not support live migration", + bdrv_get_device_or_node_name(bs)); + ret = migrate_add_blocker(s->migration_blocker, errp); + if (ret < 0) { + error_free(s->migration_blocker); + goto fail; + } + + g_free(buf); + return 0; + +fail: + g_free(buf); + g_free(s->create_type); + s->create_type = NULL; + vmdk_free_extents(bs); + return ret; +} + + +static void vmdk_refresh_limits(BlockDriverState *bs, Error **errp) +{ + BDRVVmdkState *s = bs->opaque; + int i; + + for (i = 0; i < s->num_extents; i++) { + if (!s->extents[i].flat) { + bs->bl.pwrite_zeroes_alignment = + MAX(bs->bl.pwrite_zeroes_alignment, + s->extents[i].cluster_sectors << BDRV_SECTOR_BITS); + } + } +} + +/** + * get_whole_cluster + * + * Copy backing file's cluster that covers @sector_num, otherwise write zero, + * to the cluster at @cluster_sector_num. If @zeroed is true, we're overwriting + * a zeroed cluster in the current layer and must not copy data from the + * backing file. + * + * If @skip_start_sector < @skip_end_sector, the relative range + * [@skip_start_sector, @skip_end_sector) is not copied or written, and leave + * it for call to write user data in the request. + */ +static int get_whole_cluster(BlockDriverState *bs, + VmdkExtent *extent, + uint64_t cluster_offset, + uint64_t offset, + uint64_t skip_start_bytes, + uint64_t skip_end_bytes, + bool zeroed) +{ + int ret = VMDK_OK; + int64_t cluster_bytes; + uint8_t *whole_grain; + bool copy_from_backing; + + /* For COW, align request sector_num to cluster start */ + cluster_bytes = extent->cluster_sectors << BDRV_SECTOR_BITS; + offset = QEMU_ALIGN_DOWN(offset, cluster_bytes); + whole_grain = qemu_blockalign(bs, cluster_bytes); + copy_from_backing = bs->backing && !zeroed; + + if (!copy_from_backing) { + memset(whole_grain, 0, skip_start_bytes); + memset(whole_grain + skip_end_bytes, 0, cluster_bytes - skip_end_bytes); + } + + assert(skip_end_bytes <= cluster_bytes); + /* we will be here if it's first write on non-exist grain(cluster). + * try to read from parent image, if exist */ + if (bs->backing && !vmdk_is_cid_valid(bs)) { + ret = VMDK_ERROR; + goto exit; + } + + /* Read backing data before skip range */ + if (skip_start_bytes > 0) { + if (copy_from_backing) { + /* qcow2 emits this on bs->file instead of bs->backing */ + BLKDBG_EVENT(extent->file, BLKDBG_COW_READ); + ret = bdrv_pread(bs->backing, offset, whole_grain, + skip_start_bytes); + if (ret < 0) { + ret = VMDK_ERROR; + goto exit; + } + } + BLKDBG_EVENT(extent->file, BLKDBG_COW_WRITE); + ret = bdrv_pwrite(extent->file, cluster_offset, whole_grain, + skip_start_bytes); + if (ret < 0) { + ret = VMDK_ERROR; + goto exit; + } + } + /* Read backing data after skip range */ + if (skip_end_bytes < cluster_bytes) { + if (copy_from_backing) { + /* qcow2 emits this on bs->file instead of bs->backing */ + BLKDBG_EVENT(extent->file, BLKDBG_COW_READ); + ret = bdrv_pread(bs->backing, offset + skip_end_bytes, + whole_grain + skip_end_bytes, + cluster_bytes - skip_end_bytes); + if (ret < 0) { + ret = VMDK_ERROR; + goto exit; + } + } + BLKDBG_EVENT(extent->file, BLKDBG_COW_WRITE); + ret = bdrv_pwrite(extent->file, cluster_offset + skip_end_bytes, + whole_grain + skip_end_bytes, + cluster_bytes - skip_end_bytes); + if (ret < 0) { + ret = VMDK_ERROR; + goto exit; + } + } + + ret = VMDK_OK; +exit: + qemu_vfree(whole_grain); + return ret; +} + +static int vmdk_L2update(VmdkExtent *extent, VmdkMetaData *m_data, + uint32_t offset) +{ + offset = cpu_to_le32(offset); + /* update L2 table */ + BLKDBG_EVENT(extent->file, BLKDBG_L2_UPDATE); + if (bdrv_pwrite(extent->file, + ((int64_t)m_data->l2_offset * 512) + + (m_data->l2_index * sizeof(offset)), + &offset, sizeof(offset)) < 0) { + return VMDK_ERROR; + } + /* update backup L2 table */ + if (extent->l1_backup_table_offset != 0) { + m_data->l2_offset = extent->l1_backup_table[m_data->l1_index]; + if (bdrv_pwrite(extent->file, + ((int64_t)m_data->l2_offset * 512) + + (m_data->l2_index * sizeof(offset)), + &offset, sizeof(offset)) < 0) { + return VMDK_ERROR; + } + } + if (bdrv_flush(extent->file->bs) < 0) { + return VMDK_ERROR; + } + if (m_data->l2_cache_entry) { + *m_data->l2_cache_entry = offset; + } + + return VMDK_OK; +} + +/** + * get_cluster_offset + * + * Look up cluster offset in extent file by sector number, and store in + * @cluster_offset. + * + * For flat extents, the start offset as parsed from the description file is + * returned. + * + * For sparse extents, look up in L1, L2 table. If allocate is true, return an + * offset for a new cluster and update L2 cache. If there is a backing file, + * COW is done before returning; otherwise, zeroes are written to the allocated + * cluster. Both COW and zero writing skips the sector range + * [@skip_start_sector, @skip_end_sector) passed in by caller, because caller + * has new data to write there. + * + * Returns: VMDK_OK if cluster exists and mapped in the image. + * VMDK_UNALLOC if cluster is not mapped and @allocate is false. + * VMDK_ERROR if failed. + */ +static int get_cluster_offset(BlockDriverState *bs, + VmdkExtent *extent, + VmdkMetaData *m_data, + uint64_t offset, + bool allocate, + uint64_t *cluster_offset, + uint64_t skip_start_bytes, + uint64_t skip_end_bytes) +{ + unsigned int l1_index, l2_offset, l2_index; + int min_index, i, j; + uint32_t min_count; + void *l2_table; + bool zeroed = false; + int64_t ret; + int64_t cluster_sector; + unsigned int l2_size_bytes = extent->l2_size * extent->entry_size; + + if (m_data) { + m_data->new_allocation = false; + } + if (extent->flat) { + *cluster_offset = extent->flat_start_offset; + return VMDK_OK; + } + + offset -= (extent->end_sector - extent->sectors) * SECTOR_SIZE; + l1_index = (offset >> 9) / extent->l1_entry_sectors; + if (l1_index >= extent->l1_size) { + return VMDK_ERROR; + } + if (extent->sesparse) { + uint64_t l2_offset_u64; + + assert(extent->entry_size == sizeof(uint64_t)); + + l2_offset_u64 = ((uint64_t *)extent->l1_table)[l1_index]; + if (l2_offset_u64 == 0) { + l2_offset = 0; + } else if ((l2_offset_u64 & 0xffffffff00000000) != 0x1000000000000000) { + /* + * Top most nibble is 0x1 if grain table is allocated. + * strict check - top most 4 bytes must be 0x10000000 since max + * supported size is 64TB for disk - so no more than 64TB / 16MB + * grain directories which is smaller than uint32, + * where 16MB is the only supported default grain table coverage. + */ + return VMDK_ERROR; + } else { + l2_offset_u64 = l2_offset_u64 & 0x00000000ffffffff; + l2_offset_u64 = extent->sesparse_l2_tables_offset + + l2_offset_u64 * l2_size_bytes / SECTOR_SIZE; + if (l2_offset_u64 > 0x00000000ffffffff) { + return VMDK_ERROR; + } + l2_offset = (unsigned int)(l2_offset_u64); + } + } else { + assert(extent->entry_size == sizeof(uint32_t)); + l2_offset = ((uint32_t *)extent->l1_table)[l1_index]; + } + if (!l2_offset) { + return VMDK_UNALLOC; + } + for (i = 0; i < L2_CACHE_SIZE; i++) { + if (l2_offset == extent->l2_cache_offsets[i]) { + /* increment the hit count */ + if (++extent->l2_cache_counts[i] == 0xffffffff) { + for (j = 0; j < L2_CACHE_SIZE; j++) { + extent->l2_cache_counts[j] >>= 1; + } + } + l2_table = (char *)extent->l2_cache + (i * l2_size_bytes); + goto found; + } + } + /* not found: load a new entry in the least used one */ + min_index = 0; + min_count = 0xffffffff; + for (i = 0; i < L2_CACHE_SIZE; i++) { + if (extent->l2_cache_counts[i] < min_count) { + min_count = extent->l2_cache_counts[i]; + min_index = i; + } + } + l2_table = (char *)extent->l2_cache + (min_index * l2_size_bytes); + BLKDBG_EVENT(extent->file, BLKDBG_L2_LOAD); + if (bdrv_pread(extent->file, + (int64_t)l2_offset * 512, + l2_table, + l2_size_bytes + ) != l2_size_bytes) { + return VMDK_ERROR; + } + + extent->l2_cache_offsets[min_index] = l2_offset; + extent->l2_cache_counts[min_index] = 1; + found: + l2_index = ((offset >> 9) / extent->cluster_sectors) % extent->l2_size; + if (m_data) { + m_data->l1_index = l1_index; + m_data->l2_index = l2_index; + m_data->l2_offset = l2_offset; + m_data->l2_cache_entry = ((uint32_t *)l2_table) + l2_index; + } + + if (extent->sesparse) { + cluster_sector = le64_to_cpu(((uint64_t *)l2_table)[l2_index]); + switch (cluster_sector & 0xf000000000000000) { + case 0x0000000000000000: + /* unallocated grain */ + if (cluster_sector != 0) { + return VMDK_ERROR; + } + break; + case 0x1000000000000000: + /* scsi-unmapped grain - fallthrough */ + case 0x2000000000000000: + /* zero grain */ + zeroed = true; + break; + case 0x3000000000000000: + /* allocated grain */ + cluster_sector = (((cluster_sector & 0x0fff000000000000) >> 48) | + ((cluster_sector & 0x0000ffffffffffff) << 12)); + cluster_sector = extent->sesparse_clusters_offset + + cluster_sector * extent->cluster_sectors; + break; + default: + return VMDK_ERROR; + } + } else { + cluster_sector = le32_to_cpu(((uint32_t *)l2_table)[l2_index]); + + if (extent->has_zero_grain && cluster_sector == VMDK_GTE_ZEROED) { + zeroed = true; + } + } + + if (!cluster_sector || zeroed) { + if (!allocate) { + return zeroed ? VMDK_ZEROED : VMDK_UNALLOC; + } + assert(!extent->sesparse); + + if (extent->next_cluster_sector >= VMDK_EXTENT_MAX_SECTORS) { + return VMDK_ERROR; + } + + cluster_sector = extent->next_cluster_sector; + extent->next_cluster_sector += extent->cluster_sectors; + + /* First of all we write grain itself, to avoid race condition + * that may to corrupt the image. + * This problem may occur because of insufficient space on host disk + * or inappropriate VM shutdown. + */ + ret = get_whole_cluster(bs, extent, cluster_sector * BDRV_SECTOR_SIZE, + offset, skip_start_bytes, skip_end_bytes, + zeroed); + if (ret) { + return ret; + } + if (m_data) { + m_data->new_allocation = true; + } + } + *cluster_offset = cluster_sector << BDRV_SECTOR_BITS; + return VMDK_OK; +} + +static VmdkExtent *find_extent(BDRVVmdkState *s, + int64_t sector_num, VmdkExtent *start_hint) +{ + VmdkExtent *extent = start_hint; + + if (!extent) { + extent = &s->extents[0]; + } + while (extent < &s->extents[s->num_extents]) { + if (sector_num < extent->end_sector) { + return extent; + } + extent++; + } + return NULL; +} + +static inline uint64_t vmdk_find_offset_in_cluster(VmdkExtent *extent, + int64_t offset) +{ + uint64_t extent_begin_offset, extent_relative_offset; + uint64_t cluster_size = extent->cluster_sectors * BDRV_SECTOR_SIZE; + + extent_begin_offset = + (extent->end_sector - extent->sectors) * BDRV_SECTOR_SIZE; + extent_relative_offset = offset - extent_begin_offset; + return extent_relative_offset % cluster_size; +} + +static int coroutine_fn vmdk_co_block_status(BlockDriverState *bs, + bool want_zero, + int64_t offset, int64_t bytes, + int64_t *pnum, int64_t *map, + BlockDriverState **file) +{ + BDRVVmdkState *s = bs->opaque; + int64_t index_in_cluster, n, ret; + uint64_t cluster_offset; + VmdkExtent *extent; + + extent = find_extent(s, offset >> BDRV_SECTOR_BITS, NULL); + if (!extent) { + return -EIO; + } + qemu_co_mutex_lock(&s->lock); + ret = get_cluster_offset(bs, extent, NULL, offset, false, &cluster_offset, + 0, 0); + qemu_co_mutex_unlock(&s->lock); + + index_in_cluster = vmdk_find_offset_in_cluster(extent, offset); + switch (ret) { + case VMDK_ERROR: + ret = -EIO; + break; + case VMDK_UNALLOC: + ret = 0; + break; + case VMDK_ZEROED: + ret = BDRV_BLOCK_ZERO; + break; + case VMDK_OK: + ret = BDRV_BLOCK_DATA; + if (!extent->compressed) { + ret |= BDRV_BLOCK_OFFSET_VALID; + *map = cluster_offset + index_in_cluster; + if (extent->flat) { + ret |= BDRV_BLOCK_RECURSE; + } + } + *file = extent->file->bs; + break; + } + + n = extent->cluster_sectors * BDRV_SECTOR_SIZE - index_in_cluster; + *pnum = MIN(n, bytes); + return ret; +} + +static int vmdk_write_extent(VmdkExtent *extent, int64_t cluster_offset, + int64_t offset_in_cluster, QEMUIOVector *qiov, + uint64_t qiov_offset, uint64_t n_bytes, + uint64_t offset) +{ + int ret; + VmdkGrainMarker *data = NULL; + uLongf buf_len; + QEMUIOVector local_qiov; + int64_t write_offset; + int64_t write_end_sector; + + if (extent->compressed) { + void *compressed_data; + + /* Only whole clusters */ + if (offset_in_cluster || + n_bytes > (extent->cluster_sectors * SECTOR_SIZE) || + (n_bytes < (extent->cluster_sectors * SECTOR_SIZE) && + offset + n_bytes != extent->end_sector * SECTOR_SIZE)) + { + ret = -EINVAL; + goto out; + } + + if (!extent->has_marker) { + ret = -EINVAL; + goto out; + } + buf_len = (extent->cluster_sectors << 9) * 2; + data = g_malloc(buf_len + sizeof(VmdkGrainMarker)); + + compressed_data = g_malloc(n_bytes); + qemu_iovec_to_buf(qiov, qiov_offset, compressed_data, n_bytes); + ret = compress(data->data, &buf_len, compressed_data, n_bytes); + g_free(compressed_data); + + if (ret != Z_OK || buf_len == 0) { + ret = -EINVAL; + goto out; + } + + data->lba = cpu_to_le64(offset >> BDRV_SECTOR_BITS); + data->size = cpu_to_le32(buf_len); + + n_bytes = buf_len + sizeof(VmdkGrainMarker); + qemu_iovec_init_buf(&local_qiov, data, n_bytes); + + BLKDBG_EVENT(extent->file, BLKDBG_WRITE_COMPRESSED); + } else { + qemu_iovec_init(&local_qiov, qiov->niov); + qemu_iovec_concat(&local_qiov, qiov, qiov_offset, n_bytes); + + BLKDBG_EVENT(extent->file, BLKDBG_WRITE_AIO); + } + + write_offset = cluster_offset + offset_in_cluster; + ret = bdrv_co_pwritev(extent->file, write_offset, n_bytes, + &local_qiov, 0); + + write_end_sector = DIV_ROUND_UP(write_offset + n_bytes, BDRV_SECTOR_SIZE); + + if (extent->compressed) { + extent->next_cluster_sector = write_end_sector; + } else { + extent->next_cluster_sector = MAX(extent->next_cluster_sector, + write_end_sector); + } + + if (ret < 0) { + goto out; + } + ret = 0; + out: + g_free(data); + if (!extent->compressed) { + qemu_iovec_destroy(&local_qiov); + } + return ret; +} + +static int vmdk_read_extent(VmdkExtent *extent, int64_t cluster_offset, + int64_t offset_in_cluster, QEMUIOVector *qiov, + int bytes) +{ + int ret; + int cluster_bytes, buf_bytes; + uint8_t *cluster_buf, *compressed_data; + uint8_t *uncomp_buf; + uint32_t data_len; + VmdkGrainMarker *marker; + uLongf buf_len; + + + if (!extent->compressed) { + BLKDBG_EVENT(extent->file, BLKDBG_READ_AIO); + ret = bdrv_co_preadv(extent->file, + cluster_offset + offset_in_cluster, bytes, + qiov, 0); + if (ret < 0) { + return ret; + } + return 0; + } + cluster_bytes = extent->cluster_sectors * 512; + /* Read two clusters in case GrainMarker + compressed data > one cluster */ + buf_bytes = cluster_bytes * 2; + cluster_buf = g_malloc(buf_bytes); + uncomp_buf = g_malloc(cluster_bytes); + BLKDBG_EVENT(extent->file, BLKDBG_READ_COMPRESSED); + ret = bdrv_pread(extent->file, + cluster_offset, + cluster_buf, buf_bytes); + if (ret < 0) { + goto out; + } + compressed_data = cluster_buf; + buf_len = cluster_bytes; + data_len = cluster_bytes; + if (extent->has_marker) { + marker = (VmdkGrainMarker *)cluster_buf; + compressed_data = marker->data; + data_len = le32_to_cpu(marker->size); + } + if (!data_len || data_len > buf_bytes) { + ret = -EINVAL; + goto out; + } + ret = uncompress(uncomp_buf, &buf_len, compressed_data, data_len); + if (ret != Z_OK) { + ret = -EINVAL; + goto out; + + } + if (offset_in_cluster < 0 || + offset_in_cluster + bytes > buf_len) { + ret = -EINVAL; + goto out; + } + qemu_iovec_from_buf(qiov, 0, uncomp_buf + offset_in_cluster, bytes); + ret = 0; + + out: + g_free(uncomp_buf); + g_free(cluster_buf); + return ret; +} + +static int coroutine_fn +vmdk_co_preadv(BlockDriverState *bs, uint64_t offset, uint64_t bytes, + QEMUIOVector *qiov, int flags) +{ + BDRVVmdkState *s = bs->opaque; + int ret; + uint64_t n_bytes, offset_in_cluster; + VmdkExtent *extent = NULL; + QEMUIOVector local_qiov; + uint64_t cluster_offset; + uint64_t bytes_done = 0; + + qemu_iovec_init(&local_qiov, qiov->niov); + qemu_co_mutex_lock(&s->lock); + + while (bytes > 0) { + extent = find_extent(s, offset >> BDRV_SECTOR_BITS, extent); + if (!extent) { + ret = -EIO; + goto fail; + } + ret = get_cluster_offset(bs, extent, NULL, + offset, false, &cluster_offset, 0, 0); + offset_in_cluster = vmdk_find_offset_in_cluster(extent, offset); + + n_bytes = MIN(bytes, extent->cluster_sectors * BDRV_SECTOR_SIZE + - offset_in_cluster); + + if (ret != VMDK_OK) { + /* if not allocated, try to read from parent image, if exist */ + if (bs->backing && ret != VMDK_ZEROED) { + if (!vmdk_is_cid_valid(bs)) { + ret = -EINVAL; + goto fail; + } + + qemu_iovec_reset(&local_qiov); + qemu_iovec_concat(&local_qiov, qiov, bytes_done, n_bytes); + + /* qcow2 emits this on bs->file instead of bs->backing */ + BLKDBG_EVENT(bs->file, BLKDBG_READ_BACKING_AIO); + ret = bdrv_co_preadv(bs->backing, offset, n_bytes, + &local_qiov, 0); + if (ret < 0) { + goto fail; + } + } else { + qemu_iovec_memset(qiov, bytes_done, 0, n_bytes); + } + } else { + qemu_iovec_reset(&local_qiov); + qemu_iovec_concat(&local_qiov, qiov, bytes_done, n_bytes); + + ret = vmdk_read_extent(extent, cluster_offset, offset_in_cluster, + &local_qiov, n_bytes); + if (ret) { + goto fail; + } + } + bytes -= n_bytes; + offset += n_bytes; + bytes_done += n_bytes; + } + + ret = 0; +fail: + qemu_co_mutex_unlock(&s->lock); + qemu_iovec_destroy(&local_qiov); + + return ret; +} + +/** + * vmdk_write: + * @zeroed: buf is ignored (data is zero), use zeroed_grain GTE feature + * if possible, otherwise return -ENOTSUP. + * @zero_dry_run: used for zeroed == true only, don't update L2 table, just try + * with each cluster. By dry run we can find if the zero write + * is possible without modifying image data. + * + * Returns: error code with 0 for success. + */ +static int vmdk_pwritev(BlockDriverState *bs, uint64_t offset, + uint64_t bytes, QEMUIOVector *qiov, + bool zeroed, bool zero_dry_run) +{ + BDRVVmdkState *s = bs->opaque; + VmdkExtent *extent = NULL; + int ret; + int64_t offset_in_cluster, n_bytes; + uint64_t cluster_offset; + uint64_t bytes_done = 0; + VmdkMetaData m_data; + + if (DIV_ROUND_UP(offset, BDRV_SECTOR_SIZE) > bs->total_sectors) { + error_report("Wrong offset: offset=0x%" PRIx64 + " total_sectors=0x%" PRIx64, + offset, bs->total_sectors); + return -EIO; + } + + while (bytes > 0) { + extent = find_extent(s, offset >> BDRV_SECTOR_BITS, extent); + if (!extent) { + return -EIO; + } + if (extent->sesparse) { + return -ENOTSUP; + } + offset_in_cluster = vmdk_find_offset_in_cluster(extent, offset); + n_bytes = MIN(bytes, extent->cluster_sectors * BDRV_SECTOR_SIZE + - offset_in_cluster); + + ret = get_cluster_offset(bs, extent, &m_data, offset, + !(extent->compressed || zeroed), + &cluster_offset, offset_in_cluster, + offset_in_cluster + n_bytes); + if (extent->compressed) { + if (ret == VMDK_OK) { + /* Refuse write to allocated cluster for streamOptimized */ + error_report("Could not write to allocated cluster" + " for streamOptimized"); + return -EIO; + } else if (!zeroed) { + /* allocate */ + ret = get_cluster_offset(bs, extent, &m_data, offset, + true, &cluster_offset, 0, 0); + } + } + if (ret == VMDK_ERROR) { + return -EINVAL; + } + if (zeroed) { + /* Do zeroed write, buf is ignored */ + if (extent->has_zero_grain && + offset_in_cluster == 0 && + n_bytes >= extent->cluster_sectors * BDRV_SECTOR_SIZE) { + n_bytes = extent->cluster_sectors * BDRV_SECTOR_SIZE; + if (!zero_dry_run && ret != VMDK_ZEROED) { + /* update L2 tables */ + if (vmdk_L2update(extent, &m_data, VMDK_GTE_ZEROED) + != VMDK_OK) { + return -EIO; + } + } + } else { + return -ENOTSUP; + } + } else { + ret = vmdk_write_extent(extent, cluster_offset, offset_in_cluster, + qiov, bytes_done, n_bytes, offset); + if (ret) { + return ret; + } + if (m_data.new_allocation) { + /* update L2 tables */ + if (vmdk_L2update(extent, &m_data, + cluster_offset >> BDRV_SECTOR_BITS) + != VMDK_OK) { + return -EIO; + } + } + } + bytes -= n_bytes; + offset += n_bytes; + bytes_done += n_bytes; + + /* update CID on the first write every time the virtual disk is + * opened */ + if (!s->cid_updated) { + ret = vmdk_write_cid(bs, g_random_int()); + if (ret < 0) { + return ret; + } + s->cid_updated = true; + } + } + return 0; +} + +static int coroutine_fn +vmdk_co_pwritev(BlockDriverState *bs, uint64_t offset, uint64_t bytes, + QEMUIOVector *qiov, int flags) +{ + int ret; + BDRVVmdkState *s = bs->opaque; + qemu_co_mutex_lock(&s->lock); + ret = vmdk_pwritev(bs, offset, bytes, qiov, false, false); + qemu_co_mutex_unlock(&s->lock); + return ret; +} + +static int coroutine_fn +vmdk_co_pwritev_compressed(BlockDriverState *bs, uint64_t offset, + uint64_t bytes, QEMUIOVector *qiov) +{ + if (bytes == 0) { + /* The caller will write bytes 0 to signal EOF. + * When receive it, we align EOF to a sector boundary. */ + BDRVVmdkState *s = bs->opaque; + int i, ret; + int64_t length; + + for (i = 0; i < s->num_extents; i++) { + length = bdrv_getlength(s->extents[i].file->bs); + if (length < 0) { + return length; + } + length = QEMU_ALIGN_UP(length, BDRV_SECTOR_SIZE); + ret = bdrv_truncate(s->extents[i].file, length, false, + PREALLOC_MODE_OFF, 0, NULL); + if (ret < 0) { + return ret; + } + } + return 0; + } + return vmdk_co_pwritev(bs, offset, bytes, qiov, 0); +} + +static int coroutine_fn vmdk_co_pwrite_zeroes(BlockDriverState *bs, + int64_t offset, + int bytes, + BdrvRequestFlags flags) +{ + int ret; + BDRVVmdkState *s = bs->opaque; + + qemu_co_mutex_lock(&s->lock); + /* write zeroes could fail if sectors not aligned to cluster, test it with + * dry_run == true before really updating image */ + ret = vmdk_pwritev(bs, offset, bytes, NULL, true, true); + if (!ret) { + ret = vmdk_pwritev(bs, offset, bytes, NULL, true, false); + } + qemu_co_mutex_unlock(&s->lock); + return ret; +} + +static int vmdk_init_extent(BlockBackend *blk, + int64_t filesize, bool flat, + bool compress, bool zeroed_grain, + Error **errp) +{ + int ret, i; + VMDK4Header header; + uint32_t tmp, magic, grains, gd_sectors, gt_size, gt_count; + uint32_t *gd_buf = NULL; + int gd_buf_size; + + if (flat) { + ret = blk_truncate(blk, filesize, false, PREALLOC_MODE_OFF, 0, errp); + goto exit; + } + magic = cpu_to_be32(VMDK4_MAGIC); + memset(&header, 0, sizeof(header)); + if (compress) { + header.version = 3; + } else if (zeroed_grain) { + header.version = 2; + } else { + header.version = 1; + } + header.flags = VMDK4_FLAG_RGD | VMDK4_FLAG_NL_DETECT + | (compress ? VMDK4_FLAG_COMPRESS | VMDK4_FLAG_MARKER : 0) + | (zeroed_grain ? VMDK4_FLAG_ZERO_GRAIN : 0); + header.compressAlgorithm = compress ? VMDK4_COMPRESSION_DEFLATE : 0; + header.capacity = filesize / BDRV_SECTOR_SIZE; + header.granularity = 128; + header.num_gtes_per_gt = BDRV_SECTOR_SIZE; + + grains = DIV_ROUND_UP(filesize / BDRV_SECTOR_SIZE, header.granularity); + gt_size = DIV_ROUND_UP(header.num_gtes_per_gt * sizeof(uint32_t), + BDRV_SECTOR_SIZE); + gt_count = DIV_ROUND_UP(grains, header.num_gtes_per_gt); + gd_sectors = DIV_ROUND_UP(gt_count * sizeof(uint32_t), BDRV_SECTOR_SIZE); + + header.desc_offset = 1; + header.desc_size = 20; + header.rgd_offset = header.desc_offset + header.desc_size; + header.gd_offset = header.rgd_offset + gd_sectors + (gt_size * gt_count); + header.grain_offset = + ROUND_UP(header.gd_offset + gd_sectors + (gt_size * gt_count), + header.granularity); + /* swap endianness for all header fields */ + header.version = cpu_to_le32(header.version); + header.flags = cpu_to_le32(header.flags); + header.capacity = cpu_to_le64(header.capacity); + header.granularity = cpu_to_le64(header.granularity); + header.num_gtes_per_gt = cpu_to_le32(header.num_gtes_per_gt); + header.desc_offset = cpu_to_le64(header.desc_offset); + header.desc_size = cpu_to_le64(header.desc_size); + header.rgd_offset = cpu_to_le64(header.rgd_offset); + header.gd_offset = cpu_to_le64(header.gd_offset); + header.grain_offset = cpu_to_le64(header.grain_offset); + header.compressAlgorithm = cpu_to_le16(header.compressAlgorithm); + + header.check_bytes[0] = 0xa; + header.check_bytes[1] = 0x20; + header.check_bytes[2] = 0xd; + header.check_bytes[3] = 0xa; + + /* write all the data */ + ret = blk_pwrite(blk, 0, &magic, sizeof(magic), 0); + if (ret < 0) { + error_setg(errp, QERR_IO_ERROR); + goto exit; + } + ret = blk_pwrite(blk, sizeof(magic), &header, sizeof(header), 0); + if (ret < 0) { + error_setg(errp, QERR_IO_ERROR); + goto exit; + } + + ret = blk_truncate(blk, le64_to_cpu(header.grain_offset) << 9, false, + PREALLOC_MODE_OFF, 0, errp); + if (ret < 0) { + goto exit; + } + + /* write grain directory */ + gd_buf_size = gd_sectors * BDRV_SECTOR_SIZE; + gd_buf = g_malloc0(gd_buf_size); + for (i = 0, tmp = le64_to_cpu(header.rgd_offset) + gd_sectors; + i < gt_count; i++, tmp += gt_size) { + gd_buf[i] = cpu_to_le32(tmp); + } + ret = blk_pwrite(blk, le64_to_cpu(header.rgd_offset) * BDRV_SECTOR_SIZE, + gd_buf, gd_buf_size, 0); + if (ret < 0) { + error_setg(errp, QERR_IO_ERROR); + goto exit; + } + + /* write backup grain directory */ + for (i = 0, tmp = le64_to_cpu(header.gd_offset) + gd_sectors; + i < gt_count; i++, tmp += gt_size) { + gd_buf[i] = cpu_to_le32(tmp); + } + ret = blk_pwrite(blk, le64_to_cpu(header.gd_offset) * BDRV_SECTOR_SIZE, + gd_buf, gd_buf_size, 0); + if (ret < 0) { + error_setg(errp, QERR_IO_ERROR); + } + + ret = 0; +exit: + g_free(gd_buf); + return ret; +} + +static int vmdk_create_extent(const char *filename, int64_t filesize, + bool flat, bool compress, bool zeroed_grain, + BlockBackend **pbb, + QemuOpts *opts, Error **errp) +{ + int ret; + BlockBackend *blk = NULL; + + ret = bdrv_create_file(filename, opts, errp); + if (ret < 0) { + goto exit; + } + + blk = blk_new_open(filename, NULL, NULL, + BDRV_O_RDWR | BDRV_O_RESIZE | BDRV_O_PROTOCOL, + errp); + if (blk == NULL) { + ret = -EIO; + goto exit; + } + + blk_set_allow_write_beyond_eof(blk, true); + + ret = vmdk_init_extent(blk, filesize, flat, compress, zeroed_grain, errp); +exit: + if (blk) { + if (pbb) { + *pbb = blk; + } else { + blk_unref(blk); + blk = NULL; + } + } + return ret; +} + +static int filename_decompose(const char *filename, char *path, char *prefix, + char *postfix, size_t buf_len, Error **errp) +{ + const char *p, *q; + + if (filename == NULL || !strlen(filename)) { + error_setg(errp, "No filename provided"); + return VMDK_ERROR; + } + p = strrchr(filename, '/'); + if (p == NULL) { + p = strrchr(filename, '\\'); + } + if (p == NULL) { + p = strrchr(filename, ':'); + } + if (p != NULL) { + p++; + if (p - filename >= buf_len) { + return VMDK_ERROR; + } + pstrcpy(path, p - filename + 1, filename); + } else { + p = filename; + path[0] = '\0'; + } + q = strrchr(p, '.'); + if (q == NULL) { + pstrcpy(prefix, buf_len, p); + postfix[0] = '\0'; + } else { + if (q - p >= buf_len) { + return VMDK_ERROR; + } + pstrcpy(prefix, q - p + 1, p); + pstrcpy(postfix, buf_len, q); + } + return VMDK_OK; +} + +/* + * idx == 0: get or create the descriptor file (also the image file if in a + * non-split format. + * idx >= 1: get the n-th extent if in a split subformat + */ +typedef BlockBackend *(*vmdk_create_extent_fn)(int64_t size, + int idx, + bool flat, + bool split, + bool compress, + bool zeroed_grain, + void *opaque, + Error **errp); + +static void vmdk_desc_add_extent(GString *desc, + const char *extent_line_fmt, + int64_t size, const char *filename) +{ + char *basename = g_path_get_basename(filename); + + g_string_append_printf(desc, extent_line_fmt, + DIV_ROUND_UP(size, BDRV_SECTOR_SIZE), basename); + g_free(basename); +} + +static int coroutine_fn vmdk_co_do_create(int64_t size, + BlockdevVmdkSubformat subformat, + BlockdevVmdkAdapterType adapter_type, + const char *backing_file, + const char *hw_version, + bool compat6, + bool zeroed_grain, + vmdk_create_extent_fn extent_fn, + void *opaque, + Error **errp) +{ + int extent_idx; + BlockBackend *blk = NULL; + BlockBackend *extent_blk; + Error *local_err = NULL; + char *desc = NULL; + int ret = 0; + bool flat, split, compress; + GString *ext_desc_lines; + const int64_t split_size = 0x80000000; /* VMDK has constant split size */ + int64_t extent_size; + int64_t created_size = 0; + const char *extent_line_fmt; + char *parent_desc_line = g_malloc0(BUF_SIZE); + uint32_t parent_cid = 0xffffffff; + uint32_t number_heads = 16; + uint32_t desc_offset = 0, desc_len; + const char desc_template[] = + "# Disk DescriptorFile\n" + "version=1\n" + "CID=%" PRIx32 "\n" + "parentCID=%" PRIx32 "\n" + "createType=\"%s\"\n" + "%s" + "\n" + "# Extent description\n" + "%s" + "\n" + "# The Disk Data Base\n" + "#DDB\n" + "\n" + "ddb.virtualHWVersion = \"%s\"\n" + "ddb.geometry.cylinders = \"%" PRId64 "\"\n" + "ddb.geometry.heads = \"%" PRIu32 "\"\n" + "ddb.geometry.sectors = \"63\"\n" + "ddb.adapterType = \"%s\"\n"; + + ext_desc_lines = g_string_new(NULL); + + /* Read out options */ + if (compat6) { + if (hw_version) { + error_setg(errp, + "compat6 cannot be enabled with hwversion set"); + ret = -EINVAL; + goto exit; + } + hw_version = "6"; + } + if (!hw_version) { + hw_version = "4"; + } + + if (adapter_type != BLOCKDEV_VMDK_ADAPTER_TYPE_IDE) { + /* that's the number of heads with which vmware operates when + creating, exporting, etc. vmdk files with a non-ide adapter type */ + number_heads = 255; + } + split = (subformat == BLOCKDEV_VMDK_SUBFORMAT_TWOGBMAXEXTENTFLAT) || + (subformat == BLOCKDEV_VMDK_SUBFORMAT_TWOGBMAXEXTENTSPARSE); + flat = (subformat == BLOCKDEV_VMDK_SUBFORMAT_MONOLITHICFLAT) || + (subformat == BLOCKDEV_VMDK_SUBFORMAT_TWOGBMAXEXTENTFLAT); + compress = subformat == BLOCKDEV_VMDK_SUBFORMAT_STREAMOPTIMIZED; + + if (flat) { + extent_line_fmt = "RW %" PRId64 " FLAT \"%s\" 0\n"; + } else { + extent_line_fmt = "RW %" PRId64 " SPARSE \"%s\"\n"; + } + if (flat && backing_file) { + error_setg(errp, "Flat image can't have backing file"); + ret = -ENOTSUP; + goto exit; + } + if (flat && zeroed_grain) { + error_setg(errp, "Flat image can't enable zeroed grain"); + ret = -ENOTSUP; + goto exit; + } + + /* Create extents */ + if (split) { + extent_size = split_size; + } else { + extent_size = size; + } + if (!split && !flat) { + created_size = extent_size; + } else { + created_size = 0; + } + /* Get the descriptor file BDS */ + blk = extent_fn(created_size, 0, flat, split, compress, zeroed_grain, + opaque, errp); + if (!blk) { + ret = -EIO; + goto exit; + } + if (!split && !flat) { + vmdk_desc_add_extent(ext_desc_lines, extent_line_fmt, created_size, + blk_bs(blk)->filename); + } + + if (backing_file) { + BlockBackend *backing; + char *full_backing = + bdrv_get_full_backing_filename_from_filename(blk_bs(blk)->filename, + backing_file, + &local_err); + if (local_err) { + error_propagate(errp, local_err); + ret = -ENOENT; + goto exit; + } + assert(full_backing); + + backing = blk_new_open(full_backing, NULL, NULL, + BDRV_O_NO_BACKING, errp); + g_free(full_backing); + if (backing == NULL) { + ret = -EIO; + goto exit; + } + if (strcmp(blk_bs(backing)->drv->format_name, "vmdk")) { + error_setg(errp, "Invalid backing file format: %s. Must be vmdk", + blk_bs(backing)->drv->format_name); + blk_unref(backing); + ret = -EINVAL; + goto exit; + } + ret = vmdk_read_cid(blk_bs(backing), 0, &parent_cid); + blk_unref(backing); + if (ret) { + error_setg(errp, "Failed to read parent CID"); + goto exit; + } + snprintf(parent_desc_line, BUF_SIZE, + "parentFileNameHint=\"%s\"", backing_file); + } + extent_idx = 1; + while (created_size < size) { + int64_t cur_size = MIN(size - created_size, extent_size); + extent_blk = extent_fn(cur_size, extent_idx, flat, split, compress, + zeroed_grain, opaque, errp); + if (!extent_blk) { + ret = -EINVAL; + goto exit; + } + vmdk_desc_add_extent(ext_desc_lines, extent_line_fmt, cur_size, + blk_bs(extent_blk)->filename); + created_size += cur_size; + extent_idx++; + blk_unref(extent_blk); + } + + /* Check whether we got excess extents */ + extent_blk = extent_fn(-1, extent_idx, flat, split, compress, zeroed_grain, + opaque, NULL); + if (extent_blk) { + blk_unref(extent_blk); + error_setg(errp, "List of extents contains unused extents"); + ret = -EINVAL; + goto exit; + } + + /* generate descriptor file */ + desc = g_strdup_printf(desc_template, + g_random_int(), + parent_cid, + BlockdevVmdkSubformat_str(subformat), + parent_desc_line, + ext_desc_lines->str, + hw_version, + size / + (int64_t)(63 * number_heads * BDRV_SECTOR_SIZE), + number_heads, + BlockdevVmdkAdapterType_str(adapter_type)); + desc_len = strlen(desc); + /* the descriptor offset = 0x200 */ + if (!split && !flat) { + desc_offset = 0x200; + } + + ret = blk_pwrite(blk, desc_offset, desc, desc_len, 0); + if (ret < 0) { + error_setg_errno(errp, -ret, "Could not write description"); + goto exit; + } + /* bdrv_pwrite write padding zeros to align to sector, we don't need that + * for description file */ + if (desc_offset == 0) { + ret = blk_truncate(blk, desc_len, false, PREALLOC_MODE_OFF, 0, errp); + if (ret < 0) { + goto exit; + } + } + ret = 0; +exit: + if (blk) { + blk_unref(blk); + } + g_free(desc); + g_free(parent_desc_line); + g_string_free(ext_desc_lines, true); + return ret; +} + +typedef struct { + char *path; + char *prefix; + char *postfix; + QemuOpts *opts; +} VMDKCreateOptsData; + +static BlockBackend *vmdk_co_create_opts_cb(int64_t size, int idx, + bool flat, bool split, bool compress, + bool zeroed_grain, void *opaque, + Error **errp) +{ + BlockBackend *blk = NULL; + BlockDriverState *bs = NULL; + VMDKCreateOptsData *data = opaque; + char *ext_filename = NULL; + char *rel_filename = NULL; + + /* We're done, don't create excess extents. */ + if (size == -1) { + assert(errp == NULL); + return NULL; + } + + if (idx == 0) { + rel_filename = g_strdup_printf("%s%s", data->prefix, data->postfix); + } else if (split) { + rel_filename = g_strdup_printf("%s-%c%03d%s", + data->prefix, + flat ? 'f' : 's', idx, data->postfix); + } else { + assert(idx == 1); + rel_filename = g_strdup_printf("%s-flat%s", data->prefix, data->postfix); + } + + ext_filename = g_strdup_printf("%s%s", data->path, rel_filename); + g_free(rel_filename); + + if (vmdk_create_extent(ext_filename, size, + flat, compress, zeroed_grain, &blk, data->opts, + errp)) { + goto exit; + } + bdrv_unref(bs); +exit: + g_free(ext_filename); + return blk; +} + +static int coroutine_fn vmdk_co_create_opts(BlockDriver *drv, + const char *filename, + QemuOpts *opts, + Error **errp) +{ + Error *local_err = NULL; + char *desc = NULL; + int64_t total_size = 0; + char *adapter_type = NULL; + BlockdevVmdkAdapterType adapter_type_enum; + char *backing_file = NULL; + char *hw_version = NULL; + char *fmt = NULL; + BlockdevVmdkSubformat subformat; + int ret = 0; + char *path = g_malloc0(PATH_MAX); + char *prefix = g_malloc0(PATH_MAX); + char *postfix = g_malloc0(PATH_MAX); + char *desc_line = g_malloc0(BUF_SIZE); + char *ext_filename = g_malloc0(PATH_MAX); + char *desc_filename = g_malloc0(PATH_MAX); + char *parent_desc_line = g_malloc0(BUF_SIZE); + bool zeroed_grain; + bool compat6; + VMDKCreateOptsData data; + char *backing_fmt = NULL; + + backing_fmt = qemu_opt_get_del(opts, BLOCK_OPT_BACKING_FMT); + if (backing_fmt && strcmp(backing_fmt, "vmdk") != 0) { + error_setg(errp, "backing_file must be a vmdk image"); + ret = -EINVAL; + goto exit; + } + + if (filename_decompose(filename, path, prefix, postfix, PATH_MAX, errp)) { + ret = -EINVAL; + goto exit; + } + /* Read out options */ + total_size = ROUND_UP(qemu_opt_get_size_del(opts, BLOCK_OPT_SIZE, 0), + BDRV_SECTOR_SIZE); + adapter_type = qemu_opt_get_del(opts, BLOCK_OPT_ADAPTER_TYPE); + backing_file = qemu_opt_get_del(opts, BLOCK_OPT_BACKING_FILE); + hw_version = qemu_opt_get_del(opts, BLOCK_OPT_HWVERSION); + compat6 = qemu_opt_get_bool_del(opts, BLOCK_OPT_COMPAT6, false); + if (strcmp(hw_version, "undefined") == 0) { + g_free(hw_version); + hw_version = NULL; + } + fmt = qemu_opt_get_del(opts, BLOCK_OPT_SUBFMT); + zeroed_grain = qemu_opt_get_bool_del(opts, BLOCK_OPT_ZEROED_GRAIN, false); + + if (adapter_type) { + adapter_type_enum = qapi_enum_parse(&BlockdevVmdkAdapterType_lookup, + adapter_type, + BLOCKDEV_VMDK_ADAPTER_TYPE_IDE, + &local_err); + if (local_err) { + error_propagate(errp, local_err); + ret = -EINVAL; + goto exit; + } + } else { + adapter_type_enum = BLOCKDEV_VMDK_ADAPTER_TYPE_IDE; + } + + if (!fmt) { + /* Default format to monolithicSparse */ + subformat = BLOCKDEV_VMDK_SUBFORMAT_MONOLITHICSPARSE; + } else { + subformat = qapi_enum_parse(&BlockdevVmdkSubformat_lookup, + fmt, + BLOCKDEV_VMDK_SUBFORMAT_MONOLITHICSPARSE, + &local_err); + if (local_err) { + error_propagate(errp, local_err); + ret = -EINVAL; + goto exit; + } + } + data = (VMDKCreateOptsData){ + .prefix = prefix, + .postfix = postfix, + .path = path, + .opts = opts, + }; + ret = vmdk_co_do_create(total_size, subformat, adapter_type_enum, + backing_file, hw_version, compat6, zeroed_grain, + vmdk_co_create_opts_cb, &data, errp); + +exit: + g_free(backing_fmt); + g_free(adapter_type); + g_free(backing_file); + g_free(hw_version); + g_free(fmt); + g_free(desc); + g_free(path); + g_free(prefix); + g_free(postfix); + g_free(desc_line); + g_free(ext_filename); + g_free(desc_filename); + g_free(parent_desc_line); + return ret; +} + +static BlockBackend *vmdk_co_create_cb(int64_t size, int idx, + bool flat, bool split, bool compress, + bool zeroed_grain, void *opaque, + Error **errp) +{ + int ret; + BlockDriverState *bs; + BlockBackend *blk; + BlockdevCreateOptionsVmdk *opts = opaque; + + if (idx == 0) { + bs = bdrv_open_blockdev_ref(opts->file, errp); + } else { + int i; + BlockdevRefList *list = opts->extents; + for (i = 1; i < idx; i++) { + if (!list || !list->next) { + error_setg(errp, "Extent [%d] not specified", i); + return NULL; + } + list = list->next; + } + if (!list) { + error_setg(errp, "Extent [%d] not specified", idx - 1); + return NULL; + } + bs = bdrv_open_blockdev_ref(list->value, errp); + } + if (!bs) { + return NULL; + } + blk = blk_new_with_bs(bs, + BLK_PERM_CONSISTENT_READ | BLK_PERM_WRITE | BLK_PERM_RESIZE, + BLK_PERM_ALL, errp); + if (!blk) { + return NULL; + } + blk_set_allow_write_beyond_eof(blk, true); + bdrv_unref(bs); + + if (size != -1) { + ret = vmdk_init_extent(blk, size, flat, compress, zeroed_grain, errp); + if (ret) { + blk_unref(blk); + blk = NULL; + } + } + return blk; +} + +static int coroutine_fn vmdk_co_create(BlockdevCreateOptions *create_options, + Error **errp) +{ + int ret; + BlockdevCreateOptionsVmdk *opts; + + opts = &create_options->u.vmdk; + + /* Validate options */ + if (!QEMU_IS_ALIGNED(opts->size, BDRV_SECTOR_SIZE)) { + error_setg(errp, "Image size must be a multiple of 512 bytes"); + ret = -EINVAL; + goto out; + } + + ret = vmdk_co_do_create(opts->size, + opts->subformat, + opts->adapter_type, + opts->backing_file, + opts->hwversion, + false, + opts->zeroed_grain, + vmdk_co_create_cb, + opts, errp); + return ret; + +out: + return ret; +} + +static void vmdk_close(BlockDriverState *bs) +{ + BDRVVmdkState *s = bs->opaque; + + vmdk_free_extents(bs); + g_free(s->create_type); + + migrate_del_blocker(s->migration_blocker); + error_free(s->migration_blocker); +} + +static int64_t vmdk_get_allocated_file_size(BlockDriverState *bs) +{ + int i; + int64_t ret = 0; + int64_t r; + BDRVVmdkState *s = bs->opaque; + + ret = bdrv_get_allocated_file_size(bs->file->bs); + if (ret < 0) { + return ret; + } + for (i = 0; i < s->num_extents; i++) { + if (s->extents[i].file == bs->file) { + continue; + } + r = bdrv_get_allocated_file_size(s->extents[i].file->bs); + if (r < 0) { + return r; + } + ret += r; + } + return ret; +} + +static int vmdk_has_zero_init(BlockDriverState *bs) +{ + int i; + BDRVVmdkState *s = bs->opaque; + + /* If has a flat extent and its underlying storage doesn't have zero init, + * return 0. */ + for (i = 0; i < s->num_extents; i++) { + if (s->extents[i].flat) { + if (!bdrv_has_zero_init(s->extents[i].file->bs)) { + return 0; + } + } + } + return 1; +} + +static ImageInfo *vmdk_get_extent_info(VmdkExtent *extent) +{ + ImageInfo *info = g_new0(ImageInfo, 1); + + bdrv_refresh_filename(extent->file->bs); + *info = (ImageInfo){ + .filename = g_strdup(extent->file->bs->filename), + .format = g_strdup(extent->type), + .virtual_size = extent->sectors * BDRV_SECTOR_SIZE, + .compressed = extent->compressed, + .has_compressed = extent->compressed, + .cluster_size = extent->cluster_sectors * BDRV_SECTOR_SIZE, + .has_cluster_size = !extent->flat, + }; + + return info; +} + +static int coroutine_fn vmdk_co_check(BlockDriverState *bs, + BdrvCheckResult *result, + BdrvCheckMode fix) +{ + BDRVVmdkState *s = bs->opaque; + VmdkExtent *extent = NULL; + int64_t sector_num = 0; + int64_t total_sectors = bdrv_nb_sectors(bs); + int ret; + uint64_t cluster_offset; + + if (fix) { + return -ENOTSUP; + } + + for (;;) { + if (sector_num >= total_sectors) { + return 0; + } + extent = find_extent(s, sector_num, extent); + if (!extent) { + fprintf(stderr, + "ERROR: could not find extent for sector %" PRId64 "\n", + sector_num); + ret = -EINVAL; + break; + } + ret = get_cluster_offset(bs, extent, NULL, + sector_num << BDRV_SECTOR_BITS, + false, &cluster_offset, 0, 0); + if (ret == VMDK_ERROR) { + fprintf(stderr, + "ERROR: could not get cluster_offset for sector %" + PRId64 "\n", sector_num); + break; + } + if (ret == VMDK_OK) { + int64_t extent_len = bdrv_getlength(extent->file->bs); + if (extent_len < 0) { + fprintf(stderr, + "ERROR: could not get extent file length for sector %" + PRId64 "\n", sector_num); + ret = extent_len; + break; + } + if (cluster_offset >= extent_len) { + fprintf(stderr, + "ERROR: cluster offset for sector %" + PRId64 " points after EOF\n", sector_num); + ret = -EINVAL; + break; + } + } + sector_num += extent->cluster_sectors; + } + + result->corruptions++; + return ret; +} + +static ImageInfoSpecific *vmdk_get_specific_info(BlockDriverState *bs, + Error **errp) +{ + int i; + BDRVVmdkState *s = bs->opaque; + ImageInfoSpecific *spec_info = g_new0(ImageInfoSpecific, 1); + ImageInfoList **next; + + *spec_info = (ImageInfoSpecific){ + .type = IMAGE_INFO_SPECIFIC_KIND_VMDK, + .u = { + .vmdk.data = g_new0(ImageInfoSpecificVmdk, 1), + }, + }; + + *spec_info->u.vmdk.data = (ImageInfoSpecificVmdk) { + .create_type = g_strdup(s->create_type), + .cid = s->cid, + .parent_cid = s->parent_cid, + }; + + next = &spec_info->u.vmdk.data->extents; + for (i = 0; i < s->num_extents; i++) { + *next = g_new0(ImageInfoList, 1); + (*next)->value = vmdk_get_extent_info(&s->extents[i]); + (*next)->next = NULL; + next = &(*next)->next; + } + + return spec_info; +} + +static bool vmdk_extents_type_eq(const VmdkExtent *a, const VmdkExtent *b) +{ + return a->flat == b->flat && + a->compressed == b->compressed && + (a->flat || a->cluster_sectors == b->cluster_sectors); +} + +static int vmdk_get_info(BlockDriverState *bs, BlockDriverInfo *bdi) +{ + int i; + BDRVVmdkState *s = bs->opaque; + assert(s->num_extents); + + /* See if we have multiple extents but they have different cases */ + for (i = 1; i < s->num_extents; i++) { + if (!vmdk_extents_type_eq(&s->extents[0], &s->extents[i])) { + return -ENOTSUP; + } + } + bdi->needs_compressed_writes = s->extents[0].compressed; + if (!s->extents[0].flat) { + bdi->cluster_size = s->extents[0].cluster_sectors << BDRV_SECTOR_BITS; + } + return 0; +} + +static void vmdk_gather_child_options(BlockDriverState *bs, QDict *target, + bool backing_overridden) +{ + /* No children but file and backing can be explicitly specified (TODO) */ + qdict_put(target, "file", + qobject_ref(bs->file->bs->full_open_options)); + + if (backing_overridden) { + if (bs->backing) { + qdict_put(target, "backing", + qobject_ref(bs->backing->bs->full_open_options)); + } else { + qdict_put_null(target, "backing"); + } + } +} + +static QemuOptsList vmdk_create_opts = { + .name = "vmdk-create-opts", + .head = QTAILQ_HEAD_INITIALIZER(vmdk_create_opts.head), + .desc = { + { + .name = BLOCK_OPT_SIZE, + .type = QEMU_OPT_SIZE, + .help = "Virtual disk size" + }, + { + .name = BLOCK_OPT_ADAPTER_TYPE, + .type = QEMU_OPT_STRING, + .help = "Virtual adapter type, can be one of " + "ide (default), lsilogic, buslogic or legacyESX" + }, + { + .name = BLOCK_OPT_BACKING_FILE, + .type = QEMU_OPT_STRING, + .help = "File name of a base image" + }, + { + .name = BLOCK_OPT_BACKING_FMT, + .type = QEMU_OPT_STRING, + .help = "Must be 'vmdk' if present", + }, + { + .name = BLOCK_OPT_COMPAT6, + .type = QEMU_OPT_BOOL, + .help = "VMDK version 6 image", + .def_value_str = "off" + }, + { + .name = BLOCK_OPT_HWVERSION, + .type = QEMU_OPT_STRING, + .help = "VMDK hardware version", + .def_value_str = "undefined" + }, + { + .name = BLOCK_OPT_SUBFMT, + .type = QEMU_OPT_STRING, + .help = + "VMDK flat extent format, can be one of " + "{monolithicSparse (default) | monolithicFlat | twoGbMaxExtentSparse | twoGbMaxExtentFlat | streamOptimized} " + }, + { + .name = BLOCK_OPT_ZEROED_GRAIN, + .type = QEMU_OPT_BOOL, + .help = "Enable efficient zero writes " + "using the zeroed-grain GTE feature" + }, + { /* end of list */ } + } +}; + +static BlockDriver bdrv_vmdk = { + .format_name = "vmdk", + .instance_size = sizeof(BDRVVmdkState), + .bdrv_probe = vmdk_probe, + .bdrv_open = vmdk_open, + .bdrv_co_check = vmdk_co_check, + .bdrv_reopen_prepare = vmdk_reopen_prepare, + .bdrv_child_perm = bdrv_default_perms, + .bdrv_co_preadv = vmdk_co_preadv, + .bdrv_co_pwritev = vmdk_co_pwritev, + .bdrv_co_pwritev_compressed = vmdk_co_pwritev_compressed, + .bdrv_co_pwrite_zeroes = vmdk_co_pwrite_zeroes, + .bdrv_close = vmdk_close, + .bdrv_co_create_opts = vmdk_co_create_opts, + .bdrv_co_create = vmdk_co_create, + .bdrv_co_block_status = vmdk_co_block_status, + .bdrv_get_allocated_file_size = vmdk_get_allocated_file_size, + .bdrv_has_zero_init = vmdk_has_zero_init, + .bdrv_get_specific_info = vmdk_get_specific_info, + .bdrv_refresh_limits = vmdk_refresh_limits, + .bdrv_get_info = vmdk_get_info, + .bdrv_gather_child_options = vmdk_gather_child_options, + + .is_format = true, + .supports_backing = true, + .create_opts = &vmdk_create_opts, +}; + +static void bdrv_vmdk_init(void) +{ + bdrv_register(&bdrv_vmdk); +} + +block_init(bdrv_vmdk_init); diff --git a/block/vpc.c b/block/vpc.c new file mode 100644 index 000000000..1ab55f928 --- /dev/null +++ b/block/vpc.c @@ -0,0 +1,1256 @@ +/* + * Block driver for Connectix / Microsoft Virtual PC images + * + * Copyright (c) 2005 Alex Beregszaszi + * Copyright (c) 2009 Kevin Wolf + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ + +#include "qemu/osdep.h" +#include "qapi/error.h" +#include "block/block_int.h" +#include "block/qdict.h" +#include "sysemu/block-backend.h" +#include "qemu/module.h" +#include "qemu/option.h" +#include "migration/blocker.h" +#include "qemu/bswap.h" +#include "qemu/uuid.h" +#include "qapi/qmp/qdict.h" +#include "qapi/qobject-input-visitor.h" +#include "qapi/qapi-visit-block-core.h" + +/**************************************************************/ + +#define HEADER_SIZE 512 + +//#define CACHE + +enum vhd_type { + VHD_FIXED = 2, + VHD_DYNAMIC = 3, + VHD_DIFFERENCING = 4, +}; + +/* Seconds since Jan 1, 2000 0:00:00 (UTC) */ +#define VHD_TIMESTAMP_BASE 946684800 + +#define VHD_CHS_MAX_C 65535LL +#define VHD_CHS_MAX_H 16 +#define VHD_CHS_MAX_S 255 + +#define VHD_MAX_SECTORS 0xff000000 /* 2040 GiB max image size */ +#define VHD_MAX_GEOMETRY (VHD_CHS_MAX_C * VHD_CHS_MAX_H * VHD_CHS_MAX_S) + +#define VPC_OPT_FORCE_SIZE "force_size" + +/* always big-endian */ +typedef struct vhd_footer { + char creator[8]; /* "conectix" */ + uint32_t features; + uint32_t version; + + /* Offset of next header structure, 0xFFFFFFFF if none */ + uint64_t data_offset; + + /* Seconds since Jan 1, 2000 0:00:00 (UTC) */ + uint32_t timestamp; + + char creator_app[4]; /* e.g., "vpc " */ + uint16_t major; + uint16_t minor; + char creator_os[4]; /* "Wi2k" */ + + uint64_t orig_size; + uint64_t current_size; + + uint16_t cyls; + uint8_t heads; + uint8_t secs_per_cyl; + + uint32_t type; + + /* Checksum of the Hard Disk Footer ("one's complement of the sum of all + the bytes in the footer without the checksum field") */ + uint32_t checksum; + + /* UUID used to identify a parent hard disk (backing file) */ + QemuUUID uuid; + + uint8_t in_saved_state; +} QEMU_PACKED VHDFooter; + +typedef struct vhd_dyndisk_header { + char magic[8]; /* "cxsparse" */ + + /* Offset of next header structure, 0xFFFFFFFF if none */ + uint64_t data_offset; + + /* Offset of the Block Allocation Table (BAT) */ + uint64_t table_offset; + + uint32_t version; + uint32_t max_table_entries; /* 32bit/entry */ + + /* 2 MB by default, must be a power of two */ + uint32_t block_size; + + uint32_t checksum; + uint8_t parent_uuid[16]; + uint32_t parent_timestamp; + uint32_t reserved; + + /* Backing file name (in UTF-16) */ + uint8_t parent_name[512]; + + struct { + uint32_t platform; + uint32_t data_space; + uint32_t data_length; + uint32_t reserved; + uint64_t data_offset; + } parent_locator[8]; +} QEMU_PACKED VHDDynDiskHeader; + +typedef struct BDRVVPCState { + CoMutex lock; + uint8_t footer_buf[HEADER_SIZE]; + uint64_t free_data_block_offset; + int max_table_entries; + uint32_t *pagetable; + uint64_t bat_offset; + uint64_t last_bitmap_offset; + + uint32_t block_size; + uint32_t bitmap_size; + bool force_use_chs; + bool force_use_sz; + +#ifdef CACHE + uint8_t *pageentry_u8; + uint32_t *pageentry_u32; + uint16_t *pageentry_u16; + + uint64_t last_bitmap; +#endif + + Error *migration_blocker; +} BDRVVPCState; + +#define VPC_OPT_SIZE_CALC "force_size_calc" +static QemuOptsList vpc_runtime_opts = { + .name = "vpc-runtime-opts", + .head = QTAILQ_HEAD_INITIALIZER(vpc_runtime_opts.head), + .desc = { + { + .name = VPC_OPT_SIZE_CALC, + .type = QEMU_OPT_STRING, + .help = "Force disk size calculation to use either CHS geometry, " + "or use the disk current_size specified in the VHD footer. " + "{chs, current_size}" + }, + { /* end of list */ } + } +}; + +static QemuOptsList vpc_create_opts; + +static uint32_t vpc_checksum(uint8_t *buf, size_t size) +{ + uint32_t res = 0; + int i; + + for (i = 0; i < size; i++) + res += buf[i]; + + return ~res; +} + + +static int vpc_probe(const uint8_t *buf, int buf_size, const char *filename) +{ + if (buf_size >= 8 && !strncmp((char *)buf, "conectix", 8)) + return 100; + return 0; +} + +static void vpc_parse_options(BlockDriverState *bs, QemuOpts *opts, + Error **errp) +{ + BDRVVPCState *s = bs->opaque; + const char *size_calc; + + size_calc = qemu_opt_get(opts, VPC_OPT_SIZE_CALC); + + if (!size_calc) { + /* no override, use autodetect only */ + } else if (!strcmp(size_calc, "current_size")) { + s->force_use_sz = true; + } else if (!strcmp(size_calc, "chs")) { + s->force_use_chs = true; + } else { + error_setg(errp, "Invalid size calculation mode: '%s'", size_calc); + } +} + +static int vpc_open(BlockDriverState *bs, QDict *options, int flags, + Error **errp) +{ + BDRVVPCState *s = bs->opaque; + int i; + VHDFooter *footer; + VHDDynDiskHeader *dyndisk_header; + QemuOpts *opts = NULL; + Error *local_err = NULL; + bool use_chs; + uint8_t buf[HEADER_SIZE]; + uint32_t checksum; + uint64_t computed_size; + uint64_t pagetable_size; + int disk_type = VHD_DYNAMIC; + int ret; + int64_t bs_size; + + bs->file = bdrv_open_child(NULL, options, "file", bs, &child_of_bds, + BDRV_CHILD_IMAGE, false, errp); + if (!bs->file) { + return -EINVAL; + } + + opts = qemu_opts_create(&vpc_runtime_opts, NULL, 0, &error_abort); + if (!qemu_opts_absorb_qdict(opts, options, errp)) { + ret = -EINVAL; + goto fail; + } + + vpc_parse_options(bs, opts, &local_err); + if (local_err) { + error_propagate(errp, local_err); + ret = -EINVAL; + goto fail; + } + + ret = bdrv_pread(bs->file, 0, s->footer_buf, HEADER_SIZE); + if (ret < 0) { + error_setg(errp, "Unable to read VHD header"); + goto fail; + } + + footer = (VHDFooter *) s->footer_buf; + if (strncmp(footer->creator, "conectix", 8)) { + int64_t offset = bdrv_getlength(bs->file->bs); + if (offset < 0) { + ret = offset; + error_setg(errp, "Invalid file size"); + goto fail; + } else if (offset < HEADER_SIZE) { + ret = -EINVAL; + error_setg(errp, "File too small for a VHD header"); + goto fail; + } + + /* If a fixed disk, the footer is found only at the end of the file */ + ret = bdrv_pread(bs->file, offset-HEADER_SIZE, s->footer_buf, + HEADER_SIZE); + if (ret < 0) { + goto fail; + } + if (strncmp(footer->creator, "conectix", 8)) { + error_setg(errp, "invalid VPC image"); + ret = -EINVAL; + goto fail; + } + disk_type = VHD_FIXED; + } + + checksum = be32_to_cpu(footer->checksum); + footer->checksum = 0; + if (vpc_checksum(s->footer_buf, HEADER_SIZE) != checksum) { + error_setg(errp, "Incorrect header checksum"); + ret = -EINVAL; + goto fail; + } + + /* Write 'checksum' back to footer, or else will leave it with zero. */ + footer->checksum = cpu_to_be32(checksum); + + /* The visible size of a image in Virtual PC depends on the geometry + rather than on the size stored in the footer (the size in the footer + is too large usually) */ + bs->total_sectors = (int64_t) + be16_to_cpu(footer->cyls) * footer->heads * footer->secs_per_cyl; + + /* Microsoft Virtual PC and Microsoft Hyper-V produce and read + * VHD image sizes differently. VPC will rely on CHS geometry, + * while Hyper-V and disk2vhd use the size specified in the footer. + * + * We use a couple of approaches to try and determine the correct method: + * look at the Creator App field, and look for images that have CHS + * geometry that is the maximum value. + * + * If the CHS geometry is the maximum CHS geometry, then we assume that + * the size is the footer->current_size to avoid truncation. Otherwise, + * we follow the table based on footer->creator_app: + * + * Known creator apps: + * 'vpc ' : CHS Virtual PC (uses disk geometry) + * 'qemu' : CHS QEMU (uses disk geometry) + * 'qem2' : current_size QEMU (uses current_size) + * 'win ' : current_size Hyper-V + * 'd2v ' : current_size Disk2vhd + * 'tap\0' : current_size XenServer + * 'CTXS' : current_size XenConverter + * + * The user can override the table values via drive options, however + * even with an override we will still use current_size for images + * that have CHS geometry of the maximum size. + */ + use_chs = (!!strncmp(footer->creator_app, "win ", 4) && + !!strncmp(footer->creator_app, "qem2", 4) && + !!strncmp(footer->creator_app, "d2v ", 4) && + !!strncmp(footer->creator_app, "CTXS", 4) && + !!memcmp(footer->creator_app, "tap", 4)) || s->force_use_chs; + + if (!use_chs || bs->total_sectors == VHD_MAX_GEOMETRY || s->force_use_sz) { + bs->total_sectors = be64_to_cpu(footer->current_size) / + BDRV_SECTOR_SIZE; + } + + /* Allow a maximum disk size of 2040 GiB */ + if (bs->total_sectors > VHD_MAX_SECTORS) { + ret = -EFBIG; + goto fail; + } + + if (disk_type == VHD_DYNAMIC) { + ret = bdrv_pread(bs->file, be64_to_cpu(footer->data_offset), buf, + HEADER_SIZE); + if (ret < 0) { + error_setg(errp, "Error reading dynamic VHD header"); + goto fail; + } + + dyndisk_header = (VHDDynDiskHeader *) buf; + + if (strncmp(dyndisk_header->magic, "cxsparse", 8)) { + error_setg(errp, "Invalid header magic"); + ret = -EINVAL; + goto fail; + } + + s->block_size = be32_to_cpu(dyndisk_header->block_size); + if (!is_power_of_2(s->block_size) || s->block_size < BDRV_SECTOR_SIZE) { + error_setg(errp, "Invalid block size %" PRIu32, s->block_size); + ret = -EINVAL; + goto fail; + } + s->bitmap_size = ((s->block_size / (8 * 512)) + 511) & ~511; + + s->max_table_entries = be32_to_cpu(dyndisk_header->max_table_entries); + + if ((bs->total_sectors * 512) / s->block_size > 0xffffffffU) { + error_setg(errp, "Too many blocks"); + ret = -EINVAL; + goto fail; + } + + computed_size = (uint64_t) s->max_table_entries * s->block_size; + if (computed_size < bs->total_sectors * 512) { + error_setg(errp, "Page table too small"); + ret = -EINVAL; + goto fail; + } + + if (s->max_table_entries > SIZE_MAX / 4 || + s->max_table_entries > (int) INT_MAX / 4) { + error_setg(errp, "Max Table Entries too large (%" PRId32 ")", + s->max_table_entries); + ret = -EINVAL; + goto fail; + } + + pagetable_size = (uint64_t) s->max_table_entries * 4; + + s->pagetable = qemu_try_blockalign(bs->file->bs, pagetable_size); + if (s->pagetable == NULL) { + error_setg(errp, "Unable to allocate memory for page table"); + ret = -ENOMEM; + goto fail; + } + + s->bat_offset = be64_to_cpu(dyndisk_header->table_offset); + + ret = bdrv_pread(bs->file, s->bat_offset, s->pagetable, + pagetable_size); + if (ret < 0) { + error_setg(errp, "Error reading pagetable"); + goto fail; + } + + s->free_data_block_offset = + ROUND_UP(s->bat_offset + pagetable_size, 512); + + for (i = 0; i < s->max_table_entries; i++) { + be32_to_cpus(&s->pagetable[i]); + if (s->pagetable[i] != 0xFFFFFFFF) { + int64_t next = (512 * (int64_t) s->pagetable[i]) + + s->bitmap_size + s->block_size; + + if (next > s->free_data_block_offset) { + s->free_data_block_offset = next; + } + } + } + + bs_size = bdrv_getlength(bs->file->bs); + if (bs_size < 0) { + error_setg_errno(errp, -bs_size, "Unable to learn image size"); + ret = bs_size; + goto fail; + } + if (s->free_data_block_offset > bs_size) { + error_setg(errp, "block-vpc: free_data_block_offset points after " + "the end of file. The image has been truncated."); + ret = -EINVAL; + goto fail; + } + + s->last_bitmap_offset = (int64_t) -1; + +#ifdef CACHE + s->pageentry_u8 = g_malloc(512); + s->pageentry_u32 = s->pageentry_u8; + s->pageentry_u16 = s->pageentry_u8; + s->last_pagetable = -1; +#endif + } + + /* Disable migration when VHD images are used */ + error_setg(&s->migration_blocker, "The vpc format used by node '%s' " + "does not support live migration", + bdrv_get_device_or_node_name(bs)); + ret = migrate_add_blocker(s->migration_blocker, errp); + if (ret < 0) { + error_free(s->migration_blocker); + goto fail; + } + + qemu_co_mutex_init(&s->lock); + qemu_opts_del(opts); + + return 0; + +fail: + qemu_opts_del(opts); + qemu_vfree(s->pagetable); +#ifdef CACHE + g_free(s->pageentry_u8); +#endif + return ret; +} + +static int vpc_reopen_prepare(BDRVReopenState *state, + BlockReopenQueue *queue, Error **errp) +{ + return 0; +} + +/* + * Returns the absolute byte offset of the given sector in the image file. + * If the sector is not allocated, -1 is returned instead. + * If an error occurred trying to write an updated block bitmap back to + * the file, -2 is returned, and the error value is written to *err. + * This can only happen for a write operation. + * + * The parameter write must be 1 if the offset will be used for a write + * operation (the block bitmaps is updated then), 0 otherwise. + * If write is true then err must not be NULL. + */ +static inline int64_t get_image_offset(BlockDriverState *bs, uint64_t offset, + bool write, int *err) +{ + BDRVVPCState *s = bs->opaque; + uint64_t bitmap_offset, block_offset; + uint32_t pagetable_index, offset_in_block; + + assert(!(write && err == NULL)); + + pagetable_index = offset / s->block_size; + offset_in_block = offset % s->block_size; + + if (pagetable_index >= s->max_table_entries || s->pagetable[pagetable_index] == 0xffffffff) + return -1; /* not allocated */ + + bitmap_offset = 512 * (uint64_t) s->pagetable[pagetable_index]; + block_offset = bitmap_offset + s->bitmap_size + offset_in_block; + + /* We must ensure that we don't write to any sectors which are marked as + unused in the bitmap. We get away with setting all bits in the block + bitmap each time we write to a new block. This might cause Virtual PC to + miss sparse read optimization, but it's not a problem in terms of + correctness. */ + if (write && (s->last_bitmap_offset != bitmap_offset)) { + uint8_t bitmap[s->bitmap_size]; + int r; + + s->last_bitmap_offset = bitmap_offset; + memset(bitmap, 0xff, s->bitmap_size); + r = bdrv_pwrite_sync(bs->file, bitmap_offset, bitmap, s->bitmap_size); + if (r < 0) { + *err = r; + return -2; + } + } + + return block_offset; +} + +/* + * Writes the footer to the end of the image file. This is needed when the + * file grows as it overwrites the old footer + * + * Returns 0 on success and < 0 on error + */ +static int rewrite_footer(BlockDriverState *bs) +{ + int ret; + BDRVVPCState *s = bs->opaque; + int64_t offset = s->free_data_block_offset; + + ret = bdrv_pwrite_sync(bs->file, offset, s->footer_buf, HEADER_SIZE); + if (ret < 0) + return ret; + + return 0; +} + +/* + * Allocates a new block. This involves writing a new footer and updating + * the Block Allocation Table to use the space at the old end of the image + * file (overwriting the old footer) + * + * Returns the sectors' offset in the image file on success and < 0 on error + */ +static int64_t alloc_block(BlockDriverState *bs, int64_t offset) +{ + BDRVVPCState *s = bs->opaque; + int64_t bat_offset; + uint32_t index, bat_value; + int ret; + uint8_t bitmap[s->bitmap_size]; + + /* Check if sector_num is valid */ + if ((offset < 0) || (offset > bs->total_sectors * BDRV_SECTOR_SIZE)) { + return -EINVAL; + } + + /* Write entry into in-memory BAT */ + index = offset / s->block_size; + assert(s->pagetable[index] == 0xFFFFFFFF); + s->pagetable[index] = s->free_data_block_offset / 512; + + /* Initialize the block's bitmap */ + memset(bitmap, 0xff, s->bitmap_size); + ret = bdrv_pwrite_sync(bs->file, s->free_data_block_offset, bitmap, + s->bitmap_size); + if (ret < 0) { + return ret; + } + + /* Write new footer (the old one will be overwritten) */ + s->free_data_block_offset += s->block_size + s->bitmap_size; + ret = rewrite_footer(bs); + if (ret < 0) + goto fail; + + /* Write BAT entry to disk */ + bat_offset = s->bat_offset + (4 * index); + bat_value = cpu_to_be32(s->pagetable[index]); + ret = bdrv_pwrite_sync(bs->file, bat_offset, &bat_value, 4); + if (ret < 0) + goto fail; + + return get_image_offset(bs, offset, false, NULL); + +fail: + s->free_data_block_offset -= (s->block_size + s->bitmap_size); + return ret; +} + +static int vpc_get_info(BlockDriverState *bs, BlockDriverInfo *bdi) +{ + BDRVVPCState *s = (BDRVVPCState *)bs->opaque; + VHDFooter *footer = (VHDFooter *) s->footer_buf; + + if (be32_to_cpu(footer->type) != VHD_FIXED) { + bdi->cluster_size = s->block_size; + } + + return 0; +} + +static int coroutine_fn +vpc_co_preadv(BlockDriverState *bs, uint64_t offset, uint64_t bytes, + QEMUIOVector *qiov, int flags) +{ + BDRVVPCState *s = bs->opaque; + int ret; + int64_t image_offset; + int64_t n_bytes; + int64_t bytes_done = 0; + VHDFooter *footer = (VHDFooter *) s->footer_buf; + QEMUIOVector local_qiov; + + if (be32_to_cpu(footer->type) == VHD_FIXED) { + return bdrv_co_preadv(bs->file, offset, bytes, qiov, 0); + } + + qemu_co_mutex_lock(&s->lock); + qemu_iovec_init(&local_qiov, qiov->niov); + + while (bytes > 0) { + image_offset = get_image_offset(bs, offset, false, NULL); + n_bytes = MIN(bytes, s->block_size - (offset % s->block_size)); + + if (image_offset == -1) { + qemu_iovec_memset(qiov, bytes_done, 0, n_bytes); + } else { + qemu_iovec_reset(&local_qiov); + qemu_iovec_concat(&local_qiov, qiov, bytes_done, n_bytes); + + qemu_co_mutex_unlock(&s->lock); + ret = bdrv_co_preadv(bs->file, image_offset, n_bytes, + &local_qiov, 0); + qemu_co_mutex_lock(&s->lock); + if (ret < 0) { + goto fail; + } + } + + bytes -= n_bytes; + offset += n_bytes; + bytes_done += n_bytes; + } + + ret = 0; +fail: + qemu_iovec_destroy(&local_qiov); + qemu_co_mutex_unlock(&s->lock); + + return ret; +} + +static int coroutine_fn +vpc_co_pwritev(BlockDriverState *bs, uint64_t offset, uint64_t bytes, + QEMUIOVector *qiov, int flags) +{ + BDRVVPCState *s = bs->opaque; + int64_t image_offset; + int64_t n_bytes; + int64_t bytes_done = 0; + int ret = 0; + VHDFooter *footer = (VHDFooter *) s->footer_buf; + QEMUIOVector local_qiov; + + if (be32_to_cpu(footer->type) == VHD_FIXED) { + return bdrv_co_pwritev(bs->file, offset, bytes, qiov, 0); + } + + qemu_co_mutex_lock(&s->lock); + qemu_iovec_init(&local_qiov, qiov->niov); + + while (bytes > 0) { + image_offset = get_image_offset(bs, offset, true, &ret); + if (image_offset == -2) { + /* Failed to write block bitmap: can't proceed with write */ + goto fail; + } + n_bytes = MIN(bytes, s->block_size - (offset % s->block_size)); + + if (image_offset == -1) { + image_offset = alloc_block(bs, offset); + if (image_offset < 0) { + ret = image_offset; + goto fail; + } + } + + qemu_iovec_reset(&local_qiov); + qemu_iovec_concat(&local_qiov, qiov, bytes_done, n_bytes); + + qemu_co_mutex_unlock(&s->lock); + ret = bdrv_co_pwritev(bs->file, image_offset, n_bytes, + &local_qiov, 0); + qemu_co_mutex_lock(&s->lock); + if (ret < 0) { + goto fail; + } + + bytes -= n_bytes; + offset += n_bytes; + bytes_done += n_bytes; + } + + ret = 0; +fail: + qemu_iovec_destroy(&local_qiov); + qemu_co_mutex_unlock(&s->lock); + + return ret; +} + +static int coroutine_fn vpc_co_block_status(BlockDriverState *bs, + bool want_zero, + int64_t offset, int64_t bytes, + int64_t *pnum, int64_t *map, + BlockDriverState **file) +{ + BDRVVPCState *s = bs->opaque; + VHDFooter *footer = (VHDFooter*) s->footer_buf; + int64_t image_offset; + bool allocated; + int ret; + int64_t n; + + if (be32_to_cpu(footer->type) == VHD_FIXED) { + *pnum = bytes; + *map = offset; + *file = bs->file->bs; + return BDRV_BLOCK_DATA | BDRV_BLOCK_OFFSET_VALID | BDRV_BLOCK_RECURSE; + } + + qemu_co_mutex_lock(&s->lock); + + image_offset = get_image_offset(bs, offset, false, NULL); + allocated = (image_offset != -1); + *pnum = 0; + ret = BDRV_BLOCK_ZERO; + + do { + /* All sectors in a block are contiguous (without using the bitmap) */ + n = ROUND_UP(offset + 1, s->block_size) - offset; + n = MIN(n, bytes); + + *pnum += n; + offset += n; + bytes -= n; + /* *pnum can't be greater than one block for allocated + * sectors since there is always a bitmap in between. */ + if (allocated) { + *file = bs->file->bs; + *map = image_offset; + ret = BDRV_BLOCK_DATA | BDRV_BLOCK_OFFSET_VALID; + break; + } + if (bytes == 0) { + break; + } + image_offset = get_image_offset(bs, offset, false, NULL); + } while (image_offset == -1); + + qemu_co_mutex_unlock(&s->lock); + return ret; +} + +/* + * Calculates the number of cylinders, heads and sectors per cylinder + * based on a given number of sectors. This is the algorithm described + * in the VHD specification. + * + * Note that the geometry doesn't always exactly match total_sectors but + * may round it down. + * + * Returns 0 on success, -EFBIG if the size is larger than 2040 GiB. Override + * the hardware EIDE and ATA-2 limit of 16 heads (max disk size of 127 GB) + * and instead allow up to 255 heads. + */ +static int calculate_geometry(int64_t total_sectors, uint16_t *cyls, + uint8_t *heads, uint8_t *secs_per_cyl) +{ + uint32_t cyls_times_heads; + + total_sectors = MIN(total_sectors, VHD_MAX_GEOMETRY); + + if (total_sectors >= 65535LL * 16 * 63) { + *secs_per_cyl = 255; + *heads = 16; + cyls_times_heads = total_sectors / *secs_per_cyl; + } else { + *secs_per_cyl = 17; + cyls_times_heads = total_sectors / *secs_per_cyl; + *heads = DIV_ROUND_UP(cyls_times_heads, 1024); + + if (*heads < 4) { + *heads = 4; + } + + if (cyls_times_heads >= (*heads * 1024) || *heads > 16) { + *secs_per_cyl = 31; + *heads = 16; + cyls_times_heads = total_sectors / *secs_per_cyl; + } + + if (cyls_times_heads >= (*heads * 1024)) { + *secs_per_cyl = 63; + *heads = 16; + cyls_times_heads = total_sectors / *secs_per_cyl; + } + } + + *cyls = cyls_times_heads / *heads; + + return 0; +} + +static int create_dynamic_disk(BlockBackend *blk, uint8_t *buf, + int64_t total_sectors) +{ + VHDDynDiskHeader *dyndisk_header = + (VHDDynDiskHeader *) buf; + size_t block_size, num_bat_entries; + int i; + int ret; + int64_t offset = 0; + + /* Write the footer (twice: at the beginning and at the end) */ + block_size = 0x200000; + num_bat_entries = DIV_ROUND_UP(total_sectors, block_size / 512); + + ret = blk_pwrite(blk, offset, buf, HEADER_SIZE, 0); + if (ret < 0) { + goto fail; + } + + offset = 1536 + ((num_bat_entries * 4 + 511) & ~511); + ret = blk_pwrite(blk, offset, buf, HEADER_SIZE, 0); + if (ret < 0) { + goto fail; + } + + /* Write the initial BAT */ + offset = 3 * 512; + + memset(buf, 0xFF, 512); + for (i = 0; i < DIV_ROUND_UP(num_bat_entries * 4, 512); i++) { + ret = blk_pwrite(blk, offset, buf, 512, 0); + if (ret < 0) { + goto fail; + } + offset += 512; + } + + /* Prepare the Dynamic Disk Header */ + memset(buf, 0, 1024); + + memcpy(dyndisk_header->magic, "cxsparse", 8); + + /* + * Note: The spec is actually wrong here for data_offset, it says + * 0xFFFFFFFF, but MS tools expect all 64 bits to be set. + */ + dyndisk_header->data_offset = cpu_to_be64(0xFFFFFFFFFFFFFFFFULL); + dyndisk_header->table_offset = cpu_to_be64(3 * 512); + dyndisk_header->version = cpu_to_be32(0x00010000); + dyndisk_header->block_size = cpu_to_be32(block_size); + dyndisk_header->max_table_entries = cpu_to_be32(num_bat_entries); + + dyndisk_header->checksum = cpu_to_be32(vpc_checksum(buf, 1024)); + + /* Write the header */ + offset = 512; + + ret = blk_pwrite(blk, offset, buf, 1024, 0); + if (ret < 0) { + goto fail; + } + + ret = 0; + fail: + return ret; +} + +static int create_fixed_disk(BlockBackend *blk, uint8_t *buf, + int64_t total_size, Error **errp) +{ + int ret; + + /* Add footer to total size */ + total_size += HEADER_SIZE; + + ret = blk_truncate(blk, total_size, false, PREALLOC_MODE_OFF, 0, errp); + if (ret < 0) { + return ret; + } + + ret = blk_pwrite(blk, total_size - HEADER_SIZE, buf, HEADER_SIZE, 0); + if (ret < 0) { + error_setg_errno(errp, -ret, "Unable to write VHD header"); + return ret; + } + + return 0; +} + +static int calculate_rounded_image_size(BlockdevCreateOptionsVpc *vpc_opts, + uint16_t *out_cyls, + uint8_t *out_heads, + uint8_t *out_secs_per_cyl, + int64_t *out_total_sectors, + Error **errp) +{ + int64_t total_size = vpc_opts->size; + uint16_t cyls = 0; + uint8_t heads = 0; + uint8_t secs_per_cyl = 0; + int64_t total_sectors; + int i; + + /* + * Calculate matching total_size and geometry. Increase the number of + * sectors requested until we get enough (or fail). This ensures that + * qemu-img convert doesn't truncate images, but rather rounds up. + * + * If the image size can't be represented by a spec conformant CHS geometry, + * we set the geometry to 65535 x 16 x 255 (CxHxS) sectors and use + * the image size from the VHD footer to calculate total_sectors. + */ + if (vpc_opts->force_size) { + /* This will force the use of total_size for sector count, below */ + cyls = VHD_CHS_MAX_C; + heads = VHD_CHS_MAX_H; + secs_per_cyl = VHD_CHS_MAX_S; + } else { + total_sectors = MIN(VHD_MAX_GEOMETRY, total_size / BDRV_SECTOR_SIZE); + for (i = 0; total_sectors > (int64_t)cyls * heads * secs_per_cyl; i++) { + calculate_geometry(total_sectors + i, &cyls, &heads, &secs_per_cyl); + } + } + + if ((int64_t)cyls * heads * secs_per_cyl == VHD_MAX_GEOMETRY) { + total_sectors = total_size / BDRV_SECTOR_SIZE; + /* Allow a maximum disk size of 2040 GiB */ + if (total_sectors > VHD_MAX_SECTORS) { + error_setg(errp, "Disk size is too large, max size is 2040 GiB"); + return -EFBIG; + } + } else { + total_sectors = (int64_t) cyls * heads * secs_per_cyl; + } + + *out_total_sectors = total_sectors; + if (out_cyls) { + *out_cyls = cyls; + *out_heads = heads; + *out_secs_per_cyl = secs_per_cyl; + } + + return 0; +} + +static int coroutine_fn vpc_co_create(BlockdevCreateOptions *opts, + Error **errp) +{ + BlockdevCreateOptionsVpc *vpc_opts; + BlockBackend *blk = NULL; + BlockDriverState *bs = NULL; + + uint8_t buf[1024]; + VHDFooter *footer = (VHDFooter *) buf; + uint16_t cyls = 0; + uint8_t heads = 0; + uint8_t secs_per_cyl = 0; + int64_t total_sectors; + int64_t total_size; + int disk_type; + int ret = -EIO; + QemuUUID uuid; + + assert(opts->driver == BLOCKDEV_DRIVER_VPC); + vpc_opts = &opts->u.vpc; + + /* Validate options and set default values */ + total_size = vpc_opts->size; + + if (!vpc_opts->has_subformat) { + vpc_opts->subformat = BLOCKDEV_VPC_SUBFORMAT_DYNAMIC; + } + switch (vpc_opts->subformat) { + case BLOCKDEV_VPC_SUBFORMAT_DYNAMIC: + disk_type = VHD_DYNAMIC; + break; + case BLOCKDEV_VPC_SUBFORMAT_FIXED: + disk_type = VHD_FIXED; + break; + default: + g_assert_not_reached(); + } + + /* Create BlockBackend to write to the image */ + bs = bdrv_open_blockdev_ref(vpc_opts->file, errp); + if (bs == NULL) { + return -EIO; + } + + blk = blk_new_with_bs(bs, BLK_PERM_WRITE | BLK_PERM_RESIZE, BLK_PERM_ALL, + errp); + if (!blk) { + ret = -EPERM; + goto out; + } + blk_set_allow_write_beyond_eof(blk, true); + + /* Get geometry and check that it matches the image size*/ + ret = calculate_rounded_image_size(vpc_opts, &cyls, &heads, &secs_per_cyl, + &total_sectors, errp); + if (ret < 0) { + goto out; + } + + if (total_size != total_sectors * BDRV_SECTOR_SIZE) { + error_setg(errp, "The requested image size cannot be represented in " + "CHS geometry"); + error_append_hint(errp, "Try size=%llu or force-size=on (the " + "latter makes the image incompatible with " + "Virtual PC)", + total_sectors * BDRV_SECTOR_SIZE); + ret = -EINVAL; + goto out; + } + + /* Prepare the Hard Disk Footer */ + memset(buf, 0, 1024); + + memcpy(footer->creator, "conectix", 8); + if (vpc_opts->force_size) { + memcpy(footer->creator_app, "qem2", 4); + } else { + memcpy(footer->creator_app, "qemu", 4); + } + memcpy(footer->creator_os, "Wi2k", 4); + + footer->features = cpu_to_be32(0x02); + footer->version = cpu_to_be32(0x00010000); + if (disk_type == VHD_DYNAMIC) { + footer->data_offset = cpu_to_be64(HEADER_SIZE); + } else { + footer->data_offset = cpu_to_be64(0xFFFFFFFFFFFFFFFFULL); + } + footer->timestamp = cpu_to_be32(time(NULL) - VHD_TIMESTAMP_BASE); + + /* Version of Virtual PC 2007 */ + footer->major = cpu_to_be16(0x0005); + footer->minor = cpu_to_be16(0x0003); + footer->orig_size = cpu_to_be64(total_size); + footer->current_size = cpu_to_be64(total_size); + footer->cyls = cpu_to_be16(cyls); + footer->heads = heads; + footer->secs_per_cyl = secs_per_cyl; + + footer->type = cpu_to_be32(disk_type); + + qemu_uuid_generate(&uuid); + footer->uuid = uuid; + + footer->checksum = cpu_to_be32(vpc_checksum(buf, HEADER_SIZE)); + + if (disk_type == VHD_DYNAMIC) { + ret = create_dynamic_disk(blk, buf, total_sectors); + if (ret < 0) { + error_setg(errp, "Unable to create or write VHD header"); + } + } else { + ret = create_fixed_disk(blk, buf, total_size, errp); + } + +out: + blk_unref(blk); + bdrv_unref(bs); + return ret; +} + +static int coroutine_fn vpc_co_create_opts(BlockDriver *drv, + const char *filename, + QemuOpts *opts, + Error **errp) +{ + BlockdevCreateOptions *create_options = NULL; + QDict *qdict; + Visitor *v; + BlockDriverState *bs = NULL; + int ret; + + static const QDictRenames opt_renames[] = { + { VPC_OPT_FORCE_SIZE, "force-size" }, + { NULL, NULL }, + }; + + /* Parse options and convert legacy syntax */ + qdict = qemu_opts_to_qdict_filtered(opts, NULL, &vpc_create_opts, true); + + if (!qdict_rename_keys(qdict, opt_renames, errp)) { + ret = -EINVAL; + goto fail; + } + + /* Create and open the file (protocol layer) */ + ret = bdrv_create_file(filename, opts, errp); + if (ret < 0) { + goto fail; + } + + bs = bdrv_open(filename, NULL, NULL, + BDRV_O_RDWR | BDRV_O_RESIZE | BDRV_O_PROTOCOL, errp); + if (bs == NULL) { + ret = -EIO; + goto fail; + } + + /* Now get the QAPI type BlockdevCreateOptions */ + qdict_put_str(qdict, "driver", "vpc"); + qdict_put_str(qdict, "file", bs->node_name); + + v = qobject_input_visitor_new_flat_confused(qdict, errp); + if (!v) { + ret = -EINVAL; + goto fail; + } + + visit_type_BlockdevCreateOptions(v, NULL, &create_options, errp); + visit_free(v); + if (!create_options) { + ret = -EINVAL; + goto fail; + } + + /* Silently round up size */ + assert(create_options->driver == BLOCKDEV_DRIVER_VPC); + create_options->u.vpc.size = + ROUND_UP(create_options->u.vpc.size, BDRV_SECTOR_SIZE); + + if (!create_options->u.vpc.force_size) { + int64_t total_sectors; + ret = calculate_rounded_image_size(&create_options->u.vpc, NULL, NULL, + NULL, &total_sectors, errp); + if (ret < 0) { + goto fail; + } + + create_options->u.vpc.size = total_sectors * BDRV_SECTOR_SIZE; + } + + + /* Create the vpc image (format layer) */ + ret = vpc_co_create(create_options, errp); + +fail: + qobject_unref(qdict); + bdrv_unref(bs); + qapi_free_BlockdevCreateOptions(create_options); + return ret; +} + + +static int vpc_has_zero_init(BlockDriverState *bs) +{ + BDRVVPCState *s = bs->opaque; + VHDFooter *footer = (VHDFooter *) s->footer_buf; + + if (be32_to_cpu(footer->type) == VHD_FIXED) { + return bdrv_has_zero_init(bs->file->bs); + } else { + return 1; + } +} + +static void vpc_close(BlockDriverState *bs) +{ + BDRVVPCState *s = bs->opaque; + qemu_vfree(s->pagetable); +#ifdef CACHE + g_free(s->pageentry_u8); +#endif + + migrate_del_blocker(s->migration_blocker); + error_free(s->migration_blocker); +} + +static QemuOptsList vpc_create_opts = { + .name = "vpc-create-opts", + .head = QTAILQ_HEAD_INITIALIZER(vpc_create_opts.head), + .desc = { + { + .name = BLOCK_OPT_SIZE, + .type = QEMU_OPT_SIZE, + .help = "Virtual disk size" + }, + { + .name = BLOCK_OPT_SUBFMT, + .type = QEMU_OPT_STRING, + .help = + "Type of virtual hard disk format. Supported formats are " + "{dynamic (default) | fixed} " + }, + { + .name = VPC_OPT_FORCE_SIZE, + .type = QEMU_OPT_BOOL, + .help = "Force disk size calculation to use the actual size " + "specified, rather than using the nearest CHS-based " + "calculation" + }, + { /* end of list */ } + } +}; + +static const char *const vpc_strong_runtime_opts[] = { + VPC_OPT_SIZE_CALC, + + NULL +}; + +static BlockDriver bdrv_vpc = { + .format_name = "vpc", + .instance_size = sizeof(BDRVVPCState), + + .bdrv_probe = vpc_probe, + .bdrv_open = vpc_open, + .bdrv_close = vpc_close, + .bdrv_reopen_prepare = vpc_reopen_prepare, + .bdrv_child_perm = bdrv_default_perms, + .bdrv_co_create = vpc_co_create, + .bdrv_co_create_opts = vpc_co_create_opts, + + .bdrv_co_preadv = vpc_co_preadv, + .bdrv_co_pwritev = vpc_co_pwritev, + .bdrv_co_block_status = vpc_co_block_status, + + .bdrv_get_info = vpc_get_info, + + .is_format = true, + .create_opts = &vpc_create_opts, + .bdrv_has_zero_init = vpc_has_zero_init, + .strong_runtime_opts = vpc_strong_runtime_opts, +}; + +static void bdrv_vpc_init(void) +{ + bdrv_register(&bdrv_vpc); +} + +block_init(bdrv_vpc_init); diff --git a/block/vvfat.c b/block/vvfat.c new file mode 100644 index 000000000..54807f82c --- /dev/null +++ b/block/vvfat.c @@ -0,0 +1,3286 @@ +/* vim:set shiftwidth=4 ts=4: */ +/* + * QEMU Block driver for virtual VFAT (shadows a local directory) + * + * Copyright (c) 2004,2005 Johannes E. Schindelin + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ + +#include "qemu/osdep.h" +#include +#include "qapi/error.h" +#include "block/block_int.h" +#include "block/qdict.h" +#include "qemu/module.h" +#include "qemu/option.h" +#include "qemu/bswap.h" +#include "migration/blocker.h" +#include "qapi/qmp/qdict.h" +#include "qapi/qmp/qstring.h" +#include "qemu/ctype.h" +#include "qemu/cutils.h" +#include "qemu/error-report.h" + +#ifndef S_IWGRP +#define S_IWGRP 0 +#endif +#ifndef S_IWOTH +#define S_IWOTH 0 +#endif + +/* TODO: add ":bootsector=blabla.img:" */ +/* LATER TODO: add automatic boot sector generation from + BOOTEASY.ASM and Ranish Partition Manager + Note that DOS assumes the system files to be the first files in the + file system (test if the boot sector still relies on that fact)! */ +/* MAYBE TODO: write block-visofs.c */ +/* TODO: call try_commit() only after a timeout */ + +/* #define DEBUG */ + +#ifdef DEBUG + +#define DLOG(a) a + +static void checkpoint(void); + +#else + +#define DLOG(a) + +#endif + +/* bootsector OEM name. see related compatibility problems at: + * https://jdebp.eu/FGA/volume-boot-block-oem-name-field.html + * http://seasip.info/Misc/oemid.html + */ +#define BOOTSECTOR_OEM_NAME "MSWIN4.1" + +#define DIR_DELETED 0xe5 +#define DIR_KANJI DIR_DELETED +#define DIR_KANJI_FAKE 0x05 +#define DIR_FREE 0x00 + +/* dynamic array functions */ +typedef struct array_t { + char* pointer; + unsigned int size,next,item_size; +} array_t; + +static inline void array_init(array_t* array,unsigned int item_size) +{ + array->pointer = NULL; + array->size=0; + array->next=0; + array->item_size=item_size; +} + +static inline void array_free(array_t* array) +{ + g_free(array->pointer); + array->size=array->next=0; +} + +/* does not automatically grow */ +static inline void* array_get(array_t* array,unsigned int index) { + assert(index < array->next); + assert(array->pointer); + return array->pointer + index * array->item_size; +} + +static inline void array_ensure_allocated(array_t *array, int index) +{ + if((index + 1) * array->item_size > array->size) { + int new_size = (index + 32) * array->item_size; + array->pointer = g_realloc(array->pointer, new_size); + assert(array->pointer); + memset(array->pointer + array->size, 0, new_size - array->size); + array->size = new_size; + array->next = index + 1; + } +} + +static inline void* array_get_next(array_t* array) { + unsigned int next = array->next; + + array_ensure_allocated(array, next); + array->next = next + 1; + return array_get(array, next); +} + +static inline void* array_insert(array_t* array,unsigned int index,unsigned int count) { + if((array->next+count)*array->item_size>array->size) { + int increment=count*array->item_size; + array->pointer=g_realloc(array->pointer,array->size+increment); + if(!array->pointer) + return NULL; + array->size+=increment; + } + memmove(array->pointer+(index+count)*array->item_size, + array->pointer+index*array->item_size, + (array->next-index)*array->item_size); + array->next+=count; + return array->pointer+index*array->item_size; +} + +static inline int array_remove_slice(array_t* array,int index, int count) +{ + assert(index >=0); + assert(count > 0); + assert(index + count <= array->next); + + memmove(array->pointer + index * array->item_size, + array->pointer + (index + count) * array->item_size, + (array->next - index - count) * array->item_size); + + array->next -= count; + return 0; +} + +static int array_remove(array_t* array,int index) +{ + return array_remove_slice(array, index, 1); +} + +/* return the index for a given member */ +static int array_index(array_t* array, void* pointer) +{ + size_t offset = (char*)pointer - array->pointer; + assert((offset % array->item_size) == 0); + assert(offset/array->item_size < array->next); + return offset/array->item_size; +} + +/* These structures are used to fake a disk and the VFAT filesystem. + * For this reason we need to use QEMU_PACKED. */ + +typedef struct bootsector_t { + uint8_t jump[3]; + uint8_t name[8]; + uint16_t sector_size; + uint8_t sectors_per_cluster; + uint16_t reserved_sectors; + uint8_t number_of_fats; + uint16_t root_entries; + uint16_t total_sectors16; + uint8_t media_type; + uint16_t sectors_per_fat; + uint16_t sectors_per_track; + uint16_t number_of_heads; + uint32_t hidden_sectors; + uint32_t total_sectors; + union { + struct { + uint8_t drive_number; + uint8_t reserved1; + uint8_t signature; + uint32_t id; + uint8_t volume_label[11]; + uint8_t fat_type[8]; + uint8_t ignored[0x1c0]; + } QEMU_PACKED fat16; + struct { + uint32_t sectors_per_fat; + uint16_t flags; + uint8_t major,minor; + uint32_t first_cluster_of_root_dir; + uint16_t info_sector; + uint16_t backup_boot_sector; + uint8_t reserved[12]; + uint8_t drive_number; + uint8_t reserved1; + uint8_t signature; + uint32_t id; + uint8_t volume_label[11]; + uint8_t fat_type[8]; + uint8_t ignored[0x1a4]; + } QEMU_PACKED fat32; + } u; + uint8_t magic[2]; +} QEMU_PACKED bootsector_t; + +typedef struct { + uint8_t head; + uint8_t sector; + uint8_t cylinder; +} mbr_chs_t; + +typedef struct partition_t { + uint8_t attributes; /* 0x80 = bootable */ + mbr_chs_t start_CHS; + uint8_t fs_type; /* 0x1 = FAT12, 0x6 = FAT16, 0xe = FAT16_LBA, 0xb = FAT32, 0xc = FAT32_LBA */ + mbr_chs_t end_CHS; + uint32_t start_sector_long; + uint32_t length_sector_long; +} QEMU_PACKED partition_t; + +typedef struct mbr_t { + uint8_t ignored[0x1b8]; + uint32_t nt_id; + uint8_t ignored2[2]; + partition_t partition[4]; + uint8_t magic[2]; +} QEMU_PACKED mbr_t; + +typedef struct direntry_t { + uint8_t name[8 + 3]; + uint8_t attributes; + uint8_t reserved[2]; + uint16_t ctime; + uint16_t cdate; + uint16_t adate; + uint16_t begin_hi; + uint16_t mtime; + uint16_t mdate; + uint16_t begin; + uint32_t size; +} QEMU_PACKED direntry_t; + +/* this structure are used to transparently access the files */ + +typedef struct mapping_t { + /* begin is the first cluster, end is the last+1 */ + uint32_t begin,end; + /* as s->directory is growable, no pointer may be used here */ + unsigned int dir_index; + /* the clusters of a file may be in any order; this points to the first */ + int first_mapping_index; + union { + /* offset is + * - the offset in the file (in clusters) for a file, or + * - the next cluster of the directory for a directory + */ + struct { + uint32_t offset; + } file; + struct { + int parent_mapping_index; + int first_dir_index; + } dir; + } info; + /* path contains the full path, i.e. it always starts with s->path */ + char* path; + + enum { + MODE_UNDEFINED = 0, + MODE_NORMAL = 1, + MODE_MODIFIED = 2, + MODE_DIRECTORY = 4, + MODE_DELETED = 8, + } mode; + int read_only; +} mapping_t; + +#ifdef DEBUG +static void print_direntry(const struct direntry_t*); +static void print_mapping(const struct mapping_t* mapping); +#endif + +/* here begins the real VVFAT driver */ + +typedef struct BDRVVVFATState { + CoMutex lock; + BlockDriverState* bs; /* pointer to parent */ + unsigned char first_sectors[0x40*0x200]; + + int fat_type; /* 16 or 32 */ + array_t fat,directory,mapping; + char volume_label[11]; + + uint32_t offset_to_bootsector; /* 0 for floppy, 0x3f for disk */ + + unsigned int cluster_size; + unsigned int sectors_per_cluster; + unsigned int sectors_per_fat; + uint32_t last_cluster_of_root_directory; + /* how many entries are available in root directory (0 for FAT32) */ + uint16_t root_entries; + uint32_t sector_count; /* total number of sectors of the partition */ + uint32_t cluster_count; /* total number of clusters of this partition */ + uint32_t max_fat_value; + uint32_t offset_to_fat; + uint32_t offset_to_root_dir; + + int current_fd; + mapping_t* current_mapping; + unsigned char* cluster; /* points to current cluster */ + unsigned char* cluster_buffer; /* points to a buffer to hold temp data */ + unsigned int current_cluster; + + /* write support */ + char* qcow_filename; + BdrvChild* qcow; + void* fat2; + char* used_clusters; + array_t commits; + const char* path; + int downcase_short_names; + + Error *migration_blocker; +} BDRVVVFATState; + +/* take the sector position spos and convert it to Cylinder/Head/Sector position + * if the position is outside the specified geometry, fill maximum value for CHS + * and return 1 to signal overflow. + */ +static int sector2CHS(mbr_chs_t *chs, int spos, int cyls, int heads, int secs) +{ + int head,sector; + sector = spos % secs; spos /= secs; + head = spos % heads; spos /= heads; + if (spos >= cyls) { + /* Overflow, + it happens if 32bit sector positions are used, while CHS is only 24bit. + Windows/Dos is said to take 1023/255/63 as nonrepresentable CHS */ + chs->head = 0xFF; + chs->sector = 0xFF; + chs->cylinder = 0xFF; + return 1; + } + chs->head = (uint8_t)head; + chs->sector = (uint8_t)( (sector+1) | ((spos>>8)<<6) ); + chs->cylinder = (uint8_t)spos; + return 0; +} + +static void init_mbr(BDRVVVFATState *s, int cyls, int heads, int secs) +{ + /* TODO: if the files mbr.img and bootsect.img exist, use them */ + mbr_t* real_mbr=(mbr_t*)s->first_sectors; + partition_t* partition = &(real_mbr->partition[0]); + int lba; + + memset(s->first_sectors,0,512); + + /* Win NT Disk Signature */ + real_mbr->nt_id= cpu_to_le32(0xbe1afdfa); + + partition->attributes=0x80; /* bootable */ + + /* LBA is used when partition is outside the CHS geometry */ + lba = sector2CHS(&partition->start_CHS, s->offset_to_bootsector, + cyls, heads, secs); + lba |= sector2CHS(&partition->end_CHS, s->bs->total_sectors - 1, + cyls, heads, secs); + + /*LBA partitions are identified only by start/length_sector_long not by CHS*/ + partition->start_sector_long = cpu_to_le32(s->offset_to_bootsector); + partition->length_sector_long = cpu_to_le32(s->bs->total_sectors + - s->offset_to_bootsector); + + /* FAT12/FAT16/FAT32 */ + /* DOS uses different types when partition is LBA, + probably to prevent older versions from using CHS on them */ + partition->fs_type = s->fat_type == 12 ? 0x1 : + s->fat_type == 16 ? (lba ? 0xe : 0x06) : + /*s->fat_type == 32*/ (lba ? 0xc : 0x0b); + + real_mbr->magic[0]=0x55; real_mbr->magic[1]=0xaa; +} + +/* direntry functions */ + +static direntry_t *create_long_filename(BDRVVVFATState *s, const char *filename) +{ + int number_of_entries, i; + glong length; + direntry_t *entry; + + gunichar2 *longname = g_utf8_to_utf16(filename, -1, NULL, &length, NULL); + if (!longname) { + fprintf(stderr, "vvfat: invalid UTF-8 name: %s\n", filename); + return NULL; + } + + number_of_entries = DIV_ROUND_UP(length * 2, 26); + + for(i=0;idirectory)); + entry->attributes=0xf; + entry->reserved[0]=0; + entry->begin=0; + entry->name[0]=(number_of_entries-i)|(i==0?0x40:0); + } + for(i=0;i<26*number_of_entries;i++) { + int offset=(i%26); + if(offset<10) offset=1+offset; + else if(offset<22) offset=14+offset-10; + else offset=28+offset-22; + entry=array_get(&(s->directory),s->directory.next-1-(i/26)); + if (i >= 2 * length + 2) { + entry->name[offset] = 0xff; + } else if (i % 2 == 0) { + entry->name[offset] = longname[i / 2] & 0xff; + } else { + entry->name[offset] = longname[i / 2] >> 8; + } + } + g_free(longname); + return array_get(&(s->directory),s->directory.next-number_of_entries); +} + +static char is_free(const direntry_t* direntry) +{ + return direntry->name[0] == DIR_DELETED || direntry->name[0] == DIR_FREE; +} + +static char is_volume_label(const direntry_t* direntry) +{ + return direntry->attributes == 0x28; +} + +static char is_long_name(const direntry_t* direntry) +{ + return direntry->attributes == 0xf; +} + +static char is_short_name(const direntry_t* direntry) +{ + return !is_volume_label(direntry) && !is_long_name(direntry) + && !is_free(direntry); +} + +static char is_directory(const direntry_t* direntry) +{ + return direntry->attributes & 0x10 && direntry->name[0] != DIR_DELETED; +} + +static inline char is_dot(const direntry_t* direntry) +{ + return is_short_name(direntry) && direntry->name[0] == '.'; +} + +static char is_file(const direntry_t* direntry) +{ + return is_short_name(direntry) && !is_directory(direntry); +} + +static inline uint32_t begin_of_direntry(const direntry_t* direntry) +{ + return le16_to_cpu(direntry->begin)|(le16_to_cpu(direntry->begin_hi)<<16); +} + +static inline uint32_t filesize_of_direntry(const direntry_t* direntry) +{ + return le32_to_cpu(direntry->size); +} + +static void set_begin_of_direntry(direntry_t* direntry, uint32_t begin) +{ + direntry->begin = cpu_to_le16(begin & 0xffff); + direntry->begin_hi = cpu_to_le16((begin >> 16) & 0xffff); +} + +static bool valid_filename(const unsigned char *name) +{ + unsigned char c; + if (!strcmp((const char*)name, ".") || !strcmp((const char*)name, "..")) { + return false; + } + for (; (c = *name); name++) { + if (!((c >= '0' && c <= '9') || + (c >= 'A' && c <= 'Z') || + (c >= 'a' && c <= 'z') || + c > 127 || + strchr("$%'-_@~`!(){}^#&.+,;=[]", c) != NULL)) + { + return false; + } + } + return true; +} + +static uint8_t to_valid_short_char(gunichar c) +{ + c = g_unichar_toupper(c); + if ((c >= '0' && c <= '9') || + (c >= 'A' && c <= 'Z') || + strchr("$%'-_@~`!(){}^#&", c) != NULL) { + return c; + } else { + return 0; + } +} + +static direntry_t *create_short_filename(BDRVVVFATState *s, + const char *filename, + unsigned int directory_start) +{ + int i, j = 0; + direntry_t *entry = array_get_next(&(s->directory)); + const gchar *p, *last_dot = NULL; + gunichar c; + bool lossy_conversion = false; + char tail[8]; + + if (!entry) { + return NULL; + } + memset(entry->name, 0x20, sizeof(entry->name)); + + /* copy filename and search last dot */ + for (p = filename; ; p = g_utf8_next_char(p)) { + c = g_utf8_get_char(p); + if (c == '\0') { + break; + } else if (c == '.') { + if (j == 0) { + /* '.' at start of filename */ + lossy_conversion = true; + } else { + if (last_dot) { + lossy_conversion = true; + } + last_dot = p; + } + } else if (!last_dot) { + /* first part of the name; copy it */ + uint8_t v = to_valid_short_char(c); + if (j < 8 && v) { + entry->name[j++] = v; + } else { + lossy_conversion = true; + } + } + } + + /* copy extension (if any) */ + if (last_dot) { + j = 0; + for (p = g_utf8_next_char(last_dot); ; p = g_utf8_next_char(p)) { + c = g_utf8_get_char(p); + if (c == '\0') { + break; + } else { + /* extension; copy it */ + uint8_t v = to_valid_short_char(c); + if (j < 3 && v) { + entry->name[8 + (j++)] = v; + } else { + lossy_conversion = true; + } + } + } + } + + if (entry->name[0] == DIR_KANJI) { + entry->name[0] = DIR_KANJI_FAKE; + } + + /* numeric-tail generation */ + for (j = 0; j < 8; j++) { + if (entry->name[j] == ' ') { + break; + } + } + for (i = lossy_conversion ? 1 : 0; i < 999999; i++) { + direntry_t *entry1; + if (i > 0) { + int len = snprintf(tail, sizeof(tail), "~%u", (unsigned)i); + assert(len <= 7); + memcpy(entry->name + MIN(j, 8 - len), tail, len); + } + for (entry1 = array_get(&(s->directory), directory_start); + entry1 < entry; entry1++) { + if (!is_long_name(entry1) && + !memcmp(entry1->name, entry->name, 11)) { + break; /* found dupe */ + } + } + if (entry1 == entry) { + /* no dupe found */ + return entry; + } + } + return NULL; +} + +/* fat functions */ + +static inline uint8_t fat_chksum(const direntry_t* entry) +{ + uint8_t chksum=0; + int i; + + for (i = 0; i < ARRAY_SIZE(entry->name); i++) { + chksum = (((chksum & 0xfe) >> 1) | + ((chksum & 0x01) ? 0x80 : 0)) + entry->name[i]; + } + + return chksum; +} + +/* if return_time==0, this returns the fat_date, else the fat_time */ +static uint16_t fat_datetime(time_t time,int return_time) { + struct tm* t; + struct tm t1; + t = &t1; + localtime_r(&time,t); + if(return_time) + return cpu_to_le16((t->tm_sec/2)|(t->tm_min<<5)|(t->tm_hour<<11)); + return cpu_to_le16((t->tm_mday)|((t->tm_mon+1)<<5)|((t->tm_year-80)<<9)); +} + +static inline void fat_set(BDRVVVFATState* s,unsigned int cluster,uint32_t value) +{ + if(s->fat_type==32) { + uint32_t* entry=array_get(&(s->fat),cluster); + *entry=cpu_to_le32(value); + } else if(s->fat_type==16) { + uint16_t* entry=array_get(&(s->fat),cluster); + *entry=cpu_to_le16(value&0xffff); + } else { + int offset = (cluster*3/2); + unsigned char* p = array_get(&(s->fat), offset); + switch (cluster&1) { + case 0: + p[0] = value&0xff; + p[1] = (p[1]&0xf0) | ((value>>8)&0xf); + break; + case 1: + p[0] = (p[0]&0xf) | ((value&0xf)<<4); + p[1] = (value>>4); + break; + } + } +} + +static inline uint32_t fat_get(BDRVVVFATState* s,unsigned int cluster) +{ + if(s->fat_type==32) { + uint32_t* entry=array_get(&(s->fat),cluster); + return le32_to_cpu(*entry); + } else if(s->fat_type==16) { + uint16_t* entry=array_get(&(s->fat),cluster); + return le16_to_cpu(*entry); + } else { + const uint8_t* x=(uint8_t*)(s->fat.pointer)+cluster*3/2; + return ((x[0]|(x[1]<<8))>>(cluster&1?4:0))&0x0fff; + } +} + +static inline int fat_eof(BDRVVVFATState* s,uint32_t fat_entry) +{ + if(fat_entry>s->max_fat_value-8) + return -1; + return 0; +} + +static inline void init_fat(BDRVVVFATState* s) +{ + if (s->fat_type == 12) { + array_init(&(s->fat),1); + array_ensure_allocated(&(s->fat), + s->sectors_per_fat * 0x200 * 3 / 2 - 1); + } else { + array_init(&(s->fat),(s->fat_type==32?4:2)); + array_ensure_allocated(&(s->fat), + s->sectors_per_fat * 0x200 / s->fat.item_size - 1); + } + memset(s->fat.pointer,0,s->fat.size); + + switch(s->fat_type) { + case 12: s->max_fat_value=0xfff; break; + case 16: s->max_fat_value=0xffff; break; + case 32: s->max_fat_value=0x0fffffff; break; + default: s->max_fat_value=0; /* error... */ + } + +} + +static inline direntry_t* create_short_and_long_name(BDRVVVFATState* s, + unsigned int directory_start, const char* filename, int is_dot) +{ + int long_index = s->directory.next; + direntry_t* entry = NULL; + direntry_t* entry_long = NULL; + + if(is_dot) { + entry=array_get_next(&(s->directory)); + memset(entry->name, 0x20, sizeof(entry->name)); + memcpy(entry->name,filename,strlen(filename)); + return entry; + } + + entry_long=create_long_filename(s,filename); + entry = create_short_filename(s, filename, directory_start); + + /* calculate checksum; propagate to long name */ + if(entry_long) { + uint8_t chksum=fat_chksum(entry); + + /* calculate anew, because realloc could have taken place */ + entry_long=array_get(&(s->directory),long_index); + while(entry_longreserved[1]=chksum; + entry_long++; + } + } + + return entry; +} + +/* + * Read a directory. (the index of the corresponding mapping must be passed). + */ +static int read_directory(BDRVVVFATState* s, int mapping_index) +{ + mapping_t* mapping = array_get(&(s->mapping), mapping_index); + direntry_t* direntry; + const char* dirname = mapping->path; + int first_cluster = mapping->begin; + int parent_index = mapping->info.dir.parent_mapping_index; + mapping_t* parent_mapping = (mapping_t*) + (parent_index >= 0 ? array_get(&(s->mapping), parent_index) : NULL); + int first_cluster_of_parent = parent_mapping ? parent_mapping->begin : -1; + + DIR* dir=opendir(dirname); + struct dirent* entry; + int i; + + assert(mapping->mode & MODE_DIRECTORY); + + if(!dir) { + mapping->end = mapping->begin; + return -1; + } + + i = mapping->info.dir.first_dir_index = + first_cluster == 0 ? 0 : s->directory.next; + + if (first_cluster != 0) { + /* create the top entries of a subdirectory */ + (void)create_short_and_long_name(s, i, ".", 1); + (void)create_short_and_long_name(s, i, "..", 1); + } + + /* actually read the directory, and allocate the mappings */ + while((entry=readdir(dir))) { + unsigned int length=strlen(dirname)+2+strlen(entry->d_name); + char* buffer; + direntry_t* direntry; + struct stat st; + int is_dot=!strcmp(entry->d_name,"."); + int is_dotdot=!strcmp(entry->d_name,".."); + + if (first_cluster == 0 && s->directory.next >= s->root_entries - 1) { + fprintf(stderr, "Too many entries in root directory\n"); + closedir(dir); + return -2; + } + + if(first_cluster == 0 && (is_dotdot || is_dot)) + continue; + + buffer = g_malloc(length); + snprintf(buffer,length,"%s/%s",dirname,entry->d_name); + + if(stat(buffer,&st)<0) { + g_free(buffer); + continue; + } + + /* create directory entry for this file */ + if (!is_dot && !is_dotdot) { + direntry = create_short_and_long_name(s, i, entry->d_name, 0); + } else { + direntry = array_get(&(s->directory), is_dot ? i : i + 1); + } + direntry->attributes=(S_ISDIR(st.st_mode)?0x10:0x20); + direntry->reserved[0]=direntry->reserved[1]=0; + direntry->ctime=fat_datetime(st.st_ctime,1); + direntry->cdate=fat_datetime(st.st_ctime,0); + direntry->adate=fat_datetime(st.st_atime,0); + direntry->begin_hi=0; + direntry->mtime=fat_datetime(st.st_mtime,1); + direntry->mdate=fat_datetime(st.st_mtime,0); + if(is_dotdot) + set_begin_of_direntry(direntry, first_cluster_of_parent); + else if(is_dot) + set_begin_of_direntry(direntry, first_cluster); + else + direntry->begin=0; /* do that later */ + if (st.st_size > 0x7fffffff) { + fprintf(stderr, "File %s is larger than 2GB\n", buffer); + g_free(buffer); + closedir(dir); + return -2; + } + direntry->size=cpu_to_le32(S_ISDIR(st.st_mode)?0:st.st_size); + + /* create mapping for this file */ + if(!is_dot && !is_dotdot && (S_ISDIR(st.st_mode) || st.st_size)) { + s->current_mapping = array_get_next(&(s->mapping)); + s->current_mapping->begin=0; + s->current_mapping->end=st.st_size; + /* + * we get the direntry of the most recent direntry, which + * contains the short name and all the relevant information. + */ + s->current_mapping->dir_index=s->directory.next-1; + s->current_mapping->first_mapping_index = -1; + if (S_ISDIR(st.st_mode)) { + s->current_mapping->mode = MODE_DIRECTORY; + s->current_mapping->info.dir.parent_mapping_index = + mapping_index; + } else { + s->current_mapping->mode = MODE_UNDEFINED; + s->current_mapping->info.file.offset = 0; + } + s->current_mapping->path=buffer; + s->current_mapping->read_only = + (st.st_mode & (S_IWUSR | S_IWGRP | S_IWOTH)) == 0; + } else { + g_free(buffer); + } + } + closedir(dir); + + /* fill with zeroes up to the end of the cluster */ + while(s->directory.next%(0x10*s->sectors_per_cluster)) { + direntry_t* direntry=array_get_next(&(s->directory)); + memset(direntry,0,sizeof(direntry_t)); + } + + if (s->fat_type != 32 && + mapping_index == 0 && + s->directory.next < s->root_entries) { + /* root directory */ + int cur = s->directory.next; + array_ensure_allocated(&(s->directory), s->root_entries - 1); + s->directory.next = s->root_entries; + memset(array_get(&(s->directory), cur), 0, + (s->root_entries - cur) * sizeof(direntry_t)); + } + + /* re-get the mapping, since s->mapping was possibly realloc()ed */ + mapping = array_get(&(s->mapping), mapping_index); + first_cluster += (s->directory.next - mapping->info.dir.first_dir_index) + * 0x20 / s->cluster_size; + mapping->end = first_cluster; + + direntry = array_get(&(s->directory), mapping->dir_index); + set_begin_of_direntry(direntry, mapping->begin); + + return 0; +} + +static inline uint32_t sector2cluster(BDRVVVFATState* s,off_t sector_num) +{ + return (sector_num - s->offset_to_root_dir) / s->sectors_per_cluster; +} + +static inline off_t cluster2sector(BDRVVVFATState* s, uint32_t cluster_num) +{ + return s->offset_to_root_dir + s->sectors_per_cluster * cluster_num; +} + +static int init_directories(BDRVVVFATState* s, + const char *dirname, int heads, int secs, + Error **errp) +{ + bootsector_t* bootsector; + mapping_t* mapping; + unsigned int i; + unsigned int cluster; + + memset(&(s->first_sectors[0]),0,0x40*0x200); + + s->cluster_size=s->sectors_per_cluster*0x200; + s->cluster_buffer=g_malloc(s->cluster_size); + + /* + * The formula: sc = spf+1+spf*spc*(512*8/fat_type), + * where sc is sector_count, + * spf is sectors_per_fat, + * spc is sectors_per_clusters, and + * fat_type = 12, 16 or 32. + */ + i = 1+s->sectors_per_cluster*0x200*8/s->fat_type; + s->sectors_per_fat=(s->sector_count+i)/i; /* round up */ + + s->offset_to_fat = s->offset_to_bootsector + 1; + s->offset_to_root_dir = s->offset_to_fat + s->sectors_per_fat * 2; + + array_init(&(s->mapping),sizeof(mapping_t)); + array_init(&(s->directory),sizeof(direntry_t)); + + /* add volume label */ + { + direntry_t* entry=array_get_next(&(s->directory)); + entry->attributes=0x28; /* archive | volume label */ + memcpy(entry->name, s->volume_label, sizeof(entry->name)); + } + + /* Now build FAT, and write back information into directory */ + init_fat(s); + + /* TODO: if there are more entries, bootsector has to be adjusted! */ + s->root_entries = 0x02 * 0x10 * s->sectors_per_cluster; + s->cluster_count=sector2cluster(s, s->sector_count); + + mapping = array_get_next(&(s->mapping)); + mapping->begin = 0; + mapping->dir_index = 0; + mapping->info.dir.parent_mapping_index = -1; + mapping->first_mapping_index = -1; + mapping->path = g_strdup(dirname); + i = strlen(mapping->path); + if (i > 0 && mapping->path[i - 1] == '/') + mapping->path[i - 1] = '\0'; + mapping->mode = MODE_DIRECTORY; + mapping->read_only = 0; + s->path = mapping->path; + + for (i = 0, cluster = 0; i < s->mapping.next; i++) { + /* MS-DOS expects the FAT to be 0 for the root directory + * (except for the media byte). */ + /* LATER TODO: still true for FAT32? */ + int fix_fat = (i != 0); + mapping = array_get(&(s->mapping), i); + + if (mapping->mode & MODE_DIRECTORY) { + char *path = mapping->path; + mapping->begin = cluster; + if(read_directory(s, i)) { + error_setg(errp, "Could not read directory %s", path); + return -1; + } + mapping = array_get(&(s->mapping), i); + } else { + assert(mapping->mode == MODE_UNDEFINED); + mapping->mode=MODE_NORMAL; + mapping->begin = cluster; + if (mapping->end > 0) { + direntry_t* direntry = array_get(&(s->directory), + mapping->dir_index); + + mapping->end = cluster + 1 + (mapping->end-1)/s->cluster_size; + set_begin_of_direntry(direntry, mapping->begin); + } else { + mapping->end = cluster + 1; + fix_fat = 0; + } + } + + assert(mapping->begin < mapping->end); + + /* next free cluster */ + cluster = mapping->end; + + if(cluster > s->cluster_count) { + error_setg(errp, + "Directory does not fit in FAT%d (capacity %.2f MB)", + s->fat_type, s->sector_count / 2000.0); + return -1; + } + + /* fix fat for entry */ + if (fix_fat) { + int j; + for(j = mapping->begin; j < mapping->end - 1; j++) + fat_set(s, j, j+1); + fat_set(s, mapping->end - 1, s->max_fat_value); + } + } + + mapping = array_get(&(s->mapping), 0); + s->last_cluster_of_root_directory = mapping->end; + + /* the FAT signature */ + fat_set(s,0,s->max_fat_value); + fat_set(s,1,s->max_fat_value); + + s->current_mapping = NULL; + + bootsector = (bootsector_t *)(s->first_sectors + + s->offset_to_bootsector * 0x200); + bootsector->jump[0]=0xeb; + bootsector->jump[1]=0x3e; + bootsector->jump[2]=0x90; + memcpy(bootsector->name, BOOTSECTOR_OEM_NAME, 8); + bootsector->sector_size=cpu_to_le16(0x200); + bootsector->sectors_per_cluster=s->sectors_per_cluster; + bootsector->reserved_sectors=cpu_to_le16(1); + bootsector->number_of_fats=0x2; /* number of FATs */ + bootsector->root_entries = cpu_to_le16(s->root_entries); + bootsector->total_sectors16=s->sector_count>0xffff?0:cpu_to_le16(s->sector_count); + /* media descriptor: hard disk=0xf8, floppy=0xf0 */ + bootsector->media_type = (s->offset_to_bootsector > 0 ? 0xf8 : 0xf0); + s->fat.pointer[0] = bootsector->media_type; + bootsector->sectors_per_fat=cpu_to_le16(s->sectors_per_fat); + bootsector->sectors_per_track = cpu_to_le16(secs); + bootsector->number_of_heads = cpu_to_le16(heads); + bootsector->hidden_sectors = cpu_to_le32(s->offset_to_bootsector); + bootsector->total_sectors=cpu_to_le32(s->sector_count>0xffff?s->sector_count:0); + + /* LATER TODO: if FAT32, this is wrong */ + /* drive_number: fda=0, hda=0x80 */ + bootsector->u.fat16.drive_number = s->offset_to_bootsector == 0 ? 0 : 0x80; + bootsector->u.fat16.signature=0x29; + bootsector->u.fat16.id=cpu_to_le32(0xfabe1afd); + + memcpy(bootsector->u.fat16.volume_label, s->volume_label, + sizeof(bootsector->u.fat16.volume_label)); + memcpy(bootsector->u.fat16.fat_type, + s->fat_type == 12 ? "FAT12 " : "FAT16 ", 8); + bootsector->magic[0]=0x55; bootsector->magic[1]=0xaa; + + return 0; +} + +#ifdef DEBUG +static BDRVVVFATState *vvv = NULL; +#endif + +static int enable_write_target(BlockDriverState *bs, Error **errp); +static int is_consistent(BDRVVVFATState *s); + +static QemuOptsList runtime_opts = { + .name = "vvfat", + .head = QTAILQ_HEAD_INITIALIZER(runtime_opts.head), + .desc = { + { + .name = "dir", + .type = QEMU_OPT_STRING, + .help = "Host directory to map to the vvfat device", + }, + { + .name = "fat-type", + .type = QEMU_OPT_NUMBER, + .help = "FAT type (12, 16 or 32)", + }, + { + .name = "floppy", + .type = QEMU_OPT_BOOL, + .help = "Create a floppy rather than a hard disk image", + }, + { + .name = "label", + .type = QEMU_OPT_STRING, + .help = "Use a volume label other than QEMU VVFAT", + }, + { + .name = "rw", + .type = QEMU_OPT_BOOL, + .help = "Make the image writable", + }, + { /* end of list */ } + }, +}; + +static void vvfat_parse_filename(const char *filename, QDict *options, + Error **errp) +{ + int fat_type = 0; + bool floppy = false; + bool rw = false; + int i; + + if (!strstart(filename, "fat:", NULL)) { + error_setg(errp, "File name string must start with 'fat:'"); + return; + } + + /* Parse options */ + if (strstr(filename, ":32:")) { + fat_type = 32; + } else if (strstr(filename, ":16:")) { + fat_type = 16; + } else if (strstr(filename, ":12:")) { + fat_type = 12; + } + + if (strstr(filename, ":floppy:")) { + floppy = true; + } + + if (strstr(filename, ":rw:")) { + rw = true; + } + + /* Get the directory name without options */ + i = strrchr(filename, ':') - filename; + assert(i >= 3); + if (filename[i - 2] == ':' && qemu_isalpha(filename[i - 1])) { + /* workaround for DOS drive names */ + filename += i - 1; + } else { + filename += i + 1; + } + + /* Fill in the options QDict */ + qdict_put_str(options, "dir", filename); + qdict_put_int(options, "fat-type", fat_type); + qdict_put_bool(options, "floppy", floppy); + qdict_put_bool(options, "rw", rw); +} + +static int vvfat_open(BlockDriverState *bs, QDict *options, int flags, + Error **errp) +{ + BDRVVVFATState *s = bs->opaque; + int cyls, heads, secs; + bool floppy; + const char *dirname, *label; + QemuOpts *opts; + int ret; + +#ifdef DEBUG + vvv = s; +#endif + + opts = qemu_opts_create(&runtime_opts, NULL, 0, &error_abort); + if (!qemu_opts_absorb_qdict(opts, options, errp)) { + ret = -EINVAL; + goto fail; + } + + dirname = qemu_opt_get(opts, "dir"); + if (!dirname) { + error_setg(errp, "vvfat block driver requires a 'dir' option"); + ret = -EINVAL; + goto fail; + } + + s->fat_type = qemu_opt_get_number(opts, "fat-type", 0); + floppy = qemu_opt_get_bool(opts, "floppy", false); + + memset(s->volume_label, ' ', sizeof(s->volume_label)); + label = qemu_opt_get(opts, "label"); + if (label) { + size_t label_length = strlen(label); + if (label_length > 11) { + error_setg(errp, "vvfat label cannot be longer than 11 bytes"); + ret = -EINVAL; + goto fail; + } + memcpy(s->volume_label, label, label_length); + } else { + memcpy(s->volume_label, "QEMU VVFAT", 10); + } + + if (floppy) { + /* 1.44MB or 2.88MB floppy. 2.88MB can be FAT12 (default) or FAT16. */ + if (!s->fat_type) { + s->fat_type = 12; + secs = 36; + s->sectors_per_cluster = 2; + } else { + secs = s->fat_type == 12 ? 18 : 36; + s->sectors_per_cluster = 1; + } + cyls = 80; + heads = 2; + } else { + /* 32MB or 504MB disk*/ + if (!s->fat_type) { + s->fat_type = 16; + } + s->offset_to_bootsector = 0x3f; + cyls = s->fat_type == 12 ? 64 : 1024; + heads = 16; + secs = 63; + } + + switch (s->fat_type) { + case 32: + warn_report("FAT32 has not been tested. You are welcome to do so!"); + break; + case 16: + case 12: + break; + default: + error_setg(errp, "Valid FAT types are only 12, 16 and 32"); + ret = -EINVAL; + goto fail; + } + + + s->bs = bs; + + /* LATER TODO: if FAT32, adjust */ + s->sectors_per_cluster=0x10; + + s->current_cluster=0xffffffff; + + s->qcow = NULL; + s->qcow_filename = NULL; + s->fat2 = NULL; + s->downcase_short_names = 1; + + DLOG(fprintf(stderr, "vvfat %s chs %d,%d,%d\n", + dirname, cyls, heads, secs)); + + s->sector_count = cyls * heads * secs - s->offset_to_bootsector; + + if (qemu_opt_get_bool(opts, "rw", false)) { + if (!bdrv_is_read_only(bs)) { + ret = enable_write_target(bs, errp); + if (ret < 0) { + goto fail; + } + } else { + ret = -EPERM; + error_setg(errp, + "Unable to set VVFAT to 'rw' when drive is read-only"); + goto fail; + } + } else { + ret = bdrv_apply_auto_read_only(bs, NULL, errp); + if (ret < 0) { + goto fail; + } + } + + bs->total_sectors = cyls * heads * secs; + + if (init_directories(s, dirname, heads, secs, errp)) { + ret = -EIO; + goto fail; + } + + s->sector_count = s->offset_to_root_dir + + s->sectors_per_cluster * s->cluster_count; + + /* Disable migration when vvfat is used rw */ + if (s->qcow) { + error_setg(&s->migration_blocker, + "The vvfat (rw) format used by node '%s' " + "does not support live migration", + bdrv_get_device_or_node_name(bs)); + ret = migrate_add_blocker(s->migration_blocker, errp); + if (ret < 0) { + error_free(s->migration_blocker); + goto fail; + } + } + + if (s->offset_to_bootsector > 0) { + init_mbr(s, cyls, heads, secs); + } + + qemu_co_mutex_init(&s->lock); + + ret = 0; +fail: + qemu_opts_del(opts); + return ret; +} + +static void vvfat_refresh_limits(BlockDriverState *bs, Error **errp) +{ + bs->bl.request_alignment = BDRV_SECTOR_SIZE; /* No sub-sector I/O */ +} + +static inline void vvfat_close_current_file(BDRVVVFATState *s) +{ + if(s->current_mapping) { + s->current_mapping = NULL; + if (s->current_fd) { + qemu_close(s->current_fd); + s->current_fd = 0; + } + } + s->current_cluster = -1; +} + +/* mappings between index1 and index2-1 are supposed to be ordered + * return value is the index of the last mapping for which end>cluster_num + */ +static inline int find_mapping_for_cluster_aux(BDRVVVFATState* s,int cluster_num,int index1,int index2) +{ + while(1) { + int index3; + mapping_t* mapping; + index3=(index1+index2)/2; + mapping=array_get(&(s->mapping),index3); + assert(mapping->begin < mapping->end); + if(mapping->begin>=cluster_num) { + assert(index2!=index3 || index2==0); + if(index2==index3) + return index1; + index2=index3; + } else { + if(index1==index3) + return mapping->end<=cluster_num ? index2 : index1; + index1=index3; + } + assert(index1<=index2); + DLOG(mapping=array_get(&(s->mapping),index1); + assert(mapping->begin<=cluster_num); + assert(index2 >= s->mapping.next || + ((mapping = array_get(&(s->mapping),index2)) && + mapping->end>cluster_num))); + } +} + +static inline mapping_t* find_mapping_for_cluster(BDRVVVFATState* s,int cluster_num) +{ + int index=find_mapping_for_cluster_aux(s,cluster_num,0,s->mapping.next); + mapping_t* mapping; + if(index>=s->mapping.next) + return NULL; + mapping=array_get(&(s->mapping),index); + if(mapping->begin>cluster_num) + return NULL; + assert(mapping->begin<=cluster_num && mapping->end>cluster_num); + return mapping; +} + +static int open_file(BDRVVVFATState* s,mapping_t* mapping) +{ + if(!mapping) + return -1; + if(!s->current_mapping || + strcmp(s->current_mapping->path,mapping->path)) { + /* open file */ + int fd = qemu_open_old(mapping->path, + O_RDONLY | O_BINARY | O_LARGEFILE); + if(fd<0) + return -1; + vvfat_close_current_file(s); + s->current_fd = fd; + s->current_mapping = mapping; + } + return 0; +} + +static inline int read_cluster(BDRVVVFATState *s,int cluster_num) +{ + if(s->current_cluster != cluster_num) { + int result=0; + off_t offset; + assert(!s->current_mapping || s->current_fd || (s->current_mapping->mode & MODE_DIRECTORY)); + if(!s->current_mapping + || s->current_mapping->begin>cluster_num + || s->current_mapping->end<=cluster_num) { + /* binary search of mappings for file */ + mapping_t* mapping=find_mapping_for_cluster(s,cluster_num); + + assert(!mapping || (cluster_num>=mapping->begin && cluster_numend)); + + if (mapping && mapping->mode & MODE_DIRECTORY) { + vvfat_close_current_file(s); + s->current_mapping = mapping; +read_cluster_directory: + offset = s->cluster_size*(cluster_num-s->current_mapping->begin); + s->cluster = (unsigned char*)s->directory.pointer+offset + + 0x20*s->current_mapping->info.dir.first_dir_index; + assert(((s->cluster-(unsigned char*)s->directory.pointer)%s->cluster_size)==0); + assert((char*)s->cluster+s->cluster_size <= s->directory.pointer+s->directory.next*s->directory.item_size); + s->current_cluster = cluster_num; + return 0; + } + + if(open_file(s,mapping)) + return -2; + } else if (s->current_mapping->mode & MODE_DIRECTORY) + goto read_cluster_directory; + + assert(s->current_fd); + + offset=s->cluster_size*(cluster_num-s->current_mapping->begin)+s->current_mapping->info.file.offset; + if(lseek(s->current_fd, offset, SEEK_SET)!=offset) + return -3; + s->cluster=s->cluster_buffer; + result=read(s->current_fd,s->cluster,s->cluster_size); + if(result<0) { + s->current_cluster = -1; + return -1; + } + s->current_cluster = cluster_num; + } + return 0; +} + +#ifdef DEBUG +static void print_direntry(const direntry_t* direntry) +{ + int j = 0; + char buffer[1024]; + + fprintf(stderr, "direntry %p: ", direntry); + if(!direntry) + return; + if(is_long_name(direntry)) { + unsigned char* c=(unsigned char*)direntry; + int i; + for(i=1;i<11 && c[i] && c[i]!=0xff;i+=2) +#define ADD_CHAR(c) {buffer[j] = (c); if (buffer[j] < ' ') buffer[j] = 0xb0; j++;} + ADD_CHAR(c[i]); + for(i=14;i<26 && c[i] && c[i]!=0xff;i+=2) + ADD_CHAR(c[i]); + for(i=28;i<32 && c[i] && c[i]!=0xff;i+=2) + ADD_CHAR(c[i]); + buffer[j] = 0; + fprintf(stderr, "%s\n", buffer); + } else { + int i; + for(i=0;i<11;i++) + ADD_CHAR(direntry->name[i]); + buffer[j] = 0; + fprintf(stderr, "%s attributes=0x%02x begin=%u size=%u\n", + buffer, + direntry->attributes, + begin_of_direntry(direntry),le32_to_cpu(direntry->size)); + } +} + +static void print_mapping(const mapping_t* mapping) +{ + fprintf(stderr, "mapping (%p): begin, end = %u, %u, dir_index = %u, " + "first_mapping_index = %d, name = %s, mode = 0x%x, " , + mapping, mapping->begin, mapping->end, mapping->dir_index, + mapping->first_mapping_index, mapping->path, mapping->mode); + + if (mapping->mode & MODE_DIRECTORY) + fprintf(stderr, "parent_mapping_index = %d, first_dir_index = %d\n", mapping->info.dir.parent_mapping_index, mapping->info.dir.first_dir_index); + else + fprintf(stderr, "offset = %u\n", mapping->info.file.offset); +} +#endif + +static int vvfat_read(BlockDriverState *bs, int64_t sector_num, + uint8_t *buf, int nb_sectors) +{ + BDRVVVFATState *s = bs->opaque; + int i; + + for(i=0;i= bs->total_sectors) + return -1; + if (s->qcow) { + int64_t n; + int ret; + ret = bdrv_is_allocated(s->qcow->bs, sector_num * BDRV_SECTOR_SIZE, + (nb_sectors - i) * BDRV_SECTOR_SIZE, &n); + if (ret < 0) { + return ret; + } + if (ret) { + DLOG(fprintf(stderr, "sectors %" PRId64 "+%" PRId64 + " allocated\n", sector_num, + n >> BDRV_SECTOR_BITS)); + if (bdrv_pread(s->qcow, sector_num * BDRV_SECTOR_SIZE, + buf + i * 0x200, n) < 0) { + return -1; + } + i += (n >> BDRV_SECTOR_BITS) - 1; + sector_num += (n >> BDRV_SECTOR_BITS) - 1; + continue; + } + DLOG(fprintf(stderr, "sector %" PRId64 " not allocated\n", + sector_num)); + } + if (sector_num < s->offset_to_root_dir) { + if (sector_num < s->offset_to_fat) { + memcpy(buf + i * 0x200, + &(s->first_sectors[sector_num * 0x200]), + 0x200); + } else if (sector_num < s->offset_to_fat + s->sectors_per_fat) { + memcpy(buf + i * 0x200, + &(s->fat.pointer[(sector_num + - s->offset_to_fat) * 0x200]), + 0x200); + } else if (sector_num < s->offset_to_root_dir) { + memcpy(buf + i * 0x200, + &(s->fat.pointer[(sector_num - s->offset_to_fat + - s->sectors_per_fat) * 0x200]), + 0x200); + } + } else { + uint32_t sector = sector_num - s->offset_to_root_dir, + sector_offset_in_cluster=(sector%s->sectors_per_cluster), + cluster_num=sector/s->sectors_per_cluster; + if(cluster_num > s->cluster_count || read_cluster(s, cluster_num) != 0) { + /* LATER TODO: strict: return -1; */ + memset(buf+i*0x200,0,0x200); + continue; + } + memcpy(buf+i*0x200,s->cluster+sector_offset_in_cluster*0x200,0x200); + } + } + return 0; +} + +static int coroutine_fn +vvfat_co_preadv(BlockDriverState *bs, uint64_t offset, uint64_t bytes, + QEMUIOVector *qiov, int flags) +{ + int ret; + BDRVVVFATState *s = bs->opaque; + uint64_t sector_num = offset >> BDRV_SECTOR_BITS; + int nb_sectors = bytes >> BDRV_SECTOR_BITS; + void *buf; + + assert(QEMU_IS_ALIGNED(offset, BDRV_SECTOR_SIZE)); + assert(QEMU_IS_ALIGNED(bytes, BDRV_SECTOR_SIZE)); + + buf = g_try_malloc(bytes); + if (bytes && buf == NULL) { + return -ENOMEM; + } + + qemu_co_mutex_lock(&s->lock); + ret = vvfat_read(bs, sector_num, buf, nb_sectors); + qemu_co_mutex_unlock(&s->lock); + + qemu_iovec_from_buf(qiov, 0, buf, bytes); + g_free(buf); + + return ret; +} + +/* LATER TODO: statify all functions */ + +/* + * Idea of the write support (use snapshot): + * + * 1. check if all data is consistent, recording renames, modifications, + * new files and directories (in s->commits). + * + * 2. if the data is not consistent, stop committing + * + * 3. handle renames, and create new files and directories (do not yet + * write their contents) + * + * 4. walk the directories, fixing the mapping and direntries, and marking + * the handled mappings as not deleted + * + * 5. commit the contents of the files + * + * 6. handle deleted files and directories + * + */ + +typedef struct commit_t { + char* path; + union { + struct { uint32_t cluster; } rename; + struct { int dir_index; uint32_t modified_offset; } writeout; + struct { uint32_t first_cluster; } new_file; + struct { uint32_t cluster; } mkdir; + } param; + /* DELETEs and RMDIRs are handled differently: see handle_deletes() */ + enum { + ACTION_RENAME, ACTION_WRITEOUT, ACTION_NEW_FILE, ACTION_MKDIR + } action; +} commit_t; + +static void clear_commits(BDRVVVFATState* s) +{ + int i; +DLOG(fprintf(stderr, "clear_commits (%u commits)\n", s->commits.next)); + for (i = 0; i < s->commits.next; i++) { + commit_t* commit = array_get(&(s->commits), i); + assert(commit->path || commit->action == ACTION_WRITEOUT); + if (commit->action != ACTION_WRITEOUT) { + assert(commit->path); + g_free(commit->path); + } else + assert(commit->path == NULL); + } + s->commits.next = 0; +} + +static void schedule_rename(BDRVVVFATState* s, + uint32_t cluster, char* new_path) +{ + commit_t* commit = array_get_next(&(s->commits)); + commit->path = new_path; + commit->param.rename.cluster = cluster; + commit->action = ACTION_RENAME; +} + +static void schedule_writeout(BDRVVVFATState* s, + int dir_index, uint32_t modified_offset) +{ + commit_t* commit = array_get_next(&(s->commits)); + commit->path = NULL; + commit->param.writeout.dir_index = dir_index; + commit->param.writeout.modified_offset = modified_offset; + commit->action = ACTION_WRITEOUT; +} + +static void schedule_new_file(BDRVVVFATState* s, + char* path, uint32_t first_cluster) +{ + commit_t* commit = array_get_next(&(s->commits)); + commit->path = path; + commit->param.new_file.first_cluster = first_cluster; + commit->action = ACTION_NEW_FILE; +} + +static void schedule_mkdir(BDRVVVFATState* s, uint32_t cluster, char* path) +{ + commit_t* commit = array_get_next(&(s->commits)); + commit->path = path; + commit->param.mkdir.cluster = cluster; + commit->action = ACTION_MKDIR; +} + +typedef struct { + /* + * Since the sequence number is at most 0x3f, and the filename + * length is at most 13 times the sequence number, the maximal + * filename length is 0x3f * 13 bytes. + */ + unsigned char name[0x3f * 13 + 1]; + gunichar2 name2[0x3f * 13 + 1]; + int checksum, len; + int sequence_number; +} long_file_name; + +static void lfn_init(long_file_name* lfn) +{ + lfn->sequence_number = lfn->len = 0; + lfn->checksum = 0x100; +} + +/* return 0 if parsed successfully, > 0 if no long name, < 0 if error */ +static int parse_long_name(long_file_name* lfn, + const direntry_t* direntry) +{ + int i, j, offset; + const unsigned char* pointer = (const unsigned char*)direntry; + + if (!is_long_name(direntry)) + return 1; + + if (pointer[0] & 0x40) { + /* first entry; do some initialization */ + lfn->sequence_number = pointer[0] & 0x3f; + lfn->checksum = pointer[13]; + lfn->name[0] = 0; + lfn->name[lfn->sequence_number * 13] = 0; + } else if ((pointer[0] & 0x3f) != --lfn->sequence_number) { + /* not the expected sequence number */ + return -1; + } else if (pointer[13] != lfn->checksum) { + /* not the expected checksum */ + return -2; + } else if (pointer[12] || pointer[26] || pointer[27]) { + /* invalid zero fields */ + return -3; + } + + offset = 13 * (lfn->sequence_number - 1); + for (i = 0, j = 1; i < 13; i++, j+=2) { + if (j == 11) + j = 14; + else if (j == 26) + j = 28; + + if (pointer[j] == 0 && pointer[j + 1] == 0) { + /* end of long file name */ + break; + } + gunichar2 c = (pointer[j + 1] << 8) + pointer[j]; + lfn->name2[offset + i] = c; + } + + if (pointer[0] & 0x40) { + /* first entry; set len */ + lfn->len = offset + i; + } + if ((pointer[0] & 0x3f) == 0x01) { + /* last entry; finalize entry */ + glong olen; + gchar *utf8 = g_utf16_to_utf8(lfn->name2, lfn->len, NULL, &olen, NULL); + if (!utf8) { + return -4; + } + lfn->len = olen; + memcpy(lfn->name, utf8, olen + 1); + g_free(utf8); + } + + return 0; +} + +/* returns 0 if successful, >0 if no short_name, and <0 on error */ +static int parse_short_name(BDRVVVFATState* s, + long_file_name* lfn, direntry_t* direntry) +{ + int i, j; + + if (!is_short_name(direntry)) + return 1; + + for (j = 7; j >= 0 && direntry->name[j] == ' '; j--); + for (i = 0; i <= j; i++) { + uint8_t c = direntry->name[i]; + if (c != to_valid_short_char(c)) { + return -1; + } else if (s->downcase_short_names) { + lfn->name[i] = qemu_tolower(direntry->name[i]); + } else { + lfn->name[i] = direntry->name[i]; + } + } + + for (j = 2; j >= 0 && direntry->name[8 + j] == ' '; j--) { + } + if (j >= 0) { + lfn->name[i++] = '.'; + lfn->name[i + j + 1] = '\0'; + for (;j >= 0; j--) { + uint8_t c = direntry->name[8 + j]; + if (c != to_valid_short_char(c)) { + return -2; + } else if (s->downcase_short_names) { + lfn->name[i + j] = qemu_tolower(c); + } else { + lfn->name[i + j] = c; + } + } + } else + lfn->name[i + j + 1] = '\0'; + + if (lfn->name[0] == DIR_KANJI_FAKE) { + lfn->name[0] = DIR_KANJI; + } + lfn->len = strlen((char*)lfn->name); + + return 0; +} + +static inline uint32_t modified_fat_get(BDRVVVFATState* s, + unsigned int cluster) +{ + if (cluster < s->last_cluster_of_root_directory) { + if (cluster + 1 == s->last_cluster_of_root_directory) + return s->max_fat_value; + else + return cluster + 1; + } + + if (s->fat_type==32) { + uint32_t* entry=((uint32_t*)s->fat2)+cluster; + return le32_to_cpu(*entry); + } else if (s->fat_type==16) { + uint16_t* entry=((uint16_t*)s->fat2)+cluster; + return le16_to_cpu(*entry); + } else { + const uint8_t* x=s->fat2+cluster*3/2; + return ((x[0]|(x[1]<<8))>>(cluster&1?4:0))&0x0fff; + } +} + +static inline bool cluster_was_modified(BDRVVVFATState *s, + uint32_t cluster_num) +{ + int was_modified = 0; + int i; + + if (s->qcow == NULL) { + return 0; + } + + for (i = 0; !was_modified && i < s->sectors_per_cluster; i++) { + was_modified = bdrv_is_allocated(s->qcow->bs, + (cluster2sector(s, cluster_num) + + i) * BDRV_SECTOR_SIZE, + BDRV_SECTOR_SIZE, NULL); + } + + /* + * Note that this treats failures to learn allocation status the + * same as if an allocation has occurred. It's as safe as + * anything else, given that a failure to learn allocation status + * will probably result in more failures. + */ + return !!was_modified; +} + +static const char* get_basename(const char* path) +{ + char* basename = strrchr(path, '/'); + if (basename == NULL) + return path; + else + return basename + 1; /* strip '/' */ +} + +/* + * The array s->used_clusters holds the states of the clusters. If it is + * part of a file, it has bit 2 set, in case of a directory, bit 1. If it + * was modified, bit 3 is set. + * If any cluster is allocated, but not part of a file or directory, this + * driver refuses to commit. + */ +typedef enum { + USED_DIRECTORY = 1, USED_FILE = 2, USED_ANY = 3, USED_ALLOCATED = 4 +} used_t; + +/* + * get_cluster_count_for_direntry() not only determines how many clusters + * are occupied by direntry, but also if it was renamed or modified. + * + * A file is thought to be renamed *only* if there already was a file with + * exactly the same first cluster, but a different name. + * + * Further, the files/directories handled by this function are + * assumed to be *not* deleted (and *only* those). + */ +static uint32_t get_cluster_count_for_direntry(BDRVVVFATState* s, + direntry_t* direntry, const char* path) +{ + /* + * This is a little bit tricky: + * IF the guest OS just inserts a cluster into the file chain, + * and leaves the rest alone, (i.e. the original file had clusters + * 15 -> 16, but now has 15 -> 32 -> 16), then the following happens: + * + * - do_commit will write the cluster into the file at the given + * offset, but + * + * - the cluster which is overwritten should be moved to a later + * position in the file. + * + * I am not aware that any OS does something as braindead, but this + * situation could happen anyway when not committing for a long time. + * Just to be sure that this does not bite us, detect it, and copy the + * contents of the clusters to-be-overwritten into the qcow. + */ + int copy_it = 0; + int was_modified = 0; + int32_t ret = 0; + + uint32_t cluster_num = begin_of_direntry(direntry); + uint32_t offset = 0; + int first_mapping_index = -1; + mapping_t* mapping = NULL; + const char* basename2 = NULL; + + vvfat_close_current_file(s); + + /* the root directory */ + if (cluster_num == 0) + return 0; + + /* write support */ + if (s->qcow) { + basename2 = get_basename(path); + + mapping = find_mapping_for_cluster(s, cluster_num); + + if (mapping) { + const char* basename; + + assert(mapping->mode & MODE_DELETED); + mapping->mode &= ~MODE_DELETED; + + basename = get_basename(mapping->path); + + assert(mapping->mode & MODE_NORMAL); + + /* rename */ + if (strcmp(basename, basename2)) + schedule_rename(s, cluster_num, g_strdup(path)); + } else if (is_file(direntry)) + /* new file */ + schedule_new_file(s, g_strdup(path), cluster_num); + else { + abort(); + return 0; + } + } + + while(1) { + if (s->qcow) { + if (!copy_it && cluster_was_modified(s, cluster_num)) { + if (mapping == NULL || + mapping->begin > cluster_num || + mapping->end <= cluster_num) + mapping = find_mapping_for_cluster(s, cluster_num); + + + if (mapping && + (mapping->mode & MODE_DIRECTORY) == 0) { + + /* was modified in qcow */ + if (offset != mapping->info.file.offset + s->cluster_size + * (cluster_num - mapping->begin)) { + /* offset of this cluster in file chain has changed */ + abort(); + copy_it = 1; + } else if (offset == 0) { + const char* basename = get_basename(mapping->path); + + if (strcmp(basename, basename2)) + copy_it = 1; + first_mapping_index = array_index(&(s->mapping), mapping); + } + + if (mapping->first_mapping_index != first_mapping_index + && mapping->info.file.offset > 0) { + abort(); + copy_it = 1; + } + + /* need to write out? */ + if (!was_modified && is_file(direntry)) { + was_modified = 1; + schedule_writeout(s, mapping->dir_index, offset); + } + } + } + + if (copy_it) { + int i; + /* + * This is horribly inefficient, but that is okay, since + * it is rarely executed, if at all. + */ + int64_t offset = cluster2sector(s, cluster_num); + + vvfat_close_current_file(s); + for (i = 0; i < s->sectors_per_cluster; i++) { + int res; + + res = bdrv_is_allocated(s->qcow->bs, + (offset + i) * BDRV_SECTOR_SIZE, + BDRV_SECTOR_SIZE, NULL); + if (res < 0) { + return -1; + } + if (!res) { + res = vvfat_read(s->bs, offset, s->cluster_buffer, 1); + if (res) { + return -1; + } + res = bdrv_pwrite(s->qcow, offset * BDRV_SECTOR_SIZE, + s->cluster_buffer, BDRV_SECTOR_SIZE); + if (res < 0) { + return -2; + } + } + } + } + } + + ret++; + if (s->used_clusters[cluster_num] & USED_ANY) + return 0; + s->used_clusters[cluster_num] = USED_FILE; + + cluster_num = modified_fat_get(s, cluster_num); + + if (fat_eof(s, cluster_num)) + return ret; + else if (cluster_num < 2 || cluster_num > s->max_fat_value - 16) + return -1; + + offset += s->cluster_size; + } +} + +/* + * This function looks at the modified data (qcow). + * It returns 0 upon inconsistency or error, and the number of clusters + * used by the directory, its subdirectories and their files. + */ +static int check_directory_consistency(BDRVVVFATState *s, + int cluster_num, const char* path) +{ + int ret = 0; + unsigned char* cluster = g_malloc(s->cluster_size); + direntry_t* direntries = (direntry_t*)cluster; + mapping_t* mapping = find_mapping_for_cluster(s, cluster_num); + + long_file_name lfn; + int path_len = strlen(path); + char path2[PATH_MAX + 1]; + + assert(path_len < PATH_MAX); /* len was tested before! */ + pstrcpy(path2, sizeof(path2), path); + path2[path_len] = '/'; + path2[path_len + 1] = '\0'; + + if (mapping) { + const char* basename = get_basename(mapping->path); + const char* basename2 = get_basename(path); + + assert(mapping->mode & MODE_DIRECTORY); + + assert(mapping->mode & MODE_DELETED); + mapping->mode &= ~MODE_DELETED; + + if (strcmp(basename, basename2)) + schedule_rename(s, cluster_num, g_strdup(path)); + } else + /* new directory */ + schedule_mkdir(s, cluster_num, g_strdup(path)); + + lfn_init(&lfn); + do { + int i; + int subret = 0; + + ret++; + + if (s->used_clusters[cluster_num] & USED_ANY) { + fprintf(stderr, "cluster %d used more than once\n", (int)cluster_num); + goto fail; + } + s->used_clusters[cluster_num] = USED_DIRECTORY; + +DLOG(fprintf(stderr, "read cluster %d (sector %d)\n", (int)cluster_num, (int)cluster2sector(s, cluster_num))); + subret = vvfat_read(s->bs, cluster2sector(s, cluster_num), cluster, + s->sectors_per_cluster); + if (subret) { + fprintf(stderr, "Error fetching direntries\n"); + fail: + g_free(cluster); + return 0; + } + + for (i = 0; i < 0x10 * s->sectors_per_cluster; i++) { + int cluster_count = 0; + +DLOG(fprintf(stderr, "check direntry %d:\n", i); print_direntry(direntries + i)); + if (is_volume_label(direntries + i) || is_dot(direntries + i) || + is_free(direntries + i)) + continue; + + subret = parse_long_name(&lfn, direntries + i); + if (subret < 0) { + fprintf(stderr, "Error in long name\n"); + goto fail; + } + if (subret == 0 || is_free(direntries + i)) + continue; + + if (fat_chksum(direntries+i) != lfn.checksum) { + subret = parse_short_name(s, &lfn, direntries + i); + if (subret < 0) { + fprintf(stderr, "Error in short name (%d)\n", subret); + goto fail; + } + if (subret > 0 || !strcmp((char*)lfn.name, ".") + || !strcmp((char*)lfn.name, "..")) + continue; + } + lfn.checksum = 0x100; /* cannot use long name twice */ + + if (!valid_filename(lfn.name)) { + fprintf(stderr, "Invalid file name\n"); + goto fail; + } + if (path_len + 1 + lfn.len >= PATH_MAX) { + fprintf(stderr, "Name too long: %s/%s\n", path, lfn.name); + goto fail; + } + pstrcpy(path2 + path_len + 1, sizeof(path2) - path_len - 1, + (char*)lfn.name); + + if (is_directory(direntries + i)) { + if (begin_of_direntry(direntries + i) == 0) { + DLOG(fprintf(stderr, "invalid begin for directory: %s\n", path2); print_direntry(direntries + i)); + goto fail; + } + cluster_count = check_directory_consistency(s, + begin_of_direntry(direntries + i), path2); + if (cluster_count == 0) { + DLOG(fprintf(stderr, "problem in directory %s:\n", path2); print_direntry(direntries + i)); + goto fail; + } + } else if (is_file(direntries + i)) { + /* check file size with FAT */ + cluster_count = get_cluster_count_for_direntry(s, direntries + i, path2); + if (cluster_count != + DIV_ROUND_UP(le32_to_cpu(direntries[i].size), s->cluster_size)) { + DLOG(fprintf(stderr, "Cluster count mismatch\n")); + goto fail; + } + } else + abort(); /* cluster_count = 0; */ + + ret += cluster_count; + } + + cluster_num = modified_fat_get(s, cluster_num); + } while(!fat_eof(s, cluster_num)); + + g_free(cluster); + return ret; +} + +/* returns 1 on success */ +static int is_consistent(BDRVVVFATState* s) +{ + int i, check; + int used_clusters_count = 0; + +DLOG(checkpoint()); + /* + * - get modified FAT + * - compare the two FATs (TODO) + * - get buffer for marking used clusters + * - recurse direntries from root (using bs->bdrv_pread to make + * sure to get the new data) + * - check that the FAT agrees with the size + * - count the number of clusters occupied by this directory and + * its files + * - check that the cumulative used cluster count agrees with the + * FAT + * - if all is fine, return number of used clusters + */ + if (s->fat2 == NULL) { + int size = 0x200 * s->sectors_per_fat; + s->fat2 = g_malloc(size); + memcpy(s->fat2, s->fat.pointer, size); + } + check = vvfat_read(s->bs, + s->offset_to_fat, s->fat2, s->sectors_per_fat); + if (check) { + fprintf(stderr, "Could not copy fat\n"); + return 0; + } + assert (s->used_clusters); + for (i = 0; i < sector2cluster(s, s->sector_count); i++) + s->used_clusters[i] &= ~USED_ANY; + + clear_commits(s); + + /* mark every mapped file/directory as deleted. + * (check_directory_consistency() will unmark those still present). */ + if (s->qcow) + for (i = 0; i < s->mapping.next; i++) { + mapping_t* mapping = array_get(&(s->mapping), i); + if (mapping->first_mapping_index < 0) + mapping->mode |= MODE_DELETED; + } + + used_clusters_count = check_directory_consistency(s, 0, s->path); + if (used_clusters_count <= 0) { + DLOG(fprintf(stderr, "problem in directory\n")); + return 0; + } + + check = s->last_cluster_of_root_directory; + for (i = check; i < sector2cluster(s, s->sector_count); i++) { + if (modified_fat_get(s, i)) { + if(!s->used_clusters[i]) { + DLOG(fprintf(stderr, "FAT was modified (%d), but cluster is not used?\n", i)); + return 0; + } + check++; + } + + if (s->used_clusters[i] == USED_ALLOCATED) { + /* allocated, but not used... */ + DLOG(fprintf(stderr, "unused, modified cluster: %d\n", i)); + return 0; + } + } + + if (check != used_clusters_count) + return 0; + + return used_clusters_count; +} + +static inline void adjust_mapping_indices(BDRVVVFATState* s, + int offset, int adjust) +{ + int i; + + for (i = 0; i < s->mapping.next; i++) { + mapping_t* mapping = array_get(&(s->mapping), i); + +#define ADJUST_MAPPING_INDEX(name) \ + if (mapping->name >= offset) \ + mapping->name += adjust + + ADJUST_MAPPING_INDEX(first_mapping_index); + if (mapping->mode & MODE_DIRECTORY) + ADJUST_MAPPING_INDEX(info.dir.parent_mapping_index); + } +} + +/* insert or update mapping */ +static mapping_t* insert_mapping(BDRVVVFATState* s, + uint32_t begin, uint32_t end) +{ + /* + * - find mapping where mapping->begin >= begin, + * - if mapping->begin > begin: insert + * - adjust all references to mappings! + * - else: adjust + * - replace name + */ + int index = find_mapping_for_cluster_aux(s, begin, 0, s->mapping.next); + mapping_t* mapping = NULL; + mapping_t* first_mapping = array_get(&(s->mapping), 0); + + if (index < s->mapping.next && (mapping = array_get(&(s->mapping), index)) + && mapping->begin < begin) { + mapping->end = begin; + index++; + mapping = array_get(&(s->mapping), index); + } + if (index >= s->mapping.next || mapping->begin > begin) { + mapping = array_insert(&(s->mapping), index, 1); + mapping->path = NULL; + adjust_mapping_indices(s, index, +1); + } + + mapping->begin = begin; + mapping->end = end; + +DLOG(mapping_t* next_mapping; +assert(index + 1 >= s->mapping.next || +((next_mapping = array_get(&(s->mapping), index + 1)) && + next_mapping->begin >= end))); + + if (s->current_mapping && first_mapping != (mapping_t*)s->mapping.pointer) + s->current_mapping = array_get(&(s->mapping), + s->current_mapping - first_mapping); + + return mapping; +} + +static int remove_mapping(BDRVVVFATState* s, int mapping_index) +{ + mapping_t* mapping = array_get(&(s->mapping), mapping_index); + mapping_t* first_mapping = array_get(&(s->mapping), 0); + + /* free mapping */ + if (mapping->first_mapping_index < 0) { + g_free(mapping->path); + } + + /* remove from s->mapping */ + array_remove(&(s->mapping), mapping_index); + + /* adjust all references to mappings */ + adjust_mapping_indices(s, mapping_index, -1); + + if (s->current_mapping && first_mapping != (mapping_t*)s->mapping.pointer) + s->current_mapping = array_get(&(s->mapping), + s->current_mapping - first_mapping); + + return 0; +} + +static void adjust_dirindices(BDRVVVFATState* s, int offset, int adjust) +{ + int i; + for (i = 0; i < s->mapping.next; i++) { + mapping_t* mapping = array_get(&(s->mapping), i); + if (mapping->dir_index >= offset) + mapping->dir_index += adjust; + if ((mapping->mode & MODE_DIRECTORY) && + mapping->info.dir.first_dir_index >= offset) + mapping->info.dir.first_dir_index += adjust; + } +} + +static direntry_t* insert_direntries(BDRVVVFATState* s, + int dir_index, int count) +{ + /* + * make room in s->directory, + * adjust_dirindices + */ + direntry_t* result = array_insert(&(s->directory), dir_index, count); + if (result == NULL) + return NULL; + adjust_dirindices(s, dir_index, count); + return result; +} + +static int remove_direntries(BDRVVVFATState* s, int dir_index, int count) +{ + int ret = array_remove_slice(&(s->directory), dir_index, count); + if (ret) + return ret; + adjust_dirindices(s, dir_index, -count); + return 0; +} + +/* + * Adapt the mappings of the cluster chain starting at first cluster + * (i.e. if a file starts at first_cluster, the chain is followed according + * to the modified fat, and the corresponding entries in s->mapping are + * adjusted) + */ +static int commit_mappings(BDRVVVFATState* s, + uint32_t first_cluster, int dir_index) +{ + mapping_t* mapping = find_mapping_for_cluster(s, first_cluster); + direntry_t* direntry = array_get(&(s->directory), dir_index); + uint32_t cluster = first_cluster; + + vvfat_close_current_file(s); + + assert(mapping); + assert(mapping->begin == first_cluster); + mapping->first_mapping_index = -1; + mapping->dir_index = dir_index; + mapping->mode = (dir_index <= 0 || is_directory(direntry)) ? + MODE_DIRECTORY : MODE_NORMAL; + + while (!fat_eof(s, cluster)) { + uint32_t c, c1; + + for (c = cluster, c1 = modified_fat_get(s, c); c + 1 == c1; + c = c1, c1 = modified_fat_get(s, c1)); + + c++; + if (c > mapping->end) { + int index = array_index(&(s->mapping), mapping); + int i, max_i = s->mapping.next - index; + for (i = 1; i < max_i && mapping[i].begin < c; i++); + while (--i > 0) + remove_mapping(s, index + 1); + } + assert(mapping == array_get(&(s->mapping), s->mapping.next - 1) + || mapping[1].begin >= c); + mapping->end = c; + + if (!fat_eof(s, c1)) { + int i = find_mapping_for_cluster_aux(s, c1, 0, s->mapping.next); + mapping_t* next_mapping = i >= s->mapping.next ? NULL : + array_get(&(s->mapping), i); + + if (next_mapping == NULL || next_mapping->begin > c1) { + int i1 = array_index(&(s->mapping), mapping); + + next_mapping = insert_mapping(s, c1, c1+1); + + if (c1 < c) + i1++; + mapping = array_get(&(s->mapping), i1); + } + + next_mapping->dir_index = mapping->dir_index; + next_mapping->first_mapping_index = + mapping->first_mapping_index < 0 ? + array_index(&(s->mapping), mapping) : + mapping->first_mapping_index; + next_mapping->path = mapping->path; + next_mapping->mode = mapping->mode; + next_mapping->read_only = mapping->read_only; + if (mapping->mode & MODE_DIRECTORY) { + next_mapping->info.dir.parent_mapping_index = + mapping->info.dir.parent_mapping_index; + next_mapping->info.dir.first_dir_index = + mapping->info.dir.first_dir_index + + 0x10 * s->sectors_per_cluster * + (mapping->end - mapping->begin); + } else + next_mapping->info.file.offset = mapping->info.file.offset + + mapping->end - mapping->begin; + + mapping = next_mapping; + } + + cluster = c1; + } + + return 0; +} + +static int commit_direntries(BDRVVVFATState* s, + int dir_index, int parent_mapping_index) +{ + direntry_t* direntry = array_get(&(s->directory), dir_index); + uint32_t first_cluster = dir_index == 0 ? 0 : begin_of_direntry(direntry); + mapping_t* mapping = find_mapping_for_cluster(s, first_cluster); + int factor = 0x10 * s->sectors_per_cluster; + int old_cluster_count, new_cluster_count; + int current_dir_index; + int first_dir_index; + int ret, i; + uint32_t c; + + assert(direntry); + assert(mapping); + assert(mapping->begin == first_cluster); + assert(mapping->info.dir.first_dir_index < s->directory.next); + assert(mapping->mode & MODE_DIRECTORY); + assert(dir_index == 0 || is_directory(direntry)); + + DLOG(fprintf(stderr, "commit_direntries for %s, parent_mapping_index %d\n", + mapping->path, parent_mapping_index)); + + current_dir_index = mapping->info.dir.first_dir_index; + first_dir_index = current_dir_index; + mapping->info.dir.parent_mapping_index = parent_mapping_index; + + if (first_cluster == 0) { + old_cluster_count = new_cluster_count = + s->last_cluster_of_root_directory; + } else { + for (old_cluster_count = 0, c = first_cluster; !fat_eof(s, c); + c = fat_get(s, c)) + old_cluster_count++; + + for (new_cluster_count = 0, c = first_cluster; !fat_eof(s, c); + c = modified_fat_get(s, c)) + new_cluster_count++; + } + + if (new_cluster_count > old_cluster_count) { + if (insert_direntries(s, + current_dir_index + factor * old_cluster_count, + factor * (new_cluster_count - old_cluster_count)) == NULL) + return -1; + } else if (new_cluster_count < old_cluster_count) + remove_direntries(s, + current_dir_index + factor * new_cluster_count, + factor * (old_cluster_count - new_cluster_count)); + + for (c = first_cluster; !fat_eof(s, c); c = modified_fat_get(s, c)) { + direntry_t *first_direntry; + void* direntry = array_get(&(s->directory), current_dir_index); + int ret = vvfat_read(s->bs, cluster2sector(s, c), direntry, + s->sectors_per_cluster); + if (ret) + return ret; + + /* The first directory entry on the filesystem is the volume name */ + first_direntry = (direntry_t*) s->directory.pointer; + assert(!memcmp(first_direntry->name, s->volume_label, 11)); + + current_dir_index += factor; + } + + ret = commit_mappings(s, first_cluster, dir_index); + if (ret) + return ret; + + /* recurse */ + for (i = 0; i < factor * new_cluster_count; i++) { + direntry = array_get(&(s->directory), first_dir_index + i); + if (is_directory(direntry) && !is_dot(direntry)) { + mapping = find_mapping_for_cluster(s, first_cluster); + if (mapping == NULL) { + return -1; + } + assert(mapping->mode & MODE_DIRECTORY); + ret = commit_direntries(s, first_dir_index + i, + array_index(&(s->mapping), mapping)); + if (ret) + return ret; + } + } + + return 0; +} + +/* commit one file (adjust contents, adjust mapping), + return first_mapping_index */ +static int commit_one_file(BDRVVVFATState* s, + int dir_index, uint32_t offset) +{ + direntry_t* direntry = array_get(&(s->directory), dir_index); + uint32_t c = begin_of_direntry(direntry); + uint32_t first_cluster = c; + mapping_t* mapping = find_mapping_for_cluster(s, c); + uint32_t size = filesize_of_direntry(direntry); + char *cluster; + uint32_t i; + int fd = 0; + + assert(offset < size); + assert((offset % s->cluster_size) == 0); + + if (mapping == NULL) { + return -1; + } + + for (i = s->cluster_size; i < offset; i += s->cluster_size) + c = modified_fat_get(s, c); + + fd = qemu_open_old(mapping->path, O_RDWR | O_CREAT | O_BINARY, 0666); + if (fd < 0) { + fprintf(stderr, "Could not open %s... (%s, %d)\n", mapping->path, + strerror(errno), errno); + return fd; + } + if (offset > 0) { + if (lseek(fd, offset, SEEK_SET) != offset) { + qemu_close(fd); + return -3; + } + } + + cluster = g_malloc(s->cluster_size); + + while (offset < size) { + uint32_t c1; + int rest_size = (size - offset > s->cluster_size ? + s->cluster_size : size - offset); + int ret; + + c1 = modified_fat_get(s, c); + + assert((size - offset == 0 && fat_eof(s, c)) || + (size > offset && c >=2 && !fat_eof(s, c))); + + ret = vvfat_read(s->bs, cluster2sector(s, c), + (uint8_t*)cluster, DIV_ROUND_UP(rest_size, 0x200)); + + if (ret < 0) { + qemu_close(fd); + g_free(cluster); + return ret; + } + + if (write(fd, cluster, rest_size) < 0) { + qemu_close(fd); + g_free(cluster); + return -2; + } + + offset += rest_size; + c = c1; + } + + if (ftruncate(fd, size)) { + perror("ftruncate()"); + qemu_close(fd); + g_free(cluster); + return -4; + } + qemu_close(fd); + g_free(cluster); + + return commit_mappings(s, first_cluster, dir_index); +} + +#ifdef DEBUG +/* test, if all mappings point to valid direntries */ +static void check1(BDRVVVFATState* s) +{ + int i; + for (i = 0; i < s->mapping.next; i++) { + mapping_t* mapping = array_get(&(s->mapping), i); + if (mapping->mode & MODE_DELETED) { + fprintf(stderr, "deleted\n"); + continue; + } + assert(mapping->dir_index < s->directory.next); + direntry_t* direntry = array_get(&(s->directory), mapping->dir_index); + assert(mapping->begin == begin_of_direntry(direntry) || mapping->first_mapping_index >= 0); + if (mapping->mode & MODE_DIRECTORY) { + assert(mapping->info.dir.first_dir_index + 0x10 * s->sectors_per_cluster * (mapping->end - mapping->begin) <= s->directory.next); + assert((mapping->info.dir.first_dir_index % (0x10 * s->sectors_per_cluster)) == 0); + } + } +} + +/* test, if all direntries have mappings */ +static void check2(BDRVVVFATState* s) +{ + int i; + int first_mapping = -1; + + for (i = 0; i < s->directory.next; i++) { + direntry_t* direntry = array_get(&(s->directory), i); + + if (is_short_name(direntry) && begin_of_direntry(direntry)) { + mapping_t* mapping = find_mapping_for_cluster(s, begin_of_direntry(direntry)); + assert(mapping); + assert(mapping->dir_index == i || is_dot(direntry)); + assert(mapping->begin == begin_of_direntry(direntry) || is_dot(direntry)); + } + + if ((i % (0x10 * s->sectors_per_cluster)) == 0) { + /* cluster start */ + int j, count = 0; + + for (j = 0; j < s->mapping.next; j++) { + mapping_t* mapping = array_get(&(s->mapping), j); + if (mapping->mode & MODE_DELETED) + continue; + if (mapping->mode & MODE_DIRECTORY) { + if (mapping->info.dir.first_dir_index <= i && mapping->info.dir.first_dir_index + 0x10 * s->sectors_per_cluster > i) { + assert(++count == 1); + if (mapping->first_mapping_index == -1) + first_mapping = array_index(&(s->mapping), mapping); + else + assert(first_mapping == mapping->first_mapping_index); + if (mapping->info.dir.parent_mapping_index < 0) + assert(j == 0); + else { + mapping_t* parent = array_get(&(s->mapping), mapping->info.dir.parent_mapping_index); + assert(parent->mode & MODE_DIRECTORY); + assert(parent->info.dir.first_dir_index < mapping->info.dir.first_dir_index); + } + } + } + } + if (count == 0) + first_mapping = -1; + } + } +} +#endif + +static int handle_renames_and_mkdirs(BDRVVVFATState* s) +{ + int i; + +#ifdef DEBUG + fprintf(stderr, "handle_renames\n"); + for (i = 0; i < s->commits.next; i++) { + commit_t* commit = array_get(&(s->commits), i); + fprintf(stderr, "%d, %s (%u, %d)\n", i, + commit->path ? commit->path : "(null)", + commit->param.rename.cluster, commit->action); + } +#endif + + for (i = 0; i < s->commits.next;) { + commit_t* commit = array_get(&(s->commits), i); + if (commit->action == ACTION_RENAME) { + mapping_t* mapping = find_mapping_for_cluster(s, + commit->param.rename.cluster); + char *old_path; + + if (mapping == NULL) { + return -1; + } + old_path = mapping->path; + assert(commit->path); + mapping->path = commit->path; + if (rename(old_path, mapping->path)) + return -2; + + if (mapping->mode & MODE_DIRECTORY) { + int l1 = strlen(mapping->path); + int l2 = strlen(old_path); + int diff = l1 - l2; + direntry_t* direntry = array_get(&(s->directory), + mapping->info.dir.first_dir_index); + uint32_t c = mapping->begin; + int i = 0; + + /* recurse */ + while (!fat_eof(s, c)) { + do { + direntry_t* d = direntry + i; + + if (is_file(d) || (is_directory(d) && !is_dot(d))) { + int l; + char *new_path; + mapping_t* m = find_mapping_for_cluster(s, + begin_of_direntry(d)); + if (m == NULL) { + return -1; + } + l = strlen(m->path); + new_path = g_malloc(l + diff + 1); + + assert(!strncmp(m->path, mapping->path, l2)); + + pstrcpy(new_path, l + diff + 1, mapping->path); + pstrcpy(new_path + l1, l + diff + 1 - l1, + m->path + l2); + + schedule_rename(s, m->begin, new_path); + } + i++; + } while((i % (0x10 * s->sectors_per_cluster)) != 0); + c = fat_get(s, c); + } + } + + g_free(old_path); + array_remove(&(s->commits), i); + continue; + } else if (commit->action == ACTION_MKDIR) { + mapping_t* mapping; + int j, parent_path_len; + +#ifdef __MINGW32__ + if (mkdir(commit->path)) + return -5; +#else + if (mkdir(commit->path, 0755)) + return -5; +#endif + + mapping = insert_mapping(s, commit->param.mkdir.cluster, + commit->param.mkdir.cluster + 1); + if (mapping == NULL) + return -6; + + mapping->mode = MODE_DIRECTORY; + mapping->read_only = 0; + mapping->path = commit->path; + j = s->directory.next; + assert(j); + insert_direntries(s, s->directory.next, + 0x10 * s->sectors_per_cluster); + mapping->info.dir.first_dir_index = j; + + parent_path_len = strlen(commit->path) + - strlen(get_basename(commit->path)) - 1; + for (j = 0; j < s->mapping.next; j++) { + mapping_t* m = array_get(&(s->mapping), j); + if (m->first_mapping_index < 0 && m != mapping && + !strncmp(m->path, mapping->path, parent_path_len) && + strlen(m->path) == parent_path_len) + break; + } + assert(j < s->mapping.next); + mapping->info.dir.parent_mapping_index = j; + + array_remove(&(s->commits), i); + continue; + } + + i++; + } + return 0; +} + +/* + * TODO: make sure that the short name is not matching *another* file + */ +static int handle_commits(BDRVVVFATState* s) +{ + int i, fail = 0; + + vvfat_close_current_file(s); + + for (i = 0; !fail && i < s->commits.next; i++) { + commit_t* commit = array_get(&(s->commits), i); + switch(commit->action) { + case ACTION_RENAME: case ACTION_MKDIR: + abort(); + fail = -2; + break; + case ACTION_WRITEOUT: { +#ifndef NDEBUG + /* these variables are only used by assert() below */ + direntry_t* entry = array_get(&(s->directory), + commit->param.writeout.dir_index); + uint32_t begin = begin_of_direntry(entry); + mapping_t* mapping = find_mapping_for_cluster(s, begin); +#endif + + assert(mapping); + assert(mapping->begin == begin); + assert(commit->path == NULL); + + if (commit_one_file(s, commit->param.writeout.dir_index, + commit->param.writeout.modified_offset)) + fail = -3; + + break; + } + case ACTION_NEW_FILE: { + int begin = commit->param.new_file.first_cluster; + mapping_t* mapping = find_mapping_for_cluster(s, begin); + direntry_t* entry; + int i; + + /* find direntry */ + for (i = 0; i < s->directory.next; i++) { + entry = array_get(&(s->directory), i); + if (is_file(entry) && begin_of_direntry(entry) == begin) + break; + } + + if (i >= s->directory.next) { + fail = -6; + continue; + } + + /* make sure there exists an initial mapping */ + if (mapping && mapping->begin != begin) { + mapping->end = begin; + mapping = NULL; + } + if (mapping == NULL) { + mapping = insert_mapping(s, begin, begin+1); + } + /* most members will be fixed in commit_mappings() */ + assert(commit->path); + mapping->path = commit->path; + mapping->read_only = 0; + mapping->mode = MODE_NORMAL; + mapping->info.file.offset = 0; + + if (commit_one_file(s, i, 0)) + fail = -7; + + break; + } + default: + abort(); + } + } + if (i > 0 && array_remove_slice(&(s->commits), 0, i)) + return -1; + return fail; +} + +static int handle_deletes(BDRVVVFATState* s) +{ + int i, deferred = 1, deleted = 1; + + /* delete files corresponding to mappings marked as deleted */ + /* handle DELETEs and unused mappings (modified_fat_get(s, mapping->begin) == 0) */ + while (deferred && deleted) { + deferred = 0; + deleted = 0; + + for (i = 1; i < s->mapping.next; i++) { + mapping_t* mapping = array_get(&(s->mapping), i); + if (mapping->mode & MODE_DELETED) { + direntry_t* entry = array_get(&(s->directory), + mapping->dir_index); + + if (is_free(entry)) { + /* remove file/directory */ + if (mapping->mode & MODE_DIRECTORY) { + int j, next_dir_index = s->directory.next, + first_dir_index = mapping->info.dir.first_dir_index; + + if (rmdir(mapping->path) < 0) { + if (errno == ENOTEMPTY) { + deferred++; + continue; + } else + return -5; + } + + for (j = 1; j < s->mapping.next; j++) { + mapping_t* m = array_get(&(s->mapping), j); + if (m->mode & MODE_DIRECTORY && + m->info.dir.first_dir_index > + first_dir_index && + m->info.dir.first_dir_index < + next_dir_index) + next_dir_index = + m->info.dir.first_dir_index; + } + remove_direntries(s, first_dir_index, + next_dir_index - first_dir_index); + + deleted++; + } + } else { + if (unlink(mapping->path)) + return -4; + deleted++; + } + DLOG(fprintf(stderr, "DELETE (%d)\n", i); print_mapping(mapping); print_direntry(entry)); + remove_mapping(s, i); + } + } + } + + return 0; +} + +/* + * synchronize mapping with new state: + * + * - copy FAT (with bdrv_pread) + * - mark all filenames corresponding to mappings as deleted + * - recurse direntries from root (using bs->bdrv_pread) + * - delete files corresponding to mappings marked as deleted + */ +static int do_commit(BDRVVVFATState* s) +{ + int ret = 0; + + /* the real meat are the commits. Nothing to do? Move along! */ + if (s->commits.next == 0) + return 0; + + vvfat_close_current_file(s); + + ret = handle_renames_and_mkdirs(s); + if (ret) { + fprintf(stderr, "Error handling renames (%d)\n", ret); + abort(); + return ret; + } + + /* copy FAT (with bdrv_pread) */ + memcpy(s->fat.pointer, s->fat2, 0x200 * s->sectors_per_fat); + + /* recurse direntries from root (using bs->bdrv_pread) */ + ret = commit_direntries(s, 0, -1); + if (ret) { + fprintf(stderr, "Fatal: error while committing (%d)\n", ret); + abort(); + return ret; + } + + ret = handle_commits(s); + if (ret) { + fprintf(stderr, "Error handling commits (%d)\n", ret); + abort(); + return ret; + } + + ret = handle_deletes(s); + if (ret) { + fprintf(stderr, "Error deleting\n"); + abort(); + return ret; + } + + bdrv_make_empty(s->qcow, NULL); + + memset(s->used_clusters, 0, sector2cluster(s, s->sector_count)); + +DLOG(checkpoint()); + return 0; +} + +static int try_commit(BDRVVVFATState* s) +{ + vvfat_close_current_file(s); +DLOG(checkpoint()); + if(!is_consistent(s)) + return -1; + return do_commit(s); +} + +static int vvfat_write(BlockDriverState *bs, int64_t sector_num, + const uint8_t *buf, int nb_sectors) +{ + BDRVVVFATState *s = bs->opaque; + int i, ret; + +DLOG(checkpoint()); + + /* Check if we're operating in read-only mode */ + if (s->qcow == NULL) { + return -EACCES; + } + + vvfat_close_current_file(s); + + /* + * Some sanity checks: + * - do not allow writing to the boot sector + */ + + if (sector_num < s->offset_to_fat) + return -1; + + for (i = sector2cluster(s, sector_num); + i <= sector2cluster(s, sector_num + nb_sectors - 1);) { + mapping_t* mapping = find_mapping_for_cluster(s, i); + if (mapping) { + if (mapping->read_only) { + fprintf(stderr, "Tried to write to write-protected file %s\n", + mapping->path); + return -1; + } + + if (mapping->mode & MODE_DIRECTORY) { + int begin = cluster2sector(s, i); + int end = begin + s->sectors_per_cluster, k; + int dir_index; + const direntry_t* direntries; + long_file_name lfn; + + lfn_init(&lfn); + + if (begin < sector_num) + begin = sector_num; + if (end > sector_num + nb_sectors) + end = sector_num + nb_sectors; + dir_index = mapping->dir_index + + 0x10 * (begin - mapping->begin * s->sectors_per_cluster); + direntries = (direntry_t*)(buf + 0x200 * (begin - sector_num)); + + for (k = 0; k < (end - begin) * 0x10; k++) { + /* no access to the direntry of a read-only file */ + if (is_short_name(direntries + k) && + (direntries[k].attributes & 1)) { + if (memcmp(direntries + k, + array_get(&(s->directory), dir_index + k), + sizeof(direntry_t))) { + warn_report("tried to write to write-protected " + "file"); + return -1; + } + } + } + } + i = mapping->end; + } else + i++; + } + + /* + * Use qcow backend. Commit later. + */ +DLOG(fprintf(stderr, "Write to qcow backend: %d + %d\n", (int)sector_num, nb_sectors)); + ret = bdrv_pwrite(s->qcow, sector_num * BDRV_SECTOR_SIZE, buf, + nb_sectors * BDRV_SECTOR_SIZE); + if (ret < 0) { + fprintf(stderr, "Error writing to qcow backend\n"); + return ret; + } + + for (i = sector2cluster(s, sector_num); + i <= sector2cluster(s, sector_num + nb_sectors - 1); i++) + if (i >= 0) + s->used_clusters[i] |= USED_ALLOCATED; + +DLOG(checkpoint()); + /* TODO: add timeout */ + try_commit(s); + +DLOG(checkpoint()); + return 0; +} + +static int coroutine_fn +vvfat_co_pwritev(BlockDriverState *bs, uint64_t offset, uint64_t bytes, + QEMUIOVector *qiov, int flags) +{ + int ret; + BDRVVVFATState *s = bs->opaque; + uint64_t sector_num = offset >> BDRV_SECTOR_BITS; + int nb_sectors = bytes >> BDRV_SECTOR_BITS; + void *buf; + + assert(QEMU_IS_ALIGNED(offset, BDRV_SECTOR_SIZE)); + assert(QEMU_IS_ALIGNED(bytes, BDRV_SECTOR_SIZE)); + + buf = g_try_malloc(bytes); + if (bytes && buf == NULL) { + return -ENOMEM; + } + qemu_iovec_to_buf(qiov, 0, buf, bytes); + + qemu_co_mutex_lock(&s->lock); + ret = vvfat_write(bs, sector_num, buf, nb_sectors); + qemu_co_mutex_unlock(&s->lock); + + g_free(buf); + + return ret; +} + +static int coroutine_fn vvfat_co_block_status(BlockDriverState *bs, + bool want_zero, int64_t offset, + int64_t bytes, int64_t *n, + int64_t *map, + BlockDriverState **file) +{ + *n = bytes; + return BDRV_BLOCK_DATA; +} + +static int coroutine_fn +write_target_commit(BlockDriverState *bs, uint64_t offset, uint64_t bytes, + QEMUIOVector *qiov, int flags) +{ + int ret; + + BDRVVVFATState* s = *((BDRVVVFATState**) bs->opaque); + qemu_co_mutex_lock(&s->lock); + ret = try_commit(s); + qemu_co_mutex_unlock(&s->lock); + + return ret; +} + +static BlockDriver vvfat_write_target = { + .format_name = "vvfat_write_target", + .instance_size = sizeof(void*), + .bdrv_co_pwritev = write_target_commit, +}; + +static void vvfat_qcow_options(BdrvChildRole role, bool parent_is_format, + int *child_flags, QDict *child_options, + int parent_flags, QDict *parent_options) +{ + qdict_set_default_str(child_options, BDRV_OPT_READ_ONLY, "off"); + qdict_set_default_str(child_options, BDRV_OPT_AUTO_READ_ONLY, "off"); + qdict_set_default_str(child_options, BDRV_OPT_CACHE_NO_FLUSH, "on"); +} + +static const BdrvChildClass child_vvfat_qcow = { + .parent_is_bds = true, + .inherit_options = vvfat_qcow_options, +}; + +static int enable_write_target(BlockDriverState *bs, Error **errp) +{ + BDRVVVFATState *s = bs->opaque; + BlockDriver *bdrv_qcow = NULL; + BlockDriverState *backing; + QemuOpts *opts = NULL; + int ret; + int size = sector2cluster(s, s->sector_count); + QDict *options; + + s->used_clusters = calloc(size, 1); + + array_init(&(s->commits), sizeof(commit_t)); + + s->qcow_filename = g_malloc(PATH_MAX); + ret = get_tmp_filename(s->qcow_filename, PATH_MAX); + if (ret < 0) { + error_setg_errno(errp, -ret, "can't create temporary file"); + goto err; + } + + bdrv_qcow = bdrv_find_format("qcow"); + if (!bdrv_qcow) { + error_setg(errp, "Failed to locate qcow driver"); + ret = -ENOENT; + goto err; + } + + opts = qemu_opts_create(bdrv_qcow->create_opts, NULL, 0, &error_abort); + qemu_opt_set_number(opts, BLOCK_OPT_SIZE, s->sector_count * 512, + &error_abort); + qemu_opt_set(opts, BLOCK_OPT_BACKING_FILE, "fat:", &error_abort); + + ret = bdrv_create(bdrv_qcow, s->qcow_filename, opts, errp); + qemu_opts_del(opts); + if (ret < 0) { + goto err; + } + + options = qdict_new(); + qdict_put_str(options, "write-target.driver", "qcow"); + s->qcow = bdrv_open_child(s->qcow_filename, options, "write-target", bs, + &child_vvfat_qcow, + BDRV_CHILD_DATA | BDRV_CHILD_METADATA, + false, errp); + qobject_unref(options); + if (!s->qcow) { + ret = -EINVAL; + goto err; + } + +#ifndef _WIN32 + unlink(s->qcow_filename); +#endif + + backing = bdrv_new_open_driver(&vvfat_write_target, NULL, BDRV_O_ALLOW_RDWR, + &error_abort); + *(void**) backing->opaque = s; + + bdrv_set_backing_hd(s->bs, backing, &error_abort); + bdrv_unref(backing); + + return 0; + +err: + g_free(s->qcow_filename); + s->qcow_filename = NULL; + return ret; +} + +static void vvfat_child_perm(BlockDriverState *bs, BdrvChild *c, + BdrvChildRole role, + BlockReopenQueue *reopen_queue, + uint64_t perm, uint64_t shared, + uint64_t *nperm, uint64_t *nshared) +{ + BDRVVVFATState *s = bs->opaque; + + assert(c == s->qcow || (role & BDRV_CHILD_COW)); + + if (c == s->qcow) { + /* This is a private node, nobody should try to attach to it */ + *nperm = BLK_PERM_CONSISTENT_READ | BLK_PERM_WRITE; + *nshared = BLK_PERM_WRITE_UNCHANGED; + } else { + /* The backing file is there so 'commit' can use it. vvfat doesn't + * access it in any way. */ + *nperm = 0; + *nshared = BLK_PERM_ALL; + } +} + +static void vvfat_close(BlockDriverState *bs) +{ + BDRVVVFATState *s = bs->opaque; + + vvfat_close_current_file(s); + array_free(&(s->fat)); + array_free(&(s->directory)); + array_free(&(s->mapping)); + g_free(s->cluster_buffer); + + if (s->qcow) { + migrate_del_blocker(s->migration_blocker); + error_free(s->migration_blocker); + } +} + +static const char *const vvfat_strong_runtime_opts[] = { + "dir", + "fat-type", + "floppy", + "label", + "rw", + + NULL +}; + +static BlockDriver bdrv_vvfat = { + .format_name = "vvfat", + .protocol_name = "fat", + .instance_size = sizeof(BDRVVVFATState), + + .bdrv_parse_filename = vvfat_parse_filename, + .bdrv_file_open = vvfat_open, + .bdrv_refresh_limits = vvfat_refresh_limits, + .bdrv_close = vvfat_close, + .bdrv_child_perm = vvfat_child_perm, + + .bdrv_co_preadv = vvfat_co_preadv, + .bdrv_co_pwritev = vvfat_co_pwritev, + .bdrv_co_block_status = vvfat_co_block_status, + + .strong_runtime_opts = vvfat_strong_runtime_opts, +}; + +static void bdrv_vvfat_init(void) +{ + bdrv_register(&bdrv_vvfat); +} + +block_init(bdrv_vvfat_init); + +#ifdef DEBUG +static void checkpoint(void) +{ + assert(((mapping_t*)array_get(&(vvv->mapping), 0))->end == 2); + check1(vvv); + check2(vvv); + assert(!vvv->current_mapping || vvv->current_fd || (vvv->current_mapping->mode & MODE_DIRECTORY)); +} +#endif diff --git a/block/win32-aio.c b/block/win32-aio.c new file mode 100644 index 000000000..b7221a272 --- /dev/null +++ b/block/win32-aio.c @@ -0,0 +1,216 @@ +/* + * Block driver for RAW files (win32) + * + * Copyright (c) 2006 Fabrice Bellard + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ + +#include "qemu/osdep.h" +#include "qemu/timer.h" +#include "block/block_int.h" +#include "block/aio.h" +#include "block/raw-aio.h" +#include "qemu/event_notifier.h" +#include "qemu/iov.h" +#include +#include + +#define FTYPE_FILE 0 +#define FTYPE_CD 1 +#define FTYPE_HARDDISK 2 + +struct QEMUWin32AIOState { + HANDLE hIOCP; + EventNotifier e; + int count; + AioContext *aio_ctx; +}; + +typedef struct QEMUWin32AIOCB { + BlockAIOCB common; + struct QEMUWin32AIOState *ctx; + int nbytes; + OVERLAPPED ov; + QEMUIOVector *qiov; + void *buf; + bool is_read; + bool is_linear; +} QEMUWin32AIOCB; + +/* + * Completes an AIO request (calls the callback and frees the ACB). + */ +static void win32_aio_process_completion(QEMUWin32AIOState *s, + QEMUWin32AIOCB *waiocb, DWORD count) +{ + int ret; + s->count--; + + if (waiocb->ov.Internal != 0) { + ret = -EIO; + } else { + ret = 0; + if (count < waiocb->nbytes) { + /* Short reads mean EOF, pad with zeros. */ + if (waiocb->is_read) { + qemu_iovec_memset(waiocb->qiov, count, 0, + waiocb->qiov->size - count); + } else { + ret = -EINVAL; + } + } + } + + if (!waiocb->is_linear) { + if (ret == 0 && waiocb->is_read) { + QEMUIOVector *qiov = waiocb->qiov; + iov_from_buf(qiov->iov, qiov->niov, 0, waiocb->buf, qiov->size); + } + qemu_vfree(waiocb->buf); + } + + waiocb->common.cb(waiocb->common.opaque, ret); + qemu_aio_unref(waiocb); +} + +static void win32_aio_completion_cb(EventNotifier *e) +{ + QEMUWin32AIOState *s = container_of(e, QEMUWin32AIOState, e); + DWORD count; + ULONG_PTR key; + OVERLAPPED *ov; + + event_notifier_test_and_clear(&s->e); + while (GetQueuedCompletionStatus(s->hIOCP, &count, &key, &ov, 0)) { + QEMUWin32AIOCB *waiocb = container_of(ov, QEMUWin32AIOCB, ov); + + win32_aio_process_completion(s, waiocb, count); + } +} + +static const AIOCBInfo win32_aiocb_info = { + .aiocb_size = sizeof(QEMUWin32AIOCB), +}; + +BlockAIOCB *win32_aio_submit(BlockDriverState *bs, + QEMUWin32AIOState *aio, HANDLE hfile, + uint64_t offset, uint64_t bytes, QEMUIOVector *qiov, + BlockCompletionFunc *cb, void *opaque, int type) +{ + struct QEMUWin32AIOCB *waiocb; + DWORD rc; + + waiocb = qemu_aio_get(&win32_aiocb_info, bs, cb, opaque); + waiocb->nbytes = bytes; + waiocb->qiov = qiov; + waiocb->is_read = (type == QEMU_AIO_READ); + + if (qiov->niov > 1) { + waiocb->buf = qemu_try_blockalign(bs, qiov->size); + if (waiocb->buf == NULL) { + goto out; + } + if (type & QEMU_AIO_WRITE) { + iov_to_buf(qiov->iov, qiov->niov, 0, waiocb->buf, qiov->size); + } + waiocb->is_linear = false; + } else { + waiocb->buf = qiov->iov[0].iov_base; + waiocb->is_linear = true; + } + + memset(&waiocb->ov, 0, sizeof(waiocb->ov)); + waiocb->ov.Offset = (DWORD)offset; + waiocb->ov.OffsetHigh = (DWORD)(offset >> 32); + waiocb->ov.hEvent = event_notifier_get_handle(&aio->e); + + aio->count++; + + if (type & QEMU_AIO_READ) { + rc = ReadFile(hfile, waiocb->buf, waiocb->nbytes, NULL, &waiocb->ov); + } else { + rc = WriteFile(hfile, waiocb->buf, waiocb->nbytes, NULL, &waiocb->ov); + } + if(rc == 0 && GetLastError() != ERROR_IO_PENDING) { + goto out_dec_count; + } + return &waiocb->common; + +out_dec_count: + aio->count--; +out: + qemu_aio_unref(waiocb); + return NULL; +} + +int win32_aio_attach(QEMUWin32AIOState *aio, HANDLE hfile) +{ + if (CreateIoCompletionPort(hfile, aio->hIOCP, (ULONG_PTR) 0, 0) == NULL) { + return -EINVAL; + } else { + return 0; + } +} + +void win32_aio_detach_aio_context(QEMUWin32AIOState *aio, + AioContext *old_context) +{ + aio_set_event_notifier(old_context, &aio->e, false, NULL, NULL); + aio->aio_ctx = NULL; +} + +void win32_aio_attach_aio_context(QEMUWin32AIOState *aio, + AioContext *new_context) +{ + aio->aio_ctx = new_context; + aio_set_event_notifier(new_context, &aio->e, false, + win32_aio_completion_cb, NULL); +} + +QEMUWin32AIOState *win32_aio_init(void) +{ + QEMUWin32AIOState *s; + + s = g_malloc0(sizeof(*s)); + if (event_notifier_init(&s->e, false) < 0) { + goto out_free_state; + } + + s->hIOCP = CreateIoCompletionPort(INVALID_HANDLE_VALUE, NULL, 0, 0); + if (s->hIOCP == NULL) { + goto out_close_efd; + } + + return s; + +out_close_efd: + event_notifier_cleanup(&s->e); +out_free_state: + g_free(s); + return NULL; +} + +void win32_aio_cleanup(QEMUWin32AIOState *aio) +{ + assert(!aio->aio_ctx); + CloseHandle(aio->hIOCP); + event_notifier_cleanup(&aio->e); + g_free(aio); +} diff --git a/block/write-threshold.c b/block/write-threshold.c new file mode 100644 index 000000000..85b78dc2a --- /dev/null +++ b/block/write-threshold.c @@ -0,0 +1,124 @@ +/* + * QEMU System Emulator block write threshold notification + * + * Copyright Red Hat, Inc. 2014 + * + * Authors: + * Francesco Romani + * + * This work is licensed under the terms of the GNU LGPL, version 2 or later. + * See the COPYING.LIB file in the top-level directory. + */ + +#include "qemu/osdep.h" +#include "block/block_int.h" +#include "qemu/coroutine.h" +#include "block/write-threshold.h" +#include "qemu/notify.h" +#include "qapi/error.h" +#include "qapi/qapi-commands-block-core.h" +#include "qapi/qapi-events-block-core.h" + +uint64_t bdrv_write_threshold_get(const BlockDriverState *bs) +{ + return bs->write_threshold_offset; +} + +bool bdrv_write_threshold_is_set(const BlockDriverState *bs) +{ + return bs->write_threshold_offset > 0; +} + +static void write_threshold_disable(BlockDriverState *bs) +{ + if (bdrv_write_threshold_is_set(bs)) { + notifier_with_return_remove(&bs->write_threshold_notifier); + bs->write_threshold_offset = 0; + } +} + +uint64_t bdrv_write_threshold_exceeded(const BlockDriverState *bs, + const BdrvTrackedRequest *req) +{ + if (bdrv_write_threshold_is_set(bs)) { + if (req->offset > bs->write_threshold_offset) { + return (req->offset - bs->write_threshold_offset) + req->bytes; + } + if ((req->offset + req->bytes) > bs->write_threshold_offset) { + return (req->offset + req->bytes) - bs->write_threshold_offset; + } + } + return 0; +} + +static int coroutine_fn before_write_notify(NotifierWithReturn *notifier, + void *opaque) +{ + BdrvTrackedRequest *req = opaque; + BlockDriverState *bs = req->bs; + uint64_t amount = 0; + + amount = bdrv_write_threshold_exceeded(bs, req); + if (amount > 0) { + qapi_event_send_block_write_threshold( + bs->node_name, + amount, + bs->write_threshold_offset); + + /* autodisable to avoid flooding the monitor */ + write_threshold_disable(bs); + } + + return 0; /* should always let other notifiers run */ +} + +static void write_threshold_register_notifier(BlockDriverState *bs) +{ + bs->write_threshold_notifier.notify = before_write_notify; + bdrv_add_before_write_notifier(bs, &bs->write_threshold_notifier); +} + +static void write_threshold_update(BlockDriverState *bs, + int64_t threshold_bytes) +{ + bs->write_threshold_offset = threshold_bytes; +} + +void bdrv_write_threshold_set(BlockDriverState *bs, uint64_t threshold_bytes) +{ + if (bdrv_write_threshold_is_set(bs)) { + if (threshold_bytes > 0) { + write_threshold_update(bs, threshold_bytes); + } else { + write_threshold_disable(bs); + } + } else { + if (threshold_bytes > 0) { + /* avoid multiple registration */ + write_threshold_register_notifier(bs); + write_threshold_update(bs, threshold_bytes); + } + /* discard bogus disable request */ + } +} + +void qmp_block_set_write_threshold(const char *node_name, + uint64_t threshold_bytes, + Error **errp) +{ + BlockDriverState *bs; + AioContext *aio_context; + + bs = bdrv_find_node(node_name); + if (!bs) { + error_setg(errp, "Device '%s' not found", node_name); + return; + } + + aio_context = bdrv_get_aio_context(bs); + aio_context_acquire(aio_context); + + bdrv_write_threshold_set(bs, threshold_bytes); + + aio_context_release(aio_context); +} diff --git a/blockdev-nbd.c b/blockdev-nbd.c new file mode 100644 index 000000000..b264620b9 --- /dev/null +++ b/blockdev-nbd.c @@ -0,0 +1,278 @@ +/* + * Serving QEMU block devices via NBD + * + * Copyright (c) 2012 Red Hat, Inc. + * + * Author: Paolo Bonzini + * + * This work is licensed under the terms of the GNU GPL, version 2 or + * later. See the COPYING file in the top-level directory. + */ + +#include "qemu/osdep.h" +#include "sysemu/blockdev.h" +#include "sysemu/block-backend.h" +#include "hw/block/block.h" +#include "qapi/error.h" +#include "qapi/clone-visitor.h" +#include "qapi/qapi-visit-block-export.h" +#include "qapi/qapi-commands-block-export.h" +#include "block/nbd.h" +#include "io/channel-socket.h" +#include "io/net-listener.h" + +typedef struct NBDServerData { + QIONetListener *listener; + QCryptoTLSCreds *tlscreds; + char *tlsauthz; + uint32_t max_connections; + uint32_t connections; +} NBDServerData; + +static NBDServerData *nbd_server; +static bool is_qemu_nbd; + +static void nbd_update_server_watch(NBDServerData *s); + +void nbd_server_is_qemu_nbd(bool value) +{ + is_qemu_nbd = value; +} + +bool nbd_server_is_running(void) +{ + return nbd_server || is_qemu_nbd; +} + +static void nbd_blockdev_client_closed(NBDClient *client, bool ignored) +{ + nbd_client_put(client); + assert(nbd_server->connections > 0); + nbd_server->connections--; + nbd_update_server_watch(nbd_server); +} + +static void nbd_accept(QIONetListener *listener, QIOChannelSocket *cioc, + gpointer opaque) +{ + nbd_server->connections++; + nbd_update_server_watch(nbd_server); + + qio_channel_set_name(QIO_CHANNEL(cioc), "nbd-server"); + nbd_client_new(cioc, nbd_server->tlscreds, nbd_server->tlsauthz, + nbd_blockdev_client_closed); +} + +static void nbd_update_server_watch(NBDServerData *s) +{ + if (!s->max_connections || s->connections < s->max_connections) { + qio_net_listener_set_client_func(s->listener, nbd_accept, NULL, NULL); + } else { + qio_net_listener_set_client_func(s->listener, NULL, NULL, NULL); + } +} + +static void nbd_server_free(NBDServerData *server) +{ + if (!server) { + return; + } + + qio_net_listener_disconnect(server->listener); + object_unref(OBJECT(server->listener)); + if (server->tlscreds) { + object_unref(OBJECT(server->tlscreds)); + } + g_free(server->tlsauthz); + + g_free(server); +} + +static QCryptoTLSCreds *nbd_get_tls_creds(const char *id, Error **errp) +{ + Object *obj; + QCryptoTLSCreds *creds; + + obj = object_resolve_path_component( + object_get_objects_root(), id); + if (!obj) { + error_setg(errp, "No TLS credentials with id '%s'", + id); + return NULL; + } + creds = (QCryptoTLSCreds *) + object_dynamic_cast(obj, TYPE_QCRYPTO_TLS_CREDS); + if (!creds) { + error_setg(errp, "Object with id '%s' is not TLS credentials", + id); + return NULL; + } + + if (creds->endpoint != QCRYPTO_TLS_CREDS_ENDPOINT_SERVER) { + error_setg(errp, + "Expecting TLS credentials with a server endpoint"); + return NULL; + } + object_ref(obj); + return creds; +} + + +void nbd_server_start(SocketAddress *addr, const char *tls_creds, + const char *tls_authz, uint32_t max_connections, + Error **errp) +{ + if (nbd_server) { + error_setg(errp, "NBD server already running"); + return; + } + + nbd_server = g_new0(NBDServerData, 1); + nbd_server->max_connections = max_connections; + nbd_server->listener = qio_net_listener_new(); + + qio_net_listener_set_name(nbd_server->listener, + "nbd-listener"); + + /* + * Because this server is persistent, a backlog of SOMAXCONN is + * better than trying to size it to max_connections. + */ + if (qio_net_listener_open_sync(nbd_server->listener, addr, SOMAXCONN, + errp) < 0) { + goto error; + } + + if (tls_creds) { + nbd_server->tlscreds = nbd_get_tls_creds(tls_creds, errp); + if (!nbd_server->tlscreds) { + goto error; + } + + /* TODO SOCKET_ADDRESS_TYPE_FD where fd has AF_INET or AF_INET6 */ + if (addr->type != SOCKET_ADDRESS_TYPE_INET) { + error_setg(errp, "TLS is only supported with IPv4/IPv6"); + goto error; + } + } + + nbd_server->tlsauthz = g_strdup(tls_authz); + + nbd_update_server_watch(nbd_server); + + return; + + error: + nbd_server_free(nbd_server); + nbd_server = NULL; +} + +void nbd_server_start_options(NbdServerOptions *arg, Error **errp) +{ + nbd_server_start(arg->addr, arg->tls_creds, arg->tls_authz, + arg->max_connections, errp); +} + +void qmp_nbd_server_start(SocketAddressLegacy *addr, + bool has_tls_creds, const char *tls_creds, + bool has_tls_authz, const char *tls_authz, + bool has_max_connections, uint32_t max_connections, + Error **errp) +{ + SocketAddress *addr_flat = socket_address_flatten(addr); + + nbd_server_start(addr_flat, tls_creds, tls_authz, max_connections, errp); + qapi_free_SocketAddress(addr_flat); +} + +void qmp_nbd_server_add(NbdServerAddOptions *arg, Error **errp) +{ + BlockExport *export; + BlockDriverState *bs; + BlockBackend *on_eject_blk; + BlockExportOptions *export_opts; + + bs = bdrv_lookup_bs(arg->device, arg->device, errp); + if (!bs) { + return; + } + + /* + * block-export-add would default to the node-name, but we may have to use + * the device name as a default here for compatibility. + */ + if (!arg->has_name) { + arg->has_name = true; + arg->name = g_strdup(arg->device); + } + + export_opts = g_new(BlockExportOptions, 1); + *export_opts = (BlockExportOptions) { + .type = BLOCK_EXPORT_TYPE_NBD, + .id = g_strdup(arg->name), + .node_name = g_strdup(bdrv_get_node_name(bs)), + .has_writable = arg->has_writable, + .writable = arg->writable, + }; + QAPI_CLONE_MEMBERS(BlockExportOptionsNbdBase, &export_opts->u.nbd, + qapi_NbdServerAddOptions_base(arg)); + if (arg->has_bitmap) { + export_opts->u.nbd.has_bitmaps = true; + QAPI_LIST_PREPEND(export_opts->u.nbd.bitmaps, g_strdup(arg->bitmap)); + } + + /* + * nbd-server-add doesn't complain when a read-only device should be + * exported as writable, but simply downgrades it. This is an error with + * block-export-add. + */ + if (bdrv_is_read_only(bs)) { + export_opts->has_writable = true; + export_opts->writable = false; + } + + export = blk_exp_add(export_opts, errp); + if (!export) { + goto fail; + } + + /* + * nbd-server-add removes the export when the named BlockBackend used for + * @device goes away. + */ + on_eject_blk = blk_by_name(arg->device); + if (on_eject_blk) { + nbd_export_set_on_eject_blk(export, on_eject_blk); + } + +fail: + qapi_free_BlockExportOptions(export_opts); +} + +void qmp_nbd_server_remove(const char *name, + bool has_mode, BlockExportRemoveMode mode, + Error **errp) +{ + BlockExport *exp; + + exp = blk_exp_find(name); + if (exp && exp->drv->type != BLOCK_EXPORT_TYPE_NBD) { + error_setg(errp, "Block export '%s' is not an NBD export", name); + return; + } + + qmp_block_export_del(name, has_mode, mode, errp); +} + +void qmp_nbd_server_stop(Error **errp) +{ + if (!nbd_server) { + error_setg(errp, "NBD server not running"); + return; + } + + blk_exp_close_all_type(BLOCK_EXPORT_TYPE_NBD); + + nbd_server_free(nbd_server); + nbd_server = NULL; +} diff --git a/blockdev.c b/blockdev.c new file mode 100644 index 000000000..e4dfa65aa --- /dev/null +++ b/blockdev.c @@ -0,0 +1,3816 @@ +/* + * QEMU host block devices + * + * Copyright (c) 2003-2008 Fabrice Bellard + * + * This work is licensed under the terms of the GNU GPL, version 2 or + * later. See the COPYING file in the top-level directory. + * + * This file incorporates work covered by the following copyright and + * permission notice: + * + * Copyright (c) 2003-2008 Fabrice Bellard + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ + +#include "qemu/osdep.h" +#include "sysemu/block-backend.h" +#include "sysemu/blockdev.h" +#include "hw/block/block.h" +#include "block/blockjob.h" +#include "block/qdict.h" +#include "block/throttle-groups.h" +#include "monitor/monitor.h" +#include "qemu/error-report.h" +#include "qemu/option.h" +#include "qemu/qemu-print.h" +#include "qemu/config-file.h" +#include "qapi/qapi-commands-block.h" +#include "qapi/qapi-commands-transaction.h" +#include "qapi/qapi-visit-block-core.h" +#include "qapi/qmp/qdict.h" +#include "qapi/qmp/qnum.h" +#include "qapi/qmp/qstring.h" +#include "qapi/error.h" +#include "qapi/qmp/qerror.h" +#include "qapi/qmp/qlist.h" +#include "qapi/qobject-output-visitor.h" +#include "sysemu/sysemu.h" +#include "sysemu/iothread.h" +#include "block/block_int.h" +#include "block/trace.h" +#include "sysemu/arch_init.h" +#include "sysemu/qtest.h" +#include "sysemu/runstate.h" +#include "sysemu/replay.h" +#include "qemu/cutils.h" +#include "qemu/help_option.h" +#include "qemu/main-loop.h" +#include "qemu/throttle-options.h" + +QTAILQ_HEAD(, BlockDriverState) monitor_bdrv_states = + QTAILQ_HEAD_INITIALIZER(monitor_bdrv_states); + +void bdrv_set_monitor_owned(BlockDriverState *bs) +{ + QTAILQ_INSERT_TAIL(&monitor_bdrv_states, bs, monitor_list); +} + +static const char *const if_name[IF_COUNT] = { + [IF_NONE] = "none", + [IF_IDE] = "ide", + [IF_SCSI] = "scsi", + [IF_FLOPPY] = "floppy", + [IF_PFLASH] = "pflash", + [IF_MTD] = "mtd", + [IF_SD] = "sd", + [IF_VIRTIO] = "virtio", + [IF_XEN] = "xen", +}; + +static int if_max_devs[IF_COUNT] = { + /* + * Do not change these numbers! They govern how drive option + * index maps to unit and bus. That mapping is ABI. + * + * All controllers used to implement if=T drives need to support + * if_max_devs[T] units, for any T with if_max_devs[T] != 0. + * Otherwise, some index values map to "impossible" bus, unit + * values. + * + * For instance, if you change [IF_SCSI] to 255, -drive + * if=scsi,index=12 no longer means bus=1,unit=5, but + * bus=0,unit=12. With an lsi53c895a controller (7 units max), + * the drive can't be set up. Regression. + */ + [IF_IDE] = 2, + [IF_SCSI] = 7, +}; + +/** + * Boards may call this to offer board-by-board overrides + * of the default, global values. + */ +void override_max_devs(BlockInterfaceType type, int max_devs) +{ + BlockBackend *blk; + DriveInfo *dinfo; + + if (max_devs <= 0) { + return; + } + + for (blk = blk_next(NULL); blk; blk = blk_next(blk)) { + dinfo = blk_legacy_dinfo(blk); + if (dinfo->type == type) { + fprintf(stderr, "Cannot override units-per-bus property of" + " the %s interface, because a drive of that type has" + " already been added.\n", if_name[type]); + g_assert_not_reached(); + } + } + + if_max_devs[type] = max_devs; +} + +/* + * We automatically delete the drive when a device using it gets + * unplugged. Questionable feature, but we can't just drop it. + * Device models call blockdev_mark_auto_del() to schedule the + * automatic deletion, and generic qdev code calls blockdev_auto_del() + * when deletion is actually safe. + */ +void blockdev_mark_auto_del(BlockBackend *blk) +{ + DriveInfo *dinfo = blk_legacy_dinfo(blk); + BlockJob *job; + + if (!dinfo) { + return; + } + + for (job = block_job_next(NULL); job; job = block_job_next(job)) { + if (block_job_has_bdrv(job, blk_bs(blk))) { + AioContext *aio_context = job->job.aio_context; + aio_context_acquire(aio_context); + + job_cancel(&job->job, false); + + aio_context_release(aio_context); + } + } + + dinfo->auto_del = 1; +} + +void blockdev_auto_del(BlockBackend *blk) +{ + DriveInfo *dinfo = blk_legacy_dinfo(blk); + + if (dinfo && dinfo->auto_del) { + monitor_remove_blk(blk); + blk_unref(blk); + } +} + +/** + * Returns the current mapping of how many units per bus + * a particular interface can support. + * + * A positive integer indicates n units per bus. + * 0 implies the mapping has not been established. + * -1 indicates an invalid BlockInterfaceType was given. + */ +int drive_get_max_devs(BlockInterfaceType type) +{ + if (type >= IF_IDE && type < IF_COUNT) { + return if_max_devs[type]; + } + + return -1; +} + +static int drive_index_to_bus_id(BlockInterfaceType type, int index) +{ + int max_devs = if_max_devs[type]; + return max_devs ? index / max_devs : 0; +} + +static int drive_index_to_unit_id(BlockInterfaceType type, int index) +{ + int max_devs = if_max_devs[type]; + return max_devs ? index % max_devs : index; +} + +QemuOpts *drive_def(const char *optstr) +{ + return qemu_opts_parse_noisily(qemu_find_opts("drive"), optstr, false); +} + +QemuOpts *drive_add(BlockInterfaceType type, int index, const char *file, + const char *optstr) +{ + QemuOpts *opts; + + opts = drive_def(optstr); + if (!opts) { + return NULL; + } + if (type != IF_DEFAULT) { + qemu_opt_set(opts, "if", if_name[type], &error_abort); + } + if (index >= 0) { + qemu_opt_set_number(opts, "index", index, &error_abort); + } + if (file) + qemu_opt_set(opts, "file", file, &error_abort); + return opts; +} + +DriveInfo *drive_get(BlockInterfaceType type, int bus, int unit) +{ + BlockBackend *blk; + DriveInfo *dinfo; + + for (blk = blk_next(NULL); blk; blk = blk_next(blk)) { + dinfo = blk_legacy_dinfo(blk); + if (dinfo && dinfo->type == type + && dinfo->bus == bus && dinfo->unit == unit) { + return dinfo; + } + } + + return NULL; +} + +void drive_mark_claimed_by_board(void) +{ + BlockBackend *blk; + DriveInfo *dinfo; + + for (blk = blk_next(NULL); blk; blk = blk_next(blk)) { + dinfo = blk_legacy_dinfo(blk); + if (dinfo && blk_get_attached_dev(blk)) { + dinfo->claimed_by_board = true; + } + } +} + +void drive_check_orphaned(void) +{ + BlockBackend *blk; + DriveInfo *dinfo; + Location loc; + bool orphans = false; + + for (blk = blk_next(NULL); blk; blk = blk_next(blk)) { + dinfo = blk_legacy_dinfo(blk); + if (dinfo->is_default || dinfo->type == IF_NONE) { + continue; + } + if (!blk_get_attached_dev(blk)) { + loc_push_none(&loc); + qemu_opts_loc_restore(dinfo->opts); + error_report("machine type does not support" + " if=%s,bus=%d,unit=%d", + if_name[dinfo->type], dinfo->bus, dinfo->unit); + loc_pop(&loc); + orphans = true; + continue; + } + if (!dinfo->claimed_by_board && dinfo->type != IF_VIRTIO) { + loc_push_none(&loc); + qemu_opts_loc_restore(dinfo->opts); + warn_report("bogus if=%s is deprecated, use if=none", + if_name[dinfo->type]); + loc_pop(&loc); + } + } + + if (orphans) { + exit(1); + } +} + +DriveInfo *drive_get_by_index(BlockInterfaceType type, int index) +{ + return drive_get(type, + drive_index_to_bus_id(type, index), + drive_index_to_unit_id(type, index)); +} + +int drive_get_max_bus(BlockInterfaceType type) +{ + int max_bus; + BlockBackend *blk; + DriveInfo *dinfo; + + max_bus = -1; + for (blk = blk_next(NULL); blk; blk = blk_next(blk)) { + dinfo = blk_legacy_dinfo(blk); + if (dinfo && dinfo->type == type && dinfo->bus > max_bus) { + max_bus = dinfo->bus; + } + } + return max_bus; +} + +/* Get a block device. This should only be used for single-drive devices + (e.g. SD/Floppy/MTD). Multi-disk devices (scsi/ide) should use the + appropriate bus. */ +DriveInfo *drive_get_next(BlockInterfaceType type) +{ + static int next_block_unit[IF_COUNT]; + + return drive_get(type, 0, next_block_unit[type]++); +} + +static void bdrv_format_print(void *opaque, const char *name) +{ + qemu_printf(" %s", name); +} + +typedef struct { + QEMUBH *bh; + BlockDriverState *bs; +} BDRVPutRefBH; + +static int parse_block_error_action(const char *buf, bool is_read, Error **errp) +{ + if (!strcmp(buf, "ignore")) { + return BLOCKDEV_ON_ERROR_IGNORE; + } else if (!is_read && !strcmp(buf, "enospc")) { + return BLOCKDEV_ON_ERROR_ENOSPC; + } else if (!strcmp(buf, "stop")) { + return BLOCKDEV_ON_ERROR_STOP; + } else if (!strcmp(buf, "report")) { + return BLOCKDEV_ON_ERROR_REPORT; + } else { + error_setg(errp, "'%s' invalid %s error action", + buf, is_read ? "read" : "write"); + return -1; + } +} + +static bool parse_stats_intervals(BlockAcctStats *stats, QList *intervals, + Error **errp) +{ + const QListEntry *entry; + for (entry = qlist_first(intervals); entry; entry = qlist_next(entry)) { + switch (qobject_type(entry->value)) { + + case QTYPE_QSTRING: { + unsigned long long length; + const char *str = qstring_get_str(qobject_to(QString, + entry->value)); + if (parse_uint_full(str, &length, 10) == 0 && + length > 0 && length <= UINT_MAX) { + block_acct_add_interval(stats, (unsigned) length); + } else { + error_setg(errp, "Invalid interval length: %s", str); + return false; + } + break; + } + + case QTYPE_QNUM: { + int64_t length = qnum_get_int(qobject_to(QNum, entry->value)); + + if (length > 0 && length <= UINT_MAX) { + block_acct_add_interval(stats, (unsigned) length); + } else { + error_setg(errp, "Invalid interval length: %" PRId64, length); + return false; + } + break; + } + + default: + error_setg(errp, "The specification of stats-intervals is invalid"); + return false; + } + } + return true; +} + +typedef enum { MEDIA_DISK, MEDIA_CDROM } DriveMediaType; + +/* All parameters but @opts are optional and may be set to NULL. */ +static void extract_common_blockdev_options(QemuOpts *opts, int *bdrv_flags, + const char **throttling_group, ThrottleConfig *throttle_cfg, + BlockdevDetectZeroesOptions *detect_zeroes, Error **errp) +{ + Error *local_error = NULL; + const char *aio; + + if (bdrv_flags) { + if (qemu_opt_get_bool(opts, "copy-on-read", false)) { + *bdrv_flags |= BDRV_O_COPY_ON_READ; + } + + if ((aio = qemu_opt_get(opts, "aio")) != NULL) { + if (bdrv_parse_aio(aio, bdrv_flags) < 0) { + error_setg(errp, "invalid aio option"); + return; + } + } + } + + /* disk I/O throttling */ + if (throttling_group) { + *throttling_group = qemu_opt_get(opts, "throttling.group"); + } + + if (throttle_cfg) { + throttle_config_init(throttle_cfg); + throttle_cfg->buckets[THROTTLE_BPS_TOTAL].avg = + qemu_opt_get_number(opts, "throttling.bps-total", 0); + throttle_cfg->buckets[THROTTLE_BPS_READ].avg = + qemu_opt_get_number(opts, "throttling.bps-read", 0); + throttle_cfg->buckets[THROTTLE_BPS_WRITE].avg = + qemu_opt_get_number(opts, "throttling.bps-write", 0); + throttle_cfg->buckets[THROTTLE_OPS_TOTAL].avg = + qemu_opt_get_number(opts, "throttling.iops-total", 0); + throttle_cfg->buckets[THROTTLE_OPS_READ].avg = + qemu_opt_get_number(opts, "throttling.iops-read", 0); + throttle_cfg->buckets[THROTTLE_OPS_WRITE].avg = + qemu_opt_get_number(opts, "throttling.iops-write", 0); + + throttle_cfg->buckets[THROTTLE_BPS_TOTAL].max = + qemu_opt_get_number(opts, "throttling.bps-total-max", 0); + throttle_cfg->buckets[THROTTLE_BPS_READ].max = + qemu_opt_get_number(opts, "throttling.bps-read-max", 0); + throttle_cfg->buckets[THROTTLE_BPS_WRITE].max = + qemu_opt_get_number(opts, "throttling.bps-write-max", 0); + throttle_cfg->buckets[THROTTLE_OPS_TOTAL].max = + qemu_opt_get_number(opts, "throttling.iops-total-max", 0); + throttle_cfg->buckets[THROTTLE_OPS_READ].max = + qemu_opt_get_number(opts, "throttling.iops-read-max", 0); + throttle_cfg->buckets[THROTTLE_OPS_WRITE].max = + qemu_opt_get_number(opts, "throttling.iops-write-max", 0); + + throttle_cfg->buckets[THROTTLE_BPS_TOTAL].burst_length = + qemu_opt_get_number(opts, "throttling.bps-total-max-length", 1); + throttle_cfg->buckets[THROTTLE_BPS_READ].burst_length = + qemu_opt_get_number(opts, "throttling.bps-read-max-length", 1); + throttle_cfg->buckets[THROTTLE_BPS_WRITE].burst_length = + qemu_opt_get_number(opts, "throttling.bps-write-max-length", 1); + throttle_cfg->buckets[THROTTLE_OPS_TOTAL].burst_length = + qemu_opt_get_number(opts, "throttling.iops-total-max-length", 1); + throttle_cfg->buckets[THROTTLE_OPS_READ].burst_length = + qemu_opt_get_number(opts, "throttling.iops-read-max-length", 1); + throttle_cfg->buckets[THROTTLE_OPS_WRITE].burst_length = + qemu_opt_get_number(opts, "throttling.iops-write-max-length", 1); + + throttle_cfg->op_size = + qemu_opt_get_number(opts, "throttling.iops-size", 0); + + if (!throttle_is_valid(throttle_cfg, errp)) { + return; + } + } + + if (detect_zeroes) { + *detect_zeroes = + qapi_enum_parse(&BlockdevDetectZeroesOptions_lookup, + qemu_opt_get(opts, "detect-zeroes"), + BLOCKDEV_DETECT_ZEROES_OPTIONS_OFF, + &local_error); + if (local_error) { + error_propagate(errp, local_error); + return; + } + } +} + +/* Takes the ownership of bs_opts */ +static BlockBackend *blockdev_init(const char *file, QDict *bs_opts, + Error **errp) +{ + const char *buf; + int bdrv_flags = 0; + int on_read_error, on_write_error; + bool account_invalid, account_failed; + bool writethrough, read_only; + BlockBackend *blk; + BlockDriverState *bs; + ThrottleConfig cfg; + int snapshot = 0; + Error *error = NULL; + QemuOpts *opts; + QDict *interval_dict = NULL; + QList *interval_list = NULL; + const char *id; + BlockdevDetectZeroesOptions detect_zeroes = + BLOCKDEV_DETECT_ZEROES_OPTIONS_OFF; + const char *throttling_group = NULL; + + /* Check common options by copying from bs_opts to opts, all other options + * stay in bs_opts for processing by bdrv_open(). */ + id = qdict_get_try_str(bs_opts, "id"); + opts = qemu_opts_create(&qemu_common_drive_opts, id, 1, errp); + if (!opts) { + goto err_no_opts; + } + + if (!qemu_opts_absorb_qdict(opts, bs_opts, errp)) { + goto early_err; + } + + if (id) { + qdict_del(bs_opts, "id"); + } + + /* extract parameters */ + snapshot = qemu_opt_get_bool(opts, "snapshot", 0); + + account_invalid = qemu_opt_get_bool(opts, "stats-account-invalid", true); + account_failed = qemu_opt_get_bool(opts, "stats-account-failed", true); + + writethrough = !qemu_opt_get_bool(opts, BDRV_OPT_CACHE_WB, true); + + id = qemu_opts_id(opts); + + qdict_extract_subqdict(bs_opts, &interval_dict, "stats-intervals."); + qdict_array_split(interval_dict, &interval_list); + + if (qdict_size(interval_dict) != 0) { + error_setg(errp, "Invalid option stats-intervals.%s", + qdict_first(interval_dict)->key); + goto early_err; + } + + extract_common_blockdev_options(opts, &bdrv_flags, &throttling_group, &cfg, + &detect_zeroes, &error); + if (error) { + error_propagate(errp, error); + goto early_err; + } + + if ((buf = qemu_opt_get(opts, "format")) != NULL) { + if (is_help_option(buf)) { + qemu_printf("Supported formats:"); + bdrv_iterate_format(bdrv_format_print, NULL, false); + qemu_printf("\nSupported formats (read-only):"); + bdrv_iterate_format(bdrv_format_print, NULL, true); + qemu_printf("\n"); + goto early_err; + } + + if (qdict_haskey(bs_opts, "driver")) { + error_setg(errp, "Cannot specify both 'driver' and 'format'"); + goto early_err; + } + qdict_put_str(bs_opts, "driver", buf); + } + + on_write_error = BLOCKDEV_ON_ERROR_ENOSPC; + if ((buf = qemu_opt_get(opts, "werror")) != NULL) { + on_write_error = parse_block_error_action(buf, 0, &error); + if (error) { + error_propagate(errp, error); + goto early_err; + } + } + + on_read_error = BLOCKDEV_ON_ERROR_REPORT; + if ((buf = qemu_opt_get(opts, "rerror")) != NULL) { + on_read_error = parse_block_error_action(buf, 1, &error); + if (error) { + error_propagate(errp, error); + goto early_err; + } + } + + if (snapshot) { + bdrv_flags |= BDRV_O_SNAPSHOT; + } + + read_only = qemu_opt_get_bool(opts, BDRV_OPT_READ_ONLY, false); + + /* init */ + if ((!file || !*file) && !qdict_size(bs_opts)) { + BlockBackendRootState *blk_rs; + + blk = blk_new(qemu_get_aio_context(), 0, BLK_PERM_ALL); + blk_rs = blk_get_root_state(blk); + blk_rs->open_flags = bdrv_flags; + blk_rs->read_only = read_only; + blk_rs->detect_zeroes = detect_zeroes; + + qobject_unref(bs_opts); + } else { + if (file && !*file) { + file = NULL; + } + + /* bdrv_open() defaults to the values in bdrv_flags (for compatibility + * with other callers) rather than what we want as the real defaults. + * Apply the defaults here instead. */ + qdict_set_default_str(bs_opts, BDRV_OPT_CACHE_DIRECT, "off"); + qdict_set_default_str(bs_opts, BDRV_OPT_CACHE_NO_FLUSH, "off"); + qdict_set_default_str(bs_opts, BDRV_OPT_READ_ONLY, + read_only ? "on" : "off"); + qdict_set_default_str(bs_opts, BDRV_OPT_AUTO_READ_ONLY, "on"); + assert((bdrv_flags & BDRV_O_CACHE_MASK) == 0); + + if (runstate_check(RUN_STATE_INMIGRATE)) { + bdrv_flags |= BDRV_O_INACTIVE; + } + + blk = blk_new_open(file, NULL, bs_opts, bdrv_flags, errp); + if (!blk) { + goto err_no_bs_opts; + } + bs = blk_bs(blk); + + bs->detect_zeroes = detect_zeroes; + + block_acct_setup(blk_get_stats(blk), account_invalid, account_failed); + + if (!parse_stats_intervals(blk_get_stats(blk), interval_list, errp)) { + blk_unref(blk); + blk = NULL; + goto err_no_bs_opts; + } + } + + /* disk I/O throttling */ + if (throttle_enabled(&cfg)) { + if (!throttling_group) { + throttling_group = id; + } + blk_io_limits_enable(blk, throttling_group); + blk_set_io_limits(blk, &cfg); + } + + blk_set_enable_write_cache(blk, !writethrough); + blk_set_on_error(blk, on_read_error, on_write_error); + + if (!monitor_add_blk(blk, id, errp)) { + blk_unref(blk); + blk = NULL; + goto err_no_bs_opts; + } + +err_no_bs_opts: + qemu_opts_del(opts); + qobject_unref(interval_dict); + qobject_unref(interval_list); + return blk; + +early_err: + qemu_opts_del(opts); + qobject_unref(interval_dict); + qobject_unref(interval_list); +err_no_opts: + qobject_unref(bs_opts); + return NULL; +} + +/* Takes the ownership of bs_opts */ +BlockDriverState *bds_tree_init(QDict *bs_opts, Error **errp) +{ + int bdrv_flags = 0; + + /* bdrv_open() defaults to the values in bdrv_flags (for compatibility + * with other callers) rather than what we want as the real defaults. + * Apply the defaults here instead. */ + qdict_set_default_str(bs_opts, BDRV_OPT_CACHE_DIRECT, "off"); + qdict_set_default_str(bs_opts, BDRV_OPT_CACHE_NO_FLUSH, "off"); + qdict_set_default_str(bs_opts, BDRV_OPT_READ_ONLY, "off"); + + if (runstate_check(RUN_STATE_INMIGRATE)) { + bdrv_flags |= BDRV_O_INACTIVE; + } + + return bdrv_open(NULL, NULL, bs_opts, bdrv_flags, errp); +} + +void blockdev_close_all_bdrv_states(void) +{ + BlockDriverState *bs, *next_bs; + + QTAILQ_FOREACH_SAFE(bs, &monitor_bdrv_states, monitor_list, next_bs) { + AioContext *ctx = bdrv_get_aio_context(bs); + + aio_context_acquire(ctx); + bdrv_unref(bs); + aio_context_release(ctx); + } +} + +/* Iterates over the list of monitor-owned BlockDriverStates */ +BlockDriverState *bdrv_next_monitor_owned(BlockDriverState *bs) +{ + return bs ? QTAILQ_NEXT(bs, monitor_list) + : QTAILQ_FIRST(&monitor_bdrv_states); +} + +static bool qemu_opt_rename(QemuOpts *opts, const char *from, const char *to, + Error **errp) +{ + const char *value; + + value = qemu_opt_get(opts, from); + if (value) { + if (qemu_opt_find(opts, to)) { + error_setg(errp, "'%s' and its alias '%s' can't be used at the " + "same time", to, from); + return false; + } + } + + /* rename all items in opts */ + while ((value = qemu_opt_get(opts, from))) { + qemu_opt_set(opts, to, value, &error_abort); + qemu_opt_unset(opts, from); + } + return true; +} + +QemuOptsList qemu_legacy_drive_opts = { + .name = "drive", + .head = QTAILQ_HEAD_INITIALIZER(qemu_legacy_drive_opts.head), + .desc = { + { + .name = "bus", + .type = QEMU_OPT_NUMBER, + .help = "bus number", + },{ + .name = "unit", + .type = QEMU_OPT_NUMBER, + .help = "unit number (i.e. lun for scsi)", + },{ + .name = "index", + .type = QEMU_OPT_NUMBER, + .help = "index number", + },{ + .name = "media", + .type = QEMU_OPT_STRING, + .help = "media type (disk, cdrom)", + },{ + .name = "if", + .type = QEMU_OPT_STRING, + .help = "interface (ide, scsi, sd, mtd, floppy, pflash, virtio)", + },{ + .name = "file", + .type = QEMU_OPT_STRING, + .help = "file name", + }, + + /* Options that are passed on, but have special semantics with -drive */ + { + .name = BDRV_OPT_READ_ONLY, + .type = QEMU_OPT_BOOL, + .help = "open drive file as read-only", + },{ + .name = "rerror", + .type = QEMU_OPT_STRING, + .help = "read error action", + },{ + .name = "werror", + .type = QEMU_OPT_STRING, + .help = "write error action", + },{ + .name = "copy-on-read", + .type = QEMU_OPT_BOOL, + .help = "copy read data from backing file into image file", + }, + + { /* end of list */ } + }, +}; + +DriveInfo *drive_new(QemuOpts *all_opts, BlockInterfaceType block_default_type, + Error **errp) +{ + const char *value; + BlockBackend *blk; + DriveInfo *dinfo = NULL; + QDict *bs_opts; + QemuOpts *legacy_opts; + DriveMediaType media = MEDIA_DISK; + BlockInterfaceType type; + int max_devs, bus_id, unit_id, index; + const char *werror, *rerror; + bool read_only = false; + bool copy_on_read; + const char *filename; + int i; + + /* Change legacy command line options into QMP ones */ + static const struct { + const char *from; + const char *to; + } opt_renames[] = { + { "iops", "throttling.iops-total" }, + { "iops_rd", "throttling.iops-read" }, + { "iops_wr", "throttling.iops-write" }, + + { "bps", "throttling.bps-total" }, + { "bps_rd", "throttling.bps-read" }, + { "bps_wr", "throttling.bps-write" }, + + { "iops_max", "throttling.iops-total-max" }, + { "iops_rd_max", "throttling.iops-read-max" }, + { "iops_wr_max", "throttling.iops-write-max" }, + + { "bps_max", "throttling.bps-total-max" }, + { "bps_rd_max", "throttling.bps-read-max" }, + { "bps_wr_max", "throttling.bps-write-max" }, + + { "iops_size", "throttling.iops-size" }, + + { "group", "throttling.group" }, + + { "readonly", BDRV_OPT_READ_ONLY }, + }; + + for (i = 0; i < ARRAY_SIZE(opt_renames); i++) { + if (!qemu_opt_rename(all_opts, opt_renames[i].from, + opt_renames[i].to, errp)) { + return NULL; + } + } + + value = qemu_opt_get(all_opts, "cache"); + if (value) { + int flags = 0; + bool writethrough; + + if (bdrv_parse_cache_mode(value, &flags, &writethrough) != 0) { + error_setg(errp, "invalid cache option"); + return NULL; + } + + /* Specific options take precedence */ + if (!qemu_opt_get(all_opts, BDRV_OPT_CACHE_WB)) { + qemu_opt_set_bool(all_opts, BDRV_OPT_CACHE_WB, + !writethrough, &error_abort); + } + if (!qemu_opt_get(all_opts, BDRV_OPT_CACHE_DIRECT)) { + qemu_opt_set_bool(all_opts, BDRV_OPT_CACHE_DIRECT, + !!(flags & BDRV_O_NOCACHE), &error_abort); + } + if (!qemu_opt_get(all_opts, BDRV_OPT_CACHE_NO_FLUSH)) { + qemu_opt_set_bool(all_opts, BDRV_OPT_CACHE_NO_FLUSH, + !!(flags & BDRV_O_NO_FLUSH), &error_abort); + } + qemu_opt_unset(all_opts, "cache"); + } + + /* Get a QDict for processing the options */ + bs_opts = qdict_new(); + qemu_opts_to_qdict(all_opts, bs_opts); + + legacy_opts = qemu_opts_create(&qemu_legacy_drive_opts, NULL, 0, + &error_abort); + if (!qemu_opts_absorb_qdict(legacy_opts, bs_opts, errp)) { + goto fail; + } + + /* Media type */ + value = qemu_opt_get(legacy_opts, "media"); + if (value) { + if (!strcmp(value, "disk")) { + media = MEDIA_DISK; + } else if (!strcmp(value, "cdrom")) { + media = MEDIA_CDROM; + read_only = true; + } else { + error_setg(errp, "'%s' invalid media", value); + goto fail; + } + } + + /* copy-on-read is disabled with a warning for read-only devices */ + read_only |= qemu_opt_get_bool(legacy_opts, BDRV_OPT_READ_ONLY, false); + copy_on_read = qemu_opt_get_bool(legacy_opts, "copy-on-read", false); + + if (read_only && copy_on_read) { + warn_report("disabling copy-on-read on read-only drive"); + copy_on_read = false; + } + + qdict_put_str(bs_opts, BDRV_OPT_READ_ONLY, read_only ? "on" : "off"); + qdict_put_str(bs_opts, "copy-on-read", copy_on_read ? "on" : "off"); + + /* Controller type */ + value = qemu_opt_get(legacy_opts, "if"); + if (value) { + for (type = 0; + type < IF_COUNT && strcmp(value, if_name[type]); + type++) { + } + if (type == IF_COUNT) { + error_setg(errp, "unsupported bus type '%s'", value); + goto fail; + } + } else { + type = block_default_type; + } + + /* Device address specified by bus/unit or index. + * If none was specified, try to find the first free one. */ + bus_id = qemu_opt_get_number(legacy_opts, "bus", 0); + unit_id = qemu_opt_get_number(legacy_opts, "unit", -1); + index = qemu_opt_get_number(legacy_opts, "index", -1); + + max_devs = if_max_devs[type]; + + if (index != -1) { + if (bus_id != 0 || unit_id != -1) { + error_setg(errp, "index cannot be used with bus and unit"); + goto fail; + } + bus_id = drive_index_to_bus_id(type, index); + unit_id = drive_index_to_unit_id(type, index); + } + + if (unit_id == -1) { + unit_id = 0; + while (drive_get(type, bus_id, unit_id) != NULL) { + unit_id++; + if (max_devs && unit_id >= max_devs) { + unit_id -= max_devs; + bus_id++; + } + } + } + + if (max_devs && unit_id >= max_devs) { + error_setg(errp, "unit %d too big (max is %d)", unit_id, max_devs - 1); + goto fail; + } + + if (drive_get(type, bus_id, unit_id) != NULL) { + error_setg(errp, "drive with bus=%d, unit=%d (index=%d) exists", + bus_id, unit_id, index); + goto fail; + } + + /* no id supplied -> create one */ + if (qemu_opts_id(all_opts) == NULL) { + char *new_id; + const char *mediastr = ""; + if (type == IF_IDE || type == IF_SCSI) { + mediastr = (media == MEDIA_CDROM) ? "-cd" : "-hd"; + } + if (max_devs) { + new_id = g_strdup_printf("%s%i%s%i", if_name[type], bus_id, + mediastr, unit_id); + } else { + new_id = g_strdup_printf("%s%s%i", if_name[type], + mediastr, unit_id); + } + qdict_put_str(bs_opts, "id", new_id); + g_free(new_id); + } + + /* Add virtio block device */ + if (type == IF_VIRTIO) { + QemuOpts *devopts; + devopts = qemu_opts_create(qemu_find_opts("device"), NULL, 0, + &error_abort); + if (arch_type == QEMU_ARCH_S390X) { + qemu_opt_set(devopts, "driver", "virtio-blk-ccw", &error_abort); + } else { + qemu_opt_set(devopts, "driver", "virtio-blk-pci", &error_abort); + } + qemu_opt_set(devopts, "drive", qdict_get_str(bs_opts, "id"), + &error_abort); + } + + filename = qemu_opt_get(legacy_opts, "file"); + + /* Check werror/rerror compatibility with if=... */ + werror = qemu_opt_get(legacy_opts, "werror"); + if (werror != NULL) { + if (type != IF_IDE && type != IF_SCSI && type != IF_VIRTIO && + type != IF_NONE) { + error_setg(errp, "werror is not supported by this bus type"); + goto fail; + } + qdict_put_str(bs_opts, "werror", werror); + } + + rerror = qemu_opt_get(legacy_opts, "rerror"); + if (rerror != NULL) { + if (type != IF_IDE && type != IF_VIRTIO && type != IF_SCSI && + type != IF_NONE) { + error_setg(errp, "rerror is not supported by this bus type"); + goto fail; + } + qdict_put_str(bs_opts, "rerror", rerror); + } + + /* Actual block device init: Functionality shared with blockdev-add */ + blk = blockdev_init(filename, bs_opts, errp); + bs_opts = NULL; + if (!blk) { + goto fail; + } + + /* Create legacy DriveInfo */ + dinfo = g_malloc0(sizeof(*dinfo)); + dinfo->opts = all_opts; + + dinfo->type = type; + dinfo->bus = bus_id; + dinfo->unit = unit_id; + + blk_set_legacy_dinfo(blk, dinfo); + + switch(type) { + case IF_IDE: + case IF_SCSI: + case IF_XEN: + case IF_NONE: + dinfo->media_cd = media == MEDIA_CDROM; + break; + default: + break; + } + +fail: + qemu_opts_del(legacy_opts); + qobject_unref(bs_opts); + return dinfo; +} + +static BlockDriverState *qmp_get_root_bs(const char *name, Error **errp) +{ + BlockDriverState *bs; + + bs = bdrv_lookup_bs(name, name, errp); + if (bs == NULL) { + return NULL; + } + + if (!bdrv_is_root_node(bs)) { + error_setg(errp, "Need a root block node"); + return NULL; + } + + if (!bdrv_is_inserted(bs)) { + error_setg(errp, "Device has no medium"); + return NULL; + } + + return bs; +} + +static void blockdev_do_action(TransactionAction *action, Error **errp) +{ + TransactionActionList list; + + list.value = action; + list.next = NULL; + qmp_transaction(&list, false, NULL, errp); +} + +void qmp_blockdev_snapshot_sync(bool has_device, const char *device, + bool has_node_name, const char *node_name, + const char *snapshot_file, + bool has_snapshot_node_name, + const char *snapshot_node_name, + bool has_format, const char *format, + bool has_mode, NewImageMode mode, Error **errp) +{ + BlockdevSnapshotSync snapshot = { + .has_device = has_device, + .device = (char *) device, + .has_node_name = has_node_name, + .node_name = (char *) node_name, + .snapshot_file = (char *) snapshot_file, + .has_snapshot_node_name = has_snapshot_node_name, + .snapshot_node_name = (char *) snapshot_node_name, + .has_format = has_format, + .format = (char *) format, + .has_mode = has_mode, + .mode = mode, + }; + TransactionAction action = { + .type = TRANSACTION_ACTION_KIND_BLOCKDEV_SNAPSHOT_SYNC, + .u.blockdev_snapshot_sync.data = &snapshot, + }; + blockdev_do_action(&action, errp); +} + +void qmp_blockdev_snapshot(const char *node, const char *overlay, + Error **errp) +{ + BlockdevSnapshot snapshot_data = { + .node = (char *) node, + .overlay = (char *) overlay + }; + TransactionAction action = { + .type = TRANSACTION_ACTION_KIND_BLOCKDEV_SNAPSHOT, + .u.blockdev_snapshot.data = &snapshot_data, + }; + blockdev_do_action(&action, errp); +} + +void qmp_blockdev_snapshot_internal_sync(const char *device, + const char *name, + Error **errp) +{ + BlockdevSnapshotInternal snapshot = { + .device = (char *) device, + .name = (char *) name + }; + TransactionAction action = { + .type = TRANSACTION_ACTION_KIND_BLOCKDEV_SNAPSHOT_INTERNAL_SYNC, + .u.blockdev_snapshot_internal_sync.data = &snapshot, + }; + blockdev_do_action(&action, errp); +} + +SnapshotInfo *qmp_blockdev_snapshot_delete_internal_sync(const char *device, + bool has_id, + const char *id, + bool has_name, + const char *name, + Error **errp) +{ + BlockDriverState *bs; + AioContext *aio_context; + QEMUSnapshotInfo sn; + Error *local_err = NULL; + SnapshotInfo *info = NULL; + int ret; + + bs = qmp_get_root_bs(device, errp); + if (!bs) { + return NULL; + } + aio_context = bdrv_get_aio_context(bs); + aio_context_acquire(aio_context); + + if (!has_id) { + id = NULL; + } + + if (!has_name) { + name = NULL; + } + + if (!id && !name) { + error_setg(errp, "Name or id must be provided"); + goto out_aio_context; + } + + if (bdrv_op_is_blocked(bs, BLOCK_OP_TYPE_INTERNAL_SNAPSHOT_DELETE, errp)) { + goto out_aio_context; + } + + ret = bdrv_snapshot_find_by_id_and_name(bs, id, name, &sn, &local_err); + if (local_err) { + error_propagate(errp, local_err); + goto out_aio_context; + } + if (!ret) { + error_setg(errp, + "Snapshot with id '%s' and name '%s' does not exist on " + "device '%s'", + STR_OR_NULL(id), STR_OR_NULL(name), device); + goto out_aio_context; + } + + bdrv_snapshot_delete(bs, id, name, &local_err); + if (local_err) { + error_propagate(errp, local_err); + goto out_aio_context; + } + + aio_context_release(aio_context); + + info = g_new0(SnapshotInfo, 1); + info->id = g_strdup(sn.id_str); + info->name = g_strdup(sn.name); + info->date_nsec = sn.date_nsec; + info->date_sec = sn.date_sec; + info->vm_state_size = sn.vm_state_size; + info->vm_clock_nsec = sn.vm_clock_nsec % 1000000000; + info->vm_clock_sec = sn.vm_clock_nsec / 1000000000; + if (sn.icount != -1ULL) { + info->icount = sn.icount; + info->has_icount = true; + } + + return info; + +out_aio_context: + aio_context_release(aio_context); + return NULL; +} + +/* New and old BlockDriverState structs for atomic group operations */ + +typedef struct BlkActionState BlkActionState; + +/** + * BlkActionOps: + * Table of operations that define an Action. + * + * @instance_size: Size of state struct, in bytes. + * @prepare: Prepare the work, must NOT be NULL. + * @commit: Commit the changes, can be NULL. + * @abort: Abort the changes on fail, can be NULL. + * @clean: Clean up resources after all transaction actions have called + * commit() or abort(). Can be NULL. + * + * Only prepare() may fail. In a single transaction, only one of commit() or + * abort() will be called. clean() will always be called if it is present. + */ +typedef struct BlkActionOps { + size_t instance_size; + void (*prepare)(BlkActionState *common, Error **errp); + void (*commit)(BlkActionState *common); + void (*abort)(BlkActionState *common); + void (*clean)(BlkActionState *common); +} BlkActionOps; + +/** + * BlkActionState: + * Describes one Action's state within a Transaction. + * + * @action: QAPI-defined enum identifying which Action to perform. + * @ops: Table of ActionOps this Action can perform. + * @block_job_txn: Transaction which this action belongs to. + * @entry: List membership for all Actions in this Transaction. + * + * This structure must be arranged as first member in a subclassed type, + * assuming that the compiler will also arrange it to the same offsets as the + * base class. + */ +struct BlkActionState { + TransactionAction *action; + const BlkActionOps *ops; + JobTxn *block_job_txn; + TransactionProperties *txn_props; + QTAILQ_ENTRY(BlkActionState) entry; +}; + +/* internal snapshot private data */ +typedef struct InternalSnapshotState { + BlkActionState common; + BlockDriverState *bs; + QEMUSnapshotInfo sn; + bool created; +} InternalSnapshotState; + + +static int action_check_completion_mode(BlkActionState *s, Error **errp) +{ + if (s->txn_props->completion_mode != ACTION_COMPLETION_MODE_INDIVIDUAL) { + error_setg(errp, + "Action '%s' does not support Transaction property " + "completion-mode = %s", + TransactionActionKind_str(s->action->type), + ActionCompletionMode_str(s->txn_props->completion_mode)); + return -1; + } + return 0; +} + +static void internal_snapshot_prepare(BlkActionState *common, + Error **errp) +{ + Error *local_err = NULL; + const char *device; + const char *name; + BlockDriverState *bs; + QEMUSnapshotInfo old_sn, *sn; + bool ret; + qemu_timeval tv; + BlockdevSnapshotInternal *internal; + InternalSnapshotState *state; + AioContext *aio_context; + int ret1; + + g_assert(common->action->type == + TRANSACTION_ACTION_KIND_BLOCKDEV_SNAPSHOT_INTERNAL_SYNC); + internal = common->action->u.blockdev_snapshot_internal_sync.data; + state = DO_UPCAST(InternalSnapshotState, common, common); + + /* 1. parse input */ + device = internal->device; + name = internal->name; + + /* 2. check for validation */ + if (action_check_completion_mode(common, errp) < 0) { + return; + } + + bs = qmp_get_root_bs(device, errp); + if (!bs) { + return; + } + + aio_context = bdrv_get_aio_context(bs); + aio_context_acquire(aio_context); + + state->bs = bs; + + /* Paired with .clean() */ + bdrv_drained_begin(bs); + + if (bdrv_op_is_blocked(bs, BLOCK_OP_TYPE_INTERNAL_SNAPSHOT, errp)) { + goto out; + } + + if (bdrv_is_read_only(bs)) { + error_setg(errp, "Device '%s' is read only", device); + goto out; + } + + if (!bdrv_can_snapshot(bs)) { + error_setg(errp, "Block format '%s' used by device '%s' " + "does not support internal snapshots", + bs->drv->format_name, device); + goto out; + } + + if (!strlen(name)) { + error_setg(errp, "Name is empty"); + goto out; + } + + /* check whether a snapshot with name exist */ + ret = bdrv_snapshot_find_by_id_and_name(bs, NULL, name, &old_sn, + &local_err); + if (local_err) { + error_propagate(errp, local_err); + goto out; + } else if (ret) { + error_setg(errp, + "Snapshot with name '%s' already exists on device '%s'", + name, device); + goto out; + } + + /* 3. take the snapshot */ + sn = &state->sn; + pstrcpy(sn->name, sizeof(sn->name), name); + qemu_gettimeofday(&tv); + sn->date_sec = tv.tv_sec; + sn->date_nsec = tv.tv_usec * 1000; + sn->vm_clock_nsec = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL); + if (replay_mode != REPLAY_MODE_NONE) { + sn->icount = replay_get_current_icount(); + } else { + sn->icount = -1ULL; + } + + ret1 = bdrv_snapshot_create(bs, sn); + if (ret1 < 0) { + error_setg_errno(errp, -ret1, + "Failed to create snapshot '%s' on device '%s'", + name, device); + goto out; + } + + /* 4. succeed, mark a snapshot is created */ + state->created = true; + +out: + aio_context_release(aio_context); +} + +static void internal_snapshot_abort(BlkActionState *common) +{ + InternalSnapshotState *state = + DO_UPCAST(InternalSnapshotState, common, common); + BlockDriverState *bs = state->bs; + QEMUSnapshotInfo *sn = &state->sn; + AioContext *aio_context; + Error *local_error = NULL; + + if (!state->created) { + return; + } + + aio_context = bdrv_get_aio_context(state->bs); + aio_context_acquire(aio_context); + + if (bdrv_snapshot_delete(bs, sn->id_str, sn->name, &local_error) < 0) { + error_reportf_err(local_error, + "Failed to delete snapshot with id '%s' and " + "name '%s' on device '%s' in abort: ", + sn->id_str, sn->name, + bdrv_get_device_name(bs)); + } + + aio_context_release(aio_context); +} + +static void internal_snapshot_clean(BlkActionState *common) +{ + InternalSnapshotState *state = DO_UPCAST(InternalSnapshotState, + common, common); + AioContext *aio_context; + + if (!state->bs) { + return; + } + + aio_context = bdrv_get_aio_context(state->bs); + aio_context_acquire(aio_context); + + bdrv_drained_end(state->bs); + + aio_context_release(aio_context); +} + +/* external snapshot private data */ +typedef struct ExternalSnapshotState { + BlkActionState common; + BlockDriverState *old_bs; + BlockDriverState *new_bs; + bool overlay_appended; +} ExternalSnapshotState; + +static void external_snapshot_prepare(BlkActionState *common, + Error **errp) +{ + int flags = 0; + QDict *options = NULL; + Error *local_err = NULL; + /* Device and node name of the image to generate the snapshot from */ + const char *device; + const char *node_name; + /* Reference to the new image (for 'blockdev-snapshot') */ + const char *snapshot_ref; + /* File name of the new image (for 'blockdev-snapshot-sync') */ + const char *new_image_file; + ExternalSnapshotState *state = + DO_UPCAST(ExternalSnapshotState, common, common); + TransactionAction *action = common->action; + AioContext *aio_context; + uint64_t perm, shared; + + /* 'blockdev-snapshot' and 'blockdev-snapshot-sync' have similar + * purpose but a different set of parameters */ + switch (action->type) { + case TRANSACTION_ACTION_KIND_BLOCKDEV_SNAPSHOT: + { + BlockdevSnapshot *s = action->u.blockdev_snapshot.data; + device = s->node; + node_name = s->node; + new_image_file = NULL; + snapshot_ref = s->overlay; + } + break; + case TRANSACTION_ACTION_KIND_BLOCKDEV_SNAPSHOT_SYNC: + { + BlockdevSnapshotSync *s = action->u.blockdev_snapshot_sync.data; + device = s->has_device ? s->device : NULL; + node_name = s->has_node_name ? s->node_name : NULL; + new_image_file = s->snapshot_file; + snapshot_ref = NULL; + } + break; + default: + g_assert_not_reached(); + } + + /* start processing */ + if (action_check_completion_mode(common, errp) < 0) { + return; + } + + state->old_bs = bdrv_lookup_bs(device, node_name, errp); + if (!state->old_bs) { + return; + } + + aio_context = bdrv_get_aio_context(state->old_bs); + aio_context_acquire(aio_context); + + /* Paired with .clean() */ + bdrv_drained_begin(state->old_bs); + + if (!bdrv_is_inserted(state->old_bs)) { + error_setg(errp, QERR_DEVICE_HAS_NO_MEDIUM, device); + goto out; + } + + if (bdrv_op_is_blocked(state->old_bs, + BLOCK_OP_TYPE_EXTERNAL_SNAPSHOT, errp)) { + goto out; + } + + if (!bdrv_is_read_only(state->old_bs)) { + if (bdrv_flush(state->old_bs)) { + error_setg(errp, QERR_IO_ERROR); + goto out; + } + } + + if (action->type == TRANSACTION_ACTION_KIND_BLOCKDEV_SNAPSHOT_SYNC) { + BlockdevSnapshotSync *s = action->u.blockdev_snapshot_sync.data; + const char *format = s->has_format ? s->format : "qcow2"; + enum NewImageMode mode; + const char *snapshot_node_name = + s->has_snapshot_node_name ? s->snapshot_node_name : NULL; + + if (node_name && !snapshot_node_name) { + error_setg(errp, "New overlay node name missing"); + goto out; + } + + if (snapshot_node_name && + bdrv_lookup_bs(snapshot_node_name, snapshot_node_name, NULL)) { + error_setg(errp, "New overlay node name already in use"); + goto out; + } + + flags = state->old_bs->open_flags; + flags &= ~(BDRV_O_SNAPSHOT | BDRV_O_COPY_ON_READ); + flags |= BDRV_O_NO_BACKING; + + /* create new image w/backing file */ + mode = s->has_mode ? s->mode : NEW_IMAGE_MODE_ABSOLUTE_PATHS; + if (mode != NEW_IMAGE_MODE_EXISTING) { + int64_t size = bdrv_getlength(state->old_bs); + if (size < 0) { + error_setg_errno(errp, -size, "bdrv_getlength failed"); + goto out; + } + bdrv_refresh_filename(state->old_bs); + bdrv_img_create(new_image_file, format, + state->old_bs->filename, + state->old_bs->drv->format_name, + NULL, size, flags, false, &local_err); + if (local_err) { + error_propagate(errp, local_err); + goto out; + } + } + + options = qdict_new(); + if (snapshot_node_name) { + qdict_put_str(options, "node-name", snapshot_node_name); + } + qdict_put_str(options, "driver", format); + } + + state->new_bs = bdrv_open(new_image_file, snapshot_ref, options, flags, + errp); + /* We will manually add the backing_hd field to the bs later */ + if (!state->new_bs) { + goto out; + } + + /* + * Allow attaching a backing file to an overlay that's already in use only + * if the parents don't assume that they are already seeing a valid image. + * (Specifically, allow it as a mirror target, which is write-only access.) + */ + bdrv_get_cumulative_perm(state->new_bs, &perm, &shared); + if (perm & BLK_PERM_CONSISTENT_READ) { + error_setg(errp, "The overlay is already in use"); + goto out; + } + + if (state->new_bs->drv->is_filter) { + error_setg(errp, "Filters cannot be used as overlays"); + goto out; + } + + if (bdrv_cow_child(state->new_bs)) { + error_setg(errp, "The overlay already has a backing image"); + goto out; + } + + if (!state->new_bs->drv->supports_backing) { + error_setg(errp, "The overlay does not support backing images"); + goto out; + } + + /* This removes our old bs and adds the new bs. This is an operation that + * can fail, so we need to do it in .prepare; undoing it for abort is + * always possible. */ + bdrv_ref(state->new_bs); + bdrv_append(state->new_bs, state->old_bs, &local_err); + if (local_err) { + error_propagate(errp, local_err); + goto out; + } + state->overlay_appended = true; + +out: + aio_context_release(aio_context); +} + +static void external_snapshot_commit(BlkActionState *common) +{ + ExternalSnapshotState *state = + DO_UPCAST(ExternalSnapshotState, common, common); + AioContext *aio_context; + + aio_context = bdrv_get_aio_context(state->old_bs); + aio_context_acquire(aio_context); + + /* We don't need (or want) to use the transactional + * bdrv_reopen_multiple() across all the entries at once, because we + * don't want to abort all of them if one of them fails the reopen */ + if (!qatomic_read(&state->old_bs->copy_on_read)) { + bdrv_reopen_set_read_only(state->old_bs, true, NULL); + } + + aio_context_release(aio_context); +} + +static void external_snapshot_abort(BlkActionState *common) +{ + ExternalSnapshotState *state = + DO_UPCAST(ExternalSnapshotState, common, common); + if (state->new_bs) { + if (state->overlay_appended) { + AioContext *aio_context; + AioContext *tmp_context; + int ret; + + aio_context = bdrv_get_aio_context(state->old_bs); + aio_context_acquire(aio_context); + + bdrv_ref(state->old_bs); /* we can't let bdrv_set_backind_hd() + close state->old_bs; we need it */ + bdrv_set_backing_hd(state->new_bs, NULL, &error_abort); + + /* + * The call to bdrv_set_backing_hd() above returns state->old_bs to + * the main AioContext. As we're still going to be using it, return + * it to the AioContext it was before. + */ + tmp_context = bdrv_get_aio_context(state->old_bs); + if (aio_context != tmp_context) { + aio_context_release(aio_context); + aio_context_acquire(tmp_context); + + ret = bdrv_try_set_aio_context(state->old_bs, + aio_context, NULL); + assert(ret == 0); + + aio_context_release(tmp_context); + aio_context_acquire(aio_context); + } + + bdrv_replace_node(state->new_bs, state->old_bs, &error_abort); + bdrv_unref(state->old_bs); /* bdrv_replace_node() ref'ed old_bs */ + + aio_context_release(aio_context); + } + } +} + +static void external_snapshot_clean(BlkActionState *common) +{ + ExternalSnapshotState *state = + DO_UPCAST(ExternalSnapshotState, common, common); + AioContext *aio_context; + + if (!state->old_bs) { + return; + } + + aio_context = bdrv_get_aio_context(state->old_bs); + aio_context_acquire(aio_context); + + bdrv_drained_end(state->old_bs); + bdrv_unref(state->new_bs); + + aio_context_release(aio_context); +} + +typedef struct DriveBackupState { + BlkActionState common; + BlockDriverState *bs; + BlockJob *job; +} DriveBackupState; + +static BlockJob *do_backup_common(BackupCommon *backup, + BlockDriverState *bs, + BlockDriverState *target_bs, + AioContext *aio_context, + JobTxn *txn, Error **errp); + +static void drive_backup_prepare(BlkActionState *common, Error **errp) +{ + DriveBackupState *state = DO_UPCAST(DriveBackupState, common, common); + DriveBackup *backup; + BlockDriverState *bs; + BlockDriverState *target_bs; + BlockDriverState *source = NULL; + AioContext *aio_context; + AioContext *old_context; + QDict *options; + Error *local_err = NULL; + int flags; + int64_t size; + bool set_backing_hd = false; + int ret; + + assert(common->action->type == TRANSACTION_ACTION_KIND_DRIVE_BACKUP); + backup = common->action->u.drive_backup.data; + + if (!backup->has_mode) { + backup->mode = NEW_IMAGE_MODE_ABSOLUTE_PATHS; + } + + bs = bdrv_lookup_bs(backup->device, backup->device, errp); + if (!bs) { + return; + } + + if (!bs->drv) { + error_setg(errp, "Device has no medium"); + return; + } + + aio_context = bdrv_get_aio_context(bs); + aio_context_acquire(aio_context); + + /* Paired with .clean() */ + bdrv_drained_begin(bs); + + if (!backup->has_format) { + backup->format = backup->mode == NEW_IMAGE_MODE_EXISTING ? + NULL : (char *) bs->drv->format_name; + } + + /* Early check to avoid creating target */ + if (bdrv_op_is_blocked(bs, BLOCK_OP_TYPE_BACKUP_SOURCE, errp)) { + goto out; + } + + flags = bs->open_flags | BDRV_O_RDWR; + + /* + * See if we have a backing HD we can use to create our new image + * on top of. + */ + if (backup->sync == MIRROR_SYNC_MODE_TOP) { + /* + * Backup will not replace the source by the target, so none + * of the filters skipped here will be removed (in contrast to + * mirror). Therefore, we can skip all of them when looking + * for the first COW relationship. + */ + source = bdrv_cow_bs(bdrv_skip_filters(bs)); + if (!source) { + backup->sync = MIRROR_SYNC_MODE_FULL; + } + } + if (backup->sync == MIRROR_SYNC_MODE_NONE) { + source = bs; + flags |= BDRV_O_NO_BACKING; + set_backing_hd = true; + } + + size = bdrv_getlength(bs); + if (size < 0) { + error_setg_errno(errp, -size, "bdrv_getlength failed"); + goto out; + } + + if (backup->mode != NEW_IMAGE_MODE_EXISTING) { + assert(backup->format); + if (source) { + /* Implicit filters should not appear in the filename */ + BlockDriverState *explicit_backing = + bdrv_skip_implicit_filters(source); + + bdrv_refresh_filename(explicit_backing); + bdrv_img_create(backup->target, backup->format, + explicit_backing->filename, + explicit_backing->drv->format_name, NULL, + size, flags, false, &local_err); + } else { + bdrv_img_create(backup->target, backup->format, NULL, NULL, NULL, + size, flags, false, &local_err); + } + } + + if (local_err) { + error_propagate(errp, local_err); + goto out; + } + + options = qdict_new(); + qdict_put_str(options, "discard", "unmap"); + qdict_put_str(options, "detect-zeroes", "unmap"); + if (backup->format) { + qdict_put_str(options, "driver", backup->format); + } + + target_bs = bdrv_open(backup->target, NULL, options, flags, errp); + if (!target_bs) { + goto out; + } + + /* Honor bdrv_try_set_aio_context() context acquisition requirements. */ + old_context = bdrv_get_aio_context(target_bs); + aio_context_release(aio_context); + aio_context_acquire(old_context); + + ret = bdrv_try_set_aio_context(target_bs, aio_context, errp); + if (ret < 0) { + bdrv_unref(target_bs); + aio_context_release(old_context); + return; + } + + aio_context_release(old_context); + aio_context_acquire(aio_context); + + if (set_backing_hd) { + bdrv_set_backing_hd(target_bs, source, &local_err); + if (local_err) { + goto unref; + } + } + + state->bs = bs; + + state->job = do_backup_common(qapi_DriveBackup_base(backup), + bs, target_bs, aio_context, + common->block_job_txn, errp); + +unref: + bdrv_unref(target_bs); +out: + aio_context_release(aio_context); +} + +static void drive_backup_commit(BlkActionState *common) +{ + DriveBackupState *state = DO_UPCAST(DriveBackupState, common, common); + AioContext *aio_context; + + aio_context = bdrv_get_aio_context(state->bs); + aio_context_acquire(aio_context); + + assert(state->job); + job_start(&state->job->job); + + aio_context_release(aio_context); +} + +static void drive_backup_abort(BlkActionState *common) +{ + DriveBackupState *state = DO_UPCAST(DriveBackupState, common, common); + + if (state->job) { + AioContext *aio_context; + + aio_context = bdrv_get_aio_context(state->bs); + aio_context_acquire(aio_context); + + job_cancel_sync(&state->job->job); + + aio_context_release(aio_context); + } +} + +static void drive_backup_clean(BlkActionState *common) +{ + DriveBackupState *state = DO_UPCAST(DriveBackupState, common, common); + AioContext *aio_context; + + if (!state->bs) { + return; + } + + aio_context = bdrv_get_aio_context(state->bs); + aio_context_acquire(aio_context); + + bdrv_drained_end(state->bs); + + aio_context_release(aio_context); +} + +typedef struct BlockdevBackupState { + BlkActionState common; + BlockDriverState *bs; + BlockJob *job; +} BlockdevBackupState; + +static void blockdev_backup_prepare(BlkActionState *common, Error **errp) +{ + BlockdevBackupState *state = DO_UPCAST(BlockdevBackupState, common, common); + BlockdevBackup *backup; + BlockDriverState *bs; + BlockDriverState *target_bs; + AioContext *aio_context; + AioContext *old_context; + int ret; + + assert(common->action->type == TRANSACTION_ACTION_KIND_BLOCKDEV_BACKUP); + backup = common->action->u.blockdev_backup.data; + + bs = bdrv_lookup_bs(backup->device, backup->device, errp); + if (!bs) { + return; + } + + target_bs = bdrv_lookup_bs(backup->target, backup->target, errp); + if (!target_bs) { + return; + } + + /* Honor bdrv_try_set_aio_context() context acquisition requirements. */ + aio_context = bdrv_get_aio_context(bs); + old_context = bdrv_get_aio_context(target_bs); + aio_context_acquire(old_context); + + ret = bdrv_try_set_aio_context(target_bs, aio_context, errp); + if (ret < 0) { + aio_context_release(old_context); + return; + } + + aio_context_release(old_context); + aio_context_acquire(aio_context); + state->bs = bs; + + /* Paired with .clean() */ + bdrv_drained_begin(state->bs); + + state->job = do_backup_common(qapi_BlockdevBackup_base(backup), + bs, target_bs, aio_context, + common->block_job_txn, errp); + + aio_context_release(aio_context); +} + +static void blockdev_backup_commit(BlkActionState *common) +{ + BlockdevBackupState *state = DO_UPCAST(BlockdevBackupState, common, common); + AioContext *aio_context; + + aio_context = bdrv_get_aio_context(state->bs); + aio_context_acquire(aio_context); + + assert(state->job); + job_start(&state->job->job); + + aio_context_release(aio_context); +} + +static void blockdev_backup_abort(BlkActionState *common) +{ + BlockdevBackupState *state = DO_UPCAST(BlockdevBackupState, common, common); + + if (state->job) { + AioContext *aio_context; + + aio_context = bdrv_get_aio_context(state->bs); + aio_context_acquire(aio_context); + + job_cancel_sync(&state->job->job); + + aio_context_release(aio_context); + } +} + +static void blockdev_backup_clean(BlkActionState *common) +{ + BlockdevBackupState *state = DO_UPCAST(BlockdevBackupState, common, common); + AioContext *aio_context; + + if (!state->bs) { + return; + } + + aio_context = bdrv_get_aio_context(state->bs); + aio_context_acquire(aio_context); + + bdrv_drained_end(state->bs); + + aio_context_release(aio_context); +} + +typedef struct BlockDirtyBitmapState { + BlkActionState common; + BdrvDirtyBitmap *bitmap; + BlockDriverState *bs; + HBitmap *backup; + bool prepared; + bool was_enabled; +} BlockDirtyBitmapState; + +static void block_dirty_bitmap_add_prepare(BlkActionState *common, + Error **errp) +{ + Error *local_err = NULL; + BlockDirtyBitmapAdd *action; + BlockDirtyBitmapState *state = DO_UPCAST(BlockDirtyBitmapState, + common, common); + + if (action_check_completion_mode(common, errp) < 0) { + return; + } + + action = common->action->u.block_dirty_bitmap_add.data; + /* AIO context taken and released within qmp_block_dirty_bitmap_add */ + qmp_block_dirty_bitmap_add(action->node, action->name, + action->has_granularity, action->granularity, + action->has_persistent, action->persistent, + action->has_disabled, action->disabled, + &local_err); + + if (!local_err) { + state->prepared = true; + } else { + error_propagate(errp, local_err); + } +} + +static void block_dirty_bitmap_add_abort(BlkActionState *common) +{ + BlockDirtyBitmapAdd *action; + BlockDirtyBitmapState *state = DO_UPCAST(BlockDirtyBitmapState, + common, common); + + action = common->action->u.block_dirty_bitmap_add.data; + /* Should not be able to fail: IF the bitmap was added via .prepare(), + * then the node reference and bitmap name must have been valid. + */ + if (state->prepared) { + qmp_block_dirty_bitmap_remove(action->node, action->name, &error_abort); + } +} + +static void block_dirty_bitmap_clear_prepare(BlkActionState *common, + Error **errp) +{ + BlockDirtyBitmapState *state = DO_UPCAST(BlockDirtyBitmapState, + common, common); + BlockDirtyBitmap *action; + + if (action_check_completion_mode(common, errp) < 0) { + return; + } + + action = common->action->u.block_dirty_bitmap_clear.data; + state->bitmap = block_dirty_bitmap_lookup(action->node, + action->name, + &state->bs, + errp); + if (!state->bitmap) { + return; + } + + if (bdrv_dirty_bitmap_check(state->bitmap, BDRV_BITMAP_DEFAULT, errp)) { + return; + } + + bdrv_clear_dirty_bitmap(state->bitmap, &state->backup); +} + +static void block_dirty_bitmap_restore(BlkActionState *common) +{ + BlockDirtyBitmapState *state = DO_UPCAST(BlockDirtyBitmapState, + common, common); + + if (state->backup) { + bdrv_restore_dirty_bitmap(state->bitmap, state->backup); + } +} + +static void block_dirty_bitmap_free_backup(BlkActionState *common) +{ + BlockDirtyBitmapState *state = DO_UPCAST(BlockDirtyBitmapState, + common, common); + + hbitmap_free(state->backup); +} + +static void block_dirty_bitmap_enable_prepare(BlkActionState *common, + Error **errp) +{ + BlockDirtyBitmap *action; + BlockDirtyBitmapState *state = DO_UPCAST(BlockDirtyBitmapState, + common, common); + + if (action_check_completion_mode(common, errp) < 0) { + return; + } + + action = common->action->u.block_dirty_bitmap_enable.data; + state->bitmap = block_dirty_bitmap_lookup(action->node, + action->name, + NULL, + errp); + if (!state->bitmap) { + return; + } + + if (bdrv_dirty_bitmap_check(state->bitmap, BDRV_BITMAP_ALLOW_RO, errp)) { + return; + } + + state->was_enabled = bdrv_dirty_bitmap_enabled(state->bitmap); + bdrv_enable_dirty_bitmap(state->bitmap); +} + +static void block_dirty_bitmap_enable_abort(BlkActionState *common) +{ + BlockDirtyBitmapState *state = DO_UPCAST(BlockDirtyBitmapState, + common, common); + + if (!state->was_enabled) { + bdrv_disable_dirty_bitmap(state->bitmap); + } +} + +static void block_dirty_bitmap_disable_prepare(BlkActionState *common, + Error **errp) +{ + BlockDirtyBitmap *action; + BlockDirtyBitmapState *state = DO_UPCAST(BlockDirtyBitmapState, + common, common); + + if (action_check_completion_mode(common, errp) < 0) { + return; + } + + action = common->action->u.block_dirty_bitmap_disable.data; + state->bitmap = block_dirty_bitmap_lookup(action->node, + action->name, + NULL, + errp); + if (!state->bitmap) { + return; + } + + if (bdrv_dirty_bitmap_check(state->bitmap, BDRV_BITMAP_ALLOW_RO, errp)) { + return; + } + + state->was_enabled = bdrv_dirty_bitmap_enabled(state->bitmap); + bdrv_disable_dirty_bitmap(state->bitmap); +} + +static void block_dirty_bitmap_disable_abort(BlkActionState *common) +{ + BlockDirtyBitmapState *state = DO_UPCAST(BlockDirtyBitmapState, + common, common); + + if (state->was_enabled) { + bdrv_enable_dirty_bitmap(state->bitmap); + } +} + +static void block_dirty_bitmap_merge_prepare(BlkActionState *common, + Error **errp) +{ + BlockDirtyBitmapMerge *action; + BlockDirtyBitmapState *state = DO_UPCAST(BlockDirtyBitmapState, + common, common); + + if (action_check_completion_mode(common, errp) < 0) { + return; + } + + action = common->action->u.block_dirty_bitmap_merge.data; + + state->bitmap = block_dirty_bitmap_merge(action->node, action->target, + action->bitmaps, &state->backup, + errp); +} + +static void block_dirty_bitmap_remove_prepare(BlkActionState *common, + Error **errp) +{ + BlockDirtyBitmap *action; + BlockDirtyBitmapState *state = DO_UPCAST(BlockDirtyBitmapState, + common, common); + + if (action_check_completion_mode(common, errp) < 0) { + return; + } + + action = common->action->u.block_dirty_bitmap_remove.data; + + state->bitmap = block_dirty_bitmap_remove(action->node, action->name, + false, &state->bs, errp); + if (state->bitmap) { + bdrv_dirty_bitmap_skip_store(state->bitmap, true); + bdrv_dirty_bitmap_set_busy(state->bitmap, true); + } +} + +static void block_dirty_bitmap_remove_abort(BlkActionState *common) +{ + BlockDirtyBitmapState *state = DO_UPCAST(BlockDirtyBitmapState, + common, common); + + if (state->bitmap) { + bdrv_dirty_bitmap_skip_store(state->bitmap, false); + bdrv_dirty_bitmap_set_busy(state->bitmap, false); + } +} + +static void block_dirty_bitmap_remove_commit(BlkActionState *common) +{ + BlockDirtyBitmapState *state = DO_UPCAST(BlockDirtyBitmapState, + common, common); + + bdrv_dirty_bitmap_set_busy(state->bitmap, false); + bdrv_release_dirty_bitmap(state->bitmap); +} + +static void abort_prepare(BlkActionState *common, Error **errp) +{ + error_setg(errp, "Transaction aborted using Abort action"); +} + +static void abort_commit(BlkActionState *common) +{ + g_assert_not_reached(); /* this action never succeeds */ +} + +static const BlkActionOps actions[] = { + [TRANSACTION_ACTION_KIND_BLOCKDEV_SNAPSHOT] = { + .instance_size = sizeof(ExternalSnapshotState), + .prepare = external_snapshot_prepare, + .commit = external_snapshot_commit, + .abort = external_snapshot_abort, + .clean = external_snapshot_clean, + }, + [TRANSACTION_ACTION_KIND_BLOCKDEV_SNAPSHOT_SYNC] = { + .instance_size = sizeof(ExternalSnapshotState), + .prepare = external_snapshot_prepare, + .commit = external_snapshot_commit, + .abort = external_snapshot_abort, + .clean = external_snapshot_clean, + }, + [TRANSACTION_ACTION_KIND_DRIVE_BACKUP] = { + .instance_size = sizeof(DriveBackupState), + .prepare = drive_backup_prepare, + .commit = drive_backup_commit, + .abort = drive_backup_abort, + .clean = drive_backup_clean, + }, + [TRANSACTION_ACTION_KIND_BLOCKDEV_BACKUP] = { + .instance_size = sizeof(BlockdevBackupState), + .prepare = blockdev_backup_prepare, + .commit = blockdev_backup_commit, + .abort = blockdev_backup_abort, + .clean = blockdev_backup_clean, + }, + [TRANSACTION_ACTION_KIND_ABORT] = { + .instance_size = sizeof(BlkActionState), + .prepare = abort_prepare, + .commit = abort_commit, + }, + [TRANSACTION_ACTION_KIND_BLOCKDEV_SNAPSHOT_INTERNAL_SYNC] = { + .instance_size = sizeof(InternalSnapshotState), + .prepare = internal_snapshot_prepare, + .abort = internal_snapshot_abort, + .clean = internal_snapshot_clean, + }, + [TRANSACTION_ACTION_KIND_BLOCK_DIRTY_BITMAP_ADD] = { + .instance_size = sizeof(BlockDirtyBitmapState), + .prepare = block_dirty_bitmap_add_prepare, + .abort = block_dirty_bitmap_add_abort, + }, + [TRANSACTION_ACTION_KIND_BLOCK_DIRTY_BITMAP_CLEAR] = { + .instance_size = sizeof(BlockDirtyBitmapState), + .prepare = block_dirty_bitmap_clear_prepare, + .commit = block_dirty_bitmap_free_backup, + .abort = block_dirty_bitmap_restore, + }, + [TRANSACTION_ACTION_KIND_BLOCK_DIRTY_BITMAP_ENABLE] = { + .instance_size = sizeof(BlockDirtyBitmapState), + .prepare = block_dirty_bitmap_enable_prepare, + .abort = block_dirty_bitmap_enable_abort, + }, + [TRANSACTION_ACTION_KIND_BLOCK_DIRTY_BITMAP_DISABLE] = { + .instance_size = sizeof(BlockDirtyBitmapState), + .prepare = block_dirty_bitmap_disable_prepare, + .abort = block_dirty_bitmap_disable_abort, + }, + [TRANSACTION_ACTION_KIND_BLOCK_DIRTY_BITMAP_MERGE] = { + .instance_size = sizeof(BlockDirtyBitmapState), + .prepare = block_dirty_bitmap_merge_prepare, + .commit = block_dirty_bitmap_free_backup, + .abort = block_dirty_bitmap_restore, + }, + [TRANSACTION_ACTION_KIND_BLOCK_DIRTY_BITMAP_REMOVE] = { + .instance_size = sizeof(BlockDirtyBitmapState), + .prepare = block_dirty_bitmap_remove_prepare, + .commit = block_dirty_bitmap_remove_commit, + .abort = block_dirty_bitmap_remove_abort, + }, + /* Where are transactions for MIRROR, COMMIT and STREAM? + * Although these blockjobs use transaction callbacks like the backup job, + * these jobs do not necessarily adhere to transaction semantics. + * These jobs may not fully undo all of their actions on abort, nor do they + * necessarily work in transactions with more than one job in them. + */ +}; + +/** + * Allocate a TransactionProperties structure if necessary, and fill + * that structure with desired defaults if they are unset. + */ +static TransactionProperties *get_transaction_properties( + TransactionProperties *props) +{ + if (!props) { + props = g_new0(TransactionProperties, 1); + } + + if (!props->has_completion_mode) { + props->has_completion_mode = true; + props->completion_mode = ACTION_COMPLETION_MODE_INDIVIDUAL; + } + + return props; +} + +/* + * 'Atomic' group operations. The operations are performed as a set, and if + * any fail then we roll back all operations in the group. + */ +void qmp_transaction(TransactionActionList *dev_list, + bool has_props, + struct TransactionProperties *props, + Error **errp) +{ + TransactionActionList *dev_entry = dev_list; + JobTxn *block_job_txn = NULL; + BlkActionState *state, *next; + Error *local_err = NULL; + + QTAILQ_HEAD(, BlkActionState) snap_bdrv_states; + QTAILQ_INIT(&snap_bdrv_states); + + /* Does this transaction get canceled as a group on failure? + * If not, we don't really need to make a JobTxn. + */ + props = get_transaction_properties(props); + if (props->completion_mode != ACTION_COMPLETION_MODE_INDIVIDUAL) { + block_job_txn = job_txn_new(); + } + + /* drain all i/o before any operations */ + bdrv_drain_all(); + + /* We don't do anything in this loop that commits us to the operations */ + while (NULL != dev_entry) { + TransactionAction *dev_info = NULL; + const BlkActionOps *ops; + + dev_info = dev_entry->value; + dev_entry = dev_entry->next; + + assert(dev_info->type < ARRAY_SIZE(actions)); + + ops = &actions[dev_info->type]; + assert(ops->instance_size > 0); + + state = g_malloc0(ops->instance_size); + state->ops = ops; + state->action = dev_info; + state->block_job_txn = block_job_txn; + state->txn_props = props; + QTAILQ_INSERT_TAIL(&snap_bdrv_states, state, entry); + + state->ops->prepare(state, &local_err); + if (local_err) { + error_propagate(errp, local_err); + goto delete_and_fail; + } + } + + QTAILQ_FOREACH(state, &snap_bdrv_states, entry) { + if (state->ops->commit) { + state->ops->commit(state); + } + } + + /* success */ + goto exit; + +delete_and_fail: + /* failure, and it is all-or-none; roll back all operations */ + QTAILQ_FOREACH_REVERSE(state, &snap_bdrv_states, entry) { + if (state->ops->abort) { + state->ops->abort(state); + } + } +exit: + QTAILQ_FOREACH_SAFE(state, &snap_bdrv_states, entry, next) { + if (state->ops->clean) { + state->ops->clean(state); + } + g_free(state); + } + if (!has_props) { + qapi_free_TransactionProperties(props); + } + job_txn_unref(block_job_txn); +} + +void qmp_block_passwd(bool has_device, const char *device, + bool has_node_name, const char *node_name, + const char *password, Error **errp) +{ + error_setg(errp, + "Setting block passwords directly is no longer supported"); +} + +BlockDirtyBitmapSha256 *qmp_x_debug_block_dirty_bitmap_sha256(const char *node, + const char *name, + Error **errp) +{ + BdrvDirtyBitmap *bitmap; + BlockDriverState *bs; + BlockDirtyBitmapSha256 *ret = NULL; + char *sha256; + + bitmap = block_dirty_bitmap_lookup(node, name, &bs, errp); + if (!bitmap || !bs) { + return NULL; + } + + sha256 = bdrv_dirty_bitmap_sha256(bitmap, errp); + if (sha256 == NULL) { + return NULL; + } + + ret = g_new(BlockDirtyBitmapSha256, 1); + ret->sha256 = sha256; + + return ret; +} + +void coroutine_fn qmp_block_resize(bool has_device, const char *device, + bool has_node_name, const char *node_name, + int64_t size, Error **errp) +{ + Error *local_err = NULL; + BlockBackend *blk; + BlockDriverState *bs; + AioContext *old_ctx; + + bs = bdrv_lookup_bs(has_device ? device : NULL, + has_node_name ? node_name : NULL, + &local_err); + if (local_err) { + error_propagate(errp, local_err); + return; + } + + if (size < 0) { + error_setg(errp, QERR_INVALID_PARAMETER_VALUE, "size", "a >0 size"); + return; + } + + if (bdrv_op_is_blocked(bs, BLOCK_OP_TYPE_RESIZE, NULL)) { + error_setg(errp, QERR_DEVICE_IN_USE, device); + return; + } + + blk = blk_new_with_bs(bs, BLK_PERM_RESIZE, BLK_PERM_ALL, errp); + if (!blk) { + return; + } + + bdrv_co_lock(bs); + bdrv_drained_begin(bs); + bdrv_co_unlock(bs); + + old_ctx = bdrv_co_enter(bs); + blk_truncate(blk, size, false, PREALLOC_MODE_OFF, 0, errp); + bdrv_co_leave(bs, old_ctx); + + bdrv_co_lock(bs); + bdrv_drained_end(bs); + blk_unref(blk); + bdrv_co_unlock(bs); +} + +void qmp_block_stream(bool has_job_id, const char *job_id, const char *device, + bool has_base, const char *base, + bool has_base_node, const char *base_node, + bool has_backing_file, const char *backing_file, + bool has_speed, int64_t speed, + bool has_on_error, BlockdevOnError on_error, + bool has_auto_finalize, bool auto_finalize, + bool has_auto_dismiss, bool auto_dismiss, + Error **errp) +{ + BlockDriverState *bs, *iter; + BlockDriverState *base_bs = NULL; + AioContext *aio_context; + Error *local_err = NULL; + const char *base_name = NULL; + int job_flags = JOB_DEFAULT; + + if (!has_on_error) { + on_error = BLOCKDEV_ON_ERROR_REPORT; + } + + bs = bdrv_lookup_bs(device, device, errp); + if (!bs) { + return; + } + + aio_context = bdrv_get_aio_context(bs); + aio_context_acquire(aio_context); + + if (has_base && has_base_node) { + error_setg(errp, "'base' and 'base-node' cannot be specified " + "at the same time"); + goto out; + } + + if (has_base) { + base_bs = bdrv_find_backing_image(bs, base); + if (base_bs == NULL) { + error_setg(errp, QERR_BASE_NOT_FOUND, base); + goto out; + } + assert(bdrv_get_aio_context(base_bs) == aio_context); + base_name = base; + } + + if (has_base_node) { + base_bs = bdrv_lookup_bs(NULL, base_node, errp); + if (!base_bs) { + goto out; + } + if (bs == base_bs || !bdrv_chain_contains(bs, base_bs)) { + error_setg(errp, "Node '%s' is not a backing image of '%s'", + base_node, device); + goto out; + } + assert(bdrv_get_aio_context(base_bs) == aio_context); + bdrv_refresh_filename(base_bs); + base_name = base_bs->filename; + } + + /* Check for op blockers in the whole chain between bs and base */ + for (iter = bs; iter && iter != base_bs; + iter = bdrv_filter_or_cow_bs(iter)) + { + if (bdrv_op_is_blocked(iter, BLOCK_OP_TYPE_STREAM, errp)) { + goto out; + } + } + + /* if we are streaming the entire chain, the result will have no backing + * file, and specifying one is therefore an error */ + if (base_bs == NULL && has_backing_file) { + error_setg(errp, "backing file specified, but streaming the " + "entire chain"); + goto out; + } + + /* backing_file string overrides base bs filename */ + base_name = has_backing_file ? backing_file : base_name; + + if (has_auto_finalize && !auto_finalize) { + job_flags |= JOB_MANUAL_FINALIZE; + } + if (has_auto_dismiss && !auto_dismiss) { + job_flags |= JOB_MANUAL_DISMISS; + } + + stream_start(has_job_id ? job_id : NULL, bs, base_bs, base_name, + job_flags, has_speed ? speed : 0, on_error, &local_err); + if (local_err) { + error_propagate(errp, local_err); + goto out; + } + + trace_qmp_block_stream(bs); + +out: + aio_context_release(aio_context); +} + +void qmp_block_commit(bool has_job_id, const char *job_id, const char *device, + bool has_base_node, const char *base_node, + bool has_base, const char *base, + bool has_top_node, const char *top_node, + bool has_top, const char *top, + bool has_backing_file, const char *backing_file, + bool has_speed, int64_t speed, + bool has_on_error, BlockdevOnError on_error, + bool has_filter_node_name, const char *filter_node_name, + bool has_auto_finalize, bool auto_finalize, + bool has_auto_dismiss, bool auto_dismiss, + Error **errp) +{ + BlockDriverState *bs; + BlockDriverState *iter; + BlockDriverState *base_bs, *top_bs; + AioContext *aio_context; + Error *local_err = NULL; + int job_flags = JOB_DEFAULT; + uint64_t top_perm, top_shared; + + if (!has_speed) { + speed = 0; + } + if (!has_on_error) { + on_error = BLOCKDEV_ON_ERROR_REPORT; + } + if (!has_filter_node_name) { + filter_node_name = NULL; + } + if (has_auto_finalize && !auto_finalize) { + job_flags |= JOB_MANUAL_FINALIZE; + } + if (has_auto_dismiss && !auto_dismiss) { + job_flags |= JOB_MANUAL_DISMISS; + } + + /* Important Note: + * libvirt relies on the DeviceNotFound error class in order to probe for + * live commit feature versions; for this to work, we must make sure to + * perform the device lookup before any generic errors that may occur in a + * scenario in which all optional arguments are omitted. */ + bs = qmp_get_root_bs(device, &local_err); + if (!bs) { + bs = bdrv_lookup_bs(device, device, NULL); + if (!bs) { + error_free(local_err); + error_set(errp, ERROR_CLASS_DEVICE_NOT_FOUND, + "Device '%s' not found", device); + } else { + error_propagate(errp, local_err); + } + return; + } + + aio_context = bdrv_get_aio_context(bs); + aio_context_acquire(aio_context); + + if (bdrv_op_is_blocked(bs, BLOCK_OP_TYPE_COMMIT_SOURCE, errp)) { + goto out; + } + + /* default top_bs is the active layer */ + top_bs = bs; + + if (has_top_node && has_top) { + error_setg(errp, "'top-node' and 'top' are mutually exclusive"); + goto out; + } else if (has_top_node) { + top_bs = bdrv_lookup_bs(NULL, top_node, errp); + if (top_bs == NULL) { + goto out; + } + if (!bdrv_chain_contains(bs, top_bs)) { + error_setg(errp, "'%s' is not in this backing file chain", + top_node); + goto out; + } + } else if (has_top && top) { + /* This strcmp() is just a shortcut, there is no need to + * refresh @bs's filename. If it mismatches, + * bdrv_find_backing_image() will do the refresh and may still + * return @bs. */ + if (strcmp(bs->filename, top) != 0) { + top_bs = bdrv_find_backing_image(bs, top); + } + } + + if (top_bs == NULL) { + error_setg(errp, "Top image file %s not found", top ? top : "NULL"); + goto out; + } + + assert(bdrv_get_aio_context(top_bs) == aio_context); + + if (has_base_node && has_base) { + error_setg(errp, "'base-node' and 'base' are mutually exclusive"); + goto out; + } else if (has_base_node) { + base_bs = bdrv_lookup_bs(NULL, base_node, errp); + if (base_bs == NULL) { + goto out; + } + if (!bdrv_chain_contains(top_bs, base_bs)) { + error_setg(errp, "'%s' is not in this backing file chain", + base_node); + goto out; + } + } else if (has_base && base) { + base_bs = bdrv_find_backing_image(top_bs, base); + } else { + base_bs = bdrv_find_base(top_bs); + } + + if (base_bs == NULL) { + error_setg(errp, QERR_BASE_NOT_FOUND, base ? base : "NULL"); + goto out; + } + + assert(bdrv_get_aio_context(base_bs) == aio_context); + + for (iter = top_bs; iter != bdrv_filter_or_cow_bs(base_bs); + iter = bdrv_filter_or_cow_bs(iter)) + { + if (bdrv_op_is_blocked(iter, BLOCK_OP_TYPE_COMMIT_TARGET, errp)) { + goto out; + } + } + + /* Do not allow attempts to commit an image into itself */ + if (top_bs == base_bs) { + error_setg(errp, "cannot commit an image into itself"); + goto out; + } + + /* + * Active commit is required if and only if someone has taken a + * WRITE permission on the top node. Historically, we have always + * used active commit for top nodes, so continue that practice + * lest we possibly break clients that rely on this behavior, e.g. + * to later attach this node to a writing parent. + * (Active commit is never really wrong.) + */ + bdrv_get_cumulative_perm(top_bs, &top_perm, &top_shared); + if (top_perm & BLK_PERM_WRITE || + bdrv_skip_filters(top_bs) == bdrv_skip_filters(bs)) + { + if (has_backing_file) { + if (bdrv_skip_filters(top_bs) == bdrv_skip_filters(bs)) { + error_setg(errp, "'backing-file' specified," + " but 'top' is the active layer"); + } else { + error_setg(errp, "'backing-file' specified, but 'top' has a " + "writer on it"); + } + goto out; + } + if (!has_job_id) { + /* + * Emulate here what block_job_create() does, because it + * is possible that @bs != @top_bs (the block job should + * be named after @bs, even if @top_bs is the actual + * source) + */ + job_id = bdrv_get_device_name(bs); + } + commit_active_start(job_id, top_bs, base_bs, job_flags, speed, on_error, + filter_node_name, NULL, NULL, false, &local_err); + } else { + BlockDriverState *overlay_bs = bdrv_find_overlay(bs, top_bs); + if (bdrv_op_is_blocked(overlay_bs, BLOCK_OP_TYPE_COMMIT_TARGET, errp)) { + goto out; + } + commit_start(has_job_id ? job_id : NULL, bs, base_bs, top_bs, job_flags, + speed, on_error, has_backing_file ? backing_file : NULL, + filter_node_name, &local_err); + } + if (local_err != NULL) { + error_propagate(errp, local_err); + goto out; + } + +out: + aio_context_release(aio_context); +} + +/* Common QMP interface for drive-backup and blockdev-backup */ +static BlockJob *do_backup_common(BackupCommon *backup, + BlockDriverState *bs, + BlockDriverState *target_bs, + AioContext *aio_context, + JobTxn *txn, Error **errp) +{ + BlockJob *job = NULL; + BdrvDirtyBitmap *bmap = NULL; + int job_flags = JOB_DEFAULT; + + if (!backup->has_speed) { + backup->speed = 0; + } + if (!backup->has_on_source_error) { + backup->on_source_error = BLOCKDEV_ON_ERROR_REPORT; + } + if (!backup->has_on_target_error) { + backup->on_target_error = BLOCKDEV_ON_ERROR_REPORT; + } + if (!backup->has_job_id) { + backup->job_id = NULL; + } + if (!backup->has_auto_finalize) { + backup->auto_finalize = true; + } + if (!backup->has_auto_dismiss) { + backup->auto_dismiss = true; + } + if (!backup->has_compress) { + backup->compress = false; + } + + if ((backup->sync == MIRROR_SYNC_MODE_BITMAP) || + (backup->sync == MIRROR_SYNC_MODE_INCREMENTAL)) { + /* done before desugaring 'incremental' to print the right message */ + if (!backup->has_bitmap) { + error_setg(errp, "must provide a valid bitmap name for " + "'%s' sync mode", MirrorSyncMode_str(backup->sync)); + return NULL; + } + } + + if (backup->sync == MIRROR_SYNC_MODE_INCREMENTAL) { + if (backup->has_bitmap_mode && + backup->bitmap_mode != BITMAP_SYNC_MODE_ON_SUCCESS) { + error_setg(errp, "Bitmap sync mode must be '%s' " + "when using sync mode '%s'", + BitmapSyncMode_str(BITMAP_SYNC_MODE_ON_SUCCESS), + MirrorSyncMode_str(backup->sync)); + return NULL; + } + backup->has_bitmap_mode = true; + backup->sync = MIRROR_SYNC_MODE_BITMAP; + backup->bitmap_mode = BITMAP_SYNC_MODE_ON_SUCCESS; + } + + if (backup->has_bitmap) { + bmap = bdrv_find_dirty_bitmap(bs, backup->bitmap); + if (!bmap) { + error_setg(errp, "Bitmap '%s' could not be found", backup->bitmap); + return NULL; + } + if (!backup->has_bitmap_mode) { + error_setg(errp, "Bitmap sync mode must be given " + "when providing a bitmap"); + return NULL; + } + if (bdrv_dirty_bitmap_check(bmap, BDRV_BITMAP_ALLOW_RO, errp)) { + return NULL; + } + + /* This does not produce a useful bitmap artifact: */ + if (backup->sync == MIRROR_SYNC_MODE_NONE) { + error_setg(errp, "sync mode '%s' does not produce meaningful bitmap" + " outputs", MirrorSyncMode_str(backup->sync)); + return NULL; + } + + /* If the bitmap isn't used for input or output, this is useless: */ + if (backup->bitmap_mode == BITMAP_SYNC_MODE_NEVER && + backup->sync != MIRROR_SYNC_MODE_BITMAP) { + error_setg(errp, "Bitmap sync mode '%s' has no meaningful effect" + " when combined with sync mode '%s'", + BitmapSyncMode_str(backup->bitmap_mode), + MirrorSyncMode_str(backup->sync)); + return NULL; + } + } + + if (!backup->has_bitmap && backup->has_bitmap_mode) { + error_setg(errp, "Cannot specify bitmap sync mode without a bitmap"); + return NULL; + } + + if (!backup->auto_finalize) { + job_flags |= JOB_MANUAL_FINALIZE; + } + if (!backup->auto_dismiss) { + job_flags |= JOB_MANUAL_DISMISS; + } + + job = backup_job_create(backup->job_id, bs, target_bs, backup->speed, + backup->sync, bmap, backup->bitmap_mode, + backup->compress, + backup->filter_node_name, + backup->on_source_error, + backup->on_target_error, + job_flags, NULL, NULL, txn, errp); + return job; +} + +void qmp_drive_backup(DriveBackup *backup, Error **errp) +{ + TransactionAction action = { + .type = TRANSACTION_ACTION_KIND_DRIVE_BACKUP, + .u.drive_backup.data = backup, + }; + blockdev_do_action(&action, errp); +} + +BlockDeviceInfoList *qmp_query_named_block_nodes(bool has_flat, + bool flat, + Error **errp) +{ + bool return_flat = has_flat && flat; + + return bdrv_named_nodes_list(return_flat, errp); +} + +XDbgBlockGraph *qmp_x_debug_query_block_graph(Error **errp) +{ + return bdrv_get_xdbg_block_graph(errp); +} + +void qmp_blockdev_backup(BlockdevBackup *backup, Error **errp) +{ + TransactionAction action = { + .type = TRANSACTION_ACTION_KIND_BLOCKDEV_BACKUP, + .u.blockdev_backup.data = backup, + }; + blockdev_do_action(&action, errp); +} + +/* Parameter check and block job starting for drive mirroring. + * Caller should hold @device and @target's aio context (must be the same). + **/ +static void blockdev_mirror_common(const char *job_id, BlockDriverState *bs, + BlockDriverState *target, + bool has_replaces, const char *replaces, + enum MirrorSyncMode sync, + BlockMirrorBackingMode backing_mode, + bool zero_target, + bool has_speed, int64_t speed, + bool has_granularity, uint32_t granularity, + bool has_buf_size, int64_t buf_size, + bool has_on_source_error, + BlockdevOnError on_source_error, + bool has_on_target_error, + BlockdevOnError on_target_error, + bool has_unmap, bool unmap, + bool has_filter_node_name, + const char *filter_node_name, + bool has_copy_mode, MirrorCopyMode copy_mode, + bool has_auto_finalize, bool auto_finalize, + bool has_auto_dismiss, bool auto_dismiss, + Error **errp) +{ + BlockDriverState *unfiltered_bs; + int job_flags = JOB_DEFAULT; + + if (!has_speed) { + speed = 0; + } + if (!has_on_source_error) { + on_source_error = BLOCKDEV_ON_ERROR_REPORT; + } + if (!has_on_target_error) { + on_target_error = BLOCKDEV_ON_ERROR_REPORT; + } + if (!has_granularity) { + granularity = 0; + } + if (!has_buf_size) { + buf_size = 0; + } + if (!has_unmap) { + unmap = true; + } + if (!has_filter_node_name) { + filter_node_name = NULL; + } + if (!has_copy_mode) { + copy_mode = MIRROR_COPY_MODE_BACKGROUND; + } + if (has_auto_finalize && !auto_finalize) { + job_flags |= JOB_MANUAL_FINALIZE; + } + if (has_auto_dismiss && !auto_dismiss) { + job_flags |= JOB_MANUAL_DISMISS; + } + + if (granularity != 0 && (granularity < 512 || granularity > 1048576 * 64)) { + error_setg(errp, QERR_INVALID_PARAMETER_VALUE, "granularity", + "a value in range [512B, 64MB]"); + return; + } + if (granularity & (granularity - 1)) { + error_setg(errp, QERR_INVALID_PARAMETER_VALUE, "granularity", + "power of 2"); + return; + } + + if (bdrv_op_is_blocked(bs, BLOCK_OP_TYPE_MIRROR_SOURCE, errp)) { + return; + } + if (bdrv_op_is_blocked(target, BLOCK_OP_TYPE_MIRROR_TARGET, errp)) { + return; + } + + if (!bdrv_backing_chain_next(bs) && sync == MIRROR_SYNC_MODE_TOP) { + sync = MIRROR_SYNC_MODE_FULL; + } + + if (!has_replaces) { + /* We want to mirror from @bs, but keep implicit filters on top */ + unfiltered_bs = bdrv_skip_implicit_filters(bs); + if (unfiltered_bs != bs) { + replaces = unfiltered_bs->node_name; + has_replaces = true; + } + } + + if (has_replaces) { + BlockDriverState *to_replace_bs; + AioContext *replace_aio_context; + int64_t bs_size, replace_size; + + bs_size = bdrv_getlength(bs); + if (bs_size < 0) { + error_setg_errno(errp, -bs_size, "Failed to query device's size"); + return; + } + + to_replace_bs = check_to_replace_node(bs, replaces, errp); + if (!to_replace_bs) { + return; + } + + replace_aio_context = bdrv_get_aio_context(to_replace_bs); + aio_context_acquire(replace_aio_context); + replace_size = bdrv_getlength(to_replace_bs); + aio_context_release(replace_aio_context); + + if (replace_size < 0) { + error_setg_errno(errp, -replace_size, + "Failed to query the replacement node's size"); + return; + } + if (bs_size != replace_size) { + error_setg(errp, "cannot replace image with a mirror image of " + "different size"); + return; + } + } + + /* pass the node name to replace to mirror start since it's loose coupling + * and will allow to check whether the node still exist at mirror completion + */ + mirror_start(job_id, bs, target, + has_replaces ? replaces : NULL, job_flags, + speed, granularity, buf_size, sync, backing_mode, zero_target, + on_source_error, on_target_error, unmap, filter_node_name, + copy_mode, errp); +} + +void qmp_drive_mirror(DriveMirror *arg, Error **errp) +{ + BlockDriverState *bs; + BlockDriverState *target_backing_bs, *target_bs; + AioContext *aio_context; + AioContext *old_context; + BlockMirrorBackingMode backing_mode; + Error *local_err = NULL; + QDict *options = NULL; + int flags; + int64_t size; + const char *format = arg->format; + bool zero_target; + int ret; + + bs = qmp_get_root_bs(arg->device, errp); + if (!bs) { + return; + } + + /* Early check to avoid creating target */ + if (bdrv_op_is_blocked(bs, BLOCK_OP_TYPE_MIRROR_SOURCE, errp)) { + return; + } + + aio_context = bdrv_get_aio_context(bs); + aio_context_acquire(aio_context); + + if (!arg->has_mode) { + arg->mode = NEW_IMAGE_MODE_ABSOLUTE_PATHS; + } + + if (!arg->has_format) { + format = (arg->mode == NEW_IMAGE_MODE_EXISTING + ? NULL : bs->drv->format_name); + } + + flags = bs->open_flags | BDRV_O_RDWR; + target_backing_bs = bdrv_cow_bs(bdrv_skip_filters(bs)); + if (!target_backing_bs && arg->sync == MIRROR_SYNC_MODE_TOP) { + arg->sync = MIRROR_SYNC_MODE_FULL; + } + if (arg->sync == MIRROR_SYNC_MODE_NONE) { + target_backing_bs = bs; + } + + size = bdrv_getlength(bs); + if (size < 0) { + error_setg_errno(errp, -size, "bdrv_getlength failed"); + goto out; + } + + if (arg->has_replaces) { + if (!arg->has_node_name) { + error_setg(errp, "a node-name must be provided when replacing a" + " named node of the graph"); + goto out; + } + } + + if (arg->mode == NEW_IMAGE_MODE_ABSOLUTE_PATHS) { + backing_mode = MIRROR_SOURCE_BACKING_CHAIN; + } else { + backing_mode = MIRROR_OPEN_BACKING_CHAIN; + } + + /* Don't open backing image in create() */ + flags |= BDRV_O_NO_BACKING; + + if ((arg->sync == MIRROR_SYNC_MODE_FULL || !target_backing_bs) + && arg->mode != NEW_IMAGE_MODE_EXISTING) + { + /* create new image w/o backing file */ + assert(format); + bdrv_img_create(arg->target, format, + NULL, NULL, NULL, size, flags, false, &local_err); + } else { + /* Implicit filters should not appear in the filename */ + BlockDriverState *explicit_backing = + bdrv_skip_implicit_filters(target_backing_bs); + + switch (arg->mode) { + case NEW_IMAGE_MODE_EXISTING: + break; + case NEW_IMAGE_MODE_ABSOLUTE_PATHS: + /* create new image with backing file */ + bdrv_refresh_filename(explicit_backing); + bdrv_img_create(arg->target, format, + explicit_backing->filename, + explicit_backing->drv->format_name, + NULL, size, flags, false, &local_err); + break; + default: + abort(); + } + } + + if (local_err) { + error_propagate(errp, local_err); + goto out; + } + + options = qdict_new(); + if (arg->has_node_name) { + qdict_put_str(options, "node-name", arg->node_name); + } + if (format) { + qdict_put_str(options, "driver", format); + } + + /* Mirroring takes care of copy-on-write using the source's backing + * file. + */ + target_bs = bdrv_open(arg->target, NULL, options, flags, errp); + if (!target_bs) { + goto out; + } + + zero_target = (arg->sync == MIRROR_SYNC_MODE_FULL && + (arg->mode == NEW_IMAGE_MODE_EXISTING || + !bdrv_has_zero_init(target_bs))); + + + /* Honor bdrv_try_set_aio_context() context acquisition requirements. */ + old_context = bdrv_get_aio_context(target_bs); + aio_context_release(aio_context); + aio_context_acquire(old_context); + + ret = bdrv_try_set_aio_context(target_bs, aio_context, errp); + if (ret < 0) { + bdrv_unref(target_bs); + aio_context_release(old_context); + return; + } + + aio_context_release(old_context); + aio_context_acquire(aio_context); + + blockdev_mirror_common(arg->has_job_id ? arg->job_id : NULL, bs, target_bs, + arg->has_replaces, arg->replaces, arg->sync, + backing_mode, zero_target, + arg->has_speed, arg->speed, + arg->has_granularity, arg->granularity, + arg->has_buf_size, arg->buf_size, + arg->has_on_source_error, arg->on_source_error, + arg->has_on_target_error, arg->on_target_error, + arg->has_unmap, arg->unmap, + false, NULL, + arg->has_copy_mode, arg->copy_mode, + arg->has_auto_finalize, arg->auto_finalize, + arg->has_auto_dismiss, arg->auto_dismiss, + errp); + bdrv_unref(target_bs); +out: + aio_context_release(aio_context); +} + +void qmp_blockdev_mirror(bool has_job_id, const char *job_id, + const char *device, const char *target, + bool has_replaces, const char *replaces, + MirrorSyncMode sync, + bool has_speed, int64_t speed, + bool has_granularity, uint32_t granularity, + bool has_buf_size, int64_t buf_size, + bool has_on_source_error, + BlockdevOnError on_source_error, + bool has_on_target_error, + BlockdevOnError on_target_error, + bool has_filter_node_name, + const char *filter_node_name, + bool has_copy_mode, MirrorCopyMode copy_mode, + bool has_auto_finalize, bool auto_finalize, + bool has_auto_dismiss, bool auto_dismiss, + Error **errp) +{ + BlockDriverState *bs; + BlockDriverState *target_bs; + AioContext *aio_context; + AioContext *old_context; + BlockMirrorBackingMode backing_mode = MIRROR_LEAVE_BACKING_CHAIN; + bool zero_target; + int ret; + + bs = qmp_get_root_bs(device, errp); + if (!bs) { + return; + } + + target_bs = bdrv_lookup_bs(target, target, errp); + if (!target_bs) { + return; + } + + zero_target = (sync == MIRROR_SYNC_MODE_FULL); + + /* Honor bdrv_try_set_aio_context() context acquisition requirements. */ + old_context = bdrv_get_aio_context(target_bs); + aio_context = bdrv_get_aio_context(bs); + aio_context_acquire(old_context); + + ret = bdrv_try_set_aio_context(target_bs, aio_context, errp); + + aio_context_release(old_context); + aio_context_acquire(aio_context); + + if (ret < 0) { + goto out; + } + + blockdev_mirror_common(has_job_id ? job_id : NULL, bs, target_bs, + has_replaces, replaces, sync, backing_mode, + zero_target, has_speed, speed, + has_granularity, granularity, + has_buf_size, buf_size, + has_on_source_error, on_source_error, + has_on_target_error, on_target_error, + true, true, + has_filter_node_name, filter_node_name, + has_copy_mode, copy_mode, + has_auto_finalize, auto_finalize, + has_auto_dismiss, auto_dismiss, + errp); +out: + aio_context_release(aio_context); +} + +/* Get a block job using its ID and acquire its AioContext */ +static BlockJob *find_block_job(const char *id, AioContext **aio_context, + Error **errp) +{ + BlockJob *job; + + assert(id != NULL); + + *aio_context = NULL; + + job = block_job_get(id); + + if (!job) { + error_set(errp, ERROR_CLASS_DEVICE_NOT_ACTIVE, + "Block job '%s' not found", id); + return NULL; + } + + *aio_context = blk_get_aio_context(job->blk); + aio_context_acquire(*aio_context); + + return job; +} + +void qmp_block_job_set_speed(const char *device, int64_t speed, Error **errp) +{ + AioContext *aio_context; + BlockJob *job = find_block_job(device, &aio_context, errp); + + if (!job) { + return; + } + + block_job_set_speed(job, speed, errp); + aio_context_release(aio_context); +} + +void qmp_block_job_cancel(const char *device, + bool has_force, bool force, Error **errp) +{ + AioContext *aio_context; + BlockJob *job = find_block_job(device, &aio_context, errp); + + if (!job) { + return; + } + + if (!has_force) { + force = false; + } + + if (job_user_paused(&job->job) && !force) { + error_setg(errp, "The block job for device '%s' is currently paused", + device); + goto out; + } + + trace_qmp_block_job_cancel(job); + job_user_cancel(&job->job, force, errp); +out: + aio_context_release(aio_context); +} + +void qmp_block_job_pause(const char *device, Error **errp) +{ + AioContext *aio_context; + BlockJob *job = find_block_job(device, &aio_context, errp); + + if (!job) { + return; + } + + trace_qmp_block_job_pause(job); + job_user_pause(&job->job, errp); + aio_context_release(aio_context); +} + +void qmp_block_job_resume(const char *device, Error **errp) +{ + AioContext *aio_context; + BlockJob *job = find_block_job(device, &aio_context, errp); + + if (!job) { + return; + } + + trace_qmp_block_job_resume(job); + job_user_resume(&job->job, errp); + aio_context_release(aio_context); +} + +void qmp_block_job_complete(const char *device, Error **errp) +{ + AioContext *aio_context; + BlockJob *job = find_block_job(device, &aio_context, errp); + + if (!job) { + return; + } + + trace_qmp_block_job_complete(job); + job_complete(&job->job, errp); + aio_context_release(aio_context); +} + +void qmp_block_job_finalize(const char *id, Error **errp) +{ + AioContext *aio_context; + BlockJob *job = find_block_job(id, &aio_context, errp); + + if (!job) { + return; + } + + trace_qmp_block_job_finalize(job); + job_ref(&job->job); + job_finalize(&job->job, errp); + + /* + * Job's context might have changed via job_finalize (and job_txn_apply + * automatically acquires the new one), so make sure we release the correct + * one. + */ + aio_context = blk_get_aio_context(job->blk); + job_unref(&job->job); + aio_context_release(aio_context); +} + +void qmp_block_job_dismiss(const char *id, Error **errp) +{ + AioContext *aio_context; + BlockJob *bjob = find_block_job(id, &aio_context, errp); + Job *job; + + if (!bjob) { + return; + } + + trace_qmp_block_job_dismiss(bjob); + job = &bjob->job; + job_dismiss(&job, errp); + aio_context_release(aio_context); +} + +void qmp_change_backing_file(const char *device, + const char *image_node_name, + const char *backing_file, + Error **errp) +{ + BlockDriverState *bs = NULL; + AioContext *aio_context; + BlockDriverState *image_bs = NULL; + Error *local_err = NULL; + bool ro; + int ret; + + bs = qmp_get_root_bs(device, errp); + if (!bs) { + return; + } + + aio_context = bdrv_get_aio_context(bs); + aio_context_acquire(aio_context); + + image_bs = bdrv_lookup_bs(NULL, image_node_name, &local_err); + if (local_err) { + error_propagate(errp, local_err); + goto out; + } + + if (!image_bs) { + error_setg(errp, "image file not found"); + goto out; + } + + if (bdrv_find_base(image_bs) == image_bs) { + error_setg(errp, "not allowing backing file change on an image " + "without a backing file"); + goto out; + } + + /* even though we are not necessarily operating on bs, we need it to + * determine if block ops are currently prohibited on the chain */ + if (bdrv_op_is_blocked(bs, BLOCK_OP_TYPE_CHANGE, errp)) { + goto out; + } + + /* final sanity check */ + if (!bdrv_chain_contains(bs, image_bs)) { + error_setg(errp, "'%s' and image file are not in the same chain", + device); + goto out; + } + + /* if not r/w, reopen to make r/w */ + ro = bdrv_is_read_only(image_bs); + + if (ro) { + if (bdrv_reopen_set_read_only(image_bs, false, errp) != 0) { + goto out; + } + } + + ret = bdrv_change_backing_file(image_bs, backing_file, + image_bs->drv ? image_bs->drv->format_name : "", + false); + + if (ret < 0) { + error_setg_errno(errp, -ret, "Could not change backing file to '%s'", + backing_file); + /* don't exit here, so we can try to restore open flags if + * appropriate */ + } + + if (ro) { + bdrv_reopen_set_read_only(image_bs, true, errp); + } + +out: + aio_context_release(aio_context); +} + +void qmp_blockdev_add(BlockdevOptions *options, Error **errp) +{ + BlockDriverState *bs; + QObject *obj; + Visitor *v = qobject_output_visitor_new(&obj); + QDict *qdict; + + visit_type_BlockdevOptions(v, NULL, &options, &error_abort); + visit_complete(v, &obj); + qdict = qobject_to(QDict, obj); + + qdict_flatten(qdict); + + if (!qdict_get_try_str(qdict, "node-name")) { + error_setg(errp, "'node-name' must be specified for the root node"); + goto fail; + } + + bs = bds_tree_init(qdict, errp); + if (!bs) { + goto fail; + } + + bdrv_set_monitor_owned(bs); + +fail: + visit_free(v); +} + +void qmp_x_blockdev_reopen(BlockdevOptions *options, Error **errp) +{ + BlockDriverState *bs; + AioContext *ctx; + QObject *obj; + Visitor *v = qobject_output_visitor_new(&obj); + BlockReopenQueue *queue; + QDict *qdict; + + /* Check for the selected node name */ + if (!options->has_node_name) { + error_setg(errp, "Node name not specified"); + goto fail; + } + + bs = bdrv_find_node(options->node_name); + if (!bs) { + error_setg(errp, "Cannot find node named '%s'", options->node_name); + goto fail; + } + + /* Put all options in a QDict and flatten it */ + visit_type_BlockdevOptions(v, NULL, &options, &error_abort); + visit_complete(v, &obj); + qdict = qobject_to(QDict, obj); + + qdict_flatten(qdict); + + /* Perform the reopen operation */ + ctx = bdrv_get_aio_context(bs); + aio_context_acquire(ctx); + bdrv_subtree_drained_begin(bs); + queue = bdrv_reopen_queue(NULL, bs, qdict, false); + bdrv_reopen_multiple(queue, errp); + bdrv_subtree_drained_end(bs); + aio_context_release(ctx); + +fail: + visit_free(v); +} + +void qmp_blockdev_del(const char *node_name, Error **errp) +{ + AioContext *aio_context; + BlockDriverState *bs; + + bs = bdrv_find_node(node_name); + if (!bs) { + error_setg(errp, "Cannot find node %s", node_name); + return; + } + if (bdrv_has_blk(bs)) { + error_setg(errp, "Node %s is in use", node_name); + return; + } + aio_context = bdrv_get_aio_context(bs); + aio_context_acquire(aio_context); + + if (bdrv_op_is_blocked(bs, BLOCK_OP_TYPE_DRIVE_DEL, errp)) { + goto out; + } + + if (!QTAILQ_IN_USE(bs, monitor_list)) { + error_setg(errp, "Node %s is not owned by the monitor", + bs->node_name); + goto out; + } + + if (bs->refcnt > 1) { + error_setg(errp, "Block device %s is in use", + bdrv_get_device_or_node_name(bs)); + goto out; + } + + QTAILQ_REMOVE(&monitor_bdrv_states, bs, monitor_list); + bdrv_unref(bs); + +out: + aio_context_release(aio_context); +} + +static BdrvChild *bdrv_find_child(BlockDriverState *parent_bs, + const char *child_name) +{ + BdrvChild *child; + + QLIST_FOREACH(child, &parent_bs->children, next) { + if (strcmp(child->name, child_name) == 0) { + return child; + } + } + + return NULL; +} + +void qmp_x_blockdev_change(const char *parent, bool has_child, + const char *child, bool has_node, + const char *node, Error **errp) +{ + BlockDriverState *parent_bs, *new_bs = NULL; + BdrvChild *p_child; + + parent_bs = bdrv_lookup_bs(parent, parent, errp); + if (!parent_bs) { + return; + } + + if (has_child == has_node) { + if (has_child) { + error_setg(errp, "The parameters child and node are in conflict"); + } else { + error_setg(errp, "Either child or node must be specified"); + } + return; + } + + if (has_child) { + p_child = bdrv_find_child(parent_bs, child); + if (!p_child) { + error_setg(errp, "Node '%s' does not have child '%s'", + parent, child); + return; + } + bdrv_del_child(parent_bs, p_child, errp); + } + + if (has_node) { + new_bs = bdrv_find_node(node); + if (!new_bs) { + error_setg(errp, "Node '%s' not found", node); + return; + } + bdrv_add_child(parent_bs, new_bs, errp); + } +} + +BlockJobInfoList *qmp_query_block_jobs(Error **errp) +{ + BlockJobInfoList *head = NULL, **p_next = &head; + BlockJob *job; + + for (job = block_job_next(NULL); job; job = block_job_next(job)) { + BlockJobInfoList *elem; + AioContext *aio_context; + + if (block_job_is_internal(job)) { + continue; + } + elem = g_new0(BlockJobInfoList, 1); + aio_context = blk_get_aio_context(job->blk); + aio_context_acquire(aio_context); + elem->value = block_job_query(job, errp); + aio_context_release(aio_context); + if (!elem->value) { + g_free(elem); + qapi_free_BlockJobInfoList(head); + return NULL; + } + *p_next = elem; + p_next = &elem->next; + } + + return head; +} + +void qmp_x_blockdev_set_iothread(const char *node_name, StrOrNull *iothread, + bool has_force, bool force, Error **errp) +{ + AioContext *old_context; + AioContext *new_context; + BlockDriverState *bs; + + bs = bdrv_find_node(node_name); + if (!bs) { + error_setg(errp, "Cannot find node %s", node_name); + return; + } + + /* Protects against accidents. */ + if (!(has_force && force) && bdrv_has_blk(bs)) { + error_setg(errp, "Node %s is associated with a BlockBackend and could " + "be in use (use force=true to override this check)", + node_name); + return; + } + + if (iothread->type == QTYPE_QSTRING) { + IOThread *obj = iothread_by_id(iothread->u.s); + if (!obj) { + error_setg(errp, "Cannot find iothread %s", iothread->u.s); + return; + } + + new_context = iothread_get_aio_context(obj); + } else { + new_context = qemu_get_aio_context(); + } + + old_context = bdrv_get_aio_context(bs); + aio_context_acquire(old_context); + + bdrv_try_set_aio_context(bs, new_context, errp); + + aio_context_release(old_context); +} + +QemuOptsList qemu_common_drive_opts = { + .name = "drive", + .head = QTAILQ_HEAD_INITIALIZER(qemu_common_drive_opts.head), + .desc = { + { + .name = "snapshot", + .type = QEMU_OPT_BOOL, + .help = "enable/disable snapshot mode", + },{ + .name = "aio", + .type = QEMU_OPT_STRING, + .help = "host AIO implementation (threads, native, io_uring)", + },{ + .name = BDRV_OPT_CACHE_WB, + .type = QEMU_OPT_BOOL, + .help = "Enable writeback mode", + },{ + .name = "format", + .type = QEMU_OPT_STRING, + .help = "disk format (raw, qcow2, ...)", + },{ + .name = "rerror", + .type = QEMU_OPT_STRING, + .help = "read error action", + },{ + .name = "werror", + .type = QEMU_OPT_STRING, + .help = "write error action", + },{ + .name = BDRV_OPT_READ_ONLY, + .type = QEMU_OPT_BOOL, + .help = "open drive file as read-only", + }, + + THROTTLE_OPTS, + + { + .name = "throttling.group", + .type = QEMU_OPT_STRING, + .help = "name of the block throttling group", + },{ + .name = "copy-on-read", + .type = QEMU_OPT_BOOL, + .help = "copy read data from backing file into image file", + },{ + .name = "detect-zeroes", + .type = QEMU_OPT_STRING, + .help = "try to optimize zero writes (off, on, unmap)", + },{ + .name = "stats-account-invalid", + .type = QEMU_OPT_BOOL, + .help = "whether to account for invalid I/O operations " + "in the statistics", + },{ + .name = "stats-account-failed", + .type = QEMU_OPT_BOOL, + .help = "whether to account for failed I/O operations " + "in the statistics", + }, + { /* end of list */ } + }, +}; + +QemuOptsList qemu_drive_opts = { + .name = "drive", + .head = QTAILQ_HEAD_INITIALIZER(qemu_drive_opts.head), + .desc = { + /* + * no elements => accept any params + * validation will happen later + */ + { /* end of list */ } + }, +}; diff --git a/blockjob.c b/blockjob.c new file mode 100644 index 000000000..62f1a5373 --- /dev/null +++ b/blockjob.c @@ -0,0 +1,521 @@ +/* + * QEMU System Emulator block driver + * + * Copyright (c) 2011 IBM Corp. + * Copyright (c) 2012 Red Hat, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ + +#include "qemu/osdep.h" +#include "block/block.h" +#include "block/blockjob_int.h" +#include "block/block_int.h" +#include "block/trace.h" +#include "sysemu/block-backend.h" +#include "qapi/error.h" +#include "qapi/qapi-events-block-core.h" +#include "qapi/qmp/qerror.h" +#include "qemu/coroutine.h" +#include "qemu/main-loop.h" +#include "qemu/timer.h" + +/* + * The block job API is composed of two categories of functions. + * + * The first includes functions used by the monitor. The monitor is + * peculiar in that it accesses the block job list with block_job_get, and + * therefore needs consistency across block_job_get and the actual operation + * (e.g. block_job_set_speed). The consistency is achieved with + * aio_context_acquire/release. These functions are declared in blockjob.h. + * + * The second includes functions used by the block job drivers and sometimes + * by the core block layer. These do not care about locking, because the + * whole coroutine runs under the AioContext lock, and are declared in + * blockjob_int.h. + */ + +static bool is_block_job(Job *job) +{ + return job_type(job) == JOB_TYPE_BACKUP || + job_type(job) == JOB_TYPE_COMMIT || + job_type(job) == JOB_TYPE_MIRROR || + job_type(job) == JOB_TYPE_STREAM; +} + +BlockJob *block_job_next(BlockJob *bjob) +{ + Job *job = bjob ? &bjob->job : NULL; + + do { + job = job_next(job); + } while (job && !is_block_job(job)); + + return job ? container_of(job, BlockJob, job) : NULL; +} + +BlockJob *block_job_get(const char *id) +{ + Job *job = job_get(id); + + if (job && is_block_job(job)) { + return container_of(job, BlockJob, job); + } else { + return NULL; + } +} + +void block_job_free(Job *job) +{ + BlockJob *bjob = container_of(job, BlockJob, job); + + block_job_remove_all_bdrv(bjob); + blk_unref(bjob->blk); + error_free(bjob->blocker); +} + +static char *child_job_get_parent_desc(BdrvChild *c) +{ + BlockJob *job = c->opaque; + return g_strdup_printf("%s job '%s'", job_type_str(&job->job), job->job.id); +} + +static void child_job_drained_begin(BdrvChild *c) +{ + BlockJob *job = c->opaque; + job_pause(&job->job); +} + +static bool child_job_drained_poll(BdrvChild *c) +{ + BlockJob *bjob = c->opaque; + Job *job = &bjob->job; + const BlockJobDriver *drv = block_job_driver(bjob); + + /* An inactive or completed job doesn't have any pending requests. Jobs + * with !job->busy are either already paused or have a pause point after + * being reentered, so no job driver code will run before they pause. */ + if (!job->busy || job_is_completed(job)) { + return false; + } + + /* Otherwise, assume that it isn't fully stopped yet, but allow the job to + * override this assumption. */ + if (drv->drained_poll) { + return drv->drained_poll(bjob); + } else { + return true; + } +} + +static void child_job_drained_end(BdrvChild *c, int *drained_end_counter) +{ + BlockJob *job = c->opaque; + job_resume(&job->job); +} + +static bool child_job_can_set_aio_ctx(BdrvChild *c, AioContext *ctx, + GSList **ignore, Error **errp) +{ + BlockJob *job = c->opaque; + GSList *l; + + for (l = job->nodes; l; l = l->next) { + BdrvChild *sibling = l->data; + if (!bdrv_child_can_set_aio_context(sibling, ctx, ignore, errp)) { + return false; + } + } + return true; +} + +static void child_job_set_aio_ctx(BdrvChild *c, AioContext *ctx, + GSList **ignore) +{ + BlockJob *job = c->opaque; + GSList *l; + + for (l = job->nodes; l; l = l->next) { + BdrvChild *sibling = l->data; + if (g_slist_find(*ignore, sibling)) { + continue; + } + *ignore = g_slist_prepend(*ignore, sibling); + bdrv_set_aio_context_ignore(sibling->bs, ctx, ignore); + } + + job->job.aio_context = ctx; +} + +static const BdrvChildClass child_job = { + .get_parent_desc = child_job_get_parent_desc, + .drained_begin = child_job_drained_begin, + .drained_poll = child_job_drained_poll, + .drained_end = child_job_drained_end, + .can_set_aio_ctx = child_job_can_set_aio_ctx, + .set_aio_ctx = child_job_set_aio_ctx, + .stay_at_node = true, +}; + +void block_job_remove_all_bdrv(BlockJob *job) +{ + /* + * bdrv_root_unref_child() may reach child_job_[can_]set_aio_ctx(), + * which will also traverse job->nodes, so consume the list one by + * one to make sure that such a concurrent access does not attempt + * to process an already freed BdrvChild. + */ + while (job->nodes) { + GSList *l = job->nodes; + BdrvChild *c = l->data; + + job->nodes = l->next; + + bdrv_op_unblock_all(c->bs, job->blocker); + bdrv_root_unref_child(c); + + g_slist_free_1(l); + } +} + +bool block_job_has_bdrv(BlockJob *job, BlockDriverState *bs) +{ + GSList *el; + + for (el = job->nodes; el; el = el->next) { + BdrvChild *c = el->data; + if (c->bs == bs) { + return true; + } + } + + return false; +} + +int block_job_add_bdrv(BlockJob *job, const char *name, BlockDriverState *bs, + uint64_t perm, uint64_t shared_perm, Error **errp) +{ + BdrvChild *c; + bool need_context_ops; + + bdrv_ref(bs); + + need_context_ops = bdrv_get_aio_context(bs) != job->job.aio_context; + + if (need_context_ops && job->job.aio_context != qemu_get_aio_context()) { + aio_context_release(job->job.aio_context); + } + c = bdrv_root_attach_child(bs, name, &child_job, 0, + job->job.aio_context, perm, shared_perm, job, + errp); + if (need_context_ops && job->job.aio_context != qemu_get_aio_context()) { + aio_context_acquire(job->job.aio_context); + } + if (c == NULL) { + return -EPERM; + } + + job->nodes = g_slist_prepend(job->nodes, c); + bdrv_op_block_all(bs, job->blocker); + + return 0; +} + +static void block_job_on_idle(Notifier *n, void *opaque) +{ + aio_wait_kick(); +} + +bool block_job_is_internal(BlockJob *job) +{ + return (job->job.id == NULL); +} + +const BlockJobDriver *block_job_driver(BlockJob *job) +{ + return container_of(job->job.driver, BlockJobDriver, job_driver); +} + +/* Assumes the job_mutex is held */ +static bool job_timer_pending(Job *job) +{ + return timer_pending(&job->sleep_timer); +} + +void block_job_set_speed(BlockJob *job, int64_t speed, Error **errp) +{ + int64_t old_speed = job->speed; + + if (job_apply_verb(&job->job, JOB_VERB_SET_SPEED, errp)) { + return; + } + if (speed < 0) { + error_setg(errp, QERR_INVALID_PARAMETER_VALUE, "speed", + "a non-negative value"); + return; + } + + ratelimit_set_speed(&job->limit, speed, BLOCK_JOB_SLICE_TIME); + + job->speed = speed; + if (speed && speed <= old_speed) { + return; + } + + /* kick only if a timer is pending */ + job_enter_cond(&job->job, job_timer_pending); +} + +int64_t block_job_ratelimit_get_delay(BlockJob *job, uint64_t n) +{ + if (!job->speed) { + return 0; + } + + return ratelimit_calculate_delay(&job->limit, n); +} + +BlockJobInfo *block_job_query(BlockJob *job, Error **errp) +{ + BlockJobInfo *info; + + if (block_job_is_internal(job)) { + error_setg(errp, "Cannot query QEMU internal jobs"); + return NULL; + } + info = g_new0(BlockJobInfo, 1); + info->type = g_strdup(job_type_str(&job->job)); + info->device = g_strdup(job->job.id); + info->busy = qatomic_read(&job->job.busy); + info->paused = job->job.pause_count > 0; + info->offset = job->job.progress.current; + info->len = job->job.progress.total; + info->speed = job->speed; + info->io_status = job->iostatus; + info->ready = job_is_ready(&job->job), + info->status = job->job.status; + info->auto_finalize = job->job.auto_finalize; + info->auto_dismiss = job->job.auto_dismiss; + info->has_error = job->job.ret != 0; + info->error = job->job.ret ? g_strdup(strerror(-job->job.ret)) : NULL; + return info; +} + +static void block_job_iostatus_set_err(BlockJob *job, int error) +{ + if (job->iostatus == BLOCK_DEVICE_IO_STATUS_OK) { + job->iostatus = error == ENOSPC ? BLOCK_DEVICE_IO_STATUS_NOSPACE : + BLOCK_DEVICE_IO_STATUS_FAILED; + } +} + +static void block_job_event_cancelled(Notifier *n, void *opaque) +{ + BlockJob *job = opaque; + + if (block_job_is_internal(job)) { + return; + } + + qapi_event_send_block_job_cancelled(job_type(&job->job), + job->job.id, + job->job.progress.total, + job->job.progress.current, + job->speed); +} + +static void block_job_event_completed(Notifier *n, void *opaque) +{ + BlockJob *job = opaque; + const char *msg = NULL; + + if (block_job_is_internal(job)) { + return; + } + + if (job->job.ret < 0) { + msg = strerror(-job->job.ret); + } + + qapi_event_send_block_job_completed(job_type(&job->job), + job->job.id, + job->job.progress.total, + job->job.progress.current, + job->speed, + !!msg, + msg); +} + +static void block_job_event_pending(Notifier *n, void *opaque) +{ + BlockJob *job = opaque; + + if (block_job_is_internal(job)) { + return; + } + + qapi_event_send_block_job_pending(job_type(&job->job), + job->job.id); +} + +static void block_job_event_ready(Notifier *n, void *opaque) +{ + BlockJob *job = opaque; + + if (block_job_is_internal(job)) { + return; + } + + qapi_event_send_block_job_ready(job_type(&job->job), + job->job.id, + job->job.progress.total, + job->job.progress.current, + job->speed); +} + + +/* + * API for block job drivers and the block layer. These functions are + * declared in blockjob_int.h. + */ + +void *block_job_create(const char *job_id, const BlockJobDriver *driver, + JobTxn *txn, BlockDriverState *bs, uint64_t perm, + uint64_t shared_perm, int64_t speed, int flags, + BlockCompletionFunc *cb, void *opaque, Error **errp) +{ + BlockBackend *blk; + BlockJob *job; + + if (job_id == NULL && !(flags & JOB_INTERNAL)) { + job_id = bdrv_get_device_name(bs); + } + + blk = blk_new_with_bs(bs, perm, shared_perm, errp); + if (!blk) { + return NULL; + } + + job = job_create(job_id, &driver->job_driver, txn, blk_get_aio_context(blk), + flags, cb, opaque, errp); + if (job == NULL) { + blk_unref(blk); + return NULL; + } + + assert(is_block_job(&job->job)); + assert(job->job.driver->free == &block_job_free); + assert(job->job.driver->user_resume == &block_job_user_resume); + + job->blk = blk; + + job->finalize_cancelled_notifier.notify = block_job_event_cancelled; + job->finalize_completed_notifier.notify = block_job_event_completed; + job->pending_notifier.notify = block_job_event_pending; + job->ready_notifier.notify = block_job_event_ready; + job->idle_notifier.notify = block_job_on_idle; + + notifier_list_add(&job->job.on_finalize_cancelled, + &job->finalize_cancelled_notifier); + notifier_list_add(&job->job.on_finalize_completed, + &job->finalize_completed_notifier); + notifier_list_add(&job->job.on_pending, &job->pending_notifier); + notifier_list_add(&job->job.on_ready, &job->ready_notifier); + notifier_list_add(&job->job.on_idle, &job->idle_notifier); + + error_setg(&job->blocker, "block device is in use by block job: %s", + job_type_str(&job->job)); + block_job_add_bdrv(job, "main node", bs, 0, BLK_PERM_ALL, &error_abort); + + bdrv_op_unblock(bs, BLOCK_OP_TYPE_DATAPLANE, job->blocker); + + /* Disable request queuing in the BlockBackend to avoid deadlocks on drain: + * The job reports that it's busy until it reaches a pause point. */ + blk_set_disable_request_queuing(blk, true); + blk_set_allow_aio_context_change(blk, true); + + /* Only set speed when necessary to avoid NotSupported error */ + if (speed != 0) { + Error *local_err = NULL; + + block_job_set_speed(job, speed, &local_err); + if (local_err) { + job_early_fail(&job->job); + error_propagate(errp, local_err); + return NULL; + } + } + + return job; +} + +void block_job_iostatus_reset(BlockJob *job) +{ + if (job->iostatus == BLOCK_DEVICE_IO_STATUS_OK) { + return; + } + assert(job->job.user_paused && job->job.pause_count > 0); + job->iostatus = BLOCK_DEVICE_IO_STATUS_OK; +} + +void block_job_user_resume(Job *job) +{ + BlockJob *bjob = container_of(job, BlockJob, job); + block_job_iostatus_reset(bjob); +} + +BlockErrorAction block_job_error_action(BlockJob *job, BlockdevOnError on_err, + int is_read, int error) +{ + BlockErrorAction action; + + switch (on_err) { + case BLOCKDEV_ON_ERROR_ENOSPC: + case BLOCKDEV_ON_ERROR_AUTO: + action = (error == ENOSPC) ? + BLOCK_ERROR_ACTION_STOP : BLOCK_ERROR_ACTION_REPORT; + break; + case BLOCKDEV_ON_ERROR_STOP: + action = BLOCK_ERROR_ACTION_STOP; + break; + case BLOCKDEV_ON_ERROR_REPORT: + action = BLOCK_ERROR_ACTION_REPORT; + break; + case BLOCKDEV_ON_ERROR_IGNORE: + action = BLOCK_ERROR_ACTION_IGNORE; + break; + default: + abort(); + } + if (!block_job_is_internal(job)) { + qapi_event_send_block_job_error(job->job.id, + is_read ? IO_OPERATION_TYPE_READ : + IO_OPERATION_TYPE_WRITE, + action); + } + if (action == BLOCK_ERROR_ACTION_STOP) { + if (!job->job.user_paused) { + job_pause(&job->job); + /* make the pause user visible, which will be resumed from QMP. */ + job->job.user_paused = true; + } + block_job_iostatus_set_err(job, error); + } + return action; +} diff --git a/configure b/configure new file mode 100755 index 000000000..9d64e368d --- /dev/null +++ b/configure @@ -0,0 +1,7098 @@ +#!/bin/sh +# +# qemu configure script (c) 2003 Fabrice Bellard +# + +# Unset some variables known to interfere with behavior of common tools, +# just as autoconf does. +CLICOLOR_FORCE= GREP_OPTIONS= +unset CLICOLOR_FORCE GREP_OPTIONS + +# Don't allow CCACHE, if present, to use cached results of compile tests! +export CCACHE_RECACHE=yes + +# make source path absolute +source_path=$(cd "$(dirname -- "$0")"; pwd) + +if test "$PWD" = "$source_path" +then + echo "Using './build' as the directory for build output" + + MARKER=build/auto-created-by-configure + + if test -e build + then + if test -f $MARKER + then + rm -rf build + else + echo "ERROR: ./build dir already exists and was not previously created by configure" + exit 1 + fi + fi + + mkdir build + touch $MARKER + + cat > GNUmakefile <<'EOF' +# This file is auto-generated by configure to support in-source tree +# 'make' command invocation + +ifeq ($(MAKECMDGOALS),) +recurse: all +endif + +.NOTPARALLEL: % +%: force + @echo 'changing dir to build for $(MAKE) "$(MAKECMDGOALS)"...' + @$(MAKE) -C build -f Makefile $(MAKECMDGOALS) + @if test "$(MAKECMDGOALS)" = "distclean" && \ + test -e build/auto-created-by-configure ; \ + then \ + rm -rf build GNUmakefile ; \ + fi +force: ; +.PHONY: force +GNUmakefile: ; + +EOF + cd build + exec $source_path/configure "$@" +fi + +# Temporary directory used for files created while +# configure runs. Since it is in the build directory +# we can safely blow away any previous version of it +# (and we need not jump through hoops to try to delete +# it when configure exits.) +TMPDIR1="config-temp" +rm -rf "${TMPDIR1}" +mkdir -p "${TMPDIR1}" +if [ $? -ne 0 ]; then + echo "ERROR: failed to create temporary directory" + exit 1 +fi + +TMPB="qemu-conf" +TMPC="${TMPDIR1}/${TMPB}.c" +TMPO="${TMPDIR1}/${TMPB}.o" +TMPCXX="${TMPDIR1}/${TMPB}.cxx" +TMPE="${TMPDIR1}/${TMPB}.exe" +TMPTXT="${TMPDIR1}/${TMPB}.txt" + +rm -f config.log + +# Print a helpful header at the top of config.log +echo "# QEMU configure log $(date)" >> config.log +printf "# Configured with:" >> config.log +printf " '%s'" "$0" "$@" >> config.log +echo >> config.log +echo "#" >> config.log + +print_error() { + (echo + echo "ERROR: $1" + while test -n "$2"; do + echo " $2" + shift + done + echo) >&2 +} + +error_exit() { + print_error "$@" + exit 1 +} + +do_compiler() { + # Run the compiler, capturing its output to the log. First argument + # is compiler binary to execute. + local compiler="$1" + shift + if test -n "$BASH_VERSION"; then eval ' + echo >>config.log " +funcs: ${FUNCNAME[*]} +lines: ${BASH_LINENO[*]}" + '; fi + echo $compiler "$@" >> config.log + $compiler "$@" >> config.log 2>&1 || return $? + # Test passed. If this is an --enable-werror build, rerun + # the test with -Werror and bail out if it fails. This + # makes warning-generating-errors in configure test code + # obvious to developers. + if test "$werror" != "yes"; then + return 0 + fi + # Don't bother rerunning the compile if we were already using -Werror + case "$*" in + *-Werror*) + return 0 + ;; + esac + echo $compiler -Werror "$@" >> config.log + $compiler -Werror "$@" >> config.log 2>&1 && return $? + error_exit "configure test passed without -Werror but failed with -Werror." \ + "This is probably a bug in the configure script. The failing command" \ + "will be at the bottom of config.log." \ + "You can run configure with --disable-werror to bypass this check." +} + +do_cc() { + do_compiler "$cc" "$@" +} + +do_cxx() { + do_compiler "$cxx" "$@" +} + +# Append $2 to the variable named $1, with space separation +add_to() { + eval $1=\${$1:+\"\$$1 \"}\$2 +} + +update_cxxflags() { + # Set QEMU_CXXFLAGS from QEMU_CFLAGS by filtering out those + # options which some versions of GCC's C++ compiler complain about + # because they only make sense for C programs. + QEMU_CXXFLAGS="$QEMU_CXXFLAGS -D__STDC_LIMIT_MACROS -D__STDC_CONSTANT_MACROS -D__STDC_FORMAT_MACROS" + CONFIGURE_CXXFLAGS=$(echo "$CONFIGURE_CFLAGS" | sed s/-std=gnu99/-std=gnu++11/) + for arg in $QEMU_CFLAGS; do + case $arg in + -Wstrict-prototypes|-Wmissing-prototypes|-Wnested-externs|\ + -Wold-style-declaration|-Wold-style-definition|-Wredundant-decls) + ;; + *) + QEMU_CXXFLAGS=${QEMU_CXXFLAGS:+$QEMU_CXXFLAGS }$arg + ;; + esac + done +} + +compile_object() { + local_cflags="$1" + do_cc $CFLAGS $CONFIGURE_CFLAGS $QEMU_CFLAGS $local_cflags -c -o $TMPO $TMPC +} + +compile_prog() { + local_cflags="$1" + local_ldflags="$2" + do_cc $CFLAGS $CONFIGURE_CFLAGS $QEMU_CFLAGS $local_cflags -o $TMPE $TMPC \ + $LDFLAGS $CONFIGURE_LDFLAGS $QEMU_LDFLAGS $local_ldflags +} + +# symbolically link $1 to $2. Portable version of "ln -sf". +symlink() { + rm -rf "$2" + mkdir -p "$(dirname "$2")" + ln -s "$1" "$2" +} + +# check whether a command is available to this shell (may be either an +# executable or a builtin) +has() { + type "$1" >/dev/null 2>&1 +} + +version_ge () { + local_ver1=`echo $1 | tr . ' '` + local_ver2=`echo $2 | tr . ' '` + while true; do + set x $local_ver1 + local_first=${2-0} + # 'shift 2' if $2 is set, or 'shift' if $2 is not set + shift ${2:+2} + local_ver1=$* + set x $local_ver2 + # the second argument finished, the first must be greater or equal + test $# = 1 && return 0 + test $local_first -lt $2 && return 1 + test $local_first -gt $2 && return 0 + shift ${2:+2} + local_ver2=$* + done +} + +have_backend () { + echo "$trace_backends" | grep "$1" >/dev/null +} + +glob() { + eval test -z '"${1#'"$2"'}"' +} + +ld_has() { + $ld --help 2>/dev/null | grep ".$1" >/dev/null 2>&1 +} + +if printf %s\\n "$source_path" "$PWD" | grep -q "[[:space:]:]"; +then + error_exit "main directory cannot contain spaces nor colons" +fi + +# default parameters +cpu="" +iasl="iasl" +interp_prefix="/usr/gnemul/qemu-%M" +static="no" +cross_prefix="" +audio_drv_list="" +block_drv_rw_whitelist="" +block_drv_ro_whitelist="" +host_cc="cc" +audio_win_int="" +libs_qga="" +debug_info="yes" +stack_protector="" +safe_stack="" +use_containers="yes" +gdb_bin=$(command -v "gdb-multiarch" || command -v "gdb") +glib_has_gslice="no" + +if test -e "$source_path/.git" +then + git_update=yes + git_submodules="ui/keycodemapdb" + git_submodules="$git_submodules tests/fp/berkeley-testfloat-3" + git_submodules="$git_submodules tests/fp/berkeley-softfloat-3" +else + git_update=no + git_submodules="" + + # if ! test -f "$source_path/ui/keycodemapdb/README" + # then + # echo + # echo "ERROR: missing file $source_path/ui/keycodemapdb/README" + # echo + # echo "This is not a GIT checkout but module content appears to" + # echo "be missing. Do not use 'git archive' or GitHub download links" + # echo "to acquire QEMU source archives. Non-GIT builds are only" + # echo "supported with source archives linked from:" + # echo + # echo " https://www.qemu.org/download/#source" + # echo + # echo "Developers working with GIT can use scripts/archive-source.sh" + # echo "if they need to create valid source archives." + # echo + # exit 1 + # fi +fi +git="git" + +# Don't accept a target_list environment variable. +unset target_list +unset target_list_exclude + +# Default value for a variable defining feature "foo". +# * foo="no" feature will only be used if --enable-foo arg is given +# * foo="" feature will be searched for, and if found, will be used +# unless --disable-foo is given +# * foo="yes" this value will only be set by --enable-foo flag. +# feature will searched for, +# if not found, configure exits with error +# +# Always add --enable-foo and --disable-foo command line args. +# Distributions want to ensure that several features are compiled in, and it +# is impossible without a --enable-foo that exits if a feature is not found. + +brlapi="" +curl="" +iconv="auto" +curses="auto" +docs="auto" +fdt="auto" +netmap="no" +sdl="auto" +sdl_image="auto" +virtiofsd="auto" +virtfs="" +libudev="auto" +mpath="auto" +vnc="enabled" +sparse="auto" +vde="" +vnc_sasl="auto" +vnc_jpeg="auto" +vnc_png="auto" +xkbcommon="auto" +xen="" +xen_ctrl_version="" +xen_pci_passthrough="auto" +linux_aio="" +linux_io_uring="" +cap_ng="" +attr="" +libattr="" +xfs="" +tcg="enabled" +membarrier="" +vhost_net="" +vhost_crypto="" +vhost_scsi="" +vhost_vsock="" +vhost_user="no" +vhost_user_blk_server="auto" +vhost_user_fs="" +kvm="auto" +hax="auto" +hvf="auto" +whpx="auto" +rdma="" +pvrdma="" +gprof="no" +debug_tcg="no" +debug="no" +sanitizers="no" +tsan="no" +fortify_source="" +strip_opt="yes" +tcg_interpreter="no" +bigendian="no" +mingw32="no" +gcov="no" +EXESUF="" +HOST_DSOSUF=".so" +modules="no" +module_upgrades="no" +prefix="/usr/local" +qemu_suffix="qemu" +slirp="auto" +oss_lib="" +bsd="no" +linux="no" +solaris="no" +profiler="no" +cocoa="auto" +softmmu="yes" +linux_user="no" +bsd_user="no" +blobs="true" +pkgversion="" +pie="" +qom_cast_debug="yes" +trace_backends="log" +trace_file="trace" +spice="" +rbd="" +smartcard="" +u2f="auto" +libusb="" +usb_redir="" +opengl="" +opengl_dmabuf="no" +cpuid_h="no" +avx2_opt="" +capstone="auto" +lzo="" +snappy="" +bzip2="" +lzfse="" +zstd="" +guest_agent="" +guest_agent_with_vss="no" +guest_agent_ntddscsi="no" +guest_agent_msi="" +vss_win32_sdk="" +win_sdk="no" +want_tools="" +libiscsi="" +libnfs="" +coroutine="" +coroutine_pool="" +debug_stack_usage="no" +crypto_afalg="no" +seccomp="" +glusterfs="" +glusterfs_xlator_opt="no" +glusterfs_discard="no" +glusterfs_fallocate="no" +glusterfs_zerofill="no" +glusterfs_ftruncate_has_stat="no" +glusterfs_iocb_has_stat="no" +gtk="" +gtk_gl="no" +tls_priority="NORMAL" +gnutls="" +nettle="" +nettle_xts="no" +gcrypt="" +gcrypt_hmac="no" +gcrypt_xts="no" +qemu_private_xts="yes" +auth_pam="" +vte="" +virglrenderer="" +tpm="" +libssh="" +live_block_migration="yes" +numa="" +tcmalloc="no" +jemalloc="no" +replication="yes" +bochs="yes" +cloop="yes" +dmg="yes" +qcow1="yes" +vdi="yes" +vvfat="yes" +qed="yes" +parallels="yes" +sheepdog="no" +libxml2="" +debug_mutex="no" +libpmem="" +default_devices="yes" +plugins="no" +fuzzing="no" +rng_none="no" +secret_keyring="" +libdaxctl="" +meson="" +ninja="" +skip_meson=no +gettext="" + +bogus_os="no" +malloc_trim="auto" + +# parse CC options first +for opt do + optarg=$(expr "x$opt" : 'x[^=]*=\(.*\)') + case "$opt" in + --cross-prefix=*) cross_prefix="$optarg" + ;; + --cc=*) CC="$optarg" + ;; + --cxx=*) CXX="$optarg" + ;; + --cpu=*) cpu="$optarg" + ;; + --extra-cflags=*) QEMU_CFLAGS="$QEMU_CFLAGS $optarg" + QEMU_LDFLAGS="$QEMU_LDFLAGS $optarg" + ;; + --extra-cxxflags=*) QEMU_CXXFLAGS="$QEMU_CXXFLAGS $optarg" + ;; + --extra-ldflags=*) QEMU_LDFLAGS="$QEMU_LDFLAGS $optarg" + EXTRA_LDFLAGS="$optarg" + ;; + --enable-debug-info) debug_info="yes" + ;; + --disable-debug-info) debug_info="no" + ;; + --cross-cc-*[!a-zA-Z0-9_-]*=*) error_exit "Passed bad --cross-cc-FOO option" + ;; + --cross-cc-cflags-*) cc_arch=${opt#--cross-cc-flags-}; cc_arch=${cc_arch%%=*} + eval "cross_cc_cflags_${cc_arch}=\$optarg" + cross_cc_vars="$cross_cc_vars cross_cc_cflags_${cc_arch}" + ;; + --cross-cc-*) cc_arch=${opt#--cross-cc-}; cc_arch=${cc_arch%%=*} + cc_archs="$cc_archs $cc_arch" + eval "cross_cc_${cc_arch}=\$optarg" + cross_cc_vars="$cross_cc_vars cross_cc_${cc_arch}" + ;; + esac +done +# OS specific +# Using uname is really, really broken. Once we have the right set of checks +# we can eliminate its usage altogether. + +# Preferred compiler: +# ${CC} (if set) +# ${cross_prefix}gcc (if cross-prefix specified) +# system compiler +if test -z "${CC}${cross_prefix}"; then + cc="$host_cc" +else + cc="${CC-${cross_prefix}gcc}" +fi + +if test -z "${CXX}${cross_prefix}"; then + cxx="c++" +else + cxx="${CXX-${cross_prefix}g++}" +fi + +ar="${AR-${cross_prefix}ar}" +as="${AS-${cross_prefix}as}" +ccas="${CCAS-$cc}" +cpp="${CPP-$cc -E}" +objcopy="${OBJCOPY-${cross_prefix}objcopy}" +ld="${LD-${cross_prefix}ld}" +ranlib="${RANLIB-${cross_prefix}ranlib}" +nm="${NM-${cross_prefix}nm}" +strip="${STRIP-${cross_prefix}strip}" +windres="${WINDRES-${cross_prefix}windres}" +pkg_config_exe="${PKG_CONFIG-${cross_prefix}pkg-config}" +query_pkg_config() { + "${pkg_config_exe}" ${QEMU_PKG_CONFIG_FLAGS} "$@" +} +pkg_config=query_pkg_config +sdl2_config="${SDL2_CONFIG-${cross_prefix}sdl2-config}" + +# If the user hasn't specified ARFLAGS, default to 'rv', just as make does. +ARFLAGS="${ARFLAGS-rv}" + +# default flags for all hosts +# We use -fwrapv to tell the compiler that we require a C dialect where +# left shift of signed integers is well defined and has the expected +# 2s-complement style results. (Both clang and gcc agree that it +# provides these semantics.) +QEMU_CFLAGS="-fno-strict-aliasing -fno-common -fwrapv $QEMU_CFLAGS" +QEMU_CFLAGS="-Wundef -Wwrite-strings -Wmissing-prototypes $QEMU_CFLAGS" +QEMU_CFLAGS="-Wstrict-prototypes -Wredundant-decls $QEMU_CFLAGS" +QEMU_CFLAGS="-D_GNU_SOURCE -D_FILE_OFFSET_BITS=64 -D_LARGEFILE_SOURCE $QEMU_CFLAGS" + +# Flags that are needed during configure but later taken care of by Meson +CONFIGURE_CFLAGS="-std=gnu99 -Wall" +CONFIGURE_LDFLAGS= + + +check_define() { +cat > $TMPC < $TMPC < +int main(void) { return 0; } +EOF + compile_object +} + +write_c_skeleton() { + cat > $TMPC < $TMPC < +#include +int LLVMFuzzerTestOneInput(const uint8_t *Data, size_t Size); +int LLVMFuzzerTestOneInput(const uint8_t *Data, size_t Size) { return 0; } +EOF +} + +if check_define __linux__ ; then + targetos="Linux" +elif check_define _WIN32 ; then + targetos='MINGW32' +elif check_define __OpenBSD__ ; then + targetos='OpenBSD' +elif check_define __sun__ ; then + targetos='SunOS' +elif check_define __HAIKU__ ; then + targetos='Haiku' +elif check_define __FreeBSD__ ; then + targetos='FreeBSD' +elif check_define __FreeBSD_kernel__ && check_define __GLIBC__; then + targetos='GNU/kFreeBSD' +elif check_define __DragonFly__ ; then + targetos='DragonFly' +elif check_define __NetBSD__; then + targetos='NetBSD' +elif check_define __APPLE__; then + targetos='Darwin' +else + # This is a fatal error, but don't report it yet, because we + # might be going to just print the --help text, or it might + # be the result of a missing compiler. + targetos='bogus' + bogus_os='yes' +fi + +# Some host OSes need non-standard checks for which CPU to use. +# Note that these checks are broken for cross-compilation: if you're +# cross-compiling to one of these OSes then you'll need to specify +# the correct CPU with the --cpu option. +case $targetos in +Darwin) + # on Leopard most of the system is 32-bit, so we have to ask the kernel if we can + # run 64-bit userspace code. + # If the user didn't specify a CPU explicitly and the kernel says this is + # 64 bit hw, then assume x86_64. Otherwise fall through to the usual detection code. + if test -z "$cpu" && test "$(sysctl -n hw.optional.x86_64)" = "1"; then + cpu="x86_64" + fi + HOST_DSOSUF=".dylib" + ;; +SunOS) + # $(uname -m) returns i86pc even on an x86_64 box, so default based on isainfo + if test -z "$cpu" && test "$(isainfo -k)" = "amd64"; then + cpu="x86_64" + fi +esac + +if test ! -z "$cpu" ; then + # command line argument + : +elif check_define __i386__ ; then + cpu="i386" +elif check_define __x86_64__ ; then + if check_define __ILP32__ ; then + cpu="x32" + else + cpu="x86_64" + fi +elif check_define __sparc__ ; then + if check_define __arch64__ ; then + cpu="sparc64" + else + cpu="sparc" + fi +elif check_define _ARCH_PPC ; then + if check_define _ARCH_PPC64 ; then + if check_define _LITTLE_ENDIAN ; then + cpu="ppc64le" + else + cpu="ppc64" + fi + else + cpu="ppc" + fi +elif check_define __mips__ ; then + cpu="mips" +elif check_define __s390__ ; then + if check_define __s390x__ ; then + cpu="s390x" + else + cpu="s390" + fi +elif check_define __riscv ; then + if check_define _LP64 ; then + cpu="riscv64" + else + cpu="riscv32" + fi +elif check_define __arm__ ; then + cpu="arm" +elif check_define __aarch64__ ; then + cpu="aarch64" +else + cpu=$(uname -m) +fi + +ARCH= +# Normalise host CPU name and set ARCH. +# Note that this case should only have supported host CPUs, not guests. +case "$cpu" in + ppc|ppc64|s390x|sparc64|x32|riscv32|riscv64) + ;; + ppc64le) + ARCH="ppc64" + ;; + i386|i486|i586|i686|i86pc|BePC) + cpu="i386" + ;; + x86_64|amd64) + cpu="x86_64" + ;; + armv*b|armv*l|arm) + cpu="arm" + ;; + aarch64) + cpu="aarch64" + ;; + mips*) + cpu="mips" + ;; + sparc|sun4[cdmuv]) + cpu="sparc" + ;; + *) + # This will result in either an error or falling back to TCI later + ARCH=unknown + ;; +esac +if test -z "$ARCH"; then + ARCH="$cpu" +fi + +# OS specific + +case $targetos in +MINGW32*) + mingw32="yes" + audio_possible_drivers="dsound sdl" + if check_include dsound.h; then + audio_drv_list="dsound" + else + audio_drv_list="" + fi + supported_os="yes" + pie="no" +;; +GNU/kFreeBSD) + bsd="yes" + audio_drv_list="oss try-sdl" + audio_possible_drivers="oss sdl pa" +;; +FreeBSD) + bsd="yes" + make="${MAKE-gmake}" + audio_drv_list="oss try-sdl" + audio_possible_drivers="oss sdl pa" + # needed for kinfo_getvmmap(3) in libutil.h + netmap="" # enable netmap autodetect +;; +DragonFly) + bsd="yes" + make="${MAKE-gmake}" + audio_drv_list="oss try-sdl" + audio_possible_drivers="oss sdl pa" +;; +NetBSD) + bsd="yes" + make="${MAKE-gmake}" + audio_drv_list="oss try-sdl" + audio_possible_drivers="oss sdl" + oss_lib="-lossaudio" +;; +OpenBSD) + bsd="yes" + make="${MAKE-gmake}" + audio_drv_list="try-sdl" + audio_possible_drivers="sdl" +;; +Darwin) + bsd="yes" + darwin="yes" + if [ "$cpu" = "x86_64" ] ; then + QEMU_CFLAGS="-arch x86_64 $QEMU_CFLAGS" + QEMU_LDFLAGS="-arch x86_64 $QEMU_LDFLAGS" + fi + cocoa="enabled" + audio_drv_list="coreaudio try-sdl" + audio_possible_drivers="coreaudio sdl" + QEMU_LDFLAGS="-framework CoreFoundation -framework IOKit $QEMU_LDFLAGS" + # Disable attempts to use ObjectiveC features in os/object.h since they + # won't work when we're compiling with gcc as a C compiler. + QEMU_CFLAGS="-DOS_OBJECT_USE_OBJC=0 $QEMU_CFLAGS" +;; +SunOS) + solaris="yes" + make="${MAKE-gmake}" + smbd="${SMBD-/usr/sfw/sbin/smbd}" + if test -f /usr/include/sys/soundcard.h ; then + audio_drv_list="oss try-sdl" + fi + audio_possible_drivers="oss sdl" +# needed for CMSG_ macros in sys/socket.h + QEMU_CFLAGS="-D_XOPEN_SOURCE=600 $QEMU_CFLAGS" +# needed for TIOCWIN* defines in termios.h + QEMU_CFLAGS="-D__EXTENSIONS__ $QEMU_CFLAGS" +;; +Haiku) + haiku="yes" + QEMU_CFLAGS="-DB_USE_POSITIVE_POSIX_ERRORS -D_BSD_SOURCE $QEMU_CFLAGS" +;; +Linux) + audio_drv_list="try-pa oss" + audio_possible_drivers="oss alsa sdl pa" + linux="yes" + linux_user="yes" + vhost_user="yes" +;; +esac + +if [ "$bsd" = "yes" ] ; then + if [ "$darwin" != "yes" ] ; then + bsd_user="yes" + fi +fi + +: ${make=${MAKE-make}} + +# We prefer python 3.x. A bare 'python' is traditionally +# python 2.x, but some distros have it as python 3.x, so +# we check that too +python= +explicit_python=no +for binary in "${PYTHON-python3}" python +do + if has "$binary" + then + python=$(command -v "$binary") + break + fi +done + + +# Check for ancillary tools used in testing +genisoimage= +for binary in genisoimage mkisofs +do + if has $binary + then + genisoimage=$(command -v "$binary") + break + fi +done + +: ${smbd=${SMBD-/usr/sbin/smbd}} + +# Default objcc to clang if available, otherwise use CC +if has clang; then + objcc=clang +else + objcc="$cc" +fi + +if test "$mingw32" = "yes" ; then + EXESUF=".exe" + HOST_DSOSUF=".dll" + # MinGW needs -mthreads for TLS and macro _MT. + CONFIGURE_CFLAGS="-mthreads $CONFIGURE_CFLAGS" + write_c_skeleton; + prefix="/qemu" + qemu_suffix="" + libs_qga="-lws2_32 -lwinmm -lpowrprof -lwtsapi32 -lwininet -liphlpapi -lnetapi32 $libs_qga" +fi + +werror="" + +for opt do + optarg=$(expr "x$opt" : 'x[^=]*=\(.*\)') + case "$opt" in + --help|-h) show_help=yes + ;; + --version|-V) exec cat $source_path/VERSION + ;; + --prefix=*) prefix="$optarg" + ;; + --interp-prefix=*) interp_prefix="$optarg" + ;; + --cross-prefix=*) + ;; + --cc=*) + ;; + --host-cc=*) host_cc="$optarg" + ;; + --cxx=*) + ;; + --iasl=*) iasl="$optarg" + ;; + --objcc=*) objcc="$optarg" + ;; + --make=*) make="$optarg" + ;; + --install=*) + ;; + --python=*) python="$optarg" ; explicit_python=yes + ;; + --sphinx-build=*) sphinx_build="$optarg" + ;; + --skip-meson) skip_meson=yes + ;; + --meson=*) meson="$optarg" + ;; + --ninja=*) ninja="$optarg" + ;; + --smbd=*) smbd="$optarg" + ;; + --extra-cflags=*) + ;; + --extra-cxxflags=*) + ;; + --extra-ldflags=*) + ;; + --enable-debug-info) + ;; + --disable-debug-info) + ;; + --cross-cc-*) + ;; + --enable-modules) + modules="yes" + ;; + --disable-modules) + modules="no" + ;; + --disable-module-upgrades) module_upgrades="no" + ;; + --enable-module-upgrades) module_upgrades="yes" + ;; + --cpu=*) + ;; + --target-list=*) target_list="$optarg" + if test "$target_list_exclude"; then + error_exit "Can't mix --target-list with --target-list-exclude" + fi + ;; + --target-list-exclude=*) target_list_exclude="$optarg" + if test "$target_list"; then + error_exit "Can't mix --target-list-exclude with --target-list" + fi + ;; + --enable-trace-backends=*) trace_backends="$optarg" + ;; + # XXX: backwards compatibility + --enable-trace-backend=*) trace_backends="$optarg" + ;; + --with-trace-file=*) trace_file="$optarg" + ;; + --with-default-devices) default_devices="yes" + ;; + --without-default-devices) default_devices="no" + ;; + --enable-gprof) gprof="yes" + ;; + --enable-gcov) gcov="yes" + ;; + --static) + static="yes" + QEMU_PKG_CONFIG_FLAGS="--static $QEMU_PKG_CONFIG_FLAGS" + ;; + --mandir=*) mandir="$optarg" + ;; + --bindir=*) bindir="$optarg" + ;; + --libdir=*) libdir="$optarg" + ;; + --libexecdir=*) libexecdir="$optarg" + ;; + --includedir=*) includedir="$optarg" + ;; + --datadir=*) datadir="$optarg" + ;; + --with-suffix=*) qemu_suffix="$optarg" + ;; + --docdir=*) docdir="$optarg" + ;; + --localedir=*) localedir="$optarg" + ;; + --sysconfdir=*) sysconfdir="$optarg" + ;; + --localstatedir=*) local_statedir="$optarg" + ;; + --firmwarepath=*) firmwarepath="$optarg" + ;; + --host=*|--build=*|\ + --disable-dependency-tracking|\ + --sbindir=*|--sharedstatedir=*|\ + --oldincludedir=*|--datarootdir=*|--infodir=*|\ + --htmldir=*|--dvidir=*|--pdfdir=*|--psdir=*) + # These switches are silently ignored, for compatibility with + # autoconf-generated configure scripts. This allows QEMU's + # configure to be used by RPM and similar macros that set + # lots of directory switches by default. + ;; + --disable-sdl) sdl="disabled" + ;; + --enable-sdl) sdl="enabled" + ;; + --disable-sdl-image) sdl_image="disabled" + ;; + --enable-sdl-image) sdl_image="enabled" + ;; + --disable-qom-cast-debug) qom_cast_debug="no" + ;; + --enable-qom-cast-debug) qom_cast_debug="yes" + ;; + --disable-virtfs) virtfs="no" + ;; + --enable-virtfs) virtfs="yes" + ;; + --disable-libudev) libudev="disabled" + ;; + --enable-libudev) libudev="enabled" + ;; + --disable-virtiofsd) virtiofsd="disabled" + ;; + --enable-virtiofsd) virtiofsd="enabled" + ;; + --disable-mpath) mpath="disabled" + ;; + --enable-mpath) mpath="enabled" + ;; + --disable-vnc) vnc="disabled" + ;; + --enable-vnc) vnc="enabled" + ;; + --disable-gettext) gettext="false" + ;; + --enable-gettext) gettext="true" + ;; + --oss-lib=*) oss_lib="$optarg" + ;; + --audio-drv-list=*) audio_drv_list="$optarg" + ;; + --block-drv-rw-whitelist=*|--block-drv-whitelist=*) block_drv_rw_whitelist=$(echo "$optarg" | sed -e 's/,/ /g') + ;; + --block-drv-ro-whitelist=*) block_drv_ro_whitelist=$(echo "$optarg" | sed -e 's/,/ /g') + ;; + --enable-debug-tcg) debug_tcg="yes" + ;; + --disable-debug-tcg) debug_tcg="no" + ;; + --enable-debug) + # Enable debugging options that aren't excessively noisy + debug_tcg="yes" + debug_mutex="yes" + debug="yes" + strip_opt="no" + fortify_source="no" + ;; + --enable-sanitizers) sanitizers="yes" + ;; + --disable-sanitizers) sanitizers="no" + ;; + --enable-tsan) tsan="yes" + ;; + --disable-tsan) tsan="no" + ;; + --enable-sparse) sparse="enabled" + ;; + --disable-sparse) sparse="disabled" + ;; + --disable-strip) strip_opt="no" + ;; + --disable-vnc-sasl) vnc_sasl="disabled" + ;; + --enable-vnc-sasl) vnc_sasl="enabled" + ;; + --disable-vnc-jpeg) vnc_jpeg="disabled" + ;; + --enable-vnc-jpeg) vnc_jpeg="enabled" + ;; + --disable-vnc-png) vnc_png="disabled" + ;; + --enable-vnc-png) vnc_png="enabled" + ;; + --disable-slirp) slirp="disabled" + ;; + --enable-slirp=git) slirp="internal" + ;; + --enable-slirp=system) slirp="system" + ;; + --disable-vde) vde="no" + ;; + --enable-vde) vde="yes" + ;; + --disable-netmap) netmap="no" + ;; + --enable-netmap) netmap="yes" + ;; + --disable-xen) xen="disabled" + ;; + --enable-xen) xen="enabled" + ;; + --disable-xen-pci-passthrough) xen_pci_passthrough="disabled" + ;; + --enable-xen-pci-passthrough) xen_pci_passthrough="enabled" + ;; + --disable-brlapi) brlapi="no" + ;; + --enable-brlapi) brlapi="yes" + ;; + --disable-kvm) kvm="disabled" + ;; + --enable-kvm) kvm="enabled" + ;; + --disable-hax) hax="disabled" + ;; + --enable-hax) hax="enabled" + ;; + --disable-hvf) hvf="disabled" + ;; + --enable-hvf) hvf="enabled" + ;; + --disable-whpx) whpx="disabled" + ;; + --enable-whpx) whpx="enabled" + ;; + --disable-tcg-interpreter) tcg_interpreter="no" + ;; + --enable-tcg-interpreter) tcg_interpreter="yes" + ;; + --disable-cap-ng) cap_ng="no" + ;; + --enable-cap-ng) cap_ng="yes" + ;; + --disable-tcg) tcg="disabled" + ;; + --enable-tcg) tcg="enabled" + ;; + --disable-malloc-trim) malloc_trim="disabled" + ;; + --enable-malloc-trim) malloc_trim="enabled" + ;; + --disable-spice) spice="no" + ;; + --enable-spice) spice="yes" + ;; + --disable-libiscsi) libiscsi="no" + ;; + --enable-libiscsi) libiscsi="yes" + ;; + --disable-libnfs) libnfs="no" + ;; + --enable-libnfs) libnfs="yes" + ;; + --enable-profiler) profiler="yes" + ;; + --disable-cocoa) cocoa="disabled" + ;; + --enable-cocoa) + cocoa="enabled" ; + audio_drv_list="coreaudio $(echo $audio_drv_list | sed s,coreaudio,,g)" + ;; + --disable-system) softmmu="no" + ;; + --enable-system) softmmu="yes" + ;; + --disable-user) + linux_user="no" ; + bsd_user="no" ; + ;; + --enable-user) ;; + --disable-linux-user) linux_user="no" + ;; + --enable-linux-user) linux_user="yes" + ;; + --disable-bsd-user) bsd_user="no" + ;; + --enable-bsd-user) bsd_user="yes" + ;; + --enable-pie) pie="yes" + ;; + --disable-pie) pie="no" + ;; + --enable-werror) werror="yes" + ;; + --disable-werror) werror="no" + ;; + --enable-stack-protector) stack_protector="yes" + ;; + --disable-stack-protector) stack_protector="no" + ;; + --enable-safe-stack) safe_stack="yes" + ;; + --disable-safe-stack) safe_stack="no" + ;; + --disable-curses) curses="disabled" + ;; + --enable-curses) curses="enabled" + ;; + --disable-iconv) iconv="disabled" + ;; + --enable-iconv) iconv="enabled" + ;; + --disable-curl) curl="no" + ;; + --enable-curl) curl="yes" + ;; + --disable-fdt) fdt="disabled" + ;; + --enable-fdt) fdt="enabled" + ;; + --enable-fdt=git) fdt="internal" + ;; + --enable-fdt=system) fdt="system" + ;; + --disable-linux-aio) linux_aio="no" + ;; + --enable-linux-aio) linux_aio="yes" + ;; + --disable-linux-io-uring) linux_io_uring="no" + ;; + --enable-linux-io-uring) linux_io_uring="yes" + ;; + --disable-attr) attr="no" + ;; + --enable-attr) attr="yes" + ;; + --disable-membarrier) membarrier="no" + ;; + --enable-membarrier) membarrier="yes" + ;; + --disable-blobs) blobs="false" + ;; + --with-pkgversion=*) pkgversion="$optarg" + ;; + --with-coroutine=*) coroutine="$optarg" + ;; + --disable-coroutine-pool) coroutine_pool="no" + ;; + --enable-coroutine-pool) coroutine_pool="yes" + ;; + --enable-debug-stack-usage) debug_stack_usage="yes" + ;; + --enable-crypto-afalg) crypto_afalg="yes" + ;; + --disable-crypto-afalg) crypto_afalg="no" + ;; + --disable-docs) docs="disabled" + ;; + --enable-docs) docs="enabled" + ;; + --disable-vhost-net) vhost_net="no" + ;; + --enable-vhost-net) vhost_net="yes" + ;; + --disable-vhost-crypto) vhost_crypto="no" + ;; + --enable-vhost-crypto) vhost_crypto="yes" + ;; + --disable-vhost-scsi) vhost_scsi="no" + ;; + --enable-vhost-scsi) vhost_scsi="yes" + ;; + --disable-vhost-vsock) vhost_vsock="no" + ;; + --enable-vhost-vsock) vhost_vsock="yes" + ;; + --disable-vhost-user-blk-server) vhost_user_blk_server="disabled" + ;; + --enable-vhost-user-blk-server) vhost_user_blk_server="enabled" + ;; + --disable-vhost-user-fs) vhost_user_fs="no" + ;; + --enable-vhost-user-fs) vhost_user_fs="yes" + ;; + --disable-opengl) opengl="no" + ;; + --enable-opengl) opengl="yes" + ;; + --disable-rbd) rbd="no" + ;; + --enable-rbd) rbd="yes" + ;; + --disable-xfsctl) xfs="no" + ;; + --enable-xfsctl) xfs="yes" + ;; + --disable-smartcard) smartcard="no" + ;; + --enable-smartcard) smartcard="yes" + ;; + --disable-u2f) u2f="disabled" + ;; + --enable-u2f) u2f="enabled" + ;; + --disable-libusb) libusb="no" + ;; + --enable-libusb) libusb="yes" + ;; + --disable-usb-redir) usb_redir="no" + ;; + --enable-usb-redir) usb_redir="yes" + ;; + --disable-zlib-test) + ;; + --disable-lzo) lzo="no" + ;; + --enable-lzo) lzo="yes" + ;; + --disable-snappy) snappy="no" + ;; + --enable-snappy) snappy="yes" + ;; + --disable-bzip2) bzip2="no" + ;; + --enable-bzip2) bzip2="yes" + ;; + --enable-lzfse) lzfse="yes" + ;; + --disable-lzfse) lzfse="no" + ;; + --disable-zstd) zstd="no" + ;; + --enable-zstd) zstd="yes" + ;; + --enable-guest-agent) guest_agent="yes" + ;; + --disable-guest-agent) guest_agent="no" + ;; + --enable-guest-agent-msi) guest_agent_msi="yes" + ;; + --disable-guest-agent-msi) guest_agent_msi="no" + ;; + --with-vss-sdk) vss_win32_sdk="" + ;; + --with-vss-sdk=*) vss_win32_sdk="$optarg" + ;; + --without-vss-sdk) vss_win32_sdk="no" + ;; + --with-win-sdk) win_sdk="" + ;; + --with-win-sdk=*) win_sdk="$optarg" + ;; + --without-win-sdk) win_sdk="no" + ;; + --enable-tools) want_tools="yes" + ;; + --disable-tools) want_tools="no" + ;; + --enable-seccomp) seccomp="yes" + ;; + --disable-seccomp) seccomp="no" + ;; + --disable-glusterfs) glusterfs="no" + ;; + --disable-avx2) avx2_opt="no" + ;; + --enable-avx2) avx2_opt="yes" + ;; + --disable-avx512f) avx512f_opt="no" + ;; + --enable-avx512f) avx512f_opt="yes" + ;; + + --enable-glusterfs) glusterfs="yes" + ;; + --disable-virtio-blk-data-plane|--enable-virtio-blk-data-plane) + echo "$0: $opt is obsolete, virtio-blk data-plane is always on" >&2 + ;; + --enable-vhdx|--disable-vhdx) + echo "$0: $opt is obsolete, VHDX driver is always built" >&2 + ;; + --enable-uuid|--disable-uuid) + echo "$0: $opt is obsolete, UUID support is always built" >&2 + ;; + --disable-gtk) gtk="no" + ;; + --enable-gtk) gtk="yes" + ;; + --tls-priority=*) tls_priority="$optarg" + ;; + --disable-gnutls) gnutls="no" + ;; + --enable-gnutls) gnutls="yes" + ;; + --disable-nettle) nettle="no" + ;; + --enable-nettle) nettle="yes" + ;; + --disable-gcrypt) gcrypt="no" + ;; + --enable-gcrypt) gcrypt="yes" + ;; + --disable-auth-pam) auth_pam="no" + ;; + --enable-auth-pam) auth_pam="yes" + ;; + --enable-rdma) rdma="yes" + ;; + --disable-rdma) rdma="no" + ;; + --enable-pvrdma) pvrdma="yes" + ;; + --disable-pvrdma) pvrdma="no" + ;; + --disable-vte) vte="no" + ;; + --enable-vte) vte="yes" + ;; + --disable-virglrenderer) virglrenderer="no" + ;; + --enable-virglrenderer) virglrenderer="yes" + ;; + --disable-tpm) tpm="no" + ;; + --enable-tpm) tpm="yes" + ;; + --disable-libssh) libssh="no" + ;; + --enable-libssh) libssh="yes" + ;; + --disable-live-block-migration) live_block_migration="no" + ;; + --enable-live-block-migration) live_block_migration="yes" + ;; + --disable-numa) numa="no" + ;; + --enable-numa) numa="yes" + ;; + --disable-libxml2) libxml2="no" + ;; + --enable-libxml2) libxml2="yes" + ;; + --disable-tcmalloc) tcmalloc="no" + ;; + --enable-tcmalloc) tcmalloc="yes" + ;; + --disable-jemalloc) jemalloc="no" + ;; + --enable-jemalloc) jemalloc="yes" + ;; + --disable-replication) replication="no" + ;; + --enable-replication) replication="yes" + ;; + --disable-bochs) bochs="no" + ;; + --enable-bochs) bochs="yes" + ;; + --disable-cloop) cloop="no" + ;; + --enable-cloop) cloop="yes" + ;; + --disable-dmg) dmg="no" + ;; + --enable-dmg) dmg="yes" + ;; + --disable-qcow1) qcow1="no" + ;; + --enable-qcow1) qcow1="yes" + ;; + --disable-vdi) vdi="no" + ;; + --enable-vdi) vdi="yes" + ;; + --disable-vvfat) vvfat="no" + ;; + --enable-vvfat) vvfat="yes" + ;; + --disable-qed) qed="no" + ;; + --enable-qed) qed="yes" + ;; + --disable-parallels) parallels="no" + ;; + --enable-parallels) parallels="yes" + ;; + --disable-sheepdog) sheepdog="no" + ;; + --enable-sheepdog) sheepdog="yes" + ;; + --disable-vhost-user) vhost_user="no" + ;; + --enable-vhost-user) vhost_user="yes" + ;; + --disable-vhost-vdpa) vhost_vdpa="no" + ;; + --enable-vhost-vdpa) vhost_vdpa="yes" + ;; + --disable-vhost-kernel) vhost_kernel="no" + ;; + --enable-vhost-kernel) vhost_kernel="yes" + ;; + --disable-capstone) capstone="disabled" + ;; + --enable-capstone) capstone="enabled" + ;; + --enable-capstone=git) capstone="internal" + ;; + --enable-capstone=system) capstone="system" + ;; + --with-git=*) git="$optarg" + ;; + --enable-git-update) git_update=yes + ;; + --disable-git-update) git_update=no + ;; + --enable-debug-mutex) debug_mutex=yes + ;; + --disable-debug-mutex) debug_mutex=no + ;; + --enable-libpmem) libpmem=yes + ;; + --disable-libpmem) libpmem=no + ;; + --enable-xkbcommon) xkbcommon="enabled" + ;; + --disable-xkbcommon) xkbcommon="disabled" + ;; + --enable-plugins) plugins="yes" + ;; + --disable-plugins) plugins="no" + ;; + --enable-containers) use_containers="yes" + ;; + --disable-containers) use_containers="no" + ;; + --enable-fuzzing) fuzzing=yes + ;; + --disable-fuzzing) fuzzing=no + ;; + --gdb=*) gdb_bin="$optarg" + ;; + --enable-rng-none) rng_none=yes + ;; + --disable-rng-none) rng_none=no + ;; + --enable-keyring) secret_keyring="yes" + ;; + --disable-keyring) secret_keyring="no" + ;; + --enable-libdaxctl) libdaxctl=yes + ;; + --disable-libdaxctl) libdaxctl=no + ;; + *) + echo "ERROR: unknown option $opt" + echo "Try '$0 --help' for more information" + exit 1 + ;; + esac +done + +libdir="${libdir:-$prefix/lib}" +libexecdir="${libexecdir:-$prefix/libexec}" +includedir="${includedir:-$prefix/include}" + +if test "$mingw32" = "yes" ; then + mandir="$prefix" + datadir="$prefix" + docdir="$prefix" + bindir="$prefix" + sysconfdir="$prefix" + local_statedir="$prefix" +else + mandir="${mandir:-$prefix/share/man}" + datadir="${datadir:-$prefix/share}" + docdir="${docdir:-$prefix/share/doc}" + bindir="${bindir:-$prefix/bin}" + sysconfdir="${sysconfdir:-$prefix/etc}" + local_statedir="${local_statedir:-$prefix/var}" +fi +firmwarepath="${firmwarepath:-$datadir/qemu-firmware}" +localedir="${localedir:-$datadir/locale}" + +case "$cpu" in + ppc) + CPU_CFLAGS="-m32" + QEMU_LDFLAGS="-m32 $QEMU_LDFLAGS" + ;; + ppc64) + CPU_CFLAGS="-m64" + QEMU_LDFLAGS="-m64 $QEMU_LDFLAGS" + ;; + sparc) + CPU_CFLAGS="-m32 -mv8plus -mcpu=ultrasparc" + QEMU_LDFLAGS="-m32 -mv8plus $QEMU_LDFLAGS" + ;; + sparc64) + CPU_CFLAGS="-m64 -mcpu=ultrasparc" + QEMU_LDFLAGS="-m64 $QEMU_LDFLAGS" + ;; + s390) + CPU_CFLAGS="-m31" + QEMU_LDFLAGS="-m31 $QEMU_LDFLAGS" + ;; + s390x) + CPU_CFLAGS="-m64" + QEMU_LDFLAGS="-m64 $QEMU_LDFLAGS" + ;; + i386) + CPU_CFLAGS="-m32" + QEMU_LDFLAGS="-m32 $QEMU_LDFLAGS" + ;; + x86_64) + # ??? Only extremely old AMD cpus do not have cmpxchg16b. + # If we truly care, we should simply detect this case at + # runtime and generate the fallback to serial emulation. + CPU_CFLAGS="-m64 -mcx16" + QEMU_LDFLAGS="-m64 $QEMU_LDFLAGS" + ;; + x32) + CPU_CFLAGS="-mx32" + QEMU_LDFLAGS="-mx32 $QEMU_LDFLAGS" + ;; + # No special flags required for other host CPUs +esac + +eval "cross_cc_${cpu}=\$host_cc" +cross_cc_vars="$cross_cc_vars cross_cc_${cpu}" +QEMU_CFLAGS="$CPU_CFLAGS $QEMU_CFLAGS" + +# For user-mode emulation the host arch has to be one we explicitly +# support, even if we're using TCI. +if [ "$ARCH" = "unknown" ]; then + bsd_user="no" + linux_user="no" +fi + +default_target_list="" +deprecated_targets_list=ppc64abi32-linux-user,tilegx-linux-user,lm32-softmmu,unicore32-softmmu +deprecated_features="" +mak_wilds="" + +if [ "$softmmu" = "yes" ]; then + mak_wilds="${mak_wilds} $source_path/default-configs/targets/*-softmmu.mak" +fi +if [ "$linux_user" = "yes" ]; then + mak_wilds="${mak_wilds} $source_path/default-configs/targets/*-linux-user.mak" +fi +if [ "$bsd_user" = "yes" ]; then + mak_wilds="${mak_wilds} $source_path/default-configs/targets/*-bsd-user.mak" +fi + +# If the user doesn't explicitly specify a deprecated target we will +# skip it. +if test -z "$target_list"; then + if test -z "$target_list_exclude"; then + target_list_exclude="$deprecated_targets_list" + else + target_list_exclude="$target_list_exclude,$deprecated_targets_list" + fi +fi + +for config in $mak_wilds; do + target="$(basename "$config" .mak)" + if echo "$target_list_exclude" | grep -vq "$target"; then + default_target_list="${default_target_list} $target" + fi +done + +# Enumerate public trace backends for --help output +trace_backend_list=$(echo $(grep -le '^PUBLIC = True$' "$source_path"/scripts/tracetool/backend/*.py | sed -e 's/^.*\/\(.*\)\.py$/\1/')) + +if test x"$show_help" = x"yes" ; then +cat << EOF + +Usage: configure [options] +Options: [defaults in brackets after descriptions] + +Standard options: + --help print this message + --prefix=PREFIX install in PREFIX [$prefix] + --interp-prefix=PREFIX where to find shared libraries, etc. + use %M for cpu name [$interp_prefix] + --target-list=LIST set target list (default: build all non-deprecated) +$(echo Available targets: $default_target_list | \ + fold -s -w 53 | sed -e 's/^/ /') +$(echo Deprecated targets: $deprecated_targets_list | \ + fold -s -w 53 | sed -e 's/^/ /') + --target-list-exclude=LIST exclude a set of targets from the default target-list + +Advanced options (experts only): + --cross-prefix=PREFIX use PREFIX for compile tools [$cross_prefix] + --cc=CC use C compiler CC [$cc] + --iasl=IASL use ACPI compiler IASL [$iasl] + --host-cc=CC use C compiler CC [$host_cc] for code run at + build time + --cxx=CXX use C++ compiler CXX [$cxx] + --objcc=OBJCC use Objective-C compiler OBJCC [$objcc] + --extra-cflags=CFLAGS append extra C compiler flags QEMU_CFLAGS + --extra-cxxflags=CXXFLAGS append extra C++ compiler flags QEMU_CXXFLAGS + --extra-ldflags=LDFLAGS append extra linker flags LDFLAGS + --cross-cc-ARCH=CC use compiler when building ARCH guest test cases + --cross-cc-flags-ARCH= use compiler flags when building ARCH guest tests + --make=MAKE use specified make [$make] + --python=PYTHON use specified python [$python] + --sphinx-build=SPHINX use specified sphinx-build [$sphinx_build] + --meson=MESON use specified meson [$meson] + --ninja=NINJA use specified ninja [$ninja] + --smbd=SMBD use specified smbd [$smbd] + --with-git=GIT use specified git [$git] + --static enable static build [$static] + --mandir=PATH install man pages in PATH + --datadir=PATH install firmware in PATH/$qemu_suffix + --localedir=PATH install translation in PATH/$qemu_suffix + --docdir=PATH install documentation in PATH/$qemu_suffix + --bindir=PATH install binaries in PATH + --libdir=PATH install libraries in PATH + --libexecdir=PATH install helper binaries in PATH + --sysconfdir=PATH install config in PATH/$qemu_suffix + --localstatedir=PATH install local state in PATH (set at runtime on win32) + --firmwarepath=PATH search PATH for firmware files + --efi-aarch64=PATH PATH of efi file to use for aarch64 VMs. + --with-suffix=SUFFIX suffix for QEMU data inside datadir/libdir/sysconfdir/docdir [$qemu_suffix] + --with-pkgversion=VERS use specified string as sub-version of the package + --enable-debug enable common debug build options + --enable-sanitizers enable default sanitizers + --enable-tsan enable thread sanitizer + --disable-strip disable stripping binaries + --disable-werror disable compilation abort on warning + --disable-stack-protector disable compiler-provided stack protection + --audio-drv-list=LIST set audio drivers list: + Available drivers: $audio_possible_drivers + --block-drv-whitelist=L Same as --block-drv-rw-whitelist=L + --block-drv-rw-whitelist=L + set block driver read-write whitelist + (affects only QEMU, not qemu-img) + --block-drv-ro-whitelist=L + set block driver read-only whitelist + (affects only QEMU, not qemu-img) + --enable-trace-backends=B Set trace backend + Available backends: $trace_backend_list + --with-trace-file=NAME Full PATH,NAME of file to store traces + Default:trace- + --disable-slirp disable SLIRP userspace network connectivity + --enable-tcg-interpreter enable TCG with bytecode interpreter (TCI) + --enable-malloc-trim enable libc malloc_trim() for memory optimization + --oss-lib path to OSS library + --cpu=CPU Build for host CPU [$cpu] + --with-coroutine=BACKEND coroutine backend. Supported options: + ucontext, sigaltstack, windows + --enable-gcov enable test coverage analysis with gcov + --disable-blobs disable installing provided firmware blobs + --with-vss-sdk=SDK-path enable Windows VSS support in QEMU Guest Agent + --with-win-sdk=SDK-path path to Windows Platform SDK (to build VSS .tlb) + --tls-priority default TLS protocol/cipher priority string + --enable-gprof QEMU profiling with gprof + --enable-profiler profiler support + --enable-debug-stack-usage + track the maximum stack usage of stacks created by qemu_alloc_stack + --enable-plugins + enable plugins via shared library loading + --disable-containers don't use containers for cross-building + --gdb=GDB-path gdb to use for gdbstub tests [$gdb_bin] + +Optional features, enabled with --enable-FEATURE and +disabled with --disable-FEATURE, default is enabled if available: + + system all system emulation targets + user supported user emulation targets + linux-user all linux usermode emulation targets + bsd-user all BSD usermode emulation targets + docs build documentation + guest-agent build the QEMU Guest Agent + guest-agent-msi build guest agent Windows MSI installation package + pie Position Independent Executables + modules modules support (non-Windows) + module-upgrades try to load modules from alternate paths for upgrades + debug-tcg TCG debugging (default is disabled) + debug-info debugging information + sparse sparse checker + safe-stack SafeStack Stack Smash Protection. Depends on + clang/llvm >= 3.7 and requires coroutine backend ucontext. + + gnutls GNUTLS cryptography support + nettle nettle cryptography support + gcrypt libgcrypt cryptography support + auth-pam PAM access control + sdl SDL UI + sdl-image SDL Image support for icons + gtk gtk UI + vte vte support for the gtk UI + curses curses UI + iconv font glyph conversion support + vnc VNC UI support + vnc-sasl SASL encryption for VNC server + vnc-jpeg JPEG lossy compression for VNC server + vnc-png PNG compression for VNC server + cocoa Cocoa UI (Mac OS X only) + virtfs VirtFS + virtiofsd build virtiofs daemon (virtiofsd) + libudev Use libudev to enumerate host devices + mpath Multipath persistent reservation passthrough + xen xen backend driver support + xen-pci-passthrough PCI passthrough support for Xen + brlapi BrlAPI (Braile) + curl curl connectivity + membarrier membarrier system call (for Linux 4.14+ or Windows) + fdt fdt device tree + kvm KVM acceleration support + hax HAX acceleration support + hvf Hypervisor.framework acceleration support + whpx Windows Hypervisor Platform acceleration support + rdma Enable RDMA-based migration + pvrdma Enable PVRDMA support + vde support for vde network + netmap support for netmap network + linux-aio Linux AIO support + linux-io-uring Linux io_uring support + cap-ng libcap-ng support + attr attr and xattr support + vhost-net vhost-net kernel acceleration support + vhost-vsock virtio sockets device support + vhost-scsi vhost-scsi kernel target support + vhost-crypto vhost-user-crypto backend support + vhost-kernel vhost kernel backend support + vhost-user vhost-user backend support + vhost-user-blk-server vhost-user-blk server support + vhost-vdpa vhost-vdpa kernel backend support + spice spice + rbd rados block device (rbd) + libiscsi iscsi support + libnfs nfs support + smartcard smartcard support (libcacard) + u2f U2F support (u2f-emu) + libusb libusb (for usb passthrough) + live-block-migration Block migration in the main migration stream + usb-redir usb network redirection support + lzo support of lzo compression library + snappy support of snappy compression library + bzip2 support of bzip2 compression library + (for reading bzip2-compressed dmg images) + lzfse support of lzfse compression library + (for reading lzfse-compressed dmg images) + zstd support for zstd compression library + (for migration compression and qcow2 cluster compression) + seccomp seccomp support + coroutine-pool coroutine freelist (better performance) + glusterfs GlusterFS backend + tpm TPM support + libssh ssh block device support + numa libnuma support + libxml2 for Parallels image format + tcmalloc tcmalloc support + jemalloc jemalloc support + avx2 AVX2 optimization support + avx512f AVX512F optimization support + replication replication support + opengl opengl support + virglrenderer virgl rendering support + xfsctl xfsctl support + qom-cast-debug cast debugging support + tools build qemu-io, qemu-nbd and qemu-img tools + bochs bochs image format support + cloop cloop image format support + dmg dmg image format support + qcow1 qcow v1 image format support + vdi vdi image format support + vvfat vvfat image format support + qed qed image format support + parallels parallels image format support + sheepdog sheepdog block driver support (deprecated) + crypto-afalg Linux AF_ALG crypto backend driver + capstone capstone disassembler support + debug-mutex mutex debugging support + libpmem libpmem support + xkbcommon xkbcommon support + rng-none dummy RNG, avoid using /dev/(u)random and getrandom() + libdaxctl libdaxctl support + +NOTE: The object files are built at the place where configure is launched +EOF +exit 0 +fi + +# Remove old dependency files to make sure that they get properly regenerated +rm -f */config-devices.mak.d + +if test -z "$python" +then + error_exit "Python not found. Use --python=/path/to/python" +fi + +# Note that if the Python conditional here evaluates True we will exit +# with status 1 which is a shell 'false' value. +if ! $python -c 'import sys; sys.exit(sys.version_info < (3,6))'; then + error_exit "Cannot use '$python', Python >= 3.6 is required." \ + "Use --python=/path/to/python to specify a supported Python." +fi + +# Preserve python version since some functionality is dependent on it +python_version=$($python -c 'import sys; print("%d.%d.%d" % (sys.version_info[0], sys.version_info[1], sys.version_info[2]))' 2>/dev/null) + +# Suppress writing compiled files +python="$python -B" + +if test -z "$meson"; then + if test "$explicit_python" = no && has meson && version_ge "$(meson --version)" 0.55.3; then + meson=meson + elif test -e "${source_path}/.git" && test $git_update = 'yes' ; then + meson=git + elif test -e "${source_path}/meson/meson.py" ; then + meson=internal + else + if test "$explicit_python" = yes; then + error_exit "--python requires using QEMU's embedded Meson distribution, but it was not found." + else + error_exit "Meson not found. Use --meson=/path/to/meson" + fi + fi +else + # Meson uses its own Python interpreter to invoke other Python scripts, + # but the user wants to use the one they specified with --python. + # + # We do not want to override the distro Python interpreter (and sometimes + # cannot: for example in Homebrew /usr/bin/meson is a bash script), so + # just require --meson=git|internal together with --python. + if test "$explicit_python" = yes; then + case "$meson" in + git | internal) ;; + *) error_exit "--python requires using QEMU's embedded Meson distribution." ;; + esac + fi +fi + +if test "$meson" = git; then + git_submodules="${git_submodules} meson" +fi + +case "$meson" in + git | internal) + meson="$python ${source_path}/meson/meson.py" + ;; + *) meson=$(command -v "$meson") ;; +esac + +# Probe for ninja + +if test -z "$ninja"; then + for c in ninja ninja-build samu; do + if has $c; then + ninja=$(command -v "$c") + break + fi + done + if test -z "$ninja"; then + error_exit "Cannot find Ninja" + fi +fi + +# Check that the C compiler works. Doing this here before testing +# the host CPU ensures that we had a valid CC to autodetect the +# $cpu var (and we should bail right here if that's not the case). +# It also allows the help message to be printed without a CC. +write_c_skeleton; +if compile_object ; then + : C compiler works ok +else + error_exit "\"$cc\" either does not exist or does not work" +fi +if ! compile_prog ; then + error_exit "\"$cc\" cannot build an executable (is your linker broken?)" +fi + +# Consult white-list to determine whether to enable werror +# by default. Only enable by default for git builds +if test -z "$werror" ; then + if test -e "$source_path/.git" && \ + { test "$linux" = "yes" || test "$mingw32" = "yes"; }; then + werror="yes" + else + werror="no" + fi +fi + +if test "$bogus_os" = "yes"; then + # Now that we know that we're not printing the help and that + # the compiler works (so the results of the check_defines we used + # to identify the OS are reliable), if we didn't recognize the + # host OS we should stop now. + error_exit "Unrecognized host OS (uname -s reports '$(uname -s)')" +fi + +# Check whether the compiler matches our minimum requirements: +cat > $TMPC << EOF +#if defined(__clang_major__) && defined(__clang_minor__) +# ifdef __apple_build_version__ +# if __clang_major__ < 5 || (__clang_major__ == 5 && __clang_minor__ < 1) +# error You need at least XCode Clang v5.1 to compile QEMU +# endif +# else +# if __clang_major__ < 3 || (__clang_major__ == 3 && __clang_minor__ < 4) +# error You need at least Clang v3.4 to compile QEMU +# endif +# endif +#elif defined(__GNUC__) && defined(__GNUC_MINOR__) +# if __GNUC__ < 4 || (__GNUC__ == 4 && __GNUC_MINOR__ < 8) +# error You need at least GCC v4.8 to compile QEMU +# endif +#else +# error You either need GCC or Clang to compiler QEMU +#endif +int main (void) { return 0; } +EOF +if ! compile_prog "" "" ; then + error_exit "You need at least GCC v4.8 or Clang v3.4 (or XCode Clang v5.1)" +fi + +# Accumulate -Wfoo and -Wno-bar separately. +# We will list all of the enable flags first, and the disable flags second. +# Note that we do not add -Werror, because that would enable it for all +# configure tests. If a configure test failed due to -Werror this would +# just silently disable some features, so it's too error prone. + +warn_flags= +add_to warn_flags -Wold-style-declaration +add_to warn_flags -Wold-style-definition +add_to warn_flags -Wtype-limits +add_to warn_flags -Wformat-security +add_to warn_flags -Wformat-y2k +add_to warn_flags -Winit-self +add_to warn_flags -Wignored-qualifiers +add_to warn_flags -Wempty-body +add_to warn_flags -Wnested-externs +add_to warn_flags -Wendif-labels +add_to warn_flags -Wexpansion-to-defined + +nowarn_flags= +add_to nowarn_flags -Wno-initializer-overrides +add_to nowarn_flags -Wno-missing-include-dirs +add_to nowarn_flags -Wno-shift-negative-value +add_to nowarn_flags -Wno-string-plus-int +add_to nowarn_flags -Wno-typedef-redefinition +add_to nowarn_flags -Wno-tautological-type-limit-compare +add_to nowarn_flags -Wno-psabi + +gcc_flags="$warn_flags $nowarn_flags" + +cc_has_warning_flag() { + write_c_skeleton; + + # Use the positive sense of the flag when testing for -Wno-wombat + # support (gcc will happily accept the -Wno- form of unknown + # warning options). + optflag="$(echo $1 | sed -e 's/^-Wno-/-W/')" + compile_prog "-Werror $optflag" "" +} + +for flag in $gcc_flags; do + if cc_has_warning_flag $flag ; then + QEMU_CFLAGS="$QEMU_CFLAGS $flag" + fi +done + +if test "$stack_protector" != "no"; then + cat > $TMPC << EOF +int main(int argc, char *argv[]) +{ + char arr[64], *p = arr, *c = argv[0]; + while (*c) { + *p++ = *c++; + } + return 0; +} +EOF + gcc_flags="-fstack-protector-strong -fstack-protector-all" + sp_on=0 + for flag in $gcc_flags; do + # We need to check both a compile and a link, since some compiler + # setups fail only on a .c->.o compile and some only at link time + if compile_object "-Werror $flag" && + compile_prog "-Werror $flag" ""; then + QEMU_CFLAGS="$QEMU_CFLAGS $flag" + QEMU_LDFLAGS="$QEMU_LDFLAGS $flag" + sp_on=1 + break + fi + done + if test "$stack_protector" = yes; then + if test $sp_on = 0; then + error_exit "Stack protector not supported" + fi + fi +fi + +# Disable -Wmissing-braces on older compilers that warn even for +# the "universal" C zero initializer {0}. +cat > $TMPC << EOF +struct { + int a[2]; +} x = {0}; +EOF +if compile_object "-Werror" "" ; then + : +else + QEMU_CFLAGS="$QEMU_CFLAGS -Wno-missing-braces" +fi + +# Our module code doesn't support Windows +if test "$modules" = "yes" && test "$mingw32" = "yes" ; then + error_exit "Modules are not available for Windows" +fi + +# module_upgrades is only reasonable if modules are enabled +if test "$modules" = "no" && test "$module_upgrades" = "yes" ; then + error_exit "Can't enable module-upgrades as Modules are not enabled" +fi + +# Static linking is not possible with modules or PIE +if test "$static" = "yes" ; then + if test "$modules" = "yes" ; then + error_exit "static and modules are mutually incompatible" + fi +fi + +# Unconditional check for compiler __thread support + cat > $TMPC << EOF +static __thread int tls_var; +int main(void) { return tls_var; } +EOF + +if ! compile_prog "-Werror" "" ; then + error_exit "Your compiler does not support the __thread specifier for " \ + "Thread-Local Storage (TLS). Please upgrade to a version that does." +fi + +cat > $TMPC << EOF + +#ifdef __linux__ +# define THREAD __thread +#else +# define THREAD +#endif +static THREAD int tls_var; +int main(void) { return tls_var; } +EOF + +# Check we support --no-pie first; we will need this for building ROMs. +if compile_prog "-Werror -fno-pie" "-no-pie"; then + CFLAGS_NOPIE="-fno-pie" +fi + +if test "$static" = "yes"; then + if test "$pie" != "no" && compile_prog "-Werror -fPIE -DPIE" "-static-pie"; then + CONFIGURE_CFLAGS="-fPIE -DPIE $CONFIGURE_CFLAGS" + QEMU_LDFLAGS="-static-pie $QEMU_LDFLAGS" + pie="yes" + elif test "$pie" = "yes"; then + error_exit "-static-pie not available due to missing toolchain support" + else + QEMU_LDFLAGS="-static $QEMU_LDFLAGS" + pie="no" + fi +elif test "$pie" = "no"; then + CONFIGURE_CFLAGS="$CFLAGS_NOPIE $CONFIGURE_CFLAGS" +elif compile_prog "-Werror -fPIE -DPIE" "-pie"; then + CONFIGURE_CFLAGS="-fPIE -DPIE $CONFIGURE_CFLAGS" + CONFIGURE_LDFLAGS="-pie $CONFIGURE_LDFLAGS" + pie="yes" +elif test "$pie" = "yes"; then + error_exit "PIE not available due to missing toolchain support" +else + echo "Disabling PIE due to missing toolchain support" + pie="no" +fi + +# Detect support for PT_GNU_RELRO + DT_BIND_NOW. +# The combination is known as "full relro", because .got.plt is read-only too. +if compile_prog "" "-Wl,-z,relro -Wl,-z,now" ; then + QEMU_LDFLAGS="-Wl,-z,relro -Wl,-z,now $QEMU_LDFLAGS" +fi + +########################################## +# __sync_fetch_and_and requires at least -march=i486. Many toolchains +# use i686 as default anyway, but for those that don't, an explicit +# specification is necessary + +if test "$cpu" = "i386"; then + cat > $TMPC << EOF +static int sfaa(int *ptr) +{ + return __sync_fetch_and_and(ptr, 0); +} + +int main(void) +{ + int val = 42; + val = __sync_val_compare_and_swap(&val, 0, 1); + sfaa(&val); + return val; +} +EOF + if ! compile_prog "" "" ; then + QEMU_CFLAGS="-march=i486 $QEMU_CFLAGS" + fi +fi + +######################################### +# Solaris specific configure tool chain decisions + +if test "$solaris" = "yes" ; then + if has ar; then + : + else + if test -f /usr/ccs/bin/ar ; then + error_exit "No path includes ar" \ + "Add /usr/ccs/bin to your path and rerun configure" + fi + error_exit "No path includes ar" + fi +fi + +if test -z "${target_list+xxx}" ; then + default_targets=yes + for target in $default_target_list; do + target_list="$target_list $target" + done + target_list="${target_list# }" +else + default_targets=no + target_list=$(echo "$target_list" | sed -e 's/,/ /g') + for target in $target_list; do + # Check that we recognised the target name; this allows a more + # friendly error message than if we let it fall through. + case " $default_target_list " in + *" $target "*) + ;; + *) + error_exit "Unknown target name '$target'" + ;; + esac + done +fi + +for target in $target_list; do + # if a deprecated target is enabled we note it here + if echo "$deprecated_targets_list" | grep -q "$target"; then + add_to deprecated_features $target + fi +done + +# see if system emulation was really requested +case " $target_list " in + *"-softmmu "*) softmmu=yes + ;; + *) softmmu=no + ;; +esac + +feature_not_found() { + feature=$1 + remedy=$2 + + error_exit "User requested feature $feature" \ + "configure was not able to find it." \ + "$remedy" +} + +# --- +# big/little endian test +cat > $TMPC << EOF +short big_endian[] = { 0x4269, 0x4765, 0x4e64, 0x4961, 0x4e00, 0, }; +short little_endian[] = { 0x694c, 0x7454, 0x654c, 0x6e45, 0x6944, 0x6e41, 0, }; +extern int foo(short *, short *); +int main(int argc, char *argv[]) { + return foo(big_endian, little_endian); +} +EOF + +if compile_object ; then + if strings -a $TMPO | grep -q BiGeNdIaN ; then + bigendian="yes" + elif strings -a $TMPO | grep -q LiTtLeEnDiAn ; then + bigendian="no" + else + echo big/little test failed + fi +else + echo big/little test failed +fi + +########################################## +# system tools +if test -z "$want_tools"; then + if test "$softmmu" = "no"; then + want_tools=no + else + want_tools=yes + fi +fi + +########################################## +# cocoa implies not SDL or GTK +# (the cocoa UI code currently assumes it is always the active UI +# and doesn't interact well with other UI frontend code) +if test "$cocoa" = "enabled"; then + if test "$sdl" = "enabled"; then + error_exit "Cocoa and SDL UIs cannot both be enabled at once" + fi + if test "$gtk" = "yes"; then + error_exit "Cocoa and GTK UIs cannot both be enabled at once" + fi + gtk=no + sdl=disabled +fi + +# Some versions of Mac OS X incorrectly define SIZE_MAX +cat > $TMPC << EOF +#include +#include +int main(int argc, char *argv[]) { + return printf("%zu", SIZE_MAX); +} +EOF +have_broken_size_max=no +if ! compile_object -Werror ; then + have_broken_size_max=yes +fi + +########################################## +# L2TPV3 probe + +cat > $TMPC < +#include +int main(void) { return sizeof(struct mmsghdr); } +EOF +if compile_prog "" "" ; then + l2tpv3=yes +else + l2tpv3=no +fi + +if check_include "pty.h" ; then + pty_h=yes +else + pty_h=no +fi + +cat > $TMPC < +int main(int argc, char *argv[]) { + return mlockall(MCL_FUTURE); +} +EOF +if compile_prog "" "" ; then + have_mlockall=yes +else + have_mlockall=no +fi + +######################################### +# vhost interdependencies and host support + +# vhost backends +if test "$vhost_user" = "yes" && test "$linux" != "yes"; then + error_exit "vhost-user is only available on Linux" +fi +test "$vhost_vdpa" = "" && vhost_vdpa=$linux +if test "$vhost_vdpa" = "yes" && test "$linux" != "yes"; then + error_exit "vhost-vdpa is only available on Linux" +fi +test "$vhost_kernel" = "" && vhost_kernel=$linux +if test "$vhost_kernel" = "yes" && test "$linux" != "yes"; then + error_exit "vhost-kernel is only available on Linux" +fi + +# vhost-kernel devices +test "$vhost_scsi" = "" && vhost_scsi=$vhost_kernel +if test "$vhost_scsi" = "yes" && test "$vhost_kernel" != "yes"; then + error_exit "--enable-vhost-scsi requires --enable-vhost-kernel" +fi +test "$vhost_vsock" = "" && vhost_vsock=$vhost_kernel +if test "$vhost_vsock" = "yes" && test "$vhost_kernel" != "yes"; then + error_exit "--enable-vhost-vsock requires --enable-vhost-kernel" +fi + +# vhost-user backends +test "$vhost_net_user" = "" && vhost_net_user=$vhost_user +if test "$vhost_net_user" = "yes" && test "$vhost_user" = "no"; then + error_exit "--enable-vhost-net-user requires --enable-vhost-user" +fi +test "$vhost_crypto" = "" && vhost_crypto=$vhost_user +if test "$vhost_crypto" = "yes" && test "$vhost_user" = "no"; then + error_exit "--enable-vhost-crypto requires --enable-vhost-user" +fi +test "$vhost_user_fs" = "" && vhost_user_fs=$vhost_user +if test "$vhost_user_fs" = "yes" && test "$vhost_user" = "no"; then + error_exit "--enable-vhost-user-fs requires --enable-vhost-user" +fi +#vhost-vdpa backends +test "$vhost_net_vdpa" = "" && vhost_net_vdpa=$vhost_vdpa +if test "$vhost_net_vdpa" = "yes" && test "$vhost_vdpa" = "no"; then + error_exit "--enable-vhost-net-vdpa requires --enable-vhost-vdpa" +fi + +# OR the vhost-kernel, vhost-vdpa and vhost-user values for simplicity +if test "$vhost_net" = ""; then + test "$vhost_net_user" = "yes" && vhost_net=yes + test "$vhost_net_vdpa" = "yes" && vhost_net=yes + test "$vhost_kernel" = "yes" && vhost_net=yes +fi + +########################################## +# pkg-config probe + +if ! has "$pkg_config_exe"; then + error_exit "pkg-config binary '$pkg_config_exe' not found" +fi + +########################################## +# NPTL probe + +if test "$linux_user" = "yes"; then + cat > $TMPC < +#include +int main(void) { +#if !defined(CLONE_SETTLS) || !defined(FUTEX_WAIT) +#error bork +#endif + return 0; +} +EOF + if ! compile_object ; then + feature_not_found "nptl" "Install glibc and linux kernel headers." + fi +fi + +########################################## +# lzo check + +if test "$lzo" != "no" ; then + cat > $TMPC << EOF +#include +int main(void) { lzo_version(); return 0; } +EOF + if compile_prog "" "-llzo2" ; then + lzo_libs="-llzo2" + lzo="yes" + else + if test "$lzo" = "yes"; then + feature_not_found "liblzo2" "Install liblzo2 devel" + fi + lzo="no" + fi +fi + +########################################## +# snappy check + +if test "$snappy" != "no" ; then + cat > $TMPC << EOF +#include +int main(void) { snappy_max_compressed_length(4096); return 0; } +EOF + if compile_prog "" "-lsnappy" ; then + snappy_libs='-lsnappy' + snappy="yes" + else + if test "$snappy" = "yes"; then + feature_not_found "libsnappy" "Install libsnappy devel" + fi + snappy="no" + fi +fi + +########################################## +# bzip2 check + +if test "$bzip2" != "no" ; then + cat > $TMPC << EOF +#include +int main(void) { BZ2_bzlibVersion(); return 0; } +EOF + if compile_prog "" "-lbz2" ; then + bzip2="yes" + else + if test "$bzip2" = "yes"; then + feature_not_found "libbzip2" "Install libbzip2 devel" + fi + bzip2="no" + fi +fi + +########################################## +# lzfse check + +if test "$lzfse" != "no" ; then + cat > $TMPC << EOF +#include +int main(void) { lzfse_decode_scratch_size(); return 0; } +EOF + if compile_prog "" "-llzfse" ; then + lzfse="yes" + else + if test "$lzfse" = "yes"; then + feature_not_found "lzfse" "Install lzfse devel" + fi + lzfse="no" + fi +fi + +########################################## +# zstd check + +if test "$zstd" != "no" ; then + libzstd_minver="1.4.0" + if $pkg_config --atleast-version=$libzstd_minver libzstd ; then + zstd_cflags="$($pkg_config --cflags libzstd)" + zstd_libs="$($pkg_config --libs libzstd)" + zstd="yes" + else + if test "$zstd" = "yes" ; then + feature_not_found "libzstd" "Install libzstd devel" + fi + zstd="no" + fi +fi + +########################################## +# libseccomp check + +if test "$seccomp" != "no" ; then + libseccomp_minver="2.3.0" + if $pkg_config --atleast-version=$libseccomp_minver libseccomp ; then + seccomp_cflags="$($pkg_config --cflags libseccomp)" + seccomp_libs="$($pkg_config --libs libseccomp)" + seccomp="yes" + else + if test "$seccomp" = "yes" ; then + feature_not_found "libseccomp" \ + "Install libseccomp devel >= $libseccomp_minver" + fi + seccomp="no" + fi +fi + +########################################## +# xen probe + +if test "$xen" != "disabled" ; then + # Check whether Xen library path is specified via --extra-ldflags to avoid + # overriding this setting with pkg-config output. If not, try pkg-config + # to obtain all needed flags. + + if ! echo $EXTRA_LDFLAGS | grep tools/libxc > /dev/null && \ + $pkg_config --exists xencontrol ; then + xen_ctrl_version="$(printf '%d%02d%02d' \ + $($pkg_config --modversion xencontrol | sed 's/\./ /g') )" + xen=enabled + xen_pc="xencontrol xenstore xenforeignmemory xengnttab" + xen_pc="$xen_pc xenevtchn xendevicemodel" + if $pkg_config --exists xentoolcore; then + xen_pc="$xen_pc xentoolcore" + fi + xen_cflags="$($pkg_config --cflags $xen_pc)" + xen_libs="$($pkg_config --libs $xen_pc)" + else + + xen_libs="-lxenstore -lxenctrl" + xen_stable_libs="-lxenforeignmemory -lxengnttab -lxenevtchn" + + # First we test whether Xen headers and libraries are available. + # If no, we are done and there is no Xen support. + # If yes, more tests are run to detect the Xen version. + + # Xen (any) + cat > $TMPC < +int main(void) { + return 0; +} +EOF + if ! compile_prog "" "$xen_libs" ; then + # Xen not found + if test "$xen" = "enabled" ; then + feature_not_found "xen" "Install xen devel" + fi + xen=disabled + + # Xen unstable + elif + cat > $TMPC < +#include +int main(void) { + xendevicemodel_handle *xd; + xenforeignmemory_handle *xfmem; + + xd = xendevicemodel_open(0, 0); + xendevicemodel_pin_memory_cacheattr(xd, 0, 0, 0, 0); + + xfmem = xenforeignmemory_open(0, 0); + xenforeignmemory_map_resource(xfmem, 0, 0, 0, 0, 0, NULL, 0, 0); + + return 0; +} +EOF + compile_prog "" "$xen_libs -lxendevicemodel $xen_stable_libs -lxentoolcore" + then + xen_stable_libs="-lxendevicemodel $xen_stable_libs -lxentoolcore" + xen_ctrl_version=41100 + xen=enabled + elif + cat > $TMPC < +#include +int main(void) { + xenforeignmemory_handle *xfmem; + + xfmem = xenforeignmemory_open(0, 0); + xenforeignmemory_map2(xfmem, 0, 0, 0, 0, 0, 0, 0); + xentoolcore_restrict_all(0); + + return 0; +} +EOF + compile_prog "" "$xen_libs -lxendevicemodel $xen_stable_libs -lxentoolcore" + then + xen_stable_libs="-lxendevicemodel $xen_stable_libs -lxentoolcore" + xen_ctrl_version=41000 + xen=enabled + elif + cat > $TMPC < +int main(void) { + xendevicemodel_handle *xd; + + xd = xendevicemodel_open(0, 0); + xendevicemodel_close(xd); + + return 0; +} +EOF + compile_prog "" "$xen_libs -lxendevicemodel $xen_stable_libs" + then + xen_stable_libs="-lxendevicemodel $xen_stable_libs" + xen_ctrl_version=40900 + xen=enabled + elif + cat > $TMPC < +#include +#include +#include +#include +#include +#include +#if !defined(HVM_MAX_VCPUS) +# error HVM_MAX_VCPUS not defined +#endif +int main(void) { + xc_interface *xc = NULL; + xenforeignmemory_handle *xfmem; + xenevtchn_handle *xe; + xengnttab_handle *xg; + xengnttab_grant_copy_segment_t* seg = NULL; + + xs_daemon_open(); + + xc = xc_interface_open(0, 0, 0); + xc_hvm_set_mem_type(0, 0, HVMMEM_ram_ro, 0, 0); + xc_domain_add_to_physmap(0, 0, XENMAPSPACE_gmfn, 0, 0); + xc_hvm_inject_msi(xc, 0, 0xf0000000, 0x00000000); + xc_hvm_create_ioreq_server(xc, 0, HVM_IOREQSRV_BUFIOREQ_ATOMIC, NULL); + + xfmem = xenforeignmemory_open(0, 0); + xenforeignmemory_map(xfmem, 0, 0, 0, 0, 0); + + xe = xenevtchn_open(0, 0); + xenevtchn_fd(xe); + + xg = xengnttab_open(0, 0); + xengnttab_grant_copy(xg, 0, seg); + + return 0; +} +EOF + compile_prog "" "$xen_libs $xen_stable_libs" + then + xen_ctrl_version=40800 + xen=enabled + elif + cat > $TMPC < +#include +#include +#include +#include +#include +#include +#if !defined(HVM_MAX_VCPUS) +# error HVM_MAX_VCPUS not defined +#endif +int main(void) { + xc_interface *xc = NULL; + xenforeignmemory_handle *xfmem; + xenevtchn_handle *xe; + xengnttab_handle *xg; + + xs_daemon_open(); + + xc = xc_interface_open(0, 0, 0); + xc_hvm_set_mem_type(0, 0, HVMMEM_ram_ro, 0, 0); + xc_domain_add_to_physmap(0, 0, XENMAPSPACE_gmfn, 0, 0); + xc_hvm_inject_msi(xc, 0, 0xf0000000, 0x00000000); + xc_hvm_create_ioreq_server(xc, 0, HVM_IOREQSRV_BUFIOREQ_ATOMIC, NULL); + + xfmem = xenforeignmemory_open(0, 0); + xenforeignmemory_map(xfmem, 0, 0, 0, 0, 0); + + xe = xenevtchn_open(0, 0); + xenevtchn_fd(xe); + + xg = xengnttab_open(0, 0); + xengnttab_map_grant_ref(xg, 0, 0, 0); + + return 0; +} +EOF + compile_prog "" "$xen_libs $xen_stable_libs" + then + xen_ctrl_version=40701 + xen=enabled + + # Xen 4.6 + elif + cat > $TMPC < +#include +#include +#include +#if !defined(HVM_MAX_VCPUS) +# error HVM_MAX_VCPUS not defined +#endif +int main(void) { + xc_interface *xc; + xs_daemon_open(); + xc = xc_interface_open(0, 0, 0); + xc_hvm_set_mem_type(0, 0, HVMMEM_ram_ro, 0, 0); + xc_gnttab_open(NULL, 0); + xc_domain_add_to_physmap(0, 0, XENMAPSPACE_gmfn, 0, 0); + xc_hvm_inject_msi(xc, 0, 0xf0000000, 0x00000000); + xc_hvm_create_ioreq_server(xc, 0, HVM_IOREQSRV_BUFIOREQ_ATOMIC, NULL); + xc_reserved_device_memory_map(xc, 0, 0, 0, 0, NULL, 0); + return 0; +} +EOF + compile_prog "" "$xen_libs" + then + xen_ctrl_version=40600 + xen=enabled + + # Xen 4.5 + elif + cat > $TMPC < +#include +#include +#include +#if !defined(HVM_MAX_VCPUS) +# error HVM_MAX_VCPUS not defined +#endif +int main(void) { + xc_interface *xc; + xs_daemon_open(); + xc = xc_interface_open(0, 0, 0); + xc_hvm_set_mem_type(0, 0, HVMMEM_ram_ro, 0, 0); + xc_gnttab_open(NULL, 0); + xc_domain_add_to_physmap(0, 0, XENMAPSPACE_gmfn, 0, 0); + xc_hvm_inject_msi(xc, 0, 0xf0000000, 0x00000000); + xc_hvm_create_ioreq_server(xc, 0, 0, NULL); + return 0; +} +EOF + compile_prog "" "$xen_libs" + then + xen_ctrl_version=40500 + xen=enabled + + elif + cat > $TMPC < +#include +#include +#include +#if !defined(HVM_MAX_VCPUS) +# error HVM_MAX_VCPUS not defined +#endif +int main(void) { + xc_interface *xc; + xs_daemon_open(); + xc = xc_interface_open(0, 0, 0); + xc_hvm_set_mem_type(0, 0, HVMMEM_ram_ro, 0, 0); + xc_gnttab_open(NULL, 0); + xc_domain_add_to_physmap(0, 0, XENMAPSPACE_gmfn, 0, 0); + xc_hvm_inject_msi(xc, 0, 0xf0000000, 0x00000000); + return 0; +} +EOF + compile_prog "" "$xen_libs" + then + xen_ctrl_version=40200 + xen=enabled + + else + if test "$xen" = "enabled" ; then + feature_not_found "xen (unsupported version)" \ + "Install a supported xen (xen 4.2 or newer)" + fi + xen=disabled + fi + + if test "$xen" = enabled; then + if test $xen_ctrl_version -ge 40701 ; then + xen_libs="$xen_libs $xen_stable_libs " + fi + fi + fi +fi + +if test "$xen_pci_passthrough" != "disabled"; then + if test "$xen" = "enabled" && test "$linux" = "yes"; then + xen_pci_passthrough=enabled + else + if test "$xen_pci_passthrough" = "enabled"; then + error_exit "User requested feature Xen PCI Passthrough" \ + " but this feature requires /sys from Linux" + fi + xen_pci_passthrough=disabled + fi +fi + +########################################## +# gettext probe +if test "$gettext" != "false" ; then + if has xgettext; then + gettext=true + else + if test "$gettext" = "true" ; then + feature_not_found "gettext" "Install xgettext binary" + fi + gettext=false + fi +fi + +########################################## +# X11 probe +if $pkg_config --exists "x11"; then + have_x11=yes + x11_cflags=$($pkg_config --cflags x11) + x11_libs=$($pkg_config --libs x11) +fi + +########################################## +# GTK probe + +if test "$gtk" != "no"; then + gtkpackage="gtk+-3.0" + gtkx11package="gtk+-x11-3.0" + gtkversion="3.22.0" + if $pkg_config --exists "$gtkpackage >= $gtkversion"; then + gtk_cflags=$($pkg_config --cflags $gtkpackage) + gtk_libs=$($pkg_config --libs $gtkpackage) + gtk_version=$($pkg_config --modversion $gtkpackage) + if $pkg_config --exists "$gtkx11package >= $gtkversion"; then + need_x11=yes + gtk_cflags="$gtk_cflags $x11_cflags" + gtk_libs="$gtk_libs $x11_libs" + fi + gtk="yes" + elif test "$gtk" = "yes"; then + feature_not_found "gtk" "Install gtk3-devel" + else + gtk="no" + fi +fi + + +########################################## +# GNUTLS probe + +if test "$gnutls" != "no"; then + pass="no" + if $pkg_config --exists "gnutls >= 3.1.18"; then + gnutls_cflags=$($pkg_config --cflags gnutls) + gnutls_libs=$($pkg_config --libs gnutls) + # Packaging for the static libraries is not always correct. + # At least ubuntu 18.04 ships only shared libraries. + write_c_skeleton + if compile_prog "" "$gnutls_libs" ; then + pass="yes" + fi + fi + if test "$pass" = "no" && test "$gnutls" = "yes"; then + feature_not_found "gnutls" "Install gnutls devel >= 3.1.18" + else + gnutls="$pass" + fi +fi + + +# If user didn't give a --disable/enable-gcrypt flag, +# then mark as disabled if user requested nettle +# explicitly +if test -z "$gcrypt" +then + if test "$nettle" = "yes" + then + gcrypt="no" + fi +fi + +# If user didn't give a --disable/enable-nettle flag, +# then mark as disabled if user requested gcrypt +# explicitly +if test -z "$nettle" +then + if test "$gcrypt" = "yes" + then + nettle="no" + fi +fi + +has_libgcrypt() { + if ! has "libgcrypt-config" + then + return 1 + fi + + if test -n "$cross_prefix" + then + host=$(libgcrypt-config --host) + if test "$host-" != $cross_prefix + then + return 1 + fi + fi + + maj=`libgcrypt-config --version | awk -F . '{print $1}'` + min=`libgcrypt-config --version | awk -F . '{print $2}'` + + if test $maj != 1 || test $min -lt 5 + then + return 1 + fi + + return 0 +} + + +if test "$nettle" != "no"; then + pass="no" + if $pkg_config --exists "nettle >= 2.7.1"; then + nettle_cflags=$($pkg_config --cflags nettle) + nettle_libs=$($pkg_config --libs nettle) + nettle_version=$($pkg_config --modversion nettle) + # Link test to make sure the given libraries work (e.g for static). + write_c_skeleton + if compile_prog "" "$nettle_libs" ; then + if test -z "$gcrypt"; then + gcrypt="no" + fi + pass="yes" + fi + fi + if test "$pass" = "yes" + then + cat > $TMPC << EOF +#include +int main(void) { + return 0; +} +EOF + if compile_prog "$nettle_cflags" "$nettle_libs" ; then + nettle_xts=yes + qemu_private_xts=no + fi + fi + if test "$pass" = "no" && test "$nettle" = "yes"; then + feature_not_found "nettle" "Install nettle devel >= 2.7.1" + else + nettle="$pass" + fi +fi + +if test "$gcrypt" != "no"; then + pass="no" + if has_libgcrypt; then + gcrypt_cflags=$(libgcrypt-config --cflags) + gcrypt_libs=$(libgcrypt-config --libs) + # Debian has removed -lgpg-error from libgcrypt-config + # as it "spreads unnecessary dependencies" which in + # turn breaks static builds... + if test "$static" = "yes" + then + gcrypt_libs="$gcrypt_libs -lgpg-error" + fi + + # Link test to make sure the given libraries work (e.g for static). + write_c_skeleton + if compile_prog "" "$gcrypt_libs" ; then + pass="yes" + fi + fi + if test "$pass" = "yes"; then + gcrypt="yes" + cat > $TMPC << EOF +#include +int main(void) { + gcry_mac_hd_t handle; + gcry_mac_open(&handle, GCRY_MAC_HMAC_MD5, + GCRY_MAC_FLAG_SECURE, NULL); + return 0; +} +EOF + if compile_prog "$gcrypt_cflags" "$gcrypt_libs" ; then + gcrypt_hmac=yes + fi + cat > $TMPC << EOF +#include +int main(void) { + gcry_cipher_hd_t handle; + gcry_cipher_open(&handle, GCRY_CIPHER_AES, GCRY_CIPHER_MODE_XTS, 0); + return 0; +} +EOF + if compile_prog "$gcrypt_cflags" "$gcrypt_libs" ; then + gcrypt_xts=yes + qemu_private_xts=no + fi + elif test "$gcrypt" = "yes"; then + feature_not_found "gcrypt" "Install gcrypt devel >= 1.5.0" + else + gcrypt="no" + fi +fi + + +if test "$gcrypt" = "yes" && test "$nettle" = "yes" +then + error_exit "Only one of gcrypt & nettle can be enabled" +fi + +########################################## +# libtasn1 - only for the TLS creds/session test suite + +tasn1=yes +tasn1_cflags="" +tasn1_libs="" +if $pkg_config --exists "libtasn1"; then + tasn1_cflags=$($pkg_config --cflags libtasn1) + tasn1_libs=$($pkg_config --libs libtasn1) +else + tasn1=no +fi + + +########################################## +# PAM probe + +if test "$auth_pam" != "no"; then + cat > $TMPC < +#include +int main(void) { + const char *service_name = "qemu"; + const char *user = "frank"; + const struct pam_conv pam_conv = { 0 }; + pam_handle_t *pamh = NULL; + pam_start(service_name, user, &pam_conv, &pamh); + return 0; +} +EOF + if compile_prog "" "-lpam" ; then + auth_pam=yes + else + if test "$auth_pam" = "yes"; then + feature_not_found "PAM" "Install PAM development package" + else + auth_pam=no + fi + fi +fi + +########################################## +# getifaddrs (for tests/test-io-channel-socket ) + +have_ifaddrs_h=yes +if ! check_include "ifaddrs.h" ; then + have_ifaddrs_h=no +fi + +######################################### +# libdrm check +have_drm_h=no +if check_include "libdrm/drm.h" ; then + have_drm_h=yes +fi + +######################################### +# sys/signal.h check +have_sys_signal_h=no +if check_include "sys/signal.h" ; then + have_sys_signal_h=yes +fi + +########################################## +# VTE probe + +if test "$vte" != "no"; then + vteminversion="0.32.0" + if $pkg_config --exists "vte-2.91"; then + vtepackage="vte-2.91" + else + vtepackage="vte-2.90" + fi + if $pkg_config --exists "$vtepackage >= $vteminversion"; then + vte_cflags=$($pkg_config --cflags $vtepackage) + vte_libs=$($pkg_config --libs $vtepackage) + vteversion=$($pkg_config --modversion $vtepackage) + vte="yes" + elif test "$vte" = "yes"; then + feature_not_found "vte" "Install libvte-2.90/2.91 devel" + else + vte="no" + fi +fi + +########################################## +# RDMA needs OpenFabrics libraries +if test "$rdma" != "no" ; then + cat > $TMPC < +int main(void) { return 0; } +EOF + rdma_libs="-lrdmacm -libverbs -libumad" + if compile_prog "" "$rdma_libs" ; then + rdma="yes" + else + if test "$rdma" = "yes" ; then + error_exit \ + " OpenFabrics librdmacm/libibverbs/libibumad not present." \ + " Your options:" \ + " (1) Fast: Install infiniband packages (devel) from your distro." \ + " (2) Cleanest: Install libraries from www.openfabrics.org" \ + " (3) Also: Install softiwarp if you don't have RDMA hardware" + fi + rdma="no" + fi +fi + +########################################## +# PVRDMA detection + +cat > $TMPC < + +int +main(void) +{ + char buf = 0; + void *addr = &buf; + addr = mremap(addr, 0, 1, MREMAP_MAYMOVE | MREMAP_FIXED); + + return 0; +} +EOF + +if test "$rdma" = "yes" ; then + case "$pvrdma" in + "") + if compile_prog "" ""; then + pvrdma="yes" + else + pvrdma="no" + fi + ;; + "yes") + if ! compile_prog "" ""; then + error_exit "PVRDMA is not supported since mremap is not implemented" + fi + pvrdma="yes" + ;; + "no") + pvrdma="no" + ;; + esac +else + if test "$pvrdma" = "yes" ; then + error_exit "PVRDMA requires rdma suppport" + fi + pvrdma="no" +fi + +# Let's see if enhanced reg_mr is supported +if test "$pvrdma" = "yes" ; then + +cat > $TMPC < + +int +main(void) +{ + struct ibv_mr *mr; + struct ibv_pd *pd = NULL; + size_t length = 10; + uint64_t iova = 0; + int access = 0; + void *addr = NULL; + + mr = ibv_reg_mr_iova(pd, addr, length, iova, access); + + ibv_dereg_mr(mr); + + return 0; +} +EOF + if ! compile_prog "" "-libverbs"; then + QEMU_CFLAGS="$QEMU_CFLAGS -DLEGACY_RDMA_REG_MR" + fi +fi + +########################################## +# xfsctl() probe, used for file-posix.c +if test "$xfs" != "no" ; then + cat > $TMPC << EOF +#include /* NULL */ +#include +int main(void) +{ + xfsctl(NULL, 0, 0, NULL); + return 0; +} +EOF + if compile_prog "" "" ; then + xfs="yes" + else + if test "$xfs" = "yes" ; then + feature_not_found "xfs" "Install xfsprogs/xfslibs devel" + fi + xfs=no + fi +fi + +########################################## +# vde libraries probe +if test "$vde" != "no" ; then + vde_libs="-lvdeplug" + cat > $TMPC << EOF +#include +int main(void) +{ + struct vde_open_args a = {0, 0, 0}; + char s[] = ""; + vde_open(s, s, &a); + return 0; +} +EOF + if compile_prog "" "$vde_libs" ; then + vde=yes + else + if test "$vde" = "yes" ; then + feature_not_found "vde" "Install vde (Virtual Distributed Ethernet) devel" + fi + vde=no + fi +fi + +########################################## +# netmap support probe +# Apart from looking for netmap headers, we make sure that the host API version +# supports the netmap backend (>=11). The upper bound (15) is meant to simulate +# a minor/major version number. Minor new features will be marked with values up +# to 15, and if something happens that requires a change to the backend we will +# move above 15, submit the backend fixes and modify this two bounds. +if test "$netmap" != "no" ; then + cat > $TMPC << EOF +#include +#include +#include +#include +#if (NETMAP_API < 11) || (NETMAP_API > 15) +#error +#endif +int main(void) { return 0; } +EOF + if compile_prog "" "" ; then + netmap=yes + else + if test "$netmap" = "yes" ; then + feature_not_found "netmap" + fi + netmap=no + fi +fi + +########################################## +# libcap-ng library probe +if test "$cap_ng" != "no" ; then + cap_libs="-lcap-ng" + cat > $TMPC << EOF +#include +int main(void) +{ + capng_capability_to_name(CAPNG_EFFECTIVE); + return 0; +} +EOF + if compile_prog "" "$cap_libs" ; then + cap_ng=yes + else + if test "$cap_ng" = "yes" ; then + feature_not_found "cap_ng" "Install libcap-ng devel" + fi + cap_ng=no + fi +fi + +########################################## +# Sound support libraries probe + +audio_drv_list=$(echo "$audio_drv_list" | sed -e 's/,/ /g') +for drv in $audio_drv_list; do + case $drv in + alsa | try-alsa) + if $pkg_config alsa --exists; then + alsa_libs=$($pkg_config alsa --libs) + alsa_cflags=$($pkg_config alsa --cflags) + alsa=yes + if test "$drv" = "try-alsa"; then + audio_drv_list=$(echo "$audio_drv_list" | sed -e 's/try-alsa/alsa/') + fi + else + if test "$drv" = "try-alsa"; then + audio_drv_list=$(echo "$audio_drv_list" | sed -e 's/try-alsa//') + else + error_exit "$drv check failed" \ + "Make sure to have the $drv libs and headers installed." + fi + fi + ;; + + pa | try-pa) + if $pkg_config libpulse --exists; then + libpulse=yes + pulse_libs=$($pkg_config libpulse --libs) + pulse_cflags=$($pkg_config libpulse --cflags) + if test "$drv" = "try-pa"; then + audio_drv_list=$(echo "$audio_drv_list" | sed -e 's/try-pa/pa/') + fi + else + if test "$drv" = "try-pa"; then + audio_drv_list=$(echo "$audio_drv_list" | sed -e 's/try-pa//') + else + error_exit "$drv check failed" \ + "Make sure to have the $drv libs and headers installed." + fi + fi + ;; + + sdl) + if test "$sdl" = "no"; then + error_exit "sdl not found or disabled, can not use sdl audio driver" + fi + ;; + + try-sdl) + if test "$sdl" = "no"; then + audio_drv_list=$(echo "$audio_drv_list" | sed -e 's/try-sdl//') + else + audio_drv_list=$(echo "$audio_drv_list" | sed -e 's/try-sdl/sdl/') + fi + ;; + + coreaudio) + coreaudio_libs="-framework CoreAudio" + ;; + + dsound) + dsound_libs="-lole32 -ldxguid" + audio_win_int="yes" + ;; + + oss) + oss_libs="$oss_lib" + ;; + + jack | try-jack) + if $pkg_config jack --exists; then + libjack=yes + jack_libs=$($pkg_config jack --libs) + if test "$drv" = "try-jack"; then + audio_drv_list=$(echo "$audio_drv_list" | sed -e 's/try-jack/jack/') + fi + else + if test "$drv" = "try-jack"; then + audio_drv_list=$(echo "$audio_drv_list" | sed -e 's/try-jack//') + else + error_exit "$drv check failed" \ + "Make sure to have the $drv libs and headers installed." + fi + fi + ;; + + *) + echo "$audio_possible_drivers" | grep -q "\<$drv\>" || { + error_exit "Unknown driver '$drv' selected" \ + "Possible drivers are: $audio_possible_drivers" + } + ;; + esac +done + +########################################## +# BrlAPI probe + +if test "$brlapi" != "no" ; then + brlapi_libs="-lbrlapi" + cat > $TMPC << EOF +#include +#include +int main( void ) { return brlapi__openConnection (NULL, NULL, NULL); } +EOF + if compile_prog "" "$brlapi_libs" ; then + brlapi=yes + else + if test "$brlapi" = "yes" ; then + feature_not_found "brlapi" "Install brlapi devel" + fi + brlapi=no + fi +fi + +########################################## +# curl probe +if test "$curl" != "no" ; then + if $pkg_config libcurl --exists; then + curlconfig="$pkg_config libcurl" + else + curlconfig=curl-config + fi + cat > $TMPC << EOF +#include +int main(void) { curl_easy_init(); curl_multi_setopt(0, 0, 0); return 0; } +EOF + curl_cflags=$($curlconfig --cflags 2>/dev/null) + curl_libs=$($curlconfig --libs 2>/dev/null) + if compile_prog "$curl_cflags" "$curl_libs" ; then + curl=yes + else + if test "$curl" = "yes" ; then + feature_not_found "curl" "Install libcurl devel" + fi + curl=no + fi +fi # test "$curl" + +########################################## +# glib support probe + +glib_req_ver=2.48 +glib_modules=gthread-2.0 +if test "$modules" = yes; then + glib_modules="$glib_modules gmodule-export-2.0" +fi +if test "$plugins" = yes; then + glib_modules="$glib_modules gmodule-2.0" +fi + +for i in $glib_modules; do + if $pkg_config --atleast-version=$glib_req_ver $i; then + glib_cflags=$($pkg_config --cflags $i) + glib_libs=$($pkg_config --libs $i) + else + error_exit "glib-$glib_req_ver $i is required to compile QEMU" + fi +done + +# Check whether glib has gslice, which we have to avoid for correctness. +# TODO: remove this check and the corresponding workaround (qtree) when +# the minimum supported glib is >= $glib_dropped_gslice_version. +glib_dropped_gslice_version=2.75.3 +for i in $glib_modules; do + if ! $pkg_config --atleast-version=$glib_dropped_gslice_version $i; then + glib_has_gslice="yes" + break + fi +done + +# This workaround is required due to a bug in pkg-config file for glib as it +# doesn't define GLIB_STATIC_COMPILATION for pkg-config --static + +if test "$static" = yes && test "$mingw32" = yes; then + glib_cflags="-DGLIB_STATIC_COMPILATION $glib_cflags" +fi + +if $pkg_config --atleast-version=$glib_req_ver gio-2.0; then + gio_cflags=$($pkg_config --cflags gio-2.0) + gio_libs=$($pkg_config --libs gio-2.0) + gdbus_codegen=$($pkg_config --variable=gdbus_codegen gio-2.0) + if [ ! -x "$gdbus_codegen" ]; then + gdbus_codegen= + fi + # Check that the libraries actually work -- Ubuntu 18.04 ships + # with pkg-config --static --libs data for gio-2.0 that is missing + # -lblkid and will give a link error. + cat > $TMPC < +int main(void) +{ + g_dbus_proxy_new_sync(0, 0, 0, 0, 0, 0, 0, 0); + return 0; +} +EOF + if compile_prog "$gio_cflags" "$gio_libs" ; then + gio=yes + else + gio=no + fi +else + gio=no +fi + +if $pkg_config --atleast-version=$glib_req_ver gio-unix-2.0; then + gio_cflags="$gio_cflags $($pkg_config --cflags gio-unix-2.0)" + gio_libs="$gio_libs $($pkg_config --libs gio-unix-2.0)" +fi + +# Sanity check that the current size_t matches the +# size that glib thinks it should be. This catches +# problems on multi-arch where people try to build +# 32-bit QEMU while pointing at 64-bit glib headers +cat > $TMPC < +#include + +#define QEMU_BUILD_BUG_ON(x) \ + typedef char qemu_build_bug_on[(x)?-1:1] __attribute__((unused)); + +int main(void) { + QEMU_BUILD_BUG_ON(sizeof(size_t) != GLIB_SIZEOF_SIZE_T); + return 0; +} +EOF + +if ! compile_prog "$glib_cflags" "$glib_libs" ; then + error_exit "sizeof(size_t) doesn't match GLIB_SIZEOF_SIZE_T."\ + "You probably need to set PKG_CONFIG_LIBDIR"\ + "to point to the right pkg-config files for your"\ + "build target" +fi + +# Silence clang 3.5.0 warnings about glib attribute __alloc_size__ usage +cat > $TMPC << EOF +#include +int main(void) { return 0; } +EOF +if ! compile_prog "$glib_cflags -Werror" "$glib_libs" ; then + if cc_has_warning_flag "-Wno-unknown-attributes"; then + glib_cflags="-Wno-unknown-attributes $glib_cflags" + CONFIGURE_CFLAGS="-Wno-unknown-attributes $CONFIGURE_CFLAGS" + fi +fi + +# Silence clang warnings triggered by glib < 2.57.2 +cat > $TMPC << EOF +#include +typedef struct Foo { + int i; +} Foo; +static void foo_free(Foo *f) +{ + g_free(f); +} +G_DEFINE_AUTOPTR_CLEANUP_FUNC(Foo, foo_free); +int main(void) { return 0; } +EOF +if ! compile_prog "$glib_cflags -Werror" "$glib_libs" ; then + if cc_has_warning_flag "-Wno-unused-function"; then + glib_cflags="$glib_cflags -Wno-unused-function" + CONFIGURE_CFLAGS="$CONFIGURE_CFLAGS -Wno-unused-function" + fi +fi + +########################################## +# SHA command probe for modules +if test "$modules" = yes; then + shacmd_probe="sha1sum sha1 shasum" + for c in $shacmd_probe; do + if has $c; then + shacmd="$c" + break + fi + done + if test "$shacmd" = ""; then + error_exit "one of the checksum commands is required to enable modules: $shacmd_probe" + fi +fi + +########################################## +# pthread probe +PTHREADLIBS_LIST="-pthread -lpthread -lpthreadGC2" + +pthread=no +cat > $TMPC << EOF +#include +static void *f(void *p) { return NULL; } +int main(void) { + pthread_t thread; + pthread_create(&thread, 0, f, 0); + return 0; +} +EOF +if compile_prog "" "" ; then + pthread=yes +else + for pthread_lib in $PTHREADLIBS_LIST; do + if compile_prog "" "$pthread_lib" ; then + pthread=yes + found=no + for lib_entry in $LIBS; do + if test "$lib_entry" = "$pthread_lib"; then + found=yes + break + fi + done + if test "$found" = "no"; then + LIBS="$pthread_lib $LIBS" + fi + break + fi + done +fi + +if test "$mingw32" != yes && test "$pthread" = no; then + error_exit "pthread check failed" \ + "Make sure to have the pthread libs and headers installed." +fi + +# check for pthread_setname_np with thread id +pthread_setname_np_w_tid=no +cat > $TMPC << EOF +#include + +static void *f(void *p) { return NULL; } +int main(void) +{ + pthread_t thread; + pthread_create(&thread, 0, f, 0); + pthread_setname_np(thread, "QEMU"); + return 0; +} +EOF +if compile_prog "" "$pthread_lib" ; then + pthread_setname_np_w_tid=yes +fi + +# check for pthread_setname_np without thread id +pthread_setname_np_wo_tid=no +cat > $TMPC << EOF +#include + +static void *f(void *p) { pthread_setname_np("QEMU"); return NULL; } +int main(void) +{ + pthread_t thread; + pthread_create(&thread, 0, f, 0); + return 0; +} +EOF +if compile_prog "" "$pthread_lib" ; then + pthread_setname_np_wo_tid=yes +fi + +########################################## +# rbd probe +if test "$rbd" != "no" ; then + cat > $TMPC < +#include +int main(void) { + rados_t cluster; + rados_create(&cluster, NULL); + return 0; +} +EOF + rbd_libs="-lrbd -lrados" + if compile_prog "" "$rbd_libs" ; then + rbd=yes + else + if test "$rbd" = "yes" ; then + feature_not_found "rados block device" "Install librbd/ceph devel" + fi + rbd=no + fi +fi + +########################################## +# libssh probe +if test "$libssh" != "no" ; then + if $pkg_config --exists libssh; then + libssh_cflags=$($pkg_config libssh --cflags) + libssh_libs=$($pkg_config libssh --libs) + libssh=yes + else + if test "$libssh" = "yes" ; then + error_exit "libssh required for --enable-libssh" + fi + libssh=no + fi +fi + +########################################## +# Check for libssh 0.8 +# This is done like this instead of using the LIBSSH_VERSION_* and +# SSH_VERSION_* macros because some distributions in the past shipped +# snapshots of the future 0.8 from Git, and those snapshots did not +# have updated version numbers (still referring to 0.7.0). + +if test "$libssh" = "yes"; then + cat > $TMPC < +int main(void) { return ssh_get_server_publickey(NULL, NULL); } +EOF + if compile_prog "$libssh_cflags" "$libssh_libs"; then + libssh_cflags="-DHAVE_LIBSSH_0_8 $libssh_cflags" + fi +fi + +########################################## +# linux-aio probe + +if test "$linux_aio" != "no" ; then + cat > $TMPC < +#include +#include +int main(void) { io_setup(0, NULL); io_set_eventfd(NULL, 0); eventfd(0, 0); return 0; } +EOF + if compile_prog "" "-laio" ; then + linux_aio=yes + else + if test "$linux_aio" = "yes" ; then + feature_not_found "linux AIO" "Install libaio devel" + fi + linux_aio=no + fi +fi +########################################## +# linux-io-uring probe + +if test "$linux_io_uring" != "no" ; then + if $pkg_config liburing; then + linux_io_uring_cflags=$($pkg_config --cflags liburing) + linux_io_uring_libs=$($pkg_config --libs liburing) + linux_io_uring=yes + else + if test "$linux_io_uring" = "yes" ; then + feature_not_found "linux io_uring" "Install liburing devel" + fi + linux_io_uring=no + fi +fi + +########################################## +# TPM emulation is only on POSIX + +if test "$tpm" = ""; then + if test "$mingw32" = "yes"; then + tpm=no + else + tpm=yes + fi +elif test "$tpm" = "yes"; then + if test "$mingw32" = "yes" ; then + error_exit "TPM emulation only available on POSIX systems" + fi +fi + +########################################## +# attr probe + +libattr_libs= +if test "$attr" != "no" ; then + cat > $TMPC < +#include +#ifdef CONFIG_LIBATTR +#include +#else +#include +#endif +int main(void) { getxattr(NULL, NULL, NULL, 0); setxattr(NULL, NULL, NULL, 0, 0); return 0; } +EOF + if compile_prog "" "" ; then + attr=yes + # Older distros have , and need -lattr: + elif compile_prog "-DCONFIG_LIBATTR" "-lattr" ; then + attr=yes + libattr_libs="-lattr" + libattr=yes + else + if test "$attr" = "yes" ; then + feature_not_found "ATTR" "Install libc6 or libattr devel" + fi + attr=no + fi +fi + +########################################## +# iovec probe +cat > $TMPC < +#include +#include +int main(void) { return sizeof(struct iovec); } +EOF +iovec=no +if compile_prog "" "" ; then + iovec=yes +fi + +########################################## +# preadv probe +cat > $TMPC < +#include +#include +int main(void) { return preadv(0, 0, 0, 0); } +EOF +preadv=no +if compile_prog "" "" ; then + preadv=yes +fi + +########################################## +# fdt probe + +case "$fdt" in + auto | enabled | internal) + # Simpler to always update submodule, even if not needed. + if test -e "${source_path}/.git" && test $git_update = 'yes' ; then + git_submodules="${git_submodules} dtc" + fi + ;; +esac + +########################################## +# opengl probe (for sdl2, gtk, milkymist-tmu2) + +gbm="no" +if $pkg_config gbm; then + gbm_cflags="$($pkg_config --cflags gbm)" + gbm_libs="$($pkg_config --libs gbm)" + gbm="yes" +fi + +if test "$opengl" != "no" ; then + opengl_pkgs="epoxy gbm" + if $pkg_config $opengl_pkgs; then + opengl_cflags="$($pkg_config --cflags $opengl_pkgs)" + opengl_libs="$($pkg_config --libs $opengl_pkgs)" + opengl=yes + if test "$gtk" = "yes" && $pkg_config --exists "$gtkpackage >= 3.16"; then + gtk_gl="yes" + fi + else + if test "$opengl" = "yes" ; then + feature_not_found "opengl" "Please install opengl (mesa) devel pkgs: $opengl_pkgs" + fi + opengl_cflags="" + opengl_libs="" + opengl=no + fi +fi + +if test "$opengl" = "yes"; then + cat > $TMPC << EOF +#include +#ifndef EGL_MESA_image_dma_buf_export +# error mesa/epoxy lacks support for dmabufs (mesa 10.6+) +#endif +int main(void) { return 0; } +EOF + if compile_prog "" "" ; then + opengl_dmabuf=yes + fi +fi + +if test "$opengl" = "yes" && test "$have_x11" = "yes"; then + for target in $target_list; do + case $target in + lm32-softmmu) # milkymist-tmu2 requires X11 and OpenGL + need_x11=yes + ;; + esac + done +fi + +########################################## +# libxml2 probe +if test "$libxml2" != "no" ; then + if $pkg_config --exists libxml-2.0; then + libxml2="yes" + libxml2_cflags=$($pkg_config --cflags libxml-2.0) + libxml2_libs=$($pkg_config --libs libxml-2.0) + else + if test "$libxml2" = "yes"; then + feature_not_found "libxml2" "Install libxml2 devel" + fi + libxml2="no" + fi +fi + +########################################## +# glusterfs probe +if test "$glusterfs" != "no" ; then + if $pkg_config --atleast-version=3 glusterfs-api; then + glusterfs="yes" + glusterfs_cflags=$($pkg_config --cflags glusterfs-api) + glusterfs_libs=$($pkg_config --libs glusterfs-api) + if $pkg_config --atleast-version=4 glusterfs-api; then + glusterfs_xlator_opt="yes" + fi + if $pkg_config --atleast-version=5 glusterfs-api; then + glusterfs_discard="yes" + fi + if $pkg_config --atleast-version=6 glusterfs-api; then + glusterfs_fallocate="yes" + glusterfs_zerofill="yes" + fi + cat > $TMPC << EOF +#include + +int +main(void) +{ + /* new glfs_ftruncate() passes two additional args */ + return glfs_ftruncate(NULL, 0, NULL, NULL); +} +EOF + if compile_prog "$glusterfs_cflags" "$glusterfs_libs" ; then + glusterfs_ftruncate_has_stat="yes" + fi + cat > $TMPC << EOF +#include + +/* new glfs_io_cbk() passes two additional glfs_stat structs */ +static void +glusterfs_iocb(glfs_fd_t *fd, ssize_t ret, struct glfs_stat *prestat, struct glfs_stat *poststat, void *data) +{} + +int +main(void) +{ + glfs_io_cbk iocb = &glusterfs_iocb; + iocb(NULL, 0 , NULL, NULL, NULL); + return 0; +} +EOF + if compile_prog "$glusterfs_cflags" "$glusterfs_libs" ; then + glusterfs_iocb_has_stat="yes" + fi + else + if test "$glusterfs" = "yes" ; then + feature_not_found "GlusterFS backend support" \ + "Install glusterfs-api devel >= 3" + fi + glusterfs="no" + fi +fi + +# Check for inotify functions when we are building linux-user +# emulator. This is done because older glibc versions don't +# have syscall stubs for these implemented. In that case we +# don't provide them even if kernel supports them. +# +inotify=no +cat > $TMPC << EOF +#include + +int +main(void) +{ + /* try to start inotify */ + return inotify_init(); +} +EOF +if compile_prog "" "" ; then + inotify=yes +fi + +inotify1=no +cat > $TMPC << EOF +#include + +int +main(void) +{ + /* try to start inotify */ + return inotify_init1(0); +} +EOF +if compile_prog "" "" ; then + inotify1=yes +fi + +# check if pipe2 is there +pipe2=no +cat > $TMPC << EOF +#include +#include + +int main(void) +{ + int pipefd[2]; + return pipe2(pipefd, O_CLOEXEC); +} +EOF +if compile_prog "" "" ; then + pipe2=yes +fi + +# check if accept4 is there +accept4=no +cat > $TMPC << EOF +#include +#include + +int main(void) +{ + accept4(0, NULL, NULL, SOCK_CLOEXEC); + return 0; +} +EOF +if compile_prog "" "" ; then + accept4=yes +fi + +# check if tee/splice is there. vmsplice was added same time. +splice=no +cat > $TMPC << EOF +#include +#include +#include + +int main(void) +{ + int len, fd = 0; + len = tee(STDIN_FILENO, STDOUT_FILENO, INT_MAX, SPLICE_F_NONBLOCK); + splice(STDIN_FILENO, NULL, fd, NULL, len, SPLICE_F_MOVE); + return 0; +} +EOF +if compile_prog "" "" ; then + splice=yes +fi + +########################################## +# libnuma probe + +if test "$numa" != "no" ; then + cat > $TMPC << EOF +#include +int main(void) { return numa_available(); } +EOF + + if compile_prog "" "-lnuma" ; then + numa=yes + numa_libs="-lnuma" + else + if test "$numa" = "yes" ; then + feature_not_found "numa" "install numactl devel" + fi + numa=no + fi +fi + +malloc=system +if test "$tcmalloc" = "yes" && test "$jemalloc" = "yes" ; then + echo "ERROR: tcmalloc && jemalloc can't be used at the same time" + exit 1 +elif test "$tcmalloc" = "yes" ; then + malloc=tcmalloc +elif test "$jemalloc" = "yes" ; then + malloc=jemalloc +fi + +########################################## +# signalfd probe +signalfd="no" +cat > $TMPC << EOF +#include +#include +#include +int main(void) { return syscall(SYS_signalfd, -1, NULL, _NSIG / 8); } +EOF + +if compile_prog "" "" ; then + signalfd=yes +fi + +# check if optreset global is declared by +optreset="no" +cat > $TMPC << EOF +#include +int main(void) { return optreset; } +EOF + +if compile_prog "" "" ; then + optreset=yes +fi + +# check if eventfd is supported +eventfd=no +cat > $TMPC << EOF +#include + +int main(void) +{ + return eventfd(0, EFD_NONBLOCK | EFD_CLOEXEC); +} +EOF +if compile_prog "" "" ; then + eventfd=yes +fi + +# check if memfd is supported +memfd=no +cat > $TMPC << EOF +#include + +int main(void) +{ + return memfd_create("foo", MFD_ALLOW_SEALING); +} +EOF +if compile_prog "" "" ; then + memfd=yes +fi + +# check for usbfs +have_usbfs=no +if test "$linux_user" = "yes"; then + cat > $TMPC << EOF +#include + +#ifndef USBDEVFS_GET_CAPABILITIES +#error "USBDEVFS_GET_CAPABILITIES undefined" +#endif + +#ifndef USBDEVFS_DISCONNECT_CLAIM +#error "USBDEVFS_DISCONNECT_CLAIM undefined" +#endif + +int main(void) +{ + return 0; +} +EOF + if compile_prog "" ""; then + have_usbfs=yes + fi +fi + +# check for fallocate +fallocate=no +cat > $TMPC << EOF +#include + +int main(void) +{ + fallocate(0, 0, 0, 0); + return 0; +} +EOF +if compile_prog "" "" ; then + fallocate=yes +fi + +# check for fallocate hole punching +fallocate_punch_hole=no +cat > $TMPC << EOF +#include +#include + +int main(void) +{ + fallocate(0, FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE, 0, 0); + return 0; +} +EOF +if compile_prog "" "" ; then + fallocate_punch_hole=yes +fi + +# check that fallocate supports range zeroing inside the file +fallocate_zero_range=no +cat > $TMPC << EOF +#include +#include + +int main(void) +{ + fallocate(0, FALLOC_FL_ZERO_RANGE, 0, 0); + return 0; +} +EOF +if compile_prog "" "" ; then + fallocate_zero_range=yes +fi + +# check for posix_fallocate +posix_fallocate=no +cat > $TMPC << EOF +#include + +int main(void) +{ + posix_fallocate(0, 0, 0); + return 0; +} +EOF +if compile_prog "" "" ; then + posix_fallocate=yes +fi + +# check for sync_file_range +sync_file_range=no +cat > $TMPC << EOF +#include + +int main(void) +{ + sync_file_range(0, 0, 0, 0); + return 0; +} +EOF +if compile_prog "" "" ; then + sync_file_range=yes +fi + +# check for linux/fiemap.h and FS_IOC_FIEMAP +fiemap=no +cat > $TMPC << EOF +#include +#include +#include + +int main(void) +{ + ioctl(0, FS_IOC_FIEMAP, 0); + return 0; +} +EOF +if compile_prog "" "" ; then + fiemap=yes +fi + +# check for dup3 +dup3=no +cat > $TMPC << EOF +#include + +int main(void) +{ + dup3(0, 0, 0); + return 0; +} +EOF +if compile_prog "" "" ; then + dup3=yes +fi + +# check for ppoll support +ppoll=no +cat > $TMPC << EOF +#include + +int main(void) +{ + struct pollfd pfd = { .fd = 0, .events = 0, .revents = 0 }; + ppoll(&pfd, 1, 0, 0); + return 0; +} +EOF +if compile_prog "" "" ; then + ppoll=yes +fi + +# check for prctl(PR_SET_TIMERSLACK , ... ) support +prctl_pr_set_timerslack=no +cat > $TMPC << EOF +#include + +int main(void) +{ + prctl(PR_SET_TIMERSLACK, 1, 0, 0, 0); + return 0; +} +EOF +if compile_prog "" "" ; then + prctl_pr_set_timerslack=yes +fi + +# check for epoll support +epoll=no +cat > $TMPC << EOF +#include + +int main(void) +{ + epoll_create(0); + return 0; +} +EOF +if compile_prog "" "" ; then + epoll=yes +fi + +# epoll_create1 is a later addition +# so we must check separately for its presence +epoll_create1=no +cat > $TMPC << EOF +#include + +int main(void) +{ + /* Note that we use epoll_create1 as a value, not as + * a function being called. This is necessary so that on + * old SPARC glibc versions where the function was present in + * the library but not declared in the header file we will + * fail the configure check. (Otherwise we will get a compiler + * warning but not an error, and will proceed to fail the + * qemu compile where we compile with -Werror.) + */ + return (int)(uintptr_t)&epoll_create1; +} +EOF +if compile_prog "" "" ; then + epoll_create1=yes +fi + +# check for sendfile support +sendfile=no +cat > $TMPC << EOF +#include + +int main(void) +{ + return sendfile(0, 0, 0, 0); +} +EOF +if compile_prog "" "" ; then + sendfile=yes +fi + +# check for timerfd support (glibc 2.8 and newer) +timerfd=no +cat > $TMPC << EOF +#include + +int main(void) +{ + return(timerfd_create(CLOCK_REALTIME, 0)); +} +EOF +if compile_prog "" "" ; then + timerfd=yes +fi + +# check for setns and unshare support +setns=no +cat > $TMPC << EOF +#include + +int main(void) +{ + int ret; + ret = setns(0, 0); + ret = unshare(0); + return ret; +} +EOF +if compile_prog "" "" ; then + setns=yes +fi + +# clock_adjtime probe +clock_adjtime=no +cat > $TMPC < + +int main(void) +{ + return clock_adjtime(0, 0); +} +EOF +clock_adjtime=no +if compile_prog "" "" ; then + clock_adjtime=yes +fi + +# syncfs probe +syncfs=no +cat > $TMPC < + +int main(void) +{ + return syncfs(0); +} +EOF +syncfs=no +if compile_prog "" "" ; then + syncfs=yes +fi + +# check for kcov support (kernel must be 4.4+, compiled with certain options) +kcov=no +if check_include sys/kcov.h ; then + kcov=yes +fi + +# check for btrfs filesystem support (kernel must be 3.9+) +btrfs=no +if check_include linux/btrfs.h ; then + btrfs=yes +fi + +# Search for bswap_32 function +byteswap_h=no +cat > $TMPC << EOF +#include +int main(void) { return bswap_32(0); } +EOF +if compile_prog "" "" ; then + byteswap_h=yes +fi + +# Search for bswap32 function +bswap_h=no +cat > $TMPC << EOF +#include +#include +#include +int main(void) { return bswap32(0); } +EOF +if compile_prog "" "" ; then + bswap_h=yes +fi + +########################################## +# Do we have libiscsi >= 1.9.0 +if test "$libiscsi" != "no" ; then + if $pkg_config --atleast-version=1.9.0 libiscsi; then + libiscsi="yes" + libiscsi_cflags=$($pkg_config --cflags libiscsi) + libiscsi_libs=$($pkg_config --libs libiscsi) + else + if test "$libiscsi" = "yes" ; then + feature_not_found "libiscsi" "Install libiscsi >= 1.9.0" + fi + libiscsi="no" + fi +fi + +########################################## +# Do we need librt +# uClibc provides 2 versions of clock_gettime(), one with realtime +# support and one without. This means that the clock_gettime() don't +# need -lrt. We still need it for timer_create() so we check for this +# function in addition. +cat > $TMPC < +#include +int main(void) { + timer_create(CLOCK_REALTIME, NULL, NULL); + return clock_gettime(CLOCK_REALTIME, NULL); +} +EOF + +if compile_prog "" "" ; then + : +# we need pthread for static linking. use previous pthread test result +elif compile_prog "" "$pthread_lib -lrt" ; then + LIBS="$LIBS -lrt" +fi + +# Check whether we have openpty() in either libc or libutil +cat > $TMPC << EOF +extern int openpty(int *am, int *as, char *name, void *termp, void *winp); +int main(void) { return openpty(0, 0, 0, 0, 0); } +EOF + +have_openpty="no" +if compile_prog "" "" ; then + have_openpty="yes" +else + if compile_prog "" "-lutil" ; then + have_openpty="yes" + fi +fi + +########################################## +# spice probe +if test "$spice" != "no" ; then + cat > $TMPC << EOF +#include +int main(void) { spice_server_new(); return 0; } +EOF + spice_cflags=$($pkg_config --cflags spice-protocol spice-server 2>/dev/null) + spice_libs=$($pkg_config --libs spice-protocol spice-server 2>/dev/null) + if $pkg_config --atleast-version=0.12.5 spice-server && \ + $pkg_config --atleast-version=0.12.3 spice-protocol && \ + compile_prog "$spice_cflags" "$spice_libs" ; then + spice="yes" + else + if test "$spice" = "yes" ; then + feature_not_found "spice" \ + "Install spice-server(>=0.12.5) and spice-protocol(>=0.12.3) devel" + fi + spice="no" + fi +fi + +# check for smartcard support +if test "$smartcard" != "no"; then + if $pkg_config --atleast-version=2.5.1 libcacard; then + libcacard_cflags=$($pkg_config --cflags libcacard) + libcacard_libs=$($pkg_config --libs libcacard) + smartcard="yes" + else + if test "$smartcard" = "yes"; then + feature_not_found "smartcard" "Install libcacard devel" + fi + smartcard="no" + fi +fi + +# check for libusb +if test "$libusb" != "no" ; then + if $pkg_config --atleast-version=1.0.13 libusb-1.0; then + libusb="yes" + libusb_cflags=$($pkg_config --cflags libusb-1.0) + libusb_libs=$($pkg_config --libs libusb-1.0) + else + if test "$libusb" = "yes"; then + feature_not_found "libusb" "Install libusb devel >= 1.0.13" + fi + libusb="no" + fi +fi + +# check for usbredirparser for usb network redirection support +if test "$usb_redir" != "no" ; then + if $pkg_config --atleast-version=0.6 libusbredirparser-0.5; then + usb_redir="yes" + usb_redir_cflags=$($pkg_config --cflags libusbredirparser-0.5) + usb_redir_libs=$($pkg_config --libs libusbredirparser-0.5) + else + if test "$usb_redir" = "yes"; then + feature_not_found "usb-redir" "Install usbredir devel" + fi + usb_redir="no" + fi +fi + +########################################## +# check if we have VSS SDK headers for win + +if test "$mingw32" = "yes" && test "$guest_agent" != "no" && \ + test "$vss_win32_sdk" != "no" ; then + case "$vss_win32_sdk" in + "") vss_win32_include="-isystem $source_path" ;; + *\ *) # The SDK is installed in "Program Files" by default, but we cannot + # handle path with spaces. So we symlink the headers into ".sdk/vss". + vss_win32_include="-isystem $source_path/.sdk/vss" + symlink "$vss_win32_sdk/inc" "$source_path/.sdk/vss/inc" + ;; + *) vss_win32_include="-isystem $vss_win32_sdk" + esac + cat > $TMPC << EOF +#define __MIDL_user_allocate_free_DEFINED__ +#include +int main(void) { return VSS_CTX_BACKUP; } +EOF + if compile_prog "$vss_win32_include" "" ; then + guest_agent_with_vss="yes" + QEMU_CFLAGS="$QEMU_CFLAGS $vss_win32_include" + libs_qga="-lole32 -loleaut32 -lshlwapi -lstdc++ -Wl,--enable-stdcall-fixup $libs_qga" + qga_vss_provider="qga/vss-win32/qga-vss.dll qga/vss-win32/qga-vss.tlb" + else + if test "$vss_win32_sdk" != "" ; then + echo "ERROR: Please download and install Microsoft VSS SDK:" + echo "ERROR: http://www.microsoft.com/en-us/download/details.aspx?id=23490" + echo "ERROR: On POSIX-systems, you can extract the SDK headers by:" + echo "ERROR: scripts/extract-vsssdk-headers setup.exe" + echo "ERROR: The headers are extracted in the directory \`inc'." + feature_not_found "VSS support" + fi + guest_agent_with_vss="no" + fi +fi + +########################################## +# lookup Windows platform SDK (if not specified) +# The SDK is needed only to build .tlb (type library) file of guest agent +# VSS provider from the source. It is usually unnecessary because the +# pre-compiled .tlb file is included. + +if test "$mingw32" = "yes" && test "$guest_agent" != "no" && \ + test "$guest_agent_with_vss" = "yes" ; then + if test -z "$win_sdk"; then + programfiles="$PROGRAMFILES" + test -n "$PROGRAMW6432" && programfiles="$PROGRAMW6432" + if test -n "$programfiles"; then + win_sdk=$(ls -d "$programfiles/Microsoft SDKs/Windows/v"* | tail -1) 2>/dev/null + else + feature_not_found "Windows SDK" + fi + elif test "$win_sdk" = "no"; then + win_sdk="" + fi +fi + +########################################## +# check if mingw environment provides a recent ntddscsi.h +if test "$mingw32" = "yes" && test "$guest_agent" != "no"; then + cat > $TMPC << EOF +#include +#include +int main(void) { +#if !defined(IOCTL_SCSI_GET_ADDRESS) +#error Missing required ioctl definitions +#endif + SCSI_ADDRESS addr = { .Lun = 0, .TargetId = 0, .PathId = 0 }; + return addr.Lun; +} +EOF + if compile_prog "" "" ; then + guest_agent_ntddscsi=yes + libs_qga="-lsetupapi -lcfgmgr32 $libs_qga" + fi +fi + +########################################## +# virgl renderer probe + +if test "$virglrenderer" != "no" ; then + cat > $TMPC << EOF +#include +int main(void) { virgl_renderer_poll(); return 0; } +EOF + virgl_cflags=$($pkg_config --cflags virglrenderer 2>/dev/null) + virgl_libs=$($pkg_config --libs virglrenderer 2>/dev/null) + virgl_version=$($pkg_config --modversion virglrenderer 2>/dev/null) + if $pkg_config virglrenderer >/dev/null 2>&1 && \ + compile_prog "$virgl_cflags" "$virgl_libs" ; then + virglrenderer="yes" + else + if test "$virglrenderer" = "yes" ; then + feature_not_found "virglrenderer" + fi + virglrenderer="no" + fi +fi + +########################################## +# capstone + +case "$capstone" in + auto | enabled | internal) + # Simpler to always update submodule, even if not needed. + if test -e "${source_path}/.git" && test $git_update = 'yes' ; then + git_submodules="${git_submodules} capstone" + fi + ;; +esac + +########################################## +# check if we have fdatasync + +fdatasync=no +cat > $TMPC << EOF +#include +int main(void) { +#if defined(_POSIX_SYNCHRONIZED_IO) && _POSIX_SYNCHRONIZED_IO > 0 +return fdatasync(0); +#else +#error Not supported +#endif +} +EOF +if compile_prog "" "" ; then + fdatasync=yes +fi + +########################################## +# check if we have madvise + +madvise=no +cat > $TMPC << EOF +#include +#include +#include +int main(void) { return madvise(NULL, 0, MADV_DONTNEED); } +EOF +if compile_prog "" "" ; then + madvise=yes +fi + +########################################## +# check if we have posix_madvise + +posix_madvise=no +cat > $TMPC << EOF +#include +#include +int main(void) { return posix_madvise(NULL, 0, POSIX_MADV_DONTNEED); } +EOF +if compile_prog "" "" ; then + posix_madvise=yes +fi + +########################################## +# check if we have posix_memalign() + +posix_memalign=no +cat > $TMPC << EOF +#include +int main(void) { + void *p; + return posix_memalign(&p, 8, 8); +} +EOF +if compile_prog "" "" ; then + posix_memalign=yes +fi + +########################################## +# check if we have posix_syslog + +posix_syslog=no +cat > $TMPC << EOF +#include +int main(void) { openlog("qemu", LOG_PID, LOG_DAEMON); syslog(LOG_INFO, "configure"); return 0; } +EOF +if compile_prog "" "" ; then + posix_syslog=yes +fi + +########################################## +# check if we have sem_timedwait + +sem_timedwait=no +cat > $TMPC << EOF +#include +int main(void) { sem_t s; struct timespec t = {0}; return sem_timedwait(&s, &t); } +EOF +if compile_prog "" "" ; then + sem_timedwait=yes +fi + +########################################## +# check if we have strchrnul + +strchrnul=no +cat > $TMPC << EOF +#include +int main(void); +// Use a haystack that the compiler shouldn't be able to constant fold +char *haystack = (char*)&main; +int main(void) { return strchrnul(haystack, 'x') != &haystack[6]; } +EOF +if compile_prog "" "" ; then + strchrnul=yes +fi + +######################################### +# check if we have st_atim + +st_atim=no +cat > $TMPC << EOF +#include +#include +int main(void) { return offsetof(struct stat, st_atim); } +EOF +if compile_prog "" "" ; then + st_atim=yes +fi + +########################################## +# check if trace backend exists + +$python "$source_path/scripts/tracetool.py" "--backends=$trace_backends" --check-backends > /dev/null 2> /dev/null +if test "$?" -ne 0 ; then + error_exit "invalid trace backends" \ + "Please choose supported trace backends." +fi + +########################################## +# For 'ust' backend, test if ust headers are present +if have_backend "ust"; then + cat > $TMPC << EOF +#include +int main(void) { return 0; } +EOF + if compile_prog "" "-Wl,--no-as-needed -ldl" ; then + if $pkg_config lttng-ust --exists; then + lttng_ust_libs=$($pkg_config --libs lttng-ust) + else + lttng_ust_libs="-llttng-ust -ldl" + fi + if $pkg_config liburcu-bp --exists; then + urcu_bp_libs=$($pkg_config --libs liburcu-bp) + else + urcu_bp_libs="-lurcu-bp" + fi + else + error_exit "Trace backend 'ust' missing lttng-ust header files" + fi +fi + +########################################## +# For 'dtrace' backend, test if 'dtrace' command is present +if have_backend "dtrace"; then + if ! has 'dtrace' ; then + error_exit "dtrace command is not found in PATH $PATH" + fi + trace_backend_stap="no" + if has 'stap' ; then + trace_backend_stap="yes" + + # Workaround to avoid dtrace(1) producing a file with 'hidden' symbol + # visibility. Define STAP_SDT_V2 to produce 'default' symbol visibility + # instead. QEMU --enable-modules depends on this because the SystemTap + # semaphores are linked into the main binary and not the module's shared + # object. + QEMU_CFLAGS="$QEMU_CFLAGS -DSTAP_SDT_V2" + fi +fi + +########################################## +# check and set a backend for coroutine + +# We prefer ucontext, but it's not always possible. The fallback +# is sigcontext. On Windows the only valid backend is the Windows +# specific one. + +ucontext_works=no +if test "$darwin" != "yes"; then + cat > $TMPC << EOF +#include +#ifdef __stub_makecontext +#error Ignoring glibc stub makecontext which will always fail +#endif +int main(void) { makecontext(0, 0, 0); return 0; } +EOF + if compile_prog "" "" ; then + ucontext_works=yes + fi +fi + +if test "$coroutine" = ""; then + if test "$mingw32" = "yes"; then + coroutine=win32 + elif test "$ucontext_works" = "yes"; then + coroutine=ucontext + else + coroutine=sigaltstack + fi +else + case $coroutine in + windows) + if test "$mingw32" != "yes"; then + error_exit "'windows' coroutine backend only valid for Windows" + fi + # Unfortunately the user visible backend name doesn't match the + # coroutine-*.c filename for this case, so we have to adjust it here. + coroutine=win32 + ;; + ucontext) + if test "$ucontext_works" != "yes"; then + feature_not_found "ucontext" + fi + ;; + sigaltstack) + if test "$mingw32" = "yes"; then + error_exit "only the 'windows' coroutine backend is valid for Windows" + fi + ;; + *) + error_exit "unknown coroutine backend $coroutine" + ;; + esac +fi + +if test "$coroutine_pool" = ""; then + coroutine_pool=yes +fi + +if test "$debug_stack_usage" = "yes"; then + if test "$coroutine_pool" = "yes"; then + echo "WARN: disabling coroutine pool for stack usage debugging" + coroutine_pool=no + fi +fi + +################################################## +# SafeStack + + +if test "$safe_stack" = "yes"; then +cat > $TMPC << EOF +int main(int argc, char *argv[]) +{ +#if ! __has_feature(safe_stack) +#error SafeStack Disabled +#endif + return 0; +} +EOF + flag="-fsanitize=safe-stack" + # Check that safe-stack is supported and enabled. + if compile_prog "-Werror $flag" "$flag"; then + # Flag needed both at compilation and at linking + QEMU_CFLAGS="$QEMU_CFLAGS $flag" + QEMU_LDFLAGS="$QEMU_LDFLAGS $flag" + else + error_exit "SafeStack not supported by your compiler" + fi + if test "$coroutine" != "ucontext"; then + error_exit "SafeStack is only supported by the coroutine backend ucontext" + fi +else +cat > $TMPC << EOF +int main(int argc, char *argv[]) +{ +#if defined(__has_feature) +#if __has_feature(safe_stack) +#error SafeStack Enabled +#endif +#endif + return 0; +} +EOF +if test "$safe_stack" = "no"; then + # Make sure that safe-stack is disabled + if ! compile_prog "-Werror" ""; then + # SafeStack was already enabled, try to explicitly remove the feature + flag="-fno-sanitize=safe-stack" + if ! compile_prog "-Werror $flag" "$flag"; then + error_exit "Configure cannot disable SafeStack" + fi + QEMU_CFLAGS="$QEMU_CFLAGS $flag" + QEMU_LDFLAGS="$QEMU_LDFLAGS $flag" + fi +else # "$safe_stack" = "" + # Set safe_stack to yes or no based on pre-existing flags + if compile_prog "-Werror" ""; then + safe_stack="no" + else + safe_stack="yes" + if test "$coroutine" != "ucontext"; then + error_exit "SafeStack is only supported by the coroutine backend ucontext" + fi + fi +fi +fi + +########################################## +# check if we have open_by_handle_at + +open_by_handle_at=no +cat > $TMPC << EOF +#include +#if !defined(AT_EMPTY_PATH) +# error missing definition +#else +int main(void) { struct file_handle fh; return open_by_handle_at(0, &fh, 0); } +#endif +EOF +if compile_prog "" "" ; then + open_by_handle_at=yes +fi + +######################################## +# check if we have linux/magic.h + +linux_magic_h=no +cat > $TMPC << EOF +#include +int main(void) { + return 0; +} +EOF +if compile_prog "" "" ; then + linux_magic_h=yes +fi + +######################################## +# check if we have valgrind/valgrind.h + +valgrind_h=no +cat > $TMPC << EOF +#include +int main(void) { + return 0; +} +EOF +if compile_prog "" "" ; then + valgrind_h=yes +fi + +######################################## +# check if environ is declared + +has_environ=no +cat > $TMPC << EOF +#include +int main(void) { + environ = 0; + return 0; +} +EOF +if compile_prog "" "" ; then + has_environ=yes +fi + +######################################## +# check if cpuid.h is usable. + +cat > $TMPC << EOF +#include +int main(void) { + unsigned a, b, c, d; + int max = __get_cpuid_max(0, 0); + + if (max >= 1) { + __cpuid(1, a, b, c, d); + } + + if (max >= 7) { + __cpuid_count(7, 0, a, b, c, d); + } + + return 0; +} +EOF +if compile_prog "" "" ; then + cpuid_h=yes +fi + +########################################## +# avx2 optimization requirement check +# +# There is no point enabling this if cpuid.h is not usable, +# since we won't be able to select the new routines. + +if test "$cpuid_h" = "yes" && test "$avx2_opt" != "no"; then + cat > $TMPC << EOF +#pragma GCC push_options +#pragma GCC target("avx2") +#include +#include +static int bar(void *a) { + __m256i x = *(__m256i *)a; + return _mm256_testz_si256(x, x); +} +int main(int argc, char *argv[]) { return bar(argv[0]); } +EOF + if compile_object "" ; then + avx2_opt="yes" + else + avx2_opt="no" + fi +fi + +########################################## +# avx512f optimization requirement check +# +# There is no point enabling this if cpuid.h is not usable, +# since we won't be able to select the new routines. +# by default, it is turned off. +# if user explicitly want to enable it, check environment + +if test "$cpuid_h" = "yes" && test "$avx512f_opt" = "yes"; then + cat > $TMPC << EOF +#pragma GCC push_options +#pragma GCC target("avx512f") +#include +#include +static int bar(void *a) { + __m512i x = *(__m512i *)a; + return _mm512_test_epi64_mask(x, x); +} +int main(int argc, char *argv[]) +{ + return bar(argv[0]); +} +EOF + if ! compile_object "" ; then + avx512f_opt="no" + fi +else + avx512f_opt="no" +fi + +######################################## +# check if __[u]int128_t is usable. + +int128=no +cat > $TMPC << EOF +__int128_t a; +__uint128_t b; +int main (void) { + a = a + b; + b = a * b; + a = a * a; + return 0; +} +EOF +if compile_prog "" "" ; then + int128=yes +fi + +######################################### +# See if 128-bit atomic operations are supported. + +atomic128=no +if test "$int128" = "yes"; then + cat > $TMPC << EOF +int main(void) +{ + unsigned __int128 x = 0, y = 0; + y = __atomic_load_16(&x, 0); + __atomic_store_16(&x, y, 0); + __atomic_compare_exchange_16(&x, &y, x, 0, 0, 0); + return 0; +} +EOF + if compile_prog "" "" ; then + atomic128=yes + fi +fi + +cmpxchg128=no +if test "$int128" = yes && test "$atomic128" = no; then + cat > $TMPC << EOF +int main(void) +{ + unsigned __int128 x = 0, y = 0; + __sync_val_compare_and_swap_16(&x, y, x); + return 0; +} +EOF + if compile_prog "" "" ; then + cmpxchg128=yes + fi +fi + +######################################### +# See if 64-bit atomic operations are supported. +# Note that without __atomic builtins, we can only +# assume atomic loads/stores max at pointer size. + +cat > $TMPC << EOF +#include +int main(void) +{ + uint64_t x = 0, y = 0; +#ifdef __ATOMIC_RELAXED + y = __atomic_load_n(&x, __ATOMIC_RELAXED); + __atomic_store_n(&x, y, __ATOMIC_RELAXED); + __atomic_compare_exchange_n(&x, &y, x, 0, __ATOMIC_RELAXED, __ATOMIC_RELAXED); + __atomic_exchange_n(&x, y, __ATOMIC_RELAXED); + __atomic_fetch_add(&x, y, __ATOMIC_RELAXED); +#else + typedef char is_host64[sizeof(void *) >= sizeof(uint64_t) ? 1 : -1]; + __sync_lock_test_and_set(&x, y); + __sync_val_compare_and_swap(&x, y, 0); + __sync_fetch_and_add(&x, y); +#endif + return 0; +} +EOF +if compile_prog "" "" ; then + atomic64=yes +fi + +######################################### +# See if --dynamic-list is supported by the linker +ld_dynamic_list="no" +if test "$static" = "no" ; then + cat > $TMPTXT < $TMPC < +void foo(void); + +void foo(void) +{ + printf("foo\n"); +} + +int main(void) +{ + foo(); + return 0; +} +EOF + + if compile_prog "" "-Wl,--dynamic-list=$TMPTXT" ; then + ld_dynamic_list="yes" + fi +fi + +######################################### +# See if -exported_symbols_list is supported by the linker + +ld_exported_symbols_list="no" +if test "$static" = "no" ; then + cat > $TMPTXT < $TMPC << EOF +int x = 1; +extern const int y __attribute__((alias("x"))); +int main(void) { return 0; } +EOF +if compile_prog "" "" ; then + attralias=yes +fi + +######################################## +# check if getauxval is available. + +getauxval=no +cat > $TMPC << EOF +#include +int main(void) { + return getauxval(AT_HWCAP) == 0; +} +EOF +if compile_prog "" "" ; then + getauxval=yes +fi + +######################################## +# check if ccache is interfering with +# semantic analysis of macros + +unset CCACHE_CPP2 +ccache_cpp2=no +cat > $TMPC << EOF +static const int Z = 1; +#define fn() ({ Z; }) +#define TAUT(X) ((X) == Z) +#define PAREN(X, Y) (X == Y) +#define ID(X) (X) +int main(int argc, char *argv[]) +{ + int x = 0, y = 0; + x = ID(x); + x = fn(); + fn(); + if (PAREN(x, y)) return 0; + if (TAUT(Z)) return 0; + return 0; +} +EOF + +if ! compile_object "-Werror"; then + ccache_cpp2=yes +fi + +################################################# +# clang does not support glibc + FORTIFY_SOURCE. + +if test "$fortify_source" != "no"; then + if echo | $cc -dM -E - | grep __clang__ > /dev/null 2>&1 ; then + fortify_source="no"; + elif test -n "$cxx" && has $cxx && + echo | $cxx -dM -E - | grep __clang__ >/dev/null 2>&1 ; then + fortify_source="no"; + else + fortify_source="yes" + fi +fi + +############################################### +# Check if copy_file_range is provided by glibc +have_copy_file_range=no +cat > $TMPC << EOF +#include +int main(void) { + copy_file_range(0, NULL, 0, NULL, 0, 0); + return 0; +} +EOF +if compile_prog "" "" ; then + have_copy_file_range=yes +fi + +########################################## +# check if struct fsxattr is available via linux/fs.h + +have_fsxattr=no +cat > $TMPC << EOF +#include +struct fsxattr foo; +int main(void) { + return 0; +} +EOF +if compile_prog "" "" ; then + have_fsxattr=yes +fi + +########################################## +# check for usable membarrier system call +if test "$membarrier" = "yes"; then + have_membarrier=no + if test "$mingw32" = "yes" ; then + have_membarrier=yes + elif test "$linux" = "yes" ; then + cat > $TMPC << EOF + #include + #include + #include + #include + int main(void) { + syscall(__NR_membarrier, MEMBARRIER_CMD_QUERY, 0); + syscall(__NR_membarrier, MEMBARRIER_CMD_SHARED, 0); + exit(0); + } +EOF + if compile_prog "" "" ; then + have_membarrier=yes + fi + fi + if test "$have_membarrier" = "no"; then + feature_not_found "membarrier" "membarrier system call not available" + fi +else + # Do not enable it by default even for Mingw32, because it doesn't + # work on Wine. + membarrier=no +fi + +########################################## +# check if rtnetlink.h exists and is useful +have_rtnetlink=no +cat > $TMPC << EOF +#include +int main(void) { + return IFLA_PROTO_DOWN; +} +EOF +if compile_prog "" "" ; then + have_rtnetlink=yes +fi + +########################################## +# check for usable AF_VSOCK environment +have_af_vsock=no +cat > $TMPC << EOF +#include +#include +#include +#if !defined(AF_VSOCK) +# error missing AF_VSOCK flag +#endif +#include +int main(void) { + int sock, ret; + struct sockaddr_vm svm; + socklen_t len = sizeof(svm); + sock = socket(AF_VSOCK, SOCK_STREAM, 0); + ret = getpeername(sock, (struct sockaddr *)&svm, &len); + if ((ret == -1) && (errno == ENOTCONN)) { + return 0; + } + return -1; +} +EOF +if compile_prog "" "" ; then + have_af_vsock=yes +fi + +########################################## +# check for usable AF_ALG environment +have_afalg=no +cat > $TMPC << EOF +#include +#include +#include +#include +int main(void) { + int sock; + sock = socket(AF_ALG, SOCK_SEQPACKET, 0); + return sock; +} +EOF +if compile_prog "" "" ; then + have_afalg=yes +fi +if test "$crypto_afalg" = "yes" +then + if test "$have_afalg" != "yes" + then + error_exit "AF_ALG requested but could not be detected" + fi +fi + + +################################################# +# check for sysmacros.h + +have_sysmacros=no +cat > $TMPC << EOF +#include +int main(void) { + return makedev(0, 0); +} +EOF +if compile_prog "" "" ; then + have_sysmacros=yes +fi + +########################################## +# check for _Static_assert() + +have_static_assert=no +cat > $TMPC << EOF +_Static_assert(1, "success"); +int main(void) { + return 0; +} +EOF +if compile_prog "" "" ; then + have_static_assert=yes +fi + +########################################## +# check for utmpx.h, it is missing e.g. on OpenBSD + +have_utmpx=no +cat > $TMPC << EOF +#include +struct utmpx user_info; +int main(void) { + return 0; +} +EOF +if compile_prog "" "" ; then + have_utmpx=yes +fi + +########################################## +# check for getrandom() + +have_getrandom=no +cat > $TMPC << EOF +#include +int main(void) { + return getrandom(0, 0, GRND_NONBLOCK); +} +EOF +if compile_prog "" "" ; then + have_getrandom=yes +fi + +########################################## +# checks for sanitizers + +have_asan=no +have_ubsan=no +have_asan_iface_h=no +have_asan_iface_fiber=no + +if test "$sanitizers" = "yes" ; then + write_c_skeleton + if compile_prog "$CPU_CFLAGS -Werror -fsanitize=address" ""; then + have_asan=yes + fi + + # we could use a simple skeleton for flags checks, but this also + # detect the static linking issue of ubsan, see also: + # https://gcc.gnu.org/bugzilla/show_bug.cgi?id=84285 + cat > $TMPC << EOF +#include +int main(void) { + void *tmp = malloc(10); + if (tmp != NULL) { + return *(int *)(tmp + 2); + } + return 1; +} +EOF + if compile_prog "$CPU_CFLAGS -Werror -fsanitize=undefined" ""; then + have_ubsan=yes + fi + + if check_include "sanitizer/asan_interface.h" ; then + have_asan_iface_h=yes + fi + + cat > $TMPC << EOF +#include +int main(void) { + __sanitizer_start_switch_fiber(0, 0, 0); + return 0; +} +EOF + if compile_prog "$CPU_CFLAGS -Werror -fsanitize=address" "" ; then + have_asan_iface_fiber=yes + fi +fi + +########################################## +# checks for fuzzer +if test "$fuzzing" = "yes" && test -z "${LIB_FUZZING_ENGINE+xxx}"; then + write_c_fuzzer_skeleton + if compile_prog "$CPU_CFLAGS -Werror -fsanitize=fuzzer" ""; then + have_fuzzer=yes + else + error_exit "Your compiler doesn't support -fsanitize=fuzzer" + exit 1 + fi +fi + +# Thread sanitizer is, for now, much noisier than the other sanitizers; +# keep it separate until that is not the case. +if test "$tsan" = "yes" && test "$sanitizers" = "yes"; then + error_exit "TSAN is not supported with other sanitiziers." +fi +have_tsan=no +have_tsan_iface_fiber=no +if test "$tsan" = "yes" ; then + write_c_skeleton + if compile_prog "$CPU_CFLAGS -Werror -fsanitize=thread" "" ; then + have_tsan=yes + fi + cat > $TMPC << EOF +#include +int main(void) { + __tsan_create_fiber(0); + return 0; +} +EOF + if compile_prog "$CPU_CFLAGS -Werror -fsanitize=thread" "" ; then + have_tsan_iface_fiber=yes + fi +fi + +########################################## +# check for libpmem + +if test "$libpmem" != "no"; then + if $pkg_config --exists "libpmem"; then + libpmem="yes" + libpmem_libs=$($pkg_config --libs libpmem) + libpmem_cflags=$($pkg_config --cflags libpmem) + else + if test "$libpmem" = "yes" ; then + feature_not_found "libpmem" "Install nvml or pmdk" + fi + libpmem="no" + fi +fi + +########################################## +# check for libdaxctl + +if test "$libdaxctl" != "no"; then + if $pkg_config --atleast-version=57 "libdaxctl"; then + libdaxctl="yes" + libdaxctl_libs=$($pkg_config --libs libdaxctl) + libdaxctl_cflags=$($pkg_config --cflags libdaxctl) + else + if test "$libdaxctl" = "yes" ; then + feature_not_found "libdaxctl" "Install libdaxctl" + fi + libdaxctl="no" + fi +fi + +########################################## +# check for slirp + +case "$slirp" in + auto | enabled | internal) + # Simpler to always update submodule, even if not needed. + if test -e "${source_path}/.git" && test $git_update = 'yes' ; then + git_submodules="${git_submodules} slirp" + fi + ;; +esac + +########################################## +# check for usable __NR_keyctl syscall + +if test "$linux" = "yes" ; then + + have_keyring=no + cat > $TMPC << EOF +#include +#include +#include +#include +int main(void) { + return syscall(__NR_keyctl, KEYCTL_READ, 0, NULL, NULL, 0); +} +EOF + if compile_prog "" "" ; then + have_keyring=yes + fi +fi +if test "$secret_keyring" != "no" +then + if test "$have_keyring" = "yes" + then + secret_keyring=yes + else + if test "$secret_keyring" = "yes" + then + error_exit "syscall __NR_keyctl requested, \ +but not implemented on your system" + else + secret_keyring=no + fi + fi +fi + +########################################## +# End of CC checks +# After here, no more $cc or $ld runs + +write_c_skeleton + +if test "$gcov" = "yes" ; then + : +elif test "$fortify_source" = "yes" ; then + QEMU_CFLAGS="-U_FORTIFY_SOURCE -D_FORTIFY_SOURCE=2 $QEMU_CFLAGS" + debug=no +fi + +case "$ARCH" in +alpha) + # Ensure there's only a single GP + QEMU_CFLAGS="-msmall-data $QEMU_CFLAGS" +;; +esac + +if test "$gprof" = "yes" ; then + QEMU_CFLAGS="-p $QEMU_CFLAGS" + QEMU_LDFLAGS="-p $QEMU_LDFLAGS" +fi + +if test "$have_asan" = "yes"; then + QEMU_CFLAGS="-fsanitize=address $QEMU_CFLAGS" + QEMU_LDFLAGS="-fsanitize=address $QEMU_LDFLAGS" + if test "$have_asan_iface_h" = "no" ; then + echo "ASAN build enabled, but ASAN header missing." \ + "Without code annotation, the report may be inferior." + elif test "$have_asan_iface_fiber" = "no" ; then + echo "ASAN build enabled, but ASAN header is too old." \ + "Without code annotation, the report may be inferior." + fi +fi +if test "$have_tsan" = "yes" ; then + if test "$have_tsan_iface_fiber" = "yes" ; then + QEMU_CFLAGS="-fsanitize=thread $QEMU_CFLAGS" + QEMU_LDFLAGS="-fsanitize=thread $QEMU_LDFLAGS" + else + error_exit "Cannot enable TSAN due to missing fiber annotation interface." + fi +elif test "$tsan" = "yes" ; then + error_exit "Cannot enable TSAN due to missing sanitize thread interface." +fi +if test "$have_ubsan" = "yes"; then + QEMU_CFLAGS="-fsanitize=undefined $QEMU_CFLAGS" + QEMU_LDFLAGS="-fsanitize=undefined $QEMU_LDFLAGS" +fi + +########################################## +# Do we have libnfs +if test "$libnfs" != "no" ; then + if $pkg_config --atleast-version=1.9.3 libnfs; then + libnfs="yes" + libnfs_libs=$($pkg_config --libs libnfs) + else + if test "$libnfs" = "yes" ; then + feature_not_found "libnfs" "Install libnfs devel >= 1.9.3" + fi + libnfs="no" + fi +fi + +########################################## + +# Exclude --warn-common with TSan to suppress warnings from the TSan libraries. +if test "$solaris" = "no" && test "$tsan" = "no"; then + if $ld --version 2>/dev/null | grep "GNU ld" >/dev/null 2>/dev/null ; then + QEMU_LDFLAGS="-Wl,--warn-common $QEMU_LDFLAGS" + fi +fi + +# Use ASLR, no-SEH and DEP if available +if test "$mingw32" = "yes" ; then + flags="--no-seh --nxcompat" + + # Disable ASLR for debug builds to allow debugging with gdb + if test "$debug" = "no" ; then + flags="--dynamicbase $flags" + fi + + for flag in $flags; do + if ld_has $flag ; then + QEMU_LDFLAGS="-Wl,$flag $QEMU_LDFLAGS" + fi + done +fi + +# We can only support ivshmem if we have eventfd +if [ "$eventfd" = "yes" ]; then + ivshmem=yes +fi + +if test "$softmmu" = yes ; then + if test "$linux" = yes; then + if test "$virtfs" != no && test "$cap_ng" = yes && test "$attr" = yes ; then + virtfs=yes + else + if test "$virtfs" = yes; then + error_exit "VirtFS requires libcap-ng devel and libattr devel" + fi + virtfs=no + fi + else + if test "$virtfs" = yes; then + error_exit "VirtFS is supported only on Linux" + fi + virtfs=no + fi +fi + +# Probe for guest agent support/options + +if [ "$guest_agent" != "no" ]; then + if [ "$softmmu" = no -a "$want_tools" = no ] ; then + guest_agent=no + elif [ "$linux" = "yes" -o "$bsd" = "yes" -o "$solaris" = "yes" -o "$mingw32" = "yes" ] ; then + guest_agent=yes + elif [ "$guest_agent" != yes ]; then + guest_agent=no + else + error_exit "Guest agent is not supported on this platform" + fi +fi + +# Guest agent Window MSI package + +if test "$guest_agent" != yes; then + if test "$guest_agent_msi" = yes; then + error_exit "MSI guest agent package requires guest agent enabled" + fi + guest_agent_msi=no +elif test "$mingw32" != "yes"; then + if test "$guest_agent_msi" = "yes"; then + error_exit "MSI guest agent package is available only for MinGW Windows cross-compilation" + fi + guest_agent_msi=no +elif ! has wixl; then + if test "$guest_agent_msi" = "yes"; then + error_exit "MSI guest agent package requires wixl tool installed ( usually from msitools package )" + fi + guest_agent_msi=no +else + # we support qemu-ga, mingw32, and wixl: default to MSI enabled if it wasn't + # disabled explicitly + if test "$guest_agent_msi" != "no"; then + guest_agent_msi=yes + fi +fi + +if test "$guest_agent_msi" = "yes"; then + if test "$guest_agent_with_vss" = "yes"; then + QEMU_GA_MSI_WITH_VSS="-D InstallVss" + fi + + if test "$QEMU_GA_MANUFACTURER" = ""; then + QEMU_GA_MANUFACTURER=QEMU + fi + + if test "$QEMU_GA_DISTRO" = ""; then + QEMU_GA_DISTRO=Linux + fi + + if test "$QEMU_GA_VERSION" = ""; then + QEMU_GA_VERSION=$(cat $source_path/VERSION) + fi + + QEMU_GA_MSI_MINGW_DLL_PATH="-D Mingw_dlls=$($pkg_config --variable=prefix glib-2.0)/bin" + + case "$cpu" in + x86_64) + QEMU_GA_MSI_ARCH="-a x64 -D Arch=64" + ;; + i386) + QEMU_GA_MSI_ARCH="-D Arch=32" + ;; + *) + error_exit "CPU $cpu not supported for building installation package" + ;; + esac +fi + +# Mac OS X ships with a broken assembler +roms= +if { test "$cpu" = "i386" || test "$cpu" = "x86_64"; } && \ + test "$targetos" != "Darwin" && test "$targetos" != "SunOS" && \ + test "$targetos" != "Haiku" && test "$softmmu" = yes ; then + # Different host OS linkers have different ideas about the name of the ELF + # emulation. Linux and OpenBSD/amd64 use 'elf_i386'; FreeBSD uses the _fbsd + # variant; OpenBSD/i386 uses the _obsd variant; and Windows uses i386pe. + for emu in elf_i386 elf_i386_fbsd elf_i386_obsd i386pe; do + if "$ld" -verbose 2>&1 | grep -q "^[[:space:]]*$emu[[:space:]]*$"; then + ld_i386_emulation="$emu" + roms="optionrom" + break + fi + done +fi + +# Only build s390-ccw bios if we're on s390x and the compiler has -march=z900 +if test "$cpu" = "s390x" && test "$softmmu" = yes ; then + write_c_skeleton + if compile_prog "-march=z900" ""; then + roms="$roms s390-ccw" + # SLOF is required for building the s390-ccw firmware on s390x, + # since it is using the libnet code from SLOF for network booting. + if test -e "${source_path}/.git" ; then + git_submodules="${git_submodules} roms/SLOF" + fi + fi +fi + +# Check that the C++ compiler exists and works with the C compiler. +# All the QEMU_CXXFLAGS are based on QEMU_CFLAGS. Keep this at the end to don't miss any other that could be added. +if has $cxx; then + cat > $TMPC < $TMPCXX < $config_host_mak +echo >> $config_host_mak + +echo all: >> $config_host_mak +echo "GIT=$git" >> $config_host_mak +echo "GIT_SUBMODULES=$git_submodules" >> $config_host_mak +echo "GIT_UPDATE=$git_update" >> $config_host_mak + +echo "ARCH=$ARCH" >> $config_host_mak + +if test "$default_devices" = "yes" ; then + echo "CONFIG_MINIKCONF_MODE=--defconfig" >> $config_host_mak +else + echo "CONFIG_MINIKCONF_MODE=--allnoconfig" >> $config_host_mak +fi +if test "$debug_tcg" = "yes" ; then + echo "CONFIG_DEBUG_TCG=y" >> $config_host_mak +fi +if test "$strip_opt" = "yes" ; then + echo "STRIP=${strip}" >> $config_host_mak +fi +if test "$bigendian" = "yes" ; then + echo "HOST_WORDS_BIGENDIAN=y" >> $config_host_mak +fi +if test "$mingw32" = "yes" ; then + echo "CONFIG_WIN32=y" >> $config_host_mak + rc_version=$(cat $source_path/VERSION) + version_major=${rc_version%%.*} + rc_version=${rc_version#*.} + version_minor=${rc_version%%.*} + rc_version=${rc_version#*.} + version_subminor=${rc_version%%.*} + version_micro=0 + echo "CONFIG_FILEVERSION=$version_major,$version_minor,$version_subminor,$version_micro" >> $config_host_mak + echo "CONFIG_PRODUCTVERSION=$version_major,$version_minor,$version_subminor,$version_micro" >> $config_host_mak + if test "$guest_agent_with_vss" = "yes" ; then + echo "CONFIG_QGA_VSS=y" >> $config_host_mak + echo "QGA_VSS_PROVIDER=$qga_vss_provider" >> $config_host_mak + echo "WIN_SDK=\"$win_sdk\"" >> $config_host_mak + fi + if test "$guest_agent_ntddscsi" = "yes" ; then + echo "CONFIG_QGA_NTDDSCSI=y" >> $config_host_mak + fi + if test "$guest_agent_msi" = "yes"; then + echo "CONFIG_QGA_MSI=y" >> $config_host_mak + echo "QEMU_GA_MSI_MINGW_DLL_PATH=${QEMU_GA_MSI_MINGW_DLL_PATH}" >> $config_host_mak + echo "QEMU_GA_MSI_WITH_VSS=${QEMU_GA_MSI_WITH_VSS}" >> $config_host_mak + echo "QEMU_GA_MSI_ARCH=${QEMU_GA_MSI_ARCH}" >> $config_host_mak + echo "QEMU_GA_MANUFACTURER=${QEMU_GA_MANUFACTURER}" >> $config_host_mak + echo "QEMU_GA_DISTRO=${QEMU_GA_DISTRO}" >> $config_host_mak + echo "QEMU_GA_VERSION=${QEMU_GA_VERSION}" >> $config_host_mak + fi +else + echo "CONFIG_POSIX=y" >> $config_host_mak +fi + +if test "$linux" = "yes" ; then + echo "CONFIG_LINUX=y" >> $config_host_mak +fi + +if test "$darwin" = "yes" ; then + echo "CONFIG_DARWIN=y" >> $config_host_mak +fi + +if test "$solaris" = "yes" ; then + echo "CONFIG_SOLARIS=y" >> $config_host_mak +fi +if test "$haiku" = "yes" ; then + echo "CONFIG_HAIKU=y" >> $config_host_mak +fi +if test "$static" = "yes" ; then + echo "CONFIG_STATIC=y" >> $config_host_mak +fi +if test "$profiler" = "yes" ; then + echo "CONFIG_PROFILER=y" >> $config_host_mak +fi +if test "$want_tools" = "yes" ; then + echo "CONFIG_TOOLS=y" >> $config_host_mak +fi +if test "$guest_agent" = "yes" ; then + echo "CONFIG_GUEST_AGENT=y" >> $config_host_mak +fi +echo "CONFIG_SMBD_COMMAND=\"$smbd\"" >> $config_host_mak +if test "$vde" = "yes" ; then + echo "CONFIG_VDE=y" >> $config_host_mak + echo "VDE_LIBS=$vde_libs" >> $config_host_mak +fi +if test "$netmap" = "yes" ; then + echo "CONFIG_NETMAP=y" >> $config_host_mak +fi +if test "$l2tpv3" = "yes" ; then + echo "CONFIG_L2TPV3=y" >> $config_host_mak +fi +if test "$gprof" = "yes" ; then + echo "CONFIG_GPROF=y" >> $config_host_mak +fi +if test "$cap_ng" = "yes" ; then + echo "CONFIG_LIBCAP_NG=y" >> $config_host_mak + echo "LIBCAP_NG_LIBS=$cap_libs" >> $config_host_mak +fi +echo "CONFIG_AUDIO_DRIVERS=$audio_drv_list" >> $config_host_mak +for drv in $audio_drv_list; do + def=CONFIG_AUDIO_$(echo $drv | LC_ALL=C tr '[a-z]' '[A-Z]') + echo "$def=y" >> $config_host_mak +done +if test "$alsa" = "yes" ; then + echo "CONFIG_ALSA=y" >> $config_host_mak +fi +echo "ALSA_LIBS=$alsa_libs" >> $config_host_mak +echo "ALSA_CFLAGS=$alsa_cflags" >> $config_host_mak +if test "$libpulse" = "yes" ; then + echo "CONFIG_LIBPULSE=y" >> $config_host_mak +fi +echo "PULSE_LIBS=$pulse_libs" >> $config_host_mak +echo "PULSE_CFLAGS=$pulse_cflags" >> $config_host_mak +echo "COREAUDIO_LIBS=$coreaudio_libs" >> $config_host_mak +echo "DSOUND_LIBS=$dsound_libs" >> $config_host_mak +echo "OSS_LIBS=$oss_libs" >> $config_host_mak +if test "$libjack" = "yes" ; then + echo "CONFIG_LIBJACK=y" >> $config_host_mak +fi +echo "JACK_LIBS=$jack_libs" >> $config_host_mak +if test "$audio_win_int" = "yes" ; then + echo "CONFIG_AUDIO_WIN_INT=y" >> $config_host_mak +fi +echo "CONFIG_BDRV_RW_WHITELIST=$block_drv_rw_whitelist" >> $config_host_mak +echo "CONFIG_BDRV_RO_WHITELIST=$block_drv_ro_whitelist" >> $config_host_mak +if test "$xfs" = "yes" ; then + echo "CONFIG_XFS=y" >> $config_host_mak +fi +qemu_version=$(head $source_path/VERSION) +echo "PKGVERSION=$pkgversion" >>$config_host_mak +echo "SRC_PATH=$source_path" >> $config_host_mak +echo "TARGET_DIRS=$target_list" >> $config_host_mak +if test "$modules" = "yes"; then + # $shacmd can generate a hash started with digit, which the compiler doesn't + # like as an symbol. So prefix it with an underscore + echo "CONFIG_STAMP=_$( (echo $qemu_version; cat $0) | $shacmd - | cut -f1 -d\ )" >> $config_host_mak + echo "CONFIG_MODULES=y" >> $config_host_mak +fi +if test "$module_upgrades" = "yes"; then + echo "CONFIG_MODULE_UPGRADES=y" >> $config_host_mak +fi +if test "$have_x11" = "yes" && test "$need_x11" = "yes"; then + echo "CONFIG_X11=y" >> $config_host_mak + echo "X11_CFLAGS=$x11_cflags" >> $config_host_mak + echo "X11_LIBS=$x11_libs" >> $config_host_mak +fi +if test "$pipe2" = "yes" ; then + echo "CONFIG_PIPE2=y" >> $config_host_mak +fi +if test "$accept4" = "yes" ; then + echo "CONFIG_ACCEPT4=y" >> $config_host_mak +fi +if test "$splice" = "yes" ; then + echo "CONFIG_SPLICE=y" >> $config_host_mak +fi +if test "$eventfd" = "yes" ; then + echo "CONFIG_EVENTFD=y" >> $config_host_mak +fi +if test "$memfd" = "yes" ; then + echo "CONFIG_MEMFD=y" >> $config_host_mak +fi +if test "$have_usbfs" = "yes" ; then + echo "CONFIG_USBFS=y" >> $config_host_mak +fi +if test "$fallocate" = "yes" ; then + echo "CONFIG_FALLOCATE=y" >> $config_host_mak +fi +if test "$fallocate_punch_hole" = "yes" ; then + echo "CONFIG_FALLOCATE_PUNCH_HOLE=y" >> $config_host_mak +fi +if test "$fallocate_zero_range" = "yes" ; then + echo "CONFIG_FALLOCATE_ZERO_RANGE=y" >> $config_host_mak +fi +if test "$posix_fallocate" = "yes" ; then + echo "CONFIG_POSIX_FALLOCATE=y" >> $config_host_mak +fi +if test "$sync_file_range" = "yes" ; then + echo "CONFIG_SYNC_FILE_RANGE=y" >> $config_host_mak +fi +if test "$fiemap" = "yes" ; then + echo "CONFIG_FIEMAP=y" >> $config_host_mak +fi +if test "$dup3" = "yes" ; then + echo "CONFIG_DUP3=y" >> $config_host_mak +fi +if test "$ppoll" = "yes" ; then + echo "CONFIG_PPOLL=y" >> $config_host_mak +fi +if test "$prctl_pr_set_timerslack" = "yes" ; then + echo "CONFIG_PRCTL_PR_SET_TIMERSLACK=y" >> $config_host_mak +fi +if test "$epoll" = "yes" ; then + echo "CONFIG_EPOLL=y" >> $config_host_mak +fi +if test "$epoll_create1" = "yes" ; then + echo "CONFIG_EPOLL_CREATE1=y" >> $config_host_mak +fi +if test "$sendfile" = "yes" ; then + echo "CONFIG_SENDFILE=y" >> $config_host_mak +fi +if test "$timerfd" = "yes" ; then + echo "CONFIG_TIMERFD=y" >> $config_host_mak +fi +if test "$setns" = "yes" ; then + echo "CONFIG_SETNS=y" >> $config_host_mak +fi +if test "$clock_adjtime" = "yes" ; then + echo "CONFIG_CLOCK_ADJTIME=y" >> $config_host_mak +fi +if test "$syncfs" = "yes" ; then + echo "CONFIG_SYNCFS=y" >> $config_host_mak +fi +if test "$kcov" = "yes" ; then + echo "CONFIG_KCOV=y" >> $config_host_mak +fi +if test "$btrfs" = "yes" ; then + echo "CONFIG_BTRFS=y" >> $config_host_mak +fi +if test "$inotify" = "yes" ; then + echo "CONFIG_INOTIFY=y" >> $config_host_mak +fi +if test "$inotify1" = "yes" ; then + echo "CONFIG_INOTIFY1=y" >> $config_host_mak +fi +if test "$sem_timedwait" = "yes" ; then + echo "CONFIG_SEM_TIMEDWAIT=y" >> $config_host_mak +fi +if test "$strchrnul" = "yes" ; then + echo "HAVE_STRCHRNUL=y" >> $config_host_mak +fi +if test "$st_atim" = "yes" ; then + echo "HAVE_STRUCT_STAT_ST_ATIM=y" >> $config_host_mak +fi +if test "$byteswap_h" = "yes" ; then + echo "CONFIG_BYTESWAP_H=y" >> $config_host_mak +fi +if test "$bswap_h" = "yes" ; then + echo "CONFIG_MACHINE_BSWAP_H=y" >> $config_host_mak +fi +if test "$curl" = "yes" ; then + echo "CONFIG_CURL=y" >> $config_host_mak + echo "CURL_CFLAGS=$curl_cflags" >> $config_host_mak + echo "CURL_LIBS=$curl_libs" >> $config_host_mak +fi +if test "$brlapi" = "yes" ; then + echo "CONFIG_BRLAPI=y" >> $config_host_mak + echo "BRLAPI_LIBS=$brlapi_libs" >> $config_host_mak +fi +if test "$gtk" = "yes" ; then + echo "CONFIG_GTK=y" >> $config_host_mak + echo "GTK_CFLAGS=$gtk_cflags" >> $config_host_mak + echo "GTK_LIBS=$gtk_libs" >> $config_host_mak + if test "$gtk_gl" = "yes" ; then + echo "CONFIG_GTK_GL=y" >> $config_host_mak + fi +fi +if test "$gio" = "yes" ; then + echo "CONFIG_GIO=y" >> $config_host_mak + echo "GIO_CFLAGS=$gio_cflags" >> $config_host_mak + echo "GIO_LIBS=$gio_libs" >> $config_host_mak + echo "GDBUS_CODEGEN=$gdbus_codegen" >> $config_host_mak +fi +echo "CONFIG_TLS_PRIORITY=\"$tls_priority\"" >> $config_host_mak +if test "$gnutls" = "yes" ; then + echo "CONFIG_GNUTLS=y" >> $config_host_mak + echo "GNUTLS_CFLAGS=$gnutls_cflags" >> $config_host_mak + echo "GNUTLS_LIBS=$gnutls_libs" >> $config_host_mak +fi +if test "$gcrypt" = "yes" ; then + echo "CONFIG_GCRYPT=y" >> $config_host_mak + if test "$gcrypt_hmac" = "yes" ; then + echo "CONFIG_GCRYPT_HMAC=y" >> $config_host_mak + fi + echo "GCRYPT_CFLAGS=$gcrypt_cflags" >> $config_host_mak + echo "GCRYPT_LIBS=$gcrypt_libs" >> $config_host_mak +fi +if test "$nettle" = "yes" ; then + echo "CONFIG_NETTLE=y" >> $config_host_mak + echo "CONFIG_NETTLE_VERSION_MAJOR=${nettle_version%%.*}" >> $config_host_mak + echo "NETTLE_CFLAGS=$nettle_cflags" >> $config_host_mak + echo "NETTLE_LIBS=$nettle_libs" >> $config_host_mak +fi +if test "$qemu_private_xts" = "yes" ; then + echo "CONFIG_QEMU_PRIVATE_XTS=y" >> $config_host_mak +fi +if test "$tasn1" = "yes" ; then + echo "CONFIG_TASN1=y" >> $config_host_mak +fi +if test "$auth_pam" = "yes" ; then + echo "CONFIG_AUTH_PAM=y" >> $config_host_mak +fi +if test "$have_ifaddrs_h" = "yes" ; then + echo "HAVE_IFADDRS_H=y" >> $config_host_mak +fi +if test "$have_drm_h" = "yes" ; then + echo "HAVE_DRM_H=y" >> $config_host_mak +fi +if test "$have_broken_size_max" = "yes" ; then + echo "HAVE_BROKEN_SIZE_MAX=y" >> $config_host_mak +fi +if test "$have_openpty" = "yes" ; then + echo "HAVE_OPENPTY=y" >> $config_host_mak +fi +if test "$have_sys_signal_h" = "yes" ; then + echo "HAVE_SYS_SIGNAL_H=y" >> $config_host_mak +fi + +# Work around a system header bug with some kernel/XFS header +# versions where they both try to define 'struct fsxattr': +# xfs headers will not try to redefine structs from linux headers +# if this macro is set. +if test "$have_fsxattr" = "yes" ; then + echo "HAVE_FSXATTR=y" >> $config_host_mak +fi +if test "$have_copy_file_range" = "yes" ; then + echo "HAVE_COPY_FILE_RANGE=y" >> $config_host_mak +fi +if test "$vte" = "yes" ; then + echo "CONFIG_VTE=y" >> $config_host_mak + echo "VTE_CFLAGS=$vte_cflags" >> $config_host_mak + echo "VTE_LIBS=$vte_libs" >> $config_host_mak +fi +if test "$virglrenderer" = "yes" ; then + echo "CONFIG_VIRGL=y" >> $config_host_mak + echo "VIRGL_CFLAGS=$virgl_cflags" >> $config_host_mak + echo "VIRGL_LIBS=$virgl_libs" >> $config_host_mak +fi +if test "$xen" = "enabled" ; then + echo "CONFIG_XEN_BACKEND=y" >> $config_host_mak + echo "CONFIG_XEN_CTRL_INTERFACE_VERSION=$xen_ctrl_version" >> $config_host_mak + echo "XEN_CFLAGS=$xen_cflags" >> $config_host_mak + echo "XEN_LIBS=$xen_libs" >> $config_host_mak +fi +if test "$linux_aio" = "yes" ; then + echo "CONFIG_LINUX_AIO=y" >> $config_host_mak +fi +if test "$linux_io_uring" = "yes" ; then + echo "CONFIG_LINUX_IO_URING=y" >> $config_host_mak + echo "LINUX_IO_URING_CFLAGS=$linux_io_uring_cflags" >> $config_host_mak + echo "LINUX_IO_URING_LIBS=$linux_io_uring_libs" >> $config_host_mak +fi +if test "$attr" = "yes" ; then + echo "CONFIG_ATTR=y" >> $config_host_mak + echo "LIBATTR_LIBS=$libattr_libs" >> $config_host_mak +fi +if test "$libattr" = "yes" ; then + echo "CONFIG_LIBATTR=y" >> $config_host_mak +fi +if test "$virtfs" = "yes" ; then + echo "CONFIG_VIRTFS=y" >> $config_host_mak +fi +if test "$vhost_scsi" = "yes" ; then + echo "CONFIG_VHOST_SCSI=y" >> $config_host_mak +fi +if test "$vhost_net" = "yes" ; then + echo "CONFIG_VHOST_NET=y" >> $config_host_mak +fi +if test "$vhost_net_user" = "yes" ; then + echo "CONFIG_VHOST_NET_USER=y" >> $config_host_mak +fi +if test "$vhost_net_vdpa" = "yes" ; then + echo "CONFIG_VHOST_NET_VDPA=y" >> $config_host_mak +fi +if test "$vhost_crypto" = "yes" ; then + echo "CONFIG_VHOST_CRYPTO=y" >> $config_host_mak +fi +if test "$vhost_vsock" = "yes" ; then + echo "CONFIG_VHOST_VSOCK=y" >> $config_host_mak + if test "$vhost_user" = "yes" ; then + echo "CONFIG_VHOST_USER_VSOCK=y" >> $config_host_mak + fi +fi +if test "$vhost_kernel" = "yes" ; then + echo "CONFIG_VHOST_KERNEL=y" >> $config_host_mak +fi +if test "$vhost_user" = "yes" ; then + echo "CONFIG_VHOST_USER=y" >> $config_host_mak +fi +if test "$vhost_vdpa" = "yes" ; then + echo "CONFIG_VHOST_VDPA=y" >> $config_host_mak +fi +if test "$vhost_user_fs" = "yes" ; then + echo "CONFIG_VHOST_USER_FS=y" >> $config_host_mak +fi +if test "$iovec" = "yes" ; then + echo "CONFIG_IOVEC=y" >> $config_host_mak +fi +if test "$preadv" = "yes" ; then + echo "CONFIG_PREADV=y" >> $config_host_mak +fi +if test "$membarrier" = "yes" ; then + echo "CONFIG_MEMBARRIER=y" >> $config_host_mak +fi +if test "$signalfd" = "yes" ; then + echo "CONFIG_SIGNALFD=y" >> $config_host_mak +fi +if test "$optreset" = "yes" ; then + echo "HAVE_OPTRESET=y" >> $config_host_mak +fi +if test "$tcg" = "enabled"; then + if test "$tcg_interpreter" = "yes" ; then + echo "CONFIG_TCG_INTERPRETER=y" >> $config_host_mak + fi +fi +if test "$fdatasync" = "yes" ; then + echo "CONFIG_FDATASYNC=y" >> $config_host_mak +fi +if test "$madvise" = "yes" ; then + echo "CONFIG_MADVISE=y" >> $config_host_mak +fi +if test "$posix_madvise" = "yes" ; then + echo "CONFIG_POSIX_MADVISE=y" >> $config_host_mak +fi +if test "$posix_memalign" = "yes" ; then + echo "CONFIG_POSIX_MEMALIGN=y" >> $config_host_mak +fi +if test "$spice" = "yes" ; then + echo "CONFIG_SPICE=y" >> $config_host_mak + echo "SPICE_CFLAGS=$spice_cflags" >> $config_host_mak + echo "SPICE_LIBS=$spice_libs" >> $config_host_mak +fi + +if test "$smartcard" = "yes" ; then + echo "CONFIG_SMARTCARD=y" >> $config_host_mak + echo "SMARTCARD_CFLAGS=$libcacard_cflags" >> $config_host_mak + echo "SMARTCARD_LIBS=$libcacard_libs" >> $config_host_mak +fi + +if test "$libusb" = "yes" ; then + echo "CONFIG_USB_LIBUSB=y" >> $config_host_mak + echo "LIBUSB_CFLAGS=$libusb_cflags" >> $config_host_mak + echo "LIBUSB_LIBS=$libusb_libs" >> $config_host_mak +fi + +if test "$usb_redir" = "yes" ; then + echo "CONFIG_USB_REDIR=y" >> $config_host_mak + echo "USB_REDIR_CFLAGS=$usb_redir_cflags" >> $config_host_mak + echo "USB_REDIR_LIBS=$usb_redir_libs" >> $config_host_mak +fi + +if test "$opengl" = "yes" ; then + echo "CONFIG_OPENGL=y" >> $config_host_mak + echo "OPENGL_CFLAGS=$opengl_cflags" >> $config_host_mak + echo "OPENGL_LIBS=$opengl_libs" >> $config_host_mak + if test "$opengl_dmabuf" = "yes" ; then + echo "CONFIG_OPENGL_DMABUF=y" >> $config_host_mak + fi +fi + +if test "$gbm" = "yes" ; then + echo "CONFIG_GBM=y" >> $config_host_mak + echo "GBM_LIBS=$gbm_libs" >> $config_host_mak + echo "GBM_CFLAGS=$gbm_cflags" >> $config_host_mak +fi + + +if test "$avx2_opt" = "yes" ; then + echo "CONFIG_AVX2_OPT=y" >> $config_host_mak +fi + +if test "$avx512f_opt" = "yes" ; then + echo "CONFIG_AVX512F_OPT=y" >> $config_host_mak +fi + +if test "$lzo" = "yes" ; then + echo "CONFIG_LZO=y" >> $config_host_mak + echo "LZO_LIBS=$lzo_libs" >> $config_host_mak +fi + +if test "$snappy" = "yes" ; then + echo "CONFIG_SNAPPY=y" >> $config_host_mak + echo "SNAPPY_LIBS=$snappy_libs" >> $config_host_mak +fi + +if test "$bzip2" = "yes" ; then + echo "CONFIG_BZIP2=y" >> $config_host_mak + echo "BZIP2_LIBS=-lbz2" >> $config_host_mak +fi + +if test "$lzfse" = "yes" ; then + echo "CONFIG_LZFSE=y" >> $config_host_mak + echo "LZFSE_LIBS=-llzfse" >> $config_host_mak +fi + +if test "$zstd" = "yes" ; then + echo "CONFIG_ZSTD=y" >> $config_host_mak + echo "ZSTD_CFLAGS=$zstd_cflags" >> $config_host_mak + echo "ZSTD_LIBS=$zstd_libs" >> $config_host_mak +fi + +if test "$libiscsi" = "yes" ; then + echo "CONFIG_LIBISCSI=y" >> $config_host_mak + echo "LIBISCSI_CFLAGS=$libiscsi_cflags" >> $config_host_mak + echo "LIBISCSI_LIBS=$libiscsi_libs" >> $config_host_mak +fi + +if test "$libnfs" = "yes" ; then + echo "CONFIG_LIBNFS=y" >> $config_host_mak + echo "LIBNFS_LIBS=$libnfs_libs" >> $config_host_mak +fi + +if test "$seccomp" = "yes"; then + echo "CONFIG_SECCOMP=y" >> $config_host_mak + echo "SECCOMP_CFLAGS=$seccomp_cflags" >> $config_host_mak + echo "SECCOMP_LIBS=$seccomp_libs" >> $config_host_mak +fi + +# XXX: suppress that +if [ "$bsd" = "yes" ] ; then + echo "CONFIG_BSD=y" >> $config_host_mak +fi + +if test "$qom_cast_debug" = "yes" ; then + echo "CONFIG_QOM_CAST_DEBUG=y" >> $config_host_mak +fi +if test "$rbd" = "yes" ; then + echo "CONFIG_RBD=y" >> $config_host_mak + echo "RBD_LIBS=$rbd_libs" >> $config_host_mak +fi + +echo "CONFIG_COROUTINE_BACKEND=$coroutine" >> $config_host_mak +if test "$coroutine_pool" = "yes" ; then + echo "CONFIG_COROUTINE_POOL=1" >> $config_host_mak +else + echo "CONFIG_COROUTINE_POOL=0" >> $config_host_mak +fi + +if test "$debug_stack_usage" = "yes" ; then + echo "CONFIG_DEBUG_STACK_USAGE=y" >> $config_host_mak +fi + +if test "$crypto_afalg" = "yes" ; then + echo "CONFIG_AF_ALG=y" >> $config_host_mak +fi + +if test "$open_by_handle_at" = "yes" ; then + echo "CONFIG_OPEN_BY_HANDLE=y" >> $config_host_mak +fi + +if test "$linux_magic_h" = "yes" ; then + echo "CONFIG_LINUX_MAGIC_H=y" >> $config_host_mak +fi + +if test "$valgrind_h" = "yes" ; then + echo "CONFIG_VALGRIND_H=y" >> $config_host_mak +fi + +if test "$have_asan_iface_fiber" = "yes" ; then + echo "CONFIG_ASAN_IFACE_FIBER=y" >> $config_host_mak +fi + +if test "$have_tsan" = "yes" && test "$have_tsan_iface_fiber" = "yes" ; then + echo "CONFIG_TSAN=y" >> $config_host_mak +fi + +if test "$has_environ" = "yes" ; then + echo "CONFIG_HAS_ENVIRON=y" >> $config_host_mak +fi + +if test "$cpuid_h" = "yes" ; then + echo "CONFIG_CPUID_H=y" >> $config_host_mak +fi + +if test "$int128" = "yes" ; then + echo "CONFIG_INT128=y" >> $config_host_mak +fi + +if test "$atomic128" = "yes" ; then + echo "CONFIG_ATOMIC128=y" >> $config_host_mak +fi + +if test "$cmpxchg128" = "yes" ; then + echo "CONFIG_CMPXCHG128=y" >> $config_host_mak +fi + +if test "$atomic64" = "yes" ; then + echo "CONFIG_ATOMIC64=y" >> $config_host_mak +fi + +if test "$attralias" = "yes" ; then + echo "CONFIG_ATTRIBUTE_ALIAS=y" >> $config_host_mak +fi + +if test "$getauxval" = "yes" ; then + echo "CONFIG_GETAUXVAL=y" >> $config_host_mak +fi + +if test "$glusterfs" = "yes" ; then + echo "CONFIG_GLUSTERFS=y" >> $config_host_mak + echo "GLUSTERFS_CFLAGS=$glusterfs_cflags" >> $config_host_mak + echo "GLUSTERFS_LIBS=$glusterfs_libs" >> $config_host_mak +fi + +if test "$glusterfs_xlator_opt" = "yes" ; then + echo "CONFIG_GLUSTERFS_XLATOR_OPT=y" >> $config_host_mak +fi + +if test "$glusterfs_discard" = "yes" ; then + echo "CONFIG_GLUSTERFS_DISCARD=y" >> $config_host_mak +fi + +if test "$glusterfs_fallocate" = "yes" ; then + echo "CONFIG_GLUSTERFS_FALLOCATE=y" >> $config_host_mak +fi + +if test "$glusterfs_zerofill" = "yes" ; then + echo "CONFIG_GLUSTERFS_ZEROFILL=y" >> $config_host_mak +fi + +if test "$glusterfs_ftruncate_has_stat" = "yes" ; then + echo "CONFIG_GLUSTERFS_FTRUNCATE_HAS_STAT=y" >> $config_host_mak +fi + +if test "$glusterfs_iocb_has_stat" = "yes" ; then + echo "CONFIG_GLUSTERFS_IOCB_HAS_STAT=y" >> $config_host_mak +fi + +if test "$libssh" = "yes" ; then + echo "CONFIG_LIBSSH=y" >> $config_host_mak + echo "LIBSSH_CFLAGS=$libssh_cflags" >> $config_host_mak + echo "LIBSSH_LIBS=$libssh_libs" >> $config_host_mak +fi + +if test "$live_block_migration" = "yes" ; then + echo "CONFIG_LIVE_BLOCK_MIGRATION=y" >> $config_host_mak +fi + +if test "$tpm" = "yes"; then + echo 'CONFIG_TPM=y' >> $config_host_mak +fi + +echo "TRACE_BACKENDS=$trace_backends" >> $config_host_mak +if have_backend "nop"; then + echo "CONFIG_TRACE_NOP=y" >> $config_host_mak +fi +if have_backend "simple"; then + echo "CONFIG_TRACE_SIMPLE=y" >> $config_host_mak + # Set the appropriate trace file. + trace_file="\"$trace_file-\" FMT_pid" +fi +if have_backend "log"; then + echo "CONFIG_TRACE_LOG=y" >> $config_host_mak +fi +if have_backend "ust"; then + echo "CONFIG_TRACE_UST=y" >> $config_host_mak + echo "LTTNG_UST_LIBS=$lttng_ust_libs" >> $config_host_mak + echo "URCU_BP_LIBS=$urcu_bp_libs" >> $config_host_mak +fi +if have_backend "dtrace"; then + echo "CONFIG_TRACE_DTRACE=y" >> $config_host_mak + if test "$trace_backend_stap" = "yes" ; then + echo "CONFIG_TRACE_SYSTEMTAP=y" >> $config_host_mak + fi +fi +if have_backend "ftrace"; then + if test "$linux" = "yes" ; then + echo "CONFIG_TRACE_FTRACE=y" >> $config_host_mak + else + feature_not_found "ftrace(trace backend)" "ftrace requires Linux" + fi +fi +if have_backend "syslog"; then + if test "$posix_syslog" = "yes" ; then + echo "CONFIG_TRACE_SYSLOG=y" >> $config_host_mak + else + feature_not_found "syslog(trace backend)" "syslog not available" + fi +fi +echo "CONFIG_TRACE_FILE=$trace_file" >> $config_host_mak + +if test "$rdma" = "yes" ; then + echo "CONFIG_RDMA=y" >> $config_host_mak + echo "RDMA_LIBS=$rdma_libs" >> $config_host_mak +fi + +if test "$pvrdma" = "yes" ; then + echo "CONFIG_PVRDMA=y" >> $config_host_mak +fi + +if test "$have_rtnetlink" = "yes" ; then + echo "CONFIG_RTNETLINK=y" >> $config_host_mak +fi + +if test "$libxml2" = "yes" ; then + echo "CONFIG_LIBXML2=y" >> $config_host_mak + echo "LIBXML2_CFLAGS=$libxml2_cflags" >> $config_host_mak + echo "LIBXML2_LIBS=$libxml2_libs" >> $config_host_mak +fi + +if test "$replication" = "yes" ; then + echo "CONFIG_REPLICATION=y" >> $config_host_mak +fi + +if test "$have_af_vsock" = "yes" ; then + echo "CONFIG_AF_VSOCK=y" >> $config_host_mak +fi + +if test "$have_sysmacros" = "yes" ; then + echo "CONFIG_SYSMACROS=y" >> $config_host_mak +fi + +if test "$have_static_assert" = "yes" ; then + echo "CONFIG_STATIC_ASSERT=y" >> $config_host_mak +fi + +if test "$have_utmpx" = "yes" ; then + echo "HAVE_UTMPX=y" >> $config_host_mak +fi +if test "$have_getrandom" = "yes" ; then + echo "CONFIG_GETRANDOM=y" >> $config_host_mak +fi +if test "$ivshmem" = "yes" ; then + echo "CONFIG_IVSHMEM=y" >> $config_host_mak +fi +if test "$debug_mutex" = "yes" ; then + echo "CONFIG_DEBUG_MUTEX=y" >> $config_host_mak +fi + +# Hold two types of flag: +# CONFIG_THREAD_SETNAME_BYTHREAD - we've got a way of setting the name on +# a thread we have a handle to +# CONFIG_PTHREAD_SETNAME_NP_W_TID - A way of doing it on a particular +# platform +if test "$pthread_setname_np_w_tid" = "yes" ; then + echo "CONFIG_THREAD_SETNAME_BYTHREAD=y" >> $config_host_mak + echo "CONFIG_PTHREAD_SETNAME_NP_W_TID=y" >> $config_host_mak +elif test "$pthread_setname_np_wo_tid" = "yes" ; then + echo "CONFIG_THREAD_SETNAME_BYTHREAD=y" >> $config_host_mak + echo "CONFIG_PTHREAD_SETNAME_NP_WO_TID=y" >> $config_host_mak +fi + +if test "$libpmem" = "yes" ; then + echo "CONFIG_LIBPMEM=y" >> $config_host_mak + echo "LIBPMEM_LIBS=$libpmem_libs" >> $config_host_mak + echo "LIBPMEM_CFLAGS=$libpmem_cflags" >> $config_host_mak +fi + +if test "$libdaxctl" = "yes" ; then + echo "CONFIG_LIBDAXCTL=y" >> $config_host_mak + echo "LIBDAXCTL_LIBS=$libdaxctl_libs" >> $config_host_mak +fi + +if test "$bochs" = "yes" ; then + echo "CONFIG_BOCHS=y" >> $config_host_mak +fi +if test "$cloop" = "yes" ; then + echo "CONFIG_CLOOP=y" >> $config_host_mak +fi +if test "$dmg" = "yes" ; then + echo "CONFIG_DMG=y" >> $config_host_mak +fi +if test "$qcow1" = "yes" ; then + echo "CONFIG_QCOW1=y" >> $config_host_mak +fi +if test "$vdi" = "yes" ; then + echo "CONFIG_VDI=y" >> $config_host_mak +fi +if test "$vvfat" = "yes" ; then + echo "CONFIG_VVFAT=y" >> $config_host_mak +fi +if test "$qed" = "yes" ; then + echo "CONFIG_QED=y" >> $config_host_mak +fi +if test "$parallels" = "yes" ; then + echo "CONFIG_PARALLELS=y" >> $config_host_mak +fi +if test "$sheepdog" = "yes" ; then + add_to deprecated_features "sheepdog" + echo "CONFIG_SHEEPDOG=y" >> $config_host_mak +fi +if test "$pty_h" = "yes" ; then + echo "HAVE_PTY_H=y" >> $config_host_mak +fi +if test "$have_mlockall" = "yes" ; then + echo "HAVE_MLOCKALL=y" >> $config_host_mak +fi +if test "$fuzzing" = "yes" ; then + # If LIB_FUZZING_ENGINE is set, assume we are running on OSS-Fuzz, and the + # needed CFLAGS have already been provided + if test -z "${LIB_FUZZING_ENGINE+xxx}" ; then + QEMU_CFLAGS="$QEMU_CFLAGS -fsanitize=fuzzer-no-link" + FUZZ_EXE_LDFLAGS="-fsanitize=fuzzer" + else + FUZZ_EXE_LDFLAGS="$LIB_FUZZING_ENGINE" + fi +fi + +if test "$plugins" = "yes" ; then + echo "CONFIG_PLUGIN=y" >> $config_host_mak + # Copy the export object list to the build dir + if test "$ld_dynamic_list" = "yes" ; then + echo "CONFIG_HAS_LD_DYNAMIC_LIST=yes" >> $config_host_mak + ld_symbols=qemu-plugins-ld.symbols + cp "$source_path/plugins/qemu-plugins.symbols" $ld_symbols + elif test "$ld_exported_symbols_list" = "yes" ; then + echo "CONFIG_HAS_LD_EXPORTED_SYMBOLS_LIST=yes" >> $config_host_mak + ld64_symbols=qemu-plugins-ld64.symbols + echo "# Automatically generated by configure - do not modify" > $ld64_symbols + grep 'qemu_' "$source_path/plugins/qemu-plugins.symbols" | sed 's/;//g' | \ + sed -E 's/^[[:space:]]*(.*)/_\1/' >> $ld64_symbols + else + error_exit \ + "If \$plugins=yes, either \$ld_dynamic_list or " \ + "\$ld_exported_symbols_list should have been set to 'yes'." + fi +fi + +if test -n "$gdb_bin" ; then + echo "HAVE_GDB_BIN=$gdb_bin" >> $config_host_mak +fi + +if test "$secret_keyring" = "yes" ; then + echo "CONFIG_SECRET_KEYRING=y" >> $config_host_mak +fi + +echo "ROMS=$roms" >> $config_host_mak +echo "MAKE=$make" >> $config_host_mak +echo "PYTHON=$python" >> $config_host_mak +echo "GENISOIMAGE=$genisoimage" >> $config_host_mak +echo "MESON=$meson" >> $config_host_mak +echo "NINJA=$ninja" >> $config_host_mak +echo "CC=$cc" >> $config_host_mak +if $iasl -h > /dev/null 2>&1; then + echo "CONFIG_IASL=$iasl" >> $config_host_mak +fi +echo "CXX=$cxx" >> $config_host_mak +echo "OBJCC=$objcc" >> $config_host_mak +echo "AR=$ar" >> $config_host_mak +echo "ARFLAGS=$ARFLAGS" >> $config_host_mak +echo "AS=$as" >> $config_host_mak +echo "CCAS=$ccas" >> $config_host_mak +echo "CPP=$cpp" >> $config_host_mak +echo "OBJCOPY=$objcopy" >> $config_host_mak +echo "LD=$ld" >> $config_host_mak +echo "RANLIB=$ranlib" >> $config_host_mak +echo "NM=$nm" >> $config_host_mak +echo "PKG_CONFIG=$pkg_config_exe" >> $config_host_mak +echo "WINDRES=$windres" >> $config_host_mak +echo "CFLAGS_NOPIE=$CFLAGS_NOPIE" >> $config_host_mak +echo "QEMU_CFLAGS=$QEMU_CFLAGS" >> $config_host_mak +echo "QEMU_CXXFLAGS=$QEMU_CXXFLAGS" >> $config_host_mak +echo "GLIB_CFLAGS=$glib_cflags" >> $config_host_mak +echo "GLIB_LIBS=$glib_libs" >> $config_host_mak +if test "$glib_has_gslice" = "yes" ; then + echo "HAVE_GLIB_WITH_SLICE_ALLOCATOR=y" >> $config_host_mak +fi +echo "QEMU_LDFLAGS=$QEMU_LDFLAGS" >> $config_host_mak +echo "LD_I386_EMULATION=$ld_i386_emulation" >> $config_host_mak +echo "EXESUF=$EXESUF" >> $config_host_mak +echo "HOST_DSOSUF=$HOST_DSOSUF" >> $config_host_mak +echo "LIBS_QGA=$libs_qga" >> $config_host_mak +echo "TASN1_LIBS=$tasn1_libs" >> $config_host_mak +echo "TASN1_CFLAGS=$tasn1_cflags" >> $config_host_mak +if test "$gcov" = "yes" ; then + echo "CONFIG_GCOV=y" >> $config_host_mak +fi + +if test "$fuzzing" != "no"; then + echo "CONFIG_FUZZ=y" >> $config_host_mak +fi +echo "FUZZ_EXE_LDFLAGS=$FUZZ_EXE_LDFLAGS" >> $config_host_mak + +if test "$rng_none" = "yes"; then + echo "CONFIG_RNG_NONE=y" >> $config_host_mak +fi + +# use included Linux headers +if test "$linux" = "yes" ; then + mkdir -p linux-headers + case "$cpu" in + i386|x86_64|x32) + linux_arch=x86 + ;; + ppc|ppc64|ppc64le) + linux_arch=powerpc + ;; + s390x) + linux_arch=s390 + ;; + aarch64) + linux_arch=arm64 + ;; + mips64) + linux_arch=mips + ;; + *) + # For most CPUs the kernel architecture name and QEMU CPU name match. + linux_arch="$cpu" + ;; + esac + # For non-KVM architectures we will not have asm headers + if [ -e "$source_path/linux-headers/asm-$linux_arch" ]; then + symlink "$source_path/linux-headers/asm-$linux_arch" linux-headers/asm + fi +fi + +for target in $target_list; do + target_dir="$target" + target_name=$(echo $target | cut -d '-' -f 1) + mkdir -p $target_dir + case $target in + *-user) symlink "../qemu-$target_name" "$target_dir/qemu-$target_name" ;; + *) symlink "../qemu-system-$target_name" "$target_dir/qemu-system-$target_name" ;; + esac +done + +echo "CONFIG_QEMU_INTERP_PREFIX=$interp_prefix" | sed 's/%M/@0@/' >> $config_host_mak +if test "$default_targets" = "yes"; then + echo "CONFIG_DEFAULT_TARGETS=y" >> $config_host_mak +fi + +if test "$numa" = "yes"; then + echo "CONFIG_NUMA=y" >> $config_host_mak + echo "NUMA_LIBS=$numa_libs" >> $config_host_mak +fi + +if test "$ccache_cpp2" = "yes"; then + echo "export CCACHE_CPP2=y" >> $config_host_mak +fi + +if test "$safe_stack" = "yes"; then + echo "CONFIG_SAFESTACK=y" >> $config_host_mak +fi + +# If we're using a separate build tree, set it up now. +# DIRS are directories which we simply mkdir in the build tree; +# LINKS are things to symlink back into the source tree +# (these can be both files and directories). +# Caution: do not add files or directories here using wildcards. This +# will result in problems later if a new file matching the wildcard is +# added to the source tree -- nothing will cause configure to be rerun +# so the build tree will be missing the link back to the new file, and +# tests might fail. Prefer to keep the relevant files in their own +# directory and symlink the directory instead. +# UNLINK is used to remove symlinks from older development versions +# that might get into the way when doing "git update" without doing +# a "make distclean" in between. +DIRS="tests tests/tcg tests/tcg/lm32 tests/qapi-schema tests/qtest/libqos" +DIRS="$DIRS tests/qtest tests/qemu-iotests tests/vm tests/fp tests/qgraph" +DIRS="$DIRS docs docs/interop fsdev scsi" +DIRS="$DIRS pc-bios/optionrom pc-bios/s390-ccw" +DIRS="$DIRS roms/seabios" +DIRS="$DIRS contrib/plugins/" +LINKS="Makefile" +LINKS="$LINKS tests/tcg/lm32/Makefile" +LINKS="$LINKS tests/tcg/Makefile.target" +LINKS="$LINKS pc-bios/optionrom/Makefile" +LINKS="$LINKS pc-bios/s390-ccw/Makefile" +LINKS="$LINKS roms/seabios/Makefile" +LINKS="$LINKS pc-bios/qemu-icon.bmp" +LINKS="$LINKS .gdbinit scripts" # scripts needed by relative path in .gdbinit +LINKS="$LINKS tests/acceptance tests/data" +LINKS="$LINKS tests/qemu-iotests/check" +LINKS="$LINKS python" +LINKS="$LINKS contrib/plugins/Makefile " +UNLINK="pc-bios/keymaps" +for bios_file in \ + $source_path/pc-bios/*.bin \ + $source_path/pc-bios/*.elf \ + $source_path/pc-bios/*.lid \ + $source_path/pc-bios/*.rom \ + $source_path/pc-bios/*.dtb \ + $source_path/pc-bios/*.img \ + $source_path/pc-bios/openbios-* \ + $source_path/pc-bios/u-boot.* \ + $source_path/pc-bios/edk2-*.fd.bz2 \ + $source_path/pc-bios/palcode-* +do + LINKS="$LINKS pc-bios/$(basename $bios_file)" +done +mkdir -p $DIRS +for f in $LINKS ; do + if [ -e "$source_path/$f" ]; then + symlink "$source_path/$f" "$f" + fi +done +for f in $UNLINK ; do + if [ -L "$f" ]; then + rm -f "$f" + fi +done + +(for i in $cross_cc_vars; do + export $i +done +export target_list source_path use_containers +#$source_path/tests/tcg/configure.sh) +) + +# temporary config to build submodules +for rom in seabios; do + config_mak=roms/$rom/config.mak + echo "# Automatically generated by configure - do not modify" > $config_mak + echo "SRC_PATH=$source_path/roms/$rom" >> $config_mak + echo "AS=$as" >> $config_mak + echo "CCAS=$ccas" >> $config_mak + echo "CC=$cc" >> $config_mak + echo "BCC=bcc" >> $config_mak + echo "CPP=$cpp" >> $config_mak + echo "OBJCOPY=objcopy" >> $config_mak + echo "IASL=$iasl" >> $config_mak + echo "LD=$ld" >> $config_mak + echo "RANLIB=$ranlib" >> $config_mak +done + +# set up qemu-iotests in this build directory +iotests_common_env="tests/qemu-iotests/common.env" + +echo "# Automatically generated by configure - do not modify" > "$iotests_common_env" +echo >> "$iotests_common_env" +echo "export PYTHON='$python'" >> "$iotests_common_env" + +if test "$skip_meson" = no; then +cross="config-meson.cross.new" +meson_quote() { + echo "'$(echo $* | sed "s/ /','/g")'" +} + +echo "# Automatically generated by configure - do not modify" > $cross +echo "[properties]" >> $cross +test -z "$cxx" && echo "link_language = 'c'" >> $cross +echo "[built-in options]" >> $cross +echo "c_args = [${CFLAGS:+$(meson_quote $CFLAGS)}]" >> $cross +echo "cpp_args = [${CXXFLAGS:+$(meson_quote $CXXFLAGS)}]" >> $cross +echo "c_link_args = [${LDFLAGS:+$(meson_quote $LDFLAGS)}]" >> $cross +echo "cpp_link_args = [${LDFLAGS:+$(meson_quote $LDFLAGS)}]" >> $cross +echo "[binaries]" >> $cross +echo "c = [$(meson_quote $cc)]" >> $cross +test -n "$cxx" && echo "cpp = [$(meson_quote $cxx)]" >> $cross +echo "ar = [$(meson_quote $ar)]" >> $cross +echo "nm = [$(meson_quote $nm)]" >> $cross +echo "pkgconfig = [$(meson_quote $pkg_config_exe)]" >> $cross +echo "ranlib = [$(meson_quote $ranlib)]" >> $cross +if has $sdl2_config; then + echo "sdl2-config = [$(meson_quote $sdl2_config)]" >> $cross +fi +echo "strip = [$(meson_quote $strip)]" >> $cross +echo "windres = [$(meson_quote $windres)]" >> $cross +if test -n "$cross_prefix"; then + cross_arg="--cross-file config-meson.cross" + echo "[host_machine]" >> $cross + if test "$mingw32" = "yes" ; then + echo "system = 'windows'" >> $cross + fi + if test "$linux" = "yes" ; then + echo "system = 'linux'" >> $cross + fi + case "$ARCH" in + i386|x86_64) + echo "cpu_family = 'x86'" >> $cross + ;; + ppc64le) + echo "cpu_family = 'ppc64'" >> $cross + ;; + *) + echo "cpu_family = '$ARCH'" >> $cross + ;; + esac + echo "cpu = '$cpu'" >> $cross + if test "$bigendian" = "yes" ; then + echo "endian = 'big'" >> $cross + else + echo "endian = 'little'" >> $cross + fi +else + cross_arg="--native-file config-meson.cross" +fi +mv $cross config-meson.cross + +rm -rf meson-private meson-info meson-logs +unset staticpic +if ! version_ge "$($meson --version)" 0.56.0; then + staticpic=$(if test "$pie" = yes; then echo true; else echo false; fi) +fi +NINJA=$ninja $meson setup \ + --prefix "$prefix" \ + --libdir "$libdir" \ + --libexecdir "$libexecdir" \ + --bindir "$bindir" \ + --includedir "$includedir" \ + --datadir "$datadir" \ + --mandir "$mandir" \ + --sysconfdir "$sysconfdir" \ + --localedir "$localedir" \ + --localstatedir "$local_statedir" \ + -Ddocdir="$docdir" \ + -Dqemu_firmwarepath="$firmwarepath" \ + -Dqemu_suffix="$qemu_suffix" \ + -Doptimization=$(if test "$debug" = yes; then echo 0; else echo 2; fi) \ + -Ddebug=$(if test "$debug_info" = yes; then echo true; else echo false; fi) \ + -Dwerror=$(if test "$werror" = yes; then echo true; else echo false; fi) \ + -Dstrip=$(if test "$strip_opt" = yes; then echo true; else echo false; fi) \ + -Db_pie=$(if test "$pie" = yes; then echo true; else echo false; fi) \ + ${staticpic:+-Db_staticpic=$staticpic} \ + -Db_coverage=$(if test "$gcov" = yes; then echo true; else echo false; fi) \ + -Dmalloc=$malloc -Dmalloc_trim=$malloc_trim -Dsparse=$sparse \ + -Dkvm=$kvm -Dhax=$hax -Dwhpx=$whpx -Dhvf=$hvf \ + -Dxen=$xen -Dxen_pci_passthrough=$xen_pci_passthrough -Dtcg=$tcg \ + -Dcocoa=$cocoa -Dmpath=$mpath -Dsdl=$sdl -Dsdl_image=$sdl_image \ + -Dvnc=$vnc -Dvnc_sasl=$vnc_sasl -Dvnc_jpeg=$vnc_jpeg -Dvnc_png=$vnc_png \ + -Dgettext=$gettext -Dxkbcommon=$xkbcommon -Du2f=$u2f -Dvirtiofsd=$virtiofsd \ + -Dcapstone=$capstone -Dslirp=$slirp -Dfdt=$fdt \ + -Diconv=$iconv -Dcurses=$curses -Dlibudev=$libudev\ + -Ddocs=$docs -Dsphinx_build=$sphinx_build -Dinstall_blobs=$blobs \ + -Dvhost_user_blk_server=$vhost_user_blk_server \ + $cross_arg \ + "$PWD" "$source_path" + +if test "$?" -ne 0 ; then + error_exit "meson setup failed" +fi +fi + +if test -n "${deprecated_features}"; then + echo "Warning, deprecated features enabled." + echo "Please see docs/system/deprecated.rst" + echo " features: ${deprecated_features}" +fi + +# Save the configure command line for later reuse. +cat <config.status +#!/bin/sh +# Generated by configure. +# Run this file to recreate the current configuration. +# Compiler output produced by configure, useful for debugging +# configure, is in config.log if it exists. +EOD + +preserve_env() { + envname=$1 + + eval envval=\$$envname + + if test -n "$envval" + then + echo "$envname='$envval'" >> config.status + echo "export $envname" >> config.status + else + echo "unset $envname" >> config.status + fi +} + +# Preserve various env variables that influence what +# features/build target configure will detect +preserve_env AR +preserve_env AS +preserve_env CC +preserve_env CPP +preserve_env CXX +preserve_env INSTALL +preserve_env LD +preserve_env LD_LIBRARY_PATH +preserve_env LIBTOOL +preserve_env MAKE +preserve_env NM +preserve_env OBJCOPY +preserve_env PATH +preserve_env PKG_CONFIG +preserve_env PKG_CONFIG_LIBDIR +preserve_env PKG_CONFIG_PATH +preserve_env PYTHON +preserve_env SDL2_CONFIG +preserve_env SMBD +preserve_env STRIP +preserve_env WINDRES + +printf "exec" >>config.status +for i in "$0" "$@"; do + test "$i" = --skip-meson || printf " '%s'" "$i" >>config.status +done +echo ' "$@"' >>config.status +chmod +x config.status + +rm -r "$TMPDIR1" diff --git a/crypto/aes.c b/crypto/aes.c new file mode 100644 index 000000000..159800df6 --- /dev/null +++ b/crypto/aes.c @@ -0,0 +1,1601 @@ +/** + * + * aes.c - integrated in QEMU by Fabrice Bellard from the OpenSSL project. + */ +/* + * rijndael-alg-fst.c + * + * @version 3.0 (December 2000) + * + * Optimised ANSI C code for the Rijndael cipher (now AES) + * + * @author Vincent Rijmen + * @author Antoon Bosselaers + * @author Paulo Barreto + * + * This code is hereby placed in the public domain. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHORS ''AS IS'' AND ANY EXPRESS + * OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR + * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, + * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE + * OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, + * EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ +#include "qemu/osdep.h" +#include "crypto/aes.h" + +typedef uint32_t u32; +typedef uint8_t u8; + +/* This controls loop-unrolling in aes_core.c */ +#undef FULL_UNROLL +# define GETU32(pt) (((u32)(pt)[0] << 24) ^ ((u32)(pt)[1] << 16) ^ ((u32)(pt)[2] << 8) ^ ((u32)(pt)[3])) +# define PUTU32(ct, st) { (ct)[0] = (u8)((st) >> 24); (ct)[1] = (u8)((st) >> 16); (ct)[2] = (u8)((st) >> 8); (ct)[3] = (u8)(st); } + +const uint8_t AES_sbox[256] = { + 0x63, 0x7C, 0x77, 0x7B, 0xF2, 0x6B, 0x6F, 0xC5, + 0x30, 0x01, 0x67, 0x2B, 0xFE, 0xD7, 0xAB, 0x76, + 0xCA, 0x82, 0xC9, 0x7D, 0xFA, 0x59, 0x47, 0xF0, + 0xAD, 0xD4, 0xA2, 0xAF, 0x9C, 0xA4, 0x72, 0xC0, + 0xB7, 0xFD, 0x93, 0x26, 0x36, 0x3F, 0xF7, 0xCC, + 0x34, 0xA5, 0xE5, 0xF1, 0x71, 0xD8, 0x31, 0x15, + 0x04, 0xC7, 0x23, 0xC3, 0x18, 0x96, 0x05, 0x9A, + 0x07, 0x12, 0x80, 0xE2, 0xEB, 0x27, 0xB2, 0x75, + 0x09, 0x83, 0x2C, 0x1A, 0x1B, 0x6E, 0x5A, 0xA0, + 0x52, 0x3B, 0xD6, 0xB3, 0x29, 0xE3, 0x2F, 0x84, + 0x53, 0xD1, 0x00, 0xED, 0x20, 0xFC, 0xB1, 0x5B, + 0x6A, 0xCB, 0xBE, 0x39, 0x4A, 0x4C, 0x58, 0xCF, + 0xD0, 0xEF, 0xAA, 0xFB, 0x43, 0x4D, 0x33, 0x85, + 0x45, 0xF9, 0x02, 0x7F, 0x50, 0x3C, 0x9F, 0xA8, + 0x51, 0xA3, 0x40, 0x8F, 0x92, 0x9D, 0x38, 0xF5, + 0xBC, 0xB6, 0xDA, 0x21, 0x10, 0xFF, 0xF3, 0xD2, + 0xCD, 0x0C, 0x13, 0xEC, 0x5F, 0x97, 0x44, 0x17, + 0xC4, 0xA7, 0x7E, 0x3D, 0x64, 0x5D, 0x19, 0x73, + 0x60, 0x81, 0x4F, 0xDC, 0x22, 0x2A, 0x90, 0x88, + 0x46, 0xEE, 0xB8, 0x14, 0xDE, 0x5E, 0x0B, 0xDB, + 0xE0, 0x32, 0x3A, 0x0A, 0x49, 0x06, 0x24, 0x5C, + 0xC2, 0xD3, 0xAC, 0x62, 0x91, 0x95, 0xE4, 0x79, + 0xE7, 0xC8, 0x37, 0x6D, 0x8D, 0xD5, 0x4E, 0xA9, + 0x6C, 0x56, 0xF4, 0xEA, 0x65, 0x7A, 0xAE, 0x08, + 0xBA, 0x78, 0x25, 0x2E, 0x1C, 0xA6, 0xB4, 0xC6, + 0xE8, 0xDD, 0x74, 0x1F, 0x4B, 0xBD, 0x8B, 0x8A, + 0x70, 0x3E, 0xB5, 0x66, 0x48, 0x03, 0xF6, 0x0E, + 0x61, 0x35, 0x57, 0xB9, 0x86, 0xC1, 0x1D, 0x9E, + 0xE1, 0xF8, 0x98, 0x11, 0x69, 0xD9, 0x8E, 0x94, + 0x9B, 0x1E, 0x87, 0xE9, 0xCE, 0x55, 0x28, 0xDF, + 0x8C, 0xA1, 0x89, 0x0D, 0xBF, 0xE6, 0x42, 0x68, + 0x41, 0x99, 0x2D, 0x0F, 0xB0, 0x54, 0xBB, 0x16, +}; + +const uint8_t AES_isbox[256] = { + 0x52, 0x09, 0x6A, 0xD5, 0x30, 0x36, 0xA5, 0x38, + 0xBF, 0x40, 0xA3, 0x9E, 0x81, 0xF3, 0xD7, 0xFB, + 0x7C, 0xE3, 0x39, 0x82, 0x9B, 0x2F, 0xFF, 0x87, + 0x34, 0x8E, 0x43, 0x44, 0xC4, 0xDE, 0xE9, 0xCB, + 0x54, 0x7B, 0x94, 0x32, 0xA6, 0xC2, 0x23, 0x3D, + 0xEE, 0x4C, 0x95, 0x0B, 0x42, 0xFA, 0xC3, 0x4E, + 0x08, 0x2E, 0xA1, 0x66, 0x28, 0xD9, 0x24, 0xB2, + 0x76, 0x5B, 0xA2, 0x49, 0x6D, 0x8B, 0xD1, 0x25, + 0x72, 0xF8, 0xF6, 0x64, 0x86, 0x68, 0x98, 0x16, + 0xD4, 0xA4, 0x5C, 0xCC, 0x5D, 0x65, 0xB6, 0x92, + 0x6C, 0x70, 0x48, 0x50, 0xFD, 0xED, 0xB9, 0xDA, + 0x5E, 0x15, 0x46, 0x57, 0xA7, 0x8D, 0x9D, 0x84, + 0x90, 0xD8, 0xAB, 0x00, 0x8C, 0xBC, 0xD3, 0x0A, + 0xF7, 0xE4, 0x58, 0x05, 0xB8, 0xB3, 0x45, 0x06, + 0xD0, 0x2C, 0x1E, 0x8F, 0xCA, 0x3F, 0x0F, 0x02, + 0xC1, 0xAF, 0xBD, 0x03, 0x01, 0x13, 0x8A, 0x6B, + 0x3A, 0x91, 0x11, 0x41, 0x4F, 0x67, 0xDC, 0xEA, + 0x97, 0xF2, 0xCF, 0xCE, 0xF0, 0xB4, 0xE6, 0x73, + 0x96, 0xAC, 0x74, 0x22, 0xE7, 0xAD, 0x35, 0x85, + 0xE2, 0xF9, 0x37, 0xE8, 0x1C, 0x75, 0xDF, 0x6E, + 0x47, 0xF1, 0x1A, 0x71, 0x1D, 0x29, 0xC5, 0x89, + 0x6F, 0xB7, 0x62, 0x0E, 0xAA, 0x18, 0xBE, 0x1B, + 0xFC, 0x56, 0x3E, 0x4B, 0xC6, 0xD2, 0x79, 0x20, + 0x9A, 0xDB, 0xC0, 0xFE, 0x78, 0xCD, 0x5A, 0xF4, + 0x1F, 0xDD, 0xA8, 0x33, 0x88, 0x07, 0xC7, 0x31, + 0xB1, 0x12, 0x10, 0x59, 0x27, 0x80, 0xEC, 0x5F, + 0x60, 0x51, 0x7F, 0xA9, 0x19, 0xB5, 0x4A, 0x0D, + 0x2D, 0xE5, 0x7A, 0x9F, 0x93, 0xC9, 0x9C, 0xEF, + 0xA0, 0xE0, 0x3B, 0x4D, 0xAE, 0x2A, 0xF5, 0xB0, + 0xC8, 0xEB, 0xBB, 0x3C, 0x83, 0x53, 0x99, 0x61, + 0x17, 0x2B, 0x04, 0x7E, 0xBA, 0x77, 0xD6, 0x26, + 0xE1, 0x69, 0x14, 0x63, 0x55, 0x21, 0x0C, 0x7D, +}; + +const uint8_t AES_shifts[16] = { + 0, 5, 10, 15, 4, 9, 14, 3, 8, 13, 2, 7, 12, 1, 6, 11 +}; + +const uint8_t AES_ishifts[16] = { + 0, 13, 10, 7, 4, 1, 14, 11, 8, 5, 2, 15, 12, 9, 6, 3 +}; + +/* AES_imc[x][0] = [x].[0e, 09, 0d, 0b]; */ +/* AES_imc[x][1] = [x].[0b, 0e, 09, 0d]; */ +/* AES_imc[x][2] = [x].[0d, 0b, 0e, 09]; */ +/* AES_imc[x][3] = [x].[09, 0d, 0b, 0e]; */ +const uint32_t AES_imc[256][4] = { + { 0x00000000, 0x00000000, 0x00000000, 0x00000000, }, /* x=00 */ + { 0x0E090D0B, 0x0B0E090D, 0x0D0B0E09, 0x090D0B0E, }, /* x=01 */ + { 0x1C121A16, 0x161C121A, 0x1A161C12, 0x121A161C, }, /* x=02 */ + { 0x121B171D, 0x1D121B17, 0x171D121B, 0x1B171D12, }, /* x=03 */ + { 0x3824342C, 0x2C382434, 0x342C3824, 0x24342C38, }, /* x=04 */ + { 0x362D3927, 0x27362D39, 0x3927362D, 0x2D392736, }, /* x=05 */ + { 0x24362E3A, 0x3A24362E, 0x2E3A2436, 0x362E3A24, }, /* x=06 */ + { 0x2A3F2331, 0x312A3F23, 0x23312A3F, 0x3F23312A, }, /* x=07 */ + { 0x70486858, 0x58704868, 0x68587048, 0x48685870, }, /* x=08 */ + { 0x7E416553, 0x537E4165, 0x65537E41, 0x4165537E, }, /* x=09 */ + { 0x6C5A724E, 0x4E6C5A72, 0x724E6C5A, 0x5A724E6C, }, /* x=0A */ + { 0x62537F45, 0x4562537F, 0x7F456253, 0x537F4562, }, /* x=0B */ + { 0x486C5C74, 0x74486C5C, 0x5C74486C, 0x6C5C7448, }, /* x=0C */ + { 0x4665517F, 0x7F466551, 0x517F4665, 0x65517F46, }, /* x=0D */ + { 0x547E4662, 0x62547E46, 0x4662547E, 0x7E466254, }, /* x=0E */ + { 0x5A774B69, 0x695A774B, 0x4B695A77, 0x774B695A, }, /* x=0F */ + { 0xE090D0B0, 0xB0E090D0, 0xD0B0E090, 0x90D0B0E0, }, /* x=10 */ + { 0xEE99DDBB, 0xBBEE99DD, 0xDDBBEE99, 0x99DDBBEE, }, /* x=11 */ + { 0xFC82CAA6, 0xA6FC82CA, 0xCAA6FC82, 0x82CAA6FC, }, /* x=12 */ + { 0xF28BC7AD, 0xADF28BC7, 0xC7ADF28B, 0x8BC7ADF2, }, /* x=13 */ + { 0xD8B4E49C, 0x9CD8B4E4, 0xE49CD8B4, 0xB4E49CD8, }, /* x=14 */ + { 0xD6BDE997, 0x97D6BDE9, 0xE997D6BD, 0xBDE997D6, }, /* x=15 */ + { 0xC4A6FE8A, 0x8AC4A6FE, 0xFE8AC4A6, 0xA6FE8AC4, }, /* x=16 */ + { 0xCAAFF381, 0x81CAAFF3, 0xF381CAAF, 0xAFF381CA, }, /* x=17 */ + { 0x90D8B8E8, 0xE890D8B8, 0xB8E890D8, 0xD8B8E890, }, /* x=18 */ + { 0x9ED1B5E3, 0xE39ED1B5, 0xB5E39ED1, 0xD1B5E39E, }, /* x=19 */ + { 0x8CCAA2FE, 0xFE8CCAA2, 0xA2FE8CCA, 0xCAA2FE8C, }, /* x=1A */ + { 0x82C3AFF5, 0xF582C3AF, 0xAFF582C3, 0xC3AFF582, }, /* x=1B */ + { 0xA8FC8CC4, 0xC4A8FC8C, 0x8CC4A8FC, 0xFC8CC4A8, }, /* x=1C */ + { 0xA6F581CF, 0xCFA6F581, 0x81CFA6F5, 0xF581CFA6, }, /* x=1D */ + { 0xB4EE96D2, 0xD2B4EE96, 0x96D2B4EE, 0xEE96D2B4, }, /* x=1E */ + { 0xBAE79BD9, 0xD9BAE79B, 0x9BD9BAE7, 0xE79BD9BA, }, /* x=1F */ + { 0xDB3BBB7B, 0x7BDB3BBB, 0xBB7BDB3B, 0x3BBB7BDB, }, /* x=20 */ + { 0xD532B670, 0x70D532B6, 0xB670D532, 0x32B670D5, }, /* x=21 */ + { 0xC729A16D, 0x6DC729A1, 0xA16DC729, 0x29A16DC7, }, /* x=22 */ + { 0xC920AC66, 0x66C920AC, 0xAC66C920, 0x20AC66C9, }, /* x=23 */ + { 0xE31F8F57, 0x57E31F8F, 0x8F57E31F, 0x1F8F57E3, }, /* x=24 */ + { 0xED16825C, 0x5CED1682, 0x825CED16, 0x16825CED, }, /* x=25 */ + { 0xFF0D9541, 0x41FF0D95, 0x9541FF0D, 0x0D9541FF, }, /* x=26 */ + { 0xF104984A, 0x4AF10498, 0x984AF104, 0x04984AF1, }, /* x=27 */ + { 0xAB73D323, 0x23AB73D3, 0xD323AB73, 0x73D323AB, }, /* x=28 */ + { 0xA57ADE28, 0x28A57ADE, 0xDE28A57A, 0x7ADE28A5, }, /* x=29 */ + { 0xB761C935, 0x35B761C9, 0xC935B761, 0x61C935B7, }, /* x=2A */ + { 0xB968C43E, 0x3EB968C4, 0xC43EB968, 0x68C43EB9, }, /* x=2B */ + { 0x9357E70F, 0x0F9357E7, 0xE70F9357, 0x57E70F93, }, /* x=2C */ + { 0x9D5EEA04, 0x049D5EEA, 0xEA049D5E, 0x5EEA049D, }, /* x=2D */ + { 0x8F45FD19, 0x198F45FD, 0xFD198F45, 0x45FD198F, }, /* x=2E */ + { 0x814CF012, 0x12814CF0, 0xF012814C, 0x4CF01281, }, /* x=2F */ + { 0x3BAB6BCB, 0xCB3BAB6B, 0x6BCB3BAB, 0xAB6BCB3B, }, /* x=30 */ + { 0x35A266C0, 0xC035A266, 0x66C035A2, 0xA266C035, }, /* x=31 */ + { 0x27B971DD, 0xDD27B971, 0x71DD27B9, 0xB971DD27, }, /* x=32 */ + { 0x29B07CD6, 0xD629B07C, 0x7CD629B0, 0xB07CD629, }, /* x=33 */ + { 0x038F5FE7, 0xE7038F5F, 0x5FE7038F, 0x8F5FE703, }, /* x=34 */ + { 0x0D8652EC, 0xEC0D8652, 0x52EC0D86, 0x8652EC0D, }, /* x=35 */ + { 0x1F9D45F1, 0xF11F9D45, 0x45F11F9D, 0x9D45F11F, }, /* x=36 */ + { 0x119448FA, 0xFA119448, 0x48FA1194, 0x9448FA11, }, /* x=37 */ + { 0x4BE30393, 0x934BE303, 0x03934BE3, 0xE303934B, }, /* x=38 */ + { 0x45EA0E98, 0x9845EA0E, 0x0E9845EA, 0xEA0E9845, }, /* x=39 */ + { 0x57F11985, 0x8557F119, 0x198557F1, 0xF1198557, }, /* x=3A */ + { 0x59F8148E, 0x8E59F814, 0x148E59F8, 0xF8148E59, }, /* x=3B */ + { 0x73C737BF, 0xBF73C737, 0x37BF73C7, 0xC737BF73, }, /* x=3C */ + { 0x7DCE3AB4, 0xB47DCE3A, 0x3AB47DCE, 0xCE3AB47D, }, /* x=3D */ + { 0x6FD52DA9, 0xA96FD52D, 0x2DA96FD5, 0xD52DA96F, }, /* x=3E */ + { 0x61DC20A2, 0xA261DC20, 0x20A261DC, 0xDC20A261, }, /* x=3F */ + { 0xAD766DF6, 0xF6AD766D, 0x6DF6AD76, 0x766DF6AD, }, /* x=40 */ + { 0xA37F60FD, 0xFDA37F60, 0x60FDA37F, 0x7F60FDA3, }, /* x=41 */ + { 0xB16477E0, 0xE0B16477, 0x77E0B164, 0x6477E0B1, }, /* x=42 */ + { 0xBF6D7AEB, 0xEBBF6D7A, 0x7AEBBF6D, 0x6D7AEBBF, }, /* x=43 */ + { 0x955259DA, 0xDA955259, 0x59DA9552, 0x5259DA95, }, /* x=44 */ + { 0x9B5B54D1, 0xD19B5B54, 0x54D19B5B, 0x5B54D19B, }, /* x=45 */ + { 0x894043CC, 0xCC894043, 0x43CC8940, 0x4043CC89, }, /* x=46 */ + { 0x87494EC7, 0xC787494E, 0x4EC78749, 0x494EC787, }, /* x=47 */ + { 0xDD3E05AE, 0xAEDD3E05, 0x05AEDD3E, 0x3E05AEDD, }, /* x=48 */ + { 0xD33708A5, 0xA5D33708, 0x08A5D337, 0x3708A5D3, }, /* x=49 */ + { 0xC12C1FB8, 0xB8C12C1F, 0x1FB8C12C, 0x2C1FB8C1, }, /* x=4A */ + { 0xCF2512B3, 0xB3CF2512, 0x12B3CF25, 0x2512B3CF, }, /* x=4B */ + { 0xE51A3182, 0x82E51A31, 0x3182E51A, 0x1A3182E5, }, /* x=4C */ + { 0xEB133C89, 0x89EB133C, 0x3C89EB13, 0x133C89EB, }, /* x=4D */ + { 0xF9082B94, 0x94F9082B, 0x2B94F908, 0x082B94F9, }, /* x=4E */ + { 0xF701269F, 0x9FF70126, 0x269FF701, 0x01269FF7, }, /* x=4F */ + { 0x4DE6BD46, 0x464DE6BD, 0xBD464DE6, 0xE6BD464D, }, /* x=50 */ + { 0x43EFB04D, 0x4D43EFB0, 0xB04D43EF, 0xEFB04D43, }, /* x=51 */ + { 0x51F4A750, 0x5051F4A7, 0xA75051F4, 0xF4A75051, }, /* x=52 */ + { 0x5FFDAA5B, 0x5B5FFDAA, 0xAA5B5FFD, 0xFDAA5B5F, }, /* x=53 */ + { 0x75C2896A, 0x6A75C289, 0x896A75C2, 0xC2896A75, }, /* x=54 */ + { 0x7BCB8461, 0x617BCB84, 0x84617BCB, 0xCB84617B, }, /* x=55 */ + { 0x69D0937C, 0x7C69D093, 0x937C69D0, 0xD0937C69, }, /* x=56 */ + { 0x67D99E77, 0x7767D99E, 0x9E7767D9, 0xD99E7767, }, /* x=57 */ + { 0x3DAED51E, 0x1E3DAED5, 0xD51E3DAE, 0xAED51E3D, }, /* x=58 */ + { 0x33A7D815, 0x1533A7D8, 0xD81533A7, 0xA7D81533, }, /* x=59 */ + { 0x21BCCF08, 0x0821BCCF, 0xCF0821BC, 0xBCCF0821, }, /* x=5A */ + { 0x2FB5C203, 0x032FB5C2, 0xC2032FB5, 0xB5C2032F, }, /* x=5B */ + { 0x058AE132, 0x32058AE1, 0xE132058A, 0x8AE13205, }, /* x=5C */ + { 0x0B83EC39, 0x390B83EC, 0xEC390B83, 0x83EC390B, }, /* x=5D */ + { 0x1998FB24, 0x241998FB, 0xFB241998, 0x98FB2419, }, /* x=5E */ + { 0x1791F62F, 0x2F1791F6, 0xF62F1791, 0x91F62F17, }, /* x=5F */ + { 0x764DD68D, 0x8D764DD6, 0xD68D764D, 0x4DD68D76, }, /* x=60 */ + { 0x7844DB86, 0x867844DB, 0xDB867844, 0x44DB8678, }, /* x=61 */ + { 0x6A5FCC9B, 0x9B6A5FCC, 0xCC9B6A5F, 0x5FCC9B6A, }, /* x=62 */ + { 0x6456C190, 0x906456C1, 0xC1906456, 0x56C19064, }, /* x=63 */ + { 0x4E69E2A1, 0xA14E69E2, 0xE2A14E69, 0x69E2A14E, }, /* x=64 */ + { 0x4060EFAA, 0xAA4060EF, 0xEFAA4060, 0x60EFAA40, }, /* x=65 */ + { 0x527BF8B7, 0xB7527BF8, 0xF8B7527B, 0x7BF8B752, }, /* x=66 */ + { 0x5C72F5BC, 0xBC5C72F5, 0xF5BC5C72, 0x72F5BC5C, }, /* x=67 */ + { 0x0605BED5, 0xD50605BE, 0xBED50605, 0x05BED506, }, /* x=68 */ + { 0x080CB3DE, 0xDE080CB3, 0xB3DE080C, 0x0CB3DE08, }, /* x=69 */ + { 0x1A17A4C3, 0xC31A17A4, 0xA4C31A17, 0x17A4C31A, }, /* x=6A */ + { 0x141EA9C8, 0xC8141EA9, 0xA9C8141E, 0x1EA9C814, }, /* x=6B */ + { 0x3E218AF9, 0xF93E218A, 0x8AF93E21, 0x218AF93E, }, /* x=6C */ + { 0x302887F2, 0xF2302887, 0x87F23028, 0x2887F230, }, /* x=6D */ + { 0x223390EF, 0xEF223390, 0x90EF2233, 0x3390EF22, }, /* x=6E */ + { 0x2C3A9DE4, 0xE42C3A9D, 0x9DE42C3A, 0x3A9DE42C, }, /* x=6F */ + { 0x96DD063D, 0x3D96DD06, 0x063D96DD, 0xDD063D96, }, /* x=70 */ + { 0x98D40B36, 0x3698D40B, 0x0B3698D4, 0xD40B3698, }, /* x=71 */ + { 0x8ACF1C2B, 0x2B8ACF1C, 0x1C2B8ACF, 0xCF1C2B8A, }, /* x=72 */ + { 0x84C61120, 0x2084C611, 0x112084C6, 0xC6112084, }, /* x=73 */ + { 0xAEF93211, 0x11AEF932, 0x3211AEF9, 0xF93211AE, }, /* x=74 */ + { 0xA0F03F1A, 0x1AA0F03F, 0x3F1AA0F0, 0xF03F1AA0, }, /* x=75 */ + { 0xB2EB2807, 0x07B2EB28, 0x2807B2EB, 0xEB2807B2, }, /* x=76 */ + { 0xBCE2250C, 0x0CBCE225, 0x250CBCE2, 0xE2250CBC, }, /* x=77 */ + { 0xE6956E65, 0x65E6956E, 0x6E65E695, 0x956E65E6, }, /* x=78 */ + { 0xE89C636E, 0x6EE89C63, 0x636EE89C, 0x9C636EE8, }, /* x=79 */ + { 0xFA877473, 0x73FA8774, 0x7473FA87, 0x877473FA, }, /* x=7A */ + { 0xF48E7978, 0x78F48E79, 0x7978F48E, 0x8E7978F4, }, /* x=7B */ + { 0xDEB15A49, 0x49DEB15A, 0x5A49DEB1, 0xB15A49DE, }, /* x=7C */ + { 0xD0B85742, 0x42D0B857, 0x5742D0B8, 0xB85742D0, }, /* x=7D */ + { 0xC2A3405F, 0x5FC2A340, 0x405FC2A3, 0xA3405FC2, }, /* x=7E */ + { 0xCCAA4D54, 0x54CCAA4D, 0x4D54CCAA, 0xAA4D54CC, }, /* x=7F */ + { 0x41ECDAF7, 0xF741ECDA, 0xDAF741EC, 0xECDAF741, }, /* x=80 */ + { 0x4FE5D7FC, 0xFC4FE5D7, 0xD7FC4FE5, 0xE5D7FC4F, }, /* x=81 */ + { 0x5DFEC0E1, 0xE15DFEC0, 0xC0E15DFE, 0xFEC0E15D, }, /* x=82 */ + { 0x53F7CDEA, 0xEA53F7CD, 0xCDEA53F7, 0xF7CDEA53, }, /* x=83 */ + { 0x79C8EEDB, 0xDB79C8EE, 0xEEDB79C8, 0xC8EEDB79, }, /* x=84 */ + { 0x77C1E3D0, 0xD077C1E3, 0xE3D077C1, 0xC1E3D077, }, /* x=85 */ + { 0x65DAF4CD, 0xCD65DAF4, 0xF4CD65DA, 0xDAF4CD65, }, /* x=86 */ + { 0x6BD3F9C6, 0xC66BD3F9, 0xF9C66BD3, 0xD3F9C66B, }, /* x=87 */ + { 0x31A4B2AF, 0xAF31A4B2, 0xB2AF31A4, 0xA4B2AF31, }, /* x=88 */ + { 0x3FADBFA4, 0xA43FADBF, 0xBFA43FAD, 0xADBFA43F, }, /* x=89 */ + { 0x2DB6A8B9, 0xB92DB6A8, 0xA8B92DB6, 0xB6A8B92D, }, /* x=8A */ + { 0x23BFA5B2, 0xB223BFA5, 0xA5B223BF, 0xBFA5B223, }, /* x=8B */ + { 0x09808683, 0x83098086, 0x86830980, 0x80868309, }, /* x=8C */ + { 0x07898B88, 0x8807898B, 0x8B880789, 0x898B8807, }, /* x=8D */ + { 0x15929C95, 0x9515929C, 0x9C951592, 0x929C9515, }, /* x=8E */ + { 0x1B9B919E, 0x9E1B9B91, 0x919E1B9B, 0x9B919E1B, }, /* x=8F */ + { 0xA17C0A47, 0x47A17C0A, 0x0A47A17C, 0x7C0A47A1, }, /* x=90 */ + { 0xAF75074C, 0x4CAF7507, 0x074CAF75, 0x75074CAF, }, /* x=91 */ + { 0xBD6E1051, 0x51BD6E10, 0x1051BD6E, 0x6E1051BD, }, /* x=92 */ + { 0xB3671D5A, 0x5AB3671D, 0x1D5AB367, 0x671D5AB3, }, /* x=93 */ + { 0x99583E6B, 0x6B99583E, 0x3E6B9958, 0x583E6B99, }, /* x=94 */ + { 0x97513360, 0x60975133, 0x33609751, 0x51336097, }, /* x=95 */ + { 0x854A247D, 0x7D854A24, 0x247D854A, 0x4A247D85, }, /* x=96 */ + { 0x8B432976, 0x768B4329, 0x29768B43, 0x4329768B, }, /* x=97 */ + { 0xD134621F, 0x1FD13462, 0x621FD134, 0x34621FD1, }, /* x=98 */ + { 0xDF3D6F14, 0x14DF3D6F, 0x6F14DF3D, 0x3D6F14DF, }, /* x=99 */ + { 0xCD267809, 0x09CD2678, 0x7809CD26, 0x267809CD, }, /* x=9A */ + { 0xC32F7502, 0x02C32F75, 0x7502C32F, 0x2F7502C3, }, /* x=9B */ + { 0xE9105633, 0x33E91056, 0x5633E910, 0x105633E9, }, /* x=9C */ + { 0xE7195B38, 0x38E7195B, 0x5B38E719, 0x195B38E7, }, /* x=9D */ + { 0xF5024C25, 0x25F5024C, 0x4C25F502, 0x024C25F5, }, /* x=9E */ + { 0xFB0B412E, 0x2EFB0B41, 0x412EFB0B, 0x0B412EFB, }, /* x=9F */ + { 0x9AD7618C, 0x8C9AD761, 0x618C9AD7, 0xD7618C9A, }, /* x=A0 */ + { 0x94DE6C87, 0x8794DE6C, 0x6C8794DE, 0xDE6C8794, }, /* x=A1 */ + { 0x86C57B9A, 0x9A86C57B, 0x7B9A86C5, 0xC57B9A86, }, /* x=A2 */ + { 0x88CC7691, 0x9188CC76, 0x769188CC, 0xCC769188, }, /* x=A3 */ + { 0xA2F355A0, 0xA0A2F355, 0x55A0A2F3, 0xF355A0A2, }, /* x=A4 */ + { 0xACFA58AB, 0xABACFA58, 0x58ABACFA, 0xFA58ABAC, }, /* x=A5 */ + { 0xBEE14FB6, 0xB6BEE14F, 0x4FB6BEE1, 0xE14FB6BE, }, /* x=A6 */ + { 0xB0E842BD, 0xBDB0E842, 0x42BDB0E8, 0xE842BDB0, }, /* x=A7 */ + { 0xEA9F09D4, 0xD4EA9F09, 0x09D4EA9F, 0x9F09D4EA, }, /* x=A8 */ + { 0xE49604DF, 0xDFE49604, 0x04DFE496, 0x9604DFE4, }, /* x=A9 */ + { 0xF68D13C2, 0xC2F68D13, 0x13C2F68D, 0x8D13C2F6, }, /* x=AA */ + { 0xF8841EC9, 0xC9F8841E, 0x1EC9F884, 0x841EC9F8, }, /* x=AB */ + { 0xD2BB3DF8, 0xF8D2BB3D, 0x3DF8D2BB, 0xBB3DF8D2, }, /* x=AC */ + { 0xDCB230F3, 0xF3DCB230, 0x30F3DCB2, 0xB230F3DC, }, /* x=AD */ + { 0xCEA927EE, 0xEECEA927, 0x27EECEA9, 0xA927EECE, }, /* x=AE */ + { 0xC0A02AE5, 0xE5C0A02A, 0x2AE5C0A0, 0xA02AE5C0, }, /* x=AF */ + { 0x7A47B13C, 0x3C7A47B1, 0xB13C7A47, 0x47B13C7A, }, /* x=B0 */ + { 0x744EBC37, 0x37744EBC, 0xBC37744E, 0x4EBC3774, }, /* x=B1 */ + { 0x6655AB2A, 0x2A6655AB, 0xAB2A6655, 0x55AB2A66, }, /* x=B2 */ + { 0x685CA621, 0x21685CA6, 0xA621685C, 0x5CA62168, }, /* x=B3 */ + { 0x42638510, 0x10426385, 0x85104263, 0x63851042, }, /* x=B4 */ + { 0x4C6A881B, 0x1B4C6A88, 0x881B4C6A, 0x6A881B4C, }, /* x=B5 */ + { 0x5E719F06, 0x065E719F, 0x9F065E71, 0x719F065E, }, /* x=B6 */ + { 0x5078920D, 0x0D507892, 0x920D5078, 0x78920D50, }, /* x=B7 */ + { 0x0A0FD964, 0x640A0FD9, 0xD9640A0F, 0x0FD9640A, }, /* x=B8 */ + { 0x0406D46F, 0x6F0406D4, 0xD46F0406, 0x06D46F04, }, /* x=B9 */ + { 0x161DC372, 0x72161DC3, 0xC372161D, 0x1DC37216, }, /* x=BA */ + { 0x1814CE79, 0x791814CE, 0xCE791814, 0x14CE7918, }, /* x=BB */ + { 0x322BED48, 0x48322BED, 0xED48322B, 0x2BED4832, }, /* x=BC */ + { 0x3C22E043, 0x433C22E0, 0xE0433C22, 0x22E0433C, }, /* x=BD */ + { 0x2E39F75E, 0x5E2E39F7, 0xF75E2E39, 0x39F75E2E, }, /* x=BE */ + { 0x2030FA55, 0x552030FA, 0xFA552030, 0x30FA5520, }, /* x=BF */ + { 0xEC9AB701, 0x01EC9AB7, 0xB701EC9A, 0x9AB701EC, }, /* x=C0 */ + { 0xE293BA0A, 0x0AE293BA, 0xBA0AE293, 0x93BA0AE2, }, /* x=C1 */ + { 0xF088AD17, 0x17F088AD, 0xAD17F088, 0x88AD17F0, }, /* x=C2 */ + { 0xFE81A01C, 0x1CFE81A0, 0xA01CFE81, 0x81A01CFE, }, /* x=C3 */ + { 0xD4BE832D, 0x2DD4BE83, 0x832DD4BE, 0xBE832DD4, }, /* x=C4 */ + { 0xDAB78E26, 0x26DAB78E, 0x8E26DAB7, 0xB78E26DA, }, /* x=C5 */ + { 0xC8AC993B, 0x3BC8AC99, 0x993BC8AC, 0xAC993BC8, }, /* x=C6 */ + { 0xC6A59430, 0x30C6A594, 0x9430C6A5, 0xA59430C6, }, /* x=C7 */ + { 0x9CD2DF59, 0x599CD2DF, 0xDF599CD2, 0xD2DF599C, }, /* x=C8 */ + { 0x92DBD252, 0x5292DBD2, 0xD25292DB, 0xDBD25292, }, /* x=C9 */ + { 0x80C0C54F, 0x4F80C0C5, 0xC54F80C0, 0xC0C54F80, }, /* x=CA */ + { 0x8EC9C844, 0x448EC9C8, 0xC8448EC9, 0xC9C8448E, }, /* x=CB */ + { 0xA4F6EB75, 0x75A4F6EB, 0xEB75A4F6, 0xF6EB75A4, }, /* x=CC */ + { 0xAAFFE67E, 0x7EAAFFE6, 0xE67EAAFF, 0xFFE67EAA, }, /* x=CD */ + { 0xB8E4F163, 0x63B8E4F1, 0xF163B8E4, 0xE4F163B8, }, /* x=CE */ + { 0xB6EDFC68, 0x68B6EDFC, 0xFC68B6ED, 0xEDFC68B6, }, /* x=CF */ + { 0x0C0A67B1, 0xB10C0A67, 0x67B10C0A, 0x0A67B10C, }, /* x=D0 */ + { 0x02036ABA, 0xBA02036A, 0x6ABA0203, 0x036ABA02, }, /* x=D1 */ + { 0x10187DA7, 0xA710187D, 0x7DA71018, 0x187DA710, }, /* x=D2 */ + { 0x1E1170AC, 0xAC1E1170, 0x70AC1E11, 0x1170AC1E, }, /* x=D3 */ + { 0x342E539D, 0x9D342E53, 0x539D342E, 0x2E539D34, }, /* x=D4 */ + { 0x3A275E96, 0x963A275E, 0x5E963A27, 0x275E963A, }, /* x=D5 */ + { 0x283C498B, 0x8B283C49, 0x498B283C, 0x3C498B28, }, /* x=D6 */ + { 0x26354480, 0x80263544, 0x44802635, 0x35448026, }, /* x=D7 */ + { 0x7C420FE9, 0xE97C420F, 0x0FE97C42, 0x420FE97C, }, /* x=D8 */ + { 0x724B02E2, 0xE2724B02, 0x02E2724B, 0x4B02E272, }, /* x=D9 */ + { 0x605015FF, 0xFF605015, 0x15FF6050, 0x5015FF60, }, /* x=DA */ + { 0x6E5918F4, 0xF46E5918, 0x18F46E59, 0x5918F46E, }, /* x=DB */ + { 0x44663BC5, 0xC544663B, 0x3BC54466, 0x663BC544, }, /* x=DC */ + { 0x4A6F36CE, 0xCE4A6F36, 0x36CE4A6F, 0x6F36CE4A, }, /* x=DD */ + { 0x587421D3, 0xD3587421, 0x21D35874, 0x7421D358, }, /* x=DE */ + { 0x567D2CD8, 0xD8567D2C, 0x2CD8567D, 0x7D2CD856, }, /* x=DF */ + { 0x37A10C7A, 0x7A37A10C, 0x0C7A37A1, 0xA10C7A37, }, /* x=E0 */ + { 0x39A80171, 0x7139A801, 0x017139A8, 0xA8017139, }, /* x=E1 */ + { 0x2BB3166C, 0x6C2BB316, 0x166C2BB3, 0xB3166C2B, }, /* x=E2 */ + { 0x25BA1B67, 0x6725BA1B, 0x1B6725BA, 0xBA1B6725, }, /* x=E3 */ + { 0x0F853856, 0x560F8538, 0x38560F85, 0x8538560F, }, /* x=E4 */ + { 0x018C355D, 0x5D018C35, 0x355D018C, 0x8C355D01, }, /* x=E5 */ + { 0x13972240, 0x40139722, 0x22401397, 0x97224013, }, /* x=E6 */ + { 0x1D9E2F4B, 0x4B1D9E2F, 0x2F4B1D9E, 0x9E2F4B1D, }, /* x=E7 */ + { 0x47E96422, 0x2247E964, 0x642247E9, 0xE9642247, }, /* x=E8 */ + { 0x49E06929, 0x2949E069, 0x692949E0, 0xE0692949, }, /* x=E9 */ + { 0x5BFB7E34, 0x345BFB7E, 0x7E345BFB, 0xFB7E345B, }, /* x=EA */ + { 0x55F2733F, 0x3F55F273, 0x733F55F2, 0xF2733F55, }, /* x=EB */ + { 0x7FCD500E, 0x0E7FCD50, 0x500E7FCD, 0xCD500E7F, }, /* x=EC */ + { 0x71C45D05, 0x0571C45D, 0x5D0571C4, 0xC45D0571, }, /* x=ED */ + { 0x63DF4A18, 0x1863DF4A, 0x4A1863DF, 0xDF4A1863, }, /* x=EE */ + { 0x6DD64713, 0x136DD647, 0x47136DD6, 0xD647136D, }, /* x=EF */ + { 0xD731DCCA, 0xCAD731DC, 0xDCCAD731, 0x31DCCAD7, }, /* x=F0 */ + { 0xD938D1C1, 0xC1D938D1, 0xD1C1D938, 0x38D1C1D9, }, /* x=F1 */ + { 0xCB23C6DC, 0xDCCB23C6, 0xC6DCCB23, 0x23C6DCCB, }, /* x=F2 */ + { 0xC52ACBD7, 0xD7C52ACB, 0xCBD7C52A, 0x2ACBD7C5, }, /* x=F3 */ + { 0xEF15E8E6, 0xE6EF15E8, 0xE8E6EF15, 0x15E8E6EF, }, /* x=F4 */ + { 0xE11CE5ED, 0xEDE11CE5, 0xE5EDE11C, 0x1CE5EDE1, }, /* x=F5 */ + { 0xF307F2F0, 0xF0F307F2, 0xF2F0F307, 0x07F2F0F3, }, /* x=F6 */ + { 0xFD0EFFFB, 0xFBFD0EFF, 0xFFFBFD0E, 0x0EFFFBFD, }, /* x=F7 */ + { 0xA779B492, 0x92A779B4, 0xB492A779, 0x79B492A7, }, /* x=F8 */ + { 0xA970B999, 0x99A970B9, 0xB999A970, 0x70B999A9, }, /* x=F9 */ + { 0xBB6BAE84, 0x84BB6BAE, 0xAE84BB6B, 0x6BAE84BB, }, /* x=FA */ + { 0xB562A38F, 0x8FB562A3, 0xA38FB562, 0x62A38FB5, }, /* x=FB */ + { 0x9F5D80BE, 0xBE9F5D80, 0x80BE9F5D, 0x5D80BE9F, }, /* x=FC */ + { 0x91548DB5, 0xB591548D, 0x8DB59154, 0x548DB591, }, /* x=FD */ + { 0x834F9AA8, 0xA8834F9A, 0x9AA8834F, 0x4F9AA883, }, /* x=FE */ + { 0x8D4697A3, 0xA38D4697, 0x97A38D46, 0x4697A38D, }, /* x=FF */ +}; + + + +/* +AES_Te0[x] = S [x].[02, 01, 01, 03]; +AES_Te1[x] = S [x].[03, 02, 01, 01]; +AES_Te2[x] = S [x].[01, 03, 02, 01]; +AES_Te3[x] = S [x].[01, 01, 03, 02]; +AES_Te4[x] = S [x].[01, 01, 01, 01]; + +AES_Td0[x] = Si[x].[0e, 09, 0d, 0b]; +AES_Td1[x] = Si[x].[0b, 0e, 09, 0d]; +AES_Td2[x] = Si[x].[0d, 0b, 0e, 09]; +AES_Td3[x] = Si[x].[09, 0d, 0b, 0e]; +AES_Td4[x] = Si[x].[01, 01, 01, 01]; +*/ + +const uint32_t AES_Te0[256] = { + 0xc66363a5U, 0xf87c7c84U, 0xee777799U, 0xf67b7b8dU, + 0xfff2f20dU, 0xd66b6bbdU, 0xde6f6fb1U, 0x91c5c554U, + 0x60303050U, 0x02010103U, 0xce6767a9U, 0x562b2b7dU, + 0xe7fefe19U, 0xb5d7d762U, 0x4dababe6U, 0xec76769aU, + 0x8fcaca45U, 0x1f82829dU, 0x89c9c940U, 0xfa7d7d87U, + 0xeffafa15U, 0xb25959ebU, 0x8e4747c9U, 0xfbf0f00bU, + 0x41adadecU, 0xb3d4d467U, 0x5fa2a2fdU, 0x45afafeaU, + 0x239c9cbfU, 0x53a4a4f7U, 0xe4727296U, 0x9bc0c05bU, + 0x75b7b7c2U, 0xe1fdfd1cU, 0x3d9393aeU, 0x4c26266aU, + 0x6c36365aU, 0x7e3f3f41U, 0xf5f7f702U, 0x83cccc4fU, + 0x6834345cU, 0x51a5a5f4U, 0xd1e5e534U, 0xf9f1f108U, + 0xe2717193U, 0xabd8d873U, 0x62313153U, 0x2a15153fU, + 0x0804040cU, 0x95c7c752U, 0x46232365U, 0x9dc3c35eU, + 0x30181828U, 0x379696a1U, 0x0a05050fU, 0x2f9a9ab5U, + 0x0e070709U, 0x24121236U, 0x1b80809bU, 0xdfe2e23dU, + 0xcdebeb26U, 0x4e272769U, 0x7fb2b2cdU, 0xea75759fU, + 0x1209091bU, 0x1d83839eU, 0x582c2c74U, 0x341a1a2eU, + 0x361b1b2dU, 0xdc6e6eb2U, 0xb45a5aeeU, 0x5ba0a0fbU, + 0xa45252f6U, 0x763b3b4dU, 0xb7d6d661U, 0x7db3b3ceU, + 0x5229297bU, 0xdde3e33eU, 0x5e2f2f71U, 0x13848497U, + 0xa65353f5U, 0xb9d1d168U, 0x00000000U, 0xc1eded2cU, + 0x40202060U, 0xe3fcfc1fU, 0x79b1b1c8U, 0xb65b5bedU, + 0xd46a6abeU, 0x8dcbcb46U, 0x67bebed9U, 0x7239394bU, + 0x944a4adeU, 0x984c4cd4U, 0xb05858e8U, 0x85cfcf4aU, + 0xbbd0d06bU, 0xc5efef2aU, 0x4faaaae5U, 0xedfbfb16U, + 0x864343c5U, 0x9a4d4dd7U, 0x66333355U, 0x11858594U, + 0x8a4545cfU, 0xe9f9f910U, 0x04020206U, 0xfe7f7f81U, + 0xa05050f0U, 0x783c3c44U, 0x259f9fbaU, 0x4ba8a8e3U, + 0xa25151f3U, 0x5da3a3feU, 0x804040c0U, 0x058f8f8aU, + 0x3f9292adU, 0x219d9dbcU, 0x70383848U, 0xf1f5f504U, + 0x63bcbcdfU, 0x77b6b6c1U, 0xafdada75U, 0x42212163U, + 0x20101030U, 0xe5ffff1aU, 0xfdf3f30eU, 0xbfd2d26dU, + 0x81cdcd4cU, 0x180c0c14U, 0x26131335U, 0xc3ecec2fU, + 0xbe5f5fe1U, 0x359797a2U, 0x884444ccU, 0x2e171739U, + 0x93c4c457U, 0x55a7a7f2U, 0xfc7e7e82U, 0x7a3d3d47U, + 0xc86464acU, 0xba5d5de7U, 0x3219192bU, 0xe6737395U, + 0xc06060a0U, 0x19818198U, 0x9e4f4fd1U, 0xa3dcdc7fU, + 0x44222266U, 0x542a2a7eU, 0x3b9090abU, 0x0b888883U, + 0x8c4646caU, 0xc7eeee29U, 0x6bb8b8d3U, 0x2814143cU, + 0xa7dede79U, 0xbc5e5ee2U, 0x160b0b1dU, 0xaddbdb76U, + 0xdbe0e03bU, 0x64323256U, 0x743a3a4eU, 0x140a0a1eU, + 0x924949dbU, 0x0c06060aU, 0x4824246cU, 0xb85c5ce4U, + 0x9fc2c25dU, 0xbdd3d36eU, 0x43acacefU, 0xc46262a6U, + 0x399191a8U, 0x319595a4U, 0xd3e4e437U, 0xf279798bU, + 0xd5e7e732U, 0x8bc8c843U, 0x6e373759U, 0xda6d6db7U, + 0x018d8d8cU, 0xb1d5d564U, 0x9c4e4ed2U, 0x49a9a9e0U, + 0xd86c6cb4U, 0xac5656faU, 0xf3f4f407U, 0xcfeaea25U, + 0xca6565afU, 0xf47a7a8eU, 0x47aeaee9U, 0x10080818U, + 0x6fbabad5U, 0xf0787888U, 0x4a25256fU, 0x5c2e2e72U, + 0x381c1c24U, 0x57a6a6f1U, 0x73b4b4c7U, 0x97c6c651U, + 0xcbe8e823U, 0xa1dddd7cU, 0xe874749cU, 0x3e1f1f21U, + 0x964b4bddU, 0x61bdbddcU, 0x0d8b8b86U, 0x0f8a8a85U, + 0xe0707090U, 0x7c3e3e42U, 0x71b5b5c4U, 0xcc6666aaU, + 0x904848d8U, 0x06030305U, 0xf7f6f601U, 0x1c0e0e12U, + 0xc26161a3U, 0x6a35355fU, 0xae5757f9U, 0x69b9b9d0U, + 0x17868691U, 0x99c1c158U, 0x3a1d1d27U, 0x279e9eb9U, + 0xd9e1e138U, 0xebf8f813U, 0x2b9898b3U, 0x22111133U, + 0xd26969bbU, 0xa9d9d970U, 0x078e8e89U, 0x339494a7U, + 0x2d9b9bb6U, 0x3c1e1e22U, 0x15878792U, 0xc9e9e920U, + 0x87cece49U, 0xaa5555ffU, 0x50282878U, 0xa5dfdf7aU, + 0x038c8c8fU, 0x59a1a1f8U, 0x09898980U, 0x1a0d0d17U, + 0x65bfbfdaU, 0xd7e6e631U, 0x844242c6U, 0xd06868b8U, + 0x824141c3U, 0x299999b0U, 0x5a2d2d77U, 0x1e0f0f11U, + 0x7bb0b0cbU, 0xa85454fcU, 0x6dbbbbd6U, 0x2c16163aU, +}; +const uint32_t AES_Te1[256] = { + 0xa5c66363U, 0x84f87c7cU, 0x99ee7777U, 0x8df67b7bU, + 0x0dfff2f2U, 0xbdd66b6bU, 0xb1de6f6fU, 0x5491c5c5U, + 0x50603030U, 0x03020101U, 0xa9ce6767U, 0x7d562b2bU, + 0x19e7fefeU, 0x62b5d7d7U, 0xe64dababU, 0x9aec7676U, + 0x458fcacaU, 0x9d1f8282U, 0x4089c9c9U, 0x87fa7d7dU, + 0x15effafaU, 0xebb25959U, 0xc98e4747U, 0x0bfbf0f0U, + 0xec41adadU, 0x67b3d4d4U, 0xfd5fa2a2U, 0xea45afafU, + 0xbf239c9cU, 0xf753a4a4U, 0x96e47272U, 0x5b9bc0c0U, + 0xc275b7b7U, 0x1ce1fdfdU, 0xae3d9393U, 0x6a4c2626U, + 0x5a6c3636U, 0x417e3f3fU, 0x02f5f7f7U, 0x4f83ccccU, + 0x5c683434U, 0xf451a5a5U, 0x34d1e5e5U, 0x08f9f1f1U, + 0x93e27171U, 0x73abd8d8U, 0x53623131U, 0x3f2a1515U, + 0x0c080404U, 0x5295c7c7U, 0x65462323U, 0x5e9dc3c3U, + 0x28301818U, 0xa1379696U, 0x0f0a0505U, 0xb52f9a9aU, + 0x090e0707U, 0x36241212U, 0x9b1b8080U, 0x3ddfe2e2U, + 0x26cdebebU, 0x694e2727U, 0xcd7fb2b2U, 0x9fea7575U, + 0x1b120909U, 0x9e1d8383U, 0x74582c2cU, 0x2e341a1aU, + 0x2d361b1bU, 0xb2dc6e6eU, 0xeeb45a5aU, 0xfb5ba0a0U, + 0xf6a45252U, 0x4d763b3bU, 0x61b7d6d6U, 0xce7db3b3U, + 0x7b522929U, 0x3edde3e3U, 0x715e2f2fU, 0x97138484U, + 0xf5a65353U, 0x68b9d1d1U, 0x00000000U, 0x2cc1ededU, + 0x60402020U, 0x1fe3fcfcU, 0xc879b1b1U, 0xedb65b5bU, + 0xbed46a6aU, 0x468dcbcbU, 0xd967bebeU, 0x4b723939U, + 0xde944a4aU, 0xd4984c4cU, 0xe8b05858U, 0x4a85cfcfU, + 0x6bbbd0d0U, 0x2ac5efefU, 0xe54faaaaU, 0x16edfbfbU, + 0xc5864343U, 0xd79a4d4dU, 0x55663333U, 0x94118585U, + 0xcf8a4545U, 0x10e9f9f9U, 0x06040202U, 0x81fe7f7fU, + 0xf0a05050U, 0x44783c3cU, 0xba259f9fU, 0xe34ba8a8U, + 0xf3a25151U, 0xfe5da3a3U, 0xc0804040U, 0x8a058f8fU, + 0xad3f9292U, 0xbc219d9dU, 0x48703838U, 0x04f1f5f5U, + 0xdf63bcbcU, 0xc177b6b6U, 0x75afdadaU, 0x63422121U, + 0x30201010U, 0x1ae5ffffU, 0x0efdf3f3U, 0x6dbfd2d2U, + 0x4c81cdcdU, 0x14180c0cU, 0x35261313U, 0x2fc3ececU, + 0xe1be5f5fU, 0xa2359797U, 0xcc884444U, 0x392e1717U, + 0x5793c4c4U, 0xf255a7a7U, 0x82fc7e7eU, 0x477a3d3dU, + 0xacc86464U, 0xe7ba5d5dU, 0x2b321919U, 0x95e67373U, + 0xa0c06060U, 0x98198181U, 0xd19e4f4fU, 0x7fa3dcdcU, + 0x66442222U, 0x7e542a2aU, 0xab3b9090U, 0x830b8888U, + 0xca8c4646U, 0x29c7eeeeU, 0xd36bb8b8U, 0x3c281414U, + 0x79a7dedeU, 0xe2bc5e5eU, 0x1d160b0bU, 0x76addbdbU, + 0x3bdbe0e0U, 0x56643232U, 0x4e743a3aU, 0x1e140a0aU, + 0xdb924949U, 0x0a0c0606U, 0x6c482424U, 0xe4b85c5cU, + 0x5d9fc2c2U, 0x6ebdd3d3U, 0xef43acacU, 0xa6c46262U, + 0xa8399191U, 0xa4319595U, 0x37d3e4e4U, 0x8bf27979U, + 0x32d5e7e7U, 0x438bc8c8U, 0x596e3737U, 0xb7da6d6dU, + 0x8c018d8dU, 0x64b1d5d5U, 0xd29c4e4eU, 0xe049a9a9U, + 0xb4d86c6cU, 0xfaac5656U, 0x07f3f4f4U, 0x25cfeaeaU, + 0xafca6565U, 0x8ef47a7aU, 0xe947aeaeU, 0x18100808U, + 0xd56fbabaU, 0x88f07878U, 0x6f4a2525U, 0x725c2e2eU, + 0x24381c1cU, 0xf157a6a6U, 0xc773b4b4U, 0x5197c6c6U, + 0x23cbe8e8U, 0x7ca1ddddU, 0x9ce87474U, 0x213e1f1fU, + 0xdd964b4bU, 0xdc61bdbdU, 0x860d8b8bU, 0x850f8a8aU, + 0x90e07070U, 0x427c3e3eU, 0xc471b5b5U, 0xaacc6666U, + 0xd8904848U, 0x05060303U, 0x01f7f6f6U, 0x121c0e0eU, + 0xa3c26161U, 0x5f6a3535U, 0xf9ae5757U, 0xd069b9b9U, + 0x91178686U, 0x5899c1c1U, 0x273a1d1dU, 0xb9279e9eU, + 0x38d9e1e1U, 0x13ebf8f8U, 0xb32b9898U, 0x33221111U, + 0xbbd26969U, 0x70a9d9d9U, 0x89078e8eU, 0xa7339494U, + 0xb62d9b9bU, 0x223c1e1eU, 0x92158787U, 0x20c9e9e9U, + 0x4987ceceU, 0xffaa5555U, 0x78502828U, 0x7aa5dfdfU, + 0x8f038c8cU, 0xf859a1a1U, 0x80098989U, 0x171a0d0dU, + 0xda65bfbfU, 0x31d7e6e6U, 0xc6844242U, 0xb8d06868U, + 0xc3824141U, 0xb0299999U, 0x775a2d2dU, 0x111e0f0fU, + 0xcb7bb0b0U, 0xfca85454U, 0xd66dbbbbU, 0x3a2c1616U, +}; +const uint32_t AES_Te2[256] = { + 0x63a5c663U, 0x7c84f87cU, 0x7799ee77U, 0x7b8df67bU, + 0xf20dfff2U, 0x6bbdd66bU, 0x6fb1de6fU, 0xc55491c5U, + 0x30506030U, 0x01030201U, 0x67a9ce67U, 0x2b7d562bU, + 0xfe19e7feU, 0xd762b5d7U, 0xabe64dabU, 0x769aec76U, + 0xca458fcaU, 0x829d1f82U, 0xc94089c9U, 0x7d87fa7dU, + 0xfa15effaU, 0x59ebb259U, 0x47c98e47U, 0xf00bfbf0U, + 0xadec41adU, 0xd467b3d4U, 0xa2fd5fa2U, 0xafea45afU, + 0x9cbf239cU, 0xa4f753a4U, 0x7296e472U, 0xc05b9bc0U, + 0xb7c275b7U, 0xfd1ce1fdU, 0x93ae3d93U, 0x266a4c26U, + 0x365a6c36U, 0x3f417e3fU, 0xf702f5f7U, 0xcc4f83ccU, + 0x345c6834U, 0xa5f451a5U, 0xe534d1e5U, 0xf108f9f1U, + 0x7193e271U, 0xd873abd8U, 0x31536231U, 0x153f2a15U, + 0x040c0804U, 0xc75295c7U, 0x23654623U, 0xc35e9dc3U, + 0x18283018U, 0x96a13796U, 0x050f0a05U, 0x9ab52f9aU, + 0x07090e07U, 0x12362412U, 0x809b1b80U, 0xe23ddfe2U, + 0xeb26cdebU, 0x27694e27U, 0xb2cd7fb2U, 0x759fea75U, + 0x091b1209U, 0x839e1d83U, 0x2c74582cU, 0x1a2e341aU, + 0x1b2d361bU, 0x6eb2dc6eU, 0x5aeeb45aU, 0xa0fb5ba0U, + 0x52f6a452U, 0x3b4d763bU, 0xd661b7d6U, 0xb3ce7db3U, + 0x297b5229U, 0xe33edde3U, 0x2f715e2fU, 0x84971384U, + 0x53f5a653U, 0xd168b9d1U, 0x00000000U, 0xed2cc1edU, + 0x20604020U, 0xfc1fe3fcU, 0xb1c879b1U, 0x5bedb65bU, + 0x6abed46aU, 0xcb468dcbU, 0xbed967beU, 0x394b7239U, + 0x4ade944aU, 0x4cd4984cU, 0x58e8b058U, 0xcf4a85cfU, + 0xd06bbbd0U, 0xef2ac5efU, 0xaae54faaU, 0xfb16edfbU, + 0x43c58643U, 0x4dd79a4dU, 0x33556633U, 0x85941185U, + 0x45cf8a45U, 0xf910e9f9U, 0x02060402U, 0x7f81fe7fU, + 0x50f0a050U, 0x3c44783cU, 0x9fba259fU, 0xa8e34ba8U, + 0x51f3a251U, 0xa3fe5da3U, 0x40c08040U, 0x8f8a058fU, + 0x92ad3f92U, 0x9dbc219dU, 0x38487038U, 0xf504f1f5U, + 0xbcdf63bcU, 0xb6c177b6U, 0xda75afdaU, 0x21634221U, + 0x10302010U, 0xff1ae5ffU, 0xf30efdf3U, 0xd26dbfd2U, + 0xcd4c81cdU, 0x0c14180cU, 0x13352613U, 0xec2fc3ecU, + 0x5fe1be5fU, 0x97a23597U, 0x44cc8844U, 0x17392e17U, + 0xc45793c4U, 0xa7f255a7U, 0x7e82fc7eU, 0x3d477a3dU, + 0x64acc864U, 0x5de7ba5dU, 0x192b3219U, 0x7395e673U, + 0x60a0c060U, 0x81981981U, 0x4fd19e4fU, 0xdc7fa3dcU, + 0x22664422U, 0x2a7e542aU, 0x90ab3b90U, 0x88830b88U, + 0x46ca8c46U, 0xee29c7eeU, 0xb8d36bb8U, 0x143c2814U, + 0xde79a7deU, 0x5ee2bc5eU, 0x0b1d160bU, 0xdb76addbU, + 0xe03bdbe0U, 0x32566432U, 0x3a4e743aU, 0x0a1e140aU, + 0x49db9249U, 0x060a0c06U, 0x246c4824U, 0x5ce4b85cU, + 0xc25d9fc2U, 0xd36ebdd3U, 0xacef43acU, 0x62a6c462U, + 0x91a83991U, 0x95a43195U, 0xe437d3e4U, 0x798bf279U, + 0xe732d5e7U, 0xc8438bc8U, 0x37596e37U, 0x6db7da6dU, + 0x8d8c018dU, 0xd564b1d5U, 0x4ed29c4eU, 0xa9e049a9U, + 0x6cb4d86cU, 0x56faac56U, 0xf407f3f4U, 0xea25cfeaU, + 0x65afca65U, 0x7a8ef47aU, 0xaee947aeU, 0x08181008U, + 0xbad56fbaU, 0x7888f078U, 0x256f4a25U, 0x2e725c2eU, + 0x1c24381cU, 0xa6f157a6U, 0xb4c773b4U, 0xc65197c6U, + 0xe823cbe8U, 0xdd7ca1ddU, 0x749ce874U, 0x1f213e1fU, + 0x4bdd964bU, 0xbddc61bdU, 0x8b860d8bU, 0x8a850f8aU, + 0x7090e070U, 0x3e427c3eU, 0xb5c471b5U, 0x66aacc66U, + 0x48d89048U, 0x03050603U, 0xf601f7f6U, 0x0e121c0eU, + 0x61a3c261U, 0x355f6a35U, 0x57f9ae57U, 0xb9d069b9U, + 0x86911786U, 0xc15899c1U, 0x1d273a1dU, 0x9eb9279eU, + 0xe138d9e1U, 0xf813ebf8U, 0x98b32b98U, 0x11332211U, + 0x69bbd269U, 0xd970a9d9U, 0x8e89078eU, 0x94a73394U, + 0x9bb62d9bU, 0x1e223c1eU, 0x87921587U, 0xe920c9e9U, + 0xce4987ceU, 0x55ffaa55U, 0x28785028U, 0xdf7aa5dfU, + 0x8c8f038cU, 0xa1f859a1U, 0x89800989U, 0x0d171a0dU, + 0xbfda65bfU, 0xe631d7e6U, 0x42c68442U, 0x68b8d068U, + 0x41c38241U, 0x99b02999U, 0x2d775a2dU, 0x0f111e0fU, + 0xb0cb7bb0U, 0x54fca854U, 0xbbd66dbbU, 0x163a2c16U, +}; +const uint32_t AES_Te3[256] = { + + 0x6363a5c6U, 0x7c7c84f8U, 0x777799eeU, 0x7b7b8df6U, + 0xf2f20dffU, 0x6b6bbdd6U, 0x6f6fb1deU, 0xc5c55491U, + 0x30305060U, 0x01010302U, 0x6767a9ceU, 0x2b2b7d56U, + 0xfefe19e7U, 0xd7d762b5U, 0xababe64dU, 0x76769aecU, + 0xcaca458fU, 0x82829d1fU, 0xc9c94089U, 0x7d7d87faU, + 0xfafa15efU, 0x5959ebb2U, 0x4747c98eU, 0xf0f00bfbU, + 0xadadec41U, 0xd4d467b3U, 0xa2a2fd5fU, 0xafafea45U, + 0x9c9cbf23U, 0xa4a4f753U, 0x727296e4U, 0xc0c05b9bU, + 0xb7b7c275U, 0xfdfd1ce1U, 0x9393ae3dU, 0x26266a4cU, + 0x36365a6cU, 0x3f3f417eU, 0xf7f702f5U, 0xcccc4f83U, + 0x34345c68U, 0xa5a5f451U, 0xe5e534d1U, 0xf1f108f9U, + 0x717193e2U, 0xd8d873abU, 0x31315362U, 0x15153f2aU, + 0x04040c08U, 0xc7c75295U, 0x23236546U, 0xc3c35e9dU, + 0x18182830U, 0x9696a137U, 0x05050f0aU, 0x9a9ab52fU, + 0x0707090eU, 0x12123624U, 0x80809b1bU, 0xe2e23ddfU, + 0xebeb26cdU, 0x2727694eU, 0xb2b2cd7fU, 0x75759feaU, + 0x09091b12U, 0x83839e1dU, 0x2c2c7458U, 0x1a1a2e34U, + 0x1b1b2d36U, 0x6e6eb2dcU, 0x5a5aeeb4U, 0xa0a0fb5bU, + 0x5252f6a4U, 0x3b3b4d76U, 0xd6d661b7U, 0xb3b3ce7dU, + 0x29297b52U, 0xe3e33eddU, 0x2f2f715eU, 0x84849713U, + 0x5353f5a6U, 0xd1d168b9U, 0x00000000U, 0xeded2cc1U, + 0x20206040U, 0xfcfc1fe3U, 0xb1b1c879U, 0x5b5bedb6U, + 0x6a6abed4U, 0xcbcb468dU, 0xbebed967U, 0x39394b72U, + 0x4a4ade94U, 0x4c4cd498U, 0x5858e8b0U, 0xcfcf4a85U, + 0xd0d06bbbU, 0xefef2ac5U, 0xaaaae54fU, 0xfbfb16edU, + 0x4343c586U, 0x4d4dd79aU, 0x33335566U, 0x85859411U, + 0x4545cf8aU, 0xf9f910e9U, 0x02020604U, 0x7f7f81feU, + 0x5050f0a0U, 0x3c3c4478U, 0x9f9fba25U, 0xa8a8e34bU, + 0x5151f3a2U, 0xa3a3fe5dU, 0x4040c080U, 0x8f8f8a05U, + 0x9292ad3fU, 0x9d9dbc21U, 0x38384870U, 0xf5f504f1U, + 0xbcbcdf63U, 0xb6b6c177U, 0xdada75afU, 0x21216342U, + 0x10103020U, 0xffff1ae5U, 0xf3f30efdU, 0xd2d26dbfU, + 0xcdcd4c81U, 0x0c0c1418U, 0x13133526U, 0xecec2fc3U, + 0x5f5fe1beU, 0x9797a235U, 0x4444cc88U, 0x1717392eU, + 0xc4c45793U, 0xa7a7f255U, 0x7e7e82fcU, 0x3d3d477aU, + 0x6464acc8U, 0x5d5de7baU, 0x19192b32U, 0x737395e6U, + 0x6060a0c0U, 0x81819819U, 0x4f4fd19eU, 0xdcdc7fa3U, + 0x22226644U, 0x2a2a7e54U, 0x9090ab3bU, 0x8888830bU, + 0x4646ca8cU, 0xeeee29c7U, 0xb8b8d36bU, 0x14143c28U, + 0xdede79a7U, 0x5e5ee2bcU, 0x0b0b1d16U, 0xdbdb76adU, + 0xe0e03bdbU, 0x32325664U, 0x3a3a4e74U, 0x0a0a1e14U, + 0x4949db92U, 0x06060a0cU, 0x24246c48U, 0x5c5ce4b8U, + 0xc2c25d9fU, 0xd3d36ebdU, 0xacacef43U, 0x6262a6c4U, + 0x9191a839U, 0x9595a431U, 0xe4e437d3U, 0x79798bf2U, + 0xe7e732d5U, 0xc8c8438bU, 0x3737596eU, 0x6d6db7daU, + 0x8d8d8c01U, 0xd5d564b1U, 0x4e4ed29cU, 0xa9a9e049U, + 0x6c6cb4d8U, 0x5656faacU, 0xf4f407f3U, 0xeaea25cfU, + 0x6565afcaU, 0x7a7a8ef4U, 0xaeaee947U, 0x08081810U, + 0xbabad56fU, 0x787888f0U, 0x25256f4aU, 0x2e2e725cU, + 0x1c1c2438U, 0xa6a6f157U, 0xb4b4c773U, 0xc6c65197U, + 0xe8e823cbU, 0xdddd7ca1U, 0x74749ce8U, 0x1f1f213eU, + 0x4b4bdd96U, 0xbdbddc61U, 0x8b8b860dU, 0x8a8a850fU, + 0x707090e0U, 0x3e3e427cU, 0xb5b5c471U, 0x6666aaccU, + 0x4848d890U, 0x03030506U, 0xf6f601f7U, 0x0e0e121cU, + 0x6161a3c2U, 0x35355f6aU, 0x5757f9aeU, 0xb9b9d069U, + 0x86869117U, 0xc1c15899U, 0x1d1d273aU, 0x9e9eb927U, + 0xe1e138d9U, 0xf8f813ebU, 0x9898b32bU, 0x11113322U, + 0x6969bbd2U, 0xd9d970a9U, 0x8e8e8907U, 0x9494a733U, + 0x9b9bb62dU, 0x1e1e223cU, 0x87879215U, 0xe9e920c9U, + 0xcece4987U, 0x5555ffaaU, 0x28287850U, 0xdfdf7aa5U, + 0x8c8c8f03U, 0xa1a1f859U, 0x89898009U, 0x0d0d171aU, + 0xbfbfda65U, 0xe6e631d7U, 0x4242c684U, 0x6868b8d0U, + 0x4141c382U, 0x9999b029U, 0x2d2d775aU, 0x0f0f111eU, + 0xb0b0cb7bU, 0x5454fca8U, 0xbbbbd66dU, 0x16163a2cU, +}; +const uint32_t AES_Te4[256] = { + 0x63636363U, 0x7c7c7c7cU, 0x77777777U, 0x7b7b7b7bU, + 0xf2f2f2f2U, 0x6b6b6b6bU, 0x6f6f6f6fU, 0xc5c5c5c5U, + 0x30303030U, 0x01010101U, 0x67676767U, 0x2b2b2b2bU, + 0xfefefefeU, 0xd7d7d7d7U, 0xababababU, 0x76767676U, + 0xcacacacaU, 0x82828282U, 0xc9c9c9c9U, 0x7d7d7d7dU, + 0xfafafafaU, 0x59595959U, 0x47474747U, 0xf0f0f0f0U, + 0xadadadadU, 0xd4d4d4d4U, 0xa2a2a2a2U, 0xafafafafU, + 0x9c9c9c9cU, 0xa4a4a4a4U, 0x72727272U, 0xc0c0c0c0U, + 0xb7b7b7b7U, 0xfdfdfdfdU, 0x93939393U, 0x26262626U, + 0x36363636U, 0x3f3f3f3fU, 0xf7f7f7f7U, 0xccccccccU, + 0x34343434U, 0xa5a5a5a5U, 0xe5e5e5e5U, 0xf1f1f1f1U, + 0x71717171U, 0xd8d8d8d8U, 0x31313131U, 0x15151515U, + 0x04040404U, 0xc7c7c7c7U, 0x23232323U, 0xc3c3c3c3U, + 0x18181818U, 0x96969696U, 0x05050505U, 0x9a9a9a9aU, + 0x07070707U, 0x12121212U, 0x80808080U, 0xe2e2e2e2U, + 0xebebebebU, 0x27272727U, 0xb2b2b2b2U, 0x75757575U, + 0x09090909U, 0x83838383U, 0x2c2c2c2cU, 0x1a1a1a1aU, + 0x1b1b1b1bU, 0x6e6e6e6eU, 0x5a5a5a5aU, 0xa0a0a0a0U, + 0x52525252U, 0x3b3b3b3bU, 0xd6d6d6d6U, 0xb3b3b3b3U, + 0x29292929U, 0xe3e3e3e3U, 0x2f2f2f2fU, 0x84848484U, + 0x53535353U, 0xd1d1d1d1U, 0x00000000U, 0xededededU, + 0x20202020U, 0xfcfcfcfcU, 0xb1b1b1b1U, 0x5b5b5b5bU, + 0x6a6a6a6aU, 0xcbcbcbcbU, 0xbebebebeU, 0x39393939U, + 0x4a4a4a4aU, 0x4c4c4c4cU, 0x58585858U, 0xcfcfcfcfU, + 0xd0d0d0d0U, 0xefefefefU, 0xaaaaaaaaU, 0xfbfbfbfbU, + 0x43434343U, 0x4d4d4d4dU, 0x33333333U, 0x85858585U, + 0x45454545U, 0xf9f9f9f9U, 0x02020202U, 0x7f7f7f7fU, + 0x50505050U, 0x3c3c3c3cU, 0x9f9f9f9fU, 0xa8a8a8a8U, + 0x51515151U, 0xa3a3a3a3U, 0x40404040U, 0x8f8f8f8fU, + 0x92929292U, 0x9d9d9d9dU, 0x38383838U, 0xf5f5f5f5U, + 0xbcbcbcbcU, 0xb6b6b6b6U, 0xdadadadaU, 0x21212121U, + 0x10101010U, 0xffffffffU, 0xf3f3f3f3U, 0xd2d2d2d2U, + 0xcdcdcdcdU, 0x0c0c0c0cU, 0x13131313U, 0xececececU, + 0x5f5f5f5fU, 0x97979797U, 0x44444444U, 0x17171717U, + 0xc4c4c4c4U, 0xa7a7a7a7U, 0x7e7e7e7eU, 0x3d3d3d3dU, + 0x64646464U, 0x5d5d5d5dU, 0x19191919U, 0x73737373U, + 0x60606060U, 0x81818181U, 0x4f4f4f4fU, 0xdcdcdcdcU, + 0x22222222U, 0x2a2a2a2aU, 0x90909090U, 0x88888888U, + 0x46464646U, 0xeeeeeeeeU, 0xb8b8b8b8U, 0x14141414U, + 0xdedededeU, 0x5e5e5e5eU, 0x0b0b0b0bU, 0xdbdbdbdbU, + 0xe0e0e0e0U, 0x32323232U, 0x3a3a3a3aU, 0x0a0a0a0aU, + 0x49494949U, 0x06060606U, 0x24242424U, 0x5c5c5c5cU, + 0xc2c2c2c2U, 0xd3d3d3d3U, 0xacacacacU, 0x62626262U, + 0x91919191U, 0x95959595U, 0xe4e4e4e4U, 0x79797979U, + 0xe7e7e7e7U, 0xc8c8c8c8U, 0x37373737U, 0x6d6d6d6dU, + 0x8d8d8d8dU, 0xd5d5d5d5U, 0x4e4e4e4eU, 0xa9a9a9a9U, + 0x6c6c6c6cU, 0x56565656U, 0xf4f4f4f4U, 0xeaeaeaeaU, + 0x65656565U, 0x7a7a7a7aU, 0xaeaeaeaeU, 0x08080808U, + 0xbabababaU, 0x78787878U, 0x25252525U, 0x2e2e2e2eU, + 0x1c1c1c1cU, 0xa6a6a6a6U, 0xb4b4b4b4U, 0xc6c6c6c6U, + 0xe8e8e8e8U, 0xddddddddU, 0x74747474U, 0x1f1f1f1fU, + 0x4b4b4b4bU, 0xbdbdbdbdU, 0x8b8b8b8bU, 0x8a8a8a8aU, + 0x70707070U, 0x3e3e3e3eU, 0xb5b5b5b5U, 0x66666666U, + 0x48484848U, 0x03030303U, 0xf6f6f6f6U, 0x0e0e0e0eU, + 0x61616161U, 0x35353535U, 0x57575757U, 0xb9b9b9b9U, + 0x86868686U, 0xc1c1c1c1U, 0x1d1d1d1dU, 0x9e9e9e9eU, + 0xe1e1e1e1U, 0xf8f8f8f8U, 0x98989898U, 0x11111111U, + 0x69696969U, 0xd9d9d9d9U, 0x8e8e8e8eU, 0x94949494U, + 0x9b9b9b9bU, 0x1e1e1e1eU, 0x87878787U, 0xe9e9e9e9U, + 0xcecececeU, 0x55555555U, 0x28282828U, 0xdfdfdfdfU, + 0x8c8c8c8cU, 0xa1a1a1a1U, 0x89898989U, 0x0d0d0d0dU, + 0xbfbfbfbfU, 0xe6e6e6e6U, 0x42424242U, 0x68686868U, + 0x41414141U, 0x99999999U, 0x2d2d2d2dU, 0x0f0f0f0fU, + 0xb0b0b0b0U, 0x54545454U, 0xbbbbbbbbU, 0x16161616U, +}; +const uint32_t AES_Td0[256] = { + 0x51f4a750U, 0x7e416553U, 0x1a17a4c3U, 0x3a275e96U, + 0x3bab6bcbU, 0x1f9d45f1U, 0xacfa58abU, 0x4be30393U, + 0x2030fa55U, 0xad766df6U, 0x88cc7691U, 0xf5024c25U, + 0x4fe5d7fcU, 0xc52acbd7U, 0x26354480U, 0xb562a38fU, + 0xdeb15a49U, 0x25ba1b67U, 0x45ea0e98U, 0x5dfec0e1U, + 0xc32f7502U, 0x814cf012U, 0x8d4697a3U, 0x6bd3f9c6U, + 0x038f5fe7U, 0x15929c95U, 0xbf6d7aebU, 0x955259daU, + 0xd4be832dU, 0x587421d3U, 0x49e06929U, 0x8ec9c844U, + 0x75c2896aU, 0xf48e7978U, 0x99583e6bU, 0x27b971ddU, + 0xbee14fb6U, 0xf088ad17U, 0xc920ac66U, 0x7dce3ab4U, + 0x63df4a18U, 0xe51a3182U, 0x97513360U, 0x62537f45U, + 0xb16477e0U, 0xbb6bae84U, 0xfe81a01cU, 0xf9082b94U, + 0x70486858U, 0x8f45fd19U, 0x94de6c87U, 0x527bf8b7U, + 0xab73d323U, 0x724b02e2U, 0xe31f8f57U, 0x6655ab2aU, + 0xb2eb2807U, 0x2fb5c203U, 0x86c57b9aU, 0xd33708a5U, + 0x302887f2U, 0x23bfa5b2U, 0x02036abaU, 0xed16825cU, + 0x8acf1c2bU, 0xa779b492U, 0xf307f2f0U, 0x4e69e2a1U, + 0x65daf4cdU, 0x0605bed5U, 0xd134621fU, 0xc4a6fe8aU, + 0x342e539dU, 0xa2f355a0U, 0x058ae132U, 0xa4f6eb75U, + 0x0b83ec39U, 0x4060efaaU, 0x5e719f06U, 0xbd6e1051U, + 0x3e218af9U, 0x96dd063dU, 0xdd3e05aeU, 0x4de6bd46U, + 0x91548db5U, 0x71c45d05U, 0x0406d46fU, 0x605015ffU, + 0x1998fb24U, 0xd6bde997U, 0x894043ccU, 0x67d99e77U, + 0xb0e842bdU, 0x07898b88U, 0xe7195b38U, 0x79c8eedbU, + 0xa17c0a47U, 0x7c420fe9U, 0xf8841ec9U, 0x00000000U, + 0x09808683U, 0x322bed48U, 0x1e1170acU, 0x6c5a724eU, + 0xfd0efffbU, 0x0f853856U, 0x3daed51eU, 0x362d3927U, + 0x0a0fd964U, 0x685ca621U, 0x9b5b54d1U, 0x24362e3aU, + 0x0c0a67b1U, 0x9357e70fU, 0xb4ee96d2U, 0x1b9b919eU, + 0x80c0c54fU, 0x61dc20a2U, 0x5a774b69U, 0x1c121a16U, + 0xe293ba0aU, 0xc0a02ae5U, 0x3c22e043U, 0x121b171dU, + 0x0e090d0bU, 0xf28bc7adU, 0x2db6a8b9U, 0x141ea9c8U, + 0x57f11985U, 0xaf75074cU, 0xee99ddbbU, 0xa37f60fdU, + 0xf701269fU, 0x5c72f5bcU, 0x44663bc5U, 0x5bfb7e34U, + 0x8b432976U, 0xcb23c6dcU, 0xb6edfc68U, 0xb8e4f163U, + 0xd731dccaU, 0x42638510U, 0x13972240U, 0x84c61120U, + 0x854a247dU, 0xd2bb3df8U, 0xaef93211U, 0xc729a16dU, + 0x1d9e2f4bU, 0xdcb230f3U, 0x0d8652ecU, 0x77c1e3d0U, + 0x2bb3166cU, 0xa970b999U, 0x119448faU, 0x47e96422U, + 0xa8fc8cc4U, 0xa0f03f1aU, 0x567d2cd8U, 0x223390efU, + 0x87494ec7U, 0xd938d1c1U, 0x8ccaa2feU, 0x98d40b36U, + 0xa6f581cfU, 0xa57ade28U, 0xdab78e26U, 0x3fadbfa4U, + 0x2c3a9de4U, 0x5078920dU, 0x6a5fcc9bU, 0x547e4662U, + 0xf68d13c2U, 0x90d8b8e8U, 0x2e39f75eU, 0x82c3aff5U, + 0x9f5d80beU, 0x69d0937cU, 0x6fd52da9U, 0xcf2512b3U, + 0xc8ac993bU, 0x10187da7U, 0xe89c636eU, 0xdb3bbb7bU, + 0xcd267809U, 0x6e5918f4U, 0xec9ab701U, 0x834f9aa8U, + 0xe6956e65U, 0xaaffe67eU, 0x21bccf08U, 0xef15e8e6U, + 0xbae79bd9U, 0x4a6f36ceU, 0xea9f09d4U, 0x29b07cd6U, + 0x31a4b2afU, 0x2a3f2331U, 0xc6a59430U, 0x35a266c0U, + 0x744ebc37U, 0xfc82caa6U, 0xe090d0b0U, 0x33a7d815U, + 0xf104984aU, 0x41ecdaf7U, 0x7fcd500eU, 0x1791f62fU, + 0x764dd68dU, 0x43efb04dU, 0xccaa4d54U, 0xe49604dfU, + 0x9ed1b5e3U, 0x4c6a881bU, 0xc12c1fb8U, 0x4665517fU, + 0x9d5eea04U, 0x018c355dU, 0xfa877473U, 0xfb0b412eU, + 0xb3671d5aU, 0x92dbd252U, 0xe9105633U, 0x6dd64713U, + 0x9ad7618cU, 0x37a10c7aU, 0x59f8148eU, 0xeb133c89U, + 0xcea927eeU, 0xb761c935U, 0xe11ce5edU, 0x7a47b13cU, + 0x9cd2df59U, 0x55f2733fU, 0x1814ce79U, 0x73c737bfU, + 0x53f7cdeaU, 0x5ffdaa5bU, 0xdf3d6f14U, 0x7844db86U, + 0xcaaff381U, 0xb968c43eU, 0x3824342cU, 0xc2a3405fU, + 0x161dc372U, 0xbce2250cU, 0x283c498bU, 0xff0d9541U, + 0x39a80171U, 0x080cb3deU, 0xd8b4e49cU, 0x6456c190U, + 0x7bcb8461U, 0xd532b670U, 0x486c5c74U, 0xd0b85742U, +}; +const uint32_t AES_Td1[256] = { + 0x5051f4a7U, 0x537e4165U, 0xc31a17a4U, 0x963a275eU, + 0xcb3bab6bU, 0xf11f9d45U, 0xabacfa58U, 0x934be303U, + 0x552030faU, 0xf6ad766dU, 0x9188cc76U, 0x25f5024cU, + 0xfc4fe5d7U, 0xd7c52acbU, 0x80263544U, 0x8fb562a3U, + 0x49deb15aU, 0x6725ba1bU, 0x9845ea0eU, 0xe15dfec0U, + 0x02c32f75U, 0x12814cf0U, 0xa38d4697U, 0xc66bd3f9U, + 0xe7038f5fU, 0x9515929cU, 0xebbf6d7aU, 0xda955259U, + 0x2dd4be83U, 0xd3587421U, 0x2949e069U, 0x448ec9c8U, + 0x6a75c289U, 0x78f48e79U, 0x6b99583eU, 0xdd27b971U, + 0xb6bee14fU, 0x17f088adU, 0x66c920acU, 0xb47dce3aU, + 0x1863df4aU, 0x82e51a31U, 0x60975133U, 0x4562537fU, + 0xe0b16477U, 0x84bb6baeU, 0x1cfe81a0U, 0x94f9082bU, + 0x58704868U, 0x198f45fdU, 0x8794de6cU, 0xb7527bf8U, + 0x23ab73d3U, 0xe2724b02U, 0x57e31f8fU, 0x2a6655abU, + 0x07b2eb28U, 0x032fb5c2U, 0x9a86c57bU, 0xa5d33708U, + 0xf2302887U, 0xb223bfa5U, 0xba02036aU, 0x5ced1682U, + 0x2b8acf1cU, 0x92a779b4U, 0xf0f307f2U, 0xa14e69e2U, + 0xcd65daf4U, 0xd50605beU, 0x1fd13462U, 0x8ac4a6feU, + 0x9d342e53U, 0xa0a2f355U, 0x32058ae1U, 0x75a4f6ebU, + 0x390b83ecU, 0xaa4060efU, 0x065e719fU, 0x51bd6e10U, + 0xf93e218aU, 0x3d96dd06U, 0xaedd3e05U, 0x464de6bdU, + 0xb591548dU, 0x0571c45dU, 0x6f0406d4U, 0xff605015U, + 0x241998fbU, 0x97d6bde9U, 0xcc894043U, 0x7767d99eU, + 0xbdb0e842U, 0x8807898bU, 0x38e7195bU, 0xdb79c8eeU, + 0x47a17c0aU, 0xe97c420fU, 0xc9f8841eU, 0x00000000U, + 0x83098086U, 0x48322bedU, 0xac1e1170U, 0x4e6c5a72U, + 0xfbfd0effU, 0x560f8538U, 0x1e3daed5U, 0x27362d39U, + 0x640a0fd9U, 0x21685ca6U, 0xd19b5b54U, 0x3a24362eU, + 0xb10c0a67U, 0x0f9357e7U, 0xd2b4ee96U, 0x9e1b9b91U, + 0x4f80c0c5U, 0xa261dc20U, 0x695a774bU, 0x161c121aU, + 0x0ae293baU, 0xe5c0a02aU, 0x433c22e0U, 0x1d121b17U, + 0x0b0e090dU, 0xadf28bc7U, 0xb92db6a8U, 0xc8141ea9U, + 0x8557f119U, 0x4caf7507U, 0xbbee99ddU, 0xfda37f60U, + 0x9ff70126U, 0xbc5c72f5U, 0xc544663bU, 0x345bfb7eU, + 0x768b4329U, 0xdccb23c6U, 0x68b6edfcU, 0x63b8e4f1U, + 0xcad731dcU, 0x10426385U, 0x40139722U, 0x2084c611U, + 0x7d854a24U, 0xf8d2bb3dU, 0x11aef932U, 0x6dc729a1U, + 0x4b1d9e2fU, 0xf3dcb230U, 0xec0d8652U, 0xd077c1e3U, + 0x6c2bb316U, 0x99a970b9U, 0xfa119448U, 0x2247e964U, + 0xc4a8fc8cU, 0x1aa0f03fU, 0xd8567d2cU, 0xef223390U, + 0xc787494eU, 0xc1d938d1U, 0xfe8ccaa2U, 0x3698d40bU, + 0xcfa6f581U, 0x28a57adeU, 0x26dab78eU, 0xa43fadbfU, + 0xe42c3a9dU, 0x0d507892U, 0x9b6a5fccU, 0x62547e46U, + 0xc2f68d13U, 0xe890d8b8U, 0x5e2e39f7U, 0xf582c3afU, + 0xbe9f5d80U, 0x7c69d093U, 0xa96fd52dU, 0xb3cf2512U, + 0x3bc8ac99U, 0xa710187dU, 0x6ee89c63U, 0x7bdb3bbbU, + 0x09cd2678U, 0xf46e5918U, 0x01ec9ab7U, 0xa8834f9aU, + 0x65e6956eU, 0x7eaaffe6U, 0x0821bccfU, 0xe6ef15e8U, + 0xd9bae79bU, 0xce4a6f36U, 0xd4ea9f09U, 0xd629b07cU, + 0xaf31a4b2U, 0x312a3f23U, 0x30c6a594U, 0xc035a266U, + 0x37744ebcU, 0xa6fc82caU, 0xb0e090d0U, 0x1533a7d8U, + 0x4af10498U, 0xf741ecdaU, 0x0e7fcd50U, 0x2f1791f6U, + 0x8d764dd6U, 0x4d43efb0U, 0x54ccaa4dU, 0xdfe49604U, + 0xe39ed1b5U, 0x1b4c6a88U, 0xb8c12c1fU, 0x7f466551U, + 0x049d5eeaU, 0x5d018c35U, 0x73fa8774U, 0x2efb0b41U, + 0x5ab3671dU, 0x5292dbd2U, 0x33e91056U, 0x136dd647U, + 0x8c9ad761U, 0x7a37a10cU, 0x8e59f814U, 0x89eb133cU, + 0xeecea927U, 0x35b761c9U, 0xede11ce5U, 0x3c7a47b1U, + 0x599cd2dfU, 0x3f55f273U, 0x791814ceU, 0xbf73c737U, + 0xea53f7cdU, 0x5b5ffdaaU, 0x14df3d6fU, 0x867844dbU, + 0x81caaff3U, 0x3eb968c4U, 0x2c382434U, 0x5fc2a340U, + 0x72161dc3U, 0x0cbce225U, 0x8b283c49U, 0x41ff0d95U, + 0x7139a801U, 0xde080cb3U, 0x9cd8b4e4U, 0x906456c1U, + 0x617bcb84U, 0x70d532b6U, 0x74486c5cU, 0x42d0b857U, +}; +const uint32_t AES_Td2[256] = { + 0xa75051f4U, 0x65537e41U, 0xa4c31a17U, 0x5e963a27U, + 0x6bcb3babU, 0x45f11f9dU, 0x58abacfaU, 0x03934be3U, + 0xfa552030U, 0x6df6ad76U, 0x769188ccU, 0x4c25f502U, + 0xd7fc4fe5U, 0xcbd7c52aU, 0x44802635U, 0xa38fb562U, + 0x5a49deb1U, 0x1b6725baU, 0x0e9845eaU, 0xc0e15dfeU, + 0x7502c32fU, 0xf012814cU, 0x97a38d46U, 0xf9c66bd3U, + 0x5fe7038fU, 0x9c951592U, 0x7aebbf6dU, 0x59da9552U, + 0x832dd4beU, 0x21d35874U, 0x692949e0U, 0xc8448ec9U, + 0x896a75c2U, 0x7978f48eU, 0x3e6b9958U, 0x71dd27b9U, + 0x4fb6bee1U, 0xad17f088U, 0xac66c920U, 0x3ab47dceU, + 0x4a1863dfU, 0x3182e51aU, 0x33609751U, 0x7f456253U, + 0x77e0b164U, 0xae84bb6bU, 0xa01cfe81U, 0x2b94f908U, + 0x68587048U, 0xfd198f45U, 0x6c8794deU, 0xf8b7527bU, + 0xd323ab73U, 0x02e2724bU, 0x8f57e31fU, 0xab2a6655U, + 0x2807b2ebU, 0xc2032fb5U, 0x7b9a86c5U, 0x08a5d337U, + 0x87f23028U, 0xa5b223bfU, 0x6aba0203U, 0x825ced16U, + 0x1c2b8acfU, 0xb492a779U, 0xf2f0f307U, 0xe2a14e69U, + 0xf4cd65daU, 0xbed50605U, 0x621fd134U, 0xfe8ac4a6U, + 0x539d342eU, 0x55a0a2f3U, 0xe132058aU, 0xeb75a4f6U, + 0xec390b83U, 0xefaa4060U, 0x9f065e71U, 0x1051bd6eU, + + 0x8af93e21U, 0x063d96ddU, 0x05aedd3eU, 0xbd464de6U, + 0x8db59154U, 0x5d0571c4U, 0xd46f0406U, 0x15ff6050U, + 0xfb241998U, 0xe997d6bdU, 0x43cc8940U, 0x9e7767d9U, + 0x42bdb0e8U, 0x8b880789U, 0x5b38e719U, 0xeedb79c8U, + 0x0a47a17cU, 0x0fe97c42U, 0x1ec9f884U, 0x00000000U, + 0x86830980U, 0xed48322bU, 0x70ac1e11U, 0x724e6c5aU, + 0xfffbfd0eU, 0x38560f85U, 0xd51e3daeU, 0x3927362dU, + 0xd9640a0fU, 0xa621685cU, 0x54d19b5bU, 0x2e3a2436U, + 0x67b10c0aU, 0xe70f9357U, 0x96d2b4eeU, 0x919e1b9bU, + 0xc54f80c0U, 0x20a261dcU, 0x4b695a77U, 0x1a161c12U, + 0xba0ae293U, 0x2ae5c0a0U, 0xe0433c22U, 0x171d121bU, + 0x0d0b0e09U, 0xc7adf28bU, 0xa8b92db6U, 0xa9c8141eU, + 0x198557f1U, 0x074caf75U, 0xddbbee99U, 0x60fda37fU, + 0x269ff701U, 0xf5bc5c72U, 0x3bc54466U, 0x7e345bfbU, + 0x29768b43U, 0xc6dccb23U, 0xfc68b6edU, 0xf163b8e4U, + 0xdccad731U, 0x85104263U, 0x22401397U, 0x112084c6U, + 0x247d854aU, 0x3df8d2bbU, 0x3211aef9U, 0xa16dc729U, + 0x2f4b1d9eU, 0x30f3dcb2U, 0x52ec0d86U, 0xe3d077c1U, + 0x166c2bb3U, 0xb999a970U, 0x48fa1194U, 0x642247e9U, + 0x8cc4a8fcU, 0x3f1aa0f0U, 0x2cd8567dU, 0x90ef2233U, + 0x4ec78749U, 0xd1c1d938U, 0xa2fe8ccaU, 0x0b3698d4U, + 0x81cfa6f5U, 0xde28a57aU, 0x8e26dab7U, 0xbfa43fadU, + 0x9de42c3aU, 0x920d5078U, 0xcc9b6a5fU, 0x4662547eU, + 0x13c2f68dU, 0xb8e890d8U, 0xf75e2e39U, 0xaff582c3U, + 0x80be9f5dU, 0x937c69d0U, 0x2da96fd5U, 0x12b3cf25U, + 0x993bc8acU, 0x7da71018U, 0x636ee89cU, 0xbb7bdb3bU, + 0x7809cd26U, 0x18f46e59U, 0xb701ec9aU, 0x9aa8834fU, + 0x6e65e695U, 0xe67eaaffU, 0xcf0821bcU, 0xe8e6ef15U, + 0x9bd9bae7U, 0x36ce4a6fU, 0x09d4ea9fU, 0x7cd629b0U, + 0xb2af31a4U, 0x23312a3fU, 0x9430c6a5U, 0x66c035a2U, + 0xbc37744eU, 0xcaa6fc82U, 0xd0b0e090U, 0xd81533a7U, + 0x984af104U, 0xdaf741ecU, 0x500e7fcdU, 0xf62f1791U, + 0xd68d764dU, 0xb04d43efU, 0x4d54ccaaU, 0x04dfe496U, + 0xb5e39ed1U, 0x881b4c6aU, 0x1fb8c12cU, 0x517f4665U, + 0xea049d5eU, 0x355d018cU, 0x7473fa87U, 0x412efb0bU, + 0x1d5ab367U, 0xd25292dbU, 0x5633e910U, 0x47136dd6U, + 0x618c9ad7U, 0x0c7a37a1U, 0x148e59f8U, 0x3c89eb13U, + 0x27eecea9U, 0xc935b761U, 0xe5ede11cU, 0xb13c7a47U, + 0xdf599cd2U, 0x733f55f2U, 0xce791814U, 0x37bf73c7U, + 0xcdea53f7U, 0xaa5b5ffdU, 0x6f14df3dU, 0xdb867844U, + 0xf381caafU, 0xc43eb968U, 0x342c3824U, 0x405fc2a3U, + 0xc372161dU, 0x250cbce2U, 0x498b283cU, 0x9541ff0dU, + 0x017139a8U, 0xb3de080cU, 0xe49cd8b4U, 0xc1906456U, + 0x84617bcbU, 0xb670d532U, 0x5c74486cU, 0x5742d0b8U, +}; +const uint32_t AES_Td3[256] = { + 0xf4a75051U, 0x4165537eU, 0x17a4c31aU, 0x275e963aU, + 0xab6bcb3bU, 0x9d45f11fU, 0xfa58abacU, 0xe303934bU, + 0x30fa5520U, 0x766df6adU, 0xcc769188U, 0x024c25f5U, + 0xe5d7fc4fU, 0x2acbd7c5U, 0x35448026U, 0x62a38fb5U, + 0xb15a49deU, 0xba1b6725U, 0xea0e9845U, 0xfec0e15dU, + 0x2f7502c3U, 0x4cf01281U, 0x4697a38dU, 0xd3f9c66bU, + 0x8f5fe703U, 0x929c9515U, 0x6d7aebbfU, 0x5259da95U, + 0xbe832dd4U, 0x7421d358U, 0xe0692949U, 0xc9c8448eU, + 0xc2896a75U, 0x8e7978f4U, 0x583e6b99U, 0xb971dd27U, + 0xe14fb6beU, 0x88ad17f0U, 0x20ac66c9U, 0xce3ab47dU, + 0xdf4a1863U, 0x1a3182e5U, 0x51336097U, 0x537f4562U, + 0x6477e0b1U, 0x6bae84bbU, 0x81a01cfeU, 0x082b94f9U, + 0x48685870U, 0x45fd198fU, 0xde6c8794U, 0x7bf8b752U, + 0x73d323abU, 0x4b02e272U, 0x1f8f57e3U, 0x55ab2a66U, + 0xeb2807b2U, 0xb5c2032fU, 0xc57b9a86U, 0x3708a5d3U, + 0x2887f230U, 0xbfa5b223U, 0x036aba02U, 0x16825cedU, + 0xcf1c2b8aU, 0x79b492a7U, 0x07f2f0f3U, 0x69e2a14eU, + 0xdaf4cd65U, 0x05bed506U, 0x34621fd1U, 0xa6fe8ac4U, + 0x2e539d34U, 0xf355a0a2U, 0x8ae13205U, 0xf6eb75a4U, + 0x83ec390bU, 0x60efaa40U, 0x719f065eU, 0x6e1051bdU, + 0x218af93eU, 0xdd063d96U, 0x3e05aeddU, 0xe6bd464dU, + 0x548db591U, 0xc45d0571U, 0x06d46f04U, 0x5015ff60U, + 0x98fb2419U, 0xbde997d6U, 0x4043cc89U, 0xd99e7767U, + 0xe842bdb0U, 0x898b8807U, 0x195b38e7U, 0xc8eedb79U, + 0x7c0a47a1U, 0x420fe97cU, 0x841ec9f8U, 0x00000000U, + 0x80868309U, 0x2bed4832U, 0x1170ac1eU, 0x5a724e6cU, + 0x0efffbfdU, 0x8538560fU, 0xaed51e3dU, 0x2d392736U, + 0x0fd9640aU, 0x5ca62168U, 0x5b54d19bU, 0x362e3a24U, + 0x0a67b10cU, 0x57e70f93U, 0xee96d2b4U, 0x9b919e1bU, + 0xc0c54f80U, 0xdc20a261U, 0x774b695aU, 0x121a161cU, + 0x93ba0ae2U, 0xa02ae5c0U, 0x22e0433cU, 0x1b171d12U, + 0x090d0b0eU, 0x8bc7adf2U, 0xb6a8b92dU, 0x1ea9c814U, + 0xf1198557U, 0x75074cafU, 0x99ddbbeeU, 0x7f60fda3U, + 0x01269ff7U, 0x72f5bc5cU, 0x663bc544U, 0xfb7e345bU, + 0x4329768bU, 0x23c6dccbU, 0xedfc68b6U, 0xe4f163b8U, + 0x31dccad7U, 0x63851042U, 0x97224013U, 0xc6112084U, + 0x4a247d85U, 0xbb3df8d2U, 0xf93211aeU, 0x29a16dc7U, + 0x9e2f4b1dU, 0xb230f3dcU, 0x8652ec0dU, 0xc1e3d077U, + 0xb3166c2bU, 0x70b999a9U, 0x9448fa11U, 0xe9642247U, + 0xfc8cc4a8U, 0xf03f1aa0U, 0x7d2cd856U, 0x3390ef22U, + 0x494ec787U, 0x38d1c1d9U, 0xcaa2fe8cU, 0xd40b3698U, + 0xf581cfa6U, 0x7ade28a5U, 0xb78e26daU, 0xadbfa43fU, + 0x3a9de42cU, 0x78920d50U, 0x5fcc9b6aU, 0x7e466254U, + 0x8d13c2f6U, 0xd8b8e890U, 0x39f75e2eU, 0xc3aff582U, + 0x5d80be9fU, 0xd0937c69U, 0xd52da96fU, 0x2512b3cfU, + 0xac993bc8U, 0x187da710U, 0x9c636ee8U, 0x3bbb7bdbU, + 0x267809cdU, 0x5918f46eU, 0x9ab701ecU, 0x4f9aa883U, + 0x956e65e6U, 0xffe67eaaU, 0xbccf0821U, 0x15e8e6efU, + 0xe79bd9baU, 0x6f36ce4aU, 0x9f09d4eaU, 0xb07cd629U, + 0xa4b2af31U, 0x3f23312aU, 0xa59430c6U, 0xa266c035U, + 0x4ebc3774U, 0x82caa6fcU, 0x90d0b0e0U, 0xa7d81533U, + 0x04984af1U, 0xecdaf741U, 0xcd500e7fU, 0x91f62f17U, + 0x4dd68d76U, 0xefb04d43U, 0xaa4d54ccU, 0x9604dfe4U, + 0xd1b5e39eU, 0x6a881b4cU, 0x2c1fb8c1U, 0x65517f46U, + 0x5eea049dU, 0x8c355d01U, 0x877473faU, 0x0b412efbU, + 0x671d5ab3U, 0xdbd25292U, 0x105633e9U, 0xd647136dU, + 0xd7618c9aU, 0xa10c7a37U, 0xf8148e59U, 0x133c89ebU, + 0xa927eeceU, 0x61c935b7U, 0x1ce5ede1U, 0x47b13c7aU, + 0xd2df599cU, 0xf2733f55U, 0x14ce7918U, 0xc737bf73U, + 0xf7cdea53U, 0xfdaa5b5fU, 0x3d6f14dfU, 0x44db8678U, + 0xaff381caU, 0x68c43eb9U, 0x24342c38U, 0xa3405fc2U, + 0x1dc37216U, 0xe2250cbcU, 0x3c498b28U, 0x0d9541ffU, + 0xa8017139U, 0x0cb3de08U, 0xb4e49cd8U, 0x56c19064U, + 0xcb84617bU, 0x32b670d5U, 0x6c5c7448U, 0xb85742d0U, +}; +const uint32_t AES_Td4[256] = { + 0x52525252U, 0x09090909U, 0x6a6a6a6aU, 0xd5d5d5d5U, + 0x30303030U, 0x36363636U, 0xa5a5a5a5U, 0x38383838U, + 0xbfbfbfbfU, 0x40404040U, 0xa3a3a3a3U, 0x9e9e9e9eU, + 0x81818181U, 0xf3f3f3f3U, 0xd7d7d7d7U, 0xfbfbfbfbU, + 0x7c7c7c7cU, 0xe3e3e3e3U, 0x39393939U, 0x82828282U, + 0x9b9b9b9bU, 0x2f2f2f2fU, 0xffffffffU, 0x87878787U, + 0x34343434U, 0x8e8e8e8eU, 0x43434343U, 0x44444444U, + 0xc4c4c4c4U, 0xdedededeU, 0xe9e9e9e9U, 0xcbcbcbcbU, + 0x54545454U, 0x7b7b7b7bU, 0x94949494U, 0x32323232U, + 0xa6a6a6a6U, 0xc2c2c2c2U, 0x23232323U, 0x3d3d3d3dU, + 0xeeeeeeeeU, 0x4c4c4c4cU, 0x95959595U, 0x0b0b0b0bU, + 0x42424242U, 0xfafafafaU, 0xc3c3c3c3U, 0x4e4e4e4eU, + 0x08080808U, 0x2e2e2e2eU, 0xa1a1a1a1U, 0x66666666U, + 0x28282828U, 0xd9d9d9d9U, 0x24242424U, 0xb2b2b2b2U, + 0x76767676U, 0x5b5b5b5bU, 0xa2a2a2a2U, 0x49494949U, + 0x6d6d6d6dU, 0x8b8b8b8bU, 0xd1d1d1d1U, 0x25252525U, + 0x72727272U, 0xf8f8f8f8U, 0xf6f6f6f6U, 0x64646464U, + 0x86868686U, 0x68686868U, 0x98989898U, 0x16161616U, + 0xd4d4d4d4U, 0xa4a4a4a4U, 0x5c5c5c5cU, 0xccccccccU, + 0x5d5d5d5dU, 0x65656565U, 0xb6b6b6b6U, 0x92929292U, + 0x6c6c6c6cU, 0x70707070U, 0x48484848U, 0x50505050U, + 0xfdfdfdfdU, 0xededededU, 0xb9b9b9b9U, 0xdadadadaU, + 0x5e5e5e5eU, 0x15151515U, 0x46464646U, 0x57575757U, + 0xa7a7a7a7U, 0x8d8d8d8dU, 0x9d9d9d9dU, 0x84848484U, + 0x90909090U, 0xd8d8d8d8U, 0xababababU, 0x00000000U, + 0x8c8c8c8cU, 0xbcbcbcbcU, 0xd3d3d3d3U, 0x0a0a0a0aU, + 0xf7f7f7f7U, 0xe4e4e4e4U, 0x58585858U, 0x05050505U, + 0xb8b8b8b8U, 0xb3b3b3b3U, 0x45454545U, 0x06060606U, + 0xd0d0d0d0U, 0x2c2c2c2cU, 0x1e1e1e1eU, 0x8f8f8f8fU, + 0xcacacacaU, 0x3f3f3f3fU, 0x0f0f0f0fU, 0x02020202U, + 0xc1c1c1c1U, 0xafafafafU, 0xbdbdbdbdU, 0x03030303U, + 0x01010101U, 0x13131313U, 0x8a8a8a8aU, 0x6b6b6b6bU, + 0x3a3a3a3aU, 0x91919191U, 0x11111111U, 0x41414141U, + 0x4f4f4f4fU, 0x67676767U, 0xdcdcdcdcU, 0xeaeaeaeaU, + 0x97979797U, 0xf2f2f2f2U, 0xcfcfcfcfU, 0xcecececeU, + 0xf0f0f0f0U, 0xb4b4b4b4U, 0xe6e6e6e6U, 0x73737373U, + 0x96969696U, 0xacacacacU, 0x74747474U, 0x22222222U, + 0xe7e7e7e7U, 0xadadadadU, 0x35353535U, 0x85858585U, + 0xe2e2e2e2U, 0xf9f9f9f9U, 0x37373737U, 0xe8e8e8e8U, + 0x1c1c1c1cU, 0x75757575U, 0xdfdfdfdfU, 0x6e6e6e6eU, + 0x47474747U, 0xf1f1f1f1U, 0x1a1a1a1aU, 0x71717171U, + 0x1d1d1d1dU, 0x29292929U, 0xc5c5c5c5U, 0x89898989U, + 0x6f6f6f6fU, 0xb7b7b7b7U, 0x62626262U, 0x0e0e0e0eU, + 0xaaaaaaaaU, 0x18181818U, 0xbebebebeU, 0x1b1b1b1bU, + 0xfcfcfcfcU, 0x56565656U, 0x3e3e3e3eU, 0x4b4b4b4bU, + 0xc6c6c6c6U, 0xd2d2d2d2U, 0x79797979U, 0x20202020U, + 0x9a9a9a9aU, 0xdbdbdbdbU, 0xc0c0c0c0U, 0xfefefefeU, + 0x78787878U, 0xcdcdcdcdU, 0x5a5a5a5aU, 0xf4f4f4f4U, + 0x1f1f1f1fU, 0xddddddddU, 0xa8a8a8a8U, 0x33333333U, + 0x88888888U, 0x07070707U, 0xc7c7c7c7U, 0x31313131U, + 0xb1b1b1b1U, 0x12121212U, 0x10101010U, 0x59595959U, + 0x27272727U, 0x80808080U, 0xececececU, 0x5f5f5f5fU, + 0x60606060U, 0x51515151U, 0x7f7f7f7fU, 0xa9a9a9a9U, + 0x19191919U, 0xb5b5b5b5U, 0x4a4a4a4aU, 0x0d0d0d0dU, + 0x2d2d2d2dU, 0xe5e5e5e5U, 0x7a7a7a7aU, 0x9f9f9f9fU, + 0x93939393U, 0xc9c9c9c9U, 0x9c9c9c9cU, 0xefefefefU, + 0xa0a0a0a0U, 0xe0e0e0e0U, 0x3b3b3b3bU, 0x4d4d4d4dU, + 0xaeaeaeaeU, 0x2a2a2a2aU, 0xf5f5f5f5U, 0xb0b0b0b0U, + 0xc8c8c8c8U, 0xebebebebU, 0xbbbbbbbbU, 0x3c3c3c3cU, + 0x83838383U, 0x53535353U, 0x99999999U, 0x61616161U, + 0x17171717U, 0x2b2b2b2bU, 0x04040404U, 0x7e7e7e7eU, + 0xbabababaU, 0x77777777U, 0xd6d6d6d6U, 0x26262626U, + 0xe1e1e1e1U, 0x69696969U, 0x14141414U, 0x63636363U, + 0x55555555U, 0x21212121U, 0x0c0c0c0cU, 0x7d7d7d7dU, +}; +static const u32 rcon[] = { + 0x01000000, 0x02000000, 0x04000000, 0x08000000, + 0x10000000, 0x20000000, 0x40000000, 0x80000000, + 0x1B000000, 0x36000000, /* for 128-bit blocks, Rijndael never uses more than 10 rcon values */ +}; + +/** + * Expand the cipher key into the encryption key schedule. + */ +int AES_set_encrypt_key(const unsigned char *userKey, const int bits, + AES_KEY *key) { + + u32 *rk; + int i = 0; + u32 temp; + + if (!userKey || !key) + return -1; + if (bits != 128 && bits != 192 && bits != 256) + return -2; + + rk = key->rd_key; + + if (bits==128) + key->rounds = 10; + else if (bits==192) + key->rounds = 12; + else + key->rounds = 14; + + rk[0] = GETU32(userKey ); + rk[1] = GETU32(userKey + 4); + rk[2] = GETU32(userKey + 8); + rk[3] = GETU32(userKey + 12); + if (bits == 128) { + while (1) { + temp = rk[3]; + rk[4] = rk[0] ^ + (AES_Te4[(temp >> 16) & 0xff] & 0xff000000) ^ + (AES_Te4[(temp >> 8) & 0xff] & 0x00ff0000) ^ + (AES_Te4[(temp ) & 0xff] & 0x0000ff00) ^ + (AES_Te4[(temp >> 24) ] & 0x000000ff) ^ + rcon[i]; + rk[5] = rk[1] ^ rk[4]; + rk[6] = rk[2] ^ rk[5]; + rk[7] = rk[3] ^ rk[6]; + if (++i == 10) { + return 0; + } + rk += 4; + } + } + rk[4] = GETU32(userKey + 16); + rk[5] = GETU32(userKey + 20); + if (bits == 192) { + while (1) { + temp = rk[ 5]; + rk[ 6] = rk[ 0] ^ + (AES_Te4[(temp >> 16) & 0xff] & 0xff000000) ^ + (AES_Te4[(temp >> 8) & 0xff] & 0x00ff0000) ^ + (AES_Te4[(temp ) & 0xff] & 0x0000ff00) ^ + (AES_Te4[(temp >> 24) ] & 0x000000ff) ^ + rcon[i]; + rk[ 7] = rk[ 1] ^ rk[ 6]; + rk[ 8] = rk[ 2] ^ rk[ 7]; + rk[ 9] = rk[ 3] ^ rk[ 8]; + if (++i == 8) { + return 0; + } + rk[10] = rk[ 4] ^ rk[ 9]; + rk[11] = rk[ 5] ^ rk[10]; + rk += 6; + } + } + rk[6] = GETU32(userKey + 24); + rk[7] = GETU32(userKey + 28); + if (bits == 256) { + while (1) { + temp = rk[ 7]; + rk[ 8] = rk[ 0] ^ + (AES_Te4[(temp >> 16) & 0xff] & 0xff000000) ^ + (AES_Te4[(temp >> 8) & 0xff] & 0x00ff0000) ^ + (AES_Te4[(temp ) & 0xff] & 0x0000ff00) ^ + (AES_Te4[(temp >> 24) ] & 0x000000ff) ^ + rcon[i]; + rk[ 9] = rk[ 1] ^ rk[ 8]; + rk[10] = rk[ 2] ^ rk[ 9]; + rk[11] = rk[ 3] ^ rk[10]; + if (++i == 7) { + return 0; + } + temp = rk[11]; + rk[12] = rk[ 4] ^ + (AES_Te4[(temp >> 24) ] & 0xff000000) ^ + (AES_Te4[(temp >> 16) & 0xff] & 0x00ff0000) ^ + (AES_Te4[(temp >> 8) & 0xff] & 0x0000ff00) ^ + (AES_Te4[(temp ) & 0xff] & 0x000000ff); + rk[13] = rk[ 5] ^ rk[12]; + rk[14] = rk[ 6] ^ rk[13]; + rk[15] = rk[ 7] ^ rk[14]; + + rk += 8; + } + } + abort(); +} + +/** + * Expand the cipher key into the decryption key schedule. + */ +int AES_set_decrypt_key(const unsigned char *userKey, const int bits, + AES_KEY *key) { + + u32 *rk; + int i, j, status; + u32 temp; + + /* first, start with an encryption schedule */ + status = AES_set_encrypt_key(userKey, bits, key); + if (status < 0) + return status; + + rk = key->rd_key; + + /* invert the order of the round keys: */ + for (i = 0, j = 4*(key->rounds); i < j; i += 4, j -= 4) { + temp = rk[i ]; rk[i ] = rk[j ]; rk[j ] = temp; + temp = rk[i + 1]; rk[i + 1] = rk[j + 1]; rk[j + 1] = temp; + temp = rk[i + 2]; rk[i + 2] = rk[j + 2]; rk[j + 2] = temp; + temp = rk[i + 3]; rk[i + 3] = rk[j + 3]; rk[j + 3] = temp; + } + /* apply the inverse MixColumn transform to all round keys but the first and the last: */ + for (i = 1; i < (key->rounds); i++) { + rk += 4; + rk[0] = + AES_Td0[AES_Te4[(rk[0] >> 24) ] & 0xff] ^ + AES_Td1[AES_Te4[(rk[0] >> 16) & 0xff] & 0xff] ^ + AES_Td2[AES_Te4[(rk[0] >> 8) & 0xff] & 0xff] ^ + AES_Td3[AES_Te4[(rk[0] ) & 0xff] & 0xff]; + rk[1] = + AES_Td0[AES_Te4[(rk[1] >> 24) ] & 0xff] ^ + AES_Td1[AES_Te4[(rk[1] >> 16) & 0xff] & 0xff] ^ + AES_Td2[AES_Te4[(rk[1] >> 8) & 0xff] & 0xff] ^ + AES_Td3[AES_Te4[(rk[1] ) & 0xff] & 0xff]; + rk[2] = + AES_Td0[AES_Te4[(rk[2] >> 24) ] & 0xff] ^ + AES_Td1[AES_Te4[(rk[2] >> 16) & 0xff] & 0xff] ^ + AES_Td2[AES_Te4[(rk[2] >> 8) & 0xff] & 0xff] ^ + AES_Td3[AES_Te4[(rk[2] ) & 0xff] & 0xff]; + rk[3] = + AES_Td0[AES_Te4[(rk[3] >> 24) ] & 0xff] ^ + AES_Td1[AES_Te4[(rk[3] >> 16) & 0xff] & 0xff] ^ + AES_Td2[AES_Te4[(rk[3] >> 8) & 0xff] & 0xff] ^ + AES_Td3[AES_Te4[(rk[3] ) & 0xff] & 0xff]; + } + return 0; +} + +#ifndef AES_ASM +/* + * Encrypt a single block + * in and out can overlap + */ +void AES_encrypt(const unsigned char *in, unsigned char *out, + const AES_KEY *key) { + + const u32 *rk; + u32 s0, s1, s2, s3, t0, t1, t2, t3; +#ifndef FULL_UNROLL + int r; +#endif /* ?FULL_UNROLL */ + + assert(in && out && key); + rk = key->rd_key; + + /* + * map byte array block to cipher state + * and add initial round key: + */ + s0 = GETU32(in ) ^ rk[0]; + s1 = GETU32(in + 4) ^ rk[1]; + s2 = GETU32(in + 8) ^ rk[2]; + s3 = GETU32(in + 12) ^ rk[3]; +#ifdef FULL_UNROLL + /* round 1: */ + t0 = AES_Te0[s0 >> 24] ^ AES_Te1[(s1 >> 16) & 0xff] ^ AES_Te2[(s2 >> 8) & 0xff] ^ AES_Te3[s3 & 0xff] ^ rk[ 4]; + t1 = AES_Te0[s1 >> 24] ^ AES_Te1[(s2 >> 16) & 0xff] ^ AES_Te2[(s3 >> 8) & 0xff] ^ AES_Te3[s0 & 0xff] ^ rk[ 5]; + t2 = AES_Te0[s2 >> 24] ^ AES_Te1[(s3 >> 16) & 0xff] ^ AES_Te2[(s0 >> 8) & 0xff] ^ AES_Te3[s1 & 0xff] ^ rk[ 6]; + t3 = AES_Te0[s3 >> 24] ^ AES_Te1[(s0 >> 16) & 0xff] ^ AES_Te2[(s1 >> 8) & 0xff] ^ AES_Te3[s2 & 0xff] ^ rk[ 7]; + /* round 2: */ + s0 = AES_Te0[t0 >> 24] ^ AES_Te1[(t1 >> 16) & 0xff] ^ AES_Te2[(t2 >> 8) & 0xff] ^ AES_Te3[t3 & 0xff] ^ rk[ 8]; + s1 = AES_Te0[t1 >> 24] ^ AES_Te1[(t2 >> 16) & 0xff] ^ AES_Te2[(t3 >> 8) & 0xff] ^ AES_Te3[t0 & 0xff] ^ rk[ 9]; + s2 = AES_Te0[t2 >> 24] ^ AES_Te1[(t3 >> 16) & 0xff] ^ AES_Te2[(t0 >> 8) & 0xff] ^ AES_Te3[t1 & 0xff] ^ rk[10]; + s3 = AES_Te0[t3 >> 24] ^ AES_Te1[(t0 >> 16) & 0xff] ^ AES_Te2[(t1 >> 8) & 0xff] ^ AES_Te3[t2 & 0xff] ^ rk[11]; + /* round 3: */ + t0 = AES_Te0[s0 >> 24] ^ AES_Te1[(s1 >> 16) & 0xff] ^ AES_Te2[(s2 >> 8) & 0xff] ^ AES_Te3[s3 & 0xff] ^ rk[12]; + t1 = AES_Te0[s1 >> 24] ^ AES_Te1[(s2 >> 16) & 0xff] ^ AES_Te2[(s3 >> 8) & 0xff] ^ AES_Te3[s0 & 0xff] ^ rk[13]; + t2 = AES_Te0[s2 >> 24] ^ AES_Te1[(s3 >> 16) & 0xff] ^ AES_Te2[(s0 >> 8) & 0xff] ^ AES_Te3[s1 & 0xff] ^ rk[14]; + t3 = AES_Te0[s3 >> 24] ^ AES_Te1[(s0 >> 16) & 0xff] ^ AES_Te2[(s1 >> 8) & 0xff] ^ AES_Te3[s2 & 0xff] ^ rk[15]; + /* round 4: */ + s0 = AES_Te0[t0 >> 24] ^ AES_Te1[(t1 >> 16) & 0xff] ^ AES_Te2[(t2 >> 8) & 0xff] ^ AES_Te3[t3 & 0xff] ^ rk[16]; + s1 = AES_Te0[t1 >> 24] ^ AES_Te1[(t2 >> 16) & 0xff] ^ AES_Te2[(t3 >> 8) & 0xff] ^ AES_Te3[t0 & 0xff] ^ rk[17]; + s2 = AES_Te0[t2 >> 24] ^ AES_Te1[(t3 >> 16) & 0xff] ^ AES_Te2[(t0 >> 8) & 0xff] ^ AES_Te3[t1 & 0xff] ^ rk[18]; + s3 = AES_Te0[t3 >> 24] ^ AES_Te1[(t0 >> 16) & 0xff] ^ AES_Te2[(t1 >> 8) & 0xff] ^ AES_Te3[t2 & 0xff] ^ rk[19]; + /* round 5: */ + t0 = AES_Te0[s0 >> 24] ^ AES_Te1[(s1 >> 16) & 0xff] ^ AES_Te2[(s2 >> 8) & 0xff] ^ AES_Te3[s3 & 0xff] ^ rk[20]; + t1 = AES_Te0[s1 >> 24] ^ AES_Te1[(s2 >> 16) & 0xff] ^ AES_Te2[(s3 >> 8) & 0xff] ^ AES_Te3[s0 & 0xff] ^ rk[21]; + t2 = AES_Te0[s2 >> 24] ^ AES_Te1[(s3 >> 16) & 0xff] ^ AES_Te2[(s0 >> 8) & 0xff] ^ AES_Te3[s1 & 0xff] ^ rk[22]; + t3 = AES_Te0[s3 >> 24] ^ AES_Te1[(s0 >> 16) & 0xff] ^ AES_Te2[(s1 >> 8) & 0xff] ^ AES_Te3[s2 & 0xff] ^ rk[23]; + /* round 6: */ + s0 = AES_Te0[t0 >> 24] ^ AES_Te1[(t1 >> 16) & 0xff] ^ AES_Te2[(t2 >> 8) & 0xff] ^ AES_Te3[t3 & 0xff] ^ rk[24]; + s1 = AES_Te0[t1 >> 24] ^ AES_Te1[(t2 >> 16) & 0xff] ^ AES_Te2[(t3 >> 8) & 0xff] ^ AES_Te3[t0 & 0xff] ^ rk[25]; + s2 = AES_Te0[t2 >> 24] ^ AES_Te1[(t3 >> 16) & 0xff] ^ AES_Te2[(t0 >> 8) & 0xff] ^ AES_Te3[t1 & 0xff] ^ rk[26]; + s3 = AES_Te0[t3 >> 24] ^ AES_Te1[(t0 >> 16) & 0xff] ^ AES_Te2[(t1 >> 8) & 0xff] ^ AES_Te3[t2 & 0xff] ^ rk[27]; + /* round 7: */ + t0 = AES_Te0[s0 >> 24] ^ AES_Te1[(s1 >> 16) & 0xff] ^ AES_Te2[(s2 >> 8) & 0xff] ^ AES_Te3[s3 & 0xff] ^ rk[28]; + t1 = AES_Te0[s1 >> 24] ^ AES_Te1[(s2 >> 16) & 0xff] ^ AES_Te2[(s3 >> 8) & 0xff] ^ AES_Te3[s0 & 0xff] ^ rk[29]; + t2 = AES_Te0[s2 >> 24] ^ AES_Te1[(s3 >> 16) & 0xff] ^ AES_Te2[(s0 >> 8) & 0xff] ^ AES_Te3[s1 & 0xff] ^ rk[30]; + t3 = AES_Te0[s3 >> 24] ^ AES_Te1[(s0 >> 16) & 0xff] ^ AES_Te2[(s1 >> 8) & 0xff] ^ AES_Te3[s2 & 0xff] ^ rk[31]; + /* round 8: */ + s0 = AES_Te0[t0 >> 24] ^ AES_Te1[(t1 >> 16) & 0xff] ^ AES_Te2[(t2 >> 8) & 0xff] ^ AES_Te3[t3 & 0xff] ^ rk[32]; + s1 = AES_Te0[t1 >> 24] ^ AES_Te1[(t2 >> 16) & 0xff] ^ AES_Te2[(t3 >> 8) & 0xff] ^ AES_Te3[t0 & 0xff] ^ rk[33]; + s2 = AES_Te0[t2 >> 24] ^ AES_Te1[(t3 >> 16) & 0xff] ^ AES_Te2[(t0 >> 8) & 0xff] ^ AES_Te3[t1 & 0xff] ^ rk[34]; + s3 = AES_Te0[t3 >> 24] ^ AES_Te1[(t0 >> 16) & 0xff] ^ AES_Te2[(t1 >> 8) & 0xff] ^ AES_Te3[t2 & 0xff] ^ rk[35]; + /* round 9: */ + t0 = AES_Te0[s0 >> 24] ^ AES_Te1[(s1 >> 16) & 0xff] ^ AES_Te2[(s2 >> 8) & 0xff] ^ AES_Te3[s3 & 0xff] ^ rk[36]; + t1 = AES_Te0[s1 >> 24] ^ AES_Te1[(s2 >> 16) & 0xff] ^ AES_Te2[(s3 >> 8) & 0xff] ^ AES_Te3[s0 & 0xff] ^ rk[37]; + t2 = AES_Te0[s2 >> 24] ^ AES_Te1[(s3 >> 16) & 0xff] ^ AES_Te2[(s0 >> 8) & 0xff] ^ AES_Te3[s1 & 0xff] ^ rk[38]; + t3 = AES_Te0[s3 >> 24] ^ AES_Te1[(s0 >> 16) & 0xff] ^ AES_Te2[(s1 >> 8) & 0xff] ^ AES_Te3[s2 & 0xff] ^ rk[39]; + if (key->rounds > 10) { + /* round 10: */ + s0 = AES_Te0[t0 >> 24] ^ AES_Te1[(t1 >> 16) & 0xff] ^ AES_Te2[(t2 >> 8) & 0xff] ^ AES_Te3[t3 & 0xff] ^ rk[40]; + s1 = AES_Te0[t1 >> 24] ^ AES_Te1[(t2 >> 16) & 0xff] ^ AES_Te2[(t3 >> 8) & 0xff] ^ AES_Te3[t0 & 0xff] ^ rk[41]; + s2 = AES_Te0[t2 >> 24] ^ AES_Te1[(t3 >> 16) & 0xff] ^ AES_Te2[(t0 >> 8) & 0xff] ^ AES_Te3[t1 & 0xff] ^ rk[42]; + s3 = AES_Te0[t3 >> 24] ^ AES_Te1[(t0 >> 16) & 0xff] ^ AES_Te2[(t1 >> 8) & 0xff] ^ AES_Te3[t2 & 0xff] ^ rk[43]; + /* round 11: */ + t0 = AES_Te0[s0 >> 24] ^ AES_Te1[(s1 >> 16) & 0xff] ^ AES_Te2[(s2 >> 8) & 0xff] ^ AES_Te3[s3 & 0xff] ^ rk[44]; + t1 = AES_Te0[s1 >> 24] ^ AES_Te1[(s2 >> 16) & 0xff] ^ AES_Te2[(s3 >> 8) & 0xff] ^ AES_Te3[s0 & 0xff] ^ rk[45]; + t2 = AES_Te0[s2 >> 24] ^ AES_Te1[(s3 >> 16) & 0xff] ^ AES_Te2[(s0 >> 8) & 0xff] ^ AES_Te3[s1 & 0xff] ^ rk[46]; + t3 = AES_Te0[s3 >> 24] ^ AES_Te1[(s0 >> 16) & 0xff] ^ AES_Te2[(s1 >> 8) & 0xff] ^ AES_Te3[s2 & 0xff] ^ rk[47]; + if (key->rounds > 12) { + /* round 12: */ + s0 = AES_Te0[t0 >> 24] ^ AES_Te1[(t1 >> 16) & 0xff] ^ AES_Te2[(t2 >> 8) & 0xff] ^ AES_Te3[t3 & 0xff] ^ rk[48]; + s1 = AES_Te0[t1 >> 24] ^ AES_Te1[(t2 >> 16) & 0xff] ^ AES_Te2[(t3 >> 8) & 0xff] ^ AES_Te3[t0 & 0xff] ^ rk[49]; + s2 = AES_Te0[t2 >> 24] ^ AES_Te1[(t3 >> 16) & 0xff] ^ AES_Te2[(t0 >> 8) & 0xff] ^ AES_Te3[t1 & 0xff] ^ rk[50]; + s3 = AES_Te0[t3 >> 24] ^ AES_Te1[(t0 >> 16) & 0xff] ^ AES_Te2[(t1 >> 8) & 0xff] ^ AES_Te3[t2 & 0xff] ^ rk[51]; + /* round 13: */ + t0 = AES_Te0[s0 >> 24] ^ AES_Te1[(s1 >> 16) & 0xff] ^ AES_Te2[(s2 >> 8) & 0xff] ^ AES_Te3[s3 & 0xff] ^ rk[52]; + t1 = AES_Te0[s1 >> 24] ^ AES_Te1[(s2 >> 16) & 0xff] ^ AES_Te2[(s3 >> 8) & 0xff] ^ AES_Te3[s0 & 0xff] ^ rk[53]; + t2 = AES_Te0[s2 >> 24] ^ AES_Te1[(s3 >> 16) & 0xff] ^ AES_Te2[(s0 >> 8) & 0xff] ^ AES_Te3[s1 & 0xff] ^ rk[54]; + t3 = AES_Te0[s3 >> 24] ^ AES_Te1[(s0 >> 16) & 0xff] ^ AES_Te2[(s1 >> 8) & 0xff] ^ AES_Te3[s2 & 0xff] ^ rk[55]; + } + } + rk += key->rounds << 2; +#else /* !FULL_UNROLL */ + /* + * Nr - 1 full rounds: + */ + r = key->rounds >> 1; + for (;;) { + t0 = + AES_Te0[(s0 >> 24) ] ^ + AES_Te1[(s1 >> 16) & 0xff] ^ + AES_Te2[(s2 >> 8) & 0xff] ^ + AES_Te3[(s3 ) & 0xff] ^ + rk[4]; + t1 = + AES_Te0[(s1 >> 24) ] ^ + AES_Te1[(s2 >> 16) & 0xff] ^ + AES_Te2[(s3 >> 8) & 0xff] ^ + AES_Te3[(s0 ) & 0xff] ^ + rk[5]; + t2 = + AES_Te0[(s2 >> 24) ] ^ + AES_Te1[(s3 >> 16) & 0xff] ^ + AES_Te2[(s0 >> 8) & 0xff] ^ + AES_Te3[(s1 ) & 0xff] ^ + rk[6]; + t3 = + AES_Te0[(s3 >> 24) ] ^ + AES_Te1[(s0 >> 16) & 0xff] ^ + AES_Te2[(s1 >> 8) & 0xff] ^ + AES_Te3[(s2 ) & 0xff] ^ + rk[7]; + + rk += 8; + if (--r == 0) { + break; + } + + s0 = + AES_Te0[(t0 >> 24) ] ^ + AES_Te1[(t1 >> 16) & 0xff] ^ + AES_Te2[(t2 >> 8) & 0xff] ^ + AES_Te3[(t3 ) & 0xff] ^ + rk[0]; + s1 = + AES_Te0[(t1 >> 24) ] ^ + AES_Te1[(t2 >> 16) & 0xff] ^ + AES_Te2[(t3 >> 8) & 0xff] ^ + AES_Te3[(t0 ) & 0xff] ^ + rk[1]; + s2 = + AES_Te0[(t2 >> 24) ] ^ + AES_Te1[(t3 >> 16) & 0xff] ^ + AES_Te2[(t0 >> 8) & 0xff] ^ + AES_Te3[(t1 ) & 0xff] ^ + rk[2]; + s3 = + AES_Te0[(t3 >> 24) ] ^ + AES_Te1[(t0 >> 16) & 0xff] ^ + AES_Te2[(t1 >> 8) & 0xff] ^ + AES_Te3[(t2 ) & 0xff] ^ + rk[3]; + } +#endif /* ?FULL_UNROLL */ + /* + * apply last round and + * map cipher state to byte array block: + */ + s0 = + (AES_Te4[(t0 >> 24) ] & 0xff000000) ^ + (AES_Te4[(t1 >> 16) & 0xff] & 0x00ff0000) ^ + (AES_Te4[(t2 >> 8) & 0xff] & 0x0000ff00) ^ + (AES_Te4[(t3 ) & 0xff] & 0x000000ff) ^ + rk[0]; + PUTU32(out , s0); + s1 = + (AES_Te4[(t1 >> 24) ] & 0xff000000) ^ + (AES_Te4[(t2 >> 16) & 0xff] & 0x00ff0000) ^ + (AES_Te4[(t3 >> 8) & 0xff] & 0x0000ff00) ^ + (AES_Te4[(t0 ) & 0xff] & 0x000000ff) ^ + rk[1]; + PUTU32(out + 4, s1); + s2 = + (AES_Te4[(t2 >> 24) ] & 0xff000000) ^ + (AES_Te4[(t3 >> 16) & 0xff] & 0x00ff0000) ^ + (AES_Te4[(t0 >> 8) & 0xff] & 0x0000ff00) ^ + (AES_Te4[(t1 ) & 0xff] & 0x000000ff) ^ + rk[2]; + PUTU32(out + 8, s2); + s3 = + (AES_Te4[(t3 >> 24) ] & 0xff000000) ^ + (AES_Te4[(t0 >> 16) & 0xff] & 0x00ff0000) ^ + (AES_Te4[(t1 >> 8) & 0xff] & 0x0000ff00) ^ + (AES_Te4[(t2 ) & 0xff] & 0x000000ff) ^ + rk[3]; + PUTU32(out + 12, s3); +} + +/* + * Decrypt a single block + * in and out can overlap + */ +void AES_decrypt(const unsigned char *in, unsigned char *out, + const AES_KEY *key) { + + const u32 *rk; + u32 s0, s1, s2, s3, t0, t1, t2, t3; +#ifndef FULL_UNROLL + int r; +#endif /* ?FULL_UNROLL */ + + assert(in && out && key); + rk = key->rd_key; + + /* + * map byte array block to cipher state + * and add initial round key: + */ + s0 = GETU32(in ) ^ rk[0]; + s1 = GETU32(in + 4) ^ rk[1]; + s2 = GETU32(in + 8) ^ rk[2]; + s3 = GETU32(in + 12) ^ rk[3]; +#ifdef FULL_UNROLL + /* round 1: */ + t0 = AES_Td0[s0 >> 24] ^ AES_Td1[(s3 >> 16) & 0xff] ^ AES_Td2[(s2 >> 8) & 0xff] ^ AES_Td3[s1 & 0xff] ^ rk[ 4]; + t1 = AES_Td0[s1 >> 24] ^ AES_Td1[(s0 >> 16) & 0xff] ^ AES_Td2[(s3 >> 8) & 0xff] ^ AES_Td3[s2 & 0xff] ^ rk[ 5]; + t2 = AES_Td0[s2 >> 24] ^ AES_Td1[(s1 >> 16) & 0xff] ^ AES_Td2[(s0 >> 8) & 0xff] ^ AES_Td3[s3 & 0xff] ^ rk[ 6]; + t3 = AES_Td0[s3 >> 24] ^ AES_Td1[(s2 >> 16) & 0xff] ^ AES_Td2[(s1 >> 8) & 0xff] ^ AES_Td3[s0 & 0xff] ^ rk[ 7]; + /* round 2: */ + s0 = AES_Td0[t0 >> 24] ^ AES_Td1[(t3 >> 16) & 0xff] ^ AES_Td2[(t2 >> 8) & 0xff] ^ AES_Td3[t1 & 0xff] ^ rk[ 8]; + s1 = AES_Td0[t1 >> 24] ^ AES_Td1[(t0 >> 16) & 0xff] ^ AES_Td2[(t3 >> 8) & 0xff] ^ AES_Td3[t2 & 0xff] ^ rk[ 9]; + s2 = AES_Td0[t2 >> 24] ^ AES_Td1[(t1 >> 16) & 0xff] ^ AES_Td2[(t0 >> 8) & 0xff] ^ AES_Td3[t3 & 0xff] ^ rk[10]; + s3 = AES_Td0[t3 >> 24] ^ AES_Td1[(t2 >> 16) & 0xff] ^ AES_Td2[(t1 >> 8) & 0xff] ^ AES_Td3[t0 & 0xff] ^ rk[11]; + /* round 3: */ + t0 = AES_Td0[s0 >> 24] ^ AES_Td1[(s3 >> 16) & 0xff] ^ AES_Td2[(s2 >> 8) & 0xff] ^ AES_Td3[s1 & 0xff] ^ rk[12]; + t1 = AES_Td0[s1 >> 24] ^ AES_Td1[(s0 >> 16) & 0xff] ^ AES_Td2[(s3 >> 8) & 0xff] ^ AES_Td3[s2 & 0xff] ^ rk[13]; + t2 = AES_Td0[s2 >> 24] ^ AES_Td1[(s1 >> 16) & 0xff] ^ AES_Td2[(s0 >> 8) & 0xff] ^ AES_Td3[s3 & 0xff] ^ rk[14]; + t3 = AES_Td0[s3 >> 24] ^ AES_Td1[(s2 >> 16) & 0xff] ^ AES_Td2[(s1 >> 8) & 0xff] ^ AES_Td3[s0 & 0xff] ^ rk[15]; + /* round 4: */ + s0 = AES_Td0[t0 >> 24] ^ AES_Td1[(t3 >> 16) & 0xff] ^ AES_Td2[(t2 >> 8) & 0xff] ^ AES_Td3[t1 & 0xff] ^ rk[16]; + s1 = AES_Td0[t1 >> 24] ^ AES_Td1[(t0 >> 16) & 0xff] ^ AES_Td2[(t3 >> 8) & 0xff] ^ AES_Td3[t2 & 0xff] ^ rk[17]; + s2 = AES_Td0[t2 >> 24] ^ AES_Td1[(t1 >> 16) & 0xff] ^ AES_Td2[(t0 >> 8) & 0xff] ^ AES_Td3[t3 & 0xff] ^ rk[18]; + s3 = AES_Td0[t3 >> 24] ^ AES_Td1[(t2 >> 16) & 0xff] ^ AES_Td2[(t1 >> 8) & 0xff] ^ AES_Td3[t0 & 0xff] ^ rk[19]; + /* round 5: */ + t0 = AES_Td0[s0 >> 24] ^ AES_Td1[(s3 >> 16) & 0xff] ^ AES_Td2[(s2 >> 8) & 0xff] ^ AES_Td3[s1 & 0xff] ^ rk[20]; + t1 = AES_Td0[s1 >> 24] ^ AES_Td1[(s0 >> 16) & 0xff] ^ AES_Td2[(s3 >> 8) & 0xff] ^ AES_Td3[s2 & 0xff] ^ rk[21]; + t2 = AES_Td0[s2 >> 24] ^ AES_Td1[(s1 >> 16) & 0xff] ^ AES_Td2[(s0 >> 8) & 0xff] ^ AES_Td3[s3 & 0xff] ^ rk[22]; + t3 = AES_Td0[s3 >> 24] ^ AES_Td1[(s2 >> 16) & 0xff] ^ AES_Td2[(s1 >> 8) & 0xff] ^ AES_Td3[s0 & 0xff] ^ rk[23]; + /* round 6: */ + s0 = AES_Td0[t0 >> 24] ^ AES_Td1[(t3 >> 16) & 0xff] ^ AES_Td2[(t2 >> 8) & 0xff] ^ AES_Td3[t1 & 0xff] ^ rk[24]; + s1 = AES_Td0[t1 >> 24] ^ AES_Td1[(t0 >> 16) & 0xff] ^ AES_Td2[(t3 >> 8) & 0xff] ^ AES_Td3[t2 & 0xff] ^ rk[25]; + s2 = AES_Td0[t2 >> 24] ^ AES_Td1[(t1 >> 16) & 0xff] ^ AES_Td2[(t0 >> 8) & 0xff] ^ AES_Td3[t3 & 0xff] ^ rk[26]; + s3 = AES_Td0[t3 >> 24] ^ AES_Td1[(t2 >> 16) & 0xff] ^ AES_Td2[(t1 >> 8) & 0xff] ^ AES_Td3[t0 & 0xff] ^ rk[27]; + /* round 7: */ + t0 = AES_Td0[s0 >> 24] ^ AES_Td1[(s3 >> 16) & 0xff] ^ AES_Td2[(s2 >> 8) & 0xff] ^ AES_Td3[s1 & 0xff] ^ rk[28]; + t1 = AES_Td0[s1 >> 24] ^ AES_Td1[(s0 >> 16) & 0xff] ^ AES_Td2[(s3 >> 8) & 0xff] ^ AES_Td3[s2 & 0xff] ^ rk[29]; + t2 = AES_Td0[s2 >> 24] ^ AES_Td1[(s1 >> 16) & 0xff] ^ AES_Td2[(s0 >> 8) & 0xff] ^ AES_Td3[s3 & 0xff] ^ rk[30]; + t3 = AES_Td0[s3 >> 24] ^ AES_Td1[(s2 >> 16) & 0xff] ^ AES_Td2[(s1 >> 8) & 0xff] ^ AES_Td3[s0 & 0xff] ^ rk[31]; + /* round 8: */ + s0 = AES_Td0[t0 >> 24] ^ AES_Td1[(t3 >> 16) & 0xff] ^ AES_Td2[(t2 >> 8) & 0xff] ^ AES_Td3[t1 & 0xff] ^ rk[32]; + s1 = AES_Td0[t1 >> 24] ^ AES_Td1[(t0 >> 16) & 0xff] ^ AES_Td2[(t3 >> 8) & 0xff] ^ AES_Td3[t2 & 0xff] ^ rk[33]; + s2 = AES_Td0[t2 >> 24] ^ AES_Td1[(t1 >> 16) & 0xff] ^ AES_Td2[(t0 >> 8) & 0xff] ^ AES_Td3[t3 & 0xff] ^ rk[34]; + s3 = AES_Td0[t3 >> 24] ^ AES_Td1[(t2 >> 16) & 0xff] ^ AES_Td2[(t1 >> 8) & 0xff] ^ AES_Td3[t0 & 0xff] ^ rk[35]; + /* round 9: */ + t0 = AES_Td0[s0 >> 24] ^ AES_Td1[(s3 >> 16) & 0xff] ^ AES_Td2[(s2 >> 8) & 0xff] ^ AES_Td3[s1 & 0xff] ^ rk[36]; + t1 = AES_Td0[s1 >> 24] ^ AES_Td1[(s0 >> 16) & 0xff] ^ AES_Td2[(s3 >> 8) & 0xff] ^ AES_Td3[s2 & 0xff] ^ rk[37]; + t2 = AES_Td0[s2 >> 24] ^ AES_Td1[(s1 >> 16) & 0xff] ^ AES_Td2[(s0 >> 8) & 0xff] ^ AES_Td3[s3 & 0xff] ^ rk[38]; + t3 = AES_Td0[s3 >> 24] ^ AES_Td1[(s2 >> 16) & 0xff] ^ AES_Td2[(s1 >> 8) & 0xff] ^ AES_Td3[s0 & 0xff] ^ rk[39]; + if (key->rounds > 10) { + /* round 10: */ + s0 = AES_Td0[t0 >> 24] ^ AES_Td1[(t3 >> 16) & 0xff] ^ AES_Td2[(t2 >> 8) & 0xff] ^ AES_Td3[t1 & 0xff] ^ rk[40]; + s1 = AES_Td0[t1 >> 24] ^ AES_Td1[(t0 >> 16) & 0xff] ^ AES_Td2[(t3 >> 8) & 0xff] ^ AES_Td3[t2 & 0xff] ^ rk[41]; + s2 = AES_Td0[t2 >> 24] ^ AES_Td1[(t1 >> 16) & 0xff] ^ AES_Td2[(t0 >> 8) & 0xff] ^ AES_Td3[t3 & 0xff] ^ rk[42]; + s3 = AES_Td0[t3 >> 24] ^ AES_Td1[(t2 >> 16) & 0xff] ^ AES_Td2[(t1 >> 8) & 0xff] ^ AES_Td3[t0 & 0xff] ^ rk[43]; + /* round 11: */ + t0 = AES_Td0[s0 >> 24] ^ AES_Td1[(s3 >> 16) & 0xff] ^ AES_Td2[(s2 >> 8) & 0xff] ^ AES_Td3[s1 & 0xff] ^ rk[44]; + t1 = AES_Td0[s1 >> 24] ^ AES_Td1[(s0 >> 16) & 0xff] ^ AES_Td2[(s3 >> 8) & 0xff] ^ AES_Td3[s2 & 0xff] ^ rk[45]; + t2 = AES_Td0[s2 >> 24] ^ AES_Td1[(s1 >> 16) & 0xff] ^ AES_Td2[(s0 >> 8) & 0xff] ^ AES_Td3[s3 & 0xff] ^ rk[46]; + t3 = AES_Td0[s3 >> 24] ^ AES_Td1[(s2 >> 16) & 0xff] ^ AES_Td2[(s1 >> 8) & 0xff] ^ AES_Td3[s0 & 0xff] ^ rk[47]; + if (key->rounds > 12) { + /* round 12: */ + s0 = AES_Td0[t0 >> 24] ^ AES_Td1[(t3 >> 16) & 0xff] ^ AES_Td2[(t2 >> 8) & 0xff] ^ AES_Td3[t1 & 0xff] ^ rk[48]; + s1 = AES_Td0[t1 >> 24] ^ AES_Td1[(t0 >> 16) & 0xff] ^ AES_Td2[(t3 >> 8) & 0xff] ^ AES_Td3[t2 & 0xff] ^ rk[49]; + s2 = AES_Td0[t2 >> 24] ^ AES_Td1[(t1 >> 16) & 0xff] ^ AES_Td2[(t0 >> 8) & 0xff] ^ AES_Td3[t3 & 0xff] ^ rk[50]; + s3 = AES_Td0[t3 >> 24] ^ AES_Td1[(t2 >> 16) & 0xff] ^ AES_Td2[(t1 >> 8) & 0xff] ^ AES_Td3[t0 & 0xff] ^ rk[51]; + /* round 13: */ + t0 = AES_Td0[s0 >> 24] ^ AES_Td1[(s3 >> 16) & 0xff] ^ AES_Td2[(s2 >> 8) & 0xff] ^ AES_Td3[s1 & 0xff] ^ rk[52]; + t1 = AES_Td0[s1 >> 24] ^ AES_Td1[(s0 >> 16) & 0xff] ^ AES_Td2[(s3 >> 8) & 0xff] ^ AES_Td3[s2 & 0xff] ^ rk[53]; + t2 = AES_Td0[s2 >> 24] ^ AES_Td1[(s1 >> 16) & 0xff] ^ AES_Td2[(s0 >> 8) & 0xff] ^ AES_Td3[s3 & 0xff] ^ rk[54]; + t3 = AES_Td0[s3 >> 24] ^ AES_Td1[(s2 >> 16) & 0xff] ^ AES_Td2[(s1 >> 8) & 0xff] ^ AES_Td3[s0 & 0xff] ^ rk[55]; + } + } + rk += key->rounds << 2; +#else /* !FULL_UNROLL */ + /* + * Nr - 1 full rounds: + */ + r = key->rounds >> 1; + for (;;) { + t0 = + AES_Td0[(s0 >> 24) ] ^ + AES_Td1[(s3 >> 16) & 0xff] ^ + AES_Td2[(s2 >> 8) & 0xff] ^ + AES_Td3[(s1 ) & 0xff] ^ + rk[4]; + t1 = + AES_Td0[(s1 >> 24) ] ^ + AES_Td1[(s0 >> 16) & 0xff] ^ + AES_Td2[(s3 >> 8) & 0xff] ^ + AES_Td3[(s2 ) & 0xff] ^ + rk[5]; + t2 = + AES_Td0[(s2 >> 24) ] ^ + AES_Td1[(s1 >> 16) & 0xff] ^ + AES_Td2[(s0 >> 8) & 0xff] ^ + AES_Td3[(s3 ) & 0xff] ^ + rk[6]; + t3 = + AES_Td0[(s3 >> 24) ] ^ + AES_Td1[(s2 >> 16) & 0xff] ^ + AES_Td2[(s1 >> 8) & 0xff] ^ + AES_Td3[(s0 ) & 0xff] ^ + rk[7]; + + rk += 8; + if (--r == 0) { + break; + } + + s0 = + AES_Td0[(t0 >> 24) ] ^ + AES_Td1[(t3 >> 16) & 0xff] ^ + AES_Td2[(t2 >> 8) & 0xff] ^ + AES_Td3[(t1 ) & 0xff] ^ + rk[0]; + s1 = + AES_Td0[(t1 >> 24) ] ^ + AES_Td1[(t0 >> 16) & 0xff] ^ + AES_Td2[(t3 >> 8) & 0xff] ^ + AES_Td3[(t2 ) & 0xff] ^ + rk[1]; + s2 = + AES_Td0[(t2 >> 24) ] ^ + AES_Td1[(t1 >> 16) & 0xff] ^ + AES_Td2[(t0 >> 8) & 0xff] ^ + AES_Td3[(t3 ) & 0xff] ^ + rk[2]; + s3 = + AES_Td0[(t3 >> 24) ] ^ + AES_Td1[(t2 >> 16) & 0xff] ^ + AES_Td2[(t1 >> 8) & 0xff] ^ + AES_Td3[(t0 ) & 0xff] ^ + rk[3]; + } +#endif /* ?FULL_UNROLL */ + /* + * apply last round and + * map cipher state to byte array block: + */ + s0 = + (AES_Td4[(t0 >> 24) ] & 0xff000000) ^ + (AES_Td4[(t3 >> 16) & 0xff] & 0x00ff0000) ^ + (AES_Td4[(t2 >> 8) & 0xff] & 0x0000ff00) ^ + (AES_Td4[(t1 ) & 0xff] & 0x000000ff) ^ + rk[0]; + PUTU32(out , s0); + s1 = + (AES_Td4[(t1 >> 24) ] & 0xff000000) ^ + (AES_Td4[(t0 >> 16) & 0xff] & 0x00ff0000) ^ + (AES_Td4[(t3 >> 8) & 0xff] & 0x0000ff00) ^ + (AES_Td4[(t2 ) & 0xff] & 0x000000ff) ^ + rk[1]; + PUTU32(out + 4, s1); + s2 = + (AES_Td4[(t2 >> 24) ] & 0xff000000) ^ + (AES_Td4[(t1 >> 16) & 0xff] & 0x00ff0000) ^ + (AES_Td4[(t0 >> 8) & 0xff] & 0x0000ff00) ^ + (AES_Td4[(t3 ) & 0xff] & 0x000000ff) ^ + rk[2]; + PUTU32(out + 8, s2); + s3 = + (AES_Td4[(t3 >> 24) ] & 0xff000000) ^ + (AES_Td4[(t2 >> 16) & 0xff] & 0x00ff0000) ^ + (AES_Td4[(t1 >> 8) & 0xff] & 0x0000ff00) ^ + (AES_Td4[(t0 ) & 0xff] & 0x000000ff) ^ + rk[3]; + PUTU32(out + 12, s3); +} + +#endif /* AES_ASM */ diff --git a/crypto/afalg.c b/crypto/afalg.c new file mode 100644 index 000000000..10046bb0a --- /dev/null +++ b/crypto/afalg.c @@ -0,0 +1,116 @@ +/* + * QEMU Crypto af_alg support + * + * Copyright (c) 2017 HUAWEI TECHNOLOGIES CO., LTD. + * + * Authors: + * Longpeng(Mike) + * + * This work is licensed under the terms of the GNU GPL, version 2 or + * (at your option) any later version. See the COPYING file in the + * top-level directory. + */ +#include "qemu/osdep.h" +#include "qemu/cutils.h" +#include "qemu/sockets.h" +#include "qapi/error.h" +#include "afalgpriv.h" + +static bool +qcrypto_afalg_build_saddr(const char *type, const char *name, + struct sockaddr_alg *salg, Error **errp) +{ + salg->salg_family = AF_ALG; + + if (strnlen(type, SALG_TYPE_LEN_MAX) >= SALG_TYPE_LEN_MAX) { + error_setg(errp, "Afalg type(%s) is larger than %d bytes", + type, SALG_TYPE_LEN_MAX); + return false; + } + + if (strnlen(name, SALG_NAME_LEN_MAX) >= SALG_NAME_LEN_MAX) { + error_setg(errp, "Afalg name(%s) is larger than %d bytes", + name, SALG_NAME_LEN_MAX); + return false; + } + + pstrcpy((char *)salg->salg_type, SALG_TYPE_LEN_MAX, type); + pstrcpy((char *)salg->salg_name, SALG_NAME_LEN_MAX, name); + + return true; +} + +static int +qcrypto_afalg_socket_bind(const char *type, const char *name, + Error **errp) +{ + int sbind; + struct sockaddr_alg salg = {0}; + + if (!qcrypto_afalg_build_saddr(type, name, &salg, errp)) { + return -1; + } + + sbind = qemu_socket(AF_ALG, SOCK_SEQPACKET, 0); + if (sbind < 0) { + error_setg_errno(errp, errno, "Failed to create socket"); + return -1; + } + + if (bind(sbind, (const struct sockaddr *)&salg, sizeof(salg)) != 0) { + error_setg_errno(errp, errno, "Failed to bind socket"); + closesocket(sbind); + return -1; + } + + return sbind; +} + +QCryptoAFAlg * +qcrypto_afalg_comm_alloc(const char *type, const char *name, + Error **errp) +{ + QCryptoAFAlg *afalg; + + afalg = g_new0(QCryptoAFAlg, 1); + /* initilize crypto API socket */ + afalg->opfd = -1; + afalg->tfmfd = qcrypto_afalg_socket_bind(type, name, errp); + if (afalg->tfmfd == -1) { + goto error; + } + + afalg->opfd = qemu_accept(afalg->tfmfd, NULL, 0); + if (afalg->opfd == -1) { + error_setg_errno(errp, errno, "Failed to accept socket"); + goto error; + } + + return afalg; + +error: + qcrypto_afalg_comm_free(afalg); + return NULL; +} + +void qcrypto_afalg_comm_free(QCryptoAFAlg *afalg) +{ + if (!afalg) { + return; + } + + if (afalg->msg) { + g_free(afalg->msg->msg_control); + g_free(afalg->msg); + } + + if (afalg->tfmfd != -1) { + closesocket(afalg->tfmfd); + } + + if (afalg->opfd != -1) { + closesocket(afalg->opfd); + } + + g_free(afalg); +} diff --git a/crypto/afalgpriv.h b/crypto/afalgpriv.h new file mode 100644 index 000000000..5a2393f1b --- /dev/null +++ b/crypto/afalgpriv.h @@ -0,0 +1,67 @@ +/* + * QEMU Crypto af_alg support + * + * Copyright (c) 2017 HUAWEI TECHNOLOGIES CO., LTD. + * + * Authors: + * Longpeng(Mike) + * + * This work is licensed under the terms of the GNU GPL, version 2 or + * (at your option) any later version. See the COPYING file in the + * top-level directory. + */ + +#ifndef QCRYPTO_AFALGPRIV_H +#define QCRYPTO_AFALGPRIV_H + +#include +#include "crypto/cipher.h" + +#define SALG_TYPE_LEN_MAX 14 +#define SALG_NAME_LEN_MAX 64 + +#ifndef SOL_ALG +#define SOL_ALG 279 +#endif + +#define AFALG_TYPE_CIPHER "skcipher" +#define AFALG_TYPE_HASH "hash" + +#define ALG_OPTYPE_LEN 4 +#define ALG_MSGIV_LEN(len) (sizeof(struct af_alg_iv) + (len)) + +typedef struct QCryptoAFAlg QCryptoAFAlg; + +struct QCryptoAFAlg { + QCryptoCipher base; + + int tfmfd; + int opfd; + struct msghdr *msg; + struct cmsghdr *cmsg; +}; + +/** + * qcrypto_afalg_comm_alloc: + * @type: the type of crypto operation + * @name: the name of crypto operation + * + * Allocate a QCryptoAFAlg object and bind itself to + * a AF_ALG socket. + * + * Returns: + * a new QCryptoAFAlg object, or NULL in error. + */ +QCryptoAFAlg * +qcrypto_afalg_comm_alloc(const char *type, const char *name, + Error **errp); + +/** + * afalg_comm_free: + * @afalg: the QCryptoAFAlg object + * + * Free the @afalg. + */ +void qcrypto_afalg_comm_free(QCryptoAFAlg *afalg); + +#endif diff --git a/crypto/afsplit.c b/crypto/afsplit.c new file mode 100644 index 000000000..b1a5a2089 --- /dev/null +++ b/crypto/afsplit.c @@ -0,0 +1,146 @@ +/* + * QEMU Crypto anti forensic information splitter + * + * Copyright (c) 2015-2016 Red Hat, Inc. + * + * Derived from cryptsetup package lib/luks1/af.c + * + * Copyright (C) 2004, Clemens Fruhwirth + * Copyright (C) 2009-2012, Red Hat, Inc. All rights reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version 2 + * of the License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, see . + */ + +#include "qemu/osdep.h" +#include "qemu/bswap.h" +#include "crypto/afsplit.h" +#include "crypto/random.h" + + +static void qcrypto_afsplit_xor(size_t blocklen, + const uint8_t *in1, + const uint8_t *in2, + uint8_t *out) +{ + size_t i; + for (i = 0; i < blocklen; i++) { + out[i] = in1[i] ^ in2[i]; + } +} + + +static int qcrypto_afsplit_hash(QCryptoHashAlgorithm hash, + size_t blocklen, + uint8_t *block, + Error **errp) +{ + size_t digestlen = qcrypto_hash_digest_len(hash); + + size_t hashcount = blocklen / digestlen; + size_t finallen = blocklen % digestlen; + uint32_t i; + + if (finallen) { + hashcount++; + } else { + finallen = digestlen; + } + + for (i = 0; i < hashcount; i++) { + g_autofree uint8_t *out = NULL; + size_t outlen = 0; + uint32_t iv = cpu_to_be32(i); + struct iovec in[] = { + { .iov_base = &iv, + .iov_len = sizeof(iv) }, + { .iov_base = block + (i * digestlen), + .iov_len = (i == (hashcount - 1)) ? finallen : digestlen }, + }; + + if (qcrypto_hash_bytesv(hash, + in, + G_N_ELEMENTS(in), + &out, &outlen, + errp) < 0) { + return -1; + } + + assert(outlen == digestlen); + memcpy(block + (i * digestlen), out, + (i == (hashcount - 1)) ? finallen : digestlen); + } + + return 0; +} + + +int qcrypto_afsplit_encode(QCryptoHashAlgorithm hash, + size_t blocklen, + uint32_t stripes, + const uint8_t *in, + uint8_t *out, + Error **errp) +{ + g_autofree uint8_t *block = g_new0(uint8_t, blocklen); + size_t i; + + for (i = 0; i < (stripes - 1); i++) { + if (qcrypto_random_bytes(out + (i * blocklen), blocklen, errp) < 0) { + return -1; + } + + qcrypto_afsplit_xor(blocklen, + out + (i * blocklen), + block, + block); + if (qcrypto_afsplit_hash(hash, blocklen, block, + errp) < 0) { + return -1; + } + } + qcrypto_afsplit_xor(blocklen, + in, + block, + out + (i * blocklen)); + return 0; +} + + +int qcrypto_afsplit_decode(QCryptoHashAlgorithm hash, + size_t blocklen, + uint32_t stripes, + const uint8_t *in, + uint8_t *out, + Error **errp) +{ + g_autofree uint8_t *block = g_new0(uint8_t, blocklen); + size_t i; + + for (i = 0; i < (stripes - 1); i++) { + qcrypto_afsplit_xor(blocklen, + in + (i * blocklen), + block, + block); + if (qcrypto_afsplit_hash(hash, blocklen, block, + errp) < 0) { + return -1; + } + } + + qcrypto_afsplit_xor(blocklen, + in + (i * blocklen), + block, + out); + return 0; +} diff --git a/crypto/block-luks.c b/crypto/block-luks.c new file mode 100644 index 000000000..564caa109 --- /dev/null +++ b/crypto/block-luks.c @@ -0,0 +1,1976 @@ +/* + * QEMU Crypto block device encryption LUKS format + * + * Copyright (c) 2015-2016 Red Hat, Inc. + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, see . + * + */ + +#include "qemu/osdep.h" +#include "qapi/error.h" +#include "qemu/bswap.h" + +#include "block-luks.h" + +#include "crypto/hash.h" +#include "crypto/afsplit.h" +#include "crypto/pbkdf.h" +#include "crypto/secret.h" +#include "crypto/random.h" +#include "qemu/uuid.h" + +#include "qemu/coroutine.h" +#include "qemu/bitmap.h" + +/* + * Reference for the LUKS format implemented here is + * + * docs/on-disk-format.pdf + * + * in 'cryptsetup' package source code + * + * This file implements the 1.2.1 specification, dated + * Oct 16, 2011. + */ + +typedef struct QCryptoBlockLUKS QCryptoBlockLUKS; +typedef struct QCryptoBlockLUKSHeader QCryptoBlockLUKSHeader; +typedef struct QCryptoBlockLUKSKeySlot QCryptoBlockLUKSKeySlot; + + +/* The following constants are all defined by the LUKS spec */ +#define QCRYPTO_BLOCK_LUKS_VERSION 1 + +#define QCRYPTO_BLOCK_LUKS_MAGIC_LEN 6 +#define QCRYPTO_BLOCK_LUKS_CIPHER_NAME_LEN 32 +#define QCRYPTO_BLOCK_LUKS_CIPHER_MODE_LEN 32 +#define QCRYPTO_BLOCK_LUKS_HASH_SPEC_LEN 32 +#define QCRYPTO_BLOCK_LUKS_DIGEST_LEN 20 +#define QCRYPTO_BLOCK_LUKS_SALT_LEN 32 +#define QCRYPTO_BLOCK_LUKS_UUID_LEN 40 +#define QCRYPTO_BLOCK_LUKS_NUM_KEY_SLOTS 8 +#define QCRYPTO_BLOCK_LUKS_STRIPES 4000 +#define QCRYPTO_BLOCK_LUKS_MIN_SLOT_KEY_ITERS 1000 +#define QCRYPTO_BLOCK_LUKS_MIN_MASTER_KEY_ITERS 1000 +#define QCRYPTO_BLOCK_LUKS_KEY_SLOT_OFFSET 4096 + +#define QCRYPTO_BLOCK_LUKS_KEY_SLOT_DISABLED 0x0000DEAD +#define QCRYPTO_BLOCK_LUKS_KEY_SLOT_ENABLED 0x00AC71F3 + +#define QCRYPTO_BLOCK_LUKS_SECTOR_SIZE 512LL + +#define QCRYPTO_BLOCK_LUKS_DEFAULT_ITER_TIME_MS 2000 +#define QCRYPTO_BLOCK_LUKS_ERASE_ITERATIONS 40 + +static const char qcrypto_block_luks_magic[QCRYPTO_BLOCK_LUKS_MAGIC_LEN] = { + 'L', 'U', 'K', 'S', 0xBA, 0xBE +}; + +typedef struct QCryptoBlockLUKSNameMap QCryptoBlockLUKSNameMap; +struct QCryptoBlockLUKSNameMap { + const char *name; + int id; +}; + +typedef struct QCryptoBlockLUKSCipherSizeMap QCryptoBlockLUKSCipherSizeMap; +struct QCryptoBlockLUKSCipherSizeMap { + uint32_t key_bytes; + int id; +}; +typedef struct QCryptoBlockLUKSCipherNameMap QCryptoBlockLUKSCipherNameMap; +struct QCryptoBlockLUKSCipherNameMap { + const char *name; + const QCryptoBlockLUKSCipherSizeMap *sizes; +}; + + +static const QCryptoBlockLUKSCipherSizeMap +qcrypto_block_luks_cipher_size_map_aes[] = { + { 16, QCRYPTO_CIPHER_ALG_AES_128 }, + { 24, QCRYPTO_CIPHER_ALG_AES_192 }, + { 32, QCRYPTO_CIPHER_ALG_AES_256 }, + { 0, 0 }, +}; + +static const QCryptoBlockLUKSCipherSizeMap +qcrypto_block_luks_cipher_size_map_cast5[] = { + { 16, QCRYPTO_CIPHER_ALG_CAST5_128 }, + { 0, 0 }, +}; + +static const QCryptoBlockLUKSCipherSizeMap +qcrypto_block_luks_cipher_size_map_serpent[] = { + { 16, QCRYPTO_CIPHER_ALG_SERPENT_128 }, + { 24, QCRYPTO_CIPHER_ALG_SERPENT_192 }, + { 32, QCRYPTO_CIPHER_ALG_SERPENT_256 }, + { 0, 0 }, +}; + +static const QCryptoBlockLUKSCipherSizeMap +qcrypto_block_luks_cipher_size_map_twofish[] = { + { 16, QCRYPTO_CIPHER_ALG_TWOFISH_128 }, + { 24, QCRYPTO_CIPHER_ALG_TWOFISH_192 }, + { 32, QCRYPTO_CIPHER_ALG_TWOFISH_256 }, + { 0, 0 }, +}; + +static const QCryptoBlockLUKSCipherNameMap +qcrypto_block_luks_cipher_name_map[] = { + { "aes", qcrypto_block_luks_cipher_size_map_aes }, + { "cast5", qcrypto_block_luks_cipher_size_map_cast5 }, + { "serpent", qcrypto_block_luks_cipher_size_map_serpent }, + { "twofish", qcrypto_block_luks_cipher_size_map_twofish }, +}; + + +/* + * This struct is written to disk in big-endian format, + * but operated upon in native-endian format. + */ +struct QCryptoBlockLUKSKeySlot { + /* state of keyslot, enabled/disable */ + uint32_t active; + /* iterations for PBKDF2 */ + uint32_t iterations; + /* salt for PBKDF2 */ + uint8_t salt[QCRYPTO_BLOCK_LUKS_SALT_LEN]; + /* start sector of key material */ + uint32_t key_offset_sector; + /* number of anti-forensic stripes */ + uint32_t stripes; +}; + +QEMU_BUILD_BUG_ON(sizeof(struct QCryptoBlockLUKSKeySlot) != 48); + + +/* + * This struct is written to disk in big-endian format, + * but operated upon in native-endian format. + */ +struct QCryptoBlockLUKSHeader { + /* 'L', 'U', 'K', 'S', '0xBA', '0xBE' */ + char magic[QCRYPTO_BLOCK_LUKS_MAGIC_LEN]; + + /* LUKS version, currently 1 */ + uint16_t version; + + /* cipher name specification (aes, etc) */ + char cipher_name[QCRYPTO_BLOCK_LUKS_CIPHER_NAME_LEN]; + + /* cipher mode specification (cbc-plain, xts-essiv:sha256, etc) */ + char cipher_mode[QCRYPTO_BLOCK_LUKS_CIPHER_MODE_LEN]; + + /* hash specification (sha256, etc) */ + char hash_spec[QCRYPTO_BLOCK_LUKS_HASH_SPEC_LEN]; + + /* start offset of the volume data (in 512 byte sectors) */ + uint32_t payload_offset_sector; + + /* Number of key bytes */ + uint32_t master_key_len; + + /* master key checksum after PBKDF2 */ + uint8_t master_key_digest[QCRYPTO_BLOCK_LUKS_DIGEST_LEN]; + + /* salt for master key PBKDF2 */ + uint8_t master_key_salt[QCRYPTO_BLOCK_LUKS_SALT_LEN]; + + /* iterations for master key PBKDF2 */ + uint32_t master_key_iterations; + + /* UUID of the partition in standard ASCII representation */ + uint8_t uuid[QCRYPTO_BLOCK_LUKS_UUID_LEN]; + + /* key slots */ + QCryptoBlockLUKSKeySlot key_slots[QCRYPTO_BLOCK_LUKS_NUM_KEY_SLOTS]; +}; + +QEMU_BUILD_BUG_ON(sizeof(struct QCryptoBlockLUKSHeader) != 592); + + +struct QCryptoBlockLUKS { + QCryptoBlockLUKSHeader header; + + /* Main encryption algorithm used for encryption*/ + QCryptoCipherAlgorithm cipher_alg; + + /* Mode of encryption for the selected encryption algorithm */ + QCryptoCipherMode cipher_mode; + + /* Initialization vector generation algorithm */ + QCryptoIVGenAlgorithm ivgen_alg; + + /* Hash algorithm used for IV generation*/ + QCryptoHashAlgorithm ivgen_hash_alg; + + /* + * Encryption algorithm used for IV generation. + * Usually the same as main encryption algorithm + */ + QCryptoCipherAlgorithm ivgen_cipher_alg; + + /* Hash algorithm used in pbkdf2 function */ + QCryptoHashAlgorithm hash_alg; + + /* Name of the secret that was used to open the image */ + char *secret; +}; + + +static int qcrypto_block_luks_cipher_name_lookup(const char *name, + QCryptoCipherMode mode, + uint32_t key_bytes, + Error **errp) +{ + const QCryptoBlockLUKSCipherNameMap *map = + qcrypto_block_luks_cipher_name_map; + size_t maplen = G_N_ELEMENTS(qcrypto_block_luks_cipher_name_map); + size_t i, j; + + if (mode == QCRYPTO_CIPHER_MODE_XTS) { + key_bytes /= 2; + } + + for (i = 0; i < maplen; i++) { + if (!g_str_equal(map[i].name, name)) { + continue; + } + for (j = 0; j < map[i].sizes[j].key_bytes; j++) { + if (map[i].sizes[j].key_bytes == key_bytes) { + return map[i].sizes[j].id; + } + } + } + + error_setg(errp, "Algorithm %s with key size %d bytes not supported", + name, key_bytes); + return 0; +} + +static const char * +qcrypto_block_luks_cipher_alg_lookup(QCryptoCipherAlgorithm alg, + Error **errp) +{ + const QCryptoBlockLUKSCipherNameMap *map = + qcrypto_block_luks_cipher_name_map; + size_t maplen = G_N_ELEMENTS(qcrypto_block_luks_cipher_name_map); + size_t i, j; + for (i = 0; i < maplen; i++) { + for (j = 0; j < map[i].sizes[j].key_bytes; j++) { + if (map[i].sizes[j].id == alg) { + return map[i].name; + } + } + } + + error_setg(errp, "Algorithm '%s' not supported", + QCryptoCipherAlgorithm_str(alg)); + return NULL; +} + +/* XXX replace with qapi_enum_parse() in future, when we can + * make that function emit a more friendly error message */ +static int qcrypto_block_luks_name_lookup(const char *name, + const QEnumLookup *map, + const char *type, + Error **errp) +{ + int ret = qapi_enum_parse(map, name, -1, NULL); + + if (ret < 0) { + error_setg(errp, "%s %s not supported", type, name); + return 0; + } + return ret; +} + +#define qcrypto_block_luks_cipher_mode_lookup(name, errp) \ + qcrypto_block_luks_name_lookup(name, \ + &QCryptoCipherMode_lookup, \ + "Cipher mode", \ + errp) + +#define qcrypto_block_luks_hash_name_lookup(name, errp) \ + qcrypto_block_luks_name_lookup(name, \ + &QCryptoHashAlgorithm_lookup, \ + "Hash algorithm", \ + errp) + +#define qcrypto_block_luks_ivgen_name_lookup(name, errp) \ + qcrypto_block_luks_name_lookup(name, \ + &QCryptoIVGenAlgorithm_lookup, \ + "IV generator", \ + errp) + + +static bool +qcrypto_block_luks_has_format(const uint8_t *buf, + size_t buf_size) +{ + const QCryptoBlockLUKSHeader *luks_header = (const void *)buf; + + if (buf_size >= offsetof(QCryptoBlockLUKSHeader, cipher_name) && + memcmp(luks_header->magic, qcrypto_block_luks_magic, + QCRYPTO_BLOCK_LUKS_MAGIC_LEN) == 0 && + be16_to_cpu(luks_header->version) == QCRYPTO_BLOCK_LUKS_VERSION) { + return true; + } else { + return false; + } +} + + +/** + * Deal with a quirk of dm-crypt usage of ESSIV. + * + * When calculating ESSIV IVs, the cipher length used by ESSIV + * may be different from the cipher length used for the block + * encryption, becauses dm-crypt uses the hash digest length + * as the key size. ie, if you have AES 128 as the block cipher + * and SHA 256 as ESSIV hash, then ESSIV will use AES 256 as + * the cipher since that gets a key length matching the digest + * size, not AES 128 with truncated digest as might be imagined + */ +static QCryptoCipherAlgorithm +qcrypto_block_luks_essiv_cipher(QCryptoCipherAlgorithm cipher, + QCryptoHashAlgorithm hash, + Error **errp) +{ + size_t digestlen = qcrypto_hash_digest_len(hash); + size_t keylen = qcrypto_cipher_get_key_len(cipher); + if (digestlen == keylen) { + return cipher; + } + + switch (cipher) { + case QCRYPTO_CIPHER_ALG_AES_128: + case QCRYPTO_CIPHER_ALG_AES_192: + case QCRYPTO_CIPHER_ALG_AES_256: + if (digestlen == qcrypto_cipher_get_key_len( + QCRYPTO_CIPHER_ALG_AES_128)) { + return QCRYPTO_CIPHER_ALG_AES_128; + } else if (digestlen == qcrypto_cipher_get_key_len( + QCRYPTO_CIPHER_ALG_AES_192)) { + return QCRYPTO_CIPHER_ALG_AES_192; + } else if (digestlen == qcrypto_cipher_get_key_len( + QCRYPTO_CIPHER_ALG_AES_256)) { + return QCRYPTO_CIPHER_ALG_AES_256; + } else { + error_setg(errp, "No AES cipher with key size %zu available", + digestlen); + return 0; + } + break; + case QCRYPTO_CIPHER_ALG_SERPENT_128: + case QCRYPTO_CIPHER_ALG_SERPENT_192: + case QCRYPTO_CIPHER_ALG_SERPENT_256: + if (digestlen == qcrypto_cipher_get_key_len( + QCRYPTO_CIPHER_ALG_SERPENT_128)) { + return QCRYPTO_CIPHER_ALG_SERPENT_128; + } else if (digestlen == qcrypto_cipher_get_key_len( + QCRYPTO_CIPHER_ALG_SERPENT_192)) { + return QCRYPTO_CIPHER_ALG_SERPENT_192; + } else if (digestlen == qcrypto_cipher_get_key_len( + QCRYPTO_CIPHER_ALG_SERPENT_256)) { + return QCRYPTO_CIPHER_ALG_SERPENT_256; + } else { + error_setg(errp, "No Serpent cipher with key size %zu available", + digestlen); + return 0; + } + break; + case QCRYPTO_CIPHER_ALG_TWOFISH_128: + case QCRYPTO_CIPHER_ALG_TWOFISH_192: + case QCRYPTO_CIPHER_ALG_TWOFISH_256: + if (digestlen == qcrypto_cipher_get_key_len( + QCRYPTO_CIPHER_ALG_TWOFISH_128)) { + return QCRYPTO_CIPHER_ALG_TWOFISH_128; + } else if (digestlen == qcrypto_cipher_get_key_len( + QCRYPTO_CIPHER_ALG_TWOFISH_192)) { + return QCRYPTO_CIPHER_ALG_TWOFISH_192; + } else if (digestlen == qcrypto_cipher_get_key_len( + QCRYPTO_CIPHER_ALG_TWOFISH_256)) { + return QCRYPTO_CIPHER_ALG_TWOFISH_256; + } else { + error_setg(errp, "No Twofish cipher with key size %zu available", + digestlen); + return 0; + } + break; + default: + error_setg(errp, "Cipher %s not supported with essiv", + QCryptoCipherAlgorithm_str(cipher)); + return 0; + } +} + +/* + * Returns number of sectors needed to store the key material + * given number of anti forensic stripes + */ +static int +qcrypto_block_luks_splitkeylen_sectors(const QCryptoBlockLUKS *luks, + unsigned int header_sectors, + unsigned int stripes) +{ + /* + * This calculation doesn't match that shown in the spec, + * but instead follows the cryptsetup implementation. + */ + + size_t splitkeylen = luks->header.master_key_len * stripes; + + /* First align the key material size to block size*/ + size_t splitkeylen_sectors = + DIV_ROUND_UP(splitkeylen, QCRYPTO_BLOCK_LUKS_SECTOR_SIZE); + + /* Then also align the key material size to the size of the header */ + return ROUND_UP(splitkeylen_sectors, header_sectors); +} + +/* + * Stores the main LUKS header, taking care of endianess + */ +static int +qcrypto_block_luks_store_header(QCryptoBlock *block, + QCryptoBlockWriteFunc writefunc, + void *opaque, + Error **errp) +{ + const QCryptoBlockLUKS *luks = block->opaque; + Error *local_err = NULL; + size_t i; + g_autofree QCryptoBlockLUKSHeader *hdr_copy = NULL; + + /* Create a copy of the header */ + hdr_copy = g_new0(QCryptoBlockLUKSHeader, 1); + memcpy(hdr_copy, &luks->header, sizeof(QCryptoBlockLUKSHeader)); + + /* + * Everything on disk uses Big Endian (tm), so flip header fields + * before writing them + */ + cpu_to_be16s(&hdr_copy->version); + cpu_to_be32s(&hdr_copy->payload_offset_sector); + cpu_to_be32s(&hdr_copy->master_key_len); + cpu_to_be32s(&hdr_copy->master_key_iterations); + + for (i = 0; i < QCRYPTO_BLOCK_LUKS_NUM_KEY_SLOTS; i++) { + cpu_to_be32s(&hdr_copy->key_slots[i].active); + cpu_to_be32s(&hdr_copy->key_slots[i].iterations); + cpu_to_be32s(&hdr_copy->key_slots[i].key_offset_sector); + cpu_to_be32s(&hdr_copy->key_slots[i].stripes); + } + + /* Write out the partition header and key slot headers */ + writefunc(block, 0, (const uint8_t *)hdr_copy, sizeof(*hdr_copy), + opaque, &local_err); + + if (local_err) { + error_propagate(errp, local_err); + return -1; + } + return 0; +} + +/* + * Loads the main LUKS header,and byteswaps it to native endianess + * And run basic sanity checks on it + */ +static int +qcrypto_block_luks_load_header(QCryptoBlock *block, + QCryptoBlockReadFunc readfunc, + void *opaque, + Error **errp) +{ + ssize_t rv; + size_t i; + QCryptoBlockLUKS *luks = block->opaque; + + /* + * Read the entire LUKS header, minus the key material from + * the underlying device + */ + rv = readfunc(block, 0, + (uint8_t *)&luks->header, + sizeof(luks->header), + opaque, + errp); + if (rv < 0) { + return rv; + } + + /* + * The header is always stored in big-endian format, so + * convert everything to native + */ + be16_to_cpus(&luks->header.version); + be32_to_cpus(&luks->header.payload_offset_sector); + be32_to_cpus(&luks->header.master_key_len); + be32_to_cpus(&luks->header.master_key_iterations); + + for (i = 0; i < QCRYPTO_BLOCK_LUKS_NUM_KEY_SLOTS; i++) { + be32_to_cpus(&luks->header.key_slots[i].active); + be32_to_cpus(&luks->header.key_slots[i].iterations); + be32_to_cpus(&luks->header.key_slots[i].key_offset_sector); + be32_to_cpus(&luks->header.key_slots[i].stripes); + } + + return 0; +} + +/* + * Does basic sanity checks on the LUKS header + */ +static int +qcrypto_block_luks_check_header(const QCryptoBlockLUKS *luks, Error **errp) +{ + size_t i, j; + + unsigned int header_sectors = QCRYPTO_BLOCK_LUKS_KEY_SLOT_OFFSET / + QCRYPTO_BLOCK_LUKS_SECTOR_SIZE; + + if (memcmp(luks->header.magic, qcrypto_block_luks_magic, + QCRYPTO_BLOCK_LUKS_MAGIC_LEN) != 0) { + error_setg(errp, "Volume is not in LUKS format"); + return -1; + } + + if (luks->header.version != QCRYPTO_BLOCK_LUKS_VERSION) { + error_setg(errp, "LUKS version %" PRIu32 " is not supported", + luks->header.version); + return -1; + } + + /* Check all keyslots for corruption */ + for (i = 0 ; i < QCRYPTO_BLOCK_LUKS_NUM_KEY_SLOTS ; i++) { + + const QCryptoBlockLUKSKeySlot *slot1 = &luks->header.key_slots[i]; + unsigned int start1 = slot1->key_offset_sector; + unsigned int len1 = + qcrypto_block_luks_splitkeylen_sectors(luks, + header_sectors, + slot1->stripes); + + if (slot1->stripes == 0) { + error_setg(errp, "Keyslot %zu is corrupted (stripes == 0)", i); + return -1; + } + + if (slot1->active != QCRYPTO_BLOCK_LUKS_KEY_SLOT_DISABLED && + slot1->active != QCRYPTO_BLOCK_LUKS_KEY_SLOT_ENABLED) { + error_setg(errp, + "Keyslot %zu state (active/disable) is corrupted", i); + return -1; + } + + if (start1 + len1 > luks->header.payload_offset_sector) { + error_setg(errp, + "Keyslot %zu is overlapping with the encrypted payload", + i); + return -1; + } + + for (j = i + 1 ; j < QCRYPTO_BLOCK_LUKS_NUM_KEY_SLOTS ; j++) { + const QCryptoBlockLUKSKeySlot *slot2 = &luks->header.key_slots[j]; + unsigned int start2 = slot2->key_offset_sector; + unsigned int len2 = + qcrypto_block_luks_splitkeylen_sectors(luks, + header_sectors, + slot2->stripes); + + if (start1 + len1 > start2 && start2 + len2 > start1) { + error_setg(errp, + "Keyslots %zu and %zu are overlapping in the header", + i, j); + return -1; + } + } + + } + return 0; +} + +/* + * Parses the crypto parameters that are stored in the LUKS header + */ + +static int +qcrypto_block_luks_parse_header(QCryptoBlockLUKS *luks, Error **errp) +{ + g_autofree char *cipher_mode = g_strdup(luks->header.cipher_mode); + char *ivgen_name, *ivhash_name; + Error *local_err = NULL; + + /* + * The cipher_mode header contains a string that we have + * to further parse, of the format + * + * -[:] + * + * eg cbc-essiv:sha256, cbc-plain64 + */ + ivgen_name = strchr(cipher_mode, '-'); + if (!ivgen_name) { + error_setg(errp, "Unexpected cipher mode string format %s", + luks->header.cipher_mode); + return -1; + } + *ivgen_name = '\0'; + ivgen_name++; + + ivhash_name = strchr(ivgen_name, ':'); + if (!ivhash_name) { + luks->ivgen_hash_alg = 0; + } else { + *ivhash_name = '\0'; + ivhash_name++; + + luks->ivgen_hash_alg = qcrypto_block_luks_hash_name_lookup(ivhash_name, + &local_err); + if (local_err) { + error_propagate(errp, local_err); + return -1; + } + } + + luks->cipher_mode = qcrypto_block_luks_cipher_mode_lookup(cipher_mode, + &local_err); + if (local_err) { + error_propagate(errp, local_err); + return -1; + } + + luks->cipher_alg = + qcrypto_block_luks_cipher_name_lookup(luks->header.cipher_name, + luks->cipher_mode, + luks->header.master_key_len, + &local_err); + if (local_err) { + error_propagate(errp, local_err); + return -1; + } + + luks->hash_alg = + qcrypto_block_luks_hash_name_lookup(luks->header.hash_spec, + &local_err); + if (local_err) { + error_propagate(errp, local_err); + return -1; + } + + luks->ivgen_alg = qcrypto_block_luks_ivgen_name_lookup(ivgen_name, + &local_err); + if (local_err) { + error_propagate(errp, local_err); + return -1; + } + + if (luks->ivgen_alg == QCRYPTO_IVGEN_ALG_ESSIV) { + if (!ivhash_name) { + error_setg(errp, "Missing IV generator hash specification"); + return -1; + } + luks->ivgen_cipher_alg = + qcrypto_block_luks_essiv_cipher(luks->cipher_alg, + luks->ivgen_hash_alg, + &local_err); + if (local_err) { + error_propagate(errp, local_err); + return -1; + } + } else { + + /* + * Note we parsed the ivhash_name earlier in the cipher_mode + * spec string even with plain/plain64 ivgens, but we + * will ignore it, since it is irrelevant for these ivgens. + * This is for compat with dm-crypt which will silently + * ignore hash names with these ivgens rather than report + * an error about the invalid usage + */ + luks->ivgen_cipher_alg = luks->cipher_alg; + } + return 0; +} + +/* + * Given a key slot, user password, and the master key, + * will store the encrypted master key there, and update the + * in-memory header. User must then write the in-memory header + * + * Returns: + * 0 if the keyslot was written successfully + * with the provided password + * -1 if a fatal error occurred while storing the key + */ +static int +qcrypto_block_luks_store_key(QCryptoBlock *block, + unsigned int slot_idx, + const char *password, + uint8_t *masterkey, + uint64_t iter_time, + QCryptoBlockWriteFunc writefunc, + void *opaque, + Error **errp) +{ + QCryptoBlockLUKS *luks = block->opaque; + QCryptoBlockLUKSKeySlot *slot; + g_autofree uint8_t *splitkey = NULL; + size_t splitkeylen; + g_autofree uint8_t *slotkey = NULL; + g_autoptr(QCryptoCipher) cipher = NULL; + g_autoptr(QCryptoIVGen) ivgen = NULL; + Error *local_err = NULL; + uint64_t iters; + int ret = -1; + + assert(slot_idx < QCRYPTO_BLOCK_LUKS_NUM_KEY_SLOTS); + slot = &luks->header.key_slots[slot_idx]; + if (qcrypto_random_bytes(slot->salt, + QCRYPTO_BLOCK_LUKS_SALT_LEN, + errp) < 0) { + goto cleanup; + } + + splitkeylen = luks->header.master_key_len * slot->stripes; + + /* + * Determine how many iterations are required to + * hash the user password while consuming 1 second of compute + * time + */ + iters = qcrypto_pbkdf2_count_iters(luks->hash_alg, + (uint8_t *)password, strlen(password), + slot->salt, + QCRYPTO_BLOCK_LUKS_SALT_LEN, + luks->header.master_key_len, + &local_err); + if (local_err) { + error_propagate(errp, local_err); + goto cleanup; + } + + if (iters > (ULLONG_MAX / iter_time)) { + error_setg_errno(errp, ERANGE, + "PBKDF iterations %llu too large to scale", + (unsigned long long)iters); + goto cleanup; + } + + /* iter_time was in millis, but count_iters reported for secs */ + iters = iters * iter_time / 1000; + + if (iters > UINT32_MAX) { + error_setg_errno(errp, ERANGE, + "PBKDF iterations %llu larger than %u", + (unsigned long long)iters, UINT32_MAX); + goto cleanup; + } + + slot->iterations = + MAX(iters, QCRYPTO_BLOCK_LUKS_MIN_SLOT_KEY_ITERS); + + + /* + * Generate a key that we'll use to encrypt the master + * key, from the user's password + */ + slotkey = g_new0(uint8_t, luks->header.master_key_len); + if (qcrypto_pbkdf2(luks->hash_alg, + (uint8_t *)password, strlen(password), + slot->salt, + QCRYPTO_BLOCK_LUKS_SALT_LEN, + slot->iterations, + slotkey, luks->header.master_key_len, + errp) < 0) { + goto cleanup; + } + + + /* + * Setup the encryption objects needed to encrypt the + * master key material + */ + cipher = qcrypto_cipher_new(luks->cipher_alg, + luks->cipher_mode, + slotkey, luks->header.master_key_len, + errp); + if (!cipher) { + goto cleanup; + } + + ivgen = qcrypto_ivgen_new(luks->ivgen_alg, + luks->ivgen_cipher_alg, + luks->ivgen_hash_alg, + slotkey, luks->header.master_key_len, + errp); + if (!ivgen) { + goto cleanup; + } + + /* + * Before storing the master key, we need to vastly + * increase its size, as protection against forensic + * disk data recovery + */ + splitkey = g_new0(uint8_t, splitkeylen); + + if (qcrypto_afsplit_encode(luks->hash_alg, + luks->header.master_key_len, + slot->stripes, + masterkey, + splitkey, + errp) < 0) { + goto cleanup; + } + + /* + * Now we encrypt the split master key with the key generated + * from the user's password, before storing it + */ + if (qcrypto_block_cipher_encrypt_helper(cipher, block->niv, ivgen, + QCRYPTO_BLOCK_LUKS_SECTOR_SIZE, + 0, + splitkey, + splitkeylen, + errp) < 0) { + goto cleanup; + } + + /* Write out the slot's master key material. */ + if (writefunc(block, + slot->key_offset_sector * + QCRYPTO_BLOCK_LUKS_SECTOR_SIZE, + splitkey, splitkeylen, + opaque, + errp) != splitkeylen) { + goto cleanup; + } + + slot->active = QCRYPTO_BLOCK_LUKS_KEY_SLOT_ENABLED; + + if (qcrypto_block_luks_store_header(block, writefunc, opaque, errp) < 0) { + goto cleanup; + } + + ret = 0; + +cleanup: + if (slotkey) { + memset(slotkey, 0, luks->header.master_key_len); + } + if (splitkey) { + memset(splitkey, 0, splitkeylen); + } + return ret; +} + +/* + * Given a key slot, and user password, this will attempt to unlock + * the master encryption key from the key slot. + * + * Returns: + * 0 if the key slot is disabled, or key could not be decrypted + * with the provided password + * 1 if the key slot is enabled, and key decrypted successfully + * with the provided password + * -1 if a fatal error occurred loading the key + */ +static int +qcrypto_block_luks_load_key(QCryptoBlock *block, + size_t slot_idx, + const char *password, + uint8_t *masterkey, + QCryptoBlockReadFunc readfunc, + void *opaque, + Error **errp) +{ + QCryptoBlockLUKS *luks = block->opaque; + const QCryptoBlockLUKSKeySlot *slot; + g_autofree uint8_t *splitkey = NULL; + size_t splitkeylen; + g_autofree uint8_t *possiblekey = NULL; + ssize_t rv; + g_autoptr(QCryptoCipher) cipher = NULL; + uint8_t keydigest[QCRYPTO_BLOCK_LUKS_DIGEST_LEN]; + g_autoptr(QCryptoIVGen) ivgen = NULL; + size_t niv; + + assert(slot_idx < QCRYPTO_BLOCK_LUKS_NUM_KEY_SLOTS); + slot = &luks->header.key_slots[slot_idx]; + if (slot->active != QCRYPTO_BLOCK_LUKS_KEY_SLOT_ENABLED) { + return 0; + } + + splitkeylen = luks->header.master_key_len * slot->stripes; + splitkey = g_new0(uint8_t, splitkeylen); + possiblekey = g_new0(uint8_t, luks->header.master_key_len); + + /* + * The user password is used to generate a (possible) + * decryption key. This may or may not successfully + * decrypt the master key - we just blindly assume + * the key is correct and validate the results of + * decryption later. + */ + if (qcrypto_pbkdf2(luks->hash_alg, + (const uint8_t *)password, strlen(password), + slot->salt, QCRYPTO_BLOCK_LUKS_SALT_LEN, + slot->iterations, + possiblekey, luks->header.master_key_len, + errp) < 0) { + return -1; + } + + /* + * We need to read the master key material from the + * LUKS key material header. What we're reading is + * not the raw master key, but rather the data after + * it has been passed through AFSplit and the result + * then encrypted. + */ + rv = readfunc(block, + slot->key_offset_sector * QCRYPTO_BLOCK_LUKS_SECTOR_SIZE, + splitkey, splitkeylen, + opaque, + errp); + if (rv < 0) { + return -1; + } + + + /* Setup the cipher/ivgen that we'll use to try to decrypt + * the split master key material */ + cipher = qcrypto_cipher_new(luks->cipher_alg, + luks->cipher_mode, + possiblekey, + luks->header.master_key_len, + errp); + if (!cipher) { + return -1; + } + + niv = qcrypto_cipher_get_iv_len(luks->cipher_alg, + luks->cipher_mode); + + ivgen = qcrypto_ivgen_new(luks->ivgen_alg, + luks->ivgen_cipher_alg, + luks->ivgen_hash_alg, + possiblekey, + luks->header.master_key_len, + errp); + if (!ivgen) { + return -1; + } + + + /* + * The master key needs to be decrypted in the same + * way that the block device payload will be decrypted + * later. In particular we'll be using the IV generator + * to reset the encryption cipher every time the master + * key crosses a sector boundary. + */ + if (qcrypto_block_cipher_decrypt_helper(cipher, + niv, + ivgen, + QCRYPTO_BLOCK_LUKS_SECTOR_SIZE, + 0, + splitkey, + splitkeylen, + errp) < 0) { + return -1; + } + + /* + * Now we've decrypted the split master key, join + * it back together to get the actual master key. + */ + if (qcrypto_afsplit_decode(luks->hash_alg, + luks->header.master_key_len, + slot->stripes, + splitkey, + masterkey, + errp) < 0) { + return -1; + } + + + /* + * We still don't know that the masterkey we got is valid, + * because we just blindly assumed the user's password + * was correct. This is where we now verify it. We are + * creating a hash of the master key using PBKDF and + * then comparing that to the hash stored in the key slot + * header + */ + if (qcrypto_pbkdf2(luks->hash_alg, + masterkey, + luks->header.master_key_len, + luks->header.master_key_salt, + QCRYPTO_BLOCK_LUKS_SALT_LEN, + luks->header.master_key_iterations, + keydigest, + G_N_ELEMENTS(keydigest), + errp) < 0) { + return -1; + } + + if (memcmp(keydigest, luks->header.master_key_digest, + QCRYPTO_BLOCK_LUKS_DIGEST_LEN) == 0) { + /* Success, we got the right master key */ + return 1; + } + + /* Fail, user's password was not valid for this key slot, + * tell caller to try another slot */ + return 0; +} + + +/* + * Given a user password, this will iterate over all key + * slots and try to unlock each active key slot using the + * password until it successfully obtains a master key. + * + * Returns 0 if a key was loaded, -1 if no keys could be loaded + */ +static int +qcrypto_block_luks_find_key(QCryptoBlock *block, + const char *password, + uint8_t *masterkey, + QCryptoBlockReadFunc readfunc, + void *opaque, + Error **errp) +{ + size_t i; + int rv; + + for (i = 0; i < QCRYPTO_BLOCK_LUKS_NUM_KEY_SLOTS; i++) { + rv = qcrypto_block_luks_load_key(block, + i, + password, + masterkey, + readfunc, + opaque, + errp); + if (rv < 0) { + goto error; + } + if (rv == 1) { + return 0; + } + } + + error_setg(errp, "Invalid password, cannot unlock any keyslot"); + error: + return -1; +} + +/* + * Returns true if a slot i is marked as active + * (contains encrypted copy of the master key) + */ +static bool +qcrypto_block_luks_slot_active(const QCryptoBlockLUKS *luks, + unsigned int slot_idx) +{ + uint32_t val; + + assert(slot_idx < QCRYPTO_BLOCK_LUKS_NUM_KEY_SLOTS); + val = luks->header.key_slots[slot_idx].active; + return val == QCRYPTO_BLOCK_LUKS_KEY_SLOT_ENABLED; +} + +/* + * Returns the number of slots that are marked as active + * (slots that contain encrypted copy of the master key) + */ +static unsigned int +qcrypto_block_luks_count_active_slots(const QCryptoBlockLUKS *luks) +{ + size_t i = 0; + unsigned int ret = 0; + + for (i = 0; i < QCRYPTO_BLOCK_LUKS_NUM_KEY_SLOTS; i++) { + if (qcrypto_block_luks_slot_active(luks, i)) { + ret++; + } + } + return ret; +} + +/* + * Finds first key slot which is not active + * Returns the key slot index, or -1 if it doesn't exist + */ +static int +qcrypto_block_luks_find_free_keyslot(const QCryptoBlockLUKS *luks) +{ + size_t i; + + for (i = 0; i < QCRYPTO_BLOCK_LUKS_NUM_KEY_SLOTS; i++) { + if (!qcrypto_block_luks_slot_active(luks, i)) { + return i; + } + } + return -1; +} + +/* + * Erases an keyslot given its index + * Returns: + * 0 if the keyslot was erased successfully + * -1 if a error occurred while erasing the keyslot + * + */ +static int +qcrypto_block_luks_erase_key(QCryptoBlock *block, + unsigned int slot_idx, + QCryptoBlockWriteFunc writefunc, + void *opaque, + Error **errp) +{ + QCryptoBlockLUKS *luks = block->opaque; + QCryptoBlockLUKSKeySlot *slot; + g_autofree uint8_t *garbagesplitkey = NULL; + size_t splitkeylen; + size_t i; + Error *local_err = NULL; + int ret; + + assert(slot_idx < QCRYPTO_BLOCK_LUKS_NUM_KEY_SLOTS); + slot = &luks->header.key_slots[slot_idx]; + + splitkeylen = luks->header.master_key_len * slot->stripes; + assert(splitkeylen > 0); + + garbagesplitkey = g_new0(uint8_t, splitkeylen); + + /* Reset the key slot header */ + memset(slot->salt, 0, QCRYPTO_BLOCK_LUKS_SALT_LEN); + slot->iterations = 0; + slot->active = QCRYPTO_BLOCK_LUKS_KEY_SLOT_DISABLED; + + ret = qcrypto_block_luks_store_header(block, writefunc, + opaque, &local_err); + + if (ret < 0) { + error_propagate(errp, local_err); + } + /* + * Now try to erase the key material, even if the header + * update failed + */ + for (i = 0; i < QCRYPTO_BLOCK_LUKS_ERASE_ITERATIONS; i++) { + if (qcrypto_random_bytes(garbagesplitkey, + splitkeylen, &local_err) < 0) { + /* + * If we failed to get the random data, still write + * at least zeros to the key slot at least once + */ + error_propagate(errp, local_err); + + if (i > 0) { + return -1; + } + } + if (writefunc(block, + slot->key_offset_sector * QCRYPTO_BLOCK_LUKS_SECTOR_SIZE, + garbagesplitkey, + splitkeylen, + opaque, + &local_err) != splitkeylen) { + error_propagate(errp, local_err); + return -1; + } + } + return ret; +} + +static int +qcrypto_block_luks_open(QCryptoBlock *block, + QCryptoBlockOpenOptions *options, + const char *optprefix, + QCryptoBlockReadFunc readfunc, + void *opaque, + unsigned int flags, + size_t n_threads, + Error **errp) +{ + QCryptoBlockLUKS *luks = NULL; + g_autofree uint8_t *masterkey = NULL; + g_autofree char *password = NULL; + + if (!(flags & QCRYPTO_BLOCK_OPEN_NO_IO)) { + if (!options->u.luks.key_secret) { + error_setg(errp, "Parameter '%skey-secret' is required for cipher", + optprefix ? optprefix : ""); + return -1; + } + password = qcrypto_secret_lookup_as_utf8( + options->u.luks.key_secret, errp); + if (!password) { + return -1; + } + } + + luks = g_new0(QCryptoBlockLUKS, 1); + block->opaque = luks; + luks->secret = g_strdup(options->u.luks.key_secret); + + if (qcrypto_block_luks_load_header(block, readfunc, opaque, errp) < 0) { + goto fail; + } + + if (qcrypto_block_luks_check_header(luks, errp) < 0) { + goto fail; + } + + if (qcrypto_block_luks_parse_header(luks, errp) < 0) { + goto fail; + } + + if (!(flags & QCRYPTO_BLOCK_OPEN_NO_IO)) { + /* Try to find which key slot our password is valid for + * and unlock the master key from that slot. + */ + + masterkey = g_new0(uint8_t, luks->header.master_key_len); + + if (qcrypto_block_luks_find_key(block, + password, + masterkey, + readfunc, opaque, + errp) < 0) { + goto fail; + } + + /* We have a valid master key now, so can setup the + * block device payload decryption objects + */ + block->kdfhash = luks->hash_alg; + block->niv = qcrypto_cipher_get_iv_len(luks->cipher_alg, + luks->cipher_mode); + + block->ivgen = qcrypto_ivgen_new(luks->ivgen_alg, + luks->ivgen_cipher_alg, + luks->ivgen_hash_alg, + masterkey, + luks->header.master_key_len, + errp); + if (!block->ivgen) { + goto fail; + } + + if (qcrypto_block_init_cipher(block, + luks->cipher_alg, + luks->cipher_mode, + masterkey, + luks->header.master_key_len, + n_threads, + errp) < 0) { + goto fail; + } + } + + block->sector_size = QCRYPTO_BLOCK_LUKS_SECTOR_SIZE; + block->payload_offset = luks->header.payload_offset_sector * + block->sector_size; + + return 0; + + fail: + qcrypto_block_free_cipher(block); + qcrypto_ivgen_free(block->ivgen); + g_free(luks->secret); + g_free(luks); + return -1; +} + + +static void +qcrypto_block_luks_uuid_gen(uint8_t *uuidstr) +{ + QemuUUID uuid; + qemu_uuid_generate(&uuid); + qemu_uuid_unparse(&uuid, (char *)uuidstr); +} + +static int +qcrypto_block_luks_create(QCryptoBlock *block, + QCryptoBlockCreateOptions *options, + const char *optprefix, + QCryptoBlockInitFunc initfunc, + QCryptoBlockWriteFunc writefunc, + void *opaque, + Error **errp) +{ + QCryptoBlockLUKS *luks; + QCryptoBlockCreateOptionsLUKS luks_opts; + Error *local_err = NULL; + g_autofree uint8_t *masterkey = NULL; + size_t header_sectors; + size_t split_key_sectors; + size_t i; + g_autofree char *password = NULL; + const char *cipher_alg; + const char *cipher_mode; + const char *ivgen_alg; + const char *ivgen_hash_alg = NULL; + const char *hash_alg; + g_autofree char *cipher_mode_spec = NULL; + uint64_t iters; + + memcpy(&luks_opts, &options->u.luks, sizeof(luks_opts)); + if (!luks_opts.has_iter_time) { + luks_opts.iter_time = QCRYPTO_BLOCK_LUKS_DEFAULT_ITER_TIME_MS; + } + if (!luks_opts.has_cipher_alg) { + luks_opts.cipher_alg = QCRYPTO_CIPHER_ALG_AES_256; + } + if (!luks_opts.has_cipher_mode) { + luks_opts.cipher_mode = QCRYPTO_CIPHER_MODE_XTS; + } + if (!luks_opts.has_ivgen_alg) { + luks_opts.ivgen_alg = QCRYPTO_IVGEN_ALG_PLAIN64; + } + if (!luks_opts.has_hash_alg) { + luks_opts.hash_alg = QCRYPTO_HASH_ALG_SHA256; + } + if (luks_opts.ivgen_alg == QCRYPTO_IVGEN_ALG_ESSIV) { + if (!luks_opts.has_ivgen_hash_alg) { + luks_opts.ivgen_hash_alg = QCRYPTO_HASH_ALG_SHA256; + luks_opts.has_ivgen_hash_alg = true; + } + } + + luks = g_new0(QCryptoBlockLUKS, 1); + block->opaque = luks; + + luks->cipher_alg = luks_opts.cipher_alg; + luks->cipher_mode = luks_opts.cipher_mode; + luks->ivgen_alg = luks_opts.ivgen_alg; + luks->ivgen_hash_alg = luks_opts.ivgen_hash_alg; + luks->hash_alg = luks_opts.hash_alg; + + + /* Note we're allowing ivgen_hash_alg to be set even for + * non-essiv iv generators that don't need a hash. It will + * be silently ignored, for compatibility with dm-crypt */ + + if (!options->u.luks.key_secret) { + error_setg(errp, "Parameter '%skey-secret' is required for cipher", + optprefix ? optprefix : ""); + goto error; + } + luks->secret = g_strdup(options->u.luks.key_secret); + + password = qcrypto_secret_lookup_as_utf8(luks_opts.key_secret, errp); + if (!password) { + goto error; + } + + + memcpy(luks->header.magic, qcrypto_block_luks_magic, + QCRYPTO_BLOCK_LUKS_MAGIC_LEN); + + /* We populate the header in native endianness initially and + * then convert everything to big endian just before writing + * it out to disk + */ + luks->header.version = QCRYPTO_BLOCK_LUKS_VERSION; + qcrypto_block_luks_uuid_gen(luks->header.uuid); + + cipher_alg = qcrypto_block_luks_cipher_alg_lookup(luks_opts.cipher_alg, + errp); + if (!cipher_alg) { + goto error; + } + + cipher_mode = QCryptoCipherMode_str(luks_opts.cipher_mode); + ivgen_alg = QCryptoIVGenAlgorithm_str(luks_opts.ivgen_alg); + if (luks_opts.has_ivgen_hash_alg) { + ivgen_hash_alg = QCryptoHashAlgorithm_str(luks_opts.ivgen_hash_alg); + cipher_mode_spec = g_strdup_printf("%s-%s:%s", cipher_mode, ivgen_alg, + ivgen_hash_alg); + } else { + cipher_mode_spec = g_strdup_printf("%s-%s", cipher_mode, ivgen_alg); + } + hash_alg = QCryptoHashAlgorithm_str(luks_opts.hash_alg); + + + if (strlen(cipher_alg) >= QCRYPTO_BLOCK_LUKS_CIPHER_NAME_LEN) { + error_setg(errp, "Cipher name '%s' is too long for LUKS header", + cipher_alg); + goto error; + } + if (strlen(cipher_mode_spec) >= QCRYPTO_BLOCK_LUKS_CIPHER_MODE_LEN) { + error_setg(errp, "Cipher mode '%s' is too long for LUKS header", + cipher_mode_spec); + goto error; + } + if (strlen(hash_alg) >= QCRYPTO_BLOCK_LUKS_HASH_SPEC_LEN) { + error_setg(errp, "Hash name '%s' is too long for LUKS header", + hash_alg); + goto error; + } + + if (luks_opts.ivgen_alg == QCRYPTO_IVGEN_ALG_ESSIV) { + luks->ivgen_cipher_alg = + qcrypto_block_luks_essiv_cipher(luks_opts.cipher_alg, + luks_opts.ivgen_hash_alg, + &local_err); + if (local_err) { + error_propagate(errp, local_err); + goto error; + } + } else { + luks->ivgen_cipher_alg = luks_opts.cipher_alg; + } + + strcpy(luks->header.cipher_name, cipher_alg); + strcpy(luks->header.cipher_mode, cipher_mode_spec); + strcpy(luks->header.hash_spec, hash_alg); + + luks->header.master_key_len = + qcrypto_cipher_get_key_len(luks_opts.cipher_alg); + + if (luks_opts.cipher_mode == QCRYPTO_CIPHER_MODE_XTS) { + luks->header.master_key_len *= 2; + } + + /* Generate the salt used for hashing the master key + * with PBKDF later + */ + if (qcrypto_random_bytes(luks->header.master_key_salt, + QCRYPTO_BLOCK_LUKS_SALT_LEN, + errp) < 0) { + goto error; + } + + /* Generate random master key */ + masterkey = g_new0(uint8_t, luks->header.master_key_len); + if (qcrypto_random_bytes(masterkey, + luks->header.master_key_len, errp) < 0) { + goto error; + } + + + /* Setup the block device payload encryption objects */ + if (qcrypto_block_init_cipher(block, luks_opts.cipher_alg, + luks_opts.cipher_mode, masterkey, + luks->header.master_key_len, 1, errp) < 0) { + goto error; + } + + block->kdfhash = luks_opts.hash_alg; + block->niv = qcrypto_cipher_get_iv_len(luks_opts.cipher_alg, + luks_opts.cipher_mode); + block->ivgen = qcrypto_ivgen_new(luks_opts.ivgen_alg, + luks->ivgen_cipher_alg, + luks_opts.ivgen_hash_alg, + masterkey, luks->header.master_key_len, + errp); + + if (!block->ivgen) { + goto error; + } + + + /* Determine how many iterations we need to hash the master + * key, in order to have 1 second of compute time used + */ + iters = qcrypto_pbkdf2_count_iters(luks_opts.hash_alg, + masterkey, luks->header.master_key_len, + luks->header.master_key_salt, + QCRYPTO_BLOCK_LUKS_SALT_LEN, + QCRYPTO_BLOCK_LUKS_DIGEST_LEN, + &local_err); + if (local_err) { + error_propagate(errp, local_err); + goto error; + } + + if (iters > (ULLONG_MAX / luks_opts.iter_time)) { + error_setg_errno(errp, ERANGE, + "PBKDF iterations %llu too large to scale", + (unsigned long long)iters); + goto error; + } + + /* iter_time was in millis, but count_iters reported for secs */ + iters = iters * luks_opts.iter_time / 1000; + + /* Why /= 8 ? That matches cryptsetup, but there's no + * explanation why they chose /= 8... Probably so that + * if all 8 keyslots are active we only spend 1 second + * in total time to check all keys */ + iters /= 8; + if (iters > UINT32_MAX) { + error_setg_errno(errp, ERANGE, + "PBKDF iterations %llu larger than %u", + (unsigned long long)iters, UINT32_MAX); + goto error; + } + iters = MAX(iters, QCRYPTO_BLOCK_LUKS_MIN_MASTER_KEY_ITERS); + luks->header.master_key_iterations = iters; + + /* Hash the master key, saving the result in the LUKS + * header. This hash is used when opening the encrypted + * device to verify that the user password unlocked a + * valid master key + */ + if (qcrypto_pbkdf2(luks_opts.hash_alg, + masterkey, luks->header.master_key_len, + luks->header.master_key_salt, + QCRYPTO_BLOCK_LUKS_SALT_LEN, + luks->header.master_key_iterations, + luks->header.master_key_digest, + QCRYPTO_BLOCK_LUKS_DIGEST_LEN, + errp) < 0) { + goto error; + } + + /* start with the sector that follows the header*/ + header_sectors = QCRYPTO_BLOCK_LUKS_KEY_SLOT_OFFSET / + QCRYPTO_BLOCK_LUKS_SECTOR_SIZE; + + split_key_sectors = + qcrypto_block_luks_splitkeylen_sectors(luks, + header_sectors, + QCRYPTO_BLOCK_LUKS_STRIPES); + + for (i = 0; i < QCRYPTO_BLOCK_LUKS_NUM_KEY_SLOTS; i++) { + QCryptoBlockLUKSKeySlot *slot = &luks->header.key_slots[i]; + slot->active = QCRYPTO_BLOCK_LUKS_KEY_SLOT_DISABLED; + + slot->key_offset_sector = header_sectors + i * split_key_sectors; + slot->stripes = QCRYPTO_BLOCK_LUKS_STRIPES; + } + + /* The total size of the LUKS headers is the partition header + key + * slot headers, rounded up to the nearest sector, combined with + * the size of each master key material region, also rounded up + * to the nearest sector */ + luks->header.payload_offset_sector = header_sectors + + QCRYPTO_BLOCK_LUKS_NUM_KEY_SLOTS * split_key_sectors; + + block->sector_size = QCRYPTO_BLOCK_LUKS_SECTOR_SIZE; + block->payload_offset = luks->header.payload_offset_sector * + block->sector_size; + + /* Reserve header space to match payload offset */ + initfunc(block, block->payload_offset, opaque, &local_err); + if (local_err) { + error_propagate(errp, local_err); + goto error; + } + + + /* populate the slot 0 with the password encrypted master key*/ + /* This will also store the header */ + if (qcrypto_block_luks_store_key(block, + 0, + password, + masterkey, + luks_opts.iter_time, + writefunc, + opaque, + errp) < 0) { + goto error; + } + + memset(masterkey, 0, luks->header.master_key_len); + + return 0; + + error: + if (masterkey) { + memset(masterkey, 0, luks->header.master_key_len); + } + + qcrypto_block_free_cipher(block); + qcrypto_ivgen_free(block->ivgen); + + g_free(luks->secret); + g_free(luks); + return -1; +} + +static int +qcrypto_block_luks_amend_add_keyslot(QCryptoBlock *block, + QCryptoBlockReadFunc readfunc, + QCryptoBlockWriteFunc writefunc, + void *opaque, + QCryptoBlockAmendOptionsLUKS *opts_luks, + bool force, + Error **errp) +{ + QCryptoBlockLUKS *luks = block->opaque; + uint64_t iter_time = opts_luks->has_iter_time ? + opts_luks->iter_time : + QCRYPTO_BLOCK_LUKS_DEFAULT_ITER_TIME_MS; + int keyslot; + g_autofree char *old_password = NULL; + g_autofree char *new_password = NULL; + g_autofree uint8_t *master_key = NULL; + + char *secret = opts_luks->has_secret ? opts_luks->secret : luks->secret; + + if (!opts_luks->has_new_secret) { + error_setg(errp, "'new-secret' is required to activate a keyslot"); + return -1; + } + if (opts_luks->has_old_secret) { + error_setg(errp, + "'old-secret' must not be given when activating keyslots"); + return -1; + } + + if (opts_luks->has_keyslot) { + keyslot = opts_luks->keyslot; + if (keyslot < 0 || keyslot >= QCRYPTO_BLOCK_LUKS_NUM_KEY_SLOTS) { + error_setg(errp, + "Invalid keyslot %u specified, must be between 0 and %u", + keyslot, QCRYPTO_BLOCK_LUKS_NUM_KEY_SLOTS - 1); + return -1; + } + } else { + keyslot = qcrypto_block_luks_find_free_keyslot(luks); + if (keyslot == -1) { + error_setg(errp, + "Can't add a keyslot - all keyslots are in use"); + return -1; + } + } + + if (!force && qcrypto_block_luks_slot_active(luks, keyslot)) { + error_setg(errp, + "Refusing to overwrite active keyslot %i - " + "please erase it first", + keyslot); + return -1; + } + + /* Locate the password that will be used to retrieve the master key */ + old_password = qcrypto_secret_lookup_as_utf8(secret, errp); + if (!old_password) { + return -1; + } + + /* Retrieve the master key */ + master_key = g_new0(uint8_t, luks->header.master_key_len); + + if (qcrypto_block_luks_find_key(block, old_password, master_key, + readfunc, opaque, errp) < 0) { + error_append_hint(errp, "Failed to retrieve the master key"); + return -1; + } + + /* Locate the new password*/ + new_password = qcrypto_secret_lookup_as_utf8(opts_luks->new_secret, errp); + if (!new_password) { + return -1; + } + + /* Now set the new keyslots */ + if (qcrypto_block_luks_store_key(block, keyslot, new_password, master_key, + iter_time, writefunc, opaque, errp)) { + error_append_hint(errp, "Failed to write to keyslot %i", keyslot); + return -1; + } + return 0; +} + +static int +qcrypto_block_luks_amend_erase_keyslots(QCryptoBlock *block, + QCryptoBlockReadFunc readfunc, + QCryptoBlockWriteFunc writefunc, + void *opaque, + QCryptoBlockAmendOptionsLUKS *opts_luks, + bool force, + Error **errp) +{ + QCryptoBlockLUKS *luks = block->opaque; + g_autofree uint8_t *tmpkey = NULL; + g_autofree char *old_password = NULL; + + if (opts_luks->has_new_secret) { + error_setg(errp, + "'new-secret' must not be given when erasing keyslots"); + return -1; + } + if (opts_luks->has_iter_time) { + error_setg(errp, + "'iter-time' must not be given when erasing keyslots"); + return -1; + } + if (opts_luks->has_secret) { + error_setg(errp, + "'secret' must not be given when erasing keyslots"); + return -1; + } + + /* Load the old password if given */ + if (opts_luks->has_old_secret) { + old_password = qcrypto_secret_lookup_as_utf8(opts_luks->old_secret, + errp); + if (!old_password) { + return -1; + } + + /* + * Allocate a temporary key buffer that we will need when + * checking if slot matches the given old password + */ + tmpkey = g_new0(uint8_t, luks->header.master_key_len); + } + + /* Erase an explicitly given keyslot */ + if (opts_luks->has_keyslot) { + int keyslot = opts_luks->keyslot; + + if (keyslot < 0 || keyslot >= QCRYPTO_BLOCK_LUKS_NUM_KEY_SLOTS) { + error_setg(errp, + "Invalid keyslot %i specified, must be between 0 and %i", + keyslot, QCRYPTO_BLOCK_LUKS_NUM_KEY_SLOTS - 1); + return -1; + } + + if (opts_luks->has_old_secret) { + int rv = qcrypto_block_luks_load_key(block, + keyslot, + old_password, + tmpkey, + readfunc, + opaque, + errp); + if (rv == -1) { + return -1; + } else if (rv == 0) { + error_setg(errp, + "Given keyslot %i doesn't contain the given " + "old password for erase operation", + keyslot); + return -1; + } + } + + if (!force && !qcrypto_block_luks_slot_active(luks, keyslot)) { + error_setg(errp, + "Given keyslot %i is already erased (inactive) ", + keyslot); + return -1; + } + + if (!force && qcrypto_block_luks_count_active_slots(luks) == 1) { + error_setg(errp, + "Attempt to erase the only active keyslot %i " + "which will erase all the data in the image " + "irreversibly - refusing operation", + keyslot); + return -1; + } + + if (qcrypto_block_luks_erase_key(block, keyslot, + writefunc, opaque, errp)) { + error_append_hint(errp, "Failed to erase keyslot %i", keyslot); + return -1; + } + + /* Erase all keyslots that match the given old password */ + } else if (opts_luks->has_old_secret) { + + unsigned long slots_to_erase_bitmap = 0; + size_t i; + int slot_count; + + assert(QCRYPTO_BLOCK_LUKS_NUM_KEY_SLOTS <= + sizeof(slots_to_erase_bitmap) * 8); + + for (i = 0; i < QCRYPTO_BLOCK_LUKS_NUM_KEY_SLOTS; i++) { + int rv = qcrypto_block_luks_load_key(block, + i, + old_password, + tmpkey, + readfunc, + opaque, + errp); + if (rv == -1) { + return -1; + } else if (rv == 1) { + bitmap_set(&slots_to_erase_bitmap, i, 1); + } + } + + slot_count = bitmap_count_one(&slots_to_erase_bitmap, + QCRYPTO_BLOCK_LUKS_NUM_KEY_SLOTS); + if (slot_count == 0) { + error_setg(errp, + "No keyslots match given (old) password for erase operation"); + return -1; + } + + if (!force && + slot_count == qcrypto_block_luks_count_active_slots(luks)) { + error_setg(errp, + "All the active keyslots match the (old) password that " + "was given and erasing them will erase all the data in " + "the image irreversibly - refusing operation"); + return -1; + } + + /* Now apply the update */ + for (i = 0; i < QCRYPTO_BLOCK_LUKS_NUM_KEY_SLOTS; i++) { + if (!test_bit(i, &slots_to_erase_bitmap)) { + continue; + } + if (qcrypto_block_luks_erase_key(block, i, writefunc, + opaque, errp)) { + error_append_hint(errp, "Failed to erase keyslot %zu", i); + return -1; + } + } + } else { + error_setg(errp, + "To erase keyslot(s), either explicit keyslot index " + "or the password currently contained in them must be given"); + return -1; + } + return 0; +} + +static int +qcrypto_block_luks_amend_options(QCryptoBlock *block, + QCryptoBlockReadFunc readfunc, + QCryptoBlockWriteFunc writefunc, + void *opaque, + QCryptoBlockAmendOptions *options, + bool force, + Error **errp) +{ + QCryptoBlockAmendOptionsLUKS *opts_luks = &options->u.luks; + + switch (opts_luks->state) { + case Q_CRYPTO_BLOCKLUKS_KEYSLOT_STATE_ACTIVE: + return qcrypto_block_luks_amend_add_keyslot(block, readfunc, + writefunc, opaque, + opts_luks, force, errp); + case Q_CRYPTO_BLOCKLUKS_KEYSLOT_STATE_INACTIVE: + return qcrypto_block_luks_amend_erase_keyslots(block, readfunc, + writefunc, opaque, + opts_luks, force, errp); + default: + g_assert_not_reached(); + } +} + +static int qcrypto_block_luks_get_info(QCryptoBlock *block, + QCryptoBlockInfo *info, + Error **errp) +{ + QCryptoBlockLUKS *luks = block->opaque; + QCryptoBlockInfoLUKSSlot *slot; + QCryptoBlockInfoLUKSSlotList *slots = NULL, **prev = &info->u.luks.slots; + size_t i; + + info->u.luks.cipher_alg = luks->cipher_alg; + info->u.luks.cipher_mode = luks->cipher_mode; + info->u.luks.ivgen_alg = luks->ivgen_alg; + if (info->u.luks.ivgen_alg == QCRYPTO_IVGEN_ALG_ESSIV) { + info->u.luks.has_ivgen_hash_alg = true; + info->u.luks.ivgen_hash_alg = luks->ivgen_hash_alg; + } + info->u.luks.hash_alg = luks->hash_alg; + info->u.luks.payload_offset = block->payload_offset; + info->u.luks.master_key_iters = luks->header.master_key_iterations; + info->u.luks.uuid = g_strndup((const char *)luks->header.uuid, + sizeof(luks->header.uuid)); + + for (i = 0; i < QCRYPTO_BLOCK_LUKS_NUM_KEY_SLOTS; i++) { + slots = g_new0(QCryptoBlockInfoLUKSSlotList, 1); + *prev = slots; + + slots->value = slot = g_new0(QCryptoBlockInfoLUKSSlot, 1); + slot->active = luks->header.key_slots[i].active == + QCRYPTO_BLOCK_LUKS_KEY_SLOT_ENABLED; + slot->key_offset = luks->header.key_slots[i].key_offset_sector + * QCRYPTO_BLOCK_LUKS_SECTOR_SIZE; + if (slot->active) { + slot->has_iters = true; + slot->iters = luks->header.key_slots[i].iterations; + slot->has_stripes = true; + slot->stripes = luks->header.key_slots[i].stripes; + } + + prev = &slots->next; + } + + return 0; +} + + +static void qcrypto_block_luks_cleanup(QCryptoBlock *block) +{ + QCryptoBlockLUKS *luks = block->opaque; + if (luks) { + g_free(luks->secret); + g_free(luks); + } +} + + +static int +qcrypto_block_luks_decrypt(QCryptoBlock *block, + uint64_t offset, + uint8_t *buf, + size_t len, + Error **errp) +{ + assert(QEMU_IS_ALIGNED(offset, QCRYPTO_BLOCK_LUKS_SECTOR_SIZE)); + assert(QEMU_IS_ALIGNED(len, QCRYPTO_BLOCK_LUKS_SECTOR_SIZE)); + return qcrypto_block_decrypt_helper(block, + QCRYPTO_BLOCK_LUKS_SECTOR_SIZE, + offset, buf, len, errp); +} + + +static int +qcrypto_block_luks_encrypt(QCryptoBlock *block, + uint64_t offset, + uint8_t *buf, + size_t len, + Error **errp) +{ + assert(QEMU_IS_ALIGNED(offset, QCRYPTO_BLOCK_LUKS_SECTOR_SIZE)); + assert(QEMU_IS_ALIGNED(len, QCRYPTO_BLOCK_LUKS_SECTOR_SIZE)); + return qcrypto_block_encrypt_helper(block, + QCRYPTO_BLOCK_LUKS_SECTOR_SIZE, + offset, buf, len, errp); +} + + +const QCryptoBlockDriver qcrypto_block_driver_luks = { + .open = qcrypto_block_luks_open, + .create = qcrypto_block_luks_create, + .amend = qcrypto_block_luks_amend_options, + .get_info = qcrypto_block_luks_get_info, + .cleanup = qcrypto_block_luks_cleanup, + .decrypt = qcrypto_block_luks_decrypt, + .encrypt = qcrypto_block_luks_encrypt, + .has_format = qcrypto_block_luks_has_format, +}; diff --git a/crypto/block-luks.h b/crypto/block-luks.h new file mode 100644 index 000000000..7f094e7e9 --- /dev/null +++ b/crypto/block-luks.h @@ -0,0 +1,28 @@ +/* + * QEMU Crypto block device encryption LUKS format + * + * Copyright (c) 2015-2016 Red Hat, Inc. + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, see . + * + */ + +#ifndef QCRYPTO_BLOCK_LUKS_H +#define QCRYPTO_BLOCK_LUKS_H + +#include "blockpriv.h" + +extern const QCryptoBlockDriver qcrypto_block_driver_luks; + +#endif /* QCRYPTO_BLOCK_LUKS_H */ diff --git a/crypto/block-qcow.c b/crypto/block-qcow.c new file mode 100644 index 000000000..4d7cf36a8 --- /dev/null +++ b/crypto/block-qcow.c @@ -0,0 +1,185 @@ +/* + * QEMU Crypto block device encryption QCow/QCow2 AES-CBC format + * + * Copyright (c) 2015-2016 Red Hat, Inc. + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, see . + * + */ + +/* + * Note that the block encryption implemented in this file is broken + * by design. This exists only to allow data to be liberated from + * existing qcow[2] images and should not be used in any new areas. + */ + +#include "qemu/osdep.h" +#include "qapi/error.h" + +#include "block-qcow.h" +#include "crypto/secret.h" + +#define QCRYPTO_BLOCK_QCOW_SECTOR_SIZE 512 + + +static bool +qcrypto_block_qcow_has_format(const uint8_t *buf G_GNUC_UNUSED, + size_t buf_size G_GNUC_UNUSED) +{ + return false; +} + + +static int +qcrypto_block_qcow_init(QCryptoBlock *block, + const char *keysecret, + size_t n_threads, + Error **errp) +{ + char *password; + int ret; + uint8_t keybuf[16]; + int len; + + memset(keybuf, 0, 16); + + password = qcrypto_secret_lookup_as_utf8(keysecret, errp); + if (!password) { + return -1; + } + + len = strlen(password); + memcpy(keybuf, password, MIN(len, sizeof(keybuf))); + g_free(password); + + block->niv = qcrypto_cipher_get_iv_len(QCRYPTO_CIPHER_ALG_AES_128, + QCRYPTO_CIPHER_MODE_CBC); + block->ivgen = qcrypto_ivgen_new(QCRYPTO_IVGEN_ALG_PLAIN64, + 0, 0, NULL, 0, errp); + if (!block->ivgen) { + ret = -ENOTSUP; + goto fail; + } + + ret = qcrypto_block_init_cipher(block, QCRYPTO_CIPHER_ALG_AES_128, + QCRYPTO_CIPHER_MODE_CBC, + keybuf, G_N_ELEMENTS(keybuf), + n_threads, errp); + if (ret < 0) { + ret = -ENOTSUP; + goto fail; + } + + block->sector_size = QCRYPTO_BLOCK_QCOW_SECTOR_SIZE; + block->payload_offset = 0; + + return 0; + + fail: + qcrypto_block_free_cipher(block); + qcrypto_ivgen_free(block->ivgen); + return ret; +} + + +static int +qcrypto_block_qcow_open(QCryptoBlock *block, + QCryptoBlockOpenOptions *options, + const char *optprefix, + QCryptoBlockReadFunc readfunc G_GNUC_UNUSED, + void *opaque G_GNUC_UNUSED, + unsigned int flags, + size_t n_threads, + Error **errp) +{ + if (flags & QCRYPTO_BLOCK_OPEN_NO_IO) { + block->sector_size = QCRYPTO_BLOCK_QCOW_SECTOR_SIZE; + block->payload_offset = 0; + return 0; + } else { + if (!options->u.qcow.key_secret) { + error_setg(errp, + "Parameter '%skey-secret' is required for cipher", + optprefix ? optprefix : ""); + return -1; + } + return qcrypto_block_qcow_init(block, options->u.qcow.key_secret, + n_threads, errp); + } +} + + +static int +qcrypto_block_qcow_create(QCryptoBlock *block, + QCryptoBlockCreateOptions *options, + const char *optprefix, + QCryptoBlockInitFunc initfunc G_GNUC_UNUSED, + QCryptoBlockWriteFunc writefunc G_GNUC_UNUSED, + void *opaque G_GNUC_UNUSED, + Error **errp) +{ + if (!options->u.qcow.key_secret) { + error_setg(errp, "Parameter '%skey-secret' is required for cipher", + optprefix ? optprefix : ""); + return -1; + } + /* QCow2 has no special header, since everything is hardwired */ + return qcrypto_block_qcow_init(block, options->u.qcow.key_secret, 1, errp); +} + + +static void +qcrypto_block_qcow_cleanup(QCryptoBlock *block) +{ +} + + +static int +qcrypto_block_qcow_decrypt(QCryptoBlock *block, + uint64_t offset, + uint8_t *buf, + size_t len, + Error **errp) +{ + assert(QEMU_IS_ALIGNED(offset, QCRYPTO_BLOCK_QCOW_SECTOR_SIZE)); + assert(QEMU_IS_ALIGNED(len, QCRYPTO_BLOCK_QCOW_SECTOR_SIZE)); + return qcrypto_block_decrypt_helper(block, + QCRYPTO_BLOCK_QCOW_SECTOR_SIZE, + offset, buf, len, errp); +} + + +static int +qcrypto_block_qcow_encrypt(QCryptoBlock *block, + uint64_t offset, + uint8_t *buf, + size_t len, + Error **errp) +{ + assert(QEMU_IS_ALIGNED(offset, QCRYPTO_BLOCK_QCOW_SECTOR_SIZE)); + assert(QEMU_IS_ALIGNED(len, QCRYPTO_BLOCK_QCOW_SECTOR_SIZE)); + return qcrypto_block_encrypt_helper(block, + QCRYPTO_BLOCK_QCOW_SECTOR_SIZE, + offset, buf, len, errp); +} + + +const QCryptoBlockDriver qcrypto_block_driver_qcow = { + .open = qcrypto_block_qcow_open, + .create = qcrypto_block_qcow_create, + .cleanup = qcrypto_block_qcow_cleanup, + .decrypt = qcrypto_block_qcow_decrypt, + .encrypt = qcrypto_block_qcow_encrypt, + .has_format = qcrypto_block_qcow_has_format, +}; diff --git a/crypto/block-qcow.h b/crypto/block-qcow.h new file mode 100644 index 000000000..340dcfe46 --- /dev/null +++ b/crypto/block-qcow.h @@ -0,0 +1,28 @@ +/* + * QEMU Crypto block device encryption QCow/QCow2 AES-CBC format + * + * Copyright (c) 2015-2016 Red Hat, Inc. + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, see . + * + */ + +#ifndef QCRYPTO_BLOCK_QCOW_H +#define QCRYPTO_BLOCK_QCOW_H + +#include "blockpriv.h" + +extern const QCryptoBlockDriver qcrypto_block_driver_qcow; + +#endif /* QCRYPTO_BLOCK_QCOW_H */ diff --git a/crypto/block.c b/crypto/block.c new file mode 100644 index 000000000..eb057948b --- /dev/null +++ b/crypto/block.c @@ -0,0 +1,475 @@ +/* + * QEMU Crypto block device encryption + * + * Copyright (c) 2015-2016 Red Hat, Inc. + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, see . + * + */ + +#include "qemu/osdep.h" +#include "qapi/error.h" +#include "blockpriv.h" +#include "block-qcow.h" +#include "block-luks.h" + +static const QCryptoBlockDriver *qcrypto_block_drivers[] = { + [Q_CRYPTO_BLOCK_FORMAT_QCOW] = &qcrypto_block_driver_qcow, + [Q_CRYPTO_BLOCK_FORMAT_LUKS] = &qcrypto_block_driver_luks, +}; + + +bool qcrypto_block_has_format(QCryptoBlockFormat format, + const uint8_t *buf, + size_t len) +{ + const QCryptoBlockDriver *driver; + + if (format >= G_N_ELEMENTS(qcrypto_block_drivers) || + !qcrypto_block_drivers[format]) { + return false; + } + + driver = qcrypto_block_drivers[format]; + + return driver->has_format(buf, len); +} + + +QCryptoBlock *qcrypto_block_open(QCryptoBlockOpenOptions *options, + const char *optprefix, + QCryptoBlockReadFunc readfunc, + void *opaque, + unsigned int flags, + size_t n_threads, + Error **errp) +{ + QCryptoBlock *block = g_new0(QCryptoBlock, 1); + + block->format = options->format; + + if (options->format >= G_N_ELEMENTS(qcrypto_block_drivers) || + !qcrypto_block_drivers[options->format]) { + error_setg(errp, "Unsupported block driver %s", + QCryptoBlockFormat_str(options->format)); + g_free(block); + return NULL; + } + + block->driver = qcrypto_block_drivers[options->format]; + + if (block->driver->open(block, options, optprefix, + readfunc, opaque, flags, n_threads, errp) < 0) + { + g_free(block); + return NULL; + } + + qemu_mutex_init(&block->mutex); + + return block; +} + + +QCryptoBlock *qcrypto_block_create(QCryptoBlockCreateOptions *options, + const char *optprefix, + QCryptoBlockInitFunc initfunc, + QCryptoBlockWriteFunc writefunc, + void *opaque, + Error **errp) +{ + QCryptoBlock *block = g_new0(QCryptoBlock, 1); + + block->format = options->format; + + if (options->format >= G_N_ELEMENTS(qcrypto_block_drivers) || + !qcrypto_block_drivers[options->format]) { + error_setg(errp, "Unsupported block driver %s", + QCryptoBlockFormat_str(options->format)); + g_free(block); + return NULL; + } + + block->driver = qcrypto_block_drivers[options->format]; + + if (block->driver->create(block, options, optprefix, initfunc, + writefunc, opaque, errp) < 0) { + g_free(block); + return NULL; + } + + qemu_mutex_init(&block->mutex); + + return block; +} + + +static ssize_t qcrypto_block_headerlen_hdr_init_func(QCryptoBlock *block, + size_t headerlen, void *opaque, Error **errp) +{ + size_t *headerlenp = opaque; + + /* Stash away the payload size */ + *headerlenp = headerlen; + return 0; +} + + +static ssize_t qcrypto_block_headerlen_hdr_write_func(QCryptoBlock *block, + size_t offset, const uint8_t *buf, size_t buflen, + void *opaque, Error **errp) +{ + /* Discard the bytes, we're not actually writing to an image */ + return buflen; +} + + +bool +qcrypto_block_calculate_payload_offset(QCryptoBlockCreateOptions *create_opts, + const char *optprefix, + size_t *len, + Error **errp) +{ + /* Fake LUKS creation in order to determine the payload size */ + g_autoptr(QCryptoBlock) crypto = + qcrypto_block_create(create_opts, optprefix, + qcrypto_block_headerlen_hdr_init_func, + qcrypto_block_headerlen_hdr_write_func, + len, errp); + return crypto != NULL; +} + +int qcrypto_block_amend_options(QCryptoBlock *block, + QCryptoBlockReadFunc readfunc, + QCryptoBlockWriteFunc writefunc, + void *opaque, + QCryptoBlockAmendOptions *options, + bool force, + Error **errp) +{ + if (options->format != block->format) { + error_setg(errp, + "Cannot amend encryption format"); + return -1; + } + + if (!block->driver->amend) { + error_setg(errp, + "Crypto format %s doesn't support format options amendment", + QCryptoBlockFormat_str(block->format)); + return -1; + } + + return block->driver->amend(block, + readfunc, + writefunc, + opaque, + options, + force, + errp); +} + +QCryptoBlockInfo *qcrypto_block_get_info(QCryptoBlock *block, + Error **errp) +{ + QCryptoBlockInfo *info = g_new0(QCryptoBlockInfo, 1); + + info->format = block->format; + + if (block->driver->get_info && + block->driver->get_info(block, info, errp) < 0) { + g_free(info); + return NULL; + } + + return info; +} + + +int qcrypto_block_decrypt(QCryptoBlock *block, + uint64_t offset, + uint8_t *buf, + size_t len, + Error **errp) +{ + return block->driver->decrypt(block, offset, buf, len, errp); +} + + +int qcrypto_block_encrypt(QCryptoBlock *block, + uint64_t offset, + uint8_t *buf, + size_t len, + Error **errp) +{ + return block->driver->encrypt(block, offset, buf, len, errp); +} + + +QCryptoCipher *qcrypto_block_get_cipher(QCryptoBlock *block) +{ + /* Ciphers should be accessed through pop/push method to be thread-safe. + * Better, they should not be accessed externally at all (note, that + * pop/push are static functions) + * This function is used only in test with one thread (it's safe to skip + * pop/push interface), so it's enough to assert it here: + */ + assert(block->n_ciphers <= 1); + return block->ciphers ? block->ciphers[0] : NULL; +} + + +static QCryptoCipher *qcrypto_block_pop_cipher(QCryptoBlock *block) +{ + QCryptoCipher *cipher; + + qemu_mutex_lock(&block->mutex); + + assert(block->n_free_ciphers > 0); + block->n_free_ciphers--; + cipher = block->ciphers[block->n_free_ciphers]; + + qemu_mutex_unlock(&block->mutex); + + return cipher; +} + + +static void qcrypto_block_push_cipher(QCryptoBlock *block, + QCryptoCipher *cipher) +{ + qemu_mutex_lock(&block->mutex); + + assert(block->n_free_ciphers < block->n_ciphers); + block->ciphers[block->n_free_ciphers] = cipher; + block->n_free_ciphers++; + + qemu_mutex_unlock(&block->mutex); +} + + +int qcrypto_block_init_cipher(QCryptoBlock *block, + QCryptoCipherAlgorithm alg, + QCryptoCipherMode mode, + const uint8_t *key, size_t nkey, + size_t n_threads, Error **errp) +{ + size_t i; + + assert(!block->ciphers && !block->n_ciphers && !block->n_free_ciphers); + + block->ciphers = g_new0(QCryptoCipher *, n_threads); + + for (i = 0; i < n_threads; i++) { + block->ciphers[i] = qcrypto_cipher_new(alg, mode, key, nkey, errp); + if (!block->ciphers[i]) { + qcrypto_block_free_cipher(block); + return -1; + } + block->n_ciphers++; + block->n_free_ciphers++; + } + + return 0; +} + + +void qcrypto_block_free_cipher(QCryptoBlock *block) +{ + size_t i; + + if (!block->ciphers) { + return; + } + + assert(block->n_ciphers == block->n_free_ciphers); + + for (i = 0; i < block->n_ciphers; i++) { + qcrypto_cipher_free(block->ciphers[i]); + } + + g_free(block->ciphers); + block->ciphers = NULL; + block->n_ciphers = block->n_free_ciphers = 0; +} + +QCryptoIVGen *qcrypto_block_get_ivgen(QCryptoBlock *block) +{ + /* ivgen should be accessed under mutex. However, this function is used only + * in test with one thread, so it's enough to assert it here: + */ + assert(block->n_ciphers <= 1); + return block->ivgen; +} + + +QCryptoHashAlgorithm qcrypto_block_get_kdf_hash(QCryptoBlock *block) +{ + return block->kdfhash; +} + + +uint64_t qcrypto_block_get_payload_offset(QCryptoBlock *block) +{ + return block->payload_offset; +} + + +uint64_t qcrypto_block_get_sector_size(QCryptoBlock *block) +{ + return block->sector_size; +} + + +void qcrypto_block_free(QCryptoBlock *block) +{ + if (!block) { + return; + } + + block->driver->cleanup(block); + + qcrypto_block_free_cipher(block); + qcrypto_ivgen_free(block->ivgen); + qemu_mutex_destroy(&block->mutex); + g_free(block); +} + + +typedef int (*QCryptoCipherEncDecFunc)(QCryptoCipher *cipher, + const void *in, + void *out, + size_t len, + Error **errp); + +static int do_qcrypto_block_cipher_encdec(QCryptoCipher *cipher, + size_t niv, + QCryptoIVGen *ivgen, + QemuMutex *ivgen_mutex, + int sectorsize, + uint64_t offset, + uint8_t *buf, + size_t len, + QCryptoCipherEncDecFunc func, + Error **errp) +{ + g_autofree uint8_t *iv = niv ? g_new0(uint8_t, niv) : NULL; + int ret = -1; + uint64_t startsector = offset / sectorsize; + + assert(QEMU_IS_ALIGNED(offset, sectorsize)); + assert(QEMU_IS_ALIGNED(len, sectorsize)); + + while (len > 0) { + size_t nbytes; + if (niv) { + if (ivgen_mutex) { + qemu_mutex_lock(ivgen_mutex); + } + ret = qcrypto_ivgen_calculate(ivgen, startsector, iv, niv, errp); + if (ivgen_mutex) { + qemu_mutex_unlock(ivgen_mutex); + } + + if (ret < 0) { + return -1; + } + + if (qcrypto_cipher_setiv(cipher, + iv, niv, + errp) < 0) { + return -1; + } + } + + nbytes = len > sectorsize ? sectorsize : len; + if (func(cipher, buf, buf, nbytes, errp) < 0) { + return -1; + } + + startsector++; + buf += nbytes; + len -= nbytes; + } + + return 0; +} + + +int qcrypto_block_cipher_decrypt_helper(QCryptoCipher *cipher, + size_t niv, + QCryptoIVGen *ivgen, + int sectorsize, + uint64_t offset, + uint8_t *buf, + size_t len, + Error **errp) +{ + return do_qcrypto_block_cipher_encdec(cipher, niv, ivgen, NULL, sectorsize, + offset, buf, len, + qcrypto_cipher_decrypt, errp); +} + + +int qcrypto_block_cipher_encrypt_helper(QCryptoCipher *cipher, + size_t niv, + QCryptoIVGen *ivgen, + int sectorsize, + uint64_t offset, + uint8_t *buf, + size_t len, + Error **errp) +{ + return do_qcrypto_block_cipher_encdec(cipher, niv, ivgen, NULL, sectorsize, + offset, buf, len, + qcrypto_cipher_encrypt, errp); +} + +int qcrypto_block_decrypt_helper(QCryptoBlock *block, + int sectorsize, + uint64_t offset, + uint8_t *buf, + size_t len, + Error **errp) +{ + int ret; + QCryptoCipher *cipher = qcrypto_block_pop_cipher(block); + + ret = do_qcrypto_block_cipher_encdec(cipher, block->niv, block->ivgen, + &block->mutex, sectorsize, offset, buf, + len, qcrypto_cipher_decrypt, errp); + + qcrypto_block_push_cipher(block, cipher); + + return ret; +} + +int qcrypto_block_encrypt_helper(QCryptoBlock *block, + int sectorsize, + uint64_t offset, + uint8_t *buf, + size_t len, + Error **errp) +{ + int ret; + QCryptoCipher *cipher = qcrypto_block_pop_cipher(block); + + ret = do_qcrypto_block_cipher_encdec(cipher, block->niv, block->ivgen, + &block->mutex, sectorsize, offset, buf, + len, qcrypto_cipher_encrypt, errp); + + qcrypto_block_push_cipher(block, cipher); + + return ret; +} diff --git a/crypto/blockpriv.h b/crypto/blockpriv.h new file mode 100644 index 000000000..3c7ccea50 --- /dev/null +++ b/crypto/blockpriv.h @@ -0,0 +1,135 @@ +/* + * QEMU Crypto block device encryption + * + * Copyright (c) 2015-2016 Red Hat, Inc. + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, see . + * + */ + +#ifndef QCRYPTO_BLOCKPRIV_H +#define QCRYPTO_BLOCKPRIV_H + +#include "crypto/block.h" +#include "qemu/thread.h" + +typedef struct QCryptoBlockDriver QCryptoBlockDriver; + +struct QCryptoBlock { + QCryptoBlockFormat format; + + const QCryptoBlockDriver *driver; + void *opaque; + + QCryptoCipher **ciphers; + size_t n_ciphers; + size_t n_free_ciphers; + QCryptoIVGen *ivgen; + QemuMutex mutex; + + QCryptoHashAlgorithm kdfhash; + size_t niv; + uint64_t payload_offset; /* In bytes */ + uint64_t sector_size; /* In bytes */ +}; + +struct QCryptoBlockDriver { + int (*open)(QCryptoBlock *block, + QCryptoBlockOpenOptions *options, + const char *optprefix, + QCryptoBlockReadFunc readfunc, + void *opaque, + unsigned int flags, + size_t n_threads, + Error **errp); + + int (*create)(QCryptoBlock *block, + QCryptoBlockCreateOptions *options, + const char *optprefix, + QCryptoBlockInitFunc initfunc, + QCryptoBlockWriteFunc writefunc, + void *opaque, + Error **errp); + + int (*amend)(QCryptoBlock *block, + QCryptoBlockReadFunc readfunc, + QCryptoBlockWriteFunc writefunc, + void *opaque, + QCryptoBlockAmendOptions *options, + bool force, + Error **errp); + + int (*get_info)(QCryptoBlock *block, + QCryptoBlockInfo *info, + Error **errp); + + void (*cleanup)(QCryptoBlock *block); + + int (*encrypt)(QCryptoBlock *block, + uint64_t startsector, + uint8_t *buf, + size_t len, + Error **errp); + int (*decrypt)(QCryptoBlock *block, + uint64_t startsector, + uint8_t *buf, + size_t len, + Error **errp); + + bool (*has_format)(const uint8_t *buf, + size_t buflen); +}; + + +int qcrypto_block_cipher_decrypt_helper(QCryptoCipher *cipher, + size_t niv, + QCryptoIVGen *ivgen, + int sectorsize, + uint64_t offset, + uint8_t *buf, + size_t len, + Error **errp); + +int qcrypto_block_cipher_encrypt_helper(QCryptoCipher *cipher, + size_t niv, + QCryptoIVGen *ivgen, + int sectorsize, + uint64_t offset, + uint8_t *buf, + size_t len, + Error **errp); + +int qcrypto_block_decrypt_helper(QCryptoBlock *block, + int sectorsize, + uint64_t offset, + uint8_t *buf, + size_t len, + Error **errp); + +int qcrypto_block_encrypt_helper(QCryptoBlock *block, + int sectorsize, + uint64_t offset, + uint8_t *buf, + size_t len, + Error **errp); + +int qcrypto_block_init_cipher(QCryptoBlock *block, + QCryptoCipherAlgorithm alg, + QCryptoCipherMode mode, + const uint8_t *key, size_t nkey, + size_t n_threads, Error **errp); + +void qcrypto_block_free_cipher(QCryptoBlock *block); + +#endif /* QCRYPTO_BLOCKPRIV_H */ diff --git a/crypto/cipher-afalg.c b/crypto/cipher-afalg.c new file mode 100644 index 000000000..052355a8a --- /dev/null +++ b/crypto/cipher-afalg.c @@ -0,0 +1,233 @@ +/* + * QEMU Crypto af_alg-backend cipher support + * + * Copyright (c) 2017 HUAWEI TECHNOLOGIES CO., LTD. + * + * Authors: + * Longpeng(Mike) + * + * This work is licensed under the terms of the GNU GPL, version 2 or + * (at your option) any later version. See the COPYING file in the + * top-level directory. + */ +#include "qemu/osdep.h" +#include "qemu/sockets.h" +#include "qemu-common.h" +#include "qapi/error.h" +#include "crypto/cipher.h" +#include "cipherpriv.h" + + +static char * +qcrypto_afalg_cipher_format_name(QCryptoCipherAlgorithm alg, + QCryptoCipherMode mode, + Error **errp) +{ + char *name; + const char *alg_name; + const char *mode_name; + + switch (alg) { + case QCRYPTO_CIPHER_ALG_AES_128: + case QCRYPTO_CIPHER_ALG_AES_192: + case QCRYPTO_CIPHER_ALG_AES_256: + alg_name = "aes"; + break; + case QCRYPTO_CIPHER_ALG_CAST5_128: + alg_name = "cast5"; + break; + case QCRYPTO_CIPHER_ALG_SERPENT_128: + case QCRYPTO_CIPHER_ALG_SERPENT_192: + case QCRYPTO_CIPHER_ALG_SERPENT_256: + alg_name = "serpent"; + break; + case QCRYPTO_CIPHER_ALG_TWOFISH_128: + case QCRYPTO_CIPHER_ALG_TWOFISH_192: + case QCRYPTO_CIPHER_ALG_TWOFISH_256: + alg_name = "twofish"; + break; + + default: + error_setg(errp, "Unsupported cipher algorithm %d", alg); + return NULL; + } + + mode_name = QCryptoCipherMode_str(mode); + name = g_strdup_printf("%s(%s)", mode_name, alg_name); + + return name; +} + +static const struct QCryptoCipherDriver qcrypto_cipher_afalg_driver; + +QCryptoCipher * +qcrypto_afalg_cipher_ctx_new(QCryptoCipherAlgorithm alg, + QCryptoCipherMode mode, + const uint8_t *key, + size_t nkey, Error **errp) +{ + QCryptoAFAlg *afalg; + size_t expect_niv; + char *name; + + name = qcrypto_afalg_cipher_format_name(alg, mode, errp); + if (!name) { + return NULL; + } + + afalg = qcrypto_afalg_comm_alloc(AFALG_TYPE_CIPHER, name, errp); + if (!afalg) { + g_free(name); + return NULL; + } + + g_free(name); + + /* setkey */ + if (qemu_setsockopt(afalg->tfmfd, SOL_ALG, ALG_SET_KEY, key, + nkey) != 0) { + error_setg_errno(errp, errno, "Set key failed"); + qcrypto_afalg_comm_free(afalg); + return NULL; + } + + /* prepare msg header */ + afalg->msg = g_new0(struct msghdr, 1); + afalg->msg->msg_controllen += CMSG_SPACE(ALG_OPTYPE_LEN); + expect_niv = qcrypto_cipher_get_iv_len(alg, mode); + if (expect_niv) { + afalg->msg->msg_controllen += CMSG_SPACE(ALG_MSGIV_LEN(expect_niv)); + } + afalg->msg->msg_control = g_new0(uint8_t, afalg->msg->msg_controllen); + + /* We use 1st msghdr for crypto-info and 2nd msghdr for IV-info */ + afalg->cmsg = CMSG_FIRSTHDR(afalg->msg); + afalg->cmsg->cmsg_type = ALG_SET_OP; + afalg->cmsg->cmsg_len = CMSG_SPACE(ALG_OPTYPE_LEN); + if (expect_niv) { + afalg->cmsg = CMSG_NXTHDR(afalg->msg, afalg->cmsg); + afalg->cmsg->cmsg_type = ALG_SET_IV; + afalg->cmsg->cmsg_len = CMSG_SPACE(ALG_MSGIV_LEN(expect_niv)); + } + afalg->cmsg = CMSG_FIRSTHDR(afalg->msg); + + afalg->base.driver = &qcrypto_cipher_afalg_driver; + return &afalg->base; +} + +static int +qcrypto_afalg_cipher_setiv(QCryptoCipher *cipher, + const uint8_t *iv, + size_t niv, Error **errp) +{ + QCryptoAFAlg *afalg = container_of(cipher, QCryptoAFAlg, base); + struct af_alg_iv *alg_iv; + size_t expect_niv; + + expect_niv = qcrypto_cipher_get_iv_len(cipher->alg, cipher->mode); + if (niv != expect_niv) { + error_setg(errp, "Set IV len(%zu) not match expected(%zu)", + niv, expect_niv); + return -1; + } + + /* move ->cmsg to next msghdr, for IV-info */ + afalg->cmsg = CMSG_NXTHDR(afalg->msg, afalg->cmsg); + + /* build setiv msg */ + afalg->cmsg->cmsg_level = SOL_ALG; + alg_iv = (struct af_alg_iv *)CMSG_DATA(afalg->cmsg); + alg_iv->ivlen = niv; + memcpy(alg_iv->iv, iv, niv); + + return 0; +} + +static int +qcrypto_afalg_cipher_op(QCryptoAFAlg *afalg, + const void *in, void *out, + size_t len, bool do_encrypt, + Error **errp) +{ + uint32_t *type = NULL; + struct iovec iov; + size_t ret, rlen, done = 0; + uint32_t origin_controllen; + + origin_controllen = afalg->msg->msg_controllen; + /* movev ->cmsg to first header, for crypto-info */ + afalg->cmsg = CMSG_FIRSTHDR(afalg->msg); + + /* build encrypt msg */ + afalg->cmsg->cmsg_level = SOL_ALG; + afalg->msg->msg_iov = &iov; + afalg->msg->msg_iovlen = 1; + type = (uint32_t *)CMSG_DATA(afalg->cmsg); + if (do_encrypt) { + *type = ALG_OP_ENCRYPT; + } else { + *type = ALG_OP_DECRYPT; + } + + do { + iov.iov_base = (void *)in + done; + iov.iov_len = len - done; + + /* send info to AF_ALG core */ + ret = sendmsg(afalg->opfd, afalg->msg, 0); + if (ret == -1) { + error_setg_errno(errp, errno, "Send data to AF_ALG core failed"); + return -1; + } + + /* encrypto && get result */ + rlen = read(afalg->opfd, out, ret); + if (rlen == -1) { + error_setg_errno(errp, errno, "Get result from AF_ALG core failed"); + return -1; + } + assert(rlen == ret); + + /* do not update IV for following chunks */ + afalg->msg->msg_controllen = 0; + done += ret; + } while (done < len); + + afalg->msg->msg_controllen = origin_controllen; + + return 0; +} + +static int +qcrypto_afalg_cipher_encrypt(QCryptoCipher *cipher, + const void *in, void *out, + size_t len, Error **errp) +{ + QCryptoAFAlg *afalg = container_of(cipher, QCryptoAFAlg, base); + + return qcrypto_afalg_cipher_op(afalg, in, out, len, true, errp); +} + +static int +qcrypto_afalg_cipher_decrypt(QCryptoCipher *cipher, + const void *in, void *out, + size_t len, Error **errp) +{ + QCryptoAFAlg *afalg = container_of(cipher, QCryptoAFAlg, base); + + return qcrypto_afalg_cipher_op(afalg, in, out, len, false, errp); +} + +static void qcrypto_afalg_comm_ctx_free(QCryptoCipher *cipher) +{ + QCryptoAFAlg *afalg = container_of(cipher, QCryptoAFAlg, base); + + qcrypto_afalg_comm_free(afalg); +} + +static const struct QCryptoCipherDriver qcrypto_cipher_afalg_driver = { + .cipher_encrypt = qcrypto_afalg_cipher_encrypt, + .cipher_decrypt = qcrypto_afalg_cipher_decrypt, + .cipher_setiv = qcrypto_afalg_cipher_setiv, + .cipher_free = qcrypto_afalg_comm_ctx_free, +}; diff --git a/crypto/cipher-builtin.c.inc b/crypto/cipher-builtin.c.inc new file mode 100644 index 000000000..7597cf4a1 --- /dev/null +++ b/crypto/cipher-builtin.c.inc @@ -0,0 +1,435 @@ +/* + * QEMU Crypto cipher built-in algorithms + * + * Copyright (c) 2015 Red Hat, Inc. + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, see . + * + */ + +#include "crypto/aes.h" +#include "crypto/desrfb.h" +#include "crypto/xts.h" + +typedef struct QCryptoCipherBuiltinAESContext QCryptoCipherBuiltinAESContext; +struct QCryptoCipherBuiltinAESContext { + AES_KEY enc; + AES_KEY dec; +}; + +typedef struct QCryptoCipherBuiltinAES QCryptoCipherBuiltinAES; +struct QCryptoCipherBuiltinAES { + QCryptoCipher base; + QCryptoCipherBuiltinAESContext key; + QCryptoCipherBuiltinAESContext key_tweak; + uint8_t iv[AES_BLOCK_SIZE]; +}; + + +static inline bool qcrypto_length_check(size_t len, size_t blocksize, + Error **errp) +{ + if (unlikely(len & (blocksize - 1))) { + error_setg(errp, "Length %zu must be a multiple of block size %zu", + len, blocksize); + return false; + } + return true; +} + +static void qcrypto_cipher_ctx_free(QCryptoCipher *cipher) +{ + g_free(cipher); +} + +static int qcrypto_cipher_no_setiv(QCryptoCipher *cipher, + const uint8_t *iv, size_t niv, + Error **errp) +{ + error_setg(errp, "Setting IV is not supported"); + return -1; +} + +static void do_aes_encrypt_ecb(const void *vctx, + size_t len, + uint8_t *out, + const uint8_t *in) +{ + const QCryptoCipherBuiltinAESContext *ctx = vctx; + + /* We have already verified that len % AES_BLOCK_SIZE == 0. */ + while (len) { + AES_encrypt(in, out, &ctx->enc); + in += AES_BLOCK_SIZE; + out += AES_BLOCK_SIZE; + len -= AES_BLOCK_SIZE; + } +} + +static void do_aes_decrypt_ecb(const void *vctx, + size_t len, + uint8_t *out, + const uint8_t *in) +{ + const QCryptoCipherBuiltinAESContext *ctx = vctx; + + /* We have already verified that len % AES_BLOCK_SIZE == 0. */ + while (len) { + AES_decrypt(in, out, &ctx->dec); + in += AES_BLOCK_SIZE; + out += AES_BLOCK_SIZE; + len -= AES_BLOCK_SIZE; + } +} + +static void do_aes_encrypt_cbc(const AES_KEY *key, + size_t len, + uint8_t *out, + const uint8_t *in, + uint8_t *ivec) +{ + uint8_t tmp[AES_BLOCK_SIZE]; + size_t n; + + /* We have already verified that len % AES_BLOCK_SIZE == 0. */ + while (len) { + for (n = 0; n < AES_BLOCK_SIZE; ++n) { + tmp[n] = in[n] ^ ivec[n]; + } + AES_encrypt(tmp, out, key); + memcpy(ivec, out, AES_BLOCK_SIZE); + len -= AES_BLOCK_SIZE; + in += AES_BLOCK_SIZE; + out += AES_BLOCK_SIZE; + } +} + +static void do_aes_decrypt_cbc(const AES_KEY *key, + size_t len, + uint8_t *out, + const uint8_t *in, + uint8_t *ivec) +{ + uint8_t tmp[AES_BLOCK_SIZE]; + size_t n; + + /* We have already verified that len % AES_BLOCK_SIZE == 0. */ + while (len) { + memcpy(tmp, in, AES_BLOCK_SIZE); + AES_decrypt(in, out, key); + for (n = 0; n < AES_BLOCK_SIZE; ++n) { + out[n] ^= ivec[n]; + } + memcpy(ivec, tmp, AES_BLOCK_SIZE); + len -= AES_BLOCK_SIZE; + in += AES_BLOCK_SIZE; + out += AES_BLOCK_SIZE; + } +} + +static int qcrypto_cipher_aes_encrypt_ecb(QCryptoCipher *cipher, + const void *in, void *out, + size_t len, Error **errp) +{ + QCryptoCipherBuiltinAES *ctx + = container_of(cipher, QCryptoCipherBuiltinAES, base); + + if (!qcrypto_length_check(len, AES_BLOCK_SIZE, errp)) { + return -1; + } + do_aes_encrypt_ecb(&ctx->key, len, out, in); + return 0; +} + +static int qcrypto_cipher_aes_decrypt_ecb(QCryptoCipher *cipher, + const void *in, void *out, + size_t len, Error **errp) +{ + QCryptoCipherBuiltinAES *ctx + = container_of(cipher, QCryptoCipherBuiltinAES, base); + + if (!qcrypto_length_check(len, AES_BLOCK_SIZE, errp)) { + return -1; + } + do_aes_decrypt_ecb(&ctx->key, len, out, in); + return 0; +} + +static int qcrypto_cipher_aes_encrypt_cbc(QCryptoCipher *cipher, + const void *in, void *out, + size_t len, Error **errp) +{ + QCryptoCipherBuiltinAES *ctx + = container_of(cipher, QCryptoCipherBuiltinAES, base); + + if (!qcrypto_length_check(len, AES_BLOCK_SIZE, errp)) { + return -1; + } + do_aes_encrypt_cbc(&ctx->key.enc, len, out, in, ctx->iv); + return 0; +} + +static int qcrypto_cipher_aes_decrypt_cbc(QCryptoCipher *cipher, + const void *in, void *out, + size_t len, Error **errp) +{ + QCryptoCipherBuiltinAES *ctx + = container_of(cipher, QCryptoCipherBuiltinAES, base); + + if (!qcrypto_length_check(len, AES_BLOCK_SIZE, errp)) { + return -1; + } + do_aes_decrypt_cbc(&ctx->key.dec, len, out, in, ctx->iv); + return 0; +} + +static int qcrypto_cipher_aes_encrypt_xts(QCryptoCipher *cipher, + const void *in, void *out, + size_t len, Error **errp) +{ + QCryptoCipherBuiltinAES *ctx + = container_of(cipher, QCryptoCipherBuiltinAES, base); + + if (!qcrypto_length_check(len, AES_BLOCK_SIZE, errp)) { + return -1; + } + xts_encrypt(&ctx->key, &ctx->key_tweak, + do_aes_encrypt_ecb, do_aes_decrypt_ecb, + ctx->iv, len, out, in); + return 0; +} + +static int qcrypto_cipher_aes_decrypt_xts(QCryptoCipher *cipher, + const void *in, void *out, + size_t len, Error **errp) +{ + QCryptoCipherBuiltinAES *ctx + = container_of(cipher, QCryptoCipherBuiltinAES, base); + + if (!qcrypto_length_check(len, AES_BLOCK_SIZE, errp)) { + return -1; + } + xts_decrypt(&ctx->key, &ctx->key_tweak, + do_aes_encrypt_ecb, do_aes_decrypt_ecb, + ctx->iv, len, out, in); + return 0; +} + + +static int qcrypto_cipher_aes_setiv(QCryptoCipher *cipher, const uint8_t *iv, + size_t niv, Error **errp) +{ + QCryptoCipherBuiltinAES *ctx + = container_of(cipher, QCryptoCipherBuiltinAES, base); + + if (niv != AES_BLOCK_SIZE) { + error_setg(errp, "IV must be %d bytes not %zu", + AES_BLOCK_SIZE, niv); + return -1; + } + + memcpy(ctx->iv, iv, AES_BLOCK_SIZE); + return 0; +} + +static const struct QCryptoCipherDriver qcrypto_cipher_aes_driver_ecb = { + .cipher_encrypt = qcrypto_cipher_aes_encrypt_ecb, + .cipher_decrypt = qcrypto_cipher_aes_decrypt_ecb, + .cipher_setiv = qcrypto_cipher_no_setiv, + .cipher_free = qcrypto_cipher_ctx_free, +}; + +static const struct QCryptoCipherDriver qcrypto_cipher_aes_driver_cbc = { + .cipher_encrypt = qcrypto_cipher_aes_encrypt_cbc, + .cipher_decrypt = qcrypto_cipher_aes_decrypt_cbc, + .cipher_setiv = qcrypto_cipher_aes_setiv, + .cipher_free = qcrypto_cipher_ctx_free, +}; + +static const struct QCryptoCipherDriver qcrypto_cipher_aes_driver_xts = { + .cipher_encrypt = qcrypto_cipher_aes_encrypt_xts, + .cipher_decrypt = qcrypto_cipher_aes_decrypt_xts, + .cipher_setiv = qcrypto_cipher_aes_setiv, + .cipher_free = qcrypto_cipher_ctx_free, +}; + + +typedef struct QCryptoCipherBuiltinDESRFB QCryptoCipherBuiltinDESRFB; +struct QCryptoCipherBuiltinDESRFB { + QCryptoCipher base; + + /* C.f. alg_key_len[QCRYPTO_CIPHER_ALG_DES_RFB] */ + uint8_t key[8]; +}; + +static int qcrypto_cipher_encrypt_des_rfb(QCryptoCipher *cipher, + const void *in, void *out, + size_t len, Error **errp) +{ + QCryptoCipherBuiltinDESRFB *ctx + = container_of(cipher, QCryptoCipherBuiltinDESRFB, base); + size_t i; + + if (!qcrypto_length_check(len, 8, errp)) { + return -1; + } + + deskey(ctx->key, EN0); + + for (i = 0; i < len; i += 8) { + des((void *)in + i, out + i); + } + + return 0; +} + +static int qcrypto_cipher_decrypt_des_rfb(QCryptoCipher *cipher, + const void *in, void *out, + size_t len, Error **errp) +{ + QCryptoCipherBuiltinDESRFB *ctx + = container_of(cipher, QCryptoCipherBuiltinDESRFB, base); + size_t i; + + if (!qcrypto_length_check(len, 8, errp)) { + return -1; + } + + deskey(ctx->key, DE1); + + for (i = 0; i < len; i += 8) { + des((void *)in + i, out + i); + } + + return 0; +} + +static const struct QCryptoCipherDriver qcrypto_cipher_des_rfb_driver = { + .cipher_encrypt = qcrypto_cipher_encrypt_des_rfb, + .cipher_decrypt = qcrypto_cipher_decrypt_des_rfb, + .cipher_setiv = qcrypto_cipher_no_setiv, + .cipher_free = qcrypto_cipher_ctx_free, +}; + +bool qcrypto_cipher_supports(QCryptoCipherAlgorithm alg, + QCryptoCipherMode mode) +{ + switch (alg) { + case QCRYPTO_CIPHER_ALG_DES_RFB: + return mode == QCRYPTO_CIPHER_MODE_ECB; + case QCRYPTO_CIPHER_ALG_AES_128: + case QCRYPTO_CIPHER_ALG_AES_192: + case QCRYPTO_CIPHER_ALG_AES_256: + switch (mode) { + case QCRYPTO_CIPHER_MODE_ECB: + case QCRYPTO_CIPHER_MODE_CBC: + case QCRYPTO_CIPHER_MODE_XTS: + return true; + default: + return false; + } + break; + default: + return false; + } +} + +static QCryptoCipher *qcrypto_cipher_ctx_new(QCryptoCipherAlgorithm alg, + QCryptoCipherMode mode, + const uint8_t *key, + size_t nkey, + Error **errp) +{ + if (!qcrypto_cipher_validate_key_length(alg, mode, nkey, errp)) { + return NULL; + } + + switch (alg) { + case QCRYPTO_CIPHER_ALG_DES_RFB: + if (mode == QCRYPTO_CIPHER_MODE_ECB) { + QCryptoCipherBuiltinDESRFB *ctx; + + ctx = g_new0(QCryptoCipherBuiltinDESRFB, 1); + ctx->base.driver = &qcrypto_cipher_des_rfb_driver; + memcpy(ctx->key, key, sizeof(ctx->key)); + + return &ctx->base; + } + goto bad_mode; + + case QCRYPTO_CIPHER_ALG_AES_128: + case QCRYPTO_CIPHER_ALG_AES_192: + case QCRYPTO_CIPHER_ALG_AES_256: + { + QCryptoCipherBuiltinAES *ctx; + const QCryptoCipherDriver *drv; + + switch (mode) { + case QCRYPTO_CIPHER_MODE_ECB: + drv = &qcrypto_cipher_aes_driver_ecb; + break; + case QCRYPTO_CIPHER_MODE_CBC: + drv = &qcrypto_cipher_aes_driver_cbc; + break; + case QCRYPTO_CIPHER_MODE_XTS: + drv = &qcrypto_cipher_aes_driver_xts; + break; + default: + goto bad_mode; + } + + ctx = g_new0(QCryptoCipherBuiltinAES, 1); + ctx->base.driver = drv; + + if (mode == QCRYPTO_CIPHER_MODE_XTS) { + nkey /= 2; + if (AES_set_encrypt_key(key + nkey, nkey * 8, + &ctx->key_tweak.enc)) { + error_setg(errp, "Failed to set encryption key"); + goto error; + } + if (AES_set_decrypt_key(key + nkey, nkey * 8, + &ctx->key_tweak.dec)) { + error_setg(errp, "Failed to set decryption key"); + goto error; + } + } + if (AES_set_encrypt_key(key, nkey * 8, &ctx->key.enc)) { + error_setg(errp, "Failed to set encryption key"); + goto error; + } + if (AES_set_decrypt_key(key, nkey * 8, &ctx->key.dec)) { + error_setg(errp, "Failed to set decryption key"); + goto error; + } + + return &ctx->base; + + error: + g_free(ctx); + return NULL; + } + + default: + error_setg(errp, + "Unsupported cipher algorithm %s", + QCryptoCipherAlgorithm_str(alg)); + return NULL; + } + + bad_mode: + error_setg(errp, "Unsupported cipher mode %s", + QCryptoCipherMode_str(mode)); + return NULL; +} diff --git a/crypto/cipher-gcrypt.c.inc b/crypto/cipher-gcrypt.c.inc new file mode 100644 index 000000000..42d413753 --- /dev/null +++ b/crypto/cipher-gcrypt.c.inc @@ -0,0 +1,409 @@ +/* + * QEMU Crypto cipher libgcrypt algorithms + * + * Copyright (c) 2015 Red Hat, Inc. + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, see . + * + */ + +#ifdef CONFIG_QEMU_PRIVATE_XTS +#include "crypto/xts.h" +#endif + +#include + +bool qcrypto_cipher_supports(QCryptoCipherAlgorithm alg, + QCryptoCipherMode mode) +{ + switch (alg) { + case QCRYPTO_CIPHER_ALG_DES_RFB: + case QCRYPTO_CIPHER_ALG_3DES: + case QCRYPTO_CIPHER_ALG_AES_128: + case QCRYPTO_CIPHER_ALG_AES_192: + case QCRYPTO_CIPHER_ALG_AES_256: + case QCRYPTO_CIPHER_ALG_CAST5_128: + case QCRYPTO_CIPHER_ALG_SERPENT_128: + case QCRYPTO_CIPHER_ALG_SERPENT_192: + case QCRYPTO_CIPHER_ALG_SERPENT_256: + case QCRYPTO_CIPHER_ALG_TWOFISH_128: + case QCRYPTO_CIPHER_ALG_TWOFISH_256: + break; + default: + return false; + } + + switch (mode) { + case QCRYPTO_CIPHER_MODE_ECB: + case QCRYPTO_CIPHER_MODE_CBC: + case QCRYPTO_CIPHER_MODE_XTS: + case QCRYPTO_CIPHER_MODE_CTR: + return true; + default: + return false; + } +} + +typedef struct QCryptoCipherGcrypt { + QCryptoCipher base; + gcry_cipher_hd_t handle; + size_t blocksize; +#ifdef CONFIG_QEMU_PRIVATE_XTS + gcry_cipher_hd_t tweakhandle; + uint8_t iv[XTS_BLOCK_SIZE]; +#endif +} QCryptoCipherGcrypt; + + +static void qcrypto_gcrypt_ctx_free(QCryptoCipher *cipher) +{ + QCryptoCipherGcrypt *ctx = container_of(cipher, QCryptoCipherGcrypt, base); + + gcry_cipher_close(ctx->handle); + g_free(ctx); +} + +static int qcrypto_gcrypt_encrypt(QCryptoCipher *cipher, const void *in, + void *out, size_t len, Error **errp) +{ + QCryptoCipherGcrypt *ctx = container_of(cipher, QCryptoCipherGcrypt, base); + gcry_error_t err; + + if (len & (ctx->blocksize - 1)) { + error_setg(errp, "Length %zu must be a multiple of block size %zu", + len, ctx->blocksize); + return -1; + } + + err = gcry_cipher_encrypt(ctx->handle, out, len, in, len); + if (err != 0) { + error_setg(errp, "Cannot encrypt data: %s", gcry_strerror(err)); + return -1; + } + + return 0; +} + + +static int qcrypto_gcrypt_decrypt(QCryptoCipher *cipher, const void *in, + void *out, size_t len, Error **errp) +{ + QCryptoCipherGcrypt *ctx = container_of(cipher, QCryptoCipherGcrypt, base); + gcry_error_t err; + + if (len & (ctx->blocksize - 1)) { + error_setg(errp, "Length %zu must be a multiple of block size %zu", + len, ctx->blocksize); + return -1; + } + + err = gcry_cipher_decrypt(ctx->handle, out, len, in, len); + if (err != 0) { + error_setg(errp, "Cannot decrypt data: %s", + gcry_strerror(err)); + return -1; + } + + return 0; +} + +static int qcrypto_gcrypt_setiv(QCryptoCipher *cipher, + const uint8_t *iv, size_t niv, + Error **errp) +{ + QCryptoCipherGcrypt *ctx = container_of(cipher, QCryptoCipherGcrypt, base); + gcry_error_t err; + + if (niv != ctx->blocksize) { + error_setg(errp, "Expected IV size %zu not %zu", + ctx->blocksize, niv); + return -1; + } + + gcry_cipher_reset(ctx->handle); + err = gcry_cipher_setiv(ctx->handle, iv, niv); + if (err != 0) { + error_setg(errp, "Cannot set IV: %s", gcry_strerror(err)); + return -1; + } + + return 0; +} + +static int qcrypto_gcrypt_ctr_setiv(QCryptoCipher *cipher, + const uint8_t *iv, size_t niv, + Error **errp) +{ + QCryptoCipherGcrypt *ctx = container_of(cipher, QCryptoCipherGcrypt, base); + gcry_error_t err; + + if (niv != ctx->blocksize) { + error_setg(errp, "Expected IV size %zu not %zu", + ctx->blocksize, niv); + return -1; + } + + err = gcry_cipher_setctr(ctx->handle, iv, niv); + if (err != 0) { + error_setg(errp, "Cannot set Counter: %s", gcry_strerror(err)); + return -1; + } + + return 0; +} + + +static const struct QCryptoCipherDriver qcrypto_gcrypt_driver = { + .cipher_encrypt = qcrypto_gcrypt_encrypt, + .cipher_decrypt = qcrypto_gcrypt_decrypt, + .cipher_setiv = qcrypto_gcrypt_setiv, + .cipher_free = qcrypto_gcrypt_ctx_free, +}; + +static const struct QCryptoCipherDriver qcrypto_gcrypt_ctr_driver = { + .cipher_encrypt = qcrypto_gcrypt_encrypt, + .cipher_decrypt = qcrypto_gcrypt_decrypt, + .cipher_setiv = qcrypto_gcrypt_ctr_setiv, + .cipher_free = qcrypto_gcrypt_ctx_free, +}; + +#ifdef CONFIG_QEMU_PRIVATE_XTS +static void qcrypto_gcrypt_xts_ctx_free(QCryptoCipher *cipher) +{ + QCryptoCipherGcrypt *ctx = container_of(cipher, QCryptoCipherGcrypt, base); + + gcry_cipher_close(ctx->tweakhandle); + qcrypto_gcrypt_ctx_free(cipher); +} + +static void qcrypto_gcrypt_xts_wrape(const void *ctx, size_t length, + uint8_t *dst, const uint8_t *src) +{ + gcry_error_t err; + err = gcry_cipher_encrypt((gcry_cipher_hd_t)ctx, dst, length, src, length); + g_assert(err == 0); +} + +static void qcrypto_gcrypt_xts_wrapd(const void *ctx, size_t length, + uint8_t *dst, const uint8_t *src) +{ + gcry_error_t err; + err = gcry_cipher_decrypt((gcry_cipher_hd_t)ctx, dst, length, src, length); + g_assert(err == 0); +} + +static int qcrypto_gcrypt_xts_encrypt(QCryptoCipher *cipher, const void *in, + void *out, size_t len, Error **errp) +{ + QCryptoCipherGcrypt *ctx = container_of(cipher, QCryptoCipherGcrypt, base); + + if (len & (ctx->blocksize - 1)) { + error_setg(errp, "Length %zu must be a multiple of block size %zu", + len, ctx->blocksize); + return -1; + } + + xts_encrypt(ctx->handle, ctx->tweakhandle, + qcrypto_gcrypt_xts_wrape, qcrypto_gcrypt_xts_wrapd, + ctx->iv, len, out, in); + return 0; +} + +static int qcrypto_gcrypt_xts_decrypt(QCryptoCipher *cipher, const void *in, + void *out, size_t len, Error **errp) +{ + QCryptoCipherGcrypt *ctx = container_of(cipher, QCryptoCipherGcrypt, base); + + if (len & (ctx->blocksize - 1)) { + error_setg(errp, "Length %zu must be a multiple of block size %zu", + len, ctx->blocksize); + return -1; + } + + xts_decrypt(ctx->handle, ctx->tweakhandle, + qcrypto_gcrypt_xts_wrape, qcrypto_gcrypt_xts_wrapd, + ctx->iv, len, out, in); + return 0; +} + +static int qcrypto_gcrypt_xts_setiv(QCryptoCipher *cipher, + const uint8_t *iv, size_t niv, + Error **errp) +{ + QCryptoCipherGcrypt *ctx = container_of(cipher, QCryptoCipherGcrypt, base); + + if (niv != ctx->blocksize) { + error_setg(errp, "Expected IV size %zu not %zu", + ctx->blocksize, niv); + return -1; + } + + memcpy(ctx->iv, iv, niv); + return 0; +} + +static const struct QCryptoCipherDriver qcrypto_gcrypt_xts_driver = { + .cipher_encrypt = qcrypto_gcrypt_xts_encrypt, + .cipher_decrypt = qcrypto_gcrypt_xts_decrypt, + .cipher_setiv = qcrypto_gcrypt_xts_setiv, + .cipher_free = qcrypto_gcrypt_xts_ctx_free, +}; +#endif /* CONFIG_QEMU_PRIVATE_XTS */ + + +static QCryptoCipher *qcrypto_cipher_ctx_new(QCryptoCipherAlgorithm alg, + QCryptoCipherMode mode, + const uint8_t *key, + size_t nkey, + Error **errp) +{ + QCryptoCipherGcrypt *ctx; + const QCryptoCipherDriver *drv; + gcry_error_t err; + int gcryalg, gcrymode; + + if (!qcrypto_cipher_validate_key_length(alg, mode, nkey, errp)) { + return NULL; + } + + switch (alg) { + case QCRYPTO_CIPHER_ALG_DES_RFB: + gcryalg = GCRY_CIPHER_DES; + break; + case QCRYPTO_CIPHER_ALG_3DES: + gcryalg = GCRY_CIPHER_3DES; + break; + case QCRYPTO_CIPHER_ALG_AES_128: + gcryalg = GCRY_CIPHER_AES128; + break; + case QCRYPTO_CIPHER_ALG_AES_192: + gcryalg = GCRY_CIPHER_AES192; + break; + case QCRYPTO_CIPHER_ALG_AES_256: + gcryalg = GCRY_CIPHER_AES256; + break; + case QCRYPTO_CIPHER_ALG_CAST5_128: + gcryalg = GCRY_CIPHER_CAST5; + break; + case QCRYPTO_CIPHER_ALG_SERPENT_128: + gcryalg = GCRY_CIPHER_SERPENT128; + break; + case QCRYPTO_CIPHER_ALG_SERPENT_192: + gcryalg = GCRY_CIPHER_SERPENT192; + break; + case QCRYPTO_CIPHER_ALG_SERPENT_256: + gcryalg = GCRY_CIPHER_SERPENT256; + break; + case QCRYPTO_CIPHER_ALG_TWOFISH_128: + gcryalg = GCRY_CIPHER_TWOFISH128; + break; + case QCRYPTO_CIPHER_ALG_TWOFISH_256: + gcryalg = GCRY_CIPHER_TWOFISH; + break; + default: + error_setg(errp, "Unsupported cipher algorithm %s", + QCryptoCipherAlgorithm_str(alg)); + return NULL; + } + + drv = &qcrypto_gcrypt_driver; + switch (mode) { + case QCRYPTO_CIPHER_MODE_ECB: + gcrymode = GCRY_CIPHER_MODE_ECB; + break; + case QCRYPTO_CIPHER_MODE_XTS: +#ifdef CONFIG_QEMU_PRIVATE_XTS + drv = &qcrypto_gcrypt_xts_driver; + gcrymode = GCRY_CIPHER_MODE_ECB; +#else + gcrymode = GCRY_CIPHER_MODE_XTS; +#endif + break; + case QCRYPTO_CIPHER_MODE_CBC: + gcrymode = GCRY_CIPHER_MODE_CBC; + break; + case QCRYPTO_CIPHER_MODE_CTR: + drv = &qcrypto_gcrypt_ctr_driver; + gcrymode = GCRY_CIPHER_MODE_CTR; + break; + default: + error_setg(errp, "Unsupported cipher mode %s", + QCryptoCipherMode_str(mode)); + return NULL; + } + + ctx = g_new0(QCryptoCipherGcrypt, 1); + ctx->base.driver = drv; + + err = gcry_cipher_open(&ctx->handle, gcryalg, gcrymode, 0); + if (err != 0) { + error_setg(errp, "Cannot initialize cipher: %s", + gcry_strerror(err)); + goto error; + } + ctx->blocksize = gcry_cipher_get_algo_blklen(gcryalg); + +#ifdef CONFIG_QEMU_PRIVATE_XTS + if (mode == QCRYPTO_CIPHER_MODE_XTS) { + if (ctx->blocksize != XTS_BLOCK_SIZE) { + error_setg(errp, + "Cipher block size %zu must equal XTS block size %d", + ctx->blocksize, XTS_BLOCK_SIZE); + goto error; + } + err = gcry_cipher_open(&ctx->tweakhandle, gcryalg, gcrymode, 0); + if (err != 0) { + error_setg(errp, "Cannot initialize cipher: %s", + gcry_strerror(err)); + goto error; + } + } +#endif + + if (alg == QCRYPTO_CIPHER_ALG_DES_RFB) { + /* We're using standard DES cipher from gcrypt, so we need + * to munge the key so that the results are the same as the + * bizarre RFB variant of DES :-) + */ + uint8_t *rfbkey = qcrypto_cipher_munge_des_rfb_key(key, nkey); + err = gcry_cipher_setkey(ctx->handle, rfbkey, nkey); + g_free(rfbkey); + } else { +#ifdef CONFIG_QEMU_PRIVATE_XTS + if (mode == QCRYPTO_CIPHER_MODE_XTS) { + nkey /= 2; + err = gcry_cipher_setkey(ctx->tweakhandle, key + nkey, nkey); + if (err != 0) { + error_setg(errp, "Cannot set key: %s", gcry_strerror(err)); + goto error; + } + } +#endif + err = gcry_cipher_setkey(ctx->handle, key, nkey); + } + if (err != 0) { + error_setg(errp, "Cannot set key: %s", gcry_strerror(err)); + goto error; + } + + return &ctx->base; + + error: +#ifdef CONFIG_QEMU_PRIVATE_XTS + gcry_cipher_close(ctx->tweakhandle); +#endif + gcry_cipher_close(ctx->handle); + g_free(ctx); + return NULL; +} diff --git a/crypto/cipher-nettle.c.inc b/crypto/cipher-nettle.c.inc new file mode 100644 index 000000000..cac771e4f --- /dev/null +++ b/crypto/cipher-nettle.c.inc @@ -0,0 +1,760 @@ +/* + * QEMU Crypto cipher nettle algorithms + * + * Copyright (c) 2015 Red Hat, Inc. + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, see . + * + */ + +#ifdef CONFIG_QEMU_PRIVATE_XTS +#include "crypto/xts.h" +#endif + +#include +#include +#include +#include +#include +#include +#include +#include +#ifndef CONFIG_QEMU_PRIVATE_XTS +#include +#endif + +typedef void (*QCryptoCipherNettleFuncWrapper)(const void *ctx, + size_t length, + uint8_t *dst, + const uint8_t *src); + +#if CONFIG_NETTLE_VERSION_MAJOR < 3 +typedef nettle_crypt_func * QCryptoCipherNettleFuncNative; +typedef void * cipher_ctx_t; +typedef unsigned cipher_length_t; +#define CONST_CTX + +#define cast5_set_key cast128_set_key + +#define aes128_ctx aes_ctx +#define aes192_ctx aes_ctx +#define aes256_ctx aes_ctx +#define aes128_set_encrypt_key(c, k) \ + aes_set_encrypt_key(c, 16, k) +#define aes192_set_encrypt_key(c, k) \ + aes_set_encrypt_key(c, 24, k) +#define aes256_set_encrypt_key(c, k) \ + aes_set_encrypt_key(c, 32, k) +#define aes128_set_decrypt_key(c, k) \ + aes_set_decrypt_key(c, 16, k) +#define aes192_set_decrypt_key(c, k) \ + aes_set_decrypt_key(c, 24, k) +#define aes256_set_decrypt_key(c, k) \ + aes_set_decrypt_key(c, 32, k) +#define aes128_encrypt aes_encrypt +#define aes192_encrypt aes_encrypt +#define aes256_encrypt aes_encrypt +#define aes128_decrypt aes_decrypt +#define aes192_decrypt aes_decrypt +#define aes256_decrypt aes_decrypt +#else +typedef nettle_cipher_func * QCryptoCipherNettleFuncNative; +typedef const void * cipher_ctx_t; +typedef size_t cipher_length_t; +#define CONST_CTX const +#endif + +static inline bool qcrypto_length_check(size_t len, size_t blocksize, + Error **errp) +{ + if (unlikely(len & (blocksize - 1))) { + error_setg(errp, "Length %zu must be a multiple of block size %zu", + len, blocksize); + return false; + } + return true; +} + + +static void qcrypto_cipher_ctx_free(QCryptoCipher *ctx) +{ + g_free(ctx); +} + +static int qcrypto_cipher_no_setiv(QCryptoCipher *cipher, + const uint8_t *iv, size_t niv, + Error **errp) +{ + error_setg(errp, "Setting IV is not supported"); + return -1; +} + + +#define DEFINE_SETIV(NAME, TYPE, BLEN) \ +static int NAME##_setiv(QCryptoCipher *cipher, const uint8_t *iv, \ + size_t niv, Error **errp) \ +{ \ + TYPE *ctx = container_of(cipher, TYPE, base); \ + if (niv != BLEN) { \ + error_setg(errp, "Expected IV size %d not %zu", BLEN, niv); \ + return -1; \ + } \ + memcpy(ctx->iv, iv, niv); \ + return 0; \ +} + + +#define DEFINE_ECB(NAME, TYPE, BLEN, ENCRYPT, DECRYPT) \ +static int NAME##_encrypt_ecb(QCryptoCipher *cipher, const void *in, \ + void *out, size_t len, Error **errp) \ +{ \ + TYPE *ctx = container_of(cipher, TYPE, base); \ + if (!qcrypto_length_check(len, BLEN, errp)) { \ + return -1; \ + } \ + ENCRYPT(&ctx->key, len, out, in); \ + return 0; \ +} \ +static int NAME##_decrypt_ecb(QCryptoCipher *cipher, const void *in, \ + void *out, size_t len, Error **errp) \ +{ \ + TYPE *ctx = container_of(cipher, TYPE, base); \ + if (!qcrypto_length_check(len, BLEN, errp)) { \ + return -1; \ + } \ + DECRYPT(&ctx->key, len, out, in); \ + return 0; \ +} \ +static const struct QCryptoCipherDriver NAME##_driver_ecb = { \ + .cipher_encrypt = NAME##_encrypt_ecb, \ + .cipher_decrypt = NAME##_decrypt_ecb, \ + .cipher_setiv = qcrypto_cipher_no_setiv, \ + .cipher_free = qcrypto_cipher_ctx_free, \ +}; + + +#define DEFINE_CBC(NAME, TYPE, BLEN, ENCRYPT, DECRYPT) \ +static int NAME##_encrypt_cbc(QCryptoCipher *cipher, const void *in, \ + void *out, size_t len, Error **errp) \ +{ \ + TYPE *ctx = container_of(cipher, TYPE, base); \ + if (!qcrypto_length_check(len, BLEN, errp)) { \ + return -1; \ + } \ + cbc_encrypt(&ctx->key, ENCRYPT, BLEN, ctx->iv, len, out, in); \ + return 0; \ +} \ +static int NAME##_decrypt_cbc(QCryptoCipher *cipher, const void *in, \ + void *out, size_t len, Error **errp) \ +{ \ + TYPE *ctx = container_of(cipher, TYPE, base); \ + if (!qcrypto_length_check(len, BLEN, errp)) { \ + return -1; \ + } \ + cbc_decrypt(&ctx->key, DECRYPT, BLEN, ctx->iv, len, out, in); \ + return 0; \ +} \ +static const struct QCryptoCipherDriver NAME##_driver_cbc = { \ + .cipher_encrypt = NAME##_encrypt_cbc, \ + .cipher_decrypt = NAME##_decrypt_cbc, \ + .cipher_setiv = NAME##_setiv, \ + .cipher_free = qcrypto_cipher_ctx_free, \ +}; + + +#define DEFINE_CTR(NAME, TYPE, BLEN, ENCRYPT) \ +static int NAME##_encrypt_ctr(QCryptoCipher *cipher, const void *in, \ + void *out, size_t len, Error **errp) \ +{ \ + TYPE *ctx = container_of(cipher, TYPE, base); \ + if (!qcrypto_length_check(len, BLEN, errp)) { \ + return -1; \ + } \ + ctr_crypt(&ctx->key, ENCRYPT, BLEN, ctx->iv, len, out, in); \ + return 0; \ +} \ +static const struct QCryptoCipherDriver NAME##_driver_ctr = { \ + .cipher_encrypt = NAME##_encrypt_ctr, \ + .cipher_decrypt = NAME##_encrypt_ctr, \ + .cipher_setiv = NAME##_setiv, \ + .cipher_free = qcrypto_cipher_ctx_free, \ +}; + + +#ifdef CONFIG_QEMU_PRIVATE_XTS +#define DEFINE__XTS(NAME, TYPE, BLEN, ENCRYPT, DECRYPT) \ +static void NAME##_xts_wrape(const void *ctx, size_t length, \ + uint8_t *dst, const uint8_t *src) \ +{ \ + ENCRYPT((cipher_ctx_t)ctx, length, dst, src); \ +} \ +static void NAME##_xts_wrapd(const void *ctx, size_t length, \ + uint8_t *dst, const uint8_t *src) \ +{ \ + DECRYPT((cipher_ctx_t)ctx, length, dst, src); \ +} \ +static int NAME##_encrypt_xts(QCryptoCipher *cipher, const void *in, \ + void *out, size_t len, Error **errp) \ +{ \ + TYPE *ctx = container_of(cipher, TYPE, base); \ + if (!qcrypto_length_check(len, BLEN, errp)) { \ + return -1; \ + } \ + xts_encrypt(&ctx->key, &ctx->key_xts, \ + NAME##_xts_wrape, NAME##_xts_wrapd, \ + ctx->iv, len, out, in); \ + return 0; \ +} \ +static int NAME##_decrypt_xts(QCryptoCipher *cipher, const void *in, \ + void *out, size_t len, Error **errp) \ +{ \ + TYPE *ctx = container_of(cipher, TYPE, base); \ + if (!qcrypto_length_check(len, BLEN, errp)) { \ + return -1; \ + } \ + xts_decrypt(&ctx->key, &ctx->key_xts, \ + NAME##_xts_wrape, NAME##_xts_wrapd, \ + ctx->iv, len, out, in); \ + return 0; \ +} +#else +#define DEFINE__XTS(NAME, TYPE, BLEN, ENCRYPT, DECRYPT) \ +static int NAME##_encrypt_xts(QCryptoCipher *cipher, const void *in, \ + void *out, size_t len, Error **errp) \ +{ \ + TYPE *ctx = container_of(cipher, TYPE, base); \ + if (!qcrypto_length_check(len, BLEN, errp)) { \ + return -1; \ + } \ + xts_encrypt_message(&ctx->key, &ctx->key_xts, ENCRYPT, \ + ctx->iv, len, out, in); \ + return 0; \ +} \ +static int NAME##_decrypt_xts(QCryptoCipher *cipher, const void *in, \ + void *out, size_t len, Error **errp) \ +{ \ + TYPE *ctx = container_of(cipher, TYPE, base); \ + if (!qcrypto_length_check(len, BLEN, errp)) { \ + return -1; \ + } \ + xts_decrypt_message(&ctx->key, &ctx->key_xts, DECRYPT, ENCRYPT, \ + ctx->iv, len, out, in); \ + return 0; \ +} +#endif + +#define DEFINE_XTS(NAME, TYPE, BLEN, ENCRYPT, DECRYPT) \ + QEMU_BUILD_BUG_ON(BLEN != XTS_BLOCK_SIZE); \ + DEFINE__XTS(NAME, TYPE, BLEN, ENCRYPT, DECRYPT) \ +static const struct QCryptoCipherDriver NAME##_driver_xts = { \ + .cipher_encrypt = NAME##_encrypt_xts, \ + .cipher_decrypt = NAME##_decrypt_xts, \ + .cipher_setiv = NAME##_setiv, \ + .cipher_free = qcrypto_cipher_ctx_free, \ +}; + + +#define DEFINE_ECB_CBC_CTR(NAME, TYPE, BLEN, ENCRYPT, DECRYPT) \ + DEFINE_SETIV(NAME, TYPE, BLEN) \ + DEFINE_ECB(NAME, TYPE, BLEN, ENCRYPT, DECRYPT) \ + DEFINE_CBC(NAME, TYPE, BLEN, ENCRYPT, DECRYPT) \ + DEFINE_CTR(NAME, TYPE, BLEN, ENCRYPT) + +#define DEFINE_ECB_CBC_CTR_XTS(NAME, TYPE, BLEN, ENCRYPT, DECRYPT) \ + DEFINE_ECB_CBC_CTR(NAME, TYPE, BLEN, ENCRYPT, DECRYPT) \ + DEFINE_XTS(NAME, TYPE, BLEN, ENCRYPT, DECRYPT) + + +typedef struct QCryptoNettleDESRFB { + QCryptoCipher base; + struct des_ctx key; + uint8_t iv[DES_BLOCK_SIZE]; +} QCryptoNettleDESRFB; + +static void des_encrypt_native(cipher_ctx_t ctx, cipher_length_t length, + uint8_t *dst, const uint8_t *src) +{ + des_encrypt(ctx, length, dst, src); +} + +static void des_decrypt_native(cipher_ctx_t ctx, cipher_length_t length, + uint8_t *dst, const uint8_t *src) +{ + des_decrypt(ctx, length, dst, src); +} + +DEFINE_ECB_CBC_CTR(qcrypto_nettle_des_rfb, QCryptoNettleDESRFB, + DES_BLOCK_SIZE, des_encrypt_native, des_decrypt_native) + + +typedef struct QCryptoNettleDES3 { + QCryptoCipher base; + struct des3_ctx key; + uint8_t iv[DES3_BLOCK_SIZE]; +} QCryptoNettleDES3; + +static void des3_encrypt_native(cipher_ctx_t ctx, cipher_length_t length, + uint8_t *dst, const uint8_t *src) +{ + des3_encrypt(ctx, length, dst, src); +} + +static void des3_decrypt_native(cipher_ctx_t ctx, cipher_length_t length, + uint8_t *dst, const uint8_t *src) +{ + des3_decrypt(ctx, length, dst, src); +} + +DEFINE_ECB_CBC_CTR(qcrypto_nettle_des3, QCryptoNettleDES3, DES3_BLOCK_SIZE, + des3_encrypt_native, des3_decrypt_native) + + +typedef struct QCryptoNettleAES128 { + QCryptoCipher base; + uint8_t iv[AES_BLOCK_SIZE]; + /* First key from pair is encode, second key is decode. */ + struct aes128_ctx key[2], key_xts[2]; +} QCryptoNettleAES128; + +static void aes128_encrypt_native(cipher_ctx_t ctx, cipher_length_t length, + uint8_t *dst, const uint8_t *src) +{ + CONST_CTX struct aes128_ctx *keys = ctx; + aes128_encrypt(&keys[0], length, dst, src); +} + +static void aes128_decrypt_native(cipher_ctx_t ctx, cipher_length_t length, + uint8_t *dst, const uint8_t *src) +{ + CONST_CTX struct aes128_ctx *keys = ctx; + aes128_decrypt(&keys[1], length, dst, src); +} + +DEFINE_ECB_CBC_CTR_XTS(qcrypto_nettle_aes128, + QCryptoNettleAES128, AES_BLOCK_SIZE, + aes128_encrypt_native, aes128_decrypt_native) + + +typedef struct QCryptoNettleAES192 { + QCryptoCipher base; + uint8_t iv[AES_BLOCK_SIZE]; + /* First key from pair is encode, second key is decode. */ + struct aes192_ctx key[2], key_xts[2]; +} QCryptoNettleAES192; + +static void aes192_encrypt_native(cipher_ctx_t ctx, cipher_length_t length, + uint8_t *dst, const uint8_t *src) +{ + CONST_CTX struct aes192_ctx *keys = ctx; + aes192_encrypt(&keys[0], length, dst, src); +} + +static void aes192_decrypt_native(cipher_ctx_t ctx, cipher_length_t length, + uint8_t *dst, const uint8_t *src) +{ + CONST_CTX struct aes192_ctx *keys = ctx; + aes192_decrypt(&keys[1], length, dst, src); +} + +DEFINE_ECB_CBC_CTR_XTS(qcrypto_nettle_aes192, + QCryptoNettleAES192, AES_BLOCK_SIZE, + aes192_encrypt_native, aes192_decrypt_native) + + +typedef struct QCryptoNettleAES256 { + QCryptoCipher base; + uint8_t iv[AES_BLOCK_SIZE]; + /* First key from pair is encode, second key is decode. */ + struct aes256_ctx key[2], key_xts[2]; +} QCryptoNettleAES256; + +static void aes256_encrypt_native(cipher_ctx_t ctx, cipher_length_t length, + uint8_t *dst, const uint8_t *src) +{ + CONST_CTX struct aes256_ctx *keys = ctx; + aes256_encrypt(&keys[0], length, dst, src); +} + +static void aes256_decrypt_native(cipher_ctx_t ctx, cipher_length_t length, + uint8_t *dst, const uint8_t *src) +{ + CONST_CTX struct aes256_ctx *keys = ctx; + aes256_decrypt(&keys[1], length, dst, src); +} + +DEFINE_ECB_CBC_CTR_XTS(qcrypto_nettle_aes256, + QCryptoNettleAES256, AES_BLOCK_SIZE, + aes256_encrypt_native, aes256_decrypt_native) + + +typedef struct QCryptoNettleCAST128 { + QCryptoCipher base; + uint8_t iv[CAST128_BLOCK_SIZE]; + struct cast128_ctx key, key_xts; +} QCryptoNettleCAST128; + +static void cast128_encrypt_native(cipher_ctx_t ctx, cipher_length_t length, + uint8_t *dst, const uint8_t *src) +{ + cast128_encrypt(ctx, length, dst, src); +} + +static void cast128_decrypt_native(cipher_ctx_t ctx, cipher_length_t length, + uint8_t *dst, const uint8_t *src) +{ + cast128_decrypt(ctx, length, dst, src); +} + +DEFINE_ECB_CBC_CTR(qcrypto_nettle_cast128, + QCryptoNettleCAST128, CAST128_BLOCK_SIZE, + cast128_encrypt_native, cast128_decrypt_native) + + +typedef struct QCryptoNettleSerpent { + QCryptoCipher base; + uint8_t iv[SERPENT_BLOCK_SIZE]; + struct serpent_ctx key, key_xts; +} QCryptoNettleSerpent; + + +static void serpent_encrypt_native(cipher_ctx_t ctx, cipher_length_t length, + uint8_t *dst, const uint8_t *src) +{ + serpent_encrypt(ctx, length, dst, src); +} + +static void serpent_decrypt_native(cipher_ctx_t ctx, cipher_length_t length, + uint8_t *dst, const uint8_t *src) +{ + serpent_decrypt(ctx, length, dst, src); +} + +DEFINE_ECB_CBC_CTR_XTS(qcrypto_nettle_serpent, + QCryptoNettleSerpent, SERPENT_BLOCK_SIZE, + serpent_encrypt_native, serpent_decrypt_native) + + +typedef struct QCryptoNettleTwofish { + QCryptoCipher base; + uint8_t iv[TWOFISH_BLOCK_SIZE]; + struct twofish_ctx key, key_xts; +} QCryptoNettleTwofish; + +static void twofish_encrypt_native(cipher_ctx_t ctx, cipher_length_t length, + uint8_t *dst, const uint8_t *src) +{ + twofish_encrypt(ctx, length, dst, src); +} + +static void twofish_decrypt_native(cipher_ctx_t ctx, cipher_length_t length, + uint8_t *dst, const uint8_t *src) +{ + twofish_decrypt(ctx, length, dst, src); +} + +DEFINE_ECB_CBC_CTR_XTS(qcrypto_nettle_twofish, + QCryptoNettleTwofish, TWOFISH_BLOCK_SIZE, + twofish_encrypt_native, twofish_decrypt_native) + + +bool qcrypto_cipher_supports(QCryptoCipherAlgorithm alg, + QCryptoCipherMode mode) +{ + switch (alg) { + case QCRYPTO_CIPHER_ALG_DES_RFB: + case QCRYPTO_CIPHER_ALG_3DES: + case QCRYPTO_CIPHER_ALG_AES_128: + case QCRYPTO_CIPHER_ALG_AES_192: + case QCRYPTO_CIPHER_ALG_AES_256: + case QCRYPTO_CIPHER_ALG_CAST5_128: + case QCRYPTO_CIPHER_ALG_SERPENT_128: + case QCRYPTO_CIPHER_ALG_SERPENT_192: + case QCRYPTO_CIPHER_ALG_SERPENT_256: + case QCRYPTO_CIPHER_ALG_TWOFISH_128: + case QCRYPTO_CIPHER_ALG_TWOFISH_192: + case QCRYPTO_CIPHER_ALG_TWOFISH_256: + break; + default: + return false; + } + + switch (mode) { + case QCRYPTO_CIPHER_MODE_ECB: + case QCRYPTO_CIPHER_MODE_CBC: + case QCRYPTO_CIPHER_MODE_XTS: + case QCRYPTO_CIPHER_MODE_CTR: + return true; + default: + return false; + } +} + +static QCryptoCipher *qcrypto_cipher_ctx_new(QCryptoCipherAlgorithm alg, + QCryptoCipherMode mode, + const uint8_t *key, + size_t nkey, + Error **errp) +{ + switch (mode) { + case QCRYPTO_CIPHER_MODE_ECB: + case QCRYPTO_CIPHER_MODE_CBC: + case QCRYPTO_CIPHER_MODE_XTS: + case QCRYPTO_CIPHER_MODE_CTR: + break; + default: + goto bad_cipher_mode; + } + + if (!qcrypto_cipher_validate_key_length(alg, mode, nkey, errp)) { + return NULL; + } + + switch (alg) { + case QCRYPTO_CIPHER_ALG_DES_RFB: + { + QCryptoNettleDESRFB *ctx; + const QCryptoCipherDriver *drv; + uint8_t *rfbkey; + + switch (mode) { + case QCRYPTO_CIPHER_MODE_ECB: + drv = &qcrypto_nettle_des_rfb_driver_ecb; + break; + case QCRYPTO_CIPHER_MODE_CBC: + drv = &qcrypto_nettle_des_rfb_driver_cbc; + break; + case QCRYPTO_CIPHER_MODE_CTR: + drv = &qcrypto_nettle_des_rfb_driver_ctr; + break; + default: + goto bad_cipher_mode; + } + + ctx = g_new0(QCryptoNettleDESRFB, 1); + ctx->base.driver = drv; + + rfbkey = qcrypto_cipher_munge_des_rfb_key(key, nkey); + des_set_key(&ctx->key, rfbkey); + g_free(rfbkey); + + return &ctx->base; + } + + case QCRYPTO_CIPHER_ALG_3DES: + { + QCryptoNettleDES3 *ctx; + const QCryptoCipherDriver *drv; + + switch (mode) { + case QCRYPTO_CIPHER_MODE_ECB: + drv = &qcrypto_nettle_des3_driver_ecb; + break; + case QCRYPTO_CIPHER_MODE_CBC: + drv = &qcrypto_nettle_des3_driver_cbc; + break; + case QCRYPTO_CIPHER_MODE_CTR: + drv = &qcrypto_nettle_des3_driver_ctr; + break; + default: + goto bad_cipher_mode; + } + + ctx = g_new0(QCryptoNettleDES3, 1); + ctx->base.driver = drv; + des3_set_key(&ctx->key, key); + return &ctx->base; + } + + case QCRYPTO_CIPHER_ALG_AES_128: + { + QCryptoNettleAES128 *ctx = g_new0(QCryptoNettleAES128, 1); + + switch (mode) { + case QCRYPTO_CIPHER_MODE_ECB: + ctx->base.driver = &qcrypto_nettle_aes128_driver_ecb; + break; + case QCRYPTO_CIPHER_MODE_CBC: + ctx->base.driver = &qcrypto_nettle_aes128_driver_cbc; + break; + case QCRYPTO_CIPHER_MODE_CTR: + ctx->base.driver = &qcrypto_nettle_aes128_driver_ctr; + break; + case QCRYPTO_CIPHER_MODE_XTS: + ctx->base.driver = &qcrypto_nettle_aes128_driver_xts; + nkey /= 2; + aes128_set_encrypt_key(&ctx->key_xts[0], key + nkey); + aes128_set_decrypt_key(&ctx->key_xts[1], key + nkey); + break; + default: + g_assert_not_reached(); + } + aes128_set_encrypt_key(&ctx->key[0], key); + aes128_set_decrypt_key(&ctx->key[1], key); + + return &ctx->base; + } + + case QCRYPTO_CIPHER_ALG_AES_192: + { + QCryptoNettleAES192 *ctx = g_new0(QCryptoNettleAES192, 1); + + switch (mode) { + case QCRYPTO_CIPHER_MODE_ECB: + ctx->base.driver = &qcrypto_nettle_aes192_driver_ecb; + break; + case QCRYPTO_CIPHER_MODE_CBC: + ctx->base.driver = &qcrypto_nettle_aes192_driver_cbc; + break; + case QCRYPTO_CIPHER_MODE_CTR: + ctx->base.driver = &qcrypto_nettle_aes192_driver_ctr; + break; + case QCRYPTO_CIPHER_MODE_XTS: + ctx->base.driver = &qcrypto_nettle_aes192_driver_xts; + nkey /= 2; + aes192_set_encrypt_key(&ctx->key_xts[0], key + nkey); + aes192_set_decrypt_key(&ctx->key_xts[1], key + nkey); + break; + default: + g_assert_not_reached(); + } + aes192_set_encrypt_key(&ctx->key[0], key); + aes192_set_decrypt_key(&ctx->key[1], key); + + return &ctx->base; + } + + case QCRYPTO_CIPHER_ALG_AES_256: + { + QCryptoNettleAES256 *ctx = g_new0(QCryptoNettleAES256, 1); + + switch (mode) { + case QCRYPTO_CIPHER_MODE_ECB: + ctx->base.driver = &qcrypto_nettle_aes256_driver_ecb; + break; + case QCRYPTO_CIPHER_MODE_CBC: + ctx->base.driver = &qcrypto_nettle_aes256_driver_cbc; + break; + case QCRYPTO_CIPHER_MODE_CTR: + ctx->base.driver = &qcrypto_nettle_aes256_driver_ctr; + break; + case QCRYPTO_CIPHER_MODE_XTS: + ctx->base.driver = &qcrypto_nettle_aes256_driver_xts; + nkey /= 2; + aes256_set_encrypt_key(&ctx->key_xts[0], key + nkey); + aes256_set_decrypt_key(&ctx->key_xts[1], key + nkey); + break; + default: + g_assert_not_reached(); + } + aes256_set_encrypt_key(&ctx->key[0], key); + aes256_set_decrypt_key(&ctx->key[1], key); + + return &ctx->base; + } + + case QCRYPTO_CIPHER_ALG_CAST5_128: + { + QCryptoNettleCAST128 *ctx; + const QCryptoCipherDriver *drv; + + switch (mode) { + case QCRYPTO_CIPHER_MODE_ECB: + drv = &qcrypto_nettle_cast128_driver_ecb; + break; + case QCRYPTO_CIPHER_MODE_CBC: + drv = &qcrypto_nettle_cast128_driver_cbc; + break; + case QCRYPTO_CIPHER_MODE_CTR: + drv = &qcrypto_nettle_cast128_driver_ctr; + break; + default: + goto bad_cipher_mode; + } + + ctx = g_new0(QCryptoNettleCAST128, 1); + ctx->base.driver = drv; + cast5_set_key(&ctx->key, nkey, key); + + return &ctx->base; + } + + case QCRYPTO_CIPHER_ALG_SERPENT_128: + case QCRYPTO_CIPHER_ALG_SERPENT_192: + case QCRYPTO_CIPHER_ALG_SERPENT_256: + { + QCryptoNettleSerpent *ctx = g_new0(QCryptoNettleSerpent, 1); + + switch (mode) { + case QCRYPTO_CIPHER_MODE_ECB: + ctx->base.driver = &qcrypto_nettle_serpent_driver_ecb; + break; + case QCRYPTO_CIPHER_MODE_CBC: + ctx->base.driver = &qcrypto_nettle_serpent_driver_cbc; + break; + case QCRYPTO_CIPHER_MODE_CTR: + ctx->base.driver = &qcrypto_nettle_serpent_driver_ctr; + break; + case QCRYPTO_CIPHER_MODE_XTS: + ctx->base.driver = &qcrypto_nettle_serpent_driver_xts; + nkey /= 2; + serpent_set_key(&ctx->key_xts, nkey, key + nkey); + break; + default: + g_assert_not_reached(); + } + serpent_set_key(&ctx->key, nkey, key); + + return &ctx->base; + } + + case QCRYPTO_CIPHER_ALG_TWOFISH_128: + case QCRYPTO_CIPHER_ALG_TWOFISH_192: + case QCRYPTO_CIPHER_ALG_TWOFISH_256: + { + QCryptoNettleTwofish *ctx = g_new0(QCryptoNettleTwofish, 1); + + switch (mode) { + case QCRYPTO_CIPHER_MODE_ECB: + ctx->base.driver = &qcrypto_nettle_twofish_driver_ecb; + break; + case QCRYPTO_CIPHER_MODE_CBC: + ctx->base.driver = &qcrypto_nettle_twofish_driver_cbc; + break; + case QCRYPTO_CIPHER_MODE_CTR: + ctx->base.driver = &qcrypto_nettle_twofish_driver_ctr; + break; + case QCRYPTO_CIPHER_MODE_XTS: + ctx->base.driver = &qcrypto_nettle_twofish_driver_xts; + nkey /= 2; + twofish_set_key(&ctx->key_xts, nkey, key + nkey); + break; + default: + g_assert_not_reached(); + } + twofish_set_key(&ctx->key, nkey, key); + + return &ctx->base; + } + + default: + error_setg(errp, "Unsupported cipher algorithm %s", + QCryptoCipherAlgorithm_str(alg)); + return NULL; + } + + bad_cipher_mode: + error_setg(errp, "Unsupported cipher mode %s", + QCryptoCipherMode_str(mode)); + return NULL; +} diff --git a/crypto/cipher.c b/crypto/cipher.c new file mode 100644 index 000000000..068b2fb86 --- /dev/null +++ b/crypto/cipher.c @@ -0,0 +1,222 @@ +/* + * QEMU Crypto cipher algorithms + * + * Copyright (c) 2015 Red Hat, Inc. + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, see . + * + */ + +#include "qemu/osdep.h" +#include "qemu/host-utils.h" +#include "qapi/error.h" +#include "crypto/cipher.h" +#include "cipherpriv.h" + + +static const size_t alg_key_len[QCRYPTO_CIPHER_ALG__MAX] = { + [QCRYPTO_CIPHER_ALG_AES_128] = 16, + [QCRYPTO_CIPHER_ALG_AES_192] = 24, + [QCRYPTO_CIPHER_ALG_AES_256] = 32, + [QCRYPTO_CIPHER_ALG_DES_RFB] = 8, + [QCRYPTO_CIPHER_ALG_3DES] = 24, + [QCRYPTO_CIPHER_ALG_CAST5_128] = 16, + [QCRYPTO_CIPHER_ALG_SERPENT_128] = 16, + [QCRYPTO_CIPHER_ALG_SERPENT_192] = 24, + [QCRYPTO_CIPHER_ALG_SERPENT_256] = 32, + [QCRYPTO_CIPHER_ALG_TWOFISH_128] = 16, + [QCRYPTO_CIPHER_ALG_TWOFISH_192] = 24, + [QCRYPTO_CIPHER_ALG_TWOFISH_256] = 32, +}; + +static const size_t alg_block_len[QCRYPTO_CIPHER_ALG__MAX] = { + [QCRYPTO_CIPHER_ALG_AES_128] = 16, + [QCRYPTO_CIPHER_ALG_AES_192] = 16, + [QCRYPTO_CIPHER_ALG_AES_256] = 16, + [QCRYPTO_CIPHER_ALG_DES_RFB] = 8, + [QCRYPTO_CIPHER_ALG_3DES] = 8, + [QCRYPTO_CIPHER_ALG_CAST5_128] = 8, + [QCRYPTO_CIPHER_ALG_SERPENT_128] = 16, + [QCRYPTO_CIPHER_ALG_SERPENT_192] = 16, + [QCRYPTO_CIPHER_ALG_SERPENT_256] = 16, + [QCRYPTO_CIPHER_ALG_TWOFISH_128] = 16, + [QCRYPTO_CIPHER_ALG_TWOFISH_192] = 16, + [QCRYPTO_CIPHER_ALG_TWOFISH_256] = 16, +}; + +static const bool mode_need_iv[QCRYPTO_CIPHER_MODE__MAX] = { + [QCRYPTO_CIPHER_MODE_ECB] = false, + [QCRYPTO_CIPHER_MODE_CBC] = true, + [QCRYPTO_CIPHER_MODE_XTS] = true, + [QCRYPTO_CIPHER_MODE_CTR] = true, +}; + + +size_t qcrypto_cipher_get_block_len(QCryptoCipherAlgorithm alg) +{ + assert(alg < G_N_ELEMENTS(alg_key_len)); + return alg_block_len[alg]; +} + + +size_t qcrypto_cipher_get_key_len(QCryptoCipherAlgorithm alg) +{ + assert(alg < G_N_ELEMENTS(alg_key_len)); + return alg_key_len[alg]; +} + + +size_t qcrypto_cipher_get_iv_len(QCryptoCipherAlgorithm alg, + QCryptoCipherMode mode) +{ + if (alg >= G_N_ELEMENTS(alg_block_len)) { + return 0; + } + if (mode >= G_N_ELEMENTS(mode_need_iv)) { + return 0; + } + + if (mode_need_iv[mode]) { + return alg_block_len[alg]; + } + return 0; +} + + +static bool +qcrypto_cipher_validate_key_length(QCryptoCipherAlgorithm alg, + QCryptoCipherMode mode, + size_t nkey, + Error **errp) +{ + if ((unsigned)alg >= QCRYPTO_CIPHER_ALG__MAX) { + error_setg(errp, "Cipher algorithm %d out of range", + alg); + return false; + } + + if (mode == QCRYPTO_CIPHER_MODE_XTS) { + if (alg == QCRYPTO_CIPHER_ALG_DES_RFB + || alg == QCRYPTO_CIPHER_ALG_3DES) { + error_setg(errp, "XTS mode not compatible with DES-RFB/3DES"); + return false; + } + if (nkey % 2) { + error_setg(errp, "XTS cipher key length should be a multiple of 2"); + return false; + } + + if (alg_key_len[alg] != (nkey / 2)) { + error_setg(errp, "Cipher key length %zu should be %zu", + nkey, alg_key_len[alg] * 2); + return false; + } + } else { + if (alg_key_len[alg] != nkey) { + error_setg(errp, "Cipher key length %zu should be %zu", + nkey, alg_key_len[alg]); + return false; + } + } + return true; +} + +#if defined(CONFIG_GCRYPT) || defined(CONFIG_NETTLE) +static uint8_t * +qcrypto_cipher_munge_des_rfb_key(const uint8_t *key, + size_t nkey) +{ + uint8_t *ret = g_new0(uint8_t, nkey); + size_t i; + for (i = 0; i < nkey; i++) { + uint8_t r = key[i]; + r = (r & 0xf0) >> 4 | (r & 0x0f) << 4; + r = (r & 0xcc) >> 2 | (r & 0x33) << 2; + r = (r & 0xaa) >> 1 | (r & 0x55) << 1; + ret[i] = r; + } + return ret; +} +#endif /* CONFIG_GCRYPT || CONFIG_NETTLE */ + +#ifdef CONFIG_GCRYPT +#include "cipher-gcrypt.c.inc" +#elif defined CONFIG_NETTLE +#include "cipher-nettle.c.inc" +#else +#include "cipher-builtin.c.inc" +#endif + +QCryptoCipher *qcrypto_cipher_new(QCryptoCipherAlgorithm alg, + QCryptoCipherMode mode, + const uint8_t *key, size_t nkey, + Error **errp) +{ + QCryptoCipher *cipher = NULL; + +#ifdef CONFIG_AF_ALG + cipher = qcrypto_afalg_cipher_ctx_new(alg, mode, key, nkey, NULL); +#endif + + if (!cipher) { + cipher = qcrypto_cipher_ctx_new(alg, mode, key, nkey, errp); + if (!cipher) { + return NULL; + } + } + + cipher->alg = alg; + cipher->mode = mode; + + return cipher; +} + + +int qcrypto_cipher_encrypt(QCryptoCipher *cipher, + const void *in, + void *out, + size_t len, + Error **errp) +{ + const QCryptoCipherDriver *drv = cipher->driver; + return drv->cipher_encrypt(cipher, in, out, len, errp); +} + + +int qcrypto_cipher_decrypt(QCryptoCipher *cipher, + const void *in, + void *out, + size_t len, + Error **errp) +{ + const QCryptoCipherDriver *drv = cipher->driver; + return drv->cipher_decrypt(cipher, in, out, len, errp); +} + + +int qcrypto_cipher_setiv(QCryptoCipher *cipher, + const uint8_t *iv, size_t niv, + Error **errp) +{ + const QCryptoCipherDriver *drv = cipher->driver; + return drv->cipher_setiv(cipher, iv, niv, errp); +} + + +void qcrypto_cipher_free(QCryptoCipher *cipher) +{ + if (cipher) { + cipher->driver->cipher_free(cipher); + } +} diff --git a/crypto/cipherpriv.h b/crypto/cipherpriv.h new file mode 100644 index 000000000..396527857 --- /dev/null +++ b/crypto/cipherpriv.h @@ -0,0 +1,52 @@ +/* + * QEMU Crypto cipher driver supports + * + * Copyright (c) 2017 HUAWEI TECHNOLOGIES CO., LTD. + * + * Authors: + * Longpeng(Mike) + * + * This work is licensed under the terms of the GNU GPL, version 2 or + * (at your option) any later version. See the COPYING file in the + * top-level directory. + * + */ + +#ifndef QCRYPTO_CIPHERPRIV_H +#define QCRYPTO_CIPHERPRIV_H + +#include "qapi/qapi-types-crypto.h" + +struct QCryptoCipherDriver { + int (*cipher_encrypt)(QCryptoCipher *cipher, + const void *in, + void *out, + size_t len, + Error **errp); + + int (*cipher_decrypt)(QCryptoCipher *cipher, + const void *in, + void *out, + size_t len, + Error **errp); + + int (*cipher_setiv)(QCryptoCipher *cipher, + const uint8_t *iv, size_t niv, + Error **errp); + + void (*cipher_free)(QCryptoCipher *cipher); +}; + +#ifdef CONFIG_AF_ALG + +#include "afalgpriv.h" + +extern QCryptoCipher * +qcrypto_afalg_cipher_ctx_new(QCryptoCipherAlgorithm alg, + QCryptoCipherMode mode, + const uint8_t *key, + size_t nkey, Error **errp); + +#endif + +#endif diff --git a/crypto/desrfb.c b/crypto/desrfb.c new file mode 100644 index 000000000..3274c3651 --- /dev/null +++ b/crypto/desrfb.c @@ -0,0 +1,416 @@ +/* + * This is D3DES (V5.09) by Richard Outerbridge with the double and + * triple-length support removed for use in VNC. Also the bytebit[] array + * has been reversed so that the most significant bit in each byte of the + * key is ignored, not the least significant. + * + * These changes are: + * Copyright (C) 1999 AT&T Laboratories Cambridge. All Rights Reserved. + * + * This software is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + */ + +/* D3DES (V5.09) - + * + * A portable, public domain, version of the Data Encryption Standard. + * + * Written with Symantec's THINK (Lightspeed) C by Richard Outerbridge. + * Thanks to: Dan Hoey for his excellent Initial and Inverse permutation + * code; Jim Gillogly & Phil Karn for the DES key schedule code; Dennis + * Ferguson, Eric Young and Dana How for comparing notes; and Ray Lau, + * for humouring me on. + * + * Copyright (c) 1988,1989,1990,1991,1992 by Richard Outerbridge. + * (GEnie : OUTER; CIS : [71755,204]) Graven Imagery, 1992. + */ + +#include "qemu/osdep.h" +#include "crypto/desrfb.h" + +static void scrunch(unsigned char *, unsigned long *); +static void unscrun(unsigned long *, unsigned char *); +static void desfunc(unsigned long *, unsigned long *); +static void cookey(unsigned long *); + +static unsigned long KnL[32] = { 0L }; + +static const unsigned short bytebit[8] = { + 01, 02, 04, 010, 020, 040, 0100, 0200 }; + +static const unsigned long bigbyte[24] = { + 0x800000L, 0x400000L, 0x200000L, 0x100000L, + 0x80000L, 0x40000L, 0x20000L, 0x10000L, + 0x8000L, 0x4000L, 0x2000L, 0x1000L, + 0x800L, 0x400L, 0x200L, 0x100L, + 0x80L, 0x40L, 0x20L, 0x10L, + 0x8L, 0x4L, 0x2L, 0x1L }; + +/* Use the key schedule specified in the Standard (ANSI X3.92-1981). */ + +static const unsigned char pc1[56] = { + 56, 48, 40, 32, 24, 16, 8, 0, 57, 49, 41, 33, 25, 17, + 9, 1, 58, 50, 42, 34, 26, 18, 10, 2, 59, 51, 43, 35, + 62, 54, 46, 38, 30, 22, 14, 6, 61, 53, 45, 37, 29, 21, + 13, 5, 60, 52, 44, 36, 28, 20, 12, 4, 27, 19, 11, 3 }; + +static const unsigned char totrot[16] = { + 1,2,4,6,8,10,12,14,15,17,19,21,23,25,27,28 }; + +static const unsigned char pc2[48] = { + 13, 16, 10, 23, 0, 4, 2, 27, 14, 5, 20, 9, + 22, 18, 11, 3, 25, 7, 15, 6, 26, 19, 12, 1, + 40, 51, 30, 36, 46, 54, 29, 39, 50, 44, 32, 47, + 43, 48, 38, 55, 33, 52, 45, 41, 49, 35, 28, 31 }; + +/* Thanks to James Gillogly & Phil Karn! */ +void deskey(unsigned char *key, int edf) +{ + register int i, j, l, m, n; + unsigned char pc1m[56], pcr[56]; + unsigned long kn[32]; + + for ( j = 0; j < 56; j++ ) { + l = pc1[j]; + m = l & 07; + pc1m[j] = (key[l >> 3] & bytebit[m]) ? 1 : 0; + } + for( i = 0; i < 16; i++ ) { + if( edf == DE1 ) m = (15 - i) << 1; + else m = i << 1; + n = m + 1; + kn[m] = kn[n] = 0L; + for( j = 0; j < 28; j++ ) { + l = j + totrot[i]; + if( l < 28 ) pcr[j] = pc1m[l]; + else pcr[j] = pc1m[l - 28]; + } + for( j = 28; j < 56; j++ ) { + l = j + totrot[i]; + if( l < 56 ) pcr[j] = pc1m[l]; + else pcr[j] = pc1m[l - 28]; + } + for( j = 0; j < 24; j++ ) { + if( pcr[pc2[j]] ) kn[m] |= bigbyte[j]; + if( pcr[pc2[j+24]] ) kn[n] |= bigbyte[j]; + } + } + cookey(kn); + return; + } + +static void cookey(register unsigned long *raw1) +{ + register unsigned long *cook, *raw0; + unsigned long dough[32]; + register int i; + + cook = dough; + for( i = 0; i < 16; i++, raw1++ ) { + raw0 = raw1++; + *cook = (*raw0 & 0x00fc0000L) << 6; + *cook |= (*raw0 & 0x00000fc0L) << 10; + *cook |= (*raw1 & 0x00fc0000L) >> 10; + *cook++ |= (*raw1 & 0x00000fc0L) >> 6; + *cook = (*raw0 & 0x0003f000L) << 12; + *cook |= (*raw0 & 0x0000003fL) << 16; + *cook |= (*raw1 & 0x0003f000L) >> 4; + *cook++ |= (*raw1 & 0x0000003fL); + } + usekey(dough); + return; + } + +void usekey(register unsigned long *from) +{ + register unsigned long *to, *endp; + + to = KnL, endp = &KnL[32]; + while( to < endp ) *to++ = *from++; + return; + } + +void des(unsigned char *inblock, unsigned char *outblock) +{ + unsigned long work[2]; + + scrunch(inblock, work); + desfunc(work, KnL); + unscrun(work, outblock); + return; + } + +static void scrunch(register unsigned char *outof, register unsigned long *into) +{ + *into = (*outof++ & 0xffL) << 24; + *into |= (*outof++ & 0xffL) << 16; + *into |= (*outof++ & 0xffL) << 8; + *into++ |= (*outof++ & 0xffL); + *into = (*outof++ & 0xffL) << 24; + *into |= (*outof++ & 0xffL) << 16; + *into |= (*outof++ & 0xffL) << 8; + *into |= (*outof & 0xffL); + return; + } + +static void unscrun(register unsigned long *outof, register unsigned char *into) +{ + *into++ = (unsigned char)((*outof >> 24) & 0xffL); + *into++ = (unsigned char)((*outof >> 16) & 0xffL); + *into++ = (unsigned char)((*outof >> 8) & 0xffL); + *into++ = (unsigned char)(*outof++ & 0xffL); + *into++ = (unsigned char)((*outof >> 24) & 0xffL); + *into++ = (unsigned char)((*outof >> 16) & 0xffL); + *into++ = (unsigned char)((*outof >> 8) & 0xffL); + *into = (unsigned char)(*outof & 0xffL); + return; + } + +static const unsigned long SP1[64] = { + 0x01010400L, 0x00000000L, 0x00010000L, 0x01010404L, + 0x01010004L, 0x00010404L, 0x00000004L, 0x00010000L, + 0x00000400L, 0x01010400L, 0x01010404L, 0x00000400L, + 0x01000404L, 0x01010004L, 0x01000000L, 0x00000004L, + 0x00000404L, 0x01000400L, 0x01000400L, 0x00010400L, + 0x00010400L, 0x01010000L, 0x01010000L, 0x01000404L, + 0x00010004L, 0x01000004L, 0x01000004L, 0x00010004L, + 0x00000000L, 0x00000404L, 0x00010404L, 0x01000000L, + 0x00010000L, 0x01010404L, 0x00000004L, 0x01010000L, + 0x01010400L, 0x01000000L, 0x01000000L, 0x00000400L, + 0x01010004L, 0x00010000L, 0x00010400L, 0x01000004L, + 0x00000400L, 0x00000004L, 0x01000404L, 0x00010404L, + 0x01010404L, 0x00010004L, 0x01010000L, 0x01000404L, + 0x01000004L, 0x00000404L, 0x00010404L, 0x01010400L, + 0x00000404L, 0x01000400L, 0x01000400L, 0x00000000L, + 0x00010004L, 0x00010400L, 0x00000000L, 0x01010004L }; + +static const unsigned long SP2[64] = { + 0x80108020L, 0x80008000L, 0x00008000L, 0x00108020L, + 0x00100000L, 0x00000020L, 0x80100020L, 0x80008020L, + 0x80000020L, 0x80108020L, 0x80108000L, 0x80000000L, + 0x80008000L, 0x00100000L, 0x00000020L, 0x80100020L, + 0x00108000L, 0x00100020L, 0x80008020L, 0x00000000L, + 0x80000000L, 0x00008000L, 0x00108020L, 0x80100000L, + 0x00100020L, 0x80000020L, 0x00000000L, 0x00108000L, + 0x00008020L, 0x80108000L, 0x80100000L, 0x00008020L, + 0x00000000L, 0x00108020L, 0x80100020L, 0x00100000L, + 0x80008020L, 0x80100000L, 0x80108000L, 0x00008000L, + 0x80100000L, 0x80008000L, 0x00000020L, 0x80108020L, + 0x00108020L, 0x00000020L, 0x00008000L, 0x80000000L, + 0x00008020L, 0x80108000L, 0x00100000L, 0x80000020L, + 0x00100020L, 0x80008020L, 0x80000020L, 0x00100020L, + 0x00108000L, 0x00000000L, 0x80008000L, 0x00008020L, + 0x80000000L, 0x80100020L, 0x80108020L, 0x00108000L }; + +static const unsigned long SP3[64] = { + 0x00000208L, 0x08020200L, 0x00000000L, 0x08020008L, + 0x08000200L, 0x00000000L, 0x00020208L, 0x08000200L, + 0x00020008L, 0x08000008L, 0x08000008L, 0x00020000L, + 0x08020208L, 0x00020008L, 0x08020000L, 0x00000208L, + 0x08000000L, 0x00000008L, 0x08020200L, 0x00000200L, + 0x00020200L, 0x08020000L, 0x08020008L, 0x00020208L, + 0x08000208L, 0x00020200L, 0x00020000L, 0x08000208L, + 0x00000008L, 0x08020208L, 0x00000200L, 0x08000000L, + 0x08020200L, 0x08000000L, 0x00020008L, 0x00000208L, + 0x00020000L, 0x08020200L, 0x08000200L, 0x00000000L, + 0x00000200L, 0x00020008L, 0x08020208L, 0x08000200L, + 0x08000008L, 0x00000200L, 0x00000000L, 0x08020008L, + 0x08000208L, 0x00020000L, 0x08000000L, 0x08020208L, + 0x00000008L, 0x00020208L, 0x00020200L, 0x08000008L, + 0x08020000L, 0x08000208L, 0x00000208L, 0x08020000L, + 0x00020208L, 0x00000008L, 0x08020008L, 0x00020200L }; + +static const unsigned long SP4[64] = { + 0x00802001L, 0x00002081L, 0x00002081L, 0x00000080L, + 0x00802080L, 0x00800081L, 0x00800001L, 0x00002001L, + 0x00000000L, 0x00802000L, 0x00802000L, 0x00802081L, + 0x00000081L, 0x00000000L, 0x00800080L, 0x00800001L, + 0x00000001L, 0x00002000L, 0x00800000L, 0x00802001L, + 0x00000080L, 0x00800000L, 0x00002001L, 0x00002080L, + 0x00800081L, 0x00000001L, 0x00002080L, 0x00800080L, + 0x00002000L, 0x00802080L, 0x00802081L, 0x00000081L, + 0x00800080L, 0x00800001L, 0x00802000L, 0x00802081L, + 0x00000081L, 0x00000000L, 0x00000000L, 0x00802000L, + 0x00002080L, 0x00800080L, 0x00800081L, 0x00000001L, + 0x00802001L, 0x00002081L, 0x00002081L, 0x00000080L, + 0x00802081L, 0x00000081L, 0x00000001L, 0x00002000L, + 0x00800001L, 0x00002001L, 0x00802080L, 0x00800081L, + 0x00002001L, 0x00002080L, 0x00800000L, 0x00802001L, + 0x00000080L, 0x00800000L, 0x00002000L, 0x00802080L }; + +static const unsigned long SP5[64] = { + 0x00000100L, 0x02080100L, 0x02080000L, 0x42000100L, + 0x00080000L, 0x00000100L, 0x40000000L, 0x02080000L, + 0x40080100L, 0x00080000L, 0x02000100L, 0x40080100L, + 0x42000100L, 0x42080000L, 0x00080100L, 0x40000000L, + 0x02000000L, 0x40080000L, 0x40080000L, 0x00000000L, + 0x40000100L, 0x42080100L, 0x42080100L, 0x02000100L, + 0x42080000L, 0x40000100L, 0x00000000L, 0x42000000L, + 0x02080100L, 0x02000000L, 0x42000000L, 0x00080100L, + 0x00080000L, 0x42000100L, 0x00000100L, 0x02000000L, + 0x40000000L, 0x02080000L, 0x42000100L, 0x40080100L, + 0x02000100L, 0x40000000L, 0x42080000L, 0x02080100L, + 0x40080100L, 0x00000100L, 0x02000000L, 0x42080000L, + 0x42080100L, 0x00080100L, 0x42000000L, 0x42080100L, + 0x02080000L, 0x00000000L, 0x40080000L, 0x42000000L, + 0x00080100L, 0x02000100L, 0x40000100L, 0x00080000L, + 0x00000000L, 0x40080000L, 0x02080100L, 0x40000100L }; + +static const unsigned long SP6[64] = { + 0x20000010L, 0x20400000L, 0x00004000L, 0x20404010L, + 0x20400000L, 0x00000010L, 0x20404010L, 0x00400000L, + 0x20004000L, 0x00404010L, 0x00400000L, 0x20000010L, + 0x00400010L, 0x20004000L, 0x20000000L, 0x00004010L, + 0x00000000L, 0x00400010L, 0x20004010L, 0x00004000L, + 0x00404000L, 0x20004010L, 0x00000010L, 0x20400010L, + 0x20400010L, 0x00000000L, 0x00404010L, 0x20404000L, + 0x00004010L, 0x00404000L, 0x20404000L, 0x20000000L, + 0x20004000L, 0x00000010L, 0x20400010L, 0x00404000L, + 0x20404010L, 0x00400000L, 0x00004010L, 0x20000010L, + 0x00400000L, 0x20004000L, 0x20000000L, 0x00004010L, + 0x20000010L, 0x20404010L, 0x00404000L, 0x20400000L, + 0x00404010L, 0x20404000L, 0x00000000L, 0x20400010L, + 0x00000010L, 0x00004000L, 0x20400000L, 0x00404010L, + 0x00004000L, 0x00400010L, 0x20004010L, 0x00000000L, + 0x20404000L, 0x20000000L, 0x00400010L, 0x20004010L }; + +static const unsigned long SP7[64] = { + 0x00200000L, 0x04200002L, 0x04000802L, 0x00000000L, + 0x00000800L, 0x04000802L, 0x00200802L, 0x04200800L, + 0x04200802L, 0x00200000L, 0x00000000L, 0x04000002L, + 0x00000002L, 0x04000000L, 0x04200002L, 0x00000802L, + 0x04000800L, 0x00200802L, 0x00200002L, 0x04000800L, + 0x04000002L, 0x04200000L, 0x04200800L, 0x00200002L, + 0x04200000L, 0x00000800L, 0x00000802L, 0x04200802L, + 0x00200800L, 0x00000002L, 0x04000000L, 0x00200800L, + 0x04000000L, 0x00200800L, 0x00200000L, 0x04000802L, + 0x04000802L, 0x04200002L, 0x04200002L, 0x00000002L, + 0x00200002L, 0x04000000L, 0x04000800L, 0x00200000L, + 0x04200800L, 0x00000802L, 0x00200802L, 0x04200800L, + 0x00000802L, 0x04000002L, 0x04200802L, 0x04200000L, + 0x00200800L, 0x00000000L, 0x00000002L, 0x04200802L, + 0x00000000L, 0x00200802L, 0x04200000L, 0x00000800L, + 0x04000002L, 0x04000800L, 0x00000800L, 0x00200002L }; + +static const unsigned long SP8[64] = { + 0x10001040L, 0x00001000L, 0x00040000L, 0x10041040L, + 0x10000000L, 0x10001040L, 0x00000040L, 0x10000000L, + 0x00040040L, 0x10040000L, 0x10041040L, 0x00041000L, + 0x10041000L, 0x00041040L, 0x00001000L, 0x00000040L, + 0x10040000L, 0x10000040L, 0x10001000L, 0x00001040L, + 0x00041000L, 0x00040040L, 0x10040040L, 0x10041000L, + 0x00001040L, 0x00000000L, 0x00000000L, 0x10040040L, + 0x10000040L, 0x10001000L, 0x00041040L, 0x00040000L, + 0x00041040L, 0x00040000L, 0x10041000L, 0x00001000L, + 0x00000040L, 0x10040040L, 0x00001000L, 0x00041040L, + 0x10001000L, 0x00000040L, 0x10000040L, 0x10040000L, + 0x10040040L, 0x10000000L, 0x00040000L, 0x10001040L, + 0x00000000L, 0x10041040L, 0x00040040L, 0x10000040L, + 0x10040000L, 0x10001000L, 0x10001040L, 0x00000000L, + 0x10041040L, 0x00041000L, 0x00041000L, 0x00001040L, + 0x00001040L, 0x00040040L, 0x10000000L, 0x10041000L }; + +static void desfunc(register unsigned long *block, register unsigned long *keys) +{ + register unsigned long fval, work, right, leftt; + register int round; + + leftt = block[0]; + right = block[1]; + work = ((leftt >> 4) ^ right) & 0x0f0f0f0fL; + right ^= work; + leftt ^= (work << 4); + work = ((leftt >> 16) ^ right) & 0x0000ffffL; + right ^= work; + leftt ^= (work << 16); + work = ((right >> 2) ^ leftt) & 0x33333333L; + leftt ^= work; + right ^= (work << 2); + work = ((right >> 8) ^ leftt) & 0x00ff00ffL; + leftt ^= work; + right ^= (work << 8); + right = ((right << 1) | ((right >> 31) & 1L)) & 0xffffffffL; + work = (leftt ^ right) & 0xaaaaaaaaL; + leftt ^= work; + right ^= work; + leftt = ((leftt << 1) | ((leftt >> 31) & 1L)) & 0xffffffffL; + + for( round = 0; round < 8; round++ ) { + work = (right << 28) | (right >> 4); + work ^= *keys++; + fval = SP7[ work & 0x3fL]; + fval |= SP5[(work >> 8) & 0x3fL]; + fval |= SP3[(work >> 16) & 0x3fL]; + fval |= SP1[(work >> 24) & 0x3fL]; + work = right ^ *keys++; + fval |= SP8[ work & 0x3fL]; + fval |= SP6[(work >> 8) & 0x3fL]; + fval |= SP4[(work >> 16) & 0x3fL]; + fval |= SP2[(work >> 24) & 0x3fL]; + leftt ^= fval; + work = (leftt << 28) | (leftt >> 4); + work ^= *keys++; + fval = SP7[ work & 0x3fL]; + fval |= SP5[(work >> 8) & 0x3fL]; + fval |= SP3[(work >> 16) & 0x3fL]; + fval |= SP1[(work >> 24) & 0x3fL]; + work = leftt ^ *keys++; + fval |= SP8[ work & 0x3fL]; + fval |= SP6[(work >> 8) & 0x3fL]; + fval |= SP4[(work >> 16) & 0x3fL]; + fval |= SP2[(work >> 24) & 0x3fL]; + right ^= fval; + } + + right = (right << 31) | (right >> 1); + work = (leftt ^ right) & 0xaaaaaaaaL; + leftt ^= work; + right ^= work; + leftt = (leftt << 31) | (leftt >> 1); + work = ((leftt >> 8) ^ right) & 0x00ff00ffL; + right ^= work; + leftt ^= (work << 8); + work = ((leftt >> 2) ^ right) & 0x33333333L; + right ^= work; + leftt ^= (work << 2); + work = ((right >> 16) ^ leftt) & 0x0000ffffL; + leftt ^= work; + right ^= (work << 16); + work = ((right >> 4) ^ leftt) & 0x0f0f0f0fL; + leftt ^= work; + right ^= (work << 4); + *block++ = right; + *block = leftt; + return; + } + +/* Validation sets: + * + * Single-length key, single-length plaintext - + * Key : 0123 4567 89ab cdef + * Plain : 0123 4567 89ab cde7 + * Cipher : c957 4425 6a5e d31d + * + * Double-length key, single-length plaintext - + * Key : 0123 4567 89ab cdef fedc ba98 7654 3210 + * Plain : 0123 4567 89ab cde7 + * Cipher : 7f1d 0a77 826b 8aff + * + * Double-length key, double-length plaintext - + * Key : 0123 4567 89ab cdef fedc ba98 7654 3210 + * Plain : 0123 4567 89ab cdef 0123 4567 89ab cdff + * Cipher : 27a0 8440 406a df60 278f 47cf 42d6 15d7 + * + * Triple-length key, single-length plaintext - + * Key : 0123 4567 89ab cdef fedc ba98 7654 3210 89ab cdef 0123 4567 + * Plain : 0123 4567 89ab cde7 + * Cipher : de0b 7c06 ae5e 0ed5 + * + * Triple-length key, double-length plaintext - + * Key : 0123 4567 89ab cdef fedc ba98 7654 3210 89ab cdef 0123 4567 + * Plain : 0123 4567 89ab cdef 0123 4567 89ab cdff + * Cipher : ad0d 1b30 ac17 cf07 0ed1 1c63 81e4 4de5 + * + * d3des V5.0a rwo 9208.07 18:44 Graven Imagery + **********************************************************************/ diff --git a/crypto/hash-afalg.c b/crypto/hash-afalg.c new file mode 100644 index 000000000..cf34c694a --- /dev/null +++ b/crypto/hash-afalg.c @@ -0,0 +1,214 @@ +/* + * QEMU Crypto af_alg-backend hash/hmac support + * + * Copyright (c) 2017 HUAWEI TECHNOLOGIES CO., LTD. + * + * Authors: + * Longpeng(Mike) + * + * This work is licensed under the terms of the GNU GPL, version 2 or + * (at your option) any later version. See the COPYING file in the + * top-level directory. + */ +#include "qemu/osdep.h" +#include "qemu/iov.h" +#include "qemu/sockets.h" +#include "qemu-common.h" +#include "qapi/error.h" +#include "crypto/hash.h" +#include "crypto/hmac.h" +#include "hashpriv.h" +#include "hmacpriv.h" + +static char * +qcrypto_afalg_hash_format_name(QCryptoHashAlgorithm alg, + bool is_hmac, + Error **errp) +{ + char *name; + const char *alg_name; + + switch (alg) { + case QCRYPTO_HASH_ALG_MD5: + alg_name = "md5"; + break; + case QCRYPTO_HASH_ALG_SHA1: + alg_name = "sha1"; + break; + case QCRYPTO_HASH_ALG_SHA224: + alg_name = "sha224"; + break; + case QCRYPTO_HASH_ALG_SHA256: + alg_name = "sha256"; + break; + case QCRYPTO_HASH_ALG_SHA384: + alg_name = "sha384"; + break; + case QCRYPTO_HASH_ALG_SHA512: + alg_name = "sha512"; + break; + case QCRYPTO_HASH_ALG_RIPEMD160: + alg_name = "rmd160"; + break; + + default: + error_setg(errp, "Unsupported hash algorithm %d", alg); + return NULL; + } + + if (is_hmac) { + name = g_strdup_printf("hmac(%s)", alg_name); + } else { + name = g_strdup_printf("%s", alg_name); + } + + return name; +} + +static QCryptoAFAlg * +qcrypto_afalg_hash_hmac_ctx_new(QCryptoHashAlgorithm alg, + const uint8_t *key, size_t nkey, + bool is_hmac, Error **errp) +{ + QCryptoAFAlg *afalg; + char *name; + + name = qcrypto_afalg_hash_format_name(alg, is_hmac, errp); + if (!name) { + return NULL; + } + + afalg = qcrypto_afalg_comm_alloc(AFALG_TYPE_HASH, name, errp); + if (!afalg) { + g_free(name); + return NULL; + } + + g_free(name); + + /* HMAC needs setkey */ + if (is_hmac) { + if (qemu_setsockopt(afalg->tfmfd, SOL_ALG, ALG_SET_KEY, + key, nkey) != 0) { + error_setg_errno(errp, errno, "Set hmac key failed"); + qcrypto_afalg_comm_free(afalg); + return NULL; + } + } + + return afalg; +} + +static QCryptoAFAlg * +qcrypto_afalg_hash_ctx_new(QCryptoHashAlgorithm alg, + Error **errp) +{ + return qcrypto_afalg_hash_hmac_ctx_new(alg, NULL, 0, false, errp); +} + +QCryptoAFAlg * +qcrypto_afalg_hmac_ctx_new(QCryptoHashAlgorithm alg, + const uint8_t *key, size_t nkey, + Error **errp) +{ + return qcrypto_afalg_hash_hmac_ctx_new(alg, key, nkey, true, errp); +} + +static int +qcrypto_afalg_hash_hmac_bytesv(QCryptoAFAlg *hmac, + QCryptoHashAlgorithm alg, + const struct iovec *iov, + size_t niov, uint8_t **result, + size_t *resultlen, + Error **errp) +{ + QCryptoAFAlg *afalg; + struct iovec outv; + int ret = 0; + bool is_hmac = (hmac != NULL) ? true : false; + const int expect_len = qcrypto_hash_digest_len(alg); + + if (*resultlen == 0) { + *resultlen = expect_len; + *result = g_new0(uint8_t, *resultlen); + } else if (*resultlen != expect_len) { + error_setg(errp, + "Result buffer size %zu is not match hash %d", + *resultlen, expect_len); + return -1; + } + + if (is_hmac) { + afalg = hmac; + } else { + afalg = qcrypto_afalg_hash_ctx_new(alg, errp); + if (!afalg) { + return -1; + } + } + + /* send data to kernel's crypto core */ + ret = iov_send_recv(afalg->opfd, iov, niov, + 0, iov_size(iov, niov), true); + if (ret < 0) { + error_setg_errno(errp, errno, "Send data to afalg-core failed"); + goto out; + } + + /* hash && get result */ + outv.iov_base = *result; + outv.iov_len = *resultlen; + ret = iov_send_recv(afalg->opfd, &outv, 1, + 0, iov_size(&outv, 1), false); + if (ret < 0) { + error_setg_errno(errp, errno, "Recv result from afalg-core failed"); + } else { + ret = 0; + } + +out: + if (!is_hmac) { + qcrypto_afalg_comm_free(afalg); + } + return ret; +} + +static int +qcrypto_afalg_hash_bytesv(QCryptoHashAlgorithm alg, + const struct iovec *iov, + size_t niov, uint8_t **result, + size_t *resultlen, + Error **errp) +{ + return qcrypto_afalg_hash_hmac_bytesv(NULL, alg, iov, niov, result, + resultlen, errp); +} + +static int +qcrypto_afalg_hmac_bytesv(QCryptoHmac *hmac, + const struct iovec *iov, + size_t niov, uint8_t **result, + size_t *resultlen, + Error **errp) +{ + return qcrypto_afalg_hash_hmac_bytesv(hmac->opaque, hmac->alg, + iov, niov, result, resultlen, + errp); +} + +static void qcrypto_afalg_hmac_ctx_free(QCryptoHmac *hmac) +{ + QCryptoAFAlg *afalg; + + afalg = hmac->opaque; + qcrypto_afalg_comm_free(afalg); +} + +QCryptoHashDriver qcrypto_hash_afalg_driver = { + .hash_bytesv = qcrypto_afalg_hash_bytesv, +}; + +QCryptoHmacDriver qcrypto_hmac_afalg_driver = { + .hmac_bytesv = qcrypto_afalg_hmac_bytesv, + .hmac_free = qcrypto_afalg_hmac_ctx_free, +}; diff --git a/crypto/hash-gcrypt.c b/crypto/hash-gcrypt.c new file mode 100644 index 000000000..829e48258 --- /dev/null +++ b/crypto/hash-gcrypt.c @@ -0,0 +1,116 @@ +/* + * QEMU Crypto hash algorithms + * + * Copyright (c) 2016 Red Hat, Inc. + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, see . + * + */ + +#include "qemu/osdep.h" +#include +#include "qapi/error.h" +#include "crypto/hash.h" +#include "hashpriv.h" + + +static int qcrypto_hash_alg_map[QCRYPTO_HASH_ALG__MAX] = { + [QCRYPTO_HASH_ALG_MD5] = GCRY_MD_MD5, + [QCRYPTO_HASH_ALG_SHA1] = GCRY_MD_SHA1, + [QCRYPTO_HASH_ALG_SHA224] = GCRY_MD_SHA224, + [QCRYPTO_HASH_ALG_SHA256] = GCRY_MD_SHA256, + [QCRYPTO_HASH_ALG_SHA384] = GCRY_MD_SHA384, + [QCRYPTO_HASH_ALG_SHA512] = GCRY_MD_SHA512, + [QCRYPTO_HASH_ALG_RIPEMD160] = GCRY_MD_RMD160, +}; + +gboolean qcrypto_hash_supports(QCryptoHashAlgorithm alg) +{ + if (alg < G_N_ELEMENTS(qcrypto_hash_alg_map) && + qcrypto_hash_alg_map[alg] != GCRY_MD_NONE) { + return true; + } + return false; +} + + +static int +qcrypto_gcrypt_hash_bytesv(QCryptoHashAlgorithm alg, + const struct iovec *iov, + size_t niov, + uint8_t **result, + size_t *resultlen, + Error **errp) +{ + int i, ret; + gcry_md_hd_t md; + unsigned char *digest; + + if (!qcrypto_hash_supports(alg)) { + error_setg(errp, + "Unknown hash algorithm %d", + alg); + return -1; + } + + ret = gcry_md_open(&md, qcrypto_hash_alg_map[alg], 0); + + if (ret < 0) { + error_setg(errp, + "Unable to initialize hash algorithm: %s", + gcry_strerror(ret)); + return -1; + } + + for (i = 0; i < niov; i++) { + gcry_md_write(md, iov[i].iov_base, iov[i].iov_len); + } + + ret = gcry_md_get_algo_dlen(qcrypto_hash_alg_map[alg]); + if (ret <= 0) { + error_setg(errp, + "Unable to get hash length: %s", + gcry_strerror(ret)); + goto error; + } + if (*resultlen == 0) { + *resultlen = ret; + *result = g_new0(uint8_t, *resultlen); + } else if (*resultlen != ret) { + error_setg(errp, + "Result buffer size %zu is smaller than hash %d", + *resultlen, ret); + goto error; + } + + digest = gcry_md_read(md, 0); + if (!digest) { + error_setg(errp, + "No digest produced"); + goto error; + } + memcpy(*result, digest, *resultlen); + + gcry_md_close(md); + return 0; + + error: + gcry_md_close(md); + return -1; +} + + +QCryptoHashDriver qcrypto_hash_lib_driver = { + .hash_bytesv = qcrypto_gcrypt_hash_bytesv, +}; diff --git a/crypto/hash-glib.c b/crypto/hash-glib.c new file mode 100644 index 000000000..82de9db70 --- /dev/null +++ b/crypto/hash-glib.c @@ -0,0 +1,100 @@ +/* + * QEMU Crypto hash algorithms + * + * Copyright (c) 2016 Red Hat, Inc. + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, see . + * + */ + +#include "qemu/osdep.h" +#include "qapi/error.h" +#include "crypto/hash.h" +#include "hashpriv.h" + + +static int qcrypto_hash_alg_map[QCRYPTO_HASH_ALG__MAX] = { + [QCRYPTO_HASH_ALG_MD5] = G_CHECKSUM_MD5, + [QCRYPTO_HASH_ALG_SHA1] = G_CHECKSUM_SHA1, + [QCRYPTO_HASH_ALG_SHA224] = -1, + [QCRYPTO_HASH_ALG_SHA256] = G_CHECKSUM_SHA256, + [QCRYPTO_HASH_ALG_SHA384] = -1, + [QCRYPTO_HASH_ALG_SHA512] = G_CHECKSUM_SHA512, + [QCRYPTO_HASH_ALG_RIPEMD160] = -1, +}; + +gboolean qcrypto_hash_supports(QCryptoHashAlgorithm alg) +{ + if (alg < G_N_ELEMENTS(qcrypto_hash_alg_map) && + qcrypto_hash_alg_map[alg] != -1) { + return true; + } + return false; +} + + +static int +qcrypto_glib_hash_bytesv(QCryptoHashAlgorithm alg, + const struct iovec *iov, + size_t niov, + uint8_t **result, + size_t *resultlen, + Error **errp) +{ + int i, ret; + GChecksum *cs; + + if (!qcrypto_hash_supports(alg)) { + error_setg(errp, + "Unknown hash algorithm %d", + alg); + return -1; + } + + cs = g_checksum_new(qcrypto_hash_alg_map[alg]); + + for (i = 0; i < niov; i++) { + g_checksum_update(cs, iov[i].iov_base, iov[i].iov_len); + } + + ret = g_checksum_type_get_length(qcrypto_hash_alg_map[alg]); + if (ret < 0) { + error_setg(errp, "%s", + "Unable to get hash length"); + goto error; + } + if (*resultlen == 0) { + *resultlen = ret; + *result = g_new0(uint8_t, *resultlen); + } else if (*resultlen != ret) { + error_setg(errp, + "Result buffer size %zu is smaller than hash %d", + *resultlen, ret); + goto error; + } + + g_checksum_get_digest(cs, *result, resultlen); + + g_checksum_free(cs); + return 0; + + error: + g_checksum_free(cs); + return -1; +} + + +QCryptoHashDriver qcrypto_hash_lib_driver = { + .hash_bytesv = qcrypto_glib_hash_bytesv, +}; diff --git a/crypto/hash-nettle.c b/crypto/hash-nettle.c new file mode 100644 index 000000000..2a6ee7c7d --- /dev/null +++ b/crypto/hash-nettle.c @@ -0,0 +1,167 @@ +/* + * QEMU Crypto hash algorithms + * + * Copyright (c) 2016 Red Hat, Inc. + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, see . + * + */ + +#include "qemu/osdep.h" +#include "qapi/error.h" +#include "crypto/hash.h" +#include "hashpriv.h" +#include +#include +#include + +#if CONFIG_NETTLE_VERSION_MAJOR < 3 +typedef unsigned int hash_length_t; +#else +typedef size_t hash_length_t; +#endif + +typedef void (*qcrypto_nettle_init)(void *ctx); +typedef void (*qcrypto_nettle_write)(void *ctx, + hash_length_t len, + const uint8_t *buf); +typedef void (*qcrypto_nettle_result)(void *ctx, + hash_length_t len, + uint8_t *buf); + +union qcrypto_hash_ctx { + struct md5_ctx md5; + struct sha1_ctx sha1; + struct sha224_ctx sha224; + struct sha256_ctx sha256; + struct sha384_ctx sha384; + struct sha512_ctx sha512; + struct ripemd160_ctx ripemd160; +}; + +struct qcrypto_hash_alg { + qcrypto_nettle_init init; + qcrypto_nettle_write write; + qcrypto_nettle_result result; + size_t len; +} qcrypto_hash_alg_map[] = { + [QCRYPTO_HASH_ALG_MD5] = { + .init = (qcrypto_nettle_init)md5_init, + .write = (qcrypto_nettle_write)md5_update, + .result = (qcrypto_nettle_result)md5_digest, + .len = MD5_DIGEST_SIZE, + }, + [QCRYPTO_HASH_ALG_SHA1] = { + .init = (qcrypto_nettle_init)sha1_init, + .write = (qcrypto_nettle_write)sha1_update, + .result = (qcrypto_nettle_result)sha1_digest, + .len = SHA1_DIGEST_SIZE, + }, + [QCRYPTO_HASH_ALG_SHA224] = { + .init = (qcrypto_nettle_init)sha224_init, + .write = (qcrypto_nettle_write)sha224_update, + .result = (qcrypto_nettle_result)sha224_digest, + .len = SHA224_DIGEST_SIZE, + }, + [QCRYPTO_HASH_ALG_SHA256] = { + .init = (qcrypto_nettle_init)sha256_init, + .write = (qcrypto_nettle_write)sha256_update, + .result = (qcrypto_nettle_result)sha256_digest, + .len = SHA256_DIGEST_SIZE, + }, + [QCRYPTO_HASH_ALG_SHA384] = { + .init = (qcrypto_nettle_init)sha384_init, + .write = (qcrypto_nettle_write)sha384_update, + .result = (qcrypto_nettle_result)sha384_digest, + .len = SHA384_DIGEST_SIZE, + }, + [QCRYPTO_HASH_ALG_SHA512] = { + .init = (qcrypto_nettle_init)sha512_init, + .write = (qcrypto_nettle_write)sha512_update, + .result = (qcrypto_nettle_result)sha512_digest, + .len = SHA512_DIGEST_SIZE, + }, + [QCRYPTO_HASH_ALG_RIPEMD160] = { + .init = (qcrypto_nettle_init)ripemd160_init, + .write = (qcrypto_nettle_write)ripemd160_update, + .result = (qcrypto_nettle_result)ripemd160_digest, + .len = RIPEMD160_DIGEST_SIZE, + }, +}; + +gboolean qcrypto_hash_supports(QCryptoHashAlgorithm alg) +{ + if (alg < G_N_ELEMENTS(qcrypto_hash_alg_map) && + qcrypto_hash_alg_map[alg].init != NULL) { + return true; + } + return false; +} + + +static int +qcrypto_nettle_hash_bytesv(QCryptoHashAlgorithm alg, + const struct iovec *iov, + size_t niov, + uint8_t **result, + size_t *resultlen, + Error **errp) +{ + size_t i; + union qcrypto_hash_ctx ctx; + + if (!qcrypto_hash_supports(alg)) { + error_setg(errp, + "Unknown hash algorithm %d", + alg); + return -1; + } + + qcrypto_hash_alg_map[alg].init(&ctx); + + for (i = 0; i < niov; i++) { + /* Some versions of nettle have functions + * declared with 'int' instead of 'size_t' + * so to be safe avoid writing more than + * UINT_MAX bytes at a time + */ + size_t len = iov[i].iov_len; + uint8_t *base = iov[i].iov_base; + while (len) { + size_t shortlen = MIN(len, UINT_MAX); + qcrypto_hash_alg_map[alg].write(&ctx, len, base); + len -= shortlen; + base += len; + } + } + + if (*resultlen == 0) { + *resultlen = qcrypto_hash_alg_map[alg].len; + *result = g_new0(uint8_t, *resultlen); + } else if (*resultlen != qcrypto_hash_alg_map[alg].len) { + error_setg(errp, + "Result buffer size %zu is smaller than hash %zu", + *resultlen, qcrypto_hash_alg_map[alg].len); + return -1; + } + + qcrypto_hash_alg_map[alg].result(&ctx, *resultlen, *result); + + return 0; +} + + +QCryptoHashDriver qcrypto_hash_lib_driver = { + .hash_bytesv = qcrypto_nettle_hash_bytesv, +}; diff --git a/crypto/hash.c b/crypto/hash.c new file mode 100644 index 000000000..b0f8228bd --- /dev/null +++ b/crypto/hash.c @@ -0,0 +1,144 @@ +/* + * QEMU Crypto hash algorithms + * + * Copyright (c) 2015 Red Hat, Inc. + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, see . + * + */ + +#include "qemu/osdep.h" +#include "crypto/hash.h" +#include "hashpriv.h" + +static size_t qcrypto_hash_alg_size[QCRYPTO_HASH_ALG__MAX] = { + [QCRYPTO_HASH_ALG_MD5] = 16, + [QCRYPTO_HASH_ALG_SHA1] = 20, + [QCRYPTO_HASH_ALG_SHA224] = 28, + [QCRYPTO_HASH_ALG_SHA256] = 32, + [QCRYPTO_HASH_ALG_SHA384] = 48, + [QCRYPTO_HASH_ALG_SHA512] = 64, + [QCRYPTO_HASH_ALG_RIPEMD160] = 20, +}; + +size_t qcrypto_hash_digest_len(QCryptoHashAlgorithm alg) +{ + assert(alg < G_N_ELEMENTS(qcrypto_hash_alg_size)); + return qcrypto_hash_alg_size[alg]; +} + +int qcrypto_hash_bytesv(QCryptoHashAlgorithm alg, + const struct iovec *iov, + size_t niov, + uint8_t **result, + size_t *resultlen, + Error **errp) +{ +#ifdef CONFIG_AF_ALG + int ret; + /* + * TODO: + * Maybe we should treat some afalg errors as fatal + */ + ret = qcrypto_hash_afalg_driver.hash_bytesv(alg, iov, niov, + result, resultlen, + NULL); + if (ret == 0) { + return ret; + } +#endif + + return qcrypto_hash_lib_driver.hash_bytesv(alg, iov, niov, + result, resultlen, + errp); +} + + +int qcrypto_hash_bytes(QCryptoHashAlgorithm alg, + const char *buf, + size_t len, + uint8_t **result, + size_t *resultlen, + Error **errp) +{ + struct iovec iov = { .iov_base = (char *)buf, + .iov_len = len }; + return qcrypto_hash_bytesv(alg, &iov, 1, result, resultlen, errp); +} + +static const char hex[] = "0123456789abcdef"; + +int qcrypto_hash_digestv(QCryptoHashAlgorithm alg, + const struct iovec *iov, + size_t niov, + char **digest, + Error **errp) +{ + uint8_t *result = NULL; + size_t resultlen = 0; + size_t i; + + if (qcrypto_hash_bytesv(alg, iov, niov, &result, &resultlen, errp) < 0) { + return -1; + } + + *digest = g_new0(char, (resultlen * 2) + 1); + for (i = 0 ; i < resultlen ; i++) { + (*digest)[(i * 2)] = hex[(result[i] >> 4) & 0xf]; + (*digest)[(i * 2) + 1] = hex[result[i] & 0xf]; + } + (*digest)[resultlen * 2] = '\0'; + g_free(result); + return 0; +} + +int qcrypto_hash_digest(QCryptoHashAlgorithm alg, + const char *buf, + size_t len, + char **digest, + Error **errp) +{ + struct iovec iov = { .iov_base = (char *)buf, .iov_len = len }; + + return qcrypto_hash_digestv(alg, &iov, 1, digest, errp); +} + +int qcrypto_hash_base64v(QCryptoHashAlgorithm alg, + const struct iovec *iov, + size_t niov, + char **base64, + Error **errp) +{ + uint8_t *result = NULL; + size_t resultlen = 0; + + if (qcrypto_hash_bytesv(alg, iov, niov, &result, &resultlen, errp) < 0) { + return -1; + } + + *base64 = g_base64_encode(result, resultlen); + g_free(result); + return 0; +} + +int qcrypto_hash_base64(QCryptoHashAlgorithm alg, + const char *buf, + size_t len, + char **base64, + Error **errp) +{ + struct iovec iov = { .iov_base = (char *)buf, .iov_len = len }; + + return qcrypto_hash_base64v(alg, &iov, 1, base64, errp); +} diff --git a/crypto/hashpriv.h b/crypto/hashpriv.h new file mode 100644 index 000000000..cee26ccb4 --- /dev/null +++ b/crypto/hashpriv.h @@ -0,0 +1,39 @@ +/* + * QEMU Crypto hash driver supports + * + * Copyright (c) 2017 HUAWEI TECHNOLOGIES CO., LTD. + * + * Authors: + * Longpeng(Mike) + * + * This work is licensed under the terms of the GNU GPL, version 2 or + * (at your option) any later version. See the COPYING file in the + * top-level directory. + * + */ + +#ifndef QCRYPTO_HASHPRIV_H +#define QCRYPTO_HASHPRIV_H + +typedef struct QCryptoHashDriver QCryptoHashDriver; + +struct QCryptoHashDriver { + int (*hash_bytesv)(QCryptoHashAlgorithm alg, + const struct iovec *iov, + size_t niov, + uint8_t **result, + size_t *resultlen, + Error **errp); +}; + +extern QCryptoHashDriver qcrypto_hash_lib_driver; + +#ifdef CONFIG_AF_ALG + +#include "afalgpriv.h" + +extern QCryptoHashDriver qcrypto_hash_afalg_driver; + +#endif + +#endif diff --git a/crypto/hmac-gcrypt.c b/crypto/hmac-gcrypt.c new file mode 100644 index 000000000..0c6f97971 --- /dev/null +++ b/crypto/hmac-gcrypt.c @@ -0,0 +1,150 @@ +/* + * QEMU Crypto hmac algorithms (based on libgcrypt) + * + * Copyright (c) 2016 HUAWEI TECHNOLOGIES CO., LTD. + * + * Authors: + * Longpeng(Mike) + * + * This work is licensed under the terms of the GNU GPL, version 2 or + * (at your option) any later version. See the COPYING file in the + * top-level directory. + * + */ + +#include "qemu/osdep.h" +#include "qapi/error.h" +#include "crypto/hmac.h" +#include "hmacpriv.h" +#include + +static int qcrypto_hmac_alg_map[QCRYPTO_HASH_ALG__MAX] = { + [QCRYPTO_HASH_ALG_MD5] = GCRY_MAC_HMAC_MD5, + [QCRYPTO_HASH_ALG_SHA1] = GCRY_MAC_HMAC_SHA1, + [QCRYPTO_HASH_ALG_SHA224] = GCRY_MAC_HMAC_SHA224, + [QCRYPTO_HASH_ALG_SHA256] = GCRY_MAC_HMAC_SHA256, + [QCRYPTO_HASH_ALG_SHA384] = GCRY_MAC_HMAC_SHA384, + [QCRYPTO_HASH_ALG_SHA512] = GCRY_MAC_HMAC_SHA512, + [QCRYPTO_HASH_ALG_RIPEMD160] = GCRY_MAC_HMAC_RMD160, +}; + +typedef struct QCryptoHmacGcrypt QCryptoHmacGcrypt; +struct QCryptoHmacGcrypt { + gcry_mac_hd_t handle; +}; + +bool qcrypto_hmac_supports(QCryptoHashAlgorithm alg) +{ + if (alg < G_N_ELEMENTS(qcrypto_hmac_alg_map) && + qcrypto_hmac_alg_map[alg] != GCRY_MAC_NONE) { + return true; + } + + return false; +} + +void *qcrypto_hmac_ctx_new(QCryptoHashAlgorithm alg, + const uint8_t *key, size_t nkey, + Error **errp) +{ + QCryptoHmacGcrypt *ctx; + gcry_error_t err; + + if (!qcrypto_hmac_supports(alg)) { + error_setg(errp, "Unsupported hmac algorithm %s", + QCryptoHashAlgorithm_str(alg)); + return NULL; + } + + ctx = g_new0(QCryptoHmacGcrypt, 1); + + err = gcry_mac_open(&ctx->handle, qcrypto_hmac_alg_map[alg], + GCRY_MAC_FLAG_SECURE, NULL); + if (err != 0) { + error_setg(errp, "Cannot initialize hmac: %s", + gcry_strerror(err)); + goto error; + } + + err = gcry_mac_setkey(ctx->handle, (const void *)key, nkey); + if (err != 0) { + error_setg(errp, "Cannot set key: %s", + gcry_strerror(err)); + gcry_mac_close(ctx->handle); + goto error; + } + + return ctx; + +error: + g_free(ctx); + return NULL; +} + +static void +qcrypto_gcrypt_hmac_ctx_free(QCryptoHmac *hmac) +{ + QCryptoHmacGcrypt *ctx; + + ctx = hmac->opaque; + gcry_mac_close(ctx->handle); + + g_free(ctx); +} + +static int +qcrypto_gcrypt_hmac_bytesv(QCryptoHmac *hmac, + const struct iovec *iov, + size_t niov, + uint8_t **result, + size_t *resultlen, + Error **errp) +{ + QCryptoHmacGcrypt *ctx; + gcry_error_t err; + uint32_t ret; + int i; + + ctx = hmac->opaque; + + for (i = 0; i < niov; i++) { + gcry_mac_write(ctx->handle, iov[i].iov_base, iov[i].iov_len); + } + + ret = gcry_mac_get_algo_maclen(qcrypto_hmac_alg_map[hmac->alg]); + if (ret <= 0) { + error_setg(errp, "Unable to get hmac length: %s", + gcry_strerror(ret)); + return -1; + } + + if (*resultlen == 0) { + *resultlen = ret; + *result = g_new0(uint8_t, *resultlen); + } else if (*resultlen != ret) { + error_setg(errp, "Result buffer size %zu is smaller than hmac %d", + *resultlen, ret); + return -1; + } + + err = gcry_mac_read(ctx->handle, *result, resultlen); + if (err != 0) { + error_setg(errp, "Cannot get result: %s", + gcry_strerror(err)); + return -1; + } + + err = gcry_mac_reset(ctx->handle); + if (err != 0) { + error_setg(errp, "Cannot reset hmac context: %s", + gcry_strerror(err)); + return -1; + } + + return 0; +} + +QCryptoHmacDriver qcrypto_hmac_lib_driver = { + .hmac_bytesv = qcrypto_gcrypt_hmac_bytesv, + .hmac_free = qcrypto_gcrypt_hmac_ctx_free, +}; diff --git a/crypto/hmac-glib.c b/crypto/hmac-glib.c new file mode 100644 index 000000000..509bbc74c --- /dev/null +++ b/crypto/hmac-glib.c @@ -0,0 +1,124 @@ +/* + * QEMU Crypto hmac algorithms (based on glib) + * + * Copyright (c) 2016 HUAWEI TECHNOLOGIES CO., LTD. + * + * Authors: + * Longpeng(Mike) + * + * This work is licensed under the terms of the GNU GPL, version 2 or + * (at your option) any later version. See the COPYING file in the + * top-level directory. + * + */ + +#include "qemu/osdep.h" +#include "qapi/error.h" +#include "crypto/hmac.h" +#include "hmacpriv.h" + +static int qcrypto_hmac_alg_map[QCRYPTO_HASH_ALG__MAX] = { + [QCRYPTO_HASH_ALG_MD5] = G_CHECKSUM_MD5, + [QCRYPTO_HASH_ALG_SHA1] = G_CHECKSUM_SHA1, + [QCRYPTO_HASH_ALG_SHA256] = G_CHECKSUM_SHA256, + [QCRYPTO_HASH_ALG_SHA512] = G_CHECKSUM_SHA512, + [QCRYPTO_HASH_ALG_SHA224] = -1, + [QCRYPTO_HASH_ALG_SHA384] = -1, + [QCRYPTO_HASH_ALG_RIPEMD160] = -1, +}; + +typedef struct QCryptoHmacGlib QCryptoHmacGlib; +struct QCryptoHmacGlib { + GHmac *ghmac; +}; + +bool qcrypto_hmac_supports(QCryptoHashAlgorithm alg) +{ + if (alg < G_N_ELEMENTS(qcrypto_hmac_alg_map) && + qcrypto_hmac_alg_map[alg] != -1) { + return true; + } + + return false; +} + +void *qcrypto_hmac_ctx_new(QCryptoHashAlgorithm alg, + const uint8_t *key, size_t nkey, + Error **errp) +{ + QCryptoHmacGlib *ctx; + + if (!qcrypto_hmac_supports(alg)) { + error_setg(errp, "Unsupported hmac algorithm %s", + QCryptoHashAlgorithm_str(alg)); + return NULL; + } + + ctx = g_new0(QCryptoHmacGlib, 1); + + ctx->ghmac = g_hmac_new(qcrypto_hmac_alg_map[alg], + (const uint8_t *)key, nkey); + if (!ctx->ghmac) { + error_setg(errp, "Cannot initialize hmac and set key"); + goto error; + } + + return ctx; + +error: + g_free(ctx); + return NULL; +} + +static void +qcrypto_glib_hmac_ctx_free(QCryptoHmac *hmac) +{ + QCryptoHmacGlib *ctx; + + ctx = hmac->opaque; + g_hmac_unref(ctx->ghmac); + + g_free(ctx); +} + +static int +qcrypto_glib_hmac_bytesv(QCryptoHmac *hmac, + const struct iovec *iov, + size_t niov, + uint8_t **result, + size_t *resultlen, + Error **errp) +{ + QCryptoHmacGlib *ctx; + int i, ret; + + ctx = hmac->opaque; + + for (i = 0; i < niov; i++) { + g_hmac_update(ctx->ghmac, iov[i].iov_base, iov[i].iov_len); + } + + ret = g_checksum_type_get_length(qcrypto_hmac_alg_map[hmac->alg]); + if (ret < 0) { + error_setg(errp, "Unable to get hmac length"); + return -1; + } + + if (*resultlen == 0) { + *resultlen = ret; + *result = g_new0(uint8_t, *resultlen); + } else if (*resultlen != ret) { + error_setg(errp, "Result buffer size %zu is smaller than hmac %d", + *resultlen, ret); + return -1; + } + + g_hmac_get_digest(ctx->ghmac, *result, resultlen); + + return 0; +} + +QCryptoHmacDriver qcrypto_hmac_lib_driver = { + .hmac_bytesv = qcrypto_glib_hmac_bytesv, + .hmac_free = qcrypto_glib_hmac_ctx_free, +}; diff --git a/crypto/hmac-nettle.c b/crypto/hmac-nettle.c new file mode 100644 index 000000000..1152b741f --- /dev/null +++ b/crypto/hmac-nettle.c @@ -0,0 +1,180 @@ +/* + * QEMU Crypto hmac algorithms (based on nettle) + * + * Copyright (c) 2016 HUAWEI TECHNOLOGIES CO., LTD. + * + * Authors: + * Longpeng(Mike) + * + * This work is licensed under the terms of the GNU GPL, version 2 or + * (at your option) any later version. See the COPYING file in the + * top-level directory. + * + */ + +#include "qemu/osdep.h" +#include "qapi/error.h" +#include "crypto/hmac.h" +#include "hmacpriv.h" +#include + +#if CONFIG_NETTLE_VERSION_MAJOR < 3 +typedef unsigned int hmac_length_t; +#else +typedef size_t hmac_length_t; +#endif + +typedef void (*qcrypto_nettle_hmac_setkey)(void *ctx, + hmac_length_t key_length, + const uint8_t *key); + +typedef void (*qcrypto_nettle_hmac_update)(void *ctx, + hmac_length_t length, + const uint8_t *data); + +typedef void (*qcrypto_nettle_hmac_digest)(void *ctx, + hmac_length_t length, + uint8_t *digest); + +typedef struct QCryptoHmacNettle QCryptoHmacNettle; +struct QCryptoHmacNettle { + union qcrypto_nettle_hmac_ctx { + struct hmac_md5_ctx md5_ctx; + struct hmac_sha1_ctx sha1_ctx; + struct hmac_sha256_ctx sha256_ctx; /* equals hmac_sha224_ctx */ + struct hmac_sha512_ctx sha512_ctx; /* equals hmac_sha384_ctx */ + struct hmac_ripemd160_ctx ripemd160_ctx; + } u; +}; + +struct qcrypto_nettle_hmac_alg { + qcrypto_nettle_hmac_setkey setkey; + qcrypto_nettle_hmac_update update; + qcrypto_nettle_hmac_digest digest; + size_t len; +} qcrypto_hmac_alg_map[QCRYPTO_HASH_ALG__MAX] = { + [QCRYPTO_HASH_ALG_MD5] = { + .setkey = (qcrypto_nettle_hmac_setkey)hmac_md5_set_key, + .update = (qcrypto_nettle_hmac_update)hmac_md5_update, + .digest = (qcrypto_nettle_hmac_digest)hmac_md5_digest, + .len = MD5_DIGEST_SIZE, + }, + [QCRYPTO_HASH_ALG_SHA1] = { + .setkey = (qcrypto_nettle_hmac_setkey)hmac_sha1_set_key, + .update = (qcrypto_nettle_hmac_update)hmac_sha1_update, + .digest = (qcrypto_nettle_hmac_digest)hmac_sha1_digest, + .len = SHA1_DIGEST_SIZE, + }, + [QCRYPTO_HASH_ALG_SHA224] = { + .setkey = (qcrypto_nettle_hmac_setkey)hmac_sha224_set_key, + .update = (qcrypto_nettle_hmac_update)hmac_sha224_update, + .digest = (qcrypto_nettle_hmac_digest)hmac_sha224_digest, + .len = SHA224_DIGEST_SIZE, + }, + [QCRYPTO_HASH_ALG_SHA256] = { + .setkey = (qcrypto_nettle_hmac_setkey)hmac_sha256_set_key, + .update = (qcrypto_nettle_hmac_update)hmac_sha256_update, + .digest = (qcrypto_nettle_hmac_digest)hmac_sha256_digest, + .len = SHA256_DIGEST_SIZE, + }, + [QCRYPTO_HASH_ALG_SHA384] = { + .setkey = (qcrypto_nettle_hmac_setkey)hmac_sha384_set_key, + .update = (qcrypto_nettle_hmac_update)hmac_sha384_update, + .digest = (qcrypto_nettle_hmac_digest)hmac_sha384_digest, + .len = SHA384_DIGEST_SIZE, + }, + [QCRYPTO_HASH_ALG_SHA512] = { + .setkey = (qcrypto_nettle_hmac_setkey)hmac_sha512_set_key, + .update = (qcrypto_nettle_hmac_update)hmac_sha512_update, + .digest = (qcrypto_nettle_hmac_digest)hmac_sha512_digest, + .len = SHA512_DIGEST_SIZE, + }, + [QCRYPTO_HASH_ALG_RIPEMD160] = { + .setkey = (qcrypto_nettle_hmac_setkey)hmac_ripemd160_set_key, + .update = (qcrypto_nettle_hmac_update)hmac_ripemd160_update, + .digest = (qcrypto_nettle_hmac_digest)hmac_ripemd160_digest, + .len = RIPEMD160_DIGEST_SIZE, + }, +}; + +bool qcrypto_hmac_supports(QCryptoHashAlgorithm alg) +{ + if (alg < G_N_ELEMENTS(qcrypto_hmac_alg_map) && + qcrypto_hmac_alg_map[alg].setkey != NULL) { + return true; + } + + return false; +} + +void *qcrypto_hmac_ctx_new(QCryptoHashAlgorithm alg, + const uint8_t *key, size_t nkey, + Error **errp) +{ + QCryptoHmacNettle *ctx; + + if (!qcrypto_hmac_supports(alg)) { + error_setg(errp, "Unsupported hmac algorithm %s", + QCryptoHashAlgorithm_str(alg)); + return NULL; + } + + ctx = g_new0(QCryptoHmacNettle, 1); + + qcrypto_hmac_alg_map[alg].setkey(&ctx->u, nkey, key); + + return ctx; +} + +static void +qcrypto_nettle_hmac_ctx_free(QCryptoHmac *hmac) +{ + QCryptoHmacNettle *ctx; + + ctx = hmac->opaque; + g_free(ctx); +} + +static int +qcrypto_nettle_hmac_bytesv(QCryptoHmac *hmac, + const struct iovec *iov, + size_t niov, + uint8_t **result, + size_t *resultlen, + Error **errp) +{ + QCryptoHmacNettle *ctx; + size_t i; + + ctx = (QCryptoHmacNettle *)hmac->opaque; + + for (i = 0; i < niov; ++i) { + size_t len = iov[i].iov_len; + uint8_t *base = iov[i].iov_base; + while (len) { + size_t shortlen = MIN(len, UINT_MAX); + qcrypto_hmac_alg_map[hmac->alg].update(&ctx->u, len, base); + len -= shortlen; + base += len; + } + } + + if (*resultlen == 0) { + *resultlen = qcrypto_hmac_alg_map[hmac->alg].len; + *result = g_new0(uint8_t, *resultlen); + } else if (*resultlen != qcrypto_hmac_alg_map[hmac->alg].len) { + error_setg(errp, + "Result buffer size %zu is smaller than hash %zu", + *resultlen, qcrypto_hmac_alg_map[hmac->alg].len); + return -1; + } + + qcrypto_hmac_alg_map[hmac->alg].digest(&ctx->u, *resultlen, *result); + + return 0; +} + +QCryptoHmacDriver qcrypto_hmac_lib_driver = { + .hmac_bytesv = qcrypto_nettle_hmac_bytesv, + .hmac_free = qcrypto_nettle_hmac_ctx_free, +}; diff --git a/crypto/hmac.c b/crypto/hmac.c new file mode 100644 index 000000000..4de7e8c9c --- /dev/null +++ b/crypto/hmac.c @@ -0,0 +1,127 @@ +/* + * QEMU Crypto hmac algorithms + * + * Copyright (c) 2016 HUAWEI TECHNOLOGIES CO., LTD. + * + * This work is licensed under the terms of the GNU GPL, version 2 or + * (at your option) any later version. See the COPYING file in the + * top-level directory. + * + */ + +#include "qemu/osdep.h" +#include "crypto/hmac.h" +#include "hmacpriv.h" + +static const char hex[] = "0123456789abcdef"; + +int qcrypto_hmac_bytesv(QCryptoHmac *hmac, + const struct iovec *iov, + size_t niov, + uint8_t **result, + size_t *resultlen, + Error **errp) +{ + QCryptoHmacDriver *drv = hmac->driver; + + return drv->hmac_bytesv(hmac, iov, niov, result, resultlen, errp); +} + +int qcrypto_hmac_bytes(QCryptoHmac *hmac, + const char *buf, + size_t len, + uint8_t **result, + size_t *resultlen, + Error **errp) +{ + struct iovec iov = { + .iov_base = (char *)buf, + .iov_len = len + }; + + return qcrypto_hmac_bytesv(hmac, &iov, 1, result, resultlen, errp); +} + +int qcrypto_hmac_digestv(QCryptoHmac *hmac, + const struct iovec *iov, + size_t niov, + char **digest, + Error **errp) +{ + uint8_t *result = NULL; + size_t resultlen = 0; + size_t i; + + if (qcrypto_hmac_bytesv(hmac, iov, niov, &result, &resultlen, errp) < 0) { + return -1; + } + + *digest = g_new0(char, (resultlen * 2) + 1); + + for (i = 0 ; i < resultlen ; i++) { + (*digest)[(i * 2)] = hex[(result[i] >> 4) & 0xf]; + (*digest)[(i * 2) + 1] = hex[result[i] & 0xf]; + } + + (*digest)[resultlen * 2] = '\0'; + + g_free(result); + return 0; +} + +int qcrypto_hmac_digest(QCryptoHmac *hmac, + const char *buf, + size_t len, + char **digest, + Error **errp) +{ + struct iovec iov = { + .iov_base = (char *)buf, + .iov_len = len + }; + + return qcrypto_hmac_digestv(hmac, &iov, 1, digest, errp); +} + +QCryptoHmac *qcrypto_hmac_new(QCryptoHashAlgorithm alg, + const uint8_t *key, size_t nkey, + Error **errp) +{ + QCryptoHmac *hmac; + void *ctx = NULL; + QCryptoHmacDriver *drv = NULL; + +#ifdef CONFIG_AF_ALG + ctx = qcrypto_afalg_hmac_ctx_new(alg, key, nkey, NULL); + if (ctx) { + drv = &qcrypto_hmac_afalg_driver; + } +#endif + + if (!ctx) { + ctx = qcrypto_hmac_ctx_new(alg, key, nkey, errp); + if (!ctx) { + return NULL; + } + + drv = &qcrypto_hmac_lib_driver; + } + + hmac = g_new0(QCryptoHmac, 1); + hmac->alg = alg; + hmac->opaque = ctx; + hmac->driver = (void *)drv; + + return hmac; +} + +void qcrypto_hmac_free(QCryptoHmac *hmac) +{ + QCryptoHmacDriver *drv; + + if (hmac) { + drv = hmac->driver; + drv->hmac_free(hmac); + g_free(hmac); + } +} diff --git a/crypto/hmacpriv.h b/crypto/hmacpriv.h new file mode 100644 index 000000000..4387ca258 --- /dev/null +++ b/crypto/hmacpriv.h @@ -0,0 +1,48 @@ +/* + * QEMU Crypto hmac driver supports + * + * Copyright (c) 2017 HUAWEI TECHNOLOGIES CO., LTD. + * + * Authors: + * Longpeng(Mike) + * + * This work is licensed under the terms of the GNU GPL, version 2 or + * (at your option) any later version. See the COPYING file in the + * top-level directory. + * + */ + +#ifndef QCRYPTO_HMACPRIV_H +#define QCRYPTO_HMACPRIV_H + +typedef struct QCryptoHmacDriver QCryptoHmacDriver; + +struct QCryptoHmacDriver { + int (*hmac_bytesv)(QCryptoHmac *hmac, + const struct iovec *iov, + size_t niov, + uint8_t **result, + size_t *resultlen, + Error **errp); + + void (*hmac_free)(QCryptoHmac *hmac); +}; + +extern void *qcrypto_hmac_ctx_new(QCryptoHashAlgorithm alg, + const uint8_t *key, size_t nkey, + Error **errp); +extern QCryptoHmacDriver qcrypto_hmac_lib_driver; + +#ifdef CONFIG_AF_ALG + +#include "afalgpriv.h" + +extern QCryptoAFAlg * +qcrypto_afalg_hmac_ctx_new(QCryptoHashAlgorithm alg, + const uint8_t *key, size_t nkey, + Error **errp); +extern QCryptoHmacDriver qcrypto_hmac_afalg_driver; + +#endif + +#endif diff --git a/crypto/init.c b/crypto/init.c new file mode 100644 index 000000000..ea233b919 --- /dev/null +++ b/crypto/init.c @@ -0,0 +1,137 @@ +/* + * QEMU Crypto initialization + * + * Copyright (c) 2015 Red Hat, Inc. + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, see . + * + */ + +#include "qemu/osdep.h" +#include "crypto/init.h" +#include "qapi/error.h" +#include "qemu/thread.h" + +#ifdef CONFIG_GNUTLS +#include +#include +#endif + +#ifdef CONFIG_GCRYPT +#include +#endif + +#include "crypto/random.h" + +/* #define DEBUG_GNUTLS */ + +/* + * We need to init gcrypt threading if + * + * - gcrypt < 1.6.0 + * + */ + +#if (defined(CONFIG_GCRYPT) && \ + (GCRYPT_VERSION_NUMBER < 0x010600)) +#define QCRYPTO_INIT_GCRYPT_THREADS +#else +#undef QCRYPTO_INIT_GCRYPT_THREADS +#endif + +#ifdef DEBUG_GNUTLS +static void qcrypto_gnutls_log(int level, const char *str) +{ + fprintf(stderr, "%d: %s", level, str); +} +#endif + +#ifdef QCRYPTO_INIT_GCRYPT_THREADS +static int qcrypto_gcrypt_mutex_init(void **priv) +{ \ + QemuMutex *lock = NULL; + lock = g_new0(QemuMutex, 1); + qemu_mutex_init(lock); + *priv = lock; + return 0; +} + +static int qcrypto_gcrypt_mutex_destroy(void **priv) +{ + QemuMutex *lock = *priv; + qemu_mutex_destroy(lock); + g_free(lock); + return 0; +} + +static int qcrypto_gcrypt_mutex_lock(void **priv) +{ + QemuMutex *lock = *priv; + qemu_mutex_lock(lock); + return 0; +} + +static int qcrypto_gcrypt_mutex_unlock(void **priv) +{ + QemuMutex *lock = *priv; + qemu_mutex_unlock(lock); + return 0; +} + +static struct gcry_thread_cbs qcrypto_gcrypt_thread_impl = { + (GCRY_THREAD_OPTION_PTHREAD | (GCRY_THREAD_OPTION_VERSION << 8)), + NULL, + qcrypto_gcrypt_mutex_init, + qcrypto_gcrypt_mutex_destroy, + qcrypto_gcrypt_mutex_lock, + qcrypto_gcrypt_mutex_unlock, + NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL +}; +#endif /* QCRYPTO_INIT_GCRYPT */ + +int qcrypto_init(Error **errp) +{ +#ifdef QCRYPTO_INIT_GCRYPT_THREADS + gcry_control(GCRYCTL_SET_THREAD_CBS, &qcrypto_gcrypt_thread_impl); +#endif /* QCRYPTO_INIT_GCRYPT_THREADS */ + +#ifdef CONFIG_GNUTLS + int ret; + ret = gnutls_global_init(); + if (ret < 0) { + error_setg(errp, + "Unable to initialize GNUTLS library: %s", + gnutls_strerror(ret)); + return -1; + } +#ifdef DEBUG_GNUTLS + gnutls_global_set_log_level(10); + gnutls_global_set_log_function(qcrypto_gnutls_log); +#endif +#endif + +#ifdef CONFIG_GCRYPT + if (!gcry_check_version(NULL)) { + error_setg(errp, "Unable to initialize gcrypt"); + return -1; + } + gcry_control(GCRYCTL_INITIALIZATION_FINISHED, 0); +#endif + + if (qcrypto_random_init(errp) < 0) { + return -1; + } + + return 0; +} diff --git a/crypto/ivgen-essiv.c b/crypto/ivgen-essiv.c new file mode 100644 index 000000000..3d5a18879 --- /dev/null +++ b/crypto/ivgen-essiv.c @@ -0,0 +1,120 @@ +/* + * QEMU Crypto block IV generator - essiv + * + * Copyright (c) 2015-2016 Red Hat, Inc. + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, see . + * + */ + +#include "qemu/osdep.h" +#include "qemu/bswap.h" +#include "ivgen-essiv.h" + +typedef struct QCryptoIVGenESSIV QCryptoIVGenESSIV; +struct QCryptoIVGenESSIV { + QCryptoCipher *cipher; +}; + +static int qcrypto_ivgen_essiv_init(QCryptoIVGen *ivgen, + const uint8_t *key, size_t nkey, + Error **errp) +{ + uint8_t *salt; + size_t nhash; + size_t nsalt; + QCryptoIVGenESSIV *essiv = g_new0(QCryptoIVGenESSIV, 1); + + /* Not necessarily the same as nkey */ + nsalt = qcrypto_cipher_get_key_len(ivgen->cipher); + + nhash = qcrypto_hash_digest_len(ivgen->hash); + /* Salt must be larger of hash size or key size */ + salt = g_new0(uint8_t, MAX(nhash, nsalt)); + + if (qcrypto_hash_bytes(ivgen->hash, (const gchar *)key, nkey, + &salt, &nhash, + errp) < 0) { + g_free(essiv); + g_free(salt); + return -1; + } + + /* Now potentially truncate salt to match cipher key len */ + essiv->cipher = qcrypto_cipher_new(ivgen->cipher, + QCRYPTO_CIPHER_MODE_ECB, + salt, MIN(nhash, nsalt), + errp); + if (!essiv->cipher) { + g_free(essiv); + g_free(salt); + return -1; + } + + g_free(salt); + ivgen->private = essiv; + + return 0; +} + +static int qcrypto_ivgen_essiv_calculate(QCryptoIVGen *ivgen, + uint64_t sector, + uint8_t *iv, size_t niv, + Error **errp) +{ + QCryptoIVGenESSIV *essiv = ivgen->private; + size_t ndata = qcrypto_cipher_get_block_len(ivgen->cipher); + uint8_t *data = g_new(uint8_t, ndata); + + sector = cpu_to_le64(sector); + memcpy(data, (uint8_t *)§or, MIN(sizeof(sector), ndata)); + if (sizeof(sector) < ndata) { + memset(data + sizeof(sector), 0, ndata - sizeof(sector)); + } + + if (qcrypto_cipher_encrypt(essiv->cipher, + data, + data, + ndata, + errp) < 0) { + g_free(data); + return -1; + } + + if (ndata > niv) { + ndata = niv; + } + memcpy(iv, data, ndata); + if (ndata < niv) { + memset(iv + ndata, 0, niv - ndata); + } + g_free(data); + return 0; +} + +static void qcrypto_ivgen_essiv_cleanup(QCryptoIVGen *ivgen) +{ + QCryptoIVGenESSIV *essiv = ivgen->private; + + qcrypto_cipher_free(essiv->cipher); + g_free(essiv); +} + + +struct QCryptoIVGenDriver qcrypto_ivgen_essiv = { + .init = qcrypto_ivgen_essiv_init, + .calculate = qcrypto_ivgen_essiv_calculate, + .cleanup = qcrypto_ivgen_essiv_cleanup, +}; + diff --git a/crypto/ivgen-essiv.h b/crypto/ivgen-essiv.h new file mode 100644 index 000000000..d6edecf18 --- /dev/null +++ b/crypto/ivgen-essiv.h @@ -0,0 +1,27 @@ +/* + * QEMU Crypto block IV generator - essiv + * + * Copyright (c) 2015-2016 Red Hat, Inc. + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, see . + */ + +#ifndef QCRYPTO_IVGEN_ESSIV_H +#define QCRYPTO_IVGEN_ESSIV_H + +#include "ivgenpriv.h" + +extern struct QCryptoIVGenDriver qcrypto_ivgen_essiv; + +#endif /* QCRYPTO_IVGEN_ESSIV_H */ diff --git a/crypto/ivgen-plain.c b/crypto/ivgen-plain.c new file mode 100644 index 000000000..81af198c4 --- /dev/null +++ b/crypto/ivgen-plain.c @@ -0,0 +1,60 @@ +/* + * QEMU Crypto block IV generator - plain + * + * Copyright (c) 2015-2016 Red Hat, Inc. + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, see . + * + */ + +#include "qemu/osdep.h" +#include "qemu/bswap.h" +#include "ivgen-plain.h" + +static int qcrypto_ivgen_plain_init(QCryptoIVGen *ivgen, + const uint8_t *key, size_t nkey, + Error **errp) +{ + return 0; +} + +static int qcrypto_ivgen_plain_calculate(QCryptoIVGen *ivgen, + uint64_t sector, + uint8_t *iv, size_t niv, + Error **errp) +{ + size_t ivprefix; + uint32_t shortsector = cpu_to_le32((sector & 0xffffffff)); + ivprefix = sizeof(shortsector); + if (ivprefix > niv) { + ivprefix = niv; + } + memcpy(iv, &shortsector, ivprefix); + if (ivprefix < niv) { + memset(iv + ivprefix, 0, niv - ivprefix); + } + return 0; +} + +static void qcrypto_ivgen_plain_cleanup(QCryptoIVGen *ivgen) +{ +} + + +struct QCryptoIVGenDriver qcrypto_ivgen_plain = { + .init = qcrypto_ivgen_plain_init, + .calculate = qcrypto_ivgen_plain_calculate, + .cleanup = qcrypto_ivgen_plain_cleanup, +}; + diff --git a/crypto/ivgen-plain.h b/crypto/ivgen-plain.h new file mode 100644 index 000000000..43db89880 --- /dev/null +++ b/crypto/ivgen-plain.h @@ -0,0 +1,27 @@ +/* + * QEMU Crypto block IV generator - plain + * + * Copyright (c) 2015-2016 Red Hat, Inc. + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, see . + */ + +#ifndef QCRYPTO_IVGEN_PLAIN_H__ +#define QCRYPTO_IVGEN_PLAIN_H__ + +#include "ivgenpriv.h" + +extern struct QCryptoIVGenDriver qcrypto_ivgen_plain; + +#endif /* QCRYPTO_IVGEN_PLAIN_H__ */ diff --git a/crypto/ivgen-plain64.c b/crypto/ivgen-plain64.c new file mode 100644 index 000000000..b377036c1 --- /dev/null +++ b/crypto/ivgen-plain64.c @@ -0,0 +1,60 @@ +/* + * QEMU Crypto block IV generator - plain + * + * Copyright (c) 2015-2016 Red Hat, Inc. + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, see . + * + */ + +#include "qemu/osdep.h" +#include "qemu/bswap.h" +#include "ivgen-plain.h" + +static int qcrypto_ivgen_plain_init(QCryptoIVGen *ivgen, + const uint8_t *key, size_t nkey, + Error **errp) +{ + return 0; +} + +static int qcrypto_ivgen_plain_calculate(QCryptoIVGen *ivgen, + uint64_t sector, + uint8_t *iv, size_t niv, + Error **errp) +{ + size_t ivprefix; + ivprefix = sizeof(sector); + sector = cpu_to_le64(sector); + if (ivprefix > niv) { + ivprefix = niv; + } + memcpy(iv, §or, ivprefix); + if (ivprefix < niv) { + memset(iv + ivprefix, 0, niv - ivprefix); + } + return 0; +} + +static void qcrypto_ivgen_plain_cleanup(QCryptoIVGen *ivgen) +{ +} + + +struct QCryptoIVGenDriver qcrypto_ivgen_plain64 = { + .init = qcrypto_ivgen_plain_init, + .calculate = qcrypto_ivgen_plain_calculate, + .cleanup = qcrypto_ivgen_plain_cleanup, +}; + diff --git a/crypto/ivgen-plain64.h b/crypto/ivgen-plain64.h new file mode 100644 index 000000000..f14100947 --- /dev/null +++ b/crypto/ivgen-plain64.h @@ -0,0 +1,27 @@ +/* + * QEMU Crypto block IV generator - plain64 + * + * Copyright (c) 2015-2016 Red Hat, Inc. + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, see . + */ + +#ifndef QCRYPTO_IVGEN_PLAIN64_H +#define QCRYPTO_IVGEN_PLAIN64_H + +#include "ivgenpriv.h" + +extern struct QCryptoIVGenDriver qcrypto_ivgen_plain64; + +#endif /* QCRYPTO_IVGEN_PLAIN64_H */ diff --git a/crypto/ivgen.c b/crypto/ivgen.c new file mode 100644 index 000000000..12822f851 --- /dev/null +++ b/crypto/ivgen.c @@ -0,0 +1,101 @@ +/* + * QEMU Crypto block IV generator + * + * Copyright (c) 2015-2016 Red Hat, Inc. + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, see . + * + */ + +#include "qemu/osdep.h" +#include "qapi/error.h" + +#include "ivgenpriv.h" +#include "ivgen-plain.h" +#include "ivgen-plain64.h" +#include "ivgen-essiv.h" + + +QCryptoIVGen *qcrypto_ivgen_new(QCryptoIVGenAlgorithm alg, + QCryptoCipherAlgorithm cipheralg, + QCryptoHashAlgorithm hash, + const uint8_t *key, size_t nkey, + Error **errp) +{ + QCryptoIVGen *ivgen = g_new0(QCryptoIVGen, 1); + + ivgen->algorithm = alg; + ivgen->cipher = cipheralg; + ivgen->hash = hash; + + switch (alg) { + case QCRYPTO_IVGEN_ALG_PLAIN: + ivgen->driver = &qcrypto_ivgen_plain; + break; + case QCRYPTO_IVGEN_ALG_PLAIN64: + ivgen->driver = &qcrypto_ivgen_plain64; + break; + case QCRYPTO_IVGEN_ALG_ESSIV: + ivgen->driver = &qcrypto_ivgen_essiv; + break; + default: + error_setg(errp, "Unknown block IV generator algorithm %d", alg); + g_free(ivgen); + return NULL; + } + + if (ivgen->driver->init(ivgen, key, nkey, errp) < 0) { + g_free(ivgen); + return NULL; + } + + return ivgen; +} + + +int qcrypto_ivgen_calculate(QCryptoIVGen *ivgen, + uint64_t sector, + uint8_t *iv, size_t niv, + Error **errp) +{ + return ivgen->driver->calculate(ivgen, sector, iv, niv, errp); +} + + +QCryptoIVGenAlgorithm qcrypto_ivgen_get_algorithm(QCryptoIVGen *ivgen) +{ + return ivgen->algorithm; +} + + +QCryptoCipherAlgorithm qcrypto_ivgen_get_cipher(QCryptoIVGen *ivgen) +{ + return ivgen->cipher; +} + + +QCryptoHashAlgorithm qcrypto_ivgen_get_hash(QCryptoIVGen *ivgen) +{ + return ivgen->hash; +} + + +void qcrypto_ivgen_free(QCryptoIVGen *ivgen) +{ + if (!ivgen) { + return; + } + ivgen->driver->cleanup(ivgen); + g_free(ivgen); +} diff --git a/crypto/ivgenpriv.h b/crypto/ivgenpriv.h new file mode 100644 index 000000000..cecdbedfd --- /dev/null +++ b/crypto/ivgenpriv.h @@ -0,0 +1,49 @@ +/* + * QEMU Crypto block IV generator + * + * Copyright (c) 2015-2016 Red Hat, Inc. + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, see . + * + */ + +#ifndef QCRYPTO_IVGENPRIV_H +#define QCRYPTO_IVGENPRIV_H + +#include "crypto/ivgen.h" + +typedef struct QCryptoIVGenDriver QCryptoIVGenDriver; + +struct QCryptoIVGenDriver { + int (*init)(QCryptoIVGen *ivgen, + const uint8_t *key, size_t nkey, + Error **errp); + int (*calculate)(QCryptoIVGen *ivgen, + uint64_t sector, + uint8_t *iv, size_t niv, + Error **errp); + void (*cleanup)(QCryptoIVGen *ivgen); +}; + +struct QCryptoIVGen { + QCryptoIVGenDriver *driver; + void *private; + + QCryptoIVGenAlgorithm algorithm; + QCryptoCipherAlgorithm cipher; + QCryptoHashAlgorithm hash; +}; + + +#endif /* QCRYPTO_IVGENPRIV_H */ diff --git a/crypto/meson.build b/crypto/meson.build new file mode 100644 index 000000000..7f37b5d33 --- /dev/null +++ b/crypto/meson.build @@ -0,0 +1,73 @@ +crypto_ss.add(genh) +crypto_ss.add(files( + 'afsplit.c', + 'block-luks.c', + 'block-qcow.c', + 'block.c', + 'cipher.c', + 'desrfb.c', + 'hash.c', + 'hmac.c', + 'ivgen-essiv.c', + 'ivgen-plain.c', + 'ivgen-plain64.c', + 'ivgen.c', + 'pbkdf.c', + 'secret_common.c', + 'secret.c', + 'tlscreds.c', + 'tlscredsanon.c', + 'tlscredspsk.c', + 'tlscredsx509.c', + 'tlssession.c', +)) + +if 'CONFIG_NETTLE' in config_host + crypto_ss.add(files('hash-nettle.c', 'hmac-nettle.c', 'pbkdf-nettle.c')) +elif 'CONFIG_GCRYPT' in config_host + crypto_ss.add(files('hash-gcrypt.c', 'pbkdf-gcrypt.c')) + if 'CONFIG_GCRYPT_HMAC' in config_host + crypto_ss.add(files('hmac-gcrypt.c')) + else + crypto_ss.add(files('hmac-glib.c')) + endif +else + crypto_ss.add(files('hash-glib.c', 'hmac-glib.c', 'pbkdf-stub.c')) +endif + +crypto_ss.add(when: 'CONFIG_SECRET_KEYRING', if_true: files('secret_keyring.c')) +crypto_ss.add(when: 'CONFIG_QEMU_PRIVATE_XTS', if_true: files('xts.c')) +crypto_ss.add(when: 'CONFIG_AF_ALG', if_true: files('afalg.c', 'cipher-afalg.c', 'hash-afalg.c')) +crypto_ss.add(when: 'CONFIG_GNUTLS', if_true: files('tls-cipher-suites.c')) + +if 'CONFIG_NETTLE' in config_host + crypto_ss.add(nettle) +elif 'CONFIG_GCRYPT' in config_host + crypto_ss.add(gcrypt) +endif + +if 'CONFIG_GNUTLS' in config_host + crypto_ss.add(gnutls) +endif + + +util_ss.add(files('aes.c')) +util_ss.add(files('init.c')) + +if 'CONFIG_GCRYPT' in config_host + util_ss.add(files('random-gcrypt.c')) +elif 'CONFIG_GNUTLS' in config_host + util_ss.add(files('random-gnutls.c')) +elif 'CONFIG_RNG_NONE' in config_host + util_ss.add(files('random-none.c')) +else + util_ss.add(files('random-platform.c')) +endif + +if 'CONFIG_GCRYPT' in config_host + util_ss.add(gcrypt) +endif + +if 'CONFIG_GNUTLS' in config_host + util_ss.add(gnutls) +endif diff --git a/crypto/pbkdf-gcrypt.c b/crypto/pbkdf-gcrypt.c new file mode 100644 index 000000000..a8d8e64f4 --- /dev/null +++ b/crypto/pbkdf-gcrypt.c @@ -0,0 +1,86 @@ +/* + * QEMU Crypto PBKDF support (Password-Based Key Derivation Function) + * + * Copyright (c) 2015-2016 Red Hat, Inc. + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, see . + * + */ + +#include "qemu/osdep.h" +#include +#include "qapi/error.h" +#include "crypto/pbkdf.h" + +bool qcrypto_pbkdf2_supports(QCryptoHashAlgorithm hash) +{ + switch (hash) { + case QCRYPTO_HASH_ALG_MD5: + case QCRYPTO_HASH_ALG_SHA1: + case QCRYPTO_HASH_ALG_SHA224: + case QCRYPTO_HASH_ALG_SHA256: + case QCRYPTO_HASH_ALG_SHA384: + case QCRYPTO_HASH_ALG_SHA512: + case QCRYPTO_HASH_ALG_RIPEMD160: + return true; + default: + return false; + } +} + +int qcrypto_pbkdf2(QCryptoHashAlgorithm hash, + const uint8_t *key, size_t nkey, + const uint8_t *salt, size_t nsalt, + uint64_t iterations, + uint8_t *out, size_t nout, + Error **errp) +{ + static const int hash_map[QCRYPTO_HASH_ALG__MAX] = { + [QCRYPTO_HASH_ALG_MD5] = GCRY_MD_MD5, + [QCRYPTO_HASH_ALG_SHA1] = GCRY_MD_SHA1, + [QCRYPTO_HASH_ALG_SHA224] = GCRY_MD_SHA224, + [QCRYPTO_HASH_ALG_SHA256] = GCRY_MD_SHA256, + [QCRYPTO_HASH_ALG_SHA384] = GCRY_MD_SHA384, + [QCRYPTO_HASH_ALG_SHA512] = GCRY_MD_SHA512, + [QCRYPTO_HASH_ALG_RIPEMD160] = GCRY_MD_RMD160, + }; + int ret; + + if (iterations > ULONG_MAX) { + error_setg_errno(errp, ERANGE, + "PBKDF iterations %llu must be less than %lu", + (long long unsigned)iterations, ULONG_MAX); + return -1; + } + + if (hash >= G_N_ELEMENTS(hash_map) || + hash_map[hash] == GCRY_MD_NONE) { + error_setg_errno(errp, ENOSYS, + "PBKDF does not support hash algorithm %s", + QCryptoHashAlgorithm_str(hash)); + return -1; + } + + ret = gcry_kdf_derive(key, nkey, GCRY_KDF_PBKDF2, + hash_map[hash], + salt, nsalt, iterations, + nout, out); + if (ret != 0) { + error_setg(errp, "Cannot derive password: %s", + gcry_strerror(ret)); + return -1; + } + + return 0; +} diff --git a/crypto/pbkdf-nettle.c b/crypto/pbkdf-nettle.c new file mode 100644 index 000000000..d6293c25a --- /dev/null +++ b/crypto/pbkdf-nettle.c @@ -0,0 +1,117 @@ +/* + * QEMU Crypto PBKDF support (Password-Based Key Derivation Function) + * + * Copyright (c) 2015-2016 Red Hat, Inc. + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, see . + * + */ + +#include "qemu/osdep.h" +#include +#include +#include "qapi/error.h" +#include "crypto/pbkdf.h" + + +bool qcrypto_pbkdf2_supports(QCryptoHashAlgorithm hash) +{ + switch (hash) { + case QCRYPTO_HASH_ALG_SHA1: + case QCRYPTO_HASH_ALG_SHA224: + case QCRYPTO_HASH_ALG_SHA256: + case QCRYPTO_HASH_ALG_SHA384: + case QCRYPTO_HASH_ALG_SHA512: + case QCRYPTO_HASH_ALG_RIPEMD160: + return true; + default: + return false; + } +} + +int qcrypto_pbkdf2(QCryptoHashAlgorithm hash, + const uint8_t *key, size_t nkey, + const uint8_t *salt, size_t nsalt, + uint64_t iterations, + uint8_t *out, size_t nout, + Error **errp) +{ + union { + struct hmac_md5_ctx md5; + struct hmac_sha1_ctx sha1; + struct hmac_sha224_ctx sha224; + struct hmac_sha256_ctx sha256; + struct hmac_sha384_ctx sha384; + struct hmac_sha512_ctx sha512; + struct hmac_ripemd160_ctx ripemd160; + } ctx; + + if (iterations > UINT_MAX) { + error_setg_errno(errp, ERANGE, + "PBKDF iterations %llu must be less than %u", + (long long unsigned)iterations, UINT_MAX); + return -1; + } + + switch (hash) { + case QCRYPTO_HASH_ALG_MD5: + hmac_md5_set_key(&ctx.md5, nkey, key); + PBKDF2(&ctx.md5, hmac_md5_update, hmac_md5_digest, + MD5_DIGEST_SIZE, iterations, nsalt, salt, nout, out); + break; + + case QCRYPTO_HASH_ALG_SHA1: + hmac_sha1_set_key(&ctx.sha1, nkey, key); + PBKDF2(&ctx.sha1, hmac_sha1_update, hmac_sha1_digest, + SHA1_DIGEST_SIZE, iterations, nsalt, salt, nout, out); + break; + + case QCRYPTO_HASH_ALG_SHA224: + hmac_sha224_set_key(&ctx.sha224, nkey, key); + PBKDF2(&ctx.sha224, hmac_sha224_update, hmac_sha224_digest, + SHA224_DIGEST_SIZE, iterations, nsalt, salt, nout, out); + break; + + case QCRYPTO_HASH_ALG_SHA256: + hmac_sha256_set_key(&ctx.sha256, nkey, key); + PBKDF2(&ctx.sha256, hmac_sha256_update, hmac_sha256_digest, + SHA256_DIGEST_SIZE, iterations, nsalt, salt, nout, out); + break; + + case QCRYPTO_HASH_ALG_SHA384: + hmac_sha384_set_key(&ctx.sha384, nkey, key); + PBKDF2(&ctx.sha384, hmac_sha384_update, hmac_sha384_digest, + SHA384_DIGEST_SIZE, iterations, nsalt, salt, nout, out); + break; + + case QCRYPTO_HASH_ALG_SHA512: + hmac_sha512_set_key(&ctx.sha512, nkey, key); + PBKDF2(&ctx.sha512, hmac_sha512_update, hmac_sha512_digest, + SHA512_DIGEST_SIZE, iterations, nsalt, salt, nout, out); + break; + + case QCRYPTO_HASH_ALG_RIPEMD160: + hmac_ripemd160_set_key(&ctx.ripemd160, nkey, key); + PBKDF2(&ctx.ripemd160, hmac_ripemd160_update, hmac_ripemd160_digest, + RIPEMD160_DIGEST_SIZE, iterations, nsalt, salt, nout, out); + break; + + default: + error_setg_errno(errp, ENOSYS, + "PBKDF does not support hash algorithm %s", + QCryptoHashAlgorithm_str(hash)); + return -1; + } + return 0; +} diff --git a/crypto/pbkdf-stub.c b/crypto/pbkdf-stub.c new file mode 100644 index 000000000..9c4622e42 --- /dev/null +++ b/crypto/pbkdf-stub.c @@ -0,0 +1,43 @@ +/* + * QEMU Crypto PBKDF support (Password-Based Key Derivation Function) + * + * Copyright (c) 2015-2016 Red Hat, Inc. + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, see . + * + */ + +#include "qemu/osdep.h" +#include "qapi/error.h" +#include "crypto/pbkdf.h" + +bool qcrypto_pbkdf2_supports(QCryptoHashAlgorithm hash G_GNUC_UNUSED) +{ + return false; +} + +int qcrypto_pbkdf2(QCryptoHashAlgorithm hash G_GNUC_UNUSED, + const uint8_t *key G_GNUC_UNUSED, + size_t nkey G_GNUC_UNUSED, + const uint8_t *salt G_GNUC_UNUSED, + size_t nsalt G_GNUC_UNUSED, + uint64_t iterations G_GNUC_UNUSED, + uint8_t *out G_GNUC_UNUSED, + size_t nout G_GNUC_UNUSED, + Error **errp) +{ + error_setg_errno(errp, ENOSYS, + "No crypto library supporting PBKDF in this build"); + return -1; +} diff --git a/crypto/pbkdf.c b/crypto/pbkdf.c new file mode 100644 index 000000000..3775ddc6c --- /dev/null +++ b/crypto/pbkdf.c @@ -0,0 +1,110 @@ +/* + * QEMU Crypto PBKDF support (Password-Based Key Derivation Function) + * + * Copyright (c) 2015-2016 Red Hat, Inc. + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, see . + * + */ + +#include "qemu/osdep.h" +#include "qapi/error.h" +#include "crypto/pbkdf.h" +#ifndef _WIN32 +#include +#endif + + +static int qcrypto_pbkdf2_get_thread_cpu(unsigned long long *val_ms, + Error **errp) +{ +#ifdef _WIN32 + FILETIME creation_time, exit_time, kernel_time, user_time; + ULARGE_INTEGER thread_time; + + if (!GetThreadTimes(GetCurrentThread(), &creation_time, &exit_time, + &kernel_time, &user_time)) { + error_setg(errp, "Unable to get thread CPU usage"); + return -1; + } + + thread_time.LowPart = user_time.dwLowDateTime; + thread_time.HighPart = user_time.dwHighDateTime; + + /* QuadPart is units of 100ns and we want ms as unit */ + *val_ms = thread_time.QuadPart / 10000ll; + return 0; +#elif defined(RUSAGE_THREAD) + struct rusage ru; + if (getrusage(RUSAGE_THREAD, &ru) < 0) { + error_setg_errno(errp, errno, "Unable to get thread CPU usage"); + return -1; + } + + *val_ms = ((ru.ru_utime.tv_sec * 1000ll) + + (ru.ru_utime.tv_usec / 1000)); + return 0; +#else + *val_ms = 0; + error_setg(errp, "Unable to calculate thread CPU usage on this platform"); + return -1; +#endif +} + +uint64_t qcrypto_pbkdf2_count_iters(QCryptoHashAlgorithm hash, + const uint8_t *key, size_t nkey, + const uint8_t *salt, size_t nsalt, + size_t nout, + Error **errp) +{ + uint64_t ret = -1; + g_autofree uint8_t *out = g_new(uint8_t, nout); + uint64_t iterations = (1 << 15); + unsigned long long delta_ms, start_ms, end_ms; + + while (1) { + if (qcrypto_pbkdf2_get_thread_cpu(&start_ms, errp) < 0) { + goto cleanup; + } + if (qcrypto_pbkdf2(hash, + key, nkey, + salt, nsalt, + iterations, + out, nout, + errp) < 0) { + goto cleanup; + } + if (qcrypto_pbkdf2_get_thread_cpu(&end_ms, errp) < 0) { + goto cleanup; + } + + delta_ms = end_ms - start_ms; + + if (delta_ms > 500) { + break; + } else if (delta_ms < 100) { + iterations = iterations * 10; + } else { + iterations = (iterations * 1000 / delta_ms); + } + } + + iterations = iterations * 1000 / delta_ms; + + ret = iterations; + + cleanup: + memset(out, 0, nout); + return ret; +} diff --git a/crypto/random-gcrypt.c b/crypto/random-gcrypt.c new file mode 100644 index 000000000..8306f16b6 --- /dev/null +++ b/crypto/random-gcrypt.c @@ -0,0 +1,35 @@ +/* + * QEMU Crypto random number provider + * + * Copyright (c) 2015-2016 Red Hat, Inc. + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, see . + * + */ + +#include "qemu/osdep.h" + +#include "crypto/random.h" + +#include + +int qcrypto_random_bytes(void *buf, + size_t buflen, + Error **errp G_GNUC_UNUSED) +{ + gcry_randomize(buf, buflen, GCRY_STRONG_RANDOM); + return 0; +} + +int qcrypto_random_init(Error **errp G_GNUC_UNUSED) { return 0; } diff --git a/crypto/random-gnutls.c b/crypto/random-gnutls.c new file mode 100644 index 000000000..96af91aee --- /dev/null +++ b/crypto/random-gnutls.c @@ -0,0 +1,47 @@ +/* + * QEMU Crypto random number provider + * + * Copyright (c) 2015-2016 Red Hat, Inc. + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, see . + * + */ + +#include "qemu/osdep.h" + +#include "crypto/random.h" +#include "qapi/error.h" + +#include +#include + +int qcrypto_random_bytes(void *buf, + size_t buflen, + Error **errp) +{ + int ret; + + ret = gnutls_rnd(GNUTLS_RND_RANDOM, buf, buflen); + + if (ret < 0) { + error_setg(errp, "Cannot get random bytes: %s", + gnutls_strerror(ret)); + return -1; + } + + return 0; +} + + +int qcrypto_random_init(Error **errp G_GNUC_UNUSED) { return 0; } diff --git a/crypto/random-none.c b/crypto/random-none.c new file mode 100644 index 000000000..102f8a4dc --- /dev/null +++ b/crypto/random-none.c @@ -0,0 +1,38 @@ +/* + * QEMU Crypto "none" random number provider + * + * Copyright (c) 2020 Marek Marczykowski-Górecki + * + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, see . + * + */ + +#include "qemu/osdep.h" + +#include "crypto/random.h" +#include "qapi/error.h" + +int qcrypto_random_init(Error **errp) +{ + return 0; +} + +int qcrypto_random_bytes(void *buf, + size_t buflen, + Error **errp) +{ + error_setg(errp, "Random bytes not available with \"none\" rng"); + return -1; +} diff --git a/crypto/random-platform.c b/crypto/random-platform.c new file mode 100644 index 000000000..f92f96987 --- /dev/null +++ b/crypto/random-platform.c @@ -0,0 +1,114 @@ +/* + * QEMU Crypto random number provider + * + * Copyright (c) 2015-2016 Red Hat, Inc. + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, see . + * + */ + +#include "qemu/osdep.h" + +#include "crypto/random.h" +#include "qapi/error.h" + +#ifdef _WIN32 +#include +static HCRYPTPROV hCryptProv; +#else +# ifdef CONFIG_GETRANDOM +# include +# endif +/* This is -1 for getrandom(), or a file handle for /dev/{u,}random. */ +static int fd; +#endif + +int qcrypto_random_init(Error **errp) +{ +#ifdef _WIN32 + if (!CryptAcquireContext(&hCryptProv, NULL, NULL, PROV_RSA_FULL, + CRYPT_SILENT | CRYPT_VERIFYCONTEXT)) { + error_setg_win32(errp, GetLastError(), + "Unable to create cryptographic provider"); + return -1; + } +#else +# ifdef CONFIG_GETRANDOM + if (getrandom(NULL, 0, 0) == 0) { + /* Use getrandom() */ + fd = -1; + return 0; + } + /* Fall through to /dev/urandom case. */ +# endif + fd = open("/dev/urandom", O_RDONLY | O_CLOEXEC); + if (fd == -1 && errno == ENOENT) { + fd = open("/dev/random", O_RDONLY | O_CLOEXEC); + } + if (fd < 0) { + error_setg_errno(errp, errno, "No /dev/urandom or /dev/random"); + return -1; + } +#endif + return 0; +} + +int qcrypto_random_bytes(void *buf, + size_t buflen, + Error **errp) +{ +#ifdef _WIN32 + if (!CryptGenRandom(hCryptProv, buflen, buf)) { + error_setg_win32(errp, GetLastError(), + "Unable to read random bytes"); + return -1; + } +#else +# ifdef CONFIG_GETRANDOM + if (likely(fd < 0)) { + while (1) { + ssize_t got = getrandom(buf, buflen, 0); + if (likely(got == buflen)) { + return 0; + } + if (got >= 0) { + buflen -= got; + buf += got; + } else if (errno != EINTR) { + error_setg_errno(errp, errno, "getrandom"); + return -1; + } + } + } + /* Fall through to /dev/urandom case. */ +# endif + while (1) { + ssize_t got = read(fd, buf, buflen); + if (likely(got == buflen)) { + return 0; + } + if (got > 0) { + buflen -= got; + buf += got; + } else if (got == 0) { + error_setg(errp, "Unexpected EOF reading random bytes"); + return -1; + } else if (errno != EINTR) { + error_setg_errno(errp, errno, "Unable to read random bytes"); + return -1; + } + } +#endif + return 0; +} diff --git a/crypto/secret.c b/crypto/secret.c new file mode 100644 index 000000000..281cb81f0 --- /dev/null +++ b/crypto/secret.c @@ -0,0 +1,165 @@ +/* + * QEMU crypto secret support + * + * Copyright (c) 2015 Red Hat, Inc. + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, see . + * + */ + +#include "qemu/osdep.h" +#include "crypto/secret.h" +#include "qapi/error.h" +#include "qom/object_interfaces.h" +#include "qemu/module.h" +#include "trace.h" + + +static void +qcrypto_secret_load_data(QCryptoSecretCommon *sec_common, + uint8_t **output, + size_t *outputlen, + Error **errp) +{ + char *data = NULL; + size_t length = 0; + GError *gerr = NULL; + + QCryptoSecret *secret = QCRYPTO_SECRET(sec_common); + + *output = NULL; + *outputlen = 0; + + if (secret->file) { + if (secret->data) { + error_setg(errp, + "'file' and 'data' are mutually exclusive"); + return; + } + if (!g_file_get_contents(secret->file, &data, &length, &gerr)) { + error_setg(errp, + "Unable to read %s: %s", + secret->file, gerr->message); + g_error_free(gerr); + return; + } + *output = (uint8_t *)data; + *outputlen = length; + } else if (secret->data) { + *outputlen = strlen(secret->data); + *output = (uint8_t *)g_strdup(secret->data); + } else { + error_setg(errp, "Either 'file' or 'data' must be provided"); + } +} + + +static void +qcrypto_secret_prop_set_data(Object *obj, + const char *value, + Error **errp) +{ + QCryptoSecret *secret = QCRYPTO_SECRET(obj); + + g_free(secret->data); + secret->data = g_strdup(value); +} + + +static char * +qcrypto_secret_prop_get_data(Object *obj, + Error **errp) +{ + QCryptoSecret *secret = QCRYPTO_SECRET(obj); + return g_strdup(secret->data); +} + + +static void +qcrypto_secret_prop_set_file(Object *obj, + const char *value, + Error **errp) +{ + QCryptoSecret *secret = QCRYPTO_SECRET(obj); + + g_free(secret->file); + secret->file = g_strdup(value); +} + + +static char * +qcrypto_secret_prop_get_file(Object *obj, + Error **errp) +{ + QCryptoSecret *secret = QCRYPTO_SECRET(obj); + return g_strdup(secret->file); +} + + +static void +qcrypto_secret_complete(UserCreatable *uc, Error **errp) +{ + object_property_set_bool(OBJECT(uc), "loaded", true, errp); +} + + +static void +qcrypto_secret_finalize(Object *obj) +{ + QCryptoSecret *secret = QCRYPTO_SECRET(obj); + + g_free(secret->file); + g_free(secret->data); +} + +static void +qcrypto_secret_class_init(ObjectClass *oc, void *data) +{ + QCryptoSecretCommonClass *sic = QCRYPTO_SECRET_COMMON_CLASS(oc); + sic->load_data = qcrypto_secret_load_data; + + UserCreatableClass *ucc = USER_CREATABLE_CLASS(oc); + ucc->complete = qcrypto_secret_complete; + + object_class_property_add_str(oc, "data", + qcrypto_secret_prop_get_data, + qcrypto_secret_prop_set_data); + object_class_property_add_str(oc, "file", + qcrypto_secret_prop_get_file, + qcrypto_secret_prop_set_file); +} + + +static const TypeInfo qcrypto_secret_info = { + .parent = TYPE_QCRYPTO_SECRET_COMMON, + .name = TYPE_QCRYPTO_SECRET, + .instance_size = sizeof(QCryptoSecret), + .instance_finalize = qcrypto_secret_finalize, + .class_size = sizeof(QCryptoSecretClass), + .class_init = qcrypto_secret_class_init, + .interfaces = (InterfaceInfo[]) { + { TYPE_USER_CREATABLE }, + { } + } +}; + + +static void +qcrypto_secret_register_types(void) +{ + type_register_static(&qcrypto_secret_info); +} + + +type_init(qcrypto_secret_register_types); diff --git a/crypto/secret_common.c b/crypto/secret_common.c new file mode 100644 index 000000000..b03d53086 --- /dev/null +++ b/crypto/secret_common.c @@ -0,0 +1,403 @@ +/* + * QEMU crypto secret support + * + * Copyright (c) 2015 Red Hat, Inc. + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, see . + * + */ + +#include "qemu/osdep.h" +#include "crypto/secret_common.h" +#include "crypto/cipher.h" +#include "qapi/error.h" +#include "qom/object_interfaces.h" +#include "qemu/base64.h" +#include "qemu/module.h" +#include "trace.h" + + +static void qcrypto_secret_decrypt(QCryptoSecretCommon *secret, + const uint8_t *input, + size_t inputlen, + uint8_t **output, + size_t *outputlen, + Error **errp) +{ + g_autofree uint8_t *iv = NULL; + g_autofree uint8_t *key = NULL; + g_autofree uint8_t *ciphertext = NULL; + size_t keylen, ciphertextlen, ivlen; + g_autoptr(QCryptoCipher) aes = NULL; + g_autofree uint8_t *plaintext = NULL; + + *output = NULL; + *outputlen = 0; + + if (qcrypto_secret_lookup(secret->keyid, + &key, &keylen, + errp) < 0) { + return; + } + + if (keylen != 32) { + error_setg(errp, "Key should be 32 bytes in length"); + return; + } + + if (!secret->iv) { + error_setg(errp, "IV is required to decrypt secret"); + return; + } + + iv = qbase64_decode(secret->iv, -1, &ivlen, errp); + if (!iv) { + return; + } + if (ivlen != 16) { + error_setg(errp, "IV should be 16 bytes in length not %zu", + ivlen); + return; + } + + aes = qcrypto_cipher_new(QCRYPTO_CIPHER_ALG_AES_256, + QCRYPTO_CIPHER_MODE_CBC, + key, keylen, + errp); + if (!aes) { + return; + } + + if (qcrypto_cipher_setiv(aes, iv, ivlen, errp) < 0) { + return; + } + + if (secret->format == QCRYPTO_SECRET_FORMAT_BASE64) { + ciphertext = qbase64_decode((const gchar *)input, + inputlen, + &ciphertextlen, + errp); + if (!ciphertext) { + return; + } + plaintext = g_new0(uint8_t, ciphertextlen + 1); + } else { + ciphertextlen = inputlen; + plaintext = g_new0(uint8_t, inputlen + 1); + } + if (qcrypto_cipher_decrypt(aes, + ciphertext ? ciphertext : input, + plaintext, + ciphertextlen, + errp) < 0) { + return; + } + + if (plaintext[ciphertextlen - 1] > 16 || + plaintext[ciphertextlen - 1] > ciphertextlen) { + error_setg(errp, "Incorrect number of padding bytes (%d) " + "found on decrypted data", + (int)plaintext[ciphertextlen - 1]); + return; + } + + /* + * Even though plaintext may contain arbitrary NUL + * ensure it is explicitly NUL terminated. + */ + ciphertextlen -= plaintext[ciphertextlen - 1]; + plaintext[ciphertextlen] = '\0'; + + *output = g_steal_pointer(&plaintext); + *outputlen = ciphertextlen; +} + + +static void qcrypto_secret_decode(const uint8_t *input, + size_t inputlen, + uint8_t **output, + size_t *outputlen, + Error **errp) +{ + *output = qbase64_decode((const gchar *)input, + inputlen, + outputlen, + errp); +} + + +static void +qcrypto_secret_prop_set_loaded(Object *obj, + bool value, + Error **errp) +{ + QCryptoSecretCommon *secret = QCRYPTO_SECRET_COMMON(obj); + QCryptoSecretCommonClass *sec_class + = QCRYPTO_SECRET_COMMON_GET_CLASS(obj); + + if (value) { + Error *local_err = NULL; + uint8_t *input = NULL; + size_t inputlen = 0; + uint8_t *output = NULL; + size_t outputlen = 0; + + if (sec_class->load_data) { + sec_class->load_data(secret, &input, &inputlen, &local_err); + if (local_err) { + error_propagate(errp, local_err); + return; + } + } else { + error_setg(errp, "%s provides no 'load_data' method'", + object_get_typename(obj)); + return; + } + + if (secret->keyid) { + qcrypto_secret_decrypt(secret, input, inputlen, + &output, &outputlen, &local_err); + g_free(input); + if (local_err) { + error_propagate(errp, local_err); + return; + } + input = output; + inputlen = outputlen; + } else { + if (secret->format == QCRYPTO_SECRET_FORMAT_BASE64) { + qcrypto_secret_decode(input, inputlen, + &output, &outputlen, &local_err); + g_free(input); + if (local_err) { + error_propagate(errp, local_err); + return; + } + input = output; + inputlen = outputlen; + } + } + + secret->rawdata = input; + secret->rawlen = inputlen; + } else { + g_free(secret->rawdata); + secret->rawlen = 0; + } +} + + +static bool +qcrypto_secret_prop_get_loaded(Object *obj, + Error **errp G_GNUC_UNUSED) +{ + QCryptoSecretCommon *secret = QCRYPTO_SECRET_COMMON(obj); + return secret->rawdata != NULL; +} + + +static void +qcrypto_secret_prop_set_format(Object *obj, + int value, + Error **errp G_GNUC_UNUSED) +{ + QCryptoSecretCommon *creds = QCRYPTO_SECRET_COMMON(obj); + creds->format = value; +} + + +static int +qcrypto_secret_prop_get_format(Object *obj, + Error **errp G_GNUC_UNUSED) +{ + QCryptoSecretCommon *creds = QCRYPTO_SECRET_COMMON(obj); + return creds->format; +} + + +static void +qcrypto_secret_prop_set_iv(Object *obj, + const char *value, + Error **errp) +{ + QCryptoSecretCommon *secret = QCRYPTO_SECRET_COMMON(obj); + + g_free(secret->iv); + secret->iv = g_strdup(value); +} + + +static char * +qcrypto_secret_prop_get_iv(Object *obj, + Error **errp) +{ + QCryptoSecretCommon *secret = QCRYPTO_SECRET_COMMON(obj); + return g_strdup(secret->iv); +} + + +static void +qcrypto_secret_prop_set_keyid(Object *obj, + const char *value, + Error **errp) +{ + QCryptoSecretCommon *secret = QCRYPTO_SECRET_COMMON(obj); + + g_free(secret->keyid); + secret->keyid = g_strdup(value); +} + + +static char * +qcrypto_secret_prop_get_keyid(Object *obj, + Error **errp) +{ + QCryptoSecretCommon *secret = QCRYPTO_SECRET_COMMON(obj); + return g_strdup(secret->keyid); +} + + +static void +qcrypto_secret_finalize(Object *obj) +{ + QCryptoSecretCommon *secret = QCRYPTO_SECRET_COMMON(obj); + + g_free(secret->iv); + g_free(secret->keyid); + g_free(secret->rawdata); +} + +static void +qcrypto_secret_class_init(ObjectClass *oc, void *data) +{ + object_class_property_add_bool(oc, "loaded", + qcrypto_secret_prop_get_loaded, + qcrypto_secret_prop_set_loaded); + object_class_property_add_enum(oc, "format", + "QCryptoSecretFormat", + &QCryptoSecretFormat_lookup, + qcrypto_secret_prop_get_format, + qcrypto_secret_prop_set_format); + object_class_property_add_str(oc, "keyid", + qcrypto_secret_prop_get_keyid, + qcrypto_secret_prop_set_keyid); + object_class_property_add_str(oc, "iv", + qcrypto_secret_prop_get_iv, + qcrypto_secret_prop_set_iv); +} + + +int qcrypto_secret_lookup(const char *secretid, + uint8_t **data, + size_t *datalen, + Error **errp) +{ + Object *obj; + QCryptoSecretCommon *secret; + + obj = object_resolve_path_component( + object_get_objects_root(), secretid); + if (!obj) { + error_setg(errp, "No secret with id '%s'", secretid); + return -1; + } + + secret = (QCryptoSecretCommon *) + object_dynamic_cast(obj, + TYPE_QCRYPTO_SECRET_COMMON); + if (!secret) { + error_setg(errp, "Object with id '%s' is not a secret", + secretid); + return -1; + } + + if (!secret->rawdata) { + error_setg(errp, "Secret with id '%s' has no data", + secretid); + return -1; + } + + *data = g_new0(uint8_t, secret->rawlen + 1); + memcpy(*data, secret->rawdata, secret->rawlen); + (*data)[secret->rawlen] = '\0'; + *datalen = secret->rawlen; + + return 0; +} + + +char *qcrypto_secret_lookup_as_utf8(const char *secretid, + Error **errp) +{ + uint8_t *data; + size_t datalen; + + if (qcrypto_secret_lookup(secretid, + &data, + &datalen, + errp) < 0) { + return NULL; + } + + if (!g_utf8_validate((const gchar *)data, datalen, NULL)) { + error_setg(errp, + "Data from secret %s is not valid UTF-8", + secretid); + g_free(data); + return NULL; + } + + return (char *)data; +} + + +char *qcrypto_secret_lookup_as_base64(const char *secretid, + Error **errp) +{ + uint8_t *data; + size_t datalen; + char *ret; + + if (qcrypto_secret_lookup(secretid, + &data, + &datalen, + errp) < 0) { + return NULL; + } + + ret = g_base64_encode(data, datalen); + g_free(data); + return ret; +} + + +static const TypeInfo qcrypto_secret_info = { + .parent = TYPE_OBJECT, + .name = TYPE_QCRYPTO_SECRET_COMMON, + .instance_size = sizeof(QCryptoSecretCommon), + .instance_finalize = qcrypto_secret_finalize, + .class_size = sizeof(QCryptoSecretCommonClass), + .class_init = qcrypto_secret_class_init, + .abstract = true, +}; + + +static void +qcrypto_secret_register_types(void) +{ + type_register_static(&qcrypto_secret_info); +} + + +type_init(qcrypto_secret_register_types); diff --git a/crypto/secret_keyring.c b/crypto/secret_keyring.c new file mode 100644 index 000000000..10d8bc48a --- /dev/null +++ b/crypto/secret_keyring.c @@ -0,0 +1,147 @@ +/* + * QEMU crypto secret support + * + * Copyright 2020 Yandex N.V. + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, see . + * + */ + +#include "qemu/osdep.h" +#include +#include +#include "qapi/error.h" +#include "qom/object_interfaces.h" +#include "trace.h" +#include "crypto/secret_keyring.h" + + +static inline +long keyctl_read(int32_t key, uint8_t *buffer, size_t buflen) +{ + return syscall(__NR_keyctl, KEYCTL_READ, key, buffer, buflen, 0); +} + + +static void +qcrypto_secret_keyring_load_data(QCryptoSecretCommon *sec_common, + uint8_t **output, + size_t *outputlen, + Error **errp) +{ + QCryptoSecretKeyring *secret = QCRYPTO_SECRET_KEYRING(sec_common); + uint8_t *buffer = NULL; + long retcode; + + *output = NULL; + *outputlen = 0; + + if (!secret->serial) { + error_setg(errp, "'serial' parameter must be provided"); + return; + } + + retcode = keyctl_read(secret->serial, NULL, 0); + if (retcode <= 0) { + goto keyctl_error; + } + + buffer = g_new0(uint8_t, retcode); + + retcode = keyctl_read(secret->serial, buffer, retcode); + if (retcode < 0) { + g_free(buffer); + goto keyctl_error; + } + + *outputlen = retcode; + *output = buffer; + return; + +keyctl_error: + error_setg_errno(errp, errno, + "Unable to read serial key %08x", + secret->serial); +} + + +static void +qcrypto_secret_prop_set_key(Object *obj, Visitor *v, + const char *name, void *opaque, + Error **errp) +{ + QCryptoSecretKeyring *secret = QCRYPTO_SECRET_KEYRING(obj); + int32_t value; + visit_type_int32(v, name, &value, errp); + if (!value) { + error_setg(errp, "'serial' should not be equal to 0"); + } + secret->serial = value; +} + + +static void +qcrypto_secret_prop_get_key(Object *obj, Visitor *v, + const char *name, void *opaque, + Error **errp) +{ + QCryptoSecretKeyring *secret = QCRYPTO_SECRET_KEYRING(obj); + int32_t value = secret->serial; + visit_type_int32(v, name, &value, errp); +} + + +static void +qcrypto_secret_keyring_complete(UserCreatable *uc, Error **errp) +{ + object_property_set_bool(OBJECT(uc), "loaded", true, errp); +} + + +static void +qcrypto_secret_keyring_class_init(ObjectClass *oc, void *data) +{ + QCryptoSecretCommonClass *sic = QCRYPTO_SECRET_COMMON_CLASS(oc); + sic->load_data = qcrypto_secret_keyring_load_data; + + UserCreatableClass *ucc = USER_CREATABLE_CLASS(oc); + ucc->complete = qcrypto_secret_keyring_complete; + + object_class_property_add(oc, "serial", "int32_t", + qcrypto_secret_prop_get_key, + qcrypto_secret_prop_set_key, + NULL, NULL); +} + + +static const TypeInfo qcrypto_secret_info = { + .parent = TYPE_QCRYPTO_SECRET_COMMON, + .name = TYPE_QCRYPTO_SECRET_KEYRING, + .instance_size = sizeof(QCryptoSecretKeyring), + .class_init = qcrypto_secret_keyring_class_init, + .interfaces = (InterfaceInfo[]) { + { TYPE_USER_CREATABLE }, + { } + } +}; + + +static void +qcrypto_secret_register_types(void) +{ + type_register_static(&qcrypto_secret_info); +} + + +type_init(qcrypto_secret_register_types); diff --git a/crypto/tls-cipher-suites.c b/crypto/tls-cipher-suites.c new file mode 100644 index 000000000..55fb5f7c1 --- /dev/null +++ b/crypto/tls-cipher-suites.c @@ -0,0 +1,126 @@ +/* + * QEMU TLS Cipher Suites + * + * Copyright (c) 2018-2020 Red Hat, Inc. + * + * Author: Philippe Mathieu-Daudé + * + * SPDX-License-Identifier: GPL-2.0-or-later + */ + +#include "qemu/osdep.h" +#include "qapi/error.h" +#include "qom/object_interfaces.h" +#include "crypto/tlscreds.h" +#include "crypto/tls-cipher-suites.h" +#include "hw/nvram/fw_cfg.h" +#include "trace.h" + +/* + * IANA registered TLS ciphers: + * https://www.iana.org/assignments/tls-parameters/tls-parameters.xhtml#tls-parameters-4 + */ +typedef struct { + uint8_t data[2]; +} QEMU_PACKED IANA_TLS_CIPHER; + +GByteArray *qcrypto_tls_cipher_suites_get_data(QCryptoTLSCipherSuites *obj, + Error **errp) +{ + QCryptoTLSCreds *creds = QCRYPTO_TLS_CREDS(obj); + gnutls_priority_t pcache; + GByteArray *byte_array; + const char *err; + size_t i; + int ret; + + trace_qcrypto_tls_cipher_suite_priority(creds->priority); + ret = gnutls_priority_init(&pcache, creds->priority, &err); + if (ret < 0) { + error_setg(errp, "Syntax error using priority '%s': %s", + creds->priority, gnutls_strerror(ret)); + return NULL; + } + + byte_array = g_byte_array_new(); + + for (i = 0;; i++) { + int ret; + unsigned idx; + const char *name; + IANA_TLS_CIPHER cipher; + gnutls_protocol_t protocol; + const char *version; + + ret = gnutls_priority_get_cipher_suite_index(pcache, i, &idx); + if (ret == GNUTLS_E_REQUESTED_DATA_NOT_AVAILABLE) { + break; + } + if (ret == GNUTLS_E_UNKNOWN_CIPHER_SUITE) { + continue; + } + + name = gnutls_cipher_suite_info(idx, (unsigned char *)&cipher, + NULL, NULL, NULL, &protocol); + if (name == NULL) { + continue; + } + + version = gnutls_protocol_get_name(protocol); + g_byte_array_append(byte_array, cipher.data, 2); + trace_qcrypto_tls_cipher_suite_info(cipher.data[0], + cipher.data[1], + version, name); + } + trace_qcrypto_tls_cipher_suite_count(byte_array->len); + gnutls_priority_deinit(pcache); + + return byte_array; +} + +static void qcrypto_tls_cipher_suites_complete(UserCreatable *uc, + Error **errp) +{ + QCryptoTLSCreds *creds = QCRYPTO_TLS_CREDS(uc); + + if (!creds->priority) { + error_setg(errp, "'priority' property is not set"); + return; + } +} + +static GByteArray *qcrypto_tls_cipher_suites_fw_cfg_gen_data(Object *obj, + Error **errp) +{ + return qcrypto_tls_cipher_suites_get_data(QCRYPTO_TLS_CIPHER_SUITES(obj), + errp); +} + +static void qcrypto_tls_cipher_suites_class_init(ObjectClass *oc, void *data) +{ + UserCreatableClass *ucc = USER_CREATABLE_CLASS(oc); + FWCfgDataGeneratorClass *fwgc = FW_CFG_DATA_GENERATOR_CLASS(oc); + + ucc->complete = qcrypto_tls_cipher_suites_complete; + fwgc->get_data = qcrypto_tls_cipher_suites_fw_cfg_gen_data; +} + +static const TypeInfo qcrypto_tls_cipher_suites_info = { + .parent = TYPE_QCRYPTO_TLS_CREDS, + .name = TYPE_QCRYPTO_TLS_CIPHER_SUITES, + .instance_size = sizeof(QCryptoTLSCipherSuites), + .class_size = sizeof(QCryptoTLSCredsClass), + .class_init = qcrypto_tls_cipher_suites_class_init, + .interfaces = (InterfaceInfo[]) { + { TYPE_USER_CREATABLE }, + { TYPE_FW_CFG_DATA_GENERATOR_INTERFACE }, + { } + } +}; + +static void qcrypto_tls_cipher_suites_register_types(void) +{ + type_register_static(&qcrypto_tls_cipher_suites_info); +} + +type_init(qcrypto_tls_cipher_suites_register_types); diff --git a/crypto/tlscreds.c b/crypto/tlscreds.c new file mode 100644 index 000000000..b68735f06 --- /dev/null +++ b/crypto/tlscreds.c @@ -0,0 +1,282 @@ +/* + * QEMU crypto TLS credential support + * + * Copyright (c) 2015 Red Hat, Inc. + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, see . + * + */ + +#include "qemu/osdep.h" +#include "qapi/error.h" +#include "qemu/module.h" +#include "tlscredspriv.h" +#include "trace.h" + +#define DH_BITS 2048 + +#ifdef CONFIG_GNUTLS +int +qcrypto_tls_creds_get_dh_params_file(QCryptoTLSCreds *creds, + const char *filename, + gnutls_dh_params_t *dh_params, + Error **errp) +{ + int ret; + + trace_qcrypto_tls_creds_load_dh(creds, filename ? filename : ""); + + if (filename == NULL) { + ret = gnutls_dh_params_init(dh_params); + if (ret < 0) { + error_setg(errp, "Unable to initialize DH parameters: %s", + gnutls_strerror(ret)); + return -1; + } + ret = gnutls_dh_params_generate2(*dh_params, DH_BITS); + if (ret < 0) { + gnutls_dh_params_deinit(*dh_params); + *dh_params = NULL; + error_setg(errp, "Unable to generate DH parameters: %s", + gnutls_strerror(ret)); + return -1; + } + } else { + GError *gerr = NULL; + gchar *contents; + gsize len; + gnutls_datum_t data; + if (!g_file_get_contents(filename, + &contents, + &len, + &gerr)) { + + error_setg(errp, "%s", gerr->message); + g_error_free(gerr); + return -1; + } + data.data = (unsigned char *)contents; + data.size = len; + ret = gnutls_dh_params_init(dh_params); + if (ret < 0) { + g_free(contents); + error_setg(errp, "Unable to initialize DH parameters: %s", + gnutls_strerror(ret)); + return -1; + } + ret = gnutls_dh_params_import_pkcs3(*dh_params, + &data, + GNUTLS_X509_FMT_PEM); + g_free(contents); + if (ret < 0) { + gnutls_dh_params_deinit(*dh_params); + *dh_params = NULL; + error_setg(errp, "Unable to load DH parameters from %s: %s", + filename, gnutls_strerror(ret)); + return -1; + } + } + + return 0; +} + + +int +qcrypto_tls_creds_get_path(QCryptoTLSCreds *creds, + const char *filename, + bool required, + char **cred, + Error **errp) +{ + struct stat sb; + int ret = -1; + + if (!creds->dir) { + if (required) { + error_setg(errp, "Missing 'dir' property value"); + return -1; + } else { + return 0; + } + } + + *cred = g_strdup_printf("%s/%s", creds->dir, filename); + + if (stat(*cred, &sb) < 0) { + if (errno == ENOENT && !required) { + ret = 0; + } else { + error_setg_errno(errp, errno, + "Unable to access credentials %s", + *cred); + } + g_free(*cred); + *cred = NULL; + goto cleanup; + } + + ret = 0; + cleanup: + trace_qcrypto_tls_creds_get_path(creds, filename, + *cred ? *cred : ""); + return ret; +} + + +#endif /* ! CONFIG_GNUTLS */ + + +static void +qcrypto_tls_creds_prop_set_verify(Object *obj, + bool value, + Error **errp G_GNUC_UNUSED) +{ + QCryptoTLSCreds *creds = QCRYPTO_TLS_CREDS(obj); + + creds->verifyPeer = value; +} + + +static bool +qcrypto_tls_creds_prop_get_verify(Object *obj, + Error **errp G_GNUC_UNUSED) +{ + QCryptoTLSCreds *creds = QCRYPTO_TLS_CREDS(obj); + + return creds->verifyPeer; +} + + +static void +qcrypto_tls_creds_prop_set_dir(Object *obj, + const char *value, + Error **errp G_GNUC_UNUSED) +{ + QCryptoTLSCreds *creds = QCRYPTO_TLS_CREDS(obj); + + creds->dir = g_strdup(value); +} + + +static char * +qcrypto_tls_creds_prop_get_dir(Object *obj, + Error **errp G_GNUC_UNUSED) +{ + QCryptoTLSCreds *creds = QCRYPTO_TLS_CREDS(obj); + + return g_strdup(creds->dir); +} + + +static void +qcrypto_tls_creds_prop_set_priority(Object *obj, + const char *value, + Error **errp G_GNUC_UNUSED) +{ + QCryptoTLSCreds *creds = QCRYPTO_TLS_CREDS(obj); + + creds->priority = g_strdup(value); +} + + +static char * +qcrypto_tls_creds_prop_get_priority(Object *obj, + Error **errp G_GNUC_UNUSED) +{ + QCryptoTLSCreds *creds = QCRYPTO_TLS_CREDS(obj); + + return g_strdup(creds->priority); +} + + +static void +qcrypto_tls_creds_prop_set_endpoint(Object *obj, + int value, + Error **errp G_GNUC_UNUSED) +{ + QCryptoTLSCreds *creds = QCRYPTO_TLS_CREDS(obj); + + creds->endpoint = value; +} + + +static int +qcrypto_tls_creds_prop_get_endpoint(Object *obj, + Error **errp G_GNUC_UNUSED) +{ + QCryptoTLSCreds *creds = QCRYPTO_TLS_CREDS(obj); + + return creds->endpoint; +} + + +static void +qcrypto_tls_creds_class_init(ObjectClass *oc, void *data) +{ + object_class_property_add_bool(oc, "verify-peer", + qcrypto_tls_creds_prop_get_verify, + qcrypto_tls_creds_prop_set_verify); + object_class_property_add_str(oc, "dir", + qcrypto_tls_creds_prop_get_dir, + qcrypto_tls_creds_prop_set_dir); + object_class_property_add_enum(oc, "endpoint", + "QCryptoTLSCredsEndpoint", + &QCryptoTLSCredsEndpoint_lookup, + qcrypto_tls_creds_prop_get_endpoint, + qcrypto_tls_creds_prop_set_endpoint); + object_class_property_add_str(oc, "priority", + qcrypto_tls_creds_prop_get_priority, + qcrypto_tls_creds_prop_set_priority); +} + + +static void +qcrypto_tls_creds_init(Object *obj) +{ + QCryptoTLSCreds *creds = QCRYPTO_TLS_CREDS(obj); + + creds->verifyPeer = true; +} + + +static void +qcrypto_tls_creds_finalize(Object *obj) +{ + QCryptoTLSCreds *creds = QCRYPTO_TLS_CREDS(obj); + + g_free(creds->dir); + g_free(creds->priority); +} + + +static const TypeInfo qcrypto_tls_creds_info = { + .parent = TYPE_OBJECT, + .name = TYPE_QCRYPTO_TLS_CREDS, + .instance_size = sizeof(QCryptoTLSCreds), + .instance_init = qcrypto_tls_creds_init, + .instance_finalize = qcrypto_tls_creds_finalize, + .class_init = qcrypto_tls_creds_class_init, + .class_size = sizeof(QCryptoTLSCredsClass), + .abstract = true, +}; + + +static void +qcrypto_tls_creds_register_types(void) +{ + type_register_static(&qcrypto_tls_creds_info); +} + + +type_init(qcrypto_tls_creds_register_types); diff --git a/crypto/tlscredsanon.c b/crypto/tlscredsanon.c new file mode 100644 index 000000000..30275b684 --- /dev/null +++ b/crypto/tlscredsanon.c @@ -0,0 +1,215 @@ +/* + * QEMU crypto TLS anonymous credential support + * + * Copyright (c) 2015 Red Hat, Inc. + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, see . + * + */ + +#include "qemu/osdep.h" +#include "crypto/tlscredsanon.h" +#include "tlscredspriv.h" +#include "qapi/error.h" +#include "qemu/module.h" +#include "qom/object_interfaces.h" +#include "trace.h" + + +#ifdef CONFIG_GNUTLS + + +static int +qcrypto_tls_creds_anon_load(QCryptoTLSCredsAnon *creds, + Error **errp) +{ + g_autofree char *dhparams = NULL; + int ret; + + trace_qcrypto_tls_creds_anon_load(creds, + creds->parent_obj.dir ? creds->parent_obj.dir : ""); + + if (creds->parent_obj.endpoint == QCRYPTO_TLS_CREDS_ENDPOINT_SERVER) { + if (qcrypto_tls_creds_get_path(&creds->parent_obj, + QCRYPTO_TLS_CREDS_DH_PARAMS, + false, &dhparams, errp) < 0) { + return -1; + } + + ret = gnutls_anon_allocate_server_credentials(&creds->data.server); + if (ret < 0) { + error_setg(errp, "Cannot allocate credentials: %s", + gnutls_strerror(ret)); + return -1; + } + + if (qcrypto_tls_creds_get_dh_params_file(&creds->parent_obj, dhparams, + &creds->parent_obj.dh_params, + errp) < 0) { + return -1; + } + + gnutls_anon_set_server_dh_params(creds->data.server, + creds->parent_obj.dh_params); + } else { + ret = gnutls_anon_allocate_client_credentials(&creds->data.client); + if (ret < 0) { + error_setg(errp, "Cannot allocate credentials: %s", + gnutls_strerror(ret)); + return -1; + } + } + + return 0; +} + + +static void +qcrypto_tls_creds_anon_unload(QCryptoTLSCredsAnon *creds) +{ + if (creds->parent_obj.endpoint == QCRYPTO_TLS_CREDS_ENDPOINT_CLIENT) { + if (creds->data.client) { + gnutls_anon_free_client_credentials(creds->data.client); + creds->data.client = NULL; + } + } else { + if (creds->data.server) { + gnutls_anon_free_server_credentials(creds->data.server); + creds->data.server = NULL; + } + } + if (creds->parent_obj.dh_params) { + gnutls_dh_params_deinit(creds->parent_obj.dh_params); + creds->parent_obj.dh_params = NULL; + } +} + +#else /* ! CONFIG_GNUTLS */ + + +static void +qcrypto_tls_creds_anon_load(QCryptoTLSCredsAnon *creds G_GNUC_UNUSED, + Error **errp) +{ + error_setg(errp, "TLS credentials support requires GNUTLS"); +} + + +static void +qcrypto_tls_creds_anon_unload(QCryptoTLSCredsAnon *creds G_GNUC_UNUSED) +{ + /* nada */ +} + + +#endif /* ! CONFIG_GNUTLS */ + + +static void +qcrypto_tls_creds_anon_prop_set_loaded(Object *obj, + bool value, + Error **errp) +{ + QCryptoTLSCredsAnon *creds = QCRYPTO_TLS_CREDS_ANON(obj); + + if (value) { + qcrypto_tls_creds_anon_load(creds, errp); + } else { + qcrypto_tls_creds_anon_unload(creds); + } +} + + +#ifdef CONFIG_GNUTLS + + +static bool +qcrypto_tls_creds_anon_prop_get_loaded(Object *obj, + Error **errp G_GNUC_UNUSED) +{ + QCryptoTLSCredsAnon *creds = QCRYPTO_TLS_CREDS_ANON(obj); + + if (creds->parent_obj.endpoint == QCRYPTO_TLS_CREDS_ENDPOINT_SERVER) { + return creds->data.server != NULL; + } else { + return creds->data.client != NULL; + } +} + + +#else /* ! CONFIG_GNUTLS */ + + +static bool +qcrypto_tls_creds_anon_prop_get_loaded(Object *obj G_GNUC_UNUSED, + Error **errp G_GNUC_UNUSED) +{ + return false; +} + + +#endif /* ! CONFIG_GNUTLS */ + + +static void +qcrypto_tls_creds_anon_complete(UserCreatable *uc, Error **errp) +{ + object_property_set_bool(OBJECT(uc), "loaded", true, errp); +} + + +static void +qcrypto_tls_creds_anon_finalize(Object *obj) +{ + QCryptoTLSCredsAnon *creds = QCRYPTO_TLS_CREDS_ANON(obj); + + qcrypto_tls_creds_anon_unload(creds); +} + + +static void +qcrypto_tls_creds_anon_class_init(ObjectClass *oc, void *data) +{ + UserCreatableClass *ucc = USER_CREATABLE_CLASS(oc); + + ucc->complete = qcrypto_tls_creds_anon_complete; + + object_class_property_add_bool(oc, "loaded", + qcrypto_tls_creds_anon_prop_get_loaded, + qcrypto_tls_creds_anon_prop_set_loaded); +} + + +static const TypeInfo qcrypto_tls_creds_anon_info = { + .parent = TYPE_QCRYPTO_TLS_CREDS, + .name = TYPE_QCRYPTO_TLS_CREDS_ANON, + .instance_size = sizeof(QCryptoTLSCredsAnon), + .instance_finalize = qcrypto_tls_creds_anon_finalize, + .class_size = sizeof(QCryptoTLSCredsAnonClass), + .class_init = qcrypto_tls_creds_anon_class_init, + .interfaces = (InterfaceInfo[]) { + { TYPE_USER_CREATABLE }, + { } + } +}; + + +static void +qcrypto_tls_creds_anon_register_types(void) +{ + type_register_static(&qcrypto_tls_creds_anon_info); +} + + +type_init(qcrypto_tls_creds_anon_register_types); diff --git a/crypto/tlscredspriv.h b/crypto/tlscredspriv.h new file mode 100644 index 000000000..39f1a91c4 --- /dev/null +++ b/crypto/tlscredspriv.h @@ -0,0 +1,41 @@ +/* + * QEMU crypto TLS credential support private helpers + * + * Copyright (c) 2015 Red Hat, Inc. + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, see . + * + */ + +#ifndef QCRYPTO_TLSCREDSPRIV_H +#define QCRYPTO_TLSCREDSPRIV_H + +#include "crypto/tlscreds.h" + +#ifdef CONFIG_GNUTLS + +int qcrypto_tls_creds_get_path(QCryptoTLSCreds *creds, + const char *filename, + bool required, + char **cred, + Error **errp); + +int qcrypto_tls_creds_get_dh_params_file(QCryptoTLSCreds *creds, + const char *filename, + gnutls_dh_params_t *dh_params, + Error **errp); + +#endif + +#endif /* QCRYPTO_TLSCREDSPRIV_H */ diff --git a/crypto/tlscredspsk.c b/crypto/tlscredspsk.c new file mode 100644 index 000000000..e26807b89 --- /dev/null +++ b/crypto/tlscredspsk.c @@ -0,0 +1,306 @@ +/* + * QEMU crypto TLS Pre-Shared Keys (PSK) support + * + * Copyright (c) 2018 Red Hat, Inc. + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, see . + * + */ + +#include "qemu/osdep.h" +#include "crypto/tlscredspsk.h" +#include "tlscredspriv.h" +#include "qapi/error.h" +#include "qemu/module.h" +#include "qom/object_interfaces.h" +#include "trace.h" + + +#ifdef CONFIG_GNUTLS + +static int +lookup_key(const char *pskfile, const char *username, gnutls_datum_t *key, + Error **errp) +{ + const size_t ulen = strlen(username); + GError *gerr = NULL; + char *content = NULL; + char **lines = NULL; + size_t clen = 0, i; + int ret = -1; + + if (!g_file_get_contents(pskfile, &content, &clen, &gerr)) { + error_setg(errp, "Cannot read PSK file %s: %s", + pskfile, gerr->message); + g_error_free(gerr); + return -1; + } + + lines = g_strsplit(content, "\n", -1); + for (i = 0; lines[i] != NULL; ++i) { + if (strncmp(lines[i], username, ulen) == 0 && lines[i][ulen] == ':') { + key->data = (unsigned char *) g_strdup(&lines[i][ulen + 1]); + key->size = strlen(lines[i]) - ulen - 1; + ret = 0; + goto out; + } + } + error_setg(errp, "Username %s not found in PSK file %s", + username, pskfile); + + out: + free(content); + g_strfreev(lines); + return ret; +} + +static int +qcrypto_tls_creds_psk_load(QCryptoTLSCredsPSK *creds, + Error **errp) +{ + g_autofree char *pskfile = NULL; + g_autofree char *dhparams = NULL; + const char *username; + int ret; + int rv = -1; + gnutls_datum_t key = { .data = NULL }; + + trace_qcrypto_tls_creds_psk_load(creds, + creds->parent_obj.dir ? creds->parent_obj.dir : ""); + + if (creds->parent_obj.endpoint == QCRYPTO_TLS_CREDS_ENDPOINT_SERVER) { + if (creds->username) { + error_setg(errp, "username should not be set when endpoint=server"); + goto cleanup; + } + + if (qcrypto_tls_creds_get_path(&creds->parent_obj, + QCRYPTO_TLS_CREDS_DH_PARAMS, + false, &dhparams, errp) < 0 || + qcrypto_tls_creds_get_path(&creds->parent_obj, + QCRYPTO_TLS_CREDS_PSKFILE, + true, &pskfile, errp) < 0) { + goto cleanup; + } + + ret = gnutls_psk_allocate_server_credentials(&creds->data.server); + if (ret < 0) { + error_setg(errp, "Cannot allocate credentials: %s", + gnutls_strerror(ret)); + goto cleanup; + } + + if (qcrypto_tls_creds_get_dh_params_file(&creds->parent_obj, dhparams, + &creds->parent_obj.dh_params, + errp) < 0) { + goto cleanup; + } + + gnutls_psk_set_server_credentials_file(creds->data.server, pskfile); + gnutls_psk_set_server_dh_params(creds->data.server, + creds->parent_obj.dh_params); + } else { + if (qcrypto_tls_creds_get_path(&creds->parent_obj, + QCRYPTO_TLS_CREDS_PSKFILE, + true, &pskfile, errp) < 0) { + goto cleanup; + } + + if (creds->username) { + username = creds->username; + } else { + username = "qemu"; + } + if (lookup_key(pskfile, username, &key, errp) != 0) { + goto cleanup; + } + + ret = gnutls_psk_allocate_client_credentials(&creds->data.client); + if (ret < 0) { + error_setg(errp, "Cannot allocate credentials: %s", + gnutls_strerror(ret)); + goto cleanup; + } + + gnutls_psk_set_client_credentials(creds->data.client, + username, &key, GNUTLS_PSK_KEY_HEX); + } + + rv = 0; + cleanup: + g_free(key.data); + return rv; +} + + +static void +qcrypto_tls_creds_psk_unload(QCryptoTLSCredsPSK *creds) +{ + if (creds->parent_obj.endpoint == QCRYPTO_TLS_CREDS_ENDPOINT_CLIENT) { + if (creds->data.client) { + gnutls_psk_free_client_credentials(creds->data.client); + creds->data.client = NULL; + } + } else { + if (creds->data.server) { + gnutls_psk_free_server_credentials(creds->data.server); + creds->data.server = NULL; + } + } + if (creds->parent_obj.dh_params) { + gnutls_dh_params_deinit(creds->parent_obj.dh_params); + creds->parent_obj.dh_params = NULL; + } +} + +#else /* ! CONFIG_GNUTLS */ + + +static void +qcrypto_tls_creds_psk_load(QCryptoTLSCredsPSK *creds G_GNUC_UNUSED, + Error **errp) +{ + error_setg(errp, "TLS credentials support requires GNUTLS"); +} + + +static void +qcrypto_tls_creds_psk_unload(QCryptoTLSCredsPSK *creds G_GNUC_UNUSED) +{ + /* nada */ +} + + +#endif /* ! CONFIG_GNUTLS */ + + +static void +qcrypto_tls_creds_psk_prop_set_loaded(Object *obj, + bool value, + Error **errp) +{ + QCryptoTLSCredsPSK *creds = QCRYPTO_TLS_CREDS_PSK(obj); + + if (value) { + qcrypto_tls_creds_psk_load(creds, errp); + } else { + qcrypto_tls_creds_psk_unload(creds); + } +} + + +#ifdef CONFIG_GNUTLS + + +static bool +qcrypto_tls_creds_psk_prop_get_loaded(Object *obj, + Error **errp G_GNUC_UNUSED) +{ + QCryptoTLSCredsPSK *creds = QCRYPTO_TLS_CREDS_PSK(obj); + + if (creds->parent_obj.endpoint == QCRYPTO_TLS_CREDS_ENDPOINT_SERVER) { + return creds->data.server != NULL; + } else { + return creds->data.client != NULL; + } +} + + +#else /* ! CONFIG_GNUTLS */ + + +static bool +qcrypto_tls_creds_psk_prop_get_loaded(Object *obj G_GNUC_UNUSED, + Error **errp G_GNUC_UNUSED) +{ + return false; +} + + +#endif /* ! CONFIG_GNUTLS */ + + +static void +qcrypto_tls_creds_psk_complete(UserCreatable *uc, Error **errp) +{ + object_property_set_bool(OBJECT(uc), "loaded", true, errp); +} + + +static void +qcrypto_tls_creds_psk_finalize(Object *obj) +{ + QCryptoTLSCredsPSK *creds = QCRYPTO_TLS_CREDS_PSK(obj); + + qcrypto_tls_creds_psk_unload(creds); +} + +static void +qcrypto_tls_creds_psk_prop_set_username(Object *obj, + const char *value, + Error **errp G_GNUC_UNUSED) +{ + QCryptoTLSCredsPSK *creds = QCRYPTO_TLS_CREDS_PSK(obj); + + creds->username = g_strdup(value); +} + + +static char * +qcrypto_tls_creds_psk_prop_get_username(Object *obj, + Error **errp G_GNUC_UNUSED) +{ + QCryptoTLSCredsPSK *creds = QCRYPTO_TLS_CREDS_PSK(obj); + + return g_strdup(creds->username); +} + +static void +qcrypto_tls_creds_psk_class_init(ObjectClass *oc, void *data) +{ + UserCreatableClass *ucc = USER_CREATABLE_CLASS(oc); + + ucc->complete = qcrypto_tls_creds_psk_complete; + + object_class_property_add_bool(oc, "loaded", + qcrypto_tls_creds_psk_prop_get_loaded, + qcrypto_tls_creds_psk_prop_set_loaded); + object_class_property_add_str(oc, "username", + qcrypto_tls_creds_psk_prop_get_username, + qcrypto_tls_creds_psk_prop_set_username); +} + + +static const TypeInfo qcrypto_tls_creds_psk_info = { + .parent = TYPE_QCRYPTO_TLS_CREDS, + .name = TYPE_QCRYPTO_TLS_CREDS_PSK, + .instance_size = sizeof(QCryptoTLSCredsPSK), + .instance_finalize = qcrypto_tls_creds_psk_finalize, + .class_size = sizeof(QCryptoTLSCredsPSKClass), + .class_init = qcrypto_tls_creds_psk_class_init, + .interfaces = (InterfaceInfo[]) { + { TYPE_USER_CREATABLE }, + { } + } +}; + + +static void +qcrypto_tls_creds_psk_register_types(void) +{ + type_register_static(&qcrypto_tls_creds_psk_info); +} + + +type_init(qcrypto_tls_creds_psk_register_types); diff --git a/crypto/tlscredsx509.c b/crypto/tlscredsx509.c new file mode 100644 index 000000000..dd7267ccd --- /dev/null +++ b/crypto/tlscredsx509.c @@ -0,0 +1,841 @@ +/* + * QEMU crypto TLS x509 credential support + * + * Copyright (c) 2015 Red Hat, Inc. + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, see . + * + */ + +#include "qemu/osdep.h" +#include "crypto/tlscredsx509.h" +#include "tlscredspriv.h" +#include "crypto/secret.h" +#include "qapi/error.h" +#include "qemu/module.h" +#include "qom/object_interfaces.h" +#include "trace.h" + + +#ifdef CONFIG_GNUTLS + +#include + + +static int +qcrypto_tls_creds_check_cert_times(gnutls_x509_crt_t cert, + const char *certFile, + bool isServer, + bool isCA, + Error **errp) +{ + time_t now = time(NULL); + + if (now == ((time_t)-1)) { + error_setg_errno(errp, errno, "cannot get current time"); + return -1; + } + + if (gnutls_x509_crt_get_expiration_time(cert) < now) { + error_setg(errp, + (isCA ? + "The CA certificate %s has expired" : + (isServer ? + "The server certificate %s has expired" : + "The client certificate %s has expired")), + certFile); + return -1; + } + + if (gnutls_x509_crt_get_activation_time(cert) > now) { + error_setg(errp, + (isCA ? + "The CA certificate %s is not yet active" : + (isServer ? + "The server certificate %s is not yet active" : + "The client certificate %s is not yet active")), + certFile); + return -1; + } + + return 0; +} + + +static int +qcrypto_tls_creds_check_cert_basic_constraints(QCryptoTLSCredsX509 *creds, + gnutls_x509_crt_t cert, + const char *certFile, + bool isServer, + bool isCA, + Error **errp) +{ + int status; + + status = gnutls_x509_crt_get_basic_constraints(cert, NULL, NULL, NULL); + trace_qcrypto_tls_creds_x509_check_basic_constraints( + creds, certFile, status); + + if (status > 0) { /* It is a CA cert */ + if (!isCA) { + error_setg(errp, isServer ? + "The certificate %s basic constraints show a CA, " + "but we need one for a server" : + "The certificate %s basic constraints show a CA, " + "but we need one for a client", + certFile); + return -1; + } + } else if (status == 0) { /* It is not a CA cert */ + if (isCA) { + error_setg(errp, + "The certificate %s basic constraints do not " + "show a CA", + certFile); + return -1; + } + } else if (status == GNUTLS_E_REQUESTED_DATA_NOT_AVAILABLE) { + /* Missing basicConstraints */ + if (isCA) { + error_setg(errp, + "The certificate %s is missing basic constraints " + "for a CA", + certFile); + return -1; + } + } else { /* General error */ + error_setg(errp, + "Unable to query certificate %s basic constraints: %s", + certFile, gnutls_strerror(status)); + return -1; + } + + return 0; +} + + +static int +qcrypto_tls_creds_check_cert_key_usage(QCryptoTLSCredsX509 *creds, + gnutls_x509_crt_t cert, + const char *certFile, + bool isCA, + Error **errp) +{ + int status; + unsigned int usage = 0; + unsigned int critical = 0; + + status = gnutls_x509_crt_get_key_usage(cert, &usage, &critical); + trace_qcrypto_tls_creds_x509_check_key_usage( + creds, certFile, status, usage, critical); + + if (status < 0) { + if (status == GNUTLS_E_REQUESTED_DATA_NOT_AVAILABLE) { + usage = isCA ? GNUTLS_KEY_KEY_CERT_SIGN : + GNUTLS_KEY_DIGITAL_SIGNATURE|GNUTLS_KEY_KEY_ENCIPHERMENT; + } else { + error_setg(errp, + "Unable to query certificate %s key usage: %s", + certFile, gnutls_strerror(status)); + return -1; + } + } + + if (isCA) { + if (!(usage & GNUTLS_KEY_KEY_CERT_SIGN)) { + if (critical) { + error_setg(errp, + "Certificate %s usage does not permit " + "certificate signing", certFile); + return -1; + } + } + } else { + if (!(usage & GNUTLS_KEY_DIGITAL_SIGNATURE)) { + if (critical) { + error_setg(errp, + "Certificate %s usage does not permit digital " + "signature", certFile); + return -1; + } + } + if (!(usage & GNUTLS_KEY_KEY_ENCIPHERMENT)) { + if (critical) { + error_setg(errp, + "Certificate %s usage does not permit key " + "encipherment", certFile); + return -1; + } + } + } + + return 0; +} + + +static int +qcrypto_tls_creds_check_cert_key_purpose(QCryptoTLSCredsX509 *creds, + gnutls_x509_crt_t cert, + const char *certFile, + bool isServer, + Error **errp) +{ + int status; + size_t i; + unsigned int purposeCritical; + unsigned int critical; + char *buffer = NULL; + size_t size; + bool allowClient = false, allowServer = false; + + critical = 0; + for (i = 0; ; i++) { + size = 0; + status = gnutls_x509_crt_get_key_purpose_oid(cert, i, buffer, + &size, NULL); + + if (status == GNUTLS_E_REQUESTED_DATA_NOT_AVAILABLE) { + + /* If there is no data at all, then we must allow + client/server to pass */ + if (i == 0) { + allowServer = allowClient = true; + } + break; + } + if (status != GNUTLS_E_SHORT_MEMORY_BUFFER) { + error_setg(errp, + "Unable to query certificate %s key purpose: %s", + certFile, gnutls_strerror(status)); + return -1; + } + + buffer = g_new0(char, size); + + status = gnutls_x509_crt_get_key_purpose_oid(cert, i, buffer, + &size, &purposeCritical); + + if (status < 0) { + trace_qcrypto_tls_creds_x509_check_key_purpose( + creds, certFile, status, "", purposeCritical); + g_free(buffer); + error_setg(errp, + "Unable to query certificate %s key purpose: %s", + certFile, gnutls_strerror(status)); + return -1; + } + trace_qcrypto_tls_creds_x509_check_key_purpose( + creds, certFile, status, buffer, purposeCritical); + if (purposeCritical) { + critical = true; + } + + if (g_str_equal(buffer, GNUTLS_KP_TLS_WWW_SERVER)) { + allowServer = true; + } else if (g_str_equal(buffer, GNUTLS_KP_TLS_WWW_CLIENT)) { + allowClient = true; + } else if (g_str_equal(buffer, GNUTLS_KP_ANY)) { + allowServer = allowClient = true; + } + + g_free(buffer); + buffer = NULL; + } + + if (isServer) { + if (!allowServer) { + if (critical) { + error_setg(errp, + "Certificate %s purpose does not allow " + "use with a TLS server", certFile); + return -1; + } + } + } else { + if (!allowClient) { + if (critical) { + error_setg(errp, + "Certificate %s purpose does not allow use " + "with a TLS client", certFile); + return -1; + } + } + } + + return 0; +} + + +static int +qcrypto_tls_creds_check_cert(QCryptoTLSCredsX509 *creds, + gnutls_x509_crt_t cert, + const char *certFile, + bool isServer, + bool isCA, + Error **errp) +{ + if (qcrypto_tls_creds_check_cert_times(cert, certFile, + isServer, isCA, + errp) < 0) { + return -1; + } + + if (qcrypto_tls_creds_check_cert_basic_constraints(creds, + cert, certFile, + isServer, isCA, + errp) < 0) { + return -1; + } + + if (qcrypto_tls_creds_check_cert_key_usage(creds, + cert, certFile, + isCA, errp) < 0) { + return -1; + } + + if (!isCA && + qcrypto_tls_creds_check_cert_key_purpose(creds, + cert, certFile, + isServer, errp) < 0) { + return -1; + } + + return 0; +} + + +static int +qcrypto_tls_creds_check_cert_pair(gnutls_x509_crt_t cert, + const char *certFile, + gnutls_x509_crt_t *cacerts, + size_t ncacerts, + const char *cacertFile, + bool isServer, + Error **errp) +{ + unsigned int status; + + if (gnutls_x509_crt_list_verify(&cert, 1, + cacerts, ncacerts, + NULL, 0, + 0, &status) < 0) { + error_setg(errp, isServer ? + "Unable to verify server certificate %s against " + "CA certificate %s" : + "Unable to verify client certificate %s against " + "CA certificate %s", + certFile, cacertFile); + return -1; + } + + if (status != 0) { + const char *reason = "Invalid certificate"; + + if (status & GNUTLS_CERT_INVALID) { + reason = "The certificate is not trusted"; + } + + if (status & GNUTLS_CERT_SIGNER_NOT_FOUND) { + reason = "The certificate hasn't got a known issuer"; + } + + if (status & GNUTLS_CERT_REVOKED) { + reason = "The certificate has been revoked"; + } + +#ifndef GNUTLS_1_0_COMPAT + if (status & GNUTLS_CERT_INSECURE_ALGORITHM) { + reason = "The certificate uses an insecure algorithm"; + } +#endif + + error_setg(errp, + "Our own certificate %s failed validation against %s: %s", + certFile, cacertFile, reason); + return -1; + } + + return 0; +} + + +static gnutls_x509_crt_t +qcrypto_tls_creds_load_cert(QCryptoTLSCredsX509 *creds, + const char *certFile, + bool isServer, + Error **errp) +{ + gnutls_datum_t data; + gnutls_x509_crt_t cert = NULL; + g_autofree char *buf = NULL; + gsize buflen; + GError *gerr = NULL; + int ret = -1; + int err; + + trace_qcrypto_tls_creds_x509_load_cert(creds, isServer, certFile); + + err = gnutls_x509_crt_init(&cert); + if (err < 0) { + error_setg(errp, "Unable to initialize certificate: %s", + gnutls_strerror(err)); + goto cleanup; + } + + if (!g_file_get_contents(certFile, &buf, &buflen, &gerr)) { + error_setg(errp, "Cannot load CA cert list %s: %s", + certFile, gerr->message); + g_error_free(gerr); + goto cleanup; + } + + data.data = (unsigned char *)buf; + data.size = strlen(buf); + + err = gnutls_x509_crt_import(cert, &data, GNUTLS_X509_FMT_PEM); + if (err < 0) { + error_setg(errp, isServer ? + "Unable to import server certificate %s: %s" : + "Unable to import client certificate %s: %s", + certFile, + gnutls_strerror(err)); + goto cleanup; + } + + ret = 0; + + cleanup: + if (ret != 0) { + gnutls_x509_crt_deinit(cert); + cert = NULL; + } + return cert; +} + + +static int +qcrypto_tls_creds_load_ca_cert_list(QCryptoTLSCredsX509 *creds, + const char *certFile, + gnutls_x509_crt_t *certs, + unsigned int certMax, + size_t *ncerts, + Error **errp) +{ + gnutls_datum_t data; + g_autofree char *buf = NULL; + gsize buflen; + GError *gerr = NULL; + + *ncerts = 0; + trace_qcrypto_tls_creds_x509_load_cert_list(creds, certFile); + + if (!g_file_get_contents(certFile, &buf, &buflen, &gerr)) { + error_setg(errp, "Cannot load CA cert list %s: %s", + certFile, gerr->message); + g_error_free(gerr); + return -1; + } + + data.data = (unsigned char *)buf; + data.size = strlen(buf); + + if (gnutls_x509_crt_list_import(certs, &certMax, &data, + GNUTLS_X509_FMT_PEM, 0) < 0) { + error_setg(errp, + "Unable to import CA certificate list %s", + certFile); + return -1; + } + *ncerts = certMax; + + return 0; +} + + +#define MAX_CERTS 16 +static int +qcrypto_tls_creds_x509_sanity_check(QCryptoTLSCredsX509 *creds, + bool isServer, + const char *cacertFile, + const char *certFile, + Error **errp) +{ + gnutls_x509_crt_t cert = NULL; + gnutls_x509_crt_t cacerts[MAX_CERTS]; + size_t ncacerts = 0; + size_t i; + int ret = -1; + + memset(cacerts, 0, sizeof(cacerts)); + if (certFile && + access(certFile, R_OK) == 0) { + cert = qcrypto_tls_creds_load_cert(creds, + certFile, isServer, + errp); + if (!cert) { + goto cleanup; + } + } + if (access(cacertFile, R_OK) == 0) { + if (qcrypto_tls_creds_load_ca_cert_list(creds, + cacertFile, cacerts, + MAX_CERTS, &ncacerts, + errp) < 0) { + goto cleanup; + } + } + + if (cert && + qcrypto_tls_creds_check_cert(creds, + cert, certFile, isServer, + false, errp) < 0) { + goto cleanup; + } + + for (i = 0; i < ncacerts; i++) { + if (qcrypto_tls_creds_check_cert(creds, + cacerts[i], cacertFile, + isServer, true, errp) < 0) { + goto cleanup; + } + } + + if (cert && ncacerts && + qcrypto_tls_creds_check_cert_pair(cert, certFile, cacerts, + ncacerts, cacertFile, + isServer, errp) < 0) { + goto cleanup; + } + + ret = 0; + + cleanup: + if (cert) { + gnutls_x509_crt_deinit(cert); + } + for (i = 0; i < ncacerts; i++) { + gnutls_x509_crt_deinit(cacerts[i]); + } + return ret; +} + + +static int +qcrypto_tls_creds_x509_load(QCryptoTLSCredsX509 *creds, + Error **errp) +{ + char *cacert = NULL, *cacrl = NULL, *cert = NULL, + *key = NULL, *dhparams = NULL; + int ret; + int rv = -1; + + trace_qcrypto_tls_creds_x509_load(creds, + creds->parent_obj.dir ? creds->parent_obj.dir : ""); + + if (creds->parent_obj.endpoint == QCRYPTO_TLS_CREDS_ENDPOINT_SERVER) { + if (qcrypto_tls_creds_get_path(&creds->parent_obj, + QCRYPTO_TLS_CREDS_X509_CA_CERT, + true, &cacert, errp) < 0 || + qcrypto_tls_creds_get_path(&creds->parent_obj, + QCRYPTO_TLS_CREDS_X509_CA_CRL, + false, &cacrl, errp) < 0 || + qcrypto_tls_creds_get_path(&creds->parent_obj, + QCRYPTO_TLS_CREDS_X509_SERVER_CERT, + true, &cert, errp) < 0 || + qcrypto_tls_creds_get_path(&creds->parent_obj, + QCRYPTO_TLS_CREDS_X509_SERVER_KEY, + true, &key, errp) < 0 || + qcrypto_tls_creds_get_path(&creds->parent_obj, + QCRYPTO_TLS_CREDS_DH_PARAMS, + false, &dhparams, errp) < 0) { + goto cleanup; + } + } else { + if (qcrypto_tls_creds_get_path(&creds->parent_obj, + QCRYPTO_TLS_CREDS_X509_CA_CERT, + true, &cacert, errp) < 0 || + qcrypto_tls_creds_get_path(&creds->parent_obj, + QCRYPTO_TLS_CREDS_X509_CLIENT_CERT, + false, &cert, errp) < 0 || + qcrypto_tls_creds_get_path(&creds->parent_obj, + QCRYPTO_TLS_CREDS_X509_CLIENT_KEY, + false, &key, errp) < 0) { + goto cleanup; + } + } + + if (creds->sanityCheck && + qcrypto_tls_creds_x509_sanity_check(creds, + creds->parent_obj.endpoint == QCRYPTO_TLS_CREDS_ENDPOINT_SERVER, + cacert, cert, errp) < 0) { + goto cleanup; + } + + ret = gnutls_certificate_allocate_credentials(&creds->data); + if (ret < 0) { + error_setg(errp, "Cannot allocate credentials: '%s'", + gnutls_strerror(ret)); + goto cleanup; + } + + ret = gnutls_certificate_set_x509_trust_file(creds->data, + cacert, + GNUTLS_X509_FMT_PEM); + if (ret < 0) { + error_setg(errp, "Cannot load CA certificate '%s': %s", + cacert, gnutls_strerror(ret)); + goto cleanup; + } + + if (cert != NULL && key != NULL) { + char *password = NULL; + if (creds->passwordid) { + password = qcrypto_secret_lookup_as_utf8(creds->passwordid, + errp); + if (!password) { + goto cleanup; + } + } + ret = gnutls_certificate_set_x509_key_file2(creds->data, + cert, key, + GNUTLS_X509_FMT_PEM, + password, + 0); + g_free(password); + if (ret < 0) { + error_setg(errp, "Cannot load certificate '%s' & key '%s': %s", + cert, key, gnutls_strerror(ret)); + goto cleanup; + } + } + + if (cacrl != NULL) { + ret = gnutls_certificate_set_x509_crl_file(creds->data, + cacrl, + GNUTLS_X509_FMT_PEM); + if (ret < 0) { + error_setg(errp, "Cannot load CRL '%s': %s", + cacrl, gnutls_strerror(ret)); + goto cleanup; + } + } + + if (creds->parent_obj.endpoint == QCRYPTO_TLS_CREDS_ENDPOINT_SERVER) { + if (qcrypto_tls_creds_get_dh_params_file(&creds->parent_obj, dhparams, + &creds->parent_obj.dh_params, + errp) < 0) { + goto cleanup; + } + gnutls_certificate_set_dh_params(creds->data, + creds->parent_obj.dh_params); + } + + rv = 0; + cleanup: + g_free(cacert); + g_free(cacrl); + g_free(cert); + g_free(key); + g_free(dhparams); + return rv; +} + + +static void +qcrypto_tls_creds_x509_unload(QCryptoTLSCredsX509 *creds) +{ + if (creds->data) { + gnutls_certificate_free_credentials(creds->data); + creds->data = NULL; + } + if (creds->parent_obj.dh_params) { + gnutls_dh_params_deinit(creds->parent_obj.dh_params); + creds->parent_obj.dh_params = NULL; + } +} + + +#else /* ! CONFIG_GNUTLS */ + + +static void +qcrypto_tls_creds_x509_load(QCryptoTLSCredsX509 *creds G_GNUC_UNUSED, + Error **errp) +{ + error_setg(errp, "TLS credentials support requires GNUTLS"); +} + + +static void +qcrypto_tls_creds_x509_unload(QCryptoTLSCredsX509 *creds G_GNUC_UNUSED) +{ + /* nada */ +} + + +#endif /* ! CONFIG_GNUTLS */ + + +static void +qcrypto_tls_creds_x509_prop_set_loaded(Object *obj, + bool value, + Error **errp) +{ + QCryptoTLSCredsX509 *creds = QCRYPTO_TLS_CREDS_X509(obj); + + if (value) { + qcrypto_tls_creds_x509_load(creds, errp); + } else { + qcrypto_tls_creds_x509_unload(creds); + } +} + + +#ifdef CONFIG_GNUTLS + + +static bool +qcrypto_tls_creds_x509_prop_get_loaded(Object *obj, + Error **errp G_GNUC_UNUSED) +{ + QCryptoTLSCredsX509 *creds = QCRYPTO_TLS_CREDS_X509(obj); + + return creds->data != NULL; +} + + +#else /* ! CONFIG_GNUTLS */ + + +static bool +qcrypto_tls_creds_x509_prop_get_loaded(Object *obj G_GNUC_UNUSED, + Error **errp G_GNUC_UNUSED) +{ + return false; +} + + +#endif /* ! CONFIG_GNUTLS */ + + +static void +qcrypto_tls_creds_x509_prop_set_sanity(Object *obj, + bool value, + Error **errp G_GNUC_UNUSED) +{ + QCryptoTLSCredsX509 *creds = QCRYPTO_TLS_CREDS_X509(obj); + + creds->sanityCheck = value; +} + + +static void +qcrypto_tls_creds_x509_prop_set_passwordid(Object *obj, + const char *value, + Error **errp G_GNUC_UNUSED) +{ + QCryptoTLSCredsX509 *creds = QCRYPTO_TLS_CREDS_X509(obj); + + creds->passwordid = g_strdup(value); +} + + +static char * +qcrypto_tls_creds_x509_prop_get_passwordid(Object *obj, + Error **errp G_GNUC_UNUSED) +{ + QCryptoTLSCredsX509 *creds = QCRYPTO_TLS_CREDS_X509(obj); + + return g_strdup(creds->passwordid); +} + + +static bool +qcrypto_tls_creds_x509_prop_get_sanity(Object *obj, + Error **errp G_GNUC_UNUSED) +{ + QCryptoTLSCredsX509 *creds = QCRYPTO_TLS_CREDS_X509(obj); + + return creds->sanityCheck; +} + + +static void +qcrypto_tls_creds_x509_complete(UserCreatable *uc, Error **errp) +{ + object_property_set_bool(OBJECT(uc), "loaded", true, errp); +} + + +static void +qcrypto_tls_creds_x509_init(Object *obj) +{ + QCryptoTLSCredsX509 *creds = QCRYPTO_TLS_CREDS_X509(obj); + + creds->sanityCheck = true; +} + + +static void +qcrypto_tls_creds_x509_finalize(Object *obj) +{ + QCryptoTLSCredsX509 *creds = QCRYPTO_TLS_CREDS_X509(obj); + + g_free(creds->passwordid); + qcrypto_tls_creds_x509_unload(creds); +} + + +static void +qcrypto_tls_creds_x509_class_init(ObjectClass *oc, void *data) +{ + UserCreatableClass *ucc = USER_CREATABLE_CLASS(oc); + + ucc->complete = qcrypto_tls_creds_x509_complete; + + object_class_property_add_bool(oc, "loaded", + qcrypto_tls_creds_x509_prop_get_loaded, + qcrypto_tls_creds_x509_prop_set_loaded); + object_class_property_add_bool(oc, "sanity-check", + qcrypto_tls_creds_x509_prop_get_sanity, + qcrypto_tls_creds_x509_prop_set_sanity); + object_class_property_add_str(oc, "passwordid", + qcrypto_tls_creds_x509_prop_get_passwordid, + qcrypto_tls_creds_x509_prop_set_passwordid); +} + + +static const TypeInfo qcrypto_tls_creds_x509_info = { + .parent = TYPE_QCRYPTO_TLS_CREDS, + .name = TYPE_QCRYPTO_TLS_CREDS_X509, + .instance_size = sizeof(QCryptoTLSCredsX509), + .instance_init = qcrypto_tls_creds_x509_init, + .instance_finalize = qcrypto_tls_creds_x509_finalize, + .class_size = sizeof(QCryptoTLSCredsX509Class), + .class_init = qcrypto_tls_creds_x509_class_init, + .interfaces = (InterfaceInfo[]) { + { TYPE_USER_CREATABLE }, + { } + } +}; + + +static void +qcrypto_tls_creds_x509_register_types(void) +{ + type_register_static(&qcrypto_tls_creds_x509_info); +} + + +type_init(qcrypto_tls_creds_x509_register_types); diff --git a/crypto/tlssession.c b/crypto/tlssession.c new file mode 100644 index 000000000..33203e8ca --- /dev/null +++ b/crypto/tlssession.c @@ -0,0 +1,642 @@ +/* + * QEMU crypto TLS session support + * + * Copyright (c) 2015 Red Hat, Inc. + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, see . + * + */ + +#include "qemu/osdep.h" +#include "crypto/tlssession.h" +#include "crypto/tlscredsanon.h" +#include "crypto/tlscredspsk.h" +#include "crypto/tlscredsx509.h" +#include "qapi/error.h" +#include "authz/base.h" +#include "trace.h" + +#ifdef CONFIG_GNUTLS + + +#include + + +struct QCryptoTLSSession { + QCryptoTLSCreds *creds; + gnutls_session_t handle; + char *hostname; + char *authzid; + bool handshakeComplete; + QCryptoTLSSessionWriteFunc writeFunc; + QCryptoTLSSessionReadFunc readFunc; + void *opaque; + char *peername; +}; + + +void +qcrypto_tls_session_free(QCryptoTLSSession *session) +{ + if (!session) { + return; + } + + gnutls_deinit(session->handle); + g_free(session->hostname); + g_free(session->peername); + g_free(session->authzid); + object_unref(OBJECT(session->creds)); + g_free(session); +} + + +static ssize_t +qcrypto_tls_session_push(void *opaque, const void *buf, size_t len) +{ + QCryptoTLSSession *session = opaque; + + if (!session->writeFunc) { + errno = EIO; + return -1; + }; + + return session->writeFunc(buf, len, session->opaque); +} + + +static ssize_t +qcrypto_tls_session_pull(void *opaque, void *buf, size_t len) +{ + QCryptoTLSSession *session = opaque; + + if (!session->readFunc) { + errno = EIO; + return -1; + }; + + return session->readFunc(buf, len, session->opaque); +} + +#define TLS_PRIORITY_ADDITIONAL_ANON "+ANON-DH" +#define TLS_PRIORITY_ADDITIONAL_PSK "+ECDHE-PSK:+DHE-PSK:+PSK" + +QCryptoTLSSession * +qcrypto_tls_session_new(QCryptoTLSCreds *creds, + const char *hostname, + const char *authzid, + QCryptoTLSCredsEndpoint endpoint, + Error **errp) +{ + QCryptoTLSSession *session; + int ret; + + session = g_new0(QCryptoTLSSession, 1); + trace_qcrypto_tls_session_new( + session, creds, hostname ? hostname : "", + authzid ? authzid : "", endpoint); + + if (hostname) { + session->hostname = g_strdup(hostname); + } + if (authzid) { + session->authzid = g_strdup(authzid); + } + session->creds = creds; + object_ref(OBJECT(creds)); + + if (creds->endpoint != endpoint) { + error_setg(errp, "Credentials endpoint doesn't match session"); + goto error; + } + + if (endpoint == QCRYPTO_TLS_CREDS_ENDPOINT_SERVER) { + ret = gnutls_init(&session->handle, GNUTLS_SERVER); + } else { + ret = gnutls_init(&session->handle, GNUTLS_CLIENT); + } + if (ret < 0) { + error_setg(errp, "Cannot initialize TLS session: %s", + gnutls_strerror(ret)); + goto error; + } + + if (object_dynamic_cast(OBJECT(creds), + TYPE_QCRYPTO_TLS_CREDS_ANON)) { + QCryptoTLSCredsAnon *acreds = QCRYPTO_TLS_CREDS_ANON(creds); + char *prio; + + if (creds->priority != NULL) { + prio = g_strdup_printf("%s:%s", + creds->priority, + TLS_PRIORITY_ADDITIONAL_ANON); + } else { + prio = g_strdup(CONFIG_TLS_PRIORITY ":" + TLS_PRIORITY_ADDITIONAL_ANON); + } + + ret = gnutls_priority_set_direct(session->handle, prio, NULL); + if (ret < 0) { + error_setg(errp, "Unable to set TLS session priority %s: %s", + prio, gnutls_strerror(ret)); + g_free(prio); + goto error; + } + g_free(prio); + if (creds->endpoint == QCRYPTO_TLS_CREDS_ENDPOINT_SERVER) { + ret = gnutls_credentials_set(session->handle, + GNUTLS_CRD_ANON, + acreds->data.server); + } else { + ret = gnutls_credentials_set(session->handle, + GNUTLS_CRD_ANON, + acreds->data.client); + } + if (ret < 0) { + error_setg(errp, "Cannot set session credentials: %s", + gnutls_strerror(ret)); + goto error; + } + } else if (object_dynamic_cast(OBJECT(creds), + TYPE_QCRYPTO_TLS_CREDS_PSK)) { + QCryptoTLSCredsPSK *pcreds = QCRYPTO_TLS_CREDS_PSK(creds); + char *prio; + + if (creds->priority != NULL) { + prio = g_strdup_printf("%s:%s", + creds->priority, + TLS_PRIORITY_ADDITIONAL_PSK); + } else { + prio = g_strdup(CONFIG_TLS_PRIORITY ":" + TLS_PRIORITY_ADDITIONAL_PSK); + } + + ret = gnutls_priority_set_direct(session->handle, prio, NULL); + if (ret < 0) { + error_setg(errp, "Unable to set TLS session priority %s: %s", + prio, gnutls_strerror(ret)); + g_free(prio); + goto error; + } + g_free(prio); + if (creds->endpoint == QCRYPTO_TLS_CREDS_ENDPOINT_SERVER) { + ret = gnutls_credentials_set(session->handle, + GNUTLS_CRD_PSK, + pcreds->data.server); + } else { + ret = gnutls_credentials_set(session->handle, + GNUTLS_CRD_PSK, + pcreds->data.client); + } + if (ret < 0) { + error_setg(errp, "Cannot set session credentials: %s", + gnutls_strerror(ret)); + goto error; + } + } else if (object_dynamic_cast(OBJECT(creds), + TYPE_QCRYPTO_TLS_CREDS_X509)) { + QCryptoTLSCredsX509 *tcreds = QCRYPTO_TLS_CREDS_X509(creds); + const char *prio = creds->priority; + if (!prio) { + prio = CONFIG_TLS_PRIORITY; + } + + ret = gnutls_priority_set_direct(session->handle, prio, NULL); + if (ret < 0) { + error_setg(errp, "Cannot set default TLS session priority %s: %s", + prio, gnutls_strerror(ret)); + goto error; + } + ret = gnutls_credentials_set(session->handle, + GNUTLS_CRD_CERTIFICATE, + tcreds->data); + if (ret < 0) { + error_setg(errp, "Cannot set session credentials: %s", + gnutls_strerror(ret)); + goto error; + } + + if (creds->endpoint == QCRYPTO_TLS_CREDS_ENDPOINT_SERVER) { + /* This requests, but does not enforce a client cert. + * The cert checking code later does enforcement */ + gnutls_certificate_server_set_request(session->handle, + GNUTLS_CERT_REQUEST); + } + } else { + error_setg(errp, "Unsupported TLS credentials type %s", + object_get_typename(OBJECT(creds))); + goto error; + } + + gnutls_transport_set_ptr(session->handle, session); + gnutls_transport_set_push_function(session->handle, + qcrypto_tls_session_push); + gnutls_transport_set_pull_function(session->handle, + qcrypto_tls_session_pull); + + return session; + + error: + qcrypto_tls_session_free(session); + return NULL; +} + +static int +qcrypto_tls_session_check_certificate(QCryptoTLSSession *session, + Error **errp) +{ + int ret; + unsigned int status; + const gnutls_datum_t *certs; + unsigned int nCerts, i; + time_t now; + gnutls_x509_crt_t cert = NULL; + Error *err = NULL; + + now = time(NULL); + if (now == ((time_t)-1)) { + error_setg_errno(errp, errno, "Cannot get current time"); + return -1; + } + + ret = gnutls_certificate_verify_peers2(session->handle, &status); + if (ret < 0) { + error_setg(errp, "Verify failed: %s", gnutls_strerror(ret)); + return -1; + } + + if (status != 0) { + const char *reason = "Invalid certificate"; + + if (status & GNUTLS_CERT_INVALID) { + reason = "The certificate is not trusted"; + } + + if (status & GNUTLS_CERT_SIGNER_NOT_FOUND) { + reason = "The certificate hasn't got a known issuer"; + } + + if (status & GNUTLS_CERT_REVOKED) { + reason = "The certificate has been revoked"; + } + + if (status & GNUTLS_CERT_INSECURE_ALGORITHM) { + reason = "The certificate uses an insecure algorithm"; + } + + error_setg(errp, "%s", reason); + return -1; + } + + certs = gnutls_certificate_get_peers(session->handle, &nCerts); + if (!certs) { + error_setg(errp, "No certificate peers"); + return -1; + } + + for (i = 0; i < nCerts; i++) { + ret = gnutls_x509_crt_init(&cert); + if (ret < 0) { + error_setg(errp, "Cannot initialize certificate: %s", + gnutls_strerror(ret)); + return -1; + } + + ret = gnutls_x509_crt_import(cert, &certs[i], GNUTLS_X509_FMT_DER); + if (ret < 0) { + error_setg(errp, "Cannot import certificate: %s", + gnutls_strerror(ret)); + goto error; + } + + if (gnutls_x509_crt_get_expiration_time(cert) < now) { + error_setg(errp, "The certificate has expired"); + goto error; + } + + if (gnutls_x509_crt_get_activation_time(cert) > now) { + error_setg(errp, "The certificate is not yet activated"); + goto error; + } + + if (gnutls_x509_crt_get_activation_time(cert) > now) { + error_setg(errp, "The certificate is not yet activated"); + goto error; + } + + if (i == 0) { + size_t dnameSize = 1024; + session->peername = g_malloc(dnameSize); + requery: + ret = gnutls_x509_crt_get_dn(cert, session->peername, &dnameSize); + if (ret < 0) { + if (ret == GNUTLS_E_SHORT_MEMORY_BUFFER) { + session->peername = g_realloc(session->peername, + dnameSize); + goto requery; + } + error_setg(errp, "Cannot get client distinguished name: %s", + gnutls_strerror(ret)); + goto error; + } + if (session->authzid) { + bool allow; + + allow = qauthz_is_allowed_by_id(session->authzid, + session->peername, &err); + if (err) { + error_propagate(errp, err); + goto error; + } + if (!allow) { + error_setg(errp, "TLS x509 authz check for %s is denied", + session->peername); + goto error; + } + } + if (session->hostname) { + if (!gnutls_x509_crt_check_hostname(cert, session->hostname)) { + error_setg(errp, + "Certificate does not match the hostname %s", + session->hostname); + goto error; + } + } + } + + gnutls_x509_crt_deinit(cert); + } + + return 0; + + error: + gnutls_x509_crt_deinit(cert); + return -1; +} + + +int +qcrypto_tls_session_check_credentials(QCryptoTLSSession *session, + Error **errp) +{ + if (object_dynamic_cast(OBJECT(session->creds), + TYPE_QCRYPTO_TLS_CREDS_ANON)) { + trace_qcrypto_tls_session_check_creds(session, "nop"); + return 0; + } else if (object_dynamic_cast(OBJECT(session->creds), + TYPE_QCRYPTO_TLS_CREDS_PSK)) { + trace_qcrypto_tls_session_check_creds(session, "nop"); + return 0; + } else if (object_dynamic_cast(OBJECT(session->creds), + TYPE_QCRYPTO_TLS_CREDS_X509)) { + if (session->creds->verifyPeer) { + int ret = qcrypto_tls_session_check_certificate(session, + errp); + trace_qcrypto_tls_session_check_creds(session, + ret == 0 ? "pass" : "fail"); + return ret; + } else { + trace_qcrypto_tls_session_check_creds(session, "skip"); + return 0; + } + } else { + trace_qcrypto_tls_session_check_creds(session, "error"); + error_setg(errp, "Unexpected credential type %s", + object_get_typename(OBJECT(session->creds))); + return -1; + } +} + + +void +qcrypto_tls_session_set_callbacks(QCryptoTLSSession *session, + QCryptoTLSSessionWriteFunc writeFunc, + QCryptoTLSSessionReadFunc readFunc, + void *opaque) +{ + session->writeFunc = writeFunc; + session->readFunc = readFunc; + session->opaque = opaque; +} + + +ssize_t +qcrypto_tls_session_write(QCryptoTLSSession *session, + const char *buf, + size_t len) +{ + ssize_t ret = gnutls_record_send(session->handle, buf, len); + + if (ret < 0) { + switch (ret) { + case GNUTLS_E_AGAIN: + errno = EAGAIN; + break; + case GNUTLS_E_INTERRUPTED: + errno = EINTR; + break; + default: + errno = EIO; + break; + } + ret = -1; + } + + return ret; +} + + +ssize_t +qcrypto_tls_session_read(QCryptoTLSSession *session, + char *buf, + size_t len) +{ + ssize_t ret = gnutls_record_recv(session->handle, buf, len); + + if (ret < 0) { + switch (ret) { + case GNUTLS_E_AGAIN: + errno = EAGAIN; + break; + case GNUTLS_E_INTERRUPTED: + errno = EINTR; + break; + case GNUTLS_E_PREMATURE_TERMINATION: + errno = ECONNABORTED; + break; + default: + errno = EIO; + break; + } + ret = -1; + } + + return ret; +} + + +int +qcrypto_tls_session_handshake(QCryptoTLSSession *session, + Error **errp) +{ + int ret = gnutls_handshake(session->handle); + if (ret == 0) { + session->handshakeComplete = true; + } else { + if (ret == GNUTLS_E_INTERRUPTED || + ret == GNUTLS_E_AGAIN) { + ret = 1; + } else { + error_setg(errp, "TLS handshake failed: %s", + gnutls_strerror(ret)); + ret = -1; + } + } + + return ret; +} + + +QCryptoTLSSessionHandshakeStatus +qcrypto_tls_session_get_handshake_status(QCryptoTLSSession *session) +{ + if (session->handshakeComplete) { + return QCRYPTO_TLS_HANDSHAKE_COMPLETE; + } else if (gnutls_record_get_direction(session->handle) == 0) { + return QCRYPTO_TLS_HANDSHAKE_RECVING; + } else { + return QCRYPTO_TLS_HANDSHAKE_SENDING; + } +} + + +int +qcrypto_tls_session_get_key_size(QCryptoTLSSession *session, + Error **errp) +{ + gnutls_cipher_algorithm_t cipher; + int ssf; + + cipher = gnutls_cipher_get(session->handle); + ssf = gnutls_cipher_get_key_size(cipher); + if (!ssf) { + error_setg(errp, "Cannot get TLS cipher key size"); + return -1; + } + return ssf; +} + + +char * +qcrypto_tls_session_get_peer_name(QCryptoTLSSession *session) +{ + if (session->peername) { + return g_strdup(session->peername); + } + return NULL; +} + + +#else /* ! CONFIG_GNUTLS */ + + +QCryptoTLSSession * +qcrypto_tls_session_new(QCryptoTLSCreds *creds G_GNUC_UNUSED, + const char *hostname G_GNUC_UNUSED, + const char *authzid G_GNUC_UNUSED, + QCryptoTLSCredsEndpoint endpoint G_GNUC_UNUSED, + Error **errp) +{ + error_setg(errp, "TLS requires GNUTLS support"); + return NULL; +} + + +void +qcrypto_tls_session_free(QCryptoTLSSession *sess G_GNUC_UNUSED) +{ +} + + +int +qcrypto_tls_session_check_credentials(QCryptoTLSSession *sess G_GNUC_UNUSED, + Error **errp) +{ + error_setg(errp, "TLS requires GNUTLS support"); + return -1; +} + + +void +qcrypto_tls_session_set_callbacks( + QCryptoTLSSession *sess G_GNUC_UNUSED, + QCryptoTLSSessionWriteFunc writeFunc G_GNUC_UNUSED, + QCryptoTLSSessionReadFunc readFunc G_GNUC_UNUSED, + void *opaque G_GNUC_UNUSED) +{ +} + + +ssize_t +qcrypto_tls_session_write(QCryptoTLSSession *sess, + const char *buf, + size_t len) +{ + errno = -EIO; + return -1; +} + + +ssize_t +qcrypto_tls_session_read(QCryptoTLSSession *sess, + char *buf, + size_t len) +{ + errno = -EIO; + return -1; +} + + +int +qcrypto_tls_session_handshake(QCryptoTLSSession *sess, + Error **errp) +{ + error_setg(errp, "TLS requires GNUTLS support"); + return -1; +} + + +QCryptoTLSSessionHandshakeStatus +qcrypto_tls_session_get_handshake_status(QCryptoTLSSession *sess) +{ + return QCRYPTO_TLS_HANDSHAKE_COMPLETE; +} + + +int +qcrypto_tls_session_get_key_size(QCryptoTLSSession *sess, + Error **errp) +{ + error_setg(errp, "TLS requires GNUTLS support"); + return -1; +} + + +char * +qcrypto_tls_session_get_peer_name(QCryptoTLSSession *sess) +{ + return NULL; +} + +#endif diff --git a/crypto/trace-events b/crypto/trace-events new file mode 100644 index 000000000..798b6067a --- /dev/null +++ b/crypto/trace-events @@ -0,0 +1,28 @@ +# See docs/devel/tracing.txt for syntax documentation. + +# tlscreds.c +qcrypto_tls_creds_load_dh(void *creds, const char *filename) "TLS creds load DH creds=%p filename=%s" +qcrypto_tls_creds_get_path(void *creds, const char *filename, const char *path) "TLS creds path creds=%p filename=%s path=%s" + +# tlscredsanon.c +qcrypto_tls_creds_anon_load(void *creds, const char *dir) "TLS creds anon load creds=%p dir=%s" + +# tlscredspsk.c +qcrypto_tls_creds_psk_load(void *creds, const char *dir) "TLS creds psk load creds=%p dir=%s" + +# tlscredsx509.c +qcrypto_tls_creds_x509_load(void *creds, const char *dir) "TLS creds x509 load creds=%p dir=%s" +qcrypto_tls_creds_x509_check_basic_constraints(void *creds, const char *file, int status) "TLS creds x509 check basic constraints creds=%p file=%s status=%d" +qcrypto_tls_creds_x509_check_key_usage(void *creds, const char *file, int status, int usage, int critical) "TLS creds x509 check key usage creds=%p file=%s status=%d usage=%d critical=%d" +qcrypto_tls_creds_x509_check_key_purpose(void *creds, const char *file, int status, const char *usage, int critical) "TLS creds x509 check key usage creds=%p file=%s status=%d usage=%s critical=%d" +qcrypto_tls_creds_x509_load_cert(void *creds, int isServer, const char *file) "TLS creds x509 load cert creds=%p isServer=%d file=%s" +qcrypto_tls_creds_x509_load_cert_list(void *creds, const char *file) "TLS creds x509 load cert list creds=%p file=%s" + +# tlssession.c +qcrypto_tls_session_new(void *session, void *creds, const char *hostname, const char *authzid, int endpoint) "TLS session new session=%p creds=%p hostname=%s authzid=%s endpoint=%d" +qcrypto_tls_session_check_creds(void *session, const char *status) "TLS session check creds session=%p status=%s" + +# tls-cipher-suites.c +qcrypto_tls_cipher_suite_priority(const char *name) "priority: %s" +qcrypto_tls_cipher_suite_info(uint8_t data0, uint8_t data1, const char *version, const char *name) "data=[0x%02x,0x%02x] version=%s name=%s" +qcrypto_tls_cipher_suite_count(unsigned count) "count: %u" diff --git a/crypto/trace.h b/crypto/trace.h new file mode 100644 index 000000000..a9af0f315 --- /dev/null +++ b/crypto/trace.h @@ -0,0 +1 @@ +#include "trace/trace-crypto.h" diff --git a/crypto/xts.c b/crypto/xts.c new file mode 100644 index 000000000..d4a49fdb7 --- /dev/null +++ b/crypto/xts.c @@ -0,0 +1,250 @@ +/* + * QEMU Crypto XTS cipher mode + * + * Copyright (c) 2015-2016 Red Hat, Inc. + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, see . + * + * This code is originally derived from public domain / WTFPL code in + * LibTomCrypt crytographic library http://libtom.org. The XTS code + * was donated by Elliptic Semiconductor Inc (www.ellipticsemi.com) + * to the LibTom Projects + * + */ + +#include "qemu/osdep.h" +#include "qemu/bswap.h" +#include "crypto/xts.h" + +typedef union { + uint8_t b[XTS_BLOCK_SIZE]; + uint64_t u[2]; +} xts_uint128; + +static inline void xts_uint128_xor(xts_uint128 *D, + const xts_uint128 *S1, + const xts_uint128 *S2) +{ + D->u[0] = S1->u[0] ^ S2->u[0]; + D->u[1] = S1->u[1] ^ S2->u[1]; +} + +static inline void xts_uint128_cpu_to_les(xts_uint128 *v) +{ + cpu_to_le64s(&v->u[0]); + cpu_to_le64s(&v->u[1]); +} + +static inline void xts_uint128_le_to_cpus(xts_uint128 *v) +{ + le64_to_cpus(&v->u[0]); + le64_to_cpus(&v->u[1]); +} + +static void xts_mult_x(xts_uint128 *I) +{ + uint64_t tt; + + xts_uint128_le_to_cpus(I); + + tt = I->u[0] >> 63; + I->u[0] <<= 1; + + if (I->u[1] >> 63) { + I->u[0] ^= 0x87; + } + I->u[1] <<= 1; + I->u[1] |= tt; + + xts_uint128_cpu_to_les(I); +} + + +/** + * xts_tweak_encdec: + * @param ctxt: the cipher context + * @param func: the cipher function + * @src: buffer providing the input text of XTS_BLOCK_SIZE bytes + * @dst: buffer to output the output text of XTS_BLOCK_SIZE bytes + * @iv: the initialization vector tweak of XTS_BLOCK_SIZE bytes + * + * Encrypt/decrypt data with a tweak + */ +static inline void xts_tweak_encdec(const void *ctx, + xts_cipher_func *func, + const xts_uint128 *src, + xts_uint128 *dst, + xts_uint128 *iv) +{ + /* tweak encrypt block i */ + xts_uint128_xor(dst, src, iv); + + func(ctx, XTS_BLOCK_SIZE, dst->b, dst->b); + + xts_uint128_xor(dst, dst, iv); + + /* LFSR the tweak */ + xts_mult_x(iv); +} + + +void xts_decrypt(const void *datactx, + const void *tweakctx, + xts_cipher_func *encfunc, + xts_cipher_func *decfunc, + uint8_t *iv, + size_t length, + uint8_t *dst, + const uint8_t *src) +{ + xts_uint128 PP, CC, T; + unsigned long i, m, mo, lim; + + /* get number of blocks */ + m = length >> 4; + mo = length & 15; + + /* must have at least one full block */ + g_assert(m != 0); + + if (mo == 0) { + lim = m; + } else { + lim = m - 1; + } + + /* encrypt the iv */ + encfunc(tweakctx, XTS_BLOCK_SIZE, T.b, iv); + + if (QEMU_PTR_IS_ALIGNED(src, sizeof(uint64_t)) && + QEMU_PTR_IS_ALIGNED(dst, sizeof(uint64_t))) { + xts_uint128 *S = (xts_uint128 *)src; + xts_uint128 *D = (xts_uint128 *)dst; + for (i = 0; i < lim; i++, S++, D++) { + xts_tweak_encdec(datactx, decfunc, S, D, &T); + } + } else { + xts_uint128 D; + + for (i = 0; i < lim; i++) { + memcpy(&D, src, XTS_BLOCK_SIZE); + xts_tweak_encdec(datactx, decfunc, &D, &D, &T); + memcpy(dst, &D, XTS_BLOCK_SIZE); + src += XTS_BLOCK_SIZE; + dst += XTS_BLOCK_SIZE; + } + } + + /* if length is not a multiple of XTS_BLOCK_SIZE then */ + if (mo > 0) { + xts_uint128 S, D; + memcpy(&CC, &T, XTS_BLOCK_SIZE); + xts_mult_x(&CC); + + /* PP = tweak decrypt block m-1 */ + memcpy(&S, src, XTS_BLOCK_SIZE); + xts_tweak_encdec(datactx, decfunc, &S, &PP, &CC); + + /* Pm = first length % XTS_BLOCK_SIZE bytes of PP */ + for (i = 0; i < mo; i++) { + CC.b[i] = src[XTS_BLOCK_SIZE + i]; + dst[XTS_BLOCK_SIZE + i] = PP.b[i]; + } + for (; i < XTS_BLOCK_SIZE; i++) { + CC.b[i] = PP.b[i]; + } + + /* Pm-1 = Tweak uncrypt CC */ + xts_tweak_encdec(datactx, decfunc, &CC, &D, &T); + memcpy(dst, &D, XTS_BLOCK_SIZE); + } + + /* Decrypt the iv back */ + decfunc(tweakctx, XTS_BLOCK_SIZE, iv, T.b); +} + + +void xts_encrypt(const void *datactx, + const void *tweakctx, + xts_cipher_func *encfunc, + xts_cipher_func *decfunc, + uint8_t *iv, + size_t length, + uint8_t *dst, + const uint8_t *src) +{ + xts_uint128 PP, CC, T; + unsigned long i, m, mo, lim; + + /* get number of blocks */ + m = length >> 4; + mo = length & 15; + + /* must have at least one full block */ + g_assert(m != 0); + + if (mo == 0) { + lim = m; + } else { + lim = m - 1; + } + + /* encrypt the iv */ + encfunc(tweakctx, XTS_BLOCK_SIZE, T.b, iv); + + if (QEMU_PTR_IS_ALIGNED(src, sizeof(uint64_t)) && + QEMU_PTR_IS_ALIGNED(dst, sizeof(uint64_t))) { + xts_uint128 *S = (xts_uint128 *)src; + xts_uint128 *D = (xts_uint128 *)dst; + for (i = 0; i < lim; i++, S++, D++) { + xts_tweak_encdec(datactx, encfunc, S, D, &T); + } + } else { + xts_uint128 D; + + for (i = 0; i < lim; i++) { + memcpy(&D, src, XTS_BLOCK_SIZE); + xts_tweak_encdec(datactx, encfunc, &D, &D, &T); + memcpy(dst, &D, XTS_BLOCK_SIZE); + + dst += XTS_BLOCK_SIZE; + src += XTS_BLOCK_SIZE; + } + } + + /* if length is not a multiple of XTS_BLOCK_SIZE then */ + if (mo > 0) { + xts_uint128 S, D; + /* CC = tweak encrypt block m-1 */ + memcpy(&S, src, XTS_BLOCK_SIZE); + xts_tweak_encdec(datactx, encfunc, &S, &CC, &T); + + /* Cm = first length % XTS_BLOCK_SIZE bytes of CC */ + for (i = 0; i < mo; i++) { + PP.b[i] = src[XTS_BLOCK_SIZE + i]; + dst[XTS_BLOCK_SIZE + i] = CC.b[i]; + } + + for (; i < XTS_BLOCK_SIZE; i++) { + PP.b[i] = CC.b[i]; + } + + /* Cm-1 = Tweak encrypt PP */ + xts_tweak_encdec(datactx, encfunc, &PP, &D, &T); + memcpy(dst, &D, XTS_BLOCK_SIZE); + } + + /* Decrypt the iv back */ + decfunc(tweakctx, XTS_BLOCK_SIZE, iv, T.b); +} diff --git a/include/authz/base.h b/include/authz/base.h new file mode 100644 index 000000000..b53e4e450 --- /dev/null +++ b/include/authz/base.h @@ -0,0 +1,101 @@ +/* + * QEMU authorization framework base class + * + * Copyright (c) 2018 Red Hat, Inc. + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, see . + * + */ + +#ifndef QAUTHZ_BASE_H +#define QAUTHZ_BASE_H + +#include "qapi/error.h" +#include "qom/object.h" + + +#define TYPE_QAUTHZ "authz" + +OBJECT_DECLARE_TYPE(QAuthZ, QAuthZClass, + QAUTHZ) + + +/** + * QAuthZ: + * + * The QAuthZ class defines an API contract to be used + * for providing an authorization driver for services + * with user identities. + */ + +struct QAuthZ { + Object parent_obj; +}; + + +struct QAuthZClass { + ObjectClass parent_class; + + bool (*is_allowed)(QAuthZ *authz, + const char *identity, + Error **errp); +}; + + +/** + * qauthz_is_allowed: + * @authz: the authorization object + * @identity: the user identity to authorize + * @errp: pointer to a NULL initialized error object + * + * Check if a user @identity is authorized. If an error + * occurs this method will return false to indicate + * denial, as well as setting @errp to contain the details. + * Callers are recommended to treat the denial and error + * scenarios identically. Specifically the error info in + * @errp should never be fed back to the user being + * authorized, it is merely for benefit of administrator + * debugging. + * + * Returns: true if @identity is authorized, false if denied or if + * an error occurred. + */ +bool qauthz_is_allowed(QAuthZ *authz, + const char *identity, + Error **errp); + + +/** + * qauthz_is_allowed_by_id: + * @authzid: ID of the authorization object + * @identity: the user identity to authorize + * @errp: pointer to a NULL initialized error object + * + * Check if a user @identity is authorized. If an error + * occurs this method will return false to indicate + * denial, as well as setting @errp to contain the details. + * Callers are recommended to treat the denial and error + * scenarios identically. Specifically the error info in + * @errp should never be fed back to the user being + * authorized, it is merely for benefit of administrator + * debugging. + * + * Returns: true if @identity is authorized, false if denied or if + * an error occurred. + */ +bool qauthz_is_allowed_by_id(const char *authzid, + const char *identity, + Error **errp); + +#endif /* QAUTHZ_BASE_H */ diff --git a/include/authz/list.h b/include/authz/list.h new file mode 100644 index 000000000..7ef4ad4b4 --- /dev/null +++ b/include/authz/list.h @@ -0,0 +1,94 @@ +/* + * QEMU list authorization driver + * + * Copyright (c) 2018 Red Hat, Inc. + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, see . + * + */ + +#ifndef QAUTHZ_LIST_H +#define QAUTHZ_LIST_H + +#include "authz/base.h" +#include "qapi/qapi-types-authz.h" +#include "qom/object.h" + +#define TYPE_QAUTHZ_LIST "authz-list" + +OBJECT_DECLARE_SIMPLE_TYPE(QAuthZList, + QAUTHZ_LIST) + + + +/** + * QAuthZList: + * + * This authorization driver provides a list mechanism + * for granting access by matching user names against a + * list of globs. Each match rule has an associated policy + * and a catch all policy applies if no rule matches + * + * To create an instance of this class via QMP: + * + * { + * "execute": "object-add", + * "arguments": { + * "qom-type": "authz-list", + * "id": "authz0", + * "props": { + * "rules": [ + * { "match": "fred", "policy": "allow", "format": "exact" }, + * { "match": "bob", "policy": "allow", "format": "exact" }, + * { "match": "danb", "policy": "deny", "format": "exact" }, + * { "match": "dan*", "policy": "allow", "format": "glob" } + * ], + * "policy": "deny" + * } + * } + * } + * + */ +struct QAuthZList { + QAuthZ parent_obj; + + QAuthZListPolicy policy; + QAuthZListRuleList *rules; +}; + + + + +QAuthZList *qauthz_list_new(const char *id, + QAuthZListPolicy policy, + Error **errp); + +ssize_t qauthz_list_append_rule(QAuthZList *auth, + const char *match, + QAuthZListPolicy policy, + QAuthZListFormat format, + Error **errp); + +ssize_t qauthz_list_insert_rule(QAuthZList *auth, + const char *match, + QAuthZListPolicy policy, + QAuthZListFormat format, + size_t index, + Error **errp); + +ssize_t qauthz_list_delete_rule(QAuthZList *auth, + const char *match); + + +#endif /* QAUTHZ_LIST_H */ diff --git a/include/authz/listfile.h b/include/authz/listfile.h new file mode 100644 index 000000000..0a1e5bddd --- /dev/null +++ b/include/authz/listfile.h @@ -0,0 +1,97 @@ +/* + * QEMU list file authorization driver + * + * Copyright (c) 2018 Red Hat, Inc. + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, see . + * + */ + +#ifndef QAUTHZ_LISTFILE_H +#define QAUTHZ_LISTFILE_H + +#include "authz/list.h" +#include "qemu/filemonitor.h" +#include "qom/object.h" + +#define TYPE_QAUTHZ_LIST_FILE "authz-list-file" + +OBJECT_DECLARE_SIMPLE_TYPE(QAuthZListFile, + QAUTHZ_LIST_FILE) + + + +/** + * QAuthZListFile: + * + * This authorization driver provides a file mechanism + * for granting access by matching user names against a + * file of globs. Each match rule has an associated policy + * and a catch all policy applies if no rule matches + * + * To create an instance of this class via QMP: + * + * { + * "execute": "object-add", + * "arguments": { + * "qom-type": "authz-list-file", + * "id": "authz0", + * "props": { + * "filename": "/etc/qemu/myvm-vnc.acl", + * "refresh": true + * } + * } + * } + * + * If 'refresh' is 'yes', inotify is used to monitor for changes + * to the file and auto-reload the rules. + * + * The myvm-vnc.acl file should contain the parameters for + * the QAuthZList object in JSON format: + * + * { + * "rules": [ + * { "match": "fred", "policy": "allow", "format": "exact" }, + * { "match": "bob", "policy": "allow", "format": "exact" }, + * { "match": "danb", "policy": "deny", "format": "exact" }, + * { "match": "dan*", "policy": "allow", "format": "glob" } + * ], + * "policy": "deny" + * } + * + * The object can be created on the command line using + * + * -object authz-list-file,id=authz0,\ + * filename=/etc/qemu/myvm-vnc.acl,refresh=yes + * + */ +struct QAuthZListFile { + QAuthZ parent_obj; + + QAuthZ *list; + char *filename; + bool refresh; + QFileMonitor *file_monitor; + int64_t file_watch; +}; + + + + +QAuthZListFile *qauthz_list_file_new(const char *id, + const char *filename, + bool refresh, + Error **errp); + +#endif /* QAUTHZ_LISTFILE_H */ diff --git a/include/authz/pamacct.h b/include/authz/pamacct.h new file mode 100644 index 000000000..592edb2bf --- /dev/null +++ b/include/authz/pamacct.h @@ -0,0 +1,88 @@ +/* + * QEMU PAM authorization driver + * + * Copyright (c) 2018 Red Hat, Inc. + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, see . + * + */ + +#ifndef QAUTHZ_PAMACCT_H +#define QAUTHZ_PAMACCT_H + +#include "authz/base.h" +#include "qom/object.h" + + +#define TYPE_QAUTHZ_PAM "authz-pam" + +OBJECT_DECLARE_SIMPLE_TYPE(QAuthZPAM, + QAUTHZ_PAM) + + + +/** + * QAuthZPAM: + * + * This authorization driver provides a PAM mechanism + * for granting access by matching user names against a + * list of globs. Each match rule has an associated policy + * and a catch all policy applies if no rule matches + * + * To create an instance of this class via QMP: + * + * { + * "execute": "object-add", + * "arguments": { + * "qom-type": "authz-pam", + * "id": "authz0", + * "parameters": { + * "service": "qemu-vnc-tls" + * } + * } + * } + * + * The driver only uses the PAM "account" verification + * subsystem. The above config would require a config + * file /etc/pam.d/qemu-vnc-tls. For a simple file + * lookup it would contain + * + * account requisite pam_listfile.so item=user sense=allow \ + * file=/etc/qemu/vnc.allow + * + * The external file would then contain a list of usernames. + * If x509 cert was being used as the username, a suitable + * entry would match the distinguish name: + * + * CN=laptop.berrange.com,O=Berrange Home,L=London,ST=London,C=GB + * + * On the command line it can be created using + * + * -object authz-pam,id=authz0,service=qemu-vnc-tls + * + */ +struct QAuthZPAM { + QAuthZ parent_obj; + + char *service; +}; + + + + +QAuthZPAM *qauthz_pam_new(const char *id, + const char *service, + Error **errp); + +#endif /* QAUTHZ_PAMACCT_H */ diff --git a/include/authz/simple.h b/include/authz/simple.h new file mode 100644 index 000000000..c46a5ac5a --- /dev/null +++ b/include/authz/simple.h @@ -0,0 +1,72 @@ +/* + * QEMU simple authorization driver + * + * Copyright (c) 2018 Red Hat, Inc. + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, see . + * + */ + +#ifndef QAUTHZ_SIMPLE_H +#define QAUTHZ_SIMPLE_H + +#include "authz/base.h" +#include "qom/object.h" + +#define TYPE_QAUTHZ_SIMPLE "authz-simple" + +OBJECT_DECLARE_SIMPLE_TYPE(QAuthZSimple, + QAUTHZ_SIMPLE) + + + +/** + * QAuthZSimple: + * + * This authorization driver provides a simple mechanism + * for granting access based on an exact matched username. + * + * To create an instance of this class via QMP: + * + * { + * "execute": "object-add", + * "arguments": { + * "qom-type": "authz-simple", + * "id": "authz0", + * "props": { + * "identity": "fred" + * } + * } + * } + * + * Or via the command line + * + * -object authz-simple,id=authz0,identity=fred + * + */ +struct QAuthZSimple { + QAuthZ parent_obj; + + char *identity; +}; + + + + +QAuthZSimple *qauthz_simple_new(const char *id, + const char *identity, + Error **errp); + + +#endif /* QAUTHZ_SIMPLE_H */ diff --git a/include/block/accounting.h b/include/block/accounting.h new file mode 100644 index 000000000..878b4c358 --- /dev/null +++ b/include/block/accounting.h @@ -0,0 +1,123 @@ +/* + * QEMU System Emulator block accounting + * + * Copyright (c) 2011 Christoph Hellwig + * Copyright (c) 2015 Igalia, S.L. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ +#ifndef BLOCK_ACCOUNTING_H +#define BLOCK_ACCOUNTING_H + +#include "qemu/timed-average.h" +#include "qemu/thread.h" +#include "qapi/qapi-builtin-types.h" + +typedef struct BlockAcctTimedStats BlockAcctTimedStats; +typedef struct BlockAcctStats BlockAcctStats; + +enum BlockAcctType { + BLOCK_ACCT_NONE = 0, + BLOCK_ACCT_READ, + BLOCK_ACCT_WRITE, + BLOCK_ACCT_FLUSH, + BLOCK_ACCT_UNMAP, + BLOCK_MAX_IOTYPE, +}; + +struct BlockAcctTimedStats { + BlockAcctStats *stats; + TimedAverage latency[BLOCK_MAX_IOTYPE]; + unsigned interval_length; /* in seconds */ + QSLIST_ENTRY(BlockAcctTimedStats) entries; +}; + +typedef struct BlockLatencyHistogram { + /* The following histogram is represented like this: + * + * 5| * + * 4| * + * 3| * * + * 2| * * * + * 1| * * * * + * +------------------ + * 10 50 100 + * + * BlockLatencyHistogram histogram = { + * .nbins = 4, + * .boundaries = {10, 50, 100}, + * .bins = {3, 1, 5, 2}, + * }; + * + * @boundaries array define histogram intervals as follows: + * [0, boundaries[0]), [boundaries[0], boundaries[1]), ... + * [boundaries[nbins-2], +inf) + * + * So, for example above, histogram intervals are: + * [0, 10), [10, 50), [50, 100), [100, +inf) + */ + int nbins; + uint64_t *boundaries; /* @nbins-1 numbers here + (all boundaries, except 0 and +inf) */ + uint64_t *bins; +} BlockLatencyHistogram; + +struct BlockAcctStats { + QemuMutex lock; + uint64_t nr_bytes[BLOCK_MAX_IOTYPE]; + uint64_t nr_ops[BLOCK_MAX_IOTYPE]; + uint64_t invalid_ops[BLOCK_MAX_IOTYPE]; + uint64_t failed_ops[BLOCK_MAX_IOTYPE]; + uint64_t total_time_ns[BLOCK_MAX_IOTYPE]; + uint64_t merged[BLOCK_MAX_IOTYPE]; + int64_t last_access_time_ns; + QSLIST_HEAD(, BlockAcctTimedStats) intervals; + bool account_invalid; + bool account_failed; + BlockLatencyHistogram latency_histogram[BLOCK_MAX_IOTYPE]; +}; + +typedef struct BlockAcctCookie { + int64_t bytes; + int64_t start_time_ns; + enum BlockAcctType type; +} BlockAcctCookie; + +void block_acct_init(BlockAcctStats *stats); +void block_acct_setup(BlockAcctStats *stats, bool account_invalid, + bool account_failed); +void block_acct_cleanup(BlockAcctStats *stats); +void block_acct_add_interval(BlockAcctStats *stats, unsigned interval_length); +BlockAcctTimedStats *block_acct_interval_next(BlockAcctStats *stats, + BlockAcctTimedStats *s); +void block_acct_start(BlockAcctStats *stats, BlockAcctCookie *cookie, + int64_t bytes, enum BlockAcctType type); +void block_acct_done(BlockAcctStats *stats, BlockAcctCookie *cookie); +void block_acct_failed(BlockAcctStats *stats, BlockAcctCookie *cookie); +void block_acct_invalid(BlockAcctStats *stats, enum BlockAcctType type); +void block_acct_merge_done(BlockAcctStats *stats, enum BlockAcctType type, + int num_requests); +int64_t block_acct_idle_time_ns(BlockAcctStats *stats); +double block_acct_queue_depth(BlockAcctTimedStats *stats, + enum BlockAcctType type); +int block_latency_histogram_set(BlockAcctStats *stats, enum BlockAcctType type, + uint64List *boundaries); +void block_latency_histograms_clear(BlockAcctStats *stats); + +#endif diff --git a/include/block/aio-wait.h b/include/block/aio-wait.h new file mode 100644 index 000000000..b39eefb38 --- /dev/null +++ b/include/block/aio-wait.h @@ -0,0 +1,149 @@ +/* + * AioContext wait support + * + * Copyright (C) 2018 Red Hat, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ + +#ifndef QEMU_AIO_WAIT_H +#define QEMU_AIO_WAIT_H + +#include "block/aio.h" +#include "qemu/main-loop.h" + +/** + * AioWait: + * + * An object that facilitates synchronous waiting on a condition. A single + * global AioWait object (global_aio_wait) is used internally. + * + * The main loop can wait on an operation running in an IOThread as follows: + * + * AioContext *ctx = ...; + * MyWork work = { .done = false }; + * schedule_my_work_in_iothread(ctx, &work); + * AIO_WAIT_WHILE(ctx, !work.done); + * + * The IOThread must call aio_wait_kick() to notify the main loop when + * work.done changes: + * + * static void do_work(...) + * { + * ... + * work.done = true; + * aio_wait_kick(); + * } + */ +typedef struct { + /* Number of waiting AIO_WAIT_WHILE() callers. Accessed with atomic ops. */ + unsigned num_waiters; +} AioWait; + +extern AioWait global_aio_wait; + +/** + * AIO_WAIT_WHILE: + * @ctx: the aio context, or NULL if multiple aio contexts (for which the + * caller does not hold a lock) are involved in the polling condition. + * @cond: wait while this conditional expression is true + * + * Wait while a condition is true. Use this to implement synchronous + * operations that require event loop activity. + * + * The caller must be sure that something calls aio_wait_kick() when the value + * of @cond might have changed. + * + * The caller's thread must be the IOThread that owns @ctx or the main loop + * thread (with @ctx acquired exactly once). This function cannot be used to + * wait on conditions between two IOThreads since that could lead to deadlock, + * go via the main loop instead. + */ +#define AIO_WAIT_WHILE(ctx, cond) ({ \ + bool waited_ = false; \ + AioWait *wait_ = &global_aio_wait; \ + AioContext *ctx_ = (ctx); \ + /* Increment wait_->num_waiters before evaluating cond. */ \ + qatomic_inc(&wait_->num_waiters); \ + if (ctx_ && in_aio_context_home_thread(ctx_)) { \ + while ((cond)) { \ + aio_poll(ctx_, true); \ + waited_ = true; \ + } \ + } else { \ + assert(qemu_get_current_aio_context() == \ + qemu_get_aio_context()); \ + while ((cond)) { \ + if (ctx_) { \ + aio_context_release(ctx_); \ + } \ + aio_poll(qemu_get_aio_context(), true); \ + if (ctx_) { \ + aio_context_acquire(ctx_); \ + } \ + waited_ = true; \ + } \ + } \ + qatomic_dec(&wait_->num_waiters); \ + waited_; }) + +/** + * aio_wait_kick: + * Wake up the main thread if it is waiting on AIO_WAIT_WHILE(). During + * synchronous operations performed in an IOThread, the main thread lets the + * IOThread's event loop run, waiting for the operation to complete. A + * aio_wait_kick() call will wake up the main thread. + */ +void aio_wait_kick(void); + +/** + * aio_wait_bh_oneshot: + * @ctx: the aio context + * @cb: the BH callback function + * @opaque: user data for the BH callback function + * + * Run a BH in @ctx and wait for it to complete. + * + * Must be called from the main loop thread with @ctx acquired exactly once. + * Note that main loop event processing may occur. + */ +void aio_wait_bh_oneshot(AioContext *ctx, QEMUBHFunc *cb, void *opaque); + +/** + * in_aio_context_home_thread: + * @ctx: the aio context + * + * Return whether we are running in the thread that normally runs @ctx. Note + * that acquiring/releasing ctx does not affect the outcome, each AioContext + * still only has one home thread that is responsible for running it. + */ +static inline bool in_aio_context_home_thread(AioContext *ctx) +{ + if (ctx == qemu_get_current_aio_context()) { + return true; + } + + if (ctx == qemu_get_aio_context()) { + return qemu_mutex_iothread_locked(); + } else { + return false; + } +} + +#endif /* QEMU_AIO_WAIT_H */ diff --git a/include/block/aio.h b/include/block/aio.h new file mode 100644 index 000000000..5f342267d --- /dev/null +++ b/include/block/aio.h @@ -0,0 +1,730 @@ +/* + * QEMU aio implementation + * + * Copyright IBM, Corp. 2008 + * + * Authors: + * Anthony Liguori + * + * This work is licensed under the terms of the GNU GPL, version 2. See + * the COPYING file in the top-level directory. + * + */ + +#ifndef QEMU_AIO_H +#define QEMU_AIO_H + +#ifdef CONFIG_LINUX_IO_URING +#include +#endif +#include "qemu/coroutine.h" +#include "qemu/queue.h" +#include "qemu/event_notifier.h" +#include "qemu/thread.h" +#include "qemu/timer.h" + +typedef struct BlockAIOCB BlockAIOCB; +typedef void BlockCompletionFunc(void *opaque, int ret); + +typedef struct AIOCBInfo { + void (*cancel_async)(BlockAIOCB *acb); + AioContext *(*get_aio_context)(BlockAIOCB *acb); + size_t aiocb_size; +} AIOCBInfo; + +struct BlockAIOCB { + const AIOCBInfo *aiocb_info; + BlockDriverState *bs; + BlockCompletionFunc *cb; + void *opaque; + int refcnt; +}; + +void *qemu_aio_get(const AIOCBInfo *aiocb_info, BlockDriverState *bs, + BlockCompletionFunc *cb, void *opaque); +void qemu_aio_unref(void *p); +void qemu_aio_ref(void *p); + +typedef struct AioHandler AioHandler; +typedef QLIST_HEAD(, AioHandler) AioHandlerList; +typedef void QEMUBHFunc(void *opaque); +typedef bool AioPollFn(void *opaque); +typedef void IOHandler(void *opaque); + +struct Coroutine; +struct ThreadPool; +struct LinuxAioState; +struct LuringState; + +/* Is polling disabled? */ +bool aio_poll_disabled(AioContext *ctx); + +/* Callbacks for file descriptor monitoring implementations */ +typedef struct { + /* + * update: + * @ctx: the AioContext + * @old_node: the existing handler or NULL if this file descriptor is being + * monitored for the first time + * @new_node: the new handler or NULL if this file descriptor is being + * removed + * + * Add/remove/modify a monitored file descriptor. + * + * Called with ctx->list_lock acquired. + */ + void (*update)(AioContext *ctx, AioHandler *old_node, AioHandler *new_node); + + /* + * wait: + * @ctx: the AioContext + * @ready_list: list for handlers that become ready + * @timeout: maximum duration to wait, in nanoseconds + * + * Wait for file descriptors to become ready and place them on ready_list. + * + * Called with ctx->list_lock incremented but not locked. + * + * Returns: number of ready file descriptors. + */ + int (*wait)(AioContext *ctx, AioHandlerList *ready_list, int64_t timeout); + + /* + * need_wait: + * @ctx: the AioContext + * + * Tell aio_poll() when to stop userspace polling early because ->wait() + * has fds ready. + * + * File descriptor monitoring implementations that cannot poll fd readiness + * from userspace should use aio_poll_disabled() here. This ensures that + * file descriptors are not starved by handlers that frequently make + * progress via userspace polling. + * + * Returns: true if ->wait() should be called, false otherwise. + */ + bool (*need_wait)(AioContext *ctx); +} FDMonOps; + +/* + * Each aio_bh_poll() call carves off a slice of the BH list, so that newly + * scheduled BHs are not processed until the next aio_bh_poll() call. All + * active aio_bh_poll() calls chain their slices together in a list, so that + * nested aio_bh_poll() calls process all scheduled bottom halves. + */ +typedef QSLIST_HEAD(, QEMUBH) BHList; +typedef struct BHListSlice BHListSlice; +struct BHListSlice { + BHList bh_list; + QSIMPLEQ_ENTRY(BHListSlice) next; +}; + +typedef QSLIST_HEAD(, AioHandler) AioHandlerSList; + +struct AioContext { + GSource source; + + /* Used by AioContext users to protect from multi-threaded access. */ + QemuRecMutex lock; + + /* The list of registered AIO handlers. Protected by ctx->list_lock. */ + AioHandlerList aio_handlers; + + /* The list of AIO handlers to be deleted. Protected by ctx->list_lock. */ + AioHandlerList deleted_aio_handlers; + + /* Used to avoid unnecessary event_notifier_set calls in aio_notify; + * only written from the AioContext home thread, or under the BQL in + * the case of the main AioContext. However, it is read from any + * thread so it is still accessed with atomic primitives. + * + * If this field is 0, everything (file descriptors, bottom halves, + * timers) will be re-evaluated before the next blocking poll() or + * io_uring wait; therefore, the event_notifier_set call can be + * skipped. If it is non-zero, you may need to wake up a concurrent + * aio_poll or the glib main event loop, making event_notifier_set + * necessary. + * + * Bit 0 is reserved for GSource usage of the AioContext, and is 1 + * between a call to aio_ctx_prepare and the next call to aio_ctx_check. + * Bits 1-31 simply count the number of active calls to aio_poll + * that are in the prepare or poll phase. + * + * The GSource and aio_poll must use a different mechanism because + * there is no certainty that a call to GSource's prepare callback + * (via g_main_context_prepare) is indeed followed by check and + * dispatch. It's not clear whether this would be a bug, but let's + * play safe and allow it---it will just cause extra calls to + * event_notifier_set until the next call to dispatch. + * + * Instead, the aio_poll calls include both the prepare and the + * dispatch phase, hence a simple counter is enough for them. + */ + uint32_t notify_me; + + /* A lock to protect between QEMUBH and AioHandler adders and deleter, + * and to ensure that no callbacks are removed while we're walking and + * dispatching them. + */ + QemuLockCnt list_lock; + + /* Bottom Halves pending aio_bh_poll() processing */ + BHList bh_list; + + /* Chained BH list slices for each nested aio_bh_poll() call */ + QSIMPLEQ_HEAD(, BHListSlice) bh_slice_list; + + /* Used by aio_notify. + * + * "notified" is used to avoid expensive event_notifier_test_and_clear + * calls. When it is clear, the EventNotifier is clear, or one thread + * is going to clear "notified" before processing more events. False + * positives are possible, i.e. "notified" could be set even though the + * EventNotifier is clear. + * + * Note that event_notifier_set *cannot* be optimized the same way. For + * more information on the problem that would result, see "#ifdef BUG2" + * in the docs/aio_notify_accept.promela formal model. + */ + bool notified; + EventNotifier notifier; + + QSLIST_HEAD(, Coroutine) scheduled_coroutines; + QEMUBH *co_schedule_bh; + + /* Thread pool for performing work and receiving completion callbacks. + * Has its own locking. + */ + struct ThreadPool *thread_pool; + +#ifdef CONFIG_LINUX_AIO + /* + * State for native Linux AIO. Uses aio_context_acquire/release for + * locking. + */ + struct LinuxAioState *linux_aio; +#endif +#ifdef CONFIG_LINUX_IO_URING + /* + * State for Linux io_uring. Uses aio_context_acquire/release for + * locking. + */ + struct LuringState *linux_io_uring; + + /* State for file descriptor monitoring using Linux io_uring */ + struct io_uring fdmon_io_uring; + AioHandlerSList submit_list; +#endif + + /* TimerLists for calling timers - one per clock type. Has its own + * locking. + */ + QEMUTimerListGroup tlg; + + int external_disable_cnt; + + /* Number of AioHandlers without .io_poll() */ + int poll_disable_cnt; + + /* Polling mode parameters */ + int64_t poll_ns; /* current polling time in nanoseconds */ + int64_t poll_max_ns; /* maximum polling time in nanoseconds */ + int64_t poll_grow; /* polling time growth factor */ + int64_t poll_shrink; /* polling time shrink factor */ + + /* + * List of handlers participating in userspace polling. Protected by + * ctx->list_lock. Iterated and modified mostly by the event loop thread + * from aio_poll() with ctx->list_lock incremented. aio_set_fd_handler() + * only touches the list to delete nodes if ctx->list_lock's count is zero. + */ + AioHandlerList poll_aio_handlers; + + /* Are we in polling mode or monitoring file descriptors? */ + bool poll_started; + + /* epoll(7) state used when built with CONFIG_EPOLL */ + int epollfd; + + const FDMonOps *fdmon_ops; +}; + +/** + * aio_context_new: Allocate a new AioContext. + * + * AioContext provide a mini event-loop that can be waited on synchronously. + * They also provide bottom halves, a service to execute a piece of code + * as soon as possible. + */ +AioContext *aio_context_new(Error **errp); + +/** + * aio_context_ref: + * @ctx: The AioContext to operate on. + * + * Add a reference to an AioContext. + */ +void aio_context_ref(AioContext *ctx); + +/** + * aio_context_unref: + * @ctx: The AioContext to operate on. + * + * Drop a reference to an AioContext. + */ +void aio_context_unref(AioContext *ctx); + +/* Take ownership of the AioContext. If the AioContext will be shared between + * threads, and a thread does not want to be interrupted, it will have to + * take ownership around calls to aio_poll(). Otherwise, aio_poll() + * automatically takes care of calling aio_context_acquire and + * aio_context_release. + * + * Note that this is separate from bdrv_drained_begin/bdrv_drained_end. A + * thread still has to call those to avoid being interrupted by the guest. + * + * Bottom halves, timers and callbacks can be created or removed without + * acquiring the AioContext. + */ +void aio_context_acquire(AioContext *ctx); + +/* Relinquish ownership of the AioContext. */ +void aio_context_release(AioContext *ctx); + +/** + * aio_bh_schedule_oneshot: Allocate a new bottom half structure that will run + * only once and as soon as possible. + */ +void aio_bh_schedule_oneshot(AioContext *ctx, QEMUBHFunc *cb, void *opaque); + +/** + * aio_bh_new: Allocate a new bottom half structure. + * + * Bottom halves are lightweight callbacks whose invocation is guaranteed + * to be wait-free, thread-safe and signal-safe. The #QEMUBH structure + * is opaque and must be allocated prior to its use. + */ +QEMUBH *aio_bh_new(AioContext *ctx, QEMUBHFunc *cb, void *opaque); + +/** + * aio_notify: Force processing of pending events. + * + * Similar to signaling a condition variable, aio_notify forces + * aio_poll to exit, so that the next call will re-examine pending events. + * The caller of aio_notify will usually call aio_poll again very soon, + * or go through another iteration of the GLib main loop. Hence, aio_notify + * also has the side effect of recalculating the sets of file descriptors + * that the main loop waits for. + * + * Calling aio_notify is rarely necessary, because for example scheduling + * a bottom half calls it already. + */ +void aio_notify(AioContext *ctx); + +/** + * aio_notify_accept: Acknowledge receiving an aio_notify. + * + * aio_notify() uses an EventNotifier in order to wake up a sleeping + * aio_poll() or g_main_context_iteration(). Calls to aio_notify() are + * usually rare, but the AioContext has to clear the EventNotifier on + * every aio_poll() or g_main_context_iteration() in order to avoid + * busy waiting. This event_notifier_test_and_clear() cannot be done + * using the usual aio_context_set_event_notifier(), because it must + * be done before processing all events (file descriptors, bottom halves, + * timers). + * + * aio_notify_accept() is an optimized event_notifier_test_and_clear() + * that is specific to an AioContext's notifier; it is used internally + * to clear the EventNotifier only if aio_notify() had been called. + */ +void aio_notify_accept(AioContext *ctx); + +/** + * aio_bh_call: Executes callback function of the specified BH. + */ +void aio_bh_call(QEMUBH *bh); + +/** + * aio_bh_poll: Poll bottom halves for an AioContext. + * + * These are internal functions used by the QEMU main loop. + * And notice that multiple occurrences of aio_bh_poll cannot + * be called concurrently + */ +int aio_bh_poll(AioContext *ctx); + +/** + * qemu_bh_schedule: Schedule a bottom half. + * + * Scheduling a bottom half interrupts the main loop and causes the + * execution of the callback that was passed to qemu_bh_new. + * + * Bottom halves that are scheduled from a bottom half handler are instantly + * invoked. This can create an infinite loop if a bottom half handler + * schedules itself. + * + * @bh: The bottom half to be scheduled. + */ +void qemu_bh_schedule(QEMUBH *bh); + +/** + * qemu_bh_cancel: Cancel execution of a bottom half. + * + * Canceling execution of a bottom half undoes the effect of calls to + * qemu_bh_schedule without freeing its resources yet. While cancellation + * itself is also wait-free and thread-safe, it can of course race with the + * loop that executes bottom halves unless you are holding the iothread + * mutex. This makes it mostly useless if you are not holding the mutex. + * + * @bh: The bottom half to be canceled. + */ +void qemu_bh_cancel(QEMUBH *bh); + +/** + *qemu_bh_delete: Cancel execution of a bottom half and free its resources. + * + * Deleting a bottom half frees the memory that was allocated for it by + * qemu_bh_new. It also implies canceling the bottom half if it was + * scheduled. + * This func is async. The bottom half will do the delete action at the finial + * end. + * + * @bh: The bottom half to be deleted. + */ +void qemu_bh_delete(QEMUBH *bh); + +/* Return whether there are any pending callbacks from the GSource + * attached to the AioContext, before g_poll is invoked. + * + * This is used internally in the implementation of the GSource. + */ +bool aio_prepare(AioContext *ctx); + +/* Return whether there are any pending callbacks from the GSource + * attached to the AioContext, after g_poll is invoked. + * + * This is used internally in the implementation of the GSource. + */ +bool aio_pending(AioContext *ctx); + +/* Dispatch any pending callbacks from the GSource attached to the AioContext. + * + * This is used internally in the implementation of the GSource. + */ +void aio_dispatch(AioContext *ctx); + +/* Progress in completing AIO work to occur. This can issue new pending + * aio as a result of executing I/O completion or bh callbacks. + * + * Return whether any progress was made by executing AIO or bottom half + * handlers. If @blocking == true, this should always be true except + * if someone called aio_notify. + * + * If there are no pending bottom halves, but there are pending AIO + * operations, it may not be possible to make any progress without + * blocking. If @blocking is true, this function will wait until one + * or more AIO events have completed, to ensure something has moved + * before returning. + */ +bool aio_poll(AioContext *ctx, bool blocking); + +/* Register a file descriptor and associated callbacks. Behaves very similarly + * to qemu_set_fd_handler. Unlike qemu_set_fd_handler, these callbacks will + * be invoked when using aio_poll(). + * + * Code that invokes AIO completion functions should rely on this function + * instead of qemu_set_fd_handler[2]. + */ +void aio_set_fd_handler(AioContext *ctx, + int fd, + bool is_external, + IOHandler *io_read, + IOHandler *io_write, + AioPollFn *io_poll, + void *opaque); + +/* Set polling begin/end callbacks for a file descriptor that has already been + * registered with aio_set_fd_handler. Do nothing if the file descriptor is + * not registered. + */ +void aio_set_fd_poll(AioContext *ctx, int fd, + IOHandler *io_poll_begin, + IOHandler *io_poll_end); + +/* Register an event notifier and associated callbacks. Behaves very similarly + * to event_notifier_set_handler. Unlike event_notifier_set_handler, these callbacks + * will be invoked when using aio_poll(). + * + * Code that invokes AIO completion functions should rely on this function + * instead of event_notifier_set_handler. + */ +void aio_set_event_notifier(AioContext *ctx, + EventNotifier *notifier, + bool is_external, + EventNotifierHandler *io_read, + AioPollFn *io_poll); + +/* Set polling begin/end callbacks for an event notifier that has already been + * registered with aio_set_event_notifier. Do nothing if the event notifier is + * not registered. + */ +void aio_set_event_notifier_poll(AioContext *ctx, + EventNotifier *notifier, + EventNotifierHandler *io_poll_begin, + EventNotifierHandler *io_poll_end); + +/* Return a GSource that lets the main loop poll the file descriptors attached + * to this AioContext. + */ +GSource *aio_get_g_source(AioContext *ctx); + +/* Return the ThreadPool bound to this AioContext */ +struct ThreadPool *aio_get_thread_pool(AioContext *ctx); + +/* Setup the LinuxAioState bound to this AioContext */ +struct LinuxAioState *aio_setup_linux_aio(AioContext *ctx, Error **errp); + +/* Return the LinuxAioState bound to this AioContext */ +struct LinuxAioState *aio_get_linux_aio(AioContext *ctx); + +/* Setup the LuringState bound to this AioContext */ +struct LuringState *aio_setup_linux_io_uring(AioContext *ctx, Error **errp); + +/* Return the LuringState bound to this AioContext */ +struct LuringState *aio_get_linux_io_uring(AioContext *ctx); +/** + * aio_timer_new_with_attrs: + * @ctx: the aio context + * @type: the clock type + * @scale: the scale + * @attributes: 0, or one to multiple OR'ed QEMU_TIMER_ATTR_ values + * to assign + * @cb: the callback to call on timer expiry + * @opaque: the opaque pointer to pass to the callback + * + * Allocate a new timer (with attributes) attached to the context @ctx. + * The function is responsible for memory allocation. + * + * The preferred interface is aio_timer_init or aio_timer_init_with_attrs. + * Use that unless you really need dynamic memory allocation. + * + * Returns: a pointer to the new timer + */ +static inline QEMUTimer *aio_timer_new_with_attrs(AioContext *ctx, + QEMUClockType type, + int scale, int attributes, + QEMUTimerCB *cb, void *opaque) +{ + return timer_new_full(&ctx->tlg, type, scale, attributes, cb, opaque); +} + +/** + * aio_timer_new: + * @ctx: the aio context + * @type: the clock type + * @scale: the scale + * @cb: the callback to call on timer expiry + * @opaque: the opaque pointer to pass to the callback + * + * Allocate a new timer attached to the context @ctx. + * See aio_timer_new_with_attrs for details. + * + * Returns: a pointer to the new timer + */ +static inline QEMUTimer *aio_timer_new(AioContext *ctx, QEMUClockType type, + int scale, + QEMUTimerCB *cb, void *opaque) +{ + return timer_new_full(&ctx->tlg, type, scale, 0, cb, opaque); +} + +/** + * aio_timer_init_with_attrs: + * @ctx: the aio context + * @ts: the timer + * @type: the clock type + * @scale: the scale + * @attributes: 0, or one to multiple OR'ed QEMU_TIMER_ATTR_ values + * to assign + * @cb: the callback to call on timer expiry + * @opaque: the opaque pointer to pass to the callback + * + * Initialise a new timer (with attributes) attached to the context @ctx. + * The caller is responsible for memory allocation. + */ +static inline void aio_timer_init_with_attrs(AioContext *ctx, + QEMUTimer *ts, QEMUClockType type, + int scale, int attributes, + QEMUTimerCB *cb, void *opaque) +{ + timer_init_full(ts, &ctx->tlg, type, scale, attributes, cb, opaque); +} + +/** + * aio_timer_init: + * @ctx: the aio context + * @ts: the timer + * @type: the clock type + * @scale: the scale + * @cb: the callback to call on timer expiry + * @opaque: the opaque pointer to pass to the callback + * + * Initialise a new timer attached to the context @ctx. + * See aio_timer_init_with_attrs for details. + */ +static inline void aio_timer_init(AioContext *ctx, + QEMUTimer *ts, QEMUClockType type, + int scale, + QEMUTimerCB *cb, void *opaque) +{ + timer_init_full(ts, &ctx->tlg, type, scale, 0, cb, opaque); +} + +/** + * aio_compute_timeout: + * @ctx: the aio context + * + * Compute the timeout that a blocking aio_poll should use. + */ +int64_t aio_compute_timeout(AioContext *ctx); + +/** + * aio_disable_external: + * @ctx: the aio context + * + * Disable the further processing of external clients. + */ +static inline void aio_disable_external(AioContext *ctx) +{ + qatomic_inc(&ctx->external_disable_cnt); +} + +/** + * aio_enable_external: + * @ctx: the aio context + * + * Enable the processing of external clients. + */ +static inline void aio_enable_external(AioContext *ctx) +{ + int old; + + old = qatomic_fetch_dec(&ctx->external_disable_cnt); + assert(old > 0); + if (old == 1) { + /* Kick event loop so it re-arms file descriptors */ + aio_notify(ctx); + } +} + +/** + * aio_external_disabled: + * @ctx: the aio context + * + * Return true if the external clients are disabled. + */ +static inline bool aio_external_disabled(AioContext *ctx) +{ + return qatomic_read(&ctx->external_disable_cnt); +} + +/** + * aio_node_check: + * @ctx: the aio context + * @is_external: Whether or not the checked node is an external event source. + * + * Check if the node's is_external flag is okay to be polled by the ctx at this + * moment. True means green light. + */ +static inline bool aio_node_check(AioContext *ctx, bool is_external) +{ + return !is_external || !qatomic_read(&ctx->external_disable_cnt); +} + +/** + * aio_co_schedule: + * @ctx: the aio context + * @co: the coroutine + * + * Start a coroutine on a remote AioContext. + * + * The coroutine must not be entered by anyone else while aio_co_schedule() + * is active. In addition the coroutine must have yielded unless ctx + * is the context in which the coroutine is running (i.e. the value of + * qemu_get_current_aio_context() from the coroutine itself). + */ +void aio_co_schedule(AioContext *ctx, struct Coroutine *co); + +/** + * aio_co_reschedule_self: + * @new_ctx: the new context + * + * Move the currently running coroutine to new_ctx. If the coroutine is already + * running in new_ctx, do nothing. + */ +void coroutine_fn aio_co_reschedule_self(AioContext *new_ctx); + +/** + * aio_co_wake: + * @co: the coroutine + * + * Restart a coroutine on the AioContext where it was running last, thus + * preventing coroutines from jumping from one context to another when they + * go to sleep. + * + * aio_co_wake may be executed either in coroutine or non-coroutine + * context. The coroutine must not be entered by anyone else while + * aio_co_wake() is active. + */ +void aio_co_wake(struct Coroutine *co); + +/** + * aio_co_enter: + * @ctx: the context to run the coroutine + * @co: the coroutine to run + * + * Enter a coroutine in the specified AioContext. + */ +void aio_co_enter(AioContext *ctx, struct Coroutine *co); + +/** + * Return the AioContext whose event loop runs in the current thread. + * + * If called from an IOThread this will be the IOThread's AioContext. If + * called from another thread it will be the main loop AioContext. + */ +AioContext *qemu_get_current_aio_context(void); + +/** + * aio_context_setup: + * @ctx: the aio context + * + * Initialize the aio context. + */ +void aio_context_setup(AioContext *ctx); + +/** + * aio_context_destroy: + * @ctx: the aio context + * + * Destroy the aio context. + */ +void aio_context_destroy(AioContext *ctx); + +/* Used internally, do not call outside AioContext code */ +void aio_context_use_g_source(AioContext *ctx); + +/** + * aio_context_set_poll_params: + * @ctx: the aio context + * @max_ns: how long to busy poll for, in nanoseconds + * @grow: polling time growth factor + * @shrink: polling time shrink factor + * + * Poll mode can be disabled by setting poll_max_ns to 0. + */ +void aio_context_set_poll_params(AioContext *ctx, int64_t max_ns, + int64_t grow, int64_t shrink, + Error **errp); + +#endif diff --git a/include/block/aio_task.h b/include/block/aio_task.h new file mode 100644 index 000000000..50bc1e181 --- /dev/null +++ b/include/block/aio_task.h @@ -0,0 +1,54 @@ +/* + * Aio tasks loops + * + * Copyright (c) 2019 Virtuozzo International GmbH. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ + +#ifndef BLOCK_AIO_TASK_H +#define BLOCK_AIO_TASK_H + +#include "qemu/coroutine.h" + +typedef struct AioTaskPool AioTaskPool; +typedef struct AioTask AioTask; +typedef int coroutine_fn (*AioTaskFunc)(AioTask *task); +struct AioTask { + AioTaskPool *pool; + AioTaskFunc func; + int ret; +}; + +AioTaskPool *coroutine_fn aio_task_pool_new(int max_busy_tasks); +void aio_task_pool_free(AioTaskPool *); + +/* error code of failed task or 0 if all is OK */ +int aio_task_pool_status(AioTaskPool *pool); + +bool aio_task_pool_empty(AioTaskPool *pool); + +/* User provides filled @task, however task->pool will be set automatically */ +void coroutine_fn aio_task_pool_start_task(AioTaskPool *pool, AioTask *task); + +void coroutine_fn aio_task_pool_wait_slot(AioTaskPool *pool); +void coroutine_fn aio_task_pool_wait_one(AioTaskPool *pool); +void coroutine_fn aio_task_pool_wait_all(AioTaskPool *pool); + +#endif /* BLOCK_AIO_TASK_H */ diff --git a/include/block/block-copy.h b/include/block/block-copy.h new file mode 100644 index 000000000..aac85e148 --- /dev/null +++ b/include/block/block-copy.h @@ -0,0 +1,47 @@ +/* + * block_copy API + * + * Copyright (C) 2013 Proxmox Server Solutions + * Copyright (c) 2019 Virtuozzo International GmbH. + * + * Authors: + * Dietmar Maurer (dietmar@proxmox.com) + * Vladimir Sementsov-Ogievskiy + * + * This work is licensed under the terms of the GNU GPL, version 2 or later. + * See the COPYING file in the top-level directory. + */ + +#ifndef BLOCK_COPY_H +#define BLOCK_COPY_H + +#include "block/block.h" +#include "qemu/co-shared-resource.h" + +typedef void (*ProgressBytesCallbackFunc)(int64_t bytes, void *opaque); +typedef struct BlockCopyState BlockCopyState; + +BlockCopyState *block_copy_state_new(BdrvChild *source, BdrvChild *target, + int64_t cluster_size, + BdrvRequestFlags write_flags, + Error **errp); + +void block_copy_set_progress_callback( + BlockCopyState *s, + ProgressBytesCallbackFunc progress_bytes_callback, + void *progress_opaque); + +void block_copy_set_progress_meter(BlockCopyState *s, ProgressMeter *pm); + +void block_copy_state_free(BlockCopyState *s); + +int64_t block_copy_reset_unallocated(BlockCopyState *s, + int64_t offset, int64_t *count); + +int coroutine_fn block_copy(BlockCopyState *s, int64_t offset, int64_t bytes, + bool *error_is_read); + +BdrvDirtyBitmap *block_copy_dirty_bitmap(BlockCopyState *s); +void block_copy_set_skip_unallocated(BlockCopyState *s, bool skip); + +#endif /* BLOCK_COPY_H */ diff --git a/include/block/block-hmp-cmds.h b/include/block/block-hmp-cmds.h new file mode 100644 index 000000000..3412e108c --- /dev/null +++ b/include/block/block-hmp-cmds.h @@ -0,0 +1,54 @@ +/* + * HMP commands related to the block layer + * + * Copyright (c) 2003-2008 Fabrice Bellard + * Copyright (c) 2020 Red Hat, Inc. + * Copyright IBM, Corp. 2011 + * + * Authors: + * Anthony Liguori + * + * This work is licensed under the terms of the GNU GPL, version 2. See + * the COPYING file in the top-level directory. + */ + +#ifndef BLOCK_HMP_COMMANDS_H +#define BLOCK_HMP_COMMANDS_H + +void hmp_drive_add(Monitor *mon, const QDict *qdict); + +void hmp_commit(Monitor *mon, const QDict *qdict); +void hmp_drive_del(Monitor *mon, const QDict *qdict); + +void hmp_drive_mirror(Monitor *mon, const QDict *qdict); +void hmp_drive_backup(Monitor *mon, const QDict *qdict); + +void hmp_block_job_set_speed(Monitor *mon, const QDict *qdict); +void hmp_block_job_cancel(Monitor *mon, const QDict *qdict); +void hmp_block_job_pause(Monitor *mon, const QDict *qdict); +void hmp_block_job_resume(Monitor *mon, const QDict *qdict); +void hmp_block_job_complete(Monitor *mon, const QDict *qdict); + +void hmp_snapshot_blkdev(Monitor *mon, const QDict *qdict); +void hmp_snapshot_blkdev_internal(Monitor *mon, const QDict *qdict); +void hmp_snapshot_delete_blkdev_internal(Monitor *mon, const QDict *qdict); + +void hmp_nbd_server_start(Monitor *mon, const QDict *qdict); +void hmp_nbd_server_add(Monitor *mon, const QDict *qdict); +void hmp_nbd_server_remove(Monitor *mon, const QDict *qdict); +void hmp_nbd_server_stop(Monitor *mon, const QDict *qdict); + +void hmp_block_resize(Monitor *mon, const QDict *qdict); +void hmp_block_stream(Monitor *mon, const QDict *qdict); +void hmp_block_passwd(Monitor *mon, const QDict *qdict); +void hmp_block_set_io_throttle(Monitor *mon, const QDict *qdict); +void hmp_eject(Monitor *mon, const QDict *qdict); + +void hmp_qemu_io(Monitor *mon, const QDict *qdict); + +void hmp_info_block(Monitor *mon, const QDict *qdict); +void hmp_info_blockstats(Monitor *mon, const QDict *qdict); +void hmp_info_block_jobs(Monitor *mon, const QDict *qdict); +void hmp_info_snapshots(Monitor *mon, const QDict *qdict); + +#endif diff --git a/include/block/block.h b/include/block/block.h new file mode 100644 index 000000000..c9d7c5876 --- /dev/null +++ b/include/block/block.h @@ -0,0 +1,839 @@ +#ifndef BLOCK_H +#define BLOCK_H + +#include "block/aio.h" +#include "block/aio-wait.h" +#include "qemu/iov.h" +#include "qemu/coroutine.h" +#include "block/accounting.h" +#include "block/dirty-bitmap.h" +#include "block/blockjob.h" +#include "qemu/hbitmap.h" + +/* + * generated_co_wrapper + * + * Function specifier, which does nothing but mark functions to be + * generated by scripts/block-coroutine-wrapper.py + * + * Read more in docs/devel/block-coroutine-wrapper.rst + */ +#define generated_co_wrapper + +/* block.c */ +typedef struct BlockDriver BlockDriver; +typedef struct BdrvChild BdrvChild; +typedef struct BdrvChildClass BdrvChildClass; + +typedef struct BlockDriverInfo { + /* in bytes, 0 if irrelevant */ + int cluster_size; + /* offset at which the VM state can be saved (0 if not possible) */ + int64_t vm_state_offset; + bool is_dirty; + /* + * True if this block driver only supports compressed writes + */ + bool needs_compressed_writes; +} BlockDriverInfo; + +typedef struct BlockFragInfo { + uint64_t allocated_clusters; + uint64_t total_clusters; + uint64_t fragmented_clusters; + uint64_t compressed_clusters; +} BlockFragInfo; + +typedef enum { + BDRV_REQ_COPY_ON_READ = 0x1, + BDRV_REQ_ZERO_WRITE = 0x2, + + /* + * The BDRV_REQ_MAY_UNMAP flag is used in write_zeroes requests to indicate + * that the block driver should unmap (discard) blocks if it is guaranteed + * that the result will read back as zeroes. The flag is only passed to the + * driver if the block device is opened with BDRV_O_UNMAP. + */ + BDRV_REQ_MAY_UNMAP = 0x4, + + BDRV_REQ_FUA = 0x10, + BDRV_REQ_WRITE_COMPRESSED = 0x20, + + /* Signifies that this write request will not change the visible disk + * content. */ + BDRV_REQ_WRITE_UNCHANGED = 0x40, + + /* + * BDRV_REQ_SERIALISING forces request serialisation for writes. + * It is used to ensure that writes to the backing file of a backup process + * target cannot race with a read of the backup target that defers to the + * backing file. + * + * Note, that BDRV_REQ_SERIALISING is _not_ opposite in meaning to + * BDRV_REQ_NO_SERIALISING. A more descriptive name for the latter might be + * _DO_NOT_WAIT_FOR_SERIALISING, except that is too long. + */ + BDRV_REQ_SERIALISING = 0x80, + + /* Execute the request only if the operation can be offloaded or otherwise + * be executed efficiently, but return an error instead of using a slow + * fallback. */ + BDRV_REQ_NO_FALLBACK = 0x100, + + /* + * BDRV_REQ_PREFETCH may be used only together with BDRV_REQ_COPY_ON_READ + * on read request and means that caller doesn't really need data to be + * written to qiov parameter which may be NULL. + */ + BDRV_REQ_PREFETCH = 0x200, + /* Mask of valid flags */ + BDRV_REQ_MASK = 0x3ff, +} BdrvRequestFlags; + +typedef struct BlockSizes { + uint32_t phys; + uint32_t log; +} BlockSizes; + +typedef struct HDGeometry { + uint32_t heads; + uint32_t sectors; + uint32_t cylinders; +} HDGeometry; + +#define BDRV_O_RDWR 0x0002 +#define BDRV_O_RESIZE 0x0004 /* request permission for resizing the node */ +#define BDRV_O_SNAPSHOT 0x0008 /* open the file read only and save writes in a snapshot */ +#define BDRV_O_TEMPORARY 0x0010 /* delete the file after use */ +#define BDRV_O_NOCACHE 0x0020 /* do not use the host page cache */ +#define BDRV_O_NATIVE_AIO 0x0080 /* use native AIO instead of the thread pool */ +#define BDRV_O_NO_BACKING 0x0100 /* don't open the backing file */ +#define BDRV_O_NO_FLUSH 0x0200 /* disable flushing on this disk */ +#define BDRV_O_COPY_ON_READ 0x0400 /* copy read backing sectors into image */ +#define BDRV_O_INACTIVE 0x0800 /* consistency hint for migration handoff */ +#define BDRV_O_CHECK 0x1000 /* open solely for consistency check */ +#define BDRV_O_ALLOW_RDWR 0x2000 /* allow reopen to change from r/o to r/w */ +#define BDRV_O_UNMAP 0x4000 /* execute guest UNMAP/TRIM operations */ +#define BDRV_O_PROTOCOL 0x8000 /* if no block driver is explicitly given: + select an appropriate protocol driver, + ignoring the format layer */ +#define BDRV_O_NO_IO 0x10000 /* don't initialize for I/O */ +#define BDRV_O_AUTO_RDONLY 0x20000 /* degrade to read-only if opening read-write fails */ +#define BDRV_O_IO_URING 0x40000 /* use io_uring instead of the thread pool */ + +#define BDRV_O_CACHE_MASK (BDRV_O_NOCACHE | BDRV_O_NO_FLUSH) + + +/* Option names of options parsed by the block layer */ + +#define BDRV_OPT_CACHE_WB "cache.writeback" +#define BDRV_OPT_CACHE_DIRECT "cache.direct" +#define BDRV_OPT_CACHE_NO_FLUSH "cache.no-flush" +#define BDRV_OPT_READ_ONLY "read-only" +#define BDRV_OPT_AUTO_READ_ONLY "auto-read-only" +#define BDRV_OPT_DISCARD "discard" +#define BDRV_OPT_FORCE_SHARE "force-share" + + +#define BDRV_SECTOR_BITS 9 +#define BDRV_SECTOR_SIZE (1ULL << BDRV_SECTOR_BITS) + +#define BDRV_REQUEST_MAX_SECTORS MIN_CONST(SIZE_MAX >> BDRV_SECTOR_BITS, \ + INT_MAX >> BDRV_SECTOR_BITS) +#define BDRV_REQUEST_MAX_BYTES (BDRV_REQUEST_MAX_SECTORS << BDRV_SECTOR_BITS) + +/* + * Allocation status flags for bdrv_block_status() and friends. + * + * Public flags: + * BDRV_BLOCK_DATA: allocation for data at offset is tied to this layer + * BDRV_BLOCK_ZERO: offset reads as zero + * BDRV_BLOCK_OFFSET_VALID: an associated offset exists for accessing raw data + * BDRV_BLOCK_ALLOCATED: the content of the block is determined by this + * layer rather than any backing, set by block layer + * BDRV_BLOCK_EOF: the returned pnum covers through end of file for this + * layer, set by block layer + * + * Internal flags: + * BDRV_BLOCK_RAW: for use by passthrough drivers, such as raw, to request + * that the block layer recompute the answer from the returned + * BDS; must be accompanied by just BDRV_BLOCK_OFFSET_VALID. + * BDRV_BLOCK_RECURSE: request that the block layer will recursively search for + * zeroes in file child of current block node inside + * returned region. Only valid together with both + * BDRV_BLOCK_DATA and BDRV_BLOCK_OFFSET_VALID. Should not + * appear with BDRV_BLOCK_ZERO. + * + * If BDRV_BLOCK_OFFSET_VALID is set, the map parameter represents the + * host offset within the returned BDS that is allocated for the + * corresponding raw guest data. However, whether that offset + * actually contains data also depends on BDRV_BLOCK_DATA, as follows: + * + * DATA ZERO OFFSET_VALID + * t t t sectors read as zero, returned file is zero at offset + * t f t sectors read as valid from file at offset + * f t t sectors preallocated, read as zero, returned file not + * necessarily zero at offset + * f f t sectors preallocated but read from backing_hd, + * returned file contains garbage at offset + * t t f sectors preallocated, read as zero, unknown offset + * t f f sectors read from unknown file or offset + * f t f not allocated or unknown offset, read as zero + * f f f not allocated or unknown offset, read from backing_hd + */ +#define BDRV_BLOCK_DATA 0x01 +#define BDRV_BLOCK_ZERO 0x02 +#define BDRV_BLOCK_OFFSET_VALID 0x04 +#define BDRV_BLOCK_RAW 0x08 +#define BDRV_BLOCK_ALLOCATED 0x10 +#define BDRV_BLOCK_EOF 0x20 +#define BDRV_BLOCK_RECURSE 0x40 + +typedef QTAILQ_HEAD(BlockReopenQueue, BlockReopenQueueEntry) BlockReopenQueue; + +typedef struct BDRVReopenState { + BlockDriverState *bs; + int flags; + BlockdevDetectZeroesOptions detect_zeroes; + bool backing_missing; + bool replace_backing_bs; /* new_backing_bs is ignored if this is false */ + BlockDriverState *new_backing_bs; /* If NULL then detach the current bs */ + uint64_t perm, shared_perm; + QDict *options; + QDict *explicit_options; + void *opaque; +} BDRVReopenState; + +/* + * Block operation types + */ +typedef enum BlockOpType { + BLOCK_OP_TYPE_BACKUP_SOURCE, + BLOCK_OP_TYPE_BACKUP_TARGET, + BLOCK_OP_TYPE_CHANGE, + BLOCK_OP_TYPE_COMMIT_SOURCE, + BLOCK_OP_TYPE_COMMIT_TARGET, + BLOCK_OP_TYPE_DATAPLANE, + BLOCK_OP_TYPE_DRIVE_DEL, + BLOCK_OP_TYPE_EJECT, + BLOCK_OP_TYPE_EXTERNAL_SNAPSHOT, + BLOCK_OP_TYPE_INTERNAL_SNAPSHOT, + BLOCK_OP_TYPE_INTERNAL_SNAPSHOT_DELETE, + BLOCK_OP_TYPE_MIRROR_SOURCE, + BLOCK_OP_TYPE_MIRROR_TARGET, + BLOCK_OP_TYPE_RESIZE, + BLOCK_OP_TYPE_STREAM, + BLOCK_OP_TYPE_REPLACE, + BLOCK_OP_TYPE_MAX, +} BlockOpType; + +/* Block node permission constants */ +enum { + /** + * A user that has the "permission" of consistent reads is guaranteed that + * their view of the contents of the block device is complete and + * self-consistent, representing the contents of a disk at a specific + * point. + * + * For most block devices (including their backing files) this is true, but + * the property cannot be maintained in a few situations like for + * intermediate nodes of a commit block job. + */ + BLK_PERM_CONSISTENT_READ = 0x01, + + /** This permission is required to change the visible disk contents. */ + BLK_PERM_WRITE = 0x02, + + /** + * This permission (which is weaker than BLK_PERM_WRITE) is both enough and + * required for writes to the block node when the caller promises that + * the visible disk content doesn't change. + * + * As the BLK_PERM_WRITE permission is strictly stronger, either is + * sufficient to perform an unchanging write. + */ + BLK_PERM_WRITE_UNCHANGED = 0x04, + + /** This permission is required to change the size of a block node. */ + BLK_PERM_RESIZE = 0x08, + + /** + * This permission is required to change the node that this BdrvChild + * points to. + */ + BLK_PERM_GRAPH_MOD = 0x10, + + BLK_PERM_ALL = 0x1f, + + DEFAULT_PERM_PASSTHROUGH = BLK_PERM_CONSISTENT_READ + | BLK_PERM_WRITE + | BLK_PERM_WRITE_UNCHANGED + | BLK_PERM_RESIZE, + + DEFAULT_PERM_UNCHANGED = BLK_PERM_ALL & ~DEFAULT_PERM_PASSTHROUGH, +}; + +/* + * Flags that parent nodes assign to child nodes to specify what kind of + * role(s) they take. + * + * At least one of DATA, METADATA, FILTERED, or COW must be set for + * every child. + */ +enum BdrvChildRoleBits { + /* + * This child stores data. + * Any node may have an arbitrary number of such children. + */ + BDRV_CHILD_DATA = (1 << 0), + + /* + * This child stores metadata. + * Any node may have an arbitrary number of metadata-storing + * children. + */ + BDRV_CHILD_METADATA = (1 << 1), + + /* + * A child that always presents exactly the same visible data as + * the parent, e.g. by virtue of the parent forwarding all reads + * and writes. + * This flag is mutually exclusive with DATA, METADATA, and COW. + * Any node may have at most one filtered child at a time. + */ + BDRV_CHILD_FILTERED = (1 << 2), + + /* + * Child from which to read all data that isn't allocated in the + * parent (i.e., the backing child); such data is copied to the + * parent through COW (and optionally COR). + * This field is mutually exclusive with DATA, METADATA, and + * FILTERED. + * Any node may have at most one such backing child at a time. + */ + BDRV_CHILD_COW = (1 << 3), + + /* + * The primary child. For most drivers, this is the child whose + * filename applies best to the parent node. + * Any node may have at most one primary child at a time. + */ + BDRV_CHILD_PRIMARY = (1 << 4), + + /* Useful combination of flags */ + BDRV_CHILD_IMAGE = BDRV_CHILD_DATA + | BDRV_CHILD_METADATA + | BDRV_CHILD_PRIMARY, +}; + +/* Mask of BdrvChildRoleBits values */ +typedef unsigned int BdrvChildRole; + +char *bdrv_perm_names(uint64_t perm); +uint64_t bdrv_qapi_perm_to_blk_perm(BlockPermission qapi_perm); + +/* disk I/O throttling */ +void bdrv_init(void); +void bdrv_init_with_whitelist(void); +bool bdrv_uses_whitelist(void); +int bdrv_is_whitelisted(BlockDriver *drv, bool read_only); +BlockDriver *bdrv_find_protocol(const char *filename, + bool allow_protocol_prefix, + Error **errp); +BlockDriver *bdrv_find_format(const char *format_name); +int bdrv_create(BlockDriver *drv, const char* filename, + QemuOpts *opts, Error **errp); +int bdrv_create_file(const char *filename, QemuOpts *opts, Error **errp); + +BlockDriverState *bdrv_new(void); +void bdrv_append(BlockDriverState *bs_new, BlockDriverState *bs_top, + Error **errp); +void bdrv_replace_node(BlockDriverState *from, BlockDriverState *to, + Error **errp); + +int bdrv_parse_aio(const char *mode, int *flags); +int bdrv_parse_cache_mode(const char *mode, int *flags, bool *writethrough); +int bdrv_parse_discard_flags(const char *mode, int *flags); +BdrvChild *bdrv_open_child(const char *filename, + QDict *options, const char *bdref_key, + BlockDriverState* parent, + const BdrvChildClass *child_class, + BdrvChildRole child_role, + bool allow_none, Error **errp); +BlockDriverState *bdrv_open_blockdev_ref(BlockdevRef *ref, Error **errp); +void bdrv_set_backing_hd(BlockDriverState *bs, BlockDriverState *backing_hd, + Error **errp); +int bdrv_open_backing_file(BlockDriverState *bs, QDict *parent_options, + const char *bdref_key, Error **errp); +BlockDriverState *bdrv_open(const char *filename, const char *reference, + QDict *options, int flags, Error **errp); +BlockDriverState *bdrv_new_open_driver(BlockDriver *drv, const char *node_name, + int flags, Error **errp); +BlockReopenQueue *bdrv_reopen_queue(BlockReopenQueue *bs_queue, + BlockDriverState *bs, QDict *options, + bool keep_old_opts); +int bdrv_reopen_multiple(BlockReopenQueue *bs_queue, Error **errp); +int bdrv_reopen_set_read_only(BlockDriverState *bs, bool read_only, + Error **errp); +int bdrv_reopen_prepare(BDRVReopenState *reopen_state, + BlockReopenQueue *queue, Error **errp); +void bdrv_reopen_commit(BDRVReopenState *reopen_state); +void bdrv_reopen_abort(BDRVReopenState *reopen_state); +int bdrv_pwrite_zeroes(BdrvChild *child, int64_t offset, + int bytes, BdrvRequestFlags flags); +int bdrv_make_zero(BdrvChild *child, BdrvRequestFlags flags); +int bdrv_pread(BdrvChild *child, int64_t offset, void *buf, int bytes); +int bdrv_pwrite(BdrvChild *child, int64_t offset, const void *buf, int bytes); +int bdrv_pwrite_sync(BdrvChild *child, int64_t offset, + const void *buf, int count); +/* + * Efficiently zero a region of the disk image. Note that this is a regular + * I/O request like read or write and should have a reasonable size. This + * function is not suitable for zeroing the entire image in a single request + * because it may allocate memory for the entire region. + */ +int coroutine_fn bdrv_co_pwrite_zeroes(BdrvChild *child, int64_t offset, + int bytes, BdrvRequestFlags flags); +BlockDriverState *bdrv_find_backing_image(BlockDriverState *bs, + const char *backing_file); +void bdrv_refresh_filename(BlockDriverState *bs); + +int coroutine_fn bdrv_co_truncate(BdrvChild *child, int64_t offset, bool exact, + PreallocMode prealloc, BdrvRequestFlags flags, + Error **errp); +int generated_co_wrapper +bdrv_truncate(BdrvChild *child, int64_t offset, bool exact, + PreallocMode prealloc, BdrvRequestFlags flags, Error **errp); + +int64_t bdrv_nb_sectors(BlockDriverState *bs); +int64_t bdrv_getlength(BlockDriverState *bs); +int64_t bdrv_get_allocated_file_size(BlockDriverState *bs); +BlockMeasureInfo *bdrv_measure(BlockDriver *drv, QemuOpts *opts, + BlockDriverState *in_bs, Error **errp); +void bdrv_get_geometry(BlockDriverState *bs, uint64_t *nb_sectors_ptr); +void bdrv_refresh_limits(BlockDriverState *bs, Error **errp); +int bdrv_commit(BlockDriverState *bs); +int bdrv_make_empty(BdrvChild *c, Error **errp); +int bdrv_change_backing_file(BlockDriverState *bs, const char *backing_file, + const char *backing_fmt, bool warn); +void bdrv_register(BlockDriver *bdrv); +int bdrv_drop_intermediate(BlockDriverState *top, BlockDriverState *base, + const char *backing_file_str); +BlockDriverState *bdrv_find_overlay(BlockDriverState *active, + BlockDriverState *bs); +BlockDriverState *bdrv_find_base(BlockDriverState *bs); +bool bdrv_is_backing_chain_frozen(BlockDriverState *bs, BlockDriverState *base, + Error **errp); +int bdrv_freeze_backing_chain(BlockDriverState *bs, BlockDriverState *base, + Error **errp); +void bdrv_unfreeze_backing_chain(BlockDriverState *bs, BlockDriverState *base); +int coroutine_fn bdrv_co_delete_file(BlockDriverState *bs, Error **errp); + + +typedef struct BdrvCheckResult { + int corruptions; + int leaks; + int check_errors; + int corruptions_fixed; + int leaks_fixed; + int64_t image_end_offset; + BlockFragInfo bfi; +} BdrvCheckResult; + +typedef enum { + BDRV_FIX_LEAKS = 1, + BDRV_FIX_ERRORS = 2, +} BdrvCheckMode; + +int generated_co_wrapper bdrv_check(BlockDriverState *bs, BdrvCheckResult *res, + BdrvCheckMode fix); + +/* The units of offset and total_work_size may be chosen arbitrarily by the + * block driver; total_work_size may change during the course of the amendment + * operation */ +typedef void BlockDriverAmendStatusCB(BlockDriverState *bs, int64_t offset, + int64_t total_work_size, void *opaque); +int bdrv_amend_options(BlockDriverState *bs_new, QemuOpts *opts, + BlockDriverAmendStatusCB *status_cb, void *cb_opaque, + bool force, + Error **errp); + +/* check if a named node can be replaced when doing drive-mirror */ +BlockDriverState *check_to_replace_node(BlockDriverState *parent_bs, + const char *node_name, Error **errp); + +/* async block I/O */ +void bdrv_aio_cancel(BlockAIOCB *acb); +void bdrv_aio_cancel_async(BlockAIOCB *acb); + +/* sg packet commands */ +int bdrv_co_ioctl(BlockDriverState *bs, int req, void *buf); + +/* Invalidate any cached metadata used by image formats */ +int generated_co_wrapper bdrv_invalidate_cache(BlockDriverState *bs, + Error **errp); +void bdrv_invalidate_cache_all(Error **errp); +int bdrv_inactivate_all(void); + +/* Ensure contents are flushed to disk. */ +int generated_co_wrapper bdrv_flush(BlockDriverState *bs); +int coroutine_fn bdrv_co_flush(BlockDriverState *bs); +int bdrv_flush_all(void); +void bdrv_close_all(void); +void bdrv_drain(BlockDriverState *bs); +void coroutine_fn bdrv_co_drain(BlockDriverState *bs); +void bdrv_drain_all_begin(void); +void bdrv_drain_all_end(void); +void bdrv_drain_all(void); + +#define BDRV_POLL_WHILE(bs, cond) ({ \ + BlockDriverState *bs_ = (bs); \ + AIO_WAIT_WHILE(bdrv_get_aio_context(bs_), \ + cond); }) + +int generated_co_wrapper bdrv_pdiscard(BdrvChild *child, int64_t offset, + int64_t bytes); +int bdrv_co_pdiscard(BdrvChild *child, int64_t offset, int64_t bytes); +int bdrv_has_zero_init_1(BlockDriverState *bs); +int bdrv_has_zero_init(BlockDriverState *bs); +bool bdrv_can_write_zeroes_with_unmap(BlockDriverState *bs); +int bdrv_block_status(BlockDriverState *bs, int64_t offset, + int64_t bytes, int64_t *pnum, int64_t *map, + BlockDriverState **file); +int bdrv_block_status_above(BlockDriverState *bs, BlockDriverState *base, + int64_t offset, int64_t bytes, int64_t *pnum, + int64_t *map, BlockDriverState **file); +int bdrv_is_allocated(BlockDriverState *bs, int64_t offset, int64_t bytes, + int64_t *pnum); +int bdrv_is_allocated_above(BlockDriverState *top, BlockDriverState *base, + bool include_base, int64_t offset, int64_t bytes, + int64_t *pnum); +int coroutine_fn bdrv_co_is_zero_fast(BlockDriverState *bs, int64_t offset, + int64_t bytes); + +bool bdrv_is_read_only(BlockDriverState *bs); +int bdrv_can_set_read_only(BlockDriverState *bs, bool read_only, + bool ignore_allow_rdw, Error **errp); +int bdrv_apply_auto_read_only(BlockDriverState *bs, const char *errmsg, + Error **errp); +bool bdrv_is_writable(BlockDriverState *bs); +bool bdrv_is_sg(BlockDriverState *bs); +bool bdrv_is_inserted(BlockDriverState *bs); +void bdrv_lock_medium(BlockDriverState *bs, bool locked); +void bdrv_eject(BlockDriverState *bs, bool eject_flag); +const char *bdrv_get_format_name(BlockDriverState *bs); +BlockDriverState *bdrv_find_node(const char *node_name); +BlockDeviceInfoList *bdrv_named_nodes_list(bool flat, Error **errp); +XDbgBlockGraph *bdrv_get_xdbg_block_graph(Error **errp); +BlockDriverState *bdrv_lookup_bs(const char *device, + const char *node_name, + Error **errp); +bool bdrv_chain_contains(BlockDriverState *top, BlockDriverState *base); +BlockDriverState *bdrv_next_node(BlockDriverState *bs); +BlockDriverState *bdrv_next_all_states(BlockDriverState *bs); + +typedef struct BdrvNextIterator { + enum { + BDRV_NEXT_BACKEND_ROOTS, + BDRV_NEXT_MONITOR_OWNED, + } phase; + BlockBackend *blk; + BlockDriverState *bs; +} BdrvNextIterator; + +BlockDriverState *bdrv_first(BdrvNextIterator *it); +BlockDriverState *bdrv_next(BdrvNextIterator *it); +void bdrv_next_cleanup(BdrvNextIterator *it); + +BlockDriverState *bdrv_next_monitor_owned(BlockDriverState *bs); +bool bdrv_supports_compressed_writes(BlockDriverState *bs); +void bdrv_iterate_format(void (*it)(void *opaque, const char *name), + void *opaque, bool read_only); +const char *bdrv_get_node_name(const BlockDriverState *bs); +const char *bdrv_get_device_name(const BlockDriverState *bs); +const char *bdrv_get_device_or_node_name(const BlockDriverState *bs); +int bdrv_get_flags(BlockDriverState *bs); +int bdrv_get_info(BlockDriverState *bs, BlockDriverInfo *bdi); +ImageInfoSpecific *bdrv_get_specific_info(BlockDriverState *bs, + Error **errp); +BlockStatsSpecific *bdrv_get_specific_stats(BlockDriverState *bs); +void bdrv_round_to_clusters(BlockDriverState *bs, + int64_t offset, int64_t bytes, + int64_t *cluster_offset, + int64_t *cluster_bytes); + +void bdrv_get_backing_filename(BlockDriverState *bs, + char *filename, int filename_size); +char *bdrv_get_full_backing_filename(BlockDriverState *bs, Error **errp); +char *bdrv_get_full_backing_filename_from_filename(const char *backed, + const char *backing, + Error **errp); +char *bdrv_dirname(BlockDriverState *bs, Error **errp); + +int path_has_protocol(const char *path); +int path_is_absolute(const char *path); +char *path_combine(const char *base_path, const char *filename); + +int generated_co_wrapper +bdrv_readv_vmstate(BlockDriverState *bs, QEMUIOVector *qiov, int64_t pos); +int generated_co_wrapper +bdrv_writev_vmstate(BlockDriverState *bs, QEMUIOVector *qiov, int64_t pos); +int bdrv_save_vmstate(BlockDriverState *bs, const uint8_t *buf, + int64_t pos, int size); + +int bdrv_load_vmstate(BlockDriverState *bs, uint8_t *buf, + int64_t pos, int size); + +void bdrv_img_create(const char *filename, const char *fmt, + const char *base_filename, const char *base_fmt, + char *options, uint64_t img_size, int flags, + bool quiet, Error **errp); + +/* Returns the alignment in bytes that is required so that no bounce buffer + * is required throughout the stack */ +size_t bdrv_min_mem_align(BlockDriverState *bs); +/* Returns optimal alignment in bytes for bounce buffer */ +size_t bdrv_opt_mem_align(BlockDriverState *bs); +void *qemu_blockalign(BlockDriverState *bs, size_t size); +void *qemu_blockalign0(BlockDriverState *bs, size_t size); +void *qemu_try_blockalign(BlockDriverState *bs, size_t size); +void *qemu_try_blockalign0(BlockDriverState *bs, size_t size); +bool bdrv_qiov_is_aligned(BlockDriverState *bs, QEMUIOVector *qiov); + +void bdrv_enable_copy_on_read(BlockDriverState *bs); +void bdrv_disable_copy_on_read(BlockDriverState *bs); + +void bdrv_ref(BlockDriverState *bs); +void bdrv_unref(BlockDriverState *bs); +void bdrv_unref_child(BlockDriverState *parent, BdrvChild *child); +BdrvChild *bdrv_attach_child(BlockDriverState *parent_bs, + BlockDriverState *child_bs, + const char *child_name, + const BdrvChildClass *child_class, + BdrvChildRole child_role, + Error **errp); + +bool bdrv_op_is_blocked(BlockDriverState *bs, BlockOpType op, Error **errp); +void bdrv_op_block(BlockDriverState *bs, BlockOpType op, Error *reason); +void bdrv_op_unblock(BlockDriverState *bs, BlockOpType op, Error *reason); +void bdrv_op_block_all(BlockDriverState *bs, Error *reason); +void bdrv_op_unblock_all(BlockDriverState *bs, Error *reason); +bool bdrv_op_blocker_is_empty(BlockDriverState *bs); + +#define BLKDBG_EVENT(child, evt) \ + do { \ + if (child) { \ + bdrv_debug_event(child->bs, evt); \ + } \ + } while (0) + +void bdrv_debug_event(BlockDriverState *bs, BlkdebugEvent event); + +int bdrv_debug_breakpoint(BlockDriverState *bs, const char *event, + const char *tag); +int bdrv_debug_remove_breakpoint(BlockDriverState *bs, const char *tag); +int bdrv_debug_resume(BlockDriverState *bs, const char *tag); +bool bdrv_debug_is_suspended(BlockDriverState *bs, const char *tag); + +/** + * bdrv_get_aio_context: + * + * Returns: the currently bound #AioContext + */ +AioContext *bdrv_get_aio_context(BlockDriverState *bs); + +/** + * Move the current coroutine to the AioContext of @bs and return the old + * AioContext of the coroutine. Increase bs->in_flight so that draining @bs + * will wait for the operation to proceed until the corresponding + * bdrv_co_leave(). + * + * Consequently, you can't call drain inside a bdrv_co_enter/leave() section as + * this will deadlock. + */ +AioContext *coroutine_fn bdrv_co_enter(BlockDriverState *bs); + +/** + * Ends a section started by bdrv_co_enter(). Move the current coroutine back + * to old_ctx and decrease bs->in_flight again. + */ +void coroutine_fn bdrv_co_leave(BlockDriverState *bs, AioContext *old_ctx); + +/** + * Locks the AioContext of @bs if it's not the current AioContext. This avoids + * double locking which could lead to deadlocks: This is a coroutine_fn, so we + * know we already own the lock of the current AioContext. + * + * May only be called in the main thread. + */ +void coroutine_fn bdrv_co_lock(BlockDriverState *bs); + +/** + * Unlocks the AioContext of @bs if it's not the current AioContext. + */ +void coroutine_fn bdrv_co_unlock(BlockDriverState *bs); + +/** + * Transfer control to @co in the aio context of @bs + */ +void bdrv_coroutine_enter(BlockDriverState *bs, Coroutine *co); + +void bdrv_set_aio_context_ignore(BlockDriverState *bs, + AioContext *new_context, GSList **ignore); +int bdrv_try_set_aio_context(BlockDriverState *bs, AioContext *ctx, + Error **errp); +int bdrv_child_try_set_aio_context(BlockDriverState *bs, AioContext *ctx, + BdrvChild *ignore_child, Error **errp); +bool bdrv_child_can_set_aio_context(BdrvChild *c, AioContext *ctx, + GSList **ignore, Error **errp); +bool bdrv_can_set_aio_context(BlockDriverState *bs, AioContext *ctx, + GSList **ignore, Error **errp); +int bdrv_probe_blocksizes(BlockDriverState *bs, BlockSizes *bsz); +int bdrv_probe_geometry(BlockDriverState *bs, HDGeometry *geo); + +void bdrv_io_plug(BlockDriverState *bs); +void bdrv_io_unplug(BlockDriverState *bs); + +/** + * bdrv_parent_drained_begin_single: + * + * Begin a quiesced section for the parent of @c. If @poll is true, wait for + * any pending activity to cease. + */ +void bdrv_parent_drained_begin_single(BdrvChild *c, bool poll); + +/** + * bdrv_parent_drained_end_single: + * + * End a quiesced section for the parent of @c. + * + * This polls @bs's AioContext until all scheduled sub-drained_ends + * have settled, which may result in graph changes. + */ +void bdrv_parent_drained_end_single(BdrvChild *c); + +/** + * bdrv_drain_poll: + * + * Poll for pending requests in @bs, its parents (except for @ignore_parent), + * and if @recursive is true its children as well (used for subtree drain). + * + * If @ignore_bds_parents is true, parents that are BlockDriverStates must + * ignore the drain request because they will be drained separately (used for + * drain_all). + * + * This is part of bdrv_drained_begin. + */ +bool bdrv_drain_poll(BlockDriverState *bs, bool recursive, + BdrvChild *ignore_parent, bool ignore_bds_parents); + +/** + * bdrv_drained_begin: + * + * Begin a quiesced section for exclusive access to the BDS, by disabling + * external request sources including NBD server and device model. Note that + * this doesn't block timers or coroutines from submitting more requests, which + * means block_job_pause is still necessary. + * + * This function can be recursive. + */ +void bdrv_drained_begin(BlockDriverState *bs); + +/** + * bdrv_do_drained_begin_quiesce: + * + * Quiesces a BDS like bdrv_drained_begin(), but does not wait for already + * running requests to complete. + */ +void bdrv_do_drained_begin_quiesce(BlockDriverState *bs, + BdrvChild *parent, bool ignore_bds_parents); + +/** + * Like bdrv_drained_begin, but recursively begins a quiesced section for + * exclusive access to all child nodes as well. + */ +void bdrv_subtree_drained_begin(BlockDriverState *bs); + +/** + * bdrv_drained_end: + * + * End a quiescent section started by bdrv_drained_begin(). + * + * This polls @bs's AioContext until all scheduled sub-drained_ends + * have settled. On one hand, that may result in graph changes. On + * the other, this requires that the caller either runs in the main + * loop; or that all involved nodes (@bs and all of its parents) are + * in the caller's AioContext. + */ +void bdrv_drained_end(BlockDriverState *bs); + +/** + * bdrv_drained_end_no_poll: + * + * Same as bdrv_drained_end(), but do not poll for the subgraph to + * actually become unquiesced. Therefore, no graph changes will occur + * with this function. + * + * *drained_end_counter is incremented for every background operation + * that is scheduled, and will be decremented for every operation once + * it settles. The caller must poll until it reaches 0. The counter + * should be accessed using atomic operations only. + */ +void bdrv_drained_end_no_poll(BlockDriverState *bs, int *drained_end_counter); + +/** + * End a quiescent section started by bdrv_subtree_drained_begin(). + */ +void bdrv_subtree_drained_end(BlockDriverState *bs); + +void bdrv_add_child(BlockDriverState *parent, BlockDriverState *child, + Error **errp); +void bdrv_del_child(BlockDriverState *parent, BdrvChild *child, Error **errp); + +bool bdrv_can_store_new_dirty_bitmap(BlockDriverState *bs, const char *name, + uint32_t granularity, Error **errp); +/** + * + * bdrv_register_buf/bdrv_unregister_buf: + * + * Register/unregister a buffer for I/O. For example, VFIO drivers are + * interested to know the memory areas that would later be used for I/O, so + * that they can prepare IOMMU mapping etc., to get better performance. + */ +void bdrv_register_buf(BlockDriverState *bs, void *host, size_t size); +void bdrv_unregister_buf(BlockDriverState *bs, void *host); + +/** + * + * bdrv_co_copy_range: + * + * Do offloaded copy between two children. If the operation is not implemented + * by the driver, or if the backend storage doesn't support it, a negative + * error code will be returned. + * + * Note: block layer doesn't emulate or fallback to a bounce buffer approach + * because usually the caller shouldn't attempt offloaded copy any more (e.g. + * calling copy_file_range(2)) after the first error, thus it should fall back + * to a read+write path in the caller level. + * + * @src: Source child to copy data from + * @src_offset: offset in @src image to read data + * @dst: Destination child to copy data to + * @dst_offset: offset in @dst image to write data + * @bytes: number of bytes to copy + * @flags: request flags. Supported flags: + * BDRV_REQ_ZERO_WRITE - treat the @src range as zero data and do zero + * write on @dst as if bdrv_co_pwrite_zeroes is + * called. Used to simplify caller code, or + * during BlockDriver.bdrv_co_copy_range_from() + * recursion. + * BDRV_REQ_NO_SERIALISING - do not serialize with other overlapping + * requests currently in flight. + * + * Returns: 0 if succeeded; negative error code if failed. + **/ +int coroutine_fn bdrv_co_copy_range(BdrvChild *src, uint64_t src_offset, + BdrvChild *dst, uint64_t dst_offset, + uint64_t bytes, BdrvRequestFlags read_flags, + BdrvRequestFlags write_flags); +#endif diff --git a/include/block/block_backup.h b/include/block/block_backup.h new file mode 100644 index 000000000..157596c29 --- /dev/null +++ b/include/block/block_backup.h @@ -0,0 +1,25 @@ +/* + * QEMU backup + * + * Copyright (c) 2013 Proxmox Server Solutions + * Copyright (c) 2016 HUAWEI TECHNOLOGIES CO., LTD. + * Copyright (c) 2016 Intel Corporation + * Copyright (c) 2016 FUJITSU LIMITED + * + * Authors: + * Dietmar Maurer + * Changlong Xie + * + * This work is licensed under the terms of the GNU GPL, version 2 or later. + * See the COPYING file in the top-level directory. + * + */ + +#ifndef BLOCK_BACKUP_H +#define BLOCK_BACKUP_H + +#include "block/block_int.h" + +void backup_do_checkpoint(BlockJob *job, Error **errp); + +#endif diff --git a/include/block/block_int.h b/include/block/block_int.h new file mode 100644 index 000000000..95d9333be --- /dev/null +++ b/include/block/block_int.h @@ -0,0 +1,1419 @@ +/* + * QEMU System Emulator block driver + * + * Copyright (c) 2003 Fabrice Bellard + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ +#ifndef BLOCK_INT_H +#define BLOCK_INT_H + +#include "block/accounting.h" +#include "block/block.h" +#include "block/aio-wait.h" +#include "qemu/queue.h" +#include "qemu/coroutine.h" +#include "qemu/stats64.h" +#include "qemu/timer.h" +#include "qemu/hbitmap.h" +#include "block/snapshot.h" +#include "qemu/throttle.h" + +#define BLOCK_FLAG_LAZY_REFCOUNTS 8 + +#define BLOCK_OPT_SIZE "size" +#define BLOCK_OPT_ENCRYPT "encryption" +#define BLOCK_OPT_ENCRYPT_FORMAT "encrypt.format" +#define BLOCK_OPT_COMPAT6 "compat6" +#define BLOCK_OPT_HWVERSION "hwversion" +#define BLOCK_OPT_BACKING_FILE "backing_file" +#define BLOCK_OPT_BACKING_FMT "backing_fmt" +#define BLOCK_OPT_CLUSTER_SIZE "cluster_size" +#define BLOCK_OPT_TABLE_SIZE "table_size" +#define BLOCK_OPT_PREALLOC "preallocation" +#define BLOCK_OPT_SUBFMT "subformat" +#define BLOCK_OPT_COMPAT_LEVEL "compat" +#define BLOCK_OPT_LAZY_REFCOUNTS "lazy_refcounts" +#define BLOCK_OPT_ADAPTER_TYPE "adapter_type" +#define BLOCK_OPT_REDUNDANCY "redundancy" +#define BLOCK_OPT_NOCOW "nocow" +#define BLOCK_OPT_EXTENT_SIZE_HINT "extent_size_hint" +#define BLOCK_OPT_OBJECT_SIZE "object_size" +#define BLOCK_OPT_REFCOUNT_BITS "refcount_bits" +#define BLOCK_OPT_DATA_FILE "data_file" +#define BLOCK_OPT_DATA_FILE_RAW "data_file_raw" +#define BLOCK_OPT_COMPRESSION_TYPE "compression_type" +#define BLOCK_OPT_EXTL2 "extended_l2" + +#define BLOCK_PROBE_BUF_SIZE 512 + +enum BdrvTrackedRequestType { + BDRV_TRACKED_READ, + BDRV_TRACKED_WRITE, + BDRV_TRACKED_DISCARD, + BDRV_TRACKED_TRUNCATE, +}; + +typedef struct BdrvTrackedRequest { + BlockDriverState *bs; + int64_t offset; + uint64_t bytes; + enum BdrvTrackedRequestType type; + + bool serialising; + int64_t overlap_offset; + uint64_t overlap_bytes; + + QLIST_ENTRY(BdrvTrackedRequest) list; + Coroutine *co; /* owner, used for deadlock detection */ + CoQueue wait_queue; /* coroutines blocked on this request */ + + struct BdrvTrackedRequest *waiting_for; +} BdrvTrackedRequest; + +struct BlockDriver { + const char *format_name; + int instance_size; + + /* set to true if the BlockDriver is a block filter. Block filters pass + * certain callbacks that refer to data (see block.c) to their bs->file + * or bs->backing (whichever one exists) if the driver doesn't implement + * them. Drivers that do not wish to forward must implement them and return + * -ENOTSUP. + * Note that filters are not allowed to modify data. + * + * Filters generally cannot have more than a single filtered child, + * because the data they present must at all times be the same as + * that on their filtered child. That would be impossible to + * achieve for multiple filtered children. + * (And this filtered child must then be bs->file or bs->backing.) + */ + bool is_filter; + /* + * Set to true if the BlockDriver is a format driver. Format nodes + * generally do not expect their children to be other format nodes + * (except for backing files), and so format probing is disabled + * on those children. + */ + bool is_format; + /* + * Return true if @to_replace can be replaced by a BDS with the + * same data as @bs without it affecting @bs's behavior (that is, + * without it being visible to @bs's parents). + */ + bool (*bdrv_recurse_can_replace)(BlockDriverState *bs, + BlockDriverState *to_replace); + + int (*bdrv_probe)(const uint8_t *buf, int buf_size, const char *filename); + int (*bdrv_probe_device)(const char *filename); + + /* Any driver implementing this callback is expected to be able to handle + * NULL file names in its .bdrv_open() implementation */ + void (*bdrv_parse_filename)(const char *filename, QDict *options, Error **errp); + /* Drivers not implementing bdrv_parse_filename nor bdrv_open should have + * this field set to true, except ones that are defined only by their + * child's bs. + * An example of the last type will be the quorum block driver. + */ + bool bdrv_needs_filename; + + /* + * Set if a driver can support backing files. This also implies the + * following semantics: + * + * - Return status 0 of .bdrv_co_block_status means that corresponding + * blocks are not allocated in this layer of backing-chain + * - For such (unallocated) blocks, read will: + * - fill buffer with zeros if there is no backing file + * - read from the backing file otherwise, where the block layer + * takes care of reading zeros beyond EOF if backing file is short + */ + bool supports_backing; + + /* For handling image reopen for split or non-split files */ + int (*bdrv_reopen_prepare)(BDRVReopenState *reopen_state, + BlockReopenQueue *queue, Error **errp); + void (*bdrv_reopen_commit)(BDRVReopenState *reopen_state); + void (*bdrv_reopen_commit_post)(BDRVReopenState *reopen_state); + void (*bdrv_reopen_abort)(BDRVReopenState *reopen_state); + void (*bdrv_join_options)(QDict *options, QDict *old_options); + + int (*bdrv_open)(BlockDriverState *bs, QDict *options, int flags, + Error **errp); + + /* Protocol drivers should implement this instead of bdrv_open */ + int (*bdrv_file_open)(BlockDriverState *bs, QDict *options, int flags, + Error **errp); + void (*bdrv_close)(BlockDriverState *bs); + + + int coroutine_fn (*bdrv_co_create)(BlockdevCreateOptions *opts, + Error **errp); + int coroutine_fn (*bdrv_co_create_opts)(BlockDriver *drv, + const char *filename, + QemuOpts *opts, + Error **errp); + + int coroutine_fn (*bdrv_co_amend)(BlockDriverState *bs, + BlockdevAmendOptions *opts, + bool force, + Error **errp); + + int (*bdrv_amend_options)(BlockDriverState *bs, + QemuOpts *opts, + BlockDriverAmendStatusCB *status_cb, + void *cb_opaque, + bool force, + Error **errp); + + int (*bdrv_make_empty)(BlockDriverState *bs); + + /* + * Refreshes the bs->exact_filename field. If that is impossible, + * bs->exact_filename has to be left empty. + */ + void (*bdrv_refresh_filename)(BlockDriverState *bs); + + /* + * Gathers the open options for all children into @target. + * A simple format driver (without backing file support) might + * implement this function like this: + * + * QINCREF(bs->file->bs->full_open_options); + * qdict_put(target, "file", bs->file->bs->full_open_options); + * + * If not specified, the generic implementation will simply put + * all children's options under their respective name. + * + * @backing_overridden is true when bs->backing seems not to be + * the child that would result from opening bs->backing_file. + * Therefore, if it is true, the backing child's options should be + * gathered; otherwise, there is no need since the backing child + * is the one implied by the image header. + * + * Note that ideally this function would not be needed. Every + * block driver which implements it is probably doing something + * shady regarding its runtime option structure. + */ + void (*bdrv_gather_child_options)(BlockDriverState *bs, QDict *target, + bool backing_overridden); + + /* + * Returns an allocated string which is the directory name of this BDS: It + * will be used to make relative filenames absolute by prepending this + * function's return value to them. + */ + char *(*bdrv_dirname)(BlockDriverState *bs, Error **errp); + + /* aio */ + BlockAIOCB *(*bdrv_aio_preadv)(BlockDriverState *bs, + uint64_t offset, uint64_t bytes, QEMUIOVector *qiov, int flags, + BlockCompletionFunc *cb, void *opaque); + BlockAIOCB *(*bdrv_aio_pwritev)(BlockDriverState *bs, + uint64_t offset, uint64_t bytes, QEMUIOVector *qiov, int flags, + BlockCompletionFunc *cb, void *opaque); + BlockAIOCB *(*bdrv_aio_flush)(BlockDriverState *bs, + BlockCompletionFunc *cb, void *opaque); + BlockAIOCB *(*bdrv_aio_pdiscard)(BlockDriverState *bs, + int64_t offset, int bytes, + BlockCompletionFunc *cb, void *opaque); + + int coroutine_fn (*bdrv_co_readv)(BlockDriverState *bs, + int64_t sector_num, int nb_sectors, QEMUIOVector *qiov); + + /** + * @offset: position in bytes to read at + * @bytes: number of bytes to read + * @qiov: the buffers to fill with read data + * @flags: currently unused, always 0 + * + * @offset and @bytes will be a multiple of 'request_alignment', + * but the length of individual @qiov elements does not have to + * be a multiple. + * + * @bytes will always equal the total size of @qiov, and will be + * no larger than 'max_transfer'. + * + * The buffer in @qiov may point directly to guest memory. + */ + int coroutine_fn (*bdrv_co_preadv)(BlockDriverState *bs, + uint64_t offset, uint64_t bytes, QEMUIOVector *qiov, int flags); + int coroutine_fn (*bdrv_co_preadv_part)(BlockDriverState *bs, + uint64_t offset, uint64_t bytes, + QEMUIOVector *qiov, size_t qiov_offset, int flags); + int coroutine_fn (*bdrv_co_writev)(BlockDriverState *bs, + int64_t sector_num, int nb_sectors, QEMUIOVector *qiov, int flags); + /** + * @offset: position in bytes to write at + * @bytes: number of bytes to write + * @qiov: the buffers containing data to write + * @flags: zero or more bits allowed by 'supported_write_flags' + * + * @offset and @bytes will be a multiple of 'request_alignment', + * but the length of individual @qiov elements does not have to + * be a multiple. + * + * @bytes will always equal the total size of @qiov, and will be + * no larger than 'max_transfer'. + * + * The buffer in @qiov may point directly to guest memory. + */ + int coroutine_fn (*bdrv_co_pwritev)(BlockDriverState *bs, + uint64_t offset, uint64_t bytes, QEMUIOVector *qiov, int flags); + int coroutine_fn (*bdrv_co_pwritev_part)(BlockDriverState *bs, + uint64_t offset, uint64_t bytes, + QEMUIOVector *qiov, size_t qiov_offset, int flags); + + /* + * Efficiently zero a region of the disk image. Typically an image format + * would use a compact metadata representation to implement this. This + * function pointer may be NULL or return -ENOSUP and .bdrv_co_writev() + * will be called instead. + */ + int coroutine_fn (*bdrv_co_pwrite_zeroes)(BlockDriverState *bs, + int64_t offset, int bytes, BdrvRequestFlags flags); + int coroutine_fn (*bdrv_co_pdiscard)(BlockDriverState *bs, + int64_t offset, int bytes); + + /* Map [offset, offset + nbytes) range onto a child of @bs to copy from, + * and invoke bdrv_co_copy_range_from(child, ...), or invoke + * bdrv_co_copy_range_to() if @bs is the leaf child to copy data from. + * + * See the comment of bdrv_co_copy_range for the parameter and return value + * semantics. + */ + int coroutine_fn (*bdrv_co_copy_range_from)(BlockDriverState *bs, + BdrvChild *src, + uint64_t offset, + BdrvChild *dst, + uint64_t dst_offset, + uint64_t bytes, + BdrvRequestFlags read_flags, + BdrvRequestFlags write_flags); + + /* Map [offset, offset + nbytes) range onto a child of bs to copy data to, + * and invoke bdrv_co_copy_range_to(child, src, ...), or perform the copy + * operation if @bs is the leaf and @src has the same BlockDriver. Return + * -ENOTSUP if @bs is the leaf but @src has a different BlockDriver. + * + * See the comment of bdrv_co_copy_range for the parameter and return value + * semantics. + */ + int coroutine_fn (*bdrv_co_copy_range_to)(BlockDriverState *bs, + BdrvChild *src, + uint64_t src_offset, + BdrvChild *dst, + uint64_t dst_offset, + uint64_t bytes, + BdrvRequestFlags read_flags, + BdrvRequestFlags write_flags); + + /* + * Building block for bdrv_block_status[_above] and + * bdrv_is_allocated[_above]. The driver should answer only + * according to the current layer, and should only need to set + * BDRV_BLOCK_DATA, BDRV_BLOCK_ZERO, BDRV_BLOCK_OFFSET_VALID, + * and/or BDRV_BLOCK_RAW; if the current layer defers to a backing + * layer, the result should be 0 (and not BDRV_BLOCK_ZERO). See + * block.h for the overall meaning of the bits. As a hint, the + * flag want_zero is true if the caller cares more about precise + * mappings (favor accurate _OFFSET_VALID/_ZERO) or false for + * overall allocation (favor larger *pnum, perhaps by reporting + * _DATA instead of _ZERO). The block layer guarantees input + * clamped to bdrv_getlength() and aligned to request_alignment, + * as well as non-NULL pnum, map, and file; in turn, the driver + * must return an error or set pnum to an aligned non-zero value. + */ + int coroutine_fn (*bdrv_co_block_status)(BlockDriverState *bs, + bool want_zero, int64_t offset, int64_t bytes, int64_t *pnum, + int64_t *map, BlockDriverState **file); + + /* + * Invalidate any cached meta-data. + */ + void coroutine_fn (*bdrv_co_invalidate_cache)(BlockDriverState *bs, + Error **errp); + int (*bdrv_inactivate)(BlockDriverState *bs); + + /* + * Flushes all data for all layers by calling bdrv_co_flush for underlying + * layers, if needed. This function is needed for deterministic + * synchronization of the flush finishing callback. + */ + int coroutine_fn (*bdrv_co_flush)(BlockDriverState *bs); + + /* Delete a created file. */ + int coroutine_fn (*bdrv_co_delete_file)(BlockDriverState *bs, + Error **errp); + + /* + * Flushes all data that was already written to the OS all the way down to + * the disk (for example file-posix.c calls fsync()). + */ + int coroutine_fn (*bdrv_co_flush_to_disk)(BlockDriverState *bs); + + /* + * Flushes all internal caches to the OS. The data may still sit in a + * writeback cache of the host OS, but it will survive a crash of the qemu + * process. + */ + int coroutine_fn (*bdrv_co_flush_to_os)(BlockDriverState *bs); + + /* + * Drivers setting this field must be able to work with just a plain + * filename with ':' as a prefix, and no other options. + * Options may be extracted from the filename by implementing + * bdrv_parse_filename. + */ + const char *protocol_name; + + /* + * Truncate @bs to @offset bytes using the given @prealloc mode + * when growing. Modes other than PREALLOC_MODE_OFF should be + * rejected when shrinking @bs. + * + * If @exact is true, @bs must be resized to exactly @offset. + * Otherwise, it is sufficient for @bs (if it is a host block + * device and thus there is no way to resize it) to be at least + * @offset bytes in length. + * + * If @exact is true and this function fails but would succeed + * with @exact = false, it should return -ENOTSUP. + */ + int coroutine_fn (*bdrv_co_truncate)(BlockDriverState *bs, int64_t offset, + bool exact, PreallocMode prealloc, + BdrvRequestFlags flags, Error **errp); + + int64_t (*bdrv_getlength)(BlockDriverState *bs); + bool has_variable_length; + int64_t (*bdrv_get_allocated_file_size)(BlockDriverState *bs); + BlockMeasureInfo *(*bdrv_measure)(QemuOpts *opts, BlockDriverState *in_bs, + Error **errp); + + int coroutine_fn (*bdrv_co_pwritev_compressed)(BlockDriverState *bs, + uint64_t offset, uint64_t bytes, QEMUIOVector *qiov); + int coroutine_fn (*bdrv_co_pwritev_compressed_part)(BlockDriverState *bs, + uint64_t offset, uint64_t bytes, QEMUIOVector *qiov, + size_t qiov_offset); + + int (*bdrv_snapshot_create)(BlockDriverState *bs, + QEMUSnapshotInfo *sn_info); + int (*bdrv_snapshot_goto)(BlockDriverState *bs, + const char *snapshot_id); + int (*bdrv_snapshot_delete)(BlockDriverState *bs, + const char *snapshot_id, + const char *name, + Error **errp); + int (*bdrv_snapshot_list)(BlockDriverState *bs, + QEMUSnapshotInfo **psn_info); + int (*bdrv_snapshot_load_tmp)(BlockDriverState *bs, + const char *snapshot_id, + const char *name, + Error **errp); + int (*bdrv_get_info)(BlockDriverState *bs, BlockDriverInfo *bdi); + ImageInfoSpecific *(*bdrv_get_specific_info)(BlockDriverState *bs, + Error **errp); + BlockStatsSpecific *(*bdrv_get_specific_stats)(BlockDriverState *bs); + + int coroutine_fn (*bdrv_save_vmstate)(BlockDriverState *bs, + QEMUIOVector *qiov, + int64_t pos); + int coroutine_fn (*bdrv_load_vmstate)(BlockDriverState *bs, + QEMUIOVector *qiov, + int64_t pos); + + int (*bdrv_change_backing_file)(BlockDriverState *bs, + const char *backing_file, const char *backing_fmt); + + /* removable device specific */ + bool (*bdrv_is_inserted)(BlockDriverState *bs); + void (*bdrv_eject)(BlockDriverState *bs, bool eject_flag); + void (*bdrv_lock_medium)(BlockDriverState *bs, bool locked); + + /* to control generic scsi devices */ + BlockAIOCB *(*bdrv_aio_ioctl)(BlockDriverState *bs, + unsigned long int req, void *buf, + BlockCompletionFunc *cb, void *opaque); + int coroutine_fn (*bdrv_co_ioctl)(BlockDriverState *bs, + unsigned long int req, void *buf); + + /* List of options for creating images, terminated by name == NULL */ + QemuOptsList *create_opts; + + /* List of options for image amend */ + QemuOptsList *amend_opts; + + /* + * If this driver supports reopening images this contains a + * NULL-terminated list of the runtime options that can be + * modified. If an option in this list is unspecified during + * reopen then it _must_ be reset to its default value or return + * an error. + */ + const char *const *mutable_opts; + + /* + * Returns 0 for completed check, -errno for internal errors. + * The check results are stored in result. + */ + int coroutine_fn (*bdrv_co_check)(BlockDriverState *bs, + BdrvCheckResult *result, + BdrvCheckMode fix); + + void (*bdrv_debug_event)(BlockDriverState *bs, BlkdebugEvent event); + + /* TODO Better pass a option string/QDict/QemuOpts to add any rule? */ + int (*bdrv_debug_breakpoint)(BlockDriverState *bs, const char *event, + const char *tag); + int (*bdrv_debug_remove_breakpoint)(BlockDriverState *bs, + const char *tag); + int (*bdrv_debug_resume)(BlockDriverState *bs, const char *tag); + bool (*bdrv_debug_is_suspended)(BlockDriverState *bs, const char *tag); + + void (*bdrv_refresh_limits)(BlockDriverState *bs, Error **errp); + + /* + * Returns 1 if newly created images are guaranteed to contain only + * zeros, 0 otherwise. + */ + int (*bdrv_has_zero_init)(BlockDriverState *bs); + + /* Remove fd handlers, timers, and other event loop callbacks so the event + * loop is no longer in use. Called with no in-flight requests and in + * depth-first traversal order with parents before child nodes. + */ + void (*bdrv_detach_aio_context)(BlockDriverState *bs); + + /* Add fd handlers, timers, and other event loop callbacks so I/O requests + * can be processed again. Called with no in-flight requests and in + * depth-first traversal order with child nodes before parent nodes. + */ + void (*bdrv_attach_aio_context)(BlockDriverState *bs, + AioContext *new_context); + + /* io queue for linux-aio */ + void (*bdrv_io_plug)(BlockDriverState *bs); + void (*bdrv_io_unplug)(BlockDriverState *bs); + + /** + * Try to get @bs's logical and physical block size. + * On success, store them in @bsz and return zero. + * On failure, return negative errno. + */ + int (*bdrv_probe_blocksizes)(BlockDriverState *bs, BlockSizes *bsz); + /** + * Try to get @bs's geometry (cyls, heads, sectors) + * On success, store them in @geo and return 0. + * On failure return -errno. + * Only drivers that want to override guest geometry implement this + * callback; see hd_geometry_guess(). + */ + int (*bdrv_probe_geometry)(BlockDriverState *bs, HDGeometry *geo); + + /** + * bdrv_co_drain_begin is called if implemented in the beginning of a + * drain operation to drain and stop any internal sources of requests in + * the driver. + * bdrv_co_drain_end is called if implemented at the end of the drain. + * + * They should be used by the driver to e.g. manage scheduled I/O + * requests, or toggle an internal state. After the end of the drain new + * requests will continue normally. + */ + void coroutine_fn (*bdrv_co_drain_begin)(BlockDriverState *bs); + void coroutine_fn (*bdrv_co_drain_end)(BlockDriverState *bs); + + void (*bdrv_add_child)(BlockDriverState *parent, BlockDriverState *child, + Error **errp); + void (*bdrv_del_child)(BlockDriverState *parent, BdrvChild *child, + Error **errp); + + /** + * Informs the block driver that a permission change is intended. The + * driver checks whether the change is permissible and may take other + * preparations for the change (e.g. get file system locks). This operation + * is always followed either by a call to either .bdrv_set_perm or + * .bdrv_abort_perm_update. + * + * Checks whether the requested set of cumulative permissions in @perm + * can be granted for accessing @bs and whether no other users are using + * permissions other than those given in @shared (both arguments take + * BLK_PERM_* bitmasks). + * + * If both conditions are met, 0 is returned. Otherwise, -errno is returned + * and errp is set to an error describing the conflict. + */ + int (*bdrv_check_perm)(BlockDriverState *bs, uint64_t perm, + uint64_t shared, Error **errp); + + /** + * Called to inform the driver that the set of cumulative set of used + * permissions for @bs has changed to @perm, and the set of sharable + * permission to @shared. The driver can use this to propagate changes to + * its children (i.e. request permissions only if a parent actually needs + * them). + * + * This function is only invoked after bdrv_check_perm(), so block drivers + * may rely on preparations made in their .bdrv_check_perm implementation. + */ + void (*bdrv_set_perm)(BlockDriverState *bs, uint64_t perm, uint64_t shared); + + /* + * Called to inform the driver that after a previous bdrv_check_perm() + * call, the permission update is not performed and any preparations made + * for it (e.g. taken file locks) need to be undone. + * + * This function can be called even for nodes that never saw a + * bdrv_check_perm() call. It is a no-op then. + */ + void (*bdrv_abort_perm_update)(BlockDriverState *bs); + + /** + * Returns in @nperm and @nshared the permissions that the driver for @bs + * needs on its child @c, based on the cumulative permissions requested by + * the parents in @parent_perm and @parent_shared. + * + * If @c is NULL, return the permissions for attaching a new child for the + * given @child_class and @role. + * + * If @reopen_queue is non-NULL, don't return the currently needed + * permissions, but those that will be needed after applying the + * @reopen_queue. + */ + void (*bdrv_child_perm)(BlockDriverState *bs, BdrvChild *c, + BdrvChildRole role, + BlockReopenQueue *reopen_queue, + uint64_t parent_perm, uint64_t parent_shared, + uint64_t *nperm, uint64_t *nshared); + + bool (*bdrv_supports_persistent_dirty_bitmap)(BlockDriverState *bs); + bool (*bdrv_co_can_store_new_dirty_bitmap)(BlockDriverState *bs, + const char *name, + uint32_t granularity, + Error **errp); + int (*bdrv_co_remove_persistent_dirty_bitmap)(BlockDriverState *bs, + const char *name, + Error **errp); + + /** + * Register/unregister a buffer for I/O. For example, when the driver is + * interested to know the memory areas that will later be used in iovs, so + * that it can do IOMMU mapping with VFIO etc., in order to get better + * performance. In the case of VFIO drivers, this callback is used to do + * DMA mapping for hot buffers. + */ + void (*bdrv_register_buf)(BlockDriverState *bs, void *host, size_t size); + void (*bdrv_unregister_buf)(BlockDriverState *bs, void *host); + QLIST_ENTRY(BlockDriver) list; + + /* Pointer to a NULL-terminated array of names of strong options + * that can be specified for bdrv_open(). A strong option is one + * that changes the data of a BDS. + * If this pointer is NULL, the array is considered empty. + * "filename" and "driver" are always considered strong. */ + const char *const *strong_runtime_opts; +}; + +static inline bool block_driver_can_compress(BlockDriver *drv) +{ + return drv->bdrv_co_pwritev_compressed || + drv->bdrv_co_pwritev_compressed_part; +} + +typedef struct BlockLimits { + /* Alignment requirement, in bytes, for offset/length of I/O + * requests. Must be a power of 2 less than INT_MAX; defaults to + * 1 for drivers with modern byte interfaces, and to 512 + * otherwise. */ + uint32_t request_alignment; + + /* Maximum number of bytes that can be discarded at once (since it + * is signed, it must be < 2G, if set). Must be multiple of + * pdiscard_alignment, but need not be power of 2. May be 0 if no + * inherent 32-bit limit */ + int32_t max_pdiscard; + + /* Optimal alignment for discard requests in bytes. A power of 2 + * is best but not mandatory. Must be a multiple of + * bl.request_alignment, and must be less than max_pdiscard if + * that is set. May be 0 if bl.request_alignment is good enough */ + uint32_t pdiscard_alignment; + + /* Maximum number of bytes that can zeroized at once (since it is + * signed, it must be < 2G, if set). Must be multiple of + * pwrite_zeroes_alignment. May be 0 if no inherent 32-bit limit */ + int32_t max_pwrite_zeroes; + + /* Optimal alignment for write zeroes requests in bytes. A power + * of 2 is best but not mandatory. Must be a multiple of + * bl.request_alignment, and must be less than max_pwrite_zeroes + * if that is set. May be 0 if bl.request_alignment is good + * enough */ + uint32_t pwrite_zeroes_alignment; + + /* Optimal transfer length in bytes. A power of 2 is best but not + * mandatory. Must be a multiple of bl.request_alignment, or 0 if + * no preferred size */ + uint32_t opt_transfer; + + /* Maximal transfer length in bytes. Need not be power of 2, but + * must be multiple of opt_transfer and bl.request_alignment, or 0 + * for no 32-bit limit. For now, anything larger than INT_MAX is + * clamped down. */ + uint32_t max_transfer; + + /* memory alignment, in bytes so that no bounce buffer is needed */ + size_t min_mem_alignment; + + /* memory alignment, in bytes, for bounce buffer */ + size_t opt_mem_alignment; + + /* maximum number of iovec elements */ + int max_iov; +} BlockLimits; + +typedef struct BdrvOpBlocker BdrvOpBlocker; + +typedef struct BdrvAioNotifier { + void (*attached_aio_context)(AioContext *new_context, void *opaque); + void (*detach_aio_context)(void *opaque); + + void *opaque; + bool deleted; + + QLIST_ENTRY(BdrvAioNotifier) list; +} BdrvAioNotifier; + +struct BdrvChildClass { + /* If true, bdrv_replace_node() doesn't change the node this BdrvChild + * points to. */ + bool stay_at_node; + + /* If true, the parent is a BlockDriverState and bdrv_next_all_states() + * will return it. This information is used for drain_all, where every node + * will be drained separately, so the drain only needs to be propagated to + * non-BDS parents. */ + bool parent_is_bds; + + void (*inherit_options)(BdrvChildRole role, bool parent_is_format, + int *child_flags, QDict *child_options, + int parent_flags, QDict *parent_options); + + void (*change_media)(BdrvChild *child, bool load); + void (*resize)(BdrvChild *child); + + /* Returns a name that is supposedly more useful for human users than the + * node name for identifying the node in question (in particular, a BB + * name), or NULL if the parent can't provide a better name. */ + const char *(*get_name)(BdrvChild *child); + + /* Returns a malloced string that describes the parent of the child for a + * human reader. This could be a node-name, BlockBackend name, qdev ID or + * QOM path of the device owning the BlockBackend, job type and ID etc. The + * caller is responsible for freeing the memory. */ + char *(*get_parent_desc)(BdrvChild *child); + + /* + * If this pair of functions is implemented, the parent doesn't issue new + * requests after returning from .drained_begin() until .drained_end() is + * called. + * + * These functions must not change the graph (and therefore also must not + * call aio_poll(), which could change the graph indirectly). + * + * If drained_end() schedules background operations, it must atomically + * increment *drained_end_counter for each such operation and atomically + * decrement it once the operation has settled. + * + * Note that this can be nested. If drained_begin() was called twice, new + * I/O is allowed only after drained_end() was called twice, too. + */ + void (*drained_begin)(BdrvChild *child); + void (*drained_end)(BdrvChild *child, int *drained_end_counter); + + /* + * Returns whether the parent has pending requests for the child. This + * callback is polled after .drained_begin() has been called until all + * activity on the child has stopped. + */ + bool (*drained_poll)(BdrvChild *child); + + /* Notifies the parent that the child has been activated/inactivated (e.g. + * when migration is completing) and it can start/stop requesting + * permissions and doing I/O on it. */ + void (*activate)(BdrvChild *child, Error **errp); + int (*inactivate)(BdrvChild *child); + + void (*attach)(BdrvChild *child); + void (*detach)(BdrvChild *child); + + /* Notifies the parent that the filename of its child has changed (e.g. + * because the direct child was removed from the backing chain), so that it + * can update its reference. */ + int (*update_filename)(BdrvChild *child, BlockDriverState *new_base, + const char *filename, Error **errp); + + bool (*can_set_aio_ctx)(BdrvChild *child, AioContext *ctx, + GSList **ignore, Error **errp); + void (*set_aio_ctx)(BdrvChild *child, AioContext *ctx, GSList **ignore); +}; + +extern const BdrvChildClass child_of_bds; + +struct BdrvChild { + BlockDriverState *bs; + char *name; + const BdrvChildClass *klass; + BdrvChildRole role; + void *opaque; + + /** + * Granted permissions for operating on this BdrvChild (BLK_PERM_* bitmask) + */ + uint64_t perm; + + /** + * Permissions that can still be granted to other users of @bs while this + * BdrvChild is still attached to it. (BLK_PERM_* bitmask) + */ + uint64_t shared_perm; + + /* backup of permissions during permission update procedure */ + bool has_backup_perm; + uint64_t backup_perm; + uint64_t backup_shared_perm; + + /* + * This link is frozen: the child can neither be replaced nor + * detached from the parent. + */ + bool frozen; + + /* + * How many times the parent of this child has been drained + * (through klass->drained_*). + * Usually, this is equal to bs->quiesce_counter (potentially + * reduced by bdrv_drain_all_count). It may differ while the + * child is entering or leaving a drained section. + */ + int parent_quiesce_counter; + + QLIST_ENTRY(BdrvChild) next; + QLIST_ENTRY(BdrvChild) next_parent; +}; + +/* + * Note: the function bdrv_append() copies and swaps contents of + * BlockDriverStates, so if you add new fields to this struct, please + * inspect bdrv_append() to determine if the new fields need to be + * copied as well. + */ +struct BlockDriverState { + /* Protected by big QEMU lock or read-only after opening. No special + * locking needed during I/O... + */ + int open_flags; /* flags used to open the file, re-used for re-open */ + bool read_only; /* if true, the media is read only */ + bool encrypted; /* if true, the media is encrypted */ + bool sg; /* if true, the device is a /dev/sg* */ + bool probed; /* if true, format was probed rather than specified */ + bool force_share; /* if true, always allow all shared permissions */ + bool implicit; /* if true, this filter node was automatically inserted */ + + BlockDriver *drv; /* NULL means no media */ + void *opaque; + + AioContext *aio_context; /* event loop used for fd handlers, timers, etc */ + /* long-running tasks intended to always use the same AioContext as this + * BDS may register themselves in this list to be notified of changes + * regarding this BDS's context */ + QLIST_HEAD(, BdrvAioNotifier) aio_notifiers; + bool walking_aio_notifiers; /* to make removal during iteration safe */ + + char filename[PATH_MAX]; + /* + * If not empty, this image is a diff in relation to backing_file. + * Note that this is the name given in the image header and + * therefore may or may not be equal to .backing->bs->filename. + * If this field contains a relative path, it is to be resolved + * relatively to the overlay's location. + */ + char backing_file[PATH_MAX]; + /* + * The backing filename indicated by the image header. Contrary + * to backing_file, if we ever open this file, auto_backing_file + * is replaced by the resulting BDS's filename (i.e. after a + * bdrv_refresh_filename() run). + */ + char auto_backing_file[PATH_MAX]; + char backing_format[16]; /* if non-zero and backing_file exists */ + + QDict *full_open_options; + char exact_filename[PATH_MAX]; + + BdrvChild *backing; + BdrvChild *file; + + /* I/O Limits */ + BlockLimits bl; + + /* Flags honored during pwrite (so far: BDRV_REQ_FUA, + * BDRV_REQ_WRITE_UNCHANGED). + * If a driver does not support BDRV_REQ_WRITE_UNCHANGED, those + * writes will be issued as normal writes without the flag set. + * This is important to note for drivers that do not explicitly + * request a WRITE permission for their children and instead take + * the same permissions as their parent did (this is commonly what + * block filters do). Such drivers have to be aware that the + * parent may have taken a WRITE_UNCHANGED permission only and is + * issuing such requests. Drivers either must make sure that + * these requests do not result in plain WRITE accesses (usually + * by supporting BDRV_REQ_WRITE_UNCHANGED, and then forwarding + * every incoming write request as-is, including potentially that + * flag), or they have to explicitly take the WRITE permission for + * their children. */ + unsigned int supported_write_flags; + /* Flags honored during pwrite_zeroes (so far: BDRV_REQ_FUA, + * BDRV_REQ_MAY_UNMAP, BDRV_REQ_WRITE_UNCHANGED) */ + unsigned int supported_zero_flags; + /* + * Flags honoured during truncate (so far: BDRV_REQ_ZERO_WRITE). + * + * If BDRV_REQ_ZERO_WRITE is given, the truncate operation must make sure + * that any added space reads as all zeros. If this can't be guaranteed, + * the operation must fail. + */ + unsigned int supported_truncate_flags; + + /* the following member gives a name to every node on the bs graph. */ + char node_name[32]; + /* element of the list of named nodes building the graph */ + QTAILQ_ENTRY(BlockDriverState) node_list; + /* element of the list of all BlockDriverStates (all_bdrv_states) */ + QTAILQ_ENTRY(BlockDriverState) bs_list; + /* element of the list of monitor-owned BDS */ + QTAILQ_ENTRY(BlockDriverState) monitor_list; + int refcnt; + + /* operation blockers */ + QLIST_HEAD(, BdrvOpBlocker) op_blockers[BLOCK_OP_TYPE_MAX]; + + /* The node that this node inherited default options from (and a reopen on + * which can affect this node by changing these defaults). This is always a + * parent node of this node. */ + BlockDriverState *inherits_from; + QLIST_HEAD(, BdrvChild) children; + QLIST_HEAD(, BdrvChild) parents; + + QDict *options; + QDict *explicit_options; + BlockdevDetectZeroesOptions detect_zeroes; + + /* The error object in use for blocking operations on backing_hd */ + Error *backing_blocker; + + /* Protected by AioContext lock */ + + /* If we are reading a disk image, give its size in sectors. + * Generally read-only; it is written to by load_snapshot and + * save_snaphost, but the block layer is quiescent during those. + */ + int64_t total_sectors; + + /* Callback before write request is processed */ + NotifierWithReturnList before_write_notifiers; + + /* threshold limit for writes, in bytes. "High water mark". */ + uint64_t write_threshold_offset; + NotifierWithReturn write_threshold_notifier; + + /* Writing to the list requires the BQL _and_ the dirty_bitmap_mutex. + * Reading from the list can be done with either the BQL or the + * dirty_bitmap_mutex. Modifying a bitmap only requires + * dirty_bitmap_mutex. */ + QemuMutex dirty_bitmap_mutex; + QLIST_HEAD(, BdrvDirtyBitmap) dirty_bitmaps; + + /* Offset after the highest byte written to */ + Stat64 wr_highest_offset; + + /* If true, copy read backing sectors into image. Can be >1 if more + * than one client has requested copy-on-read. Accessed with atomic + * ops. + */ + int copy_on_read; + + /* number of in-flight requests; overall and serialising. + * Accessed with atomic ops. + */ + unsigned int in_flight; + unsigned int serialising_in_flight; + + /* counter for nested bdrv_io_plug. + * Accessed with atomic ops. + */ + unsigned io_plugged; + + /* do we need to tell the quest if we have a volatile write cache? */ + int enable_write_cache; + + /* Accessed with atomic ops. */ + int quiesce_counter; + int recursive_quiesce_counter; + + unsigned int write_gen; /* Current data generation */ + + /* Protected by reqs_lock. */ + CoMutex reqs_lock; + QLIST_HEAD(, BdrvTrackedRequest) tracked_requests; + CoQueue flush_queue; /* Serializing flush queue */ + bool active_flush_req; /* Flush request in flight? */ + + /* Only read/written by whoever has set active_flush_req to true. */ + unsigned int flushed_gen; /* Flushed write generation */ + + /* BdrvChild links to this node may never be frozen */ + bool never_freeze; +}; + +struct BlockBackendRootState { + int open_flags; + bool read_only; + BlockdevDetectZeroesOptions detect_zeroes; +}; + +typedef enum BlockMirrorBackingMode { + /* Reuse the existing backing chain from the source for the target. + * - sync=full: Set backing BDS to NULL. + * - sync=top: Use source's backing BDS. + * - sync=none: Use source as the backing BDS. */ + MIRROR_SOURCE_BACKING_CHAIN, + + /* Open the target's backing chain completely anew */ + MIRROR_OPEN_BACKING_CHAIN, + + /* Do not change the target's backing BDS after job completion */ + MIRROR_LEAVE_BACKING_CHAIN, +} BlockMirrorBackingMode; + + +/* Essential block drivers which must always be statically linked into qemu, and + * which therefore can be accessed without using bdrv_find_format() */ +extern BlockDriver bdrv_file; +extern BlockDriver bdrv_raw; +extern BlockDriver bdrv_qcow2; + +int coroutine_fn bdrv_co_preadv(BdrvChild *child, + int64_t offset, unsigned int bytes, QEMUIOVector *qiov, + BdrvRequestFlags flags); +int coroutine_fn bdrv_co_preadv_part(BdrvChild *child, + int64_t offset, unsigned int bytes, + QEMUIOVector *qiov, size_t qiov_offset, BdrvRequestFlags flags); +int coroutine_fn bdrv_co_pwritev(BdrvChild *child, + int64_t offset, unsigned int bytes, QEMUIOVector *qiov, + BdrvRequestFlags flags); +int coroutine_fn bdrv_co_pwritev_part(BdrvChild *child, + int64_t offset, unsigned int bytes, + QEMUIOVector *qiov, size_t qiov_offset, BdrvRequestFlags flags); + +static inline int coroutine_fn bdrv_co_pread(BdrvChild *child, + int64_t offset, unsigned int bytes, void *buf, BdrvRequestFlags flags) +{ + QEMUIOVector qiov = QEMU_IOVEC_INIT_BUF(qiov, buf, bytes); + + return bdrv_co_preadv(child, offset, bytes, &qiov, flags); +} + +static inline int coroutine_fn bdrv_co_pwrite(BdrvChild *child, + int64_t offset, unsigned int bytes, void *buf, BdrvRequestFlags flags) +{ + QEMUIOVector qiov = QEMU_IOVEC_INIT_BUF(qiov, buf, bytes); + + return bdrv_co_pwritev(child, offset, bytes, &qiov, flags); +} + +extern unsigned int bdrv_drain_all_count; +void bdrv_apply_subtree_drain(BdrvChild *child, BlockDriverState *new_parent); +void bdrv_unapply_subtree_drain(BdrvChild *child, BlockDriverState *old_parent); + +bool coroutine_fn bdrv_mark_request_serialising(BdrvTrackedRequest *req, uint64_t align); +BdrvTrackedRequest *coroutine_fn bdrv_co_get_self_request(BlockDriverState *bs); + +int get_tmp_filename(char *filename, int size); +BlockDriver *bdrv_probe_all(const uint8_t *buf, int buf_size, + const char *filename); + +void bdrv_parse_filename_strip_prefix(const char *filename, const char *prefix, + QDict *options); + +bool bdrv_backing_overridden(BlockDriverState *bs); + + +/** + * bdrv_add_before_write_notifier: + * + * Register a callback that is invoked before write requests are processed but + * after any throttling or waiting for overlapping requests. + */ +void bdrv_add_before_write_notifier(BlockDriverState *bs, + NotifierWithReturn *notifier); + +/** + * bdrv_add_aio_context_notifier: + * + * If a long-running job intends to be always run in the same AioContext as a + * certain BDS, it may use this function to be notified of changes regarding the + * association of the BDS to an AioContext. + * + * attached_aio_context() is called after the target BDS has been attached to a + * new AioContext; detach_aio_context() is called before the target BDS is being + * detached from its old AioContext. + */ +void bdrv_add_aio_context_notifier(BlockDriverState *bs, + void (*attached_aio_context)(AioContext *new_context, void *opaque), + void (*detach_aio_context)(void *opaque), void *opaque); + +/** + * bdrv_remove_aio_context_notifier: + * + * Unsubscribe of change notifications regarding the BDS's AioContext. The + * parameters given here have to be the same as those given to + * bdrv_add_aio_context_notifier(). + */ +void bdrv_remove_aio_context_notifier(BlockDriverState *bs, + void (*aio_context_attached)(AioContext *, + void *), + void (*aio_context_detached)(void *), + void *opaque); + +/** + * bdrv_wakeup: + * @bs: The BlockDriverState for which an I/O operation has been completed. + * + * Wake up the main thread if it is waiting on BDRV_POLL_WHILE. During + * synchronous I/O on a BlockDriverState that is attached to another + * I/O thread, the main thread lets the I/O thread's event loop run, + * waiting for the I/O operation to complete. A bdrv_wakeup will wake + * up the main thread if necessary. + * + * Manual calls to bdrv_wakeup are rarely necessary, because + * bdrv_dec_in_flight already calls it. + */ +void bdrv_wakeup(BlockDriverState *bs); + +#ifdef _WIN32 +int is_windows_drive(const char *filename); +#endif + +/** + * stream_start: + * @job_id: The id of the newly-created job, or %NULL to use the + * device name of @bs. + * @bs: Block device to operate on. + * @base: Block device that will become the new base, or %NULL to + * flatten the whole backing file chain onto @bs. + * @backing_file_str: The file name that will be written to @bs as the + * the new backing file if the job completes. Ignored if @base is %NULL. + * @creation_flags: Flags that control the behavior of the Job lifetime. + * See @BlockJobCreateFlags + * @speed: The maximum speed, in bytes per second, or 0 for unlimited. + * @on_error: The action to take upon error. + * @errp: Error object. + * + * Start a streaming operation on @bs. Clusters that are unallocated + * in @bs, but allocated in any image between @base and @bs (both + * exclusive) will be written to @bs. At the end of a successful + * streaming job, the backing file of @bs will be changed to + * @backing_file_str in the written image and to @base in the live + * BlockDriverState. + */ +void stream_start(const char *job_id, BlockDriverState *bs, + BlockDriverState *base, const char *backing_file_str, + int creation_flags, int64_t speed, + BlockdevOnError on_error, Error **errp); + +/** + * commit_start: + * @job_id: The id of the newly-created job, or %NULL to use the + * device name of @bs. + * @bs: Active block device. + * @top: Top block device to be committed. + * @base: Block device that will be written into, and become the new top. + * @creation_flags: Flags that control the behavior of the Job lifetime. + * See @BlockJobCreateFlags + * @speed: The maximum speed, in bytes per second, or 0 for unlimited. + * @on_error: The action to take upon error. + * @backing_file_str: String to use as the backing file in @top's overlay + * @filter_node_name: The node name that should be assigned to the filter + * driver that the commit job inserts into the graph above @top. NULL means + * that a node name should be autogenerated. + * @errp: Error object. + * + */ +void commit_start(const char *job_id, BlockDriverState *bs, + BlockDriverState *base, BlockDriverState *top, + int creation_flags, int64_t speed, + BlockdevOnError on_error, const char *backing_file_str, + const char *filter_node_name, Error **errp); +/** + * commit_active_start: + * @job_id: The id of the newly-created job, or %NULL to use the + * device name of @bs. + * @bs: Active block device to be committed. + * @base: Block device that will be written into, and become the new top. + * @creation_flags: Flags that control the behavior of the Job lifetime. + * See @BlockJobCreateFlags + * @speed: The maximum speed, in bytes per second, or 0 for unlimited. + * @on_error: The action to take upon error. + * @filter_node_name: The node name that should be assigned to the filter + * driver that the commit job inserts into the graph above @bs. NULL means that + * a node name should be autogenerated. + * @cb: Completion function for the job. + * @opaque: Opaque pointer value passed to @cb. + * @auto_complete: Auto complete the job. + * @errp: Error object. + * + */ +BlockJob *commit_active_start(const char *job_id, BlockDriverState *bs, + BlockDriverState *base, int creation_flags, + int64_t speed, BlockdevOnError on_error, + const char *filter_node_name, + BlockCompletionFunc *cb, void *opaque, + bool auto_complete, Error **errp); +/* + * mirror_start: + * @job_id: The id of the newly-created job, or %NULL to use the + * device name of @bs. + * @bs: Block device to operate on. + * @target: Block device to write to. + * @replaces: Block graph node name to replace once the mirror is done. Can + * only be used when full mirroring is selected. + * @creation_flags: Flags that control the behavior of the Job lifetime. + * See @BlockJobCreateFlags + * @speed: The maximum speed, in bytes per second, or 0 for unlimited. + * @granularity: The chosen granularity for the dirty bitmap. + * @buf_size: The amount of data that can be in flight at one time. + * @mode: Whether to collapse all images in the chain to the target. + * @backing_mode: How to establish the target's backing chain after completion. + * @zero_target: Whether the target should be explicitly zero-initialized + * @on_source_error: The action to take upon error reading from the source. + * @on_target_error: The action to take upon error writing to the target. + * @unmap: Whether to unmap target where source sectors only contain zeroes. + * @filter_node_name: The node name that should be assigned to the filter + * driver that the mirror job inserts into the graph above @bs. NULL means that + * a node name should be autogenerated. + * @copy_mode: When to trigger writes to the target. + * @errp: Error object. + * + * Start a mirroring operation on @bs. Clusters that are allocated + * in @bs will be written to @target until the job is cancelled or + * manually completed. At the end of a successful mirroring job, + * @bs will be switched to read from @target. + */ +void mirror_start(const char *job_id, BlockDriverState *bs, + BlockDriverState *target, const char *replaces, + int creation_flags, int64_t speed, + uint32_t granularity, int64_t buf_size, + MirrorSyncMode mode, BlockMirrorBackingMode backing_mode, + bool zero_target, + BlockdevOnError on_source_error, + BlockdevOnError on_target_error, + bool unmap, const char *filter_node_name, + MirrorCopyMode copy_mode, Error **errp); + +/* + * backup_job_create: + * @job_id: The id of the newly-created job, or %NULL to use the + * device name of @bs. + * @bs: Block device to operate on. + * @target: Block device to write to. + * @speed: The maximum speed, in bytes per second, or 0 for unlimited. + * @sync_mode: What parts of the disk image should be copied to the destination. + * @sync_bitmap: The dirty bitmap if sync_mode is 'bitmap' or 'incremental' + * @bitmap_mode: The bitmap synchronization policy to use. + * @on_source_error: The action to take upon error reading from the source. + * @on_target_error: The action to take upon error writing to the target. + * @creation_flags: Flags that control the behavior of the Job lifetime. + * See @BlockJobCreateFlags + * @cb: Completion function for the job. + * @opaque: Opaque pointer value passed to @cb. + * @txn: Transaction that this job is part of (may be NULL). + * + * Create a backup operation on @bs. Clusters in @bs are written to @target + * until the job is cancelled or manually completed. + */ +BlockJob *backup_job_create(const char *job_id, BlockDriverState *bs, + BlockDriverState *target, int64_t speed, + MirrorSyncMode sync_mode, + BdrvDirtyBitmap *sync_bitmap, + BitmapSyncMode bitmap_mode, + bool compress, + const char *filter_node_name, + BlockdevOnError on_source_error, + BlockdevOnError on_target_error, + int creation_flags, + BlockCompletionFunc *cb, void *opaque, + JobTxn *txn, Error **errp); + +BdrvChild *bdrv_root_attach_child(BlockDriverState *child_bs, + const char *child_name, + const BdrvChildClass *child_class, + BdrvChildRole child_role, + AioContext *ctx, + uint64_t perm, uint64_t shared_perm, + void *opaque, Error **errp); +void bdrv_root_unref_child(BdrvChild *child); + +void bdrv_get_cumulative_perm(BlockDriverState *bs, uint64_t *perm, + uint64_t *shared_perm); + +/** + * Sets a BdrvChild's permissions. Avoid if the parent is a BDS; use + * bdrv_child_refresh_perms() instead and make the parent's + * .bdrv_child_perm() implementation return the correct values. + */ +int bdrv_child_try_set_perm(BdrvChild *c, uint64_t perm, uint64_t shared, + Error **errp); + +/** + * Calls bs->drv->bdrv_child_perm() and updates the child's permission + * masks with the result. + * Drivers should invoke this function whenever an event occurs that + * makes their .bdrv_child_perm() implementation return different + * values than before, but which will not result in the block layer + * automatically refreshing the permissions. + */ +int bdrv_child_refresh_perms(BlockDriverState *bs, BdrvChild *c, Error **errp); + +bool bdrv_recurse_can_replace(BlockDriverState *bs, + BlockDriverState *to_replace); + +/* + * Default implementation for BlockDriver.bdrv_child_perm() that can + * be used by block filters and image formats, as long as they use the + * child_of_bds child class and set an appropriate BdrvChildRole. + */ +void bdrv_default_perms(BlockDriverState *bs, BdrvChild *c, + BdrvChildRole role, BlockReopenQueue *reopen_queue, + uint64_t perm, uint64_t shared, + uint64_t *nperm, uint64_t *nshared); + +const char *bdrv_get_parent_name(const BlockDriverState *bs); +void blk_dev_change_media_cb(BlockBackend *blk, bool load, Error **errp); +bool blk_dev_has_removable_media(BlockBackend *blk); +bool blk_dev_has_tray(BlockBackend *blk); +void blk_dev_eject_request(BlockBackend *blk, bool force); +bool blk_dev_is_tray_open(BlockBackend *blk); +bool blk_dev_is_medium_locked(BlockBackend *blk); + +void bdrv_set_dirty(BlockDriverState *bs, int64_t offset, int64_t bytes); + +void bdrv_clear_dirty_bitmap(BdrvDirtyBitmap *bitmap, HBitmap **out); +void bdrv_restore_dirty_bitmap(BdrvDirtyBitmap *bitmap, HBitmap *backup); +bool bdrv_dirty_bitmap_merge_internal(BdrvDirtyBitmap *dest, + const BdrvDirtyBitmap *src, + HBitmap **backup, bool lock); + +void bdrv_inc_in_flight(BlockDriverState *bs); +void bdrv_dec_in_flight(BlockDriverState *bs); + +void blockdev_close_all_bdrv_states(void); + +int coroutine_fn bdrv_co_copy_range_from(BdrvChild *src, uint64_t src_offset, + BdrvChild *dst, uint64_t dst_offset, + uint64_t bytes, + BdrvRequestFlags read_flags, + BdrvRequestFlags write_flags); +int coroutine_fn bdrv_co_copy_range_to(BdrvChild *src, uint64_t src_offset, + BdrvChild *dst, uint64_t dst_offset, + uint64_t bytes, + BdrvRequestFlags read_flags, + BdrvRequestFlags write_flags); + +int refresh_total_sectors(BlockDriverState *bs, int64_t hint); + +void bdrv_set_monitor_owned(BlockDriverState *bs); +BlockDriverState *bds_tree_init(QDict *bs_opts, Error **errp); + +/** + * Simple implementation of bdrv_co_create_opts for protocol drivers + * which only support creation via opening a file + * (usually existing raw storage device) + */ +int coroutine_fn bdrv_co_create_opts_simple(BlockDriver *drv, + const char *filename, + QemuOpts *opts, + Error **errp); +extern QemuOptsList bdrv_create_opts_simple; + +BdrvDirtyBitmap *block_dirty_bitmap_lookup(const char *node, + const char *name, + BlockDriverState **pbs, + Error **errp); +BdrvDirtyBitmap *block_dirty_bitmap_merge(const char *node, const char *target, + BlockDirtyBitmapMergeSourceList *bms, + HBitmap **backup, Error **errp); +BdrvDirtyBitmap *block_dirty_bitmap_remove(const char *node, const char *name, + bool release, + BlockDriverState **bitmap_bs, + Error **errp); + +BdrvChild *bdrv_cow_child(BlockDriverState *bs); +BdrvChild *bdrv_filter_child(BlockDriverState *bs); +BdrvChild *bdrv_filter_or_cow_child(BlockDriverState *bs); +BdrvChild *bdrv_primary_child(BlockDriverState *bs); +BlockDriverState *bdrv_skip_implicit_filters(BlockDriverState *bs); +BlockDriverState *bdrv_skip_filters(BlockDriverState *bs); +BlockDriverState *bdrv_backing_chain_next(BlockDriverState *bs); + +static inline BlockDriverState *child_bs(BdrvChild *child) +{ + return child ? child->bs : NULL; +} + +static inline BlockDriverState *bdrv_cow_bs(BlockDriverState *bs) +{ + return child_bs(bdrv_cow_child(bs)); +} + +static inline BlockDriverState *bdrv_filter_bs(BlockDriverState *bs) +{ + return child_bs(bdrv_filter_child(bs)); +} + +static inline BlockDriverState *bdrv_filter_or_cow_bs(BlockDriverState *bs) +{ + return child_bs(bdrv_filter_or_cow_child(bs)); +} + +static inline BlockDriverState *bdrv_primary_bs(BlockDriverState *bs) +{ + return child_bs(bdrv_primary_child(bs)); +} + +/** + * End all quiescent sections started by bdrv_drain_all_begin(). This is + * needed when deleting a BDS before bdrv_drain_all_end() is called. + * + * NOTE: this is an internal helper for bdrv_close() *only*. No one else + * should call it. + */ +void bdrv_drain_all_end_quiesce(BlockDriverState *bs); + +#endif /* BLOCK_INT_H */ diff --git a/include/block/blockjob.h b/include/block/blockjob.h new file mode 100644 index 000000000..35faa3aa2 --- /dev/null +++ b/include/block/blockjob.h @@ -0,0 +1,176 @@ +/* + * Declarations for long-running block device operations + * + * Copyright (c) 2011 IBM Corp. + * Copyright (c) 2012 Red Hat, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ + +#ifndef BLOCKJOB_H +#define BLOCKJOB_H + +#include "qemu/job.h" +#include "block/block.h" +#include "qemu/ratelimit.h" + +#define BLOCK_JOB_SLICE_TIME 100000000ULL /* ns */ + +typedef struct BlockJobDriver BlockJobDriver; + +/** + * BlockJob: + * + * Long-running operation on a BlockDriverState. + */ +typedef struct BlockJob { + /** Data belonging to the generic Job infrastructure */ + Job job; + + /** The block device on which the job is operating. */ + BlockBackend *blk; + + /** Status that is published by the query-block-jobs QMP API */ + BlockDeviceIoStatus iostatus; + + /** Speed that was set with @block_job_set_speed. */ + int64_t speed; + + /** Rate limiting data structure for implementing @speed. */ + RateLimit limit; + + /** Block other operations when block job is running */ + Error *blocker; + + /** Called when a cancelled job is finalised. */ + Notifier finalize_cancelled_notifier; + + /** Called when a successfully completed job is finalised. */ + Notifier finalize_completed_notifier; + + /** Called when the job transitions to PENDING */ + Notifier pending_notifier; + + /** Called when the job transitions to READY */ + Notifier ready_notifier; + + /** Called when the job coroutine yields or terminates */ + Notifier idle_notifier; + + /** BlockDriverStates that are involved in this block job */ + GSList *nodes; +} BlockJob; + +/** + * block_job_next: + * @job: A block job, or %NULL. + * + * Get the next element from the list of block jobs after @job, or the + * first one if @job is %NULL. + * + * Returns the requested job, or %NULL if there are no more jobs left. + */ +BlockJob *block_job_next(BlockJob *job); + +/** + * block_job_get: + * @id: The id of the block job. + * + * Get the block job identified by @id (which must not be %NULL). + * + * Returns the requested job, or %NULL if it doesn't exist. + */ +BlockJob *block_job_get(const char *id); + +/** + * block_job_add_bdrv: + * @job: A block job + * @name: The name to assign to the new BdrvChild + * @bs: A BlockDriverState that is involved in @job + * @perm, @shared_perm: Permissions to request on the node + * + * Add @bs to the list of BlockDriverState that are involved in + * @job. This means that all operations will be blocked on @bs while + * @job exists. + */ +int block_job_add_bdrv(BlockJob *job, const char *name, BlockDriverState *bs, + uint64_t perm, uint64_t shared_perm, Error **errp); + +/** + * block_job_remove_all_bdrv: + * @job: The block job + * + * Remove all BlockDriverStates from the list of nodes that are involved in the + * job. This removes the blockers added with block_job_add_bdrv(). + */ +void block_job_remove_all_bdrv(BlockJob *job); + +/** + * block_job_has_bdrv: + * @job: The block job + * + * Searches for @bs in the list of nodes that are involved in the + * job. + */ +bool block_job_has_bdrv(BlockJob *job, BlockDriverState *bs); + +/** + * block_job_set_speed: + * @job: The job to set the speed for. + * @speed: The new value + * @errp: Error object. + * + * Set a rate-limiting parameter for the job; the actual meaning may + * vary depending on the job type. + */ +void block_job_set_speed(BlockJob *job, int64_t speed, Error **errp); + +/** + * block_job_query: + * @job: The job to get information about. + * + * Return information about a job. + */ +BlockJobInfo *block_job_query(BlockJob *job, Error **errp); + +/** + * block_job_iostatus_reset: + * @job: The job whose I/O status should be reset. + * + * Reset I/O status on @job and on BlockDriverState objects it uses, + * other than job->blk. + */ +void block_job_iostatus_reset(BlockJob *job); + +/** + * block_job_is_internal: + * @job: The job to determine if it is user-visible or not. + * + * Returns true if the job should not be visible to the management layer. + */ +bool block_job_is_internal(BlockJob *job); + +/** + * block_job_driver: + * + * Returns the driver associated with a block job. + */ +const BlockJobDriver *block_job_driver(BlockJob *job); + +#endif diff --git a/include/block/blockjob_int.h b/include/block/blockjob_int.h new file mode 100644 index 000000000..e2824a36a --- /dev/null +++ b/include/block/blockjob_int.h @@ -0,0 +1,120 @@ +/* + * Declarations for long-running block device operations + * + * Copyright (c) 2011 IBM Corp. + * Copyright (c) 2012 Red Hat, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ + +#ifndef BLOCKJOB_INT_H +#define BLOCKJOB_INT_H + +#include "block/blockjob.h" +#include "block/block.h" + +/** + * BlockJobDriver: + * + * A class type for block job driver. + */ +struct BlockJobDriver { + /** Generic JobDriver callbacks and settings */ + JobDriver job_driver; + + /* + * Returns whether the job has pending requests for the child or will + * submit new requests before the next pause point. This callback is polled + * in the context of draining a job node after requesting that the job be + * paused, until all activity on the child has stopped. + */ + bool (*drained_poll)(BlockJob *job); + + /* + * If the callback is not NULL, it will be invoked before the job is + * resumed in a new AioContext. This is the place to move any resources + * besides job->blk to the new AioContext. + */ + void (*attached_aio_context)(BlockJob *job, AioContext *new_context); +}; + +/** + * block_job_create: + * @job_id: The id of the newly-created job, or %NULL to have one + * generated automatically. + * @driver: The class object for the newly-created job. + * @txn: The transaction this job belongs to, if any. %NULL otherwise. + * @bs: The block + * @perm, @shared_perm: Permissions to request for @bs + * @speed: The maximum speed, in bytes per second, or 0 for unlimited. + * @flags: Creation flags for the Block Job. See @JobCreateFlags. + * @cb: Completion function for the job. + * @opaque: Opaque pointer value passed to @cb. + * @errp: Error object. + * + * Create a new long-running block device job and return it. The job + * will call @cb asynchronously when the job completes. Note that + * @bs may have been closed at the time the @cb it is called. If + * this is the case, the job may be reported as either cancelled or + * completed. + * + * This function is not part of the public job interface; it should be + * called from a wrapper that is specific to the job type. + */ +void *block_job_create(const char *job_id, const BlockJobDriver *driver, + JobTxn *txn, BlockDriverState *bs, uint64_t perm, + uint64_t shared_perm, int64_t speed, int flags, + BlockCompletionFunc *cb, void *opaque, Error **errp); + +/** + * block_job_free: + * Callback to be used for JobDriver.free in all block jobs. Frees block job + * specific resources in @job. + */ +void block_job_free(Job *job); + +/** + * block_job_user_resume: + * Callback to be used for JobDriver.user_resume in all block jobs. Resets the + * iostatus when the user resumes @job. + */ +void block_job_user_resume(Job *job); + +/** + * block_job_ratelimit_get_delay: + * + * Calculate and return delay for the next request in ns. See the documentation + * of ratelimit_calculate_delay() for details. + */ +int64_t block_job_ratelimit_get_delay(BlockJob *job, uint64_t n); + +/** + * block_job_error_action: + * @job: The job to signal an error for. + * @on_err: The error action setting. + * @is_read: Whether the operation was a read. + * @error: The error that was reported. + * + * Report an I/O error for a block job and possibly stop the VM. Return the + * action that was selected based on @on_err and @error. + */ +BlockErrorAction block_job_error_action(BlockJob *job, BlockdevOnError on_err, + int is_read, int error); + +#endif diff --git a/include/block/dirty-bitmap.h b/include/block/dirty-bitmap.h new file mode 100644 index 000000000..36e8da4fc --- /dev/null +++ b/include/block/dirty-bitmap.h @@ -0,0 +1,120 @@ +#ifndef BLOCK_DIRTY_BITMAP_H +#define BLOCK_DIRTY_BITMAP_H + +#include "qapi/qapi-types-block-core.h" +#include "qemu/hbitmap.h" + +typedef enum BitmapCheckFlags { + BDRV_BITMAP_BUSY = 1, + BDRV_BITMAP_RO = 2, + BDRV_BITMAP_INCONSISTENT = 4, +} BitmapCheckFlags; + +#define BDRV_BITMAP_DEFAULT (BDRV_BITMAP_BUSY | BDRV_BITMAP_RO | \ + BDRV_BITMAP_INCONSISTENT) +#define BDRV_BITMAP_ALLOW_RO (BDRV_BITMAP_BUSY | BDRV_BITMAP_INCONSISTENT) + +#define BDRV_BITMAP_MAX_NAME_SIZE 1023 + +bool bdrv_supports_persistent_dirty_bitmap(BlockDriverState *bs); +BdrvDirtyBitmap *bdrv_create_dirty_bitmap(BlockDriverState *bs, + uint32_t granularity, + const char *name, + Error **errp); +int bdrv_dirty_bitmap_create_successor(BdrvDirtyBitmap *bitmap, + Error **errp); +BdrvDirtyBitmap *bdrv_dirty_bitmap_abdicate(BdrvDirtyBitmap *bitmap, + Error **errp); +BdrvDirtyBitmap *bdrv_reclaim_dirty_bitmap(BdrvDirtyBitmap *bitmap, + Error **errp); +void bdrv_dirty_bitmap_enable_successor(BdrvDirtyBitmap *bitmap); +BdrvDirtyBitmap *bdrv_find_dirty_bitmap(BlockDriverState *bs, + const char *name); +int bdrv_dirty_bitmap_check(const BdrvDirtyBitmap *bitmap, uint32_t flags, + Error **errp); +void bdrv_release_dirty_bitmap(BdrvDirtyBitmap *bitmap); +void bdrv_release_named_dirty_bitmaps(BlockDriverState *bs); +int bdrv_remove_persistent_dirty_bitmap(BlockDriverState *bs, const char *name, + Error **errp); +void bdrv_disable_dirty_bitmap(BdrvDirtyBitmap *bitmap); +void bdrv_enable_dirty_bitmap(BdrvDirtyBitmap *bitmap); +void bdrv_enable_dirty_bitmap_locked(BdrvDirtyBitmap *bitmap); +BlockDirtyInfoList *bdrv_query_dirty_bitmaps(BlockDriverState *bs); +uint32_t bdrv_get_default_bitmap_granularity(BlockDriverState *bs); +uint32_t bdrv_dirty_bitmap_granularity(const BdrvDirtyBitmap *bitmap); +bool bdrv_dirty_bitmap_enabled(BdrvDirtyBitmap *bitmap); +bool bdrv_dirty_bitmap_has_successor(BdrvDirtyBitmap *bitmap); +const char *bdrv_dirty_bitmap_name(const BdrvDirtyBitmap *bitmap); +int64_t bdrv_dirty_bitmap_size(const BdrvDirtyBitmap *bitmap); +DirtyBitmapStatus bdrv_dirty_bitmap_status(BdrvDirtyBitmap *bitmap); +void bdrv_set_dirty_bitmap(BdrvDirtyBitmap *bitmap, + int64_t offset, int64_t bytes); +void bdrv_reset_dirty_bitmap(BdrvDirtyBitmap *bitmap, + int64_t offset, int64_t bytes); +BdrvDirtyBitmapIter *bdrv_dirty_iter_new(BdrvDirtyBitmap *bitmap); +void bdrv_dirty_iter_free(BdrvDirtyBitmapIter *iter); + +uint64_t bdrv_dirty_bitmap_serialization_size(const BdrvDirtyBitmap *bitmap, + uint64_t offset, uint64_t bytes); +uint64_t bdrv_dirty_bitmap_serialization_align(const BdrvDirtyBitmap *bitmap); +void bdrv_dirty_bitmap_serialize_part(const BdrvDirtyBitmap *bitmap, + uint8_t *buf, uint64_t offset, + uint64_t bytes); +void bdrv_dirty_bitmap_deserialize_part(BdrvDirtyBitmap *bitmap, + uint8_t *buf, uint64_t offset, + uint64_t bytes, bool finish); +void bdrv_dirty_bitmap_deserialize_zeroes(BdrvDirtyBitmap *bitmap, + uint64_t offset, uint64_t bytes, + bool finish); +void bdrv_dirty_bitmap_deserialize_ones(BdrvDirtyBitmap *bitmap, + uint64_t offset, uint64_t bytes, + bool finish); +void bdrv_dirty_bitmap_deserialize_finish(BdrvDirtyBitmap *bitmap); + +void bdrv_dirty_bitmap_set_readonly(BdrvDirtyBitmap *bitmap, bool value); +void bdrv_dirty_bitmap_set_persistence(BdrvDirtyBitmap *bitmap, + bool persistent); +void bdrv_dirty_bitmap_set_inconsistent(BdrvDirtyBitmap *bitmap); +void bdrv_dirty_bitmap_set_busy(BdrvDirtyBitmap *bitmap, bool busy); +void bdrv_merge_dirty_bitmap(BdrvDirtyBitmap *dest, const BdrvDirtyBitmap *src, + HBitmap **backup, Error **errp); +void bdrv_dirty_bitmap_skip_store(BdrvDirtyBitmap *bitmap, bool skip); +bool bdrv_dirty_bitmap_get(BdrvDirtyBitmap *bitmap, int64_t offset); + +/* Functions that require manual locking. */ +void bdrv_dirty_bitmap_lock(BdrvDirtyBitmap *bitmap); +void bdrv_dirty_bitmap_unlock(BdrvDirtyBitmap *bitmap); +bool bdrv_dirty_bitmap_get_locked(BdrvDirtyBitmap *bitmap, int64_t offset); +void bdrv_set_dirty_bitmap_locked(BdrvDirtyBitmap *bitmap, + int64_t offset, int64_t bytes); +void bdrv_reset_dirty_bitmap_locked(BdrvDirtyBitmap *bitmap, + int64_t offset, int64_t bytes); +int64_t bdrv_dirty_iter_next(BdrvDirtyBitmapIter *iter); +void bdrv_set_dirty_iter(BdrvDirtyBitmapIter *hbi, int64_t offset); +int64_t bdrv_get_dirty_count(BdrvDirtyBitmap *bitmap); +void bdrv_dirty_bitmap_truncate(BlockDriverState *bs, int64_t bytes); +bool bdrv_dirty_bitmap_readonly(const BdrvDirtyBitmap *bitmap); +bool bdrv_has_readonly_bitmaps(BlockDriverState *bs); +bool bdrv_has_named_bitmaps(BlockDriverState *bs); +bool bdrv_dirty_bitmap_get_autoload(const BdrvDirtyBitmap *bitmap); +bool bdrv_dirty_bitmap_get_persistence(BdrvDirtyBitmap *bitmap); +bool bdrv_dirty_bitmap_inconsistent(const BdrvDirtyBitmap *bitmap); + +BdrvDirtyBitmap *bdrv_dirty_bitmap_first(BlockDriverState *bs); +BdrvDirtyBitmap *bdrv_dirty_bitmap_next(BdrvDirtyBitmap *bitmap); +#define FOR_EACH_DIRTY_BITMAP(bs, bitmap) \ +for (bitmap = bdrv_dirty_bitmap_first(bs); bitmap; \ + bitmap = bdrv_dirty_bitmap_next(bitmap)) + +char *bdrv_dirty_bitmap_sha256(const BdrvDirtyBitmap *bitmap, Error **errp); +int64_t bdrv_dirty_bitmap_next_dirty(BdrvDirtyBitmap *bitmap, int64_t offset, + int64_t bytes); +int64_t bdrv_dirty_bitmap_next_zero(BdrvDirtyBitmap *bitmap, int64_t offset, + int64_t bytes); +bool bdrv_dirty_bitmap_next_dirty_area(BdrvDirtyBitmap *bitmap, + int64_t start, int64_t end, int64_t max_dirty_count, + int64_t *dirty_start, int64_t *dirty_count); +BdrvDirtyBitmap *bdrv_reclaim_dirty_bitmap_locked(BdrvDirtyBitmap *bitmap, + Error **errp); + +#endif diff --git a/include/block/export.h b/include/block/export.h new file mode 100644 index 000000000..7feb02e10 --- /dev/null +++ b/include/block/export.h @@ -0,0 +1,89 @@ +/* + * Declarations for block exports + * + * Copyright (c) 2012, 2020 Red Hat, Inc. + * + * Authors: + * Paolo Bonzini + * Kevin Wolf + * + * This work is licensed under the terms of the GNU GPL, version 2 or + * later. See the COPYING file in the top-level directory. + */ + +#ifndef BLOCK_EXPORT_H +#define BLOCK_EXPORT_H + +#include "qapi/qapi-types-block-export.h" +#include "qemu/queue.h" + +typedef struct BlockExport BlockExport; + +typedef struct BlockExportDriver { + /* The export type that this driver services */ + BlockExportType type; + + /* + * The size of the driver-specific state that contains BlockExport as its + * first field. + */ + size_t instance_size; + + /* Creates and starts a new block export */ + int (*create)(BlockExport *, BlockExportOptions *, Error **); + + /* + * Frees a removed block export. This function is only called after all + * references have been dropped. + */ + void (*delete)(BlockExport *); + + /* + * Start to disconnect all clients and drop other references held + * internally by the export driver. When the function returns, there may + * still be active references while the export is in the process of + * shutting down. + */ + void (*request_shutdown)(BlockExport *); +} BlockExportDriver; + +struct BlockExport { + const BlockExportDriver *drv; + + /* Unique identifier for the export */ + char *id; + + /* + * Reference count for this block export. This includes strong references + * both from the owner (qemu-nbd or the monitor) and clients connected to + * the export. + */ + int refcount; + + /* + * True if one of the references in refcount belongs to the user. After the + * user has dropped their reference, they may not e.g. remove the same + * export a second time (which would decrease the refcount without having + * it incremented first). + */ + bool user_owned; + + /* The AioContext whose lock protects this BlockExport object. */ + AioContext *ctx; + + /* The block device to export */ + BlockBackend *blk; + + /* List entry for block_exports */ + QLIST_ENTRY(BlockExport) next; +}; + +BlockExport *blk_exp_add(BlockExportOptions *export, Error **errp); +BlockExport *blk_exp_find(const char *id); +void blk_exp_ref(BlockExport *exp); +void blk_exp_unref(BlockExport *exp); +void blk_exp_request_shutdown(BlockExport *exp); +void blk_exp_close_all(void); +void blk_exp_close_all_type(BlockExportType type); + +#endif diff --git a/include/block/nbd.h b/include/block/nbd.h new file mode 100644 index 000000000..4a52a43ef --- /dev/null +++ b/include/block/nbd.h @@ -0,0 +1,408 @@ +/* + * Copyright (C) 2016-2020 Red Hat, Inc. + * Copyright (C) 2005 Anthony Liguori + * + * Network Block Device + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; under version 2 of the License. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, see . + */ + +#ifndef NBD_H +#define NBD_H + +#include "block/export.h" +#include "io/channel-socket.h" +#include "crypto/tlscreds.h" +#include "qapi/error.h" + +extern const BlockExportDriver blk_exp_nbd; + +/* Handshake phase structs - this struct is passed on the wire */ + +struct NBDOption { + uint64_t magic; /* NBD_OPTS_MAGIC */ + uint32_t option; /* NBD_OPT_* */ + uint32_t length; +} QEMU_PACKED; +typedef struct NBDOption NBDOption; + +struct NBDOptionReply { + uint64_t magic; /* NBD_REP_MAGIC */ + uint32_t option; /* NBD_OPT_* */ + uint32_t type; /* NBD_REP_* */ + uint32_t length; +} QEMU_PACKED; +typedef struct NBDOptionReply NBDOptionReply; + +typedef struct NBDOptionReplyMetaContext { + NBDOptionReply h; /* h.type = NBD_REP_META_CONTEXT, h.length > 4 */ + uint32_t context_id; + /* metadata context name follows */ +} QEMU_PACKED NBDOptionReplyMetaContext; + +/* Transmission phase structs + * + * Note: these are _NOT_ the same as the network representation of an NBD + * request and reply! + */ +struct NBDRequest { + uint64_t handle; + uint64_t from; + uint32_t len; + uint16_t flags; /* NBD_CMD_FLAG_* */ + uint16_t type; /* NBD_CMD_* */ +}; +typedef struct NBDRequest NBDRequest; + +typedef struct NBDSimpleReply { + uint32_t magic; /* NBD_SIMPLE_REPLY_MAGIC */ + uint32_t error; + uint64_t handle; +} QEMU_PACKED NBDSimpleReply; + +/* Header of all structured replies */ +typedef struct NBDStructuredReplyChunk { + uint32_t magic; /* NBD_STRUCTURED_REPLY_MAGIC */ + uint16_t flags; /* combination of NBD_REPLY_FLAG_* */ + uint16_t type; /* NBD_REPLY_TYPE_* */ + uint64_t handle; /* request handle */ + uint32_t length; /* length of payload */ +} QEMU_PACKED NBDStructuredReplyChunk; + +typedef union NBDReply { + NBDSimpleReply simple; + NBDStructuredReplyChunk structured; + struct { + /* @magic and @handle fields have the same offset and size both in + * simple reply and structured reply chunk, so let them be accessible + * without ".simple." or ".structured." specification + */ + uint32_t magic; + uint32_t _skip; + uint64_t handle; + } QEMU_PACKED; +} NBDReply; + +/* Header of chunk for NBD_REPLY_TYPE_OFFSET_DATA */ +typedef struct NBDStructuredReadData { + NBDStructuredReplyChunk h; /* h.length >= 9 */ + uint64_t offset; + /* At least one byte of data payload follows, calculated from h.length */ +} QEMU_PACKED NBDStructuredReadData; + +/* Complete chunk for NBD_REPLY_TYPE_OFFSET_HOLE */ +typedef struct NBDStructuredReadHole { + NBDStructuredReplyChunk h; /* h.length == 12 */ + uint64_t offset; + uint32_t length; +} QEMU_PACKED NBDStructuredReadHole; + +/* Header of all NBD_REPLY_TYPE_ERROR* errors */ +typedef struct NBDStructuredError { + NBDStructuredReplyChunk h; /* h.length >= 6 */ + uint32_t error; + uint16_t message_length; +} QEMU_PACKED NBDStructuredError; + +/* Header of NBD_REPLY_TYPE_BLOCK_STATUS */ +typedef struct NBDStructuredMeta { + NBDStructuredReplyChunk h; /* h.length >= 12 (at least one extent) */ + uint32_t context_id; + /* extents follows */ +} QEMU_PACKED NBDStructuredMeta; + +/* Extent chunk for NBD_REPLY_TYPE_BLOCK_STATUS */ +typedef struct NBDExtent { + uint32_t length; + uint32_t flags; /* NBD_STATE_* */ +} QEMU_PACKED NBDExtent; + +/* Transmission (export) flags: sent from server to client during handshake, + but describe what will happen during transmission */ +enum { + NBD_FLAG_HAS_FLAGS_BIT = 0, /* Flags are there */ + NBD_FLAG_READ_ONLY_BIT = 1, /* Device is read-only */ + NBD_FLAG_SEND_FLUSH_BIT = 2, /* Send FLUSH */ + NBD_FLAG_SEND_FUA_BIT = 3, /* Send FUA (Force Unit Access) */ + NBD_FLAG_ROTATIONAL_BIT = 4, /* Use elevator algorithm - + rotational media */ + NBD_FLAG_SEND_TRIM_BIT = 5, /* Send TRIM (discard) */ + NBD_FLAG_SEND_WRITE_ZEROES_BIT = 6, /* Send WRITE_ZEROES */ + NBD_FLAG_SEND_DF_BIT = 7, /* Send DF (Do not Fragment) */ + NBD_FLAG_CAN_MULTI_CONN_BIT = 8, /* Multi-client cache consistent */ + NBD_FLAG_SEND_RESIZE_BIT = 9, /* Send resize */ + NBD_FLAG_SEND_CACHE_BIT = 10, /* Send CACHE (prefetch) */ + NBD_FLAG_SEND_FAST_ZERO_BIT = 11, /* FAST_ZERO flag for WRITE_ZEROES */ +}; + +#define NBD_FLAG_HAS_FLAGS (1 << NBD_FLAG_HAS_FLAGS_BIT) +#define NBD_FLAG_READ_ONLY (1 << NBD_FLAG_READ_ONLY_BIT) +#define NBD_FLAG_SEND_FLUSH (1 << NBD_FLAG_SEND_FLUSH_BIT) +#define NBD_FLAG_SEND_FUA (1 << NBD_FLAG_SEND_FUA_BIT) +#define NBD_FLAG_ROTATIONAL (1 << NBD_FLAG_ROTATIONAL_BIT) +#define NBD_FLAG_SEND_TRIM (1 << NBD_FLAG_SEND_TRIM_BIT) +#define NBD_FLAG_SEND_WRITE_ZEROES (1 << NBD_FLAG_SEND_WRITE_ZEROES_BIT) +#define NBD_FLAG_SEND_DF (1 << NBD_FLAG_SEND_DF_BIT) +#define NBD_FLAG_CAN_MULTI_CONN (1 << NBD_FLAG_CAN_MULTI_CONN_BIT) +#define NBD_FLAG_SEND_RESIZE (1 << NBD_FLAG_SEND_RESIZE_BIT) +#define NBD_FLAG_SEND_CACHE (1 << NBD_FLAG_SEND_CACHE_BIT) +#define NBD_FLAG_SEND_FAST_ZERO (1 << NBD_FLAG_SEND_FAST_ZERO_BIT) + +/* New-style handshake (global) flags, sent from server to client, and + control what will happen during handshake phase. */ +#define NBD_FLAG_FIXED_NEWSTYLE (1 << 0) /* Fixed newstyle protocol. */ +#define NBD_FLAG_NO_ZEROES (1 << 1) /* End handshake without zeroes. */ + +/* New-style client flags, sent from client to server to control what happens + during handshake phase. */ +#define NBD_FLAG_C_FIXED_NEWSTYLE (1 << 0) /* Fixed newstyle protocol. */ +#define NBD_FLAG_C_NO_ZEROES (1 << 1) /* End handshake without zeroes. */ + +/* Option requests. */ +#define NBD_OPT_EXPORT_NAME (1) +#define NBD_OPT_ABORT (2) +#define NBD_OPT_LIST (3) +/* #define NBD_OPT_PEEK_EXPORT (4) not in use */ +#define NBD_OPT_STARTTLS (5) +#define NBD_OPT_INFO (6) +#define NBD_OPT_GO (7) +#define NBD_OPT_STRUCTURED_REPLY (8) +#define NBD_OPT_LIST_META_CONTEXT (9) +#define NBD_OPT_SET_META_CONTEXT (10) + +/* Option reply types. */ +#define NBD_REP_ERR(value) ((UINT32_C(1) << 31) | (value)) + +#define NBD_REP_ACK (1) /* Data sending finished. */ +#define NBD_REP_SERVER (2) /* Export description. */ +#define NBD_REP_INFO (3) /* NBD_OPT_INFO/GO. */ +#define NBD_REP_META_CONTEXT (4) /* NBD_OPT_{LIST,SET}_META_CONTEXT */ + +#define NBD_REP_ERR_UNSUP NBD_REP_ERR(1) /* Unknown option */ +#define NBD_REP_ERR_POLICY NBD_REP_ERR(2) /* Server denied */ +#define NBD_REP_ERR_INVALID NBD_REP_ERR(3) /* Invalid length */ +#define NBD_REP_ERR_PLATFORM NBD_REP_ERR(4) /* Not compiled in */ +#define NBD_REP_ERR_TLS_REQD NBD_REP_ERR(5) /* TLS required */ +#define NBD_REP_ERR_UNKNOWN NBD_REP_ERR(6) /* Export unknown */ +#define NBD_REP_ERR_SHUTDOWN NBD_REP_ERR(7) /* Server shutting down */ +#define NBD_REP_ERR_BLOCK_SIZE_REQD NBD_REP_ERR(8) /* Need INFO_BLOCK_SIZE */ + +/* Info types, used during NBD_REP_INFO */ +#define NBD_INFO_EXPORT 0 +#define NBD_INFO_NAME 1 +#define NBD_INFO_DESCRIPTION 2 +#define NBD_INFO_BLOCK_SIZE 3 + +/* Request flags, sent from client to server during transmission phase */ +#define NBD_CMD_FLAG_FUA (1 << 0) /* 'force unit access' during write */ +#define NBD_CMD_FLAG_NO_HOLE (1 << 1) /* don't punch hole on zero run */ +#define NBD_CMD_FLAG_DF (1 << 2) /* don't fragment structured read */ +#define NBD_CMD_FLAG_REQ_ONE (1 << 3) /* only one extent in BLOCK_STATUS + * reply chunk */ +#define NBD_CMD_FLAG_FAST_ZERO (1 << 4) /* fail if WRITE_ZEROES is not fast */ + +/* Supported request types */ +enum { + NBD_CMD_READ = 0, + NBD_CMD_WRITE = 1, + NBD_CMD_DISC = 2, + NBD_CMD_FLUSH = 3, + NBD_CMD_TRIM = 4, + NBD_CMD_CACHE = 5, + NBD_CMD_WRITE_ZEROES = 6, + NBD_CMD_BLOCK_STATUS = 7, +}; + +#define NBD_DEFAULT_PORT 10809 + +/* Maximum size of a single READ/WRITE data buffer */ +#define NBD_MAX_BUFFER_SIZE (32 * 1024 * 1024) + +/* + * Maximum size of a protocol string (export name, metadata context name, + * etc.). Use malloc rather than stack allocation for storage of a + * string. + */ +#define NBD_MAX_STRING_SIZE 4096 + +/* Two types of reply structures */ +#define NBD_SIMPLE_REPLY_MAGIC 0x67446698 +#define NBD_STRUCTURED_REPLY_MAGIC 0x668e33ef + +/* Structured reply flags */ +#define NBD_REPLY_FLAG_DONE (1 << 0) /* This reply-chunk is last */ + +/* Structured reply types */ +#define NBD_REPLY_ERR(value) ((1 << 15) | (value)) + +#define NBD_REPLY_TYPE_NONE 0 +#define NBD_REPLY_TYPE_OFFSET_DATA 1 +#define NBD_REPLY_TYPE_OFFSET_HOLE 2 +#define NBD_REPLY_TYPE_BLOCK_STATUS 5 +#define NBD_REPLY_TYPE_ERROR NBD_REPLY_ERR(1) +#define NBD_REPLY_TYPE_ERROR_OFFSET NBD_REPLY_ERR(2) + +/* Extent flags for base:allocation in NBD_REPLY_TYPE_BLOCK_STATUS */ +#define NBD_STATE_HOLE (1 << 0) +#define NBD_STATE_ZERO (1 << 1) + +/* Extent flags for qemu:dirty-bitmap in NBD_REPLY_TYPE_BLOCK_STATUS */ +#define NBD_STATE_DIRTY (1 << 0) + +/* No flags needed for qemu:allocation-depth in NBD_REPLY_TYPE_BLOCK_STATUS */ + +static inline bool nbd_reply_type_is_error(int type) +{ + return type & (1 << 15); +} + +/* NBD errors are based on errno numbers, so there is a 1:1 mapping, + * but only a limited set of errno values is specified in the protocol. + * Everything else is squashed to EINVAL. + */ +#define NBD_SUCCESS 0 +#define NBD_EPERM 1 +#define NBD_EIO 5 +#define NBD_ENOMEM 12 +#define NBD_EINVAL 22 +#define NBD_ENOSPC 28 +#define NBD_EOVERFLOW 75 +#define NBD_ENOTSUP 95 +#define NBD_ESHUTDOWN 108 + +/* Details collected by NBD_OPT_EXPORT_NAME and NBD_OPT_GO */ +struct NBDExportInfo { + /* Set by client before nbd_receive_negotiate() */ + bool request_sizes; + char *x_dirty_bitmap; + + /* Set by client before nbd_receive_negotiate(), or by server results + * during nbd_receive_export_list() */ + char *name; /* must be non-NULL */ + + /* In-out fields, set by client before nbd_receive_negotiate() and + * updated by server results during nbd_receive_negotiate() */ + bool structured_reply; + bool base_allocation; /* base:allocation context for NBD_CMD_BLOCK_STATUS */ + + /* Set by server results during nbd_receive_negotiate() and + * nbd_receive_export_list() */ + uint64_t size; + uint16_t flags; + uint32_t min_block; + uint32_t opt_block; + uint32_t max_block; + + uint32_t context_id; + + /* Set by server results during nbd_receive_export_list() */ + char *description; + int n_contexts; + char **contexts; +}; +typedef struct NBDExportInfo NBDExportInfo; + +int nbd_receive_negotiate(AioContext *aio_context, QIOChannel *ioc, + QCryptoTLSCreds *tlscreds, + const char *hostname, QIOChannel **outioc, + NBDExportInfo *info, Error **errp); +void nbd_free_export_list(NBDExportInfo *info, int count); +int nbd_receive_export_list(QIOChannel *ioc, QCryptoTLSCreds *tlscreds, + const char *hostname, NBDExportInfo **info, + Error **errp); +int nbd_init(int fd, QIOChannelSocket *sioc, NBDExportInfo *info, + Error **errp); +int nbd_send_request(QIOChannel *ioc, NBDRequest *request); +int coroutine_fn nbd_receive_reply(BlockDriverState *bs, QIOChannel *ioc, + NBDReply *reply, Error **errp); +int nbd_client(int fd); +int nbd_disconnect(int fd); +int nbd_errno_to_system_errno(int err); + +typedef struct NBDExport NBDExport; +typedef struct NBDClient NBDClient; + +void nbd_export_set_on_eject_blk(BlockExport *exp, BlockBackend *blk); + +AioContext *nbd_export_aio_context(NBDExport *exp); +NBDExport *nbd_export_find(const char *name); + +void nbd_client_new(QIOChannelSocket *sioc, + QCryptoTLSCreds *tlscreds, + const char *tlsauthz, + void (*close_fn)(NBDClient *, bool)); +void nbd_client_get(NBDClient *client); +void nbd_client_put(NBDClient *client); + +void nbd_server_is_qemu_nbd(bool value); +bool nbd_server_is_running(void); +void nbd_server_start(SocketAddress *addr, const char *tls_creds, + const char *tls_authz, uint32_t max_connections, + Error **errp); +void nbd_server_start_options(NbdServerOptions *arg, Error **errp); + +/* nbd_read + * Reads @size bytes from @ioc. Returns 0 on success. + */ +static inline int nbd_read(QIOChannel *ioc, void *buffer, size_t size, + const char *desc, Error **errp) +{ + ERRP_GUARD(); + int ret = qio_channel_read_all(ioc, buffer, size, errp) < 0 ? -EIO : 0; + + if (ret < 0) { + if (desc) { + error_prepend(errp, "Failed to read %s: ", desc); + } + return -1; + } + + return 0; +} + +#define DEF_NBD_READ_N(bits) \ +static inline int nbd_read##bits(QIOChannel *ioc, \ + uint##bits##_t *val, \ + const char *desc, Error **errp) \ +{ \ + if (nbd_read(ioc, val, sizeof(*val), desc, errp) < 0) { \ + return -1; \ + } \ + *val = be##bits##_to_cpu(*val); \ + return 0; \ +} + +DEF_NBD_READ_N(16) /* Defines nbd_read16(). */ +DEF_NBD_READ_N(32) /* Defines nbd_read32(). */ +DEF_NBD_READ_N(64) /* Defines nbd_read64(). */ + +#undef DEF_NBD_READ_N + +static inline bool nbd_reply_is_simple(NBDReply *reply) +{ + return reply->magic == NBD_SIMPLE_REPLY_MAGIC; +} + +static inline bool nbd_reply_is_structured(NBDReply *reply) +{ + return reply->magic == NBD_STRUCTURED_REPLY_MAGIC; +} + +const char *nbd_reply_type_lookup(uint16_t type); +const char *nbd_opt_lookup(uint32_t opt); +const char *nbd_rep_lookup(uint32_t rep); +const char *nbd_info_lookup(uint16_t info); +const char *nbd_cmd_lookup(uint16_t info); +const char *nbd_err_lookup(int err); + +#endif diff --git a/include/block/nvme.h b/include/block/nvme.h new file mode 100644 index 000000000..3e02d9ca9 --- /dev/null +++ b/include/block/nvme.h @@ -0,0 +1,1074 @@ +#ifndef BLOCK_NVME_H +#define BLOCK_NVME_H + +typedef struct QEMU_PACKED NvmeBar { + uint64_t cap; + uint32_t vs; + uint32_t intms; + uint32_t intmc; + uint32_t cc; + uint32_t rsvd1; + uint32_t csts; + uint32_t nssrc; + uint32_t aqa; + uint64_t asq; + uint64_t acq; + uint32_t cmbloc; + uint32_t cmbsz; + uint8_t padding[3520]; /* not used by QEMU */ + uint32_t pmrcap; + uint32_t pmrctl; + uint32_t pmrsts; + uint32_t pmrebs; + uint32_t pmrswtp; + uint64_t pmrmsc; + uint8_t reserved[484]; +} NvmeBar; + +enum NvmeCapShift { + CAP_MQES_SHIFT = 0, + CAP_CQR_SHIFT = 16, + CAP_AMS_SHIFT = 17, + CAP_TO_SHIFT = 24, + CAP_DSTRD_SHIFT = 32, + CAP_NSSRS_SHIFT = 36, + CAP_CSS_SHIFT = 37, + CAP_MPSMIN_SHIFT = 48, + CAP_MPSMAX_SHIFT = 52, + CAP_PMR_SHIFT = 56, +}; + +enum NvmeCapMask { + CAP_MQES_MASK = 0xffff, + CAP_CQR_MASK = 0x1, + CAP_AMS_MASK = 0x3, + CAP_TO_MASK = 0xff, + CAP_DSTRD_MASK = 0xf, + CAP_NSSRS_MASK = 0x1, + CAP_CSS_MASK = 0xff, + CAP_MPSMIN_MASK = 0xf, + CAP_MPSMAX_MASK = 0xf, + CAP_PMR_MASK = 0x1, +}; + +#define NVME_CAP_MQES(cap) (((cap) >> CAP_MQES_SHIFT) & CAP_MQES_MASK) +#define NVME_CAP_CQR(cap) (((cap) >> CAP_CQR_SHIFT) & CAP_CQR_MASK) +#define NVME_CAP_AMS(cap) (((cap) >> CAP_AMS_SHIFT) & CAP_AMS_MASK) +#define NVME_CAP_TO(cap) (((cap) >> CAP_TO_SHIFT) & CAP_TO_MASK) +#define NVME_CAP_DSTRD(cap) (((cap) >> CAP_DSTRD_SHIFT) & CAP_DSTRD_MASK) +#define NVME_CAP_NSSRS(cap) (((cap) >> CAP_NSSRS_SHIFT) & CAP_NSSRS_MASK) +#define NVME_CAP_CSS(cap) (((cap) >> CAP_CSS_SHIFT) & CAP_CSS_MASK) +#define NVME_CAP_MPSMIN(cap)(((cap) >> CAP_MPSMIN_SHIFT) & CAP_MPSMIN_MASK) +#define NVME_CAP_MPSMAX(cap)(((cap) >> CAP_MPSMAX_SHIFT) & CAP_MPSMAX_MASK) + +#define NVME_CAP_SET_MQES(cap, val) (cap |= (uint64_t)(val & CAP_MQES_MASK) \ + << CAP_MQES_SHIFT) +#define NVME_CAP_SET_CQR(cap, val) (cap |= (uint64_t)(val & CAP_CQR_MASK) \ + << CAP_CQR_SHIFT) +#define NVME_CAP_SET_AMS(cap, val) (cap |= (uint64_t)(val & CAP_AMS_MASK) \ + << CAP_AMS_SHIFT) +#define NVME_CAP_SET_TO(cap, val) (cap |= (uint64_t)(val & CAP_TO_MASK) \ + << CAP_TO_SHIFT) +#define NVME_CAP_SET_DSTRD(cap, val) (cap |= (uint64_t)(val & CAP_DSTRD_MASK) \ + << CAP_DSTRD_SHIFT) +#define NVME_CAP_SET_NSSRS(cap, val) (cap |= (uint64_t)(val & CAP_NSSRS_MASK) \ + << CAP_NSSRS_SHIFT) +#define NVME_CAP_SET_CSS(cap, val) (cap |= (uint64_t)(val & CAP_CSS_MASK) \ + << CAP_CSS_SHIFT) +#define NVME_CAP_SET_MPSMIN(cap, val) (cap |= (uint64_t)(val & CAP_MPSMIN_MASK)\ + << CAP_MPSMIN_SHIFT) +#define NVME_CAP_SET_MPSMAX(cap, val) (cap |= (uint64_t)(val & CAP_MPSMAX_MASK)\ + << CAP_MPSMAX_SHIFT) +#define NVME_CAP_SET_PMRS(cap, val) (cap |= (uint64_t)(val & CAP_PMR_MASK)\ + << CAP_PMR_SHIFT) + +enum NvmeCapCss { + NVME_CAP_CSS_NVM = 1 << 0, + NVME_CAP_CSS_ADMIN_ONLY = 1 << 7, +}; + +enum NvmeCcShift { + CC_EN_SHIFT = 0, + CC_CSS_SHIFT = 4, + CC_MPS_SHIFT = 7, + CC_AMS_SHIFT = 11, + CC_SHN_SHIFT = 14, + CC_IOSQES_SHIFT = 16, + CC_IOCQES_SHIFT = 20, +}; + +enum NvmeCcMask { + CC_EN_MASK = 0x1, + CC_CSS_MASK = 0x7, + CC_MPS_MASK = 0xf, + CC_AMS_MASK = 0x7, + CC_SHN_MASK = 0x3, + CC_IOSQES_MASK = 0xf, + CC_IOCQES_MASK = 0xf, +}; + +#define NVME_CC_EN(cc) ((cc >> CC_EN_SHIFT) & CC_EN_MASK) +#define NVME_CC_CSS(cc) ((cc >> CC_CSS_SHIFT) & CC_CSS_MASK) +#define NVME_CC_MPS(cc) ((cc >> CC_MPS_SHIFT) & CC_MPS_MASK) +#define NVME_CC_AMS(cc) ((cc >> CC_AMS_SHIFT) & CC_AMS_MASK) +#define NVME_CC_SHN(cc) ((cc >> CC_SHN_SHIFT) & CC_SHN_MASK) +#define NVME_CC_IOSQES(cc) ((cc >> CC_IOSQES_SHIFT) & CC_IOSQES_MASK) +#define NVME_CC_IOCQES(cc) ((cc >> CC_IOCQES_SHIFT) & CC_IOCQES_MASK) + +enum NvmeCcCss { + NVME_CC_CSS_NVM = 0x0, + NVME_CC_CSS_ADMIN_ONLY = 0x7, +}; + +enum NvmeCstsShift { + CSTS_RDY_SHIFT = 0, + CSTS_CFS_SHIFT = 1, + CSTS_SHST_SHIFT = 2, + CSTS_NSSRO_SHIFT = 4, +}; + +enum NvmeCstsMask { + CSTS_RDY_MASK = 0x1, + CSTS_CFS_MASK = 0x1, + CSTS_SHST_MASK = 0x3, + CSTS_NSSRO_MASK = 0x1, +}; + +enum NvmeCsts { + NVME_CSTS_READY = 1 << CSTS_RDY_SHIFT, + NVME_CSTS_FAILED = 1 << CSTS_CFS_SHIFT, + NVME_CSTS_SHST_NORMAL = 0 << CSTS_SHST_SHIFT, + NVME_CSTS_SHST_PROGRESS = 1 << CSTS_SHST_SHIFT, + NVME_CSTS_SHST_COMPLETE = 2 << CSTS_SHST_SHIFT, + NVME_CSTS_NSSRO = 1 << CSTS_NSSRO_SHIFT, +}; + +#define NVME_CSTS_RDY(csts) ((csts >> CSTS_RDY_SHIFT) & CSTS_RDY_MASK) +#define NVME_CSTS_CFS(csts) ((csts >> CSTS_CFS_SHIFT) & CSTS_CFS_MASK) +#define NVME_CSTS_SHST(csts) ((csts >> CSTS_SHST_SHIFT) & CSTS_SHST_MASK) +#define NVME_CSTS_NSSRO(csts) ((csts >> CSTS_NSSRO_SHIFT) & CSTS_NSSRO_MASK) + +enum NvmeAqaShift { + AQA_ASQS_SHIFT = 0, + AQA_ACQS_SHIFT = 16, +}; + +enum NvmeAqaMask { + AQA_ASQS_MASK = 0xfff, + AQA_ACQS_MASK = 0xfff, +}; + +#define NVME_AQA_ASQS(aqa) ((aqa >> AQA_ASQS_SHIFT) & AQA_ASQS_MASK) +#define NVME_AQA_ACQS(aqa) ((aqa >> AQA_ACQS_SHIFT) & AQA_ACQS_MASK) + +enum NvmeCmblocShift { + CMBLOC_BIR_SHIFT = 0, + CMBLOC_OFST_SHIFT = 12, +}; + +enum NvmeCmblocMask { + CMBLOC_BIR_MASK = 0x7, + CMBLOC_OFST_MASK = 0xfffff, +}; + +#define NVME_CMBLOC_BIR(cmbloc) ((cmbloc >> CMBLOC_BIR_SHIFT) & \ + CMBLOC_BIR_MASK) +#define NVME_CMBLOC_OFST(cmbloc)((cmbloc >> CMBLOC_OFST_SHIFT) & \ + CMBLOC_OFST_MASK) + +#define NVME_CMBLOC_SET_BIR(cmbloc, val) \ + (cmbloc |= (uint64_t)(val & CMBLOC_BIR_MASK) << CMBLOC_BIR_SHIFT) +#define NVME_CMBLOC_SET_OFST(cmbloc, val) \ + (cmbloc |= (uint64_t)(val & CMBLOC_OFST_MASK) << CMBLOC_OFST_SHIFT) + +enum NvmeCmbszShift { + CMBSZ_SQS_SHIFT = 0, + CMBSZ_CQS_SHIFT = 1, + CMBSZ_LISTS_SHIFT = 2, + CMBSZ_RDS_SHIFT = 3, + CMBSZ_WDS_SHIFT = 4, + CMBSZ_SZU_SHIFT = 8, + CMBSZ_SZ_SHIFT = 12, +}; + +enum NvmeCmbszMask { + CMBSZ_SQS_MASK = 0x1, + CMBSZ_CQS_MASK = 0x1, + CMBSZ_LISTS_MASK = 0x1, + CMBSZ_RDS_MASK = 0x1, + CMBSZ_WDS_MASK = 0x1, + CMBSZ_SZU_MASK = 0xf, + CMBSZ_SZ_MASK = 0xfffff, +}; + +#define NVME_CMBSZ_SQS(cmbsz) ((cmbsz >> CMBSZ_SQS_SHIFT) & CMBSZ_SQS_MASK) +#define NVME_CMBSZ_CQS(cmbsz) ((cmbsz >> CMBSZ_CQS_SHIFT) & CMBSZ_CQS_MASK) +#define NVME_CMBSZ_LISTS(cmbsz)((cmbsz >> CMBSZ_LISTS_SHIFT) & CMBSZ_LISTS_MASK) +#define NVME_CMBSZ_RDS(cmbsz) ((cmbsz >> CMBSZ_RDS_SHIFT) & CMBSZ_RDS_MASK) +#define NVME_CMBSZ_WDS(cmbsz) ((cmbsz >> CMBSZ_WDS_SHIFT) & CMBSZ_WDS_MASK) +#define NVME_CMBSZ_SZU(cmbsz) ((cmbsz >> CMBSZ_SZU_SHIFT) & CMBSZ_SZU_MASK) +#define NVME_CMBSZ_SZ(cmbsz) ((cmbsz >> CMBSZ_SZ_SHIFT) & CMBSZ_SZ_MASK) + +#define NVME_CMBSZ_SET_SQS(cmbsz, val) \ + (cmbsz |= (uint64_t)(val & CMBSZ_SQS_MASK) << CMBSZ_SQS_SHIFT) +#define NVME_CMBSZ_SET_CQS(cmbsz, val) \ + (cmbsz |= (uint64_t)(val & CMBSZ_CQS_MASK) << CMBSZ_CQS_SHIFT) +#define NVME_CMBSZ_SET_LISTS(cmbsz, val) \ + (cmbsz |= (uint64_t)(val & CMBSZ_LISTS_MASK) << CMBSZ_LISTS_SHIFT) +#define NVME_CMBSZ_SET_RDS(cmbsz, val) \ + (cmbsz |= (uint64_t)(val & CMBSZ_RDS_MASK) << CMBSZ_RDS_SHIFT) +#define NVME_CMBSZ_SET_WDS(cmbsz, val) \ + (cmbsz |= (uint64_t)(val & CMBSZ_WDS_MASK) << CMBSZ_WDS_SHIFT) +#define NVME_CMBSZ_SET_SZU(cmbsz, val) \ + (cmbsz |= (uint64_t)(val & CMBSZ_SZU_MASK) << CMBSZ_SZU_SHIFT) +#define NVME_CMBSZ_SET_SZ(cmbsz, val) \ + (cmbsz |= (uint64_t)(val & CMBSZ_SZ_MASK) << CMBSZ_SZ_SHIFT) + +#define NVME_CMBSZ_GETSIZE(cmbsz) \ + (NVME_CMBSZ_SZ(cmbsz) * (1 << (12 + 4 * NVME_CMBSZ_SZU(cmbsz)))) + +enum NvmePmrcapShift { + PMRCAP_RDS_SHIFT = 3, + PMRCAP_WDS_SHIFT = 4, + PMRCAP_BIR_SHIFT = 5, + PMRCAP_PMRTU_SHIFT = 8, + PMRCAP_PMRWBM_SHIFT = 10, + PMRCAP_PMRTO_SHIFT = 16, + PMRCAP_CMSS_SHIFT = 24, +}; + +enum NvmePmrcapMask { + PMRCAP_RDS_MASK = 0x1, + PMRCAP_WDS_MASK = 0x1, + PMRCAP_BIR_MASK = 0x7, + PMRCAP_PMRTU_MASK = 0x3, + PMRCAP_PMRWBM_MASK = 0xf, + PMRCAP_PMRTO_MASK = 0xff, + PMRCAP_CMSS_MASK = 0x1, +}; + +#define NVME_PMRCAP_RDS(pmrcap) \ + ((pmrcap >> PMRCAP_RDS_SHIFT) & PMRCAP_RDS_MASK) +#define NVME_PMRCAP_WDS(pmrcap) \ + ((pmrcap >> PMRCAP_WDS_SHIFT) & PMRCAP_WDS_MASK) +#define NVME_PMRCAP_BIR(pmrcap) \ + ((pmrcap >> PMRCAP_BIR_SHIFT) & PMRCAP_BIR_MASK) +#define NVME_PMRCAP_PMRTU(pmrcap) \ + ((pmrcap >> PMRCAP_PMRTU_SHIFT) & PMRCAP_PMRTU_MASK) +#define NVME_PMRCAP_PMRWBM(pmrcap) \ + ((pmrcap >> PMRCAP_PMRWBM_SHIFT) & PMRCAP_PMRWBM_MASK) +#define NVME_PMRCAP_PMRTO(pmrcap) \ + ((pmrcap >> PMRCAP_PMRTO_SHIFT) & PMRCAP_PMRTO_MASK) +#define NVME_PMRCAP_CMSS(pmrcap) \ + ((pmrcap >> PMRCAP_CMSS_SHIFT) & PMRCAP_CMSS_MASK) + +#define NVME_PMRCAP_SET_RDS(pmrcap, val) \ + (pmrcap |= (uint64_t)(val & PMRCAP_RDS_MASK) << PMRCAP_RDS_SHIFT) +#define NVME_PMRCAP_SET_WDS(pmrcap, val) \ + (pmrcap |= (uint64_t)(val & PMRCAP_WDS_MASK) << PMRCAP_WDS_SHIFT) +#define NVME_PMRCAP_SET_BIR(pmrcap, val) \ + (pmrcap |= (uint64_t)(val & PMRCAP_BIR_MASK) << PMRCAP_BIR_SHIFT) +#define NVME_PMRCAP_SET_PMRTU(pmrcap, val) \ + (pmrcap |= (uint64_t)(val & PMRCAP_PMRTU_MASK) << PMRCAP_PMRTU_SHIFT) +#define NVME_PMRCAP_SET_PMRWBM(pmrcap, val) \ + (pmrcap |= (uint64_t)(val & PMRCAP_PMRWBM_MASK) << PMRCAP_PMRWBM_SHIFT) +#define NVME_PMRCAP_SET_PMRTO(pmrcap, val) \ + (pmrcap |= (uint64_t)(val & PMRCAP_PMRTO_MASK) << PMRCAP_PMRTO_SHIFT) +#define NVME_PMRCAP_SET_CMSS(pmrcap, val) \ + (pmrcap |= (uint64_t)(val & PMRCAP_CMSS_MASK) << PMRCAP_CMSS_SHIFT) + +enum NvmePmrctlShift { + PMRCTL_EN_SHIFT = 0, +}; + +enum NvmePmrctlMask { + PMRCTL_EN_MASK = 0x1, +}; + +#define NVME_PMRCTL_EN(pmrctl) ((pmrctl >> PMRCTL_EN_SHIFT) & PMRCTL_EN_MASK) + +#define NVME_PMRCTL_SET_EN(pmrctl, val) \ + (pmrctl |= (uint64_t)(val & PMRCTL_EN_MASK) << PMRCTL_EN_SHIFT) + +enum NvmePmrstsShift { + PMRSTS_ERR_SHIFT = 0, + PMRSTS_NRDY_SHIFT = 8, + PMRSTS_HSTS_SHIFT = 9, + PMRSTS_CBAI_SHIFT = 12, +}; + +enum NvmePmrstsMask { + PMRSTS_ERR_MASK = 0xff, + PMRSTS_NRDY_MASK = 0x1, + PMRSTS_HSTS_MASK = 0x7, + PMRSTS_CBAI_MASK = 0x1, +}; + +#define NVME_PMRSTS_ERR(pmrsts) \ + ((pmrsts >> PMRSTS_ERR_SHIFT) & PMRSTS_ERR_MASK) +#define NVME_PMRSTS_NRDY(pmrsts) \ + ((pmrsts >> PMRSTS_NRDY_SHIFT) & PMRSTS_NRDY_MASK) +#define NVME_PMRSTS_HSTS(pmrsts) \ + ((pmrsts >> PMRSTS_HSTS_SHIFT) & PMRSTS_HSTS_MASK) +#define NVME_PMRSTS_CBAI(pmrsts) \ + ((pmrsts >> PMRSTS_CBAI_SHIFT) & PMRSTS_CBAI_MASK) + +#define NVME_PMRSTS_SET_ERR(pmrsts, val) \ + (pmrsts |= (uint64_t)(val & PMRSTS_ERR_MASK) << PMRSTS_ERR_SHIFT) +#define NVME_PMRSTS_SET_NRDY(pmrsts, val) \ + (pmrsts |= (uint64_t)(val & PMRSTS_NRDY_MASK) << PMRSTS_NRDY_SHIFT) +#define NVME_PMRSTS_SET_HSTS(pmrsts, val) \ + (pmrsts |= (uint64_t)(val & PMRSTS_HSTS_MASK) << PMRSTS_HSTS_SHIFT) +#define NVME_PMRSTS_SET_CBAI(pmrsts, val) \ + (pmrsts |= (uint64_t)(val & PMRSTS_CBAI_MASK) << PMRSTS_CBAI_SHIFT) + +enum NvmePmrebsShift { + PMREBS_PMRSZU_SHIFT = 0, + PMREBS_RBB_SHIFT = 4, + PMREBS_PMRWBZ_SHIFT = 8, +}; + +enum NvmePmrebsMask { + PMREBS_PMRSZU_MASK = 0xf, + PMREBS_RBB_MASK = 0x1, + PMREBS_PMRWBZ_MASK = 0xffffff, +}; + +#define NVME_PMREBS_PMRSZU(pmrebs) \ + ((pmrebs >> PMREBS_PMRSZU_SHIFT) & PMREBS_PMRSZU_MASK) +#define NVME_PMREBS_RBB(pmrebs) \ + ((pmrebs >> PMREBS_RBB_SHIFT) & PMREBS_RBB_MASK) +#define NVME_PMREBS_PMRWBZ(pmrebs) \ + ((pmrebs >> PMREBS_PMRWBZ_SHIFT) & PMREBS_PMRWBZ_MASK) + +#define NVME_PMREBS_SET_PMRSZU(pmrebs, val) \ + (pmrebs |= (uint64_t)(val & PMREBS_PMRSZU_MASK) << PMREBS_PMRSZU_SHIFT) +#define NVME_PMREBS_SET_RBB(pmrebs, val) \ + (pmrebs |= (uint64_t)(val & PMREBS_RBB_MASK) << PMREBS_RBB_SHIFT) +#define NVME_PMREBS_SET_PMRWBZ(pmrebs, val) \ + (pmrebs |= (uint64_t)(val & PMREBS_PMRWBZ_MASK) << PMREBS_PMRWBZ_SHIFT) + +enum NvmePmrswtpShift { + PMRSWTP_PMRSWTU_SHIFT = 0, + PMRSWTP_PMRSWTV_SHIFT = 8, +}; + +enum NvmePmrswtpMask { + PMRSWTP_PMRSWTU_MASK = 0xf, + PMRSWTP_PMRSWTV_MASK = 0xffffff, +}; + +#define NVME_PMRSWTP_PMRSWTU(pmrswtp) \ + ((pmrswtp >> PMRSWTP_PMRSWTU_SHIFT) & PMRSWTP_PMRSWTU_MASK) +#define NVME_PMRSWTP_PMRSWTV(pmrswtp) \ + ((pmrswtp >> PMRSWTP_PMRSWTV_SHIFT) & PMRSWTP_PMRSWTV_MASK) + +#define NVME_PMRSWTP_SET_PMRSWTU(pmrswtp, val) \ + (pmrswtp |= (uint64_t)(val & PMRSWTP_PMRSWTU_MASK) << PMRSWTP_PMRSWTU_SHIFT) +#define NVME_PMRSWTP_SET_PMRSWTV(pmrswtp, val) \ + (pmrswtp |= (uint64_t)(val & PMRSWTP_PMRSWTV_MASK) << PMRSWTP_PMRSWTV_SHIFT) + +enum NvmePmrmscShift { + PMRMSC_CMSE_SHIFT = 1, + PMRMSC_CBA_SHIFT = 12, +}; + +enum NvmePmrmscMask { + PMRMSC_CMSE_MASK = 0x1, + PMRMSC_CBA_MASK = 0xfffffffffffff, +}; + +#define NVME_PMRMSC_CMSE(pmrmsc) \ + ((pmrmsc >> PMRMSC_CMSE_SHIFT) & PMRMSC_CMSE_MASK) +#define NVME_PMRMSC_CBA(pmrmsc) \ + ((pmrmsc >> PMRMSC_CBA_SHIFT) & PMRMSC_CBA_MASK) + +#define NVME_PMRMSC_SET_CMSE(pmrmsc, val) \ + (pmrmsc |= (uint64_t)(val & PMRMSC_CMSE_MASK) << PMRMSC_CMSE_SHIFT) +#define NVME_PMRMSC_SET_CBA(pmrmsc, val) \ + (pmrmsc |= (uint64_t)(val & PMRMSC_CBA_MASK) << PMRMSC_CBA_SHIFT) + +enum NvmeSglDescriptorType { + NVME_SGL_DESCR_TYPE_DATA_BLOCK = 0x0, + NVME_SGL_DESCR_TYPE_BIT_BUCKET = 0x1, + NVME_SGL_DESCR_TYPE_SEGMENT = 0x2, + NVME_SGL_DESCR_TYPE_LAST_SEGMENT = 0x3, + NVME_SGL_DESCR_TYPE_KEYED_DATA_BLOCK = 0x4, + + NVME_SGL_DESCR_TYPE_VENDOR_SPECIFIC = 0xf, +}; + +enum NvmeSglDescriptorSubtype { + NVME_SGL_DESCR_SUBTYPE_ADDRESS = 0x0, +}; + +typedef struct QEMU_PACKED NvmeSglDescriptor { + uint64_t addr; + uint32_t len; + uint8_t rsvd[3]; + uint8_t type; +} NvmeSglDescriptor; + +#define NVME_SGL_TYPE(type) ((type >> 4) & 0xf) +#define NVME_SGL_SUBTYPE(type) (type & 0xf) + +typedef union NvmeCmdDptr { + struct { + uint64_t prp1; + uint64_t prp2; + }; + + NvmeSglDescriptor sgl; +} NvmeCmdDptr; + +enum NvmePsdt { + NVME_PSDT_PRP = 0x0, + NVME_PSDT_SGL_MPTR_CONTIGUOUS = 0x1, + NVME_PSDT_SGL_MPTR_SGL = 0x2, +}; + +typedef struct QEMU_PACKED NvmeCmd { + uint8_t opcode; + uint8_t flags; + uint16_t cid; + uint32_t nsid; + uint64_t res1; + uint64_t mptr; + NvmeCmdDptr dptr; + uint32_t cdw10; + uint32_t cdw11; + uint32_t cdw12; + uint32_t cdw13; + uint32_t cdw14; + uint32_t cdw15; +} NvmeCmd; + +#define NVME_CMD_FLAGS_FUSE(flags) (flags & 0x3) +#define NVME_CMD_FLAGS_PSDT(flags) ((flags >> 6) & 0x3) + +enum NvmeAdminCommands { + NVME_ADM_CMD_DELETE_SQ = 0x00, + NVME_ADM_CMD_CREATE_SQ = 0x01, + NVME_ADM_CMD_GET_LOG_PAGE = 0x02, + NVME_ADM_CMD_DELETE_CQ = 0x04, + NVME_ADM_CMD_CREATE_CQ = 0x05, + NVME_ADM_CMD_IDENTIFY = 0x06, + NVME_ADM_CMD_ABORT = 0x08, + NVME_ADM_CMD_SET_FEATURES = 0x09, + NVME_ADM_CMD_GET_FEATURES = 0x0a, + NVME_ADM_CMD_ASYNC_EV_REQ = 0x0c, + NVME_ADM_CMD_ACTIVATE_FW = 0x10, + NVME_ADM_CMD_DOWNLOAD_FW = 0x11, + NVME_ADM_CMD_FORMAT_NVM = 0x80, + NVME_ADM_CMD_SECURITY_SEND = 0x81, + NVME_ADM_CMD_SECURITY_RECV = 0x82, +}; + +enum NvmeIoCommands { + NVME_CMD_FLUSH = 0x00, + NVME_CMD_WRITE = 0x01, + NVME_CMD_READ = 0x02, + NVME_CMD_WRITE_UNCOR = 0x04, + NVME_CMD_COMPARE = 0x05, + NVME_CMD_WRITE_ZEROES = 0x08, + NVME_CMD_DSM = 0x09, +}; + +typedef struct QEMU_PACKED NvmeDeleteQ { + uint8_t opcode; + uint8_t flags; + uint16_t cid; + uint32_t rsvd1[9]; + uint16_t qid; + uint16_t rsvd10; + uint32_t rsvd11[5]; +} NvmeDeleteQ; + +typedef struct QEMU_PACKED NvmeCreateCq { + uint8_t opcode; + uint8_t flags; + uint16_t cid; + uint32_t rsvd1[5]; + uint64_t prp1; + uint64_t rsvd8; + uint16_t cqid; + uint16_t qsize; + uint16_t cq_flags; + uint16_t irq_vector; + uint32_t rsvd12[4]; +} NvmeCreateCq; + +#define NVME_CQ_FLAGS_PC(cq_flags) (cq_flags & 0x1) +#define NVME_CQ_FLAGS_IEN(cq_flags) ((cq_flags >> 1) & 0x1) + +enum NvmeFlagsCq { + NVME_CQ_PC = 1, + NVME_CQ_IEN = 2, +}; + +typedef struct QEMU_PACKED NvmeCreateSq { + uint8_t opcode; + uint8_t flags; + uint16_t cid; + uint32_t rsvd1[5]; + uint64_t prp1; + uint64_t rsvd8; + uint16_t sqid; + uint16_t qsize; + uint16_t sq_flags; + uint16_t cqid; + uint32_t rsvd12[4]; +} NvmeCreateSq; + +#define NVME_SQ_FLAGS_PC(sq_flags) (sq_flags & 0x1) +#define NVME_SQ_FLAGS_QPRIO(sq_flags) ((sq_flags >> 1) & 0x3) + +enum NvmeFlagsSq { + NVME_SQ_PC = 1, + + NVME_SQ_PRIO_URGENT = 0, + NVME_SQ_PRIO_HIGH = 1, + NVME_SQ_PRIO_NORMAL = 2, + NVME_SQ_PRIO_LOW = 3, +}; + +typedef struct QEMU_PACKED NvmeIdentify { + uint8_t opcode; + uint8_t flags; + uint16_t cid; + uint32_t nsid; + uint64_t rsvd2[2]; + uint64_t prp1; + uint64_t prp2; + uint32_t cns; + uint32_t rsvd11[5]; +} NvmeIdentify; + +typedef struct QEMU_PACKED NvmeRwCmd { + uint8_t opcode; + uint8_t flags; + uint16_t cid; + uint32_t nsid; + uint64_t rsvd2; + uint64_t mptr; + NvmeCmdDptr dptr; + uint64_t slba; + uint16_t nlb; + uint16_t control; + uint32_t dsmgmt; + uint32_t reftag; + uint16_t apptag; + uint16_t appmask; +} NvmeRwCmd; + +enum { + NVME_RW_LR = 1 << 15, + NVME_RW_FUA = 1 << 14, + NVME_RW_DSM_FREQ_UNSPEC = 0, + NVME_RW_DSM_FREQ_TYPICAL = 1, + NVME_RW_DSM_FREQ_RARE = 2, + NVME_RW_DSM_FREQ_READS = 3, + NVME_RW_DSM_FREQ_WRITES = 4, + NVME_RW_DSM_FREQ_RW = 5, + NVME_RW_DSM_FREQ_ONCE = 6, + NVME_RW_DSM_FREQ_PREFETCH = 7, + NVME_RW_DSM_FREQ_TEMP = 8, + NVME_RW_DSM_LATENCY_NONE = 0 << 4, + NVME_RW_DSM_LATENCY_IDLE = 1 << 4, + NVME_RW_DSM_LATENCY_NORM = 2 << 4, + NVME_RW_DSM_LATENCY_LOW = 3 << 4, + NVME_RW_DSM_SEQ_REQ = 1 << 6, + NVME_RW_DSM_COMPRESSED = 1 << 7, + NVME_RW_PRINFO_PRACT = 1 << 13, + NVME_RW_PRINFO_PRCHK_GUARD = 1 << 12, + NVME_RW_PRINFO_PRCHK_APP = 1 << 11, + NVME_RW_PRINFO_PRCHK_REF = 1 << 10, +}; + +typedef struct QEMU_PACKED NvmeDsmCmd { + uint8_t opcode; + uint8_t flags; + uint16_t cid; + uint32_t nsid; + uint64_t rsvd2[2]; + NvmeCmdDptr dptr; + uint32_t nr; + uint32_t attributes; + uint32_t rsvd12[4]; +} NvmeDsmCmd; + +enum { + NVME_DSMGMT_IDR = 1 << 0, + NVME_DSMGMT_IDW = 1 << 1, + NVME_DSMGMT_AD = 1 << 2, +}; + +typedef struct QEMU_PACKED NvmeDsmRange { + uint32_t cattr; + uint32_t nlb; + uint64_t slba; +} NvmeDsmRange; + +enum NvmeAsyncEventRequest { + NVME_AER_TYPE_ERROR = 0, + NVME_AER_TYPE_SMART = 1, + NVME_AER_TYPE_IO_SPECIFIC = 6, + NVME_AER_TYPE_VENDOR_SPECIFIC = 7, + NVME_AER_INFO_ERR_INVALID_DB_REGISTER = 0, + NVME_AER_INFO_ERR_INVALID_DB_VALUE = 1, + NVME_AER_INFO_ERR_DIAG_FAIL = 2, + NVME_AER_INFO_ERR_PERS_INTERNAL_ERR = 3, + NVME_AER_INFO_ERR_TRANS_INTERNAL_ERR = 4, + NVME_AER_INFO_ERR_FW_IMG_LOAD_ERR = 5, + NVME_AER_INFO_SMART_RELIABILITY = 0, + NVME_AER_INFO_SMART_TEMP_THRESH = 1, + NVME_AER_INFO_SMART_SPARE_THRESH = 2, +}; + +typedef struct QEMU_PACKED NvmeAerResult { + uint8_t event_type; + uint8_t event_info; + uint8_t log_page; + uint8_t resv; +} NvmeAerResult; + +typedef struct QEMU_PACKED NvmeCqe { + uint32_t result; + uint32_t rsvd; + uint16_t sq_head; + uint16_t sq_id; + uint16_t cid; + uint16_t status; +} NvmeCqe; + +enum NvmeStatusCodes { + NVME_SUCCESS = 0x0000, + NVME_INVALID_OPCODE = 0x0001, + NVME_INVALID_FIELD = 0x0002, + NVME_CID_CONFLICT = 0x0003, + NVME_DATA_TRAS_ERROR = 0x0004, + NVME_POWER_LOSS_ABORT = 0x0005, + NVME_INTERNAL_DEV_ERROR = 0x0006, + NVME_CMD_ABORT_REQ = 0x0007, + NVME_CMD_ABORT_SQ_DEL = 0x0008, + NVME_CMD_ABORT_FAILED_FUSE = 0x0009, + NVME_CMD_ABORT_MISSING_FUSE = 0x000a, + NVME_INVALID_NSID = 0x000b, + NVME_CMD_SEQ_ERROR = 0x000c, + NVME_INVALID_SGL_SEG_DESCR = 0x000d, + NVME_INVALID_NUM_SGL_DESCRS = 0x000e, + NVME_DATA_SGL_LEN_INVALID = 0x000f, + NVME_MD_SGL_LEN_INVALID = 0x0010, + NVME_SGL_DESCR_TYPE_INVALID = 0x0011, + NVME_INVALID_USE_OF_CMB = 0x0012, + NVME_INVALID_PRP_OFFSET = 0x0013, + NVME_LBA_RANGE = 0x0080, + NVME_CAP_EXCEEDED = 0x0081, + NVME_NS_NOT_READY = 0x0082, + NVME_NS_RESV_CONFLICT = 0x0083, + NVME_INVALID_CQID = 0x0100, + NVME_INVALID_QID = 0x0101, + NVME_MAX_QSIZE_EXCEEDED = 0x0102, + NVME_ACL_EXCEEDED = 0x0103, + NVME_RESERVED = 0x0104, + NVME_AER_LIMIT_EXCEEDED = 0x0105, + NVME_INVALID_FW_SLOT = 0x0106, + NVME_INVALID_FW_IMAGE = 0x0107, + NVME_INVALID_IRQ_VECTOR = 0x0108, + NVME_INVALID_LOG_ID = 0x0109, + NVME_INVALID_FORMAT = 0x010a, + NVME_FW_REQ_RESET = 0x010b, + NVME_INVALID_QUEUE_DEL = 0x010c, + NVME_FID_NOT_SAVEABLE = 0x010d, + NVME_FEAT_NOT_CHANGEABLE = 0x010e, + NVME_FEAT_NOT_NS_SPEC = 0x010f, + NVME_FW_REQ_SUSYSTEM_RESET = 0x0110, + NVME_CONFLICTING_ATTRS = 0x0180, + NVME_INVALID_PROT_INFO = 0x0181, + NVME_WRITE_TO_RO = 0x0182, + NVME_WRITE_FAULT = 0x0280, + NVME_UNRECOVERED_READ = 0x0281, + NVME_E2E_GUARD_ERROR = 0x0282, + NVME_E2E_APP_ERROR = 0x0283, + NVME_E2E_REF_ERROR = 0x0284, + NVME_CMP_FAILURE = 0x0285, + NVME_ACCESS_DENIED = 0x0286, + NVME_MORE = 0x2000, + NVME_DNR = 0x4000, + NVME_NO_COMPLETE = 0xffff, +}; + +typedef struct QEMU_PACKED NvmeFwSlotInfoLog { + uint8_t afi; + uint8_t reserved1[7]; + uint8_t frs1[8]; + uint8_t frs2[8]; + uint8_t frs3[8]; + uint8_t frs4[8]; + uint8_t frs5[8]; + uint8_t frs6[8]; + uint8_t frs7[8]; + uint8_t reserved2[448]; +} NvmeFwSlotInfoLog; + +typedef struct QEMU_PACKED NvmeErrorLog { + uint64_t error_count; + uint16_t sqid; + uint16_t cid; + uint16_t status_field; + uint16_t param_error_location; + uint64_t lba; + uint32_t nsid; + uint8_t vs; + uint8_t resv[35]; +} NvmeErrorLog; + +typedef struct QEMU_PACKED NvmeSmartLog { + uint8_t critical_warning; + uint16_t temperature; + uint8_t available_spare; + uint8_t available_spare_threshold; + uint8_t percentage_used; + uint8_t reserved1[26]; + uint64_t data_units_read[2]; + uint64_t data_units_written[2]; + uint64_t host_read_commands[2]; + uint64_t host_write_commands[2]; + uint64_t controller_busy_time[2]; + uint64_t power_cycles[2]; + uint64_t power_on_hours[2]; + uint64_t unsafe_shutdowns[2]; + uint64_t media_errors[2]; + uint64_t number_of_error_log_entries[2]; + uint8_t reserved2[320]; +} NvmeSmartLog; + +enum NvmeSmartWarn { + NVME_SMART_SPARE = 1 << 0, + NVME_SMART_TEMPERATURE = 1 << 1, + NVME_SMART_RELIABILITY = 1 << 2, + NVME_SMART_MEDIA_READ_ONLY = 1 << 3, + NVME_SMART_FAILED_VOLATILE_MEDIA = 1 << 4, +}; + +enum NvmeLogIdentifier { + NVME_LOG_ERROR_INFO = 0x01, + NVME_LOG_SMART_INFO = 0x02, + NVME_LOG_FW_SLOT_INFO = 0x03, +}; + +typedef struct QEMU_PACKED NvmePSD { + uint16_t mp; + uint16_t reserved; + uint32_t enlat; + uint32_t exlat; + uint8_t rrt; + uint8_t rrl; + uint8_t rwt; + uint8_t rwl; + uint8_t resv[16]; +} NvmePSD; + +#define NVME_IDENTIFY_DATA_SIZE 4096 + +enum { + NVME_ID_CNS_NS = 0x0, + NVME_ID_CNS_CTRL = 0x1, + NVME_ID_CNS_NS_ACTIVE_LIST = 0x2, + NVME_ID_CNS_NS_DESCR_LIST = 0x3, +}; + +typedef struct QEMU_PACKED NvmeIdCtrl { + uint16_t vid; + uint16_t ssvid; + uint8_t sn[20]; + uint8_t mn[40]; + uint8_t fr[8]; + uint8_t rab; + uint8_t ieee[3]; + uint8_t cmic; + uint8_t mdts; + uint16_t cntlid; + uint32_t ver; + uint32_t rtd3r; + uint32_t rtd3e; + uint32_t oaes; + uint32_t ctratt; + uint8_t rsvd100[12]; + uint8_t fguid[16]; + uint8_t rsvd128[128]; + uint16_t oacs; + uint8_t acl; + uint8_t aerl; + uint8_t frmw; + uint8_t lpa; + uint8_t elpe; + uint8_t npss; + uint8_t avscc; + uint8_t apsta; + uint16_t wctemp; + uint16_t cctemp; + uint16_t mtfa; + uint32_t hmpre; + uint32_t hmmin; + uint8_t tnvmcap[16]; + uint8_t unvmcap[16]; + uint32_t rpmbs; + uint16_t edstt; + uint8_t dsto; + uint8_t fwug; + uint16_t kas; + uint16_t hctma; + uint16_t mntmt; + uint16_t mxtmt; + uint32_t sanicap; + uint8_t rsvd332[180]; + uint8_t sqes; + uint8_t cqes; + uint16_t maxcmd; + uint32_t nn; + uint16_t oncs; + uint16_t fuses; + uint8_t fna; + uint8_t vwc; + uint16_t awun; + uint16_t awupf; + uint8_t nvscc; + uint8_t rsvd531; + uint16_t acwu; + uint8_t rsvd534[2]; + uint32_t sgls; + uint8_t rsvd540[228]; + uint8_t subnqn[256]; + uint8_t rsvd1024[1024]; + NvmePSD psd[32]; + uint8_t vs[1024]; +} NvmeIdCtrl; + +enum NvmeIdCtrlOacs { + NVME_OACS_SECURITY = 1 << 0, + NVME_OACS_FORMAT = 1 << 1, + NVME_OACS_FW = 1 << 2, +}; + +enum NvmeIdCtrlOncs { + NVME_ONCS_COMPARE = 1 << 0, + NVME_ONCS_WRITE_UNCORR = 1 << 1, + NVME_ONCS_DSM = 1 << 2, + NVME_ONCS_WRITE_ZEROES = 1 << 3, + NVME_ONCS_FEATURES = 1 << 4, + NVME_ONCS_RESRVATIONS = 1 << 5, + NVME_ONCS_TIMESTAMP = 1 << 6, +}; + +enum NvmeIdCtrlFrmw { + NVME_FRMW_SLOT1_RO = 1 << 0, +}; + +enum NvmeIdCtrlLpa { + NVME_LPA_NS_SMART = 1 << 0, + NVME_LPA_EXTENDED = 1 << 2, +}; + +#define NVME_CTRL_SQES_MIN(sqes) ((sqes) & 0xf) +#define NVME_CTRL_SQES_MAX(sqes) (((sqes) >> 4) & 0xf) +#define NVME_CTRL_CQES_MIN(cqes) ((cqes) & 0xf) +#define NVME_CTRL_CQES_MAX(cqes) (((cqes) >> 4) & 0xf) + +#define NVME_CTRL_SGLS_SUPPORT_MASK (0x3 << 0) +#define NVME_CTRL_SGLS_SUPPORT_NO_ALIGN (0x1 << 0) +#define NVME_CTRL_SGLS_SUPPORT_DWORD_ALIGN (0x1 << 1) +#define NVME_CTRL_SGLS_KEYED (0x1 << 2) +#define NVME_CTRL_SGLS_BITBUCKET (0x1 << 16) +#define NVME_CTRL_SGLS_MPTR_CONTIGUOUS (0x1 << 17) +#define NVME_CTRL_SGLS_EXCESS_LENGTH (0x1 << 18) +#define NVME_CTRL_SGLS_MPTR_SGL (0x1 << 19) +#define NVME_CTRL_SGLS_ADDR_OFFSET (0x1 << 20) + +#define NVME_ARB_AB(arb) (arb & 0x7) +#define NVME_ARB_AB_NOLIMIT 0x7 +#define NVME_ARB_LPW(arb) ((arb >> 8) & 0xff) +#define NVME_ARB_MPW(arb) ((arb >> 16) & 0xff) +#define NVME_ARB_HPW(arb) ((arb >> 24) & 0xff) + +#define NVME_INTC_THR(intc) (intc & 0xff) +#define NVME_INTC_TIME(intc) ((intc >> 8) & 0xff) + +#define NVME_INTVC_NOCOALESCING (0x1 << 16) + +#define NVME_TEMP_THSEL(temp) ((temp >> 20) & 0x3) +#define NVME_TEMP_THSEL_OVER 0x0 +#define NVME_TEMP_THSEL_UNDER 0x1 + +#define NVME_TEMP_TMPSEL(temp) ((temp >> 16) & 0xf) +#define NVME_TEMP_TMPSEL_COMPOSITE 0x0 + +#define NVME_TEMP_TMPTH(temp) (temp & 0xffff) + +#define NVME_AEC_SMART(aec) (aec & 0xff) +#define NVME_AEC_NS_ATTR(aec) ((aec >> 8) & 0x1) +#define NVME_AEC_FW_ACTIVATION(aec) ((aec >> 9) & 0x1) + +enum NvmeFeatureIds { + NVME_ARBITRATION = 0x1, + NVME_POWER_MANAGEMENT = 0x2, + NVME_LBA_RANGE_TYPE = 0x3, + NVME_TEMPERATURE_THRESHOLD = 0x4, + NVME_ERROR_RECOVERY = 0x5, + NVME_VOLATILE_WRITE_CACHE = 0x6, + NVME_NUMBER_OF_QUEUES = 0x7, + NVME_INTERRUPT_COALESCING = 0x8, + NVME_INTERRUPT_VECTOR_CONF = 0x9, + NVME_WRITE_ATOMICITY = 0xa, + NVME_ASYNCHRONOUS_EVENT_CONF = 0xb, + NVME_TIMESTAMP = 0xe, + NVME_SOFTWARE_PROGRESS_MARKER = 0x80, + NVME_FID_MAX = 0x100, +}; + +typedef enum NvmeFeatureCap { + NVME_FEAT_CAP_SAVE = 1 << 0, + NVME_FEAT_CAP_NS = 1 << 1, + NVME_FEAT_CAP_CHANGE = 1 << 2, +} NvmeFeatureCap; + +typedef enum NvmeGetFeatureSelect { + NVME_GETFEAT_SELECT_CURRENT = 0x0, + NVME_GETFEAT_SELECT_DEFAULT = 0x1, + NVME_GETFEAT_SELECT_SAVED = 0x2, + NVME_GETFEAT_SELECT_CAP = 0x3, +} NvmeGetFeatureSelect; + +#define NVME_GETSETFEAT_FID_MASK 0xff +#define NVME_GETSETFEAT_FID(dw10) (dw10 & NVME_GETSETFEAT_FID_MASK) + +#define NVME_GETFEAT_SELECT_SHIFT 8 +#define NVME_GETFEAT_SELECT_MASK 0x7 +#define NVME_GETFEAT_SELECT(dw10) \ + ((dw10 >> NVME_GETFEAT_SELECT_SHIFT) & NVME_GETFEAT_SELECT_MASK) + +#define NVME_SETFEAT_SAVE_SHIFT 31 +#define NVME_SETFEAT_SAVE_MASK 0x1 +#define NVME_SETFEAT_SAVE(dw10) \ + ((dw10 >> NVME_SETFEAT_SAVE_SHIFT) & NVME_SETFEAT_SAVE_MASK) + +typedef struct QEMU_PACKED NvmeRangeType { + uint8_t type; + uint8_t attributes; + uint8_t rsvd2[14]; + uint64_t slba; + uint64_t nlb; + uint8_t guid[16]; + uint8_t rsvd48[16]; +} NvmeRangeType; + +typedef struct QEMU_PACKED NvmeLBAF { + uint16_t ms; + uint8_t ds; + uint8_t rp; +} NvmeLBAF; + +#define NVME_NSID_BROADCAST 0xffffffff + +typedef struct QEMU_PACKED NvmeIdNs { + uint64_t nsze; + uint64_t ncap; + uint64_t nuse; + uint8_t nsfeat; + uint8_t nlbaf; + uint8_t flbas; + uint8_t mc; + uint8_t dpc; + uint8_t dps; + uint8_t nmic; + uint8_t rescap; + uint8_t fpi; + uint8_t dlfeat; + uint16_t nawun; + uint16_t nawupf; + uint16_t nacwu; + uint16_t nabsn; + uint16_t nabo; + uint16_t nabspf; + uint16_t noiob; + uint8_t nvmcap[16]; + uint8_t rsvd64[40]; + uint8_t nguid[16]; + uint64_t eui64; + NvmeLBAF lbaf[16]; + uint8_t rsvd192[192]; + uint8_t vs[3712]; +} NvmeIdNs; + +typedef struct QEMU_PACKED NvmeIdNsDescr { + uint8_t nidt; + uint8_t nidl; + uint8_t rsvd2[2]; +} NvmeIdNsDescr; + +enum { + NVME_NIDT_EUI64_LEN = 8, + NVME_NIDT_NGUID_LEN = 16, + NVME_NIDT_UUID_LEN = 16, +}; + +enum NvmeNsIdentifierType { + NVME_NIDT_EUI64 = 0x1, + NVME_NIDT_NGUID = 0x2, + NVME_NIDT_UUID = 0x3, +}; + +/*Deallocate Logical Block Features*/ +#define NVME_ID_NS_DLFEAT_GUARD_CRC(dlfeat) ((dlfeat) & 0x10) +#define NVME_ID_NS_DLFEAT_WRITE_ZEROES(dlfeat) ((dlfeat) & 0x08) + +#define NVME_ID_NS_DLFEAT_READ_BEHAVIOR(dlfeat) ((dlfeat) & 0x7) +#define NVME_ID_NS_DLFEAT_READ_BEHAVIOR_UNDEFINED 0 +#define NVME_ID_NS_DLFEAT_READ_BEHAVIOR_ZEROES 1 +#define NVME_ID_NS_DLFEAT_READ_BEHAVIOR_ONES 2 + + +#define NVME_ID_NS_NSFEAT_THIN(nsfeat) ((nsfeat & 0x1)) +#define NVME_ID_NS_FLBAS_EXTENDED(flbas) ((flbas >> 4) & 0x1) +#define NVME_ID_NS_FLBAS_INDEX(flbas) ((flbas & 0xf)) +#define NVME_ID_NS_MC_SEPARATE(mc) ((mc >> 1) & 0x1) +#define NVME_ID_NS_MC_EXTENDED(mc) ((mc & 0x1)) +#define NVME_ID_NS_DPC_LAST_EIGHT(dpc) ((dpc >> 4) & 0x1) +#define NVME_ID_NS_DPC_FIRST_EIGHT(dpc) ((dpc >> 3) & 0x1) +#define NVME_ID_NS_DPC_TYPE_3(dpc) ((dpc >> 2) & 0x1) +#define NVME_ID_NS_DPC_TYPE_2(dpc) ((dpc >> 1) & 0x1) +#define NVME_ID_NS_DPC_TYPE_1(dpc) ((dpc & 0x1)) +#define NVME_ID_NS_DPC_TYPE_MASK 0x7 + +enum NvmeIdNsDps { + DPS_TYPE_NONE = 0, + DPS_TYPE_1 = 1, + DPS_TYPE_2 = 2, + DPS_TYPE_3 = 3, + DPS_TYPE_MASK = 0x7, + DPS_FIRST_EIGHT = 8, +}; + +static inline void _nvme_check_size(void) +{ + QEMU_BUILD_BUG_ON(sizeof(NvmeBar) != 4096); + QEMU_BUILD_BUG_ON(sizeof(NvmeAerResult) != 4); + QEMU_BUILD_BUG_ON(sizeof(NvmeCqe) != 16); + QEMU_BUILD_BUG_ON(sizeof(NvmeDsmRange) != 16); + QEMU_BUILD_BUG_ON(sizeof(NvmeCmd) != 64); + QEMU_BUILD_BUG_ON(sizeof(NvmeDeleteQ) != 64); + QEMU_BUILD_BUG_ON(sizeof(NvmeCreateCq) != 64); + QEMU_BUILD_BUG_ON(sizeof(NvmeCreateSq) != 64); + QEMU_BUILD_BUG_ON(sizeof(NvmeIdentify) != 64); + QEMU_BUILD_BUG_ON(sizeof(NvmeRwCmd) != 64); + QEMU_BUILD_BUG_ON(sizeof(NvmeDsmCmd) != 64); + QEMU_BUILD_BUG_ON(sizeof(NvmeRangeType) != 64); + QEMU_BUILD_BUG_ON(sizeof(NvmeErrorLog) != 64); + QEMU_BUILD_BUG_ON(sizeof(NvmeFwSlotInfoLog) != 512); + QEMU_BUILD_BUG_ON(sizeof(NvmeSmartLog) != 512); + QEMU_BUILD_BUG_ON(sizeof(NvmeIdCtrl) != 4096); + QEMU_BUILD_BUG_ON(sizeof(NvmeIdNs) != 4096); + QEMU_BUILD_BUG_ON(sizeof(NvmeSglDescriptor) != 16); + QEMU_BUILD_BUG_ON(sizeof(NvmeIdNsDescr) != 4); +} +#endif diff --git a/include/block/qapi.h b/include/block/qapi.h new file mode 100644 index 000000000..22c7807c8 --- /dev/null +++ b/include/block/qapi.h @@ -0,0 +1,45 @@ +/* + * Block layer qmp and info dump related functions + * + * Copyright (c) 2003-2008 Fabrice Bellard + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ + +#ifndef BLOCK_QAPI_H +#define BLOCK_QAPI_H + +#include "block/block.h" +#include "block/snapshot.h" + +BlockDeviceInfo *bdrv_block_device_info(BlockBackend *blk, + BlockDriverState *bs, + bool flat, + Error **errp); +int bdrv_query_snapshot_info_list(BlockDriverState *bs, + SnapshotInfoList **p_list, + Error **errp); +void bdrv_query_image_info(BlockDriverState *bs, + ImageInfo **p_info, + Error **errp); + +void bdrv_snapshot_dump(QEMUSnapshotInfo *sn); +void bdrv_image_info_specific_dump(ImageInfoSpecific *info_spec); +void bdrv_image_info_dump(ImageInfo *info); +#endif diff --git a/include/block/qdict.h b/include/block/qdict.h new file mode 100644 index 000000000..d8cb502d7 --- /dev/null +++ b/include/block/qdict.h @@ -0,0 +1,34 @@ +/* + * Special QDict functions used by the block layer + * + * Copyright (c) 2013-2018 Red Hat, Inc. + * + * This work is licensed under the terms of the GNU LGPL, version 2.1 or later. + * See the COPYING.LIB file in the top-level directory. + */ + +#ifndef BLOCK_QDICT_H +#define BLOCK_QDICT_H + +#include "qapi/qmp/qdict.h" + +void qdict_copy_default(QDict *dst, QDict *src, const char *key); +void qdict_set_default_str(QDict *dst, const char *key, const char *val); + +void qdict_join(QDict *dest, QDict *src, bool overwrite); + +void qdict_extract_subqdict(QDict *src, QDict **dst, const char *start); +void qdict_array_split(QDict *src, QList **dst); +int qdict_array_entries(QDict *src, const char *subqdict); +QObject *qdict_crumple(const QDict *src, Error **errp); +void qdict_flatten(QDict *qdict); + +typedef struct QDictRenames { + const char *from; + const char *to; +} QDictRenames; +bool qdict_rename_keys(QDict *qdict, const QDictRenames *renames, Error **errp); + +Visitor *qobject_input_visitor_new_flat_confused(QDict *qdict, + Error **errp); +#endif diff --git a/include/block/raw-aio.h b/include/block/raw-aio.h new file mode 100644 index 000000000..251b10d27 --- /dev/null +++ b/include/block/raw-aio.h @@ -0,0 +1,88 @@ +/* + * Declarations for AIO in the raw protocol + * + * Copyright IBM, Corp. 2008 + * + * Authors: + * Anthony Liguori + * + * This work is licensed under the terms of the GNU GPL, version 2. See + * the COPYING file in the top-level directory. + * + * Contributions after 2012-01-13 are licensed under the terms of the + * GNU GPL, version 2 or (at your option) any later version. + */ + +#ifndef QEMU_RAW_AIO_H +#define QEMU_RAW_AIO_H + +#include "block/aio.h" +#include "qemu/coroutine.h" +#include "qemu/iov.h" + +/* AIO request types */ +#define QEMU_AIO_READ 0x0001 +#define QEMU_AIO_WRITE 0x0002 +#define QEMU_AIO_IOCTL 0x0004 +#define QEMU_AIO_FLUSH 0x0008 +#define QEMU_AIO_DISCARD 0x0010 +#define QEMU_AIO_WRITE_ZEROES 0x0020 +#define QEMU_AIO_COPY_RANGE 0x0040 +#define QEMU_AIO_TRUNCATE 0x0080 +#define QEMU_AIO_TYPE_MASK \ + (QEMU_AIO_READ | \ + QEMU_AIO_WRITE | \ + QEMU_AIO_IOCTL | \ + QEMU_AIO_FLUSH | \ + QEMU_AIO_DISCARD | \ + QEMU_AIO_WRITE_ZEROES | \ + QEMU_AIO_COPY_RANGE | \ + QEMU_AIO_TRUNCATE) + +/* AIO flags */ +#define QEMU_AIO_MISALIGNED 0x1000 +#define QEMU_AIO_BLKDEV 0x2000 +#define QEMU_AIO_NO_FALLBACK 0x4000 + + +/* linux-aio.c - Linux native implementation */ +#ifdef CONFIG_LINUX_AIO +typedef struct LinuxAioState LinuxAioState; +LinuxAioState *laio_init(Error **errp); +void laio_cleanup(LinuxAioState *s); +int coroutine_fn laio_co_submit(BlockDriverState *bs, LinuxAioState *s, int fd, + uint64_t offset, QEMUIOVector *qiov, int type); +void laio_detach_aio_context(LinuxAioState *s, AioContext *old_context); +void laio_attach_aio_context(LinuxAioState *s, AioContext *new_context); +void laio_io_plug(BlockDriverState *bs, LinuxAioState *s); +void laio_io_unplug(BlockDriverState *bs, LinuxAioState *s); +#endif +/* io_uring.c - Linux io_uring implementation */ +#ifdef CONFIG_LINUX_IO_URING +typedef struct LuringState LuringState; +LuringState *luring_init(Error **errp); +void luring_cleanup(LuringState *s); +int coroutine_fn luring_co_submit(BlockDriverState *bs, LuringState *s, int fd, + uint64_t offset, QEMUIOVector *qiov, int type); +void luring_detach_aio_context(LuringState *s, AioContext *old_context); +void luring_attach_aio_context(LuringState *s, AioContext *new_context); +void luring_io_plug(BlockDriverState *bs, LuringState *s); +void luring_io_unplug(BlockDriverState *bs, LuringState *s); +#endif + +#ifdef _WIN32 +typedef struct QEMUWin32AIOState QEMUWin32AIOState; +QEMUWin32AIOState *win32_aio_init(void); +void win32_aio_cleanup(QEMUWin32AIOState *aio); +int win32_aio_attach(QEMUWin32AIOState *aio, HANDLE hfile); +BlockAIOCB *win32_aio_submit(BlockDriverState *bs, + QEMUWin32AIOState *aio, HANDLE hfile, + uint64_t offset, uint64_t bytes, QEMUIOVector *qiov, + BlockCompletionFunc *cb, void *opaque, int type); +void win32_aio_detach_aio_context(QEMUWin32AIOState *aio, + AioContext *old_context); +void win32_aio_attach_aio_context(QEMUWin32AIOState *aio, + AioContext *new_context); +#endif + +#endif /* QEMU_RAW_AIO_H */ diff --git a/include/block/snapshot.h b/include/block/snapshot.h new file mode 100644 index 000000000..b0fe42993 --- /dev/null +++ b/include/block/snapshot.h @@ -0,0 +1,93 @@ +/* + * Block layer snapshot related functions + * + * Copyright (c) 2003-2008 Fabrice Bellard + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ + +#ifndef SNAPSHOT_H +#define SNAPSHOT_H + + + +#define SNAPSHOT_OPT_BASE "snapshot." +#define SNAPSHOT_OPT_ID "snapshot.id" +#define SNAPSHOT_OPT_NAME "snapshot.name" + +extern QemuOptsList internal_snapshot_opts; + +typedef struct QEMUSnapshotInfo { + char id_str[128]; /* unique snapshot id */ + /* the following fields are informative. They are not needed for + the consistency of the snapshot */ + char name[256]; /* user chosen name */ + uint64_t vm_state_size; /* VM state info size */ + uint32_t date_sec; /* UTC date of the snapshot */ + uint32_t date_nsec; + uint64_t vm_clock_nsec; /* VM clock relative to boot */ + uint64_t icount; /* record/replay step */ +} QEMUSnapshotInfo; + +int bdrv_snapshot_find(BlockDriverState *bs, QEMUSnapshotInfo *sn_info, + const char *name); +bool bdrv_snapshot_find_by_id_and_name(BlockDriverState *bs, + const char *id, + const char *name, + QEMUSnapshotInfo *sn_info, + Error **errp); +int bdrv_can_snapshot(BlockDriverState *bs); +int bdrv_snapshot_create(BlockDriverState *bs, + QEMUSnapshotInfo *sn_info); +int bdrv_snapshot_goto(BlockDriverState *bs, + const char *snapshot_id, + Error **errp); +int bdrv_snapshot_delete(BlockDriverState *bs, + const char *snapshot_id, + const char *name, + Error **errp); +int bdrv_snapshot_list(BlockDriverState *bs, + QEMUSnapshotInfo **psn_info); +int bdrv_snapshot_load_tmp(BlockDriverState *bs, + const char *snapshot_id, + const char *name, + Error **errp); +int bdrv_snapshot_load_tmp_by_id_or_name(BlockDriverState *bs, + const char *id_or_name, + Error **errp); + + +/* Group operations. All block drivers are involved. + * These functions will properly handle dataplane (take aio_context_acquire + * when appropriate for appropriate block drivers */ + +bool bdrv_all_can_snapshot(BlockDriverState **first_bad_bs); +int bdrv_all_delete_snapshot(const char *name, BlockDriverState **first_bsd_bs, + Error **errp); +int bdrv_all_goto_snapshot(const char *name, BlockDriverState **first_bad_bs, + Error **errp); +int bdrv_all_find_snapshot(const char *name, BlockDriverState **first_bad_bs); +int bdrv_all_create_snapshot(QEMUSnapshotInfo *sn, + BlockDriverState *vm_state_bs, + uint64_t vm_state_size, + BlockDriverState **first_bad_bs); + +BlockDriverState *bdrv_all_find_vmstate_bs(void); + +#endif diff --git a/include/block/thread-pool.h b/include/block/thread-pool.h new file mode 100644 index 000000000..7dd7d730a --- /dev/null +++ b/include/block/thread-pool.h @@ -0,0 +1,37 @@ +/* + * QEMU block layer thread pool + * + * Copyright IBM, Corp. 2008 + * Copyright Red Hat, Inc. 2012 + * + * Authors: + * Anthony Liguori + * Paolo Bonzini + * + * This work is licensed under the terms of the GNU GPL, version 2. See + * the COPYING file in the top-level directory. + * + * Contributions after 2012-01-13 are licensed under the terms of the + * GNU GPL, version 2 or (at your option) any later version. + */ + +#ifndef QEMU_THREAD_POOL_H +#define QEMU_THREAD_POOL_H + +#include "block/block.h" + +typedef int ThreadPoolFunc(void *opaque); + +typedef struct ThreadPool ThreadPool; + +ThreadPool *thread_pool_new(struct AioContext *ctx); +void thread_pool_free(ThreadPool *pool); + +BlockAIOCB *thread_pool_submit_aio(ThreadPool *pool, + ThreadPoolFunc *func, void *arg, + BlockCompletionFunc *cb, void *opaque); +int coroutine_fn thread_pool_submit_co(ThreadPool *pool, + ThreadPoolFunc *func, void *arg); +void thread_pool_submit(ThreadPool *pool, ThreadPoolFunc *func, void *arg); + +#endif diff --git a/include/block/throttle-groups.h b/include/block/throttle-groups.h new file mode 100644 index 000000000..8bf7d233f --- /dev/null +++ b/include/block/throttle-groups.h @@ -0,0 +1,91 @@ +/* + * QEMU block throttling group infrastructure + * + * Copyright (C) Nodalink, EURL. 2014 + * Copyright (C) Igalia, S.L. 2015 + * + * Authors: + * Benoît Canet + * Alberto Garcia + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation; either version 2 or + * (at your option) version 3 of the License. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, see . + */ + +#ifndef THROTTLE_GROUPS_H +#define THROTTLE_GROUPS_H + +#include "qemu/throttle.h" +#include "block/block_int.h" +#include "qom/object.h" + +/* The ThrottleGroupMember structure indicates membership in a ThrottleGroup + * and holds related data. + */ + +typedef struct ThrottleGroupMember { + AioContext *aio_context; + /* throttled_reqs_lock protects the CoQueues for throttled requests. */ + CoMutex throttled_reqs_lock; + CoQueue throttled_reqs[2]; + + /* Nonzero if the I/O limits are currently being ignored; generally + * it is zero. Accessed with atomic operations. + */ + unsigned int io_limits_disabled; + + /* Number of pending throttle_group_restart_queue_entry() coroutines. + * Accessed with atomic operations. + */ + unsigned int restart_pending; + + /* The following fields are protected by the ThrottleGroup lock. + * See the ThrottleGroup documentation for details. + * throttle_state tells us if I/O limits are configured. */ + ThrottleState *throttle_state; + ThrottleTimers throttle_timers; + unsigned pending_reqs[2]; + QLIST_ENTRY(ThrottleGroupMember) round_robin; + +} ThrottleGroupMember; + +#define TYPE_THROTTLE_GROUP "throttle-group" +OBJECT_DECLARE_SIMPLE_TYPE(ThrottleGroup, THROTTLE_GROUP) + +const char *throttle_group_get_name(ThrottleGroupMember *tgm); + +ThrottleState *throttle_group_incref(const char *name); +void throttle_group_unref(ThrottleState *ts); + +void throttle_group_config(ThrottleGroupMember *tgm, ThrottleConfig *cfg); +void throttle_group_get_config(ThrottleGroupMember *tgm, ThrottleConfig *cfg); + +void throttle_group_register_tgm(ThrottleGroupMember *tgm, + const char *groupname, + AioContext *ctx); +void throttle_group_unregister_tgm(ThrottleGroupMember *tgm); +void throttle_group_restart_tgm(ThrottleGroupMember *tgm); + +void coroutine_fn throttle_group_co_io_limits_intercept(ThrottleGroupMember *tgm, + unsigned int bytes, + bool is_write); +void throttle_group_attach_aio_context(ThrottleGroupMember *tgm, + AioContext *new_context); +void throttle_group_detach_aio_context(ThrottleGroupMember *tgm); +/* + * throttle_group_exists() must be called under the global + * mutex. + */ +bool throttle_group_exists(const char *name); + +#endif diff --git a/include/block/write-threshold.h b/include/block/write-threshold.h new file mode 100644 index 000000000..c646f267a --- /dev/null +++ b/include/block/write-threshold.h @@ -0,0 +1,62 @@ +/* + * QEMU System Emulator block write threshold notification + * + * Copyright Red Hat, Inc. 2014 + * + * Authors: + * Francesco Romani + * + * This work is licensed under the terms of the GNU LGPL, version 2 or later. + * See the COPYING.LIB file in the top-level directory. + */ + +#ifndef BLOCK_WRITE_THRESHOLD_H +#define BLOCK_WRITE_THRESHOLD_H + +#include "block/block_int.h" + +/* + * bdrv_write_threshold_set: + * + * Set the write threshold for block devices, in bytes. + * Notify when a write exceeds the threshold, meaning the device + * is becoming full, so it can be transparently resized. + * To be used with thin-provisioned block devices. + * + * Use threshold_bytes == 0 to disable. + */ +void bdrv_write_threshold_set(BlockDriverState *bs, uint64_t threshold_bytes); + +/* + * bdrv_write_threshold_get + * + * Get the configured write threshold, in bytes. + * Zero means no threshold configured. + */ +uint64_t bdrv_write_threshold_get(const BlockDriverState *bs); + +/* + * bdrv_write_threshold_is_set + * + * Tell if a write threshold is set for a given BDS. + */ +bool bdrv_write_threshold_is_set(const BlockDriverState *bs); + +/* + * bdrv_write_threshold_exceeded + * + * Return the extent of a write request that exceeded the threshold, + * or zero if the request is below the threshold. + * Return zero also if the threshold was not set. + * + * NOTE: here we assume the following holds for each request this code + * deals with: + * + * assert((req->offset + req->bytes) <= UINT64_MAX) + * + * Please not there is *not* an actual C assert(). + */ +uint64_t bdrv_write_threshold_exceeded(const BlockDriverState *bs, + const BdrvTrackedRequest *req); + +#endif diff --git a/include/chardev/char-fd.h b/include/chardev/char-fd.h new file mode 100644 index 000000000..9de0e440d --- /dev/null +++ b/include/chardev/char-fd.h @@ -0,0 +1,47 @@ +/* + * QEMU System Emulator + * + * Copyright (c) 2003-2008 Fabrice Bellard + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ +#ifndef CHAR_FD_H +#define CHAR_FD_H + +#include "io/channel.h" +#include "chardev/char.h" +#include "qom/object.h" + +struct FDChardev { + Chardev parent; + + QIOChannel *ioc_in, *ioc_out; + int max_size; +}; +typedef struct FDChardev FDChardev; + +#define TYPE_CHARDEV_FD "chardev-fd" + +DECLARE_INSTANCE_CHECKER(FDChardev, FD_CHARDEV, + TYPE_CHARDEV_FD) + +void qemu_chr_open_fd(Chardev *chr, int fd_in, int fd_out); +int qmp_chardev_open_file_source(char *src, int flags, Error **errp); + +#endif /* CHAR_FD_H */ diff --git a/include/chardev/char-fe.h b/include/chardev/char-fe.h new file mode 100644 index 000000000..a55384336 --- /dev/null +++ b/include/chardev/char-fe.h @@ -0,0 +1,286 @@ +#ifndef QEMU_CHAR_FE_H +#define QEMU_CHAR_FE_H + +#include "chardev/char.h" +#include "qemu/main-loop.h" + +typedef void IOEventHandler(void *opaque, QEMUChrEvent event); +typedef int BackendChangeHandler(void *opaque); + +/* This is the backend as seen by frontend, the actual backend is + * Chardev */ +struct CharBackend { + Chardev *chr; + IOEventHandler *chr_event; + IOCanReadHandler *chr_can_read; + IOReadHandler *chr_read; + BackendChangeHandler *chr_be_change; + void *opaque; + int tag; + int fe_open; +}; + +/** + * qemu_chr_fe_init: + * + * Initializes a front end for the given CharBackend and + * Chardev. Call qemu_chr_fe_deinit() to remove the association and + * release the driver. + * + * Returns: false on error. + */ +bool qemu_chr_fe_init(CharBackend *b, Chardev *s, Error **errp); + +/** + * qemu_chr_fe_deinit: + * @b: a CharBackend + * @del: if true, delete the chardev backend +* + * Dissociate the CharBackend from the Chardev. + * + * Safe to call without associated Chardev. + */ +void qemu_chr_fe_deinit(CharBackend *b, bool del); + +/** + * qemu_chr_fe_get_driver: + * + * Returns: the driver associated with a CharBackend or NULL if no + * associated Chardev. + * Note: avoid this function as the driver should never be accessed directly, + * especially by the frontends that support chardevice hotswap. + * Consider qemu_chr_fe_backend_connected() to check for driver existence + */ +Chardev *qemu_chr_fe_get_driver(CharBackend *be); + +/** + * qemu_chr_fe_backend_connected: + * + * Returns: true if there is a chardevice associated with @be. + */ +bool qemu_chr_fe_backend_connected(CharBackend *be); + +/** + * qemu_chr_fe_backend_open: + * + * Returns: true if chardevice associated with @be is open. + */ +bool qemu_chr_fe_backend_open(CharBackend *be); + +/** + * qemu_chr_fe_set_handlers_full: + * @b: a CharBackend + * @fd_can_read: callback to get the amount of data the frontend may + * receive + * @fd_read: callback to receive data from char + * @fd_event: event callback + * @be_change: backend change callback; passing NULL means hot backend change + * is not supported and will not be attempted + * @opaque: an opaque pointer for the callbacks + * @context: a main loop context or NULL for the default + * @set_open: whether to call qemu_chr_fe_set_open() implicitely when + * any of the handler is non-NULL + * @sync_state: whether to issue event callback with updated state + * + * Set the front end char handlers. The front end takes the focus if + * any of the handler is non-NULL. + * + * Without associated Chardev, nothing is changed. + */ +void qemu_chr_fe_set_handlers_full(CharBackend *b, + IOCanReadHandler *fd_can_read, + IOReadHandler *fd_read, + IOEventHandler *fd_event, + BackendChangeHandler *be_change, + void *opaque, + GMainContext *context, + bool set_open, + bool sync_state); + +/** + * qemu_chr_fe_set_handlers: + * + * Version of qemu_chr_fe_set_handlers_full() with sync_state = true. + */ +void qemu_chr_fe_set_handlers(CharBackend *b, + IOCanReadHandler *fd_can_read, + IOReadHandler *fd_read, + IOEventHandler *fd_event, + BackendChangeHandler *be_change, + void *opaque, + GMainContext *context, + bool set_open); + +/** + * qemu_chr_fe_take_focus: + * + * Take the focus (if the front end is muxed). + * + * Without associated Chardev, nothing is changed. + */ +void qemu_chr_fe_take_focus(CharBackend *b); + +/** + * qemu_chr_fe_accept_input: + * + * Notify that the frontend is ready to receive data + */ +void qemu_chr_fe_accept_input(CharBackend *be); + +/** + * qemu_chr_fe_disconnect: + * + * Close a fd accepted by character backend. + * Without associated Chardev, do nothing. + */ +void qemu_chr_fe_disconnect(CharBackend *be); + +/** + * qemu_chr_fe_wait_connected: + * + * Wait for characted backend to be connected, return < 0 on error or + * if no associated Chardev. + */ +int qemu_chr_fe_wait_connected(CharBackend *be, Error **errp); + +/** + * qemu_chr_fe_set_echo: + * @echo: true to enable echo, false to disable echo + * + * Ask the backend to override its normal echo setting. This only really + * applies to the stdio backend and is used by the QMP server such that you + * can see what you type if you try to type QMP commands. + * Without associated Chardev, do nothing. + */ +void qemu_chr_fe_set_echo(CharBackend *be, bool echo); + +/** + * qemu_chr_fe_set_open: + * + * Set character frontend open status. This is an indication that the + * front end is ready (or not) to begin doing I/O. + * Without associated Chardev, do nothing. + */ +void qemu_chr_fe_set_open(CharBackend *be, int fe_open); + +/** + * qemu_chr_fe_printf: + * @fmt: see #printf + * + * Write to a character backend using a printf style interface. This + * function is thread-safe. It does nothing without associated + * Chardev. + */ +void qemu_chr_fe_printf(CharBackend *be, const char *fmt, ...) + GCC_FMT_ATTR(2, 3); + +/** + * qemu_chr_fe_add_watch: + * @cond: the condition to poll for + * @func: the function to call when the condition happens + * @user_data: the opaque pointer to pass to @func + * + * If the backend is connected, create and add a #GSource that fires + * when the given condition (typically G_IO_OUT|G_IO_HUP or G_IO_HUP) + * is active; return the #GSource's tag. If it is disconnected, + * or without associated Chardev, return 0. + * + * Note that you are responsible to update the front-end sources if + * you are switching the main context with qemu_chr_fe_set_handlers(). + * + * Returns: the source tag + */ +guint qemu_chr_fe_add_watch(CharBackend *be, GIOCondition cond, + GIOFunc func, void *user_data); + +/** + * qemu_chr_fe_write: + * @buf: the data + * @len: the number of bytes to send + * + * Write data to a character backend from the front end. This function + * will send data from the front end to the back end. This function + * is thread-safe. + * + * Returns: the number of bytes consumed (0 if no associated Chardev) + */ +int qemu_chr_fe_write(CharBackend *be, const uint8_t *buf, int len); + +/** + * qemu_chr_fe_write_all: + * @buf: the data + * @len: the number of bytes to send + * + * Write data to a character backend from the front end. This function will + * send data from the front end to the back end. Unlike @qemu_chr_fe_write, + * this function will block if the back end cannot consume all of the data + * attempted to be written. This function is thread-safe. + * + * Returns: the number of bytes consumed (0 if no associated Chardev) + */ +int qemu_chr_fe_write_all(CharBackend *be, const uint8_t *buf, int len); + +/** + * qemu_chr_fe_read_all: + * @buf: the data buffer + * @len: the number of bytes to read + * + * Read data to a buffer from the back end. + * + * Returns: the number of bytes read (0 if no associated Chardev) + */ +int qemu_chr_fe_read_all(CharBackend *be, uint8_t *buf, int len); + +/** + * qemu_chr_fe_ioctl: + * @cmd: see CHR_IOCTL_* + * @arg: the data associated with @cmd + * + * Issue a device specific ioctl to a backend. This function is thread-safe. + * + * Returns: if @cmd is not supported by the backend or there is no + * associated Chardev, -ENOTSUP, otherwise the return + * value depends on the semantics of @cmd + */ +int qemu_chr_fe_ioctl(CharBackend *be, int cmd, void *arg); + +/** + * qemu_chr_fe_get_msgfd: + * + * For backends capable of fd passing, return the latest file descriptor passed + * by a client. + * + * Returns: -1 if fd passing isn't supported or there is no pending file + * descriptor. If a file descriptor is returned, subsequent calls to + * this function will return -1 until a client sends a new file + * descriptor. + */ +int qemu_chr_fe_get_msgfd(CharBackend *be); + +/** + * qemu_chr_fe_get_msgfds: + * + * For backends capable of fd passing, return the number of file received + * descriptors and fills the fds array up to num elements + * + * Returns: -1 if fd passing isn't supported or there are no pending file + * descriptors. If file descriptors are returned, subsequent calls to + * this function will return -1 until a client sends a new set of file + * descriptors. + */ +int qemu_chr_fe_get_msgfds(CharBackend *be, int *fds, int num); + +/** + * qemu_chr_fe_set_msgfds: + * + * For backends capable of fd passing, set an array of fds to be passed with + * the next send operation. + * A subsequent call to this function before calling a write function will + * result in overwriting the fd array with the new value without being send. + * Upon writing the message the fd array is freed. + * + * Returns: -1 if fd passing isn't supported or no associated Chardev. + */ +int qemu_chr_fe_set_msgfds(CharBackend *be, int *fds, int num); + +#endif /* QEMU_CHAR_FE_H */ diff --git a/include/chardev/char-io.h b/include/chardev/char-io.h new file mode 100644 index 000000000..ac379ea70 --- /dev/null +++ b/include/chardev/char-io.h @@ -0,0 +1,46 @@ +/* + * QEMU System Emulator + * + * Copyright (c) 2003-2008 Fabrice Bellard + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ +#ifndef CHAR_IO_H +#define CHAR_IO_H + +#include "io/channel.h" +#include "chardev/char.h" +#include "qemu/main-loop.h" + +/* Can only be used for read */ +GSource *io_add_watch_poll(Chardev *chr, + QIOChannel *ioc, + IOCanReadHandler *fd_can_read, + QIOChannelFunc fd_read, + gpointer user_data, + GMainContext *context); + +void remove_fd_in_watch(Chardev *chr); + +int io_channel_send(QIOChannel *ioc, const void *buf, size_t len); + +int io_channel_send_full(QIOChannel *ioc, const void *buf, size_t len, + int *fds, size_t nfds); + +#endif /* CHAR_IO_H */ diff --git a/include/chardev/char-parallel.h b/include/chardev/char-parallel.h new file mode 100644 index 000000000..c09751fd6 --- /dev/null +++ b/include/chardev/char-parallel.h @@ -0,0 +1,45 @@ +/* + * QEMU System Emulator + * + * Copyright (c) 2003-2008 Fabrice Bellard + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ +#ifndef CHAR_PARALLEL_H +#define CHAR_PARALLEL_H + +#include "chardev/char.h" + +#define CHR_IOCTL_PP_READ_DATA 3 +#define CHR_IOCTL_PP_WRITE_DATA 4 +#define CHR_IOCTL_PP_READ_CONTROL 5 +#define CHR_IOCTL_PP_WRITE_CONTROL 6 +#define CHR_IOCTL_PP_READ_STATUS 7 +#define CHR_IOCTL_PP_EPP_READ_ADDR 8 +#define CHR_IOCTL_PP_EPP_READ 9 +#define CHR_IOCTL_PP_EPP_WRITE_ADDR 10 +#define CHR_IOCTL_PP_EPP_WRITE 11 +#define CHR_IOCTL_PP_DATA_DIR 12 + +struct ParallelIOArg { + void *buffer; + int count; +}; + +#endif /* CHAR_PARALLEL_H */ diff --git a/include/chardev/char-serial.h b/include/chardev/char-serial.h new file mode 100644 index 000000000..ad6891b26 --- /dev/null +++ b/include/chardev/char-serial.h @@ -0,0 +1,49 @@ +/* + * QEMU System Emulator + * + * Copyright (c) 2003-2008 Fabrice Bellard + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ +#ifndef CHAR_SERIAL_H +#define CHAR_SERIAL_H + +#include "chardev/char.h" + +#define CHR_IOCTL_SERIAL_SET_PARAMS 1 +typedef struct { + int speed; + int parity; + int data_bits; + int stop_bits; +} QEMUSerialSetParams; + +#define CHR_IOCTL_SERIAL_SET_BREAK 2 + +#define CHR_IOCTL_SERIAL_SET_TIOCM 13 +#define CHR_IOCTL_SERIAL_GET_TIOCM 14 + +#define CHR_TIOCM_CTS 0x020 +#define CHR_TIOCM_CAR 0x040 +#define CHR_TIOCM_DSR 0x100 +#define CHR_TIOCM_RI 0x080 +#define CHR_TIOCM_DTR 0x002 +#define CHR_TIOCM_RTS 0x004 + +#endif diff --git a/include/chardev/char-win-stdio.h b/include/chardev/char-win-stdio.h new file mode 100644 index 000000000..d7314f734 --- /dev/null +++ b/include/chardev/char-win-stdio.h @@ -0,0 +1,29 @@ +/* + * QEMU System Emulator + * + * Copyright (c) 2003-2008 Fabrice Bellard + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ +#ifndef CHAR_WIN_STDIO_H +#define CHAR_WIN_STDIO_H + +#define TYPE_CHARDEV_WIN_STDIO "chardev-win-stdio" + +#endif /* CHAR_WIN_STDIO_H */ diff --git a/include/chardev/char-win.h b/include/chardev/char-win.h new file mode 100644 index 000000000..485521469 --- /dev/null +++ b/include/chardev/char-win.h @@ -0,0 +1,54 @@ +/* + * QEMU System Emulator + * + * Copyright (c) 2003-2008 Fabrice Bellard + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ +#ifndef CHAR_WIN_H +#define CHAR_WIN_H + +#include "chardev/char.h" +#include "qom/object.h" + +struct WinChardev { + Chardev parent; + + bool keep_open; /* console do not close file */ + HANDLE file, hrecv, hsend; + OVERLAPPED orecv; + BOOL fpipe; + + /* Protected by the Chardev chr_write_lock. */ + OVERLAPPED osend; +}; +typedef struct WinChardev WinChardev; + +#define NSENDBUF 2048 +#define NRECVBUF 2048 + +#define TYPE_CHARDEV_WIN "chardev-win" +DECLARE_INSTANCE_CHECKER(WinChardev, WIN_CHARDEV, + TYPE_CHARDEV_WIN) + +void win_chr_set_file(Chardev *chr, HANDLE file, bool keep_open); +int win_chr_serial_init(Chardev *chr, const char *filename, Error **errp); +int win_chr_pipe_poll(void *opaque); + +#endif /* CHAR_WIN_H */ diff --git a/include/chardev/char.h b/include/chardev/char.h new file mode 100644 index 000000000..db42f0a8c --- /dev/null +++ b/include/chardev/char.h @@ -0,0 +1,289 @@ +#ifndef QEMU_CHAR_H +#define QEMU_CHAR_H + +#include "qapi/qapi-types-char.h" +#include "qemu/bitmap.h" +#include "qemu/thread.h" +#include "qom/object.h" + +#define IAC_EOR 239 +#define IAC_SE 240 +#define IAC_NOP 241 +#define IAC_BREAK 243 +#define IAC_IP 244 +#define IAC_SB 250 +#define IAC 255 + +/* character device */ +typedef struct CharBackend CharBackend; + +typedef enum { + CHR_EVENT_BREAK, /* serial break char */ + CHR_EVENT_OPENED, /* new connection established */ + CHR_EVENT_MUX_IN, /* mux-focus was set to this terminal */ + CHR_EVENT_MUX_OUT, /* mux-focus will move on */ + CHR_EVENT_CLOSED /* connection closed. NOTE: currently this event + * is only bound to the read port of the chardev. + * Normally the read port and write port of a + * chardev should be the same, but it can be + * different, e.g., for fd chardevs, when the two + * fds are different. So when we received the + * CLOSED event it's still possible that the out + * port is still open. TODO: we should only send + * the CLOSED event when both ports are closed. + */ +} QEMUChrEvent; + +#define CHR_READ_BUF_LEN 4096 + +typedef enum { + /* Whether the chardev peer is able to close and + * reopen the data channel, thus requiring support + * for qemu_chr_wait_connected() to wait for a + * valid connection */ + QEMU_CHAR_FEATURE_RECONNECTABLE, + /* Whether it is possible to send/recv file descriptors + * over the data channel */ + QEMU_CHAR_FEATURE_FD_PASS, + /* Whether replay or record mode is enabled */ + QEMU_CHAR_FEATURE_REPLAY, + /* Whether the gcontext can be changed after calling + * qemu_chr_be_update_read_handlers() */ + QEMU_CHAR_FEATURE_GCONTEXT, + + QEMU_CHAR_FEATURE_LAST, +} ChardevFeature; + +#define qemu_chr_replay(chr) qemu_chr_has_feature(chr, QEMU_CHAR_FEATURE_REPLAY) + +struct Chardev { + Object parent_obj; + + QemuMutex chr_write_lock; + CharBackend *be; + char *label; + char *filename; + int logfd; + int be_open; + GSource *gsource; + GMainContext *gcontext; + DECLARE_BITMAP(features, QEMU_CHAR_FEATURE_LAST); +}; + +/** + * qemu_chr_new_from_opts: + * @opts: see qemu-config.c for a list of valid options + * @context: the #GMainContext to be used at initialization time + * + * Create a new character backend from a QemuOpts list. + * + * Returns: on success: a new character backend + * otherwise: NULL; @errp specifies the error + * or left untouched in case of help option + */ +Chardev *qemu_chr_new_from_opts(QemuOpts *opts, + GMainContext *context, + Error **errp); + +/** + * qemu_chr_parse_common: + * @opts: the options that still need parsing + * @backend: a new backend + * + * Parse the common options available to all character backends. + */ +void qemu_chr_parse_common(QemuOpts *opts, ChardevCommon *backend); + +/** + * qemu_chr_parse_opts: + * + * Parse the options to the ChardevBackend struct. + * + * Returns: a new backend or NULL on error + */ +ChardevBackend *qemu_chr_parse_opts(QemuOpts *opts, + Error **errp); + +/** + * qemu_chr_new: + * @label: the name of the backend + * @filename: the URI + * @context: the #GMainContext to be used at initialization time + * + * Create a new character backend from a URI. + * Do not implicitly initialize a monitor if the chardev is muxed. + * + * Returns: a new character backend + */ +Chardev *qemu_chr_new(const char *label, const char *filename, + GMainContext *context); + +/** + * qemu_chr_new_mux_mon: + * @label: the name of the backend + * @filename: the URI + * @context: the #GMainContext to be used at initialization time + * + * Create a new character backend from a URI. + * Implicitly initialize a monitor if the chardev is muxed. + * + * Returns: a new character backend + */ +Chardev *qemu_chr_new_mux_mon(const char *label, const char *filename, + GMainContext *context); + +/** +* qemu_chr_change: +* @opts: the new backend options + * + * Change an existing character backend + */ +void qemu_chr_change(QemuOpts *opts, Error **errp); + +/** + * qemu_chr_cleanup: + * + * Delete all chardevs (when leaving qemu) + */ +void qemu_chr_cleanup(void); + +/** + * qemu_chr_new_noreplay: + * @label: the name of the backend + * @filename: the URI + * @permit_mux_mon: if chardev is muxed, initialize a monitor + * @context: the #GMainContext to be used at initialization time + * + * Create a new character backend from a URI. + * Character device communications are not written + * into the replay log. + * + * Returns: a new character backend + */ +Chardev *qemu_chr_new_noreplay(const char *label, const char *filename, + bool permit_mux_mon, GMainContext *context); + +/** + * qemu_chr_be_can_write: + * + * Determine how much data the front end can currently accept. This function + * returns the number of bytes the front end can accept. If it returns 0, the + * front end cannot receive data at the moment. The function must be polled + * to determine when data can be received. + * + * Returns: the number of bytes the front end can receive via @qemu_chr_be_write + */ +int qemu_chr_be_can_write(Chardev *s); + +/** + * qemu_chr_be_write: + * @buf: a buffer to receive data from the front end + * @len: the number of bytes to receive from the front end + * + * Write data from the back end to the front end. Before issuing this call, + * the caller should call @qemu_chr_be_can_write to determine how much data + * the front end can currently accept. + */ +void qemu_chr_be_write(Chardev *s, uint8_t *buf, int len); + +/** + * qemu_chr_be_write_impl: + * @buf: a buffer to receive data from the front end + * @len: the number of bytes to receive from the front end + * + * Implementation of back end writing. Used by replay module. + */ +void qemu_chr_be_write_impl(Chardev *s, uint8_t *buf, int len); + +/** + * qemu_chr_be_update_read_handlers: + * @context: the gcontext that will be used to attach the watch sources + * + * Invoked when frontend read handlers are setup + */ +void qemu_chr_be_update_read_handlers(Chardev *s, + GMainContext *context); + +/** + * qemu_chr_be_event: + * @event: the event to send + * + * Send an event from the back end to the front end. + */ +void qemu_chr_be_event(Chardev *s, QEMUChrEvent event); + +int qemu_chr_add_client(Chardev *s, int fd); +Chardev *qemu_chr_find(const char *name); + +bool qemu_chr_has_feature(Chardev *chr, + ChardevFeature feature); +void qemu_chr_set_feature(Chardev *chr, + ChardevFeature feature); +QemuOpts *qemu_chr_parse_compat(const char *label, const char *filename, + bool permit_mux_mon); +int qemu_chr_write(Chardev *s, const uint8_t *buf, int len, bool write_all); +#define qemu_chr_write_all(s, buf, len) qemu_chr_write(s, buf, len, true) +int qemu_chr_wait_connected(Chardev *chr, Error **errp); + +#define TYPE_CHARDEV "chardev" +OBJECT_DECLARE_TYPE(Chardev, ChardevClass, CHARDEV) + +#define TYPE_CHARDEV_NULL "chardev-null" +#define TYPE_CHARDEV_MUX "chardev-mux" +#define TYPE_CHARDEV_RINGBUF "chardev-ringbuf" +#define TYPE_CHARDEV_PTY "chardev-pty" +#define TYPE_CHARDEV_CONSOLE "chardev-console" +#define TYPE_CHARDEV_STDIO "chardev-stdio" +#define TYPE_CHARDEV_PIPE "chardev-pipe" +#define TYPE_CHARDEV_MEMORY "chardev-memory" +#define TYPE_CHARDEV_PARALLEL "chardev-parallel" +#define TYPE_CHARDEV_FILE "chardev-file" +#define TYPE_CHARDEV_SERIAL "chardev-serial" +#define TYPE_CHARDEV_SOCKET "chardev-socket" +#define TYPE_CHARDEV_UDP "chardev-udp" + +#define CHARDEV_IS_RINGBUF(chr) \ + object_dynamic_cast(OBJECT(chr), TYPE_CHARDEV_RINGBUF) +#define CHARDEV_IS_PTY(chr) \ + object_dynamic_cast(OBJECT(chr), TYPE_CHARDEV_PTY) + +struct ChardevClass { + ObjectClass parent_class; + + bool internal; /* TODO: eventually use TYPE_USER_CREATABLE */ + void (*parse)(QemuOpts *opts, ChardevBackend *backend, Error **errp); + + void (*open)(Chardev *chr, ChardevBackend *backend, + bool *be_opened, Error **errp); + + int (*chr_write)(Chardev *s, const uint8_t *buf, int len); + int (*chr_sync_read)(Chardev *s, const uint8_t *buf, int len); + GSource *(*chr_add_watch)(Chardev *s, GIOCondition cond); + void (*chr_update_read_handler)(Chardev *s); + int (*chr_ioctl)(Chardev *s, int cmd, void *arg); + int (*get_msgfds)(Chardev *s, int* fds, int num); + int (*set_msgfds)(Chardev *s, int *fds, int num); + int (*chr_add_client)(Chardev *chr, int fd); + int (*chr_wait_connected)(Chardev *chr, Error **errp); + void (*chr_disconnect)(Chardev *chr); + void (*chr_accept_input)(Chardev *chr); + void (*chr_set_echo)(Chardev *chr, bool echo); + void (*chr_set_fe_open)(Chardev *chr, int fe_open); + void (*chr_be_event)(Chardev *s, QEMUChrEvent event); + /* Return 0 if succeeded, 1 if failed */ + int (*chr_machine_done)(Chardev *chr); +}; + +Chardev *qemu_chardev_new(const char *id, const char *typename, + ChardevBackend *backend, GMainContext *context, + Error **errp); + +extern int term_escape_char; + +GSource *qemu_chr_timeout_add_ms(Chardev *chr, guint ms, + GSourceFunc func, void *private); + +/* console.c */ +void qemu_chr_parse_vc(QemuOpts *opts, ChardevBackend *backend, Error **errp); + +#endif diff --git a/include/chardev/spice.h b/include/chardev/spice.h new file mode 100644 index 000000000..58e5b727e --- /dev/null +++ b/include/chardev/spice.h @@ -0,0 +1,26 @@ +#ifndef CHARDEV_SPICE_H +#define CHARDEV_SPICE_H + +#include +#include "chardev/char-fe.h" +#include "qom/object.h" + +struct SpiceChardev { + Chardev parent; + + SpiceCharDeviceInstance sin; + bool active; + bool blocked; + const uint8_t *datapos; + int datalen; +}; +typedef struct SpiceChardev SpiceChardev; + +#define TYPE_CHARDEV_SPICE "chardev-spice" +#define TYPE_CHARDEV_SPICEVMC "chardev-spicevmc" +#define TYPE_CHARDEV_SPICEPORT "chardev-spiceport" + +DECLARE_INSTANCE_CHECKER(SpiceChardev, SPICE_CHARDEV, + TYPE_CHARDEV_SPICE) + +#endif diff --git a/include/crypto/aes.h b/include/crypto/aes.h new file mode 100644 index 000000000..ba297d6a7 --- /dev/null +++ b/include/crypto/aes.h @@ -0,0 +1,63 @@ +#ifndef QEMU_AES_H +#define QEMU_AES_H + +#define AES_MAXNR 14 +#define AES_BLOCK_SIZE 16 + +struct aes_key_st { + uint32_t rd_key[4 *(AES_MAXNR + 1)]; + int rounds; +}; +typedef struct aes_key_st AES_KEY; + +/* FreeBSD/OpenSSL have their own AES functions with the same names in -lcrypto + * (which might be pulled in via curl), so redefine to avoid conflicts. */ +#define AES_set_encrypt_key QEMU_AES_set_encrypt_key +#define AES_set_decrypt_key QEMU_AES_set_decrypt_key +#define AES_encrypt QEMU_AES_encrypt +#define AES_decrypt QEMU_AES_decrypt + +int AES_set_encrypt_key(const unsigned char *userKey, const int bits, + AES_KEY *key); +int AES_set_decrypt_key(const unsigned char *userKey, const int bits, + AES_KEY *key); + +void AES_encrypt(const unsigned char *in, unsigned char *out, + const AES_KEY *key); +void AES_decrypt(const unsigned char *in, unsigned char *out, + const AES_KEY *key); + +extern const uint8_t AES_sbox[256]; +extern const uint8_t AES_isbox[256]; + +/* AES ShiftRows and InvShiftRows */ +extern const uint8_t AES_shifts[16]; +extern const uint8_t AES_ishifts[16]; + +/* AES InvMixColumns */ +/* AES_imc[x][0] = [x].[0e, 09, 0d, 0b]; */ +/* AES_imc[x][1] = [x].[0b, 0e, 09, 0d]; */ +/* AES_imc[x][2] = [x].[0d, 0b, 0e, 09]; */ +/* AES_imc[x][3] = [x].[09, 0d, 0b, 0e]; */ +extern const uint32_t AES_imc[256][4]; + +/* +AES_Te0[x] = S [x].[02, 01, 01, 03]; +AES_Te1[x] = S [x].[03, 02, 01, 01]; +AES_Te2[x] = S [x].[01, 03, 02, 01]; +AES_Te3[x] = S [x].[01, 01, 03, 02]; +AES_Te4[x] = S [x].[01, 01, 01, 01]; + +AES_Td0[x] = Si[x].[0e, 09, 0d, 0b]; +AES_Td1[x] = Si[x].[0b, 0e, 09, 0d]; +AES_Td2[x] = Si[x].[0d, 0b, 0e, 09]; +AES_Td3[x] = Si[x].[09, 0d, 0b, 0e]; +AES_Td4[x] = Si[x].[01, 01, 01, 01]; +*/ + +extern const uint32_t AES_Te0[256], AES_Te1[256], AES_Te2[256], + AES_Te3[256], AES_Te4[256]; +extern const uint32_t AES_Td0[256], AES_Td1[256], AES_Td2[256], + AES_Td3[256], AES_Td4[256]; + +#endif diff --git a/include/crypto/afsplit.h b/include/crypto/afsplit.h new file mode 100644 index 000000000..4894d6433 --- /dev/null +++ b/include/crypto/afsplit.h @@ -0,0 +1,134 @@ +/* + * QEMU Crypto anti forensic information splitter + * + * Copyright (c) 2015-2016 Red Hat, Inc. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version 2 + * of the License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, see . + */ + +#ifndef QCRYPTO_AFSPLIT_H +#define QCRYPTO_AFSPLIT_H + +#include "crypto/hash.h" + +/** + * This module implements the anti-forensic splitter that is specified + * as part of the LUKS format: + * + * http://clemens.endorphin.org/cryptography + * http://clemens.endorphin.org/TKS1-draft.pdf + * + * The core idea is to take a short piece of data (key material) + * and process it to expand it to a much larger piece of data. + * The expansion process is reversible, to obtain the original + * short data. The key property of the expansion is that if any + * byte in the larger data set is changed / missing, it should be + * impossible to recreate the original short data. + * + * + * Creating a large split key for storage + * + * size_t nkey = 32; + * uint32_t stripes = 32768; // To produce a 1 MB split key + * uint8_t *masterkey = ....a 32-byte AES key... + * uint8_t *splitkey; + * + * splitkey = g_new0(uint8_t, nkey * stripes); + * + * if (qcrypto_afsplit_encode(QCRYPTO_HASH_ALG_SHA256, + * nkey, stripes, + * masterkey, splitkey, errp) < 0) { + * g_free(splitkey); + * g_free(masterkey); + * return -1; + * } + * + * ...store splitkey somewhere... + * + * g_free(splitkey); + * g_free(masterkey); + * + * + * + * + * Retrieving a master key from storage + * + * size_t nkey = 32; + * uint32_t stripes = 32768; // To produce a 1 MB split key + * uint8_t *masterkey; + * uint8_t *splitkey = .... read in 1 MB of data... + * + * masterkey = g_new0(uint8_t, nkey); + * + * if (qcrypto_afsplit_decode(QCRYPTO_HASH_ALG_SHA256, + * nkey, stripes, + * splitkey, masterkey, errp) < 0) { + * g_free(splitkey); + * g_free(masterkey); + * return -1; + * } + * + * ..decrypt data with masterkey... + * + * g_free(splitkey); + * g_free(masterkey); + * + * + */ + +/** + * qcrypto_afsplit_encode: + * @hash: the hash algorithm to use for data expansion + * @blocklen: the size of @in in bytes + * @stripes: the number of times to expand @in in size + * @in: the master key to be expanded in size + * @out: preallocated buffer to hold the split key + * @errp: pointer to a NULL-initialized error object + * + * Split the data in @in, which is @blocklen bytes in + * size, to form a larger piece of data @out, which is + * @blocklen * @stripes bytes in size. + * + * Returns: 0 on success, -1 on error; + */ +int qcrypto_afsplit_encode(QCryptoHashAlgorithm hash, + size_t blocklen, + uint32_t stripes, + const uint8_t *in, + uint8_t *out, + Error **errp); + +/** + * qcrypto_afsplit_decode: + * @hash: the hash algorithm to use for data compression + * @blocklen: the size of @out in bytes + * @stripes: the number of times to decrease @in in size + * @in: the split key to be recombined + * @out: preallocated buffer to hold the master key + * @errp: pointer to a NULL-initialized error object + * + * Join the data in @in, which is @blocklen * @stripes + * bytes in size, to form the original small piece of + * data @out, which is @blocklen bytes in size. + * + * Returns: 0 on success, -1 on error; + */ +int qcrypto_afsplit_decode(QCryptoHashAlgorithm hash, + size_t blocklen, + uint32_t stripes, + const uint8_t *in, + uint8_t *out, + Error **errp); + +#endif /* QCRYPTO_AFSPLIT_H */ diff --git a/include/crypto/block.h b/include/crypto/block.h new file mode 100644 index 000000000..7a65e8e40 --- /dev/null +++ b/include/crypto/block.h @@ -0,0 +1,315 @@ +/* + * QEMU Crypto block device encryption + * + * Copyright (c) 2015-2016 Red Hat, Inc. + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, see . + * + */ + +#ifndef QCRYPTO_BLOCK_H +#define QCRYPTO_BLOCK_H + +#include "crypto/cipher.h" +#include "crypto/ivgen.h" + +typedef struct QCryptoBlock QCryptoBlock; + +/* See also QCryptoBlockFormat, QCryptoBlockCreateOptions + * and QCryptoBlockOpenOptions in qapi/crypto.json */ + +typedef ssize_t (*QCryptoBlockReadFunc)(QCryptoBlock *block, + size_t offset, + uint8_t *buf, + size_t buflen, + void *opaque, + Error **errp); + +typedef ssize_t (*QCryptoBlockInitFunc)(QCryptoBlock *block, + size_t headerlen, + void *opaque, + Error **errp); + +typedef ssize_t (*QCryptoBlockWriteFunc)(QCryptoBlock *block, + size_t offset, + const uint8_t *buf, + size_t buflen, + void *opaque, + Error **errp); + +/** + * qcrypto_block_has_format: + * @format: the encryption format + * @buf: the data from head of the volume + * @len: the length of @buf in bytes + * + * Given @len bytes of data from the head of a storage volume + * in @buf, probe to determine if the volume has the encryption + * format specified in @format. + * + * Returns: true if the data in @buf matches @format + */ +bool qcrypto_block_has_format(QCryptoBlockFormat format, + const uint8_t *buf, + size_t buflen); + +typedef enum { + QCRYPTO_BLOCK_OPEN_NO_IO = (1 << 0), +} QCryptoBlockOpenFlags; + +/** + * qcrypto_block_open: + * @options: the encryption options + * @optprefix: name prefix for options + * @readfunc: callback for reading data from the volume + * @opaque: data to pass to @readfunc + * @flags: bitmask of QCryptoBlockOpenFlags values + * @n_threads: allow concurrent I/O from up to @n_threads threads + * @errp: pointer to a NULL-initialized error object + * + * Create a new block encryption object for an existing + * storage volume encrypted with format identified by + * the parameters in @options. + * + * This will use @readfunc to initialize the encryption + * context based on the volume header(s), extracting the + * master key(s) as required. + * + * If @flags contains QCRYPTO_BLOCK_OPEN_NO_IO then + * the open process will be optimized to skip any parts + * that are only required to perform I/O. In particular + * this would usually avoid the need to decrypt any + * master keys. The only thing that can be done with + * the resulting QCryptoBlock object would be to query + * metadata such as the payload offset. There will be + * no cipher or ivgen objects available. + * + * If any part of initializing the encryption context + * fails an error will be returned. This could be due + * to the volume being in the wrong format, a cipher + * or IV generator algorithm that is not supported, + * or incorrect passphrases. + * + * Returns: a block encryption format, or NULL on error + */ +QCryptoBlock *qcrypto_block_open(QCryptoBlockOpenOptions *options, + const char *optprefix, + QCryptoBlockReadFunc readfunc, + void *opaque, + unsigned int flags, + size_t n_threads, + Error **errp); + +/** + * qcrypto_block_create: + * @options: the encryption options + * @optprefix: name prefix for options + * @initfunc: callback for initializing volume header + * @writefunc: callback for writing data to the volume header + * @opaque: data to pass to @initfunc and @writefunc + * @errp: pointer to a NULL-initialized error object + * + * Create a new block encryption object for initializing + * a storage volume to be encrypted with format identified + * by the parameters in @options. + * + * This method will allocate space for a new volume header + * using @initfunc and then write header data using @writefunc, + * generating new master keys, etc as required. Any existing + * data present on the volume will be irrevocably destroyed. + * + * If any part of initializing the encryption context + * fails an error will be returned. This could be due + * to the volume being in the wrong format, a cipher + * or IV generator algorithm that is not supported, + * or incorrect passphrases. + * + * Returns: a block encryption format, or NULL on error + */ +QCryptoBlock *qcrypto_block_create(QCryptoBlockCreateOptions *options, + const char *optprefix, + QCryptoBlockInitFunc initfunc, + QCryptoBlockWriteFunc writefunc, + void *opaque, + Error **errp); + +/** + * qcrypto_block_amend_options: + * @block: the block encryption object + * + * @readfunc: callback for reading data from the volume header + * @writefunc: callback for writing data to the volume header + * @opaque: data to pass to @readfunc and @writefunc + * @options: the new/amended encryption options + * @force: hint for the driver to allow unsafe operation + * @errp: error pointer + * + * Changes the crypto options of the encryption format + * + */ +int qcrypto_block_amend_options(QCryptoBlock *block, + QCryptoBlockReadFunc readfunc, + QCryptoBlockWriteFunc writefunc, + void *opaque, + QCryptoBlockAmendOptions *options, + bool force, + Error **errp); + + +/** + * qcrypto_block_calculate_payload_offset: + * @create_opts: the encryption options + * @optprefix: name prefix for options + * @len: output for number of header bytes before payload + * @errp: pointer to a NULL-initialized error object + * + * Calculate the number of header bytes before the payload in an encrypted + * storage volume. The header is an area before the payload that is reserved + * for encryption metadata. + * + * Returns: true on success, false on error + */ +bool +qcrypto_block_calculate_payload_offset(QCryptoBlockCreateOptions *create_opts, + const char *optprefix, + size_t *len, + Error **errp); + + +/** + * qcrypto_block_get_info: + * @block: the block encryption object + * @errp: pointer to a NULL-initialized error object + * + * Get information about the configuration options for the + * block encryption object. This includes details such as + * the cipher algorithms, modes, and initialization vector + * generators. + * + * Returns: a block encryption info object, or NULL on error + */ +QCryptoBlockInfo *qcrypto_block_get_info(QCryptoBlock *block, + Error **errp); + +/** + * @qcrypto_block_decrypt: + * @block: the block encryption object + * @offset: the position at which @iov was read + * @buf: the buffer to decrypt + * @len: the length of @buf in bytes + * @errp: pointer to a NULL-initialized error object + * + * Decrypt @len bytes of cipher text in @buf, writing + * plain text back into @buf. @len and @offset must be + * a multiple of the encryption format sector size. + * + * Returns 0 on success, -1 on failure + */ +int qcrypto_block_decrypt(QCryptoBlock *block, + uint64_t offset, + uint8_t *buf, + size_t len, + Error **errp); + +/** + * @qcrypto_block_encrypt: + * @block: the block encryption object + * @offset: the position at which @iov will be written + * @buf: the buffer to decrypt + * @len: the length of @buf in bytes + * @errp: pointer to a NULL-initialized error object + * + * Encrypt @len bytes of plain text in @buf, writing + * cipher text back into @buf. @len and @offset must be + * a multiple of the encryption format sector size. + * + * Returns 0 on success, -1 on failure + */ +int qcrypto_block_encrypt(QCryptoBlock *block, + uint64_t offset, + uint8_t *buf, + size_t len, + Error **errp); + +/** + * qcrypto_block_get_cipher: + * @block: the block encryption object + * + * Get the cipher to use for payload encryption + * + * Returns: the cipher object + */ +QCryptoCipher *qcrypto_block_get_cipher(QCryptoBlock *block); + +/** + * qcrypto_block_get_ivgen: + * @block: the block encryption object + * + * Get the initialization vector generator to use for + * payload encryption + * + * Returns: the IV generator object + */ +QCryptoIVGen *qcrypto_block_get_ivgen(QCryptoBlock *block); + + +/** + * qcrypto_block_get_kdf_hash: + * @block: the block encryption object + * + * Get the hash algorithm used with the key derivation + * function + * + * Returns: the hash algorithm + */ +QCryptoHashAlgorithm qcrypto_block_get_kdf_hash(QCryptoBlock *block); + +/** + * qcrypto_block_get_payload_offset: + * @block: the block encryption object + * + * Get the offset to the payload indicated by the + * encryption header, in bytes. + * + * Returns: the payload offset in bytes + */ +uint64_t qcrypto_block_get_payload_offset(QCryptoBlock *block); + +/** + * qcrypto_block_get_sector_size: + * @block: the block encryption object + * + * Get the size of sectors used for payload encryption. A new + * IV is used at the start of each sector. The encryption + * sector size is not required to match the sector size of the + * underlying storage. For example LUKS will always use a 512 + * byte sector size, even if the volume is on a disk with 4k + * sectors. + * + * Returns: the sector in bytes + */ +uint64_t qcrypto_block_get_sector_size(QCryptoBlock *block); + +/** + * qcrypto_block_free: + * @block: the block encryption object + * + * Release all resources associated with the encryption + * object + */ +void qcrypto_block_free(QCryptoBlock *block); + +G_DEFINE_AUTOPTR_CLEANUP_FUNC(QCryptoBlock, qcrypto_block_free) + +#endif /* QCRYPTO_BLOCK_H */ diff --git a/include/crypto/cipher.h b/include/crypto/cipher.h new file mode 100644 index 000000000..083e12a7d --- /dev/null +++ b/include/crypto/cipher.h @@ -0,0 +1,238 @@ +/* + * QEMU Crypto cipher algorithms + * + * Copyright (c) 2015 Red Hat, Inc. + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, see . + * + */ + +#ifndef QCRYPTO_CIPHER_H +#define QCRYPTO_CIPHER_H + +#include "qapi/qapi-types-crypto.h" + +typedef struct QCryptoCipher QCryptoCipher; +typedef struct QCryptoCipherDriver QCryptoCipherDriver; + +/* See also "QCryptoCipherAlgorithm" and "QCryptoCipherMode" + * enums defined in qapi/crypto.json */ + +/** + * QCryptoCipher: + * + * The QCryptoCipher object provides a way to perform encryption + * and decryption of data, with a standard API, regardless of the + * algorithm used. It further isolates the calling code from the + * details of the specific underlying implementation, whether + * built-in, libgcrypt or nettle. + * + * Each QCryptoCipher object is capable of performing both + * encryption and decryption, and can operate in a number + * or modes including ECB, CBC. + * + * + * Encrypting data with AES-128 in CBC mode + * + * QCryptoCipher *cipher; + * uint8_t key = ....; + * size_t keylen = 16; + * uint8_t iv = ....; + * + * if (!qcrypto_cipher_supports(QCRYPTO_CIPHER_ALG_AES_128)) { + * error_report(errp, "Feature requires AES cipher support"); + * return -1; + * } + * + * cipher = qcrypto_cipher_new(QCRYPTO_CIPHER_ALG_AES_128, + * QCRYPTO_CIPHER_MODE_CBC, + * key, keylen, + * errp); + * if (!cipher) { + * return -1; + * } + * + * if (qcrypto_cipher_set_iv(cipher, iv, keylen, errp) < 0) { + * return -1; + * } + * + * if (qcrypto_cipher_encrypt(cipher, rawdata, encdata, datalen, errp) < 0) { + * return -1; + * } + * + * qcrypto_cipher_free(cipher); + * + * + * + */ + +struct QCryptoCipher { + QCryptoCipherAlgorithm alg; + QCryptoCipherMode mode; + const QCryptoCipherDriver *driver; +}; + +/** + * qcrypto_cipher_supports: + * @alg: the cipher algorithm + * @mode: the cipher mode + * + * Determine if @alg cipher algorithm in @mode is supported by the + * current configured build + * + * Returns: true if the algorithm is supported, false otherwise + */ +bool qcrypto_cipher_supports(QCryptoCipherAlgorithm alg, + QCryptoCipherMode mode); + +/** + * qcrypto_cipher_get_block_len: + * @alg: the cipher algorithm + * + * Get the required data block size in bytes. When + * encrypting data, it must be a multiple of the + * block size. + * + * Returns: the block size in bytes + */ +size_t qcrypto_cipher_get_block_len(QCryptoCipherAlgorithm alg); + + +/** + * qcrypto_cipher_get_key_len: + * @alg: the cipher algorithm + * + * Get the required key size in bytes. + * + * Returns: the key size in bytes + */ +size_t qcrypto_cipher_get_key_len(QCryptoCipherAlgorithm alg); + + +/** + * qcrypto_cipher_get_iv_len: + * @alg: the cipher algorithm + * @mode: the cipher mode + * + * Get the required initialization vector size + * in bytes, if one is required. + * + * Returns: the IV size in bytes, or 0 if no IV is permitted + */ +size_t qcrypto_cipher_get_iv_len(QCryptoCipherAlgorithm alg, + QCryptoCipherMode mode); + + +/** + * qcrypto_cipher_new: + * @alg: the cipher algorithm + * @mode: the cipher usage mode + * @key: the private key bytes + * @nkey: the length of @key + * @errp: pointer to a NULL-initialized error object + * + * Creates a new cipher object for encrypting/decrypting + * data with the algorithm @alg in the usage mode @mode. + * + * The @key parameter provides the bytes representing + * the encryption/decryption key to use. The @nkey parameter + * specifies the length of @key in bytes. Each algorithm has + * one or more valid key lengths, and it is an error to provide + * a key of the incorrect length. + * + * The returned cipher object must be released with + * qcrypto_cipher_free() when no longer required + * + * Returns: a new cipher object, or NULL on error + */ +QCryptoCipher *qcrypto_cipher_new(QCryptoCipherAlgorithm alg, + QCryptoCipherMode mode, + const uint8_t *key, size_t nkey, + Error **errp); + +/** + * qcrypto_cipher_free: + * @cipher: the cipher object + * + * Release the memory associated with @cipher that + * was previously allocated by qcrypto_cipher_new() + */ +void qcrypto_cipher_free(QCryptoCipher *cipher); + +G_DEFINE_AUTOPTR_CLEANUP_FUNC(QCryptoCipher, qcrypto_cipher_free) + +/** + * qcrypto_cipher_encrypt: + * @cipher: the cipher object + * @in: buffer holding the plain text input data + * @out: buffer to fill with the cipher text output data + * @len: the length of @in and @out buffers + * @errp: pointer to a NULL-initialized error object + * + * Encrypts the plain text stored in @in, filling + * @out with the resulting ciphered text. Both the + * @in and @out buffers must have the same size, + * given by @len. + * + * Returns: 0 on success, or -1 on error + */ +int qcrypto_cipher_encrypt(QCryptoCipher *cipher, + const void *in, + void *out, + size_t len, + Error **errp); + + +/** + * qcrypto_cipher_decrypt: + * @cipher: the cipher object + * @in: buffer holding the cipher text input data + * @out: buffer to fill with the plain text output data + * @len: the length of @in and @out buffers + * @errp: pointer to a NULL-initialized error object + * + * Decrypts the cipher text stored in @in, filling + * @out with the resulting plain text. Both the + * @in and @out buffers must have the same size, + * given by @len. + * + * Returns: 0 on success, or -1 on error + */ +int qcrypto_cipher_decrypt(QCryptoCipher *cipher, + const void *in, + void *out, + size_t len, + Error **errp); + +/** + * qcrypto_cipher_setiv: + * @cipher: the cipher object + * @iv: the initialization vector or counter (CTR mode) bytes + * @niv: the length of @iv + * @errpr: pointer to a NULL-initialized error object + * + * If the @cipher object is setup to use a mode that requires + * initialization vectors or counter, this sets the @niv + * bytes. The @iv data should have the same length as the + * cipher key used when originally constructing the cipher + * object. It is an error to set an initialization vector + * or counter if the cipher mode does not require one. + * + * Returns: 0 on success, -1 on error + */ +int qcrypto_cipher_setiv(QCryptoCipher *cipher, + const uint8_t *iv, size_t niv, + Error **errp); + +#endif /* QCRYPTO_CIPHER_H */ diff --git a/include/crypto/desrfb.h b/include/crypto/desrfb.h new file mode 100644 index 000000000..7ca596c38 --- /dev/null +++ b/include/crypto/desrfb.h @@ -0,0 +1,50 @@ +/* + * This is D3DES (V5.09) by Richard Outerbridge with the double and + * triple-length support removed for use in VNC. + * + * These changes are: + * Copyright (C) 1999 AT&T Laboratories Cambridge. All Rights Reserved. + * + * This software is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + */ + +#ifndef QCRYPTO_DESRFB_H +#define QCRYPTO_DESRFB_H + +/* d3des.h - + * + * Headers and defines for d3des.c + * Graven Imagery, 1992. + * + * Copyright (c) 1988,1989,1990,1991,1992 by Richard Outerbridge + * (GEnie : OUTER; CIS : [71755,204]) + */ + +#define EN0 0 /* MODE == encrypt */ +#define DE1 1 /* MODE == decrypt */ + +void deskey(unsigned char *, int); +/* hexkey[8] MODE + * Sets the internal key register according to the hexadecimal + * key contained in the 8 bytes of hexkey, according to the DES, + * for encryption or decryption according to MODE. + */ + +void usekey(unsigned long *); +/* cookedkey[32] + * Loads the internal key register with the data in cookedkey. + */ + +void des(unsigned char *, unsigned char *); +/* from[8] to[8] + * Encrypts/Decrypts (according to the key currently loaded in the + * internal key register) one block of eight bytes at address 'from' + * into the block at address 'to'. They can be the same. + */ + +/* d3des.h V5.09 rwo 9208.04 15:06 Graven Imagery + ********************************************************************/ + +#endif diff --git a/include/crypto/hash.h b/include/crypto/hash.h new file mode 100644 index 000000000..54d87aa2a --- /dev/null +++ b/include/crypto/hash.h @@ -0,0 +1,192 @@ +/* + * QEMU Crypto hash algorithms + * + * Copyright (c) 2015 Red Hat, Inc. + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, see . + * + */ + +#ifndef QCRYPTO_HASH_H +#define QCRYPTO_HASH_H + +#include "qapi/qapi-types-crypto.h" + +/* See also "QCryptoHashAlgorithm" defined in qapi/crypto.json */ + +/** + * qcrypto_hash_supports: + * @alg: the hash algorithm + * + * Determine if @alg hash algorithm is supported by the + * current configured build. + * + * Returns: true if the algorithm is supported, false otherwise + */ +gboolean qcrypto_hash_supports(QCryptoHashAlgorithm alg); + + +/** + * qcrypto_hash_digest_len: + * @alg: the hash algorithm + * + * Determine the size of the hash digest in bytes + * + * Returns: the digest length in bytes + */ +size_t qcrypto_hash_digest_len(QCryptoHashAlgorithm alg); + +/** + * qcrypto_hash_bytesv: + * @alg: the hash algorithm + * @iov: the array of memory regions to hash + * @niov: the length of @iov + * @result: pointer to hold output hash + * @resultlen: pointer to hold length of @result + * @errp: pointer to a NULL-initialized error object + * + * Computes the hash across all the memory regions + * present in @iov. The @result pointer will be + * filled with raw bytes representing the computed + * hash, which will have length @resultlen. The + * memory pointer in @result must be released + * with a call to g_free() when no longer required. + * + * Returns: 0 on success, -1 on error + */ +int qcrypto_hash_bytesv(QCryptoHashAlgorithm alg, + const struct iovec *iov, + size_t niov, + uint8_t **result, + size_t *resultlen, + Error **errp); + +/** + * qcrypto_hash_bytes: + * @alg: the hash algorithm + * @buf: the memory region to hash + * @len: the length of @buf + * @result: pointer to hold output hash + * @resultlen: pointer to hold length of @result + * @errp: pointer to a NULL-initialized error object + * + * Computes the hash across all the memory region + * @buf of length @len. The @result pointer will be + * filled with raw bytes representing the computed + * hash, which will have length @resultlen. The + * memory pointer in @result must be released + * with a call to g_free() when no longer required. + * + * Returns: 0 on success, -1 on error + */ +int qcrypto_hash_bytes(QCryptoHashAlgorithm alg, + const char *buf, + size_t len, + uint8_t **result, + size_t *resultlen, + Error **errp); + +/** + * qcrypto_hash_digestv: + * @alg: the hash algorithm + * @iov: the array of memory regions to hash + * @niov: the length of @iov + * @digest: pointer to hold output hash + * @errp: pointer to a NULL-initialized error object + * + * Computes the hash across all the memory regions + * present in @iov. The @digest pointer will be + * filled with the printable hex digest of the computed + * hash, which will be terminated by '\0'. The + * memory pointer in @digest must be released + * with a call to g_free() when no longer required. + * + * Returns: 0 on success, -1 on error + */ +int qcrypto_hash_digestv(QCryptoHashAlgorithm alg, + const struct iovec *iov, + size_t niov, + char **digest, + Error **errp); + +/** + * qcrypto_hash_digest: + * @alg: the hash algorithm + * @buf: the memory region to hash + * @len: the length of @buf + * @digest: pointer to hold output hash + * @errp: pointer to a NULL-initialized error object + * + * Computes the hash across all the memory region + * @buf of length @len. The @digest pointer will be + * filled with the printable hex digest of the computed + * hash, which will be terminated by '\0'. The + * memory pointer in @digest must be released + * with a call to g_free() when no longer required. + * + * Returns: 0 on success, -1 on error + */ +int qcrypto_hash_digest(QCryptoHashAlgorithm alg, + const char *buf, + size_t len, + char **digest, + Error **errp); + +/** + * qcrypto_hash_base64v: + * @alg: the hash algorithm + * @iov: the array of memory regions to hash + * @niov: the length of @iov + * @base64: pointer to hold output hash + * @errp: pointer to a NULL-initialized error object + * + * Computes the hash across all the memory regions + * present in @iov. The @base64 pointer will be + * filled with the base64 encoding of the computed + * hash, which will be terminated by '\0'. The + * memory pointer in @base64 must be released + * with a call to g_free() when no longer required. + * + * Returns: 0 on success, -1 on error + */ +int qcrypto_hash_base64v(QCryptoHashAlgorithm alg, + const struct iovec *iov, + size_t niov, + char **base64, + Error **errp); + +/** + * qcrypto_hash_base64: + * @alg: the hash algorithm + * @buf: the memory region to hash + * @len: the length of @buf + * @base64: pointer to hold output hash + * @errp: pointer to a NULL-initialized error object + * + * Computes the hash across all the memory region + * @buf of length @len. The @base64 pointer will be + * filled with the base64 encoding of the computed + * hash, which will be terminated by '\0'. The + * memory pointer in @base64 must be released + * with a call to g_free() when no longer required. + * + * Returns: 0 on success, -1 on error + */ +int qcrypto_hash_base64(QCryptoHashAlgorithm alg, + const char *buf, + size_t len, + char **base64, + Error **errp); + +#endif /* QCRYPTO_HASH_H */ diff --git a/include/crypto/hmac.h b/include/crypto/hmac.h new file mode 100644 index 000000000..ad4d77841 --- /dev/null +++ b/include/crypto/hmac.h @@ -0,0 +1,169 @@ +/* + * QEMU Crypto hmac algorithms + * + * Copyright (c) 2016 HUAWEI TECHNOLOGIES CO., LTD. + * + * This work is licensed under the terms of the GNU GPL, version 2 or + * (at your option) any later version. See the COPYING file in the + * top-level directory. + * + */ + +#ifndef QCRYPTO_HMAC_H +#define QCRYPTO_HMAC_H + +#include "qapi/qapi-types-crypto.h" + +typedef struct QCryptoHmac QCryptoHmac; +struct QCryptoHmac { + QCryptoHashAlgorithm alg; + void *opaque; + void *driver; +}; + +/** + * qcrypto_hmac_supports: + * @alg: the hmac algorithm + * + * Determine if @alg hmac algorithm is supported by + * the current configured build + * + * Returns: + * true if the algorithm is supported, false otherwise + */ +bool qcrypto_hmac_supports(QCryptoHashAlgorithm alg); + +/** + * qcrypto_hmac_new: + * @alg: the hmac algorithm + * @key: the key bytes + * @nkey: the length of @key + * @errp: pointer to a NULL-initialized error object + * + * Creates a new hmac object with the algorithm @alg + * + * The @key parameter provides the bytes representing + * the secret key to use. The @nkey parameter specifies + * the length of @key in bytes + * + * Note: must use qcrypto_hmac_free() to release the + * returned hmac object when no longer required + * + * Returns: + * a new hmac object, or NULL on error + */ +QCryptoHmac *qcrypto_hmac_new(QCryptoHashAlgorithm alg, + const uint8_t *key, size_t nkey, + Error **errp); + +/** + * qcrypto_hmac_free: + * @hmac: the hmac object + * + * Release the memory associated with @hmac that was + * previously allocated by qcrypto_hmac_new() + */ +void qcrypto_hmac_free(QCryptoHmac *hmac); + +G_DEFINE_AUTOPTR_CLEANUP_FUNC(QCryptoHmac, qcrypto_hmac_free) + +/** + * qcrypto_hmac_bytesv: + * @hmac: the hmac object + * @iov: the array of memory regions to hmac + * @niov: the length of @iov + * @result: pointer to hold output hmac + * @resultlen: pointer to hold length of @result + * @errp: pointer to a NULL-initialized error object + * + * Computes the hmac across all the memory regions + * present in @iov. The @result pointer will be + * filled with raw bytes representing the computed + * hmac, which will have length @resultlen. The + * memory pointer in @result must be released + * with a call to g_free() when no longer required. + * + * Returns: + * 0 on success, -1 on error + */ +int qcrypto_hmac_bytesv(QCryptoHmac *hmac, + const struct iovec *iov, + size_t niov, + uint8_t **result, + size_t *resultlen, + Error **errp); + +/** + * qcrypto_hmac_bytes: + * @hmac: the hmac object + * @buf: the memory region to hmac + * @len: the length of @buf + * @result: pointer to hold output hmac + * @resultlen: pointer to hold length of @result + * @errp: pointer to a NULL-initialized error object + * + * Computes the hmac across all the memory region + * @buf of length @len. The @result pointer will be + * filled with raw bytes representing the computed + * hmac, which will have length @resultlen. The + * memory pointer in @result must be released + * with a call to g_free() when no longer required. + * + * Returns: + * 0 on success, -1 on error + */ +int qcrypto_hmac_bytes(QCryptoHmac *hmac, + const char *buf, + size_t len, + uint8_t **result, + size_t *resultlen, + Error **errp); + +/** + * qcrypto_hmac_digestv: + * @hmac: the hmac object + * @iov: the array of memory regions to hmac + * @niov: the length of @iov + * @digest: pointer to hold output hmac + * @errp: pointer to a NULL-initialized error object + * + * Computes the hmac across all the memory regions + * present in @iov. The @digest pointer will be + * filled with the printable hex digest of the computed + * hmac, which will be terminated by '\0'. The + * memory pointer in @digest must be released + * with a call to g_free() when no longer required. + * + * Returns: + * 0 on success, -1 on error + */ +int qcrypto_hmac_digestv(QCryptoHmac *hmac, + const struct iovec *iov, + size_t niov, + char **digest, + Error **errp); + +/** + * qcrypto_hmac_digest: + * @hmac: the hmac object + * @buf: the memory region to hmac + * @len: the length of @buf + * @digest: pointer to hold output hmac + * @errp: pointer to a NULL-initialized error object + * + * Computes the hmac across all the memory region + * @buf of length @len. The @digest pointer will be + * filled with the printable hex digest of the computed + * hmac, which will be terminated by '\0'. The + * memory pointer in @digest must be released + * with a call to g_free() when no longer required. + * + * Returns: 0 on success, -1 on error + */ +int qcrypto_hmac_digest(QCryptoHmac *hmac, + const char *buf, + size_t len, + char **digest, + Error **errp); + +#endif diff --git a/include/crypto/init.h b/include/crypto/init.h new file mode 100644 index 000000000..00e0f637c --- /dev/null +++ b/include/crypto/init.h @@ -0,0 +1,28 @@ +/* + * QEMU Crypto initialization + * + * Copyright (c) 2015 Red Hat, Inc. + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, see . + * + */ + +#ifndef QCRYPTO_INIT_H +#define QCRYPTO_INIT_H + +#include "qapi/error.h" + +int qcrypto_init(Error **errp); + +#endif /* QCRYPTO_INIT_H */ diff --git a/include/crypto/ivgen.h b/include/crypto/ivgen.h new file mode 100644 index 000000000..e41521519 --- /dev/null +++ b/include/crypto/ivgen.h @@ -0,0 +1,208 @@ +/* + * QEMU Crypto block IV generator + * + * Copyright (c) 2015-2016 Red Hat, Inc. + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, see . + * + */ + +#ifndef QCRYPTO_IVGEN_H +#define QCRYPTO_IVGEN_H + +#include "crypto/cipher.h" +#include "crypto/hash.h" + +/** + * This module provides a framework for generating initialization + * vectors for block encryption schemes using chained cipher modes + * CBC. The principle is that each disk sector is assigned a unique + * initialization vector for use for encryption of data in that + * sector. + * + * + * Encrypting block data with initialiation vectors + * + * uint8_t *data = ....data to encrypt... + * size_t ndata = XXX; + * uint8_t *key = ....some encryption key... + * size_t nkey = XXX; + * uint8_t *iv; + * size_t niv; + * size_t sector = 0; + * + * g_assert((ndata % 512) == 0); + * + * QCryptoIVGen *ivgen = qcrypto_ivgen_new(QCRYPTO_IVGEN_ALG_ESSIV, + * QCRYPTO_CIPHER_ALG_AES_128, + * QCRYPTO_HASH_ALG_SHA256, + * key, nkey, errp); + * if (!ivgen) { + * return -1; + * } + * + * QCryptoCipher *cipher = qcrypto_cipher_new(QCRYPTO_CIPHER_ALG_AES_128, + * QCRYPTO_CIPHER_MODE_CBC, + * key, nkey, errp); + * if (!cipher) { + * goto error; + * } + * + * niv = qcrypto_cipher_get_iv_len(QCRYPTO_CIPHER_ALG_AES_128, + * QCRYPTO_CIPHER_MODE_CBC); + * iv = g_new0(uint8_t, niv); + * + * + * while (ndata) { + * if (qcrypto_ivgen_calculate(ivgen, sector, iv, niv, errp) < 0) { + * goto error; + * } + * if (qcrypto_cipher_setiv(cipher, iv, niv, errp) < 0) { + * goto error; + * } + * if (qcrypto_cipher_encrypt(cipher, + * data + (sector * 512), + * data + (sector * 512), + * 512, errp) < 0) { + * goto error; + * } + * sector++; + * ndata -= 512; + * } + * + * g_free(iv); + * qcrypto_ivgen_free(ivgen); + * qcrypto_cipher_free(cipher); + * return 0; + * + *error: + * g_free(iv); + * qcrypto_ivgen_free(ivgen); + * qcrypto_cipher_free(cipher); + * return -1; + * + * + */ + +typedef struct QCryptoIVGen QCryptoIVGen; + +/* See also QCryptoIVGenAlgorithm enum in qapi/crypto.json */ + + +/** + * qcrypto_ivgen_new: + * @alg: the initialization vector generation algorithm + * @cipheralg: the cipher algorithm or 0 + * @hash: the hash algorithm or 0 + * @key: the encryption key or NULL + * @nkey: the size of @key in bytes + * + * Create a new initialization vector generator that uses + * the algorithm @alg. Whether the remaining parameters + * are required or not depends on the choice of @alg + * requested. + * + * - QCRYPTO_IVGEN_ALG_PLAIN + * + * The IVs are generated by the 32-bit truncated sector + * number. This should never be used for block devices + * that are larger than 2^32 sectors in size. + * All the other parameters are unused. + * + * - QCRYPTO_IVGEN_ALG_PLAIN64 + * + * The IVs are generated by the 64-bit sector number. + * All the other parameters are unused. + * + * - QCRYPTO_IVGEN_ALG_ESSIV: + * + * The IVs are generated by encrypting the 64-bit sector + * number with a hash of an encryption key. The @cipheralg, + * @hash, @key and @nkey parameters are all required. + * + * Returns: a new IV generator, or NULL on error + */ +QCryptoIVGen *qcrypto_ivgen_new(QCryptoIVGenAlgorithm alg, + QCryptoCipherAlgorithm cipheralg, + QCryptoHashAlgorithm hash, + const uint8_t *key, size_t nkey, + Error **errp); + +/** + * qcrypto_ivgen_calculate: + * @ivgen: the IV generator object + * @sector: the 64-bit sector number + * @iv: a pre-allocated buffer to hold the generated IV + * @niv: the number of bytes in @iv + * @errp: pointer to a NULL-initialized error object + * + * Calculate a new initialiation vector for the data + * to be stored in sector @sector. The IV will be + * written into the buffer @iv of size @niv. + * + * Returns: 0 on success, -1 on error + */ +int qcrypto_ivgen_calculate(QCryptoIVGen *ivgen, + uint64_t sector, + uint8_t *iv, size_t niv, + Error **errp); + + +/** + * qcrypto_ivgen_get_algorithm: + * @ivgen: the IV generator object + * + * Get the algorithm used by this IV generator + * + * Returns: the IV generator algorithm + */ +QCryptoIVGenAlgorithm qcrypto_ivgen_get_algorithm(QCryptoIVGen *ivgen); + + +/** + * qcrypto_ivgen_get_cipher: + * @ivgen: the IV generator object + * + * Get the cipher algorithm used by this IV generator (if + * applicable) + * + * Returns: the cipher algorithm + */ +QCryptoCipherAlgorithm qcrypto_ivgen_get_cipher(QCryptoIVGen *ivgen); + + +/** + * qcrypto_ivgen_get_hash: + * @ivgen: the IV generator object + * + * Get the hash algorithm used by this IV generator (if + * applicable) + * + * Returns: the hash algorithm + */ +QCryptoHashAlgorithm qcrypto_ivgen_get_hash(QCryptoIVGen *ivgen); + + +/** + * qcrypto_ivgen_free: + * @ivgen: the IV generator object + * + * Release all resources associated with @ivgen, or a no-op + * if @ivgen is NULL + */ +void qcrypto_ivgen_free(QCryptoIVGen *ivgen); + +G_DEFINE_AUTOPTR_CLEANUP_FUNC(QCryptoIVGen, qcrypto_ivgen_free) + +#endif /* QCRYPTO_IVGEN_H */ diff --git a/include/crypto/pbkdf.h b/include/crypto/pbkdf.h new file mode 100644 index 000000000..2c31a44a2 --- /dev/null +++ b/include/crypto/pbkdf.h @@ -0,0 +1,156 @@ +/* + * QEMU Crypto PBKDF support (Password-Based Key Derivation Function) + * + * Copyright (c) 2015-2016 Red Hat, Inc. + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, see . + * + */ + +#ifndef QCRYPTO_PBKDF_H +#define QCRYPTO_PBKDF_H + +#include "crypto/hash.h" + +/** + * This module provides an interface to the PBKDF2 algorithm + * + * https://en.wikipedia.org/wiki/PBKDF2 + * + * + * Generating an AES encryption key from a user password + * + * #include "crypto/cipher.h" + * #include "crypto/random.h" + * #include "crypto/pbkdf.h" + * + * .... + * + * char *password = "a-typical-awful-user-password"; + * size_t nkey = qcrypto_cipher_get_key_len(QCRYPTO_CIPHER_ALG_AES_128); + * uint8_t *salt = g_new0(uint8_t, nkey); + * uint8_t *key = g_new0(uint8_t, nkey); + * int iterations; + * QCryptoCipher *cipher; + * + * if (qcrypto_random_bytes(salt, nkey, errp) < 0) { + * g_free(key); + * g_free(salt); + * return -1; + * } + * + * iterations = qcrypto_pbkdf2_count_iters(QCRYPTO_HASH_ALG_SHA256, + * (const uint8_t *)password, + * strlen(password), + * salt, nkey, errp); + * if (iterations < 0) { + * g_free(key); + * g_free(salt); + * return -1; + * } + * + * if (qcrypto_pbkdf2(QCRYPTO_HASH_ALG_SHA256, + * (const uint8_t *)password, strlen(password), + * salt, nkey, iterations, key, nkey, errp) < 0) { + * g_free(key); + * g_free(salt); + * return -1; + * } + * + * g_free(salt); + * + * cipher = qcrypto_cipher_new(QCRYPTO_CIPHER_ALG_AES_128, + * QCRYPTO_CIPHER_MODE_ECB, + * key, nkey, errp); + * g_free(key); + * + * ....encrypt some data... + * + * qcrypto_cipher_free(cipher); + * + * + * + */ + +/** + * qcrypto_pbkdf2_supports: + * @hash: the hash algorithm + * + * Determine if the current build supports the PBKDF2 algorithm + * in combination with the hash @hash. + * + * Returns true if supported, false otherwise + */ +bool qcrypto_pbkdf2_supports(QCryptoHashAlgorithm hash); + + +/** + * qcrypto_pbkdf2: + * @hash: the hash algorithm to use + * @key: the user password / key + * @nkey: the length of @key in bytes + * @salt: a random salt + * @nsalt: length of @salt in bytes + * @iterations: the number of iterations to compute + * @out: pointer to pre-allocated buffer to hold output + * @nout: length of @out in bytes + * @errp: pointer to a NULL-initialized error object + * + * Apply the PBKDF2 algorithm to derive an encryption + * key from a user password provided in @key. The + * @salt parameter is used to perturb the algorithm. + * The @iterations count determines how many times + * the hashing process is run, which influences how + * hard it is to crack the key. The number of @iterations + * should be large enough such that the algorithm takes + * 1 second or longer to derive a key. The derived key + * will be stored in the preallocated buffer @out. + * + * Returns: 0 on success, -1 on error + */ +int qcrypto_pbkdf2(QCryptoHashAlgorithm hash, + const uint8_t *key, size_t nkey, + const uint8_t *salt, size_t nsalt, + uint64_t iterations, + uint8_t *out, size_t nout, + Error **errp); + +/** + * qcrypto_pbkdf2_count_iters: + * @hash: the hash algorithm to use + * @key: the user password / key + * @nkey: the length of @key in bytes + * @salt: a random salt + * @nsalt: length of @salt in bytes + * @nout: size of desired derived key + * @errp: pointer to a NULL-initialized error object + * + * Time the PBKDF2 algorithm to determine how many + * iterations are required to derive an encryption + * key from a user password provided in @key in 1 + * second of compute time. The result of this can + * be used as a the @iterations parameter of a later + * call to qcrypto_pbkdf2(). The value of @nout should + * match that value that will later be provided with + * a call to qcrypto_pbkdf2(). + * + * Returns: number of iterations in 1 second, -1 on error + */ +uint64_t qcrypto_pbkdf2_count_iters(QCryptoHashAlgorithm hash, + const uint8_t *key, size_t nkey, + const uint8_t *salt, size_t nsalt, + size_t nout, + Error **errp); + +#endif /* QCRYPTO_PBKDF_H */ diff --git a/include/crypto/random.h b/include/crypto/random.h new file mode 100644 index 000000000..325ff075d --- /dev/null +++ b/include/crypto/random.h @@ -0,0 +1,50 @@ +/* + * QEMU Crypto random number provider + * + * Copyright (c) 2015-2016 Red Hat, Inc. + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, see . + * + */ + +#ifndef QCRYPTO_RANDOM_H +#define QCRYPTO_RANDOM_H + + +/** + * qcrypto_random_bytes: + * @buf: the buffer to fill + * @buflen: length of @buf in bytes + * @errp: pointer to a NULL-initialized error object + * + * Fill @buf with @buflen bytes of cryptographically strong + * random data + * + * Returns 0 on success, -1 on error + */ +int qcrypto_random_bytes(void *buf, + size_t buflen, + Error **errp); + +/** + * qcrypto_random_init: + * @errp: pointer to a NULL-initialized error object + * + * Initializes the handles used by qcrypto_random_bytes + * + * Returns 0 on success, -1 on error + */ +int qcrypto_random_init(Error **errp); + +#endif /* QCRYPTO_RANDOM_H */ diff --git a/include/crypto/secret.h b/include/crypto/secret.h new file mode 100644 index 000000000..5d20ae6d2 --- /dev/null +++ b/include/crypto/secret.h @@ -0,0 +1,133 @@ +/* + * QEMU crypto secret support + * + * Copyright (c) 2015 Red Hat, Inc. + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, see . + * + */ + +#ifndef QCRYPTO_SECRET_H +#define QCRYPTO_SECRET_H + +#include "qapi/qapi-types-crypto.h" +#include "qom/object.h" +#include "crypto/secret_common.h" + +#define TYPE_QCRYPTO_SECRET "secret" +typedef struct QCryptoSecret QCryptoSecret; +DECLARE_INSTANCE_CHECKER(QCryptoSecret, QCRYPTO_SECRET, + TYPE_QCRYPTO_SECRET) + +typedef struct QCryptoSecretClass QCryptoSecretClass; + +/** + * QCryptoSecret: + * + * The QCryptoSecret object provides storage of secrets, + * which may be user passwords, encryption keys or any + * other kind of sensitive data that is represented as + * a sequence of bytes. + * + * The sensitive data associated with the secret can + * be provided directly via the 'data' property, or + * indirectly via the 'file' property. In the latter + * case there is support for file descriptor passing + * via the usual /dev/fdset/NN syntax that QEMU uses. + * + * The data for a secret can be provided in two formats, + * either as a UTF-8 string (the default), or as base64 + * encoded 8-bit binary data. The latter is appropriate + * for raw encryption keys, while the former is appropriate + * for user entered passwords. + * + * The data may be optionally encrypted with AES-256-CBC, + * and the decryption key provided by another + * QCryptoSecret instance identified by the 'keyid' + * property. When passing sensitive data directly + * via the 'data' property it is strongly recommended + * to use the AES encryption facility to prevent the + * sensitive data being exposed in the process listing + * or system log files. + * + * Providing data directly, insecurely (suitable for + * ad hoc developer testing only) + * + * $QEMU -object secret,id=sec0,data=letmein + * + * Providing data indirectly: + * + * # printf "letmein" > password.txt + * # $QEMU \ + * -object secret,id=sec0,file=password.txt + * + * Using a master encryption key with data. + * + * The master key needs to be created as 32 secure + * random bytes (optionally base64 encoded) + * + * # openssl rand -base64 32 > key.b64 + * # KEY=$(base64 -d key.b64 | hexdump -v -e '/1 "%02X"') + * + * Each secret to be encrypted needs to have a random + * initialization vector generated. These do not need + * to be kept secret + * + * # openssl rand -base64 16 > iv.b64 + * # IV=$(base64 -d iv.b64 | hexdump -v -e '/1 "%02X"') + * + * A secret to be defined can now be encrypted + * + * # SECRET=$(printf "letmein" | + * openssl enc -aes-256-cbc -a -K $KEY -iv $IV) + * + * When launching QEMU, create a master secret pointing + * to key.b64 and specify that to be used to decrypt + * the user password + * + * # $QEMU \ + * -object secret,id=secmaster0,format=base64,file=key.b64 \ + * -object secret,id=sec0,keyid=secmaster0,format=base64,\ + * data=$SECRET,iv=$(. + * + */ + +#ifndef QCRYPTO_SECRET_COMMON_H +#define QCRYPTO_SECRET_COMMON_H + +#include "qapi/qapi-types-crypto.h" +#include "qom/object.h" + +#define TYPE_QCRYPTO_SECRET_COMMON "secret_common" +OBJECT_DECLARE_TYPE(QCryptoSecretCommon, QCryptoSecretCommonClass, + QCRYPTO_SECRET_COMMON) + + +struct QCryptoSecretCommon { + Object parent_obj; + uint8_t *rawdata; + size_t rawlen; + QCryptoSecretFormat format; + char *keyid; + char *iv; +}; + + +struct QCryptoSecretCommonClass { + ObjectClass parent_class; + void (*load_data)(QCryptoSecretCommon *secret, + uint8_t **output, + size_t *outputlen, + Error **errp); +}; + + +extern int qcrypto_secret_lookup(const char *secretid, + uint8_t **data, + size_t *datalen, + Error **errp); +extern char *qcrypto_secret_lookup_as_utf8(const char *secretid, + Error **errp); +extern char *qcrypto_secret_lookup_as_base64(const char *secretid, + Error **errp); + +#endif /* QCRYPTO_SECRET_COMMON_H */ diff --git a/include/crypto/secret_keyring.h b/include/crypto/secret_keyring.h new file mode 100644 index 000000000..3758852cb --- /dev/null +++ b/include/crypto/secret_keyring.h @@ -0,0 +1,40 @@ +/* + * QEMU crypto secret support + * + * Copyright 2020 Yandex N.V. + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, see . + * + */ + +#ifndef QCRYPTO_SECRET_KEYRING_H +#define QCRYPTO_SECRET_KEYRING_H + +#include "qapi/qapi-types-crypto.h" +#include "qom/object.h" +#include "crypto/secret_common.h" + +#define TYPE_QCRYPTO_SECRET_KEYRING "secret_keyring" +OBJECT_DECLARE_SIMPLE_TYPE(QCryptoSecretKeyring, + QCRYPTO_SECRET_KEYRING) + + +struct QCryptoSecretKeyring { + QCryptoSecretCommon parent; + int32_t serial; +}; + + + +#endif /* QCRYPTO_SECRET_KEYRING_H */ diff --git a/include/crypto/tls-cipher-suites.h b/include/crypto/tls-cipher-suites.h new file mode 100644 index 000000000..bb9ee53e0 --- /dev/null +++ b/include/crypto/tls-cipher-suites.h @@ -0,0 +1,40 @@ +/* + * QEMU TLS Cipher Suites Registry (RFC8447) + * + * Copyright (c) 2018-2020 Red Hat, Inc. + * + * Author: Philippe Mathieu-Daudé + * + * SPDX-License-Identifier: GPL-2.0-or-later + */ + +#ifndef QCRYPTO_TLSCIPHERSUITES_H +#define QCRYPTO_TLSCIPHERSUITES_H + +#include "qom/object.h" +#include "crypto/tlscreds.h" + +#define TYPE_QCRYPTO_TLS_CIPHER_SUITES "tls-cipher-suites" +typedef struct QCryptoTLSCipherSuites QCryptoTLSCipherSuites; +DECLARE_INSTANCE_CHECKER(QCryptoTLSCipherSuites, QCRYPTO_TLS_CIPHER_SUITES, + TYPE_QCRYPTO_TLS_CIPHER_SUITES) + +struct QCryptoTLSCipherSuites { + /* */ + QCryptoTLSCreds parent_obj; + /* */ +}; + +/** + * qcrypto_tls_cipher_suites_get_data: + * @obj: pointer to a TLS cipher suites object + * @errp: pointer to a NULL-initialized error object + * + * Returns: reference to a byte array containing the data. + * The caller should release the reference when no longer + * required. + */ +GByteArray *qcrypto_tls_cipher_suites_get_data(QCryptoTLSCipherSuites *obj, + Error **errp); + +#endif /* QCRYPTO_TLSCIPHERSUITES_H */ diff --git a/include/crypto/tlscreds.h b/include/crypto/tlscreds.h new file mode 100644 index 000000000..079e37604 --- /dev/null +++ b/include/crypto/tlscreds.h @@ -0,0 +1,67 @@ +/* + * QEMU crypto TLS credential support + * + * Copyright (c) 2015 Red Hat, Inc. + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, see . + * + */ + +#ifndef QCRYPTO_TLSCREDS_H +#define QCRYPTO_TLSCREDS_H + +#include "qapi/qapi-types-crypto.h" +#include "qom/object.h" + +#ifdef CONFIG_GNUTLS +#include +#endif + +#define TYPE_QCRYPTO_TLS_CREDS "tls-creds" +typedef struct QCryptoTLSCreds QCryptoTLSCreds; +DECLARE_INSTANCE_CHECKER(QCryptoTLSCreds, QCRYPTO_TLS_CREDS, + TYPE_QCRYPTO_TLS_CREDS) + +typedef struct QCryptoTLSCredsClass QCryptoTLSCredsClass; + +#define QCRYPTO_TLS_CREDS_DH_PARAMS "dh-params.pem" + + +/** + * QCryptoTLSCreds: + * + * The QCryptoTLSCreds object is an abstract base for different + * types of TLS handshake credentials. Most commonly the + * QCryptoTLSCredsX509 subclass will be used to provide x509 + * certificate credentials. + */ + +struct QCryptoTLSCreds { + Object parent_obj; + char *dir; + QCryptoTLSCredsEndpoint endpoint; +#ifdef CONFIG_GNUTLS + gnutls_dh_params_t dh_params; +#endif + bool verifyPeer; + char *priority; +}; + + +struct QCryptoTLSCredsClass { + ObjectClass parent_class; +}; + + +#endif /* QCRYPTO_TLSCREDS_H */ diff --git a/include/crypto/tlscredsanon.h b/include/crypto/tlscredsanon.h new file mode 100644 index 000000000..3f464a380 --- /dev/null +++ b/include/crypto/tlscredsanon.h @@ -0,0 +1,112 @@ +/* + * QEMU crypto TLS anonymous credential support + * + * Copyright (c) 2015 Red Hat, Inc. + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, see . + * + */ + +#ifndef QCRYPTO_TLSCREDSANON_H +#define QCRYPTO_TLSCREDSANON_H + +#include "crypto/tlscreds.h" +#include "qom/object.h" + +#define TYPE_QCRYPTO_TLS_CREDS_ANON "tls-creds-anon" +typedef struct QCryptoTLSCredsAnon QCryptoTLSCredsAnon; +DECLARE_INSTANCE_CHECKER(QCryptoTLSCredsAnon, QCRYPTO_TLS_CREDS_ANON, + TYPE_QCRYPTO_TLS_CREDS_ANON) + + +typedef struct QCryptoTLSCredsAnonClass QCryptoTLSCredsAnonClass; + +/** + * QCryptoTLSCredsAnon: + * + * The QCryptoTLSCredsAnon object provides a representation + * of anonymous credentials used perform a TLS handshake. + * This is primarily provided for backwards compatibility and + * its use is discouraged as it has poor security characteristics + * due to lacking MITM attack protection amongst other problems. + * + * This is a user creatable object, which can be instantiated + * via object_new_propv(): + * + * + * Creating anonymous TLS credential objects in code + * + * Object *obj; + * Error *err = NULL; + * obj = object_new_propv(TYPE_QCRYPTO_TLS_CREDS_ANON, + * "tlscreds0", + * &err, + * "endpoint", "server", + * "dir", "/path/x509/cert/dir", + * "verify-peer", "yes", + * NULL); + * + * + * + * Or via QMP: + * + * + * Creating anonymous TLS credential objects via QMP + * + * { + * "execute": "object-add", "arguments": { + * "id": "tlscreds0", + * "qom-type": "tls-creds-anon", + * "props": { + * "endpoint": "server", + * "dir": "/path/to/x509/cert/dir", + * "verify-peer": false + * } + * } + * } + * + * + * + * + * Or via the CLI: + * + * + * Creating anonymous TLS credential objects via CLI + * + * qemu-system-x86_64 -object tls-creds-anon,id=tlscreds0,\ + * endpoint=server,verify-peer=off,\ + * dir=/path/to/x509/certdir/ + * + * + * + */ + + +struct QCryptoTLSCredsAnon { + QCryptoTLSCreds parent_obj; +#ifdef CONFIG_GNUTLS + union { + gnutls_anon_server_credentials_t server; + gnutls_anon_client_credentials_t client; + } data; +#endif +}; + + +struct QCryptoTLSCredsAnonClass { + QCryptoTLSCredsClass parent_class; +}; + + +#endif /* QCRYPTO_TLSCREDSANON_H */ diff --git a/include/crypto/tlscredspsk.h b/include/crypto/tlscredspsk.h new file mode 100644 index 000000000..d7e6bdb5e --- /dev/null +++ b/include/crypto/tlscredspsk.h @@ -0,0 +1,107 @@ +/* + * QEMU crypto TLS Pre-Shared Key (PSK) support + * + * Copyright (c) 2018 Red Hat, Inc. + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, see . + * + */ + +#ifndef QCRYPTO_TLSCREDSPSK_H +#define QCRYPTO_TLSCREDSPSK_H + +#include "crypto/tlscreds.h" +#include "qom/object.h" + +#define TYPE_QCRYPTO_TLS_CREDS_PSK "tls-creds-psk" +typedef struct QCryptoTLSCredsPSK QCryptoTLSCredsPSK; +DECLARE_INSTANCE_CHECKER(QCryptoTLSCredsPSK, QCRYPTO_TLS_CREDS_PSK, + TYPE_QCRYPTO_TLS_CREDS_PSK) + +typedef struct QCryptoTLSCredsPSKClass QCryptoTLSCredsPSKClass; + +#define QCRYPTO_TLS_CREDS_PSKFILE "keys.psk" + +/** + * QCryptoTLSCredsPSK: + * + * The QCryptoTLSCredsPSK object provides a representation + * of the Pre-Shared Key credential used to perform a TLS handshake. + * + * This is a user creatable object, which can be instantiated + * via object_new_propv(): + * + * + * Creating TLS-PSK credential objects in code + * + * Object *obj; + * Error *err = NULL; + * obj = object_new_propv(TYPE_QCRYPTO_TLS_CREDS_PSK, + * "tlscreds0", + * &err, + * "dir", "/path/to/dir", + * "endpoint", "client", + * NULL); + * + * + * + * Or via QMP: + * + * + * Creating TLS-PSK credential objects via QMP + * + * { + * "execute": "object-add", "arguments": { + * "id": "tlscreds0", + * "qom-type": "tls-creds-psk", + * "props": { + * "dir": "/path/to/dir", + * "endpoint": "client" + * } + * } + * } + * + * + * + * Or via the CLI: + * + * + * Creating TLS-PSK credential objects via CLI + * + * qemu-system-x86_64 --object tls-creds-psk,id=tlscreds0,\ + * endpoint=client,dir=/path/to/dir[,username=qemu] + * + * + * + * The PSK file can be created and managed using psktool. + */ + +struct QCryptoTLSCredsPSK { + QCryptoTLSCreds parent_obj; + char *username; +#ifdef CONFIG_GNUTLS + union { + gnutls_psk_server_credentials_t server; + gnutls_psk_client_credentials_t client; + } data; +#endif +}; + + +struct QCryptoTLSCredsPSKClass { + QCryptoTLSCredsClass parent_class; +}; + + +#endif /* QCRYPTO_TLSCREDSPSK_H */ diff --git a/include/crypto/tlscredsx509.h b/include/crypto/tlscredsx509.h new file mode 100644 index 000000000..c6d89b788 --- /dev/null +++ b/include/crypto/tlscredsx509.h @@ -0,0 +1,114 @@ +/* + * QEMU crypto TLS x509 credential support + * + * Copyright (c) 2015 Red Hat, Inc. + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, see . + * + */ + +#ifndef QCRYPTO_TLSCREDSX509_H +#define QCRYPTO_TLSCREDSX509_H + +#include "crypto/tlscreds.h" +#include "qom/object.h" + +#define TYPE_QCRYPTO_TLS_CREDS_X509 "tls-creds-x509" +typedef struct QCryptoTLSCredsX509 QCryptoTLSCredsX509; +DECLARE_INSTANCE_CHECKER(QCryptoTLSCredsX509, QCRYPTO_TLS_CREDS_X509, + TYPE_QCRYPTO_TLS_CREDS_X509) + +typedef struct QCryptoTLSCredsX509Class QCryptoTLSCredsX509Class; + +#define QCRYPTO_TLS_CREDS_X509_CA_CERT "ca-cert.pem" +#define QCRYPTO_TLS_CREDS_X509_CA_CRL "ca-crl.pem" +#define QCRYPTO_TLS_CREDS_X509_SERVER_KEY "server-key.pem" +#define QCRYPTO_TLS_CREDS_X509_SERVER_CERT "server-cert.pem" +#define QCRYPTO_TLS_CREDS_X509_CLIENT_KEY "client-key.pem" +#define QCRYPTO_TLS_CREDS_X509_CLIENT_CERT "client-cert.pem" + + +/** + * QCryptoTLSCredsX509: + * + * The QCryptoTLSCredsX509 object provides a representation + * of x509 credentials used to perform a TLS handshake. + * + * This is a user creatable object, which can be instantiated + * via object_new_propv(): + * + * + * Creating x509 TLS credential objects in code + * + * Object *obj; + * Error *err = NULL; + * obj = object_new_propv(TYPE_QCRYPTO_TLS_CREDS_X509, + * "tlscreds0", + * &err, + * "endpoint", "server", + * "dir", "/path/x509/cert/dir", + * "verify-peer", "yes", + * NULL); + * + * + * + * Or via QMP: + * + * + * Creating x509 TLS credential objects via QMP + * + * { + * "execute": "object-add", "arguments": { + * "id": "tlscreds0", + * "qom-type": "tls-creds-x509", + * "props": { + * "endpoint": "server", + * "dir": "/path/to/x509/cert/dir", + * "verify-peer": false + * } + * } + * } + * + * + * + * + * Or via the CLI: + * + * + * Creating x509 TLS credential objects via CLI + * + * qemu-system-x86_64 -object tls-creds-x509,id=tlscreds0,\ + * endpoint=server,verify-peer=off,\ + * dir=/path/to/x509/certdir/ + * + * + * + */ + +struct QCryptoTLSCredsX509 { + QCryptoTLSCreds parent_obj; +#ifdef CONFIG_GNUTLS + gnutls_certificate_credentials_t data; +#endif + bool sanityCheck; + char *passwordid; +}; + + +struct QCryptoTLSCredsX509Class { + QCryptoTLSCredsClass parent_class; +}; + + +#endif /* QCRYPTO_TLSCREDSX509_H */ diff --git a/include/crypto/tlssession.h b/include/crypto/tlssession.h new file mode 100644 index 000000000..15b9cef08 --- /dev/null +++ b/include/crypto/tlssession.h @@ -0,0 +1,324 @@ +/* + * QEMU crypto TLS session support + * + * Copyright (c) 2015 Red Hat, Inc. + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, see . + * + */ + +#ifndef QCRYPTO_TLSSESSION_H +#define QCRYPTO_TLSSESSION_H + +#include "crypto/tlscreds.h" + +/** + * QCryptoTLSSession: + * + * The QCryptoTLSSession object encapsulates the + * logic to integrate with a TLS providing library such + * as GNUTLS, to setup and run TLS sessions. + * + * The API is designed such that it has no assumption about + * the type of transport it is running over. It may be a + * traditional TCP socket, or something else entirely. The + * only requirement is a full-duplex stream of some kind. + * + * + * Using TLS session objects + * + * static ssize_t mysock_send(const char *buf, size_t len, + * void *opaque) + * { + * int fd = GPOINTER_TO_INT(opaque); + * + * return write(*fd, buf, len); + * } + * + * static ssize_t mysock_recv(const char *buf, size_t len, + * void *opaque) + * { + * int fd = GPOINTER_TO_INT(opaque); + * + * return read(*fd, buf, len); + * } + * + * static int mysock_run_tls(int sockfd, + * QCryptoTLSCreds *creds, + * Error **errp) + * { + * QCryptoTLSSession *sess; + * + * sess = qcrypto_tls_session_new(creds, + * "vnc.example.com", + * NULL, + * QCRYPTO_TLS_CREDS_ENDPOINT_CLIENT, + * errp); + * if (sess == NULL) { + * return -1; + * } + * + * qcrypto_tls_session_set_callbacks(sess, + * mysock_send, + * mysock_recv + * GINT_TO_POINTER(fd)); + * + * while (1) { + * if (qcrypto_tls_session_handshake(sess, errp) < 0) { + * qcrypto_tls_session_free(sess); + * return -1; + * } + * + * switch(qcrypto_tls_session_get_handshake_status(sess)) { + * case QCRYPTO_TLS_HANDSHAKE_COMPLETE: + * if (qcrypto_tls_session_check_credentials(sess, errp) < )) { + * qcrypto_tls_session_free(sess); + * return -1; + * } + * goto done; + * case QCRYPTO_TLS_HANDSHAKE_RECVING: + * ...wait for GIO_IN event on fd... + * break; + * case QCRYPTO_TLS_HANDSHAKE_SENDING: + * ...wait for GIO_OUT event on fd... + * break; + * } + * } + * done: + * + * ....send/recv payload data on sess... + * + * qcrypto_tls_session_free(sess): + * } + * + * + */ + +typedef struct QCryptoTLSSession QCryptoTLSSession; + + +/** + * qcrypto_tls_session_new: + * @creds: pointer to a TLS credentials object + * @hostname: optional hostname to validate + * @aclname: optional ACL to validate peer credentials against + * @endpoint: role of the TLS session, client or server + * @errp: pointer to a NULL-initialized error object + * + * Create a new TLS session object that will be used to + * negotiate a TLS session over an arbitrary data channel. + * The session object can operate as either the server or + * client, according to the value of the @endpoint argument. + * + * For clients, the @hostname parameter should hold the full + * unmodified hostname as requested by the user. This will + * be used to verify the against the hostname reported in + * the server's credentials (aka x509 certificate). + * + * The @aclname parameter (optionally) specifies the name + * of an access control list that will be used to validate + * the peer's credentials. For x509 credentials, the ACL + * will be matched against the CommonName shown in the peer's + * certificate. If the session is acting as a server, setting + * an ACL will require that the client provide a validate + * x509 client certificate. + * + * After creating the session object, the I/O callbacks + * must be set using the qcrypto_tls_session_set_callbacks() + * method. A TLS handshake sequence must then be completed + * using qcrypto_tls_session_handshake(), before payload + * data is permitted to be sent/received. + * + * The session object must be released by calling + * qcrypto_tls_session_free() when no longer required + * + * Returns: a TLS session object, or NULL on error. + */ +QCryptoTLSSession *qcrypto_tls_session_new(QCryptoTLSCreds *creds, + const char *hostname, + const char *aclname, + QCryptoTLSCredsEndpoint endpoint, + Error **errp); + +/** + * qcrypto_tls_session_free: + * @sess: the TLS session object + * + * Release all memory associated with the TLS session + * object previously allocated by qcrypto_tls_session_new() + */ +void qcrypto_tls_session_free(QCryptoTLSSession *sess); + +G_DEFINE_AUTOPTR_CLEANUP_FUNC(QCryptoTLSSession, qcrypto_tls_session_free) + +/** + * qcrypto_tls_session_check_credentials: + * @sess: the TLS session object + * @errp: pointer to a NULL-initialized error object + * + * Validate the peer's credentials after a successful + * TLS handshake. It is an error to call this before + * qcrypto_tls_session_get_handshake_status() returns + * QCRYPTO_TLS_HANDSHAKE_COMPLETE + * + * Returns 0 if the credentials validated, -1 on error + */ +int qcrypto_tls_session_check_credentials(QCryptoTLSSession *sess, + Error **errp); + +typedef ssize_t (*QCryptoTLSSessionWriteFunc)(const char *buf, + size_t len, + void *opaque); +typedef ssize_t (*QCryptoTLSSessionReadFunc)(char *buf, + size_t len, + void *opaque); + +/** + * qcrypto_tls_session_set_callbacks: + * @sess: the TLS session object + * @writeFunc: callback for sending data + * @readFunc: callback to receiving data + * @opaque: data to pass to callbacks + * + * Sets the callback functions that are to be used for sending + * and receiving data on the underlying data channel. Typically + * the callbacks to write/read to/from a TCP socket, but there + * is no assumption made about the type of channel used. + * + * The @writeFunc callback will be passed the encrypted + * data to send to the remote peer. + * + * The @readFunc callback will be passed a pointer to fill + * with encrypted data received from the remote peer + */ +void qcrypto_tls_session_set_callbacks(QCryptoTLSSession *sess, + QCryptoTLSSessionWriteFunc writeFunc, + QCryptoTLSSessionReadFunc readFunc, + void *opaque); + +/** + * qcrypto_tls_session_write: + * @sess: the TLS session object + * @buf: the plain text to send + * @len: the length of @buf + * + * Encrypt @len bytes of the data in @buf and send + * it to the remote peer using the callback previously + * registered with qcrypto_tls_session_set_callbacks() + * + * It is an error to call this before + * qcrypto_tls_session_get_handshake_status() returns + * QCRYPTO_TLS_HANDSHAKE_COMPLETE + * + * Returns: the number of bytes sent, or -1 on error + */ +ssize_t qcrypto_tls_session_write(QCryptoTLSSession *sess, + const char *buf, + size_t len); + +/** + * qcrypto_tls_session_read: + * @sess: the TLS session object + * @buf: to fill with plain text received + * @len: the length of @buf + * + * Receive up to @len bytes of data from the remote peer + * using the callback previously registered with + * qcrypto_tls_session_set_callbacks(), decrypt it and + * store it in @buf. + * + * It is an error to call this before + * qcrypto_tls_session_get_handshake_status() returns + * QCRYPTO_TLS_HANDSHAKE_COMPLETE + * + * Returns: the number of bytes received, or -1 on error + */ +ssize_t qcrypto_tls_session_read(QCryptoTLSSession *sess, + char *buf, + size_t len); + +/** + * qcrypto_tls_session_handshake: + * @sess: the TLS session object + * @errp: pointer to a NULL-initialized error object + * + * Start, or continue, a TLS handshake sequence. If + * the underlying data channel is non-blocking, then + * this method may return control before the handshake + * is complete. On non-blocking channels the + * qcrypto_tls_session_get_handshake_status() method + * should be used to determine whether the handshake + * has completed, or is waiting to send or receive + * data. In the latter cases, the caller should setup + * an event loop watch and call this method again + * once the underlying data channel is ready to read + * or write again + */ +int qcrypto_tls_session_handshake(QCryptoTLSSession *sess, + Error **errp); + +typedef enum { + QCRYPTO_TLS_HANDSHAKE_COMPLETE, + QCRYPTO_TLS_HANDSHAKE_SENDING, + QCRYPTO_TLS_HANDSHAKE_RECVING, +} QCryptoTLSSessionHandshakeStatus; + +/** + * qcrypto_tls_session_get_handshake_status: + * @sess: the TLS session object + * + * Check the status of the TLS handshake. This + * is used with non-blocking data channels to + * determine whether the handshake is waiting + * to send or receive further data to/from the + * remote peer. + * + * Once this returns QCRYPTO_TLS_HANDSHAKE_COMPLETE + * it is permitted to send/receive payload data on + * the channel + */ +QCryptoTLSSessionHandshakeStatus +qcrypto_tls_session_get_handshake_status(QCryptoTLSSession *sess); + +/** + * qcrypto_tls_session_get_key_size: + * @sess: the TLS session object + * @errp: pointer to a NULL-initialized error object + * + * Check the size of the data channel encryption key + * + * Returns: the length in bytes of the encryption key + * or -1 on error + */ +int qcrypto_tls_session_get_key_size(QCryptoTLSSession *sess, + Error **errp); + +/** + * qcrypto_tls_session_get_peer_name: + * @sess: the TLS session object + * + * Get the identified name of the remote peer. If the + * TLS session was negotiated using x509 certificate + * credentials, this will return the CommonName from + * the peer's certificate. If no identified name is + * available it will return NULL. + * + * The returned data must be released with g_free() + * when no longer required. + * + * Returns: the peer's name or NULL. + */ +char *qcrypto_tls_session_get_peer_name(QCryptoTLSSession *sess); + +#endif /* QCRYPTO_TLSSESSION_H */ diff --git a/include/crypto/xts.h b/include/crypto/xts.h new file mode 100644 index 000000000..f267b7824 --- /dev/null +++ b/include/crypto/xts.h @@ -0,0 +1,82 @@ +/* + * QEMU Crypto XTS cipher mode + * + * Copyright (c) 2015-2016 Red Hat, Inc. + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, see . + * + * This code is originally derived from public domain / WTFPL code in + * LibTomCrypt crytographic library http://libtom.org. The XTS code + * was donated by Elliptic Semiconductor Inc (www.ellipticsemi.com) + * to the LibTom Projects + * + */ + +#ifndef QCRYPTO_XTS_H +#define QCRYPTO_XTS_H + + +#define XTS_BLOCK_SIZE 16 + +typedef void xts_cipher_func(const void *ctx, + size_t length, + uint8_t *dst, + const uint8_t *src); + +/** + * xts_decrypt: + * @datactx: the cipher context for data decryption + * @tweakctx: the cipher context for tweak decryption + * @encfunc: the cipher function for encryption + * @decfunc: the cipher function for decryption + * @iv: the initialization vector tweak of XTS_BLOCK_SIZE bytes + * @length: the length of @dst and @src + * @dst: buffer to hold the decrypted plaintext + * @src: buffer providing the ciphertext + * + * Decrypts @src into @dst + */ +void xts_decrypt(const void *datactx, + const void *tweakctx, + xts_cipher_func *encfunc, + xts_cipher_func *decfunc, + uint8_t *iv, + size_t length, + uint8_t *dst, + const uint8_t *src); + +/** + * xts_decrypt: + * @datactx: the cipher context for data encryption + * @tweakctx: the cipher context for tweak encryption + * @encfunc: the cipher function for encryption + * @decfunc: the cipher function for decryption + * @iv: the initialization vector tweak of XTS_BLOCK_SIZE bytes + * @length: the length of @dst and @src + * @dst: buffer to hold the encrypted ciphertext + * @src: buffer providing the plaintext + * + * Decrypts @src into @dst + */ +void xts_encrypt(const void *datactx, + const void *tweakctx, + xts_cipher_func *encfunc, + xts_cipher_func *decfunc, + uint8_t *iv, + size_t length, + uint8_t *dst, + const uint8_t *src); + + +#endif /* QCRYPTO_XTS_H */ diff --git a/include/disas/capstone.h b/include/disas/capstone.h new file mode 100644 index 000000000..e29068dd9 --- /dev/null +++ b/include/disas/capstone.h @@ -0,0 +1,38 @@ +#ifndef QEMU_CAPSTONE_H +#define QEMU_CAPSTONE_H + +#ifdef CONFIG_CAPSTONE + +#include + +#else + +/* Just enough to allow backends to init without ifdefs. */ + +#define CS_ARCH_ARM -1 +#define CS_ARCH_ARM64 -1 +#define CS_ARCH_MIPS -1 +#define CS_ARCH_X86 -1 +#define CS_ARCH_PPC -1 +#define CS_ARCH_SPARC -1 +#define CS_ARCH_SYSZ -1 + +#define CS_MODE_LITTLE_ENDIAN 0 +#define CS_MODE_BIG_ENDIAN 0 +#define CS_MODE_ARM 0 +#define CS_MODE_16 0 +#define CS_MODE_32 0 +#define CS_MODE_64 0 +#define CS_MODE_THUMB 0 +#define CS_MODE_MCLASS 0 +#define CS_MODE_V8 0 +#define CS_MODE_MICRO 0 +#define CS_MODE_MIPS3 0 +#define CS_MODE_MIPS32R6 0 +#define CS_MODE_MIPSGP64 0 +#define CS_MODE_V9 0 +#define CS_MODE_MIPS32 0 +#define CS_MODE_MIPS64 0 + +#endif /* CONFIG_CAPSTONE */ +#endif /* QEMU_CAPSTONE_H */ diff --git a/include/disas/dis-asm.h b/include/disas/dis-asm.h new file mode 100644 index 000000000..2164762b4 --- /dev/null +++ b/include/disas/dis-asm.h @@ -0,0 +1,510 @@ +/* Interface between the opcode library and its callers. + Written by Cygnus Support, 1993. + + The opcode library (libopcodes.a) provides instruction decoders for + a large variety of instruction sets, callable with an identical + interface, for making instruction-processing programs more independent + of the instruction set being processed. */ + +#ifndef DISAS_DIS_ASM_H +#define DISAS_DIS_ASM_H + +typedef void *PTR; +typedef uint64_t bfd_vma; +typedef int64_t bfd_signed_vma; +typedef uint8_t bfd_byte; +#define sprintf_vma(s,x) sprintf (s, "%0" PRIx64, x) +#define snprintf_vma(s,ss,x) snprintf (s, ss, "%0" PRIx64, x) + +#define BFD64 + +enum bfd_flavour { + bfd_target_unknown_flavour, + bfd_target_aout_flavour, + bfd_target_coff_flavour, + bfd_target_ecoff_flavour, + bfd_target_elf_flavour, + bfd_target_ieee_flavour, + bfd_target_nlm_flavour, + bfd_target_oasys_flavour, + bfd_target_tekhex_flavour, + bfd_target_srec_flavour, + bfd_target_ihex_flavour, + bfd_target_som_flavour, + bfd_target_os9k_flavour, + bfd_target_versados_flavour, + bfd_target_msdos_flavour, + bfd_target_evax_flavour +}; + +enum bfd_endian { BFD_ENDIAN_BIG, BFD_ENDIAN_LITTLE, BFD_ENDIAN_UNKNOWN }; + +enum bfd_architecture +{ + bfd_arch_unknown, /* File arch not known */ + bfd_arch_obscure, /* Arch known, not one of these */ + bfd_arch_m68k, /* Motorola 68xxx */ +#define bfd_mach_m68000 1 +#define bfd_mach_m68008 2 +#define bfd_mach_m68010 3 +#define bfd_mach_m68020 4 +#define bfd_mach_m68030 5 +#define bfd_mach_m68040 6 +#define bfd_mach_m68060 7 +#define bfd_mach_cpu32 8 +#define bfd_mach_mcf5200 9 +#define bfd_mach_mcf5206e 10 +#define bfd_mach_mcf5307 11 +#define bfd_mach_mcf5407 12 +#define bfd_mach_mcf528x 13 +#define bfd_mach_mcfv4e 14 +#define bfd_mach_mcf521x 15 +#define bfd_mach_mcf5249 16 +#define bfd_mach_mcf547x 17 +#define bfd_mach_mcf548x 18 + bfd_arch_vax, /* DEC Vax */ + bfd_arch_i960, /* Intel 960 */ + /* The order of the following is important. + lower number indicates a machine type that + only accepts a subset of the instructions + available to machines with higher numbers. + The exception is the "ca", which is + incompatible with all other machines except + "core". */ + +#define bfd_mach_i960_core 1 +#define bfd_mach_i960_ka_sa 2 +#define bfd_mach_i960_kb_sb 3 +#define bfd_mach_i960_mc 4 +#define bfd_mach_i960_xa 5 +#define bfd_mach_i960_ca 6 +#define bfd_mach_i960_jx 7 +#define bfd_mach_i960_hx 8 + + bfd_arch_a29k, /* AMD 29000 */ + bfd_arch_sparc, /* SPARC */ +#define bfd_mach_sparc 1 +/* The difference between v8plus and v9 is that v9 is a true 64 bit env. */ +#define bfd_mach_sparc_sparclet 2 +#define bfd_mach_sparc_sparclite 3 +#define bfd_mach_sparc_v8plus 4 +#define bfd_mach_sparc_v8plusa 5 /* with ultrasparc add'ns. */ +#define bfd_mach_sparc_sparclite_le 6 +#define bfd_mach_sparc_v9 7 +#define bfd_mach_sparc_v9a 8 /* with ultrasparc add'ns. */ +#define bfd_mach_sparc_v8plusb 9 /* with cheetah add'ns. */ +#define bfd_mach_sparc_v9b 10 /* with cheetah add'ns. */ +/* Nonzero if MACH has the v9 instruction set. */ +#define bfd_mach_sparc_v9_p(mach) \ + ((mach) >= bfd_mach_sparc_v8plus && (mach) <= bfd_mach_sparc_v9b \ + && (mach) != bfd_mach_sparc_sparclite_le) + bfd_arch_mips, /* MIPS Rxxxx */ +#define bfd_mach_mips3000 3000 +#define bfd_mach_mips3900 3900 +#define bfd_mach_mips4000 4000 +#define bfd_mach_mips4010 4010 +#define bfd_mach_mips4100 4100 +#define bfd_mach_mips4300 4300 +#define bfd_mach_mips4400 4400 +#define bfd_mach_mips4600 4600 +#define bfd_mach_mips4650 4650 +#define bfd_mach_mips5000 5000 +#define bfd_mach_mips6000 6000 +#define bfd_mach_mips8000 8000 +#define bfd_mach_mips10000 10000 +#define bfd_mach_mips16 16 + bfd_arch_i386, /* Intel 386 */ +#define bfd_mach_i386_i386 0 +#define bfd_mach_i386_i8086 1 +#define bfd_mach_i386_i386_intel_syntax 2 +#define bfd_mach_x86_64 3 +#define bfd_mach_x86_64_intel_syntax 4 + bfd_arch_we32k, /* AT&T WE32xxx */ + bfd_arch_tahoe, /* CCI/Harris Tahoe */ + bfd_arch_i860, /* Intel 860 */ + bfd_arch_romp, /* IBM ROMP PC/RT */ + bfd_arch_alliant, /* Alliant */ + bfd_arch_convex, /* Convex */ + bfd_arch_m88k, /* Motorola 88xxx */ + bfd_arch_pyramid, /* Pyramid Technology */ + bfd_arch_h8300, /* Hitachi H8/300 */ +#define bfd_mach_h8300 1 +#define bfd_mach_h8300h 2 +#define bfd_mach_h8300s 3 + bfd_arch_powerpc, /* PowerPC */ +#define bfd_mach_ppc 0 +#define bfd_mach_ppc64 1 +#define bfd_mach_ppc_403 403 +#define bfd_mach_ppc_403gc 4030 +#define bfd_mach_ppc_e500 500 +#define bfd_mach_ppc_505 505 +#define bfd_mach_ppc_601 601 +#define bfd_mach_ppc_602 602 +#define bfd_mach_ppc_603 603 +#define bfd_mach_ppc_ec603e 6031 +#define bfd_mach_ppc_604 604 +#define bfd_mach_ppc_620 620 +#define bfd_mach_ppc_630 630 +#define bfd_mach_ppc_750 750 +#define bfd_mach_ppc_860 860 +#define bfd_mach_ppc_a35 35 +#define bfd_mach_ppc_rs64ii 642 +#define bfd_mach_ppc_rs64iii 643 +#define bfd_mach_ppc_7400 7400 + bfd_arch_rs6000, /* IBM RS/6000 */ + bfd_arch_hppa, /* HP PA RISC */ +#define bfd_mach_hppa10 10 +#define bfd_mach_hppa11 11 +#define bfd_mach_hppa20 20 +#define bfd_mach_hppa20w 25 + bfd_arch_d10v, /* Mitsubishi D10V */ + bfd_arch_z8k, /* Zilog Z8000 */ +#define bfd_mach_z8001 1 +#define bfd_mach_z8002 2 + bfd_arch_h8500, /* Hitachi H8/500 */ + bfd_arch_sh, /* Hitachi SH */ +#define bfd_mach_sh 1 +#define bfd_mach_sh2 0x20 +#define bfd_mach_sh_dsp 0x2d +#define bfd_mach_sh2a 0x2a +#define bfd_mach_sh2a_nofpu 0x2b +#define bfd_mach_sh2e 0x2e +#define bfd_mach_sh3 0x30 +#define bfd_mach_sh3_nommu 0x31 +#define bfd_mach_sh3_dsp 0x3d +#define bfd_mach_sh3e 0x3e +#define bfd_mach_sh4 0x40 +#define bfd_mach_sh4_nofpu 0x41 +#define bfd_mach_sh4_nommu_nofpu 0x42 +#define bfd_mach_sh4a 0x4a +#define bfd_mach_sh4a_nofpu 0x4b +#define bfd_mach_sh4al_dsp 0x4d +#define bfd_mach_sh5 0x50 + bfd_arch_alpha, /* Dec Alpha */ +#define bfd_mach_alpha 1 +#define bfd_mach_alpha_ev4 0x10 +#define bfd_mach_alpha_ev5 0x20 +#define bfd_mach_alpha_ev6 0x30 + bfd_arch_arm, /* Advanced Risc Machines ARM */ +#define bfd_mach_arm_unknown 0 +#define bfd_mach_arm_2 1 +#define bfd_mach_arm_2a 2 +#define bfd_mach_arm_3 3 +#define bfd_mach_arm_3M 4 +#define bfd_mach_arm_4 5 +#define bfd_mach_arm_4T 6 +#define bfd_mach_arm_5 7 +#define bfd_mach_arm_5T 8 +#define bfd_mach_arm_5TE 9 +#define bfd_mach_arm_XScale 10 +#define bfd_mach_arm_ep9312 11 +#define bfd_mach_arm_iWMMXt 12 +#define bfd_mach_arm_iWMMXt2 13 + bfd_arch_ns32k, /* National Semiconductors ns32000 */ + bfd_arch_w65, /* WDC 65816 */ + bfd_arch_tic30, /* Texas Instruments TMS320C30 */ + bfd_arch_v850, /* NEC V850 */ +#define bfd_mach_v850 0 + bfd_arch_arc, /* Argonaut RISC Core */ +#define bfd_mach_arc_base 0 + bfd_arch_m32r, /* Mitsubishi M32R/D */ +#define bfd_mach_m32r 0 /* backwards compatibility */ + bfd_arch_mn10200, /* Matsushita MN10200 */ + bfd_arch_mn10300, /* Matsushita MN10300 */ + bfd_arch_avr, /* AVR microcontrollers */ +#define bfd_mach_avr1 1 +#define bfd_mach_avr2 2 +#define bfd_mach_avr25 25 +#define bfd_mach_avr3 3 +#define bfd_mach_avr31 31 +#define bfd_mach_avr35 35 +#define bfd_mach_avr4 4 +#define bfd_mach_avr5 5 +#define bfd_mach_avr51 51 +#define bfd_mach_avr6 6 +#define bfd_mach_avrtiny 100 +#define bfd_mach_avrxmega1 101 +#define bfd_mach_avrxmega2 102 +#define bfd_mach_avrxmega3 103 +#define bfd_mach_avrxmega4 104 +#define bfd_mach_avrxmega5 105 +#define bfd_mach_avrxmega6 106 +#define bfd_mach_avrxmega7 107 + bfd_arch_cris, /* Axis CRIS */ +#define bfd_mach_cris_v0_v10 255 +#define bfd_mach_cris_v32 32 +#define bfd_mach_cris_v10_v32 1032 + bfd_arch_microblaze, /* Xilinx MicroBlaze. */ + bfd_arch_moxie, /* The Moxie core. */ + bfd_arch_ia64, /* HP/Intel ia64 */ +#define bfd_mach_ia64_elf64 64 +#define bfd_mach_ia64_elf32 32 + bfd_arch_nios2, /* Nios II */ +#define bfd_mach_nios2 0 +#define bfd_mach_nios2r1 1 +#define bfd_mach_nios2r2 2 + bfd_arch_lm32, /* Lattice Mico32 */ +#define bfd_mach_lm32 1 + bfd_arch_rx, /* Renesas RX */ +#define bfd_mach_rx 0x75 +#define bfd_mach_rx_v2 0x76 +#define bfd_mach_rx_v3 0x77 + bfd_arch_last + }; +#define bfd_mach_s390_31 31 +#define bfd_mach_s390_64 64 + +typedef struct symbol_cache_entry +{ + const char *name; + union + { + PTR p; + bfd_vma i; + } udata; +} asymbol; + +typedef int (*fprintf_function)(FILE *f, const char *fmt, ...) + GCC_FMT_ATTR(2, 3); + +enum dis_insn_type { + dis_noninsn, /* Not a valid instruction */ + dis_nonbranch, /* Not a branch instruction */ + dis_branch, /* Unconditional branch */ + dis_condbranch, /* Conditional branch */ + dis_jsr, /* Jump to subroutine */ + dis_condjsr, /* Conditional jump to subroutine */ + dis_dref, /* Data reference instruction */ + dis_dref2 /* Two data references in instruction */ +}; + +/* This struct is passed into the instruction decoding routine, + and is passed back out into each callback. The various fields are used + for conveying information from your main routine into your callbacks, + for passing information into the instruction decoders (such as the + addresses of the callback functions), or for passing information + back from the instruction decoders to their callers. + + It must be initialized before it is first passed; this can be done + by hand, or using one of the initialization macros below. */ + +typedef struct disassemble_info { + fprintf_function fprintf_func; + FILE *stream; + PTR application_data; + + /* Target description. We could replace this with a pointer to the bfd, + but that would require one. There currently isn't any such requirement + so to avoid introducing one we record these explicitly. */ + /* The bfd_flavour. This can be bfd_target_unknown_flavour. */ + enum bfd_flavour flavour; + /* The bfd_arch value. */ + enum bfd_architecture arch; + /* The bfd_mach value. */ + unsigned long mach; + /* Endianness (for bi-endian cpus). Mono-endian cpus can ignore this. */ + enum bfd_endian endian; + + /* An array of pointers to symbols either at the location being disassembled + or at the start of the function being disassembled. The array is sorted + so that the first symbol is intended to be the one used. The others are + present for any misc. purposes. This is not set reliably, but if it is + not NULL, it is correct. */ + asymbol **symbols; + /* Number of symbols in array. */ + int num_symbols; + + /* For use by the disassembler. + The top 16 bits are reserved for public use (and are documented here). + The bottom 16 bits are for the internal use of the disassembler. */ + unsigned long flags; +#define INSN_HAS_RELOC 0x80000000 +#define INSN_ARM_BE32 0x00010000 + PTR private_data; + + /* Function used to get bytes to disassemble. MEMADDR is the + address of the stuff to be disassembled, MYADDR is the address to + put the bytes in, and LENGTH is the number of bytes to read. + INFO is a pointer to this struct. + Returns an errno value or 0 for success. */ + int (*read_memory_func) + (bfd_vma memaddr, bfd_byte *myaddr, int length, + struct disassemble_info *info); + + /* Function which should be called if we get an error that we can't + recover from. STATUS is the errno value from read_memory_func and + MEMADDR is the address that we were trying to read. INFO is a + pointer to this struct. */ + void (*memory_error_func) + (int status, bfd_vma memaddr, struct disassemble_info *info); + + /* Function called to print ADDR. */ + void (*print_address_func) + (bfd_vma addr, struct disassemble_info *info); + + /* Function called to print an instruction. The function is architecture + * specific. + */ + int (*print_insn)(bfd_vma addr, struct disassemble_info *info); + + /* Function called to determine if there is a symbol at the given ADDR. + If there is, the function returns 1, otherwise it returns 0. + This is used by ports which support an overlay manager where + the overlay number is held in the top part of an address. In + some circumstances we want to include the overlay number in the + address, (normally because there is a symbol associated with + that address), but sometimes we want to mask out the overlay bits. */ + int (* symbol_at_address_func) + (bfd_vma addr, struct disassemble_info * info); + + /* These are for buffer_read_memory. */ + bfd_byte *buffer; + bfd_vma buffer_vma; + int buffer_length; + + /* This variable may be set by the instruction decoder. It suggests + the number of bytes objdump should display on a single line. If + the instruction decoder sets this, it should always set it to + the same value in order to get reasonable looking output. */ + int bytes_per_line; + + /* the next two variables control the way objdump displays the raw data */ + /* For example, if bytes_per_line is 8 and bytes_per_chunk is 4, the */ + /* output will look like this: + 00: 00000000 00000000 + with the chunks displayed according to "display_endian". */ + int bytes_per_chunk; + enum bfd_endian display_endian; + + /* Results from instruction decoders. Not all decoders yet support + this information. This info is set each time an instruction is + decoded, and is only valid for the last such instruction. + + To determine whether this decoder supports this information, set + insn_info_valid to 0, decode an instruction, then check it. */ + + char insn_info_valid; /* Branch info has been set. */ + char branch_delay_insns; /* How many sequential insn's will run before + a branch takes effect. (0 = normal) */ + char data_size; /* Size of data reference in insn, in bytes */ + enum dis_insn_type insn_type; /* Type of instruction */ + bfd_vma target; /* Target address of branch or dref, if known; + zero if unknown. */ + bfd_vma target2; /* Second target address for dref2 */ + + /* Command line options specific to the target disassembler. */ + char * disassembler_options; + + /* Field intended to be used by targets in any way they deem suitable. */ + int64_t target_info; + + /* Options for Capstone disassembly. */ + int cap_arch; + int cap_mode; + int cap_insn_unit; + int cap_insn_split; + +} disassemble_info; + +/* Standard disassemblers. Disassemble one instruction at the given + target address. Return number of bytes processed. */ +typedef int (*disassembler_ftype) (bfd_vma, disassemble_info *); + +int print_insn_tci(bfd_vma, disassemble_info*); +int print_insn_big_mips (bfd_vma, disassemble_info*); +int print_insn_little_mips (bfd_vma, disassemble_info*); +int print_insn_nanomips (bfd_vma, disassemble_info*); +int print_insn_i386 (bfd_vma, disassemble_info*); +int print_insn_m68k (bfd_vma, disassemble_info*); +int print_insn_z8001 (bfd_vma, disassemble_info*); +int print_insn_z8002 (bfd_vma, disassemble_info*); +int print_insn_h8300 (bfd_vma, disassemble_info*); +int print_insn_h8300h (bfd_vma, disassemble_info*); +int print_insn_h8300s (bfd_vma, disassemble_info*); +int print_insn_h8500 (bfd_vma, disassemble_info*); +int print_insn_arm_a64 (bfd_vma, disassemble_info*); +int print_insn_alpha (bfd_vma, disassemble_info*); +disassembler_ftype arc_get_disassembler (int, int); +int print_insn_arm (bfd_vma, disassemble_info*); +int print_insn_sparc (bfd_vma, disassemble_info*); +int print_insn_big_a29k (bfd_vma, disassemble_info*); +int print_insn_little_a29k (bfd_vma, disassemble_info*); +int print_insn_i960 (bfd_vma, disassemble_info*); +int print_insn_sh (bfd_vma, disassemble_info*); +int print_insn_shl (bfd_vma, disassemble_info*); +int print_insn_hppa (bfd_vma, disassemble_info*); +int print_insn_m32r (bfd_vma, disassemble_info*); +int print_insn_m88k (bfd_vma, disassemble_info*); +int print_insn_mn10200 (bfd_vma, disassemble_info*); +int print_insn_mn10300 (bfd_vma, disassemble_info*); +int print_insn_moxie (bfd_vma, disassemble_info*); +int print_insn_ns32k (bfd_vma, disassemble_info*); +int print_insn_big_powerpc (bfd_vma, disassemble_info*); +int print_insn_little_powerpc (bfd_vma, disassemble_info*); +int print_insn_rs6000 (bfd_vma, disassemble_info*); +int print_insn_w65 (bfd_vma, disassemble_info*); +int print_insn_d10v (bfd_vma, disassemble_info*); +int print_insn_v850 (bfd_vma, disassemble_info*); +int print_insn_tic30 (bfd_vma, disassemble_info*); +int print_insn_ppc (bfd_vma, disassemble_info*); +int print_insn_s390 (bfd_vma, disassemble_info*); +int print_insn_crisv32 (bfd_vma, disassemble_info*); +int print_insn_crisv10 (bfd_vma, disassemble_info*); +int print_insn_microblaze (bfd_vma, disassemble_info*); +int print_insn_ia64 (bfd_vma, disassemble_info*); +int print_insn_lm32 (bfd_vma, disassemble_info*); +int print_insn_big_nios2 (bfd_vma, disassemble_info*); +int print_insn_little_nios2 (bfd_vma, disassemble_info*); +int print_insn_xtensa (bfd_vma, disassemble_info*); +int print_insn_riscv32 (bfd_vma, disassemble_info*); +int print_insn_riscv64 (bfd_vma, disassemble_info*); +int print_insn_rx(bfd_vma, disassemble_info *); + +#ifdef CONFIG_CAPSTONE +bool cap_disas_target(disassemble_info *info, uint64_t pc, size_t size); +bool cap_disas_host(disassemble_info *info, void *code, size_t size); +bool cap_disas_monitor(disassemble_info *info, uint64_t pc, int count); +bool cap_disas_plugin(disassemble_info *info, uint64_t pc, size_t size); +#else +# define cap_disas_target(i, p, s) false +# define cap_disas_host(i, p, s) false +# define cap_disas_monitor(i, p, c) false +# define cap_disas_plugin(i, p, c) false +#endif /* CONFIG_CAPSTONE */ + +#ifndef ATTRIBUTE_UNUSED +#define ATTRIBUTE_UNUSED __attribute__((unused)) +#endif + +/* from libbfd */ + +#include "qemu/bswap.h" + +static inline bfd_vma bfd_getl64(const bfd_byte *addr) +{ + return ldq_le_p(addr); +} + +static inline bfd_vma bfd_getl32(const bfd_byte *addr) +{ + return (uint32_t)ldl_le_p(addr); +} + +static inline bfd_vma bfd_getl16(const bfd_byte *addr) +{ + return lduw_le_p(addr); +} + +static inline bfd_vma bfd_getb32(const bfd_byte *addr) +{ + return (uint32_t)ldl_be_p(addr); +} + +static inline bfd_vma bfd_getb16(const bfd_byte *addr) +{ + return lduw_be_p(addr); +} + +typedef bool bfd_boolean; + +#endif /* DISAS_DIS_ASM_H */ diff --git a/include/disas/disas.h b/include/disas/disas.h new file mode 100644 index 000000000..36c33f6f1 --- /dev/null +++ b/include/disas/disas.h @@ -0,0 +1,47 @@ +#ifndef QEMU_DISAS_H +#define QEMU_DISAS_H + +#include "exec/hwaddr.h" + +#ifdef NEED_CPU_H +#include "cpu.h" + +/* Disassemble this for me please... (debugging). */ +void disas(FILE *out, void *code, unsigned long size); +void target_disas(FILE *out, CPUState *cpu, target_ulong code, + target_ulong size); + +void monitor_disas(Monitor *mon, CPUState *cpu, + target_ulong pc, int nb_insn, int is_physical); + +char *plugin_disas(CPUState *cpu, uint64_t addr, size_t size); + +/* Look up symbol for debugging purpose. Returns "" if unknown. */ +const char *lookup_symbol(target_ulong orig_addr); +#endif + +struct syminfo; +struct elf32_sym; +struct elf64_sym; + +#if defined(CONFIG_USER_ONLY) +typedef const char *(*lookup_symbol_t)(struct syminfo *s, target_ulong orig_addr); +#else +typedef const char *(*lookup_symbol_t)(struct syminfo *s, hwaddr orig_addr); +#endif + +struct syminfo { + lookup_symbol_t lookup_symbol; + unsigned int disas_num_syms; + union { + struct elf32_sym *elf32; + struct elf64_sym *elf64; + } disas_symtab; + const char *disas_strtab; + struct syminfo *next; +}; + +/* Filled in by elfload.c. Simplistic, but will do for now. */ +extern struct syminfo *syminfos; + +#endif /* QEMU_DISAS_H */ diff --git a/include/elf.h b/include/elf.h new file mode 100644 index 000000000..7a418ee55 --- /dev/null +++ b/include/elf.h @@ -0,0 +1,1776 @@ +#ifndef QEMU_ELF_H +#define QEMU_ELF_H + +/* 32-bit ELF base types. */ +typedef uint32_t Elf32_Addr; +typedef uint16_t Elf32_Half; +typedef uint32_t Elf32_Off; +typedef int32_t Elf32_Sword; +typedef uint32_t Elf32_Word; + +/* 64-bit ELF base types. */ +typedef uint64_t Elf64_Addr; +typedef uint16_t Elf64_Half; +typedef int16_t Elf64_SHalf; +typedef uint64_t Elf64_Off; +typedef int32_t Elf64_Sword; +typedef uint32_t Elf64_Word; +typedef uint64_t Elf64_Xword; +typedef int64_t Elf64_Sxword; + +/* These constants are for the segment types stored in the image headers */ +#define PT_NULL 0 +#define PT_LOAD 1 +#define PT_DYNAMIC 2 +#define PT_INTERP 3 +#define PT_NOTE 4 +#define PT_SHLIB 5 +#define PT_PHDR 6 +#define PT_LOOS 0x60000000 +#define PT_HIOS 0x6fffffff +#define PT_LOPROC 0x70000000 +#define PT_HIPROC 0x7fffffff + +#define PT_GNU_PROPERTY (PT_LOOS + 0x474e553) + +#define PT_MIPS_REGINFO 0x70000000 +#define PT_MIPS_RTPROC 0x70000001 +#define PT_MIPS_OPTIONS 0x70000002 +#define PT_MIPS_ABIFLAGS 0x70000003 + +/* Flags in the e_flags field of the header */ +/* MIPS architecture level. */ +#define EF_MIPS_ARCH 0xf0000000 + +/* Legal values for MIPS architecture level. */ +#define EF_MIPS_ARCH_1 0x00000000 /* -mips1 code. */ +#define EF_MIPS_ARCH_2 0x10000000 /* -mips2 code. */ +#define EF_MIPS_ARCH_3 0x20000000 /* -mips3 code. */ +#define EF_MIPS_ARCH_4 0x30000000 /* -mips4 code. */ +#define EF_MIPS_ARCH_5 0x40000000 /* -mips5 code. */ +#define EF_MIPS_ARCH_32 0x50000000 /* MIPS32 code. */ +#define EF_MIPS_ARCH_64 0x60000000 /* MIPS64 code. */ +#define EF_MIPS_ARCH_32R2 0x70000000 /* MIPS32r2 code. */ +#define EF_MIPS_ARCH_64R2 0x80000000 /* MIPS64r2 code. */ +#define EF_MIPS_ARCH_32R6 0x90000000 /* MIPS32r6 code. */ +#define EF_MIPS_ARCH_64R6 0xa0000000 /* MIPS64r6 code. */ + +/* The ABI of a file. */ +#define EF_MIPS_ABI_O32 0x00001000 /* O32 ABI. */ +#define EF_MIPS_ABI_O64 0x00002000 /* O32 extended for 64 bit. */ + +#define EF_MIPS_NOREORDER 0x00000001 +#define EF_MIPS_PIC 0x00000002 +#define EF_MIPS_CPIC 0x00000004 +#define EF_MIPS_ABI2 0x00000020 +#define EF_MIPS_OPTIONS_FIRST 0x00000080 +#define EF_MIPS_32BITMODE 0x00000100 +#define EF_MIPS_ABI 0x0000f000 +#define EF_MIPS_FP64 0x00000200 +#define EF_MIPS_NAN2008 0x00000400 + +/* MIPS machine variant */ +#define EF_MIPS_MACH_NONE 0x00000000 /* A standard MIPS implementation */ +#define EF_MIPS_MACH_3900 0x00810000 /* Toshiba R3900 */ +#define EF_MIPS_MACH_4010 0x00820000 /* LSI R4010 */ +#define EF_MIPS_MACH_4100 0x00830000 /* NEC VR4100 */ +#define EF_MIPS_MACH_4650 0x00850000 /* MIPS R4650 */ +#define EF_MIPS_MACH_4120 0x00870000 /* NEC VR4120 */ +#define EF_MIPS_MACH_4111 0x00880000 /* NEC VR4111/VR4181 */ +#define EF_MIPS_MACH_SB1 0x008a0000 /* Broadcom SB-1 */ +#define EF_MIPS_MACH_OCTEON 0x008b0000 /* Cavium Networks Octeon */ +#define EF_MIPS_MACH_XLR 0x008c0000 /* RMI Xlr */ +#define EF_MIPS_MACH_OCTEON2 0x008d0000 /* Cavium Networks Octeon2 */ +#define EF_MIPS_MACH_OCTEON3 0x008e0000 /* Cavium Networks Octeon3 */ +#define EF_MIPS_MACH_5400 0x00910000 /* NEC VR5400 */ +#define EF_MIPS_MACH_5900 0x00920000 /* Toshiba/Sony R5900 */ +#define EF_MIPS_MACH_5500 0x00980000 /* NEC VR5500 */ +#define EF_MIPS_MACH_9000 0x00990000 /* PMC-Sierra RM9000 */ +#define EF_MIPS_MACH_LS2E 0x00a00000 /* ST Microelectronics Loongson 2E */ +#define EF_MIPS_MACH_LS2F 0x00a10000 /* ST Microelectronics Loongson 2F */ +#define EF_MIPS_MACH_LS3A 0x00a20000 /* ST Microelectronics Loongson 3A */ +#define EF_MIPS_MACH 0x00ff0000 /* EF_MIPS_MACH_xxx selection mask */ + +#define MIPS_ABI_FP_UNKNOWN (-1) /* Unknown FP ABI (internal) */ + +#define MIPS_ABI_FP_ANY 0x0 /* FP ABI doesn't matter */ +#define MIPS_ABI_FP_DOUBLE 0x1 /* -mdouble-float */ +#define MIPS_ABI_FP_SINGLE 0x2 /* -msingle-float */ +#define MIPS_ABI_FP_SOFT 0x3 /* -msoft-float */ +#define MIPS_ABI_FP_OLD_64 0x4 /* -mips32r2 -mfp64 */ +#define MIPS_ABI_FP_XX 0x5 /* -mfpxx */ +#define MIPS_ABI_FP_64 0x6 /* -mips32r2 -mfp64 */ +#define MIPS_ABI_FP_64A 0x7 /* -mips32r2 -mfp64 -mno-odd-spreg */ + +typedef struct mips_elf_abiflags_v0 { + uint16_t version; /* Version of flags structure */ + uint8_t isa_level; /* The level of the ISA: 1-5, 32, 64 */ + uint8_t isa_rev; /* The revision of ISA: */ + /* - 0 for MIPS V and below, */ + /* - 1-n otherwise. */ + uint8_t gpr_size; /* The size of general purpose registers */ + uint8_t cpr1_size; /* The size of co-processor 1 registers */ + uint8_t cpr2_size; /* The size of co-processor 2 registers */ + uint8_t fp_abi; /* The floating-point ABI */ + uint32_t isa_ext; /* Mask of processor-specific extensions */ + uint32_t ases; /* Mask of ASEs used */ + uint32_t flags1; /* Mask of general flags */ + uint32_t flags2; +} Mips_elf_abiflags_v0; + +/* These constants define the different elf file types */ +#define ET_NONE 0 +#define ET_REL 1 +#define ET_EXEC 2 +#define ET_DYN 3 +#define ET_CORE 4 +#define ET_LOPROC 0xff00 +#define ET_HIPROC 0xffff + +/* These constants define the various ELF target machines */ +#define EM_NONE 0 +#define EM_M32 1 +#define EM_SPARC 2 +#define EM_386 3 +#define EM_68K 4 +#define EM_88K 5 +#define EM_486 6 /* Perhaps disused */ +#define EM_860 7 + +#define EM_MIPS 8 /* MIPS R3000 (officially, big-endian only) */ + +#define EM_MIPS_RS4_BE 10 /* MIPS R4000 big-endian */ + +#define EM_PARISC 15 /* HPPA */ + +#define EM_SPARC32PLUS 18 /* Sun's "v8plus" */ + +#define EM_PPC 20 /* PowerPC */ +#define EM_PPC64 21 /* PowerPC64 */ + +#define EM_ARM 40 /* ARM */ + +#define EM_SH 42 /* SuperH */ + +#define EM_SPARCV9 43 /* SPARC v9 64-bit */ + +#define EM_TRICORE 44 /* Infineon TriCore */ + +#define EM_IA_64 50 /* HP/Intel IA-64 */ + +#define EM_X86_64 62 /* AMD x86-64 */ + +#define EM_S390 22 /* IBM S/390 */ + +#define EM_CRIS 76 /* Axis Communications 32-bit embedded processor */ + +#define EM_AVR 83 /* AVR 8-bit microcontroller */ + +#define EM_V850 87 /* NEC v850 */ + +#define EM_H8_300H 47 /* Hitachi H8/300H */ +#define EM_H8S 48 /* Hitachi H8S */ +#define EM_LATTICEMICO32 138 /* LatticeMico32 */ + +#define EM_OPENRISC 92 /* OpenCores OpenRISC */ + +#define EM_UNICORE32 110 /* UniCore32 */ + +#define EM_RX 173 /* Renesas RX family */ + +#define EM_RISCV 243 /* RISC-V */ + +#define EM_NANOMIPS 249 /* Wave Computing nanoMIPS */ + +/* + * This is an interim value that we will use until the committee comes + * up with a final number. + */ +#define EM_ALPHA 0x9026 + +/* Bogus old v850 magic number, used by old tools. */ +#define EM_CYGNUS_V850 0x9080 + +/* + * This is the old interim value for S/390 architecture + */ +#define EM_S390_OLD 0xA390 + +#define EM_ALTERA_NIOS2 113 /* Altera Nios II soft-core processor */ + +#define EM_MICROBLAZE 189 +#define EM_MICROBLAZE_OLD 0xBAAB + +#define EM_XTENSA 94 /* Tensilica Xtensa */ + +#define EM_AARCH64 183 + +#define EM_TILEGX 191 /* TILE-Gx */ + +#define EM_MOXIE 223 /* Moxie processor family */ +#define EM_MOXIE_OLD 0xFEED + +#define EF_AVR_MACH 0x7F /* Mask for AVR e_flags to get core type */ + +/* This is the info that is needed to parse the dynamic section of the file */ +#define DT_NULL 0 +#define DT_NEEDED 1 +#define DT_PLTRELSZ 2 +#define DT_PLTGOT 3 +#define DT_HASH 4 +#define DT_STRTAB 5 +#define DT_SYMTAB 6 +#define DT_RELA 7 +#define DT_RELASZ 8 +#define DT_RELAENT 9 +#define DT_STRSZ 10 +#define DT_SYMENT 11 +#define DT_INIT 12 +#define DT_FINI 13 +#define DT_SONAME 14 +#define DT_RPATH 15 +#define DT_SYMBOLIC 16 +#define DT_REL 17 +#define DT_RELSZ 18 +#define DT_RELENT 19 +#define DT_PLTREL 20 +#define DT_DEBUG 21 +#define DT_TEXTREL 22 +#define DT_JMPREL 23 +#define DT_BINDNOW 24 +#define DT_INIT_ARRAY 25 +#define DT_FINI_ARRAY 26 +#define DT_INIT_ARRAYSZ 27 +#define DT_FINI_ARRAYSZ 28 +#define DT_RUNPATH 29 +#define DT_FLAGS 30 +#define DT_LOOS 0x6000000d +#define DT_HIOS 0x6ffff000 +#define DT_LOPROC 0x70000000 +#define DT_HIPROC 0x7fffffff + +/* DT_ entries which fall between DT_VALRNGLO and DT_VALRNDHI use + the d_val field of the Elf*_Dyn structure. I.e. they contain scalars. */ +#define DT_VALRNGLO 0x6ffffd00 +#define DT_VALRNGHI 0x6ffffdff + +/* DT_ entries which fall between DT_ADDRRNGLO and DT_ADDRRNGHI use + the d_ptr field of the Elf*_Dyn structure. I.e. they contain pointers. */ +#define DT_ADDRRNGLO 0x6ffffe00 +#define DT_ADDRRNGHI 0x6ffffeff + +#define DT_VERSYM 0x6ffffff0 +#define DT_RELACOUNT 0x6ffffff9 +#define DT_RELCOUNT 0x6ffffffa +#define DT_FLAGS_1 0x6ffffffb +#define DT_VERDEF 0x6ffffffc +#define DT_VERDEFNUM 0x6ffffffd +#define DT_VERNEED 0x6ffffffe +#define DT_VERNEEDNUM 0x6fffffff + +#define DT_MIPS_RLD_VERSION 0x70000001 +#define DT_MIPS_TIME_STAMP 0x70000002 +#define DT_MIPS_ICHECKSUM 0x70000003 +#define DT_MIPS_IVERSION 0x70000004 +#define DT_MIPS_FLAGS 0x70000005 + #define RHF_NONE 0 + #define RHF_HARDWAY 1 + #define RHF_NOTPOT 2 +#define DT_MIPS_BASE_ADDRESS 0x70000006 +#define DT_MIPS_CONFLICT 0x70000008 +#define DT_MIPS_LIBLIST 0x70000009 +#define DT_MIPS_LOCAL_GOTNO 0x7000000a +#define DT_MIPS_CONFLICTNO 0x7000000b +#define DT_MIPS_LIBLISTNO 0x70000010 +#define DT_MIPS_SYMTABNO 0x70000011 +#define DT_MIPS_UNREFEXTNO 0x70000012 +#define DT_MIPS_GOTSYM 0x70000013 +#define DT_MIPS_HIPAGENO 0x70000014 +#define DT_MIPS_RLD_MAP 0x70000016 + +/* This info is needed when parsing the symbol table */ +#define STB_LOCAL 0 +#define STB_GLOBAL 1 +#define STB_WEAK 2 + +#define STT_NOTYPE 0 +#define STT_OBJECT 1 +#define STT_FUNC 2 +#define STT_SECTION 3 +#define STT_FILE 4 + +#define ELF_ST_BIND(x) ((x) >> 4) +#define ELF_ST_TYPE(x) (((unsigned int) x) & 0xf) +#define ELF_ST_INFO(bind, type) (((bind) << 4) | ((type) & 0xf)) +#define ELF32_ST_BIND(x) ELF_ST_BIND(x) +#define ELF32_ST_TYPE(x) ELF_ST_TYPE(x) +#define ELF64_ST_BIND(x) ELF_ST_BIND(x) +#define ELF64_ST_TYPE(x) ELF_ST_TYPE(x) + +/* Symbolic values for the entries in the auxiliary table + put on the initial stack */ +#define AT_NULL 0 /* end of vector */ +#define AT_IGNORE 1 /* entry should be ignored */ +#define AT_EXECFD 2 /* file descriptor of program */ +#define AT_PHDR 3 /* program headers for program */ +#define AT_PHENT 4 /* size of program header entry */ +#define AT_PHNUM 5 /* number of program headers */ +#define AT_PAGESZ 6 /* system page size */ +#define AT_BASE 7 /* base address of interpreter */ +#define AT_FLAGS 8 /* flags */ +#define AT_ENTRY 9 /* entry point of program */ +#define AT_NOTELF 10 /* program is not ELF */ +#define AT_UID 11 /* real uid */ +#define AT_EUID 12 /* effective uid */ +#define AT_GID 13 /* real gid */ +#define AT_EGID 14 /* effective gid */ +#define AT_PLATFORM 15 /* string identifying CPU for optimizations */ +#define AT_HWCAP 16 /* arch dependent hints at CPU capabilities */ +#define AT_CLKTCK 17 /* frequency at which times() increments */ +#define AT_FPUCW 18 /* info about fpu initialization by kernel */ +#define AT_DCACHEBSIZE 19 /* data cache block size */ +#define AT_ICACHEBSIZE 20 /* instruction cache block size */ +#define AT_UCACHEBSIZE 21 /* unified cache block size */ +#define AT_IGNOREPPC 22 /* ppc only; entry should be ignored */ +#define AT_SECURE 23 /* boolean, was exec suid-like? */ +#define AT_BASE_PLATFORM 24 /* string identifying real platforms */ +#define AT_RANDOM 25 /* address of 16 random bytes */ +#define AT_HWCAP2 26 /* extension of AT_HWCAP */ +#define AT_EXECFN 31 /* filename of the executable */ +#define AT_SYSINFO 32 /* address of kernel entry point */ +#define AT_SYSINFO_EHDR 33 /* address of kernel vdso */ +#define AT_L1I_CACHESHAPE 34 /* shapes of the caches: */ +#define AT_L1D_CACHESHAPE 35 /* bits 0-3: cache associativity. */ +#define AT_L2_CACHESHAPE 36 /* bits 4-7: log2 of line size. */ +#define AT_L3_CACHESHAPE 37 /* val&~255: cache size. */ + +typedef struct dynamic{ + Elf32_Sword d_tag; + union{ + Elf32_Sword d_val; + Elf32_Addr d_ptr; + } d_un; +} Elf32_Dyn; + +typedef struct { + Elf64_Sxword d_tag; /* entry tag value */ + union { + Elf64_Xword d_val; + Elf64_Addr d_ptr; + } d_un; +} Elf64_Dyn; + +/* The following are used with relocations */ +#define ELF32_R_SYM(x) ((x) >> 8) +#define ELF32_R_TYPE(x) ((x) & 0xff) + +#define ELF64_R_SYM(i) ((i) >> 32) +#define ELF64_R_TYPE(i) ((i) & 0xffffffff) +#define ELF64_R_TYPE_DATA(i) (((ELF64_R_TYPE(i) >> 8) ^ 0x00800000) - 0x00800000) + +#define R_386_NONE 0 +#define R_386_32 1 +#define R_386_PC32 2 +#define R_386_GOT32 3 +#define R_386_PLT32 4 +#define R_386_COPY 5 +#define R_386_GLOB_DAT 6 +#define R_386_JMP_SLOT 7 +#define R_386_RELATIVE 8 +#define R_386_GOTOFF 9 +#define R_386_GOTPC 10 +#define R_386_NUM 11 +/* Not a dynamic reloc, so not included in R_386_NUM. Used in TCG. */ +#define R_386_PC8 23 + +#define R_MIPS_NONE 0 +#define R_MIPS_16 1 +#define R_MIPS_32 2 +#define R_MIPS_REL32 3 +#define R_MIPS_26 4 +#define R_MIPS_HI16 5 +#define R_MIPS_LO16 6 +#define R_MIPS_GPREL16 7 +#define R_MIPS_LITERAL 8 +#define R_MIPS_GOT16 9 +#define R_MIPS_PC16 10 +#define R_MIPS_CALL16 11 +#define R_MIPS_GPREL32 12 +/* The remaining relocs are defined on Irix, although they are not + in the MIPS ELF ABI. */ +#define R_MIPS_UNUSED1 13 +#define R_MIPS_UNUSED2 14 +#define R_MIPS_UNUSED3 15 +#define R_MIPS_SHIFT5 16 +#define R_MIPS_SHIFT6 17 +#define R_MIPS_64 18 +#define R_MIPS_GOT_DISP 19 +#define R_MIPS_GOT_PAGE 20 +#define R_MIPS_GOT_OFST 21 +/* + * The following two relocation types are specified in the MIPS ABI + * conformance guide version 1.2 but not yet in the psABI. + */ +#define R_MIPS_GOTHI16 22 +#define R_MIPS_GOTLO16 23 +#define R_MIPS_SUB 24 +#define R_MIPS_INSERT_A 25 +#define R_MIPS_INSERT_B 26 +#define R_MIPS_DELETE 27 +#define R_MIPS_HIGHER 28 +#define R_MIPS_HIGHEST 29 +/* + * The following two relocation types are specified in the MIPS ABI + * conformance guide version 1.2 but not yet in the psABI. + */ +#define R_MIPS_CALLHI16 30 +#define R_MIPS_CALLLO16 31 +/* + * This range is reserved for vendor specific relocations. + */ +#define R_MIPS_LOVENDOR 100 +#define R_MIPS_HIVENDOR 127 + + +/* SUN SPARC specific definitions. */ + +/* Values for Elf64_Ehdr.e_flags. */ + +#define EF_SPARCV9_MM 3 +#define EF_SPARCV9_TSO 0 +#define EF_SPARCV9_PSO 1 +#define EF_SPARCV9_RMO 2 +#define EF_SPARC_LEDATA 0x800000 /* little endian data */ +#define EF_SPARC_EXT_MASK 0xFFFF00 +#define EF_SPARC_32PLUS 0x000100 /* generic V8+ features */ +#define EF_SPARC_SUN_US1 0x000200 /* Sun UltraSPARC1 extensions */ +#define EF_SPARC_HAL_R1 0x000400 /* HAL R1 extensions */ +#define EF_SPARC_SUN_US3 0x000800 /* Sun UltraSPARCIII extensions */ + +/* + * Sparc ELF relocation types + */ +#define R_SPARC_NONE 0 +#define R_SPARC_8 1 +#define R_SPARC_16 2 +#define R_SPARC_32 3 +#define R_SPARC_DISP8 4 +#define R_SPARC_DISP16 5 +#define R_SPARC_DISP32 6 +#define R_SPARC_WDISP30 7 +#define R_SPARC_WDISP22 8 +#define R_SPARC_HI22 9 +#define R_SPARC_22 10 +#define R_SPARC_13 11 +#define R_SPARC_LO10 12 +#define R_SPARC_GOT10 13 +#define R_SPARC_GOT13 14 +#define R_SPARC_GOT22 15 +#define R_SPARC_PC10 16 +#define R_SPARC_PC22 17 +#define R_SPARC_WPLT30 18 +#define R_SPARC_COPY 19 +#define R_SPARC_GLOB_DAT 20 +#define R_SPARC_JMP_SLOT 21 +#define R_SPARC_RELATIVE 22 +#define R_SPARC_UA32 23 +#define R_SPARC_PLT32 24 +#define R_SPARC_HIPLT22 25 +#define R_SPARC_LOPLT10 26 +#define R_SPARC_PCPLT32 27 +#define R_SPARC_PCPLT22 28 +#define R_SPARC_PCPLT10 29 +#define R_SPARC_10 30 +#define R_SPARC_11 31 +#define R_SPARC_64 32 +#define R_SPARC_OLO10 33 +#define R_SPARC_HH22 34 +#define R_SPARC_HM10 35 +#define R_SPARC_LM22 36 +#define R_SPARC_WDISP16 40 +#define R_SPARC_WDISP19 41 +#define R_SPARC_7 43 +#define R_SPARC_5 44 +#define R_SPARC_6 45 + +/* Bits present in AT_HWCAP for ARM. */ + +#define HWCAP_ARM_SWP (1 << 0) +#define HWCAP_ARM_HALF (1 << 1) +#define HWCAP_ARM_THUMB (1 << 2) +#define HWCAP_ARM_26BIT (1 << 3) +#define HWCAP_ARM_FAST_MULT (1 << 4) +#define HWCAP_ARM_FPA (1 << 5) +#define HWCAP_ARM_VFP (1 << 6) +#define HWCAP_ARM_EDSP (1 << 7) +#define HWCAP_ARM_JAVA (1 << 8) +#define HWCAP_ARM_IWMMXT (1 << 9) +#define HWCAP_ARM_CRUNCH (1 << 10) +#define HWCAP_ARM_THUMBEE (1 << 11) +#define HWCAP_ARM_NEON (1 << 12) +#define HWCAP_ARM_VFPv3 (1 << 13) +#define HWCAP_ARM_VFPv3D16 (1 << 14) /* also set for VFPv4-D16 */ +#define HWCAP_ARM_TLS (1 << 15) +#define HWCAP_ARM_VFPv4 (1 << 16) +#define HWCAP_ARM_IDIVA (1 << 17) +#define HWCAP_ARM_IDIVT (1 << 18) +#define HWCAP_IDIV (HWCAP_IDIVA | HWCAP_IDIVT) +#define HWCAP_VFPD32 (1 << 19) /* set if VFP has 32 regs */ +#define HWCAP_LPAE (1 << 20) + +/* Bits present in AT_HWCAP for PowerPC. */ + +#define PPC_FEATURE_32 0x80000000 +#define PPC_FEATURE_64 0x40000000 +#define PPC_FEATURE_601_INSTR 0x20000000 +#define PPC_FEATURE_HAS_ALTIVEC 0x10000000 +#define PPC_FEATURE_HAS_FPU 0x08000000 +#define PPC_FEATURE_HAS_MMU 0x04000000 +#define PPC_FEATURE_HAS_4xxMAC 0x02000000 +#define PPC_FEATURE_UNIFIED_CACHE 0x01000000 +#define PPC_FEATURE_HAS_SPE 0x00800000 +#define PPC_FEATURE_HAS_EFP_SINGLE 0x00400000 +#define PPC_FEATURE_HAS_EFP_DOUBLE 0x00200000 +#define PPC_FEATURE_NO_TB 0x00100000 +#define PPC_FEATURE_POWER4 0x00080000 +#define PPC_FEATURE_POWER5 0x00040000 +#define PPC_FEATURE_POWER5_PLUS 0x00020000 +#define PPC_FEATURE_CELL 0x00010000 +#define PPC_FEATURE_BOOKE 0x00008000 +#define PPC_FEATURE_SMT 0x00004000 +#define PPC_FEATURE_ICACHE_SNOOP 0x00002000 +#define PPC_FEATURE_ARCH_2_05 0x00001000 +#define PPC_FEATURE_PA6T 0x00000800 +#define PPC_FEATURE_HAS_DFP 0x00000400 +#define PPC_FEATURE_POWER6_EXT 0x00000200 +#define PPC_FEATURE_ARCH_2_06 0x00000100 +#define PPC_FEATURE_HAS_VSX 0x00000080 + +#define PPC_FEATURE_PSERIES_PERFMON_COMPAT \ + 0x00000040 + +#define PPC_FEATURE_TRUE_LE 0x00000002 +#define PPC_FEATURE_PPC_LE 0x00000001 + +/* Bits present in AT_HWCAP2 for PowerPC. */ + +#define PPC_FEATURE2_ARCH_2_07 0x80000000 +#define PPC_FEATURE2_HAS_HTM 0x40000000 +#define PPC_FEATURE2_HAS_DSCR 0x20000000 +#define PPC_FEATURE2_HAS_EBB 0x10000000 +#define PPC_FEATURE2_HAS_ISEL 0x08000000 +#define PPC_FEATURE2_HAS_TAR 0x04000000 +#define PPC_FEATURE2_HAS_VEC_CRYPTO 0x02000000 +#define PPC_FEATURE2_HTM_NOSC 0x01000000 +#define PPC_FEATURE2_ARCH_3_00 0x00800000 +#define PPC_FEATURE2_HAS_IEEE128 0x00400000 +#define PPC_FEATURE2_ARCH_3_10 0x00040000 + +/* Bits present in AT_HWCAP for Sparc. */ + +#define HWCAP_SPARC_FLUSH 0x00000001 +#define HWCAP_SPARC_STBAR 0x00000002 +#define HWCAP_SPARC_SWAP 0x00000004 +#define HWCAP_SPARC_MULDIV 0x00000008 +#define HWCAP_SPARC_V9 0x00000010 +#define HWCAP_SPARC_ULTRA3 0x00000020 +#define HWCAP_SPARC_BLKINIT 0x00000040 +#define HWCAP_SPARC_N2 0x00000080 +#define HWCAP_SPARC_MUL32 0x00000100 +#define HWCAP_SPARC_DIV32 0x00000200 +#define HWCAP_SPARC_FSMULD 0x00000400 +#define HWCAP_SPARC_V8PLUS 0x00000800 +#define HWCAP_SPARC_POPC 0x00001000 +#define HWCAP_SPARC_VIS 0x00002000 +#define HWCAP_SPARC_VIS2 0x00004000 +#define HWCAP_SPARC_ASI_BLK_INIT 0x00008000 +#define HWCAP_SPARC_FMAF 0x00010000 +#define HWCAP_SPARC_VIS3 0x00020000 +#define HWCAP_SPARC_HPC 0x00040000 +#define HWCAP_SPARC_RANDOM 0x00080000 +#define HWCAP_SPARC_TRANS 0x00100000 +#define HWCAP_SPARC_FJFMAU 0x00200000 +#define HWCAP_SPARC_IMA 0x00400000 +#define HWCAP_SPARC_ASI_CACHE_SPARING 0x00800000 +#define HWCAP_SPARC_PAUSE 0x01000000 +#define HWCAP_SPARC_CBCOND 0x02000000 +#define HWCAP_SPARC_CRYPTO 0x04000000 + +/* Bits present in AT_HWCAP for s390. */ + +#define HWCAP_S390_ESAN3 1 +#define HWCAP_S390_ZARCH 2 +#define HWCAP_S390_STFLE 4 +#define HWCAP_S390_MSA 8 +#define HWCAP_S390_LDISP 16 +#define HWCAP_S390_EIMM 32 +#define HWCAP_S390_DFP 64 +#define HWCAP_S390_HPAGE 128 +#define HWCAP_S390_ETF3EH 256 +#define HWCAP_S390_HIGH_GPRS 512 +#define HWCAP_S390_TE 1024 +#define HWCAP_S390_VXRS 2048 + +/* M68K specific definitions. */ +/* We use the top 24 bits to encode information about the + architecture variant. */ +#define EF_M68K_CPU32 0x00810000 +#define EF_M68K_M68000 0x01000000 +#define EF_M68K_CFV4E 0x00008000 +#define EF_M68K_FIDO 0x02000000 +#define EF_M68K_ARCH_MASK \ + (EF_M68K_M68000 | EF_M68K_CPU32 | EF_M68K_CFV4E | EF_M68K_FIDO) + +/* We use the bottom 8 bits to encode information about the + coldfire variant. If we use any of these bits, the top 24 bits are + either 0 or EF_M68K_CFV4E. */ +#define EF_M68K_CF_ISA_MASK 0x0F /* Which ISA */ +#define EF_M68K_CF_ISA_A_NODIV 0x01 /* ISA A except for div */ +#define EF_M68K_CF_ISA_A 0x02 +#define EF_M68K_CF_ISA_A_PLUS 0x03 +#define EF_M68K_CF_ISA_B_NOUSP 0x04 /* ISA_B except for USP */ +#define EF_M68K_CF_ISA_B 0x05 +#define EF_M68K_CF_ISA_C 0x06 +#define EF_M68K_CF_ISA_C_NODIV 0x07 /* ISA C except for div */ +#define EF_M68K_CF_MAC_MASK 0x30 +#define EF_M68K_CF_MAC 0x10 /* MAC */ +#define EF_M68K_CF_EMAC 0x20 /* EMAC */ +#define EF_M68K_CF_EMAC_B 0x30 /* EMAC_B */ +#define EF_M68K_CF_FLOAT 0x40 /* Has float insns */ +#define EF_M68K_CF_MASK 0xFF + +/* + * 68k ELF relocation types + */ +#define R_68K_NONE 0 +#define R_68K_32 1 +#define R_68K_16 2 +#define R_68K_8 3 +#define R_68K_PC32 4 +#define R_68K_PC16 5 +#define R_68K_PC8 6 +#define R_68K_GOT32 7 +#define R_68K_GOT16 8 +#define R_68K_GOT8 9 +#define R_68K_GOT32O 10 +#define R_68K_GOT16O 11 +#define R_68K_GOT8O 12 +#define R_68K_PLT32 13 +#define R_68K_PLT16 14 +#define R_68K_PLT8 15 +#define R_68K_PLT32O 16 +#define R_68K_PLT16O 17 +#define R_68K_PLT8O 18 +#define R_68K_COPY 19 +#define R_68K_GLOB_DAT 20 +#define R_68K_JMP_SLOT 21 +#define R_68K_RELATIVE 22 + +/* + * Alpha ELF relocation types + */ +#define R_ALPHA_NONE 0 /* No reloc */ +#define R_ALPHA_REFLONG 1 /* Direct 32 bit */ +#define R_ALPHA_REFQUAD 2 /* Direct 64 bit */ +#define R_ALPHA_GPREL32 3 /* GP relative 32 bit */ +#define R_ALPHA_LITERAL 4 /* GP relative 16 bit w/optimization */ +#define R_ALPHA_LITUSE 5 /* Optimization hint for LITERAL */ +#define R_ALPHA_GPDISP 6 /* Add displacement to GP */ +#define R_ALPHA_BRADDR 7 /* PC+4 relative 23 bit shifted */ +#define R_ALPHA_HINT 8 /* PC+4 relative 16 bit shifted */ +#define R_ALPHA_SREL16 9 /* PC relative 16 bit */ +#define R_ALPHA_SREL32 10 /* PC relative 32 bit */ +#define R_ALPHA_SREL64 11 /* PC relative 64 bit */ +#define R_ALPHA_GPRELHIGH 17 /* GP relative 32 bit, high 16 bits */ +#define R_ALPHA_GPRELLOW 18 /* GP relative 32 bit, low 16 bits */ +#define R_ALPHA_GPREL16 19 /* GP relative 16 bit */ +#define R_ALPHA_COPY 24 /* Copy symbol at runtime */ +#define R_ALPHA_GLOB_DAT 25 /* Create GOT entry */ +#define R_ALPHA_JMP_SLOT 26 /* Create PLT entry */ +#define R_ALPHA_RELATIVE 27 /* Adjust by program base */ +#define R_ALPHA_BRSGP 28 +#define R_ALPHA_TLSGD 29 +#define R_ALPHA_TLS_LDM 30 +#define R_ALPHA_DTPMOD64 31 +#define R_ALPHA_GOTDTPREL 32 +#define R_ALPHA_DTPREL64 33 +#define R_ALPHA_DTPRELHI 34 +#define R_ALPHA_DTPRELLO 35 +#define R_ALPHA_DTPREL16 36 +#define R_ALPHA_GOTTPREL 37 +#define R_ALPHA_TPREL64 38 +#define R_ALPHA_TPRELHI 39 +#define R_ALPHA_TPRELLO 40 +#define R_ALPHA_TPREL16 41 + +#define SHF_ALPHA_GPREL 0x10000000 + + +/* PowerPC specific definitions. */ + +/* Processor specific flags for the ELF header e_flags field. */ +#define EF_PPC64_ABI 0x3 + +/* PowerPC relocations defined by the ABIs */ +#define R_PPC_NONE 0 +#define R_PPC_ADDR32 1 /* 32bit absolute address */ +#define R_PPC_ADDR24 2 /* 26bit address, 2 bits ignored. */ +#define R_PPC_ADDR16 3 /* 16bit absolute address */ +#define R_PPC_ADDR16_LO 4 /* lower 16bit of absolute address */ +#define R_PPC_ADDR16_HI 5 /* high 16bit of absolute address */ +#define R_PPC_ADDR16_HA 6 /* adjusted high 16bit */ +#define R_PPC_ADDR14 7 /* 16bit address, 2 bits ignored */ +#define R_PPC_ADDR14_BRTAKEN 8 +#define R_PPC_ADDR14_BRNTAKEN 9 +#define R_PPC_REL24 10 /* PC relative 26 bit */ +#define R_PPC_REL14 11 /* PC relative 16 bit */ +#define R_PPC_REL14_BRTAKEN 12 +#define R_PPC_REL14_BRNTAKEN 13 +#define R_PPC_GOT16 14 +#define R_PPC_GOT16_LO 15 +#define R_PPC_GOT16_HI 16 +#define R_PPC_GOT16_HA 17 +#define R_PPC_PLTREL24 18 +#define R_PPC_COPY 19 +#define R_PPC_GLOB_DAT 20 +#define R_PPC_JMP_SLOT 21 +#define R_PPC_RELATIVE 22 +#define R_PPC_LOCAL24PC 23 +#define R_PPC_UADDR32 24 +#define R_PPC_UADDR16 25 +#define R_PPC_REL32 26 +#define R_PPC_PLT32 27 +#define R_PPC_PLTREL32 28 +#define R_PPC_PLT16_LO 29 +#define R_PPC_PLT16_HI 30 +#define R_PPC_PLT16_HA 31 +#define R_PPC_SDAREL16 32 +#define R_PPC_SECTOFF 33 +#define R_PPC_SECTOFF_LO 34 +#define R_PPC_SECTOFF_HI 35 +#define R_PPC_SECTOFF_HA 36 +/* Keep this the last entry. */ +#ifndef R_PPC_NUM +#define R_PPC_NUM 37 +#endif + +/* ARM specific declarations */ + +/* Processor specific flags for the ELF header e_flags field. */ +#define EF_ARM_RELEXEC 0x01 +#define EF_ARM_HASENTRY 0x02 +#define EF_ARM_INTERWORK 0x04 +#define EF_ARM_APCS_26 0x08 +#define EF_ARM_APCS_FLOAT 0x10 +#define EF_ARM_PIC 0x20 +#define EF_ALIGN8 0x40 /* 8-bit structure alignment is in use */ +#define EF_NEW_ABI 0x80 +#define EF_OLD_ABI 0x100 +#define EF_ARM_SOFT_FLOAT 0x200 +#define EF_ARM_VFP_FLOAT 0x400 +#define EF_ARM_MAVERICK_FLOAT 0x800 + +/* Other constants defined in the ARM ELF spec. version B-01. */ +#define EF_ARM_SYMSARESORTED 0x04 /* NB conflicts with EF_INTERWORK */ +#define EF_ARM_DYNSYMSUSESEGIDX 0x08 /* NB conflicts with EF_APCS26 */ +#define EF_ARM_MAPSYMSFIRST 0x10 /* NB conflicts with EF_APCS_FLOAT */ +#define EF_ARM_EABIMASK 0xFF000000 + +/* Constants defined in AAELF. */ +#define EF_ARM_BE8 0x00800000 +#define EF_ARM_LE8 0x00400000 + +#define EF_ARM_EABI_VERSION(flags) ((flags) & EF_ARM_EABIMASK) +#define EF_ARM_EABI_UNKNOWN 0x00000000 +#define EF_ARM_EABI_VER1 0x01000000 +#define EF_ARM_EABI_VER2 0x02000000 +#define EF_ARM_EABI_VER3 0x03000000 +#define EF_ARM_EABI_VER4 0x04000000 +#define EF_ARM_EABI_VER5 0x05000000 + +/* Additional symbol types for Thumb */ +#define STT_ARM_TFUNC 0xd + +/* ARM-specific values for sh_flags */ +#define SHF_ARM_ENTRYSECT 0x10000000 /* Section contains an entry point */ +#define SHF_ARM_COMDEF 0x80000000 /* Section may be multiply defined + in the input to a link step */ + +/* ARM-specific program header flags */ +#define PF_ARM_SB 0x10000000 /* Segment contains the location + addressed by the static base */ + +/* ARM relocs. */ +#define R_ARM_NONE 0 /* No reloc */ +#define R_ARM_PC24 1 /* PC relative 26 bit branch */ +#define R_ARM_ABS32 2 /* Direct 32 bit */ +#define R_ARM_REL32 3 /* PC relative 32 bit */ +#define R_ARM_PC13 4 +#define R_ARM_ABS16 5 /* Direct 16 bit */ +#define R_ARM_ABS12 6 /* Direct 12 bit */ +#define R_ARM_THM_ABS5 7 +#define R_ARM_ABS8 8 /* Direct 8 bit */ +#define R_ARM_SBREL32 9 +#define R_ARM_THM_PC22 10 +#define R_ARM_THM_PC8 11 +#define R_ARM_AMP_VCALL9 12 +#define R_ARM_SWI24 13 +#define R_ARM_THM_SWI8 14 +#define R_ARM_XPC25 15 +#define R_ARM_THM_XPC22 16 +#define R_ARM_COPY 20 /* Copy symbol at runtime */ +#define R_ARM_GLOB_DAT 21 /* Create GOT entry */ +#define R_ARM_JUMP_SLOT 22 /* Create PLT entry */ +#define R_ARM_RELATIVE 23 /* Adjust by program base */ +#define R_ARM_GOTOFF 24 /* 32 bit offset to GOT */ +#define R_ARM_GOTPC 25 /* 32 bit PC relative offset to GOT */ +#define R_ARM_GOT32 26 /* 32 bit GOT entry */ +#define R_ARM_PLT32 27 /* 32 bit PLT address */ +#define R_ARM_CALL 28 +#define R_ARM_JUMP24 29 +#define R_ARM_GNU_VTENTRY 100 +#define R_ARM_GNU_VTINHERIT 101 +#define R_ARM_THM_PC11 102 /* thumb unconditional branch */ +#define R_ARM_THM_PC9 103 /* thumb conditional branch */ +#define R_ARM_RXPC25 249 +#define R_ARM_RSBREL32 250 +#define R_ARM_THM_RPC22 251 +#define R_ARM_RREL32 252 +#define R_ARM_RABS22 253 +#define R_ARM_RPC24 254 +#define R_ARM_RBASE 255 +/* Keep this the last entry. */ +#define R_ARM_NUM 256 + +/* ARM Aarch64 relocation types */ +#define R_AARCH64_NONE 256 /* also accepts R_ARM_NONE (0) */ +/* static data relocations */ +#define R_AARCH64_ABS64 257 +#define R_AARCH64_ABS32 258 +#define R_AARCH64_ABS16 259 +#define R_AARCH64_PREL64 260 +#define R_AARCH64_PREL32 261 +#define R_AARCH64_PREL16 262 +/* static aarch64 group relocations */ +/* group relocs to create unsigned data value or address inline */ +#define R_AARCH64_MOVW_UABS_G0 263 +#define R_AARCH64_MOVW_UABS_G0_NC 264 +#define R_AARCH64_MOVW_UABS_G1 265 +#define R_AARCH64_MOVW_UABS_G1_NC 266 +#define R_AARCH64_MOVW_UABS_G2 267 +#define R_AARCH64_MOVW_UABS_G2_NC 268 +#define R_AARCH64_MOVW_UABS_G3 269 +/* group relocs to create signed data or offset value inline */ +#define R_AARCH64_MOVW_SABS_G0 270 +#define R_AARCH64_MOVW_SABS_G1 271 +#define R_AARCH64_MOVW_SABS_G2 272 +/* relocs to generate 19, 21, and 33 bit PC-relative addresses */ +#define R_AARCH64_LD_PREL_LO19 273 +#define R_AARCH64_ADR_PREL_LO21 274 +#define R_AARCH64_ADR_PREL_PG_HI21 275 +#define R_AARCH64_ADR_PREL_PG_HI21_NC 276 +#define R_AARCH64_ADD_ABS_LO12_NC 277 +#define R_AARCH64_LDST8_ABS_LO12_NC 278 +#define R_AARCH64_LDST16_ABS_LO12_NC 284 +#define R_AARCH64_LDST32_ABS_LO12_NC 285 +#define R_AARCH64_LDST64_ABS_LO12_NC 286 +#define R_AARCH64_LDST128_ABS_LO12_NC 299 +/* relocs for control-flow - all offsets as multiple of 4 */ +#define R_AARCH64_TSTBR14 279 +#define R_AARCH64_CONDBR19 280 +#define R_AARCH64_JUMP26 282 +#define R_AARCH64_CALL26 283 +/* group relocs to create pc-relative offset inline */ +#define R_AARCH64_MOVW_PREL_G0 287 +#define R_AARCH64_MOVW_PREL_G0_NC 288 +#define R_AARCH64_MOVW_PREL_G1 289 +#define R_AARCH64_MOVW_PREL_G1_NC 290 +#define R_AARCH64_MOVW_PREL_G2 291 +#define R_AARCH64_MOVW_PREL_G2_NC 292 +#define R_AARCH64_MOVW_PREL_G3 293 +/* group relocs to create a GOT-relative offset inline */ +#define R_AARCH64_MOVW_GOTOFF_G0 300 +#define R_AARCH64_MOVW_GOTOFF_G0_NC 301 +#define R_AARCH64_MOVW_GOTOFF_G1 302 +#define R_AARCH64_MOVW_GOTOFF_G1_NC 303 +#define R_AARCH64_MOVW_GOTOFF_G2 304 +#define R_AARCH64_MOVW_GOTOFF_G2_NC 305 +#define R_AARCH64_MOVW_GOTOFF_G3 306 +/* GOT-relative data relocs */ +#define R_AARCH64_GOTREL64 307 +#define R_AARCH64_GOTREL32 308 +/* GOT-relative instr relocs */ +#define R_AARCH64_GOT_LD_PREL19 309 +#define R_AARCH64_LD64_GOTOFF_LO15 310 +#define R_AARCH64_ADR_GOT_PAGE 311 +#define R_AARCH64_LD64_GOT_LO12_NC 312 +#define R_AARCH64_LD64_GOTPAGE_LO15 313 +/* General Dynamic TLS relocations */ +#define R_AARCH64_TLSGD_ADR_PREL21 512 +#define R_AARCH64_TLSGD_ADR_PAGE21 513 +#define R_AARCH64_TLSGD_ADD_LO12_NC 514 +#define R_AARCH64_TLSGD_MOVW_G1 515 +#define R_AARCH64_TLSGD_MOVW_G0_NC 516 +/* Local Dynamic TLS relocations */ +#define R_AARCH64_TLSLD_ADR_PREL21 517 +#define R_AARCH64_TLSLD_ADR_PAGE21 518 +#define R_AARCH64_TLSLD_ADD_LO12_NC 519 +#define R_AARCH64_TLSLD_MOVW_G1 520 +#define R_AARCH64_TLSLD_MOVW_G0_NC 521 +#define R_AARCH64_TLSLD_LD_PREL19 522 +#define R_AARCH64_TLSLD_MOVW_DTPREL_G2 523 +#define R_AARCH64_TLSLD_MOVW_DTPREL_G1 524 +#define R_AARCH64_TLSLD_MOVW_DTPREL_G1_NC 525 +#define R_AARCH64_TLSLD_MOVW_DTPREL_G0 526 +#define R_AARCH64_TLSLD_MOVW_DTPREL_G0_NC 527 +#define R_AARCH64_TLSLD_ADD_DTPREL_HI12 528 +#define R_AARCH64_TLSLD_ADD_DTPREL_LO12 529 +#define R_AARCH64_TLSLD_ADD_DTPREL_LO12_NC 530 +#define R_AARCH64_TLSLD_LDST8_DTPREL_LO12 531 +#define R_AARCH64_TLSLD_LDST8_DTPREL_LO12_NC 532 +#define R_AARCH64_TLSLD_LDST16_DTPREL_LO12 533 +#define R_AARCH64_TLSLD_LDST16_DTPREL_LO12_NC 534 +#define R_AARCH64_TLSLD_LDST32_DTPREL_LO12 535 +#define R_AARCH64_TLSLD_LDST32_DTPREL_LO12_NC 536 +#define R_AARCH64_TLSLD_LDST64_DTPREL_LO12 537 +#define R_AARCH64_TLSLD_LDST64_DTPREL_LO12_NC 538 +/* initial exec TLS relocations */ +#define R_AARCH64_TLSIE_MOVW_GOTTPREL_G1 539 +#define R_AARCH64_TLSIE_MOVW_GOTTPREL_G0_NC 540 +#define R_AARCH64_TLSIE_ADR_GOTTPREL_PAGE21 541 +#define R_AARCH64_TLSIE_LD64_GOTTPREL_LO12_NC 542 +#define R_AARCH64_TLSIE_LD_GOTTPREL_PREL19 543 +/* local exec TLS relocations */ +#define R_AARCH64_TLSLE_MOVW_TPREL_G2 544 +#define R_AARCH64_TLSLE_MOVW_TPREL_G1 545 +#define R_AARCH64_TLSLE_MOVW_TPREL_G1_NC 546 +#define R_AARCH64_TLSLE_MOVW_TPREL_G0 547 +#define R_AARCH64_TLSLE_MOVW_TPREL_G0_NC 548 +#define R_AARCH64_TLSLE_ADD_TPREL_HI12 549 +#define R_AARCH64_TLSLE_ADD_TPREL_LO12 550 +#define R_AARCH64_TLSLE_ADD_TPREL_LO12_NC 551 +#define R_AARCH64_TLSLE_LDST8_TPREL_LO12 552 +#define R_AARCH64_TLSLE_LDST8_TPREL_LO12_NC 553 +#define R_AARCH64_TLSLE_LDST16_TPREL_LO12 554 +#define R_AARCH64_TLSLE_LDST16_TPREL_LO12_NC 555 +#define R_AARCH64_TLSLE_LDST32_TPREL_LO12 556 +#define R_AARCH64_TLSLE_LDST32_TPREL_LO12_NC 557 +#define R_AARCH64_TLSLE_LDST64_TPREL_LO12 558 +#define R_AARCH64_TLSLE_LDST64_TPREL_LO12_NC 559 +/* Dynamic Relocations */ +#define R_AARCH64_COPY 1024 +#define R_AARCH64_GLOB_DAT 1025 +#define R_AARCH64_JUMP_SLOT 1026 +#define R_AARCH64_RELATIVE 1027 +#define R_AARCH64_TLS_DTPREL64 1028 +#define R_AARCH64_TLS_DTPMOD64 1029 +#define R_AARCH64_TLS_TPREL64 1030 +#define R_AARCH64_TLS_DTPREL32 1031 +#define R_AARCH64_TLS_DTPMOD32 1032 +#define R_AARCH64_TLS_TPREL32 1033 + +/* s390 relocations defined by the ABIs */ +#define R_390_NONE 0 /* No reloc. */ +#define R_390_8 1 /* Direct 8 bit. */ +#define R_390_12 2 /* Direct 12 bit. */ +#define R_390_16 3 /* Direct 16 bit. */ +#define R_390_32 4 /* Direct 32 bit. */ +#define R_390_PC32 5 /* PC relative 32 bit. */ +#define R_390_GOT12 6 /* 12 bit GOT offset. */ +#define R_390_GOT32 7 /* 32 bit GOT offset. */ +#define R_390_PLT32 8 /* 32 bit PC relative PLT address. */ +#define R_390_COPY 9 /* Copy symbol at runtime. */ +#define R_390_GLOB_DAT 10 /* Create GOT entry. */ +#define R_390_JMP_SLOT 11 /* Create PLT entry. */ +#define R_390_RELATIVE 12 /* Adjust by program base. */ +#define R_390_GOTOFF32 13 /* 32 bit offset to GOT. */ +#define R_390_GOTPC 14 /* 32 bit PC rel. offset to GOT. */ +#define R_390_GOT16 15 /* 16 bit GOT offset. */ +#define R_390_PC16 16 /* PC relative 16 bit. */ +#define R_390_PC16DBL 17 /* PC relative 16 bit shifted by 1. */ +#define R_390_PLT16DBL 18 /* 16 bit PC rel. PLT shifted by 1. */ +#define R_390_PC32DBL 19 /* PC relative 32 bit shifted by 1. */ +#define R_390_PLT32DBL 20 /* 32 bit PC rel. PLT shifted by 1. */ +#define R_390_GOTPCDBL 21 /* 32 bit PC rel. GOT shifted by 1. */ +#define R_390_64 22 /* Direct 64 bit. */ +#define R_390_PC64 23 /* PC relative 64 bit. */ +#define R_390_GOT64 24 /* 64 bit GOT offset. */ +#define R_390_PLT64 25 /* 64 bit PC relative PLT address. */ +#define R_390_GOTENT 26 /* 32 bit PC rel. to GOT entry >> 1. */ +#define R_390_GOTOFF16 27 /* 16 bit offset to GOT. */ +#define R_390_GOTOFF64 28 /* 64 bit offset to GOT. */ +#define R_390_GOTPLT12 29 /* 12 bit offset to jump slot. */ +#define R_390_GOTPLT16 30 /* 16 bit offset to jump slot. */ +#define R_390_GOTPLT32 31 /* 32 bit offset to jump slot. */ +#define R_390_GOTPLT64 32 /* 64 bit offset to jump slot. */ +#define R_390_GOTPLTENT 33 /* 32 bit rel. offset to jump slot. */ +#define R_390_PLTOFF16 34 /* 16 bit offset from GOT to PLT. */ +#define R_390_PLTOFF32 35 /* 32 bit offset from GOT to PLT. */ +#define R_390_PLTOFF64 36 /* 16 bit offset from GOT to PLT. */ +#define R_390_TLS_LOAD 37 /* Tag for load insn in TLS code. */ +#define R_390_TLS_GDCALL 38 /* Tag for function call in general + dynamic TLS code. */ +#define R_390_TLS_LDCALL 39 /* Tag for function call in local + dynamic TLS code. */ +#define R_390_TLS_GD32 40 /* Direct 32 bit for general dynamic + thread local data. */ +#define R_390_TLS_GD64 41 /* Direct 64 bit for general dynamic + thread local data. */ +#define R_390_TLS_GOTIE12 42 /* 12 bit GOT offset for static TLS + block offset. */ +#define R_390_TLS_GOTIE32 43 /* 32 bit GOT offset for static TLS + block offset. */ +#define R_390_TLS_GOTIE64 44 /* 64 bit GOT offset for static TLS + block offset. */ +#define R_390_TLS_LDM32 45 /* Direct 32 bit for local dynamic + thread local data in LD code. */ +#define R_390_TLS_LDM64 46 /* Direct 64 bit for local dynamic + thread local data in LD code. */ +#define R_390_TLS_IE32 47 /* 32 bit address of GOT entry for + negated static TLS block offset. */ +#define R_390_TLS_IE64 48 /* 64 bit address of GOT entry for + negated static TLS block offset. */ +#define R_390_TLS_IEENT 49 /* 32 bit rel. offset to GOT entry for + negated static TLS block offset. */ +#define R_390_TLS_LE32 50 /* 32 bit negated offset relative to + static TLS block. */ +#define R_390_TLS_LE64 51 /* 64 bit negated offset relative to + static TLS block. */ +#define R_390_TLS_LDO32 52 /* 32 bit offset relative to TLS + block. */ +#define R_390_TLS_LDO64 53 /* 64 bit offset relative to TLS + block. */ +#define R_390_TLS_DTPMOD 54 /* ID of module containing symbol. */ +#define R_390_TLS_DTPOFF 55 /* Offset in TLS block. */ +#define R_390_TLS_TPOFF 56 /* Negate offset in static TLS + block. */ +#define R_390_20 57 +/* Keep this the last entry. */ +#define R_390_NUM 58 + +/* x86-64 relocation types */ +#define R_X86_64_NONE 0 /* No reloc */ +#define R_X86_64_64 1 /* Direct 64 bit */ +#define R_X86_64_PC32 2 /* PC relative 32 bit signed */ +#define R_X86_64_GOT32 3 /* 32 bit GOT entry */ +#define R_X86_64_PLT32 4 /* 32 bit PLT address */ +#define R_X86_64_COPY 5 /* Copy symbol at runtime */ +#define R_X86_64_GLOB_DAT 6 /* Create GOT entry */ +#define R_X86_64_JUMP_SLOT 7 /* Create PLT entry */ +#define R_X86_64_RELATIVE 8 /* Adjust by program base */ +#define R_X86_64_GOTPCREL 9 /* 32 bit signed pc relative + offset to GOT */ +#define R_X86_64_32 10 /* Direct 32 bit zero extended */ +#define R_X86_64_32S 11 /* Direct 32 bit sign extended */ +#define R_X86_64_16 12 /* Direct 16 bit zero extended */ +#define R_X86_64_PC16 13 /* 16 bit sign extended pc relative */ +#define R_X86_64_8 14 /* Direct 8 bit sign extended */ +#define R_X86_64_PC8 15 /* 8 bit sign extended pc relative */ + +#define R_X86_64_NUM 16 + +/* Legal values for e_flags field of Elf64_Ehdr. */ + +#define EF_ALPHA_32BIT 1 /* All addresses are below 2GB */ + +/* HPPA specific definitions. */ + +/* Legal values for e_flags field of Elf32_Ehdr. */ + +#define EF_PARISC_TRAPNIL 0x00010000 /* Trap nil pointer dereference. */ +#define EF_PARISC_EXT 0x00020000 /* Program uses arch. extensions. */ +#define EF_PARISC_LSB 0x00040000 /* Program expects little endian. */ +#define EF_PARISC_WIDE 0x00080000 /* Program expects wide mode. */ +#define EF_PARISC_NO_KABP 0x00100000 /* No kernel assisted branch + prediction. */ +#define EF_PARISC_LAZYSWAP 0x00400000 /* Allow lazy swapping. */ +#define EF_PARISC_ARCH 0x0000ffff /* Architecture version. */ + +/* Defined values for `e_flags & EF_PARISC_ARCH' are: */ + +#define EFA_PARISC_1_0 0x020b /* PA-RISC 1.0 big-endian. */ +#define EFA_PARISC_1_1 0x0210 /* PA-RISC 1.1 big-endian. */ +#define EFA_PARISC_2_0 0x0214 /* PA-RISC 2.0 big-endian. */ + +/* Additional section indeces. */ + +#define SHN_PARISC_ANSI_COMMON 0xff00 /* Section for tenatively declared + symbols in ANSI C. */ +#define SHN_PARISC_HUGE_COMMON 0xff01 /* Common blocks in huge model. */ + +/* Legal values for sh_type field of Elf32_Shdr. */ + +#define SHT_PARISC_EXT 0x70000000 /* Contains product specific ext. */ +#define SHT_PARISC_UNWIND 0x70000001 /* Unwind information. */ +#define SHT_PARISC_DOC 0x70000002 /* Debug info for optimized code. */ + +/* Legal values for sh_flags field of Elf32_Shdr. */ + +#define SHF_PARISC_SHORT 0x20000000 /* Section with short addressing. */ +#define SHF_PARISC_HUGE 0x40000000 /* Section far from gp. */ +#define SHF_PARISC_SBP 0x80000000 /* Static branch prediction code. */ + +/* Legal values for ST_TYPE subfield of st_info (symbol type). */ + +#define STT_PARISC_MILLICODE 13 /* Millicode function entry point. */ + +#define STT_HP_OPAQUE (STT_LOOS + 0x1) +#define STT_HP_STUB (STT_LOOS + 0x2) + +/* HPPA relocs. */ + +#define R_PARISC_NONE 0 /* No reloc. */ +#define R_PARISC_DIR32 1 /* Direct 32-bit reference. */ +#define R_PARISC_DIR21L 2 /* Left 21 bits of eff. address. */ +#define R_PARISC_DIR17R 3 /* Right 17 bits of eff. address. */ +#define R_PARISC_DIR17F 4 /* 17 bits of eff. address. */ +#define R_PARISC_DIR14R 6 /* Right 14 bits of eff. address. */ +#define R_PARISC_PCREL32 9 /* 32-bit rel. address. */ +#define R_PARISC_PCREL21L 10 /* Left 21 bits of rel. address. */ +#define R_PARISC_PCREL17R 11 /* Right 17 bits of rel. address. */ +#define R_PARISC_PCREL17F 12 /* 17 bits of rel. address. */ +#define R_PARISC_PCREL14R 14 /* Right 14 bits of rel. address. */ +#define R_PARISC_DPREL21L 18 /* Left 21 bits of rel. address. */ +#define R_PARISC_DPREL14R 22 /* Right 14 bits of rel. address. */ +#define R_PARISC_GPREL21L 26 /* GP-relative, left 21 bits. */ +#define R_PARISC_GPREL14R 30 /* GP-relative, right 14 bits. */ +#define R_PARISC_LTOFF21L 34 /* LT-relative, left 21 bits. */ +#define R_PARISC_LTOFF14R 38 /* LT-relative, right 14 bits. */ +#define R_PARISC_SECREL32 41 /* 32 bits section rel. address. */ +#define R_PARISC_SEGBASE 48 /* No relocation, set segment base. */ +#define R_PARISC_SEGREL32 49 /* 32 bits segment rel. address. */ +#define R_PARISC_PLTOFF21L 50 /* PLT rel. address, left 21 bits. */ +#define R_PARISC_PLTOFF14R 54 /* PLT rel. address, right 14 bits. */ +#define R_PARISC_LTOFF_FPTR32 57 /* 32 bits LT-rel. function pointer. */ +#define R_PARISC_LTOFF_FPTR21L 58 /* LT-rel. fct ptr, left 21 bits. */ +#define R_PARISC_LTOFF_FPTR14R 62 /* LT-rel. fct ptr, right 14 bits. */ +#define R_PARISC_FPTR64 64 /* 64 bits function address. */ +#define R_PARISC_PLABEL32 65 /* 32 bits function address. */ +#define R_PARISC_PCREL64 72 /* 64 bits PC-rel. address. */ +#define R_PARISC_PCREL22F 74 /* 22 bits PC-rel. address. */ +#define R_PARISC_PCREL14WR 75 /* PC-rel. address, right 14 bits. */ +#define R_PARISC_PCREL14DR 76 /* PC rel. address, right 14 bits. */ +#define R_PARISC_PCREL16F 77 /* 16 bits PC-rel. address. */ +#define R_PARISC_PCREL16WF 78 /* 16 bits PC-rel. address. */ +#define R_PARISC_PCREL16DF 79 /* 16 bits PC-rel. address. */ +#define R_PARISC_DIR64 80 /* 64 bits of eff. address. */ +#define R_PARISC_DIR14WR 83 /* 14 bits of eff. address. */ +#define R_PARISC_DIR14DR 84 /* 14 bits of eff. address. */ +#define R_PARISC_DIR16F 85 /* 16 bits of eff. address. */ +#define R_PARISC_DIR16WF 86 /* 16 bits of eff. address. */ +#define R_PARISC_DIR16DF 87 /* 16 bits of eff. address. */ +#define R_PARISC_GPREL64 88 /* 64 bits of GP-rel. address. */ +#define R_PARISC_GPREL14WR 91 /* GP-rel. address, right 14 bits. */ +#define R_PARISC_GPREL14DR 92 /* GP-rel. address, right 14 bits. */ +#define R_PARISC_GPREL16F 93 /* 16 bits GP-rel. address. */ +#define R_PARISC_GPREL16WF 94 /* 16 bits GP-rel. address. */ +#define R_PARISC_GPREL16DF 95 /* 16 bits GP-rel. address. */ +#define R_PARISC_LTOFF64 96 /* 64 bits LT-rel. address. */ +#define R_PARISC_LTOFF14WR 99 /* LT-rel. address, right 14 bits. */ +#define R_PARISC_LTOFF14DR 100 /* LT-rel. address, right 14 bits. */ +#define R_PARISC_LTOFF16F 101 /* 16 bits LT-rel. address. */ +#define R_PARISC_LTOFF16WF 102 /* 16 bits LT-rel. address. */ +#define R_PARISC_LTOFF16DF 103 /* 16 bits LT-rel. address. */ +#define R_PARISC_SECREL64 104 /* 64 bits section rel. address. */ +#define R_PARISC_SEGREL64 112 /* 64 bits segment rel. address. */ +#define R_PARISC_PLTOFF14WR 115 /* PLT-rel. address, right 14 bits. */ +#define R_PARISC_PLTOFF14DR 116 /* PLT-rel. address, right 14 bits. */ +#define R_PARISC_PLTOFF16F 117 /* 16 bits LT-rel. address. */ +#define R_PARISC_PLTOFF16WF 118 /* 16 bits PLT-rel. address. */ +#define R_PARISC_PLTOFF16DF 119 /* 16 bits PLT-rel. address. */ +#define R_PARISC_LTOFF_FPTR64 120 /* 64 bits LT-rel. function ptr. */ +#define R_PARISC_LTOFF_FPTR14WR 123 /* LT-rel. fct. ptr., right 14 bits. */ +#define R_PARISC_LTOFF_FPTR14DR 124 /* LT-rel. fct. ptr., right 14 bits. */ +#define R_PARISC_LTOFF_FPTR16F 125 /* 16 bits LT-rel. function ptr. */ +#define R_PARISC_LTOFF_FPTR16WF 126 /* 16 bits LT-rel. function ptr. */ +#define R_PARISC_LTOFF_FPTR16DF 127 /* 16 bits LT-rel. function ptr. */ +#define R_PARISC_LORESERVE 128 +#define R_PARISC_COPY 128 /* Copy relocation. */ +#define R_PARISC_IPLT 129 /* Dynamic reloc, imported PLT */ +#define R_PARISC_EPLT 130 /* Dynamic reloc, exported PLT */ +#define R_PARISC_TPREL32 153 /* 32 bits TP-rel. address. */ +#define R_PARISC_TPREL21L 154 /* TP-rel. address, left 21 bits. */ +#define R_PARISC_TPREL14R 158 /* TP-rel. address, right 14 bits. */ +#define R_PARISC_LTOFF_TP21L 162 /* LT-TP-rel. address, left 21 bits. */ +#define R_PARISC_LTOFF_TP14R 166 /* LT-TP-rel. address, right 14 bits.*/ +#define R_PARISC_LTOFF_TP14F 167 /* 14 bits LT-TP-rel. address. */ +#define R_PARISC_TPREL64 216 /* 64 bits TP-rel. address. */ +#define R_PARISC_TPREL14WR 219 /* TP-rel. address, right 14 bits. */ +#define R_PARISC_TPREL14DR 220 /* TP-rel. address, right 14 bits. */ +#define R_PARISC_TPREL16F 221 /* 16 bits TP-rel. address. */ +#define R_PARISC_TPREL16WF 222 /* 16 bits TP-rel. address. */ +#define R_PARISC_TPREL16DF 223 /* 16 bits TP-rel. address. */ +#define R_PARISC_LTOFF_TP64 224 /* 64 bits LT-TP-rel. address. */ +#define R_PARISC_LTOFF_TP14WR 227 /* LT-TP-rel. address, right 14 bits.*/ +#define R_PARISC_LTOFF_TP14DR 228 /* LT-TP-rel. address, right 14 bits.*/ +#define R_PARISC_LTOFF_TP16F 229 /* 16 bits LT-TP-rel. address. */ +#define R_PARISC_LTOFF_TP16WF 230 /* 16 bits LT-TP-rel. address. */ +#define R_PARISC_LTOFF_TP16DF 231 /* 16 bits LT-TP-rel. address. */ +#define R_PARISC_HIRESERVE 255 + +/* Legal values for p_type field of Elf32_Phdr/Elf64_Phdr. */ + +#define PT_HP_TLS (PT_LOOS + 0x0) +#define PT_HP_CORE_NONE (PT_LOOS + 0x1) +#define PT_HP_CORE_VERSION (PT_LOOS + 0x2) +#define PT_HP_CORE_KERNEL (PT_LOOS + 0x3) +#define PT_HP_CORE_COMM (PT_LOOS + 0x4) +#define PT_HP_CORE_PROC (PT_LOOS + 0x5) +#define PT_HP_CORE_LOADABLE (PT_LOOS + 0x6) +#define PT_HP_CORE_STACK (PT_LOOS + 0x7) +#define PT_HP_CORE_SHM (PT_LOOS + 0x8) +#define PT_HP_CORE_MMF (PT_LOOS + 0x9) +#define PT_HP_PARALLEL (PT_LOOS + 0x10) +#define PT_HP_FASTBIND (PT_LOOS + 0x11) +#define PT_HP_OPT_ANNOT (PT_LOOS + 0x12) +#define PT_HP_HSL_ANNOT (PT_LOOS + 0x13) +#define PT_HP_STACK (PT_LOOS + 0x14) + +#define PT_PARISC_ARCHEXT 0x70000000 +#define PT_PARISC_UNWIND 0x70000001 + +/* Legal values for p_flags field of Elf32_Phdr/Elf64_Phdr. */ + +#define PF_PARISC_SBP 0x08000000 + +#define PF_HP_PAGE_SIZE 0x00100000 +#define PF_HP_FAR_SHARED 0x00200000 +#define PF_HP_NEAR_SHARED 0x00400000 +#define PF_HP_CODE 0x01000000 +#define PF_HP_MODIFY 0x02000000 +#define PF_HP_LAZYSWAP 0x04000000 +#define PF_HP_SBP 0x08000000 + +/* IA-64 specific declarations. */ + +/* Processor specific flags for the Ehdr e_flags field. */ +#define EF_IA_64_MASKOS 0x0000000f /* os-specific flags */ +#define EF_IA_64_ABI64 0x00000010 /* 64-bit ABI */ +#define EF_IA_64_ARCH 0xff000000 /* arch. version mask */ + +/* Processor specific values for the Phdr p_type field. */ +#define PT_IA_64_ARCHEXT (PT_LOPROC + 0) /* arch extension bits */ +#define PT_IA_64_UNWIND (PT_LOPROC + 1) /* ia64 unwind bits */ + +/* Processor specific flags for the Phdr p_flags field. */ +#define PF_IA_64_NORECOV 0x80000000 /* spec insns w/o recovery */ + +/* Processor specific values for the Shdr sh_type field. */ +#define SHT_IA_64_EXT (SHT_LOPROC + 0) /* extension bits */ +#define SHT_IA_64_UNWIND (SHT_LOPROC + 1) /* unwind bits */ + +/* Processor specific flags for the Shdr sh_flags field. */ +#define SHF_IA_64_SHORT 0x10000000 /* section near gp */ +#define SHF_IA_64_NORECOV 0x20000000 /* spec insns w/o recovery */ + +/* Processor specific values for the Dyn d_tag field. */ +#define DT_IA_64_PLT_RESERVE (DT_LOPROC + 0) +#define DT_IA_64_NUM 1 + +/* IA-64 relocations. */ +#define R_IA64_NONE 0x00 /* none */ +#define R_IA64_IMM14 0x21 /* symbol + addend, add imm14 */ +#define R_IA64_IMM22 0x22 /* symbol + addend, add imm22 */ +#define R_IA64_IMM64 0x23 /* symbol + addend, mov imm64 */ +#define R_IA64_DIR32MSB 0x24 /* symbol + addend, data4 MSB */ +#define R_IA64_DIR32LSB 0x25 /* symbol + addend, data4 LSB */ +#define R_IA64_DIR64MSB 0x26 /* symbol + addend, data8 MSB */ +#define R_IA64_DIR64LSB 0x27 /* symbol + addend, data8 LSB */ +#define R_IA64_GPREL22 0x2a /* @gprel(sym + add), add imm22 */ +#define R_IA64_GPREL64I 0x2b /* @gprel(sym + add), mov imm64 */ +#define R_IA64_GPREL32MSB 0x2c /* @gprel(sym + add), data4 MSB */ +#define R_IA64_GPREL32LSB 0x2d /* @gprel(sym + add), data4 LSB */ +#define R_IA64_GPREL64MSB 0x2e /* @gprel(sym + add), data8 MSB */ +#define R_IA64_GPREL64LSB 0x2f /* @gprel(sym + add), data8 LSB */ +#define R_IA64_LTOFF22 0x32 /* @ltoff(sym + add), add imm22 */ +#define R_IA64_LTOFF64I 0x33 /* @ltoff(sym + add), mov imm64 */ +#define R_IA64_PLTOFF22 0x3a /* @pltoff(sym + add), add imm22 */ +#define R_IA64_PLTOFF64I 0x3b /* @pltoff(sym + add), mov imm64 */ +#define R_IA64_PLTOFF64MSB 0x3e /* @pltoff(sym + add), data8 MSB */ +#define R_IA64_PLTOFF64LSB 0x3f /* @pltoff(sym + add), data8 LSB */ +#define R_IA64_FPTR64I 0x43 /* @fptr(sym + add), mov imm64 */ +#define R_IA64_FPTR32MSB 0x44 /* @fptr(sym + add), data4 MSB */ +#define R_IA64_FPTR32LSB 0x45 /* @fptr(sym + add), data4 LSB */ +#define R_IA64_FPTR64MSB 0x46 /* @fptr(sym + add), data8 MSB */ +#define R_IA64_FPTR64LSB 0x47 /* @fptr(sym + add), data8 LSB */ +#define R_IA64_PCREL60B 0x48 /* @pcrel(sym + add), brl */ +#define R_IA64_PCREL21B 0x49 /* @pcrel(sym + add), ptb, call */ +#define R_IA64_PCREL21M 0x4a /* @pcrel(sym + add), chk.s */ +#define R_IA64_PCREL21F 0x4b /* @pcrel(sym + add), fchkf */ +#define R_IA64_PCREL32MSB 0x4c /* @pcrel(sym + add), data4 MSB */ +#define R_IA64_PCREL32LSB 0x4d /* @pcrel(sym + add), data4 LSB */ +#define R_IA64_PCREL64MSB 0x4e /* @pcrel(sym + add), data8 MSB */ +#define R_IA64_PCREL64LSB 0x4f /* @pcrel(sym + add), data8 LSB */ +#define R_IA64_LTOFF_FPTR22 0x52 /* @ltoff(@fptr(s+a)), imm22 */ +#define R_IA64_LTOFF_FPTR64I 0x53 /* @ltoff(@fptr(s+a)), imm64 */ +#define R_IA64_LTOFF_FPTR32MSB 0x54 /* @ltoff(@fptr(s+a)), data4 MSB */ +#define R_IA64_LTOFF_FPTR32LSB 0x55 /* @ltoff(@fptr(s+a)), data4 LSB */ +#define R_IA64_LTOFF_FPTR64MSB 0x56 /* @ltoff(@fptr(s+a)), data8 MSB */ +#define R_IA64_LTOFF_FPTR64LSB 0x57 /* @ltoff(@fptr(s+a)), data8 LSB */ +#define R_IA64_SEGREL32MSB 0x5c /* @segrel(sym + add), data4 MSB */ +#define R_IA64_SEGREL32LSB 0x5d /* @segrel(sym + add), data4 LSB */ +#define R_IA64_SEGREL64MSB 0x5e /* @segrel(sym + add), data8 MSB */ +#define R_IA64_SEGREL64LSB 0x5f /* @segrel(sym + add), data8 LSB */ +#define R_IA64_SECREL32MSB 0x64 /* @secrel(sym + add), data4 MSB */ +#define R_IA64_SECREL32LSB 0x65 /* @secrel(sym + add), data4 LSB */ +#define R_IA64_SECREL64MSB 0x66 /* @secrel(sym + add), data8 MSB */ +#define R_IA64_SECREL64LSB 0x67 /* @secrel(sym + add), data8 LSB */ +#define R_IA64_REL32MSB 0x6c /* data 4 + REL */ +#define R_IA64_REL32LSB 0x6d /* data 4 + REL */ +#define R_IA64_REL64MSB 0x6e /* data 8 + REL */ +#define R_IA64_REL64LSB 0x6f /* data 8 + REL */ +#define R_IA64_LTV32MSB 0x74 /* symbol + addend, data4 MSB */ +#define R_IA64_LTV32LSB 0x75 /* symbol + addend, data4 LSB */ +#define R_IA64_LTV64MSB 0x76 /* symbol + addend, data8 MSB */ +#define R_IA64_LTV64LSB 0x77 /* symbol + addend, data8 LSB */ +#define R_IA64_PCREL21BI 0x79 /* @pcrel(sym + add), 21bit inst */ +#define R_IA64_PCREL22 0x7a /* @pcrel(sym + add), 22bit inst */ +#define R_IA64_PCREL64I 0x7b /* @pcrel(sym + add), 64bit inst */ +#define R_IA64_IPLTMSB 0x80 /* dynamic reloc, imported PLT, MSB */ +#define R_IA64_IPLTLSB 0x81 /* dynamic reloc, imported PLT, LSB */ +#define R_IA64_COPY 0x84 /* copy relocation */ +#define R_IA64_SUB 0x85 /* Addend and symbol difference */ +#define R_IA64_LTOFF22X 0x86 /* LTOFF22, relaxable. */ +#define R_IA64_LDXMOV 0x87 /* Use of LTOFF22X. */ +#define R_IA64_TPREL14 0x91 /* @tprel(sym + add), imm14 */ +#define R_IA64_TPREL22 0x92 /* @tprel(sym + add), imm22 */ +#define R_IA64_TPREL64I 0x93 /* @tprel(sym + add), imm64 */ +#define R_IA64_TPREL64MSB 0x96 /* @tprel(sym + add), data8 MSB */ +#define R_IA64_TPREL64LSB 0x97 /* @tprel(sym + add), data8 LSB */ +#define R_IA64_LTOFF_TPREL22 0x9a /* @ltoff(@tprel(s+a)), imm2 */ +#define R_IA64_DTPMOD64MSB 0xa6 /* @dtpmod(sym + add), data8 MSB */ +#define R_IA64_DTPMOD64LSB 0xa7 /* @dtpmod(sym + add), data8 LSB */ +#define R_IA64_LTOFF_DTPMOD22 0xaa /* @ltoff(@dtpmod(sym + add)), imm22 */ +#define R_IA64_DTPREL14 0xb1 /* @dtprel(sym + add), imm14 */ +#define R_IA64_DTPREL22 0xb2 /* @dtprel(sym + add), imm22 */ +#define R_IA64_DTPREL64I 0xb3 /* @dtprel(sym + add), imm64 */ +#define R_IA64_DTPREL32MSB 0xb4 /* @dtprel(sym + add), data4 MSB */ +#define R_IA64_DTPREL32LSB 0xb5 /* @dtprel(sym + add), data4 LSB */ +#define R_IA64_DTPREL64MSB 0xb6 /* @dtprel(sym + add), data8 MSB */ +#define R_IA64_DTPREL64LSB 0xb7 /* @dtprel(sym + add), data8 LSB */ +#define R_IA64_LTOFF_DTPREL22 0xba /* @ltoff(@dtprel(s+a)), imm22 */ + +/* RISC-V relocations. */ +#define R_RISCV_NONE 0 +#define R_RISCV_32 1 +#define R_RISCV_64 2 +#define R_RISCV_RELATIVE 3 +#define R_RISCV_COPY 4 +#define R_RISCV_JUMP_SLOT 5 +#define R_RISCV_TLS_DTPMOD32 6 +#define R_RISCV_TLS_DTPMOD64 7 +#define R_RISCV_TLS_DTPREL32 8 +#define R_RISCV_TLS_DTPREL64 9 +#define R_RISCV_TLS_TPREL32 10 +#define R_RISCV_TLS_TPREL64 11 +#define R_RISCV_BRANCH 16 +#define R_RISCV_JAL 17 +#define R_RISCV_CALL 18 +#define R_RISCV_CALL_PLT 19 +#define R_RISCV_GOT_HI20 20 +#define R_RISCV_TLS_GOT_HI20 21 +#define R_RISCV_TLS_GD_HI20 22 +#define R_RISCV_PCREL_HI20 23 +#define R_RISCV_PCREL_LO12_I 24 +#define R_RISCV_PCREL_LO12_S 25 +#define R_RISCV_HI20 26 +#define R_RISCV_LO12_I 27 +#define R_RISCV_LO12_S 28 +#define R_RISCV_TPREL_HI20 29 +#define R_RISCV_TPREL_LO12_I 30 +#define R_RISCV_TPREL_LO12_S 31 +#define R_RISCV_TPREL_ADD 32 +#define R_RISCV_ADD8 33 +#define R_RISCV_ADD16 34 +#define R_RISCV_ADD32 35 +#define R_RISCV_ADD64 36 +#define R_RISCV_SUB8 37 +#define R_RISCV_SUB16 38 +#define R_RISCV_SUB32 39 +#define R_RISCV_SUB64 40 +#define R_RISCV_GNU_VTINHERIT 41 +#define R_RISCV_GNU_VTENTRY 42 +#define R_RISCV_ALIGN 43 +#define R_RISCV_RVC_BRANCH 44 +#define R_RISCV_RVC_JUMP 45 +#define R_RISCV_RVC_LUI 46 +#define R_RISCV_GPREL_I 47 +#define R_RISCV_GPREL_S 48 +#define R_RISCV_TPREL_I 49 +#define R_RISCV_TPREL_S 50 +#define R_RISCV_RELAX 51 +#define R_RISCV_SUB6 52 +#define R_RISCV_SET6 53 +#define R_RISCV_SET8 54 +#define R_RISCV_SET16 55 +#define R_RISCV_SET32 56 + +/* RISC-V ELF Flags. */ +#define EF_RISCV_RVC 0x0001 +#define EF_RISCV_FLOAT_ABI 0x0006 +#define EF_RISCV_FLOAT_ABI_SOFT 0x0000 +#define EF_RISCV_FLOAT_ABI_SINGLE 0x0002 +#define EF_RISCV_FLOAT_ABI_DOUBLE 0x0004 +#define EF_RISCV_FLOAT_ABI_QUAD 0x0006 +#define EF_RISCV_RVE 0x0008 +#define EF_RISCV_TSO 0x0010 + +typedef struct elf32_rel { + Elf32_Addr r_offset; + Elf32_Word r_info; +} Elf32_Rel; + +typedef struct elf64_rel { + Elf64_Addr r_offset; /* Location at which to apply the action */ + Elf64_Xword r_info; /* index and type of relocation */ +} Elf64_Rel; + +typedef struct elf32_rela{ + Elf32_Addr r_offset; + Elf32_Word r_info; + Elf32_Sword r_addend; +} Elf32_Rela; + +typedef struct elf64_rela { + Elf64_Addr r_offset; /* Location at which to apply the action */ + Elf64_Xword r_info; /* index and type of relocation */ + Elf64_Sxword r_addend; /* Constant addend used to compute value */ +} Elf64_Rela; + +typedef struct elf32_sym{ + Elf32_Word st_name; + Elf32_Addr st_value; + Elf32_Word st_size; + unsigned char st_info; + unsigned char st_other; + Elf32_Half st_shndx; +} Elf32_Sym; + +typedef struct elf64_sym { + Elf64_Word st_name; /* Symbol name, index in string tbl */ + unsigned char st_info; /* Type and binding attributes */ + unsigned char st_other; /* No defined meaning, 0 */ + Elf64_Half st_shndx; /* Associated section index */ + Elf64_Addr st_value; /* Value of the symbol */ + Elf64_Xword st_size; /* Associated symbol size */ +} Elf64_Sym; + + +#define EI_NIDENT 16 + +/* Special value for e_phnum. This indicates that the real number of + program headers is too large to fit into e_phnum. Instead the real + value is in the field sh_info of section 0. */ +#define PN_XNUM 0xffff + +typedef struct elf32_hdr{ + unsigned char e_ident[EI_NIDENT]; + Elf32_Half e_type; + Elf32_Half e_machine; + Elf32_Word e_version; + Elf32_Addr e_entry; /* Entry point */ + Elf32_Off e_phoff; + Elf32_Off e_shoff; + Elf32_Word e_flags; + Elf32_Half e_ehsize; + Elf32_Half e_phentsize; + Elf32_Half e_phnum; + Elf32_Half e_shentsize; + Elf32_Half e_shnum; + Elf32_Half e_shstrndx; +} Elf32_Ehdr; + +typedef struct elf64_hdr { + unsigned char e_ident[16]; /* ELF "magic number" */ + Elf64_Half e_type; + Elf64_Half e_machine; + Elf64_Word e_version; + Elf64_Addr e_entry; /* Entry point virtual address */ + Elf64_Off e_phoff; /* Program header table file offset */ + Elf64_Off e_shoff; /* Section header table file offset */ + Elf64_Word e_flags; + Elf64_Half e_ehsize; + Elf64_Half e_phentsize; + Elf64_Half e_phnum; + Elf64_Half e_shentsize; + Elf64_Half e_shnum; + Elf64_Half e_shstrndx; +} Elf64_Ehdr; + +/* These constants define the permissions on sections in the program + header, p_flags. */ +#define PF_R 0x4 +#define PF_W 0x2 +#define PF_X 0x1 + +typedef struct elf32_phdr{ + Elf32_Word p_type; + Elf32_Off p_offset; + Elf32_Addr p_vaddr; + Elf32_Addr p_paddr; + Elf32_Word p_filesz; + Elf32_Word p_memsz; + Elf32_Word p_flags; + Elf32_Word p_align; +} Elf32_Phdr; + +typedef struct elf64_phdr { + Elf64_Word p_type; + Elf64_Word p_flags; + Elf64_Off p_offset; /* Segment file offset */ + Elf64_Addr p_vaddr; /* Segment virtual address */ + Elf64_Addr p_paddr; /* Segment physical address */ + Elf64_Xword p_filesz; /* Segment size in file */ + Elf64_Xword p_memsz; /* Segment size in memory */ + Elf64_Xword p_align; /* Segment alignment, file & memory */ +} Elf64_Phdr; + +/* sh_type */ +#define SHT_NULL 0 +#define SHT_PROGBITS 1 +#define SHT_SYMTAB 2 +#define SHT_STRTAB 3 +#define SHT_RELA 4 +#define SHT_HASH 5 +#define SHT_DYNAMIC 6 +#define SHT_NOTE 7 +#define SHT_NOBITS 8 +#define SHT_REL 9 +#define SHT_SHLIB 10 +#define SHT_DYNSYM 11 +#define SHT_NUM 12 +#define SHT_LOPROC 0x70000000 +#define SHT_HIPROC 0x7fffffff +#define SHT_LOUSER 0x80000000 +#define SHT_HIUSER 0xffffffff +#define SHT_MIPS_LIST 0x70000000 +#define SHT_MIPS_CONFLICT 0x70000002 +#define SHT_MIPS_GPTAB 0x70000003 +#define SHT_MIPS_UCODE 0x70000004 + +/* sh_flags */ +#define SHF_WRITE 0x1 +#define SHF_ALLOC 0x2 +#define SHF_EXECINSTR 0x4 +#define SHF_MASKPROC 0xf0000000 +#define SHF_MIPS_GPREL 0x10000000 + +/* special section indexes */ +#define SHN_UNDEF 0 +#define SHN_LORESERVE 0xff00 +#define SHN_LOPROC 0xff00 +#define SHN_HIPROC 0xff1f +#define SHN_ABS 0xfff1 +#define SHN_COMMON 0xfff2 +#define SHN_HIRESERVE 0xffff +#define SHN_MIPS_ACCOMON 0xff00 + +typedef struct elf32_shdr { + Elf32_Word sh_name; + Elf32_Word sh_type; + Elf32_Word sh_flags; + Elf32_Addr sh_addr; + Elf32_Off sh_offset; + Elf32_Word sh_size; + Elf32_Word sh_link; + Elf32_Word sh_info; + Elf32_Word sh_addralign; + Elf32_Word sh_entsize; +} Elf32_Shdr; + +typedef struct elf64_shdr { + Elf64_Word sh_name; /* Section name, index in string tbl */ + Elf64_Word sh_type; /* Type of section */ + Elf64_Xword sh_flags; /* Miscellaneous section attributes */ + Elf64_Addr sh_addr; /* Section virtual addr at execution */ + Elf64_Off sh_offset; /* Section file offset */ + Elf64_Xword sh_size; /* Size of section in bytes */ + Elf64_Word sh_link; /* Index of another section */ + Elf64_Word sh_info; /* Additional section information */ + Elf64_Xword sh_addralign; /* Section alignment */ + Elf64_Xword sh_entsize; /* Entry size if section holds table */ +} Elf64_Shdr; + +#define EI_MAG0 0 /* e_ident[] indexes */ +#define EI_MAG1 1 +#define EI_MAG2 2 +#define EI_MAG3 3 +#define EI_CLASS 4 +#define EI_DATA 5 +#define EI_VERSION 6 +#define EI_OSABI 7 +#define EI_PAD 8 + +#define ELFOSABI_NONE 0 /* UNIX System V ABI */ +#define ELFOSABI_SYSV 0 /* Alias. */ +#define ELFOSABI_HPUX 1 /* HP-UX */ +#define ELFOSABI_NETBSD 2 /* NetBSD. */ +#define ELFOSABI_LINUX 3 /* Linux. */ +#define ELFOSABI_SOLARIS 6 /* Sun Solaris. */ +#define ELFOSABI_AIX 7 /* IBM AIX. */ +#define ELFOSABI_IRIX 8 /* SGI Irix. */ +#define ELFOSABI_FREEBSD 9 /* FreeBSD. */ +#define ELFOSABI_TRU64 10 /* Compaq TRU64 UNIX. */ +#define ELFOSABI_MODESTO 11 /* Novell Modesto. */ +#define ELFOSABI_OPENBSD 12 /* OpenBSD. */ +#define ELFOSABI_ARM_FDPIC 65 /* ARM FDPIC */ +#define ELFOSABI_ARM 97 /* ARM */ +#define ELFOSABI_STANDALONE 255 /* Standalone (embedded) application */ + +#define ELFMAG0 0x7f /* EI_MAG */ +#define ELFMAG1 'E' +#define ELFMAG2 'L' +#define ELFMAG3 'F' +#define ELFMAG "\177ELF" +#define SELFMAG 4 + +#define ELFCLASSNONE 0 /* EI_CLASS */ +#define ELFCLASS32 1 +#define ELFCLASS64 2 +#define ELFCLASSNUM 3 + +#define ELFDATANONE 0 /* e_ident[EI_DATA] */ +#define ELFDATA2LSB 1 +#define ELFDATA2MSB 2 + +#define EV_NONE 0 /* e_version, EI_VERSION */ +#define EV_CURRENT 1 +#define EV_NUM 2 + +/* Notes used in ET_CORE */ +#define NT_PRSTATUS 1 +#define NT_FPREGSET 2 +#define NT_PRFPREG 2 +#define NT_PRPSINFO 3 +#define NT_TASKSTRUCT 4 +#define NT_AUXV 6 +#define NT_PRXFPREG 0x46e62b7f /* copied from gdb5.1/include/elf/common.h */ +#define NT_S390_GS_CB 0x30b /* s390 guarded storage registers */ +#define NT_S390_VXRS_HIGH 0x30a /* s390 vector registers 16-31 */ +#define NT_S390_VXRS_LOW 0x309 /* s390 vector registers 0-15 (lower half) */ +#define NT_S390_PREFIX 0x305 /* s390 prefix register */ +#define NT_S390_CTRS 0x304 /* s390 control registers */ +#define NT_S390_TODPREG 0x303 /* s390 TOD programmable register */ +#define NT_S390_TODCMP 0x302 /* s390 TOD clock comparator register */ +#define NT_S390_TIMER 0x301 /* s390 timer register */ +#define NT_PPC_VMX 0x100 /* PowerPC Altivec/VMX registers */ +#define NT_PPC_SPE 0x101 /* PowerPC SPE/EVR registers */ +#define NT_PPC_VSX 0x102 /* PowerPC VSX registers */ +#define NT_ARM_VFP 0x400 /* ARM VFP/NEON registers */ +#define NT_ARM_TLS 0x401 /* ARM TLS register */ +#define NT_ARM_HW_BREAK 0x402 /* ARM hardware breakpoint registers */ +#define NT_ARM_HW_WATCH 0x403 /* ARM hardware watchpoint registers */ +#define NT_ARM_SYSTEM_CALL 0x404 /* ARM system call number */ +#define NT_ARM_SVE 0x405 /* ARM Scalable Vector Extension regs */ + +/* Defined note types for GNU systems. */ + +#define NT_GNU_PROPERTY_TYPE_0 5 /* Program property */ + +/* Values used in GNU .note.gnu.property notes (NT_GNU_PROPERTY_TYPE_0). */ + +#define GNU_PROPERTY_STACK_SIZE 1 +#define GNU_PROPERTY_NO_COPY_ON_PROTECTED 2 + +#define GNU_PROPERTY_LOPROC 0xc0000000 +#define GNU_PROPERTY_HIPROC 0xdfffffff +#define GNU_PROPERTY_LOUSER 0xe0000000 +#define GNU_PROPERTY_HIUSER 0xffffffff + +#define GNU_PROPERTY_AARCH64_FEATURE_1_AND 0xc0000000 +#define GNU_PROPERTY_AARCH64_FEATURE_1_BTI (1u << 0) +#define GNU_PROPERTY_AARCH64_FEATURE_1_PAC (1u << 1) + +/* + * Physical entry point into the kernel. + * + * 32bit entry point into the kernel. When requested to launch the + * guest kernel, use this entry point to launch the guest in 32-bit + * protected mode with paging disabled. + * + * [ Corresponding definition in Linux kernel: include/xen/interface/elfnote.h ] + */ +#define XEN_ELFNOTE_PHYS32_ENTRY 18 /* 0x12 */ + +/* Note header in a PT_NOTE section */ +typedef struct elf32_note { + Elf32_Word n_namesz; /* Name size */ + Elf32_Word n_descsz; /* Content size */ + Elf32_Word n_type; /* Content type */ +} Elf32_Nhdr; + +/* Note header in a PT_NOTE section */ +typedef struct elf64_note { + Elf64_Word n_namesz; /* Name size */ + Elf64_Word n_descsz; /* Content size */ + Elf64_Word n_type; /* Content type */ +} Elf64_Nhdr; + + +/* This data structure represents a PT_LOAD segment. */ +struct elf32_fdpic_loadseg { + /* Core address to which the segment is mapped. */ + Elf32_Addr addr; + /* VMA recorded in the program header. */ + Elf32_Addr p_vaddr; + /* Size of this segment in memory. */ + Elf32_Word p_memsz; +}; +struct elf32_fdpic_loadmap { + /* Protocol version number, must be zero. */ + Elf32_Half version; + /* Number of segments in this map. */ + Elf32_Half nsegs; + /* The actual memory map. */ + struct elf32_fdpic_loadseg segs[/*nsegs*/]; +}; + +#ifdef ELF_CLASS +#if ELF_CLASS == ELFCLASS32 + +#define elfhdr elf32_hdr +#define elf_phdr elf32_phdr +#define elf_note elf32_note +#define elf_shdr elf32_shdr +#define elf_sym elf32_sym +#define elf_addr_t Elf32_Off +#define elf_rela elf32_rela + +#ifdef ELF_USES_RELOCA +# define ELF_RELOC Elf32_Rela +#else +# define ELF_RELOC Elf32_Rel +#endif + +#else + +#define elfhdr elf64_hdr +#define elf_phdr elf64_phdr +#define elf_note elf64_note +#define elf_shdr elf64_shdr +#define elf_sym elf64_sym +#define elf_addr_t Elf64_Off +#define elf_rela elf64_rela + +#ifdef ELF_USES_RELOCA +# define ELF_RELOC Elf64_Rela +#else +# define ELF_RELOC Elf64_Rel +#endif + +#endif /* ELF_CLASS */ + +#ifndef ElfW +# if ELF_CLASS == ELFCLASS32 +# define ElfW(x) Elf32_ ## x +# define ELFW(x) ELF32_ ## x +# else +# define ElfW(x) Elf64_ ## x +# define ELFW(x) ELF64_ ## x +# endif +#endif + +#endif /* ELF_CLASS */ + + +#endif /* QEMU_ELF_H */ diff --git a/include/exec/address-spaces.h b/include/exec/address-spaces.h new file mode 100644 index 000000000..db8bfa9a9 --- /dev/null +++ b/include/exec/address-spaces.h @@ -0,0 +1,41 @@ +/* + * Internal memory management interfaces + * + * Copyright 2011 Red Hat, Inc. and/or its affiliates + * + * Authors: + * Avi Kivity + * + * This work is licensed under the terms of the GNU GPL, version 2. See + * the COPYING file in the top-level directory. + * + */ + +#ifndef EXEC_ADDRESS_SPACES_H +#define EXEC_ADDRESS_SPACES_H + +/* + * Internal interfaces between memory.c/exec.c/vl.c. Do not #include unless + * you're one of them. + */ + +#include "exec/memory.h" + +#ifndef CONFIG_USER_ONLY + +/* Get the root memory region. This interface should only be used temporarily + * until a proper bus interface is available. + */ +MemoryRegion *get_system_memory(void); + +/* Get the root I/O port region. This interface should only be used + * temporarily until a proper bus interface is available. + */ +MemoryRegion *get_system_io(void); + +extern AddressSpace address_space_memory; +extern AddressSpace address_space_io; + +#endif + +#endif diff --git a/include/exec/cpu-all.h b/include/exec/cpu-all.h new file mode 100644 index 000000000..4b5408c34 --- /dev/null +++ b/include/exec/cpu-all.h @@ -0,0 +1,495 @@ +/* + * defines common to all virtual CPUs + * + * Copyright (c) 2003 Fabrice Bellard + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, see . + */ +#ifndef CPU_ALL_H +#define CPU_ALL_H + +#include "exec/cpu-common.h" +#include "exec/memory.h" +#include "qemu/thread.h" +#include "hw/core/cpu.h" +#include "qemu/rcu.h" + +#define EXCP_INTERRUPT 0x10000 /* async interruption */ +#define EXCP_HLT 0x10001 /* hlt instruction reached */ +#define EXCP_DEBUG 0x10002 /* cpu stopped after a breakpoint or singlestep */ +#define EXCP_HALTED 0x10003 /* cpu is halted (waiting for external event) */ +#define EXCP_YIELD 0x10004 /* cpu wants to yield timeslice to another */ +#define EXCP_ATOMIC 0x10005 /* stop-the-world and emulate atomic */ + +/* some important defines: + * + * HOST_WORDS_BIGENDIAN : if defined, the host cpu is big endian and + * otherwise little endian. + * + * TARGET_WORDS_BIGENDIAN : same for target cpu + */ + +#if defined(HOST_WORDS_BIGENDIAN) != defined(TARGET_WORDS_BIGENDIAN) +#define BSWAP_NEEDED +#endif + +#ifdef BSWAP_NEEDED + +static inline uint16_t tswap16(uint16_t s) +{ + return bswap16(s); +} + +static inline uint32_t tswap32(uint32_t s) +{ + return bswap32(s); +} + +static inline uint64_t tswap64(uint64_t s) +{ + return bswap64(s); +} + +static inline void tswap16s(uint16_t *s) +{ + *s = bswap16(*s); +} + +static inline void tswap32s(uint32_t *s) +{ + *s = bswap32(*s); +} + +static inline void tswap64s(uint64_t *s) +{ + *s = bswap64(*s); +} + +#else + +static inline uint16_t tswap16(uint16_t s) +{ + return s; +} + +static inline uint32_t tswap32(uint32_t s) +{ + return s; +} + +static inline uint64_t tswap64(uint64_t s) +{ + return s; +} + +static inline void tswap16s(uint16_t *s) +{ +} + +static inline void tswap32s(uint32_t *s) +{ +} + +static inline void tswap64s(uint64_t *s) +{ +} + +#endif + +#if TARGET_LONG_SIZE == 4 +#define tswapl(s) tswap32(s) +#define tswapls(s) tswap32s((uint32_t *)(s)) +#define bswaptls(s) bswap32s(s) +#else +#define tswapl(s) tswap64(s) +#define tswapls(s) tswap64s((uint64_t *)(s)) +#define bswaptls(s) bswap64s(s) +#endif + +/* Target-endianness CPU memory access functions. These fit into the + * {ld,st}{type}{sign}{size}{endian}_p naming scheme described in bswap.h. + */ +#if defined(TARGET_WORDS_BIGENDIAN) +#define lduw_p(p) lduw_be_p(p) +#define ldsw_p(p) ldsw_be_p(p) +#define ldl_p(p) ldl_be_p(p) +#define ldq_p(p) ldq_be_p(p) +#define ldfl_p(p) ldfl_be_p(p) +#define ldfq_p(p) ldfq_be_p(p) +#define stw_p(p, v) stw_be_p(p, v) +#define stl_p(p, v) stl_be_p(p, v) +#define stq_p(p, v) stq_be_p(p, v) +#define stfl_p(p, v) stfl_be_p(p, v) +#define stfq_p(p, v) stfq_be_p(p, v) +#define ldn_p(p, sz) ldn_be_p(p, sz) +#define stn_p(p, sz, v) stn_be_p(p, sz, v) +#else +#define lduw_p(p) lduw_le_p(p) +#define ldsw_p(p) ldsw_le_p(p) +#define ldl_p(p) ldl_le_p(p) +#define ldq_p(p) ldq_le_p(p) +#define ldfl_p(p) ldfl_le_p(p) +#define ldfq_p(p) ldfq_le_p(p) +#define stw_p(p, v) stw_le_p(p, v) +#define stl_p(p, v) stl_le_p(p, v) +#define stq_p(p, v) stq_le_p(p, v) +#define stfl_p(p, v) stfl_le_p(p, v) +#define stfq_p(p, v) stfq_le_p(p, v) +#define ldn_p(p, sz) ldn_le_p(p, sz) +#define stn_p(p, sz, v) stn_le_p(p, sz, v) +#endif + +/* MMU memory access macros */ + +#if defined(CONFIG_USER_ONLY) +#include "exec/user/abitypes.h" + +/* On some host systems the guest address space is reserved on the host. + * This allows the guest address space to be offset to a convenient location. + */ +extern unsigned long guest_base; +extern bool have_guest_base; +extern unsigned long reserved_va; + +/* + * Limit the guest addresses as best we can. + * + * When not using -R reserved_va, we cannot really limit the guest + * to less address space than the host. For 32-bit guests, this + * acts as a sanity check that we're not giving the guest an address + * that it cannot even represent. For 64-bit guests... the address + * might not be what the real kernel would give, but it is at least + * representable in the guest. + * + * TODO: Improve address allocation to avoid this problem, and to + * avoid setting bits at the top of guest addresses that might need + * to be used for tags. + */ +#define GUEST_ADDR_MAX_ \ + ((MIN_CONST(TARGET_VIRT_ADDR_SPACE_BITS, TARGET_ABI_BITS) <= 32) ? \ + UINT32_MAX : ~0ul) +#define GUEST_ADDR_MAX (reserved_va ? reserved_va - 1 : GUEST_ADDR_MAX_) + +#else + +#include "exec/hwaddr.h" + +#define SUFFIX +#define ARG1 as +#define ARG1_DECL AddressSpace *as +#define TARGET_ENDIANNESS +#include "exec/memory_ldst.h.inc" + +#define SUFFIX _cached_slow +#define ARG1 cache +#define ARG1_DECL MemoryRegionCache *cache +#define TARGET_ENDIANNESS +#include "exec/memory_ldst.h.inc" + +static inline void stl_phys_notdirty(AddressSpace *as, hwaddr addr, uint32_t val) +{ + address_space_stl_notdirty(as, addr, val, + MEMTXATTRS_UNSPECIFIED, NULL); +} + +#define SUFFIX +#define ARG1 as +#define ARG1_DECL AddressSpace *as +#define TARGET_ENDIANNESS +#include "exec/memory_ldst_phys.h.inc" + +/* Inline fast path for direct RAM access. */ +#define ENDIANNESS +#include "exec/memory_ldst_cached.h.inc" + +#define SUFFIX _cached +#define ARG1 cache +#define ARG1_DECL MemoryRegionCache *cache +#define TARGET_ENDIANNESS +#include "exec/memory_ldst_phys.h.inc" +#endif + +/* page related stuff */ + +#ifdef TARGET_PAGE_BITS_VARY +typedef struct { + bool decided; + int bits; + target_long mask; +} TargetPageBits; +#if defined(CONFIG_ATTRIBUTE_ALIAS) || !defined(IN_EXEC_VARY) +extern const TargetPageBits target_page; +#else +extern TargetPageBits target_page; +#endif +#ifdef CONFIG_DEBUG_TCG +#define TARGET_PAGE_BITS ({ assert(target_page.decided); target_page.bits; }) +#define TARGET_PAGE_MASK ({ assert(target_page.decided); target_page.mask; }) +#else +#define TARGET_PAGE_BITS target_page.bits +#define TARGET_PAGE_MASK target_page.mask +#endif +#define TARGET_PAGE_SIZE (-(int)TARGET_PAGE_MASK) +#else +#define TARGET_PAGE_BITS_MIN TARGET_PAGE_BITS +#define TARGET_PAGE_SIZE (1 << TARGET_PAGE_BITS) +#define TARGET_PAGE_MASK ((target_long)-1 << TARGET_PAGE_BITS) +#endif + +#define TARGET_PAGE_ALIGN(addr) ROUND_UP((addr), TARGET_PAGE_SIZE) + +/* Using intptr_t ensures that qemu_*_page_mask is sign-extended even + * when intptr_t is 32-bit and we are aligning a long long. + */ +extern uintptr_t qemu_host_page_size; +extern intptr_t qemu_host_page_mask; + +#define HOST_PAGE_ALIGN(addr) ROUND_UP((addr), qemu_host_page_size) +#define REAL_HOST_PAGE_ALIGN(addr) ROUND_UP((addr), qemu_real_host_page_size) + +/* same as PROT_xxx */ +#define PAGE_READ 0x0001 +#define PAGE_WRITE 0x0002 +#define PAGE_EXEC 0x0004 +#define PAGE_BITS (PAGE_READ | PAGE_WRITE | PAGE_EXEC) +#define PAGE_VALID 0x0008 +/* original state of the write flag (used when tracking self-modifying + code */ +#define PAGE_WRITE_ORG 0x0010 +/* Invalidate the TLB entry immediately, helpful for s390x + * Low-Address-Protection. Used with PAGE_WRITE in tlb_set_page_with_attrs() */ +#define PAGE_WRITE_INV 0x0040 +#if defined(CONFIG_BSD) && defined(CONFIG_USER_ONLY) +/* FIXME: Code that sets/uses this is broken and needs to go away. */ +#define PAGE_RESERVED 0x0020 +#endif +/* Target-specific bits that will be used via page_get_flags(). */ +#define PAGE_TARGET_1 0x0080 + +#if defined(CONFIG_USER_ONLY) +void page_dump(FILE *f); + +typedef int (*walk_memory_regions_fn)(void *, target_ulong, + target_ulong, unsigned long); +int walk_memory_regions(void *, walk_memory_regions_fn); + +int page_get_flags(target_ulong address); +void page_set_flags(target_ulong start, target_ulong end, int flags); +int page_check_range(target_ulong start, target_ulong len, int flags); +#endif + +CPUArchState *cpu_copy(CPUArchState *env); + +/* Flags for use in ENV->INTERRUPT_PENDING. + + The numbers assigned here are non-sequential in order to preserve + binary compatibility with the vmstate dump. Bit 0 (0x0001) was + previously used for CPU_INTERRUPT_EXIT, and is cleared when loading + the vmstate dump. */ + +/* External hardware interrupt pending. This is typically used for + interrupts from devices. */ +#define CPU_INTERRUPT_HARD 0x0002 + +/* Exit the current TB. This is typically used when some system-level device + makes some change to the memory mapping. E.g. the a20 line change. */ +#define CPU_INTERRUPT_EXITTB 0x0004 + +/* Halt the CPU. */ +#define CPU_INTERRUPT_HALT 0x0020 + +/* Debug event pending. */ +#define CPU_INTERRUPT_DEBUG 0x0080 + +/* Reset signal. */ +#define CPU_INTERRUPT_RESET 0x0400 + +/* Several target-specific external hardware interrupts. Each target/cpu.h + should define proper names based on these defines. */ +#define CPU_INTERRUPT_TGT_EXT_0 0x0008 +#define CPU_INTERRUPT_TGT_EXT_1 0x0010 +#define CPU_INTERRUPT_TGT_EXT_2 0x0040 +#define CPU_INTERRUPT_TGT_EXT_3 0x0200 +#define CPU_INTERRUPT_TGT_EXT_4 0x1000 + +/* Several target-specific internal interrupts. These differ from the + preceding target-specific interrupts in that they are intended to + originate from within the cpu itself, typically in response to some + instruction being executed. These, therefore, are not masked while + single-stepping within the debugger. */ +#define CPU_INTERRUPT_TGT_INT_0 0x0100 +#define CPU_INTERRUPT_TGT_INT_1 0x0800 +#define CPU_INTERRUPT_TGT_INT_2 0x2000 + +/* First unused bit: 0x4000. */ + +/* The set of all bits that should be masked when single-stepping. */ +#define CPU_INTERRUPT_SSTEP_MASK \ + (CPU_INTERRUPT_HARD \ + | CPU_INTERRUPT_TGT_EXT_0 \ + | CPU_INTERRUPT_TGT_EXT_1 \ + | CPU_INTERRUPT_TGT_EXT_2 \ + | CPU_INTERRUPT_TGT_EXT_3 \ + | CPU_INTERRUPT_TGT_EXT_4) + +#ifdef CONFIG_USER_ONLY + +/* + * Allow some level of source compatibility with softmmu. We do not + * support any of the more exotic features, so only invalid pages may + * be signaled by probe_access_flags(). + */ +#define TLB_INVALID_MASK (1 << (TARGET_PAGE_BITS_MIN - 1)) +#define TLB_MMIO 0 +#define TLB_WATCHPOINT 0 + +#else + +/* + * Flags stored in the low bits of the TLB virtual address. + * These are defined so that fast path ram access is all zeros. + * The flags all must be between TARGET_PAGE_BITS and + * maximum address alignment bit. + * + * Use TARGET_PAGE_BITS_MIN so that these bits are constant + * when TARGET_PAGE_BITS_VARY is in effect. + */ +/* Zero if TLB entry is valid. */ +#define TLB_INVALID_MASK (1 << (TARGET_PAGE_BITS_MIN - 1)) +/* Set if TLB entry references a clean RAM page. The iotlb entry will + contain the page physical address. */ +#define TLB_NOTDIRTY (1 << (TARGET_PAGE_BITS_MIN - 2)) +/* Set if TLB entry is an IO callback. */ +#define TLB_MMIO (1 << (TARGET_PAGE_BITS_MIN - 3)) +/* Set if TLB entry contains a watchpoint. */ +#define TLB_WATCHPOINT (1 << (TARGET_PAGE_BITS_MIN - 4)) +/* Set if TLB entry requires byte swap. */ +#define TLB_BSWAP (1 << (TARGET_PAGE_BITS_MIN - 5)) +/* Set if TLB entry writes ignored. */ +#define TLB_DISCARD_WRITE (1 << (TARGET_PAGE_BITS_MIN - 6)) + +/* Use this mask to check interception with an alignment mask + * in a TCG backend. + */ +#define TLB_FLAGS_MASK \ + (TLB_INVALID_MASK | TLB_NOTDIRTY | TLB_MMIO \ + | TLB_WATCHPOINT | TLB_BSWAP | TLB_DISCARD_WRITE) + +/** + * tlb_hit_page: return true if page aligned @addr is a hit against the + * TLB entry @tlb_addr + * + * @addr: virtual address to test (must be page aligned) + * @tlb_addr: TLB entry address (a CPUTLBEntry addr_read/write/code value) + */ +static inline bool tlb_hit_page(target_ulong tlb_addr, target_ulong addr) +{ + return addr == (tlb_addr & (TARGET_PAGE_MASK | TLB_INVALID_MASK)); +} + +/** + * tlb_hit: return true if @addr is a hit against the TLB entry @tlb_addr + * + * @addr: virtual address to test (need not be page aligned) + * @tlb_addr: TLB entry address (a CPUTLBEntry addr_read/write/code value) + */ +static inline bool tlb_hit(target_ulong tlb_addr, target_ulong addr) +{ + return tlb_hit_page(tlb_addr, addr & TARGET_PAGE_MASK); +} + +#ifdef CONFIG_TCG +void dump_drift_info(void); +void dump_exec_info(void); +void dump_opcount_info(void); +#endif /* CONFIG_TCG */ + +#endif /* !CONFIG_USER_ONLY */ + +/* Returns: 0 on success, -1 on error */ +int cpu_memory_rw_debug(CPUState *cpu, target_ulong addr, + void *ptr, target_ulong len, bool is_write); + +int cpu_exec(CPUState *cpu); + +/** + * cpu_set_cpustate_pointers(cpu) + * @cpu: The cpu object + * + * Set the generic pointers in CPUState into the outer object. + */ +static inline void cpu_set_cpustate_pointers(ArchCPU *cpu) +{ + cpu->parent_obj.env_ptr = &cpu->env; + cpu->parent_obj.icount_decr_ptr = &cpu->neg.icount_decr; +} + +/** + * env_archcpu(env) + * @env: The architecture environment + * + * Return the ArchCPU associated with the environment. + */ +static inline ArchCPU *env_archcpu(CPUArchState *env) +{ + return container_of(env, ArchCPU, env); +} + +/** + * env_cpu(env) + * @env: The architecture environment + * + * Return the CPUState associated with the environment. + */ +static inline CPUState *env_cpu(CPUArchState *env) +{ + return &env_archcpu(env)->parent_obj; +} + +/** + * env_neg(env) + * @env: The architecture environment + * + * Return the CPUNegativeOffsetState associated with the environment. + */ +static inline CPUNegativeOffsetState *env_neg(CPUArchState *env) +{ + ArchCPU *arch_cpu = container_of(env, ArchCPU, env); + return &arch_cpu->neg; +} + +/** + * cpu_neg(cpu) + * @cpu: The generic CPUState + * + * Return the CPUNegativeOffsetState associated with the cpu. + */ +static inline CPUNegativeOffsetState *cpu_neg(CPUState *cpu) +{ + ArchCPU *arch_cpu = container_of(cpu, ArchCPU, parent_obj); + return &arch_cpu->neg; +} + +/** + * env_tlb(env) + * @env: The architecture environment + * + * Return the CPUTLB state associated with the environment. + */ +static inline CPUTLB *env_tlb(CPUArchState *env) +{ + return &env_neg(env)->tlb; +} + +#endif /* CPU_ALL_H */ diff --git a/include/exec/cpu-common.h b/include/exec/cpu-common.h new file mode 100644 index 000000000..19805ed6d --- /dev/null +++ b/include/exec/cpu-common.h @@ -0,0 +1,110 @@ +#ifndef CPU_COMMON_H +#define CPU_COMMON_H + +/* CPU interfaces that are target independent. */ + +#ifndef CONFIG_USER_ONLY +#include "exec/hwaddr.h" +#endif + +/* The CPU list lock nests outside page_(un)lock or mmap_(un)lock */ +void qemu_init_cpu_list(void); +void cpu_list_lock(void); +void cpu_list_unlock(void); + +void tcg_flush_softmmu_tlb(CPUState *cs); + +void tcg_iommu_init_notifier_list(CPUState *cpu); +void tcg_iommu_free_notifier_list(CPUState *cpu); + +#if !defined(CONFIG_USER_ONLY) + +enum device_endian { + DEVICE_NATIVE_ENDIAN, + DEVICE_BIG_ENDIAN, + DEVICE_LITTLE_ENDIAN, +}; + +#if defined(HOST_WORDS_BIGENDIAN) +#define DEVICE_HOST_ENDIAN DEVICE_BIG_ENDIAN +#else +#define DEVICE_HOST_ENDIAN DEVICE_LITTLE_ENDIAN +#endif + +/* address in the RAM (different from a physical address) */ +#if defined(CONFIG_XEN_BACKEND) +typedef uint64_t ram_addr_t; +# define RAM_ADDR_MAX UINT64_MAX +# define RAM_ADDR_FMT "%" PRIx64 +#else +typedef uintptr_t ram_addr_t; +# define RAM_ADDR_MAX UINTPTR_MAX +# define RAM_ADDR_FMT "%" PRIxPTR +#endif + +extern ram_addr_t ram_size; + +/* memory API */ + +void qemu_ram_remap(ram_addr_t addr, ram_addr_t length); +/* This should not be used by devices. */ +ram_addr_t qemu_ram_addr_from_host(void *ptr); +RAMBlock *qemu_ram_block_by_name(const char *name); +RAMBlock *qemu_ram_block_from_host(void *ptr, bool round_offset, + ram_addr_t *offset); +ram_addr_t qemu_ram_block_host_offset(RAMBlock *rb, void *host); +void qemu_ram_set_idstr(RAMBlock *block, const char *name, DeviceState *dev); +void qemu_ram_unset_idstr(RAMBlock *block); +const char *qemu_ram_get_idstr(RAMBlock *rb); +void *qemu_ram_get_host_addr(RAMBlock *rb); +ram_addr_t qemu_ram_get_offset(RAMBlock *rb); +ram_addr_t qemu_ram_get_used_length(RAMBlock *rb); +bool qemu_ram_is_shared(RAMBlock *rb); +bool qemu_ram_is_uf_zeroable(RAMBlock *rb); +void qemu_ram_set_uf_zeroable(RAMBlock *rb); +bool qemu_ram_is_migratable(RAMBlock *rb); +void qemu_ram_set_migratable(RAMBlock *rb); +void qemu_ram_unset_migratable(RAMBlock *rb); + +size_t qemu_ram_pagesize(RAMBlock *block); +size_t qemu_ram_pagesize_largest(void); + +void cpu_physical_memory_rw(hwaddr addr, void *buf, + hwaddr len, bool is_write); +static inline void cpu_physical_memory_read(hwaddr addr, + void *buf, hwaddr len) +{ + cpu_physical_memory_rw(addr, buf, len, false); +} +static inline void cpu_physical_memory_write(hwaddr addr, + const void *buf, hwaddr len) +{ + cpu_physical_memory_rw(addr, (void *)buf, len, true); +} +void *cpu_physical_memory_map(hwaddr addr, + hwaddr *plen, + bool is_write); +void cpu_physical_memory_unmap(void *buffer, hwaddr len, + bool is_write, hwaddr access_len); +void cpu_register_map_client(QEMUBH *bh); +void cpu_unregister_map_client(QEMUBH *bh); + +bool cpu_physical_memory_is_io(hwaddr phys_addr); + +/* Coalesced MMIO regions are areas where write operations can be reordered. + * This usually implies that write operations are side-effect free. This allows + * batching which can make a major impact on performance when using + * virtualization. + */ +void qemu_flush_coalesced_mmio_buffer(void); + +void cpu_flush_icache_range(hwaddr start, hwaddr len); + +typedef int (RAMBlockIterFunc)(RAMBlock *rb, void *opaque); + +int qemu_ram_foreach_block(RAMBlockIterFunc func, void *opaque); +int ram_block_discard_range(RAMBlock *rb, uint64_t start, size_t length); + +#endif + +#endif /* CPU_COMMON_H */ diff --git a/include/exec/cpu-defs.h b/include/exec/cpu-defs.h new file mode 100644 index 000000000..d1f5e3fc3 --- /dev/null +++ b/include/exec/cpu-defs.h @@ -0,0 +1,248 @@ +/* + * common defines for all CPUs + * + * Copyright (c) 2003 Fabrice Bellard + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, see . + */ +#ifndef CPU_DEFS_H +#define CPU_DEFS_H + +#ifndef NEED_CPU_H +#error cpu.h included from common code +#endif + +#include "qemu/host-utils.h" +#include "qemu/thread.h" +#ifdef CONFIG_TCG +#include "tcg-target.h" +#endif +#ifndef CONFIG_USER_ONLY +#include "exec/hwaddr.h" +#endif +#include "exec/memattrs.h" +#include "hw/core/cpu.h" + +#include "cpu-param.h" + +#ifndef TARGET_LONG_BITS +# error TARGET_LONG_BITS must be defined in cpu-param.h +#endif +#ifndef NB_MMU_MODES +# error NB_MMU_MODES must be defined in cpu-param.h +#endif +#ifndef TARGET_PHYS_ADDR_SPACE_BITS +# error TARGET_PHYS_ADDR_SPACE_BITS must be defined in cpu-param.h +#endif +#ifndef TARGET_VIRT_ADDR_SPACE_BITS +# error TARGET_VIRT_ADDR_SPACE_BITS must be defined in cpu-param.h +#endif +#ifndef TARGET_PAGE_BITS +# ifdef TARGET_PAGE_BITS_VARY +# ifndef TARGET_PAGE_BITS_MIN +# error TARGET_PAGE_BITS_MIN must be defined in cpu-param.h +# endif +# else +# error TARGET_PAGE_BITS must be defined in cpu-param.h +# endif +#endif + +#define TARGET_LONG_SIZE (TARGET_LONG_BITS / 8) + +/* target_ulong is the type of a virtual address */ +#if TARGET_LONG_SIZE == 4 +typedef int32_t target_long; +typedef uint32_t target_ulong; +#define TARGET_FMT_lx "%08x" +#define TARGET_FMT_ld "%d" +#define TARGET_FMT_lu "%u" +#elif TARGET_LONG_SIZE == 8 +typedef int64_t target_long; +typedef uint64_t target_ulong; +#define TARGET_FMT_lx "%016" PRIx64 +#define TARGET_FMT_ld "%" PRId64 +#define TARGET_FMT_lu "%" PRIu64 +#else +#error TARGET_LONG_SIZE undefined +#endif + +#if !defined(CONFIG_USER_ONLY) && defined(CONFIG_TCG) + +/* use a fully associative victim tlb of 8 entries */ +#define CPU_VTLB_SIZE 8 + +#if HOST_LONG_BITS == 32 && TARGET_LONG_BITS == 32 +#define CPU_TLB_ENTRY_BITS 4 +#else +#define CPU_TLB_ENTRY_BITS 5 +#endif + +#define CPU_TLB_DYN_MIN_BITS 6 +#define CPU_TLB_DYN_DEFAULT_BITS 8 + +# if HOST_LONG_BITS == 32 +/* Make sure we do not require a double-word shift for the TLB load */ +# define CPU_TLB_DYN_MAX_BITS (32 - TARGET_PAGE_BITS) +# else /* HOST_LONG_BITS == 64 */ +/* + * Assuming TARGET_PAGE_BITS==12, with 2**22 entries we can cover 2**(22+12) == + * 2**34 == 16G of address space. This is roughly what one would expect a + * TLB to cover in a modern (as of 2018) x86_64 CPU. For instance, Intel + * Skylake's Level-2 STLB has 16 1G entries. + * Also, make sure we do not size the TLB past the guest's address space. + */ +# ifdef TARGET_PAGE_BITS_VARY +# define CPU_TLB_DYN_MAX_BITS \ + MIN(22, TARGET_VIRT_ADDR_SPACE_BITS - TARGET_PAGE_BITS) +# else +# define CPU_TLB_DYN_MAX_BITS \ + MIN_CONST(22, TARGET_VIRT_ADDR_SPACE_BITS - TARGET_PAGE_BITS) +# endif +# endif + +typedef struct CPUTLBEntry { + /* bit TARGET_LONG_BITS to TARGET_PAGE_BITS : virtual address + bit TARGET_PAGE_BITS-1..4 : Nonzero for accesses that should not + go directly to ram. + bit 3 : indicates that the entry is invalid + bit 2..0 : zero + */ + union { + struct { + target_ulong addr_read; + target_ulong addr_write; + target_ulong addr_code; + /* Addend to virtual address to get host address. IO accesses + use the corresponding iotlb value. */ + uintptr_t addend; + }; + /* padding to get a power of two size */ + uint8_t dummy[1 << CPU_TLB_ENTRY_BITS]; + }; +} CPUTLBEntry; + +QEMU_BUILD_BUG_ON(sizeof(CPUTLBEntry) != (1 << CPU_TLB_ENTRY_BITS)); + +/* The IOTLB is not accessed directly inline by generated TCG code, + * so the CPUIOTLBEntry layout is not as critical as that of the + * CPUTLBEntry. (This is also why we don't want to combine the two + * structs into one.) + */ +typedef struct CPUIOTLBEntry { + /* + * @addr contains: + * - in the lower TARGET_PAGE_BITS, a physical section number + * - with the lower TARGET_PAGE_BITS masked off, an offset which + * must be added to the virtual address to obtain: + * + the ram_addr_t of the target RAM (if the physical section + * number is PHYS_SECTION_NOTDIRTY or PHYS_SECTION_ROM) + * + the offset within the target MemoryRegion (otherwise) + */ + hwaddr addr; + MemTxAttrs attrs; +} CPUIOTLBEntry; + +/* + * Data elements that are per MMU mode, minus the bits accessed by + * the TCG fast path. + */ +typedef struct CPUTLBDesc { + /* + * Describe a region covering all of the large pages allocated + * into the tlb. When any page within this region is flushed, + * we must flush the entire tlb. The region is matched if + * (addr & large_page_mask) == large_page_addr. + */ + target_ulong large_page_addr; + target_ulong large_page_mask; + /* host time (in ns) at the beginning of the time window */ + int64_t window_begin_ns; + /* maximum number of entries observed in the window */ + size_t window_max_entries; + size_t n_used_entries; + /* The next index to use in the tlb victim table. */ + size_t vindex; + /* The tlb victim table, in two parts. */ + CPUTLBEntry vtable[CPU_VTLB_SIZE]; + CPUIOTLBEntry viotlb[CPU_VTLB_SIZE]; + /* The iotlb. */ + CPUIOTLBEntry *iotlb; +} CPUTLBDesc; + +/* + * Data elements that are per MMU mode, accessed by the fast path. + * The structure is aligned to aid loading the pair with one insn. + */ +typedef struct CPUTLBDescFast { + /* Contains (n_entries - 1) << CPU_TLB_ENTRY_BITS */ + uintptr_t mask; + /* The array of tlb entries itself. */ + CPUTLBEntry *table; +} CPUTLBDescFast QEMU_ALIGNED(2 * sizeof(void *)); + +/* + * Data elements that are shared between all MMU modes. + */ +typedef struct CPUTLBCommon { + /* Serialize updates to f.table and d.vtable, and others as noted. */ + QemuSpin lock; + /* + * Within dirty, for each bit N, modifications have been made to + * mmu_idx N since the last time that mmu_idx was flushed. + * Protected by tlb_c.lock. + */ + uint16_t dirty; + /* + * Statistics. These are not lock protected, but are read and + * written atomically. This allows the monitor to print a snapshot + * of the stats without interfering with the cpu. + */ + size_t full_flush_count; + size_t part_flush_count; + size_t elide_flush_count; +} CPUTLBCommon; + +/* + * The entire softmmu tlb, for all MMU modes. + * The meaning of each of the MMU modes is defined in the target code. + * Since this is placed within CPUNegativeOffsetState, the smallest + * negative offsets are at the end of the struct. + */ + +typedef struct CPUTLB { + CPUTLBCommon c; + CPUTLBDesc d[NB_MMU_MODES]; + CPUTLBDescFast f[NB_MMU_MODES]; +} CPUTLB; + +/* This will be used by TCG backends to compute offsets. */ +#define TLB_MASK_TABLE_OFS(IDX) \ + ((int)offsetof(ArchCPU, neg.tlb.f[IDX]) - (int)offsetof(ArchCPU, env)) + +#else + +typedef struct CPUTLB { } CPUTLB; + +#endif /* !CONFIG_USER_ONLY && CONFIG_TCG */ + +/* + * This structure must be placed in ArchCPU immediately + * before CPUArchState, as a field named "neg". + */ +typedef struct CPUNegativeOffsetState { + CPUTLB tlb; + IcountDecr icount_decr; +} CPUNegativeOffsetState; + +#endif diff --git a/include/exec/cpu_ldst.h b/include/exec/cpu_ldst.h new file mode 100644 index 000000000..ef54cb7e1 --- /dev/null +++ b/include/exec/cpu_ldst.h @@ -0,0 +1,449 @@ +/* + * Software MMU support + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, see . + * + */ + +/* + * Generate inline load/store functions for all MMU modes (typically + * at least _user and _kernel) as well as _data versions, for all data + * sizes. + * + * Used by target op helpers. + * + * The syntax for the accessors is: + * + * load: cpu_ld{sign}{size}{end}_{mmusuffix}(env, ptr) + * cpu_ld{sign}{size}{end}_{mmusuffix}_ra(env, ptr, retaddr) + * cpu_ld{sign}{size}{end}_mmuidx_ra(env, ptr, mmu_idx, retaddr) + * + * store: cpu_st{size}{end}_{mmusuffix}(env, ptr, val) + * cpu_st{size}{end}_{mmusuffix}_ra(env, ptr, val, retaddr) + * cpu_st{size}{end}_mmuidx_ra(env, ptr, val, mmu_idx, retaddr) + * + * sign is: + * (empty): for 32 and 64 bit sizes + * u : unsigned + * s : signed + * + * size is: + * b: 8 bits + * w: 16 bits + * l: 32 bits + * q: 64 bits + * + * end is: + * (empty): for target native endian, or for 8 bit access + * _be: for forced big endian + * _le: for forced little endian + * + * mmusuffix is one of the generic suffixes "data" or "code", or "mmuidx". + * The "mmuidx" suffix carries an extra mmu_idx argument that specifies + * the index to use; the "data" and "code" suffixes take the index from + * cpu_mmu_index(). + */ +#ifndef CPU_LDST_H +#define CPU_LDST_H + +#if defined(CONFIG_USER_ONLY) +/* sparc32plus has 64bit long but 32bit space address + * this can make bad result with g2h() and h2g() + */ +#if TARGET_VIRT_ADDR_SPACE_BITS <= 32 +typedef uint32_t abi_ptr; +#define TARGET_ABI_FMT_ptr "%x" +#else +typedef uint64_t abi_ptr; +#define TARGET_ABI_FMT_ptr "%"PRIx64 +#endif + +/* All direct uses of g2h and h2g need to go away for usermode softmmu. */ +#define g2h(x) ((void *)((unsigned long)(abi_ptr)(x) + guest_base)) + +#if HOST_LONG_BITS <= TARGET_VIRT_ADDR_SPACE_BITS +#define guest_addr_valid(x) (1) +#else +#define guest_addr_valid(x) ((x) <= GUEST_ADDR_MAX) +#endif +#define h2g_valid(x) guest_addr_valid((unsigned long)(x) - guest_base) + +static inline int guest_range_valid(unsigned long start, unsigned long len) +{ + return len - 1 <= GUEST_ADDR_MAX && start <= GUEST_ADDR_MAX - len + 1; +} + +#define h2g_nocheck(x) ({ \ + unsigned long __ret = (unsigned long)(x) - guest_base; \ + (abi_ptr)__ret; \ +}) + +#define h2g(x) ({ \ + /* Check if given address fits target address space */ \ + assert(h2g_valid(x)); \ + h2g_nocheck(x); \ +}) +#else +typedef target_ulong abi_ptr; +#define TARGET_ABI_FMT_ptr TARGET_ABI_FMT_lx +#endif + +uint32_t cpu_ldub_data(CPUArchState *env, abi_ptr ptr); +int cpu_ldsb_data(CPUArchState *env, abi_ptr ptr); + +uint32_t cpu_lduw_be_data(CPUArchState *env, abi_ptr ptr); +int cpu_ldsw_be_data(CPUArchState *env, abi_ptr ptr); +uint32_t cpu_ldl_be_data(CPUArchState *env, abi_ptr ptr); +uint64_t cpu_ldq_be_data(CPUArchState *env, abi_ptr ptr); + +uint32_t cpu_lduw_le_data(CPUArchState *env, abi_ptr ptr); +int cpu_ldsw_le_data(CPUArchState *env, abi_ptr ptr); +uint32_t cpu_ldl_le_data(CPUArchState *env, abi_ptr ptr); +uint64_t cpu_ldq_le_data(CPUArchState *env, abi_ptr ptr); + +uint32_t cpu_ldub_data_ra(CPUArchState *env, abi_ptr ptr, uintptr_t ra); +int cpu_ldsb_data_ra(CPUArchState *env, abi_ptr ptr, uintptr_t ra); + +uint32_t cpu_lduw_be_data_ra(CPUArchState *env, abi_ptr ptr, uintptr_t ra); +int cpu_ldsw_be_data_ra(CPUArchState *env, abi_ptr ptr, uintptr_t ra); +uint32_t cpu_ldl_be_data_ra(CPUArchState *env, abi_ptr ptr, uintptr_t ra); +uint64_t cpu_ldq_be_data_ra(CPUArchState *env, abi_ptr ptr, uintptr_t ra); + +uint32_t cpu_lduw_le_data_ra(CPUArchState *env, abi_ptr ptr, uintptr_t ra); +int cpu_ldsw_le_data_ra(CPUArchState *env, abi_ptr ptr, uintptr_t ra); +uint32_t cpu_ldl_le_data_ra(CPUArchState *env, abi_ptr ptr, uintptr_t ra); +uint64_t cpu_ldq_le_data_ra(CPUArchState *env, abi_ptr ptr, uintptr_t ra); + +void cpu_stb_data(CPUArchState *env, abi_ptr ptr, uint32_t val); + +void cpu_stw_be_data(CPUArchState *env, abi_ptr ptr, uint32_t val); +void cpu_stl_be_data(CPUArchState *env, abi_ptr ptr, uint32_t val); +void cpu_stq_be_data(CPUArchState *env, abi_ptr ptr, uint64_t val); + +void cpu_stw_le_data(CPUArchState *env, abi_ptr ptr, uint32_t val); +void cpu_stl_le_data(CPUArchState *env, abi_ptr ptr, uint32_t val); +void cpu_stq_le_data(CPUArchState *env, abi_ptr ptr, uint64_t val); + +void cpu_stb_data_ra(CPUArchState *env, abi_ptr ptr, + uint32_t val, uintptr_t ra); + +void cpu_stw_be_data_ra(CPUArchState *env, abi_ptr ptr, + uint32_t val, uintptr_t ra); +void cpu_stl_be_data_ra(CPUArchState *env, abi_ptr ptr, + uint32_t val, uintptr_t ra); +void cpu_stq_be_data_ra(CPUArchState *env, abi_ptr ptr, + uint64_t val, uintptr_t ra); + +void cpu_stw_le_data_ra(CPUArchState *env, abi_ptr ptr, + uint32_t val, uintptr_t ra); +void cpu_stl_le_data_ra(CPUArchState *env, abi_ptr ptr, + uint32_t val, uintptr_t ra); +void cpu_stq_le_data_ra(CPUArchState *env, abi_ptr ptr, + uint64_t val, uintptr_t ra); + +#if defined(CONFIG_USER_ONLY) + +extern __thread uintptr_t helper_retaddr; + +static inline void set_helper_retaddr(uintptr_t ra) +{ + helper_retaddr = ra; + /* + * Ensure that this write is visible to the SIGSEGV handler that + * may be invoked due to a subsequent invalid memory operation. + */ + signal_barrier(); +} + +static inline void clear_helper_retaddr(void) +{ + /* + * Ensure that previous memory operations have succeeded before + * removing the data visible to the signal handler. + */ + signal_barrier(); + helper_retaddr = 0; +} + +/* + * Provide the same *_mmuidx_ra interface as for softmmu. + * The mmu_idx argument is ignored. + */ + +static inline uint32_t cpu_ldub_mmuidx_ra(CPUArchState *env, abi_ptr addr, + int mmu_idx, uintptr_t ra) +{ + return cpu_ldub_data_ra(env, addr, ra); +} + +static inline int cpu_ldsb_mmuidx_ra(CPUArchState *env, abi_ptr addr, + int mmu_idx, uintptr_t ra) +{ + return cpu_ldsb_data_ra(env, addr, ra); +} + +static inline uint32_t cpu_lduw_be_mmuidx_ra(CPUArchState *env, abi_ptr addr, + int mmu_idx, uintptr_t ra) +{ + return cpu_lduw_be_data_ra(env, addr, ra); +} + +static inline int cpu_ldsw_be_mmuidx_ra(CPUArchState *env, abi_ptr addr, + int mmu_idx, uintptr_t ra) +{ + return cpu_ldsw_be_data_ra(env, addr, ra); +} + +static inline uint32_t cpu_ldl_be_mmuidx_ra(CPUArchState *env, abi_ptr addr, + int mmu_idx, uintptr_t ra) +{ + return cpu_ldl_be_data_ra(env, addr, ra); +} + +static inline uint64_t cpu_ldq_be_mmuidx_ra(CPUArchState *env, abi_ptr addr, + int mmu_idx, uintptr_t ra) +{ + return cpu_ldq_be_data_ra(env, addr, ra); +} + +static inline uint32_t cpu_lduw_le_mmuidx_ra(CPUArchState *env, abi_ptr addr, + int mmu_idx, uintptr_t ra) +{ + return cpu_lduw_le_data_ra(env, addr, ra); +} + +static inline int cpu_ldsw_le_mmuidx_ra(CPUArchState *env, abi_ptr addr, + int mmu_idx, uintptr_t ra) +{ + return cpu_ldsw_le_data_ra(env, addr, ra); +} + +static inline uint32_t cpu_ldl_le_mmuidx_ra(CPUArchState *env, abi_ptr addr, + int mmu_idx, uintptr_t ra) +{ + return cpu_ldl_le_data_ra(env, addr, ra); +} + +static inline uint64_t cpu_ldq_le_mmuidx_ra(CPUArchState *env, abi_ptr addr, + int mmu_idx, uintptr_t ra) +{ + return cpu_ldq_le_data_ra(env, addr, ra); +} + +static inline void cpu_stb_mmuidx_ra(CPUArchState *env, abi_ptr addr, + uint32_t val, int mmu_idx, uintptr_t ra) +{ + cpu_stb_data_ra(env, addr, val, ra); +} + +static inline void cpu_stw_be_mmuidx_ra(CPUArchState *env, abi_ptr addr, + uint32_t val, int mmu_idx, + uintptr_t ra) +{ + cpu_stw_be_data_ra(env, addr, val, ra); +} + +static inline void cpu_stl_be_mmuidx_ra(CPUArchState *env, abi_ptr addr, + uint32_t val, int mmu_idx, + uintptr_t ra) +{ + cpu_stl_be_data_ra(env, addr, val, ra); +} + +static inline void cpu_stq_be_mmuidx_ra(CPUArchState *env, abi_ptr addr, + uint64_t val, int mmu_idx, + uintptr_t ra) +{ + cpu_stq_be_data_ra(env, addr, val, ra); +} + +static inline void cpu_stw_le_mmuidx_ra(CPUArchState *env, abi_ptr addr, + uint32_t val, int mmu_idx, + uintptr_t ra) +{ + cpu_stw_le_data_ra(env, addr, val, ra); +} + +static inline void cpu_stl_le_mmuidx_ra(CPUArchState *env, abi_ptr addr, + uint32_t val, int mmu_idx, + uintptr_t ra) +{ + cpu_stl_le_data_ra(env, addr, val, ra); +} + +static inline void cpu_stq_le_mmuidx_ra(CPUArchState *env, abi_ptr addr, + uint64_t val, int mmu_idx, + uintptr_t ra) +{ + cpu_stq_le_data_ra(env, addr, val, ra); +} + +#else + +/* Needed for TCG_OVERSIZED_GUEST */ +#include "tcg/tcg.h" + +static inline target_ulong tlb_addr_write(const CPUTLBEntry *entry) +{ +#if TCG_OVERSIZED_GUEST + return entry->addr_write; +#else + return qatomic_read(&entry->addr_write); +#endif +} + +/* Find the TLB index corresponding to the mmu_idx + address pair. */ +static inline uintptr_t tlb_index(CPUArchState *env, uintptr_t mmu_idx, + target_ulong addr) +{ + uintptr_t size_mask = env_tlb(env)->f[mmu_idx].mask >> CPU_TLB_ENTRY_BITS; + + return (addr >> TARGET_PAGE_BITS) & size_mask; +} + +/* Find the TLB entry corresponding to the mmu_idx + address pair. */ +static inline CPUTLBEntry *tlb_entry(CPUArchState *env, uintptr_t mmu_idx, + target_ulong addr) +{ + return &env_tlb(env)->f[mmu_idx].table[tlb_index(env, mmu_idx, addr)]; +} + +uint32_t cpu_ldub_mmuidx_ra(CPUArchState *env, abi_ptr addr, + int mmu_idx, uintptr_t ra); +int cpu_ldsb_mmuidx_ra(CPUArchState *env, abi_ptr addr, + int mmu_idx, uintptr_t ra); + +uint32_t cpu_lduw_be_mmuidx_ra(CPUArchState *env, abi_ptr addr, + int mmu_idx, uintptr_t ra); +int cpu_ldsw_be_mmuidx_ra(CPUArchState *env, abi_ptr addr, + int mmu_idx, uintptr_t ra); +uint32_t cpu_ldl_be_mmuidx_ra(CPUArchState *env, abi_ptr addr, + int mmu_idx, uintptr_t ra); +uint64_t cpu_ldq_be_mmuidx_ra(CPUArchState *env, abi_ptr addr, + int mmu_idx, uintptr_t ra); + +uint32_t cpu_lduw_le_mmuidx_ra(CPUArchState *env, abi_ptr addr, + int mmu_idx, uintptr_t ra); +int cpu_ldsw_le_mmuidx_ra(CPUArchState *env, abi_ptr addr, + int mmu_idx, uintptr_t ra); +uint32_t cpu_ldl_le_mmuidx_ra(CPUArchState *env, abi_ptr addr, + int mmu_idx, uintptr_t ra); +uint64_t cpu_ldq_le_mmuidx_ra(CPUArchState *env, abi_ptr addr, + int mmu_idx, uintptr_t ra); + +void cpu_stb_mmuidx_ra(CPUArchState *env, abi_ptr addr, uint32_t val, + int mmu_idx, uintptr_t retaddr); + +void cpu_stw_be_mmuidx_ra(CPUArchState *env, abi_ptr addr, uint32_t val, + int mmu_idx, uintptr_t retaddr); +void cpu_stl_be_mmuidx_ra(CPUArchState *env, abi_ptr addr, uint32_t val, + int mmu_idx, uintptr_t retaddr); +void cpu_stq_be_mmuidx_ra(CPUArchState *env, abi_ptr addr, uint64_t val, + int mmu_idx, uintptr_t retaddr); + +void cpu_stw_le_mmuidx_ra(CPUArchState *env, abi_ptr addr, uint32_t val, + int mmu_idx, uintptr_t retaddr); +void cpu_stl_le_mmuidx_ra(CPUArchState *env, abi_ptr addr, uint32_t val, + int mmu_idx, uintptr_t retaddr); +void cpu_stq_le_mmuidx_ra(CPUArchState *env, abi_ptr addr, uint64_t val, + int mmu_idx, uintptr_t retaddr); + +#endif /* defined(CONFIG_USER_ONLY) */ + +#ifdef TARGET_WORDS_BIGENDIAN +# define cpu_lduw_data cpu_lduw_be_data +# define cpu_ldsw_data cpu_ldsw_be_data +# define cpu_ldl_data cpu_ldl_be_data +# define cpu_ldq_data cpu_ldq_be_data +# define cpu_lduw_data_ra cpu_lduw_be_data_ra +# define cpu_ldsw_data_ra cpu_ldsw_be_data_ra +# define cpu_ldl_data_ra cpu_ldl_be_data_ra +# define cpu_ldq_data_ra cpu_ldq_be_data_ra +# define cpu_lduw_mmuidx_ra cpu_lduw_be_mmuidx_ra +# define cpu_ldsw_mmuidx_ra cpu_ldsw_be_mmuidx_ra +# define cpu_ldl_mmuidx_ra cpu_ldl_be_mmuidx_ra +# define cpu_ldq_mmuidx_ra cpu_ldq_be_mmuidx_ra +# define cpu_stw_data cpu_stw_be_data +# define cpu_stl_data cpu_stl_be_data +# define cpu_stq_data cpu_stq_be_data +# define cpu_stw_data_ra cpu_stw_be_data_ra +# define cpu_stl_data_ra cpu_stl_be_data_ra +# define cpu_stq_data_ra cpu_stq_be_data_ra +# define cpu_stw_mmuidx_ra cpu_stw_be_mmuidx_ra +# define cpu_stl_mmuidx_ra cpu_stl_be_mmuidx_ra +# define cpu_stq_mmuidx_ra cpu_stq_be_mmuidx_ra +#else +# define cpu_lduw_data cpu_lduw_le_data +# define cpu_ldsw_data cpu_ldsw_le_data +# define cpu_ldl_data cpu_ldl_le_data +# define cpu_ldq_data cpu_ldq_le_data +# define cpu_lduw_data_ra cpu_lduw_le_data_ra +# define cpu_ldsw_data_ra cpu_ldsw_le_data_ra +# define cpu_ldl_data_ra cpu_ldl_le_data_ra +# define cpu_ldq_data_ra cpu_ldq_le_data_ra +# define cpu_lduw_mmuidx_ra cpu_lduw_le_mmuidx_ra +# define cpu_ldsw_mmuidx_ra cpu_ldsw_le_mmuidx_ra +# define cpu_ldl_mmuidx_ra cpu_ldl_le_mmuidx_ra +# define cpu_ldq_mmuidx_ra cpu_ldq_le_mmuidx_ra +# define cpu_stw_data cpu_stw_le_data +# define cpu_stl_data cpu_stl_le_data +# define cpu_stq_data cpu_stq_le_data +# define cpu_stw_data_ra cpu_stw_le_data_ra +# define cpu_stl_data_ra cpu_stl_le_data_ra +# define cpu_stq_data_ra cpu_stq_le_data_ra +# define cpu_stw_mmuidx_ra cpu_stw_le_mmuidx_ra +# define cpu_stl_mmuidx_ra cpu_stl_le_mmuidx_ra +# define cpu_stq_mmuidx_ra cpu_stq_le_mmuidx_ra +#endif + +uint32_t cpu_ldub_code(CPUArchState *env, abi_ptr addr); +uint32_t cpu_lduw_code(CPUArchState *env, abi_ptr addr); +uint32_t cpu_ldl_code(CPUArchState *env, abi_ptr addr); +uint64_t cpu_ldq_code(CPUArchState *env, abi_ptr addr); + +static inline int cpu_ldsb_code(CPUArchState *env, abi_ptr addr) +{ + return (int8_t)cpu_ldub_code(env, addr); +} + +static inline int cpu_ldsw_code(CPUArchState *env, abi_ptr addr) +{ + return (int16_t)cpu_lduw_code(env, addr); +} + +/** + * tlb_vaddr_to_host: + * @env: CPUArchState + * @addr: guest virtual address to look up + * @access_type: 0 for read, 1 for write, 2 for execute + * @mmu_idx: MMU index to use for lookup + * + * Look up the specified guest virtual index in the TCG softmmu TLB. + * If we can translate a host virtual address suitable for direct RAM + * access, without causing a guest exception, then return it. + * Otherwise (TLB entry is for an I/O access, guest software + * TLB fill required, etc) return NULL. + */ +#ifdef CONFIG_USER_ONLY +static inline void *tlb_vaddr_to_host(CPUArchState *env, abi_ptr addr, + MMUAccessType access_type, int mmu_idx) +{ + return g2h(addr); +} +#else +void *tlb_vaddr_to_host(CPUArchState *env, abi_ptr addr, + MMUAccessType access_type, int mmu_idx); +#endif + +#endif /* CPU_LDST_H */ diff --git a/include/exec/cputlb.h b/include/exec/cputlb.h new file mode 100644 index 000000000..19b16e58f --- /dev/null +++ b/include/exec/cputlb.h @@ -0,0 +1,31 @@ +/* + * Common CPU TLB handling + * + * Copyright (c) 2003 Fabrice Bellard + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, see . + */ + +#ifndef CPUTLB_H +#define CPUTLB_H + +#include "exec/cpu-common.h" + +#if !defined(CONFIG_USER_ONLY) +/* cputlb.c */ +void tlb_protect_code(ram_addr_t ram_addr); +void tlb_unprotect_code(ram_addr_t ram_addr); +void tlb_flush_counts(size_t *full, size_t *part, size_t *elide); +#endif +#endif diff --git a/include/exec/exec-all.h b/include/exec/exec-all.h new file mode 100644 index 000000000..94fe05daa --- /dev/null +++ b/include/exec/exec-all.h @@ -0,0 +1,682 @@ +/* + * internal execution defines for qemu + * + * Copyright (c) 2003 Fabrice Bellard + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, see . + */ + +#ifndef EXEC_ALL_H +#define EXEC_ALL_H + +#include "cpu.h" +#include "exec/tb-context.h" +#ifdef CONFIG_TCG +#include "exec/cpu_ldst.h" +#endif +#include "sysemu/cpu-timers.h" + +/* allow to see translation results - the slowdown should be negligible, so we leave it */ +#define DEBUG_DISAS + +/* Page tracking code uses ram addresses in system mode, and virtual + addresses in userspace mode. Define tb_page_addr_t to be an appropriate + type. */ +#if defined(CONFIG_USER_ONLY) +typedef abi_ulong tb_page_addr_t; +#define TB_PAGE_ADDR_FMT TARGET_ABI_FMT_lx +#else +typedef ram_addr_t tb_page_addr_t; +#define TB_PAGE_ADDR_FMT RAM_ADDR_FMT +#endif + +#include "qemu/log.h" + +void gen_intermediate_code(CPUState *cpu, TranslationBlock *tb, int max_insns); +void restore_state_to_opc(CPUArchState *env, TranslationBlock *tb, + target_ulong *data); + +void cpu_gen_init(void); + +/** + * cpu_restore_state: + * @cpu: the vCPU state is to be restore to + * @searched_pc: the host PC the fault occurred at + * @will_exit: true if the TB executed will be interrupted after some + cpu adjustments. Required for maintaining the correct + icount valus + * @return: true if state was restored, false otherwise + * + * Attempt to restore the state for a fault occurring in translated + * code. If the searched_pc is not in translated code no state is + * restored and the function returns false. + */ +bool cpu_restore_state(CPUState *cpu, uintptr_t searched_pc, bool will_exit); + +void QEMU_NORETURN cpu_loop_exit_noexc(CPUState *cpu); +void QEMU_NORETURN cpu_io_recompile(CPUState *cpu, uintptr_t retaddr); +TranslationBlock *tb_gen_code(CPUState *cpu, + target_ulong pc, target_ulong cs_base, + uint32_t flags, + int cflags); + +void QEMU_NORETURN cpu_loop_exit(CPUState *cpu); +void QEMU_NORETURN cpu_loop_exit_restore(CPUState *cpu, uintptr_t pc); +void QEMU_NORETURN cpu_loop_exit_atomic(CPUState *cpu, uintptr_t pc); + +/** + * cpu_loop_exit_requested: + * @cpu: The CPU state to be tested + * + * Indicate if somebody asked for a return of the CPU to the main loop + * (e.g., via cpu_exit() or cpu_interrupt()). + * + * This is helpful for architectures that support interruptible + * instructions. After writing back all state to registers/memory, this + * call can be used to check if it makes sense to return to the main loop + * or to continue executing the interruptible instruction. + */ +static inline bool cpu_loop_exit_requested(CPUState *cpu) +{ + return (int32_t)qatomic_read(&cpu_neg(cpu)->icount_decr.u32) < 0; +} + +#if !defined(CONFIG_USER_ONLY) +void cpu_reloading_memory_map(void); +/** + * cpu_address_space_init: + * @cpu: CPU to add this address space to + * @asidx: integer index of this address space + * @prefix: prefix to be used as name of address space + * @mr: the root memory region of address space + * + * Add the specified address space to the CPU's cpu_ases list. + * The address space added with @asidx 0 is the one used for the + * convenience pointer cpu->as. + * The target-specific code which registers ASes is responsible + * for defining what semantics address space 0, 1, 2, etc have. + * + * Before the first call to this function, the caller must set + * cpu->num_ases to the total number of address spaces it needs + * to support. + * + * Note that with KVM only one address space is supported. + */ +void cpu_address_space_init(CPUState *cpu, int asidx, + const char *prefix, MemoryRegion *mr); +#endif + +#if !defined(CONFIG_USER_ONLY) && defined(CONFIG_TCG) +/* cputlb.c */ +/** + * tlb_init - initialize a CPU's TLB + * @cpu: CPU whose TLB should be initialized + */ +void tlb_init(CPUState *cpu); +/** + * tlb_destroy - destroy a CPU's TLB + * @cpu: CPU whose TLB should be destroyed + */ +void tlb_destroy(CPUState *cpu); +/** + * tlb_flush_page: + * @cpu: CPU whose TLB should be flushed + * @addr: virtual address of page to be flushed + * + * Flush one page from the TLB of the specified CPU, for all + * MMU indexes. + */ +void tlb_flush_page(CPUState *cpu, target_ulong addr); +/** + * tlb_flush_page_all_cpus: + * @cpu: src CPU of the flush + * @addr: virtual address of page to be flushed + * + * Flush one page from the TLB of the specified CPU, for all + * MMU indexes. + */ +void tlb_flush_page_all_cpus(CPUState *src, target_ulong addr); +/** + * tlb_flush_page_all_cpus_synced: + * @cpu: src CPU of the flush + * @addr: virtual address of page to be flushed + * + * Flush one page from the TLB of the specified CPU, for all MMU + * indexes like tlb_flush_page_all_cpus except the source vCPUs work + * is scheduled as safe work meaning all flushes will be complete once + * the source vCPUs safe work is complete. This will depend on when + * the guests translation ends the TB. + */ +void tlb_flush_page_all_cpus_synced(CPUState *src, target_ulong addr); +/** + * tlb_flush: + * @cpu: CPU whose TLB should be flushed + * + * Flush the entire TLB for the specified CPU. Most CPU architectures + * allow the implementation to drop entries from the TLB at any time + * so this is generally safe. If more selective flushing is required + * use one of the other functions for efficiency. + */ +void tlb_flush(CPUState *cpu); +/** + * tlb_flush_all_cpus: + * @cpu: src CPU of the flush + */ +void tlb_flush_all_cpus(CPUState *src_cpu); +/** + * tlb_flush_all_cpus_synced: + * @cpu: src CPU of the flush + * + * Like tlb_flush_all_cpus except this except the source vCPUs work is + * scheduled as safe work meaning all flushes will be complete once + * the source vCPUs safe work is complete. This will depend on when + * the guests translation ends the TB. + */ +void tlb_flush_all_cpus_synced(CPUState *src_cpu); +/** + * tlb_flush_page_by_mmuidx: + * @cpu: CPU whose TLB should be flushed + * @addr: virtual address of page to be flushed + * @idxmap: bitmap of MMU indexes to flush + * + * Flush one page from the TLB of the specified CPU, for the specified + * MMU indexes. + */ +void tlb_flush_page_by_mmuidx(CPUState *cpu, target_ulong addr, + uint16_t idxmap); +/** + * tlb_flush_page_by_mmuidx_all_cpus: + * @cpu: Originating CPU of the flush + * @addr: virtual address of page to be flushed + * @idxmap: bitmap of MMU indexes to flush + * + * Flush one page from the TLB of all CPUs, for the specified + * MMU indexes. + */ +void tlb_flush_page_by_mmuidx_all_cpus(CPUState *cpu, target_ulong addr, + uint16_t idxmap); +/** + * tlb_flush_page_by_mmuidx_all_cpus_synced: + * @cpu: Originating CPU of the flush + * @addr: virtual address of page to be flushed + * @idxmap: bitmap of MMU indexes to flush + * + * Flush one page from the TLB of all CPUs, for the specified MMU + * indexes like tlb_flush_page_by_mmuidx_all_cpus except the source + * vCPUs work is scheduled as safe work meaning all flushes will be + * complete once the source vCPUs safe work is complete. This will + * depend on when the guests translation ends the TB. + */ +void tlb_flush_page_by_mmuidx_all_cpus_synced(CPUState *cpu, target_ulong addr, + uint16_t idxmap); +/** + * tlb_flush_by_mmuidx: + * @cpu: CPU whose TLB should be flushed + * @wait: If true ensure synchronisation by exiting the cpu_loop + * @idxmap: bitmap of MMU indexes to flush + * + * Flush all entries from the TLB of the specified CPU, for the specified + * MMU indexes. + */ +void tlb_flush_by_mmuidx(CPUState *cpu, uint16_t idxmap); +/** + * tlb_flush_by_mmuidx_all_cpus: + * @cpu: Originating CPU of the flush + * @idxmap: bitmap of MMU indexes to flush + * + * Flush all entries from all TLBs of all CPUs, for the specified + * MMU indexes. + */ +void tlb_flush_by_mmuidx_all_cpus(CPUState *cpu, uint16_t idxmap); +/** + * tlb_flush_by_mmuidx_all_cpus_synced: + * @cpu: Originating CPU of the flush + * @idxmap: bitmap of MMU indexes to flush + * + * Flush all entries from all TLBs of all CPUs, for the specified + * MMU indexes like tlb_flush_by_mmuidx_all_cpus except except the source + * vCPUs work is scheduled as safe work meaning all flushes will be + * complete once the source vCPUs safe work is complete. This will + * depend on when the guests translation ends the TB. + */ +void tlb_flush_by_mmuidx_all_cpus_synced(CPUState *cpu, uint16_t idxmap); + +/** + * tlb_flush_page_bits_by_mmuidx + * @cpu: CPU whose TLB should be flushed + * @addr: virtual address of page to be flushed + * @idxmap: bitmap of mmu indexes to flush + * @bits: number of significant bits in address + * + * Similar to tlb_flush_page_mask, but with a bitmap of indexes. + */ +void tlb_flush_page_bits_by_mmuidx(CPUState *cpu, target_ulong addr, + uint16_t idxmap, unsigned bits); + +/* Similarly, with broadcast and syncing. */ +void tlb_flush_page_bits_by_mmuidx_all_cpus(CPUState *cpu, target_ulong addr, + uint16_t idxmap, unsigned bits); +void tlb_flush_page_bits_by_mmuidx_all_cpus_synced + (CPUState *cpu, target_ulong addr, uint16_t idxmap, unsigned bits); + +/** + * tlb_set_page_with_attrs: + * @cpu: CPU to add this TLB entry for + * @vaddr: virtual address of page to add entry for + * @paddr: physical address of the page + * @attrs: memory transaction attributes + * @prot: access permissions (PAGE_READ/PAGE_WRITE/PAGE_EXEC bits) + * @mmu_idx: MMU index to insert TLB entry for + * @size: size of the page in bytes + * + * Add an entry to this CPU's TLB (a mapping from virtual address + * @vaddr to physical address @paddr) with the specified memory + * transaction attributes. This is generally called by the target CPU + * specific code after it has been called through the tlb_fill() + * entry point and performed a successful page table walk to find + * the physical address and attributes for the virtual address + * which provoked the TLB miss. + * + * At most one entry for a given virtual address is permitted. Only a + * single TARGET_PAGE_SIZE region is mapped; the supplied @size is only + * used by tlb_flush_page. + */ +void tlb_set_page_with_attrs(CPUState *cpu, target_ulong vaddr, + hwaddr paddr, MemTxAttrs attrs, + int prot, int mmu_idx, target_ulong size); +/* tlb_set_page: + * + * This function is equivalent to calling tlb_set_page_with_attrs() + * with an @attrs argument of MEMTXATTRS_UNSPECIFIED. It's provided + * as a convenience for CPUs which don't use memory transaction attributes. + */ +void tlb_set_page(CPUState *cpu, target_ulong vaddr, + hwaddr paddr, int prot, + int mmu_idx, target_ulong size); +#else +static inline void tlb_init(CPUState *cpu) +{ +} +static inline void tlb_destroy(CPUState *cpu) +{ +} +static inline void tlb_flush_page(CPUState *cpu, target_ulong addr) +{ +} +static inline void tlb_flush_page_all_cpus(CPUState *src, target_ulong addr) +{ +} +static inline void tlb_flush_page_all_cpus_synced(CPUState *src, + target_ulong addr) +{ +} +static inline void tlb_flush(CPUState *cpu) +{ +} +static inline void tlb_flush_all_cpus(CPUState *src_cpu) +{ +} +static inline void tlb_flush_all_cpus_synced(CPUState *src_cpu) +{ +} +static inline void tlb_flush_page_by_mmuidx(CPUState *cpu, + target_ulong addr, uint16_t idxmap) +{ +} + +static inline void tlb_flush_by_mmuidx(CPUState *cpu, uint16_t idxmap) +{ +} +static inline void tlb_flush_page_by_mmuidx_all_cpus(CPUState *cpu, + target_ulong addr, + uint16_t idxmap) +{ +} +static inline void tlb_flush_page_by_mmuidx_all_cpus_synced(CPUState *cpu, + target_ulong addr, + uint16_t idxmap) +{ +} +static inline void tlb_flush_by_mmuidx_all_cpus(CPUState *cpu, uint16_t idxmap) +{ +} + +static inline void tlb_flush_by_mmuidx_all_cpus_synced(CPUState *cpu, + uint16_t idxmap) +{ +} +static inline void tlb_flush_page_bits_by_mmuidx(CPUState *cpu, + target_ulong addr, + uint16_t idxmap, + unsigned bits) +{ +} +static inline void tlb_flush_page_bits_by_mmuidx_all_cpus(CPUState *cpu, + target_ulong addr, + uint16_t idxmap, + unsigned bits) +{ +} +static inline void +tlb_flush_page_bits_by_mmuidx_all_cpus_synced(CPUState *cpu, target_ulong addr, + uint16_t idxmap, unsigned bits) +{ +} +#endif +/** + * probe_access: + * @env: CPUArchState + * @addr: guest virtual address to look up + * @size: size of the access + * @access_type: read, write or execute permission + * @mmu_idx: MMU index to use for lookup + * @retaddr: return address for unwinding + * + * Look up the guest virtual address @addr. Raise an exception if the + * page does not satisfy @access_type. Raise an exception if the + * access (@addr, @size) hits a watchpoint. For writes, mark a clean + * page as dirty. + * + * Finally, return the host address for a page that is backed by RAM, + * or NULL if the page requires I/O. + */ +void *probe_access(CPUArchState *env, target_ulong addr, int size, + MMUAccessType access_type, int mmu_idx, uintptr_t retaddr); + +static inline void *probe_write(CPUArchState *env, target_ulong addr, int size, + int mmu_idx, uintptr_t retaddr) +{ + return probe_access(env, addr, size, MMU_DATA_STORE, mmu_idx, retaddr); +} + +static inline void *probe_read(CPUArchState *env, target_ulong addr, int size, + int mmu_idx, uintptr_t retaddr) +{ + return probe_access(env, addr, size, MMU_DATA_LOAD, mmu_idx, retaddr); +} + +/** + * probe_access_flags: + * @env: CPUArchState + * @addr: guest virtual address to look up + * @access_type: read, write or execute permission + * @mmu_idx: MMU index to use for lookup + * @nonfault: suppress the fault + * @phost: return value for host address + * @retaddr: return address for unwinding + * + * Similar to probe_access, loosely returning the TLB_FLAGS_MASK for + * the page, and storing the host address for RAM in @phost. + * + * If @nonfault is set, do not raise an exception but return TLB_INVALID_MASK. + * Do not handle watchpoints, but include TLB_WATCHPOINT in the returned flags. + * Do handle clean pages, so exclude TLB_NOTDIRY from the returned flags. + * For simplicity, all "mmio-like" flags are folded to TLB_MMIO. + */ +int probe_access_flags(CPUArchState *env, target_ulong addr, + MMUAccessType access_type, int mmu_idx, + bool nonfault, void **phost, uintptr_t retaddr); + +#define CODE_GEN_ALIGN 16 /* must be >= of the size of a icache line */ + +/* Estimated block size for TB allocation. */ +/* ??? The following is based on a 2015 survey of x86_64 host output. + Better would seem to be some sort of dynamically sized TB array, + adapting to the block sizes actually being produced. */ +#if defined(CONFIG_SOFTMMU) +#define CODE_GEN_AVG_BLOCK_SIZE 400 +#else +#define CODE_GEN_AVG_BLOCK_SIZE 150 +#endif + +/* + * Translation Cache-related fields of a TB. + * This struct exists just for convenience; we keep track of TB's in a binary + * search tree, and the only fields needed to compare TB's in the tree are + * @ptr and @size. + * Note: the address of search data can be obtained by adding @size to @ptr. + */ +struct tb_tc { + void *ptr; /* pointer to the translated code */ + size_t size; +}; + +struct TranslationBlock { + target_ulong pc; /* simulated PC corresponding to this block (EIP + CS base) */ + target_ulong cs_base; /* CS base for this block */ + uint32_t flags; /* flags defining in which context the code was generated */ + uint16_t size; /* size of target code for this block (1 <= + size <= TARGET_PAGE_SIZE) */ + uint16_t icount; + uint32_t cflags; /* compile flags */ +#define CF_COUNT_MASK 0x00007fff +#define CF_LAST_IO 0x00008000 /* Last insn may be an IO access. */ +#define CF_NOCACHE 0x00010000 /* To be freed after execution */ +#define CF_USE_ICOUNT 0x00020000 +#define CF_INVALID 0x00040000 /* TB is stale. Set with @jmp_lock held */ +#define CF_PARALLEL 0x00080000 /* Generate code for a parallel context */ +#define CF_CLUSTER_MASK 0xff000000 /* Top 8 bits are cluster ID */ +#define CF_CLUSTER_SHIFT 24 +/* cflags' mask for hashing/comparison */ +#define CF_HASH_MASK \ + (CF_COUNT_MASK | CF_LAST_IO | CF_USE_ICOUNT | CF_PARALLEL | CF_CLUSTER_MASK) + + /* Per-vCPU dynamic tracing state used to generate this TB */ + uint32_t trace_vcpu_dstate; + + struct tb_tc tc; + + /* original tb when cflags has CF_NOCACHE */ + struct TranslationBlock *orig_tb; + /* first and second physical page containing code. The lower bit + of the pointer tells the index in page_next[]. + The list is protected by the TB's page('s) lock(s) */ + uintptr_t page_next[2]; + tb_page_addr_t page_addr[2]; + + /* jmp_lock placed here to fill a 4-byte hole. Its documentation is below */ + QemuSpin jmp_lock; + + /* The following data are used to directly call another TB from + * the code of this one. This can be done either by emitting direct or + * indirect native jump instructions. These jumps are reset so that the TB + * just continues its execution. The TB can be linked to another one by + * setting one of the jump targets (or patching the jump instruction). Only + * two of such jumps are supported. + */ + uint16_t jmp_reset_offset[2]; /* offset of original jump target */ +#define TB_JMP_RESET_OFFSET_INVALID 0xffff /* indicates no jump generated */ + uintptr_t jmp_target_arg[2]; /* target address or offset */ + + /* + * Each TB has a NULL-terminated list (jmp_list_head) of incoming jumps. + * Each TB can have two outgoing jumps, and therefore can participate + * in two lists. The list entries are kept in jmp_list_next[2]. The least + * significant bit (LSB) of the pointers in these lists is used to encode + * which of the two list entries is to be used in the pointed TB. + * + * List traversals are protected by jmp_lock. The destination TB of each + * outgoing jump is kept in jmp_dest[] so that the appropriate jmp_lock + * can be acquired from any origin TB. + * + * jmp_dest[] are tagged pointers as well. The LSB is set when the TB is + * being invalidated, so that no further outgoing jumps from it can be set. + * + * jmp_lock also protects the CF_INVALID cflag; a jump must not be chained + * to a destination TB that has CF_INVALID set. + */ + uintptr_t jmp_list_head; + uintptr_t jmp_list_next[2]; + uintptr_t jmp_dest[2]; +}; + +extern bool parallel_cpus; + +/* Hide the qatomic_read to make code a little easier on the eyes */ +static inline uint32_t tb_cflags(const TranslationBlock *tb) +{ + return qatomic_read(&tb->cflags); +} + +/* current cflags for hashing/comparison */ +static inline uint32_t curr_cflags(void) +{ + return (parallel_cpus ? CF_PARALLEL : 0) + | (icount_enabled() ? CF_USE_ICOUNT : 0); +} + +/* TranslationBlock invalidate API */ +#if defined(CONFIG_USER_ONLY) +void tb_invalidate_phys_addr(target_ulong addr); +void tb_invalidate_phys_range(target_ulong start, target_ulong end); +#else +void tb_invalidate_phys_addr(AddressSpace *as, hwaddr addr, MemTxAttrs attrs); +#endif +void tb_flush(CPUState *cpu); +void tb_phys_invalidate(TranslationBlock *tb, tb_page_addr_t page_addr); +TranslationBlock *tb_htable_lookup(CPUState *cpu, target_ulong pc, + target_ulong cs_base, uint32_t flags, + uint32_t cf_mask); +void tb_set_jmp_target(TranslationBlock *tb, int n, uintptr_t addr); + +/* GETPC is the true target of the return instruction that we'll execute. */ +#if defined(CONFIG_TCG_INTERPRETER) +extern uintptr_t tci_tb_ptr; +# define GETPC() tci_tb_ptr +#else +# define GETPC() \ + ((uintptr_t)__builtin_extract_return_addr(__builtin_return_address(0))) +#endif + +/* The true return address will often point to a host insn that is part of + the next translated guest insn. Adjust the address backward to point to + the middle of the call insn. Subtracting one would do the job except for + several compressed mode architectures (arm, mips) which set the low bit + to indicate the compressed mode; subtracting two works around that. It + is also the case that there are no host isas that contain a call insn + smaller than 4 bytes, so we don't worry about special-casing this. */ +#define GETPC_ADJ 2 + +#if !defined(CONFIG_USER_ONLY) && defined(CONFIG_DEBUG_TCG) +void assert_no_pages_locked(void); +#else +static inline void assert_no_pages_locked(void) +{ +} +#endif + +#if !defined(CONFIG_USER_ONLY) + +/** + * iotlb_to_section: + * @cpu: CPU performing the access + * @index: TCG CPU IOTLB entry + * + * Given a TCG CPU IOTLB entry, return the MemoryRegionSection that + * it refers to. @index will have been initially created and returned + * by memory_region_section_get_iotlb(). + */ +struct MemoryRegionSection *iotlb_to_section(CPUState *cpu, + hwaddr index, MemTxAttrs attrs); +#endif + +#if defined(CONFIG_USER_ONLY) +void mmap_lock(void); +void mmap_unlock(void); +bool have_mmap_lock(void); + +/** + * get_page_addr_code() - user-mode version + * @env: CPUArchState + * @addr: guest virtual address of guest code + * + * Returns @addr. + */ +static inline tb_page_addr_t get_page_addr_code(CPUArchState *env, + target_ulong addr) +{ + return addr; +} + +/** + * get_page_addr_code_hostp() - user-mode version + * @env: CPUArchState + * @addr: guest virtual address of guest code + * + * Returns @addr. + * + * If @hostp is non-NULL, sets *@hostp to the host address where @addr's content + * is kept. + */ +static inline tb_page_addr_t get_page_addr_code_hostp(CPUArchState *env, + target_ulong addr, + void **hostp) +{ + if (hostp) { + *hostp = g2h(addr); + } + return addr; +} +#else +static inline void mmap_lock(void) {} +static inline void mmap_unlock(void) {} + +/** + * get_page_addr_code() - full-system version + * @env: CPUArchState + * @addr: guest virtual address of guest code + * + * If we cannot translate and execute from the entire RAM page, or if + * the region is not backed by RAM, returns -1. Otherwise, returns the + * ram_addr_t corresponding to the guest code at @addr. + * + * Note: this function can trigger an exception. + */ +tb_page_addr_t get_page_addr_code(CPUArchState *env, target_ulong addr); + +/** + * get_page_addr_code_hostp() - full-system version + * @env: CPUArchState + * @addr: guest virtual address of guest code + * + * See get_page_addr_code() (full-system version) for documentation on the + * return value. + * + * Sets *@hostp (when @hostp is non-NULL) as follows. + * If the return value is -1, sets *@hostp to NULL. Otherwise, sets *@hostp + * to the host address where @addr's content is kept. + * + * Note: this function can trigger an exception. + */ +tb_page_addr_t get_page_addr_code_hostp(CPUArchState *env, target_ulong addr, + void **hostp); + +void tlb_reset_dirty(CPUState *cpu, ram_addr_t start1, ram_addr_t length); +void tlb_set_dirty(CPUState *cpu, target_ulong vaddr); + +/* exec.c */ +void tb_flush_jmp_cache(CPUState *cpu, target_ulong addr); + +MemoryRegionSection * +address_space_translate_for_iotlb(CPUState *cpu, int asidx, hwaddr addr, + hwaddr *xlat, hwaddr *plen, + MemTxAttrs attrs, int *prot); +hwaddr memory_region_section_get_iotlb(CPUState *cpu, + MemoryRegionSection *section); +#endif + +/* vl.c */ +extern int singlestep; + +#endif diff --git a/include/exec/gdbstub.h b/include/exec/gdbstub.h new file mode 100644 index 000000000..94d8f83e9 --- /dev/null +++ b/include/exec/gdbstub.h @@ -0,0 +1,203 @@ +#ifndef GDBSTUB_H +#define GDBSTUB_H + +#define DEFAULT_GDBSTUB_PORT "1234" + +/* GDB breakpoint/watchpoint types */ +#define GDB_BREAKPOINT_SW 0 +#define GDB_BREAKPOINT_HW 1 +#define GDB_WATCHPOINT_WRITE 2 +#define GDB_WATCHPOINT_READ 3 +#define GDB_WATCHPOINT_ACCESS 4 + +#ifdef NEED_CPU_H +#include "cpu.h" + +typedef void (*gdb_syscall_complete_cb)(CPUState *cpu, + target_ulong ret, target_ulong err); + +/** + * gdb_do_syscall: + * @cb: function to call when the system call has completed + * @fmt: gdb syscall format string + * ...: list of arguments to interpolate into @fmt + * + * Send a GDB syscall request. This function will return immediately; + * the callback function will be called later when the remote system + * call has completed. + * + * @fmt should be in the 'call-id,parameter,parameter...' format documented + * for the F request packet in the GDB remote protocol. A limited set of + * printf-style format specifiers is supported: + * %x - target_ulong argument printed in hex + * %lx - 64-bit argument printed in hex + * %s - string pointer (target_ulong) and length (int) pair + */ +void gdb_do_syscall(gdb_syscall_complete_cb cb, const char *fmt, ...); +/** + * gdb_do_syscallv: + * @cb: function to call when the system call has completed + * @fmt: gdb syscall format string + * @va: arguments to interpolate into @fmt + * + * As gdb_do_syscall, but taking a va_list rather than a variable + * argument list. + */ +void gdb_do_syscallv(gdb_syscall_complete_cb cb, const char *fmt, va_list va); +int use_gdb_syscalls(void); +void gdb_set_stop_cpu(CPUState *cpu); +void gdb_exit(CPUArchState *, int); +#ifdef CONFIG_USER_ONLY +/** + * gdb_handlesig: yield control to gdb + * @cpu: CPU + * @sig: if non-zero, the signal number which caused us to stop + * + * This function yields control to gdb, when a user-mode-only target + * needs to stop execution. If @sig is non-zero, then we will send a + * stop packet to tell gdb that we have stopped because of this signal. + * + * This function will block (handling protocol requests from gdb) + * until gdb tells us to continue target execution. When it does + * return, the return value is a signal to deliver to the target, + * or 0 if no signal should be delivered, ie the signal that caused + * us to stop should be ignored. + */ +int gdb_handlesig(CPUState *, int); +void gdb_signalled(CPUArchState *, int); +void gdbserver_fork(CPUState *); +#endif +/* Get or set a register. Returns the size of the register. */ +typedef int (*gdb_get_reg_cb)(CPUArchState *env, GByteArray *buf, int reg); +typedef int (*gdb_set_reg_cb)(CPUArchState *env, uint8_t *buf, int reg); +void gdb_register_coprocessor(CPUState *cpu, + gdb_get_reg_cb get_reg, gdb_set_reg_cb set_reg, + int num_regs, const char *xml, int g_pos); + +/* + * The GDB remote protocol transfers values in target byte order. As + * the gdbstub may be batching up several register values we always + * append to the array. + */ + +static inline int gdb_get_reg8(GByteArray *buf, uint8_t val) +{ + g_byte_array_append(buf, &val, 1); + return 1; +} + +static inline int gdb_get_reg16(GByteArray *buf, uint16_t val) +{ + uint16_t to_word = tswap16(val); + g_byte_array_append(buf, (uint8_t *) &to_word, 2); + return 2; +} + +static inline int gdb_get_reg32(GByteArray *buf, uint32_t val) +{ + uint32_t to_long = tswap32(val); + g_byte_array_append(buf, (uint8_t *) &to_long, 4); + return 4; +} + +static inline int gdb_get_reg64(GByteArray *buf, uint64_t val) +{ + uint64_t to_quad = tswap64(val); + g_byte_array_append(buf, (uint8_t *) &to_quad, 8); + return 8; +} + +static inline int gdb_get_reg128(GByteArray *buf, uint64_t val_hi, + uint64_t val_lo) +{ + uint64_t to_quad; +#ifdef TARGET_WORDS_BIGENDIAN + to_quad = tswap64(val_hi); + g_byte_array_append(buf, (uint8_t *) &to_quad, 8); + to_quad = tswap64(val_lo); + g_byte_array_append(buf, (uint8_t *) &to_quad, 8); +#else + to_quad = tswap64(val_lo); + g_byte_array_append(buf, (uint8_t *) &to_quad, 8); + to_quad = tswap64(val_hi); + g_byte_array_append(buf, (uint8_t *) &to_quad, 8); +#endif + return 16; +} + +static inline int gdb_get_float32(GByteArray *array, float32 val) +{ + uint8_t buf[sizeof(CPU_FloatU)]; + + stfl_p(buf, val); + g_byte_array_append(array, buf, sizeof(buf)); + + return sizeof(buf); +} + +static inline int gdb_get_float64(GByteArray *array, float64 val) +{ + uint8_t buf[sizeof(CPU_DoubleU)]; + + stfq_p(buf, val); + g_byte_array_append(array, buf, sizeof(buf)); + + return sizeof(buf); +} + +static inline int gdb_get_zeroes(GByteArray *array, size_t len) +{ + guint oldlen = array->len; + g_byte_array_set_size(array, oldlen + len); + memset(array->data + oldlen, 0, len); + + return len; +} + +/** + * gdb_get_reg_ptr: get pointer to start of last element + * @len: length of element + * + * This is a helper function to extract the pointer to the last + * element for additional processing. Some front-ends do additional + * dynamic swapping of the elements based on CPU state. + */ +static inline uint8_t * gdb_get_reg_ptr(GByteArray *buf, int len) +{ + return buf->data + buf->len - len; +} + +#if TARGET_LONG_BITS == 64 +#define gdb_get_regl(buf, val) gdb_get_reg64(buf, val) +#define ldtul_p(addr) ldq_p(addr) +#else +#define gdb_get_regl(buf, val) gdb_get_reg32(buf, val) +#define ldtul_p(addr) ldl_p(addr) +#endif + +#endif + +/** + * gdbserver_start: start the gdb server + * @port_or_device: connection spec for gdb + * + * For CONFIG_USER this is either a tcp port or a path to a fifo. For + * system emulation you can use a full chardev spec for your gdbserver + * port. + */ +int gdbserver_start(const char *port_or_device); + +void gdbserver_cleanup(void); + +/** + * gdb_has_xml: + * This is an ugly hack to cope with both new and old gdb. + * If gdb sends qXfer:features:read then assume we're talking to a newish + * gdb that understands target descriptions. + */ +extern bool gdb_has_xml; + +/* in gdbstub-xml.c, generated by scripts/feature_to_c.sh */ +extern const char *const xml_builtin[][2]; + +#endif diff --git a/include/exec/gen-icount.h b/include/exec/gen-icount.h new file mode 100644 index 000000000..822c43cfd --- /dev/null +++ b/include/exec/gen-icount.h @@ -0,0 +1,86 @@ +#ifndef GEN_ICOUNT_H +#define GEN_ICOUNT_H + +#include "qemu/timer.h" + +/* Helpers for instruction counting code generation. */ + +static TCGOp *icount_start_insn; + +static inline void gen_io_start(void) +{ + TCGv_i32 tmp = tcg_const_i32(1); + tcg_gen_st_i32(tmp, cpu_env, + offsetof(ArchCPU, parent_obj.can_do_io) - + offsetof(ArchCPU, env)); + tcg_temp_free_i32(tmp); +} + +/* + * cpu->can_do_io is cleared automatically at the beginning of + * each translation block. The cost is minimal and only paid + * for -icount, plus it would be very easy to forget doing it + * in the translator. Therefore, backends only need to call + * gen_io_start. + */ +static inline void gen_io_end(void) +{ + TCGv_i32 tmp = tcg_const_i32(0); + tcg_gen_st_i32(tmp, cpu_env, + offsetof(ArchCPU, parent_obj.can_do_io) - + offsetof(ArchCPU, env)); + tcg_temp_free_i32(tmp); +} + +static inline void gen_tb_start(TranslationBlock *tb) +{ + TCGv_i32 count, imm; + + tcg_ctx->exitreq_label = gen_new_label(); + if (tb_cflags(tb) & CF_USE_ICOUNT) { + count = tcg_temp_local_new_i32(); + } else { + count = tcg_temp_new_i32(); + } + + tcg_gen_ld_i32(count, cpu_env, + offsetof(ArchCPU, neg.icount_decr.u32) - + offsetof(ArchCPU, env)); + + if (tb_cflags(tb) & CF_USE_ICOUNT) { + imm = tcg_temp_new_i32(); + /* We emit a movi with a dummy immediate argument. Keep the insn index + * of the movi so that we later (when we know the actual insn count) + * can update the immediate argument with the actual insn count. */ + tcg_gen_movi_i32(imm, 0xdeadbeef); + icount_start_insn = tcg_last_op(); + + tcg_gen_sub_i32(count, count, imm); + tcg_temp_free_i32(imm); + } + + tcg_gen_brcondi_i32(TCG_COND_LT, count, 0, tcg_ctx->exitreq_label); + + if (tb_cflags(tb) & CF_USE_ICOUNT) { + tcg_gen_st16_i32(count, cpu_env, + offsetof(ArchCPU, neg.icount_decr.u16.low) - + offsetof(ArchCPU, env)); + gen_io_end(); + } + + tcg_temp_free_i32(count); +} + +static inline void gen_tb_end(TranslationBlock *tb, int num_insns) +{ + if (tb_cflags(tb) & CF_USE_ICOUNT) { + /* Update the num_insn immediate parameter now that we know + * the actual insn count. */ + tcg_set_insn_param(icount_start_insn, 1, num_insns); + } + + gen_set_label(tcg_ctx->exitreq_label); + tcg_gen_exit_tb(tb, TB_EXIT_REQUESTED); +} + +#endif diff --git a/include/exec/helper-gen.h b/include/exec/helper-gen.h new file mode 100644 index 000000000..29c02f85d --- /dev/null +++ b/include/exec/helper-gen.h @@ -0,0 +1,97 @@ +/* Helper file for declaring TCG helper functions. + This one expands generation functions for tcg opcodes. */ + +#ifndef HELPER_GEN_H +#define HELPER_GEN_H + +#include "exec/helper-head.h" + +#define DEF_HELPER_FLAGS_0(name, flags, ret) \ +static inline void glue(gen_helper_, name)(dh_retvar_decl0(ret)) \ +{ \ + tcg_gen_callN(HELPER(name), dh_retvar(ret), 0, NULL); \ +} + +#define DEF_HELPER_FLAGS_1(name, flags, ret, t1) \ +static inline void glue(gen_helper_, name)(dh_retvar_decl(ret) \ + dh_arg_decl(t1, 1)) \ +{ \ + TCGTemp *args[1] = { dh_arg(t1, 1) }; \ + tcg_gen_callN(HELPER(name), dh_retvar(ret), 1, args); \ +} + +#define DEF_HELPER_FLAGS_2(name, flags, ret, t1, t2) \ +static inline void glue(gen_helper_, name)(dh_retvar_decl(ret) \ + dh_arg_decl(t1, 1), dh_arg_decl(t2, 2)) \ +{ \ + TCGTemp *args[2] = { dh_arg(t1, 1), dh_arg(t2, 2) }; \ + tcg_gen_callN(HELPER(name), dh_retvar(ret), 2, args); \ +} + +#define DEF_HELPER_FLAGS_3(name, flags, ret, t1, t2, t3) \ +static inline void glue(gen_helper_, name)(dh_retvar_decl(ret) \ + dh_arg_decl(t1, 1), dh_arg_decl(t2, 2), dh_arg_decl(t3, 3)) \ +{ \ + TCGTemp *args[3] = { dh_arg(t1, 1), dh_arg(t2, 2), dh_arg(t3, 3) }; \ + tcg_gen_callN(HELPER(name), dh_retvar(ret), 3, args); \ +} + +#define DEF_HELPER_FLAGS_4(name, flags, ret, t1, t2, t3, t4) \ +static inline void glue(gen_helper_, name)(dh_retvar_decl(ret) \ + dh_arg_decl(t1, 1), dh_arg_decl(t2, 2), \ + dh_arg_decl(t3, 3), dh_arg_decl(t4, 4)) \ +{ \ + TCGTemp *args[4] = { dh_arg(t1, 1), dh_arg(t2, 2), \ + dh_arg(t3, 3), dh_arg(t4, 4) }; \ + tcg_gen_callN(HELPER(name), dh_retvar(ret), 4, args); \ +} + +#define DEF_HELPER_FLAGS_5(name, flags, ret, t1, t2, t3, t4, t5) \ +static inline void glue(gen_helper_, name)(dh_retvar_decl(ret) \ + dh_arg_decl(t1, 1), dh_arg_decl(t2, 2), dh_arg_decl(t3, 3), \ + dh_arg_decl(t4, 4), dh_arg_decl(t5, 5)) \ +{ \ + TCGTemp *args[5] = { dh_arg(t1, 1), dh_arg(t2, 2), dh_arg(t3, 3), \ + dh_arg(t4, 4), dh_arg(t5, 5) }; \ + tcg_gen_callN(HELPER(name), dh_retvar(ret), 5, args); \ +} + +#define DEF_HELPER_FLAGS_6(name, flags, ret, t1, t2, t3, t4, t5, t6) \ +static inline void glue(gen_helper_, name)(dh_retvar_decl(ret) \ + dh_arg_decl(t1, 1), dh_arg_decl(t2, 2), dh_arg_decl(t3, 3), \ + dh_arg_decl(t4, 4), dh_arg_decl(t5, 5), dh_arg_decl(t6, 6)) \ +{ \ + TCGTemp *args[6] = { dh_arg(t1, 1), dh_arg(t2, 2), dh_arg(t3, 3), \ + dh_arg(t4, 4), dh_arg(t5, 5), dh_arg(t6, 6) }; \ + tcg_gen_callN(HELPER(name), dh_retvar(ret), 6, args); \ +} + +#define DEF_HELPER_FLAGS_7(name, flags, ret, t1, t2, t3, t4, t5, t6, t7)\ +static inline void glue(gen_helper_, name)(dh_retvar_decl(ret) \ + dh_arg_decl(t1, 1), dh_arg_decl(t2, 2), dh_arg_decl(t3, 3), \ + dh_arg_decl(t4, 4), dh_arg_decl(t5, 5), dh_arg_decl(t6, 6), \ + dh_arg_decl(t7, 7)) \ +{ \ + TCGTemp *args[7] = { dh_arg(t1, 1), dh_arg(t2, 2), dh_arg(t3, 3), \ + dh_arg(t4, 4), dh_arg(t5, 5), dh_arg(t6, 6), \ + dh_arg(t7, 7) }; \ + tcg_gen_callN(HELPER(name), dh_retvar(ret), 7, args); \ +} + +#include "helper.h" +#include "trace/generated-helpers.h" +#include "trace/generated-helpers-wrappers.h" +#include "tcg-runtime.h" +#include "plugin-helpers.h" + +#undef DEF_HELPER_FLAGS_0 +#undef DEF_HELPER_FLAGS_1 +#undef DEF_HELPER_FLAGS_2 +#undef DEF_HELPER_FLAGS_3 +#undef DEF_HELPER_FLAGS_4 +#undef DEF_HELPER_FLAGS_5 +#undef DEF_HELPER_FLAGS_6 +#undef DEF_HELPER_FLAGS_7 +#undef GEN_HELPER + +#endif /* HELPER_GEN_H */ diff --git a/include/exec/helper-head.h b/include/exec/helper-head.h new file mode 100644 index 000000000..3094c7946 --- /dev/null +++ b/include/exec/helper-head.h @@ -0,0 +1,156 @@ +/* Helper file for declaring TCG helper functions. + Used by other helper files. + + Targets should use DEF_HELPER_N and DEF_HELPER_FLAGS_N to declare helper + functions. Names should be specified without the helper_ prefix, and + the return and argument types specified. 3 basic types are understood + (i32, i64 and ptr). Additional aliases are provided for convenience and + to match the types used by the C helper implementation. + + The target helper.h should be included in all files that use/define + helper functions. THis will ensure that function prototypes are + consistent. In addition it should be included an extra two times for + helper.c, defining: + GEN_HELPER 1 to produce op generation functions (gen_helper_*) + GEN_HELPER 2 to do runtime registration helper functions. + */ + +#ifndef EXEC_HELPER_HEAD_H +#define EXEC_HELPER_HEAD_H + +#define HELPER(name) glue(helper_, name) + +/* Some types that make sense in C, but not for TCG. */ +#define dh_alias_i32 i32 +#define dh_alias_s32 i32 +#define dh_alias_int i32 +#define dh_alias_i64 i64 +#define dh_alias_s64 i64 +#define dh_alias_f16 i32 +#define dh_alias_f32 i32 +#define dh_alias_f64 i64 +#define dh_alias_ptr ptr +#define dh_alias_cptr ptr +#define dh_alias_void void +#define dh_alias_noreturn noreturn +#define dh_alias(t) glue(dh_alias_, t) + +#define dh_ctype_i32 uint32_t +#define dh_ctype_s32 int32_t +#define dh_ctype_int int +#define dh_ctype_i64 uint64_t +#define dh_ctype_s64 int64_t +#define dh_ctype_f16 uint32_t +#define dh_ctype_f32 float32 +#define dh_ctype_f64 float64 +#define dh_ctype_ptr void * +#define dh_ctype_cptr const void * +#define dh_ctype_void void +#define dh_ctype_noreturn void QEMU_NORETURN +#define dh_ctype(t) dh_ctype_##t + +#ifdef NEED_CPU_H +# ifdef TARGET_LONG_BITS +# if TARGET_LONG_BITS == 32 +# define dh_alias_tl i32 +# else +# define dh_alias_tl i64 +# endif +# endif +# define dh_alias_env ptr +# define dh_ctype_tl target_ulong +# define dh_ctype_env CPUArchState * +#endif + +/* We can't use glue() here because it falls foul of C preprocessor + recursive expansion rules. */ +#define dh_retvar_decl0_void void +#define dh_retvar_decl0_noreturn void +#define dh_retvar_decl0_i32 TCGv_i32 retval +#define dh_retvar_decl0_i64 TCGv_i64 retval +#define dh_retvar_decl0_ptr TCGv_ptr retval +#define dh_retvar_decl0(t) glue(dh_retvar_decl0_, dh_alias(t)) + +#define dh_retvar_decl_void +#define dh_retvar_decl_noreturn +#define dh_retvar_decl_i32 TCGv_i32 retval, +#define dh_retvar_decl_i64 TCGv_i64 retval, +#define dh_retvar_decl_ptr TCGv_ptr retval, +#define dh_retvar_decl(t) glue(dh_retvar_decl_, dh_alias(t)) + +#define dh_retvar_void NULL +#define dh_retvar_noreturn NULL +#define dh_retvar_i32 tcgv_i32_temp(retval) +#define dh_retvar_i64 tcgv_i64_temp(retval) +#define dh_retvar_ptr tcgv_ptr_temp(retval) +#define dh_retvar(t) glue(dh_retvar_, dh_alias(t)) + +#define dh_is_64bit_void 0 +#define dh_is_64bit_noreturn 0 +#define dh_is_64bit_i32 0 +#define dh_is_64bit_i64 1 +#define dh_is_64bit_ptr (sizeof(void *) == 8) +#define dh_is_64bit_cptr dh_is_64bit_ptr +#define dh_is_64bit(t) glue(dh_is_64bit_, dh_alias(t)) + +#define dh_is_signed_void 0 +#define dh_is_signed_noreturn 0 +#define dh_is_signed_i32 0 +#define dh_is_signed_s32 1 +#define dh_is_signed_i64 0 +#define dh_is_signed_s64 1 +#define dh_is_signed_f16 0 +#define dh_is_signed_f32 0 +#define dh_is_signed_f64 0 +#define dh_is_signed_tl 0 +#define dh_is_signed_int 1 +/* ??? This is highly specific to the host cpu. There are even special + extension instructions that may be required, e.g. ia64's addp4. But + for now we don't support any 64-bit targets with 32-bit pointers. */ +#define dh_is_signed_ptr 0 +#define dh_is_signed_cptr dh_is_signed_ptr +#define dh_is_signed_env dh_is_signed_ptr +#define dh_is_signed(t) dh_is_signed_##t + +#define dh_callflag_i32 0 +#define dh_callflag_s32 0 +#define dh_callflag_int 0 +#define dh_callflag_i64 0 +#define dh_callflag_s64 0 +#define dh_callflag_f16 0 +#define dh_callflag_f32 0 +#define dh_callflag_f64 0 +#define dh_callflag_ptr 0 +#define dh_callflag_cptr dh_callflag_ptr +#define dh_callflag_void 0 +#define dh_callflag_noreturn TCG_CALL_NO_RETURN +#define dh_callflag(t) glue(dh_callflag_, dh_alias(t)) + +#define dh_sizemask(t, n) \ + ((dh_is_64bit(t) << (n*2)) | (dh_is_signed(t) << (n*2+1))) + +#define dh_arg(t, n) \ + glue(glue(tcgv_, dh_alias(t)), _temp)(glue(arg, n)) + +#define dh_arg_decl(t, n) glue(TCGv_, dh_alias(t)) glue(arg, n) + +#define DEF_HELPER_0(name, ret) \ + DEF_HELPER_FLAGS_0(name, 0, ret) +#define DEF_HELPER_1(name, ret, t1) \ + DEF_HELPER_FLAGS_1(name, 0, ret, t1) +#define DEF_HELPER_2(name, ret, t1, t2) \ + DEF_HELPER_FLAGS_2(name, 0, ret, t1, t2) +#define DEF_HELPER_3(name, ret, t1, t2, t3) \ + DEF_HELPER_FLAGS_3(name, 0, ret, t1, t2, t3) +#define DEF_HELPER_4(name, ret, t1, t2, t3, t4) \ + DEF_HELPER_FLAGS_4(name, 0, ret, t1, t2, t3, t4) +#define DEF_HELPER_5(name, ret, t1, t2, t3, t4, t5) \ + DEF_HELPER_FLAGS_5(name, 0, ret, t1, t2, t3, t4, t5) +#define DEF_HELPER_6(name, ret, t1, t2, t3, t4, t5, t6) \ + DEF_HELPER_FLAGS_6(name, 0, ret, t1, t2, t3, t4, t5, t6) +#define DEF_HELPER_7(name, ret, t1, t2, t3, t4, t5, t6, t7) \ + DEF_HELPER_FLAGS_7(name, 0, ret, t1, t2, t3, t4, t5, t6, t7) + +/* MAX_OPC_PARAM_IARGS must be set to n if last entry is DEF_HELPER_FLAGS_n. */ + +#endif /* EXEC_HELPER_HEAD_H */ diff --git a/include/exec/helper-proto.h b/include/exec/helper-proto.h new file mode 100644 index 000000000..659f9298e --- /dev/null +++ b/include/exec/helper-proto.h @@ -0,0 +1,56 @@ +/* Helper file for declaring TCG helper functions. + This one expands prototypes for the helper functions. */ + +#ifndef HELPER_PROTO_H +#define HELPER_PROTO_H + +#include "exec/helper-head.h" + +#define DEF_HELPER_FLAGS_0(name, flags, ret) \ +dh_ctype(ret) HELPER(name) (void); + +#define DEF_HELPER_FLAGS_1(name, flags, ret, t1) \ +dh_ctype(ret) HELPER(name) (dh_ctype(t1)); + +#define DEF_HELPER_FLAGS_2(name, flags, ret, t1, t2) \ +dh_ctype(ret) HELPER(name) (dh_ctype(t1), dh_ctype(t2)); + +#define DEF_HELPER_FLAGS_3(name, flags, ret, t1, t2, t3) \ +dh_ctype(ret) HELPER(name) (dh_ctype(t1), dh_ctype(t2), dh_ctype(t3)); + +#define DEF_HELPER_FLAGS_4(name, flags, ret, t1, t2, t3, t4) \ +dh_ctype(ret) HELPER(name) (dh_ctype(t1), dh_ctype(t2), dh_ctype(t3), \ + dh_ctype(t4)); + +#define DEF_HELPER_FLAGS_5(name, flags, ret, t1, t2, t3, t4, t5) \ +dh_ctype(ret) HELPER(name) (dh_ctype(t1), dh_ctype(t2), dh_ctype(t3), \ + dh_ctype(t4), dh_ctype(t5)); + +#define DEF_HELPER_FLAGS_6(name, flags, ret, t1, t2, t3, t4, t5, t6) \ +dh_ctype(ret) HELPER(name) (dh_ctype(t1), dh_ctype(t2), dh_ctype(t3), \ + dh_ctype(t4), dh_ctype(t5), dh_ctype(t6)); + +#define DEF_HELPER_FLAGS_7(name, flags, ret, t1, t2, t3, t4, t5, t6, t7) \ +dh_ctype(ret) HELPER(name) (dh_ctype(t1), dh_ctype(t2), dh_ctype(t3), \ + dh_ctype(t4), dh_ctype(t5), dh_ctype(t6), \ + dh_ctype(t7)); + +#define IN_HELPER_PROTO + +#include "helper.h" +#include "trace/generated-helpers.h" +#include "tcg-runtime.h" +#include "plugin-helpers.h" + +#undef IN_HELPER_PROTO + +#undef DEF_HELPER_FLAGS_0 +#undef DEF_HELPER_FLAGS_1 +#undef DEF_HELPER_FLAGS_2 +#undef DEF_HELPER_FLAGS_3 +#undef DEF_HELPER_FLAGS_4 +#undef DEF_HELPER_FLAGS_5 +#undef DEF_HELPER_FLAGS_6 +#undef DEF_HELPER_FLAGS_7 + +#endif /* HELPER_PROTO_H */ diff --git a/include/exec/helper-tcg.h b/include/exec/helper-tcg.h new file mode 100644 index 000000000..27870509a --- /dev/null +++ b/include/exec/helper-tcg.h @@ -0,0 +1,76 @@ +/* Helper file for declaring TCG helper functions. + This one defines data structures private to tcg.c. */ + +#ifndef HELPER_TCG_H +#define HELPER_TCG_H + +#include "exec/helper-head.h" + +/* Need one more level of indirection before stringification + to get all the macros expanded first. */ +#define str(s) #s + +#define DEF_HELPER_FLAGS_0(NAME, FLAGS, ret) \ + { .func = HELPER(NAME), .name = str(NAME), \ + .flags = FLAGS | dh_callflag(ret), \ + .sizemask = dh_sizemask(ret, 0) }, + +#define DEF_HELPER_FLAGS_1(NAME, FLAGS, ret, t1) \ + { .func = HELPER(NAME), .name = str(NAME), \ + .flags = FLAGS | dh_callflag(ret), \ + .sizemask = dh_sizemask(ret, 0) | dh_sizemask(t1, 1) }, + +#define DEF_HELPER_FLAGS_2(NAME, FLAGS, ret, t1, t2) \ + { .func = HELPER(NAME), .name = str(NAME), \ + .flags = FLAGS | dh_callflag(ret), \ + .sizemask = dh_sizemask(ret, 0) | dh_sizemask(t1, 1) \ + | dh_sizemask(t2, 2) }, + +#define DEF_HELPER_FLAGS_3(NAME, FLAGS, ret, t1, t2, t3) \ + { .func = HELPER(NAME), .name = str(NAME), \ + .flags = FLAGS | dh_callflag(ret), \ + .sizemask = dh_sizemask(ret, 0) | dh_sizemask(t1, 1) \ + | dh_sizemask(t2, 2) | dh_sizemask(t3, 3) }, + +#define DEF_HELPER_FLAGS_4(NAME, FLAGS, ret, t1, t2, t3, t4) \ + { .func = HELPER(NAME), .name = str(NAME), \ + .flags = FLAGS | dh_callflag(ret), \ + .sizemask = dh_sizemask(ret, 0) | dh_sizemask(t1, 1) \ + | dh_sizemask(t2, 2) | dh_sizemask(t3, 3) | dh_sizemask(t4, 4) }, + +#define DEF_HELPER_FLAGS_5(NAME, FLAGS, ret, t1, t2, t3, t4, t5) \ + { .func = HELPER(NAME), .name = str(NAME), \ + .flags = FLAGS | dh_callflag(ret), \ + .sizemask = dh_sizemask(ret, 0) | dh_sizemask(t1, 1) \ + | dh_sizemask(t2, 2) | dh_sizemask(t3, 3) | dh_sizemask(t4, 4) \ + | dh_sizemask(t5, 5) }, + +#define DEF_HELPER_FLAGS_6(NAME, FLAGS, ret, t1, t2, t3, t4, t5, t6) \ + { .func = HELPER(NAME), .name = str(NAME), \ + .flags = FLAGS | dh_callflag(ret), \ + .sizemask = dh_sizemask(ret, 0) | dh_sizemask(t1, 1) \ + | dh_sizemask(t2, 2) | dh_sizemask(t3, 3) | dh_sizemask(t4, 4) \ + | dh_sizemask(t5, 5) | dh_sizemask(t6, 6) }, + +#define DEF_HELPER_FLAGS_7(NAME, FLAGS, ret, t1, t2, t3, t4, t5, t6, t7) \ + { .func = HELPER(NAME), .name = str(NAME), .flags = FLAGS, \ + .sizemask = dh_sizemask(ret, 0) | dh_sizemask(t1, 1) \ + | dh_sizemask(t2, 2) | dh_sizemask(t3, 3) | dh_sizemask(t4, 4) \ + | dh_sizemask(t5, 5) | dh_sizemask(t6, 6) | dh_sizemask(t7, 7) }, + +#include "helper.h" +#include "trace/generated-helpers.h" +#include "tcg-runtime.h" +#include "plugin-helpers.h" + +#undef str +#undef DEF_HELPER_FLAGS_0 +#undef DEF_HELPER_FLAGS_1 +#undef DEF_HELPER_FLAGS_2 +#undef DEF_HELPER_FLAGS_3 +#undef DEF_HELPER_FLAGS_4 +#undef DEF_HELPER_FLAGS_5 +#undef DEF_HELPER_FLAGS_6 +#undef DEF_HELPER_FLAGS_7 + +#endif /* HELPER_TCG_H */ diff --git a/include/exec/hwaddr.h b/include/exec/hwaddr.h new file mode 100644 index 000000000..8f16d179a --- /dev/null +++ b/include/exec/hwaddr.h @@ -0,0 +1,26 @@ +/* Define hwaddr if it exists. */ + +#ifndef HWADDR_H +#define HWADDR_H + + +#define HWADDR_BITS 64 +/* hwaddr is the type of a physical address (its size can + be different from 'target_ulong'). */ + +typedef uint64_t hwaddr; +#define HWADDR_MAX UINT64_MAX +#define TARGET_FMT_plx "%016" PRIx64 +#define HWADDR_PRId PRId64 +#define HWADDR_PRIi PRIi64 +#define HWADDR_PRIo PRIo64 +#define HWADDR_PRIu PRIu64 +#define HWADDR_PRIx PRIx64 +#define HWADDR_PRIX PRIX64 + +typedef struct MemMapEntry { + hwaddr base; + hwaddr size; +} MemMapEntry; + +#endif diff --git a/include/exec/ioport.h b/include/exec/ioport.h new file mode 100644 index 000000000..e34f66899 --- /dev/null +++ b/include/exec/ioport.h @@ -0,0 +1,75 @@ +/* + * defines ioport related functions + * + * Copyright (c) 2003 Fabrice Bellard + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, see . + */ + +/************************************************************************** + * IO ports API + */ + +#ifndef IOPORT_H +#define IOPORT_H + +#include "exec/memory.h" + +#define MAX_IOPORTS (64 * 1024) +#define IOPORTS_MASK (MAX_IOPORTS - 1) + +typedef struct MemoryRegionPortio { + uint32_t offset; + uint32_t len; + unsigned size; + uint32_t (*read)(void *opaque, uint32_t address); + void (*write)(void *opaque, uint32_t address, uint32_t data); + uint32_t base; /* private field */ +} MemoryRegionPortio; + +#define PORTIO_END_OF_LIST() { } + +#ifndef CONFIG_USER_ONLY +extern const MemoryRegionOps unassigned_io_ops; +#endif + +void cpu_outb(uint32_t addr, uint8_t val); +void cpu_outw(uint32_t addr, uint16_t val); +void cpu_outl(uint32_t addr, uint32_t val); +uint8_t cpu_inb(uint32_t addr); +uint16_t cpu_inw(uint32_t addr); +uint32_t cpu_inl(uint32_t addr); + +typedef struct PortioList { + const struct MemoryRegionPortio *ports; + Object *owner; + struct MemoryRegion *address_space; + unsigned nr; + struct MemoryRegion **regions; + void *opaque; + const char *name; + bool flush_coalesced_mmio; +} PortioList; + +void portio_list_init(PortioList *piolist, Object *owner, + const struct MemoryRegionPortio *callbacks, + void *opaque, const char *name); +void portio_list_set_flush_coalesced(PortioList *piolist); +void portio_list_destroy(PortioList *piolist); +void portio_list_add(PortioList *piolist, + struct MemoryRegion *address_space, + uint32_t addr); +void portio_list_del(PortioList *piolist); + +#endif /* IOPORT_H */ diff --git a/include/exec/log.h b/include/exec/log.h new file mode 100644 index 000000000..e02fff5de --- /dev/null +++ b/include/exec/log.h @@ -0,0 +1,84 @@ +#ifndef QEMU_EXEC_LOG_H +#define QEMU_EXEC_LOG_H + +#include "qemu/log.h" +#include "hw/core/cpu.h" +#include "disas/disas.h" + +/* cpu_dump_state() logging functions: */ +/** + * log_cpu_state: + * @cpu: The CPU whose state is to be logged. + * @flags: Flags what to log. + * + * Logs the output of cpu_dump_state(). + */ +static inline void log_cpu_state(CPUState *cpu, int flags) +{ + QemuLogFile *logfile; + + if (qemu_log_enabled()) { + rcu_read_lock(); + logfile = qatomic_rcu_read(&qemu_logfile); + if (logfile) { + cpu_dump_state(cpu, logfile->fd, flags); + } + rcu_read_unlock(); + } +} + +/** + * log_cpu_state_mask: + * @mask: Mask when to log. + * @cpu: The CPU whose state is to be logged. + * @flags: Flags what to log. + * + * Logs the output of cpu_dump_state() if loglevel includes @mask. + */ +static inline void log_cpu_state_mask(int mask, CPUState *cpu, int flags) +{ + if (qemu_loglevel & mask) { + log_cpu_state(cpu, flags); + } +} + +#ifdef NEED_CPU_H +/* disas() and target_disas() to qemu_logfile: */ +static inline void log_target_disas(CPUState *cpu, target_ulong start, + target_ulong len) +{ + QemuLogFile *logfile; + rcu_read_lock(); + logfile = qatomic_rcu_read(&qemu_logfile); + if (logfile) { + target_disas(logfile->fd, cpu, start, len); + } + rcu_read_unlock(); +} + +static inline void log_disas(void *code, unsigned long size) +{ + QemuLogFile *logfile; + rcu_read_lock(); + logfile = qatomic_rcu_read(&qemu_logfile); + if (logfile) { + disas(logfile->fd, code, size); + } + rcu_read_unlock(); +} + +#if defined(CONFIG_USER_ONLY) +/* page_dump() output to the log file: */ +static inline void log_page_dump(const char *operation) +{ + FILE *logfile = qemu_log_lock(); + if (logfile) { + qemu_log("page layout changed following %s\n", operation); + page_dump(logfile); + } + qemu_log_unlock(logfile); +} +#endif +#endif + +#endif diff --git a/include/exec/memattrs.h b/include/exec/memattrs.h new file mode 100644 index 000000000..95f2d20d5 --- /dev/null +++ b/include/exec/memattrs.h @@ -0,0 +1,71 @@ +/* + * Memory transaction attributes + * + * Copyright (c) 2015 Linaro Limited. + * + * Authors: + * Peter Maydell + * + * This work is licensed under the terms of the GNU GPL, version 2 or later. + * See the COPYING file in the top-level directory. + * + */ + +#ifndef MEMATTRS_H +#define MEMATTRS_H + +/* Every memory transaction has associated with it a set of + * attributes. Some of these are generic (such as the ID of + * the bus master); some are specific to a particular kind of + * bus (such as the ARM Secure/NonSecure bit). We define them + * all as non-overlapping bitfields in a single struct to avoid + * confusion if different parts of QEMU used the same bit for + * different semantics. + */ +typedef struct MemTxAttrs { + /* Bus masters which don't specify any attributes will get this + * (via the MEMTXATTRS_UNSPECIFIED constant), so that we can + * distinguish "all attributes deliberately clear" from + * "didn't specify" if necessary. + */ + unsigned int unspecified:1; + /* ARM/AMBA: TrustZone Secure access + * x86: System Management Mode access + */ + unsigned int secure:1; + /* Memory access is usermode (unprivileged) */ + unsigned int user:1; + /* Requester ID (for MSI for example) */ + unsigned int requester_id:16; + /* Invert endianness for this page */ + unsigned int byte_swap:1; + /* + * The following are target-specific page-table bits. These are not + * related to actual memory transactions at all. However, this structure + * is part of the tlb_fill interface, cached in the cputlb structure, + * and has unused bits. These fields will be read by target-specific + * helpers using env->iotlb[mmu_idx][tlb_index()].attrs.target_tlb_bitN. + */ + unsigned int target_tlb_bit0 : 1; + unsigned int target_tlb_bit1 : 1; + unsigned int target_tlb_bit2 : 1; +} MemTxAttrs; + +/* Bus masters which don't specify any attributes will get this, + * which has all attribute bits clear except the topmost one + * (so that we can distinguish "all attributes deliberately clear" + * from "didn't specify" if necessary). + */ +#define MEMTXATTRS_UNSPECIFIED ((MemTxAttrs) { .unspecified = 1 }) + +/* New-style MMIO accessors can indicate that the transaction failed. + * A zero (MEMTX_OK) response means success; anything else is a failure + * of some kind. The memory subsystem will bitwise-OR together results + * if it is synthesizing an operation from multiple smaller accesses. + */ +#define MEMTX_OK 0 +#define MEMTX_ERROR (1U << 0) /* device returned an error */ +#define MEMTX_DECODE_ERROR (1U << 1) /* nothing at that address */ +typedef uint32_t MemTxResult; + +#endif diff --git a/include/exec/memop.h b/include/exec/memop.h new file mode 100644 index 000000000..529d07b02 --- /dev/null +++ b/include/exec/memop.h @@ -0,0 +1,134 @@ +/* + * Constants for memory operations + * + * Authors: + * Richard Henderson + * + * This work is licensed under the terms of the GNU GPL, version 2 or later. + * See the COPYING file in the top-level directory. + * + */ + +#ifndef MEMOP_H +#define MEMOP_H + +#include "qemu/host-utils.h" + +typedef enum MemOp { + MO_8 = 0, + MO_16 = 1, + MO_32 = 2, + MO_64 = 3, + MO_SIZE = 3, /* Mask for the above. */ + + MO_SIGN = 4, /* Sign-extended, otherwise zero-extended. */ + + MO_BSWAP = 8, /* Host reverse endian. */ +#ifdef HOST_WORDS_BIGENDIAN + MO_LE = MO_BSWAP, + MO_BE = 0, +#else + MO_LE = 0, + MO_BE = MO_BSWAP, +#endif +#ifdef NEED_CPU_H +#ifdef TARGET_WORDS_BIGENDIAN + MO_TE = MO_BE, +#else + MO_TE = MO_LE, +#endif +#endif + + /* + * MO_UNALN accesses are never checked for alignment. + * MO_ALIGN accesses will result in a call to the CPU's + * do_unaligned_access hook if the guest address is not aligned. + * The default depends on whether the target CPU defines + * TARGET_ALIGNED_ONLY. + * + * Some architectures (e.g. ARMv8) need the address which is aligned + * to a size more than the size of the memory access. + * Some architectures (e.g. SPARCv9) need an address which is aligned, + * but less strictly than the natural alignment. + * + * MO_ALIGN supposes the alignment size is the size of a memory access. + * + * There are three options: + * - unaligned access permitted (MO_UNALN). + * - an alignment to the size of an access (MO_ALIGN); + * - an alignment to a specified size, which may be more or less than + * the access size (MO_ALIGN_x where 'x' is a size in bytes); + */ + MO_ASHIFT = 4, + MO_AMASK = 7 << MO_ASHIFT, +#ifdef NEED_CPU_H +#ifdef TARGET_ALIGNED_ONLY + MO_ALIGN = 0, + MO_UNALN = MO_AMASK, +#else + MO_ALIGN = MO_AMASK, + MO_UNALN = 0, +#endif +#endif + MO_ALIGN_2 = 1 << MO_ASHIFT, + MO_ALIGN_4 = 2 << MO_ASHIFT, + MO_ALIGN_8 = 3 << MO_ASHIFT, + MO_ALIGN_16 = 4 << MO_ASHIFT, + MO_ALIGN_32 = 5 << MO_ASHIFT, + MO_ALIGN_64 = 6 << MO_ASHIFT, + + /* Combinations of the above, for ease of use. */ + MO_UB = MO_8, + MO_UW = MO_16, + MO_UL = MO_32, + MO_SB = MO_SIGN | MO_8, + MO_SW = MO_SIGN | MO_16, + MO_SL = MO_SIGN | MO_32, + MO_Q = MO_64, + + MO_LEUW = MO_LE | MO_UW, + MO_LEUL = MO_LE | MO_UL, + MO_LESW = MO_LE | MO_SW, + MO_LESL = MO_LE | MO_SL, + MO_LEQ = MO_LE | MO_Q, + + MO_BEUW = MO_BE | MO_UW, + MO_BEUL = MO_BE | MO_UL, + MO_BESW = MO_BE | MO_SW, + MO_BESL = MO_BE | MO_SL, + MO_BEQ = MO_BE | MO_Q, + +#ifdef NEED_CPU_H + MO_TEUW = MO_TE | MO_UW, + MO_TEUL = MO_TE | MO_UL, + MO_TESW = MO_TE | MO_SW, + MO_TESL = MO_TE | MO_SL, + MO_TEQ = MO_TE | MO_Q, +#endif + + MO_SSIZE = MO_SIZE | MO_SIGN, +} MemOp; + +/* MemOp to size in bytes. */ +static inline unsigned memop_size(MemOp op) +{ + return 1 << (op & MO_SIZE); +} + +/* Size in bytes to MemOp. */ +static inline MemOp size_memop(unsigned size) +{ +#ifdef CONFIG_DEBUG_TCG + /* Power of 2 up to 8. */ + assert((size & (size - 1)) == 0 && size >= 1 && size <= 8); +#endif + return ctz32(size); +} + +/* Big endianness from MemOp. */ +static inline bool memop_big_endian(MemOp op) +{ + return (op & MO_BSWAP) == MO_BE; +} + +#endif diff --git a/include/exec/memory-internal.h b/include/exec/memory-internal.h new file mode 100644 index 000000000..9fcc2af25 --- /dev/null +++ b/include/exec/memory-internal.h @@ -0,0 +1,53 @@ +/* + * Declarations for functions which are internal to the memory subsystem. + * + * Copyright 2011 Red Hat, Inc. and/or its affiliates + * + * Authors: + * Avi Kivity + * + * This work is licensed under the terms of the GNU GPL, version 2 or + * later. See the COPYING file in the top-level directory. + * + */ + +/* + * This header is for use by exec.c, memory.c and accel/tcg/cputlb.c ONLY, + * for declarations which are shared between the memory subsystem's + * internals and the TCG TLB code. Do not include it from elsewhere. + */ + +#ifndef MEMORY_INTERNAL_H +#define MEMORY_INTERNAL_H + +#include "cpu.h" + +#ifndef CONFIG_USER_ONLY +static inline AddressSpaceDispatch *flatview_to_dispatch(FlatView *fv) +{ + return fv->dispatch; +} + +static inline AddressSpaceDispatch *address_space_to_dispatch(AddressSpace *as) +{ + return flatview_to_dispatch(address_space_to_flatview(as)); +} + +FlatView *address_space_get_flatview(AddressSpace *as); +void flatview_unref(FlatView *view); + +extern const MemoryRegionOps unassigned_mem_ops; + +bool memory_region_access_valid(MemoryRegion *mr, hwaddr addr, + unsigned size, bool is_write, + MemTxAttrs attrs); + +void flatview_add_to_dispatch(FlatView *fv, MemoryRegionSection *section); +AddressSpaceDispatch *address_space_dispatch_new(FlatView *fv); +void address_space_dispatch_compact(AddressSpaceDispatch *d); +void address_space_dispatch_free(AddressSpaceDispatch *d); + +void mtree_print_dispatch(struct AddressSpaceDispatch *d, + MemoryRegion *root); +#endif +#endif diff --git a/include/exec/memory.h b/include/exec/memory.h new file mode 100644 index 000000000..0f3e6bcd5 --- /dev/null +++ b/include/exec/memory.h @@ -0,0 +1,2596 @@ +/* + * Physical memory management API + * + * Copyright 2011 Red Hat, Inc. and/or its affiliates + * + * Authors: + * Avi Kivity + * + * This work is licensed under the terms of the GNU GPL, version 2. See + * the COPYING file in the top-level directory. + * + */ + +#ifndef MEMORY_H +#define MEMORY_H + +#ifndef CONFIG_USER_ONLY + +#include "exec/cpu-common.h" +#include "exec/hwaddr.h" +#include "exec/memattrs.h" +#include "exec/memop.h" +#include "exec/ramlist.h" +#include "qemu/bswap.h" +#include "qemu/queue.h" +#include "qemu/int128.h" +#include "qemu/notify.h" +#include "qom/object.h" +#include "qemu/rcu.h" + +#define RAM_ADDR_INVALID (~(ram_addr_t)0) + +#define MAX_PHYS_ADDR_SPACE_BITS 62 +#define MAX_PHYS_ADDR (((hwaddr)1 << MAX_PHYS_ADDR_SPACE_BITS) - 1) + +#define TYPE_MEMORY_REGION "qemu:memory-region" +DECLARE_INSTANCE_CHECKER(MemoryRegion, MEMORY_REGION, + TYPE_MEMORY_REGION) + +#define TYPE_IOMMU_MEMORY_REGION "qemu:iommu-memory-region" +typedef struct IOMMUMemoryRegionClass IOMMUMemoryRegionClass; +DECLARE_OBJ_CHECKERS(IOMMUMemoryRegion, IOMMUMemoryRegionClass, + IOMMU_MEMORY_REGION, TYPE_IOMMU_MEMORY_REGION) + +#ifdef CONFIG_FUZZ +void fuzz_dma_read_cb(size_t addr, + size_t len, + MemoryRegion *mr, + bool is_write); +#else +static inline void fuzz_dma_read_cb(size_t addr, + size_t len, + MemoryRegion *mr, + bool is_write) +{ + /* Do Nothing */ +} +#endif + +extern bool global_dirty_log; + +typedef struct MemoryRegionOps MemoryRegionOps; + +struct ReservedRegion { + hwaddr low; + hwaddr high; + unsigned type; +}; + +typedef struct IOMMUTLBEntry IOMMUTLBEntry; + +/* See address_space_translate: bit 0 is read, bit 1 is write. */ +typedef enum { + IOMMU_NONE = 0, + IOMMU_RO = 1, + IOMMU_WO = 2, + IOMMU_RW = 3, +} IOMMUAccessFlags; + +#define IOMMU_ACCESS_FLAG(r, w) (((r) ? IOMMU_RO : 0) | ((w) ? IOMMU_WO : 0)) + +struct IOMMUTLBEntry { + AddressSpace *target_as; + hwaddr iova; + hwaddr translated_addr; + hwaddr addr_mask; /* 0xfff = 4k translation */ + IOMMUAccessFlags perm; +}; + +/* + * Bitmap for different IOMMUNotifier capabilities. Each notifier can + * register with one or multiple IOMMU Notifier capability bit(s). + */ +typedef enum { + IOMMU_NOTIFIER_NONE = 0, + /* Notify cache invalidations */ + IOMMU_NOTIFIER_UNMAP = 0x1, + /* Notify entry changes (newly created entries) */ + IOMMU_NOTIFIER_MAP = 0x2, +} IOMMUNotifierFlag; + +#define IOMMU_NOTIFIER_ALL (IOMMU_NOTIFIER_MAP | IOMMU_NOTIFIER_UNMAP) + +struct IOMMUNotifier; +typedef void (*IOMMUNotify)(struct IOMMUNotifier *notifier, + IOMMUTLBEntry *data); + +struct IOMMUNotifier { + IOMMUNotify notify; + IOMMUNotifierFlag notifier_flags; + /* Notify for address space range start <= addr <= end */ + hwaddr start; + hwaddr end; + int iommu_idx; + QLIST_ENTRY(IOMMUNotifier) node; +}; +typedef struct IOMMUNotifier IOMMUNotifier; + +/* RAM is pre-allocated and passed into qemu_ram_alloc_from_ptr */ +#define RAM_PREALLOC (1 << 0) + +/* RAM is mmap-ed with MAP_SHARED */ +#define RAM_SHARED (1 << 1) + +/* Only a portion of RAM (used_length) is actually used, and migrated. + * This used_length size can change across reboots. + */ +#define RAM_RESIZEABLE (1 << 2) + +/* UFFDIO_ZEROPAGE is available on this RAMBlock to atomically + * zero the page and wake waiting processes. + * (Set during postcopy) + */ +#define RAM_UF_ZEROPAGE (1 << 3) + +/* RAM can be migrated */ +#define RAM_MIGRATABLE (1 << 4) + +/* RAM is a persistent kind memory */ +#define RAM_PMEM (1 << 5) + +static inline void iommu_notifier_init(IOMMUNotifier *n, IOMMUNotify fn, + IOMMUNotifierFlag flags, + hwaddr start, hwaddr end, + int iommu_idx) +{ + n->notify = fn; + n->notifier_flags = flags; + n->start = start; + n->end = end; + n->iommu_idx = iommu_idx; +} + +/* + * Memory region callbacks + */ +struct MemoryRegionOps { + /* Read from the memory region. @addr is relative to @mr; @size is + * in bytes. */ + uint64_t (*read)(void *opaque, + hwaddr addr, + unsigned size); + /* Write to the memory region. @addr is relative to @mr; @size is + * in bytes. */ + void (*write)(void *opaque, + hwaddr addr, + uint64_t data, + unsigned size); + + MemTxResult (*read_with_attrs)(void *opaque, + hwaddr addr, + uint64_t *data, + unsigned size, + MemTxAttrs attrs); + MemTxResult (*write_with_attrs)(void *opaque, + hwaddr addr, + uint64_t data, + unsigned size, + MemTxAttrs attrs); + + enum device_endian endianness; + /* Guest-visible constraints: */ + struct { + /* If nonzero, specify bounds on access sizes beyond which a machine + * check is thrown. + */ + unsigned min_access_size; + unsigned max_access_size; + /* If true, unaligned accesses are supported. Otherwise unaligned + * accesses throw machine checks. + */ + bool unaligned; + /* + * If present, and returns #false, the transaction is not accepted + * by the device (and results in machine dependent behaviour such + * as a machine check exception). + */ + bool (*accepts)(void *opaque, hwaddr addr, + unsigned size, bool is_write, + MemTxAttrs attrs); + } valid; + /* Internal implementation constraints: */ + struct { + /* If nonzero, specifies the minimum size implemented. Smaller sizes + * will be rounded upwards and a partial result will be returned. + */ + unsigned min_access_size; + /* If nonzero, specifies the maximum size implemented. Larger sizes + * will be done as a series of accesses with smaller sizes. + */ + unsigned max_access_size; + /* If true, unaligned accesses are supported. Otherwise all accesses + * are converted to (possibly multiple) naturally aligned accesses. + */ + bool unaligned; + } impl; +}; + +typedef struct MemoryRegionClass { + /* private */ + ObjectClass parent_class; +} MemoryRegionClass; + + +enum IOMMUMemoryRegionAttr { + IOMMU_ATTR_SPAPR_TCE_FD +}; + +/* + * IOMMUMemoryRegionClass: + * + * All IOMMU implementations need to subclass TYPE_IOMMU_MEMORY_REGION + * and provide an implementation of at least the @translate method here + * to handle requests to the memory region. Other methods are optional. + * + * The IOMMU implementation must use the IOMMU notifier infrastructure + * to report whenever mappings are changed, by calling + * memory_region_notify_iommu() (or, if necessary, by calling + * memory_region_notify_one() for each registered notifier). + * + * Conceptually an IOMMU provides a mapping from input address + * to an output TLB entry. If the IOMMU is aware of memory transaction + * attributes and the output TLB entry depends on the transaction + * attributes, we represent this using IOMMU indexes. Each index + * selects a particular translation table that the IOMMU has: + * + * @attrs_to_index returns the IOMMU index for a set of transaction attributes + * + * @translate takes an input address and an IOMMU index + * + * and the mapping returned can only depend on the input address and the + * IOMMU index. + * + * Most IOMMUs don't care about the transaction attributes and support + * only a single IOMMU index. A more complex IOMMU might have one index + * for secure transactions and one for non-secure transactions. + */ +struct IOMMUMemoryRegionClass { + /* private: */ + MemoryRegionClass parent_class; + + /* public: */ + /** + * @translate: + * + * Return a TLB entry that contains a given address. + * + * The IOMMUAccessFlags indicated via @flag are optional and may + * be specified as IOMMU_NONE to indicate that the caller needs + * the full translation information for both reads and writes. If + * the access flags are specified then the IOMMU implementation + * may use this as an optimization, to stop doing a page table + * walk as soon as it knows that the requested permissions are not + * allowed. If IOMMU_NONE is passed then the IOMMU must do the + * full page table walk and report the permissions in the returned + * IOMMUTLBEntry. (Note that this implies that an IOMMU may not + * return different mappings for reads and writes.) + * + * The returned information remains valid while the caller is + * holding the big QEMU lock or is inside an RCU critical section; + * if the caller wishes to cache the mapping beyond that it must + * register an IOMMU notifier so it can invalidate its cached + * information when the IOMMU mapping changes. + * + * @iommu: the IOMMUMemoryRegion + * + * @hwaddr: address to be translated within the memory region + * + * @flag: requested access permission + * + * @iommu_idx: IOMMU index for the translation + */ + IOMMUTLBEntry (*translate)(IOMMUMemoryRegion *iommu, hwaddr addr, + IOMMUAccessFlags flag, int iommu_idx); + /** + * @get_min_page_size: + * + * Returns minimum supported page size in bytes. + * + * If this method is not provided then the minimum is assumed to + * be TARGET_PAGE_SIZE. + * + * @iommu: the IOMMUMemoryRegion + */ + uint64_t (*get_min_page_size)(IOMMUMemoryRegion *iommu); + /** + * @notify_flag_changed: + * + * Called when IOMMU Notifier flag changes (ie when the set of + * events which IOMMU users are requesting notification for changes). + * Optional method -- need not be provided if the IOMMU does not + * need to know exactly which events must be notified. + * + * @iommu: the IOMMUMemoryRegion + * + * @old_flags: events which previously needed to be notified + * + * @new_flags: events which now need to be notified + * + * Returns 0 on success, or a negative errno; in particular + * returns -EINVAL if the new flag bitmap is not supported by the + * IOMMU memory region. In case of failure, the error object + * must be created + */ + int (*notify_flag_changed)(IOMMUMemoryRegion *iommu, + IOMMUNotifierFlag old_flags, + IOMMUNotifierFlag new_flags, + Error **errp); + /** + * @replay: + * + * Called to handle memory_region_iommu_replay(). + * + * The default implementation of memory_region_iommu_replay() is to + * call the IOMMU translate method for every page in the address space + * with flag == IOMMU_NONE and then call the notifier if translate + * returns a valid mapping. If this method is implemented then it + * overrides the default behaviour, and must provide the full semantics + * of memory_region_iommu_replay(), by calling @notifier for every + * translation present in the IOMMU. + * + * Optional method -- an IOMMU only needs to provide this method + * if the default is inefficient or produces undesirable side effects. + * + * Note: this is not related to record-and-replay functionality. + */ + void (*replay)(IOMMUMemoryRegion *iommu, IOMMUNotifier *notifier); + + /** + * @get_attr: + * + * Get IOMMU misc attributes. This is an optional method that + * can be used to allow users of the IOMMU to get implementation-specific + * information. The IOMMU implements this method to handle calls + * by IOMMU users to memory_region_iommu_get_attr() by filling in + * the arbitrary data pointer for any IOMMUMemoryRegionAttr values that + * the IOMMU supports. If the method is unimplemented then + * memory_region_iommu_get_attr() will always return -EINVAL. + * + * @iommu: the IOMMUMemoryRegion + * + * @attr: attribute being queried + * + * @data: memory to fill in with the attribute data + * + * Returns 0 on success, or a negative errno; in particular + * returns -EINVAL for unrecognized or unimplemented attribute types. + */ + int (*get_attr)(IOMMUMemoryRegion *iommu, enum IOMMUMemoryRegionAttr attr, + void *data); + + /** + * @attrs_to_index: + * + * Return the IOMMU index to use for a given set of transaction attributes. + * + * Optional method: if an IOMMU only supports a single IOMMU index then + * the default implementation of memory_region_iommu_attrs_to_index() + * will return 0. + * + * The indexes supported by an IOMMU must be contiguous, starting at 0. + * + * @iommu: the IOMMUMemoryRegion + * @attrs: memory transaction attributes + */ + int (*attrs_to_index)(IOMMUMemoryRegion *iommu, MemTxAttrs attrs); + + /** + * @num_indexes: + * + * Return the number of IOMMU indexes this IOMMU supports. + * + * Optional method: if this method is not provided, then + * memory_region_iommu_num_indexes() will return 1, indicating that + * only a single IOMMU index is supported. + * + * @iommu: the IOMMUMemoryRegion + */ + int (*num_indexes)(IOMMUMemoryRegion *iommu); + + /** + * @iommu_set_page_size_mask: + * + * Restrict the page size mask that can be supported with a given IOMMU + * memory region. Used for example to propagate host physical IOMMU page + * size mask limitations to the virtual IOMMU. + * + * Optional method: if this method is not provided, then the default global + * page mask is used. + * + * @iommu: the IOMMUMemoryRegion + * + * @page_size_mask: a bitmask of supported page sizes. At least one bit, + * representing the smallest page size, must be set. Additional set bits + * represent supported block sizes. For example a host physical IOMMU that + * uses page tables with a page size of 4kB, and supports 2MB and 4GB + * blocks, will set mask 0x40201000. A granule of 4kB with indiscriminate + * block sizes is specified with mask 0xfffffffffffff000. + * + * Returns 0 on success, or a negative error. In case of failure, the error + * object must be created. + */ + int (*iommu_set_page_size_mask)(IOMMUMemoryRegion *iommu, + uint64_t page_size_mask, + Error **errp); +}; + +typedef struct CoalescedMemoryRange CoalescedMemoryRange; +typedef struct MemoryRegionIoeventfd MemoryRegionIoeventfd; + +/** MemoryRegion: + * + * A struct representing a memory region. + */ +struct MemoryRegion { + Object parent_obj; + + /* private: */ + + /* The following fields should fit in a cache line */ + bool romd_mode; + bool ram; + bool subpage; + bool readonly; /* For RAM regions */ + bool nonvolatile; + bool rom_device; + bool flush_coalesced_mmio; + uint8_t dirty_log_mask; + bool is_iommu; + RAMBlock *ram_block; + Object *owner; + + const MemoryRegionOps *ops; + void *opaque; + MemoryRegion *container; + Int128 size; + hwaddr addr; + void (*destructor)(MemoryRegion *mr); + uint64_t align; + bool terminates; + bool ram_device; + bool enabled; + bool warning_printed; /* For reservations */ + uint8_t vga_logging_count; + MemoryRegion *alias; + hwaddr alias_offset; + int32_t priority; + QTAILQ_HEAD(, MemoryRegion) subregions; + QTAILQ_ENTRY(MemoryRegion) subregions_link; + QTAILQ_HEAD(, CoalescedMemoryRange) coalesced; + const char *name; + unsigned ioeventfd_nb; + MemoryRegionIoeventfd *ioeventfds; +}; + +struct IOMMUMemoryRegion { + MemoryRegion parent_obj; + + QLIST_HEAD(, IOMMUNotifier) iommu_notify; + IOMMUNotifierFlag iommu_notify_flags; +}; + +#define IOMMU_NOTIFIER_FOREACH(n, mr) \ + QLIST_FOREACH((n), &(mr)->iommu_notify, node) + +/** + * struct MemoryListener: callbacks structure for updates to the physical memory map + * + * Allows a component to adjust to changes in the guest-visible memory map. + * Use with memory_listener_register() and memory_listener_unregister(). + */ +struct MemoryListener { + /** + * @begin: + * + * Called at the beginning of an address space update transaction. + * Followed by calls to #MemoryListener.region_add(), + * #MemoryListener.region_del(), #MemoryListener.region_nop(), + * #MemoryListener.log_start() and #MemoryListener.log_stop() in + * increasing address order. + * + * @listener: The #MemoryListener. + */ + void (*begin)(MemoryListener *listener); + + /** + * @commit: + * + * Called at the end of an address space update transaction, + * after the last call to #MemoryListener.region_add(), + * #MemoryListener.region_del() or #MemoryListener.region_nop(), + * #MemoryListener.log_start() and #MemoryListener.log_stop(). + * + * @listener: The #MemoryListener. + */ + void (*commit)(MemoryListener *listener); + + /** + * @region_add: + * + * Called during an address space update transaction, + * for a section of the address space that is new in this address space + * space since the last transaction. + * + * @listener: The #MemoryListener. + * @section: The new #MemoryRegionSection. + */ + void (*region_add)(MemoryListener *listener, MemoryRegionSection *section); + + /** + * @region_del: + * + * Called during an address space update transaction, + * for a section of the address space that has disappeared in the address + * space since the last transaction. + * + * @listener: The #MemoryListener. + * @section: The old #MemoryRegionSection. + */ + void (*region_del)(MemoryListener *listener, MemoryRegionSection *section); + + /** + * @region_nop: + * + * Called during an address space update transaction, + * for a section of the address space that is in the same place in the address + * space as in the last transaction. + * + * @listener: The #MemoryListener. + * @section: The #MemoryRegionSection. + */ + void (*region_nop)(MemoryListener *listener, MemoryRegionSection *section); + + /** + * @log_start: + * + * Called during an address space update transaction, after + * one of #MemoryListener.region_add(),#MemoryListener.region_del() or + * #MemoryListener.region_nop(), if dirty memory logging clients have + * become active since the last transaction. + * + * @listener: The #MemoryListener. + * @section: The #MemoryRegionSection. + * @old: A bitmap of dirty memory logging clients that were active in + * the previous transaction. + * @new: A bitmap of dirty memory logging clients that are active in + * the current transaction. + */ + void (*log_start)(MemoryListener *listener, MemoryRegionSection *section, + int old, int new); + + /** + * @log_stop: + * + * Called during an address space update transaction, after + * one of #MemoryListener.region_add(), #MemoryListener.region_del() or + * #MemoryListener.region_nop() and possibly after + * #MemoryListener.log_start(), if dirty memory logging clients have + * become inactive since the last transaction. + * + * @listener: The #MemoryListener. + * @section: The #MemoryRegionSection. + * @old: A bitmap of dirty memory logging clients that were active in + * the previous transaction. + * @new: A bitmap of dirty memory logging clients that are active in + * the current transaction. + */ + void (*log_stop)(MemoryListener *listener, MemoryRegionSection *section, + int old, int new); + + /** + * @log_sync: + * + * Called by memory_region_snapshot_and_clear_dirty() and + * memory_global_dirty_log_sync(), before accessing QEMU's "official" + * copy of the dirty memory bitmap for a #MemoryRegionSection. + * + * @listener: The #MemoryListener. + * @section: The #MemoryRegionSection. + */ + void (*log_sync)(MemoryListener *listener, MemoryRegionSection *section); + + /** + * @log_clear: + * + * Called before reading the dirty memory bitmap for a + * #MemoryRegionSection. + * + * @listener: The #MemoryListener. + * @section: The #MemoryRegionSection. + */ + void (*log_clear)(MemoryListener *listener, MemoryRegionSection *section); + + /** + * @log_global_start: + * + * Called by memory_global_dirty_log_start(), which + * enables the %DIRTY_LOG_MIGRATION client on all memory regions in + * the address space. #MemoryListener.log_global_start() is also + * called when a #MemoryListener is added, if global dirty logging is + * active at that time. + * + * @listener: The #MemoryListener. + */ + void (*log_global_start)(MemoryListener *listener); + + /** + * @log_global_stop: + * + * Called by memory_global_dirty_log_stop(), which + * disables the %DIRTY_LOG_MIGRATION client on all memory regions in + * the address space. + * + * @listener: The #MemoryListener. + */ + void (*log_global_stop)(MemoryListener *listener); + + /** + * @log_global_after_sync: + * + * Called after reading the dirty memory bitmap + * for any #MemoryRegionSection. + * + * @listener: The #MemoryListener. + */ + void (*log_global_after_sync)(MemoryListener *listener); + + /** + * @eventfd_add: + * + * Called during an address space update transaction, + * for a section of the address space that has had a new ioeventfd + * registration since the last transaction. + * + * @listener: The #MemoryListener. + * @section: The new #MemoryRegionSection. + * @match_data: The @match_data parameter for the new ioeventfd. + * @data: The @data parameter for the new ioeventfd. + * @e: The #EventNotifier parameter for the new ioeventfd. + */ + void (*eventfd_add)(MemoryListener *listener, MemoryRegionSection *section, + bool match_data, uint64_t data, EventNotifier *e); + + /** + * @eventfd_del: + * + * Called during an address space update transaction, + * for a section of the address space that has dropped an ioeventfd + * registration since the last transaction. + * + * @listener: The #MemoryListener. + * @section: The new #MemoryRegionSection. + * @match_data: The @match_data parameter for the dropped ioeventfd. + * @data: The @data parameter for the dropped ioeventfd. + * @e: The #EventNotifier parameter for the dropped ioeventfd. + */ + void (*eventfd_del)(MemoryListener *listener, MemoryRegionSection *section, + bool match_data, uint64_t data, EventNotifier *e); + + /** + * @coalesced_io_add: + * + * Called during an address space update transaction, + * for a section of the address space that has had a new coalesced + * MMIO range registration since the last transaction. + * + * @listener: The #MemoryListener. + * @section: The new #MemoryRegionSection. + * @addr: The starting address for the coalesced MMIO range. + * @len: The length of the coalesced MMIO range. + */ + void (*coalesced_io_add)(MemoryListener *listener, MemoryRegionSection *section, + hwaddr addr, hwaddr len); + + /** + * @coalesced_io_del: + * + * Called during an address space update transaction, + * for a section of the address space that has dropped a coalesced + * MMIO range since the last transaction. + * + * @listener: The #MemoryListener. + * @section: The new #MemoryRegionSection. + * @addr: The starting address for the coalesced MMIO range. + * @len: The length of the coalesced MMIO range. + */ + void (*coalesced_io_del)(MemoryListener *listener, MemoryRegionSection *section, + hwaddr addr, hwaddr len); + /** + * @priority: + * + * Govern the order in which memory listeners are invoked. Lower priorities + * are invoked earlier for "add" or "start" callbacks, and later for "delete" + * or "stop" callbacks. + */ + unsigned priority; + + /* private: */ + AddressSpace *address_space; + QTAILQ_ENTRY(MemoryListener) link; + QTAILQ_ENTRY(MemoryListener) link_as; +}; + +/** + * struct AddressSpace: describes a mapping of addresses to #MemoryRegion objects + */ +struct AddressSpace { + /* private: */ + struct rcu_head rcu; + char *name; + MemoryRegion *root; + + /* Accessed via RCU. */ + struct FlatView *current_map; + + int ioeventfd_nb; + struct MemoryRegionIoeventfd *ioeventfds; + QTAILQ_HEAD(, MemoryListener) listeners; + QTAILQ_ENTRY(AddressSpace) address_spaces_link; +}; + +typedef struct AddressSpaceDispatch AddressSpaceDispatch; +typedef struct FlatRange FlatRange; + +/* Flattened global view of current active memory hierarchy. Kept in sorted + * order. + */ +struct FlatView { + struct rcu_head rcu; + unsigned ref; + FlatRange *ranges; + unsigned nr; + unsigned nr_allocated; + struct AddressSpaceDispatch *dispatch; + MemoryRegion *root; +}; + +static inline FlatView *address_space_to_flatview(AddressSpace *as) +{ + return qatomic_rcu_read(&as->current_map); +} + +typedef int (*flatview_cb)(Int128 start, + Int128 len, + const MemoryRegion*, void*); + +void flatview_for_each_range(FlatView *fv, flatview_cb cb , void *opaque); + +/** + * struct MemoryRegionSection: describes a fragment of a #MemoryRegion + * + * @mr: the region, or %NULL if empty + * @fv: the flat view of the address space the region is mapped in + * @offset_within_region: the beginning of the section, relative to @mr's start + * @size: the size of the section; will not exceed @mr's boundaries + * @offset_within_address_space: the address of the first byte of the section + * relative to the region's address space + * @readonly: writes to this section are ignored + * @nonvolatile: this section is non-volatile + */ +struct MemoryRegionSection { + Int128 size; + MemoryRegion *mr; + FlatView *fv; + hwaddr offset_within_region; + hwaddr offset_within_address_space; + bool readonly; + bool nonvolatile; +}; + +static inline bool MemoryRegionSection_eq(MemoryRegionSection *a, + MemoryRegionSection *b) +{ + return a->mr == b->mr && + a->fv == b->fv && + a->offset_within_region == b->offset_within_region && + a->offset_within_address_space == b->offset_within_address_space && + int128_eq(a->size, b->size) && + a->readonly == b->readonly && + a->nonvolatile == b->nonvolatile; +} + +/** + * memory_region_init: Initialize a memory region + * + * The region typically acts as a container for other memory regions. Use + * memory_region_add_subregion() to add subregions. + * + * @mr: the #MemoryRegion to be initialized + * @owner: the object that tracks the region's reference count + * @name: used for debugging; not visible to the user or ABI + * @size: size of the region; any subregions beyond this size will be clipped + */ +void memory_region_init(MemoryRegion *mr, + struct Object *owner, + const char *name, + uint64_t size); + +/** + * memory_region_ref: Add 1 to a memory region's reference count + * + * Whenever memory regions are accessed outside the BQL, they need to be + * preserved against hot-unplug. MemoryRegions actually do not have their + * own reference count; they piggyback on a QOM object, their "owner". + * This function adds a reference to the owner. + * + * All MemoryRegions must have an owner if they can disappear, even if the + * device they belong to operates exclusively under the BQL. This is because + * the region could be returned at any time by memory_region_find, and this + * is usually under guest control. + * + * @mr: the #MemoryRegion + */ +void memory_region_ref(MemoryRegion *mr); + +/** + * memory_region_unref: Remove 1 to a memory region's reference count + * + * Whenever memory regions are accessed outside the BQL, they need to be + * preserved against hot-unplug. MemoryRegions actually do not have their + * own reference count; they piggyback on a QOM object, their "owner". + * This function removes a reference to the owner and possibly destroys it. + * + * @mr: the #MemoryRegion + */ +void memory_region_unref(MemoryRegion *mr); + +/** + * memory_region_init_io: Initialize an I/O memory region. + * + * Accesses into the region will cause the callbacks in @ops to be called. + * if @size is nonzero, subregions will be clipped to @size. + * + * @mr: the #MemoryRegion to be initialized. + * @owner: the object that tracks the region's reference count + * @ops: a structure containing read and write callbacks to be used when + * I/O is performed on the region. + * @opaque: passed to the read and write callbacks of the @ops structure. + * @name: used for debugging; not visible to the user or ABI + * @size: size of the region. + */ +void memory_region_init_io(MemoryRegion *mr, + struct Object *owner, + const MemoryRegionOps *ops, + void *opaque, + const char *name, + uint64_t size); + +/** + * memory_region_init_ram_nomigrate: Initialize RAM memory region. Accesses + * into the region will modify memory + * directly. + * + * @mr: the #MemoryRegion to be initialized. + * @owner: the object that tracks the region's reference count + * @name: Region name, becomes part of RAMBlock name used in migration stream + * must be unique within any device + * @size: size of the region. + * @errp: pointer to Error*, to store an error if it happens. + * + * Note that this function does not do anything to cause the data in the + * RAM memory region to be migrated; that is the responsibility of the caller. + */ +void memory_region_init_ram_nomigrate(MemoryRegion *mr, + struct Object *owner, + const char *name, + uint64_t size, + Error **errp); + +/** + * memory_region_init_ram_shared_nomigrate: Initialize RAM memory region. + * Accesses into the region will + * modify memory directly. + * + * @mr: the #MemoryRegion to be initialized. + * @owner: the object that tracks the region's reference count + * @name: Region name, becomes part of RAMBlock name used in migration stream + * must be unique within any device + * @size: size of the region. + * @share: allow remapping RAM to different addresses + * @errp: pointer to Error*, to store an error if it happens. + * + * Note that this function is similar to memory_region_init_ram_nomigrate. + * The only difference is part of the RAM region can be remapped. + */ +void memory_region_init_ram_shared_nomigrate(MemoryRegion *mr, + struct Object *owner, + const char *name, + uint64_t size, + bool share, + Error **errp); + +/** + * memory_region_init_resizeable_ram: Initialize memory region with resizeable + * RAM. Accesses into the region will + * modify memory directly. Only an initial + * portion of this RAM is actually used. + * The used size can change across reboots. + * + * @mr: the #MemoryRegion to be initialized. + * @owner: the object that tracks the region's reference count + * @name: Region name, becomes part of RAMBlock name used in migration stream + * must be unique within any device + * @size: used size of the region. + * @max_size: max size of the region. + * @resized: callback to notify owner about used size change. + * @errp: pointer to Error*, to store an error if it happens. + * + * Note that this function does not do anything to cause the data in the + * RAM memory region to be migrated; that is the responsibility of the caller. + */ +void memory_region_init_resizeable_ram(MemoryRegion *mr, + struct Object *owner, + const char *name, + uint64_t size, + uint64_t max_size, + void (*resized)(const char*, + uint64_t length, + void *host), + Error **errp); +#ifdef CONFIG_POSIX + +/** + * memory_region_init_ram_from_file: Initialize RAM memory region with a + * mmap-ed backend. + * + * @mr: the #MemoryRegion to be initialized. + * @owner: the object that tracks the region's reference count + * @name: Region name, becomes part of RAMBlock name used in migration stream + * must be unique within any device + * @size: size of the region. + * @align: alignment of the region base address; if 0, the default alignment + * (getpagesize()) will be used. + * @ram_flags: Memory region features: + * - RAM_SHARED: memory must be mmaped with the MAP_SHARED flag + * - RAM_PMEM: the memory is persistent memory + * Other bits are ignored now. + * @path: the path in which to allocate the RAM. + * @errp: pointer to Error*, to store an error if it happens. + * + * Note that this function does not do anything to cause the data in the + * RAM memory region to be migrated; that is the responsibility of the caller. + */ +void memory_region_init_ram_from_file(MemoryRegion *mr, + struct Object *owner, + const char *name, + uint64_t size, + uint64_t align, + uint32_t ram_flags, + const char *path, + Error **errp); + +/** + * memory_region_init_ram_from_fd: Initialize RAM memory region with a + * mmap-ed backend. + * + * @mr: the #MemoryRegion to be initialized. + * @owner: the object that tracks the region's reference count + * @name: the name of the region. + * @size: size of the region. + * @share: %true if memory must be mmaped with the MAP_SHARED flag + * @fd: the fd to mmap. + * @errp: pointer to Error*, to store an error if it happens. + * + * Note that this function does not do anything to cause the data in the + * RAM memory region to be migrated; that is the responsibility of the caller. + */ +void memory_region_init_ram_from_fd(MemoryRegion *mr, + struct Object *owner, + const char *name, + uint64_t size, + bool share, + int fd, + Error **errp); +#endif + +/** + * memory_region_init_ram_ptr: Initialize RAM memory region from a + * user-provided pointer. Accesses into the + * region will modify memory directly. + * + * @mr: the #MemoryRegion to be initialized. + * @owner: the object that tracks the region's reference count + * @name: Region name, becomes part of RAMBlock name used in migration stream + * must be unique within any device + * @size: size of the region. + * @ptr: memory to be mapped; must contain at least @size bytes. + * + * Note that this function does not do anything to cause the data in the + * RAM memory region to be migrated; that is the responsibility of the caller. + */ +void memory_region_init_ram_ptr(MemoryRegion *mr, + struct Object *owner, + const char *name, + uint64_t size, + void *ptr); + +/** + * memory_region_init_ram_device_ptr: Initialize RAM device memory region from + * a user-provided pointer. + * + * A RAM device represents a mapping to a physical device, such as to a PCI + * MMIO BAR of an vfio-pci assigned device. The memory region may be mapped + * into the VM address space and access to the region will modify memory + * directly. However, the memory region should not be included in a memory + * dump (device may not be enabled/mapped at the time of the dump), and + * operations incompatible with manipulating MMIO should be avoided. Replaces + * skip_dump flag. + * + * @mr: the #MemoryRegion to be initialized. + * @owner: the object that tracks the region's reference count + * @name: the name of the region. + * @size: size of the region. + * @ptr: memory to be mapped; must contain at least @size bytes. + * + * Note that this function does not do anything to cause the data in the + * RAM memory region to be migrated; that is the responsibility of the caller. + * (For RAM device memory regions, migrating the contents rarely makes sense.) + */ +void memory_region_init_ram_device_ptr(MemoryRegion *mr, + struct Object *owner, + const char *name, + uint64_t size, + void *ptr); + +/** + * memory_region_init_alias: Initialize a memory region that aliases all or a + * part of another memory region. + * + * @mr: the #MemoryRegion to be initialized. + * @owner: the object that tracks the region's reference count + * @name: used for debugging; not visible to the user or ABI + * @orig: the region to be referenced; @mr will be equivalent to + * @orig between @offset and @offset + @size - 1. + * @offset: start of the section in @orig to be referenced. + * @size: size of the region. + */ +void memory_region_init_alias(MemoryRegion *mr, + struct Object *owner, + const char *name, + MemoryRegion *orig, + hwaddr offset, + uint64_t size); + +/** + * memory_region_init_rom_nomigrate: Initialize a ROM memory region. + * + * This has the same effect as calling memory_region_init_ram_nomigrate() + * and then marking the resulting region read-only with + * memory_region_set_readonly(). + * + * Note that this function does not do anything to cause the data in the + * RAM side of the memory region to be migrated; that is the responsibility + * of the caller. + * + * @mr: the #MemoryRegion to be initialized. + * @owner: the object that tracks the region's reference count + * @name: Region name, becomes part of RAMBlock name used in migration stream + * must be unique within any device + * @size: size of the region. + * @errp: pointer to Error*, to store an error if it happens. + */ +void memory_region_init_rom_nomigrate(MemoryRegion *mr, + struct Object *owner, + const char *name, + uint64_t size, + Error **errp); + +/** + * memory_region_init_rom_device_nomigrate: Initialize a ROM memory region. + * Writes are handled via callbacks. + * + * Note that this function does not do anything to cause the data in the + * RAM side of the memory region to be migrated; that is the responsibility + * of the caller. + * + * @mr: the #MemoryRegion to be initialized. + * @owner: the object that tracks the region's reference count + * @ops: callbacks for write access handling (must not be NULL). + * @opaque: passed to the read and write callbacks of the @ops structure. + * @name: Region name, becomes part of RAMBlock name used in migration stream + * must be unique within any device + * @size: size of the region. + * @errp: pointer to Error*, to store an error if it happens. + */ +void memory_region_init_rom_device_nomigrate(MemoryRegion *mr, + struct Object *owner, + const MemoryRegionOps *ops, + void *opaque, + const char *name, + uint64_t size, + Error **errp); + +/** + * memory_region_init_iommu: Initialize a memory region of a custom type + * that translates addresses + * + * An IOMMU region translates addresses and forwards accesses to a target + * memory region. + * + * The IOMMU implementation must define a subclass of TYPE_IOMMU_MEMORY_REGION. + * @_iommu_mr should be a pointer to enough memory for an instance of + * that subclass, @instance_size is the size of that subclass, and + * @mrtypename is its name. This function will initialize @_iommu_mr as an + * instance of the subclass, and its methods will then be called to handle + * accesses to the memory region. See the documentation of + * #IOMMUMemoryRegionClass for further details. + * + * @_iommu_mr: the #IOMMUMemoryRegion to be initialized + * @instance_size: the IOMMUMemoryRegion subclass instance size + * @mrtypename: the type name of the #IOMMUMemoryRegion + * @owner: the object that tracks the region's reference count + * @name: used for debugging; not visible to the user or ABI + * @size: size of the region. + */ +void memory_region_init_iommu(void *_iommu_mr, + size_t instance_size, + const char *mrtypename, + Object *owner, + const char *name, + uint64_t size); + +/** + * memory_region_init_ram - Initialize RAM memory region. Accesses into the + * region will modify memory directly. + * + * @mr: the #MemoryRegion to be initialized + * @owner: the object that tracks the region's reference count (must be + * TYPE_DEVICE or a subclass of TYPE_DEVICE, or NULL) + * @name: name of the memory region + * @size: size of the region in bytes + * @errp: pointer to Error*, to store an error if it happens. + * + * This function allocates RAM for a board model or device, and + * arranges for it to be migrated (by calling vmstate_register_ram() + * if @owner is a DeviceState, or vmstate_register_ram_global() if + * @owner is NULL). + * + * TODO: Currently we restrict @owner to being either NULL (for + * global RAM regions with no owner) or devices, so that we can + * give the RAM block a unique name for migration purposes. + * We should lift this restriction and allow arbitrary Objects. + * If you pass a non-NULL non-device @owner then we will assert. + */ +void memory_region_init_ram(MemoryRegion *mr, + struct Object *owner, + const char *name, + uint64_t size, + Error **errp); + +/** + * memory_region_init_rom: Initialize a ROM memory region. + * + * This has the same effect as calling memory_region_init_ram() + * and then marking the resulting region read-only with + * memory_region_set_readonly(). This includes arranging for the + * contents to be migrated. + * + * TODO: Currently we restrict @owner to being either NULL (for + * global RAM regions with no owner) or devices, so that we can + * give the RAM block a unique name for migration purposes. + * We should lift this restriction and allow arbitrary Objects. + * If you pass a non-NULL non-device @owner then we will assert. + * + * @mr: the #MemoryRegion to be initialized. + * @owner: the object that tracks the region's reference count + * @name: Region name, becomes part of RAMBlock name used in migration stream + * must be unique within any device + * @size: size of the region. + * @errp: pointer to Error*, to store an error if it happens. + */ +void memory_region_init_rom(MemoryRegion *mr, + struct Object *owner, + const char *name, + uint64_t size, + Error **errp); + +/** + * memory_region_init_rom_device: Initialize a ROM memory region. + * Writes are handled via callbacks. + * + * This function initializes a memory region backed by RAM for reads + * and callbacks for writes, and arranges for the RAM backing to + * be migrated (by calling vmstate_register_ram() + * if @owner is a DeviceState, or vmstate_register_ram_global() if + * @owner is NULL). + * + * TODO: Currently we restrict @owner to being either NULL (for + * global RAM regions with no owner) or devices, so that we can + * give the RAM block a unique name for migration purposes. + * We should lift this restriction and allow arbitrary Objects. + * If you pass a non-NULL non-device @owner then we will assert. + * + * @mr: the #MemoryRegion to be initialized. + * @owner: the object that tracks the region's reference count + * @ops: callbacks for write access handling (must not be NULL). + * @opaque: passed to the read and write callbacks of the @ops structure. + * @name: Region name, becomes part of RAMBlock name used in migration stream + * must be unique within any device + * @size: size of the region. + * @errp: pointer to Error*, to store an error if it happens. + */ +void memory_region_init_rom_device(MemoryRegion *mr, + struct Object *owner, + const MemoryRegionOps *ops, + void *opaque, + const char *name, + uint64_t size, + Error **errp); + + +/** + * memory_region_owner: get a memory region's owner. + * + * @mr: the memory region being queried. + */ +struct Object *memory_region_owner(MemoryRegion *mr); + +/** + * memory_region_size: get a memory region's size. + * + * @mr: the memory region being queried. + */ +uint64_t memory_region_size(MemoryRegion *mr); + +/** + * memory_region_is_ram: check whether a memory region is random access + * + * Returns %true if a memory region is random access. + * + * @mr: the memory region being queried + */ +static inline bool memory_region_is_ram(MemoryRegion *mr) +{ + return mr->ram; +} + +/** + * memory_region_is_ram_device: check whether a memory region is a ram device + * + * Returns %true if a memory region is a device backed ram region + * + * @mr: the memory region being queried + */ +bool memory_region_is_ram_device(MemoryRegion *mr); + +/** + * memory_region_is_romd: check whether a memory region is in ROMD mode + * + * Returns %true if a memory region is a ROM device and currently set to allow + * direct reads. + * + * @mr: the memory region being queried + */ +static inline bool memory_region_is_romd(MemoryRegion *mr) +{ + return mr->rom_device && mr->romd_mode; +} + +/** + * memory_region_get_iommu: check whether a memory region is an iommu + * + * Returns pointer to IOMMUMemoryRegion if a memory region is an iommu, + * otherwise NULL. + * + * @mr: the memory region being queried + */ +static inline IOMMUMemoryRegion *memory_region_get_iommu(MemoryRegion *mr) +{ + if (mr->alias) { + return memory_region_get_iommu(mr->alias); + } + if (mr->is_iommu) { + return (IOMMUMemoryRegion *) mr; + } + return NULL; +} + +/** + * memory_region_get_iommu_class_nocheck: returns iommu memory region class + * if an iommu or NULL if not + * + * Returns pointer to IOMMUMemoryRegionClass if a memory region is an iommu, + * otherwise NULL. This is fast path avoiding QOM checking, use with caution. + * + * @iommu_mr: the memory region being queried + */ +static inline IOMMUMemoryRegionClass *memory_region_get_iommu_class_nocheck( + IOMMUMemoryRegion *iommu_mr) +{ + return (IOMMUMemoryRegionClass *) (((Object *)iommu_mr)->class); +} + +#define memory_region_is_iommu(mr) (memory_region_get_iommu(mr) != NULL) + +/** + * memory_region_iommu_get_min_page_size: get minimum supported page size + * for an iommu + * + * Returns minimum supported page size for an iommu. + * + * @iommu_mr: the memory region being queried + */ +uint64_t memory_region_iommu_get_min_page_size(IOMMUMemoryRegion *iommu_mr); + +/** + * memory_region_notify_iommu: notify a change in an IOMMU translation entry. + * + * The notification type will be decided by entry.perm bits: + * + * - For UNMAP (cache invalidation) notifies: set entry.perm to IOMMU_NONE. + * - For MAP (newly added entry) notifies: set entry.perm to the + * permission of the page (which is definitely !IOMMU_NONE). + * + * Note: for any IOMMU implementation, an in-place mapping change + * should be notified with an UNMAP followed by a MAP. + * + * @iommu_mr: the memory region that was changed + * @iommu_idx: the IOMMU index for the translation table which has changed + * @entry: the new entry in the IOMMU translation table. The entry + * replaces all old entries for the same virtual I/O address range. + * Deleted entries have .@perm == 0. + */ +void memory_region_notify_iommu(IOMMUMemoryRegion *iommu_mr, + int iommu_idx, + IOMMUTLBEntry entry); + +/** + * memory_region_notify_one: notify a change in an IOMMU translation + * entry to a single notifier + * + * This works just like memory_region_notify_iommu(), but it only + * notifies a specific notifier, not all of them. + * + * @notifier: the notifier to be notified + * @entry: the new entry in the IOMMU translation table. The entry + * replaces all old entries for the same virtual I/O address range. + * Deleted entries have .@perm == 0. + */ +void memory_region_notify_one(IOMMUNotifier *notifier, + IOMMUTLBEntry *entry); + +/** + * memory_region_register_iommu_notifier: register a notifier for changes to + * IOMMU translation entries. + * + * Returns 0 on success, or a negative errno otherwise. In particular, + * -EINVAL indicates that at least one of the attributes of the notifier + * is not supported (flag/range) by the IOMMU memory region. In case of error + * the error object must be created. + * + * @mr: the memory region to observe + * @n: the IOMMUNotifier to be added; the notify callback receives a + * pointer to an #IOMMUTLBEntry as the opaque value; the pointer + * ceases to be valid on exit from the notifier. + * @errp: pointer to Error*, to store an error if it happens. + */ +int memory_region_register_iommu_notifier(MemoryRegion *mr, + IOMMUNotifier *n, Error **errp); + +/** + * memory_region_iommu_replay: replay existing IOMMU translations to + * a notifier with the minimum page granularity returned by + * mr->iommu_ops->get_page_size(). + * + * Note: this is not related to record-and-replay functionality. + * + * @iommu_mr: the memory region to observe + * @n: the notifier to which to replay iommu mappings + */ +void memory_region_iommu_replay(IOMMUMemoryRegion *iommu_mr, IOMMUNotifier *n); + +/** + * memory_region_unregister_iommu_notifier: unregister a notifier for + * changes to IOMMU translation entries. + * + * @mr: the memory region which was observed and for which notity_stopped() + * needs to be called + * @n: the notifier to be removed. + */ +void memory_region_unregister_iommu_notifier(MemoryRegion *mr, + IOMMUNotifier *n); + +/** + * memory_region_iommu_get_attr: return an IOMMU attr if get_attr() is + * defined on the IOMMU. + * + * Returns 0 on success, or a negative errno otherwise. In particular, + * -EINVAL indicates that the IOMMU does not support the requested + * attribute. + * + * @iommu_mr: the memory region + * @attr: the requested attribute + * @data: a pointer to the requested attribute data + */ +int memory_region_iommu_get_attr(IOMMUMemoryRegion *iommu_mr, + enum IOMMUMemoryRegionAttr attr, + void *data); + +/** + * memory_region_iommu_attrs_to_index: return the IOMMU index to + * use for translations with the given memory transaction attributes. + * + * @iommu_mr: the memory region + * @attrs: the memory transaction attributes + */ +int memory_region_iommu_attrs_to_index(IOMMUMemoryRegion *iommu_mr, + MemTxAttrs attrs); + +/** + * memory_region_iommu_num_indexes: return the total number of IOMMU + * indexes that this IOMMU supports. + * + * @iommu_mr: the memory region + */ +int memory_region_iommu_num_indexes(IOMMUMemoryRegion *iommu_mr); + +/** + * memory_region_iommu_set_page_size_mask: set the supported page + * sizes for a given IOMMU memory region + * + * @iommu_mr: IOMMU memory region + * @page_size_mask: supported page size mask + * @errp: pointer to Error*, to store an error if it happens. + */ +int memory_region_iommu_set_page_size_mask(IOMMUMemoryRegion *iommu_mr, + uint64_t page_size_mask, + Error **errp); + +/** + * memory_region_name: get a memory region's name + * + * Returns the string that was used to initialize the memory region. + * + * @mr: the memory region being queried + */ +const char *memory_region_name(const MemoryRegion *mr); + +/** + * memory_region_is_logging: return whether a memory region is logging writes + * + * Returns %true if the memory region is logging writes for the given client + * + * @mr: the memory region being queried + * @client: the client being queried + */ +bool memory_region_is_logging(MemoryRegion *mr, uint8_t client); + +/** + * memory_region_get_dirty_log_mask: return the clients for which a + * memory region is logging writes. + * + * Returns a bitmap of clients, in which the DIRTY_MEMORY_* constants + * are the bit indices. + * + * @mr: the memory region being queried + */ +uint8_t memory_region_get_dirty_log_mask(MemoryRegion *mr); + +/** + * memory_region_is_rom: check whether a memory region is ROM + * + * Returns %true if a memory region is read-only memory. + * + * @mr: the memory region being queried + */ +static inline bool memory_region_is_rom(MemoryRegion *mr) +{ + return mr->ram && mr->readonly; +} + +/** + * memory_region_is_nonvolatile: check whether a memory region is non-volatile + * + * Returns %true is a memory region is non-volatile memory. + * + * @mr: the memory region being queried + */ +static inline bool memory_region_is_nonvolatile(MemoryRegion *mr) +{ + return mr->nonvolatile; +} + +/** + * memory_region_get_fd: Get a file descriptor backing a RAM memory region. + * + * Returns a file descriptor backing a file-based RAM memory region, + * or -1 if the region is not a file-based RAM memory region. + * + * @mr: the RAM or alias memory region being queried. + */ +int memory_region_get_fd(MemoryRegion *mr); + +/** + * memory_region_from_host: Convert a pointer into a RAM memory region + * and an offset within it. + * + * Given a host pointer inside a RAM memory region (created with + * memory_region_init_ram() or memory_region_init_ram_ptr()), return + * the MemoryRegion and the offset within it. + * + * Use with care; by the time this function returns, the returned pointer is + * not protected by RCU anymore. If the caller is not within an RCU critical + * section and does not hold the iothread lock, it must have other means of + * protecting the pointer, such as a reference to the region that includes + * the incoming ram_addr_t. + * + * @ptr: the host pointer to be converted + * @offset: the offset within memory region + */ +MemoryRegion *memory_region_from_host(void *ptr, ram_addr_t *offset); + +/** + * memory_region_get_ram_ptr: Get a pointer into a RAM memory region. + * + * Returns a host pointer to a RAM memory region (created with + * memory_region_init_ram() or memory_region_init_ram_ptr()). + * + * Use with care; by the time this function returns, the returned pointer is + * not protected by RCU anymore. If the caller is not within an RCU critical + * section and does not hold the iothread lock, it must have other means of + * protecting the pointer, such as a reference to the region that includes + * the incoming ram_addr_t. + * + * @mr: the memory region being queried. + */ +void *memory_region_get_ram_ptr(MemoryRegion *mr); + +/* memory_region_ram_resize: Resize a RAM region. + * + * Only legal before guest might have detected the memory size: e.g. on + * incoming migration, or right after reset. + * + * @mr: a memory region created with @memory_region_init_resizeable_ram. + * @newsize: the new size the region + * @errp: pointer to Error*, to store an error if it happens. + */ +void memory_region_ram_resize(MemoryRegion *mr, ram_addr_t newsize, + Error **errp); + +/** + * memory_region_msync: Synchronize selected address range of + * a memory mapped region + * + * @mr: the memory region to be msync + * @addr: the initial address of the range to be sync + * @size: the size of the range to be sync + */ +void memory_region_msync(MemoryRegion *mr, hwaddr addr, hwaddr size); + +/** + * memory_region_writeback: Trigger cache writeback for + * selected address range + * + * @mr: the memory region to be updated + * @addr: the initial address of the range to be written back + * @size: the size of the range to be written back + */ +void memory_region_writeback(MemoryRegion *mr, hwaddr addr, hwaddr size); + +/** + * memory_region_set_log: Turn dirty logging on or off for a region. + * + * Turns dirty logging on or off for a specified client (display, migration). + * Only meaningful for RAM regions. + * + * @mr: the memory region being updated. + * @log: whether dirty logging is to be enabled or disabled. + * @client: the user of the logging information; %DIRTY_MEMORY_VGA only. + */ +void memory_region_set_log(MemoryRegion *mr, bool log, unsigned client); + +/** + * memory_region_set_dirty: Mark a range of bytes as dirty in a memory region. + * + * Marks a range of bytes as dirty, after it has been dirtied outside + * guest code. + * + * @mr: the memory region being dirtied. + * @addr: the address (relative to the start of the region) being dirtied. + * @size: size of the range being dirtied. + */ +void memory_region_set_dirty(MemoryRegion *mr, hwaddr addr, + hwaddr size); + +/** + * memory_region_clear_dirty_bitmap - clear dirty bitmap for memory range + * + * This function is called when the caller wants to clear the remote + * dirty bitmap of a memory range within the memory region. This can + * be used by e.g. KVM to manually clear dirty log when + * KVM_CAP_MANUAL_DIRTY_LOG_PROTECT is declared support by the host + * kernel. + * + * @mr: the memory region to clear the dirty log upon + * @start: start address offset within the memory region + * @len: length of the memory region to clear dirty bitmap + */ +void memory_region_clear_dirty_bitmap(MemoryRegion *mr, hwaddr start, + hwaddr len); + +/** + * memory_region_snapshot_and_clear_dirty: Get a snapshot of the dirty + * bitmap and clear it. + * + * Creates a snapshot of the dirty bitmap, clears the dirty bitmap and + * returns the snapshot. The snapshot can then be used to query dirty + * status, using memory_region_snapshot_get_dirty. Snapshotting allows + * querying the same page multiple times, which is especially useful for + * display updates where the scanlines often are not page aligned. + * + * The dirty bitmap region which gets copyed into the snapshot (and + * cleared afterwards) can be larger than requested. The boundaries + * are rounded up/down so complete bitmap longs (covering 64 pages on + * 64bit hosts) can be copied over into the bitmap snapshot. Which + * isn't a problem for display updates as the extra pages are outside + * the visible area, and in case the visible area changes a full + * display redraw is due anyway. Should other use cases for this + * function emerge we might have to revisit this implementation + * detail. + * + * Use g_free to release DirtyBitmapSnapshot. + * + * @mr: the memory region being queried. + * @addr: the address (relative to the start of the region) being queried. + * @size: the size of the range being queried. + * @client: the user of the logging information; typically %DIRTY_MEMORY_VGA. + */ +DirtyBitmapSnapshot *memory_region_snapshot_and_clear_dirty(MemoryRegion *mr, + hwaddr addr, + hwaddr size, + unsigned client); + +/** + * memory_region_snapshot_get_dirty: Check whether a range of bytes is dirty + * in the specified dirty bitmap snapshot. + * + * @mr: the memory region being queried. + * @snap: the dirty bitmap snapshot + * @addr: the address (relative to the start of the region) being queried. + * @size: the size of the range being queried. + */ +bool memory_region_snapshot_get_dirty(MemoryRegion *mr, + DirtyBitmapSnapshot *snap, + hwaddr addr, hwaddr size); + +/** + * memory_region_reset_dirty: Mark a range of pages as clean, for a specified + * client. + * + * Marks a range of pages as no longer dirty. + * + * @mr: the region being updated. + * @addr: the start of the subrange being cleaned. + * @size: the size of the subrange being cleaned. + * @client: the user of the logging information; %DIRTY_MEMORY_MIGRATION or + * %DIRTY_MEMORY_VGA. + */ +void memory_region_reset_dirty(MemoryRegion *mr, hwaddr addr, + hwaddr size, unsigned client); + +/** + * memory_region_flush_rom_device: Mark a range of pages dirty and invalidate + * TBs (for self-modifying code). + * + * The MemoryRegionOps->write() callback of a ROM device must use this function + * to mark byte ranges that have been modified internally, such as by directly + * accessing the memory returned by memory_region_get_ram_ptr(). + * + * This function marks the range dirty and invalidates TBs so that TCG can + * detect self-modifying code. + * + * @mr: the region being flushed. + * @addr: the start, relative to the start of the region, of the range being + * flushed. + * @size: the size, in bytes, of the range being flushed. + */ +void memory_region_flush_rom_device(MemoryRegion *mr, hwaddr addr, hwaddr size); + +/** + * memory_region_set_readonly: Turn a memory region read-only (or read-write) + * + * Allows a memory region to be marked as read-only (turning it into a ROM). + * only useful on RAM regions. + * + * @mr: the region being updated. + * @readonly: whether rhe region is to be ROM or RAM. + */ +void memory_region_set_readonly(MemoryRegion *mr, bool readonly); + +/** + * memory_region_set_nonvolatile: Turn a memory region non-volatile + * + * Allows a memory region to be marked as non-volatile. + * only useful on RAM regions. + * + * @mr: the region being updated. + * @nonvolatile: whether rhe region is to be non-volatile. + */ +void memory_region_set_nonvolatile(MemoryRegion *mr, bool nonvolatile); + +/** + * memory_region_rom_device_set_romd: enable/disable ROMD mode + * + * Allows a ROM device (initialized with memory_region_init_rom_device() to + * set to ROMD mode (default) or MMIO mode. When it is in ROMD mode, the + * device is mapped to guest memory and satisfies read access directly. + * When in MMIO mode, reads are forwarded to the #MemoryRegion.read function. + * Writes are always handled by the #MemoryRegion.write function. + * + * @mr: the memory region to be updated + * @romd_mode: %true to put the region into ROMD mode + */ +void memory_region_rom_device_set_romd(MemoryRegion *mr, bool romd_mode); + +/** + * memory_region_set_coalescing: Enable memory coalescing for the region. + * + * Enabled writes to a region to be queued for later processing. MMIO ->write + * callbacks may be delayed until a non-coalesced MMIO is issued. + * Only useful for IO regions. Roughly similar to write-combining hardware. + * + * @mr: the memory region to be write coalesced + */ +void memory_region_set_coalescing(MemoryRegion *mr); + +/** + * memory_region_add_coalescing: Enable memory coalescing for a sub-range of + * a region. + * + * Like memory_region_set_coalescing(), but works on a sub-range of a region. + * Multiple calls can be issued coalesced disjoint ranges. + * + * @mr: the memory region to be updated. + * @offset: the start of the range within the region to be coalesced. + * @size: the size of the subrange to be coalesced. + */ +void memory_region_add_coalescing(MemoryRegion *mr, + hwaddr offset, + uint64_t size); + +/** + * memory_region_clear_coalescing: Disable MMIO coalescing for the region. + * + * Disables any coalescing caused by memory_region_set_coalescing() or + * memory_region_add_coalescing(). Roughly equivalent to uncacheble memory + * hardware. + * + * @mr: the memory region to be updated. + */ +void memory_region_clear_coalescing(MemoryRegion *mr); + +/** + * memory_region_set_flush_coalesced: Enforce memory coalescing flush before + * accesses. + * + * Ensure that pending coalesced MMIO request are flushed before the memory + * region is accessed. This property is automatically enabled for all regions + * passed to memory_region_set_coalescing() and memory_region_add_coalescing(). + * + * @mr: the memory region to be updated. + */ +void memory_region_set_flush_coalesced(MemoryRegion *mr); + +/** + * memory_region_clear_flush_coalesced: Disable memory coalescing flush before + * accesses. + * + * Clear the automatic coalesced MMIO flushing enabled via + * memory_region_set_flush_coalesced. Note that this service has no effect on + * memory regions that have MMIO coalescing enabled for themselves. For them, + * automatic flushing will stop once coalescing is disabled. + * + * @mr: the memory region to be updated. + */ +void memory_region_clear_flush_coalesced(MemoryRegion *mr); + +/** + * memory_region_add_eventfd: Request an eventfd to be triggered when a word + * is written to a location. + * + * Marks a word in an IO region (initialized with memory_region_init_io()) + * as a trigger for an eventfd event. The I/O callback will not be called. + * The caller must be prepared to handle failure (that is, take the required + * action if the callback _is_ called). + * + * @mr: the memory region being updated. + * @addr: the address within @mr that is to be monitored + * @size: the size of the access to trigger the eventfd + * @match_data: whether to match against @data, instead of just @addr + * @data: the data to match against the guest write + * @e: event notifier to be triggered when @addr, @size, and @data all match. + **/ +void memory_region_add_eventfd(MemoryRegion *mr, + hwaddr addr, + unsigned size, + bool match_data, + uint64_t data, + EventNotifier *e); + +/** + * memory_region_del_eventfd: Cancel an eventfd. + * + * Cancels an eventfd trigger requested by a previous + * memory_region_add_eventfd() call. + * + * @mr: the memory region being updated. + * @addr: the address within @mr that is to be monitored + * @size: the size of the access to trigger the eventfd + * @match_data: whether to match against @data, instead of just @addr + * @data: the data to match against the guest write + * @e: event notifier to be triggered when @addr, @size, and @data all match. + */ +void memory_region_del_eventfd(MemoryRegion *mr, + hwaddr addr, + unsigned size, + bool match_data, + uint64_t data, + EventNotifier *e); + +/** + * memory_region_add_subregion: Add a subregion to a container. + * + * Adds a subregion at @offset. The subregion may not overlap with other + * subregions (except for those explicitly marked as overlapping). A region + * may only be added once as a subregion (unless removed with + * memory_region_del_subregion()); use memory_region_init_alias() if you + * want a region to be a subregion in multiple locations. + * + * @mr: the region to contain the new subregion; must be a container + * initialized with memory_region_init(). + * @offset: the offset relative to @mr where @subregion is added. + * @subregion: the subregion to be added. + */ +void memory_region_add_subregion(MemoryRegion *mr, + hwaddr offset, + MemoryRegion *subregion); +/** + * memory_region_add_subregion_overlap: Add a subregion to a container + * with overlap. + * + * Adds a subregion at @offset. The subregion may overlap with other + * subregions. Conflicts are resolved by having a higher @priority hide a + * lower @priority. Subregions without priority are taken as @priority 0. + * A region may only be added once as a subregion (unless removed with + * memory_region_del_subregion()); use memory_region_init_alias() if you + * want a region to be a subregion in multiple locations. + * + * @mr: the region to contain the new subregion; must be a container + * initialized with memory_region_init(). + * @offset: the offset relative to @mr where @subregion is added. + * @subregion: the subregion to be added. + * @priority: used for resolving overlaps; highest priority wins. + */ +void memory_region_add_subregion_overlap(MemoryRegion *mr, + hwaddr offset, + MemoryRegion *subregion, + int priority); + +/** + * memory_region_get_ram_addr: Get the ram address associated with a memory + * region + * + * @mr: the region to be queried + */ +ram_addr_t memory_region_get_ram_addr(MemoryRegion *mr); + +uint64_t memory_region_get_alignment(const MemoryRegion *mr); +/** + * memory_region_del_subregion: Remove a subregion. + * + * Removes a subregion from its container. + * + * @mr: the container to be updated. + * @subregion: the region being removed; must be a current subregion of @mr. + */ +void memory_region_del_subregion(MemoryRegion *mr, + MemoryRegion *subregion); + +/* + * memory_region_set_enabled: dynamically enable or disable a region + * + * Enables or disables a memory region. A disabled memory region + * ignores all accesses to itself and its subregions. It does not + * obscure sibling subregions with lower priority - it simply behaves as + * if it was removed from the hierarchy. + * + * Regions default to being enabled. + * + * @mr: the region to be updated + * @enabled: whether to enable or disable the region + */ +void memory_region_set_enabled(MemoryRegion *mr, bool enabled); + +/* + * memory_region_set_address: dynamically update the address of a region + * + * Dynamically updates the address of a region, relative to its container. + * May be used on regions are currently part of a memory hierarchy. + * + * @mr: the region to be updated + * @addr: new address, relative to container region + */ +void memory_region_set_address(MemoryRegion *mr, hwaddr addr); + +/* + * memory_region_set_size: dynamically update the size of a region. + * + * Dynamically updates the size of a region. + * + * @mr: the region to be updated + * @size: used size of the region. + */ +void memory_region_set_size(MemoryRegion *mr, uint64_t size); + +/* + * memory_region_set_alias_offset: dynamically update a memory alias's offset + * + * Dynamically updates the offset into the target region that an alias points + * to, as if the fourth argument to memory_region_init_alias() has changed. + * + * @mr: the #MemoryRegion to be updated; should be an alias. + * @offset: the new offset into the target memory region + */ +void memory_region_set_alias_offset(MemoryRegion *mr, + hwaddr offset); + +/** + * memory_region_present: checks if an address relative to a @container + * translates into #MemoryRegion within @container + * + * Answer whether a #MemoryRegion within @container covers the address + * @addr. + * + * @container: a #MemoryRegion within which @addr is a relative address + * @addr: the area within @container to be searched + */ +bool memory_region_present(MemoryRegion *container, hwaddr addr); + +/** + * memory_region_is_mapped: returns true if #MemoryRegion is mapped + * into any address space. + * + * @mr: a #MemoryRegion which should be checked if it's mapped + */ +bool memory_region_is_mapped(MemoryRegion *mr); + +/** + * memory_region_find: translate an address/size relative to a + * MemoryRegion into a #MemoryRegionSection. + * + * Locates the first #MemoryRegion within @mr that overlaps the range + * given by @addr and @size. + * + * Returns a #MemoryRegionSection that describes a contiguous overlap. + * It will have the following characteristics: + * - @size = 0 iff no overlap was found + * - @mr is non-%NULL iff an overlap was found + * + * Remember that in the return value the @offset_within_region is + * relative to the returned region (in the .@mr field), not to the + * @mr argument. + * + * Similarly, the .@offset_within_address_space is relative to the + * address space that contains both regions, the passed and the + * returned one. However, in the special case where the @mr argument + * has no container (and thus is the root of the address space), the + * following will hold: + * - @offset_within_address_space >= @addr + * - @offset_within_address_space + .@size <= @addr + @size + * + * @mr: a MemoryRegion within which @addr is a relative address + * @addr: start of the area within @as to be searched + * @size: size of the area to be searched + */ +MemoryRegionSection memory_region_find(MemoryRegion *mr, + hwaddr addr, uint64_t size); + +/** + * memory_global_dirty_log_sync: synchronize the dirty log for all memory + * + * Synchronizes the dirty page log for all address spaces. + */ +void memory_global_dirty_log_sync(void); + +/** + * memory_global_dirty_log_sync: synchronize the dirty log for all memory + * + * Synchronizes the vCPUs with a thread that is reading the dirty bitmap. + * This function must be called after the dirty log bitmap is cleared, and + * before dirty guest memory pages are read. If you are using + * #DirtyBitmapSnapshot, memory_region_snapshot_and_clear_dirty() takes + * care of doing this. + */ +void memory_global_after_dirty_log_sync(void); + +/** + * memory_region_transaction_begin: Start a transaction. + * + * During a transaction, changes will be accumulated and made visible + * only when the transaction ends (is committed). + */ +void memory_region_transaction_begin(void); + +/** + * memory_region_transaction_commit: Commit a transaction and make changes + * visible to the guest. + */ +void memory_region_transaction_commit(void); + +/** + * memory_listener_register: register callbacks to be called when memory + * sections are mapped or unmapped into an address + * space + * + * @listener: an object containing the callbacks to be called + * @filter: if non-%NULL, only regions in this address space will be observed + */ +void memory_listener_register(MemoryListener *listener, AddressSpace *filter); + +/** + * memory_listener_unregister: undo the effect of memory_listener_register() + * + * @listener: an object containing the callbacks to be removed + */ +void memory_listener_unregister(MemoryListener *listener); + +/** + * memory_global_dirty_log_start: begin dirty logging for all regions + */ +void memory_global_dirty_log_start(void); + +/** + * memory_global_dirty_log_stop: end dirty logging for all regions + */ +void memory_global_dirty_log_stop(void); + +void mtree_info(bool flatview, bool dispatch_tree, bool owner, bool disabled); + +/** + * memory_region_dispatch_read: perform a read directly to the specified + * MemoryRegion. + * + * @mr: #MemoryRegion to access + * @addr: address within that region + * @pval: pointer to uint64_t which the data is written to + * @op: size, sign, and endianness of the memory operation + * @attrs: memory transaction attributes to use for the access + */ +MemTxResult memory_region_dispatch_read(MemoryRegion *mr, + hwaddr addr, + uint64_t *pval, + MemOp op, + MemTxAttrs attrs); +/** + * memory_region_dispatch_write: perform a write directly to the specified + * MemoryRegion. + * + * @mr: #MemoryRegion to access + * @addr: address within that region + * @data: data to write + * @op: size, sign, and endianness of the memory operation + * @attrs: memory transaction attributes to use for the access + */ +MemTxResult memory_region_dispatch_write(MemoryRegion *mr, + hwaddr addr, + uint64_t data, + MemOp op, + MemTxAttrs attrs); + +/** + * address_space_init: initializes an address space + * + * @as: an uninitialized #AddressSpace + * @root: a #MemoryRegion that routes addresses for the address space + * @name: an address space name. The name is only used for debugging + * output. + */ +void address_space_init(AddressSpace *as, MemoryRegion *root, const char *name); + +/** + * address_space_destroy: destroy an address space + * + * Releases all resources associated with an address space. After an address space + * is destroyed, its root memory region (given by address_space_init()) may be destroyed + * as well. + * + * @as: address space to be destroyed + */ +void address_space_destroy(AddressSpace *as); + +/** + * address_space_remove_listeners: unregister all listeners of an address space + * + * Removes all callbacks previously registered with memory_listener_register() + * for @as. + * + * @as: an initialized #AddressSpace + */ +void address_space_remove_listeners(AddressSpace *as); + +/** + * address_space_rw: read from or write to an address space. + * + * Return a MemTxResult indicating whether the operation succeeded + * or failed (eg unassigned memory, device rejected the transaction, + * IOMMU fault). + * + * @as: #AddressSpace to be accessed + * @addr: address within that address space + * @attrs: memory transaction attributes + * @buf: buffer with the data transferred + * @len: the number of bytes to read or write + * @is_write: indicates the transfer direction + */ +MemTxResult address_space_rw(AddressSpace *as, hwaddr addr, + MemTxAttrs attrs, void *buf, + hwaddr len, bool is_write); + +/** + * address_space_write: write to address space. + * + * Return a MemTxResult indicating whether the operation succeeded + * or failed (eg unassigned memory, device rejected the transaction, + * IOMMU fault). + * + * @as: #AddressSpace to be accessed + * @addr: address within that address space + * @attrs: memory transaction attributes + * @buf: buffer with the data transferred + * @len: the number of bytes to write + */ +MemTxResult address_space_write(AddressSpace *as, hwaddr addr, + MemTxAttrs attrs, + const void *buf, hwaddr len); + +/** + * address_space_write_rom: write to address space, including ROM. + * + * This function writes to the specified address space, but will + * write data to both ROM and RAM. This is used for non-guest + * writes like writes from the gdb debug stub or initial loading + * of ROM contents. + * + * Note that portions of the write which attempt to write data to + * a device will be silently ignored -- only real RAM and ROM will + * be written to. + * + * Return a MemTxResult indicating whether the operation succeeded + * or failed (eg unassigned memory, device rejected the transaction, + * IOMMU fault). + * + * @as: #AddressSpace to be accessed + * @addr: address within that address space + * @attrs: memory transaction attributes + * @buf: buffer with the data transferred + * @len: the number of bytes to write + */ +MemTxResult address_space_write_rom(AddressSpace *as, hwaddr addr, + MemTxAttrs attrs, + const void *buf, hwaddr len); + +/* address_space_ld*: load from an address space + * address_space_st*: store to an address space + * + * These functions perform a load or store of the byte, word, + * longword or quad to the specified address within the AddressSpace. + * The _le suffixed functions treat the data as little endian; + * _be indicates big endian; no suffix indicates "same endianness + * as guest CPU". + * + * The "guest CPU endianness" accessors are deprecated for use outside + * target-* code; devices should be CPU-agnostic and use either the LE + * or the BE accessors. + * + * @as #AddressSpace to be accessed + * @addr: address within that address space + * @val: data value, for stores + * @attrs: memory transaction attributes + * @result: location to write the success/failure of the transaction; + * if NULL, this information is discarded + */ + +#define SUFFIX +#define ARG1 as +#define ARG1_DECL AddressSpace *as +#include "exec/memory_ldst.h.inc" + +#define SUFFIX +#define ARG1 as +#define ARG1_DECL AddressSpace *as +#include "exec/memory_ldst_phys.h.inc" + +struct MemoryRegionCache { + void *ptr; + hwaddr xlat; + hwaddr len; + FlatView *fv; + MemoryRegionSection mrs; + bool is_write; +}; + +#define MEMORY_REGION_CACHE_INVALID ((MemoryRegionCache) { .mrs.mr = NULL }) + + +/* address_space_ld*_cached: load from a cached #MemoryRegion + * address_space_st*_cached: store into a cached #MemoryRegion + * + * These functions perform a load or store of the byte, word, + * longword or quad to the specified address. The address is + * a physical address in the AddressSpace, but it must lie within + * a #MemoryRegion that was mapped with address_space_cache_init. + * + * The _le suffixed functions treat the data as little endian; + * _be indicates big endian; no suffix indicates "same endianness + * as guest CPU". + * + * The "guest CPU endianness" accessors are deprecated for use outside + * target-* code; devices should be CPU-agnostic and use either the LE + * or the BE accessors. + * + * @cache: previously initialized #MemoryRegionCache to be accessed + * @addr: address within the address space + * @val: data value, for stores + * @attrs: memory transaction attributes + * @result: location to write the success/failure of the transaction; + * if NULL, this information is discarded + */ + +#define SUFFIX _cached_slow +#define ARG1 cache +#define ARG1_DECL MemoryRegionCache *cache +#include "exec/memory_ldst.h.inc" + +/* Inline fast path for direct RAM access. */ +static inline uint8_t address_space_ldub_cached(MemoryRegionCache *cache, + hwaddr addr, MemTxAttrs attrs, MemTxResult *result) +{ + assert(addr < cache->len); + if (likely(cache->ptr)) { + return ldub_p(cache->ptr + addr); + } else { + return address_space_ldub_cached_slow(cache, addr, attrs, result); + } +} + +static inline void address_space_stb_cached(MemoryRegionCache *cache, + hwaddr addr, uint32_t val, MemTxAttrs attrs, MemTxResult *result) +{ + assert(addr < cache->len); + if (likely(cache->ptr)) { + stb_p(cache->ptr + addr, val); + } else { + address_space_stb_cached_slow(cache, addr, val, attrs, result); + } +} + +#define ENDIANNESS _le +#include "exec/memory_ldst_cached.h.inc" + +#define ENDIANNESS _be +#include "exec/memory_ldst_cached.h.inc" + +#define SUFFIX _cached +#define ARG1 cache +#define ARG1_DECL MemoryRegionCache *cache +#include "exec/memory_ldst_phys.h.inc" + +/* address_space_cache_init: prepare for repeated access to a physical + * memory region + * + * @cache: #MemoryRegionCache to be filled + * @as: #AddressSpace to be accessed + * @addr: address within that address space + * @len: length of buffer + * @is_write: indicates the transfer direction + * + * Will only work with RAM, and may map a subset of the requested range by + * returning a value that is less than @len. On failure, return a negative + * errno value. + * + * Because it only works with RAM, this function can be used for + * read-modify-write operations. In this case, is_write should be %true. + * + * Note that addresses passed to the address_space_*_cached functions + * are relative to @addr. + */ +int64_t address_space_cache_init(MemoryRegionCache *cache, + AddressSpace *as, + hwaddr addr, + hwaddr len, + bool is_write); + +/** + * address_space_cache_invalidate: complete a write to a #MemoryRegionCache + * + * @cache: The #MemoryRegionCache to operate on. + * @addr: The first physical address that was written, relative to the + * address that was passed to @address_space_cache_init. + * @access_len: The number of bytes that were written starting at @addr. + */ +void address_space_cache_invalidate(MemoryRegionCache *cache, + hwaddr addr, + hwaddr access_len); + +/** + * address_space_cache_destroy: free a #MemoryRegionCache + * + * @cache: The #MemoryRegionCache whose memory should be released. + */ +void address_space_cache_destroy(MemoryRegionCache *cache); + +/* address_space_get_iotlb_entry: translate an address into an IOTLB + * entry. Should be called from an RCU critical section. + */ +IOMMUTLBEntry address_space_get_iotlb_entry(AddressSpace *as, hwaddr addr, + bool is_write, MemTxAttrs attrs); + +/* address_space_translate: translate an address range into an address space + * into a MemoryRegion and an address range into that section. Should be + * called from an RCU critical section, to avoid that the last reference + * to the returned region disappears after address_space_translate returns. + * + * @fv: #FlatView to be accessed + * @addr: address within that address space + * @xlat: pointer to address within the returned memory region section's + * #MemoryRegion. + * @len: pointer to length + * @is_write: indicates the transfer direction + * @attrs: memory attributes + */ +MemoryRegion *flatview_translate(FlatView *fv, + hwaddr addr, hwaddr *xlat, + hwaddr *len, bool is_write, + MemTxAttrs attrs); + +static inline MemoryRegion *address_space_translate(AddressSpace *as, + hwaddr addr, hwaddr *xlat, + hwaddr *len, bool is_write, + MemTxAttrs attrs) +{ + return flatview_translate(address_space_to_flatview(as), + addr, xlat, len, is_write, attrs); +} + +/* address_space_access_valid: check for validity of accessing an address + * space range + * + * Check whether memory is assigned to the given address space range, and + * access is permitted by any IOMMU regions that are active for the address + * space. + * + * For now, addr and len should be aligned to a page size. This limitation + * will be lifted in the future. + * + * @as: #AddressSpace to be accessed + * @addr: address within that address space + * @len: length of the area to be checked + * @is_write: indicates the transfer direction + * @attrs: memory attributes + */ +bool address_space_access_valid(AddressSpace *as, hwaddr addr, hwaddr len, + bool is_write, MemTxAttrs attrs); + +/* address_space_map: map a physical memory region into a host virtual address + * + * May map a subset of the requested range, given by and returned in @plen. + * May return %NULL and set *@plen to zero(0), if resources needed to perform + * the mapping are exhausted. + * Use only for reads OR writes - not for read-modify-write operations. + * Use cpu_register_map_client() to know when retrying the map operation is + * likely to succeed. + * + * @as: #AddressSpace to be accessed + * @addr: address within that address space + * @plen: pointer to length of buffer; updated on return + * @is_write: indicates the transfer direction + * @attrs: memory attributes + */ +void *address_space_map(AddressSpace *as, hwaddr addr, + hwaddr *plen, bool is_write, MemTxAttrs attrs); + +/* address_space_unmap: Unmaps a memory region previously mapped by address_space_map() + * + * Will also mark the memory as dirty if @is_write == %true. @access_len gives + * the amount of memory that was actually read or written by the caller. + * + * @as: #AddressSpace used + * @buffer: host pointer as returned by address_space_map() + * @len: buffer length as returned by address_space_map() + * @access_len: amount of data actually transferred + * @is_write: indicates the transfer direction + */ +void address_space_unmap(AddressSpace *as, void *buffer, hwaddr len, + bool is_write, hwaddr access_len); + + +/* Internal functions, part of the implementation of address_space_read. */ +MemTxResult address_space_read_full(AddressSpace *as, hwaddr addr, + MemTxAttrs attrs, void *buf, hwaddr len); +MemTxResult flatview_read_continue(FlatView *fv, hwaddr addr, + MemTxAttrs attrs, void *buf, + hwaddr len, hwaddr addr1, hwaddr l, + MemoryRegion *mr); +void *qemu_map_ram_ptr(RAMBlock *ram_block, ram_addr_t addr); + +/* Internal functions, part of the implementation of address_space_read_cached + * and address_space_write_cached. */ +MemTxResult address_space_read_cached_slow(MemoryRegionCache *cache, + hwaddr addr, void *buf, hwaddr len); +MemTxResult address_space_write_cached_slow(MemoryRegionCache *cache, + hwaddr addr, const void *buf, + hwaddr len); + +static inline bool memory_access_is_direct(MemoryRegion *mr, bool is_write) +{ + if (is_write) { + return memory_region_is_ram(mr) && !mr->readonly && + !mr->rom_device && !memory_region_is_ram_device(mr); + } else { + return (memory_region_is_ram(mr) && !memory_region_is_ram_device(mr)) || + memory_region_is_romd(mr); + } +} + +/** + * address_space_read: read from an address space. + * + * Return a MemTxResult indicating whether the operation succeeded + * or failed (eg unassigned memory, device rejected the transaction, + * IOMMU fault). Called within RCU critical section. + * + * @as: #AddressSpace to be accessed + * @addr: address within that address space + * @attrs: memory transaction attributes + * @buf: buffer with the data transferred + * @len: length of the data transferred + */ +static inline __attribute__((__always_inline__)) +MemTxResult address_space_read(AddressSpace *as, hwaddr addr, + MemTxAttrs attrs, void *buf, + hwaddr len) +{ + MemTxResult result = MEMTX_OK; + hwaddr l, addr1; + void *ptr; + MemoryRegion *mr; + FlatView *fv; + + if (__builtin_constant_p(len)) { + if (len) { + RCU_READ_LOCK_GUARD(); + fv = address_space_to_flatview(as); + l = len; + mr = flatview_translate(fv, addr, &addr1, &l, false, attrs); + if (len == l && memory_access_is_direct(mr, false)) { + ptr = qemu_map_ram_ptr(mr->ram_block, addr1); + memcpy(buf, ptr, len); + } else { + result = flatview_read_continue(fv, addr, attrs, buf, len, + addr1, l, mr); + } + } + } else { + result = address_space_read_full(as, addr, attrs, buf, len); + } + return result; +} + +/** + * address_space_read_cached: read from a cached RAM region + * + * @cache: Cached region to be addressed + * @addr: address relative to the base of the RAM region + * @buf: buffer with the data transferred + * @len: length of the data transferred + */ +static inline MemTxResult +address_space_read_cached(MemoryRegionCache *cache, hwaddr addr, + void *buf, hwaddr len) +{ + assert(addr < cache->len && len <= cache->len - addr); + fuzz_dma_read_cb(cache->xlat + addr, len, cache->mrs.mr, false); + if (likely(cache->ptr)) { + memcpy(buf, cache->ptr + addr, len); + return MEMTX_OK; + } else { + return address_space_read_cached_slow(cache, addr, buf, len); + } +} + +/** + * address_space_write_cached: write to a cached RAM region + * + * @cache: Cached region to be addressed + * @addr: address relative to the base of the RAM region + * @buf: buffer with the data transferred + * @len: length of the data transferred + */ +static inline MemTxResult +address_space_write_cached(MemoryRegionCache *cache, hwaddr addr, + const void *buf, hwaddr len) +{ + assert(addr < cache->len && len <= cache->len - addr); + if (likely(cache->ptr)) { + memcpy(cache->ptr + addr, buf, len); + return MEMTX_OK; + } else { + return address_space_write_cached_slow(cache, addr, buf, len); + } +} + +#ifdef NEED_CPU_H +/* enum device_endian to MemOp. */ +static inline MemOp devend_memop(enum device_endian end) +{ + QEMU_BUILD_BUG_ON(DEVICE_HOST_ENDIAN != DEVICE_LITTLE_ENDIAN && + DEVICE_HOST_ENDIAN != DEVICE_BIG_ENDIAN); + +#if defined(HOST_WORDS_BIGENDIAN) != defined(TARGET_WORDS_BIGENDIAN) + /* Swap if non-host endianness or native (target) endianness */ + return (end == DEVICE_HOST_ENDIAN) ? 0 : MO_BSWAP; +#else + const int non_host_endianness = + DEVICE_LITTLE_ENDIAN ^ DEVICE_BIG_ENDIAN ^ DEVICE_HOST_ENDIAN; + + /* In this case, native (target) endianness needs no swap. */ + return (end == non_host_endianness) ? MO_BSWAP : 0; +#endif +} +#endif + +/* + * Inhibit technologies that require discarding of pages in RAM blocks, e.g., + * to manage the actual amount of memory consumed by the VM (then, the memory + * provided by RAM blocks might be bigger than the desired memory consumption). + * This *must* be set if: + * - Discarding parts of a RAM blocks does not result in the change being + * reflected in the VM and the pages getting freed. + * - All memory in RAM blocks is pinned or duplicated, invaldiating any previous + * discards blindly. + * - Discarding parts of a RAM blocks will result in integrity issues (e.g., + * encrypted VMs). + * Technologies that only temporarily pin the current working set of a + * driver are fine, because we don't expect such pages to be discarded + * (esp. based on guest action like balloon inflation). + * + * This is *not* to be used to protect from concurrent discards (esp., + * postcopy). + * + * Returns 0 if successful. Returns -EBUSY if a technology that relies on + * discards to work reliably is active. + */ +int ram_block_discard_disable(bool state); + +/* + * Inhibit technologies that disable discarding of pages in RAM blocks. + * + * Returns 0 if successful. Returns -EBUSY if discards are already set to + * broken. + */ +int ram_block_discard_require(bool state); + +/* + * Test if discarding of memory in ram blocks is disabled. + */ +bool ram_block_discard_is_disabled(void); + +/* + * Test if discarding of memory in ram blocks is required to work reliably. + */ +bool ram_block_discard_is_required(void); + +#endif + +#endif diff --git a/include/exec/memory_ldst.h.inc b/include/exec/memory_ldst.h.inc new file mode 100644 index 000000000..46e6c220d --- /dev/null +++ b/include/exec/memory_ldst.h.inc @@ -0,0 +1,71 @@ +/* + * Physical memory access templates + * + * Copyright (c) 2003 Fabrice Bellard + * Copyright (c) 2015 Linaro, Inc. + * Copyright (c) 2016 Red Hat, Inc. + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, see . + */ + +#ifdef TARGET_ENDIANNESS +extern uint32_t glue(address_space_lduw, SUFFIX)(ARG1_DECL, + hwaddr addr, MemTxAttrs attrs, MemTxResult *result); +extern uint32_t glue(address_space_ldl, SUFFIX)(ARG1_DECL, + hwaddr addr, MemTxAttrs attrs, MemTxResult *result); +extern uint64_t glue(address_space_ldq, SUFFIX)(ARG1_DECL, + hwaddr addr, MemTxAttrs attrs, MemTxResult *result); +extern void glue(address_space_stl_notdirty, SUFFIX)(ARG1_DECL, + hwaddr addr, uint32_t val, MemTxAttrs attrs, MemTxResult *result); +extern void glue(address_space_stw, SUFFIX)(ARG1_DECL, + hwaddr addr, uint32_t val, MemTxAttrs attrs, MemTxResult *result); +extern void glue(address_space_stl, SUFFIX)(ARG1_DECL, + hwaddr addr, uint32_t val, MemTxAttrs attrs, MemTxResult *result); +extern void glue(address_space_stq, SUFFIX)(ARG1_DECL, + hwaddr addr, uint64_t val, MemTxAttrs attrs, MemTxResult *result); +#else +extern uint32_t glue(address_space_ldub, SUFFIX)(ARG1_DECL, + hwaddr addr, MemTxAttrs attrs, MemTxResult *result); +extern uint32_t glue(address_space_lduw_le, SUFFIX)(ARG1_DECL, + hwaddr addr, MemTxAttrs attrs, MemTxResult *result); +extern uint32_t glue(address_space_lduw_be, SUFFIX)(ARG1_DECL, + hwaddr addr, MemTxAttrs attrs, MemTxResult *result); +extern uint32_t glue(address_space_ldl_le, SUFFIX)(ARG1_DECL, + hwaddr addr, MemTxAttrs attrs, MemTxResult *result); +extern uint32_t glue(address_space_ldl_be, SUFFIX)(ARG1_DECL, + hwaddr addr, MemTxAttrs attrs, MemTxResult *result); +extern uint64_t glue(address_space_ldq_le, SUFFIX)(ARG1_DECL, + hwaddr addr, MemTxAttrs attrs, MemTxResult *result); +extern uint64_t glue(address_space_ldq_be, SUFFIX)(ARG1_DECL, + hwaddr addr, MemTxAttrs attrs, MemTxResult *result); +extern void glue(address_space_stb, SUFFIX)(ARG1_DECL, + hwaddr addr, uint32_t val, MemTxAttrs attrs, MemTxResult *result); +extern void glue(address_space_stw_le, SUFFIX)(ARG1_DECL, + hwaddr addr, uint32_t val, MemTxAttrs attrs, MemTxResult *result); +extern void glue(address_space_stw_be, SUFFIX)(ARG1_DECL, + hwaddr addr, uint32_t val, MemTxAttrs attrs, MemTxResult *result); +extern void glue(address_space_stl_le, SUFFIX)(ARG1_DECL, + hwaddr addr, uint32_t val, MemTxAttrs attrs, MemTxResult *result); +extern void glue(address_space_stl_be, SUFFIX)(ARG1_DECL, + hwaddr addr, uint32_t val, MemTxAttrs attrs, MemTxResult *result); +extern void glue(address_space_stq_le, SUFFIX)(ARG1_DECL, + hwaddr addr, uint64_t val, MemTxAttrs attrs, MemTxResult *result); +extern void glue(address_space_stq_be, SUFFIX)(ARG1_DECL, + hwaddr addr, uint64_t val, MemTxAttrs attrs, MemTxResult *result); +#endif + +#undef ARG1_DECL +#undef ARG1 +#undef SUFFIX +#undef TARGET_ENDIANNESS diff --git a/include/exec/memory_ldst_cached.h.inc b/include/exec/memory_ldst_cached.h.inc new file mode 100644 index 000000000..01efad62d --- /dev/null +++ b/include/exec/memory_ldst_cached.h.inc @@ -0,0 +1,111 @@ +/* + * Memory access templates for MemoryRegionCache + * + * Copyright (c) 2018 Red Hat, Inc. + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, see . + */ + +#define ADDRESS_SPACE_LD_CACHED(size) \ + glue(glue(address_space_ld, size), glue(ENDIANNESS, _cached)) +#define ADDRESS_SPACE_LD_CACHED_SLOW(size) \ + glue(glue(address_space_ld, size), glue(ENDIANNESS, _cached_slow)) +#define LD_P(size) \ + glue(glue(ld, size), glue(ENDIANNESS, _p)) + +static inline uint32_t ADDRESS_SPACE_LD_CACHED(l)(MemoryRegionCache *cache, + hwaddr addr, MemTxAttrs attrs, MemTxResult *result) +{ + assert(addr < cache->len && 4 <= cache->len - addr); + fuzz_dma_read_cb(cache->xlat + addr, 4, cache->mrs.mr, false); + if (likely(cache->ptr)) { + return LD_P(l)(cache->ptr + addr); + } else { + return ADDRESS_SPACE_LD_CACHED_SLOW(l)(cache, addr, attrs, result); + } +} + +static inline uint64_t ADDRESS_SPACE_LD_CACHED(q)(MemoryRegionCache *cache, + hwaddr addr, MemTxAttrs attrs, MemTxResult *result) +{ + assert(addr < cache->len && 8 <= cache->len - addr); + fuzz_dma_read_cb(cache->xlat + addr, 8, cache->mrs.mr, false); + if (likely(cache->ptr)) { + return LD_P(q)(cache->ptr + addr); + } else { + return ADDRESS_SPACE_LD_CACHED_SLOW(q)(cache, addr, attrs, result); + } +} + +static inline uint32_t ADDRESS_SPACE_LD_CACHED(uw)(MemoryRegionCache *cache, + hwaddr addr, MemTxAttrs attrs, MemTxResult *result) +{ + assert(addr < cache->len && 2 <= cache->len - addr); + fuzz_dma_read_cb(cache->xlat + addr, 2, cache->mrs.mr, false); + if (likely(cache->ptr)) { + return LD_P(uw)(cache->ptr + addr); + } else { + return ADDRESS_SPACE_LD_CACHED_SLOW(uw)(cache, addr, attrs, result); + } +} + +#undef ADDRESS_SPACE_LD_CACHED +#undef ADDRESS_SPACE_LD_CACHED_SLOW +#undef LD_P + +#define ADDRESS_SPACE_ST_CACHED(size) \ + glue(glue(address_space_st, size), glue(ENDIANNESS, _cached)) +#define ADDRESS_SPACE_ST_CACHED_SLOW(size) \ + glue(glue(address_space_st, size), glue(ENDIANNESS, _cached_slow)) +#define ST_P(size) \ + glue(glue(st, size), glue(ENDIANNESS, _p)) + +static inline void ADDRESS_SPACE_ST_CACHED(l)(MemoryRegionCache *cache, + hwaddr addr, uint32_t val, MemTxAttrs attrs, MemTxResult *result) +{ + assert(addr < cache->len && 4 <= cache->len - addr); + if (likely(cache->ptr)) { + ST_P(l)(cache->ptr + addr, val); + } else { + ADDRESS_SPACE_ST_CACHED_SLOW(l)(cache, addr, val, attrs, result); + } +} + +static inline void ADDRESS_SPACE_ST_CACHED(w)(MemoryRegionCache *cache, + hwaddr addr, uint32_t val, MemTxAttrs attrs, MemTxResult *result) +{ + assert(addr < cache->len && 2 <= cache->len - addr); + if (likely(cache->ptr)) { + ST_P(w)(cache->ptr + addr, val); + } else { + ADDRESS_SPACE_ST_CACHED_SLOW(w)(cache, addr, val, attrs, result); + } +} + +static inline void ADDRESS_SPACE_ST_CACHED(q)(MemoryRegionCache *cache, + hwaddr addr, uint64_t val, MemTxAttrs attrs, MemTxResult *result) +{ + assert(addr < cache->len && 8 <= cache->len - addr); + if (likely(cache->ptr)) { + ST_P(q)(cache->ptr + addr, val); + } else { + ADDRESS_SPACE_ST_CACHED_SLOW(q)(cache, addr, val, attrs, result); + } +} + +#undef ADDRESS_SPACE_ST_CACHED +#undef ADDRESS_SPACE_ST_CACHED_SLOW +#undef ST_P + +#undef ENDIANNESS diff --git a/include/exec/memory_ldst_phys.h.inc b/include/exec/memory_ldst_phys.h.inc new file mode 100644 index 000000000..b9dd53c38 --- /dev/null +++ b/include/exec/memory_ldst_phys.h.inc @@ -0,0 +1,147 @@ +/* + * Physical memory access templates + * + * Copyright (c) 2003 Fabrice Bellard + * Copyright (c) 2015 Linaro, Inc. + * Copyright (c) 2016 Red Hat, Inc. + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, see . + */ + +#ifdef TARGET_ENDIANNESS +static inline uint32_t glue(ldl_phys, SUFFIX)(ARG1_DECL, hwaddr addr) +{ + return glue(address_space_ldl, SUFFIX)(ARG1, addr, + MEMTXATTRS_UNSPECIFIED, NULL); +} + +static inline uint64_t glue(ldq_phys, SUFFIX)(ARG1_DECL, hwaddr addr) +{ + return glue(address_space_ldq, SUFFIX)(ARG1, addr, + MEMTXATTRS_UNSPECIFIED, NULL); +} + +static inline uint32_t glue(lduw_phys, SUFFIX)(ARG1_DECL, hwaddr addr) +{ + return glue(address_space_lduw, SUFFIX)(ARG1, addr, + MEMTXATTRS_UNSPECIFIED, NULL); +} + +static inline void glue(stl_phys, SUFFIX)(ARG1_DECL, hwaddr addr, uint32_t val) +{ + glue(address_space_stl, SUFFIX)(ARG1, addr, val, + MEMTXATTRS_UNSPECIFIED, NULL); +} + +static inline void glue(stw_phys, SUFFIX)(ARG1_DECL, hwaddr addr, uint32_t val) +{ + glue(address_space_stw, SUFFIX)(ARG1, addr, val, + MEMTXATTRS_UNSPECIFIED, NULL); +} + +static inline void glue(stq_phys, SUFFIX)(ARG1_DECL, hwaddr addr, uint64_t val) +{ + glue(address_space_stq, SUFFIX)(ARG1, addr, val, + MEMTXATTRS_UNSPECIFIED, NULL); +} +#else +static inline uint32_t glue(ldl_le_phys, SUFFIX)(ARG1_DECL, hwaddr addr) +{ + return glue(address_space_ldl_le, SUFFIX)(ARG1, addr, + MEMTXATTRS_UNSPECIFIED, NULL); +} + +static inline uint32_t glue(ldl_be_phys, SUFFIX)(ARG1_DECL, hwaddr addr) +{ + return glue(address_space_ldl_be, SUFFIX)(ARG1, addr, + MEMTXATTRS_UNSPECIFIED, NULL); +} + +static inline uint64_t glue(ldq_le_phys, SUFFIX)(ARG1_DECL, hwaddr addr) +{ + return glue(address_space_ldq_le, SUFFIX)(ARG1, addr, + MEMTXATTRS_UNSPECIFIED, NULL); +} + +static inline uint64_t glue(ldq_be_phys, SUFFIX)(ARG1_DECL, hwaddr addr) +{ + return glue(address_space_ldq_be, SUFFIX)(ARG1, addr, + MEMTXATTRS_UNSPECIFIED, NULL); +} + +static inline uint32_t glue(ldub_phys, SUFFIX)(ARG1_DECL, hwaddr addr) +{ + return glue(address_space_ldub, SUFFIX)(ARG1, addr, + MEMTXATTRS_UNSPECIFIED, NULL); +} + +static inline uint32_t glue(lduw_le_phys, SUFFIX)(ARG1_DECL, hwaddr addr) +{ + return glue(address_space_lduw_le, SUFFIX)(ARG1, addr, + MEMTXATTRS_UNSPECIFIED, NULL); +} + +static inline uint32_t glue(lduw_be_phys, SUFFIX)(ARG1_DECL, hwaddr addr) +{ + return glue(address_space_lduw_be, SUFFIX)(ARG1, addr, + MEMTXATTRS_UNSPECIFIED, NULL); +} + +static inline void glue(stl_le_phys, SUFFIX)(ARG1_DECL, hwaddr addr, uint32_t val) +{ + glue(address_space_stl_le, SUFFIX)(ARG1, addr, val, + MEMTXATTRS_UNSPECIFIED, NULL); +} + +static inline void glue(stl_be_phys, SUFFIX)(ARG1_DECL, hwaddr addr, uint32_t val) +{ + glue(address_space_stl_be, SUFFIX)(ARG1, addr, val, + MEMTXATTRS_UNSPECIFIED, NULL); +} + +static inline void glue(stb_phys, SUFFIX)(ARG1_DECL, hwaddr addr, uint32_t val) +{ + glue(address_space_stb, SUFFIX)(ARG1, addr, val, + MEMTXATTRS_UNSPECIFIED, NULL); +} + +static inline void glue(stw_le_phys, SUFFIX)(ARG1_DECL, hwaddr addr, uint32_t val) +{ + glue(address_space_stw_le, SUFFIX)(ARG1, addr, val, + MEMTXATTRS_UNSPECIFIED, NULL); +} + +static inline void glue(stw_be_phys, SUFFIX)(ARG1_DECL, hwaddr addr, uint32_t val) +{ + glue(address_space_stw_be, SUFFIX)(ARG1, addr, val, + MEMTXATTRS_UNSPECIFIED, NULL); +} + +static inline void glue(stq_le_phys, SUFFIX)(ARG1_DECL, hwaddr addr, uint64_t val) +{ + glue(address_space_stq_le, SUFFIX)(ARG1, addr, val, + MEMTXATTRS_UNSPECIFIED, NULL); +} + +static inline void glue(stq_be_phys, SUFFIX)(ARG1_DECL, hwaddr addr, uint64_t val) +{ + glue(address_space_stq_be, SUFFIX)(ARG1, addr, val, + MEMTXATTRS_UNSPECIFIED, NULL); +} +#endif + +#undef ARG1_DECL +#undef ARG1 +#undef SUFFIX +#undef TARGET_ENDIANNESS diff --git a/include/exec/plugin-gen.h b/include/exec/plugin-gen.h new file mode 100644 index 000000000..4834a9e2f --- /dev/null +++ b/include/exec/plugin-gen.h @@ -0,0 +1,71 @@ +/* + * Copyright (C) 2017, Emilio G. Cota + * + * License: GNU GPL, version 2 or later. + * See the COPYING file in the top-level directory. + * + * plugin-gen.h - TCG-dependent definitions for generating plugin code + * + * This header should be included only from plugin.c and C files that emit + * TCG code. + */ +#ifndef QEMU_PLUGIN_GEN_H +#define QEMU_PLUGIN_GEN_H + +#include "qemu/plugin.h" +#include "tcg/tcg.h" + +struct DisasContextBase; + +#ifdef CONFIG_PLUGIN + +bool plugin_gen_tb_start(CPUState *cpu, const TranslationBlock *tb); +void plugin_gen_tb_end(CPUState *cpu); +void plugin_gen_insn_start(CPUState *cpu, const struct DisasContextBase *db); +void plugin_gen_insn_end(void); + +void plugin_gen_disable_mem_helpers(void); +void plugin_gen_empty_mem_callback(TCGv addr, uint32_t info); + +static inline void plugin_insn_append(const void *from, size_t size) +{ + struct qemu_plugin_insn *insn = tcg_ctx->plugin_insn; + + if (insn == NULL) { + return; + } + + insn->data = g_byte_array_append(insn->data, from, size); +} + +#else /* !CONFIG_PLUGIN */ + +static inline +bool plugin_gen_tb_start(CPUState *cpu, const TranslationBlock *tb) +{ + return false; +} + +static inline +void plugin_gen_insn_start(CPUState *cpu, const struct DisasContextBase *db) +{ } + +static inline void plugin_gen_insn_end(void) +{ } + +static inline void plugin_gen_tb_end(CPUState *cpu) +{ } + +static inline void plugin_gen_disable_mem_helpers(void) +{ } + +static inline void plugin_gen_empty_mem_callback(TCGv addr, uint32_t info) +{ } + +static inline void plugin_insn_append(const void *from, size_t size) +{ } + +#endif /* CONFIG_PLUGIN */ + +#endif /* QEMU_PLUGIN_GEN_H */ + diff --git a/include/exec/poison.h b/include/exec/poison.h new file mode 100644 index 000000000..7b9ac361d --- /dev/null +++ b/include/exec/poison.h @@ -0,0 +1,96 @@ +/* Poison identifiers that should not be used when building + target independent device code. */ + +#ifndef HW_POISON_H +#define HW_POISON_H +#ifdef __GNUC__ + +#pragma GCC poison TARGET_I386 +#pragma GCC poison TARGET_X86_64 +#pragma GCC poison TARGET_AARCH64 +#pragma GCC poison TARGET_ALPHA +#pragma GCC poison TARGET_ARM +#pragma GCC poison TARGET_CRIS +#pragma GCC poison TARGET_HPPA +#pragma GCC poison TARGET_LM32 +#pragma GCC poison TARGET_M68K +#pragma GCC poison TARGET_MICROBLAZE +#pragma GCC poison TARGET_MIPS +#pragma GCC poison TARGET_ABI_MIPSN32 +#pragma GCC poison TARGET_ABI_MIPSO32 +#pragma GCC poison TARGET_MIPS64 +#pragma GCC poison TARGET_ABI_MIPSN64 +#pragma GCC poison TARGET_MOXIE +#pragma GCC poison TARGET_NIOS2 +#pragma GCC poison TARGET_OPENRISC +#pragma GCC poison TARGET_PPC +#pragma GCC poison TARGET_PPC64 +#pragma GCC poison TARGET_ABI32 +#pragma GCC poison TARGET_RX +#pragma GCC poison TARGET_S390X +#pragma GCC poison TARGET_SH4 +#pragma GCC poison TARGET_SPARC +#pragma GCC poison TARGET_SPARC64 +#pragma GCC poison TARGET_TILEGX +#pragma GCC poison TARGET_TRICORE +#pragma GCC poison TARGET_UNICORE32 +#pragma GCC poison TARGET_XTENSA + +#pragma GCC poison TARGET_ALIGNED_ONLY +#pragma GCC poison TARGET_HAS_BFLT +#pragma GCC poison TARGET_NAME +#pragma GCC poison TARGET_SUPPORTS_MTTCG +#pragma GCC poison TARGET_WORDS_BIGENDIAN +#pragma GCC poison BSWAP_NEEDED + +#pragma GCC poison TARGET_LONG_BITS +#pragma GCC poison TARGET_FMT_lx +#pragma GCC poison TARGET_FMT_ld +#pragma GCC poison TARGET_FMT_lu + +#pragma GCC poison TARGET_PAGE_SIZE +#pragma GCC poison TARGET_PAGE_MASK +#pragma GCC poison TARGET_PAGE_BITS +#pragma GCC poison TARGET_PAGE_ALIGN + +#pragma GCC poison CPUArchState + +#pragma GCC poison CPU_INTERRUPT_HARD +#pragma GCC poison CPU_INTERRUPT_EXITTB +#pragma GCC poison CPU_INTERRUPT_HALT +#pragma GCC poison CPU_INTERRUPT_DEBUG +#pragma GCC poison CPU_INTERRUPT_TGT_EXT_0 +#pragma GCC poison CPU_INTERRUPT_TGT_EXT_1 +#pragma GCC poison CPU_INTERRUPT_TGT_EXT_2 +#pragma GCC poison CPU_INTERRUPT_TGT_EXT_3 +#pragma GCC poison CPU_INTERRUPT_TGT_EXT_4 +#pragma GCC poison CPU_INTERRUPT_TGT_INT_0 +#pragma GCC poison CPU_INTERRUPT_TGT_INT_1 +#pragma GCC poison CPU_INTERRUPT_TGT_INT_2 + +#pragma GCC poison CONFIG_ALPHA_DIS +#pragma GCC poison CONFIG_ARM_A64_DIS +#pragma GCC poison CONFIG_ARM_DIS +#pragma GCC poison CONFIG_CRIS_DIS +#pragma GCC poison CONFIG_HPPA_DIS +#pragma GCC poison CONFIG_I386_DIS +#pragma GCC poison CONFIG_LM32_DIS +#pragma GCC poison CONFIG_M68K_DIS +#pragma GCC poison CONFIG_MICROBLAZE_DIS +#pragma GCC poison CONFIG_MIPS_DIS +#pragma GCC poison CONFIG_NANOMIPS_DIS +#pragma GCC poison CONFIG_MOXIE_DIS +#pragma GCC poison CONFIG_NIOS2_DIS +#pragma GCC poison CONFIG_PPC_DIS +#pragma GCC poison CONFIG_RISCV_DIS +#pragma GCC poison CONFIG_S390_DIS +#pragma GCC poison CONFIG_SH4_DIS +#pragma GCC poison CONFIG_SPARC_DIS +#pragma GCC poison CONFIG_XTENSA_DIS + +#pragma GCC poison CONFIG_LINUX_USER +#pragma GCC poison CONFIG_KVM +#pragma GCC poison CONFIG_SOFTMMU + +#endif +#endif diff --git a/include/exec/ram_addr.h b/include/exec/ram_addr.h new file mode 100644 index 000000000..c6d2ef1d0 --- /dev/null +++ b/include/exec/ram_addr.h @@ -0,0 +1,516 @@ +/* + * Declarations for cpu physical memory functions + * + * Copyright 2011 Red Hat, Inc. and/or its affiliates + * + * Authors: + * Avi Kivity + * + * This work is licensed under the terms of the GNU GPL, version 2 or + * later. See the COPYING file in the top-level directory. + * + */ + +/* + * This header is for use by exec.c and memory.c ONLY. Do not include it. + * The functions declared here will be removed soon. + */ + +#ifndef RAM_ADDR_H +#define RAM_ADDR_H + +#ifndef CONFIG_USER_ONLY +#include "cpu.h" +#include "sysemu/xen.h" +#include "sysemu/tcg.h" +#include "exec/ramlist.h" +#include "exec/ramblock.h" + +/** + * clear_bmap_size: calculate clear bitmap size + * + * @pages: number of guest pages + * @shift: guest page number shift + * + * Returns: number of bits for the clear bitmap + */ +static inline long clear_bmap_size(uint64_t pages, uint8_t shift) +{ + return DIV_ROUND_UP(pages, 1UL << shift); +} + +/** + * clear_bmap_set: set clear bitmap for the page range + * + * @rb: the ramblock to operate on + * @start: the start page number + * @size: number of pages to set in the bitmap + * + * Returns: None + */ +static inline void clear_bmap_set(RAMBlock *rb, uint64_t start, + uint64_t npages) +{ + uint8_t shift = rb->clear_bmap_shift; + + bitmap_set_atomic(rb->clear_bmap, start >> shift, + clear_bmap_size(npages, shift)); +} + +/** + * clear_bmap_test_and_clear: test clear bitmap for the page, clear if set + * + * @rb: the ramblock to operate on + * @page: the page number to check + * + * Returns: true if the bit was set, false otherwise + */ +static inline bool clear_bmap_test_and_clear(RAMBlock *rb, uint64_t page) +{ + uint8_t shift = rb->clear_bmap_shift; + + return bitmap_test_and_clear_atomic(rb->clear_bmap, page >> shift, 1); +} + +static inline bool offset_in_ramblock(RAMBlock *b, ram_addr_t offset) +{ + return (b && b->host && offset < b->used_length) ? true : false; +} + +static inline void *ramblock_ptr(RAMBlock *block, ram_addr_t offset) +{ + assert(offset_in_ramblock(block, offset)); + return (char *)block->host + offset; +} + +static inline unsigned long int ramblock_recv_bitmap_offset(void *host_addr, + RAMBlock *rb) +{ + uint64_t host_addr_offset = + (uint64_t)(uintptr_t)(host_addr - (void *)rb->host); + return host_addr_offset >> TARGET_PAGE_BITS; +} + +bool ramblock_is_pmem(RAMBlock *rb); + +long qemu_minrampagesize(void); +long qemu_maxrampagesize(void); + +/** + * qemu_ram_alloc_from_file, + * qemu_ram_alloc_from_fd: Allocate a ram block from the specified backing + * file or device + * + * Parameters: + * @size: the size in bytes of the ram block + * @mr: the memory region where the ram block is + * @ram_flags: specify the properties of the ram block, which can be one + * or bit-or of following values + * - RAM_SHARED: mmap the backing file or device with MAP_SHARED + * - RAM_PMEM: the backend @mem_path or @fd is persistent memory + * Other bits are ignored. + * @mem_path or @fd: specify the backing file or device + * @errp: pointer to Error*, to store an error if it happens + * + * Return: + * On success, return a pointer to the ram block. + * On failure, return NULL. + */ +RAMBlock *qemu_ram_alloc_from_file(ram_addr_t size, MemoryRegion *mr, + uint32_t ram_flags, const char *mem_path, + Error **errp); +RAMBlock *qemu_ram_alloc_from_fd(ram_addr_t size, MemoryRegion *mr, + uint32_t ram_flags, int fd, + Error **errp); + +RAMBlock *qemu_ram_alloc_from_ptr(ram_addr_t size, void *host, + MemoryRegion *mr, Error **errp); +RAMBlock *qemu_ram_alloc(ram_addr_t size, bool share, MemoryRegion *mr, + Error **errp); +RAMBlock *qemu_ram_alloc_resizeable(ram_addr_t size, ram_addr_t max_size, + void (*resized)(const char*, + uint64_t length, + void *host), + MemoryRegion *mr, Error **errp); +void qemu_ram_free(RAMBlock *block); + +int qemu_ram_resize(RAMBlock *block, ram_addr_t newsize, Error **errp); + +void qemu_ram_msync(RAMBlock *block, ram_addr_t start, ram_addr_t length); + +/* Clear whole block of mem */ +static inline void qemu_ram_block_writeback(RAMBlock *block) +{ + qemu_ram_msync(block, 0, block->used_length); +} + +#define DIRTY_CLIENTS_ALL ((1 << DIRTY_MEMORY_NUM) - 1) +#define DIRTY_CLIENTS_NOCODE (DIRTY_CLIENTS_ALL & ~(1 << DIRTY_MEMORY_CODE)) + +void tb_invalidate_phys_range(ram_addr_t start, ram_addr_t end); + +static inline bool cpu_physical_memory_get_dirty(ram_addr_t start, + ram_addr_t length, + unsigned client) +{ + DirtyMemoryBlocks *blocks; + unsigned long end, page; + unsigned long idx, offset, base; + bool dirty = false; + + assert(client < DIRTY_MEMORY_NUM); + + end = TARGET_PAGE_ALIGN(start + length) >> TARGET_PAGE_BITS; + page = start >> TARGET_PAGE_BITS; + + WITH_RCU_READ_LOCK_GUARD() { + blocks = qatomic_rcu_read(&ram_list.dirty_memory[client]); + + idx = page / DIRTY_MEMORY_BLOCK_SIZE; + offset = page % DIRTY_MEMORY_BLOCK_SIZE; + base = page - offset; + while (page < end) { + unsigned long next = MIN(end, base + DIRTY_MEMORY_BLOCK_SIZE); + unsigned long num = next - base; + unsigned long found = find_next_bit(blocks->blocks[idx], + num, offset); + if (found < num) { + dirty = true; + break; + } + + page = next; + idx++; + offset = 0; + base += DIRTY_MEMORY_BLOCK_SIZE; + } + } + + return dirty; +} + +static inline bool cpu_physical_memory_all_dirty(ram_addr_t start, + ram_addr_t length, + unsigned client) +{ + DirtyMemoryBlocks *blocks; + unsigned long end, page; + unsigned long idx, offset, base; + bool dirty = true; + + assert(client < DIRTY_MEMORY_NUM); + + end = TARGET_PAGE_ALIGN(start + length) >> TARGET_PAGE_BITS; + page = start >> TARGET_PAGE_BITS; + + RCU_READ_LOCK_GUARD(); + + blocks = qatomic_rcu_read(&ram_list.dirty_memory[client]); + + idx = page / DIRTY_MEMORY_BLOCK_SIZE; + offset = page % DIRTY_MEMORY_BLOCK_SIZE; + base = page - offset; + while (page < end) { + unsigned long next = MIN(end, base + DIRTY_MEMORY_BLOCK_SIZE); + unsigned long num = next - base; + unsigned long found = find_next_zero_bit(blocks->blocks[idx], num, offset); + if (found < num) { + dirty = false; + break; + } + + page = next; + idx++; + offset = 0; + base += DIRTY_MEMORY_BLOCK_SIZE; + } + + return dirty; +} + +static inline bool cpu_physical_memory_get_dirty_flag(ram_addr_t addr, + unsigned client) +{ + return cpu_physical_memory_get_dirty(addr, 1, client); +} + +static inline bool cpu_physical_memory_is_clean(ram_addr_t addr) +{ + bool vga = cpu_physical_memory_get_dirty_flag(addr, DIRTY_MEMORY_VGA); + bool code = cpu_physical_memory_get_dirty_flag(addr, DIRTY_MEMORY_CODE); + bool migration = + cpu_physical_memory_get_dirty_flag(addr, DIRTY_MEMORY_MIGRATION); + return !(vga && code && migration); +} + +static inline uint8_t cpu_physical_memory_range_includes_clean(ram_addr_t start, + ram_addr_t length, + uint8_t mask) +{ + uint8_t ret = 0; + + if (mask & (1 << DIRTY_MEMORY_VGA) && + !cpu_physical_memory_all_dirty(start, length, DIRTY_MEMORY_VGA)) { + ret |= (1 << DIRTY_MEMORY_VGA); + } + if (mask & (1 << DIRTY_MEMORY_CODE) && + !cpu_physical_memory_all_dirty(start, length, DIRTY_MEMORY_CODE)) { + ret |= (1 << DIRTY_MEMORY_CODE); + } + if (mask & (1 << DIRTY_MEMORY_MIGRATION) && + !cpu_physical_memory_all_dirty(start, length, DIRTY_MEMORY_MIGRATION)) { + ret |= (1 << DIRTY_MEMORY_MIGRATION); + } + return ret; +} + +static inline void cpu_physical_memory_set_dirty_flag(ram_addr_t addr, + unsigned client) +{ + unsigned long page, idx, offset; + DirtyMemoryBlocks *blocks; + + assert(client < DIRTY_MEMORY_NUM); + + page = addr >> TARGET_PAGE_BITS; + idx = page / DIRTY_MEMORY_BLOCK_SIZE; + offset = page % DIRTY_MEMORY_BLOCK_SIZE; + + RCU_READ_LOCK_GUARD(); + + blocks = qatomic_rcu_read(&ram_list.dirty_memory[client]); + + set_bit_atomic(offset, blocks->blocks[idx]); +} + +static inline void cpu_physical_memory_set_dirty_range(ram_addr_t start, + ram_addr_t length, + uint8_t mask) +{ + DirtyMemoryBlocks *blocks[DIRTY_MEMORY_NUM]; + unsigned long end, page; + unsigned long idx, offset, base; + int i; + + if (!mask && !xen_enabled()) { + return; + } + + end = TARGET_PAGE_ALIGN(start + length) >> TARGET_PAGE_BITS; + page = start >> TARGET_PAGE_BITS; + + WITH_RCU_READ_LOCK_GUARD() { + for (i = 0; i < DIRTY_MEMORY_NUM; i++) { + blocks[i] = qatomic_rcu_read(&ram_list.dirty_memory[i]); + } + + idx = page / DIRTY_MEMORY_BLOCK_SIZE; + offset = page % DIRTY_MEMORY_BLOCK_SIZE; + base = page - offset; + while (page < end) { + unsigned long next = MIN(end, base + DIRTY_MEMORY_BLOCK_SIZE); + + if (likely(mask & (1 << DIRTY_MEMORY_MIGRATION))) { + bitmap_set_atomic(blocks[DIRTY_MEMORY_MIGRATION]->blocks[idx], + offset, next - page); + } + if (unlikely(mask & (1 << DIRTY_MEMORY_VGA))) { + bitmap_set_atomic(blocks[DIRTY_MEMORY_VGA]->blocks[idx], + offset, next - page); + } + if (unlikely(mask & (1 << DIRTY_MEMORY_CODE))) { + bitmap_set_atomic(blocks[DIRTY_MEMORY_CODE]->blocks[idx], + offset, next - page); + } + + page = next; + idx++; + offset = 0; + base += DIRTY_MEMORY_BLOCK_SIZE; + } + } + + xen_hvm_modified_memory(start, length); +} + +#if !defined(_WIN32) +static inline void cpu_physical_memory_set_dirty_lebitmap(unsigned long *bitmap, + ram_addr_t start, + ram_addr_t pages) +{ + unsigned long i, j; + unsigned long page_number, c; + hwaddr addr; + ram_addr_t ram_addr; + unsigned long len = (pages + HOST_LONG_BITS - 1) / HOST_LONG_BITS; + unsigned long hpratio = qemu_real_host_page_size / TARGET_PAGE_SIZE; + unsigned long page = BIT_WORD(start >> TARGET_PAGE_BITS); + + /* start address is aligned at the start of a word? */ + if ((((page * BITS_PER_LONG) << TARGET_PAGE_BITS) == start) && + (hpratio == 1)) { + unsigned long **blocks[DIRTY_MEMORY_NUM]; + unsigned long idx; + unsigned long offset; + long k; + long nr = BITS_TO_LONGS(pages); + + idx = (start >> TARGET_PAGE_BITS) / DIRTY_MEMORY_BLOCK_SIZE; + offset = BIT_WORD((start >> TARGET_PAGE_BITS) % + DIRTY_MEMORY_BLOCK_SIZE); + + WITH_RCU_READ_LOCK_GUARD() { + for (i = 0; i < DIRTY_MEMORY_NUM; i++) { + blocks[i] = + qatomic_rcu_read(&ram_list.dirty_memory[i])->blocks; + } + + for (k = 0; k < nr; k++) { + if (bitmap[k]) { + unsigned long temp = leul_to_cpu(bitmap[k]); + + qatomic_or(&blocks[DIRTY_MEMORY_VGA][idx][offset], temp); + + if (global_dirty_log) { + qatomic_or( + &blocks[DIRTY_MEMORY_MIGRATION][idx][offset], + temp); + } + + if (tcg_enabled()) { + qatomic_or(&blocks[DIRTY_MEMORY_CODE][idx][offset], + temp); + } + } + + if (++offset >= BITS_TO_LONGS(DIRTY_MEMORY_BLOCK_SIZE)) { + offset = 0; + idx++; + } + } + } + + xen_hvm_modified_memory(start, pages << TARGET_PAGE_BITS); + } else { + uint8_t clients = tcg_enabled() ? DIRTY_CLIENTS_ALL : DIRTY_CLIENTS_NOCODE; + + if (!global_dirty_log) { + clients &= ~(1 << DIRTY_MEMORY_MIGRATION); + } + + /* + * bitmap-traveling is faster than memory-traveling (for addr...) + * especially when most of the memory is not dirty. + */ + for (i = 0; i < len; i++) { + if (bitmap[i] != 0) { + c = leul_to_cpu(bitmap[i]); + do { + j = ctzl(c); + c &= ~(1ul << j); + page_number = (i * HOST_LONG_BITS + j) * hpratio; + addr = page_number * TARGET_PAGE_SIZE; + ram_addr = start + addr; + cpu_physical_memory_set_dirty_range(ram_addr, + TARGET_PAGE_SIZE * hpratio, clients); + } while (c != 0); + } + } + } +} +#endif /* not _WIN32 */ + +bool cpu_physical_memory_test_and_clear_dirty(ram_addr_t start, + ram_addr_t length, + unsigned client); + +DirtyBitmapSnapshot *cpu_physical_memory_snapshot_and_clear_dirty + (MemoryRegion *mr, hwaddr offset, hwaddr length, unsigned client); + +bool cpu_physical_memory_snapshot_get_dirty(DirtyBitmapSnapshot *snap, + ram_addr_t start, + ram_addr_t length); + +static inline void cpu_physical_memory_clear_dirty_range(ram_addr_t start, + ram_addr_t length) +{ + cpu_physical_memory_test_and_clear_dirty(start, length, DIRTY_MEMORY_MIGRATION); + cpu_physical_memory_test_and_clear_dirty(start, length, DIRTY_MEMORY_VGA); + cpu_physical_memory_test_and_clear_dirty(start, length, DIRTY_MEMORY_CODE); +} + + +/* Called with RCU critical section */ +static inline +uint64_t cpu_physical_memory_sync_dirty_bitmap(RAMBlock *rb, + ram_addr_t start, + ram_addr_t length) +{ + ram_addr_t addr; + unsigned long word = BIT_WORD((start + rb->offset) >> TARGET_PAGE_BITS); + uint64_t num_dirty = 0; + unsigned long *dest = rb->bmap; + + /* start address and length is aligned at the start of a word? */ + if (((word * BITS_PER_LONG) << TARGET_PAGE_BITS) == + (start + rb->offset) && + !(length & ((BITS_PER_LONG << TARGET_PAGE_BITS) - 1))) { + int k; + int nr = BITS_TO_LONGS(length >> TARGET_PAGE_BITS); + unsigned long * const *src; + unsigned long idx = (word * BITS_PER_LONG) / DIRTY_MEMORY_BLOCK_SIZE; + unsigned long offset = BIT_WORD((word * BITS_PER_LONG) % + DIRTY_MEMORY_BLOCK_SIZE); + unsigned long page = BIT_WORD(start >> TARGET_PAGE_BITS); + + src = qatomic_rcu_read( + &ram_list.dirty_memory[DIRTY_MEMORY_MIGRATION])->blocks; + + for (k = page; k < page + nr; k++) { + if (src[idx][offset]) { + unsigned long bits = qatomic_xchg(&src[idx][offset], 0); + unsigned long new_dirty; + new_dirty = ~dest[k]; + dest[k] |= bits; + new_dirty &= bits; + num_dirty += ctpopl(new_dirty); + } + + if (++offset >= BITS_TO_LONGS(DIRTY_MEMORY_BLOCK_SIZE)) { + offset = 0; + idx++; + } + } + + if (rb->clear_bmap) { + /* + * Postpone the dirty bitmap clear to the point before we + * really send the pages, also we will split the clear + * dirty procedure into smaller chunks. + */ + clear_bmap_set(rb, start >> TARGET_PAGE_BITS, + length >> TARGET_PAGE_BITS); + } else { + /* Slow path - still do that in a huge chunk */ + memory_region_clear_dirty_bitmap(rb->mr, start, length); + } + } else { + ram_addr_t offset = rb->offset; + + for (addr = 0; addr < length; addr += TARGET_PAGE_SIZE) { + if (cpu_physical_memory_test_and_clear_dirty( + start + addr + offset, + TARGET_PAGE_SIZE, + DIRTY_MEMORY_MIGRATION)) { + long k = (start + addr) >> TARGET_PAGE_BITS; + if (!test_and_set_bit(k, dest)) { + num_dirty++; + } + } + } + } + + return num_dirty; +} +#endif +#endif diff --git a/include/exec/ramblock.h b/include/exec/ramblock.h new file mode 100644 index 000000000..07d50864d --- /dev/null +++ b/include/exec/ramblock.h @@ -0,0 +1,64 @@ +/* + * Declarations for cpu physical memory functions + * + * Copyright 2011 Red Hat, Inc. and/or its affiliates + * + * Authors: + * Avi Kivity + * + * This work is licensed under the terms of the GNU GPL, version 2 or + * later. See the COPYING file in the top-level directory. + * + */ + +/* + * This header is for use by exec.c and memory.c ONLY. Do not include it. + * The functions declared here will be removed soon. + */ + +#ifndef QEMU_EXEC_RAMBLOCK_H +#define QEMU_EXEC_RAMBLOCK_H + +#ifndef CONFIG_USER_ONLY +#include "cpu-common.h" + +struct RAMBlock { + struct rcu_head rcu; + struct MemoryRegion *mr; + uint8_t *host; + uint8_t *colo_cache; /* For colo, VM's ram cache */ + ram_addr_t offset; + ram_addr_t used_length; + ram_addr_t max_length; + void (*resized)(const char*, uint64_t length, void *host); + uint32_t flags; + /* Protected by iothread lock. */ + char idstr[256]; + /* RCU-enabled, writes protected by the ramlist lock */ + QLIST_ENTRY(RAMBlock) next; + QLIST_HEAD(, RAMBlockNotifier) ramblock_notifiers; + int fd; + size_t page_size; + /* dirty bitmap used during migration */ + unsigned long *bmap; + /* bitmap of already received pages in postcopy */ + unsigned long *receivedmap; + + /* + * bitmap to track already cleared dirty bitmap. When the bit is + * set, it means the corresponding memory chunk needs a log-clear. + * Set this up to non-NULL to enable the capability to postpone + * and split clearing of dirty bitmap on the remote node (e.g., + * KVM). The bitmap will be set only when doing global sync. + * + * NOTE: this bitmap is different comparing to the other bitmaps + * in that one bit can represent multiple guest pages (which is + * decided by the `clear_bmap_shift' variable below). On + * destination side, this should always be NULL, and the variable + * `clear_bmap_shift' is meaningless. + */ + unsigned long *clear_bmap; + uint8_t clear_bmap_shift; +}; +#endif +#endif diff --git a/include/exec/ramlist.h b/include/exec/ramlist.h new file mode 100644 index 000000000..26704aa3b --- /dev/null +++ b/include/exec/ramlist.h @@ -0,0 +1,80 @@ +#ifndef RAMLIST_H +#define RAMLIST_H + +#include "qemu/queue.h" +#include "qemu/thread.h" +#include "qemu/rcu.h" +#include "qemu/rcu_queue.h" + +typedef struct RAMBlockNotifier RAMBlockNotifier; + +#define DIRTY_MEMORY_VGA 0 +#define DIRTY_MEMORY_CODE 1 +#define DIRTY_MEMORY_MIGRATION 2 +#define DIRTY_MEMORY_NUM 3 /* num of dirty bits */ + +/* The dirty memory bitmap is split into fixed-size blocks to allow growth + * under RCU. The bitmap for a block can be accessed as follows: + * + * rcu_read_lock(); + * + * DirtyMemoryBlocks *blocks = + * qatomic_rcu_read(&ram_list.dirty_memory[DIRTY_MEMORY_MIGRATION]); + * + * ram_addr_t idx = (addr >> TARGET_PAGE_BITS) / DIRTY_MEMORY_BLOCK_SIZE; + * unsigned long *block = blocks.blocks[idx]; + * ...access block bitmap... + * + * rcu_read_unlock(); + * + * Remember to check for the end of the block when accessing a range of + * addresses. Move on to the next block if you reach the end. + * + * Organization into blocks allows dirty memory to grow (but not shrink) under + * RCU. When adding new RAMBlocks requires the dirty memory to grow, a new + * DirtyMemoryBlocks array is allocated with pointers to existing blocks kept + * the same. Other threads can safely access existing blocks while dirty + * memory is being grown. When no threads are using the old DirtyMemoryBlocks + * anymore it is freed by RCU (but the underlying blocks stay because they are + * pointed to from the new DirtyMemoryBlocks). + */ +#define DIRTY_MEMORY_BLOCK_SIZE ((ram_addr_t)256 * 1024 * 8) +typedef struct { + struct rcu_head rcu; + unsigned long *blocks[]; +} DirtyMemoryBlocks; + +typedef struct RAMList { + QemuMutex mutex; + RAMBlock *mru_block; + /* RCU-enabled, writes protected by the ramlist lock. */ + QLIST_HEAD(, RAMBlock) blocks; + DirtyMemoryBlocks *dirty_memory[DIRTY_MEMORY_NUM]; + uint32_t version; + QLIST_HEAD(, RAMBlockNotifier) ramblock_notifiers; +} RAMList; +extern RAMList ram_list; + +/* Should be holding either ram_list.mutex, or the RCU lock. */ +#define INTERNAL_RAMBLOCK_FOREACH(block) \ + QLIST_FOREACH_RCU(block, &ram_list.blocks, next) +/* Never use the INTERNAL_ version except for defining other macros */ +#define RAMBLOCK_FOREACH(block) INTERNAL_RAMBLOCK_FOREACH(block) + +void qemu_mutex_lock_ramlist(void); +void qemu_mutex_unlock_ramlist(void); + +struct RAMBlockNotifier { + void (*ram_block_added)(RAMBlockNotifier *n, void *host, size_t size); + void (*ram_block_removed)(RAMBlockNotifier *n, void *host, size_t size); + QLIST_ENTRY(RAMBlockNotifier) next; +}; + +void ram_block_notifier_add(RAMBlockNotifier *n); +void ram_block_notifier_remove(RAMBlockNotifier *n); +void ram_block_notify_add(void *host, size_t size); +void ram_block_notify_remove(void *host, size_t size); + +void ram_block_dump(Monitor *mon); + +#endif /* RAMLIST_H */ diff --git a/include/exec/softmmu-semi.h b/include/exec/softmmu-semi.h new file mode 100644 index 000000000..fbcae88f4 --- /dev/null +++ b/include/exec/softmmu-semi.h @@ -0,0 +1,101 @@ +/* + * Helper routines to provide target memory access for semihosting + * syscalls in system emulation mode. + * + * Copyright (c) 2007 CodeSourcery. + * + * This code is licensed under the GPL + */ + +#ifndef SOFTMMU_SEMI_H +#define SOFTMMU_SEMI_H + +#include "cpu.h" + +static inline uint64_t softmmu_tget64(CPUArchState *env, target_ulong addr) +{ + uint64_t val; + + cpu_memory_rw_debug(env_cpu(env), addr, (uint8_t *)&val, 8, 0); + return tswap64(val); +} + +static inline uint32_t softmmu_tget32(CPUArchState *env, target_ulong addr) +{ + uint32_t val; + + cpu_memory_rw_debug(env_cpu(env), addr, (uint8_t *)&val, 4, 0); + return tswap32(val); +} + +static inline uint32_t softmmu_tget8(CPUArchState *env, target_ulong addr) +{ + uint8_t val; + + cpu_memory_rw_debug(env_cpu(env), addr, &val, 1, 0); + return val; +} + +#define get_user_u64(arg, p) ({ arg = softmmu_tget64(env, p); 0; }) +#define get_user_u32(arg, p) ({ arg = softmmu_tget32(env, p) ; 0; }) +#define get_user_u8(arg, p) ({ arg = softmmu_tget8(env, p) ; 0; }) +#define get_user_ual(arg, p) get_user_u32(arg, p) + +static inline void softmmu_tput64(CPUArchState *env, + target_ulong addr, uint64_t val) +{ + val = tswap64(val); + cpu_memory_rw_debug(env_cpu(env), addr, (uint8_t *)&val, 8, 1); +} + +static inline void softmmu_tput32(CPUArchState *env, + target_ulong addr, uint32_t val) +{ + val = tswap32(val); + cpu_memory_rw_debug(env_cpu(env), addr, (uint8_t *)&val, 4, 1); +} +#define put_user_u64(arg, p) ({ softmmu_tput64(env, p, arg) ; 0; }) +#define put_user_u32(arg, p) ({ softmmu_tput32(env, p, arg) ; 0; }) +#define put_user_ual(arg, p) put_user_u32(arg, p) + +static void *softmmu_lock_user(CPUArchState *env, + target_ulong addr, target_ulong len, int copy) +{ + uint8_t *p; + /* TODO: Make this something that isn't fixed size. */ + p = malloc(len); + if (p && copy) { + cpu_memory_rw_debug(env_cpu(env), addr, p, len, 0); + } + return p; +} +#define lock_user(type, p, len, copy) softmmu_lock_user(env, p, len, copy) +static char *softmmu_lock_user_string(CPUArchState *env, target_ulong addr) +{ + char *p; + char *s; + uint8_t c; + /* TODO: Make this something that isn't fixed size. */ + s = p = malloc(1024); + if (!s) { + return NULL; + } + do { + cpu_memory_rw_debug(env_cpu(env), addr, &c, 1, 0); + addr++; + *(p++) = c; + } while (c); + return s; +} +#define lock_user_string(p) softmmu_lock_user_string(env, p) +static void softmmu_unlock_user(CPUArchState *env, void *p, target_ulong addr, + target_ulong len) +{ + if (len) { + cpu_memory_rw_debug(env_cpu(env), addr, p, len, 1); + } + free(p); +} +#define unlock_user(s, args, len) softmmu_unlock_user(env, s, args, len) + +#endif diff --git a/include/exec/target_page.h b/include/exec/target_page.h new file mode 100644 index 000000000..96726c36a --- /dev/null +++ b/include/exec/target_page.h @@ -0,0 +1,21 @@ +/* + * Target page sizes and friends for non target files + * + * Copyright (c) 2017 Red Hat Inc + * + * Authors: + * David Alan Gilbert + * Juan Quintela + * + * This work is licensed under the terms of the GNU GPL, version 2 or later. + * See the COPYING file in the top-level directory. + */ + +#ifndef EXEC_TARGET_PAGE_H +#define EXEC_TARGET_PAGE_H + +size_t qemu_target_page_size(void); +int qemu_target_page_bits(void); +int qemu_target_page_bits_min(void); + +#endif diff --git a/include/exec/tb-context.h b/include/exec/tb-context.h new file mode 100644 index 000000000..ec4c13b45 --- /dev/null +++ b/include/exec/tb-context.h @@ -0,0 +1,42 @@ +/* + * Internal structs that QEMU exports to TCG + * + * Copyright (c) 2003 Fabrice Bellard + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, see . + */ + +#ifndef QEMU_TB_CONTEXT_H +#define QEMU_TB_CONTEXT_H + +#include "qemu/thread.h" +#include "qemu/qht.h" + +#define CODE_GEN_HTABLE_BITS 15 +#define CODE_GEN_HTABLE_SIZE (1 << CODE_GEN_HTABLE_BITS) + +typedef struct TranslationBlock TranslationBlock; +typedef struct TBContext TBContext; + +struct TBContext { + + struct qht htable; + + /* statistics */ + unsigned tb_flush_count; +}; + +extern TBContext tb_ctx; + +#endif diff --git a/include/exec/tb-hash.h b/include/exec/tb-hash.h new file mode 100644 index 000000000..0a273d960 --- /dev/null +++ b/include/exec/tb-hash.h @@ -0,0 +1,69 @@ +/* + * internal execution defines for qemu + * + * Copyright (c) 2003 Fabrice Bellard + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, see . + */ + +#ifndef EXEC_TB_HASH_H +#define EXEC_TB_HASH_H + +#include "exec/cpu-defs.h" +#include "exec/exec-all.h" +#include "qemu/xxhash.h" + +#ifdef CONFIG_SOFTMMU + +/* Only the bottom TB_JMP_PAGE_BITS of the jump cache hash bits vary for + addresses on the same page. The top bits are the same. This allows + TLB invalidation to quickly clear a subset of the hash table. */ +#define TB_JMP_PAGE_BITS (TB_JMP_CACHE_BITS / 2) +#define TB_JMP_PAGE_SIZE (1 << TB_JMP_PAGE_BITS) +#define TB_JMP_ADDR_MASK (TB_JMP_PAGE_SIZE - 1) +#define TB_JMP_PAGE_MASK (TB_JMP_CACHE_SIZE - TB_JMP_PAGE_SIZE) + +static inline unsigned int tb_jmp_cache_hash_page(target_ulong pc) +{ + target_ulong tmp; + tmp = pc ^ (pc >> (TARGET_PAGE_BITS - TB_JMP_PAGE_BITS)); + return (tmp >> (TARGET_PAGE_BITS - TB_JMP_PAGE_BITS)) & TB_JMP_PAGE_MASK; +} + +static inline unsigned int tb_jmp_cache_hash_func(target_ulong pc) +{ + target_ulong tmp; + tmp = pc ^ (pc >> (TARGET_PAGE_BITS - TB_JMP_PAGE_BITS)); + return (((tmp >> (TARGET_PAGE_BITS - TB_JMP_PAGE_BITS)) & TB_JMP_PAGE_MASK) + | (tmp & TB_JMP_ADDR_MASK)); +} + +#else + +/* In user-mode we can get better hashing because we do not have a TLB */ +static inline unsigned int tb_jmp_cache_hash_func(target_ulong pc) +{ + return (pc ^ (pc >> TB_JMP_CACHE_BITS)) & (TB_JMP_CACHE_SIZE - 1); +} + +#endif /* CONFIG_SOFTMMU */ + +static inline +uint32_t tb_hash_func(tb_page_addr_t phys_pc, target_ulong pc, uint32_t flags, + uint32_t cf_mask, uint32_t trace_vcpu_dstate) +{ + return qemu_xxhash7(phys_pc, pc, flags, cf_mask, trace_vcpu_dstate); +} + +#endif diff --git a/include/exec/tb-lookup.h b/include/exec/tb-lookup.h new file mode 100644 index 000000000..9cf475bb0 --- /dev/null +++ b/include/exec/tb-lookup.h @@ -0,0 +1,51 @@ +/* + * Copyright (C) 2017, Emilio G. Cota + * + * License: GNU GPL, version 2 or later. + * See the COPYING file in the top-level directory. + */ +#ifndef EXEC_TB_LOOKUP_H +#define EXEC_TB_LOOKUP_H + +#ifdef NEED_CPU_H +#include "cpu.h" +#else +#include "exec/poison.h" +#endif + +#include "exec/exec-all.h" +#include "exec/tb-hash.h" + +/* Might cause an exception, so have a longjmp destination ready */ +static inline TranslationBlock * +tb_lookup__cpu_state(CPUState *cpu, target_ulong *pc, target_ulong *cs_base, + uint32_t *flags, uint32_t cf_mask) +{ + CPUArchState *env = (CPUArchState *)cpu->env_ptr; + TranslationBlock *tb; + uint32_t hash; + + cpu_get_tb_cpu_state(env, pc, cs_base, flags); + hash = tb_jmp_cache_hash_func(*pc); + tb = qatomic_rcu_read(&cpu->tb_jmp_cache[hash]); + + cf_mask &= ~CF_CLUSTER_MASK; + cf_mask |= cpu->cluster_index << CF_CLUSTER_SHIFT; + + if (likely(tb && + tb->pc == *pc && + tb->cs_base == *cs_base && + tb->flags == *flags && + tb->trace_vcpu_dstate == *cpu->trace_dstate && + (tb_cflags(tb) & (CF_HASH_MASK | CF_INVALID)) == cf_mask)) { + return tb; + } + tb = tb_htable_lookup(cpu, *pc, *cs_base, *flags, cf_mask); + if (tb == NULL) { + return NULL; + } + qatomic_set(&cpu->tb_jmp_cache[hash], tb); + return tb; +} + +#endif /* EXEC_TB_LOOKUP_H */ diff --git a/include/exec/translator.h b/include/exec/translator.h new file mode 100644 index 000000000..638e1529c --- /dev/null +++ b/include/exec/translator.h @@ -0,0 +1,183 @@ +/* + * Generic intermediate code generation. + * + * Copyright (C) 2016-2017 Lluís Vilanova + * + * This work is licensed under the terms of the GNU GPL, version 2 or later. + * See the COPYING file in the top-level directory. + */ + +#ifndef EXEC__TRANSLATOR_H +#define EXEC__TRANSLATOR_H + +/* + * Include this header from a target-specific file, and add a + * + * DisasContextBase base; + * + * member in your target-specific DisasContext. + */ + + +#include "qemu/bswap.h" +#include "exec/exec-all.h" +#include "exec/cpu_ldst.h" +#include "exec/plugin-gen.h" +#include "tcg/tcg.h" + + +/** + * DisasJumpType: + * @DISAS_NEXT: Next instruction in program order. + * @DISAS_TOO_MANY: Too many instructions translated. + * @DISAS_NORETURN: Following code is dead. + * @DISAS_TARGET_*: Start of target-specific conditions. + * + * What instruction to disassemble next. + */ +typedef enum DisasJumpType { + DISAS_NEXT, + DISAS_TOO_MANY, + DISAS_NORETURN, + DISAS_TARGET_0, + DISAS_TARGET_1, + DISAS_TARGET_2, + DISAS_TARGET_3, + DISAS_TARGET_4, + DISAS_TARGET_5, + DISAS_TARGET_6, + DISAS_TARGET_7, + DISAS_TARGET_8, + DISAS_TARGET_9, + DISAS_TARGET_10, + DISAS_TARGET_11, +} DisasJumpType; + +/** + * DisasContextBase: + * @tb: Translation block for this disassembly. + * @pc_first: Address of first guest instruction in this TB. + * @pc_next: Address of next guest instruction in this TB (current during + * disassembly). + * @is_jmp: What instruction to disassemble next. + * @num_insns: Number of translated instructions (including current). + * @max_insns: Maximum number of instructions to be translated in this TB. + * @singlestep_enabled: "Hardware" single stepping enabled. + * + * Architecture-agnostic disassembly context. + */ +typedef struct DisasContextBase { + TranslationBlock *tb; + target_ulong pc_first; + target_ulong pc_next; + DisasJumpType is_jmp; + int num_insns; + int max_insns; + bool singlestep_enabled; +} DisasContextBase; + +/** + * TranslatorOps: + * @init_disas_context: + * Initialize the target-specific portions of DisasContext struct. + * The generic DisasContextBase has already been initialized. + * + * @tb_start: + * Emit any code required before the start of the main loop, + * after the generic gen_tb_start(). + * + * @insn_start: + * Emit the tcg_gen_insn_start opcode. + * + * @breakpoint_check: + * When called, the breakpoint has already been checked to match the PC, + * but the target may decide the breakpoint missed the address + * (e.g., due to conditions encoded in their flags). Return true to + * indicate that the breakpoint did hit, in which case no more breakpoints + * are checked. If the breakpoint did hit, emit any code required to + * signal the exception, and set db->is_jmp as necessary to terminate + * the main loop. + * + * @translate_insn: + * Disassemble one instruction and set db->pc_next for the start + * of the following instruction. Set db->is_jmp as necessary to + * terminate the main loop. + * + * @tb_stop: + * Emit any opcodes required to exit the TB, based on db->is_jmp. + * + * @disas_log: + * Print instruction disassembly to log. + */ +typedef struct TranslatorOps { + void (*init_disas_context)(DisasContextBase *db, CPUState *cpu); + void (*tb_start)(DisasContextBase *db, CPUState *cpu); + void (*insn_start)(DisasContextBase *db, CPUState *cpu); + bool (*breakpoint_check)(DisasContextBase *db, CPUState *cpu, + const CPUBreakpoint *bp); + void (*translate_insn)(DisasContextBase *db, CPUState *cpu); + void (*tb_stop)(DisasContextBase *db, CPUState *cpu); + void (*disas_log)(const DisasContextBase *db, CPUState *cpu); +} TranslatorOps; + +/** + * translator_loop: + * @ops: Target-specific operations. + * @db: Disassembly context. + * @cpu: Target vCPU. + * @tb: Translation block. + * @max_insns: Maximum number of insns to translate. + * + * Generic translator loop. + * + * Translation will stop in the following cases (in order): + * - When is_jmp set by #TranslatorOps::breakpoint_check. + * - set to DISAS_TOO_MANY exits after translating one more insn + * - set to any other value than DISAS_NEXT exits immediately. + * - When is_jmp set by #TranslatorOps::translate_insn. + * - set to any value other than DISAS_NEXT exits immediately. + * - When the TCG operation buffer is full. + * - When single-stepping is enabled (system-wide or on the current vCPU). + * - When too many instructions have been translated. + */ +void translator_loop(const TranslatorOps *ops, DisasContextBase *db, + CPUState *cpu, TranslationBlock *tb, int max_insns); + +void translator_loop_temp_check(DisasContextBase *db); + +/* + * Translator Load Functions + * + * These are intended to replace the direct usage of the cpu_ld*_code + * functions and are mandatory for front-ends that have been migrated + * to the common translator_loop. These functions are only intended + * to be called from the translation stage and should not be called + * from helper functions. Those functions should be converted to encode + * the relevant information at translation time. + */ + +#define GEN_TRANSLATOR_LD(fullname, type, load_fn, swap_fn) \ + static inline type \ + fullname ## _swap(CPUArchState *env, abi_ptr pc, bool do_swap) \ + { \ + type ret = load_fn(env, pc); \ + if (do_swap) { \ + ret = swap_fn(ret); \ + } \ + plugin_insn_append(&ret, sizeof(ret)); \ + return ret; \ + } \ + \ + static inline type fullname(CPUArchState *env, abi_ptr pc) \ + { \ + return fullname ## _swap(env, pc, false); \ + } + +GEN_TRANSLATOR_LD(translator_ldub, uint8_t, cpu_ldub_code, /* no swap */) +GEN_TRANSLATOR_LD(translator_ldsw, int16_t, cpu_ldsw_code, bswap16) +GEN_TRANSLATOR_LD(translator_lduw, uint16_t, cpu_lduw_code, bswap16) +GEN_TRANSLATOR_LD(translator_ldl, uint32_t, cpu_ldl_code, bswap32) +GEN_TRANSLATOR_LD(translator_ldq, uint64_t, cpu_ldq_code, bswap64) +#undef GEN_TRANSLATOR_LD + +#endif /* EXEC__TRANSLATOR_H */ diff --git a/include/exec/user/abitypes.h b/include/exec/user/abitypes.h new file mode 100644 index 000000000..743b8bb9e --- /dev/null +++ b/include/exec/user/abitypes.h @@ -0,0 +1,71 @@ +#ifndef EXEC_USER_ABITYPES_H +#define EXEC_USER_ABITYPES_H + +#include "cpu.h" + +#ifdef TARGET_ABI32 +#define TARGET_ABI_BITS 32 +#else +#define TARGET_ABI_BITS TARGET_LONG_BITS +#endif + +#ifdef TARGET_M68K +#define ABI_INT_ALIGNMENT 2 +#define ABI_LONG_ALIGNMENT 2 +#define ABI_LLONG_ALIGNMENT 2 +#endif + +#if (defined(TARGET_I386) && !defined(TARGET_X86_64)) || defined(TARGET_SH4) +#define ABI_LLONG_ALIGNMENT 4 +#endif + +#ifndef ABI_SHORT_ALIGNMENT +#define ABI_SHORT_ALIGNMENT 2 +#endif +#ifndef ABI_INT_ALIGNMENT +#define ABI_INT_ALIGNMENT 4 +#endif +#ifndef ABI_LONG_ALIGNMENT +#define ABI_LONG_ALIGNMENT (TARGET_ABI_BITS / 8) +#endif +#ifndef ABI_LLONG_ALIGNMENT +#define ABI_LLONG_ALIGNMENT 8 +#endif + +typedef int16_t abi_short __attribute__ ((aligned(ABI_SHORT_ALIGNMENT))); +typedef uint16_t abi_ushort __attribute__((aligned(ABI_SHORT_ALIGNMENT))); +typedef int32_t abi_int __attribute__((aligned(ABI_INT_ALIGNMENT))); +typedef uint32_t abi_uint __attribute__((aligned(ABI_INT_ALIGNMENT))); +typedef int64_t abi_llong __attribute__((aligned(ABI_LLONG_ALIGNMENT))); +typedef uint64_t abi_ullong __attribute__((aligned(ABI_LLONG_ALIGNMENT))); + +#ifdef TARGET_ABI32 +typedef uint32_t abi_ulong __attribute__((aligned(ABI_LONG_ALIGNMENT))); +typedef int32_t abi_long __attribute__((aligned(ABI_LONG_ALIGNMENT))); +#define TARGET_ABI_FMT_lx "%08x" +#define TARGET_ABI_FMT_ld "%d" +#define TARGET_ABI_FMT_lu "%u" + +static inline abi_ulong tswapal(abi_ulong v) +{ + return tswap32(v); +} + +#else +typedef target_ulong abi_ulong __attribute__((aligned(ABI_LONG_ALIGNMENT))); +typedef target_long abi_long __attribute__((aligned(ABI_LONG_ALIGNMENT))); +#define TARGET_ABI_FMT_lx TARGET_FMT_lx +#define TARGET_ABI_FMT_ld TARGET_FMT_ld +#define TARGET_ABI_FMT_lu TARGET_FMT_lu +/* for consistency, define ABI32 too */ +#if TARGET_ABI_BITS == 32 +#define TARGET_ABI32 1 +#endif + +static inline abi_ulong tswapal(abi_ulong v) +{ + return tswapl(v); +} + +#endif +#endif diff --git a/include/exec/user/thunk.h b/include/exec/user/thunk.h new file mode 100644 index 000000000..300a840d5 --- /dev/null +++ b/include/exec/user/thunk.h @@ -0,0 +1,203 @@ +/* + * Generic thunking code to convert data between host and target CPU + * + * Copyright (c) 2003 Fabrice Bellard + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, see . + */ + +#ifndef THUNK_H +#define THUNK_H + +#include "cpu.h" +#include "exec/user/abitypes.h" + +/* types enums definitions */ + +typedef enum argtype { + TYPE_NULL, + TYPE_CHAR, + TYPE_SHORT, + TYPE_INT, + TYPE_LONG, + TYPE_ULONG, + TYPE_PTRVOID, /* pointer on unknown data */ + TYPE_LONGLONG, + TYPE_ULONGLONG, + TYPE_PTR, + TYPE_ARRAY, + TYPE_STRUCT, + TYPE_OLDDEVT, +} argtype; + +#define MK_PTR(type) TYPE_PTR, type +#define MK_ARRAY(type, size) TYPE_ARRAY, (int)(size), type +#define MK_STRUCT(id) TYPE_STRUCT, id + +#define THUNK_TARGET 0 +#define THUNK_HOST 1 + +typedef struct { + /* standard struct handling */ + const argtype *field_types; + int nb_fields; + int *field_offsets[2]; + /* special handling */ + void (*convert[2])(void *dst, const void *src); + void (*print)(void *arg); + int size[2]; + int align[2]; + const char *name; +} StructEntry; + +/* Translation table for bitmasks... */ +typedef struct bitmask_transtbl { + unsigned int target_mask; + unsigned int target_bits; + unsigned int host_mask; + unsigned int host_bits; +} bitmask_transtbl; + +void thunk_register_struct(int id, const char *name, const argtype *types); +void thunk_register_struct_direct(int id, const char *name, + const StructEntry *se1); +const argtype *thunk_convert(void *dst, const void *src, + const argtype *type_ptr, int to_host); +const argtype *thunk_print(void *arg, const argtype *type_ptr); + +extern StructEntry *struct_entries; + +int thunk_type_size_array(const argtype *type_ptr, int is_host); +int thunk_type_align_array(const argtype *type_ptr, int is_host); + +static inline int thunk_type_size(const argtype *type_ptr, int is_host) +{ + int type, size; + const StructEntry *se; + + type = *type_ptr; + switch(type) { + case TYPE_CHAR: + return 1; + case TYPE_SHORT: + return 2; + case TYPE_INT: + return 4; + case TYPE_LONGLONG: + case TYPE_ULONGLONG: + return 8; + case TYPE_LONG: + case TYPE_ULONG: + case TYPE_PTRVOID: + case TYPE_PTR: + if (is_host) { + return sizeof(void *); + } else { + return TARGET_ABI_BITS / 8; + } + break; + case TYPE_OLDDEVT: + if (is_host) { +#if defined(HOST_X86_64) + return 8; +#elif defined(HOST_ALPHA) || defined(HOST_IA64) || defined(HOST_MIPS) || \ + defined(HOST_PARISC) || defined(HOST_SPARC64) + return 4; +#elif defined(HOST_PPC) + return sizeof(void *); +#else + return 2; +#endif + } else { +#if defined(TARGET_X86_64) + return 8; +#elif defined(TARGET_ALPHA) || defined(TARGET_IA64) || defined(TARGET_MIPS) || \ + defined(TARGET_PARISC) || defined(TARGET_SPARC64) + return 4; +#elif defined(TARGET_PPC) + return TARGET_ABI_BITS / 8; +#else + return 2; +#endif + } + break; + case TYPE_ARRAY: + size = type_ptr[1]; + return size * thunk_type_size_array(type_ptr + 2, is_host); + case TYPE_STRUCT: + se = struct_entries + type_ptr[1]; + return se->size[is_host]; + default: + g_assert_not_reached(); + } +} + +static inline int thunk_type_align(const argtype *type_ptr, int is_host) +{ + int type; + const StructEntry *se; + + type = *type_ptr; + switch(type) { + case TYPE_CHAR: + return 1; + case TYPE_SHORT: + if (is_host) { + return __alignof__(short); + } else { + return ABI_SHORT_ALIGNMENT; + } + case TYPE_INT: + if (is_host) { + return __alignof__(int); + } else { + return ABI_INT_ALIGNMENT; + } + case TYPE_LONGLONG: + case TYPE_ULONGLONG: + if (is_host) { + return __alignof__(long long); + } else { + return ABI_LLONG_ALIGNMENT; + } + case TYPE_LONG: + case TYPE_ULONG: + case TYPE_PTRVOID: + case TYPE_PTR: + if (is_host) { + return __alignof__(long); + } else { + return ABI_LONG_ALIGNMENT; + } + break; + case TYPE_OLDDEVT: + return thunk_type_size(type_ptr, is_host); + case TYPE_ARRAY: + return thunk_type_align_array(type_ptr + 2, is_host); + case TYPE_STRUCT: + se = struct_entries + type_ptr[1]; + return se->align[is_host]; + default: + g_assert_not_reached(); + } +} + +unsigned int target_to_host_bitmask(unsigned int target_mask, + const bitmask_transtbl * trans_tbl); +unsigned int host_to_target_bitmask(unsigned int host_mask, + const bitmask_transtbl * trans_tbl); + +void thunk_init(unsigned int max_structs); + +#endif diff --git a/include/fpu/softfloat-helpers.h b/include/fpu/softfloat-helpers.h new file mode 100644 index 000000000..2f0674fbd --- /dev/null +++ b/include/fpu/softfloat-helpers.h @@ -0,0 +1,143 @@ +/* + * QEMU float support - standalone helpers + * + * This is provided for files that don't need the access to the full + * set of softfloat functions. Typically this is cpu initialisation + * code which wants to set default rounding and exceptions modes. + * + * The code in this source file is derived from release 2a of the SoftFloat + * IEC/IEEE Floating-point Arithmetic Package. Those parts of the code (and + * some later contributions) are provided under that license, as detailed below. + * It has subsequently been modified by contributors to the QEMU Project, + * so some portions are provided under: + * the SoftFloat-2a license + * the BSD license + * GPL-v2-or-later + * + * Any future contributions to this file after December 1st 2014 will be + * taken to be licensed under the Softfloat-2a license unless specifically + * indicated otherwise. + */ + +/* +=============================================================================== +This C header file is part of the SoftFloat IEC/IEEE Floating-point +Arithmetic Package, Release 2a. + +Written by John R. Hauser. This work was made possible in part by the +International Computer Science Institute, located at Suite 600, 1947 Center +Street, Berkeley, California 94704. Funding was partially provided by the +National Science Foundation under grant MIP-9311980. The original version +of this code was written as part of a project to build a fixed-point vector +processor in collaboration with the University of California at Berkeley, +overseen by Profs. Nelson Morgan and John Wawrzynek. More information +is available through the Web page `http://HTTP.CS.Berkeley.EDU/~jhauser/ +arithmetic/SoftFloat.html'. + +THIS SOFTWARE IS DISTRIBUTED AS IS, FOR FREE. Although reasonable effort +has been made to avoid it, THIS SOFTWARE MAY CONTAIN FAULTS THAT WILL AT +TIMES RESULT IN INCORRECT BEHAVIOR. USE OF THIS SOFTWARE IS RESTRICTED TO +PERSONS AND ORGANIZATIONS WHO CAN AND WILL TAKE FULL RESPONSIBILITY FOR ANY +AND ALL LOSSES, COSTS, OR OTHER PROBLEMS ARISING FROM ITS USE. + +Derivative works are acceptable, even for commercial purposes, so long as +(1) they include prominent notice that the work is derivative, and (2) they +include prominent notice akin to these four paragraphs for those parts of +this code that are retained. + +=============================================================================== +*/ + +#ifndef _SOFTFLOAT_HELPERS_H_ +#define _SOFTFLOAT_HELPERS_H_ + +#include "fpu/softfloat-types.h" + +static inline void set_float_detect_tininess(bool val, float_status *status) +{ + status->tininess_before_rounding = val; +} + +static inline void set_float_rounding_mode(FloatRoundMode val, + float_status *status) +{ + status->float_rounding_mode = val; +} + +static inline void set_float_exception_flags(int val, float_status *status) +{ + status->float_exception_flags = val; +} + +static inline void set_floatx80_rounding_precision(int val, + float_status *status) +{ + status->floatx80_rounding_precision = val; +} + +static inline void set_flush_to_zero(bool val, float_status *status) +{ + status->flush_to_zero = val; +} + +static inline void set_flush_inputs_to_zero(bool val, float_status *status) +{ + status->flush_inputs_to_zero = val; +} + +static inline void set_default_nan_mode(bool val, float_status *status) +{ + status->default_nan_mode = val; +} + +static inline void set_snan_bit_is_one(bool val, float_status *status) +{ + status->snan_bit_is_one = val; +} + +static inline void set_use_first_nan(bool val, float_status *status) +{ + status->use_first_nan = val; +} + +static inline void set_no_signaling_nans(bool val, float_status *status) +{ + status->no_signaling_nans = val; +} + +static inline bool get_float_detect_tininess(float_status *status) +{ + return status->tininess_before_rounding; +} + +static inline FloatRoundMode get_float_rounding_mode(float_status *status) +{ + return status->float_rounding_mode; +} + +static inline int get_float_exception_flags(float_status *status) +{ + return status->float_exception_flags; +} + +static inline int get_floatx80_rounding_precision(float_status *status) +{ + return status->floatx80_rounding_precision; +} + +static inline bool get_flush_to_zero(float_status *status) +{ + return status->flush_to_zero; +} + +static inline bool get_flush_inputs_to_zero(float_status *status) +{ + return status->flush_inputs_to_zero; +} + +static inline bool get_default_nan_mode(float_status *status) +{ + return status->default_nan_mode; +} + +#endif /* _SOFTFLOAT_HELPERS_H_ */ diff --git a/include/fpu/softfloat-macros.h b/include/fpu/softfloat-macros.h new file mode 100644 index 000000000..a35ec2893 --- /dev/null +++ b/include/fpu/softfloat-macros.h @@ -0,0 +1,797 @@ +/* + * QEMU float support macros + * + * The code in this source file is derived from release 2a of the SoftFloat + * IEC/IEEE Floating-point Arithmetic Package. Those parts of the code (and + * some later contributions) are provided under that license, as detailed below. + * It has subsequently been modified by contributors to the QEMU Project, + * so some portions are provided under: + * the SoftFloat-2a license + * the BSD license + * GPL-v2-or-later + * + * Any future contributions to this file after December 1st 2014 will be + * taken to be licensed under the Softfloat-2a license unless specifically + * indicated otherwise. + */ + +/* +=============================================================================== +This C source fragment is part of the SoftFloat IEC/IEEE Floating-point +Arithmetic Package, Release 2a. + +Written by John R. Hauser. This work was made possible in part by the +International Computer Science Institute, located at Suite 600, 1947 Center +Street, Berkeley, California 94704. Funding was partially provided by the +National Science Foundation under grant MIP-9311980. The original version +of this code was written as part of a project to build a fixed-point vector +processor in collaboration with the University of California at Berkeley, +overseen by Profs. Nelson Morgan and John Wawrzynek. More information +is available through the Web page `http://HTTP.CS.Berkeley.EDU/~jhauser/ +arithmetic/SoftFloat.html'. + +THIS SOFTWARE IS DISTRIBUTED AS IS, FOR FREE. Although reasonable effort +has been made to avoid it, THIS SOFTWARE MAY CONTAIN FAULTS THAT WILL AT +TIMES RESULT IN INCORRECT BEHAVIOR. USE OF THIS SOFTWARE IS RESTRICTED TO +PERSONS AND ORGANIZATIONS WHO CAN AND WILL TAKE FULL RESPONSIBILITY FOR ANY +AND ALL LOSSES, COSTS, OR OTHER PROBLEMS ARISING FROM ITS USE. + +Derivative works are acceptable, even for commercial purposes, so long as +(1) they include prominent notice that the work is derivative, and (2) they +include prominent notice akin to these four paragraphs for those parts of +this code that are retained. + +=============================================================================== +*/ + +/* BSD licensing: + * Copyright (c) 2006, Fabrice Bellard + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials provided with the distribution. + * + * 3. Neither the name of the copyright holder nor the names of its contributors + * may be used to endorse or promote products derived from this software without + * specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF + * THE POSSIBILITY OF SUCH DAMAGE. + */ + +/* Portions of this work are licensed under the terms of the GNU GPL, + * version 2 or later. See the COPYING file in the top-level directory. + */ + +#ifndef FPU_SOFTFLOAT_MACROS_H +#define FPU_SOFTFLOAT_MACROS_H + +#include "fpu/softfloat-types.h" + +/*---------------------------------------------------------------------------- +| Shifts `a' right by the number of bits given in `count'. If any nonzero +| bits are shifted off, they are ``jammed'' into the least significant bit of +| the result by setting the least significant bit to 1. The value of `count' +| can be arbitrarily large; in particular, if `count' is greater than 32, the +| result will be either 0 or 1, depending on whether `a' is zero or nonzero. +| The result is stored in the location pointed to by `zPtr'. +*----------------------------------------------------------------------------*/ + +static inline void shift32RightJamming(uint32_t a, int count, uint32_t *zPtr) +{ + uint32_t z; + + if ( count == 0 ) { + z = a; + } + else if ( count < 32 ) { + z = ( a>>count ) | ( ( a<<( ( - count ) & 31 ) ) != 0 ); + } + else { + z = ( a != 0 ); + } + *zPtr = z; + +} + +/*---------------------------------------------------------------------------- +| Shifts `a' right by the number of bits given in `count'. If any nonzero +| bits are shifted off, they are ``jammed'' into the least significant bit of +| the result by setting the least significant bit to 1. The value of `count' +| can be arbitrarily large; in particular, if `count' is greater than 64, the +| result will be either 0 or 1, depending on whether `a' is zero or nonzero. +| The result is stored in the location pointed to by `zPtr'. +*----------------------------------------------------------------------------*/ + +static inline void shift64RightJamming(uint64_t a, int count, uint64_t *zPtr) +{ + uint64_t z; + + if ( count == 0 ) { + z = a; + } + else if ( count < 64 ) { + z = ( a>>count ) | ( ( a<<( ( - count ) & 63 ) ) != 0 ); + } + else { + z = ( a != 0 ); + } + *zPtr = z; + +} + +/*---------------------------------------------------------------------------- +| Shifts the 128-bit value formed by concatenating `a0' and `a1' right by 64 +| _plus_ the number of bits given in `count'. The shifted result is at most +| 64 nonzero bits; this is stored at the location pointed to by `z0Ptr'. The +| bits shifted off form a second 64-bit result as follows: The _last_ bit +| shifted off is the most-significant bit of the extra result, and the other +| 63 bits of the extra result are all zero if and only if _all_but_the_last_ +| bits shifted off were all zero. This extra result is stored in the location +| pointed to by `z1Ptr'. The value of `count' can be arbitrarily large. +| (This routine makes more sense if `a0' and `a1' are considered to form a +| fixed-point value with binary point between `a0' and `a1'. This fixed-point +| value is shifted right by the number of bits given in `count', and the +| integer part of the result is returned at the location pointed to by +| `z0Ptr'. The fractional part of the result may be slightly corrupted as +| described above, and is returned at the location pointed to by `z1Ptr'.) +*----------------------------------------------------------------------------*/ + +static inline void + shift64ExtraRightJamming( + uint64_t a0, uint64_t a1, int count, uint64_t *z0Ptr, uint64_t *z1Ptr) +{ + uint64_t z0, z1; + int8_t negCount = ( - count ) & 63; + + if ( count == 0 ) { + z1 = a1; + z0 = a0; + } + else if ( count < 64 ) { + z1 = ( a0<>count; + } + else { + if ( count == 64 ) { + z1 = a0 | ( a1 != 0 ); + } + else { + z1 = ( ( a0 | a1 ) != 0 ); + } + z0 = 0; + } + *z1Ptr = z1; + *z0Ptr = z0; + +} + +/*---------------------------------------------------------------------------- +| Shifts the 128-bit value formed by concatenating `a0' and `a1' right by the +| number of bits given in `count'. Any bits shifted off are lost. The value +| of `count' can be arbitrarily large; in particular, if `count' is greater +| than 128, the result will be 0. The result is broken into two 64-bit pieces +| which are stored at the locations pointed to by `z0Ptr' and `z1Ptr'. +*----------------------------------------------------------------------------*/ + +static inline void + shift128Right( + uint64_t a0, uint64_t a1, int count, uint64_t *z0Ptr, uint64_t *z1Ptr) +{ + uint64_t z0, z1; + int8_t negCount = ( - count ) & 63; + + if ( count == 0 ) { + z1 = a1; + z0 = a0; + } + else if ( count < 64 ) { + z1 = ( a0<>count ); + z0 = a0>>count; + } + else { + z1 = (count < 128) ? (a0 >> (count & 63)) : 0; + z0 = 0; + } + *z1Ptr = z1; + *z0Ptr = z0; + +} + +/*---------------------------------------------------------------------------- +| Shifts the 128-bit value formed by concatenating `a0' and `a1' right by the +| number of bits given in `count'. If any nonzero bits are shifted off, they +| are ``jammed'' into the least significant bit of the result by setting the +| least significant bit to 1. The value of `count' can be arbitrarily large; +| in particular, if `count' is greater than 128, the result will be either +| 0 or 1, depending on whether the concatenation of `a0' and `a1' is zero or +| nonzero. The result is broken into two 64-bit pieces which are stored at +| the locations pointed to by `z0Ptr' and `z1Ptr'. +*----------------------------------------------------------------------------*/ + +static inline void + shift128RightJamming( + uint64_t a0, uint64_t a1, int count, uint64_t *z0Ptr, uint64_t *z1Ptr) +{ + uint64_t z0, z1; + int8_t negCount = ( - count ) & 63; + + if ( count == 0 ) { + z1 = a1; + z0 = a0; + } + else if ( count < 64 ) { + z1 = ( a0<>count ) | ( ( a1<>count; + } + else { + if ( count == 64 ) { + z1 = a0 | ( a1 != 0 ); + } + else if ( count < 128 ) { + z1 = ( a0>>( count & 63 ) ) | ( ( ( a0<>count ); + z0 = a0>>count; + } + else { + if ( count == 64 ) { + z2 = a1; + z1 = a0; + } + else { + a2 |= a1; + if ( count < 128 ) { + z2 = a0<>( count & 63 ); + } + else { + z2 = ( count == 128 ) ? a0 : ( a0 != 0 ); + z1 = 0; + } + } + z0 = 0; + } + z2 |= ( a2 != 0 ); + } + *z2Ptr = z2; + *z1Ptr = z1; + *z0Ptr = z0; + +} + +/*---------------------------------------------------------------------------- +| Shifts the 128-bit value formed by concatenating `a0' and `a1' left by the +| number of bits given in `count'. Any bits shifted off are lost. The value +| of `count' must be less than 64. The result is broken into two 64-bit +| pieces which are stored at the locations pointed to by `z0Ptr' and `z1Ptr'. +*----------------------------------------------------------------------------*/ + +static inline void shortShift128Left(uint64_t a0, uint64_t a1, int count, + uint64_t *z0Ptr, uint64_t *z1Ptr) +{ + *z1Ptr = a1 << count; + *z0Ptr = count == 0 ? a0 : (a0 << count) | (a1 >> (-count & 63)); +} + +/*---------------------------------------------------------------------------- +| Shifts the 128-bit value formed by concatenating `a0' and `a1' left by the +| number of bits given in `count'. Any bits shifted off are lost. The value +| of `count' may be greater than 64. The result is broken into two 64-bit +| pieces which are stored at the locations pointed to by `z0Ptr' and `z1Ptr'. +*----------------------------------------------------------------------------*/ + +static inline void shift128Left(uint64_t a0, uint64_t a1, int count, + uint64_t *z0Ptr, uint64_t *z1Ptr) +{ + if (count < 64) { + *z1Ptr = a1 << count; + *z0Ptr = count == 0 ? a0 : (a0 << count) | (a1 >> (-count & 63)); + } else { + *z1Ptr = 0; + *z0Ptr = a1 << (count - 64); + } +} + +/*---------------------------------------------------------------------------- +| Shifts the 192-bit value formed by concatenating `a0', `a1', and `a2' left +| by the number of bits given in `count'. Any bits shifted off are lost. +| The value of `count' must be less than 64. The result is broken into three +| 64-bit pieces which are stored at the locations pointed to by `z0Ptr', +| `z1Ptr', and `z2Ptr'. +*----------------------------------------------------------------------------*/ + +static inline void + shortShift192Left( + uint64_t a0, + uint64_t a1, + uint64_t a2, + int count, + uint64_t *z0Ptr, + uint64_t *z1Ptr, + uint64_t *z2Ptr + ) +{ + uint64_t z0, z1, z2; + int8_t negCount; + + z2 = a2<>negCount; + z0 |= a1>>negCount; + } + *z2Ptr = z2; + *z1Ptr = z1; + *z0Ptr = z0; + +} + +/*---------------------------------------------------------------------------- +| Adds the 128-bit value formed by concatenating `a0' and `a1' to the 128-bit +| value formed by concatenating `b0' and `b1'. Addition is modulo 2^128, so +| any carry out is lost. The result is broken into two 64-bit pieces which +| are stored at the locations pointed to by `z0Ptr' and `z1Ptr'. +*----------------------------------------------------------------------------*/ + +static inline void + add128( + uint64_t a0, uint64_t a1, uint64_t b0, uint64_t b1, uint64_t *z0Ptr, uint64_t *z1Ptr ) +{ + uint64_t z1; + + z1 = a1 + b1; + *z1Ptr = z1; + *z0Ptr = a0 + b0 + ( z1 < a1 ); + +} + +/*---------------------------------------------------------------------------- +| Adds the 192-bit value formed by concatenating `a0', `a1', and `a2' to the +| 192-bit value formed by concatenating `b0', `b1', and `b2'. Addition is +| modulo 2^192, so any carry out is lost. The result is broken into three +| 64-bit pieces which are stored at the locations pointed to by `z0Ptr', +| `z1Ptr', and `z2Ptr'. +*----------------------------------------------------------------------------*/ + +static inline void + add192( + uint64_t a0, + uint64_t a1, + uint64_t a2, + uint64_t b0, + uint64_t b1, + uint64_t b2, + uint64_t *z0Ptr, + uint64_t *z1Ptr, + uint64_t *z2Ptr + ) +{ + uint64_t z0, z1, z2; + int8_t carry0, carry1; + + z2 = a2 + b2; + carry1 = ( z2 < a2 ); + z1 = a1 + b1; + carry0 = ( z1 < a1 ); + z0 = a0 + b0; + z1 += carry1; + z0 += ( z1 < carry1 ); + z0 += carry0; + *z2Ptr = z2; + *z1Ptr = z1; + *z0Ptr = z0; + +} + +/*---------------------------------------------------------------------------- +| Subtracts the 128-bit value formed by concatenating `b0' and `b1' from the +| 128-bit value formed by concatenating `a0' and `a1'. Subtraction is modulo +| 2^128, so any borrow out (carry out) is lost. The result is broken into two +| 64-bit pieces which are stored at the locations pointed to by `z0Ptr' and +| `z1Ptr'. +*----------------------------------------------------------------------------*/ + +static inline void + sub128( + uint64_t a0, uint64_t a1, uint64_t b0, uint64_t b1, uint64_t *z0Ptr, uint64_t *z1Ptr ) +{ + + *z1Ptr = a1 - b1; + *z0Ptr = a0 - b0 - ( a1 < b1 ); + +} + +/*---------------------------------------------------------------------------- +| Subtracts the 192-bit value formed by concatenating `b0', `b1', and `b2' +| from the 192-bit value formed by concatenating `a0', `a1', and `a2'. +| Subtraction is modulo 2^192, so any borrow out (carry out) is lost. The +| result is broken into three 64-bit pieces which are stored at the locations +| pointed to by `z0Ptr', `z1Ptr', and `z2Ptr'. +*----------------------------------------------------------------------------*/ + +static inline void + sub192( + uint64_t a0, + uint64_t a1, + uint64_t a2, + uint64_t b0, + uint64_t b1, + uint64_t b2, + uint64_t *z0Ptr, + uint64_t *z1Ptr, + uint64_t *z2Ptr + ) +{ + uint64_t z0, z1, z2; + int8_t borrow0, borrow1; + + z2 = a2 - b2; + borrow1 = ( a2 < b2 ); + z1 = a1 - b1; + borrow0 = ( a1 < b1 ); + z0 = a0 - b0; + z0 -= ( z1 < borrow1 ); + z1 -= borrow1; + z0 -= borrow0; + *z2Ptr = z2; + *z1Ptr = z1; + *z0Ptr = z0; + +} + +/*---------------------------------------------------------------------------- +| Multiplies `a' by `b' to obtain a 128-bit product. The product is broken +| into two 64-bit pieces which are stored at the locations pointed to by +| `z0Ptr' and `z1Ptr'. +*----------------------------------------------------------------------------*/ + +static inline void mul64To128( uint64_t a, uint64_t b, uint64_t *z0Ptr, uint64_t *z1Ptr ) +{ + uint32_t aHigh, aLow, bHigh, bLow; + uint64_t z0, zMiddleA, zMiddleB, z1; + + aLow = a; + aHigh = a>>32; + bLow = b; + bHigh = b>>32; + z1 = ( (uint64_t) aLow ) * bLow; + zMiddleA = ( (uint64_t) aLow ) * bHigh; + zMiddleB = ( (uint64_t) aHigh ) * bLow; + z0 = ( (uint64_t) aHigh ) * bHigh; + zMiddleA += zMiddleB; + z0 += ( ( (uint64_t) ( zMiddleA < zMiddleB ) )<<32 ) + ( zMiddleA>>32 ); + zMiddleA <<= 32; + z1 += zMiddleA; + z0 += ( z1 < zMiddleA ); + *z1Ptr = z1; + *z0Ptr = z0; + +} + +/*---------------------------------------------------------------------------- +| Multiplies the 128-bit value formed by concatenating `a0' and `a1' by +| `b' to obtain a 192-bit product. The product is broken into three 64-bit +| pieces which are stored at the locations pointed to by `z0Ptr', `z1Ptr', and +| `z2Ptr'. +*----------------------------------------------------------------------------*/ + +static inline void + mul128By64To192( + uint64_t a0, + uint64_t a1, + uint64_t b, + uint64_t *z0Ptr, + uint64_t *z1Ptr, + uint64_t *z2Ptr + ) +{ + uint64_t z0, z1, z2, more1; + + mul64To128( a1, b, &z1, &z2 ); + mul64To128( a0, b, &z0, &more1 ); + add128( z0, more1, 0, z1, &z0, &z1 ); + *z2Ptr = z2; + *z1Ptr = z1; + *z0Ptr = z0; + +} + +/*---------------------------------------------------------------------------- +| Multiplies the 128-bit value formed by concatenating `a0' and `a1' to the +| 128-bit value formed by concatenating `b0' and `b1' to obtain a 256-bit +| product. The product is broken into four 64-bit pieces which are stored at +| the locations pointed to by `z0Ptr', `z1Ptr', `z2Ptr', and `z3Ptr'. +*----------------------------------------------------------------------------*/ + +static inline void + mul128To256( + uint64_t a0, + uint64_t a1, + uint64_t b0, + uint64_t b1, + uint64_t *z0Ptr, + uint64_t *z1Ptr, + uint64_t *z2Ptr, + uint64_t *z3Ptr + ) +{ + uint64_t z0, z1, z2, z3; + uint64_t more1, more2; + + mul64To128( a1, b1, &z2, &z3 ); + mul64To128( a1, b0, &z1, &more2 ); + add128( z1, more2, 0, z2, &z1, &z2 ); + mul64To128( a0, b0, &z0, &more1 ); + add128( z0, more1, 0, z1, &z0, &z1 ); + mul64To128( a0, b1, &more1, &more2 ); + add128( more1, more2, 0, z2, &more1, &z2 ); + add128( z0, z1, 0, more1, &z0, &z1 ); + *z3Ptr = z3; + *z2Ptr = z2; + *z1Ptr = z1; + *z0Ptr = z0; + +} + +/*---------------------------------------------------------------------------- +| Returns an approximation to the 64-bit integer quotient obtained by dividing +| `b' into the 128-bit value formed by concatenating `a0' and `a1'. The +| divisor `b' must be at least 2^63. If q is the exact quotient truncated +| toward zero, the approximation returned lies between q and q + 2 inclusive. +| If the exact quotient q is larger than 64 bits, the maximum positive 64-bit +| unsigned integer is returned. +*----------------------------------------------------------------------------*/ + +static inline uint64_t estimateDiv128To64(uint64_t a0, uint64_t a1, uint64_t b) +{ + uint64_t b0, b1; + uint64_t rem0, rem1, term0, term1; + uint64_t z; + + if ( b <= a0 ) return UINT64_C(0xFFFFFFFFFFFFFFFF); + b0 = b>>32; + z = ( b0<<32 <= a0 ) ? UINT64_C(0xFFFFFFFF00000000) : ( a0 / b0 )<<32; + mul64To128( b, z, &term0, &term1 ); + sub128( a0, a1, term0, term1, &rem0, &rem1 ); + while ( ( (int64_t) rem0 ) < 0 ) { + z -= UINT64_C(0x100000000); + b1 = b<<32; + add128( rem0, rem1, b0, b1, &rem0, &rem1 ); + } + rem0 = ( rem0<<32 ) | ( rem1>>32 ); + z |= ( b0<<32 <= rem0 ) ? 0xFFFFFFFF : rem0 / b0; + return z; + +} + +/* From the GNU Multi Precision Library - longlong.h __udiv_qrnnd + * (https://gmplib.org/repo/gmp/file/tip/longlong.h) + * + * Licensed under the GPLv2/LGPLv3 + */ +static inline uint64_t udiv_qrnnd(uint64_t *r, uint64_t n1, + uint64_t n0, uint64_t d) +{ +#if defined(__x86_64__) + uint64_t q; + asm("divq %4" : "=a"(q), "=d"(*r) : "0"(n0), "1"(n1), "rm"(d)); + return q; +#elif defined(__s390x__) && !defined(__clang__) + /* Need to use a TImode type to get an even register pair for DLGR. */ + unsigned __int128 n = (unsigned __int128)n1 << 64 | n0; + asm("dlgr %0, %1" : "+r"(n) : "r"(d)); + *r = n >> 64; + return n; +#elif defined(_ARCH_PPC64) && defined(_ARCH_PWR7) + /* From Power ISA 2.06, programming note for divdeu. */ + uint64_t q1, q2, Q, r1, r2, R; + asm("divdeu %0,%2,%4; divdu %1,%3,%4" + : "=&r"(q1), "=r"(q2) + : "r"(n1), "r"(n0), "r"(d)); + r1 = -(q1 * d); /* low part of (n1<<64) - (q1 * d) */ + r2 = n0 - (q2 * d); + Q = q1 + q2; + R = r1 + r2; + if (R >= d || R < r2) { /* overflow implies R > d */ + Q += 1; + R -= d; + } + *r = R; + return Q; +#else + uint64_t d0, d1, q0, q1, r1, r0, m; + + d0 = (uint32_t)d; + d1 = d >> 32; + + r1 = n1 % d1; + q1 = n1 / d1; + m = q1 * d0; + r1 = (r1 << 32) | (n0 >> 32); + if (r1 < m) { + q1 -= 1; + r1 += d; + if (r1 >= d) { + if (r1 < m) { + q1 -= 1; + r1 += d; + } + } + } + r1 -= m; + + r0 = r1 % d1; + q0 = r1 / d1; + m = q0 * d0; + r0 = (r0 << 32) | (uint32_t)n0; + if (r0 < m) { + q0 -= 1; + r0 += d; + if (r0 >= d) { + if (r0 < m) { + q0 -= 1; + r0 += d; + } + } + } + r0 -= m; + + *r = r0; + return (q1 << 32) | q0; +#endif +} + +/*---------------------------------------------------------------------------- +| Returns an approximation to the square root of the 32-bit significand given +| by `a'. Considered as an integer, `a' must be at least 2^31. If bit 0 of +| `aExp' (the least significant bit) is 1, the integer returned approximates +| 2^31*sqrt(`a'/2^31), where `a' is considered an integer. If bit 0 of `aExp' +| is 0, the integer returned approximates 2^31*sqrt(`a'/2^30). In either +| case, the approximation returned lies strictly within +/-2 of the exact +| value. +*----------------------------------------------------------------------------*/ + +static inline uint32_t estimateSqrt32(int aExp, uint32_t a) +{ + static const uint16_t sqrtOddAdjustments[] = { + 0x0004, 0x0022, 0x005D, 0x00B1, 0x011D, 0x019F, 0x0236, 0x02E0, + 0x039C, 0x0468, 0x0545, 0x0631, 0x072B, 0x0832, 0x0946, 0x0A67 + }; + static const uint16_t sqrtEvenAdjustments[] = { + 0x0A2D, 0x08AF, 0x075A, 0x0629, 0x051A, 0x0429, 0x0356, 0x029E, + 0x0200, 0x0179, 0x0109, 0x00AF, 0x0068, 0x0034, 0x0012, 0x0002 + }; + int8_t index; + uint32_t z; + + index = ( a>>27 ) & 15; + if ( aExp & 1 ) { + z = 0x4000 + ( a>>17 ) - sqrtOddAdjustments[ (int)index ]; + z = ( ( a / z )<<14 ) + ( z<<15 ); + a >>= 1; + } + else { + z = 0x8000 + ( a>>17 ) - sqrtEvenAdjustments[ (int)index ]; + z = a / z + z; + z = ( 0x20000 <= z ) ? 0xFFFF8000 : ( z<<15 ); + if ( z <= a ) return (uint32_t) ( ( (int32_t) a )>>1 ); + } + return ( (uint32_t) ( ( ( (uint64_t) a )<<31 ) / z ) ) + ( z>>1 ); + +} + +/*---------------------------------------------------------------------------- +| Returns 1 if the 128-bit value formed by concatenating `a0' and `a1' +| is equal to the 128-bit value formed by concatenating `b0' and `b1'. +| Otherwise, returns 0. +*----------------------------------------------------------------------------*/ + +static inline bool eq128(uint64_t a0, uint64_t a1, uint64_t b0, uint64_t b1) +{ + return a0 == b0 && a1 == b1; +} + +/*---------------------------------------------------------------------------- +| Returns 1 if the 128-bit value formed by concatenating `a0' and `a1' is less +| than or equal to the 128-bit value formed by concatenating `b0' and `b1'. +| Otherwise, returns 0. +*----------------------------------------------------------------------------*/ + +static inline bool le128(uint64_t a0, uint64_t a1, uint64_t b0, uint64_t b1) +{ + return a0 < b0 || (a0 == b0 && a1 <= b1); +} + +/*---------------------------------------------------------------------------- +| Returns 1 if the 128-bit value formed by concatenating `a0' and `a1' is less +| than the 128-bit value formed by concatenating `b0' and `b1'. Otherwise, +| returns 0. +*----------------------------------------------------------------------------*/ + +static inline bool lt128(uint64_t a0, uint64_t a1, uint64_t b0, uint64_t b1) +{ + return a0 < b0 || (a0 == b0 && a1 < b1); +} + +/*---------------------------------------------------------------------------- +| Returns 1 if the 128-bit value formed by concatenating `a0' and `a1' is +| not equal to the 128-bit value formed by concatenating `b0' and `b1'. +| Otherwise, returns 0. +*----------------------------------------------------------------------------*/ + +static inline bool ne128(uint64_t a0, uint64_t a1, uint64_t b0, uint64_t b1) +{ + return a0 != b0 || a1 != b1; +} + +#endif diff --git a/include/fpu/softfloat-types.h b/include/fpu/softfloat-types.h new file mode 100644 index 000000000..8a3f20fae --- /dev/null +++ b/include/fpu/softfloat-types.h @@ -0,0 +1,183 @@ +/* + * QEMU float support + * + * The code in this source file is derived from release 2a of the SoftFloat + * IEC/IEEE Floating-point Arithmetic Package. Those parts of the code (and + * some later contributions) are provided under that license, as detailed below. + * It has subsequently been modified by contributors to the QEMU Project, + * so some portions are provided under: + * the SoftFloat-2a license + * the BSD license + * GPL-v2-or-later + * + * This header holds definitions for code that might be dealing with + * softfloat types but not need access to the actual library functions. + */ +/* +=============================================================================== +This C header file is part of the SoftFloat IEC/IEEE Floating-point +Arithmetic Package, Release 2a. + +Written by John R. Hauser. This work was made possible in part by the +International Computer Science Institute, located at Suite 600, 1947 Center +Street, Berkeley, California 94704. Funding was partially provided by the +National Science Foundation under grant MIP-9311980. The original version +of this code was written as part of a project to build a fixed-point vector +processor in collaboration with the University of California at Berkeley, +overseen by Profs. Nelson Morgan and John Wawrzynek. More information +is available through the Web page `http://HTTP.CS.Berkeley.EDU/~jhauser/ +arithmetic/SoftFloat.html'. + +THIS SOFTWARE IS DISTRIBUTED AS IS, FOR FREE. Although reasonable effort +has been made to avoid it, THIS SOFTWARE MAY CONTAIN FAULTS THAT WILL AT +TIMES RESULT IN INCORRECT BEHAVIOR. USE OF THIS SOFTWARE IS RESTRICTED TO +PERSONS AND ORGANIZATIONS WHO CAN AND WILL TAKE FULL RESPONSIBILITY FOR ANY +AND ALL LOSSES, COSTS, OR OTHER PROBLEMS ARISING FROM ITS USE. + +Derivative works are acceptable, even for commercial purposes, so long as +(1) they include prominent notice that the work is derivative, and (2) they +include prominent notice akin to these four paragraphs for those parts of +this code that are retained. + +=============================================================================== +*/ + +/* BSD licensing: + * Copyright (c) 2006, Fabrice Bellard + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials provided with the distribution. + * + * 3. Neither the name of the copyright holder nor the names of its contributors + * may be used to endorse or promote products derived from this software without + * specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF + * THE POSSIBILITY OF SUCH DAMAGE. + */ + +/* Portions of this work are licensed under the terms of the GNU GPL, + * version 2 or later. See the COPYING file in the top-level directory. + */ + +#ifndef SOFTFLOAT_TYPES_H +#define SOFTFLOAT_TYPES_H + +/* + * Software IEC/IEEE floating-point types. + */ + +typedef uint16_t float16; +typedef uint32_t float32; +typedef uint64_t float64; +#define float16_val(x) (x) +#define float32_val(x) (x) +#define float64_val(x) (x) +#define make_float16(x) (x) +#define make_float32(x) (x) +#define make_float64(x) (x) +#define const_float16(x) (x) +#define const_float32(x) (x) +#define const_float64(x) (x) +typedef struct { + uint64_t low; + uint16_t high; +} floatx80; +#define make_floatx80(exp, mant) ((floatx80) { mant, exp }) +#define make_floatx80_init(exp, mant) { .low = mant, .high = exp } +typedef struct { +#ifdef HOST_WORDS_BIGENDIAN + uint64_t high, low; +#else + uint64_t low, high; +#endif +} float128; +#define make_float128(high_, low_) ((float128) { .high = high_, .low = low_ }) +#define make_float128_init(high_, low_) { .high = high_, .low = low_ } + +/* + * Software neural-network floating-point types. + */ +typedef uint16_t bfloat16; + +/* + * Software IEC/IEEE floating-point underflow tininess-detection mode. + */ + +#define float_tininess_after_rounding false +#define float_tininess_before_rounding true + +/* + *Software IEC/IEEE floating-point rounding mode. + */ + +typedef enum __attribute__((__packed__)) { + float_round_nearest_even = 0, + float_round_down = 1, + float_round_up = 2, + float_round_to_zero = 3, + float_round_ties_away = 4, + /* Not an IEEE rounding mode: round to the closest odd mantissa value */ + float_round_to_odd = 5, +} FloatRoundMode; + +/* + * Software IEC/IEEE floating-point exception flags. + */ + +enum { + float_flag_invalid = 1, + float_flag_divbyzero = 4, + float_flag_overflow = 8, + float_flag_underflow = 16, + float_flag_inexact = 32, + float_flag_input_denormal = 64, + float_flag_output_denormal = 128 +}; + + +/* + * Floating Point Status. Individual architectures may maintain + * several versions of float_status for different functions. The + * correct status for the operation is then passed by reference to + * most of the softfloat functions. + */ + +typedef struct float_status { + FloatRoundMode float_rounding_mode; + uint8_t float_exception_flags; + signed char floatx80_rounding_precision; + bool tininess_before_rounding; + /* should denormalised results go to zero and set the inexact flag? */ + bool flush_to_zero; + /* should denormalised inputs go to zero and set the input_denormal flag? */ + bool flush_inputs_to_zero; + bool default_nan_mode; + /* + * The flags below are not used on all specializations and may + * constant fold away (see snan_bit_is_one()/no_signalling_nans() in + * softfloat-specialize.inc.c) + */ + bool snan_bit_is_one; + bool use_first_nan; + bool no_signaling_nans; +} float_status; + +#endif /* SOFTFLOAT_TYPES_H */ diff --git a/include/fpu/softfloat.h b/include/fpu/softfloat.h new file mode 100644 index 000000000..78ad5ca73 --- /dev/null +++ b/include/fpu/softfloat.h @@ -0,0 +1,1303 @@ +/* + * QEMU float support + * + * The code in this source file is derived from release 2a of the SoftFloat + * IEC/IEEE Floating-point Arithmetic Package. Those parts of the code (and + * some later contributions) are provided under that license, as detailed below. + * It has subsequently been modified by contributors to the QEMU Project, + * so some portions are provided under: + * the SoftFloat-2a license + * the BSD license + * GPL-v2-or-later + * + * Any future contributions to this file after December 1st 2014 will be + * taken to be licensed under the Softfloat-2a license unless specifically + * indicated otherwise. + */ + +/* +=============================================================================== +This C header file is part of the SoftFloat IEC/IEEE Floating-point +Arithmetic Package, Release 2a. + +Written by John R. Hauser. This work was made possible in part by the +International Computer Science Institute, located at Suite 600, 1947 Center +Street, Berkeley, California 94704. Funding was partially provided by the +National Science Foundation under grant MIP-9311980. The original version +of this code was written as part of a project to build a fixed-point vector +processor in collaboration with the University of California at Berkeley, +overseen by Profs. Nelson Morgan and John Wawrzynek. More information +is available through the Web page `http://HTTP.CS.Berkeley.EDU/~jhauser/ +arithmetic/SoftFloat.html'. + +THIS SOFTWARE IS DISTRIBUTED AS IS, FOR FREE. Although reasonable effort +has been made to avoid it, THIS SOFTWARE MAY CONTAIN FAULTS THAT WILL AT +TIMES RESULT IN INCORRECT BEHAVIOR. USE OF THIS SOFTWARE IS RESTRICTED TO +PERSONS AND ORGANIZATIONS WHO CAN AND WILL TAKE FULL RESPONSIBILITY FOR ANY +AND ALL LOSSES, COSTS, OR OTHER PROBLEMS ARISING FROM ITS USE. + +Derivative works are acceptable, even for commercial purposes, so long as +(1) they include prominent notice that the work is derivative, and (2) they +include prominent notice akin to these four paragraphs for those parts of +this code that are retained. + +=============================================================================== +*/ + +/* BSD licensing: + * Copyright (c) 2006, Fabrice Bellard + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials provided with the distribution. + * + * 3. Neither the name of the copyright holder nor the names of its contributors + * may be used to endorse or promote products derived from this software without + * specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF + * THE POSSIBILITY OF SUCH DAMAGE. + */ + +/* Portions of this work are licensed under the terms of the GNU GPL, + * version 2 or later. See the COPYING file in the top-level directory. + */ + +#ifndef SOFTFLOAT_H +#define SOFTFLOAT_H + +/*---------------------------------------------------------------------------- +| Software IEC/IEEE floating-point ordering relations +*----------------------------------------------------------------------------*/ + +typedef enum { + float_relation_less = -1, + float_relation_equal = 0, + float_relation_greater = 1, + float_relation_unordered = 2 +} FloatRelation; + +#include "fpu/softfloat-types.h" +#include "fpu/softfloat-helpers.h" + +/*---------------------------------------------------------------------------- +| Routine to raise any or all of the software IEC/IEEE floating-point +| exception flags. +*----------------------------------------------------------------------------*/ +void float_raise(uint8_t flags, float_status *status); + +/*---------------------------------------------------------------------------- +| If `a' is denormal and we are in flush-to-zero mode then set the +| input-denormal exception and return zero. Otherwise just return the value. +*----------------------------------------------------------------------------*/ +float16 float16_squash_input_denormal(float16 a, float_status *status); +float32 float32_squash_input_denormal(float32 a, float_status *status); +float64 float64_squash_input_denormal(float64 a, float_status *status); +bfloat16 bfloat16_squash_input_denormal(bfloat16 a, float_status *status); + +/*---------------------------------------------------------------------------- +| Options to indicate which negations to perform in float*_muladd() +| Using these differs from negating an input or output before calling +| the muladd function in that this means that a NaN doesn't have its +| sign bit inverted before it is propagated. +| We also support halving the result before rounding, as a special +| case to support the ARM fused-sqrt-step instruction FRSQRTS. +*----------------------------------------------------------------------------*/ +enum { + float_muladd_negate_c = 1, + float_muladd_negate_product = 2, + float_muladd_negate_result = 4, + float_muladd_halve_result = 8, +}; + +/*---------------------------------------------------------------------------- +| Software IEC/IEEE integer-to-floating-point conversion routines. +*----------------------------------------------------------------------------*/ + +float16 int16_to_float16_scalbn(int16_t a, int, float_status *status); +float16 int32_to_float16_scalbn(int32_t a, int, float_status *status); +float16 int64_to_float16_scalbn(int64_t a, int, float_status *status); +float16 uint16_to_float16_scalbn(uint16_t a, int, float_status *status); +float16 uint32_to_float16_scalbn(uint32_t a, int, float_status *status); +float16 uint64_to_float16_scalbn(uint64_t a, int, float_status *status); + +float16 int8_to_float16(int8_t a, float_status *status); +float16 int16_to_float16(int16_t a, float_status *status); +float16 int32_to_float16(int32_t a, float_status *status); +float16 int64_to_float16(int64_t a, float_status *status); +float16 uint8_to_float16(uint8_t a, float_status *status); +float16 uint16_to_float16(uint16_t a, float_status *status); +float16 uint32_to_float16(uint32_t a, float_status *status); +float16 uint64_to_float16(uint64_t a, float_status *status); + +float32 int16_to_float32_scalbn(int16_t, int, float_status *status); +float32 int32_to_float32_scalbn(int32_t, int, float_status *status); +float32 int64_to_float32_scalbn(int64_t, int, float_status *status); +float32 uint16_to_float32_scalbn(uint16_t, int, float_status *status); +float32 uint32_to_float32_scalbn(uint32_t, int, float_status *status); +float32 uint64_to_float32_scalbn(uint64_t, int, float_status *status); + +float32 int16_to_float32(int16_t, float_status *status); +float32 int32_to_float32(int32_t, float_status *status); +float32 int64_to_float32(int64_t, float_status *status); +float32 uint16_to_float32(uint16_t, float_status *status); +float32 uint32_to_float32(uint32_t, float_status *status); +float32 uint64_to_float32(uint64_t, float_status *status); + +float64 int16_to_float64_scalbn(int16_t, int, float_status *status); +float64 int32_to_float64_scalbn(int32_t, int, float_status *status); +float64 int64_to_float64_scalbn(int64_t, int, float_status *status); +float64 uint16_to_float64_scalbn(uint16_t, int, float_status *status); +float64 uint32_to_float64_scalbn(uint32_t, int, float_status *status); +float64 uint64_to_float64_scalbn(uint64_t, int, float_status *status); + +float64 int16_to_float64(int16_t, float_status *status); +float64 int32_to_float64(int32_t, float_status *status); +float64 int64_to_float64(int64_t, float_status *status); +float64 uint16_to_float64(uint16_t, float_status *status); +float64 uint32_to_float64(uint32_t, float_status *status); +float64 uint64_to_float64(uint64_t, float_status *status); + +floatx80 int32_to_floatx80(int32_t, float_status *status); +floatx80 int64_to_floatx80(int64_t, float_status *status); + +float128 int32_to_float128(int32_t, float_status *status); +float128 int64_to_float128(int64_t, float_status *status); +float128 uint64_to_float128(uint64_t, float_status *status); + +/*---------------------------------------------------------------------------- +| Software half-precision conversion routines. +*----------------------------------------------------------------------------*/ + +float16 float32_to_float16(float32, bool ieee, float_status *status); +float32 float16_to_float32(float16, bool ieee, float_status *status); +float16 float64_to_float16(float64 a, bool ieee, float_status *status); +float64 float16_to_float64(float16 a, bool ieee, float_status *status); + +int8_t float16_to_int8_scalbn(float16, FloatRoundMode, int, + float_status *status); +int16_t float16_to_int16_scalbn(float16, FloatRoundMode, int, float_status *); +int32_t float16_to_int32_scalbn(float16, FloatRoundMode, int, float_status *); +int64_t float16_to_int64_scalbn(float16, FloatRoundMode, int, float_status *); + +int8_t float16_to_int8(float16, float_status *status); +int16_t float16_to_int16(float16, float_status *status); +int32_t float16_to_int32(float16, float_status *status); +int64_t float16_to_int64(float16, float_status *status); + +int16_t float16_to_int16_round_to_zero(float16, float_status *status); +int32_t float16_to_int32_round_to_zero(float16, float_status *status); +int64_t float16_to_int64_round_to_zero(float16, float_status *status); + +uint8_t float16_to_uint8_scalbn(float16 a, FloatRoundMode, + int, float_status *status); +uint16_t float16_to_uint16_scalbn(float16 a, FloatRoundMode, + int, float_status *status); +uint32_t float16_to_uint32_scalbn(float16 a, FloatRoundMode, + int, float_status *status); +uint64_t float16_to_uint64_scalbn(float16 a, FloatRoundMode, + int, float_status *status); + +uint8_t float16_to_uint8(float16 a, float_status *status); +uint16_t float16_to_uint16(float16 a, float_status *status); +uint32_t float16_to_uint32(float16 a, float_status *status); +uint64_t float16_to_uint64(float16 a, float_status *status); + +uint16_t float16_to_uint16_round_to_zero(float16 a, float_status *status); +uint32_t float16_to_uint32_round_to_zero(float16 a, float_status *status); +uint64_t float16_to_uint64_round_to_zero(float16 a, float_status *status); + +/*---------------------------------------------------------------------------- +| Software half-precision operations. +*----------------------------------------------------------------------------*/ + +float16 float16_round_to_int(float16, float_status *status); +float16 float16_add(float16, float16, float_status *status); +float16 float16_sub(float16, float16, float_status *status); +float16 float16_mul(float16, float16, float_status *status); +float16 float16_muladd(float16, float16, float16, int, float_status *status); +float16 float16_div(float16, float16, float_status *status); +float16 float16_scalbn(float16, int, float_status *status); +float16 float16_min(float16, float16, float_status *status); +float16 float16_max(float16, float16, float_status *status); +float16 float16_minnum(float16, float16, float_status *status); +float16 float16_maxnum(float16, float16, float_status *status); +float16 float16_minnummag(float16, float16, float_status *status); +float16 float16_maxnummag(float16, float16, float_status *status); +float16 float16_sqrt(float16, float_status *status); +FloatRelation float16_compare(float16, float16, float_status *status); +FloatRelation float16_compare_quiet(float16, float16, float_status *status); + +bool float16_is_quiet_nan(float16, float_status *status); +bool float16_is_signaling_nan(float16, float_status *status); +float16 float16_silence_nan(float16, float_status *status); + +static inline bool float16_is_any_nan(float16 a) +{ + return ((float16_val(a) & ~0x8000) > 0x7c00); +} + +static inline bool float16_is_neg(float16 a) +{ + return float16_val(a) >> 15; +} + +static inline bool float16_is_infinity(float16 a) +{ + return (float16_val(a) & 0x7fff) == 0x7c00; +} + +static inline bool float16_is_zero(float16 a) +{ + return (float16_val(a) & 0x7fff) == 0; +} + +static inline bool float16_is_zero_or_denormal(float16 a) +{ + return (float16_val(a) & 0x7c00) == 0; +} + +static inline bool float16_is_normal(float16 a) +{ + return (((float16_val(a) >> 10) + 1) & 0x1f) >= 2; +} + +static inline float16 float16_abs(float16 a) +{ + /* Note that abs does *not* handle NaN specially, nor does + * it flush denormal inputs to zero. + */ + return make_float16(float16_val(a) & 0x7fff); +} + +static inline float16 float16_chs(float16 a) +{ + /* Note that chs does *not* handle NaN specially, nor does + * it flush denormal inputs to zero. + */ + return make_float16(float16_val(a) ^ 0x8000); +} + +static inline float16 float16_set_sign(float16 a, int sign) +{ + return make_float16((float16_val(a) & 0x7fff) | (sign << 15)); +} + +static inline bool float16_eq(float16 a, float16 b, float_status *s) +{ + return float16_compare(a, b, s) == float_relation_equal; +} + +static inline bool float16_le(float16 a, float16 b, float_status *s) +{ + return float16_compare(a, b, s) <= float_relation_equal; +} + +static inline bool float16_lt(float16 a, float16 b, float_status *s) +{ + return float16_compare(a, b, s) < float_relation_equal; +} + +static inline bool float16_unordered(float16 a, float16 b, float_status *s) +{ + return float16_compare(a, b, s) == float_relation_unordered; +} + +static inline bool float16_eq_quiet(float16 a, float16 b, float_status *s) +{ + return float16_compare_quiet(a, b, s) == float_relation_equal; +} + +static inline bool float16_le_quiet(float16 a, float16 b, float_status *s) +{ + return float16_compare_quiet(a, b, s) <= float_relation_equal; +} + +static inline bool float16_lt_quiet(float16 a, float16 b, float_status *s) +{ + return float16_compare_quiet(a, b, s) < float_relation_equal; +} + +static inline bool float16_unordered_quiet(float16 a, float16 b, + float_status *s) +{ + return float16_compare_quiet(a, b, s) == float_relation_unordered; +} + +#define float16_zero make_float16(0) +#define float16_half make_float16(0x3800) +#define float16_one make_float16(0x3c00) +#define float16_one_point_five make_float16(0x3e00) +#define float16_two make_float16(0x4000) +#define float16_three make_float16(0x4200) +#define float16_infinity make_float16(0x7c00) + +/*---------------------------------------------------------------------------- +| Software bfloat16 conversion routines. +*----------------------------------------------------------------------------*/ + +bfloat16 bfloat16_round_to_int(bfloat16, float_status *status); +bfloat16 float32_to_bfloat16(float32, float_status *status); +float32 bfloat16_to_float32(bfloat16, float_status *status); +bfloat16 float64_to_bfloat16(float64 a, float_status *status); +float64 bfloat16_to_float64(bfloat16 a, float_status *status); + +int16_t bfloat16_to_int16_scalbn(bfloat16, FloatRoundMode, + int, float_status *status); +int32_t bfloat16_to_int32_scalbn(bfloat16, FloatRoundMode, + int, float_status *status); +int64_t bfloat16_to_int64_scalbn(bfloat16, FloatRoundMode, + int, float_status *status); + +int16_t bfloat16_to_int16(bfloat16, float_status *status); +int32_t bfloat16_to_int32(bfloat16, float_status *status); +int64_t bfloat16_to_int64(bfloat16, float_status *status); + +int16_t bfloat16_to_int16_round_to_zero(bfloat16, float_status *status); +int32_t bfloat16_to_int32_round_to_zero(bfloat16, float_status *status); +int64_t bfloat16_to_int64_round_to_zero(bfloat16, float_status *status); + +uint16_t bfloat16_to_uint16_scalbn(bfloat16 a, FloatRoundMode, + int, float_status *status); +uint32_t bfloat16_to_uint32_scalbn(bfloat16 a, FloatRoundMode, + int, float_status *status); +uint64_t bfloat16_to_uint64_scalbn(bfloat16 a, FloatRoundMode, + int, float_status *status); + +uint16_t bfloat16_to_uint16(bfloat16 a, float_status *status); +uint32_t bfloat16_to_uint32(bfloat16 a, float_status *status); +uint64_t bfloat16_to_uint64(bfloat16 a, float_status *status); + +uint16_t bfloat16_to_uint16_round_to_zero(bfloat16 a, float_status *status); +uint32_t bfloat16_to_uint32_round_to_zero(bfloat16 a, float_status *status); +uint64_t bfloat16_to_uint64_round_to_zero(bfloat16 a, float_status *status); + +bfloat16 int16_to_bfloat16_scalbn(int16_t a, int, float_status *status); +bfloat16 int32_to_bfloat16_scalbn(int32_t a, int, float_status *status); +bfloat16 int64_to_bfloat16_scalbn(int64_t a, int, float_status *status); +bfloat16 uint16_to_bfloat16_scalbn(uint16_t a, int, float_status *status); +bfloat16 uint32_to_bfloat16_scalbn(uint32_t a, int, float_status *status); +bfloat16 uint64_to_bfloat16_scalbn(uint64_t a, int, float_status *status); + +bfloat16 int16_to_bfloat16(int16_t a, float_status *status); +bfloat16 int32_to_bfloat16(int32_t a, float_status *status); +bfloat16 int64_to_bfloat16(int64_t a, float_status *status); +bfloat16 uint16_to_bfloat16(uint16_t a, float_status *status); +bfloat16 uint32_to_bfloat16(uint32_t a, float_status *status); +bfloat16 uint64_to_bfloat16(uint64_t a, float_status *status); + +/*---------------------------------------------------------------------------- +| Software bfloat16 operations. +*----------------------------------------------------------------------------*/ + +bfloat16 bfloat16_add(bfloat16, bfloat16, float_status *status); +bfloat16 bfloat16_sub(bfloat16, bfloat16, float_status *status); +bfloat16 bfloat16_mul(bfloat16, bfloat16, float_status *status); +bfloat16 bfloat16_div(bfloat16, bfloat16, float_status *status); +bfloat16 bfloat16_muladd(bfloat16, bfloat16, bfloat16, int, + float_status *status); +float16 bfloat16_scalbn(bfloat16, int, float_status *status); +bfloat16 bfloat16_min(bfloat16, bfloat16, float_status *status); +bfloat16 bfloat16_max(bfloat16, bfloat16, float_status *status); +bfloat16 bfloat16_minnum(bfloat16, bfloat16, float_status *status); +bfloat16 bfloat16_maxnum(bfloat16, bfloat16, float_status *status); +bfloat16 bfloat16_minnummag(bfloat16, bfloat16, float_status *status); +bfloat16 bfloat16_maxnummag(bfloat16, bfloat16, float_status *status); +bfloat16 bfloat16_sqrt(bfloat16, float_status *status); +FloatRelation bfloat16_compare(bfloat16, bfloat16, float_status *status); +FloatRelation bfloat16_compare_quiet(bfloat16, bfloat16, float_status *status); + +bool bfloat16_is_quiet_nan(bfloat16, float_status *status); +bool bfloat16_is_signaling_nan(bfloat16, float_status *status); +bfloat16 bfloat16_silence_nan(bfloat16, float_status *status); +bfloat16 bfloat16_default_nan(float_status *status); + +static inline bool bfloat16_is_any_nan(bfloat16 a) +{ + return ((a & ~0x8000) > 0x7F80); +} + +static inline bool bfloat16_is_neg(bfloat16 a) +{ + return a >> 15; +} + +static inline bool bfloat16_is_infinity(bfloat16 a) +{ + return (a & 0x7fff) == 0x7F80; +} + +static inline bool bfloat16_is_zero(bfloat16 a) +{ + return (a & 0x7fff) == 0; +} + +static inline bool bfloat16_is_zero_or_denormal(bfloat16 a) +{ + return (a & 0x7F80) == 0; +} + +static inline bool bfloat16_is_normal(bfloat16 a) +{ + return (((a >> 7) + 1) & 0xff) >= 2; +} + +static inline bfloat16 bfloat16_abs(bfloat16 a) +{ + /* Note that abs does *not* handle NaN specially, nor does + * it flush denormal inputs to zero. + */ + return a & 0x7fff; +} + +static inline bfloat16 bfloat16_chs(bfloat16 a) +{ + /* Note that chs does *not* handle NaN specially, nor does + * it flush denormal inputs to zero. + */ + return a ^ 0x8000; +} + +static inline bfloat16 bfloat16_set_sign(bfloat16 a, int sign) +{ + return (a & 0x7fff) | (sign << 15); +} + +static inline bool bfloat16_eq(bfloat16 a, bfloat16 b, float_status *s) +{ + return bfloat16_compare(a, b, s) == float_relation_equal; +} + +static inline bool bfloat16_le(bfloat16 a, bfloat16 b, float_status *s) +{ + return bfloat16_compare(a, b, s) <= float_relation_equal; +} + +static inline bool bfloat16_lt(bfloat16 a, bfloat16 b, float_status *s) +{ + return bfloat16_compare(a, b, s) < float_relation_equal; +} + +static inline bool bfloat16_unordered(bfloat16 a, bfloat16 b, float_status *s) +{ + return bfloat16_compare(a, b, s) == float_relation_unordered; +} + +static inline bool bfloat16_eq_quiet(bfloat16 a, bfloat16 b, float_status *s) +{ + return bfloat16_compare_quiet(a, b, s) == float_relation_equal; +} + +static inline bool bfloat16_le_quiet(bfloat16 a, bfloat16 b, float_status *s) +{ + return bfloat16_compare_quiet(a, b, s) <= float_relation_equal; +} + +static inline bool bfloat16_lt_quiet(bfloat16 a, bfloat16 b, float_status *s) +{ + return bfloat16_compare_quiet(a, b, s) < float_relation_equal; +} + +static inline bool bfloat16_unordered_quiet(bfloat16 a, bfloat16 b, + float_status *s) +{ + return bfloat16_compare_quiet(a, b, s) == float_relation_unordered; +} + +#define bfloat16_zero 0 +#define bfloat16_half 0x3f00 +#define bfloat16_one 0x3f80 +#define bfloat16_one_point_five 0x3fc0 +#define bfloat16_two 0x4000 +#define bfloat16_three 0x4040 +#define bfloat16_infinity 0x7f80 + +/*---------------------------------------------------------------------------- +| The pattern for a default generated half-precision NaN. +*----------------------------------------------------------------------------*/ +float16 float16_default_nan(float_status *status); + +/*---------------------------------------------------------------------------- +| Software IEC/IEEE single-precision conversion routines. +*----------------------------------------------------------------------------*/ + +int16_t float32_to_int16_scalbn(float32, FloatRoundMode, int, float_status *); +int32_t float32_to_int32_scalbn(float32, FloatRoundMode, int, float_status *); +int64_t float32_to_int64_scalbn(float32, FloatRoundMode, int, float_status *); + +int16_t float32_to_int16(float32, float_status *status); +int32_t float32_to_int32(float32, float_status *status); +int64_t float32_to_int64(float32, float_status *status); + +int16_t float32_to_int16_round_to_zero(float32, float_status *status); +int32_t float32_to_int32_round_to_zero(float32, float_status *status); +int64_t float32_to_int64_round_to_zero(float32, float_status *status); + +uint16_t float32_to_uint16_scalbn(float32, FloatRoundMode, int, float_status *); +uint32_t float32_to_uint32_scalbn(float32, FloatRoundMode, int, float_status *); +uint64_t float32_to_uint64_scalbn(float32, FloatRoundMode, int, float_status *); + +uint16_t float32_to_uint16(float32, float_status *status); +uint32_t float32_to_uint32(float32, float_status *status); +uint64_t float32_to_uint64(float32, float_status *status); + +uint16_t float32_to_uint16_round_to_zero(float32, float_status *status); +uint32_t float32_to_uint32_round_to_zero(float32, float_status *status); +uint64_t float32_to_uint64_round_to_zero(float32, float_status *status); + +float64 float32_to_float64(float32, float_status *status); +floatx80 float32_to_floatx80(float32, float_status *status); +float128 float32_to_float128(float32, float_status *status); + +/*---------------------------------------------------------------------------- +| Software IEC/IEEE single-precision operations. +*----------------------------------------------------------------------------*/ +float32 float32_round_to_int(float32, float_status *status); +float32 float32_add(float32, float32, float_status *status); +float32 float32_sub(float32, float32, float_status *status); +float32 float32_mul(float32, float32, float_status *status); +float32 float32_div(float32, float32, float_status *status); +float32 float32_rem(float32, float32, float_status *status); +float32 float32_muladd(float32, float32, float32, int, float_status *status); +float32 float32_sqrt(float32, float_status *status); +float32 float32_exp2(float32, float_status *status); +float32 float32_log2(float32, float_status *status); +FloatRelation float32_compare(float32, float32, float_status *status); +FloatRelation float32_compare_quiet(float32, float32, float_status *status); +float32 float32_min(float32, float32, float_status *status); +float32 float32_max(float32, float32, float_status *status); +float32 float32_minnum(float32, float32, float_status *status); +float32 float32_maxnum(float32, float32, float_status *status); +float32 float32_minnummag(float32, float32, float_status *status); +float32 float32_maxnummag(float32, float32, float_status *status); +bool float32_is_quiet_nan(float32, float_status *status); +bool float32_is_signaling_nan(float32, float_status *status); +float32 float32_silence_nan(float32, float_status *status); +float32 float32_scalbn(float32, int, float_status *status); + +static inline float32 float32_abs(float32 a) +{ + /* Note that abs does *not* handle NaN specially, nor does + * it flush denormal inputs to zero. + */ + return make_float32(float32_val(a) & 0x7fffffff); +} + +static inline float32 float32_chs(float32 a) +{ + /* Note that chs does *not* handle NaN specially, nor does + * it flush denormal inputs to zero. + */ + return make_float32(float32_val(a) ^ 0x80000000); +} + +static inline bool float32_is_infinity(float32 a) +{ + return (float32_val(a) & 0x7fffffff) == 0x7f800000; +} + +static inline bool float32_is_neg(float32 a) +{ + return float32_val(a) >> 31; +} + +static inline bool float32_is_zero(float32 a) +{ + return (float32_val(a) & 0x7fffffff) == 0; +} + +static inline bool float32_is_any_nan(float32 a) +{ + return ((float32_val(a) & ~(1 << 31)) > 0x7f800000UL); +} + +static inline bool float32_is_zero_or_denormal(float32 a) +{ + return (float32_val(a) & 0x7f800000) == 0; +} + +static inline bool float32_is_normal(float32 a) +{ + return (((float32_val(a) >> 23) + 1) & 0xff) >= 2; +} + +static inline bool float32_is_denormal(float32 a) +{ + return float32_is_zero_or_denormal(a) && !float32_is_zero(a); +} + +static inline bool float32_is_zero_or_normal(float32 a) +{ + return float32_is_normal(a) || float32_is_zero(a); +} + +static inline float32 float32_set_sign(float32 a, int sign) +{ + return make_float32((float32_val(a) & 0x7fffffff) | (sign << 31)); +} + +static inline bool float32_eq(float32 a, float32 b, float_status *s) +{ + return float32_compare(a, b, s) == float_relation_equal; +} + +static inline bool float32_le(float32 a, float32 b, float_status *s) +{ + return float32_compare(a, b, s) <= float_relation_equal; +} + +static inline bool float32_lt(float32 a, float32 b, float_status *s) +{ + return float32_compare(a, b, s) < float_relation_equal; +} + +static inline bool float32_unordered(float32 a, float32 b, float_status *s) +{ + return float32_compare(a, b, s) == float_relation_unordered; +} + +static inline bool float32_eq_quiet(float32 a, float32 b, float_status *s) +{ + return float32_compare_quiet(a, b, s) == float_relation_equal; +} + +static inline bool float32_le_quiet(float32 a, float32 b, float_status *s) +{ + return float32_compare_quiet(a, b, s) <= float_relation_equal; +} + +static inline bool float32_lt_quiet(float32 a, float32 b, float_status *s) +{ + return float32_compare_quiet(a, b, s) < float_relation_equal; +} + +static inline bool float32_unordered_quiet(float32 a, float32 b, + float_status *s) +{ + return float32_compare_quiet(a, b, s) == float_relation_unordered; +} + +#define float32_zero make_float32(0) +#define float32_half make_float32(0x3f000000) +#define float32_one make_float32(0x3f800000) +#define float32_one_point_five make_float32(0x3fc00000) +#define float32_two make_float32(0x40000000) +#define float32_three make_float32(0x40400000) +#define float32_infinity make_float32(0x7f800000) + +/*---------------------------------------------------------------------------- +| Packs the sign `zSign', exponent `zExp', and significand `zSig' into a +| single-precision floating-point value, returning the result. After being +| shifted into the proper positions, the three fields are simply added +| together to form the result. This means that any integer portion of `zSig' +| will be added into the exponent. Since a properly normalized significand +| will have an integer portion equal to 1, the `zExp' input should be 1 less +| than the desired result exponent whenever `zSig' is a complete, normalized +| significand. +*----------------------------------------------------------------------------*/ + +static inline float32 packFloat32(bool zSign, int zExp, uint32_t zSig) +{ + return make_float32( + (((uint32_t)zSign) << 31) + (((uint32_t)zExp) << 23) + zSig); +} + +/*---------------------------------------------------------------------------- +| The pattern for a default generated single-precision NaN. +*----------------------------------------------------------------------------*/ +float32 float32_default_nan(float_status *status); + +/*---------------------------------------------------------------------------- +| Software IEC/IEEE double-precision conversion routines. +*----------------------------------------------------------------------------*/ + +int16_t float64_to_int16_scalbn(float64, FloatRoundMode, int, float_status *); +int32_t float64_to_int32_scalbn(float64, FloatRoundMode, int, float_status *); +int64_t float64_to_int64_scalbn(float64, FloatRoundMode, int, float_status *); + +int16_t float64_to_int16(float64, float_status *status); +int32_t float64_to_int32(float64, float_status *status); +int64_t float64_to_int64(float64, float_status *status); + +int16_t float64_to_int16_round_to_zero(float64, float_status *status); +int32_t float64_to_int32_round_to_zero(float64, float_status *status); +int64_t float64_to_int64_round_to_zero(float64, float_status *status); + +uint16_t float64_to_uint16_scalbn(float64, FloatRoundMode, int, float_status *); +uint32_t float64_to_uint32_scalbn(float64, FloatRoundMode, int, float_status *); +uint64_t float64_to_uint64_scalbn(float64, FloatRoundMode, int, float_status *); + +uint16_t float64_to_uint16(float64, float_status *status); +uint32_t float64_to_uint32(float64, float_status *status); +uint64_t float64_to_uint64(float64, float_status *status); + +uint16_t float64_to_uint16_round_to_zero(float64, float_status *status); +uint32_t float64_to_uint32_round_to_zero(float64, float_status *status); +uint64_t float64_to_uint64_round_to_zero(float64, float_status *status); + +float32 float64_to_float32(float64, float_status *status); +floatx80 float64_to_floatx80(float64, float_status *status); +float128 float64_to_float128(float64, float_status *status); + +/*---------------------------------------------------------------------------- +| Software IEC/IEEE double-precision operations. +*----------------------------------------------------------------------------*/ +float64 float64_round_to_int(float64, float_status *status); +float64 float64_add(float64, float64, float_status *status); +float64 float64_sub(float64, float64, float_status *status); +float64 float64_mul(float64, float64, float_status *status); +float64 float64_div(float64, float64, float_status *status); +float64 float64_rem(float64, float64, float_status *status); +float64 float64_muladd(float64, float64, float64, int, float_status *status); +float64 float64_sqrt(float64, float_status *status); +float64 float64_log2(float64, float_status *status); +FloatRelation float64_compare(float64, float64, float_status *status); +FloatRelation float64_compare_quiet(float64, float64, float_status *status); +float64 float64_min(float64, float64, float_status *status); +float64 float64_max(float64, float64, float_status *status); +float64 float64_minnum(float64, float64, float_status *status); +float64 float64_maxnum(float64, float64, float_status *status); +float64 float64_minnummag(float64, float64, float_status *status); +float64 float64_maxnummag(float64, float64, float_status *status); +bool float64_is_quiet_nan(float64 a, float_status *status); +bool float64_is_signaling_nan(float64, float_status *status); +float64 float64_silence_nan(float64, float_status *status); +float64 float64_scalbn(float64, int, float_status *status); + +static inline float64 float64_abs(float64 a) +{ + /* Note that abs does *not* handle NaN specially, nor does + * it flush denormal inputs to zero. + */ + return make_float64(float64_val(a) & 0x7fffffffffffffffLL); +} + +static inline float64 float64_chs(float64 a) +{ + /* Note that chs does *not* handle NaN specially, nor does + * it flush denormal inputs to zero. + */ + return make_float64(float64_val(a) ^ 0x8000000000000000LL); +} + +static inline bool float64_is_infinity(float64 a) +{ + return (float64_val(a) & 0x7fffffffffffffffLL ) == 0x7ff0000000000000LL; +} + +static inline bool float64_is_neg(float64 a) +{ + return float64_val(a) >> 63; +} + +static inline bool float64_is_zero(float64 a) +{ + return (float64_val(a) & 0x7fffffffffffffffLL) == 0; +} + +static inline bool float64_is_any_nan(float64 a) +{ + return ((float64_val(a) & ~(1ULL << 63)) > 0x7ff0000000000000ULL); +} + +static inline bool float64_is_zero_or_denormal(float64 a) +{ + return (float64_val(a) & 0x7ff0000000000000LL) == 0; +} + +static inline bool float64_is_normal(float64 a) +{ + return (((float64_val(a) >> 52) + 1) & 0x7ff) >= 2; +} + +static inline bool float64_is_denormal(float64 a) +{ + return float64_is_zero_or_denormal(a) && !float64_is_zero(a); +} + +static inline bool float64_is_zero_or_normal(float64 a) +{ + return float64_is_normal(a) || float64_is_zero(a); +} + +static inline float64 float64_set_sign(float64 a, int sign) +{ + return make_float64((float64_val(a) & 0x7fffffffffffffffULL) + | ((int64_t)sign << 63)); +} + +static inline bool float64_eq(float64 a, float64 b, float_status *s) +{ + return float64_compare(a, b, s) == float_relation_equal; +} + +static inline bool float64_le(float64 a, float64 b, float_status *s) +{ + return float64_compare(a, b, s) <= float_relation_equal; +} + +static inline bool float64_lt(float64 a, float64 b, float_status *s) +{ + return float64_compare(a, b, s) < float_relation_equal; +} + +static inline bool float64_unordered(float64 a, float64 b, float_status *s) +{ + return float64_compare(a, b, s) == float_relation_unordered; +} + +static inline bool float64_eq_quiet(float64 a, float64 b, float_status *s) +{ + return float64_compare_quiet(a, b, s) == float_relation_equal; +} + +static inline bool float64_le_quiet(float64 a, float64 b, float_status *s) +{ + return float64_compare_quiet(a, b, s) <= float_relation_equal; +} + +static inline bool float64_lt_quiet(float64 a, float64 b, float_status *s) +{ + return float64_compare_quiet(a, b, s) < float_relation_equal; +} + +static inline bool float64_unordered_quiet(float64 a, float64 b, + float_status *s) +{ + return float64_compare_quiet(a, b, s) == float_relation_unordered; +} + +#define float64_zero make_float64(0) +#define float64_half make_float64(0x3fe0000000000000LL) +#define float64_one make_float64(0x3ff0000000000000LL) +#define float64_one_point_five make_float64(0x3FF8000000000000ULL) +#define float64_two make_float64(0x4000000000000000ULL) +#define float64_three make_float64(0x4008000000000000ULL) +#define float64_ln2 make_float64(0x3fe62e42fefa39efLL) +#define float64_infinity make_float64(0x7ff0000000000000LL) + +/*---------------------------------------------------------------------------- +| The pattern for a default generated double-precision NaN. +*----------------------------------------------------------------------------*/ +float64 float64_default_nan(float_status *status); + +/*---------------------------------------------------------------------------- +| Software IEC/IEEE extended double-precision conversion routines. +*----------------------------------------------------------------------------*/ +int32_t floatx80_to_int32(floatx80, float_status *status); +int32_t floatx80_to_int32_round_to_zero(floatx80, float_status *status); +int64_t floatx80_to_int64(floatx80, float_status *status); +int64_t floatx80_to_int64_round_to_zero(floatx80, float_status *status); +float32 floatx80_to_float32(floatx80, float_status *status); +float64 floatx80_to_float64(floatx80, float_status *status); +float128 floatx80_to_float128(floatx80, float_status *status); + +/*---------------------------------------------------------------------------- +| The pattern for an extended double-precision inf. +*----------------------------------------------------------------------------*/ +extern const floatx80 floatx80_infinity; + +/*---------------------------------------------------------------------------- +| Software IEC/IEEE extended double-precision operations. +*----------------------------------------------------------------------------*/ +floatx80 floatx80_round(floatx80 a, float_status *status); +floatx80 floatx80_round_to_int(floatx80, float_status *status); +floatx80 floatx80_add(floatx80, floatx80, float_status *status); +floatx80 floatx80_sub(floatx80, floatx80, float_status *status); +floatx80 floatx80_mul(floatx80, floatx80, float_status *status); +floatx80 floatx80_div(floatx80, floatx80, float_status *status); +floatx80 floatx80_modrem(floatx80, floatx80, bool, uint64_t *, + float_status *status); +floatx80 floatx80_mod(floatx80, floatx80, float_status *status); +floatx80 floatx80_rem(floatx80, floatx80, float_status *status); +floatx80 floatx80_sqrt(floatx80, float_status *status); +FloatRelation floatx80_compare(floatx80, floatx80, float_status *status); +FloatRelation floatx80_compare_quiet(floatx80, floatx80, float_status *status); +int floatx80_is_quiet_nan(floatx80, float_status *status); +int floatx80_is_signaling_nan(floatx80, float_status *status); +floatx80 floatx80_silence_nan(floatx80, float_status *status); +floatx80 floatx80_scalbn(floatx80, int, float_status *status); + +static inline floatx80 floatx80_abs(floatx80 a) +{ + a.high &= 0x7fff; + return a; +} + +static inline floatx80 floatx80_chs(floatx80 a) +{ + a.high ^= 0x8000; + return a; +} + +static inline bool floatx80_is_infinity(floatx80 a) +{ +#if defined(TARGET_M68K) + return (a.high & 0x7fff) == floatx80_infinity.high && !(a.low << 1); +#else + return (a.high & 0x7fff) == floatx80_infinity.high && + a.low == floatx80_infinity.low; +#endif +} + +static inline bool floatx80_is_neg(floatx80 a) +{ + return a.high >> 15; +} + +static inline bool floatx80_is_zero(floatx80 a) +{ + return (a.high & 0x7fff) == 0 && a.low == 0; +} + +static inline bool floatx80_is_zero_or_denormal(floatx80 a) +{ + return (a.high & 0x7fff) == 0; +} + +static inline bool floatx80_is_any_nan(floatx80 a) +{ + return ((a.high & 0x7fff) == 0x7fff) && (a.low<<1); +} + +static inline bool floatx80_eq(floatx80 a, floatx80 b, float_status *s) +{ + return floatx80_compare(a, b, s) == float_relation_equal; +} + +static inline bool floatx80_le(floatx80 a, floatx80 b, float_status *s) +{ + return floatx80_compare(a, b, s) <= float_relation_equal; +} + +static inline bool floatx80_lt(floatx80 a, floatx80 b, float_status *s) +{ + return floatx80_compare(a, b, s) < float_relation_equal; +} + +static inline bool floatx80_unordered(floatx80 a, floatx80 b, float_status *s) +{ + return floatx80_compare(a, b, s) == float_relation_unordered; +} + +static inline bool floatx80_eq_quiet(floatx80 a, floatx80 b, float_status *s) +{ + return floatx80_compare_quiet(a, b, s) == float_relation_equal; +} + +static inline bool floatx80_le_quiet(floatx80 a, floatx80 b, float_status *s) +{ + return floatx80_compare_quiet(a, b, s) <= float_relation_equal; +} + +static inline bool floatx80_lt_quiet(floatx80 a, floatx80 b, float_status *s) +{ + return floatx80_compare_quiet(a, b, s) < float_relation_equal; +} + +static inline bool floatx80_unordered_quiet(floatx80 a, floatx80 b, + float_status *s) +{ + return floatx80_compare_quiet(a, b, s) == float_relation_unordered; +} + +/*---------------------------------------------------------------------------- +| Return whether the given value is an invalid floatx80 encoding. +| Invalid floatx80 encodings arise when the integer bit is not set, but +| the exponent is not zero. The only times the integer bit is permitted to +| be zero is in subnormal numbers and the value zero. +| This includes what the Intel software developer's manual calls pseudo-NaNs, +| pseudo-infinities and un-normal numbers. It does not include +| pseudo-denormals, which must still be correctly handled as inputs even +| if they are never generated as outputs. +*----------------------------------------------------------------------------*/ +static inline bool floatx80_invalid_encoding(floatx80 a) +{ +#if defined(TARGET_M68K) + /*------------------------------------------------------------------------- + | With m68k, the explicit integer bit can be zero in the case of: + | - zeros (exp == 0, mantissa == 0) + | - denormalized numbers (exp == 0, mantissa != 0) + | - unnormalized numbers (exp != 0, exp < 0x7FFF) + | - infinities (exp == 0x7FFF, mantissa == 0) + | - not-a-numbers (exp == 0x7FFF, mantissa != 0) + | + | For infinities and NaNs, the explicit integer bit can be either one or + | zero. + | + | The IEEE 754 standard does not define a zero integer bit. Such a number + | is an unnormalized number. Hardware does not directly support + | denormalized and unnormalized numbers, but implicitly supports them by + | trapping them as unimplemented data types, allowing efficient conversion + | in software. + | + | See "M68000 FAMILY PROGRAMMER’S REFERENCE MANUAL", + | "1.6 FLOATING-POINT DATA TYPES" + *------------------------------------------------------------------------*/ + return false; +#else + return (a.low & (1ULL << 63)) == 0 && (a.high & 0x7FFF) != 0; +#endif +} + +#define floatx80_zero make_floatx80(0x0000, 0x0000000000000000LL) +#define floatx80_zero_init make_floatx80_init(0x0000, 0x0000000000000000LL) +#define floatx80_one make_floatx80(0x3fff, 0x8000000000000000LL) +#define floatx80_ln2 make_floatx80(0x3ffe, 0xb17217f7d1cf79acLL) +#define floatx80_pi make_floatx80(0x4000, 0xc90fdaa22168c235LL) +#define floatx80_half make_floatx80(0x3ffe, 0x8000000000000000LL) + +/*---------------------------------------------------------------------------- +| Returns the fraction bits of the extended double-precision floating-point +| value `a'. +*----------------------------------------------------------------------------*/ + +static inline uint64_t extractFloatx80Frac(floatx80 a) +{ + return a.low; +} + +/*---------------------------------------------------------------------------- +| Returns the exponent bits of the extended double-precision floating-point +| value `a'. +*----------------------------------------------------------------------------*/ + +static inline int32_t extractFloatx80Exp(floatx80 a) +{ + return a.high & 0x7FFF; +} + +/*---------------------------------------------------------------------------- +| Returns the sign bit of the extended double-precision floating-point value +| `a'. +*----------------------------------------------------------------------------*/ + +static inline bool extractFloatx80Sign(floatx80 a) +{ + return a.high >> 15; +} + +/*---------------------------------------------------------------------------- +| Packs the sign `zSign', exponent `zExp', and significand `zSig' into an +| extended double-precision floating-point value, returning the result. +*----------------------------------------------------------------------------*/ + +static inline floatx80 packFloatx80(bool zSign, int32_t zExp, uint64_t zSig) +{ + floatx80 z; + + z.low = zSig; + z.high = (((uint16_t)zSign) << 15) + zExp; + return z; +} + +/*---------------------------------------------------------------------------- +| Normalizes the subnormal extended double-precision floating-point value +| represented by the denormalized significand `aSig'. The normalized exponent +| and significand are stored at the locations pointed to by `zExpPtr' and +| `zSigPtr', respectively. +*----------------------------------------------------------------------------*/ + +void normalizeFloatx80Subnormal(uint64_t aSig, int32_t *zExpPtr, + uint64_t *zSigPtr); + +/*---------------------------------------------------------------------------- +| Takes two extended double-precision floating-point values `a' and `b', one +| of which is a NaN, and returns the appropriate NaN result. If either `a' or +| `b' is a signaling NaN, the invalid exception is raised. +*----------------------------------------------------------------------------*/ + +floatx80 propagateFloatx80NaN(floatx80 a, floatx80 b, float_status *status); + +/*---------------------------------------------------------------------------- +| Takes an abstract floating-point value having sign `zSign', exponent `zExp', +| and extended significand formed by the concatenation of `zSig0' and `zSig1', +| and returns the proper extended double-precision floating-point value +| corresponding to the abstract input. Ordinarily, the abstract value is +| rounded and packed into the extended double-precision format, with the +| inexact exception raised if the abstract input cannot be represented +| exactly. However, if the abstract value is too large, the overflow and +| inexact exceptions are raised and an infinity or maximal finite value is +| returned. If the abstract value is too small, the input value is rounded to +| a subnormal number, and the underflow and inexact exceptions are raised if +| the abstract input cannot be represented exactly as a subnormal extended +| double-precision floating-point number. +| If `roundingPrecision' is 32 or 64, the result is rounded to the same +| number of bits as single or double precision, respectively. Otherwise, the +| result is rounded to the full precision of the extended double-precision +| format. +| The input significand must be normalized or smaller. If the input +| significand is not normalized, `zExp' must be 0; in that case, the result +| returned is a subnormal number, and it must not require rounding. The +| handling of underflow and overflow follows the IEC/IEEE Standard for Binary +| Floating-Point Arithmetic. +*----------------------------------------------------------------------------*/ + +floatx80 roundAndPackFloatx80(int8_t roundingPrecision, bool zSign, + int32_t zExp, uint64_t zSig0, uint64_t zSig1, + float_status *status); + +/*---------------------------------------------------------------------------- +| Takes an abstract floating-point value having sign `zSign', exponent +| `zExp', and significand formed by the concatenation of `zSig0' and `zSig1', +| and returns the proper extended double-precision floating-point value +| corresponding to the abstract input. This routine is just like +| `roundAndPackFloatx80' except that the input significand does not have to be +| normalized. +*----------------------------------------------------------------------------*/ + +floatx80 normalizeRoundAndPackFloatx80(int8_t roundingPrecision, + bool zSign, int32_t zExp, + uint64_t zSig0, uint64_t zSig1, + float_status *status); + +/*---------------------------------------------------------------------------- +| The pattern for a default generated extended double-precision NaN. +*----------------------------------------------------------------------------*/ +floatx80 floatx80_default_nan(float_status *status); + +/*---------------------------------------------------------------------------- +| Software IEC/IEEE quadruple-precision conversion routines. +*----------------------------------------------------------------------------*/ +int32_t float128_to_int32(float128, float_status *status); +int32_t float128_to_int32_round_to_zero(float128, float_status *status); +int64_t float128_to_int64(float128, float_status *status); +int64_t float128_to_int64_round_to_zero(float128, float_status *status); +uint64_t float128_to_uint64(float128, float_status *status); +uint64_t float128_to_uint64_round_to_zero(float128, float_status *status); +uint32_t float128_to_uint32(float128, float_status *status); +uint32_t float128_to_uint32_round_to_zero(float128, float_status *status); +float32 float128_to_float32(float128, float_status *status); +float64 float128_to_float64(float128, float_status *status); +floatx80 float128_to_floatx80(float128, float_status *status); + +/*---------------------------------------------------------------------------- +| Software IEC/IEEE quadruple-precision operations. +*----------------------------------------------------------------------------*/ +float128 float128_round_to_int(float128, float_status *status); +float128 float128_add(float128, float128, float_status *status); +float128 float128_sub(float128, float128, float_status *status); +float128 float128_mul(float128, float128, float_status *status); +float128 float128_div(float128, float128, float_status *status); +float128 float128_rem(float128, float128, float_status *status); +float128 float128_sqrt(float128, float_status *status); +FloatRelation float128_compare(float128, float128, float_status *status); +FloatRelation float128_compare_quiet(float128, float128, float_status *status); +bool float128_is_quiet_nan(float128, float_status *status); +bool float128_is_signaling_nan(float128, float_status *status); +float128 float128_silence_nan(float128, float_status *status); +float128 float128_scalbn(float128, int, float_status *status); + +static inline float128 float128_abs(float128 a) +{ + a.high &= 0x7fffffffffffffffLL; + return a; +} + +static inline float128 float128_chs(float128 a) +{ + a.high ^= 0x8000000000000000LL; + return a; +} + +static inline bool float128_is_infinity(float128 a) +{ + return (a.high & 0x7fffffffffffffffLL) == 0x7fff000000000000LL && a.low == 0; +} + +static inline bool float128_is_neg(float128 a) +{ + return a.high >> 63; +} + +static inline bool float128_is_zero(float128 a) +{ + return (a.high & 0x7fffffffffffffffLL) == 0 && a.low == 0; +} + +static inline bool float128_is_zero_or_denormal(float128 a) +{ + return (a.high & 0x7fff000000000000LL) == 0; +} + +static inline bool float128_is_normal(float128 a) +{ + return (((a.high >> 48) + 1) & 0x7fff) >= 2; +} + +static inline bool float128_is_denormal(float128 a) +{ + return float128_is_zero_or_denormal(a) && !float128_is_zero(a); +} + +static inline bool float128_is_any_nan(float128 a) +{ + return ((a.high >> 48) & 0x7fff) == 0x7fff && + ((a.low != 0) || ((a.high & 0xffffffffffffLL) != 0)); +} + +static inline bool float128_eq(float128 a, float128 b, float_status *s) +{ + return float128_compare(a, b, s) == float_relation_equal; +} + +static inline bool float128_le(float128 a, float128 b, float_status *s) +{ + return float128_compare(a, b, s) <= float_relation_equal; +} + +static inline bool float128_lt(float128 a, float128 b, float_status *s) +{ + return float128_compare(a, b, s) < float_relation_equal; +} + +static inline bool float128_unordered(float128 a, float128 b, float_status *s) +{ + return float128_compare(a, b, s) == float_relation_unordered; +} + +static inline bool float128_eq_quiet(float128 a, float128 b, float_status *s) +{ + return float128_compare_quiet(a, b, s) == float_relation_equal; +} + +static inline bool float128_le_quiet(float128 a, float128 b, float_status *s) +{ + return float128_compare_quiet(a, b, s) <= float_relation_equal; +} + +static inline bool float128_lt_quiet(float128 a, float128 b, float_status *s) +{ + return float128_compare_quiet(a, b, s) < float_relation_equal; +} + +static inline bool float128_unordered_quiet(float128 a, float128 b, + float_status *s) +{ + return float128_compare_quiet(a, b, s) == float_relation_unordered; +} + +#define float128_zero make_float128(0, 0) + +/*---------------------------------------------------------------------------- +| The pattern for a default generated quadruple-precision NaN. +*----------------------------------------------------------------------------*/ +float128 float128_default_nan(float_status *status); + +#endif /* SOFTFLOAT_H */ diff --git a/include/glib-compat.h b/include/glib-compat.h new file mode 100644 index 000000000..695a96f7e --- /dev/null +++ b/include/glib-compat.h @@ -0,0 +1,105 @@ +/* + * GLIB Compatibility Functions + * + * Copyright IBM, Corp. 2013 + * + * Authors: + * Anthony Liguori + * Michael Tokarev + * Paolo Bonzini + * + * This work is licensed under the terms of the GNU GPL, version 2 or later. + * See the COPYING file in the top-level directory. + * + */ + +#ifndef QEMU_GLIB_COMPAT_H +#define QEMU_GLIB_COMPAT_H + +/* Ask for warnings for anything that was marked deprecated in + * the defined version, or before. It is a candidate for rewrite. + */ +#define GLIB_VERSION_MIN_REQUIRED GLIB_VERSION_2_48 + +/* Ask for warnings if code tries to use function that did not + * exist in the defined version. These risk breaking builds + */ +#define GLIB_VERSION_MAX_ALLOWED GLIB_VERSION_2_48 + +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wdeprecated-declarations" + +#include +#if defined(G_OS_UNIX) +#include +#include +#include +#endif + +/* + * Note that because of the GLIB_VERSION_MAX_ALLOWED constant above, allowing + * use of functions from newer GLib via this compat header needs a little + * trickery to prevent warnings being emitted. + * + * Consider a function from newer glib-X.Y that we want to use + * + * int g_foo(const char *wibble) + * + * We must define a static inline function with the same signature that does + * what we need, but with a "_qemu" suffix e.g. + * + * static inline void g_foo_qemu(const char *wibble) + * { + * #if GLIB_CHECK_VERSION(X, Y, 0) + * g_foo(wibble) + * #else + * g_something_equivalent_in_older_glib(wibble); + * #endif + * } + * + * The #pragma at the top of this file turns off -Wdeprecated-declarations, + * ensuring this wrapper function impl doesn't trigger the compiler warning + * about using too new glib APIs. Finally we can do + * + * #define g_foo(a) g_foo_qemu(a) + * + * So now the code elsewhere in QEMU, which *does* have the + * -Wdeprecated-declarations warning active, can call g_foo(...) as normal, + * without generating warnings. + */ + +#if defined(_WIN32) && !GLIB_CHECK_VERSION(2, 50, 0) +/* + * g_poll has a problem on Windows when using + * timeouts < 10ms, so use wrapper. + */ +#define g_poll(fds, nfds, timeout) g_poll_fixed(fds, nfds, timeout) +gint g_poll_fixed(GPollFD *fds, guint nfds, gint timeout); +#endif + +#if defined(G_OS_UNIX) +/* + * Note: The fallback implementation is not MT-safe, and it returns a copy of + * the libc passwd (must be g_free() after use) but not the content. Because of + * these important differences the caller must be aware of, it's not #define for + * GLib API substitution. + */ +static inline struct passwd * +g_unix_get_passwd_entry_qemu(const gchar *user_name, GError **error) +{ +#if GLIB_CHECK_VERSION(2, 64, 0) + return g_unix_get_passwd_entry(user_name, error); +#else + struct passwd *p = getpwnam(user_name); + if (!p) { + g_set_error_literal(error, G_UNIX_ERROR, 0, g_strerror(errno)); + return NULL; + } + return (struct passwd *)g_memdup(p, sizeof(*p)); +#endif +} +#endif /* G_OS_UNIX */ + +#pragma GCC diagnostic pop + +#endif diff --git a/include/hw/acpi/acpi-defs.h b/include/hw/acpi/acpi-defs.h new file mode 100644 index 000000000..38a42f409 --- /dev/null +++ b/include/hw/acpi/acpi-defs.h @@ -0,0 +1,621 @@ +/* + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + + * You should have received a copy of the GNU General Public License along + * with this program; if not, see . + */ +#ifndef QEMU_ACPI_DEFS_H +#define QEMU_ACPI_DEFS_H + +enum { + ACPI_FADT_F_WBINVD, + ACPI_FADT_F_WBINVD_FLUSH, + ACPI_FADT_F_PROC_C1, + ACPI_FADT_F_P_LVL2_UP, + ACPI_FADT_F_PWR_BUTTON, + ACPI_FADT_F_SLP_BUTTON, + ACPI_FADT_F_FIX_RTC, + ACPI_FADT_F_RTC_S4, + ACPI_FADT_F_TMR_VAL_EXT, + ACPI_FADT_F_DCK_CAP, + ACPI_FADT_F_RESET_REG_SUP, + ACPI_FADT_F_SEALED_CASE, + ACPI_FADT_F_HEADLESS, + ACPI_FADT_F_CPU_SW_SLP, + ACPI_FADT_F_PCI_EXP_WAK, + ACPI_FADT_F_USE_PLATFORM_CLOCK, + ACPI_FADT_F_S4_RTC_STS_VALID, + ACPI_FADT_F_REMOTE_POWER_ON_CAPABLE, + ACPI_FADT_F_FORCE_APIC_CLUSTER_MODEL, + ACPI_FADT_F_FORCE_APIC_PHYSICAL_DESTINATION_MODE, + ACPI_FADT_F_HW_REDUCED_ACPI, + ACPI_FADT_F_LOW_POWER_S0_IDLE_CAPABLE, +}; + +typedef struct AcpiRsdpData { + uint8_t oem_id[6] QEMU_NONSTRING; /* OEM identification */ + uint8_t revision; /* Must be 0 for 1.0, 2 for 2.0 */ + + unsigned *rsdt_tbl_offset; + unsigned *xsdt_tbl_offset; +} AcpiRsdpData; + +/* Table structure from Linux kernel (the ACPI tables are under the + BSD license) */ + + +#define ACPI_TABLE_HEADER_DEF /* ACPI common table header */ \ + uint32_t signature; /* ACPI signature (4 ASCII characters) */ \ + uint32_t length; /* Length of table, in bytes, including header */ \ + uint8_t revision; /* ACPI Specification minor version # */ \ + uint8_t checksum; /* To make sum of entire table == 0 */ \ + uint8_t oem_id[6] \ + QEMU_NONSTRING; /* OEM identification */ \ + uint8_t oem_table_id[8] \ + QEMU_NONSTRING; /* OEM table identification */ \ + uint32_t oem_revision; /* OEM revision number */ \ + uint8_t asl_compiler_id[4] \ + QEMU_NONSTRING; /* ASL compiler vendor ID */ \ + uint32_t asl_compiler_revision; /* ASL compiler revision number */ + + +/* ACPI common table header */ +struct AcpiTableHeader { + ACPI_TABLE_HEADER_DEF +} QEMU_PACKED; +typedef struct AcpiTableHeader AcpiTableHeader; + +struct AcpiGenericAddress { + uint8_t space_id; /* Address space where struct or register exists */ + uint8_t bit_width; /* Size in bits of given register */ + uint8_t bit_offset; /* Bit offset within the register */ + uint8_t access_width; /* ACPI 3.0: Minimum Access size (ACPI 3.0), + ACPI 2.0: Reserved, Table 5-1 */ + uint64_t address; /* 64-bit address of struct or register */ +} QEMU_PACKED; + +typedef struct AcpiFadtData { + struct AcpiGenericAddress pm1a_cnt; /* PM1a_CNT_BLK */ + struct AcpiGenericAddress pm1a_evt; /* PM1a_EVT_BLK */ + struct AcpiGenericAddress pm_tmr; /* PM_TMR_BLK */ + struct AcpiGenericAddress gpe0_blk; /* GPE0_BLK */ + struct AcpiGenericAddress reset_reg; /* RESET_REG */ + struct AcpiGenericAddress sleep_ctl; /* SLEEP_CONTROL_REG */ + struct AcpiGenericAddress sleep_sts; /* SLEEP_STATUS_REG */ + uint8_t reset_val; /* RESET_VALUE */ + uint8_t rev; /* Revision */ + uint32_t flags; /* Flags */ + uint32_t smi_cmd; /* SMI_CMD */ + uint16_t sci_int; /* SCI_INT */ + uint8_t int_model; /* INT_MODEL */ + uint8_t acpi_enable_cmd; /* ACPI_ENABLE */ + uint8_t acpi_disable_cmd; /* ACPI_DISABLE */ + uint8_t rtc_century; /* CENTURY */ + uint16_t plvl2_lat; /* P_LVL2_LAT */ + uint16_t plvl3_lat; /* P_LVL3_LAT */ + uint16_t arm_boot_arch; /* ARM_BOOT_ARCH */ + uint8_t minor_ver; /* FADT Minor Version */ + + /* + * respective tables offsets within ACPI_BUILD_TABLE_FILE, + * NULL if table doesn't exist (in that case field's value + * won't be patched by linker and will be kept set to 0) + */ + unsigned *facs_tbl_offset; /* FACS offset in */ + unsigned *dsdt_tbl_offset; + unsigned *xdsdt_tbl_offset; +} AcpiFadtData; + +#define ACPI_FADT_ARM_PSCI_COMPLIANT (1 << 0) +#define ACPI_FADT_ARM_PSCI_USE_HVC (1 << 1) + +/* + * Serial Port Console Redirection Table (SPCR), Rev. 1.02 + * + * For .interface_type see Debug Port Table 2 (DBG2) serial port + * subtypes in Table 3, Rev. May 22, 2012 + */ +struct AcpiSerialPortConsoleRedirection { + ACPI_TABLE_HEADER_DEF + uint8_t interface_type; + uint8_t reserved1[3]; + struct AcpiGenericAddress base_address; + uint8_t interrupt_types; + uint8_t irq; + uint32_t gsi; + uint8_t baud; + uint8_t parity; + uint8_t stopbits; + uint8_t flowctrl; + uint8_t term_type; + uint8_t reserved2; + uint16_t pci_device_id; + uint16_t pci_vendor_id; + uint8_t pci_bus; + uint8_t pci_slot; + uint8_t pci_func; + uint32_t pci_flags; + uint8_t pci_seg; + uint32_t reserved3; +} QEMU_PACKED; +typedef struct AcpiSerialPortConsoleRedirection + AcpiSerialPortConsoleRedirection; + +/* + * ACPI 1.0 Root System Description Table (RSDT) + */ +struct AcpiRsdtDescriptorRev1 { + ACPI_TABLE_HEADER_DEF /* ACPI common table header */ + uint32_t table_offset_entry[]; /* Array of pointers to other */ + /* ACPI tables */ +} QEMU_PACKED; +typedef struct AcpiRsdtDescriptorRev1 AcpiRsdtDescriptorRev1; + +/* + * ACPI 2.0 eXtended System Description Table (XSDT) + */ +struct AcpiXsdtDescriptorRev2 { + ACPI_TABLE_HEADER_DEF /* ACPI common table header */ + uint64_t table_offset_entry[]; /* Array of pointers to other */ + /* ACPI tables */ +} QEMU_PACKED; +typedef struct AcpiXsdtDescriptorRev2 AcpiXsdtDescriptorRev2; + +/* + * ACPI 1.0 Firmware ACPI Control Structure (FACS) + */ +struct AcpiFacsDescriptorRev1 { + uint32_t signature; /* ACPI Signature */ + uint32_t length; /* Length of structure, in bytes */ + uint32_t hardware_signature; /* Hardware configuration signature */ + uint32_t firmware_waking_vector; /* ACPI OS waking vector */ + uint32_t global_lock; /* Global Lock */ + uint32_t flags; + uint8_t resverved3 [40]; /* Reserved - must be zero */ +} QEMU_PACKED; +typedef struct AcpiFacsDescriptorRev1 AcpiFacsDescriptorRev1; + +/* + * Differentiated System Description Table (DSDT) + */ + +/* + * MADT values and structures + */ + +/* Values for MADT PCATCompat */ + +#define ACPI_DUAL_PIC 0 +#define ACPI_MULTIPLE_APIC 1 + +/* Master MADT */ + +struct AcpiMultipleApicTable { + ACPI_TABLE_HEADER_DEF /* ACPI common table header */ + uint32_t local_apic_address; /* Physical address of local APIC */ + uint32_t flags; +} QEMU_PACKED; +typedef struct AcpiMultipleApicTable AcpiMultipleApicTable; + +/* Values for Type in APIC sub-headers */ + +#define ACPI_APIC_PROCESSOR 0 +#define ACPI_APIC_IO 1 +#define ACPI_APIC_XRUPT_OVERRIDE 2 +#define ACPI_APIC_NMI 3 +#define ACPI_APIC_LOCAL_NMI 4 +#define ACPI_APIC_ADDRESS_OVERRIDE 5 +#define ACPI_APIC_IO_SAPIC 6 +#define ACPI_APIC_LOCAL_SAPIC 7 +#define ACPI_APIC_XRUPT_SOURCE 8 +#define ACPI_APIC_LOCAL_X2APIC 9 +#define ACPI_APIC_LOCAL_X2APIC_NMI 10 +#define ACPI_APIC_GENERIC_CPU_INTERFACE 11 +#define ACPI_APIC_GENERIC_DISTRIBUTOR 12 +#define ACPI_APIC_GENERIC_MSI_FRAME 13 +#define ACPI_APIC_GENERIC_REDISTRIBUTOR 14 +#define ACPI_APIC_GENERIC_TRANSLATOR 15 +#define ACPI_APIC_RESERVED 16 /* 16 and greater are reserved */ + +/* + * MADT sub-structures (Follow MULTIPLE_APIC_DESCRIPTION_TABLE) + */ +#define ACPI_SUB_HEADER_DEF /* Common ACPI sub-structure header */\ + uint8_t type; \ + uint8_t length; + +/* Sub-structures for MADT */ + +struct AcpiMadtProcessorApic { + ACPI_SUB_HEADER_DEF + uint8_t processor_id; /* ACPI processor id */ + uint8_t local_apic_id; /* Processor's local APIC id */ + uint32_t flags; +} QEMU_PACKED; +typedef struct AcpiMadtProcessorApic AcpiMadtProcessorApic; + +struct AcpiMadtIoApic { + ACPI_SUB_HEADER_DEF + uint8_t io_apic_id; /* I/O APIC ID */ + uint8_t reserved; /* Reserved - must be zero */ + uint32_t address; /* APIC physical address */ + uint32_t interrupt; /* Global system interrupt where INTI + * lines start */ +} QEMU_PACKED; +typedef struct AcpiMadtIoApic AcpiMadtIoApic; + +struct AcpiMadtIntsrcovr { + ACPI_SUB_HEADER_DEF + uint8_t bus; + uint8_t source; + uint32_t gsi; + uint16_t flags; +} QEMU_PACKED; +typedef struct AcpiMadtIntsrcovr AcpiMadtIntsrcovr; + +struct AcpiMadtLocalNmi { + ACPI_SUB_HEADER_DEF + uint8_t processor_id; /* ACPI processor id */ + uint16_t flags; /* MPS INTI flags */ + uint8_t lint; /* Local APIC LINT# */ +} QEMU_PACKED; +typedef struct AcpiMadtLocalNmi AcpiMadtLocalNmi; + +struct AcpiMadtProcessorX2Apic { + ACPI_SUB_HEADER_DEF + uint16_t reserved; + uint32_t x2apic_id; /* Processor's local x2APIC ID */ + uint32_t flags; + uint32_t uid; /* Processor object _UID */ +} QEMU_PACKED; +typedef struct AcpiMadtProcessorX2Apic AcpiMadtProcessorX2Apic; + +struct AcpiMadtLocalX2ApicNmi { + ACPI_SUB_HEADER_DEF + uint16_t flags; /* MPS INTI flags */ + uint32_t uid; /* Processor object _UID */ + uint8_t lint; /* Local APIC LINT# */ + uint8_t reserved[3]; /* Local APIC LINT# */ +} QEMU_PACKED; +typedef struct AcpiMadtLocalX2ApicNmi AcpiMadtLocalX2ApicNmi; + +struct AcpiMadtGenericCpuInterface { + ACPI_SUB_HEADER_DEF + uint16_t reserved; + uint32_t cpu_interface_number; + uint32_t uid; + uint32_t flags; + uint32_t parking_version; + uint32_t performance_interrupt; + uint64_t parked_address; + uint64_t base_address; + uint64_t gicv_base_address; + uint64_t gich_base_address; + uint32_t vgic_interrupt; + uint64_t gicr_base_address; + uint64_t arm_mpidr; +} QEMU_PACKED; + +typedef struct AcpiMadtGenericCpuInterface AcpiMadtGenericCpuInterface; + +/* GICC CPU Interface Flags */ +#define ACPI_MADT_GICC_ENABLED 1 + +struct AcpiMadtGenericDistributor { + ACPI_SUB_HEADER_DEF + uint16_t reserved; + uint32_t gic_id; + uint64_t base_address; + uint32_t global_irq_base; + /* ACPI 5.1 Errata 1228 Present GIC version in MADT table */ + uint8_t version; + uint8_t reserved2[3]; +} QEMU_PACKED; + +typedef struct AcpiMadtGenericDistributor AcpiMadtGenericDistributor; + +struct AcpiMadtGenericMsiFrame { + ACPI_SUB_HEADER_DEF + uint16_t reserved; + uint32_t gic_msi_frame_id; + uint64_t base_address; + uint32_t flags; + uint16_t spi_count; + uint16_t spi_base; +} QEMU_PACKED; + +typedef struct AcpiMadtGenericMsiFrame AcpiMadtGenericMsiFrame; + +struct AcpiMadtGenericRedistributor { + ACPI_SUB_HEADER_DEF + uint16_t reserved; + uint64_t base_address; + uint32_t range_length; +} QEMU_PACKED; + +typedef struct AcpiMadtGenericRedistributor AcpiMadtGenericRedistributor; + +struct AcpiMadtGenericTranslator { + ACPI_SUB_HEADER_DEF + uint16_t reserved; + uint32_t translation_id; + uint64_t base_address; + uint32_t reserved2; +} QEMU_PACKED; + +typedef struct AcpiMadtGenericTranslator AcpiMadtGenericTranslator; + +/* + * Generic Timer Description Table (GTDT) + */ +#define ACPI_GTDT_INTERRUPT_MODE_LEVEL (0 << 0) +#define ACPI_GTDT_INTERRUPT_MODE_EDGE (1 << 0) +#define ACPI_GTDT_CAP_ALWAYS_ON (1 << 2) + +struct AcpiGenericTimerTable { + ACPI_TABLE_HEADER_DEF + uint64_t counter_block_addresss; + uint32_t reserved; + uint32_t secure_el1_interrupt; + uint32_t secure_el1_flags; + uint32_t non_secure_el1_interrupt; + uint32_t non_secure_el1_flags; + uint32_t virtual_timer_interrupt; + uint32_t virtual_timer_flags; + uint32_t non_secure_el2_interrupt; + uint32_t non_secure_el2_flags; + uint64_t counter_read_block_address; + uint32_t platform_timer_count; + uint32_t platform_timer_offset; +} QEMU_PACKED; +typedef struct AcpiGenericTimerTable AcpiGenericTimerTable; + +/* + * HPET Description Table + */ +struct Acpi20Hpet { + ACPI_TABLE_HEADER_DEF /* ACPI common table header */ + uint32_t timer_block_id; + struct AcpiGenericAddress addr; + uint8_t hpet_number; + uint16_t min_tick; + uint8_t page_protect; +} QEMU_PACKED; +typedef struct Acpi20Hpet Acpi20Hpet; + +/* + * SRAT (NUMA topology description) table + */ + +struct AcpiSystemResourceAffinityTable { + ACPI_TABLE_HEADER_DEF + uint32_t reserved1; + uint32_t reserved2[2]; +} QEMU_PACKED; +typedef struct AcpiSystemResourceAffinityTable AcpiSystemResourceAffinityTable; + +#define ACPI_SRAT_PROCESSOR_APIC 0 +#define ACPI_SRAT_MEMORY 1 +#define ACPI_SRAT_PROCESSOR_x2APIC 2 +#define ACPI_SRAT_PROCESSOR_GICC 3 + +struct AcpiSratProcessorAffinity { + ACPI_SUB_HEADER_DEF + uint8_t proximity_lo; + uint8_t local_apic_id; + uint32_t flags; + uint8_t local_sapic_eid; + uint8_t proximity_hi[3]; + uint32_t reserved; +} QEMU_PACKED; +typedef struct AcpiSratProcessorAffinity AcpiSratProcessorAffinity; + +struct AcpiSratProcessorX2ApicAffinity { + ACPI_SUB_HEADER_DEF + uint16_t reserved; + uint32_t proximity_domain; + uint32_t x2apic_id; + uint32_t flags; + uint32_t clk_domain; + uint32_t reserved2; +} QEMU_PACKED; +typedef struct AcpiSratProcessorX2ApicAffinity AcpiSratProcessorX2ApicAffinity; + +struct AcpiSratMemoryAffinity { + ACPI_SUB_HEADER_DEF + uint32_t proximity; + uint16_t reserved1; + uint64_t base_addr; + uint64_t range_length; + uint32_t reserved2; + uint32_t flags; + uint32_t reserved3[2]; +} QEMU_PACKED; +typedef struct AcpiSratMemoryAffinity AcpiSratMemoryAffinity; + +struct AcpiSratProcessorGiccAffinity { + ACPI_SUB_HEADER_DEF + uint32_t proximity; + uint32_t acpi_processor_uid; + uint32_t flags; + uint32_t clock_domain; +} QEMU_PACKED; + +typedef struct AcpiSratProcessorGiccAffinity AcpiSratProcessorGiccAffinity; + +/* + * TCPA Description Table + * + * Following Level 00, Rev 00.37 of specs: + * http://www.trustedcomputinggroup.org/resources/tcg_acpi_specification + */ +struct Acpi20Tcpa { + ACPI_TABLE_HEADER_DEF /* ACPI common table header */ + uint16_t platform_class; + uint32_t log_area_minimum_length; + uint64_t log_area_start_address; +} QEMU_PACKED; +typedef struct Acpi20Tcpa Acpi20Tcpa; + +/* DMAR - DMA Remapping table r2.2 */ +struct AcpiTableDmar { + ACPI_TABLE_HEADER_DEF + uint8_t host_address_width; /* Maximum DMA physical addressability */ + uint8_t flags; + uint8_t reserved[10]; +} QEMU_PACKED; +typedef struct AcpiTableDmar AcpiTableDmar; + +/* Masks for Flags field above */ +#define ACPI_DMAR_INTR_REMAP 1 +#define ACPI_DMAR_X2APIC_OPT_OUT (1 << 1) + +/* Values for sub-structure type for DMAR */ +enum { + ACPI_DMAR_TYPE_HARDWARE_UNIT = 0, /* DRHD */ + ACPI_DMAR_TYPE_RESERVED_MEMORY = 1, /* RMRR */ + ACPI_DMAR_TYPE_ATSR = 2, /* ATSR */ + ACPI_DMAR_TYPE_HARDWARE_AFFINITY = 3, /* RHSR */ + ACPI_DMAR_TYPE_ANDD = 4, /* ANDD */ + ACPI_DMAR_TYPE_RESERVED = 5 /* Reserved for furture use */ +}; + +/* + * Sub-structures for DMAR + */ + +/* Device scope structure for DRHD. */ +struct AcpiDmarDeviceScope { + uint8_t entry_type; + uint8_t length; + uint16_t reserved; + uint8_t enumeration_id; + uint8_t bus; + struct { + uint8_t device; + uint8_t function; + } path[]; +} QEMU_PACKED; +typedef struct AcpiDmarDeviceScope AcpiDmarDeviceScope; + +/* Type 0: Hardware Unit Definition */ +struct AcpiDmarHardwareUnit { + uint16_t type; + uint16_t length; + uint8_t flags; + uint8_t reserved; + uint16_t pci_segment; /* The PCI Segment associated with this unit */ + uint64_t address; /* Base address of remapping hardware register-set */ + AcpiDmarDeviceScope scope[]; +} QEMU_PACKED; +typedef struct AcpiDmarHardwareUnit AcpiDmarHardwareUnit; + +/* Type 2: Root Port ATS Capability Reporting Structure */ +struct AcpiDmarRootPortATS { + uint16_t type; + uint16_t length; + uint8_t flags; + uint8_t reserved; + uint16_t pci_segment; + AcpiDmarDeviceScope scope[]; +} QEMU_PACKED; +typedef struct AcpiDmarRootPortATS AcpiDmarRootPortATS; + +/* Masks for Flags field above */ +#define ACPI_DMAR_INCLUDE_PCI_ALL 1 +#define ACPI_DMAR_ATSR_ALL_PORTS 1 + +/* + * Input Output Remapping Table (IORT) + * Conforms to "IO Remapping Table System Software on ARM Platforms", + * Document number: ARM DEN 0049B, October 2015 + */ + +struct AcpiIortTable { + ACPI_TABLE_HEADER_DEF /* ACPI common table header */ + uint32_t node_count; + uint32_t node_offset; + uint32_t reserved; +} QEMU_PACKED; +typedef struct AcpiIortTable AcpiIortTable; + +/* + * IORT node types + */ + +#define ACPI_IORT_NODE_HEADER_DEF /* Node format common fields */ \ + uint8_t type; \ + uint16_t length; \ + uint8_t revision; \ + uint32_t reserved; \ + uint32_t mapping_count; \ + uint32_t mapping_offset; + +/* Values for node Type above */ +enum { + ACPI_IORT_NODE_ITS_GROUP = 0x00, + ACPI_IORT_NODE_NAMED_COMPONENT = 0x01, + ACPI_IORT_NODE_PCI_ROOT_COMPLEX = 0x02, + ACPI_IORT_NODE_SMMU = 0x03, + ACPI_IORT_NODE_SMMU_V3 = 0x04 +}; + +struct AcpiIortIdMapping { + uint32_t input_base; + uint32_t id_count; + uint32_t output_base; + uint32_t output_reference; + uint32_t flags; +} QEMU_PACKED; +typedef struct AcpiIortIdMapping AcpiIortIdMapping; + +struct AcpiIortMemoryAccess { + uint32_t cache_coherency; + uint8_t hints; + uint16_t reserved; + uint8_t memory_flags; +} QEMU_PACKED; +typedef struct AcpiIortMemoryAccess AcpiIortMemoryAccess; + +struct AcpiIortItsGroup { + ACPI_IORT_NODE_HEADER_DEF + uint32_t its_count; + uint32_t identifiers[]; +} QEMU_PACKED; +typedef struct AcpiIortItsGroup AcpiIortItsGroup; + +#define ACPI_IORT_SMMU_V3_COHACC_OVERRIDE 1 + +struct AcpiIortSmmu3 { + ACPI_IORT_NODE_HEADER_DEF + uint64_t base_address; + uint32_t flags; + uint32_t reserved2; + uint64_t vatos_address; + uint32_t model; + uint32_t event_gsiv; + uint32_t pri_gsiv; + uint32_t gerr_gsiv; + uint32_t sync_gsiv; + AcpiIortIdMapping id_mapping_array[]; +} QEMU_PACKED; +typedef struct AcpiIortSmmu3 AcpiIortSmmu3; + +struct AcpiIortRC { + ACPI_IORT_NODE_HEADER_DEF + AcpiIortMemoryAccess memory_properties; + uint32_t ats_attribute; + uint32_t pci_segment_number; + AcpiIortIdMapping id_mapping_array[]; +} QEMU_PACKED; +typedef struct AcpiIortRC AcpiIortRC; + +#endif diff --git a/include/hw/acpi/acpi.h b/include/hw/acpi/acpi.h new file mode 100644 index 000000000..22b0b65bb --- /dev/null +++ b/include/hw/acpi/acpi.h @@ -0,0 +1,199 @@ +#ifndef QEMU_HW_ACPI_H +#define QEMU_HW_ACPI_H + +/* + * Copyright (c) 2009 Isaku Yamahata + * VA Linux Systems Japan K.K. + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, see + * . + */ + +#include "qemu/notify.h" +#include "exec/memory.h" +#include "hw/acpi/acpi_dev_interface.h" + +/* + * current device naming scheme supports up to 256 memory devices + */ +#define ACPI_MAX_RAM_SLOTS 256 + +/* from linux include/acpi/actype.h */ +/* Default ACPI register widths */ + +#define ACPI_GPE_REGISTER_WIDTH 8 +#define ACPI_PM1_REGISTER_WIDTH 16 +#define ACPI_PM2_REGISTER_WIDTH 8 +#define ACPI_PM_TIMER_WIDTH 32 + +/* PC-style peripherals (also used by other machines). */ +#define ACPI_PM_PROP_S3_DISABLED "disable_s3" +#define ACPI_PM_PROP_S4_DISABLED "disable_s4" +#define ACPI_PM_PROP_S4_VAL "s4_val" +#define ACPI_PM_PROP_SCI_INT "sci_int" +#define ACPI_PM_PROP_ACPI_ENABLE_CMD "acpi_enable_cmd" +#define ACPI_PM_PROP_ACPI_DISABLE_CMD "acpi_disable_cmd" +#define ACPI_PM_PROP_PM_IO_BASE "pm_io_base" +#define ACPI_PM_PROP_GPE0_BLK "gpe0_blk" +#define ACPI_PM_PROP_GPE0_BLK_LEN "gpe0_blk_len" + +/* PM Timer ticks per second (HZ) */ +#define PM_TIMER_FREQUENCY 3579545 + + +/* ACPI fixed hardware registers */ + +/* from linux/drivers/acpi/acpica/aclocal.h */ +/* Masks used to access the bit_registers */ + +/* PM1x_STS */ +#define ACPI_BITMASK_TIMER_STATUS 0x0001 +#define ACPI_BITMASK_BUS_MASTER_STATUS 0x0010 +#define ACPI_BITMASK_GLOBAL_LOCK_STATUS 0x0020 +#define ACPI_BITMASK_POWER_BUTTON_STATUS 0x0100 +#define ACPI_BITMASK_SLEEP_BUTTON_STATUS 0x0200 +#define ACPI_BITMASK_RT_CLOCK_STATUS 0x0400 +#define ACPI_BITMASK_PCIEXP_WAKE_STATUS 0x4000 /* ACPI 3.0 */ +#define ACPI_BITMASK_WAKE_STATUS 0x8000 + +#define ACPI_BITMASK_ALL_FIXED_STATUS (\ + ACPI_BITMASK_TIMER_STATUS | \ + ACPI_BITMASK_BUS_MASTER_STATUS | \ + ACPI_BITMASK_GLOBAL_LOCK_STATUS | \ + ACPI_BITMASK_POWER_BUTTON_STATUS | \ + ACPI_BITMASK_SLEEP_BUTTON_STATUS | \ + ACPI_BITMASK_RT_CLOCK_STATUS | \ + ACPI_BITMASK_WAKE_STATUS) + +/* PM1x_EN */ +#define ACPI_BITMASK_TIMER_ENABLE 0x0001 +#define ACPI_BITMASK_GLOBAL_LOCK_ENABLE 0x0020 +#define ACPI_BITMASK_POWER_BUTTON_ENABLE 0x0100 +#define ACPI_BITMASK_SLEEP_BUTTON_ENABLE 0x0200 +#define ACPI_BITMASK_RT_CLOCK_ENABLE 0x0400 +#define ACPI_BITMASK_PCIEXP_WAKE_DISABLE 0x4000 /* ACPI 3.0 */ + +#define ACPI_BITMASK_PM1_COMMON_ENABLED ( \ + ACPI_BITMASK_RT_CLOCK_ENABLE | \ + ACPI_BITMASK_POWER_BUTTON_ENABLE | \ + ACPI_BITMASK_GLOBAL_LOCK_ENABLE | \ + ACPI_BITMASK_TIMER_ENABLE) + +/* PM1x_CNT */ +#define ACPI_BITMASK_SCI_ENABLE 0x0001 +#define ACPI_BITMASK_BUS_MASTER_RLD 0x0002 +#define ACPI_BITMASK_GLOBAL_LOCK_RELEASE 0x0004 +#define ACPI_BITMASK_SLEEP_TYPE 0x1C00 +#define ACPI_BITMASK_SLEEP_ENABLE 0x2000 + +/* PM2_CNT */ +#define ACPI_BITMASK_ARB_DISABLE 0x0001 + +/* structs */ +typedef struct ACPIPMTimer ACPIPMTimer; +typedef struct ACPIPM1EVT ACPIPM1EVT; +typedef struct ACPIPM1CNT ACPIPM1CNT; +typedef struct ACPIGPE ACPIGPE; +typedef struct ACPIREGS ACPIREGS; + +typedef void (*acpi_update_sci_fn)(ACPIREGS *ar); + +struct ACPIPMTimer { + QEMUTimer *timer; + MemoryRegion io; + int64_t overflow_time; + + acpi_update_sci_fn update_sci; +}; + +struct ACPIPM1EVT { + MemoryRegion io; + uint16_t sts; + uint16_t en; + acpi_update_sci_fn update_sci; +}; + +struct ACPIPM1CNT { + MemoryRegion io; + uint16_t cnt; + uint8_t s4_val; +}; + +struct ACPIGPE { + uint8_t len; + + uint8_t *sts; + uint8_t *en; +}; + +struct ACPIREGS { + ACPIPMTimer tmr; + ACPIGPE gpe; + struct { + ACPIPM1EVT evt; + ACPIPM1CNT cnt; + } pm1; + Notifier wakeup; +}; + +/* PM_TMR */ +void acpi_pm_tmr_update(ACPIREGS *ar, bool enable); +void acpi_pm_tmr_calc_overflow_time(ACPIREGS *ar); +void acpi_pm_tmr_init(ACPIREGS *ar, acpi_update_sci_fn update_sci, + MemoryRegion *parent); +void acpi_pm_tmr_reset(ACPIREGS *ar); + +/* PM1a_EVT: piix and ich9 don't implement PM1b. */ +uint16_t acpi_pm1_evt_get_sts(ACPIREGS *ar); +void acpi_pm1_evt_power_down(ACPIREGS *ar); +void acpi_pm1_evt_reset(ACPIREGS *ar); +void acpi_pm1_evt_init(ACPIREGS *ar, acpi_update_sci_fn update_sci, + MemoryRegion *parent); + +/* PM1a_CNT: piix and ich9 don't implement PM1b CNT. */ +void acpi_pm1_cnt_init(ACPIREGS *ar, MemoryRegion *parent, + bool disable_s3, bool disable_s4, uint8_t s4_val); +void acpi_pm1_cnt_update(ACPIREGS *ar, + bool sci_enable, bool sci_disable); +void acpi_pm1_cnt_reset(ACPIREGS *ar); + +/* GPE0 */ +void acpi_gpe_init(ACPIREGS *ar, uint8_t len); +void acpi_gpe_reset(ACPIREGS *ar); + +void acpi_gpe_ioport_writeb(ACPIREGS *ar, uint32_t addr, uint32_t val); +uint32_t acpi_gpe_ioport_readb(ACPIREGS *ar, uint32_t addr); + +void acpi_send_gpe_event(ACPIREGS *ar, qemu_irq irq, + AcpiEventStatusBits status); + +void acpi_update_sci(ACPIREGS *acpi_regs, qemu_irq irq); + +/* acpi.c */ +extern char unsigned *acpi_tables; +extern size_t acpi_tables_len; + +uint8_t *acpi_table_first(void); +uint8_t *acpi_table_next(uint8_t *current); +unsigned acpi_table_len(void *current); +void acpi_table_add(const QemuOpts *opts, Error **errp); + +typedef struct AcpiSlicOem AcpiSlicOem; +struct AcpiSlicOem { + char *id; + char *table_id; +}; +int acpi_get_slic_oem(AcpiSlicOem *oem); + +#endif /* QEMU_HW_ACPI_H */ diff --git a/include/hw/acpi/acpi_dev_interface.h b/include/hw/acpi/acpi_dev_interface.h new file mode 100644 index 000000000..769ff55c7 --- /dev/null +++ b/include/hw/acpi/acpi_dev_interface.h @@ -0,0 +1,58 @@ +#ifndef ACPI_DEV_INTERFACE_H +#define ACPI_DEV_INTERFACE_H + +#include "qapi/qapi-types-acpi.h" +#include "qom/object.h" +#include "hw/boards.h" +#include "hw/qdev-core.h" + +/* These values are part of guest ABI, and can not be changed */ +typedef enum { + ACPI_PCI_HOTPLUG_STATUS = 2, + ACPI_CPU_HOTPLUG_STATUS = 4, + ACPI_MEMORY_HOTPLUG_STATUS = 8, + ACPI_NVDIMM_HOTPLUG_STATUS = 16, + ACPI_VMGENID_CHANGE_STATUS = 32, + ACPI_POWER_DOWN_STATUS = 64, +} AcpiEventStatusBits; + +#define TYPE_ACPI_DEVICE_IF "acpi-device-interface" + +typedef struct AcpiDeviceIfClass AcpiDeviceIfClass; +DECLARE_CLASS_CHECKERS(AcpiDeviceIfClass, ACPI_DEVICE_IF, + TYPE_ACPI_DEVICE_IF) +#define ACPI_DEVICE_IF(obj) \ + INTERFACE_CHECK(AcpiDeviceIf, (obj), \ + TYPE_ACPI_DEVICE_IF) + +typedef struct AcpiDeviceIf AcpiDeviceIf; + +void acpi_send_event(DeviceState *dev, AcpiEventStatusBits event); + +/** + * AcpiDeviceIfClass: + * + * ospm_status: returns status of ACPI device objects, reported + * via _OST method if device supports it. + * send_event: inject a specified event into guest + * madt_cpu: fills @entry with Interrupt Controller Structure + * for CPU indexed by @uid in @apic_ids array, + * returned structure types are: + * 0 - Local APIC, 9 - Local x2APIC, 0xB - GICC + * + * Interface is designed for providing unified interface + * to generic ACPI functionality that could be used without + * knowledge about internals of actual device that implements + * ACPI interface. + */ +struct AcpiDeviceIfClass { + /* */ + InterfaceClass parent_class; + + /* */ + void (*ospm_status)(AcpiDeviceIf *adev, ACPIOSTInfoList ***list); + void (*send_event)(AcpiDeviceIf *adev, AcpiEventStatusBits ev); + void (*madt_cpu)(AcpiDeviceIf *adev, int uid, + const CPUArchIdList *apic_ids, GArray *entry); +}; +#endif diff --git a/include/hw/acpi/aml-build.h b/include/hw/acpi/aml-build.h new file mode 100644 index 000000000..fe0055fff --- /dev/null +++ b/include/hw/acpi/aml-build.h @@ -0,0 +1,444 @@ +#ifndef HW_ACPI_AML_BUILD_H +#define HW_ACPI_AML_BUILD_H + +#include "hw/acpi/acpi-defs.h" +#include "hw/acpi/bios-linker-loader.h" + +/* Reserve RAM space for tables: add another order of magnitude. */ +#define ACPI_BUILD_TABLE_MAX_SIZE 0x200000 + +#define ACPI_BUILD_APPNAME6 "BOCHS " +#define ACPI_BUILD_APPNAME4 "BXPC" + +#define ACPI_BUILD_TABLE_FILE "etc/acpi/tables" +#define ACPI_BUILD_RSDP_FILE "etc/acpi/rsdp" +#define ACPI_BUILD_TPMLOG_FILE "etc/tpm/log" +#define ACPI_BUILD_LOADER_FILE "etc/table-loader" + +#define AML_NOTIFY_METHOD "NTFY" + +typedef enum { + AML_NO_OPCODE = 0,/* has only data */ + AML_OPCODE, /* has opcode optionally followed by data */ + AML_PACKAGE, /* has opcode and uses PkgLength for its length */ + AML_EXT_PACKAGE, /* Same as AML_PACKAGE but also has 'ExOpPrefix' */ + AML_BUFFER, /* data encoded as 'DefBuffer' */ + AML_RES_TEMPLATE, /* encoded as ResourceTemplate macro */ +} AmlBlockFlags; + +struct Aml { + GArray *buf; + + /*< private >*/ + uint8_t op; + AmlBlockFlags block_flags; +}; + +typedef enum { + AML_COMPATIBILITY = 0, + AML_TYPEA = 1, + AML_TYPEB = 2, + AML_TYPEF = 3, +} AmlDmaType; + +typedef enum { + AML_NOTBUSMASTER = 0, + AML_BUSMASTER = 1, +} AmlDmaBusMaster; + +typedef enum { + AML_TRANSFER8 = 0, + AML_TRANSFER8_16 = 1, + AML_TRANSFER16 = 2, +} AmlTransferSize; + +typedef enum { + AML_DECODE10 = 0, + AML_DECODE16 = 1, +} AmlIODecode; + +typedef enum { + AML_ANY_ACC = 0, + AML_BYTE_ACC = 1, + AML_WORD_ACC = 2, + AML_DWORD_ACC = 3, + AML_QWORD_ACC = 4, + AML_BUFFER_ACC = 5, +} AmlAccessType; + +typedef enum { + AML_NOLOCK = 0, + AML_LOCK = 1, +} AmlLockRule; + +typedef enum { + AML_PRESERVE = 0, + AML_WRITE_AS_ONES = 1, + AML_WRITE_AS_ZEROS = 2, +} AmlUpdateRule; + +typedef enum { + AML_AS_SYSTEM_MEMORY = 0X00, + AML_AS_SYSTEM_IO = 0X01, + AML_AS_PCI_CONFIG = 0X02, + AML_AS_EMBEDDED_CTRL = 0X03, + AML_AS_SMBUS = 0X04, + AML_AS_FFH = 0X7F, +} AmlAddressSpace; + +typedef enum { + AML_SYSTEM_MEMORY = 0X00, + AML_SYSTEM_IO = 0X01, + AML_PCI_CONFIG = 0X02, +} AmlRegionSpace; + +typedef enum { + AML_MEMORY_RANGE = 0, + AML_IO_RANGE = 1, + AML_BUS_NUMBER_RANGE = 2, +} AmlResourceType; + +typedef enum { + AML_SUB_DECODE = 1 << 1, + AML_POS_DECODE = 0 +} AmlDecode; + +typedef enum { + AML_MAX_FIXED = 1 << 3, + AML_MAX_NOT_FIXED = 0, +} AmlMaxFixed; + +typedef enum { + AML_MIN_FIXED = 1 << 2, + AML_MIN_NOT_FIXED = 0 +} AmlMinFixed; + +/* + * ACPI 1.0b: Table 6-26 I/O Resource Flag (Resource Type = 1) Definitions + * _RNG field definition + */ +typedef enum { + AML_ISA_ONLY = 1, + AML_NON_ISA_ONLY = 2, + AML_ENTIRE_RANGE = 3, +} AmlISARanges; + +/* + * ACPI 1.0b: Table 6-25 Memory Resource Flag (Resource Type = 0) Definitions + * _MEM field definition + */ +typedef enum { + AML_NON_CACHEABLE = 0, + AML_CACHEABLE = 1, + AML_WRITE_COMBINING = 2, + AML_PREFETCHABLE = 3, +} AmlCacheable; + +/* + * ACPI 1.0b: Table 6-25 Memory Resource Flag (Resource Type = 0) Definitions + * _RW field definition + */ +typedef enum { + AML_READ_ONLY = 0, + AML_READ_WRITE = 1, +} AmlReadAndWrite; + +/* + * ACPI 5.0: Table 6-187 Extended Interrupt Descriptor Definition + * Interrupt Vector Flags Bits[0] Consumer/Producer + */ +typedef enum { + AML_CONSUMER_PRODUCER = 0, + AML_CONSUMER = 1, +} AmlConsumerAndProducer; + +/* + * ACPI 5.0: Table 6-187 Extended Interrupt Descriptor Definition + * _HE field definition + */ +typedef enum { + AML_LEVEL = 0, + AML_EDGE = 1, +} AmlLevelAndEdge; + +/* + * ACPI 5.0: Table 6-187 Extended Interrupt Descriptor Definition + * _LL field definition + */ +typedef enum { + AML_ACTIVE_HIGH = 0, + AML_ACTIVE_LOW = 1, +} AmlActiveHighAndLow; + +/* + * ACPI 5.0: Table 6-187 Extended Interrupt Descriptor Definition + * _SHR field definition + */ +typedef enum { + AML_EXCLUSIVE = 0, + AML_SHARED = 1, + AML_EXCLUSIVE_AND_WAKE = 2, + AML_SHARED_AND_WAKE = 3, +} AmlShared; + +/* ACPI 1.0b: 16.2.5.2 Named Objects Encoding: MethodFlags */ +typedef enum { + AML_NOTSERIALIZED = 0, + AML_SERIALIZED = 1, +} AmlSerializeFlag; + +/* + * ACPI 5.0: Table 6-189 GPIO Connection Descriptor Definition + * GPIO Connection Type + */ +typedef enum { + AML_INTERRUPT_CONNECTION = 0, + AML_IO_CONNECTION = 1, +} AmlGpioConnectionType; + +/* + * ACPI 5.0: Table 6-189 GPIO Connection Descriptor Definition + * _PPI field definition + */ +typedef enum { + AML_PULL_DEFAULT = 0, + AML_PULL_UP = 1, + AML_PULL_DOWN = 2, + AML_PULL_NONE = 3, +} AmlPinConfig; + +typedef enum { + MEM_AFFINITY_NOFLAGS = 0, + MEM_AFFINITY_ENABLED = (1 << 0), + MEM_AFFINITY_HOTPLUGGABLE = (1 << 1), + MEM_AFFINITY_NON_VOLATILE = (1 << 2), +} MemoryAffinityFlags; + +typedef +struct AcpiBuildTables { + GArray *table_data; + GArray *rsdp; + GArray *tcpalog; + GArray *vmgenid; + GArray *hardware_errors; + BIOSLinker *linker; +} AcpiBuildTables; + +/* + * ACPI 5.0: 6.4.3.8.2 Serial Bus Connection Descriptors + * Serial Bus Type + */ +#define AML_SERIAL_BUS_TYPE_I2C 1 +#define AML_SERIAL_BUS_TYPE_SPI 2 +#define AML_SERIAL_BUS_TYPE_UART 3 + +/* + * ACPI 5.0: 6.4.3.8.2 Serial Bus Connection Descriptors + * General Flags + */ +/* Slave Mode */ +#define AML_SERIAL_BUS_FLAG_MASTER_DEVICE (1 << 0) +/* Consumer/Producer */ +#define AML_SERIAL_BUS_FLAG_CONSUME_ONLY (1 << 1) + +/** + * init_aml_allocator: + * + * Called for initializing API allocator which allow to use + * AML API. + * Returns: toplevel container which accumulates all other + * AML elements for a table. + */ +Aml *init_aml_allocator(void); + +/** + * free_aml_allocator: + * + * Releases all elements used by AML API, frees associated memory + * and invalidates AML allocator. After this call @init_aml_allocator + * should be called again if AML API is to be used again. + */ +void free_aml_allocator(void); + +/** + * aml_append: + * @parent_ctx: context to which @child element is added + * @child: element that is copied into @parent_ctx context + * + * Joins Aml elements together and helps to construct AML tables + * Examle of usage: + * Aml *table = aml_def_block("SSDT", ...); + * Aml *sb = aml_scope("\\_SB"); + * Aml *dev = aml_device("PCI0"); + * + * aml_append(dev, aml_name_decl("HID", aml_eisaid("PNP0A03"))); + * aml_append(sb, dev); + * aml_append(table, sb); + */ +void aml_append(Aml *parent_ctx, Aml *child); + +/* non block AML object primitives */ +Aml *aml_name(const char *name_format, ...) GCC_FMT_ATTR(1, 2); +Aml *aml_name_decl(const char *name, Aml *val); +Aml *aml_debug(void); +Aml *aml_return(Aml *val); +Aml *aml_int(const uint64_t val); +Aml *aml_arg(int pos); +Aml *aml_to_integer(Aml *arg); +Aml *aml_to_hexstring(Aml *src, Aml *dst); +Aml *aml_to_buffer(Aml *src, Aml *dst); +Aml *aml_store(Aml *val, Aml *target); +Aml *aml_and(Aml *arg1, Aml *arg2, Aml *dst); +Aml *aml_or(Aml *arg1, Aml *arg2, Aml *dst); +Aml *aml_land(Aml *arg1, Aml *arg2); +Aml *aml_lor(Aml *arg1, Aml *arg2); +Aml *aml_shiftleft(Aml *arg1, Aml *count); +Aml *aml_shiftright(Aml *arg1, Aml *count, Aml *dst); +Aml *aml_lless(Aml *arg1, Aml *arg2); +Aml *aml_add(Aml *arg1, Aml *arg2, Aml *dst); +Aml *aml_subtract(Aml *arg1, Aml *arg2, Aml *dst); +Aml *aml_increment(Aml *arg); +Aml *aml_decrement(Aml *arg); +Aml *aml_index(Aml *arg1, Aml *idx); +Aml *aml_notify(Aml *arg1, Aml *arg2); +Aml *aml_break(void); +Aml *aml_call0(const char *method); +Aml *aml_call1(const char *method, Aml *arg1); +Aml *aml_call2(const char *method, Aml *arg1, Aml *arg2); +Aml *aml_call3(const char *method, Aml *arg1, Aml *arg2, Aml *arg3); +Aml *aml_call4(const char *method, Aml *arg1, Aml *arg2, Aml *arg3, Aml *arg4); +Aml *aml_call5(const char *method, Aml *arg1, Aml *arg2, Aml *arg3, Aml *arg4, + Aml *arg5); +Aml *aml_gpio_int(AmlConsumerAndProducer con_and_pro, + AmlLevelAndEdge edge_level, + AmlActiveHighAndLow active_level, AmlShared shared, + AmlPinConfig pin_config, uint16_t debounce_timeout, + const uint32_t pin_list[], uint32_t pin_count, + const char *resource_source_name, + const uint8_t *vendor_data, uint16_t vendor_data_len); +Aml *aml_memory32_fixed(uint32_t addr, uint32_t size, + AmlReadAndWrite read_and_write); +Aml *aml_interrupt(AmlConsumerAndProducer con_and_pro, + AmlLevelAndEdge level_and_edge, + AmlActiveHighAndLow high_and_low, AmlShared shared, + uint32_t *irq_list, uint8_t irq_count); +Aml *aml_io(AmlIODecode dec, uint16_t min_base, uint16_t max_base, + uint8_t aln, uint8_t len); +Aml *aml_operation_region(const char *name, AmlRegionSpace rs, + Aml *offset, uint32_t len); +Aml *aml_irq_no_flags(uint8_t irq); +Aml *aml_named_field(const char *name, unsigned length); +Aml *aml_reserved_field(unsigned length); +Aml *aml_local(int num); +Aml *aml_string(const char *name_format, ...) GCC_FMT_ATTR(1, 2); +Aml *aml_lnot(Aml *arg); +Aml *aml_equal(Aml *arg1, Aml *arg2); +Aml *aml_lgreater(Aml *arg1, Aml *arg2); +Aml *aml_lgreater_equal(Aml *arg1, Aml *arg2); +Aml *aml_processor(uint8_t proc_id, uint32_t pblk_addr, uint8_t pblk_len, + const char *name_format, ...) GCC_FMT_ATTR(4, 5); +Aml *aml_eisaid(const char *str); +Aml *aml_word_bus_number(AmlMinFixed min_fixed, AmlMaxFixed max_fixed, + AmlDecode dec, uint16_t addr_gran, + uint16_t addr_min, uint16_t addr_max, + uint16_t addr_trans, uint16_t len); +Aml *aml_word_io(AmlMinFixed min_fixed, AmlMaxFixed max_fixed, + AmlDecode dec, AmlISARanges isa_ranges, + uint16_t addr_gran, uint16_t addr_min, + uint16_t addr_max, uint16_t addr_trans, + uint16_t len); +Aml *aml_dword_io(AmlMinFixed min_fixed, AmlMaxFixed max_fixed, + AmlDecode dec, AmlISARanges isa_ranges, + uint32_t addr_gran, uint32_t addr_min, + uint32_t addr_max, uint32_t addr_trans, + uint32_t len); +Aml *aml_dword_memory(AmlDecode dec, AmlMinFixed min_fixed, + AmlMaxFixed max_fixed, AmlCacheable cacheable, + AmlReadAndWrite read_and_write, + uint32_t addr_gran, uint32_t addr_min, + uint32_t addr_max, uint32_t addr_trans, + uint32_t len); +Aml *aml_qword_memory(AmlDecode dec, AmlMinFixed min_fixed, + AmlMaxFixed max_fixed, AmlCacheable cacheable, + AmlReadAndWrite read_and_write, + uint64_t addr_gran, uint64_t addr_min, + uint64_t addr_max, uint64_t addr_trans, + uint64_t len); +Aml *aml_dma(AmlDmaType typ, AmlDmaBusMaster bm, AmlTransferSize sz, + uint8_t channel); +Aml *aml_sleep(uint64_t msec); +Aml *aml_i2c_serial_bus_device(uint16_t address, const char *resource_source); + +/* Block AML object primitives */ +Aml *aml_scope(const char *name_format, ...) GCC_FMT_ATTR(1, 2); +Aml *aml_device(const char *name_format, ...) GCC_FMT_ATTR(1, 2); +Aml *aml_method(const char *name, int arg_count, AmlSerializeFlag sflag); +Aml *aml_if(Aml *predicate); +Aml *aml_else(void); +Aml *aml_while(Aml *predicate); +Aml *aml_package(uint8_t num_elements); +Aml *aml_buffer(int buffer_size, uint8_t *byte_list); +Aml *aml_resource_template(void); +Aml *aml_field(const char *name, AmlAccessType type, AmlLockRule lock, + AmlUpdateRule rule); +Aml *aml_mutex(const char *name, uint8_t sync_level); +Aml *aml_acquire(Aml *mutex, uint16_t timeout); +Aml *aml_release(Aml *mutex); +Aml *aml_alias(const char *source_object, const char *alias_object); +Aml *aml_create_field(Aml *srcbuf, Aml *bit_index, Aml *num_bits, + const char *name); +Aml *aml_create_dword_field(Aml *srcbuf, Aml *index, const char *name); +Aml *aml_create_qword_field(Aml *srcbuf, Aml *index, const char *name); +Aml *aml_varpackage(uint32_t num_elements); +Aml *aml_touuid(const char *uuid); +Aml *aml_unicode(const char *str); +Aml *aml_refof(Aml *arg); +Aml *aml_derefof(Aml *arg); +Aml *aml_sizeof(Aml *arg); +Aml *aml_concatenate(Aml *source1, Aml *source2, Aml *target); +Aml *aml_object_type(Aml *object); + +void build_append_int_noprefix(GArray *table, uint64_t value, int size); +void +build_header(BIOSLinker *linker, GArray *table_data, + AcpiTableHeader *h, const char *sig, int len, uint8_t rev, + const char *oem_id, const char *oem_table_id); +void *acpi_data_push(GArray *table_data, unsigned size); +unsigned acpi_data_len(GArray *table); +void acpi_add_table(GArray *table_offsets, GArray *table_data); +void acpi_build_tables_init(AcpiBuildTables *tables); +void acpi_build_tables_cleanup(AcpiBuildTables *tables, bool mfre); +void +build_rsdp(GArray *tbl, BIOSLinker *linker, AcpiRsdpData *rsdp_data); +void +build_rsdt(GArray *table_data, BIOSLinker *linker, GArray *table_offsets, + const char *oem_id, const char *oem_table_id); +void +build_xsdt(GArray *table_data, BIOSLinker *linker, GArray *table_offsets, + const char *oem_id, const char *oem_table_id); + +int +build_append_named_dword(GArray *array, const char *name_format, ...) +GCC_FMT_ATTR(2, 3); + +void build_append_gas(GArray *table, AmlAddressSpace as, + uint8_t bit_width, uint8_t bit_offset, + uint8_t access_width, uint64_t address); + +static inline void +build_append_gas_from_struct(GArray *table, const struct AcpiGenericAddress *s) +{ + build_append_gas(table, s->space_id, s->bit_width, s->bit_offset, + s->access_width, s->address); +} + +void build_srat_memory(AcpiSratMemoryAffinity *numamem, uint64_t base, + uint64_t len, int node, MemoryAffinityFlags flags); + +void build_slit(GArray *table_data, BIOSLinker *linker, MachineState *ms); + +void build_fadt(GArray *tbl, BIOSLinker *linker, const AcpiFadtData *f, + const char *oem_id, const char *oem_table_id); + +void build_tpm2(GArray *table_data, BIOSLinker *linker, GArray *tcpalog); +#endif diff --git a/include/hw/acpi/bios-linker-loader.h b/include/hw/acpi/bios-linker-loader.h new file mode 100644 index 000000000..a711dbced --- /dev/null +++ b/include/hw/acpi/bios-linker-loader.h @@ -0,0 +1,39 @@ +#ifndef BIOS_LINKER_LOADER_H +#define BIOS_LINKER_LOADER_H + + +typedef struct BIOSLinker { + GArray *cmd_blob; + GArray *file_list; +} BIOSLinker; + +bool bios_linker_loader_can_write_pointer(void); + +BIOSLinker *bios_linker_loader_init(void); + +void bios_linker_loader_alloc(BIOSLinker *linker, + const char *file_name, + GArray *file_blob, + uint32_t alloc_align, + bool alloc_fseg); + +void bios_linker_loader_add_checksum(BIOSLinker *linker, const char *file, + unsigned start_offset, unsigned size, + unsigned checksum_offset); + +void bios_linker_loader_add_pointer(BIOSLinker *linker, + const char *dest_file, + uint32_t dst_patched_offset, + uint8_t dst_patched_size, + const char *src_file, + uint32_t src_offset); + +void bios_linker_loader_write_pointer(BIOSLinker *linker, + const char *dest_file, + uint32_t dst_patched_offset, + uint8_t dst_patched_size, + const char *src_file, + uint32_t src_offset); + +void bios_linker_loader_cleanup(BIOSLinker *linker); +#endif diff --git a/include/hw/acpi/cpu.h b/include/hw/acpi/cpu.h new file mode 100644 index 000000000..0eeedaa49 --- /dev/null +++ b/include/hw/acpi/cpu.h @@ -0,0 +1,68 @@ +/* + * QEMU ACPI hotplug utilities + * + * Copyright (C) 2016 Red Hat Inc + * + * Authors: + * Igor Mammedov + * + * This work is licensed under the terms of the GNU GPL, version 2 or later. + * See the COPYING file in the top-level directory. + */ +#ifndef ACPI_CPU_H +#define ACPI_CPU_H + +#include "hw/qdev-core.h" +#include "hw/acpi/acpi.h" +#include "hw/acpi/aml-build.h" +#include "hw/hotplug.h" + +typedef struct AcpiCpuStatus { + struct CPUState *cpu; + uint64_t arch_id; + bool is_inserting; + bool is_removing; + uint32_t ost_event; + uint32_t ost_status; +} AcpiCpuStatus; + +typedef struct CPUHotplugState { + MemoryRegion ctrl_reg; + uint32_t selector; + uint8_t command; + uint32_t dev_count; + AcpiCpuStatus *devs; +} CPUHotplugState; + +void acpi_cpu_plug_cb(HotplugHandler *hotplug_dev, + CPUHotplugState *cpu_st, DeviceState *dev, Error **errp); + +void acpi_cpu_unplug_request_cb(HotplugHandler *hotplug_dev, + CPUHotplugState *cpu_st, + DeviceState *dev, Error **errp); + +void acpi_cpu_unplug_cb(CPUHotplugState *cpu_st, + DeviceState *dev, Error **errp); + +void cpu_hotplug_hw_init(MemoryRegion *as, Object *owner, + CPUHotplugState *state, hwaddr base_addr); + +typedef struct CPUHotplugFeatures { + bool acpi_1_compatible; + bool has_legacy_cphp; + const char *smi_path; +} CPUHotplugFeatures; + +void build_cpus_aml(Aml *table, MachineState *machine, CPUHotplugFeatures opts, + hwaddr io_base, + const char *res_root, + const char *event_handler_method); + +void acpi_cpu_ospm_status(CPUHotplugState *cpu_st, ACPIOSTInfoList ***list); + +extern const VMStateDescription vmstate_cpu_hotplug; +#define VMSTATE_CPU_HOTPLUG(cpuhp, state) \ + VMSTATE_STRUCT(cpuhp, state, 1, \ + vmstate_cpu_hotplug, CPUHotplugState) + +#endif diff --git a/include/hw/acpi/cpu_hotplug.h b/include/hw/acpi/cpu_hotplug.h new file mode 100644 index 000000000..3b932abbb --- /dev/null +++ b/include/hw/acpi/cpu_hotplug.h @@ -0,0 +1,40 @@ +/* + * QEMU ACPI hotplug utilities + * + * Copyright (C) 2013 Red Hat Inc + * + * Authors: + * Igor Mammedov + * + * This work is licensed under the terms of the GNU GPL, version 2 or later. + * See the COPYING file in the top-level directory. + */ + +#ifndef HW_ACPI_CPU_HOTPLUG_H +#define HW_ACPI_CPU_HOTPLUG_H + +#include "hw/acpi/acpi.h" +#include "hw/acpi/pc-hotplug.h" +#include "hw/acpi/aml-build.h" +#include "hw/hotplug.h" +#include "hw/acpi/cpu.h" + +typedef struct AcpiCpuHotplug { + Object *device; + MemoryRegion io; + uint8_t sts[ACPI_GPE_PROC_LEN]; +} AcpiCpuHotplug; + +void legacy_acpi_cpu_plug_cb(HotplugHandler *hotplug_dev, + AcpiCpuHotplug *g, DeviceState *dev, Error **errp); + +void legacy_acpi_cpu_hotplug_init(MemoryRegion *parent, Object *owner, + AcpiCpuHotplug *gpe_cpu, uint16_t base); + +void acpi_switch_to_modern_cphp(AcpiCpuHotplug *gpe_cpu, + CPUHotplugState *cpuhp_state, + uint16_t io_port); + +void build_legacy_cpu_hotplug_aml(Aml *ctx, MachineState *machine, + uint16_t io_base); +#endif diff --git a/include/hw/acpi/generic_event_device.h b/include/hw/acpi/generic_event_device.h new file mode 100644 index 000000000..6bed92e8f --- /dev/null +++ b/include/hw/acpi/generic_event_device.h @@ -0,0 +1,123 @@ +/* + * + * Copyright (c) 2018 Intel Corporation + * Copyright (c) 2019 Huawei Technologies R & D (UK) Ltd + * Written by Samuel Ortiz, Shameer Kolothum + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2 or later, as published by the Free Software Foundation. + * + * The ACPI Generic Event Device (GED) is a hardware-reduced specific + * device[ACPI v6.1 Section 5.6.9] that handles all platform events, + * including the hotplug ones. Generic Event Device allows platforms + * to handle interrupts in ACPI ASL statements. It follows a very + * similar approach like the _EVT method from GPIO events. All + * interrupts are listed in _CRS and the handler is written in _EVT + * method. Here, we use a single interrupt for the GED device, relying + * on IO memory region to communicate the type of device affected by + * the interrupt. This way, we can support up to 32 events with a + * unique interrupt. + * + * Here is an example. + * + * Device (\_SB.GED) + * { + * Name (_HID, "ACPI0013") + * Name (_UID, Zero) + * Name (_CRS, ResourceTemplate () + * { + * Interrupt (ResourceConsumer, Edge, ActiveHigh, Exclusive, ,, ) + * { + * 0x00000029, + * } + * }) + * OperationRegion (EREG, SystemMemory, 0x09080000, 0x04) + * Field (EREG, DWordAcc, NoLock, WriteAsZeros) + * { + * ESEL, 32 + * } + * + * Method (_EVT, 1, Serialized) // _EVT: Event + * { + * Local0 = ESEL // ESEL = IO memory region which specifies the + * // device type. + * If (((Local0 & One) == One)) + * { + * MethodEvent1() + * } + * If ((Local0 & 0x2) == 0x2) + * { + * MethodEvent2() + * } + * ... + * } + * } + * + */ + +#ifndef HW_ACPI_GED_H +#define HW_ACPI_GED_H + +#include "hw/sysbus.h" +#include "hw/acpi/memory_hotplug.h" +#include "hw/acpi/ghes.h" +#include "qom/object.h" + +#define ACPI_POWER_BUTTON_DEVICE "PWRB" + +#define TYPE_ACPI_GED "acpi-ged" +OBJECT_DECLARE_SIMPLE_TYPE(AcpiGedState, ACPI_GED) + +#define TYPE_ACPI_GED_X86 "acpi-ged-x86" +#define ACPI_GED_X86(obj) \ + OBJECT_CHECK(AcpiGedX86State, (obj), TYPE_ACPI_GED_X86) + +#define ACPI_GED_EVT_SEL_OFFSET 0x0 +#define ACPI_GED_EVT_SEL_LEN 0x4 + +#define ACPI_GED_REG_SLEEP_CTL 0x00 +#define ACPI_GED_REG_SLEEP_STS 0x01 +#define ACPI_GED_REG_RESET 0x02 +#define ACPI_GED_REG_COUNT 0x03 + +/* ACPI_GED_REG_RESET value for reset*/ +#define ACPI_GED_RESET_VALUE 0x42 + +/* ACPI_GED_REG_SLEEP_CTL.SLP_TYP value for S5 (aka poweroff) */ +#define ACPI_GED_SLP_TYP_S5 0x05 + +#define GED_DEVICE "GED" +#define AML_GED_EVT_REG "EREG" +#define AML_GED_EVT_SEL "ESEL" + +/* + * Platforms need to specify the GED event bitmap + * to describe what kind of events they want to support + * through GED. + */ +#define ACPI_GED_MEM_HOTPLUG_EVT 0x1 +#define ACPI_GED_PWR_DOWN_EVT 0x2 +#define ACPI_GED_NVDIMM_HOTPLUG_EVT 0x4 + +typedef struct GEDState { + MemoryRegion evt; + MemoryRegion regs; + uint32_t sel; +} GEDState; + +struct AcpiGedState { + SysBusDevice parent_obj; + MemHotplugState memhp_state; + MemoryRegion container_memhp; + GEDState ged_state; + uint32_t ged_event_bitmap; + qemu_irq irq; + AcpiGhesState ghes_state; +}; + +void build_ged_aml(Aml *table, const char* name, HotplugHandler *hotplug_dev, + uint32_t ged_irq, AmlRegionSpace rs, hwaddr ged_base); +void acpi_dsdt_add_power_button(Aml *scope); + +#endif diff --git a/include/hw/acpi/ghes.h b/include/hw/acpi/ghes.h new file mode 100644 index 000000000..4ad025e09 --- /dev/null +++ b/include/hw/acpi/ghes.h @@ -0,0 +1,74 @@ +/* + * Support for generating APEI tables and recording CPER for Guests + * + * Copyright (c) 2020 HUAWEI TECHNOLOGIES CO., LTD. + * + * Author: Dongjiu Geng + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + + * You should have received a copy of the GNU General Public License along + * with this program; if not, see . + */ + +#ifndef ACPI_GHES_H +#define ACPI_GHES_H + +#include "hw/acpi/bios-linker-loader.h" + +/* + * Values for Hardware Error Notification Type field + */ +enum AcpiGhesNotifyType { + /* Polled */ + ACPI_GHES_NOTIFY_POLLED = 0, + /* External Interrupt */ + ACPI_GHES_NOTIFY_EXTERNAL = 1, + /* Local Interrupt */ + ACPI_GHES_NOTIFY_LOCAL = 2, + /* SCI */ + ACPI_GHES_NOTIFY_SCI = 3, + /* NMI */ + ACPI_GHES_NOTIFY_NMI = 4, + /* CMCI, ACPI 5.0: 18.3.2.7, Table 18-290 */ + ACPI_GHES_NOTIFY_CMCI = 5, + /* MCE, ACPI 5.0: 18.3.2.7, Table 18-290 */ + ACPI_GHES_NOTIFY_MCE = 6, + /* GPIO-Signal, ACPI 6.0: 18.3.2.7, Table 18-332 */ + ACPI_GHES_NOTIFY_GPIO = 7, + /* ARMv8 SEA, ACPI 6.1: 18.3.2.9, Table 18-345 */ + ACPI_GHES_NOTIFY_SEA = 8, + /* ARMv8 SEI, ACPI 6.1: 18.3.2.9, Table 18-345 */ + ACPI_GHES_NOTIFY_SEI = 9, + /* External Interrupt - GSIV, ACPI 6.1: 18.3.2.9, Table 18-345 */ + ACPI_GHES_NOTIFY_GSIV = 10, + /* Software Delegated Exception, ACPI 6.2: 18.3.2.9, Table 18-383 */ + ACPI_GHES_NOTIFY_SDEI = 11, + /* 12 and greater are reserved */ + ACPI_GHES_NOTIFY_RESERVED = 12 +}; + +enum { + ACPI_HEST_SRC_ID_SEA = 0, + /* future ids go here */ + ACPI_HEST_SRC_ID_RESERVED, +}; + +typedef struct AcpiGhesState { + uint64_t ghes_addr_le; +} AcpiGhesState; + +void build_ghes_error_table(GArray *hardware_errors, BIOSLinker *linker); +void acpi_build_hest(GArray *table_data, BIOSLinker *linker); +void acpi_ghes_add_fw_cfg(AcpiGhesState *vms, FWCfgState *s, + GArray *hardware_errors); +int acpi_ghes_record_errors(uint8_t notify, uint64_t error_physical_addr); +#endif diff --git a/include/hw/acpi/ich9.h b/include/hw/acpi/ich9.h new file mode 100644 index 000000000..54571c77e --- /dev/null +++ b/include/hw/acpi/ich9.h @@ -0,0 +1,87 @@ +/* + * QEMU GMCH/ICH9 LPC PM Emulation + * + * Copyright (c) 2009 Isaku Yamahata + * VA Linux Systems Japan K.K. + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, see + */ + +#ifndef HW_ACPI_ICH9_H +#define HW_ACPI_ICH9_H + +#include "hw/acpi/acpi.h" +#include "hw/acpi/cpu_hotplug.h" +#include "hw/acpi/cpu.h" +#include "hw/acpi/memory_hotplug.h" +#include "hw/acpi/acpi_dev_interface.h" +#include "hw/acpi/tco.h" + +typedef struct ICH9LPCPMRegs { + /* + * In ich9 spec says that pm1_cnt register is 32bit width and + * that the upper 16bits are reserved and unused. + * PM1a_CNT_BLK = 2 in FADT so it is defined as uint16_t. + */ + ACPIREGS acpi_regs; + + MemoryRegion io; + MemoryRegion io_gpe; + MemoryRegion io_smi; + + uint32_t smi_en; + uint32_t smi_en_wmask; + uint32_t smi_sts; + + qemu_irq irq; /* SCI */ + + uint32_t pm_io_base; + Notifier powerdown_notifier; + + bool cpu_hotplug_legacy; + AcpiCpuHotplug gpe_cpu; + CPUHotplugState cpuhp_state; + + MemHotplugState acpi_memory_hotplug; + + uint8_t disable_s3; + uint8_t disable_s4; + uint8_t s4_val; + uint8_t smm_enabled; + bool enable_tco; + TCOIORegs tco_regs; +} ICH9LPCPMRegs; + +#define ACPI_PM_PROP_TCO_ENABLED "enable_tco" + +void ich9_pm_init(PCIDevice *lpc_pci, ICH9LPCPMRegs *pm, + bool smm_enabled, + qemu_irq sci_irq); + +void ich9_pm_iospace_update(ICH9LPCPMRegs *pm, uint32_t pm_io_base); +extern const VMStateDescription vmstate_ich9_pm; + +void ich9_pm_add_properties(Object *obj, ICH9LPCPMRegs *pm); + +void ich9_pm_device_pre_plug_cb(HotplugHandler *hotplug_dev, DeviceState *dev, + Error **errp); +void ich9_pm_device_plug_cb(HotplugHandler *hotplug_dev, DeviceState *dev, + Error **errp); +void ich9_pm_device_unplug_request_cb(HotplugHandler *hotplug_dev, + DeviceState *dev, Error **errp); +void ich9_pm_device_unplug_cb(HotplugHandler *hotplug_dev, DeviceState *dev, + Error **errp); + +void ich9_pm_ospm_status(AcpiDeviceIf *adev, ACPIOSTInfoList ***list); +#endif /* HW_ACPI_ICH9_H */ diff --git a/include/hw/acpi/ipmi.h b/include/hw/acpi/ipmi.h new file mode 100644 index 000000000..c14ad682a --- /dev/null +++ b/include/hw/acpi/ipmi.h @@ -0,0 +1,21 @@ +/* + * QEMU IPMI ACPI handling + * + * Copyright (c) 2015,2016 Corey Minyard + * + * This work is licensed under the terms of the GNU GPL, version 2 or later. + * See the COPYING file in the top-level directory. + */ +#ifndef HW_ACPI_IPMI_H +#define HW_ACPI_IPMI_H + +#include "hw/acpi/aml-build.h" + +/* + * Add ACPI IPMI entries for all registered IPMI devices whose parent + * bus matches the given bus. The resource is the ACPI resource that + * contains the IPMI device, this is required for the I2C CRS. + */ +void build_acpi_ipmi_devices(Aml *table, BusState *bus, const char *resource); + +#endif /* HW_ACPI_IPMI_H */ diff --git a/include/hw/acpi/memory_hotplug.h b/include/hw/acpi/memory_hotplug.h new file mode 100644 index 000000000..dfe9cf3fd --- /dev/null +++ b/include/hw/acpi/memory_hotplug.h @@ -0,0 +1,57 @@ +#ifndef QEMU_HW_ACPI_MEMORY_HOTPLUG_H +#define QEMU_HW_ACPI_MEMORY_HOTPLUG_H + +#include "hw/qdev-core.h" +#include "hw/acpi/acpi.h" +#include "hw/acpi/aml-build.h" + +#define MEMORY_SLOT_SCAN_METHOD "MSCN" +#define MEMORY_DEVICES_CONTAINER "\\_SB.MHPC" +#define MEMORY_HOTPLUG_IO_LEN 24 + +/** + * MemStatus: + * @is_removing: the memory device in slot has been requested to be ejected. + * + * This structure stores memory device's status. + */ +typedef struct MemStatus { + DeviceState *dimm; + bool is_enabled; + bool is_inserting; + bool is_removing; + uint32_t ost_event; + uint32_t ost_status; +} MemStatus; + +typedef struct MemHotplugState { + bool is_enabled; /* true if memory hotplug is supported */ + MemoryRegion io; + uint32_t selector; + uint32_t dev_count; + MemStatus *devs; +} MemHotplugState; + +void acpi_memory_hotplug_init(MemoryRegion *as, Object *owner, + MemHotplugState *state, hwaddr io_base); + +void acpi_memory_plug_cb(HotplugHandler *hotplug_dev, MemHotplugState *mem_st, + DeviceState *dev, Error **errp); +void acpi_memory_unplug_request_cb(HotplugHandler *hotplug_dev, + MemHotplugState *mem_st, + DeviceState *dev, Error **errp); +void acpi_memory_unplug_cb(MemHotplugState *mem_st, + DeviceState *dev, Error **errp); + +extern const VMStateDescription vmstate_memory_hotplug; +#define VMSTATE_MEMORY_HOTPLUG(memhp, state) \ + VMSTATE_STRUCT(memhp, state, 1, \ + vmstate_memory_hotplug, MemHotplugState) + +void acpi_memory_ospm_status(MemHotplugState *mem_st, ACPIOSTInfoList ***list); + +void build_memory_hotplug_aml(Aml *table, uint32_t nr_mem, + const char *res_root, + const char *event_handler_method, + AmlRegionSpace rs, hwaddr memhp_io_base); +#endif diff --git a/include/hw/acpi/pc-hotplug.h b/include/hw/acpi/pc-hotplug.h new file mode 100644 index 000000000..31bc9191c --- /dev/null +++ b/include/hw/acpi/pc-hotplug.h @@ -0,0 +1,34 @@ +/* + * QEMU ACPI hotplug utilities shared defines + * + * Copyright (C) 2014 Red Hat Inc + * + * Authors: + * Igor Mammedov + * + * This work is licensed under the terms of the GNU GPL, version 2 or later. + * See the COPYING file in the top-level directory. + */ +#ifndef PC_HOTPLUG_H +#define PC_HOTPLUG_H + +/* + * ONLY DEFINEs are permited in this file since it's shared + * between C and ASL code. + */ + +/* Limit for CPU arch IDs for CPU hotplug. All hotpluggable CPUs should + * have CPUClass.get_arch_id() < ACPI_CPU_HOTPLUG_ID_LIMIT. + */ +#define ACPI_CPU_HOTPLUG_ID_LIMIT 256 + +/* 256 CPU IDs, 8 bits per entry: */ +#define ACPI_GPE_PROC_LEN 32 + +#define ICH9_CPU_HOTPLUG_IO_BASE 0x0CD8 +#define PIIX4_CPU_HOTPLUG_IO_BASE 0xaf00 +#define CPU_HOTPLUG_RESOURCE_DEVICE PRES + +#define ACPI_MEMORY_HOTPLUG_BASE 0x0a00 + +#endif diff --git a/include/hw/acpi/pci.h b/include/hw/acpi/pci.h new file mode 100644 index 000000000..bf2a3ed0b --- /dev/null +++ b/include/hw/acpi/pci.h @@ -0,0 +1,37 @@ +/* + * Support for generating PCI related ACPI tables and passing them to Guests + * + * Copyright (C) 2006 Fabrice Bellard + * Copyright (C) 2008-2010 Kevin O'Connor + * Copyright (C) 2013-2019 Red Hat Inc + * Copyright (C) 2019 Intel Corporation + * + * Author: Wei Yang + * Author: Michael S. Tsirkin + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + + * You should have received a copy of the GNU General Public License along + * with this program; if not, see . + */ + +#ifndef HW_ACPI_PCI_H +#define HW_ACPI_PCI_H + +#include "hw/acpi/bios-linker-loader.h" + +typedef struct AcpiMcfgInfo { + uint64_t base; + uint32_t size; +} AcpiMcfgInfo; + +void build_mcfg(GArray *table_data, BIOSLinker *linker, AcpiMcfgInfo *info); +#endif diff --git a/include/hw/acpi/pcihp.h b/include/hw/acpi/pcihp.h new file mode 100644 index 000000000..dfd375820 --- /dev/null +++ b/include/hw/acpi/pcihp.h @@ -0,0 +1,83 @@ +/* + * QEMU<->ACPI BIOS PCI hotplug interface + * + * QEMU supports PCI hotplug via ACPI. This module + * implements the interface between QEMU and the ACPI BIOS. + * Interface specification - see docs/specs/acpi_pci_hotplug.txt + * + * Copyright (c) 2013, Red Hat Inc, Michael S. Tsirkin (mst@redhat.com) + * Copyright (c) 2006 Fabrice Bellard + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License version 2.1 as published by the Free Software Foundation. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, see + * + * Contributions after 2012-01-13 are licensed under the terms of the + * GNU GPL, version 2 or (at your option) any later version. + */ + +#ifndef HW_ACPI_PCIHP_H +#define HW_ACPI_PCIHP_H + +#include "hw/acpi/acpi.h" +#include "hw/hotplug.h" + +#define ACPI_PCIHP_IO_BASE_PROP "acpi-pcihp-io-base" +#define ACPI_PCIHP_IO_LEN_PROP "acpi-pcihp-io-len" + +typedef struct AcpiPciHpPciStatus { + uint32_t up; + uint32_t down; + uint32_t hotplug_enable; +} AcpiPciHpPciStatus; + +#define ACPI_PCIHP_PROP_BSEL "acpi-pcihp-bsel" +#define ACPI_PCIHP_MAX_HOTPLUG_BUS 256 +#define ACPI_PCIHP_BSEL_DEFAULT 0x0 + +typedef struct AcpiPciHpState { + AcpiPciHpPciStatus acpi_pcihp_pci_status[ACPI_PCIHP_MAX_HOTPLUG_BUS]; + uint32_t hotplug_select; + PCIBus *root; + MemoryRegion io; + bool legacy_piix; + uint16_t io_base; + uint16_t io_len; +} AcpiPciHpState; + +void acpi_pcihp_init(Object *owner, AcpiPciHpState *, PCIBus *root, + MemoryRegion *address_space_io, bool bridges_enabled); + +void acpi_pcihp_device_pre_plug_cb(HotplugHandler *hotplug_dev, + DeviceState *dev, Error **errp); +void acpi_pcihp_device_plug_cb(HotplugHandler *hotplug_dev, AcpiPciHpState *s, + DeviceState *dev, Error **errp); +void acpi_pcihp_device_unplug_cb(HotplugHandler *hotplug_dev, AcpiPciHpState *s, + DeviceState *dev, Error **errp); +void acpi_pcihp_device_unplug_request_cb(HotplugHandler *hotplug_dev, + AcpiPciHpState *s, DeviceState *dev, + Error **errp); + +/* Called on reset */ +void acpi_pcihp_reset(AcpiPciHpState *s, bool acpihp_root_off); + +extern const VMStateDescription vmstate_acpi_pcihp_pci_status; + +#define VMSTATE_PCI_HOTPLUG(pcihp, state, test_pcihp) \ + VMSTATE_UINT32_TEST(pcihp.hotplug_select, state, \ + test_pcihp), \ + VMSTATE_STRUCT_ARRAY_TEST(pcihp.acpi_pcihp_pci_status, state, \ + ACPI_PCIHP_MAX_HOTPLUG_BUS, \ + test_pcihp, 1, \ + vmstate_acpi_pcihp_pci_status, \ + AcpiPciHpPciStatus) + +#endif diff --git a/include/hw/acpi/tco.h b/include/hw/acpi/tco.h new file mode 100644 index 000000000..a1e0da821 --- /dev/null +++ b/include/hw/acpi/tco.h @@ -0,0 +1,82 @@ +/* + * QEMU ICH9 TCO emulation + * + * Copyright (c) 2015 Paulo Alcantara + * + * This work is licensed under the terms of the GNU GPL, version 2 or later. + * See the COPYING file in the top-level directory. + */ + +#ifndef HW_ACPI_TCO_H +#define HW_ACPI_TCO_H + +#include "exec/memory.h" + +/* As per ICH9 spec, the internal timer has an error of ~0.6s on every tick */ +#define TCO_TICK_NSEC 600000000LL + +/* TCO I/O register offsets */ +enum { + TCO_RLD = 0x00, + TCO_DAT_IN = 0x02, + TCO_DAT_OUT = 0x03, + TCO1_STS = 0x04, + TCO2_STS = 0x06, + TCO1_CNT = 0x08, + TCO2_CNT = 0x0a, + TCO_MESSAGE1 = 0x0c, + TCO_MESSAGE2 = 0x0d, + TCO_WDCNT = 0x0e, + SW_IRQ_GEN = 0x10, + TCO_TMR = 0x12, +}; + +/* TCO I/O register control/status bits */ +enum { + SW_TCO_SMI = 1 << 1, + TCO_INT_STS = 1 << 2, + TCO_LOCK = 1 << 12, + TCO_TMR_HLT = 1 << 11, + TCO_TIMEOUT = 1 << 3, + TCO_SECOND_TO_STS = 1 << 1, + TCO_BOOT_STS = 1 << 2, +}; + +/* TCO I/O registers mask bits */ +enum { + TCO_RLD_MASK = 0x3ff, + TCO1_STS_MASK = 0xe870, + TCO2_STS_MASK = 0xfff8, + TCO1_CNT_MASK = 0xfeff, + TCO_TMR_MASK = 0x3ff, +}; + +typedef struct TCOIORegs { + struct { + uint16_t rld; + uint8_t din; + uint8_t dout; + uint16_t sts1; + uint16_t sts2; + uint16_t cnt1; + uint16_t cnt2; + uint8_t msg1; + uint8_t msg2; + uint8_t wdcnt; + uint16_t tmr; + } tco; + uint8_t sw_irq_gen; + + QEMUTimer *tco_timer; + int64_t expire_time; + uint8_t timeouts_no; + + MemoryRegion io; +} TCOIORegs; + +/* tco.c */ +void acpi_pm_tco_init(TCOIORegs *tr, MemoryRegion *parent); + +extern const VMStateDescription vmstate_tco_io_sts; + +#endif /* HW_ACPI_TCO_H */ diff --git a/include/hw/acpi/tpm.h b/include/hw/acpi/tpm.h new file mode 100644 index 000000000..1a2a57a21 --- /dev/null +++ b/include/hw/acpi/tpm.h @@ -0,0 +1,212 @@ +/* + * tpm.h - TPM ACPI definitions + * + * Copyright (C) 2014 IBM Corporation + * + * Authors: + * Stefan Berger + * + * This work is licensed under the terms of the GNU GPL, version 2 or later. + * See the COPYING file in the top-level directory. + * + * Implementation of the TIS interface according to specs found at + * http://www.trustedcomputinggroup.org + * + */ +#ifndef HW_ACPI_TPM_H +#define HW_ACPI_TPM_H + +#include "qemu/units.h" +#include "hw/registerfields.h" +#include "hw/acpi/aml-build.h" +#include "sysemu/tpm.h" + +#define TPM_TIS_ADDR_BASE 0xFED40000 +#define TPM_TIS_ADDR_SIZE 0x5000 + +#define TPM_TIS_IRQ 5 + +#define TPM_TIS_NUM_LOCALITIES 5 /* per spec */ +#define TPM_TIS_LOCALITY_SHIFT 12 + +/* tis registers */ +#define TPM_TIS_REG_ACCESS 0x00 +#define TPM_TIS_REG_INT_ENABLE 0x08 +#define TPM_TIS_REG_INT_VECTOR 0x0c +#define TPM_TIS_REG_INT_STATUS 0x10 +#define TPM_TIS_REG_INTF_CAPABILITY 0x14 +#define TPM_TIS_REG_STS 0x18 +#define TPM_TIS_REG_DATA_FIFO 0x24 +#define TPM_TIS_REG_INTERFACE_ID 0x30 +#define TPM_TIS_REG_DATA_XFIFO 0x80 +#define TPM_TIS_REG_DATA_XFIFO_END 0xbc +#define TPM_TIS_REG_DID_VID 0xf00 +#define TPM_TIS_REG_RID 0xf04 + +/* vendor-specific registers */ +#define TPM_TIS_REG_DEBUG 0xf90 + +#define TPM_TIS_STS_TPM_FAMILY_MASK (0x3 << 26)/* TPM 2.0 */ +#define TPM_TIS_STS_TPM_FAMILY1_2 (0 << 26) /* TPM 2.0 */ +#define TPM_TIS_STS_TPM_FAMILY2_0 (1 << 26) /* TPM 2.0 */ +#define TPM_TIS_STS_RESET_ESTABLISHMENT_BIT (1 << 25) /* TPM 2.0 */ +#define TPM_TIS_STS_COMMAND_CANCEL (1 << 24) /* TPM 2.0 */ + +#define TPM_TIS_STS_VALID (1 << 7) +#define TPM_TIS_STS_COMMAND_READY (1 << 6) +#define TPM_TIS_STS_TPM_GO (1 << 5) +#define TPM_TIS_STS_DATA_AVAILABLE (1 << 4) +#define TPM_TIS_STS_EXPECT (1 << 3) +#define TPM_TIS_STS_SELFTEST_DONE (1 << 2) +#define TPM_TIS_STS_RESPONSE_RETRY (1 << 1) + +#define TPM_TIS_BURST_COUNT_SHIFT 8 +#define TPM_TIS_BURST_COUNT(X) \ + ((X) << TPM_TIS_BURST_COUNT_SHIFT) + +#define TPM_TIS_ACCESS_TPM_REG_VALID_STS (1 << 7) +#define TPM_TIS_ACCESS_ACTIVE_LOCALITY (1 << 5) +#define TPM_TIS_ACCESS_BEEN_SEIZED (1 << 4) +#define TPM_TIS_ACCESS_SEIZE (1 << 3) +#define TPM_TIS_ACCESS_PENDING_REQUEST (1 << 2) +#define TPM_TIS_ACCESS_REQUEST_USE (1 << 1) +#define TPM_TIS_ACCESS_TPM_ESTABLISHMENT (1 << 0) + +#define TPM_TIS_INT_ENABLED (1 << 31) +#define TPM_TIS_INT_DATA_AVAILABLE (1 << 0) +#define TPM_TIS_INT_STS_VALID (1 << 1) +#define TPM_TIS_INT_LOCALITY_CHANGED (1 << 2) +#define TPM_TIS_INT_COMMAND_READY (1 << 7) + +#define TPM_TIS_INT_POLARITY_MASK (3 << 3) +#define TPM_TIS_INT_POLARITY_LOW_LEVEL (1 << 3) + +#define TPM_TIS_INTERRUPTS_SUPPORTED (TPM_TIS_INT_LOCALITY_CHANGED | \ + TPM_TIS_INT_DATA_AVAILABLE | \ + TPM_TIS_INT_STS_VALID | \ + TPM_TIS_INT_COMMAND_READY) + +#define TPM_TIS_CAP_INTERFACE_VERSION1_3 (2 << 28) +#define TPM_TIS_CAP_INTERFACE_VERSION1_3_FOR_TPM2_0 (3 << 28) +#define TPM_TIS_CAP_DATA_TRANSFER_64B (3 << 9) +#define TPM_TIS_CAP_DATA_TRANSFER_LEGACY (0 << 9) +#define TPM_TIS_CAP_BURST_COUNT_DYNAMIC (0 << 8) +#define TPM_TIS_CAP_INTERRUPT_LOW_LEVEL (1 << 4) /* support is mandatory */ +#define TPM_TIS_CAPABILITIES_SUPPORTED1_3 \ + (TPM_TIS_CAP_INTERRUPT_LOW_LEVEL | \ + TPM_TIS_CAP_BURST_COUNT_DYNAMIC | \ + TPM_TIS_CAP_DATA_TRANSFER_64B | \ + TPM_TIS_CAP_INTERFACE_VERSION1_3 | \ + TPM_TIS_INTERRUPTS_SUPPORTED) + +#define TPM_TIS_CAPABILITIES_SUPPORTED2_0 \ + (TPM_TIS_CAP_INTERRUPT_LOW_LEVEL | \ + TPM_TIS_CAP_BURST_COUNT_DYNAMIC | \ + TPM_TIS_CAP_DATA_TRANSFER_64B | \ + TPM_TIS_CAP_INTERFACE_VERSION1_3_FOR_TPM2_0 | \ + TPM_TIS_INTERRUPTS_SUPPORTED) + +#define TPM_TIS_IFACE_ID_INTERFACE_TIS1_3 (0xf) /* TPM 2.0 */ +#define TPM_TIS_IFACE_ID_INTERFACE_FIFO (0x0) /* TPM 2.0 */ +#define TPM_TIS_IFACE_ID_INTERFACE_VER_FIFO (0 << 4) /* TPM 2.0 */ +#define TPM_TIS_IFACE_ID_CAP_5_LOCALITIES (1 << 8) /* TPM 2.0 */ +#define TPM_TIS_IFACE_ID_CAP_TIS_SUPPORTED (1 << 13) /* TPM 2.0 */ +#define TPM_TIS_IFACE_ID_INT_SEL_LOCK (1 << 19) /* TPM 2.0 */ + +#define TPM_TIS_IFACE_ID_SUPPORTED_FLAGS1_3 \ + (TPM_TIS_IFACE_ID_INTERFACE_TIS1_3 | \ + (~0u << 4)/* all of it is don't care */) + +/* if backend was a TPM 2.0: */ +#define TPM_TIS_IFACE_ID_SUPPORTED_FLAGS2_0 \ + (TPM_TIS_IFACE_ID_INTERFACE_FIFO | \ + TPM_TIS_IFACE_ID_INTERFACE_VER_FIFO | \ + TPM_TIS_IFACE_ID_CAP_5_LOCALITIES | \ + TPM_TIS_IFACE_ID_CAP_TIS_SUPPORTED) + +#define TPM_TIS_TPM_DID 0x0001 +#define TPM_TIS_TPM_VID PCI_VENDOR_ID_IBM +#define TPM_TIS_TPM_RID 0x0001 + +#define TPM_TIS_NO_DATA_BYTE 0xff + + +REG32(CRB_LOC_STATE, 0x00) + FIELD(CRB_LOC_STATE, tpmEstablished, 0, 1) + FIELD(CRB_LOC_STATE, locAssigned, 1, 1) + FIELD(CRB_LOC_STATE, activeLocality, 2, 3) + FIELD(CRB_LOC_STATE, reserved, 5, 2) + FIELD(CRB_LOC_STATE, tpmRegValidSts, 7, 1) +REG32(CRB_LOC_CTRL, 0x08) +REG32(CRB_LOC_STS, 0x0C) + FIELD(CRB_LOC_STS, Granted, 0, 1) + FIELD(CRB_LOC_STS, beenSeized, 1, 1) +REG32(CRB_INTF_ID, 0x30) + FIELD(CRB_INTF_ID, InterfaceType, 0, 4) + FIELD(CRB_INTF_ID, InterfaceVersion, 4, 4) + FIELD(CRB_INTF_ID, CapLocality, 8, 1) + FIELD(CRB_INTF_ID, CapCRBIdleBypass, 9, 1) + FIELD(CRB_INTF_ID, Reserved1, 10, 1) + FIELD(CRB_INTF_ID, CapDataXferSizeSupport, 11, 2) + FIELD(CRB_INTF_ID, CapFIFO, 13, 1) + FIELD(CRB_INTF_ID, CapCRB, 14, 1) + FIELD(CRB_INTF_ID, CapIFRes, 15, 2) + FIELD(CRB_INTF_ID, InterfaceSelector, 17, 2) + FIELD(CRB_INTF_ID, IntfSelLock, 19, 1) + FIELD(CRB_INTF_ID, Reserved2, 20, 4) + FIELD(CRB_INTF_ID, RID, 24, 8) +REG32(CRB_INTF_ID2, 0x34) + FIELD(CRB_INTF_ID2, VID, 0, 16) + FIELD(CRB_INTF_ID2, DID, 16, 16) +REG32(CRB_CTRL_EXT, 0x38) +REG32(CRB_CTRL_REQ, 0x40) +REG32(CRB_CTRL_STS, 0x44) + FIELD(CRB_CTRL_STS, tpmSts, 0, 1) + FIELD(CRB_CTRL_STS, tpmIdle, 1, 1) +REG32(CRB_CTRL_CANCEL, 0x48) +REG32(CRB_CTRL_START, 0x4C) +REG32(CRB_INT_ENABLED, 0x50) +REG32(CRB_INT_STS, 0x54) +REG32(CRB_CTRL_CMD_SIZE, 0x58) +REG32(CRB_CTRL_CMD_LADDR, 0x5C) +REG32(CRB_CTRL_CMD_HADDR, 0x60) +REG32(CRB_CTRL_RSP_SIZE, 0x64) +REG32(CRB_CTRL_RSP_ADDR, 0x68) +REG32(CRB_DATA_BUFFER, 0x80) + +#define TPM_CRB_ADDR_BASE 0xFED40000 +#define TPM_CRB_ADDR_SIZE 0x1000 +#define TPM_CRB_ADDR_CTRL (TPM_CRB_ADDR_BASE + A_CRB_CTRL_REQ) +#define TPM_CRB_R_MAX R_CRB_DATA_BUFFER + +#define TPM_LOG_AREA_MINIMUM_SIZE (64 * KiB) + +#define TPM_TCPA_ACPI_CLASS_CLIENT 0 +#define TPM_TCPA_ACPI_CLASS_SERVER 1 + +#define TPM2_ACPI_CLASS_CLIENT 0 +#define TPM2_ACPI_CLASS_SERVER 1 + +#define TPM2_START_METHOD_MMIO 6 +#define TPM2_START_METHOD_CRB 7 + +/* + * Physical Presence Interface + */ +#define TPM_PPI_ADDR_SIZE 0x400 +#define TPM_PPI_ADDR_BASE 0xFED45000 + +#define TPM_PPI_VERSION_NONE 0 +#define TPM_PPI_VERSION_1_30 1 + +/* whether function is blocked by BIOS settings; bits 0, 1, 2 */ +#define TPM_PPI_FUNC_NOT_IMPLEMENTED (0 << 0) +#define TPM_PPI_FUNC_BIOS_ONLY (1 << 0) +#define TPM_PPI_FUNC_BLOCKED (2 << 0) +#define TPM_PPI_FUNC_ALLOWED_USR_REQ (3 << 0) +#define TPM_PPI_FUNC_ALLOWED_USR_NOT_REQ (4 << 0) +#define TPM_PPI_FUNC_MASK (7 << 0) + +void tpm_build_ppi_acpi(TPMIf *tpm, Aml *dev); + +#endif /* HW_ACPI_TPM_H */ diff --git a/include/hw/acpi/utils.h b/include/hw/acpi/utils.h new file mode 100644 index 000000000..140b4de60 --- /dev/null +++ b/include/hw/acpi/utils.h @@ -0,0 +1,9 @@ +#ifndef HW_ACPI_UTILS_H +#define HW_ACPI_UTILS_H + +#include "hw/nvram/fw_cfg.h" + +MemoryRegion *acpi_add_rom_blob(FWCfgCallback update, void *opaque, + GArray *blob, const char *name, + uint64_t max_size); +#endif diff --git a/include/hw/acpi/vmgenid.h b/include/hw/acpi/vmgenid.h new file mode 100644 index 000000000..cb4ad37fc --- /dev/null +++ b/include/hw/acpi/vmgenid.h @@ -0,0 +1,37 @@ +#ifndef ACPI_VMGENID_H +#define ACPI_VMGENID_H + +#include "hw/acpi/bios-linker-loader.h" +#include "hw/qdev-core.h" +#include "qemu/uuid.h" +#include "qom/object.h" + +#define TYPE_VMGENID "vmgenid" +#define VMGENID_GUID "guid" +#define VMGENID_GUID_FW_CFG_FILE "etc/vmgenid_guid" +#define VMGENID_ADDR_FW_CFG_FILE "etc/vmgenid_addr" + +#define VMGENID_FW_CFG_SIZE 4096 /* Occupy a page of memory */ +#define VMGENID_GUID_OFFSET 40 /* allow space for + * OVMF SDT Header Probe Supressor + */ + +OBJECT_DECLARE_SIMPLE_TYPE(VmGenIdState, VMGENID) + +struct VmGenIdState { + DeviceState parent_obj; + QemuUUID guid; /* The 128-bit GUID seen by the guest */ + uint8_t vmgenid_addr_le[8]; /* Address of the GUID (little-endian) */ +}; + +/* returns NULL unless there is exactly one device */ +static inline Object *find_vmgenid_dev(void) +{ + return object_resolve_path_type("", TYPE_VMGENID, NULL); +} + +void vmgenid_build_acpi(VmGenIdState *vms, GArray *table_data, GArray *guid, + BIOSLinker *linker); +void vmgenid_add_fw_cfg(VmGenIdState *vms, FWCfgState *s, GArray *guid); + +#endif diff --git a/include/hw/adc/stm32f2xx_adc.h b/include/hw/adc/stm32f2xx_adc.h new file mode 100644 index 000000000..42b48981f --- /dev/null +++ b/include/hw/adc/stm32f2xx_adc.h @@ -0,0 +1,89 @@ +/* + * STM32F2XX ADC + * + * Copyright (c) 2014 Alistair Francis + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ + +#ifndef HW_STM32F2XX_ADC_H +#define HW_STM32F2XX_ADC_H + +#include "hw/sysbus.h" +#include "qom/object.h" + +#define ADC_SR 0x00 +#define ADC_CR1 0x04 +#define ADC_CR2 0x08 +#define ADC_SMPR1 0x0C +#define ADC_SMPR2 0x10 +#define ADC_JOFR1 0x14 +#define ADC_JOFR2 0x18 +#define ADC_JOFR3 0x1C +#define ADC_JOFR4 0x20 +#define ADC_HTR 0x24 +#define ADC_LTR 0x28 +#define ADC_SQR1 0x2C +#define ADC_SQR2 0x30 +#define ADC_SQR3 0x34 +#define ADC_JSQR 0x38 +#define ADC_JDR1 0x3C +#define ADC_JDR2 0x40 +#define ADC_JDR3 0x44 +#define ADC_JDR4 0x48 +#define ADC_DR 0x4C + +#define ADC_CR2_ADON 0x01 +#define ADC_CR2_CONT 0x02 +#define ADC_CR2_ALIGN 0x800 +#define ADC_CR2_SWSTART 0x40000000 + +#define ADC_CR1_RES 0x3000000 + +#define ADC_COMMON_ADDRESS 0x100 + +#define TYPE_STM32F2XX_ADC "stm32f2xx-adc" +OBJECT_DECLARE_SIMPLE_TYPE(STM32F2XXADCState, STM32F2XX_ADC) + +struct STM32F2XXADCState { + /* */ + SysBusDevice parent_obj; + + /* */ + MemoryRegion mmio; + + uint32_t adc_sr; + uint32_t adc_cr1; + uint32_t adc_cr2; + uint32_t adc_smpr1; + uint32_t adc_smpr2; + uint32_t adc_jofr[4]; + uint32_t adc_htr; + uint32_t adc_ltr; + uint32_t adc_sqr1; + uint32_t adc_sqr2; + uint32_t adc_sqr3; + uint32_t adc_jsqr; + uint32_t adc_jdr[4]; + uint32_t adc_dr; + + qemu_irq irq; +}; + +#endif /* HW_STM32F2XX_ADC_H */ diff --git a/include/hw/arm/allwinner-a10.h b/include/hw/arm/allwinner-a10.h new file mode 100644 index 000000000..a76dc7b84 --- /dev/null +++ b/include/hw/arm/allwinner-a10.h @@ -0,0 +1,44 @@ +#ifndef HW_ARM_ALLWINNER_A10_H +#define HW_ARM_ALLWINNER_A10_H + +#include "qemu/error-report.h" +#include "hw/char/serial.h" +#include "hw/arm/boot.h" +#include "hw/timer/allwinner-a10-pit.h" +#include "hw/intc/allwinner-a10-pic.h" +#include "hw/net/allwinner_emac.h" +#include "hw/sd/allwinner-sdhost.h" +#include "hw/ide/ahci.h" +#include "hw/usb/hcd-ohci.h" +#include "hw/usb/hcd-ehci.h" +#include "hw/rtc/allwinner-rtc.h" + +#include "target/arm/cpu.h" +#include "qom/object.h" + + +#define AW_A10_SDRAM_BASE 0x40000000 + +#define AW_A10_NUM_USB 2 + +#define TYPE_AW_A10 "allwinner-a10" +OBJECT_DECLARE_SIMPLE_TYPE(AwA10State, AW_A10) + +struct AwA10State { + /*< private >*/ + DeviceState parent_obj; + /*< public >*/ + + ARMCPU cpu; + AwA10PITState timer; + AwA10PICState intc; + AwEmacState emac; + AllwinnerAHCIState sata; + AwSdHostState mmc0; + AwRtcState rtc; + MemoryRegion sram_a; + EHCISysBusState ehci[AW_A10_NUM_USB]; + OHCISysBusState ohci[AW_A10_NUM_USB]; +}; + +#endif diff --git a/include/hw/arm/allwinner-h3.h b/include/hw/arm/allwinner-h3.h new file mode 100644 index 000000000..cc308a5d2 --- /dev/null +++ b/include/hw/arm/allwinner-h3.h @@ -0,0 +1,161 @@ +/* + * Allwinner H3 System on Chip emulation + * + * Copyright (C) 2019 Niek Linnenbank + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + */ + +/* + * The Allwinner H3 is a System on Chip containing four ARM Cortex A7 + * processor cores. Features and specifications include DDR2/DDR3 memory, + * SD/MMC storage cards, 10/100/1000Mbit Ethernet, USB 2.0, HDMI and + * various I/O modules. + * + * This implementation is based on the following datasheet: + * + * https://linux-sunxi.org/File:Allwinner_H3_Datasheet_V1.2.pdf + * + * The latest datasheet and more info can be found on the Linux Sunxi wiki: + * + * https://linux-sunxi.org/H3 + */ + +#ifndef HW_ARM_ALLWINNER_H3_H +#define HW_ARM_ALLWINNER_H3_H + +#include "qom/object.h" +#include "hw/arm/boot.h" +#include "hw/timer/allwinner-a10-pit.h" +#include "hw/intc/arm_gic.h" +#include "hw/misc/allwinner-h3-ccu.h" +#include "hw/misc/allwinner-cpucfg.h" +#include "hw/misc/allwinner-h3-dramc.h" +#include "hw/misc/allwinner-h3-sysctrl.h" +#include "hw/misc/allwinner-sid.h" +#include "hw/sd/allwinner-sdhost.h" +#include "hw/net/allwinner-sun8i-emac.h" +#include "hw/rtc/allwinner-rtc.h" +#include "target/arm/cpu.h" +#include "sysemu/block-backend.h" + +/** + * Allwinner H3 device list + * + * This enumeration is can be used refer to a particular device in the + * Allwinner H3 SoC. For example, the physical memory base address for + * each device can be found in the AwH3State object in the memmap member + * using the device enum value as index. + * + * @see AwH3State + */ +enum { + AW_H3_DEV_SRAM_A1, + AW_H3_DEV_SRAM_A2, + AW_H3_DEV_SRAM_C, + AW_H3_DEV_SYSCTRL, + AW_H3_DEV_MMC0, + AW_H3_DEV_SID, + AW_H3_DEV_EHCI0, + AW_H3_DEV_OHCI0, + AW_H3_DEV_EHCI1, + AW_H3_DEV_OHCI1, + AW_H3_DEV_EHCI2, + AW_H3_DEV_OHCI2, + AW_H3_DEV_EHCI3, + AW_H3_DEV_OHCI3, + AW_H3_DEV_CCU, + AW_H3_DEV_PIT, + AW_H3_DEV_UART0, + AW_H3_DEV_UART1, + AW_H3_DEV_UART2, + AW_H3_DEV_UART3, + AW_H3_DEV_EMAC, + AW_H3_DEV_DRAMCOM, + AW_H3_DEV_DRAMCTL, + AW_H3_DEV_DRAMPHY, + AW_H3_DEV_GIC_DIST, + AW_H3_DEV_GIC_CPU, + AW_H3_DEV_GIC_HYP, + AW_H3_DEV_GIC_VCPU, + AW_H3_DEV_RTC, + AW_H3_DEV_CPUCFG, + AW_H3_DEV_SDRAM +}; + +/** Total number of CPU cores in the H3 SoC */ +#define AW_H3_NUM_CPUS (4) + +/** + * Allwinner H3 object model + * @{ + */ + +/** Object type for the Allwinner H3 SoC */ +#define TYPE_AW_H3 "allwinner-h3" + +/** Convert input object to Allwinner H3 state object */ +OBJECT_DECLARE_SIMPLE_TYPE(AwH3State, AW_H3) + +/** @} */ + +/** + * Allwinner H3 object + * + * This struct contains the state of all the devices + * which are currently emulated by the H3 SoC code. + */ +struct AwH3State { + /*< private >*/ + DeviceState parent_obj; + /*< public >*/ + + ARMCPU cpus[AW_H3_NUM_CPUS]; + const hwaddr *memmap; + AwA10PITState timer; + AwH3ClockCtlState ccu; + AwCpuCfgState cpucfg; + AwH3DramCtlState dramc; + AwH3SysCtrlState sysctrl; + AwSidState sid; + AwSdHostState mmc0; + AwSun8iEmacState emac; + AwRtcState rtc; + GICState gic; + MemoryRegion sram_a1; + MemoryRegion sram_a2; + MemoryRegion sram_c; +}; + +/** + * Emulate Boot ROM firmware setup functionality. + * + * A real Allwinner H3 SoC contains a Boot ROM + * which is the first code that runs right after + * the SoC is powered on. The Boot ROM is responsible + * for loading user code (e.g. a bootloader) from any + * of the supported external devices and writing the + * downloaded code to internal SRAM. After loading the SoC + * begins executing the code written to SRAM. + * + * This function emulates the Boot ROM by copying 32 KiB + * of data from the given block device and writes it to + * the start of the first internal SRAM memory. + * + * @s: Allwinner H3 state object pointer + * @blk: Block backend device object pointer + */ +void allwinner_h3_bootrom_setup(AwH3State *s, BlockBackend *blk); + +#endif /* HW_ARM_ALLWINNER_H3_H */ diff --git a/include/hw/arm/armsse.h b/include/hw/arm/armsse.h new file mode 100644 index 000000000..77f86771c --- /dev/null +++ b/include/hw/arm/armsse.h @@ -0,0 +1,230 @@ +/* + * ARM SSE (Subsystems for Embedded): IoTKit, SSE-200 + * + * Copyright (c) 2018 Linaro Limited + * Written by Peter Maydell + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 or + * (at your option) any later version. + */ + +/* + * This is a model of the Arm "Subsystems for Embedded" family of + * hardware, which include the IoT Kit and the SSE-050, SSE-100 and + * SSE-200. Currently we model: + * - the Arm IoT Kit which is documented in + * http://infocenter.arm.com/help/index.jsp?topic=/com.arm.doc.ecm0601256/index.html + * - the SSE-200 which is documented in + * http://infocenter.arm.com/help/topic/com.arm.doc.101104_0100_00_en/corelink_sse200_subsystem_for_embedded_technical_reference_manual_101104_0100_00_en.pdf + * + * The IoTKit contains: + * a Cortex-M33 + * the IDAU + * some timers and watchdogs + * two peripheral protection controllers + * a memory protection controller + * a security controller + * a bus fabric which arranges that some parts of the address + * space are secure and non-secure aliases of each other + * The SSE-200 additionally contains: + * a second Cortex-M33 + * two Message Handling Units (MHUs) + * an optional CryptoCell (which we do not model) + * more SRAM banks with associated MPCs + * multiple Power Policy Units (PPUs) + * a control interface for an icache for each CPU + * per-CPU identity and control register blocks + * + * QEMU interface: + * + QOM property "memory" is a MemoryRegion containing the devices provided + * by the board model. + * + QOM property "MAINCLK" is the frequency of the main system clock + * + QOM property "EXP_NUMIRQ" sets the number of expansion interrupts. + * (In hardware, the SSE-200 permits the number of expansion interrupts + * for the two CPUs to be configured separately, but we restrict it to + * being the same for both, to avoid having to have separate Property + * lists for different variants. This restriction can be relaxed later + * if necessary.) + * + QOM property "SRAM_ADDR_WIDTH" sets the number of bits used for the + * address of each SRAM bank (and thus the total amount of internal SRAM) + * + QOM property "init-svtor" sets the initial value of the CPU SVTOR register + * (where it expects to load the PC and SP from the vector table on reset) + * + QOM properties "CPU0_FPU", "CPU0_DSP", "CPU1_FPU" and "CPU1_DSP" which + * set whether the CPUs have the FPU and DSP features present. The default + * (matching the hardware) is that for CPU0 in an IoTKit and CPU1 in an + * SSE-200 both are present; CPU0 in an SSE-200 has neither. + * Since the IoTKit has only one CPU, it does not have the CPU1_* properties. + * + Named GPIO inputs "EXP_IRQ" 0..n are the expansion interrupts for CPU 0, + * which are wired to its NVIC lines 32 .. n+32 + * + Named GPIO inputs "EXP_CPU1_IRQ" 0..n are the expansion interrupts for + * CPU 1, which are wired to its NVIC lines 32 .. n+32 + * + sysbus MMIO region 0 is the "AHB Slave Expansion" which allows + * bus master devices in the board model to make transactions into + * all the devices and memory areas in the IoTKit + * Controlling up to 4 AHB expansion PPBs which a system using the IoTKit + * might provide: + * + named GPIO outputs apb_ppcexp{0,1,2,3}_nonsec[0..15] + * + named GPIO outputs apb_ppcexp{0,1,2,3}_ap[0..15] + * + named GPIO outputs apb_ppcexp{0,1,2,3}_irq_enable + * + named GPIO outputs apb_ppcexp{0,1,2,3}_irq_clear + * + named GPIO inputs apb_ppcexp{0,1,2,3}_irq_status + * Controlling each of the 4 expansion AHB PPCs which a system using the IoTKit + * might provide: + * + named GPIO outputs ahb_ppcexp{0,1,2,3}_nonsec[0..15] + * + named GPIO outputs ahb_ppcexp{0,1,2,3}_ap[0..15] + * + named GPIO outputs ahb_ppcexp{0,1,2,3}_irq_enable + * + named GPIO outputs ahb_ppcexp{0,1,2,3}_irq_clear + * + named GPIO inputs ahb_ppcexp{0,1,2,3}_irq_status + * Controlling each of the 16 expansion MPCs which a system using the IoTKit + * might provide: + * + named GPIO inputs mpcexp_status[0..15] + * Controlling each of the 16 expansion MSCs which a system using the IoTKit + * might provide: + * + named GPIO inputs mscexp_status[0..15] + * + named GPIO outputs mscexp_clear[0..15] + * + named GPIO outputs mscexp_ns[0..15] + */ + +#ifndef ARMSSE_H +#define ARMSSE_H + +#include "hw/sysbus.h" +#include "hw/arm/armv7m.h" +#include "hw/misc/iotkit-secctl.h" +#include "hw/misc/tz-ppc.h" +#include "hw/misc/tz-mpc.h" +#include "hw/timer/cmsdk-apb-timer.h" +#include "hw/timer/cmsdk-apb-dualtimer.h" +#include "hw/watchdog/cmsdk-apb-watchdog.h" +#include "hw/misc/iotkit-sysctl.h" +#include "hw/misc/iotkit-sysinfo.h" +#include "hw/misc/armsse-cpuid.h" +#include "hw/misc/armsse-mhu.h" +#include "hw/misc/unimp.h" +#include "hw/or-irq.h" +#include "hw/core/split-irq.h" +#include "hw/cpu/cluster.h" +#include "qom/object.h" + +#define TYPE_ARM_SSE "arm-sse" +OBJECT_DECLARE_TYPE(ARMSSE, ARMSSEClass, + ARM_SSE) + +/* + * These type names are for specific IoTKit subsystems; other than + * instantiating them, code using these devices should always handle + * them via the ARMSSE base class, so they have no IOTKIT() etc macros. + */ +#define TYPE_IOTKIT "iotkit" +#define TYPE_SSE200 "sse-200" + +/* We have an IRQ splitter and an OR gate input for each external PPC + * and the 2 internal PPCs + */ +#define NUM_EXTERNAL_PPCS (IOTS_NUM_AHB_EXP_PPC + IOTS_NUM_APB_EXP_PPC) +#define NUM_PPCS (NUM_EXTERNAL_PPCS + 2) + +#define MAX_SRAM_BANKS 4 +#if MAX_SRAM_BANKS > IOTS_NUM_MPC +#error Too many SRAM banks +#endif + +#define SSE_MAX_CPUS 2 + +/* These define what each PPU in the ppu[] index is for */ +#define CPU0CORE_PPU 0 +#define CPU1CORE_PPU 1 +#define DBG_PPU 2 +#define RAM0_PPU 3 +#define RAM1_PPU 4 +#define RAM2_PPU 5 +#define RAM3_PPU 6 +#define NUM_PPUS 7 + +struct ARMSSE { + /*< private >*/ + SysBusDevice parent_obj; + + /*< public >*/ + ARMv7MState armv7m[SSE_MAX_CPUS]; + CPUClusterState cluster[SSE_MAX_CPUS]; + IoTKitSecCtl secctl; + TZPPC apb_ppc0; + TZPPC apb_ppc1; + TZMPC mpc[IOTS_NUM_MPC]; + CMSDKAPBTIMER timer0; + CMSDKAPBTIMER timer1; + CMSDKAPBTIMER s32ktimer; + qemu_or_irq ppc_irq_orgate; + SplitIRQ sec_resp_splitter; + SplitIRQ ppc_irq_splitter[NUM_PPCS]; + SplitIRQ mpc_irq_splitter[IOTS_NUM_EXP_MPC + IOTS_NUM_MPC]; + qemu_or_irq mpc_irq_orgate; + qemu_or_irq nmi_orgate; + + SplitIRQ cpu_irq_splitter[32]; + + CMSDKAPBDualTimer dualtimer; + + CMSDKAPBWatchdog s32kwatchdog; + CMSDKAPBWatchdog nswatchdog; + CMSDKAPBWatchdog swatchdog; + + IoTKitSysCtl sysctl; + IoTKitSysCtl sysinfo; + + ARMSSEMHU mhu[2]; + UnimplementedDeviceState ppu[NUM_PPUS]; + UnimplementedDeviceState cachectrl[SSE_MAX_CPUS]; + UnimplementedDeviceState cpusecctrl[SSE_MAX_CPUS]; + + ARMSSECPUID cpuid[SSE_MAX_CPUS]; + + /* + * 'container' holds all devices seen by all CPUs. + * 'cpu_container[i]' is the view that CPU i has: this has the + * per-CPU devices of that CPU, plus as the background 'container' + * (or an alias of it, since we can only use it directly once). + * container_alias[i] is the alias of 'container' used by CPU i+1; + * CPU 0 can use 'container' directly. + */ + MemoryRegion container; + MemoryRegion container_alias[SSE_MAX_CPUS - 1]; + MemoryRegion cpu_container[SSE_MAX_CPUS]; + MemoryRegion alias1; + MemoryRegion alias2; + MemoryRegion alias3[SSE_MAX_CPUS]; + MemoryRegion sram[MAX_SRAM_BANKS]; + + qemu_irq *exp_irqs[SSE_MAX_CPUS]; + qemu_irq ppc0_irq; + qemu_irq ppc1_irq; + qemu_irq sec_resp_cfg; + qemu_irq sec_resp_cfg_in; + qemu_irq nsc_cfg_in; + + qemu_irq irq_status_in[NUM_EXTERNAL_PPCS]; + qemu_irq mpcexp_status_in[IOTS_NUM_EXP_MPC]; + + uint32_t nsccfg; + + /* Properties */ + MemoryRegion *board_memory; + uint32_t exp_numirq; + uint32_t mainclk_frq; + uint32_t sram_addr_width; + uint32_t init_svtor; + bool cpu_fpu[SSE_MAX_CPUS]; + bool cpu_dsp[SSE_MAX_CPUS]; +}; + +typedef struct ARMSSEInfo ARMSSEInfo; + +struct ARMSSEClass { + SysBusDeviceClass parent_class; + const ARMSSEInfo *info; +}; + + +#endif diff --git a/include/hw/arm/armv7m.h b/include/hw/arm/armv7m.h new file mode 100644 index 000000000..0791dcb68 --- /dev/null +++ b/include/hw/arm/armv7m.h @@ -0,0 +1,78 @@ +/* + * ARMv7M CPU object + * + * Copyright (c) 2017 Linaro Ltd + * Written by Peter Maydell + * + * This code is licensed under the GPL version 2 or later. + */ + +#ifndef HW_ARM_ARMV7M_H +#define HW_ARM_ARMV7M_H + +#include "hw/sysbus.h" +#include "hw/intc/armv7m_nvic.h" +#include "target/arm/idau.h" +#include "qom/object.h" + +#define TYPE_BITBAND "ARM,bitband-memory" +OBJECT_DECLARE_SIMPLE_TYPE(BitBandState, BITBAND) + +struct BitBandState { + /*< private >*/ + SysBusDevice parent_obj; + /*< public >*/ + + AddressSpace source_as; + MemoryRegion iomem; + uint32_t base; + MemoryRegion *source_memory; +}; + +#define TYPE_ARMV7M "armv7m" +OBJECT_DECLARE_SIMPLE_TYPE(ARMv7MState, ARMV7M) + +#define ARMV7M_NUM_BITBANDS 2 + +/* ARMv7M container object. + * + Unnamed GPIO input lines: external IRQ lines for the NVIC + * + Named GPIO output SYSRESETREQ: signalled for guest AIRCR.SYSRESETREQ. + * If this GPIO is not wired up then the NVIC will default to performing + * a qemu_system_reset_request(SHUTDOWN_CAUSE_GUEST_RESET). + * + Property "cpu-type": CPU type to instantiate + * + Property "num-irq": number of external IRQ lines + * + Property "memory": MemoryRegion defining the physical address space + * that CPU accesses see. (The NVIC, bitbanding and other CPU-internal + * devices will be automatically layered on top of this view.) + * + Property "idau": IDAU interface (forwarded to CPU object) + * + Property "init-svtor": secure VTOR reset value (forwarded to CPU object) + * + Property "vfp": enable VFP (forwarded to CPU object) + * + Property "dsp": enable DSP (forwarded to CPU object) + * + Property "enable-bitband": expose bitbanded IO + */ +struct ARMv7MState { + /*< private >*/ + SysBusDevice parent_obj; + /*< public >*/ + NVICState nvic; + BitBandState bitband[ARMV7M_NUM_BITBANDS]; + ARMCPU *cpu; + + /* MemoryRegion we pass to the CPU, with our devices layered on + * top of the ones the board provides in board_memory. + */ + MemoryRegion container; + + /* Properties */ + char *cpu_type; + /* MemoryRegion the board provides to us (with its devices, RAM, etc) */ + MemoryRegion *board_memory; + Object *idau; + uint32_t init_svtor; + bool enable_bitband; + bool start_powered_off; + bool vfp; + bool dsp; +}; + +#endif diff --git a/include/hw/arm/aspeed.h b/include/hw/arm/aspeed.h new file mode 100644 index 000000000..c9747b15f --- /dev/null +++ b/include/hw/arm/aspeed.h @@ -0,0 +1,44 @@ +/* + * Aspeed Machines + * + * Copyright 2018 IBM Corp. + * + * This code is licensed under the GPL version 2 or later. See + * the COPYING file in the top-level directory. + */ +#ifndef ARM_ASPEED_H +#define ARM_ASPEED_H + +#include "hw/boards.h" +#include "qom/object.h" + +typedef struct AspeedMachineState AspeedMachineState; + +#define TYPE_ASPEED_MACHINE MACHINE_TYPE_NAME("aspeed") +typedef struct AspeedMachineClass AspeedMachineClass; +DECLARE_OBJ_CHECKERS(AspeedMachineState, AspeedMachineClass, + ASPEED_MACHINE, TYPE_ASPEED_MACHINE) + +#define ASPEED_MAC0_ON (1 << 0) +#define ASPEED_MAC1_ON (1 << 1) +#define ASPEED_MAC2_ON (1 << 2) +#define ASPEED_MAC3_ON (1 << 3) + + +struct AspeedMachineClass { + MachineClass parent_obj; + + const char *name; + const char *desc; + const char *soc_name; + uint32_t hw_strap1; + uint32_t hw_strap2; + const char *fmc_model; + const char *spi_model; + uint32_t num_cs; + uint32_t macs_mask; + void (*i2c_init)(AspeedMachineState *bmc); +}; + + +#endif diff --git a/include/hw/arm/aspeed_soc.h b/include/hw/arm/aspeed_soc.h new file mode 100644 index 000000000..11cfe6e35 --- /dev/null +++ b/include/hw/arm/aspeed_soc.h @@ -0,0 +1,135 @@ +/* + * ASPEED SoC family + * + * Andrew Jeffery + * + * Copyright 2016 IBM Corp. + * + * This code is licensed under the GPL version 2 or later. See + * the COPYING file in the top-level directory. + */ + +#ifndef ASPEED_SOC_H +#define ASPEED_SOC_H + +#include "hw/cpu/a15mpcore.h" +#include "hw/intc/aspeed_vic.h" +#include "hw/misc/aspeed_scu.h" +#include "hw/misc/aspeed_sdmc.h" +#include "hw/misc/aspeed_xdma.h" +#include "hw/timer/aspeed_timer.h" +#include "hw/rtc/aspeed_rtc.h" +#include "hw/i2c/aspeed_i2c.h" +#include "hw/ssi/aspeed_smc.h" +#include "hw/watchdog/wdt_aspeed.h" +#include "hw/net/ftgmac100.h" +#include "target/arm/cpu.h" +#include "hw/gpio/aspeed_gpio.h" +#include "hw/sd/aspeed_sdhci.h" +#include "hw/usb/hcd-ehci.h" +#include "qom/object.h" + +#define ASPEED_SPIS_NUM 2 +#define ASPEED_EHCIS_NUM 2 +#define ASPEED_WDTS_NUM 4 +#define ASPEED_CPUS_NUM 2 +#define ASPEED_MACS_NUM 4 + +struct AspeedSoCState { + /*< private >*/ + DeviceState parent; + + /*< public >*/ + ARMCPU cpu[ASPEED_CPUS_NUM]; + A15MPPrivState a7mpcore; + MemoryRegion *dram_mr; + MemoryRegion sram; + AspeedVICState vic; + AspeedRtcState rtc; + AspeedTimerCtrlState timerctrl; + AspeedI2CState i2c; + AspeedSCUState scu; + AspeedXDMAState xdma; + AspeedSMCState fmc; + AspeedSMCState spi[ASPEED_SPIS_NUM]; + EHCISysBusState ehci[ASPEED_EHCIS_NUM]; + AspeedSDMCState sdmc; + AspeedWDTState wdt[ASPEED_WDTS_NUM]; + FTGMAC100State ftgmac100[ASPEED_MACS_NUM]; + AspeedMiiState mii[ASPEED_MACS_NUM]; + AspeedGPIOState gpio; + AspeedGPIOState gpio_1_8v; + AspeedSDHCIState sdhci; + AspeedSDHCIState emmc; +}; + +#define TYPE_ASPEED_SOC "aspeed-soc" +OBJECT_DECLARE_TYPE(AspeedSoCState, AspeedSoCClass, ASPEED_SOC) + +struct AspeedSoCClass { + DeviceClass parent_class; + + const char *name; + const char *cpu_type; + uint32_t silicon_rev; + uint64_t sram_size; + int spis_num; + int ehcis_num; + int wdts_num; + int macs_num; + const int *irqmap; + const hwaddr *memmap; + uint32_t num_cpus; +}; + + +enum { + ASPEED_DEV_IOMEM, + ASPEED_DEV_UART1, + ASPEED_DEV_UART2, + ASPEED_DEV_UART3, + ASPEED_DEV_UART4, + ASPEED_DEV_UART5, + ASPEED_DEV_VUART, + ASPEED_DEV_FMC, + ASPEED_DEV_SPI1, + ASPEED_DEV_SPI2, + ASPEED_DEV_EHCI1, + ASPEED_DEV_EHCI2, + ASPEED_DEV_VIC, + ASPEED_DEV_SDMC, + ASPEED_DEV_SCU, + ASPEED_DEV_ADC, + ASPEED_DEV_VIDEO, + ASPEED_DEV_SRAM, + ASPEED_DEV_SDHCI, + ASPEED_DEV_GPIO, + ASPEED_DEV_GPIO_1_8V, + ASPEED_DEV_RTC, + ASPEED_DEV_TIMER1, + ASPEED_DEV_TIMER2, + ASPEED_DEV_TIMER3, + ASPEED_DEV_TIMER4, + ASPEED_DEV_TIMER5, + ASPEED_DEV_TIMER6, + ASPEED_DEV_TIMER7, + ASPEED_DEV_TIMER8, + ASPEED_DEV_WDT, + ASPEED_DEV_PWM, + ASPEED_DEV_LPC, + ASPEED_DEV_IBT, + ASPEED_DEV_I2C, + ASPEED_DEV_ETH1, + ASPEED_DEV_ETH2, + ASPEED_DEV_ETH3, + ASPEED_DEV_ETH4, + ASPEED_DEV_MII1, + ASPEED_DEV_MII2, + ASPEED_DEV_MII3, + ASPEED_DEV_MII4, + ASPEED_DEV_SDRAM, + ASPEED_DEV_XDMA, + ASPEED_DEV_EMMC, +}; + +#endif /* ASPEED_SOC_H */ diff --git a/include/hw/arm/bcm2835_peripherals.h b/include/hw/arm/bcm2835_peripherals.h new file mode 100644 index 000000000..479e2346e --- /dev/null +++ b/include/hw/arm/bcm2835_peripherals.h @@ -0,0 +1,78 @@ +/* + * Raspberry Pi emulation (c) 2012 Gregory Estrade + * Upstreaming code cleanup [including bcm2835_*] (c) 2013 Jan Petrous + * + * Rasperry Pi 2 emulation and refactoring Copyright (c) 2015, Microsoft + * Written by Andrew Baumann + * + * This work is licensed under the terms of the GNU GPL, version 2 or later. + * See the COPYING file in the top-level directory. + */ + +#ifndef BCM2835_PERIPHERALS_H +#define BCM2835_PERIPHERALS_H + +#include "hw/sysbus.h" +#include "hw/char/pl011.h" +#include "hw/char/bcm2835_aux.h" +#include "hw/display/bcm2835_fb.h" +#include "hw/dma/bcm2835_dma.h" +#include "hw/intc/bcm2835_ic.h" +#include "hw/misc/bcm2835_property.h" +#include "hw/misc/bcm2835_rng.h" +#include "hw/misc/bcm2835_mbox.h" +#include "hw/misc/bcm2835_mphi.h" +#include "hw/misc/bcm2835_thermal.h" +#include "hw/misc/bcm2835_cprman.h" +#include "hw/sd/sdhci.h" +#include "hw/sd/bcm2835_sdhost.h" +#include "hw/gpio/bcm2835_gpio.h" +#include "hw/timer/bcm2835_systmr.h" +#include "hw/usb/hcd-dwc2.h" +#include "hw/misc/unimp.h" +#include "qom/object.h" + +#define TYPE_BCM2835_PERIPHERALS "bcm2835-peripherals" +OBJECT_DECLARE_SIMPLE_TYPE(BCM2835PeripheralState, BCM2835_PERIPHERALS) + +struct BCM2835PeripheralState { + /*< private >*/ + SysBusDevice parent_obj; + /*< public >*/ + + MemoryRegion peri_mr, peri_mr_alias, gpu_bus_mr, mbox_mr; + MemoryRegion ram_alias[4]; + qemu_irq irq, fiq; + + BCM2835SystemTimerState systmr; + BCM2835MphiState mphi; + UnimplementedDeviceState txp; + UnimplementedDeviceState armtmr; + UnimplementedDeviceState powermgt; + BCM2835CprmanState cprman; + PL011State uart0; + BCM2835AuxState aux; + BCM2835FBState fb; + BCM2835DMAState dma; + BCM2835ICState ic; + BCM2835PropertyState property; + BCM2835RngState rng; + BCM2835MboxState mboxes; + SDHCIState sdhci; + BCM2835SDHostState sdhost; + BCM2835GpioState gpio; + Bcm2835ThermalState thermal; + UnimplementedDeviceState i2s; + UnimplementedDeviceState spi[1]; + UnimplementedDeviceState i2c[3]; + UnimplementedDeviceState otp; + UnimplementedDeviceState dbus; + UnimplementedDeviceState ave0; + UnimplementedDeviceState v3d; + UnimplementedDeviceState bscsl; + UnimplementedDeviceState smi; + DWC2State dwc2; + UnimplementedDeviceState sdramc; +}; + +#endif /* BCM2835_PERIPHERALS_H */ diff --git a/include/hw/arm/bcm2836.h b/include/hw/arm/bcm2836.h new file mode 100644 index 000000000..6f90cabfa --- /dev/null +++ b/include/hw/arm/bcm2836.h @@ -0,0 +1,47 @@ +/* + * Raspberry Pi emulation (c) 2012 Gregory Estrade + * Upstreaming code cleanup [including bcm2835_*] (c) 2013 Jan Petrous + * + * Rasperry Pi 2 emulation and refactoring Copyright (c) 2015, Microsoft + * Written by Andrew Baumann + * + * This work is licensed under the terms of the GNU GPL, version 2 or later. + * See the COPYING file in the top-level directory. + */ + +#ifndef BCM2836_H +#define BCM2836_H + +#include "hw/arm/bcm2835_peripherals.h" +#include "hw/intc/bcm2836_control.h" +#include "target/arm/cpu.h" +#include "qom/object.h" + +#define TYPE_BCM283X "bcm283x" +OBJECT_DECLARE_TYPE(BCM283XState, BCM283XClass, BCM283X) + +#define BCM283X_NCPUS 4 + +/* These type names are for specific SoCs; other than instantiating + * them, code using these devices should always handle them via the + * BCM283x base class, so they have no BCM2836(obj) etc macros. + */ +#define TYPE_BCM2835 "bcm2835" +#define TYPE_BCM2836 "bcm2836" +#define TYPE_BCM2837 "bcm2837" + +struct BCM283XState { + /*< private >*/ + DeviceState parent_obj; + /*< public >*/ + + uint32_t enabled_cpus; + + struct { + ARMCPU core; + } cpu[BCM283X_NCPUS]; + BCM2836ControlState control; + BCM2835PeripheralState peripherals; +}; + +#endif /* BCM2836_H */ diff --git a/include/hw/arm/boot.h b/include/hw/arm/boot.h new file mode 100644 index 000000000..ce2b48b88 --- /dev/null +++ b/include/hw/arm/boot.h @@ -0,0 +1,171 @@ +/* + * ARM kernel loader. + * + * Copyright (c) 2006 CodeSourcery. + * Written by Paul Brook + * + * This code is licensed under the LGPL. + * + */ + +#ifndef HW_ARM_BOOT_H +#define HW_ARM_BOOT_H + +#include "target/arm/cpu-qom.h" +#include "qemu/notify.h" + +typedef enum { + ARM_ENDIANNESS_UNKNOWN = 0, + ARM_ENDIANNESS_LE, + ARM_ENDIANNESS_BE8, + ARM_ENDIANNESS_BE32, +} arm_endianness; + +/** + * armv7m_load_kernel: + * @cpu: CPU + * @kernel_filename: file to load + * @mem_size: mem_size: maximum image size to load + * + * Load the guest image for an ARMv7M system. This must be called by + * any ARMv7M board. (This is necessary to ensure that the CPU resets + * correctly on system reset, as well as for kernel loading.) + */ +void armv7m_load_kernel(ARMCPU *cpu, const char *kernel_filename, int mem_size); + +/* arm_boot.c */ +struct arm_boot_info { + uint64_t ram_size; + const char *kernel_filename; + const char *kernel_cmdline; + const char *initrd_filename; + const char *dtb_filename; + hwaddr loader_start; + hwaddr dtb_start; + hwaddr dtb_limit; + /* If set to True, arm_load_kernel() will not load DTB. + * It allows board to load DTB manually later. + * (default: False) + */ + bool skip_dtb_autoload; + /* multicore boards that use the default secondary core boot functions + * need to put the address of the secondary boot code, the boot reg, + * and the GIC address in the next 3 values, respectively. boards that + * have their own boot functions can use these values as they want. + */ + hwaddr smp_loader_start; + hwaddr smp_bootreg_addr; + hwaddr gic_cpu_if_addr; + int nb_cpus; + int board_id; + /* ARM machines that support the ARM Security Extensions use this field to + * control whether Linux is booted as secure(true) or non-secure(false). + */ + bool secure_boot; + int (*atag_board)(const struct arm_boot_info *info, void *p); + /* multicore boards that use the default secondary core boot functions + * can ignore these two function calls. If the default functions won't + * work, then write_secondary_boot() should write a suitable blob of + * code mimicking the secondary CPU startup process used by the board's + * boot loader/boot ROM code, and secondary_cpu_reset_hook() should + * perform any necessary CPU reset handling and set the PC for the + * secondary CPUs to point at this boot blob. + */ + void (*write_secondary_boot)(ARMCPU *cpu, + const struct arm_boot_info *info); + void (*secondary_cpu_reset_hook)(ARMCPU *cpu, + const struct arm_boot_info *info); + /* if a board is able to create a dtb without a dtb file then it + * sets get_dtb. This will only be used if no dtb file is provided + * by the user. On success, sets *size to the length of the created + * dtb, and returns a pointer to it. (The caller must free this memory + * with g_free() when it has finished with it.) On failure, returns NULL. + */ + void *(*get_dtb)(const struct arm_boot_info *info, int *size); + /* if a board needs to be able to modify a device tree provided by + * the user it should implement this hook. + */ + void (*modify_dtb)(const struct arm_boot_info *info, void *fdt); + /* Used internally by arm_boot.c */ + int is_linux; + hwaddr initrd_start; + hwaddr initrd_size; + hwaddr entry; + + /* Boot firmware has been loaded, typically at address 0, with -bios or + * -pflash. It also implies that fw_cfg_find() will succeed. + */ + bool firmware_loaded; + + /* Address at which board specific loader/setup code exists. If enabled, + * this code-blob will run before anything else. It must return to the + * caller via the link register. There is no stack set up. Enabled by + * defining write_board_setup, which is responsible for loading the blob + * to the specified address. + */ + hwaddr board_setup_addr; + void (*write_board_setup)(ARMCPU *cpu, + const struct arm_boot_info *info); + + /* + * If set, the board specific loader/setup blob will be run from secure + * mode, regardless of secure_boot. The blob becomes responsible for + * changing to non-secure state if implementing a non-secure boot, + * including setting up EL3/Secure registers such as the NSACR as + * required by the Linux booting ABI before the switch to non-secure. + */ + bool secure_board_setup; + + arm_endianness endianness; +}; + +/** + * arm_load_kernel - Loads memory with everything needed to boot + * + * @cpu: handle to the first CPU object + * @info: handle to the boot info struct + * Registers a machine init done notifier that copies to memory + * everything needed to boot, depending on machine and user options: + * kernel image, boot loaders, initrd, dtb. Also registers the CPU + * reset handler. + * + * In case the machine file supports the platform bus device and its + * dynamically instantiable sysbus devices, this function must be called + * before sysbus-fdt arm_register_platform_bus_fdt_creator. Indeed the + * machine init done notifiers are called in registration reverse order. + */ +void arm_load_kernel(ARMCPU *cpu, MachineState *ms, struct arm_boot_info *info); + +AddressSpace *arm_boot_address_space(ARMCPU *cpu, + const struct arm_boot_info *info); + +/** + * arm_load_dtb() - load a device tree binary image into memory + * @addr: the address to load the image at + * @binfo: struct describing the boot environment + * @addr_limit: upper limit of the available memory area at @addr + * @as: address space to load image to + * + * Load a device tree supplied by the machine or by the user with the + * '-dtb' command line option, and put it at offset @addr in target + * memory. + * + * If @addr_limit contains a meaningful value (i.e., it is strictly greater + * than @addr), the device tree is only loaded if its size does not exceed + * the limit. + * + * Returns: the size of the device tree image on success, + * 0 if the image size exceeds the limit, + * -1 on errors. + * + * Note: Must not be called unless have_dtb(binfo) is true. + */ +int arm_load_dtb(hwaddr addr, const struct arm_boot_info *binfo, + hwaddr addr_limit, AddressSpace *as, MachineState *ms); + +/* Write a secure board setup routine with a dummy handler for SMCs */ +void arm_write_secure_board_setup_dummy_smc(ARMCPU *cpu, + const struct arm_boot_info *info, + hwaddr mvbar_addr); + +#endif /* HW_ARM_BOOT_H */ diff --git a/include/hw/arm/digic.h b/include/hw/arm/digic.h new file mode 100644 index 000000000..8f2735c28 --- /dev/null +++ b/include/hw/arm/digic.h @@ -0,0 +1,43 @@ +/* + * Misc Canon DIGIC declarations. + * + * Copyright (C) 2013 Antony Pavlov + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + */ + +#ifndef HW_ARM_DIGIC_H +#define HW_ARM_DIGIC_H + +#include "cpu.h" +#include "hw/timer/digic-timer.h" +#include "hw/char/digic-uart.h" +#include "qom/object.h" + +#define TYPE_DIGIC "digic" + +OBJECT_DECLARE_SIMPLE_TYPE(DigicState, DIGIC) + +#define DIGIC4_NB_TIMERS 3 + +struct DigicState { + /*< private >*/ + DeviceState parent_obj; + /*< public >*/ + + ARMCPU cpu; + + DigicTimerState timer[DIGIC4_NB_TIMERS]; + DigicUartState uart; +}; + +#endif /* HW_ARM_DIGIC_H */ diff --git a/include/hw/arm/exynos4210.h b/include/hw/arm/exynos4210.h new file mode 100644 index 000000000..60b9e126f --- /dev/null +++ b/include/hw/arm/exynos4210.h @@ -0,0 +1,141 @@ +/* + * Samsung exynos4210 SoC emulation + * + * Copyright (c) 2011 Samsung Electronics Co., Ltd. All rights reserved. + * Maksim Kozlov + * Evgeny Voevodin + * Igor Mitsyanko + * + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, see . + */ + +#ifndef EXYNOS4210_H +#define EXYNOS4210_H + +#include "hw/or-irq.h" +#include "hw/sysbus.h" +#include "target/arm/cpu-qom.h" +#include "qom/object.h" + +#define EXYNOS4210_NCPUS 2 + +#define EXYNOS4210_DRAM0_BASE_ADDR 0x40000000 +#define EXYNOS4210_DRAM1_BASE_ADDR 0xa0000000 +#define EXYNOS4210_DRAM_MAX_SIZE 0x60000000 /* 1.5 GB */ + +#define EXYNOS4210_IROM_BASE_ADDR 0x00000000 +#define EXYNOS4210_IROM_SIZE 0x00010000 /* 64 KB */ +#define EXYNOS4210_IROM_MIRROR_BASE_ADDR 0x02000000 +#define EXYNOS4210_IROM_MIRROR_SIZE 0x00010000 /* 64 KB */ + +#define EXYNOS4210_IRAM_BASE_ADDR 0x02020000 +#define EXYNOS4210_IRAM_SIZE 0x00020000 /* 128 KB */ + +/* Secondary CPU startup code is in IROM memory */ +#define EXYNOS4210_SMP_BOOT_ADDR EXYNOS4210_IROM_BASE_ADDR +#define EXYNOS4210_SMP_BOOT_SIZE 0x1000 +#define EXYNOS4210_BASE_BOOT_ADDR EXYNOS4210_DRAM0_BASE_ADDR +/* Secondary CPU polling address to get loader start from */ +#define EXYNOS4210_SECOND_CPU_BOOTREG 0x10020814 + +#define EXYNOS4210_SMP_PRIVATE_BASE_ADDR 0x10500000 +#define EXYNOS4210_L2X0_BASE_ADDR 0x10502000 + +/* + * exynos4210 IRQ subsystem stub definitions. + */ +#define EXYNOS4210_IRQ_GATE_NINPUTS 2 /* Internal and External GIC */ + +#define EXYNOS4210_MAX_INT_COMBINER_OUT_IRQ 64 +#define EXYNOS4210_MAX_EXT_COMBINER_OUT_IRQ 16 +#define EXYNOS4210_MAX_INT_COMBINER_IN_IRQ \ + (EXYNOS4210_MAX_INT_COMBINER_OUT_IRQ * 8) +#define EXYNOS4210_MAX_EXT_COMBINER_IN_IRQ \ + (EXYNOS4210_MAX_EXT_COMBINER_OUT_IRQ * 8) + +#define EXYNOS4210_COMBINER_GET_IRQ_NUM(grp, bit) ((grp)*8 + (bit)) +#define EXYNOS4210_COMBINER_GET_GRP_NUM(irq) ((irq) / 8) +#define EXYNOS4210_COMBINER_GET_BIT_NUM(irq) \ + ((irq) - 8 * EXYNOS4210_COMBINER_GET_GRP_NUM(irq)) + +/* IRQs number for external and internal GIC */ +#define EXYNOS4210_EXT_GIC_NIRQ (160-32) +#define EXYNOS4210_INT_GIC_NIRQ 64 + +#define EXYNOS4210_I2C_NUMBER 9 + +#define EXYNOS4210_NUM_DMA 3 + +typedef struct Exynos4210Irq { + qemu_irq int_combiner_irq[EXYNOS4210_MAX_INT_COMBINER_IN_IRQ]; + qemu_irq ext_combiner_irq[EXYNOS4210_MAX_EXT_COMBINER_IN_IRQ]; + qemu_irq int_gic_irq[EXYNOS4210_INT_GIC_NIRQ]; + qemu_irq ext_gic_irq[EXYNOS4210_EXT_GIC_NIRQ]; + qemu_irq board_irqs[EXYNOS4210_MAX_INT_COMBINER_IN_IRQ]; +} Exynos4210Irq; + +struct Exynos4210State { + /*< private >*/ + SysBusDevice parent_obj; + /*< public >*/ + ARMCPU *cpu[EXYNOS4210_NCPUS]; + Exynos4210Irq irqs; + qemu_irq *irq_table; + + MemoryRegion chipid_mem; + MemoryRegion iram_mem; + MemoryRegion irom_mem; + MemoryRegion irom_alias_mem; + MemoryRegion boot_secondary; + MemoryRegion bootreg_mem; + I2CBus *i2c_if[EXYNOS4210_I2C_NUMBER]; + qemu_or_irq pl330_irq_orgate[EXYNOS4210_NUM_DMA]; +}; + +#define TYPE_EXYNOS4210_SOC "exynos4210" +OBJECT_DECLARE_SIMPLE_TYPE(Exynos4210State, EXYNOS4210_SOC) + +void exynos4210_write_secondary(ARMCPU *cpu, + const struct arm_boot_info *info); + +/* Initialize exynos4210 IRQ subsystem stub */ +qemu_irq *exynos4210_init_irq(Exynos4210Irq *env); + +/* Initialize board IRQs. + * These IRQs contain splitted Int/External Combiner and External Gic IRQs */ +void exynos4210_init_board_irqs(Exynos4210Irq *s); + +/* Get IRQ number from exynos4210 IRQ subsystem stub. + * To identify IRQ source use internal combiner group and bit number + * grp - group number + * bit - bit number inside group */ +uint32_t exynos4210_get_irq(uint32_t grp, uint32_t bit); + +/* + * Get Combiner input GPIO into irqs structure + */ +void exynos4210_combiner_get_gpioin(Exynos4210Irq *irqs, DeviceState *dev, + int ext); + +/* + * exynos4210 UART + */ +DeviceState *exynos4210_uart_create(hwaddr addr, + int fifo_size, + int channel, + Chardev *chr, + qemu_irq irq); + +#endif /* EXYNOS4210_H */ diff --git a/include/hw/arm/fdt.h b/include/hw/arm/fdt.h new file mode 100644 index 000000000..c3d501501 --- /dev/null +++ b/include/hw/arm/fdt.h @@ -0,0 +1,34 @@ +/* + * + * Copyright (c) 2015 Linaro Limited + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2 or later, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along with + * this program. If not, see . + * + * Define macros useful when building ARM device tree nodes + */ + +#ifndef QEMU_ARM_FDT_H +#define QEMU_ARM_FDT_H + +#define GIC_FDT_IRQ_TYPE_SPI 0 +#define GIC_FDT_IRQ_TYPE_PPI 1 + +#define GIC_FDT_IRQ_FLAGS_EDGE_LO_HI 1 +#define GIC_FDT_IRQ_FLAGS_EDGE_HI_LO 2 +#define GIC_FDT_IRQ_FLAGS_LEVEL_HI 4 +#define GIC_FDT_IRQ_FLAGS_LEVEL_LO 8 + +#define GIC_FDT_IRQ_PPI_CPU_START 8 +#define GIC_FDT_IRQ_PPI_CPU_WIDTH 8 + +#endif diff --git a/include/hw/arm/fsl-imx25.h b/include/hw/arm/fsl-imx25.h new file mode 100644 index 000000000..c1603b2ac --- /dev/null +++ b/include/hw/arm/fsl-imx25.h @@ -0,0 +1,279 @@ +/* + * Freescale i.MX25 SoC emulation + * + * Copyright (C) 2015 Jean-Christophe Dubois + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * for more details. + */ + +#ifndef FSL_IMX25_H +#define FSL_IMX25_H + +#include "hw/arm/boot.h" +#include "hw/intc/imx_avic.h" +#include "hw/misc/imx25_ccm.h" +#include "hw/char/imx_serial.h" +#include "hw/timer/imx_gpt.h" +#include "hw/timer/imx_epit.h" +#include "hw/net/imx_fec.h" +#include "hw/misc/imx_rngc.h" +#include "hw/i2c/imx_i2c.h" +#include "hw/gpio/imx_gpio.h" +#include "hw/sd/sdhci.h" +#include "hw/usb/chipidea.h" +#include "hw/watchdog/wdt_imx2.h" +#include "exec/memory.h" +#include "target/arm/cpu.h" +#include "qom/object.h" + +#define TYPE_FSL_IMX25 "fsl,imx25" +OBJECT_DECLARE_SIMPLE_TYPE(FslIMX25State, FSL_IMX25) + +#define FSL_IMX25_NUM_UARTS 5 +#define FSL_IMX25_NUM_GPTS 4 +#define FSL_IMX25_NUM_EPITS 2 +#define FSL_IMX25_NUM_I2CS 3 +#define FSL_IMX25_NUM_GPIOS 4 +#define FSL_IMX25_NUM_ESDHCS 2 +#define FSL_IMX25_NUM_USBS 2 + +struct FslIMX25State { + /*< private >*/ + DeviceState parent_obj; + + /*< public >*/ + ARMCPU cpu; + IMXAVICState avic; + IMX25CCMState ccm; + IMXSerialState uart[FSL_IMX25_NUM_UARTS]; + IMXGPTState gpt[FSL_IMX25_NUM_GPTS]; + IMXEPITState epit[FSL_IMX25_NUM_EPITS]; + IMXFECState fec; + IMXRNGCState rngc; + IMXI2CState i2c[FSL_IMX25_NUM_I2CS]; + IMXGPIOState gpio[FSL_IMX25_NUM_GPIOS]; + SDHCIState esdhc[FSL_IMX25_NUM_ESDHCS]; + ChipideaState usb[FSL_IMX25_NUM_USBS]; + IMX2WdtState wdt; + MemoryRegion rom[2]; + MemoryRegion iram; + MemoryRegion iram_alias; + uint32_t phy_num; +}; + +/** + * i.MX25 memory map + **************************************************************** + * 0x0000_0000 0x0000_3FFF 16 Kbytes ROM (36 Kbytes) + * 0x0000_4000 0x0040_3FFF 4 Mbytes Reserved + * 0x0040_4000 0x0040_8FFF 20 Kbytes ROM (36 Kbytes) + * 0x0040_9000 0x0FFF_FFFF 252 Mbytes (minus 36 Kbytes) Reserved + * 0x1000_0000 0x1FFF_FFFF 256 Mbytes Reserved + * 0x2000_0000 0x2FFF_FFFF 256 Mbytes Reserved + * 0x3000_0000 0x3FFF_FFFF 256 Mbytes Reserved + * 0x4000_0000 0x43EF_FFFF 63 Mbytes Reserved + * 0x43F0_0000 0x43F0_3FFF 16 Kbytes AIPS A control registers + * 0x43F0_4000 0x43F0_7FFF 16 Kbytes ARM926 platform MAX + * 0x43F0_8000 0x43F0_BFFF 16 Kbytes ARM926 platform CLKCTL + * 0x43F0_C000 0x43F0_FFFF 16 Kbytes ARM926 platform ETB registers + * 0x43F1_0000 0x43F1_3FFF 16 Kbytes ARM926 platform ETB memory + * 0x43F1_4000 0x43F1_7FFF 16 Kbytes ARM926 platform AAPE registers + * 0x43F1_8000 0x43F7_FFFF 416 Kbytes Reserved + * 0x43F8_0000 0x43F8_3FFF 16 Kbytes I2C-1 + * 0x43F8_4000 0x43F8_7FFF 16 Kbytes I2C-3 + * 0x43F8_8000 0x43F8_BFFF 16 Kbytes CAN-1 + * 0x43F8_C000 0x43F8_FFFF 16 Kbytes CAN-2 + * 0x43F9_0000 0x43F9_3FFF 16 Kbytes UART-1 + * 0x43F9_4000 0x43F9_7FFF 16 Kbytes UART-2 + * 0x43F9_8000 0x43F9_BFFF 16 Kbytes I2C-2 + * 0x43F9_C000 0x43F9_FFFF 16 Kbytes 1-Wire + * 0x43FA_0000 0x43FA_3FFF 16 Kbytes ATA (CPU side) + * 0x43FA_4000 0x43FA_7FFF 16 Kbytes CSPI-1 + * 0x43FA_8000 0x43FA_BFFF 16 Kbytes KPP + * 0x43FA_C000 0x43FA_FFFF 16 Kbytes IOMUXC + * 0x43FB_0000 0x43FB_3FFF 16 Kbytes AUDMUX + * 0x43FB_4000 0x43FB_7FFF 16 Kbytes Reserved + * 0x43FB_8000 0x43FB_BFFF 16 Kbytes ECT (IP BUS A) + * 0x43FB_C000 0x43FB_FFFF 16 Kbytes ECT (IP BUS B) + * 0x43FC_0000 0x43FF_FFFF 256 Kbytes Reserved AIPS A off-platform slots + * 0x4400_0000 0x4FFF_FFFF 192 Mbytes Reserved + * 0x5000_0000 0x5000_3FFF 16 Kbytes SPBA base address + * 0x5000_4000 0x5000_7FFF 16 Kbytes CSPI-3 + * 0x5000_8000 0x5000_BFFF 16 Kbytes UART-4 + * 0x5000_C000 0x5000_FFFF 16 Kbytes UART-3 + * 0x5001_0000 0x5001_3FFF 16 Kbytes CSPI-2 + * 0x5001_4000 0x5001_7FFF 16 Kbytes SSI-2 + * 0x5001_C000 0x5001_FFFF 16 Kbytes Reserved + * 0x5002_0000 0x5002_3FFF 16 Kbytes ATA + * 0x5002_4000 0x5002_7FFF 16 Kbytes SIM-1 + * 0x5002_8000 0x5002_BFFF 16 Kbytes SIM-2 + * 0x5002_C000 0x5002_FFFF 16 Kbytes UART-5 + * 0x5003_0000 0x5003_3FFF 16 Kbytes TSC + * 0x5003_4000 0x5003_7FFF 16 Kbytes SSI-1 + * 0x5003_8000 0x5003_BFFF 16 Kbytes FEC + * 0x5003_C000 0x5003_FFFF 16 Kbytes SPBA registers + * 0x5004_0000 0x51FF_FFFF 32 Mbytes (minus 256 Kbytes) + * 0x5200_0000 0x53EF_FFFF 31 Mbytes Reserved + * 0x53F0_0000 0x53F0_3FFF 16 Kbytes AIPS B control registers + * 0x53F0_4000 0x53F7_FFFF 496 Kbytes Reserved + * 0x53F8_0000 0x53F8_3FFF 16 Kbytes CCM + * 0x53F8_4000 0x53F8_7FFF 16 Kbytes GPT-4 + * 0x53F8_8000 0x53F8_BFFF 16 Kbytes GPT-3 + * 0x53F8_C000 0x53F8_FFFF 16 Kbytes GPT-2 + * 0x53F9_0000 0x53F9_3FFF 16 Kbytes GPT-1 + * 0x53F9_4000 0x53F9_7FFF 16 Kbytes EPIT-1 + * 0x53F9_8000 0x53F9_BFFF 16 Kbytes EPIT-2 + * 0x53F9_C000 0x53F9_FFFF 16 Kbytes GPIO-4 + * 0x53FA_0000 0x53FA_3FFF 16 Kbytes PWM-2 + * 0x53FA_4000 0x53FA_7FFF 16 Kbytes GPIO-3 + * 0x53FA_8000 0x53FA_BFFF 16 Kbytes PWM-3 + * 0x53FA_C000 0x53FA_FFFF 16 Kbytes SCC + * 0x53FB_0000 0x53FB_3FFF 16 Kbytes RNGB + * 0x53FB_4000 0x53FB_7FFF 16 Kbytes eSDHC-1 + * 0x53FB_8000 0x53FB_BFFF 16 Kbytes eSDHC-2 + * 0x53FB_C000 0x53FB_FFFF 16 Kbytes LCDC + * 0x53FC_0000 0x53FC_3FFF 16 Kbytes SLCDC + * 0x53FC_4000 0x53FC_7FFF 16 Kbytes Reserved + * 0x53FC_8000 0x53FC_BFFF 16 Kbytes PWM-4 + * 0x53FC_C000 0x53FC_FFFF 16 Kbytes GPIO-1 + * 0x53FD_0000 0x53FD_3FFF 16 Kbytes GPIO-2 + * 0x53FD_4000 0x53FD_7FFF 16 Kbytes SDMA + * 0x53FD_8000 0x53FD_BFFF 16 Kbytes Reserved + * 0x53FD_C000 0x53FD_FFFF 16 Kbytes WDOG + * 0x53FE_0000 0x53FE_3FFF 16 Kbytes PWM-1 + * 0x53FE_4000 0x53FE_7FFF 16 Kbytes Reserved + * 0x53FE_8000 0x53FE_BFFF 16 Kbytes Reserved + * 0x53FE_C000 0x53FE_FFFF 16 Kbytes RTICv3 + * 0x53FF_0000 0x53FF_3FFF 16 Kbytes IIM + * 0x53FF_4000 0x53FF_7FFF 16 Kbytes USB + * 0x53FF_8000 0x53FF_BFFF 16 Kbytes CSI + * 0x53FF_C000 0x53FF_FFFF 16 Kbytes DryIce + * 0x5400_0000 0x5FFF_FFFF 192 Mbytes Reserved (aliased AIPS B slots) + * 0x6000_0000 0x67FF_FFFF 128 Mbytes ARM926 platform ROMPATCH + * 0x6800_0000 0x6FFF_FFFF 128 Mbytes ARM926 platform ASIC + * 0x7000_0000 0x77FF_FFFF 128 Mbytes Reserved + * 0x7800_0000 0x7801_FFFF 128 Kbytes RAM + * 0x7802_0000 0x7FFF_FFFF 128 Mbytes (minus 128 Kbytes) + * 0x8000_0000 0x8FFF_FFFF 256 Mbytes SDRAM bank 0 + * 0x9000_0000 0x9FFF_FFFF 256 Mbytes SDRAM bank 1 + * 0xA000_0000 0xA7FF_FFFF 128 Mbytes WEIM CS0 (flash 128) 1 + * 0xA800_0000 0xAFFF_FFFF 128 Mbytes WEIM CS1 (flash 64) 1 + * 0xB000_0000 0xB1FF_FFFF 32 Mbytes WEIM CS2 (SRAM) + * 0xB200_0000 0xB3FF_FFFF 32 Mbytes WEIM CS3 (SRAM) + * 0xB400_0000 0xB5FF_FFFF 32 Mbytes WEIM CS4 + * 0xB600_0000 0xB7FF_FFFF 32 Mbytes Reserved + * 0xB800_0000 0xB800_0FFF 4 Kbytes Reserved + * 0xB800_1000 0xB800_1FFF 4 Kbytes SDRAM control registers + * 0xB800_2000 0xB800_2FFF 4 Kbytes WEIM control registers + * 0xB800_3000 0xB800_3FFF 4 Kbytes M3IF control registers + * 0xB800_4000 0xB800_4FFF 4 Kbytes EMI control registers + * 0xB800_5000 0xBAFF_FFFF 32 Mbytes (minus 20 Kbytes) + * 0xBB00_0000 0xBB00_0FFF 4 Kbytes NAND flash main area buffer + * 0xBB00_1000 0xBB00_11FF 512 B NAND flash spare area buffer + * 0xBB00_1200 0xBB00_1DFF 3 Kbytes Reserved + * 0xBB00_1E00 0xBB00_1FFF 512 B NAND flash control registers + * 0xBB01_2000 0xBFFF_FFFF 96 Mbytes (minus 8 Kbytes) Reserved + * 0xC000_0000 0xFFFF_FFFF 1024 Mbytes Reserved + */ + +#define FSL_IMX25_ROM0_ADDR 0x00000000 +#define FSL_IMX25_ROM0_SIZE 0x4000 +#define FSL_IMX25_ROM1_ADDR 0x00404000 +#define FSL_IMX25_ROM1_SIZE 0x4000 +#define FSL_IMX25_I2C1_ADDR 0x43F80000 +#define FSL_IMX25_I2C1_SIZE 0x4000 +#define FSL_IMX25_I2C3_ADDR 0x43F84000 +#define FSL_IMX25_I2C3_SIZE 0x4000 +#define FSL_IMX25_UART1_ADDR 0x43F90000 +#define FSL_IMX25_UART1_SIZE 0x4000 +#define FSL_IMX25_UART2_ADDR 0x43F94000 +#define FSL_IMX25_UART2_SIZE 0x4000 +#define FSL_IMX25_I2C2_ADDR 0x43F98000 +#define FSL_IMX25_I2C2_SIZE 0x4000 +#define FSL_IMX25_UART4_ADDR 0x50008000 +#define FSL_IMX25_UART4_SIZE 0x4000 +#define FSL_IMX25_UART3_ADDR 0x5000C000 +#define FSL_IMX25_UART3_SIZE 0x4000 +#define FSL_IMX25_UART5_ADDR 0x5002C000 +#define FSL_IMX25_UART5_SIZE 0x4000 +#define FSL_IMX25_FEC_ADDR 0x50038000 +#define FSL_IMX25_CCM_ADDR 0x53F80000 +#define FSL_IMX25_CCM_SIZE 0x4000 +#define FSL_IMX25_GPT4_ADDR 0x53F84000 +#define FSL_IMX25_GPT4_SIZE 0x4000 +#define FSL_IMX25_GPT3_ADDR 0x53F88000 +#define FSL_IMX25_GPT3_SIZE 0x4000 +#define FSL_IMX25_GPT2_ADDR 0x53F8C000 +#define FSL_IMX25_GPT2_SIZE 0x4000 +#define FSL_IMX25_GPT1_ADDR 0x53F90000 +#define FSL_IMX25_GPT1_SIZE 0x4000 +#define FSL_IMX25_EPIT1_ADDR 0x53F94000 +#define FSL_IMX25_EPIT1_SIZE 0x4000 +#define FSL_IMX25_EPIT2_ADDR 0x53F98000 +#define FSL_IMX25_EPIT2_SIZE 0x4000 +#define FSL_IMX25_GPIO4_ADDR 0x53F9C000 +#define FSL_IMX25_GPIO4_SIZE 0x4000 +#define FSL_IMX25_GPIO3_ADDR 0x53FA4000 +#define FSL_IMX25_GPIO3_SIZE 0x4000 +#define FSL_IMX25_RNGC_ADDR 0x53FB0000 +#define FSL_IMX25_RNGC_SIZE 0x4000 +#define FSL_IMX25_ESDHC1_ADDR 0x53FB4000 +#define FSL_IMX25_ESDHC1_SIZE 0x4000 +#define FSL_IMX25_ESDHC2_ADDR 0x53FB8000 +#define FSL_IMX25_ESDHC2_SIZE 0x4000 +#define FSL_IMX25_GPIO1_ADDR 0x53FCC000 +#define FSL_IMX25_GPIO1_SIZE 0x4000 +#define FSL_IMX25_GPIO2_ADDR 0x53FD0000 +#define FSL_IMX25_GPIO2_SIZE 0x4000 +#define FSL_IMX25_WDT_ADDR 0x53FDC000 +#define FSL_IMX25_WDT_SIZE 0x4000 +#define FSL_IMX25_USB1_ADDR 0x53FF4000 +#define FSL_IMX25_USB1_SIZE 0x0200 +#define FSL_IMX25_USB2_ADDR 0x53FF4400 +#define FSL_IMX25_USB2_SIZE 0x0200 +#define FSL_IMX25_AVIC_ADDR 0x68000000 +#define FSL_IMX25_AVIC_SIZE 0x4000 +#define FSL_IMX25_IRAM_ADDR 0x78000000 +#define FSL_IMX25_IRAM_SIZE 0x20000 +#define FSL_IMX25_IRAM_ALIAS_ADDR 0x78020000 +#define FSL_IMX25_IRAM_ALIAS_SIZE 0x7FE0000 +#define FSL_IMX25_SDRAM0_ADDR 0x80000000 +#define FSL_IMX25_SDRAM0_SIZE 0x10000000 +#define FSL_IMX25_SDRAM1_ADDR 0x90000000 +#define FSL_IMX25_SDRAM1_SIZE 0x10000000 + +#define FSL_IMX25_UART1_IRQ 45 +#define FSL_IMX25_UART2_IRQ 32 +#define FSL_IMX25_UART3_IRQ 18 +#define FSL_IMX25_UART4_IRQ 5 +#define FSL_IMX25_UART5_IRQ 40 +#define FSL_IMX25_GPT1_IRQ 54 +#define FSL_IMX25_GPT2_IRQ 53 +#define FSL_IMX25_GPT3_IRQ 29 +#define FSL_IMX25_GPT4_IRQ 1 +#define FSL_IMX25_EPIT1_IRQ 28 +#define FSL_IMX25_EPIT2_IRQ 27 +#define FSL_IMX25_FEC_IRQ 57 +#define FSL_IMX25_RNGC_IRQ 22 +#define FSL_IMX25_I2C1_IRQ 3 +#define FSL_IMX25_I2C2_IRQ 4 +#define FSL_IMX25_I2C3_IRQ 10 +#define FSL_IMX25_GPIO1_IRQ 52 +#define FSL_IMX25_GPIO2_IRQ 51 +#define FSL_IMX25_GPIO3_IRQ 16 +#define FSL_IMX25_GPIO4_IRQ 23 +#define FSL_IMX25_ESDHC1_IRQ 9 +#define FSL_IMX25_ESDHC2_IRQ 8 +#define FSL_IMX25_USB1_IRQ 37 +#define FSL_IMX25_USB2_IRQ 35 +#define FSL_IMX25_WDT_IRQ 55 + +#endif /* FSL_IMX25_H */ diff --git a/include/hw/arm/fsl-imx31.h b/include/hw/arm/fsl-imx31.h new file mode 100644 index 000000000..b9792d58a --- /dev/null +++ b/include/hw/arm/fsl-imx31.h @@ -0,0 +1,128 @@ +/* + * Freescale i.MX31 SoC emulation + * + * Copyright (C) 2015 Jean-Christophe Dubois + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * for more details. + */ + +#ifndef FSL_IMX31_H +#define FSL_IMX31_H + +#include "hw/arm/boot.h" +#include "hw/intc/imx_avic.h" +#include "hw/misc/imx31_ccm.h" +#include "hw/char/imx_serial.h" +#include "hw/timer/imx_gpt.h" +#include "hw/timer/imx_epit.h" +#include "hw/i2c/imx_i2c.h" +#include "hw/gpio/imx_gpio.h" +#include "hw/watchdog/wdt_imx2.h" +#include "exec/memory.h" +#include "target/arm/cpu.h" +#include "qom/object.h" + +#define TYPE_FSL_IMX31 "fsl,imx31" +OBJECT_DECLARE_SIMPLE_TYPE(FslIMX31State, FSL_IMX31) + +#define FSL_IMX31_NUM_UARTS 2 +#define FSL_IMX31_NUM_EPITS 2 +#define FSL_IMX31_NUM_I2CS 3 +#define FSL_IMX31_NUM_GPIOS 3 + +struct FslIMX31State { + /*< private >*/ + DeviceState parent_obj; + + /*< public >*/ + ARMCPU cpu; + IMXAVICState avic; + IMX31CCMState ccm; + IMXSerialState uart[FSL_IMX31_NUM_UARTS]; + IMXGPTState gpt; + IMXEPITState epit[FSL_IMX31_NUM_EPITS]; + IMXI2CState i2c[FSL_IMX31_NUM_I2CS]; + IMXGPIOState gpio[FSL_IMX31_NUM_GPIOS]; + IMX2WdtState wdt; + MemoryRegion secure_rom; + MemoryRegion rom; + MemoryRegion iram; + MemoryRegion iram_alias; +}; + +#define FSL_IMX31_SECURE_ROM_ADDR 0x00000000 +#define FSL_IMX31_SECURE_ROM_SIZE 0x4000 +#define FSL_IMX31_ROM_ADDR 0x00404000 +#define FSL_IMX31_ROM_SIZE 0x4000 +#define FSL_IMX31_IRAM_ALIAS_ADDR 0x10000000 +#define FSL_IMX31_IRAM_ALIAS_SIZE 0xFFC0000 +#define FSL_IMX31_IRAM_ADDR 0x1FFFC000 +#define FSL_IMX31_IRAM_SIZE 0x4000 +#define FSL_IMX31_I2C1_ADDR 0x43F80000 +#define FSL_IMX31_I2C1_SIZE 0x4000 +#define FSL_IMX31_I2C3_ADDR 0x43F84000 +#define FSL_IMX31_I2C3_SIZE 0x4000 +#define FSL_IMX31_UART1_ADDR 0x43F90000 +#define FSL_IMX31_UART1_SIZE 0x4000 +#define FSL_IMX31_UART2_ADDR 0x43F94000 +#define FSL_IMX31_UART2_SIZE 0x4000 +#define FSL_IMX31_I2C2_ADDR 0x43F98000 +#define FSL_IMX31_I2C2_SIZE 0x4000 +#define FSL_IMX31_CCM_ADDR 0x53F80000 +#define FSL_IMX31_CCM_SIZE 0x4000 +#define FSL_IMX31_GPT_ADDR 0x53F90000 +#define FSL_IMX31_GPT_SIZE 0x4000 +#define FSL_IMX31_EPIT1_ADDR 0x53F94000 +#define FSL_IMX31_EPIT1_SIZE 0x4000 +#define FSL_IMX31_EPIT2_ADDR 0x53F98000 +#define FSL_IMX31_EPIT2_SIZE 0x4000 +#define FSL_IMX31_GPIO3_ADDR 0x53FA4000 +#define FSL_IMX31_GPIO3_SIZE 0x4000 +#define FSL_IMX31_GPIO1_ADDR 0x53FCC000 +#define FSL_IMX31_GPIO1_SIZE 0x4000 +#define FSL_IMX31_GPIO2_ADDR 0x53FD0000 +#define FSL_IMX31_GPIO2_SIZE 0x4000 +#define FSL_IMX31_WDT_ADDR 0x53FDC000 +#define FSL_IMX31_WDT_SIZE 0x4000 +#define FSL_IMX31_AVIC_ADDR 0x68000000 +#define FSL_IMX31_AVIC_SIZE 0x100 +#define FSL_IMX31_SDRAM0_ADDR 0x80000000 +#define FSL_IMX31_SDRAM0_SIZE 0x10000000 +#define FSL_IMX31_SDRAM1_ADDR 0x90000000 +#define FSL_IMX31_SDRAM1_SIZE 0x10000000 +#define FSL_IMX31_FLASH0_ADDR 0xA0000000 +#define FSL_IMX31_FLASH0_SIZE 0x8000000 +#define FSL_IMX31_FLASH1_ADDR 0xA8000000 +#define FSL_IMX31_FLASH1_SIZE 0x8000000 +#define FSL_IMX31_CS2_ADDR 0xB0000000 +#define FSL_IMX31_CS2_SIZE 0x2000000 +#define FSL_IMX31_CS3_ADDR 0xB2000000 +#define FSL_IMX31_CS3_SIZE 0x2000000 +#define FSL_IMX31_CS4_ADDR 0xB4000000 +#define FSL_IMX31_CS4_SIZE 0x2000000 +#define FSL_IMX31_CS5_ADDR 0xB6000000 +#define FSL_IMX31_CS5_SIZE 0x2000000 +#define FSL_IMX31_NAND_ADDR 0xB8000000 +#define FSL_IMX31_NAND_SIZE 0x1000 + +#define FSL_IMX31_EPIT2_IRQ 27 +#define FSL_IMX31_EPIT1_IRQ 28 +#define FSL_IMX31_GPT_IRQ 29 +#define FSL_IMX31_UART2_IRQ 32 +#define FSL_IMX31_UART1_IRQ 45 +#define FSL_IMX31_I2C1_IRQ 10 +#define FSL_IMX31_I2C2_IRQ 4 +#define FSL_IMX31_I2C3_IRQ 3 +#define FSL_IMX31_GPIO1_IRQ 52 +#define FSL_IMX31_GPIO2_IRQ 51 +#define FSL_IMX31_GPIO3_IRQ 56 + +#endif /* FSL_IMX31_H */ diff --git a/include/hw/arm/fsl-imx6.h b/include/hw/arm/fsl-imx6.h new file mode 100644 index 000000000..29cc425ac --- /dev/null +++ b/include/hw/arm/fsl-imx6.h @@ -0,0 +1,464 @@ +/* + * Freescale i.MX31 SoC emulation + * + * Copyright (C) 2015 Jean-Christophe Dubois + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * for more details. + */ + +#ifndef FSL_IMX6_H +#define FSL_IMX6_H + +#include "hw/arm/boot.h" +#include "hw/cpu/a9mpcore.h" +#include "hw/misc/imx6_ccm.h" +#include "hw/misc/imx6_src.h" +#include "hw/watchdog/wdt_imx2.h" +#include "hw/char/imx_serial.h" +#include "hw/timer/imx_gpt.h" +#include "hw/timer/imx_epit.h" +#include "hw/i2c/imx_i2c.h" +#include "hw/gpio/imx_gpio.h" +#include "hw/sd/sdhci.h" +#include "hw/ssi/imx_spi.h" +#include "hw/net/imx_fec.h" +#include "hw/usb/chipidea.h" +#include "hw/usb/imx-usb-phy.h" +#include "exec/memory.h" +#include "cpu.h" +#include "qom/object.h" + +#define TYPE_FSL_IMX6 "fsl,imx6" +OBJECT_DECLARE_SIMPLE_TYPE(FslIMX6State, FSL_IMX6) + +#define FSL_IMX6_NUM_CPUS 4 +#define FSL_IMX6_NUM_UARTS 5 +#define FSL_IMX6_NUM_EPITS 2 +#define FSL_IMX6_NUM_I2CS 3 +#define FSL_IMX6_NUM_GPIOS 7 +#define FSL_IMX6_NUM_ESDHCS 4 +#define FSL_IMX6_NUM_ECSPIS 5 +#define FSL_IMX6_NUM_WDTS 2 +#define FSL_IMX6_NUM_USB_PHYS 2 +#define FSL_IMX6_NUM_USBS 4 + +struct FslIMX6State { + /*< private >*/ + DeviceState parent_obj; + + /*< public >*/ + ARMCPU cpu[FSL_IMX6_NUM_CPUS]; + A9MPPrivState a9mpcore; + IMX6CCMState ccm; + IMX6SRCState src; + IMXSerialState uart[FSL_IMX6_NUM_UARTS]; + IMXGPTState gpt; + IMXEPITState epit[FSL_IMX6_NUM_EPITS]; + IMXI2CState i2c[FSL_IMX6_NUM_I2CS]; + IMXGPIOState gpio[FSL_IMX6_NUM_GPIOS]; + SDHCIState esdhc[FSL_IMX6_NUM_ESDHCS]; + IMXSPIState spi[FSL_IMX6_NUM_ECSPIS]; + IMX2WdtState wdt[FSL_IMX6_NUM_WDTS]; + IMXUSBPHYState usbphy[FSL_IMX6_NUM_USB_PHYS]; + ChipideaState usb[FSL_IMX6_NUM_USBS]; + IMXFECState eth; + MemoryRegion rom; + MemoryRegion caam; + MemoryRegion ocram; + MemoryRegion ocram_alias; + uint32_t phy_num; +}; + + +#define FSL_IMX6_MMDC_ADDR 0x10000000 +#define FSL_IMX6_MMDC_SIZE 0xF0000000 +#define FSL_IMX6_EIM_MEM_ADDR 0x08000000 +#define FSL_IMX6_EIM_MEM_SIZE 0x8000000 +#define FSL_IMX6_IPU_2_ADDR 0x02800000 +#define FSL_IMX6_IPU_2_SIZE 0x400000 +#define FSL_IMX6_IPU_1_ADDR 0x02400000 +#define FSL_IMX6_IPU_1_SIZE 0x400000 +#define FSL_IMX6_MIPI_HSI_ADDR 0x02208000 +#define FSL_IMX6_MIPI_HSI_SIZE 0x4000 +#define FSL_IMX6_OPENVG_ADDR 0x02204000 +#define FSL_IMX6_OPENVG_SIZE 0x4000 +#define FSL_IMX6_SATA_ADDR 0x02200000 +#define FSL_IMX6_SATA_SIZE 0x4000 +#define FSL_IMX6_AIPS_2_ADDR 0x02100000 +#define FSL_IMX6_AIPS_2_SIZE 0x100000 +/* AIPS2 */ +#define FSL_IMX6_UART5_ADDR 0x021F4000 +#define FSL_IMX6_UART5_SIZE 0x4000 +#define FSL_IMX6_UART4_ADDR 0x021F0000 +#define FSL_IMX6_UART4_SIZE 0x4000 +#define FSL_IMX6_UART3_ADDR 0x021EC000 +#define FSL_IMX6_UART3_SIZE 0x4000 +#define FSL_IMX6_UART2_ADDR 0x021E8000 +#define FSL_IMX6_UART2_SIZE 0x4000 +#define FSL_IMX6_VDOA_ADDR 0x021E4000 +#define FSL_IMX6_VDOA_SIZE 0x4000 +#define FSL_IMX6_MIPI_DSI_ADDR 0x021E0000 +#define FSL_IMX6_MIPI_DSI_SIZE 0x4000 +#define FSL_IMX6_MIPI_CSI_ADDR 0x021DC000 +#define FSL_IMX6_MIPI_CSI_SIZE 0x4000 +#define FSL_IMX6_AUDMUX_ADDR 0x021D8000 +#define FSL_IMX6_AUDMUX_SIZE 0x4000 +#define FSL_IMX6_TZASC2_ADDR 0x021D4000 +#define FSL_IMX6_TZASC2_SIZE 0x4000 +#define FSL_IMX6_TZASC1_ADDR 0x021D0000 +#define FSL_IMX6_TZASC1_SIZE 0x4000 +#define FSL_IMX6_CSU_ADDR 0x021C0000 +#define FSL_IMX6_CSU_SIZE 0x4000 +#define FSL_IMX6_OCOTPCTRL_ADDR 0x021BC000 +#define FSL_IMX6_OCOTPCTRL_SIZE 0x4000 +#define FSL_IMX6_EIM_ADDR 0x021B8000 +#define FSL_IMX6_EIM_SIZE 0x4000 +#define FSL_IMX6_MMDC1_ADDR 0x021B4000 +#define FSL_IMX6_MMDC1_SIZE 0x4000 +#define FSL_IMX6_MMDC0_ADDR 0x021B0000 +#define FSL_IMX6_MMDC0_SIZE 0x4000 +#define FSL_IMX6_ROMCP_ADDR 0x021AC000 +#define FSL_IMX6_ROMCP_SIZE 0x4000 +#define FSL_IMX6_I2C3_ADDR 0x021A8000 +#define FSL_IMX6_I2C3_SIZE 0x4000 +#define FSL_IMX6_I2C2_ADDR 0x021A4000 +#define FSL_IMX6_I2C2_SIZE 0x4000 +#define FSL_IMX6_I2C1_ADDR 0x021A0000 +#define FSL_IMX6_I2C1_SIZE 0x4000 +#define FSL_IMX6_uSDHC4_ADDR 0x0219C000 +#define FSL_IMX6_uSDHC4_SIZE 0x4000 +#define FSL_IMX6_uSDHC3_ADDR 0x02198000 +#define FSL_IMX6_uSDHC3_SIZE 0x4000 +#define FSL_IMX6_uSDHC2_ADDR 0x02194000 +#define FSL_IMX6_uSDHC2_SIZE 0x4000 +#define FSL_IMX6_uSDHC1_ADDR 0x02190000 +#define FSL_IMX6_uSDHC1_SIZE 0x4000 +#define FSL_IMX6_MLB150_ADDR 0x0218C000 +#define FSL_IMX6_MLB150_SIZE 0x4000 +#define FSL_IMX6_ENET_ADDR 0x02188000 +#define FSL_IMX6_ENET_SIZE 0x4000 +#define FSL_IMX6_USBOH3_USB_ADDR 0x02184000 +#define FSL_IMX6_USBOH3_USB_SIZE 0x4000 +#define FSL_IMX6_AIPS2_CFG_ADDR 0x0217C000 +#define FSL_IMX6_AIPS2_CFG_SIZE 0x4000 +/* DAP */ +#define FSL_IMX6_PTF_CTRL_ADDR 0x02160000 +#define FSL_IMX6_PTF_CTRL_SIZE 0x1000 +#define FSL_IMX6_PTM3_ADDR 0x0215F000 +#define FSL_IMX6_PTM3_SIZE 0x1000 +#define FSL_IMX6_PTM2_ADDR 0x0215E000 +#define FSL_IMX6_PTM2_SIZE 0x1000 +#define FSL_IMX6_PTM1_ADDR 0x0215D000 +#define FSL_IMX6_PTM1_SIZE 0x1000 +#define FSL_IMX6_PTM0_ADDR 0x0215C000 +#define FSL_IMX6_PTM0_SIZE 0x1000 +#define FSL_IMX6_CTI3_ADDR 0x0215B000 +#define FSL_IMX6_CTI3_SIZE 0x1000 +#define FSL_IMX6_CTI2_ADDR 0x0215A000 +#define FSL_IMX6_CTI2_SIZE 0x1000 +#define FSL_IMX6_CTI1_ADDR 0x02159000 +#define FSL_IMX6_CTI1_SIZE 0x1000 +#define FSL_IMX6_CTI0_ADDR 0x02158000 +#define FSL_IMX6_CTI0_SIZE 0x1000 +#define FSL_IMX6_CPU3_PMU_ADDR 0x02157000 +#define FSL_IMX6_CPU3_PMU_SIZE 0x1000 +#define FSL_IMX6_CPU3_DEBUG_IF_ADDR 0x02156000 +#define FSL_IMX6_CPU3_DEBUG_IF_SIZE 0x1000 +#define FSL_IMX6_CPU2_PMU_ADDR 0x02155000 +#define FSL_IMX6_CPU2_PMU_SIZE 0x1000 +#define FSL_IMX6_CPU2_DEBUG_IF_ADDR 0x02154000 +#define FSL_IMX6_CPU2_DEBUG_IF_SIZE 0x1000 +#define FSL_IMX6_CPU1_PMU_ADDR 0x02153000 +#define FSL_IMX6_CPU1_PMU_SIZE 0x1000 +#define FSL_IMX6_CPU1_DEBUG_IF_ADDR 0x02152000 +#define FSL_IMX6_CPU1_DEBUG_IF_SIZE 0x1000 +#define FSL_IMX6_CPU0_PMU_ADDR 0x02151000 +#define FSL_IMX6_CPU0_PMU_SIZE 0x1000 +#define FSL_IMX6_CPU0_DEBUG_IF_ADDR 0x02150000 +#define FSL_IMX6_CPU0_DEBUG_IF_SIZE 0x1000 +#define FSL_IMX6_CA9_INTEG_ADDR 0x0214F000 +#define FSL_IMX6_CA9_INTEG_SIZE 0x1000 +#define FSL_IMX6_FUNNEL_ADDR 0x02144000 +#define FSL_IMX6_FUNNEL_SIZE 0x1000 +#define FSL_IMX6_TPIU_ADDR 0x02143000 +#define FSL_IMX6_TPIU_SIZE 0x1000 +#define FSL_IMX6_EXT_CTI_ADDR 0x02142000 +#define FSL_IMX6_EXT_CTI_SIZE 0x1000 +#define FSL_IMX6_ETB_ADDR 0x02141000 +#define FSL_IMX6_ETB_SIZE 0x1000 +#define FSL_IMX6_DAP_ROM_TABLE_ADDR 0x02140000 +#define FSL_IMX6_DAP_ROM_TABLE_SIZE 0x1000 +/* DAP end */ +#define FSL_IMX6_CAAM_ADDR 0x02100000 +#define FSL_IMX6_CAAM_SIZE 0x10000 +/* AIPS2 end */ +#define FSL_IMX6_AIPS_1_ADDR 0x02000000 +#define FSL_IMX6_AIPS_1_SIZE 0x100000 +/* AIPS1 */ +#define FSL_IMX6_SDMA_ADDR 0x020EC000 +#define FSL_IMX6_SDMA_SIZE 0x4000 +#define FSL_IMX6_DCIC2_ADDR 0x020E8000 +#define FSL_IMX6_DCIC2_SIZE 0x4000 +#define FSL_IMX6_DCIC1_ADDR 0x020E4000 +#define FSL_IMX6_DCIC1_SIZE 0x4000 +#define FSL_IMX6_IOMUXC_ADDR 0x020E0000 +#define FSL_IMX6_IOMUXC_SIZE 0x4000 +#define FSL_IMX6_PGCARM_ADDR 0x020DCA00 +#define FSL_IMX6_PGCARM_SIZE 0x20 +#define FSL_IMX6_PGCPU_ADDR 0x020DC260 +#define FSL_IMX6_PGCPU_SIZE 0x20 +#define FSL_IMX6_GPC_ADDR 0x020DC000 +#define FSL_IMX6_GPC_SIZE 0x4000 +#define FSL_IMX6_SRC_ADDR 0x020D8000 +#define FSL_IMX6_SRC_SIZE 0x4000 +#define FSL_IMX6_EPIT2_ADDR 0x020D4000 +#define FSL_IMX6_EPIT2_SIZE 0x4000 +#define FSL_IMX6_EPIT1_ADDR 0x020D0000 +#define FSL_IMX6_EPIT1_SIZE 0x4000 +#define FSL_IMX6_SNVSHP_ADDR 0x020CC000 +#define FSL_IMX6_SNVSHP_SIZE 0x4000 +#define FSL_IMX6_USBPHY2_ADDR 0x020CA000 +#define FSL_IMX6_USBPHY2_SIZE 0x1000 +#define FSL_IMX6_USBPHY1_ADDR 0x020C9000 +#define FSL_IMX6_USBPHY1_SIZE 0x1000 +#define FSL_IMX6_ANALOG_ADDR 0x020C8000 +#define FSL_IMX6_ANALOG_SIZE 0x1000 +#define FSL_IMX6_CCM_ADDR 0x020C4000 +#define FSL_IMX6_CCM_SIZE 0x4000 +#define FSL_IMX6_WDOG2_ADDR 0x020C0000 +#define FSL_IMX6_WDOG2_SIZE 0x4000 +#define FSL_IMX6_WDOG1_ADDR 0x020BC000 +#define FSL_IMX6_WDOG1_SIZE 0x4000 +#define FSL_IMX6_KPP_ADDR 0x020B8000 +#define FSL_IMX6_KPP_SIZE 0x4000 +#define FSL_IMX6_GPIO7_ADDR 0x020B4000 +#define FSL_IMX6_GPIO7_SIZE 0x4000 +#define FSL_IMX6_GPIO6_ADDR 0x020B0000 +#define FSL_IMX6_GPIO6_SIZE 0x4000 +#define FSL_IMX6_GPIO5_ADDR 0x020AC000 +#define FSL_IMX6_GPIO5_SIZE 0x4000 +#define FSL_IMX6_GPIO4_ADDR 0x020A8000 +#define FSL_IMX6_GPIO4_SIZE 0x4000 +#define FSL_IMX6_GPIO3_ADDR 0x020A4000 +#define FSL_IMX6_GPIO3_SIZE 0x4000 +#define FSL_IMX6_GPIO2_ADDR 0x020A0000 +#define FSL_IMX6_GPIO2_SIZE 0x4000 +#define FSL_IMX6_GPIO1_ADDR 0x0209C000 +#define FSL_IMX6_GPIO1_SIZE 0x4000 +#define FSL_IMX6_GPT_ADDR 0x02098000 +#define FSL_IMX6_GPT_SIZE 0x4000 +#define FSL_IMX6_CAN2_ADDR 0x02094000 +#define FSL_IMX6_CAN2_SIZE 0x4000 +#define FSL_IMX6_CAN1_ADDR 0x02090000 +#define FSL_IMX6_CAN1_SIZE 0x4000 +#define FSL_IMX6_PWM4_ADDR 0x0208C000 +#define FSL_IMX6_PWM4_SIZE 0x4000 +#define FSL_IMX6_PWM3_ADDR 0x02088000 +#define FSL_IMX6_PWM3_SIZE 0x4000 +#define FSL_IMX6_PWM2_ADDR 0x02084000 +#define FSL_IMX6_PWM2_SIZE 0x4000 +#define FSL_IMX6_PWM1_ADDR 0x02080000 +#define FSL_IMX6_PWM1_SIZE 0x4000 +#define FSL_IMX6_AIPS1_CFG_ADDR 0x0207C000 +#define FSL_IMX6_AIPS1_CFG_SIZE 0x4000 +#define FSL_IMX6_VPU_ADDR 0x02040000 +#define FSL_IMX6_VPU_SIZE 0x3C000 +#define FSL_IMX6_AIPS1_SPBA_ADDR 0x0203C000 +#define FSL_IMX6_AIPS1_SPBA_SIZE 0x4000 +#define FSL_IMX6_ASRC_ADDR 0x02034000 +#define FSL_IMX6_ASRC_SIZE 0x4000 +#define FSL_IMX6_SSI3_ADDR 0x02030000 +#define FSL_IMX6_SSI3_SIZE 0x4000 +#define FSL_IMX6_SSI2_ADDR 0x0202C000 +#define FSL_IMX6_SSI2_SIZE 0x4000 +#define FSL_IMX6_SSI1_ADDR 0x02028000 +#define FSL_IMX6_SSI1_SIZE 0x4000 +#define FSL_IMX6_ESAI_ADDR 0x02024000 +#define FSL_IMX6_ESAI_SIZE 0x4000 +#define FSL_IMX6_UART1_ADDR 0x02020000 +#define FSL_IMX6_UART1_SIZE 0x4000 +#define FSL_IMX6_eCSPI5_ADDR 0x02018000 +#define FSL_IMX6_eCSPI5_SIZE 0x4000 +#define FSL_IMX6_eCSPI4_ADDR 0x02014000 +#define FSL_IMX6_eCSPI4_SIZE 0x4000 +#define FSL_IMX6_eCSPI3_ADDR 0x02010000 +#define FSL_IMX6_eCSPI3_SIZE 0x4000 +#define FSL_IMX6_eCSPI2_ADDR 0x0200C000 +#define FSL_IMX6_eCSPI2_SIZE 0x4000 +#define FSL_IMX6_eCSPI1_ADDR 0x02008000 +#define FSL_IMX6_eCSPI1_SIZE 0x4000 +#define FSL_IMX6_SPDIF_ADDR 0x02004000 +#define FSL_IMX6_SPDIF_SIZE 0x4000 +/* AIPS1 end */ +#define FSL_IMX6_PCIe_REG_ADDR 0x01FFC000 +#define FSL_IMX6_PCIe_REG_SIZE 0x4000 +#define FSL_IMX6_PCIe_ADDR 0x01000000 +#define FSL_IMX6_PCIe_SIZE 0xFFC000 +#define FSL_IMX6_GPV_1_PL301_CFG_ADDR 0x00C00000 +#define FSL_IMX6_GPV_1_PL301_CFG_SIZE 0x100000 +#define FSL_IMX6_GPV_0_PL301_CFG_ADDR 0x00B00000 +#define FSL_IMX6_GPV_0_PL301_CFG_SIZE 0x100000 +#define FSL_IMX6_PL310_ADDR 0x00A02000 +#define FSL_IMX6_PL310_SIZE 0x1000 +#define FSL_IMX6_A9MPCORE_ADDR 0x00A00000 +#define FSL_IMX6_A9MPCORE_SIZE 0x2000 +#define FSL_IMX6_OCRAM_ALIAS_ADDR 0x00940000 +#define FSL_IMX6_OCRAM_ALIAS_SIZE 0xC0000 +#define FSL_IMX6_OCRAM_ADDR 0x00900000 +#define FSL_IMX6_OCRAM_SIZE 0x40000 +#define FSL_IMX6_GPV_4_PL301_CFG_ADDR 0x00800000 +#define FSL_IMX6_GPV_4_PL301_CFG_SIZE 0x100000 +#define FSL_IMX6_GPV_3_PL301_CFG_ADDR 0x00300000 +#define FSL_IMX6_GPV_3_PL301_CFG_SIZE 0x100000 +#define FSL_IMX6_GPV_2_PL301_CFG_ADDR 0x00200000 +#define FSL_IMX6_GPV_2_PL301_CFG_SIZE 0x100000 +#define FSL_IMX6_DTCP_ADDR 0x00138000 +#define FSL_IMX6_DTCP_SIZE 0x4000 +#define FSL_IMX6_GPU_2D_ADDR 0x00134000 +#define FSL_IMX6_GPU_2D_SIZE 0x4000 +#define FSL_IMX6_GPU_3D_ADDR 0x00130000 +#define FSL_IMX6_GPU_3D_SIZE 0x4000 +#define FSL_IMX6_HDMI_ADDR 0x00120000 +#define FSL_IMX6_HDMI_SIZE 0x9000 +#define FSL_IMX6_BCH_ADDR 0x00114000 +#define FSL_IMX6_BCH_SIZE 0x4000 +#define FSL_IMX6_GPMI_ADDR 0x00112000 +#define FSL_IMX6_GPMI_SIZE 0x2000 +#define FSL_IMX6_APBH_BRIDGE_DMA_ADDR 0x00110000 +#define FSL_IMX6_APBH_BRIDGE_DMA_SIZE 0x2000 +#define FSL_IMX6_CAAM_MEM_ADDR 0x00100000 +#define FSL_IMX6_CAAM_MEM_SIZE 0x4000 +#define FSL_IMX6_ROM_ADDR 0x00000000 +#define FSL_IMX6_ROM_SIZE 0x18000 + +#define FSL_IMX6_IOMUXC_IRQ 0 +#define FSL_IMX6_DAP_IRQ 1 +#define FSL_IMX6_SDMA_IRQ 2 +#define FSL_IMX6_VPU_JPEG_IRQ 3 +#define FSL_IMX6_SNVS_PMIC_IRQ 4 +#define FSL_IMX6_IPU1_ERROR_IRQ 5 +#define FSL_IMX6_IPU1_SYNC_IRQ 6 +#define FSL_IMX6_IPU2_ERROR_IRQ 7 +#define FSL_IMX6_IPU2_SYNC_IRQ 8 +#define FSL_IMX6_GPU3D_IRQ 9 +#define FSL_IMX6_R2D_IRQ 10 +#define FSL_IMX6_V2D_IRQ 11 +#define FSL_IMX6_VPU_IRQ 12 +#define FSL_IMX6_APBH_BRIDGE_DMA_IRQ 13 +#define FSL_IMX6_EIM_IRQ 14 +#define FSL_IMX6_BCH_IRQ 15 +#define FSL_IMX6_GPMI_IRQ 16 +#define FSL_IMX6_DTCP_IRQ 17 +#define FSL_IMX6_VDOA_IRQ 18 +#define FSL_IMX6_SNVS_CONS_IRQ 19 +#define FSL_IMX6_SNVS_SEC_IRQ 20 +#define FSL_IMX6_CSU_IRQ 21 +#define FSL_IMX6_uSDHC1_IRQ 22 +#define FSL_IMX6_uSDHC2_IRQ 23 +#define FSL_IMX6_uSDHC3_IRQ 24 +#define FSL_IMX6_uSDHC4_IRQ 25 +#define FSL_IMX6_UART1_IRQ 26 +#define FSL_IMX6_UART2_IRQ 27 +#define FSL_IMX6_UART3_IRQ 28 +#define FSL_IMX6_UART4_IRQ 29 +#define FSL_IMX6_UART5_IRQ 30 +#define FSL_IMX6_ECSPI1_IRQ 31 +#define FSL_IMX6_ECSPI2_IRQ 32 +#define FSL_IMX6_ECSPI3_IRQ 33 +#define FSL_IMX6_ECSPI4_IRQ 34 +#define FSL_IMX6_ECSPI5_IRQ 35 +#define FSL_IMX6_I2C1_IRQ 36 +#define FSL_IMX6_I2C2_IRQ 37 +#define FSL_IMX6_I2C3_IRQ 38 +#define FSL_IMX6_SATA_IRQ 39 +#define FSL_IMX6_USB_HOST1_IRQ 40 +#define FSL_IMX6_USB_HOST2_IRQ 41 +#define FSL_IMX6_USB_HOST3_IRQ 42 +#define FSL_IMX6_USB_OTG_IRQ 43 +#define FSL_IMX6_USB_PHY_UTMI0_IRQ 44 +#define FSL_IMX6_USB_PHY_UTMI1_IRQ 45 +#define FSL_IMX6_SSI1_IRQ 46 +#define FSL_IMX6_SSI2_IRQ 47 +#define FSL_IMX6_SSI3_IRQ 48 +#define FSL_IMX6_TEMP_IRQ 49 +#define FSL_IMX6_ASRC_IRQ 50 +#define FSL_IMX6_ESAI_IRQ 51 +#define FSL_IMX6_SPDIF_IRQ 52 +#define FSL_IMX6_MLB150_IRQ 53 +#define FSL_IMX6_PMU1_IRQ 54 +#define FSL_IMX6_GPT_IRQ 55 +#define FSL_IMX6_EPIT1_IRQ 56 +#define FSL_IMX6_EPIT2_IRQ 57 +#define FSL_IMX6_GPIO1_INT7_IRQ 58 +#define FSL_IMX6_GPIO1_INT6_IRQ 59 +#define FSL_IMX6_GPIO1_INT5_IRQ 60 +#define FSL_IMX6_GPIO1_INT4_IRQ 61 +#define FSL_IMX6_GPIO1_INT3_IRQ 62 +#define FSL_IMX6_GPIO1_INT2_IRQ 63 +#define FSL_IMX6_GPIO1_INT1_IRQ 64 +#define FSL_IMX6_GPIO1_INT0_IRQ 65 +#define FSL_IMX6_GPIO1_LOW_IRQ 66 +#define FSL_IMX6_GPIO1_HIGH_IRQ 67 +#define FSL_IMX6_GPIO2_LOW_IRQ 68 +#define FSL_IMX6_GPIO2_HIGH_IRQ 69 +#define FSL_IMX6_GPIO3_LOW_IRQ 70 +#define FSL_IMX6_GPIO3_HIGH_IRQ 71 +#define FSL_IMX6_GPIO4_LOW_IRQ 72 +#define FSL_IMX6_GPIO4_HIGH_IRQ 73 +#define FSL_IMX6_GPIO5_LOW_IRQ 74 +#define FSL_IMX6_GPIO5_HIGH_IRQ 75 +#define FSL_IMX6_GPIO6_LOW_IRQ 76 +#define FSL_IMX6_GPIO6_HIGH_IRQ 77 +#define FSL_IMX6_GPIO7_LOW_IRQ 78 +#define FSL_IMX6_GPIO7_HIGH_IRQ 79 +#define FSL_IMX6_WDOG1_IRQ 80 +#define FSL_IMX6_WDOG2_IRQ 81 +#define FSL_IMX6_KPP_IRQ 82 +#define FSL_IMX6_PWM1_IRQ 83 +#define FSL_IMX6_PWM2_IRQ 84 +#define FSL_IMX6_PWM3_IRQ 85 +#define FSL_IMX6_PWM4_IRQ 86 +#define FSL_IMX6_CCM1_IRQ 87 +#define FSL_IMX6_CCM2_IRQ 88 +#define FSL_IMX6_GPC_IRQ 89 +#define FSL_IMX6_SRC_IRQ 91 +#define FSL_IMX6_CPU_L2_IRQ 92 +#define FSL_IMX6_CPU_PARITY_IRQ 93 +#define FSL_IMX6_CPU_PERF_IRQ 94 +#define FSL_IMX6_CPU_CTI_IRQ 95 +#define FSL_IMX6_SRC_COMB_IRQ 96 +#define FSL_IMX6_MIPI_CSI1_IRQ 100 +#define FSL_IMX6_MIPI_CSI2_IRQ 101 +#define FSL_IMX6_MIPI_DSI_IRQ 102 +#define FSL_IMX6_MIPI_HSI_IRQ 103 +#define FSL_IMX6_SJC_IRQ 104 +#define FSL_IMX6_CAAM0_IRQ 105 +#define FSL_IMX6_CAAM1_IRQ 106 +#define FSL_IMX6_ASC1_IRQ 108 +#define FSL_IMX6_ASC2_IRQ 109 +#define FSL_IMX6_FLEXCAN1_IRQ 110 +#define FSL_IMX6_FLEXCAN2_IRQ 111 +#define FSL_IMX6_HDMI_MASTER_IRQ 115 +#define FSL_IMX6_HDMI_CEC_IRQ 116 +#define FSL_IMX6_MLB150_LOW_IRQ 117 +#define FSL_IMX6_ENET_MAC_IRQ 118 +#define FSL_IMX6_ENET_MAC_1588_IRQ 119 +#define FSL_IMX6_PCIE1_IRQ 120 +#define FSL_IMX6_PCIE2_IRQ 121 +#define FSL_IMX6_PCIE3_IRQ 122 +#define FSL_IMX6_PCIE4_IRQ 123 +#define FSL_IMX6_DCIC1_IRQ 124 +#define FSL_IMX6_DCIC2_IRQ 125 +#define FSL_IMX6_MLB150_HIGH_IRQ 126 +#define FSL_IMX6_PMU2_IRQ 127 +#define FSL_IMX6_MAX_IRQ 128 + +#endif /* FSL_IMX6_H */ diff --git a/include/hw/arm/fsl-imx6ul.h b/include/hw/arm/fsl-imx6ul.h new file mode 100644 index 000000000..f8ebfba4f --- /dev/null +++ b/include/hw/arm/fsl-imx6ul.h @@ -0,0 +1,352 @@ +/* + * Copyright (c) 2018 Jean-Christophe Dubois + * + * i.MX6ul SoC definitions + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + */ + +#ifndef FSL_IMX6UL_H +#define FSL_IMX6UL_H + +#include "hw/arm/boot.h" +#include "hw/cpu/a15mpcore.h" +#include "hw/misc/imx6ul_ccm.h" +#include "hw/misc/imx6_src.h" +#include "hw/misc/imx7_snvs.h" +#include "hw/misc/imx7_gpr.h" +#include "hw/intc/imx_gpcv2.h" +#include "hw/watchdog/wdt_imx2.h" +#include "hw/gpio/imx_gpio.h" +#include "hw/char/imx_serial.h" +#include "hw/timer/imx_gpt.h" +#include "hw/timer/imx_epit.h" +#include "hw/i2c/imx_i2c.h" +#include "hw/gpio/imx_gpio.h" +#include "hw/sd/sdhci.h" +#include "hw/ssi/imx_spi.h" +#include "hw/net/imx_fec.h" +#include "hw/usb/chipidea.h" +#include "hw/usb/imx-usb-phy.h" +#include "exec/memory.h" +#include "cpu.h" +#include "qom/object.h" + +#define TYPE_FSL_IMX6UL "fsl,imx6ul" +OBJECT_DECLARE_SIMPLE_TYPE(FslIMX6ULState, FSL_IMX6UL) + +enum FslIMX6ULConfiguration { + FSL_IMX6UL_NUM_CPUS = 1, + FSL_IMX6UL_NUM_UARTS = 8, + FSL_IMX6UL_NUM_ETHS = 2, + FSL_IMX6UL_ETH_NUM_TX_RINGS = 2, + FSL_IMX6UL_NUM_USDHCS = 2, + FSL_IMX6UL_NUM_WDTS = 3, + FSL_IMX6UL_NUM_GPTS = 2, + FSL_IMX6UL_NUM_EPITS = 2, + FSL_IMX6UL_NUM_IOMUXCS = 2, + FSL_IMX6UL_NUM_GPIOS = 5, + FSL_IMX6UL_NUM_I2CS = 4, + FSL_IMX6UL_NUM_ECSPIS = 4, + FSL_IMX6UL_NUM_ADCS = 2, + FSL_IMX6UL_NUM_USB_PHYS = 2, + FSL_IMX6UL_NUM_USBS = 2, +}; + +struct FslIMX6ULState { + /*< private >*/ + DeviceState parent_obj; + + /*< public >*/ + ARMCPU cpu; + A15MPPrivState a7mpcore; + IMXGPTState gpt[FSL_IMX6UL_NUM_GPTS]; + IMXEPITState epit[FSL_IMX6UL_NUM_EPITS]; + IMXGPIOState gpio[FSL_IMX6UL_NUM_GPIOS]; + IMX6ULCCMState ccm; + IMX6SRCState src; + IMX7SNVSState snvs; + IMXGPCv2State gpcv2; + IMX7GPRState gpr; + IMXSPIState spi[FSL_IMX6UL_NUM_ECSPIS]; + IMXI2CState i2c[FSL_IMX6UL_NUM_I2CS]; + IMXSerialState uart[FSL_IMX6UL_NUM_UARTS]; + IMXFECState eth[FSL_IMX6UL_NUM_ETHS]; + SDHCIState usdhc[FSL_IMX6UL_NUM_USDHCS]; + IMX2WdtState wdt[FSL_IMX6UL_NUM_WDTS]; + IMXUSBPHYState usbphy[FSL_IMX6UL_NUM_USB_PHYS]; + ChipideaState usb[FSL_IMX6UL_NUM_USBS]; + MemoryRegion rom; + MemoryRegion caam; + MemoryRegion ocram; + MemoryRegion ocram_alias; + + uint32_t phy_num[FSL_IMX6UL_NUM_ETHS]; +}; + +enum FslIMX6ULMemoryMap { + FSL_IMX6UL_MMDC_ADDR = 0x80000000, + FSL_IMX6UL_MMDC_SIZE = 2 * 1024 * 1024 * 1024UL, + + FSL_IMX6UL_QSPI1_MEM_ADDR = 0x60000000, + FSL_IMX6UL_EIM_ALIAS_ADDR = 0x58000000, + FSL_IMX6UL_EIM_CS_ADDR = 0x50000000, + FSL_IMX6UL_AES_ENCRYPT_ADDR = 0x10000000, + FSL_IMX6UL_QSPI1_RX_ADDR = 0x0C000000, + + /* AIPS-2 */ + FSL_IMX6UL_UART6_ADDR = 0x021FC000, + FSL_IMX6UL_I2C4_ADDR = 0x021F8000, + FSL_IMX6UL_UART5_ADDR = 0x021F4000, + FSL_IMX6UL_UART4_ADDR = 0x021F0000, + FSL_IMX6UL_UART3_ADDR = 0x021EC000, + FSL_IMX6UL_UART2_ADDR = 0x021E8000, + FSL_IMX6UL_WDOG3_ADDR = 0x021E4000, + FSL_IMX6UL_QSPI_ADDR = 0x021E0000, + FSL_IMX6UL_SYS_CNT_CTRL_ADDR = 0x021DC000, + FSL_IMX6UL_SYS_CNT_CMP_ADDR = 0x021D8000, + FSL_IMX6UL_SYS_CNT_RD_ADDR = 0x021D4000, + FSL_IMX6UL_TZASC_ADDR = 0x021D0000, + FSL_IMX6UL_PXP_ADDR = 0x021CC000, + FSL_IMX6UL_LCDIF_ADDR = 0x021C8000, + FSL_IMX6UL_CSI_ADDR = 0x021C4000, + FSL_IMX6UL_CSU_ADDR = 0x021C0000, + FSL_IMX6UL_OCOTP_CTRL_ADDR = 0x021BC000, + FSL_IMX6UL_EIM_ADDR = 0x021B8000, + FSL_IMX6UL_SIM2_ADDR = 0x021B4000, + FSL_IMX6UL_MMDC_CFG_ADDR = 0x021B0000, + FSL_IMX6UL_ROMCP_ADDR = 0x021AC000, + FSL_IMX6UL_I2C3_ADDR = 0x021A8000, + FSL_IMX6UL_I2C2_ADDR = 0x021A4000, + FSL_IMX6UL_I2C1_ADDR = 0x021A0000, + FSL_IMX6UL_ADC2_ADDR = 0x0219C000, + FSL_IMX6UL_ADC1_ADDR = 0x02198000, + FSL_IMX6UL_USDHC2_ADDR = 0x02194000, + FSL_IMX6UL_USDHC1_ADDR = 0x02190000, + FSL_IMX6UL_SIM1_ADDR = 0x0218C000, + FSL_IMX6UL_ENET1_ADDR = 0x02188000, + FSL_IMX6UL_USBO2_USBMISC_ADDR = 0x02184800, + FSL_IMX6UL_USBO2_USB_ADDR = 0x02184000, + FSL_IMX6UL_USBO2_PL301_ADDR = 0x02180000, + FSL_IMX6UL_AIPS2_CFG_ADDR = 0x0217C000, + FSL_IMX6UL_CAAM_ADDR = 0x02140000, + FSL_IMX6UL_A7MPCORE_DAP_ADDR = 0x02100000, + + /* AIPS-1 */ + FSL_IMX6UL_PWM8_ADDR = 0x020FC000, + FSL_IMX6UL_PWM7_ADDR = 0x020F8000, + FSL_IMX6UL_PWM6_ADDR = 0x020F4000, + FSL_IMX6UL_PWM5_ADDR = 0x020F0000, + FSL_IMX6UL_SDMA_ADDR = 0x020EC000, + FSL_IMX6UL_GPT2_ADDR = 0x020E8000, + FSL_IMX6UL_IOMUXC_GPR_ADDR = 0x020E4000, + FSL_IMX6UL_IOMUXC_ADDR = 0x020E0000, + FSL_IMX6UL_GPC_ADDR = 0x020DC000, + FSL_IMX6UL_SRC_ADDR = 0x020D8000, + FSL_IMX6UL_EPIT2_ADDR = 0x020D4000, + FSL_IMX6UL_EPIT1_ADDR = 0x020D0000, + FSL_IMX6UL_SNVS_HP_ADDR = 0x020CC000, + FSL_IMX6UL_USBPHY2_ADDR = 0x020CA000, + FSL_IMX6UL_USBPHY2_SIZE = (4 * 1024), + FSL_IMX6UL_USBPHY1_ADDR = 0x020C9000, + FSL_IMX6UL_USBPHY1_SIZE = (4 * 1024), + FSL_IMX6UL_ANALOG_ADDR = 0x020C8000, + FSL_IMX6UL_CCM_ADDR = 0x020C4000, + FSL_IMX6UL_WDOG2_ADDR = 0x020C0000, + FSL_IMX6UL_WDOG1_ADDR = 0x020BC000, + FSL_IMX6UL_KPP_ADDR = 0x020B8000, + FSL_IMX6UL_ENET2_ADDR = 0x020B4000, + FSL_IMX6UL_SNVS_LP_ADDR = 0x020B0000, + FSL_IMX6UL_GPIO5_ADDR = 0x020AC000, + FSL_IMX6UL_GPIO4_ADDR = 0x020A8000, + FSL_IMX6UL_GPIO3_ADDR = 0x020A4000, + FSL_IMX6UL_GPIO2_ADDR = 0x020A0000, + FSL_IMX6UL_GPIO1_ADDR = 0x0209C000, + FSL_IMX6UL_GPT1_ADDR = 0x02098000, + FSL_IMX6UL_CAN2_ADDR = 0x02094000, + FSL_IMX6UL_CAN1_ADDR = 0x02090000, + FSL_IMX6UL_PWM4_ADDR = 0x0208C000, + FSL_IMX6UL_PWM3_ADDR = 0x02088000, + FSL_IMX6UL_PWM2_ADDR = 0x02084000, + FSL_IMX6UL_PWM1_ADDR = 0x02080000, + FSL_IMX6UL_AIPS1_CFG_ADDR = 0x0207C000, + FSL_IMX6UL_BEE_ADDR = 0x02044000, + FSL_IMX6UL_TOUCH_CTRL_ADDR = 0x02040000, + FSL_IMX6UL_SPBA_ADDR = 0x0203C000, + FSL_IMX6UL_ASRC_ADDR = 0x02034000, + FSL_IMX6UL_SAI3_ADDR = 0x02030000, + FSL_IMX6UL_SAI2_ADDR = 0x0202C000, + FSL_IMX6UL_SAI1_ADDR = 0x02028000, + FSL_IMX6UL_UART8_ADDR = 0x02024000, + FSL_IMX6UL_UART1_ADDR = 0x02020000, + FSL_IMX6UL_UART7_ADDR = 0x02018000, + FSL_IMX6UL_ECSPI4_ADDR = 0x02014000, + FSL_IMX6UL_ECSPI3_ADDR = 0x02010000, + FSL_IMX6UL_ECSPI2_ADDR = 0x0200C000, + FSL_IMX6UL_ECSPI1_ADDR = 0x02008000, + FSL_IMX6UL_SPDIF_ADDR = 0x02004000, + + FSL_IMX6UL_APBH_DMA_ADDR = 0x01804000, + FSL_IMX6UL_APBH_DMA_SIZE = (32 * 1024), + + FSL_IMX6UL_A7MPCORE_ADDR = 0x00A00000, + + FSL_IMX6UL_OCRAM_ALIAS_ADDR = 0x00920000, + FSL_IMX6UL_OCRAM_ALIAS_SIZE = 0x00060000, + FSL_IMX6UL_OCRAM_MEM_ADDR = 0x00900000, + FSL_IMX6UL_OCRAM_MEM_SIZE = 0x00020000, + FSL_IMX6UL_CAAM_MEM_ADDR = 0x00100000, + FSL_IMX6UL_CAAM_MEM_SIZE = 0x00008000, + FSL_IMX6UL_ROM_ADDR = 0x00000000, + FSL_IMX6UL_ROM_SIZE = 0x00018000, +}; + +enum FslIMX6ULIRQs { + FSL_IMX6UL_IOMUXC_IRQ = 0, + FSL_IMX6UL_DAP_IRQ = 1, + FSL_IMX6UL_SDMA_IRQ = 2, + FSL_IMX6UL_TSC_IRQ = 3, + FSL_IMX6UL_SNVS_IRQ = 4, + FSL_IMX6UL_LCDIF_IRQ = 5, + FSL_IMX6UL_BEE_IRQ = 6, + FSL_IMX6UL_CSI_IRQ = 7, + FSL_IMX6UL_PXP_IRQ = 8, + FSL_IMX6UL_SCTR1_IRQ = 9, + FSL_IMX6UL_SCTR2_IRQ = 10, + FSL_IMX6UL_WDOG3_IRQ = 11, + FSL_IMX6UL_APBH_DMA_IRQ = 13, + FSL_IMX6UL_WEIM_IRQ = 14, + FSL_IMX6UL_RAWNAND1_IRQ = 15, + FSL_IMX6UL_RAWNAND2_IRQ = 16, + FSL_IMX6UL_UART6_IRQ = 17, + FSL_IMX6UL_SRTC_IRQ = 19, + FSL_IMX6UL_SRTC_SEC_IRQ = 20, + FSL_IMX6UL_CSU_IRQ = 21, + FSL_IMX6UL_USDHC1_IRQ = 22, + FSL_IMX6UL_USDHC2_IRQ = 23, + FSL_IMX6UL_SAI3_IRQ = 24, + FSL_IMX6UL_SAI32_IRQ = 25, + + FSL_IMX6UL_UART1_IRQ = 26, + FSL_IMX6UL_UART2_IRQ = 27, + FSL_IMX6UL_UART3_IRQ = 28, + FSL_IMX6UL_UART4_IRQ = 29, + FSL_IMX6UL_UART5_IRQ = 30, + + FSL_IMX6UL_ECSPI1_IRQ = 31, + FSL_IMX6UL_ECSPI2_IRQ = 32, + FSL_IMX6UL_ECSPI3_IRQ = 33, + FSL_IMX6UL_ECSPI4_IRQ = 34, + + FSL_IMX6UL_I2C4_IRQ = 35, + FSL_IMX6UL_I2C1_IRQ = 36, + FSL_IMX6UL_I2C2_IRQ = 37, + FSL_IMX6UL_I2C3_IRQ = 38, + + FSL_IMX6UL_UART7_IRQ = 39, + FSL_IMX6UL_UART8_IRQ = 40, + + FSL_IMX6UL_USB1_IRQ = 43, + FSL_IMX6UL_USB2_IRQ = 42, + FSL_IMX6UL_USB_PHY1_IRQ = 44, + FSL_IMX6UL_USB_PHY2_IRQ = 45, + + FSL_IMX6UL_CAAM_JQ2_IRQ = 46, + FSL_IMX6UL_CAAM_ERR_IRQ = 47, + FSL_IMX6UL_CAAM_RTIC_IRQ = 48, + FSL_IMX6UL_TEMP_IRQ = 49, + FSL_IMX6UL_ASRC_IRQ = 50, + FSL_IMX6UL_SPDIF_IRQ = 52, + FSL_IMX6UL_PMU_REG_IRQ = 54, + FSL_IMX6UL_GPT1_IRQ = 55, + + FSL_IMX6UL_EPIT1_IRQ = 56, + FSL_IMX6UL_EPIT2_IRQ = 57, + + FSL_IMX6UL_GPIO1_INT7_IRQ = 58, + FSL_IMX6UL_GPIO1_INT6_IRQ = 59, + FSL_IMX6UL_GPIO1_INT5_IRQ = 60, + FSL_IMX6UL_GPIO1_INT4_IRQ = 61, + FSL_IMX6UL_GPIO1_INT3_IRQ = 62, + FSL_IMX6UL_GPIO1_INT2_IRQ = 63, + FSL_IMX6UL_GPIO1_INT1_IRQ = 64, + FSL_IMX6UL_GPIO1_INT0_IRQ = 65, + FSL_IMX6UL_GPIO1_LOW_IRQ = 66, + FSL_IMX6UL_GPIO1_HIGH_IRQ = 67, + FSL_IMX6UL_GPIO2_LOW_IRQ = 68, + FSL_IMX6UL_GPIO2_HIGH_IRQ = 69, + FSL_IMX6UL_GPIO3_LOW_IRQ = 70, + FSL_IMX6UL_GPIO3_HIGH_IRQ = 71, + FSL_IMX6UL_GPIO4_LOW_IRQ = 72, + FSL_IMX6UL_GPIO4_HIGH_IRQ = 73, + FSL_IMX6UL_GPIO5_LOW_IRQ = 74, + FSL_IMX6UL_GPIO5_HIGH_IRQ = 75, + + FSL_IMX6UL_WDOG1_IRQ = 80, + FSL_IMX6UL_WDOG2_IRQ = 81, + + FSL_IMX6UL_KPP_IRQ = 82, + + FSL_IMX6UL_PWM1_IRQ = 83, + FSL_IMX6UL_PWM2_IRQ = 84, + FSL_IMX6UL_PWM3_IRQ = 85, + FSL_IMX6UL_PWM4_IRQ = 86, + + FSL_IMX6UL_CCM1_IRQ = 87, + FSL_IMX6UL_CCM2_IRQ = 88, + + FSL_IMX6UL_GPC_IRQ = 89, + + FSL_IMX6UL_SRC_IRQ = 91, + + FSL_IMX6UL_CPU_PERF_IRQ = 94, + FSL_IMX6UL_CPU_CTI_IRQ = 95, + + FSL_IMX6UL_SRC_WDOG_IRQ = 96, + + FSL_IMX6UL_SAI1_IRQ = 97, + FSL_IMX6UL_SAI2_IRQ = 98, + + FSL_IMX6UL_ADC1_IRQ = 100, + FSL_IMX6UL_ADC2_IRQ = 101, + + FSL_IMX6UL_SJC_IRQ = 104, + + FSL_IMX6UL_CAAM_RING0_IRQ = 105, + FSL_IMX6UL_CAAM_RING1_IRQ = 106, + + FSL_IMX6UL_QSPI_IRQ = 107, + + FSL_IMX6UL_TZASC_IRQ = 108, + + FSL_IMX6UL_GPT2_IRQ = 109, + + FSL_IMX6UL_CAN1_IRQ = 110, + FSL_IMX6UL_CAN2_IRQ = 111, + + FSL_IMX6UL_SIM1_IRQ = 112, + FSL_IMX6UL_SIM2_IRQ = 113, + + FSL_IMX6UL_PWM5_IRQ = 114, + FSL_IMX6UL_PWM6_IRQ = 115, + FSL_IMX6UL_PWM7_IRQ = 116, + FSL_IMX6UL_PWM8_IRQ = 117, + + FSL_IMX6UL_ENET1_IRQ = 118, + FSL_IMX6UL_ENET1_TIMER_IRQ = 119, + FSL_IMX6UL_ENET2_IRQ = 120, + FSL_IMX6UL_ENET2_TIMER_IRQ = 121, + + FSL_IMX6UL_PMU_CORE_IRQ = 127, + FSL_IMX6UL_MAX_IRQ = 128, +}; + +#endif /* FSL_IMX6UL_H */ diff --git a/include/hw/arm/fsl-imx7.h b/include/hw/arm/fsl-imx7.h new file mode 100644 index 000000000..161fdc36d --- /dev/null +++ b/include/hw/arm/fsl-imx7.h @@ -0,0 +1,250 @@ +/* + * Copyright (c) 2018, Impinj, Inc. + * + * i.MX7 SoC definitions + * + * Author: Andrey Smirnov + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + */ + +#ifndef FSL_IMX7_H +#define FSL_IMX7_H + +#include "hw/arm/boot.h" +#include "hw/cpu/a15mpcore.h" +#include "hw/intc/imx_gpcv2.h" +#include "hw/misc/imx7_ccm.h" +#include "hw/misc/imx7_snvs.h" +#include "hw/misc/imx7_gpr.h" +#include "hw/misc/imx6_src.h" +#include "hw/watchdog/wdt_imx2.h" +#include "hw/gpio/imx_gpio.h" +#include "hw/char/imx_serial.h" +#include "hw/timer/imx_gpt.h" +#include "hw/timer/imx_epit.h" +#include "hw/i2c/imx_i2c.h" +#include "hw/gpio/imx_gpio.h" +#include "hw/sd/sdhci.h" +#include "hw/ssi/imx_spi.h" +#include "hw/net/imx_fec.h" +#include "hw/pci-host/designware.h" +#include "hw/usb/chipidea.h" +#include "cpu.h" +#include "qom/object.h" + +#define TYPE_FSL_IMX7 "fsl,imx7" +OBJECT_DECLARE_SIMPLE_TYPE(FslIMX7State, FSL_IMX7) + +enum FslIMX7Configuration { + FSL_IMX7_NUM_CPUS = 2, + FSL_IMX7_NUM_UARTS = 7, + FSL_IMX7_NUM_ETHS = 2, + FSL_IMX7_ETH_NUM_TX_RINGS = 3, + FSL_IMX7_NUM_USDHCS = 3, + FSL_IMX7_NUM_WDTS = 4, + FSL_IMX7_NUM_GPTS = 4, + FSL_IMX7_NUM_IOMUXCS = 2, + FSL_IMX7_NUM_GPIOS = 7, + FSL_IMX7_NUM_I2CS = 4, + FSL_IMX7_NUM_ECSPIS = 4, + FSL_IMX7_NUM_USBS = 3, + FSL_IMX7_NUM_ADCS = 2, +}; + +struct FslIMX7State { + /*< private >*/ + DeviceState parent_obj; + + /*< public >*/ + ARMCPU cpu[FSL_IMX7_NUM_CPUS]; + A15MPPrivState a7mpcore; + IMXGPTState gpt[FSL_IMX7_NUM_GPTS]; + IMXGPIOState gpio[FSL_IMX7_NUM_GPIOS]; + IMX7CCMState ccm; + IMX7AnalogState analog; + IMX7SNVSState snvs; + IMXGPCv2State gpcv2; + IMXSPIState spi[FSL_IMX7_NUM_ECSPIS]; + IMXI2CState i2c[FSL_IMX7_NUM_I2CS]; + IMXSerialState uart[FSL_IMX7_NUM_UARTS]; + IMXFECState eth[FSL_IMX7_NUM_ETHS]; + SDHCIState usdhc[FSL_IMX7_NUM_USDHCS]; + IMX2WdtState wdt[FSL_IMX7_NUM_WDTS]; + IMX7GPRState gpr; + ChipideaState usb[FSL_IMX7_NUM_USBS]; + DesignwarePCIEHost pcie; + uint32_t phy_num[FSL_IMX7_NUM_ETHS]; +}; + +enum FslIMX7MemoryMap { + FSL_IMX7_MMDC_ADDR = 0x80000000, + FSL_IMX7_MMDC_SIZE = 2 * 1024 * 1024 * 1024UL, + + FSL_IMX7_GPIO1_ADDR = 0x30200000, + FSL_IMX7_GPIO2_ADDR = 0x30210000, + FSL_IMX7_GPIO3_ADDR = 0x30220000, + FSL_IMX7_GPIO4_ADDR = 0x30230000, + FSL_IMX7_GPIO5_ADDR = 0x30240000, + FSL_IMX7_GPIO6_ADDR = 0x30250000, + FSL_IMX7_GPIO7_ADDR = 0x30260000, + + FSL_IMX7_IOMUXC_LPSR_GPR_ADDR = 0x30270000, + + FSL_IMX7_WDOG1_ADDR = 0x30280000, + FSL_IMX7_WDOG2_ADDR = 0x30290000, + FSL_IMX7_WDOG3_ADDR = 0x302A0000, + FSL_IMX7_WDOG4_ADDR = 0x302B0000, + + FSL_IMX7_IOMUXC_LPSR_ADDR = 0x302C0000, + + FSL_IMX7_GPT1_ADDR = 0x302D0000, + FSL_IMX7_GPT2_ADDR = 0x302E0000, + FSL_IMX7_GPT3_ADDR = 0x302F0000, + FSL_IMX7_GPT4_ADDR = 0x30300000, + + FSL_IMX7_IOMUXC_ADDR = 0x30330000, + FSL_IMX7_IOMUXC_GPR_ADDR = 0x30340000, + FSL_IMX7_IOMUXCn_SIZE = 0x1000, + + FSL_IMX7_OCOTP_ADDR = 0x30350000, + FSL_IMX7_OCOTP_SIZE = 0x10000, + + FSL_IMX7_ANALOG_ADDR = 0x30360000, + FSL_IMX7_SNVS_ADDR = 0x30370000, + FSL_IMX7_CCM_ADDR = 0x30380000, + + FSL_IMX7_SRC_ADDR = 0x30390000, + FSL_IMX7_SRC_SIZE = 0x1000, + + FSL_IMX7_ADC1_ADDR = 0x30610000, + FSL_IMX7_ADC2_ADDR = 0x30620000, + FSL_IMX7_ADCn_SIZE = 0x1000, + + FSL_IMX7_PWM1_ADDR = 0x30660000, + FSL_IMX7_PWM2_ADDR = 0x30670000, + FSL_IMX7_PWM3_ADDR = 0x30680000, + FSL_IMX7_PWM4_ADDR = 0x30690000, + FSL_IMX7_PWMn_SIZE = 0x10000, + + FSL_IMX7_PCIE_PHY_ADDR = 0x306D0000, + FSL_IMX7_PCIE_PHY_SIZE = 0x10000, + + FSL_IMX7_GPC_ADDR = 0x303A0000, + + FSL_IMX7_CAAM_ADDR = 0x30900000, + FSL_IMX7_CAAM_SIZE = 0x40000, + + FSL_IMX7_CAN1_ADDR = 0x30A00000, + FSL_IMX7_CAN2_ADDR = 0x30A10000, + FSL_IMX7_CANn_SIZE = 0x10000, + + FSL_IMX7_I2C1_ADDR = 0x30A20000, + FSL_IMX7_I2C2_ADDR = 0x30A30000, + FSL_IMX7_I2C3_ADDR = 0x30A40000, + FSL_IMX7_I2C4_ADDR = 0x30A50000, + + FSL_IMX7_ECSPI1_ADDR = 0x30820000, + FSL_IMX7_ECSPI2_ADDR = 0x30830000, + FSL_IMX7_ECSPI3_ADDR = 0x30840000, + FSL_IMX7_ECSPI4_ADDR = 0x30630000, + + FSL_IMX7_LCDIF_ADDR = 0x30730000, + FSL_IMX7_LCDIF_SIZE = 0x1000, + + FSL_IMX7_UART1_ADDR = 0x30860000, + /* + * Some versions of the reference manual claim that UART2 is @ + * 0x30870000, but experiments with HW + DT files in upstream + * Linux kernel show that not to be true and that block is + * acutally located @ 0x30890000 + */ + FSL_IMX7_UART2_ADDR = 0x30890000, + FSL_IMX7_UART3_ADDR = 0x30880000, + FSL_IMX7_UART4_ADDR = 0x30A60000, + FSL_IMX7_UART5_ADDR = 0x30A70000, + FSL_IMX7_UART6_ADDR = 0x30A80000, + FSL_IMX7_UART7_ADDR = 0x30A90000, + + FSL_IMX7_ENET1_ADDR = 0x30BE0000, + FSL_IMX7_ENET2_ADDR = 0x30BF0000, + + FSL_IMX7_USB1_ADDR = 0x30B10000, + FSL_IMX7_USBMISC1_ADDR = 0x30B10200, + FSL_IMX7_USB2_ADDR = 0x30B20000, + FSL_IMX7_USBMISC2_ADDR = 0x30B20200, + FSL_IMX7_USB3_ADDR = 0x30B30000, + FSL_IMX7_USBMISC3_ADDR = 0x30B30200, + FSL_IMX7_USBMISCn_SIZE = 0x200, + + FSL_IMX7_USDHC1_ADDR = 0x30B40000, + FSL_IMX7_USDHC2_ADDR = 0x30B50000, + FSL_IMX7_USDHC3_ADDR = 0x30B60000, + + FSL_IMX7_SDMA_ADDR = 0x30BD0000, + FSL_IMX7_SDMA_SIZE = 0x1000, + + FSL_IMX7_A7MPCORE_ADDR = 0x31000000, + FSL_IMX7_A7MPCORE_DAP_ADDR = 0x30000000, + + FSL_IMX7_PCIE_REG_ADDR = 0x33800000, + FSL_IMX7_PCIE_REG_SIZE = 16 * 1024, + + FSL_IMX7_GPR_ADDR = 0x30340000, + + FSL_IMX7_DMA_APBH_ADDR = 0x33000000, + FSL_IMX7_DMA_APBH_SIZE = 0x2000, +}; + +enum FslIMX7IRQs { + FSL_IMX7_USDHC1_IRQ = 22, + FSL_IMX7_USDHC2_IRQ = 23, + FSL_IMX7_USDHC3_IRQ = 24, + + FSL_IMX7_UART1_IRQ = 26, + FSL_IMX7_UART2_IRQ = 27, + FSL_IMX7_UART3_IRQ = 28, + FSL_IMX7_UART4_IRQ = 29, + FSL_IMX7_UART5_IRQ = 30, + FSL_IMX7_UART6_IRQ = 16, + + FSL_IMX7_ECSPI1_IRQ = 31, + FSL_IMX7_ECSPI2_IRQ = 32, + FSL_IMX7_ECSPI3_IRQ = 33, + FSL_IMX7_ECSPI4_IRQ = 34, + + FSL_IMX7_I2C1_IRQ = 35, + FSL_IMX7_I2C2_IRQ = 36, + FSL_IMX7_I2C3_IRQ = 37, + FSL_IMX7_I2C4_IRQ = 38, + + FSL_IMX7_USB1_IRQ = 43, + FSL_IMX7_USB2_IRQ = 42, + FSL_IMX7_USB3_IRQ = 40, + + FSL_IMX7_WDOG1_IRQ = 78, + FSL_IMX7_WDOG2_IRQ = 79, + FSL_IMX7_WDOG3_IRQ = 10, + FSL_IMX7_WDOG4_IRQ = 109, + + FSL_IMX7_PCI_INTA_IRQ = 125, + FSL_IMX7_PCI_INTB_IRQ = 124, + FSL_IMX7_PCI_INTC_IRQ = 123, + FSL_IMX7_PCI_INTD_IRQ = 122, + + FSL_IMX7_UART7_IRQ = 126, + +#define FSL_IMX7_ENET_IRQ(i, n) ((n) + ((i) ? 100 : 118)) + + FSL_IMX7_MAX_IRQ = 128, +}; + +#endif /* FSL_IMX7_H */ diff --git a/include/hw/arm/linux-boot-if.h b/include/hw/arm/linux-boot-if.h new file mode 100644 index 000000000..c85f33b2c --- /dev/null +++ b/include/hw/arm/linux-boot-if.h @@ -0,0 +1,39 @@ +/* + * hw/arm/linux-boot-if.h : interface for devices which need to behave + * specially for direct boot of an ARM Linux kernel + */ + +#ifndef HW_ARM_LINUX_BOOT_IF_H +#define HW_ARM_LINUX_BOOT_IF_H + +#include "qom/object.h" + +#define TYPE_ARM_LINUX_BOOT_IF "arm-linux-boot-if" +typedef struct ARMLinuxBootIfClass ARMLinuxBootIfClass; +DECLARE_CLASS_CHECKERS(ARMLinuxBootIfClass, ARM_LINUX_BOOT_IF, + TYPE_ARM_LINUX_BOOT_IF) +#define ARM_LINUX_BOOT_IF(obj) \ + INTERFACE_CHECK(ARMLinuxBootIf, (obj), TYPE_ARM_LINUX_BOOT_IF) + +typedef struct ARMLinuxBootIf ARMLinuxBootIf; + +struct ARMLinuxBootIfClass { + /*< private >*/ + InterfaceClass parent_class; + + /*< public >*/ + /** arm_linux_init: configure the device for a direct boot + * of an ARM Linux kernel (so that device reset puts it into + * the state the kernel expects after firmware initialization, + * rather than the true hardware reset state). This callback is + * called once after machine construction is complete (before the + * first system reset). + * + * @obj: the object implementing this interface + * @secure_boot: true if we are booting Secure, false for NonSecure + * (or for a CPU which doesn't support TrustZone) + */ + void (*arm_linux_init)(ARMLinuxBootIf *obj, bool secure_boot); +}; + +#endif diff --git a/include/hw/arm/msf2-soc.h b/include/hw/arm/msf2-soc.h new file mode 100644 index 000000000..d40618468 --- /dev/null +++ b/include/hw/arm/msf2-soc.h @@ -0,0 +1,70 @@ +/* + * Microsemi Smartfusion2 SoC + * + * Copyright (c) 2017 Subbaraya Sundeep + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ + +#ifndef HW_ARM_MSF2_SOC_H +#define HW_ARM_MSF2_SOC_H + +#include "hw/arm/armv7m.h" +#include "hw/timer/mss-timer.h" +#include "hw/misc/msf2-sysreg.h" +#include "hw/ssi/mss-spi.h" +#include "hw/net/msf2-emac.h" +#include "qom/object.h" + +#define TYPE_MSF2_SOC "msf2-soc" +OBJECT_DECLARE_SIMPLE_TYPE(MSF2State, MSF2_SOC) + +#define MSF2_NUM_SPIS 2 +#define MSF2_NUM_UARTS 2 + +/* + * System timer consists of two programmable 32-bit + * decrementing counters that generate individual interrupts to + * the Cortex-M3 processor + */ +#define MSF2_NUM_TIMERS 2 + +struct MSF2State { + /*< private >*/ + SysBusDevice parent_obj; + /*< public >*/ + + ARMv7MState armv7m; + + char *cpu_type; + char *part_name; + uint64_t envm_size; + uint64_t esram_size; + + uint32_t m3clk; + uint8_t apb0div; + uint8_t apb1div; + + MSF2SysregState sysreg; + MSSTimerState timer; + MSSSpiState spi[MSF2_NUM_SPIS]; + MSF2EmacState emac; +}; + +#endif diff --git a/include/hw/arm/npcm7xx.h b/include/hw/arm/npcm7xx.h new file mode 100644 index 000000000..5469247e3 --- /dev/null +++ b/include/hw/arm/npcm7xx.h @@ -0,0 +1,120 @@ +/* + * Nuvoton NPCM7xx SoC family. + * + * Copyright 2020 Google LLC + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * for more details. + */ +#ifndef NPCM7XX_H +#define NPCM7XX_H + +#include "hw/boards.h" +#include "hw/cpu/a9mpcore.h" +#include "hw/gpio/npcm7xx_gpio.h" +#include "hw/mem/npcm7xx_mc.h" +#include "hw/misc/npcm7xx_clk.h" +#include "hw/misc/npcm7xx_gcr.h" +#include "hw/misc/npcm7xx_rng.h" +#include "hw/nvram/npcm7xx_otp.h" +#include "hw/timer/npcm7xx_timer.h" +#include "hw/ssi/npcm7xx_fiu.h" +#include "hw/usb/hcd-ehci.h" +#include "hw/usb/hcd-ohci.h" +#include "target/arm/cpu.h" + +#define NPCM7XX_MAX_NUM_CPUS (2) + +/* The first half of the address space is reserved for DDR4 DRAM. */ +#define NPCM7XX_DRAM_BA (0x00000000) +#define NPCM7XX_DRAM_SZ (2 * GiB) + +/* Magic addresses for setting up direct kernel booting and SMP boot stubs. */ +#define NPCM7XX_LOADER_START (0x00000000) /* Start of SDRAM */ +#define NPCM7XX_SMP_LOADER_START (0xffff0000) /* Boot ROM */ +#define NPCM7XX_SMP_BOOTREG_ADDR (0xf080013c) /* GCR.SCRPAD */ +#define NPCM7XX_GIC_CPU_IF_ADDR (0xf03fe100) /* GIC within A9 */ +#define NPCM7XX_BOARD_SETUP_ADDR (0xffff1000) /* Boot ROM */ + +typedef struct NPCM7xxMachine { + MachineState parent; +} NPCM7xxMachine; + +#define TYPE_NPCM7XX_MACHINE MACHINE_TYPE_NAME("npcm7xx") +#define NPCM7XX_MACHINE(obj) \ + OBJECT_CHECK(NPCM7xxMachine, (obj), TYPE_NPCM7XX_MACHINE) + +typedef struct NPCM7xxMachineClass { + MachineClass parent; + + const char *soc_type; +} NPCM7xxMachineClass; + +#define NPCM7XX_MACHINE_CLASS(klass) \ + OBJECT_CLASS_CHECK(NPCM7xxMachineClass, (klass), TYPE_NPCM7XX_MACHINE) +#define NPCM7XX_MACHINE_GET_CLASS(obj) \ + OBJECT_GET_CLASS(NPCM7xxMachineClass, (obj), TYPE_NPCM7XX_MACHINE) + +typedef struct NPCM7xxState { + DeviceState parent; + + ARMCPU cpu[NPCM7XX_MAX_NUM_CPUS]; + A9MPPrivState a9mpcore; + + MemoryRegion sram; + MemoryRegion irom; + MemoryRegion ram3; + MemoryRegion *dram; + + NPCM7xxGCRState gcr; + NPCM7xxCLKState clk; + NPCM7xxTimerCtrlState tim[3]; + NPCM7xxOTPState key_storage; + NPCM7xxOTPState fuse_array; + NPCM7xxMCState mc; + NPCM7xxRNGState rng; + NPCM7xxGPIOState gpio[8]; + EHCISysBusState ehci; + OHCISysBusState ohci; + NPCM7xxFIUState fiu[2]; +} NPCM7xxState; + +#define TYPE_NPCM7XX "npcm7xx" +#define NPCM7XX(obj) OBJECT_CHECK(NPCM7xxState, (obj), TYPE_NPCM7XX) + +#define TYPE_NPCM730 "npcm730" +#define TYPE_NPCM750 "npcm750" + +typedef struct NPCM7xxClass { + DeviceClass parent; + + /* Bitmask of modules that are permanently disabled on this chip. */ + uint32_t disabled_modules; + /* Number of CPU cores enabled in this SoC class (may be 1 or 2). */ + uint32_t num_cpus; +} NPCM7xxClass; + +#define NPCM7XX_CLASS(klass) \ + OBJECT_CLASS_CHECK(NPCM7xxClass, (klass), TYPE_NPCM7XX) +#define NPCM7XX_GET_CLASS(obj) \ + OBJECT_GET_CLASS(NPCM7xxClass, (obj), TYPE_NPCM7XX) + +/** + * npcm7xx_load_kernel - Loads memory with everything needed to boot + * @machine - The machine containing the SoC to be booted. + * @soc - The SoC containing the CPU to be booted. + * + * This will set up the ARM boot info structure for the specific NPCM7xx + * derivative and call arm_load_kernel() to set up loading of the kernel, etc. + * into memory, if requested by the user. + */ +void npcm7xx_load_kernel(MachineState *machine, NPCM7xxState *soc); + +#endif /* NPCM7XX_H */ diff --git a/include/hw/arm/nrf51.h b/include/hw/arm/nrf51.h new file mode 100644 index 000000000..de836beaa --- /dev/null +++ b/include/hw/arm/nrf51.h @@ -0,0 +1,46 @@ +/* + * Nordic Semiconductor nRF51 Series SOC Common Defines + * + * This file hosts generic defines used in various nRF51 peripheral devices. + * + * Reference Manual: http://infocenter.nordicsemi.com/pdf/nRF51_RM_v3.0.pdf + * Product Spec: http://infocenter.nordicsemi.com/pdf/nRF51822_PS_v3.1.pdf + * + * Copyright 2018 Steffen Görtz + * + * This code is licensed under the GPL version 2 or later. See + * the COPYING file in the top-level directory. + */ + +#ifndef NRF51_H +#define NRF51_H + +#define NRF51_FLASH_BASE 0x00000000 +#define NRF51_FICR_BASE 0x10000000 +#define NRF51_FICR_SIZE 0x00000100 +#define NRF51_UICR_BASE 0x10001000 +#define NRF51_SRAM_BASE 0x20000000 + +#define NRF51_IOMEM_BASE 0x40000000 +#define NRF51_IOMEM_SIZE 0x20000000 + +#define NRF51_PERIPHERAL_SIZE 0x00001000 +#define NRF51_UART_BASE 0x40002000 +#define NRF51_TWI_BASE 0x40003000 +#define NRF51_TIMER_BASE 0x40008000 +#define NRF51_RNG_BASE 0x4000D000 +#define NRF51_NVMC_BASE 0x4001E000 +#define NRF51_GPIO_BASE 0x50000000 + +#define NRF51_PRIVATE_BASE 0xF0000000 +#define NRF51_PRIVATE_SIZE 0x10000000 + +#define NRF51_PAGE_SIZE 1024 + +/* Trigger */ +#define NRF51_TRIGGER_TASK 0x01 + +/* Events */ +#define NRF51_EVENT_CLEAR 0x00 + +#endif diff --git a/include/hw/arm/nrf51_soc.h b/include/hw/arm/nrf51_soc.h new file mode 100644 index 000000000..f8a6725b7 --- /dev/null +++ b/include/hw/arm/nrf51_soc.h @@ -0,0 +1,55 @@ +/* + * Nordic Semiconductor nRF51 SoC + * + * Copyright 2018 Joel Stanley + * + * This code is licensed under the GPL version 2 or later. See + * the COPYING file in the top-level directory. + */ + +#ifndef NRF51_SOC_H +#define NRF51_SOC_H + +#include "hw/sysbus.h" +#include "hw/arm/armv7m.h" +#include "hw/char/nrf51_uart.h" +#include "hw/misc/nrf51_rng.h" +#include "hw/gpio/nrf51_gpio.h" +#include "hw/nvram/nrf51_nvm.h" +#include "hw/timer/nrf51_timer.h" +#include "qom/object.h" + +#define TYPE_NRF51_SOC "nrf51-soc" +OBJECT_DECLARE_SIMPLE_TYPE(NRF51State, NRF51_SOC) + +#define NRF51_NUM_TIMERS 3 + +struct NRF51State { + /*< private >*/ + SysBusDevice parent_obj; + + /*< public >*/ + ARMv7MState cpu; + + NRF51UARTState uart; + NRF51RNGState rng; + NRF51NVMState nvm; + NRF51GPIOState gpio; + NRF51TimerState timer[NRF51_NUM_TIMERS]; + + MemoryRegion iomem; + MemoryRegion sram; + MemoryRegion flash; + MemoryRegion clock; + MemoryRegion twi; + + uint32_t sram_size; + uint32_t flash_size; + + MemoryRegion *board_memory; + + MemoryRegion container; + +}; + +#endif diff --git a/include/hw/arm/omap.h b/include/hw/arm/omap.h new file mode 100644 index 000000000..ff6a173f8 --- /dev/null +++ b/include/hw/arm/omap.h @@ -0,0 +1,1043 @@ +/* + * Texas Instruments OMAP processors. + * + * Copyright (C) 2006-2008 Andrzej Zaborowski + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation; either version 2 or + * (at your option) version 3 of the License. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, see . + */ + +#ifndef HW_ARM_OMAP_H +#define HW_ARM_OMAP_H + +#include "exec/memory.h" +#include "hw/input/tsc2xxx.h" +#include "target/arm/cpu-qom.h" +#include "qemu/log.h" +#include "qom/object.h" + +# define OMAP_EMIFS_BASE 0x00000000 +# define OMAP2_Q0_BASE 0x00000000 +# define OMAP_CS0_BASE 0x00000000 +# define OMAP_CS1_BASE 0x04000000 +# define OMAP_CS2_BASE 0x08000000 +# define OMAP_CS3_BASE 0x0c000000 +# define OMAP_EMIFF_BASE 0x10000000 +# define OMAP_IMIF_BASE 0x20000000 +# define OMAP_LOCALBUS_BASE 0x30000000 +# define OMAP2_Q1_BASE 0x40000000 +# define OMAP2_L4_BASE 0x48000000 +# define OMAP2_SRAM_BASE 0x40200000 +# define OMAP2_L3_BASE 0x68000000 +# define OMAP2_Q2_BASE 0x80000000 +# define OMAP2_Q3_BASE 0xc0000000 +# define OMAP_MPUI_BASE 0xe1000000 + +# define OMAP730_SRAM_SIZE 0x00032000 +# define OMAP15XX_SRAM_SIZE 0x00030000 +# define OMAP16XX_SRAM_SIZE 0x00004000 +# define OMAP1611_SRAM_SIZE 0x0003e800 +# define OMAP242X_SRAM_SIZE 0x000a0000 +# define OMAP243X_SRAM_SIZE 0x00010000 +# define OMAP_CS0_SIZE 0x04000000 +# define OMAP_CS1_SIZE 0x04000000 +# define OMAP_CS2_SIZE 0x04000000 +# define OMAP_CS3_SIZE 0x04000000 + +/* omap_clk.c */ +struct omap_mpu_state_s; +typedef struct clk *omap_clk; +omap_clk omap_findclk(struct omap_mpu_state_s *mpu, const char *name); +void omap_clk_init(struct omap_mpu_state_s *mpu); +void omap_clk_adduser(struct clk *clk, qemu_irq user); +void omap_clk_get(omap_clk clk); +void omap_clk_put(omap_clk clk); +void omap_clk_onoff(omap_clk clk, int on); +void omap_clk_canidle(omap_clk clk, int can); +void omap_clk_setrate(omap_clk clk, int divide, int multiply); +int64_t omap_clk_getrate(omap_clk clk); +void omap_clk_reparent(omap_clk clk, omap_clk parent); + +/* omap_intc.c */ +#define TYPE_OMAP_INTC "common-omap-intc" +typedef struct omap_intr_handler_s omap_intr_handler; +DECLARE_INSTANCE_CHECKER(omap_intr_handler, OMAP_INTC, + TYPE_OMAP_INTC) + + +/* + * TODO: Ideally we should have a clock framework that + * let us wire these clocks up with QOM properties or links. + * + * qdev should support a generic means of defining a 'port' with + * an arbitrary interface for connecting two devices. Then we + * could reframe the omap clock API in terms of clock ports, + * and get some type safety. For now the best qdev provides is + * passing an arbitrary pointer. + * (It's not possible to pass in the string which is the clock + * name, because this device does not have the necessary information + * (ie the struct omap_mpu_state_s*) to do the clockname to pointer + * translation.) + */ +void omap_intc_set_iclk(omap_intr_handler *intc, omap_clk clk); +void omap_intc_set_fclk(omap_intr_handler *intc, omap_clk clk); + +/* omap_i2c.c */ +#define TYPE_OMAP_I2C "omap_i2c" +OBJECT_DECLARE_SIMPLE_TYPE(OMAPI2CState, OMAP_I2C) + + +/* TODO: clock framework (see above) */ +void omap_i2c_set_iclk(OMAPI2CState *i2c, omap_clk clk); +void omap_i2c_set_fclk(OMAPI2CState *i2c, omap_clk clk); + +/* omap_gpio.c */ +#define TYPE_OMAP1_GPIO "omap-gpio" +DECLARE_INSTANCE_CHECKER(struct omap_gpif_s, OMAP1_GPIO, + TYPE_OMAP1_GPIO) + +#define TYPE_OMAP2_GPIO "omap2-gpio" +DECLARE_INSTANCE_CHECKER(struct omap2_gpif_s, OMAP2_GPIO, + TYPE_OMAP2_GPIO) + +typedef struct omap_gpif_s omap_gpif; +typedef struct omap2_gpif_s omap2_gpif; + +/* TODO: clock framework (see above) */ +void omap_gpio_set_clk(omap_gpif *gpio, omap_clk clk); + +void omap2_gpio_set_iclk(omap2_gpif *gpio, omap_clk clk); +void omap2_gpio_set_fclk(omap2_gpif *gpio, uint8_t i, omap_clk clk); + +/* OMAP2 l4 Interconnect */ +struct omap_l4_s; +struct omap_l4_region_s { + hwaddr offset; + size_t size; + int access; +}; +struct omap_l4_agent_info_s { + int ta; + int region; + int regions; + int ta_region; +}; +struct omap_target_agent_s { + MemoryRegion iomem; + struct omap_l4_s *bus; + int regions; + const struct omap_l4_region_s *start; + hwaddr base; + uint32_t component; + uint32_t control; + uint32_t status; +}; +struct omap_l4_s *omap_l4_init(MemoryRegion *address_space, + hwaddr base, int ta_num); + +struct omap_target_agent_s; +struct omap_target_agent_s *omap_l4ta_get( + struct omap_l4_s *bus, + const struct omap_l4_region_s *regions, + const struct omap_l4_agent_info_s *agents, + int cs); +hwaddr omap_l4_attach(struct omap_target_agent_s *ta, + int region, MemoryRegion *mr); +hwaddr omap_l4_region_base(struct omap_target_agent_s *ta, + int region); +hwaddr omap_l4_region_size(struct omap_target_agent_s *ta, + int region); + +/* OMAP2 SDRAM controller */ +struct omap_sdrc_s; +struct omap_sdrc_s *omap_sdrc_init(MemoryRegion *sysmem, + hwaddr base); +void omap_sdrc_reset(struct omap_sdrc_s *s); + +/* OMAP2 general purpose memory controller */ +struct omap_gpmc_s; +struct omap_gpmc_s *omap_gpmc_init(struct omap_mpu_state_s *mpu, + hwaddr base, + qemu_irq irq, qemu_irq drq); +void omap_gpmc_reset(struct omap_gpmc_s *s); +void omap_gpmc_attach(struct omap_gpmc_s *s, int cs, MemoryRegion *iomem); +void omap_gpmc_attach_nand(struct omap_gpmc_s *s, int cs, DeviceState *nand); + +/* + * Common IRQ numbers for level 1 interrupt handler + * See /usr/include/asm-arm/arch-omap/irqs.h in Linux. + */ +# define OMAP_INT_CAMERA 1 +# define OMAP_INT_FIQ 3 +# define OMAP_INT_RTDX 6 +# define OMAP_INT_DSP_MMU_ABORT 7 +# define OMAP_INT_HOST 8 +# define OMAP_INT_ABORT 9 +# define OMAP_INT_BRIDGE_PRIV 13 +# define OMAP_INT_GPIO_BANK1 14 +# define OMAP_INT_UART3 15 +# define OMAP_INT_TIMER3 16 +# define OMAP_INT_DMA_CH0_6 19 +# define OMAP_INT_DMA_CH1_7 20 +# define OMAP_INT_DMA_CH2_8 21 +# define OMAP_INT_DMA_CH3 22 +# define OMAP_INT_DMA_CH4 23 +# define OMAP_INT_DMA_CH5 24 +# define OMAP_INT_DMA_LCD 25 +# define OMAP_INT_TIMER1 26 +# define OMAP_INT_WD_TIMER 27 +# define OMAP_INT_BRIDGE_PUB 28 +# define OMAP_INT_TIMER2 30 +# define OMAP_INT_LCD_CTRL 31 + +/* + * Common OMAP-15xx IRQ numbers for level 1 interrupt handler + */ +# define OMAP_INT_15XX_IH2_IRQ 0 +# define OMAP_INT_15XX_LB_MMU 17 +# define OMAP_INT_15XX_LOCAL_BUS 29 + +/* + * OMAP-1510 specific IRQ numbers for level 1 interrupt handler + */ +# define OMAP_INT_1510_SPI_TX 4 +# define OMAP_INT_1510_SPI_RX 5 +# define OMAP_INT_1510_DSP_MAILBOX1 10 +# define OMAP_INT_1510_DSP_MAILBOX2 11 + +/* + * OMAP-310 specific IRQ numbers for level 1 interrupt handler + */ +# define OMAP_INT_310_McBSP2_TX 4 +# define OMAP_INT_310_McBSP2_RX 5 +# define OMAP_INT_310_HSB_MAILBOX1 12 +# define OMAP_INT_310_HSAB_MMU 18 + +/* + * OMAP-1610 specific IRQ numbers for level 1 interrupt handler + */ +# define OMAP_INT_1610_IH2_IRQ 0 +# define OMAP_INT_1610_IH2_FIQ 2 +# define OMAP_INT_1610_McBSP2_TX 4 +# define OMAP_INT_1610_McBSP2_RX 5 +# define OMAP_INT_1610_DSP_MAILBOX1 10 +# define OMAP_INT_1610_DSP_MAILBOX2 11 +# define OMAP_INT_1610_LCD_LINE 12 +# define OMAP_INT_1610_GPTIMER1 17 +# define OMAP_INT_1610_GPTIMER2 18 +# define OMAP_INT_1610_SSR_FIFO_0 29 + +/* + * OMAP-730 specific IRQ numbers for level 1 interrupt handler + */ +# define OMAP_INT_730_IH2_FIQ 0 +# define OMAP_INT_730_IH2_IRQ 1 +# define OMAP_INT_730_USB_NON_ISO 2 +# define OMAP_INT_730_USB_ISO 3 +# define OMAP_INT_730_ICR 4 +# define OMAP_INT_730_EAC 5 +# define OMAP_INT_730_GPIO_BANK1 6 +# define OMAP_INT_730_GPIO_BANK2 7 +# define OMAP_INT_730_GPIO_BANK3 8 +# define OMAP_INT_730_McBSP2TX 10 +# define OMAP_INT_730_McBSP2RX 11 +# define OMAP_INT_730_McBSP2RX_OVF 12 +# define OMAP_INT_730_LCD_LINE 14 +# define OMAP_INT_730_GSM_PROTECT 15 +# define OMAP_INT_730_TIMER3 16 +# define OMAP_INT_730_GPIO_BANK5 17 +# define OMAP_INT_730_GPIO_BANK6 18 +# define OMAP_INT_730_SPGIO_WR 29 + +/* + * Common IRQ numbers for level 2 interrupt handler + */ +# define OMAP_INT_KEYBOARD 1 +# define OMAP_INT_uWireTX 2 +# define OMAP_INT_uWireRX 3 +# define OMAP_INT_I2C 4 +# define OMAP_INT_MPUIO 5 +# define OMAP_INT_USB_HHC_1 6 +# define OMAP_INT_McBSP3TX 10 +# define OMAP_INT_McBSP3RX 11 +# define OMAP_INT_McBSP1TX 12 +# define OMAP_INT_McBSP1RX 13 +# define OMAP_INT_UART1 14 +# define OMAP_INT_UART2 15 +# define OMAP_INT_USB_W2FC 20 +# define OMAP_INT_1WIRE 21 +# define OMAP_INT_OS_TIMER 22 +# define OMAP_INT_OQN 23 +# define OMAP_INT_GAUGE_32K 24 +# define OMAP_INT_RTC_TIMER 25 +# define OMAP_INT_RTC_ALARM 26 +# define OMAP_INT_DSP_MMU 28 + +/* + * OMAP-1510 specific IRQ numbers for level 2 interrupt handler + */ +# define OMAP_INT_1510_BT_MCSI1TX 16 +# define OMAP_INT_1510_BT_MCSI1RX 17 +# define OMAP_INT_1510_SoSSI_MATCH 19 +# define OMAP_INT_1510_MEM_STICK 27 +# define OMAP_INT_1510_COM_SPI_RO 31 + +/* + * OMAP-310 specific IRQ numbers for level 2 interrupt handler + */ +# define OMAP_INT_310_FAC 0 +# define OMAP_INT_310_USB_HHC_2 7 +# define OMAP_INT_310_MCSI1_FE 16 +# define OMAP_INT_310_MCSI2_FE 17 +# define OMAP_INT_310_USB_W2FC_ISO 29 +# define OMAP_INT_310_USB_W2FC_NON_ISO 30 +# define OMAP_INT_310_McBSP2RX_OF 31 + +/* + * OMAP-1610 specific IRQ numbers for level 2 interrupt handler + */ +# define OMAP_INT_1610_FAC 0 +# define OMAP_INT_1610_USB_HHC_2 7 +# define OMAP_INT_1610_USB_OTG 8 +# define OMAP_INT_1610_SoSSI 9 +# define OMAP_INT_1610_BT_MCSI1TX 16 +# define OMAP_INT_1610_BT_MCSI1RX 17 +# define OMAP_INT_1610_SoSSI_MATCH 19 +# define OMAP_INT_1610_MEM_STICK 27 +# define OMAP_INT_1610_McBSP2RX_OF 31 +# define OMAP_INT_1610_STI 32 +# define OMAP_INT_1610_STI_WAKEUP 33 +# define OMAP_INT_1610_GPTIMER3 34 +# define OMAP_INT_1610_GPTIMER4 35 +# define OMAP_INT_1610_GPTIMER5 36 +# define OMAP_INT_1610_GPTIMER6 37 +# define OMAP_INT_1610_GPTIMER7 38 +# define OMAP_INT_1610_GPTIMER8 39 +# define OMAP_INT_1610_GPIO_BANK2 40 +# define OMAP_INT_1610_GPIO_BANK3 41 +# define OMAP_INT_1610_MMC2 42 +# define OMAP_INT_1610_CF 43 +# define OMAP_INT_1610_WAKE_UP_REQ 46 +# define OMAP_INT_1610_GPIO_BANK4 48 +# define OMAP_INT_1610_SPI 49 +# define OMAP_INT_1610_DMA_CH6 53 +# define OMAP_INT_1610_DMA_CH7 54 +# define OMAP_INT_1610_DMA_CH8 55 +# define OMAP_INT_1610_DMA_CH9 56 +# define OMAP_INT_1610_DMA_CH10 57 +# define OMAP_INT_1610_DMA_CH11 58 +# define OMAP_INT_1610_DMA_CH12 59 +# define OMAP_INT_1610_DMA_CH13 60 +# define OMAP_INT_1610_DMA_CH14 61 +# define OMAP_INT_1610_DMA_CH15 62 +# define OMAP_INT_1610_NAND 63 + +/* + * OMAP-730 specific IRQ numbers for level 2 interrupt handler + */ +# define OMAP_INT_730_HW_ERRORS 0 +# define OMAP_INT_730_NFIQ_PWR_FAIL 1 +# define OMAP_INT_730_CFCD 2 +# define OMAP_INT_730_CFIREQ 3 +# define OMAP_INT_730_I2C 4 +# define OMAP_INT_730_PCC 5 +# define OMAP_INT_730_MPU_EXT_NIRQ 6 +# define OMAP_INT_730_SPI_100K_1 7 +# define OMAP_INT_730_SYREN_SPI 8 +# define OMAP_INT_730_VLYNQ 9 +# define OMAP_INT_730_GPIO_BANK4 10 +# define OMAP_INT_730_McBSP1TX 11 +# define OMAP_INT_730_McBSP1RX 12 +# define OMAP_INT_730_McBSP1RX_OF 13 +# define OMAP_INT_730_UART_MODEM_IRDA_2 14 +# define OMAP_INT_730_UART_MODEM_1 15 +# define OMAP_INT_730_MCSI 16 +# define OMAP_INT_730_uWireTX 17 +# define OMAP_INT_730_uWireRX 18 +# define OMAP_INT_730_SMC_CD 19 +# define OMAP_INT_730_SMC_IREQ 20 +# define OMAP_INT_730_HDQ_1WIRE 21 +# define OMAP_INT_730_TIMER32K 22 +# define OMAP_INT_730_MMC_SDIO 23 +# define OMAP_INT_730_UPLD 24 +# define OMAP_INT_730_USB_HHC_1 27 +# define OMAP_INT_730_USB_HHC_2 28 +# define OMAP_INT_730_USB_GENI 29 +# define OMAP_INT_730_USB_OTG 30 +# define OMAP_INT_730_CAMERA_IF 31 +# define OMAP_INT_730_RNG 32 +# define OMAP_INT_730_DUAL_MODE_TIMER 33 +# define OMAP_INT_730_DBB_RF_EN 34 +# define OMAP_INT_730_MPUIO_KEYPAD 35 +# define OMAP_INT_730_SHA1_MD5 36 +# define OMAP_INT_730_SPI_100K_2 37 +# define OMAP_INT_730_RNG_IDLE 38 +# define OMAP_INT_730_MPUIO 39 +# define OMAP_INT_730_LLPC_LCD_CTRL_OFF 40 +# define OMAP_INT_730_LLPC_OE_FALLING 41 +# define OMAP_INT_730_LLPC_OE_RISING 42 +# define OMAP_INT_730_LLPC_VSYNC 43 +# define OMAP_INT_730_WAKE_UP_REQ 46 +# define OMAP_INT_730_DMA_CH6 53 +# define OMAP_INT_730_DMA_CH7 54 +# define OMAP_INT_730_DMA_CH8 55 +# define OMAP_INT_730_DMA_CH9 56 +# define OMAP_INT_730_DMA_CH10 57 +# define OMAP_INT_730_DMA_CH11 58 +# define OMAP_INT_730_DMA_CH12 59 +# define OMAP_INT_730_DMA_CH13 60 +# define OMAP_INT_730_DMA_CH14 61 +# define OMAP_INT_730_DMA_CH15 62 +# define OMAP_INT_730_NAND 63 + +/* + * OMAP-24xx common IRQ numbers + */ +# define OMAP_INT_24XX_STI 4 +# define OMAP_INT_24XX_SYS_NIRQ 7 +# define OMAP_INT_24XX_L3_IRQ 10 +# define OMAP_INT_24XX_PRCM_MPU_IRQ 11 +# define OMAP_INT_24XX_SDMA_IRQ0 12 +# define OMAP_INT_24XX_SDMA_IRQ1 13 +# define OMAP_INT_24XX_SDMA_IRQ2 14 +# define OMAP_INT_24XX_SDMA_IRQ3 15 +# define OMAP_INT_243X_MCBSP2_IRQ 16 +# define OMAP_INT_243X_MCBSP3_IRQ 17 +# define OMAP_INT_243X_MCBSP4_IRQ 18 +# define OMAP_INT_243X_MCBSP5_IRQ 19 +# define OMAP_INT_24XX_GPMC_IRQ 20 +# define OMAP_INT_24XX_GUFFAW_IRQ 21 +# define OMAP_INT_24XX_IVA_IRQ 22 +# define OMAP_INT_24XX_EAC_IRQ 23 +# define OMAP_INT_24XX_CAM_IRQ 24 +# define OMAP_INT_24XX_DSS_IRQ 25 +# define OMAP_INT_24XX_MAIL_U0_MPU 26 +# define OMAP_INT_24XX_DSP_UMA 27 +# define OMAP_INT_24XX_DSP_MMU 28 +# define OMAP_INT_24XX_GPIO_BANK1 29 +# define OMAP_INT_24XX_GPIO_BANK2 30 +# define OMAP_INT_24XX_GPIO_BANK3 31 +# define OMAP_INT_24XX_GPIO_BANK4 32 +# define OMAP_INT_243X_GPIO_BANK5 33 +# define OMAP_INT_24XX_MAIL_U3_MPU 34 +# define OMAP_INT_24XX_WDT3 35 +# define OMAP_INT_24XX_WDT4 36 +# define OMAP_INT_24XX_GPTIMER1 37 +# define OMAP_INT_24XX_GPTIMER2 38 +# define OMAP_INT_24XX_GPTIMER3 39 +# define OMAP_INT_24XX_GPTIMER4 40 +# define OMAP_INT_24XX_GPTIMER5 41 +# define OMAP_INT_24XX_GPTIMER6 42 +# define OMAP_INT_24XX_GPTIMER7 43 +# define OMAP_INT_24XX_GPTIMER8 44 +# define OMAP_INT_24XX_GPTIMER9 45 +# define OMAP_INT_24XX_GPTIMER10 46 +# define OMAP_INT_24XX_GPTIMER11 47 +# define OMAP_INT_24XX_GPTIMER12 48 +# define OMAP_INT_24XX_PKA_IRQ 50 +# define OMAP_INT_24XX_SHA1MD5_IRQ 51 +# define OMAP_INT_24XX_RNG_IRQ 52 +# define OMAP_INT_24XX_MG_IRQ 53 +# define OMAP_INT_24XX_I2C1_IRQ 56 +# define OMAP_INT_24XX_I2C2_IRQ 57 +# define OMAP_INT_24XX_MCBSP1_IRQ_TX 59 +# define OMAP_INT_24XX_MCBSP1_IRQ_RX 60 +# define OMAP_INT_24XX_MCBSP2_IRQ_TX 62 +# define OMAP_INT_24XX_MCBSP2_IRQ_RX 63 +# define OMAP_INT_243X_MCBSP1_IRQ 64 +# define OMAP_INT_24XX_MCSPI1_IRQ 65 +# define OMAP_INT_24XX_MCSPI2_IRQ 66 +# define OMAP_INT_24XX_SSI1_IRQ0 67 +# define OMAP_INT_24XX_SSI1_IRQ1 68 +# define OMAP_INT_24XX_SSI2_IRQ0 69 +# define OMAP_INT_24XX_SSI2_IRQ1 70 +# define OMAP_INT_24XX_SSI_GDD_IRQ 71 +# define OMAP_INT_24XX_UART1_IRQ 72 +# define OMAP_INT_24XX_UART2_IRQ 73 +# define OMAP_INT_24XX_UART3_IRQ 74 +# define OMAP_INT_24XX_USB_IRQ_GEN 75 +# define OMAP_INT_24XX_USB_IRQ_NISO 76 +# define OMAP_INT_24XX_USB_IRQ_ISO 77 +# define OMAP_INT_24XX_USB_IRQ_HGEN 78 +# define OMAP_INT_24XX_USB_IRQ_HSOF 79 +# define OMAP_INT_24XX_USB_IRQ_OTG 80 +# define OMAP_INT_24XX_VLYNQ_IRQ 81 +# define OMAP_INT_24XX_MMC_IRQ 83 +# define OMAP_INT_24XX_MS_IRQ 84 +# define OMAP_INT_24XX_FAC_IRQ 85 +# define OMAP_INT_24XX_MCSPI3_IRQ 91 +# define OMAP_INT_243X_HS_USB_MC 92 +# define OMAP_INT_243X_HS_USB_DMA 93 +# define OMAP_INT_243X_CARKIT 94 +# define OMAP_INT_34XX_GPTIMER12 95 + +/* omap_dma.c */ +enum omap_dma_model { + omap_dma_3_0, + omap_dma_3_1, + omap_dma_3_2, + omap_dma_4, +}; + +struct soc_dma_s; +struct soc_dma_s *omap_dma_init(hwaddr base, qemu_irq *irqs, + MemoryRegion *sysmem, + qemu_irq lcd_irq, struct omap_mpu_state_s *mpu, omap_clk clk, + enum omap_dma_model model); +struct soc_dma_s *omap_dma4_init(hwaddr base, qemu_irq *irqs, + MemoryRegion *sysmem, + struct omap_mpu_state_s *mpu, int fifo, + int chans, omap_clk iclk, omap_clk fclk); +void omap_dma_reset(struct soc_dma_s *s); + +struct dma_irq_map { + int ih; + int intr; +}; + +/* Only used in OMAP DMA 3.x gigacells */ +enum omap_dma_port { + emiff = 0, + emifs, + imif, /* omap16xx: ocp_t1 */ + tipb, + local, /* omap16xx: ocp_t2 */ + tipb_mpui, + __omap_dma_port_last, +}; + +typedef enum { + constant = 0, + post_incremented, + single_index, + double_index, +} omap_dma_addressing_t; + +/* Only used in OMAP DMA 3.x gigacells */ +struct omap_dma_lcd_channel_s { + enum omap_dma_port src; + hwaddr src_f1_top; + hwaddr src_f1_bottom; + hwaddr src_f2_top; + hwaddr src_f2_bottom; + + /* Used in OMAP DMA 3.2 gigacell */ + unsigned char brust_f1; + unsigned char pack_f1; + unsigned char data_type_f1; + unsigned char brust_f2; + unsigned char pack_f2; + unsigned char data_type_f2; + unsigned char end_prog; + unsigned char repeat; + unsigned char auto_init; + unsigned char priority; + unsigned char fs; + unsigned char running; + unsigned char bs; + unsigned char omap_3_1_compatible_disable; + unsigned char dst; + unsigned char lch_type; + int16_t element_index_f1; + int16_t element_index_f2; + int32_t frame_index_f1; + int32_t frame_index_f2; + uint16_t elements_f1; + uint16_t frames_f1; + uint16_t elements_f2; + uint16_t frames_f2; + omap_dma_addressing_t mode_f1; + omap_dma_addressing_t mode_f2; + + /* Destination port is fixed. */ + int interrupts; + int condition; + int dual; + + int current_frame; + hwaddr phys_framebuffer[2]; + qemu_irq irq; + struct omap_mpu_state_s *mpu; +} *omap_dma_get_lcdch(struct soc_dma_s *s); + +/* + * DMA request numbers for OMAP1 + * See /usr/include/asm-arm/arch-omap/dma.h in Linux. + */ +# define OMAP_DMA_NO_DEVICE 0 +# define OMAP_DMA_MCSI1_TX 1 +# define OMAP_DMA_MCSI1_RX 2 +# define OMAP_DMA_I2C_RX 3 +# define OMAP_DMA_I2C_TX 4 +# define OMAP_DMA_EXT_NDMA_REQ0 5 +# define OMAP_DMA_EXT_NDMA_REQ1 6 +# define OMAP_DMA_UWIRE_TX 7 +# define OMAP_DMA_MCBSP1_TX 8 +# define OMAP_DMA_MCBSP1_RX 9 +# define OMAP_DMA_MCBSP3_TX 10 +# define OMAP_DMA_MCBSP3_RX 11 +# define OMAP_DMA_UART1_TX 12 +# define OMAP_DMA_UART1_RX 13 +# define OMAP_DMA_UART2_TX 14 +# define OMAP_DMA_UART2_RX 15 +# define OMAP_DMA_MCBSP2_TX 16 +# define OMAP_DMA_MCBSP2_RX 17 +# define OMAP_DMA_UART3_TX 18 +# define OMAP_DMA_UART3_RX 19 +# define OMAP_DMA_CAMERA_IF_RX 20 +# define OMAP_DMA_MMC_TX 21 +# define OMAP_DMA_MMC_RX 22 +# define OMAP_DMA_NAND 23 /* Not in OMAP310 */ +# define OMAP_DMA_IRQ_LCD_LINE 24 /* Not in OMAP310 */ +# define OMAP_DMA_MEMORY_STICK 25 /* Not in OMAP310 */ +# define OMAP_DMA_USB_W2FC_RX0 26 +# define OMAP_DMA_USB_W2FC_RX1 27 +# define OMAP_DMA_USB_W2FC_RX2 28 +# define OMAP_DMA_USB_W2FC_TX0 29 +# define OMAP_DMA_USB_W2FC_TX1 30 +# define OMAP_DMA_USB_W2FC_TX2 31 + +/* These are only for 1610 */ +# define OMAP_DMA_CRYPTO_DES_IN 32 +# define OMAP_DMA_SPI_TX 33 +# define OMAP_DMA_SPI_RX 34 +# define OMAP_DMA_CRYPTO_HASH 35 +# define OMAP_DMA_CCP_ATTN 36 +# define OMAP_DMA_CCP_FIFO_NOT_EMPTY 37 +# define OMAP_DMA_CMT_APE_TX_CHAN_0 38 +# define OMAP_DMA_CMT_APE_RV_CHAN_0 39 +# define OMAP_DMA_CMT_APE_TX_CHAN_1 40 +# define OMAP_DMA_CMT_APE_RV_CHAN_1 41 +# define OMAP_DMA_CMT_APE_TX_CHAN_2 42 +# define OMAP_DMA_CMT_APE_RV_CHAN_2 43 +# define OMAP_DMA_CMT_APE_TX_CHAN_3 44 +# define OMAP_DMA_CMT_APE_RV_CHAN_3 45 +# define OMAP_DMA_CMT_APE_TX_CHAN_4 46 +# define OMAP_DMA_CMT_APE_RV_CHAN_4 47 +# define OMAP_DMA_CMT_APE_TX_CHAN_5 48 +# define OMAP_DMA_CMT_APE_RV_CHAN_5 49 +# define OMAP_DMA_CMT_APE_TX_CHAN_6 50 +# define OMAP_DMA_CMT_APE_RV_CHAN_6 51 +# define OMAP_DMA_CMT_APE_TX_CHAN_7 52 +# define OMAP_DMA_CMT_APE_RV_CHAN_7 53 +# define OMAP_DMA_MMC2_TX 54 +# define OMAP_DMA_MMC2_RX 55 +# define OMAP_DMA_CRYPTO_DES_OUT 56 + +/* + * DMA request numbers for the OMAP2 + */ +# define OMAP24XX_DMA_NO_DEVICE 0 +# define OMAP24XX_DMA_XTI_DMA 1 /* Not in OMAP2420 */ +# define OMAP24XX_DMA_EXT_DMAREQ0 2 +# define OMAP24XX_DMA_EXT_DMAREQ1 3 +# define OMAP24XX_DMA_GPMC 4 +# define OMAP24XX_DMA_GFX 5 /* Not in OMAP2420 */ +# define OMAP24XX_DMA_DSS 6 +# define OMAP24XX_DMA_VLYNQ_TX 7 /* Not in OMAP2420 */ +# define OMAP24XX_DMA_CWT 8 /* Not in OMAP2420 */ +# define OMAP24XX_DMA_AES_TX 9 /* Not in OMAP2420 */ +# define OMAP24XX_DMA_AES_RX 10 /* Not in OMAP2420 */ +# define OMAP24XX_DMA_DES_TX 11 /* Not in OMAP2420 */ +# define OMAP24XX_DMA_DES_RX 12 /* Not in OMAP2420 */ +# define OMAP24XX_DMA_SHA1MD5_RX 13 /* Not in OMAP2420 */ +# define OMAP24XX_DMA_EXT_DMAREQ2 14 +# define OMAP24XX_DMA_EXT_DMAREQ3 15 +# define OMAP24XX_DMA_EXT_DMAREQ4 16 +# define OMAP24XX_DMA_EAC_AC_RD 17 +# define OMAP24XX_DMA_EAC_AC_WR 18 +# define OMAP24XX_DMA_EAC_MD_UL_RD 19 +# define OMAP24XX_DMA_EAC_MD_UL_WR 20 +# define OMAP24XX_DMA_EAC_MD_DL_RD 21 +# define OMAP24XX_DMA_EAC_MD_DL_WR 22 +# define OMAP24XX_DMA_EAC_BT_UL_RD 23 +# define OMAP24XX_DMA_EAC_BT_UL_WR 24 +# define OMAP24XX_DMA_EAC_BT_DL_RD 25 +# define OMAP24XX_DMA_EAC_BT_DL_WR 26 +# define OMAP24XX_DMA_I2C1_TX 27 +# define OMAP24XX_DMA_I2C1_RX 28 +# define OMAP24XX_DMA_I2C2_TX 29 +# define OMAP24XX_DMA_I2C2_RX 30 +# define OMAP24XX_DMA_MCBSP1_TX 31 +# define OMAP24XX_DMA_MCBSP1_RX 32 +# define OMAP24XX_DMA_MCBSP2_TX 33 +# define OMAP24XX_DMA_MCBSP2_RX 34 +# define OMAP24XX_DMA_SPI1_TX0 35 +# define OMAP24XX_DMA_SPI1_RX0 36 +# define OMAP24XX_DMA_SPI1_TX1 37 +# define OMAP24XX_DMA_SPI1_RX1 38 +# define OMAP24XX_DMA_SPI1_TX2 39 +# define OMAP24XX_DMA_SPI1_RX2 40 +# define OMAP24XX_DMA_SPI1_TX3 41 +# define OMAP24XX_DMA_SPI1_RX3 42 +# define OMAP24XX_DMA_SPI2_TX0 43 +# define OMAP24XX_DMA_SPI2_RX0 44 +# define OMAP24XX_DMA_SPI2_TX1 45 +# define OMAP24XX_DMA_SPI2_RX1 46 + +# define OMAP24XX_DMA_UART1_TX 49 +# define OMAP24XX_DMA_UART1_RX 50 +# define OMAP24XX_DMA_UART2_TX 51 +# define OMAP24XX_DMA_UART2_RX 52 +# define OMAP24XX_DMA_UART3_TX 53 +# define OMAP24XX_DMA_UART3_RX 54 +# define OMAP24XX_DMA_USB_W2FC_TX0 55 +# define OMAP24XX_DMA_USB_W2FC_RX0 56 +# define OMAP24XX_DMA_USB_W2FC_TX1 57 +# define OMAP24XX_DMA_USB_W2FC_RX1 58 +# define OMAP24XX_DMA_USB_W2FC_TX2 59 +# define OMAP24XX_DMA_USB_W2FC_RX2 60 +# define OMAP24XX_DMA_MMC1_TX 61 +# define OMAP24XX_DMA_MMC1_RX 62 +# define OMAP24XX_DMA_MS 63 /* Not in OMAP2420 */ +# define OMAP24XX_DMA_EXT_DMAREQ5 64 + +/* omap[123].c */ +/* OMAP2 gp timer */ +struct omap_gp_timer_s; +struct omap_gp_timer_s *omap_gp_timer_init(struct omap_target_agent_s *ta, + qemu_irq irq, omap_clk fclk, omap_clk iclk); +void omap_gp_timer_reset(struct omap_gp_timer_s *s); + +/* OMAP2 sysctimer */ +struct omap_synctimer_s; +struct omap_synctimer_s *omap_synctimer_init(struct omap_target_agent_s *ta, + struct omap_mpu_state_s *mpu, omap_clk fclk, omap_clk iclk); +void omap_synctimer_reset(struct omap_synctimer_s *s); + +struct omap_uart_s; +struct omap_uart_s *omap_uart_init(hwaddr base, + qemu_irq irq, omap_clk fclk, omap_clk iclk, + qemu_irq txdma, qemu_irq rxdma, + const char *label, Chardev *chr); +struct omap_uart_s *omap2_uart_init(MemoryRegion *sysmem, + struct omap_target_agent_s *ta, + qemu_irq irq, omap_clk fclk, omap_clk iclk, + qemu_irq txdma, qemu_irq rxdma, + const char *label, Chardev *chr); +void omap_uart_reset(struct omap_uart_s *s); +void omap_uart_attach(struct omap_uart_s *s, Chardev *chr); + +struct omap_mpuio_s; +qemu_irq *omap_mpuio_in_get(struct omap_mpuio_s *s); +void omap_mpuio_out_set(struct omap_mpuio_s *s, int line, qemu_irq handler); +void omap_mpuio_key(struct omap_mpuio_s *s, int row, int col, int down); + +struct omap_uwire_s; +void omap_uwire_attach(struct omap_uwire_s *s, + uWireSlave *slave, int chipselect); + +/* OMAP2 spi */ +struct omap_mcspi_s; +struct omap_mcspi_s *omap_mcspi_init(struct omap_target_agent_s *ta, int chnum, + qemu_irq irq, qemu_irq *drq, omap_clk fclk, omap_clk iclk); +void omap_mcspi_attach(struct omap_mcspi_s *s, + uint32_t (*txrx)(void *opaque, uint32_t, int), void *opaque, + int chipselect); +void omap_mcspi_reset(struct omap_mcspi_s *s); + +struct I2SCodec { + void *opaque; + + /* The CPU can call this if it is generating the clock signal on the + * i2s port. The CODEC can ignore it if it is set up as a clock + * master and generates its own clock. */ + void (*set_rate)(void *opaque, int in, int out); + + void (*tx_swallow)(void *opaque); + qemu_irq rx_swallow; + qemu_irq tx_start; + + int tx_rate; + int cts; + int rx_rate; + int rts; + + struct i2s_fifo_s { + uint8_t *fifo; + int len; + int start; + int size; + } in, out; +}; +struct omap_mcbsp_s; +void omap_mcbsp_i2s_attach(struct omap_mcbsp_s *s, I2SCodec *slave); + +void omap_tap_init(struct omap_target_agent_s *ta, + struct omap_mpu_state_s *mpu); + +/* omap_lcdc.c */ +struct omap_lcd_panel_s; +void omap_lcdc_reset(struct omap_lcd_panel_s *s); +struct omap_lcd_panel_s *omap_lcdc_init(MemoryRegion *sysmem, + hwaddr base, + qemu_irq irq, + struct omap_dma_lcd_channel_s *dma, + omap_clk clk); + +/* omap_dss.c */ +struct rfbi_chip_s { + void *opaque; + void (*write)(void *opaque, int dc, uint16_t value); + void (*block)(void *opaque, int dc, void *buf, size_t len, int pitch); + uint16_t (*read)(void *opaque, int dc); +}; +struct omap_dss_s; +void omap_dss_reset(struct omap_dss_s *s); +struct omap_dss_s *omap_dss_init(struct omap_target_agent_s *ta, + MemoryRegion *sysmem, + hwaddr l3_base, + qemu_irq irq, qemu_irq drq, + omap_clk fck1, omap_clk fck2, omap_clk ck54m, + omap_clk ick1, omap_clk ick2); +void omap_rfbi_attach(struct omap_dss_s *s, int cs, struct rfbi_chip_s *chip); + +/* omap_mmc.c */ +struct omap_mmc_s; +struct omap_mmc_s *omap_mmc_init(hwaddr base, + MemoryRegion *sysmem, + BlockBackend *blk, + qemu_irq irq, qemu_irq dma[], omap_clk clk); +struct omap_mmc_s *omap2_mmc_init(struct omap_target_agent_s *ta, + BlockBackend *blk, qemu_irq irq, qemu_irq dma[], + omap_clk fclk, omap_clk iclk); +void omap_mmc_reset(struct omap_mmc_s *s); +void omap_mmc_handlers(struct omap_mmc_s *s, qemu_irq ro, qemu_irq cover); +void omap_mmc_enable(struct omap_mmc_s *s, int enable); + +/* omap_i2c.c */ +I2CBus *omap_i2c_bus(DeviceState *omap_i2c); + +# define cpu_is_omap310(cpu) (cpu->mpu_model == omap310) +# define cpu_is_omap1510(cpu) (cpu->mpu_model == omap1510) +# define cpu_is_omap1610(cpu) (cpu->mpu_model == omap1610) +# define cpu_is_omap1710(cpu) (cpu->mpu_model == omap1710) +# define cpu_is_omap2410(cpu) (cpu->mpu_model == omap2410) +# define cpu_is_omap2420(cpu) (cpu->mpu_model == omap2420) +# define cpu_is_omap2430(cpu) (cpu->mpu_model == omap2430) +# define cpu_is_omap3430(cpu) (cpu->mpu_model == omap3430) +# define cpu_is_omap3630(cpu) (cpu->mpu_model == omap3630) + +# define cpu_is_omap15xx(cpu) \ + (cpu_is_omap310(cpu) || cpu_is_omap1510(cpu)) +# define cpu_is_omap16xx(cpu) \ + (cpu_is_omap1610(cpu) || cpu_is_omap1710(cpu)) +# define cpu_is_omap24xx(cpu) \ + (cpu_is_omap2410(cpu) || cpu_is_omap2420(cpu) || cpu_is_omap2430(cpu)) + +# define cpu_class_omap1(cpu) \ + (cpu_is_omap15xx(cpu) || cpu_is_omap16xx(cpu)) +# define cpu_class_omap2(cpu) cpu_is_omap24xx(cpu) +# define cpu_class_omap3(cpu) \ + (cpu_is_omap3430(cpu) || cpu_is_omap3630(cpu)) + +struct omap_mpu_state_s { + enum omap_mpu_model { + omap310, + omap1510, + omap1610, + omap1710, + omap2410, + omap2420, + omap2422, + omap2423, + omap2430, + omap3430, + omap3630, + } mpu_model; + + ARMCPU *cpu; + + qemu_irq *drq; + + qemu_irq wakeup; + + MemoryRegion ulpd_pm_iomem; + MemoryRegion pin_cfg_iomem; + MemoryRegion id_iomem; + MemoryRegion id_iomem_e18; + MemoryRegion id_iomem_ed4; + MemoryRegion id_iomem_e20; + MemoryRegion mpui_iomem; + MemoryRegion tcmi_iomem; + MemoryRegion clkm_iomem; + MemoryRegion clkdsp_iomem; + MemoryRegion mpui_io_iomem; + MemoryRegion tap_iomem; + MemoryRegion imif_ram; + MemoryRegion sram; + + struct omap_dma_port_if_s { + uint32_t (*read[3])(struct omap_mpu_state_s *s, + hwaddr offset); + void (*write[3])(struct omap_mpu_state_s *s, + hwaddr offset, uint32_t value); + int (*addr_valid)(struct omap_mpu_state_s *s, + hwaddr addr); + } port[__omap_dma_port_last]; + + uint64_t sdram_size; + unsigned long sram_size; + + /* MPUI-TIPB peripherals */ + struct omap_uart_s *uart[3]; + + DeviceState *gpio; + + struct omap_mcbsp_s *mcbsp1; + struct omap_mcbsp_s *mcbsp3; + + /* MPU public TIPB peripherals */ + struct omap_32khz_timer_s *os_timer; + + struct omap_mmc_s *mmc; + + struct omap_mpuio_s *mpuio; + + struct omap_uwire_s *microwire; + + struct omap_pwl_s *pwl; + struct omap_pwt_s *pwt; + DeviceState *i2c[2]; + + struct omap_rtc_s *rtc; + + struct omap_mcbsp_s *mcbsp2; + + struct omap_lpg_s *led[2]; + + /* MPU private TIPB peripherals */ + DeviceState *ih[2]; + + struct soc_dma_s *dma; + + struct omap_mpu_timer_s *timer[3]; + struct omap_watchdog_timer_s *wdt; + + struct omap_lcd_panel_s *lcd; + + uint32_t ulpd_pm_regs[21]; + int64_t ulpd_gauge_start; + + uint32_t func_mux_ctrl[14]; + uint32_t comp_mode_ctrl[1]; + uint32_t pull_dwn_ctrl[4]; + uint32_t gate_inh_ctrl[1]; + uint32_t voltage_ctrl[1]; + uint32_t test_dbg_ctrl[1]; + uint32_t mod_conf_ctrl[1]; + int compat1509; + + uint32_t mpui_ctrl; + + struct omap_tipb_bridge_s *private_tipb; + struct omap_tipb_bridge_s *public_tipb; + + uint32_t tcmi_regs[17]; + + struct dpll_ctl_s *dpll[3]; + + omap_clk clks; + struct { + int cold_start; + int clocking_scheme; + uint16_t arm_ckctl; + uint16_t arm_idlect1; + uint16_t arm_idlect2; + uint16_t arm_ewupct; + uint16_t arm_rstct1; + uint16_t arm_rstct2; + uint16_t arm_ckout1; + int dpll1_mode; + uint16_t dsp_idlect1; + uint16_t dsp_idlect2; + uint16_t dsp_rstct2; + } clkm; + + /* OMAP2-only peripherals */ + struct omap_l4_s *l4; + + struct omap_gp_timer_s *gptimer[12]; + struct omap_synctimer_s *synctimer; + + struct omap_prcm_s *prcm; + struct omap_sdrc_s *sdrc; + struct omap_gpmc_s *gpmc; + struct omap_sysctl_s *sysc; + + struct omap_mcspi_s *mcspi[2]; + + struct omap_dss_s *dss; + + struct omap_eac_s *eac; +}; + +/* omap1.c */ +struct omap_mpu_state_s *omap310_mpu_init(MemoryRegion *sdram, + const char *core); + +/* omap2.c */ +struct omap_mpu_state_s *omap2420_mpu_init(MemoryRegion *sdram, + const char *core); + +uint32_t omap_badwidth_read8(void *opaque, hwaddr addr); +void omap_badwidth_write8(void *opaque, hwaddr addr, + uint32_t value); +uint32_t omap_badwidth_read16(void *opaque, hwaddr addr); +void omap_badwidth_write16(void *opaque, hwaddr addr, + uint32_t value); +uint32_t omap_badwidth_read32(void *opaque, hwaddr addr); +void omap_badwidth_write32(void *opaque, hwaddr addr, + uint32_t value); + +void omap_mpu_wakeup(void *opaque, int irq, int req); + +# define OMAP_BAD_REG(paddr) \ + qemu_log_mask(LOG_GUEST_ERROR, "%s: Bad register %#08"HWADDR_PRIx"\n", \ + __func__, paddr) +# define OMAP_RO_REG(paddr) \ + qemu_log_mask(LOG_GUEST_ERROR, "%s: Read-only register %#08" \ + HWADDR_PRIx "\n", \ + __func__, paddr) + +/* OMAP-specific Linux bootloader tags for the ATAG_BOARD area + (Board-specifc tags are not here) */ +#define OMAP_TAG_CLOCK 0x4f01 +#define OMAP_TAG_MMC 0x4f02 +#define OMAP_TAG_SERIAL_CONSOLE 0x4f03 +#define OMAP_TAG_USB 0x4f04 +#define OMAP_TAG_LCD 0x4f05 +#define OMAP_TAG_GPIO_SWITCH 0x4f06 +#define OMAP_TAG_UART 0x4f07 +#define OMAP_TAG_FBMEM 0x4f08 +#define OMAP_TAG_STI_CONSOLE 0x4f09 +#define OMAP_TAG_CAMERA_SENSOR 0x4f0a +#define OMAP_TAG_PARTITION 0x4f0b +#define OMAP_TAG_TEA5761 0x4f10 +#define OMAP_TAG_TMP105 0x4f11 +#define OMAP_TAG_BOOT_REASON 0x4f80 +#define OMAP_TAG_FLASH_PART_STR 0x4f81 +#define OMAP_TAG_VERSION_STR 0x4f82 + +enum { + OMAP_GPIOSW_TYPE_COVER = 0 << 4, + OMAP_GPIOSW_TYPE_CONNECTION = 1 << 4, + OMAP_GPIOSW_TYPE_ACTIVITY = 2 << 4, +}; + +#define OMAP_GPIOSW_INVERTED 0x0001 +#define OMAP_GPIOSW_OUTPUT 0x0002 + +# define OMAP_MPUI_REG_MASK 0x000007ff + +#endif diff --git a/include/hw/arm/primecell.h b/include/hw/arm/primecell.h new file mode 100644 index 000000000..7337c3b3c --- /dev/null +++ b/include/hw/arm/primecell.h @@ -0,0 +1,12 @@ +#ifndef PRIMECELL_H +#define PRIMECELL_H + +/* Declarations for ARM PrimeCell based periperals. */ +/* Also includes some devices that are currently only used by the + ARM boards. */ + +/* arm_sysctl GPIO lines */ +#define ARM_SYSCTL_GPIO_MMC_WPROT 0 +#define ARM_SYSCTL_GPIO_MMC_CARDIN 1 + +#endif diff --git a/include/hw/arm/pxa.h b/include/hw/arm/pxa.h new file mode 100644 index 000000000..1095504b8 --- /dev/null +++ b/include/hw/arm/pxa.h @@ -0,0 +1,200 @@ +/* + * Intel XScale PXA255/270 processor support. + * + * Copyright (c) 2006 Openedhand Ltd. + * Written by Andrzej Zaborowski + * + * This code is licensed under the GNU GPL v2. + */ + +#ifndef PXA_H +#define PXA_H + +#include "exec/memory.h" +#include "target/arm/cpu-qom.h" +#include "hw/pcmcia.h" +#include "qom/object.h" + +/* Interrupt numbers */ +# define PXA2XX_PIC_SSP3 0 +# define PXA2XX_PIC_USBH2 2 +# define PXA2XX_PIC_USBH1 3 +# define PXA2XX_PIC_KEYPAD 4 +# define PXA2XX_PIC_PWRI2C 6 +# define PXA25X_PIC_HWUART 7 +# define PXA27X_PIC_OST_4_11 7 +# define PXA2XX_PIC_GPIO_0 8 +# define PXA2XX_PIC_GPIO_1 9 +# define PXA2XX_PIC_GPIO_X 10 +# define PXA2XX_PIC_I2S 13 +# define PXA26X_PIC_ASSP 15 +# define PXA25X_PIC_NSSP 16 +# define PXA27X_PIC_SSP2 16 +# define PXA2XX_PIC_LCD 17 +# define PXA2XX_PIC_I2C 18 +# define PXA2XX_PIC_ICP 19 +# define PXA2XX_PIC_STUART 20 +# define PXA2XX_PIC_BTUART 21 +# define PXA2XX_PIC_FFUART 22 +# define PXA2XX_PIC_MMC 23 +# define PXA2XX_PIC_SSP 24 +# define PXA2XX_PIC_DMA 25 +# define PXA2XX_PIC_OST_0 26 +# define PXA2XX_PIC_RTC1HZ 30 +# define PXA2XX_PIC_RTCALARM 31 + +/* DMA requests */ +# define PXA2XX_RX_RQ_I2S 2 +# define PXA2XX_TX_RQ_I2S 3 +# define PXA2XX_RX_RQ_BTUART 4 +# define PXA2XX_TX_RQ_BTUART 5 +# define PXA2XX_RX_RQ_FFUART 6 +# define PXA2XX_TX_RQ_FFUART 7 +# define PXA2XX_RX_RQ_SSP1 13 +# define PXA2XX_TX_RQ_SSP1 14 +# define PXA2XX_RX_RQ_SSP2 15 +# define PXA2XX_TX_RQ_SSP2 16 +# define PXA2XX_RX_RQ_ICP 17 +# define PXA2XX_TX_RQ_ICP 18 +# define PXA2XX_RX_RQ_STUART 19 +# define PXA2XX_TX_RQ_STUART 20 +# define PXA2XX_RX_RQ_MMCI 21 +# define PXA2XX_TX_RQ_MMCI 22 +# define PXA2XX_USB_RQ(x) ((x) + 24) +# define PXA2XX_RX_RQ_SSP3 66 +# define PXA2XX_TX_RQ_SSP3 67 + +# define PXA2XX_SDRAM_BASE 0xa0000000 +# define PXA2XX_INTERNAL_BASE 0x5c000000 +# define PXA2XX_INTERNAL_SIZE 0x40000 + +/* pxa2xx_pic.c */ +DeviceState *pxa2xx_pic_init(hwaddr base, ARMCPU *cpu); + +/* pxa2xx_gpio.c */ +DeviceState *pxa2xx_gpio_init(hwaddr base, + ARMCPU *cpu, DeviceState *pic, int lines); +void pxa2xx_gpio_read_notifier(DeviceState *dev, qemu_irq handler); + +/* pxa2xx_dma.c */ +DeviceState *pxa255_dma_init(hwaddr base, qemu_irq irq); +DeviceState *pxa27x_dma_init(hwaddr base, qemu_irq irq); + +/* pxa2xx_lcd.c */ +typedef struct PXA2xxLCDState PXA2xxLCDState; +PXA2xxLCDState *pxa2xx_lcdc_init(MemoryRegion *sysmem, + hwaddr base, qemu_irq irq); +void pxa2xx_lcd_vsync_notifier(PXA2xxLCDState *s, qemu_irq handler); + +/* pxa2xx_mmci.c */ +#define TYPE_PXA2XX_MMCI "pxa2xx-mmci" +OBJECT_DECLARE_SIMPLE_TYPE(PXA2xxMMCIState, PXA2XX_MMCI) + +PXA2xxMMCIState *pxa2xx_mmci_init(MemoryRegion *sysmem, + hwaddr base, + qemu_irq irq, qemu_irq rx_dma, qemu_irq tx_dma); +void pxa2xx_mmci_handlers(PXA2xxMMCIState *s, qemu_irq readonly, + qemu_irq coverswitch); + +/* pxa2xx_pcmcia.c */ +#define TYPE_PXA2XX_PCMCIA "pxa2xx-pcmcia" +OBJECT_DECLARE_SIMPLE_TYPE(PXA2xxPCMCIAState, PXA2XX_PCMCIA) + +PXA2xxPCMCIAState *pxa2xx_pcmcia_init(MemoryRegion *sysmem, + hwaddr base); +int pxa2xx_pcmcia_attach(void *opaque, PCMCIACardState *card); +int pxa2xx_pcmcia_detach(void *opaque); +void pxa2xx_pcmcia_set_irq_cb(void *opaque, qemu_irq irq, qemu_irq cd_irq); + +/* pxa2xx_keypad.c */ +struct keymap { + int8_t column; + int8_t row; +}; +typedef struct PXA2xxKeyPadState PXA2xxKeyPadState; +PXA2xxKeyPadState *pxa27x_keypad_init(MemoryRegion *sysmem, + hwaddr base, + qemu_irq irq); +void pxa27x_register_keypad(PXA2xxKeyPadState *kp, + const struct keymap *map, int size); + +/* pxa2xx.c */ +typedef struct PXA2xxI2CState PXA2xxI2CState; +PXA2xxI2CState *pxa2xx_i2c_init(hwaddr base, + qemu_irq irq, uint32_t page_size); +I2CBus *pxa2xx_i2c_bus(PXA2xxI2CState *s); + +#define TYPE_PXA2XX_I2C "pxa2xx_i2c" +typedef struct PXA2xxI2SState PXA2xxI2SState; +OBJECT_DECLARE_SIMPLE_TYPE(PXA2xxI2CState, PXA2XX_I2C) + +#define TYPE_PXA2XX_FIR "pxa2xx-fir" +OBJECT_DECLARE_SIMPLE_TYPE(PXA2xxFIrState, PXA2XX_FIR) + +typedef struct { + ARMCPU *cpu; + DeviceState *pic; + qemu_irq reset; + MemoryRegion sdram; + MemoryRegion internal; + MemoryRegion cm_iomem; + MemoryRegion mm_iomem; + MemoryRegion pm_iomem; + DeviceState *dma; + DeviceState *gpio; + PXA2xxLCDState *lcd; + SSIBus **ssp; + PXA2xxI2CState *i2c[2]; + PXA2xxMMCIState *mmc; + PXA2xxPCMCIAState *pcmcia[2]; + PXA2xxI2SState *i2s; + PXA2xxFIrState *fir; + PXA2xxKeyPadState *kp; + + /* Power management */ + hwaddr pm_base; + uint32_t pm_regs[0x40]; + + /* Clock management */ + hwaddr cm_base; + uint32_t cm_regs[4]; + uint32_t clkcfg; + + /* Memory management */ + hwaddr mm_base; + uint32_t mm_regs[0x1a]; + + /* Performance monitoring */ + uint32_t pmnc; +} PXA2xxState; + +struct PXA2xxI2SState { + MemoryRegion iomem; + qemu_irq irq; + qemu_irq rx_dma; + qemu_irq tx_dma; + void (*data_req)(void *, int, int); + + uint32_t control[2]; + uint32_t status; + uint32_t mask; + uint32_t clk; + + int enable; + int rx_len; + int tx_len; + void (*codec_out)(void *, uint32_t); + uint32_t (*codec_in)(void *); + void *opaque; + + int fifo_len; + uint32_t fifo[16]; +}; + +# define PA_FMT "0x%08lx" + +PXA2xxState *pxa270_init(MemoryRegion *address_space, unsigned int sdram_size, + const char *revision); +PXA2xxState *pxa255_init(MemoryRegion *address_space, unsigned int sdram_size); + +#endif /* PXA_H */ diff --git a/include/hw/arm/raspi_platform.h b/include/hw/arm/raspi_platform.h new file mode 100644 index 000000000..e0e6c8ce9 --- /dev/null +++ b/include/hw/arm/raspi_platform.h @@ -0,0 +1,174 @@ +/* + * bcm2708 aka bcm2835/2836 aka Raspberry Pi/Pi2 SoC platform defines + * + * These definitions are derived from those in Raspbian Linux at + * arch/arm/mach-{bcm2708,bcm2709}/include/mach/platform.h + * where they carry the following notice: + * + * Copyright (C) 2010 Broadcom + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * + * Various undocumented addresses and names come from Herman Hermitage's VC4 + * documentation: + * https://github.com/hermanhermitage/videocoreiv/wiki/MMIO-Register-map + */ + +#ifndef HW_ARM_RASPI_PLATFORM_H +#define HW_ARM_RASPI_PLATFORM_H + +#define MSYNC_OFFSET 0x0000 /* Multicore Sync Block */ +#define CCPT_OFFSET 0x1000 /* Compact Camera Port 2 TX */ +#define INTE_OFFSET 0x2000 /* VC Interrupt controller */ +#define ST_OFFSET 0x3000 /* System Timer */ +#define TXP_OFFSET 0x4000 /* Transposer */ +#define JPEG_OFFSET 0x5000 +#define MPHI_OFFSET 0x6000 /* Message-based Parallel Host Intf. */ +#define DMA_OFFSET 0x7000 /* DMA controller, channels 0-14 */ +#define ARBA_OFFSET 0x9000 +#define BRDG_OFFSET 0xa000 +#define ARM_OFFSET 0xB000 /* ARM control block */ +#define ARMCTRL_OFFSET (ARM_OFFSET + 0x000) +#define ARMCTRL_IC_OFFSET (ARM_OFFSET + 0x200) /* Interrupt controller */ +#define ARMCTRL_TIMER0_1_OFFSET (ARM_OFFSET + 0x400) /* Timer 0 and 1 (SP804) */ +#define ARMCTRL_0_SBM_OFFSET (ARM_OFFSET + 0x800) /* User 0 (ARM) Semaphores + * Doorbells & Mailboxes */ +#define PM_OFFSET 0x100000 /* Power Management */ +#define CPRMAN_OFFSET 0x101000 /* Clock Management */ +#define AVS_OFFSET 0x103000 /* Audio Video Standard */ +#define RNG_OFFSET 0x104000 +#define GPIO_OFFSET 0x200000 +#define UART0_OFFSET 0x201000 /* PL011 */ +#define MMCI0_OFFSET 0x202000 /* Legacy MMC */ +#define I2S_OFFSET 0x203000 /* PCM */ +#define SPI0_OFFSET 0x204000 /* SPI master */ +#define BSC0_OFFSET 0x205000 /* BSC0 I2C/TWI */ +#define PIXV0_OFFSET 0x206000 +#define PIXV1_OFFSET 0x207000 +#define DPI_OFFSET 0x208000 +#define DSI0_OFFSET 0x209000 /* Display Serial Interface */ +#define PWM_OFFSET 0x20c000 +#define PERM_OFFSET 0x20d000 +#define TEC_OFFSET 0x20e000 +#define OTP_OFFSET 0x20f000 +#define SLIM_OFFSET 0x210000 /* SLIMbus */ +#define CPG_OFFSET 0x211000 +#define THERMAL_OFFSET 0x212000 +#define AVSP_OFFSET 0x213000 +#define BSC_SL_OFFSET 0x214000 /* SPI slave (bootrom) */ +#define AUX_OFFSET 0x215000 /* AUX: UART1/SPI1/SPI2 */ +#define EMMC1_OFFSET 0x300000 +#define EMMC2_OFFSET 0x340000 +#define HVS_OFFSET 0x400000 +#define SMI_OFFSET 0x600000 +#define DSI1_OFFSET 0x700000 +#define UCAM_OFFSET 0x800000 +#define CMI_OFFSET 0x802000 +#define BSC1_OFFSET 0x804000 /* BSC1 I2C/TWI */ +#define BSC2_OFFSET 0x805000 /* BSC2 I2C/TWI */ +#define VECA_OFFSET 0x806000 +#define PIXV2_OFFSET 0x807000 +#define HDMI_OFFSET 0x808000 +#define HDCP_OFFSET 0x809000 +#define ARBR0_OFFSET 0x80a000 +#define DBUS_OFFSET 0x900000 +#define AVE0_OFFSET 0x910000 +#define USB_OTG_OFFSET 0x980000 /* DTC_OTG USB controller */ +#define V3D_OFFSET 0xc00000 +#define SDRAMC_OFFSET 0xe00000 +#define L2CC_OFFSET 0xe01000 /* Level 2 Cache controller */ +#define L1CC_OFFSET 0xe02000 /* Level 1 Cache controller */ +#define ARBR1_OFFSET 0xe04000 +#define DMA15_OFFSET 0xE05000 /* DMA controller, channel 15 */ +#define DCRC_OFFSET 0xe07000 +#define AXIP_OFFSET 0xe08000 + +/* GPU interrupts */ +#define INTERRUPT_TIMER0 0 +#define INTERRUPT_TIMER1 1 +#define INTERRUPT_TIMER2 2 +#define INTERRUPT_TIMER3 3 +#define INTERRUPT_CODEC0 4 +#define INTERRUPT_CODEC1 5 +#define INTERRUPT_CODEC2 6 +#define INTERRUPT_JPEG 7 +#define INTERRUPT_ISP 8 +#define INTERRUPT_USB 9 +#define INTERRUPT_3D 10 +#define INTERRUPT_TRANSPOSER 11 +#define INTERRUPT_MULTICORESYNC0 12 +#define INTERRUPT_MULTICORESYNC1 13 +#define INTERRUPT_MULTICORESYNC2 14 +#define INTERRUPT_MULTICORESYNC3 15 +#define INTERRUPT_DMA0 16 +#define INTERRUPT_DMA1 17 +#define INTERRUPT_DMA2 18 +#define INTERRUPT_DMA3 19 +#define INTERRUPT_DMA4 20 +#define INTERRUPT_DMA5 21 +#define INTERRUPT_DMA6 22 +#define INTERRUPT_DMA7 23 +#define INTERRUPT_DMA8 24 +#define INTERRUPT_DMA9 25 +#define INTERRUPT_DMA10 26 +#define INTERRUPT_DMA11 27 +#define INTERRUPT_DMA12 28 +#define INTERRUPT_AUX 29 +#define INTERRUPT_ARM 30 +#define INTERRUPT_VPUDMA 31 +#define INTERRUPT_HOSTPORT 32 +#define INTERRUPT_VIDEOSCALER 33 +#define INTERRUPT_CCP2TX 34 +#define INTERRUPT_SDC 35 +#define INTERRUPT_DSI0 36 +#define INTERRUPT_AVE 37 +#define INTERRUPT_CAM0 38 +#define INTERRUPT_CAM1 39 +#define INTERRUPT_HDMI0 40 +#define INTERRUPT_HDMI1 41 +#define INTERRUPT_PIXELVALVE1 42 +#define INTERRUPT_I2CSPISLV 43 +#define INTERRUPT_DSI1 44 +#define INTERRUPT_PWA0 45 +#define INTERRUPT_PWA1 46 +#define INTERRUPT_CPR 47 +#define INTERRUPT_SMI 48 +#define INTERRUPT_GPIO0 49 +#define INTERRUPT_GPIO1 50 +#define INTERRUPT_GPIO2 51 +#define INTERRUPT_GPIO3 52 +#define INTERRUPT_I2C 53 +#define INTERRUPT_SPI 54 +#define INTERRUPT_I2SPCM 55 +#define INTERRUPT_SDIO 56 +#define INTERRUPT_UART0 57 +#define INTERRUPT_SLIMBUS 58 +#define INTERRUPT_VEC 59 +#define INTERRUPT_CPG 60 +#define INTERRUPT_RNG 61 +#define INTERRUPT_ARASANSDIO 62 +#define INTERRUPT_AVSPMON 63 + +/* ARM CPU IRQs use a private number space */ +#define INTERRUPT_ARM_TIMER 0 +#define INTERRUPT_ARM_MAILBOX 1 +#define INTERRUPT_ARM_DOORBELL_0 2 +#define INTERRUPT_ARM_DOORBELL_1 3 +#define INTERRUPT_VPU0_HALTED 4 +#define INTERRUPT_VPU1_HALTED 5 +#define INTERRUPT_ILLEGAL_TYPE0 6 +#define INTERRUPT_ILLEGAL_TYPE1 7 + +#endif diff --git a/include/hw/arm/sharpsl.h b/include/hw/arm/sharpsl.h new file mode 100644 index 000000000..e986b28c5 --- /dev/null +++ b/include/hw/arm/sharpsl.h @@ -0,0 +1,17 @@ +/* + * Common declarations for the Zaurii. + * + * This file is licensed under the GNU GPL. + */ + +#ifndef QEMU_SHARPSL_H +#define QEMU_SHARPSL_H + +#include "exec/hwaddr.h" + +/* zaurus.c */ + +#define SL_PXA_PARAM_BASE 0xa0000a00 +void sl_bootparam_write(hwaddr ptr); + +#endif diff --git a/include/hw/arm/smmu-common.h b/include/hw/arm/smmu-common.h new file mode 100644 index 000000000..706be3c6d --- /dev/null +++ b/include/hw/arm/smmu-common.h @@ -0,0 +1,178 @@ +/* + * ARM SMMU Support + * + * Copyright (C) 2015-2016 Broadcom Corporation + * Copyright (c) 2017 Red Hat, Inc. + * Written by Prem Mallappa, Eric Auger + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + */ + +#ifndef HW_ARM_SMMU_COMMON_H +#define HW_ARM_SMMU_COMMON_H + +#include "hw/sysbus.h" +#include "hw/pci/pci.h" +#include "qom/object.h" + +#define SMMU_PCI_BUS_MAX 256 +#define SMMU_PCI_DEVFN_MAX 256 +#define SMMU_PCI_DEVFN(sid) (sid & 0xFF) + +#define SMMU_MAX_VA_BITS 48 + +/* + * Page table walk error types + */ +typedef enum { + SMMU_PTW_ERR_NONE, + SMMU_PTW_ERR_WALK_EABT, /* Translation walk external abort */ + SMMU_PTW_ERR_TRANSLATION, /* Translation fault */ + SMMU_PTW_ERR_ADDR_SIZE, /* Address Size fault */ + SMMU_PTW_ERR_ACCESS, /* Access fault */ + SMMU_PTW_ERR_PERMISSION, /* Permission fault */ +} SMMUPTWEventType; + +typedef struct SMMUPTWEventInfo { + SMMUPTWEventType type; + dma_addr_t addr; /* fetched address that induced an abort, if any */ +} SMMUPTWEventInfo; + +typedef struct SMMUTransTableInfo { + bool disabled; /* is the translation table disabled? */ + uint64_t ttb; /* TT base address */ + uint8_t tsz; /* input range, ie. 2^(64 -tsz)*/ + uint8_t granule_sz; /* granule page shift */ + bool had; /* hierarchical attribute disable */ +} SMMUTransTableInfo; + +typedef struct SMMUTLBEntry { + IOMMUTLBEntry entry; + uint8_t level; + uint8_t granule; +} SMMUTLBEntry; + +/* + * Generic structure populated by derived SMMU devices + * after decoding the configuration information and used as + * input to the page table walk + */ +typedef struct SMMUTransCfg { + int stage; /* translation stage */ + bool aa64; /* arch64 or aarch32 translation table */ + bool disabled; /* smmu is disabled */ + bool bypassed; /* translation is bypassed */ + bool aborted; /* translation is aborted */ + uint64_t ttb; /* TT base address */ + uint8_t oas; /* output address width */ + uint8_t tbi; /* Top Byte Ignore */ + uint16_t asid; + SMMUTransTableInfo tt[2]; + uint32_t iotlb_hits; /* counts IOTLB hits for this asid */ + uint32_t iotlb_misses; /* counts IOTLB misses for this asid */ +} SMMUTransCfg; + +typedef struct SMMUDevice { + void *smmu; + PCIBus *bus; + int devfn; + IOMMUMemoryRegion iommu; + AddressSpace as; + uint32_t cfg_cache_hits; + uint32_t cfg_cache_misses; + QLIST_ENTRY(SMMUDevice) next; +} SMMUDevice; + +typedef struct SMMUPciBus { + PCIBus *bus; + SMMUDevice *pbdev[]; /* Parent array is sparse, so dynamically alloc */ +} SMMUPciBus; + +typedef struct SMMUIOTLBKey { + uint64_t iova; + uint16_t asid; + uint8_t tg; + uint8_t level; +} SMMUIOTLBKey; + +struct SMMUState { + /* */ + SysBusDevice dev; + const char *mrtypename; + MemoryRegion iomem; + + GHashTable *smmu_pcibus_by_busptr; + GHashTable *configs; /* cache for configuration data */ + GHashTable *iotlb; + SMMUPciBus *smmu_pcibus_by_bus_num[SMMU_PCI_BUS_MAX]; + PCIBus *pci_bus; + QLIST_HEAD(, SMMUDevice) devices_with_notifiers; + uint8_t bus_num; + PCIBus *primary_bus; +}; + +struct SMMUBaseClass { + /* */ + SysBusDeviceClass parent_class; + + /*< public >*/ + + DeviceRealize parent_realize; + +}; + +#define TYPE_ARM_SMMU "arm-smmu" +OBJECT_DECLARE_TYPE(SMMUState, SMMUBaseClass, ARM_SMMU) + +/* Return the SMMUPciBus handle associated to a PCI bus number */ +SMMUPciBus *smmu_find_smmu_pcibus(SMMUState *s, uint8_t bus_num); + +/* Return the stream ID of an SMMU device */ +static inline uint16_t smmu_get_sid(SMMUDevice *sdev) +{ + return PCI_BUILD_BDF(pci_bus_num(sdev->bus), sdev->devfn); +} + +/** + * smmu_ptw - Perform the page table walk for a given iova / access flags + * pair, according to @cfg translation config + */ +int smmu_ptw(SMMUTransCfg *cfg, dma_addr_t iova, IOMMUAccessFlags perm, + SMMUTLBEntry *tlbe, SMMUPTWEventInfo *info); + +/** + * select_tt - compute which translation table shall be used according to + * the input iova and translation config and return the TT specific info + */ +SMMUTransTableInfo *select_tt(SMMUTransCfg *cfg, dma_addr_t iova); + +/* Return the iommu mr associated to @sid, or NULL if none */ +IOMMUMemoryRegion *smmu_iommu_mr(SMMUState *s, uint32_t sid); + +#define SMMU_IOTLB_MAX_SIZE 256 + +SMMUTLBEntry *smmu_iotlb_lookup(SMMUState *bs, SMMUTransCfg *cfg, + SMMUTransTableInfo *tt, hwaddr iova); +void smmu_iotlb_insert(SMMUState *bs, SMMUTransCfg *cfg, SMMUTLBEntry *entry); +SMMUIOTLBKey smmu_get_iotlb_key(uint16_t asid, uint64_t iova, + uint8_t tg, uint8_t level); +void smmu_iotlb_inv_all(SMMUState *s); +void smmu_iotlb_inv_asid(SMMUState *s, uint16_t asid); +void smmu_iotlb_inv_iova(SMMUState *s, int asid, dma_addr_t iova, + uint8_t tg, uint64_t num_pages, uint8_t ttl); + +/* Unmap the range of all the notifiers registered to any IOMMU mr */ +void smmu_inv_notifiers_all(SMMUState *s); + +/* Unmap the range of all the notifiers registered to @mr */ +void smmu_inv_notifiers_mr(IOMMUMemoryRegion *mr); + +#endif /* HW_ARM_SMMU_COMMON_H */ diff --git a/include/hw/arm/smmuv3.h b/include/hw/arm/smmuv3.h new file mode 100644 index 000000000..c641e6073 --- /dev/null +++ b/include/hw/arm/smmuv3.h @@ -0,0 +1,86 @@ +/* + * Copyright (C) 2014-2016 Broadcom Corporation + * Copyright (c) 2017 Red Hat, Inc. + * Written by Prem Mallappa, Eric Auger + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, see . + */ + +#ifndef HW_ARM_SMMUV3_H +#define HW_ARM_SMMUV3_H + +#include "hw/arm/smmu-common.h" +#include "hw/registerfields.h" +#include "qom/object.h" + +#define TYPE_SMMUV3_IOMMU_MEMORY_REGION "smmuv3-iommu-memory-region" + +typedef struct SMMUQueue { + uint64_t base; /* base register */ + uint32_t prod; + uint32_t cons; + uint8_t entry_size; + uint8_t log2size; +} SMMUQueue; + +struct SMMUv3State { + SMMUState smmu_state; + + uint32_t features; + uint8_t sid_size; + uint8_t sid_split; + + uint32_t idr[6]; + uint32_t iidr; + uint32_t aidr; + uint32_t cr[3]; + uint32_t cr0ack; + uint32_t statusr; + uint32_t irq_ctrl; + uint32_t gerror; + uint32_t gerrorn; + uint64_t gerror_irq_cfg0; + uint32_t gerror_irq_cfg1; + uint32_t gerror_irq_cfg2; + uint64_t strtab_base; + uint32_t strtab_base_cfg; + uint64_t eventq_irq_cfg0; + uint32_t eventq_irq_cfg1; + uint32_t eventq_irq_cfg2; + + SMMUQueue eventq, cmdq; + + qemu_irq irq[4]; + QemuMutex mutex; +}; + +typedef enum { + SMMU_IRQ_EVTQ, + SMMU_IRQ_PRIQ, + SMMU_IRQ_CMD_SYNC, + SMMU_IRQ_GERROR, +} SMMUIrq; + +struct SMMUv3Class { + /*< private >*/ + SMMUBaseClass smmu_base_class; + /*< public >*/ + + DeviceRealize parent_realize; + DeviceReset parent_reset; +}; + +#define TYPE_ARM_SMMUV3 "arm-smmuv3" +OBJECT_DECLARE_TYPE(SMMUv3State, SMMUv3Class, ARM_SMMUV3) + +#endif diff --git a/include/hw/arm/soc_dma.h b/include/hw/arm/soc_dma.h new file mode 100644 index 000000000..e93a7499a --- /dev/null +++ b/include/hw/arm/soc_dma.h @@ -0,0 +1,114 @@ +/* + * On-chip DMA controller framework. + * + * Copyright (C) 2008 Nokia Corporation + * Written by Andrzej Zaborowski + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation; either version 2 or + * (at your option) version 3 of the License. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, see . + */ + +#ifndef HW_SOC_DMA_H +#define HW_SOC_DMA_H + +#include "exec/hwaddr.h" + +struct soc_dma_s; +struct soc_dma_ch_s; +typedef void (*soc_dma_io_t)(void *opaque, uint8_t *buf, int len); +typedef void (*soc_dma_transfer_t)(struct soc_dma_ch_s *ch); + +enum soc_dma_port_type { + soc_dma_port_mem, + soc_dma_port_fifo, + soc_dma_port_other, +}; + +enum soc_dma_access_type { + soc_dma_access_const, + soc_dma_access_linear, + soc_dma_access_other, +}; + +struct soc_dma_ch_s { + /* Private */ + struct soc_dma_s *dma; + int num; + QEMUTimer *timer; + + /* Set by soc_dma.c */ + int enable; + int update; + + /* This should be set by dma->setup_fn(). */ + int bytes; + /* Initialised by the DMA module, call soc_dma_ch_update after writing. */ + enum soc_dma_access_type type[2]; + hwaddr vaddr[2]; /* Updated by .transfer_fn(). */ + /* Private */ + void *paddr[2]; + soc_dma_io_t io_fn[2]; + void *io_opaque[2]; + + int running; + soc_dma_transfer_t transfer_fn; + + /* Set and used by the DMA module. */ + void *opaque; +}; + +struct soc_dma_s { + /* Following fields are set by the SoC DMA module and can be used + * by anybody. */ + uint64_t drqbmp; /* Is zeroed by soc_dma_reset() */ + qemu_irq *drq; + void *opaque; + int64_t freq; + soc_dma_transfer_t transfer_fn; + soc_dma_transfer_t setup_fn; + /* Set by soc_dma_init() for use by the DMA module. */ + struct soc_dma_ch_s *ch; +}; + +/* Call to activate or stop a DMA channel. */ +void soc_dma_set_request(struct soc_dma_ch_s *ch, int level); +/* Call after every write to one of the following fields and before + * calling soc_dma_set_request(ch, 1): + * ch->type[0...1], + * ch->vaddr[0...1], + * ch->paddr[0...1], + * or after a soc_dma_port_add_fifo() or soc_dma_port_add_mem(). */ +void soc_dma_ch_update(struct soc_dma_ch_s *ch); + +/* The SoC should call this when the DMA module is being reset. */ +void soc_dma_reset(struct soc_dma_s *s); +struct soc_dma_s *soc_dma_init(int n); + +void soc_dma_port_add_fifo(struct soc_dma_s *dma, hwaddr virt_base, + soc_dma_io_t fn, void *opaque, int out); +void soc_dma_port_add_mem(struct soc_dma_s *dma, uint8_t *phys_base, + hwaddr virt_base, size_t size); + +static inline void soc_dma_port_add_fifo_in(struct soc_dma_s *dma, + hwaddr virt_base, soc_dma_io_t fn, void *opaque) +{ + return soc_dma_port_add_fifo(dma, virt_base, fn, opaque, 0); +} + +static inline void soc_dma_port_add_fifo_out(struct soc_dma_s *dma, + hwaddr virt_base, soc_dma_io_t fn, void *opaque) +{ + return soc_dma_port_add_fifo(dma, virt_base, fn, opaque, 1); +} + +#endif diff --git a/include/hw/arm/stm32f205_soc.h b/include/hw/arm/stm32f205_soc.h new file mode 100644 index 000000000..985ff63aa --- /dev/null +++ b/include/hw/arm/stm32f205_soc.h @@ -0,0 +1,68 @@ +/* + * STM32F205 SoC + * + * Copyright (c) 2014 Alistair Francis + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ + +#ifndef HW_ARM_STM32F205_SOC_H +#define HW_ARM_STM32F205_SOC_H + +#include "hw/misc/stm32f2xx_syscfg.h" +#include "hw/timer/stm32f2xx_timer.h" +#include "hw/char/stm32f2xx_usart.h" +#include "hw/adc/stm32f2xx_adc.h" +#include "hw/or-irq.h" +#include "hw/ssi/stm32f2xx_spi.h" +#include "hw/arm/armv7m.h" +#include "qom/object.h" + +#define TYPE_STM32F205_SOC "stm32f205-soc" +OBJECT_DECLARE_SIMPLE_TYPE(STM32F205State, STM32F205_SOC) + +#define STM_NUM_USARTS 6 +#define STM_NUM_TIMERS 4 +#define STM_NUM_ADCS 3 +#define STM_NUM_SPIS 3 + +#define FLASH_BASE_ADDRESS 0x08000000 +#define FLASH_SIZE (1024 * 1024) +#define SRAM_BASE_ADDRESS 0x20000000 +#define SRAM_SIZE (128 * 1024) + +struct STM32F205State { + /*< private >*/ + SysBusDevice parent_obj; + /*< public >*/ + + char *cpu_type; + + ARMv7MState armv7m; + + STM32F2XXSyscfgState syscfg; + STM32F2XXUsartState usart[STM_NUM_USARTS]; + STM32F2XXTimerState timer[STM_NUM_TIMERS]; + STM32F2XXADCState adc[STM_NUM_ADCS]; + STM32F2XXSPIState spi[STM_NUM_SPIS]; + + qemu_or_irq *adc_irqs; +}; + +#endif diff --git a/include/hw/arm/stm32f405_soc.h b/include/hw/arm/stm32f405_soc.h new file mode 100644 index 000000000..347105e70 --- /dev/null +++ b/include/hw/arm/stm32f405_soc.h @@ -0,0 +1,73 @@ +/* + * STM32F405 SoC + * + * Copyright (c) 2014 Alistair Francis + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ + +#ifndef HW_ARM_STM32F405_SOC_H +#define HW_ARM_STM32F405_SOC_H + +#include "hw/misc/stm32f4xx_syscfg.h" +#include "hw/timer/stm32f2xx_timer.h" +#include "hw/char/stm32f2xx_usart.h" +#include "hw/adc/stm32f2xx_adc.h" +#include "hw/misc/stm32f4xx_exti.h" +#include "hw/or-irq.h" +#include "hw/ssi/stm32f2xx_spi.h" +#include "hw/arm/armv7m.h" +#include "qom/object.h" + +#define TYPE_STM32F405_SOC "stm32f405-soc" +OBJECT_DECLARE_SIMPLE_TYPE(STM32F405State, STM32F405_SOC) + +#define STM_NUM_USARTS 7 +#define STM_NUM_TIMERS 4 +#define STM_NUM_ADCS 6 +#define STM_NUM_SPIS 6 + +#define FLASH_BASE_ADDRESS 0x08000000 +#define FLASH_SIZE (1024 * 1024) +#define SRAM_BASE_ADDRESS 0x20000000 +#define SRAM_SIZE (192 * 1024) + +struct STM32F405State { + /*< private >*/ + SysBusDevice parent_obj; + /*< public >*/ + + char *cpu_type; + + ARMv7MState armv7m; + + STM32F4xxSyscfgState syscfg; + STM32F4xxExtiState exti; + STM32F2XXUsartState usart[STM_NUM_USARTS]; + STM32F2XXTimerState timer[STM_NUM_TIMERS]; + qemu_or_irq adc_irqs; + STM32F2XXADCState adc[STM_NUM_ADCS]; + STM32F2XXSPIState spi[STM_NUM_SPIS]; + + MemoryRegion sram; + MemoryRegion flash; + MemoryRegion flash_alias; +}; + +#endif diff --git a/include/hw/arm/sysbus-fdt.h b/include/hw/arm/sysbus-fdt.h new file mode 100644 index 000000000..340c382cd --- /dev/null +++ b/include/hw/arm/sysbus-fdt.h @@ -0,0 +1,37 @@ +/* + * Dynamic sysbus device tree node generation API + * + * Copyright Linaro Limited, 2014 + * + * Authors: + * Alex Graf + * Eric Auger + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2 or later, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along with + * this program. If not, see . + * + */ + +#ifndef HW_ARM_SYSBUS_FDT_H +#define HW_ARM_SYSBUS_FDT_H + +#include "exec/hwaddr.h" + +/** + * platform_bus_add_all_fdt_nodes - create all the platform bus nodes + * + * builds the parent platform bus node and all the nodes of dynamic + * sysbus devices attached to it. + */ +void platform_bus_add_all_fdt_nodes(void *fdt, const char *intc, hwaddr addr, + hwaddr bus_size, int irq_start); +#endif diff --git a/include/hw/arm/virt.h b/include/hw/arm/virt.h new file mode 100644 index 000000000..aad6d6984 --- /dev/null +++ b/include/hw/arm/virt.h @@ -0,0 +1,187 @@ +/* + * + * Copyright (c) 2015 Linaro Limited + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2 or later, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along with + * this program. If not, see . + * + * Emulate a virtual board which works by passing Linux all the information + * it needs about what devices are present via the device tree. + * There are some restrictions about what we can do here: + * + we can only present devices whose Linux drivers will work based + * purely on the device tree with no platform data at all + * + we want to present a very stripped-down minimalist platform, + * both because this reduces the security attack surface from the guest + * and also because it reduces our exposure to being broken when + * the kernel updates its device tree bindings and requires further + * information in a device binding that we aren't providing. + * This is essentially the same approach kvmtool uses. + */ + +#ifndef QEMU_ARM_VIRT_H +#define QEMU_ARM_VIRT_H + +#include "exec/hwaddr.h" +#include "qemu/notify.h" +#include "hw/boards.h" +#include "hw/arm/boot.h" +#include "hw/block/flash.h" +#include "sysemu/kvm.h" +#include "hw/intc/arm_gicv3_common.h" +#include "qom/object.h" + +#define NUM_GICV2M_SPIS 64 +#define NUM_VIRTIO_TRANSPORTS 32 +#define NUM_SMMU_IRQS 4 + +#define ARCH_GIC_MAINT_IRQ 9 + +#define ARCH_TIMER_VIRT_IRQ 11 +#define ARCH_TIMER_S_EL1_IRQ 13 +#define ARCH_TIMER_NS_EL1_IRQ 14 +#define ARCH_TIMER_NS_EL2_IRQ 10 + +#define VIRTUAL_PMU_IRQ 7 + +#define PPI(irq) ((irq) + 16) + +/* See Linux kernel arch/arm64/include/asm/pvclock-abi.h */ +#define PVTIME_SIZE_PER_CPU 64 + +enum { + VIRT_FLASH, + VIRT_MEM, + VIRT_CPUPERIPHS, + VIRT_GIC_DIST, + VIRT_GIC_CPU, + VIRT_GIC_V2M, + VIRT_GIC_HYP, + VIRT_GIC_VCPU, + VIRT_GIC_ITS, + VIRT_GIC_REDIST, + VIRT_SMMU, + VIRT_UART, + VIRT_MMIO, + VIRT_RTC, + VIRT_FW_CFG, + VIRT_PCIE, + VIRT_PCIE_MMIO, + VIRT_PCIE_PIO, + VIRT_PCIE_ECAM, + VIRT_PLATFORM_BUS, + VIRT_GPIO, + VIRT_SECURE_UART, + VIRT_SECURE_MEM, + VIRT_PCDIMM_ACPI, + VIRT_ACPI_GED, + VIRT_NVDIMM_ACPI, + VIRT_PVTIME, + VIRT_LOWMEMMAP_LAST, +}; + +/* indices of IO regions located after the RAM */ +enum { + VIRT_HIGH_GIC_REDIST2 = VIRT_LOWMEMMAP_LAST, + VIRT_HIGH_PCIE_ECAM, + VIRT_HIGH_PCIE_MMIO, +}; + +typedef enum VirtIOMMUType { + VIRT_IOMMU_NONE, + VIRT_IOMMU_SMMUV3, + VIRT_IOMMU_VIRTIO, +} VirtIOMMUType; + +typedef enum VirtMSIControllerType { + VIRT_MSI_CTRL_NONE, + VIRT_MSI_CTRL_GICV2M, + VIRT_MSI_CTRL_ITS, +} VirtMSIControllerType; + +typedef enum VirtGICType { + VIRT_GIC_VERSION_MAX, + VIRT_GIC_VERSION_HOST, + VIRT_GIC_VERSION_2, + VIRT_GIC_VERSION_3, + VIRT_GIC_VERSION_NOSEL, +} VirtGICType; + +struct VirtMachineClass { + MachineClass parent; + bool disallow_affinity_adjustment; + bool no_its; + bool no_pmu; + bool claim_edge_triggered_timers; + bool smbios_old_sys_ver; + bool no_highmem_ecam; + bool no_ged; /* Machines < 4.2 has no support for ACPI GED device */ + bool kvm_no_adjvtime; + bool no_kvm_steal_time; + bool acpi_expose_flash; +}; + +struct VirtMachineState { + MachineState parent; + Notifier machine_done; + DeviceState *platform_bus_dev; + FWCfgState *fw_cfg; + PFlashCFI01 *flash[2]; + bool secure; + bool highmem; + bool highmem_ecam; + bool its; + bool virt; + bool ras; + bool mte; + OnOffAuto acpi; + VirtGICType gic_version; + VirtIOMMUType iommu; + VirtMSIControllerType msi_controller; + uint16_t virtio_iommu_bdf; + struct arm_boot_info bootinfo; + MemMapEntry *memmap; + char *pciehb_nodename; + const int *irqmap; + int smp_cpus; + void *fdt; + int fdt_size; + uint32_t clock_phandle; + uint32_t gic_phandle; + uint32_t msi_phandle; + uint32_t iommu_phandle; + int psci_conduit; + hwaddr highest_gpa; + DeviceState *gic; + DeviceState *acpi_dev; + Notifier powerdown_notifier; +}; + +#define VIRT_ECAM_ID(high) (high ? VIRT_HIGH_PCIE_ECAM : VIRT_PCIE_ECAM) + +#define TYPE_VIRT_MACHINE MACHINE_TYPE_NAME("virt") +OBJECT_DECLARE_TYPE(VirtMachineState, VirtMachineClass, VIRT_MACHINE) + +void virt_acpi_setup(VirtMachineState *vms); +bool virt_is_acpi_enabled(VirtMachineState *vms); + +/* Return the number of used redistributor regions */ +static inline int virt_gicv3_redist_region_count(VirtMachineState *vms) +{ + uint32_t redist0_capacity = + vms->memmap[VIRT_GIC_REDIST].size / GICV3_REDIST_SIZE; + + assert(vms->gic_version == VIRT_GIC_VERSION_3); + + return vms->smp_cpus > redist0_capacity ? 2 : 1; +} + +#endif /* QEMU_ARM_VIRT_H */ diff --git a/include/hw/arm/xlnx-versal.h b/include/hw/arm/xlnx-versal.h new file mode 100644 index 000000000..8ce8e63b5 --- /dev/null +++ b/include/hw/arm/xlnx-versal.h @@ -0,0 +1,155 @@ +/* + * Model of the Xilinx Versal + * + * Copyright (c) 2018 Xilinx Inc. + * Written by Edgar E. Iglesias + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 or + * (at your option) any later version. + */ + +#ifndef XLNX_VERSAL_H +#define XLNX_VERSAL_H + +#include "hw/sysbus.h" +#include "hw/arm/boot.h" +#include "hw/sd/sdhci.h" +#include "hw/intc/arm_gicv3.h" +#include "hw/char/pl011.h" +#include "hw/dma/xlnx-zdma.h" +#include "hw/net/cadence_gem.h" +#include "hw/rtc/xlnx-zynqmp-rtc.h" +#include "qom/object.h" + +#define TYPE_XLNX_VERSAL "xlnx-versal" +OBJECT_DECLARE_SIMPLE_TYPE(Versal, XLNX_VERSAL) + +#define XLNX_VERSAL_NR_ACPUS 2 +#define XLNX_VERSAL_NR_UARTS 2 +#define XLNX_VERSAL_NR_GEMS 2 +#define XLNX_VERSAL_NR_ADMAS 8 +#define XLNX_VERSAL_NR_SDS 2 +#define XLNX_VERSAL_NR_IRQS 192 + +struct Versal { + /*< private >*/ + SysBusDevice parent_obj; + + /*< public >*/ + struct { + struct { + MemoryRegion mr; + ARMCPU cpu[XLNX_VERSAL_NR_ACPUS]; + GICv3State gic; + } apu; + } fpd; + + MemoryRegion mr_ps; + + struct { + /* 4 ranges to access DDR. */ + MemoryRegion mr_ddr_ranges[4]; + } noc; + + struct { + MemoryRegion mr_ocm; + + struct { + PL011State uart[XLNX_VERSAL_NR_UARTS]; + CadenceGEMState gem[XLNX_VERSAL_NR_GEMS]; + XlnxZDMA adma[XLNX_VERSAL_NR_ADMAS]; + } iou; + } lpd; + + /* The Platform Management Controller subsystem. */ + struct { + struct { + SDHCIState sd[XLNX_VERSAL_NR_SDS]; + } iou; + + XlnxZynqMPRTC rtc; + } pmc; + + struct { + MemoryRegion *mr_ddr; + uint32_t psci_conduit; + } cfg; +}; + +/* Memory-map and IRQ definitions. Copied a subset from + * auto-generated files. */ + +#define VERSAL_GIC_MAINT_IRQ 9 +#define VERSAL_TIMER_VIRT_IRQ 11 +#define VERSAL_TIMER_S_EL1_IRQ 13 +#define VERSAL_TIMER_NS_EL1_IRQ 14 +#define VERSAL_TIMER_NS_EL2_IRQ 10 + +#define VERSAL_UART0_IRQ_0 18 +#define VERSAL_UART1_IRQ_0 19 +#define VERSAL_GEM0_IRQ_0 56 +#define VERSAL_GEM0_WAKE_IRQ_0 57 +#define VERSAL_GEM1_IRQ_0 58 +#define VERSAL_GEM1_WAKE_IRQ_0 59 +#define VERSAL_ADMA_IRQ_0 60 +#define VERSAL_RTC_APB_ERR_IRQ 121 +#define VERSAL_SD0_IRQ_0 126 +#define VERSAL_RTC_ALARM_IRQ 142 +#define VERSAL_RTC_SECONDS_IRQ 143 + +/* Architecturally reserved IRQs suitable for virtualization. */ +#define VERSAL_RSVD_IRQ_FIRST 111 +#define VERSAL_RSVD_IRQ_LAST 118 + +#define MM_TOP_RSVD 0xa0000000U +#define MM_TOP_RSVD_SIZE 0x4000000 +#define MM_GIC_APU_DIST_MAIN 0xf9000000U +#define MM_GIC_APU_DIST_MAIN_SIZE 0x10000 +#define MM_GIC_APU_REDIST_0 0xf9080000U +#define MM_GIC_APU_REDIST_0_SIZE 0x80000 + +#define MM_UART0 0xff000000U +#define MM_UART0_SIZE 0x10000 +#define MM_UART1 0xff010000U +#define MM_UART1_SIZE 0x10000 + +#define MM_GEM0 0xff0c0000U +#define MM_GEM0_SIZE 0x10000 +#define MM_GEM1 0xff0d0000U +#define MM_GEM1_SIZE 0x10000 + +#define MM_ADMA_CH0 0xffa80000U +#define MM_ADMA_CH0_SIZE 0x10000 + +#define MM_OCM 0xfffc0000U +#define MM_OCM_SIZE 0x40000 + +#define MM_TOP_DDR 0x0 +#define MM_TOP_DDR_SIZE 0x80000000U +#define MM_TOP_DDR_2 0x800000000ULL +#define MM_TOP_DDR_2_SIZE 0x800000000ULL +#define MM_TOP_DDR_3 0xc000000000ULL +#define MM_TOP_DDR_3_SIZE 0x4000000000ULL +#define MM_TOP_DDR_4 0x10000000000ULL +#define MM_TOP_DDR_4_SIZE 0xb780000000ULL + +#define MM_PSM_START 0xffc80000U +#define MM_PSM_END 0xffcf0000U + +#define MM_CRL 0xff5e0000U +#define MM_CRL_SIZE 0x300000 +#define MM_IOU_SCNTR 0xff130000U +#define MM_IOU_SCNTR_SIZE 0x10000 +#define MM_IOU_SCNTRS 0xff140000U +#define MM_IOU_SCNTRS_SIZE 0x10000 +#define MM_FPD_CRF 0xfd1a0000U +#define MM_FPD_CRF_SIZE 0x140000 + +#define MM_PMC_SD0 0xf1040000U +#define MM_PMC_SD0_SIZE 0x10000 +#define MM_PMC_CRP 0xf1260000U +#define MM_PMC_CRP_SIZE 0x10000 +#define MM_PMC_RTC 0xf12a0000 +#define MM_PMC_RTC_SIZE 0x10000 +#endif diff --git a/include/hw/arm/xlnx-zynqmp.h b/include/hw/arm/xlnx-zynqmp.h new file mode 100644 index 000000000..567d0dba0 --- /dev/null +++ b/include/hw/arm/xlnx-zynqmp.h @@ -0,0 +1,117 @@ +/* + * Xilinx Zynq MPSoC emulation + * + * Copyright (C) 2015 Xilinx Inc + * Written by Peter Crosthwaite + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * for more details. + */ + +#ifndef XLNX_ZYNQMP_H +#define XLNX_ZYNQMP_H + +#include "hw/arm/boot.h" +#include "hw/intc/arm_gic.h" +#include "hw/net/cadence_gem.h" +#include "hw/char/cadence_uart.h" +#include "hw/ide/ahci.h" +#include "hw/sd/sdhci.h" +#include "hw/ssi/xilinx_spips.h" +#include "hw/dma/xlnx_dpdma.h" +#include "hw/dma/xlnx-zdma.h" +#include "hw/display/xlnx_dp.h" +#include "hw/intc/xlnx-zynqmp-ipi.h" +#include "hw/rtc/xlnx-zynqmp-rtc.h" +#include "hw/cpu/cluster.h" +#include "target/arm/cpu.h" +#include "qom/object.h" + +#define TYPE_XLNX_ZYNQMP "xlnx,zynqmp" +OBJECT_DECLARE_SIMPLE_TYPE(XlnxZynqMPState, XLNX_ZYNQMP) + +#define XLNX_ZYNQMP_NUM_APU_CPUS 4 +#define XLNX_ZYNQMP_NUM_RPU_CPUS 2 +#define XLNX_ZYNQMP_NUM_GEMS 4 +#define XLNX_ZYNQMP_NUM_UARTS 2 +#define XLNX_ZYNQMP_NUM_SDHCI 2 +#define XLNX_ZYNQMP_NUM_SPIS 2 +#define XLNX_ZYNQMP_NUM_GDMA_CH 8 +#define XLNX_ZYNQMP_NUM_ADMA_CH 8 + +#define XLNX_ZYNQMP_NUM_QSPI_BUS 2 +#define XLNX_ZYNQMP_NUM_QSPI_BUS_CS 2 +#define XLNX_ZYNQMP_NUM_QSPI_FLASH 4 + +#define XLNX_ZYNQMP_NUM_OCM_BANKS 4 +#define XLNX_ZYNQMP_OCM_RAM_0_ADDRESS 0xFFFC0000 +#define XLNX_ZYNQMP_OCM_RAM_SIZE 0x10000 + +#define XLNX_ZYNQMP_GIC_REGIONS 6 + +/* ZynqMP maps the ARM GIC regions (GICC, GICD ...) at consecutive 64k offsets + * and under-decodes the 64k region. This mirrors the 4k regions to every 4k + * aligned address in the 64k region. To implement each GIC region needs a + * number of memory region aliases. + */ + +#define XLNX_ZYNQMP_GIC_REGION_SIZE 0x1000 +#define XLNX_ZYNQMP_GIC_ALIASES (0x10000 / XLNX_ZYNQMP_GIC_REGION_SIZE) + +#define XLNX_ZYNQMP_MAX_LOW_RAM_SIZE 0x80000000ull + +#define XLNX_ZYNQMP_MAX_HIGH_RAM_SIZE 0x800000000ull +#define XLNX_ZYNQMP_HIGH_RAM_START 0x800000000ull + +#define XLNX_ZYNQMP_MAX_RAM_SIZE (XLNX_ZYNQMP_MAX_LOW_RAM_SIZE + \ + XLNX_ZYNQMP_MAX_HIGH_RAM_SIZE) + +struct XlnxZynqMPState { + /*< private >*/ + DeviceState parent_obj; + + /*< public >*/ + CPUClusterState apu_cluster; + CPUClusterState rpu_cluster; + ARMCPU apu_cpu[XLNX_ZYNQMP_NUM_APU_CPUS]; + ARMCPU rpu_cpu[XLNX_ZYNQMP_NUM_RPU_CPUS]; + GICState gic; + MemoryRegion gic_mr[XLNX_ZYNQMP_GIC_REGIONS][XLNX_ZYNQMP_GIC_ALIASES]; + + MemoryRegion ocm_ram[XLNX_ZYNQMP_NUM_OCM_BANKS]; + + MemoryRegion *ddr_ram; + MemoryRegion ddr_ram_low, ddr_ram_high; + + CadenceGEMState gem[XLNX_ZYNQMP_NUM_GEMS]; + CadenceUARTState uart[XLNX_ZYNQMP_NUM_UARTS]; + SysbusAHCIState sata; + SDHCIState sdhci[XLNX_ZYNQMP_NUM_SDHCI]; + XilinxSPIPS spi[XLNX_ZYNQMP_NUM_SPIS]; + XlnxZynqMPQSPIPS qspi; + XlnxDPState dp; + XlnxDPDMAState dpdma; + XlnxZynqMPIPI ipi; + XlnxZynqMPRTC rtc; + XlnxZDMA gdma[XLNX_ZYNQMP_NUM_GDMA_CH]; + XlnxZDMA adma[XLNX_ZYNQMP_NUM_ADMA_CH]; + + char *boot_cpu; + ARMCPU *boot_cpu_ptr; + + /* Has the ARM Security extensions? */ + bool secure; + /* Has the ARM Virtualization extensions? */ + bool virt; + /* Has the RPU subsystem? */ + bool has_rpu; +}; + +#endif diff --git a/include/hw/audio/pcspk.h b/include/hw/audio/pcspk.h new file mode 100644 index 000000000..950617958 --- /dev/null +++ b/include/hw/audio/pcspk.h @@ -0,0 +1,40 @@ +/* + * QEMU PC speaker emulation + * + * Copyright (c) 2006 Joachim Henke + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ + +#ifndef HW_PCSPK_H +#define HW_PCSPK_H + +#include "hw/isa/isa.h" +#include "hw/qdev-properties.h" +#include "qapi/error.h" + +#define TYPE_PC_SPEAKER "isa-pcspk" + +static inline void pcspk_init(ISADevice *isadev, ISABus *bus, ISADevice *pit) +{ + object_property_set_link(OBJECT(isadev), "pit", OBJECT(pit), NULL); + isa_realize_and_unref(isadev, bus, &error_fatal); +} + +#endif /* HW_PCSPK_H */ diff --git a/include/hw/audio/soundhw.h b/include/hw/audio/soundhw.h new file mode 100644 index 000000000..f09a29785 --- /dev/null +++ b/include/hw/audio/soundhw.h @@ -0,0 +1,15 @@ +#ifndef HW_SOUNDHW_H +#define HW_SOUNDHW_H + +void isa_register_soundhw(const char *name, const char *descr, + int (*init_isa)(ISABus *bus)); + +void pci_register_soundhw(const char *name, const char *descr, + int (*init_pci)(PCIBus *bus)); +void deprecated_register_soundhw(const char *name, const char *descr, + int isa, const char *typename); + +void soundhw_init(void); +void select_soundhw(const char *optarg); + +#endif diff --git a/include/hw/audio/wm8750.h b/include/hw/audio/wm8750.h new file mode 100644 index 000000000..f7bafd5e3 --- /dev/null +++ b/include/hw/audio/wm8750.h @@ -0,0 +1,30 @@ +/* + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2 or later, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along with + * this program. If not, see . + */ +#ifndef HW_DAC_WM8750_H +#define HW_DAC_WM8750_H + + +#define TYPE_WM8750 "wm8750" +#define TYPE_MV88W8618_AUDIO "mv88w8618_audio" + +typedef void data_req_cb(void *opaque, int free_out, int free_in); + +void wm8750_data_req_set(DeviceState *dev, data_req_cb *data_req, void *opaque); +void wm8750_dac_dat(void *opaque, uint32_t sample); +uint32_t wm8750_adc_dat(void *opaque); +void *wm8750_dac_buffer(void *opaque, int samples); +void wm8750_dac_commit(void *opaque); +void wm8750_set_bclk_in(void *opaque, int new_hz); + +#endif diff --git a/include/hw/block/block.h b/include/hw/block/block.h new file mode 100644 index 000000000..1e8b6253d --- /dev/null +++ b/include/hw/block/block.h @@ -0,0 +1,101 @@ +/* + * Common code for block device models + * + * Copyright (C) 2012 Red Hat, Inc. + * Copyright (c) 2003-2008 Fabrice Bellard + * + * This work is licensed under the terms of the GNU GPL, version 2 or + * later. See the COPYING file in the top-level directory. + */ + +#ifndef HW_BLOCK_H +#define HW_BLOCK_H + +#include "exec/hwaddr.h" +#include "qapi/qapi-types-block-core.h" + +/* Configuration */ + +typedef struct BlockConf { + BlockBackend *blk; + uint32_t physical_block_size; + uint32_t logical_block_size; + uint32_t min_io_size; + uint32_t opt_io_size; + int32_t bootindex; + uint32_t discard_granularity; + /* geometry, not all devices use this */ + uint32_t cyls, heads, secs; + uint32_t lcyls, lheads, lsecs; + OnOffAuto wce; + bool share_rw; + BlockdevOnError rerror; + BlockdevOnError werror; +} BlockConf; + +static inline unsigned int get_physical_block_exp(BlockConf *conf) +{ + unsigned int exp = 0, size; + + for (size = conf->physical_block_size; + size > conf->logical_block_size; + size >>= 1) { + exp++; + } + + return exp; +} + +#define DEFINE_BLOCK_PROPERTIES_BASE(_state, _conf) \ + DEFINE_PROP_BLOCKSIZE("logical_block_size", _state, \ + _conf.logical_block_size), \ + DEFINE_PROP_BLOCKSIZE("physical_block_size", _state, \ + _conf.physical_block_size), \ + DEFINE_PROP_SIZE32("min_io_size", _state, _conf.min_io_size, 0), \ + DEFINE_PROP_SIZE32("opt_io_size", _state, _conf.opt_io_size, 0), \ + DEFINE_PROP_SIZE32("discard_granularity", _state, \ + _conf.discard_granularity, -1), \ + DEFINE_PROP_ON_OFF_AUTO("write-cache", _state, _conf.wce, \ + ON_OFF_AUTO_AUTO), \ + DEFINE_PROP_BOOL("share-rw", _state, _conf.share_rw, false) + +#define DEFINE_BLOCK_PROPERTIES(_state, _conf) \ + DEFINE_PROP_DRIVE("drive", _state, _conf.blk), \ + DEFINE_BLOCK_PROPERTIES_BASE(_state, _conf) + +#define DEFINE_BLOCK_CHS_PROPERTIES(_state, _conf) \ + DEFINE_PROP_UINT32("cyls", _state, _conf.cyls, 0), \ + DEFINE_PROP_UINT32("heads", _state, _conf.heads, 0), \ + DEFINE_PROP_UINT32("secs", _state, _conf.secs, 0), \ + DEFINE_PROP_UINT32("lcyls", _state, _conf.lcyls, 0), \ + DEFINE_PROP_UINT32("lheads", _state, _conf.lheads, 0), \ + DEFINE_PROP_UINT32("lsecs", _state, _conf.lsecs, 0) + +#define DEFINE_BLOCK_ERROR_PROPERTIES(_state, _conf) \ + DEFINE_PROP_BLOCKDEV_ON_ERROR("rerror", _state, _conf.rerror, \ + BLOCKDEV_ON_ERROR_AUTO), \ + DEFINE_PROP_BLOCKDEV_ON_ERROR("werror", _state, _conf.werror, \ + BLOCKDEV_ON_ERROR_AUTO) + +/* Backend access helpers */ + +bool blk_check_size_and_read_all(BlockBackend *blk, void *buf, hwaddr size, + Error **errp); + +/* Configuration helpers */ + +bool blkconf_geometry(BlockConf *conf, int *trans, + unsigned cyls_max, unsigned heads_max, unsigned secs_max, + Error **errp); +bool blkconf_blocksizes(BlockConf *conf, Error **errp); +bool blkconf_apply_backend_options(BlockConf *conf, bool readonly, + bool resizable, Error **errp); + +/* Hard disk geometry */ + +void hd_geometry_guess(BlockBackend *blk, + uint32_t *pcyls, uint32_t *pheads, uint32_t *psecs, + int *ptrans); +int hd_bios_chs_auto_trans(uint32_t cyls, uint32_t heads, uint32_t secs); + +#endif diff --git a/include/hw/block/fdc.h b/include/hw/block/fdc.h new file mode 100644 index 000000000..1ecca7cac --- /dev/null +++ b/include/hw/block/fdc.h @@ -0,0 +1,21 @@ +#ifndef HW_FDC_H +#define HW_FDC_H + +#include "exec/hwaddr.h" +#include "qapi/qapi-types-block.h" + +/* fdc.c */ +#define MAX_FD 2 + +#define TYPE_ISA_FDC "isa-fdc" + +void isa_fdc_init_drives(ISADevice *fdc, DriveInfo **fds); +void fdctrl_init_sysbus(qemu_irq irq, int dma_chann, + hwaddr mmio_base, DriveInfo **fds); +void sun4m_fdctrl_init(qemu_irq irq, hwaddr io_base, + DriveInfo **fds, qemu_irq *fdc_tc); + +FloppyDriveType isa_fdc_get_drive_type(ISADevice *fdc, int i); +int cmos_get_fd_drive_type(FloppyDriveType fd0); + +#endif diff --git a/include/hw/block/flash.h b/include/hw/block/flash.h new file mode 100644 index 000000000..7dde0adce --- /dev/null +++ b/include/hw/block/flash.h @@ -0,0 +1,79 @@ +#ifndef HW_FLASH_H +#define HW_FLASH_H + +/* NOR flash devices */ + +#include "exec/hwaddr.h" +#include "qom/object.h" + +/* pflash_cfi01.c */ + +#define TYPE_PFLASH_CFI01 "cfi.pflash01" +OBJECT_DECLARE_SIMPLE_TYPE(PFlashCFI01, PFLASH_CFI01) + + +PFlashCFI01 *pflash_cfi01_register(hwaddr base, + const char *name, + hwaddr size, + BlockBackend *blk, + uint32_t sector_len, + int width, + uint16_t id0, uint16_t id1, + uint16_t id2, uint16_t id3, + int be); +BlockBackend *pflash_cfi01_get_blk(PFlashCFI01 *fl); +MemoryRegion *pflash_cfi01_get_memory(PFlashCFI01 *fl); +void pflash_cfi01_legacy_drive(PFlashCFI01 *dev, DriveInfo *dinfo); + +/* pflash_cfi02.c */ + +#define TYPE_PFLASH_CFI02 "cfi.pflash02" +OBJECT_DECLARE_SIMPLE_TYPE(PFlashCFI02, PFLASH_CFI02) + + +PFlashCFI02 *pflash_cfi02_register(hwaddr base, + const char *name, + hwaddr size, + BlockBackend *blk, + uint32_t sector_len, + int nb_mappings, + int width, + uint16_t id0, uint16_t id1, + uint16_t id2, uint16_t id3, + uint16_t unlock_addr0, + uint16_t unlock_addr1, + int be); + +/* nand.c */ +DeviceState *nand_init(BlockBackend *blk, int manf_id, int chip_id); +void nand_setpins(DeviceState *dev, uint8_t cle, uint8_t ale, + uint8_t ce, uint8_t wp, uint8_t gnd); +void nand_getpins(DeviceState *dev, int *rb); +void nand_setio(DeviceState *dev, uint32_t value); +uint32_t nand_getio(DeviceState *dev); +uint32_t nand_getbuswidth(DeviceState *dev); + +#define NAND_MFR_TOSHIBA 0x98 +#define NAND_MFR_SAMSUNG 0xec +#define NAND_MFR_FUJITSU 0x04 +#define NAND_MFR_NATIONAL 0x8f +#define NAND_MFR_RENESAS 0x07 +#define NAND_MFR_STMICRO 0x20 +#define NAND_MFR_HYNIX 0xad +#define NAND_MFR_MICRON 0x2c + +/* onenand.c */ +void *onenand_raw_otp(DeviceState *onenand_device); + +/* ecc.c */ +typedef struct { + uint8_t cp; /* Column parity */ + uint16_t lp[2]; /* Line parity */ + uint16_t count; +} ECCState; + +uint8_t ecc_digest(ECCState *s, uint8_t sample); +void ecc_reset(ECCState *s); +extern VMStateDescription vmstate_ecc_state; + +#endif diff --git a/include/hw/block/swim.h b/include/hw/block/swim.h new file mode 100644 index 000000000..5a4902954 --- /dev/null +++ b/include/hw/block/swim.h @@ -0,0 +1,75 @@ +/* + * QEMU Macintosh floppy disk controller emulator (SWIM) + * + * Copyright (c) 2014-2018 Laurent Vivier + * + * This work is licensed under the terms of the GNU GPL, version 2. See + * the COPYING file in the top-level directory. + * + */ + +#ifndef SWIM_H +#define SWIM_H + +#include "qemu/osdep.h" +#include "hw/sysbus.h" +#include "qom/object.h" + +#define SWIM_MAX_FD 2 + +typedef struct SWIMCtrl SWIMCtrl; + +#define TYPE_SWIM_DRIVE "swim-drive" +OBJECT_DECLARE_SIMPLE_TYPE(SWIMDrive, SWIM_DRIVE) + +struct SWIMDrive { + DeviceState qdev; + int32_t unit; + BlockConf conf; +}; + +#define TYPE_SWIM_BUS "swim-bus" +OBJECT_DECLARE_SIMPLE_TYPE(SWIMBus, SWIM_BUS) + +struct SWIMBus { + BusState bus; + struct SWIMCtrl *ctrl; +}; + +typedef struct FDrive { + SWIMCtrl *swimctrl; + BlockBackend *blk; + BlockConf *conf; +} FDrive; + +struct SWIMCtrl { + MemoryRegion iomem; + FDrive drives[SWIM_MAX_FD]; + int mode; + /* IWM mode */ + int iwm_switch; + uint16_t regs[8]; +#define IWM_PH0 0 +#define IWM_PH1 1 +#define IWM_PH2 2 +#define IWM_PH3 3 +#define IWM_MTR 4 +#define IWM_DRIVE 5 +#define IWM_Q6 6 +#define IWM_Q7 7 + uint8_t iwm_data; + uint8_t iwm_mode; + /* SWIM mode */ + uint8_t swim_phase; + uint8_t swim_mode; + SWIMBus bus; +}; + +#define TYPE_SWIM "swim" +OBJECT_DECLARE_SIMPLE_TYPE(Swim, SWIM) + +struct Swim { + SysBusDevice parent_obj; + SWIMCtrl ctrl; +}; +#endif diff --git a/include/hw/boards.h b/include/hw/boards.h new file mode 100644 index 000000000..a49e3a6b4 --- /dev/null +++ b/include/hw/boards.h @@ -0,0 +1,370 @@ +/* Declarations for use by board files for creating devices. */ + +#ifndef HW_BOARDS_H +#define HW_BOARDS_H + +#include "exec/memory.h" +#include "sysemu/hostmem.h" +#include "sysemu/blockdev.h" +#include "sysemu/accel.h" +#include "qapi/qapi-types-machine.h" +#include "qemu/module.h" +#include "qom/object.h" +#include "hw/core/cpu.h" + +#define TYPE_MACHINE_SUFFIX "-machine" + +/* Machine class name that needs to be used for class-name-based machine + * type lookup to work. + */ +#define MACHINE_TYPE_NAME(machinename) (machinename TYPE_MACHINE_SUFFIX) + +#define TYPE_MACHINE "machine" +#undef MACHINE /* BSD defines it and QEMU does not use it */ +OBJECT_DECLARE_TYPE(MachineState, MachineClass, MACHINE) + +extern MachineState *current_machine; + +void machine_run_board_init(MachineState *machine); +bool machine_usb(MachineState *machine); +int machine_phandle_start(MachineState *machine); +bool machine_dump_guest_core(MachineState *machine); +bool machine_mem_merge(MachineState *machine); +HotpluggableCPUList *machine_query_hotpluggable_cpus(MachineState *machine); +void machine_set_cpu_numa_node(MachineState *machine, + const CpuInstanceProperties *props, + Error **errp); + +void machine_class_allow_dynamic_sysbus_dev(MachineClass *mc, const char *type); +/* + * Checks that backend isn't used, preps it for exclusive usage and + * returns migratable MemoryRegion provided by backend. + */ +MemoryRegion *machine_consume_memdev(MachineState *machine, + HostMemoryBackend *backend); + +/** + * CPUArchId: + * @arch_id - architecture-dependent CPU ID of present or possible CPU + * @cpu - pointer to corresponding CPU object if it's present on NULL otherwise + * @type - QOM class name of possible @cpu object + * @props - CPU object properties, initialized by board + * #vcpus_count - number of threads provided by @cpu object + */ +typedef struct CPUArchId { + uint64_t arch_id; + int64_t vcpus_count; + CpuInstanceProperties props; + Object *cpu; + const char *type; +} CPUArchId; + +/** + * CPUArchIdList: + * @len - number of @CPUArchId items in @cpus array + * @cpus - array of present or possible CPUs for current machine configuration + */ +typedef struct { + int len; + CPUArchId cpus[]; +} CPUArchIdList; + +/** + * MachineClass: + * @deprecation_reason: If set, the machine is marked as deprecated. The + * string should provide some clear information about what to use instead. + * @max_cpus: maximum number of CPUs supported. Default: 1 + * @min_cpus: minimum number of CPUs supported. Default: 1 + * @default_cpus: number of CPUs instantiated if none are specified. Default: 1 + * @is_default: + * If true QEMU will use this machine by default if no '-M' option is given. + * @get_hotplug_handler: this function is called during bus-less + * device hotplug. If defined it returns pointer to an instance + * of HotplugHandler object, which handles hotplug operation + * for a given @dev. It may return NULL if @dev doesn't require + * any actions to be performed by hotplug handler. + * @cpu_index_to_instance_props: + * used to provide @cpu_index to socket/core/thread number mapping, allowing + * legacy code to perform maping from cpu_index to topology properties + * Returns: tuple of socket/core/thread ids given cpu_index belongs to. + * used to provide @cpu_index to socket number mapping, allowing + * a machine to group CPU threads belonging to the same socket/package + * Returns: socket number given cpu_index belongs to. + * @hw_version: + * Value of QEMU_VERSION when the machine was added to QEMU. + * Set only by old machines because they need to keep + * compatibility on code that exposed QEMU_VERSION to guests in + * the past (and now use qemu_hw_version()). + * @possible_cpu_arch_ids: + * Returns an array of @CPUArchId architecture-dependent CPU IDs + * which includes CPU IDs for present and possible to hotplug CPUs. + * Caller is responsible for freeing returned list. + * @get_default_cpu_node_id: + * returns default board specific node_id value for CPU slot specified by + * index @idx in @ms->possible_cpus[] + * @has_hotpluggable_cpus: + * If true, board supports CPUs creation with -device/device_add. + * @default_cpu_type: + * specifies default CPU_TYPE, which will be used for parsing target + * specific features and for creating CPUs if CPU name wasn't provided + * explicitly at CLI + * @minimum_page_bits: + * If non-zero, the board promises never to create a CPU with a page size + * smaller than this, so QEMU can use a more efficient larger page + * size than the target architecture's minimum. (Attempting to create + * such a CPU will fail.) Note that changing this is a migration + * compatibility break for the machine. + * @ignore_memory_transaction_failures: + * If this is flag is true then the CPU will ignore memory transaction + * failures which should cause the CPU to take an exception due to an + * access to an unassigned physical address; the transaction will instead + * return zero (for a read) or be ignored (for a write). This should be + * set only by legacy board models which rely on the old RAZ/WI behaviour + * for handling devices that QEMU does not yet model. New board models + * should instead use "unimplemented-device" for all memory ranges where + * the guest will attempt to probe for a device that QEMU doesn't + * implement and a stub device is required. + * @kvm_type: + * Return the type of KVM corresponding to the kvm-type string option or + * computed based on other criteria such as the host kernel capabilities. + * @numa_mem_supported: + * true if '--numa node.mem' option is supported and false otherwise + * @smp_parse: + * The function pointer to hook different machine specific functions for + * parsing "smp-opts" from QemuOpts to MachineState::CpuTopology and more + * machine specific topology fields, such as smp_dies for PCMachine. + * @hotplug_allowed: + * If the hook is provided, then it'll be called for each device + * hotplug to check whether the device hotplug is allowed. Return + * true to grant allowance or false to reject the hotplug. When + * false is returned, an error must be set to show the reason of + * the rejection. If the hook is not provided, all hotplug will be + * allowed. + * @default_ram_id: + * Specifies inital RAM MemoryRegion name to be used for default backend + * creation if user explicitly hasn't specified backend with "memory-backend" + * property. + * It also will be used as a way to optin into "-m" option support. + * If it's not set by board, '-m' will be ignored and generic code will + * not create default RAM MemoryRegion. + * @fixup_ram_size: + * Amends user provided ram size (with -m option) using machine + * specific algorithm. To be used by old machine types for compat + * purposes only. + * Applies only to default memory backend, i.e., explicit memory backend + * wasn't used. + */ +struct MachineClass { + /*< private >*/ + ObjectClass parent_class; + /*< public >*/ + + const char *family; /* NULL iff @name identifies a standalone machtype */ + char *name; + const char *alias; + const char *desc; + const char *deprecation_reason; + + void (*init)(MachineState *state); + void (*reset)(MachineState *state); + void (*wakeup)(MachineState *state); + int (*kvm_type)(MachineState *machine, const char *arg); + void (*smp_parse)(MachineState *ms, QemuOpts *opts); + + BlockInterfaceType block_default_type; + int units_per_default_bus; + int max_cpus; + int min_cpus; + int default_cpus; + unsigned int no_serial:1, + no_parallel:1, + no_floppy:1, + no_cdrom:1, + no_sdcard:1, + pci_allow_0_address:1, + legacy_fw_cfg_order:1; + bool is_default; + const char *default_machine_opts; + const char *default_boot_order; + const char *default_display; + GPtrArray *compat_props; + const char *hw_version; + ram_addr_t default_ram_size; + const char *default_cpu_type; + bool default_kernel_irqchip_split; + bool option_rom_has_mr; + bool rom_file_has_mr; + int minimum_page_bits; + bool has_hotpluggable_cpus; + bool ignore_memory_transaction_failures; + int numa_mem_align_shift; + const char **valid_cpu_types; + strList *allowed_dynamic_sysbus_devices; + bool auto_enable_numa_with_memhp; + bool auto_enable_numa_with_memdev; + bool ignore_boot_device_suffixes; + bool smbus_no_migration_support; + bool nvdimm_supported; + bool numa_mem_supported; + bool auto_enable_numa; + const char *default_ram_id; + + HotplugHandler *(*get_hotplug_handler)(MachineState *machine, + DeviceState *dev); + bool (*hotplug_allowed)(MachineState *state, DeviceState *dev, + Error **errp); + CpuInstanceProperties (*cpu_index_to_instance_props)(MachineState *machine, + unsigned cpu_index); + const CPUArchIdList *(*possible_cpu_arch_ids)(MachineState *machine); + int64_t (*get_default_cpu_node_id)(const MachineState *ms, int idx); + ram_addr_t (*fixup_ram_size)(ram_addr_t size); +}; + +/** + * DeviceMemoryState: + * @base: address in guest physical address space where the memory + * address space for memory devices starts + * @mr: address space container for memory devices + */ +typedef struct DeviceMemoryState { + hwaddr base; + MemoryRegion mr; +} DeviceMemoryState; + +/** + * CpuTopology: + * @cpus: the number of present logical processors on the machine + * @cores: the number of cores in one package + * @threads: the number of threads in one core + * @sockets: the number of sockets on the machine + * @max_cpus: the maximum number of logical processors on the machine + */ +typedef struct CpuTopology { + unsigned int cpus; + unsigned int cores; + unsigned int threads; + unsigned int sockets; + unsigned int max_cpus; +} CpuTopology; + +/** + * MachineState: + */ +struct MachineState { + /*< private >*/ + Object parent_obj; + Notifier sysbus_notifier; + + /*< public >*/ + + char *dtb; + char *dumpdtb; + int phandle_start; + char *dt_compatible; + bool dump_guest_core; + bool mem_merge; + bool usb; + bool usb_disabled; + char *firmware; + bool iommu; + bool suppress_vmdesc; + bool enable_graphics; + char *memory_encryption; + char *ram_memdev_id; + /* + * convenience alias to ram_memdev_id backend memory region + * or to numa container memory region + */ + MemoryRegion *ram; + DeviceMemoryState *device_memory; + + ram_addr_t ram_size; + ram_addr_t maxram_size; + uint64_t ram_slots; + const char *boot_order; + char *kernel_filename; + char *kernel_cmdline; + char *initrd_filename; + const char *cpu_type; + AccelState *accelerator; + CPUArchIdList *possible_cpus; + CpuTopology smp; + struct NVDIMMState *nvdimms_state; + struct NumaState *numa_state; +}; + +#define DEFINE_MACHINE(namestr, machine_initfn) \ + static void machine_initfn##_class_init(ObjectClass *oc, void *data) \ + { \ + MachineClass *mc = MACHINE_CLASS(oc); \ + machine_initfn(mc); \ + } \ + static const TypeInfo machine_initfn##_typeinfo = { \ + .name = MACHINE_TYPE_NAME(namestr), \ + .parent = TYPE_MACHINE, \ + .class_init = machine_initfn##_class_init, \ + }; \ + static void machine_initfn##_register_types(void) \ + { \ + type_register_static(&machine_initfn##_typeinfo); \ + } \ + type_init(machine_initfn##_register_types) + +extern GlobalProperty hw_compat_5_1[]; +extern const size_t hw_compat_5_1_len; + +extern GlobalProperty hw_compat_5_0[]; +extern const size_t hw_compat_5_0_len; + +extern GlobalProperty hw_compat_4_2[]; +extern const size_t hw_compat_4_2_len; + +extern GlobalProperty hw_compat_4_1[]; +extern const size_t hw_compat_4_1_len; + +extern GlobalProperty hw_compat_4_0[]; +extern const size_t hw_compat_4_0_len; + +extern GlobalProperty hw_compat_3_1[]; +extern const size_t hw_compat_3_1_len; + +extern GlobalProperty hw_compat_3_0[]; +extern const size_t hw_compat_3_0_len; + +extern GlobalProperty hw_compat_2_12[]; +extern const size_t hw_compat_2_12_len; + +extern GlobalProperty hw_compat_2_11[]; +extern const size_t hw_compat_2_11_len; + +extern GlobalProperty hw_compat_2_10[]; +extern const size_t hw_compat_2_10_len; + +extern GlobalProperty hw_compat_2_9[]; +extern const size_t hw_compat_2_9_len; + +extern GlobalProperty hw_compat_2_8[]; +extern const size_t hw_compat_2_8_len; + +extern GlobalProperty hw_compat_2_7[]; +extern const size_t hw_compat_2_7_len; + +extern GlobalProperty hw_compat_2_6[]; +extern const size_t hw_compat_2_6_len; + +extern GlobalProperty hw_compat_2_5[]; +extern const size_t hw_compat_2_5_len; + +extern GlobalProperty hw_compat_2_4[]; +extern const size_t hw_compat_2_4_len; + +extern GlobalProperty hw_compat_2_3[]; +extern const size_t hw_compat_2_3_len; + +extern GlobalProperty hw_compat_2_2[]; +extern const size_t hw_compat_2_2_len; + +extern GlobalProperty hw_compat_2_1[]; +extern const size_t hw_compat_2_1_len; + +#endif diff --git a/include/hw/char/avr_usart.h b/include/hw/char/avr_usart.h new file mode 100644 index 000000000..bb5753240 --- /dev/null +++ b/include/hw/char/avr_usart.h @@ -0,0 +1,93 @@ +/* + * AVR USART + * + * Copyright (c) 2018 University of Kent + * Author: Sarah Harris + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, see + * + */ + +#ifndef HW_CHAR_AVR_USART_H +#define HW_CHAR_AVR_USART_H + +#include "hw/sysbus.h" +#include "chardev/char-fe.h" +#include "hw/hw.h" +#include "qom/object.h" + +/* Offsets of registers. */ +#define USART_DR 0x06 +#define USART_CSRA 0x00 +#define USART_CSRB 0x01 +#define USART_CSRC 0x02 +#define USART_BRRH 0x05 +#define USART_BRRL 0x04 + +/* Relevant bits in regiters. */ +#define USART_CSRA_RXC (1 << 7) +#define USART_CSRA_TXC (1 << 6) +#define USART_CSRA_DRE (1 << 5) +#define USART_CSRA_MPCM (1 << 0) + +#define USART_CSRB_RXCIE (1 << 7) +#define USART_CSRB_TXCIE (1 << 6) +#define USART_CSRB_DREIE (1 << 5) +#define USART_CSRB_RXEN (1 << 4) +#define USART_CSRB_TXEN (1 << 3) +#define USART_CSRB_CSZ2 (1 << 2) +#define USART_CSRB_RXB8 (1 << 1) +#define USART_CSRB_TXB8 (1 << 0) + +#define USART_CSRC_MSEL1 (1 << 7) +#define USART_CSRC_MSEL0 (1 << 6) +#define USART_CSRC_PM1 (1 << 5) +#define USART_CSRC_PM0 (1 << 4) +#define USART_CSRC_CSZ1 (1 << 2) +#define USART_CSRC_CSZ0 (1 << 1) + +#define TYPE_AVR_USART "avr-usart" +OBJECT_DECLARE_SIMPLE_TYPE(AVRUsartState, AVR_USART) + +struct AVRUsartState { + /* */ + SysBusDevice parent_obj; + + /* */ + MemoryRegion mmio; + + CharBackend chr; + + bool enabled; + + uint8_t data; + bool data_valid; + uint8_t char_mask; + /* Control and Status Registers */ + uint8_t csra; + uint8_t csrb; + uint8_t csrc; + /* Baud Rate Registers (low/high byte) */ + uint8_t brrh; + uint8_t brrl; + + /* Receive Complete */ + qemu_irq rxc_irq; + /* Transmit Complete */ + qemu_irq txc_irq; + /* Data Register Empty */ + qemu_irq dre_irq; +}; + +#endif /* HW_CHAR_AVR_USART_H */ diff --git a/include/hw/char/bcm2835_aux.h b/include/hw/char/bcm2835_aux.h new file mode 100644 index 000000000..9e081793a --- /dev/null +++ b/include/hw/char/bcm2835_aux.h @@ -0,0 +1,35 @@ +/* + * Rasperry Pi 2 emulation and refactoring Copyright (c) 2015, Microsoft + * Written by Andrew Baumann + * + * This work is licensed under the terms of the GNU GPL, version 2 or later. + * See the COPYING file in the top-level directory. + */ + +#ifndef BCM2835_AUX_H +#define BCM2835_AUX_H + +#include "hw/sysbus.h" +#include "chardev/char-fe.h" +#include "qom/object.h" + +#define TYPE_BCM2835_AUX "bcm2835-aux" +OBJECT_DECLARE_SIMPLE_TYPE(BCM2835AuxState, BCM2835_AUX) + +#define BCM2835_AUX_RX_FIFO_LEN 8 + +struct BCM2835AuxState { + /*< private >*/ + SysBusDevice parent_obj; + /*< public >*/ + + MemoryRegion iomem; + CharBackend chr; + qemu_irq irq; + + uint8_t read_fifo[BCM2835_AUX_RX_FIFO_LEN]; + uint8_t read_pos, read_count; + uint8_t ier, iir; +}; + +#endif diff --git a/include/hw/char/cadence_uart.h b/include/hw/char/cadence_uart.h new file mode 100644 index 000000000..e7f7cd846 --- /dev/null +++ b/include/hw/char/cadence_uart.h @@ -0,0 +1,56 @@ +/* + * Device model for Cadence UART + * + * Copyright (c) 2010 Xilinx Inc. + * Copyright (c) 2012 Peter A.G. Crosthwaite (peter.crosthwaite@petalogix.com) + * Copyright (c) 2012 PetaLogix Pty Ltd. + * Written by Haibing Ma + * M.Habib + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, see . + */ + +#ifndef CADENCE_UART_H +#define CADENCE_UART_H + +#include "hw/qdev-properties.h" +#include "hw/sysbus.h" +#include "chardev/char-fe.h" +#include "qapi/error.h" +#include "qemu/timer.h" +#include "qom/object.h" + +#define CADENCE_UART_RX_FIFO_SIZE 16 +#define CADENCE_UART_TX_FIFO_SIZE 16 + +#define CADENCE_UART_R_MAX (0x48/4) + +#define TYPE_CADENCE_UART "cadence_uart" +OBJECT_DECLARE_SIMPLE_TYPE(CadenceUARTState, CADENCE_UART) + +struct CadenceUARTState { + /*< private >*/ + SysBusDevice parent_obj; + + /*< public >*/ + MemoryRegion iomem; + uint32_t r[CADENCE_UART_R_MAX]; + uint8_t rx_fifo[CADENCE_UART_RX_FIFO_SIZE]; + uint8_t tx_fifo[CADENCE_UART_TX_FIFO_SIZE]; + uint32_t rx_wpos; + uint32_t rx_count; + uint32_t tx_count; + uint64_t char_tx_time; + CharBackend chr; + qemu_irq irq; + QEMUTimer *fifo_trigger_handle; + Clock *refclk; +}; + +#endif diff --git a/include/hw/char/cmsdk-apb-uart.h b/include/hw/char/cmsdk-apb-uart.h new file mode 100644 index 000000000..9daff0eee --- /dev/null +++ b/include/hw/char/cmsdk-apb-uart.h @@ -0,0 +1,79 @@ +/* + * ARM CMSDK APB UART emulation + * + * Copyright (c) 2017 Linaro Limited + * Written by Peter Maydell + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 or + * (at your option) any later version. + */ + +#ifndef CMSDK_APB_UART_H +#define CMSDK_APB_UART_H + +#include "hw/qdev-properties.h" +#include "hw/sysbus.h" +#include "chardev/char-fe.h" +#include "qom/object.h" + +#define TYPE_CMSDK_APB_UART "cmsdk-apb-uart" +OBJECT_DECLARE_SIMPLE_TYPE(CMSDKAPBUART, CMSDK_APB_UART) + +struct CMSDKAPBUART { + /*< private >*/ + SysBusDevice parent_obj; + + /*< public >*/ + MemoryRegion iomem; + CharBackend chr; + qemu_irq txint; + qemu_irq rxint; + qemu_irq txovrint; + qemu_irq rxovrint; + qemu_irq uartint; + guint watch_tag; + uint32_t pclk_frq; + + uint32_t state; + uint32_t ctrl; + uint32_t intstatus; + uint32_t bauddiv; + /* This UART has no FIFO, only a 1-character buffer for each of Tx and Rx */ + uint8_t txbuf; + uint8_t rxbuf; +}; + +/** + * cmsdk_apb_uart_create - convenience function to create TYPE_CMSDK_APB_UART + * @addr: location in system memory to map registers + * @chr: Chardev backend to connect UART to, or NULL if no backend + * @pclk_frq: frequency in Hz of the PCLK clock (used for calculating baud rate) + */ +static inline DeviceState *cmsdk_apb_uart_create(hwaddr addr, + qemu_irq txint, + qemu_irq rxint, + qemu_irq txovrint, + qemu_irq rxovrint, + qemu_irq uartint, + Chardev *chr, + uint32_t pclk_frq) +{ + DeviceState *dev; + SysBusDevice *s; + + dev = qdev_new(TYPE_CMSDK_APB_UART); + s = SYS_BUS_DEVICE(dev); + qdev_prop_set_chr(dev, "chardev", chr); + qdev_prop_set_uint32(dev, "pclk-frq", pclk_frq); + sysbus_realize_and_unref(s, &error_fatal); + sysbus_mmio_map(s, 0, addr); + sysbus_connect_irq(s, 0, txint); + sysbus_connect_irq(s, 1, rxint); + sysbus_connect_irq(s, 2, txovrint); + sysbus_connect_irq(s, 3, rxovrint); + sysbus_connect_irq(s, 4, uartint); + return dev; +} + +#endif diff --git a/include/hw/char/digic-uart.h b/include/hw/char/digic-uart.h new file mode 100644 index 000000000..f710a1a09 --- /dev/null +++ b/include/hw/char/digic-uart.h @@ -0,0 +1,47 @@ +/* + * Canon DIGIC UART block declarations. + * + * Copyright (C) 2013 Antony Pavlov + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + */ + +#ifndef HW_CHAR_DIGIC_UART_H +#define HW_CHAR_DIGIC_UART_H + +#include "hw/sysbus.h" +#include "chardev/char-fe.h" +#include "qom/object.h" + +#define TYPE_DIGIC_UART "digic-uart" +OBJECT_DECLARE_SIMPLE_TYPE(DigicUartState, DIGIC_UART) + +enum { + R_TX = 0x00, + R_RX, + R_ST = (0x14 >> 2), + R_MAX +}; + +struct DigicUartState { + /*< private >*/ + SysBusDevice parent_obj; + /*< public >*/ + + MemoryRegion regs_region; + CharBackend chr; + + uint32_t reg_rx; + uint32_t reg_st; +}; + +#endif /* HW_CHAR_DIGIC_UART_H */ diff --git a/include/hw/char/escc.h b/include/hw/char/escc.h new file mode 100644 index 000000000..7e9482dee --- /dev/null +++ b/include/hw/char/escc.h @@ -0,0 +1,61 @@ +#ifndef HW_ESCC_H +#define HW_ESCC_H + +#include "chardev/char-fe.h" +#include "chardev/char-serial.h" +#include "hw/sysbus.h" +#include "ui/input.h" +#include "qom/object.h" + +/* escc.c */ +#define TYPE_ESCC "escc" +#define ESCC_SIZE 4 + +OBJECT_DECLARE_SIMPLE_TYPE(ESCCState, ESCC) + +typedef enum { + escc_chn_a, escc_chn_b, +} ESCCChnID; + +typedef enum { + escc_serial, escc_kbd, escc_mouse, +} ESCCChnType; + +#define ESCC_SERIO_QUEUE_SIZE 256 + +typedef struct { + uint8_t data[ESCC_SERIO_QUEUE_SIZE]; + int rptr, wptr, count; +} ESCCSERIOQueue; + +#define ESCC_SERIAL_REGS 16 +typedef struct ESCCChannelState { + qemu_irq irq; + uint32_t rxint, txint, rxint_under_svc, txint_under_svc; + struct ESCCChannelState *otherchn; + uint32_t reg; + uint8_t wregs[ESCC_SERIAL_REGS], rregs[ESCC_SERIAL_REGS]; + ESCCSERIOQueue queue; + CharBackend chr; + int e0_mode, led_mode, caps_lock_mode, num_lock_mode; + int disabled; + int clock; + uint32_t vmstate_dummy; + ESCCChnID chn; /* this channel, A (base+4) or B (base+0) */ + ESCCChnType type; + uint8_t rx, tx; + QemuInputHandlerState *hs; +} ESCCChannelState; + +struct ESCCState { + SysBusDevice parent_obj; + + struct ESCCChannelState chn[2]; + uint32_t it_shift; + bool bit_swap; + MemoryRegion mmio; + uint32_t disabled; + uint32_t frequency; +}; + +#endif diff --git a/include/hw/char/ibex_uart.h b/include/hw/char/ibex_uart.h new file mode 100644 index 000000000..03d19e3f6 --- /dev/null +++ b/include/hw/char/ibex_uart.h @@ -0,0 +1,107 @@ +/* + * QEMU lowRISC Ibex UART device + * + * Copyright (c) 2020 Western Digital + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ + +#ifndef HW_IBEX_UART_H +#define HW_IBEX_UART_H + +#include "hw/sysbus.h" +#include "hw/registerfields.h" +#include "chardev/char-fe.h" +#include "qemu/timer.h" +#include "qom/object.h" + +REG32(INTR_STATE, 0x00) + FIELD(INTR_STATE, TX_WATERMARK, 0, 1) + FIELD(INTR_STATE, RX_WATERMARK, 1, 1) + FIELD(INTR_STATE, TX_EMPTY, 2, 1) + FIELD(INTR_STATE, RX_OVERFLOW, 3, 1) +REG32(INTR_ENABLE, 0x04) +REG32(INTR_TEST, 0x08) +REG32(CTRL, 0x0C) + FIELD(CTRL, TX_ENABLE, 0, 1) + FIELD(CTRL, RX_ENABLE, 1, 1) + FIELD(CTRL, NF, 2, 1) + FIELD(CTRL, SLPBK, 4, 1) + FIELD(CTRL, LLPBK, 5, 1) + FIELD(CTRL, PARITY_EN, 6, 1) + FIELD(CTRL, PARITY_ODD, 7, 1) + FIELD(CTRL, RXBLVL, 8, 2) + FIELD(CTRL, NCO, 16, 16) +REG32(STATUS, 0x10) + FIELD(STATUS, TXFULL, 0, 1) + FIELD(STATUS, RXFULL, 1, 1) + FIELD(STATUS, TXEMPTY, 2, 1) + FIELD(STATUS, RXIDLE, 4, 1) + FIELD(STATUS, RXEMPTY, 5, 1) +REG32(RDATA, 0x14) +REG32(WDATA, 0x18) +REG32(FIFO_CTRL, 0x1c) + FIELD(FIFO_CTRL, RXRST, 0, 1) + FIELD(FIFO_CTRL, TXRST, 1, 1) + FIELD(FIFO_CTRL, RXILVL, 2, 3) + FIELD(FIFO_CTRL, TXILVL, 5, 2) +REG32(FIFO_STATUS, 0x20) +REG32(OVRD, 0x24) +REG32(VAL, 0x28) +REG32(TIMEOUT_CTRL, 0x2c) + +#define IBEX_UART_TX_FIFO_SIZE 16 +#define IBEX_UART_CLOCK 50000000 /* 50MHz clock */ + +#define TYPE_IBEX_UART "ibex-uart" +OBJECT_DECLARE_SIMPLE_TYPE(IbexUartState, IBEX_UART) + +struct IbexUartState { + /* */ + SysBusDevice parent_obj; + + /* */ + MemoryRegion mmio; + + uint8_t tx_fifo[IBEX_UART_TX_FIFO_SIZE]; + uint32_t tx_level; + + QEMUTimer *fifo_trigger_handle; + uint64_t char_tx_time; + + uint32_t uart_intr_state; + uint32_t uart_intr_enable; + uint32_t uart_ctrl; + uint32_t uart_status; + uint32_t uart_rdata; + uint32_t uart_fifo_ctrl; + uint32_t uart_fifo_status; + uint32_t uart_ovrd; + uint32_t uart_val; + uint32_t uart_timeout_ctrl; + + Clock *f_clk; + + CharBackend chr; + qemu_irq tx_watermark; + qemu_irq rx_watermark; + qemu_irq tx_empty; + qemu_irq rx_overflow; +}; +#endif /* HW_IBEX_UART_H */ diff --git a/include/hw/char/imx_serial.h b/include/hw/char/imx_serial.h new file mode 100644 index 000000000..91c9894ad --- /dev/null +++ b/include/hw/char/imx_serial.h @@ -0,0 +1,109 @@ +/* + * Device model for i.MX UART + * + * Copyright (c) 2008 OKL + * Originally Written by Hans Jiang + * Copyright (c) 2011 NICTA Pty Ltd. + * Updated by Jean-Christophe Dubois + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, see . + */ + +#ifndef IMX_SERIAL_H +#define IMX_SERIAL_H + +#include "hw/sysbus.h" +#include "chardev/char-fe.h" +#include "qom/object.h" + +#define TYPE_IMX_SERIAL "imx.serial" +OBJECT_DECLARE_SIMPLE_TYPE(IMXSerialState, IMX_SERIAL) + +#define URXD_CHARRDY (1<<15) /* character read is valid */ +#define URXD_ERR (1<<14) /* Character has error */ +#define URXD_FRMERR (1<<12) /* Character has frame error */ +#define URXD_BRK (1<<11) /* Break received */ + +#define USR1_PARTYER (1<<15) /* Parity Error */ +#define USR1_RTSS (1<<14) /* RTS pin status */ +#define USR1_TRDY (1<<13) /* Tx ready */ +#define USR1_RTSD (1<<12) /* RTS delta: pin changed state */ +#define USR1_ESCF (1<<11) /* Escape sequence interrupt */ +#define USR1_FRAMERR (1<<10) /* Framing error */ +#define USR1_RRDY (1<<9) /* receiver ready */ +#define USR1_AGTIM (1<<8) /* Aging timer interrupt */ +#define USR1_DTRD (1<<7) /* DTR changed */ +#define USR1_RXDS (1<<6) /* Receiver is idle */ +#define USR1_AIRINT (1<<5) /* Aysnch IR interrupt */ +#define USR1_AWAKE (1<<4) /* Falling edge detected on RXd pin */ + +#define USR2_ADET (1<<15) /* Autobaud complete */ +#define USR2_TXFE (1<<14) /* Transmit FIFO empty */ +#define USR2_DTRF (1<<13) /* DTR/DSR transition */ +#define USR2_IDLE (1<<12) /* UART has been idle for too long */ +#define USR2_ACST (1<<11) /* Autobaud counter stopped */ +#define USR2_RIDELT (1<<10) /* Ring Indicator delta */ +#define USR2_RIIN (1<<9) /* Ring Indicator Input */ +#define USR2_IRINT (1<<8) /* Serial Infrared Interrupt */ +#define USR2_WAKE (1<<7) /* Start bit detected */ +#define USR2_DCDDELT (1<<6) /* Data Carrier Detect delta */ +#define USR2_DCDIN (1<<5) /* Data Carrier Detect Input */ +#define USR2_RTSF (1<<4) /* RTS transition */ +#define USR2_TXDC (1<<3) /* Transmission complete */ +#define USR2_BRCD (1<<2) /* Break condition detected */ +#define USR2_ORE (1<<1) /* Overrun error */ +#define USR2_RDR (1<<0) /* Receive data ready */ + +#define UCR1_TRDYEN (1<<13) /* Tx Ready Interrupt Enable */ +#define UCR1_RRDYEN (1<<9) /* Rx Ready Interrupt Enable */ +#define UCR1_TXMPTYEN (1<<6) /* Tx Empty Interrupt Enable */ +#define UCR1_UARTEN (1<<0) /* UART Enable */ + +#define UCR2_TXEN (1<<2) /* Transmitter enable */ +#define UCR2_RXEN (1<<1) /* Receiver enable */ +#define UCR2_SRST (1<<0) /* Reset complete */ + +#define UCR4_DREN BIT(0) /* Receive Data Ready interrupt enable */ +#define UCR4_TCEN BIT(3) /* TX complete interrupt enable */ + +#define UTS1_TXEMPTY (1<<6) +#define UTS1_RXEMPTY (1<<5) +#define UTS1_TXFULL (1<<4) +#define UTS1_RXFULL (1<<3) + +struct IMXSerialState { + /*< private >*/ + SysBusDevice parent_obj; + + /*< public >*/ + MemoryRegion iomem; + int32_t readbuff; + + uint32_t usr1; + uint32_t usr2; + uint32_t ucr1; + uint32_t ucr2; + uint32_t uts1; + + /* + * The registers below are implemented just so that the + * guest OS sees what it has written + */ + uint32_t onems; + uint32_t ufcr; + uint32_t ubmr; + uint32_t ubrc; + uint32_t ucr3; + uint32_t ucr4; + + qemu_irq irq; + CharBackend chr; +}; + +#endif diff --git a/include/hw/char/lm32_juart.h b/include/hw/char/lm32_juart.h new file mode 100644 index 000000000..6fce27832 --- /dev/null +++ b/include/hw/char/lm32_juart.h @@ -0,0 +1,13 @@ +#ifndef QEMU_HW_CHAR_LM32_JUART_H +#define QEMU_HW_CHAR_LM32_JUART_H + +#include "hw/qdev-core.h" + +#define TYPE_LM32_JUART "lm32-juart" + +uint32_t lm32_juart_get_jtx(DeviceState *d); +uint32_t lm32_juart_get_jrx(DeviceState *d); +void lm32_juart_set_jtx(DeviceState *d, uint32_t jtx); +void lm32_juart_set_jrx(DeviceState *d, uint32_t jrx); + +#endif /* QEMU_HW_CHAR_LM32_JUART_H */ diff --git a/include/hw/char/mchp_pfsoc_mmuart.h b/include/hw/char/mchp_pfsoc_mmuart.h new file mode 100644 index 000000000..f61990215 --- /dev/null +++ b/include/hw/char/mchp_pfsoc_mmuart.h @@ -0,0 +1,61 @@ +/* + * Microchip PolarFire SoC MMUART emulation + * + * Copyright (c) 2020 Wind River Systems, Inc. + * + * Author: + * Bin Meng + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ + +#ifndef HW_MCHP_PFSOC_MMUART_H +#define HW_MCHP_PFSOC_MMUART_H + +#include "hw/char/serial.h" + +#define MCHP_PFSOC_MMUART_REG_SIZE 52 + +typedef struct MchpPfSoCMMUartState { + MemoryRegion iomem; + hwaddr base; + qemu_irq irq; + + SerialMM *serial; + + uint32_t reg[MCHP_PFSOC_MMUART_REG_SIZE / sizeof(uint32_t)]; +} MchpPfSoCMMUartState; + +/** + * mchp_pfsoc_mmuart_create - Create a Microchip PolarFire SoC MMUART + * + * This is a helper routine for board to create a MMUART device that is + * compatible with Microchip PolarFire SoC. + * + * @sysmem: system memory region to map + * @base: base address of the MMUART registers + * @irq: IRQ number of the MMUART device + * @chr: character device to associate to + * + * @return: a pointer to the device specific control structure + */ +MchpPfSoCMMUartState *mchp_pfsoc_mmuart_create(MemoryRegion *sysmem, + hwaddr base, qemu_irq irq, Chardev *chr); + +#endif /* HW_MCHP_PFSOC_MMUART_H */ diff --git a/include/hw/char/nrf51_uart.h b/include/hw/char/nrf51_uart.h new file mode 100644 index 000000000..561b6383c --- /dev/null +++ b/include/hw/char/nrf51_uart.h @@ -0,0 +1,78 @@ +/* + * nRF51 SoC UART emulation + * + * Copyright (c) 2018 Julia Suvorova + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 or + * (at your option) any later version. + */ + +#ifndef NRF51_UART_H +#define NRF51_UART_H + +#include "hw/sysbus.h" +#include "chardev/char-fe.h" +#include "hw/registerfields.h" +#include "qom/object.h" + +#define UART_FIFO_LENGTH 6 +#define UART_SIZE 0x1000 + +#define TYPE_NRF51_UART "nrf51_soc.uart" +OBJECT_DECLARE_SIMPLE_TYPE(NRF51UARTState, NRF51_UART) + +REG32(UART_STARTRX, 0x000) +REG32(UART_STOPRX, 0x004) +REG32(UART_STARTTX, 0x008) +REG32(UART_STOPTX, 0x00C) +REG32(UART_SUSPEND, 0x01C) + +REG32(UART_CTS, 0x100) +REG32(UART_NCTS, 0x104) +REG32(UART_RXDRDY, 0x108) +REG32(UART_TXDRDY, 0x11C) +REG32(UART_ERROR, 0x124) +REG32(UART_RXTO, 0x144) + +REG32(UART_INTEN, 0x300) + FIELD(UART_INTEN, CTS, 0, 1) + FIELD(UART_INTEN, NCTS, 1, 1) + FIELD(UART_INTEN, RXDRDY, 2, 1) + FIELD(UART_INTEN, TXDRDY, 7, 1) + FIELD(UART_INTEN, ERROR, 9, 1) + FIELD(UART_INTEN, RXTO, 17, 1) +REG32(UART_INTENSET, 0x304) +REG32(UART_INTENCLR, 0x308) +REG32(UART_ERRORSRC, 0x480) +REG32(UART_ENABLE, 0x500) +REG32(UART_PSELRTS, 0x508) +REG32(UART_PSELTXD, 0x50C) +REG32(UART_PSELCTS, 0x510) +REG32(UART_PSELRXD, 0x514) +REG32(UART_RXD, 0x518) +REG32(UART_TXD, 0x51C) +REG32(UART_BAUDRATE, 0x524) +REG32(UART_CONFIG, 0x56C) + +struct NRF51UARTState { + SysBusDevice parent_obj; + + MemoryRegion iomem; + CharBackend chr; + qemu_irq irq; + guint watch_tag; + + uint8_t rx_fifo[UART_FIFO_LENGTH]; + unsigned int rx_fifo_pos; + unsigned int rx_fifo_len; + + uint32_t reg[0x56C]; + + bool rx_started; + bool tx_started; + bool pending_tx_byte; + bool enabled; +}; + +#endif diff --git a/include/hw/char/parallel.h b/include/hw/char/parallel.h new file mode 100644 index 000000000..0a23c0f57 --- /dev/null +++ b/include/hw/char/parallel.h @@ -0,0 +1,13 @@ +#ifndef HW_PARALLEL_H +#define HW_PARALLEL_H + +#include "hw/isa/isa.h" +#include "chardev/char.h" + +void parallel_hds_isa_init(ISABus *bus, int n); + +bool parallel_mm_init(MemoryRegion *address_space, + hwaddr base, int it_shift, qemu_irq irq, + Chardev *chr); + +#endif diff --git a/include/hw/char/pl011.h b/include/hw/char/pl011.h new file mode 100644 index 000000000..dc2c90eed --- /dev/null +++ b/include/hw/char/pl011.h @@ -0,0 +1,91 @@ +/* + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2 or later, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along with + * this program. If not, see . + */ + +#ifndef HW_PL011_H +#define HW_PL011_H + +#include "hw/qdev-properties.h" +#include "hw/sysbus.h" +#include "chardev/char-fe.h" +#include "qapi/error.h" +#include "qom/object.h" + +#define TYPE_PL011 "pl011" +OBJECT_DECLARE_SIMPLE_TYPE(PL011State, PL011) + +/* This shares the same struct (and cast macro) as the base pl011 device */ +#define TYPE_PL011_LUMINARY "pl011_luminary" + +struct PL011State { + SysBusDevice parent_obj; + + MemoryRegion iomem; + uint32_t readbuff; + uint32_t flags; + uint32_t lcr; + uint32_t rsr; + uint32_t cr; + uint32_t dmacr; + uint32_t int_enabled; + uint32_t int_level; + uint32_t read_fifo[16]; + uint32_t ilpr; + uint32_t ibrd; + uint32_t fbrd; + uint32_t ifl; + int read_pos; + int read_count; + int read_trigger; + CharBackend chr; + qemu_irq irq[6]; + Clock *clk; + bool migrate_clk; + const unsigned char *id; +}; + +static inline DeviceState *pl011_create(hwaddr addr, + qemu_irq irq, + Chardev *chr) +{ + DeviceState *dev; + SysBusDevice *s; + + dev = qdev_new("pl011"); + s = SYS_BUS_DEVICE(dev); + qdev_prop_set_chr(dev, "chardev", chr); + sysbus_realize_and_unref(s, &error_fatal); + sysbus_mmio_map(s, 0, addr); + sysbus_connect_irq(s, 0, irq); + + return dev; +} + +static inline DeviceState *pl011_luminary_create(hwaddr addr, + qemu_irq irq, + Chardev *chr) +{ + DeviceState *dev; + SysBusDevice *s; + + dev = qdev_new("pl011_luminary"); + s = SYS_BUS_DEVICE(dev); + qdev_prop_set_chr(dev, "chardev", chr); + sysbus_realize_and_unref(s, &error_fatal); + sysbus_mmio_map(s, 0, addr); + sysbus_connect_irq(s, 0, irq); + + return dev; +} + +#endif diff --git a/include/hw/char/renesas_sci.h b/include/hw/char/renesas_sci.h new file mode 100644 index 000000000..a4764e3ee --- /dev/null +++ b/include/hw/char/renesas_sci.h @@ -0,0 +1,54 @@ +/* + * Renesas Serial Communication Interface + * + * Copyright (c) 2018 Yoshinori Sato + * + * SPDX-License-Identifier: GPL-2.0-or-later + */ + +#ifndef HW_CHAR_RENESAS_SCI_H +#define HW_CHAR_RENESAS_SCI_H + +#include "chardev/char-fe.h" +#include "hw/sysbus.h" +#include "qom/object.h" + +#define TYPE_RENESAS_SCI "renesas-sci" +typedef struct RSCIState RSCIState; +DECLARE_INSTANCE_CHECKER(RSCIState, RSCI, + TYPE_RENESAS_SCI) + +enum { + ERI = 0, + RXI = 1, + TXI = 2, + TEI = 3, + SCI_NR_IRQ = 4 +}; + +struct RSCIState { + /*< private >*/ + SysBusDevice parent_obj; + /*< public >*/ + + MemoryRegion memory; + QEMUTimer timer; + CharBackend chr; + qemu_irq irq[SCI_NR_IRQ]; + + uint8_t smr; + uint8_t brr; + uint8_t scr; + uint8_t tdr; + uint8_t ssr; + uint8_t rdr; + uint8_t scmr; + uint8_t semr; + + uint8_t read_ssr; + int64_t trtime; + int64_t rx_next; + uint64_t input_freq; +}; + +#endif diff --git a/include/hw/char/riscv_htif.h b/include/hw/char/riscv_htif.h new file mode 100644 index 000000000..fb9452cf5 --- /dev/null +++ b/include/hw/char/riscv_htif.h @@ -0,0 +1,59 @@ +/* + * QEMU RISCV Host Target Interface (HTIF) Emulation + * + * Copyright (c) 2016-2017 Sagar Karandikar, sagark@eecs.berkeley.edu + * Copyright (c) 2017-2018 SiFive, Inc. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2 or later, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along with + * this program. If not, see . + */ + +#ifndef HW_RISCV_HTIF_H +#define HW_RISCV_HTIF_H + +#include "chardev/char.h" +#include "chardev/char-fe.h" +#include "exec/memory.h" +#include "target/riscv/cpu.h" + +#define TYPE_HTIF_UART "riscv.htif.uart" + +typedef struct HTIFState { + int allow_tohost; + int fromhost_inprogress; + + hwaddr tohost_offset; + hwaddr fromhost_offset; + uint64_t tohost_size; + uint64_t fromhost_size; + MemoryRegion mmio; + MemoryRegion *address_space; + MemoryRegion *main_mem; + void *main_mem_ram_ptr; + + CPURISCVState *env; + CharBackend chr; + uint64_t pending_read; +} HTIFState; + +extern const VMStateDescription vmstate_htif; +extern const MemoryRegionOps htif_io_ops; + +/* HTIF symbol callback */ +void htif_symbol_callback(const char *st_name, int st_info, uint64_t st_value, + uint64_t st_size); + +/* legacy pre qom */ +HTIFState *htif_mm_init(MemoryRegion *address_space, MemoryRegion *main_mem, + CPURISCVState *env, Chardev *chr); + +#endif diff --git a/include/hw/char/serial.h b/include/hw/char/serial.h new file mode 100644 index 000000000..8ba7eca3d --- /dev/null +++ b/include/hw/char/serial.h @@ -0,0 +1,116 @@ +/* + * QEMU 16550A UART emulation + * + * Copyright (c) 2003-2004 Fabrice Bellard + * Copyright (c) 2008 Citrix Systems, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ + +#ifndef HW_SERIAL_H +#define HW_SERIAL_H + +#include "chardev/char-fe.h" +#include "exec/memory.h" +#include "qemu/fifo8.h" +#include "chardev/char.h" +#include "hw/sysbus.h" +#include "qom/object.h" + +#define UART_FIFO_LENGTH 16 /* 16550A Fifo Length */ + +struct SerialState { + DeviceState parent; + + uint16_t divider; + uint8_t rbr; /* receive register */ + uint8_t thr; /* transmit holding register */ + uint8_t tsr; /* transmit shift register */ + uint8_t ier; + uint8_t iir; /* read only */ + uint8_t lcr; + uint8_t mcr; + uint8_t lsr; /* read only */ + uint8_t msr; /* read only */ + uint8_t scr; + uint8_t fcr; + uint8_t fcr_vmstate; /* we can't write directly this value + it has side effects */ + /* NOTE: this hidden state is necessary for tx irq generation as + it can be reset while reading iir */ + int thr_ipending; + qemu_irq irq; + CharBackend chr; + int last_break_enable; + uint32_t baudbase; + uint32_t tsr_retry; + guint watch_tag; + bool wakeup; + + /* Time when the last byte was successfully sent out of the tsr */ + uint64_t last_xmit_ts; + Fifo8 recv_fifo; + Fifo8 xmit_fifo; + /* Interrupt trigger level for recv_fifo */ + uint8_t recv_fifo_itl; + + QEMUTimer *fifo_timeout_timer; + int timeout_ipending; /* timeout interrupt pending state */ + + uint64_t char_transmit_time; /* time to transmit a char in ticks */ + int poll_msl; + + QEMUTimer *modem_status_poll; + MemoryRegion io; +}; +typedef struct SerialState SerialState; + +struct SerialMM { + SysBusDevice parent; + + SerialState serial; + + uint8_t regshift; + uint8_t endianness; +}; + +extern const VMStateDescription vmstate_serial; +extern const MemoryRegionOps serial_io_ops; + +void serial_set_frequency(SerialState *s, uint32_t frequency); + +#define TYPE_SERIAL "serial" +OBJECT_DECLARE_SIMPLE_TYPE(SerialState, SERIAL) + +#define TYPE_SERIAL_MM "serial-mm" +OBJECT_DECLARE_SIMPLE_TYPE(SerialMM, SERIAL_MM) + +SerialMM *serial_mm_init(MemoryRegion *address_space, + hwaddr base, int regshift, + qemu_irq irq, int baudbase, + Chardev *chr, enum device_endian end); + +/* serial-isa.c */ + +#define MAX_ISA_SERIAL_PORTS 4 + +#define TYPE_ISA_SERIAL "isa-serial" +void serial_hds_isa_init(ISABus *bus, int from, int to); + +#endif diff --git a/include/hw/char/sifive_uart.h b/include/hw/char/sifive_uart.h new file mode 100644 index 000000000..3e962be65 --- /dev/null +++ b/include/hw/char/sifive_uart.h @@ -0,0 +1,79 @@ +/* + * SiFive UART interface + * + * Copyright (c) 2016 Stefan O'Rear + * Copyright (c) 2017 SiFive, Inc. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2 or later, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along with + * this program. If not, see . + */ + +#ifndef HW_SIFIVE_UART_H +#define HW_SIFIVE_UART_H + +#include "chardev/char-fe.h" +#include "hw/sysbus.h" +#include "qom/object.h" + +enum { + SIFIVE_UART_TXFIFO = 0, + SIFIVE_UART_RXFIFO = 4, + SIFIVE_UART_TXCTRL = 8, + SIFIVE_UART_TXMARK = 10, + SIFIVE_UART_RXCTRL = 12, + SIFIVE_UART_RXMARK = 14, + SIFIVE_UART_IE = 16, + SIFIVE_UART_IP = 20, + SIFIVE_UART_DIV = 24, + SIFIVE_UART_MAX = 32 +}; + +enum { + SIFIVE_UART_IE_TXWM = 1, /* Transmit watermark interrupt enable */ + SIFIVE_UART_IE_RXWM = 2 /* Receive watermark interrupt enable */ +}; + +enum { + SIFIVE_UART_IP_TXWM = 1, /* Transmit watermark interrupt pending */ + SIFIVE_UART_IP_RXWM = 2 /* Receive watermark interrupt pending */ +}; + +#define SIFIVE_UART_GET_TXCNT(txctrl) ((txctrl >> 16) & 0x7) +#define SIFIVE_UART_GET_RXCNT(rxctrl) ((rxctrl >> 16) & 0x7) + +#define TYPE_SIFIVE_UART "riscv.sifive.uart" + +typedef struct SiFiveUARTState SiFiveUARTState; +DECLARE_INSTANCE_CHECKER(SiFiveUARTState, SIFIVE_UART, + TYPE_SIFIVE_UART) + +struct SiFiveUARTState { + /*< private >*/ + SysBusDevice parent_obj; + + /*< public >*/ + qemu_irq irq; + MemoryRegion mmio; + CharBackend chr; + uint8_t rx_fifo[8]; + unsigned int rx_fifo_len; + uint32_t ie; + uint32_t ip; + uint32_t txctrl; + uint32_t rxctrl; + uint32_t div; +}; + +SiFiveUARTState *sifive_uart_create(MemoryRegion *address_space, hwaddr base, + Chardev *chr, qemu_irq irq); + +#endif diff --git a/include/hw/char/stm32f2xx_usart.h b/include/hw/char/stm32f2xx_usart.h new file mode 100644 index 000000000..65bcc8547 --- /dev/null +++ b/include/hw/char/stm32f2xx_usart.h @@ -0,0 +1,77 @@ +/* + * STM32F2XX USART + * + * Copyright (c) 2014 Alistair Francis + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ + +#ifndef HW_STM32F2XX_USART_H +#define HW_STM32F2XX_USART_H + +#include "hw/sysbus.h" +#include "chardev/char-fe.h" +#include "qom/object.h" + +#define USART_SR 0x00 +#define USART_DR 0x04 +#define USART_BRR 0x08 +#define USART_CR1 0x0C +#define USART_CR2 0x10 +#define USART_CR3 0x14 +#define USART_GTPR 0x18 + +/* + * NB: The reset value mentioned in "24.6.1 Status register" seems bogus. + * Looking at "Table 98 USART register map and reset values", it seems it + * should be 0xc0, and that's how real hardware behaves. + */ +#define USART_SR_RESET (USART_SR_TXE | USART_SR_TC) + +#define USART_SR_TXE (1 << 7) +#define USART_SR_TC (1 << 6) +#define USART_SR_RXNE (1 << 5) + +#define USART_CR1_UE (1 << 13) +#define USART_CR1_RXNEIE (1 << 5) +#define USART_CR1_TE (1 << 3) +#define USART_CR1_RE (1 << 2) + +#define TYPE_STM32F2XX_USART "stm32f2xx-usart" +OBJECT_DECLARE_SIMPLE_TYPE(STM32F2XXUsartState, STM32F2XX_USART) + +struct STM32F2XXUsartState { + /* */ + SysBusDevice parent_obj; + + /* */ + MemoryRegion mmio; + + uint32_t usart_sr; + uint32_t usart_dr; + uint32_t usart_brr; + uint32_t usart_cr1; + uint32_t usart_cr2; + uint32_t usart_cr3; + uint32_t usart_gtpr; + + CharBackend chr; + qemu_irq irq; +}; +#endif /* HW_STM32F2XX_USART_H */ diff --git a/include/hw/char/xilinx_uartlite.h b/include/hw/char/xilinx_uartlite.h new file mode 100644 index 000000000..bb32d0fcb --- /dev/null +++ b/include/hw/char/xilinx_uartlite.h @@ -0,0 +1,38 @@ +/* + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2 or later, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along with + * this program. If not, see . + */ + +#ifndef XILINX_UARTLITE_H +#define XILINX_UARTLITE_H + +#include "hw/qdev-properties.h" +#include "hw/sysbus.h" + +static inline DeviceState *xilinx_uartlite_create(hwaddr addr, + qemu_irq irq, + Chardev *chr) +{ + DeviceState *dev; + SysBusDevice *s; + + dev = qdev_new("xlnx.xps-uartlite"); + s = SYS_BUS_DEVICE(dev); + qdev_prop_set_chr(dev, "chardev", chr); + sysbus_realize_and_unref(s, &error_fatal); + sysbus_mmio_map(s, 0, addr); + sysbus_connect_irq(s, 0, irq); + + return dev; +} + +#endif diff --git a/include/hw/clock.h b/include/hw/clock.h new file mode 100644 index 000000000..81bcf3e50 --- /dev/null +++ b/include/hw/clock.h @@ -0,0 +1,232 @@ +/* + * Hardware Clocks + * + * Copyright GreenSocs 2016-2020 + * + * Authors: + * Frederic Konrad + * Damien Hedde + * + * This work is licensed under the terms of the GNU GPL, version 2 or later. + * See the COPYING file in the top-level directory. + */ + +#ifndef QEMU_HW_CLOCK_H +#define QEMU_HW_CLOCK_H + +#include "qom/object.h" +#include "qemu/queue.h" + +#define TYPE_CLOCK "clock" +OBJECT_DECLARE_SIMPLE_TYPE(Clock, CLOCK) + +typedef void ClockCallback(void *opaque); + +/* + * clock store a value representing the clock's period in 2^-32ns unit. + * It can represent: + * + periods from 2^-32ns up to 4seconds + * + frequency from ~0.25Hz 2e10Ghz + * Resolution of frequency representation decreases with frequency: + * + at 100MHz, resolution is ~2mHz + * + at 1Ghz, resolution is ~0.2Hz + * + at 10Ghz, resolution is ~20Hz + */ +#define CLOCK_PERIOD_1SEC (1000000000llu << 32) + +/* + * macro helpers to convert to hertz / nanosecond + */ +#define CLOCK_PERIOD_FROM_NS(ns) ((ns) * (CLOCK_PERIOD_1SEC / 1000000000llu)) +#define CLOCK_PERIOD_TO_NS(per) ((per) / (CLOCK_PERIOD_1SEC / 1000000000llu)) +#define CLOCK_PERIOD_FROM_HZ(hz) (((hz) != 0) ? CLOCK_PERIOD_1SEC / (hz) : 0u) +#define CLOCK_PERIOD_TO_HZ(per) (((per) != 0) ? CLOCK_PERIOD_1SEC / (per) : 0u) + +/** + * Clock: + * @parent_obj: parent class + * @period: unsigned integer representing the period of the clock + * @canonical_path: clock path string cache (used for trace purpose) + * @callback: called when clock changes + * @callback_opaque: argument for @callback + * @source: source (or parent in clock tree) of the clock + * @children: list of clocks connected to this one (it is their source) + * @sibling: structure used to form a clock list + */ + + +struct Clock { + /*< private >*/ + Object parent_obj; + + /* all fields are private and should not be modified directly */ + + /* fields */ + uint64_t period; + char *canonical_path; + ClockCallback *callback; + void *callback_opaque; + + /* Clocks are organized in a clock tree */ + Clock *source; + QLIST_HEAD(, Clock) children; + QLIST_ENTRY(Clock) sibling; +}; + +/* + * vmstate description entry to be added in device vmsd. + */ +extern const VMStateDescription vmstate_clock; +#define VMSTATE_CLOCK(field, state) \ + VMSTATE_CLOCK_V(field, state, 0) +#define VMSTATE_CLOCK_V(field, state, version) \ + VMSTATE_STRUCT_POINTER_V(field, state, version, vmstate_clock, Clock) +#define VMSTATE_ARRAY_CLOCK(field, state, num) \ + VMSTATE_ARRAY_CLOCK_V(field, state, num, 0) +#define VMSTATE_ARRAY_CLOCK_V(field, state, num, version) \ + VMSTATE_ARRAY_OF_POINTER_TO_STRUCT(field, state, num, version, \ + vmstate_clock, Clock) + +/** + * clock_setup_canonical_path: + * @clk: clock + * + * compute the canonical path of the clock (used by log messages) + */ +void clock_setup_canonical_path(Clock *clk); + +/** + * clock_new: + * @parent: the clock parent + * @name: the clock object name + * + * Helper function to create a new clock and parent it to @parent. There is no + * need to call clock_setup_canonical_path on the returned clock as it is done + * by this function. + * + * @return the newly created clock + */ +Clock *clock_new(Object *parent, const char *name); + +/** + * clock_set_callback: + * @clk: the clock to register the callback into + * @cb: the callback function + * @opaque: the argument to the callback + * + * Register a callback called on every clock update. + */ +void clock_set_callback(Clock *clk, ClockCallback *cb, void *opaque); + +/** + * clock_clear_callback: + * @clk: the clock to delete the callback from + * + * Unregister the callback registered with clock_set_callback. + */ +void clock_clear_callback(Clock *clk); + +/** + * clock_set_source: + * @clk: the clock. + * @src: the source clock + * + * Setup @src as the clock source of @clk. The current @src period + * value is also copied to @clk and its subtree but no callback is + * called. + * Further @src update will be propagated to @clk and its subtree. + */ +void clock_set_source(Clock *clk, Clock *src); + +/** + * clock_set: + * @clk: the clock to initialize. + * @value: the clock's value, 0 means unclocked + * + * Set the local cached period value of @clk to @value. + * + * @return: true if the clock is changed. + */ +bool clock_set(Clock *clk, uint64_t value); + +static inline bool clock_set_hz(Clock *clk, unsigned hz) +{ + return clock_set(clk, CLOCK_PERIOD_FROM_HZ(hz)); +} + +static inline bool clock_set_ns(Clock *clk, unsigned ns) +{ + return clock_set(clk, CLOCK_PERIOD_FROM_NS(ns)); +} + +/** + * clock_propagate: + * @clk: the clock + * + * Propagate the clock period that has been previously configured using + * @clock_set(). This will update recursively all connected clocks. + * It is an error to call this function on a clock which has a source. + * Note: this function must not be called during device inititialization + * or migration. + */ +void clock_propagate(Clock *clk); + +/** + * clock_update: + * @clk: the clock to update. + * @value: the new clock's value, 0 means unclocked + * + * Update the @clk to the new @value. All connected clocks will be informed + * of this update. This is equivalent to call @clock_set() then + * @clock_propagate(). + */ +static inline void clock_update(Clock *clk, uint64_t value) +{ + if (clock_set(clk, value)) { + clock_propagate(clk); + } +} + +static inline void clock_update_hz(Clock *clk, unsigned hz) +{ + clock_update(clk, CLOCK_PERIOD_FROM_HZ(hz)); +} + +static inline void clock_update_ns(Clock *clk, unsigned ns) +{ + clock_update(clk, CLOCK_PERIOD_FROM_NS(ns)); +} + +/** + * clock_get: + * @clk: the clk to fetch the clock + * + * @return: the current period. + */ +static inline uint64_t clock_get(const Clock *clk) +{ + return clk->period; +} + +static inline unsigned clock_get_hz(Clock *clk) +{ + return CLOCK_PERIOD_TO_HZ(clock_get(clk)); +} + +static inline unsigned clock_get_ns(Clock *clk) +{ + return CLOCK_PERIOD_TO_NS(clock_get(clk)); +} + +/** + * clock_is_enabled: + * @clk: a clock + * + * @return: true if the clock is running. + */ +static inline bool clock_is_enabled(const Clock *clk) +{ + return clock_get(clk) != 0; +} + +#endif /* QEMU_HW_CLOCK_H */ diff --git a/include/hw/core/cpu.h b/include/hw/core/cpu.h new file mode 100644 index 000000000..3d92c967f --- /dev/null +++ b/include/hw/core/cpu.h @@ -0,0 +1,1151 @@ +/* + * QEMU CPU model + * + * Copyright (c) 2012 SUSE LINUX Products GmbH + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version 2 + * of the License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, see + * + */ +#ifndef QEMU_CPU_H +#define QEMU_CPU_H + +#include "hw/qdev-core.h" +#include "disas/dis-asm.h" +#include "exec/hwaddr.h" +#include "exec/memattrs.h" +#include "qapi/qapi-types-run-state.h" +#include "qemu/bitmap.h" +#include "qemu/rcu_queue.h" +#include "qemu/queue.h" +#include "qemu/thread.h" +#include "qemu/plugin.h" +#include "qom/object.h" + +typedef int (*WriteCoreDumpFunction)(const void *buf, size_t size, + void *opaque); + +/** + * vaddr: + * Type wide enough to contain any #target_ulong virtual address. + */ +typedef uint64_t vaddr; +#define VADDR_PRId PRId64 +#define VADDR_PRIu PRIu64 +#define VADDR_PRIo PRIo64 +#define VADDR_PRIx PRIx64 +#define VADDR_PRIX PRIX64 +#define VADDR_MAX UINT64_MAX + +/** + * SECTION:cpu + * @section_id: QEMU-cpu + * @title: CPU Class + * @short_description: Base class for all CPUs + */ + +#define TYPE_CPU "cpu" + +/* Since this macro is used a lot in hot code paths and in conjunction with + * FooCPU *foo_env_get_cpu(), we deviate from usual QOM practice by using + * an unchecked cast. + */ +#define CPU(obj) ((CPUState *)(obj)) + +typedef struct CPUClass CPUClass; +DECLARE_CLASS_CHECKERS(CPUClass, CPU, + TYPE_CPU) + +typedef enum MMUAccessType { + MMU_DATA_LOAD = 0, + MMU_DATA_STORE = 1, + MMU_INST_FETCH = 2 +} MMUAccessType; + +typedef struct CPUWatchpoint CPUWatchpoint; + +struct TranslationBlock; + +/** + * CPUClass: + * @class_by_name: Callback to map -cpu command line model name to an + * instantiatable CPU type. + * @parse_features: Callback to parse command line arguments. + * @reset_dump_flags: #CPUDumpFlags to use for reset logging. + * @has_work: Callback for checking if there is work to do. + * @do_interrupt: Callback for interrupt handling. + * @do_unaligned_access: Callback for unaligned access handling, if + * the target defines #TARGET_ALIGNED_ONLY. + * @do_transaction_failed: Callback for handling failed memory transactions + * (ie bus faults or external aborts; not MMU faults) + * @virtio_is_big_endian: Callback to return %true if a CPU which supports + * runtime configurable endianness is currently big-endian. Non-configurable + * CPUs can use the default implementation of this method. This method should + * not be used by any callers other than the pre-1.0 virtio devices. + * @memory_rw_debug: Callback for GDB memory access. + * @dump_state: Callback for dumping state. + * @dump_statistics: Callback for dumping statistics. + * @get_arch_id: Callback for getting architecture-dependent CPU ID. + * @get_paging_enabled: Callback for inquiring whether paging is enabled. + * @get_memory_mapping: Callback for obtaining the memory mappings. + * @set_pc: Callback for setting the Program Counter register. This + * should have the semantics used by the target architecture when + * setting the PC from a source such as an ELF file entry point; + * for example on Arm it will also set the Thumb mode bit based + * on the least significant bit of the new PC value. + * If the target behaviour here is anything other than "set + * the PC register to the value passed in" then the target must + * also implement the synchronize_from_tb hook. + * @synchronize_from_tb: Callback for synchronizing state from a TCG + * #TranslationBlock. This is called when we abandon execution + * of a TB before starting it, and must set all parts of the CPU + * state which the previous TB in the chain may not have updated. + * This always includes at least the program counter; some targets + * will need to do more. If this hook is not implemented then the + * default is to call @set_pc(tb->pc). + * @tlb_fill: Callback for handling a softmmu tlb miss or user-only + * address fault. For system mode, if the access is valid, call + * tlb_set_page and return true; if the access is invalid, and + * probe is true, return false; otherwise raise an exception and + * do not return. For user-only mode, always raise an exception + * and do not return. + * @get_phys_page_debug: Callback for obtaining a physical address. + * @get_phys_page_attrs_debug: Callback for obtaining a physical address and the + * associated memory transaction attributes to use for the access. + * CPUs which use memory transaction attributes should implement this + * instead of get_phys_page_debug. + * @asidx_from_attrs: Callback to return the CPU AddressSpace to use for + * a memory access with the specified memory transaction attributes. + * @gdb_read_register: Callback for letting GDB read a register. + * @gdb_write_register: Callback for letting GDB write a register. + * @debug_check_watchpoint: Callback: return true if the architectural + * watchpoint whose address has matched should really fire. + * @debug_excp_handler: Callback for handling debug exceptions. + * @write_elf64_note: Callback for writing a CPU-specific ELF note to a + * 64-bit VM coredump. + * @write_elf32_qemunote: Callback for writing a CPU- and QEMU-specific ELF + * note to a 32-bit VM coredump. + * @write_elf32_note: Callback for writing a CPU-specific ELF note to a + * 32-bit VM coredump. + * @write_elf32_qemunote: Callback for writing a CPU- and QEMU-specific ELF + * note to a 32-bit VM coredump. + * @vmsd: State description for migration. + * @gdb_num_core_regs: Number of core registers accessible to GDB. + * @gdb_core_xml_file: File name for core registers GDB XML description. + * @gdb_stop_before_watchpoint: Indicates whether GDB expects the CPU to stop + * before the insn which triggers a watchpoint rather than after it. + * @gdb_arch_name: Optional callback that returns the architecture name known + * to GDB. The caller must free the returned string with g_free. + * @gdb_get_dynamic_xml: Callback to return dynamically generated XML for the + * gdb stub. Returns a pointer to the XML contents for the specified XML file + * or NULL if the CPU doesn't have a dynamically generated content for it. + * @cpu_exec_enter: Callback for cpu_exec preparation. + * @cpu_exec_exit: Callback for cpu_exec cleanup. + * @cpu_exec_interrupt: Callback for processing interrupts in cpu_exec. + * @disas_set_info: Setup architecture specific components of disassembly info + * @adjust_watchpoint_address: Perform a target-specific adjustment to an + * address before attempting to match it against watchpoints. + * @deprecation_note: If this CPUClass is deprecated, this field provides + * related information. + * + * Represents a CPU family or model. + */ +struct CPUClass { + /*< private >*/ + DeviceClass parent_class; + /*< public >*/ + + ObjectClass *(*class_by_name)(const char *cpu_model); + void (*parse_features)(const char *typename, char *str, Error **errp); + + int reset_dump_flags; + bool (*has_work)(CPUState *cpu); + void (*do_interrupt)(CPUState *cpu); + void (*do_unaligned_access)(CPUState *cpu, vaddr addr, + MMUAccessType access_type, + int mmu_idx, uintptr_t retaddr); + void (*do_transaction_failed)(CPUState *cpu, hwaddr physaddr, vaddr addr, + unsigned size, MMUAccessType access_type, + int mmu_idx, MemTxAttrs attrs, + MemTxResult response, uintptr_t retaddr); + bool (*virtio_is_big_endian)(CPUState *cpu); + int (*memory_rw_debug)(CPUState *cpu, vaddr addr, + uint8_t *buf, int len, bool is_write); + void (*dump_state)(CPUState *cpu, FILE *, int flags); + GuestPanicInformation* (*get_crash_info)(CPUState *cpu); + void (*dump_statistics)(CPUState *cpu, int flags); + int64_t (*get_arch_id)(CPUState *cpu); + bool (*get_paging_enabled)(const CPUState *cpu); + void (*get_memory_mapping)(CPUState *cpu, MemoryMappingList *list, + Error **errp); + void (*set_pc)(CPUState *cpu, vaddr value); + void (*synchronize_from_tb)(CPUState *cpu, struct TranslationBlock *tb); + bool (*tlb_fill)(CPUState *cpu, vaddr address, int size, + MMUAccessType access_type, int mmu_idx, + bool probe, uintptr_t retaddr); + hwaddr (*get_phys_page_debug)(CPUState *cpu, vaddr addr); + hwaddr (*get_phys_page_attrs_debug)(CPUState *cpu, vaddr addr, + MemTxAttrs *attrs); + int (*asidx_from_attrs)(CPUState *cpu, MemTxAttrs attrs); + int (*gdb_read_register)(CPUState *cpu, GByteArray *buf, int reg); + int (*gdb_write_register)(CPUState *cpu, uint8_t *buf, int reg); + bool (*debug_check_watchpoint)(CPUState *cpu, CPUWatchpoint *wp); + void (*debug_excp_handler)(CPUState *cpu); + + int (*write_elf64_note)(WriteCoreDumpFunction f, CPUState *cpu, + int cpuid, void *opaque); + int (*write_elf64_qemunote)(WriteCoreDumpFunction f, CPUState *cpu, + void *opaque); + int (*write_elf32_note)(WriteCoreDumpFunction f, CPUState *cpu, + int cpuid, void *opaque); + int (*write_elf32_qemunote)(WriteCoreDumpFunction f, CPUState *cpu, + void *opaque); + + const VMStateDescription *vmsd; + const char *gdb_core_xml_file; + gchar * (*gdb_arch_name)(CPUState *cpu); + const char * (*gdb_get_dynamic_xml)(CPUState *cpu, const char *xmlname); + void (*cpu_exec_enter)(CPUState *cpu); + void (*cpu_exec_exit)(CPUState *cpu); + bool (*cpu_exec_interrupt)(CPUState *cpu, int interrupt_request); + + void (*disas_set_info)(CPUState *cpu, disassemble_info *info); + vaddr (*adjust_watchpoint_address)(CPUState *cpu, vaddr addr, int len); + void (*tcg_initialize)(void); + + const char *deprecation_note; + /* Keep non-pointer data at the end to minimize holes. */ + int gdb_num_core_regs; + bool gdb_stop_before_watchpoint; +}; + +/* + * Low 16 bits: number of cycles left, used only in icount mode. + * High 16 bits: Set to -1 to force TCG to stop executing linked TBs + * for this CPU and return to its top level loop (even in non-icount mode). + * This allows a single read-compare-cbranch-write sequence to test + * for both decrementer underflow and exceptions. + */ +typedef union IcountDecr { + uint32_t u32; + struct { +#ifdef HOST_WORDS_BIGENDIAN + uint16_t high; + uint16_t low; +#else + uint16_t low; + uint16_t high; +#endif + } u16; +} IcountDecr; + +typedef struct CPUBreakpoint { + vaddr pc; + int flags; /* BP_* */ + QTAILQ_ENTRY(CPUBreakpoint) entry; +} CPUBreakpoint; + +struct CPUWatchpoint { + vaddr vaddr; + vaddr len; + vaddr hitaddr; + MemTxAttrs hitattrs; + int flags; /* BP_* */ + QTAILQ_ENTRY(CPUWatchpoint) entry; +}; + +#ifdef CONFIG_PLUGIN +/* + * For plugins we sometime need to save the resolved iotlb data before + * the memory regions get moved around by io_writex. + */ +typedef struct SavedIOTLB { + hwaddr addr; + MemoryRegionSection *section; + hwaddr mr_offset; +} SavedIOTLB; +#endif + +struct KVMState; +struct kvm_run; + +struct hax_vcpu_state; + +#define TB_JMP_CACHE_BITS 12 +#define TB_JMP_CACHE_SIZE (1 << TB_JMP_CACHE_BITS) + +/* work queue */ + +/* The union type allows passing of 64 bit target pointers on 32 bit + * hosts in a single parameter + */ +typedef union { + int host_int; + unsigned long host_ulong; + void *host_ptr; + vaddr target_ptr; +} run_on_cpu_data; + +#define RUN_ON_CPU_HOST_PTR(p) ((run_on_cpu_data){.host_ptr = (p)}) +#define RUN_ON_CPU_HOST_INT(i) ((run_on_cpu_data){.host_int = (i)}) +#define RUN_ON_CPU_HOST_ULONG(ul) ((run_on_cpu_data){.host_ulong = (ul)}) +#define RUN_ON_CPU_TARGET_PTR(v) ((run_on_cpu_data){.target_ptr = (v)}) +#define RUN_ON_CPU_NULL RUN_ON_CPU_HOST_PTR(NULL) + +typedef void (*run_on_cpu_func)(CPUState *cpu, run_on_cpu_data data); + +struct qemu_work_item; + +#define CPU_UNSET_NUMA_NODE_ID -1 +#define CPU_TRACE_DSTATE_MAX_EVENTS 32 + +/** + * CPUState: + * @cpu_index: CPU index (informative). + * @cluster_index: Identifies which cluster this CPU is in. + * For boards which don't define clusters or for "loose" CPUs not assigned + * to a cluster this will be UNASSIGNED_CLUSTER_INDEX; otherwise it will + * be the same as the cluster-id property of the CPU object's TYPE_CPU_CLUSTER + * QOM parent. + * @nr_cores: Number of cores within this CPU package. + * @nr_threads: Number of threads within this CPU. + * @running: #true if CPU is currently running (lockless). + * @has_waiter: #true if a CPU is currently waiting for the cpu_exec_end; + * valid under cpu_list_lock. + * @created: Indicates whether the CPU thread has been successfully created. + * @interrupt_request: Indicates a pending interrupt request. + * @halted: Nonzero if the CPU is in suspended state. + * @stop: Indicates a pending stop request. + * @stopped: Indicates the CPU has been artificially stopped. + * @unplug: Indicates a pending CPU unplug request. + * @crash_occurred: Indicates the OS reported a crash (panic) for this CPU + * @singlestep_enabled: Flags for single-stepping. + * @icount_extra: Instructions until next timer event. + * @can_do_io: Nonzero if memory-mapped IO is safe. Deterministic execution + * requires that IO only be performed on the last instruction of a TB + * so that interrupts take effect immediately. + * @cpu_ases: Pointer to array of CPUAddressSpaces (which define the + * AddressSpaces this CPU has) + * @num_ases: number of CPUAddressSpaces in @cpu_ases + * @as: Pointer to the first AddressSpace, for the convenience of targets which + * only have a single AddressSpace + * @env_ptr: Pointer to subclass-specific CPUArchState field. + * @icount_decr_ptr: Pointer to IcountDecr field within subclass. + * @gdb_regs: Additional GDB registers. + * @gdb_num_regs: Number of total registers accessible to GDB. + * @gdb_num_g_regs: Number of registers in GDB 'g' packets. + * @next_cpu: Next CPU sharing TB cache. + * @opaque: User data. + * @mem_io_pc: Host Program Counter at which the memory was accessed. + * @kvm_fd: vCPU file descriptor for KVM. + * @work_mutex: Lock to prevent multiple access to @work_list. + * @work_list: List of pending asynchronous work. + * @trace_dstate_delayed: Delayed changes to trace_dstate (includes all changes + * to @trace_dstate). + * @trace_dstate: Dynamic tracing state of events for this vCPU (bitmask). + * @plugin_mask: Plugin event bitmap. Modified only via async work. + * @ignore_memory_transaction_failures: Cached copy of the MachineState + * flag of the same name: allows the board to suppress calling of the + * CPU do_transaction_failed hook function. + * + * State of one CPU core or thread. + */ +struct CPUState { + /*< private >*/ + DeviceState parent_obj; + /*< public >*/ + + int nr_cores; + int nr_threads; + + struct QemuThread *thread; +#ifdef _WIN32 + HANDLE hThread; +#endif + int thread_id; + bool running, has_waiter; + struct QemuCond *halt_cond; + bool thread_kicked; + bool created; + bool stop; + bool stopped; + + /* Should CPU start in powered-off state? */ + bool start_powered_off; + + bool unplug; + bool crash_occurred; + bool exit_request; + bool in_exclusive_context; + uint32_t cflags_next_tb; + /* updates protected by BQL */ + uint32_t interrupt_request; + int singlestep_enabled; + int64_t icount_budget; + int64_t icount_extra; + uint64_t random_seed; + sigjmp_buf jmp_env; + + QemuMutex work_mutex; + QSIMPLEQ_HEAD(, qemu_work_item) work_list; + + CPUAddressSpace *cpu_ases; + int num_ases; + AddressSpace *as; + MemoryRegion *memory; + + void *env_ptr; /* CPUArchState */ + IcountDecr *icount_decr_ptr; + + /* Accessed in parallel; all accesses must be atomic */ + struct TranslationBlock *tb_jmp_cache[TB_JMP_CACHE_SIZE]; + + struct GDBRegisterState *gdb_regs; + int gdb_num_regs; + int gdb_num_g_regs; + QTAILQ_ENTRY(CPUState) node; + + /* ice debug support */ + QTAILQ_HEAD(, CPUBreakpoint) breakpoints; + + QTAILQ_HEAD(, CPUWatchpoint) watchpoints; + CPUWatchpoint *watchpoint_hit; + + void *opaque; + + /* In order to avoid passing too many arguments to the MMIO helpers, + * we store some rarely used information in the CPU context. + */ + uintptr_t mem_io_pc; + + int kvm_fd; + struct KVMState *kvm_state; + struct kvm_run *kvm_run; + + /* Used for events with 'vcpu' and *without* the 'disabled' properties */ + DECLARE_BITMAP(trace_dstate_delayed, CPU_TRACE_DSTATE_MAX_EVENTS); + DECLARE_BITMAP(trace_dstate, CPU_TRACE_DSTATE_MAX_EVENTS); + + DECLARE_BITMAP(plugin_mask, QEMU_PLUGIN_EV_MAX); + +#ifdef CONFIG_PLUGIN + GArray *plugin_mem_cbs; + /* saved iotlb data from io_writex */ + SavedIOTLB saved_iotlb; +#endif + + /* TODO Move common fields from CPUArchState here. */ + int cpu_index; + int cluster_index; + uint32_t halted; + uint32_t can_do_io; + int32_t exception_index; + + /* shared by kvm, hax and hvf */ + bool vcpu_dirty; + + /* Used to keep track of an outstanding cpu throttle thread for migration + * autoconverge + */ + bool throttle_thread_scheduled; + + bool ignore_memory_transaction_failures; + + struct hax_vcpu_state *hax_vcpu; + + int hvf_fd; + + /* track IOMMUs whose translations we've cached in the TCG TLB */ + GArray *iommu_notifiers; +}; + +typedef QTAILQ_HEAD(CPUTailQ, CPUState) CPUTailQ; +extern CPUTailQ cpus; + +#define first_cpu QTAILQ_FIRST_RCU(&cpus) +#define CPU_NEXT(cpu) QTAILQ_NEXT_RCU(cpu, node) +#define CPU_FOREACH(cpu) QTAILQ_FOREACH_RCU(cpu, &cpus, node) +#define CPU_FOREACH_SAFE(cpu, next_cpu) \ + QTAILQ_FOREACH_SAFE_RCU(cpu, &cpus, node, next_cpu) + +extern __thread CPUState *current_cpu; + +static inline void cpu_tb_jmp_cache_clear(CPUState *cpu) +{ + unsigned int i; + + for (i = 0; i < TB_JMP_CACHE_SIZE; i++) { + qatomic_set(&cpu->tb_jmp_cache[i], NULL); + } +} + +/** + * qemu_tcg_mttcg_enabled: + * Check whether we are running MultiThread TCG or not. + * + * Returns: %true if we are in MTTCG mode %false otherwise. + */ +extern bool mttcg_enabled; +#define qemu_tcg_mttcg_enabled() (mttcg_enabled) + +/** + * cpu_paging_enabled: + * @cpu: The CPU whose state is to be inspected. + * + * Returns: %true if paging is enabled, %false otherwise. + */ +bool cpu_paging_enabled(const CPUState *cpu); + +/** + * cpu_get_memory_mapping: + * @cpu: The CPU whose memory mappings are to be obtained. + * @list: Where to write the memory mappings to. + * @errp: Pointer for reporting an #Error. + */ +void cpu_get_memory_mapping(CPUState *cpu, MemoryMappingList *list, + Error **errp); + +#if !defined(CONFIG_USER_ONLY) + +/** + * cpu_write_elf64_note: + * @f: pointer to a function that writes memory to a file + * @cpu: The CPU whose memory is to be dumped + * @cpuid: ID number of the CPU + * @opaque: pointer to the CPUState struct + */ +int cpu_write_elf64_note(WriteCoreDumpFunction f, CPUState *cpu, + int cpuid, void *opaque); + +/** + * cpu_write_elf64_qemunote: + * @f: pointer to a function that writes memory to a file + * @cpu: The CPU whose memory is to be dumped + * @cpuid: ID number of the CPU + * @opaque: pointer to the CPUState struct + */ +int cpu_write_elf64_qemunote(WriteCoreDumpFunction f, CPUState *cpu, + void *opaque); + +/** + * cpu_write_elf32_note: + * @f: pointer to a function that writes memory to a file + * @cpu: The CPU whose memory is to be dumped + * @cpuid: ID number of the CPU + * @opaque: pointer to the CPUState struct + */ +int cpu_write_elf32_note(WriteCoreDumpFunction f, CPUState *cpu, + int cpuid, void *opaque); + +/** + * cpu_write_elf32_qemunote: + * @f: pointer to a function that writes memory to a file + * @cpu: The CPU whose memory is to be dumped + * @cpuid: ID number of the CPU + * @opaque: pointer to the CPUState struct + */ +int cpu_write_elf32_qemunote(WriteCoreDumpFunction f, CPUState *cpu, + void *opaque); + +/** + * cpu_get_crash_info: + * @cpu: The CPU to get crash information for + * + * Gets the previously saved crash information. + * Caller is responsible for freeing the data. + */ +GuestPanicInformation *cpu_get_crash_info(CPUState *cpu); + +#endif /* !CONFIG_USER_ONLY */ + +/** + * CPUDumpFlags: + * @CPU_DUMP_CODE: + * @CPU_DUMP_FPU: dump FPU register state, not just integer + * @CPU_DUMP_CCOP: dump info about TCG QEMU's condition code optimization state + */ +enum CPUDumpFlags { + CPU_DUMP_CODE = 0x00010000, + CPU_DUMP_FPU = 0x00020000, + CPU_DUMP_CCOP = 0x00040000, +}; + +/** + * cpu_dump_state: + * @cpu: The CPU whose state is to be dumped. + * @f: If non-null, dump to this stream, else to current print sink. + * + * Dumps CPU state. + */ +void cpu_dump_state(CPUState *cpu, FILE *f, int flags); + +/** + * cpu_dump_statistics: + * @cpu: The CPU whose state is to be dumped. + * @flags: Flags what to dump. + * + * Dump CPU statistics to the current monitor if we have one, else to + * stdout. + */ +void cpu_dump_statistics(CPUState *cpu, int flags); + +#ifndef CONFIG_USER_ONLY +/** + * cpu_get_phys_page_attrs_debug: + * @cpu: The CPU to obtain the physical page address for. + * @addr: The virtual address. + * @attrs: Updated on return with the memory transaction attributes to use + * for this access. + * + * Obtains the physical page corresponding to a virtual one, together + * with the corresponding memory transaction attributes to use for the access. + * Use it only for debugging because no protection checks are done. + * + * Returns: Corresponding physical page address or -1 if no page found. + */ +static inline hwaddr cpu_get_phys_page_attrs_debug(CPUState *cpu, vaddr addr, + MemTxAttrs *attrs) +{ + CPUClass *cc = CPU_GET_CLASS(cpu); + + if (cc->get_phys_page_attrs_debug) { + return cc->get_phys_page_attrs_debug(cpu, addr, attrs); + } + /* Fallback for CPUs which don't implement the _attrs_ hook */ + *attrs = MEMTXATTRS_UNSPECIFIED; + return cc->get_phys_page_debug(cpu, addr); +} + +/** + * cpu_get_phys_page_debug: + * @cpu: The CPU to obtain the physical page address for. + * @addr: The virtual address. + * + * Obtains the physical page corresponding to a virtual one. + * Use it only for debugging because no protection checks are done. + * + * Returns: Corresponding physical page address or -1 if no page found. + */ +static inline hwaddr cpu_get_phys_page_debug(CPUState *cpu, vaddr addr) +{ + MemTxAttrs attrs = {}; + + return cpu_get_phys_page_attrs_debug(cpu, addr, &attrs); +} + +/** cpu_asidx_from_attrs: + * @cpu: CPU + * @attrs: memory transaction attributes + * + * Returns the address space index specifying the CPU AddressSpace + * to use for a memory access with the given transaction attributes. + */ +static inline int cpu_asidx_from_attrs(CPUState *cpu, MemTxAttrs attrs) +{ + CPUClass *cc = CPU_GET_CLASS(cpu); + int ret = 0; + + if (cc->asidx_from_attrs) { + ret = cc->asidx_from_attrs(cpu, attrs); + assert(ret < cpu->num_ases && ret >= 0); + } + return ret; +} + +#endif /* CONFIG_USER_ONLY */ + +/** + * cpu_list_add: + * @cpu: The CPU to be added to the list of CPUs. + */ +void cpu_list_add(CPUState *cpu); + +/** + * cpu_list_remove: + * @cpu: The CPU to be removed from the list of CPUs. + */ +void cpu_list_remove(CPUState *cpu); + +/** + * cpu_reset: + * @cpu: The CPU whose state is to be reset. + */ +void cpu_reset(CPUState *cpu); + +/** + * cpu_class_by_name: + * @typename: The CPU base type. + * @cpu_model: The model string without any parameters. + * + * Looks up a CPU #ObjectClass matching name @cpu_model. + * + * Returns: A #CPUClass or %NULL if not matching class is found. + */ +ObjectClass *cpu_class_by_name(const char *typename, const char *cpu_model); + +/** + * cpu_create: + * @typename: The CPU type. + * + * Instantiates a CPU and realizes the CPU. + * + * Returns: A #CPUState or %NULL if an error occurred. + */ +CPUState *cpu_create(const char *typename); + +/** + * parse_cpu_option: + * @cpu_option: The -cpu option including optional parameters. + * + * processes optional parameters and registers them as global properties + * + * Returns: type of CPU to create or prints error and terminates process + * if an error occurred. + */ +const char *parse_cpu_option(const char *cpu_option); + +/** + * cpu_has_work: + * @cpu: The vCPU to check. + * + * Checks whether the CPU has work to do. + * + * Returns: %true if the CPU has work, %false otherwise. + */ +static inline bool cpu_has_work(CPUState *cpu) +{ + CPUClass *cc = CPU_GET_CLASS(cpu); + + g_assert(cc->has_work); + return cc->has_work(cpu); +} + +/** + * qemu_cpu_is_self: + * @cpu: The vCPU to check against. + * + * Checks whether the caller is executing on the vCPU thread. + * + * Returns: %true if called from @cpu's thread, %false otherwise. + */ +bool qemu_cpu_is_self(CPUState *cpu); + +/** + * qemu_cpu_kick: + * @cpu: The vCPU to kick. + * + * Kicks @cpu's thread. + */ +void qemu_cpu_kick(CPUState *cpu); + +/** + * cpu_is_stopped: + * @cpu: The CPU to check. + * + * Checks whether the CPU is stopped. + * + * Returns: %true if run state is not running or if artificially stopped; + * %false otherwise. + */ +bool cpu_is_stopped(CPUState *cpu); + +/** + * do_run_on_cpu: + * @cpu: The vCPU to run on. + * @func: The function to be executed. + * @data: Data to pass to the function. + * @mutex: Mutex to release while waiting for @func to run. + * + * Used internally in the implementation of run_on_cpu. + */ +void do_run_on_cpu(CPUState *cpu, run_on_cpu_func func, run_on_cpu_data data, + QemuMutex *mutex); + +/** + * run_on_cpu: + * @cpu: The vCPU to run on. + * @func: The function to be executed. + * @data: Data to pass to the function. + * + * Schedules the function @func for execution on the vCPU @cpu. + */ +void run_on_cpu(CPUState *cpu, run_on_cpu_func func, run_on_cpu_data data); + +/** + * async_run_on_cpu: + * @cpu: The vCPU to run on. + * @func: The function to be executed. + * @data: Data to pass to the function. + * + * Schedules the function @func for execution on the vCPU @cpu asynchronously. + */ +void async_run_on_cpu(CPUState *cpu, run_on_cpu_func func, run_on_cpu_data data); + +/** + * async_safe_run_on_cpu: + * @cpu: The vCPU to run on. + * @func: The function to be executed. + * @data: Data to pass to the function. + * + * Schedules the function @func for execution on the vCPU @cpu asynchronously, + * while all other vCPUs are sleeping. + * + * Unlike run_on_cpu and async_run_on_cpu, the function is run outside the + * BQL. + */ +void async_safe_run_on_cpu(CPUState *cpu, run_on_cpu_func func, run_on_cpu_data data); + +/** + * cpu_in_exclusive_context() + * @cpu: The vCPU to check + * + * Returns true if @cpu is an exclusive context, for example running + * something which has previously been queued via async_safe_run_on_cpu(). + */ +static inline bool cpu_in_exclusive_context(const CPUState *cpu) +{ + return cpu->in_exclusive_context; +} + +/** + * qemu_get_cpu: + * @index: The CPUState@cpu_index value of the CPU to obtain. + * + * Gets a CPU matching @index. + * + * Returns: The CPU or %NULL if there is no matching CPU. + */ +CPUState *qemu_get_cpu(int index); + +/** + * cpu_exists: + * @id: Guest-exposed CPU ID to lookup. + * + * Search for CPU with specified ID. + * + * Returns: %true - CPU is found, %false - CPU isn't found. + */ +bool cpu_exists(int64_t id); + +/** + * cpu_by_arch_id: + * @id: Guest-exposed CPU ID of the CPU to obtain. + * + * Get a CPU with matching @id. + * + * Returns: The CPU or %NULL if there is no matching CPU. + */ +CPUState *cpu_by_arch_id(int64_t id); + +/** + * cpu_interrupt: + * @cpu: The CPU to set an interrupt on. + * @mask: The interrupts to set. + * + * Invokes the interrupt handler. + */ + +void cpu_interrupt(CPUState *cpu, int mask); + +#ifdef NEED_CPU_H + +#ifdef CONFIG_SOFTMMU +static inline void cpu_unaligned_access(CPUState *cpu, vaddr addr, + MMUAccessType access_type, + int mmu_idx, uintptr_t retaddr) +{ + CPUClass *cc = CPU_GET_CLASS(cpu); + + cc->do_unaligned_access(cpu, addr, access_type, mmu_idx, retaddr); +} + +static inline void cpu_transaction_failed(CPUState *cpu, hwaddr physaddr, + vaddr addr, unsigned size, + MMUAccessType access_type, + int mmu_idx, MemTxAttrs attrs, + MemTxResult response, + uintptr_t retaddr) +{ + CPUClass *cc = CPU_GET_CLASS(cpu); + + if (!cpu->ignore_memory_transaction_failures && cc->do_transaction_failed) { + cc->do_transaction_failed(cpu, physaddr, addr, size, access_type, + mmu_idx, attrs, response, retaddr); + } +} +#endif + +#endif /* NEED_CPU_H */ + +/** + * cpu_set_pc: + * @cpu: The CPU to set the program counter for. + * @addr: Program counter value. + * + * Sets the program counter for a CPU. + */ +static inline void cpu_set_pc(CPUState *cpu, vaddr addr) +{ + CPUClass *cc = CPU_GET_CLASS(cpu); + + cc->set_pc(cpu, addr); +} + +/** + * cpu_reset_interrupt: + * @cpu: The CPU to clear the interrupt on. + * @mask: The interrupt mask to clear. + * + * Resets interrupts on the vCPU @cpu. + */ +void cpu_reset_interrupt(CPUState *cpu, int mask); + +/** + * cpu_exit: + * @cpu: The CPU to exit. + * + * Requests the CPU @cpu to exit execution. + */ +void cpu_exit(CPUState *cpu); + +/** + * cpu_resume: + * @cpu: The CPU to resume. + * + * Resumes CPU, i.e. puts CPU into runnable state. + */ +void cpu_resume(CPUState *cpu); + +/** + * cpu_remove_sync: + * @cpu: The CPU to remove. + * + * Requests the CPU to be removed and waits till it is removed. + */ +void cpu_remove_sync(CPUState *cpu); + +/** + * process_queued_cpu_work() - process all items on CPU work queue + * @cpu: The CPU which work queue to process. + */ +void process_queued_cpu_work(CPUState *cpu); + +/** + * cpu_exec_start: + * @cpu: The CPU for the current thread. + * + * Record that a CPU has started execution and can be interrupted with + * cpu_exit. + */ +void cpu_exec_start(CPUState *cpu); + +/** + * cpu_exec_end: + * @cpu: The CPU for the current thread. + * + * Record that a CPU has stopped execution and exclusive sections + * can be executed without interrupting it. + */ +void cpu_exec_end(CPUState *cpu); + +/** + * start_exclusive: + * + * Wait for a concurrent exclusive section to end, and then start + * a section of work that is run while other CPUs are not running + * between cpu_exec_start and cpu_exec_end. CPUs that are running + * cpu_exec are exited immediately. CPUs that call cpu_exec_start + * during the exclusive section go to sleep until this CPU calls + * end_exclusive. + */ +void start_exclusive(void); + +/** + * end_exclusive: + * + * Concludes an exclusive execution section started by start_exclusive. + */ +void end_exclusive(void); + +/** + * qemu_init_vcpu: + * @cpu: The vCPU to initialize. + * + * Initializes a vCPU. + */ +void qemu_init_vcpu(CPUState *cpu); + +#define SSTEP_ENABLE 0x1 /* Enable simulated HW single stepping */ +#define SSTEP_NOIRQ 0x2 /* Do not use IRQ while single stepping */ +#define SSTEP_NOTIMER 0x4 /* Do not Timers while single stepping */ + +/** + * cpu_single_step: + * @cpu: CPU to the flags for. + * @enabled: Flags to enable. + * + * Enables or disables single-stepping for @cpu. + */ +void cpu_single_step(CPUState *cpu, int enabled); + +/* Breakpoint/watchpoint flags */ +#define BP_MEM_READ 0x01 +#define BP_MEM_WRITE 0x02 +#define BP_MEM_ACCESS (BP_MEM_READ | BP_MEM_WRITE) +#define BP_STOP_BEFORE_ACCESS 0x04 +/* 0x08 currently unused */ +#define BP_GDB 0x10 +#define BP_CPU 0x20 +#define BP_ANY (BP_GDB | BP_CPU) +#define BP_WATCHPOINT_HIT_READ 0x40 +#define BP_WATCHPOINT_HIT_WRITE 0x80 +#define BP_WATCHPOINT_HIT (BP_WATCHPOINT_HIT_READ | BP_WATCHPOINT_HIT_WRITE) + +int cpu_breakpoint_insert(CPUState *cpu, vaddr pc, int flags, + CPUBreakpoint **breakpoint); +int cpu_breakpoint_remove(CPUState *cpu, vaddr pc, int flags); +void cpu_breakpoint_remove_by_ref(CPUState *cpu, CPUBreakpoint *breakpoint); +void cpu_breakpoint_remove_all(CPUState *cpu, int mask); + +/* Return true if PC matches an installed breakpoint. */ +static inline bool cpu_breakpoint_test(CPUState *cpu, vaddr pc, int mask) +{ + CPUBreakpoint *bp; + + if (unlikely(!QTAILQ_EMPTY(&cpu->breakpoints))) { + QTAILQ_FOREACH(bp, &cpu->breakpoints, entry) { + if (bp->pc == pc && (bp->flags & mask)) { + return true; + } + } + } + return false; +} + +#ifdef CONFIG_USER_ONLY +static inline int cpu_watchpoint_insert(CPUState *cpu, vaddr addr, vaddr len, + int flags, CPUWatchpoint **watchpoint) +{ + return -ENOSYS; +} + +static inline int cpu_watchpoint_remove(CPUState *cpu, vaddr addr, + vaddr len, int flags) +{ + return -ENOSYS; +} + +static inline void cpu_watchpoint_remove_by_ref(CPUState *cpu, + CPUWatchpoint *wp) +{ +} + +static inline void cpu_watchpoint_remove_all(CPUState *cpu, int mask) +{ +} + +static inline void cpu_check_watchpoint(CPUState *cpu, vaddr addr, vaddr len, + MemTxAttrs atr, int fl, uintptr_t ra) +{ +} + +static inline int cpu_watchpoint_address_matches(CPUState *cpu, + vaddr addr, vaddr len) +{ + return 0; +} +#else +int cpu_watchpoint_insert(CPUState *cpu, vaddr addr, vaddr len, + int flags, CPUWatchpoint **watchpoint); +int cpu_watchpoint_remove(CPUState *cpu, vaddr addr, + vaddr len, int flags); +void cpu_watchpoint_remove_by_ref(CPUState *cpu, CPUWatchpoint *watchpoint); +void cpu_watchpoint_remove_all(CPUState *cpu, int mask); + +/** + * cpu_check_watchpoint: + * @cpu: cpu context + * @addr: guest virtual address + * @len: access length + * @attrs: memory access attributes + * @flags: watchpoint access type + * @ra: unwind return address + * + * Check for a watchpoint hit in [addr, addr+len) of the type + * specified by @flags. Exit via exception with a hit. + */ +void cpu_check_watchpoint(CPUState *cpu, vaddr addr, vaddr len, + MemTxAttrs attrs, int flags, uintptr_t ra); + +/** + * cpu_watchpoint_address_matches: + * @cpu: cpu context + * @addr: guest virtual address + * @len: access length + * + * Return the watchpoint flags that apply to [addr, addr+len). + * If no watchpoint is registered for the range, the result is 0. + */ +int cpu_watchpoint_address_matches(CPUState *cpu, vaddr addr, vaddr len); +#endif + +/** + * cpu_get_address_space: + * @cpu: CPU to get address space from + * @asidx: index identifying which address space to get + * + * Return the requested address space of this CPU. @asidx + * specifies which address space to read. + */ +AddressSpace *cpu_get_address_space(CPUState *cpu, int asidx); + +void QEMU_NORETURN cpu_abort(CPUState *cpu, const char *fmt, ...) + GCC_FMT_ATTR(2, 3); +extern Property cpu_common_props[]; +void cpu_exec_initfn(CPUState *cpu); +void cpu_exec_realizefn(CPUState *cpu, Error **errp); +void cpu_exec_unrealizefn(CPUState *cpu); + +/** + * target_words_bigendian: + * Returns true if the (default) endianness of the target is big endian, + * false otherwise. Note that in target-specific code, you can use + * TARGET_WORDS_BIGENDIAN directly instead. On the other hand, common + * code should normally never need to know about the endianness of the + * target, so please do *not* use this function unless you know very well + * what you are doing! + */ +bool target_words_bigendian(void); + +#ifdef NEED_CPU_H + +#ifdef CONFIG_SOFTMMU +extern const VMStateDescription vmstate_cpu_common; +#else +#define vmstate_cpu_common vmstate_dummy +#endif + +#define VMSTATE_CPU() { \ + .name = "parent_obj", \ + .size = sizeof(CPUState), \ + .vmsd = &vmstate_cpu_common, \ + .flags = VMS_STRUCT, \ + .offset = 0, \ +} + +#endif /* NEED_CPU_H */ + +#define UNASSIGNED_CPU_INDEX -1 +#define UNASSIGNED_CLUSTER_INDEX -1 + +#endif diff --git a/include/hw/core/generic-loader.h b/include/hw/core/generic-loader.h new file mode 100644 index 000000000..19d87b39c --- /dev/null +++ b/include/hw/core/generic-loader.h @@ -0,0 +1,47 @@ +/* + * Generic Loader + * + * Copyright (C) 2014 Li Guang + * Written by Li Guang + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * for more details. + */ + +#ifndef GENERIC_LOADER_H +#define GENERIC_LOADER_H + +#include "elf.h" +#include "hw/qdev-core.h" +#include "qom/object.h" + +struct GenericLoaderState { + /* */ + DeviceState parent_obj; + + /* */ + CPUState *cpu; + + uint64_t addr; + uint64_t data; + uint8_t data_len; + uint32_t cpu_num; + + char *file; + + bool force_raw; + bool data_be; + bool set_pc; +}; + +#define TYPE_GENERIC_LOADER "loader" +OBJECT_DECLARE_SIMPLE_TYPE(GenericLoaderState, GENERIC_LOADER) + +#endif diff --git a/include/hw/core/split-irq.h b/include/hw/core/split-irq.h new file mode 100644 index 000000000..ff8852f40 --- /dev/null +++ b/include/hw/core/split-irq.h @@ -0,0 +1,55 @@ +/* + * IRQ splitter device. + * + * Copyright (c) 2018 Linaro Limited. + * Written by Peter Maydell + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ + +/* This is a simple device which has one GPIO input line and multiple + * GPIO output lines. Any change on the input line is forwarded to all + * of the outputs. + * + * QEMU interface: + * + one unnamed GPIO input: the input line + * + N unnamed GPIO outputs: the output lines + * + QOM property "num-lines": sets the number of output lines + */ +#ifndef HW_SPLIT_IRQ_H +#define HW_SPLIT_IRQ_H + +#include "hw/sysbus.h" +#include "qom/object.h" + +#define TYPE_SPLIT_IRQ "split-irq" + +#define MAX_SPLIT_LINES 16 + + +OBJECT_DECLARE_SIMPLE_TYPE(SplitIRQ, SPLIT_IRQ) + +struct SplitIRQ { + DeviceState parent_obj; + + qemu_irq out_irq[MAX_SPLIT_LINES]; + uint16_t num_lines; +}; + +#endif diff --git a/include/hw/cpu/a15mpcore.h b/include/hw/cpu/a15mpcore.h new file mode 100644 index 000000000..75d39e545 --- /dev/null +++ b/include/hw/cpu/a15mpcore.h @@ -0,0 +1,44 @@ +/* + * Cortex-A15MPCore internal peripheral emulation. + * + * Copyright (c) 2012 Linaro Limited. + * Written by Peter Maydell. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, see . + */ +#ifndef HW_CPU_A15MPCORE_H +#define HW_CPU_A15MPCORE_H + +#include "hw/sysbus.h" +#include "hw/intc/arm_gic.h" +#include "qom/object.h" + +/* A15MP private memory region. */ + +#define TYPE_A15MPCORE_PRIV "a15mpcore_priv" +OBJECT_DECLARE_SIMPLE_TYPE(A15MPPrivState, A15MPCORE_PRIV) + +struct A15MPPrivState { + /*< private >*/ + SysBusDevice parent_obj; + /*< public >*/ + + uint32_t num_cpu; + uint32_t num_irq; + MemoryRegion container; + + GICState gic; +}; + +#endif diff --git a/include/hw/cpu/a9mpcore.h b/include/hw/cpu/a9mpcore.h new file mode 100644 index 000000000..e0396ab6a --- /dev/null +++ b/include/hw/cpu/a9mpcore.h @@ -0,0 +1,39 @@ +/* + * Cortex-A9MPCore internal peripheral emulation. + * + * Copyright (c) 2009 CodeSourcery. + * Copyright (c) 2011 Linaro Limited. + * Written by Paul Brook, Peter Maydell. + * + * This code is licensed under the GPL. + */ +#ifndef HW_CPU_A9MPCORE_H +#define HW_CPU_A9MPCORE_H + +#include "hw/sysbus.h" +#include "hw/intc/arm_gic.h" +#include "hw/misc/a9scu.h" +#include "hw/timer/arm_mptimer.h" +#include "hw/timer/a9gtimer.h" +#include "qom/object.h" + +#define TYPE_A9MPCORE_PRIV "a9mpcore_priv" +OBJECT_DECLARE_SIMPLE_TYPE(A9MPPrivState, A9MPCORE_PRIV) + +struct A9MPPrivState { + /*< private >*/ + SysBusDevice parent_obj; + /*< public >*/ + + uint32_t num_cpu; + MemoryRegion container; + uint32_t num_irq; + + A9SCUState scu; + GICState gic; + A9GTimerState gtimer; + ARMMPTimerState mptimer; + ARMMPTimerState wdt; +}; + +#endif diff --git a/include/hw/cpu/arm11mpcore.h b/include/hw/cpu/arm11mpcore.h new file mode 100644 index 000000000..2cac8c123 --- /dev/null +++ b/include/hw/cpu/arm11mpcore.h @@ -0,0 +1,35 @@ +/* + * ARM11MPCore internal peripheral emulation. + * + * Copyright (c) 2006-2007 CodeSourcery. + * Written by Paul Brook + * + * This code is licensed under the GPL. + */ + +#ifndef HW_CPU_ARM11MPCORE_H +#define HW_CPU_ARM11MPCORE_H + +#include "hw/sysbus.h" +#include "hw/misc/arm11scu.h" +#include "hw/intc/arm_gic.h" +#include "hw/timer/arm_mptimer.h" +#include "qom/object.h" + +#define TYPE_ARM11MPCORE_PRIV "arm11mpcore_priv" +OBJECT_DECLARE_SIMPLE_TYPE(ARM11MPCorePriveState, ARM11MPCORE_PRIV) + +struct ARM11MPCorePriveState { + SysBusDevice parent_obj; + + uint32_t num_cpu; + MemoryRegion container; + uint32_t num_irq; + + ARM11SCUState scu; + GICState gic; + ARMMPTimerState mptimer; + ARMMPTimerState wdtimer; +}; + +#endif diff --git a/include/hw/cpu/cluster.h b/include/hw/cpu/cluster.h new file mode 100644 index 000000000..53fbf36af --- /dev/null +++ b/include/hw/cpu/cluster.h @@ -0,0 +1,81 @@ +/* + * QEMU CPU cluster + * + * Copyright (c) 2018 GreenSocs SAS + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version 2 + * of the License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, see + * + */ +#ifndef HW_CPU_CLUSTER_H +#define HW_CPU_CLUSTER_H + +#include "hw/qdev-core.h" +#include "qom/object.h" + +/* + * CPU Cluster type + * + * A cluster is a group of CPUs which are all identical and have the same view + * of the rest of the system. It is mainly an internal QEMU representation and + * does not necessarily match with the notion of clusters on the real hardware. + * + * If CPUs are not identical (for example, Cortex-A53 and Cortex-A57 CPUs in an + * Arm big.LITTLE system) they should be in different clusters. If the CPUs do + * not have the same view of memory (for example the main CPU and a management + * controller processor) they should be in different clusters. + * + * A cluster is created by creating an object of TYPE_CPU_CLUSTER, and then + * adding the CPUs to it as QOM child objects (e.g. using the + * object_initialize_child() or object_property_add_child() functions). + * The CPUs may be either direct children of the cluster object, or indirect + * children (e.g. children of children of the cluster object). + * + * All CPUs must be added as children before the cluster is realized. + * (Regrettably QOM provides no way to prevent adding children to a realized + * object and no way for the parent to be notified when a new child is added + * to it, so this restriction is not checked for, but the system will not + * behave correctly if it is not adhered to. The cluster will assert that + * it contains at least one CPU, which should catch most inadvertent + * violations of this constraint.) + * + * A CPU which is not put into any cluster will be considered implicitly + * to be in a cluster with all the other "loose" CPUs, so all CPUs that are + * not assigned to clusters must be identical. + */ + +#define TYPE_CPU_CLUSTER "cpu-cluster" +OBJECT_DECLARE_SIMPLE_TYPE(CPUClusterState, CPU_CLUSTER) + +/* + * This limit is imposed by TCG, which puts the cluster ID into an + * 8 bit field (and uses all-1s for the default "not in any cluster"). + */ +#define MAX_CLUSTERS 255 + +/** + * CPUClusterState: + * @cluster_id: The cluster ID. This value is for internal use only and should + * not be exposed directly to the user or to the guest. + * + * State of a CPU cluster. + */ +struct CPUClusterState { + /*< private >*/ + DeviceState parent_obj; + + /*< public >*/ + uint32_t cluster_id; +}; + +#endif diff --git a/include/hw/cpu/core.h b/include/hw/cpu/core.h new file mode 100644 index 000000000..98ab91647 --- /dev/null +++ b/include/hw/cpu/core.h @@ -0,0 +1,33 @@ +/* + * CPU core abstract device + * + * Copyright (C) 2016 Bharata B Rao + * + * This work is licensed under the terms of the GNU GPL, version 2 or later. + * See the COPYING file in the top-level directory. + */ +#ifndef HW_CPU_CORE_H +#define HW_CPU_CORE_H + +#include "hw/qdev-core.h" +#include "qom/object.h" + +#define TYPE_CPU_CORE "cpu-core" + +OBJECT_DECLARE_SIMPLE_TYPE(CPUCore, CPU_CORE) + +struct CPUCore { + /*< private >*/ + DeviceState parent_obj; + + /*< public >*/ + int core_id; + int nr_threads; +}; + +/* Note: topology field names need to be kept in sync with + * 'CpuInstanceProperties' */ + +#define CPU_CORE_PROP_CORE_ID "core-id" + +#endif diff --git a/include/hw/cris/etraxfs.h b/include/hw/cris/etraxfs.h new file mode 100644 index 000000000..9e99380e0 --- /dev/null +++ b/include/hw/cris/etraxfs.h @@ -0,0 +1,53 @@ +/* + * QEMU ETRAX System Emulator + * + * Copyright (c) 2008 Edgar E. Iglesias, Axis Communications AB. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ + +#ifndef HW_ETRAXFS_H +#define HW_ETRAXFS_H + +#include "net/net.h" +#include "hw/cris/etraxfs_dma.h" +#include "hw/qdev-properties.h" +#include "hw/sysbus.h" + +DeviceState *etraxfs_eth_init(NICInfo *nd, hwaddr base, int phyaddr, + struct etraxfs_dma_client *dma_out, + struct etraxfs_dma_client *dma_in); + +static inline DeviceState *etraxfs_ser_create(hwaddr addr, + qemu_irq irq, + Chardev *chr) +{ + DeviceState *dev; + SysBusDevice *s; + + dev = qdev_new("etraxfs,serial"); + s = SYS_BUS_DEVICE(dev); + qdev_prop_set_chr(dev, "chardev", chr); + sysbus_realize_and_unref(s, &error_fatal); + sysbus_mmio_map(s, 0, addr); + sysbus_connect_irq(s, 0, irq); + return dev; +} + +#endif diff --git a/include/hw/cris/etraxfs_dma.h b/include/hw/cris/etraxfs_dma.h new file mode 100644 index 000000000..095d76b95 --- /dev/null +++ b/include/hw/cris/etraxfs_dma.h @@ -0,0 +1,36 @@ +#ifndef HW_ETRAXFS_DMA_H +#define HW_ETRAXFS_DMA_H + +#include "exec/hwaddr.h" + +struct dma_context_metadata { + /* data descriptor md */ + uint16_t metadata; +}; + +struct etraxfs_dma_client +{ + /* DMA controller. */ + int channel; + void *ctrl; + + /* client. */ + struct { + int (*push)(void *opaque, unsigned char *buf, + int len, bool eop); + void (*pull)(void *opaque); + void (*metadata_push)(void *opaque, + const struct dma_context_metadata *md); + void *opaque; + } client; +}; + +void *etraxfs_dmac_init(hwaddr base, int nr_channels); +void etraxfs_dmac_connect(void *opaque, int channel, qemu_irq *line, + int input); +void etraxfs_dmac_connect_client(void *opaque, int c, + struct etraxfs_dma_client *cl); +int etraxfs_dmac_input(struct etraxfs_dma_client *client, + void *buf, int len, int eop); + +#endif diff --git a/include/hw/display/bcm2835_fb.h b/include/hw/display/bcm2835_fb.h new file mode 100644 index 000000000..38671afff --- /dev/null +++ b/include/hw/display/bcm2835_fb.h @@ -0,0 +1,89 @@ +/* + * Raspberry Pi emulation (c) 2012 Gregory Estrade + * Upstreaming code cleanup [including bcm2835_*] (c) 2013 Jan Petrous + * + * Rasperry Pi 2 emulation and refactoring Copyright (c) 2015, Microsoft + * Written by Andrew Baumann + * + * This work is licensed under the terms of the GNU GPL, version 2 or later. + * See the COPYING file in the top-level directory. + */ + +#ifndef BCM2835_FB_H +#define BCM2835_FB_H + +#include "hw/sysbus.h" +#include "ui/console.h" +#include "qom/object.h" + +#define TYPE_BCM2835_FB "bcm2835-fb" +OBJECT_DECLARE_SIMPLE_TYPE(BCM2835FBState, BCM2835_FB) + +/* + * Configuration information about the fb which the guest can program + * via the mailbox property interface. + */ +typedef struct { + uint32_t xres, yres; + uint32_t xres_virtual, yres_virtual; + uint32_t xoffset, yoffset; + uint32_t bpp; + uint32_t base; + uint32_t pixo; + uint32_t alpha; +} BCM2835FBConfig; + +struct BCM2835FBState { + /*< private >*/ + SysBusDevice busdev; + /*< public >*/ + + uint32_t vcram_base, vcram_size; + MemoryRegion *dma_mr; + AddressSpace dma_as; + MemoryRegion iomem; + MemoryRegionSection fbsection; + QemuConsole *con; + qemu_irq mbox_irq; + + bool lock, invalidate, pending; + + BCM2835FBConfig config; + BCM2835FBConfig initial_config; +}; + +void bcm2835_fb_reconfigure(BCM2835FBState *s, BCM2835FBConfig *newconfig); + +/** + * bcm2835_fb_get_pitch: return number of bytes per line of the framebuffer + * @config: configuration info for the framebuffer + * + * Return the number of bytes per line of the framebuffer, ie the number + * that must be added to a pixel address to get the address of the pixel + * directly below it on screen. + */ +static inline uint32_t bcm2835_fb_get_pitch(BCM2835FBConfig *config) +{ + uint32_t xres = MAX(config->xres, config->xres_virtual); + return xres * (config->bpp >> 3); +} + +/** + * bcm2835_fb_get_size: return total size of framebuffer in bytes + * @config: configuration info for the framebuffer + */ +static inline uint32_t bcm2835_fb_get_size(BCM2835FBConfig *config) +{ + uint32_t yres = MAX(config->yres, config->yres_virtual); + return yres * bcm2835_fb_get_pitch(config); +} + +/** + * bcm2835_fb_validate_config: check provided config + * + * Validates the configuration information provided by the guest and + * adjusts it if necessary. + */ +void bcm2835_fb_validate_config(BCM2835FBConfig *config); + +#endif diff --git a/include/hw/display/blizzard.h b/include/hw/display/blizzard.h new file mode 100644 index 000000000..5b3301883 --- /dev/null +++ b/include/hw/display/blizzard.h @@ -0,0 +1,21 @@ +/* + * Epson S1D13744/S1D13745 (Blizzard/Hailstorm/Tornado) LCD/TV controller. + * + * Copyright (C) 2008 Nokia Corporation + * Written by Andrzej Zaborowski + * + * This work is licensed under the terms of the GNU GPL, version 2 or later. + * See the COPYING file in the top-level directory. + */ + +#ifndef HW_DISPLAY_BLIZZARD_H +#define HW_DISPLAY_BLIZZARD_H + + +void *s1d13745_init(qemu_irq gpio_int); +void s1d13745_write(void *opaque, int dc, uint16_t value); +void s1d13745_write_block(void *opaque, int dc, + void *buf, size_t len, int pitch); +uint16_t s1d13745_read(void *opaque, int dc); + +#endif diff --git a/include/hw/display/bochs-vbe.h b/include/hw/display/bochs-vbe.h new file mode 100644 index 000000000..bc2f046ee --- /dev/null +++ b/include/hw/display/bochs-vbe.h @@ -0,0 +1,69 @@ +#ifndef HW_DISPLAY_BOCHS_VBE_H +#define HW_DISPLAY_BOCHS_VBE_H + +/* + * bochs vesa bios extension interface + */ + +#define VBE_DISPI_MAX_XRES 16000 +#define VBE_DISPI_MAX_YRES 12000 +#define VBE_DISPI_MAX_BPP 32 + +#define VBE_DISPI_INDEX_ID 0x0 +#define VBE_DISPI_INDEX_XRES 0x1 +#define VBE_DISPI_INDEX_YRES 0x2 +#define VBE_DISPI_INDEX_BPP 0x3 +#define VBE_DISPI_INDEX_ENABLE 0x4 +#define VBE_DISPI_INDEX_BANK 0x5 +#define VBE_DISPI_INDEX_VIRT_WIDTH 0x6 +#define VBE_DISPI_INDEX_VIRT_HEIGHT 0x7 +#define VBE_DISPI_INDEX_X_OFFSET 0x8 +#define VBE_DISPI_INDEX_Y_OFFSET 0x9 +#define VBE_DISPI_INDEX_NB 0xa /* size of vbe_regs[] */ +#define VBE_DISPI_INDEX_VIDEO_MEMORY_64K 0xa /* read-only, not in vbe_regs */ + +/* VBE_DISPI_INDEX_ID */ +#define VBE_DISPI_ID0 0xB0C0 +#define VBE_DISPI_ID1 0xB0C1 +#define VBE_DISPI_ID2 0xB0C2 +#define VBE_DISPI_ID3 0xB0C3 +#define VBE_DISPI_ID4 0xB0C4 +#define VBE_DISPI_ID5 0xB0C5 + +/* VBE_DISPI_INDEX_ENABLE */ +#define VBE_DISPI_DISABLED 0x00 +#define VBE_DISPI_ENABLED 0x01 +#define VBE_DISPI_GETCAPS 0x02 +#define VBE_DISPI_8BIT_DAC 0x20 +#define VBE_DISPI_LFB_ENABLED 0x40 +#define VBE_DISPI_NOCLEARMEM 0x80 + +/* only used by isa-vga, pci vga devices use a memory bar */ +#define VBE_DISPI_LFB_PHYSICAL_ADDRESS 0xE0000000 + + +/* + * qemu extension: mmio bar (region 2) + */ + +#define PCI_VGA_MMIO_SIZE 0x1000 + +/* vga register region */ +#define PCI_VGA_IOPORT_OFFSET 0x400 +#define PCI_VGA_IOPORT_SIZE (0x3e0 - 0x3c0) + +/* bochs vbe register region */ +#define PCI_VGA_BOCHS_OFFSET 0x500 +#define PCI_VGA_BOCHS_SIZE (0x0b * 2) + +/* qemu extension register region */ +#define PCI_VGA_QEXT_OFFSET 0x600 +#define PCI_VGA_QEXT_SIZE (2 * 4) + +/* qemu extension registers */ +#define PCI_VGA_QEXT_REG_SIZE (0 * 4) +#define PCI_VGA_QEXT_REG_BYTEORDER (1 * 4) +#define PCI_VGA_QEXT_LITTLE_ENDIAN 0x1e1e1e1e +#define PCI_VGA_QEXT_BIG_ENDIAN 0xbebebebe + +#endif /* HW_DISPLAY_BOCHS_VBE_H */ diff --git a/include/hw/display/dpcd.h b/include/hw/display/dpcd.h new file mode 100644 index 000000000..a4e37abf6 --- /dev/null +++ b/include/hw/display/dpcd.h @@ -0,0 +1,105 @@ +/* + * dpcd.h + * + * Copyright (C)2015 : GreenSocs Ltd + * http://www.greensocs.com/ , email: info@greensocs.com + * + * Developed by : + * Frederic Konrad + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 2 of the License, or + * (at your option)any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, see . + * + */ + +#ifndef DPCD_H +#define DPCD_H +#include "qom/object.h" + + +#define TYPE_DPCD "dpcd" +OBJECT_DECLARE_SIMPLE_TYPE(DPCDState, DPCD) + +/* DCPD Revision. */ +#define DPCD_REVISION 0x00 +#define DPCD_REV_1_0 0x10 +#define DPCD_REV_1_1 0x11 + +/* DCPD Max Link Rate. */ +#define DPCD_MAX_LINK_RATE 0x01 +#define DPCD_1_62GBPS 0x06 +#define DPCD_2_7GBPS 0x0A +#define DPCD_5_4GBPS 0x14 + +#define DPCD_MAX_LANE_COUNT 0x02 +#define DPCD_ONE_LANE 0x01 +#define DPCD_TWO_LANES 0x02 +#define DPCD_FOUR_LANES 0x04 + +/* DCPD Max down spread. */ +#define DPCD_UP_TO_0_5 0x01 +#define DPCD_NO_AUX_HANDSHAKE_LINK_TRAINING 0x40 + +/* DCPD Downstream port type. */ +#define DPCD_DISPLAY_PORT 0x00 +#define DPCD_ANALOG 0x02 +#define DPCD_DVI_HDMI 0x04 +#define DPCD_OTHER 0x06 + +/* DPCD Format conversion. */ +#define DPCD_FORMAT_CONVERSION 0x08 + +/* Main link channel coding. */ +#define DPCD_ANSI_8B_10B 0x01 + +/* Down stream port count. */ +#define DPCD_OUI_SUPPORTED 0x80 + +/* Receiver port capability. */ +#define DPCD_RECEIVE_PORT0_CAP_0 0x08 +#define DPCD_RECEIVE_PORT0_CAP_1 0x09 +#define DPCD_EDID_PRESENT 0x02 +#define DPCD_ASSOCIATED_TO_PRECEDING_PORT 0x04 + +/* Down stream port capability. */ +#define DPCD_CAP_DISPLAY_PORT 0x000 +#define DPCD_CAP_ANALOG_VGA 0x001 +#define DPCD_CAP_DVI 0x002 +#define DPCD_CAP_HDMI 0x003 +#define DPCD_CAP_OTHER 0x100 + +#define DPCD_LANE0_1_STATUS 0x202 +#define DPCD_LANE0_CR_DONE (1 << 0) +#define DPCD_LANE0_CHANNEL_EQ_DONE (1 << 1) +#define DPCD_LANE0_SYMBOL_LOCKED (1 << 2) +#define DPCD_LANE1_CR_DONE (1 << 4) +#define DPCD_LANE1_CHANNEL_EQ_DONE (1 << 5) +#define DPCD_LANE1_SYMBOL_LOCKED (1 << 6) + +#define DPCD_LANE2_3_STATUS 0x203 +#define DPCD_LANE2_CR_DONE (1 << 0) +#define DPCD_LANE2_CHANNEL_EQ_DONE (1 << 1) +#define DPCD_LANE2_SYMBOL_LOCKED (1 << 2) +#define DPCD_LANE3_CR_DONE (1 << 4) +#define DPCD_LANE3_CHANNEL_EQ_DONE (1 << 5) +#define DPCD_LANE3_SYMBOL_LOCKED (1 << 6) + +#define DPCD_LANE_ALIGN_STATUS_UPDATED 0x204 +#define DPCD_INTERLANE_ALIGN_DONE 0x01 +#define DPCD_DOWNSTREAM_PORT_STATUS_CHANGED 0x40 +#define DPCD_LINK_STATUS_UPDATED 0x80 + +#define DPCD_SINK_STATUS 0x205 +#define DPCD_RECEIVE_PORT_0_STATUS 0x01 + +#endif /* DPCD_H */ diff --git a/include/hw/display/edid.h b/include/hw/display/edid.h new file mode 100644 index 000000000..1f8fc9b37 --- /dev/null +++ b/include/hw/display/edid.h @@ -0,0 +1,30 @@ +#ifndef EDID_H +#define EDID_H + +typedef struct qemu_edid_info { + const char *vendor; /* http://www.uefi.org/pnp_id_list */ + const char *name; + const char *serial; + uint16_t width_mm; + uint16_t height_mm; + uint32_t prefx; + uint32_t prefy; + uint32_t maxx; + uint32_t maxy; +} qemu_edid_info; + +void qemu_edid_generate(uint8_t *edid, size_t size, + qemu_edid_info *info); +size_t qemu_edid_size(uint8_t *edid); +void qemu_edid_region_io(MemoryRegion *region, Object *owner, + uint8_t *edid, size_t size); + +uint32_t qemu_edid_dpi_to_mm(uint32_t dpi, uint32_t res); + +#define DEFINE_EDID_PROPERTIES(_state, _edid_info) \ + DEFINE_PROP_UINT32("xres", _state, _edid_info.prefx, 0), \ + DEFINE_PROP_UINT32("yres", _state, _edid_info.prefy, 0), \ + DEFINE_PROP_UINT32("xmax", _state, _edid_info.maxx, 0), \ + DEFINE_PROP_UINT32("ymax", _state, _edid_info.maxy, 0) + +#endif /* EDID_H */ diff --git a/include/hw/display/i2c-ddc.h b/include/hw/display/i2c-ddc.h new file mode 100644 index 000000000..94b588058 --- /dev/null +++ b/include/hw/display/i2c-ddc.h @@ -0,0 +1,41 @@ +/* A simple I2C slave for returning monitor EDID data via DDC. + * + * Copyright (c) 2011 Linaro Limited + * Written by Peter Maydell + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, see . + */ + +#ifndef I2C_DDC_H +#define I2C_DDC_H + +#include "hw/display/edid.h" +#include "hw/i2c/i2c.h" +#include "qom/object.h" + +/* A simple I2C slave which just returns the contents of its EDID blob. */ +struct I2CDDCState { + /*< private >*/ + I2CSlave i2c; + /*< public >*/ + bool firstbyte; + uint8_t reg; + qemu_edid_info edid_info; + uint8_t edid_blob[128]; +}; + + +#define TYPE_I2CDDC "i2c-ddc" +OBJECT_DECLARE_SIMPLE_TYPE(I2CDDCState, I2CDDC) + +#endif /* I2C_DDC_H */ diff --git a/include/hw/display/macfb.h b/include/hw/display/macfb.h new file mode 100644 index 000000000..c133fa271 --- /dev/null +++ b/include/hw/display/macfb.h @@ -0,0 +1,59 @@ +/* + * QEMU Motorola 680x0 Macintosh Video Card Emulation + * Copyright (c) 2012-2018 Laurent Vivier + * + * some parts from QEMU G364 framebuffer Emulator. + * Copyright (c) 2007-2011 Herve Poussineau + * + * This work is licensed under the terms of the GNU GPL, version 2 or later. + * See the COPYING file in the top-level directory. + * + */ + +#ifndef MACFB_H +#define MACFB_H + +#include "qemu/osdep.h" +#include "exec/memory.h" +#include "ui/console.h" +#include "qom/object.h" + +typedef struct MacfbState { + MemoryRegion mem_vram; + MemoryRegion mem_ctrl; + QemuConsole *con; + + uint8_t *vram; + uint32_t vram_bit_mask; + uint32_t palette_current; + uint8_t color_palette[256 * 3]; + uint32_t width, height; /* in pixels */ + uint8_t depth; +} MacfbState; + +#define TYPE_MACFB "sysbus-macfb" +OBJECT_DECLARE_SIMPLE_TYPE(MacfbSysBusState, MACFB) + +struct MacfbSysBusState { + SysBusDevice busdev; + + MacfbState macfb; +}; + +#define TYPE_NUBUS_MACFB "nubus-macfb" +OBJECT_DECLARE_TYPE(MacfbNubusState, MacfbNubusDeviceClass, NUBUS_MACFB) + +struct MacfbNubusDeviceClass { + DeviceClass parent_class; + + DeviceRealize parent_realize; +}; + + +struct MacfbNubusState { + NubusDevice busdev; + + MacfbState macfb; +}; + +#endif diff --git a/include/hw/display/milkymist_tmu2.h b/include/hw/display/milkymist_tmu2.h new file mode 100644 index 000000000..fdce9535a --- /dev/null +++ b/include/hw/display/milkymist_tmu2.h @@ -0,0 +1,42 @@ +/* + * QEMU model of the Milkymist texture mapping unit. + * + * Copyright (c) 2010 Michael Walle + * Copyright (c) 2010 Sebastien Bourdeauducq + * + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, see . + * + * + * Specification available at: + * http://milkymist.walle.cc/socdoc/tmu2.pdf + * + */ + +#ifndef HW_DISPLAY_MILKYMIST_TMU2_H +#define HW_DISPLAY_MILKYMIST_TMU2_H + +#include "exec/hwaddr.h" +#include "hw/qdev-core.h" + +#if defined(CONFIG_X11) && defined(CONFIG_OPENGL) +DeviceState *milkymist_tmu2_create(hwaddr base, qemu_irq irq); +#else +static inline DeviceState *milkymist_tmu2_create(hwaddr base, qemu_irq irq) +{ + return NULL; +} +#endif + +#endif /* HW_DISPLAY_MILKYMIST_TMU2_H */ diff --git a/include/hw/display/ramfb.h b/include/hw/display/ramfb.h new file mode 100644 index 000000000..b33a2c467 --- /dev/null +++ b/include/hw/display/ramfb.h @@ -0,0 +1,12 @@ +#ifndef RAMFB_H +#define RAMFB_H + +/* ramfb.c */ +typedef struct RAMFBState RAMFBState; +void ramfb_display_update(QemuConsole *con, RAMFBState *s); +RAMFBState *ramfb_setup(Error **errp); + +/* ramfb-standalone.c */ +#define TYPE_RAMFB_DEVICE "ramfb" + +#endif /* RAMFB_H */ diff --git a/include/hw/display/tc6393xb.h b/include/hw/display/tc6393xb.h new file mode 100644 index 000000000..f9263bf98 --- /dev/null +++ b/include/hw/display/tc6393xb.h @@ -0,0 +1,21 @@ +/* + * Toshiba TC6393XB I/O Controller. + * Found in Sharp Zaurus SL-6000 (tosa) or some + * Toshiba e-Series PDAs. + * + * Copyright (c) 2007 Hervé Poussineau + * + * This work is licensed under the terms of the GNU GPL, version 2 or later. + * See the COPYING file in the top-level directory. + */ + +#ifndef HW_DISPLAY_TC6393XB_H +#define HW_DISPLAY_TC6393XB_H + +typedef struct TC6393xbState TC6393xbState; + +TC6393xbState *tc6393xb_init(struct MemoryRegion *sysmem, + uint32_t base, qemu_irq irq); +qemu_irq tc6393xb_l3v_get(TC6393xbState *s); + +#endif diff --git a/include/hw/display/vga.h b/include/hw/display/vga.h new file mode 100644 index 000000000..ca0003dbf --- /dev/null +++ b/include/hw/display/vga.h @@ -0,0 +1,25 @@ +/* + * QEMU VGA Emulator. + * + * Copyright (c) 2003 Fabrice Bellard + * + * This work is licensed under the terms of the GNU GPL, version 2 or later. + * See the COPYING file in the top-level directory. + */ +#ifndef QEMU_HW_DISPLAY_VGA_H +#define QEMU_HW_DISPLAY_VGA_H + +#include "exec/hwaddr.h" + +enum vga_retrace_method { + VGA_RETRACE_DUMB, + VGA_RETRACE_PRECISE +}; + +extern enum vga_retrace_method vga_retrace_method; + +int isa_vga_mm_init(hwaddr vram_base, + hwaddr ctrl_base, int it_shift, + MemoryRegion *address_space); + +#endif diff --git a/include/hw/display/xlnx_dp.h b/include/hw/display/xlnx_dp.h new file mode 100644 index 000000000..8ab4733bb --- /dev/null +++ b/include/hw/display/xlnx_dp.h @@ -0,0 +1,110 @@ +/* + * xlnx_dp.h + * + * Copyright (C) 2015 : GreenSocs Ltd + * http://www.greensocs.com/ , email: info@greensocs.com + * + * Developed by : + * Frederic Konrad + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, see . + */ + +#ifndef XLNX_DP_H +#define XLNX_DP_H + +#include "hw/sysbus.h" +#include "ui/console.h" +#include "hw/misc/auxbus.h" +#include "hw/i2c/i2c.h" +#include "hw/display/dpcd.h" +#include "hw/display/i2c-ddc.h" +#include "qemu/fifo8.h" +#include "qemu/units.h" +#include "hw/dma/xlnx_dpdma.h" +#include "audio/audio.h" +#include "qom/object.h" + +#define AUD_CHBUF_MAX_DEPTH (32 * KiB) +#define MAX_QEMU_BUFFER_SIZE (4 * KiB) + +#define DP_CORE_REG_ARRAY_SIZE (0x3AF >> 2) +#define DP_AVBUF_REG_ARRAY_SIZE (0x238 >> 2) +#define DP_VBLEND_REG_ARRAY_SIZE (0x1DF >> 2) +#define DP_AUDIO_REG_ARRAY_SIZE (0x50 >> 2) + +struct PixmanPlane { + pixman_format_code_t format; + DisplaySurface *surface; +}; + +struct XlnxDPState { + /*< private >*/ + SysBusDevice parent_obj; + + /* < public >*/ + MemoryRegion container; + + uint32_t core_registers[DP_CORE_REG_ARRAY_SIZE]; + MemoryRegion core_iomem; + + uint32_t avbufm_registers[DP_AVBUF_REG_ARRAY_SIZE]; + MemoryRegion avbufm_iomem; + + uint32_t vblend_registers[DP_VBLEND_REG_ARRAY_SIZE]; + MemoryRegion vblend_iomem; + + uint32_t audio_registers[DP_AUDIO_REG_ARRAY_SIZE]; + MemoryRegion audio_iomem; + + QemuConsole *console; + + /* + * This is the planes used to display in console. When the blending is + * enabled bout_plane is displayed in console else it's g_plane. + */ + struct PixmanPlane g_plane; + struct PixmanPlane v_plane; + struct PixmanPlane bout_plane; + + QEMUSoundCard aud_card; + SWVoiceOut *amixer_output_stream; + int16_t audio_buffer_0[AUD_CHBUF_MAX_DEPTH]; + int16_t audio_buffer_1[AUD_CHBUF_MAX_DEPTH]; + size_t audio_data_available[2]; + int64_t temp_buffer[AUD_CHBUF_MAX_DEPTH]; + int16_t out_buffer[AUD_CHBUF_MAX_DEPTH]; + size_t byte_left; /* byte available in out_buffer. */ + size_t data_ptr; /* next byte to be sent to QEMU. */ + + /* Associated DPDMA controller. */ + XlnxDPDMAState *dpdma; + + qemu_irq irq; + + AUXBus *aux_bus; + Fifo8 rx_fifo; + Fifo8 tx_fifo; + + /* + * XXX: This should be in an other module. + */ + DPCDState *dpcd; + I2CDDCState *edid; +}; + +#define TYPE_XLNX_DP "xlnx.v-dp" +OBJECT_DECLARE_SIMPLE_TYPE(XlnxDPState, XLNX_DP) + +#endif diff --git a/include/hw/dma/bcm2835_dma.h b/include/hw/dma/bcm2835_dma.h new file mode 100644 index 000000000..1d26b1d8d --- /dev/null +++ b/include/hw/dma/bcm2835_dma.h @@ -0,0 +1,47 @@ +/* + * Raspberry Pi emulation (c) 2012 Gregory Estrade + * + * This work is licensed under the terms of the GNU GPL, version 2 or later. + * See the COPYING file in the top-level directory. + */ + +#ifndef BCM2835_DMA_H +#define BCM2835_DMA_H + +#include "hw/sysbus.h" +#include "qom/object.h" + +typedef struct { + uint32_t cs; + uint32_t conblk_ad; + uint32_t ti; + uint32_t source_ad; + uint32_t dest_ad; + uint32_t txfr_len; + uint32_t stride; + uint32_t nextconbk; + uint32_t debug; + + qemu_irq irq; +} BCM2835DMAChan; + +#define TYPE_BCM2835_DMA "bcm2835-dma" +OBJECT_DECLARE_SIMPLE_TYPE(BCM2835DMAState, BCM2835_DMA) + +#define BCM2835_DMA_NCHANS 16 + +struct BCM2835DMAState { + /*< private >*/ + SysBusDevice busdev; + /*< public >*/ + + MemoryRegion iomem0, iomem15; + MemoryRegion *dma_mr; + AddressSpace dma_as; + + BCM2835DMAChan chan[BCM2835_DMA_NCHANS]; + uint32_t int_status; + uint32_t enable; +}; + +#endif diff --git a/include/hw/dma/i8257.h b/include/hw/dma/i8257.h new file mode 100644 index 000000000..f652345d6 --- /dev/null +++ b/include/hw/dma/i8257.h @@ -0,0 +1,50 @@ +#ifndef HW_I8257_H +#define HW_I8257_H + +#include "hw/isa/isa.h" +#include "exec/ioport.h" +#include "qom/object.h" + +#define TYPE_I8257 "i8257" +OBJECT_DECLARE_SIMPLE_TYPE(I8257State, I8257) + +typedef struct I8257Regs { + int now[2]; + uint16_t base[2]; + uint8_t mode; + uint8_t page; + uint8_t pageh; + uint8_t dack; + uint8_t eop; + IsaDmaTransferHandler transfer_handler; + void *opaque; +} I8257Regs; + +struct I8257State { + /* */ + ISADevice parent_obj; + + /* */ + int32_t base; + int32_t page_base; + int32_t pageh_base; + int32_t dshift; + + uint8_t status; + uint8_t command; + uint8_t mask; + uint8_t flip_flop; + I8257Regs regs[4]; + MemoryRegion channel_io; + MemoryRegion cont_io; + + QEMUBH *dma_bh; + bool dma_bh_scheduled; + int running; + PortioList portio_page; + PortioList portio_pageh; +}; + +void i8257_dma_init(ISABus *bus, bool high_page_enable); + +#endif diff --git a/include/hw/dma/pl080.h b/include/hw/dma/pl080.h new file mode 100644 index 000000000..1883f0427 --- /dev/null +++ b/include/hw/dma/pl080.h @@ -0,0 +1,72 @@ +/* + * ARM PrimeCell PL080/PL081 DMA controller + * + * Copyright (c) 2006 CodeSourcery. + * Copyright (c) 2018 Linaro Limited + * Written by Paul Brook, Peter Maydell + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 or + * (at your option) any later version. + */ + +/* This is a model of the Arm PrimeCell PL080/PL081 DMA controller: + * The PL080 TRM is: + * http://infocenter.arm.com/help/topic/com.arm.doc.ddi0196g/DDI0196.pdf + * and the PL081 TRM is: + * http://infocenter.arm.com/help/topic/com.arm.doc.ddi0218e/DDI0218.pdf + * + * QEMU interface: + * + sysbus IRQ 0: DMACINTR combined interrupt line + * + sysbus IRQ 1: DMACINTERR error interrupt request + * + sysbus IRQ 2: DMACINTTC count interrupt request + * + sysbus MMIO region 0: MemoryRegion for the device's registers + * + QOM property "downstream": MemoryRegion defining where DMA + * bus master transactions are made + */ + +#ifndef HW_DMA_PL080_H +#define HW_DMA_PL080_H + +#include "hw/sysbus.h" +#include "qom/object.h" + +#define PL080_MAX_CHANNELS 8 + +typedef struct { + uint32_t src; + uint32_t dest; + uint32_t lli; + uint32_t ctrl; + uint32_t conf; +} pl080_channel; + +#define TYPE_PL080 "pl080" +#define TYPE_PL081 "pl081" +OBJECT_DECLARE_SIMPLE_TYPE(PL080State, PL080) + +struct PL080State { + SysBusDevice parent_obj; + + MemoryRegion iomem; + uint8_t tc_int; + uint8_t tc_mask; + uint8_t err_int; + uint8_t err_mask; + uint32_t conf; + uint32_t sync; + uint32_t req_single; + uint32_t req_burst; + pl080_channel chan[PL080_MAX_CHANNELS]; + int nchannels; + /* Flag to avoid recursive DMA invocations. */ + int running; + qemu_irq irq; + qemu_irq interr; + qemu_irq inttc; + + MemoryRegion *downstream; + AddressSpace downstream_as; +}; + +#endif diff --git a/include/hw/dma/sifive_pdma.h b/include/hw/dma/sifive_pdma.h new file mode 100644 index 000000000..e319bbd6c --- /dev/null +++ b/include/hw/dma/sifive_pdma.h @@ -0,0 +1,57 @@ +/* + * SiFive Platform DMA emulation + * + * Copyright (c) 2020 Wind River Systems, Inc. + * + * Author: + * Bin Meng + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation; either version 2 or + * (at your option) version 3 of the License. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, see . + */ + +#ifndef SIFIVE_PDMA_H +#define SIFIVE_PDMA_H + +struct sifive_pdma_chan { + uint32_t control; + uint32_t next_config; + uint64_t next_bytes; + uint64_t next_dst; + uint64_t next_src; + uint32_t exec_config; + uint64_t exec_bytes; + uint64_t exec_dst; + uint64_t exec_src; + int state; +}; + +#define SIFIVE_PDMA_CHANS 4 +#define SIFIVE_PDMA_IRQS (SIFIVE_PDMA_CHANS * 2) +#define SIFIVE_PDMA_REG_SIZE 0x100000 +#define SIFIVE_PDMA_CHAN_NO(reg) ((reg & (SIFIVE_PDMA_REG_SIZE - 1)) >> 12) + +typedef struct SiFivePDMAState { + SysBusDevice parent; + MemoryRegion iomem; + qemu_irq irq[SIFIVE_PDMA_IRQS]; + + struct sifive_pdma_chan chan[SIFIVE_PDMA_CHANS]; +} SiFivePDMAState; + +#define TYPE_SIFIVE_PDMA "sifive.pdma" + +#define SIFIVE_PDMA(obj) \ + OBJECT_CHECK(SiFivePDMAState, (obj), TYPE_SIFIVE_PDMA) + +#endif /* SIFIVE_PDMA_H */ diff --git a/include/hw/dma/xlnx-zdma.h b/include/hw/dma/xlnx-zdma.h new file mode 100644 index 000000000..6602e7ffa --- /dev/null +++ b/include/hw/dma/xlnx-zdma.h @@ -0,0 +1,84 @@ +/* + * QEMU model of the ZynqMP generic DMA + * + * Copyright (c) 2014 Xilinx Inc. + * Copyright (c) 2018 FEIMTECH AB + * + * Written by Edgar E. Iglesias , + * Francisco Iglesias + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ + +#ifndef XLNX_ZDMA_H +#define XLNX_ZDMA_H + +#include "hw/sysbus.h" +#include "hw/register.h" +#include "sysemu/dma.h" +#include "qom/object.h" + +#define ZDMA_R_MAX (0x204 / 4) + +typedef enum { + DISABLED = 0, + ENABLED = 1, + PAUSED = 2, +} XlnxZDMAState; + +typedef union { + struct { + uint64_t addr; + uint32_t size; + uint32_t attr; + }; + uint32_t words[4]; +} XlnxZDMADescr; + +struct XlnxZDMA { + SysBusDevice parent_obj; + MemoryRegion iomem; + MemTxAttrs attr; + MemoryRegion *dma_mr; + AddressSpace *dma_as; + qemu_irq irq_zdma_ch_imr; + + struct { + uint32_t bus_width; + } cfg; + + XlnxZDMAState state; + bool error; + + XlnxZDMADescr dsc_src; + XlnxZDMADescr dsc_dst; + + uint32_t regs[ZDMA_R_MAX]; + RegisterInfo regs_info[ZDMA_R_MAX]; + + /* We don't model the common bufs. Must be at least 16 bytes + to model write only mode. */ + uint8_t buf[2048]; +}; + +#define TYPE_XLNX_ZDMA "xlnx.zdma" + +OBJECT_DECLARE_SIMPLE_TYPE(XlnxZDMA, XLNX_ZDMA) + +#endif /* XLNX_ZDMA_H */ diff --git a/include/hw/dma/xlnx-zynq-devcfg.h b/include/hw/dma/xlnx-zynq-devcfg.h new file mode 100644 index 000000000..e4cf085d7 --- /dev/null +++ b/include/hw/dma/xlnx-zynq-devcfg.h @@ -0,0 +1,62 @@ +/* + * QEMU model of the Xilinx Devcfg Interface + * + * (C) 2011 PetaLogix Pty Ltd + * (C) 2014 Xilinx Inc. + * Written by Peter Crosthwaite + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ + +#ifndef XLNX_ZYNQ_DEVCFG_H +#define XLNX_ZYNQ_DEVCFG_H + +#include "hw/register.h" +#include "hw/sysbus.h" +#include "qom/object.h" + +#define TYPE_XLNX_ZYNQ_DEVCFG "xlnx.ps7-dev-cfg" + +OBJECT_DECLARE_SIMPLE_TYPE(XlnxZynqDevcfg, XLNX_ZYNQ_DEVCFG) + +#define XLNX_ZYNQ_DEVCFG_R_MAX (0x100 / 4) + +#define XLNX_ZYNQ_DEVCFG_DMA_CMD_FIFO_LEN 10 + +typedef struct XlnxZynqDevcfgDMACmd { + uint32_t src_addr; + uint32_t dest_addr; + uint32_t src_len; + uint32_t dest_len; +} XlnxZynqDevcfgDMACmd; + +struct XlnxZynqDevcfg { + SysBusDevice parent_obj; + + MemoryRegion iomem; + qemu_irq irq; + + XlnxZynqDevcfgDMACmd dma_cmd_fifo[XLNX_ZYNQ_DEVCFG_DMA_CMD_FIFO_LEN]; + uint8_t dma_cmd_fifo_num; + + uint32_t regs[XLNX_ZYNQ_DEVCFG_R_MAX]; + RegisterInfo regs_info[XLNX_ZYNQ_DEVCFG_R_MAX]; +}; + +#endif diff --git a/include/hw/dma/xlnx_dpdma.h b/include/hw/dma/xlnx_dpdma.h new file mode 100644 index 000000000..40537a848 --- /dev/null +++ b/include/hw/dma/xlnx_dpdma.h @@ -0,0 +1,86 @@ +/* + * xlnx_dpdma.h + * + * Copyright (C) 2015 : GreenSocs Ltd + * http://www.greensocs.com/ , email: info@greensocs.com + * + * Developed by : + * Frederic Konrad + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, see . + * + */ + +#ifndef XLNX_DPDMA_H +#define XLNX_DPDMA_H + +#include "hw/sysbus.h" +#include "ui/console.h" +#include "sysemu/dma.h" +#include "qom/object.h" + +#define XLNX_DPDMA_REG_ARRAY_SIZE (0x1000 >> 2) + +struct XlnxDPDMAState { + /*< private >*/ + SysBusDevice parent_obj; + /*< public >*/ + MemoryRegion iomem; + uint32_t registers[XLNX_DPDMA_REG_ARRAY_SIZE]; + uint8_t *data[6]; + bool operation_finished[6]; + qemu_irq irq; +}; + + +#define TYPE_XLNX_DPDMA "xlnx.dpdma" +OBJECT_DECLARE_SIMPLE_TYPE(XlnxDPDMAState, XLNX_DPDMA) + +/* + * xlnx_dpdma_start_operation: Start the operation on the specified channel. The + * DPDMA gets the current descriptor and retrieves + * data to the buffer specified by + * dpdma_set_host_data_location(). + * + * Returns The number of bytes transferred by the DPDMA + * or 0 if an error occurred. + * + * @s The DPDMA state. + * @channel The channel to start. + */ +size_t xlnx_dpdma_start_operation(XlnxDPDMAState *s, uint8_t channel, + bool one_desc); + +/* + * xlnx_dpdma_set_host_data_location: Set the location in the host memory where + * to store the data out from the dma + * channel. + * + * @s The DPDMA state. + * @channel The channel associated to the pointer. + * @p The buffer where to store the data. + */ +/* XXX: add a maximum size arg and send an interrupt in case of overflow. */ +void xlnx_dpdma_set_host_data_location(XlnxDPDMAState *s, uint8_t channel, + void *p); + +/* + * xlnx_dpdma_trigger_vsync_irq: Trigger a VSYNC IRQ when the display is + * updated. + * + * @s The DPDMA state. + */ +void xlnx_dpdma_trigger_vsync_irq(XlnxDPDMAState *s); + +#endif /* XLNX_DPDMA_H */ diff --git a/include/hw/elf_ops.h b/include/hw/elf_ops.h new file mode 100644 index 000000000..6fdff3dce --- /dev/null +++ b/include/hw/elf_ops.h @@ -0,0 +1,619 @@ +static void glue(bswap_ehdr, SZ)(struct elfhdr *ehdr) +{ + bswap16s(&ehdr->e_type); /* Object file type */ + bswap16s(&ehdr->e_machine); /* Architecture */ + bswap32s(&ehdr->e_version); /* Object file version */ + bswapSZs(&ehdr->e_entry); /* Entry point virtual address */ + bswapSZs(&ehdr->e_phoff); /* Program header table file offset */ + bswapSZs(&ehdr->e_shoff); /* Section header table file offset */ + bswap32s(&ehdr->e_flags); /* Processor-specific flags */ + bswap16s(&ehdr->e_ehsize); /* ELF header size in bytes */ + bswap16s(&ehdr->e_phentsize); /* Program header table entry size */ + bswap16s(&ehdr->e_phnum); /* Program header table entry count */ + bswap16s(&ehdr->e_shentsize); /* Section header table entry size */ + bswap16s(&ehdr->e_shnum); /* Section header table entry count */ + bswap16s(&ehdr->e_shstrndx); /* Section header string table index */ +} + +static void glue(bswap_phdr, SZ)(struct elf_phdr *phdr) +{ + bswap32s(&phdr->p_type); /* Segment type */ + bswapSZs(&phdr->p_offset); /* Segment file offset */ + bswapSZs(&phdr->p_vaddr); /* Segment virtual address */ + bswapSZs(&phdr->p_paddr); /* Segment physical address */ + bswapSZs(&phdr->p_filesz); /* Segment size in file */ + bswapSZs(&phdr->p_memsz); /* Segment size in memory */ + bswap32s(&phdr->p_flags); /* Segment flags */ + bswapSZs(&phdr->p_align); /* Segment alignment */ +} + +static void glue(bswap_shdr, SZ)(struct elf_shdr *shdr) +{ + bswap32s(&shdr->sh_name); + bswap32s(&shdr->sh_type); + bswapSZs(&shdr->sh_flags); + bswapSZs(&shdr->sh_addr); + bswapSZs(&shdr->sh_offset); + bswapSZs(&shdr->sh_size); + bswap32s(&shdr->sh_link); + bswap32s(&shdr->sh_info); + bswapSZs(&shdr->sh_addralign); + bswapSZs(&shdr->sh_entsize); +} + +static void glue(bswap_sym, SZ)(struct elf_sym *sym) +{ + bswap32s(&sym->st_name); + bswapSZs(&sym->st_value); + bswapSZs(&sym->st_size); + bswap16s(&sym->st_shndx); +} + +static void glue(bswap_rela, SZ)(struct elf_rela *rela) +{ + bswapSZs(&rela->r_offset); + bswapSZs(&rela->r_info); + bswapSZs((elf_word *)&rela->r_addend); +} + +static struct elf_shdr *glue(find_section, SZ)(struct elf_shdr *shdr_table, + int n, int type) +{ + int i; + for(i=0;ist_value) { + result = -1; + } else if (addr >= sym->st_value + sym->st_size) { + result = 1; + } + return result; +} + +static const char *glue(lookup_symbol, SZ)(struct syminfo *s, + hwaddr orig_addr) +{ + struct elf_sym *syms = glue(s->disas_symtab.elf, SZ); + struct elf_sym *sym; + + sym = bsearch(&orig_addr, syms, s->disas_num_syms, sizeof(*syms), + glue(symfind, SZ)); + if (sym != NULL) { + return s->disas_strtab + sym->st_name; + } + + return ""; +} + +static int glue(symcmp, SZ)(const void *s0, const void *s1) +{ + struct elf_sym *sym0 = (struct elf_sym *)s0; + struct elf_sym *sym1 = (struct elf_sym *)s1; + return (sym0->st_value < sym1->st_value) + ? -1 + : ((sym0->st_value > sym1->st_value) ? 1 : 0); +} + +static void glue(load_symbols, SZ)(struct elfhdr *ehdr, int fd, int must_swab, + int clear_lsb, symbol_fn_t sym_cb) +{ + struct elf_shdr *symtab, *strtab; + g_autofree struct elf_shdr *shdr_table = NULL; + g_autofree struct elf_sym *syms = NULL; + g_autofree char *str = NULL; + struct syminfo *s; + int nsyms, i; + + shdr_table = load_at(fd, ehdr->e_shoff, + sizeof(struct elf_shdr) * ehdr->e_shnum); + if (!shdr_table) { + return ; + } + + if (must_swab) { + for (i = 0; i < ehdr->e_shnum; i++) { + glue(bswap_shdr, SZ)(shdr_table + i); + } + } + + symtab = glue(find_section, SZ)(shdr_table, ehdr->e_shnum, SHT_SYMTAB); + if (!symtab) { + return; + } + syms = load_at(fd, symtab->sh_offset, symtab->sh_size); + if (!syms) { + return; + } + + nsyms = symtab->sh_size / sizeof(struct elf_sym); + + /* String table */ + if (symtab->sh_link >= ehdr->e_shnum) { + return; + } + strtab = &shdr_table[symtab->sh_link]; + + str = load_at(fd, strtab->sh_offset, strtab->sh_size); + if (!str) { + return; + } + + i = 0; + while (i < nsyms) { + if (must_swab) { + glue(bswap_sym, SZ)(&syms[i]); + } + if (sym_cb) { + sym_cb(str + syms[i].st_name, syms[i].st_info, + syms[i].st_value, syms[i].st_size); + } + /* We are only interested in function symbols. + Throw everything else away. */ + if (syms[i].st_shndx == SHN_UNDEF || + syms[i].st_shndx >= SHN_LORESERVE || + ELF_ST_TYPE(syms[i].st_info) != STT_FUNC) { + nsyms--; + if (i < nsyms) { + syms[i] = syms[nsyms]; + } + continue; + } + if (clear_lsb) { + /* The bottom address bit marks a Thumb or MIPS16 symbol. */ + syms[i].st_value &= ~(glue(glue(Elf, SZ), _Addr))1; + } + i++; + } + + /* check we have symbols left */ + if (nsyms == 0) { + return; + } + + syms = g_realloc(syms, nsyms * sizeof(*syms)); + qsort(syms, nsyms, sizeof(*syms), glue(symcmp, SZ)); + for (i = 0; i < nsyms - 1; i++) { + if (syms[i].st_size == 0) { + syms[i].st_size = syms[i + 1].st_value - syms[i].st_value; + } + } + + /* Commit */ + s = g_malloc0(sizeof(*s)); + s->lookup_symbol = glue(lookup_symbol, SZ); + glue(s->disas_symtab.elf, SZ) = g_steal_pointer(&syms); + s->disas_num_syms = nsyms; + s->disas_strtab = g_steal_pointer(&str); + s->next = syminfos; + syminfos = s; +} + +static int glue(elf_reloc, SZ)(struct elfhdr *ehdr, int fd, int must_swab, + uint64_t (*translate_fn)(void *, uint64_t), + void *translate_opaque, uint8_t *data, + struct elf_phdr *ph, int elf_machine) +{ + struct elf_shdr *reltab, *shdr_table = NULL; + struct elf_rela *rels = NULL; + int nrels, i, ret = -1; + elf_word wordval; + void *addr; + + shdr_table = load_at(fd, ehdr->e_shoff, + sizeof(struct elf_shdr) * ehdr->e_shnum); + if (!shdr_table) { + return -1; + } + if (must_swab) { + for (i = 0; i < ehdr->e_shnum; i++) { + glue(bswap_shdr, SZ)(&shdr_table[i]); + } + } + + reltab = glue(find_section, SZ)(shdr_table, ehdr->e_shnum, SHT_RELA); + if (!reltab) { + goto fail; + } + rels = load_at(fd, reltab->sh_offset, reltab->sh_size); + if (!rels) { + goto fail; + } + nrels = reltab->sh_size / sizeof(struct elf_rela); + + for (i = 0; i < nrels; i++) { + if (must_swab) { + glue(bswap_rela, SZ)(&rels[i]); + } + if (rels[i].r_offset < ph->p_vaddr || + rels[i].r_offset >= ph->p_vaddr + ph->p_filesz) { + continue; + } + addr = &data[rels[i].r_offset - ph->p_vaddr]; + switch (elf_machine) { + case EM_S390: + switch (rels[i].r_info) { + case R_390_RELATIVE: + wordval = *(elf_word *)addr; + if (must_swab) { + bswapSZs(&wordval); + } + wordval = translate_fn(translate_opaque, wordval); + if (must_swab) { + bswapSZs(&wordval); + } + *(elf_word *)addr = wordval; + break; + default: + fprintf(stderr, "Unsupported relocation type %i!\n", + (int)rels[i].r_info); + } + } + } + + ret = 0; +fail: + g_free(rels); + g_free(shdr_table); + return ret; +} + +/* + * Given 'nhdr', a pointer to a range of ELF Notes, search through them + * for a note matching type 'elf_note_type' and return a pointer to + * the matching ELF note. + */ +static struct elf_note *glue(get_elf_note_type, SZ)(struct elf_note *nhdr, + elf_word note_size, + elf_word phdr_align, + elf_word elf_note_type) +{ + elf_word nhdr_size = sizeof(struct elf_note); + elf_word elf_note_entry_offset = 0; + elf_word note_type; + elf_word nhdr_namesz; + elf_word nhdr_descsz; + + if (nhdr == NULL) { + return NULL; + } + + note_type = nhdr->n_type; + while (note_type != elf_note_type) { + nhdr_namesz = nhdr->n_namesz; + nhdr_descsz = nhdr->n_descsz; + + elf_note_entry_offset = nhdr_size + + QEMU_ALIGN_UP(nhdr_namesz, phdr_align) + + QEMU_ALIGN_UP(nhdr_descsz, phdr_align); + + /* + * If the offset calculated in this iteration exceeds the + * supplied size, we are done and no matching note was found. + */ + if (elf_note_entry_offset > note_size) { + return NULL; + } + + /* skip to the next ELF Note entry */ + nhdr = (void *)nhdr + elf_note_entry_offset; + note_type = nhdr->n_type; + } + + return nhdr; +} + +static int glue(load_elf, SZ)(const char *name, int fd, + uint64_t (*elf_note_fn)(void *, void *, bool), + uint64_t (*translate_fn)(void *, uint64_t), + void *translate_opaque, + int must_swab, uint64_t *pentry, + uint64_t *lowaddr, uint64_t *highaddr, + uint32_t *pflags, int elf_machine, + int clear_lsb, int data_swab, + AddressSpace *as, bool load_rom, + symbol_fn_t sym_cb) +{ + struct elfhdr ehdr; + struct elf_phdr *phdr = NULL, *ph; + int size, i, total_size; + elf_word mem_size, file_size, data_offset; + uint64_t addr, low = (uint64_t)-1, high = 0; + GMappedFile *mapped_file = NULL; + uint8_t *data = NULL; + char label[128]; + int ret = ELF_LOAD_FAILED; + + if (read(fd, &ehdr, sizeof(ehdr)) != sizeof(ehdr)) + goto fail; + if (must_swab) { + glue(bswap_ehdr, SZ)(&ehdr); + } + + if (elf_machine <= EM_NONE) { + /* The caller didn't specify an ARCH, we can figure it out */ + elf_machine = ehdr.e_machine; + } + + switch (elf_machine) { + case EM_PPC64: + if (ehdr.e_machine != EM_PPC64) { + if (ehdr.e_machine != EM_PPC) { + ret = ELF_LOAD_WRONG_ARCH; + goto fail; + } + } + break; + case EM_X86_64: + if (ehdr.e_machine != EM_X86_64) { + if (ehdr.e_machine != EM_386) { + ret = ELF_LOAD_WRONG_ARCH; + goto fail; + } + } + break; + case EM_MICROBLAZE: + if (ehdr.e_machine != EM_MICROBLAZE) { + if (ehdr.e_machine != EM_MICROBLAZE_OLD) { + ret = ELF_LOAD_WRONG_ARCH; + goto fail; + } + } + break; + case EM_MOXIE: + if (ehdr.e_machine != EM_MOXIE) { + if (ehdr.e_machine != EM_MOXIE_OLD) { + ret = ELF_LOAD_WRONG_ARCH; + goto fail; + } + } + break; + case EM_MIPS: + case EM_NANOMIPS: + if ((ehdr.e_machine != EM_MIPS) && + (ehdr.e_machine != EM_NANOMIPS)) { + ret = ELF_LOAD_WRONG_ARCH; + goto fail; + } + break; + default: + if (elf_machine != ehdr.e_machine) { + ret = ELF_LOAD_WRONG_ARCH; + goto fail; + } + } + + if (pflags) { + *pflags = (elf_word)ehdr.e_flags; + } + if (pentry) + *pentry = (uint64_t)(elf_sword)ehdr.e_entry; + + glue(load_symbols, SZ)(&ehdr, fd, must_swab, clear_lsb, sym_cb); + + size = ehdr.e_phnum * sizeof(phdr[0]); + if (lseek(fd, ehdr.e_phoff, SEEK_SET) != ehdr.e_phoff) { + goto fail; + } + phdr = g_malloc0(size); + if (!phdr) + goto fail; + if (read(fd, phdr, size) != size) + goto fail; + if (must_swab) { + for(i = 0; i < ehdr.e_phnum; i++) { + ph = &phdr[i]; + glue(bswap_phdr, SZ)(ph); + } + } + + /* + * Since we want to be able to modify the mapped buffer, we set the + * 'writeble' parameter to 'true'. Modifications to the buffer are not + * written back to the file. + */ + mapped_file = g_mapped_file_new_from_fd(fd, true, NULL); + if (!mapped_file) { + goto fail; + } + + total_size = 0; + for(i = 0; i < ehdr.e_phnum; i++) { + ph = &phdr[i]; + if (ph->p_type == PT_LOAD) { + mem_size = ph->p_memsz; /* Size of the ROM */ + file_size = ph->p_filesz; /* Size of the allocated data */ + data_offset = ph->p_offset; /* Offset where the data is located */ + + if (file_size > 0) { + if (g_mapped_file_get_length(mapped_file) < + file_size + data_offset) { + goto fail; + } + + data = (uint8_t *)g_mapped_file_get_contents(mapped_file); + data += data_offset; + } + + /* The ELF spec is somewhat vague about the purpose of the + * physical address field. One common use in the embedded world + * is that physical address field specifies the load address + * and the virtual address field specifies the execution address. + * Segments are packed into ROM or flash, and the relocation + * and zero-initialization of data is done at runtime. This + * means that the memsz header represents the runtime size of the + * segment, but the filesz represents the loadtime size. If + * we try to honour the memsz value for an ELF file like this + * we will end up with overlapping segments (which the + * loader.c code will later reject). + * We support ELF files using this scheme by by checking whether + * paddr + memsz for this segment would overlap with any other + * segment. If so, then we assume it's using this scheme and + * truncate the loaded segment to the filesz size. + * If the segment considered as being memsz size doesn't overlap + * then we use memsz for the segment length, to handle ELF files + * which assume that the loader will do the zero-initialization. + */ + if (mem_size > file_size) { + /* If this segment's zero-init portion overlaps another + * segment's data or zero-init portion, then truncate this one. + * Invalid ELF files where the segments overlap even when + * only file_size bytes are loaded will be rejected by + * the ROM overlap check in loader.c, so we don't try to + * explicitly detect those here. + */ + int j; + elf_word zero_start = ph->p_paddr + file_size; + elf_word zero_end = ph->p_paddr + mem_size; + + for (j = 0; j < ehdr.e_phnum; j++) { + struct elf_phdr *jph = &phdr[j]; + + if (i != j && jph->p_type == PT_LOAD) { + elf_word other_start = jph->p_paddr; + elf_word other_end = jph->p_paddr + jph->p_memsz; + + if (!(other_start >= zero_end || + zero_start >= other_end)) { + mem_size = file_size; + break; + } + } + } + } + + if (mem_size > INT_MAX - total_size) { + ret = ELF_LOAD_TOO_BIG; + goto fail; + } + + /* address_offset is hack for kernel images that are + linked at the wrong physical address. */ + if (translate_fn) { + addr = translate_fn(translate_opaque, ph->p_paddr); + glue(elf_reloc, SZ)(&ehdr, fd, must_swab, translate_fn, + translate_opaque, data, ph, elf_machine); + } else { + addr = ph->p_paddr; + } + + if (data_swab) { + int j; + for (j = 0; j < file_size; j += (1 << data_swab)) { + uint8_t *dp = data + j; + switch (data_swab) { + case (1): + *(uint16_t *)dp = bswap16(*(uint16_t *)dp); + break; + case (2): + *(uint32_t *)dp = bswap32(*(uint32_t *)dp); + break; + case (3): + *(uint64_t *)dp = bswap64(*(uint64_t *)dp); + break; + default: + g_assert_not_reached(); + } + } + } + + /* the entry pointer in the ELF header is a virtual + * address, if the text segments paddr and vaddr differ + * we need to adjust the entry */ + if (pentry && !translate_fn && + ph->p_vaddr != ph->p_paddr && + ehdr.e_entry >= ph->p_vaddr && + ehdr.e_entry < ph->p_vaddr + ph->p_filesz && + ph->p_flags & PF_X) { + *pentry = ehdr.e_entry - ph->p_vaddr + ph->p_paddr; + } + + /* Some ELF files really do have segments of zero size; + * just ignore them rather than trying to create empty + * ROM blobs, because the zero-length blob can falsely + * trigger the overlapping-ROM-blobs check. + */ + if (mem_size != 0) { + if (load_rom) { + snprintf(label, sizeof(label), "phdr #%d: %s", i, name); + + /* + * rom_add_elf_program() takes its own reference to + * 'mapped_file'. + */ + rom_add_elf_program(label, mapped_file, data, file_size, + mem_size, addr, as); + } else { + MemTxResult res; + + res = address_space_write(as ? as : &address_space_memory, + addr, MEMTXATTRS_UNSPECIFIED, + data, file_size); + if (res != MEMTX_OK) { + goto fail; + } + } + } + + total_size += mem_size; + if (addr < low) + low = addr; + if ((addr + mem_size) > high) + high = addr + mem_size; + + data = NULL; + + } else if (ph->p_type == PT_NOTE && elf_note_fn) { + struct elf_note *nhdr = NULL; + + file_size = ph->p_filesz; /* Size of the range of ELF notes */ + data_offset = ph->p_offset; /* Offset where the notes are located */ + + if (file_size > 0) { + if (g_mapped_file_get_length(mapped_file) < + file_size + data_offset) { + goto fail; + } + + data = (uint8_t *)g_mapped_file_get_contents(mapped_file); + data += data_offset; + } + + /* + * Search the ELF notes to find one with a type matching the + * value passed in via 'translate_opaque' + */ + nhdr = (struct elf_note *)data; + assert(translate_opaque != NULL); + nhdr = glue(get_elf_note_type, SZ)(nhdr, file_size, ph->p_align, + *(uint64_t *)translate_opaque); + if (nhdr != NULL) { + bool is64 = + sizeof(struct elf_note) == sizeof(struct elf64_note); + elf_note_fn((void *)nhdr, (void *)&ph->p_align, is64); + } + data = NULL; + } + } + + if (lowaddr) + *lowaddr = (uint64_t)(elf_sword)low; + if (highaddr) + *highaddr = (uint64_t)(elf_sword)high; + ret = total_size; + fail: + if (mapped_file) { + g_mapped_file_unref(mapped_file); + } + g_free(phdr); + return ret; +} diff --git a/include/hw/firmware/smbios.h b/include/hw/firmware/smbios.h new file mode 100644 index 000000000..02a0ced0a --- /dev/null +++ b/include/hw/firmware/smbios.h @@ -0,0 +1,277 @@ +#ifndef QEMU_SMBIOS_H +#define QEMU_SMBIOS_H + +/* + * SMBIOS Support + * + * Copyright (C) 2009 Hewlett-Packard Development Company, L.P. + * + * Authors: + * Alex Williamson + * + * This work is licensed under the terms of the GNU GPL, version 2. See + * the COPYING file in the top-level directory. + * + */ + + +#define SMBIOS_MAX_TYPE 127 + +/* memory area description, used by type 19 table */ +struct smbios_phys_mem_area { + uint64_t address; + uint64_t length; +}; + +/* + * SMBIOS spec defined tables + */ +typedef enum SmbiosEntryPointType { + SMBIOS_ENTRY_POINT_21, + SMBIOS_ENTRY_POINT_30, +} SmbiosEntryPointType; + +/* SMBIOS Entry Point + * There are two types of entry points defined in the SMBIOS specification + * (see below). BIOS must place the entry point(s) at a 16-byte-aligned + * address between 0xf0000 and 0xfffff. Note that either entry point type + * can be used in a 64-bit target system, except that SMBIOS 2.1 entry point + * only allows the SMBIOS struct table to reside below 4GB address space. + */ + +/* SMBIOS 2.1 (32-bit) Entry Point + * - introduced since SMBIOS 2.1 + * - supports structure table below 4GB only + */ +struct smbios_21_entry_point { + uint8_t anchor_string[4]; + uint8_t checksum; + uint8_t length; + uint8_t smbios_major_version; + uint8_t smbios_minor_version; + uint16_t max_structure_size; + uint8_t entry_point_revision; + uint8_t formatted_area[5]; + uint8_t intermediate_anchor_string[5]; + uint8_t intermediate_checksum; + uint16_t structure_table_length; + uint32_t structure_table_address; + uint16_t number_of_structures; + uint8_t smbios_bcd_revision; +} QEMU_PACKED; + +/* SMBIOS 3.0 (64-bit) Entry Point + * - introduced since SMBIOS 3.0 + * - supports structure table at 64-bit address space + */ +struct smbios_30_entry_point { + uint8_t anchor_string[5]; + uint8_t checksum; + uint8_t length; + uint8_t smbios_major_version; + uint8_t smbios_minor_version; + uint8_t smbios_doc_rev; + uint8_t entry_point_revision; + uint8_t reserved; + uint32_t structure_table_max_size; + uint64_t structure_table_address; +} QEMU_PACKED; + +typedef union { + struct smbios_21_entry_point ep21; + struct smbios_30_entry_point ep30; +} QEMU_PACKED SmbiosEntryPoint; + +/* This goes at the beginning of every SMBIOS structure. */ +struct smbios_structure_header { + uint8_t type; + uint8_t length; + uint16_t handle; +} QEMU_PACKED; + +/* SMBIOS type 0 - BIOS Information */ +struct smbios_type_0 { + struct smbios_structure_header header; + uint8_t vendor_str; + uint8_t bios_version_str; + uint16_t bios_starting_address_segment; + uint8_t bios_release_date_str; + uint8_t bios_rom_size; + uint64_t bios_characteristics; + uint8_t bios_characteristics_extension_bytes[2]; + uint8_t system_bios_major_release; + uint8_t system_bios_minor_release; + uint8_t embedded_controller_major_release; + uint8_t embedded_controller_minor_release; +} QEMU_PACKED; + +/* UUID encoding. The time_* fields are little-endian, as specified by SMBIOS + * version 2.6. + */ +struct smbios_uuid { + uint32_t time_low; + uint16_t time_mid; + uint16_t time_hi_and_version; + uint8_t clock_seq_hi_and_reserved; + uint8_t clock_seq_low; + uint8_t node[6]; +} QEMU_PACKED; + +/* SMBIOS type 1 - System Information */ +struct smbios_type_1 { + struct smbios_structure_header header; + uint8_t manufacturer_str; + uint8_t product_name_str; + uint8_t version_str; + uint8_t serial_number_str; + struct smbios_uuid uuid; + uint8_t wake_up_type; + uint8_t sku_number_str; + uint8_t family_str; +} QEMU_PACKED; + +/* SMBIOS type 2 - Base Board */ +struct smbios_type_2 { + struct smbios_structure_header header; + uint8_t manufacturer_str; + uint8_t product_str; + uint8_t version_str; + uint8_t serial_number_str; + uint8_t asset_tag_number_str; + uint8_t feature_flags; + uint8_t location_str; + uint16_t chassis_handle; + uint8_t board_type; + uint8_t contained_element_count; + /* contained elements follow */ +} QEMU_PACKED; + +/* SMBIOS type 3 - System Enclosure (v2.7) */ +struct smbios_type_3 { + struct smbios_structure_header header; + uint8_t manufacturer_str; + uint8_t type; + uint8_t version_str; + uint8_t serial_number_str; + uint8_t asset_tag_number_str; + uint8_t boot_up_state; + uint8_t power_supply_state; + uint8_t thermal_state; + uint8_t security_status; + uint32_t oem_defined; + uint8_t height; + uint8_t number_of_power_cords; + uint8_t contained_element_count; + uint8_t contained_element_record_length; + uint8_t sku_number_str; + /* contained elements follow */ +} QEMU_PACKED; + +/* SMBIOS type 4 - Processor Information (v2.6) */ +struct smbios_type_4 { + struct smbios_structure_header header; + uint8_t socket_designation_str; + uint8_t processor_type; + uint8_t processor_family; + uint8_t processor_manufacturer_str; + uint32_t processor_id[2]; + uint8_t processor_version_str; + uint8_t voltage; + uint16_t external_clock; + uint16_t max_speed; + uint16_t current_speed; + uint8_t status; + uint8_t processor_upgrade; + uint16_t l1_cache_handle; + uint16_t l2_cache_handle; + uint16_t l3_cache_handle; + uint8_t serial_number_str; + uint8_t asset_tag_number_str; + uint8_t part_number_str; + uint8_t core_count; + uint8_t core_enabled; + uint8_t thread_count; + uint16_t processor_characteristics; + uint16_t processor_family2; +} QEMU_PACKED; + +/* SMBIOS type 11 - OEM strings */ +struct smbios_type_11 { + struct smbios_structure_header header; + uint8_t count; +} QEMU_PACKED; + +/* SMBIOS type 16 - Physical Memory Array (v2.7) */ +struct smbios_type_16 { + struct smbios_structure_header header; + uint8_t location; + uint8_t use; + uint8_t error_correction; + uint32_t maximum_capacity; + uint16_t memory_error_information_handle; + uint16_t number_of_memory_devices; + uint64_t extended_maximum_capacity; +} QEMU_PACKED; + +/* SMBIOS type 17 - Memory Device (v2.8) */ +struct smbios_type_17 { + struct smbios_structure_header header; + uint16_t physical_memory_array_handle; + uint16_t memory_error_information_handle; + uint16_t total_width; + uint16_t data_width; + uint16_t size; + uint8_t form_factor; + uint8_t device_set; + uint8_t device_locator_str; + uint8_t bank_locator_str; + uint8_t memory_type; + uint16_t type_detail; + uint16_t speed; + uint8_t manufacturer_str; + uint8_t serial_number_str; + uint8_t asset_tag_number_str; + uint8_t part_number_str; + uint8_t attributes; + uint32_t extended_size; + uint16_t configured_clock_speed; + uint16_t minimum_voltage; + uint16_t maximum_voltage; + uint16_t configured_voltage; +} QEMU_PACKED; + +/* SMBIOS type 19 - Memory Array Mapped Address (v2.7) */ +struct smbios_type_19 { + struct smbios_structure_header header; + uint32_t starting_address; + uint32_t ending_address; + uint16_t memory_array_handle; + uint8_t partition_width; + uint64_t extended_starting_address; + uint64_t extended_ending_address; +} QEMU_PACKED; + +/* SMBIOS type 32 - System Boot Information */ +struct smbios_type_32 { + struct smbios_structure_header header; + uint8_t reserved[6]; + uint8_t boot_status; +} QEMU_PACKED; + +/* SMBIOS type 127 -- End-of-table */ +struct smbios_type_127 { + struct smbios_structure_header header; +} QEMU_PACKED; + +void smbios_entry_add(QemuOpts *opts, Error **errp); +void smbios_set_cpuid(uint32_t version, uint32_t features); +void smbios_set_defaults(const char *manufacturer, const char *product, + const char *version, bool legacy_mode, + bool uuid_encoded, SmbiosEntryPointType ep_type); +uint8_t *smbios_get_table_legacy(MachineState *ms, size_t *length); +void smbios_get_tables(MachineState *ms, + const struct smbios_phys_mem_area *mem_array, + const unsigned int mem_array_size, + uint8_t **tables, size_t *tables_len, + uint8_t **anchor, size_t *anchor_len); +#endif /* QEMU_SMBIOS_H */ diff --git a/include/hw/fw-path-provider.h b/include/hw/fw-path-provider.h new file mode 100644 index 000000000..8e1d45651 --- /dev/null +++ b/include/hw/fw-path-provider.h @@ -0,0 +1,44 @@ +/* + * Firmware patch provider class and helpers definitions. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, + * or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, see . + */ + +#ifndef FW_PATH_PROVIDER_H +#define FW_PATH_PROVIDER_H + +#include "qom/object.h" + +#define TYPE_FW_PATH_PROVIDER "fw-path-provider" + +typedef struct FWPathProviderClass FWPathProviderClass; +DECLARE_CLASS_CHECKERS(FWPathProviderClass, FW_PATH_PROVIDER, + TYPE_FW_PATH_PROVIDER) +#define FW_PATH_PROVIDER(obj) \ + INTERFACE_CHECK(FWPathProvider, (obj), TYPE_FW_PATH_PROVIDER) + +typedef struct FWPathProvider FWPathProvider; + +struct FWPathProviderClass { + InterfaceClass parent_class; + + char *(*get_dev_path)(FWPathProvider *p, BusState *bus, DeviceState *dev); +}; + +char *fw_path_provider_get_dev_path(FWPathProvider *p, BusState *bus, + DeviceState *dev); +char *fw_path_provider_try_get_dev_path(Object *o, BusState *bus, + DeviceState *dev); + +#endif /* FW_PATH_PROVIDER_H */ diff --git a/include/hw/gpio/aspeed_gpio.h b/include/hw/gpio/aspeed_gpio.h new file mode 100644 index 000000000..e1636ce7f --- /dev/null +++ b/include/hw/gpio/aspeed_gpio.h @@ -0,0 +1,97 @@ +/* + * ASPEED GPIO Controller + * + * Copyright (C) 2017-2018 IBM Corp. + * + * This code is licensed under the GPL version 2 or later. See + * the COPYING file in the top-level directory. + */ + +#ifndef ASPEED_GPIO_H +#define ASPEED_GPIO_H + +#include "hw/sysbus.h" +#include "qom/object.h" + +#define TYPE_ASPEED_GPIO "aspeed.gpio" +OBJECT_DECLARE_TYPE(AspeedGPIOState, AspeedGPIOClass, ASPEED_GPIO) + +#define ASPEED_GPIO_MAX_NR_SETS 8 +#define ASPEED_REGS_PER_BANK 14 +#define ASPEED_GPIO_MAX_NR_REGS (ASPEED_REGS_PER_BANK * ASPEED_GPIO_MAX_NR_SETS) +#define ASPEED_GPIO_NR_PINS 228 +#define ASPEED_GROUPS_PER_SET 4 +#define ASPEED_GPIO_NR_DEBOUNCE_REGS 3 +#define ASPEED_CHARS_PER_GROUP_LABEL 4 + +typedef struct GPIOSets GPIOSets; + +typedef struct GPIOSetProperties { + uint32_t input; + uint32_t output; + char group_label[ASPEED_GROUPS_PER_SET][ASPEED_CHARS_PER_GROUP_LABEL]; +} GPIOSetProperties; + +enum GPIORegType { + gpio_not_a_reg, + gpio_reg_data_value, + gpio_reg_direction, + gpio_reg_int_enable, + gpio_reg_int_sens_0, + gpio_reg_int_sens_1, + gpio_reg_int_sens_2, + gpio_reg_int_status, + gpio_reg_reset_tolerant, + gpio_reg_debounce_1, + gpio_reg_debounce_2, + gpio_reg_cmd_source_0, + gpio_reg_cmd_source_1, + gpio_reg_data_read, + gpio_reg_input_mask, +}; + +typedef struct AspeedGPIOReg { + uint16_t set_idx; + enum GPIORegType type; + } AspeedGPIOReg; + +struct AspeedGPIOClass { + SysBusDevice parent_obj; + const GPIOSetProperties *props; + uint32_t nr_gpio_pins; + uint32_t nr_gpio_sets; + uint32_t gap; + const AspeedGPIOReg *reg_table; +}; + +struct AspeedGPIOState { + /* */ + SysBusDevice parent; + + /*< public >*/ + MemoryRegion iomem; + int pending; + qemu_irq irq; + qemu_irq gpios[ASPEED_GPIO_NR_PINS]; + +/* Parallel GPIO Registers */ + uint32_t debounce_regs[ASPEED_GPIO_NR_DEBOUNCE_REGS]; + struct GPIOSets { + uint32_t data_value; /* Reflects pin values */ + uint32_t data_read; /* Contains last value written to data value */ + uint32_t direction; + uint32_t int_enable; + uint32_t int_sens_0; + uint32_t int_sens_1; + uint32_t int_sens_2; + uint32_t int_status; + uint32_t reset_tol; + uint32_t cmd_source_0; + uint32_t cmd_source_1; + uint32_t debounce_1; + uint32_t debounce_2; + uint32_t input_mask; + } sets[ASPEED_GPIO_MAX_NR_SETS]; +}; + +#endif /* _ASPEED_GPIO_H_ */ diff --git a/include/hw/gpio/bcm2835_gpio.h b/include/hw/gpio/bcm2835_gpio.h new file mode 100644 index 000000000..1c53a0509 --- /dev/null +++ b/include/hw/gpio/bcm2835_gpio.h @@ -0,0 +1,40 @@ +/* + * Raspberry Pi (BCM2835) GPIO Controller + * + * Copyright (c) 2017 Antfield SAS + * + * Authors: + * Clement Deschamps + * Luc Michel + * + * This work is licensed under the terms of the GNU GPL, version 2 or later. + * See the COPYING file in the top-level directory. + */ + +#ifndef BCM2835_GPIO_H +#define BCM2835_GPIO_H + +#include "hw/sd/sd.h" +#include "hw/sysbus.h" +#include "qom/object.h" + +struct BCM2835GpioState { + SysBusDevice parent_obj; + + MemoryRegion iomem; + + /* SDBus selector */ + SDBus sdbus; + SDBus *sdbus_sdhci; + SDBus *sdbus_sdhost; + + uint8_t fsel[54]; + uint32_t lev0, lev1; + uint8_t sd_fsel; + qemu_irq out[54]; +}; + +#define TYPE_BCM2835_GPIO "bcm2835_gpio" +OBJECT_DECLARE_SIMPLE_TYPE(BCM2835GpioState, BCM2835_GPIO) + +#endif diff --git a/include/hw/gpio/imx_gpio.h b/include/hw/gpio/imx_gpio.h new file mode 100644 index 000000000..227860b9f --- /dev/null +++ b/include/hw/gpio/imx_gpio.h @@ -0,0 +1,64 @@ +/* + * i.MX processors GPIO registers definition. + * + * Copyright (C) 2015 Jean-Christophe Dubois + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation; either version 2 or + * (at your option) version 3 of the License. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, see . + */ + +#ifndef IMX_GPIO_H +#define IMX_GPIO_H + +#include "hw/sysbus.h" +#include "qom/object.h" + +#define TYPE_IMX_GPIO "imx.gpio" +OBJECT_DECLARE_SIMPLE_TYPE(IMXGPIOState, IMX_GPIO) + +#define IMX_GPIO_MEM_SIZE 0x20 + +/* i.MX GPIO memory map */ +#define DR_ADDR 0x00 /* DATA REGISTER */ +#define GDIR_ADDR 0x04 /* DIRECTION REGISTER */ +#define PSR_ADDR 0x08 /* PAD STATUS REGISTER */ +#define ICR1_ADDR 0x0c /* INTERRUPT CONFIGURATION REGISTER 1 */ +#define ICR2_ADDR 0x10 /* INTERRUPT CONFIGURATION REGISTER 2 */ +#define IMR_ADDR 0x14 /* INTERRUPT MASK REGISTER */ +#define ISR_ADDR 0x18 /* INTERRUPT STATUS REGISTER */ +#define EDGE_SEL_ADDR 0x1c /* EDGE SEL REGISTER */ + +#define IMX_GPIO_PIN_COUNT 32 + +struct IMXGPIOState { + /*< private >*/ + SysBusDevice parent_obj; + + /*< public >*/ + MemoryRegion iomem; + + uint32_t dr; + uint32_t gdir; + uint32_t psr; + uint64_t icr; + uint32_t imr; + uint32_t isr; + bool has_edge_sel; + uint32_t edge_sel; + bool has_upper_pin_irq; + + qemu_irq irq[2]; + qemu_irq output[IMX_GPIO_PIN_COUNT]; +}; + +#endif /* IMX_GPIO_H */ diff --git a/include/hw/gpio/npcm7xx_gpio.h b/include/hw/gpio/npcm7xx_gpio.h new file mode 100644 index 000000000..b1d771bd7 --- /dev/null +++ b/include/hw/gpio/npcm7xx_gpio.h @@ -0,0 +1,55 @@ +/* + * Nuvoton NPCM7xx General Purpose Input / Output (GPIO) + * + * Copyright 2020 Google LLC + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * version 2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + */ +#ifndef NPCM7XX_GPIO_H +#define NPCM7XX_GPIO_H + +#include "exec/memory.h" +#include "hw/sysbus.h" + +/* Number of pins managed by each controller. */ +#define NPCM7XX_GPIO_NR_PINS (32) + +/* + * Number of registers in our device state structure. Don't change this without + * incrementing the version_id in the vmstate. + */ +#define NPCM7XX_GPIO_NR_REGS (0x80 / sizeof(uint32_t)) + +typedef struct NPCM7xxGPIOState { + SysBusDevice parent; + + /* Properties to be defined by the SoC */ + uint32_t reset_pu; + uint32_t reset_pd; + uint32_t reset_osrc; + uint32_t reset_odsc; + + MemoryRegion mmio; + + qemu_irq irq; + qemu_irq output[NPCM7XX_GPIO_NR_PINS]; + + uint32_t pin_level; + uint32_t ext_level; + uint32_t ext_driven; + + uint32_t regs[NPCM7XX_GPIO_NR_REGS]; +} NPCM7xxGPIOState; + +#define TYPE_NPCM7XX_GPIO "npcm7xx-gpio" +#define NPCM7XX_GPIO(obj) \ + OBJECT_CHECK(NPCM7xxGPIOState, (obj), TYPE_NPCM7XX_GPIO) + +#endif /* NPCM7XX_GPIO_H */ diff --git a/include/hw/gpio/nrf51_gpio.h b/include/hw/gpio/nrf51_gpio.h new file mode 100644 index 000000000..8f9c2f86d --- /dev/null +++ b/include/hw/gpio/nrf51_gpio.h @@ -0,0 +1,70 @@ +/* + * nRF51 System-on-Chip general purpose input/output register definition + * + * QEMU interface: + * + sysbus MMIO regions 0: GPIO registers + * + Unnamed GPIO inputs 0-31: Set tri-state input level for GPIO pin. + * Level -1: Externally Disconnected/Floating; Pull-up/down will be regarded + * Level 0: Input externally driven LOW + * Level 1: Input externally driven HIGH + * + Unnamed GPIO outputs 0-31: + * Level -1: Disconnected/Floating + * Level 0: Driven LOW + * Level 1: Driven HIGH + * + * Accuracy of the peripheral model: + * + The nRF51 GPIO output driver supports two modes, standard and high-current + * mode. These different drive modes are not modeled and handled the same. + * + Pin SENSEing is not modeled/implemented. + * + * Copyright 2018 Steffen Görtz + * + * This code is licensed under the GPL version 2 or later. See + * the COPYING file in the top-level directory. + * + */ +#ifndef NRF51_GPIO_H +#define NRF51_GPIO_H + +#include "hw/sysbus.h" +#include "qom/object.h" +#define TYPE_NRF51_GPIO "nrf51_soc.gpio" +OBJECT_DECLARE_SIMPLE_TYPE(NRF51GPIOState, NRF51_GPIO) + +#define NRF51_GPIO_PINS 32 + +#define NRF51_GPIO_SIZE 0x1000 + +#define NRF51_GPIO_REG_OUT 0x504 +#define NRF51_GPIO_REG_OUTSET 0x508 +#define NRF51_GPIO_REG_OUTCLR 0x50C +#define NRF51_GPIO_REG_IN 0x510 +#define NRF51_GPIO_REG_DIR 0x514 +#define NRF51_GPIO_REG_DIRSET 0x518 +#define NRF51_GPIO_REG_DIRCLR 0x51C +#define NRF51_GPIO_REG_CNF_START 0x700 +#define NRF51_GPIO_REG_CNF_END 0x77C + +#define NRF51_GPIO_PULLDOWN 1 +#define NRF51_GPIO_PULLUP 3 + +struct NRF51GPIOState { + SysBusDevice parent_obj; + + MemoryRegion mmio; + qemu_irq irq; + + uint32_t out; + uint32_t in; + uint32_t in_mask; + uint32_t dir; + uint32_t cnf[NRF51_GPIO_PINS]; + + uint32_t old_out; + uint32_t old_out_connected; + + qemu_irq output[NRF51_GPIO_PINS]; +}; + + +#endif diff --git a/include/hw/gpio/sifive_gpio.h b/include/hw/gpio/sifive_gpio.h new file mode 100644 index 000000000..fc53785c9 --- /dev/null +++ b/include/hw/gpio/sifive_gpio.h @@ -0,0 +1,79 @@ +/* + * SiFive System-on-Chip general purpose input/output register definition + * + * Copyright 2019 AdaCore + * + * Base on nrf51_gpio.c: + * + * Copyright 2018 Steffen Görtz + * + * This code is licensed under the GPL version 2 or later. See + * the COPYING file in the top-level directory. + */ + +#ifndef SIFIVE_GPIO_H +#define SIFIVE_GPIO_H + +#include "hw/sysbus.h" +#include "qom/object.h" + +#define TYPE_SIFIVE_GPIO "sifive_soc.gpio" +typedef struct SIFIVEGPIOState SIFIVEGPIOState; +DECLARE_INSTANCE_CHECKER(SIFIVEGPIOState, SIFIVE_GPIO, + TYPE_SIFIVE_GPIO) + +#define SIFIVE_GPIO_PINS 32 + +#define SIFIVE_GPIO_SIZE 0x100 + +#define SIFIVE_GPIO_REG_VALUE 0x000 +#define SIFIVE_GPIO_REG_INPUT_EN 0x004 +#define SIFIVE_GPIO_REG_OUTPUT_EN 0x008 +#define SIFIVE_GPIO_REG_PORT 0x00C +#define SIFIVE_GPIO_REG_PUE 0x010 +#define SIFIVE_GPIO_REG_DS 0x014 +#define SIFIVE_GPIO_REG_RISE_IE 0x018 +#define SIFIVE_GPIO_REG_RISE_IP 0x01C +#define SIFIVE_GPIO_REG_FALL_IE 0x020 +#define SIFIVE_GPIO_REG_FALL_IP 0x024 +#define SIFIVE_GPIO_REG_HIGH_IE 0x028 +#define SIFIVE_GPIO_REG_HIGH_IP 0x02C +#define SIFIVE_GPIO_REG_LOW_IE 0x030 +#define SIFIVE_GPIO_REG_LOW_IP 0x034 +#define SIFIVE_GPIO_REG_IOF_EN 0x038 +#define SIFIVE_GPIO_REG_IOF_SEL 0x03C +#define SIFIVE_GPIO_REG_OUT_XOR 0x040 + +struct SIFIVEGPIOState { + SysBusDevice parent_obj; + + MemoryRegion mmio; + + qemu_irq irq[SIFIVE_GPIO_PINS]; + qemu_irq output[SIFIVE_GPIO_PINS]; + + uint32_t value; /* Actual value of the pin */ + uint32_t input_en; + uint32_t output_en; + uint32_t port; /* Pin value requested by the user */ + uint32_t pue; + uint32_t ds; + uint32_t rise_ie; + uint32_t rise_ip; + uint32_t fall_ie; + uint32_t fall_ip; + uint32_t high_ie; + uint32_t high_ip; + uint32_t low_ie; + uint32_t low_ip; + uint32_t iof_en; + uint32_t iof_sel; + uint32_t out_xor; + uint32_t in; + uint32_t in_mask; + + /* config */ + uint32_t ngpio; +}; + +#endif /* SIFIVE_GPIO_H */ diff --git a/include/hw/hotplug.h b/include/hw/hotplug.h new file mode 100644 index 000000000..e15f59c8b --- /dev/null +++ b/include/hw/hotplug.h @@ -0,0 +1,97 @@ +/* + * Hotplug handler interface. + * + * Copyright (c) 2014 Red Hat Inc. + * + * Authors: + * Igor Mammedov , + * + * This work is licensed under the terms of the GNU GPL, version 2 or later. + * See the COPYING file in the top-level directory. + */ +#ifndef HOTPLUG_H +#define HOTPLUG_H + +#include "qom/object.h" + +#define TYPE_HOTPLUG_HANDLER "hotplug-handler" + +typedef struct HotplugHandlerClass HotplugHandlerClass; +DECLARE_CLASS_CHECKERS(HotplugHandlerClass, HOTPLUG_HANDLER, + TYPE_HOTPLUG_HANDLER) +#define HOTPLUG_HANDLER(obj) \ + INTERFACE_CHECK(HotplugHandler, (obj), TYPE_HOTPLUG_HANDLER) + +typedef struct HotplugHandler HotplugHandler; + +/** + * hotplug_fn: + * @plug_handler: a device performing plug/uplug action + * @plugged_dev: a device that has been (un)plugged + * @errp: returns an error if this function fails + */ +typedef void (*hotplug_fn)(HotplugHandler *plug_handler, + DeviceState *plugged_dev, Error **errp); + +/** + * HotplugDeviceClass: + * + * Interface to be implemented by a device performing + * hardware (un)plug functions. + * + * @parent: Opaque parent interface. + * @pre_plug: pre plug callback called at start of device.realize(true) + * @plug: plug callback called at end of device.realize(true). + * @unplug_request: unplug request callback. + * Used as a means to initiate device unplug for devices that + * require asynchronous unplug handling. + * @unplug: unplug callback. + * Used for device removal with devices that implement + * asynchronous and synchronous (surprise) removal. + */ +struct HotplugHandlerClass { + /* */ + InterfaceClass parent; + + /* */ + hotplug_fn pre_plug; + hotplug_fn plug; + hotplug_fn unplug_request; + hotplug_fn unplug; +}; + +/** + * hotplug_handler_plug: + * + * Call #HotplugHandlerClass.plug callback of @plug_handler. + */ +void hotplug_handler_plug(HotplugHandler *plug_handler, + DeviceState *plugged_dev, + Error **errp); + +/** + * hotplug_handler_pre_plug: + * + * Call #HotplugHandlerClass.pre_plug callback of @plug_handler. + */ +void hotplug_handler_pre_plug(HotplugHandler *plug_handler, + DeviceState *plugged_dev, + Error **errp); + +/** + * hotplug_handler_unplug_request: + * + * Calls #HotplugHandlerClass.unplug_request callback of @plug_handler. + */ +void hotplug_handler_unplug_request(HotplugHandler *plug_handler, + DeviceState *plugged_dev, + Error **errp); +/** + * hotplug_handler_unplug: + * + * Calls #HotplugHandlerClass.unplug callback of @plug_handler. + */ +void hotplug_handler_unplug(HotplugHandler *plug_handler, + DeviceState *plugged_dev, + Error **errp); +#endif diff --git a/include/hw/hw.h b/include/hw/hw.h new file mode 100644 index 000000000..fc5301f29 --- /dev/null +++ b/include/hw/hw.h @@ -0,0 +1,10 @@ +#ifndef QEMU_HW_H +#define QEMU_HW_H + +#ifdef CONFIG_USER_ONLY +#error Cannot include hw/hw.h from user emulation +#endif + +void QEMU_NORETURN hw_error(const char *fmt, ...) GCC_FMT_ATTR(1, 2); + +#endif diff --git a/include/hw/hyperv/hyperv-proto.h b/include/hw/hyperv/hyperv-proto.h new file mode 100644 index 000000000..21dc28aee --- /dev/null +++ b/include/hw/hyperv/hyperv-proto.h @@ -0,0 +1,130 @@ +/* + * Definitions for Hyper-V guest/hypervisor interaction + * + * Copyright (c) 2017-2018 Virtuozzo International GmbH. + * + * This work is licensed under the terms of the GNU GPL, version 2 or later. + * See the COPYING file in the top-level directory. + */ + +#ifndef HW_HYPERV_HYPERV_PROTO_H +#define HW_HYPERV_HYPERV_PROTO_H + +#include "qemu/bitmap.h" + +/* + * Hypercall status code + */ +#define HV_STATUS_SUCCESS 0 +#define HV_STATUS_INVALID_HYPERCALL_CODE 2 +#define HV_STATUS_INVALID_HYPERCALL_INPUT 3 +#define HV_STATUS_INVALID_ALIGNMENT 4 +#define HV_STATUS_INVALID_PARAMETER 5 +#define HV_STATUS_INSUFFICIENT_MEMORY 11 +#define HV_STATUS_INVALID_PORT_ID 17 +#define HV_STATUS_INVALID_CONNECTION_ID 18 +#define HV_STATUS_INSUFFICIENT_BUFFERS 19 + +/* + * Hypercall numbers + */ +#define HV_POST_MESSAGE 0x005c +#define HV_SIGNAL_EVENT 0x005d +#define HV_HYPERCALL_FAST (1u << 16) + +/* + * Message size + */ +#define HV_MESSAGE_PAYLOAD_SIZE 240 + +/* + * Message types + */ +#define HV_MESSAGE_NONE 0x00000000 +#define HV_MESSAGE_VMBUS 0x00000001 +#define HV_MESSAGE_UNMAPPED_GPA 0x80000000 +#define HV_MESSAGE_GPA_INTERCEPT 0x80000001 +#define HV_MESSAGE_TIMER_EXPIRED 0x80000010 +#define HV_MESSAGE_INVALID_VP_REGISTER_VALUE 0x80000020 +#define HV_MESSAGE_UNRECOVERABLE_EXCEPTION 0x80000021 +#define HV_MESSAGE_UNSUPPORTED_FEATURE 0x80000022 +#define HV_MESSAGE_EVENTLOG_BUFFERCOMPLETE 0x80000040 +#define HV_MESSAGE_X64_IOPORT_INTERCEPT 0x80010000 +#define HV_MESSAGE_X64_MSR_INTERCEPT 0x80010001 +#define HV_MESSAGE_X64_CPUID_INTERCEPT 0x80010002 +#define HV_MESSAGE_X64_EXCEPTION_INTERCEPT 0x80010003 +#define HV_MESSAGE_X64_APIC_EOI 0x80010004 +#define HV_MESSAGE_X64_LEGACY_FP_ERROR 0x80010005 + +/* + * Message flags + */ +#define HV_MESSAGE_FLAG_PENDING 0x1 + +/* + * Number of synthetic interrupts + */ +#define HV_SINT_COUNT 16 + +/* + * Event flags number per SINT + */ +#define HV_EVENT_FLAGS_COUNT (256 * 8) + +/* + * Connection id valid bits + */ +#define HV_CONNECTION_ID_MASK 0x00ffffff + +/* + * Input structure for POST_MESSAGE hypercall + */ +struct hyperv_post_message_input { + uint32_t connection_id; + uint32_t _reserved; + uint32_t message_type; + uint32_t payload_size; + uint8_t payload[HV_MESSAGE_PAYLOAD_SIZE]; +}; + +/* + * Input structure for SIGNAL_EVENT hypercall + */ +struct hyperv_signal_event_input { + uint32_t connection_id; + uint16_t flag_number; + uint16_t _reserved_zero; +}; + +/* + * SynIC message structures + */ +struct hyperv_message_header { + uint32_t message_type; + uint8_t payload_size; + uint8_t message_flags; /* HV_MESSAGE_FLAG_XX */ + uint8_t _reserved[2]; + uint64_t sender; +}; + +struct hyperv_message { + struct hyperv_message_header header; + uint8_t payload[HV_MESSAGE_PAYLOAD_SIZE]; +}; + +struct hyperv_message_page { + struct hyperv_message slot[HV_SINT_COUNT]; +}; + +/* + * SynIC event flags structures + */ +struct hyperv_event_flags { + DECLARE_BITMAP(flags, HV_EVENT_FLAGS_COUNT); +}; + +struct hyperv_event_flags_page { + struct hyperv_event_flags slot[HV_SINT_COUNT]; +}; + +#endif diff --git a/include/hw/hyperv/hyperv.h b/include/hw/hyperv/hyperv.h new file mode 100644 index 000000000..a63ee0003 --- /dev/null +++ b/include/hw/hyperv/hyperv.h @@ -0,0 +1,84 @@ +/* + * Hyper-V guest/hypervisor interaction + * + * Copyright (c) 2015-2018 Virtuozzo International GmbH. + * + * This work is licensed under the terms of the GNU GPL, version 2 or later. + * See the COPYING file in the top-level directory. + */ + +#ifndef HW_HYPERV_HYPERV_H +#define HW_HYPERV_HYPERV_H + +#include "cpu-qom.h" +#include "hw/hyperv/hyperv-proto.h" + +typedef struct HvSintRoute HvSintRoute; + +/* + * Callback executed in a bottom-half when the status of posting the message + * becomes known, before unblocking the connection for further messages + */ +typedef void (*HvSintMsgCb)(void *data, int status); + +HvSintRoute *hyperv_sint_route_new(uint32_t vp_index, uint32_t sint, + HvSintMsgCb cb, void *cb_data); +void hyperv_sint_route_ref(HvSintRoute *sint_route); +void hyperv_sint_route_unref(HvSintRoute *sint_route); + +int hyperv_sint_route_set_sint(HvSintRoute *sint_route); + +/* + * Submit a message to be posted in vcpu context. If the submission succeeds, + * the status of posting the message is reported via the callback associated + * with the @sint_route; until then no more messages are accepted. + */ +int hyperv_post_msg(HvSintRoute *sint_route, struct hyperv_message *msg); +/* + * Set event flag @eventno, and signal the SINT if the flag has changed. + */ +int hyperv_set_event_flag(HvSintRoute *sint_route, unsigned eventno); + +/* + * Handler for messages arriving from the guest via HV_POST_MESSAGE hypercall. + * Executed in vcpu context. + */ +typedef uint16_t (*HvMsgHandler)(const struct hyperv_post_message_input *msg, + void *data); +/* + * Associate @handler with the message connection @conn_id, such that @handler + * is called with @data when the guest executes HV_POST_MESSAGE hypercall on + * @conn_id. If @handler is NULL clear the association. + */ +int hyperv_set_msg_handler(uint32_t conn_id, HvMsgHandler handler, void *data); +/* + * Associate @notifier with the event connection @conn_id, such that @notifier + * is signaled when the guest executes HV_SIGNAL_EVENT hypercall on @conn_id. + * If @notifier is NULL clear the association. + */ +int hyperv_set_event_flag_handler(uint32_t conn_id, EventNotifier *notifier); + +/* + * Process HV_POST_MESSAGE hypercall: parse the data in the guest memory as + * specified in @param, and call the HvMsgHandler associated with the + * connection on the message contained therein. + */ +uint16_t hyperv_hcall_post_message(uint64_t param, bool fast); +/* + * Process HV_SIGNAL_EVENT hypercall: signal the EventNotifier associated with + * the connection as specified in @param. + */ +uint16_t hyperv_hcall_signal_event(uint64_t param, bool fast); + +static inline uint32_t hyperv_vp_index(CPUState *cs) +{ + return cs->cpu_index; +} + +void hyperv_synic_add(CPUState *cs); +void hyperv_synic_reset(CPUState *cs); +void hyperv_synic_update(CPUState *cs, bool enable, + hwaddr msg_page_addr, hwaddr event_page_addr); +bool hyperv_is_synic_enabled(void); + +#endif diff --git a/include/hw/hyperv/vmbus-bridge.h b/include/hw/hyperv/vmbus-bridge.h new file mode 100644 index 000000000..1e5419574 --- /dev/null +++ b/include/hw/hyperv/vmbus-bridge.h @@ -0,0 +1,34 @@ +/* + * QEMU Hyper-V VMBus root bridge + * + * Copyright (c) 2017-2018 Virtuozzo International GmbH. + * + * This work is licensed under the terms of the GNU GPL, version 2 or later. + * See the COPYING file in the top-level directory. + */ + +#ifndef HW_HYPERV_VMBUS_BRIDGE_H +#define HW_HYPERV_VMBUS_BRIDGE_H + +#include "hw/sysbus.h" +#include "hw/hyperv/vmbus.h" +#include "qom/object.h" + +#define TYPE_VMBUS_BRIDGE "vmbus-bridge" + +struct VMBusBridge { + SysBusDevice parent_obj; + + uint8_t irq; + + VMBus *bus; +}; + +OBJECT_DECLARE_SIMPLE_TYPE(VMBusBridge, VMBUS_BRIDGE) + +static inline VMBusBridge *vmbus_bridge_find(void) +{ + return VMBUS_BRIDGE(object_resolve_path_type("", TYPE_VMBUS_BRIDGE, NULL)); +} + +#endif diff --git a/include/hw/hyperv/vmbus-proto.h b/include/hw/hyperv/vmbus-proto.h new file mode 100644 index 000000000..4628d3b32 --- /dev/null +++ b/include/hw/hyperv/vmbus-proto.h @@ -0,0 +1,222 @@ +/* + * QEMU Hyper-V VMBus support + * + * Copyright (c) 2017-2018 Virtuozzo International GmbH. + * + * This work is licensed under the terms of the GNU GPL, version 2 or later. + * See the COPYING file in the top-level directory. + */ + +#ifndef HW_HYPERV_VMBUS_PROTO_H +#define HW_HYPERV_VMBUS_PROTO_H + +#define VMBUS_VERSION_WS2008 ((0 << 16) | (13)) +#define VMBUS_VERSION_WIN7 ((1 << 16) | (1)) +#define VMBUS_VERSION_WIN8 ((2 << 16) | (4)) +#define VMBUS_VERSION_WIN8_1 ((3 << 16) | (0)) +#define VMBUS_VERSION_WIN10 ((4 << 16) | (0)) +#define VMBUS_VERSION_INVAL -1 +#define VMBUS_VERSION_CURRENT VMBUS_VERSION_WIN10 + +#define VMBUS_MESSAGE_CONNECTION_ID 1 +#define VMBUS_EVENT_CONNECTION_ID 2 +#define VMBUS_MONITOR_CONNECTION_ID 3 +#define VMBUS_SINT 2 + +#define VMBUS_MSG_INVALID 0 +#define VMBUS_MSG_OFFERCHANNEL 1 +#define VMBUS_MSG_RESCIND_CHANNELOFFER 2 +#define VMBUS_MSG_REQUESTOFFERS 3 +#define VMBUS_MSG_ALLOFFERS_DELIVERED 4 +#define VMBUS_MSG_OPENCHANNEL 5 +#define VMBUS_MSG_OPENCHANNEL_RESULT 6 +#define VMBUS_MSG_CLOSECHANNEL 7 +#define VMBUS_MSG_GPADL_HEADER 8 +#define VMBUS_MSG_GPADL_BODY 9 +#define VMBUS_MSG_GPADL_CREATED 10 +#define VMBUS_MSG_GPADL_TEARDOWN 11 +#define VMBUS_MSG_GPADL_TORNDOWN 12 +#define VMBUS_MSG_RELID_RELEASED 13 +#define VMBUS_MSG_INITIATE_CONTACT 14 +#define VMBUS_MSG_VERSION_RESPONSE 15 +#define VMBUS_MSG_UNLOAD 16 +#define VMBUS_MSG_UNLOAD_RESPONSE 17 +#define VMBUS_MSG_COUNT 18 + +#define VMBUS_MESSAGE_SIZE_ALIGN sizeof(uint64_t) + +#define VMBUS_PACKET_INVALID 0x0 +#define VMBUS_PACKET_SYNCH 0x1 +#define VMBUS_PACKET_ADD_XFER_PAGESET 0x2 +#define VMBUS_PACKET_RM_XFER_PAGESET 0x3 +#define VMBUS_PACKET_ESTABLISH_GPADL 0x4 +#define VMBUS_PACKET_TEARDOWN_GPADL 0x5 +#define VMBUS_PACKET_DATA_INBAND 0x6 +#define VMBUS_PACKET_DATA_USING_XFER_PAGES 0x7 +#define VMBUS_PACKET_DATA_USING_GPADL 0x8 +#define VMBUS_PACKET_DATA_USING_GPA_DIRECT 0x9 +#define VMBUS_PACKET_CANCEL_REQUEST 0xa +#define VMBUS_PACKET_COMP 0xb +#define VMBUS_PACKET_DATA_USING_ADDITIONAL_PKT 0xc +#define VMBUS_PACKET_ADDITIONAL_DATA 0xd + +#define VMBUS_CHANNEL_USER_DATA_SIZE 120 + +#define VMBUS_OFFER_MONITOR_ALLOCATED 0x1 +#define VMBUS_OFFER_INTERRUPT_DEDICATED 0x1 + +#define VMBUS_RING_BUFFER_FEAT_PENDING_SZ (1ul << 0) + +#define VMBUS_CHANNEL_ENUMERATE_DEVICE_INTERFACE 0x1 +#define VMBUS_CHANNEL_SERVER_SUPPORTS_TRANSFER_PAGES 0x2 +#define VMBUS_CHANNEL_SERVER_SUPPORTS_GPADLS 0x4 +#define VMBUS_CHANNEL_NAMED_PIPE_MODE 0x10 +#define VMBUS_CHANNEL_LOOPBACK_OFFER 0x100 +#define VMBUS_CHANNEL_PARENT_OFFER 0x200 +#define VMBUS_CHANNEL_REQUEST_MONITORED_NOTIFICATION 0x400 +#define VMBUS_CHANNEL_TLNPI_PROVIDER_OFFER 0x2000 + +#define VMBUS_PACKET_FLAG_REQUEST_COMPLETION 1 + +typedef struct vmbus_message_header { + uint32_t message_type; + uint32_t _padding; +} vmbus_message_header; + +typedef struct vmbus_message_initiate_contact { + vmbus_message_header header; + uint32_t version_requested; + uint32_t target_vcpu; + uint64_t interrupt_page; + uint64_t monitor_page1; + uint64_t monitor_page2; +} vmbus_message_initiate_contact; + +typedef struct vmbus_message_version_response { + vmbus_message_header header; + uint8_t version_supported; + uint8_t status; +} vmbus_message_version_response; + +typedef struct vmbus_message_offer_channel { + vmbus_message_header header; + uint8_t type_uuid[16]; + uint8_t instance_uuid[16]; + uint64_t _reserved1; + uint64_t _reserved2; + uint16_t channel_flags; + uint16_t mmio_size_mb; + uint8_t user_data[VMBUS_CHANNEL_USER_DATA_SIZE]; + uint16_t sub_channel_index; + uint16_t _reserved3; + uint32_t child_relid; + uint8_t monitor_id; + uint8_t monitor_flags; + uint16_t interrupt_flags; + uint32_t connection_id; +} vmbus_message_offer_channel; + +typedef struct vmbus_message_rescind_channel_offer { + vmbus_message_header header; + uint32_t child_relid; +} vmbus_message_rescind_channel_offer; + +typedef struct vmbus_gpa_range { + uint32_t byte_count; + uint32_t byte_offset; + uint64_t pfn_array[]; +} vmbus_gpa_range; + +typedef struct vmbus_message_gpadl_header { + vmbus_message_header header; + uint32_t child_relid; + uint32_t gpadl_id; + uint16_t range_buflen; + uint16_t rangecount; + vmbus_gpa_range range[]; +} QEMU_PACKED vmbus_message_gpadl_header; + +typedef struct vmbus_message_gpadl_body { + vmbus_message_header header; + uint32_t message_number; + uint32_t gpadl_id; + uint64_t pfn_array[]; +} vmbus_message_gpadl_body; + +typedef struct vmbus_message_gpadl_created { + vmbus_message_header header; + uint32_t child_relid; + uint32_t gpadl_id; + uint32_t status; +} vmbus_message_gpadl_created; + +typedef struct vmbus_message_gpadl_teardown { + vmbus_message_header header; + uint32_t child_relid; + uint32_t gpadl_id; +} vmbus_message_gpadl_teardown; + +typedef struct vmbus_message_gpadl_torndown { + vmbus_message_header header; + uint32_t gpadl_id; +} vmbus_message_gpadl_torndown; + +typedef struct vmbus_message_open_channel { + vmbus_message_header header; + uint32_t child_relid; + uint32_t open_id; + uint32_t ring_buffer_gpadl_id; + uint32_t target_vp; + uint32_t ring_buffer_offset; + uint8_t user_data[VMBUS_CHANNEL_USER_DATA_SIZE]; +} vmbus_message_open_channel; + +typedef struct vmbus_message_open_result { + vmbus_message_header header; + uint32_t child_relid; + uint32_t open_id; + uint32_t status; +} vmbus_message_open_result; + +typedef struct vmbus_message_close_channel { + vmbus_message_header header; + uint32_t child_relid; +} vmbus_message_close_channel; + +typedef struct vmbus_ring_buffer { + uint32_t write_index; + uint32_t read_index; + uint32_t interrupt_mask; + uint32_t pending_send_sz; + uint32_t _reserved1[12]; + uint32_t feature_bits; +} vmbus_ring_buffer; + +typedef struct vmbus_packet_hdr { + uint16_t type; + uint16_t offset_qwords; + uint16_t len_qwords; + uint16_t flags; + uint64_t transaction_id; +} vmbus_packet_hdr; + +typedef struct vmbus_pkt_gpa_direct { + uint32_t _reserved; + uint32_t rangecount; + vmbus_gpa_range range[]; +} vmbus_pkt_gpa_direct; + +typedef struct vmbus_xferpg_range { + uint32_t byte_count; + uint32_t byte_offset; +} vmbus_xferpg_range; + +typedef struct vmbus_pkt_xferpg { + uint16_t buffer_id; + uint8_t sender_owns_set; + uint8_t _reserved; + uint32_t rangecount; + vmbus_xferpg_range range[]; +} vmbus_pkt_xferpg; + +#endif diff --git a/include/hw/hyperv/vmbus.h b/include/hw/hyperv/vmbus.h new file mode 100644 index 000000000..f98bea388 --- /dev/null +++ b/include/hw/hyperv/vmbus.h @@ -0,0 +1,229 @@ +/* + * QEMU Hyper-V VMBus + * + * Copyright (c) 2017-2018 Virtuozzo International GmbH. + * + * This work is licensed under the terms of the GNU GPL, version 2 or later. + * See the COPYING file in the top-level directory. + */ + +#ifndef HW_HYPERV_VMBUS_H +#define HW_HYPERV_VMBUS_H + +#include "sysemu/sysemu.h" +#include "sysemu/dma.h" +#include "hw/qdev-core.h" +#include "migration/vmstate.h" +#include "hw/hyperv/vmbus-proto.h" +#include "qemu/uuid.h" +#include "qom/object.h" + +#define TYPE_VMBUS_DEVICE "vmbus-dev" + +OBJECT_DECLARE_TYPE(VMBusDevice, VMBusDeviceClass, + VMBUS_DEVICE) + +#define TYPE_VMBUS "vmbus" +OBJECT_DECLARE_SIMPLE_TYPE(VMBus, VMBUS) + +/* + * Object wrapping a GPADL -- GPA Descriptor List -- an array of guest physical + * pages, to be used for various buffers shared between the host and the guest. + */ +typedef struct VMBusGpadl VMBusGpadl; +/* + * VMBus channel -- a pair of ring buffers for either direction, placed within + * one GPADL, and the associated notification means. + */ +typedef struct VMBusChannel VMBusChannel; +/* + * Base class for VMBus devices. Includes one or more channels. Identified by + * class GUID and instance GUID. + */ + +typedef void(*VMBusChannelNotifyCb)(struct VMBusChannel *chan); + +struct VMBusDeviceClass { + DeviceClass parent; + + QemuUUID classid; + QemuUUID instanceid; /* Fixed UUID for singleton devices */ + uint16_t channel_flags; + uint16_t mmio_size_mb; + + /* Extentions to standard device callbacks */ + void (*vmdev_realize)(VMBusDevice *vdev, Error **errp); + void (*vmdev_unrealize)(VMBusDevice *vdev); + void (*vmdev_reset)(VMBusDevice *vdev); + /* + * Calculate the number of channels based on the device properties. Called + * at realize time. + **/ + uint16_t (*num_channels)(VMBusDevice *vdev); + /* + * Device-specific actions to complete the otherwise successful process of + * opening a channel. + * Return 0 on success, -errno on failure. + */ + int (*open_channel)(VMBusChannel *chan); + /* + * Device-specific actions to perform before closing a channel. + */ + void (*close_channel)(VMBusChannel *chan); + /* + * Main device worker; invoked in response to notifications from either + * side, when there's work to do with the data in the channel ring buffers. + */ + VMBusChannelNotifyCb chan_notify_cb; +}; + +struct VMBusDevice { + DeviceState parent; + QemuUUID instanceid; + uint16_t num_channels; + VMBusChannel *channels; + AddressSpace *dma_as; +}; + +extern const VMStateDescription vmstate_vmbus_dev; + +/* + * A unit of work parsed out of a message in the receive (i.e. guest->host) + * ring buffer of a channel. It's supposed to be subclassed (through + * embedding) by the specific devices. + */ +typedef struct VMBusChanReq { + VMBusChannel *chan; + uint16_t pkt_type; + uint32_t msglen; + void *msg; + uint64_t transaction_id; + bool need_comp; + QEMUSGList sgl; +} VMBusChanReq; + +VMBusDevice *vmbus_channel_device(VMBusChannel *chan); +VMBusChannel *vmbus_device_channel(VMBusDevice *dev, uint32_t chan_idx); +uint32_t vmbus_channel_idx(VMBusChannel *chan); +bool vmbus_channel_is_open(VMBusChannel *chan); + +/* + * Notify (on guest's behalf) the host side of the channel that there's data in + * the ringbuffer to process. + */ +void vmbus_channel_notify_host(VMBusChannel *chan); + +/* + * Reserve space for a packet in the send (i.e. host->guest) ringbuffer. If + * there isn't enough room, indicate that to the guest, to be notified when it + * becomes available. + * Return 0 on success, negative errno on failure. + * The ringbuffer indices are NOT updated, the requested space indicator may. + */ +int vmbus_channel_reserve(VMBusChannel *chan, + uint32_t desclen, uint32_t msglen); + +/* + * Send a packet to the guest. The space for the packet MUST be reserved + * first. + * Return total number of bytes placed in the send ringbuffer on success, + * negative errno on failure. + * The ringbuffer indices are updated on success, and the guest is signaled if + * needed. + */ +ssize_t vmbus_channel_send(VMBusChannel *chan, uint16_t pkt_type, + void *desc, uint32_t desclen, + void *msg, uint32_t msglen, + bool need_comp, uint64_t transaction_id); + +/* + * Prepare to fetch a batch of packets from the receive ring buffer. + * Return 0 on success, negative errno on failure. + */ +int vmbus_channel_recv_start(VMBusChannel *chan); + +/* + * Shortcut for a common case of sending a simple completion packet with no + * auxiliary descriptors. + */ +ssize_t vmbus_channel_send_completion(VMBusChanReq *req, + void *msg, uint32_t msglen); + +/* + * Peek at the receive (i.e. guest->host) ring buffer and extract a unit of + * work (a device-specific subclass of VMBusChanReq) from a packet if there's + * one. + * Return an allocated buffer, containing the request of @size with filled + * VMBusChanReq at the beginning, followed by the message payload, or NULL on + * failure. + * The ringbuffer indices are NOT updated, nor is the private copy of the read + * index. + */ +void *vmbus_channel_recv_peek(VMBusChannel *chan, uint32_t size); + +/* + * Update the private copy of the read index once the preceding peek is deemed + * successful. + * The ringbuffer indices are NOT updated. + */ +void vmbus_channel_recv_pop(VMBusChannel *chan); + +/* + * Propagate the private copy of the read index into the receive ring buffer, + * and thus complete the reception of a series of packets. Notify guest if + * needed. + * Return the number of bytes popped off the receive ring buffer by the + * preceding recv_peek/recv_pop calls on success, negative errno on failure. + */ +ssize_t vmbus_channel_recv_done(VMBusChannel *chan); + +/* + * Free the request allocated by vmbus_channel_recv_peek, together with its + * fields. + */ +void vmbus_free_req(void *req); + +/* + * Find and reference a GPADL by @gpadl_id. + * If not found return NULL. + */ +VMBusGpadl *vmbus_get_gpadl(VMBusChannel *chan, uint32_t gpadl_id); + +/* + * Unreference @gpadl. If the reference count drops to zero, free it. + * @gpadl may be NULL, in which case nothing is done. + */ +void vmbus_put_gpadl(VMBusGpadl *gpadl); + +/* + * Calculate total length in bytes of @gpadl. + * @gpadl must be valid. + */ +uint32_t vmbus_gpadl_len(VMBusGpadl *gpadl); + +/* + * Copy data from @iov to @gpadl at offset @off. + * Return the number of bytes copied, or a negative status on failure. + */ +ssize_t vmbus_iov_to_gpadl(VMBusChannel *chan, VMBusGpadl *gpadl, uint32_t off, + const struct iovec *iov, size_t iov_cnt); + +/* + * Map SGList contained in the request @req, at offset @off and no more than + * @len bytes, for io in direction @dir, and populate @iov with the mapped + * iovecs. + * Return the number of iovecs mapped, or negative status on failure. + */ +int vmbus_map_sgl(VMBusChanReq *req, DMADirection dir, struct iovec *iov, + unsigned iov_cnt, size_t len, size_t off); + +/* + * Unmap *iov mapped with vmbus_map_sgl, marking the number of bytes @accessed. + */ +void vmbus_unmap_sgl(VMBusChanReq *req, DMADirection dir, struct iovec *iov, + unsigned iov_cnt, size_t accessed); + +void vmbus_save_req(QEMUFile *f, VMBusChanReq *req); +void *vmbus_load_req(QEMUFile *f, VMBusDevice *dev, uint32_t size); + +#endif diff --git a/include/hw/i2c/arm_sbcon_i2c.h b/include/hw/i2c/arm_sbcon_i2c.h new file mode 100644 index 000000000..ad96781e7 --- /dev/null +++ b/include/hw/i2c/arm_sbcon_i2c.h @@ -0,0 +1,37 @@ +/* + * ARM SBCon two-wire serial bus interface (I2C bitbang) + * a.k.a. + * ARM Versatile I2C controller + * + * Copyright (c) 2006-2007 CodeSourcery. + * Copyright (c) 2012 Oskar Andero + * Copyright (C) 2020 Philippe Mathieu-Daudé + * + * SPDX-License-Identifier: GPL-2.0-or-later + */ +#ifndef HW_I2C_ARM_SBCON_H +#define HW_I2C_ARM_SBCON_H + +#include "hw/sysbus.h" +#include "hw/i2c/bitbang_i2c.h" +#include "qom/object.h" + +#define TYPE_VERSATILE_I2C "versatile_i2c" +#define TYPE_ARM_SBCON_I2C TYPE_VERSATILE_I2C + +typedef struct ArmSbconI2CState ArmSbconI2CState; +DECLARE_INSTANCE_CHECKER(ArmSbconI2CState, ARM_SBCON_I2C, + TYPE_ARM_SBCON_I2C) + +struct ArmSbconI2CState { + /*< private >*/ + SysBusDevice parent_obj; + /*< public >*/ + + MemoryRegion iomem; + bitbang_i2c_interface bitbang; + int out; + int in; +}; + +#endif /* HW_I2C_ARM_SBCON_H */ diff --git a/include/hw/i2c/aspeed_i2c.h b/include/hw/i2c/aspeed_i2c.h new file mode 100644 index 000000000..565f83306 --- /dev/null +++ b/include/hw/i2c/aspeed_i2c.h @@ -0,0 +1,94 @@ +/* + * ASPEED AST2400 I2C Controller + * + * Copyright (C) 2016 IBM Corp. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write to the Free Software Foundation, Inc., + * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + */ + +#ifndef ASPEED_I2C_H +#define ASPEED_I2C_H + +#include "hw/i2c/i2c.h" +#include "hw/sysbus.h" +#include "qom/object.h" + +#define TYPE_ASPEED_I2C "aspeed.i2c" +#define TYPE_ASPEED_2400_I2C TYPE_ASPEED_I2C "-ast2400" +#define TYPE_ASPEED_2500_I2C TYPE_ASPEED_I2C "-ast2500" +#define TYPE_ASPEED_2600_I2C TYPE_ASPEED_I2C "-ast2600" +OBJECT_DECLARE_TYPE(AspeedI2CState, AspeedI2CClass, ASPEED_I2C) + +#define ASPEED_I2C_NR_BUSSES 16 +#define ASPEED_I2C_MAX_POOL_SIZE 0x800 + +struct AspeedI2CState; + +typedef struct AspeedI2CBus { + struct AspeedI2CState *controller; + + MemoryRegion mr; + + I2CBus *bus; + uint8_t id; + qemu_irq irq; + + uint32_t ctrl; + uint32_t timing[2]; + uint32_t intr_ctrl; + uint32_t intr_status; + uint32_t cmd; + uint32_t buf; + uint32_t pool_ctrl; + uint32_t dma_addr; + uint32_t dma_len; +} AspeedI2CBus; + +struct AspeedI2CState { + SysBusDevice parent_obj; + + MemoryRegion iomem; + qemu_irq irq; + + uint32_t intr_status; + uint32_t ctrl_global; + MemoryRegion pool_iomem; + uint8_t pool[ASPEED_I2C_MAX_POOL_SIZE]; + + AspeedI2CBus busses[ASPEED_I2C_NR_BUSSES]; + MemoryRegion *dram_mr; + AddressSpace dram_as; +}; + + +struct AspeedI2CClass { + SysBusDeviceClass parent_class; + + uint8_t num_busses; + uint8_t reg_size; + uint8_t gap; + qemu_irq (*bus_get_irq)(AspeedI2CBus *); + + uint64_t pool_size; + hwaddr pool_base; + uint8_t *(*bus_pool_base)(AspeedI2CBus *); + bool check_sram; + bool has_dma; + +}; + +I2CBus *aspeed_i2c_get_bus(AspeedI2CState *s, int busnr); + +#endif /* ASPEED_I2C_H */ diff --git a/include/hw/i2c/bitbang_i2c.h b/include/hw/i2c/bitbang_i2c.h new file mode 100644 index 000000000..92334e901 --- /dev/null +++ b/include/hw/i2c/bitbang_i2c.h @@ -0,0 +1,50 @@ +#ifndef BITBANG_I2C_H +#define BITBANG_I2C_H + +#include "hw/i2c/i2c.h" + +typedef struct bitbang_i2c_interface bitbang_i2c_interface; + +#define BITBANG_I2C_SDA 0 +#define BITBANG_I2C_SCL 1 + +typedef enum bitbang_i2c_state { + STOPPED = 0, + SENDING_BIT7, + SENDING_BIT6, + SENDING_BIT5, + SENDING_BIT4, + SENDING_BIT3, + SENDING_BIT2, + SENDING_BIT1, + SENDING_BIT0, + WAITING_FOR_ACK, + RECEIVING_BIT7, + RECEIVING_BIT6, + RECEIVING_BIT5, + RECEIVING_BIT4, + RECEIVING_BIT3, + RECEIVING_BIT2, + RECEIVING_BIT1, + RECEIVING_BIT0, + SENDING_ACK, + SENT_NACK +} bitbang_i2c_state; + +struct bitbang_i2c_interface { + I2CBus *bus; + bitbang_i2c_state state; + int last_data; + int last_clock; + int device_out; + uint8_t buffer; + int current_addr; +}; + +/** + * bitbang_i2c_init: in-place initialize the bitbang_i2c_interface struct + */ +void bitbang_i2c_init(bitbang_i2c_interface *s, I2CBus *bus); +int bitbang_i2c_set(bitbang_i2c_interface *i2c, int line, int level); + +#endif diff --git a/include/hw/i2c/i2c.h b/include/hw/i2c/i2c.h new file mode 100644 index 000000000..277dd9f2d --- /dev/null +++ b/include/hw/i2c/i2c.h @@ -0,0 +1,143 @@ +#ifndef QEMU_I2C_H +#define QEMU_I2C_H + +#include "hw/qdev-core.h" +#include "qom/object.h" + +/* The QEMU I2C implementation only supports simple transfers that complete + immediately. It does not support slave devices that need to be able to + defer their response (eg. CPU slave interfaces where the data is supplied + by the device driver in response to an interrupt). */ + +enum i2c_event { + I2C_START_RECV, + I2C_START_SEND, + I2C_FINISH, + I2C_NACK /* Masker NACKed a receive byte. */ +}; + + +#define TYPE_I2C_SLAVE "i2c-slave" +OBJECT_DECLARE_TYPE(I2CSlave, I2CSlaveClass, + I2C_SLAVE) + +struct I2CSlaveClass { + DeviceClass parent_class; + + /* Master to slave. Returns non-zero for a NAK, 0 for success. */ + int (*send)(I2CSlave *s, uint8_t data); + + /* + * Slave to master. This cannot fail, the device should always + * return something here. + */ + uint8_t (*recv)(I2CSlave *s); + + /* + * Notify the slave of a bus state change. For start event, + * returns non-zero to NAK an operation. For other events the + * return code is not used and should be zero. + */ + int (*event)(I2CSlave *s, enum i2c_event event); +}; + +struct I2CSlave { + DeviceState qdev; + + /* Remaining fields for internal use by the I2C code. */ + uint8_t address; +}; + +#define TYPE_I2C_BUS "i2c-bus" +OBJECT_DECLARE_SIMPLE_TYPE(I2CBus, I2C_BUS) + +typedef struct I2CNode I2CNode; + +struct I2CNode { + I2CSlave *elt; + QLIST_ENTRY(I2CNode) next; +}; + +struct I2CBus { + BusState qbus; + QLIST_HEAD(, I2CNode) current_devs; + uint8_t saved_address; + bool broadcast; +}; + +I2CBus *i2c_init_bus(DeviceState *parent, const char *name); +void i2c_set_slave_address(I2CSlave *dev, uint8_t address); +int i2c_bus_busy(I2CBus *bus); +int i2c_start_transfer(I2CBus *bus, uint8_t address, int recv); +void i2c_end_transfer(I2CBus *bus); +void i2c_nack(I2CBus *bus); +int i2c_send_recv(I2CBus *bus, uint8_t *data, bool send); +int i2c_send(I2CBus *bus, uint8_t data); +uint8_t i2c_recv(I2CBus *bus); + +/** + * Create an I2C slave device on the heap. + * @name: a device type name + * @addr: I2C address of the slave when put on a bus + * + * This only initializes the device state structure and allows + * properties to be set. Type @name must exist. The device still + * needs to be realized. See qdev-core.h. + */ +I2CSlave *i2c_slave_new(const char *name, uint8_t addr); + +/** + * Create and realize an I2C slave device on the heap. + * @bus: I2C bus to put it on + * @name: I2C slave device type name + * @addr: I2C address of the slave when put on a bus + * + * Create the device state structure, initialize it, put it on the + * specified @bus, and drop the reference to it (the device is realized). + */ +I2CSlave *i2c_slave_create_simple(I2CBus *bus, const char *name, uint8_t addr); + +/** + * Realize and drop a reference an I2C slave device + * @dev: I2C slave device to realize + * @bus: I2C bus to put it on + * @addr: I2C address of the slave on the bus + * @errp: pointer to NULL initialized error object + * + * Returns: %true on success, %false on failure. + * + * Call 'realize' on @dev, put it on the specified @bus, and drop the + * reference to it. + * + * This function is useful if you have created @dev via qdev_new(), + * i2c_slave_new() or i2c_slave_try_new() (which take a reference to + * the device it returns to you), so that you can set properties on it + * before realizing it. If you don't need to set properties then + * i2c_slave_create_simple() is probably better (as it does the create, + * init and realize in one step). + * + * If you are embedding the I2C slave into another QOM device and + * initialized it via some variant on object_initialize_child() then + * do not use this function, because that family of functions arrange + * for the only reference to the child device to be held by the parent + * via the child<> property, and so the reference-count-drop done here + * would be incorrect. (Instead you would want i2c_slave_realize(), + * which doesn't currently exist but would be trivial to create if we + * had any code that wanted it.) + */ +bool i2c_slave_realize_and_unref(I2CSlave *dev, I2CBus *bus, Error **errp); + +/* lm832x.c */ +void lm832x_key_event(DeviceState *dev, int key, int state); + +extern const VMStateDescription vmstate_i2c_slave; + +#define VMSTATE_I2C_SLAVE(_field, _state) { \ + .name = (stringify(_field)), \ + .size = sizeof(I2CSlave), \ + .vmsd = &vmstate_i2c_slave, \ + .flags = VMS_STRUCT, \ + .offset = vmstate_offset_value(_state, _field, I2CSlave), \ +} + +#endif diff --git a/include/hw/i2c/imx_i2c.h b/include/hw/i2c/imx_i2c.h new file mode 100644 index 000000000..e4f91339f --- /dev/null +++ b/include/hw/i2c/imx_i2c.h @@ -0,0 +1,88 @@ +/* + * i.MX I2C Bus Serial Interface registers definition + * + * Copyright (C) 2013 Jean-Christophe Dubois. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, see . + * + */ + +#ifndef IMX_I2C_H +#define IMX_I2C_H + +#include "hw/sysbus.h" +#include "qom/object.h" + +#define TYPE_IMX_I2C "imx.i2c" +OBJECT_DECLARE_SIMPLE_TYPE(IMXI2CState, IMX_I2C) + +#define IMX_I2C_MEM_SIZE 0x14 + +/* i.MX I2C memory map */ +#define IADR_ADDR 0x00 /* address register */ +#define IFDR_ADDR 0x04 /* frequency divider register */ +#define I2CR_ADDR 0x08 /* control register */ +#define I2SR_ADDR 0x0c /* status register */ +#define I2DR_ADDR 0x10 /* data register */ + +#define IADR_MASK 0xFE +#define IADR_RESET 0 + +#define IFDR_MASK 0x3F +#define IFDR_RESET 0 + +#define I2CR_IEN (1 << 7) +#define I2CR_IIEN (1 << 6) +#define I2CR_MSTA (1 << 5) +#define I2CR_MTX (1 << 4) +#define I2CR_TXAK (1 << 3) +#define I2CR_RSTA (1 << 2) +#define I2CR_MASK 0xFC +#define I2CR_RESET 0 + +#define I2SR_ICF (1 << 7) +#define I2SR_IAAF (1 << 6) +#define I2SR_IBB (1 << 5) +#define I2SR_IAL (1 << 4) +#define I2SR_SRW (1 << 2) +#define I2SR_IIF (1 << 1) +#define I2SR_RXAK (1 << 0) +#define I2SR_MASK 0xE9 +#define I2SR_RESET 0x81 + +#define I2DR_MASK 0xFF +#define I2DR_RESET 0 + +#define ADDR_RESET 0xFF00 + +struct IMXI2CState { + /*< private >*/ + SysBusDevice parent_obj; + + /*< public >*/ + MemoryRegion iomem; + I2CBus *bus; + qemu_irq irq; + + uint16_t address; + + uint16_t iadr; + uint16_t ifdr; + uint16_t i2cr; + uint16_t i2sr; + uint16_t i2dr_read; + uint16_t i2dr_write; +}; + +#endif /* IMX_I2C_H */ diff --git a/include/hw/i2c/microbit_i2c.h b/include/hw/i2c/microbit_i2c.h new file mode 100644 index 000000000..3c29e09bf --- /dev/null +++ b/include/hw/i2c/microbit_i2c.h @@ -0,0 +1,42 @@ +/* + * Microbit stub for Nordic Semiconductor nRF51 SoC Two-Wire Interface + * http://infocenter.nordicsemi.com/pdf/nRF51_RM_v3.0.1.pdf + * + * Copyright 2019 Red Hat, Inc. + * + * This code is licensed under the GPL version 2 or later. See + * the COPYING file in the top-level directory. + */ + +#ifndef MICROBIT_I2C_H +#define MICROBIT_I2C_H + +#include "hw/sysbus.h" +#include "hw/arm/nrf51.h" +#include "qom/object.h" + +#define NRF51_TWI_TASK_STARTRX 0x000 +#define NRF51_TWI_TASK_STARTTX 0x008 +#define NRF51_TWI_TASK_STOP 0x014 +#define NRF51_TWI_EVENT_STOPPED 0x104 +#define NRF51_TWI_EVENT_RXDREADY 0x108 +#define NRF51_TWI_EVENT_TXDSENT 0x11c +#define NRF51_TWI_REG_ENABLE 0x500 +#define NRF51_TWI_REG_RXD 0x518 +#define NRF51_TWI_REG_TXD 0x51c +#define NRF51_TWI_REG_ADDRESS 0x588 + +#define TYPE_MICROBIT_I2C "microbit.i2c" +OBJECT_DECLARE_SIMPLE_TYPE(MicrobitI2CState, MICROBIT_I2C) + +#define MICROBIT_I2C_NREGS (NRF51_PERIPHERAL_SIZE / sizeof(uint32_t)) + +struct MicrobitI2CState { + SysBusDevice parent_obj; + + MemoryRegion iomem; + uint32_t regs[MICROBIT_I2C_NREGS]; + uint32_t read_idx; +}; + +#endif /* MICROBIT_I2C_H */ diff --git a/include/hw/i2c/pm_smbus.h b/include/hw/i2c/pm_smbus.h new file mode 100644 index 000000000..0d74207ef --- /dev/null +++ b/include/hw/i2c/pm_smbus.h @@ -0,0 +1,56 @@ +#ifndef PM_SMBUS_H +#define PM_SMBUS_H + +#include "exec/memory.h" +#include "hw/i2c/smbus_master.h" + +#define PM_SMBUS_MAX_MSG_SIZE 32 + +typedef struct PMSMBus { + I2CBus *smbus; + MemoryRegion io; + + uint8_t smb_stat; + uint8_t smb_ctl; + uint8_t smb_cmd; + uint8_t smb_addr; + uint8_t smb_data0; + uint8_t smb_data1; + uint8_t smb_data[PM_SMBUS_MAX_MSG_SIZE]; + uint8_t smb_blkdata; + uint8_t smb_auxctl; + uint32_t smb_index; + + /* Set by pm_smbus.c */ + void (*reset)(struct PMSMBus *s); + + /* Set by the user. */ + bool i2c_enable; + void (*set_irq)(struct PMSMBus *s, bool enabled); + void *opaque; + + /* Internally used by pm_smbus. */ + + /* Set on block transfers after the last byte has been read, so the + INTR bit can be set at the right time. */ + bool op_done; + + /* Set during an I2C block read, so we know how to handle data. */ + bool in_i2c_block_read; + + /* Used to work around a bug in AMIBIOS, see smb_transaction_start() */ + bool start_transaction_on_status_read; +} PMSMBus; + +void pm_smbus_init(DeviceState *parent, PMSMBus *smb, bool force_aux_blk); + +/* + * For backwards compatibility on migration, older versions don't have + * working migration for pm_smbus, this lets us ignore the migrations + * for older machine versions. + */ +bool pm_smbus_vmstate_needed(void); + +extern const VMStateDescription pmsmb_vmstate; + +#endif /* PM_SMBUS_H */ diff --git a/include/hw/i2c/ppc4xx_i2c.h b/include/hw/i2c/ppc4xx_i2c.h new file mode 100644 index 000000000..4e882fa3c --- /dev/null +++ b/include/hw/i2c/ppc4xx_i2c.h @@ -0,0 +1,63 @@ +/* + * PPC4xx I2C controller emulation + * + * Copyright (c) 2007 Jocelyn Mayer + * Copyright (c) 2012 François Revol + * Copyright (c) 2016-2018 BALATON Zoltan + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ + +#ifndef PPC4XX_I2C_H +#define PPC4XX_I2C_H + +#include "hw/sysbus.h" +#include "hw/i2c/bitbang_i2c.h" +#include "qom/object.h" + +#define TYPE_PPC4xx_I2C "ppc4xx-i2c" +OBJECT_DECLARE_SIMPLE_TYPE(PPC4xxI2CState, PPC4xx_I2C) + +struct PPC4xxI2CState { + /*< private >*/ + SysBusDevice parent_obj; + + /*< public >*/ + I2CBus *bus; + qemu_irq irq; + MemoryRegion iomem; + bitbang_i2c_interface bitbang; + int mdidx; + uint8_t mdata[4]; + uint8_t lmadr; + uint8_t hmadr; + uint8_t cntl; + uint8_t mdcntl; + uint8_t sts; + uint8_t extsts; + uint8_t lsadr; + uint8_t hsadr; + uint8_t clkdiv; + uint8_t intrmsk; + uint8_t xfrcnt; + uint8_t xtcntlss; + uint8_t directcntl; +}; + +#endif /* PPC4XX_I2C_H */ diff --git a/include/hw/i2c/smbus_eeprom.h b/include/hw/i2c/smbus_eeprom.h new file mode 100644 index 000000000..68b0063ab --- /dev/null +++ b/include/hw/i2c/smbus_eeprom.h @@ -0,0 +1,36 @@ +/* + * QEMU SMBus EEPROM API + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ + +#ifndef HW_SMBUS_EEPROM_H +#define HW_SMBUS_EEPROM_H + +#include "exec/cpu-common.h" +#include "hw/i2c/i2c.h" + +void smbus_eeprom_init_one(I2CBus *bus, uint8_t address, uint8_t *eeprom_buf); +void smbus_eeprom_init(I2CBus *bus, int nb_eeprom, + const uint8_t *eeprom_spd, int size); + +enum sdram_type { SDR = 0x4, DDR = 0x7, DDR2 = 0x8 }; +uint8_t *spd_data_generate(enum sdram_type type, ram_addr_t size); + +#endif diff --git a/include/hw/i2c/smbus_master.h b/include/hw/i2c/smbus_master.h new file mode 100644 index 000000000..bb13bc423 --- /dev/null +++ b/include/hw/i2c/smbus_master.h @@ -0,0 +1,55 @@ +/* + * QEMU SMBus host (master) API + * + * Copyright (c) 2007 Arastra, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ + +#ifndef HW_SMBUS_MASTER_H +#define HW_SMBUS_MASTER_H + +#include "hw/i2c/i2c.h" + +/* Master device commands. */ +int smbus_quick_command(I2CBus *bus, uint8_t addr, int read); +int smbus_receive_byte(I2CBus *bus, uint8_t addr); +int smbus_send_byte(I2CBus *bus, uint8_t addr, uint8_t data); +int smbus_read_byte(I2CBus *bus, uint8_t addr, uint8_t command); +int smbus_write_byte(I2CBus *bus, uint8_t addr, uint8_t command, uint8_t data); +int smbus_read_word(I2CBus *bus, uint8_t addr, uint8_t command); +int smbus_write_word(I2CBus *bus, uint8_t addr, uint8_t command, uint16_t data); + +/* + * Do a block transfer from an I2C device. If recv_len is set, then the + * first received byte is a length field and is used to know how much data + * to receive. Otherwise receive "len" bytes. If send_cmd is set, send + * the command byte first before receiving the data. + */ +int smbus_read_block(I2CBus *bus, uint8_t addr, uint8_t command, uint8_t *data, + int len, bool recv_len, bool send_cmd); + +/* + * Do a block transfer to an I2C device. If send_len is set, send the + * "len" value before the data. + */ +int smbus_write_block(I2CBus *bus, uint8_t addr, uint8_t command, uint8_t *data, + int len, bool send_len); + +#endif diff --git a/include/hw/i2c/smbus_slave.h b/include/hw/i2c/smbus_slave.h new file mode 100644 index 000000000..86bfe0a79 --- /dev/null +++ b/include/hw/i2c/smbus_slave.h @@ -0,0 +1,95 @@ +/* + * QEMU SMBus device (slave) API + * + * Copyright (c) 2007 Arastra, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ + +#ifndef HW_SMBUS_SLAVE_H +#define HW_SMBUS_SLAVE_H + +#include "hw/i2c/i2c.h" +#include "qom/object.h" + +#define TYPE_SMBUS_DEVICE "smbus-device" +OBJECT_DECLARE_TYPE(SMBusDevice, SMBusDeviceClass, + SMBUS_DEVICE) + + +struct SMBusDeviceClass { + I2CSlaveClass parent_class; + + /* + * An operation with no data, special in SMBus. + * This may be NULL, quick commands are ignore in that case. + */ + void (*quick_cmd)(SMBusDevice *dev, uint8_t read); + + /* + * We can't distinguish between a word write and a block write with + * length 1, so pass the whole data block including the length byte + * (if present). The device is responsible figuring out what type of + * command this is. + * This may be NULL if no data is written to the device. Writes + * will be ignore in that case. + */ + int (*write_data)(SMBusDevice *dev, uint8_t *buf, uint8_t len); + + /* + * Likewise we can't distinguish between different reads, or even know + * the length of the read until the read is complete, so read data a + * byte at a time. The device is responsible for adding the length + * byte on block reads. This call cannot fail, it should return + * something, preferably 0xff if nothing is available. + * This may be NULL if no data is read from the device. Reads will + * return 0xff in that case. + */ + uint8_t (*receive_byte)(SMBusDevice *dev); +}; + +#define SMBUS_DATA_MAX_LEN 34 /* command + len + 32 bytes of data. */ + +struct SMBusDevice { + /* The SMBus protocol is implemented on top of I2C. */ + I2CSlave i2c; + + /* Remaining fields for internal use only. */ + int32_t mode; + int32_t data_len; + uint8_t data_buf[SMBUS_DATA_MAX_LEN]; +}; + +extern const VMStateDescription vmstate_smbus_device; + +#define VMSTATE_SMBUS_DEVICE(_field, _state) { \ + .name = (stringify(_field)), \ + .size = sizeof(SMBusDevice), \ + .vmsd = &vmstate_smbus_device, \ + .flags = VMS_STRUCT, \ + .offset = vmstate_offset_value(_state, _field, SMBusDevice), \ +} + +/* + * Users should call this in their .needed functions to know if the + * SMBus slave data needs to be transferred. + */ +bool smbus_vmstate_needed(SMBusDevice *dev); + +#endif diff --git a/include/hw/i386/apic-msidef.h b/include/hw/i386/apic-msidef.h new file mode 100644 index 000000000..420b41167 --- /dev/null +++ b/include/hw/i386/apic-msidef.h @@ -0,0 +1,31 @@ +#ifndef HW_APIC_MSIDEF_H +#define HW_APIC_MSIDEF_H + +/* + * Intel APIC constants: from include/asm/msidef.h + */ + +/* + * Shifts for MSI data + */ + +#define MSI_DATA_VECTOR_SHIFT 0 +#define MSI_DATA_VECTOR_MASK 0x000000ff + +#define MSI_DATA_DELIVERY_MODE_SHIFT 8 +#define MSI_DATA_LEVEL_SHIFT 14 +#define MSI_DATA_TRIGGER_SHIFT 15 + +/* + * Shift/mask fields for msi address + */ + +#define MSI_ADDR_DEST_MODE_SHIFT 2 + +#define MSI_ADDR_REDIRECTION_SHIFT 3 + +#define MSI_ADDR_DEST_ID_SHIFT 12 +#define MSI_ADDR_DEST_IDX_SHIFT 4 +#define MSI_ADDR_DEST_ID_MASK 0x000ff000 + +#endif /* HW_APIC_MSIDEF_H */ diff --git a/include/hw/i386/apic.h b/include/hw/i386/apic.h new file mode 100644 index 000000000..da1d2fe15 --- /dev/null +++ b/include/hw/i386/apic.h @@ -0,0 +1,27 @@ +#ifndef APIC_H +#define APIC_H + + +/* apic.c */ +void apic_deliver_irq(uint8_t dest, uint8_t dest_mode, uint8_t delivery_mode, + uint8_t vector_num, uint8_t trigger_mode); +int apic_accept_pic_intr(DeviceState *s); +void apic_deliver_pic_intr(DeviceState *s, int level); +void apic_deliver_nmi(DeviceState *d); +int apic_get_interrupt(DeviceState *s); +void apic_reset_irq_delivered(void); +int apic_get_irq_delivered(void); +void cpu_set_apic_base(DeviceState *s, uint64_t val); +uint64_t cpu_get_apic_base(DeviceState *s); +void cpu_set_apic_tpr(DeviceState *s, uint8_t val); +uint8_t cpu_get_apic_tpr(DeviceState *s); +void apic_init_reset(DeviceState *s); +void apic_sipi(DeviceState *s); +void apic_poll_irq(DeviceState *d); +void apic_designate_bsp(DeviceState *d, bool bsp); +int apic_get_highest_priority_irr(DeviceState *dev); + +/* pc.c */ +DeviceState *cpu_get_current_apic(void); + +#endif diff --git a/include/hw/i386/apic_internal.h b/include/hw/i386/apic_internal.h new file mode 100644 index 000000000..c175e7e71 --- /dev/null +++ b/include/hw/i386/apic_internal.h @@ -0,0 +1,231 @@ +/* + * APIC support - internal interfaces + * + * Copyright (c) 2004-2005 Fabrice Bellard + * Copyright (c) 2011 Jan Kiszka, Siemens AG + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, see + */ + +#ifndef QEMU_APIC_INTERNAL_H +#define QEMU_APIC_INTERNAL_H + +#include "cpu.h" +#include "exec/memory.h" +#include "qemu/timer.h" +#include "target/i386/cpu-qom.h" +#include "qom/object.h" + +/* APIC Local Vector Table */ +#define APIC_LVT_TIMER 0 +#define APIC_LVT_THERMAL 1 +#define APIC_LVT_PERFORM 2 +#define APIC_LVT_LINT0 3 +#define APIC_LVT_LINT1 4 +#define APIC_LVT_ERROR 5 +#define APIC_LVT_NB 6 + +/* APIC delivery modes */ +#define APIC_DM_FIXED 0 +#define APIC_DM_LOWPRI 1 +#define APIC_DM_SMI 2 +#define APIC_DM_NMI 4 +#define APIC_DM_INIT 5 +#define APIC_DM_SIPI 6 +#define APIC_DM_EXTINT 7 + +/* APIC destination mode */ +#define APIC_DESTMODE_FLAT 0xf +#define APIC_DESTMODE_CLUSTER 1 + +#define APIC_TRIGGER_EDGE 0 +#define APIC_TRIGGER_LEVEL 1 + +#define APIC_VECTOR_MASK 0xff +#define APIC_DCR_MASK 0xf + +#define APIC_LVT_TIMER_SHIFT 17 +#define APIC_LVT_MASKED_SHIFT 16 +#define APIC_LVT_LEVEL_TRIGGER_SHIFT 15 +#define APIC_LVT_REMOTE_IRR_SHIFT 14 +#define APIC_LVT_INT_POLARITY_SHIFT 13 +#define APIC_LVT_DELIV_STS_SHIFT 12 +#define APIC_LVT_DELIV_MOD_SHIFT 8 + +#define APIC_LVT_TIMER_TSCDEADLINE (2 << APIC_LVT_TIMER_SHIFT) +#define APIC_LVT_TIMER_PERIODIC (1 << APIC_LVT_TIMER_SHIFT) +#define APIC_LVT_MASKED (1 << APIC_LVT_MASKED_SHIFT) +#define APIC_LVT_LEVEL_TRIGGER (1 << APIC_LVT_LEVEL_TRIGGER_SHIFT) +#define APIC_LVT_REMOTE_IRR (1 << APIC_LVT_REMOTE_IRR_SHIFT) +#define APIC_LVT_INT_POLARITY (1 << APIC_LVT_INT_POLARITY_SHIFT) +#define APIC_LVT_DELIV_STS (1 << APIC_LVT_DELIV_STS_SHIFT) +#define APIC_LVT_DELIV_MOD (7 << APIC_LVT_DELIV_MOD_SHIFT) + +#define APIC_ESR_ILL_ADDRESS_SHIFT 7 +#define APIC_ESR_RECV_ILL_VECT_SHIFT 6 +#define APIC_ESR_SEND_ILL_VECT_SHIFT 5 +#define APIC_ESR_RECV_ACCEPT_SHIFT 3 +#define APIC_ESR_SEND_ACCEPT_SHIFT 2 +#define APIC_ESR_RECV_CHECK_SUM_SHIFT 1 + +#define APIC_ESR_ILLEGAL_ADDRESS (1 << APIC_ESR_ILL_ADDRESS_SHIFT) +#define APIC_ESR_RECV_ILLEGAL_VECT (1 << APIC_ESR_RECV_ILL_VECT_SHIFT) +#define APIC_ESR_SEND_ILLEGAL_VECT (1 << APIC_ESR_SEND_ILL_VECT_SHIFT) +#define APIC_ESR_RECV_ACCEPT (1 << APIC_ESR_RECV_ACCEPT_SHIFT) +#define APIC_ESR_SEND_ACCEPT (1 << APIC_ESR_SEND_ACCEPT_SHIFT) +#define APIC_ESR_RECV_CHECK_SUM (1 << APIC_ESR_RECV_CHECK_SUM_SHIFT) +#define APIC_ESR_SEND_CHECK_SUM 1 + +#define APIC_ICR_DEST_SHIFT 24 +#define APIC_ICR_DEST_SHORT_SHIFT 18 +#define APIC_ICR_TRIGGER_MOD_SHIFT 15 +#define APIC_ICR_LEVEL_SHIFT 14 +#define APIC_ICR_DELIV_STS_SHIFT 12 +#define APIC_ICR_DEST_MOD_SHIFT 11 +#define APIC_ICR_DELIV_MOD_SHIFT 8 + +#define APIC_ICR_DEST_SHORT (3 << APIC_ICR_DEST_SHORT_SHIFT) +#define APIC_ICR_TRIGGER_MOD (1 << APIC_ICR_TRIGGER_MOD_SHIFT) +#define APIC_ICR_LEVEL (1 << APIC_ICR_LEVEL_SHIFT) +#define APIC_ICR_DELIV_STS (1 << APIC_ICR_DELIV_STS_SHIFT) +#define APIC_ICR_DEST_MOD (1 << APIC_ICR_DEST_MOD_SHIFT) +#define APIC_ICR_DELIV_MOD (7 << APIC_ICR_DELIV_MOD_SHIFT) + +#define APIC_PR_CLASS_SHIFT 4 +#define APIC_PR_SUB_CLASS 0xf + +#define APIC_LOGDEST_XAPIC_SHIFT 4 +#define APIC_LOGDEST_XAPIC_ID 0xf + +#define APIC_LOGDEST_X2APIC_SHIFT 16 +#define APIC_LOGDEST_X2APIC_ID 0xffff + +#define APIC_SPURIO_FOCUS_SHIFT 9 +#define APIC_SPURIO_ENABLED_SHIFT 8 + +#define APIC_SPURIO_FOCUS (1 << APIC_SPURIO_FOCUS_SHIFT) +#define APIC_SPURIO_ENABLED (1 << APIC_SPURIO_ENABLED_SHIFT) + +#define APIC_SV_DIRECTED_IO (1 << 12) +#define APIC_SV_ENABLE (1 << 8) + +#define VAPIC_ENABLE_BIT 0 +#define VAPIC_ENABLE_MASK (1 << VAPIC_ENABLE_BIT) + +typedef struct APICCommonState APICCommonState; + +#define TYPE_APIC_COMMON "apic-common" +typedef struct APICCommonClass APICCommonClass; +DECLARE_OBJ_CHECKERS(APICCommonState, APICCommonClass, + APIC_COMMON, TYPE_APIC_COMMON) + +struct APICCommonClass { + DeviceClass parent_class; + + DeviceRealize realize; + DeviceUnrealize unrealize; + void (*set_base)(APICCommonState *s, uint64_t val); + void (*set_tpr)(APICCommonState *s, uint8_t val); + uint8_t (*get_tpr)(APICCommonState *s); + void (*enable_tpr_reporting)(APICCommonState *s, bool enable); + void (*vapic_base_update)(APICCommonState *s); + void (*external_nmi)(APICCommonState *s); + void (*pre_save)(APICCommonState *s); + void (*post_load)(APICCommonState *s); + void (*reset)(APICCommonState *s); + /* send_msi emulates an APIC bus and its proper place would be in a new + * device, but it's convenient to have it here for now. + */ + void (*send_msi)(MSIMessage *msi); +}; + +struct APICCommonState { + /*< private >*/ + DeviceState parent_obj; + /*< public >*/ + + MemoryRegion io_memory; + X86CPU *cpu; + uint32_t apicbase; + uint8_t id; /* legacy APIC ID */ + uint32_t initial_apic_id; + uint8_t version; + uint8_t arb_id; + uint8_t tpr; + uint32_t spurious_vec; + uint8_t log_dest; + uint8_t dest_mode; + uint32_t isr[8]; /* in service register */ + uint32_t tmr[8]; /* trigger mode register */ + uint32_t irr[8]; /* interrupt request register */ + uint32_t lvt[APIC_LVT_NB]; + uint32_t esr; /* error register */ + uint32_t icr[2]; + + uint32_t divide_conf; + int count_shift; + uint32_t initial_count; + int64_t initial_count_load_time; + int64_t next_time; + QEMUTimer *timer; + int64_t timer_expiry; + int sipi_vector; + int wait_for_sipi; + + uint32_t vapic_control; + DeviceState *vapic; + hwaddr vapic_paddr; /* note: persistence via kvmvapic */ + bool legacy_instance_id; +}; + +typedef struct VAPICState { + uint8_t tpr; + uint8_t isr; + uint8_t zero; + uint8_t irr; + uint8_t enabled; +} QEMU_PACKED VAPICState; + +extern bool apic_report_tpr_access; + +void apic_report_irq_delivered(int delivered); +bool apic_next_timer(APICCommonState *s, int64_t current_time); +void apic_enable_tpr_access_reporting(DeviceState *d, bool enable); +void apic_enable_vapic(DeviceState *d, hwaddr paddr); + +void vapic_report_tpr_access(DeviceState *dev, CPUState *cpu, target_ulong ip, + TPRAccess access); + +int apic_get_ppr(APICCommonState *s); +uint32_t apic_get_current_count(APICCommonState *s); + +static inline void apic_set_bit(uint32_t *tab, int index) +{ + int i, mask; + i = index >> 5; + mask = 1 << (index & 0x1f); + tab[i] |= mask; +} + +static inline int apic_get_bit(uint32_t *tab, int index) +{ + int i, mask; + i = index >> 5; + mask = 1 << (index & 0x1f); + return !!(tab[i] & mask); +} + +APICCommonClass *apic_get_class(void); + +#endif /* QEMU_APIC_INTERNAL_H */ diff --git a/include/hw/i386/ich9.h b/include/hw/i386/ich9.h new file mode 100644 index 000000000..d1ea000d3 --- /dev/null +++ b/include/hw/i386/ich9.h @@ -0,0 +1,256 @@ +#ifndef HW_ICH9_H +#define HW_ICH9_H + +#include "hw/isa/isa.h" +#include "hw/sysbus.h" +#include "hw/i386/pc.h" +#include "hw/isa/apm.h" +#include "hw/pci/pci.h" +#include "hw/pci/pcie_host.h" +#include "hw/pci/pci_bridge.h" +#include "hw/acpi/acpi.h" +#include "hw/acpi/ich9.h" +#include "hw/pci/pci_bus.h" +#include "qom/object.h" + +void ich9_lpc_set_irq(void *opaque, int irq_num, int level); +int ich9_lpc_map_irq(PCIDevice *pci_dev, int intx); +PCIINTxRoute ich9_route_intx_pin_to_irq(void *opaque, int pirq_pin); +void ich9_lpc_pm_init(PCIDevice *pci_lpc, bool smm_enabled); +I2CBus *ich9_smb_init(PCIBus *bus, int devfn, uint32_t smb_io_base); + +void ich9_generate_smi(void); + +#define ICH9_CC_SIZE (16 * 1024) /* 16KB. Chipset configuration registers */ + +#define TYPE_ICH9_LPC_DEVICE "ICH9-LPC" +OBJECT_DECLARE_SIMPLE_TYPE(ICH9LPCState, ICH9_LPC_DEVICE) + +struct ICH9LPCState { + /* ICH9 LPC PCI to ISA bridge */ + PCIDevice d; + + /* (pci device, intx) -> pirq + * In real chipset case, the unused slots are never used + * as ICH9 supports only D25-D31 irq routing. + * On the other hand in qemu case, any slot/function can be populated + * via command line option. + * So fallback interrupt routing for any devices in any slots is necessary. + */ + uint8_t irr[PCI_SLOT_MAX][PCI_NUM_PINS]; + + APMState apm; + ICH9LPCPMRegs pm; + uint32_t sci_level; /* track sci level */ + uint8_t sci_gsi; + + /* 2.24 Pin Straps */ + struct { + bool spkr_hi; + } pin_strap; + + /* 10.1 Chipset Configuration registers(Memory Space) + which is pointed by RCBA */ + uint8_t chip_config[ICH9_CC_SIZE]; + + /* + * 13.7.5 RST_CNT---Reset Control Register (LPC I/F---D31:F0) + * + * register contents and IO memory region + */ + uint8_t rst_cnt; + MemoryRegion rst_cnt_mem; + + /* SMI feature negotiation via fw_cfg */ + uint64_t smi_host_features; /* guest-invisible, host endian */ + uint8_t smi_host_features_le[8]; /* guest-visible, read-only, little + * endian uint64_t */ + uint8_t smi_guest_features_le[8]; /* guest-visible, read-write, little + * endian uint64_t */ + uint8_t smi_features_ok; /* guest-visible, read-only; selecting it + * triggers feature lockdown */ + uint64_t smi_negotiated_features; /* guest-invisible, host endian */ + + /* isa bus */ + ISABus *isa_bus; + MemoryRegion rcrb_mem; /* root complex register block */ + Notifier machine_ready; + + qemu_irq gsi[GSI_NUM_PINS]; +}; + +#define Q35_MASK(bit, ms_bit, ls_bit) \ +((uint##bit##_t)(((1ULL << ((ms_bit) + 1)) - 1) & ~((1ULL << ls_bit) - 1))) + +/* ICH9: Chipset Configuration Registers */ +#define ICH9_CC_ADDR_MASK (ICH9_CC_SIZE - 1) + +#define ICH9_CC +#define ICH9_CC_D28IP 0x310C +#define ICH9_CC_D28IP_SHIFT 4 +#define ICH9_CC_D28IP_MASK 0xf +#define ICH9_CC_D28IP_DEFAULT 0x00214321 +#define ICH9_CC_D31IR 0x3140 +#define ICH9_CC_D30IR 0x3142 +#define ICH9_CC_D29IR 0x3144 +#define ICH9_CC_D28IR 0x3146 +#define ICH9_CC_D27IR 0x3148 +#define ICH9_CC_D26IR 0x314C +#define ICH9_CC_D25IR 0x3150 +#define ICH9_CC_DIR_DEFAULT 0x3210 +#define ICH9_CC_D30IR_DEFAULT 0x0 +#define ICH9_CC_DIR_SHIFT 4 +#define ICH9_CC_DIR_MASK 0x7 +#define ICH9_CC_OIC 0x31FF +#define ICH9_CC_OIC_AEN 0x1 +#define ICH9_CC_GCS 0x3410 +#define ICH9_CC_GCS_DEFAULT 0x00000020 +#define ICH9_CC_GCS_NO_REBOOT (1 << 5) + +/* D28:F[0-5] */ +#define ICH9_PCIE_DEV 28 +#define ICH9_PCIE_FUNC_MAX 6 + + +/* D29:F0 USB UHCI Controller #1 */ +#define ICH9_USB_UHCI1_DEV 29 +#define ICH9_USB_UHCI1_FUNC 0 + +/* D30:F0 DMI-to-PCI bridge */ +#define ICH9_D2P_BRIDGE "ICH9 D2P BRIDGE" +#define ICH9_D2P_BRIDGE_SAVEVM_VERSION 0 + +#define ICH9_D2P_BRIDGE_DEV 30 +#define ICH9_D2P_BRIDGE_FUNC 0 + +#define ICH9_D2P_SECONDARY_DEFAULT (256 - 8) + +#define ICH9_D2P_A2_REVISION 0x92 + +/* D31:F0 LPC Processor Interface */ +#define ICH9_RST_CNT_IOPORT 0xCF9 + +/* D31:F1 LPC controller */ +#define ICH9_A2_LPC "ICH9 A2 LPC" +#define ICH9_A2_LPC_SAVEVM_VERSION 0 + +#define ICH9_LPC_DEV 31 +#define ICH9_LPC_FUNC 0 + +#define ICH9_A2_LPC_REVISION 0x2 +#define ICH9_LPC_NB_PIRQS 8 /* PCI A-H */ + +#define ICH9_LPC_PMBASE 0x40 +#define ICH9_LPC_PMBASE_BASE_ADDRESS_MASK Q35_MASK(32, 15, 7) +#define ICH9_LPC_PMBASE_RTE 0x1 +#define ICH9_LPC_PMBASE_DEFAULT 0x1 + +#define ICH9_LPC_ACPI_CTRL 0x44 +#define ICH9_LPC_ACPI_CTRL_ACPI_EN 0x80 +#define ICH9_LPC_ACPI_CTRL_SCI_IRQ_SEL_MASK Q35_MASK(8, 2, 0) +#define ICH9_LPC_ACPI_CTRL_9 0x0 +#define ICH9_LPC_ACPI_CTRL_10 0x1 +#define ICH9_LPC_ACPI_CTRL_11 0x2 +#define ICH9_LPC_ACPI_CTRL_20 0x4 +#define ICH9_LPC_ACPI_CTRL_21 0x5 +#define ICH9_LPC_ACPI_CTRL_DEFAULT 0x0 + +#define ICH9_LPC_PIRQA_ROUT 0x60 +#define ICH9_LPC_PIRQB_ROUT 0x61 +#define ICH9_LPC_PIRQC_ROUT 0x62 +#define ICH9_LPC_PIRQD_ROUT 0x63 + +#define ICH9_LPC_PIRQE_ROUT 0x68 +#define ICH9_LPC_PIRQF_ROUT 0x69 +#define ICH9_LPC_PIRQG_ROUT 0x6a +#define ICH9_LPC_PIRQH_ROUT 0x6b + +#define ICH9_LPC_PIRQ_ROUT_IRQEN 0x80 +#define ICH9_LPC_PIRQ_ROUT_MASK Q35_MASK(8, 3, 0) +#define ICH9_LPC_PIRQ_ROUT_DEFAULT 0x80 + +#define ICH9_LPC_GEN_PMCON_1 0xa0 +#define ICH9_LPC_GEN_PMCON_1_SMI_LOCK (1 << 4) +#define ICH9_LPC_GEN_PMCON_2 0xa2 +#define ICH9_LPC_GEN_PMCON_3 0xa4 +#define ICH9_LPC_GEN_PMCON_LOCK 0xa6 + +#define ICH9_LPC_RCBA 0xf0 +#define ICH9_LPC_RCBA_BA_MASK Q35_MASK(32, 31, 14) +#define ICH9_LPC_RCBA_EN 0x1 +#define ICH9_LPC_RCBA_DEFAULT 0x0 + +#define ICH9_LPC_PIC_NUM_PINS 16 +#define ICH9_LPC_IOAPIC_NUM_PINS 24 + +#define ICH9_GPIO_GSI "gsi" + +/* D31:F2 SATA Controller #1 */ +#define ICH9_SATA1_DEV 31 +#define ICH9_SATA1_FUNC 2 + +/* D31:F0 power management I/O registers + offset from the address ICH9_LPC_PMBASE */ + +/* ICH9 LPC PM I/O registers are 128 ports and 128-aligned */ +#define ICH9_PMIO_SIZE 128 +#define ICH9_PMIO_MASK (ICH9_PMIO_SIZE - 1) + +#define ICH9_PMIO_PM1_STS 0x00 +#define ICH9_PMIO_PM1_EN 0x02 +#define ICH9_PMIO_PM1_CNT 0x04 +#define ICH9_PMIO_PM1_TMR 0x08 +#define ICH9_PMIO_GPE0_STS 0x20 +#define ICH9_PMIO_GPE0_EN 0x28 +#define ICH9_PMIO_GPE0_LEN 16 +#define ICH9_PMIO_SMI_EN 0x30 +#define ICH9_PMIO_SMI_EN_APMC_EN (1 << 5) +#define ICH9_PMIO_SMI_EN_TCO_EN (1 << 13) +#define ICH9_PMIO_SMI_STS 0x34 +#define ICH9_PMIO_TCO_RLD 0x60 +#define ICH9_PMIO_TCO_LEN 32 + +/* FADT ACPI_ENABLE/ACPI_DISABLE */ +#define ICH9_APM_ACPI_ENABLE 0x2 +#define ICH9_APM_ACPI_DISABLE 0x3 + + +/* D31:F3 SMBus controller */ +#define TYPE_ICH9_SMB_DEVICE "ICH9 SMB" + +#define ICH9_A2_SMB_REVISION 0x02 +#define ICH9_SMB_PI 0x00 + +#define ICH9_SMB_SMBMBAR0 0x10 +#define ICH9_SMB_SMBMBAR1 0x14 +#define ICH9_SMB_SMBM_BAR 0 +#define ICH9_SMB_SMBM_SIZE (1 << 8) +#define ICH9_SMB_SMB_BASE 0x20 +#define ICH9_SMB_SMB_BASE_BAR 4 +#define ICH9_SMB_SMB_BASE_SIZE (1 << 5) +#define ICH9_SMB_HOSTC 0x40 +#define ICH9_SMB_HOSTC_SSRESET ((uint8_t)(1 << 3)) +#define ICH9_SMB_HOSTC_I2C_EN ((uint8_t)(1 << 2)) +#define ICH9_SMB_HOSTC_SMB_SMI_EN ((uint8_t)(1 << 1)) +#define ICH9_SMB_HOSTC_HST_EN ((uint8_t)(1 << 0)) + +/* D31:F3 SMBus I/O and memory mapped I/O registers */ +#define ICH9_SMB_DEV 31 +#define ICH9_SMB_FUNC 3 + +#define ICH9_SMB_HST_STS 0x00 +#define ICH9_SMB_HST_CNT 0x02 +#define ICH9_SMB_HST_CMD 0x03 +#define ICH9_SMB_XMIT_SLVA 0x04 +#define ICH9_SMB_HST_D0 0x05 +#define ICH9_SMB_HST_D1 0x06 +#define ICH9_SMB_HOST_BLOCK_DB 0x07 + +#define ICH9_LPC_SMI_NEGOTIATED_FEAT_PROP "x-smi-negotiated-features" + +/* bit positions used in fw_cfg SMI feature negotiation */ +#define ICH9_LPC_SMI_F_BROADCAST_BIT 0 +#define ICH9_LPC_SMI_F_CPU_HOTPLUG_BIT 1 +#define ICH9_LPC_SMI_F_CPU_HOT_UNPLUG_BIT 2 + +#endif /* HW_ICH9_H */ diff --git a/include/hw/i386/intel_iommu.h b/include/hw/i386/intel_iommu.h new file mode 100644 index 000000000..41783ee46 --- /dev/null +++ b/include/hw/i386/intel_iommu.h @@ -0,0 +1,282 @@ +/* + * QEMU emulation of an Intel IOMMU (VT-d) + * (DMA Remapping device) + * + * Copyright (C) 2013 Knut Omang, Oracle + * Copyright (C) 2014 Le Tan, + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + + * You should have received a copy of the GNU General Public License along + * with this program; if not, see . + */ + +#ifndef INTEL_IOMMU_H +#define INTEL_IOMMU_H + +#include "hw/i386/x86-iommu.h" +#include "qemu/iova-tree.h" +#include "qom/object.h" + +#define TYPE_INTEL_IOMMU_DEVICE "intel-iommu" +OBJECT_DECLARE_SIMPLE_TYPE(IntelIOMMUState, INTEL_IOMMU_DEVICE) + +#define TYPE_INTEL_IOMMU_MEMORY_REGION "intel-iommu-iommu-memory-region" + +/* DMAR Hardware Unit Definition address (IOMMU unit) */ +#define Q35_HOST_BRIDGE_IOMMU_ADDR 0xfed90000ULL + +#define VTD_PCI_BUS_MAX 256 +#define VTD_PCI_SLOT_MAX 32 +#define VTD_PCI_FUNC_MAX 8 +#define VTD_PCI_SLOT(devfn) (((devfn) >> 3) & 0x1f) +#define VTD_PCI_FUNC(devfn) ((devfn) & 0x07) +#define VTD_SID_TO_BUS(sid) (((sid) >> 8) & 0xff) +#define VTD_SID_TO_DEVFN(sid) ((sid) & 0xff) + +#define DMAR_REG_SIZE 0x230 +#define VTD_HOST_AW_39BIT 39 +#define VTD_HOST_AW_48BIT 48 +#define VTD_HOST_ADDRESS_WIDTH VTD_HOST_AW_39BIT +#define VTD_HAW_MASK(aw) ((1ULL << (aw)) - 1) + +#define DMAR_REPORT_F_INTR (1) + +#define VTD_MSI_ADDR_HI_MASK (0xffffffff00000000ULL) +#define VTD_MSI_ADDR_HI_SHIFT (32) +#define VTD_MSI_ADDR_LO_MASK (0x00000000ffffffffULL) + +typedef struct VTDContextEntry VTDContextEntry; +typedef struct VTDContextCacheEntry VTDContextCacheEntry; +typedef struct VTDAddressSpace VTDAddressSpace; +typedef struct VTDIOTLBEntry VTDIOTLBEntry; +typedef struct VTDBus VTDBus; +typedef union VTD_IR_TableEntry VTD_IR_TableEntry; +typedef union VTD_IR_MSIAddress VTD_IR_MSIAddress; +typedef struct VTDPASIDDirEntry VTDPASIDDirEntry; +typedef struct VTDPASIDEntry VTDPASIDEntry; + +/* Context-Entry */ +struct VTDContextEntry { + union { + struct { + uint64_t lo; + uint64_t hi; + }; + struct { + uint64_t val[4]; + }; + }; +}; + +struct VTDContextCacheEntry { + /* The cache entry is obsolete if + * context_cache_gen!=IntelIOMMUState.context_cache_gen + */ + uint32_t context_cache_gen; + struct VTDContextEntry context_entry; +}; + +/* PASID Directory Entry */ +struct VTDPASIDDirEntry { + uint64_t val; +}; + +/* PASID Table Entry */ +struct VTDPASIDEntry { + uint64_t val[8]; +}; + +struct VTDAddressSpace { + PCIBus *bus; + uint8_t devfn; + AddressSpace as; + IOMMUMemoryRegion iommu; + MemoryRegion root; /* The root container of the device */ + MemoryRegion nodmar; /* The alias of shared nodmar MR */ + MemoryRegion iommu_ir; /* Interrupt region: 0xfeeXXXXX */ + IntelIOMMUState *iommu_state; + VTDContextCacheEntry context_cache_entry; + QLIST_ENTRY(VTDAddressSpace) next; + /* Superset of notifier flags that this address space has */ + IOMMUNotifierFlag notifier_flags; + IOVATree *iova_tree; /* Traces mapped IOVA ranges */ +}; + +struct VTDBus { + PCIBus* bus; /* A reference to the bus to provide translation for */ + /* A table of VTDAddressSpace objects indexed by devfn */ + VTDAddressSpace *dev_as[]; +}; + +struct VTDIOTLBEntry { + uint64_t gfn; + uint16_t domain_id; + uint64_t slpte; + uint64_t mask; + uint8_t access_flags; +}; + +/* VT-d Source-ID Qualifier types */ +enum { + VTD_SQ_FULL = 0x00, /* Full SID verification */ + VTD_SQ_IGN_3 = 0x01, /* Ignore bit 3 */ + VTD_SQ_IGN_2_3 = 0x02, /* Ignore bits 2 & 3 */ + VTD_SQ_IGN_1_3 = 0x03, /* Ignore bits 1-3 */ + VTD_SQ_MAX, +}; + +/* VT-d Source Validation Types */ +enum { + VTD_SVT_NONE = 0x00, /* No validation */ + VTD_SVT_ALL = 0x01, /* Do full validation */ + VTD_SVT_BUS = 0x02, /* Validate bus range */ + VTD_SVT_MAX, +}; + +/* Interrupt Remapping Table Entry Definition */ +union VTD_IR_TableEntry { + struct { +#ifdef HOST_WORDS_BIGENDIAN + uint32_t __reserved_1:8; /* Reserved 1 */ + uint32_t vector:8; /* Interrupt Vector */ + uint32_t irte_mode:1; /* IRTE Mode */ + uint32_t __reserved_0:3; /* Reserved 0 */ + uint32_t __avail:4; /* Available spaces for software */ + uint32_t delivery_mode:3; /* Delivery Mode */ + uint32_t trigger_mode:1; /* Trigger Mode */ + uint32_t redir_hint:1; /* Redirection Hint */ + uint32_t dest_mode:1; /* Destination Mode */ + uint32_t fault_disable:1; /* Fault Processing Disable */ + uint32_t present:1; /* Whether entry present/available */ +#else + uint32_t present:1; /* Whether entry present/available */ + uint32_t fault_disable:1; /* Fault Processing Disable */ + uint32_t dest_mode:1; /* Destination Mode */ + uint32_t redir_hint:1; /* Redirection Hint */ + uint32_t trigger_mode:1; /* Trigger Mode */ + uint32_t delivery_mode:3; /* Delivery Mode */ + uint32_t __avail:4; /* Available spaces for software */ + uint32_t __reserved_0:3; /* Reserved 0 */ + uint32_t irte_mode:1; /* IRTE Mode */ + uint32_t vector:8; /* Interrupt Vector */ + uint32_t __reserved_1:8; /* Reserved 1 */ +#endif + uint32_t dest_id; /* Destination ID */ + uint16_t source_id; /* Source-ID */ +#ifdef HOST_WORDS_BIGENDIAN + uint64_t __reserved_2:44; /* Reserved 2 */ + uint64_t sid_vtype:2; /* Source-ID Validation Type */ + uint64_t sid_q:2; /* Source-ID Qualifier */ +#else + uint64_t sid_q:2; /* Source-ID Qualifier */ + uint64_t sid_vtype:2; /* Source-ID Validation Type */ + uint64_t __reserved_2:44; /* Reserved 2 */ +#endif + } QEMU_PACKED irte; + uint64_t data[2]; +}; + +#define VTD_IR_INT_FORMAT_COMPAT (0) /* Compatible Interrupt */ +#define VTD_IR_INT_FORMAT_REMAP (1) /* Remappable Interrupt */ + +/* Programming format for MSI/MSI-X addresses */ +union VTD_IR_MSIAddress { + struct { +#ifdef HOST_WORDS_BIGENDIAN + uint32_t __head:12; /* Should always be: 0x0fee */ + uint32_t index_l:15; /* Interrupt index bit 14-0 */ + uint32_t int_mode:1; /* Interrupt format */ + uint32_t sub_valid:1; /* SHV: Sub-Handle Valid bit */ + uint32_t index_h:1; /* Interrupt index bit 15 */ + uint32_t __not_care:2; +#else + uint32_t __not_care:2; + uint32_t index_h:1; /* Interrupt index bit 15 */ + uint32_t sub_valid:1; /* SHV: Sub-Handle Valid bit */ + uint32_t int_mode:1; /* Interrupt format */ + uint32_t index_l:15; /* Interrupt index bit 14-0 */ + uint32_t __head:12; /* Should always be: 0x0fee */ +#endif + } QEMU_PACKED addr; + uint32_t data; +}; + +/* When IR is enabled, all MSI/MSI-X data bits should be zero */ +#define VTD_IR_MSI_DATA (0) + +/* The iommu (DMAR) device state struct */ +struct IntelIOMMUState { + X86IOMMUState x86_iommu; + MemoryRegion csrmem; + MemoryRegion mr_nodmar; + MemoryRegion mr_ir; + MemoryRegion mr_sys_alias; + uint8_t csr[DMAR_REG_SIZE]; /* register values */ + uint8_t wmask[DMAR_REG_SIZE]; /* R/W bytes */ + uint8_t w1cmask[DMAR_REG_SIZE]; /* RW1C(Write 1 to Clear) bytes */ + uint8_t womask[DMAR_REG_SIZE]; /* WO (write only - read returns 0) */ + uint32_t version; + + bool caching_mode; /* RO - is cap CM enabled? */ + bool scalable_mode; /* RO - is Scalable Mode supported? */ + + dma_addr_t root; /* Current root table pointer */ + bool root_scalable; /* Type of root table (scalable or not) */ + bool dmar_enabled; /* Set if DMA remapping is enabled */ + + uint16_t iq_head; /* Current invalidation queue head */ + uint16_t iq_tail; /* Current invalidation queue tail */ + dma_addr_t iq; /* Current invalidation queue pointer */ + uint16_t iq_size; /* IQ Size in number of entries */ + bool iq_dw; /* IQ descriptor width 256bit or not */ + bool qi_enabled; /* Set if the QI is enabled */ + uint8_t iq_last_desc_type; /* The type of last completed descriptor */ + + /* The index of the Fault Recording Register to be used next. + * Wraps around from N-1 to 0, where N is the number of FRCD_REG. + */ + uint16_t next_frcd_reg; + + uint64_t cap; /* The value of capability reg */ + uint64_t ecap; /* The value of extended capability reg */ + + uint32_t context_cache_gen; /* Should be in [1,MAX] */ + GHashTable *iotlb; /* IOTLB */ + + GHashTable *vtd_as_by_busptr; /* VTDBus objects indexed by PCIBus* reference */ + VTDBus *vtd_as_by_bus_num[VTD_PCI_BUS_MAX]; /* VTDBus objects indexed by bus number */ + /* list of registered notifiers */ + QLIST_HEAD(, VTDAddressSpace) vtd_as_with_notifiers; + + /* interrupt remapping */ + bool intr_enabled; /* Whether guest enabled IR */ + dma_addr_t intr_root; /* Interrupt remapping table pointer */ + uint32_t intr_size; /* Number of IR table entries */ + bool intr_eime; /* Extended interrupt mode enabled */ + OnOffAuto intr_eim; /* Toggle for EIM cabability */ + bool buggy_eim; /* Force buggy EIM unless eim=off */ + uint8_t aw_bits; /* Host/IOVA address width (in bits) */ + bool dma_drain; /* Whether DMA r/w draining enabled */ + + /* + * Protects IOMMU states in general. Currently it protects the + * per-IOMMU IOTLB cache, and context entry cache in VTDAddressSpace. + */ + QemuMutex iommu_lock; +}; + +/* Find the VTD Address space associated with the given bus pointer, + * create a new one if none exists + */ +VTDAddressSpace *vtd_find_add_as(IntelIOMMUState *s, PCIBus *bus, int devfn); + +#endif diff --git a/include/hw/i386/ioapic.h b/include/hw/i386/ioapic.h new file mode 100644 index 000000000..06bfaaeac --- /dev/null +++ b/include/hw/i386/ioapic.h @@ -0,0 +1,31 @@ +/* + * ioapic.c IOAPIC emulation logic + * + * Copyright (c) 2011 Jan Kiszka, Siemens AG + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, see . + */ + +#ifndef HW_IOAPIC_H +#define HW_IOAPIC_H + +#define IOAPIC_NUM_PINS 24 +#define IO_APIC_DEFAULT_ADDRESS 0xfec00000 + +#define TYPE_KVM_IOAPIC "kvm-ioapic" +#define TYPE_IOAPIC "ioapic" + +void ioapic_eoi_broadcast(int vector); + +#endif /* HW_IOAPIC_H */ diff --git a/include/hw/i386/ioapic_internal.h b/include/hw/i386/ioapic_internal.h new file mode 100644 index 000000000..0f9002a2c --- /dev/null +++ b/include/hw/i386/ioapic_internal.h @@ -0,0 +1,118 @@ +/* + * IOAPIC emulation logic - internal interfaces + * + * Copyright (c) 2004-2005 Fabrice Bellard + * Copyright (c) 2009 Xiantao Zhang, Intel + * Copyright (c) 2011 Jan Kiszka, Siemens AG + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, see . + */ + +#ifndef QEMU_IOAPIC_INTERNAL_H +#define QEMU_IOAPIC_INTERNAL_H + +#include "exec/memory.h" +#include "hw/sysbus.h" +#include "qemu/notify.h" +#include "qom/object.h" + +#define MAX_IOAPICS 1 + +#define IOAPIC_LVT_DEST_SHIFT 56 +#define IOAPIC_LVT_DEST_IDX_SHIFT 48 +#define IOAPIC_LVT_MASKED_SHIFT 16 +#define IOAPIC_LVT_TRIGGER_MODE_SHIFT 15 +#define IOAPIC_LVT_REMOTE_IRR_SHIFT 14 +#define IOAPIC_LVT_POLARITY_SHIFT 13 +#define IOAPIC_LVT_DELIV_STATUS_SHIFT 12 +#define IOAPIC_LVT_DEST_MODE_SHIFT 11 +#define IOAPIC_LVT_DELIV_MODE_SHIFT 8 + +#define IOAPIC_LVT_MASKED (1 << IOAPIC_LVT_MASKED_SHIFT) +#define IOAPIC_LVT_TRIGGER_MODE (1 << IOAPIC_LVT_TRIGGER_MODE_SHIFT) +#define IOAPIC_LVT_REMOTE_IRR (1 << IOAPIC_LVT_REMOTE_IRR_SHIFT) +#define IOAPIC_LVT_POLARITY (1 << IOAPIC_LVT_POLARITY_SHIFT) +#define IOAPIC_LVT_DELIV_STATUS (1 << IOAPIC_LVT_DELIV_STATUS_SHIFT) +#define IOAPIC_LVT_DEST_MODE (1 << IOAPIC_LVT_DEST_MODE_SHIFT) +#define IOAPIC_LVT_DELIV_MODE (7 << IOAPIC_LVT_DELIV_MODE_SHIFT) + +/* Bits that are read-only for IOAPIC entry */ +#define IOAPIC_RO_BITS (IOAPIC_LVT_REMOTE_IRR | \ + IOAPIC_LVT_DELIV_STATUS) +#define IOAPIC_RW_BITS (~(uint64_t)IOAPIC_RO_BITS) + +#define IOAPIC_TRIGGER_EDGE 0 +#define IOAPIC_TRIGGER_LEVEL 1 + +/*io{apic,sapic} delivery mode*/ +#define IOAPIC_DM_FIXED 0x0 +#define IOAPIC_DM_LOWEST_PRIORITY 0x1 +#define IOAPIC_DM_PMI 0x2 +#define IOAPIC_DM_NMI 0x4 +#define IOAPIC_DM_INIT 0x5 +#define IOAPIC_DM_SIPI 0x6 +#define IOAPIC_DM_EXTINT 0x7 +#define IOAPIC_DM_MASK 0x7 + +#define IOAPIC_VECTOR_MASK 0xff + +#define IOAPIC_IOREGSEL 0x00 +#define IOAPIC_IOWIN 0x10 +#define IOAPIC_EOI 0x40 + +#define IOAPIC_REG_ID 0x00 +#define IOAPIC_REG_VER 0x01 +#define IOAPIC_REG_ARB 0x02 +#define IOAPIC_REG_REDTBL_BASE 0x10 +#define IOAPIC_ID 0x00 + +#define IOAPIC_ID_SHIFT 24 +#define IOAPIC_ID_MASK 0xf + +#define IOAPIC_VER_ENTRIES_SHIFT 16 + + +#define TYPE_IOAPIC_COMMON "ioapic-common" +OBJECT_DECLARE_TYPE(IOAPICCommonState, IOAPICCommonClass, IOAPIC_COMMON) + +struct IOAPICCommonClass { + SysBusDeviceClass parent_class; + + DeviceRealize realize; + DeviceUnrealize unrealize; + void (*pre_save)(IOAPICCommonState *s); + void (*post_load)(IOAPICCommonState *s); +}; + +struct IOAPICCommonState { + SysBusDevice busdev; + MemoryRegion io_memory; + uint8_t id; + uint8_t ioregsel; + uint32_t irr; + uint64_t ioredtbl[IOAPIC_NUM_PINS]; + Notifier machine_done; + uint8_t version; + uint64_t irq_count[IOAPIC_NUM_PINS]; + int irq_level[IOAPIC_NUM_PINS]; + int irq_eoi[IOAPIC_NUM_PINS]; + QEMUTimer *delayed_ioapic_service_timer; +}; + +void ioapic_reset_common(DeviceState *dev); + +void ioapic_print_redtbl(Monitor *mon, IOAPICCommonState *s); +void ioapic_stat_update_irq(IOAPICCommonState *s, int irq, int level); + +#endif /* QEMU_IOAPIC_INTERNAL_H */ diff --git a/include/hw/i386/microvm.h b/include/hw/i386/microvm.h new file mode 100644 index 000000000..0fc216007 --- /dev/null +++ b/include/hw/i386/microvm.h @@ -0,0 +1,110 @@ +/* + * Copyright (c) 2018 Intel Corporation + * Copyright (c) 2019 Red Hat, Inc. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2 or later, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along with + * this program. If not, see . + */ + +#ifndef HW_I386_MICROVM_H +#define HW_I386_MICROVM_H + +#include "qemu-common.h" +#include "exec/hwaddr.h" +#include "qemu/notify.h" + +#include "hw/boards.h" +#include "hw/i386/x86.h" +#include "hw/acpi/acpi_dev_interface.h" +#include "hw/pci-host/gpex.h" +#include "qom/object.h" + +/* + * IRQ | pc | microvm (acpi=on) + * --------+------------+------------------ + * 0 | pit | + * 1 | kbd | + * 2 | cascade | + * 3 | serial 1 | + * 4 | serial 0 | serial + * 5 | - | + * 6 | floppy | + * 7 | parallel | + * 8 | rtc | rtc (rtc=on) + * 9 | acpi | acpi (ged) + * 10 | pci lnk | xhci (usb=on) + * 11 | pci lnk | + * 12 | ps2 | pcie + * 13 | fpu | pcie + * 14 | ide 0 | pcie + * 15 | ide 1 | pcie + * 16-23 | pci gsi | virtio + */ + +/* Platform virtio definitions */ +#define VIRTIO_MMIO_BASE 0xfeb00000 +#define VIRTIO_NUM_TRANSPORTS 8 +#define VIRTIO_CMDLINE_MAXLEN 64 + +#define GED_MMIO_BASE 0xfea00000 +#define GED_MMIO_BASE_MEMHP (GED_MMIO_BASE + 0x100) +#define GED_MMIO_BASE_REGS (GED_MMIO_BASE + 0x200) +#define GED_MMIO_IRQ 9 + +#define MICROVM_XHCI_BASE 0xfe900000 +#define MICROVM_XHCI_IRQ 10 + +#define PCIE_MMIO_BASE 0xc0000000 +#define PCIE_MMIO_SIZE 0x20000000 +#define PCIE_ECAM_BASE 0xe0000000 +#define PCIE_ECAM_SIZE 0x10000000 +#define PCIE_IRQ_BASE 12 + +/* Machine type options */ +#define MICROVM_MACHINE_PIT "pit" +#define MICROVM_MACHINE_PIC "pic" +#define MICROVM_MACHINE_RTC "rtc" +#define MICROVM_MACHINE_PCIE "pcie" +#define MICROVM_MACHINE_ISA_SERIAL "isa-serial" +#define MICROVM_MACHINE_OPTION_ROMS "x-option-roms" +#define MICROVM_MACHINE_AUTO_KERNEL_CMDLINE "auto-kernel-cmdline" + +struct MicrovmMachineClass { + X86MachineClass parent; + HotplugHandler *(*orig_hotplug_handler)(MachineState *machine, + DeviceState *dev); +}; + +struct MicrovmMachineState { + X86MachineState parent; + + /* Machine type options */ + OnOffAuto pic; + OnOffAuto pit; + OnOffAuto rtc; + OnOffAuto pcie; + bool isa_serial; + bool option_roms; + bool auto_kernel_cmdline; + + /* Machine state */ + uint32_t virtio_irq_base; + bool kernel_cmdline_fixed; + Notifier machine_done; + Notifier powerdown_req; + struct GPEXConfig gpex; +}; + +#define TYPE_MICROVM_MACHINE MACHINE_TYPE_NAME("microvm") +OBJECT_DECLARE_TYPE(MicrovmMachineState, MicrovmMachineClass, MICROVM_MACHINE) + +#endif diff --git a/include/hw/i386/pc.h b/include/hw/i386/pc.h new file mode 100644 index 000000000..7f8e1a791 --- /dev/null +++ b/include/hw/i386/pc.h @@ -0,0 +1,294 @@ +#ifndef HW_PC_H +#define HW_PC_H + +#include "qemu/notify.h" +#include "qapi/qapi-types-common.h" +#include "hw/boards.h" +#include "hw/block/fdc.h" +#include "hw/block/flash.h" +#include "hw/i386/x86.h" + +#include "hw/acpi/acpi_dev_interface.h" +#include "hw/hotplug.h" +#include "qom/object.h" + +#define HPET_INTCAP "hpet-intcap" + +/** + * PCMachineState: + * @acpi_dev: link to ACPI PM device that performs ACPI hotplug handling + * @boot_cpus: number of present VCPUs + * @smp_dies: number of dies per one package + */ +typedef struct PCMachineState { + /*< private >*/ + X86MachineState parent_obj; + + /* */ + + /* State for other subsystems/APIs: */ + Notifier machine_done; + + /* Pointers to devices and objects: */ + PCIBus *bus; + I2CBus *smbus; + PFlashCFI01 *flash[2]; + ISADevice *pcspk; + + /* Configuration options: */ + uint64_t max_ram_below_4g; + OnOffAuto vmport; + + bool acpi_build_enabled; + bool smbus_enabled; + bool sata_enabled; + bool pit_enabled; + bool hpet_enabled; + + /* NUMA information: */ + uint64_t numa_nodes; + uint64_t *node_mem; + + /* ACPI Memory hotplug IO base address */ + hwaddr memhp_io_base; +} PCMachineState; + +#define PC_MACHINE_ACPI_DEVICE_PROP "acpi-device" +#define PC_MACHINE_MAX_RAM_BELOW_4G "max-ram-below-4g" +#define PC_MACHINE_DEVMEM_REGION_SIZE "device-memory-region-size" +#define PC_MACHINE_VMPORT "vmport" +#define PC_MACHINE_SMBUS "smbus" +#define PC_MACHINE_SATA "sata" +#define PC_MACHINE_PIT "pit" + +/** + * PCMachineClass: + * + * Compat fields: + * + * @enforce_aligned_dimm: check that DIMM's address/size is aligned by + * backend's alignment value if provided + * @acpi_data_size: Size of the chunk of memory at the top of RAM + * for the BIOS ACPI tables and other BIOS + * datastructures. + * @gigabyte_align: Make sure that guest addresses aligned at + * 1Gbyte boundaries get mapped to host + * addresses aligned at 1Gbyte boundaries. This + * way we can use 1GByte pages in the host. + * + */ +struct PCMachineClass { + /*< private >*/ + X86MachineClass parent_class; + + /*< public >*/ + + /* Device configuration: */ + bool pci_enabled; + bool kvmclock_enabled; + const char *default_nic_model; + + /* Compat options: */ + + /* Default CPU model version. See x86_cpu_set_default_version(). */ + int default_cpu_version; + + /* ACPI compat: */ + bool has_acpi_build; + bool rsdp_in_ram; + int legacy_acpi_table_size; + unsigned acpi_data_size; + bool do_not_add_smb_acpi; + int pci_root_uid; + + /* SMBIOS compat: */ + bool smbios_defaults; + bool smbios_legacy_mode; + bool smbios_uuid_encoded; + + /* RAM / address space compat: */ + bool gigabyte_align; + bool has_reserved_memory; + bool enforce_aligned_dimm; + bool broken_reserved_end; + + /* generate legacy CPU hotplug AML */ + bool legacy_cpu_hotplug; + + /* use DMA capable linuxboot option rom */ + bool linuxboot_dma_enabled; + + /* use PVH to load kernels that support this feature */ + bool pvh_enabled; + + /* create kvmclock device even when KVM PV features are not exposed */ + bool kvmclock_create_always; +}; + +#define TYPE_PC_MACHINE "generic-pc-machine" +OBJECT_DECLARE_TYPE(PCMachineState, PCMachineClass, PC_MACHINE) + +/* ioapic.c */ + +GSIState *pc_gsi_create(qemu_irq **irqs, bool pci_enabled); + +/* pc.c */ +extern int fd_bootchk; + +void pc_acpi_smi_interrupt(void *opaque, int irq, int level); + +void pc_smp_parse(MachineState *ms, QemuOpts *opts); + +void pc_guest_info_init(PCMachineState *pcms); + +#define PCI_HOST_PROP_PCI_HOLE_START "pci-hole-start" +#define PCI_HOST_PROP_PCI_HOLE_END "pci-hole-end" +#define PCI_HOST_PROP_PCI_HOLE64_START "pci-hole64-start" +#define PCI_HOST_PROP_PCI_HOLE64_END "pci-hole64-end" +#define PCI_HOST_PROP_PCI_HOLE64_SIZE "pci-hole64-size" +#define PCI_HOST_BELOW_4G_MEM_SIZE "below-4g-mem-size" +#define PCI_HOST_ABOVE_4G_MEM_SIZE "above-4g-mem-size" + + +void pc_pci_as_mapping_init(Object *owner, MemoryRegion *system_memory, + MemoryRegion *pci_address_space); + +void xen_load_linux(PCMachineState *pcms); +void pc_memory_init(PCMachineState *pcms, + MemoryRegion *system_memory, + MemoryRegion *rom_memory, + MemoryRegion **ram_memory); +uint64_t pc_pci_hole64_start(void); +DeviceState *pc_vga_init(ISABus *isa_bus, PCIBus *pci_bus); +void pc_basic_device_init(struct PCMachineState *pcms, + ISABus *isa_bus, qemu_irq *gsi, + ISADevice **rtc_state, + bool create_fdctrl, + uint32_t hpet_irqs); +void pc_init_ne2k_isa(ISABus *bus, NICInfo *nd); +void pc_cmos_init(PCMachineState *pcms, + BusState *ide0, BusState *ide1, + ISADevice *s); +void pc_nic_init(PCMachineClass *pcmc, ISABus *isa_bus, PCIBus *pci_bus); +void pc_pci_device_init(PCIBus *pci_bus); + +typedef void (*cpu_set_smm_t)(int smm, void *arg); + +void pc_i8259_create(ISABus *isa_bus, qemu_irq *i8259_irqs); + +ISADevice *pc_find_fdc0(void); + +/* port92.c */ +#define PORT92_A20_LINE "a20" + +#define TYPE_PORT92 "port92" + +/* pc_sysfw.c */ +void pc_system_flash_create(PCMachineState *pcms); +void pc_system_flash_cleanup_unused(PCMachineState *pcms); +void pc_system_firmware_init(PCMachineState *pcms, MemoryRegion *rom_memory); + +/* acpi-build.c */ +void pc_madt_cpu_entry(AcpiDeviceIf *adev, int uid, + const CPUArchIdList *apic_ids, GArray *entry); + +extern GlobalProperty pc_compat_5_1[]; +extern const size_t pc_compat_5_1_len; + +extern GlobalProperty pc_compat_5_0[]; +extern const size_t pc_compat_5_0_len; + +extern GlobalProperty pc_compat_4_2[]; +extern const size_t pc_compat_4_2_len; + +extern GlobalProperty pc_compat_4_1[]; +extern const size_t pc_compat_4_1_len; + +extern GlobalProperty pc_compat_4_0[]; +extern const size_t pc_compat_4_0_len; + +extern GlobalProperty pc_compat_3_1[]; +extern const size_t pc_compat_3_1_len; + +extern GlobalProperty pc_compat_3_0[]; +extern const size_t pc_compat_3_0_len; + +extern GlobalProperty pc_compat_2_12[]; +extern const size_t pc_compat_2_12_len; + +extern GlobalProperty pc_compat_2_11[]; +extern const size_t pc_compat_2_11_len; + +extern GlobalProperty pc_compat_2_10[]; +extern const size_t pc_compat_2_10_len; + +extern GlobalProperty pc_compat_2_9[]; +extern const size_t pc_compat_2_9_len; + +extern GlobalProperty pc_compat_2_8[]; +extern const size_t pc_compat_2_8_len; + +extern GlobalProperty pc_compat_2_7[]; +extern const size_t pc_compat_2_7_len; + +extern GlobalProperty pc_compat_2_6[]; +extern const size_t pc_compat_2_6_len; + +extern GlobalProperty pc_compat_2_5[]; +extern const size_t pc_compat_2_5_len; + +extern GlobalProperty pc_compat_2_4[]; +extern const size_t pc_compat_2_4_len; + +extern GlobalProperty pc_compat_2_3[]; +extern const size_t pc_compat_2_3_len; + +extern GlobalProperty pc_compat_2_2[]; +extern const size_t pc_compat_2_2_len; + +extern GlobalProperty pc_compat_2_1[]; +extern const size_t pc_compat_2_1_len; + +extern GlobalProperty pc_compat_2_0[]; +extern const size_t pc_compat_2_0_len; + +extern GlobalProperty pc_compat_1_7[]; +extern const size_t pc_compat_1_7_len; + +extern GlobalProperty pc_compat_1_6[]; +extern const size_t pc_compat_1_6_len; + +extern GlobalProperty pc_compat_1_5[]; +extern const size_t pc_compat_1_5_len; + +extern GlobalProperty pc_compat_1_4[]; +extern const size_t pc_compat_1_4_len; + +/* Helper for setting model-id for CPU models that changed model-id + * depending on QEMU versions up to QEMU 2.4. + */ +#define PC_CPU_MODEL_IDS(v) \ + { "qemu32-" TYPE_X86_CPU, "model-id", "QEMU Virtual CPU version " v, },\ + { "qemu64-" TYPE_X86_CPU, "model-id", "QEMU Virtual CPU version " v, },\ + { "athlon-" TYPE_X86_CPU, "model-id", "QEMU Virtual CPU version " v, }, + +#define DEFINE_PC_MACHINE(suffix, namestr, initfn, optsfn) \ + static void pc_machine_##suffix##_class_init(ObjectClass *oc, void *data) \ + { \ + MachineClass *mc = MACHINE_CLASS(oc); \ + optsfn(mc); \ + mc->init = initfn; \ + } \ + static const TypeInfo pc_machine_type_##suffix = { \ + .name = namestr TYPE_MACHINE_SUFFIX, \ + .parent = TYPE_PC_MACHINE, \ + .class_init = pc_machine_##suffix##_class_init, \ + }; \ + static void pc_machine_init_##suffix(void) \ + { \ + type_register(&pc_machine_type_##suffix); \ + } \ + type_init(pc_machine_init_##suffix) + +extern void igd_passthrough_isa_bridge_create(PCIBus *bus, uint16_t gpu_dev_id); +#endif diff --git a/include/hw/i386/topology.h b/include/hw/i386/topology.h new file mode 100644 index 000000000..81573f6cf --- /dev/null +++ b/include/hw/i386/topology.h @@ -0,0 +1,170 @@ +/* + * x86 CPU topology data structures and functions + * + * Copyright (c) 2012 Red Hat Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ +#ifndef HW_I386_TOPOLOGY_H +#define HW_I386_TOPOLOGY_H + +/* This file implements the APIC-ID-based CPU topology enumeration logic, + * documented at the following document: + * Intel® 64 Architecture Processor Topology Enumeration + * http://software.intel.com/en-us/articles/intel-64-architecture-processor-topology-enumeration/ + * + * This code should be compatible with AMD's "Extended Method" described at: + * AMD CPUID Specification (Publication #25481) + * Section 3: Multiple Core Calcuation + * as long as: + * nr_threads is set to 1; + * OFFSET_IDX is assumed to be 0; + * CPUID Fn8000_0008_ECX[ApicIdCoreIdSize[3:0]] is set to apicid_core_width(). + */ + + +#include "qemu/bitops.h" + +/* APIC IDs can be 32-bit, but beware: APIC IDs > 255 require x2APIC support + */ +typedef uint32_t apic_id_t; + +typedef struct X86CPUTopoIDs { + unsigned pkg_id; + unsigned die_id; + unsigned core_id; + unsigned smt_id; +} X86CPUTopoIDs; + +typedef struct X86CPUTopoInfo { + unsigned dies_per_pkg; + unsigned cores_per_die; + unsigned threads_per_core; +} X86CPUTopoInfo; + +/* Return the bit width needed for 'count' IDs + */ +static unsigned apicid_bitwidth_for_count(unsigned count) +{ + g_assert(count >= 1); + count -= 1; + return count ? 32 - clz32(count) : 0; +} + +/* Bit width of the SMT_ID (thread ID) field on the APIC ID + */ +static inline unsigned apicid_smt_width(X86CPUTopoInfo *topo_info) +{ + return apicid_bitwidth_for_count(topo_info->threads_per_core); +} + +/* Bit width of the Core_ID field + */ +static inline unsigned apicid_core_width(X86CPUTopoInfo *topo_info) +{ + return apicid_bitwidth_for_count(topo_info->cores_per_die); +} + +/* Bit width of the Die_ID field */ +static inline unsigned apicid_die_width(X86CPUTopoInfo *topo_info) +{ + return apicid_bitwidth_for_count(topo_info->dies_per_pkg); +} + +/* Bit offset of the Core_ID field + */ +static inline unsigned apicid_core_offset(X86CPUTopoInfo *topo_info) +{ + return apicid_smt_width(topo_info); +} + +/* Bit offset of the Die_ID field */ +static inline unsigned apicid_die_offset(X86CPUTopoInfo *topo_info) +{ + return apicid_core_offset(topo_info) + apicid_core_width(topo_info); +} + +/* Bit offset of the Pkg_ID (socket ID) field + */ +static inline unsigned apicid_pkg_offset(X86CPUTopoInfo *topo_info) +{ + return apicid_die_offset(topo_info) + apicid_die_width(topo_info); +} + +/* Make APIC ID for the CPU based on Pkg_ID, Core_ID, SMT_ID + * + * The caller must make sure core_id < nr_cores and smt_id < nr_threads. + */ +static inline apic_id_t x86_apicid_from_topo_ids(X86CPUTopoInfo *topo_info, + const X86CPUTopoIDs *topo_ids) +{ + return (topo_ids->pkg_id << apicid_pkg_offset(topo_info)) | + (topo_ids->die_id << apicid_die_offset(topo_info)) | + (topo_ids->core_id << apicid_core_offset(topo_info)) | + topo_ids->smt_id; +} + +/* Calculate thread/core/package IDs for a specific topology, + * based on (contiguous) CPU index + */ +static inline void x86_topo_ids_from_idx(X86CPUTopoInfo *topo_info, + unsigned cpu_index, + X86CPUTopoIDs *topo_ids) +{ + unsigned nr_dies = topo_info->dies_per_pkg; + unsigned nr_cores = topo_info->cores_per_die; + unsigned nr_threads = topo_info->threads_per_core; + + topo_ids->pkg_id = cpu_index / (nr_dies * nr_cores * nr_threads); + topo_ids->die_id = cpu_index / (nr_cores * nr_threads) % nr_dies; + topo_ids->core_id = cpu_index / nr_threads % nr_cores; + topo_ids->smt_id = cpu_index % nr_threads; +} + +/* Calculate thread/core/package IDs for a specific topology, + * based on APIC ID + */ +static inline void x86_topo_ids_from_apicid(apic_id_t apicid, + X86CPUTopoInfo *topo_info, + X86CPUTopoIDs *topo_ids) +{ + topo_ids->smt_id = apicid & + ~(0xFFFFFFFFUL << apicid_smt_width(topo_info)); + topo_ids->core_id = + (apicid >> apicid_core_offset(topo_info)) & + ~(0xFFFFFFFFUL << apicid_core_width(topo_info)); + topo_ids->die_id = + (apicid >> apicid_die_offset(topo_info)) & + ~(0xFFFFFFFFUL << apicid_die_width(topo_info)); + topo_ids->pkg_id = apicid >> apicid_pkg_offset(topo_info); +} + +/* Make APIC ID for the CPU 'cpu_index' + * + * 'cpu_index' is a sequential, contiguous ID for the CPU. + */ +static inline apic_id_t x86_apicid_from_cpu_idx(X86CPUTopoInfo *topo_info, + unsigned cpu_index) +{ + X86CPUTopoIDs topo_ids; + x86_topo_ids_from_idx(topo_info, cpu_index, &topo_ids); + return x86_apicid_from_topo_ids(topo_info, &topo_ids); +} + +#endif /* HW_I386_TOPOLOGY_H */ diff --git a/include/hw/i386/vmport.h b/include/hw/i386/vmport.h new file mode 100644 index 000000000..8f5e27c6f --- /dev/null +++ b/include/hw/i386/vmport.h @@ -0,0 +1,28 @@ +#ifndef HW_VMPORT_H +#define HW_VMPORT_H + +#include "hw/isa/isa.h" + +#define TYPE_VMPORT "vmport" +typedef uint32_t VMPortReadFunc(void *opaque, uint32_t address); + +typedef enum { + VMPORT_CMD_GETVERSION = 10, + VMPORT_CMD_GETBIOSUUID = 19, + VMPORT_CMD_GETRAMSIZE = 20, + VMPORT_CMD_VMMOUSE_DATA = 39, + VMPORT_CMD_VMMOUSE_STATUS = 40, + VMPORT_CMD_VMMOUSE_COMMAND = 41, + VMPORT_CMD_GETHZ = 45, + VMPORT_CMD_GET_VCPU_INFO = 68, + VMPORT_ENTRIES +} VMPortCommand; + +static inline void vmport_init(ISABus *bus) +{ + isa_create_simple(bus, TYPE_VMPORT); +} + +void vmport_register(VMPortCommand command, VMPortReadFunc *func, void *opaque); + +#endif diff --git a/include/hw/i386/x86-iommu.h b/include/hw/i386/x86-iommu.h new file mode 100644 index 000000000..9de92d33a --- /dev/null +++ b/include/hw/i386/x86-iommu.h @@ -0,0 +1,175 @@ +/* + * Common IOMMU interface for X86 platform + * + * Copyright (C) 2016 Peter Xu, Red Hat + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + + * You should have received a copy of the GNU General Public License along + * with this program; if not, see . + */ + +#ifndef HW_I386_X86_IOMMU_H +#define HW_I386_X86_IOMMU_H + +#include "hw/sysbus.h" +#include "hw/pci/pci.h" +#include "hw/pci/msi.h" +#include "qom/object.h" + +#define TYPE_X86_IOMMU_DEVICE ("x86-iommu") +OBJECT_DECLARE_TYPE(X86IOMMUState, X86IOMMUClass, X86_IOMMU_DEVICE) + +#define X86_IOMMU_SID_INVALID (0xffff) + +typedef struct X86IOMMUIrq X86IOMMUIrq; +typedef struct X86IOMMU_MSIMessage X86IOMMU_MSIMessage; + +typedef enum IommuType { + TYPE_INTEL, + TYPE_AMD, + TYPE_NONE +} IommuType; + +struct X86IOMMUClass { + SysBusDeviceClass parent; + /* Intel/AMD specific realize() hook */ + DeviceRealize realize; + /* MSI-based interrupt remapping */ + int (*int_remap)(X86IOMMUState *iommu, MSIMessage *src, + MSIMessage *dst, uint16_t sid); +}; + +/** + * iec_notify_fn - IEC (Interrupt Entry Cache) notifier hook, + * triggered when IR invalidation happens. + * @private: private data + * @global: whether this is a global IEC invalidation + * @index: IRTE index to invalidate (start from) + * @mask: invalidation mask + */ +typedef void (*iec_notify_fn)(void *private, bool global, + uint32_t index, uint32_t mask); + +struct IEC_Notifier { + iec_notify_fn iec_notify; + void *private; + QLIST_ENTRY(IEC_Notifier) list; +}; +typedef struct IEC_Notifier IEC_Notifier; + +struct X86IOMMUState { + SysBusDevice busdev; + OnOffAuto intr_supported; /* Whether vIOMMU supports IR */ + bool dt_supported; /* Whether vIOMMU supports DT */ + bool pt_supported; /* Whether vIOMMU supports pass-through */ + IommuType type; /* IOMMU type - AMD/Intel */ + QLIST_HEAD(, IEC_Notifier) iec_notifiers; /* IEC notify list */ +}; + +bool x86_iommu_ir_supported(X86IOMMUState *s); + +/* Generic IRQ entry information when interrupt remapping is enabled */ +struct X86IOMMUIrq { + /* Used by both IOAPIC/MSI interrupt remapping */ + uint8_t trigger_mode; + uint8_t vector; + uint8_t delivery_mode; + uint32_t dest; + uint8_t dest_mode; + + /* only used by MSI interrupt remapping */ + uint8_t redir_hint; + uint8_t msi_addr_last_bits; +}; + +struct X86IOMMU_MSIMessage { + union { + struct { +#ifdef HOST_WORDS_BIGENDIAN + uint32_t __addr_head:12; /* 0xfee */ + uint32_t dest:8; + uint32_t __reserved:8; + uint32_t redir_hint:1; + uint32_t dest_mode:1; + uint32_t __not_used:2; +#else + uint32_t __not_used:2; + uint32_t dest_mode:1; + uint32_t redir_hint:1; + uint32_t __reserved:8; + uint32_t dest:8; + uint32_t __addr_head:12; /* 0xfee */ +#endif + uint32_t __addr_hi; + } QEMU_PACKED; + uint64_t msi_addr; + }; + union { + struct { +#ifdef HOST_WORDS_BIGENDIAN + uint16_t trigger_mode:1; + uint16_t level:1; + uint16_t __resved:3; + uint16_t delivery_mode:3; + uint16_t vector:8; +#else + uint16_t vector:8; + uint16_t delivery_mode:3; + uint16_t __resved:3; + uint16_t level:1; + uint16_t trigger_mode:1; +#endif + uint16_t __resved1; + } QEMU_PACKED; + uint32_t msi_data; + }; +}; + +/** + * x86_iommu_get_default - get default IOMMU device + * @return: pointer to default IOMMU device + */ +X86IOMMUState *x86_iommu_get_default(void); + +/* + * x86_iommu_get_type - get IOMMU type + */ +IommuType x86_iommu_get_type(void); + +/** + * x86_iommu_iec_register_notifier - register IEC (Interrupt Entry + * Cache) notifiers + * @iommu: IOMMU device to register + * @fn: IEC notifier hook function + * @data: notifier private data + */ +void x86_iommu_iec_register_notifier(X86IOMMUState *iommu, + iec_notify_fn fn, void *data); + +/** + * x86_iommu_iec_notify_all - Notify IEC invalidations + * @iommu: IOMMU device that sends the notification + * @global: whether this is a global invalidation. If true, @index + * and @mask are undefined. + * @index: starting index of interrupt entry to invalidate + * @mask: index mask for the invalidation + */ +void x86_iommu_iec_notify_all(X86IOMMUState *iommu, bool global, + uint32_t index, uint32_t mask); + +/** + * x86_iommu_irq_to_msi_message - Populate one MSIMessage from X86IOMMUIrq + * @X86IOMMUIrq: The IRQ information + * @out: Output MSI message + */ +void x86_iommu_irq_to_msi_message(X86IOMMUIrq *irq, MSIMessage *out); +#endif diff --git a/include/hw/i386/x86.h b/include/hw/i386/x86.h new file mode 100644 index 000000000..739fac508 --- /dev/null +++ b/include/hw/i386/x86.h @@ -0,0 +1,129 @@ +/* + * Copyright (c) 2019 Red Hat, Inc. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2 or later, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along with + * this program. If not, see . + */ + +#ifndef HW_I386_X86_H +#define HW_I386_X86_H + +#include "qemu-common.h" +#include "exec/hwaddr.h" +#include "qemu/notify.h" + +#include "hw/i386/topology.h" +#include "hw/boards.h" +#include "hw/nmi.h" +#include "hw/isa/isa.h" +#include "hw/i386/ioapic.h" +#include "qom/object.h" + +struct X86MachineClass { + /*< private >*/ + MachineClass parent; + + /*< public >*/ + + /* TSC rate migration: */ + bool save_tsc_khz; + /* Enables contiguous-apic-ID mode */ + bool compat_apic_id_mode; +}; + +struct X86MachineState { + /*< private >*/ + MachineState parent; + + /*< public >*/ + + /* Pointers to devices and objects: */ + ISADevice *rtc; + FWCfgState *fw_cfg; + qemu_irq *gsi; + GMappedFile *initrd_mapped_file; + HotplugHandler *acpi_dev; + + /* RAM information (sizes, addresses, configuration): */ + ram_addr_t below_4g_mem_size, above_4g_mem_size; + + /* CPU and apic information: */ + bool apic_xrupt_override; + unsigned pci_irq_mask; + unsigned apic_id_limit; + uint16_t boot_cpus; + unsigned smp_dies; + + OnOffAuto smm; + OnOffAuto acpi; + + /* + * Address space used by IOAPIC device. All IOAPIC interrupts + * will be translated to MSI messages in the address space. + */ + AddressSpace *ioapic_as; +}; + +#define X86_MACHINE_SMM "smm" +#define X86_MACHINE_ACPI "acpi" + +#define TYPE_X86_MACHINE MACHINE_TYPE_NAME("x86") +OBJECT_DECLARE_TYPE(X86MachineState, X86MachineClass, X86_MACHINE) + +void init_topo_info(X86CPUTopoInfo *topo_info, const X86MachineState *x86ms); + +uint32_t x86_cpu_apic_id_from_index(X86MachineState *pcms, + unsigned int cpu_index); + +void x86_cpu_new(X86MachineState *pcms, int64_t apic_id, Error **errp); +void x86_cpus_init(X86MachineState *pcms, int default_cpu_version); +CpuInstanceProperties x86_cpu_index_to_props(MachineState *ms, + unsigned cpu_index); +int64_t x86_get_default_cpu_node_id(const MachineState *ms, int idx); +const CPUArchIdList *x86_possible_cpu_arch_ids(MachineState *ms); +CPUArchId *x86_find_cpu_slot(MachineState *ms, uint32_t id, int *idx); +void x86_rtc_set_cpus_count(ISADevice *rtc, uint16_t cpus_count); +void x86_cpu_pre_plug(HotplugHandler *hotplug_dev, + DeviceState *dev, Error **errp); +void x86_cpu_plug(HotplugHandler *hotplug_dev, + DeviceState *dev, Error **errp); +void x86_cpu_unplug_request_cb(HotplugHandler *hotplug_dev, + DeviceState *dev, Error **errp); +void x86_cpu_unplug_cb(HotplugHandler *hotplug_dev, + DeviceState *dev, Error **errp); + +void x86_bios_rom_init(MemoryRegion *rom_memory, bool isapc_ram_fw); + +void x86_load_linux(X86MachineState *x86ms, + FWCfgState *fw_cfg, + int acpi_data_size, + bool pvh_enabled, + bool linuxboot_dma_enabled); + +bool x86_machine_is_smm_enabled(const X86MachineState *x86ms); +bool x86_machine_is_acpi_enabled(const X86MachineState *x86ms); + +/* Global System Interrupts */ + +#define GSI_NUM_PINS IOAPIC_NUM_PINS +#define ACPI_BUILD_PCI_IRQS ((1<<5) | (1<<9) | (1<<10) | (1<<11)) + +typedef struct GSIState { + qemu_irq i8259_irq[ISA_NUM_IRQS]; + qemu_irq ioapic_irq[IOAPIC_NUM_PINS]; +} GSIState; + +qemu_irq x86_allocate_cpu_irq(void); +void gsi_handler(void *opaque, int n, int level); +void ioapic_init_gsi(GSIState *gsi_state, const char *parent_name); + +#endif diff --git a/include/hw/ide.h b/include/hw/ide.h new file mode 100644 index 000000000..c5ce5da4f --- /dev/null +++ b/include/hw/ide.h @@ -0,0 +1,24 @@ +#ifndef HW_IDE_H +#define HW_IDE_H + +#include "hw/isa/isa.h" +#include "exec/memory.h" + +/* ide-isa.c */ +ISADevice *isa_ide_init(ISABus *bus, int iobase, int iobase2, int isairq, + DriveInfo *hd0, DriveInfo *hd1); + +/* ide-pci.c */ +int pci_piix3_xen_ide_unplug(DeviceState *dev, bool aux); + +/* ide-mmio.c */ +void mmio_ide_init_drives(DeviceState *dev, DriveInfo *hd0, DriveInfo *hd1); + +int ide_get_geometry(BusState *bus, int unit, + int16_t *cyls, int8_t *heads, int8_t *secs); +int ide_get_bios_chs_trans(BusState *bus, int unit); + +/* ide/core.c */ +void ide_drive_get(DriveInfo **hd, int max_bus); + +#endif /* HW_IDE_H */ diff --git a/include/hw/ide/ahci.h b/include/hw/ide/ahci.h new file mode 100644 index 000000000..210e5e734 --- /dev/null +++ b/include/hw/ide/ahci.h @@ -0,0 +1,88 @@ +/* + * QEMU AHCI Emulation + * + * Copyright (c) 2010 qiaochong@loongson.cn + * Copyright (c) 2010 Roland Elek + * Copyright (c) 2010 Sebastian Herbszt + * Copyright (c) 2010 Alexander Graf + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, see . + * + */ + +#ifndef HW_IDE_AHCI_H +#define HW_IDE_AHCI_H + +#include "hw/sysbus.h" +#include "qom/object.h" + +typedef struct AHCIDevice AHCIDevice; + +typedef struct AHCIControlRegs { + uint32_t cap; + uint32_t ghc; + uint32_t irqstatus; + uint32_t impl; + uint32_t version; +} AHCIControlRegs; + +typedef struct AHCIState { + DeviceState *container; + + AHCIDevice *dev; + AHCIControlRegs control_regs; + MemoryRegion mem; + MemoryRegion idp; /* Index-Data Pair I/O port space */ + unsigned idp_offset; /* Offset of index in I/O port space */ + uint32_t idp_index; /* Current IDP index */ + int32_t ports; + qemu_irq irq; + AddressSpace *as; +} AHCIState; + + +#define TYPE_ICH9_AHCI "ich9-ahci" +OBJECT_DECLARE_SIMPLE_TYPE(AHCIPCIState, ICH9_AHCI) + +int32_t ahci_get_num_ports(PCIDevice *dev); +void ahci_ide_create_devs(PCIDevice *dev, DriveInfo **hd); + +#define TYPE_SYSBUS_AHCI "sysbus-ahci" +OBJECT_DECLARE_SIMPLE_TYPE(SysbusAHCIState, SYSBUS_AHCI) + +struct SysbusAHCIState { + /*< private >*/ + SysBusDevice parent_obj; + /*< public >*/ + + AHCIState ahci; + uint32_t num_ports; +}; + +#define TYPE_ALLWINNER_AHCI "allwinner-ahci" +OBJECT_DECLARE_SIMPLE_TYPE(AllwinnerAHCIState, ALLWINNER_AHCI) + +#define ALLWINNER_AHCI_MMIO_OFF 0x80 +#define ALLWINNER_AHCI_MMIO_SIZE 0x80 + +struct AllwinnerAHCIState { + /*< private >*/ + SysbusAHCIState parent_obj; + /*< public >*/ + + MemoryRegion mmio; + uint32_t regs[ALLWINNER_AHCI_MMIO_SIZE/4]; +}; + +#endif /* HW_IDE_AHCI_H */ diff --git a/include/hw/ide/internal.h b/include/hw/ide/internal.h new file mode 100644 index 000000000..2d09162ee --- /dev/null +++ b/include/hw/ide/internal.h @@ -0,0 +1,657 @@ +#ifndef HW_IDE_INTERNAL_H +#define HW_IDE_INTERNAL_H + +/* + * QEMU IDE Emulation -- internal header file + * only files in hw/ide/ are supposed to include this file. + * non-internal declarations are in hw/ide.h + */ + +#include "qapi/qapi-types-run-state.h" +#include "hw/ide.h" +#include "hw/irq.h" +#include "hw/isa/isa.h" +#include "sysemu/dma.h" +#include "hw/block/block.h" +#include "scsi/constants.h" + +/* debug IDE devices */ +#define USE_DMA_CDROM +#include "qom/object.h" + +typedef struct IDEDevice IDEDevice; +typedef struct IDEState IDEState; +typedef struct IDEDMA IDEDMA; +typedef struct IDEDMAOps IDEDMAOps; + +#define TYPE_IDE_BUS "IDE" +OBJECT_DECLARE_SIMPLE_TYPE(IDEBus, IDE_BUS) + +#define MAX_IDE_DEVS 2 + +/* Device/Head ("select") Register */ +#define ATA_DEV_SELECT 0x10 +/* ATA1,3: Defined as '1'. + * ATA2: Reserved. + * ATA3-7: obsolete. */ +#define ATA_DEV_ALWAYS_ON 0xA0 +#define ATA_DEV_LBA 0x40 +#define ATA_DEV_LBA_MSB 0x0F /* LBA 24:27 */ +#define ATA_DEV_HS 0x0F /* HS 3:0 */ + + +/* Bits of HD_STATUS */ +#define ERR_STAT 0x01 +#define INDEX_STAT 0x02 +#define ECC_STAT 0x04 /* Corrected error */ +#define DRQ_STAT 0x08 +#define SEEK_STAT 0x10 +#define SRV_STAT 0x10 +#define WRERR_STAT 0x20 +#define READY_STAT 0x40 +#define BUSY_STAT 0x80 + +/* Bits for HD_ERROR */ +#define MARK_ERR 0x01 /* Bad address mark */ +#define TRK0_ERR 0x02 /* couldn't find track 0 */ +#define ABRT_ERR 0x04 /* Command aborted */ +#define MCR_ERR 0x08 /* media change request */ +#define ID_ERR 0x10 /* ID field not found */ +#define MC_ERR 0x20 /* media changed */ +#define ECC_ERR 0x40 /* Uncorrectable ECC error */ +#define BBD_ERR 0x80 /* pre-EIDE meaning: block marked bad */ +#define ICRC_ERR 0x80 /* new meaning: CRC error during transfer */ + +/* Bits of HD_NSECTOR */ +#define CD 0x01 +#define IO 0x02 +#define REL 0x04 +#define TAG_MASK 0xf8 + +/* Bits of Device Control register */ +#define IDE_CTRL_HOB 0x80 +#define IDE_CTRL_RESET 0x04 +#define IDE_CTRL_DISABLE_IRQ 0x02 + +/* ACS-2 T13/2015-D Table B.2 Command codes */ +#define WIN_NOP 0x00 +/* reserved 0x01..0x02 */ +#define CFA_REQ_EXT_ERROR_CODE 0x03 /* CFA Request Extended Error Code */ +/* reserved 0x04..0x05 */ +#define WIN_DSM 0x06 +/* reserved 0x07 */ +#define WIN_DEVICE_RESET 0x08 +/* reserved 0x09..0x0a */ +/* REQUEST SENSE DATA EXT 0x0B */ +/* reserved 0x0C..0x0F */ +#define WIN_RECAL 0x10 /* obsolete since ATA4 */ +/* obsolete since ATA3, retired in ATA4 0x11..0x1F */ +#define WIN_READ 0x20 /* 28-Bit */ +#define WIN_READ_ONCE 0x21 /* 28-Bit w/o retries, obsolete since ATA5 */ +/* obsolete since ATA4 0x22..0x23 */ +#define WIN_READ_EXT 0x24 /* 48-Bit */ +#define WIN_READDMA_EXT 0x25 /* 48-Bit */ +#define WIN_READDMA_QUEUED_EXT 0x26 /* 48-Bit, obsolete since ACS2 */ +#define WIN_READ_NATIVE_MAX_EXT 0x27 /* 48-Bit */ +/* reserved 0x28 */ +#define WIN_MULTREAD_EXT 0x29 /* 48-Bit */ +/* READ STREAM DMA EXT 0x2A */ +/* READ STREAM EXT 0x2B */ +/* reserved 0x2C..0x2E */ +/* READ LOG EXT 0x2F */ +#define WIN_WRITE 0x30 /* 28-Bit */ +#define WIN_WRITE_ONCE 0x31 /* 28-Bit w/o retries, obsolete since ATA5 */ +/* obsolete since ATA4 0x32..0x33 */ +#define WIN_WRITE_EXT 0x34 /* 48-Bit */ +#define WIN_WRITEDMA_EXT 0x35 /* 48-Bit */ +#define WIN_WRITEDMA_QUEUED_EXT 0x36 /* 48-Bit */ +#define WIN_SET_MAX_EXT 0x37 /* 48-Bit, obsolete since ACS2 */ +#define WIN_SET_MAX_EXT 0x37 /* 48-Bit */ +#define CFA_WRITE_SECT_WO_ERASE 0x38 /* CFA Write Sectors without erase */ +#define WIN_MULTWRITE_EXT 0x39 /* 48-Bit */ +/* WRITE STREAM DMA EXT 0x3A */ +/* WRITE STREAM EXT 0x3B */ +#define WIN_WRITE_VERIFY 0x3C /* 28-Bit, obsolete since ATA4 */ +/* WRITE DMA FUA EXT 0x3D */ +/* obsolete since ACS2 0x3E */ +/* WRITE LOG EXT 0x3F */ +#define WIN_VERIFY 0x40 /* 28-Bit - Read Verify Sectors */ +#define WIN_VERIFY_ONCE 0x41 /* 28-Bit - w/o retries, obsolete since ATA5 */ +#define WIN_VERIFY_EXT 0x42 /* 48-Bit */ +/* reserved 0x43..0x44 */ +/* WRITE UNCORRECTABLE EXT 0x45 */ +/* reserved 0x46 */ +/* READ LOG DMA EXT 0x47 */ +/* reserved 0x48..0x4F */ +/* obsolete since ATA4 0x50 */ +/* CONFIGURE STREAM 0x51 */ +/* reserved 0x52..0x56 */ +/* WRITE LOG DMA EXT 0x57 */ +/* reserved 0x58..0x5A */ +/* TRUSTED NON DATA 0x5B */ +/* TRUSTED RECEIVE 0x5C */ +/* TRUSTED RECEIVE DMA 0x5D */ +/* TRUSTED SEND 0x5E */ +/* TRUSTED SEND DMA 0x5F */ +/* READ FPDMA QUEUED 0x60 */ +/* WRITE FPDMA QUEUED 0x61 */ +/* reserved 0x62->0x6F */ +#define WIN_SEEK 0x70 /* obsolete since ATA7 */ +/* reserved 0x71-0x7F */ +/* vendor specific 0x80-0x86 */ +#define CFA_TRANSLATE_SECTOR 0x87 /* CFA Translate Sector */ +/* vendor specific 0x88-0x8F */ +#define WIN_DIAGNOSE 0x90 +#define WIN_SPECIFY 0x91 /* set drive geometry translation, obsolete since ATA6 */ +#define WIN_DOWNLOAD_MICROCODE 0x92 +/* DOWNLOAD MICROCODE DMA 0x93 */ +#define WIN_STANDBYNOW2 0x94 /* retired in ATA4 */ +#define WIN_IDLEIMMEDIATE2 0x95 /* force drive to become "ready", retired in ATA4 */ +#define WIN_STANDBY2 0x96 /* retired in ATA4 */ +#define WIN_SETIDLE2 0x97 /* retired in ATA4 */ +#define WIN_CHECKPOWERMODE2 0x98 /* retired in ATA4 */ +#define WIN_SLEEPNOW2 0x99 /* retired in ATA4 */ +/* vendor specific 0x9A */ +/* reserved 0x9B..0x9F */ +#define WIN_PACKETCMD 0xA0 /* Send a packet command. */ +#define WIN_PIDENTIFY 0xA1 /* identify ATAPI device */ +#define WIN_QUEUED_SERVICE 0xA2 /* obsolete since ACS2 */ +/* reserved 0xA3..0xAF */ +#define WIN_SMART 0xB0 /* self-monitoring and reporting */ +/* Device Configuration Overlay 0xB1 */ +/* reserved 0xB2..0xB3 */ +/* Sanitize Device 0xB4 */ +/* reserved 0xB5 */ +/* NV Cache 0xB6 */ +/* reserved for CFA 0xB7..0xBB */ +#define CFA_ACCESS_METADATA_STORAGE 0xB8 +/* reserved 0xBC..0xBF */ +#define CFA_ERASE_SECTORS 0xC0 /* microdrives implement as NOP */ +/* vendor specific 0xC1..0xC3 */ +#define WIN_MULTREAD 0xC4 /* read sectors using multiple mode*/ +#define WIN_MULTWRITE 0xC5 /* write sectors using multiple mode */ +#define WIN_SETMULT 0xC6 /* enable/disable multiple mode */ +#define WIN_READDMA_QUEUED 0xC7 /* read sectors using Queued DMA transfers, obsolete since ACS2 */ +#define WIN_READDMA 0xC8 /* read sectors using DMA transfers */ +#define WIN_READDMA_ONCE 0xC9 /* 28-Bit - w/o retries, obsolete since ATA5 */ +#define WIN_WRITEDMA 0xCA /* write sectors using DMA transfers */ +#define WIN_WRITEDMA_ONCE 0xCB /* 28-Bit - w/o retries, obsolete since ATA5 */ +#define WIN_WRITEDMA_QUEUED 0xCC /* write sectors using Queued DMA transfers, obsolete since ACS2 */ +#define CFA_WRITE_MULTI_WO_ERASE 0xCD /* CFA Write multiple without erase */ +/* WRITE MULTIPLE FUA EXT 0xCE */ +/* reserved 0xCF..0xDO */ +/* CHECK MEDIA CARD TYPE 0xD1 */ +/* reserved for media card pass through 0xD2..0xD4 */ +/* reserved 0xD5..0xD9 */ +#define WIN_GETMEDIASTATUS 0xDA /* obsolete since ATA8 */ +/* obsolete since ATA3, retired in ATA4 0xDB..0xDD */ +#define WIN_DOORLOCK 0xDE /* lock door on removable drives, obsolete since ATA8 */ +#define WIN_DOORUNLOCK 0xDF /* unlock door on removable drives, obsolete since ATA8 */ +#define WIN_STANDBYNOW1 0xE0 +#define WIN_IDLEIMMEDIATE 0xE1 /* force drive to become "ready" */ +#define WIN_STANDBY 0xE2 /* Set device in Standby Mode */ +#define WIN_SETIDLE1 0xE3 +#define WIN_READ_BUFFER 0xE4 /* force read only 1 sector */ +#define WIN_CHECKPOWERMODE1 0xE5 +#define WIN_SLEEPNOW1 0xE6 +#define WIN_FLUSH_CACHE 0xE7 +#define WIN_WRITE_BUFFER 0xE8 /* force write only 1 sector */ +/* READ BUFFER DMA 0xE9 */ +#define WIN_FLUSH_CACHE_EXT 0xEA /* 48-Bit */ +/* WRITE BUFFER DMA 0xEB */ +#define WIN_IDENTIFY 0xEC /* ask drive to identify itself */ +#define WIN_MEDIAEJECT 0xED /* obsolete since ATA8 */ +/* obsolete since ATA4 0xEE */ +#define WIN_SETFEATURES 0xEF /* set special drive features */ +#define IBM_SENSE_CONDITION 0xF0 /* measure disk temperature, vendor specific */ +#define WIN_SECURITY_SET_PASS 0xF1 +#define WIN_SECURITY_UNLOCK 0xF2 +#define WIN_SECURITY_ERASE_PREPARE 0xF3 +#define WIN_SECURITY_ERASE_UNIT 0xF4 +#define WIN_SECURITY_FREEZE_LOCK 0xF5 +#define CFA_WEAR_LEVEL 0xF5 /* microdrives implement as NOP; not specified in T13! */ +#define WIN_SECURITY_DISABLE 0xF6 +/* vendor specific 0xF7 */ +#define WIN_READ_NATIVE_MAX 0xF8 /* return the native maximum address */ +#define WIN_SET_MAX 0xF9 +/* vendor specific 0xFA..0xFF */ + +/* set to 1 set disable mult support */ +#define MAX_MULT_SECTORS 16 + +#define IDE_DMA_BUF_SECTORS 256 + +/* feature values for Data Set Management */ +#define DSM_TRIM 0x01 + +#if (IDE_DMA_BUF_SECTORS < MAX_MULT_SECTORS) +#error "IDE_DMA_BUF_SECTORS must be bigger or equal to MAX_MULT_SECTORS" +#endif + +/* ATAPI defines */ + +#define ATAPI_PACKET_SIZE 12 + +/* The generic packet command opcodes for CD/DVD Logical Units, + * From Table 57 of the SFF8090 Ver. 3 (Mt. Fuji) draft standard. */ +#define GPCMD_BLANK 0xa1 +#define GPCMD_CLOSE_TRACK 0x5b +#define GPCMD_FLUSH_CACHE 0x35 +#define GPCMD_FORMAT_UNIT 0x04 +#define GPCMD_GET_CONFIGURATION 0x46 +#define GPCMD_GET_EVENT_STATUS_NOTIFICATION 0x4a +#define GPCMD_GET_PERFORMANCE 0xac +#define GPCMD_INQUIRY 0x12 +#define GPCMD_LOAD_UNLOAD 0xa6 +#define GPCMD_MECHANISM_STATUS 0xbd +#define GPCMD_MODE_SELECT_10 0x55 +#define GPCMD_MODE_SENSE_10 0x5a +#define GPCMD_PAUSE_RESUME 0x4b +#define GPCMD_PLAY_AUDIO_10 0x45 +#define GPCMD_PLAY_AUDIO_MSF 0x47 +#define GPCMD_PLAY_AUDIO_TI 0x48 +#define GPCMD_PLAY_CD 0xbc +#define GPCMD_PREVENT_ALLOW_MEDIUM_REMOVAL 0x1e +#define GPCMD_READ_10 0x28 +#define GPCMD_READ_12 0xa8 +#define GPCMD_READ_CDVD_CAPACITY 0x25 +#define GPCMD_READ_CD 0xbe +#define GPCMD_READ_CD_MSF 0xb9 +#define GPCMD_READ_DISC_INFO 0x51 +#define GPCMD_READ_DVD_STRUCTURE 0xad +#define GPCMD_READ_FORMAT_CAPACITIES 0x23 +#define GPCMD_READ_HEADER 0x44 +#define GPCMD_READ_TRACK_RZONE_INFO 0x52 +#define GPCMD_READ_SUBCHANNEL 0x42 +#define GPCMD_READ_TOC_PMA_ATIP 0x43 +#define GPCMD_REPAIR_RZONE_TRACK 0x58 +#define GPCMD_REPORT_KEY 0xa4 +#define GPCMD_REQUEST_SENSE 0x03 +#define GPCMD_RESERVE_RZONE_TRACK 0x53 +#define GPCMD_SCAN 0xba +#define GPCMD_SEEK 0x2b +#define GPCMD_SEND_DVD_STRUCTURE 0xad +#define GPCMD_SEND_EVENT 0xa2 +#define GPCMD_SEND_KEY 0xa3 +#define GPCMD_SEND_OPC 0x54 +#define GPCMD_SET_READ_AHEAD 0xa7 +#define GPCMD_SET_STREAMING 0xb6 +#define GPCMD_START_STOP_UNIT 0x1b +#define GPCMD_STOP_PLAY_SCAN 0x4e +#define GPCMD_TEST_UNIT_READY 0x00 +#define GPCMD_VERIFY_10 0x2f +#define GPCMD_WRITE_10 0x2a +#define GPCMD_WRITE_AND_VERIFY_10 0x2e +/* This is listed as optional in ATAPI 2.6, but is (curiously) + * missing from Mt. Fuji, Table 57. It _is_ mentioned in Mt. Fuji + * Table 377 as an MMC command for SCSi devices though... Most ATAPI + * drives support it. */ +#define GPCMD_SET_SPEED 0xbb +/* This seems to be a SCSI specific CD-ROM opcode + * to play data at track/index */ +#define GPCMD_PLAYAUDIO_TI 0x48 +/* + * From MS Media Status Notification Support Specification. For + * older drives only. + */ +#define GPCMD_GET_MEDIA_STATUS 0xda +#define GPCMD_MODE_SENSE_6 0x1a + +#define ATAPI_INT_REASON_CD 0x01 /* 0 = data transfer */ +#define ATAPI_INT_REASON_IO 0x02 /* 1 = transfer to the host */ +#define ATAPI_INT_REASON_REL 0x04 +#define ATAPI_INT_REASON_TAG 0xf8 + +/* same constants as bochs */ +#define ASC_NO_SEEK_COMPLETE 0x02 +#define ASC_ILLEGAL_OPCODE 0x20 +#define ASC_LOGICAL_BLOCK_OOR 0x21 +#define ASC_INV_FIELD_IN_CMD_PACKET 0x24 +#define ASC_MEDIUM_MAY_HAVE_CHANGED 0x28 +#define ASC_INCOMPATIBLE_FORMAT 0x30 +#define ASC_MEDIUM_NOT_PRESENT 0x3a +#define ASC_SAVING_PARAMETERS_NOT_SUPPORTED 0x39 +#define ASC_DATA_PHASE_ERROR 0x4b +#define ASC_MEDIA_REMOVAL_PREVENTED 0x53 + +#define CFA_NO_ERROR 0x00 +#define CFA_MISC_ERROR 0x09 +#define CFA_INVALID_COMMAND 0x20 +#define CFA_INVALID_ADDRESS 0x21 +#define CFA_ADDRESS_OVERFLOW 0x2f + +#define SMART_READ_DATA 0xd0 +#define SMART_READ_THRESH 0xd1 +#define SMART_ATTR_AUTOSAVE 0xd2 +#define SMART_SAVE_ATTR 0xd3 +#define SMART_EXECUTE_OFFLINE 0xd4 +#define SMART_READ_LOG 0xd5 +#define SMART_WRITE_LOG 0xd6 +#define SMART_ENABLE 0xd8 +#define SMART_DISABLE 0xd9 +#define SMART_STATUS 0xda + +typedef enum { IDE_HD, IDE_CD, IDE_CFATA } IDEDriveKind; + +typedef void EndTransferFunc(IDEState *); + +typedef void DMAStartFunc(const IDEDMA *, IDEState *, BlockCompletionFunc *); +typedef void DMAVoidFunc(const IDEDMA *); +typedef int DMAIntFunc(const IDEDMA *, bool); +typedef int32_t DMAInt32Func(const IDEDMA *, int32_t len); +typedef void DMAu32Func(const IDEDMA *, uint32_t); +typedef void DMAStopFunc(const IDEDMA *, bool); + +struct unreported_events { + bool eject_request; + bool new_media; +}; + +enum ide_dma_cmd { + IDE_DMA_READ = 0, + IDE_DMA_WRITE, + IDE_DMA_TRIM, + IDE_DMA_ATAPI, + IDE_DMA__COUNT +}; + +extern const char *IDE_DMA_CMD_lookup[IDE_DMA__COUNT]; + +#define ide_cmd_is_read(s) \ + ((s)->dma_cmd == IDE_DMA_READ) + +typedef struct IDEBufferedRequest { + QLIST_ENTRY(IDEBufferedRequest) list; + QEMUIOVector qiov; + QEMUIOVector *original_qiov; + BlockCompletionFunc *original_cb; + void *original_opaque; + bool orphaned; +} IDEBufferedRequest; + +/* NOTE: IDEState represents in fact one drive */ +struct IDEState { + IDEBus *bus; + uint8_t unit; + /* ide config */ + IDEDriveKind drive_kind; + int cylinders, heads, sectors, chs_trans; + int64_t nb_sectors; + int mult_sectors; + int identify_set; + uint8_t identify_data[512]; + int drive_serial; + char drive_serial_str[21]; + char drive_model_str[41]; + uint64_t wwn; + /* ide regs */ + uint8_t feature; + uint8_t error; + uint32_t nsector; + uint8_t sector; + uint8_t lcyl; + uint8_t hcyl; + /* other part of tf for lba48 support */ + uint8_t hob_feature; + uint8_t hob_nsector; + uint8_t hob_sector; + uint8_t hob_lcyl; + uint8_t hob_hcyl; + + uint8_t select; + uint8_t status; + + /* set for lba48 access */ + uint8_t lba48; + BlockBackend *blk; + char version[9]; + /* ATAPI specific */ + struct unreported_events events; + uint8_t sense_key; + uint8_t asc; + bool tray_open; + bool tray_locked; + uint8_t cdrom_changed; + int packet_transfer_size; + int elementary_transfer_size; + int32_t io_buffer_index; + int lba; + int cd_sector_size; + int atapi_dma; /* true if dma is requested for the packet cmd */ + BlockAcctCookie acct; + BlockAIOCB *pio_aiocb; + QEMUIOVector qiov; + QLIST_HEAD(, IDEBufferedRequest) buffered_requests; + /* ATA DMA state */ + uint64_t io_buffer_offset; + int32_t io_buffer_size; + QEMUSGList sg; + /* PIO transfer handling */ + int req_nb_sectors; /* number of sectors per interrupt */ + EndTransferFunc *end_transfer_func; + uint8_t *data_ptr; + uint8_t *data_end; + uint8_t *io_buffer; + /* PIO save/restore */ + int32_t io_buffer_total_len; + int32_t cur_io_buffer_offset; + int32_t cur_io_buffer_len; + uint8_t end_transfer_fn_idx; + QEMUTimer *sector_write_timer; /* only used for win2k install hack */ + uint32_t irq_count; /* counts IRQs when using win2k install hack */ + /* CF-ATA extended error */ + uint8_t ext_error; + /* CF-ATA metadata storage */ + uint32_t mdata_size; + uint8_t *mdata_storage; + int media_changed; + enum ide_dma_cmd dma_cmd; + /* SMART */ + uint8_t smart_enabled; + uint8_t smart_autosave; + int smart_errors; + uint8_t smart_selftest_count; + uint8_t *smart_selftest_data; + /* AHCI */ + int ncq_queues; +}; + +struct IDEDMAOps { + DMAStartFunc *start_dma; + DMAVoidFunc *pio_transfer; + DMAInt32Func *prepare_buf; + DMAu32Func *commit_buf; + DMAIntFunc *rw_buf; + DMAVoidFunc *restart; + DMAVoidFunc *restart_dma; + DMAStopFunc *set_inactive; + DMAVoidFunc *cmd_done; + DMAVoidFunc *reset; +}; + +struct IDEDMA { + const struct IDEDMAOps *ops; + QEMUIOVector qiov; + BlockAIOCB *aiocb; +}; + +struct IDEBus { + BusState qbus; + IDEDevice *master; + IDEDevice *slave; + IDEState ifs[2]; + QEMUBH *bh; + + int bus_id; + int max_units; + IDEDMA *dma; + uint8_t unit; + uint8_t cmd; + qemu_irq irq; + + int error_status; + uint8_t retry_unit; + int64_t retry_sector_num; + uint32_t retry_nsector; + PortioList portio_list; + PortioList portio2_list; + VMChangeStateEntry *vmstate; +}; + +#define TYPE_IDE_DEVICE "ide-device" +OBJECT_DECLARE_TYPE(IDEDevice, IDEDeviceClass, IDE_DEVICE) + +struct IDEDeviceClass { + DeviceClass parent_class; + void (*realize)(IDEDevice *dev, Error **errp); +}; + +struct IDEDevice { + DeviceState qdev; + uint32_t unit; + BlockConf conf; + int chs_trans; + char *version; + char *serial; + char *model; + uint64_t wwn; + /* + * 0x0000 - rotation rate not reported + * 0x0001 - non-rotating medium (SSD) + * 0x0002-0x0400 - reserved + * 0x0401-0xffe - rotations per minute + * 0xffff - reserved + */ + uint16_t rotation_rate; +}; + +/* These are used for the error_status field of IDEBus */ +#define IDE_RETRY_MASK 0xf8 +#define IDE_RETRY_DMA 0x08 +#define IDE_RETRY_PIO 0x10 +#define IDE_RETRY_ATAPI 0x20 /* reused IDE_RETRY_READ bit */ +#define IDE_RETRY_READ 0x20 +#define IDE_RETRY_FLUSH 0x40 +#define IDE_RETRY_TRIM 0x80 +#define IDE_RETRY_HBA 0x100 + +#define IS_IDE_RETRY_DMA(_status) \ + ((_status) & IDE_RETRY_DMA) + +#define IS_IDE_RETRY_PIO(_status) \ + ((_status) & IDE_RETRY_PIO) + +/* + * The method of the IDE_RETRY_ATAPI determination is to use a previously + * impossible bit combination as a new status value. + */ +#define IS_IDE_RETRY_ATAPI(_status) \ + (((_status) & IDE_RETRY_MASK) == IDE_RETRY_ATAPI) + +static inline uint8_t ide_dma_cmd_to_retry(uint8_t dma_cmd) +{ + switch (dma_cmd) { + case IDE_DMA_READ: + return IDE_RETRY_DMA | IDE_RETRY_READ; + case IDE_DMA_WRITE: + return IDE_RETRY_DMA; + case IDE_DMA_TRIM: + return IDE_RETRY_DMA | IDE_RETRY_TRIM; + case IDE_DMA_ATAPI: + return IDE_RETRY_ATAPI; + default: + break; + } + return 0; +} + +static inline IDEState *idebus_active_if(IDEBus *bus) +{ + return bus->ifs + bus->unit; +} + +static inline void ide_set_irq(IDEBus *bus) +{ + if (!(bus->cmd & IDE_CTRL_DISABLE_IRQ)) { + qemu_irq_raise(bus->irq); + } +} + +/* hw/ide/core.c */ +extern const VMStateDescription vmstate_ide_bus; + +#define VMSTATE_IDE_BUS(_field, _state) \ + VMSTATE_STRUCT(_field, _state, 1, vmstate_ide_bus, IDEBus) + +#define VMSTATE_IDE_BUS_ARRAY(_field, _state, _num) \ + VMSTATE_STRUCT_ARRAY(_field, _state, _num, 1, vmstate_ide_bus, IDEBus) + +extern const VMStateDescription vmstate_ide_drive; + +#define VMSTATE_IDE_DRIVES(_field, _state) \ + VMSTATE_STRUCT_ARRAY(_field, _state, 2, 3, vmstate_ide_drive, IDEState) + +#define VMSTATE_IDE_DRIVE(_field, _state) \ + VMSTATE_STRUCT(_field, _state, 1, vmstate_ide_drive, IDEState) + +void ide_bus_reset(IDEBus *bus); +int64_t ide_get_sector(IDEState *s); +void ide_set_sector(IDEState *s, int64_t sector_num); + +void ide_start_dma(IDEState *s, BlockCompletionFunc *cb); +void dma_buf_commit(IDEState *s, uint32_t tx_bytes); +void ide_dma_error(IDEState *s); +void ide_abort_command(IDEState *s); + +void ide_atapi_cmd_ok(IDEState *s); +void ide_atapi_cmd_error(IDEState *s, int sense_key, int asc); +void ide_atapi_dma_restart(IDEState *s); +void ide_atapi_io_error(IDEState *s, int ret); + +void ide_ioport_write(void *opaque, uint32_t addr, uint32_t val); +uint32_t ide_ioport_read(void *opaque, uint32_t addr1); +uint32_t ide_status_read(void *opaque, uint32_t addr); +void ide_ctrl_write(void *opaque, uint32_t addr, uint32_t val); +void ide_data_writew(void *opaque, uint32_t addr, uint32_t val); +uint32_t ide_data_readw(void *opaque, uint32_t addr); +void ide_data_writel(void *opaque, uint32_t addr, uint32_t val); +uint32_t ide_data_readl(void *opaque, uint32_t addr); + +int ide_init_drive(IDEState *s, BlockBackend *blk, IDEDriveKind kind, + const char *version, const char *serial, const char *model, + uint64_t wwn, + uint32_t cylinders, uint32_t heads, uint32_t secs, + int chs_trans, Error **errp); +void ide_init2(IDEBus *bus, qemu_irq irq); +void ide_exit(IDEState *s); +void ide_init_ioport(IDEBus *bus, ISADevice *isa, int iobase, int iobase2); +void ide_register_restart_cb(IDEBus *bus); + +void ide_exec_cmd(IDEBus *bus, uint32_t val); + +void ide_transfer_start(IDEState *s, uint8_t *buf, int size, + EndTransferFunc *end_transfer_func); +bool ide_transfer_start_norecurse(IDEState *s, uint8_t *buf, int size, + EndTransferFunc *end_transfer_func); +void ide_transfer_stop(IDEState *s); +void ide_set_inactive(IDEState *s, bool more); +BlockAIOCB *ide_issue_trim( + int64_t offset, QEMUIOVector *qiov, + BlockCompletionFunc *cb, void *cb_opaque, void *opaque); +BlockAIOCB *ide_buffered_readv(IDEState *s, int64_t sector_num, + QEMUIOVector *iov, int nb_sectors, + BlockCompletionFunc *cb, void *opaque); +void ide_cancel_dma_sync(IDEState *s); + +/* hw/ide/atapi.c */ +void ide_atapi_cmd(IDEState *s); +void ide_atapi_cmd_reply_end(IDEState *s); + +/* hw/ide/qdev.c */ +void ide_bus_new(IDEBus *idebus, size_t idebus_size, DeviceState *dev, + int bus_id, int max_units); +IDEDevice *ide_create_drive(IDEBus *bus, int unit, DriveInfo *drive); + +int ide_handle_rw_error(IDEState *s, int error, int op); + +#endif /* HW_IDE_INTERNAL_H */ diff --git a/include/hw/ide/pci.h b/include/hw/ide/pci.h new file mode 100644 index 000000000..d8384e1c4 --- /dev/null +++ b/include/hw/ide/pci.h @@ -0,0 +1,72 @@ +#ifndef HW_IDE_PCI_H +#define HW_IDE_PCI_H + +#include "hw/ide/internal.h" +#include "hw/pci/pci.h" +#include "qom/object.h" + +#define BM_STATUS_DMAING 0x01 +#define BM_STATUS_ERROR 0x02 +#define BM_STATUS_INT 0x04 + +#define BM_CMD_START 0x01 +#define BM_CMD_READ 0x08 + +typedef struct BMDMAState { + IDEDMA dma; + uint8_t cmd; + uint8_t status; + uint32_t addr; + + IDEBus *bus; + /* current transfer state */ + uint32_t cur_addr; + uint32_t cur_prd_last; + uint32_t cur_prd_addr; + uint32_t cur_prd_len; + BlockCompletionFunc *dma_cb; + MemoryRegion addr_ioport; + MemoryRegion extra_io; + qemu_irq irq; + + /* Bit 0-2 and 7: BM status register + * Bit 3-6: bus->error_status */ + uint8_t migration_compat_status; + uint8_t migration_retry_unit; + int64_t migration_retry_sector_num; + uint32_t migration_retry_nsector; + + struct PCIIDEState *pci_dev; +} BMDMAState; + +#define TYPE_PCI_IDE "pci-ide" +OBJECT_DECLARE_SIMPLE_TYPE(PCIIDEState, PCI_IDE) + +struct PCIIDEState { + /*< private >*/ + PCIDevice parent_obj; + /*< public >*/ + + IDEBus bus[2]; + BMDMAState bmdma[2]; + uint32_t secondary; /* used only for cmd646 */ + MemoryRegion bmdma_bar; + MemoryRegion cmd_bar[2]; + MemoryRegion data_bar[2]; +}; + +static inline IDEState *bmdma_active_if(BMDMAState *bmdma) +{ + assert(bmdma->bus->retry_unit != (uint8_t)-1); + return bmdma->bus->ifs + bmdma->bus->retry_unit; +} + +void bmdma_init(IDEBus *bus, BMDMAState *bm, PCIIDEState *d); +void bmdma_cmd_writeb(BMDMAState *bm, uint32_t val); +extern MemoryRegionOps bmdma_addr_ioport_ops; +void pci_ide_create_devs(PCIDevice *dev); + +extern const VMStateDescription vmstate_ide_pci; +extern const MemoryRegionOps pci_ide_cmd_le_ops; +extern const MemoryRegionOps pci_ide_data_le_ops; +#endif diff --git a/include/hw/input/adb-keys.h b/include/hw/input/adb-keys.h new file mode 100644 index 000000000..525fba8a6 --- /dev/null +++ b/include/hw/input/adb-keys.h @@ -0,0 +1,141 @@ +/* + * QEMU System Emulator + * + * Copyright (c) 2016 John Arbuckle + * + * This work is licensed under the terms of the GNU GPL, version 2 or later. + * See the COPYING file in the top-level directory. + */ + +/* + * adb-keys.h + * + * Provides an enum of all the Macintosh keycodes. + * Additional information: http://www.archive.org/stream/apple-guide-macintosh-family-hardware/Apple_Guide_to_the_Macintosh_Family_Hardware_2e#page/n345/mode/2up + * page 308 + */ + +#ifndef ADB_KEYS_H +#define ADB_KEYS_H + +enum { + ADB_KEY_A = 0x00, + ADB_KEY_B = 0x0b, + ADB_KEY_C = 0x08, + ADB_KEY_D = 0x02, + ADB_KEY_E = 0x0e, + ADB_KEY_F = 0x03, + ADB_KEY_G = 0x05, + ADB_KEY_H = 0x04, + ADB_KEY_I = 0x22, + ADB_KEY_J = 0x26, + ADB_KEY_K = 0x28, + ADB_KEY_L = 0x25, + ADB_KEY_M = 0x2e, + ADB_KEY_N = 0x2d, + ADB_KEY_O = 0x1f, + ADB_KEY_P = 0x23, + ADB_KEY_Q = 0x0c, + ADB_KEY_R = 0x0f, + ADB_KEY_S = 0x01, + ADB_KEY_T = 0x11, + ADB_KEY_U = 0x20, + ADB_KEY_V = 0x09, + ADB_KEY_W = 0x0d, + ADB_KEY_X = 0x07, + ADB_KEY_Y = 0x10, + ADB_KEY_Z = 0x06, + + ADB_KEY_0 = 0x1d, + ADB_KEY_1 = 0x12, + ADB_KEY_2 = 0x13, + ADB_KEY_3 = 0x14, + ADB_KEY_4 = 0x15, + ADB_KEY_5 = 0x17, + ADB_KEY_6 = 0x16, + ADB_KEY_7 = 0x1a, + ADB_KEY_8 = 0x1c, + ADB_KEY_9 = 0x19, + + ADB_KEY_GRAVE_ACCENT = 0x32, + ADB_KEY_MINUS = 0x1b, + ADB_KEY_EQUAL = 0x18, + ADB_KEY_DELETE = 0x33, + ADB_KEY_CAPS_LOCK = 0x39, + ADB_KEY_TAB = 0x30, + ADB_KEY_RETURN = 0x24, + ADB_KEY_LEFT_BRACKET = 0x21, + ADB_KEY_RIGHT_BRACKET = 0x1e, + ADB_KEY_BACKSLASH = 0x2a, + ADB_KEY_SEMICOLON = 0x29, + ADB_KEY_APOSTROPHE = 0x27, + ADB_KEY_COMMA = 0x2b, + ADB_KEY_PERIOD = 0x2f, + ADB_KEY_FORWARD_SLASH = 0x2c, + ADB_KEY_LEFT_SHIFT = 0x38, + ADB_KEY_RIGHT_SHIFT = 0x7b, + ADB_KEY_SPACEBAR = 0x31, + ADB_KEY_LEFT_CONTROL = 0x36, + ADB_KEY_RIGHT_CONTROL = 0x7d, + ADB_KEY_LEFT_OPTION = 0x3a, + ADB_KEY_RIGHT_OPTION = 0x7c, + ADB_KEY_COMMAND = 0x37, + + ADB_KEY_KP_0 = 0x52, + ADB_KEY_KP_1 = 0x53, + ADB_KEY_KP_2 = 0x54, + ADB_KEY_KP_3 = 0x55, + ADB_KEY_KP_4 = 0x56, + ADB_KEY_KP_5 = 0x57, + ADB_KEY_KP_6 = 0x58, + ADB_KEY_KP_7 = 0x59, + ADB_KEY_KP_8 = 0x5b, + ADB_KEY_KP_9 = 0x5c, + ADB_KEY_KP_PERIOD = 0x41, + ADB_KEY_KP_ENTER = 0x4c, + ADB_KEY_KP_PLUS = 0x45, + ADB_KEY_KP_SUBTRACT = 0x4e, + ADB_KEY_KP_MULTIPLY = 0x43, + ADB_KEY_KP_DIVIDE = 0x4b, + ADB_KEY_KP_EQUAL = 0x51, + ADB_KEY_KP_CLEAR = 0x47, + + ADB_KEY_UP = 0x3e, + ADB_KEY_DOWN = 0x3d, + ADB_KEY_LEFT = 0x3b, + ADB_KEY_RIGHT = 0x3c, + + ADB_KEY_HELP = 0x72, + ADB_KEY_HOME = 0x73, + ADB_KEY_PAGE_UP = 0x74, + ADB_KEY_PAGE_DOWN = 0x79, + ADB_KEY_END = 0x77, + ADB_KEY_FORWARD_DELETE = 0x75, + + ADB_KEY_ESC = 0x35, + ADB_KEY_F1 = 0x7a, + ADB_KEY_F2 = 0x78, + ADB_KEY_F3 = 0x63, + ADB_KEY_F4 = 0x76, + ADB_KEY_F5 = 0x60, + ADB_KEY_F6 = 0x61, + ADB_KEY_F7 = 0x62, + ADB_KEY_F8 = 0x64, + ADB_KEY_F9 = 0x65, + ADB_KEY_F10 = 0x6d, + ADB_KEY_F11 = 0x67, + ADB_KEY_F12 = 0x6f, + ADB_KEY_F13 = 0x69, + ADB_KEY_F14 = 0x6b, + ADB_KEY_F15 = 0x71, + + ADB_KEY_VOLUME_UP = 0x48, + ADB_KEY_VOLUME_DOWN = 0x49, + ADB_KEY_VOLUME_MUTE = 0x4a, + ADB_KEY_POWER = 0x7f7f +}; + +/* Could not find the value for this key. */ +/* #define ADB_KEY_EJECT */ + +#endif /* ADB_KEYS_H */ diff --git a/include/hw/input/adb.h b/include/hw/input/adb.h new file mode 100644 index 000000000..20fced15f --- /dev/null +++ b/include/hw/input/adb.h @@ -0,0 +1,108 @@ +/* + * QEMU ADB emulation shared definitions and prototypes + * + * Copyright (c) 2004-2007 Fabrice Bellard + * Copyright (c) 2007 Jocelyn Mayer + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ + +#ifndef ADB_H +#define ADB_H + +#include "hw/qdev-core.h" +#include "qom/object.h" + +#define MAX_ADB_DEVICES 16 + +#define ADB_MAX_OUT_LEN 16 + +typedef struct ADBDevice ADBDevice; + +/* buf = NULL means polling */ +typedef int ADBDeviceRequest(ADBDevice *d, uint8_t *buf_out, + const uint8_t *buf, int len); + +typedef bool ADBDeviceHasData(ADBDevice *d); + +#define TYPE_ADB_DEVICE "adb-device" +OBJECT_DECLARE_TYPE(ADBDevice, ADBDeviceClass, ADB_DEVICE) + +struct ADBDevice { + /*< private >*/ + DeviceState parent_obj; + /*< public >*/ + + int devaddr; + int handler; +}; + + +struct ADBDeviceClass { + /*< private >*/ + DeviceClass parent_class; + /*< public >*/ + + ADBDeviceRequest *devreq; + ADBDeviceHasData *devhasdata; +}; + +#define TYPE_ADB_BUS "apple-desktop-bus" +OBJECT_DECLARE_SIMPLE_TYPE(ADBBusState, ADB_BUS) + +#define ADB_STATUS_BUSTIMEOUT 0x1 +#define ADB_STATUS_POLLREPLY 0x2 + +struct ADBBusState { + /*< private >*/ + BusState parent_obj; + /*< public >*/ + + ADBDevice *devices[MAX_ADB_DEVICES]; + uint16_t pending; + int nb_devices; + int poll_index; + uint8_t status; + + QEMUTimer *autopoll_timer; + bool autopoll_enabled; + bool autopoll_blocked; + uint8_t autopoll_rate_ms; + uint16_t autopoll_mask; + void (*autopoll_cb)(void *opaque); + void *autopoll_cb_opaque; +}; + +int adb_request(ADBBusState *s, uint8_t *buf_out, + const uint8_t *buf, int len); +int adb_poll(ADBBusState *s, uint8_t *buf_out, uint16_t poll_mask); + +void adb_autopoll_block(ADBBusState *s); +void adb_autopoll_unblock(ADBBusState *s); + +void adb_set_autopoll_enabled(ADBBusState *s, bool enabled); +void adb_set_autopoll_rate_ms(ADBBusState *s, int rate_ms); +void adb_set_autopoll_mask(ADBBusState *s, uint16_t mask); +void adb_register_autopoll_callback(ADBBusState *s, void (*cb)(void *opaque), + void *opaque); + +#define TYPE_ADB_KEYBOARD "adb-keyboard" +#define TYPE_ADB_MOUSE "adb-mouse" + +#endif /* ADB_H */ diff --git a/include/hw/input/gamepad.h b/include/hw/input/gamepad.h new file mode 100644 index 000000000..6f6aa2406 --- /dev/null +++ b/include/hw/input/gamepad.h @@ -0,0 +1,18 @@ +/* + * Gamepad style buttons connected to IRQ/GPIO lines + * + * Copyright (c) 2007 CodeSourcery. + * Written by Paul Brook + * + * This work is licensed under the terms of the GNU GPL, version 2 or later. + * See the COPYING file in the top-level directory. + */ + +#ifndef HW_INPUT_GAMEPAD_H +#define HW_INPUT_GAMEPAD_H + + +/* stellaris_input.c */ +void stellaris_gamepad_init(int n, qemu_irq *irq, const int *keycode); + +#endif diff --git a/include/hw/input/hid.h b/include/hw/input/hid.h new file mode 100644 index 000000000..6a9d7bf46 --- /dev/null +++ b/include/hw/input/hid.h @@ -0,0 +1,83 @@ +#ifndef QEMU_HID_H +#define QEMU_HID_H + +#include "ui/input.h" + +#define HID_MOUSE 1 +#define HID_TABLET 2 +#define HID_KEYBOARD 3 + +typedef struct HIDPointerEvent { + int32_t xdx, ydy; /* relative iff it's a mouse, otherwise absolute */ + int32_t dz, buttons_state; +} HIDPointerEvent; + +#define QUEUE_LENGTH 16 /* should be enough for a triple-click */ +#define QUEUE_MASK (QUEUE_LENGTH-1u) +#define QUEUE_INCR(v) ((v)++, (v) &= QUEUE_MASK) + +typedef struct HIDState HIDState; +typedef void (*HIDEventFunc)(HIDState *s); + +typedef struct HIDMouseState { + HIDPointerEvent queue[QUEUE_LENGTH]; + int mouse_grabbed; +} HIDMouseState; + +typedef struct HIDKeyboardState { + uint32_t keycodes[QUEUE_LENGTH]; + uint16_t modifiers; + uint8_t leds; + uint8_t key[16]; + int32_t keys; +} HIDKeyboardState; + +struct HIDState { + union { + HIDMouseState ptr; + HIDKeyboardState kbd; + }; + uint32_t head; /* index into circular queue */ + uint32_t n; + int kind; + int32_t protocol; + uint8_t idle; + bool idle_pending; + QEMUTimer *idle_timer; + HIDEventFunc event; + QemuInputHandlerState *s; +}; + +void hid_init(HIDState *hs, int kind, HIDEventFunc event); +void hid_reset(HIDState *hs); +void hid_free(HIDState *hs); + +bool hid_has_events(HIDState *hs); +void hid_set_next_idle(HIDState *hs); +void hid_pointer_activate(HIDState *hs); +int hid_pointer_poll(HIDState *hs, uint8_t *buf, int len); +int hid_keyboard_poll(HIDState *hs, uint8_t *buf, int len); +int hid_keyboard_write(HIDState *hs, uint8_t *buf, int len); + +extern const VMStateDescription vmstate_hid_keyboard_device; + +#define VMSTATE_HID_KEYBOARD_DEVICE(_field, _state) { \ + .name = (stringify(_field)), \ + .size = sizeof(HIDState), \ + .vmsd = &vmstate_hid_keyboard_device, \ + .flags = VMS_STRUCT, \ + .offset = vmstate_offset_value(_state, _field, HIDState), \ +} + +extern const VMStateDescription vmstate_hid_ptr_device; + +#define VMSTATE_HID_POINTER_DEVICE(_field, _state) { \ + .name = (stringify(_field)), \ + .size = sizeof(HIDState), \ + .vmsd = &vmstate_hid_ptr_device, \ + .flags = VMS_STRUCT, \ + .offset = vmstate_offset_value(_state, _field, HIDState), \ +} + + +#endif /* QEMU_HID_H */ diff --git a/include/hw/input/i8042.h b/include/hw/input/i8042.h new file mode 100644 index 000000000..1d90432da --- /dev/null +++ b/include/hw/input/i8042.h @@ -0,0 +1,26 @@ +/* + * QEMU PS/2 Controller + * + * Copyright (c) 2003 Fabrice Bellard + * + * SPDX-License-Identifier: MIT + */ +#ifndef HW_INPUT_I8042_H +#define HW_INPUT_I8042_H + +#include "hw/isa/isa.h" +#include "qom/object.h" + +#define TYPE_I8042 "i8042" +OBJECT_DECLARE_SIMPLE_TYPE(ISAKBDState, I8042) + +#define I8042_A20_LINE "a20" + + +void i8042_mm_init(qemu_irq kbd_irq, qemu_irq mouse_irq, + MemoryRegion *region, ram_addr_t size, + hwaddr mask); +void i8042_isa_mouse_fake_event(ISAKBDState *isa); +void i8042_setup_a20_line(ISADevice *dev, qemu_irq a20_out); + +#endif /* HW_INPUT_I8042_H */ diff --git a/include/hw/input/lasips2.h b/include/hw/input/lasips2.h new file mode 100644 index 000000000..0cd7b5906 --- /dev/null +++ b/include/hw/input/lasips2.h @@ -0,0 +1,16 @@ +/* + * QEMU LASI PS/2 emulation + * + * Copyright (c) 2019 Sven Schnelle + * + */ +#ifndef HW_INPUT_LASIPS2_H +#define HW_INPUT_LASIPS2_H + +#include "exec/hwaddr.h" + +#define TYPE_LASIPS2 "lasips2" + +void lasips2_init(MemoryRegion *address_space, hwaddr base, qemu_irq irq); + +#endif /* HW_INPUT_LASIPS2_H */ diff --git a/include/hw/input/ps2.h b/include/hw/input/ps2.h new file mode 100644 index 000000000..35d983897 --- /dev/null +++ b/include/hw/input/ps2.h @@ -0,0 +1,52 @@ +/* + * QEMU PS/2 keyboard/mouse emulation + * + * Copyright (C) 2003 Fabrice Bellard + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ + +#ifndef HW_PS2_H +#define HW_PS2_H + +#define PS2_MOUSE_BUTTON_LEFT 0x01 +#define PS2_MOUSE_BUTTON_RIGHT 0x02 +#define PS2_MOUSE_BUTTON_MIDDLE 0x04 +#define PS2_MOUSE_BUTTON_SIDE 0x08 +#define PS2_MOUSE_BUTTON_EXTRA 0x10 + +typedef struct PS2State PS2State; + +/* ps2.c */ +void *ps2_kbd_init(void (*update_irq)(void *, int), void *update_arg); +void *ps2_mouse_init(void (*update_irq)(void *, int), void *update_arg); +void ps2_write_mouse(void *, int val); +void ps2_write_keyboard(void *, int val); +uint32_t ps2_read_data(PS2State *s); +void ps2_queue_noirq(PS2State *s, int b); +void ps2_raise_irq(PS2State *s); +void ps2_queue(PS2State *s, int b); +void ps2_queue_2(PS2State *s, int b1, int b2); +void ps2_queue_3(PS2State *s, int b1, int b2, int b3); +void ps2_queue_4(PS2State *s, int b1, int b2, int b3, int b4); +void ps2_keyboard_set_translation(void *opaque, int mode); +void ps2_mouse_fake_event(void *opaque); +int ps2_queue_empty(PS2State *s); + +#endif /* HW_PS2_H */ diff --git a/include/hw/input/tsc2xxx.h b/include/hw/input/tsc2xxx.h new file mode 100644 index 000000000..5b76ebc17 --- /dev/null +++ b/include/hw/input/tsc2xxx.h @@ -0,0 +1,41 @@ +/* + * TI touchscreen controller + * + * Copyright (c) 2006 Andrzej Zaborowski + * Copyright (C) 2008 Nokia Corporation + * + * This work is licensed under the terms of the GNU GPL, version 2 or later. + * See the COPYING file in the top-level directory. + */ + +#ifndef HW_INPUT_TSC2XXX_H +#define HW_INPUT_TSC2XXX_H + +typedef struct MouseTransformInfo { + /* Touchscreen resolution */ + int x; + int y; + /* Calibration values as used/generated by tslib */ + int a[7]; +} MouseTransformInfo; + +typedef struct uWireSlave { + uint16_t (*receive)(void *opaque); + void (*send)(void *opaque, uint16_t data); + void *opaque; +} uWireSlave; + +/* tsc210x.c */ +uWireSlave *tsc2102_init(qemu_irq pint); +uWireSlave *tsc2301_init(qemu_irq penirq, qemu_irq kbirq, qemu_irq dav); +I2SCodec *tsc210x_codec(uWireSlave *chip); +uint32_t tsc210x_txrx(void *opaque, uint32_t value, int len); +void tsc210x_set_transform(uWireSlave *chip, MouseTransformInfo *info); +void tsc210x_key_event(uWireSlave *chip, int key, int down); + +/* tsc2005.c */ +void *tsc2005_init(qemu_irq pintdav); +uint32_t tsc2005_txrx(void *opaque, uint32_t value, int len); +void tsc2005_set_transform(void *opaque, MouseTransformInfo *info); + +#endif diff --git a/include/hw/intc/allwinner-a10-pic.h b/include/hw/intc/allwinner-a10-pic.h new file mode 100644 index 000000000..b8364d3ed --- /dev/null +++ b/include/hw/intc/allwinner-a10-pic.h @@ -0,0 +1,43 @@ +#ifndef ALLWINNER_A10_PIC_H +#define ALLWINNER_A10_PIC_H + +#include "hw/sysbus.h" +#include "qom/object.h" + +#define TYPE_AW_A10_PIC "allwinner-a10-pic" +OBJECT_DECLARE_SIMPLE_TYPE(AwA10PICState, AW_A10_PIC) + +#define AW_A10_PIC_VECTOR 0 +#define AW_A10_PIC_BASE_ADDR 4 +#define AW_A10_PIC_PROTECT 8 +#define AW_A10_PIC_NMI 0xc +#define AW_A10_PIC_IRQ_PENDING 0x10 +#define AW_A10_PIC_FIQ_PENDING 0x20 +#define AW_A10_PIC_SELECT 0x30 +#define AW_A10_PIC_ENABLE 0x40 +#define AW_A10_PIC_MASK 0x50 + +#define AW_A10_PIC_INT_NR 95 +#define AW_A10_PIC_REG_NUM DIV_ROUND_UP(AW_A10_PIC_INT_NR, 32) + +struct AwA10PICState { + /*< private >*/ + SysBusDevice parent_obj; + /*< public >*/ + MemoryRegion iomem; + qemu_irq parent_fiq; + qemu_irq parent_irq; + + uint32_t vector; + uint32_t base_addr; + uint32_t protect; + uint32_t nmi; + uint32_t irq_pending[AW_A10_PIC_REG_NUM]; + uint32_t fiq_pending[AW_A10_PIC_REG_NUM]; + uint32_t select[AW_A10_PIC_REG_NUM]; + uint32_t enable[AW_A10_PIC_REG_NUM]; + uint32_t mask[AW_A10_PIC_REG_NUM]; + /*priority setting here*/ +}; + +#endif diff --git a/include/hw/intc/arm_gic.h b/include/hw/intc/arm_gic.h new file mode 100644 index 000000000..116ccbb5a --- /dev/null +++ b/include/hw/intc/arm_gic.h @@ -0,0 +1,89 @@ +/* + * ARM GIC support + * + * Copyright (c) 2012 Linaro Limited + * Written by Peter Maydell + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, see . + */ + +/* + * QEMU interface: + * + QOM property "num-cpu": number of CPUs to support + * + QOM property "num-irq": number of IRQs (including both SPIs and PPIs) + * + QOM property "revision": GIC version (1 or 2), or 0 for the 11MPCore GIC + * + QOM property "has-security-extensions": set true if the GIC should + * implement the security extensions + * + QOM property "has-virtualization-extensions": set true if the GIC should + * implement the virtualization extensions + * + unnamed GPIO inputs: (where P is number of SPIs, i.e. num-irq - 32) + * [0..P-1] SPIs + * [P..P+31] PPIs for CPU 0 + * [P+32..P+63] PPIs for CPU 1 + * ... + * + sysbus IRQs: (in order; number will vary depending on number of cores) + * - IRQ for CPU 0 + * - IRQ for CPU 1 + * ... + * - FIQ for CPU 0 + * - FIQ for CPU 1 + * ... + * - VIRQ for CPU 0 (exists even if virt extensions not present) + * - VIRQ for CPU 1 (exists even if virt extensions not present) + * ... + * - VFIQ for CPU 0 (exists even if virt extensions not present) + * - VFIQ for CPU 1 (exists even if virt extensions not present) + * ... + * - maintenance IRQ for CPU i/f 0 (only if virt extensions present) + * - maintenance IRQ for CPU i/f 1 (only if virt extensions present) + * + sysbus MMIO regions: (in order; numbers will vary depending on + * whether virtualization extensions are present and on number of cores) + * - distributor registers (GICD*) + * - CPU interface for the accessing core (GICC*) + * - virtual interface control registers (GICH*) (only if virt extns present) + * - virtual CPU interface for the accessing core (GICV*) (only if virt) + * - CPU 0 CPU interface registers + * - CPU 1 CPU interface registers + * ... + * - CPU 0 virtual interface control registers (only if virt extns present) + * - CPU 1 virtual interface control registers (only if virt extns present) + * ... + */ + +#ifndef HW_ARM_GIC_H +#define HW_ARM_GIC_H + +#include "arm_gic_common.h" +#include "qom/object.h" + +/* Number of SGI target-list bits */ +#define GIC_TARGETLIST_BITS 8 +#define GIC_MAX_PRIORITY_BITS 8 +#define GIC_MIN_PRIORITY_BITS 4 + +#define TYPE_ARM_GIC "arm_gic" +typedef struct ARMGICClass ARMGICClass; +/* This is reusing the GICState typedef from TYPE_ARM_GIC_COMMON */ +DECLARE_OBJ_CHECKERS(GICState, ARMGICClass, + ARM_GIC, TYPE_ARM_GIC) + +struct ARMGICClass { + /*< private >*/ + ARMGICCommonClass parent_class; + /*< public >*/ + + DeviceRealize parent_realize; +}; + +#endif diff --git a/include/hw/intc/arm_gic_common.h b/include/hw/intc/arm_gic_common.h new file mode 100644 index 000000000..708037500 --- /dev/null +++ b/include/hw/intc/arm_gic_common.h @@ -0,0 +1,168 @@ +/* + * ARM GIC support + * + * Copyright (c) 2012 Linaro Limited + * Written by Peter Maydell + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, see . + */ + +#ifndef HW_ARM_GIC_COMMON_H +#define HW_ARM_GIC_COMMON_H + +#include "hw/sysbus.h" +#include "qom/object.h" + +/* Maximum number of possible interrupts, determined by the GIC architecture */ +#define GIC_MAXIRQ 1020 +/* First 32 are private to each CPU (SGIs and PPIs). */ +#define GIC_INTERNAL 32 +#define GIC_NR_SGIS 16 +/* Maximum number of possible CPU interfaces, determined by GIC architecture */ +#define GIC_NCPU 8 +/* Maximum number of possible CPU interfaces with their respective vCPU */ +#define GIC_NCPU_VCPU (GIC_NCPU * 2) + +#define MAX_NR_GROUP_PRIO 128 +#define GIC_NR_APRS (MAX_NR_GROUP_PRIO / 32) + +#define GIC_MIN_BPR 0 +#define GIC_MIN_ABPR (GIC_MIN_BPR + 1) + +/* Architectural maximum number of list registers in the virtual interface */ +#define GIC_MAX_LR 64 + +/* Only 32 priority levels and 32 preemption levels in the vCPU interfaces */ +#define GIC_VIRT_MAX_GROUP_PRIO_BITS 5 +#define GIC_VIRT_MAX_NR_GROUP_PRIO (1 << GIC_VIRT_MAX_GROUP_PRIO_BITS) +#define GIC_VIRT_NR_APRS (GIC_VIRT_MAX_NR_GROUP_PRIO / 32) + +#define GIC_VIRT_MIN_BPR 2 +#define GIC_VIRT_MIN_ABPR (GIC_VIRT_MIN_BPR + 1) + +typedef struct gic_irq_state { + /* The enable bits are only banked for per-cpu interrupts. */ + uint8_t enabled; + uint8_t pending; + uint8_t active; + uint8_t level; + bool model; /* 0 = N:N, 1 = 1:N */ + bool edge_trigger; /* true: edge-triggered, false: level-triggered */ + uint8_t group; +} gic_irq_state; + +struct GICState { + /*< private >*/ + SysBusDevice parent_obj; + /*< public >*/ + + qemu_irq parent_irq[GIC_NCPU]; + qemu_irq parent_fiq[GIC_NCPU]; + qemu_irq parent_virq[GIC_NCPU]; + qemu_irq parent_vfiq[GIC_NCPU]; + qemu_irq maintenance_irq[GIC_NCPU]; + + /* GICD_CTLR; for a GIC with the security extensions the NS banked version + * of this register is just an alias of bit 1 of the S banked version. + */ + uint32_t ctlr; + /* GICC_CTLR; again, the NS banked version is just aliases of bits of + * the S banked register, so our state only needs to store the S version. + */ + uint32_t cpu_ctlr[GIC_NCPU_VCPU]; + + gic_irq_state irq_state[GIC_MAXIRQ]; + uint8_t irq_target[GIC_MAXIRQ]; + uint8_t priority1[GIC_INTERNAL][GIC_NCPU]; + uint8_t priority2[GIC_MAXIRQ - GIC_INTERNAL]; + /* For each SGI on the target CPU, we store 8 bits + * indicating which source CPUs have made this SGI + * pending on the target CPU. These correspond to + * the bytes in the GIC_SPENDSGIR* registers as + * read by the target CPU. + */ + uint8_t sgi_pending[GIC_NR_SGIS][GIC_NCPU]; + + uint16_t priority_mask[GIC_NCPU_VCPU]; + uint16_t running_priority[GIC_NCPU_VCPU]; + uint16_t current_pending[GIC_NCPU_VCPU]; + uint32_t n_prio_bits; + + /* If we present the GICv2 without security extensions to a guest, + * the guest can configure the GICC_CTLR to configure group 1 binary point + * in the abpr. + * For a GIC with Security Extensions we use use bpr for the + * secure copy and abpr as storage for the non-secure copy of the register. + */ + uint8_t bpr[GIC_NCPU_VCPU]; + uint8_t abpr[GIC_NCPU_VCPU]; + + /* The APR is implementation defined, so we choose a layout identical to + * the KVM ABI layout for QEMU's implementation of the gic: + * If an interrupt for preemption level X is active, then + * APRn[X mod 32] == 0b1, where n = X / 32 + * otherwise the bit is clear. + */ + uint32_t apr[GIC_NR_APRS][GIC_NCPU]; + uint32_t nsapr[GIC_NR_APRS][GIC_NCPU]; + + /* Virtual interface control registers */ + uint32_t h_hcr[GIC_NCPU]; + uint32_t h_misr[GIC_NCPU]; + uint32_t h_lr[GIC_MAX_LR][GIC_NCPU]; + uint32_t h_apr[GIC_NCPU]; + + /* Number of LRs implemented in this GIC instance */ + uint32_t num_lrs; + + uint32_t num_cpu; + + MemoryRegion iomem; /* Distributor */ + /* This is just so we can have an opaque pointer which identifies + * both this GIC and which CPU interface we should be accessing. + */ + struct GICState *backref[GIC_NCPU]; + MemoryRegion cpuiomem[GIC_NCPU + 1]; /* CPU interfaces */ + MemoryRegion vifaceiomem[GIC_NCPU + 1]; /* Virtual interfaces */ + MemoryRegion vcpuiomem; /* vCPU interface */ + + uint32_t num_irq; + uint32_t revision; + bool security_extn; + bool virt_extn; + bool irq_reset_nonsecure; /* configure IRQs as group 1 (NS) on reset? */ + int dev_fd; /* kvm device fd if backed by kvm vgic support */ + Error *migration_blocker; +}; +typedef struct GICState GICState; + +#define TYPE_ARM_GIC_COMMON "arm_gic_common" +typedef struct ARMGICCommonClass ARMGICCommonClass; +DECLARE_OBJ_CHECKERS(GICState, ARMGICCommonClass, + ARM_GIC_COMMON, TYPE_ARM_GIC_COMMON) + +struct ARMGICCommonClass { + /*< private >*/ + SysBusDeviceClass parent_class; + /*< public >*/ + + void (*pre_save)(GICState *s); + void (*post_load)(GICState *s); +}; + +void gic_init_irqs_and_mmio(GICState *s, qemu_irq_handler handler, + const MemoryRegionOps *ops, + const MemoryRegionOps *virt_ops); + +#endif diff --git a/include/hw/intc/arm_gicv3.h b/include/hw/intc/arm_gicv3.h new file mode 100644 index 000000000..a81a6ae7e --- /dev/null +++ b/include/hw/intc/arm_gicv3.h @@ -0,0 +1,32 @@ +/* + * ARM Generic Interrupt Controller v3 + * + * Copyright (c) 2015 Huawei. + * Copyright (c) 2016 Linaro Limited + * Written by Shlomo Pongratz, Peter Maydell + * + * This code is licensed under the GPL, version 2 or (at your option) + * any later version. + */ + +#ifndef HW_ARM_GICV3_H +#define HW_ARM_GICV3_H + +#include "arm_gicv3_common.h" +#include "qom/object.h" + +#define TYPE_ARM_GICV3 "arm-gicv3" +typedef struct ARMGICv3Class ARMGICv3Class; +/* This is reusing the GICState typedef from TYPE_ARM_GICV3_COMMON */ +DECLARE_OBJ_CHECKERS(GICv3State, ARMGICv3Class, + ARM_GICV3, TYPE_ARM_GICV3) + +struct ARMGICv3Class { + /*< private >*/ + ARMGICv3CommonClass parent_class; + /*< public >*/ + + DeviceRealize parent_realize; +}; + +#endif diff --git a/include/hw/intc/arm_gicv3_common.h b/include/hw/intc/arm_gicv3_common.h new file mode 100644 index 000000000..91491a2f6 --- /dev/null +++ b/include/hw/intc/arm_gicv3_common.h @@ -0,0 +1,298 @@ +/* + * ARM GIC support + * + * Copyright (c) 2012 Linaro Limited + * Copyright (c) 2015 Huawei. + * Copyright (c) 2015 Samsung Electronics Co., Ltd. + * Written by Peter Maydell + * Reworked for GICv3 by Shlomo Pongratz and Pavel Fedin + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, see . + */ + +#ifndef HW_ARM_GICV3_COMMON_H +#define HW_ARM_GICV3_COMMON_H + +#include "hw/sysbus.h" +#include "hw/intc/arm_gic_common.h" +#include "qom/object.h" + +/* + * Maximum number of possible interrupts, determined by the GIC architecture. + * Note that this does not include LPIs. When implemented, these should be + * dealt with separately. + */ +#define GICV3_MAXIRQ 1020 +#define GICV3_MAXSPI (GICV3_MAXIRQ - GIC_INTERNAL) + +#define GICV3_REDIST_SIZE 0x20000 + +/* Number of SGI target-list bits */ +#define GICV3_TARGETLIST_BITS 16 + +/* Maximum number of list registers (architectural limit) */ +#define GICV3_LR_MAX 16 + +/* Minimum BPR for Secure, or when security not enabled */ +#define GIC_MIN_BPR 0 +/* Minimum BPR for Nonsecure when security is enabled */ +#define GIC_MIN_BPR_NS (GIC_MIN_BPR + 1) + +/* For some distributor fields we want to model the array of 32-bit + * register values which hold various bitmaps corresponding to enabled, + * pending, etc bits. These macros and functions facilitate that; the + * APIs are generally modelled on the generic bitmap.h functions + * (which are unsuitable here because they use 'unsigned long' as the + * underlying storage type, which is very awkward when you need to + * access the data as 32-bit values.) + * Each bitmap contains a bit for each interrupt. Although there is + * space for the PPIs and SGIs, those bits (the first 32) are never + * used as that state lives in the redistributor. The unused bits are + * provided purely so that interrupt X's state is always in bit X; this + * avoids bugs where we forget to subtract GIC_INTERNAL from an + * interrupt number. + */ +#define GICV3_BMP_SIZE DIV_ROUND_UP(GICV3_MAXIRQ, 32) + +#define GIC_DECLARE_BITMAP(name) \ + uint32_t name[GICV3_BMP_SIZE] + +#define GIC_BIT_MASK(nr) (1U << ((nr) % 32)) +#define GIC_BIT_WORD(nr) ((nr) / 32) + +static inline void gic_bmp_set_bit(int nr, uint32_t *addr) +{ + uint32_t mask = GIC_BIT_MASK(nr); + uint32_t *p = addr + GIC_BIT_WORD(nr); + + *p |= mask; +} + +static inline void gic_bmp_clear_bit(int nr, uint32_t *addr) +{ + uint32_t mask = GIC_BIT_MASK(nr); + uint32_t *p = addr + GIC_BIT_WORD(nr); + + *p &= ~mask; +} + +static inline int gic_bmp_test_bit(int nr, const uint32_t *addr) +{ + return 1U & (addr[GIC_BIT_WORD(nr)] >> (nr & 31)); +} + +static inline void gic_bmp_replace_bit(int nr, uint32_t *addr, int val) +{ + uint32_t mask = GIC_BIT_MASK(nr); + uint32_t *p = addr + GIC_BIT_WORD(nr); + + *p &= ~mask; + *p |= (val & 1U) << (nr % 32); +} + +/* Return a pointer to the 32-bit word containing the specified bit. */ +static inline uint32_t *gic_bmp_ptr32(uint32_t *addr, int nr) +{ + return addr + GIC_BIT_WORD(nr); +} + +typedef struct GICv3State GICv3State; +typedef struct GICv3CPUState GICv3CPUState; + +/* Some CPU interface registers come in three flavours: + * Group0, Group1 (Secure) and Group1 (NonSecure) + * (where the latter two are exposed as a single banked system register). + * In the state struct they are implemented as a 3-element array which + * can be indexed into by the GICV3_G0, GICV3_G1 and GICV3_G1NS constants. + * If the CPU doesn't support EL3 then the G1 element is unused. + * + * These constants are also used to communicate the group to use for + * an interrupt or SGI when it is passed between the cpu interface and + * the redistributor or distributor. For those purposes the receiving end + * must be prepared to cope with a Group 1 Secure interrupt even if it does + * not have security support enabled, because security can be disabled + * independently in the CPU and in the GIC. In that case the receiver should + * treat an incoming Group 1 Secure interrupt as if it were Group 0. + * (This architectural requirement is why the _G1 element is the unused one + * in a no-EL3 CPU: we would otherwise have to translate back and forth + * between (G0, G1NS) from the distributor and (G0, G1) in the CPU i/f.) + */ +#define GICV3_G0 0 +#define GICV3_G1 1 +#define GICV3_G1NS 2 + +/* ICC_CTLR_EL1, GICD_STATUSR and GICR_STATUSR are banked but not + * group-related, so those indices are just 0 for S and 1 for NS. + * (If the CPU or the GIC, respectively, don't support the Security + * extensions then the S element is unused.) + */ +#define GICV3_S 0 +#define GICV3_NS 1 + +typedef struct { + int irq; + uint8_t prio; + int grp; +} PendingIrq; + +struct GICv3CPUState { + GICv3State *gic; + CPUState *cpu; + qemu_irq parent_irq; + qemu_irq parent_fiq; + qemu_irq parent_virq; + qemu_irq parent_vfiq; + + /* Redistributor */ + uint32_t level; /* Current IRQ level */ + /* RD_base page registers */ + uint32_t gicr_ctlr; + uint64_t gicr_typer; + uint32_t gicr_statusr[2]; + uint32_t gicr_waker; + uint64_t gicr_propbaser; + uint64_t gicr_pendbaser; + /* SGI_base page registers */ + uint32_t gicr_igroupr0; + uint32_t gicr_ienabler0; + uint32_t gicr_ipendr0; + uint32_t gicr_iactiver0; + uint32_t edge_trigger; /* ICFGR0 and ICFGR1 even bits */ + uint32_t gicr_igrpmodr0; + uint32_t gicr_nsacr; + uint8_t gicr_ipriorityr[GIC_INTERNAL]; + + /* CPU interface */ + uint64_t icc_sre_el1; + uint64_t icc_ctlr_el1[2]; + uint64_t icc_pmr_el1; + uint64_t icc_bpr[3]; + uint64_t icc_apr[3][4]; + uint64_t icc_igrpen[3]; + uint64_t icc_ctlr_el3; + + /* Virtualization control interface */ + uint64_t ich_apr[3][4]; /* ich_apr[GICV3_G1][x] never used */ + uint64_t ich_hcr_el2; + uint64_t ich_lr_el2[GICV3_LR_MAX]; + uint64_t ich_vmcr_el2; + + /* Properties of the CPU interface. These are initialized from + * the settings in the CPU proper. + * If the number of implemented list registers is 0 then the + * virtualization support is not implemented. + */ + int num_list_regs; + int vpribits; /* number of virtual priority bits */ + int vprebits; /* number of virtual preemption bits */ + + /* Current highest priority pending interrupt for this CPU. + * This is cached information that can be recalculated from the + * real state above; it doesn't need to be migrated. + */ + PendingIrq hppi; + /* This is temporary working state, to avoid a malloc in gicv3_update() */ + bool seenbetter; +}; + +struct GICv3State { + /*< private >*/ + SysBusDevice parent_obj; + /*< public >*/ + + MemoryRegion iomem_dist; /* Distributor */ + MemoryRegion *iomem_redist; /* Redistributor Regions */ + uint32_t *redist_region_count; /* redistributor count within each region */ + uint32_t nb_redist_regions; /* number of redist regions */ + + uint32_t num_cpu; + uint32_t num_irq; + uint32_t revision; + bool security_extn; + bool irq_reset_nonsecure; + bool gicd_no_migration_shift_bug; + + int dev_fd; /* kvm device fd if backed by kvm vgic support */ + Error *migration_blocker; + + /* Distributor */ + + /* for a GIC with the security extensions the NS banked version of this + * register is just an alias of bit 1 of the S banked version. + */ + uint32_t gicd_ctlr; + uint32_t gicd_statusr[2]; + GIC_DECLARE_BITMAP(group); /* GICD_IGROUPR */ + GIC_DECLARE_BITMAP(grpmod); /* GICD_IGRPMODR */ + GIC_DECLARE_BITMAP(enabled); /* GICD_ISENABLER */ + GIC_DECLARE_BITMAP(pending); /* GICD_ISPENDR */ + GIC_DECLARE_BITMAP(active); /* GICD_ISACTIVER */ + GIC_DECLARE_BITMAP(level); /* Current level */ + GIC_DECLARE_BITMAP(edge_trigger); /* GICD_ICFGR even bits */ + uint8_t gicd_ipriority[GICV3_MAXIRQ]; + uint64_t gicd_irouter[GICV3_MAXIRQ]; + /* Cached information: pointer to the cpu i/f for the CPUs specified + * in the IROUTER registers + */ + GICv3CPUState *gicd_irouter_target[GICV3_MAXIRQ]; + uint32_t gicd_nsacr[DIV_ROUND_UP(GICV3_MAXIRQ, 16)]; + + GICv3CPUState *cpu; +}; + +#define GICV3_BITMAP_ACCESSORS(BMP) \ + static inline void gicv3_gicd_##BMP##_set(GICv3State *s, int irq) \ + { \ + gic_bmp_set_bit(irq, s->BMP); \ + } \ + static inline int gicv3_gicd_##BMP##_test(GICv3State *s, int irq) \ + { \ + return gic_bmp_test_bit(irq, s->BMP); \ + } \ + static inline void gicv3_gicd_##BMP##_clear(GICv3State *s, int irq) \ + { \ + gic_bmp_clear_bit(irq, s->BMP); \ + } \ + static inline void gicv3_gicd_##BMP##_replace(GICv3State *s, \ + int irq, int value) \ + { \ + gic_bmp_replace_bit(irq, s->BMP, value); \ + } + +GICV3_BITMAP_ACCESSORS(group) +GICV3_BITMAP_ACCESSORS(grpmod) +GICV3_BITMAP_ACCESSORS(enabled) +GICV3_BITMAP_ACCESSORS(pending) +GICV3_BITMAP_ACCESSORS(active) +GICV3_BITMAP_ACCESSORS(level) +GICV3_BITMAP_ACCESSORS(edge_trigger) + +#define TYPE_ARM_GICV3_COMMON "arm-gicv3-common" +typedef struct ARMGICv3CommonClass ARMGICv3CommonClass; +DECLARE_OBJ_CHECKERS(GICv3State, ARMGICv3CommonClass, + ARM_GICV3_COMMON, TYPE_ARM_GICV3_COMMON) + +struct ARMGICv3CommonClass { + /*< private >*/ + SysBusDeviceClass parent_class; + /*< public >*/ + + void (*pre_save)(GICv3State *s); + void (*post_load)(GICv3State *s); +}; + +void gicv3_init_irqs_and_mmio(GICv3State *s, qemu_irq_handler handler, + const MemoryRegionOps *ops, Error **errp); + +#endif diff --git a/include/hw/intc/arm_gicv3_its_common.h b/include/hw/intc/arm_gicv3_its_common.h new file mode 100644 index 000000000..5a0952b40 --- /dev/null +++ b/include/hw/intc/arm_gicv3_its_common.h @@ -0,0 +1,83 @@ +/* + * ITS support for ARM GICv3 + * + * Copyright (c) 2015 Samsung Electronics Co., Ltd. + * Written by Pavel Fedin + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, see . + */ + +#ifndef QEMU_ARM_GICV3_ITS_COMMON_H +#define QEMU_ARM_GICV3_ITS_COMMON_H + +#include "hw/sysbus.h" +#include "hw/intc/arm_gicv3_common.h" +#include "qom/object.h" + +#define ITS_CONTROL_SIZE 0x10000 +#define ITS_TRANS_SIZE 0x10000 +#define ITS_SIZE (ITS_CONTROL_SIZE + ITS_TRANS_SIZE) + +#define GITS_CTLR 0x0 +#define GITS_IIDR 0x4 +#define GITS_CBASER 0x80 +#define GITS_CWRITER 0x88 +#define GITS_CREADR 0x90 +#define GITS_BASER 0x100 + +struct GICv3ITSState { + SysBusDevice parent_obj; + + MemoryRegion iomem_main; + MemoryRegion iomem_its_cntrl; + MemoryRegion iomem_its_translation; + + GICv3State *gicv3; + + int dev_fd; /* kvm device fd if backed by kvm vgic support */ + uint64_t gits_translater_gpa; + bool translater_gpa_known; + + /* Registers */ + uint32_t ctlr; + uint32_t iidr; + uint64_t cbaser; + uint64_t cwriter; + uint64_t creadr; + uint64_t baser[8]; + + Error *migration_blocker; +}; + +typedef struct GICv3ITSState GICv3ITSState; + +void gicv3_its_init_mmio(GICv3ITSState *s, const MemoryRegionOps *ops); + +#define TYPE_ARM_GICV3_ITS_COMMON "arm-gicv3-its-common" +typedef struct GICv3ITSCommonClass GICv3ITSCommonClass; +DECLARE_OBJ_CHECKERS(GICv3ITSState, GICv3ITSCommonClass, + ARM_GICV3_ITS_COMMON, TYPE_ARM_GICV3_ITS_COMMON) + +struct GICv3ITSCommonClass { + /*< private >*/ + SysBusDeviceClass parent_class; + /*< public >*/ + + int (*send_msi)(GICv3ITSState *s, uint32_t data, uint16_t devid); + void (*pre_save)(GICv3ITSState *s); + void (*post_load)(GICv3ITSState *s); +}; + + +#endif diff --git a/include/hw/intc/armv7m_nvic.h b/include/hw/intc/armv7m_nvic.h new file mode 100644 index 000000000..bb087b23c --- /dev/null +++ b/include/hw/intc/armv7m_nvic.h @@ -0,0 +1,95 @@ +/* + * ARMv7M NVIC object + * + * Copyright (c) 2017 Linaro Ltd + * Written by Peter Maydell + * + * This code is licensed under the GPL version 2 or later. + */ + +#ifndef HW_ARM_ARMV7M_NVIC_H +#define HW_ARM_ARMV7M_NVIC_H + +#include "target/arm/cpu.h" +#include "hw/sysbus.h" +#include "hw/timer/armv7m_systick.h" +#include "qom/object.h" + +#define TYPE_NVIC "armv7m_nvic" + +typedef struct NVICState NVICState; +DECLARE_INSTANCE_CHECKER(NVICState, NVIC, + TYPE_NVIC) + +/* Highest permitted number of exceptions (architectural limit) */ +#define NVIC_MAX_VECTORS 512 +/* Number of internal exceptions */ +#define NVIC_INTERNAL_VECTORS 16 + +typedef struct VecInfo { + /* Exception priorities can range from -3 to 255; only the unmodifiable + * priority values for RESET, NMI and HardFault can be negative. + */ + int16_t prio; + uint8_t enabled; + uint8_t pending; + uint8_t active; + uint8_t level; /* exceptions <=15 never set level */ +} VecInfo; + +struct NVICState { + /*< private >*/ + SysBusDevice parent_obj; + /*< public >*/ + + ARMCPU *cpu; + + VecInfo vectors[NVIC_MAX_VECTORS]; + /* If the v8M security extension is implemented, some of the internal + * exceptions are banked between security states (ie there exists both + * a Secure and a NonSecure version of the exception and its state): + * HardFault, MemManage, UsageFault, SVCall, PendSV, SysTick (R_PJHV) + * The rest (including all the external exceptions) are not banked, though + * they may be configurable to target either Secure or NonSecure state. + * We store the secure exception state in sec_vectors[] for the banked + * exceptions, and otherwise use only vectors[] (including for exceptions + * like SecureFault that unconditionally target Secure state). + * Entries in sec_vectors[] for non-banked exception numbers are unused. + */ + VecInfo sec_vectors[NVIC_INTERNAL_VECTORS]; + /* The PRIGROUP field in AIRCR is banked */ + uint32_t prigroup[M_REG_NUM_BANKS]; + uint8_t num_prio_bits; + + /* v8M NVIC_ITNS state (stored as a bool per bit) */ + bool itns[NVIC_MAX_VECTORS]; + + /* The following fields are all cached state that can be recalculated + * from the vectors[] and sec_vectors[] arrays and the prigroup field: + * - vectpending + * - vectpending_is_secure + * - exception_prio + * - vectpending_prio + */ + unsigned int vectpending; /* highest prio pending enabled exception */ + /* true if vectpending is a banked secure exception, ie it is in + * sec_vectors[] rather than vectors[] + */ + bool vectpending_is_s_banked; + int exception_prio; /* group prio of the highest prio active exception */ + int vectpending_prio; /* group prio of the exeception in vectpending */ + + MemoryRegion sysregmem; + MemoryRegion sysreg_ns_mem; + MemoryRegion systickmem; + MemoryRegion systick_ns_mem; + MemoryRegion container; + + uint32_t num_irq; + qemu_irq excpout; + qemu_irq sysresetreq; + + SysTickState systick[M_REG_NUM_BANKS]; +}; + +#endif diff --git a/include/hw/intc/aspeed_vic.h b/include/hw/intc/aspeed_vic.h new file mode 100644 index 000000000..68d6ab997 --- /dev/null +++ b/include/hw/intc/aspeed_vic.h @@ -0,0 +1,49 @@ +/* + * ASPEED Interrupt Controller (New) + * + * Andrew Jeffery + * + * Copyright 2016 IBM Corp. + * + * This code is licensed under the GPL version 2 or later. See + * the COPYING file in the top-level directory. + * + * Need to add SVIC and CVIC support + */ +#ifndef ASPEED_VIC_H +#define ASPEED_VIC_H + +#include "hw/sysbus.h" +#include "qom/object.h" + +#define TYPE_ASPEED_VIC "aspeed.vic" +OBJECT_DECLARE_SIMPLE_TYPE(AspeedVICState, ASPEED_VIC) + +#define ASPEED_VIC_NR_IRQS 51 + +struct AspeedVICState { + /*< private >*/ + SysBusDevice parent_obj; + + /*< public >*/ + MemoryRegion iomem; + qemu_irq irq; + qemu_irq fiq; + + uint64_t level; + uint64_t raw; + uint64_t select; + uint64_t enable; + uint64_t trigger; + + /* 0=edge, 1=level */ + uint64_t sense; + + /* 0=single-edge, 1=dual-edge */ + uint64_t dual_edge; + + /* 0=low-sensitive/falling-edge, 1=high-sensitive/rising-edge */ + uint64_t event; +}; + +#endif /* ASPEED_VIC_H */ diff --git a/include/hw/intc/bcm2835_ic.h b/include/hw/intc/bcm2835_ic.h new file mode 100644 index 000000000..588eb76c5 --- /dev/null +++ b/include/hw/intc/bcm2835_ic.h @@ -0,0 +1,36 @@ +/* + * Raspberry Pi emulation (c) 2012 Gregory Estrade + * + * This work is licensed under the terms of the GNU GPL, version 2 or later. + * See the COPYING file in the top-level directory. + */ + +#ifndef BCM2835_IC_H +#define BCM2835_IC_H + +#include "hw/sysbus.h" +#include "qom/object.h" + +#define TYPE_BCM2835_IC "bcm2835-ic" +OBJECT_DECLARE_SIMPLE_TYPE(BCM2835ICState, BCM2835_IC) + +#define BCM2835_IC_GPU_IRQ "gpu-irq" +#define BCM2835_IC_ARM_IRQ "arm-irq" + +struct BCM2835ICState { + /*< private >*/ + SysBusDevice busdev; + /*< public >*/ + + MemoryRegion iomem; + qemu_irq irq; + qemu_irq fiq; + + /* 64 GPU IRQs + 8 ARM IRQs = 72 total (GPU first) */ + uint64_t gpu_irq_level, gpu_irq_enable; + uint8_t arm_irq_level, arm_irq_enable; + bool fiq_enable; + uint8_t fiq_select; +}; + +#endif diff --git a/include/hw/intc/bcm2836_control.h b/include/hw/intc/bcm2836_control.h new file mode 100644 index 000000000..a410c817e --- /dev/null +++ b/include/hw/intc/bcm2836_control.h @@ -0,0 +1,61 @@ +/* + * Raspberry Pi emulation (c) 2012 Gregory Estrade + * Upstreaming code cleanup [including bcm2835_*] (c) 2013 Jan Petrous + * + * Rasperry Pi 2 emulation and refactoring Copyright (c) 2015, Microsoft + * Written by Andrew Baumann + * + * ARM Local Timer IRQ Copyright (c) 2019. Zoltán Baldaszti + * Added basic IRQ_TIMER interrupt support + * + * This work is licensed under the terms of the GNU GPL, version 2 or later. + * See the COPYING file in the top-level directory. + */ + +#ifndef BCM2836_CONTROL_H +#define BCM2836_CONTROL_H + +#include "hw/sysbus.h" +#include "qemu/timer.h" +#include "qom/object.h" + +/* 4 mailboxes per core, for 16 total */ +#define BCM2836_NCORES 4 +#define BCM2836_MBPERCORE 4 + +#define TYPE_BCM2836_CONTROL "bcm2836-control" +OBJECT_DECLARE_SIMPLE_TYPE(BCM2836ControlState, BCM2836_CONTROL) + +struct BCM2836ControlState { + /*< private >*/ + SysBusDevice busdev; + /*< public >*/ + MemoryRegion iomem; + + /* mailbox state */ + uint32_t mailboxes[BCM2836_NCORES * BCM2836_MBPERCORE]; + + /* interrupt routing/control registers */ + uint8_t route_gpu_irq, route_gpu_fiq; + uint32_t timercontrol[BCM2836_NCORES]; + uint32_t mailboxcontrol[BCM2836_NCORES]; + + /* interrupt status regs (derived from input pins; not visible to user) */ + bool gpu_irq, gpu_fiq; + uint8_t timerirqs[BCM2836_NCORES]; + + /* local timer */ + QEMUTimer timer; + uint32_t local_timer_control; + uint8_t route_localtimer; + + /* interrupt source registers, post-routing (also input-derived; visible) */ + uint32_t irqsrc[BCM2836_NCORES]; + uint32_t fiqsrc[BCM2836_NCORES]; + + /* outputs to CPU cores */ + qemu_irq irq[BCM2836_NCORES]; + qemu_irq fiq[BCM2836_NCORES]; +}; + +#endif diff --git a/include/hw/intc/heathrow_pic.h b/include/hw/intc/heathrow_pic.h new file mode 100644 index 000000000..c0a7f6f54 --- /dev/null +++ b/include/hw/intc/heathrow_pic.h @@ -0,0 +1,52 @@ +/* + * Heathrow PIC support (OldWorld PowerMac) + * + * Copyright (c) 2005-2007 Fabrice Bellard + * Copyright (c) 2007 Jocelyn Mayer + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ + +#ifndef HW_INTC_HEATHROW_PIC_H +#define HW_INTC_HEATHROW_PIC_H + +#include "hw/sysbus.h" +#include "qom/object.h" + +#define TYPE_HEATHROW "heathrow" +OBJECT_DECLARE_SIMPLE_TYPE(HeathrowState, HEATHROW) + +typedef struct HeathrowPICState { + uint32_t events; + uint32_t mask; + uint32_t levels; + uint32_t level_triggered; +} HeathrowPICState; + +struct HeathrowState { + SysBusDevice parent_obj; + + MemoryRegion mem; + HeathrowPICState pics[2]; + qemu_irq irqs[1]; +}; + +#define HEATHROW_NUM_IRQS 64 + +#endif /* HW_INTC_HEATHROW_PIC_H */ diff --git a/include/hw/intc/i8259.h b/include/hw/intc/i8259.h new file mode 100644 index 000000000..e2b1e8c59 --- /dev/null +++ b/include/hw/intc/i8259.h @@ -0,0 +1,12 @@ +#ifndef HW_I8259_H +#define HW_I8259_H + +/* i8259.c */ + +extern DeviceState *isa_pic; +qemu_irq *i8259_init(ISABus *bus, qemu_irq parent_irq); +qemu_irq *kvm_i8259_init(ISABus *bus); +int pic_get_output(DeviceState *d); +int pic_read_irq(DeviceState *d); + +#endif diff --git a/include/hw/intc/ibex_plic.h b/include/hw/intc/ibex_plic.h new file mode 100644 index 000000000..7fc495db9 --- /dev/null +++ b/include/hw/intc/ibex_plic.h @@ -0,0 +1,65 @@ +/* + * QEMU RISC-V lowRISC Ibex PLIC + * + * Copyright (c) 2020 Western Digital + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2 or later, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along with + * this program. If not, see . + */ + +#ifndef HW_IBEX_PLIC_H +#define HW_IBEX_PLIC_H + +#include "hw/sysbus.h" +#include "qom/object.h" + +#define TYPE_IBEX_PLIC "ibex-plic" +OBJECT_DECLARE_SIMPLE_TYPE(IbexPlicState, IBEX_PLIC) + +struct IbexPlicState { + /*< private >*/ + SysBusDevice parent_obj; + + /*< public >*/ + MemoryRegion mmio; + + uint32_t *pending; + uint32_t *hidden_pending; + uint32_t *claimed; + uint32_t *source; + uint32_t *priority; + uint32_t *enable; + uint32_t threshold; + uint32_t claim; + + /* config */ + uint32_t num_cpus; + uint32_t num_sources; + + uint32_t pending_base; + uint32_t pending_num; + + uint32_t source_base; + uint32_t source_num; + + uint32_t priority_base; + uint32_t priority_num; + + uint32_t enable_base; + uint32_t enable_num; + + uint32_t threshold_base; + + uint32_t claim_base; +}; + +#endif /* HW_IBEX_PLIC_H */ diff --git a/include/hw/intc/imx_avic.h b/include/hw/intc/imx_avic.h new file mode 100644 index 000000000..75fbd1a89 --- /dev/null +++ b/include/hw/intc/imx_avic.h @@ -0,0 +1,56 @@ +/* + * i.MX31 Vectored Interrupt Controller + * + * Note this is NOT the PL192 provided by ARM, but + * a custom implementation by Freescale. + * + * Copyright (c) 2008 OKL + * Copyright (c) 2011 NICTA Pty Ltd + * Originally written by Hans Jiang + * Updated by Jean-Christophe Dubois + * + * This code is licensed under the GPL version 2 or later. See + * the COPYING file in the top-level directory. + * + * TODO: implement vectors. + */ +#ifndef IMX_AVIC_H +#define IMX_AVIC_H + +#include "hw/sysbus.h" +#include "qom/object.h" + +#define TYPE_IMX_AVIC "imx.avic" +OBJECT_DECLARE_SIMPLE_TYPE(IMXAVICState, IMX_AVIC) + +#define IMX_AVIC_NUM_IRQS 64 + +/* Interrupt Control Bits */ +#define ABFLAG (1<<25) +#define ABFEN (1<<24) +#define NIDIS (1<<22) /* Normal Interrupt disable */ +#define FIDIS (1<<21) /* Fast interrupt disable */ +#define NIAD (1<<20) /* Normal Interrupt Arbiter Rise ARM level */ +#define FIAD (1<<19) /* Fast Interrupt Arbiter Rise ARM level */ +#define NM (1<<18) /* Normal interrupt mode */ + +#define PRIO_PER_WORD (sizeof(uint32_t) * 8 / 4) +#define PRIO_WORDS (IMX_AVIC_NUM_IRQS/PRIO_PER_WORD) + +struct IMXAVICState { + /*< private >*/ + SysBusDevice parent_obj; + + /*< public >*/ + MemoryRegion iomem; + uint64_t pending; + uint64_t enabled; + uint64_t is_fiq; + uint32_t intcntl; + uint32_t intmask; + qemu_irq irq; + qemu_irq fiq; + uint32_t prio[PRIO_WORDS]; /* Priorities are 4-bits each */ +}; + +#endif /* IMX_AVIC_H */ diff --git a/include/hw/intc/imx_gpcv2.h b/include/hw/intc/imx_gpcv2.h new file mode 100644 index 000000000..7bdee7e80 --- /dev/null +++ b/include/hw/intc/imx_gpcv2.h @@ -0,0 +1,23 @@ +#ifndef IMX_GPCV2_H +#define IMX_GPCV2_H + +#include "hw/sysbus.h" +#include "qom/object.h" + +enum IMXGPCv2Registers { + GPC_NUM = 0xE00 / sizeof(uint32_t), +}; + +struct IMXGPCv2State { + /*< private >*/ + SysBusDevice parent_obj; + + /*< public >*/ + MemoryRegion iomem; + uint32_t regs[GPC_NUM]; +}; + +#define TYPE_IMX_GPCV2 "imx-gpcv2" +OBJECT_DECLARE_SIMPLE_TYPE(IMXGPCv2State, IMX_GPCV2) + +#endif /* IMX_GPCV2_H */ diff --git a/include/hw/intc/intc.h b/include/hw/intc/intc.h new file mode 100644 index 000000000..7018f608c --- /dev/null +++ b/include/hw/intc/intc.h @@ -0,0 +1,28 @@ +#ifndef INTC_H +#define INTC_H + +#include "qom/object.h" + +#define TYPE_INTERRUPT_STATS_PROVIDER "intctrl" + +typedef struct InterruptStatsProviderClass InterruptStatsProviderClass; +DECLARE_CLASS_CHECKERS(InterruptStatsProviderClass, INTERRUPT_STATS_PROVIDER, + TYPE_INTERRUPT_STATS_PROVIDER) +#define INTERRUPT_STATS_PROVIDER(obj) \ + INTERFACE_CHECK(InterruptStatsProvider, (obj), \ + TYPE_INTERRUPT_STATS_PROVIDER) + +typedef struct InterruptStatsProvider InterruptStatsProvider; + +struct InterruptStatsProviderClass { + InterfaceClass parent; + + /* The returned pointer and statistics must remain valid until + * the BQL is next dropped. + */ + bool (*get_statistics)(InterruptStatsProvider *obj, uint64_t **irq_counts, + unsigned int *nb_irqs); + void (*print_info)(InterruptStatsProvider *obj, Monitor *mon); +}; + +#endif diff --git a/include/hw/intc/mips_gic.h b/include/hw/intc/mips_gic.h new file mode 100644 index 000000000..eeb136e26 --- /dev/null +++ b/include/hw/intc/mips_gic.h @@ -0,0 +1,218 @@ +/* + * This file is subject to the terms and conditions of the GNU General Public + * License. See the file "COPYING" in the main directory of this archive + * for more details. + * + * Copyright (C) 2000, 07 MIPS Technologies, Inc. + * Copyright (C) 2016 Imagination Technologies + * + */ + +#ifndef MIPS_GIC_H +#define MIPS_GIC_H + +#include "qemu/units.h" +#include "hw/timer/mips_gictimer.h" +#include "hw/sysbus.h" +#include "cpu.h" +#include "qom/object.h" +/* + * GIC Specific definitions + */ + +/* The MIPS default location */ +#define GIC_BASE_ADDR 0x1bdc0000ULL +#define GIC_ADDRSPACE_SZ (128 * KiB) + +/* Constants */ +#define GIC_POL_POS 1 +#define GIC_POL_NEG 0 +#define GIC_TRIG_EDGE 1 +#define GIC_TRIG_LEVEL 0 + +#define MSK(n) ((1ULL << (n)) - 1) + +/* GIC Address Space */ +#define SHARED_SECTION_OFS 0x0000 +#define SHARED_SECTION_SIZE 0x8000 +#define VP_LOCAL_SECTION_OFS 0x8000 +#define VP_LOCAL_SECTION_SIZE 0x4000 +#define VP_OTHER_SECTION_OFS 0xc000 +#define VP_OTHER_SECTION_SIZE 0x4000 +#define USM_VISIBLE_SECTION_OFS 0x10000 +#define USM_VISIBLE_SECTION_SIZE 0x10000 + +/* Register Map for Shared Section */ + +#define GIC_SH_CONFIG_OFS 0x0000 + +/* Shared Global Counter */ +#define GIC_SH_COUNTERLO_OFS 0x0010 +#define GIC_SH_COUNTERHI_OFS 0x0014 +#define GIC_SH_REVISIONID_OFS 0x0020 + +/* Set/Clear corresponding bit in Edge Detect Register */ +#define GIC_SH_WEDGE_OFS 0x0280 + +/* Reset Mask - Disables Interrupt */ +#define GIC_SH_RMASK_OFS 0x0300 +#define GIC_SH_RMASK_LAST_OFS 0x031c + +/* Set Mask (WO) - Enables Interrupt */ +#define GIC_SH_SMASK_OFS 0x0380 +#define GIC_SH_SMASK_LAST_OFS 0x039c + +/* Global Interrupt Mask Register (RO) - Bit Set == Interrupt enabled */ +#define GIC_SH_MASK_OFS 0x0400 +#define GIC_SH_MASK_LAST_OFS 0x041c + +/* Pending Global Interrupts (RO) */ +#define GIC_SH_PEND_OFS 0x0480 +#define GIC_SH_PEND_LAST_OFS 0x049c + +#define GIC_SH_MAP0_PIN_OFS 0x0500 +#define GIC_SH_MAP255_PIN_OFS 0x08fc + +#define GIC_SH_MAP0_VP_OFS 0x2000 +#define GIC_SH_MAP255_VP_LAST_OFS 0x3fe4 + +/* Register Map for Local Section */ +#define GIC_VP_CTL_OFS 0x0000 +#define GIC_VP_PEND_OFS 0x0004 +#define GIC_VP_MASK_OFS 0x0008 +#define GIC_VP_RMASK_OFS 0x000c +#define GIC_VP_SMASK_OFS 0x0010 +#define GIC_VP_WD_MAP_OFS 0x0040 +#define GIC_VP_COMPARE_MAP_OFS 0x0044 +#define GIC_VP_TIMER_MAP_OFS 0x0048 +#define GIC_VP_FDC_MAP_OFS 0x004c +#define GIC_VP_PERFCTR_MAP_OFS 0x0050 +#define GIC_VP_SWINT0_MAP_OFS 0x0054 +#define GIC_VP_SWINT1_MAP_OFS 0x0058 +#define GIC_VP_OTHER_ADDR_OFS 0x0080 +#define GIC_VP_IDENT_OFS 0x0088 +#define GIC_VP_WD_CONFIG0_OFS 0x0090 +#define GIC_VP_WD_COUNT0_OFS 0x0094 +#define GIC_VP_WD_INITIAL0_OFS 0x0098 +#define GIC_VP_COMPARE_LO_OFS 0x00a0 +#define GIC_VP_COMPARE_HI_OFS 0x00a4 +#define GIC_VL_BRK_GROUP 0x3080 + +/* User-Mode Visible Section Register */ +/* Read-only alias for GIC Shared CounterLo */ +#define GIC_USER_MODE_COUNTERLO 0x0000 +/* Read-only alias for GIC Shared CounterHi */ +#define GIC_USER_MODE_COUNTERHI 0x0004 + +/* Masks */ +#define GIC_SH_CONFIG_COUNTSTOP_SHF 28 +#define GIC_SH_CONFIG_COUNTSTOP_MSK (MSK(1) << GIC_SH_CONFIG_COUNTSTOP_SHF) +#define GIC_SH_CONFIG_COUNTBITS_SHF 24 +#define GIC_SH_CONFIG_COUNTBITS_MSK (MSK(4) << GIC_SH_CONFIG_COUNTBITS_SHF) +#define GIC_SH_CONFIG_NUMINTRS_SHF 16 +#define GIC_SH_CONFIG_NUMINTRS_MSK (MSK(8) << GIC_SH_CONFIG_NUMINTRS_SHF) +#define GIC_SH_CONFIG_PVPS_SHF 0 +#define GIC_SH_CONFIG_PVPS_MSK (MSK(8) << GIC_SH_CONFIG_NUMVPS_SHF) + +#define GIC_SH_WEDGE_RW_SHF 31 +#define GIC_SH_WEDGE_RW_MSK (MSK(1) << GIC_SH_WEDGE_RW_SHF) + +#define GIC_MAP_TO_PIN_SHF 31 +#define GIC_MAP_TO_PIN_MSK (MSK(1) << GIC_MAP_TO_PIN_SHF) +#define GIC_MAP_TO_NMI_SHF 30 +#define GIC_MAP_TO_NMI_MSK (MSK(1) << GIC_MAP_TO_NMI_SHF) +#define GIC_MAP_TO_YQ_SHF 29 +#define GIC_MAP_TO_YQ_MSK (MSK(1) << GIC_MAP_TO_YQ_SHF) +#define GIC_MAP_SHF 0 +#define GIC_MAP_MSK (MSK(6) << GIC_MAP_SHF) +#define GIC_MAP_TO_PIN_REG_MSK \ + (GIC_MAP_TO_PIN_MSK | GIC_MAP_TO_NMI_MSK | GIC_MAP_TO_YQ_MSK | GIC_MAP_MSK) + +/* GIC_VP_CTL Masks */ +#define GIC_VP_CTL_FDC_RTBL_SHF 4 +#define GIC_VP_CTL_FDC_RTBL_MSK (MSK(1) << GIC_VP_CTL_FDC_RTBL_SHF) +#define GIC_VP_CTL_SWINT_RTBL_SHF 3 +#define GIC_VP_CTL_SWINT_RTBL_MSK (MSK(1) << GIC_VP_CTL_SWINT_RTBL_SHF) +#define GIC_VP_CTL_PERFCNT_RTBL_SHF 2 +#define GIC_VP_CTL_PERFCNT_RTBL_MSK (MSK(1) << GIC_VP_CTL_PERFCNT_RTBL_SHF) +#define GIC_VP_CTL_TIMER_RTBL_SHF 1 +#define GIC_VP_CTL_TIMER_RTBL_MSK (MSK(1) << GIC_VP_CTL_TIMER_RTBL_SHF) +#define GIC_VP_CTL_EIC_MODE_SHF 0 +#define GIC_VP_CTL_EIC_MODE_MSK (MSK(1) << GIC_VP_CTL_EIC_MODE_SHF) + +/* GIC_VP_MASK Masks */ +#define GIC_VP_MASK_FDC_SHF 6 +#define GIC_VP_MASK_FDC_MSK (MSK(1) << GIC_VP_MASK_FDC_SHF) +#define GIC_VP_MASK_SWINT1_SHF 5 +#define GIC_VP_MASK_SWINT1_MSK (MSK(1) << GIC_VP_MASK_SWINT1_SHF) +#define GIC_VP_MASK_SWINT0_SHF 4 +#define GIC_VP_MASK_SWINT0_MSK (MSK(1) << GIC_VP_MASK_SWINT0_SHF) +#define GIC_VP_MASK_PERFCNT_SHF 3 +#define GIC_VP_MASK_PERFCNT_MSK (MSK(1) << GIC_VP_MASK_PERFCNT_SHF) +#define GIC_VP_MASK_TIMER_SHF 2 +#define GIC_VP_MASK_TIMER_MSK (MSK(1) << GIC_VP_MASK_TIMER_SHF) +#define GIC_VP_MASK_CMP_SHF 1 +#define GIC_VP_MASK_CMP_MSK (MSK(1) << GIC_VP_MASK_CMP_SHF) +#define GIC_VP_MASK_WD_SHF 0 +#define GIC_VP_MASK_WD_MSK (MSK(1) << GIC_VP_MASK_WD_SHF) +#define GIC_VP_SET_RESET_MSK (MSK(7) << GIC_VP_MASK_WD_SHF) + +#define GIC_CPU_INT_MAX 5 /* Core Interrupt 7 */ +#define GIC_CPU_PIN_OFFSET 2 + +/* Local GIC interrupts. */ +#define GIC_NUM_LOCAL_INTRS 7 +#define GIC_LOCAL_INT_FDC 6 /* CPU fast debug channel */ +#define GIC_LOCAL_INT_SWINT1 5 /* CPU software interrupt 1 */ +#define GIC_LOCAL_INT_SWINT0 4 /* CPU software interrupt 0 */ +#define GIC_LOCAL_INT_PERFCTR 3 /* CPU performance counter */ +#define GIC_LOCAL_INT_TIMER 2 /* CPU timer interrupt */ +#define GIC_LOCAL_INT_COMPARE 1 /* GIC count and compare timer */ +#define GIC_LOCAL_INT_WD 0 /* GIC watchdog */ + +#define TYPE_MIPS_GIC "mips-gic" +OBJECT_DECLARE_SIMPLE_TYPE(MIPSGICState, MIPS_GIC) + +/* Support up to 32 VPs and 256 IRQs */ +#define GIC_MAX_VPS 32 +#define GIC_MAX_INTRS 256 + +typedef struct MIPSGICIRQState MIPSGICIRQState; +typedef struct MIPSGICVPState MIPSGICVPState; + +struct MIPSGICIRQState { + uint8_t enabled; + uint8_t pending; + uint32_t map_pin; + int32_t map_vp; + qemu_irq irq; +}; + +struct MIPSGICVPState { + uint32_t ctl; + uint32_t pend; + uint32_t mask; + uint32_t compare_map; + uint32_t other_addr; + CPUMIPSState *env; +}; + +struct MIPSGICState { + SysBusDevice parent_obj; + MemoryRegion mr; + + /* Shared Section Registers */ + uint32_t sh_config; + MIPSGICIRQState *irq_state; + + /* VP Local/Other Section Registers */ + MIPSGICVPState *vps; + + /* GIC VP Timer */ + MIPSGICTimerState *gic_timer; + + int32_t num_vps; + int32_t num_irq; +}; + +#endif /* MIPS_GIC_H */ diff --git a/include/hw/intc/realview_gic.h b/include/hw/intc/realview_gic.h new file mode 100644 index 000000000..f37339dc0 --- /dev/null +++ b/include/hw/intc/realview_gic.h @@ -0,0 +1,28 @@ +/* + * ARM RealView Emulation Baseboard Interrupt Controller + * + * Copyright (c) 2006-2007 CodeSourcery. + * Written by Paul Brook + * + * This code is licensed under the GPL. + */ + +#ifndef HW_INTC_REALVIEW_GIC_H +#define HW_INTC_REALVIEW_GIC_H + +#include "hw/sysbus.h" +#include "hw/intc/arm_gic.h" +#include "qom/object.h" + +#define TYPE_REALVIEW_GIC "realview_gic" +OBJECT_DECLARE_SIMPLE_TYPE(RealViewGICState, REALVIEW_GIC) + +struct RealViewGICState { + SysBusDevice parent_obj; + + MemoryRegion container; + + GICState gic; +}; + +#endif diff --git a/include/hw/intc/rx_icu.h b/include/hw/intc/rx_icu.h new file mode 100644 index 000000000..7f5889b36 --- /dev/null +++ b/include/hw/intc/rx_icu.h @@ -0,0 +1,76 @@ +/* + * RX Interrupt Control Unit + * + * Copyright (c) 2019 Yoshinori Sato + * + * SPDX-License-Identifier: GPL-2.0-or-later + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2 or later, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along with + * this program. If not, see . + */ + +#ifndef HW_INTC_RX_ICU_H +#define HW_INTC_RX_ICU_H + +#include "hw/sysbus.h" +#include "qom/object.h" + +enum TRG_MODE { + TRG_LEVEL = 0, + TRG_NEDGE = 1, /* Falling */ + TRG_PEDGE = 2, /* Raising */ + TRG_BEDGE = 3, /* Both */ +}; + +struct IRQSource { + enum TRG_MODE sense; + int level; +}; + +enum { + /* Software interrupt request */ + SWI = 27, + NR_IRQS = 256 +}; + +struct RXICUState { + /*< private >*/ + SysBusDevice parent_obj; + /*< public >*/ + + MemoryRegion memory; + struct IRQSource src[NR_IRQS]; + uint32_t nr_irqs; + uint8_t *map; + uint32_t nr_sense; + uint8_t *init_sense; + + uint8_t ir[NR_IRQS]; + uint8_t dtcer[NR_IRQS]; + uint8_t ier[NR_IRQS / 8]; + uint8_t ipr[142]; + uint8_t dmasr[4]; + uint16_t fir; + uint8_t nmisr; + uint8_t nmier; + uint8_t nmiclr; + uint8_t nmicr; + int16_t req_irq; + qemu_irq _irq; + qemu_irq _fir; + qemu_irq _swi; +}; + +#define TYPE_RX_ICU "rx-icu" +OBJECT_DECLARE_SIMPLE_TYPE(RXICUState, RX_ICU) + +#endif /* RX_ICU_H */ diff --git a/include/hw/intc/sifive_clint.h b/include/hw/intc/sifive_clint.h new file mode 100644 index 000000000..a30be0f3d --- /dev/null +++ b/include/hw/intc/sifive_clint.h @@ -0,0 +1,60 @@ +/* + * SiFive CLINT (Core Local Interruptor) interface + * + * Copyright (c) 2016-2017 Sagar Karandikar, sagark@eecs.berkeley.edu + * Copyright (c) 2017 SiFive, Inc. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2 or later, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along with + * this program. If not, see . + */ + +#ifndef HW_SIFIVE_CLINT_H +#define HW_SIFIVE_CLINT_H + +#include "hw/sysbus.h" + +#define TYPE_SIFIVE_CLINT "riscv.sifive.clint" + +#define SIFIVE_CLINT(obj) \ + OBJECT_CHECK(SiFiveCLINTState, (obj), TYPE_SIFIVE_CLINT) + +typedef struct SiFiveCLINTState { + /*< private >*/ + SysBusDevice parent_obj; + + /*< public >*/ + MemoryRegion mmio; + uint32_t hartid_base; + uint32_t num_harts; + uint32_t sip_base; + uint32_t timecmp_base; + uint32_t time_base; + uint32_t aperture_size; + uint32_t timebase_freq; +} SiFiveCLINTState; + +DeviceState *sifive_clint_create(hwaddr addr, hwaddr size, + uint32_t hartid_base, uint32_t num_harts, uint32_t sip_base, + uint32_t timecmp_base, uint32_t time_base, uint32_t timebase_freq, + bool provide_rdtime); + +enum { + SIFIVE_SIP_BASE = 0x0, + SIFIVE_TIMECMP_BASE = 0x4000, + SIFIVE_TIME_BASE = 0xBFF8 +}; + +enum { + SIFIVE_CLINT_TIMEBASE_FREQ = 10000000 +}; + +#endif diff --git a/include/hw/intc/sifive_plic.h b/include/hw/intc/sifive_plic.h new file mode 100644 index 000000000..1e451a270 --- /dev/null +++ b/include/hw/intc/sifive_plic.h @@ -0,0 +1,84 @@ +/* + * SiFive PLIC (Platform Level Interrupt Controller) interface + * + * Copyright (c) 2017 SiFive, Inc. + * + * This provides a RISC-V PLIC device + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2 or later, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along with + * this program. If not, see . + */ + +#ifndef HW_SIFIVE_PLIC_H +#define HW_SIFIVE_PLIC_H + +#include "hw/sysbus.h" +#include "qom/object.h" + +#define TYPE_SIFIVE_PLIC "riscv.sifive.plic" + +typedef struct SiFivePLICState SiFivePLICState; +DECLARE_INSTANCE_CHECKER(SiFivePLICState, SIFIVE_PLIC, + TYPE_SIFIVE_PLIC) + +typedef enum PLICMode { + PLICMode_U, + PLICMode_S, + PLICMode_H, + PLICMode_M +} PLICMode; + +typedef struct PLICAddr { + uint32_t addrid; + uint32_t hartid; + PLICMode mode; +} PLICAddr; + +struct SiFivePLICState { + /*< private >*/ + SysBusDevice parent_obj; + + /*< public >*/ + MemoryRegion mmio; + uint32_t num_addrs; + uint32_t num_harts; + uint32_t bitfield_words; + uint32_t num_enables; + PLICAddr *addr_config; + uint32_t *source_priority; + uint32_t *target_priority; + uint32_t *pending; + uint32_t *claimed; + uint32_t *enable; + + /* config */ + char *hart_config; + uint32_t hartid_base; + uint32_t num_sources; + uint32_t num_priorities; + uint32_t priority_base; + uint32_t pending_base; + uint32_t enable_base; + uint32_t enable_stride; + uint32_t context_base; + uint32_t context_stride; + uint32_t aperture_size; +}; + +DeviceState *sifive_plic_create(hwaddr addr, char *hart_config, + uint32_t hartid_base, uint32_t num_sources, + uint32_t num_priorities, uint32_t priority_base, + uint32_t pending_base, uint32_t enable_base, + uint32_t enable_stride, uint32_t context_base, + uint32_t context_stride, uint32_t aperture_size); + +#endif diff --git a/include/hw/intc/xlnx-pmu-iomod-intc.h b/include/hw/intc/xlnx-pmu-iomod-intc.h new file mode 100644 index 000000000..ccc8bd272 --- /dev/null +++ b/include/hw/intc/xlnx-pmu-iomod-intc.h @@ -0,0 +1,57 @@ +/* + * QEMU model of Xilinx I/O Module Interrupt Controller + * + * Copyright (c) 2014 Xilinx Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ + +#ifndef HW_INTC_XLNX_PMU_IOMOD_INTC_H +#define HW_INTC_XLNX_PMU_IOMOD_INTC_H + +#include "hw/sysbus.h" +#include "hw/register.h" +#include "qom/object.h" + +#define TYPE_XLNX_PMU_IO_INTC "xlnx.pmu_io_intc" + +OBJECT_DECLARE_SIMPLE_TYPE(XlnxPMUIOIntc, XLNX_PMU_IO_INTC) + +/* This is R_PIT3_CONTROL + 1 */ +#define XLNXPMUIOINTC_R_MAX (0x78 + 1) + +struct XlnxPMUIOIntc { + SysBusDevice parent_obj; + MemoryRegion iomem; + + qemu_irq parent_irq; + + struct { + uint32_t intr_size; + uint32_t level_edge; + uint32_t positive; + } cfg; + + uint32_t irq_raw; + + uint32_t regs[XLNXPMUIOINTC_R_MAX]; + RegisterInfo regs_info[XLNXPMUIOINTC_R_MAX]; +}; + +#endif /* HW_INTC_XLNX_PMU_IOMOD_INTC_H */ diff --git a/include/hw/intc/xlnx-zynqmp-ipi.h b/include/hw/intc/xlnx-zynqmp-ipi.h new file mode 100644 index 000000000..33eff1d4f --- /dev/null +++ b/include/hw/intc/xlnx-zynqmp-ipi.h @@ -0,0 +1,56 @@ +/* + * QEMU model of the IPI Inter Processor Interrupt block + * + * Copyright (c) 2014 Xilinx Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ + +#ifndef XLNX_ZYNQMP_IPI_H +#define XLNX_ZYNQMP_IPI_H + +#include "hw/sysbus.h" +#include "hw/register.h" +#include "qom/object.h" + +#define TYPE_XLNX_ZYNQMP_IPI "xlnx.zynqmp_ipi" + +OBJECT_DECLARE_SIMPLE_TYPE(XlnxZynqMPIPI, XLNX_ZYNQMP_IPI) + +/* This is R_IPI_IDR + 1 */ +#define R_XLNX_ZYNQMP_IPI_MAX ((0x1c / 4) + 1) + +#define NUM_IPIS 11 + +struct XlnxZynqMPIPI { + /* Private */ + SysBusDevice parent_obj; + + /* Public */ + MemoryRegion iomem; + qemu_irq irq; + + qemu_irq irq_trig_out[NUM_IPIS]; + qemu_irq irq_obs_out[NUM_IPIS]; + + uint32_t regs[R_XLNX_ZYNQMP_IPI_MAX]; + RegisterInfo regs_info[R_XLNX_ZYNQMP_IPI_MAX]; +}; + +#endif /* XLNX_ZYNQMP_IPI_H */ diff --git a/include/hw/ipack/ipack.h b/include/hw/ipack/ipack.h new file mode 100644 index 000000000..75014e74a --- /dev/null +++ b/include/hw/ipack/ipack.h @@ -0,0 +1,81 @@ +/* + * QEMU IndustryPack emulation + * + * Copyright (C) 2012 Igalia, S.L. + * Author: Alberto Garcia + * + * This code is licensed under the GNU GPL v2 or (at your option) any + * later version. + */ + +#ifndef QEMU_IPACK_H +#define QEMU_IPACK_H + +#include "hw/qdev-core.h" +#include "qom/object.h" + + +#define TYPE_IPACK_BUS "IndustryPack" +OBJECT_DECLARE_SIMPLE_TYPE(IPackBus, IPACK_BUS) + +struct IPackBus { + /*< private >*/ + BusState parent_obj; + + /* All fields are private */ + uint8_t n_slots; + uint8_t free_slot; + qemu_irq_handler set_irq; +}; + + +#define TYPE_IPACK_DEVICE "ipack-device" +OBJECT_DECLARE_TYPE(IPackDevice, IPackDeviceClass, + IPACK_DEVICE) + +struct IPackDeviceClass { + /*< private >*/ + DeviceClass parent_class; + /*< public >*/ + + DeviceRealize realize; + DeviceUnrealize unrealize; + + uint16_t (*io_read)(IPackDevice *dev, uint8_t addr); + void (*io_write)(IPackDevice *dev, uint8_t addr, uint16_t val); + + uint16_t (*id_read)(IPackDevice *dev, uint8_t addr); + void (*id_write)(IPackDevice *dev, uint8_t addr, uint16_t val); + + uint16_t (*int_read)(IPackDevice *dev, uint8_t addr); + void (*int_write)(IPackDevice *dev, uint8_t addr, uint16_t val); + + uint16_t (*mem_read16)(IPackDevice *dev, uint32_t addr); + void (*mem_write16)(IPackDevice *dev, uint32_t addr, uint16_t val); + + uint8_t (*mem_read8)(IPackDevice *dev, uint32_t addr); + void (*mem_write8)(IPackDevice *dev, uint32_t addr, uint8_t val); +}; + +struct IPackDevice { + /*< private >*/ + DeviceState parent_obj; + /*< public >*/ + + int32_t slot; + /* IRQ objects for the IndustryPack INT0# and INT1# */ + qemu_irq *irq; +}; + +extern const VMStateDescription vmstate_ipack_device; + +#define VMSTATE_IPACK_DEVICE(_field, _state) \ + VMSTATE_STRUCT(_field, _state, 1, vmstate_ipack_device, IPackDevice) + +IPackDevice *ipack_device_find(IPackBus *bus, int32_t slot); +void ipack_bus_new_inplace(IPackBus *bus, size_t bus_size, + DeviceState *parent, + const char *name, uint8_t n_slots, + qemu_irq_handler handler); + +#endif diff --git a/include/hw/ipmi/ipmi.h b/include/hw/ipmi/ipmi.h new file mode 100644 index 000000000..77a7213ed --- /dev/null +++ b/include/hw/ipmi/ipmi.h @@ -0,0 +1,305 @@ +/* + * IPMI base class + * + * Copyright (c) 2015 Corey Minyard, MontaVista Software, LLC + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ + +#ifndef HW_IPMI_H +#define HW_IPMI_H + +#include "exec/memory.h" +#include "hw/qdev-core.h" +#include "qom/object.h" + +#define MAX_IPMI_MSG_SIZE 300 + +enum ipmi_op { + IPMI_RESET_CHASSIS, + IPMI_POWEROFF_CHASSIS, + IPMI_POWERON_CHASSIS, + IPMI_POWERCYCLE_CHASSIS, + IPMI_PULSE_DIAG_IRQ, + IPMI_SHUTDOWN_VIA_ACPI_OVERTEMP, + IPMI_SEND_NMI +}; + +#define IPMI_CC_INVALID_CMD 0xc1 +#define IPMI_CC_COMMAND_INVALID_FOR_LUN 0xc2 +#define IPMI_CC_TIMEOUT 0xc3 +#define IPMI_CC_OUT_OF_SPACE 0xc4 +#define IPMI_CC_INVALID_RESERVATION 0xc5 +#define IPMI_CC_REQUEST_DATA_TRUNCATED 0xc6 +#define IPMI_CC_REQUEST_DATA_LENGTH_INVALID 0xc7 +#define IPMI_CC_PARM_OUT_OF_RANGE 0xc9 +#define IPMI_CC_CANNOT_RETURN_REQ_NUM_BYTES 0xca +#define IPMI_CC_REQ_ENTRY_NOT_PRESENT 0xcb +#define IPMI_CC_INVALID_DATA_FIELD 0xcc +#define IPMI_CC_BMC_INIT_IN_PROGRESS 0xd2 +#define IPMI_CC_COMMAND_NOT_SUPPORTED 0xd5 +#define IPMI_CC_UNSPECIFIED 0xff + +#define IPMI_NETFN_APP 0x06 +#define IPMI_NETFN_OEM 0x3a + +#define IPMI_DEBUG 1 + +/* Specified in the SMBIOS spec. */ +#define IPMI_SMBIOS_KCS 0x01 +#define IPMI_SMBIOS_SMIC 0x02 +#define IPMI_SMBIOS_BT 0x03 +#define IPMI_SMBIOS_SSIF 0x04 + +/* + * Used for transferring information to interfaces that add + * entries to firmware tables. + */ +typedef struct IPMIFwInfo { + const char *interface_name; + int interface_type; + uint8_t ipmi_spec_major_revision; + uint8_t ipmi_spec_minor_revision; + uint8_t i2c_slave_address; + uint32_t uuid; + + uint64_t base_address; + uint64_t register_length; + uint8_t register_spacing; + enum { + IPMI_MEMSPACE_IO, + IPMI_MEMSPACE_MEM32, + IPMI_MEMSPACE_MEM64, + IPMI_MEMSPACE_SMBUS + } memspace; + + int interrupt_number; + enum { + IPMI_LEVEL_IRQ, + IPMI_EDGE_IRQ + } irq_type; +} IPMIFwInfo; + +/* + * Called by each instantiated IPMI interface device to get it's uuid. + */ +uint32_t ipmi_next_uuid(void); + +/* IPMI Interface types (KCS, SMIC, BT) are prefixed with this */ +#define TYPE_IPMI_INTERFACE_PREFIX "ipmi-interface-" + +/* + * An IPMI Interface, the interface for talking between the target + * and the BMC. + */ +#define TYPE_IPMI_INTERFACE "ipmi-interface" +#define IPMI_INTERFACE(obj) \ + INTERFACE_CHECK(IPMIInterface, (obj), TYPE_IPMI_INTERFACE) +typedef struct IPMIInterfaceClass IPMIInterfaceClass; +DECLARE_CLASS_CHECKERS(IPMIInterfaceClass, IPMI_INTERFACE, + TYPE_IPMI_INTERFACE) + +typedef struct IPMIInterface IPMIInterface; + +struct IPMIInterfaceClass { + InterfaceClass parent; + + /* + * min_size is the requested I/O size and must be a power of 2. + * This is so PCI (or other busses) can request a bigger range. + * Use 0 for the default. + */ + void (*init)(struct IPMIInterface *s, unsigned int min_size, Error **errp); + + /* + * Perform various operations on the hardware. If checkonly is + * true, it will return if the operation can be performed, but it + * will not do the operation. + */ + int (*do_hw_op)(struct IPMIInterface *s, enum ipmi_op op, int checkonly); + + /* + * Enable/disable irqs on the interface when the BMC requests this. + */ + void (*set_irq_enable)(struct IPMIInterface *s, int val); + + /* + * Handle an event that occurred on the interface, generally the. + * target writing to a register. + */ + void (*handle_if_event)(struct IPMIInterface *s); + + /* + * The interfaces use this to perform certain ops + */ + void (*set_atn)(struct IPMIInterface *s, int val, int irq); + + /* + * Got an IPMI warm/cold reset. + */ + void (*reset)(struct IPMIInterface *s, bool is_cold); + + /* + * Handle a response from the bmc. + */ + void (*handle_rsp)(struct IPMIInterface *s, uint8_t msg_id, + unsigned char *rsp, unsigned int rsp_len); + + /* + * Set by the owner to hold the backend data for the interface. + */ + void *(*get_backend_data)(struct IPMIInterface *s); + + /* + * Return the firmware info for a device. + */ + void (*get_fwinfo)(struct IPMIInterface *s, IPMIFwInfo *info); +}; + +/* + * Define a BMC simulator (or perhaps a connection to a real BMC) + */ +#define TYPE_IPMI_BMC "ipmi-bmc" +OBJECT_DECLARE_TYPE(IPMIBmc, IPMIBmcClass, + IPMI_BMC) + +struct IPMIBmc { + DeviceState parent; + + uint8_t slave_addr; + + IPMIInterface *intf; +}; + +struct IPMIBmcClass { + DeviceClass parent; + + /* Called when the system resets to report to the bmc. */ + void (*handle_reset)(struct IPMIBmc *s); + + /* + * Handle a command to the bmc. + */ + void (*handle_command)(struct IPMIBmc *s, + uint8_t *cmd, unsigned int cmd_len, + unsigned int max_cmd_len, + uint8_t msg_id); +}; + +/* + * Add a link property to obj that points to a BMC. + */ +void ipmi_bmc_find_and_link(Object *obj, Object **bmc); + +#ifdef IPMI_DEBUG +#define ipmi_debug(fs, ...) \ + fprintf(stderr, "IPMI (%s): " fs, __func__, ##__VA_ARGS__) +#else +#define ipmi_debug(fs, ...) +#endif + +struct ipmi_sdr_header { + uint8_t rec_id[2]; + uint8_t sdr_version; /* 0x51 */ + uint8_t rec_type; + uint8_t rec_length; +}; +#define IPMI_SDR_HEADER_SIZE sizeof(struct ipmi_sdr_header) + +#define ipmi_sdr_recid(sdr) ((sdr)->rec_id[0] | ((sdr)->rec_id[1] << 8)) +#define ipmi_sdr_length(sdr) ((sdr)->rec_length + IPMI_SDR_HEADER_SIZE) + +/* + * 43.2 SDR Type 02h. Compact Sensor Record + */ +#define IPMI_SDR_COMPACT_TYPE 2 + +struct ipmi_sdr_compact { + struct ipmi_sdr_header header; + + uint8_t sensor_owner_id; + uint8_t sensor_owner_lun; + uint8_t sensor_owner_number; /* byte 8 */ + uint8_t entity_id; + uint8_t entity_instance; + uint8_t sensor_init; + uint8_t sensor_caps; + uint8_t sensor_type; + uint8_t reading_type; + uint8_t assert_mask[2]; /* byte 16 */ + uint8_t deassert_mask[2]; + uint8_t discrete_mask[2]; + uint8_t sensor_unit1; + uint8_t sensor_unit2; + uint8_t sensor_unit3; + uint8_t sensor_direction[2]; /* byte 24 */ + uint8_t positive_threshold; + uint8_t negative_threshold; + uint8_t reserved[3]; + uint8_t oem; + uint8_t id_str_len; /* byte 32 */ + uint8_t id_string[16]; +}; + +typedef uint8_t ipmi_sdr_compact_buffer[sizeof(struct ipmi_sdr_compact)]; + +int ipmi_bmc_sdr_find(IPMIBmc *b, uint16_t recid, + const struct ipmi_sdr_compact **sdr, uint16_t *nextrec); +void ipmi_bmc_gen_event(IPMIBmc *b, uint8_t *evt, bool log); + +#define TYPE_IPMI_BMC_SIMULATOR "ipmi-bmc-sim" +OBJECT_DECLARE_SIMPLE_TYPE(IPMIBmcSim, IPMI_BMC_SIMULATOR) + + +typedef struct RspBuffer { + uint8_t buffer[MAX_IPMI_MSG_SIZE]; + unsigned int len; +} RspBuffer; + +static inline void rsp_buffer_set_error(RspBuffer *rsp, uint8_t byte) +{ + rsp->buffer[2] = byte; +} + +/* Add a byte to the response. */ +static inline void rsp_buffer_push(RspBuffer *rsp, uint8_t byte) +{ + if (rsp->len >= sizeof(rsp->buffer)) { + rsp_buffer_set_error(rsp, IPMI_CC_REQUEST_DATA_TRUNCATED); + return; + } + rsp->buffer[rsp->len++] = byte; +} + +typedef struct IPMICmdHandler { + void (*cmd_handler)(IPMIBmcSim *s, + uint8_t *cmd, unsigned int cmd_len, + RspBuffer *rsp); + unsigned int cmd_len_min; +} IPMICmdHandler; + +typedef struct IPMINetfn { + unsigned int cmd_nums; + const IPMICmdHandler *cmd_handlers; +} IPMINetfn; + +int ipmi_sim_register_netfn(IPMIBmcSim *s, unsigned int netfn, + const IPMINetfn *netfnd); + +#endif diff --git a/include/hw/ipmi/ipmi_bt.h b/include/hw/ipmi/ipmi_bt.h new file mode 100644 index 000000000..8a4316ea7 --- /dev/null +++ b/include/hw/ipmi/ipmi_bt.h @@ -0,0 +1,73 @@ +/* + * QEMU IPMI BT emulation + * + * Copyright (c) 2015 Corey Minyard, MontaVista Software, LLC + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ + +#ifndef HW_IPMI_BT_H +#define HW_IPMI_BT_H + +#include "hw/ipmi/ipmi.h" + +typedef struct IPMIBT { + IPMIBmc *bmc; + + bool do_wake; + + bool obf_irq_set; + bool atn_irq_set; + bool irqs_enabled; + + uint8_t outmsg[MAX_IPMI_MSG_SIZE]; + uint32_t outpos; + uint32_t outlen; + + uint8_t inmsg[MAX_IPMI_MSG_SIZE]; + uint32_t inlen; + + uint8_t control_reg; + uint8_t mask_reg; + + /* + * This is a response number that we send with the command to make + * sure that the response matches the command. + */ + uint8_t waiting_rsp; + uint8_t waiting_seq; + + uint32_t io_base; + unsigned long io_length; + MemoryRegion io; + unsigned long size_mask; + + void (*raise_irq)(struct IPMIBT *ib); + void (*lower_irq)(struct IPMIBT *ib); + void *opaque; + + bool use_irq; +} IPMIBT; + +void ipmi_bt_get_fwinfo(IPMIBT *ik, IPMIFwInfo *info); +void ipmi_bt_class_init(IPMIInterfaceClass *iic); +extern const VMStateDescription vmstate_IPMIBT; +int ipmi_bt_vmstate_post_load(void *opaque, int version); + +#endif /* HW_IPMI_BT_H */ diff --git a/include/hw/ipmi/ipmi_kcs.h b/include/hw/ipmi/ipmi_kcs.h new file mode 100644 index 000000000..6e6ef4c53 --- /dev/null +++ b/include/hw/ipmi/ipmi_kcs.h @@ -0,0 +1,76 @@ +/* + * QEMU IPMI KCS emulation + * + * Copyright (c) 2015,2017 Corey Minyard, MontaVista Software, LLC + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ + +#ifndef HW_IPMI_KCS_H +#define HW_IPMI_KCS_H + +#include "hw/ipmi/ipmi.h" + +typedef struct IPMIKCS { + IPMIBmc *bmc; + + bool do_wake; + + bool obf_irq_set; + bool atn_irq_set; + bool irqs_enabled; + + uint8_t outmsg[MAX_IPMI_MSG_SIZE]; + uint32_t outpos; + uint32_t outlen; + + uint8_t inmsg[MAX_IPMI_MSG_SIZE]; + uint32_t inlen; + bool write_end; + + uint8_t status_reg; + uint8_t data_out_reg; + + int16_t data_in_reg; /* -1 means not written */ + int16_t cmd_reg; + + /* + * This is a response number that we send with the command to make + * sure that the response matches the command. + */ + uint8_t waiting_rsp; + + uint32_t io_base; + unsigned long io_length; + MemoryRegion io; + unsigned long size_mask; + + void (*raise_irq)(struct IPMIKCS *ik); + void (*lower_irq)(struct IPMIKCS *ik); + void *opaque; + + bool use_irq; +} IPMIKCS; + +void ipmi_kcs_get_fwinfo(IPMIKCS *ik, IPMIFwInfo *info); +void ipmi_kcs_class_init(IPMIInterfaceClass *iic); +extern const VMStateDescription vmstate_IPMIKCS; +int ipmi_kcs_vmstate_post_load(void *opaque, int version); + +#endif /* HW_IPMI_KCS_H */ diff --git a/include/hw/irq.h b/include/hw/irq.h new file mode 100644 index 000000000..dc7abf199 --- /dev/null +++ b/include/hw/irq.h @@ -0,0 +1,76 @@ +#ifndef QEMU_IRQ_H +#define QEMU_IRQ_H + +/* Generic IRQ/GPIO pin infrastructure. */ + +#define TYPE_IRQ "irq" + +void qemu_set_irq(qemu_irq irq, int level); + +static inline void qemu_irq_raise(qemu_irq irq) +{ + qemu_set_irq(irq, 1); +} + +static inline void qemu_irq_lower(qemu_irq irq) +{ + qemu_set_irq(irq, 0); +} + +static inline void qemu_irq_pulse(qemu_irq irq) +{ + qemu_set_irq(irq, 1); + qemu_set_irq(irq, 0); +} + +/* Returns an array of N IRQs. Each IRQ is assigned the argument handler and + * opaque data. + */ +qemu_irq *qemu_allocate_irqs(qemu_irq_handler handler, void *opaque, int n); + +/* + * Allocates a single IRQ. The irq is assigned with a handler, an opaque + * data and the interrupt number. + */ +qemu_irq qemu_allocate_irq(qemu_irq_handler handler, void *opaque, int n); + +/* Extends an Array of IRQs. Old IRQs have their handlers and opaque data + * preserved. New IRQs are assigned the argument handler and opaque data. + */ +qemu_irq *qemu_extend_irqs(qemu_irq *old, int n_old, qemu_irq_handler handler, + void *opaque, int n); + +void qemu_free_irqs(qemu_irq *s, int n); +void qemu_free_irq(qemu_irq irq); + +/* Returns a new IRQ with opposite polarity. */ +qemu_irq qemu_irq_invert(qemu_irq irq); + +/* Returns a new IRQ which feeds into both the passed IRQs. + * It's probably better to use the TYPE_SPLIT_IRQ device instead. + */ +qemu_irq qemu_irq_split(qemu_irq irq1, qemu_irq irq2); + +/* For internal use in qtest. Similar to qemu_irq_split, but operating + on an existing vector of qemu_irq. */ +void qemu_irq_intercept_in(qemu_irq *gpio_in, qemu_irq_handler handler, int n); + +/** + * qemu_irq_is_connected: Return true if IRQ line is wired up + * + * If a qemu_irq has a device on the other (receiving) end of it, + * return true; otherwise return false. + * + * Usually device models don't need to care whether the machine model + * has wired up their outbound qemu_irq lines, because functions like + * qemu_set_irq() silently do nothing if there is nothing on the other + * end of the line. However occasionally a device model will want to + * provide default behaviour if its output is left floating, and + * it can use this function to identify when that is the case. + */ +static inline bool qemu_irq_is_connected(qemu_irq irq) +{ + return irq != NULL; +} + +#endif diff --git a/include/hw/isa/apm.h b/include/hw/isa/apm.h new file mode 100644 index 000000000..b6e070c00 --- /dev/null +++ b/include/hw/isa/apm.h @@ -0,0 +1,25 @@ +#ifndef APM_H +#define APM_H + +#include "exec/memory.h" + +#define APM_CNT_IOPORT 0xb2 +#define ACPI_PORT_SMI_CMD APM_CNT_IOPORT + +typedef void (*apm_ctrl_changed_t)(uint32_t val, void *arg); + +typedef struct APMState { + uint8_t apmc; + uint8_t apms; + + apm_ctrl_changed_t callback; + void *arg; + MemoryRegion io; +} APMState; + +void apm_init(PCIDevice *dev, APMState *s, apm_ctrl_changed_t callback, + void *arg); + +extern const VMStateDescription vmstate_apm; + +#endif /* APM_H */ diff --git a/include/hw/isa/i8259_internal.h b/include/hw/isa/i8259_internal.h new file mode 100644 index 000000000..a6ae8a583 --- /dev/null +++ b/include/hw/isa/i8259_internal.h @@ -0,0 +1,79 @@ +/* + * QEMU 8259 - internal interfaces + * + * Copyright (c) 2011 Jan Kiszka, Siemens AG + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ + +#ifndef QEMU_I8259_INTERNAL_H +#define QEMU_I8259_INTERNAL_H + +#include "hw/isa/isa.h" +#include "hw/intc/intc.h" +#include "hw/intc/i8259.h" +#include "qom/object.h" + + +#define TYPE_PIC_COMMON "pic-common" +OBJECT_DECLARE_TYPE(PICCommonState, PICCommonClass, PIC_COMMON) + +struct PICCommonClass { + ISADeviceClass parent_class; + + void (*pre_save)(PICCommonState *s); + void (*post_load)(PICCommonState *s); +}; + +struct PICCommonState { + ISADevice parent_obj; + + uint8_t last_irr; /* edge detection */ + uint8_t irr; /* interrupt request register */ + uint8_t imr; /* interrupt mask register */ + uint8_t isr; /* interrupt service register */ + uint8_t priority_add; /* highest irq priority */ + uint8_t irq_base; + uint8_t read_reg_select; + uint8_t poll; + uint8_t special_mask; + uint8_t init_state; + uint8_t auto_eoi; + uint8_t rotate_on_auto_eoi; + uint8_t special_fully_nested_mode; + uint8_t init4; /* true if 4 byte init */ + uint8_t single_mode; /* true if slave pic is not initialized */ + uint8_t elcr; /* PIIX edge/trigger selection*/ + uint8_t elcr_mask; + qemu_irq int_out[1]; + uint32_t master; /* reflects /SP input pin */ + uint32_t iobase; + uint32_t elcr_addr; + MemoryRegion base_io; + MemoryRegion elcr_io; +}; + +void pic_reset_common(PICCommonState *s); +ISADevice *i8259_init_chip(const char *name, ISABus *bus, bool master); +void pic_stat_update_irq(int irq, int level); +bool pic_get_statistics(InterruptStatsProvider *obj, + uint64_t **irq_counts, unsigned int *nb_irqs); +void pic_print_info(InterruptStatsProvider *obj, Monitor *mon); + +#endif /* QEMU_I8259_INTERNAL_H */ diff --git a/include/hw/isa/isa.h b/include/hw/isa/isa.h new file mode 100644 index 000000000..ddaae89a8 --- /dev/null +++ b/include/hw/isa/isa.h @@ -0,0 +1,149 @@ +#ifndef HW_ISA_H +#define HW_ISA_H + +/* ISA bus */ + +#include "exec/memory.h" +#include "exec/ioport.h" +#include "hw/qdev-core.h" +#include "qom/object.h" + +#define ISA_NUM_IRQS 16 + +#define TYPE_ISA_DEVICE "isa-device" +OBJECT_DECLARE_TYPE(ISADevice, ISADeviceClass, ISA_DEVICE) + +#define TYPE_ISA_BUS "ISA" +OBJECT_DECLARE_SIMPLE_TYPE(ISABus, ISA_BUS) + +#define TYPE_APPLE_SMC "isa-applesmc" +#define APPLESMC_MAX_DATA_LENGTH 32 +#define APPLESMC_PROP_IO_BASE "iobase" + +static inline uint16_t applesmc_port(void) +{ + Object *obj = object_resolve_path_type("", TYPE_APPLE_SMC, NULL); + + if (obj) { + return object_property_get_uint(obj, APPLESMC_PROP_IO_BASE, NULL); + } + return 0; +} + +#define TYPE_ISADMA "isa-dma" + +typedef struct IsaDmaClass IsaDmaClass; +DECLARE_CLASS_CHECKERS(IsaDmaClass, ISADMA, + TYPE_ISADMA) +#define ISADMA(obj) \ + INTERFACE_CHECK(IsaDma, (obj), TYPE_ISADMA) + +typedef enum { + ISADMA_TRANSFER_VERIFY, + ISADMA_TRANSFER_READ, + ISADMA_TRANSFER_WRITE, + ISADMA_TRANSFER_ILLEGAL, +} IsaDmaTransferMode; + +typedef int (*IsaDmaTransferHandler)(void *opaque, int nchan, int pos, + int size); + +struct IsaDmaClass { + InterfaceClass parent; + + bool (*has_autoinitialization)(IsaDma *obj, int nchan); + int (*read_memory)(IsaDma *obj, int nchan, void *buf, int pos, int len); + int (*write_memory)(IsaDma *obj, int nchan, void *buf, int pos, int len); + void (*hold_DREQ)(IsaDma *obj, int nchan); + void (*release_DREQ)(IsaDma *obj, int nchan); + void (*schedule)(IsaDma *obj); + void (*register_channel)(IsaDma *obj, int nchan, + IsaDmaTransferHandler transfer_handler, + void *opaque); +}; + +struct ISADeviceClass { + DeviceClass parent_class; + void (*build_aml)(ISADevice *dev, Aml *scope); +}; + +struct ISABus { + /*< private >*/ + BusState parent_obj; + /*< public >*/ + + MemoryRegion *address_space; + MemoryRegion *address_space_io; + qemu_irq *irqs; + IsaDma *dma[2]; +}; + +struct ISADevice { + /*< private >*/ + DeviceState parent_obj; + /*< public >*/ + + int8_t isairq[2]; /* -1 = unassigned */ + int nirqs; + int ioport_id; +}; + +ISABus *isa_bus_new(DeviceState *dev, MemoryRegion *address_space, + MemoryRegion *address_space_io, Error **errp); +void isa_bus_irqs(ISABus *bus, qemu_irq *irqs); +qemu_irq isa_get_irq(ISADevice *dev, unsigned isairq); +void isa_init_irq(ISADevice *dev, qemu_irq *p, unsigned isairq); +void isa_connect_gpio_out(ISADevice *isadev, int gpioirq, unsigned isairq); +void isa_bus_dma(ISABus *bus, IsaDma *dma8, IsaDma *dma16); +IsaDma *isa_get_dma(ISABus *bus, int nchan); +MemoryRegion *isa_address_space(ISADevice *dev); +MemoryRegion *isa_address_space_io(ISADevice *dev); +ISADevice *isa_new(const char *name); +ISADevice *isa_try_new(const char *name); +bool isa_realize_and_unref(ISADevice *dev, ISABus *bus, Error **errp); +ISADevice *isa_create_simple(ISABus *bus, const char *name); + +ISADevice *isa_vga_init(ISABus *bus); +void isa_build_aml(ISABus *bus, Aml *scope); + +/** + * isa_register_ioport: Install an I/O port region on the ISA bus. + * + * Register an I/O port region via memory_region_add_subregion + * inside the ISA I/O address space. + * + * @dev: the ISADevice against which these are registered; may be NULL. + * @io: the #MemoryRegion being registered. + * @start: the base I/O port. + */ +void isa_register_ioport(ISADevice *dev, MemoryRegion *io, uint16_t start); + +/** + * isa_register_portio_list: Initialize a set of ISA io ports + * + * Several ISA devices have many dis-joint I/O ports. Worse, these I/O + * ports can be interleaved with I/O ports from other devices. This + * function makes it easy to create multiple MemoryRegions for a single + * device and use the legacy portio routines. + * + * @dev: the ISADevice against which these are registered; may be NULL. + * @piolist: the PortioList associated with the io ports + * @start: the base I/O port against which the portio->offset is applied. + * @portio: the ports, sorted by offset. + * @opaque: passed into the portio callbacks. + * @name: passed into memory_region_init_io. + */ +void isa_register_portio_list(ISADevice *dev, + PortioList *piolist, + uint16_t start, + const MemoryRegionPortio *portio, + void *opaque, const char *name); + +static inline ISABus *isa_bus_from_device(ISADevice *d) +{ + return ISA_BUS(qdev_get_parent_bus(DEVICE(d))); +} + +#define TYPE_PIIX4_PCI_DEVICE "piix4-isa" + +#endif diff --git a/include/hw/isa/pc87312.h b/include/hw/isa/pc87312.h new file mode 100644 index 000000000..edaf723f4 --- /dev/null +++ b/include/hw/isa/pc87312.h @@ -0,0 +1,56 @@ +/* + * QEMU National Semiconductor PC87312 (Super I/O) + * + * Copyright (c) 2010-2012 Herve Poussineau + * Copyright (c) 2011-2012 Andreas Färber + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ +#ifndef QEMU_PC87312_H +#define QEMU_PC87312_H + +#include "hw/isa/superio.h" +#include "qom/object.h" + + +#define TYPE_PC87312 "pc87312" +OBJECT_DECLARE_SIMPLE_TYPE(PC87312State, PC87312) + +struct PC87312State { + /*< private >*/ + ISASuperIODevice parent_dev; + /*< public >*/ + + uint16_t iobase; + uint8_t config; /* initial configuration */ + + struct { + ISADevice *dev; + } ide; + + MemoryRegion io; + + uint8_t read_id_step; + uint8_t selected_index; + + uint8_t regs[3]; +}; + + +#endif diff --git a/include/hw/isa/superio.h b/include/hw/isa/superio.h new file mode 100644 index 000000000..b9f5c1915 --- /dev/null +++ b/include/hw/isa/superio.h @@ -0,0 +1,60 @@ +/* + * Generic ISA Super I/O + * + * Copyright (c) 2018 Philippe Mathieu-Daudé + * + * This work is licensed under the terms of the GNU GPL, version 2 or later. + * See the COPYING file in the top-level directory. + * SPDX-License-Identifier: GPL-2.0-or-later + */ +#ifndef HW_ISA_SUPERIO_H +#define HW_ISA_SUPERIO_H + +#include "sysemu/sysemu.h" +#include "hw/isa/isa.h" +#include "qom/object.h" + +#define TYPE_ISA_SUPERIO "isa-superio" +typedef struct ISASuperIOClass ISASuperIOClass; +typedef struct ISASuperIODevice ISASuperIODevice; +DECLARE_OBJ_CHECKERS(ISASuperIODevice, ISASuperIOClass, + ISA_SUPERIO, TYPE_ISA_SUPERIO) + +#define SUPERIO_MAX_SERIAL_PORTS 4 + +struct ISASuperIODevice { + /*< private >*/ + ISADevice parent_obj; + /*< public >*/ + + ISADevice *parallel[MAX_PARALLEL_PORTS]; + ISADevice *serial[SUPERIO_MAX_SERIAL_PORTS]; + ISADevice *floppy; + ISADevice *kbc; + ISADevice *ide; +}; + +typedef struct ISASuperIOFuncs { + size_t count; + bool (*is_enabled)(ISASuperIODevice *sio, uint8_t index); + uint16_t (*get_iobase)(ISASuperIODevice *sio, uint8_t index); + unsigned int (*get_irq)(ISASuperIODevice *sio, uint8_t index); + unsigned int (*get_dma)(ISASuperIODevice *sio, uint8_t index); +} ISASuperIOFuncs; + +struct ISASuperIOClass { + /*< private >*/ + ISADeviceClass parent_class; + /*< public >*/ + DeviceRealize parent_realize; + + ISASuperIOFuncs parallel; + ISASuperIOFuncs serial; + ISASuperIOFuncs floppy; + ISASuperIOFuncs ide; +}; + +#define TYPE_FDC37M81X_SUPERIO "fdc37m81x-superio" +#define TYPE_SMC37C669_SUPERIO "smc37c669-superio" + +#endif /* HW_ISA_SUPERIO_H */ diff --git a/include/hw/isa/vt82c686.h b/include/hw/isa/vt82c686.h new file mode 100644 index 000000000..f23f45dfb --- /dev/null +++ b/include/hw/isa/vt82c686.h @@ -0,0 +1,14 @@ +#ifndef HW_VT82C686_H +#define HW_VT82C686_H + + +#define TYPE_VT82C686B_SUPERIO "vt82c686b-superio" + +/* vt82c686.c */ +ISABus *vt82c686b_isa_init(PCIBus * bus, int devfn); +void vt82c686b_ac97_init(PCIBus *bus, int devfn); +void vt82c686b_mc97_init(PCIBus *bus, int devfn); +I2CBus *vt82c686b_pm_init(PCIBus *bus, int devfn, uint32_t smb_io_base, + qemu_irq sci_irq); + +#endif diff --git a/include/hw/kvm/clock.h b/include/hw/kvm/clock.h new file mode 100644 index 000000000..7994071c4 --- /dev/null +++ b/include/hw/kvm/clock.h @@ -0,0 +1,28 @@ +/* + * QEMU KVM support, paravirtual clock device + * + * Copyright (C) 2011 Siemens AG + * + * Authors: + * Jan Kiszka + * + * This work is licensed under the terms of the GNU GPL version 2. + * See the COPYING file in the top-level directory. + */ + +#ifndef HW_KVM_CLOCK_H +#define HW_KVM_CLOCK_H + +#ifdef CONFIG_KVM + +void kvmclock_create(bool create_always); + +#else /* CONFIG_KVM */ + +static inline void kvmclock_create(bool create_always) +{ +} + +#endif /* !CONFIG_KVM */ + +#endif diff --git a/include/hw/lm32/lm32_pic.h b/include/hw/lm32/lm32_pic.h new file mode 100644 index 000000000..9e5e03843 --- /dev/null +++ b/include/hw/lm32/lm32_pic.h @@ -0,0 +1,10 @@ +#ifndef QEMU_HW_LM32_PIC_H +#define QEMU_HW_LM32_PIC_H + + +uint32_t lm32_pic_get_ip(DeviceState *d); +uint32_t lm32_pic_get_im(DeviceState *d); +void lm32_pic_set_ip(DeviceState *d, uint32_t ip); +void lm32_pic_set_im(DeviceState *d, uint32_t im); + +#endif /* QEMU_HW_LM32_PIC_H */ diff --git a/include/hw/loader-fit.h b/include/hw/loader-fit.h new file mode 100644 index 000000000..0832e379d --- /dev/null +++ b/include/hw/loader-fit.h @@ -0,0 +1,41 @@ +/* + * Flattened Image Tree loader. + * + * Copyright (c) 2016 Imagination Technologies + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, see . + */ + +#ifndef HW_LOADER_FIT_H +#define HW_LOADER_FIT_H + +#include "exec/hwaddr.h" + +struct fit_loader_match { + const char *compatible; + const void *data; +}; + +struct fit_loader { + const struct fit_loader_match *matches; + hwaddr (*addr_to_phys)(void *opaque, uint64_t addr); + const void *(*fdt_filter)(void *opaque, const void *fdt, + const void *match_data, hwaddr *load_addr); + const void *(*kernel_filter)(void *opaque, const void *kernel, + hwaddr *load_addr, hwaddr *entry_addr); +}; + +int load_fit(const struct fit_loader *ldr, const char *filename, void *opaque); + +#endif /* HW_LOADER_FIT_H */ diff --git a/include/hw/loader.h b/include/hw/loader.h new file mode 100644 index 000000000..a9eeea395 --- /dev/null +++ b/include/hw/loader.h @@ -0,0 +1,321 @@ +#ifndef LOADER_H +#define LOADER_H +#include "hw/nvram/fw_cfg.h" + +/* loader.c */ +/** + * get_image_size: retrieve size of an image file + * @filename: Path to the image file + * + * Returns the size of the image file on success, -1 otherwise. + * On error, errno is also set as appropriate. + */ +int64_t get_image_size(const char *filename); +/** + * load_image_size: load an image file into specified buffer + * @filename: Path to the image file + * @addr: Buffer to load image into + * @size: Size of buffer in bytes + * + * Load an image file from disk into the specified buffer. + * If the image is larger than the specified buffer, only + * @size bytes are read (this is not considered an error). + * + * Prefer to use the GLib function g_file_get_contents() rather + * than a "get_image_size()/g_malloc()/load_image_size()" sequence. + * + * Returns the number of bytes read, or -1 on error. On error, + * errno is also set as appropriate. + */ +ssize_t load_image_size(const char *filename, void *addr, size_t size); + +/**load_image_targphys_as: + * @filename: Path to the image file + * @addr: Address to load the image to + * @max_sz: The maximum size of the image to load + * @as: The AddressSpace to load the ELF to. The value of address_space_memory + * is used if nothing is supplied here. + * + * Load a fixed image into memory. + * + * Returns the size of the loaded image on success, -1 otherwise. + */ +int load_image_targphys_as(const char *filename, + hwaddr addr, uint64_t max_sz, AddressSpace *as); + +/**load_targphys_hex_as: + * @filename: Path to the .hex file + * @entry: Store the entry point given by the .hex file + * @as: The AddressSpace to load the .hex file to. The value of + * address_space_memory is used if nothing is supplied here. + * + * Load a fixed .hex file into memory. + * + * Returns the size of the loaded .hex file on success, -1 otherwise. + */ +int load_targphys_hex_as(const char *filename, hwaddr *entry, AddressSpace *as); + +/** load_image_targphys: + * Same as load_image_targphys_as(), but doesn't allow the caller to specify + * an AddressSpace. + */ +int load_image_targphys(const char *filename, hwaddr, + uint64_t max_sz); + +/** + * load_image_mr: load an image into a memory region + * @filename: Path to the image file + * @mr: Memory Region to load into + * + * Load the specified file into the memory region. + * The file loaded is registered as a ROM, so its contents will be + * reinstated whenever the system is reset. + * If the file is larger than the memory region's size the call will fail. + * Returns -1 on failure, or the size of the file. + */ +int load_image_mr(const char *filename, MemoryRegion *mr); + +/* This is the limit on the maximum uncompressed image size that + * load_image_gzipped_buffer() and load_image_gzipped() will read. It prevents + * g_malloc() in those functions from allocating a huge amount of memory. + */ +#define LOAD_IMAGE_MAX_GUNZIP_BYTES (256 << 20) + +int load_image_gzipped_buffer(const char *filename, uint64_t max_sz, + uint8_t **buffer); +int load_image_gzipped(const char *filename, hwaddr addr, uint64_t max_sz); + +#define ELF_LOAD_FAILED -1 +#define ELF_LOAD_NOT_ELF -2 +#define ELF_LOAD_WRONG_ARCH -3 +#define ELF_LOAD_WRONG_ENDIAN -4 +#define ELF_LOAD_TOO_BIG -5 +const char *load_elf_strerror(int error); + +/** load_elf_ram_sym: + * @filename: Path of ELF file + * @elf_note_fn: optional function to parse ELF Note type + * passed via @translate_opaque + * @translate_fn: optional function to translate load addresses + * @translate_opaque: opaque data passed to @translate_fn + * @pentry: Populated with program entry point. Ignored if NULL. + * @lowaddr: Populated with lowest loaded address. Ignored if NULL. + * @highaddr: Populated with highest loaded address. Ignored if NULL. + * @pflags: Populated with ELF processor-specific flags. Ignore if NULL. + * @bigendian: Expected ELF endianness. 0 for LE otherwise BE + * @elf_machine: Expected ELF machine type + * @clear_lsb: Set to mask off LSB of addresses (Some architectures use + * this for non-address data) + * @data_swab: Set to order of byte swapping for data. 0 for no swap, 1 + * for swapping bytes within halfwords, 2 for bytes within + * words and 3 for within doublewords. + * @as: The AddressSpace to load the ELF to. The value of address_space_memory + * is used if nothing is supplied here. + * @load_rom : Load ELF binary as ROM + * @sym_cb: Callback function for symbol table entries + * + * Load an ELF file's contents to the emulated system's address space. + * Clients may optionally specify a callback to perform address + * translations. @pentry, @lowaddr and @highaddr are optional pointers + * which will be populated with various load information. @bigendian and + * @elf_machine give the expected endianness and machine for the ELF the + * load will fail if the target ELF does not match. Some architectures + * have some architecture-specific behaviours that come into effect when + * their particular values for @elf_machine are set. + * If @elf_machine is EM_NONE then the machine type will be read from the + * ELF header and no checks will be carried out against the machine type. + */ +typedef void (*symbol_fn_t)(const char *st_name, int st_info, + uint64_t st_value, uint64_t st_size); + +int load_elf_ram_sym(const char *filename, + uint64_t (*elf_note_fn)(void *, void *, bool), + uint64_t (*translate_fn)(void *, uint64_t), + void *translate_opaque, uint64_t *pentry, + uint64_t *lowaddr, uint64_t *highaddr, uint32_t *pflags, + int big_endian, int elf_machine, + int clear_lsb, int data_swab, + AddressSpace *as, bool load_rom, symbol_fn_t sym_cb); + +/** load_elf_ram: + * Same as load_elf_ram_sym(), but doesn't allow the caller to specify a + * symbol callback function + */ +int load_elf_ram(const char *filename, + uint64_t (*elf_note_fn)(void *, void *, bool), + uint64_t (*translate_fn)(void *, uint64_t), + void *translate_opaque, uint64_t *pentry, uint64_t *lowaddr, + uint64_t *highaddr, uint32_t *pflags, int big_endian, + int elf_machine, int clear_lsb, int data_swab, + AddressSpace *as, bool load_rom); + +/** load_elf_as: + * Same as load_elf_ram(), but always loads the elf as ROM + */ +int load_elf_as(const char *filename, + uint64_t (*elf_note_fn)(void *, void *, bool), + uint64_t (*translate_fn)(void *, uint64_t), + void *translate_opaque, uint64_t *pentry, uint64_t *lowaddr, + uint64_t *highaddr, uint32_t *pflags, int big_endian, + int elf_machine, int clear_lsb, int data_swab, + AddressSpace *as); + +/** load_elf: + * Same as load_elf_as(), but doesn't allow the caller to specify an + * AddressSpace. + */ +int load_elf(const char *filename, + uint64_t (*elf_note_fn)(void *, void *, bool), + uint64_t (*translate_fn)(void *, uint64_t), + void *translate_opaque, uint64_t *pentry, uint64_t *lowaddr, + uint64_t *highaddr, uint32_t *pflags, int big_endian, + int elf_machine, int clear_lsb, int data_swab); + +/** load_elf_hdr: + * @filename: Path of ELF file + * @hdr: Buffer to populate with header data. Header data will not be + * filled if set to NULL. + * @is64: Set to true if the ELF is 64bit. Ignored if set to NULL + * @errp: Populated with an error in failure cases + * + * Inspect an ELF file's header. Read its full header contents into a + * buffer and/or determine if the ELF is 64bit. + */ +void load_elf_hdr(const char *filename, void *hdr, bool *is64, Error **errp); + +int load_aout(const char *filename, hwaddr addr, int max_sz, + int bswap_needed, hwaddr target_page_size); + +#define LOAD_UIMAGE_LOADADDR_INVALID (-1) + +/** load_uimage_as: + * @filename: Path of uimage file + * @ep: Populated with program entry point. Ignored if NULL. + * @loadaddr: load address if none specified in the image or when loading a + * ramdisk. Populated with the load address. Ignored if NULL or + * LOAD_UIMAGE_LOADADDR_INVALID (images which do not specify a load + * address will not be loadable). + * @is_linux: Is set to true if the image loaded is Linux. Ignored if NULL. + * @translate_fn: optional function to translate load addresses + * @translate_opaque: opaque data passed to @translate_fn + * @as: The AddressSpace to load the ELF to. The value of address_space_memory + * is used if nothing is supplied here. + * + * Loads a u-boot image into memory. + * + * Returns the size of the loaded image on success, -1 otherwise. + */ +int load_uimage_as(const char *filename, hwaddr *ep, + hwaddr *loadaddr, int *is_linux, + uint64_t (*translate_fn)(void *, uint64_t), + void *translate_opaque, AddressSpace *as); + +/** load_uimage: + * Same as load_uimage_as(), but doesn't allow the caller to specify an + * AddressSpace. + */ +int load_uimage(const char *filename, hwaddr *ep, + hwaddr *loadaddr, int *is_linux, + uint64_t (*translate_fn)(void *, uint64_t), + void *translate_opaque); + +/** + * load_ramdisk_as: + * @filename: Path to the ramdisk image + * @addr: Memory address to load the ramdisk to + * @max_sz: Maximum allowed ramdisk size (for non-u-boot ramdisks) + * @as: The AddressSpace to load the ELF to. The value of address_space_memory + * is used if nothing is supplied here. + * + * Load a ramdisk image with U-Boot header to the specified memory + * address. + * + * Returns the size of the loaded image on success, -1 otherwise. + */ +int load_ramdisk_as(const char *filename, hwaddr addr, uint64_t max_sz, + AddressSpace *as); + +/** + * load_ramdisk: + * Same as load_ramdisk_as(), but doesn't allow the caller to specify + * an AddressSpace. + */ +int load_ramdisk(const char *filename, hwaddr addr, uint64_t max_sz); + +ssize_t gunzip(void *dst, size_t dstlen, uint8_t *src, size_t srclen); + +ssize_t read_targphys(const char *name, + int fd, hwaddr dst_addr, size_t nbytes); +void pstrcpy_targphys(const char *name, + hwaddr dest, int buf_size, + const char *source); + +extern bool option_rom_has_mr; +extern bool rom_file_has_mr; + +int rom_add_file(const char *file, const char *fw_dir, + hwaddr addr, int32_t bootindex, + bool option_rom, MemoryRegion *mr, AddressSpace *as); +MemoryRegion *rom_add_blob(const char *name, const void *blob, size_t len, + size_t max_len, hwaddr addr, + const char *fw_file_name, + FWCfgCallback fw_callback, + void *callback_opaque, AddressSpace *as, + bool read_only); +int rom_add_elf_program(const char *name, GMappedFile *mapped_file, void *data, + size_t datasize, size_t romsize, hwaddr addr, + AddressSpace *as); +int rom_check_and_register_reset(void); +void rom_set_fw(FWCfgState *f); +void rom_set_order_override(int order); +void rom_reset_order_override(void); + +/** + * rom_transaction_begin: + * + * Call this before of a series of rom_add_*() calls. Call + * rom_transaction_end() afterwards to commit or abort. These functions are + * useful for undoing a series of rom_add_*() calls if image file loading fails + * partway through. + */ +void rom_transaction_begin(void); + +/** + * rom_transaction_end: + * @commit: true to commit added roms, false to drop added roms + * + * Call this after a series of rom_add_*() calls. See rom_transaction_begin(). + */ +void rom_transaction_end(bool commit); + +int rom_copy(uint8_t *dest, hwaddr addr, size_t size); +void *rom_ptr(hwaddr addr, size_t size); +void hmp_info_roms(Monitor *mon, const QDict *qdict); + +#define rom_add_file_fixed(_f, _a, _i) \ + rom_add_file(_f, NULL, _a, _i, false, NULL, NULL) +#define rom_add_blob_fixed(_f, _b, _l, _a) \ + rom_add_blob(_f, _b, _l, _l, _a, NULL, NULL, NULL, NULL, true) +#define rom_add_file_mr(_f, _mr, _i) \ + rom_add_file(_f, NULL, 0, _i, false, _mr, NULL) +#define rom_add_file_as(_f, _as, _i) \ + rom_add_file(_f, NULL, 0, _i, false, NULL, _as) +#define rom_add_file_fixed_as(_f, _a, _i, _as) \ + rom_add_file(_f, NULL, _a, _i, false, NULL, _as) +#define rom_add_blob_fixed_as(_f, _b, _l, _a, _as) \ + rom_add_blob(_f, _b, _l, _l, _a, NULL, NULL, NULL, _as, true) + +#define PC_ROM_MIN_VGA 0xc0000 +#define PC_ROM_MIN_OPTION 0xc8000 +#define PC_ROM_MAX 0xe0000 +#define PC_ROM_ALIGN 0x800 +#define PC_ROM_SIZE (PC_ROM_MAX - PC_ROM_MIN_VGA) + +int rom_add_vga(const char *file); +int rom_add_option(const char *file, int32_t bootindex); + +/* This is the usual maximum in uboot, so if a uImage overflows this, it would + * overflow on real hardware too. */ +#define UBOOT_MAX_GUNZIP_BYTES (64 << 20) + +#endif diff --git a/include/hw/m68k/mcf.h b/include/hw/m68k/mcf.h new file mode 100644 index 000000000..decf17ce4 --- /dev/null +++ b/include/hw/m68k/mcf.h @@ -0,0 +1,23 @@ +#ifndef HW_MCF_H +#define HW_MCF_H +/* Motorola ColdFire device prototypes. */ + +#include "target/m68k/cpu-qom.h" + +/* mcf_uart.c */ +uint64_t mcf_uart_read(void *opaque, hwaddr addr, + unsigned size); +void mcf_uart_write(void *opaque, hwaddr addr, + uint64_t val, unsigned size); +void *mcf_uart_init(qemu_irq irq, Chardev *chr); +void mcf_uart_mm_init(hwaddr base, qemu_irq irq, Chardev *chr); + +/* mcf_intc.c */ +qemu_irq *mcf_intc_init(struct MemoryRegion *sysmem, + hwaddr base, + M68kCPU *cpu); + +/* mcf5206.c */ +#define TYPE_MCF5206_MBAR "mcf5206-mbar" + +#endif diff --git a/include/hw/m68k/mcf_fec.h b/include/hw/m68k/mcf_fec.h new file mode 100644 index 000000000..80d4f651b --- /dev/null +++ b/include/hw/m68k/mcf_fec.h @@ -0,0 +1,19 @@ +/* + * Definitions for the ColdFire Fast Ethernet Controller emulation. + * + * This code is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + */ + +#ifndef HW_M68K_MCF_FEC_H +#define HW_M68K_MCF_FEC_H +#include "qom/object.h" + +#define TYPE_MCF_FEC_NET "mcf-fec" +OBJECT_DECLARE_SIMPLE_TYPE(mcf_fec_state, MCF_FEC_NET) + +#define FEC_NUM_IRQ 13 + +#endif diff --git a/include/hw/m68k/next-cube.h b/include/hw/m68k/next-cube.h new file mode 100644 index 000000000..a3be2b32a --- /dev/null +++ b/include/hw/m68k/next-cube.h @@ -0,0 +1,47 @@ + +#ifndef NEXT_CUBE_H +#define NEXT_CUBE_H + +#define TYPE_NEXTFB "next-fb" + +#define TYPE_NEXTKBD "next-kbd" + +enum next_dma_chan { + NEXTDMA_FD, + NEXTDMA_ENRX, + NEXTDMA_ENTX, + NEXTDMA_SCSI, + NEXTDMA_SCC, + NEXTDMA_SND +}; + +#define DMA_ENABLE 0x01000000 +#define DMA_SUPDATE 0x02000000 +#define DMA_COMPLETE 0x08000000 + +#define DMA_M2DEV 0x0 +#define DMA_SETENABLE 0x00010000 +#define DMA_SETSUPDATE 0x00020000 +#define DMA_DEV2M 0x00040000 +#define DMA_CLRCOMPLETE 0x00080000 +#define DMA_RESET 0x00100000 + +enum next_irqs { + NEXT_FD_I, + NEXT_KBD_I, + NEXT_PWR_I, + NEXT_ENRX_I, + NEXT_ENTX_I, + NEXT_SCSI_I, + NEXT_CLK_I, + NEXT_SCC_I, + NEXT_ENTX_DMA_I, + NEXT_ENRX_DMA_I, + NEXT_SCSI_DMA_I, + NEXT_SCC_DMA_I, + NEXT_SND_I +}; + +void next_irq(void *opaque, int number, int level); + +#endif /* NEXT_CUBE_H */ diff --git a/include/hw/mem/memory-device.h b/include/hw/mem/memory-device.h new file mode 100644 index 000000000..48d2611fc --- /dev/null +++ b/include/hw/mem/memory-device.h @@ -0,0 +1,117 @@ +/* + * Memory Device Interface + * + * Copyright (c) 2018 Red Hat, Inc. + * + * Authors: + * David Hildenbrand + * + * This work is licensed under the terms of the GNU GPL, version 2 or later. + * See the COPYING file in the top-level directory. + */ + +#ifndef MEMORY_DEVICE_H +#define MEMORY_DEVICE_H + +#include "hw/qdev-core.h" +#include "qapi/qapi-types-machine.h" +#include "qom/object.h" + +#define TYPE_MEMORY_DEVICE "memory-device" + +typedef struct MemoryDeviceClass MemoryDeviceClass; +DECLARE_CLASS_CHECKERS(MemoryDeviceClass, MEMORY_DEVICE, + TYPE_MEMORY_DEVICE) +#define MEMORY_DEVICE(obj) \ + INTERFACE_CHECK(MemoryDeviceState, (obj), TYPE_MEMORY_DEVICE) + +typedef struct MemoryDeviceState MemoryDeviceState; + +/** + * MemoryDeviceClass: + * + * All memory devices need to implement TYPE_MEMORY_DEVICE as an interface. + * + * A memory device is a device that owns a memory region which is + * mapped into guest physical address space at a certain address. The + * address in guest physical memory can either be specified explicitly + * or get assigned automatically. + * + * Conceptually, memory devices only span one memory region. If multiple + * successive memory regions are used, a covering memory region has to + * be provided. Scattered memory regions are not supported for single + * devices. + */ +struct MemoryDeviceClass { + /* private */ + InterfaceClass parent_class; + + /* + * Return the address of the memory device in guest physical memory. + * + * Called when (un)plugging a memory device or when iterating over + * all memory devices mapped into guest physical address space. + * + * If "0" is returned, no address has been specified by the user and + * no address has been assigned to this memory device yet. + */ + uint64_t (*get_addr)(const MemoryDeviceState *md); + + /* + * Set the address of the memory device in guest physical memory. + * + * Called when plugging the memory device to configure the determined + * address in guest physical memory. + */ + void (*set_addr)(MemoryDeviceState *md, uint64_t addr, Error **errp); + + /* + * Return the amount of memory provided by the memory device currently + * usable ("plugged") by the VM. + * + * Called when calculating the total amount of ram available to the + * VM (e.g. to report memory stats to the user). + * + * This is helpful for devices that dynamically manage the amount of + * memory accessible by the guest via the reserved memory region. For + * most devices, this corresponds to the size of the memory region. + */ + uint64_t (*get_plugged_size)(const MemoryDeviceState *md, Error **errp); + + /* + * Return the memory region of the memory device. + * + * Called when (un)plugging the memory device, to (un)map the + * memory region in guest physical memory, but also to detect the + * required alignment during address assignment or when the size of the + * memory region is required. + */ + MemoryRegion *(*get_memory_region)(MemoryDeviceState *md, Error **errp); + + /* + * Optional: Return the desired minimum alignment of the device in guest + * physical address space. The final alignment is computed based on this + * alignment and the alignment requirements of the memory region. + * + * Called when plugging the memory device to detect the required alignment + * during address assignment. + */ + uint64_t (*get_min_alignment)(const MemoryDeviceState *md); + + /* + * Translate the memory device into #MemoryDeviceInfo. + */ + void (*fill_device_info)(const MemoryDeviceState *md, + MemoryDeviceInfo *info); +}; + +MemoryDeviceInfoList *qmp_memory_device_list(void); +uint64_t get_plugged_memory_size(void); +void memory_device_pre_plug(MemoryDeviceState *md, MachineState *ms, + const uint64_t *legacy_align, Error **errp); +void memory_device_plug(MemoryDeviceState *md, MachineState *ms); +void memory_device_unplug(MemoryDeviceState *md, MachineState *ms); +uint64_t memory_device_get_region_size(const MemoryDeviceState *md, + Error **errp); + +#endif diff --git a/include/hw/mem/npcm7xx_mc.h b/include/hw/mem/npcm7xx_mc.h new file mode 100644 index 000000000..7ed38be24 --- /dev/null +++ b/include/hw/mem/npcm7xx_mc.h @@ -0,0 +1,36 @@ +/* + * Nuvoton NPCM7xx Memory Controller stub + * + * Copyright 2020 Google LLC + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * for more details. + */ +#ifndef NPCM7XX_MC_H +#define NPCM7XX_MC_H + +#include "exec/memory.h" +#include "hw/sysbus.h" + +/** + * struct NPCM7xxMCState - Device state for the memory controller. + * @parent: System bus device. + * @mmio: Memory region through which registers are accessed. + */ +typedef struct NPCM7xxMCState { + SysBusDevice parent; + + MemoryRegion mmio; +} NPCM7xxMCState; + +#define TYPE_NPCM7XX_MC "npcm7xx-mc" +#define NPCM7XX_MC(obj) OBJECT_CHECK(NPCM7xxMCState, (obj), TYPE_NPCM7XX_MC) + +#endif /* NPCM7XX_MC_H */ diff --git a/include/hw/mem/nvdimm.h b/include/hw/mem/nvdimm.h new file mode 100644 index 000000000..c699842dd --- /dev/null +++ b/include/hw/mem/nvdimm.h @@ -0,0 +1,160 @@ +/* + * Non-Volatile Dual In-line Memory Module Virtualization Implementation + * + * Copyright(C) 2015 Intel Corporation. + * + * Author: + * Xiao Guangrong + * + * NVDIMM specifications and some documents can be found at: + * NVDIMM ACPI device and NFIT are introduced in ACPI 6: + * http://www.uefi.org/sites/default/files/resources/ACPI_6.0.pdf + * NVDIMM Namespace specification: + * http://pmem.io/documents/NVDIMM_Namespace_Spec.pdf + * DSM Interface Example: + * http://pmem.io/documents/NVDIMM_DSM_Interface_Example.pdf + * Driver Writer's Guide: + * http://pmem.io/documents/NVDIMM_Driver_Writers_Guide.pdf + * + * This work is licensed under the terms of the GNU GPL, version 2 or later. + * See the COPYING file in the top-level directory. + */ + +#ifndef QEMU_NVDIMM_H +#define QEMU_NVDIMM_H + +#include "hw/mem/pc-dimm.h" +#include "hw/acpi/bios-linker-loader.h" +#include "qemu/uuid.h" +#include "hw/acpi/aml-build.h" +#include "qom/object.h" + +#define NVDIMM_DEBUG 0 +#define nvdimm_debug(fmt, ...) \ + do { \ + if (NVDIMM_DEBUG) { \ + fprintf(stderr, "nvdimm: " fmt, ## __VA_ARGS__); \ + } \ + } while (0) + +/* + * The minimum label data size is required by NVDIMM Namespace + * specification, see the chapter 2 Namespaces: + * "NVDIMMs following the NVDIMM Block Mode Specification use an area + * at least 128KB in size, which holds around 1000 labels." + */ +#define MIN_NAMESPACE_LABEL_SIZE (128UL << 10) + +#define TYPE_NVDIMM "nvdimm" +OBJECT_DECLARE_TYPE(NVDIMMDevice, NVDIMMClass, NVDIMM) + +#define NVDIMM_LABEL_SIZE_PROP "label-size" +#define NVDIMM_UUID_PROP "uuid" +#define NVDIMM_UNARMED_PROP "unarmed" + +struct NVDIMMDevice { + /* private */ + PCDIMMDevice parent_obj; + + /* public */ + + /* + * the size of label data in NVDIMM device which is presented to + * guest via __DSM "Get Namespace Label Size" function. + */ + uint64_t label_size; + + /* + * the address of label data which is read by __DSM "Get Namespace + * Label Data" function and written by __DSM "Set Namespace Label + * Data" function. + */ + void *label_data; + + /* + * it's the PMEM region in NVDIMM device, which is presented to + * guest via ACPI NFIT and _FIT method if NVDIMM hotplug is supported. + */ + MemoryRegion *nvdimm_mr; + + /* + * The 'on' value results in the unarmed flag set in ACPI NFIT, + * which can be used to notify guest implicitly that the host + * backend (e.g., files on HDD, /dev/pmemX, etc.) cannot guarantee + * the guest write persistence. + */ + bool unarmed; + + /* + * The PPC64 - spapr requires each nvdimm device have a uuid. + */ + QemuUUID uuid; +}; + +struct NVDIMMClass { + /* private */ + PCDIMMDeviceClass parent_class; + + /* public */ + + /* read @size bytes from NVDIMM label data at @offset into @buf. */ + void (*read_label_data)(NVDIMMDevice *nvdimm, void *buf, + uint64_t size, uint64_t offset); + /* write @size bytes from @buf to NVDIMM label data at @offset. */ + void (*write_label_data)(NVDIMMDevice *nvdimm, const void *buf, + uint64_t size, uint64_t offset); +}; + +#define NVDIMM_DSM_MEM_FILE "etc/acpi/nvdimm-mem" + +/* + * 32 bits IO port starting from 0x0a18 in guest is reserved for + * NVDIMM ACPI emulation. + */ +#define NVDIMM_ACPI_IO_BASE 0x0a18 +#define NVDIMM_ACPI_IO_LEN 4 + +/* + * NvdimmFitBuffer: + * @fit: FIT structures for present NVDIMMs. It is updated when + * the NVDIMM device is plugged or unplugged. + * @dirty: It allows OSPM to detect change and restart read in + * progress if there is any. + */ +struct NvdimmFitBuffer { + GArray *fit; + bool dirty; +}; +typedef struct NvdimmFitBuffer NvdimmFitBuffer; + +struct NVDIMMState { + /* detect if NVDIMM support is enabled. */ + bool is_enabled; + + /* the data of the fw_cfg file NVDIMM_DSM_MEM_FILE. */ + GArray *dsm_mem; + + NvdimmFitBuffer fit_buf; + + /* the IO region used by OSPM to transfer control to QEMU. */ + MemoryRegion io_mr; + + /* + * Platform capabilities, section 5.2.25.9 of ACPI 6.2 Errata A + */ + int32_t persistence; + char *persistence_string; + struct AcpiGenericAddress dsm_io; +}; +typedef struct NVDIMMState NVDIMMState; + +void nvdimm_init_acpi_state(NVDIMMState *state, MemoryRegion *io, + struct AcpiGenericAddress dsm_io, + FWCfgState *fw_cfg, Object *owner); +void nvdimm_build_srat(GArray *table_data); +void nvdimm_build_acpi(GArray *table_offsets, GArray *table_data, + BIOSLinker *linker, NVDIMMState *state, + uint32_t ram_slots); +void nvdimm_plug(NVDIMMState *state); +void nvdimm_acpi_plug_cb(HotplugHandler *hotplug_dev, DeviceState *dev); +#endif diff --git a/include/hw/mem/pc-dimm.h b/include/hw/mem/pc-dimm.h new file mode 100644 index 000000000..3d3db8264 --- /dev/null +++ b/include/hw/mem/pc-dimm.h @@ -0,0 +1,77 @@ +/* + * PC DIMM device + * + * Copyright ProfitBricks GmbH 2012 + * Copyright (C) 2013-2014 Red Hat Inc + * + * Authors: + * Vasilis Liaskovitis + * Igor Mammedov + * + * This work is licensed under the terms of the GNU GPL, version 2 or later. + * See the COPYING file in the top-level directory. + * + */ + +#ifndef QEMU_PC_DIMM_H +#define QEMU_PC_DIMM_H + +#include "exec/memory.h" +#include "hw/qdev-core.h" +#include "qom/object.h" + +#define TYPE_PC_DIMM "pc-dimm" +OBJECT_DECLARE_TYPE(PCDIMMDevice, PCDIMMDeviceClass, + PC_DIMM) + +#define PC_DIMM_ADDR_PROP "addr" +#define PC_DIMM_SLOT_PROP "slot" +#define PC_DIMM_NODE_PROP "node" +#define PC_DIMM_SIZE_PROP "size" +#define PC_DIMM_MEMDEV_PROP "memdev" + +#define PC_DIMM_UNASSIGNED_SLOT -1 + +/** + * PCDIMMDevice: + * @addr: starting guest physical address, where @PCDIMMDevice is mapped. + * Default value: 0, means that address is auto-allocated. + * @node: numa node to which @PCDIMMDevice is attached. + * @slot: slot number into which @PCDIMMDevice is plugged in. + * Default value: -1, means that slot is auto-allocated. + * @hostmem: host memory backend providing memory for @PCDIMMDevice + */ +struct PCDIMMDevice { + /* private */ + DeviceState parent_obj; + + /* public */ + uint64_t addr; + uint32_t node; + int32_t slot; + HostMemoryBackend *hostmem; +}; + +/** + * PCDIMMDeviceClass: + * @realize: called after common dimm is realized so that the dimm based + * devices get the chance to do specified operations. + * @get_vmstate_memory_region: returns #MemoryRegion which indicates the + * memory of @dimm should be kept during live migration. Will not fail + * after the device was realized. + */ +struct PCDIMMDeviceClass { + /* private */ + DeviceClass parent_class; + + /* public */ + void (*realize)(PCDIMMDevice *dimm, Error **errp); + MemoryRegion *(*get_vmstate_memory_region)(PCDIMMDevice *dimm, + Error **errp); +}; + +void pc_dimm_pre_plug(PCDIMMDevice *dimm, MachineState *machine, + const uint64_t *legacy_align, Error **errp); +void pc_dimm_plug(PCDIMMDevice *dimm, MachineState *machine); +void pc_dimm_unplug(PCDIMMDevice *dimm, MachineState *machine); +#endif diff --git a/include/hw/mips/bios.h b/include/hw/mips/bios.h new file mode 100644 index 000000000..c03007999 --- /dev/null +++ b/include/hw/mips/bios.h @@ -0,0 +1,14 @@ +#ifndef HW_MIPS_BIOS_H +#define HW_MIPS_BIOS_H + +#include "qemu/units.h" +#include "cpu.h" + +#define BIOS_SIZE (4 * MiB) +#ifdef TARGET_WORDS_BIGENDIAN +#define BIOS_FILENAME "mips_bios.bin" +#else +#define BIOS_FILENAME "mipsel_bios.bin" +#endif + +#endif diff --git a/include/hw/mips/cps.h b/include/hw/mips/cps.h new file mode 100644 index 000000000..04d636246 --- /dev/null +++ b/include/hw/mips/cps.h @@ -0,0 +1,52 @@ +/* + * Coherent Processing System emulation. + * + * Copyright (c) 2016 Imagination Technologies + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, see . + */ + +#ifndef MIPS_CPS_H +#define MIPS_CPS_H + +#include "hw/sysbus.h" +#include "hw/clock.h" +#include "hw/misc/mips_cmgcr.h" +#include "hw/intc/mips_gic.h" +#include "hw/misc/mips_cpc.h" +#include "hw/misc/mips_itu.h" +#include "target/mips/cpu.h" +#include "qom/object.h" + +#define TYPE_MIPS_CPS "mips-cps" +OBJECT_DECLARE_SIMPLE_TYPE(MIPSCPSState, MIPS_CPS) + +struct MIPSCPSState { + SysBusDevice parent_obj; + + uint32_t num_vp; + uint32_t num_irq; + char *cpu_type; + + MemoryRegion container; + MIPSGCRState gcr; + MIPSGICState gic; + MIPSCPCState cpc; + MIPSITUState itu; + Clock *clock; +}; + +qemu_irq get_cps_irq(MIPSCPSState *cps, int pin_number); + +#endif diff --git a/include/hw/mips/cpudevs.h b/include/hw/mips/cpudevs.h new file mode 100644 index 000000000..291f59281 --- /dev/null +++ b/include/hw/mips/cpudevs.h @@ -0,0 +1,21 @@ +#ifndef HW_MIPS_CPUDEVS_H +#define HW_MIPS_CPUDEVS_H + +#include "target/mips/cpu-qom.h" + +/* Definitions for MIPS CPU internal devices. */ + +/* addr.c */ +uint64_t cpu_mips_kseg0_to_phys(void *opaque, uint64_t addr); +uint64_t cpu_mips_phys_to_kseg0(void *opaque, uint64_t addr); +uint64_t cpu_mips_kvm_um_phys_to_kseg0(void *opaque, uint64_t addr); +bool mips_um_ksegs_enabled(void); +void mips_um_ksegs_enable(void); + +/* mips_int.c */ +void cpu_mips_irq_init_cpu(MIPSCPU *cpu); + +/* mips_timer.c */ +void cpu_mips_clock_init(MIPSCPU *cpu); + +#endif diff --git a/include/hw/mips/mips.h b/include/hw/mips/mips.h new file mode 100644 index 000000000..6c9c8805f --- /dev/null +++ b/include/hw/mips/mips.h @@ -0,0 +1,25 @@ +#ifndef HW_MIPS_H +#define HW_MIPS_H +/* Definitions for mips board emulation. */ + +#include "qemu/units.h" + +/* Kernels can be configured with 64KB pages */ +#define INITRD_PAGE_SIZE (64 * KiB) + +#include "exec/memory.h" + +/* gt64xxx.c */ +PCIBus *gt64120_register(qemu_irq *pic); + +/* bonito.c */ +PCIBus *bonito_init(qemu_irq *pic); + +/* rc4030.c */ +typedef struct rc4030DMAState *rc4030_dma; +void rc4030_dma_read(void *dma, uint8_t *buf, int len); +void rc4030_dma_write(void *dma, uint8_t *buf, int len); + +DeviceState *rc4030_init(rc4030_dma **dmas, IOMMUMemoryRegion **dma_mr); + +#endif diff --git a/include/hw/misc/a9scu.h b/include/hw/misc/a9scu.h new file mode 100644 index 000000000..c3759fb8c --- /dev/null +++ b/include/hw/misc/a9scu.h @@ -0,0 +1,32 @@ +/* + * Cortex-A9MPCore Snoop Control Unit (SCU) emulation. + * + * Copyright (c) 2009 CodeSourcery. + * Copyright (c) 2011 Linaro Limited. + * Written by Paul Brook, Peter Maydell. + * + * This code is licensed under the GPL. + */ +#ifndef HW_MISC_A9SCU_H +#define HW_MISC_A9SCU_H + +#include "hw/sysbus.h" +#include "qom/object.h" + +/* A9MP private memory region. */ + +struct A9SCUState { + /*< private >*/ + SysBusDevice parent_obj; + /*< public >*/ + + MemoryRegion iomem; + uint32_t control; + uint32_t status; + uint32_t num_cpu; +}; + +#define TYPE_A9_SCU "a9-scu" +OBJECT_DECLARE_SIMPLE_TYPE(A9SCUState, A9_SCU) + +#endif diff --git a/include/hw/misc/allwinner-cpucfg.h b/include/hw/misc/allwinner-cpucfg.h new file mode 100644 index 000000000..a717b4729 --- /dev/null +++ b/include/hw/misc/allwinner-cpucfg.h @@ -0,0 +1,51 @@ +/* + * Allwinner CPU Configuration Module emulation + * + * Copyright (C) 2019 Niek Linnenbank + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + */ + +#ifndef HW_MISC_ALLWINNER_CPUCFG_H +#define HW_MISC_ALLWINNER_CPUCFG_H + +#include "qom/object.h" +#include "hw/sysbus.h" + +/** + * Object model + * @{ + */ + +#define TYPE_AW_CPUCFG "allwinner-cpucfg" +OBJECT_DECLARE_SIMPLE_TYPE(AwCpuCfgState, AW_CPUCFG) + +/** @} */ + +/** + * Allwinner CPU Configuration Module instance state + */ +struct AwCpuCfgState { + /*< private >*/ + SysBusDevice parent_obj; + /*< public >*/ + + MemoryRegion iomem; + uint32_t gen_ctrl; + uint32_t super_standby; + uint32_t entry_addr; + +}; + +#endif /* HW_MISC_ALLWINNER_CPUCFG_H */ diff --git a/include/hw/misc/allwinner-h3-ccu.h b/include/hw/misc/allwinner-h3-ccu.h new file mode 100644 index 000000000..a04875bfc --- /dev/null +++ b/include/hw/misc/allwinner-h3-ccu.h @@ -0,0 +1,65 @@ +/* + * Allwinner H3 Clock Control Unit emulation + * + * Copyright (C) 2019 Niek Linnenbank + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + */ + +#ifndef HW_MISC_ALLWINNER_H3_CCU_H +#define HW_MISC_ALLWINNER_H3_CCU_H + +#include "qom/object.h" +#include "hw/sysbus.h" + +/** + * @name Constants + * @{ + */ + +/** Size of register I/O address space used by CCU device */ +#define AW_H3_CCU_IOSIZE (0x400) + +/** Total number of known registers */ +#define AW_H3_CCU_REGS_NUM (AW_H3_CCU_IOSIZE / sizeof(uint32_t)) + +/** @} */ + +/** + * @name Object model + * @{ + */ + +#define TYPE_AW_H3_CCU "allwinner-h3-ccu" +OBJECT_DECLARE_SIMPLE_TYPE(AwH3ClockCtlState, AW_H3_CCU) + +/** @} */ + +/** + * Allwinner H3 CCU object instance state. + */ +struct AwH3ClockCtlState { + /*< private >*/ + SysBusDevice parent_obj; + /*< public >*/ + + /** Maps I/O registers in physical memory */ + MemoryRegion iomem; + + /** Array of hardware registers */ + uint32_t regs[AW_H3_CCU_REGS_NUM]; + +}; + +#endif /* HW_MISC_ALLWINNER_H3_CCU_H */ diff --git a/include/hw/misc/allwinner-h3-dramc.h b/include/hw/misc/allwinner-h3-dramc.h new file mode 100644 index 000000000..0b6c877ef --- /dev/null +++ b/include/hw/misc/allwinner-h3-dramc.h @@ -0,0 +1,105 @@ +/* + * Allwinner H3 SDRAM Controller emulation + * + * Copyright (C) 2019 Niek Linnenbank + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + */ + +#ifndef HW_MISC_ALLWINNER_H3_DRAMC_H +#define HW_MISC_ALLWINNER_H3_DRAMC_H + +#include "qom/object.h" +#include "hw/sysbus.h" +#include "exec/hwaddr.h" + +/** + * Constants + * @{ + */ + +/** Highest register address used by DRAMCOM module */ +#define AW_H3_DRAMCOM_REGS_MAXADDR (0x804) + +/** Total number of known DRAMCOM registers */ +#define AW_H3_DRAMCOM_REGS_NUM (AW_H3_DRAMCOM_REGS_MAXADDR / \ + sizeof(uint32_t)) + +/** Highest register address used by DRAMCTL module */ +#define AW_H3_DRAMCTL_REGS_MAXADDR (0x88c) + +/** Total number of known DRAMCTL registers */ +#define AW_H3_DRAMCTL_REGS_NUM (AW_H3_DRAMCTL_REGS_MAXADDR / \ + sizeof(uint32_t)) + +/** Highest register address used by DRAMPHY module */ +#define AW_H3_DRAMPHY_REGS_MAXADDR (0x4) + +/** Total number of known DRAMPHY registers */ +#define AW_H3_DRAMPHY_REGS_NUM (AW_H3_DRAMPHY_REGS_MAXADDR / \ + sizeof(uint32_t)) + +/** @} */ + +/** + * Object model + * @{ + */ + +#define TYPE_AW_H3_DRAMC "allwinner-h3-dramc" +OBJECT_DECLARE_SIMPLE_TYPE(AwH3DramCtlState, AW_H3_DRAMC) + +/** @} */ + +/** + * Allwinner H3 SDRAM Controller object instance state. + */ +struct AwH3DramCtlState { + /*< private >*/ + SysBusDevice parent_obj; + /*< public >*/ + + /** Physical base address for start of RAM */ + hwaddr ram_addr; + + /** Total RAM size in megabytes */ + uint32_t ram_size; + + /** + * @name Memory Regions + * @{ + */ + + MemoryRegion row_mirror; /**< Simulates rows for RAM size detection */ + MemoryRegion row_mirror_alias; /**< Alias of the row which is mirrored */ + MemoryRegion dramcom_iomem; /**< DRAMCOM module I/O registers */ + MemoryRegion dramctl_iomem; /**< DRAMCTL module I/O registers */ + MemoryRegion dramphy_iomem; /**< DRAMPHY module I/O registers */ + + /** @} */ + + /** + * @name Hardware Registers + * @{ + */ + + uint32_t dramcom[AW_H3_DRAMCOM_REGS_NUM]; /**< Array of DRAMCOM registers */ + uint32_t dramctl[AW_H3_DRAMCTL_REGS_NUM]; /**< Array of DRAMCTL registers */ + uint32_t dramphy[AW_H3_DRAMPHY_REGS_NUM] ;/**< Array of DRAMPHY registers */ + + /** @} */ + +}; + +#endif /* HW_MISC_ALLWINNER_H3_DRAMC_H */ diff --git a/include/hw/misc/allwinner-h3-sysctrl.h b/include/hw/misc/allwinner-h3-sysctrl.h new file mode 100644 index 000000000..ec1c22053 --- /dev/null +++ b/include/hw/misc/allwinner-h3-sysctrl.h @@ -0,0 +1,66 @@ +/* + * Allwinner H3 System Control emulation + * + * Copyright (C) 2019 Niek Linnenbank + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + */ + +#ifndef HW_MISC_ALLWINNER_H3_SYSCTRL_H +#define HW_MISC_ALLWINNER_H3_SYSCTRL_H + +#include "qom/object.h" +#include "hw/sysbus.h" + +/** + * @name Constants + * @{ + */ + +/** Highest register address used by System Control device */ +#define AW_H3_SYSCTRL_REGS_MAXADDR (0x30) + +/** Total number of known registers */ +#define AW_H3_SYSCTRL_REGS_NUM ((AW_H3_SYSCTRL_REGS_MAXADDR / \ + sizeof(uint32_t)) + 1) + +/** @} */ + +/** + * @name Object model + * @{ + */ + +#define TYPE_AW_H3_SYSCTRL "allwinner-h3-sysctrl" +OBJECT_DECLARE_SIMPLE_TYPE(AwH3SysCtrlState, AW_H3_SYSCTRL) + +/** @} */ + +/** + * Allwinner H3 System Control object instance state + */ +struct AwH3SysCtrlState { + /*< private >*/ + SysBusDevice parent_obj; + /*< public >*/ + + /** Maps I/O registers in physical memory */ + MemoryRegion iomem; + + /** Array of hardware registers */ + uint32_t regs[AW_H3_SYSCTRL_REGS_NUM]; + +}; + +#endif /* HW_MISC_ALLWINNER_H3_SYSCTRL_H */ diff --git a/include/hw/misc/allwinner-sid.h b/include/hw/misc/allwinner-sid.h new file mode 100644 index 000000000..3bfa887a9 --- /dev/null +++ b/include/hw/misc/allwinner-sid.h @@ -0,0 +1,59 @@ +/* + * Allwinner Security ID emulation + * + * Copyright (C) 2019 Niek Linnenbank + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + */ + +#ifndef HW_MISC_ALLWINNER_SID_H +#define HW_MISC_ALLWINNER_SID_H + +#include "qom/object.h" +#include "hw/sysbus.h" +#include "qemu/uuid.h" + +/** + * Object model + * @{ + */ + +#define TYPE_AW_SID "allwinner-sid" +OBJECT_DECLARE_SIMPLE_TYPE(AwSidState, AW_SID) + +/** @} */ + +/** + * Allwinner Security ID object instance state + */ +struct AwSidState { + /*< private >*/ + SysBusDevice parent_obj; + /*< public >*/ + + /** Maps I/O registers in physical memory */ + MemoryRegion iomem; + + /** Control register defines how and what to read */ + uint32_t control; + + /** RdKey register contains the data retrieved by the device */ + uint32_t rdkey; + + /** Stores the emulated device identifier */ + QemuUUID identifier; + +}; + +#endif /* HW_MISC_ALLWINNER_SID_H */ diff --git a/include/hw/misc/arm11scu.h b/include/hw/misc/arm11scu.h new file mode 100644 index 000000000..e5c0282ae --- /dev/null +++ b/include/hw/misc/arm11scu.h @@ -0,0 +1,30 @@ +/* + * ARM11MPCore Snoop Control Unit (SCU) emulation + * + * Copyright (c) 2006-2007 CodeSourcery. + * Copyright (c) 2013 SUSE LINUX Products GmbH + * Written by Paul Brook and Andreas Färber + * + * This code is licensed under the GPL. + */ + +#ifndef HW_MISC_ARM11SCU_H +#define HW_MISC_ARM11SCU_H + +#include "hw/sysbus.h" +#include "qom/object.h" + +#define TYPE_ARM11_SCU "arm11-scu" +OBJECT_DECLARE_SIMPLE_TYPE(ARM11SCUState, ARM11_SCU) + +struct ARM11SCUState { + /*< private >*/ + SysBusDevice parent_obj; + /*< public >*/ + + uint32_t control; + uint32_t num_cpu; + MemoryRegion iomem; +}; + +#endif diff --git a/include/hw/misc/arm_integrator_debug.h b/include/hw/misc/arm_integrator_debug.h new file mode 100644 index 000000000..0077dacb4 --- /dev/null +++ b/include/hw/misc/arm_integrator_debug.h @@ -0,0 +1,19 @@ +/* + * ARM Integrator Board Debug, switch and LED section + * + * Browse the data sheet: + * + * http://infocenter.arm.com/help/index.jsp?topic=/com.arm.doc.dui0159b/Babbfijf.html + * + * Copyright (c) 2013 Alex Bennée + * + * This work is licensed under the terms of the GNU GPL, version 2 or later. + * See the COPYING file in the top-level directory. + */ + +#ifndef ARM_INTEGRATOR_DEBUG_H +#define ARM_INTEGRATOR_DEBUG_H + +#define TYPE_INTEGRATOR_DEBUG "integrator_debug" + +#endif diff --git a/include/hw/misc/armsse-cpuid.h b/include/hw/misc/armsse-cpuid.h new file mode 100644 index 000000000..a61355e51 --- /dev/null +++ b/include/hw/misc/armsse-cpuid.h @@ -0,0 +1,42 @@ +/* + * ARM SSE-200 CPU_IDENTITY register block + * + * Copyright (c) 2019 Linaro Limited + * Written by Peter Maydell + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 or + * (at your option) any later version. + */ + +/* + * This is a model of the "CPU_IDENTITY" register block which is part of the + * Arm SSE-200 and documented in + * http://infocenter.arm.com/help/topic/com.arm.doc.101104_0100_00_en/corelink_sse200_subsystem_for_embedded_technical_reference_manual_101104_0100_00_en.pdf + * + * QEMU interface: + * + QOM property "CPUID": the value to use for the CPUID register + * + sysbus MMIO region 0: the system information register bank + */ + +#ifndef HW_MISC_ARMSSE_CPUID_H +#define HW_MISC_ARMSSE_CPUID_H + +#include "hw/sysbus.h" +#include "qom/object.h" + +#define TYPE_ARMSSE_CPUID "armsse-cpuid" +OBJECT_DECLARE_SIMPLE_TYPE(ARMSSECPUID, ARMSSE_CPUID) + +struct ARMSSECPUID { + /*< private >*/ + SysBusDevice parent_obj; + + /*< public >*/ + MemoryRegion iomem; + + /* Properties */ + uint32_t cpuid; +}; + +#endif diff --git a/include/hw/misc/armsse-mhu.h b/include/hw/misc/armsse-mhu.h new file mode 100644 index 000000000..2671b5b97 --- /dev/null +++ b/include/hw/misc/armsse-mhu.h @@ -0,0 +1,45 @@ +/* + * ARM SSE-200 Message Handling Unit (MHU) + * + * Copyright (c) 2019 Linaro Limited + * Written by Peter Maydell + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 or + * (at your option) any later version. + */ + +/* + * This is a model of the Message Handling Unit (MHU) which is part of the + * Arm SSE-200 and documented in + * http://infocenter.arm.com/help/topic/com.arm.doc.101104_0100_00_en/corelink_sse200_subsystem_for_embedded_technical_reference_manual_101104_0100_00_en.pdf + * + * QEMU interface: + * + sysbus MMIO region 0: the system information register bank + * + sysbus IRQ 0: interrupt for CPU 0 + * + sysbus IRQ 1: interrupt for CPU 1 + */ + +#ifndef HW_MISC_ARMSSE_MHU_H +#define HW_MISC_ARMSSE_MHU_H + +#include "hw/sysbus.h" +#include "qom/object.h" + +#define TYPE_ARMSSE_MHU "armsse-mhu" +OBJECT_DECLARE_SIMPLE_TYPE(ARMSSEMHU, ARMSSE_MHU) + +struct ARMSSEMHU { + /*< private >*/ + SysBusDevice parent_obj; + + /*< public >*/ + MemoryRegion iomem; + qemu_irq cpu0irq; + qemu_irq cpu1irq; + + uint32_t cpu0intr; + uint32_t cpu1intr; +}; + +#endif diff --git a/include/hw/misc/aspeed_scu.h b/include/hw/misc/aspeed_scu.h new file mode 100644 index 000000000..d49bfb02f --- /dev/null +++ b/include/hw/misc/aspeed_scu.h @@ -0,0 +1,317 @@ +/* + * ASPEED System Control Unit + * + * Andrew Jeffery + * + * Copyright 2016 IBM Corp. + * + * This code is licensed under the GPL version 2 or later. See + * the COPYING file in the top-level directory. + */ +#ifndef ASPEED_SCU_H +#define ASPEED_SCU_H + +#include "hw/sysbus.h" +#include "qom/object.h" + +#define TYPE_ASPEED_SCU "aspeed.scu" +OBJECT_DECLARE_TYPE(AspeedSCUState, AspeedSCUClass, ASPEED_SCU) +#define TYPE_ASPEED_2400_SCU TYPE_ASPEED_SCU "-ast2400" +#define TYPE_ASPEED_2500_SCU TYPE_ASPEED_SCU "-ast2500" +#define TYPE_ASPEED_2600_SCU TYPE_ASPEED_SCU "-ast2600" + +#define ASPEED_SCU_NR_REGS (0x1A8 >> 2) +#define ASPEED_AST2600_SCU_NR_REGS (0xE20 >> 2) + +struct AspeedSCUState { + /*< private >*/ + SysBusDevice parent_obj; + + /*< public >*/ + MemoryRegion iomem; + + uint32_t regs[ASPEED_AST2600_SCU_NR_REGS]; + uint32_t silicon_rev; + uint32_t hw_strap1; + uint32_t hw_strap2; + uint32_t hw_prot_key; +}; + +#define AST2400_A0_SILICON_REV 0x02000303U +#define AST2400_A1_SILICON_REV 0x02010303U +#define AST2500_A0_SILICON_REV 0x04000303U +#define AST2500_A1_SILICON_REV 0x04010303U +#define AST2600_A0_SILICON_REV 0x05000303U +#define AST2600_A1_SILICON_REV 0x05010303U + +#define ASPEED_IS_AST2500(si_rev) ((((si_rev) >> 24) & 0xff) == 0x04) + +extern bool is_supported_silicon_rev(uint32_t silicon_rev); + + +struct AspeedSCUClass { + SysBusDeviceClass parent_class; + + const uint32_t *resets; + uint32_t (*calc_hpll)(AspeedSCUState *s, uint32_t hpll_reg); + uint32_t apb_divider; + uint32_t nr_regs; + const MemoryRegionOps *ops; +}; + +#define ASPEED_SCU_PROT_KEY 0x1688A8A8 + +uint32_t aspeed_scu_get_apb_freq(AspeedSCUState *s); + +/* + * Extracted from Aspeed SDK v00.03.21. Fixes and extra definitions + * were added. + * + * Original header file : + * arch/arm/mach-aspeed/include/mach/regs-scu.h + * + * Copyright (C) 2012-2020 ASPEED Technology Inc. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * History : + * 1. 2012/12/29 Ryan Chen Create + */ + +/* SCU08 Clock Selection Register + * + * 31 Enable Video Engine clock dynamic slow down + * 30:28 Video Engine clock slow down setting + * 27 2D Engine GCLK clock source selection + * 26 2D Engine GCLK clock throttling enable + * 25:23 APB PCLK divider selection + * 22:20 LPC Host LHCLK divider selection + * 19 LPC Host LHCLK clock generation/output enable control + * 18:16 MAC AHB bus clock divider selection + * 15 SD/SDIO clock running enable + * 14:12 SD/SDIO divider selection + * 11 Reserved + * 10:8 Video port output clock delay control bit + * 7 ARM CPU/AHB clock slow down enable + * 6:4 ARM CPU/AHB clock slow down setting + * 3:2 ECLK clock source selection + * 1 CPU/AHB clock slow down idle timer + * 0 CPU/AHB clock dynamic slow down enable (defined in bit[6:4]) + */ +#define SCU_CLK_GET_PCLK_DIV(x) (((x) >> 23) & 0x7) + +/* SCU24 H-PLL Parameter Register (for Aspeed AST2400 SOC) + * + * 18 H-PLL parameter selection + * 0: Select H-PLL by strapping resistors + * 1: Select H-PLL by the programmed registers (SCU24[17:0]) + * 17 Enable H-PLL bypass mode + * 16 Turn off H-PLL + * 10:5 H-PLL Numerator + * 4 H-PLL Output Divider + * 3:0 H-PLL Denumerator + * + * (Output frequency) = 24MHz * (2-OD) * [(Numerator+2) / (Denumerator+1)] + */ + +#define SCU_AST2400_H_PLL_PROGRAMMED (0x1 << 18) +#define SCU_AST2400_H_PLL_BYPASS_EN (0x1 << 17) +#define SCU_AST2400_H_PLL_OFF (0x1 << 16) + +/* SCU24 H-PLL Parameter Register (for Aspeed AST2500 SOC) + * + * 21 Enable H-PLL reset + * 20 Enable H-PLL bypass mode + * 19 Turn off H-PLL + * 18:13 H-PLL Post Divider + * 12:5 H-PLL Numerator (M) + * 4:0 H-PLL Denumerator (N) + * + * (Output frequency) = CLKIN(24MHz) * [(M+1) / (N+1)] / (P+1) + * + * The default frequency is 792Mhz when CLKIN = 24MHz + */ + +#define SCU_H_PLL_BYPASS_EN (0x1 << 20) +#define SCU_H_PLL_OFF (0x1 << 19) + +/* SCU70 Hardware Strapping Register definition (for Aspeed AST2400 SOC) + * + * 31:29 Software defined strapping registers + * 28:27 DRAM size setting (for VGA driver use) + * 26:24 DRAM configuration setting + * 23 Enable 25 MHz reference clock input + * 22 Enable GPIOE pass-through mode + * 21 Enable GPIOD pass-through mode + * 20 Disable LPC to decode SuperIO 0x2E/0x4E address + * 19 Disable ACPI function + * 23,18 Clock source selection + * 17 Enable BMC 2nd boot watchdog timer + * 16 SuperIO configuration address selection + * 15 VGA Class Code selection + * 14 Enable LPC dedicated reset pin function + * 13:12 SPI mode selection + * 11:10 CPU/AHB clock frequency ratio selection + * 9:8 H-PLL default clock frequency selection + * 7 Define MAC#2 interface + * 6 Define MAC#1 interface + * 5 Enable VGA BIOS ROM + * 4 Boot flash memory extended option + * 3:2 VGA memory size selection + * 1:0 BMC CPU boot code selection + */ +#define SCU_AST2400_HW_STRAP_SW_DEFINE(x) ((x) << 29) +#define SCU_AST2400_HW_STRAP_SW_DEFINE_MASK (0x7 << 29) + +#define SCU_AST2400_HW_STRAP_DRAM_SIZE(x) ((x) << 27) +#define SCU_AST2400_HW_STRAP_DRAM_SIZE_MASK (0x3 << 27) +#define DRAM_SIZE_64MB 0 +#define DRAM_SIZE_128MB 1 +#define DRAM_SIZE_256MB 2 +#define DRAM_SIZE_512MB 3 + +#define SCU_AST2400_HW_STRAP_DRAM_CONFIG(x) ((x) << 24) +#define SCU_AST2400_HW_STRAP_DRAM_CONFIG_MASK (0x7 << 24) + +#define SCU_HW_STRAP_GPIOE_PT_EN (0x1 << 22) +#define SCU_HW_STRAP_GPIOD_PT_EN (0x1 << 21) +#define SCU_HW_STRAP_LPC_DEC_SUPER_IO (0x1 << 20) +#define SCU_AST2400_HW_STRAP_ACPI_DIS (0x1 << 19) + +/* bit 23, 18 [1,0] */ +#define SCU_AST2400_HW_STRAP_SET_CLK_SOURCE(x) (((((x) & 0x3) >> 1) << 23) \ + | (((x) & 0x1) << 18)) +#define SCU_AST2400_HW_STRAP_GET_CLK_SOURCE(x) (((((x) >> 23) & 0x1) << 1) \ + | (((x) >> 18) & 0x1)) +#define SCU_AST2400_HW_STRAP_CLK_SOURCE_MASK ((0x1 << 23) | (0x1 << 18)) +#define SCU_HW_STRAP_CLK_25M_IN (0x1 << 23) +#define AST2400_CLK_24M_IN 0 +#define AST2400_CLK_48M_IN 1 +#define AST2400_CLK_25M_IN_24M_USB_CKI 2 +#define AST2400_CLK_25M_IN_48M_USB_CKI 3 + +#define SCU_HW_STRAP_CLK_48M_IN (0x1 << 18) +#define SCU_HW_STRAP_2ND_BOOT_WDT (0x1 << 17) +#define SCU_HW_STRAP_SUPER_IO_CONFIG (0x1 << 16) +#define SCU_HW_STRAP_VGA_CLASS_CODE (0x1 << 15) +#define SCU_HW_STRAP_LPC_RESET_PIN (0x1 << 14) + +#define SCU_HW_STRAP_SPI_MODE(x) ((x) << 12) +#define SCU_HW_STRAP_SPI_MODE_MASK (0x3 << 12) +#define SCU_HW_STRAP_SPI_DIS 0 +#define SCU_HW_STRAP_SPI_MASTER 1 +#define SCU_HW_STRAP_SPI_M_S_EN 2 +#define SCU_HW_STRAP_SPI_PASS_THROUGH 3 + +#define SCU_AST2400_HW_STRAP_SET_CPU_AHB_RATIO(x) ((x) << 10) +#define SCU_AST2400_HW_STRAP_GET_CPU_AHB_RATIO(x) (((x) >> 10) & 3) +#define SCU_AST2400_HW_STRAP_CPU_AHB_RATIO_MASK (0x3 << 10) +#define AST2400_CPU_AHB_RATIO_1_1 0 +#define AST2400_CPU_AHB_RATIO_2_1 1 +#define AST2400_CPU_AHB_RATIO_4_1 2 +#define AST2400_CPU_AHB_RATIO_3_1 3 + +#define SCU_AST2400_HW_STRAP_GET_H_PLL_CLK(x) (((x) >> 8) & 0x3) +#define SCU_AST2400_HW_STRAP_H_PLL_CLK_MASK (0x3 << 8) +#define AST2400_CPU_384MHZ 0 +#define AST2400_CPU_360MHZ 1 +#define AST2400_CPU_336MHZ 2 +#define AST2400_CPU_408MHZ 3 + +#define SCU_HW_STRAP_MAC1_RGMII (0x1 << 7) +#define SCU_HW_STRAP_MAC0_RGMII (0x1 << 6) +#define SCU_HW_STRAP_VGA_BIOS_ROM (0x1 << 5) +#define SCU_HW_STRAP_SPI_WIDTH (0x1 << 4) + +#define SCU_HW_STRAP_VGA_SIZE_GET(x) (((x) >> 2) & 0x3) +#define SCU_HW_STRAP_VGA_MASK (0x3 << 2) +#define SCU_HW_STRAP_VGA_SIZE_SET(x) ((x) << 2) +#define VGA_8M_DRAM 0 +#define VGA_16M_DRAM 1 +#define VGA_32M_DRAM 2 +#define VGA_64M_DRAM 3 + +#define SCU_AST2400_HW_STRAP_BOOT_MODE(x) (x) +#define AST2400_NOR_BOOT 0 +#define AST2400_NAND_BOOT 1 +#define AST2400_SPI_BOOT 2 +#define AST2400_DIS_BOOT 3 + +/* + * SCU70 Hardware strapping register definition (for Aspeed AST2500 + * SoC and higher) + * + * 31 Enable SPI Flash Strap Auto Fetch Mode + * 30 Enable GPIO Strap Mode + * 29 Select UART Debug Port + * 28 Reserved (1) + * 27 Enable fast reset mode for ARM ICE debugger + * 26 Enable eSPI flash mode + * 25 Enable eSPI mode + * 24 Select DDR4 SDRAM + * 23 Select 25 MHz reference clock input mode + * 22 Enable GPIOE pass-through mode + * 21 Enable GPIOD pass-through mode + * 20 Disable LPC to decode SuperIO 0x2E/0x4E address + * 19 Enable ACPI function + * 18 Select USBCKI input frequency + * 17 Enable BMC 2nd boot watchdog timer + * 16 SuperIO configuration address selection + * 15 VGA Class Code selection + * 14 Select dedicated LPC reset input + * 13:12 SPI mode selection + * 11:9 AXI/AHB clock frequency ratio selection + * 8 Reserved (0) + * 7 Define MAC#2 interface + * 6 Define MAC#1 interface + * 5 Enable dedicated VGA BIOS ROM + * 4 Reserved (0) + * 3:2 VGA memory size selection + * 1 Reserved (1) + * 0 Disable CPU boot + */ +#define SCU_AST2500_HW_STRAP_SPI_AUTOFETCH_ENABLE (0x1 << 31) +#define SCU_AST2500_HW_STRAP_GPIO_STRAP_ENABLE (0x1 << 30) +#define SCU_AST2500_HW_STRAP_UART_DEBUG (0x1 << 29) +#define UART_DEBUG_UART1 0 +#define UART_DEBUG_UART5 1 +#define SCU_AST2500_HW_STRAP_RESERVED28 (0x1 << 28) + +#define SCU_AST2500_HW_STRAP_FAST_RESET_DBG (0x1 << 27) +#define SCU_AST2500_HW_STRAP_ESPI_FLASH_ENABLE (0x1 << 26) +#define SCU_AST2500_HW_STRAP_ESPI_ENABLE (0x1 << 25) +#define SCU_AST2500_HW_STRAP_DDR4_ENABLE (0x1 << 24) +#define SCU_AST2500_HW_STRAP_25HZ_CLOCK_MODE (0x1 << 23) + +#define SCU_AST2500_HW_STRAP_ACPI_ENABLE (0x1 << 19) +#define SCU_AST2500_HW_STRAP_USBCKI_FREQ (0x1 << 18) +#define USBCKI_FREQ_24MHZ 0 +#define USBCKI_FREQ_28MHZ 1 + +#define SCU_AST2500_HW_STRAP_SET_AXI_AHB_RATIO(x) ((x) << 9) +#define SCU_AST2500_HW_STRAP_GET_AXI_AHB_RATIO(x) (((x) >> 9) & 7) +#define SCU_AST2500_HW_STRAP_CPU_AXI_RATIO_MASK (0x7 << 9) +#define AXI_AHB_RATIO_UNDEFINED 0 +#define AXI_AHB_RATIO_2_1 1 +#define AXI_AHB_RATIO_3_1 2 +#define AXI_AHB_RATIO_4_1 3 +#define AXI_AHB_RATIO_5_1 4 +#define AXI_AHB_RATIO_6_1 5 +#define AXI_AHB_RATIO_7_1 6 +#define AXI_AHB_RATIO_8_1 7 + +#define SCU_AST2500_HW_STRAP_RESERVED1 (0x1 << 1) +#define SCU_AST2500_HW_STRAP_DIS_BOOT (0x1 << 0) + +#define AST2500_HW_STRAP1_DEFAULTS ( \ + SCU_AST2500_HW_STRAP_RESERVED28 | \ + SCU_HW_STRAP_2ND_BOOT_WDT | \ + SCU_HW_STRAP_VGA_CLASS_CODE | \ + SCU_HW_STRAP_LPC_RESET_PIN | \ + SCU_AST2500_HW_STRAP_SET_AXI_AHB_RATIO(AXI_AHB_RATIO_2_1) | \ + SCU_HW_STRAP_VGA_SIZE_SET(VGA_16M_DRAM) | \ + SCU_AST2500_HW_STRAP_RESERVED1) + +#endif /* ASPEED_SCU_H */ diff --git a/include/hw/misc/aspeed_sdmc.h b/include/hw/misc/aspeed_sdmc.h new file mode 100644 index 000000000..ec2d59a14 --- /dev/null +++ b/include/hw/misc/aspeed_sdmc.h @@ -0,0 +1,56 @@ +/* + * ASPEED SDRAM Memory Controller + * + * Copyright (C) 2016 IBM Corp. + * + * This code is licensed under the GPL version 2 or later. See the + * COPYING file in the top-level directory. + */ +#ifndef ASPEED_SDMC_H +#define ASPEED_SDMC_H + +#include "hw/sysbus.h" +#include "qom/object.h" + +#define TYPE_ASPEED_SDMC "aspeed.sdmc" +OBJECT_DECLARE_TYPE(AspeedSDMCState, AspeedSDMCClass, ASPEED_SDMC) +#define TYPE_ASPEED_2400_SDMC TYPE_ASPEED_SDMC "-ast2400" +#define TYPE_ASPEED_2500_SDMC TYPE_ASPEED_SDMC "-ast2500" +#define TYPE_ASPEED_2600_SDMC TYPE_ASPEED_SDMC "-ast2600" + +/* + * SDMC has 174 documented registers. In addition the u-boot device tree + * describes the following regions: + * - PHY status regs at offset 0x400, length 0x200 + * - PHY setting regs at offset 0x100, length 0x300 + * + * There are two sets of MRS (Mode Registers) configuration in ast2600 memory + * system: one is in the SDRAM MC (memory controller) which is used in run + * time, and the other is in the DDR-PHY IP which is used during DDR-PHY + * training. + */ +#define ASPEED_SDMC_NR_REGS (0x500 >> 2) + +struct AspeedSDMCState { + /*< private >*/ + SysBusDevice parent_obj; + + /*< public >*/ + MemoryRegion iomem; + + uint32_t regs[ASPEED_SDMC_NR_REGS]; + uint64_t ram_size; + uint64_t max_ram_size; +}; + + +struct AspeedSDMCClass { + SysBusDeviceClass parent_class; + + uint64_t max_ram_size; + const uint64_t *valid_ram_sizes; + uint32_t (*compute_conf)(AspeedSDMCState *s, uint32_t data); + void (*write)(AspeedSDMCState *s, uint32_t reg, uint32_t data); +}; + +#endif /* ASPEED_SDMC_H */ diff --git a/include/hw/misc/aspeed_xdma.h b/include/hw/misc/aspeed_xdma.h new file mode 100644 index 000000000..9869ef472 --- /dev/null +++ b/include/hw/misc/aspeed_xdma.h @@ -0,0 +1,31 @@ +/* + * ASPEED XDMA Controller + * Eddie James + * + * Copyright (C) 2019 IBM Corp. + * SPDX-License-Identifer: GPL-2.0-or-later + */ + +#ifndef ASPEED_XDMA_H +#define ASPEED_XDMA_H + +#include "hw/sysbus.h" +#include "qom/object.h" + +#define TYPE_ASPEED_XDMA "aspeed.xdma" +OBJECT_DECLARE_SIMPLE_TYPE(AspeedXDMAState, ASPEED_XDMA) + +#define ASPEED_XDMA_NUM_REGS (ASPEED_XDMA_REG_SIZE / sizeof(uint32_t)) +#define ASPEED_XDMA_REG_SIZE 0x7C + +struct AspeedXDMAState { + SysBusDevice parent; + + MemoryRegion iomem; + qemu_irq irq; + + char bmc_cmdq_readp_set; + uint32_t regs[ASPEED_XDMA_NUM_REGS]; +}; + +#endif /* ASPEED_XDMA_H */ diff --git a/include/hw/misc/auxbus.h b/include/hw/misc/auxbus.h new file mode 100644 index 000000000..b05799d2f --- /dev/null +++ b/include/hw/misc/auxbus.h @@ -0,0 +1,142 @@ +/* + * auxbus.h + * + * Copyright (C)2014 : GreenSocs Ltd + * http://www.greensocs.com/ , email: info@greensocs.com + * + * Developed by : + * Frederic Konrad + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 2 of the License, or + * (at your option)any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, see . + * + */ + +#ifndef HW_MISC_AUXBUS_H +#define HW_MISC_AUXBUS_H + +#include "exec/memory.h" +#include "hw/qdev-core.h" +#include "qom/object.h" + +typedef struct AUXSlave AUXSlave; +typedef enum AUXCommand AUXCommand; +typedef enum AUXReply AUXReply; + +#define TYPE_AUXTOI2C "aux-to-i2c-bridge" +OBJECT_DECLARE_SIMPLE_TYPE(AUXTOI2CState, AUXTOI2C) + +enum AUXCommand { + WRITE_I2C = 0, + READ_I2C = 1, + WRITE_I2C_STATUS = 2, + WRITE_I2C_MOT = 4, + READ_I2C_MOT = 5, + WRITE_AUX = 8, + READ_AUX = 9 +}; + +enum AUXReply { + AUX_I2C_ACK = 0, + AUX_NACK = 1, + AUX_DEFER = 2, + AUX_I2C_NACK = 4, + AUX_I2C_DEFER = 8 +}; + +#define TYPE_AUX_BUS "aux-bus" +OBJECT_DECLARE_SIMPLE_TYPE(AUXBus, AUX_BUS) + +struct AUXBus { + /* < private > */ + BusState qbus; + + /* < public > */ + AUXSlave *current_dev; + AUXSlave *dev; + uint32_t last_i2c_address; + AUXCommand last_transaction; + + AUXTOI2CState *bridge; + + MemoryRegion *aux_io; + AddressSpace aux_addr_space; +}; + +#define TYPE_AUX_SLAVE "aux-slave" +OBJECT_DECLARE_SIMPLE_TYPE(AUXSlave, AUX_SLAVE) + +struct AUXSlave { + /* < private > */ + DeviceState parent_obj; + + /* < public > */ + MemoryRegion *mmio; +}; + +/** + * aux_bus_init: Initialize an AUX bus. + * + * Returns the new AUX bus created. + * + * @parent The device where this bus is located. + * @name The name of the bus. + */ +AUXBus *aux_bus_init(DeviceState *parent, const char *name); + +/** + * aux_bus_realize: Realize an AUX bus. + * + * @bus: The AUX bus. + */ +void aux_bus_realize(AUXBus *bus); + +/* + * aux_request: Make a request on the bus. + * + * Returns the reply of the request. + * + * @bus Ths bus where the request happen. + * @cmd The command requested. + * @address The 20bits address of the slave. + * @len The length of the read or write. + * @data The data array which will be filled or read during transfer. + */ +AUXReply aux_request(AUXBus *bus, AUXCommand cmd, uint32_t address, + uint8_t len, uint8_t *data); + +/* + * aux_get_i2c_bus: Get the i2c bus for I2C over AUX command. + * + * Returns the i2c bus associated to this AUX bus. + * + * @bus The AUX bus. + */ +I2CBus *aux_get_i2c_bus(AUXBus *bus); + +/* + * aux_init_mmio: Init an mmio for an AUX slave. + * + * @aux_slave The AUX slave. + * @mmio The mmio to be registered. + */ +void aux_init_mmio(AUXSlave *aux_slave, MemoryRegion *mmio); + +/* aux_map_slave: Map the mmio for an AUX slave on the bus. + * + * @dev The AUX slave. + * @addr The address for the slave's mmio. + */ +void aux_map_slave(AUXSlave *dev, hwaddr addr); + +#endif /* HW_MISC_AUXBUS_H */ diff --git a/include/hw/misc/avr_power.h b/include/hw/misc/avr_power.h new file mode 100644 index 000000000..707df030b --- /dev/null +++ b/include/hw/misc/avr_power.h @@ -0,0 +1,47 @@ +/* + * AVR Power Reduction Management + * + * Copyright (c) 2019-2020 Michael Rolnik + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ + +#ifndef HW_MISC_AVR_POWER_H +#define HW_MISC_AVR_POWER_H + +#include "hw/sysbus.h" +#include "hw/hw.h" +#include "qom/object.h" + + +#define TYPE_AVR_MASK "avr-power" +OBJECT_DECLARE_SIMPLE_TYPE(AVRMaskState, AVR_MASK) + +struct AVRMaskState { + /* */ + SysBusDevice parent_obj; + + /* */ + MemoryRegion iomem; + + uint8_t val; + qemu_irq irq[8]; +}; + +#endif /* HW_MISC_AVR_POWER_H */ diff --git a/include/hw/misc/bcm2835_cprman.h b/include/hw/misc/bcm2835_cprman.h new file mode 100644 index 000000000..3df4ceedd --- /dev/null +++ b/include/hw/misc/bcm2835_cprman.h @@ -0,0 +1,210 @@ +/* + * BCM2835 CPRMAN clock manager + * + * Copyright (c) 2020 Luc Michel + * + * SPDX-License-Identifier: GPL-2.0-or-later + */ + +#ifndef HW_MISC_CPRMAN_H +#define HW_MISC_CPRMAN_H + +#include "hw/sysbus.h" +#include "hw/qdev-clock.h" + +#define TYPE_BCM2835_CPRMAN "bcm2835-cprman" + +typedef struct BCM2835CprmanState BCM2835CprmanState; + +DECLARE_INSTANCE_CHECKER(BCM2835CprmanState, CPRMAN, + TYPE_BCM2835_CPRMAN) + +#define CPRMAN_NUM_REGS (0x2000 / sizeof(uint32_t)) + +typedef enum CprmanPll { + CPRMAN_PLLA = 0, + CPRMAN_PLLC, + CPRMAN_PLLD, + CPRMAN_PLLH, + CPRMAN_PLLB, + + CPRMAN_NUM_PLL +} CprmanPll; + +typedef enum CprmanPllChannel { + CPRMAN_PLLA_CHANNEL_DSI0 = 0, + CPRMAN_PLLA_CHANNEL_CORE, + CPRMAN_PLLA_CHANNEL_PER, + CPRMAN_PLLA_CHANNEL_CCP2, + + CPRMAN_PLLC_CHANNEL_CORE2, + CPRMAN_PLLC_CHANNEL_CORE1, + CPRMAN_PLLC_CHANNEL_PER, + CPRMAN_PLLC_CHANNEL_CORE0, + + CPRMAN_PLLD_CHANNEL_DSI0, + CPRMAN_PLLD_CHANNEL_CORE, + CPRMAN_PLLD_CHANNEL_PER, + CPRMAN_PLLD_CHANNEL_DSI1, + + CPRMAN_PLLH_CHANNEL_AUX, + CPRMAN_PLLH_CHANNEL_RCAL, + CPRMAN_PLLH_CHANNEL_PIX, + + CPRMAN_PLLB_CHANNEL_ARM, + + CPRMAN_NUM_PLL_CHANNEL, + + /* Special values used when connecting clock sources to clocks */ + CPRMAN_CLOCK_SRC_NORMAL = -1, + CPRMAN_CLOCK_SRC_FORCE_GROUND = -2, + CPRMAN_CLOCK_SRC_DSI0HSCK = -3, +} CprmanPllChannel; + +typedef enum CprmanClockMux { + CPRMAN_CLOCK_GNRIC, + CPRMAN_CLOCK_VPU, + CPRMAN_CLOCK_SYS, + CPRMAN_CLOCK_PERIA, + CPRMAN_CLOCK_PERII, + CPRMAN_CLOCK_H264, + CPRMAN_CLOCK_ISP, + CPRMAN_CLOCK_V3D, + CPRMAN_CLOCK_CAM0, + CPRMAN_CLOCK_CAM1, + CPRMAN_CLOCK_CCP2, + CPRMAN_CLOCK_DSI0E, + CPRMAN_CLOCK_DSI0P, + CPRMAN_CLOCK_DPI, + CPRMAN_CLOCK_GP0, + CPRMAN_CLOCK_GP1, + CPRMAN_CLOCK_GP2, + CPRMAN_CLOCK_HSM, + CPRMAN_CLOCK_OTP, + CPRMAN_CLOCK_PCM, + CPRMAN_CLOCK_PWM, + CPRMAN_CLOCK_SLIM, + CPRMAN_CLOCK_SMI, + CPRMAN_CLOCK_TEC, + CPRMAN_CLOCK_TD0, + CPRMAN_CLOCK_TD1, + CPRMAN_CLOCK_TSENS, + CPRMAN_CLOCK_TIMER, + CPRMAN_CLOCK_UART, + CPRMAN_CLOCK_VEC, + CPRMAN_CLOCK_PULSE, + CPRMAN_CLOCK_SDC, + CPRMAN_CLOCK_ARM, + CPRMAN_CLOCK_AVEO, + CPRMAN_CLOCK_EMMC, + CPRMAN_CLOCK_EMMC2, + + CPRMAN_NUM_CLOCK_MUX +} CprmanClockMux; + +typedef enum CprmanClockMuxSource { + CPRMAN_CLOCK_SRC_GND = 0, + CPRMAN_CLOCK_SRC_XOSC, + CPRMAN_CLOCK_SRC_TD0, + CPRMAN_CLOCK_SRC_TD1, + CPRMAN_CLOCK_SRC_PLLA, + CPRMAN_CLOCK_SRC_PLLC, + CPRMAN_CLOCK_SRC_PLLD, + CPRMAN_CLOCK_SRC_PLLH, + CPRMAN_CLOCK_SRC_PLLC_CORE1, + CPRMAN_CLOCK_SRC_PLLC_CORE2, + + CPRMAN_NUM_CLOCK_MUX_SRC +} CprmanClockMuxSource; + +typedef struct CprmanPllState { + /*< private >*/ + DeviceState parent_obj; + + /*< public >*/ + CprmanPll id; + + uint32_t *reg_cm; + uint32_t *reg_a2w_ctrl; + uint32_t *reg_a2w_ana; /* ANA[0] .. ANA[3] */ + uint32_t prediv_mask; /* prediv bit in ana[1] */ + uint32_t *reg_a2w_frac; + + Clock *xosc_in; + Clock *out; +} CprmanPllState; + +typedef struct CprmanPllChannelState { + /*< private >*/ + DeviceState parent_obj; + + /*< public >*/ + CprmanPllChannel id; + CprmanPll parent; + + uint32_t *reg_cm; + uint32_t hold_mask; + uint32_t load_mask; + uint32_t *reg_a2w_ctrl; + int fixed_divider; + + Clock *pll_in; + Clock *out; +} CprmanPllChannelState; + +typedef struct CprmanClockMuxState { + /*< private >*/ + DeviceState parent_obj; + + /*< public >*/ + CprmanClockMux id; + + uint32_t *reg_ctl; + uint32_t *reg_div; + int int_bits; + int frac_bits; + + Clock *srcs[CPRMAN_NUM_CLOCK_MUX_SRC]; + Clock *out; + + /* + * Used by clock srcs update callback to retrieve both the clock and the + * source number. + */ + struct CprmanClockMuxState *backref[CPRMAN_NUM_CLOCK_MUX_SRC]; +} CprmanClockMuxState; + +typedef struct CprmanDsi0HsckMuxState { + /*< private >*/ + DeviceState parent_obj; + + /*< public >*/ + CprmanClockMux id; + + uint32_t *reg_cm; + + Clock *plla_in; + Clock *plld_in; + Clock *out; +} CprmanDsi0HsckMuxState; + +struct BCM2835CprmanState { + /*< private >*/ + SysBusDevice parent_obj; + + /*< public >*/ + MemoryRegion iomem; + + CprmanPllState plls[CPRMAN_NUM_PLL]; + CprmanPllChannelState channels[CPRMAN_NUM_PLL_CHANNEL]; + CprmanClockMuxState clock_muxes[CPRMAN_NUM_CLOCK_MUX]; + CprmanDsi0HsckMuxState dsi0hsck_mux; + + uint32_t regs[CPRMAN_NUM_REGS]; + uint32_t xosc_freq; + + Clock *xosc; + Clock *gnd; +}; + +#endif diff --git a/include/hw/misc/bcm2835_cprman_internals.h b/include/hw/misc/bcm2835_cprman_internals.h new file mode 100644 index 000000000..339759b30 --- /dev/null +++ b/include/hw/misc/bcm2835_cprman_internals.h @@ -0,0 +1,1019 @@ +/* + * BCM2835 CPRMAN clock manager + * + * Copyright (c) 2020 Luc Michel + * + * SPDX-License-Identifier: GPL-2.0-or-later + */ + +#ifndef HW_MISC_CPRMAN_INTERNALS_H +#define HW_MISC_CPRMAN_INTERNALS_H + +#include "hw/registerfields.h" +#include "hw/misc/bcm2835_cprman.h" + +#define TYPE_CPRMAN_PLL "bcm2835-cprman-pll" +#define TYPE_CPRMAN_PLL_CHANNEL "bcm2835-cprman-pll-channel" +#define TYPE_CPRMAN_CLOCK_MUX "bcm2835-cprman-clock-mux" +#define TYPE_CPRMAN_DSI0HSCK_MUX "bcm2835-cprman-dsi0hsck-mux" + +DECLARE_INSTANCE_CHECKER(CprmanPllState, CPRMAN_PLL, + TYPE_CPRMAN_PLL) +DECLARE_INSTANCE_CHECKER(CprmanPllChannelState, CPRMAN_PLL_CHANNEL, + TYPE_CPRMAN_PLL_CHANNEL) +DECLARE_INSTANCE_CHECKER(CprmanClockMuxState, CPRMAN_CLOCK_MUX, + TYPE_CPRMAN_CLOCK_MUX) +DECLARE_INSTANCE_CHECKER(CprmanDsi0HsckMuxState, CPRMAN_DSI0HSCK_MUX, + TYPE_CPRMAN_DSI0HSCK_MUX) + +/* Register map */ + +/* PLLs */ +REG32(CM_PLLA, 0x104) + FIELD(CM_PLLA, LOADDSI0, 0, 1) + FIELD(CM_PLLA, HOLDDSI0, 1, 1) + FIELD(CM_PLLA, LOADCCP2, 2, 1) + FIELD(CM_PLLA, HOLDCCP2, 3, 1) + FIELD(CM_PLLA, LOADCORE, 4, 1) + FIELD(CM_PLLA, HOLDCORE, 5, 1) + FIELD(CM_PLLA, LOADPER, 6, 1) + FIELD(CM_PLLA, HOLDPER, 7, 1) + FIELD(CM_PLLx, ANARST, 8, 1) +REG32(CM_PLLC, 0x108) + FIELD(CM_PLLC, LOADCORE0, 0, 1) + FIELD(CM_PLLC, HOLDCORE0, 1, 1) + FIELD(CM_PLLC, LOADCORE1, 2, 1) + FIELD(CM_PLLC, HOLDCORE1, 3, 1) + FIELD(CM_PLLC, LOADCORE2, 4, 1) + FIELD(CM_PLLC, HOLDCORE2, 5, 1) + FIELD(CM_PLLC, LOADPER, 6, 1) + FIELD(CM_PLLC, HOLDPER, 7, 1) +REG32(CM_PLLD, 0x10c) + FIELD(CM_PLLD, LOADDSI0, 0, 1) + FIELD(CM_PLLD, HOLDDSI0, 1, 1) + FIELD(CM_PLLD, LOADDSI1, 2, 1) + FIELD(CM_PLLD, HOLDDSI1, 3, 1) + FIELD(CM_PLLD, LOADCORE, 4, 1) + FIELD(CM_PLLD, HOLDCORE, 5, 1) + FIELD(CM_PLLD, LOADPER, 6, 1) + FIELD(CM_PLLD, HOLDPER, 7, 1) +REG32(CM_PLLH, 0x110) + FIELD(CM_PLLH, LOADPIX, 0, 1) + FIELD(CM_PLLH, LOADAUX, 1, 1) + FIELD(CM_PLLH, LOADRCAL, 2, 1) +REG32(CM_PLLB, 0x170) + FIELD(CM_PLLB, LOADARM, 0, 1) + FIELD(CM_PLLB, HOLDARM, 1, 1) + +REG32(A2W_PLLA_CTRL, 0x1100) + FIELD(A2W_PLLx_CTRL, NDIV, 0, 10) + FIELD(A2W_PLLx_CTRL, PDIV, 12, 3) + FIELD(A2W_PLLx_CTRL, PWRDN, 16, 1) + FIELD(A2W_PLLx_CTRL, PRST_DISABLE, 17, 1) +REG32(A2W_PLLC_CTRL, 0x1120) +REG32(A2W_PLLD_CTRL, 0x1140) +REG32(A2W_PLLH_CTRL, 0x1160) +REG32(A2W_PLLB_CTRL, 0x11e0) + +REG32(A2W_PLLA_ANA0, 0x1010) +REG32(A2W_PLLA_ANA1, 0x1014) + FIELD(A2W_PLLx_ANA1, FB_PREDIV, 14, 1) +REG32(A2W_PLLA_ANA2, 0x1018) +REG32(A2W_PLLA_ANA3, 0x101c) + +REG32(A2W_PLLC_ANA0, 0x1030) +REG32(A2W_PLLC_ANA1, 0x1034) +REG32(A2W_PLLC_ANA2, 0x1038) +REG32(A2W_PLLC_ANA3, 0x103c) + +REG32(A2W_PLLD_ANA0, 0x1050) +REG32(A2W_PLLD_ANA1, 0x1054) +REG32(A2W_PLLD_ANA2, 0x1058) +REG32(A2W_PLLD_ANA3, 0x105c) + +REG32(A2W_PLLH_ANA0, 0x1070) +REG32(A2W_PLLH_ANA1, 0x1074) + FIELD(A2W_PLLH_ANA1, FB_PREDIV, 11, 1) +REG32(A2W_PLLH_ANA2, 0x1078) +REG32(A2W_PLLH_ANA3, 0x107c) + +REG32(A2W_PLLB_ANA0, 0x10f0) +REG32(A2W_PLLB_ANA1, 0x10f4) +REG32(A2W_PLLB_ANA2, 0x10f8) +REG32(A2W_PLLB_ANA3, 0x10fc) + +REG32(A2W_PLLA_FRAC, 0x1200) + FIELD(A2W_PLLx_FRAC, FRAC, 0, 20) +REG32(A2W_PLLC_FRAC, 0x1220) +REG32(A2W_PLLD_FRAC, 0x1240) +REG32(A2W_PLLH_FRAC, 0x1260) +REG32(A2W_PLLB_FRAC, 0x12e0) + +/* PLL channels */ +REG32(A2W_PLLA_DSI0, 0x1300) + FIELD(A2W_PLLx_CHANNELy, DIV, 0, 8) + FIELD(A2W_PLLx_CHANNELy, DISABLE, 8, 1) +REG32(A2W_PLLA_CORE, 0x1400) +REG32(A2W_PLLA_PER, 0x1500) +REG32(A2W_PLLA_CCP2, 0x1600) + +REG32(A2W_PLLC_CORE2, 0x1320) +REG32(A2W_PLLC_CORE1, 0x1420) +REG32(A2W_PLLC_PER, 0x1520) +REG32(A2W_PLLC_CORE0, 0x1620) + +REG32(A2W_PLLD_DSI0, 0x1340) +REG32(A2W_PLLD_CORE, 0x1440) +REG32(A2W_PLLD_PER, 0x1540) +REG32(A2W_PLLD_DSI1, 0x1640) + +REG32(A2W_PLLH_AUX, 0x1360) +REG32(A2W_PLLH_RCAL, 0x1460) +REG32(A2W_PLLH_PIX, 0x1560) +REG32(A2W_PLLH_STS, 0x1660) + +REG32(A2W_PLLB_ARM, 0x13e0) + +/* Clock muxes */ +REG32(CM_GNRICCTL, 0x000) + FIELD(CM_CLOCKx_CTL, SRC, 0, 4) + FIELD(CM_CLOCKx_CTL, ENABLE, 4, 1) + FIELD(CM_CLOCKx_CTL, KILL, 5, 1) + FIELD(CM_CLOCKx_CTL, GATE, 6, 1) + FIELD(CM_CLOCKx_CTL, BUSY, 7, 1) + FIELD(CM_CLOCKx_CTL, BUSYD, 8, 1) + FIELD(CM_CLOCKx_CTL, MASH, 9, 2) + FIELD(CM_CLOCKx_CTL, FLIP, 11, 1) +REG32(CM_GNRICDIV, 0x004) + FIELD(CM_CLOCKx_DIV, FRAC, 0, 12) +REG32(CM_VPUCTL, 0x008) +REG32(CM_VPUDIV, 0x00c) +REG32(CM_SYSCTL, 0x010) +REG32(CM_SYSDIV, 0x014) +REG32(CM_PERIACTL, 0x018) +REG32(CM_PERIADIV, 0x01c) +REG32(CM_PERIICTL, 0x020) +REG32(CM_PERIIDIV, 0x024) +REG32(CM_H264CTL, 0x028) +REG32(CM_H264DIV, 0x02c) +REG32(CM_ISPCTL, 0x030) +REG32(CM_ISPDIV, 0x034) +REG32(CM_V3DCTL, 0x038) +REG32(CM_V3DDIV, 0x03c) +REG32(CM_CAM0CTL, 0x040) +REG32(CM_CAM0DIV, 0x044) +REG32(CM_CAM1CTL, 0x048) +REG32(CM_CAM1DIV, 0x04c) +REG32(CM_CCP2CTL, 0x050) +REG32(CM_CCP2DIV, 0x054) +REG32(CM_DSI0ECTL, 0x058) +REG32(CM_DSI0EDIV, 0x05c) +REG32(CM_DSI0PCTL, 0x060) +REG32(CM_DSI0PDIV, 0x064) +REG32(CM_DPICTL, 0x068) +REG32(CM_DPIDIV, 0x06c) +REG32(CM_GP0CTL, 0x070) +REG32(CM_GP0DIV, 0x074) +REG32(CM_GP1CTL, 0x078) +REG32(CM_GP1DIV, 0x07c) +REG32(CM_GP2CTL, 0x080) +REG32(CM_GP2DIV, 0x084) +REG32(CM_HSMCTL, 0x088) +REG32(CM_HSMDIV, 0x08c) +REG32(CM_OTPCTL, 0x090) +REG32(CM_OTPDIV, 0x094) +REG32(CM_PCMCTL, 0x098) +REG32(CM_PCMDIV, 0x09c) +REG32(CM_PWMCTL, 0x0a0) +REG32(CM_PWMDIV, 0x0a4) +REG32(CM_SLIMCTL, 0x0a8) +REG32(CM_SLIMDIV, 0x0ac) +REG32(CM_SMICTL, 0x0b0) +REG32(CM_SMIDIV, 0x0b4) +REG32(CM_TCNTCTL, 0x0c0) +REG32(CM_TCNTCNT, 0x0c4) +REG32(CM_TECCTL, 0x0c8) +REG32(CM_TECDIV, 0x0cc) +REG32(CM_TD0CTL, 0x0d0) +REG32(CM_TD0DIV, 0x0d4) +REG32(CM_TD1CTL, 0x0d8) +REG32(CM_TD1DIV, 0x0dc) +REG32(CM_TSENSCTL, 0x0e0) +REG32(CM_TSENSDIV, 0x0e4) +REG32(CM_TIMERCTL, 0x0e8) +REG32(CM_TIMERDIV, 0x0ec) +REG32(CM_UARTCTL, 0x0f0) +REG32(CM_UARTDIV, 0x0f4) +REG32(CM_VECCTL, 0x0f8) +REG32(CM_VECDIV, 0x0fc) +REG32(CM_PULSECTL, 0x190) +REG32(CM_PULSEDIV, 0x194) +REG32(CM_SDCCTL, 0x1a8) +REG32(CM_SDCDIV, 0x1ac) +REG32(CM_ARMCTL, 0x1b0) +REG32(CM_AVEOCTL, 0x1b8) +REG32(CM_AVEODIV, 0x1bc) +REG32(CM_EMMCCTL, 0x1c0) +REG32(CM_EMMCDIV, 0x1c4) +REG32(CM_EMMC2CTL, 0x1d0) +REG32(CM_EMMC2DIV, 0x1d4) + +/* misc registers */ +REG32(CM_LOCK, 0x114) + FIELD(CM_LOCK, FLOCKH, 12, 1) + FIELD(CM_LOCK, FLOCKD, 11, 1) + FIELD(CM_LOCK, FLOCKC, 10, 1) + FIELD(CM_LOCK, FLOCKB, 9, 1) + FIELD(CM_LOCK, FLOCKA, 8, 1) + +REG32(CM_DSI0HSCK, 0x120) + FIELD(CM_DSI0HSCK, SELPLLD, 0, 1) + +/* + * This field is common to all registers. Each register write value must match + * the CPRMAN_PASSWORD magic value in its 8 MSB. + */ +FIELD(CPRMAN, PASSWORD, 24, 8) +#define CPRMAN_PASSWORD 0x5a + +/* PLL init info */ +typedef struct PLLInitInfo { + const char *name; + size_t cm_offset; + size_t a2w_ctrl_offset; + size_t a2w_ana_offset; + uint32_t prediv_mask; /* Prediv bit in ana[1] */ + size_t a2w_frac_offset; +} PLLInitInfo; + +#define FILL_PLL_INIT_INFO(pll_) \ + .cm_offset = R_CM_ ## pll_, \ + .a2w_ctrl_offset = R_A2W_ ## pll_ ## _CTRL, \ + .a2w_ana_offset = R_A2W_ ## pll_ ## _ANA0, \ + .a2w_frac_offset = R_A2W_ ## pll_ ## _FRAC + +static const PLLInitInfo PLL_INIT_INFO[] = { + [CPRMAN_PLLA] = { + .name = "plla", + .prediv_mask = R_A2W_PLLx_ANA1_FB_PREDIV_MASK, + FILL_PLL_INIT_INFO(PLLA), + }, + [CPRMAN_PLLC] = { + .name = "pllc", + .prediv_mask = R_A2W_PLLx_ANA1_FB_PREDIV_MASK, + FILL_PLL_INIT_INFO(PLLC), + }, + [CPRMAN_PLLD] = { + .name = "plld", + .prediv_mask = R_A2W_PLLx_ANA1_FB_PREDIV_MASK, + FILL_PLL_INIT_INFO(PLLD), + }, + [CPRMAN_PLLH] = { + .name = "pllh", + .prediv_mask = R_A2W_PLLH_ANA1_FB_PREDIV_MASK, + FILL_PLL_INIT_INFO(PLLH), + }, + [CPRMAN_PLLB] = { + .name = "pllb", + .prediv_mask = R_A2W_PLLx_ANA1_FB_PREDIV_MASK, + FILL_PLL_INIT_INFO(PLLB), + }, +}; + +#undef FILL_PLL_CHANNEL_INIT_INFO + +static inline void set_pll_init_info(BCM2835CprmanState *s, + CprmanPllState *pll, + CprmanPll id) +{ + pll->id = id; + pll->reg_cm = &s->regs[PLL_INIT_INFO[id].cm_offset]; + pll->reg_a2w_ctrl = &s->regs[PLL_INIT_INFO[id].a2w_ctrl_offset]; + pll->reg_a2w_ana = &s->regs[PLL_INIT_INFO[id].a2w_ana_offset]; + pll->prediv_mask = PLL_INIT_INFO[id].prediv_mask; + pll->reg_a2w_frac = &s->regs[PLL_INIT_INFO[id].a2w_frac_offset]; +} + + +/* PLL channel init info */ +typedef struct PLLChannelInitInfo { + const char *name; + CprmanPll parent; + size_t cm_offset; + uint32_t cm_hold_mask; + uint32_t cm_load_mask; + size_t a2w_ctrl_offset; + unsigned int fixed_divider; +} PLLChannelInitInfo; + +#define FILL_PLL_CHANNEL_INIT_INFO_common(pll_, channel_) \ + .parent = CPRMAN_ ## pll_, \ + .cm_offset = R_CM_ ## pll_, \ + .cm_load_mask = R_CM_ ## pll_ ## _ ## LOAD ## channel_ ## _MASK, \ + .a2w_ctrl_offset = R_A2W_ ## pll_ ## _ ## channel_ + +#define FILL_PLL_CHANNEL_INIT_INFO(pll_, channel_) \ + FILL_PLL_CHANNEL_INIT_INFO_common(pll_, channel_), \ + .cm_hold_mask = R_CM_ ## pll_ ## _ ## HOLD ## channel_ ## _MASK, \ + .fixed_divider = 1 + +#define FILL_PLL_CHANNEL_INIT_INFO_nohold(pll_, channel_) \ + FILL_PLL_CHANNEL_INIT_INFO_common(pll_, channel_), \ + .cm_hold_mask = 0 + +static PLLChannelInitInfo PLL_CHANNEL_INIT_INFO[] = { + [CPRMAN_PLLA_CHANNEL_DSI0] = { + .name = "plla-dsi0", + FILL_PLL_CHANNEL_INIT_INFO(PLLA, DSI0), + }, + [CPRMAN_PLLA_CHANNEL_CORE] = { + .name = "plla-core", + FILL_PLL_CHANNEL_INIT_INFO(PLLA, CORE), + }, + [CPRMAN_PLLA_CHANNEL_PER] = { + .name = "plla-per", + FILL_PLL_CHANNEL_INIT_INFO(PLLA, PER), + }, + [CPRMAN_PLLA_CHANNEL_CCP2] = { + .name = "plla-ccp2", + FILL_PLL_CHANNEL_INIT_INFO(PLLA, CCP2), + }, + + [CPRMAN_PLLC_CHANNEL_CORE2] = { + .name = "pllc-core2", + FILL_PLL_CHANNEL_INIT_INFO(PLLC, CORE2), + }, + [CPRMAN_PLLC_CHANNEL_CORE1] = { + .name = "pllc-core1", + FILL_PLL_CHANNEL_INIT_INFO(PLLC, CORE1), + }, + [CPRMAN_PLLC_CHANNEL_PER] = { + .name = "pllc-per", + FILL_PLL_CHANNEL_INIT_INFO(PLLC, PER), + }, + [CPRMAN_PLLC_CHANNEL_CORE0] = { + .name = "pllc-core0", + FILL_PLL_CHANNEL_INIT_INFO(PLLC, CORE0), + }, + + [CPRMAN_PLLD_CHANNEL_DSI0] = { + .name = "plld-dsi0", + FILL_PLL_CHANNEL_INIT_INFO(PLLD, DSI0), + }, + [CPRMAN_PLLD_CHANNEL_CORE] = { + .name = "plld-core", + FILL_PLL_CHANNEL_INIT_INFO(PLLD, CORE), + }, + [CPRMAN_PLLD_CHANNEL_PER] = { + .name = "plld-per", + FILL_PLL_CHANNEL_INIT_INFO(PLLD, PER), + }, + [CPRMAN_PLLD_CHANNEL_DSI1] = { + .name = "plld-dsi1", + FILL_PLL_CHANNEL_INIT_INFO(PLLD, DSI1), + }, + + [CPRMAN_PLLH_CHANNEL_AUX] = { + .name = "pllh-aux", + .fixed_divider = 1, + FILL_PLL_CHANNEL_INIT_INFO_nohold(PLLH, AUX), + }, + [CPRMAN_PLLH_CHANNEL_RCAL] = { + .name = "pllh-rcal", + .fixed_divider = 10, + FILL_PLL_CHANNEL_INIT_INFO_nohold(PLLH, RCAL), + }, + [CPRMAN_PLLH_CHANNEL_PIX] = { + .name = "pllh-pix", + .fixed_divider = 10, + FILL_PLL_CHANNEL_INIT_INFO_nohold(PLLH, PIX), + }, + + [CPRMAN_PLLB_CHANNEL_ARM] = { + .name = "pllb-arm", + FILL_PLL_CHANNEL_INIT_INFO(PLLB, ARM), + }, +}; + +#undef FILL_PLL_CHANNEL_INIT_INFO_nohold +#undef FILL_PLL_CHANNEL_INIT_INFO +#undef FILL_PLL_CHANNEL_INIT_INFO_common + +static inline void set_pll_channel_init_info(BCM2835CprmanState *s, + CprmanPllChannelState *channel, + CprmanPllChannel id) +{ + channel->id = id; + channel->parent = PLL_CHANNEL_INIT_INFO[id].parent; + channel->reg_cm = &s->regs[PLL_CHANNEL_INIT_INFO[id].cm_offset]; + channel->hold_mask = PLL_CHANNEL_INIT_INFO[id].cm_hold_mask; + channel->load_mask = PLL_CHANNEL_INIT_INFO[id].cm_load_mask; + channel->reg_a2w_ctrl = &s->regs[PLL_CHANNEL_INIT_INFO[id].a2w_ctrl_offset]; + channel->fixed_divider = PLL_CHANNEL_INIT_INFO[id].fixed_divider; +} + +/* Clock mux init info */ +typedef struct ClockMuxInitInfo { + const char *name; + size_t cm_offset; /* cm_offset[0]->CM_CTL, cm_offset[1]->CM_DIV */ + int int_bits; + int frac_bits; + + CprmanPllChannel src_mapping[CPRMAN_NUM_CLOCK_MUX_SRC]; +} ClockMuxInitInfo; + +/* + * Each clock mux can have up to 10 sources. Sources 0 to 3 are always the + * same (ground, xosc, td0, td1). Sources 4 to 9 are mux specific, and are not + * always populated. The following macros catch all those cases. + */ + +/* Unknown mapping. Connect everything to ground */ +#define SRC_MAPPING_INFO_unknown \ + .src_mapping = { \ + CPRMAN_CLOCK_SRC_FORCE_GROUND, /* gnd */ \ + CPRMAN_CLOCK_SRC_FORCE_GROUND, /* xosc */ \ + CPRMAN_CLOCK_SRC_FORCE_GROUND, /* test debug 0 */ \ + CPRMAN_CLOCK_SRC_FORCE_GROUND, /* test debug 1 */ \ + CPRMAN_CLOCK_SRC_FORCE_GROUND, /* pll a */ \ + CPRMAN_CLOCK_SRC_FORCE_GROUND, /* pll c */ \ + CPRMAN_CLOCK_SRC_FORCE_GROUND, /* pll d */ \ + CPRMAN_CLOCK_SRC_FORCE_GROUND, /* pll h */ \ + CPRMAN_CLOCK_SRC_FORCE_GROUND, /* pll c, core1 */ \ + CPRMAN_CLOCK_SRC_FORCE_GROUND, /* pll c, core2 */ \ + } + +/* Only the oscillator and the two test debug clocks */ +#define SRC_MAPPING_INFO_xosc \ + .src_mapping = { \ + CPRMAN_CLOCK_SRC_NORMAL, \ + CPRMAN_CLOCK_SRC_NORMAL, \ + CPRMAN_CLOCK_SRC_NORMAL, \ + CPRMAN_CLOCK_SRC_NORMAL, \ + CPRMAN_CLOCK_SRC_FORCE_GROUND, \ + CPRMAN_CLOCK_SRC_FORCE_GROUND, \ + CPRMAN_CLOCK_SRC_FORCE_GROUND, \ + CPRMAN_CLOCK_SRC_FORCE_GROUND, \ + CPRMAN_CLOCK_SRC_FORCE_GROUND, \ + CPRMAN_CLOCK_SRC_FORCE_GROUND, \ + } + +/* All the PLL "core" channels */ +#define SRC_MAPPING_INFO_core \ + .src_mapping = { \ + CPRMAN_CLOCK_SRC_NORMAL, \ + CPRMAN_CLOCK_SRC_NORMAL, \ + CPRMAN_CLOCK_SRC_NORMAL, \ + CPRMAN_CLOCK_SRC_NORMAL, \ + CPRMAN_PLLA_CHANNEL_CORE, \ + CPRMAN_PLLC_CHANNEL_CORE0, \ + CPRMAN_PLLD_CHANNEL_CORE, \ + CPRMAN_PLLH_CHANNEL_AUX, \ + CPRMAN_PLLC_CHANNEL_CORE1, \ + CPRMAN_PLLC_CHANNEL_CORE2, \ + } + +/* All the PLL "per" channels */ +#define SRC_MAPPING_INFO_periph \ + .src_mapping = { \ + CPRMAN_CLOCK_SRC_NORMAL, \ + CPRMAN_CLOCK_SRC_NORMAL, \ + CPRMAN_CLOCK_SRC_NORMAL, \ + CPRMAN_CLOCK_SRC_NORMAL, \ + CPRMAN_PLLA_CHANNEL_PER, \ + CPRMAN_PLLC_CHANNEL_PER, \ + CPRMAN_PLLD_CHANNEL_PER, \ + CPRMAN_CLOCK_SRC_FORCE_GROUND, \ + CPRMAN_CLOCK_SRC_FORCE_GROUND, \ + CPRMAN_CLOCK_SRC_FORCE_GROUND, \ + } + +/* + * The DSI0 channels. This one got an intermediate mux between the PLL channels + * and the clock input. + */ +#define SRC_MAPPING_INFO_dsi0 \ + .src_mapping = { \ + CPRMAN_CLOCK_SRC_NORMAL, \ + CPRMAN_CLOCK_SRC_NORMAL, \ + CPRMAN_CLOCK_SRC_NORMAL, \ + CPRMAN_CLOCK_SRC_NORMAL, \ + CPRMAN_CLOCK_SRC_DSI0HSCK, \ + CPRMAN_CLOCK_SRC_FORCE_GROUND, \ + CPRMAN_CLOCK_SRC_FORCE_GROUND, \ + CPRMAN_CLOCK_SRC_FORCE_GROUND, \ + CPRMAN_CLOCK_SRC_FORCE_GROUND, \ + CPRMAN_CLOCK_SRC_FORCE_GROUND, \ + } + +/* The DSI1 channel */ +#define SRC_MAPPING_INFO_dsi1 \ + .src_mapping = { \ + CPRMAN_CLOCK_SRC_NORMAL, \ + CPRMAN_CLOCK_SRC_NORMAL, \ + CPRMAN_CLOCK_SRC_NORMAL, \ + CPRMAN_CLOCK_SRC_NORMAL, \ + CPRMAN_PLLD_CHANNEL_DSI1, \ + CPRMAN_CLOCK_SRC_FORCE_GROUND, \ + CPRMAN_CLOCK_SRC_FORCE_GROUND, \ + CPRMAN_CLOCK_SRC_FORCE_GROUND, \ + CPRMAN_CLOCK_SRC_FORCE_GROUND, \ + CPRMAN_CLOCK_SRC_FORCE_GROUND, \ + } + +#define FILL_CLOCK_MUX_SRC_MAPPING_INIT_INFO(kind_) \ + SRC_MAPPING_INFO_ ## kind_ + +#define FILL_CLOCK_MUX_INIT_INFO(clock_, kind_) \ + .cm_offset = R_CM_ ## clock_ ## CTL, \ + FILL_CLOCK_MUX_SRC_MAPPING_INIT_INFO(kind_) + +static ClockMuxInitInfo CLOCK_MUX_INIT_INFO[] = { + [CPRMAN_CLOCK_GNRIC] = { + .name = "gnric", + FILL_CLOCK_MUX_INIT_INFO(GNRIC, unknown), + }, + [CPRMAN_CLOCK_VPU] = { + .name = "vpu", + .int_bits = 12, + .frac_bits = 8, + FILL_CLOCK_MUX_INIT_INFO(VPU, core), + }, + [CPRMAN_CLOCK_SYS] = { + .name = "sys", + FILL_CLOCK_MUX_INIT_INFO(SYS, unknown), + }, + [CPRMAN_CLOCK_PERIA] = { + .name = "peria", + FILL_CLOCK_MUX_INIT_INFO(PERIA, unknown), + }, + [CPRMAN_CLOCK_PERII] = { + .name = "perii", + FILL_CLOCK_MUX_INIT_INFO(PERII, unknown), + }, + [CPRMAN_CLOCK_H264] = { + .name = "h264", + .int_bits = 4, + .frac_bits = 8, + FILL_CLOCK_MUX_INIT_INFO(H264, core), + }, + [CPRMAN_CLOCK_ISP] = { + .name = "isp", + .int_bits = 4, + .frac_bits = 8, + FILL_CLOCK_MUX_INIT_INFO(ISP, core), + }, + [CPRMAN_CLOCK_V3D] = { + .name = "v3d", + FILL_CLOCK_MUX_INIT_INFO(V3D, core), + }, + [CPRMAN_CLOCK_CAM0] = { + .name = "cam0", + .int_bits = 4, + .frac_bits = 8, + FILL_CLOCK_MUX_INIT_INFO(CAM0, periph), + }, + [CPRMAN_CLOCK_CAM1] = { + .name = "cam1", + .int_bits = 4, + .frac_bits = 8, + FILL_CLOCK_MUX_INIT_INFO(CAM1, periph), + }, + [CPRMAN_CLOCK_CCP2] = { + .name = "ccp2", + FILL_CLOCK_MUX_INIT_INFO(CCP2, unknown), + }, + [CPRMAN_CLOCK_DSI0E] = { + .name = "dsi0e", + .int_bits = 4, + .frac_bits = 8, + FILL_CLOCK_MUX_INIT_INFO(DSI0E, dsi0), + }, + [CPRMAN_CLOCK_DSI0P] = { + .name = "dsi0p", + .int_bits = 0, + .frac_bits = 0, + FILL_CLOCK_MUX_INIT_INFO(DSI0P, dsi0), + }, + [CPRMAN_CLOCK_DPI] = { + .name = "dpi", + .int_bits = 4, + .frac_bits = 8, + FILL_CLOCK_MUX_INIT_INFO(DPI, periph), + }, + [CPRMAN_CLOCK_GP0] = { + .name = "gp0", + .int_bits = 12, + .frac_bits = 12, + FILL_CLOCK_MUX_INIT_INFO(GP0, periph), + }, + [CPRMAN_CLOCK_GP1] = { + .name = "gp1", + .int_bits = 12, + .frac_bits = 12, + FILL_CLOCK_MUX_INIT_INFO(GP1, periph), + }, + [CPRMAN_CLOCK_GP2] = { + .name = "gp2", + .int_bits = 12, + .frac_bits = 12, + FILL_CLOCK_MUX_INIT_INFO(GP2, periph), + }, + [CPRMAN_CLOCK_HSM] = { + .name = "hsm", + .int_bits = 4, + .frac_bits = 8, + FILL_CLOCK_MUX_INIT_INFO(HSM, periph), + }, + [CPRMAN_CLOCK_OTP] = { + .name = "otp", + .int_bits = 4, + .frac_bits = 0, + FILL_CLOCK_MUX_INIT_INFO(OTP, xosc), + }, + [CPRMAN_CLOCK_PCM] = { + .name = "pcm", + .int_bits = 12, + .frac_bits = 12, + FILL_CLOCK_MUX_INIT_INFO(PCM, periph), + }, + [CPRMAN_CLOCK_PWM] = { + .name = "pwm", + .int_bits = 12, + .frac_bits = 12, + FILL_CLOCK_MUX_INIT_INFO(PWM, periph), + }, + [CPRMAN_CLOCK_SLIM] = { + .name = "slim", + .int_bits = 12, + .frac_bits = 12, + FILL_CLOCK_MUX_INIT_INFO(SLIM, periph), + }, + [CPRMAN_CLOCK_SMI] = { + .name = "smi", + .int_bits = 4, + .frac_bits = 8, + FILL_CLOCK_MUX_INIT_INFO(SMI, periph), + }, + [CPRMAN_CLOCK_TEC] = { + .name = "tec", + .int_bits = 6, + .frac_bits = 0, + FILL_CLOCK_MUX_INIT_INFO(TEC, xosc), + }, + [CPRMAN_CLOCK_TD0] = { + .name = "td0", + FILL_CLOCK_MUX_INIT_INFO(TD0, unknown), + }, + [CPRMAN_CLOCK_TD1] = { + .name = "td1", + FILL_CLOCK_MUX_INIT_INFO(TD1, unknown), + }, + [CPRMAN_CLOCK_TSENS] = { + .name = "tsens", + .int_bits = 5, + .frac_bits = 0, + FILL_CLOCK_MUX_INIT_INFO(TSENS, xosc), + }, + [CPRMAN_CLOCK_TIMER] = { + .name = "timer", + .int_bits = 6, + .frac_bits = 12, + FILL_CLOCK_MUX_INIT_INFO(TIMER, xosc), + }, + [CPRMAN_CLOCK_UART] = { + .name = "uart", + .int_bits = 10, + .frac_bits = 12, + FILL_CLOCK_MUX_INIT_INFO(UART, periph), + }, + [CPRMAN_CLOCK_VEC] = { + .name = "vec", + .int_bits = 4, + .frac_bits = 0, + FILL_CLOCK_MUX_INIT_INFO(VEC, periph), + }, + [CPRMAN_CLOCK_PULSE] = { + .name = "pulse", + FILL_CLOCK_MUX_INIT_INFO(PULSE, xosc), + }, + [CPRMAN_CLOCK_SDC] = { + .name = "sdram", + .int_bits = 6, + .frac_bits = 0, + FILL_CLOCK_MUX_INIT_INFO(SDC, core), + }, + [CPRMAN_CLOCK_ARM] = { + .name = "arm", + FILL_CLOCK_MUX_INIT_INFO(ARM, unknown), + }, + [CPRMAN_CLOCK_AVEO] = { + .name = "aveo", + .int_bits = 4, + .frac_bits = 0, + FILL_CLOCK_MUX_INIT_INFO(AVEO, periph), + }, + [CPRMAN_CLOCK_EMMC] = { + .name = "emmc", + .int_bits = 4, + .frac_bits = 8, + FILL_CLOCK_MUX_INIT_INFO(EMMC, periph), + }, + [CPRMAN_CLOCK_EMMC2] = { + .name = "emmc2", + .int_bits = 4, + .frac_bits = 8, + FILL_CLOCK_MUX_INIT_INFO(EMMC2, unknown), + }, +}; + +#undef FILL_CLOCK_MUX_INIT_INFO +#undef FILL_CLOCK_MUX_SRC_MAPPING_INIT_INFO +#undef SRC_MAPPING_INFO_dsi1 +#undef SRC_MAPPING_INFO_dsi0 +#undef SRC_MAPPING_INFO_periph +#undef SRC_MAPPING_INFO_core +#undef SRC_MAPPING_INFO_xosc +#undef SRC_MAPPING_INFO_unknown + +static inline void set_clock_mux_init_info(BCM2835CprmanState *s, + CprmanClockMuxState *mux, + CprmanClockMux id) +{ + mux->id = id; + mux->reg_ctl = &s->regs[CLOCK_MUX_INIT_INFO[id].cm_offset]; + mux->reg_div = &s->regs[CLOCK_MUX_INIT_INFO[id].cm_offset + 1]; + mux->int_bits = CLOCK_MUX_INIT_INFO[id].int_bits; + mux->frac_bits = CLOCK_MUX_INIT_INFO[id].frac_bits; +} + + +/* + * Object reset info + * Those values have been dumped from a Raspberry Pi 3 Model B v1.2 using the + * clk debugfs interface in Linux. + */ +typedef struct PLLResetInfo { + uint32_t cm; + uint32_t a2w_ctrl; + uint32_t a2w_ana[4]; + uint32_t a2w_frac; +} PLLResetInfo; + +static const PLLResetInfo PLL_RESET_INFO[] = { + [CPRMAN_PLLA] = { + .cm = 0x0000008a, + .a2w_ctrl = 0x0002103a, + .a2w_frac = 0x00098000, + .a2w_ana = { 0x00000000, 0x00144000, 0x00000000, 0x00000100 } + }, + + [CPRMAN_PLLC] = { + .cm = 0x00000228, + .a2w_ctrl = 0x0002103e, + .a2w_frac = 0x00080000, + .a2w_ana = { 0x00000000, 0x00144000, 0x00000000, 0x00000100 } + }, + + [CPRMAN_PLLD] = { + .cm = 0x0000020a, + .a2w_ctrl = 0x00021034, + .a2w_frac = 0x00015556, + .a2w_ana = { 0x00000000, 0x00144000, 0x00000000, 0x00000100 } + }, + + [CPRMAN_PLLH] = { + .cm = 0x00000000, + .a2w_ctrl = 0x0002102d, + .a2w_frac = 0x00000000, + .a2w_ana = { 0x00900000, 0x0000000c, 0x00000000, 0x00000000 } + }, + + [CPRMAN_PLLB] = { + /* unknown */ + .cm = 0x00000000, + .a2w_ctrl = 0x00000000, + .a2w_frac = 0x00000000, + .a2w_ana = { 0x00000000, 0x00000000, 0x00000000, 0x00000000 } + } +}; + +typedef struct PLLChannelResetInfo { + /* + * Even though a PLL channel has a CM register, it shares it with its + * parent PLL. The parent already takes care of the reset value. + */ + uint32_t a2w_ctrl; +} PLLChannelResetInfo; + +static const PLLChannelResetInfo PLL_CHANNEL_RESET_INFO[] = { + [CPRMAN_PLLA_CHANNEL_DSI0] = { .a2w_ctrl = 0x00000100 }, + [CPRMAN_PLLA_CHANNEL_CORE] = { .a2w_ctrl = 0x00000003 }, + [CPRMAN_PLLA_CHANNEL_PER] = { .a2w_ctrl = 0x00000000 }, /* unknown */ + [CPRMAN_PLLA_CHANNEL_CCP2] = { .a2w_ctrl = 0x00000100 }, + + [CPRMAN_PLLC_CHANNEL_CORE2] = { .a2w_ctrl = 0x00000100 }, + [CPRMAN_PLLC_CHANNEL_CORE1] = { .a2w_ctrl = 0x00000100 }, + [CPRMAN_PLLC_CHANNEL_PER] = { .a2w_ctrl = 0x00000002 }, + [CPRMAN_PLLC_CHANNEL_CORE0] = { .a2w_ctrl = 0x00000002 }, + + [CPRMAN_PLLD_CHANNEL_DSI0] = { .a2w_ctrl = 0x00000100 }, + [CPRMAN_PLLD_CHANNEL_CORE] = { .a2w_ctrl = 0x00000004 }, + [CPRMAN_PLLD_CHANNEL_PER] = { .a2w_ctrl = 0x00000004 }, + [CPRMAN_PLLD_CHANNEL_DSI1] = { .a2w_ctrl = 0x00000100 }, + + [CPRMAN_PLLH_CHANNEL_AUX] = { .a2w_ctrl = 0x00000004 }, + [CPRMAN_PLLH_CHANNEL_RCAL] = { .a2w_ctrl = 0x00000000 }, + [CPRMAN_PLLH_CHANNEL_PIX] = { .a2w_ctrl = 0x00000000 }, + + [CPRMAN_PLLB_CHANNEL_ARM] = { .a2w_ctrl = 0x00000000 }, /* unknown */ +}; + +typedef struct ClockMuxResetInfo { + uint32_t cm_ctl; + uint32_t cm_div; +} ClockMuxResetInfo; + +static const ClockMuxResetInfo CLOCK_MUX_RESET_INFO[] = { + [CPRMAN_CLOCK_GNRIC] = { + .cm_ctl = 0, /* unknown */ + .cm_div = 0 + }, + + [CPRMAN_CLOCK_VPU] = { + .cm_ctl = 0x00000245, + .cm_div = 0x00003000, + }, + + [CPRMAN_CLOCK_SYS] = { + .cm_ctl = 0, /* unknown */ + .cm_div = 0 + }, + + [CPRMAN_CLOCK_PERIA] = { + .cm_ctl = 0, /* unknown */ + .cm_div = 0 + }, + + [CPRMAN_CLOCK_PERII] = { + .cm_ctl = 0, /* unknown */ + .cm_div = 0 + }, + + [CPRMAN_CLOCK_H264] = { + .cm_ctl = 0x00000244, + .cm_div = 0x00003000, + }, + + [CPRMAN_CLOCK_ISP] = { + .cm_ctl = 0x00000244, + .cm_div = 0x00003000, + }, + + [CPRMAN_CLOCK_V3D] = { + .cm_ctl = 0, /* unknown */ + .cm_div = 0 + }, + + [CPRMAN_CLOCK_CAM0] = { + .cm_ctl = 0x00000000, + .cm_div = 0x00000000, + }, + + [CPRMAN_CLOCK_CAM1] = { + .cm_ctl = 0x00000000, + .cm_div = 0x00000000, + }, + + [CPRMAN_CLOCK_CCP2] = { + .cm_ctl = 0, /* unknown */ + .cm_div = 0 + }, + + [CPRMAN_CLOCK_DSI0E] = { + .cm_ctl = 0x00000000, + .cm_div = 0x00000000, + }, + + [CPRMAN_CLOCK_DSI0P] = { + .cm_ctl = 0x00000000, + .cm_div = 0x00000000, + }, + + [CPRMAN_CLOCK_DPI] = { + .cm_ctl = 0x00000000, + .cm_div = 0x00000000, + }, + + [CPRMAN_CLOCK_GP0] = { + .cm_ctl = 0x00000200, + .cm_div = 0x00000000, + }, + + [CPRMAN_CLOCK_GP1] = { + .cm_ctl = 0x00000096, + .cm_div = 0x00014000, + }, + + [CPRMAN_CLOCK_GP2] = { + .cm_ctl = 0x00000291, + .cm_div = 0x00249f00, + }, + + [CPRMAN_CLOCK_HSM] = { + .cm_ctl = 0x00000000, + .cm_div = 0x00000000, + }, + + [CPRMAN_CLOCK_OTP] = { + .cm_ctl = 0x00000091, + .cm_div = 0x00004000, + }, + + [CPRMAN_CLOCK_PCM] = { + .cm_ctl = 0x00000200, + .cm_div = 0x00000000, + }, + + [CPRMAN_CLOCK_PWM] = { + .cm_ctl = 0x00000200, + .cm_div = 0x00000000, + }, + + [CPRMAN_CLOCK_SLIM] = { + .cm_ctl = 0x00000200, + .cm_div = 0x00000000, + }, + + [CPRMAN_CLOCK_SMI] = { + .cm_ctl = 0x00000000, + .cm_div = 0x00000000, + }, + + [CPRMAN_CLOCK_TEC] = { + .cm_ctl = 0x00000000, + .cm_div = 0x00000000, + }, + + [CPRMAN_CLOCK_TD0] = { + .cm_ctl = 0, /* unknown */ + .cm_div = 0 + }, + + [CPRMAN_CLOCK_TD1] = { + .cm_ctl = 0, /* unknown */ + .cm_div = 0 + }, + + [CPRMAN_CLOCK_TSENS] = { + .cm_ctl = 0x00000091, + .cm_div = 0x0000a000, + }, + + [CPRMAN_CLOCK_TIMER] = { + .cm_ctl = 0x00000291, + .cm_div = 0x00013333, + }, + + [CPRMAN_CLOCK_UART] = { + .cm_ctl = 0x00000296, + .cm_div = 0x0000a6ab, + }, + + [CPRMAN_CLOCK_VEC] = { + .cm_ctl = 0x00000097, + .cm_div = 0x00002000, + }, + + [CPRMAN_CLOCK_PULSE] = { + .cm_ctl = 0, /* unknown */ + .cm_div = 0 + }, + + [CPRMAN_CLOCK_SDC] = { + .cm_ctl = 0x00004006, + .cm_div = 0x00003000, + }, + + [CPRMAN_CLOCK_ARM] = { + .cm_ctl = 0, /* unknown */ + .cm_div = 0 + }, + + [CPRMAN_CLOCK_AVEO] = { + .cm_ctl = 0x00000000, + .cm_div = 0x00000000, + }, + + [CPRMAN_CLOCK_EMMC] = { + .cm_ctl = 0x00000295, + .cm_div = 0x00006000, + }, + + [CPRMAN_CLOCK_EMMC2] = { + .cm_ctl = 0, /* unknown */ + .cm_div = 0 + }, +}; + +#endif diff --git a/include/hw/misc/bcm2835_mbox.h b/include/hw/misc/bcm2835_mbox.h new file mode 100644 index 000000000..ade27af25 --- /dev/null +++ b/include/hw/misc/bcm2835_mbox.h @@ -0,0 +1,39 @@ +/* + * Raspberry Pi emulation (c) 2012 Gregory Estrade + * + * This work is licensed under the terms of the GNU GPL, version 2 or later. + * See the COPYING file in the top-level directory. + */ + +#ifndef BCM2835_MBOX_H +#define BCM2835_MBOX_H + +#include "bcm2835_mbox_defs.h" +#include "hw/sysbus.h" +#include "qom/object.h" + +#define TYPE_BCM2835_MBOX "bcm2835-mbox" +OBJECT_DECLARE_SIMPLE_TYPE(BCM2835MboxState, BCM2835_MBOX) + +typedef struct { + uint32_t reg[MBOX_SIZE]; + uint32_t count; + uint32_t status; + uint32_t config; +} BCM2835Mbox; + +struct BCM2835MboxState { + /*< private >*/ + SysBusDevice busdev; + /*< public >*/ + MemoryRegion *mbox_mr; + AddressSpace mbox_as; + MemoryRegion iomem; + qemu_irq arm_irq; + + bool mbox_irq_disabled; + bool available[MBOX_CHAN_COUNT]; + BCM2835Mbox mbox[2]; +}; + +#endif diff --git a/include/hw/misc/bcm2835_mbox_defs.h b/include/hw/misc/bcm2835_mbox_defs.h new file mode 100644 index 000000000..9670bf33a --- /dev/null +++ b/include/hw/misc/bcm2835_mbox_defs.h @@ -0,0 +1,29 @@ +/* + * Raspberry Pi emulation (c) 2012 Gregory Estrade + * + * This work is licensed under the terms of the GNU GPL, version 2 or later. + * See the COPYING file in the top-level directory. + */ + +#ifndef BCM2835_MBOX_DEFS_H +#define BCM2835_MBOX_DEFS_H + +/* Constants shared with the ARM identifying separate mailbox channels */ +#define MBOX_CHAN_POWER 0 /* for use by the power management interface */ +#define MBOX_CHAN_FB 1 /* for use by the frame buffer */ +#define MBOX_CHAN_VCHIQ 3 /* for use by the VCHIQ interface */ +#define MBOX_CHAN_PROPERTY 8 /* for use by the property channel */ +#define MBOX_CHAN_COUNT 9 + +#define MBOX_SIZE 32 +#define MBOX_INVALID_DATA 0x0f + +/* Layout of the private address space used for communication between + * the mbox device emulation, and child devices: each channel occupies + * 16 bytes of address space, but only two registers are presently defined. + */ +#define MBOX_AS_CHAN_SHIFT 4 +#define MBOX_AS_DATA 0 /* request / response data (RW at offset 0) */ +#define MBOX_AS_PENDING 4 /* pending response status (RO at offset 4) */ + +#endif /* BCM2835_MBOX_DEFS_H */ diff --git a/include/hw/misc/bcm2835_mphi.h b/include/hw/misc/bcm2835_mphi.h new file mode 100644 index 000000000..751363f49 --- /dev/null +++ b/include/hw/misc/bcm2835_mphi.h @@ -0,0 +1,44 @@ +/* + * BCM2835 SOC MPHI state definitions + * + * Copyright (c) 2020 Paul Zimmerman + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + */ + +#ifndef HW_MISC_BCM2835_MPHI_H +#define HW_MISC_BCM2835_MPHI_H + +#include "hw/irq.h" +#include "hw/sysbus.h" +#include "qom/object.h" + +#define MPHI_MMIO_SIZE 0x1000 + +typedef struct BCM2835MphiState BCM2835MphiState; + +struct BCM2835MphiState { + SysBusDevice parent_obj; + qemu_irq irq; + MemoryRegion iomem; + + uint32_t outdda; + uint32_t outddb; + uint32_t ctrl; + uint32_t intstat; + uint32_t swirq; +}; + +#define TYPE_BCM2835_MPHI "bcm2835-mphi" + +OBJECT_DECLARE_SIMPLE_TYPE(BCM2835MphiState, BCM2835_MPHI) + +#endif diff --git a/include/hw/misc/bcm2835_property.h b/include/hw/misc/bcm2835_property.h new file mode 100644 index 000000000..712b76b7a --- /dev/null +++ b/include/hw/misc/bcm2835_property.h @@ -0,0 +1,36 @@ +/* + * Raspberry Pi emulation (c) 2012 Gregory Estrade + * + * This work is licensed under the terms of the GNU GPL, version 2 or later. + * See the COPYING file in the top-level directory. + */ + +#ifndef BCM2835_PROPERTY_H +#define BCM2835_PROPERTY_H + +#include "hw/sysbus.h" +#include "net/net.h" +#include "hw/display/bcm2835_fb.h" +#include "qom/object.h" + +#define TYPE_BCM2835_PROPERTY "bcm2835-property" +OBJECT_DECLARE_SIMPLE_TYPE(BCM2835PropertyState, BCM2835_PROPERTY) + +struct BCM2835PropertyState { + /*< private >*/ + SysBusDevice busdev; + /*< public >*/ + + MemoryRegion *dma_mr; + AddressSpace dma_as; + MemoryRegion iomem; + qemu_irq mbox_irq; + BCM2835FBState *fbdev; + + MACAddr macaddr; + uint32_t board_rev; + uint32_t addr; + bool pending; +}; + +#endif diff --git a/include/hw/misc/bcm2835_rng.h b/include/hw/misc/bcm2835_rng.h new file mode 100644 index 000000000..7c1fb3ef4 --- /dev/null +++ b/include/hw/misc/bcm2835_rng.h @@ -0,0 +1,27 @@ +/* + * BCM2835 Random Number Generator emulation + * + * Copyright (C) 2017 Marcin Chojnacki + * + * This work is licensed under the terms of the GNU GPL, version 2 or later. + * See the COPYING file in the top-level directory. + */ + +#ifndef BCM2835_RNG_H +#define BCM2835_RNG_H + +#include "hw/sysbus.h" +#include "qom/object.h" + +#define TYPE_BCM2835_RNG "bcm2835-rng" +OBJECT_DECLARE_SIMPLE_TYPE(BCM2835RngState, BCM2835_RNG) + +struct BCM2835RngState { + SysBusDevice busdev; + MemoryRegion iomem; + + uint32_t rng_ctrl; + uint32_t rng_status; +}; + +#endif diff --git a/include/hw/misc/bcm2835_thermal.h b/include/hw/misc/bcm2835_thermal.h new file mode 100644 index 000000000..f90f9e487 --- /dev/null +++ b/include/hw/misc/bcm2835_thermal.h @@ -0,0 +1,27 @@ +/* + * BCM2835 dummy thermal sensor + * + * Copyright (C) 2019 Philippe Mathieu-Daudé + * + * SPDX-License-Identifier: GPL-2.0-or-later + */ + +#ifndef HW_MISC_BCM2835_THERMAL_H +#define HW_MISC_BCM2835_THERMAL_H + +#include "hw/sysbus.h" +#include "qom/object.h" + +#define TYPE_BCM2835_THERMAL "bcm2835-thermal" + +OBJECT_DECLARE_SIMPLE_TYPE(Bcm2835ThermalState, BCM2835_THERMAL) + +struct Bcm2835ThermalState { + /*< private >*/ + SysBusDevice parent_obj; + /*< public >*/ + MemoryRegion iomem; + uint32_t ctl; +}; + +#endif diff --git a/include/hw/misc/cbus.h b/include/hw/misc/cbus.h new file mode 100644 index 000000000..533498402 --- /dev/null +++ b/include/hw/misc/cbus.h @@ -0,0 +1,31 @@ +/* + * CBUS three-pin bus and the Retu / Betty / Tahvo / Vilma / Avilma / + * Hinku / Vinku / Ahne / Pihi chips used in various Nokia platforms. + * Based on reverse-engineering of a linux driver. + * + * Copyright (C) 2008 Nokia Corporation + * Written by Andrzej Zaborowski + * + * This work is licensed under the terms of the GNU GPL, version 2 or later. + * See the COPYING file in the top-level directory. + */ + +#ifndef HW_MISC_CBUS_H +#define HW_MISC_CBUS_H + + +typedef struct { + qemu_irq clk; + qemu_irq dat; + qemu_irq sel; +} CBus; + +CBus *cbus_init(qemu_irq dat_out); +void cbus_attach(CBus *bus, void *slave_opaque); + +void *retu_init(qemu_irq irq, int vilma); +void *tahvo_init(qemu_irq irq, int betty); + +void retu_key_event(void *retu, int state); + +#endif diff --git a/include/hw/misc/empty_slot.h b/include/hw/misc/empty_slot.h new file mode 100644 index 000000000..dec56e56a --- /dev/null +++ b/include/hw/misc/empty_slot.h @@ -0,0 +1,19 @@ +/* + * QEMU Empty Slot + * + * The empty_slot device emulates known to a bus but not connected devices. + * + * Copyright (c) 2010 Artyom Tarasenko + * + * This code is licensed under the GNU GPL v2 or (at your option) any later + * version. + */ + +#ifndef HW_EMPTY_SLOT_H +#define HW_EMPTY_SLOT_H + +#include "exec/hwaddr.h" + +void empty_slot_init(const char *name, hwaddr addr, uint64_t slot_size); + +#endif diff --git a/include/hw/misc/grlib_ahb_apb_pnp.h b/include/hw/misc/grlib_ahb_apb_pnp.h new file mode 100644 index 000000000..341451bff --- /dev/null +++ b/include/hw/misc/grlib_ahb_apb_pnp.h @@ -0,0 +1,57 @@ +/* + * GRLIB AHB APB PNP + * + * Copyright (C) 2019 AdaCore + * + * Developed by : + * Frederic Konrad + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, see . + * + */ + +#ifndef GRLIB_AHB_APB_PNP_H +#define GRLIB_AHB_APB_PNP_H +#include "qom/object.h" + +#define TYPE_GRLIB_AHB_PNP "grlib,ahbpnp" +OBJECT_DECLARE_SIMPLE_TYPE(AHBPnp, GRLIB_AHB_PNP) + +#define TYPE_GRLIB_APB_PNP "grlib,apbpnp" +OBJECT_DECLARE_SIMPLE_TYPE(APBPnp, GRLIB_APB_PNP) + +void grlib_ahb_pnp_add_entry(AHBPnp *dev, uint32_t address, uint32_t mask, + uint8_t vendor, uint16_t device, int slave, + int type); +void grlib_apb_pnp_add_entry(APBPnp *dev, uint32_t address, uint32_t mask, + uint8_t vendor, uint16_t device, uint8_t version, + uint8_t irq, int type); + +/* VENDORS */ +#define GRLIB_VENDOR_GAISLER (0x01) +/* DEVICES */ +#define GRLIB_LEON3_DEV (0x03) +#define GRLIB_APBMST_DEV (0x06) +#define GRLIB_APBUART_DEV (0x0C) +#define GRLIB_IRQMP_DEV (0x0D) +#define GRLIB_GPTIMER_DEV (0x11) +/* TYPE */ +#define GRLIB_CPU_AREA (0x00) +#define GRLIB_APBIO_AREA (0x01) +#define GRLIB_AHBMEM_AREA (0x02) + +#define GRLIB_AHB_MASTER (0x00) +#define GRLIB_AHB_SLAVE (0x01) + +#endif /* GRLIB_AHB_APB_PNP_H */ diff --git a/include/hw/misc/imx25_ccm.h b/include/hw/misc/imx25_ccm.h new file mode 100644 index 000000000..c3b89018c --- /dev/null +++ b/include/hw/misc/imx25_ccm.h @@ -0,0 +1,80 @@ +/* + * IMX25 Clock Control Module + * + * Copyright (C) 2012 NICTA + * Updated by Jean-Christophe Dubois + * + * This work is licensed under the terms of the GNU GPL, version 2 or later. + * See the COPYING file in the top-level directory. + */ + +#ifndef IMX25_CCM_H +#define IMX25_CCM_H + +#include "hw/misc/imx_ccm.h" +#include "qom/object.h" + +#define IMX25_CCM_MPCTL_REG 0 +#define IMX25_CCM_UPCTL_REG 1 +#define IMX25_CCM_CCTL_REG 2 +#define IMX25_CCM_CGCR0_REG 3 +#define IMX25_CCM_CGCR1_REG 4 +#define IMX25_CCM_CGCR2_REG 5 +#define IMX25_CCM_PCDR0_REG 6 +#define IMX25_CCM_PCDR1_REG 7 +#define IMX25_CCM_PCDR2_REG 8 +#define IMX25_CCM_PCDR3_REG 9 +#define IMX25_CCM_RCSR_REG 10 +#define IMX25_CCM_CRDR_REG 11 +#define IMX25_CCM_DCVR0_REG 12 +#define IMX25_CCM_DCVR1_REG 13 +#define IMX25_CCM_DCVR2_REG 14 +#define IMX25_CCM_DCVR3_REG 15 +#define IMX25_CCM_LTR0_REG 16 +#define IMX25_CCM_LTR1_REG 17 +#define IMX25_CCM_LTR2_REG 18 +#define IMX25_CCM_LTR3_REG 19 +#define IMX25_CCM_LTBR0_REG 20 +#define IMX25_CCM_LTBR1_REG 21 +#define IMX25_CCM_PMCR0_REG 22 +#define IMX25_CCM_PMCR1_REG 23 +#define IMX25_CCM_PMCR2_REG 24 +#define IMX25_CCM_MCR_REG 25 +#define IMX25_CCM_LPIMR0_REG 26 +#define IMX25_CCM_LPIMR1_REG 27 +#define IMX25_CCM_MAX_REG 28 + +/* CCTL */ +#define CCTL_ARM_CLK_DIV_SHIFT (30) +#define CCTL_ARM_CLK_DIV_MASK (0x3) +#define CCTL_AHB_CLK_DIV_SHIFT (28) +#define CCTL_AHB_CLK_DIV_MASK (0x3) +#define CCTL_MPLL_BYPASS_SHIFT (22) +#define CCTL_MPLL_BYPASS_MASK (0x1) +#define CCTL_USB_DIV_SHIFT (16) +#define CCTL_USB_DIV_MASK (0x3F) +#define CCTL_ARM_SRC_SHIFT (13) +#define CCTL_ARM_SRC_MASK (0x1) +#define CCTL_UPLL_DIS_SHIFT (23) +#define CCTL_UPLL_DIS_MASK (0x1) + +#define EXTRACT(value, name) (((value) >> CCTL_##name##_SHIFT) \ + & CCTL_##name##_MASK) +#define INSERT(value, name) (((value) & CCTL_##name##_MASK) << \ + CCTL_##name##_SHIFT) + +#define TYPE_IMX25_CCM "imx25.ccm" +OBJECT_DECLARE_SIMPLE_TYPE(IMX25CCMState, IMX25_CCM) + +struct IMX25CCMState { + /* */ + IMXCCMState parent_obj; + + /* */ + MemoryRegion iomem; + + uint32_t reg[IMX25_CCM_MAX_REG]; + +}; + +#endif /* IMX25_CCM_H */ diff --git a/include/hw/misc/imx31_ccm.h b/include/hw/misc/imx31_ccm.h new file mode 100644 index 000000000..18e08ee84 --- /dev/null +++ b/include/hw/misc/imx31_ccm.h @@ -0,0 +1,89 @@ +/* + * IMX31 Clock Control Module + * + * Copyright (C) 2012 NICTA + * Updated by Jean-Christophe Dubois + * + * This work is licensed under the terms of the GNU GPL, version 2 or later. + * See the COPYING file in the top-level directory. + */ + +#ifndef IMX31_CCM_H +#define IMX31_CCM_H + +#include "hw/misc/imx_ccm.h" +#include "qom/object.h" + +#define IMX31_CCM_CCMR_REG 0 +#define IMX31_CCM_PDR0_REG 1 +#define IMX31_CCM_PDR1_REG 2 +#define IMX31_CCM_RCSR_REG 3 +#define IMX31_CCM_MPCTL_REG 4 +#define IMX31_CCM_UPCTL_REG 5 +#define IMX31_CCM_SPCTL_REG 6 +#define IMX31_CCM_COSR_REG 7 +#define IMX31_CCM_CGR0_REG 8 +#define IMX31_CCM_CGR1_REG 9 +#define IMX31_CCM_CGR2_REG 10 +#define IMX31_CCM_WIMR_REG 11 +#define IMX31_CCM_LDC_REG 12 +#define IMX31_CCM_DCVR0_REG 13 +#define IMX31_CCM_DCVR1_REG 14 +#define IMX31_CCM_DCVR2_REG 15 +#define IMX31_CCM_DCVR3_REG 16 +#define IMX31_CCM_LTR0_REG 17 +#define IMX31_CCM_LTR1_REG 18 +#define IMX31_CCM_LTR2_REG 19 +#define IMX31_CCM_LTR3_REG 20 +#define IMX31_CCM_LTBR0_REG 21 +#define IMX31_CCM_LTBR1_REG 22 +#define IMX31_CCM_PMCR0_REG 23 +#define IMX31_CCM_PMCR1_REG 24 +#define IMX31_CCM_PDR2_REG 25 +#define IMX31_CCM_MAX_REG 26 + +/* CCMR */ +#define CCMR_FPME (1<<0) +#define CCMR_MPE (1<<3) +#define CCMR_MDS (1<<7) +#define CCMR_FPMF (1<<26) +#define CCMR_PRCS (3<<1) + +#define PMCR0_DFSUP1 (1<<31) + +/* PDR0 */ +#define PDR0_MCU_PODF_SHIFT (0) +#define PDR0_MCU_PODF_MASK (0x7) +#define PDR0_MAX_PODF_SHIFT (3) +#define PDR0_MAX_PODF_MASK (0x7) +#define PDR0_IPG_PODF_SHIFT (6) +#define PDR0_IPG_PODF_MASK (0x3) +#define PDR0_NFC_PODF_SHIFT (8) +#define PDR0_NFC_PODF_MASK (0x7) +#define PDR0_HSP_PODF_SHIFT (11) +#define PDR0_HSP_PODF_MASK (0x7) +#define PDR0_PER_PODF_SHIFT (16) +#define PDR0_PER_PODF_MASK (0x1f) +#define PDR0_CSI_PODF_SHIFT (23) +#define PDR0_CSI_PODF_MASK (0x1ff) + +#define EXTRACT(value, name) (((value) >> PDR0_##name##_PODF_SHIFT) \ + & PDR0_##name##_PODF_MASK) +#define INSERT(value, name) (((value) & PDR0_##name##_PODF_MASK) << \ + PDR0_##name##_PODF_SHIFT) + +#define TYPE_IMX31_CCM "imx31.ccm" +OBJECT_DECLARE_SIMPLE_TYPE(IMX31CCMState, IMX31_CCM) + +struct IMX31CCMState { + /* */ + IMXCCMState parent_obj; + + /* */ + MemoryRegion iomem; + + uint32_t reg[IMX31_CCM_MAX_REG]; + +}; + +#endif /* IMX31_CCM_H */ diff --git a/include/hw/misc/imx6_ccm.h b/include/hw/misc/imx6_ccm.h new file mode 100644 index 000000000..ccf46d735 --- /dev/null +++ b/include/hw/misc/imx6_ccm.h @@ -0,0 +1,198 @@ +/* + * IMX6 Clock Control Module + * + * Copyright (C) 2012 NICTA + * Updated by Jean-Christophe Dubois + * + * This work is licensed under the terms of the GNU GPL, version 2 or later. + * See the COPYING file in the top-level directory. + */ + +#ifndef IMX6_CCM_H +#define IMX6_CCM_H + +#include "hw/misc/imx_ccm.h" +#include "qemu/bitops.h" +#include "qom/object.h" + +#define CCM_CCR 0 +#define CCM_CCDR 1 +#define CCM_CSR 2 +#define CCM_CCSR 3 +#define CCM_CACRR 4 +#define CCM_CBCDR 5 +#define CCM_CBCMR 6 +#define CCM_CSCMR1 7 +#define CCM_CSCMR2 8 +#define CCM_CSCDR1 9 +#define CCM_CS1CDR 10 +#define CCM_CS2CDR 11 +#define CCM_CDCDR 12 +#define CCM_CHSCCDR 13 +#define CCM_CSCDR2 14 +#define CCM_CSCDR3 15 +#define CCM_CDHIPR 18 +#define CCM_CTOR 20 +#define CCM_CLPCR 21 +#define CCM_CISR 22 +#define CCM_CIMR 23 +#define CCM_CCOSR 24 +#define CCM_CGPR 25 +#define CCM_CCGR0 26 +#define CCM_CCGR1 27 +#define CCM_CCGR2 28 +#define CCM_CCGR3 29 +#define CCM_CCGR4 30 +#define CCM_CCGR5 31 +#define CCM_CCGR6 32 +#define CCM_CMEOR 34 +#define CCM_MAX 35 + +#define CCM_ANALOG_PLL_ARM 0 +#define CCM_ANALOG_PLL_ARM_SET 1 +#define CCM_ANALOG_PLL_ARM_CLR 2 +#define CCM_ANALOG_PLL_ARM_TOG 3 +#define CCM_ANALOG_PLL_USB1 4 +#define CCM_ANALOG_PLL_USB1_SET 5 +#define CCM_ANALOG_PLL_USB1_CLR 6 +#define CCM_ANALOG_PLL_USB1_TOG 7 +#define CCM_ANALOG_PLL_USB2 8 +#define CCM_ANALOG_PLL_USB2_SET 9 +#define CCM_ANALOG_PLL_USB2_CLR 10 +#define CCM_ANALOG_PLL_USB2_TOG 11 +#define CCM_ANALOG_PLL_SYS 12 +#define CCM_ANALOG_PLL_SYS_SET 13 +#define CCM_ANALOG_PLL_SYS_CLR 14 +#define CCM_ANALOG_PLL_SYS_TOG 15 +#define CCM_ANALOG_PLL_SYS_SS 16 +#define CCM_ANALOG_PLL_SYS_NUM 20 +#define CCM_ANALOG_PLL_SYS_DENOM 24 +#define CCM_ANALOG_PLL_AUDIO 28 +#define CCM_ANALOG_PLL_AUDIO_SET 29 +#define CCM_ANALOG_PLL_AUDIO_CLR 30 +#define CCM_ANALOG_PLL_AUDIO_TOG 31 +#define CCM_ANALOG_PLL_AUDIO_NUM 32 +#define CCM_ANALOG_PLL_AUDIO_DENOM 36 +#define CCM_ANALOG_PLL_VIDEO 40 +#define CCM_ANALOG_PLL_VIDEO_SET 41 +#define CCM_ANALOG_PLL_VIDEO_CLR 42 +#define CCM_ANALOG_PLL_VIDEO_TOG 44 +#define CCM_ANALOG_PLL_VIDEO_NUM 46 +#define CCM_ANALOG_PLL_VIDEO_DENOM 48 +#define CCM_ANALOG_PLL_MLB 52 +#define CCM_ANALOG_PLL_MLB_SET 53 +#define CCM_ANALOG_PLL_MLB_CLR 54 +#define CCM_ANALOG_PLL_MLB_TOG 55 +#define CCM_ANALOG_PLL_ENET 56 +#define CCM_ANALOG_PLL_ENET_SET 57 +#define CCM_ANALOG_PLL_ENET_CLR 58 +#define CCM_ANALOG_PLL_ENET_TOG 59 +#define CCM_ANALOG_PFD_480 60 +#define CCM_ANALOG_PFD_480_SET 61 +#define CCM_ANALOG_PFD_480_CLR 62 +#define CCM_ANALOG_PFD_480_TOG 63 +#define CCM_ANALOG_PFD_528 64 +#define CCM_ANALOG_PFD_528_SET 65 +#define CCM_ANALOG_PFD_528_CLR 66 +#define CCM_ANALOG_PFD_528_TOG 67 + +/* PMU registers */ +#define PMU_REG_1P1 68 +#define PMU_REG_3P0 72 +#define PMU_REG_2P5 76 +#define PMU_REG_CORE 80 + +#define CCM_ANALOG_MISC0 84 +#define PMU_MISC0 84 +#define CCM_ANALOG_MISC0_SET 85 +#define CCM_ANALOG_MISC0_CLR 86 +#define CCM_ANALOG_MISC0_TOG 87 + +#define PMU_MISC1 88 +#define PMU_MISC1_SET 89 +#define PMU_MISC1_CLR 90 +#define PMU_MISC1_TOG 91 + +#define CCM_ANALOG_MISC2 92 +#define PMU_MISC2 92 +#define CCM_ANALOG_MISC2_SET 93 +#define CCM_ANALOG_MISC2_CLR 94 +#define CCM_ANALOG_MISC2_TOG 95 + +#define USB_ANALOG_USB1_VBUS_DETECT 104 +#define USB_ANALOG_USB1_VBUS_DETECT_SET 105 +#define USB_ANALOG_USB1_VBUS_DETECT_CLR 106 +#define USB_ANALOG_USB1_VBUS_DETECT_TOG 107 +#define USB_ANALOG_USB1_CHRG_DETECT 108 +#define USB_ANALOG_USB1_CHRG_DETECT_SET 109 +#define USB_ANALOG_USB1_CHRG_DETECT_CLR 110 +#define USB_ANALOG_USB1_CHRG_DETECT_TOG 111 +#define USB_ANALOG_USB1_VBUS_DETECT_STAT 112 +#define USB_ANALOG_USB1_CHRG_DETECT_STAT 116 +#define USB_ANALOG_USB1_MISC 124 +#define USB_ANALOG_USB1_MISC_SET 125 +#define USB_ANALOG_USB1_MISC_CLR 126 +#define USB_ANALOG_USB1_MISC_TOG 127 +#define USB_ANALOG_USB2_VBUS_DETECT 128 +#define USB_ANALOG_USB2_VBUS_DETECT_SET 129 +#define USB_ANALOG_USB2_VBUS_DETECT_CLR 130 +#define USB_ANALOG_USB2_VBUS_DETECT_TOG 131 +#define USB_ANALOG_USB2_CHRG_DETECT 132 +#define USB_ANALOG_USB2_CHRG_DETECT_SET 133 +#define USB_ANALOG_USB2_CHRG_DETECT_CLR 134 +#define USB_ANALOG_USB2_CHRG_DETECT_TOG 135 +#define USB_ANALOG_USB2_VBUS_DETECT_STAT 136 +#define USB_ANALOG_USB2_CHRG_DETECT_STAT 140 +#define USB_ANALOG_USB2_MISC 148 +#define USB_ANALOG_USB2_MISC_SET 149 +#define USB_ANALOG_USB2_MISC_CLR 150 +#define USB_ANALOG_USB2_MISC_TOG 151 +#define USB_ANALOG_DIGPROG 152 +#define CCM_ANALOG_MAX 153 + +/* CCM_CBCMR */ +#define PRE_PERIPH_CLK_SEL_SHIFT (18) +#define PRE_PERIPH_CLK_SEL_LENGTH (2) + +/* CCM_CBCDR */ +#define AHB_PODF_SHIFT (10) +#define AHB_PODF_LENGTH (3) +#define IPG_PODF_SHIFT (8) +#define IPG_PODF_LENGTH (2) + +/* CCM_CSCMR1 */ +#define PERCLK_PODF_SHIFT (0) +#define PERCLK_PODF_LENGTH (6) + +/* CCM_ANALOG_PFD_528 */ +#define PFD0_FRAC_SHIFT (0) +#define PFD0_FRAC_LENGTH (6) +#define PFD2_FRAC_SHIFT (16) +#define PFD2_FRAC_LENGTH (6) + +/* CCM_ANALOG_PLL_SYS */ +#define DIV_SELECT_SHIFT (0) +#define DIV_SELECT_LENGTH (1) + +#define CCM_ANALOG_PLL_LOCK (1 << 31); + +#define EXTRACT(value, name) extract32(value, name##_SHIFT, name##_LENGTH) + +#define TYPE_IMX6_CCM "imx6.ccm" +OBJECT_DECLARE_SIMPLE_TYPE(IMX6CCMState, IMX6_CCM) + +struct IMX6CCMState { + /* */ + IMXCCMState parent_obj; + + /* */ + MemoryRegion container; + MemoryRegion ioccm; + MemoryRegion ioanalog; + + uint32_t ccm[CCM_MAX]; + uint32_t analog[CCM_ANALOG_MAX]; + +}; + +#endif /* IMX6_CCM_H */ diff --git a/include/hw/misc/imx6_src.h b/include/hw/misc/imx6_src.h new file mode 100644 index 000000000..f380da381 --- /dev/null +++ b/include/hw/misc/imx6_src.h @@ -0,0 +1,74 @@ +/* + * IMX6 System Reset Controller + * + * Copyright (C) 2012 NICTA + * Updated by Jean-Christophe Dubois + * + * This work is licensed under the terms of the GNU GPL, version 2 or later. + * See the COPYING file in the top-level directory. + */ + +#ifndef IMX6_SRC_H +#define IMX6_SRC_H + +#include "hw/sysbus.h" +#include "qemu/bitops.h" +#include "qom/object.h" + +#define SRC_SCR 0 +#define SRC_SBMR1 1 +#define SRC_SRSR 2 +#define SRC_SISR 5 +#define SRC_SIMR 6 +#define SRC_SBMR2 7 +#define SRC_GPR1 8 +#define SRC_GPR2 9 +#define SRC_GPR3 10 +#define SRC_GPR4 11 +#define SRC_GPR5 12 +#define SRC_GPR6 13 +#define SRC_GPR7 14 +#define SRC_GPR8 15 +#define SRC_GPR9 16 +#define SRC_GPR10 17 +#define SRC_MAX 18 + +/* SRC_SCR */ +#define CORE3_ENABLE_SHIFT 24 +#define CORE3_ENABLE_LENGTH 1 +#define CORE2_ENABLE_SHIFT 23 +#define CORE2_ENABLE_LENGTH 1 +#define CORE1_ENABLE_SHIFT 22 +#define CORE1_ENABLE_LENGTH 1 +#define CORE3_RST_SHIFT 16 +#define CORE3_RST_LENGTH 1 +#define CORE2_RST_SHIFT 15 +#define CORE2_RST_LENGTH 1 +#define CORE1_RST_SHIFT 14 +#define CORE1_RST_LENGTH 1 +#define CORE0_RST_SHIFT 13 +#define CORE0_RST_LENGTH 1 +#define SW_IPU1_RST_SHIFT 3 +#define SW_IPU1_RST_LENGTH 1 +#define SW_IPU2_RST_SHIFT 12 +#define SW_IPU2_RST_LENGTH 1 +#define WARM_RST_ENABLE_SHIFT 0 +#define WARM_RST_ENABLE_LENGTH 1 + +#define EXTRACT(value, name) extract32(value, name##_SHIFT, name##_LENGTH) + +#define TYPE_IMX6_SRC "imx6.src" +OBJECT_DECLARE_SIMPLE_TYPE(IMX6SRCState, IMX6_SRC) + +struct IMX6SRCState { + /* */ + SysBusDevice parent_obj; + + /* */ + MemoryRegion iomem; + + uint32_t regs[SRC_MAX]; + +}; + +#endif /* IMX6_SRC_H */ diff --git a/include/hw/misc/imx6ul_ccm.h b/include/hw/misc/imx6ul_ccm.h new file mode 100644 index 000000000..edb5f784d --- /dev/null +++ b/include/hw/misc/imx6ul_ccm.h @@ -0,0 +1,227 @@ +/* + * IMX6UL Clock Control Module + * + * Copyright (C) 2018 by Jean-Christophe Dubois + * + * This work is licensed under the terms of the GNU GPL, version 2 or later. + * See the COPYING file in the top-level directory. + */ + +#ifndef IMX6UL_CCM_H +#define IMX6UL_CCM_H + +#include "hw/misc/imx_ccm.h" +#include "qemu/bitops.h" +#include "qom/object.h" + +#define CCM_CCR 0 +#define CCM_CCDR 1 +#define CCM_CSR 2 +#define CCM_CCSR 3 +#define CCM_CACRR 4 +#define CCM_CBCDR 5 +#define CCM_CBCMR 6 +#define CCM_CSCMR1 7 +#define CCM_CSCMR2 8 +#define CCM_CSCDR1 9 +#define CCM_CS1CDR 10 +#define CCM_CS2CDR 11 +#define CCM_CDCDR 12 +#define CCM_CHSCCDR 13 +#define CCM_CSCDR2 14 +#define CCM_CSCDR3 15 +#define CCM_CDHIPR 18 +#define CCM_CTOR 20 +#define CCM_CLPCR 21 +#define CCM_CISR 22 +#define CCM_CIMR 23 +#define CCM_CCOSR 24 +#define CCM_CGPR 25 +#define CCM_CCGR0 26 +#define CCM_CCGR1 27 +#define CCM_CCGR2 28 +#define CCM_CCGR3 29 +#define CCM_CCGR4 30 +#define CCM_CCGR5 31 +#define CCM_CCGR6 32 +#define CCM_CMEOR 34 +#define CCM_MAX 35 + +#define CCM_ANALOG_PLL_ARM 0 +#define CCM_ANALOG_PLL_ARM_SET 1 +#define CCM_ANALOG_PLL_ARM_CLR 2 +#define CCM_ANALOG_PLL_ARM_TOG 3 +#define CCM_ANALOG_PLL_USB1 4 +#define CCM_ANALOG_PLL_USB1_SET 5 +#define CCM_ANALOG_PLL_USB1_CLR 6 +#define CCM_ANALOG_PLL_USB1_TOG 7 +#define CCM_ANALOG_PLL_USB2 8 +#define CCM_ANALOG_PLL_USB2_SET 9 +#define CCM_ANALOG_PLL_USB2_CLR 10 +#define CCM_ANALOG_PLL_USB2_TOG 11 +#define CCM_ANALOG_PLL_SYS 12 +#define CCM_ANALOG_PLL_SYS_SET 13 +#define CCM_ANALOG_PLL_SYS_CLR 14 +#define CCM_ANALOG_PLL_SYS_TOG 15 +#define CCM_ANALOG_PLL_SYS_SS 16 +#define CCM_ANALOG_PLL_SYS_NUM 20 +#define CCM_ANALOG_PLL_SYS_DENOM 24 +#define CCM_ANALOG_PLL_AUDIO 28 +#define CCM_ANALOG_PLL_AUDIO_SET 29 +#define CCM_ANALOG_PLL_AUDIO_CLR 30 +#define CCM_ANALOG_PLL_AUDIO_TOG 31 +#define CCM_ANALOG_PLL_AUDIO_NUM 32 +#define CCM_ANALOG_PLL_AUDIO_DENOM 36 +#define CCM_ANALOG_PLL_VIDEO 40 +#define CCM_ANALOG_PLL_VIDEO_SET 41 +#define CCM_ANALOG_PLL_VIDEO_CLR 42 +#define CCM_ANALOG_PLL_VIDEO_TOG 44 +#define CCM_ANALOG_PLL_VIDEO_NUM 46 +#define CCM_ANALOG_PLL_VIDEO_DENOM 48 +#define CCM_ANALOG_PLL_ENET 56 +#define CCM_ANALOG_PLL_ENET_SET 57 +#define CCM_ANALOG_PLL_ENET_CLR 58 +#define CCM_ANALOG_PLL_ENET_TOG 59 +#define CCM_ANALOG_PFD_480 60 +#define CCM_ANALOG_PFD_480_SET 61 +#define CCM_ANALOG_PFD_480_CLR 62 +#define CCM_ANALOG_PFD_480_TOG 63 +#define CCM_ANALOG_PFD_528 64 +#define CCM_ANALOG_PFD_528_SET 65 +#define CCM_ANALOG_PFD_528_CLR 66 +#define CCM_ANALOG_PFD_528_TOG 67 + +/* PMU registers */ +#define PMU_REG_1P1 68 +#define PMU_REG_3P0 72 +#define PMU_REG_2P5 76 +#define PMU_REG_CORE 80 + +#define CCM_ANALOG_MISC0 84 +#define PMU_MISC0 CCM_ANALOG_MISC0 +#define CCM_ANALOG_MISC0_SET 85 +#define PMU_MISC0_SET CCM_ANALOG_MISC0_SET +#define CCM_ANALOG_MISC0_CLR 86 +#define PMU_MISC0_CLR CCM_ANALOG_MISC0_CLR +#define CCM_ANALOG_MISC0_TOG 87 +#define PMU_MISC0_TOG CCM_ANALOG_MISC0_TOG + +#define CCM_ANALOG_MISC1 88 +#define PMU_MISC1 CCM_ANALOG_MISC1 +#define CCM_ANALOG_MISC1_SET 89 +#define PMU_MISC1_SET CCM_ANALOG_MISC1_SET +#define CCM_ANALOG_MISC1_CLR 90 +#define PMU_MISC1_CLR CCM_ANALOG_MISC1_CLR +#define CCM_ANALOG_MISC1_TOG 91 +#define PMU_MISC1_TOG CCM_ANALOG_MISC1_TOG + +#define CCM_ANALOG_MISC2 92 +#define PMU_MISC2 CCM_ANALOG_MISC2 +#define CCM_ANALOG_MISC2_SET 93 +#define PMU_MISC2_SET CCM_ANALOG_MISC2_SET +#define CCM_ANALOG_MISC2_CLR 94 +#define PMU_MISC2_CLR CCM_ANALOG_MISC2_CLR +#define CCM_ANALOG_MISC2_TOG 95 +#define PMU_MISC2_TOG CCM_ANALOG_MISC2_TOG + +#define TEMPMON_TEMPSENSE0 96 +#define TEMPMON_TEMPSENSE0_SET 97 +#define TEMPMON_TEMPSENSE0_CLR 98 +#define TEMPMON_TEMPSENSE0_TOG 99 +#define TEMPMON_TEMPSENSE1 100 +#define TEMPMON_TEMPSENSE1_SET 101 +#define TEMPMON_TEMPSENSE1_CLR 102 +#define TEMPMON_TEMPSENSE1_TOG 103 +#define TEMPMON_TEMPSENSE2 164 +#define TEMPMON_TEMPSENSE2_SET 165 +#define TEMPMON_TEMPSENSE2_CLR 166 +#define TEMPMON_TEMPSENSE2_TOG 167 + +#define PMU_LOWPWR_CTRL 155 +#define PMU_LOWPWR_CTRL_SET 156 +#define PMU_LOWPWR_CTRL_CLR 157 +#define PMU_LOWPWR_CTRL_TOG 158 + +#define USB_ANALOG_USB1_VBUS_DETECT 104 +#define USB_ANALOG_USB1_VBUS_DETECT_SET 105 +#define USB_ANALOG_USB1_VBUS_DETECT_CLR 106 +#define USB_ANALOG_USB1_VBUS_DETECT_TOG 107 +#define USB_ANALOG_USB1_CHRG_DETECT 108 +#define USB_ANALOG_USB1_CHRG_DETECT_SET 109 +#define USB_ANALOG_USB1_CHRG_DETECT_CLR 110 +#define USB_ANALOG_USB1_CHRG_DETECT_TOG 111 +#define USB_ANALOG_USB1_VBUS_DETECT_STAT 112 +#define USB_ANALOG_USB1_CHRG_DETECT_STAT 116 +#define USB_ANALOG_USB1_MISC 124 +#define USB_ANALOG_USB1_MISC_SET 125 +#define USB_ANALOG_USB1_MISC_CLR 126 +#define USB_ANALOG_USB1_MISC_TOG 127 +#define USB_ANALOG_USB2_VBUS_DETECT 128 +#define USB_ANALOG_USB2_VBUS_DETECT_SET 129 +#define USB_ANALOG_USB2_VBUS_DETECT_CLR 130 +#define USB_ANALOG_USB2_VBUS_DETECT_TOG 131 +#define USB_ANALOG_USB2_CHRG_DETECT 132 +#define USB_ANALOG_USB2_CHRG_DETECT_SET 133 +#define USB_ANALOG_USB2_CHRG_DETECT_CLR 134 +#define USB_ANALOG_USB2_CHRG_DETECT_TOG 135 +#define USB_ANALOG_USB2_VBUS_DETECT_STAT 136 +#define USB_ANALOG_USB2_CHRG_DETECT_STAT 140 +#define USB_ANALOG_USB2_MISC 148 +#define USB_ANALOG_USB2_MISC_SET 149 +#define USB_ANALOG_USB2_MISC_CLR 150 +#define USB_ANALOG_USB2_MISC_TOG 151 +#define USB_ANALOG_DIGPROG 152 +#define CCM_ANALOG_MAX 4096 + +/* CCM_CBCMR */ +#define R_CBCMR_PRE_PERIPH_CLK_SEL_SHIFT (18) +#define R_CBCMR_PRE_PERIPH_CLK_SEL_LENGTH (2) +#define R_CBCMR_PERIPH_CLK2_SEL_SHIFT (12) +#define R_CBCMR_PERIPH_CLK2_SEL_LENGTH (2) + +/* CCM_CBCDR */ +#define R_CBCDR_AHB_PODF_SHIFT (10) +#define R_CBCDR_AHB_PODF_LENGTH (3) +#define R_CBCDR_IPG_PODF_SHIFT (8) +#define R_CBCDR_IPG_PODF_LENGTH (2) +#define R_CBCDR_PERIPH_CLK_SEL_SHIFT (25) +#define R_CBCDR_PERIPH_CLK_SEL_LENGTH (1) +#define R_CBCDR_PERIPH_CLK2_PODF_SHIFT (27) +#define R_CBCDR_PERIPH_CLK2_PODF_LENGTH (3) + +/* CCM_CSCMR1 */ +#define R_CSCMR1_PERCLK_PODF_SHIFT (0) +#define R_CSCMR1_PERCLK_PODF_LENGTH (6) +#define R_CSCMR1_PERCLK_CLK_SEL_SHIFT (6) +#define R_CSCMR1_PERCLK_CLK_SEL_LENGTH (1) + +/* CCM_ANALOG_PFD_528 */ +#define R_ANALOG_PFD_528_PFD0_FRAC_SHIFT (0) +#define R_ANALOG_PFD_528_PFD0_FRAC_LENGTH (6) +#define R_ANALOG_PFD_528_PFD2_FRAC_SHIFT (16) +#define R_ANALOG_PFD_528_PFD2_FRAC_LENGTH (6) + +/* CCM_ANALOG_PLL_SYS */ +#define R_ANALOG_PLL_SYS_DIV_SELECT_SHIFT (0) +#define R_ANALOG_PLL_SYS_DIV_SELECT_LENGTH (1) + +#define CCM_ANALOG_PLL_LOCK (1 << 31); + +#define TYPE_IMX6UL_CCM "imx6ul.ccm" +OBJECT_DECLARE_SIMPLE_TYPE(IMX6ULCCMState, IMX6UL_CCM) + +struct IMX6ULCCMState { + /* */ + IMXCCMState parent_obj; + + /* */ + MemoryRegion container; + MemoryRegion ioccm; + MemoryRegion ioanalog; + + uint32_t ccm[CCM_MAX]; + uint32_t analog[CCM_ANALOG_MAX]; + +}; + +#endif /* IMX6UL_CCM_H */ diff --git a/include/hw/misc/imx7_ccm.h b/include/hw/misc/imx7_ccm.h new file mode 100644 index 000000000..dcaebfb4e --- /dev/null +++ b/include/hw/misc/imx7_ccm.h @@ -0,0 +1,140 @@ +/* + * Copyright (c) 2017, Impinj, Inc. + * + * i.MX7 CCM, PMU and ANALOG IP blocks emulation code + * + * Author: Andrey Smirnov + * + * This work is licensed under the terms of the GNU GPL, version 2 or later. + * See the COPYING file in the top-level directory. + */ + +#ifndef IMX7_CCM_H +#define IMX7_CCM_H + +#include "hw/misc/imx_ccm.h" +#include "qemu/bitops.h" +#include "qom/object.h" + +enum IMX7AnalogRegisters { + ANALOG_PLL_ARM, + ANALOG_PLL_ARM_SET, + ANALOG_PLL_ARM_CLR, + ANALOG_PLL_ARM_TOG, + ANALOG_PLL_DDR, + ANALOG_PLL_DDR_SET, + ANALOG_PLL_DDR_CLR, + ANALOG_PLL_DDR_TOG, + ANALOG_PLL_DDR_SS, + ANALOG_PLL_DDR_SS_SET, + ANALOG_PLL_DDR_SS_CLR, + ANALOG_PLL_DDR_SS_TOG, + ANALOG_PLL_DDR_NUM, + ANALOG_PLL_DDR_NUM_SET, + ANALOG_PLL_DDR_NUM_CLR, + ANALOG_PLL_DDR_NUM_TOG, + ANALOG_PLL_DDR_DENOM, + ANALOG_PLL_DDR_DENOM_SET, + ANALOG_PLL_DDR_DENOM_CLR, + ANALOG_PLL_DDR_DENOM_TOG, + ANALOG_PLL_480, + ANALOG_PLL_480_SET, + ANALOG_PLL_480_CLR, + ANALOG_PLL_480_TOG, + ANALOG_PLL_480A, + ANALOG_PLL_480A_SET, + ANALOG_PLL_480A_CLR, + ANALOG_PLL_480A_TOG, + ANALOG_PLL_480B, + ANALOG_PLL_480B_SET, + ANALOG_PLL_480B_CLR, + ANALOG_PLL_480B_TOG, + ANALOG_PLL_ENET, + ANALOG_PLL_ENET_SET, + ANALOG_PLL_ENET_CLR, + ANALOG_PLL_ENET_TOG, + ANALOG_PLL_AUDIO, + ANALOG_PLL_AUDIO_SET, + ANALOG_PLL_AUDIO_CLR, + ANALOG_PLL_AUDIO_TOG, + ANALOG_PLL_AUDIO_SS, + ANALOG_PLL_AUDIO_SS_SET, + ANALOG_PLL_AUDIO_SS_CLR, + ANALOG_PLL_AUDIO_SS_TOG, + ANALOG_PLL_AUDIO_NUM, + ANALOG_PLL_AUDIO_NUM_SET, + ANALOG_PLL_AUDIO_NUM_CLR, + ANALOG_PLL_AUDIO_NUM_TOG, + ANALOG_PLL_AUDIO_DENOM, + ANALOG_PLL_AUDIO_DENOM_SET, + ANALOG_PLL_AUDIO_DENOM_CLR, + ANALOG_PLL_AUDIO_DENOM_TOG, + ANALOG_PLL_VIDEO, + ANALOG_PLL_VIDEO_SET, + ANALOG_PLL_VIDEO_CLR, + ANALOG_PLL_VIDEO_TOG, + ANALOG_PLL_VIDEO_SS, + ANALOG_PLL_VIDEO_SS_SET, + ANALOG_PLL_VIDEO_SS_CLR, + ANALOG_PLL_VIDEO_SS_TOG, + ANALOG_PLL_VIDEO_NUM, + ANALOG_PLL_VIDEO_NUM_SET, + ANALOG_PLL_VIDEO_NUM_CLR, + ANALOG_PLL_VIDEO_NUM_TOG, + ANALOG_PLL_VIDEO_DENOM, + ANALOG_PLL_VIDEO_DENOM_SET, + ANALOG_PLL_VIDEO_DENOM_CLR, + ANALOG_PLL_VIDEO_DENOM_TOG, + ANALOG_PLL_MISC0, + ANALOG_PLL_MISC0_SET, + ANALOG_PLL_MISC0_CLR, + ANALOG_PLL_MISC0_TOG, + + ANALOG_DIGPROG = 0x800 / sizeof(uint32_t), + ANALOG_MAX, + + ANALOG_PLL_LOCK = BIT(31) +}; + +enum IMX7CCMRegisters { + CCM_MAX = 0xBE00 / sizeof(uint32_t) + 1, +}; + +enum IMX7PMURegisters { + PMU_MAX = 0x140 / sizeof(uint32_t), +}; + +#define TYPE_IMX7_CCM "imx7.ccm" +OBJECT_DECLARE_SIMPLE_TYPE(IMX7CCMState, IMX7_CCM) + +struct IMX7CCMState { + /* */ + IMXCCMState parent_obj; + + /* */ + MemoryRegion iomem; + + uint32_t ccm[CCM_MAX]; +}; + + +#define TYPE_IMX7_ANALOG "imx7.analog" +OBJECT_DECLARE_SIMPLE_TYPE(IMX7AnalogState, IMX7_ANALOG) + +struct IMX7AnalogState { + /* */ + IMXCCMState parent_obj; + + /* */ + struct { + MemoryRegion container; + MemoryRegion analog; + MemoryRegion digprog; + MemoryRegion pmu; + } mmio; + + uint32_t analog[ANALOG_MAX]; + uint32_t pmu[PMU_MAX]; +}; + +#endif /* IMX7_CCM_H */ diff --git a/include/hw/misc/imx7_gpr.h b/include/hw/misc/imx7_gpr.h new file mode 100644 index 000000000..df364bd8f --- /dev/null +++ b/include/hw/misc/imx7_gpr.h @@ -0,0 +1,29 @@ +/* + * Copyright (c) 2017, Impinj, Inc. + * + * i.MX7 GPR IP block emulation code + * + * Author: Andrey Smirnov + * + * This work is licensed under the terms of the GNU GPL, version 2 or later. + * See the COPYING file in the top-level directory. + */ + +#ifndef IMX7_GPR_H +#define IMX7_GPR_H + +#include "qemu/bitops.h" +#include "hw/sysbus.h" +#include "qom/object.h" + +#define TYPE_IMX7_GPR "imx7.gpr" +OBJECT_DECLARE_SIMPLE_TYPE(IMX7GPRState, IMX7_GPR) + +struct IMX7GPRState { + /* */ + SysBusDevice parent_obj; + + MemoryRegion mmio; +}; + +#endif /* IMX7_GPR_H */ diff --git a/include/hw/misc/imx7_snvs.h b/include/hw/misc/imx7_snvs.h new file mode 100644 index 000000000..14a1d6fe6 --- /dev/null +++ b/include/hw/misc/imx7_snvs.h @@ -0,0 +1,36 @@ +/* + * Copyright (c) 2017, Impinj, Inc. + * + * i.MX7 SNVS block emulation code + * + * Author: Andrey Smirnov + * + * This work is licensed under the terms of the GNU GPL, version 2 or later. + * See the COPYING file in the top-level directory. + */ + +#ifndef IMX7_SNVS_H +#define IMX7_SNVS_H + +#include "qemu/bitops.h" +#include "hw/sysbus.h" +#include "qom/object.h" + + +enum IMX7SNVSRegisters { + SNVS_LPCR = 0x38, + SNVS_LPCR_TOP = BIT(6), + SNVS_LPCR_DP_EN = BIT(5) +}; + +#define TYPE_IMX7_SNVS "imx7.snvs" +OBJECT_DECLARE_SIMPLE_TYPE(IMX7SNVSState, IMX7_SNVS) + +struct IMX7SNVSState { + /* */ + SysBusDevice parent_obj; + + MemoryRegion mmio; +}; + +#endif /* IMX7_SNVS_H */ diff --git a/include/hw/misc/imx_ccm.h b/include/hw/misc/imx_ccm.h new file mode 100644 index 000000000..7e5678e97 --- /dev/null +++ b/include/hw/misc/imx_ccm.h @@ -0,0 +1,63 @@ +/* + * IMX Clock Control Module base class + * + * Copyright (C) 2012 NICTA + * Updated by Jean-Christophe Dubois + * + * This work is licensed under the terms of the GNU GPL, version 2 or later. + * See the COPYING file in the top-level directory. + */ + +#ifndef IMX_CCM_H +#define IMX_CCM_H + +#include "hw/sysbus.h" +#include "qom/object.h" + +#define CKIL_FREQ 32768 /* nominal 32khz clock */ + +/* PLL control registers */ +#define PD(v) (((v) >> 26) & 0xf) +#define MFD(v) (((v) >> 16) & 0x3ff) +#define MFI(v) (((v) >> 10) & 0xf); +#define MFN(v) ((v) & 0x3ff) + +#define PLL_PD(x) (((x) & 0xf) << 26) +#define PLL_MFD(x) (((x) & 0x3ff) << 16) +#define PLL_MFI(x) (((x) & 0xf) << 10) +#define PLL_MFN(x) (((x) & 0x3ff) << 0) + +#define TYPE_IMX_CCM "imx.ccm" +OBJECT_DECLARE_TYPE(IMXCCMState, IMXCCMClass, IMX_CCM) + +struct IMXCCMState { + /* */ + SysBusDevice parent_obj; + + /* */ + +}; + +typedef enum { + CLK_NONE, + CLK_IPG, + CLK_IPG_HIGH, + CLK_32k, + CLK_EXT, + CLK_HIGH_DIV, + CLK_HIGH, +} IMXClk; + +struct IMXCCMClass { + /* */ + SysBusDeviceClass parent_class; + + /* */ + uint32_t (*get_clock_frequency)(IMXCCMState *s, IMXClk clk); +}; + +uint32_t imx_ccm_calc_pll(uint32_t pllreg, uint32_t base_freq); + +uint32_t imx_ccm_get_clock_frequency(IMXCCMState *s, IMXClk clock); + +#endif /* IMX_CCM_H */ diff --git a/include/hw/misc/imx_rngc.h b/include/hw/misc/imx_rngc.h new file mode 100644 index 000000000..34ad69922 --- /dev/null +++ b/include/hw/misc/imx_rngc.h @@ -0,0 +1,36 @@ +/* + * Freescale i.MX RNGC emulation + * + * Copyright (C) 2020 Martin Kaiser + * + * This work is licensed under the terms of the GNU GPL, version 2 or later. + * See the COPYING file in the top-level directory. + */ + +#ifndef IMX_RNGC_H +#define IMX_RNGC_H + +#include "hw/sysbus.h" +#include "qom/object.h" + +#define TYPE_IMX_RNGC "imx.rngc" +OBJECT_DECLARE_SIMPLE_TYPE(IMXRNGCState, IMX_RNGC) + +struct IMXRNGCState { + /*< private >*/ + SysBusDevice parent_obj; + + /*< public >*/ + MemoryRegion iomem; + + uint8_t op_self_test; + uint8_t op_seed; + uint8_t mask; + bool auto_seed; + + QEMUBH *self_test_bh; + QEMUBH *seed_bh; + qemu_irq irq; +}; + +#endif /* IMX_RNGC_H */ diff --git a/include/hw/misc/iotkit-secctl.h b/include/hw/misc/iotkit-secctl.h new file mode 100644 index 000000000..54c212b51 --- /dev/null +++ b/include/hw/misc/iotkit-secctl.h @@ -0,0 +1,125 @@ +/* + * ARM IoT Kit security controller + * + * Copyright (c) 2018 Linaro Limited + * Written by Peter Maydell + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 or + * (at your option) any later version. + */ + +/* This is a model of the security controller which is part of the + * Arm IoT Kit and documented in + * http://infocenter.arm.com/help/index.jsp?topic=/com.arm.doc.ecm0601256/index.html + * + * QEMU interface: + * + sysbus MMIO region 0 is the "secure privilege control block" registers + * + sysbus MMIO region 1 is the "non-secure privilege control block" registers + * + named GPIO output "sec_resp_cfg" indicating whether blocked accesses + * should RAZ/WI or bus error + * + named GPIO output "nsc_cfg" whose value tracks the NSCCFG register value + * + named GPIO output "msc_irq" for the combined IRQ line from the MSCs + * Controlling the 2 APB PPCs in the IoTKit: + * + named GPIO outputs apb_ppc0_nonsec[0..2] and apb_ppc1_nonsec + * + named GPIO outputs apb_ppc0_ap[0..2] and apb_ppc1_ap + * + named GPIO outputs apb_ppc{0,1}_irq_enable + * + named GPIO outputs apb_ppc{0,1}_irq_clear + * + named GPIO inputs apb_ppc{0,1}_irq_status + * Controlling each of the 4 expansion APB PPCs which a system using the IoTKit + * might provide: + * + named GPIO outputs apb_ppcexp{0,1,2,3}_nonsec[0..15] + * + named GPIO outputs apb_ppcexp{0,1,2,3}_ap[0..15] + * + named GPIO outputs apb_ppcexp{0,1,2,3}_irq_enable + * + named GPIO outputs apb_ppcexp{0,1,2,3}_irq_clear + * + named GPIO inputs apb_ppcexp{0,1,2,3}_irq_status + * Controlling each of the 4 expansion AHB PPCs which a system using the IoTKit + * might provide: + * + named GPIO outputs ahb_ppcexp{0,1,2,3}_nonsec[0..15] + * + named GPIO outputs ahb_ppcexp{0,1,2,3}_ap[0..15] + * + named GPIO outputs ahb_ppcexp{0,1,2,3}_irq_enable + * + named GPIO outputs ahb_ppcexp{0,1,2,3}_irq_clear + * + named GPIO inputs ahb_ppcexp{0,1,2,3}_irq_status + * Controlling the (up to) 4 MPCs in the IoTKit/SSE: + * + named GPIO inputs mpc_status[0..3] + * Controlling each of the 16 expansion MPCs which a system using the IoTKit + * might provide: + * + named GPIO inputs mpcexp_status[0..15] + * Controlling each of the 16 expansion MSCs which a system using the IoTKit + * might provide: + * + named GPIO inputs mscexp_status[0..15] + * + named GPIO outputs mscexp_clear[0..15] + * + named GPIO outputs mscexp_ns[0..15] + */ + +#ifndef IOTKIT_SECCTL_H +#define IOTKIT_SECCTL_H + +#include "hw/sysbus.h" +#include "qom/object.h" + +#define TYPE_IOTKIT_SECCTL "iotkit-secctl" +OBJECT_DECLARE_SIMPLE_TYPE(IoTKitSecCtl, IOTKIT_SECCTL) + +#define IOTS_APB_PPC0_NUM_PORTS 3 +#define IOTS_APB_PPC1_NUM_PORTS 1 +#define IOTS_PPC_NUM_PORTS 16 +#define IOTS_NUM_APB_PPC 2 +#define IOTS_NUM_APB_EXP_PPC 4 +#define IOTS_NUM_AHB_EXP_PPC 4 +#define IOTS_NUM_EXP_MPC 16 +#define IOTS_NUM_MPC 4 +#define IOTS_NUM_EXP_MSC 16 + + +/* State and IRQ lines relating to a PPC. For the + * PPCs in the IoTKit not all the IRQ lines are used. + */ +typedef struct IoTKitSecCtlPPC { + qemu_irq nonsec[IOTS_PPC_NUM_PORTS]; + qemu_irq ap[IOTS_PPC_NUM_PORTS]; + qemu_irq irq_enable; + qemu_irq irq_clear; + + uint32_t ns; + uint32_t sp; + uint32_t nsp; + + /* Number of ports actually present */ + int numports; + /* Offset of this PPC's interrupt bits in SECPPCINTSTAT */ + int irq_bit_offset; + IoTKitSecCtl *parent; +} IoTKitSecCtlPPC; + +struct IoTKitSecCtl { + /*< private >*/ + SysBusDevice parent_obj; + + /*< public >*/ + qemu_irq sec_resp_cfg; + qemu_irq nsc_cfg_irq; + + MemoryRegion s_regs; + MemoryRegion ns_regs; + + uint32_t secppcintstat; + uint32_t secppcinten; + uint32_t secrespcfg; + uint32_t nsccfg; + uint32_t brginten; + uint32_t mpcintstatus; + + uint32_t secmscintstat; + uint32_t secmscinten; + uint32_t nsmscexp; + qemu_irq mscexp_clear[IOTS_NUM_EXP_MSC]; + qemu_irq mscexp_ns[IOTS_NUM_EXP_MSC]; + qemu_irq msc_irq; + + IoTKitSecCtlPPC apb[IOTS_NUM_APB_PPC]; + IoTKitSecCtlPPC apbexp[IOTS_NUM_APB_EXP_PPC]; + IoTKitSecCtlPPC ahbexp[IOTS_NUM_APB_EXP_PPC]; +}; + +#endif diff --git a/include/hw/misc/iotkit-sysctl.h b/include/hw/misc/iotkit-sysctl.h new file mode 100644 index 000000000..2b5636b21 --- /dev/null +++ b/include/hw/misc/iotkit-sysctl.h @@ -0,0 +1,72 @@ +/* + * ARM IoTKit system control element + * + * Copyright (c) 2018 Linaro Limited + * Written by Peter Maydell + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 or + * (at your option) any later version. + */ + +/* + * This is a model of the "system control element" which is part of the + * Arm IoTKit and documented in + * http://infocenter.arm.com/help/index.jsp?topic=/com.arm.doc.ecm0601256/index.html + * Specifically, it implements the "system information block" and + * "system control register" blocks. + * + * QEMU interface: + * + QOM property "SYS_VERSION": value of the SYS_VERSION register of the + * system information block of the SSE + * (used to identify whether to provide SSE-200-only registers) + * + sysbus MMIO region 0: the system information register bank + * + sysbus MMIO region 1: the system control register bank + */ + +#ifndef HW_MISC_IOTKIT_SYSCTL_H +#define HW_MISC_IOTKIT_SYSCTL_H + +#include "hw/sysbus.h" +#include "qom/object.h" + +#define TYPE_IOTKIT_SYSCTL "iotkit-sysctl" +OBJECT_DECLARE_SIMPLE_TYPE(IoTKitSysCtl, IOTKIT_SYSCTL) + +struct IoTKitSysCtl { + /*< private >*/ + SysBusDevice parent_obj; + + /*< public >*/ + MemoryRegion iomem; + + uint32_t secure_debug; + uint32_t reset_syndrome; + uint32_t reset_mask; + uint32_t gretreg; + uint32_t initsvtor0; + uint32_t cpuwait; + uint32_t wicctrl; + uint32_t scsecctrl; + uint32_t fclk_div; + uint32_t sysclk_div; + uint32_t clock_force; + uint32_t initsvtor1; + uint32_t nmi_enable; + uint32_t ewctrl; + uint32_t pdcm_pd_sys_sense; + uint32_t pdcm_pd_sram0_sense; + uint32_t pdcm_pd_sram1_sense; + uint32_t pdcm_pd_sram2_sense; + uint32_t pdcm_pd_sram3_sense; + + /* Properties */ + uint32_t sys_version; + uint32_t cpuwait_rst; + uint32_t initsvtor0_rst; + uint32_t initsvtor1_rst; + + bool is_sse200; +}; + +#endif diff --git a/include/hw/misc/iotkit-sysinfo.h b/include/hw/misc/iotkit-sysinfo.h new file mode 100644 index 000000000..7e620e2ea --- /dev/null +++ b/include/hw/misc/iotkit-sysinfo.h @@ -0,0 +1,43 @@ +/* + * ARM IoTKit system information block + * + * Copyright (c) 2018 Linaro Limited + * Written by Peter Maydell + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 or + * (at your option) any later version. + */ + +/* + * This is a model of the "system information block" which is part of the + * Arm IoTKit and documented in + * http://infocenter.arm.com/help/index.jsp?topic=/com.arm.doc.ecm0601256/index.html + * QEMU interface: + * + QOM property "SYS_VERSION": value to use for SYS_VERSION register + * + QOM property "SYS_CONFIG": value to use for SYS_CONFIG register + * + sysbus MMIO region 0: the system information register bank + */ + +#ifndef HW_MISC_IOTKIT_SYSINFO_H +#define HW_MISC_IOTKIT_SYSINFO_H + +#include "hw/sysbus.h" +#include "qom/object.h" + +#define TYPE_IOTKIT_SYSINFO "iotkit-sysinfo" +OBJECT_DECLARE_SIMPLE_TYPE(IoTKitSysInfo, IOTKIT_SYSINFO) + +struct IoTKitSysInfo { + /*< private >*/ + SysBusDevice parent_obj; + + /*< public >*/ + MemoryRegion iomem; + + /* Properties */ + uint32_t sys_version; + uint32_t sys_config; +}; + +#endif diff --git a/include/hw/misc/ivshmem.h b/include/hw/misc/ivshmem.h new file mode 100644 index 000000000..433ef53d7 --- /dev/null +++ b/include/hw/misc/ivshmem.h @@ -0,0 +1,25 @@ + +/* + * Inter-VM Shared Memory PCI device. + * + * Author: + * Cam Macdonell + * + * Based On: cirrus_vga.c + * Copyright (c) 2004 Fabrice Bellard + * Copyright (c) 2004 Makoto Suzuki (suzu) + * + * and rtl8139.c + * Copyright (c) 2006 Igor Kovalenko + * + * This code is licensed under the GNU GPL v2. + * + * Contributions after 2012-01-13 are licensed under the terms of the + * GNU GPL, version 2 or (at your option) any later version. + */ +#ifndef IVSHMEM_H +#define IVSHMEM_H + +#define IVSHMEM_PROTOCOL_VERSION 0 + +#endif /* IVSHMEM_H */ diff --git a/include/hw/misc/led.h b/include/hw/misc/led.h new file mode 100644 index 000000000..aa359b87c --- /dev/null +++ b/include/hw/misc/led.h @@ -0,0 +1,97 @@ +/* + * QEMU single LED device + * + * Copyright (C) 2020 Philippe Mathieu-Daudé + * + * SPDX-License-Identifier: GPL-2.0-or-later + */ +#ifndef HW_MISC_LED_H +#define HW_MISC_LED_H + +#include "qom/object.h" +#include "hw/qdev-core.h" + +#define TYPE_LED "led" + +/** + * LEDColor: Color of a LED + * + * This set is restricted to physically available LED colors. + * + * LED colors from 'Table 1. Product performance of LUXEON Rebel Color + * Line' of the 'DS68 LUXEON Rebel Color Line' datasheet available at: + * https://www.lumileds.com/products/color-leds/luxeon-rebel-color/ + */ +typedef enum { /* Coarse wavelength range */ + LED_COLOR_VIOLET, /* 425 nm */ + LED_COLOR_BLUE, /* 475 nm */ + LED_COLOR_CYAN, /* 500 nm */ + LED_COLOR_GREEN, /* 535 nm */ + LED_COLOR_AMBER, /* 590 nm */ + LED_COLOR_ORANGE, /* 615 nm */ + LED_COLOR_RED, /* 630 nm */ +} LEDColor; + +struct LEDState { + /* Private */ + DeviceState parent_obj; + /* Public */ + + uint8_t intensity_percent; + qemu_irq irq; + + /* Properties */ + char *description; + char *color; + /* + * Determines whether a GPIO is using a positive (active-high) + * logic (when used with GPIO, the intensity at reset is related + * to the GPIO polarity). + */ + bool gpio_active_high; +}; +typedef struct LEDState LEDState; +DECLARE_INSTANCE_CHECKER(LEDState, LED, TYPE_LED) + +/** + * led_set_intensity: Set the intensity of a LED device + * @s: the LED object + * @intensity_percent: intensity as percentage in range 0 to 100. + */ +void led_set_intensity(LEDState *s, unsigned intensity_percent); + +/** + * led_get_intensity: + * @s: the LED object + * + * Returns: The LED intensity as percentage in range 0 to 100. + */ +unsigned led_get_intensity(LEDState *s); + +/** + * led_set_state: Set the state of a LED device + * @s: the LED object + * @is_emitting: boolean indicating whether the LED is emitting + * + * This utility is meant for LED connected to GPIO. + */ +void led_set_state(LEDState *s, bool is_emitting); + +/** + * led_create_simple: Create and realize a LED device + * @parentobj: the parent object + * @gpio_polarity: GPIO polarity + * @color: color of the LED + * @description: description of the LED (optional) + * + * Create the device state structure, initialize it, and + * drop the reference to it (the device is realized). + * + * Returns: The newly allocated and instantiated LED object. + */ +LEDState *led_create_simple(Object *parentobj, + GpioPolarity gpio_polarity, + LEDColor color, + const char *description); + +#endif /* HW_MISC_LED_H */ diff --git a/include/hw/misc/mac_via.h b/include/hw/misc/mac_via.h new file mode 100644 index 000000000..a59f0bd42 --- /dev/null +++ b/include/hw/misc/mac_via.h @@ -0,0 +1,117 @@ +/* + * + * Copyright (c) 2011-2018 Laurent Vivier + * + * This work is licensed under the terms of the GNU GPL, version 2 or later. + * See the COPYING file in the top-level directory. + */ + +#ifndef HW_MISC_MAC_VIA_H +#define HW_MISC_MAC_VIA_H + +#include "exec/memory.h" +#include "hw/sysbus.h" +#include "hw/misc/mos6522.h" +#include "qom/object.h" + + +/* VIA 1 */ +#define VIA1_IRQ_ONE_SECOND_BIT 0 +#define VIA1_IRQ_VBLANK_BIT 1 +#define VIA1_IRQ_ADB_READY_BIT 2 +#define VIA1_IRQ_ADB_DATA_BIT 3 +#define VIA1_IRQ_ADB_CLOCK_BIT 4 + +#define VIA1_IRQ_NB 8 + +#define VIA1_IRQ_ONE_SECOND (1 << VIA1_IRQ_ONE_SECOND_BIT) +#define VIA1_IRQ_VBLANK (1 << VIA1_IRQ_VBLANK_BIT) +#define VIA1_IRQ_ADB_READY (1 << VIA1_IRQ_ADB_READY_BIT) +#define VIA1_IRQ_ADB_DATA (1 << VIA1_IRQ_ADB_DATA_BIT) +#define VIA1_IRQ_ADB_CLOCK (1 << VIA1_IRQ_ADB_CLOCK_BIT) + + +#define TYPE_MOS6522_Q800_VIA1 "mos6522-q800-via1" +OBJECT_DECLARE_SIMPLE_TYPE(MOS6522Q800VIA1State, MOS6522_Q800_VIA1) + +struct MOS6522Q800VIA1State { + /*< private >*/ + MOS6522State parent_obj; + + qemu_irq irqs[VIA1_IRQ_NB]; + uint8_t last_b; + uint8_t PRAM[256]; + + /* external timers */ + QEMUTimer *one_second_timer; + int64_t next_second; + QEMUTimer *VBL_timer; + int64_t next_VBL; +}; + + +/* VIA 2 */ +#define VIA2_IRQ_SCSI_DATA_BIT 0 +#define VIA2_IRQ_SLOT_BIT 1 +#define VIA2_IRQ_UNUSED_BIT 2 +#define VIA2_IRQ_SCSI_BIT 3 +#define VIA2_IRQ_ASC_BIT 4 + +#define VIA2_IRQ_NB 8 + +#define VIA2_IRQ_SCSI_DATA (1 << VIA2_IRQ_SCSI_DATA_BIT) +#define VIA2_IRQ_SLOT (1 << VIA2_IRQ_SLOT_BIT) +#define VIA2_IRQ_UNUSED (1 << VIA2_IRQ_SCSI_BIT) +#define VIA2_IRQ_SCSI (1 << VIA2_IRQ_UNUSED_BIT) +#define VIA2_IRQ_ASC (1 << VIA2_IRQ_ASC_BIT) + +#define TYPE_MOS6522_Q800_VIA2 "mos6522-q800-via2" +OBJECT_DECLARE_SIMPLE_TYPE(MOS6522Q800VIA2State, MOS6522_Q800_VIA2) + +struct MOS6522Q800VIA2State { + /*< private >*/ + MOS6522State parent_obj; +}; + + +#define TYPE_MAC_VIA "mac_via" +OBJECT_DECLARE_SIMPLE_TYPE(MacVIAState, MAC_VIA) + +struct MacVIAState { + SysBusDevice busdev; + + VMChangeStateEntry *vmstate; + + /* MMIO */ + MemoryRegion mmio; + MemoryRegion via1mem; + MemoryRegion via2mem; + + /* VIAs */ + MOS6522Q800VIA1State mos6522_via1; + MOS6522Q800VIA2State mos6522_via2; + + /* RTC */ + uint32_t tick_offset; + + uint8_t data_out; + int data_out_cnt; + uint8_t data_in; + uint8_t data_in_cnt; + uint8_t cmd; + int wprotect; + int alt; + BlockBackend *blk; + + /* ADB */ + ADBBusState adb_bus; + qemu_irq adb_data_ready; + int adb_data_in_size; + int adb_data_in_index; + int adb_data_out_index; + uint8_t adb_data_in[128]; + uint8_t adb_data_out[16]; + uint8_t adb_autopoll_cmd; +}; + +#endif diff --git a/include/hw/misc/macio/cuda.h b/include/hw/misc/macio/cuda.h new file mode 100644 index 000000000..a71deec96 --- /dev/null +++ b/include/hw/misc/macio/cuda.h @@ -0,0 +1,102 @@ +/* + * QEMU PowerMac CUDA device support + * + * Copyright (c) 2004-2007 Fabrice Bellard + * Copyright (c) 2007 Jocelyn Mayer + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ + +#ifndef CUDA_H +#define CUDA_H + +#include "hw/misc/mos6522.h" +#include "qom/object.h" + +/* CUDA commands (2nd byte) */ +#define CUDA_WARM_START 0x0 +#define CUDA_AUTOPOLL 0x1 +#define CUDA_GET_6805_ADDR 0x2 +#define CUDA_GET_TIME 0x3 +#define CUDA_GET_PRAM 0x7 +#define CUDA_SET_6805_ADDR 0x8 +#define CUDA_SET_TIME 0x9 +#define CUDA_POWERDOWN 0xa +#define CUDA_POWERUP_TIME 0xb +#define CUDA_SET_PRAM 0xc +#define CUDA_MS_RESET 0xd +#define CUDA_SEND_DFAC 0xe +#define CUDA_BATTERY_SWAP_SENSE 0x10 +#define CUDA_RESET_SYSTEM 0x11 +#define CUDA_SET_IPL 0x12 +#define CUDA_FILE_SERVER_FLAG 0x13 +#define CUDA_SET_AUTO_RATE 0x14 +#define CUDA_GET_AUTO_RATE 0x16 +#define CUDA_SET_DEVICE_LIST 0x19 +#define CUDA_GET_DEVICE_LIST 0x1a +#define CUDA_SET_ONE_SECOND_MODE 0x1b +#define CUDA_SET_POWER_MESSAGES 0x21 +#define CUDA_GET_SET_IIC 0x22 +#define CUDA_WAKEUP 0x23 +#define CUDA_TIMER_TICKLE 0x24 +#define CUDA_COMBINED_FORMAT_IIC 0x25 + + +/* MOS6522 CUDA */ +struct MOS6522CUDAState { + /*< private >*/ + MOS6522State parent_obj; +}; + +#define TYPE_MOS6522_CUDA "mos6522-cuda" +OBJECT_DECLARE_SIMPLE_TYPE(MOS6522CUDAState, MOS6522_CUDA) + +/* Cuda */ +#define TYPE_CUDA "cuda" +OBJECT_DECLARE_SIMPLE_TYPE(CUDAState, CUDA) + +struct CUDAState { + /*< private >*/ + SysBusDevice parent_obj; + /*< public >*/ + MemoryRegion mem; + + ADBBusState adb_bus; + MOS6522CUDAState mos6522_cuda; + + uint32_t tick_offset; + uint64_t tb_frequency; + + uint8_t last_b; + uint8_t last_acr; + + /* MacOS 9 is racy and requires a delay upon setting the SR_INT bit */ + uint64_t sr_delay_ns; + QEMUTimer *sr_delay_timer; + + int data_in_size; + int data_in_index; + int data_out_index; + + qemu_irq irq; + uint8_t data_in[128]; + uint8_t data_out[16]; +}; + +#endif /* CUDA_H */ diff --git a/include/hw/misc/macio/gpio.h b/include/hw/misc/macio/gpio.h new file mode 100644 index 000000000..4dee09a9d --- /dev/null +++ b/include/hw/misc/macio/gpio.h @@ -0,0 +1,51 @@ +/* + * PowerMac NewWorld MacIO GPIO emulation + * + * Copyright (c) 2016 Benjamin Herrenschmidt + * Copyright (c) 2018 Mark Cave-Ayland + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ + +#ifndef MACIO_GPIO_H +#define MACIO_GPIO_H + +#include "hw/ppc/openpic.h" +#include "hw/sysbus.h" +#include "qom/object.h" + +#define TYPE_MACIO_GPIO "macio-gpio" +OBJECT_DECLARE_SIMPLE_TYPE(MacIOGPIOState, MACIO_GPIO) + +struct MacIOGPIOState { + /*< private >*/ + SysBusDevice parent; + /*< public >*/ + + OpenPICState *pic; + + MemoryRegion gpiomem; + qemu_irq gpio_extirqs[10]; + uint8_t gpio_levels[8]; + uint8_t gpio_regs[36]; /* XXX Check count */ +}; + +void macio_set_gpio(MacIOGPIOState *s, uint32_t gpio, bool state); + +#endif diff --git a/include/hw/misc/macio/macio.h b/include/hw/misc/macio/macio.h new file mode 100644 index 000000000..22b4e64b2 --- /dev/null +++ b/include/hw/misc/macio/macio.h @@ -0,0 +1,123 @@ +/* + * PowerMac MacIO device emulation + * + * Copyright (c) 2005-2007 Fabrice Bellard + * Copyright (c) 2007 Jocelyn Mayer + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ + +#ifndef MACIO_H +#define MACIO_H + +#include "hw/char/escc.h" +#include "hw/pci/pci.h" +#include "hw/ide/internal.h" +#include "hw/intc/heathrow_pic.h" +#include "hw/misc/macio/cuda.h" +#include "hw/misc/macio/gpio.h" +#include "hw/misc/macio/pmu.h" +#include "hw/ppc/mac.h" +#include "hw/ppc/mac_dbdma.h" +#include "hw/ppc/openpic.h" +#include "qom/object.h" + +/* MacIO virtual bus */ +#define TYPE_MACIO_BUS "macio-bus" +OBJECT_DECLARE_SIMPLE_TYPE(MacIOBusState, MACIO_BUS) + +struct MacIOBusState { + /*< private >*/ + BusState parent_obj; +}; + +/* MacIO IDE */ +#define TYPE_MACIO_IDE "macio-ide" +OBJECT_DECLARE_SIMPLE_TYPE(MACIOIDEState, MACIO_IDE) + +struct MACIOIDEState { + /*< private >*/ + SysBusDevice parent_obj; + /*< public >*/ + uint32_t addr; + uint32_t channel; + qemu_irq real_ide_irq; + qemu_irq real_dma_irq; + qemu_irq ide_irq; + qemu_irq dma_irq; + + MemoryRegion mem; + IDEBus bus; + IDEDMA dma; + void *dbdma; + bool dma_active; + uint32_t timing_reg; + uint32_t irq_reg; +}; + +void macio_ide_init_drives(MACIOIDEState *ide, DriveInfo **hd_table); +void macio_ide_register_dma(MACIOIDEState *ide); + +#define TYPE_MACIO "macio" +OBJECT_DECLARE_SIMPLE_TYPE(MacIOState, MACIO) + +struct MacIOState { + /*< private >*/ + PCIDevice parent; + /*< public >*/ + + MacIOBusState macio_bus; + MemoryRegion bar; + CUDAState cuda; + PMUState pmu; + DBDMAState dbdma; + ESCCState escc; + uint64_t frequency; +}; + +#define TYPE_OLDWORLD_MACIO "macio-oldworld" +OBJECT_DECLARE_SIMPLE_TYPE(OldWorldMacIOState, OLDWORLD_MACIO) + +struct OldWorldMacIOState { + /*< private >*/ + MacIOState parent_obj; + /*< public >*/ + + HeathrowState *pic; + + MacIONVRAMState nvram; + MACIOIDEState ide[2]; +}; + +#define TYPE_NEWWORLD_MACIO "macio-newworld" +OBJECT_DECLARE_SIMPLE_TYPE(NewWorldMacIOState, NEWWORLD_MACIO) + +struct NewWorldMacIOState { + /*< private >*/ + MacIOState parent_obj; + /*< public >*/ + + bool has_pmu; + bool has_adb; + OpenPICState *pic; + MACIOIDEState ide[2]; + MacIOGPIOState gpio; +}; + +#endif /* MACIO_H */ diff --git a/include/hw/misc/macio/pmu.h b/include/hw/misc/macio/pmu.h new file mode 100644 index 000000000..78237d99a --- /dev/null +++ b/include/hw/misc/macio/pmu.h @@ -0,0 +1,236 @@ +/* + * Definitions for talking to the PMU. The PMU is a microcontroller + * which controls battery charging and system power on PowerBook 3400 + * and 2400 models as well as the RTC and various other things. + * + * Copyright (C) 1998 Paul Mackerras. + * Copyright (C) 2016 Ben Herrenschmidt + */ + +#ifndef PMU_H +#define PMU_H + +#include "hw/misc/mos6522.h" +#include "hw/misc/macio/gpio.h" +#include "qom/object.h" + +/* + * PMU commands + */ + +#define PMU_POWER_CTRL0 0x10 /* control power of some devices */ +#define PMU_POWER_CTRL 0x11 /* control power of some devices */ +#define PMU_ADB_CMD 0x20 /* send ADB packet */ +#define PMU_ADB_POLL_OFF 0x21 /* disable ADB auto-poll */ +#define PMU_WRITE_NVRAM 0x33 /* write non-volatile RAM */ +#define PMU_READ_NVRAM 0x3b /* read non-volatile RAM */ +#define PMU_SET_RTC 0x30 /* set real-time clock */ +#define PMU_READ_RTC 0x38 /* read real-time clock */ +#define PMU_SET_VOLBUTTON 0x40 /* set volume up/down position */ +#define PMU_BACKLIGHT_BRIGHT 0x41 /* set backlight brightness */ +#define PMU_GET_VOLBUTTON 0x48 /* get volume up/down position */ +#define PMU_PCEJECT 0x4c /* eject PC-card from slot */ +#define PMU_BATTERY_STATE 0x6b /* report battery state etc. */ +#define PMU_SMART_BATTERY_STATE 0x6f /* report battery state (new way) */ +#define PMU_SET_INTR_MASK 0x70 /* set PMU interrupt mask */ +#define PMU_INT_ACK 0x78 /* read interrupt bits */ +#define PMU_SHUTDOWN 0x7e /* turn power off */ +#define PMU_CPU_SPEED 0x7d /* control CPU speed on some models */ +#define PMU_SLEEP 0x7f /* put CPU to sleep */ +#define PMU_POWER_EVENTS 0x8f /* Send power-event commands to PMU */ +#define PMU_I2C_CMD 0x9a /* I2C operations */ +#define PMU_RESET 0xd0 /* reset CPU */ +#define PMU_GET_BRIGHTBUTTON 0xd9 /* report brightness up/down pos */ +#define PMU_GET_COVER 0xdc /* report cover open/closed */ +#define PMU_SYSTEM_READY 0xdf /* tell PMU we are awake */ +#define PMU_DOWNLOAD_STATUS 0xe2 /* Called by MacOS during boot... */ +#define PMU_READ_PMU_RAM 0xe8 /* read the PMU RAM... ??? */ +#define PMU_GET_VERSION 0xea /* read the PMU version */ + +/* Bits to use with the PMU_POWER_CTRL0 command */ +#define PMU_POW0_ON 0x80 /* OR this to power ON the device */ +#define PMU_POW0_OFF 0x00 /* leave bit 7 to 0 to power it OFF */ +#define PMU_POW0_HARD_DRIVE 0x04 /* Hard drive power + * (on wallstreet/lombard ?) */ + +/* Bits to use with the PMU_POWER_CTRL command */ +#define PMU_POW_ON 0x80 /* OR this to power ON the device */ +#define PMU_POW_OFF 0x00 /* leave bit 7 to 0 to power it OFF */ +#define PMU_POW_BACKLIGHT 0x01 /* backlight power */ +#define PMU_POW_CHARGER 0x02 /* battery charger power */ +#define PMU_POW_IRLED 0x04 /* IR led power (on wallstreet) */ +#define PMU_POW_MEDIABAY 0x08 /* media bay power + * (wallstreet/lombard ?) */ + +/* Bits in PMU interrupt and interrupt mask bytes */ +#define PMU_INT_PCEJECT 0x04 /* PC-card eject buttons */ +#define PMU_INT_SNDBRT 0x08 /* sound/brightness up/down buttons */ +#define PMU_INT_ADB 0x10 /* ADB autopoll or reply data */ +#define PMU_INT_BATTERY 0x20 /* Battery state change */ +#define PMU_INT_ENVIRONMENT 0x40 /* Environment interrupts */ +#define PMU_INT_TICK 0x80 /* 1-second tick interrupt */ + +/* Other bits in PMU interrupt valid when PMU_INT_ADB is set */ +#define PMU_INT_ADB_AUTO 0x04 /* ADB autopoll, when PMU_INT_ADB */ +#define PMU_INT_WAITING_CHARGER 0x01 /* ??? */ +#define PMU_INT_AUTO_SRQ_POLL 0x02 /* ??? */ + +/* Bits in the environement message (either obtained via PMU_GET_COVER, + * or via PMU_INT_ENVIRONMENT on core99 */ +#define PMU_ENV_LID_CLOSED 0x01 /* The lid is closed */ + +/* I2C related definitions */ +#define PMU_I2C_MODE_SIMPLE 0 +#define PMU_I2C_MODE_STDSUB 1 +#define PMU_I2C_MODE_COMBINED 2 + +#define PMU_I2C_BUS_STATUS 0 +#define PMU_I2C_BUS_SYSCLK 1 +#define PMU_I2C_BUS_POWER 2 + +#define PMU_I2C_STATUS_OK 0 +#define PMU_I2C_STATUS_DATAREAD 1 +#define PMU_I2C_STATUS_BUSY 0xfe + +/* Kind of PMU (model) */ +enum { + PMU_UNKNOWN, + PMU_OHARE_BASED, /* 2400, 3400, 3500 (old G3 powerbook) */ + PMU_HEATHROW_BASED, /* PowerBook G3 series */ + PMU_PADDINGTON_BASED, /* 1999 PowerBook G3 */ + PMU_KEYLARGO_BASED, /* Core99 motherboard (PMU99) */ + PMU_68K_V1, /* 68K PMU, version 1 */ + PMU_68K_V2, /* 68K PMU, version 2 */ +}; + +/* PMU PMU_POWER_EVENTS commands */ +enum { + PMU_PWR_GET_POWERUP_EVENTS = 0x00, + PMU_PWR_SET_POWERUP_EVENTS = 0x01, + PMU_PWR_CLR_POWERUP_EVENTS = 0x02, + PMU_PWR_GET_WAKEUP_EVENTS = 0x03, + PMU_PWR_SET_WAKEUP_EVENTS = 0x04, + PMU_PWR_CLR_WAKEUP_EVENTS = 0x05, +}; + +/* Power events wakeup bits */ +enum { + PMU_PWR_WAKEUP_KEY = 0x01, /* Wake on key press */ + PMU_PWR_WAKEUP_AC_INSERT = 0x02, /* Wake on AC adapter plug */ + PMU_PWR_WAKEUP_AC_CHANGE = 0x04, + PMU_PWR_WAKEUP_LID_OPEN = 0x08, + PMU_PWR_WAKEUP_RING = 0x10, +}; + +/* + * This table indicates for each PMU opcode: + * - the number of data bytes to be sent with the command, or -1 + * if a length byte should be sent, + * - the number of response bytes which the PMU will return, or + * -1 if it will send a length byte. + */ + +static const int8_t pmu_data_len[256][2] = { +/* 0 1 2 3 4 5 6 7 */ + {-1, 0},{-1, 0},{-1, 0},{-1, 0},{-1, 0},{-1, 0},{-1, 0},{-1, 0}, + {-1, -1},{-1, -1},{-1, -1},{-1, -1},{-1, -1},{-1, -1},{-1, -1},{-1, -1}, + { 1, 0},{ 1, 0},{-1, 0},{-1, 0},{-1, 0},{-1, 0},{-1, 0},{-1, 0}, + { 0, 1},{ 0, 1},{-1, -1},{-1, -1},{-1, -1},{-1, -1},{-1, -1},{ 0, 0}, + {-1, 0},{ 0, 0},{ 2, 0},{ 1, 0},{ 1, 0},{-1, 0},{-1, 0},{-1, 0}, + { 0, -1},{ 0, -1},{-1, -1},{-1, -1},{-1, -1},{-1, -1},{-1, -1},{ 0, -1}, + { 4, 0},{20, 0},{-1, 0},{ 3, 0},{-1, 0},{-1, 0},{-1, 0},{-1, 0}, + { 0, 4},{ 0, 20},{ 2, -1},{ 2, 1},{ 3, -1},{-1, -1},{-1, -1},{ 4, 0}, + { 1, 0},{ 1, 0},{-1, 0},{-1, 0},{-1, 0},{-1, 0},{-1, 0},{-1, 0}, + { 0, 1},{ 0, 1},{-1, -1},{ 1, 0},{ 1, 0},{-1, -1},{-1, -1},{-1, -1}, + { 1, 0},{ 0, 0},{ 2, 0},{ 2, 0},{-1, 0},{ 1, 0},{ 3, 0},{ 1, 0}, + { 0, 1},{ 1, 0},{ 0, 2},{ 0, 2},{ 0, -1},{-1, -1},{-1, -1},{-1, -1}, + { 2, 0},{-1, 0},{-1, 0},{-1, 0},{-1, 0},{-1, 0},{-1, 0},{-1, 0}, + { 0, 3},{ 0, 3},{ 0, 2},{ 0, 8},{ 0, -1},{ 0, -1},{-1, -1},{-1, -1}, + { 1, 0},{ 1, 0},{ 1, 0},{-1, 0},{-1, 0},{-1, 0},{-1, 0},{-1, 0}, + { 0, -1},{ 0, -1},{-1, -1},{-1, -1},{-1, -1},{ 5, 1},{ 4, 1},{ 4, 1}, + { 4, 0},{-1, 0},{ 0, 0},{-1, 0},{-1, 0},{-1, 0},{-1, 0},{-1, 0}, + { 0, 5},{-1, -1},{-1, -1},{-1, -1},{-1, -1},{-1, -1},{-1, -1},{-1, -1}, + { 1, 0},{ 2, 0},{-1, 0},{-1, 0},{-1, 0},{-1, 0},{-1, 0},{-1, 0}, + { 0, 1},{ 0, 1},{-1, -1},{-1, -1},{-1, -1},{-1, -1},{-1, -1},{-1, -1}, + { 2, 0},{ 2, 0},{ 2, 0},{ 4, 0},{-1, 0},{ 0, 0},{-1, 0},{-1, 0}, + { 1, 1},{ 1, 0},{ 3, 0},{ 2, 0},{-1, -1},{-1, -1},{-1, -1},{-1, -1}, + {-1, 0},{-1, 0},{-1, 0},{-1, 0},{-1, 0},{-1, 0},{-1, 0},{-1, 0}, + {-1, -1},{-1, -1},{-1, -1},{-1, -1},{-1, -1},{-1, -1},{-1, -1},{-1, -1}, + {-1, 0},{-1, 0},{-1, 0},{-1, 0},{-1, 0},{-1, 0},{-1, 0},{-1, 0}, + {-1, -1},{-1, -1},{-1, -1},{-1, -1},{-1, -1},{-1, -1},{-1, -1},{-1, -1}, + { 0, 0},{-1, 0},{-1, 0},{-1, 0},{-1, 0},{-1, 0},{-1, 0},{-1, 0}, + { 1, 1},{ 1, 1},{-1, -1},{-1, -1},{ 0, 1},{ 0, -1},{-1, -1},{-1, -1}, + {-1, 0},{ 4, 0},{ 0, 1},{-1, 0},{-1, 0},{ 4, 0},{-1, 0},{-1, 0}, + { 3, -1},{-1, -1},{ 0, 1},{-1, -1},{ 0, -1},{-1, -1},{-1, -1},{ 0, 0}, + {-1, 0},{-1, 0},{-1, 0},{-1, 0},{-1, 0},{-1, 0},{-1, 0},{-1, 0}, + {-1, -1},{-1, -1},{-1, -1},{-1, -1},{-1, -1},{-1, -1},{-1, -1},{-1, -1}, +}; + +/* Command protocol state machine */ +typedef enum { + pmu_state_idle, /* Waiting for command */ + pmu_state_cmd, /* Receiving command */ + pmu_state_rsp, /* Responding to command */ +} PMUCmdState; + +/* MOS6522 PMU */ +struct MOS6522PMUState { + /*< private >*/ + MOS6522State parent_obj; +}; + +#define TYPE_MOS6522_PMU "mos6522-pmu" +OBJECT_DECLARE_SIMPLE_TYPE(MOS6522PMUState, MOS6522_PMU) +/** + * PMUState: + * @last_b: last value of B register + */ + +struct PMUState { + /*< private >*/ + SysBusDevice parent_obj; + /*< public >*/ + + MemoryRegion mem; + uint64_t frequency; + qemu_irq via_irq; + bool via_irq_state; + + /* PMU state */ + MOS6522PMUState mos6522_pmu; + + /* PMU low level protocol state */ + PMUCmdState cmd_state; + uint8_t last_b; + uint8_t cmd; + uint32_t cmdlen; + uint32_t rsplen; + uint8_t cmd_buf_pos; + uint8_t cmd_buf[128]; + uint8_t cmd_rsp_pos; + uint8_t cmd_rsp_sz; + uint8_t cmd_rsp[128]; + + /* PMU events/interrupts */ + uint8_t intbits; + uint8_t intmask; + + /* ADB */ + bool has_adb; + ADBBusState adb_bus; + uint8_t adb_reply_size; + uint8_t adb_reply[ADB_MAX_OUT_LEN]; + + /* RTC */ + uint32_t tick_offset; + QEMUTimer *one_sec_timer; + int64_t one_sec_target; + + /* GPIO */ + MacIOGPIOState *gpio; +}; + +#define TYPE_VIA_PMU "via-pmu" +OBJECT_DECLARE_SIMPLE_TYPE(PMUState, VIA_PMU) + +#endif /* PMU_H */ diff --git a/include/hw/misc/max111x.h b/include/hw/misc/max111x.h new file mode 100644 index 000000000..606cf1e0a --- /dev/null +++ b/include/hw/misc/max111x.h @@ -0,0 +1,56 @@ +/* + * Maxim MAX1110/1111 ADC chip emulation. + * + * Copyright (c) 2006 Openedhand Ltd. + * Written by Andrzej Zaborowski + * + * This code is licensed under the GNU GPLv2. + * + * Contributions after 2012-01-13 are licensed under the terms of the + * GNU GPL, version 2 or (at your option) any later version. + */ + +#ifndef HW_MISC_MAX111X_H +#define HW_MISC_MAX111X_H + +#include "hw/ssi/ssi.h" +#include "qom/object.h" + +/* + * This is a model of the Maxim MAX1110/1111 ADC chip, which for QEMU + * is an SSI slave device. It has either 4 (max1110) or 8 (max1111) + * 8-bit ADC channels. + * + * QEMU interface: + * + GPIO inputs 0..3 (for max1110) or 0..7 (for max1111): set the value + * of each ADC input, as an unsigned 8-bit value + * + GPIO output 0: interrupt line + * + Properties "input0" to "input3" (max1110) or "input0" to "input7" + * (max1111): initial reset values for ADC inputs. + * + * Known bugs: + * + the interrupt line is not correctly implemented, and will never + * be lowered once it has been asserted. + */ +struct MAX111xState { + SSISlave parent_obj; + + qemu_irq interrupt; + /* Values of inputs at system reset (settable by QOM property) */ + uint8_t reset_input[8]; + + uint8_t tb1, rb2, rb3; + int cycle; + + uint8_t input[8]; + int inputs, com; +}; + +#define TYPE_MAX_111X "max111x" + +OBJECT_DECLARE_SIMPLE_TYPE(MAX111xState, MAX_111X) + +#define TYPE_MAX_1110 "max1110" +#define TYPE_MAX_1111 "max1111" + +#endif diff --git a/include/hw/misc/mchp_pfsoc_dmc.h b/include/hw/misc/mchp_pfsoc_dmc.h new file mode 100644 index 000000000..2baa1413b --- /dev/null +++ b/include/hw/misc/mchp_pfsoc_dmc.h @@ -0,0 +1,56 @@ +/* + * Microchip PolarFire SoC DDR Memory Controller module emulation + * + * Copyright (c) 2020 Wind River Systems, Inc. + * + * Author: + * Bin Meng + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation; either version 2 or + * (at your option) version 3 of the License. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, see . + */ + +#ifndef MCHP_PFSOC_DMC_H +#define MCHP_PFSOC_DMC_H + +/* DDR SGMII PHY module */ + +#define MCHP_PFSOC_DDR_SGMII_PHY_REG_SIZE 0x1000 + +typedef struct MchpPfSoCDdrSgmiiPhyState { + SysBusDevice parent; + MemoryRegion sgmii_phy; +} MchpPfSoCDdrSgmiiPhyState; + +#define TYPE_MCHP_PFSOC_DDR_SGMII_PHY "mchp.pfsoc.ddr_sgmii_phy" + +#define MCHP_PFSOC_DDR_SGMII_PHY(obj) \ + OBJECT_CHECK(MchpPfSoCDdrSgmiiPhyState, (obj), \ + TYPE_MCHP_PFSOC_DDR_SGMII_PHY) + +/* DDR CFG module */ + +#define MCHP_PFSOC_DDR_CFG_REG_SIZE 0x40000 + +typedef struct MchpPfSoCDdrCfgState { + SysBusDevice parent; + MemoryRegion cfg; +} MchpPfSoCDdrCfgState; + +#define TYPE_MCHP_PFSOC_DDR_CFG "mchp.pfsoc.ddr_cfg" + +#define MCHP_PFSOC_DDR_CFG(obj) \ + OBJECT_CHECK(MchpPfSoCDdrCfgState, (obj), \ + TYPE_MCHP_PFSOC_DDR_CFG) + +#endif /* MCHP_PFSOC_DMC_H */ diff --git a/include/hw/misc/mchp_pfsoc_ioscb.h b/include/hw/misc/mchp_pfsoc_ioscb.h new file mode 100644 index 000000000..9235523e3 --- /dev/null +++ b/include/hw/misc/mchp_pfsoc_ioscb.h @@ -0,0 +1,50 @@ +/* + * Microchip PolarFire SoC IOSCB module emulation + * + * Copyright (c) 2020 Wind River Systems, Inc. + * + * Author: + * Bin Meng + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation; either version 2 or + * (at your option) version 3 of the License. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, see . + */ + +#ifndef MCHP_PFSOC_IOSCB_H +#define MCHP_PFSOC_IOSCB_H + +typedef struct MchpPfSoCIoscbState { + SysBusDevice parent; + MemoryRegion container; + MemoryRegion lane01; + MemoryRegion lane23; + MemoryRegion ctrl; + MemoryRegion cfg; + MemoryRegion pll_mss; + MemoryRegion cfm_mss; + MemoryRegion pll_ddr; + MemoryRegion bc_ddr; + MemoryRegion io_calib_ddr; + MemoryRegion pll_sgmii; + MemoryRegion dll_sgmii; + MemoryRegion cfm_sgmii; + MemoryRegion bc_sgmii; + MemoryRegion io_calib_sgmii; +} MchpPfSoCIoscbState; + +#define TYPE_MCHP_PFSOC_IOSCB "mchp.pfsoc.ioscb" + +#define MCHP_PFSOC_IOSCB(obj) \ + OBJECT_CHECK(MchpPfSoCIoscbState, (obj), TYPE_MCHP_PFSOC_IOSCB) + +#endif /* MCHP_PFSOC_IOSCB_H */ diff --git a/include/hw/misc/mchp_pfsoc_sysreg.h b/include/hw/misc/mchp_pfsoc_sysreg.h new file mode 100644 index 000000000..546ba68f6 --- /dev/null +++ b/include/hw/misc/mchp_pfsoc_sysreg.h @@ -0,0 +1,39 @@ +/* + * Microchip PolarFire SoC SYSREG module emulation + * + * Copyright (c) 2020 Wind River Systems, Inc. + * + * Author: + * Bin Meng + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation; either version 2 or + * (at your option) version 3 of the License. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, see . + */ + +#ifndef MCHP_PFSOC_SYSREG_H +#define MCHP_PFSOC_SYSREG_H + +#define MCHP_PFSOC_SYSREG_REG_SIZE 0x2000 + +typedef struct MchpPfSoCSysregState { + SysBusDevice parent; + MemoryRegion sysreg; +} MchpPfSoCSysregState; + +#define TYPE_MCHP_PFSOC_SYSREG "mchp.pfsoc.sysreg" + +#define MCHP_PFSOC_SYSREG(obj) \ + OBJECT_CHECK(MchpPfSoCSysregState, (obj), \ + TYPE_MCHP_PFSOC_SYSREG) + +#endif /* MCHP_PFSOC_SYSREG_H */ diff --git a/include/hw/misc/mips_cmgcr.h b/include/hw/misc/mips_cmgcr.h new file mode 100644 index 000000000..9fa58942d --- /dev/null +++ b/include/hw/misc/mips_cmgcr.h @@ -0,0 +1,91 @@ +/* + * This file is subject to the terms and conditions of the GNU General Public + * License. See the file "COPYING" in the main directory of this archive + * for more details. + * + * Copyright (C) 2015 Imagination Technologies + * + */ + +#ifndef MIPS_CMGCR_H +#define MIPS_CMGCR_H + +#include "hw/sysbus.h" +#include "qom/object.h" + +#define TYPE_MIPS_GCR "mips-gcr" +OBJECT_DECLARE_SIMPLE_TYPE(MIPSGCRState, MIPS_GCR) + +#define GCR_BASE_ADDR 0x1fbf8000ULL +#define GCR_ADDRSPACE_SZ 0x8000 + +/* Offsets to register blocks */ +#define MIPS_GCB_OFS 0x0000 /* Global Control Block */ +#define MIPS_CLCB_OFS 0x2000 /* Core Local Control Block */ +#define MIPS_COCB_OFS 0x4000 /* Core Other Control Block */ +#define MIPS_GDB_OFS 0x6000 /* Global Debug Block */ + +/* Global Control Block Register Map */ +#define GCR_CONFIG_OFS 0x0000 +#define GCR_BASE_OFS 0x0008 +#define GCR_REV_OFS 0x0030 +#define GCR_GIC_BASE_OFS 0x0080 +#define GCR_CPC_BASE_OFS 0x0088 +#define GCR_GIC_STATUS_OFS 0x00D0 +#define GCR_CPC_STATUS_OFS 0x00F0 +#define GCR_L2_CONFIG_OFS 0x0130 + +/* Core Local and Core Other Block Register Map */ +#define GCR_CL_CONFIG_OFS 0x0010 +#define GCR_CL_OTHER_OFS 0x0018 +#define GCR_CL_RESETBASE_OFS 0x0020 + +/* GCR_L2_CONFIG register fields */ +#define GCR_L2_CONFIG_BYPASS_SHF 20 +#define GCR_L2_CONFIG_BYPASS_MSK ((0x1ULL) << GCR_L2_CONFIG_BYPASS_SHF) + +/* GCR_BASE register fields */ +#define GCR_BASE_GCRBASE_MSK 0xffffffff8000ULL + +/* GCR_GIC_BASE register fields */ +#define GCR_GIC_BASE_GICEN_MSK 1 +#define GCR_GIC_BASE_GICBASE_MSK 0xFFFFFFFE0000ULL +#define GCR_GIC_BASE_MSK (GCR_GIC_BASE_GICEN_MSK | GCR_GIC_BASE_GICBASE_MSK) + +/* GCR_CPC_BASE register fields */ +#define GCR_CPC_BASE_CPCEN_MSK 1 +#define GCR_CPC_BASE_CPCBASE_MSK 0xFFFFFFFF8000ULL +#define GCR_CPC_BASE_MSK (GCR_CPC_BASE_CPCEN_MSK | GCR_CPC_BASE_CPCBASE_MSK) + +/* GCR_CL_OTHER_OFS register fields */ +#define GCR_CL_OTHER_VPOTHER_MSK 0x7 +#define GCR_CL_OTHER_MSK GCR_CL_OTHER_VPOTHER_MSK + +/* GCR_CL_RESETBASE_OFS register fields */ +#define GCR_CL_RESET_BASE_RESETBASE_MSK 0xFFFFF000U +#define GCR_CL_RESET_BASE_MSK GCR_CL_RESET_BASE_RESETBASE_MSK + +typedef struct MIPSGCRVPState MIPSGCRVPState; +struct MIPSGCRVPState { + uint32_t other; + uint64_t reset_base; +}; + +struct MIPSGCRState { + SysBusDevice parent_obj; + + int32_t gcr_rev; + int32_t num_vps; + hwaddr gcr_base; + MemoryRegion iomem; + MemoryRegion *cpc_mr; + MemoryRegion *gic_mr; + + uint64_t cpc_base; + uint64_t gic_base; + + /* VP Local/Other Registers */ + MIPSGCRVPState *vps; +}; + +#endif /* MIPS_CMGCR_H */ diff --git a/include/hw/misc/mips_cpc.h b/include/hw/misc/mips_cpc.h new file mode 100644 index 000000000..fcafbd5e0 --- /dev/null +++ b/include/hw/misc/mips_cpc.h @@ -0,0 +1,50 @@ +/* + * Cluster Power Controller emulation + * + * Copyright (c) 2016 Imagination Technologies + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, see . + */ + +#ifndef MIPS_CPC_H +#define MIPS_CPC_H + +#include "hw/sysbus.h" +#include "qom/object.h" + +#define CPC_ADDRSPACE_SZ 0x6000 + +/* CPC blocks offsets relative to base address */ +#define CPC_CL_BASE_OFS 0x2000 +#define CPC_CO_BASE_OFS 0x4000 + +/* CPC register offsets relative to block offsets */ +#define CPC_VP_STOP_OFS 0x20 +#define CPC_VP_RUN_OFS 0x28 +#define CPC_VP_RUNNING_OFS 0x30 + +#define TYPE_MIPS_CPC "mips-cpc" +OBJECT_DECLARE_SIMPLE_TYPE(MIPSCPCState, MIPS_CPC) + +struct MIPSCPCState { + SysBusDevice parent_obj; + + uint32_t num_vp; + uint64_t vp_start_running; /* VPs running from restart */ + + MemoryRegion mr; + uint64_t vp_running; /* Indicates which VPs are in the run state */ +}; + +#endif /* MIPS_CPC_H */ diff --git a/include/hw/misc/mips_itu.h b/include/hw/misc/mips_itu.h new file mode 100644 index 000000000..50d961106 --- /dev/null +++ b/include/hw/misc/mips_itu.h @@ -0,0 +1,83 @@ +/* + * Inter-Thread Communication Unit emulation. + * + * Copyright (c) 2016 Imagination Technologies + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, see . + */ + +#ifndef MIPS_ITU_H +#define MIPS_ITU_H + +#include "hw/sysbus.h" +#include "qom/object.h" + +#define TYPE_MIPS_ITU "mips-itu" +OBJECT_DECLARE_SIMPLE_TYPE(MIPSITUState, MIPS_ITU) + +#define ITC_CELL_DEPTH_SHIFT 2 +#define ITC_CELL_DEPTH (1u << ITC_CELL_DEPTH_SHIFT) + +typedef struct ITCStorageCell { + struct { + uint8_t FIFODepth; /* Log2 of the cell depth */ + uint8_t FIFOPtr; /* Number of elements in a FIFO cell */ + uint8_t FIFO; /* 1 - FIFO cell, 0 - Semaphore cell */ + uint8_t T; /* Trap Bit */ + uint8_t F; /* Full Bit */ + uint8_t E; /* Empty Bit */ + } tag; + + /* Index of the oldest element in the queue */ + uint8_t fifo_out; + + /* Circular buffer for FIFO. Semaphore cells use index 0 only */ + uint64_t data[ITC_CELL_DEPTH]; + + /* Bitmap tracking blocked threads on the cell. + TODO: support >64 threads ? */ + uint64_t blocked_threads; +} ITCStorageCell; + +#define ITC_ADDRESSMAP_NUM 2 + +struct MIPSITUState { + /*< private >*/ + SysBusDevice parent_obj; + /*< public >*/ + + int32_t num_fifo; + int32_t num_semaphores; + + /* ITC Storage */ + ITCStorageCell *cell; + MemoryRegion storage_io; + + /* ITC Configuration Tags */ + uint64_t ITCAddressMap[ITC_ADDRESSMAP_NUM]; + MemoryRegion tag_io; + + /* ITU Control Register */ + uint64_t icr0; + + /* SAAR */ + bool saar_present; + void *saar; + +}; + +/* Get ITC Configuration Tag memory region. */ +MemoryRegion *mips_itu_get_tag_region(MIPSITUState *itu); + +#endif /* MIPS_ITU_H */ diff --git a/include/hw/misc/mos6522.h b/include/hw/misc/mos6522.h new file mode 100644 index 000000000..fc95d22b0 --- /dev/null +++ b/include/hw/misc/mos6522.h @@ -0,0 +1,150 @@ +/* + * QEMU MOS6522 VIA emulation + * + * Copyright (c) 2004-2007 Fabrice Bellard + * Copyright (c) 2007 Jocelyn Mayer + * Copyright (c) 2018 Mark Cave-Ayland + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ + +#ifndef MOS6522_H +#define MOS6522_H + +#include "exec/memory.h" +#include "hw/sysbus.h" +#include "hw/input/adb.h" +#include "qom/object.h" + +/* Bits in ACR */ +#define SR_CTRL 0x1c /* Shift register control bits */ +#define SR_EXT 0x0c /* Shift on external clock */ +#define SR_OUT 0x10 /* Shift out if 1 */ + +/* Bits in IFR and IER */ +#define IER_SET 0x80 /* set bits in IER */ +#define IER_CLR 0 /* clear bits in IER */ + +#define CA2_INT 0x01 +#define CA1_INT 0x02 +#define SR_INT 0x04 /* Shift register full/empty */ +#define CB2_INT 0x08 +#define CB1_INT 0x10 +#define T2_INT 0x20 /* Timer 2 interrupt */ +#define T1_INT 0x40 /* Timer 1 interrupt */ + +/* Bits in ACR */ +#define T1MODE 0xc0 /* Timer 1 mode */ +#define T1MODE_CONT 0x40 /* continuous interrupts */ + +/* VIA registers */ +#define VIA_REG_B 0x00 +#define VIA_REG_A 0x01 +#define VIA_REG_DIRB 0x02 +#define VIA_REG_DIRA 0x03 +#define VIA_REG_T1CL 0x04 +#define VIA_REG_T1CH 0x05 +#define VIA_REG_T1LL 0x06 +#define VIA_REG_T1LH 0x07 +#define VIA_REG_T2CL 0x08 +#define VIA_REG_T2CH 0x09 +#define VIA_REG_SR 0x0a +#define VIA_REG_ACR 0x0b +#define VIA_REG_PCR 0x0c +#define VIA_REG_IFR 0x0d +#define VIA_REG_IER 0x0e +#define VIA_REG_ANH 0x0f + +/** + * MOS6522Timer: + * @counter_value: counter value at load time + */ +typedef struct MOS6522Timer { + int index; + uint16_t latch; + uint16_t counter_value; + int64_t load_time; + int64_t next_irq_time; + uint64_t frequency; + QEMUTimer *timer; +} MOS6522Timer; + +/** + * MOS6522State: + * @b: B-side data + * @a: A-side data + * @dirb: B-side direction (1=output) + * @dira: A-side direction (1=output) + * @sr: Shift register + * @acr: Auxiliary control register + * @pcr: Peripheral control register + * @ifr: Interrupt flag register + * @ier: Interrupt enable register + * @anh: A-side data, no handshake + * @last_b: last value of B register + * @last_acr: last value of ACR register + */ +struct MOS6522State { + /*< private >*/ + SysBusDevice parent_obj; + /*< public >*/ + + MemoryRegion mem; + /* VIA registers */ + uint8_t b; + uint8_t a; + uint8_t dirb; + uint8_t dira; + uint8_t sr; + uint8_t acr; + uint8_t pcr; + uint8_t ifr; + uint8_t ier; + + MOS6522Timer timers[2]; + uint64_t frequency; + + qemu_irq irq; +}; + +#define TYPE_MOS6522 "mos6522" +OBJECT_DECLARE_TYPE(MOS6522State, MOS6522DeviceClass, MOS6522) + +struct MOS6522DeviceClass { + DeviceClass parent_class; + + DeviceReset parent_reset; + void (*set_sr_int)(MOS6522State *dev); + void (*portB_write)(MOS6522State *dev); + void (*portA_write)(MOS6522State *dev); + void (*update_irq)(MOS6522State *dev); + /* These are used to influence the CUDA MacOS timebase calibration */ + uint64_t (*get_timer1_counter_value)(MOS6522State *dev, MOS6522Timer *ti); + uint64_t (*get_timer2_counter_value)(MOS6522State *dev, MOS6522Timer *ti); + uint64_t (*get_timer1_load_time)(MOS6522State *dev, MOS6522Timer *ti); + uint64_t (*get_timer2_load_time)(MOS6522State *dev, MOS6522Timer *ti); +}; + + +extern const VMStateDescription vmstate_mos6522; + +uint64_t mos6522_read(void *opaque, hwaddr addr, unsigned size); +void mos6522_write(void *opaque, hwaddr addr, uint64_t val, unsigned size); + +#endif /* MOS6522_H */ diff --git a/include/hw/misc/mps2-fpgaio.h b/include/hw/misc/mps2-fpgaio.h new file mode 100644 index 000000000..a010fdb2b --- /dev/null +++ b/include/hw/misc/mps2-fpgaio.h @@ -0,0 +1,56 @@ +/* + * ARM MPS2 FPGAIO emulation + * + * Copyright (c) 2018 Linaro Limited + * Written by Peter Maydell + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 or + * (at your option) any later version. + */ + +/* This is a model of the FPGAIO register block in the AN505 + * FPGA image for the MPS2 dev board; it is documented in the + * application note: + * http://infocenter.arm.com/help/topic/com.arm.doc.dai0505b/index.html + * + * QEMU interface: + * + sysbus MMIO region 0: the register bank + */ + +#ifndef MPS2_FPGAIO_H +#define MPS2_FPGAIO_H + +#include "hw/sysbus.h" +#include "hw/misc/led.h" +#include "qom/object.h" + +#define TYPE_MPS2_FPGAIO "mps2-fpgaio" +OBJECT_DECLARE_SIMPLE_TYPE(MPS2FPGAIO, MPS2_FPGAIO) + +struct MPS2FPGAIO { + /*< private >*/ + SysBusDevice parent_obj; + + /*< public >*/ + MemoryRegion iomem; + LEDState *led[2]; + + uint32_t led0; + uint32_t prescale; + uint32_t misc; + + /* QEMU_CLOCK_VIRTUAL time at which counter and pscntr were last synced */ + int64_t pscntr_sync_ticks; + /* Values of COUNTER and PSCNTR at time pscntr_sync_ticks */ + uint32_t counter; + uint32_t pscntr; + + uint32_t prescale_clk; + + /* These hold the CLOCK_VIRTUAL ns tick when the CLK1HZ/CLK100HZ was zero */ + int64_t clk1hz_tick_offset; + int64_t clk100hz_tick_offset; +}; + +#endif diff --git a/include/hw/misc/mps2-scc.h b/include/hw/misc/mps2-scc.h new file mode 100644 index 000000000..f65d87320 --- /dev/null +++ b/include/hw/misc/mps2-scc.h @@ -0,0 +1,46 @@ +/* + * ARM MPS2 SCC emulation + * + * Copyright (c) 2017 Linaro Limited + * Written by Peter Maydell + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 or + * (at your option) any later version. + */ + +#ifndef MPS2_SCC_H +#define MPS2_SCC_H + +#include "hw/sysbus.h" +#include "hw/misc/led.h" +#include "qom/object.h" + +#define TYPE_MPS2_SCC "mps2-scc" +OBJECT_DECLARE_SIMPLE_TYPE(MPS2SCC, MPS2_SCC) + +#define NUM_OSCCLK 3 + +struct MPS2SCC { + /*< private >*/ + SysBusDevice parent_obj; + + /*< public >*/ + MemoryRegion iomem; + LEDState *led[8]; + + uint32_t cfg0; + uint32_t cfg1; + uint32_t cfg4; + uint32_t cfgdata_rtn; + uint32_t cfgdata_out; + uint32_t cfgctrl; + uint32_t cfgstat; + uint32_t dll; + uint32_t aid; + uint32_t id; + uint32_t oscclk[NUM_OSCCLK]; + uint32_t oscclk_reset[NUM_OSCCLK]; +}; + +#endif diff --git a/include/hw/misc/msf2-sysreg.h b/include/hw/misc/msf2-sysreg.h new file mode 100644 index 000000000..fc1890e71 --- /dev/null +++ b/include/hw/misc/msf2-sysreg.h @@ -0,0 +1,78 @@ +/* + * Microsemi SmartFusion2 SYSREG + * + * Copyright (c) 2017 Subbaraya Sundeep + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ + +#ifndef HW_MSF2_SYSREG_H +#define HW_MSF2_SYSREG_H + +#include "hw/sysbus.h" +#include "qom/object.h" + +enum { + ESRAM_CR = 0x00 / 4, + ESRAM_MAX_LAT, + DDR_CR, + ENVM_CR, + ENVM_REMAP_BASE_CR, + ENVM_REMAP_FAB_CR, + CC_CR, + CC_REGION_CR, + CC_LOCK_BASE_ADDR_CR, + CC_FLUSH_INDX_CR, + DDRB_BUF_TIMER_CR, + DDRB_NB_ADDR_CR, + DDRB_NB_SIZE_CR, + DDRB_CR, + + SOFT_RESET_CR = 0x48 / 4, + M3_CR, + + GPIO_SYSRESET_SEL_CR = 0x58 / 4, + + MDDR_CR = 0x60 / 4, + + MSSDDR_PLL_STATUS_LOW_CR = 0x90 / 4, + MSSDDR_PLL_STATUS_HIGH_CR, + MSSDDR_FACC1_CR, + MSSDDR_FACC2_CR, + + MSSDDR_PLL_STATUS = 0x150 / 4, +}; + +#define MSF2_SYSREG_MMIO_SIZE 0x300 + +#define TYPE_MSF2_SYSREG "msf2-sysreg" +OBJECT_DECLARE_SIMPLE_TYPE(MSF2SysregState, MSF2_SYSREG) + +struct MSF2SysregState { + SysBusDevice parent_obj; + + MemoryRegion iomem; + + uint8_t apb0div; + uint8_t apb1div; + + uint32_t regs[MSF2_SYSREG_MMIO_SIZE / 4]; +}; + +#endif /* HW_MSF2_SYSREG_H */ diff --git a/include/hw/misc/npcm7xx_clk.h b/include/hw/misc/npcm7xx_clk.h new file mode 100644 index 000000000..2338fbbdb --- /dev/null +++ b/include/hw/misc/npcm7xx_clk.h @@ -0,0 +1,50 @@ +/* + * Nuvoton NPCM7xx Clock Control Registers. + * + * Copyright 2020 Google LLC + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * for more details. + */ +#ifndef NPCM7XX_CLK_H +#define NPCM7XX_CLK_H + +#include "exec/memory.h" +#include "hw/sysbus.h" + +/* + * The reference clock frequency for the timer modules, and the SECCNT and + * CNTR25M registers in this module, is always 25 MHz. + */ +#define NPCM7XX_TIMER_REF_HZ (25000000) + +/* + * Number of registers in our device state structure. Don't change this without + * incrementing the version_id in the vmstate. + */ +#define NPCM7XX_CLK_NR_REGS (0x70 / sizeof(uint32_t)) + +#define NPCM7XX_WATCHDOG_RESET_GPIO_IN "npcm7xx-clk-watchdog-reset-gpio-in" + +typedef struct NPCM7xxCLKState { + SysBusDevice parent; + + MemoryRegion iomem; + + uint32_t regs[NPCM7XX_CLK_NR_REGS]; + + /* Time reference for SECCNT and CNTR25M, initialized by power on reset */ + int64_t ref_ns; +} NPCM7xxCLKState; + +#define TYPE_NPCM7XX_CLK "npcm7xx-clk" +#define NPCM7XX_CLK(obj) OBJECT_CHECK(NPCM7xxCLKState, (obj), TYPE_NPCM7XX_CLK) + +#endif /* NPCM7XX_CLK_H */ diff --git a/include/hw/misc/npcm7xx_gcr.h b/include/hw/misc/npcm7xx_gcr.h new file mode 100644 index 000000000..13109d9d3 --- /dev/null +++ b/include/hw/misc/npcm7xx_gcr.h @@ -0,0 +1,43 @@ +/* + * Nuvoton NPCM7xx System Global Control Registers. + * + * Copyright 2020 Google LLC + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * for more details. + */ +#ifndef NPCM7XX_GCR_H +#define NPCM7XX_GCR_H + +#include "exec/memory.h" +#include "hw/sysbus.h" + +/* + * Number of registers in our device state structure. Don't change this without + * incrementing the version_id in the vmstate. + */ +#define NPCM7XX_GCR_NR_REGS (0x148 / sizeof(uint32_t)) + +typedef struct NPCM7xxGCRState { + SysBusDevice parent; + + MemoryRegion iomem; + + uint32_t regs[NPCM7XX_GCR_NR_REGS]; + + uint32_t reset_pwron; + uint32_t reset_mdlr; + uint32_t reset_intcr3; +} NPCM7xxGCRState; + +#define TYPE_NPCM7XX_GCR "npcm7xx-gcr" +#define NPCM7XX_GCR(obj) OBJECT_CHECK(NPCM7xxGCRState, (obj), TYPE_NPCM7XX_GCR) + +#endif /* NPCM7XX_GCR_H */ diff --git a/include/hw/misc/npcm7xx_rng.h b/include/hw/misc/npcm7xx_rng.h new file mode 100644 index 000000000..5e85fd439 --- /dev/null +++ b/include/hw/misc/npcm7xx_rng.h @@ -0,0 +1,34 @@ +/* + * Nuvoton NPCM7xx Random Number Generator. + * + * Copyright 2020 Google LLC + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * for more details. + */ +#ifndef NPCM7XX_RNG_H +#define NPCM7XX_RNG_H + +#include "hw/sysbus.h" + +typedef struct NPCM7xxRNGState { + SysBusDevice parent; + + MemoryRegion iomem; + + uint8_t rngcs; + uint8_t rngd; + uint8_t rngmode; +} NPCM7xxRNGState; + +#define TYPE_NPCM7XX_RNG "npcm7xx-rng" +#define NPCM7XX_RNG(obj) OBJECT_CHECK(NPCM7xxRNGState, (obj), TYPE_NPCM7XX_RNG) + +#endif /* NPCM7XX_RNG_H */ diff --git a/include/hw/misc/nrf51_rng.h b/include/hw/misc/nrf51_rng.h new file mode 100644 index 000000000..9aff9a76f --- /dev/null +++ b/include/hw/misc/nrf51_rng.h @@ -0,0 +1,85 @@ +/* + * nRF51 Random Number Generator + * + * QEMU interface: + * + Property "period_unfiltered_us": Time between two biased values in + * microseconds. + * + Property "period_filtered_us": Time between two unbiased values in + * microseconds. + * + sysbus MMIO regions 0: Memory Region with tasks, events and registers + * to be mapped to the peripherals instance address by the SOC. + * + Named GPIO output "irq": Interrupt line of the peripheral. Must be + * connected to the associated peripheral interrupt line of the NVIC. + * + Named GPIO output "eep_valrdy": Event set when new random value is ready + * to be read. + * + Named GPIO input "tep_start": Task that triggers start of continuous + * generation of random values. + * + Named GPIO input "tep_stop": Task that ends continuous generation of + * random values. + * + * Accuracy of the peripheral model: + * + Stochastic properties of different configurations of the random source + * are not modeled. + * + Generation of unfiltered and filtered random values take at least the + * average generation time stated in the production specification; + * non-deterministic generation times are not modeled. + * + * Copyright 2018 Steffen Görtz + * + * This code is licensed under the GPL version 2 or later. See + * the COPYING file in the top-level directory. + * + */ + +#ifndef NRF51_RNG_H +#define NRF51_RNG_H + +#include "hw/sysbus.h" +#include "qemu/timer.h" +#include "qom/object.h" +#define TYPE_NRF51_RNG "nrf51_soc.rng" +OBJECT_DECLARE_SIMPLE_TYPE(NRF51RNGState, NRF51_RNG) + +#define NRF51_RNG_SIZE 0x1000 + +#define NRF51_RNG_TASK_START 0x000 +#define NRF51_RNG_TASK_STOP 0x004 +#define NRF51_RNG_EVENT_VALRDY 0x100 +#define NRF51_RNG_REG_SHORTS 0x200 +#define NRF51_RNG_REG_SHORTS_VALRDY_STOP 0 +#define NRF51_RNG_REG_INTEN 0x300 +#define NRF51_RNG_REG_INTEN_VALRDY 0 +#define NRF51_RNG_REG_INTENSET 0x304 +#define NRF51_RNG_REG_INTENCLR 0x308 +#define NRF51_RNG_REG_CONFIG 0x504 +#define NRF51_RNG_REG_CONFIG_DECEN 0 +#define NRF51_RNG_REG_VALUE 0x508 + +struct NRF51RNGState { + SysBusDevice parent_obj; + + MemoryRegion mmio; + qemu_irq irq; + + /* Event End Points */ + qemu_irq eep_valrdy; + + QEMUTimer timer; + + /* Time between generation of successive unfiltered values in us */ + uint16_t period_unfiltered_us; + /* Time between generation of successive filtered values in us */ + uint16_t period_filtered_us; + + uint8_t value; + + uint32_t active; + uint32_t event_valrdy; + uint32_t shortcut_stop_on_valrdy; + uint32_t interrupt_enabled; + uint32_t filter_enabled; + +}; + + +#endif /* NRF51_RNG_H */ diff --git a/include/hw/misc/pca9552.h b/include/hw/misc/pca9552.h new file mode 100644 index 000000000..b6f4e264f --- /dev/null +++ b/include/hw/misc/pca9552.h @@ -0,0 +1,37 @@ +/* + * PCA9552 I2C LED blinker + * + * Copyright (c) 2017-2018, IBM Corporation. + * + * This work is licensed under the terms of the GNU GPL, version 2 or + * later. See the COPYING file in the top-level directory. + */ +#ifndef PCA9552_H +#define PCA9552_H + +#include "hw/i2c/i2c.h" +#include "qom/object.h" + +#define TYPE_PCA9552 "pca9552" +#define TYPE_PCA955X "pca955x" +typedef struct PCA955xState PCA955xState; +DECLARE_INSTANCE_CHECKER(PCA955xState, PCA955X, + TYPE_PCA955X) + +#define PCA955X_NR_REGS 10 +#define PCA955X_PIN_COUNT_MAX 16 + +struct PCA955xState { + /*< private >*/ + I2CSlave i2c; + /*< public >*/ + + uint8_t len; + uint8_t pointer; + + uint8_t regs[PCA955X_NR_REGS]; + qemu_irq gpio[PCA955X_PIN_COUNT_MAX]; + char *description; /* For debugging purpose only */ +}; + +#endif diff --git a/include/hw/misc/pca9552_regs.h b/include/hw/misc/pca9552_regs.h new file mode 100644 index 000000000..d8051cfbd --- /dev/null +++ b/include/hw/misc/pca9552_regs.h @@ -0,0 +1,32 @@ +/* + * PCA9552 I2C LED blinker registers + * + * Copyright (c) 2017-2018, IBM Corporation. + * + * This work is licensed under the terms of the GNU GPL, version 2 or + * later. See the COPYING file in the top-level directory. + */ +#ifndef PCA9552_REGS_H +#define PCA9552_REGS_H + +/* + * Bits [0:3] are used to address a specific register. + */ +#define PCA9552_INPUT0 0 /* read only input register 0 */ +#define PCA9552_INPUT1 1 /* read only input register 1 */ +#define PCA9552_PSC0 2 /* read/write frequency prescaler 0 */ +#define PCA9552_PWM0 3 /* read/write PWM register 0 */ +#define PCA9552_PSC1 4 /* read/write frequency prescaler 1 */ +#define PCA9552_PWM1 5 /* read/write PWM register 1 */ +#define PCA9552_LS0 6 /* read/write LED0 to LED3 selector */ +#define PCA9552_LS1 7 /* read/write LED4 to LED7 selector */ +#define PCA9552_LS2 8 /* read/write LED8 to LED11 selector */ +#define PCA9552_LS3 9 /* read/write LED12 to LED15 selector */ + +/* + * Bit [4] is used to activate the Auto-Increment option of the + * register address + */ +#define PCA9552_AUTOINC (1 << 4) + +#endif diff --git a/include/hw/misc/pvpanic.h b/include/hw/misc/pvpanic.h new file mode 100644 index 000000000..ae0c8188c --- /dev/null +++ b/include/hw/misc/pvpanic.h @@ -0,0 +1,33 @@ +/* + * QEMU simulated pvpanic device. + * + * Copyright Fujitsu, Corp. 2013 + * + * Authors: + * Wen Congyang + * Hu Tao + * + * This work is licensed under the terms of the GNU GPL, version 2 or later. + * See the COPYING file in the top-level directory. + * + */ + +#ifndef HW_MISC_PVPANIC_H +#define HW_MISC_PVPANIC_H + +#include "qom/object.h" + +#define TYPE_PVPANIC "pvpanic" + +#define PVPANIC_IOPORT_PROP "ioport" + +static inline uint16_t pvpanic_port(void) +{ + Object *o = object_resolve_path_type("", TYPE_PVPANIC, NULL); + if (!o) { + return 0; + } + return object_property_get_uint(o, PVPANIC_IOPORT_PROP, NULL); +} + +#endif diff --git a/include/hw/misc/sifive_e_prci.h b/include/hw/misc/sifive_e_prci.h new file mode 100644 index 000000000..262ca1618 --- /dev/null +++ b/include/hw/misc/sifive_e_prci.h @@ -0,0 +1,73 @@ +/* + * QEMU SiFive E PRCI (Power, Reset, Clock, Interrupt) interface + * + * Copyright (c) 2017 SiFive, Inc. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2 or later, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along with + * this program. If not, see . + */ + +#ifndef HW_SIFIVE_E_PRCI_H +#define HW_SIFIVE_E_PRCI_H +#include "qom/object.h" + +enum { + SIFIVE_E_PRCI_HFROSCCFG = 0x0, + SIFIVE_E_PRCI_HFXOSCCFG = 0x4, + SIFIVE_E_PRCI_PLLCFG = 0x8, + SIFIVE_E_PRCI_PLLOUTDIV = 0xC +}; + +enum { + SIFIVE_E_PRCI_HFROSCCFG_RDY = (1 << 31), + SIFIVE_E_PRCI_HFROSCCFG_EN = (1 << 30) +}; + +enum { + SIFIVE_E_PRCI_HFXOSCCFG_RDY = (1 << 31), + SIFIVE_E_PRCI_HFXOSCCFG_EN = (1 << 30) +}; + +enum { + SIFIVE_E_PRCI_PLLCFG_PLLSEL = (1 << 16), + SIFIVE_E_PRCI_PLLCFG_REFSEL = (1 << 17), + SIFIVE_E_PRCI_PLLCFG_BYPASS = (1 << 18), + SIFIVE_E_PRCI_PLLCFG_LOCK = (1 << 31) +}; + +enum { + SIFIVE_E_PRCI_PLLOUTDIV_DIV1 = (1 << 8) +}; + +#define SIFIVE_E_PRCI_REG_SIZE 0x1000 + +#define TYPE_SIFIVE_E_PRCI "riscv.sifive.e.prci" + +typedef struct SiFiveEPRCIState SiFiveEPRCIState; +DECLARE_INSTANCE_CHECKER(SiFiveEPRCIState, SIFIVE_E_PRCI, + TYPE_SIFIVE_E_PRCI) + +struct SiFiveEPRCIState { + /*< private >*/ + SysBusDevice parent_obj; + + /*< public >*/ + MemoryRegion mmio; + uint32_t hfrosccfg; + uint32_t hfxosccfg; + uint32_t pllcfg; + uint32_t plloutdiv; +}; + +DeviceState *sifive_e_prci_create(hwaddr addr); + +#endif diff --git a/include/hw/misc/sifive_test.h b/include/hw/misc/sifive_test.h new file mode 100644 index 000000000..88a38d00c --- /dev/null +++ b/include/hw/misc/sifive_test.h @@ -0,0 +1,47 @@ +/* + * QEMU Test Finisher interface + * + * Copyright (c) 2018 SiFive, Inc. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2 or later, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along with + * this program. If not, see . + */ + +#ifndef HW_SIFIVE_TEST_H +#define HW_SIFIVE_TEST_H + +#include "hw/sysbus.h" +#include "qom/object.h" + +#define TYPE_SIFIVE_TEST "riscv.sifive.test" + +typedef struct SiFiveTestState SiFiveTestState; +DECLARE_INSTANCE_CHECKER(SiFiveTestState, SIFIVE_TEST, + TYPE_SIFIVE_TEST) + +struct SiFiveTestState { + /*< private >*/ + SysBusDevice parent_obj; + + /*< public >*/ + MemoryRegion mmio; +}; + +enum { + FINISHER_FAIL = 0x3333, + FINISHER_PASS = 0x5555, + FINISHER_RESET = 0x7777 +}; + +DeviceState *sifive_test_create(hwaddr addr); + +#endif diff --git a/include/hw/misc/sifive_u_otp.h b/include/hw/misc/sifive_u_otp.h new file mode 100644 index 000000000..5d0d7df45 --- /dev/null +++ b/include/hw/misc/sifive_u_otp.h @@ -0,0 +1,87 @@ +/* + * QEMU SiFive U OTP (One-Time Programmable) Memory interface + * + * Copyright (c) 2019 Bin Meng + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2 or later, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along with + * this program. If not, see . + */ + +#ifndef HW_SIFIVE_U_OTP_H +#define HW_SIFIVE_U_OTP_H +#include "qom/object.h" + +#define SIFIVE_U_OTP_PA 0x00 +#define SIFIVE_U_OTP_PAIO 0x04 +#define SIFIVE_U_OTP_PAS 0x08 +#define SIFIVE_U_OTP_PCE 0x0C +#define SIFIVE_U_OTP_PCLK 0x10 +#define SIFIVE_U_OTP_PDIN 0x14 +#define SIFIVE_U_OTP_PDOUT 0x18 +#define SIFIVE_U_OTP_PDSTB 0x1C +#define SIFIVE_U_OTP_PPROG 0x20 +#define SIFIVE_U_OTP_PTC 0x24 +#define SIFIVE_U_OTP_PTM 0x28 +#define SIFIVE_U_OTP_PTM_REP 0x2C +#define SIFIVE_U_OTP_PTR 0x30 +#define SIFIVE_U_OTP_PTRIM 0x34 +#define SIFIVE_U_OTP_PWE 0x38 + +#define SIFIVE_U_OTP_PWE_EN (1 << 0) + +#define SIFIVE_U_OTP_PCE_EN (1 << 0) + +#define SIFIVE_U_OTP_PDSTB_EN (1 << 0) + +#define SIFIVE_U_OTP_PTRIM_EN (1 << 0) + +#define SIFIVE_U_OTP_PA_MASK 0xfff +#define SIFIVE_U_OTP_NUM_FUSES 0x1000 +#define SIFIVE_U_OTP_FUSE_WORD 4 +#define SIFIVE_U_OTP_SERIAL_ADDR 0xfc + +#define SIFIVE_U_OTP_REG_SIZE 0x1000 + +#define TYPE_SIFIVE_U_OTP "riscv.sifive.u.otp" + +typedef struct SiFiveUOTPState SiFiveUOTPState; +DECLARE_INSTANCE_CHECKER(SiFiveUOTPState, SIFIVE_U_OTP, + TYPE_SIFIVE_U_OTP) + +struct SiFiveUOTPState { + /*< private >*/ + SysBusDevice parent_obj; + + /*< public >*/ + MemoryRegion mmio; + uint32_t pa; + uint32_t paio; + uint32_t pas; + uint32_t pce; + uint32_t pclk; + uint32_t pdin; + uint32_t pdstb; + uint32_t pprog; + uint32_t ptc; + uint32_t ptm; + uint32_t ptm_rep; + uint32_t ptr; + uint32_t ptrim; + uint32_t pwe; + uint32_t fuse[SIFIVE_U_OTP_NUM_FUSES]; + uint32_t fuse_wo[SIFIVE_U_OTP_NUM_FUSES]; + /* config */ + uint32_t serial; + BlockBackend *blk; +}; + +#endif /* HW_SIFIVE_U_OTP_H */ diff --git a/include/hw/misc/sifive_u_prci.h b/include/hw/misc/sifive_u_prci.h new file mode 100644 index 000000000..d9ebf40b7 --- /dev/null +++ b/include/hw/misc/sifive_u_prci.h @@ -0,0 +1,93 @@ +/* + * QEMU SiFive U PRCI (Power, Reset, Clock, Interrupt) interface + * + * Copyright (c) 2019 Bin Meng + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2 or later, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along with + * this program. If not, see . + */ + +#ifndef HW_SIFIVE_U_PRCI_H +#define HW_SIFIVE_U_PRCI_H +#include "qom/object.h" + +#define SIFIVE_U_PRCI_HFXOSCCFG 0x00 +#define SIFIVE_U_PRCI_COREPLLCFG0 0x04 +#define SIFIVE_U_PRCI_DDRPLLCFG0 0x0C +#define SIFIVE_U_PRCI_DDRPLLCFG1 0x10 +#define SIFIVE_U_PRCI_GEMGXLPLLCFG0 0x1C +#define SIFIVE_U_PRCI_GEMGXLPLLCFG1 0x20 +#define SIFIVE_U_PRCI_CORECLKSEL 0x24 +#define SIFIVE_U_PRCI_DEVICESRESET 0x28 +#define SIFIVE_U_PRCI_CLKMUXSTATUS 0x2C + +/* + * Current FU540-C000 manual says ready bit is at bit 29, but + * freedom-u540-c000-bootloader codes (ux00prci.h) says it is at bit 31. + * We have to trust the actual code that works. + * + * see https://github.com/sifive/freedom-u540-c000-bootloader + */ + +#define SIFIVE_U_PRCI_HFXOSCCFG_EN (1 << 30) +#define SIFIVE_U_PRCI_HFXOSCCFG_RDY (1 << 31) + +/* xxxPLLCFG0 register bits */ +#define SIFIVE_U_PRCI_PLLCFG0_DIVR (1 << 0) +#define SIFIVE_U_PRCI_PLLCFG0_DIVF (31 << 6) +#define SIFIVE_U_PRCI_PLLCFG0_DIVQ (3 << 15) +#define SIFIVE_U_PRCI_PLLCFG0_FSE (1 << 25) +#define SIFIVE_U_PRCI_PLLCFG0_LOCK (1 << 31) + +/* xxxPLLCFG1 register bits */ +#define SIFIVE_U_PRCI_PLLCFG1_CKE (1 << 24) + +/* coreclksel register bits */ +#define SIFIVE_U_PRCI_CORECLKSEL_HFCLK (1 << 0) + + +#define SIFIVE_U_PRCI_REG_SIZE 0x1000 + +#define TYPE_SIFIVE_U_PRCI "riscv.sifive.u.prci" + +typedef struct SiFiveUPRCIState SiFiveUPRCIState; +DECLARE_INSTANCE_CHECKER(SiFiveUPRCIState, SIFIVE_U_PRCI, + TYPE_SIFIVE_U_PRCI) + +struct SiFiveUPRCIState { + /*< private >*/ + SysBusDevice parent_obj; + + /*< public >*/ + MemoryRegion mmio; + uint32_t hfxosccfg; + uint32_t corepllcfg0; + uint32_t ddrpllcfg0; + uint32_t ddrpllcfg1; + uint32_t gemgxlpllcfg0; + uint32_t gemgxlpllcfg1; + uint32_t coreclksel; + uint32_t devicesreset; + uint32_t clkmuxstatus; +}; + +/* + * Clock indexes for use by Device Tree data and the PRCI driver. + * + * These values are from sifive-fu540-prci.h in the Linux kernel. + */ +#define PRCI_CLK_COREPLL 0 +#define PRCI_CLK_DDRPLL 1 +#define PRCI_CLK_GEMGXLPLL 2 +#define PRCI_CLK_TLCLK 3 + +#endif /* HW_SIFIVE_U_PRCI_H */ diff --git a/include/hw/misc/stm32f2xx_syscfg.h b/include/hw/misc/stm32f2xx_syscfg.h new file mode 100644 index 000000000..8595a3b31 --- /dev/null +++ b/include/hw/misc/stm32f2xx_syscfg.h @@ -0,0 +1,58 @@ +/* + * STM32F2XX SYSCFG + * + * Copyright (c) 2014 Alistair Francis + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ + +#ifndef HW_STM32F2XX_SYSCFG_H +#define HW_STM32F2XX_SYSCFG_H + +#include "hw/sysbus.h" +#include "qom/object.h" + +#define SYSCFG_MEMRMP 0x00 +#define SYSCFG_PMC 0x04 +#define SYSCFG_EXTICR1 0x08 +#define SYSCFG_EXTICR2 0x0C +#define SYSCFG_EXTICR3 0x10 +#define SYSCFG_EXTICR4 0x14 +#define SYSCFG_CMPCR 0x20 + +#define TYPE_STM32F2XX_SYSCFG "stm32f2xx-syscfg" +OBJECT_DECLARE_SIMPLE_TYPE(STM32F2XXSyscfgState, STM32F2XX_SYSCFG) + +struct STM32F2XXSyscfgState { + /* */ + SysBusDevice parent_obj; + + /* */ + MemoryRegion mmio; + + uint32_t syscfg_memrmp; + uint32_t syscfg_pmc; + uint32_t syscfg_exticr1; + uint32_t syscfg_exticr2; + uint32_t syscfg_exticr3; + uint32_t syscfg_exticr4; + uint32_t syscfg_cmpcr; +}; + +#endif /* HW_STM32F2XX_SYSCFG_H */ diff --git a/include/hw/misc/stm32f4xx_exti.h b/include/hw/misc/stm32f4xx_exti.h new file mode 100644 index 000000000..24b6fa772 --- /dev/null +++ b/include/hw/misc/stm32f4xx_exti.h @@ -0,0 +1,60 @@ +/* + * STM32F4XX EXTI + * + * Copyright (c) 2014 Alistair Francis + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ + +#ifndef HW_STM_EXTI_H +#define HW_STM_EXTI_H + +#include "hw/sysbus.h" +#include "hw/hw.h" +#include "qom/object.h" + +#define EXTI_IMR 0x00 +#define EXTI_EMR 0x04 +#define EXTI_RTSR 0x08 +#define EXTI_FTSR 0x0C +#define EXTI_SWIER 0x10 +#define EXTI_PR 0x14 + +#define TYPE_STM32F4XX_EXTI "stm32f4xx-exti" +OBJECT_DECLARE_SIMPLE_TYPE(STM32F4xxExtiState, STM32F4XX_EXTI) + +#define NUM_GPIO_EVENT_IN_LINES 16 +#define NUM_INTERRUPT_OUT_LINES 16 + +struct STM32F4xxExtiState { + SysBusDevice parent_obj; + + MemoryRegion mmio; + + uint32_t exti_imr; + uint32_t exti_emr; + uint32_t exti_rtsr; + uint32_t exti_ftsr; + uint32_t exti_swier; + uint32_t exti_pr; + + qemu_irq irq[NUM_INTERRUPT_OUT_LINES]; +}; + +#endif diff --git a/include/hw/misc/stm32f4xx_syscfg.h b/include/hw/misc/stm32f4xx_syscfg.h new file mode 100644 index 000000000..8c31feccd --- /dev/null +++ b/include/hw/misc/stm32f4xx_syscfg.h @@ -0,0 +1,61 @@ +/* + * STM32F4xx SYSCFG + * + * Copyright (c) 2014 Alistair Francis + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ + +#ifndef HW_STM_SYSCFG_H +#define HW_STM_SYSCFG_H + +#include "hw/sysbus.h" +#include "hw/hw.h" +#include "qom/object.h" + +#define SYSCFG_MEMRMP 0x00 +#define SYSCFG_PMC 0x04 +#define SYSCFG_EXTICR1 0x08 +#define SYSCFG_EXTICR2 0x0C +#define SYSCFG_EXTICR3 0x10 +#define SYSCFG_EXTICR4 0x14 +#define SYSCFG_CMPCR 0x20 + +#define TYPE_STM32F4XX_SYSCFG "stm32f4xx-syscfg" +OBJECT_DECLARE_SIMPLE_TYPE(STM32F4xxSyscfgState, STM32F4XX_SYSCFG) + +#define SYSCFG_NUM_EXTICR 4 + +struct STM32F4xxSyscfgState { + /* */ + SysBusDevice parent_obj; + + /* */ + MemoryRegion mmio; + + uint32_t syscfg_memrmp; + uint32_t syscfg_pmc; + uint32_t syscfg_exticr[SYSCFG_NUM_EXTICR]; + uint32_t syscfg_cmpcr; + + qemu_irq irq; + qemu_irq gpio_out[16]; +}; + +#endif diff --git a/include/hw/misc/tmp105_regs.h b/include/hw/misc/tmp105_regs.h new file mode 100644 index 000000000..ef015ee5c --- /dev/null +++ b/include/hw/misc/tmp105_regs.h @@ -0,0 +1,51 @@ +/* + * Texas Instruments TMP105 Temperature Sensor I2C messages + * + * Browse the data sheet: + * + * http://www.ti.com/lit/gpn/tmp105 + * + * Copyright (C) 2012 Alex Horn + * Copyright (C) 2008-2012 Andrzej Zaborowski + * + * This work is licensed under the terms of the GNU GPL, version 2 or + * later. See the COPYING file in the top-level directory. + */ + +#ifndef TMP105_REGS_H +#define TMP105_REGS_H + +/** + * TMP105Reg: + * @TMP105_REG_TEMPERATURE: Temperature register + * @TMP105_REG_CONFIG: Configuration register + * @TMP105_REG_T_LOW: Low temperature register (also known as T_hyst) + * @TMP105_REG_T_HIGH: High temperature register (also known as T_OS) + * + * The following temperature sensors are + * compatible with the TMP105 registers: + * - adt75 + * - ds1775 + * - ds75 + * - lm75 + * - lm75a + * - max6625 + * - max6626 + * - mcp980x + * - stds75 + * - tcn75 + * - tmp100 + * - tmp101 + * - tmp105 + * - tmp175 + * - tmp275 + * - tmp75 + **/ +typedef enum TMP105Reg { + TMP105_REG_TEMPERATURE = 0, + TMP105_REG_CONFIG, + TMP105_REG_T_LOW, + TMP105_REG_T_HIGH, +} TMP105Reg; + +#endif diff --git a/include/hw/misc/tz-mpc.h b/include/hw/misc/tz-mpc.h new file mode 100644 index 000000000..74d5d822c --- /dev/null +++ b/include/hw/misc/tz-mpc.h @@ -0,0 +1,80 @@ +/* + * ARM AHB5 TrustZone Memory Protection Controller emulation + * + * Copyright (c) 2018 Linaro Limited + * Written by Peter Maydell + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 or + * (at your option) any later version. + */ + +/* This is a model of the TrustZone memory protection controller (MPC). + * It is documented in the ARM CoreLink SIE-200 System IP for Embedded TRM + * (DDI 0571G): + * https://developer.arm.com/products/architecture/m-profile/docs/ddi0571/g + * + * The MPC sits in front of memory and allows secure software to + * configure it to either pass through or reject transactions. + * Rejected transactions may be configured to either be aborted, or to + * behave as RAZ/WI. An interrupt can be signalled for a rejected transaction. + * + * The MPC has a register interface which the guest uses to configure it. + * + * QEMU interface: + * + sysbus MMIO region 0: MemoryRegion for the MPC's config registers + * + sysbus MMIO region 1: MemoryRegion for the upstream end of the MPC + * + Property "downstream": MemoryRegion defining the downstream memory + * + Named GPIO output "irq": set for a transaction-failed interrupt + */ + +#ifndef TZ_MPC_H +#define TZ_MPC_H + +#include "hw/sysbus.h" +#include "qom/object.h" + +#define TYPE_TZ_MPC "tz-mpc" +OBJECT_DECLARE_SIMPLE_TYPE(TZMPC, TZ_MPC) + +#define TZ_NUM_PORTS 16 + +#define TYPE_TZ_MPC_IOMMU_MEMORY_REGION "tz-mpc-iommu-memory-region" + + +struct TZMPC { + /*< private >*/ + SysBusDevice parent_obj; + + /*< public >*/ + + /* State */ + uint32_t ctrl; + uint32_t blk_idx; + uint32_t int_stat; + uint32_t int_en; + uint32_t int_info1; + uint32_t int_info2; + + uint32_t *blk_lut; + + qemu_irq irq; + + /* Properties */ + MemoryRegion *downstream; + + hwaddr blocksize; + uint32_t blk_max; + + /* MemoryRegions exposed to user */ + MemoryRegion regmr; + IOMMUMemoryRegion upstream; + + /* MemoryRegion used internally */ + MemoryRegion blocked_io; + + AddressSpace downstream_as; + AddressSpace blocked_io_as; +}; + +#endif diff --git a/include/hw/misc/tz-msc.h b/include/hw/misc/tz-msc.h new file mode 100644 index 000000000..77cc7f240 --- /dev/null +++ b/include/hw/misc/tz-msc.h @@ -0,0 +1,80 @@ +/* + * ARM TrustZone master security controller emulation + * + * Copyright (c) 2018 Linaro Limited + * Written by Peter Maydell + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 or + * (at your option) any later version. + */ + +/* + * This is a model of the TrustZone master security controller (MSC). + * It is documented in the ARM CoreLink SIE-200 System IP for Embedded TRM + * (DDI 0571G): + * https://developer.arm.com/products/architecture/m-profile/docs/ddi0571/g + * + * The MSC sits in front of a device which can be a bus master (such as + * a DMA controller) and allows secure software to configure it to either + * pass through or reject transactions made by that bus master. + * Rejected transactions may be configured to either be aborted, or to + * behave as RAZ/WI. An interrupt can be signalled for a rejected transaction. + * + * The MSC has no register interface -- it is configured purely by a + * collection of input signals from other hardware in the system. Typically + * they are either hardwired or exposed in an ad-hoc register interface by + * the SoC that uses the MSC. + * + * We don't currently implement the irq_enable GPIO input, because on + * the MPS2 FPGA images it is always tied high, which is awkward to + * implement in QEMU. + * + * QEMU interface: + * + Named GPIO input "cfg_nonsec": set to 1 if the bus master should be + * treated as nonsecure, or 0 for secure + * + Named GPIO input "cfg_sec_resp": set to 1 if a rejected transaction should + * result in a transaction error, or 0 for the transaction to RAZ/WI + * + Named GPIO input "irq_clear": set to 1 to clear a pending interrupt + * + Named GPIO output "irq": set for a transaction-failed interrupt + * + Property "downstream": MemoryRegion defining where bus master transactions + * are made if they are not blocked + * + Property "idau": an object implementing IDAUInterface, which defines which + * addresses should be treated as secure and which as non-secure. + * This need not be the same IDAU as the one used by the CPU. + * + sysbus MMIO region 0: MemoryRegion defining the upstream end of the MSC; + * this should be passed to the bus master device as the region it should + * make memory transactions to + */ + +#ifndef TZ_MSC_H +#define TZ_MSC_H + +#include "hw/sysbus.h" +#include "target/arm/idau.h" +#include "qom/object.h" + +#define TYPE_TZ_MSC "tz-msc" +OBJECT_DECLARE_SIMPLE_TYPE(TZMSC, TZ_MSC) + +struct TZMSC { + /*< private >*/ + SysBusDevice parent_obj; + + /*< public >*/ + + /* State: these just track the values of our input signals */ + bool cfg_nonsec; + bool cfg_sec_resp; + bool irq_clear; + /* State: are we asserting irq ? */ + bool irq_status; + + qemu_irq irq; + MemoryRegion *downstream; + AddressSpace downstream_as; + MemoryRegion upstream; + IDAUInterface *idau; +}; + +#endif diff --git a/include/hw/misc/tz-ppc.h b/include/hw/misc/tz-ppc.h new file mode 100644 index 000000000..021d671b2 --- /dev/null +++ b/include/hw/misc/tz-ppc.h @@ -0,0 +1,107 @@ +/* + * ARM TrustZone peripheral protection controller emulation + * + * Copyright (c) 2018 Linaro Limited + * Written by Peter Maydell + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 or + * (at your option) any later version. + */ + +/* This is a model of the TrustZone peripheral protection controller (PPC). + * It is documented in the ARM CoreLink SIE-200 System IP for Embedded TRM + * (DDI 0571G): + * https://developer.arm.com/products/architecture/m-profile/docs/ddi0571/g + * + * The PPC sits in front of peripherals and allows secure software to + * configure it to either pass through or reject transactions. + * Rejected transactions may be configured to either be aborted, or to + * behave as RAZ/WI. An interrupt can be signalled for a rejected transaction. + * + * The PPC has no register interface -- it is configured purely by a + * collection of input signals from other hardware in the system. Typically + * they are either hardwired or exposed in an ad-hoc register interface by + * the SoC that uses the PPC. + * + * This QEMU model can be used to model either the AHB5 or APB4 TZ PPC, + * since the only difference between them is that the AHB version has a + * "default" port which has no security checks applied. In QEMU the default + * port can be emulated simply by wiring its downstream devices directly + * into the parent address space, since the PPC does not need to intercept + * transactions there. + * + * In the hardware, selection of which downstream port to use is done by + * the user's decode logic asserting one of the hsel[] signals. In QEMU, + * we provide 16 MMIO regions, one per port, and the user maps these into + * the desired addresses to implement the address decode. + * + * QEMU interface: + * + sysbus MMIO regions 0..15: MemoryRegions defining the upstream end + * of each of the 16 ports of the PPC. When a port is unused (i.e. no + * downstream MemoryRegion is connected to it) at the end of the 0..15 + * range then no sysbus MMIO region is created for its upstream. When an + * unused port lies in the middle of the range with other used ports at + * higher port numbers, a dummy MMIO region is created to ensure that + * port N's upstream is always sysbus MMIO region N. Dummy regions should + * not be mapped, and will assert if any access is made to them. + * + Property "port[0..15]": MemoryRegion defining the downstream device(s) + * for each of the 16 ports of the PPC + * + Named GPIO inputs "cfg_nonsec[0..15]": set to 1 if the port should be + * accessible to NonSecure transactions + * + Named GPIO inputs "cfg_ap[0..15]": set to 1 if the port should be + * accessible to non-privileged transactions + * + Named GPIO input "cfg_sec_resp": set to 1 if a rejected transaction should + * result in a transaction error, or 0 for the transaction to RAZ/WI + * + Named GPIO input "irq_enable": set to 1 to enable interrupts + * + Named GPIO input "irq_clear": set to 1 to clear a pending interrupt + * + Named GPIO output "irq": set for a transaction-failed interrupt + * + Property "NONSEC_MASK": if a bit is set in this mask then accesses to + * the associated port do not have the TZ security check performed. (This + * corresponds to the hardware allowing this to be set as a Verilog + * parameter.) + */ + +#ifndef TZ_PPC_H +#define TZ_PPC_H + +#include "hw/sysbus.h" +#include "qom/object.h" + +#define TYPE_TZ_PPC "tz-ppc" +OBJECT_DECLARE_SIMPLE_TYPE(TZPPC, TZ_PPC) + +#define TZ_NUM_PORTS 16 + + +typedef struct TZPPCPort { + TZPPC *ppc; + MemoryRegion upstream; + AddressSpace downstream_as; + MemoryRegion *downstream; +} TZPPCPort; + +struct TZPPC { + /*< private >*/ + SysBusDevice parent_obj; + + /*< public >*/ + + /* State: these just track the values of our input signals */ + bool cfg_nonsec[TZ_NUM_PORTS]; + bool cfg_ap[TZ_NUM_PORTS]; + bool cfg_sec_resp; + bool irq_enable; + bool irq_clear; + /* State: are we asserting irq ? */ + bool irq_status; + + qemu_irq irq; + + /* Properties */ + uint32_t nonsec_mask; + + TZPPCPort port[TZ_NUM_PORTS]; +}; + +#endif diff --git a/include/hw/misc/unimp.h b/include/hw/misc/unimp.h new file mode 100644 index 000000000..518d627dc --- /dev/null +++ b/include/hw/misc/unimp.h @@ -0,0 +1,54 @@ +/* + * "Unimplemented" device + * + * Copyright Linaro Limited, 2017 + * Written by Peter Maydell + */ + +#ifndef HW_MISC_UNIMP_H +#define HW_MISC_UNIMP_H + +#include "hw/qdev-properties.h" +#include "hw/sysbus.h" +#include "qapi/error.h" +#include "qom/object.h" + +#define TYPE_UNIMPLEMENTED_DEVICE "unimplemented-device" + +OBJECT_DECLARE_SIMPLE_TYPE(UnimplementedDeviceState, UNIMPLEMENTED_DEVICE) + +struct UnimplementedDeviceState { + SysBusDevice parent_obj; + MemoryRegion iomem; + unsigned offset_fmt_width; + char *name; + uint64_t size; +}; + +/** + * create_unimplemented_device: create and map a dummy device + * @name: name of the device for debug logging + * @base: base address of the device's MMIO region + * @size: size of the device's MMIO region + * + * This utility function creates and maps an instance of unimplemented-device, + * which is a dummy device which simply logs all guest accesses to + * it via the qemu_log LOG_UNIMP debug log. + * The device is mapped at priority -1000, which means that you can + * use it to cover a large region and then map other devices on top of it + * if necessary. + */ +static inline void create_unimplemented_device(const char *name, + hwaddr base, + hwaddr size) +{ + DeviceState *dev = qdev_new(TYPE_UNIMPLEMENTED_DEVICE); + + qdev_prop_set_string(dev, "name", name); + qdev_prop_set_uint64(dev, "size", size); + sysbus_realize_and_unref(SYS_BUS_DEVICE(dev), &error_fatal); + + sysbus_mmio_map_overlap(SYS_BUS_DEVICE(dev), 0, base, -1000); +} + +#endif diff --git a/include/hw/misc/vmcoreinfo.h b/include/hw/misc/vmcoreinfo.h new file mode 100644 index 000000000..0b7b55d40 --- /dev/null +++ b/include/hw/misc/vmcoreinfo.h @@ -0,0 +1,41 @@ +/* + * Virtual Machine coreinfo device + * + * Copyright (C) 2017 Red Hat, Inc. + * + * Authors: Marc-André Lureau + * + * This work is licensed under the terms of the GNU GPL, version 2 or later. + * See the COPYING file in the top-level directory. + * + */ +#ifndef VMCOREINFO_H +#define VMCOREINFO_H + +#include "hw/qdev-core.h" +#include "standard-headers/linux/qemu_fw_cfg.h" +#include "qom/object.h" + +#define VMCOREINFO_DEVICE "vmcoreinfo" +typedef struct VMCoreInfoState VMCoreInfoState; +DECLARE_INSTANCE_CHECKER(VMCoreInfoState, VMCOREINFO, + VMCOREINFO_DEVICE) + +typedef struct fw_cfg_vmcoreinfo FWCfgVMCoreInfo; + +struct VMCoreInfoState { + DeviceState parent_obj; + + bool has_vmcoreinfo; + FWCfgVMCoreInfo vmcoreinfo; +}; + +/* returns NULL unless there is exactly one device */ +static inline VMCoreInfoState *vmcoreinfo_find(void) +{ + Object *o = object_resolve_path_type("", VMCOREINFO_DEVICE, NULL); + + return o ? VMCOREINFO(o) : NULL; +} + +#endif diff --git a/include/hw/misc/zynq-xadc.h b/include/hw/misc/zynq-xadc.h new file mode 100644 index 000000000..602bfb4ab --- /dev/null +++ b/include/hw/misc/zynq-xadc.h @@ -0,0 +1,46 @@ +/* + * Device model for Zynq ADC controller + * + * Copyright (c) 2015 Guenter Roeck + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, see . + */ + +#ifndef ZYNQ_XADC_H +#define ZYNQ_XADC_H + +#include "hw/sysbus.h" +#include "qom/object.h" + +#define ZYNQ_XADC_MMIO_SIZE 0x0020 +#define ZYNQ_XADC_NUM_IO_REGS (ZYNQ_XADC_MMIO_SIZE / 4) +#define ZYNQ_XADC_NUM_ADC_REGS 128 +#define ZYNQ_XADC_FIFO_DEPTH 15 + +#define TYPE_ZYNQ_XADC "xlnx,zynq-xadc" +OBJECT_DECLARE_SIMPLE_TYPE(ZynqXADCState, ZYNQ_XADC) + +struct ZynqXADCState { + /*< private >*/ + SysBusDevice parent_obj; + + /*< public >*/ + MemoryRegion iomem; + + uint32_t regs[ZYNQ_XADC_NUM_IO_REGS]; + uint16_t xadc_regs[ZYNQ_XADC_NUM_ADC_REGS]; + uint16_t xadc_read_reg_previous; + uint16_t xadc_dfifo[ZYNQ_XADC_FIFO_DEPTH]; + uint16_t xadc_dfifo_entries; + + struct IRQState *qemu_irq; + +}; + +#endif /* ZYNQ_XADC_H */ diff --git a/include/hw/net/allwinner-sun8i-emac.h b/include/hw/net/allwinner-sun8i-emac.h new file mode 100644 index 000000000..460a58f1c --- /dev/null +++ b/include/hw/net/allwinner-sun8i-emac.h @@ -0,0 +1,104 @@ +/* + * Allwinner Sun8i Ethernet MAC emulation + * + * Copyright (C) 2019 Niek Linnenbank + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + */ + +#ifndef HW_NET_ALLWINNER_SUN8I_EMAC_H +#define HW_NET_ALLWINNER_SUN8I_EMAC_H + +#include "qom/object.h" +#include "net/net.h" +#include "hw/sysbus.h" + +/** + * Object model + * @{ + */ + +#define TYPE_AW_SUN8I_EMAC "allwinner-sun8i-emac" +OBJECT_DECLARE_SIMPLE_TYPE(AwSun8iEmacState, AW_SUN8I_EMAC) + +/** @} */ + +/** + * Allwinner Sun8i EMAC object instance state + */ +struct AwSun8iEmacState { + /*< private >*/ + SysBusDevice parent_obj; + /*< public >*/ + + /** Maps I/O registers in physical memory */ + MemoryRegion iomem; + + /** Interrupt output signal to notify CPU */ + qemu_irq irq; + + /** Memory region where DMA transfers are done */ + MemoryRegion *dma_mr; + + /** Address space used internally for DMA transfers */ + AddressSpace dma_as; + + /** Generic Network Interface Controller (NIC) for networking API */ + NICState *nic; + + /** Generic Network Interface Controller (NIC) configuration */ + NICConf conf; + + /** + * @name Media Independent Interface (MII) + * @{ + */ + + uint8_t mii_phy_addr; /**< PHY address */ + uint32_t mii_cr; /**< Control */ + uint32_t mii_st; /**< Status */ + uint32_t mii_adv; /**< Advertised Abilities */ + + /** @} */ + + /** + * @name Hardware Registers + * @{ + */ + + uint32_t basic_ctl0; /**< Basic Control 0 */ + uint32_t basic_ctl1; /**< Basic Control 1 */ + uint32_t int_en; /**< Interrupt Enable */ + uint32_t int_sta; /**< Interrupt Status */ + uint32_t frm_flt; /**< Receive Frame Filter */ + + uint32_t rx_ctl0; /**< Receive Control 0 */ + uint32_t rx_ctl1; /**< Receive Control 1 */ + uint32_t rx_desc_head; /**< Receive Descriptor List Address */ + uint32_t rx_desc_curr; /**< Current Receive Descriptor Address */ + + uint32_t tx_ctl0; /**< Transmit Control 0 */ + uint32_t tx_ctl1; /**< Transmit Control 1 */ + uint32_t tx_desc_head; /**< Transmit Descriptor List Address */ + uint32_t tx_desc_curr; /**< Current Transmit Descriptor Address */ + uint32_t tx_flowctl; /**< Transmit Flow Control */ + + uint32_t mii_cmd; /**< Management Interface Command */ + uint32_t mii_data; /**< Management Interface Data */ + + /** @} */ + +}; + +#endif /* HW_NET_ALLWINNER_SUN8I_H */ diff --git a/include/hw/net/allwinner_emac.h b/include/hw/net/allwinner_emac.h new file mode 100644 index 000000000..534e74898 --- /dev/null +++ b/include/hw/net/allwinner_emac.h @@ -0,0 +1,177 @@ +/* + * Emulation of Allwinner EMAC Fast Ethernet controller and + * Realtek RTL8201CP PHY + * + * Copyright (C) 2014 Beniamino Galvani + * + * Allwinner EMAC register definitions from Linux kernel are: + * Copyright 2012 Stefan Roese + * Copyright 2013 Maxime Ripard + * Copyright 1997 Sten Wang + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * version 2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + */ + +#ifndef ALLWINNER_EMAC_H +#define ALLWINNER_EMAC_H + +#include "qemu/units.h" +#include "net/net.h" +#include "qemu/fifo8.h" +#include "hw/net/mii.h" +#include "hw/sysbus.h" +#include "qom/object.h" + +#define TYPE_AW_EMAC "allwinner-emac" +OBJECT_DECLARE_SIMPLE_TYPE(AwEmacState, AW_EMAC) + +/* + * Allwinner EMAC register list + */ +#define EMAC_CTL_REG 0x00 + +#define EMAC_TX_MODE_REG 0x04 +#define EMAC_TX_FLOW_REG 0x08 +#define EMAC_TX_CTL0_REG 0x0C +#define EMAC_TX_CTL1_REG 0x10 +#define EMAC_TX_INS_REG 0x14 +#define EMAC_TX_PL0_REG 0x18 +#define EMAC_TX_PL1_REG 0x1C +#define EMAC_TX_STA_REG 0x20 +#define EMAC_TX_IO_DATA_REG 0x24 +#define EMAC_TX_IO_DATA1_REG 0x28 +#define EMAC_TX_TSVL0_REG 0x2C +#define EMAC_TX_TSVH0_REG 0x30 +#define EMAC_TX_TSVL1_REG 0x34 +#define EMAC_TX_TSVH1_REG 0x38 + +#define EMAC_RX_CTL_REG 0x3C +#define EMAC_RX_HASH0_REG 0x40 +#define EMAC_RX_HASH1_REG 0x44 +#define EMAC_RX_STA_REG 0x48 +#define EMAC_RX_IO_DATA_REG 0x4C +#define EMAC_RX_FBC_REG 0x50 + +#define EMAC_INT_CTL_REG 0x54 +#define EMAC_INT_STA_REG 0x58 + +#define EMAC_MAC_CTL0_REG 0x5C +#define EMAC_MAC_CTL1_REG 0x60 +#define EMAC_MAC_IPGT_REG 0x64 +#define EMAC_MAC_IPGR_REG 0x68 +#define EMAC_MAC_CLRT_REG 0x6C +#define EMAC_MAC_MAXF_REG 0x70 +#define EMAC_MAC_SUPP_REG 0x74 +#define EMAC_MAC_TEST_REG 0x78 +#define EMAC_MAC_MCFG_REG 0x7C +#define EMAC_MAC_MCMD_REG 0x80 +#define EMAC_MAC_MADR_REG 0x84 +#define EMAC_MAC_MWTD_REG 0x88 +#define EMAC_MAC_MRDD_REG 0x8C +#define EMAC_MAC_MIND_REG 0x90 +#define EMAC_MAC_SSRR_REG 0x94 +#define EMAC_MAC_A0_REG 0x98 +#define EMAC_MAC_A1_REG 0x9C +#define EMAC_MAC_A2_REG 0xA0 + +#define EMAC_SAFX_L_REG0 0xA4 +#define EMAC_SAFX_H_REG0 0xA8 +#define EMAC_SAFX_L_REG1 0xAC +#define EMAC_SAFX_H_REG1 0xB0 +#define EMAC_SAFX_L_REG2 0xB4 +#define EMAC_SAFX_H_REG2 0xB8 +#define EMAC_SAFX_L_REG3 0xBC +#define EMAC_SAFX_H_REG3 0xC0 + +/* CTL register fields */ +#define EMAC_CTL_RESET (1 << 0) +#define EMAC_CTL_TX_EN (1 << 1) +#define EMAC_CTL_RX_EN (1 << 2) + +/* TX MODE register fields */ +#define EMAC_TX_MODE_ABORTED_FRAME_EN (1 << 0) +#define EMAC_TX_MODE_DMA_EN (1 << 1) + +/* RX CTL register fields */ +#define EMAC_RX_CTL_AUTO_DRQ_EN (1 << 1) +#define EMAC_RX_CTL_DMA_EN (1 << 2) +#define EMAC_RX_CTL_PASS_ALL_EN (1 << 4) +#define EMAC_RX_CTL_PASS_CTL_EN (1 << 5) +#define EMAC_RX_CTL_PASS_CRC_ERR_EN (1 << 6) +#define EMAC_RX_CTL_PASS_LEN_ERR_EN (1 << 7) +#define EMAC_RX_CTL_PASS_LEN_OOR_EN (1 << 8) +#define EMAC_RX_CTL_ACCEPT_UNICAST_EN (1 << 16) +#define EMAC_RX_CTL_DA_FILTER_EN (1 << 17) +#define EMAC_RX_CTL_ACCEPT_MULTICAST_EN (1 << 20) +#define EMAC_RX_CTL_HASH_FILTER_EN (1 << 21) +#define EMAC_RX_CTL_ACCEPT_BROADCAST_EN (1 << 22) +#define EMAC_RX_CTL_SA_FILTER_EN (1 << 24) +#define EMAC_RX_CTL_SA_FILTER_INVERT_EN (1 << 25) + +/* RX IO DATA register fields */ +#define EMAC_RX_HEADER(len, status) (((len) & 0xffff) | ((status) << 16)) +#define EMAC_RX_IO_DATA_STATUS_CRC_ERR (1 << 4) +#define EMAC_RX_IO_DATA_STATUS_LEN_ERR (3 << 5) +#define EMAC_RX_IO_DATA_STATUS_OK (1 << 7) +#define EMAC_UNDOCUMENTED_MAGIC 0x0143414d /* header for RX frames */ + +/* INT CTL and INT STA registers fields */ +#define EMAC_INT_TX_CHAN(x) (1 << (x)) +#define EMAC_INT_RX (1 << 8) + +/* Due to lack of specifications, size of fifos is chosen arbitrarily */ +#define TX_FIFO_SIZE (4 * KiB) +#define RX_FIFO_SIZE (32 * KiB) + +#define NUM_TX_FIFOS 2 +#define RX_HDR_SIZE 8 +#define CRC_SIZE 4 + +#define PHY_REG_SHIFT 0 +#define PHY_ADDR_SHIFT 8 + +typedef struct RTL8201CPState { + uint16_t bmcr; + uint16_t bmsr; + uint16_t anar; + uint16_t anlpar; +} RTL8201CPState; + +struct AwEmacState { + /*< private >*/ + SysBusDevice parent_obj; + /*< public >*/ + + MemoryRegion iomem; + qemu_irq irq; + NICState *nic; + NICConf conf; + RTL8201CPState mii; + uint8_t phy_addr; + + uint32_t ctl; + uint32_t tx_mode; + uint32_t rx_ctl; + uint32_t int_ctl; + uint32_t int_sta; + uint32_t phy_target; + + Fifo8 rx_fifo; + uint32_t rx_num_packets; + uint32_t rx_packet_size; + uint32_t rx_packet_pos; + + Fifo8 tx_fifo[NUM_TX_FIFOS]; + uint32_t tx_length[NUM_TX_FIFOS]; + uint32_t tx_channel; +}; + +#endif diff --git a/include/hw/net/cadence_gem.h b/include/hw/net/cadence_gem.h new file mode 100644 index 000000000..91ebb5c8a --- /dev/null +++ b/include/hw/net/cadence_gem.h @@ -0,0 +1,97 @@ +/* + * QEMU Cadence GEM emulation + * + * Copyright (c) 2011 Xilinx, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ + +#ifndef CADENCE_GEM_H +#define CADENCE_GEM_H +#include "qom/object.h" + +#define TYPE_CADENCE_GEM "cadence_gem" +OBJECT_DECLARE_SIMPLE_TYPE(CadenceGEMState, CADENCE_GEM) + +#include "net/net.h" +#include "hw/sysbus.h" + +#define CADENCE_GEM_MAXREG (0x00000800 / 4) /* Last valid GEM address */ + +/* Max number of words in a DMA descriptor. */ +#define DESC_MAX_NUM_WORDS 6 + +#define MAX_PRIORITY_QUEUES 8 +#define MAX_TYPE1_SCREENERS 16 +#define MAX_TYPE2_SCREENERS 16 + +#define MAX_JUMBO_FRAME_SIZE_MASK 0x3FFF +#define MAX_FRAME_SIZE MAX_JUMBO_FRAME_SIZE_MASK + +struct CadenceGEMState { + /*< private >*/ + SysBusDevice parent_obj; + + /*< public >*/ + MemoryRegion iomem; + MemoryRegion *dma_mr; + AddressSpace dma_as; + NICState *nic; + NICConf conf; + qemu_irq irq[MAX_PRIORITY_QUEUES]; + + /* Static properties */ + uint8_t num_priority_queues; + uint8_t num_type1_screeners; + uint8_t num_type2_screeners; + uint32_t revision; + uint16_t jumbo_max_len; + + /* GEM registers backing store */ + uint32_t regs[CADENCE_GEM_MAXREG]; + /* Mask of register bits which are write only */ + uint32_t regs_wo[CADENCE_GEM_MAXREG]; + /* Mask of register bits which are read only */ + uint32_t regs_ro[CADENCE_GEM_MAXREG]; + /* Mask of register bits which are clear on read */ + uint32_t regs_rtc[CADENCE_GEM_MAXREG]; + /* Mask of register bits which are write 1 to clear */ + uint32_t regs_w1c[CADENCE_GEM_MAXREG]; + + /* PHY address */ + uint8_t phy_addr; + /* PHY registers backing store */ + uint16_t phy_regs[32]; + + uint8_t phy_loop; /* Are we in phy loopback? */ + + /* The current DMA descriptor pointers */ + uint32_t rx_desc_addr[MAX_PRIORITY_QUEUES]; + uint32_t tx_desc_addr[MAX_PRIORITY_QUEUES]; + + uint8_t can_rx_state; /* Debug only */ + + uint8_t tx_packet[MAX_FRAME_SIZE]; + uint8_t rx_packet[MAX_FRAME_SIZE]; + uint32_t rx_desc[MAX_PRIORITY_QUEUES][DESC_MAX_NUM_WORDS]; + + bool sar_active[4]; +}; + +#endif diff --git a/include/hw/net/ftgmac100.h b/include/hw/net/ftgmac100.h new file mode 100644 index 000000000..765d1538a --- /dev/null +++ b/include/hw/net/ftgmac100.h @@ -0,0 +1,87 @@ +/* + * Faraday FTGMAC100 Gigabit Ethernet + * + * Copyright (C) 2016-2017, IBM Corporation. + * + * This code is licensed under the GPL version 2 or later. See the + * COPYING file in the top-level directory. + */ + +#ifndef FTGMAC100_H +#define FTGMAC100_H +#include "qom/object.h" + +#define TYPE_FTGMAC100 "ftgmac100" +OBJECT_DECLARE_SIMPLE_TYPE(FTGMAC100State, FTGMAC100) + +#include "hw/sysbus.h" +#include "net/net.h" + +/* + * Max frame size for the receiving buffer + */ +#define FTGMAC100_MAX_FRAME_SIZE 9220 + +struct FTGMAC100State { + /*< private >*/ + SysBusDevice parent_obj; + + /*< public >*/ + NICState *nic; + NICConf conf; + qemu_irq irq; + MemoryRegion iomem; + + uint8_t frame[FTGMAC100_MAX_FRAME_SIZE]; + + uint32_t irq_state; + uint32_t isr; + uint32_t ier; + uint32_t rx_enabled; + uint32_t rx_ring; + uint32_t rx_descriptor; + uint32_t tx_ring; + uint32_t tx_descriptor; + uint32_t math[2]; + uint32_t rbsr; + uint32_t itc; + uint32_t aptcr; + uint32_t dblac; + uint32_t revr; + uint32_t fear1; + uint32_t tpafcr; + uint32_t maccr; + uint32_t phycr; + uint32_t phydata; + uint32_t fcr; + + + uint32_t phy_status; + uint32_t phy_control; + uint32_t phy_advertise; + uint32_t phy_int; + uint32_t phy_int_mask; + + bool aspeed; + uint32_t txdes0_edotr; + uint32_t rxdes0_edorr; +}; + +#define TYPE_ASPEED_MII "aspeed-mmi" +OBJECT_DECLARE_SIMPLE_TYPE(AspeedMiiState, ASPEED_MII) + +/* + * AST2600 MII controller + */ +struct AspeedMiiState { + /*< private >*/ + SysBusDevice parent_obj; + + FTGMAC100State *nic; + + MemoryRegion iomem; + uint32_t phycr; + uint32_t phydata; +}; + +#endif diff --git a/include/hw/net/imx_fec.h b/include/hw/net/imx_fec.h new file mode 100644 index 000000000..e3a8755db --- /dev/null +++ b/include/hw/net/imx_fec.h @@ -0,0 +1,280 @@ +/* + * i.MX FEC/ENET Ethernet Controller emulation. + * + * Copyright (c) 2013 Jean-Christophe Dubois. + * + * Based on Coldfire Fast Ethernet Controller emulation. + * + * Copyright (c) 2007 CodeSourcery. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, see . + */ + +#ifndef IMX_FEC_H +#define IMX_FEC_H +#include "qom/object.h" + +#define TYPE_IMX_FEC "imx.fec" +OBJECT_DECLARE_SIMPLE_TYPE(IMXFECState, IMX_FEC) + +#define TYPE_IMX_ENET "imx.enet" + +#include "hw/sysbus.h" +#include "net/net.h" + +#define ENET_EIR 1 +#define ENET_EIMR 2 +#define ENET_RDAR 4 +#define ENET_TDAR 5 +#define ENET_ECR 9 +#define ENET_MMFR 16 +#define ENET_MSCR 17 +#define ENET_MIBC 25 +#define ENET_RCR 33 +#define ENET_TCR 49 +#define ENET_PALR 57 +#define ENET_PAUR 58 +#define ENET_OPD 59 +#define ENET_IAUR 70 +#define ENET_IALR 71 +#define ENET_GAUR 72 +#define ENET_GALR 73 +#define ENET_TFWR 81 +#define ENET_FRBR 83 +#define ENET_FRSR 84 +#define ENET_TDSR1 89 +#define ENET_TDSR2 92 +#define ENET_RDSR 96 +#define ENET_TDSR 97 +#define ENET_MRBR 98 +#define ENET_RSFL 100 +#define ENET_RSEM 101 +#define ENET_RAEM 102 +#define ENET_RAFL 103 +#define ENET_TSEM 104 +#define ENET_TAEM 105 +#define ENET_TAFL 106 +#define ENET_TIPG 107 +#define ENET_FTRL 108 +#define ENET_TACC 112 +#define ENET_RACC 113 +#define ENET_TDAR1 121 +#define ENET_TDAR2 123 +#define ENET_MIIGSK_CFGR 192 +#define ENET_MIIGSK_ENR 194 +#define ENET_ATCR 256 +#define ENET_ATVR 257 +#define ENET_ATOFF 258 +#define ENET_ATPER 259 +#define ENET_ATCOR 260 +#define ENET_ATINC 261 +#define ENET_ATSTMP 262 +#define ENET_TGSR 385 +#define ENET_TCSR0 386 +#define ENET_TCCR0 387 +#define ENET_TCSR1 388 +#define ENET_TCCR1 389 +#define ENET_TCSR2 390 +#define ENET_TCCR2 391 +#define ENET_TCSR3 392 +#define ENET_TCCR3 393 +#define ENET_MAX 400 + + +/* EIR and EIMR */ +#define ENET_INT_HB (1 << 31) +#define ENET_INT_BABR (1 << 30) +#define ENET_INT_BABT (1 << 29) +#define ENET_INT_GRA (1 << 28) +#define ENET_INT_TXF (1 << 27) +#define ENET_INT_TXB (1 << 26) +#define ENET_INT_RXF (1 << 25) +#define ENET_INT_RXB (1 << 24) +#define ENET_INT_MII (1 << 23) +#define ENET_INT_EBERR (1 << 22) +#define ENET_INT_LC (1 << 21) +#define ENET_INT_RL (1 << 20) +#define ENET_INT_UN (1 << 19) +#define ENET_INT_PLR (1 << 18) +#define ENET_INT_WAKEUP (1 << 17) +#define ENET_INT_TS_AVAIL (1 << 16) +#define ENET_INT_TS_TIMER (1 << 15) +#define ENET_INT_TXF2 (1 << 7) +#define ENET_INT_TXB2 (1 << 6) +#define ENET_INT_TXF1 (1 << 3) +#define ENET_INT_TXB1 (1 << 2) + +#define ENET_INT_MAC (ENET_INT_HB | ENET_INT_BABR | ENET_INT_BABT | \ + ENET_INT_GRA | ENET_INT_TXF | ENET_INT_TXB | \ + ENET_INT_RXF | ENET_INT_RXB | ENET_INT_MII | \ + ENET_INT_EBERR | ENET_INT_LC | ENET_INT_RL | \ + ENET_INT_UN | ENET_INT_PLR | ENET_INT_WAKEUP | \ + ENET_INT_TS_AVAIL | ENET_INT_TXF1 | \ + ENET_INT_TXB1 | ENET_INT_TXF2 | ENET_INT_TXB2) + +/* RDAR */ +#define ENET_RDAR_RDAR (1 << 24) + +/* TDAR */ +#define ENET_TDAR_TDAR (1 << 24) + +/* ECR */ +#define ENET_ECR_RESET (1 << 0) +#define ENET_ECR_ETHEREN (1 << 1) +#define ENET_ECR_MAGICEN (1 << 2) +#define ENET_ECR_SLEEP (1 << 3) +#define ENET_ECR_EN1588 (1 << 4) +#define ENET_ECR_SPEED (1 << 5) +#define ENET_ECR_DBGEN (1 << 6) +#define ENET_ECR_STOPEN (1 << 7) +#define ENET_ECR_DSBWP (1 << 8) + +/* MIBC */ +#define ENET_MIBC_MIB_DIS (1 << 31) +#define ENET_MIBC_MIB_IDLE (1 << 30) +#define ENET_MIBC_MIB_CLEAR (1 << 29) + +/* RCR */ +#define ENET_RCR_LOOP (1 << 0) +#define ENET_RCR_DRT (1 << 1) +#define ENET_RCR_MII_MODE (1 << 2) +#define ENET_RCR_PROM (1 << 3) +#define ENET_RCR_BC_REJ (1 << 4) +#define ENET_RCR_FCE (1 << 5) +#define ENET_RCR_RGMII_EN (1 << 6) +#define ENET_RCR_RMII_MODE (1 << 8) +#define ENET_RCR_RMII_10T (1 << 9) +#define ENET_RCR_PADEN (1 << 12) +#define ENET_RCR_PAUFWD (1 << 13) +#define ENET_RCR_CRCFWD (1 << 14) +#define ENET_RCR_CFEN (1 << 15) +#define ENET_RCR_MAX_FL_SHIFT (16) +#define ENET_RCR_MAX_FL_LENGTH (14) +#define ENET_RCR_NLC (1 << 30) +#define ENET_RCR_GRS (1 << 31) + +#define ENET_MAX_FRAME_SIZE (1 << ENET_RCR_MAX_FL_LENGTH) + +/* TCR */ +#define ENET_TCR_GTS (1 << 0) +#define ENET_TCR_FDEN (1 << 2) +#define ENET_TCR_TFC_PAUSE (1 << 3) +#define ENET_TCR_RFC_PAUSE (1 << 4) +#define ENET_TCR_ADDSEL_SHIFT (5) +#define ENET_TCR_ADDSEL_LENGTH (3) +#define ENET_TCR_CRCFWD (1 << 9) + +/* RDSR */ +#define ENET_TWFR_TFWR_SHIFT (0) +#define ENET_TWFR_TFWR_LENGTH (6) +#define ENET_TWFR_STRFWD (1 << 8) + +#define ENET_RACC_SHIFT16 BIT(7) + +/* Buffer Descriptor. */ +typedef struct { + uint16_t length; + uint16_t flags; + uint32_t data; +} IMXFECBufDesc; + +#define ENET_BD_R (1 << 15) +#define ENET_BD_E (1 << 15) +#define ENET_BD_O1 (1 << 14) +#define ENET_BD_W (1 << 13) +#define ENET_BD_O2 (1 << 12) +#define ENET_BD_L (1 << 11) +#define ENET_BD_TC (1 << 10) +#define ENET_BD_ABC (1 << 9) +#define ENET_BD_M (1 << 8) +#define ENET_BD_BC (1 << 7) +#define ENET_BD_MC (1 << 6) +#define ENET_BD_LG (1 << 5) +#define ENET_BD_NO (1 << 4) +#define ENET_BD_CR (1 << 2) +#define ENET_BD_OV (1 << 1) +#define ENET_BD_TR (1 << 0) + +typedef struct { + uint16_t length; + uint16_t flags; + uint32_t data; + uint16_t status; + uint16_t option; + uint16_t checksum; + uint16_t head_proto; + uint32_t last_buffer; + uint32_t timestamp; + uint32_t reserved[2]; +} IMXENETBufDesc; + +#define ENET_BD_ME (1 << 15) +#define ENET_BD_TX_INT (1 << 14) +#define ENET_BD_TS (1 << 13) +#define ENET_BD_PINS (1 << 12) +#define ENET_BD_IINS (1 << 11) +#define ENET_BD_PE (1 << 10) +#define ENET_BD_CE (1 << 9) +#define ENET_BD_UC (1 << 8) +#define ENET_BD_RX_INT (1 << 7) + +#define ENET_BD_TXE (1 << 15) +#define ENET_BD_UE (1 << 13) +#define ENET_BD_EE (1 << 12) +#define ENET_BD_FE (1 << 11) +#define ENET_BD_LCE (1 << 10) +#define ENET_BD_OE (1 << 9) +#define ENET_BD_TSE (1 << 8) +#define ENET_BD_ICE (1 << 5) +#define ENET_BD_PCR (1 << 4) +#define ENET_BD_VLAN (1 << 2) +#define ENET_BD_IPV6 (1 << 1) +#define ENET_BD_FRAG (1 << 0) + +#define ENET_BD_BDU (1 << 31) + +#define ENET_TX_RING_NUM 3 + +#define FSL_IMX25_FEC_SIZE 0x4000 + +struct IMXFECState { + /*< private >*/ + SysBusDevice parent_obj; + + /*< public >*/ + NICState *nic; + NICConf conf; + qemu_irq irq[2]; + MemoryRegion iomem; + + uint32_t regs[ENET_MAX]; + uint32_t rx_descriptor; + + uint32_t tx_descriptor[ENET_TX_RING_NUM]; + uint32_t tx_ring_num; + + uint32_t phy_status; + uint32_t phy_control; + uint32_t phy_advertise; + uint32_t phy_int; + uint32_t phy_int_mask; + uint32_t phy_num; + + bool is_fec; + + /* Buffer used to assemble a Tx frame */ + uint8_t frame[ENET_MAX_FRAME_SIZE]; +}; + +#endif diff --git a/include/hw/net/lan9118.h b/include/hw/net/lan9118.h new file mode 100644 index 000000000..3d0c67f33 --- /dev/null +++ b/include/hw/net/lan9118.h @@ -0,0 +1,20 @@ +/* + * SMSC LAN9118 Ethernet interface emulation + * + * Copyright (c) 2009 CodeSourcery, LLC. + * Written by Paul Brook + * + * This work is licensed under the terms of the GNU GPL, version 2 or later. + * See the COPYING file in the top-level directory. + */ + +#ifndef HW_NET_LAN9118_H +#define HW_NET_LAN9118_H + +#include "net/net.h" + +#define TYPE_LAN9118 "lan9118" + +void lan9118_init(NICInfo *, uint32_t, qemu_irq); + +#endif diff --git a/include/hw/net/lance.h b/include/hw/net/lance.h new file mode 100644 index 000000000..f645d6af6 --- /dev/null +++ b/include/hw/net/lance.h @@ -0,0 +1,48 @@ +/* + * QEMU Lance (Am7990) device emulation + * + * Copyright (c) 2004 Antony T Curtis + * Copyright (c) 2017 Mark Cave-Ayland + * + * This represents the Sparc32 lance (Am7990) ethernet device which is an + * earlier register-compatible member of the AMD PC-Net II (Am79C970A) family. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ + +#ifndef LANCE_H +#define LANCE_H + +#include "net/net.h" +#include "hw/net/pcnet.h" +#include "hw/sysbus.h" +#include "qom/object.h" + +#define TYPE_LANCE "lance" +typedef struct SysBusPCNetState SysBusPCNetState; +DECLARE_INSTANCE_CHECKER(SysBusPCNetState, SYSBUS_PCNET, + TYPE_LANCE) + +struct SysBusPCNetState { + SysBusDevice parent_obj; + + PCNetState state; +}; + +#endif diff --git a/include/hw/net/lasi_82596.h b/include/hw/net/lasi_82596.h new file mode 100644 index 000000000..7b62b0483 --- /dev/null +++ b/include/hw/net/lasi_82596.h @@ -0,0 +1,31 @@ +/* + * QEMU LASI i82596 device emulation + * + * Copyright (c) 201 Helge Deller + * + */ + +#ifndef LASI_82596_H +#define LASI_82596_H + +#include "net/net.h" +#include "hw/net/i82596.h" +#include "qom/object.h" + +#define TYPE_LASI_82596 "lasi_82596" +typedef struct SysBusI82596State SysBusI82596State; +DECLARE_INSTANCE_CHECKER(SysBusI82596State, SYSBUS_I82596, + TYPE_LASI_82596) + +struct SysBusI82596State { + SysBusDevice parent_obj; + + I82596State state; + uint16_t last_val; + int val_index:1; +}; + +SysBusI82596State *lasi_82596_init(MemoryRegion *addr_space, + hwaddr hpa, qemu_irq irq); + +#endif diff --git a/include/hw/net/mii.h b/include/hw/net/mii.h new file mode 100644 index 000000000..4ae4dcce7 --- /dev/null +++ b/include/hw/net/mii.h @@ -0,0 +1,115 @@ +/* + * Common network MII address and register definitions. + * + * Copyright (C) 2014 Beniamino Galvani + * + * Allwinner EMAC register definitions from Linux kernel are: + * Copyright 2012 Stefan Roese + * Copyright 2013 Maxime Ripard + * Copyright 1997 Sten Wang + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * version 2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + */ +#ifndef MII_H +#define MII_H + +/* PHY registers */ +#define MII_BMCR 0 /* Basic mode control register */ +#define MII_BMSR 1 /* Basic mode status register */ +#define MII_PHYID1 2 /* ID register 1 */ +#define MII_PHYID2 3 /* ID register 2 */ +#define MII_ANAR 4 /* Autonegotiation advertisement */ +#define MII_ANLPAR 5 /* Autonegotiation lnk partner abilities */ +#define MII_ANER 6 /* Autonegotiation expansion */ +#define MII_ANNP 7 /* Autonegotiation next page */ +#define MII_ANLPRNP 8 /* Autonegotiation link partner rx next page */ +#define MII_CTRL1000 9 /* 1000BASE-T control */ +#define MII_STAT1000 10 /* 1000BASE-T status */ +#define MII_MDDACR 13 /* MMD access control */ +#define MII_MDDAADR 14 /* MMD access address data */ +#define MII_EXTSTAT 15 /* Extended Status */ +#define MII_NSR 16 +#define MII_LBREMR 17 +#define MII_REC 18 +#define MII_SNRDR 19 +#define MII_TEST 25 + +/* PHY registers fields */ +#define MII_BMCR_RESET (1 << 15) +#define MII_BMCR_LOOPBACK (1 << 14) +#define MII_BMCR_SPEED100 (1 << 13) /* LSB of Speed (100) */ +#define MII_BMCR_SPEED MII_BMCR_SPEED100 +#define MII_BMCR_AUTOEN (1 << 12) /* Autonegotiation enable */ +#define MII_BMCR_PDOWN (1 << 11) /* Enable low power state */ +#define MII_BMCR_ISOLATE (1 << 10) /* Isolate data paths from MII */ +#define MII_BMCR_ANRESTART (1 << 9) /* Auto negotiation restart */ +#define MII_BMCR_FD (1 << 8) /* Set duplex mode */ +#define MII_BMCR_CTST (1 << 7) /* Collision test */ +#define MII_BMCR_SPEED1000 (1 << 6) /* MSB of Speed (1000) */ + +#define MII_BMSR_100TX_FD (1 << 14) /* Can do 100mbps, full-duplex */ +#define MII_BMSR_100TX_HD (1 << 13) /* Can do 100mbps, half-duplex */ +#define MII_BMSR_10T_FD (1 << 12) /* Can do 10mbps, full-duplex */ +#define MII_BMSR_10T_HD (1 << 11) /* Can do 10mbps, half-duplex */ +#define MII_BMSR_100T2_FD (1 << 10) /* Can do 100mbps T2, full-duplex */ +#define MII_BMSR_100T2_HD (1 << 9) /* Can do 100mbps T2, half-duplex */ +#define MII_BMSR_EXTSTAT (1 << 8) /* Extended status in register 15 */ +#define MII_BMSR_MFPS (1 << 6) /* MII Frame Preamble Suppression */ +#define MII_BMSR_AN_COMP (1 << 5) /* Auto-negotiation complete */ +#define MII_BMSR_RFAULT (1 << 4) /* Remote fault */ +#define MII_BMSR_AUTONEG (1 << 3) /* Able to do auto-negotiation */ +#define MII_BMSR_LINK_ST (1 << 2) /* Link status */ +#define MII_BMSR_JABBER (1 << 1) /* Jabber detected */ +#define MII_BMSR_EXTCAP (1 << 0) /* Ext-reg capability */ + +#define MII_ANAR_PAUSE_ASYM (1 << 11) /* Try for asymetric pause */ +#define MII_ANAR_PAUSE (1 << 10) /* Try for pause */ +#define MII_ANAR_TXFD (1 << 8) +#define MII_ANAR_TX (1 << 7) +#define MII_ANAR_10FD (1 << 6) +#define MII_ANAR_10 (1 << 5) +#define MII_ANAR_CSMACD (1 << 0) + +#define MII_ANLPAR_ACK (1 << 14) +#define MII_ANLPAR_PAUSEASY (1 << 11) /* can pause asymmetrically */ +#define MII_ANLPAR_PAUSE (1 << 10) /* can pause */ +#define MII_ANLPAR_TXFD (1 << 8) +#define MII_ANLPAR_TX (1 << 7) +#define MII_ANLPAR_10FD (1 << 6) +#define MII_ANLPAR_10 (1 << 5) +#define MII_ANLPAR_CSMACD (1 << 0) + +#define MII_ANER_NWAY (1 << 0) /* Can do N-way auto-nego */ + +#define MII_CTRL1000_FULL (1 << 9) /* 1000BASE-T full duplex */ +#define MII_CTRL1000_HALF (1 << 8) /* 1000BASE-T half duplex */ + +#define MII_STAT1000_FULL (1 << 11) /* 1000BASE-T full duplex */ +#define MII_STAT1000_HALF (1 << 10) /* 1000BASE-T half duplex */ + +/* List of vendor identifiers */ +/* RealTek 8201 */ +#define RTL8201CP_PHYID1 0x0000 +#define RTL8201CP_PHYID2 0x8201 + +/* RealTek 8211E */ +#define RTL8211E_PHYID1 0x001c +#define RTL8211E_PHYID2 0xc915 + +/* National Semiconductor DP83840 */ +#define DP83840_PHYID1 0x2000 +#define DP83840_PHYID2 0x5c01 + +/* National Semiconductor DP83848 */ +#define DP83848_PHYID1 0x2000 +#define DP83848_PHYID2 0x5c90 + +#endif /* MII_H */ diff --git a/include/hw/net/msf2-emac.h b/include/hw/net/msf2-emac.h new file mode 100644 index 000000000..846ba6e6d --- /dev/null +++ b/include/hw/net/msf2-emac.h @@ -0,0 +1,53 @@ +/* + * QEMU model of the Smartfusion2 Ethernet MAC. + * + * Copyright (c) 2020 Subbaraya Sundeep . + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ + +#include "hw/sysbus.h" +#include "exec/memory.h" +#include "net/net.h" +#include "net/eth.h" +#include "qom/object.h" + +#define TYPE_MSS_EMAC "msf2-emac" +OBJECT_DECLARE_SIMPLE_TYPE(MSF2EmacState, MSS_EMAC) + +#define R_MAX (0x1a0 / 4) +#define PHY_MAX_REGS 32 + +struct MSF2EmacState { + SysBusDevice parent; + + MemoryRegion mmio; + MemoryRegion *dma_mr; + AddressSpace dma_as; + + qemu_irq irq; + NICState *nic; + NICConf conf; + + uint8_t mac_addr[ETH_ALEN]; + uint32_t rx_desc; + uint16_t phy_regs[PHY_MAX_REGS]; + + uint32_t regs[R_MAX]; +}; diff --git a/include/hw/net/ne2000-isa.h b/include/hw/net/ne2000-isa.h new file mode 100644 index 000000000..af59ee0b0 --- /dev/null +++ b/include/hw/net/ne2000-isa.h @@ -0,0 +1,39 @@ +/* + * QEMU NE2000 emulation -- isa bus windup + * + * Copyright (c) 2003-2004 Fabrice Bellard + * + * This work is licensed under the terms of the GNU GPL, version 2 or later. + * See the COPYING file in the top-level directory. + */ + +#ifndef HW_NET_NE2000_ISA_H +#define HW_NET_NE2000_ISA_H + +#include "hw/isa/isa.h" +#include "hw/qdev-properties.h" +#include "net/net.h" +#include "qapi/error.h" + +#define TYPE_ISA_NE2000 "ne2k_isa" + +static inline ISADevice *isa_ne2000_init(ISABus *bus, int base, int irq, + NICInfo *nd) +{ + ISADevice *d; + + qemu_check_nic_model(nd, "ne2k_isa"); + + d = isa_try_new(TYPE_ISA_NE2000); + if (d) { + DeviceState *dev = DEVICE(d); + + qdev_prop_set_uint32(dev, "iobase", base); + qdev_prop_set_uint32(dev, "irq", irq); + qdev_set_nic_properties(dev, nd); + isa_realize_and_unref(d, bus, &error_fatal); + } + return d; +} + +#endif diff --git a/include/hw/net/smc91c111.h b/include/hw/net/smc91c111.h new file mode 100644 index 000000000..df5b11dce --- /dev/null +++ b/include/hw/net/smc91c111.h @@ -0,0 +1,18 @@ +/* + * SMSC 91C111 Ethernet interface emulation + * + * Copyright (c) 2005 CodeSourcery, LLC. + * Written by Paul Brook + * + * This work is licensed under the terms of the GNU GPL, version 2 or later. + * See the COPYING file in the top-level directory. + */ + +#ifndef HW_NET_SMC91C111_H +#define HW_NET_SMC91C111_H + +#include "net/net.h" + +void smc91c111_init(NICInfo *, uint32_t, qemu_irq); + +#endif diff --git a/include/hw/nmi.h b/include/hw/nmi.h new file mode 100644 index 000000000..fff41bebc --- /dev/null +++ b/include/hw/nmi.h @@ -0,0 +1,45 @@ +/* + * NMI monitor handler class and helpers definitions. + * + * Copyright IBM Corp., 2014 + * + * Author: Alexey Kardashevskiy + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, + * or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, see . + */ + +#ifndef NMI_H +#define NMI_H + +#include "qom/object.h" + +#define TYPE_NMI "nmi" + +typedef struct NMIClass NMIClass; +DECLARE_CLASS_CHECKERS(NMIClass, NMI, + TYPE_NMI) +#define NMI(obj) \ + INTERFACE_CHECK(NMIState, (obj), TYPE_NMI) + +typedef struct NMIState NMIState; + +struct NMIClass { + InterfaceClass parent_class; + + void (*nmi_monitor_handler)(NMIState *n, int cpu_index, Error **errp); +}; + +void nmi_monitor_handle(int cpu_index, Error **errp); + +#endif /* NMI_H */ diff --git a/include/hw/nubus/mac-nubus-bridge.h b/include/hw/nubus/mac-nubus-bridge.h new file mode 100644 index 000000000..36aa098dd --- /dev/null +++ b/include/hw/nubus/mac-nubus-bridge.h @@ -0,0 +1,24 @@ +/* + * Copyright (c) 2013-2018 Laurent Vivier + * + * This work is licensed under the terms of the GNU GPL, version 2 or later. + * See the COPYING file in the top-level directory. + * + */ + +#ifndef HW_NUBUS_MAC_H +#define HW_NUBUS_MAC_H + +#include "hw/nubus/nubus.h" +#include "qom/object.h" + +#define TYPE_MAC_NUBUS_BRIDGE "mac-nubus-bridge" +OBJECT_DECLARE_SIMPLE_TYPE(MacNubusState, MAC_NUBUS_BRIDGE) + +struct MacNubusState { + SysBusDevice sysbus_dev; + + NubusBus *bus; +}; + +#endif diff --git a/include/hw/nubus/nubus.h b/include/hw/nubus/nubus.h new file mode 100644 index 000000000..e2b5cf260 --- /dev/null +++ b/include/hw/nubus/nubus.h @@ -0,0 +1,68 @@ +/* + * Copyright (c) 2013-2018 Laurent Vivier + * + * This work is licensed under the terms of the GNU GPL, version 2 or later. + * See the COPYING file in the top-level directory. + * + */ + +#ifndef HW_NUBUS_NUBUS_H +#define HW_NUBUS_NUBUS_H + +#include "hw/qdev-properties.h" +#include "exec/address-spaces.h" +#include "qom/object.h" + +#define NUBUS_SUPER_SLOT_SIZE 0x10000000U +#define NUBUS_SUPER_SLOT_NB 0x9 + +#define NUBUS_SLOT_SIZE 0x01000000 +#define NUBUS_SLOT_NB 0xF + +#define NUBUS_FIRST_SLOT 0x9 +#define NUBUS_LAST_SLOT 0xF + +#define TYPE_NUBUS_DEVICE "nubus-device" +OBJECT_DECLARE_SIMPLE_TYPE(NubusDevice, NUBUS_DEVICE) + +#define TYPE_NUBUS_BUS "nubus-bus" +OBJECT_DECLARE_SIMPLE_TYPE(NubusBus, NUBUS_BUS) + +#define TYPE_NUBUS_BRIDGE "nubus-bridge" + +struct NubusBus { + BusState qbus; + + MemoryRegion super_slot_io; + MemoryRegion slot_io; + + int current_slot; +}; + +struct NubusDevice { + DeviceState qdev; + + int slot_nb; + MemoryRegion slot_mem; + + /* Format Block */ + + MemoryRegion fblock_io; + + uint32_t rom_length; + uint32_t rom_crc; + uint8_t rom_rev; + uint8_t rom_format; + uint8_t byte_lanes; + int32_t directory_offset; + + /* ROM */ + + MemoryRegion rom_io; + const uint8_t *rom; +}; + +void nubus_register_rom(NubusDevice *dev, const uint8_t *rom, uint32_t size, + int revision, int format, uint8_t byte_lanes); + +#endif diff --git a/include/hw/nvram/chrp_nvram.h b/include/hw/nvram/chrp_nvram.h new file mode 100644 index 000000000..4a0f5c21b --- /dev/null +++ b/include/hw/nvram/chrp_nvram.h @@ -0,0 +1,57 @@ +/* + * Common Hardware Reference Platform NVRAM functions. + * + * This code is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published + * by the Free Software Foundation; either version 2 of the License, + * or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, see . + */ + +#ifndef CHRP_NVRAM_H +#define CHRP_NVRAM_H + +#include "qemu/bswap.h" + +/* OpenBIOS NVRAM partition */ +typedef struct { + uint8_t signature; + uint8_t checksum; + uint16_t len; /* Big endian, length divided by 16 */ + char name[12]; +} ChrpNvramPartHdr; + +#define CHRP_NVPART_SYSTEM 0x70 +#define CHRP_NVPART_FREE 0x7f + +static inline void +chrp_nvram_finish_partition(ChrpNvramPartHdr *header, uint32_t size) +{ + unsigned int i, sum; + uint8_t *tmpptr; + + /* Length divided by 16 */ + header->len = cpu_to_be16(size >> 4); + + /* Checksum */ + tmpptr = (uint8_t *)header; + sum = *tmpptr; + for (i = 0; i < 14; i++) { + sum += tmpptr[2 + i]; + sum = (sum + ((sum & 0xff00) >> 8)) & 0xff; + } + header->checksum = sum & 0xff; +} + +/* chrp_nvram_create_system_partition() failure is fatal */ +int chrp_nvram_create_system_partition(uint8_t *data, int min_len, int max_len); +int chrp_nvram_create_free_partition(uint8_t *data, int len); + +#endif diff --git a/include/hw/nvram/eeprom93xx.h b/include/hw/nvram/eeprom93xx.h new file mode 100644 index 000000000..8ba0e287f --- /dev/null +++ b/include/hw/nvram/eeprom93xx.h @@ -0,0 +1,40 @@ +/* + * QEMU EEPROM 93xx emulation + * + * Copyright (c) 2006-2007 Stefan Weil + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, see . + */ + +#ifndef EEPROM93XX_H +#define EEPROM93XX_H + +typedef struct _eeprom_t eeprom_t; + +/* Create a new EEPROM with (nwords * 2) bytes. */ +eeprom_t *eeprom93xx_new(DeviceState *dev, uint16_t nwords); + +/* Destroy an existing EEPROM. */ +void eeprom93xx_free(DeviceState *dev, eeprom_t *eeprom); + +/* Read from the EEPROM. */ +uint16_t eeprom93xx_read(eeprom_t *eeprom); + +/* Write to the EEPROM. */ +void eeprom93xx_write(eeprom_t *eeprom, int eecs, int eesk, int eedi); + +/* Get EEPROM data array. */ +uint16_t *eeprom93xx_data(eeprom_t *eeprom); + +#endif /* EEPROM93XX_H */ diff --git a/include/hw/nvram/fw_cfg.h b/include/hw/nvram/fw_cfg.h new file mode 100644 index 000000000..8a9f5738b --- /dev/null +++ b/include/hw/nvram/fw_cfg.h @@ -0,0 +1,336 @@ +#ifndef FW_CFG_H +#define FW_CFG_H + +#include "exec/hwaddr.h" +#include "standard-headers/linux/qemu_fw_cfg.h" +#include "hw/sysbus.h" +#include "sysemu/dma.h" +#include "qom/object.h" + +#define TYPE_FW_CFG "fw_cfg" +#define TYPE_FW_CFG_IO "fw_cfg_io" +#define TYPE_FW_CFG_MEM "fw_cfg_mem" +#define TYPE_FW_CFG_DATA_GENERATOR_INTERFACE "fw_cfg-data-generator" + +OBJECT_DECLARE_SIMPLE_TYPE(FWCfgState, FW_CFG) +OBJECT_DECLARE_SIMPLE_TYPE(FWCfgIoState, FW_CFG_IO) +OBJECT_DECLARE_SIMPLE_TYPE(FWCfgMemState, FW_CFG_MEM) + +typedef struct FWCfgDataGeneratorClass FWCfgDataGeneratorClass; +DECLARE_CLASS_CHECKERS(FWCfgDataGeneratorClass, FW_CFG_DATA_GENERATOR, + TYPE_FW_CFG_DATA_GENERATOR_INTERFACE) + +struct FWCfgDataGeneratorClass { + /*< private >*/ + InterfaceClass parent_class; + /*< public >*/ + + /** + * get_data: + * @obj: the object implementing this interface + * @errp: pointer to a NULL-initialized error object + * + * Returns: reference to a byte array containing the data on success, + * or NULL on error. + * + * The caller should release the reference when no longer + * required. + */ + GByteArray *(*get_data)(Object *obj, Error **errp); +}; + +typedef struct fw_cfg_file FWCfgFile; + +#define FW_CFG_ORDER_OVERRIDE_VGA 70 +#define FW_CFG_ORDER_OVERRIDE_NIC 80 +#define FW_CFG_ORDER_OVERRIDE_USER 100 +#define FW_CFG_ORDER_OVERRIDE_DEVICE 110 + +void fw_cfg_set_order_override(FWCfgState *fw_cfg, int order); +void fw_cfg_reset_order_override(FWCfgState *fw_cfg); + +typedef struct FWCfgFiles { + uint32_t count; + FWCfgFile f[]; +} FWCfgFiles; + +typedef struct fw_cfg_dma_access FWCfgDmaAccess; + +typedef void (*FWCfgCallback)(void *opaque); +typedef void (*FWCfgWriteCallback)(void *opaque, off_t start, size_t len); + +struct FWCfgState { + /*< private >*/ + SysBusDevice parent_obj; + /*< public >*/ + + uint16_t file_slots; + FWCfgEntry *entries[2]; + int *entry_order; + FWCfgFiles *files; + uint16_t cur_entry; + uint32_t cur_offset; + Notifier machine_ready; + + int fw_cfg_order_override; + + bool dma_enabled; + dma_addr_t dma_addr; + AddressSpace *dma_as; + MemoryRegion dma_iomem; + + /* restore during migration */ + bool acpi_mr_restore; + uint64_t table_mr_size; + uint64_t linker_mr_size; + uint64_t rsdp_mr_size; +}; + +struct FWCfgIoState { + /*< private >*/ + FWCfgState parent_obj; + /*< public >*/ + + MemoryRegion comb_iomem; +}; + +struct FWCfgMemState { + /*< private >*/ + FWCfgState parent_obj; + /*< public >*/ + + MemoryRegion ctl_iomem, data_iomem; + uint32_t data_width; + MemoryRegionOps wide_data_ops; +}; + +/** + * fw_cfg_add_bytes: + * @s: fw_cfg device being modified + * @key: selector key value for new fw_cfg item + * @data: pointer to start of item data + * @len: size of item data + * + * Add a new fw_cfg item, available by selecting the given key, as a raw + * "blob" of the given size. The data referenced by the starting pointer + * is only linked, NOT copied, into the data structure of the fw_cfg device. + */ +void fw_cfg_add_bytes(FWCfgState *s, uint16_t key, void *data, size_t len); + +/** + * fw_cfg_add_string: + * @s: fw_cfg device being modified + * @key: selector key value for new fw_cfg item + * @value: NUL-terminated ascii string + * + * Add a new fw_cfg item, available by selecting the given key. The item + * data will consist of a dynamically allocated copy of the provided string, + * including its NUL terminator. + */ +void fw_cfg_add_string(FWCfgState *s, uint16_t key, const char *value); + +/** + * fw_cfg_modify_string: + * @s: fw_cfg device being modified + * @key: selector key value for new fw_cfg item + * @value: NUL-terminated ascii string + * + * Replace the fw_cfg item available by selecting the given key. The new + * data will consist of a dynamically allocated copy of the provided string, + * including its NUL terminator. The data being replaced, assumed to have + * been dynamically allocated during an earlier call to either + * fw_cfg_add_string() or fw_cfg_modify_string(), is freed before returning. + */ +void fw_cfg_modify_string(FWCfgState *s, uint16_t key, const char *value); + +/** + * fw_cfg_add_i16: + * @s: fw_cfg device being modified + * @key: selector key value for new fw_cfg item + * @value: 16-bit integer + * + * Add a new fw_cfg item, available by selecting the given key. The item + * data will consist of a dynamically allocated copy of the given 16-bit + * value, converted to little-endian representation. + */ +void fw_cfg_add_i16(FWCfgState *s, uint16_t key, uint16_t value); + +/** + * fw_cfg_modify_i16: + * @s: fw_cfg device being modified + * @key: selector key value for new fw_cfg item + * @value: 16-bit integer + * + * Replace the fw_cfg item available by selecting the given key. The new + * data will consist of a dynamically allocated copy of the given 16-bit + * value, converted to little-endian representation. The data being replaced, + * assumed to have been dynamically allocated during an earlier call to + * either fw_cfg_add_i16() or fw_cfg_modify_i16(), is freed before returning. + */ +void fw_cfg_modify_i16(FWCfgState *s, uint16_t key, uint16_t value); + +/** + * fw_cfg_add_i32: + * @s: fw_cfg device being modified + * @key: selector key value for new fw_cfg item + * @value: 32-bit integer + * + * Add a new fw_cfg item, available by selecting the given key. The item + * data will consist of a dynamically allocated copy of the given 32-bit + * value, converted to little-endian representation. + */ +void fw_cfg_add_i32(FWCfgState *s, uint16_t key, uint32_t value); + +/** + * fw_cfg_modify_i32: + * @s: fw_cfg device being modified + * @key: selector key value for new fw_cfg item + * @value: 32-bit integer + * + * Replace the fw_cfg item available by selecting the given key. The new + * data will consist of a dynamically allocated copy of the given 32-bit + * value, converted to little-endian representation. The data being replaced, + * assumed to have been dynamically allocated during an earlier call to + * either fw_cfg_add_i32() or fw_cfg_modify_i32(), is freed before returning. + */ +void fw_cfg_modify_i32(FWCfgState *s, uint16_t key, uint32_t value); + +/** + * fw_cfg_add_i64: + * @s: fw_cfg device being modified + * @key: selector key value for new fw_cfg item + * @value: 64-bit integer + * + * Add a new fw_cfg item, available by selecting the given key. The item + * data will consist of a dynamically allocated copy of the given 64-bit + * value, converted to little-endian representation. + */ +void fw_cfg_add_i64(FWCfgState *s, uint16_t key, uint64_t value); + +/** + * fw_cfg_modify_i64: + * @s: fw_cfg device being modified + * @key: selector key value for new fw_cfg item + * @value: 64-bit integer + * + * Replace the fw_cfg item available by selecting the given key. The new + * data will consist of a dynamically allocated copy of the given 64-bit + * value, converted to little-endian representation. The data being replaced, + * assumed to have been dynamically allocated during an earlier call to + * either fw_cfg_add_i64() or fw_cfg_modify_i64(), is freed before returning. + */ +void fw_cfg_modify_i64(FWCfgState *s, uint16_t key, uint64_t value); + +/** + * fw_cfg_add_file: + * @s: fw_cfg device being modified + * @filename: name of new fw_cfg file item + * @data: pointer to start of item data + * @len: size of item data + * + * Add a new NAMED fw_cfg item as a raw "blob" of the given size. The data + * referenced by the starting pointer is only linked, NOT copied, into the + * data structure of the fw_cfg device. + * The next available (unused) selector key starting at FW_CFG_FILE_FIRST + * will be used; also, a new entry will be added to the file directory + * structure residing at key value FW_CFG_FILE_DIR, containing the item name, + * data size, and assigned selector key value. + */ +void fw_cfg_add_file(FWCfgState *s, const char *filename, void *data, + size_t len); + +/** + * fw_cfg_add_file_callback: + * @s: fw_cfg device being modified + * @filename: name of new fw_cfg file item + * @select_cb: callback function when selecting + * @write_cb: callback function after a write + * @callback_opaque: argument to be passed into callback function + * @data: pointer to start of item data + * @len: size of item data + * @read_only: is file read only + * + * Add a new NAMED fw_cfg item as a raw "blob" of the given size. The data + * referenced by the starting pointer is only linked, NOT copied, into the + * data structure of the fw_cfg device. + * The next available (unused) selector key starting at FW_CFG_FILE_FIRST + * will be used; also, a new entry will be added to the file directory + * structure residing at key value FW_CFG_FILE_DIR, containing the item name, + * data size, and assigned selector key value. + * Additionally, set a callback function (and argument) to be called each + * time this item is selected (by having its selector key either written to + * the fw_cfg control register, or passed to QEMU in FWCfgDmaAccess.control + * with FW_CFG_DMA_CTL_SELECT). + */ +void fw_cfg_add_file_callback(FWCfgState *s, const char *filename, + FWCfgCallback select_cb, + FWCfgWriteCallback write_cb, + void *callback_opaque, + void *data, size_t len, bool read_only); + +/** + * fw_cfg_modify_file: + * @s: fw_cfg device being modified + * @filename: name of new fw_cfg file item + * @data: pointer to start of item data + * @len: size of item data + * + * Replace a NAMED fw_cfg item. If an existing item is found, its callback + * information will be cleared, and a pointer to its data will be returned + * to the caller, so that it may be freed if necessary. If an existing item + * is not found, this call defaults to fw_cfg_add_file(), and NULL is + * returned to the caller. + * In either case, the new item data is only linked, NOT copied, into the + * data structure of the fw_cfg device. + * + * Returns: pointer to old item's data, or NULL if old item does not exist. + */ +void *fw_cfg_modify_file(FWCfgState *s, const char *filename, void *data, + size_t len); + +/** + * fw_cfg_add_from_generator: + * @s: fw_cfg device being modified + * @filename: name of new fw_cfg file item + * @gen_id: name of object implementing FW_CFG_DATA_GENERATOR interface + * @errp: pointer to a NULL initialized error object + * + * Add a new NAMED fw_cfg item with the content generated from the + * @gen_id object. The data generated by the @gen_id object is copied + * into the data structure of the fw_cfg device. + * The next available (unused) selector key starting at FW_CFG_FILE_FIRST + * will be used; also, a new entry will be added to the file directory + * structure residing at key value FW_CFG_FILE_DIR, containing the item name, + * data size, and assigned selector key value. + * + * Returns: %true on success, %false on error. + */ +bool fw_cfg_add_from_generator(FWCfgState *s, const char *filename, + const char *gen_id, Error **errp); + +FWCfgState *fw_cfg_init_io_dma(uint32_t iobase, uint32_t dma_iobase, + AddressSpace *dma_as); +FWCfgState *fw_cfg_init_io(uint32_t iobase); +FWCfgState *fw_cfg_init_mem(hwaddr ctl_addr, hwaddr data_addr); +FWCfgState *fw_cfg_init_mem_wide(hwaddr ctl_addr, + hwaddr data_addr, uint32_t data_width, + hwaddr dma_addr, AddressSpace *dma_as); + +FWCfgState *fw_cfg_find(void); +bool fw_cfg_dma_enabled(void *opaque); + +/** + * fw_cfg_arch_key_name: + * + * @key: The uint16 selector key. + * + * The key is architecture-specific (the FW_CFG_ARCH_LOCAL mask is expected + * to be set in the key). + * + * Returns: The stringified architecture-specific name if the selector + * refers to a well-known numerically defined item, or NULL on + * key lookup failure. + */ +const char *fw_cfg_arch_key_name(uint16_t key); + +#endif diff --git a/include/hw/nvram/npcm7xx_otp.h b/include/hw/nvram/npcm7xx_otp.h new file mode 100644 index 000000000..156bbd151 --- /dev/null +++ b/include/hw/nvram/npcm7xx_otp.h @@ -0,0 +1,79 @@ +/* + * Nuvoton NPCM7xx OTP (Fuse Array) Interface + * + * Copyright 2020 Google LLC + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * for more details. + */ +#ifndef NPCM7XX_OTP_H +#define NPCM7XX_OTP_H + +#include "exec/memory.h" +#include "hw/sysbus.h" + +/* Each OTP module holds 8192 bits of one-time programmable storage */ +#define NPCM7XX_OTP_ARRAY_BITS (8192) +#define NPCM7XX_OTP_ARRAY_BYTES (NPCM7XX_OTP_ARRAY_BITS / BITS_PER_BYTE) + +/* Fuse array offsets */ +#define NPCM7XX_FUSE_FUSTRAP (0) +#define NPCM7XX_FUSE_CP_FUSTRAP (12) +#define NPCM7XX_FUSE_DAC_CALIB (16) +#define NPCM7XX_FUSE_ADC_CALIB (24) +#define NPCM7XX_FUSE_DERIVATIVE (64) +#define NPCM7XX_FUSE_TEST_SIG (72) +#define NPCM7XX_FUSE_DIE_LOCATION (74) +#define NPCM7XX_FUSE_GP1 (80) +#define NPCM7XX_FUSE_GP2 (128) + +/* + * Number of registers in our device state structure. Don't change this without + * incrementing the version_id in the vmstate. + */ +#define NPCM7XX_OTP_NR_REGS (0x18 / sizeof(uint32_t)) + +/** + * struct NPCM7xxOTPState - Device state for one OTP module. + * @parent: System bus device. + * @mmio: Memory region through which registers are accessed. + * @regs: Register contents. + * @array: OTP storage array. + */ +typedef struct NPCM7xxOTPState { + SysBusDevice parent; + + MemoryRegion mmio; + uint32_t regs[NPCM7XX_OTP_NR_REGS]; + uint8_t array[NPCM7XX_OTP_ARRAY_BYTES]; +} NPCM7xxOTPState; + +#define TYPE_NPCM7XX_OTP "npcm7xx-otp" +#define NPCM7XX_OTP(obj) OBJECT_CHECK(NPCM7xxOTPState, (obj), TYPE_NPCM7XX_OTP) + +#define TYPE_NPCM7XX_KEY_STORAGE "npcm7xx-key-storage" +#define TYPE_NPCM7XX_FUSE_ARRAY "npcm7xx-fuse-array" + +typedef struct NPCM7xxOTPClass NPCM7xxOTPClass; + +/** + * npcm7xx_otp_array_write - ECC encode and write data to OTP array. + * @s: OTP module. + * @data: Data to be encoded and written. + * @offset: Offset of first byte to be written in the OTP array. + * @len: Number of bytes before ECC encoding. + * + * Each nibble of data is encoded into a byte, so the number of bytes written + * to the array will be @len * 2. + */ +extern void npcm7xx_otp_array_write(NPCM7xxOTPState *s, const void *data, + unsigned int offset, unsigned int len); + +#endif /* NPCM7XX_OTP_H */ diff --git a/include/hw/nvram/nrf51_nvm.h b/include/hw/nvram/nrf51_nvm.h new file mode 100644 index 000000000..d85e788df --- /dev/null +++ b/include/hw/nvram/nrf51_nvm.h @@ -0,0 +1,65 @@ +/* + * Nordic Semiconductor nRF51 non-volatile memory + * + * It provides an interface to erase regions in flash memory. + * Furthermore it provides the user and factory information registers. + * + * QEMU interface: + * + sysbus MMIO regions 0: NVMC peripheral registers + * + sysbus MMIO regions 1: FICR peripheral registers + * + sysbus MMIO regions 2: UICR peripheral registers + * + flash-size property: flash size in bytes. + * + * Accuracy of the peripheral model: + * + Code regions (MPU configuration) are disregarded. + * + * Copyright 2018 Steffen Görtz + * + * This code is licensed under the GPL version 2 or later. See + * the COPYING file in the top-level directory. + * + */ +#ifndef NRF51_NVM_H +#define NRF51_NVM_H + +#include "hw/sysbus.h" +#include "qom/object.h" +#define TYPE_NRF51_NVM "nrf51_soc.nvm" +OBJECT_DECLARE_SIMPLE_TYPE(NRF51NVMState, NRF51_NVM) + +#define NRF51_UICR_FIXTURE_SIZE 64 + +#define NRF51_NVMC_SIZE 0x1000 + +#define NRF51_NVMC_READY 0x400 +#define NRF51_NVMC_READY_READY 0x01 +#define NRF51_NVMC_CONFIG 0x504 +#define NRF51_NVMC_CONFIG_MASK 0x03 +#define NRF51_NVMC_CONFIG_WEN 0x01 +#define NRF51_NVMC_CONFIG_EEN 0x02 +#define NRF51_NVMC_ERASEPCR1 0x508 +#define NRF51_NVMC_ERASEPCR0 0x510 +#define NRF51_NVMC_ERASEALL 0x50C +#define NRF51_NVMC_ERASEUICR 0x514 +#define NRF51_NVMC_ERASE 0x01 + +#define NRF51_UICR_SIZE 0x100 + +struct NRF51NVMState { + SysBusDevice parent_obj; + + MemoryRegion mmio; + MemoryRegion ficr; + MemoryRegion uicr; + MemoryRegion flash; + + uint32_t uicr_content[NRF51_UICR_FIXTURE_SIZE]; + uint32_t flash_size; + uint8_t *storage; + + uint32_t config; + +}; + + +#endif diff --git a/include/hw/nvram/sun_nvram.h b/include/hw/nvram/sun_nvram.h new file mode 100644 index 000000000..68eaa6030 --- /dev/null +++ b/include/hw/nvram/sun_nvram.h @@ -0,0 +1,34 @@ +#ifndef SUN_NVRAM_H +#define SUN_NVRAM_H + +/* Sun IDPROM structure at the end of NVRAM */ +/* from http://www.squirrel.com/squirrel/sun-nvram-hostid.faq.html */ +struct Sun_nvram { + uint8_t type; /* always 01 */ + uint8_t machine_id; /* first byte of host id (machine type) */ + uint8_t macaddr[6]; /* 6 byte ethernet address (first 3 bytes 08, 00, 20) */ + uint8_t date[4]; /* date of manufacture */ + uint8_t hostid[3]; /* remaining 3 bytes of host id (serial number) */ + uint8_t checksum; /* bitwise xor of previous bytes */ +}; + +static inline void +Sun_init_header(struct Sun_nvram *header, const uint8_t *macaddr, int machine_id) +{ + uint8_t tmp, *tmpptr; + unsigned int i; + + header->type = 1; + header->machine_id = machine_id & 0xff; + memcpy(&header->macaddr, macaddr, 6); + memcpy(&header->hostid , &macaddr[3], 3); + + /* Calculate checksum */ + tmp = 0; + tmpptr = (uint8_t *)header; + for (i = 0; i < 15; i++) + tmp ^= tmpptr[i]; + + header->checksum = tmp; +} +#endif /* SUN_NVRAM_H */ diff --git a/include/hw/or-irq.h b/include/hw/or-irq.h new file mode 100644 index 000000000..f2f0a2738 --- /dev/null +++ b/include/hw/or-irq.h @@ -0,0 +1,51 @@ +/* + * QEMU IRQ/GPIO common code. + * + * Copyright (c) 2016 Alistair Francis . + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ + +#ifndef HW_OR_IRQ_H +#define HW_OR_IRQ_H + +#include "hw/sysbus.h" +#include "qom/object.h" + +#define TYPE_OR_IRQ "or-irq" + +/* This can safely be increased if necessary without breaking + * migration compatibility (as long as it remains greater than 15). + */ +#define MAX_OR_LINES 48 + +typedef struct OrIRQState qemu_or_irq; + +DECLARE_INSTANCE_CHECKER(qemu_or_irq, OR_IRQ, + TYPE_OR_IRQ) + +struct OrIRQState { + DeviceState parent_obj; + + qemu_irq out_irq; + bool levels[MAX_OR_LINES]; + uint16_t num_lines; +}; + +#endif diff --git a/include/hw/pci-bridge/simba.h b/include/hw/pci-bridge/simba.h new file mode 100644 index 000000000..979cb1743 --- /dev/null +++ b/include/hw/pci-bridge/simba.h @@ -0,0 +1,42 @@ +/* + * QEMU Simba PCI bridge + * + * Copyright (c) 2006 Fabrice Bellard + * Copyright (c) 2012,2013 Artyom Tarasenko + * Copyright (c) 2017 Mark Cave-Ayland + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ + +#ifndef HW_PCI_BRIDGE_SIMBA_H +#define HW_PCI_BRIDGE_SIMBA_H + +#include "hw/pci/pci_bridge.h" +#include "qom/object.h" + + +struct SimbaPCIBridge { + /*< private >*/ + PCIBridge parent_obj; +}; + +#define TYPE_SIMBA_PCI_BRIDGE "pbm-bridge" +OBJECT_DECLARE_SIMPLE_TYPE(SimbaPCIBridge, SIMBA_PCI_BRIDGE) + +#endif diff --git a/include/hw/pci-host/designware.h b/include/hw/pci-host/designware.h new file mode 100644 index 000000000..6d9b51ae6 --- /dev/null +++ b/include/hw/pci-host/designware.h @@ -0,0 +1,99 @@ +/* + * Copyright (c) 2017, Impinj, Inc. + * + * Designware PCIe IP block emulation + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, see + * . + */ + +#ifndef DESIGNWARE_H +#define DESIGNWARE_H + +#include "hw/sysbus.h" +#include "hw/pci/pci.h" +#include "hw/pci/pci_bus.h" +#include "hw/pci/pcie_host.h" +#include "hw/pci/pci_bridge.h" +#include "qom/object.h" + +#define TYPE_DESIGNWARE_PCIE_HOST "designware-pcie-host" +OBJECT_DECLARE_SIMPLE_TYPE(DesignwarePCIEHost, DESIGNWARE_PCIE_HOST) + +#define TYPE_DESIGNWARE_PCIE_ROOT "designware-pcie-root" +OBJECT_DECLARE_SIMPLE_TYPE(DesignwarePCIERoot, DESIGNWARE_PCIE_ROOT) + +struct DesignwarePCIERoot; + +typedef struct DesignwarePCIEViewport { + DesignwarePCIERoot *root; + + MemoryRegion cfg; + MemoryRegion mem; + + uint64_t base; + uint64_t target; + uint32_t limit; + uint32_t cr[2]; + + bool inbound; +} DesignwarePCIEViewport; + +typedef struct DesignwarePCIEMSIBank { + uint32_t enable; + uint32_t mask; + uint32_t status; +} DesignwarePCIEMSIBank; + +typedef struct DesignwarePCIEMSI { + uint64_t base; + MemoryRegion iomem; + +#define DESIGNWARE_PCIE_NUM_MSI_BANKS 1 + + DesignwarePCIEMSIBank intr[DESIGNWARE_PCIE_NUM_MSI_BANKS]; +} DesignwarePCIEMSI; + +struct DesignwarePCIERoot { + PCIBridge parent_obj; + + uint32_t atu_viewport; + +#define DESIGNWARE_PCIE_VIEWPORT_OUTBOUND 0 +#define DESIGNWARE_PCIE_VIEWPORT_INBOUND 1 +#define DESIGNWARE_PCIE_NUM_VIEWPORTS 4 + + DesignwarePCIEViewport viewports[2][DESIGNWARE_PCIE_NUM_VIEWPORTS]; + DesignwarePCIEMSI msi; +}; + +struct DesignwarePCIEHost { + PCIHostState parent_obj; + + DesignwarePCIERoot root; + + struct { + AddressSpace address_space; + MemoryRegion address_space_root; + + MemoryRegion memory; + MemoryRegion io; + + qemu_irq irqs[4]; + } pci; + + MemoryRegion mmio; +}; + +#endif /* DESIGNWARE_H */ diff --git a/include/hw/pci-host/gpex.h b/include/hw/pci-host/gpex.h new file mode 100644 index 000000000..d52ea80d4 --- /dev/null +++ b/include/hw/pci-host/gpex.h @@ -0,0 +1,68 @@ +/* + * QEMU Generic PCI Express Bridge Emulation + * + * Copyright (C) 2015 Alexander Graf + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, see + */ + +#ifndef HW_GPEX_H +#define HW_GPEX_H + +#include "exec/hwaddr.h" +#include "hw/sysbus.h" +#include "hw/pci/pci.h" +#include "hw/pci/pcie_host.h" +#include "qom/object.h" + +#define TYPE_GPEX_HOST "gpex-pcihost" +OBJECT_DECLARE_SIMPLE_TYPE(GPEXHost, GPEX_HOST) + +#define TYPE_GPEX_ROOT_DEVICE "gpex-root" +OBJECT_DECLARE_SIMPLE_TYPE(GPEXRootState, GPEX_ROOT_DEVICE) + +#define GPEX_NUM_IRQS 4 + +struct GPEXRootState { + /*< private >*/ + PCIDevice parent_obj; + /*< public >*/ +}; + +struct GPEXHost { + /*< private >*/ + PCIExpressHost parent_obj; + /*< public >*/ + + GPEXRootState gpex_root; + + MemoryRegion io_ioport; + MemoryRegion io_mmio; + qemu_irq irq[GPEX_NUM_IRQS]; + int irq_num[GPEX_NUM_IRQS]; +}; + +struct GPEXConfig { + MemMapEntry ecam; + MemMapEntry mmio32; + MemMapEntry mmio64; + MemMapEntry pio; + int irq; +}; + +int gpex_set_irq_num(GPEXHost *s, int index, int gsi); + +void acpi_dsdt_add_gpex(Aml *scope, struct GPEXConfig *cfg); + +#endif /* HW_GPEX_H */ diff --git a/include/hw/pci-host/i440fx.h b/include/hw/pci-host/i440fx.h new file mode 100644 index 000000000..6c16eaf87 --- /dev/null +++ b/include/hw/pci-host/i440fx.h @@ -0,0 +1,51 @@ +/* + * QEMU i440FX North Bridge Emulation + * + * Copyright (c) 2006 Fabrice Bellard + * + * This work is licensed under the terms of the GNU GPL, version 2 or later. + * See the COPYING file in the top-level directory. + * + */ + +#ifndef HW_PCI_I440FX_H +#define HW_PCI_I440FX_H + +#include "hw/hw.h" +#include "hw/pci/pci_bus.h" +#include "hw/pci-host/pam.h" +#include "qom/object.h" + +#define TYPE_I440FX_PCI_HOST_BRIDGE "i440FX-pcihost" +#define TYPE_I440FX_PCI_DEVICE "i440FX" + +OBJECT_DECLARE_SIMPLE_TYPE(PCII440FXState, I440FX_PCI_DEVICE) + +struct PCII440FXState { + /*< private >*/ + PCIDevice parent_obj; + /*< public >*/ + + MemoryRegion *system_memory; + MemoryRegion *pci_address_space; + MemoryRegion *ram_memory; + PAMMemoryRegion pam_regions[13]; + MemoryRegion smram_region; + MemoryRegion smram, low_smram; +}; + +#define TYPE_IGD_PASSTHROUGH_I440FX_PCI_DEVICE "igd-passthrough-i440FX" + +PCIBus *i440fx_init(const char *host_type, const char *pci_type, + PCII440FXState **pi440fx_state, + MemoryRegion *address_space_mem, + MemoryRegion *address_space_io, + ram_addr_t ram_size, + ram_addr_t below_4g_mem_size, + ram_addr_t above_4g_mem_size, + MemoryRegion *pci_memory, + MemoryRegion *ram_memory); + +PCIBus *find_i440fx(void); + +#endif diff --git a/include/hw/pci-host/pam.h b/include/hw/pci-host/pam.h new file mode 100644 index 000000000..fec5cd35d --- /dev/null +++ b/include/hw/pci-host/pam.h @@ -0,0 +1,92 @@ +#ifndef QEMU_PAM_H +#define QEMU_PAM_H + +/* + * Copyright (c) 2006 Fabrice Bellard + * Copyright (c) 2011 Isaku Yamahata + * VA Linux Systems Japan K.K. + * Copyright (c) 2012 Jason Baron + * + * Split out from piix.c + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ + +/* + * SMRAM memory area and PAM memory area in Legacy address range for PC. + * PAM: Programmable Attribute Map registers + * + * 0xa0000 - 0xbffff compatible SMRAM + * + * 0xc0000 - 0xc3fff Expansion area memory segments + * 0xc4000 - 0xc7fff + * 0xc8000 - 0xcbfff + * 0xcc000 - 0xcffff + * 0xd0000 - 0xd3fff + * 0xd4000 - 0xd7fff + * 0xd8000 - 0xdbfff + * 0xdc000 - 0xdffff + * 0xe0000 - 0xe3fff Extended System BIOS Area Memory Segments + * 0xe4000 - 0xe7fff + * 0xe8000 - 0xebfff + * 0xec000 - 0xeffff + * + * 0xf0000 - 0xfffff System BIOS Area Memory Segments + */ + +#include "exec/memory.h" + +#define SMRAM_C_BASE 0xa0000 +#define SMRAM_C_END 0xc0000 +#define SMRAM_C_SIZE 0x20000 + +#define PAM_EXPAN_BASE 0xc0000 +#define PAM_EXPAN_SIZE 0x04000 + +#define PAM_EXBIOS_BASE 0xe0000 +#define PAM_EXBIOS_SIZE 0x04000 + +#define PAM_BIOS_BASE 0xf0000 +#define PAM_BIOS_END 0xfffff +/* 64KB: Intel 3 series express chipset family p. 58*/ +#define PAM_BIOS_SIZE 0x10000 + +/* PAM registers: log nibble and high nibble*/ +#define PAM_ATTR_WE ((uint8_t)2) +#define PAM_ATTR_RE ((uint8_t)1) +#define PAM_ATTR_MASK ((uint8_t)3) + +/* SMRAM register */ +#define SMRAM_D_OPEN ((uint8_t)(1 << 6)) +#define SMRAM_D_CLS ((uint8_t)(1 << 5)) +#define SMRAM_D_LCK ((uint8_t)(1 << 4)) +#define SMRAM_G_SMRAME ((uint8_t)(1 << 3)) +#define SMRAM_C_BASE_SEG_MASK ((uint8_t)0x7) +#define SMRAM_C_BASE_SEG ((uint8_t)0x2) /* hardwired to b010 */ + +typedef struct PAMMemoryRegion { + MemoryRegion alias[4]; /* index = PAM value */ + unsigned current; +} PAMMemoryRegion; + +void init_pam(DeviceState *dev, MemoryRegion *ram, MemoryRegion *system, + MemoryRegion *pci, PAMMemoryRegion *mem, uint32_t start, uint32_t size); +void pam_update(PAMMemoryRegion *mem, int idx, uint8_t val); + +#endif /* QEMU_PAM_H */ diff --git a/include/hw/pci-host/pnv_phb3.h b/include/hw/pci-host/pnv_phb3.h new file mode 100644 index 000000000..e2a2e3624 --- /dev/null +++ b/include/hw/pci-host/pnv_phb3.h @@ -0,0 +1,167 @@ +/* + * QEMU PowerPC PowerNV (POWER8) PHB3 model + * + * Copyright (c) 2014-2020, IBM Corporation. + * + * This code is licensed under the GPL version 2 or later. See the + * COPYING file in the top-level directory. + */ + +#ifndef PCI_HOST_PNV_PHB3_H +#define PCI_HOST_PNV_PHB3_H + +#include "hw/pci/pcie_host.h" +#include "hw/pci/pcie_port.h" +#include "hw/ppc/xics.h" +#include "qom/object.h" + +typedef struct PnvPHB3 PnvPHB3; + +/* + * PHB3 XICS Source for MSIs + */ +#define TYPE_PHB3_MSI "phb3-msi" +typedef struct Phb3MsiState Phb3MsiState; +DECLARE_INSTANCE_CHECKER(Phb3MsiState, PHB3_MSI, + TYPE_PHB3_MSI) + +#define PHB3_MAX_MSI 2048 + +struct Phb3MsiState { + ICSState ics; + qemu_irq *qirqs; + + PnvPHB3 *phb; + uint64_t rba[PHB3_MAX_MSI / 64]; + uint32_t rba_sum; +}; + +void pnv_phb3_msi_update_config(Phb3MsiState *msis, uint32_t base, + uint32_t count); +void pnv_phb3_msi_send(Phb3MsiState *msis, uint64_t addr, uint16_t data, + int32_t dev_pe); +void pnv_phb3_msi_ffi(Phb3MsiState *msis, uint64_t val); +void pnv_phb3_msi_pic_print_info(Phb3MsiState *msis, Monitor *mon); + + +/* + * We have one such address space wrapper per possible device under + * the PHB since they need to be assigned statically at qemu device + * creation time. The relationship to a PE is done later dynamically. + * This means we can potentially create a lot of these guys. Q35 + * stores them as some kind of radix tree but we never really need to + * do fast lookups so instead we simply keep a QLIST of them for now, + * we can add the radix if needed later on. + * + * We do cache the PE number to speed things up a bit though. + */ +typedef struct PnvPhb3DMASpace { + PCIBus *bus; + uint8_t devfn; + int pe_num; /* Cached PE number */ +#define PHB_INVALID_PE (-1) + PnvPHB3 *phb; + AddressSpace dma_as; + IOMMUMemoryRegion dma_mr; + MemoryRegion msi32_mr; + MemoryRegion msi64_mr; + QLIST_ENTRY(PnvPhb3DMASpace) list; +} PnvPhb3DMASpace; + +/* + * PHB3 Power Bus Common Queue + */ +#define TYPE_PNV_PBCQ "pnv-pbcq" +OBJECT_DECLARE_SIMPLE_TYPE(PnvPBCQState, PNV_PBCQ) + +struct PnvPBCQState { + DeviceState parent; + + uint32_t nest_xbase; + uint32_t spci_xbase; + uint32_t pci_xbase; +#define PBCQ_NEST_REGS_COUNT 0x46 +#define PBCQ_PCI_REGS_COUNT 0x15 +#define PBCQ_SPCI_REGS_COUNT 0x5 + + uint64_t nest_regs[PBCQ_NEST_REGS_COUNT]; + uint64_t spci_regs[PBCQ_SPCI_REGS_COUNT]; + uint64_t pci_regs[PBCQ_PCI_REGS_COUNT]; + MemoryRegion mmbar0; + MemoryRegion mmbar1; + MemoryRegion phbbar; + uint64_t mmio0_base; + uint64_t mmio0_size; + uint64_t mmio1_base; + uint64_t mmio1_size; + PnvPHB3 *phb; + + MemoryRegion xscom_nest_regs; + MemoryRegion xscom_pci_regs; + MemoryRegion xscom_spci_regs; +}; + +/* + * PHB3 PCIe Root port + */ +#define TYPE_PNV_PHB3_ROOT_BUS "pnv-phb3-root-bus" + +#define TYPE_PNV_PHB3_ROOT_PORT "pnv-phb3-root-port" + +typedef struct PnvPHB3RootPort { + PCIESlot parent_obj; +} PnvPHB3RootPort; + +/* + * PHB3 PCIe Host Bridge for PowerNV machines (POWER8) + */ +#define TYPE_PNV_PHB3 "pnv-phb3" +OBJECT_DECLARE_SIMPLE_TYPE(PnvPHB3, PNV_PHB3) + +#define PNV_PHB3_NUM_M64 16 +#define PNV_PHB3_NUM_REGS (0x1000 >> 3) +#define PNV_PHB3_NUM_LSI 8 +#define PNV_PHB3_NUM_PE 256 + +#define PCI_MMIO_TOTAL_SIZE (0x1ull << 60) + +struct PnvPHB3 { + PCIExpressHost parent_obj; + + uint32_t chip_id; + uint32_t phb_id; + char bus_path[8]; + + uint64_t regs[PNV_PHB3_NUM_REGS]; + MemoryRegion mr_regs; + + MemoryRegion mr_m32; + MemoryRegion mr_m64[PNV_PHB3_NUM_M64]; + MemoryRegion pci_mmio; + MemoryRegion pci_io; + + uint64_t ioda_LIST[8]; + uint64_t ioda_LXIVT[8]; + uint64_t ioda_TVT[512]; + uint64_t ioda_M64BT[16]; + uint64_t ioda_MDT[256]; + uint64_t ioda_PEEV[4]; + + uint32_t total_irq; + ICSState lsis; + qemu_irq *qirqs; + Phb3MsiState msis; + + PnvPBCQState pbcq; + + PnvPHB3RootPort root; + + QLIST_HEAD(, PnvPhb3DMASpace) dma_spaces; +}; + +uint64_t pnv_phb3_reg_read(void *opaque, hwaddr off, unsigned size); +void pnv_phb3_reg_write(void *opaque, hwaddr off, uint64_t val, unsigned size); +void pnv_phb3_update_regions(PnvPHB3 *phb); +void pnv_phb3_remap_irqs(PnvPHB3 *phb); + +#endif /* PCI_HOST_PNV_PHB3_H */ diff --git a/include/hw/pci-host/pnv_phb3_regs.h b/include/hw/pci-host/pnv_phb3_regs.h new file mode 100644 index 000000000..a174ef1f7 --- /dev/null +++ b/include/hw/pci-host/pnv_phb3_regs.h @@ -0,0 +1,450 @@ +/* + * QEMU PowerPC PowerNV (POWER8) PHB3 model + * + * Copyright (c) 2013-2020, IBM Corporation. + * + * This code is licensed under the GPL version 2 or later. See the + * COPYING file in the top-level directory. + */ + +#ifndef PCI_HOST_PNV_PHB3_REGS_H +#define PCI_HOST_PNV_PHB3_REGS_H + +#include "qemu/host-utils.h" + +/* + * QEMU version of the GETFIELD/SETFIELD macros + * + * These are common with the PnvXive model. + */ +static inline uint64_t GETFIELD(uint64_t mask, uint64_t word) +{ + return (word & mask) >> ctz64(mask); +} + +static inline uint64_t SETFIELD(uint64_t mask, uint64_t word, + uint64_t value) +{ + return (word & ~mask) | ((value << ctz64(mask)) & mask); +} + +/* + * PBCQ XSCOM registers + */ + +#define PBCQ_NEST_IRSN_COMPARE 0x1a +#define PBCQ_NEST_IRSN_COMP PPC_BITMASK(0, 18) +#define PBCQ_NEST_IRSN_MASK 0x1b +#define PBCQ_NEST_LSI_SRC_ID 0x1f +#define PBCQ_NEST_LSI_SRC PPC_BITMASK(0, 7) +#define PBCQ_NEST_REGS_COUNT 0x46 +#define PBCQ_NEST_MMIO_BAR0 0x40 +#define PBCQ_NEST_MMIO_BAR1 0x41 +#define PBCQ_NEST_PHB_BAR 0x42 +#define PBCQ_NEST_MMIO_MASK0 0x43 +#define PBCQ_NEST_MMIO_MASK1 0x44 +#define PBCQ_NEST_BAR_EN 0x45 +#define PBCQ_NEST_BAR_EN_MMIO0 PPC_BIT(0) +#define PBCQ_NEST_BAR_EN_MMIO1 PPC_BIT(1) +#define PBCQ_NEST_BAR_EN_PHB PPC_BIT(2) +#define PBCQ_NEST_BAR_EN_IRSN_RX PPC_BIT(3) +#define PBCQ_NEST_BAR_EN_IRSN_TX PPC_BIT(4) + +#define PBCQ_PCI_REGS_COUNT 0x15 +#define PBCQ_PCI_BAR2 0x0b + +#define PBCQ_SPCI_REGS_COUNT 0x5 +#define PBCQ_SPCI_ASB_ADDR 0x0 +#define PBCQ_SPCI_ASB_STATUS 0x1 +#define PBCQ_SPCI_ASB_DATA 0x2 +#define PBCQ_SPCI_AIB_CAPP_EN 0x3 +#define PBCQ_SPCI_CAPP_SEC_TMR 0x4 + +/* + * PHB MMIO registers + */ + +/* PHB Fundamental register set A */ +#define PHB_LSI_SOURCE_ID 0x100 +#define PHB_LSI_SRC_ID PPC_BITMASK(5, 12) +#define PHB_DMA_CHAN_STATUS 0x110 +#define PHB_DMA_CHAN_ANY_ERR PPC_BIT(27) +#define PHB_DMA_CHAN_ANY_ERR1 PPC_BIT(28) +#define PHB_DMA_CHAN_ANY_FREEZE PPC_BIT(29) +#define PHB_CPU_LOADSTORE_STATUS 0x120 +#define PHB_CPU_LS_ANY_ERR PPC_BIT(27) +#define PHB_CPU_LS_ANY_ERR1 PPC_BIT(28) +#define PHB_CPU_LS_ANY_FREEZE PPC_BIT(29) +#define PHB_DMA_MSI_NODE_ID 0x128 +#define PHB_DMAMSI_NID_FIXED PPC_BIT(0) +#define PHB_DMAMSI_NID PPC_BITMASK(24, 31) +#define PHB_CONFIG_DATA 0x130 +#define PHB_LOCK0 0x138 +#define PHB_CONFIG_ADDRESS 0x140 +#define PHB_CA_ENABLE PPC_BIT(0) +#define PHB_CA_BUS PPC_BITMASK(4, 11) +#define PHB_CA_DEV PPC_BITMASK(12, 16) +#define PHB_CA_FUNC PPC_BITMASK(17, 19) +#define PHB_CA_REG PPC_BITMASK(20, 31) +#define PHB_CA_PE PPC_BITMASK(40, 47) +#define PHB_LOCK1 0x148 +#define PHB_IVT_BAR 0x150 +#define PHB_IVT_BAR_ENABLE PPC_BIT(0) +#define PHB_IVT_BASE_ADDRESS_MASK PPC_BITMASK(14, 48) +#define PHB_IVT_LENGTH_MASK PPC_BITMASK(52, 63) +#define PHB_RBA_BAR 0x158 +#define PHB_RBA_BAR_ENABLE PPC_BIT(0) +#define PHB_RBA_BASE_ADDRESS PPC_BITMASK(14, 55) +#define PHB_PHB3_CONFIG 0x160 +#define PHB_PHB3C_64B_TCE_EN PPC_BIT(2) +#define PHB_PHB3C_32BIT_MSI_EN PPC_BIT(8) +#define PHB_PHB3C_64BIT_MSI_EN PPC_BIT(14) +#define PHB_PHB3C_M32_EN PPC_BIT(16) +#define PHB_RTT_BAR 0x168 +#define PHB_RTT_BAR_ENABLE PPC_BIT(0) +#define PHB_RTT_BASE_ADDRESS_MASK PPC_BITMASK(14, 46) +#define PHB_PELTV_BAR 0x188 +#define PHB_PELTV_BAR_ENABLE PPC_BIT(0) +#define PHB_PELTV_BASE_ADDRESS PPC_BITMASK(14, 50) +#define PHB_M32_BASE_ADDR 0x190 +#define PHB_M32_BASE_MASK 0x198 +#define PHB_M32_START_ADDR 0x1a0 +#define PHB_PEST_BAR 0x1a8 +#define PHB_PEST_BAR_ENABLE PPC_BIT(0) +#define PHB_PEST_BASE_ADDRESS PPC_BITMASK(14, 51) +#define PHB_M64_UPPER_BITS 0x1f0 +#define PHB_INTREP_TIMER 0x1f8 +#define PHB_DMARD_SYNC 0x200 +#define PHB_DMARD_SYNC_START PPC_BIT(0) +#define PHB_DMARD_SYNC_COMPLETE PPC_BIT(1) +#define PHB_RTC_INVALIDATE 0x208 +#define PHB_RTC_INVALIDATE_ALL PPC_BIT(0) +#define PHB_RTC_INVALIDATE_RID PPC_BITMASK(16, 31) +#define PHB_TCE_KILL 0x210 +#define PHB_TCE_KILL_ALL PPC_BIT(0) +#define PHB_TCE_SPEC_CTL 0x218 +#define PHB_IODA_ADDR 0x220 +#define PHB_IODA_AD_AUTOINC PPC_BIT(0) +#define PHB_IODA_AD_TSEL PPC_BITMASK(11, 15) +#define PHB_IODA_AD_TADR PPC_BITMASK(55, 63) +#define PHB_IODA_DATA0 0x228 +#define PHB_FFI_REQUEST 0x238 +#define PHB_FFI_LOCK_CLEAR PPC_BIT(3) +#define PHB_FFI_REQUEST_ISN PPC_BITMASK(49, 59) +#define PHB_FFI_LOCK 0x240 +#define PHB_FFI_LOCK_STATE PPC_BIT(0) +#define PHB_XIVE_UPDATE 0x248 /* Broken in DD1 */ +#define PHB_PHB3_GEN_CAP 0x250 +#define PHB_PHB3_TCE_CAP 0x258 +#define PHB_PHB3_IRQ_CAP 0x260 +#define PHB_PHB3_EEH_CAP 0x268 +#define PHB_IVC_INVALIDATE 0x2a0 +#define PHB_IVC_INVALIDATE_ALL PPC_BIT(0) +#define PHB_IVC_INVALIDATE_SID PPC_BITMASK(16, 31) +#define PHB_IVC_UPDATE 0x2a8 +#define PHB_IVC_UPDATE_ENABLE_P PPC_BIT(0) +#define PHB_IVC_UPDATE_ENABLE_Q PPC_BIT(1) +#define PHB_IVC_UPDATE_ENABLE_SERVER PPC_BIT(2) +#define PHB_IVC_UPDATE_ENABLE_PRI PPC_BIT(3) +#define PHB_IVC_UPDATE_ENABLE_GEN PPC_BIT(4) +#define PHB_IVC_UPDATE_ENABLE_CON PPC_BIT(5) +#define PHB_IVC_UPDATE_GEN_MATCH PPC_BITMASK(6, 7) +#define PHB_IVC_UPDATE_SERVER PPC_BITMASK(8, 23) +#define PHB_IVC_UPDATE_PRI PPC_BITMASK(24, 31) +#define PHB_IVC_UPDATE_GEN PPC_BITMASK(32, 33) +#define PHB_IVC_UPDATE_P PPC_BITMASK(34, 34) +#define PHB_IVC_UPDATE_Q PPC_BITMASK(35, 35) +#define PHB_IVC_UPDATE_SID PPC_BITMASK(48, 63) +#define PHB_PAPR_ERR_INJ_CTL 0x2b0 +#define PHB_PAPR_ERR_INJ_CTL_INB PPC_BIT(0) +#define PHB_PAPR_ERR_INJ_CTL_OUTB PPC_BIT(1) +#define PHB_PAPR_ERR_INJ_CTL_STICKY PPC_BIT(2) +#define PHB_PAPR_ERR_INJ_CTL_CFG PPC_BIT(3) +#define PHB_PAPR_ERR_INJ_CTL_RD PPC_BIT(4) +#define PHB_PAPR_ERR_INJ_CTL_WR PPC_BIT(5) +#define PHB_PAPR_ERR_INJ_CTL_FREEZE PPC_BIT(6) +#define PHB_PAPR_ERR_INJ_ADDR 0x2b8 +#define PHB_PAPR_ERR_INJ_ADDR_MMIO PPC_BITMASK(16, 63) +#define PHB_PAPR_ERR_INJ_MASK 0x2c0 +#define PHB_PAPR_ERR_INJ_MASK_CFG PPC_BITMASK(4, 11) +#define PHB_PAPR_ERR_INJ_MASK_MMIO PPC_BITMASK(16, 63) +#define PHB_ETU_ERR_SUMMARY 0x2c8 + +/* UTL registers */ +#define UTL_SYS_BUS_CONTROL 0x400 +#define UTL_STATUS 0x408 +#define UTL_SYS_BUS_AGENT_STATUS 0x410 +#define UTL_SYS_BUS_AGENT_ERR_SEVERITY 0x418 +#define UTL_SYS_BUS_AGENT_IRQ_EN 0x420 +#define UTL_SYS_BUS_BURST_SZ_CONF 0x440 +#define UTL_REVISION_ID 0x448 +#define UTL_BCLK_DOMAIN_DBG1 0x460 +#define UTL_BCLK_DOMAIN_DBG2 0x468 +#define UTL_BCLK_DOMAIN_DBG3 0x470 +#define UTL_BCLK_DOMAIN_DBG4 0x478 +#define UTL_BCLK_DOMAIN_DBG5 0x480 +#define UTL_BCLK_DOMAIN_DBG6 0x488 +#define UTL_OUT_POST_HDR_BUF_ALLOC 0x4c0 +#define UTL_OUT_POST_DAT_BUF_ALLOC 0x4d0 +#define UTL_IN_POST_HDR_BUF_ALLOC 0x4e0 +#define UTL_IN_POST_DAT_BUF_ALLOC 0x4f0 +#define UTL_OUT_NP_BUF_ALLOC 0x500 +#define UTL_IN_NP_BUF_ALLOC 0x510 +#define UTL_PCIE_TAGS_ALLOC 0x520 +#define UTL_GBIF_READ_TAGS_ALLOC 0x530 +#define UTL_PCIE_PORT_CONTROL 0x540 +#define UTL_PCIE_PORT_STATUS 0x548 +#define UTL_PCIE_PORT_ERROR_SEV 0x550 +#define UTL_PCIE_PORT_IRQ_EN 0x558 +#define UTL_RC_STATUS 0x560 +#define UTL_RC_ERR_SEVERITY 0x568 +#define UTL_RC_IRQ_EN 0x570 +#define UTL_EP_STATUS 0x578 +#define UTL_EP_ERR_SEVERITY 0x580 +#define UTL_EP_ERR_IRQ_EN 0x588 +#define UTL_PCI_PM_CTRL1 0x590 +#define UTL_PCI_PM_CTRL2 0x598 +#define UTL_GP_CTL1 0x5a0 +#define UTL_GP_CTL2 0x5a8 +#define UTL_PCLK_DOMAIN_DBG1 0x5b0 +#define UTL_PCLK_DOMAIN_DBG2 0x5b8 +#define UTL_PCLK_DOMAIN_DBG3 0x5c0 +#define UTL_PCLK_DOMAIN_DBG4 0x5c8 + +/* PCI-E Stack registers */ +#define PHB_PCIE_SYSTEM_CONFIG 0x600 +#define PHB_PCIE_BUS_NUMBER 0x608 +#define PHB_PCIE_SYSTEM_TEST 0x618 +#define PHB_PCIE_LINK_MANAGEMENT 0x630 +#define PHB_PCIE_LM_LINK_ACTIVE PPC_BIT(8) +#define PHB_PCIE_DLP_TRAIN_CTL 0x640 +#define PHB_PCIE_DLP_TCTX_DISABLE PPC_BIT(1) +#define PHB_PCIE_DLP_TCRX_DISABLED PPC_BIT(16) +#define PHB_PCIE_DLP_INBAND_PRESENCE PPC_BIT(19) +#define PHB_PCIE_DLP_TC_DL_LINKUP PPC_BIT(21) +#define PHB_PCIE_DLP_TC_DL_PGRESET PPC_BIT(22) +#define PHB_PCIE_DLP_TC_DL_LINKACT PPC_BIT(23) +#define PHB_PCIE_SLOP_LOOPBACK_STATUS 0x648 +#define PHB_PCIE_SYS_LINK_INIT 0x668 +#define PHB_PCIE_UTL_CONFIG 0x670 +#define PHB_PCIE_DLP_CONTROL 0x678 +#define PHB_PCIE_UTL_ERRLOG1 0x680 +#define PHB_PCIE_UTL_ERRLOG2 0x688 +#define PHB_PCIE_UTL_ERRLOG3 0x690 +#define PHB_PCIE_UTL_ERRLOG4 0x698 +#define PHB_PCIE_DLP_ERRLOG1 0x6a0 +#define PHB_PCIE_DLP_ERRLOG2 0x6a8 +#define PHB_PCIE_DLP_ERR_STATUS 0x6b0 +#define PHB_PCIE_DLP_ERR_COUNTERS 0x6b8 +#define PHB_PCIE_UTL_ERR_INJECT 0x6c0 +#define PHB_PCIE_TLDLP_ERR_INJECT 0x6c8 +#define PHB_PCIE_LANE_EQ_CNTL0 0x6d0 +#define PHB_PCIE_LANE_EQ_CNTL1 0x6d8 +#define PHB_PCIE_LANE_EQ_CNTL2 0x6e0 +#define PHB_PCIE_LANE_EQ_CNTL3 0x6e8 +#define PHB_PCIE_STRAPPING 0x700 + +/* Fundamental register set B */ +#define PHB_VERSION 0x800 +#define PHB_RESET 0x808 +#define PHB_CONTROL 0x810 +#define PHB_CTRL_IVE_128_BYTES PPC_BIT(24) +#define PHB_AIB_RX_CRED_INIT_TIMER 0x818 +#define PHB_AIB_RX_CMD_CRED 0x820 +#define PHB_AIB_RX_DATA_CRED 0x828 +#define PHB_AIB_TX_CMD_CRED 0x830 +#define PHB_AIB_TX_DATA_CRED 0x838 +#define PHB_AIB_TX_CHAN_MAPPING 0x840 +#define PHB_AIB_TAG_ENABLE 0x858 +#define PHB_AIB_FENCE_CTRL 0x860 +#define PHB_TCE_TAG_ENABLE 0x868 +#define PHB_TCE_WATERMARK 0x870 +#define PHB_TIMEOUT_CTRL1 0x878 +#define PHB_TIMEOUT_CTRL2 0x880 +#define PHB_Q_DMA_R 0x888 +#define PHB_Q_DMA_R_QUIESCE_DMA PPC_BIT(0) +#define PHB_Q_DMA_R_AUTORESET PPC_BIT(1) +#define PHB_Q_DMA_R_DMA_RESP_STATUS PPC_BIT(4) +#define PHB_Q_DMA_R_MMIO_RESP_STATUS PPC_BIT(5) +#define PHB_Q_DMA_R_TCE_RESP_STATUS PPC_BIT(6) +#define PHB_AIB_TAG_STATUS 0x900 +#define PHB_TCE_TAG_STATUS 0x908 + +/* FIR & Error registers */ +#define PHB_LEM_FIR_ACCUM 0xc00 +#define PHB_LEM_FIR_AND_MASK 0xc08 +#define PHB_LEM_FIR_OR_MASK 0xc10 +#define PHB_LEM_ERROR_MASK 0xc18 +#define PHB_LEM_ERROR_AND_MASK 0xc20 +#define PHB_LEM_ERROR_OR_MASK 0xc28 +#define PHB_LEM_ACTION0 0xc30 +#define PHB_LEM_ACTION1 0xc38 +#define PHB_LEM_WOF 0xc40 +#define PHB_ERR_STATUS 0xc80 +#define PHB_ERR1_STATUS 0xc88 +#define PHB_ERR_INJECT 0xc90 +#define PHB_ERR_LEM_ENABLE 0xc98 +#define PHB_ERR_IRQ_ENABLE 0xca0 +#define PHB_ERR_FREEZE_ENABLE 0xca8 +#define PHB_ERR_AIB_FENCE_ENABLE 0xcb0 +#define PHB_ERR_LOG_0 0xcc0 +#define PHB_ERR_LOG_1 0xcc8 +#define PHB_ERR_STATUS_MASK 0xcd0 +#define PHB_ERR1_STATUS_MASK 0xcd8 + +#define PHB_OUT_ERR_STATUS 0xd00 +#define PHB_OUT_ERR1_STATUS 0xd08 +#define PHB_OUT_ERR_INJECT 0xd10 +#define PHB_OUT_ERR_LEM_ENABLE 0xd18 +#define PHB_OUT_ERR_IRQ_ENABLE 0xd20 +#define PHB_OUT_ERR_FREEZE_ENABLE 0xd28 +#define PHB_OUT_ERR_AIB_FENCE_ENABLE 0xd30 +#define PHB_OUT_ERR_LOG_0 0xd40 +#define PHB_OUT_ERR_LOG_1 0xd48 +#define PHB_OUT_ERR_STATUS_MASK 0xd50 +#define PHB_OUT_ERR1_STATUS_MASK 0xd58 + +#define PHB_INA_ERR_STATUS 0xd80 +#define PHB_INA_ERR1_STATUS 0xd88 +#define PHB_INA_ERR_INJECT 0xd90 +#define PHB_INA_ERR_LEM_ENABLE 0xd98 +#define PHB_INA_ERR_IRQ_ENABLE 0xda0 +#define PHB_INA_ERR_FREEZE_ENABLE 0xda8 +#define PHB_INA_ERR_AIB_FENCE_ENABLE 0xdb0 +#define PHB_INA_ERR_LOG_0 0xdc0 +#define PHB_INA_ERR_LOG_1 0xdc8 +#define PHB_INA_ERR_STATUS_MASK 0xdd0 +#define PHB_INA_ERR1_STATUS_MASK 0xdd8 + +#define PHB_INB_ERR_STATUS 0xe00 +#define PHB_INB_ERR1_STATUS 0xe08 +#define PHB_INB_ERR_INJECT 0xe10 +#define PHB_INB_ERR_LEM_ENABLE 0xe18 +#define PHB_INB_ERR_IRQ_ENABLE 0xe20 +#define PHB_INB_ERR_FREEZE_ENABLE 0xe28 +#define PHB_INB_ERR_AIB_FENCE_ENABLE 0xe30 +#define PHB_INB_ERR_LOG_0 0xe40 +#define PHB_INB_ERR_LOG_1 0xe48 +#define PHB_INB_ERR_STATUS_MASK 0xe50 +#define PHB_INB_ERR1_STATUS_MASK 0xe58 + +/* Performance monitor & Debug registers */ +#define PHB_TRACE_CONTROL 0xf80 +#define PHB_PERFMON_CONFIG 0xf88 +#define PHB_PERFMON_CTR0 0xf90 +#define PHB_PERFMON_CTR1 0xf98 +#define PHB_PERFMON_CTR2 0xfa0 +#define PHB_PERFMON_CTR3 0xfa8 +#define PHB_HOTPLUG_OVERRIDE 0xfb0 +#define PHB_HPOVR_FORCE_RESAMPLE PPC_BIT(9) +#define PHB_HPOVR_PRESENCE_A PPC_BIT(10) +#define PHB_HPOVR_PRESENCE_B PPC_BIT(11) +#define PHB_HPOVR_LINK_ACTIVE PPC_BIT(12) +#define PHB_HPOVR_LINK_BIFURCATED PPC_BIT(13) +#define PHB_HPOVR_LINK_LANE_SWAPPED PPC_BIT(14) + +/* + * IODA2 on-chip tables + */ + +#define IODA2_TBL_LIST 1 +#define IODA2_TBL_LXIVT 2 +#define IODA2_TBL_IVC_CAM 3 +#define IODA2_TBL_RBA 4 +#define IODA2_TBL_RCAM 5 +#define IODA2_TBL_MRT 6 +#define IODA2_TBL_PESTA 7 +#define IODA2_TBL_PESTB 8 +#define IODA2_TBL_TVT 9 +#define IODA2_TBL_TCAM 10 +#define IODA2_TBL_TDR 11 +#define IODA2_TBL_M64BT 16 +#define IODA2_TBL_M32DT 17 +#define IODA2_TBL_PEEV 20 + +/* LXIVT */ +#define IODA2_LXIVT_SERVER PPC_BITMASK(8, 23) +#define IODA2_LXIVT_PRIORITY PPC_BITMASK(24, 31) +#define IODA2_LXIVT_NODE_ID PPC_BITMASK(56, 63) + +/* IVT */ +#define IODA2_IVT_SERVER PPC_BITMASK(0, 23) +#define IODA2_IVT_PRIORITY PPC_BITMASK(24, 31) +#define IODA2_IVT_GEN PPC_BITMASK(37, 38) +#define IODA2_IVT_P PPC_BITMASK(39, 39) +#define IODA2_IVT_Q PPC_BITMASK(47, 47) +#define IODA2_IVT_PE PPC_BITMASK(48, 63) + +/* TVT */ +#define IODA2_TVT_TABLE_ADDR PPC_BITMASK(0, 47) +#define IODA2_TVT_NUM_LEVELS PPC_BITMASK(48, 50) +#define IODA2_TVE_1_LEVEL 0 +#define IODA2_TVE_2_LEVELS 1 +#define IODA2_TVE_3_LEVELS 2 +#define IODA2_TVE_4_LEVELS 3 +#define IODA2_TVE_5_LEVELS 4 +#define IODA2_TVT_TCE_TABLE_SIZE PPC_BITMASK(51, 55) +#define IODA2_TVT_IO_PSIZE PPC_BITMASK(59, 63) + +/* PESTA */ +#define IODA2_PESTA_MMIO_FROZEN PPC_BIT(0) + +/* PESTB */ +#define IODA2_PESTB_DMA_STOPPED PPC_BIT(0) + +/* M32DT */ +#define IODA2_M32DT_PE PPC_BITMASK(8, 15) + +/* M64BT */ +#define IODA2_M64BT_ENABLE PPC_BIT(0) +#define IODA2_M64BT_SINGLE_PE PPC_BIT(1) +#define IODA2_M64BT_BASE PPC_BITMASK(2, 31) +#define IODA2_M64BT_MASK PPC_BITMASK(34, 63) +#define IODA2_M64BT_SINGLE_BASE PPC_BITMASK(2, 26) +#define IODA2_M64BT_PE_HI PPC_BITMASK(27, 31) +#define IODA2_M64BT_SINGLE_MASK PPC_BITMASK(34, 58) +#define IODA2_M64BT_PE_LOW PPC_BITMASK(59, 63) + +/* + * IODA2 in-memory tables + */ + +/* + * PEST + * + * 2x8 bytes entries, PEST0 and PEST1 + */ + +#define IODA2_PEST0_MMIO_CAUSE PPC_BIT(2) +#define IODA2_PEST0_CFG_READ PPC_BIT(3) +#define IODA2_PEST0_CFG_WRITE PPC_BIT(4) +#define IODA2_PEST0_TTYPE PPC_BITMASK(5, 7) +#define PEST_TTYPE_DMA_WRITE 0 +#define PEST_TTYPE_MSI 1 +#define PEST_TTYPE_DMA_READ 2 +#define PEST_TTYPE_DMA_READ_RESP 3 +#define PEST_TTYPE_MMIO_LOAD 4 +#define PEST_TTYPE_MMIO_STORE 5 +#define PEST_TTYPE_OTHER 7 +#define IODA2_PEST0_CA_RETURN PPC_BIT(8) +#define IODA2_PEST0_UTL_RTOS_TIMEOUT PPC_BIT(8) /* Same bit as CA return */ +#define IODA2_PEST0_UR_RETURN PPC_BIT(9) +#define IODA2_PEST0_UTL_NONFATAL PPC_BIT(10) +#define IODA2_PEST0_UTL_FATAL PPC_BIT(11) +#define IODA2_PEST0_PARITY_UE PPC_BIT(13) +#define IODA2_PEST0_UTL_CORRECTABLE PPC_BIT(14) +#define IODA2_PEST0_UTL_INTERRUPT PPC_BIT(15) +#define IODA2_PEST0_MMIO_XLATE PPC_BIT(16) +#define IODA2_PEST0_IODA2_ERROR PPC_BIT(16) /* Same bit as MMIO xlate */ +#define IODA2_PEST0_TCE_PAGE_FAULT PPC_BIT(18) +#define IODA2_PEST0_TCE_ACCESS_FAULT PPC_BIT(19) +#define IODA2_PEST0_DMA_RESP_TIMEOUT PPC_BIT(20) +#define IODA2_PEST0_AIB_SIZE_INVALID PPC_BIT(21) +#define IODA2_PEST0_LEM_BIT PPC_BITMASK(26, 31) +#define IODA2_PEST0_RID PPC_BITMASK(32, 47) +#define IODA2_PEST0_MSI_DATA PPC_BITMASK(48, 63) + +#define IODA2_PEST1_FAIL_ADDR PPC_BITMASK(3, 63) + + +#endif /* PCI_HOST_PNV_PHB3_REGS_H */ diff --git a/include/hw/pci-host/pnv_phb4.h b/include/hw/pci-host/pnv_phb4.h new file mode 100644 index 000000000..27556ae53 --- /dev/null +++ b/include/hw/pci-host/pnv_phb4.h @@ -0,0 +1,224 @@ +/* + * QEMU PowerPC PowerNV (POWER9) PHB4 model + * + * Copyright (c) 2018-2020, IBM Corporation. + * + * This code is licensed under the GPL version 2 or later. See the + * COPYING file in the top-level directory. + */ + +#ifndef PCI_HOST_PNV_PHB4_H +#define PCI_HOST_PNV_PHB4_H + +#include "hw/pci/pcie_host.h" +#include "hw/pci/pcie_port.h" +#include "hw/ppc/xive.h" +#include "qom/object.h" + +typedef struct PnvPhb4PecStack PnvPhb4PecStack; +typedef struct PnvPHB4 PnvPHB4; +typedef struct PnvChip PnvChip; + +/* + * We have one such address space wrapper per possible device under + * the PHB since they need to be assigned statically at qemu device + * creation time. The relationship to a PE is done later + * dynamically. This means we can potentially create a lot of these + * guys. Q35 stores them as some kind of radix tree but we never + * really need to do fast lookups so instead we simply keep a QLIST of + * them for now, we can add the radix if needed later on. + * + * We do cache the PE number to speed things up a bit though. + */ +typedef struct PnvPhb4DMASpace { + PCIBus *bus; + uint8_t devfn; + int pe_num; /* Cached PE number */ +#define PHB_INVALID_PE (-1) + PnvPHB4 *phb; + AddressSpace dma_as; + IOMMUMemoryRegion dma_mr; + MemoryRegion msi32_mr; + MemoryRegion msi64_mr; + QLIST_ENTRY(PnvPhb4DMASpace) list; +} PnvPhb4DMASpace; + +/* + * PHB4 PCIe Root port + */ +#define TYPE_PNV_PHB4_ROOT_BUS "pnv-phb4-root-bus" +#define TYPE_PNV_PHB4_ROOT_PORT "pnv-phb4-root-port" + +typedef struct PnvPHB4RootPort { + PCIESlot parent_obj; +} PnvPHB4RootPort; + +/* + * PHB4 PCIe Host Bridge for PowerNV machines (POWER9) + */ +#define TYPE_PNV_PHB4 "pnv-phb4" +OBJECT_DECLARE_SIMPLE_TYPE(PnvPHB4, PNV_PHB4) + +#define PNV_PHB4_MAX_LSIs 8 +#define PNV_PHB4_MAX_INTs 4096 +#define PNV_PHB4_MAX_MIST (PNV_PHB4_MAX_INTs >> 2) +#define PNV_PHB4_MAX_MMIO_WINDOWS 32 +#define PNV_PHB4_MIN_MMIO_WINDOWS 16 +#define PNV_PHB4_NUM_REGS (0x3000 >> 3) +#define PNV_PHB4_MAX_PEs 512 +#define PNV_PHB4_MAX_TVEs (PNV_PHB4_MAX_PEs * 2) +#define PNV_PHB4_MAX_PEEVs (PNV_PHB4_MAX_PEs / 64) +#define PNV_PHB4_MAX_MBEs (PNV_PHB4_MAX_MMIO_WINDOWS * 2) + +#define PNV_PHB4_VERSION 0x000000a400000002ull +#define PNV_PHB4_DEVICE_ID 0x04c1 + +#define PCI_MMIO_TOTAL_SIZE (0x1ull << 60) + +struct PnvPHB4 { + PCIExpressHost parent_obj; + + PnvPHB4RootPort root; + + uint32_t chip_id; + uint32_t phb_id; + + uint64_t version; + uint16_t device_id; + + char bus_path[8]; + + /* Main register images */ + uint64_t regs[PNV_PHB4_NUM_REGS]; + MemoryRegion mr_regs; + + /* Extra SCOM-only register */ + uint64_t scom_hv_ind_addr_reg; + + /* + * Geometry of the PHB. There are two types, small and big PHBs, a + * number of resources (number of PEs, windows etc...) are doubled + * for a big PHB + */ + bool big_phb; + + /* Memory regions for MMIO space */ + MemoryRegion mr_mmio[PNV_PHB4_MAX_MMIO_WINDOWS]; + + /* PCI side space */ + MemoryRegion pci_mmio; + MemoryRegion pci_io; + + /* On-chip IODA tables */ + uint64_t ioda_LIST[PNV_PHB4_MAX_LSIs]; + uint64_t ioda_MIST[PNV_PHB4_MAX_MIST]; + uint64_t ioda_TVT[PNV_PHB4_MAX_TVEs]; + uint64_t ioda_MBT[PNV_PHB4_MAX_MBEs]; + uint64_t ioda_MDT[PNV_PHB4_MAX_PEs]; + uint64_t ioda_PEEV[PNV_PHB4_MAX_PEEVs]; + + /* + * The internal PESTA/B is 2 bits per PE split into two tables, we + * store them in a single array here to avoid wasting space. + */ + uint8_t ioda_PEST_AB[PNV_PHB4_MAX_PEs]; + + /* P9 Interrupt generation */ + XiveSource xsrc; + qemu_irq *qirqs; + + PnvPhb4PecStack *stack; + + QLIST_HEAD(, PnvPhb4DMASpace) dma_spaces; +}; + +void pnv_phb4_pic_print_info(PnvPHB4 *phb, Monitor *mon); +void pnv_phb4_update_regions(PnvPhb4PecStack *stack); +extern const MemoryRegionOps pnv_phb4_xscom_ops; + +/* + * PHB4 PEC (PCI Express Controller) + */ +#define TYPE_PNV_PHB4_PEC "pnv-phb4-pec" +OBJECT_DECLARE_TYPE(PnvPhb4PecState, PnvPhb4PecClass, PNV_PHB4_PEC) + +#define TYPE_PNV_PHB4_PEC_STACK "pnv-phb4-pec-stack" +OBJECT_DECLARE_SIMPLE_TYPE(PnvPhb4PecStack, PNV_PHB4_PEC_STACK) + +/* Per-stack data */ +struct PnvPhb4PecStack { + DeviceState parent; + + /* My own stack number */ + uint32_t stack_no; + + /* Nest registers */ +#define PHB4_PEC_NEST_STK_REGS_COUNT 0x17 + uint64_t nest_regs[PHB4_PEC_NEST_STK_REGS_COUNT]; + MemoryRegion nest_regs_mr; + + /* PCI registers (excluding pass-through) */ +#define PHB4_PEC_PCI_STK_REGS_COUNT 0xf + uint64_t pci_regs[PHB4_PEC_PCI_STK_REGS_COUNT]; + MemoryRegion pci_regs_mr; + + /* PHB pass-through XSCOM */ + MemoryRegion phb_regs_mr; + + /* Memory windows from PowerBus to PHB */ + MemoryRegion mmbar0; + MemoryRegion mmbar1; + MemoryRegion phbbar; + MemoryRegion intbar; + uint64_t mmio0_base; + uint64_t mmio0_size; + uint64_t mmio1_base; + uint64_t mmio1_size; + + /* The owner PEC */ + PnvPhb4PecState *pec; + + /* The actual PHB */ + PnvPHB4 phb; +}; + +struct PnvPhb4PecState { + DeviceState parent; + + /* PEC number in chip */ + uint32_t index; + uint32_t chip_id; + + MemoryRegion *system_memory; + + /* Nest registers, excuding per-stack */ +#define PHB4_PEC_NEST_REGS_COUNT 0xf + uint64_t nest_regs[PHB4_PEC_NEST_REGS_COUNT]; + MemoryRegion nest_regs_mr; + + /* PCI registers, excluding per-stack */ +#define PHB4_PEC_PCI_REGS_COUNT 0x2 + uint64_t pci_regs[PHB4_PEC_PCI_REGS_COUNT]; + MemoryRegion pci_regs_mr; + + /* Stacks */ + #define PHB4_PEC_MAX_STACKS 3 + uint32_t num_stacks; + PnvPhb4PecStack stacks[PHB4_PEC_MAX_STACKS]; +}; + + +struct PnvPhb4PecClass { + DeviceClass parent_class; + + uint32_t (*xscom_nest_base)(PnvPhb4PecState *pec); + uint32_t xscom_nest_size; + uint32_t (*xscom_pci_base)(PnvPhb4PecState *pec); + uint32_t xscom_pci_size; + const char *compat; + int compat_size; + const char *stk_compat; + int stk_compat_size; +}; + +#endif /* PCI_HOST_PNV_PHB4_H */ diff --git a/include/hw/pci-host/pnv_phb4_regs.h b/include/hw/pci-host/pnv_phb4_regs.h new file mode 100644 index 000000000..55df2c3e5 --- /dev/null +++ b/include/hw/pci-host/pnv_phb4_regs.h @@ -0,0 +1,553 @@ +/* + * QEMU PowerPC PowerNV (POWER9) PHB4 model + * + * Copyright (c) 2013-2020, IBM Corporation. + * + * This code is licensed under the GPL version 2 or later. See the + * COPYING file in the top-level directory. + */ + +#ifndef PCI_HOST_PNV_PHB4_REGS_H +#define PCI_HOST_PNV_PHB4_REGS_H + +/* + * PEC XSCOM registers + * + * There a 3 PECs in P9. Each PEC can have several PHBs. Each PEC has some + * "global" registers and some "per-stack" (per-PHB) registers. Those are + * organized in two XSCOM ranges, the "Nest" range and the "PCI" range, each + * range contains both some "PEC" registers and some "per-stack" registers. + * + * Finally the PCI range also contains an additional range per stack that + * passes through to some of the PHB own registers. + * + * PEC0 can contain 1 PHB (PHB0) + * PEC1 can contain 2 PHBs (PHB1 and PHB2) + * PEC2 can contain 3 PHBs (PHB3, PHB4 and PHB5) + */ + +/* + * This is the "stack" offset, it's the offset from a given range base + * to the first "per-stack" registers and also the stride between + * stacks, thus for PEC2, the global registers are at offset 0, the + * PHB3 registers at offset 0x40, the PHB4 at offset 0x80 etc.... + * + * It is *also* the offset to the pass-through SCOM region but in this case + * it is 0 based, ie PHB3 is at 0x100 PHB4 is a 0x140 etc.. + */ +#define PEC_STACK_OFFSET 0x40 + +/* XSCOM Nest global registers */ +#define PEC_NEST_PBCQ_HW_CONFIG 0x00 +#define PEC_NEST_DROP_PRIO_CTRL 0x01 +#define PEC_NEST_PBCQ_ERR_INJECT 0x02 +#define PEC_NEST_PCI_NEST_CLK_TRACE_CTL 0x03 +#define PEC_NEST_PBCQ_PMON_CTRL 0x04 +#define PEC_NEST_PBCQ_PBUS_ADDR_EXT 0x05 +#define PEC_NEST_PBCQ_PRED_VEC_TIMEOUT 0x06 +#define PEC_NEST_CAPP_CTRL 0x07 +#define PEC_NEST_PBCQ_READ_STK_OVR 0x08 +#define PEC_NEST_PBCQ_WRITE_STK_OVR 0x09 +#define PEC_NEST_PBCQ_STORE_STK_OVR 0x0a +#define PEC_NEST_PBCQ_RETRY_BKOFF_CTRL 0x0b + +/* XSCOM Nest per-stack registers */ +#define PEC_NEST_STK_PCI_NEST_FIR 0x00 +#define PEC_NEST_STK_PCI_NEST_FIR_CLR 0x01 +#define PEC_NEST_STK_PCI_NEST_FIR_SET 0x02 +#define PEC_NEST_STK_PCI_NEST_FIR_MSK 0x03 +#define PEC_NEST_STK_PCI_NEST_FIR_MSKC 0x04 +#define PEC_NEST_STK_PCI_NEST_FIR_MSKS 0x05 +#define PEC_NEST_STK_PCI_NEST_FIR_ACT0 0x06 +#define PEC_NEST_STK_PCI_NEST_FIR_ACT1 0x07 +#define PEC_NEST_STK_PCI_NEST_FIR_WOF 0x08 +#define PEC_NEST_STK_ERR_REPORT_0 0x0a +#define PEC_NEST_STK_ERR_REPORT_1 0x0b +#define PEC_NEST_STK_PBCQ_GNRL_STATUS 0x0c +#define PEC_NEST_STK_PBCQ_MODE 0x0d +#define PEC_NEST_STK_MMIO_BAR0 0x0e +#define PEC_NEST_STK_MMIO_BAR0_MASK 0x0f +#define PEC_NEST_STK_MMIO_BAR1 0x10 +#define PEC_NEST_STK_MMIO_BAR1_MASK 0x11 +#define PEC_NEST_STK_PHB_REGS_BAR 0x12 +#define PEC_NEST_STK_INT_BAR 0x13 +#define PEC_NEST_STK_BAR_EN 0x14 +#define PEC_NEST_STK_BAR_EN_MMIO0 PPC_BIT(0) +#define PEC_NEST_STK_BAR_EN_MMIO1 PPC_BIT(1) +#define PEC_NEST_STK_BAR_EN_PHB PPC_BIT(2) +#define PEC_NEST_STK_BAR_EN_INT PPC_BIT(3) +#define PEC_NEST_STK_DATA_FRZ_TYPE 0x15 +#define PEC_NEST_STK_PBCQ_TUN_BAR 0x16 + +/* XSCOM PCI global registers */ +#define PEC_PCI_PBAIB_HW_CONFIG 0x00 +#define PEC_PCI_PBAIB_READ_STK_OVR 0x02 + +/* XSCOM PCI per-stack registers */ +#define PEC_PCI_STK_PCI_FIR 0x00 +#define PEC_PCI_STK_PCI_FIR_CLR 0x01 +#define PEC_PCI_STK_PCI_FIR_SET 0x02 +#define PEC_PCI_STK_PCI_FIR_MSK 0x03 +#define PEC_PCI_STK_PCI_FIR_MSKC 0x04 +#define PEC_PCI_STK_PCI_FIR_MSKS 0x05 +#define PEC_PCI_STK_PCI_FIR_ACT0 0x06 +#define PEC_PCI_STK_PCI_FIR_ACT1 0x07 +#define PEC_PCI_STK_PCI_FIR_WOF 0x08 +#define PEC_PCI_STK_ETU_RESET 0x0a +#define PEC_PCI_STK_PBAIB_ERR_REPORT 0x0b +#define PEC_PCI_STK_PBAIB_TX_CMD_CRED 0x0d +#define PEC_PCI_STK_PBAIB_TX_DAT_CRED 0x0e + +/* + * PHB "SCOM" registers. This is accessed via the above window + * and provides a backdoor to the PHB when the AIB bus is not + * functional. Some of these directly map some of the PHB MMIO + * registers, some are specific and allow indirect access to a + * wider range of PHB registers + */ +#define PHB_SCOM_HV_IND_ADDR 0x00 +#define PHB_SCOM_HV_IND_ADDR_VALID PPC_BIT(0) +#define PHB_SCOM_HV_IND_ADDR_4B PPC_BIT(1) +#define PHB_SCOM_HV_IND_ADDR_AUTOINC PPC_BIT(2) +#define PHB_SCOM_HV_IND_ADDR_ADDR PPC_BITMASK(51, 63) +#define PHB_SCOM_HV_IND_DATA 0x01 +#define PHB_SCOM_ETU_LEM_FIR 0x08 +#define PHB_SCOM_ETU_LEM_FIR_AND 0x09 +#define PHB_SCOM_ETU_LEM_FIR_OR 0x0a +#define PHB_SCOM_ETU_LEM_FIR_MSK 0x0b +#define PHB_SCOM_ETU_LEM_ERR_MSK_AND 0x0c +#define PHB_SCOM_ETU_LEM_ERR_MSK_OR 0x0d +#define PHB_SCOM_ETU_LEM_ACT0 0x0e +#define PHB_SCOM_ETU_LEM_ACT1 0x0f +#define PHB_SCOM_ETU_LEM_WOF 0x10 +#define PHB_SCOM_ETU_PMON_CONFIG 0x17 +#define PHB_SCOM_ETU_PMON_CTR0 0x18 +#define PHB_SCOM_ETU_PMON_CTR1 0x19 +#define PHB_SCOM_ETU_PMON_CTR2 0x1a +#define PHB_SCOM_ETU_PMON_CTR3 0x1b + + +/* + * PHB MMIO registers + */ + +/* PHB Fundamental register set A */ +#define PHB_LSI_SOURCE_ID 0x100 +#define PHB_LSI_SRC_ID PPC_BITMASK(4, 12) +#define PHB_DMA_CHAN_STATUS 0x110 +#define PHB_DMA_CHAN_ANY_ERR PPC_BIT(27) +#define PHB_DMA_CHAN_ANY_ERR1 PPC_BIT(28) +#define PHB_DMA_CHAN_ANY_FREEZE PPC_BIT(29) +#define PHB_CPU_LOADSTORE_STATUS 0x120 +#define PHB_CPU_LS_ANY_ERR PPC_BIT(27) +#define PHB_CPU_LS_ANY_ERR1 PPC_BIT(28) +#define PHB_CPU_LS_ANY_FREEZE PPC_BIT(29) +#define PHB_CONFIG_DATA 0x130 +#define PHB_LOCK0 0x138 +#define PHB_CONFIG_ADDRESS 0x140 +#define PHB_CA_ENABLE PPC_BIT(0) +#define PHB_CA_STATUS PPC_BITMASK(1, 3) +#define PHB_CA_STATUS_GOOD 0 +#define PHB_CA_STATUS_UR 1 +#define PHB_CA_STATUS_CRS 2 +#define PHB_CA_STATUS_CA 4 +#define PHB_CA_BUS PPC_BITMASK(4, 11) +#define PHB_CA_DEV PPC_BITMASK(12, 16) +#define PHB_CA_FUNC PPC_BITMASK(17, 19) +#define PHB_CA_BDFN PPC_BITMASK(4, 19) /* bus,dev,func */ +#define PHB_CA_REG PPC_BITMASK(20, 31) +#define PHB_CA_PE PPC_BITMASK(39, 47) +#define PHB_LOCK1 0x148 +#define PHB_PHB4_CONFIG 0x160 +#define PHB_PHB4C_32BIT_MSI_EN PPC_BIT(8) +#define PHB_PHB4C_64BIT_MSI_EN PPC_BIT(14) +#define PHB_RTT_BAR 0x168 +#define PHB_RTT_BAR_ENABLE PPC_BIT(0) +#define PHB_RTT_BASE_ADDRESS_MASK PPC_BITMASK(8, 46) +#define PHB_PELTV_BAR 0x188 +#define PHB_PELTV_BAR_ENABLE PPC_BIT(0) +#define PHB_PELTV_BASE_ADDRESS PPC_BITMASK(8, 50) +#define PHB_M32_START_ADDR 0x1a0 +#define PHB_PEST_BAR 0x1a8 +#define PHB_PEST_BAR_ENABLE PPC_BIT(0) +#define PHB_PEST_BASE_ADDRESS PPC_BITMASK(8, 51) +#define PHB_ASN_CMPM 0x1C0 +#define PHB_ASN_CMPM_ENABLE PPC_BIT(63) +#define PHB_CAPI_CMPM 0x1C8 +#define PHB_CAPI_CMPM_ENABLE PPC_BIT(63) +#define PHB_M64_AOMASK 0x1d0 +#define PHB_M64_UPPER_BITS 0x1f0 +#define PHB_NXLATE_PREFIX 0x1f8 +#define PHB_DMARD_SYNC 0x200 +#define PHB_DMARD_SYNC_START PPC_BIT(0) +#define PHB_DMARD_SYNC_COMPLETE PPC_BIT(1) +#define PHB_RTC_INVALIDATE 0x208 +#define PHB_RTC_INVALIDATE_ALL PPC_BIT(0) +#define PHB_RTC_INVALIDATE_RID PPC_BITMASK(16, 31) +#define PHB_TCE_KILL 0x210 +#define PHB_TCE_KILL_ALL PPC_BIT(0) +#define PHB_TCE_KILL_PE PPC_BIT(1) +#define PHB_TCE_KILL_ONE PPC_BIT(2) +#define PHB_TCE_KILL_PSEL PPC_BIT(3) +#define PHB_TCE_KILL_64K 0x1000 /* Address override */ +#define PHB_TCE_KILL_2M 0x2000 /* Address override */ +#define PHB_TCE_KILL_1G 0x3000 /* Address override */ +#define PHB_TCE_KILL_PENUM PPC_BITMASK(55, 63) +#define PHB_TCE_SPEC_CTL 0x218 +#define PHB_IODA_ADDR 0x220 +#define PHB_IODA_AD_AUTOINC PPC_BIT(0) +#define PHB_IODA_AD_TSEL PPC_BITMASK(11, 15) +#define PHB_IODA_AD_MIST_PWV PPC_BITMASK(28, 31) +#define PHB_IODA_AD_TADR PPC_BITMASK(54, 63) +#define PHB_IODA_DATA0 0x228 +#define PHB_PHB4_GEN_CAP 0x250 +#define PHB_PHB4_TCE_CAP 0x258 +#define PHB_PHB4_IRQ_CAP 0x260 +#define PHB_PHB4_EEH_CAP 0x268 +#define PHB_PAPR_ERR_INJ_CTL 0x2b0 +#define PHB_PAPR_ERR_INJ_CTL_INB PPC_BIT(0) +#define PHB_PAPR_ERR_INJ_CTL_OUTB PPC_BIT(1) +#define PHB_PAPR_ERR_INJ_CTL_STICKY PPC_BIT(2) +#define PHB_PAPR_ERR_INJ_CTL_CFG PPC_BIT(3) +#define PHB_PAPR_ERR_INJ_CTL_RD PPC_BIT(4) +#define PHB_PAPR_ERR_INJ_CTL_WR PPC_BIT(5) +#define PHB_PAPR_ERR_INJ_CTL_FREEZE PPC_BIT(6) +#define PHB_PAPR_ERR_INJ_ADDR 0x2b8 +#define PHB_PAPR_ERR_INJ_ADDR_MMIO PPC_BITMASK(16, 63) +#define PHB_PAPR_ERR_INJ_MASK 0x2c0 +#define PHB_PAPR_ERR_INJ_MASK_CFG PPC_BITMASK(4, 11) +#define PHB_PAPR_ERR_INJ_MASK_CFG_ALL PPC_BITMASK(4, 19) +#define PHB_PAPR_ERR_INJ_MASK_MMIO PPC_BITMASK(16, 63) +#define PHB_ETU_ERR_SUMMARY 0x2c8 +#define PHB_INT_NOTIFY_ADDR 0x300 +#define PHB_INT_NOTIFY_INDEX 0x308 + +/* Fundamental register set B */ +#define PHB_VERSION 0x800 +#define PHB_CTRLR 0x810 +#define PHB_CTRLR_IRQ_PGSZ_64K PPC_BIT(11) +#define PHB_CTRLR_IRQ_STORE_EOI PPC_BIT(12) +#define PHB_CTRLR_MMIO_RD_STRICT PPC_BIT(13) +#define PHB_CTRLR_MMIO_EEH_DISABLE PPC_BIT(14) +#define PHB_CTRLR_CFG_EEH_BLOCK PPC_BIT(15) +#define PHB_CTRLR_FENCE_LNKILL_DIS PPC_BIT(16) +#define PHB_CTRLR_TVT_ADDR_SEL PPC_BITMASK(17, 19) +#define TVT_DD1_1_PER_PE 0 +#define TVT_DD1_2_PER_PE 1 +#define TVT_DD1_4_PER_PE 2 +#define TVT_DD1_8_PER_PE 3 +#define TVT_DD1_16_PER_PE 4 +#define TVT_2_PER_PE 0 +#define TVT_4_PER_PE 1 +#define TVT_8_PER_PE 2 +#define TVT_16_PER_PE 3 +#define PHB_CTRLR_DMA_RD_SPACING PPC_BITMASK(28, 31) +#define PHB_AIB_FENCE_CTRL 0x860 +#define PHB_TCE_TAG_ENABLE 0x868 +#define PHB_TCE_WATERMARK 0x870 +#define PHB_TIMEOUT_CTRL1 0x878 +#define PHB_TIMEOUT_CTRL2 0x880 +#define PHB_Q_DMA_R 0x888 +#define PHB_Q_DMA_R_QUIESCE_DMA PPC_BIT(0) +#define PHB_Q_DMA_R_AUTORESET PPC_BIT(1) +#define PHB_Q_DMA_R_DMA_RESP_STATUS PPC_BIT(4) +#define PHB_Q_DMA_R_MMIO_RESP_STATUS PPC_BIT(5) +#define PHB_Q_DMA_R_TCE_RESP_STATUS PPC_BIT(6) +#define PHB_Q_DMA_R_TCE_KILL_STATUS PPC_BIT(7) +#define PHB_TCE_TAG_STATUS 0x908 + +/* FIR & Error registers */ +#define PHB_LEM_FIR_ACCUM 0xc00 +#define PHB_LEM_FIR_AND_MASK 0xc08 +#define PHB_LEM_FIR_OR_MASK 0xc10 +#define PHB_LEM_ERROR_MASK 0xc18 +#define PHB_LEM_ERROR_AND_MASK 0xc20 +#define PHB_LEM_ERROR_OR_MASK 0xc28 +#define PHB_LEM_ACTION0 0xc30 +#define PHB_LEM_ACTION1 0xc38 +#define PHB_LEM_WOF 0xc40 +#define PHB_ERR_STATUS 0xc80 +#define PHB_ERR1_STATUS 0xc88 +#define PHB_ERR_INJECT 0xc90 +#define PHB_ERR_LEM_ENABLE 0xc98 +#define PHB_ERR_IRQ_ENABLE 0xca0 +#define PHB_ERR_FREEZE_ENABLE 0xca8 +#define PHB_ERR_AIB_FENCE_ENABLE 0xcb0 +#define PHB_ERR_LOG_0 0xcc0 +#define PHB_ERR_LOG_1 0xcc8 +#define PHB_ERR_STATUS_MASK 0xcd0 +#define PHB_ERR1_STATUS_MASK 0xcd8 + +#define PHB_TXE_ERR_STATUS 0xd00 +#define PHB_TXE_ERR1_STATUS 0xd08 +#define PHB_TXE_ERR_INJECT 0xd10 +#define PHB_TXE_ERR_LEM_ENABLE 0xd18 +#define PHB_TXE_ERR_IRQ_ENABLE 0xd20 +#define PHB_TXE_ERR_FREEZE_ENABLE 0xd28 +#define PHB_TXE_ERR_AIB_FENCE_ENABLE 0xd30 +#define PHB_TXE_ERR_LOG_0 0xd40 +#define PHB_TXE_ERR_LOG_1 0xd48 +#define PHB_TXE_ERR_STATUS_MASK 0xd50 +#define PHB_TXE_ERR1_STATUS_MASK 0xd58 + +#define PHB_RXE_ARB_ERR_STATUS 0xd80 +#define PHB_RXE_ARB_ERR1_STATUS 0xd88 +#define PHB_RXE_ARB_ERR_INJECT 0xd90 +#define PHB_RXE_ARB_ERR_LEM_ENABLE 0xd98 +#define PHB_RXE_ARB_ERR_IRQ_ENABLE 0xda0 +#define PHB_RXE_ARB_ERR_FREEZE_ENABLE 0xda8 +#define PHB_RXE_ARB_ERR_AIB_FENCE_ENABLE 0xdb0 +#define PHB_RXE_ARB_ERR_LOG_0 0xdc0 +#define PHB_RXE_ARB_ERR_LOG_1 0xdc8 +#define PHB_RXE_ARB_ERR_STATUS_MASK 0xdd0 +#define PHB_RXE_ARB_ERR1_STATUS_MASK 0xdd8 + +#define PHB_RXE_MRG_ERR_STATUS 0xe00 +#define PHB_RXE_MRG_ERR1_STATUS 0xe08 +#define PHB_RXE_MRG_ERR_INJECT 0xe10 +#define PHB_RXE_MRG_ERR_LEM_ENABLE 0xe18 +#define PHB_RXE_MRG_ERR_IRQ_ENABLE 0xe20 +#define PHB_RXE_MRG_ERR_FREEZE_ENABLE 0xe28 +#define PHB_RXE_MRG_ERR_AIB_FENCE_ENABLE 0xe30 +#define PHB_RXE_MRG_ERR_LOG_0 0xe40 +#define PHB_RXE_MRG_ERR_LOG_1 0xe48 +#define PHB_RXE_MRG_ERR_STATUS_MASK 0xe50 +#define PHB_RXE_MRG_ERR1_STATUS_MASK 0xe58 + +#define PHB_RXE_TCE_ERR_STATUS 0xe80 +#define PHB_RXE_TCE_ERR1_STATUS 0xe88 +#define PHB_RXE_TCE_ERR_INJECT 0xe90 +#define PHB_RXE_TCE_ERR_LEM_ENABLE 0xe98 +#define PHB_RXE_TCE_ERR_IRQ_ENABLE 0xea0 +#define PHB_RXE_TCE_ERR_FREEZE_ENABLE 0xea8 +#define PHB_RXE_TCE_ERR_AIB_FENCE_ENABLE 0xeb0 +#define PHB_RXE_TCE_ERR_LOG_0 0xec0 +#define PHB_RXE_TCE_ERR_LOG_1 0xec8 +#define PHB_RXE_TCE_ERR_STATUS_MASK 0xed0 +#define PHB_RXE_TCE_ERR1_STATUS_MASK 0xed8 + +/* Performance monitor & Debug registers */ +#define PHB_TRACE_CONTROL 0xf80 +#define PHB_PERFMON_CONFIG 0xf88 +#define PHB_PERFMON_CTR0 0xf90 +#define PHB_PERFMON_CTR1 0xf98 +#define PHB_PERFMON_CTR2 0xfa0 +#define PHB_PERFMON_CTR3 0xfa8 + +/* Root complex config space memory mapped */ +#define PHB_RC_CONFIG_BASE 0x1000 +#define PHB_RC_CONFIG_SIZE 0x800 + +/* PHB4 REGB registers */ + +/* PBL core */ +#define PHB_PBL_CONTROL 0x1800 +#define PHB_PBL_TIMEOUT_CTRL 0x1810 +#define PHB_PBL_NPTAG_ENABLE 0x1820 +#define PHB_PBL_NBW_CMP_MASK 0x1830 +#define PHB_PBL_NBW_MASK_ENABLE PPC_BIT(63) +#define PHB_PBL_SYS_LINK_INIT 0x1838 +#define PHB_PBL_BUF_STATUS 0x1840 +#define PHB_PBL_ERR_STATUS 0x1900 +#define PHB_PBL_ERR1_STATUS 0x1908 +#define PHB_PBL_ERR_INJECT 0x1910 +#define PHB_PBL_ERR_INF_ENABLE 0x1920 +#define PHB_PBL_ERR_ERC_ENABLE 0x1928 +#define PHB_PBL_ERR_FAT_ENABLE 0x1930 +#define PHB_PBL_ERR_LOG_0 0x1940 +#define PHB_PBL_ERR_LOG_1 0x1948 +#define PHB_PBL_ERR_STATUS_MASK 0x1950 +#define PHB_PBL_ERR1_STATUS_MASK 0x1958 + +/* PCI-E stack */ +#define PHB_PCIE_SCR 0x1A00 +#define PHB_PCIE_SCR_SLOT_CAP PPC_BIT(15) +#define PHB_PCIE_SCR_MAXLINKSPEED PPC_BITMASK(32, 35) + + +#define PHB_PCIE_CRESET 0x1A10 +#define PHB_PCIE_CRESET_CFG_CORE PPC_BIT(0) +#define PHB_PCIE_CRESET_TLDLP PPC_BIT(1) +#define PHB_PCIE_CRESET_PBL PPC_BIT(2) +#define PHB_PCIE_CRESET_PERST_N PPC_BIT(3) +#define PHB_PCIE_CRESET_PIPE_N PPC_BIT(4) + + +#define PHB_PCIE_HOTPLUG_STATUS 0x1A20 +#define PHB_PCIE_HPSTAT_PRESENCE PPC_BIT(10) + +#define PHB_PCIE_DLP_TRAIN_CTL 0x1A40 +#define PHB_PCIE_DLP_LINK_WIDTH PPC_BITMASK(30, 35) +#define PHB_PCIE_DLP_LINK_SPEED PPC_BITMASK(36, 39) +#define PHB_PCIE_DLP_LTSSM_TRC PPC_BITMASK(24, 27) +#define PHB_PCIE_DLP_LTSSM_RESET 0 +#define PHB_PCIE_DLP_LTSSM_DETECT 1 +#define PHB_PCIE_DLP_LTSSM_POLLING 2 +#define PHB_PCIE_DLP_LTSSM_CONFIG 3 +#define PHB_PCIE_DLP_LTSSM_L0 4 +#define PHB_PCIE_DLP_LTSSM_REC 5 +#define PHB_PCIE_DLP_LTSSM_L1 6 +#define PHB_PCIE_DLP_LTSSM_L2 7 +#define PHB_PCIE_DLP_LTSSM_HOTRESET 8 +#define PHB_PCIE_DLP_LTSSM_DISABLED 9 +#define PHB_PCIE_DLP_LTSSM_LOOPBACK 10 +#define PHB_PCIE_DLP_TL_LINKACT PPC_BIT(23) +#define PHB_PCIE_DLP_DL_PGRESET PPC_BIT(22) +#define PHB_PCIE_DLP_TRAINING PPC_BIT(20) +#define PHB_PCIE_DLP_INBAND_PRESENCE PPC_BIT(19) + +#define PHB_PCIE_DLP_CTL 0x1A78 +#define PHB_PCIE_DLP_CTL_BYPASS_PH2 PPC_BIT(4) +#define PHB_PCIE_DLP_CTL_BYPASS_PH3 PPC_BIT(5) + +#define PHB_PCIE_DLP_TRWCTL 0x1A80 +#define PHB_PCIE_DLP_TRWCTL_EN PPC_BIT(0) + +#define PHB_PCIE_DLP_ERRLOG1 0x1AA0 +#define PHB_PCIE_DLP_ERRLOG2 0x1AA8 +#define PHB_PCIE_DLP_ERR_STATUS 0x1AB0 +#define PHB_PCIE_DLP_ERR_COUNTERS 0x1AB8 + +#define PHB_PCIE_LANE_EQ_CNTL0 0x1AD0 +#define PHB_PCIE_LANE_EQ_CNTL1 0x1AD8 +#define PHB_PCIE_LANE_EQ_CNTL2 0x1AE0 +#define PHB_PCIE_LANE_EQ_CNTL3 0x1AE8 +#define PHB_PCIE_LANE_EQ_CNTL20 0x1AF0 +#define PHB_PCIE_LANE_EQ_CNTL21 0x1AF8 +#define PHB_PCIE_LANE_EQ_CNTL22 0x1B00 /* DD1 only */ +#define PHB_PCIE_LANE_EQ_CNTL23 0x1B08 /* DD1 only */ +#define PHB_PCIE_TRACE_CTRL 0x1B20 +#define PHB_PCIE_MISC_STRAP 0x1B30 + +/* Error */ +#define PHB_REGB_ERR_STATUS 0x1C00 +#define PHB_REGB_ERR1_STATUS 0x1C08 +#define PHB_REGB_ERR_INJECT 0x1C10 +#define PHB_REGB_ERR_INF_ENABLE 0x1C20 +#define PHB_REGB_ERR_ERC_ENABLE 0x1C28 +#define PHB_REGB_ERR_FAT_ENABLE 0x1C30 +#define PHB_REGB_ERR_LOG_0 0x1C40 +#define PHB_REGB_ERR_LOG_1 0x1C48 +#define PHB_REGB_ERR_STATUS_MASK 0x1C50 +#define PHB_REGB_ERR1_STATUS_MASK 0x1C58 + +/* + * IODA3 on-chip tables + */ + +#define IODA3_TBL_LIST 1 +#define IODA3_TBL_MIST 2 +#define IODA3_TBL_RCAM 5 +#define IODA3_TBL_MRT 6 +#define IODA3_TBL_PESTA 7 +#define IODA3_TBL_PESTB 8 +#define IODA3_TBL_TVT 9 +#define IODA3_TBL_TCR 10 +#define IODA3_TBL_TDR 11 +#define IODA3_TBL_MBT 16 +#define IODA3_TBL_MDT 17 +#define IODA3_TBL_PEEV 20 + +/* LIST */ +#define IODA3_LIST_P PPC_BIT(6) +#define IODA3_LIST_Q PPC_BIT(7) +#define IODA3_LIST_STATE PPC_BIT(14) + +/* MIST */ +#define IODA3_MIST_P3 PPC_BIT(48 + 0) +#define IODA3_MIST_Q3 PPC_BIT(48 + 1) +#define IODA3_MIST_PE3 PPC_BITMASK(48 + 4, 48 + 15) + +/* TVT */ +#define IODA3_TVT_TABLE_ADDR PPC_BITMASK(0, 47) +#define IODA3_TVT_NUM_LEVELS PPC_BITMASK(48, 50) +#define IODA3_TVE_1_LEVEL 0 +#define IODA3_TVE_2_LEVELS 1 +#define IODA3_TVE_3_LEVELS 2 +#define IODA3_TVE_4_LEVELS 3 +#define IODA3_TVE_5_LEVELS 4 +#define IODA3_TVT_TCE_TABLE_SIZE PPC_BITMASK(51, 55) +#define IODA3_TVT_NON_TRANSLATE_50 PPC_BIT(56) +#define IODA3_TVT_IO_PSIZE PPC_BITMASK(59, 63) + +/* PESTA */ +#define IODA3_PESTA_MMIO_FROZEN PPC_BIT(0) +#define IODA3_PESTA_TRANS_TYPE PPC_BITMASK(5, 7) +#define IODA3_PESTA_TRANS_TYPE_MMIOLOAD 0x4 +#define IODA3_PESTA_CA_CMPLT_TMT PPC_BIT(8) +#define IODA3_PESTA_UR PPC_BIT(9) + +/* PESTB */ +#define IODA3_PESTB_DMA_STOPPED PPC_BIT(0) + +/* MDT */ +/* FIXME: check this field with Eric and add a B, C and D */ +#define IODA3_MDT_PE_A PPC_BITMASK(0, 15) +#define IODA3_MDT_PE_B PPC_BITMASK(16, 31) +#define IODA3_MDT_PE_C PPC_BITMASK(32, 47) +#define IODA3_MDT_PE_D PPC_BITMASK(48, 63) + +/* MBT */ +#define IODA3_MBT0_ENABLE PPC_BIT(0) +#define IODA3_MBT0_TYPE PPC_BIT(1) +#define IODA3_MBT0_TYPE_M32 IODA3_MBT0_TYPE +#define IODA3_MBT0_TYPE_M64 0 +#define IODA3_MBT0_MODE PPC_BITMASK(2, 3) +#define IODA3_MBT0_MODE_PE_SEG 0 +#define IODA3_MBT0_MODE_MDT 1 +#define IODA3_MBT0_MODE_SINGLE_PE 2 +#define IODA3_MBT0_SEG_DIV PPC_BITMASK(4, 5) +#define IODA3_MBT0_SEG_DIV_MAX 0 +#define IODA3_MBT0_SEG_DIV_128 1 +#define IODA3_MBT0_SEG_DIV_64 2 +#define IODA3_MBT0_SEG_DIV_8 3 +#define IODA3_MBT0_MDT_COLUMN PPC_BITMASK(4, 5) +#define IODA3_MBT0_BASE_ADDR PPC_BITMASK(8, 51) + +#define IODA3_MBT1_ENABLE PPC_BIT(0) +#define IODA3_MBT1_MASK PPC_BITMASK(8, 51) +#define IODA3_MBT1_SEG_BASE PPC_BITMASK(55, 63) +#define IODA3_MBT1_SINGLE_PE_NUM PPC_BITMASK(55, 63) + +/* + * IODA3 in-memory tables + */ + +/* + * PEST + * + * 2x8 bytes entries, PEST0 and PEST1 + */ + +#define IODA3_PEST0_MMIO_CAUSE PPC_BIT(2) +#define IODA3_PEST0_CFG_READ PPC_BIT(3) +#define IODA3_PEST0_CFG_WRITE PPC_BIT(4) +#define IODA3_PEST0_TTYPE PPC_BITMASK(5, 7) +#define PEST_TTYPE_DMA_WRITE 0 +#define PEST_TTYPE_MSI 1 +#define PEST_TTYPE_DMA_READ 2 +#define PEST_TTYPE_DMA_READ_RESP 3 +#define PEST_TTYPE_MMIO_LOAD 4 +#define PEST_TTYPE_MMIO_STORE 5 +#define PEST_TTYPE_OTHER 7 +#define IODA3_PEST0_CA_RETURN PPC_BIT(8) +#define IODA3_PEST0_UR_RETURN PPC_BIT(9) +#define IODA3_PEST0_PCIE_NONFATAL PPC_BIT(10) +#define IODA3_PEST0_PCIE_FATAL PPC_BIT(11) +#define IODA3_PEST0_PARITY_UE PPC_BIT(13) +#define IODA3_PEST0_PCIE_CORRECTABLE PPC_BIT(14) +#define IODA3_PEST0_PCIE_INTERRUPT PPC_BIT(15) +#define IODA3_PEST0_MMIO_XLATE PPC_BIT(16) +#define IODA3_PEST0_IODA3_ERROR PPC_BIT(16) /* Same bit as MMIO xlate */ +#define IODA3_PEST0_TCE_PAGE_FAULT PPC_BIT(18) +#define IODA3_PEST0_TCE_ACCESS_FAULT PPC_BIT(19) +#define IODA3_PEST0_DMA_RESP_TIMEOUT PPC_BIT(20) +#define IODA3_PEST0_AIB_SIZE_INVALID PPC_BIT(21) +#define IODA3_PEST0_LEM_BIT PPC_BITMASK(26, 31) +#define IODA3_PEST0_RID PPC_BITMASK(32, 47) +#define IODA3_PEST0_MSI_DATA PPC_BITMASK(48, 63) + +#define IODA3_PEST1_FAIL_ADDR PPC_BITMASK(3, 63) + + +#endif /* PCI_HOST_PNV_PHB4_REGS_H */ diff --git a/include/hw/pci-host/ppce500.h b/include/hw/pci-host/ppce500.h new file mode 100644 index 000000000..e3a374230 --- /dev/null +++ b/include/hw/pci-host/ppce500.h @@ -0,0 +1,9 @@ +#ifndef PCI_HOST_PPCE500_H +#define PCI_HOST_PPCE500_H + +static inline int ppce500_pci_map_irq_slot(int devno, int irq_num) +{ + return (devno + irq_num) % 4; +} + +#endif diff --git a/include/hw/pci-host/q35.h b/include/hw/pci-host/q35.h new file mode 100644 index 000000000..bbb958176 --- /dev/null +++ b/include/hw/pci-host/q35.h @@ -0,0 +1,199 @@ +/* + * q35.h + * + * Copyright (c) 2009 Isaku Yamahata + * VA Linux Systems Japan K.K. + * Copyright (C) 2012 Jason Baron + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, see + */ + +#ifndef HW_Q35_H +#define HW_Q35_H + +#include "hw/pci/pci.h" +#include "hw/pci/pcie_host.h" +#include "hw/pci-host/pam.h" +#include "qemu/units.h" +#include "qemu/range.h" +#include "qom/object.h" + +#define TYPE_Q35_HOST_DEVICE "q35-pcihost" +OBJECT_DECLARE_SIMPLE_TYPE(Q35PCIHost, Q35_HOST_DEVICE) + +#define TYPE_MCH_PCI_DEVICE "mch" +OBJECT_DECLARE_SIMPLE_TYPE(MCHPCIState, MCH_PCI_DEVICE) + +struct MCHPCIState { + /*< private >*/ + PCIDevice parent_obj; + /*< public >*/ + + MemoryRegion *ram_memory; + MemoryRegion *pci_address_space; + MemoryRegion *system_memory; + MemoryRegion *address_space_io; + PAMMemoryRegion pam_regions[13]; + MemoryRegion smram_region, open_high_smram; + MemoryRegion smram, low_smram, high_smram; + MemoryRegion tseg_blackhole, tseg_window; + MemoryRegion smbase_blackhole, smbase_window; + bool has_smram_at_smbase; + Range pci_hole; + uint64_t below_4g_mem_size; + uint64_t above_4g_mem_size; + uint64_t pci_hole64_size; + uint32_t short_root_bus; + uint16_t ext_tseg_mbytes; +}; + +struct Q35PCIHost { + /*< private >*/ + PCIExpressHost parent_obj; + /*< public >*/ + + bool pci_hole64_fix; + MCHPCIState mch; +}; + +#define Q35_MASK(bit, ms_bit, ls_bit) \ +((uint##bit##_t)(((1ULL << ((ms_bit) + 1)) - 1) & ~((1ULL << ls_bit) - 1))) + +/* + * gmch part + */ + +#define MCH_HOST_PROP_RAM_MEM "ram-mem" +#define MCH_HOST_PROP_PCI_MEM "pci-mem" +#define MCH_HOST_PROP_SYSTEM_MEM "system-mem" +#define MCH_HOST_PROP_IO_MEM "io-mem" + +/* PCI configuration */ +#define MCH_HOST_BRIDGE "MCH" + +#define MCH_HOST_BRIDGE_CONFIG_ADDR 0xcf8 +#define MCH_HOST_BRIDGE_CONFIG_DATA 0xcfc + +/* D0:F0 configuration space */ +#define MCH_HOST_BRIDGE_REVISION_DEFAULT 0x0 + +#define MCH_HOST_BRIDGE_EXT_TSEG_MBYTES 0x50 +#define MCH_HOST_BRIDGE_EXT_TSEG_MBYTES_SIZE 2 +#define MCH_HOST_BRIDGE_EXT_TSEG_MBYTES_QUERY 0xffff +#define MCH_HOST_BRIDGE_EXT_TSEG_MBYTES_MAX 0xfff + +#define MCH_HOST_BRIDGE_SMBASE_SIZE (128 * KiB) +#define MCH_HOST_BRIDGE_SMBASE_ADDR 0x30000 +#define MCH_HOST_BRIDGE_F_SMBASE 0x9c +#define MCH_HOST_BRIDGE_F_SMBASE_QUERY 0xff +#define MCH_HOST_BRIDGE_F_SMBASE_IN_RAM 0x01 +#define MCH_HOST_BRIDGE_F_SMBASE_LCK 0x02 + +#define MCH_HOST_BRIDGE_PCIEXBAR 0x60 /* 64bit register */ +#define MCH_HOST_BRIDGE_PCIEXBAR_SIZE 8 /* 64bit register */ +#define MCH_HOST_BRIDGE_PCIEXBAR_DEFAULT 0xb0000000 +#define MCH_HOST_BRIDGE_PCIEXBAR_MAX (0x10000000) /* 256M */ +#define MCH_HOST_BRIDGE_PCIEXBAR_ADMSK Q35_MASK(64, 35, 28) +#define MCH_HOST_BRIDGE_PCIEXBAR_128ADMSK ((uint64_t)(1 << 26)) +#define MCH_HOST_BRIDGE_PCIEXBAR_64ADMSK ((uint64_t)(1 << 25)) +#define MCH_HOST_BRIDGE_PCIEXBAR_LENGTH_MASK ((uint64_t)(0x3 << 1)) +#define MCH_HOST_BRIDGE_PCIEXBAR_LENGTH_256M ((uint64_t)(0x0 << 1)) +#define MCH_HOST_BRIDGE_PCIEXBAR_LENGTH_128M ((uint64_t)(0x1 << 1)) +#define MCH_HOST_BRIDGE_PCIEXBAR_LENGTH_64M ((uint64_t)(0x2 << 1)) +#define MCH_HOST_BRIDGE_PCIEXBAR_LENGTH_RVD ((uint64_t)(0x3 << 1)) +#define MCH_HOST_BRIDGE_PCIEXBAREN ((uint64_t)1) + +#define MCH_HOST_BRIDGE_PAM_NB 7 +#define MCH_HOST_BRIDGE_PAM_SIZE 7 +#define MCH_HOST_BRIDGE_PAM0 0x90 +#define MCH_HOST_BRIDGE_PAM_BIOS_AREA 0xf0000 +#define MCH_HOST_BRIDGE_PAM_AREA_SIZE 0x10000 /* 16KB */ +#define MCH_HOST_BRIDGE_PAM1 0x91 +#define MCH_HOST_BRIDGE_PAM_EXPAN_AREA 0xc0000 +#define MCH_HOST_BRIDGE_PAM_EXPAN_SIZE 0x04000 +#define MCH_HOST_BRIDGE_PAM2 0x92 +#define MCH_HOST_BRIDGE_PAM3 0x93 +#define MCH_HOST_BRIDGE_PAM4 0x94 +#define MCH_HOST_BRIDGE_PAM_EXBIOS_AREA 0xe0000 +#define MCH_HOST_BRIDGE_PAM_EXBIOS_SIZE 0x04000 +#define MCH_HOST_BRIDGE_PAM5 0x95 +#define MCH_HOST_BRIDGE_PAM6 0x96 +#define MCH_HOST_BRIDGE_PAM_WE_HI ((uint8_t)(0x2 << 4)) +#define MCH_HOST_BRIDGE_PAM_RE_HI ((uint8_t)(0x1 << 4)) +#define MCH_HOST_BRIDGE_PAM_HI_MASK ((uint8_t)(0x3 << 4)) +#define MCH_HOST_BRIDGE_PAM_WE_LO ((uint8_t)0x2) +#define MCH_HOST_BRIDGE_PAM_RE_LO ((uint8_t)0x1) +#define MCH_HOST_BRIDGE_PAM_LO_MASK ((uint8_t)0x3) +#define MCH_HOST_BRIDGE_PAM_WE ((uint8_t)0x2) +#define MCH_HOST_BRIDGE_PAM_RE ((uint8_t)0x1) +#define MCH_HOST_BRIDGE_PAM_MASK ((uint8_t)0x3) + +#define MCH_HOST_BRIDGE_SMRAM 0x9d +#define MCH_HOST_BRIDGE_SMRAM_SIZE 2 +#define MCH_HOST_BRIDGE_SMRAM_D_OPEN ((uint8_t)(1 << 6)) +#define MCH_HOST_BRIDGE_SMRAM_D_CLS ((uint8_t)(1 << 5)) +#define MCH_HOST_BRIDGE_SMRAM_D_LCK ((uint8_t)(1 << 4)) +#define MCH_HOST_BRIDGE_SMRAM_G_SMRAME ((uint8_t)(1 << 3)) +#define MCH_HOST_BRIDGE_SMRAM_C_BASE_SEG_MASK ((uint8_t)0x7) +#define MCH_HOST_BRIDGE_SMRAM_C_BASE_SEG ((uint8_t)0x2) /* hardwired to b010 */ +#define MCH_HOST_BRIDGE_SMRAM_C_BASE 0xa0000 +#define MCH_HOST_BRIDGE_SMRAM_C_END 0xc0000 +#define MCH_HOST_BRIDGE_SMRAM_C_SIZE 0x20000 +#define MCH_HOST_BRIDGE_UPPER_SYSTEM_BIOS_END 0x100000 +#define MCH_HOST_BRIDGE_SMRAM_DEFAULT \ + MCH_HOST_BRIDGE_SMRAM_C_BASE_SEG +#define MCH_HOST_BRIDGE_SMRAM_WMASK \ + (MCH_HOST_BRIDGE_SMRAM_D_OPEN | \ + MCH_HOST_BRIDGE_SMRAM_D_CLS | \ + MCH_HOST_BRIDGE_SMRAM_D_LCK | \ + MCH_HOST_BRIDGE_SMRAM_G_SMRAME) +#define MCH_HOST_BRIDGE_SMRAM_WMASK_LCK \ + MCH_HOST_BRIDGE_SMRAM_D_CLS + +#define MCH_HOST_BRIDGE_ESMRAMC 0x9e +#define MCH_HOST_BRIDGE_ESMRAMC_H_SMRAME ((uint8_t)(1 << 7)) +#define MCH_HOST_BRIDGE_ESMRAMC_E_SMERR ((uint8_t)(1 << 6)) +#define MCH_HOST_BRIDGE_ESMRAMC_SM_CACHE ((uint8_t)(1 << 5)) +#define MCH_HOST_BRIDGE_ESMRAMC_SM_L1 ((uint8_t)(1 << 4)) +#define MCH_HOST_BRIDGE_ESMRAMC_SM_L2 ((uint8_t)(1 << 3)) +#define MCH_HOST_BRIDGE_ESMRAMC_TSEG_SZ_MASK ((uint8_t)(0x3 << 1)) +#define MCH_HOST_BRIDGE_ESMRAMC_TSEG_SZ_1MB ((uint8_t)(0x0 << 1)) +#define MCH_HOST_BRIDGE_ESMRAMC_TSEG_SZ_2MB ((uint8_t)(0x1 << 1)) +#define MCH_HOST_BRIDGE_ESMRAMC_TSEG_SZ_8MB ((uint8_t)(0x2 << 1)) +#define MCH_HOST_BRIDGE_ESMRAMC_T_EN ((uint8_t)1) +#define MCH_HOST_BRIDGE_ESMRAMC_DEFAULT \ + (MCH_HOST_BRIDGE_ESMRAMC_SM_CACHE | \ + MCH_HOST_BRIDGE_ESMRAMC_SM_L1 | \ + MCH_HOST_BRIDGE_ESMRAMC_SM_L2) +#define MCH_HOST_BRIDGE_ESMRAMC_WMASK \ + (MCH_HOST_BRIDGE_ESMRAMC_H_SMRAME | \ + MCH_HOST_BRIDGE_ESMRAMC_TSEG_SZ_MASK | \ + MCH_HOST_BRIDGE_ESMRAMC_T_EN) +#define MCH_HOST_BRIDGE_ESMRAMC_WMASK_LCK 0 + +/* D1:F0 PCIE* port*/ +#define MCH_PCIE_DEV 1 +#define MCH_PCIE_FUNC 0 + +uint64_t mch_mcfg_base(void); + +/* + * Arbitrary but unique BNF number for IOAPIC device. + * + * TODO: make sure there would have no conflict with real PCI bus + */ +#define Q35_PSEUDO_BUS_PLATFORM (0xff) +#define Q35_PSEUDO_DEVFN_IOAPIC (0x00) + +#endif /* HW_Q35_H */ diff --git a/include/hw/pci-host/sabre.h b/include/hw/pci-host/sabre.h new file mode 100644 index 000000000..01190241b --- /dev/null +++ b/include/hw/pci-host/sabre.h @@ -0,0 +1,53 @@ +#ifndef HW_PCI_HOST_SABRE_H +#define HW_PCI_HOST_SABRE_H + +#include "hw/pci/pci.h" +#include "hw/pci/pci_host.h" +#include "hw/sparc/sun4u_iommu.h" +#include "qom/object.h" + +#define MAX_IVEC 0x40 + +/* OBIO IVEC IRQs */ +#define OBIO_HDD_IRQ 0x20 +#define OBIO_NIC_IRQ 0x21 +#define OBIO_LPT_IRQ 0x22 +#define OBIO_FDD_IRQ 0x27 +#define OBIO_KBD_IRQ 0x29 +#define OBIO_MSE_IRQ 0x2a +#define OBIO_SER_IRQ 0x2b + +struct SabrePCIState { + PCIDevice parent_obj; +}; + +#define TYPE_SABRE_PCI_DEVICE "sabre-pci" +OBJECT_DECLARE_SIMPLE_TYPE(SabrePCIState, SABRE_PCI_DEVICE) + +struct SabreState { + PCIHostState parent_obj; + + hwaddr special_base; + hwaddr mem_base; + MemoryRegion sabre_config; + MemoryRegion pci_config; + MemoryRegion pci_mmio; + MemoryRegion pci_ioport; + uint64_t pci_irq_in; + IOMMUState *iommu; + PCIBridge *bridgeA; + PCIBridge *bridgeB; + uint32_t pci_control[16]; + uint32_t pci_irq_map[8]; + uint32_t pci_err_irq_map[4]; + uint32_t obio_irq_map[32]; + qemu_irq ivec_irqs[MAX_IVEC]; + unsigned int irq_request; + uint32_t reset_control; + unsigned int nr_resets; +}; + +#define TYPE_SABRE "sabre" +OBJECT_DECLARE_SIMPLE_TYPE(SabreState, SABRE) + +#endif diff --git a/include/hw/pci-host/spapr.h b/include/hw/pci-host/spapr.h new file mode 100644 index 000000000..4f58f0223 --- /dev/null +++ b/include/hw/pci-host/spapr.h @@ -0,0 +1,215 @@ +/* + * QEMU SPAPR PCI BUS definitions + * + * Copyright (c) 2011 Alexey Kardashevskiy + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, see . + */ + +#ifndef PCI_HOST_SPAPR_H +#define PCI_HOST_SPAPR_H + +#include "hw/ppc/spapr.h" +#include "hw/pci/pci.h" +#include "hw/pci/pci_host.h" +#include "hw/ppc/xics.h" +#include "qom/object.h" + +#define TYPE_SPAPR_PCI_HOST_BRIDGE "spapr-pci-host-bridge" + +OBJECT_DECLARE_SIMPLE_TYPE(SpaprPhbState, SPAPR_PCI_HOST_BRIDGE) + +#define SPAPR_PCI_DMA_MAX_WINDOWS 2 + + +typedef struct SpaprPciMsi { + uint32_t first_irq; + uint32_t num; +} SpaprPciMsi; + +typedef struct SpaprPciMsiMig { + uint32_t key; + SpaprPciMsi value; +} SpaprPciMsiMig; + +typedef struct SpaprPciLsi { + uint32_t irq; +} SpaprPciLsi; + +typedef struct SpaprPhbPciNvGpuConfig SpaprPhbPciNvGpuConfig; + +struct SpaprPhbState { + PCIHostState parent_obj; + + uint32_t index; + uint64_t buid; + char *dtbusname; + bool dr_enabled; + + MemoryRegion memspace, iospace; + hwaddr mem_win_addr, mem_win_size, mem64_win_addr, mem64_win_size; + uint64_t mem64_win_pciaddr; + hwaddr io_win_addr, io_win_size; + MemoryRegion mem32window, mem64window, iowindow, msiwindow; + + uint32_t dma_liobn[SPAPR_PCI_DMA_MAX_WINDOWS]; + hwaddr dma_win_addr, dma_win_size; + AddressSpace iommu_as; + MemoryRegion iommu_root; + + SpaprPciLsi lsi_table[PCI_NUM_PINS]; + + GHashTable *msi; + /* Temporary cache for migration purposes */ + int32_t msi_devs_num; + SpaprPciMsiMig *msi_devs; + + QLIST_ENTRY(SpaprPhbState) list; + + bool ddw_enabled; + uint64_t page_size_mask; + uint64_t dma64_win_addr; + + uint32_t numa_node; + + bool pcie_ecs; /* Allow access to PCIe extended config space? */ + + /* Fields for migration compatibility hacks */ + bool pre_2_8_migration; + uint32_t mig_liobn; + hwaddr mig_mem_win_addr, mig_mem_win_size; + hwaddr mig_io_win_addr, mig_io_win_size; + hwaddr nv2_gpa_win_addr; + hwaddr nv2_atsd_win_addr; + SpaprPhbPciNvGpuConfig *nvgpus; + bool pre_5_1_assoc; +}; + +#define SPAPR_PCI_MEM_WIN_BUS_OFFSET 0x80000000ULL +#define SPAPR_PCI_MEM32_WIN_SIZE \ + ((1ULL << 32) - SPAPR_PCI_MEM_WIN_BUS_OFFSET) +#define SPAPR_PCI_MEM64_WIN_SIZE 0x10000000000ULL /* 1 TiB */ + +/* All PCI outbound windows will be within this range */ +#define SPAPR_PCI_BASE (1ULL << 45) /* 32 TiB */ +#define SPAPR_PCI_LIMIT (1ULL << 46) /* 64 TiB */ + +#define SPAPR_MAX_PHBS ((SPAPR_PCI_LIMIT - SPAPR_PCI_BASE) / \ + SPAPR_PCI_MEM64_WIN_SIZE - 1) + +#define SPAPR_PCI_IO_WIN_SIZE 0x10000 + +#define SPAPR_PCI_MSI_WINDOW 0x40000000000ULL + +#define SPAPR_PCI_NV2RAM64_WIN_BASE SPAPR_PCI_LIMIT +#define SPAPR_PCI_NV2RAM64_WIN_SIZE (2 * TiB) /* For up to 6 GPUs 256GB each */ + +/* Max number of these GPUsper a physical box */ +#define NVGPU_MAX_NUM 6 +/* Max number of NVLinks per GPU in any physical box */ +#define NVGPU_MAX_LINKS 3 + +/* + * GPU RAM starts at 64TiB so huge DMA window to cover it all ends at 128TiB + * which is enough. We do not need DMA for ATSD so we put them at 128TiB. + */ +#define SPAPR_PCI_NV2ATSD_WIN_BASE (128 * TiB) +#define SPAPR_PCI_NV2ATSD_WIN_SIZE (NVGPU_MAX_NUM * NVGPU_MAX_LINKS * \ + 64 * KiB) + +int spapr_dt_phb(SpaprMachineState *spapr, SpaprPhbState *phb, + uint32_t intc_phandle, void *fdt, int *node_offset); + +void spapr_pci_rtas_init(void); + +SpaprPhbState *spapr_pci_find_phb(SpaprMachineState *spapr, uint64_t buid); +PCIDevice *spapr_pci_find_dev(SpaprMachineState *spapr, uint64_t buid, + uint32_t config_addr); + +/* DRC callbacks */ +void spapr_phb_remove_pci_device_cb(DeviceState *dev); +int spapr_pci_dt_populate(SpaprDrc *drc, SpaprMachineState *spapr, + void *fdt, int *fdt_start_offset, Error **errp); + +/* VFIO EEH hooks */ +#ifdef CONFIG_LINUX +bool spapr_phb_eeh_available(SpaprPhbState *sphb); +int spapr_phb_vfio_eeh_set_option(SpaprPhbState *sphb, + unsigned int addr, int option); +int spapr_phb_vfio_eeh_get_state(SpaprPhbState *sphb, int *state); +int spapr_phb_vfio_eeh_reset(SpaprPhbState *sphb, int option); +int spapr_phb_vfio_eeh_configure(SpaprPhbState *sphb); +void spapr_phb_vfio_reset(DeviceState *qdev); +void spapr_phb_nvgpu_setup(SpaprPhbState *sphb, Error **errp); +void spapr_phb_nvgpu_free(SpaprPhbState *sphb); +void spapr_phb_nvgpu_populate_dt(SpaprPhbState *sphb, void *fdt, int bus_off, + Error **errp); +void spapr_phb_nvgpu_ram_populate_dt(SpaprPhbState *sphb, void *fdt); +void spapr_phb_nvgpu_populate_pcidev_dt(PCIDevice *dev, void *fdt, int offset, + SpaprPhbState *sphb); +#else +static inline bool spapr_phb_eeh_available(SpaprPhbState *sphb) +{ + return false; +} +static inline int spapr_phb_vfio_eeh_set_option(SpaprPhbState *sphb, + unsigned int addr, int option) +{ + return RTAS_OUT_HW_ERROR; +} +static inline int spapr_phb_vfio_eeh_get_state(SpaprPhbState *sphb, + int *state) +{ + return RTAS_OUT_HW_ERROR; +} +static inline int spapr_phb_vfio_eeh_reset(SpaprPhbState *sphb, int option) +{ + return RTAS_OUT_HW_ERROR; +} +static inline int spapr_phb_vfio_eeh_configure(SpaprPhbState *sphb) +{ + return RTAS_OUT_HW_ERROR; +} +static inline void spapr_phb_vfio_reset(DeviceState *qdev) +{ +} +static inline void spapr_phb_nvgpu_setup(SpaprPhbState *sphb, Error **errp) +{ +} +static inline void spapr_phb_nvgpu_free(SpaprPhbState *sphb) +{ +} +static inline void spapr_phb_nvgpu_populate_dt(SpaprPhbState *sphb, void *fdt, + int bus_off, Error **errp) +{ +} +static inline void spapr_phb_nvgpu_ram_populate_dt(SpaprPhbState *sphb, + void *fdt) +{ +} +static inline void spapr_phb_nvgpu_populate_pcidev_dt(PCIDevice *dev, void *fdt, + int offset, + SpaprPhbState *sphb) +{ +} +#endif + +void spapr_phb_dma_reset(SpaprPhbState *sphb); + +static inline unsigned spapr_phb_windows_supported(SpaprPhbState *sphb) +{ + return sphb->ddw_enabled ? SPAPR_PCI_DMA_MAX_WINDOWS : 1; +} + +#endif /* PCI_HOST_SPAPR_H */ diff --git a/include/hw/pci-host/uninorth.h b/include/hw/pci-host/uninorth.h new file mode 100644 index 000000000..62bd81e72 --- /dev/null +++ b/include/hw/pci-host/uninorth.h @@ -0,0 +1,68 @@ +/* + * QEMU Uninorth PCI host (for all Mac99 and newer machines) + * + * Copyright (c) 2006 Fabrice Bellard + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ + +#ifndef UNINORTH_H +#define UNINORTH_H + +#include "hw/pci/pci_host.h" +#include "qom/object.h" + +/* UniNorth version */ +#define UNINORTH_VERSION_10A 0x7 + +#define TYPE_UNI_NORTH_PCI_HOST_BRIDGE "uni-north-pci-pcihost" +#define TYPE_UNI_NORTH_AGP_HOST_BRIDGE "uni-north-agp-pcihost" +#define TYPE_UNI_NORTH_INTERNAL_PCI_HOST_BRIDGE "uni-north-internal-pci-pcihost" +#define TYPE_U3_AGP_HOST_BRIDGE "u3-agp-pcihost" + +typedef struct UNINHostState UNINHostState; +DECLARE_INSTANCE_CHECKER(UNINHostState, UNI_NORTH_PCI_HOST_BRIDGE, + TYPE_UNI_NORTH_PCI_HOST_BRIDGE) +DECLARE_INSTANCE_CHECKER(UNINHostState, UNI_NORTH_AGP_HOST_BRIDGE, + TYPE_UNI_NORTH_AGP_HOST_BRIDGE) +DECLARE_INSTANCE_CHECKER(UNINHostState, UNI_NORTH_INTERNAL_PCI_HOST_BRIDGE, + TYPE_UNI_NORTH_INTERNAL_PCI_HOST_BRIDGE) +DECLARE_INSTANCE_CHECKER(UNINHostState, U3_AGP_HOST_BRIDGE, + TYPE_U3_AGP_HOST_BRIDGE) + +struct UNINHostState { + PCIHostState parent_obj; + + uint32_t ofw_addr; + qemu_irq irqs[4]; + MemoryRegion pci_mmio; + MemoryRegion pci_hole; + MemoryRegion pci_io; +}; + +struct UNINState { + SysBusDevice parent_obj; + + MemoryRegion mem; +}; + +#define TYPE_UNI_NORTH "uni-north" +OBJECT_DECLARE_SIMPLE_TYPE(UNINState, UNI_NORTH) + +#endif /* UNINORTH_H */ diff --git a/include/hw/pci-host/xilinx-pcie.h b/include/hw/pci-host/xilinx-pcie.h new file mode 100644 index 000000000..89be88d87 --- /dev/null +++ b/include/hw/pci-host/xilinx-pcie.h @@ -0,0 +1,66 @@ +/* + * Xilinx PCIe host controller emulation. + * + * Copyright (c) 2016 Imagination Technologies + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, see . + */ + +#ifndef HW_XILINX_PCIE_H +#define HW_XILINX_PCIE_H + +#include "hw/sysbus.h" +#include "hw/pci/pci.h" +#include "hw/pci/pci_bridge.h" +#include "hw/pci/pcie_host.h" +#include "qom/object.h" + +#define TYPE_XILINX_PCIE_HOST "xilinx-pcie-host" +OBJECT_DECLARE_SIMPLE_TYPE(XilinxPCIEHost, XILINX_PCIE_HOST) + +#define TYPE_XILINX_PCIE_ROOT "xilinx-pcie-root" +OBJECT_DECLARE_SIMPLE_TYPE(XilinxPCIERoot, XILINX_PCIE_ROOT) + +struct XilinxPCIERoot { + PCIBridge parent_obj; +}; + +typedef struct XilinxPCIEInt { + uint32_t fifo_reg1; + uint32_t fifo_reg2; +} XilinxPCIEInt; + +struct XilinxPCIEHost { + PCIExpressHost parent_obj; + + char name[16]; + + uint32_t bus_nr; + uint64_t cfg_base, cfg_size; + uint64_t mmio_base, mmio_size; + bool link_up; + qemu_irq irq; + + MemoryRegion mmio, io; + + XilinxPCIERoot root; + + uint32_t intr; + uint32_t intr_mask; + XilinxPCIEInt intr_fifo[16]; + unsigned int intr_fifo_r, intr_fifo_w; + uint32_t rpscr; +}; + +#endif /* HW_XILINX_PCIE_H */ diff --git a/include/hw/pci/msi.h b/include/hw/pci/msi.h new file mode 100644 index 000000000..408768848 --- /dev/null +++ b/include/hw/pci/msi.h @@ -0,0 +1,52 @@ +/* + * msi.h + * + * Copyright (c) 2010 Isaku Yamahata + * VA Linux Systems Japan K.K. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + + * You should have received a copy of the GNU General Public License along + * with this program; if not, see . + */ + +#ifndef QEMU_MSI_H +#define QEMU_MSI_H + +#include "hw/pci/pci.h" + +struct MSIMessage { + uint64_t address; + uint32_t data; +}; + +extern bool msi_nonbroken; + +void msi_set_message(PCIDevice *dev, MSIMessage msg); +MSIMessage msi_get_message(PCIDevice *dev, unsigned int vector); +bool msi_enabled(const PCIDevice *dev); +int msi_init(struct PCIDevice *dev, uint8_t offset, + unsigned int nr_vectors, bool msi64bit, + bool msi_per_vector_mask, Error **errp); +void msi_uninit(struct PCIDevice *dev); +void msi_reset(PCIDevice *dev); +bool msi_is_masked(const PCIDevice *dev, unsigned int vector); +void msi_notify(PCIDevice *dev, unsigned int vector); +void msi_send_message(PCIDevice *dev, MSIMessage msg); +void msi_write_config(PCIDevice *dev, uint32_t addr, uint32_t val, int len); +unsigned int msi_nr_vectors_allocated(const PCIDevice *dev); + +static inline bool msi_present(const PCIDevice *dev) +{ + return dev->cap_present & QEMU_PCI_CAP_MSI; +} + +#endif /* QEMU_MSI_H */ diff --git a/include/hw/pci/msix.h b/include/hw/pci/msix.h new file mode 100644 index 000000000..4c4a60c73 --- /dev/null +++ b/include/hw/pci/msix.h @@ -0,0 +1,64 @@ +#ifndef QEMU_MSIX_H +#define QEMU_MSIX_H + +#include "hw/pci/pci.h" + +#define MSIX_CAP_LENGTH 12 + +void msix_set_message(PCIDevice *dev, int vector, MSIMessage msg); +MSIMessage msix_get_message(PCIDevice *dev, unsigned int vector); +int msix_init(PCIDevice *dev, unsigned short nentries, + MemoryRegion *table_bar, uint8_t table_bar_nr, + unsigned table_offset, MemoryRegion *pba_bar, + uint8_t pba_bar_nr, unsigned pba_offset, uint8_t cap_pos, + Error **errp); +int msix_init_exclusive_bar(PCIDevice *dev, unsigned short nentries, + uint8_t bar_nr, Error **errp); + +void msix_write_config(PCIDevice *dev, uint32_t address, uint32_t val, int len); + +void msix_uninit(PCIDevice *dev, MemoryRegion *table_bar, + MemoryRegion *pba_bar); +void msix_uninit_exclusive_bar(PCIDevice *dev); + +unsigned int msix_nr_vectors_allocated(const PCIDevice *dev); + +void msix_save(PCIDevice *dev, QEMUFile *f); +void msix_load(PCIDevice *dev, QEMUFile *f); + +int msix_enabled(PCIDevice *dev); +int msix_present(PCIDevice *dev); + +bool msix_is_masked(PCIDevice *dev, unsigned vector); +void msix_set_pending(PCIDevice *dev, unsigned vector); +void msix_clr_pending(PCIDevice *dev, int vector); + +int msix_vector_use(PCIDevice *dev, unsigned vector); +void msix_vector_unuse(PCIDevice *dev, unsigned vector); +void msix_unuse_all_vectors(PCIDevice *dev); + +void msix_notify(PCIDevice *dev, unsigned vector); + +void msix_reset(PCIDevice *dev); + +int msix_set_vector_notifiers(PCIDevice *dev, + MSIVectorUseNotifier use_notifier, + MSIVectorReleaseNotifier release_notifier, + MSIVectorPollNotifier poll_notifier); +void msix_unset_vector_notifiers(PCIDevice *dev); + +extern const VMStateDescription vmstate_msix; + +#define VMSTATE_MSIX_TEST(_field, _state, _test) { \ + .name = (stringify(_field)), \ + .size = sizeof(PCIDevice), \ + .vmsd = &vmstate_msix, \ + .flags = VMS_STRUCT, \ + .offset = vmstate_offset_value(_state, _field, PCIDevice), \ + .field_exists = (_test) \ +} + +#define VMSTATE_MSIX(_f, _s) \ + VMSTATE_MSIX_TEST(_f, _s, NULL) + +#endif diff --git a/include/hw/pci/pci.h b/include/hw/pci/pci.h new file mode 100644 index 000000000..72ce649ee --- /dev/null +++ b/include/hw/pci/pci.h @@ -0,0 +1,865 @@ +#ifndef QEMU_PCI_H +#define QEMU_PCI_H + +#include "exec/memory.h" +#include "sysemu/dma.h" + +/* PCI includes legacy ISA access. */ +#include "hw/isa/isa.h" + +#include "hw/pci/pcie.h" +#include "qom/object.h" + +extern bool pci_available; + +/* PCI bus */ + +#define PCI_DEVFN(slot, func) ((((slot) & 0x1f) << 3) | ((func) & 0x07)) +#define PCI_BUS_NUM(x) (((x) >> 8) & 0xff) +#define PCI_SLOT(devfn) (((devfn) >> 3) & 0x1f) +#define PCI_FUNC(devfn) ((devfn) & 0x07) +#define PCI_BUILD_BDF(bus, devfn) ((bus << 8) | (devfn)) +#define PCI_BUS_MAX 256 +#define PCI_DEVFN_MAX 256 +#define PCI_SLOT_MAX 32 +#define PCI_FUNC_MAX 8 + +/* Class, Vendor and Device IDs from Linux's pci_ids.h */ +#include "hw/pci/pci_ids.h" + +/* QEMU-specific Vendor and Device ID definitions */ + +/* IBM (0x1014) */ +#define PCI_DEVICE_ID_IBM_440GX 0x027f +#define PCI_DEVICE_ID_IBM_OPENPIC2 0xffff + +/* Hitachi (0x1054) */ +#define PCI_VENDOR_ID_HITACHI 0x1054 +#define PCI_DEVICE_ID_HITACHI_SH7751R 0x350e + +/* Apple (0x106b) */ +#define PCI_DEVICE_ID_APPLE_343S1201 0x0010 +#define PCI_DEVICE_ID_APPLE_UNI_N_I_PCI 0x001e +#define PCI_DEVICE_ID_APPLE_UNI_N_PCI 0x001f +#define PCI_DEVICE_ID_APPLE_UNI_N_KEYL 0x0022 +#define PCI_DEVICE_ID_APPLE_IPID_USB 0x003f + +/* Realtek (0x10ec) */ +#define PCI_DEVICE_ID_REALTEK_8029 0x8029 + +/* Xilinx (0x10ee) */ +#define PCI_DEVICE_ID_XILINX_XC2VP30 0x0300 + +/* Marvell (0x11ab) */ +#define PCI_DEVICE_ID_MARVELL_GT6412X 0x4620 + +/* QEMU/Bochs VGA (0x1234) */ +#define PCI_VENDOR_ID_QEMU 0x1234 +#define PCI_DEVICE_ID_QEMU_VGA 0x1111 +#define PCI_DEVICE_ID_QEMU_IPMI 0x1112 + +/* VMWare (0x15ad) */ +#define PCI_VENDOR_ID_VMWARE 0x15ad +#define PCI_DEVICE_ID_VMWARE_SVGA2 0x0405 +#define PCI_DEVICE_ID_VMWARE_SVGA 0x0710 +#define PCI_DEVICE_ID_VMWARE_NET 0x0720 +#define PCI_DEVICE_ID_VMWARE_SCSI 0x0730 +#define PCI_DEVICE_ID_VMWARE_PVSCSI 0x07C0 +#define PCI_DEVICE_ID_VMWARE_IDE 0x1729 +#define PCI_DEVICE_ID_VMWARE_VMXNET3 0x07B0 + +/* Intel (0x8086) */ +#define PCI_DEVICE_ID_INTEL_82551IT 0x1209 +#define PCI_DEVICE_ID_INTEL_82557 0x1229 +#define PCI_DEVICE_ID_INTEL_82801IR 0x2922 + +/* Red Hat / Qumranet (for QEMU) -- see pci-ids.txt */ +#define PCI_VENDOR_ID_REDHAT_QUMRANET 0x1af4 +#define PCI_SUBVENDOR_ID_REDHAT_QUMRANET 0x1af4 +#define PCI_SUBDEVICE_ID_QEMU 0x1100 + +#define PCI_DEVICE_ID_VIRTIO_NET 0x1000 +#define PCI_DEVICE_ID_VIRTIO_BLOCK 0x1001 +#define PCI_DEVICE_ID_VIRTIO_BALLOON 0x1002 +#define PCI_DEVICE_ID_VIRTIO_CONSOLE 0x1003 +#define PCI_DEVICE_ID_VIRTIO_SCSI 0x1004 +#define PCI_DEVICE_ID_VIRTIO_RNG 0x1005 +#define PCI_DEVICE_ID_VIRTIO_9P 0x1009 +#define PCI_DEVICE_ID_VIRTIO_VSOCK 0x1012 +#define PCI_DEVICE_ID_VIRTIO_PMEM 0x1013 +#define PCI_DEVICE_ID_VIRTIO_IOMMU 0x1014 +#define PCI_DEVICE_ID_VIRTIO_MEM 0x1015 + +#define PCI_VENDOR_ID_REDHAT 0x1b36 +#define PCI_DEVICE_ID_REDHAT_BRIDGE 0x0001 +#define PCI_DEVICE_ID_REDHAT_SERIAL 0x0002 +#define PCI_DEVICE_ID_REDHAT_SERIAL2 0x0003 +#define PCI_DEVICE_ID_REDHAT_SERIAL4 0x0004 +#define PCI_DEVICE_ID_REDHAT_TEST 0x0005 +#define PCI_DEVICE_ID_REDHAT_ROCKER 0x0006 +#define PCI_DEVICE_ID_REDHAT_SDHCI 0x0007 +#define PCI_DEVICE_ID_REDHAT_PCIE_HOST 0x0008 +#define PCI_DEVICE_ID_REDHAT_PXB 0x0009 +#define PCI_DEVICE_ID_REDHAT_BRIDGE_SEAT 0x000a +#define PCI_DEVICE_ID_REDHAT_PXB_PCIE 0x000b +#define PCI_DEVICE_ID_REDHAT_PCIE_RP 0x000c +#define PCI_DEVICE_ID_REDHAT_XHCI 0x000d +#define PCI_DEVICE_ID_REDHAT_PCIE_BRIDGE 0x000e +#define PCI_DEVICE_ID_REDHAT_MDPY 0x000f +#define PCI_DEVICE_ID_REDHAT_NVME 0x0010 +#define PCI_DEVICE_ID_REDHAT_QXL 0x0100 + +#define FMT_PCIBUS PRIx64 + +typedef uint64_t pcibus_t; + +struct PCIHostDeviceAddress { + unsigned int domain; + unsigned int bus; + unsigned int slot; + unsigned int function; +}; + +typedef void PCIConfigWriteFunc(PCIDevice *pci_dev, + uint32_t address, uint32_t data, int len); +typedef uint32_t PCIConfigReadFunc(PCIDevice *pci_dev, + uint32_t address, int len); +typedef void PCIMapIORegionFunc(PCIDevice *pci_dev, int region_num, + pcibus_t addr, pcibus_t size, int type); +typedef void PCIUnregisterFunc(PCIDevice *pci_dev); + +typedef struct PCIIORegion { + pcibus_t addr; /* current PCI mapping address. -1 means not mapped */ +#define PCI_BAR_UNMAPPED (~(pcibus_t)0) + pcibus_t size; + uint8_t type; + MemoryRegion *memory; + MemoryRegion *address_space; +} PCIIORegion; + +#define PCI_ROM_SLOT 6 +#define PCI_NUM_REGIONS 7 + +enum { + QEMU_PCI_VGA_MEM, + QEMU_PCI_VGA_IO_LO, + QEMU_PCI_VGA_IO_HI, + QEMU_PCI_VGA_NUM_REGIONS, +}; + +#define QEMU_PCI_VGA_MEM_BASE 0xa0000 +#define QEMU_PCI_VGA_MEM_SIZE 0x20000 +#define QEMU_PCI_VGA_IO_LO_BASE 0x3b0 +#define QEMU_PCI_VGA_IO_LO_SIZE 0xc +#define QEMU_PCI_VGA_IO_HI_BASE 0x3c0 +#define QEMU_PCI_VGA_IO_HI_SIZE 0x20 + +#include "hw/pci/pci_regs.h" + +/* PCI HEADER_TYPE */ +#define PCI_HEADER_TYPE_MULTI_FUNCTION 0x80 + +/* Size of the standard PCI config header */ +#define PCI_CONFIG_HEADER_SIZE 0x40 +/* Size of the standard PCI config space */ +#define PCI_CONFIG_SPACE_SIZE 0x100 +/* Size of the standard PCIe config space: 4KB */ +#define PCIE_CONFIG_SPACE_SIZE 0x1000 + +#define PCI_NUM_PINS 4 /* A-D */ + +/* Bits in cap_present field. */ +enum { + QEMU_PCI_CAP_MSI = 0x1, + QEMU_PCI_CAP_MSIX = 0x2, + QEMU_PCI_CAP_EXPRESS = 0x4, + + /* multifunction capable device */ +#define QEMU_PCI_CAP_MULTIFUNCTION_BITNR 3 + QEMU_PCI_CAP_MULTIFUNCTION = (1 << QEMU_PCI_CAP_MULTIFUNCTION_BITNR), + + /* command register SERR bit enabled - unused since QEMU v5.0 */ +#define QEMU_PCI_CAP_SERR_BITNR 4 + QEMU_PCI_CAP_SERR = (1 << QEMU_PCI_CAP_SERR_BITNR), + /* Standard hot plug controller. */ +#define QEMU_PCI_SHPC_BITNR 5 + QEMU_PCI_CAP_SHPC = (1 << QEMU_PCI_SHPC_BITNR), +#define QEMU_PCI_SLOTID_BITNR 6 + QEMU_PCI_CAP_SLOTID = (1 << QEMU_PCI_SLOTID_BITNR), + /* PCI Express capability - Power Controller Present */ +#define QEMU_PCIE_SLTCAP_PCP_BITNR 7 + QEMU_PCIE_SLTCAP_PCP = (1 << QEMU_PCIE_SLTCAP_PCP_BITNR), + /* Link active status in endpoint capability is always set */ +#define QEMU_PCIE_LNKSTA_DLLLA_BITNR 8 + QEMU_PCIE_LNKSTA_DLLLA = (1 << QEMU_PCIE_LNKSTA_DLLLA_BITNR), +#define QEMU_PCIE_EXTCAP_INIT_BITNR 9 + QEMU_PCIE_EXTCAP_INIT = (1 << QEMU_PCIE_EXTCAP_INIT_BITNR), +}; + +#define TYPE_PCI_DEVICE "pci-device" +typedef struct PCIDeviceClass PCIDeviceClass; +DECLARE_OBJ_CHECKERS(PCIDevice, PCIDeviceClass, + PCI_DEVICE, TYPE_PCI_DEVICE) + +/* Implemented by devices that can be plugged on PCI Express buses */ +#define INTERFACE_PCIE_DEVICE "pci-express-device" + +/* Implemented by devices that can be plugged on Conventional PCI buses */ +#define INTERFACE_CONVENTIONAL_PCI_DEVICE "conventional-pci-device" + +typedef struct PCIINTxRoute { + enum { + PCI_INTX_ENABLED, + PCI_INTX_INVERTED, + PCI_INTX_DISABLED, + } mode; + int irq; +} PCIINTxRoute; + +struct PCIDeviceClass { + DeviceClass parent_class; + + void (*realize)(PCIDevice *dev, Error **errp); + PCIUnregisterFunc *exit; + PCIConfigReadFunc *config_read; + PCIConfigWriteFunc *config_write; + + uint16_t vendor_id; + uint16_t device_id; + uint8_t revision; + uint16_t class_id; + uint16_t subsystem_vendor_id; /* only for header type = 0 */ + uint16_t subsystem_id; /* only for header type = 0 */ + + /* + * pci-to-pci bridge or normal device. + * This doesn't mean pci host switch. + * When card bus bridge is supported, this would be enhanced. + */ + bool is_bridge; + + /* rom bar */ + const char *romfile; +}; + +typedef void (*PCIINTxRoutingNotifier)(PCIDevice *dev); +typedef int (*MSIVectorUseNotifier)(PCIDevice *dev, unsigned int vector, + MSIMessage msg); +typedef void (*MSIVectorReleaseNotifier)(PCIDevice *dev, unsigned int vector); +typedef void (*MSIVectorPollNotifier)(PCIDevice *dev, + unsigned int vector_start, + unsigned int vector_end); + +enum PCIReqIDType { + PCI_REQ_ID_INVALID = 0, + PCI_REQ_ID_BDF, + PCI_REQ_ID_SECONDARY_BUS, + PCI_REQ_ID_MAX, +}; +typedef enum PCIReqIDType PCIReqIDType; + +struct PCIReqIDCache { + PCIDevice *dev; + PCIReqIDType type; +}; +typedef struct PCIReqIDCache PCIReqIDCache; + +struct PCIDevice { + DeviceState qdev; + bool partially_hotplugged; + + /* PCI config space */ + uint8_t *config; + + /* Used to enable config checks on load. Note that writable bits are + * never checked even if set in cmask. */ + uint8_t *cmask; + + /* Used to implement R/W bytes */ + uint8_t *wmask; + + /* Used to implement RW1C(Write 1 to Clear) bytes */ + uint8_t *w1cmask; + + /* Used to allocate config space for capabilities. */ + uint8_t *used; + + /* the following fields are read only */ + int32_t devfn; + /* Cached device to fetch requester ID from, to avoid the PCI + * tree walking every time we invoke PCI request (e.g., + * MSI). For conventional PCI root complex, this field is + * meaningless. */ + PCIReqIDCache requester_id_cache; + char name[64]; + PCIIORegion io_regions[PCI_NUM_REGIONS]; + AddressSpace bus_master_as; + MemoryRegion bus_master_container_region; + MemoryRegion bus_master_enable_region; + + /* do not access the following fields */ + PCIConfigReadFunc *config_read; + PCIConfigWriteFunc *config_write; + + /* Legacy PCI VGA regions */ + MemoryRegion *vga_regions[QEMU_PCI_VGA_NUM_REGIONS]; + bool has_vga; + + /* Current IRQ levels. Used internally by the generic PCI code. */ + uint8_t irq_state; + + /* Capability bits */ + uint32_t cap_present; + + /* Offset of MSI-X capability in config space */ + uint8_t msix_cap; + + /* MSI-X entries */ + int msix_entries_nr; + + /* Space to store MSIX table & pending bit array */ + uint8_t *msix_table; + uint8_t *msix_pba; + /* MemoryRegion container for msix exclusive BAR setup */ + MemoryRegion msix_exclusive_bar; + /* Memory Regions for MSIX table and pending bit entries. */ + MemoryRegion msix_table_mmio; + MemoryRegion msix_pba_mmio; + /* Reference-count for entries actually in use by driver. */ + unsigned *msix_entry_used; + /* MSIX function mask set or MSIX disabled */ + bool msix_function_masked; + /* Version id needed for VMState */ + int32_t version_id; + + /* Offset of MSI capability in config space */ + uint8_t msi_cap; + + /* PCI Express */ + PCIExpressDevice exp; + + /* SHPC */ + SHPCDevice *shpc; + + /* Location of option rom */ + char *romfile; + bool has_rom; + MemoryRegion rom; + uint32_t rom_bar; + + /* INTx routing notifier */ + PCIINTxRoutingNotifier intx_routing_notifier; + + /* MSI-X notifiers */ + MSIVectorUseNotifier msix_vector_use_notifier; + MSIVectorReleaseNotifier msix_vector_release_notifier; + MSIVectorPollNotifier msix_vector_poll_notifier; + + /* ID of standby device in net_failover pair */ + char *failover_pair_id; +}; + +void pci_register_bar(PCIDevice *pci_dev, int region_num, + uint8_t attr, MemoryRegion *memory); +void pci_register_vga(PCIDevice *pci_dev, MemoryRegion *mem, + MemoryRegion *io_lo, MemoryRegion *io_hi); +void pci_unregister_vga(PCIDevice *pci_dev); +pcibus_t pci_get_bar_addr(PCIDevice *pci_dev, int region_num); + +int pci_add_capability(PCIDevice *pdev, uint8_t cap_id, + uint8_t offset, uint8_t size, + Error **errp); + +void pci_del_capability(PCIDevice *pci_dev, uint8_t cap_id, uint8_t cap_size); + +uint8_t pci_find_capability(PCIDevice *pci_dev, uint8_t cap_id); + + +uint32_t pci_default_read_config(PCIDevice *d, + uint32_t address, int len); +void pci_default_write_config(PCIDevice *d, + uint32_t address, uint32_t val, int len); +void pci_device_save(PCIDevice *s, QEMUFile *f); +int pci_device_load(PCIDevice *s, QEMUFile *f); +MemoryRegion *pci_address_space(PCIDevice *dev); +MemoryRegion *pci_address_space_io(PCIDevice *dev); + +/* + * Should not normally be used by devices. For use by sPAPR target + * where QEMU emulates firmware. + */ +int pci_bar(PCIDevice *d, int reg); + +typedef void (*pci_set_irq_fn)(void *opaque, int irq_num, int level); +typedef int (*pci_map_irq_fn)(PCIDevice *pci_dev, int irq_num); +typedef PCIINTxRoute (*pci_route_irq_fn)(void *opaque, int pin); + +#define TYPE_PCI_BUS "PCI" +OBJECT_DECLARE_TYPE(PCIBus, PCIBusClass, PCI_BUS) +#define TYPE_PCIE_BUS "PCIE" + +bool pci_bus_is_express(PCIBus *bus); + +void pci_root_bus_new_inplace(PCIBus *bus, size_t bus_size, DeviceState *parent, + const char *name, + MemoryRegion *address_space_mem, + MemoryRegion *address_space_io, + uint8_t devfn_min, const char *typename); +PCIBus *pci_root_bus_new(DeviceState *parent, const char *name, + MemoryRegion *address_space_mem, + MemoryRegion *address_space_io, + uint8_t devfn_min, const char *typename); +void pci_root_bus_cleanup(PCIBus *bus); +void pci_bus_irqs(PCIBus *bus, pci_set_irq_fn set_irq, pci_map_irq_fn map_irq, + void *irq_opaque, int nirq); +void pci_bus_irqs_cleanup(PCIBus *bus); +int pci_bus_get_irq_level(PCIBus *bus, int irq_num); +/* 0 <= pin <= 3 0 = INTA, 1 = INTB, 2 = INTC, 3 = INTD */ +static inline int pci_swizzle(int slot, int pin) +{ + return (slot + pin) % PCI_NUM_PINS; +} +int pci_swizzle_map_irq_fn(PCIDevice *pci_dev, int pin); +PCIBus *pci_register_root_bus(DeviceState *parent, const char *name, + pci_set_irq_fn set_irq, pci_map_irq_fn map_irq, + void *irq_opaque, + MemoryRegion *address_space_mem, + MemoryRegion *address_space_io, + uint8_t devfn_min, int nirq, + const char *typename); +void pci_unregister_root_bus(PCIBus *bus); +void pci_bus_set_route_irq_fn(PCIBus *, pci_route_irq_fn); +PCIINTxRoute pci_device_route_intx_to_irq(PCIDevice *dev, int pin); +bool pci_intx_route_changed(PCIINTxRoute *old, PCIINTxRoute *new); +void pci_bus_fire_intx_routing_notifier(PCIBus *bus); +void pci_device_set_intx_routing_notifier(PCIDevice *dev, + PCIINTxRoutingNotifier notifier); +void pci_device_reset(PCIDevice *dev); + +PCIDevice *pci_nic_init_nofail(NICInfo *nd, PCIBus *rootbus, + const char *default_model, + const char *default_devaddr); + +PCIDevice *pci_vga_init(PCIBus *bus); + +static inline PCIBus *pci_get_bus(const PCIDevice *dev) +{ + return PCI_BUS(qdev_get_parent_bus(DEVICE(dev))); +} +int pci_bus_num(PCIBus *s); +static inline int pci_dev_bus_num(const PCIDevice *dev) +{ + return pci_bus_num(pci_get_bus(dev)); +} + +int pci_bus_numa_node(PCIBus *bus); +void pci_for_each_device(PCIBus *bus, int bus_num, + void (*fn)(PCIBus *bus, PCIDevice *d, void *opaque), + void *opaque); +void pci_for_each_device_reverse(PCIBus *bus, int bus_num, + void (*fn)(PCIBus *bus, PCIDevice *d, + void *opaque), + void *opaque); +void pci_for_each_bus_depth_first(PCIBus *bus, + void *(*begin)(PCIBus *bus, void *parent_state), + void (*end)(PCIBus *bus, void *state), + void *parent_state); +PCIDevice *pci_get_function_0(PCIDevice *pci_dev); + +/* Use this wrapper when specific scan order is not required. */ +static inline +void pci_for_each_bus(PCIBus *bus, + void (*fn)(PCIBus *bus, void *opaque), + void *opaque) +{ + pci_for_each_bus_depth_first(bus, NULL, fn, opaque); +} + +PCIBus *pci_device_root_bus(const PCIDevice *d); +const char *pci_root_bus_path(PCIDevice *dev); +PCIDevice *pci_find_device(PCIBus *bus, int bus_num, uint8_t devfn); +int pci_qdev_find_device(const char *id, PCIDevice **pdev); +void pci_bus_get_w64_range(PCIBus *bus, Range *range); + +void pci_device_deassert_intx(PCIDevice *dev); + +typedef AddressSpace *(*PCIIOMMUFunc)(PCIBus *, void *, int); + +AddressSpace *pci_device_iommu_address_space(PCIDevice *dev); +void pci_setup_iommu(PCIBus *bus, PCIIOMMUFunc fn, void *opaque); + +static inline void +pci_set_byte(uint8_t *config, uint8_t val) +{ + *config = val; +} + +static inline uint8_t +pci_get_byte(const uint8_t *config) +{ + return *config; +} + +static inline void +pci_set_word(uint8_t *config, uint16_t val) +{ + stw_le_p(config, val); +} + +static inline uint16_t +pci_get_word(const uint8_t *config) +{ + return lduw_le_p(config); +} + +static inline void +pci_set_long(uint8_t *config, uint32_t val) +{ + stl_le_p(config, val); +} + +static inline uint32_t +pci_get_long(const uint8_t *config) +{ + return ldl_le_p(config); +} + +/* + * PCI capabilities and/or their fields + * are generally DWORD aligned only so + * mechanism used by pci_set/get_quad() + * must be tolerant to unaligned pointers + * + */ +static inline void +pci_set_quad(uint8_t *config, uint64_t val) +{ + stq_le_p(config, val); +} + +static inline uint64_t +pci_get_quad(const uint8_t *config) +{ + return ldq_le_p(config); +} + +static inline void +pci_config_set_vendor_id(uint8_t *pci_config, uint16_t val) +{ + pci_set_word(&pci_config[PCI_VENDOR_ID], val); +} + +static inline void +pci_config_set_device_id(uint8_t *pci_config, uint16_t val) +{ + pci_set_word(&pci_config[PCI_DEVICE_ID], val); +} + +static inline void +pci_config_set_revision(uint8_t *pci_config, uint8_t val) +{ + pci_set_byte(&pci_config[PCI_REVISION_ID], val); +} + +static inline void +pci_config_set_class(uint8_t *pci_config, uint16_t val) +{ + pci_set_word(&pci_config[PCI_CLASS_DEVICE], val); +} + +static inline void +pci_config_set_prog_interface(uint8_t *pci_config, uint8_t val) +{ + pci_set_byte(&pci_config[PCI_CLASS_PROG], val); +} + +static inline void +pci_config_set_interrupt_pin(uint8_t *pci_config, uint8_t val) +{ + pci_set_byte(&pci_config[PCI_INTERRUPT_PIN], val); +} + +/* + * helper functions to do bit mask operation on configuration space. + * Just to set bit, use test-and-set and discard returned value. + * Just to clear bit, use test-and-clear and discard returned value. + * NOTE: They aren't atomic. + */ +static inline uint8_t +pci_byte_test_and_clear_mask(uint8_t *config, uint8_t mask) +{ + uint8_t val = pci_get_byte(config); + pci_set_byte(config, val & ~mask); + return val & mask; +} + +static inline uint8_t +pci_byte_test_and_set_mask(uint8_t *config, uint8_t mask) +{ + uint8_t val = pci_get_byte(config); + pci_set_byte(config, val | mask); + return val & mask; +} + +static inline uint16_t +pci_word_test_and_clear_mask(uint8_t *config, uint16_t mask) +{ + uint16_t val = pci_get_word(config); + pci_set_word(config, val & ~mask); + return val & mask; +} + +static inline uint16_t +pci_word_test_and_set_mask(uint8_t *config, uint16_t mask) +{ + uint16_t val = pci_get_word(config); + pci_set_word(config, val | mask); + return val & mask; +} + +static inline uint32_t +pci_long_test_and_clear_mask(uint8_t *config, uint32_t mask) +{ + uint32_t val = pci_get_long(config); + pci_set_long(config, val & ~mask); + return val & mask; +} + +static inline uint32_t +pci_long_test_and_set_mask(uint8_t *config, uint32_t mask) +{ + uint32_t val = pci_get_long(config); + pci_set_long(config, val | mask); + return val & mask; +} + +static inline uint64_t +pci_quad_test_and_clear_mask(uint8_t *config, uint64_t mask) +{ + uint64_t val = pci_get_quad(config); + pci_set_quad(config, val & ~mask); + return val & mask; +} + +static inline uint64_t +pci_quad_test_and_set_mask(uint8_t *config, uint64_t mask) +{ + uint64_t val = pci_get_quad(config); + pci_set_quad(config, val | mask); + return val & mask; +} + +/* Access a register specified by a mask */ +static inline void +pci_set_byte_by_mask(uint8_t *config, uint8_t mask, uint8_t reg) +{ + uint8_t val = pci_get_byte(config); + uint8_t rval = reg << ctz32(mask); + pci_set_byte(config, (~mask & val) | (mask & rval)); +} + +static inline uint8_t +pci_get_byte_by_mask(uint8_t *config, uint8_t mask) +{ + uint8_t val = pci_get_byte(config); + return (val & mask) >> ctz32(mask); +} + +static inline void +pci_set_word_by_mask(uint8_t *config, uint16_t mask, uint16_t reg) +{ + uint16_t val = pci_get_word(config); + uint16_t rval = reg << ctz32(mask); + pci_set_word(config, (~mask & val) | (mask & rval)); +} + +static inline uint16_t +pci_get_word_by_mask(uint8_t *config, uint16_t mask) +{ + uint16_t val = pci_get_word(config); + return (val & mask) >> ctz32(mask); +} + +static inline void +pci_set_long_by_mask(uint8_t *config, uint32_t mask, uint32_t reg) +{ + uint32_t val = pci_get_long(config); + uint32_t rval = reg << ctz32(mask); + pci_set_long(config, (~mask & val) | (mask & rval)); +} + +static inline uint32_t +pci_get_long_by_mask(uint8_t *config, uint32_t mask) +{ + uint32_t val = pci_get_long(config); + return (val & mask) >> ctz32(mask); +} + +static inline void +pci_set_quad_by_mask(uint8_t *config, uint64_t mask, uint64_t reg) +{ + uint64_t val = pci_get_quad(config); + uint64_t rval = reg << ctz32(mask); + pci_set_quad(config, (~mask & val) | (mask & rval)); +} + +static inline uint64_t +pci_get_quad_by_mask(uint8_t *config, uint64_t mask) +{ + uint64_t val = pci_get_quad(config); + return (val & mask) >> ctz32(mask); +} + +PCIDevice *pci_new_multifunction(int devfn, bool multifunction, + const char *name); +PCIDevice *pci_new(int devfn, const char *name); +bool pci_realize_and_unref(PCIDevice *dev, PCIBus *bus, Error **errp); + +PCIDevice *pci_create_simple_multifunction(PCIBus *bus, int devfn, + bool multifunction, + const char *name); +PCIDevice *pci_create_simple(PCIBus *bus, int devfn, const char *name); + +void lsi53c8xx_handle_legacy_cmdline(DeviceState *lsi_dev); + +qemu_irq pci_allocate_irq(PCIDevice *pci_dev); +void pci_set_irq(PCIDevice *pci_dev, int level); + +static inline void pci_irq_assert(PCIDevice *pci_dev) +{ + pci_set_irq(pci_dev, 1); +} + +static inline void pci_irq_deassert(PCIDevice *pci_dev) +{ + pci_set_irq(pci_dev, 0); +} + +/* + * FIXME: PCI does not work this way. + * All the callers to this method should be fixed. + */ +static inline void pci_irq_pulse(PCIDevice *pci_dev) +{ + pci_irq_assert(pci_dev); + pci_irq_deassert(pci_dev); +} + +static inline int pci_is_express(const PCIDevice *d) +{ + return d->cap_present & QEMU_PCI_CAP_EXPRESS; +} + +static inline int pci_is_express_downstream_port(const PCIDevice *d) +{ + uint8_t type; + + if (!pci_is_express(d) || !d->exp.exp_cap) { + return 0; + } + + type = pcie_cap_get_type(d); + + return type == PCI_EXP_TYPE_DOWNSTREAM || type == PCI_EXP_TYPE_ROOT_PORT; +} + +static inline uint32_t pci_config_size(const PCIDevice *d) +{ + return pci_is_express(d) ? PCIE_CONFIG_SPACE_SIZE : PCI_CONFIG_SPACE_SIZE; +} + +static inline uint16_t pci_get_bdf(PCIDevice *dev) +{ + return PCI_BUILD_BDF(pci_bus_num(pci_get_bus(dev)), dev->devfn); +} + +uint16_t pci_requester_id(PCIDevice *dev); + +/* DMA access functions */ +static inline AddressSpace *pci_get_address_space(PCIDevice *dev) +{ + return &dev->bus_master_as; +} + +static inline int pci_dma_rw(PCIDevice *dev, dma_addr_t addr, + void *buf, dma_addr_t len, DMADirection dir) +{ + return dma_memory_rw(pci_get_address_space(dev), addr, buf, len, dir); +} + +static inline int pci_dma_read(PCIDevice *dev, dma_addr_t addr, + void *buf, dma_addr_t len) +{ + return pci_dma_rw(dev, addr, buf, len, DMA_DIRECTION_TO_DEVICE); +} + +static inline int pci_dma_write(PCIDevice *dev, dma_addr_t addr, + const void *buf, dma_addr_t len) +{ + return pci_dma_rw(dev, addr, (void *) buf, len, DMA_DIRECTION_FROM_DEVICE); +} + +#define PCI_DMA_DEFINE_LDST(_l, _s, _bits) \ + static inline uint##_bits##_t ld##_l##_pci_dma(PCIDevice *dev, \ + dma_addr_t addr) \ + { \ + return ld##_l##_dma(pci_get_address_space(dev), addr); \ + } \ + static inline void st##_s##_pci_dma(PCIDevice *dev, \ + dma_addr_t addr, uint##_bits##_t val) \ + { \ + st##_s##_dma(pci_get_address_space(dev), addr, val); \ + } + +PCI_DMA_DEFINE_LDST(ub, b, 8); +PCI_DMA_DEFINE_LDST(uw_le, w_le, 16) +PCI_DMA_DEFINE_LDST(l_le, l_le, 32); +PCI_DMA_DEFINE_LDST(q_le, q_le, 64); +PCI_DMA_DEFINE_LDST(uw_be, w_be, 16) +PCI_DMA_DEFINE_LDST(l_be, l_be, 32); +PCI_DMA_DEFINE_LDST(q_be, q_be, 64); + +#undef PCI_DMA_DEFINE_LDST + +static inline void *pci_dma_map(PCIDevice *dev, dma_addr_t addr, + dma_addr_t *plen, DMADirection dir) +{ + void *buf; + + buf = dma_memory_map(pci_get_address_space(dev), addr, plen, dir); + return buf; +} + +static inline void pci_dma_unmap(PCIDevice *dev, void *buffer, dma_addr_t len, + DMADirection dir, dma_addr_t access_len) +{ + dma_memory_unmap(pci_get_address_space(dev), buffer, len, dir, access_len); +} + +static inline void pci_dma_sglist_init(QEMUSGList *qsg, PCIDevice *dev, + int alloc_hint) +{ + qemu_sglist_init(qsg, DEVICE(dev), alloc_hint, pci_get_address_space(dev)); +} + +extern const VMStateDescription vmstate_pci_device; + +#define VMSTATE_PCI_DEVICE(_field, _state) { \ + .name = (stringify(_field)), \ + .size = sizeof(PCIDevice), \ + .vmsd = &vmstate_pci_device, \ + .flags = VMS_STRUCT, \ + .offset = vmstate_offset_value(_state, _field, PCIDevice), \ +} + +#define VMSTATE_PCI_DEVICE_POINTER(_field, _state) { \ + .name = (stringify(_field)), \ + .size = sizeof(PCIDevice), \ + .vmsd = &vmstate_pci_device, \ + .flags = VMS_STRUCT|VMS_POINTER, \ + .offset = vmstate_offset_pointer(_state, _field, PCIDevice), \ +} + +MSIMessage pci_get_msi_message(PCIDevice *dev, int vector); + +#endif diff --git a/include/hw/pci/pci_bridge.h b/include/hw/pci/pci_bridge.h new file mode 100644 index 000000000..a94d35003 --- /dev/null +++ b/include/hw/pci/pci_bridge.h @@ -0,0 +1,158 @@ +/* + * QEMU PCI bridge + * + * Copyright (c) 2004 Fabrice Bellard + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + * + * split out pci bus specific stuff from pci.[hc] to pci_bridge.[hc] + * Copyright (c) 2009 Isaku Yamahata + * VA Linux Systems Japan K.K. + * + */ + +#ifndef QEMU_PCI_BRIDGE_H +#define QEMU_PCI_BRIDGE_H + +#include "hw/pci/pci.h" +#include "hw/pci/pci_bus.h" +#include "qom/object.h" + +typedef struct PCIBridgeWindows PCIBridgeWindows; + +/* + * Aliases for each of the address space windows that the bridge + * can forward. Mapped into the bridge's parent's address space, + * as subregions. + */ +struct PCIBridgeWindows { + MemoryRegion alias_pref_mem; + MemoryRegion alias_mem; + MemoryRegion alias_io; + /* + * When bridge control VGA forwarding is enabled, bridges will + * provide positive decode on the PCI VGA defined I/O port and + * MMIO ranges. When enabled forwarding is only qualified on the + * I/O and memory enable bits in the bridge command register. + */ + MemoryRegion alias_vga[QEMU_PCI_VGA_NUM_REGIONS]; +}; + +#define TYPE_PCI_BRIDGE "base-pci-bridge" +OBJECT_DECLARE_SIMPLE_TYPE(PCIBridge, PCI_BRIDGE) + +struct PCIBridge { + /*< private >*/ + PCIDevice parent_obj; + /*< public >*/ + + /* private member */ + PCIBus sec_bus; + /* + * Memory regions for the bridge's address spaces. These regions are not + * directly added to system_memory/system_io or its descendants. + * Bridge's secondary bus points to these, so that devices + * under the bridge see these regions as its address spaces. + * The regions are as large as the entire address space - + * they don't take into account any windows. + */ + MemoryRegion address_space_mem; + MemoryRegion address_space_io; + + PCIBridgeWindows *windows; + + pci_map_irq_fn map_irq; + const char *bus_name; +}; + +#define PCI_BRIDGE_DEV_PROP_CHASSIS_NR "chassis_nr" +#define PCI_BRIDGE_DEV_PROP_MSI "msi" +#define PCI_BRIDGE_DEV_PROP_SHPC "shpc" + +int pci_bridge_ssvid_init(PCIDevice *dev, uint8_t offset, + uint16_t svid, uint16_t ssid, + Error **errp); + +PCIDevice *pci_bridge_get_device(PCIBus *bus); +PCIBus *pci_bridge_get_sec_bus(PCIBridge *br); + +pcibus_t pci_bridge_get_base(const PCIDevice *bridge, uint8_t type); +pcibus_t pci_bridge_get_limit(const PCIDevice *bridge, uint8_t type); + +void pci_bridge_update_mappings(PCIBridge *br); +void pci_bridge_write_config(PCIDevice *d, + uint32_t address, uint32_t val, int len); +void pci_bridge_disable_base_limit(PCIDevice *dev); +void pci_bridge_reset(DeviceState *qdev); + +void pci_bridge_initfn(PCIDevice *pci_dev, const char *typename); +void pci_bridge_exitfn(PCIDevice *pci_dev); + +void pci_bridge_dev_plug_cb(HotplugHandler *hotplug_dev, DeviceState *dev, + Error **errp); +void pci_bridge_dev_unplug_cb(HotplugHandler *hotplug_dev, DeviceState *dev, + Error **errp); +void pci_bridge_dev_unplug_request_cb(HotplugHandler *hotplug_dev, + DeviceState *dev, Error **errp); + +/* + * before qdev initialization(qdev_init()), this function sets bus_name and + * map_irq callback which are necessary for pci_bridge_initfn() to + * initialize bus. + */ +void pci_bridge_map_irq(PCIBridge *br, const char* bus_name, + pci_map_irq_fn map_irq); + +/* TODO: add this define to pci_regs.h in linux and then in qemu. */ +#define PCI_BRIDGE_CTL_VGA_16BIT 0x10 /* VGA 16-bit decode */ +#define PCI_BRIDGE_CTL_DISCARD 0x100 /* Primary discard timer */ +#define PCI_BRIDGE_CTL_SEC_DISCARD 0x200 /* Secondary discard timer */ +#define PCI_BRIDGE_CTL_DISCARD_STATUS 0x400 /* Discard timer status */ +#define PCI_BRIDGE_CTL_DISCARD_SERR 0x800 /* Discard timer SERR# enable */ + +typedef struct PCIBridgeQemuCap { + uint8_t id; /* Standard PCI capability header field */ + uint8_t next; /* Standard PCI capability header field */ + uint8_t len; /* Standard PCI vendor-specific capability header field */ + uint8_t type; /* Red Hat vendor-specific capability type. + Types are defined with REDHAT_PCI_CAP_ prefix */ + + uint32_t bus_res; /* Minimum number of buses to reserve */ + uint64_t io; /* IO space to reserve */ + uint32_t mem; /* Non-prefetchable memory to reserve */ + /* At most one of the following two fields may be set to a value + * different from -1 */ + uint32_t mem_pref_32; /* Prefetchable memory to reserve (32-bit MMIO) */ + uint64_t mem_pref_64; /* Prefetchable memory to reserve (64-bit MMIO) */ +} PCIBridgeQemuCap; + +#define REDHAT_PCI_CAP_RESOURCE_RESERVE 1 + +/* + * PCI BUS/IO/MEM/PREFMEM additional resources recorded as a + * capability in PCI configuration space to reserve on firmware init. + */ +typedef struct PCIResReserve { + uint32_t bus; + uint64_t io; + uint64_t mem_non_pref; + uint64_t mem_pref_32; + uint64_t mem_pref_64; +} PCIResReserve; + +int pci_bridge_qemu_reserve_cap_init(PCIDevice *dev, int cap_offset, + PCIResReserve res_reserve, Error **errp); + +#endif /* QEMU_PCI_BRIDGE_H */ diff --git a/include/hw/pci/pci_bus.h b/include/hw/pci/pci_bus.h new file mode 100644 index 000000000..347440d42 --- /dev/null +++ b/include/hw/pci/pci_bus.h @@ -0,0 +1,66 @@ +#ifndef QEMU_PCI_BUS_H +#define QEMU_PCI_BUS_H + +#include "hw/pci/pci.h" + +/* + * PCI Bus datastructures. + * + * Do not access the following members directly; + * use accessor functions in pci.h + */ + +struct PCIBusClass { + /*< private >*/ + BusClass parent_class; + /*< public >*/ + + int (*bus_num)(PCIBus *bus); + uint16_t (*numa_node)(PCIBus *bus); +}; + +enum PCIBusFlags { + /* This bus is the root of a PCI domain */ + PCI_BUS_IS_ROOT = 0x0001, + /* PCIe extended configuration space is accessible on this bus */ + PCI_BUS_EXTENDED_CONFIG_SPACE = 0x0002, +}; + +struct PCIBus { + BusState qbus; + enum PCIBusFlags flags; + PCIIOMMUFunc iommu_fn; + void *iommu_opaque; + uint8_t devfn_min; + uint32_t slot_reserved_mask; + pci_set_irq_fn set_irq; + pci_map_irq_fn map_irq; + pci_route_irq_fn route_intx_to_irq; + void *irq_opaque; + PCIDevice *devices[PCI_SLOT_MAX * PCI_FUNC_MAX]; + PCIDevice *parent_dev; + MemoryRegion *address_space_mem; + MemoryRegion *address_space_io; + + QLIST_HEAD(, PCIBus) child; /* this will be replaced by qdev later */ + QLIST_ENTRY(PCIBus) sibling;/* this will be replaced by qdev later */ + + /* The bus IRQ state is the logical OR of the connected devices. + Keep a count of the number of devices with raised IRQs. */ + int nirq; + int *irq_count; + + Notifier machine_done; +}; + +static inline bool pci_bus_is_root(PCIBus *bus) +{ + return !!(bus->flags & PCI_BUS_IS_ROOT); +} + +static inline bool pci_bus_allows_extended_config_space(PCIBus *bus) +{ + return !!(bus->flags & PCI_BUS_EXTENDED_CONFIG_SPACE); +} + +#endif /* QEMU_PCI_BUS_H */ diff --git a/include/hw/pci/pci_host.h b/include/hw/pci/pci_host.h new file mode 100644 index 000000000..52e038c01 --- /dev/null +++ b/include/hw/pci/pci_host.h @@ -0,0 +1,70 @@ +/* + * QEMU Common PCI Host bridge configuration data space access routines. + * + * Copyright (c) 2006 Fabrice Bellard + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ + +/* Worker routines for a PCI host controller that uses an {address,data} + register pair to access PCI configuration space. */ + +#ifndef PCI_HOST_H +#define PCI_HOST_H + +#include "hw/sysbus.h" +#include "qom/object.h" + +#define TYPE_PCI_HOST_BRIDGE "pci-host-bridge" +OBJECT_DECLARE_TYPE(PCIHostState, PCIHostBridgeClass, PCI_HOST_BRIDGE) + +struct PCIHostState { + SysBusDevice busdev; + + MemoryRegion conf_mem; + MemoryRegion data_mem; + MemoryRegion mmcfg; + uint32_t config_reg; + bool mig_enabled; + PCIBus *bus; + + QLIST_ENTRY(PCIHostState) next; +}; + +struct PCIHostBridgeClass { + SysBusDeviceClass parent_class; + + const char *(*root_bus_path)(PCIHostState *, PCIBus *); +}; + +/* common internal helpers for PCI/PCIe hosts, cut off overflows */ +void pci_host_config_write_common(PCIDevice *pci_dev, uint32_t addr, + uint32_t limit, uint32_t val, uint32_t len); +uint32_t pci_host_config_read_common(PCIDevice *pci_dev, uint32_t addr, + uint32_t limit, uint32_t len); + +void pci_data_write(PCIBus *s, uint32_t addr, uint32_t val, unsigned len); +uint32_t pci_data_read(PCIBus *s, uint32_t addr, unsigned len); + +extern const MemoryRegionOps pci_host_conf_le_ops; +extern const MemoryRegionOps pci_host_conf_be_ops; +extern const MemoryRegionOps pci_host_data_le_ops; +extern const MemoryRegionOps pci_host_data_be_ops; + +#endif /* PCI_HOST_H */ diff --git a/include/hw/pci/pci_ids.h b/include/hw/pci/pci_ids.h new file mode 100644 index 000000000..11f8ab714 --- /dev/null +++ b/include/hw/pci/pci_ids.h @@ -0,0 +1,277 @@ +/* + * PCI Class, Vendor and Device IDs + * + * Please keep sorted. + * + * Abbreviated version of linux/pci_ids.h + * + * QEMU-specific definitions belong in pci.h + */ + +#ifndef HW_PCI_IDS_H +#define HW_PCI_IDS_H + +/* Device classes and subclasses */ + +#define PCI_CLASS_NOT_DEFINED 0x0000 +#define PCI_CLASS_NOT_DEFINED_VGA 0x0001 + +#define PCI_BASE_CLASS_STORAGE 0x01 +#define PCI_CLASS_STORAGE_SCSI 0x0100 +#define PCI_CLASS_STORAGE_IDE 0x0101 +#define PCI_CLASS_STORAGE_FLOPPY 0x0102 +#define PCI_CLASS_STORAGE_IPI 0x0103 +#define PCI_CLASS_STORAGE_RAID 0x0104 +#define PCI_CLASS_STORAGE_ATA 0x0105 +#define PCI_CLASS_STORAGE_SATA 0x0106 +#define PCI_CLASS_STORAGE_SAS 0x0107 +#define PCI_CLASS_STORAGE_EXPRESS 0x0108 +#define PCI_CLASS_STORAGE_OTHER 0x0180 + +#define PCI_BASE_CLASS_NETWORK 0x02 +#define PCI_CLASS_NETWORK_ETHERNET 0x0200 +#define PCI_CLASS_NETWORK_TOKEN_RING 0x0201 +#define PCI_CLASS_NETWORK_FDDI 0x0202 +#define PCI_CLASS_NETWORK_ATM 0x0203 +#define PCI_CLASS_NETWORK_ISDN 0x0204 +#define PCI_CLASS_NETWORK_WORLDFIP 0x0205 +#define PCI_CLASS_NETWORK_PICMG214 0x0206 +#define PCI_CLASS_NETWORK_OTHER 0x0280 + +#define PCI_BASE_CLASS_DISPLAY 0x03 +#define PCI_CLASS_DISPLAY_VGA 0x0300 +#define PCI_CLASS_DISPLAY_XGA 0x0301 +#define PCI_CLASS_DISPLAY_3D 0x0302 +#define PCI_CLASS_DISPLAY_OTHER 0x0380 + +#define PCI_BASE_CLASS_MULTIMEDIA 0x04 +#define PCI_CLASS_MULTIMEDIA_VIDEO 0x0400 +#define PCI_CLASS_MULTIMEDIA_AUDIO 0x0401 +#define PCI_CLASS_MULTIMEDIA_PHONE 0x0402 +#define PCI_CLASS_MULTIMEDIA_OTHER 0x0480 + +#define PCI_BASE_CLASS_MEMORY 0x05 +#define PCI_CLASS_MEMORY_RAM 0x0500 +#define PCI_CLASS_MEMORY_FLASH 0x0501 +#define PCI_CLASS_MEMORY_OTHER 0x0580 + +#define PCI_BASE_CLASS_BRIDGE 0x06 +#define PCI_CLASS_BRIDGE_HOST 0x0600 +#define PCI_CLASS_BRIDGE_ISA 0x0601 +#define PCI_CLASS_BRIDGE_EISA 0x0602 +#define PCI_CLASS_BRIDGE_MC 0x0603 +#define PCI_CLASS_BRIDGE_PCI 0x0604 +#define PCI_CLASS_BRIDGE_PCI_INF_SUB 0x01 +#define PCI_CLASS_BRIDGE_PCMCIA 0x0605 +#define PCI_CLASS_BRIDGE_NUBUS 0x0606 +#define PCI_CLASS_BRIDGE_CARDBUS 0x0607 +#define PCI_CLASS_BRIDGE_RACEWAY 0x0608 +#define PCI_CLASS_BRIDGE_PCI_SEMITP 0x0609 +#define PCI_CLASS_BRIDGE_IB_PCI 0x060a +#define PCI_CLASS_BRIDGE_OTHER 0x0680 + +#define PCI_BASE_CLASS_COMMUNICATION 0x07 +#define PCI_CLASS_COMMUNICATION_SERIAL 0x0700 +#define PCI_CLASS_COMMUNICATION_PARALLEL 0x0701 +#define PCI_CLASS_COMMUNICATION_MULTISERIAL 0x0702 +#define PCI_CLASS_COMMUNICATION_MODEM 0x0703 +#define PCI_CLASS_COMMUNICATION_GPIB 0x0704 +#define PCI_CLASS_COMMUNICATION_SC 0x0705 +#define PCI_CLASS_COMMUNICATION_OTHER 0x0780 + +#define PCI_BASE_CLASS_SYSTEM 0x08 +#define PCI_CLASS_SYSTEM_PIC 0x0800 +#define PCI_CLASS_SYSTEM_PIC_IOAPIC 0x080010 +#define PCI_CLASS_SYSTEM_PIC_IOXAPIC 0x080020 +#define PCI_CLASS_SYSTEM_DMA 0x0801 +#define PCI_CLASS_SYSTEM_TIMER 0x0802 +#define PCI_CLASS_SYSTEM_RTC 0x0803 +#define PCI_CLASS_SYSTEM_PCI_HOTPLUG 0x0804 +#define PCI_CLASS_SYSTEM_SDHCI 0x0805 +#define PCI_CLASS_SYSTEM_OTHER 0x0880 + +#define PCI_BASE_CLASS_INPUT 0x09 +#define PCI_CLASS_INPUT_KEYBOARD 0x0900 +#define PCI_CLASS_INPUT_PEN 0x0901 +#define PCI_CLASS_INPUT_MOUSE 0x0902 +#define PCI_CLASS_INPUT_SCANNER 0x0903 +#define PCI_CLASS_INPUT_GAMEPORT 0x0904 +#define PCI_CLASS_INPUT_OTHER 0x0980 + +#define PCI_BASE_CLASS_DOCKING 0x0a +#define PCI_CLASS_DOCKING_GENERIC 0x0a00 +#define PCI_CLASS_DOCKING_OTHER 0x0a80 + +#define PCI_BASE_CLASS_PROCESSOR 0x0b +#define PCI_CLASS_PROCESSOR_PENTIUM 0x0b02 +#define PCI_CLASS_PROCESSOR_POWERPC 0x0b20 +#define PCI_CLASS_PROCESSOR_MIPS 0x0b30 +#define PCI_CLASS_PROCESSOR_CO 0x0b40 + +#define PCI_BASE_CLASS_SERIAL 0x0c +#define PCI_CLASS_SERIAL_FIREWIRE 0x0c00 +#define PCI_CLASS_SERIAL_ACCESS 0x0c01 +#define PCI_CLASS_SERIAL_SSA 0x0c02 +#define PCI_CLASS_SERIAL_USB 0x0c03 +#define PCI_CLASS_SERIAL_USB_UHCI 0x0c0300 +#define PCI_CLASS_SERIAL_USB_OHCI 0x0c0310 +#define PCI_CLASS_SERIAL_USB_EHCI 0x0c0320 +#define PCI_CLASS_SERIAL_USB_XHCI 0x0c0330 +#define PCI_CLASS_SERIAL_USB_UNKNOWN 0x0c0380 +#define PCI_CLASS_SERIAL_USB_DEVICE 0x0c03fe +#define PCI_CLASS_SERIAL_FIBER 0x0c04 +#define PCI_CLASS_SERIAL_SMBUS 0x0c05 +#define PCI_CLASS_SERIAL_IB 0x0c06 +#define PCI_CLASS_SERIAL_IPMI 0x0c07 +#define PCI_CLASS_SERIAL_SERCOS 0x0c08 +#define PCI_CLASS_SERIAL_CANBUS 0x0c09 + +#define PCI_BASE_CLASS_WIRELESS 0x0d +#define PCI_CLASS_WIRELESS_IRDA 0x0d00 +#define PCI_CLASS_WIRELESS_CIR 0x0d01 +#define PCI_CLASS_WIRELESS_RF_CONTROLLER 0x0d10 +#define PCI_CLASS_WIRELESS_BLUETOOTH 0x0d11 +#define PCI_CLASS_WIRELESS_BROADBAND 0x0d12 +#define PCI_CLASS_WIRELESS_OTHER 0x0d80 + +#define PCI_BASE_CLASS_SATELLITE 0x0f +#define PCI_CLASS_SATELLITE_TV 0x0f00 +#define PCI_CLASS_SATELLITE_AUDIO 0x0f01 +#define PCI_CLASS_SATELLITE_VOICE 0x0f03 +#define PCI_CLASS_SATELLITE_DATA 0x0f04 + +#define PCI_BASE_CLASS_CRYPT 0x10 +#define PCI_CLASS_CRYPT_NETWORK 0x1000 +#define PCI_CLASS_CRYPT_ENTERTAINMENT 0x1001 +#define PCI_CLASS_CRYPT_OTHER 0x1080 + +#define PCI_BASE_CLASS_SIGNAL_PROCESSING 0x11 +#define PCI_CLASS_SP_DPIO 0x1100 +#define PCI_CLASS_SP_PERF 0x1101 +#define PCI_CLASS_SP_SYNCH 0x1110 +#define PCI_CLASS_SP_MANAGEMENT 0x1120 +#define PCI_CLASS_SP_OTHER 0x1180 + +#define PCI_CLASS_OTHERS 0xff + +/* Vendors and devices. Sort key: vendor first, device next. */ + +#define PCI_VENDOR_ID_LSI_LOGIC 0x1000 +#define PCI_DEVICE_ID_LSI_53C810 0x0001 +#define PCI_DEVICE_ID_LSI_53C895A 0x0012 +#define PCI_DEVICE_ID_LSI_SAS1068 0x0054 +#define PCI_DEVICE_ID_LSI_SAS1078 0x0060 +#define PCI_DEVICE_ID_LSI_SAS0079 0x0079 + +#define PCI_VENDOR_ID_DEC 0x1011 +#define PCI_DEVICE_ID_DEC_21143 0x0019 +#define PCI_DEVICE_ID_DEC_21154 0x0026 + +#define PCI_VENDOR_ID_CIRRUS 0x1013 + +#define PCI_VENDOR_ID_IBM 0x1014 + +#define PCI_VENDOR_ID_AMD 0x1022 +#define PCI_DEVICE_ID_AMD_LANCE 0x2000 +#define PCI_DEVICE_ID_AMD_SCSI 0x2020 + +#define PCI_VENDOR_ID_TI 0x104c + +#define PCI_VENDOR_ID_MOTOROLA 0x1057 +#define PCI_DEVICE_ID_MOTOROLA_MPC106 0x0002 +#define PCI_DEVICE_ID_MOTOROLA_RAVEN 0x4801 + +#define PCI_VENDOR_ID_APPLE 0x106b +#define PCI_DEVICE_ID_APPLE_UNI_N_AGP 0x0020 +#define PCI_DEVICE_ID_APPLE_U3_AGP 0x004b +#define PCI_DEVICE_ID_APPLE_UNI_N_GMAC 0x0021 + +#define PCI_VENDOR_ID_SUN 0x108e +#define PCI_DEVICE_ID_SUN_EBUS 0x1000 +#define PCI_DEVICE_ID_SUN_HME 0x1001 +#define PCI_DEVICE_ID_SUN_SIMBA 0x5000 +#define PCI_DEVICE_ID_SUN_SABRE 0xa000 + +#define PCI_VENDOR_ID_CMD 0x1095 +#define PCI_DEVICE_ID_CMD_646 0x0646 + +#define PCI_VENDOR_ID_REALTEK 0x10ec +#define PCI_DEVICE_ID_REALTEK_8139 0x8139 + +#define PCI_VENDOR_ID_XILINX 0x10ee + +#define PCI_VENDOR_ID_VIA 0x1106 +#define PCI_DEVICE_ID_VIA_ISA_BRIDGE 0x0686 +#define PCI_DEVICE_ID_VIA_IDE 0x0571 +#define PCI_DEVICE_ID_VIA_UHCI 0x3038 +#define PCI_DEVICE_ID_VIA_ACPI 0x3057 +#define PCI_DEVICE_ID_VIA_AC97 0x3058 +#define PCI_DEVICE_ID_VIA_MC97 0x3068 + +#define PCI_VENDOR_ID_MARVELL 0x11ab + +#define PCI_VENDOR_ID_SILICON_MOTION 0x126f +#define PCI_DEVICE_ID_SM501 0x0501 + +#define PCI_VENDOR_ID_ENSONIQ 0x1274 +#define PCI_DEVICE_ID_ENSONIQ_ES1370 0x5000 + +#define PCI_VENDOR_ID_CHELSIO 0x1425 + +#define PCI_VENDOR_ID_FREESCALE 0x1957 +#define PCI_DEVICE_ID_MPC8533E 0x0030 + +#define PCI_VENDOR_ID_INTEL 0x8086 +#define PCI_DEVICE_ID_INTEL_82378 0x0484 +#define PCI_DEVICE_ID_INTEL_82441 0x1237 +#define PCI_DEVICE_ID_INTEL_82801AA_5 0x2415 +#define PCI_DEVICE_ID_INTEL_82801BA_11 0x244e +#define PCI_DEVICE_ID_INTEL_82801D 0x24CD +#define PCI_DEVICE_ID_INTEL_ESB_9 0x25ab +#define PCI_DEVICE_ID_INTEL_82371SB_0 0x7000 +#define PCI_DEVICE_ID_INTEL_82371SB_1 0x7010 +#define PCI_DEVICE_ID_INTEL_82371SB_2 0x7020 +#define PCI_DEVICE_ID_INTEL_82371AB_0 0x7110 +#define PCI_DEVICE_ID_INTEL_82371AB 0x7111 +#define PCI_DEVICE_ID_INTEL_82371AB_2 0x7112 +#define PCI_DEVICE_ID_INTEL_82371AB_3 0x7113 + +#define PCI_DEVICE_ID_INTEL_ICH9_0 0x2910 +#define PCI_DEVICE_ID_INTEL_ICH9_1 0x2917 +#define PCI_DEVICE_ID_INTEL_ICH9_2 0x2912 +#define PCI_DEVICE_ID_INTEL_ICH9_3 0x2913 +#define PCI_DEVICE_ID_INTEL_ICH9_4 0x2914 +#define PCI_DEVICE_ID_INTEL_ICH9_5 0x2919 +#define PCI_DEVICE_ID_INTEL_ICH9_6 0x2930 +#define PCI_DEVICE_ID_INTEL_ICH9_7 0x2916 +#define PCI_DEVICE_ID_INTEL_ICH9_8 0x2918 + +#define PCI_DEVICE_ID_INTEL_82801I_UHCI1 0x2934 +#define PCI_DEVICE_ID_INTEL_82801I_UHCI2 0x2935 +#define PCI_DEVICE_ID_INTEL_82801I_UHCI3 0x2936 +#define PCI_DEVICE_ID_INTEL_82801I_UHCI4 0x2937 +#define PCI_DEVICE_ID_INTEL_82801I_UHCI5 0x2938 +#define PCI_DEVICE_ID_INTEL_82801I_UHCI6 0x2939 +#define PCI_DEVICE_ID_INTEL_82801I_EHCI1 0x293a +#define PCI_DEVICE_ID_INTEL_82801I_EHCI2 0x293c +#define PCI_DEVICE_ID_INTEL_82599_SFP_VF 0x10ed + +#define PCI_DEVICE_ID_INTEL_P35_MCH 0x29c0 + +#define PCI_VENDOR_ID_XEN 0x5853 +#define PCI_DEVICE_ID_XEN_PLATFORM 0x0001 + +#define PCI_VENDOR_ID_NEC 0x1033 +#define PCI_DEVICE_ID_NEC_UPD720200 0x0194 + +#define PCI_VENDOR_ID_TEWS 0x1498 +#define PCI_DEVICE_ID_TEWS_TPCI200 0x30C8 + +#define PCI_VENDOR_ID_VMWARE 0x15ad +#define PCI_DEVICE_ID_VMWARE_PVRDMA 0x0820 + +#define PCI_VENDOR_ID_SYNOPSYS 0x16C3 + +#define PCI_VENDOR_ID_NVIDIA 0x10de + +#endif diff --git a/include/hw/pci/pci_regs.h b/include/hw/pci/pci_regs.h new file mode 100644 index 000000000..77ba64b93 --- /dev/null +++ b/include/hw/pci/pci_regs.h @@ -0,0 +1,8 @@ +#ifndef HW_PCI_PCI_REGS_H +#define HW_PCI_PCI_REGS_H + +#include "standard-headers/linux/pci_regs.h" + +#define PCI_PM_CAP_VER_1_1 0x0002 /* PCI PM spec ver. 1.1 */ + +#endif diff --git a/include/hw/pci/pcie.h b/include/hw/pci/pcie.h new file mode 100644 index 000000000..6063bee0e --- /dev/null +++ b/include/hw/pci/pcie.h @@ -0,0 +1,150 @@ +/* + * pcie.h + * + * Copyright (c) 2010 Isaku Yamahata + * VA Linux Systems Japan K.K. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, see . + */ + +#ifndef QEMU_PCIE_H +#define QEMU_PCIE_H + +#include "hw/pci/pci_regs.h" +#include "hw/pci/pcie_regs.h" +#include "hw/pci/pcie_aer.h" +#include "hw/hotplug.h" + +typedef enum { + /* for attention and power indicator */ + PCI_EXP_HP_IND_RESERVED = PCI_EXP_SLTCTL_IND_RESERVED, + PCI_EXP_HP_IND_ON = PCI_EXP_SLTCTL_IND_ON, + PCI_EXP_HP_IND_BLINK = PCI_EXP_SLTCTL_IND_BLINK, + PCI_EXP_HP_IND_OFF = PCI_EXP_SLTCTL_IND_OFF, +} PCIExpressIndicator; + +typedef enum { + /* these bits must match the bits in Slot Control/Status registers. + * PCI_EXP_HP_EV_xxx = PCI_EXP_SLTCTL_xxxE = PCI_EXP_SLTSTA_xxx + * + * Not all the bits of slot control register match with the ones of + * slot status. Not some bits of slot status register is used to + * show status, not to report event occurrence. + * So such bits must be masked out when checking the software + * notification condition. + */ + PCI_EXP_HP_EV_ABP = PCI_EXP_SLTCTL_ABPE, + /* attention button pressed */ + PCI_EXP_HP_EV_PDC = PCI_EXP_SLTCTL_PDCE, + /* presence detect changed */ + PCI_EXP_HP_EV_CCI = PCI_EXP_SLTCTL_CCIE, + /* command completed */ + + PCI_EXP_HP_EV_SUPPORTED = PCI_EXP_HP_EV_ABP | + PCI_EXP_HP_EV_PDC | + PCI_EXP_HP_EV_CCI, + /* supported event mask */ + + /* events not listed aren't supported */ +} PCIExpressHotPlugEvent; + +struct PCIExpressDevice { + /* Offset of express capability in config space */ + uint8_t exp_cap; + /* Offset of Power Management capability in config space */ + uint8_t pm_cap; + + /* SLOT */ + bool hpev_notified; /* Logical AND of conditions for hot plug event. + Following 6.7.3.4: + Software Notification of Hot-Plug Events, an interrupt + is sent whenever the logical and of these conditions + transitions from false to true. */ + + /* AER */ + uint16_t aer_cap; + PCIEAERLog aer_log; + + /* Offset of ATS capability in config space */ + uint16_t ats_cap; + + /* ACS */ + uint16_t acs_cap; +}; + +#define COMPAT_PROP_PCP "power_controller_present" + +/* PCI express capability helper functions */ +int pcie_cap_init(PCIDevice *dev, uint8_t offset, uint8_t type, + uint8_t port, Error **errp); +int pcie_cap_v1_init(PCIDevice *dev, uint8_t offset, + uint8_t type, uint8_t port); +int pcie_endpoint_cap_init(PCIDevice *dev, uint8_t offset); +void pcie_cap_exit(PCIDevice *dev); +int pcie_endpoint_cap_v1_init(PCIDevice *dev, uint8_t offset); +void pcie_cap_v1_exit(PCIDevice *dev); +uint8_t pcie_cap_get_type(const PCIDevice *dev); +void pcie_cap_flags_set_vector(PCIDevice *dev, uint8_t vector); +uint8_t pcie_cap_flags_get_vector(PCIDevice *dev); + +void pcie_cap_deverr_init(PCIDevice *dev); +void pcie_cap_deverr_reset(PCIDevice *dev); + +void pcie_cap_lnkctl_init(PCIDevice *dev); +void pcie_cap_lnkctl_reset(PCIDevice *dev); + +void pcie_cap_slot_init(PCIDevice *dev, PCIESlot *s); +void pcie_cap_slot_reset(PCIDevice *dev); +void pcie_cap_slot_get(PCIDevice *dev, uint16_t *slt_ctl, uint16_t *slt_sta); +void pcie_cap_slot_write_config(PCIDevice *dev, + uint16_t old_slt_ctl, uint16_t old_slt_sta, + uint32_t addr, uint32_t val, int len); +int pcie_cap_slot_post_load(void *opaque, int version_id); +void pcie_cap_slot_push_attention_button(PCIDevice *dev); + +void pcie_cap_root_init(PCIDevice *dev); +void pcie_cap_root_reset(PCIDevice *dev); + +void pcie_cap_flr_init(PCIDevice *dev); +void pcie_cap_flr_write_config(PCIDevice *dev, + uint32_t addr, uint32_t val, int len); + +/* ARI forwarding capability and control */ +void pcie_cap_arifwd_init(PCIDevice *dev); +void pcie_cap_arifwd_reset(PCIDevice *dev); +bool pcie_cap_is_arifwd_enabled(const PCIDevice *dev); + +/* PCI express extended capability helper functions */ +uint16_t pcie_find_capability(PCIDevice *dev, uint16_t cap_id); +void pcie_add_capability(PCIDevice *dev, + uint16_t cap_id, uint8_t cap_ver, + uint16_t offset, uint16_t size); +void pcie_sync_bridge_lnk(PCIDevice *dev); + +void pcie_acs_init(PCIDevice *dev, uint16_t offset); +void pcie_acs_reset(PCIDevice *dev); + +void pcie_ari_init(PCIDevice *dev, uint16_t offset, uint16_t nextfn); +void pcie_dev_ser_num_init(PCIDevice *dev, uint16_t offset, uint64_t ser_num); +void pcie_ats_init(PCIDevice *dev, uint16_t offset, bool aligned); + +void pcie_cap_slot_pre_plug_cb(HotplugHandler *hotplug_dev, DeviceState *dev, + Error **errp); +void pcie_cap_slot_plug_cb(HotplugHandler *hotplug_dev, DeviceState *dev, + Error **errp); +void pcie_cap_slot_unplug_cb(HotplugHandler *hotplug_dev, DeviceState *dev, + Error **errp); +void pcie_cap_slot_unplug_request_cb(HotplugHandler *hotplug_dev, + DeviceState *dev, Error **errp); +#endif /* QEMU_PCIE_H */ diff --git a/include/hw/pci/pcie_aer.h b/include/hw/pci/pcie_aer.h new file mode 100644 index 000000000..65e71d98f --- /dev/null +++ b/include/hw/pci/pcie_aer.h @@ -0,0 +1,103 @@ +/* + * pcie_aer.h + * + * Copyright (c) 2010 Isaku Yamahata + * VA Linux Systems Japan K.K. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, see . + */ + +#ifndef QEMU_PCIE_AER_H +#define QEMU_PCIE_AER_H + +#include "hw/pci/pci_regs.h" + +/* definitions which PCIExpressDevice uses */ + +/* AER log */ +struct PCIEAERLog { + /* This structure is saved/loaded. + So explicitly size them instead of unsigned int */ + + /* the number of currently recorded log in log member */ + uint16_t log_num; + + /* + * The maximum number of the log. Errors can be logged up to this. + * + * This is configurable property. + * The specified value will be clipped down to PCIE_AER_LOG_MAX_LIMIT + * to avoid unreasonable memory usage. + * I bet that 128 log size would be big enough, otherwise too many errors + * for system to function normaly. But could consecutive errors occur? + */ +#define PCIE_AER_LOG_MAX_DEFAULT 8 +#define PCIE_AER_LOG_MAX_LIMIT 128 + uint16_t log_max; + + /* Error log. log_max-sized array */ + PCIEAERErr *log; +}; + +/* aer error message: error signaling message has only error severity and + source id. See 2.2.8.3 error signaling messages */ +struct PCIEAERMsg { + /* + * PCI_ERR_ROOT_CMD_{COR, NONFATAL, FATAL}_EN + * = PCI_EXP_DEVCTL_{CERE, NFERE, FERE} + */ + uint32_t severity; + + uint16_t source_id; /* bdf */ +}; + +static inline bool +pcie_aer_msg_is_uncor(const PCIEAERMsg *msg) +{ + return msg->severity == PCI_ERR_ROOT_CMD_NONFATAL_EN || + msg->severity == PCI_ERR_ROOT_CMD_FATAL_EN; +} + +/* error */ +struct PCIEAERErr { + uint32_t status; /* error status bits */ + uint16_t source_id; /* bdf */ + +#define PCIE_AER_ERR_IS_CORRECTABLE 0x1 /* correctable/uncorrectable */ +#define PCIE_AER_ERR_MAYBE_ADVISORY 0x2 /* maybe advisory non-fatal */ +#define PCIE_AER_ERR_HEADER_VALID 0x4 /* TLP header is logged */ +#define PCIE_AER_ERR_TLP_PREFIX_PRESENT 0x8 /* TLP Prefix is logged */ + uint16_t flags; + + uint32_t header[4]; /* TLP header */ + uint32_t prefix[4]; /* TLP header prefix */ +}; + +extern const VMStateDescription vmstate_pcie_aer_log; + +int pcie_aer_init(PCIDevice *dev, uint8_t cap_ver, uint16_t offset, + uint16_t size, Error **errp); +void pcie_aer_exit(PCIDevice *dev); +void pcie_aer_write_config(PCIDevice *dev, + uint32_t addr, uint32_t val, int len); + +/* aer root port */ +void pcie_aer_root_set_vector(PCIDevice *dev, unsigned int vector); +void pcie_aer_root_init(PCIDevice *dev); +void pcie_aer_root_reset(PCIDevice *dev); +void pcie_aer_root_write_config(PCIDevice *dev, + uint32_t addr, uint32_t val, int len, + uint32_t root_cmd_prev); + +#endif /* QEMU_PCIE_AER_H */ diff --git a/include/hw/pci/pcie_host.h b/include/hw/pci/pcie_host.h new file mode 100644 index 000000000..076457b27 --- /dev/null +++ b/include/hw/pci/pcie_host.h @@ -0,0 +1,81 @@ +/* + * pcie_host.h + * + * Copyright (c) 2009 Isaku Yamahata + * VA Linux Systems Japan K.K. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + + * You should have received a copy of the GNU General Public License along + * with this program; if not, see . + */ + +#ifndef PCIE_HOST_H +#define PCIE_HOST_H + +#include "hw/pci/pci_host.h" +#include "exec/memory.h" +#include "qom/object.h" + +#define TYPE_PCIE_HOST_BRIDGE "pcie-host-bridge" +OBJECT_DECLARE_SIMPLE_TYPE(PCIExpressHost, PCIE_HOST_BRIDGE) + +#define PCIE_HOST_MCFG_BASE "MCFG" +#define PCIE_HOST_MCFG_SIZE "mcfg_size" + +/* pcie_host::base_addr == PCIE_BASE_ADDR_UNMAPPED when it isn't mapped. */ +#define PCIE_BASE_ADDR_UNMAPPED ((hwaddr)-1ULL) + +struct PCIExpressHost { + PCIHostState pci; + + /* express part */ + + /* base address where MMCONFIG area is mapped. */ + hwaddr base_addr; + + /* the size of MMCONFIG area. It's host bridge dependent */ + hwaddr size; + + /* MMCONFIG mmio area */ + MemoryRegion mmio; +}; + +void pcie_host_mmcfg_unmap(PCIExpressHost *e); +void pcie_host_mmcfg_init(PCIExpressHost *e, uint32_t size); +void pcie_host_mmcfg_map(PCIExpressHost *e, hwaddr addr, uint32_t size); +void pcie_host_mmcfg_update(PCIExpressHost *e, + int enable, + hwaddr addr, + uint32_t size); + +/* + * PCI express ECAM (Enhanced Configuration Address Mapping) format. + * AKA mmcfg address + * bit 20 - 28: bus number + * bit 15 - 19: device number + * bit 12 - 14: function number + * bit 0 - 11: offset in configuration space of a given device + */ +#define PCIE_MMCFG_SIZE_MAX (1ULL << 29) +#define PCIE_MMCFG_SIZE_MIN (1ULL << 20) +#define PCIE_MMCFG_BUS_BIT 20 +#define PCIE_MMCFG_BUS_MASK 0x1ff +#define PCIE_MMCFG_DEVFN_BIT 12 +#define PCIE_MMCFG_DEVFN_MASK 0xff +#define PCIE_MMCFG_CONFOFFSET_MASK 0xfff +#define PCIE_MMCFG_BUS(addr) (((addr) >> PCIE_MMCFG_BUS_BIT) & \ + PCIE_MMCFG_BUS_MASK) +#define PCIE_MMCFG_DEVFN(addr) (((addr) >> PCIE_MMCFG_DEVFN_BIT) & \ + PCIE_MMCFG_DEVFN_MASK) +#define PCIE_MMCFG_CONFOFFSET(addr) ((addr) & PCIE_MMCFG_CONFOFFSET_MASK) + +#endif /* PCIE_HOST_H */ diff --git a/include/hw/pci/pcie_port.h b/include/hw/pci/pcie_port.h new file mode 100644 index 000000000..bea8ecad0 --- /dev/null +++ b/include/hw/pci/pcie_port.h @@ -0,0 +1,91 @@ +/* + * pcie_port.h + * + * Copyright (c) 2010 Isaku Yamahata + * VA Linux Systems Japan K.K. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, see . + */ + +#ifndef QEMU_PCIE_PORT_H +#define QEMU_PCIE_PORT_H + +#include "hw/pci/pci_bridge.h" +#include "hw/pci/pci_bus.h" +#include "qom/object.h" + +#define TYPE_PCIE_PORT "pcie-port" +OBJECT_DECLARE_SIMPLE_TYPE(PCIEPort, PCIE_PORT) + +struct PCIEPort { + /*< private >*/ + PCIBridge parent_obj; + /*< public >*/ + + /* pci express switch port */ + uint8_t port; +}; + +void pcie_port_init_reg(PCIDevice *d); + +#define TYPE_PCIE_SLOT "pcie-slot" +OBJECT_DECLARE_SIMPLE_TYPE(PCIESlot, PCIE_SLOT) + +struct PCIESlot { + /*< private >*/ + PCIEPort parent_obj; + /*< public >*/ + + /* pci express switch port with slot */ + uint8_t chassis; + uint16_t slot; + + PCIExpLinkSpeed speed; + PCIExpLinkWidth width; + + /* Disable ACS (really for a pcie_root_port) */ + bool disable_acs; + + /* Indicates whether hot-plug is enabled on the slot */ + bool hotplug; + QLIST_ENTRY(PCIESlot) next; +}; + +void pcie_chassis_create(uint8_t chassis_number); +PCIESlot *pcie_chassis_find_slot(uint8_t chassis, uint16_t slot); +int pcie_chassis_add_slot(struct PCIESlot *slot); +void pcie_chassis_del_slot(PCIESlot *s); + +#define TYPE_PCIE_ROOT_PORT "pcie-root-port-base" +typedef struct PCIERootPortClass PCIERootPortClass; +DECLARE_CLASS_CHECKERS(PCIERootPortClass, PCIE_ROOT_PORT, + TYPE_PCIE_ROOT_PORT) + +struct PCIERootPortClass { + PCIDeviceClass parent_class; + DeviceRealize parent_realize; + DeviceReset parent_reset; + + uint8_t (*aer_vector)(const PCIDevice *dev); + int (*interrupts_init)(PCIDevice *dev, Error **errp); + void (*interrupts_uninit)(PCIDevice *dev); + + int exp_offset; + int aer_offset; + int ssvid_offset; + int acs_offset; /* If nonzero, optional ACS capability offset */ + int ssid; +}; + +#endif /* QEMU_PCIE_PORT_H */ diff --git a/include/hw/pci/pcie_regs.h b/include/hw/pci/pcie_regs.h new file mode 100644 index 000000000..1db86b0ec --- /dev/null +++ b/include/hw/pci/pcie_regs.h @@ -0,0 +1,182 @@ +/* + * constants for pcie configurations space from pci express spec. + * + * TODO: + * Those constants and macros should go to Linux pci_regs.h + * Once they're merged, they will go away. + */ +#ifndef QEMU_PCIE_REGS_H +#define QEMU_PCIE_REGS_H + + +/* express capability */ + +#define PCI_EXP_VER1_SIZEOF 0x14 /* express capability of ver. 1 */ +#define PCI_EXP_VER2_SIZEOF 0x3c /* express capability of ver. 2 */ +#define PCI_EXT_CAP_VER_SHIFT 16 +#define PCI_EXT_CAP_NEXT_SHIFT 20 +#define PCI_EXT_CAP_NEXT_MASK (0xffc << PCI_EXT_CAP_NEXT_SHIFT) + +#define PCI_EXT_CAP(id, ver, next) \ + ((id) | \ + ((ver) << PCI_EXT_CAP_VER_SHIFT) | \ + ((next) << PCI_EXT_CAP_NEXT_SHIFT)) + +#define PCI_EXT_CAP_ALIGN 4 +#define PCI_EXT_CAP_ALIGNUP(x) \ + (((x) + PCI_EXT_CAP_ALIGN - 1) & ~(PCI_EXT_CAP_ALIGN - 1)) + +/* PCI_EXP_FLAGS */ +#define PCI_EXP_FLAGS_VER1 1 +#define PCI_EXP_FLAGS_VER2 2 +#define PCI_EXP_FLAGS_IRQ_SHIFT ctz32(PCI_EXP_FLAGS_IRQ) +#define PCI_EXP_FLAGS_TYPE_SHIFT ctz32(PCI_EXP_FLAGS_TYPE) + +/* PCI_EXP_LINK{CAP, STA} */ +/* link speed */ +typedef enum PCIExpLinkSpeed { + QEMU_PCI_EXP_LNK_2_5GT = 1, + QEMU_PCI_EXP_LNK_5GT, + QEMU_PCI_EXP_LNK_8GT, + QEMU_PCI_EXP_LNK_16GT, +} PCIExpLinkSpeed; + +#define QEMU_PCI_EXP_LNKCAP_MLS(speed) (speed) +#define QEMU_PCI_EXP_LNKSTA_CLS QEMU_PCI_EXP_LNKCAP_MLS + +typedef enum PCIExpLinkWidth { + QEMU_PCI_EXP_LNK_X1 = 1, + QEMU_PCI_EXP_LNK_X2 = 2, + QEMU_PCI_EXP_LNK_X4 = 4, + QEMU_PCI_EXP_LNK_X8 = 8, + QEMU_PCI_EXP_LNK_X12 = 12, + QEMU_PCI_EXP_LNK_X16 = 16, + QEMU_PCI_EXP_LNK_X32 = 32, +} PCIExpLinkWidth; + +#define PCI_EXP_LNK_MLW_SHIFT ctz32(PCI_EXP_LNKCAP_MLW) +#define QEMU_PCI_EXP_LNKCAP_MLW(width) (width << PCI_EXP_LNK_MLW_SHIFT) +#define QEMU_PCI_EXP_LNKSTA_NLW QEMU_PCI_EXP_LNKCAP_MLW + +/* PCI_EXP_LINKCAP */ +#define PCI_EXP_LNKCAP_ASPMS_SHIFT ctz32(PCI_EXP_LNKCAP_ASPMS) +#define PCI_EXP_LNKCAP_ASPMS_0S (1 << PCI_EXP_LNKCAP_ASPMS_SHIFT) + +#define PCI_EXP_LNKCAP_PN_SHIFT ctz32(PCI_EXP_LNKCAP_PN) + +#define PCI_EXP_SLTCAP_PSN_SHIFT ctz32(PCI_EXP_SLTCAP_PSN) + +#define PCI_EXP_SLTCTL_IND_RESERVED 0x0 +#define PCI_EXP_SLTCTL_IND_ON 0x1 +#define PCI_EXP_SLTCTL_IND_BLINK 0x2 +#define PCI_EXP_SLTCTL_IND_OFF 0x3 +#define PCI_EXP_SLTCTL_AIC_SHIFT ctz32(PCI_EXP_SLTCTL_AIC) +#define PCI_EXP_SLTCTL_AIC_OFF \ + (PCI_EXP_SLTCTL_IND_OFF << PCI_EXP_SLTCTL_AIC_SHIFT) + +#define PCI_EXP_SLTCTL_PIC_SHIFT ctz32(PCI_EXP_SLTCTL_PIC) +#define PCI_EXP_SLTCTL_PIC_OFF \ + (PCI_EXP_SLTCTL_IND_OFF << PCI_EXP_SLTCTL_PIC_SHIFT) +#define PCI_EXP_SLTCTL_PIC_ON \ + (PCI_EXP_SLTCTL_IND_ON << PCI_EXP_SLTCTL_PIC_SHIFT) + +#define PCI_EXP_SLTCTL_SUPPORTED \ + (PCI_EXP_SLTCTL_ABPE | \ + PCI_EXP_SLTCTL_PDCE | \ + PCI_EXP_SLTCTL_CCIE | \ + PCI_EXP_SLTCTL_HPIE | \ + PCI_EXP_SLTCTL_AIC | \ + PCI_EXP_SLTCTL_PCC | \ + PCI_EXP_SLTCTL_EIC) + +#define PCI_EXP_DEVCAP2_EFF 0x100000 +#define PCI_EXP_DEVCAP2_EETLPP 0x200000 + +#define PCI_EXP_DEVCTL2_EETLPPB 0x8000 + +/* ARI */ +#define PCI_ARI_VER 1 +#define PCI_ARI_SIZEOF 8 + +/* AER */ +#define PCI_ERR_VER 2 +#define PCI_ERR_SIZEOF 0x48 + +#define PCI_ERR_UNC_SDN 0x00000020 /* surprise down */ +#define PCI_ERR_UNC_ACSV 0x00200000 /* ACS Violation */ +#define PCI_ERR_UNC_INTN 0x00400000 /* Internal Error */ +#define PCI_ERR_UNC_MCBTLP 0x00800000 /* MC Blcoked TLP */ +#define PCI_ERR_UNC_ATOP_EBLOCKED 0x01000000 /* atomic op egress blocked */ +#define PCI_ERR_UNC_TLP_PRF_BLOCKED 0x02000000 /* TLP Prefix Blocked */ +#define PCI_ERR_COR_ADV_NONFATAL 0x00002000 /* Advisory Non-Fatal */ +#define PCI_ERR_COR_INTERNAL 0x00004000 /* Corrected Internal */ +#define PCI_ERR_COR_HL_OVERFLOW 0x00008000 /* Header Long Overflow */ +#define PCI_ERR_CAP_FEP_MASK 0x0000001f +#define PCI_ERR_CAP_MHRC 0x00000200 +#define PCI_ERR_CAP_MHRE 0x00000400 +#define PCI_ERR_CAP_TLP 0x00000800 + +#define PCI_ERR_HEADER_LOG_SIZE 16 +#define PCI_ERR_TLP_PREFIX_LOG 0x38 +#define PCI_ERR_TLP_PREFIX_LOG_SIZE 16 + +#define PCI_SEC_STATUS_RCV_SYSTEM_ERROR 0x4000 + +/* aer root error command/status */ +#define PCI_ERR_ROOT_CMD_EN_MASK (PCI_ERR_ROOT_CMD_COR_EN | \ + PCI_ERR_ROOT_CMD_NONFATAL_EN | \ + PCI_ERR_ROOT_CMD_FATAL_EN) + +#define PCI_ERR_ROOT_IRQ_MAX 32 +#define PCI_ERR_ROOT_IRQ 0xf8000000 +#define PCI_ERR_ROOT_IRQ_SHIFT ctz32(PCI_ERR_ROOT_IRQ) +#define PCI_ERR_ROOT_STATUS_REPORT_MASK (PCI_ERR_ROOT_COR_RCV | \ + PCI_ERR_ROOT_MULTI_COR_RCV | \ + PCI_ERR_ROOT_UNCOR_RCV | \ + PCI_ERR_ROOT_MULTI_UNCOR_RCV | \ + PCI_ERR_ROOT_FIRST_FATAL | \ + PCI_ERR_ROOT_NONFATAL_RCV | \ + PCI_ERR_ROOT_FATAL_RCV) + +#define PCI_ERR_UNC_SUPPORTED (PCI_ERR_UNC_DLP | \ + PCI_ERR_UNC_SDN | \ + PCI_ERR_UNC_POISON_TLP | \ + PCI_ERR_UNC_FCP | \ + PCI_ERR_UNC_COMP_TIME | \ + PCI_ERR_UNC_COMP_ABORT | \ + PCI_ERR_UNC_UNX_COMP | \ + PCI_ERR_UNC_RX_OVER | \ + PCI_ERR_UNC_MALF_TLP | \ + PCI_ERR_UNC_ECRC | \ + PCI_ERR_UNC_UNSUP | \ + PCI_ERR_UNC_ACSV | \ + PCI_ERR_UNC_INTN | \ + PCI_ERR_UNC_MCBTLP | \ + PCI_ERR_UNC_ATOP_EBLOCKED | \ + PCI_ERR_UNC_TLP_PRF_BLOCKED) + +#define PCI_ERR_UNC_SEVERITY_DEFAULT (PCI_ERR_UNC_DLP | \ + PCI_ERR_UNC_SDN | \ + PCI_ERR_UNC_FCP | \ + PCI_ERR_UNC_RX_OVER | \ + PCI_ERR_UNC_MALF_TLP | \ + PCI_ERR_UNC_INTN) + +#define PCI_ERR_COR_SUPPORTED (PCI_ERR_COR_RCVR | \ + PCI_ERR_COR_BAD_TLP | \ + PCI_ERR_COR_BAD_DLLP | \ + PCI_ERR_COR_REP_ROLL | \ + PCI_ERR_COR_REP_TIMER | \ + PCI_ERR_COR_ADV_NONFATAL | \ + PCI_ERR_COR_INTERNAL | \ + PCI_ERR_COR_HL_OVERFLOW) + +#define PCI_ERR_COR_MASK_DEFAULT (PCI_ERR_COR_ADV_NONFATAL | \ + PCI_ERR_COR_INTERNAL | \ + PCI_ERR_COR_HL_OVERFLOW) + +/* ACS */ +#define PCI_ACS_VER 0x1 +#define PCI_ACS_SIZEOF 8 + +#endif /* QEMU_PCIE_REGS_H */ diff --git a/include/hw/pci/shpc.h b/include/hw/pci/shpc.h new file mode 100644 index 000000000..d5683b739 --- /dev/null +++ b/include/hw/pci/shpc.h @@ -0,0 +1,65 @@ +#ifndef SHPC_H +#define SHPC_H + +#include "exec/memory.h" +#include "hw/hotplug.h" +#include "hw/pci/pci.h" +#include "migration/vmstate.h" + +struct SHPCDevice { + /* Capability offset in device's config space */ + int cap; + + /* # of hot-pluggable slots */ + int nslots; + + /* SHPC WRS: working register set */ + uint8_t *config; + + /* Used to enable checks on load. Note that writable bits are + * never checked even if set in cmask. */ + uint8_t *cmask; + + /* Used to implement R/W bytes */ + uint8_t *wmask; + + /* Used to implement RW1C(Write 1 to Clear) bytes */ + uint8_t *w1cmask; + + /* MMIO for the SHPC BAR */ + MemoryRegion mmio; + + /* Bus controlled by this SHPC */ + PCIBus *sec_bus; + + /* MSI already requested for this event */ + int msi_requested; +}; + +void shpc_reset(PCIDevice *d); +int shpc_bar_size(PCIDevice *dev); +int shpc_init(PCIDevice *dev, PCIBus *sec_bus, MemoryRegion *bar, + unsigned off, Error **errp); +void shpc_cleanup(PCIDevice *dev, MemoryRegion *bar); +void shpc_free(PCIDevice *dev); +void shpc_cap_write_config(PCIDevice *d, uint32_t addr, uint32_t val, int len); + + +void shpc_device_plug_cb(HotplugHandler *hotplug_dev, DeviceState *dev, + Error **errp); +void shpc_device_unplug_cb(HotplugHandler *hotplug_dev, DeviceState *dev, + Error **errp); +void shpc_device_unplug_request_cb(HotplugHandler *hotplug_dev, + DeviceState *dev, Error **errp); + +extern VMStateInfo shpc_vmstate_info; +#define SHPC_VMSTATE(_field, _type, _test) \ + VMSTATE_BUFFER_UNSAFE_INFO_TEST(_field, _type, _test, 0, \ + shpc_vmstate_info, 0) + +static inline bool shpc_present(const PCIDevice *dev) +{ + return dev->cap_present & QEMU_PCI_CAP_SHPC; +} + +#endif diff --git a/include/hw/pci/slotid_cap.h b/include/hw/pci/slotid_cap.h new file mode 100644 index 000000000..8b4dc0ce8 --- /dev/null +++ b/include/hw/pci/slotid_cap.h @@ -0,0 +1,11 @@ +#ifndef PCI_SLOTID_CAP_H +#define PCI_SLOTID_CAP_H + + +int slotid_cap_init(PCIDevice *dev, int nslots, + uint8_t chassis, + unsigned offset, + Error **errp); +void slotid_cap_cleanup(PCIDevice *dev); + +#endif diff --git a/include/hw/pcmcia.h b/include/hw/pcmcia.h new file mode 100644 index 000000000..e3ba44e0b --- /dev/null +++ b/include/hw/pcmcia.h @@ -0,0 +1,66 @@ +#ifndef HW_PCMCIA_H +#define HW_PCMCIA_H + +/* PCMCIA/Cardbus */ + +#include "hw/qdev-core.h" +#include "qom/object.h" + +typedef struct PCMCIASocket { + qemu_irq irq; + bool attached; +} PCMCIASocket; + +#define TYPE_PCMCIA_CARD "pcmcia-card" +OBJECT_DECLARE_TYPE(PCMCIACardState, PCMCIACardClass, PCMCIA_CARD) + +struct PCMCIACardState { + /*< private >*/ + DeviceState parent_obj; + /*< public >*/ + + PCMCIASocket *slot; +}; + +struct PCMCIACardClass { + /*< private >*/ + DeviceClass parent_class; + /*< public >*/ + + int (*attach)(PCMCIACardState *state); + int (*detach)(PCMCIACardState *state); + + const uint8_t *cis; + int cis_len; + + /* Only valid if attached */ + uint8_t (*attr_read)(PCMCIACardState *card, uint32_t address); + void (*attr_write)(PCMCIACardState *card, uint32_t address, uint8_t value); + uint16_t (*common_read)(PCMCIACardState *card, uint32_t address); + void (*common_write)(PCMCIACardState *card, + uint32_t address, uint16_t value); + uint16_t (*io_read)(PCMCIACardState *card, uint32_t address); + void (*io_write)(PCMCIACardState *card, uint32_t address, uint16_t value); +}; + +#define CISTPL_DEVICE 0x01 /* 5V Device Information Tuple */ +#define CISTPL_NO_LINK 0x14 /* No Link Tuple */ +#define CISTPL_VERS_1 0x15 /* Level 1 Version Tuple */ +#define CISTPL_JEDEC_C 0x18 /* JEDEC ID Tuple */ +#define CISTPL_JEDEC_A 0x19 /* JEDEC ID Tuple */ +#define CISTPL_CONFIG 0x1a /* Configuration Tuple */ +#define CISTPL_CFTABLE_ENTRY 0x1b /* 16-bit PCCard Configuration */ +#define CISTPL_DEVICE_OC 0x1c /* Additional Device Information */ +#define CISTPL_DEVICE_OA 0x1d /* Additional Device Information */ +#define CISTPL_DEVICE_GEO 0x1e /* Additional Device Information */ +#define CISTPL_DEVICE_GEO_A 0x1f /* Additional Device Information */ +#define CISTPL_MANFID 0x20 /* Manufacture ID Tuple */ +#define CISTPL_FUNCID 0x21 /* Function ID Tuple */ +#define CISTPL_FUNCE 0x22 /* Function Extension Tuple */ +#define CISTPL_END 0xff /* Tuple End */ +#define CISTPL_ENDMARK 0xff + +/* dscm1xxxx.c */ +PCMCIACardState *dscm1xxxx_init(DriveInfo *bdrv); + +#endif diff --git a/include/hw/platform-bus.h b/include/hw/platform-bus.h new file mode 100644 index 000000000..44f30c535 --- /dev/null +++ b/include/hw/platform-bus.h @@ -0,0 +1,52 @@ +#ifndef HW_PLATFORM_BUS_H +#define HW_PLATFORM_BUS_H + +/* + * Platform Bus device to support dynamic Sysbus devices + * + * Copyright (C) 2014 Freescale Semiconductor, Inc. All rights reserved. + * + * Author: Alexander Graf, + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, see . + */ + +#include "hw/sysbus.h" +#include "qom/object.h" + + +#define TYPE_PLATFORM_BUS_DEVICE "platform-bus-device" +OBJECT_DECLARE_SIMPLE_TYPE(PlatformBusDevice, PLATFORM_BUS_DEVICE) + +struct PlatformBusDevice { + /*< private >*/ + SysBusDevice parent_obj; + + /*< public >*/ + uint32_t mmio_size; + MemoryRegion mmio; + + uint32_t num_irqs; + qemu_irq *irqs; + unsigned long *used_irqs; +}; + +int platform_bus_get_irqn(PlatformBusDevice *platform_bus, SysBusDevice *sbdev, + int n); +hwaddr platform_bus_get_mmio_addr(PlatformBusDevice *pbus, SysBusDevice *sbdev, + int n); + +void platform_bus_link_device(PlatformBusDevice *pbus, SysBusDevice *sbdev); + +#endif /* HW_PLATFORM_BUS_H */ diff --git a/include/hw/ppc/fdt.h b/include/hw/ppc/fdt.h new file mode 100644 index 000000000..a8cd85069 --- /dev/null +++ b/include/hw/ppc/fdt.h @@ -0,0 +1,29 @@ +/* + * QEMU PowerPC helper routines for the device tree. + * + * Copyright (C) 2016 IBM Corp. + * + * This code is licensed under the GPL version 2 or later. See the + * COPYING file in the top-level directory. + */ + +#ifndef PPC_FDT_H +#define PPC_FDT_H + +#include "qemu/error-report.h" +#include "target/ppc/cpu-qom.h" + +#define _FDT(exp) \ + do { \ + int ret = (exp); \ + if (ret < 0) { \ + error_report("error creating device tree: %s: %s", \ + #exp, fdt_strerror(ret)); \ + exit(1); \ + } \ + } while (0) + +size_t ppc_create_page_sizes_prop(PowerPCCPU *cpu, uint32_t *prop, + size_t maxsize); + +#endif /* PPC_FDT_H */ diff --git a/include/hw/ppc/mac_dbdma.h b/include/hw/ppc/mac_dbdma.h new file mode 100644 index 000000000..4a3f64451 --- /dev/null +++ b/include/hw/ppc/mac_dbdma.h @@ -0,0 +1,183 @@ +/* + * Copyright (c) 2009 Laurent Vivier + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ + +#ifndef HW_MAC_DBDMA_H +#define HW_MAC_DBDMA_H + +#include "exec/memory.h" +#include "qemu/iov.h" +#include "sysemu/dma.h" +#include "hw/sysbus.h" +#include "qom/object.h" + +typedef struct DBDMA_io DBDMA_io; + +typedef void (*DBDMA_flush)(DBDMA_io *io); +typedef void (*DBDMA_rw)(DBDMA_io *io); +typedef void (*DBDMA_end)(DBDMA_io *io); +struct DBDMA_io { + void *opaque; + void *channel; + hwaddr addr; + int len; + int is_last; + int is_dma_out; + DBDMA_end dma_end; + /* DMA is in progress, don't start another one */ + bool processing; + /* DMA request */ + void *dma_mem; + dma_addr_t dma_len; + DMADirection dir; +}; + +/* + * DBDMA control/status registers. All little-endian. + */ + +#define DBDMA_CONTROL 0x00 +#define DBDMA_STATUS 0x01 +#define DBDMA_CMDPTR_HI 0x02 +#define DBDMA_CMDPTR_LO 0x03 +#define DBDMA_INTR_SEL 0x04 +#define DBDMA_BRANCH_SEL 0x05 +#define DBDMA_WAIT_SEL 0x06 +#define DBDMA_XFER_MODE 0x07 +#define DBDMA_DATA2PTR_HI 0x08 +#define DBDMA_DATA2PTR_LO 0x09 +#define DBDMA_RES1 0x0A +#define DBDMA_ADDRESS_HI 0x0B +#define DBDMA_BRANCH_ADDR_HI 0x0C +#define DBDMA_RES2 0x0D +#define DBDMA_RES3 0x0E +#define DBDMA_RES4 0x0F + +#define DBDMA_REGS 16 +#define DBDMA_SIZE (DBDMA_REGS * sizeof(uint32_t)) + +#define DBDMA_CHANNEL_SHIFT 7 +#define DBDMA_CHANNEL_SIZE (1 << DBDMA_CHANNEL_SHIFT) + +#define DBDMA_CHANNELS (0x1000 >> DBDMA_CHANNEL_SHIFT) + +/* Bits in control and status registers */ + +#define RUN 0x8000 +#define PAUSE 0x4000 +#define FLUSH 0x2000 +#define WAKE 0x1000 +#define DEAD 0x0800 +#define ACTIVE 0x0400 +#define BT 0x0100 +#define DEVSTAT 0x00ff + +/* + * DBDMA command structure. These fields are all little-endian! + */ + +typedef struct dbdma_cmd { + uint16_t req_count; /* requested byte transfer count */ + uint16_t command; /* command word (has bit-fields) */ + uint32_t phy_addr; /* physical data address */ + uint32_t cmd_dep; /* command-dependent field */ + uint16_t res_count; /* residual count after completion */ + uint16_t xfer_status; /* transfer status */ +} dbdma_cmd; + +/* DBDMA command values in command field */ + +#define COMMAND_MASK 0xf000 +#define OUTPUT_MORE 0x0000 /* transfer memory data to stream */ +#define OUTPUT_LAST 0x1000 /* ditto followed by end marker */ +#define INPUT_MORE 0x2000 /* transfer stream data to memory */ +#define INPUT_LAST 0x3000 /* ditto, expect end marker */ +#define STORE_WORD 0x4000 /* write word (4 bytes) to device reg */ +#define LOAD_WORD 0x5000 /* read word (4 bytes) from device reg */ +#define DBDMA_NOP 0x6000 /* do nothing */ +#define DBDMA_STOP 0x7000 /* suspend processing */ + +/* Key values in command field */ + +#define KEY_MASK 0x0700 +#define KEY_STREAM0 0x0000 /* usual data stream */ +#define KEY_STREAM1 0x0100 /* control/status stream */ +#define KEY_STREAM2 0x0200 /* device-dependent stream */ +#define KEY_STREAM3 0x0300 /* device-dependent stream */ +#define KEY_STREAM4 0x0400 /* reserved */ +#define KEY_REGS 0x0500 /* device register space */ +#define KEY_SYSTEM 0x0600 /* system memory-mapped space */ +#define KEY_DEVICE 0x0700 /* device memory-mapped space */ + +/* Interrupt control values in command field */ + +#define INTR_MASK 0x0030 +#define INTR_NEVER 0x0000 /* don't interrupt */ +#define INTR_IFSET 0x0010 /* intr if condition bit is 1 */ +#define INTR_IFCLR 0x0020 /* intr if condition bit is 0 */ +#define INTR_ALWAYS 0x0030 /* always interrupt */ + +/* Branch control values in command field */ + +#define BR_MASK 0x000c +#define BR_NEVER 0x0000 /* don't branch */ +#define BR_IFSET 0x0004 /* branch if condition bit is 1 */ +#define BR_IFCLR 0x0008 /* branch if condition bit is 0 */ +#define BR_ALWAYS 0x000c /* always branch */ + +/* Wait control values in command field */ + +#define WAIT_MASK 0x0003 +#define WAIT_NEVER 0x0000 /* don't wait */ +#define WAIT_IFSET 0x0001 /* wait if condition bit is 1 */ +#define WAIT_IFCLR 0x0002 /* wait if condition bit is 0 */ +#define WAIT_ALWAYS 0x0003 /* always wait */ + +typedef struct DBDMA_channel { + int channel; + uint32_t regs[DBDMA_REGS]; + qemu_irq irq; + DBDMA_io io; + DBDMA_rw rw; + DBDMA_flush flush; + dbdma_cmd current; +} DBDMA_channel; + +struct DBDMAState { + SysBusDevice parent_obj; + + MemoryRegion mem; + DBDMA_channel channels[DBDMA_CHANNELS]; + QEMUBH *bh; +}; +typedef struct DBDMAState DBDMAState; + +/* Externally callable functions */ + +void DBDMA_register_channel(void *dbdma, int nchan, qemu_irq irq, + DBDMA_rw rw, DBDMA_flush flush, + void *opaque); +void DBDMA_kick(DBDMAState *dbdma); + +#define TYPE_MAC_DBDMA "mac-dbdma" +OBJECT_DECLARE_SIMPLE_TYPE(DBDMAState, MAC_DBDMA) + +#endif diff --git a/include/hw/ppc/openpic.h b/include/hw/ppc/openpic.h new file mode 100644 index 000000000..74ff44bff --- /dev/null +++ b/include/hw/ppc/openpic.h @@ -0,0 +1,189 @@ +#ifndef OPENPIC_H +#define OPENPIC_H + +#include "hw/sysbus.h" +#include "hw/core/cpu.h" +#include "qom/object.h" + +#define MAX_CPU 32 +#define MAX_MSI 8 +#define VID 0x03 /* MPIC version ID */ + +/* OpenPIC have 5 outputs per CPU connected and one IRQ out single output */ +enum { + OPENPIC_OUTPUT_INT = 0, /* IRQ */ + OPENPIC_OUTPUT_CINT, /* critical IRQ */ + OPENPIC_OUTPUT_MCK, /* Machine check event */ + OPENPIC_OUTPUT_DEBUG, /* Inconditional debug event */ + OPENPIC_OUTPUT_RESET, /* Core reset event */ + OPENPIC_OUTPUT_NB, +}; + +typedef struct IrqLines { qemu_irq irq[OPENPIC_OUTPUT_NB]; } IrqLines; + +#define OPENPIC_MODEL_RAVEN 0 +#define OPENPIC_MODEL_FSL_MPIC_20 1 +#define OPENPIC_MODEL_FSL_MPIC_42 2 +#define OPENPIC_MODEL_KEYLARGO 3 + +#define OPENPIC_MAX_SRC 256 +#define OPENPIC_MAX_TMR 4 +#define OPENPIC_MAX_IPI 4 +#define OPENPIC_MAX_IRQ (OPENPIC_MAX_SRC + OPENPIC_MAX_IPI + \ + OPENPIC_MAX_TMR) + +/* Raven */ +#define RAVEN_MAX_CPU 2 +#define RAVEN_MAX_EXT 48 +#define RAVEN_MAX_IRQ 64 +#define RAVEN_MAX_TMR OPENPIC_MAX_TMR +#define RAVEN_MAX_IPI OPENPIC_MAX_IPI + +/* KeyLargo */ +#define KEYLARGO_MAX_CPU 4 +#define KEYLARGO_MAX_EXT 64 +#define KEYLARGO_MAX_IPI 4 +#define KEYLARGO_MAX_IRQ (64 + KEYLARGO_MAX_IPI) +#define KEYLARGO_MAX_TMR 0 +#define KEYLARGO_IPI_IRQ (KEYLARGO_MAX_EXT) /* First IPI IRQ */ +/* Timers don't exist but this makes the code happy... */ +#define KEYLARGO_TMR_IRQ (KEYLARGO_IPI_IRQ + KEYLARGO_MAX_IPI) + +/* Interrupt definitions */ +#define RAVEN_FE_IRQ (RAVEN_MAX_EXT) /* Internal functional IRQ */ +#define RAVEN_ERR_IRQ (RAVEN_MAX_EXT + 1) /* Error IRQ */ +#define RAVEN_TMR_IRQ (RAVEN_MAX_EXT + 2) /* First timer IRQ */ +#define RAVEN_IPI_IRQ (RAVEN_TMR_IRQ + RAVEN_MAX_TMR) /* First IPI IRQ */ +/* First doorbell IRQ */ +#define RAVEN_DBL_IRQ (RAVEN_IPI_IRQ + (RAVEN_MAX_CPU * RAVEN_MAX_IPI)) + +typedef struct FslMpicInfo { + int max_ext; +} FslMpicInfo; + +typedef enum IRQType { + IRQ_TYPE_NORMAL = 0, + IRQ_TYPE_FSLINT, /* FSL internal interrupt -- level only */ + IRQ_TYPE_FSLSPECIAL, /* FSL timer/IPI interrupt, edge, no polarity */ +} IRQType; + +/* Round up to the nearest 64 IRQs so that the queue length + * won't change when moving between 32 and 64 bit hosts. + */ +#define IRQQUEUE_SIZE_BITS ((OPENPIC_MAX_IRQ + 63) & ~63) + +typedef struct IRQQueue { + unsigned long *queue; + int32_t queue_size; /* Only used for VMSTATE_BITMAP */ + int next; + int priority; +} IRQQueue; + +typedef struct IRQSource { + uint32_t ivpr; /* IRQ vector/priority register */ + uint32_t idr; /* IRQ destination register */ + uint32_t destmask; /* bitmap of CPU destinations */ + int last_cpu; + int output; /* IRQ level, e.g. OPENPIC_OUTPUT_INT */ + int pending; /* TRUE if IRQ is pending */ + IRQType type; + bool level:1; /* level-triggered */ + bool nomask:1; /* critical interrupts ignore mask on some FSL MPICs */ +} IRQSource; + +#define IVPR_MASK_SHIFT 31 +#define IVPR_MASK_MASK (1U << IVPR_MASK_SHIFT) +#define IVPR_ACTIVITY_SHIFT 30 +#define IVPR_ACTIVITY_MASK (1U << IVPR_ACTIVITY_SHIFT) +#define IVPR_MODE_SHIFT 29 +#define IVPR_MODE_MASK (1U << IVPR_MODE_SHIFT) +#define IVPR_POLARITY_SHIFT 23 +#define IVPR_POLARITY_MASK (1U << IVPR_POLARITY_SHIFT) +#define IVPR_SENSE_SHIFT 22 +#define IVPR_SENSE_MASK (1U << IVPR_SENSE_SHIFT) + +#define IVPR_PRIORITY_MASK (0xFU << 16) +#define IVPR_PRIORITY(_ivprr_) ((int)(((_ivprr_) & IVPR_PRIORITY_MASK) >> 16)) +#define IVPR_VECTOR(opp, _ivprr_) ((_ivprr_) & (opp)->vector_mask) + +/* IDR[EP/CI] are only for FSL MPIC prior to v4.0 */ +#define IDR_EP 0x80000000 /* external pin */ +#define IDR_CI 0x40000000 /* critical interrupt */ + +typedef struct OpenPICTimer { + uint32_t tccr; /* Global timer current count register */ + uint32_t tbcr; /* Global timer base count register */ + int n_IRQ; + bool qemu_timer_active; /* Is the qemu_timer is running? */ + struct QEMUTimer *qemu_timer; + struct OpenPICState *opp; /* Device timer is part of. */ + /* The QEMU_CLOCK_VIRTUAL time (in ns) corresponding to the last + current_count written or read, only defined if qemu_timer_active. */ + uint64_t origin_time; +} OpenPICTimer; + +typedef struct OpenPICMSI { + uint32_t msir; /* Shared Message Signaled Interrupt Register */ +} OpenPICMSI; + +typedef struct IRQDest { + int32_t ctpr; /* CPU current task priority */ + IRQQueue raised; + IRQQueue servicing; + qemu_irq *irqs; + + /* Count of IRQ sources asserting on non-INT outputs */ + uint32_t outputs_active[OPENPIC_OUTPUT_NB]; +} IRQDest; + +#define TYPE_OPENPIC "openpic" +OBJECT_DECLARE_SIMPLE_TYPE(OpenPICState, OPENPIC) + +struct OpenPICState { + /*< private >*/ + SysBusDevice parent_obj; + /*< public >*/ + + MemoryRegion mem; + + /* Behavior control */ + FslMpicInfo *fsl; + uint32_t model; + uint32_t flags; + uint32_t nb_irqs; + uint32_t vid; + uint32_t vir; /* Vendor identification register */ + uint32_t vector_mask; + uint32_t tfrr_reset; + uint32_t ivpr_reset; + uint32_t idr_reset; + uint32_t brr1; + uint32_t mpic_mode_mask; + + /* Sub-regions */ + MemoryRegion sub_io_mem[6]; + + /* Global registers */ + uint32_t frr; /* Feature reporting register */ + uint32_t gcr; /* Global configuration register */ + uint32_t pir; /* Processor initialization register */ + uint32_t spve; /* Spurious vector register */ + uint32_t tfrr; /* Timer frequency reporting register */ + /* Source registers */ + IRQSource src[OPENPIC_MAX_IRQ]; + /* Local registers per output pin */ + IRQDest dst[MAX_CPU]; + uint32_t nb_cpus; + /* Timer registers */ + OpenPICTimer timers[OPENPIC_MAX_TMR]; + uint32_t max_tmr; + + /* Shared MSI registers */ + OpenPICMSI msi[MAX_MSI]; + uint32_t max_irq; + uint32_t irq_ipi0; + uint32_t irq_tim0; + uint32_t irq_msi; +}; + +#endif /* OPENPIC_H */ diff --git a/include/hw/ppc/openpic_kvm.h b/include/hw/ppc/openpic_kvm.h new file mode 100644 index 000000000..9ef421525 --- /dev/null +++ b/include/hw/ppc/openpic_kvm.h @@ -0,0 +1,7 @@ +#ifndef OPENPIC_KVM_H +#define OPENPIC_KVM_H + +#define TYPE_KVM_OPENPIC "kvm-openpic" +int kvm_openpic_connect_vcpu(DeviceState *d, CPUState *cs); + +#endif /* OPENPIC_KVM_H */ diff --git a/include/hw/ppc/pnv.h b/include/hw/ppc/pnv.h new file mode 100644 index 000000000..ee7eda3e0 --- /dev/null +++ b/include/hw/ppc/pnv.h @@ -0,0 +1,350 @@ +/* + * QEMU PowerPC PowerNV various definitions + * + * Copyright (c) 2014-2016 BenH, IBM Corporation. + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, see . + */ + +#ifndef PPC_PNV_H +#define PPC_PNV_H + +#include "hw/boards.h" +#include "hw/sysbus.h" +#include "hw/ipmi/ipmi.h" +#include "hw/ppc/pnv_lpc.h" +#include "hw/ppc/pnv_pnor.h" +#include "hw/ppc/pnv_psi.h" +#include "hw/ppc/pnv_occ.h" +#include "hw/ppc/pnv_homer.h" +#include "hw/ppc/pnv_xive.h" +#include "hw/ppc/pnv_core.h" +#include "hw/pci-host/pnv_phb3.h" +#include "hw/pci-host/pnv_phb4.h" +#include "qom/object.h" + +#define TYPE_PNV_CHIP "pnv-chip" +OBJECT_DECLARE_TYPE(PnvChip, PnvChipClass, + PNV_CHIP) + +struct PnvChip { + /*< private >*/ + SysBusDevice parent_obj; + + /*< public >*/ + uint32_t chip_id; + uint64_t ram_start; + uint64_t ram_size; + + uint32_t nr_cores; + uint32_t nr_threads; + uint64_t cores_mask; + PnvCore **cores; + + uint32_t num_phbs; + + MemoryRegion xscom_mmio; + MemoryRegion xscom; + AddressSpace xscom_as; + + gchar *dt_isa_nodename; +}; + +#define TYPE_PNV8_CHIP "pnv8-chip" +typedef struct Pnv8Chip Pnv8Chip; +DECLARE_INSTANCE_CHECKER(Pnv8Chip, PNV8_CHIP, + TYPE_PNV8_CHIP) + +struct Pnv8Chip { + /*< private >*/ + PnvChip parent_obj; + + /*< public >*/ + MemoryRegion icp_mmio; + + PnvLpcController lpc; + Pnv8Psi psi; + PnvOCC occ; + PnvHomer homer; + +#define PNV8_CHIP_PHB3_MAX 4 + PnvPHB3 phbs[PNV8_CHIP_PHB3_MAX]; + + XICSFabric *xics; +}; + +#define TYPE_PNV9_CHIP "pnv9-chip" +typedef struct Pnv9Chip Pnv9Chip; +DECLARE_INSTANCE_CHECKER(Pnv9Chip, PNV9_CHIP, + TYPE_PNV9_CHIP) + +struct Pnv9Chip { + /*< private >*/ + PnvChip parent_obj; + + /*< public >*/ + PnvXive xive; + Pnv9Psi psi; + PnvLpcController lpc; + PnvOCC occ; + PnvHomer homer; + + uint32_t nr_quads; + PnvQuad *quads; + +#define PNV9_CHIP_MAX_PEC 3 + PnvPhb4PecState pecs[PNV9_CHIP_MAX_PEC]; +}; + +/* + * A SMT8 fused core is a pair of SMT4 cores. + */ +#define PNV9_PIR2FUSEDCORE(pir) (((pir) >> 3) & 0xf) +#define PNV9_PIR2CHIP(pir) (((pir) >> 8) & 0x7f) + +#define TYPE_PNV10_CHIP "pnv10-chip" +typedef struct Pnv10Chip Pnv10Chip; +DECLARE_INSTANCE_CHECKER(Pnv10Chip, PNV10_CHIP, + TYPE_PNV10_CHIP) + +struct Pnv10Chip { + /*< private >*/ + PnvChip parent_obj; + + /*< public >*/ + Pnv9Psi psi; + PnvLpcController lpc; +}; + +struct PnvChipClass { + /*< private >*/ + SysBusDeviceClass parent_class; + + /*< public >*/ + uint64_t chip_cfam_id; + uint64_t cores_mask; + uint32_t num_phbs; + + DeviceRealize parent_realize; + + uint32_t (*core_pir)(PnvChip *chip, uint32_t core_id); + void (*intc_create)(PnvChip *chip, PowerPCCPU *cpu, Error **errp); + void (*intc_reset)(PnvChip *chip, PowerPCCPU *cpu); + void (*intc_destroy)(PnvChip *chip, PowerPCCPU *cpu); + void (*intc_print_info)(PnvChip *chip, PowerPCCPU *cpu, Monitor *mon); + ISABus *(*isa_create)(PnvChip *chip, Error **errp); + void (*dt_populate)(PnvChip *chip, void *fdt); + void (*pic_print_info)(PnvChip *chip, Monitor *mon); + uint64_t (*xscom_core_base)(PnvChip *chip, uint32_t core_id); + uint32_t (*xscom_pcba)(PnvChip *chip, uint64_t addr); +}; + +#define PNV_CHIP_TYPE_SUFFIX "-" TYPE_PNV_CHIP +#define PNV_CHIP_TYPE_NAME(cpu_model) cpu_model PNV_CHIP_TYPE_SUFFIX + +#define TYPE_PNV_CHIP_POWER8E PNV_CHIP_TYPE_NAME("power8e_v2.1") +DECLARE_INSTANCE_CHECKER(PnvChip, PNV_CHIP_POWER8E, + TYPE_PNV_CHIP_POWER8E) + +#define TYPE_PNV_CHIP_POWER8 PNV_CHIP_TYPE_NAME("power8_v2.0") +DECLARE_INSTANCE_CHECKER(PnvChip, PNV_CHIP_POWER8, + TYPE_PNV_CHIP_POWER8) + +#define TYPE_PNV_CHIP_POWER8NVL PNV_CHIP_TYPE_NAME("power8nvl_v1.0") +DECLARE_INSTANCE_CHECKER(PnvChip, PNV_CHIP_POWER8NVL, + TYPE_PNV_CHIP_POWER8NVL) + +#define TYPE_PNV_CHIP_POWER9 PNV_CHIP_TYPE_NAME("power9_v2.0") +DECLARE_INSTANCE_CHECKER(PnvChip, PNV_CHIP_POWER9, + TYPE_PNV_CHIP_POWER9) + +#define TYPE_PNV_CHIP_POWER10 PNV_CHIP_TYPE_NAME("power10_v1.0") +DECLARE_INSTANCE_CHECKER(PnvChip, PNV_CHIP_POWER10, + TYPE_PNV_CHIP_POWER10) + +/* + * This generates a HW chip id depending on an index, as found on a + * two socket system with dual chip modules : + * + * 0x0, 0x1, 0x10, 0x11 + * + * 4 chips should be the maximum + * + * TODO: use a machine property to define the chip ids + */ +#define PNV_CHIP_HWID(i) ((((i) & 0x3e) << 3) | ((i) & 0x1)) + +/* + * Converts back a HW chip id to an index. This is useful to calculate + * the MMIO addresses of some controllers which depend on the chip id. + */ +#define PNV_CHIP_INDEX(chip) \ + (((chip)->chip_id >> 2) * 2 + ((chip)->chip_id & 0x3)) + +PowerPCCPU *pnv_chip_find_cpu(PnvChip *chip, uint32_t pir); + +#define TYPE_PNV_MACHINE MACHINE_TYPE_NAME("powernv") +typedef struct PnvMachineClass PnvMachineClass; +typedef struct PnvMachineState PnvMachineState; +DECLARE_OBJ_CHECKERS(PnvMachineState, PnvMachineClass, + PNV_MACHINE, TYPE_PNV_MACHINE) + + +struct PnvMachineClass { + /*< private >*/ + MachineClass parent_class; + + /*< public >*/ + const char *compat; + int compat_size; + + void (*dt_power_mgt)(PnvMachineState *pnv, void *fdt); +}; + +struct PnvMachineState { + /*< private >*/ + MachineState parent_obj; + + uint32_t initrd_base; + long initrd_size; + + uint32_t num_chips; + PnvChip **chips; + + ISABus *isa_bus; + uint32_t cpld_irqstate; + + IPMIBmc *bmc; + Notifier powerdown_notifier; + + PnvPnor *pnor; + + hwaddr fw_load_addr; +}; + +#define PNV_FDT_ADDR 0x01000000 +#define PNV_TIMEBASE_FREQ 512000000ULL + +/* + * BMC helpers + */ +void pnv_dt_bmc_sensors(IPMIBmc *bmc, void *fdt); +void pnv_bmc_powerdown(IPMIBmc *bmc); +IPMIBmc *pnv_bmc_create(PnvPnor *pnor); +IPMIBmc *pnv_bmc_find(Error **errp); +void pnv_bmc_set_pnor(IPMIBmc *bmc, PnvPnor *pnor); + +/* + * POWER8 MMIO base addresses + */ +#define PNV_XSCOM_SIZE 0x800000000ull +#define PNV_XSCOM_BASE(chip) \ + (0x0003fc0000000000ull + ((uint64_t)(chip)->chip_id) * PNV_XSCOM_SIZE) + +#define PNV_OCC_COMMON_AREA_SIZE 0x0000000000800000ull +#define PNV_OCC_COMMON_AREA_BASE 0x7fff800000ull +#define PNV_OCC_SENSOR_BASE(chip) (PNV_OCC_COMMON_AREA_BASE + \ + PNV_OCC_SENSOR_DATA_BLOCK_BASE(PNV_CHIP_INDEX(chip))) + +#define PNV_HOMER_SIZE 0x0000000000400000ull +#define PNV_HOMER_BASE(chip) \ + (0x7ffd800000ull + ((uint64_t)PNV_CHIP_INDEX(chip)) * PNV_HOMER_SIZE) + + +/* + * XSCOM 0x20109CA defines the ICP BAR: + * + * 0:29 : bits 14 to 43 of address to define 1 MB region. + * 30 : 1 to enable ICP to receive loads/stores against its BAR region + * 31:63 : Constant 0 + * + * Usually defined as : + * + * 0xffffe00200000000 -> 0x0003ffff80000000 + * 0xffffe00600000000 -> 0x0003ffff80100000 + * 0xffffe02200000000 -> 0x0003ffff80800000 + * 0xffffe02600000000 -> 0x0003ffff80900000 + */ +#define PNV_ICP_SIZE 0x0000000000100000ull +#define PNV_ICP_BASE(chip) \ + (0x0003ffff80000000ull + (uint64_t) PNV_CHIP_INDEX(chip) * PNV_ICP_SIZE) + + +#define PNV_PSIHB_SIZE 0x0000000000100000ull +#define PNV_PSIHB_BASE(chip) \ + (0x0003fffe80000000ull + (uint64_t)PNV_CHIP_INDEX(chip) * PNV_PSIHB_SIZE) + +#define PNV_PSIHB_FSP_SIZE 0x0000000100000000ull +#define PNV_PSIHB_FSP_BASE(chip) \ + (0x0003ffe000000000ull + (uint64_t)PNV_CHIP_INDEX(chip) * \ + PNV_PSIHB_FSP_SIZE) + +/* + * POWER9 MMIO base addresses + */ +#define PNV9_CHIP_BASE(chip, base) \ + ((base) + ((uint64_t) (chip)->chip_id << 42)) + +#define PNV9_XIVE_VC_SIZE 0x0000008000000000ull +#define PNV9_XIVE_VC_BASE(chip) PNV9_CHIP_BASE(chip, 0x0006010000000000ull) + +#define PNV9_XIVE_PC_SIZE 0x0000001000000000ull +#define PNV9_XIVE_PC_BASE(chip) PNV9_CHIP_BASE(chip, 0x0006018000000000ull) + +#define PNV9_LPCM_SIZE 0x0000000100000000ull +#define PNV9_LPCM_BASE(chip) PNV9_CHIP_BASE(chip, 0x0006030000000000ull) + +#define PNV9_PSIHB_SIZE 0x0000000000100000ull +#define PNV9_PSIHB_BASE(chip) PNV9_CHIP_BASE(chip, 0x0006030203000000ull) + +#define PNV9_XIVE_IC_SIZE 0x0000000000080000ull +#define PNV9_XIVE_IC_BASE(chip) PNV9_CHIP_BASE(chip, 0x0006030203100000ull) + +#define PNV9_XIVE_TM_SIZE 0x0000000000040000ull +#define PNV9_XIVE_TM_BASE(chip) PNV9_CHIP_BASE(chip, 0x0006030203180000ull) + +#define PNV9_PSIHB_ESB_SIZE 0x0000000000010000ull +#define PNV9_PSIHB_ESB_BASE(chip) PNV9_CHIP_BASE(chip, 0x00060302031c0000ull) + +#define PNV9_XSCOM_SIZE 0x0000000400000000ull +#define PNV9_XSCOM_BASE(chip) PNV9_CHIP_BASE(chip, 0x00603fc00000000ull) + +#define PNV9_OCC_COMMON_AREA_SIZE 0x0000000000800000ull +#define PNV9_OCC_COMMON_AREA_BASE 0x203fff800000ull +#define PNV9_OCC_SENSOR_BASE(chip) (PNV9_OCC_COMMON_AREA_BASE + \ + PNV_OCC_SENSOR_DATA_BLOCK_BASE(PNV_CHIP_INDEX(chip))) + +#define PNV9_HOMER_SIZE 0x0000000000400000ull +#define PNV9_HOMER_BASE(chip) \ + (0x203ffd800000ull + ((uint64_t)PNV_CHIP_INDEX(chip)) * PNV9_HOMER_SIZE) + +/* + * POWER10 MMIO base addresses - 16TB stride per chip + */ +#define PNV10_CHIP_BASE(chip, base) \ + ((base) + ((uint64_t) (chip)->chip_id << 44)) + +#define PNV10_XSCOM_SIZE 0x0000000400000000ull +#define PNV10_XSCOM_BASE(chip) PNV10_CHIP_BASE(chip, 0x00603fc00000000ull) + +#define PNV10_LPCM_SIZE 0x0000000100000000ull +#define PNV10_LPCM_BASE(chip) PNV10_CHIP_BASE(chip, 0x0006030000000000ull) + +#define PNV10_PSIHB_ESB_SIZE 0x0000000000100000ull +#define PNV10_PSIHB_ESB_BASE(chip) PNV10_CHIP_BASE(chip, 0x0006030202000000ull) + +#define PNV10_PSIHB_SIZE 0x0000000000100000ull +#define PNV10_PSIHB_BASE(chip) PNV10_CHIP_BASE(chip, 0x0006030203000000ull) + +#endif /* PPC_PNV_H */ diff --git a/include/hw/ppc/pnv_core.h b/include/hw/ppc/pnv_core.h new file mode 100644 index 000000000..6ecee98a7 --- /dev/null +++ b/include/hw/ppc/pnv_core.h @@ -0,0 +1,73 @@ +/* + * QEMU PowerPC PowerNV CPU Core model + * + * Copyright (c) 2016, IBM Corporation. + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public License + * as published by the Free Software Foundation; either version 2.1 of + * the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, see . + */ + +#ifndef PPC_PNV_CORE_H +#define PPC_PNV_CORE_H + +#include "hw/cpu/core.h" +#include "target/ppc/cpu.h" +#include "qom/object.h" + +#define TYPE_PNV_CORE "powernv-cpu-core" +OBJECT_DECLARE_TYPE(PnvCore, PnvCoreClass, + PNV_CORE) + +typedef struct PnvChip PnvChip; + +struct PnvCore { + /*< private >*/ + CPUCore parent_obj; + + /*< public >*/ + PowerPCCPU **threads; + uint32_t pir; + uint64_t hrmor; + PnvChip *chip; + + MemoryRegion xscom_regs; +}; + +struct PnvCoreClass { + DeviceClass parent_class; + + const MemoryRegionOps *xscom_ops; +}; + +#define PNV_CORE_TYPE_SUFFIX "-" TYPE_PNV_CORE +#define PNV_CORE_TYPE_NAME(cpu_model) cpu_model PNV_CORE_TYPE_SUFFIX + +typedef struct PnvCPUState { + Object *intc; +} PnvCPUState; + +static inline PnvCPUState *pnv_cpu_state(PowerPCCPU *cpu) +{ + return (PnvCPUState *)cpu->machine_data; +} + +#define TYPE_PNV_QUAD "powernv-cpu-quad" +OBJECT_DECLARE_SIMPLE_TYPE(PnvQuad, PNV_QUAD) + +struct PnvQuad { + DeviceState parent_obj; + + uint32_t id; + MemoryRegion xscom_regs; +}; +#endif /* PPC_PNV_CORE_H */ diff --git a/include/hw/ppc/pnv_homer.h b/include/hw/ppc/pnv_homer.h new file mode 100644 index 000000000..1889e3083 --- /dev/null +++ b/include/hw/ppc/pnv_homer.h @@ -0,0 +1,56 @@ +/* + * QEMU PowerPC PowerNV Emulation of a few HOMER related registers + * + * Copyright (c) 2019, IBM Corporation. + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, see . + */ + +#ifndef PPC_PNV_HOMER_H +#define PPC_PNV_HOMER_H + +#include "hw/ppc/pnv.h" +#include "qom/object.h" + +#define TYPE_PNV_HOMER "pnv-homer" +OBJECT_DECLARE_TYPE(PnvHomer, PnvHomerClass, + PNV_HOMER) +#define TYPE_PNV8_HOMER TYPE_PNV_HOMER "-POWER8" +DECLARE_INSTANCE_CHECKER(PnvHomer, PNV8_HOMER, + TYPE_PNV8_HOMER) +#define TYPE_PNV9_HOMER TYPE_PNV_HOMER "-POWER9" +DECLARE_INSTANCE_CHECKER(PnvHomer, PNV9_HOMER, + TYPE_PNV9_HOMER) + +struct PnvHomer { + DeviceState parent; + + struct PnvChip *chip; + MemoryRegion pba_regs; + MemoryRegion regs; +}; + + +struct PnvHomerClass { + DeviceClass parent_class; + + int pba_size; + const MemoryRegionOps *pba_ops; + int homer_size; + const MemoryRegionOps *homer_ops; + + hwaddr core_max_base; +}; + +#endif /* PPC_PNV_HOMER_H */ diff --git a/include/hw/ppc/pnv_lpc.h b/include/hw/ppc/pnv_lpc.h new file mode 100644 index 000000000..e893e763d --- /dev/null +++ b/include/hw/ppc/pnv_lpc.h @@ -0,0 +1,108 @@ +/* + * QEMU PowerPC PowerNV LPC controller + * + * Copyright (c) 2016, IBM Corporation. + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, see . + */ + +#ifndef PPC_PNV_LPC_H +#define PPC_PNV_LPC_H + +#include "hw/ppc/pnv_psi.h" +#include "qom/object.h" + +#define TYPE_PNV_LPC "pnv-lpc" +typedef struct PnvLpcClass PnvLpcClass; +typedef struct PnvLpcController PnvLpcController; +DECLARE_OBJ_CHECKERS(PnvLpcController, PnvLpcClass, + PNV_LPC, TYPE_PNV_LPC) +#define TYPE_PNV8_LPC TYPE_PNV_LPC "-POWER8" +DECLARE_INSTANCE_CHECKER(PnvLpcController, PNV8_LPC, + TYPE_PNV8_LPC) + +#define TYPE_PNV9_LPC TYPE_PNV_LPC "-POWER9" +DECLARE_INSTANCE_CHECKER(PnvLpcController, PNV9_LPC, + TYPE_PNV9_LPC) + +#define TYPE_PNV10_LPC TYPE_PNV_LPC "-POWER10" +DECLARE_INSTANCE_CHECKER(PnvLpcController, PNV10_LPC, + TYPE_PNV10_LPC) + +struct PnvLpcController { + DeviceState parent; + + uint64_t eccb_stat_reg; + uint32_t eccb_data_reg; + + /* OPB bus */ + MemoryRegion opb_mr; + AddressSpace opb_as; + + /* ISA IO and Memory space */ + MemoryRegion isa_io; + MemoryRegion isa_mem; + MemoryRegion isa_fw; + + /* Windows from OPB to ISA (aliases) */ + MemoryRegion opb_isa_io; + MemoryRegion opb_isa_mem; + MemoryRegion opb_isa_fw; + + /* Registers */ + MemoryRegion lpc_hc_regs; + MemoryRegion opb_master_regs; + + /* OPB Master LS registers */ + uint32_t opb_irq_route0; + uint32_t opb_irq_route1; + uint32_t opb_irq_stat; + uint32_t opb_irq_mask; + uint32_t opb_irq_pol; + uint32_t opb_irq_input; + + /* LPC HC registers */ + uint32_t lpc_hc_fw_seg_idsel; + uint32_t lpc_hc_fw_rd_acc_size; + uint32_t lpc_hc_irqser_ctrl; + uint32_t lpc_hc_irqmask; + uint32_t lpc_hc_irqstat; + uint32_t lpc_hc_error_addr; + + /* XSCOM registers */ + MemoryRegion xscom_regs; + + /* PSI to generate interrupts */ + PnvPsi *psi; +}; + + +struct PnvLpcClass { + DeviceClass parent_class; + + int psi_irq; + + DeviceRealize parent_realize; +}; + +/* + * Old compilers error on typdef forward declarations. Keep them happy. + */ +struct PnvChip; + +ISABus *pnv_lpc_isa_create(PnvLpcController *lpc, bool use_cpld, Error **errp); +int pnv_dt_lpc(struct PnvChip *chip, void *fdt, int root_offset, + uint64_t lpcm_addr, uint64_t lpcm_size); + +#endif /* PPC_PNV_LPC_H */ diff --git a/include/hw/ppc/pnv_occ.h b/include/hw/ppc/pnv_occ.h new file mode 100644 index 000000000..b78185aec --- /dev/null +++ b/include/hw/ppc/pnv_occ.h @@ -0,0 +1,63 @@ +/* + * QEMU PowerPC PowerNV Emulation of a few OCC related registers + * + * Copyright (c) 2015-2017, IBM Corporation. + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, see . + */ + +#ifndef PPC_PNV_OCC_H +#define PPC_PNV_OCC_H + +#include "hw/ppc/pnv_psi.h" +#include "qom/object.h" + +#define TYPE_PNV_OCC "pnv-occ" +OBJECT_DECLARE_TYPE(PnvOCC, PnvOCCClass, + PNV_OCC) +#define TYPE_PNV8_OCC TYPE_PNV_OCC "-POWER8" +DECLARE_INSTANCE_CHECKER(PnvOCC, PNV8_OCC, + TYPE_PNV8_OCC) +#define TYPE_PNV9_OCC TYPE_PNV_OCC "-POWER9" +DECLARE_INSTANCE_CHECKER(PnvOCC, PNV9_OCC, + TYPE_PNV9_OCC) + +#define PNV_OCC_SENSOR_DATA_BLOCK_OFFSET 0x00580000 +#define PNV_OCC_SENSOR_DATA_BLOCK_SIZE 0x00025800 + +struct PnvOCC { + DeviceState xd; + + /* OCC Misc interrupt */ + uint64_t occmisc; + + PnvPsi *psi; + + MemoryRegion xscom_regs; + MemoryRegion sram_regs; +}; + + +struct PnvOCCClass { + DeviceClass parent_class; + + int xscom_size; + const MemoryRegionOps *xscom_ops; + int psi_irq; +}; + +#define PNV_OCC_SENSOR_DATA_BLOCK_BASE(i) \ + (PNV_OCC_SENSOR_DATA_BLOCK_OFFSET + (i) * PNV_OCC_SENSOR_DATA_BLOCK_SIZE) + +#endif /* PPC_PNV_OCC_H */ diff --git a/include/hw/ppc/pnv_pnor.h b/include/hw/ppc/pnv_pnor.h new file mode 100644 index 000000000..99f9a3adf --- /dev/null +++ b/include/hw/ppc/pnv_pnor.h @@ -0,0 +1,31 @@ +/* + * QEMU PowerNV PNOR simple model + * + * Copyright (c) 2019, IBM Corporation. + * + * This code is licensed under the GPL version 2 or later. See the + * COPYING file in the top-level directory. + */ +#ifndef _PPC_PNV_PNOR_H +#define _PPC_PNV_PNOR_H +#include "qom/object.h" + +/* + * PNOR offset on the LPC FW address space + */ +#define PNOR_SPI_OFFSET 0x0c000000UL + +#define TYPE_PNV_PNOR "pnv-pnor" +OBJECT_DECLARE_SIMPLE_TYPE(PnvPnor, PNV_PNOR) + +struct PnvPnor { + SysBusDevice parent_obj; + + BlockBackend *blk; + + uint8_t *storage; + int64_t size; + MemoryRegion mmio; +}; + +#endif /* _PPC_PNV_PNOR_H */ diff --git a/include/hw/ppc/pnv_psi.h b/include/hw/ppc/pnv_psi.h new file mode 100644 index 000000000..eb841b34a --- /dev/null +++ b/include/hw/ppc/pnv_psi.h @@ -0,0 +1,120 @@ +/* + * QEMU PowerPC PowerNV Processor Service Interface (PSI) model + * + * Copyright (c) 2015-2017, IBM Corporation. + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, see . + */ + +#ifndef PPC_PNV_PSI_H +#define PPC_PNV_PSI_H + +#include "hw/sysbus.h" +#include "hw/ppc/xics.h" +#include "hw/ppc/xive.h" +#include "qom/object.h" + +#define TYPE_PNV_PSI "pnv-psi" +OBJECT_DECLARE_TYPE(PnvPsi, PnvPsiClass, + PNV_PSI) + +#define PSIHB_XSCOM_MAX 0x20 + +struct PnvPsi { + DeviceState parent; + + MemoryRegion regs_mr; + uint64_t bar; + + /* FSP region not supported */ + /* MemoryRegion fsp_mr; */ + uint64_t fsp_bar; + + /* Interrupt generation */ + qemu_irq *qirqs; + + /* Registers */ + uint64_t regs[PSIHB_XSCOM_MAX]; + + MemoryRegion xscom_regs; +}; + +#define TYPE_PNV8_PSI TYPE_PNV_PSI "-POWER8" +OBJECT_DECLARE_SIMPLE_TYPE(Pnv8Psi, PNV8_PSI) + +struct Pnv8Psi { + PnvPsi parent; + + ICSState ics; +}; + +#define TYPE_PNV9_PSI TYPE_PNV_PSI "-POWER9" +OBJECT_DECLARE_SIMPLE_TYPE(Pnv9Psi, PNV9_PSI) + +struct Pnv9Psi { + PnvPsi parent; + + XiveSource source; +}; + +#define TYPE_PNV10_PSI TYPE_PNV_PSI "-POWER10" + + +struct PnvPsiClass { + SysBusDeviceClass parent_class; + + uint32_t xscom_pcba; + uint32_t xscom_size; + uint64_t bar_mask; + const char *compat; + int compat_size; + + void (*irq_set)(PnvPsi *psi, int, bool state); +}; + +/* The PSI and FSP interrupts are muxed on the same IRQ number */ +typedef enum PnvPsiIrq { + PSIHB_IRQ_PSI, /* internal use only */ + PSIHB_IRQ_FSP, /* internal use only */ + PSIHB_IRQ_OCC, + PSIHB_IRQ_FSI, + PSIHB_IRQ_LPC_I2C, + PSIHB_IRQ_LOCAL_ERR, + PSIHB_IRQ_EXTERNAL, +} PnvPsiIrq; + +#define PSI_NUM_INTERRUPTS 6 + +void pnv_psi_irq_set(PnvPsi *psi, int irq, bool state); + +/* P9 PSI Interrupts */ +#define PSIHB9_IRQ_PSI 0 +#define PSIHB9_IRQ_OCC 1 +#define PSIHB9_IRQ_FSI 2 +#define PSIHB9_IRQ_LPCHC 3 +#define PSIHB9_IRQ_LOCAL_ERR 4 +#define PSIHB9_IRQ_GLOBAL_ERR 5 +#define PSIHB9_IRQ_TPM 6 +#define PSIHB9_IRQ_LPC_SIRQ0 7 +#define PSIHB9_IRQ_LPC_SIRQ1 8 +#define PSIHB9_IRQ_LPC_SIRQ2 9 +#define PSIHB9_IRQ_LPC_SIRQ3 10 +#define PSIHB9_IRQ_SBE_I2C 11 +#define PSIHB9_IRQ_DIO 12 +#define PSIHB9_IRQ_PSU 13 +#define PSIHB9_NUM_IRQS 14 + +void pnv_psi_pic_print_info(Pnv9Psi *psi, Monitor *mon); + +#endif /* PPC_PNV_PSI_H */ diff --git a/include/hw/ppc/pnv_xive.h b/include/hw/ppc/pnv_xive.h new file mode 100644 index 000000000..7928e2796 --- /dev/null +++ b/include/hw/ppc/pnv_xive.h @@ -0,0 +1,98 @@ +/* + * QEMU PowerPC XIVE interrupt controller model + * + * Copyright (c) 2017-2019, IBM Corporation. + * + * This code is licensed under the GPL version 2 or later. See the + * COPYING file in the top-level directory. + */ + +#ifndef PPC_PNV_XIVE_H +#define PPC_PNV_XIVE_H + +#include "hw/ppc/xive.h" +#include "qom/object.h" + +struct PnvChip; + +#define TYPE_PNV_XIVE "pnv-xive" +OBJECT_DECLARE_TYPE(PnvXive, PnvXiveClass, + PNV_XIVE) + +#define XIVE_BLOCK_MAX 16 + +#define XIVE_TABLE_BLK_MAX 16 /* Block Scope Table (0-15) */ +#define XIVE_TABLE_MIG_MAX 16 /* Migration Register Table (1-15) */ +#define XIVE_TABLE_VDT_MAX 16 /* VDT Domain Table (0-15) */ +#define XIVE_TABLE_EDT_MAX 64 /* EDT Domain Table (0-63) */ + +struct PnvXive { + XiveRouter parent_obj; + + /* Owning chip */ + struct PnvChip *chip; + + /* XSCOM addresses giving access to the controller registers */ + MemoryRegion xscom_regs; + + /* Main MMIO regions that can be configured by FW */ + MemoryRegion ic_mmio; + MemoryRegion ic_reg_mmio; + MemoryRegion ic_notify_mmio; + MemoryRegion ic_lsi_mmio; + MemoryRegion tm_indirect_mmio; + MemoryRegion vc_mmio; + MemoryRegion pc_mmio; + MemoryRegion tm_mmio; + + /* + * IPI and END address spaces modeling the EDT segmentation in the + * VC region + */ + AddressSpace ipi_as; + MemoryRegion ipi_mmio; + MemoryRegion ipi_edt_mmio; + + AddressSpace end_as; + MemoryRegion end_mmio; + MemoryRegion end_edt_mmio; + + /* Shortcut values for the Main MMIO regions */ + hwaddr ic_base; + uint32_t ic_shift; + hwaddr vc_base; + uint32_t vc_shift; + hwaddr pc_base; + uint32_t pc_shift; + hwaddr tm_base; + uint32_t tm_shift; + + /* Our XIVE source objects for IPIs and ENDs */ + XiveSource ipi_source; + XiveENDSource end_source; + + /* Interrupt controller registers */ + uint64_t regs[0x300]; + + /* + * Virtual Structure Descriptor tables : EAT, SBE, ENDT, NVTT, IRQ + * These are in a SRAM protected by ECC. + */ + uint64_t vsds[5][XIVE_BLOCK_MAX]; + + /* Translation tables */ + uint64_t blk[XIVE_TABLE_BLK_MAX]; + uint64_t mig[XIVE_TABLE_MIG_MAX]; + uint64_t vdt[XIVE_TABLE_VDT_MAX]; + uint64_t edt[XIVE_TABLE_EDT_MAX]; +}; + +struct PnvXiveClass { + XiveRouterClass parent_class; + + DeviceRealize parent_realize; +}; + +void pnv_xive_pic_print_info(PnvXive *xive, Monitor *mon); + +#endif /* PPC_PNV_XIVE_H */ diff --git a/include/hw/ppc/pnv_xscom.h b/include/hw/ppc/pnv_xscom.h new file mode 100644 index 000000000..8578f5a20 --- /dev/null +++ b/include/hw/ppc/pnv_xscom.h @@ -0,0 +1,148 @@ +/* + * QEMU PowerPC PowerNV XSCOM bus definitions + * + * Copyright (c) 2016, IBM Corporation. + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, see . + */ + +#ifndef PPC_PNV_XSCOM_H +#define PPC_PNV_XSCOM_H + +#include "qom/object.h" + +typedef struct PnvXScomInterface PnvXScomInterface; + +#define TYPE_PNV_XSCOM_INTERFACE "pnv-xscom-interface" +#define PNV_XSCOM_INTERFACE(obj) \ + INTERFACE_CHECK(PnvXScomInterface, (obj), TYPE_PNV_XSCOM_INTERFACE) +typedef struct PnvXScomInterfaceClass PnvXScomInterfaceClass; +DECLARE_CLASS_CHECKERS(PnvXScomInterfaceClass, PNV_XSCOM_INTERFACE, + TYPE_PNV_XSCOM_INTERFACE) + +struct PnvXScomInterfaceClass { + InterfaceClass parent; + int (*dt_xscom)(PnvXScomInterface *dev, void *fdt, int offset); +}; + +/* + * Layout of the XSCOM PCB addresses of EX core 1 (POWER 8) + * + * GPIO 0x1100xxxx + * SCOM 0x1101xxxx + * OHA 0x1102xxxx + * CLOCK CTL 0x1103xxxx + * FIR 0x1104xxxx + * THERM 0x1105xxxx + * 0x1106xxxx + * .. + * 0x110Exxxx + * PCB SLAVE 0x110Fxxxx + */ + +#define PNV_XSCOM_EX_CORE_BASE 0x10000000ull + +#define PNV_XSCOM_EX_BASE(core) \ + (PNV_XSCOM_EX_CORE_BASE | ((uint64_t)(core) << 24)) +#define PNV_XSCOM_EX_SIZE 0x100000 + +#define PNV_XSCOM_LPC_BASE 0xb0020 +#define PNV_XSCOM_LPC_SIZE 0x4 + +#define PNV_XSCOM_PSIHB_BASE 0x2010900 +#define PNV_XSCOM_PSIHB_SIZE 0x20 + +#define PNV_XSCOM_OCC_BASE 0x0066000 +#define PNV_XSCOM_OCC_SIZE 0x6000 + +#define PNV_XSCOM_PBA_BASE 0x2013f00 +#define PNV_XSCOM_PBA_SIZE 0x40 + +#define PNV_XSCOM_PBCQ_NEST_BASE 0x2012000 +#define PNV_XSCOM_PBCQ_NEST_SIZE 0x46 + +#define PNV_XSCOM_PBCQ_PCI_BASE 0x9012000 +#define PNV_XSCOM_PBCQ_PCI_SIZE 0x15 + +#define PNV_XSCOM_PBCQ_SPCI_BASE 0x9013c00 +#define PNV_XSCOM_PBCQ_SPCI_SIZE 0x5 + +/* + * Layout of the XSCOM PCB addresses (POWER 9) + */ +#define PNV9_XSCOM_EC_BASE(core) \ + ((uint64_t)(((core) & 0x1F) + 0x20) << 24) +#define PNV9_XSCOM_EC_SIZE 0x100000 + +#define PNV9_XSCOM_EQ_BASE(core) \ + ((uint64_t)(((core) & 0x1C) + 0x40) << 22) +#define PNV9_XSCOM_EQ_SIZE 0x100000 + +#define PNV9_XSCOM_OCC_BASE PNV_XSCOM_OCC_BASE +#define PNV9_XSCOM_OCC_SIZE 0x8000 + +#define PNV9_XSCOM_PBA_BASE 0x5012b00 +#define PNV9_XSCOM_PBA_SIZE 0x40 + +#define PNV9_XSCOM_PSIHB_BASE 0x5012900 +#define PNV9_XSCOM_PSIHB_SIZE 0x100 + +#define PNV9_XSCOM_XIVE_BASE 0x5013000 +#define PNV9_XSCOM_XIVE_SIZE 0x300 + +#define PNV9_XSCOM_PEC_NEST_BASE 0x4010c00 +#define PNV9_XSCOM_PEC_NEST_SIZE 0x100 + +#define PNV9_XSCOM_PEC_PCI_BASE 0xd010800 +#define PNV9_XSCOM_PEC_PCI_SIZE 0x200 + +/* XSCOM PCI "pass-through" window to PHB SCOM */ +#define PNV9_XSCOM_PEC_PCI_STK0 0x100 +#define PNV9_XSCOM_PEC_PCI_STK1 0x140 +#define PNV9_XSCOM_PEC_PCI_STK2 0x180 + +/* + * Layout of the XSCOM PCB addresses (POWER 10) + */ +#define PNV10_XSCOM_EQ_CHIPLET(core) (0x20 + ((core) >> 2)) +#define PNV10_XSCOM_EQ(chiplet) ((chiplet) << 24) +#define PNV10_XSCOM_EC(proc) \ + ((0x2 << 16) | ((1 << (3 - (proc))) << 12)) + +#define PNV10_XSCOM_EQ_BASE(core) \ + ((uint64_t) PNV10_XSCOM_EQ(PNV10_XSCOM_EQ_CHIPLET(core))) +#define PNV10_XSCOM_EQ_SIZE 0x100000 + +#define PNV10_XSCOM_EC_BASE(core) \ + ((uint64_t) PNV10_XSCOM_EQ_BASE(core) | PNV10_XSCOM_EC(core & 0x3)) +#define PNV10_XSCOM_EC_SIZE 0x100000 + +#define PNV10_XSCOM_PSIHB_BASE 0x3011D00 +#define PNV10_XSCOM_PSIHB_SIZE 0x100 + +void pnv_xscom_realize(PnvChip *chip, uint64_t size, Error **errp); +int pnv_dt_xscom(PnvChip *chip, void *fdt, int root_offset, + uint64_t xscom_base, uint64_t xscom_size, + const char *compat, int compat_size); + +void pnv_xscom_add_subregion(PnvChip *chip, hwaddr offset, + MemoryRegion *mr); +void pnv_xscom_region_init(MemoryRegion *mr, + struct Object *owner, + const MemoryRegionOps *ops, + void *opaque, + const char *name, + uint64_t size); + +#endif /* PPC_PNV_XSCOM_H */ diff --git a/include/hw/ppc/ppc.h b/include/hw/ppc/ppc.h new file mode 100644 index 000000000..93e614cff --- /dev/null +++ b/include/hw/ppc/ppc.h @@ -0,0 +1,115 @@ +#ifndef HW_PPC_H +#define HW_PPC_H + +#include "target/ppc/cpu-qom.h" + +void ppc_set_irq(PowerPCCPU *cpu, int n_IRQ, int level); +PowerPCCPU *ppc_get_vcpu_by_pir(int pir); +int ppc_cpu_pir(PowerPCCPU *cpu); + +/* PowerPC hardware exceptions management helpers */ +typedef void (*clk_setup_cb)(void *opaque, uint32_t freq); +typedef struct clk_setup_t clk_setup_t; +struct clk_setup_t { + clk_setup_cb cb; + void *opaque; +}; +static inline void clk_setup (clk_setup_t *clk, uint32_t freq) +{ + if (clk->cb != NULL) + (*clk->cb)(clk->opaque, freq); +} + +struct ppc_tb_t { + /* Time base management */ + int64_t tb_offset; /* Compensation */ + int64_t atb_offset; /* Compensation */ + int64_t vtb_offset; + uint32_t tb_freq; /* TB frequency */ + /* Decrementer management */ + uint64_t decr_next; /* Tick for next decr interrupt */ + uint32_t decr_freq; /* decrementer frequency */ + QEMUTimer *decr_timer; + /* Hypervisor decrementer management */ + uint64_t hdecr_next; /* Tick for next hdecr interrupt */ + QEMUTimer *hdecr_timer; + int64_t purr_offset; + void *opaque; + uint32_t flags; +}; + +/* PPC Timers flags */ +#define PPC_TIMER_BOOKE (1 << 0) /* Enable Booke support */ +#define PPC_TIMER_E500 (1 << 1) /* Enable e500 support */ +#define PPC_DECR_UNDERFLOW_TRIGGERED (1 << 2) /* Decr interrupt triggered when + * the most significant bit + * changes from 0 to 1. + */ +#define PPC_DECR_ZERO_TRIGGERED (1 << 3) /* Decr interrupt triggered when + * the decrementer reaches zero. + */ +#define PPC_DECR_UNDERFLOW_LEVEL (1 << 4) /* Decr interrupt active when + * the most significant bit is 1. + */ + +uint64_t cpu_ppc_get_tb(ppc_tb_t *tb_env, uint64_t vmclk, int64_t tb_offset); +clk_setup_cb cpu_ppc_tb_init (CPUPPCState *env, uint32_t freq); +/* Embedded PowerPC DCR management */ +typedef uint32_t (*dcr_read_cb)(void *opaque, int dcrn); +typedef void (*dcr_write_cb)(void *opaque, int dcrn, uint32_t val); +int ppc_dcr_init (CPUPPCState *env, int (*dcr_read_error)(int dcrn), + int (*dcr_write_error)(int dcrn)); +int ppc_dcr_register (CPUPPCState *env, int dcrn, void *opaque, + dcr_read_cb drc_read, dcr_write_cb dcr_write); +clk_setup_cb ppc_40x_timers_init (CPUPPCState *env, uint32_t freq, + unsigned int decr_excp); + +/* Embedded PowerPC reset */ +void ppc40x_core_reset(PowerPCCPU *cpu); +void ppc40x_chip_reset(PowerPCCPU *cpu); +void ppc40x_system_reset(PowerPCCPU *cpu); + +#if defined(CONFIG_USER_ONLY) +static inline void ppc40x_irq_init(PowerPCCPU *cpu) {} +static inline void ppc6xx_irq_init(PowerPCCPU *cpu) {} +static inline void ppc970_irq_init(PowerPCCPU *cpu) {} +static inline void ppcPOWER7_irq_init(PowerPCCPU *cpu) {} +static inline void ppcPOWER9_irq_init(PowerPCCPU *cpu) {} +static inline void ppce500_irq_init(PowerPCCPU *cpu) {} +static inline void ppc_irq_reset(PowerPCCPU *cpu) {} +#else +void ppc40x_irq_init(PowerPCCPU *cpu); +void ppce500_irq_init(PowerPCCPU *cpu); +void ppc6xx_irq_init(PowerPCCPU *cpu); +void ppc970_irq_init(PowerPCCPU *cpu); +void ppcPOWER7_irq_init(PowerPCCPU *cpu); +void ppcPOWER9_irq_init(PowerPCCPU *cpu); +void ppc_irq_reset(PowerPCCPU *cpu); +#endif + +/* PPC machines for OpenBIOS */ +enum { + ARCH_PREP = 0, + ARCH_MAC99, + ARCH_HEATHROW, + ARCH_MAC99_U3, +}; + +#define FW_CFG_PPC_WIDTH (FW_CFG_ARCH_LOCAL + 0x00) +#define FW_CFG_PPC_HEIGHT (FW_CFG_ARCH_LOCAL + 0x01) +#define FW_CFG_PPC_DEPTH (FW_CFG_ARCH_LOCAL + 0x02) +#define FW_CFG_PPC_TBFREQ (FW_CFG_ARCH_LOCAL + 0x03) +#define FW_CFG_PPC_CLOCKFREQ (FW_CFG_ARCH_LOCAL + 0x04) +#define FW_CFG_PPC_IS_KVM (FW_CFG_ARCH_LOCAL + 0x05) +#define FW_CFG_PPC_KVM_HC (FW_CFG_ARCH_LOCAL + 0x06) +#define FW_CFG_PPC_KVM_PID (FW_CFG_ARCH_LOCAL + 0x07) +#define FW_CFG_PPC_NVRAM_ADDR (FW_CFG_ARCH_LOCAL + 0x08) +#define FW_CFG_PPC_BUSFREQ (FW_CFG_ARCH_LOCAL + 0x09) +#define FW_CFG_PPC_NVRAM_FLAT (FW_CFG_ARCH_LOCAL + 0x0a) +#define FW_CFG_PPC_VIACONFIG (FW_CFG_ARCH_LOCAL + 0x0b) + +#define PPC_SERIAL_MM_BAUDBASE 399193 + +/* ppc_booke.c */ +void ppc_booke_timers_init(PowerPCCPU *cpu, uint32_t freq, uint32_t flags); +#endif diff --git a/include/hw/ppc/ppc4xx.h b/include/hw/ppc/ppc4xx.h new file mode 100644 index 000000000..cc19c8da5 --- /dev/null +++ b/include/hw/ppc/ppc4xx.h @@ -0,0 +1,61 @@ +/* + * QEMU PowerPC 4xx emulation shared definitions + * + * Copyright (c) 2007 Jocelyn Mayer + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ + +#ifndef PPC4XX_H +#define PPC4XX_H + +#include "hw/ppc/ppc.h" +#include "exec/memory.h" + +/* PowerPC 4xx core initialization */ +PowerPCCPU *ppc4xx_init(const char *cpu_model, + clk_setup_t *cpu_clk, clk_setup_t *tb_clk, + uint32_t sysclk); + +/* PowerPC 4xx universal interrupt controller */ +enum { + PPCUIC_OUTPUT_INT = 0, + PPCUIC_OUTPUT_CINT = 1, + PPCUIC_OUTPUT_NB, +}; +qemu_irq *ppcuic_init (CPUPPCState *env, qemu_irq *irqs, + uint32_t dcr_base, int has_ssr, int has_vr); + +void ppc4xx_sdram_banks(MemoryRegion *ram, int nr_banks, + MemoryRegion ram_memories[], + hwaddr ram_bases[], hwaddr ram_sizes[], + const ram_addr_t sdram_bank_sizes[]); + +void ppc4xx_sdram_init (CPUPPCState *env, qemu_irq irq, int nbanks, + MemoryRegion ram_memories[], + hwaddr *ram_bases, + hwaddr *ram_sizes, + int do_init); + +void ppc4xx_mal_init(CPUPPCState *env, uint8_t txcnum, uint8_t rxcnum, + qemu_irq irqs[4]); + +#define TYPE_PPC4xx_PCI_HOST_BRIDGE "ppc4xx-pcihost" + +#endif /* PPC4XX_H */ diff --git a/include/hw/ppc/ppc_e500.h b/include/hw/ppc/ppc_e500.h new file mode 100644 index 000000000..b66c0e3ee --- /dev/null +++ b/include/hw/ppc/ppc_e500.h @@ -0,0 +1,6 @@ +#ifndef HW_PPC_E500_H +#define HW_PPC_E500_H + +void ppce500_set_mpic_proxy(bool enabled); + +#endif diff --git a/include/hw/ppc/spapr.h b/include/hw/ppc/spapr.h new file mode 100644 index 000000000..2e89e36cf --- /dev/null +++ b/include/hw/ppc/spapr.h @@ -0,0 +1,949 @@ +#ifndef HW_SPAPR_H +#define HW_SPAPR_H + +#include "qemu/units.h" +#include "sysemu/dma.h" +#include "hw/boards.h" +#include "hw/ppc/spapr_drc.h" +#include "hw/mem/pc-dimm.h" +#include "hw/ppc/spapr_ovec.h" +#include "hw/ppc/spapr_irq.h" +#include "qom/object.h" +#include "hw/ppc/spapr_xive.h" /* For SpaprXive */ +#include "hw/ppc/xics.h" /* For ICSState */ +#include "hw/ppc/spapr_tpm_proxy.h" + +struct SpaprVioBus; +struct SpaprPhbState; +struct SpaprNvram; + +typedef struct SpaprEventLogEntry SpaprEventLogEntry; +typedef struct SpaprEventSource SpaprEventSource; +typedef struct SpaprPendingHpt SpaprPendingHpt; + +#define HPTE64_V_HPTE_DIRTY 0x0000000000000040ULL +#define SPAPR_ENTRY_POINT 0x100 + +#define SPAPR_TIMEBASE_FREQ 512000000ULL + +#define TYPE_SPAPR_RTC "spapr-rtc" + +OBJECT_DECLARE_SIMPLE_TYPE(SpaprRtcState, SPAPR_RTC) + +struct SpaprRtcState { + /*< private >*/ + DeviceState parent_obj; + int64_t ns_offset; +}; + +typedef struct SpaprDimmState SpaprDimmState; + +#define TYPE_SPAPR_MACHINE "spapr-machine" +OBJECT_DECLARE_TYPE(SpaprMachineState, SpaprMachineClass, SPAPR_MACHINE) + +typedef enum { + SPAPR_RESIZE_HPT_DEFAULT = 0, + SPAPR_RESIZE_HPT_DISABLED, + SPAPR_RESIZE_HPT_ENABLED, + SPAPR_RESIZE_HPT_REQUIRED, +} SpaprResizeHpt; + +/** + * Capabilities + */ + +/* Hardware Transactional Memory */ +#define SPAPR_CAP_HTM 0x00 +/* Vector Scalar Extensions */ +#define SPAPR_CAP_VSX 0x01 +/* Decimal Floating Point */ +#define SPAPR_CAP_DFP 0x02 +/* Cache Flush on Privilege Change */ +#define SPAPR_CAP_CFPC 0x03 +/* Speculation Barrier Bounds Checking */ +#define SPAPR_CAP_SBBC 0x04 +/* Indirect Branch Serialisation */ +#define SPAPR_CAP_IBS 0x05 +/* HPT Maximum Page Size (encoded as a shift) */ +#define SPAPR_CAP_HPT_MAXPAGESIZE 0x06 +/* Nested KVM-HV */ +#define SPAPR_CAP_NESTED_KVM_HV 0x07 +/* Large Decrementer */ +#define SPAPR_CAP_LARGE_DECREMENTER 0x08 +/* Count Cache Flush Assist HW Instruction */ +#define SPAPR_CAP_CCF_ASSIST 0x09 +/* Implements PAPR FWNMI option */ +#define SPAPR_CAP_FWNMI 0x0A +/* Num Caps */ +#define SPAPR_CAP_NUM (SPAPR_CAP_FWNMI + 1) + +/* + * Capability Values + */ +/* Bool Caps */ +#define SPAPR_CAP_OFF 0x00 +#define SPAPR_CAP_ON 0x01 + +/* Custom Caps */ + +/* Generic */ +#define SPAPR_CAP_BROKEN 0x00 +#define SPAPR_CAP_WORKAROUND 0x01 +#define SPAPR_CAP_FIXED 0x02 +/* SPAPR_CAP_IBS (cap-ibs) */ +#define SPAPR_CAP_FIXED_IBS 0x02 +#define SPAPR_CAP_FIXED_CCD 0x03 +#define SPAPR_CAP_FIXED_NA 0x10 /* Lets leave a bit of a gap... */ + +#define FDT_MAX_SIZE 0x100000 + +/* + * NUMA related macros. MAX_DISTANCE_REF_POINTS was taken + * from Linux kernel arch/powerpc/mm/numa.h. It represents the + * amount of associativity domains for non-CPU resources. + * + * NUMA_ASSOC_SIZE is the base array size of an ibm,associativity + * array for any non-CPU resource. + * + * VCPU_ASSOC_SIZE represents the size of ibm,associativity array + * for CPUs, which has an extra element (vcpu_id) in the end. + */ +#define MAX_DISTANCE_REF_POINTS 4 +#define NUMA_ASSOC_SIZE (MAX_DISTANCE_REF_POINTS + 1) +#define VCPU_ASSOC_SIZE (NUMA_ASSOC_SIZE + 1) + +typedef struct SpaprCapabilities SpaprCapabilities; +struct SpaprCapabilities { + uint8_t caps[SPAPR_CAP_NUM]; +}; + +/** + * SpaprMachineClass: + */ +struct SpaprMachineClass { + /*< private >*/ + MachineClass parent_class; + + /*< public >*/ + bool dr_lmb_enabled; /* enable dynamic-reconfig/hotplug of LMBs */ + bool dr_phb_enabled; /* enable dynamic-reconfig/hotplug of PHBs */ + bool update_dt_enabled; /* enable KVMPPC_H_UPDATE_DT */ + bool use_ohci_by_default; /* use USB-OHCI instead of XHCI */ + bool pre_2_10_has_unused_icps; + bool legacy_irq_allocation; + uint32_t nr_xirqs; + bool broken_host_serial_model; /* present real host info to the guest */ + bool pre_4_1_migration; /* don't migrate hpt-max-page-size */ + bool linux_pci_probe; + bool smp_threads_vsmt; /* set VSMT to smp_threads by default */ + hwaddr rma_limit; /* clamp the RMA to this size */ + bool pre_5_1_assoc_refpoints; + bool pre_5_2_numa_associativity; + + void (*phb_placement)(SpaprMachineState *spapr, uint32_t index, + uint64_t *buid, hwaddr *pio, + hwaddr *mmio32, hwaddr *mmio64, + unsigned n_dma, uint32_t *liobns, hwaddr *nv2gpa, + hwaddr *nv2atsd, Error **errp); + SpaprResizeHpt resize_hpt_default; + SpaprCapabilities default_caps; + SpaprIrq *irq; +}; + +/** + * SpaprMachineState: + */ +struct SpaprMachineState { + /*< private >*/ + MachineState parent_obj; + + struct SpaprVioBus *vio_bus; + QLIST_HEAD(, SpaprPhbState) phbs; + struct SpaprNvram *nvram; + SpaprRtcState rtc; + + SpaprResizeHpt resize_hpt; + void *htab; + uint32_t htab_shift; + uint64_t patb_entry; /* Process tbl registed in H_REGISTER_PROCESS_TABLE */ + SpaprPendingHpt *pending_hpt; /* in-progress resize */ + + hwaddr rma_size; + uint32_t fdt_size; + uint32_t fdt_initial_size; + void *fdt_blob; + long kernel_size; + bool kernel_le; + uint64_t kernel_addr; + uint32_t initrd_base; + long initrd_size; + uint64_t rtc_offset; /* Now used only during incoming migration */ + struct PPCTimebase tb; + bool has_graphics; + uint32_t vsmt; /* Virtual SMT mode (KVM's "core stride") */ + + Notifier epow_notifier; + QTAILQ_HEAD(, SpaprEventLogEntry) pending_events; + bool use_hotplug_event_source; + SpaprEventSource *event_sources; + + /* ibm,client-architecture-support option negotiation */ + bool cas_pre_isa3_guest; + SpaprOptionVector *ov5; /* QEMU-supported option vectors */ + SpaprOptionVector *ov5_cas; /* negotiated (via CAS) option vectors */ + uint32_t max_compat_pvr; + + /* Migration state */ + int htab_save_index; + bool htab_first_pass; + int htab_fd; + + /* Pending DIMM unplug cache. It is populated when a LMB + * unplug starts. It can be regenerated if a migration + * occurs during the unplug process. */ + QTAILQ_HEAD(, SpaprDimmState) pending_dimm_unplugs; + + /* State related to FWNMI option */ + + /* System Reset and Machine Check Notification Routine addresses + * registered by "ibm,nmi-register" RTAS call. + */ + target_ulong fwnmi_system_reset_addr; + target_ulong fwnmi_machine_check_addr; + + /* Machine Check FWNMI synchronization, fwnmi_machine_check_interlock is + * set to -1 if a FWNMI machine check is not in progress, else is set to + * the CPU that was delivered the machine check, and is set back to -1 + * when that CPU makes an "ibm,nmi-interlock" RTAS call. The cond is used + * to synchronize other CPUs. + */ + int fwnmi_machine_check_interlock; + QemuCond fwnmi_machine_check_interlock_cond; + + /*< public >*/ + char *kvm_type; + char *host_model; + char *host_serial; + + int32_t irq_map_nr; + unsigned long *irq_map; + SpaprIrq *irq; + qemu_irq *qirqs; + SpaprInterruptController *active_intc; + ICSState *ics; + SpaprXive *xive; + + bool cmd_line_caps[SPAPR_CAP_NUM]; + SpaprCapabilities def, eff, mig; + + unsigned gpu_numa_id; + SpaprTpmProxy *tpm_proxy; + + uint32_t numa_assoc_array[MAX_NODES][NUMA_ASSOC_SIZE]; + + Error *fwnmi_migration_blocker; +}; + +#define H_SUCCESS 0 +#define H_BUSY 1 /* Hardware busy -- retry later */ +#define H_CLOSED 2 /* Resource closed */ +#define H_NOT_AVAILABLE 3 +#define H_CONSTRAINED 4 /* Resource request constrained to max allowed */ +#define H_PARTIAL 5 +#define H_IN_PROGRESS 14 /* Kind of like busy */ +#define H_PAGE_REGISTERED 15 +#define H_PARTIAL_STORE 16 +#define H_PENDING 17 /* returned from H_POLL_PENDING */ +#define H_CONTINUE 18 /* Returned from H_Join on success */ +#define H_LONG_BUSY_START_RANGE 9900 /* Start of long busy range */ +#define H_LONG_BUSY_ORDER_1_MSEC 9900 /* Long busy, hint that 1msec \ + is a good time to retry */ +#define H_LONG_BUSY_ORDER_10_MSEC 9901 /* Long busy, hint that 10msec \ + is a good time to retry */ +#define H_LONG_BUSY_ORDER_100_MSEC 9902 /* Long busy, hint that 100msec \ + is a good time to retry */ +#define H_LONG_BUSY_ORDER_1_SEC 9903 /* Long busy, hint that 1sec \ + is a good time to retry */ +#define H_LONG_BUSY_ORDER_10_SEC 9904 /* Long busy, hint that 10sec \ + is a good time to retry */ +#define H_LONG_BUSY_ORDER_100_SEC 9905 /* Long busy, hint that 100sec \ + is a good time to retry */ +#define H_LONG_BUSY_END_RANGE 9905 /* End of long busy range */ +#define H_HARDWARE -1 /* Hardware error */ +#define H_FUNCTION -2 /* Function not supported */ +#define H_PRIVILEGE -3 /* Caller not privileged */ +#define H_PARAMETER -4 /* Parameter invalid, out-of-range or conflicting */ +#define H_BAD_MODE -5 /* Illegal msr value */ +#define H_PTEG_FULL -6 /* PTEG is full */ +#define H_NOT_FOUND -7 /* PTE was not found" */ +#define H_RESERVED_DABR -8 /* DABR address is reserved by the hypervisor on this processor" */ +#define H_NO_MEM -9 +#define H_AUTHORITY -10 +#define H_PERMISSION -11 +#define H_DROPPED -12 +#define H_SOURCE_PARM -13 +#define H_DEST_PARM -14 +#define H_REMOTE_PARM -15 +#define H_RESOURCE -16 +#define H_ADAPTER_PARM -17 +#define H_RH_PARM -18 +#define H_RCQ_PARM -19 +#define H_SCQ_PARM -20 +#define H_EQ_PARM -21 +#define H_RT_PARM -22 +#define H_ST_PARM -23 +#define H_SIGT_PARM -24 +#define H_TOKEN_PARM -25 +#define H_MLENGTH_PARM -27 +#define H_MEM_PARM -28 +#define H_MEM_ACCESS_PARM -29 +#define H_ATTR_PARM -30 +#define H_PORT_PARM -31 +#define H_MCG_PARM -32 +#define H_VL_PARM -33 +#define H_TSIZE_PARM -34 +#define H_TRACE_PARM -35 + +#define H_MASK_PARM -37 +#define H_MCG_FULL -38 +#define H_ALIAS_EXIST -39 +#define H_P_COUNTER -40 +#define H_TABLE_FULL -41 +#define H_ALT_TABLE -42 +#define H_MR_CONDITION -43 +#define H_NOT_ENOUGH_RESOURCES -44 +#define H_R_STATE -45 +#define H_RESCINDEND -46 +#define H_P2 -55 +#define H_P3 -56 +#define H_P4 -57 +#define H_P5 -58 +#define H_P6 -59 +#define H_P7 -60 +#define H_P8 -61 +#define H_P9 -62 +#define H_OVERLAP -68 +#define H_UNSUPPORTED_FLAG -256 +#define H_MULTI_THREADS_ACTIVE -9005 + + +/* Long Busy is a condition that can be returned by the firmware + * when a call cannot be completed now, but the identical call + * should be retried later. This prevents calls blocking in the + * firmware for long periods of time. Annoyingly the firmware can return + * a range of return codes, hinting at how long we should wait before + * retrying. If you don't care for the hint, the macro below is a good + * way to check for the long_busy return codes + */ +#define H_IS_LONG_BUSY(x) ((x >= H_LONG_BUSY_START_RANGE) \ + && (x <= H_LONG_BUSY_END_RANGE)) + +/* Flags */ +#define H_LARGE_PAGE (1ULL<<(63-16)) +#define H_EXACT (1ULL<<(63-24)) /* Use exact PTE or return H_PTEG_FULL */ +#define H_R_XLATE (1ULL<<(63-25)) /* include a valid logical page num in the pte if the valid bit is set */ +#define H_READ_4 (1ULL<<(63-26)) /* Return 4 PTEs */ +#define H_PAGE_STATE_CHANGE (1ULL<<(63-28)) +#define H_PAGE_UNUSED ((1ULL<<(63-29)) | (1ULL<<(63-30))) +#define H_PAGE_SET_UNUSED (H_PAGE_STATE_CHANGE | H_PAGE_UNUSED) +#define H_PAGE_SET_LOANED (H_PAGE_SET_UNUSED | (1ULL<<(63-31))) +#define H_PAGE_SET_ACTIVE H_PAGE_STATE_CHANGE +#define H_AVPN (1ULL<<(63-32)) /* An avpn is provided as a sanity test */ +#define H_ANDCOND (1ULL<<(63-33)) +#define H_ICACHE_INVALIDATE (1ULL<<(63-40)) /* icbi, etc. (ignored for IO pages) */ +#define H_ICACHE_SYNCHRONIZE (1ULL<<(63-41)) /* dcbst, icbi, etc (ignored for IO pages */ +#define H_ZERO_PAGE (1ULL<<(63-48)) /* zero the page before mapping (ignored for IO pages) */ +#define H_COPY_PAGE (1ULL<<(63-49)) +#define H_N (1ULL<<(63-61)) +#define H_PP1 (1ULL<<(63-62)) +#define H_PP2 (1ULL<<(63-63)) + +/* Values for 2nd argument to H_SET_MODE */ +#define H_SET_MODE_RESOURCE_SET_CIABR 1 +#define H_SET_MODE_RESOURCE_SET_DAWR 2 +#define H_SET_MODE_RESOURCE_ADDR_TRANS_MODE 3 +#define H_SET_MODE_RESOURCE_LE 4 + +/* Flags for H_SET_MODE_RESOURCE_LE */ +#define H_SET_MODE_ENDIAN_BIG 0 +#define H_SET_MODE_ENDIAN_LITTLE 1 + +/* VASI States */ +#define H_VASI_INVALID 0 +#define H_VASI_ENABLED 1 +#define H_VASI_ABORTED 2 +#define H_VASI_SUSPENDING 3 +#define H_VASI_SUSPENDED 4 +#define H_VASI_RESUMED 5 +#define H_VASI_COMPLETED 6 + +/* DABRX flags */ +#define H_DABRX_HYPERVISOR (1ULL<<(63-61)) +#define H_DABRX_KERNEL (1ULL<<(63-62)) +#define H_DABRX_USER (1ULL<<(63-63)) + +/* Values for KVM_PPC_GET_CPU_CHAR & H_GET_CPU_CHARACTERISTICS */ +#define H_CPU_CHAR_SPEC_BAR_ORI31 PPC_BIT(0) +#define H_CPU_CHAR_BCCTRL_SERIALISED PPC_BIT(1) +#define H_CPU_CHAR_L1D_FLUSH_ORI30 PPC_BIT(2) +#define H_CPU_CHAR_L1D_FLUSH_TRIG2 PPC_BIT(3) +#define H_CPU_CHAR_L1D_THREAD_PRIV PPC_BIT(4) +#define H_CPU_CHAR_HON_BRANCH_HINTS PPC_BIT(5) +#define H_CPU_CHAR_THR_RECONF_TRIG PPC_BIT(6) +#define H_CPU_CHAR_CACHE_COUNT_DIS PPC_BIT(7) +#define H_CPU_CHAR_BCCTR_FLUSH_ASSIST PPC_BIT(9) +#define H_CPU_BEHAV_FAVOUR_SECURITY PPC_BIT(0) +#define H_CPU_BEHAV_L1D_FLUSH_PR PPC_BIT(1) +#define H_CPU_BEHAV_BNDS_CHK_SPEC_BAR PPC_BIT(2) +#define H_CPU_BEHAV_FLUSH_COUNT_CACHE PPC_BIT(5) + +/* Each control block has to be on a 4K boundary */ +#define H_CB_ALIGNMENT 4096 + +/* pSeries hypervisor opcodes */ +#define H_REMOVE 0x04 +#define H_ENTER 0x08 +#define H_READ 0x0c +#define H_CLEAR_MOD 0x10 +#define H_CLEAR_REF 0x14 +#define H_PROTECT 0x18 +#define H_GET_TCE 0x1c +#define H_PUT_TCE 0x20 +#define H_SET_SPRG0 0x24 +#define H_SET_DABR 0x28 +#define H_PAGE_INIT 0x2c +#define H_SET_ASR 0x30 +#define H_ASR_ON 0x34 +#define H_ASR_OFF 0x38 +#define H_LOGICAL_CI_LOAD 0x3c +#define H_LOGICAL_CI_STORE 0x40 +#define H_LOGICAL_CACHE_LOAD 0x44 +#define H_LOGICAL_CACHE_STORE 0x48 +#define H_LOGICAL_ICBI 0x4c +#define H_LOGICAL_DCBF 0x50 +#define H_GET_TERM_CHAR 0x54 +#define H_PUT_TERM_CHAR 0x58 +#define H_REAL_TO_LOGICAL 0x5c +#define H_HYPERVISOR_DATA 0x60 +#define H_EOI 0x64 +#define H_CPPR 0x68 +#define H_IPI 0x6c +#define H_IPOLL 0x70 +#define H_XIRR 0x74 +#define H_PERFMON 0x7c +#define H_MIGRATE_DMA 0x78 +#define H_REGISTER_VPA 0xDC +#define H_CEDE 0xE0 +#define H_CONFER 0xE4 +#define H_PROD 0xE8 +#define H_GET_PPP 0xEC +#define H_SET_PPP 0xF0 +#define H_PURR 0xF4 +#define H_PIC 0xF8 +#define H_REG_CRQ 0xFC +#define H_FREE_CRQ 0x100 +#define H_VIO_SIGNAL 0x104 +#define H_SEND_CRQ 0x108 +#define H_COPY_RDMA 0x110 +#define H_REGISTER_LOGICAL_LAN 0x114 +#define H_FREE_LOGICAL_LAN 0x118 +#define H_ADD_LOGICAL_LAN_BUFFER 0x11C +#define H_SEND_LOGICAL_LAN 0x120 +#define H_BULK_REMOVE 0x124 +#define H_MULTICAST_CTRL 0x130 +#define H_SET_XDABR 0x134 +#define H_STUFF_TCE 0x138 +#define H_PUT_TCE_INDIRECT 0x13C +#define H_CHANGE_LOGICAL_LAN_MAC 0x14C +#define H_VTERM_PARTNER_INFO 0x150 +#define H_REGISTER_VTERM 0x154 +#define H_FREE_VTERM 0x158 +#define H_RESET_EVENTS 0x15C +#define H_ALLOC_RESOURCE 0x160 +#define H_FREE_RESOURCE 0x164 +#define H_MODIFY_QP 0x168 +#define H_QUERY_QP 0x16C +#define H_REREGISTER_PMR 0x170 +#define H_REGISTER_SMR 0x174 +#define H_QUERY_MR 0x178 +#define H_QUERY_MW 0x17C +#define H_QUERY_HCA 0x180 +#define H_QUERY_PORT 0x184 +#define H_MODIFY_PORT 0x188 +#define H_DEFINE_AQP1 0x18C +#define H_GET_TRACE_BUFFER 0x190 +#define H_DEFINE_AQP0 0x194 +#define H_RESIZE_MR 0x198 +#define H_ATTACH_MCQP 0x19C +#define H_DETACH_MCQP 0x1A0 +#define H_CREATE_RPT 0x1A4 +#define H_REMOVE_RPT 0x1A8 +#define H_REGISTER_RPAGES 0x1AC +#define H_DISABLE_AND_GETC 0x1B0 +#define H_ERROR_DATA 0x1B4 +#define H_GET_HCA_INFO 0x1B8 +#define H_GET_PERF_COUNT 0x1BC +#define H_MANAGE_TRACE 0x1C0 +#define H_GET_CPU_CHARACTERISTICS 0x1C8 +#define H_FREE_LOGICAL_LAN_BUFFER 0x1D4 +#define H_QUERY_INT_STATE 0x1E4 +#define H_POLL_PENDING 0x1D8 +#define H_ILLAN_ATTRIBUTES 0x244 +#define H_MODIFY_HEA_QP 0x250 +#define H_QUERY_HEA_QP 0x254 +#define H_QUERY_HEA 0x258 +#define H_QUERY_HEA_PORT 0x25C +#define H_MODIFY_HEA_PORT 0x260 +#define H_REG_BCMC 0x264 +#define H_DEREG_BCMC 0x268 +#define H_REGISTER_HEA_RPAGES 0x26C +#define H_DISABLE_AND_GET_HEA 0x270 +#define H_GET_HEA_INFO 0x274 +#define H_ALLOC_HEA_RESOURCE 0x278 +#define H_ADD_CONN 0x284 +#define H_DEL_CONN 0x288 +#define H_JOIN 0x298 +#define H_VASI_STATE 0x2A4 +#define H_ENABLE_CRQ 0x2B0 +#define H_GET_EM_PARMS 0x2B8 +#define H_SET_MPP 0x2D0 +#define H_GET_MPP 0x2D4 +#define H_HOME_NODE_ASSOCIATIVITY 0x2EC +#define H_XIRR_X 0x2FC +#define H_RANDOM 0x300 +#define H_SET_MODE 0x31C +#define H_RESIZE_HPT_PREPARE 0x36C +#define H_RESIZE_HPT_COMMIT 0x370 +#define H_CLEAN_SLB 0x374 +#define H_INVALIDATE_PID 0x378 +#define H_REGISTER_PROC_TBL 0x37C +#define H_SIGNAL_SYS_RESET 0x380 + +#define H_INT_GET_SOURCE_INFO 0x3A8 +#define H_INT_SET_SOURCE_CONFIG 0x3AC +#define H_INT_GET_SOURCE_CONFIG 0x3B0 +#define H_INT_GET_QUEUE_INFO 0x3B4 +#define H_INT_SET_QUEUE_CONFIG 0x3B8 +#define H_INT_GET_QUEUE_CONFIG 0x3BC +#define H_INT_SET_OS_REPORTING_LINE 0x3C0 +#define H_INT_GET_OS_REPORTING_LINE 0x3C4 +#define H_INT_ESB 0x3C8 +#define H_INT_SYNC 0x3CC +#define H_INT_RESET 0x3D0 +#define H_SCM_READ_METADATA 0x3E4 +#define H_SCM_WRITE_METADATA 0x3E8 +#define H_SCM_BIND_MEM 0x3EC +#define H_SCM_UNBIND_MEM 0x3F0 +#define H_SCM_UNBIND_ALL 0x3FC + +#define MAX_HCALL_OPCODE H_SCM_UNBIND_ALL + +/* The hcalls above are standardized in PAPR and implemented by pHyp + * as well. + * + * We also need some hcalls which are specific to qemu / KVM-on-POWER. + * We put those into the 0xf000-0xfffc range which is reserved by PAPR + * for "platform-specific" hcalls. + */ +#define KVMPPC_HCALL_BASE 0xf000 +#define KVMPPC_H_RTAS (KVMPPC_HCALL_BASE + 0x0) +#define KVMPPC_H_LOGICAL_MEMOP (KVMPPC_HCALL_BASE + 0x1) +/* Client Architecture support */ +#define KVMPPC_H_CAS (KVMPPC_HCALL_BASE + 0x2) +#define KVMPPC_H_UPDATE_DT (KVMPPC_HCALL_BASE + 0x3) +#define KVMPPC_HCALL_MAX KVMPPC_H_UPDATE_DT + +/* + * The hcall range 0xEF00 to 0xEF80 is reserved for use in facilitating + * Secure VM mode via an Ultravisor / Protected Execution Facility + */ +#define SVM_HCALL_BASE 0xEF00 +#define SVM_H_TPM_COMM 0xEF10 +#define SVM_HCALL_MAX SVM_H_TPM_COMM + + +typedef struct SpaprDeviceTreeUpdateHeader { + uint32_t version_id; +} SpaprDeviceTreeUpdateHeader; + +#define hcall_dprintf(fmt, ...) \ + do { \ + qemu_log_mask(LOG_GUEST_ERROR, "%s: " fmt, __func__, ## __VA_ARGS__); \ + } while (0) + +typedef target_ulong (*spapr_hcall_fn)(PowerPCCPU *cpu, SpaprMachineState *sm, + target_ulong opcode, + target_ulong *args); + +void spapr_register_hypercall(target_ulong opcode, spapr_hcall_fn fn); +target_ulong spapr_hypercall(PowerPCCPU *cpu, target_ulong opcode, + target_ulong *args); + +target_ulong do_client_architecture_support(PowerPCCPU *cpu, + SpaprMachineState *spapr, + target_ulong addr, + target_ulong fdt_bufsize); + +/* Virtual Processor Area structure constants */ +#define VPA_MIN_SIZE 640 +#define VPA_SIZE_OFFSET 0x4 +#define VPA_SHARED_PROC_OFFSET 0x9 +#define VPA_SHARED_PROC_VAL 0x2 +#define VPA_DISPATCH_COUNTER 0x100 + +/* ibm,set-eeh-option */ +#define RTAS_EEH_DISABLE 0 +#define RTAS_EEH_ENABLE 1 +#define RTAS_EEH_THAW_IO 2 +#define RTAS_EEH_THAW_DMA 3 + +/* ibm,get-config-addr-info2 */ +#define RTAS_GET_PE_ADDR 0 +#define RTAS_GET_PE_MODE 1 +#define RTAS_PE_MODE_NONE 0 +#define RTAS_PE_MODE_NOT_SHARED 1 +#define RTAS_PE_MODE_SHARED 2 + +/* ibm,read-slot-reset-state2 */ +#define RTAS_EEH_PE_STATE_NORMAL 0 +#define RTAS_EEH_PE_STATE_RESET 1 +#define RTAS_EEH_PE_STATE_STOPPED_IO_DMA 2 +#define RTAS_EEH_PE_STATE_STOPPED_DMA 4 +#define RTAS_EEH_PE_STATE_UNAVAIL 5 +#define RTAS_EEH_NOT_SUPPORT 0 +#define RTAS_EEH_SUPPORT 1 +#define RTAS_EEH_PE_UNAVAIL_INFO 1000 +#define RTAS_EEH_PE_RECOVER_INFO 0 + +/* ibm,set-slot-reset */ +#define RTAS_SLOT_RESET_DEACTIVATE 0 +#define RTAS_SLOT_RESET_HOT 1 +#define RTAS_SLOT_RESET_FUNDAMENTAL 3 + +/* ibm,slot-error-detail */ +#define RTAS_SLOT_TEMP_ERR_LOG 1 +#define RTAS_SLOT_PERM_ERR_LOG 2 + +/* RTAS return codes */ +#define RTAS_OUT_SUCCESS 0 +#define RTAS_OUT_NO_ERRORS_FOUND 1 +#define RTAS_OUT_HW_ERROR -1 +#define RTAS_OUT_BUSY -2 +#define RTAS_OUT_PARAM_ERROR -3 +#define RTAS_OUT_NOT_SUPPORTED -3 +#define RTAS_OUT_NO_SUCH_INDICATOR -3 +#define RTAS_OUT_NOT_AUTHORIZED -9002 +#define RTAS_OUT_SYSPARM_PARAM_ERROR -9999 + +/* DDW pagesize mask values from ibm,query-pe-dma-window */ +#define RTAS_DDW_PGSIZE_4K 0x01 +#define RTAS_DDW_PGSIZE_64K 0x02 +#define RTAS_DDW_PGSIZE_16M 0x04 +#define RTAS_DDW_PGSIZE_32M 0x08 +#define RTAS_DDW_PGSIZE_64M 0x10 +#define RTAS_DDW_PGSIZE_128M 0x20 +#define RTAS_DDW_PGSIZE_256M 0x40 +#define RTAS_DDW_PGSIZE_16G 0x80 + +/* RTAS tokens */ +#define RTAS_TOKEN_BASE 0x2000 + +#define RTAS_DISPLAY_CHARACTER (RTAS_TOKEN_BASE + 0x00) +#define RTAS_GET_TIME_OF_DAY (RTAS_TOKEN_BASE + 0x01) +#define RTAS_SET_TIME_OF_DAY (RTAS_TOKEN_BASE + 0x02) +#define RTAS_POWER_OFF (RTAS_TOKEN_BASE + 0x03) +#define RTAS_SYSTEM_REBOOT (RTAS_TOKEN_BASE + 0x04) +#define RTAS_QUERY_CPU_STOPPED_STATE (RTAS_TOKEN_BASE + 0x05) +#define RTAS_START_CPU (RTAS_TOKEN_BASE + 0x06) +#define RTAS_STOP_SELF (RTAS_TOKEN_BASE + 0x07) +#define RTAS_IBM_GET_SYSTEM_PARAMETER (RTAS_TOKEN_BASE + 0x08) +#define RTAS_IBM_SET_SYSTEM_PARAMETER (RTAS_TOKEN_BASE + 0x09) +#define RTAS_IBM_SET_XIVE (RTAS_TOKEN_BASE + 0x0A) +#define RTAS_IBM_GET_XIVE (RTAS_TOKEN_BASE + 0x0B) +#define RTAS_IBM_INT_OFF (RTAS_TOKEN_BASE + 0x0C) +#define RTAS_IBM_INT_ON (RTAS_TOKEN_BASE + 0x0D) +#define RTAS_CHECK_EXCEPTION (RTAS_TOKEN_BASE + 0x0E) +#define RTAS_EVENT_SCAN (RTAS_TOKEN_BASE + 0x0F) +#define RTAS_IBM_SET_TCE_BYPASS (RTAS_TOKEN_BASE + 0x10) +#define RTAS_QUIESCE (RTAS_TOKEN_BASE + 0x11) +#define RTAS_NVRAM_FETCH (RTAS_TOKEN_BASE + 0x12) +#define RTAS_NVRAM_STORE (RTAS_TOKEN_BASE + 0x13) +#define RTAS_READ_PCI_CONFIG (RTAS_TOKEN_BASE + 0x14) +#define RTAS_WRITE_PCI_CONFIG (RTAS_TOKEN_BASE + 0x15) +#define RTAS_IBM_READ_PCI_CONFIG (RTAS_TOKEN_BASE + 0x16) +#define RTAS_IBM_WRITE_PCI_CONFIG (RTAS_TOKEN_BASE + 0x17) +#define RTAS_IBM_QUERY_INTERRUPT_SOURCE_NUMBER (RTAS_TOKEN_BASE + 0x18) +#define RTAS_IBM_CHANGE_MSI (RTAS_TOKEN_BASE + 0x19) +#define RTAS_SET_INDICATOR (RTAS_TOKEN_BASE + 0x1A) +#define RTAS_SET_POWER_LEVEL (RTAS_TOKEN_BASE + 0x1B) +#define RTAS_GET_POWER_LEVEL (RTAS_TOKEN_BASE + 0x1C) +#define RTAS_GET_SENSOR_STATE (RTAS_TOKEN_BASE + 0x1D) +#define RTAS_IBM_CONFIGURE_CONNECTOR (RTAS_TOKEN_BASE + 0x1E) +#define RTAS_IBM_OS_TERM (RTAS_TOKEN_BASE + 0x1F) +#define RTAS_IBM_SET_EEH_OPTION (RTAS_TOKEN_BASE + 0x20) +#define RTAS_IBM_GET_CONFIG_ADDR_INFO2 (RTAS_TOKEN_BASE + 0x21) +#define RTAS_IBM_READ_SLOT_RESET_STATE2 (RTAS_TOKEN_BASE + 0x22) +#define RTAS_IBM_SET_SLOT_RESET (RTAS_TOKEN_BASE + 0x23) +#define RTAS_IBM_CONFIGURE_PE (RTAS_TOKEN_BASE + 0x24) +#define RTAS_IBM_SLOT_ERROR_DETAIL (RTAS_TOKEN_BASE + 0x25) +#define RTAS_IBM_QUERY_PE_DMA_WINDOW (RTAS_TOKEN_BASE + 0x26) +#define RTAS_IBM_CREATE_PE_DMA_WINDOW (RTAS_TOKEN_BASE + 0x27) +#define RTAS_IBM_REMOVE_PE_DMA_WINDOW (RTAS_TOKEN_BASE + 0x28) +#define RTAS_IBM_RESET_PE_DMA_WINDOW (RTAS_TOKEN_BASE + 0x29) +#define RTAS_IBM_SUSPEND_ME (RTAS_TOKEN_BASE + 0x2A) +#define RTAS_IBM_NMI_REGISTER (RTAS_TOKEN_BASE + 0x2B) +#define RTAS_IBM_NMI_INTERLOCK (RTAS_TOKEN_BASE + 0x2C) + +#define RTAS_TOKEN_MAX (RTAS_TOKEN_BASE + 0x2D) + +/* RTAS ibm,get-system-parameter token values */ +#define RTAS_SYSPARM_SPLPAR_CHARACTERISTICS 20 +#define RTAS_SYSPARM_DIAGNOSTICS_RUN_MODE 42 +#define RTAS_SYSPARM_UUID 48 + +/* RTAS indicator/sensor types + * + * as defined by PAPR+ 2.7 7.3.5.4, Table 41 + * + * NOTE: currently only DR-related sensors are implemented here + */ +#define RTAS_SENSOR_TYPE_ISOLATION_STATE 9001 +#define RTAS_SENSOR_TYPE_DR 9002 +#define RTAS_SENSOR_TYPE_ALLOCATION_STATE 9003 +#define RTAS_SENSOR_TYPE_ENTITY_SENSE RTAS_SENSOR_TYPE_ALLOCATION_STATE + +/* Possible values for the platform-processor-diagnostics-run-mode parameter + * of the RTAS ibm,get-system-parameter call. + */ +#define DIAGNOSTICS_RUN_MODE_DISABLED 0 +#define DIAGNOSTICS_RUN_MODE_STAGGERED 1 +#define DIAGNOSTICS_RUN_MODE_IMMEDIATE 2 +#define DIAGNOSTICS_RUN_MODE_PERIODIC 3 + +static inline uint64_t ppc64_phys_to_real(uint64_t addr) +{ + return addr & ~0xF000000000000000ULL; +} + +static inline uint32_t rtas_ld(target_ulong phys, int n) +{ + return ldl_be_phys(&address_space_memory, ppc64_phys_to_real(phys + 4*n)); +} + +static inline uint64_t rtas_ldq(target_ulong phys, int n) +{ + return (uint64_t)rtas_ld(phys, n) << 32 | rtas_ld(phys, n + 1); +} + +static inline void rtas_st(target_ulong phys, int n, uint32_t val) +{ + stl_be_phys(&address_space_memory, ppc64_phys_to_real(phys + 4*n), val); +} + +typedef void (*spapr_rtas_fn)(PowerPCCPU *cpu, SpaprMachineState *sm, + uint32_t token, + uint32_t nargs, target_ulong args, + uint32_t nret, target_ulong rets); +void spapr_rtas_register(int token, const char *name, spapr_rtas_fn fn); +target_ulong spapr_rtas_call(PowerPCCPU *cpu, SpaprMachineState *sm, + uint32_t token, uint32_t nargs, target_ulong args, + uint32_t nret, target_ulong rets); +void spapr_dt_rtas_tokens(void *fdt, int rtas); +void spapr_load_rtas(SpaprMachineState *spapr, void *fdt, hwaddr addr); + +#define SPAPR_TCE_PAGE_SHIFT 12 +#define SPAPR_TCE_PAGE_SIZE (1ULL << SPAPR_TCE_PAGE_SHIFT) +#define SPAPR_TCE_PAGE_MASK (SPAPR_TCE_PAGE_SIZE - 1) + +#define SPAPR_VIO_BASE_LIOBN 0x00000000 +#define SPAPR_VIO_LIOBN(reg) (0x00000000 | (reg)) +#define SPAPR_PCI_LIOBN(phb_index, window_num) \ + (0x80000000 | ((phb_index) << 8) | (window_num)) +#define SPAPR_IS_PCI_LIOBN(liobn) (!!((liobn) & 0x80000000)) +#define SPAPR_PCI_DMA_WINDOW_NUM(liobn) ((liobn) & 0xff) + +#define RTAS_SIZE 2048 +#define RTAS_ERROR_LOG_MAX 2048 + +/* Offset from rtas-base where error log is placed */ +#define RTAS_ERROR_LOG_OFFSET 0x30 + +#define RTAS_EVENT_SCAN_RATE 1 + +/* This helper should be used to encode interrupt specifiers when the related + * "interrupt-controller" node has its "#interrupt-cells" property set to 2 (ie, + * VIO devices, RTAS event sources and PHBs). + */ +static inline void spapr_dt_irq(uint32_t *intspec, int irq, bool is_lsi) +{ + intspec[0] = cpu_to_be32(irq); + intspec[1] = is_lsi ? cpu_to_be32(1) : 0; +} + + +#define TYPE_SPAPR_TCE_TABLE "spapr-tce-table" +OBJECT_DECLARE_SIMPLE_TYPE(SpaprTceTable, SPAPR_TCE_TABLE) + +#define TYPE_SPAPR_IOMMU_MEMORY_REGION "spapr-iommu-memory-region" +DECLARE_INSTANCE_CHECKER(IOMMUMemoryRegion, SPAPR_IOMMU_MEMORY_REGION, + TYPE_SPAPR_IOMMU_MEMORY_REGION) + +struct SpaprTceTable { + DeviceState parent; + uint32_t liobn; + uint32_t nb_table; + uint64_t bus_offset; + uint32_t page_shift; + uint64_t *table; + uint32_t mig_nb_table; + uint64_t *mig_table; + bool bypass; + bool need_vfio; + bool skipping_replay; + int fd; + MemoryRegion root; + IOMMUMemoryRegion iommu; + struct SpaprVioDevice *vdev; /* for @bypass migration compatibility only */ + QLIST_ENTRY(SpaprTceTable) list; +}; + +SpaprTceTable *spapr_tce_find_by_liobn(target_ulong liobn); + +struct SpaprEventLogEntry { + uint32_t summary; + uint32_t extended_length; + void *extended_log; + QTAILQ_ENTRY(SpaprEventLogEntry) next; +}; + +void *spapr_build_fdt(SpaprMachineState *spapr, bool reset, size_t space); +void spapr_events_init(SpaprMachineState *sm); +void spapr_dt_events(SpaprMachineState *sm, void *fdt); +void close_htab_fd(SpaprMachineState *spapr); +void spapr_setup_hpt(SpaprMachineState *spapr); +void spapr_free_hpt(SpaprMachineState *spapr); +SpaprTceTable *spapr_tce_new_table(DeviceState *owner, uint32_t liobn); +void spapr_tce_table_enable(SpaprTceTable *tcet, + uint32_t page_shift, uint64_t bus_offset, + uint32_t nb_table); +void spapr_tce_table_disable(SpaprTceTable *tcet); +void spapr_tce_set_need_vfio(SpaprTceTable *tcet, bool need_vfio); + +MemoryRegion *spapr_tce_get_iommu(SpaprTceTable *tcet); +int spapr_dma_dt(void *fdt, int node_off, const char *propname, + uint32_t liobn, uint64_t window, uint32_t size); +int spapr_tcet_dma_dt(void *fdt, int node_off, const char *propname, + SpaprTceTable *tcet); +void spapr_pci_switch_vga(bool big_endian); +void spapr_hotplug_req_add_by_index(SpaprDrc *drc); +void spapr_hotplug_req_remove_by_index(SpaprDrc *drc); +void spapr_hotplug_req_add_by_count(SpaprDrcType drc_type, + uint32_t count); +void spapr_hotplug_req_remove_by_count(SpaprDrcType drc_type, + uint32_t count); +void spapr_hotplug_req_add_by_count_indexed(SpaprDrcType drc_type, + uint32_t count, uint32_t index); +void spapr_hotplug_req_remove_by_count_indexed(SpaprDrcType drc_type, + uint32_t count, uint32_t index); +int spapr_hpt_shift_for_ramsize(uint64_t ramsize); +int spapr_reallocate_hpt(SpaprMachineState *spapr, int shift, Error **errp); +void spapr_clear_pending_events(SpaprMachineState *spapr); +void spapr_clear_pending_hotplug_events(SpaprMachineState *spapr); +int spapr_max_server_number(SpaprMachineState *spapr); +void spapr_store_hpte(PowerPCCPU *cpu, hwaddr ptex, + uint64_t pte0, uint64_t pte1); +void spapr_mce_req_event(PowerPCCPU *cpu, bool recovered); +bool spapr_machine_using_legacy_numa(SpaprMachineState *spapr); + +/* DRC callbacks. */ +void spapr_core_release(DeviceState *dev); +int spapr_core_dt_populate(SpaprDrc *drc, SpaprMachineState *spapr, + void *fdt, int *fdt_start_offset, Error **errp); +void spapr_lmb_release(DeviceState *dev); +int spapr_lmb_dt_populate(SpaprDrc *drc, SpaprMachineState *spapr, + void *fdt, int *fdt_start_offset, Error **errp); +void spapr_phb_release(DeviceState *dev); +int spapr_phb_dt_populate(SpaprDrc *drc, SpaprMachineState *spapr, + void *fdt, int *fdt_start_offset, Error **errp); + +void spapr_rtc_read(SpaprRtcState *rtc, struct tm *tm, uint32_t *ns); +int spapr_rtc_import_offset(SpaprRtcState *rtc, int64_t legacy_offset); + +#define TYPE_SPAPR_RNG "spapr-rng" + +#define SPAPR_MEMORY_BLOCK_SIZE ((hwaddr)1 << 28) /* 256MB */ + +/* + * This defines the maximum number of DIMM slots we can have for sPAPR + * guest. This is not defined by sPAPR but we are defining it to 32 slots + * based on default number of slots provided by PowerPC kernel. + */ +#define SPAPR_MAX_RAM_SLOTS 32 + +/* 1GB alignment for hotplug memory region */ +#define SPAPR_DEVICE_MEM_ALIGN (1 * GiB) + +/* + * Number of 32 bit words in each LMB list entry in ibm,dynamic-memory + * property under ibm,dynamic-reconfiguration-memory node. + */ +#define SPAPR_DR_LMB_LIST_ENTRY_SIZE 6 + +/* + * Defines for flag value in ibm,dynamic-memory property under + * ibm,dynamic-reconfiguration-memory node. + */ +#define SPAPR_LMB_FLAGS_ASSIGNED 0x00000008 +#define SPAPR_LMB_FLAGS_DRC_INVALID 0x00000020 +#define SPAPR_LMB_FLAGS_RESERVED 0x00000080 +#define SPAPR_LMB_FLAGS_HOTREMOVABLE 0x00000100 + +void spapr_do_system_reset_on_cpu(CPUState *cs, run_on_cpu_data arg); + +#define HTAB_SIZE(spapr) (1ULL << ((spapr)->htab_shift)) + +int spapr_get_vcpu_id(PowerPCCPU *cpu); +bool spapr_set_vcpu_id(PowerPCCPU *cpu, int cpu_index, Error **errp); +PowerPCCPU *spapr_find_cpu(int vcpu_id); + +int spapr_caps_pre_load(void *opaque); +int spapr_caps_pre_save(void *opaque); + +/* + * Handling of optional capabilities + */ +extern const VMStateDescription vmstate_spapr_cap_htm; +extern const VMStateDescription vmstate_spapr_cap_vsx; +extern const VMStateDescription vmstate_spapr_cap_dfp; +extern const VMStateDescription vmstate_spapr_cap_cfpc; +extern const VMStateDescription vmstate_spapr_cap_sbbc; +extern const VMStateDescription vmstate_spapr_cap_ibs; +extern const VMStateDescription vmstate_spapr_cap_hpt_maxpagesize; +extern const VMStateDescription vmstate_spapr_cap_nested_kvm_hv; +extern const VMStateDescription vmstate_spapr_cap_large_decr; +extern const VMStateDescription vmstate_spapr_cap_ccf_assist; +extern const VMStateDescription vmstate_spapr_cap_fwnmi; + +static inline uint8_t spapr_get_cap(SpaprMachineState *spapr, int cap) +{ + return spapr->eff.caps[cap]; +} + +void spapr_caps_init(SpaprMachineState *spapr); +void spapr_caps_apply(SpaprMachineState *spapr); +void spapr_caps_cpu_apply(SpaprMachineState *spapr, PowerPCCPU *cpu); +void spapr_caps_add_properties(SpaprMachineClass *smc); +int spapr_caps_post_migration(SpaprMachineState *spapr); + +bool spapr_check_pagesize(SpaprMachineState *spapr, hwaddr pagesize, + Error **errp); +/* + * XIVE definitions + */ +#define SPAPR_OV5_XIVE_LEGACY 0x0 +#define SPAPR_OV5_XIVE_EXPLOIT 0x40 +#define SPAPR_OV5_XIVE_BOTH 0x80 /* Only to advertise on the platform */ + +void spapr_set_all_lpcrs(target_ulong value, target_ulong mask); +hwaddr spapr_get_rtas_addr(void); +#endif /* HW_SPAPR_H */ diff --git a/include/hw/ppc/spapr_cpu_core.h b/include/hw/ppc/spapr_cpu_core.h new file mode 100644 index 000000000..dab3dfc76 --- /dev/null +++ b/include/hw/ppc/spapr_cpu_core.h @@ -0,0 +1,58 @@ +/* + * sPAPR CPU core device. + * + * Copyright (C) 2016 Bharata B Rao + * + * This work is licensed under the terms of the GNU GPL, version 2 or later. + * See the COPYING file in the top-level directory. + */ +#ifndef HW_SPAPR_CPU_CORE_H +#define HW_SPAPR_CPU_CORE_H + +#include "hw/cpu/core.h" +#include "hw/qdev-core.h" +#include "target/ppc/cpu-qom.h" +#include "target/ppc/cpu.h" +#include "qom/object.h" + +#define TYPE_SPAPR_CPU_CORE "spapr-cpu-core" +OBJECT_DECLARE_TYPE(SpaprCpuCore, SpaprCpuCoreClass, + SPAPR_CPU_CORE) + +#define SPAPR_CPU_CORE_TYPE_NAME(model) model "-" TYPE_SPAPR_CPU_CORE + +struct SpaprCpuCore { + /*< private >*/ + CPUCore parent_obj; + + /*< public >*/ + PowerPCCPU **threads; + int node_id; + bool pre_3_0_migration; /* older machine don't know about SpaprCpuState */ +}; + +struct SpaprCpuCoreClass { + DeviceClass parent_class; + const char *cpu_type; +}; + +const char *spapr_get_cpu_core_type(const char *cpu_type); +void spapr_cpu_set_entry_state(PowerPCCPU *cpu, target_ulong nip, + target_ulong r1, target_ulong r3, + target_ulong r4); + +typedef struct SpaprCpuState { + uint64_t vpa_addr; + uint64_t slb_shadow_addr, slb_shadow_size; + uint64_t dtl_addr, dtl_size; + bool prod; /* not migrated, only used to improve dispatch latencies */ + struct ICPState *icp; + struct XiveTCTX *tctx; +} SpaprCpuState; + +static inline SpaprCpuState *spapr_cpu_state(PowerPCCPU *cpu) +{ + return (SpaprCpuState *)cpu->machine_data; +} + +#endif diff --git a/include/hw/ppc/spapr_drc.h b/include/hw/ppc/spapr_drc.h new file mode 100644 index 000000000..165b28149 --- /dev/null +++ b/include/hw/ppc/spapr_drc.h @@ -0,0 +1,249 @@ +/* + * QEMU SPAPR Dynamic Reconfiguration Connector Implementation + * + * Copyright IBM Corp. 2014 + * + * Authors: + * Michael Roth + * + * This work is licensed under the terms of the GNU GPL, version 2 or later. + * See the COPYING file in the top-level directory. + */ + +#ifndef HW_SPAPR_DRC_H +#define HW_SPAPR_DRC_H + +#include +#include "qom/object.h" +#include "sysemu/runstate.h" +#include "hw/qdev-core.h" +#include "qapi/error.h" + +#define TYPE_SPAPR_DR_CONNECTOR "spapr-dr-connector" +#define SPAPR_DR_CONNECTOR_GET_CLASS(obj) \ + OBJECT_GET_CLASS(SpaprDrcClass, obj, TYPE_SPAPR_DR_CONNECTOR) +#define SPAPR_DR_CONNECTOR_CLASS(klass) \ + OBJECT_CLASS_CHECK(SpaprDrcClass, klass, \ + TYPE_SPAPR_DR_CONNECTOR) +#define SPAPR_DR_CONNECTOR(obj) OBJECT_CHECK(SpaprDrc, (obj), \ + TYPE_SPAPR_DR_CONNECTOR) + +#define TYPE_SPAPR_DRC_PHYSICAL "spapr-drc-physical" +#define SPAPR_DRC_PHYSICAL(obj) OBJECT_CHECK(SpaprDrcPhysical, (obj), \ + TYPE_SPAPR_DRC_PHYSICAL) + +#define TYPE_SPAPR_DRC_LOGICAL "spapr-drc-logical" + +#define TYPE_SPAPR_DRC_CPU "spapr-drc-cpu" + +#define TYPE_SPAPR_DRC_PCI "spapr-drc-pci" + +#define TYPE_SPAPR_DRC_LMB "spapr-drc-lmb" + +#define TYPE_SPAPR_DRC_PHB "spapr-drc-phb" + +#define TYPE_SPAPR_DRC_PMEM "spapr-drc-pmem" + +/* + * Various hotplug types managed by SpaprDrc + * + * these are somewhat arbitrary, but to make things easier + * when generating DRC indexes later we've aligned the bit + * positions with the values used to assign DRC indexes on + * pSeries. we use those values as bit shifts to allow for + * the OR'ing of these values in various QEMU routines, but + * for values exposed to the guest (via DRC indexes for + * instance) we will use the shift amounts. + */ +typedef enum { + SPAPR_DR_CONNECTOR_TYPE_SHIFT_CPU = 1, + SPAPR_DR_CONNECTOR_TYPE_SHIFT_PHB = 2, + SPAPR_DR_CONNECTOR_TYPE_SHIFT_VIO = 3, + SPAPR_DR_CONNECTOR_TYPE_SHIFT_PCI = 4, + SPAPR_DR_CONNECTOR_TYPE_SHIFT_LMB = 8, + SPAPR_DR_CONNECTOR_TYPE_SHIFT_PMEM = 9, +} SpaprDrcTypeShift; + +typedef enum { + SPAPR_DR_CONNECTOR_TYPE_ANY = ~0, + SPAPR_DR_CONNECTOR_TYPE_CPU = 1 << SPAPR_DR_CONNECTOR_TYPE_SHIFT_CPU, + SPAPR_DR_CONNECTOR_TYPE_PHB = 1 << SPAPR_DR_CONNECTOR_TYPE_SHIFT_PHB, + SPAPR_DR_CONNECTOR_TYPE_VIO = 1 << SPAPR_DR_CONNECTOR_TYPE_SHIFT_VIO, + SPAPR_DR_CONNECTOR_TYPE_PCI = 1 << SPAPR_DR_CONNECTOR_TYPE_SHIFT_PCI, + SPAPR_DR_CONNECTOR_TYPE_LMB = 1 << SPAPR_DR_CONNECTOR_TYPE_SHIFT_LMB, + SPAPR_DR_CONNECTOR_TYPE_PMEM = 1 << SPAPR_DR_CONNECTOR_TYPE_SHIFT_PMEM, +} SpaprDrcType; + +/* + * set via set-indicator RTAS calls + * as documented by PAPR+ 2.7 13.5.3.4, Table 177 + * + * isolated: put device under firmware control + * unisolated: claim OS control of device (may or may not be in use) + */ +typedef enum { + SPAPR_DR_ISOLATION_STATE_ISOLATED = 0, + SPAPR_DR_ISOLATION_STATE_UNISOLATED = 1 +} SpaprDRIsolationState; + +/* + * set via set-indicator RTAS calls + * as documented by PAPR+ 2.7 13.5.3.4, Table 177 + * + * unusable: mark device as unavailable to OS + * usable: mark device as available to OS + * exchange: (currently unused) + * recover: (currently unused) + */ +typedef enum { + SPAPR_DR_ALLOCATION_STATE_UNUSABLE = 0, + SPAPR_DR_ALLOCATION_STATE_USABLE = 1, + SPAPR_DR_ALLOCATION_STATE_EXCHANGE = 2, + SPAPR_DR_ALLOCATION_STATE_RECOVER = 3 +} SpaprDRAllocationState; + +/* + * DR-indicator (LED/visual indicator) + * + * set via set-indicator RTAS calls + * as documented by PAPR+ 2.7 13.5.3.4, Table 177, + * and PAPR+ 2.7 13.5.4.1, Table 180 + * + * inactive: hotpluggable entity inactive and safely removable + * active: hotpluggable entity in use and not safely removable + * identify: (currently unused) + * action: (currently unused) + */ +typedef enum { + SPAPR_DR_INDICATOR_INACTIVE = 0, + SPAPR_DR_INDICATOR_ACTIVE = 1, + SPAPR_DR_INDICATOR_IDENTIFY = 2, + SPAPR_DR_INDICATOR_ACTION = 3, +} SpaprDRIndicatorState; + +/* + * returned via get-sensor-state RTAS calls + * as documented by PAPR+ 2.7 13.5.3.3, Table 175: + * + * empty: connector slot empty (e.g. empty hotpluggable PCI slot) + * present: connector slot populated and device available to OS + * unusable: device not currently available to OS + * exchange: (currently unused) + * recover: (currently unused) + */ +typedef enum { + SPAPR_DR_ENTITY_SENSE_EMPTY = 0, + SPAPR_DR_ENTITY_SENSE_PRESENT = 1, + SPAPR_DR_ENTITY_SENSE_UNUSABLE = 2, + SPAPR_DR_ENTITY_SENSE_EXCHANGE = 3, + SPAPR_DR_ENTITY_SENSE_RECOVER = 4, +} SpaprDREntitySense; + +typedef enum { + SPAPR_DR_CC_RESPONSE_NEXT_SIB = 1, /* currently unused */ + SPAPR_DR_CC_RESPONSE_NEXT_CHILD = 2, + SPAPR_DR_CC_RESPONSE_NEXT_PROPERTY = 3, + SPAPR_DR_CC_RESPONSE_PREV_PARENT = 4, + SPAPR_DR_CC_RESPONSE_SUCCESS = 0, + SPAPR_DR_CC_RESPONSE_ERROR = -1, + SPAPR_DR_CC_RESPONSE_CONTINUE = -2, + SPAPR_DR_CC_RESPONSE_NOT_CONFIGURABLE = -9003, +} SpaprDRCCResponse; + +typedef enum { + /* + * Values come from Fig. 12 in LoPAPR section 13.4 + * + * These are exposed in the migration stream, so don't change + * them. + */ + SPAPR_DRC_STATE_INVALID = 0, + SPAPR_DRC_STATE_LOGICAL_UNUSABLE = 1, + SPAPR_DRC_STATE_LOGICAL_AVAILABLE = 2, + SPAPR_DRC_STATE_LOGICAL_UNISOLATE = 3, + SPAPR_DRC_STATE_LOGICAL_CONFIGURED = 4, + SPAPR_DRC_STATE_PHYSICAL_AVAILABLE = 5, + SPAPR_DRC_STATE_PHYSICAL_POWERON = 6, + SPAPR_DRC_STATE_PHYSICAL_UNISOLATE = 7, + SPAPR_DRC_STATE_PHYSICAL_CONFIGURED = 8, +} SpaprDrcState; + +typedef struct SpaprDrc { + /*< private >*/ + DeviceState parent; + + uint32_t id; + Object *owner; + + uint32_t state; + + /* RTAS ibm,configure-connector state */ + /* (only valid in UNISOLATE state) */ + int ccs_offset; + int ccs_depth; + + /* device pointer, via link property */ + DeviceState *dev; + bool unplug_requested; + void *fdt; + int fdt_start_offset; +} SpaprDrc; + +struct SpaprMachineState; + +typedef struct SpaprDrcClass { + /*< private >*/ + DeviceClass parent; + SpaprDrcState empty_state; + SpaprDrcState ready_state; + + /*< public >*/ + SpaprDrcTypeShift typeshift; + const char *typename; /* used in device tree, PAPR 13.5.2.6 & C.6.1 */ + const char *drc_name_prefix; /* used other places in device tree */ + + SpaprDREntitySense (*dr_entity_sense)(SpaprDrc *drc); + uint32_t (*isolate)(SpaprDrc *drc); + uint32_t (*unisolate)(SpaprDrc *drc); + void (*release)(DeviceState *dev); + + int (*dt_populate)(SpaprDrc *drc, struct SpaprMachineState *spapr, + void *fdt, int *fdt_start_offset, Error **errp); +} SpaprDrcClass; + +typedef struct SpaprDrcPhysical { + /*< private >*/ + SpaprDrc parent; + + /* DR-indicator */ + uint32_t dr_indicator; +} SpaprDrcPhysical; + +static inline bool spapr_drc_hotplugged(DeviceState *dev) +{ + return dev->hotplugged && !runstate_check(RUN_STATE_INMIGRATE); +} + +void spapr_drc_reset(SpaprDrc *drc); + +uint32_t spapr_drc_index(SpaprDrc *drc); +SpaprDrcType spapr_drc_type(SpaprDrc *drc); + +SpaprDrc *spapr_dr_connector_new(Object *owner, const char *type, + uint32_t id); +SpaprDrc *spapr_drc_by_index(uint32_t index); +SpaprDrc *spapr_drc_by_id(const char *type, uint32_t id); +int spapr_dt_drc(void *fdt, int offset, Object *owner, uint32_t drc_type_mask); + +bool spapr_drc_attach(SpaprDrc *drc, DeviceState *d, Error **errp); +void spapr_drc_detach(SpaprDrc *drc); + +/* Returns true if a hot plug/unplug request is pending */ +bool spapr_drc_transient(SpaprDrc *drc); + +static inline bool spapr_drc_unplug_requested(SpaprDrc *drc) +{ + return drc->unplug_requested; +} + +#endif /* HW_SPAPR_DRC_H */ diff --git a/include/hw/ppc/spapr_irq.h b/include/hw/ppc/spapr_irq.h new file mode 100644 index 000000000..c22a72c9e --- /dev/null +++ b/include/hw/ppc/spapr_irq.h @@ -0,0 +1,117 @@ +/* + * QEMU PowerPC sPAPR IRQ backend definitions + * + * Copyright (c) 2018, IBM Corporation. + * + * This code is licensed under the GPL version 2 or later. See the + * COPYING file in the top-level directory. + */ + +#ifndef HW_SPAPR_IRQ_H +#define HW_SPAPR_IRQ_H + +#include "target/ppc/cpu-qom.h" +#include "qom/object.h" + +/* + * IRQ range offsets per device type + */ +#define SPAPR_IRQ_IPI 0x0 + +#define SPAPR_XIRQ_BASE XICS_IRQ_BASE /* 0x1000 */ +#define SPAPR_IRQ_EPOW (SPAPR_XIRQ_BASE + 0x0000) +#define SPAPR_IRQ_HOTPLUG (SPAPR_XIRQ_BASE + 0x0001) +#define SPAPR_IRQ_VIO (SPAPR_XIRQ_BASE + 0x0100) /* 256 VIO devices */ +#define SPAPR_IRQ_PCI_LSI (SPAPR_XIRQ_BASE + 0x0200) /* 32+ PHBs devices */ + +/* Offset of the dynamic range covered by the bitmap allocator */ +#define SPAPR_IRQ_MSI (SPAPR_XIRQ_BASE + 0x0300) + +#define SPAPR_NR_XIRQS 0x1000 + +struct SpaprMachineState; + +typedef struct SpaprInterruptController SpaprInterruptController; + +#define TYPE_SPAPR_INTC "spapr-interrupt-controller" +#define SPAPR_INTC(obj) \ + INTERFACE_CHECK(SpaprInterruptController, (obj), TYPE_SPAPR_INTC) +typedef struct SpaprInterruptControllerClass SpaprInterruptControllerClass; +DECLARE_CLASS_CHECKERS(SpaprInterruptControllerClass, SPAPR_INTC, + TYPE_SPAPR_INTC) + +struct SpaprInterruptControllerClass { + InterfaceClass parent; + + int (*activate)(SpaprInterruptController *intc, uint32_t nr_servers, + Error **errp); + void (*deactivate)(SpaprInterruptController *intc); + + /* + * These methods will typically be called on all intcs, active and + * inactive + */ + int (*cpu_intc_create)(SpaprInterruptController *intc, + PowerPCCPU *cpu, Error **errp); + void (*cpu_intc_reset)(SpaprInterruptController *intc, PowerPCCPU *cpu); + void (*cpu_intc_destroy)(SpaprInterruptController *intc, PowerPCCPU *cpu); + int (*claim_irq)(SpaprInterruptController *intc, int irq, bool lsi, + Error **errp); + void (*free_irq)(SpaprInterruptController *intc, int irq); + + /* These methods should only be called on the active intc */ + void (*set_irq)(SpaprInterruptController *intc, int irq, int val); + void (*print_info)(SpaprInterruptController *intc, Monitor *mon); + void (*dt)(SpaprInterruptController *intc, uint32_t nr_servers, + void *fdt, uint32_t phandle); + int (*post_load)(SpaprInterruptController *intc, int version_id); +}; + +void spapr_irq_update_active_intc(struct SpaprMachineState *spapr); + +int spapr_irq_cpu_intc_create(struct SpaprMachineState *spapr, + PowerPCCPU *cpu, Error **errp); +void spapr_irq_cpu_intc_reset(struct SpaprMachineState *spapr, PowerPCCPU *cpu); +void spapr_irq_cpu_intc_destroy(struct SpaprMachineState *spapr, PowerPCCPU *cpu); +void spapr_irq_print_info(struct SpaprMachineState *spapr, Monitor *mon); +void spapr_irq_dt(struct SpaprMachineState *spapr, uint32_t nr_servers, + void *fdt, uint32_t phandle); + +uint32_t spapr_irq_nr_msis(struct SpaprMachineState *spapr); +int spapr_irq_msi_alloc(struct SpaprMachineState *spapr, uint32_t num, bool align, + Error **errp); +void spapr_irq_msi_free(struct SpaprMachineState *spapr, int irq, uint32_t num); + +typedef struct SpaprIrq { + bool xics; + bool xive; +} SpaprIrq; + +extern SpaprIrq spapr_irq_xics; +extern SpaprIrq spapr_irq_xics_legacy; +extern SpaprIrq spapr_irq_xive; +extern SpaprIrq spapr_irq_dual; + +void spapr_irq_init(struct SpaprMachineState *spapr, Error **errp); +int spapr_irq_claim(struct SpaprMachineState *spapr, int irq, bool lsi, Error **errp); +void spapr_irq_free(struct SpaprMachineState *spapr, int irq, int num); +qemu_irq spapr_qirq(struct SpaprMachineState *spapr, int irq); +int spapr_irq_post_load(struct SpaprMachineState *spapr, int version_id); +void spapr_irq_reset(struct SpaprMachineState *spapr, Error **errp); +int spapr_irq_get_phandle(struct SpaprMachineState *spapr, void *fdt, Error **errp); + +typedef int (*SpaprInterruptControllerInitKvm)(SpaprInterruptController *, + uint32_t, Error **); + +int spapr_irq_init_kvm(SpaprInterruptControllerInitKvm fn, + SpaprInterruptController *intc, + uint32_t nr_servers, + Error **errp); + +/* + * XICS legacy routines + */ +int spapr_irq_find(struct SpaprMachineState *spapr, int num, bool align, Error **errp); +#define spapr_irq_findone(spapr, errp) spapr_irq_find(spapr, 1, false, errp) + +#endif diff --git a/include/hw/ppc/spapr_numa.h b/include/hw/ppc/spapr_numa.h new file mode 100644 index 000000000..b3fd95063 --- /dev/null +++ b/include/hw/ppc/spapr_numa.h @@ -0,0 +1,35 @@ +/* + * QEMU PowerPC pSeries Logical Partition NUMA associativity handling + * + * Copyright IBM Corp. 2020 + * + * Authors: + * Daniel Henrique Barboza + * + * This work is licensed under the terms of the GNU GPL, version 2 or later. + * See the COPYING file in the top-level directory. + */ + +#ifndef HW_SPAPR_NUMA_H +#define HW_SPAPR_NUMA_H + +#include "hw/boards.h" +#include "hw/ppc/spapr.h" + +/* + * Having both SpaprMachineState and MachineState as arguments + * feels odd, but it will spare a MACHINE() call inside the + * function. spapr_machine_init() is the only caller for it, and + * it has both pointers resolved already. + */ +void spapr_numa_associativity_init(SpaprMachineState *spapr, + MachineState *machine); +void spapr_numa_write_rtas_dt(SpaprMachineState *spapr, void *fdt, int rtas); +void spapr_numa_write_associativity_dt(SpaprMachineState *spapr, void *fdt, + int offset, int nodeid); +int spapr_numa_fixup_cpu_dt(SpaprMachineState *spapr, void *fdt, + int offset, PowerPCCPU *cpu); +int spapr_numa_write_assoc_lookup_arrays(SpaprMachineState *spapr, void *fdt, + int offset); + +#endif /* HW_SPAPR_NUMA_H */ diff --git a/include/hw/ppc/spapr_nvdimm.h b/include/hw/ppc/spapr_nvdimm.h new file mode 100644 index 000000000..344582d2f --- /dev/null +++ b/include/hw/ppc/spapr_nvdimm.h @@ -0,0 +1,35 @@ +/* + * QEMU PowerPC PAPR SCM backend definitions + * + * Copyright (c) 2020, IBM Corporation. + * + * This code is licensed under the GPL version 2 or later. See the + * COPYING file in the top-level directory. + */ + +#ifndef HW_SPAPR_NVDIMM_H +#define HW_SPAPR_NVDIMM_H + +#include "hw/mem/nvdimm.h" +#include "hw/ppc/spapr.h" + +/* + * The nvdimm size should be aligned to SCM block size. + * The SCM block size should be aligned to SPAPR_MEMORY_BLOCK_SIZE + * inorder to have SCM regions not to overlap with dimm memory regions. + * The SCM devices can have variable block sizes. For now, fixing the + * block size to the minimum value. + */ +#define SPAPR_MINIMUM_SCM_BLOCK_SIZE SPAPR_MEMORY_BLOCK_SIZE + +/* Have an explicit check for alignment */ +QEMU_BUILD_BUG_ON(SPAPR_MINIMUM_SCM_BLOCK_SIZE % SPAPR_MEMORY_BLOCK_SIZE); + +int spapr_pmem_dt_populate(SpaprDrc *drc, SpaprMachineState *spapr, + void *fdt, int *fdt_start_offset, Error **errp); +void spapr_dt_persistent_memory(SpaprMachineState *spapr, void *fdt); +bool spapr_nvdimm_validate(HotplugHandler *hotplug_dev, NVDIMMDevice *nvdimm, + uint64_t size, Error **errp); +bool spapr_add_nvdimm(DeviceState *dev, uint64_t slot, Error **errp); + +#endif diff --git a/include/hw/ppc/spapr_ovec.h b/include/hw/ppc/spapr_ovec.h new file mode 100644 index 000000000..d4dee9e06 --- /dev/null +++ b/include/hw/ppc/spapr_ovec.h @@ -0,0 +1,81 @@ +/* + * QEMU SPAPR Option/Architecture Vector Definitions + * + * Each architecture option is organized/documented by the following + * in LoPAPR 1.1, Table 244: + * + * : the bit-vector in which the option is located + * : the byte offset of the vector entry + * : the bit offset within the vector entry + * + * where each vector entry can be one or more bytes. + * + * Firmware expects a somewhat literal encoding of this bit-vector + * structure, where each entry is stored in little-endian so that the + * byte ordering reflects that of the documentation, but where each bit + * offset is from "left-to-right" in the traditional representation of + * a byte value where the MSB is the left-most bit. Thus, each + * individual byte encodes the option bits in reverse order of the + * documented bit. + * + * These definitions/helpers attempt to abstract away this internal + * representation so that we can define/set/test for individual option + * bits using only the documented values. This is done mainly by relying + * on a bitmap to approximate the documented "bit-vector" structure and + * handling conversations to-from the internal representation under the + * covers. + * + * Copyright IBM Corp. 2016 + * + * Authors: + * Michael Roth + * + * This work is licensed under the terms of the GNU GPL, version 2 or later. + * See the COPYING file in the top-level directory. + */ + +#ifndef SPAPR_OVEC_H +#define SPAPR_OVEC_H + +#include "cpu.h" + +typedef struct SpaprOptionVector SpaprOptionVector; + +#define OV_BIT(byte, bit) ((byte - 1) * BITS_PER_BYTE + bit) + +/* option vector 1 */ +#define OV1_PPC_3_00 OV_BIT(3, 0) /* guest supports PowerPC 3.00? */ + +/* option vector 5 */ +#define OV5_DRCONF_MEMORY OV_BIT(2, 2) +#define OV5_FORM1_AFFINITY OV_BIT(5, 0) +#define OV5_HP_EVT OV_BIT(6, 5) +#define OV5_HPT_RESIZE OV_BIT(6, 7) +#define OV5_DRMEM_V2 OV_BIT(22, 0) +#define OV5_XIVE_BOTH OV_BIT(23, 0) +#define OV5_XIVE_EXPLOIT OV_BIT(23, 1) /* 1=exploitation 0=legacy */ + +/* ISA 3.00 MMU features: */ +#define OV5_MMU_BOTH OV_BIT(24, 0) /* Radix and hash */ +#define OV5_MMU_RADIX_300 OV_BIT(24, 1) /* 1=Radix only, 0=Hash only */ +#define OV5_MMU_RADIX_GTSE OV_BIT(26, 1) /* Radix GTSE */ + +/* interfaces */ +SpaprOptionVector *spapr_ovec_new(void); +SpaprOptionVector *spapr_ovec_clone(SpaprOptionVector *ov_orig); +void spapr_ovec_intersect(SpaprOptionVector *ov, + SpaprOptionVector *ov1, + SpaprOptionVector *ov2); +bool spapr_ovec_subset(SpaprOptionVector *ov1, SpaprOptionVector *ov2); +void spapr_ovec_cleanup(SpaprOptionVector *ov); +void spapr_ovec_set(SpaprOptionVector *ov, long bitnr); +void spapr_ovec_clear(SpaprOptionVector *ov, long bitnr); +bool spapr_ovec_test(SpaprOptionVector *ov, long bitnr); +SpaprOptionVector *spapr_ovec_parse_vector(target_ulong table_addr, int vector); +int spapr_dt_ovec(void *fdt, int fdt_offset, + SpaprOptionVector *ov, const char *name); + +/* migration */ +extern const VMStateDescription vmstate_spapr_ovec; + +#endif /* SPAPR_OVEC_H */ diff --git a/include/hw/ppc/spapr_rtas.h b/include/hw/ppc/spapr_rtas.h new file mode 100644 index 000000000..383611f10 --- /dev/null +++ b/include/hw/ppc/spapr_rtas.h @@ -0,0 +1,10 @@ +#ifndef HW_SPAPR_RTAS_H +#define HW_SPAPR_RTAS_H +/* + * This work is licensed under the terms of the GNU GPL, version 2 or later. + * See the COPYING file in the top-level directory. + */ + +uint64_t qtest_rtas_call(char *cmd, uint32_t nargs, uint64_t args, + uint32_t nret, uint64_t rets); +#endif /* HW_SPAPR_RTAS_H */ diff --git a/include/hw/ppc/spapr_tpm_proxy.h b/include/hw/ppc/spapr_tpm_proxy.h new file mode 100644 index 000000000..96d2a9697 --- /dev/null +++ b/include/hw/ppc/spapr_tpm_proxy.h @@ -0,0 +1,30 @@ +/* + * SPAPR TPM Proxy/Hypercall + * + * Copyright IBM Corp. 2019 + * + * Authors: + * Michael Roth + * + * This work is licensed under the terms of the GNU GPL, version 2 or later. + * See the COPYING file in the top-level directory. + */ + +#ifndef HW_SPAPR_TPM_PROXY_H +#define HW_SPAPR_TPM_PROXY_H + +#include "qom/object.h" +#include "hw/qdev-core.h" + +#define TYPE_SPAPR_TPM_PROXY "spapr-tpm-proxy" +OBJECT_DECLARE_SIMPLE_TYPE(SpaprTpmProxy, SPAPR_TPM_PROXY) + +struct SpaprTpmProxy { + /*< private >*/ + DeviceState parent; + + char *host_path; + int host_fd; +}; + +#endif /* HW_SPAPR_TPM_PROXY_H */ diff --git a/include/hw/ppc/spapr_vio.h b/include/hw/ppc/spapr_vio.h new file mode 100644 index 000000000..4bea87f39 --- /dev/null +++ b/include/hw/ppc/spapr_vio.h @@ -0,0 +1,141 @@ +#ifndef HW_SPAPR_VIO_H +#define HW_SPAPR_VIO_H + +/* + * QEMU sPAPR VIO bus definitions + * + * Copyright (c) 2010 David Gibson, IBM Corporation + * Based on the s390 virtio bus definitions: + * Copyright (c) 2009 Alexander Graf + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, see . + */ + +#include "hw/ppc/spapr.h" +#include "sysemu/dma.h" +#include "hw/irq.h" +#include "qom/object.h" + +#define TYPE_VIO_SPAPR_DEVICE "vio-spapr-device" +OBJECT_DECLARE_TYPE(SpaprVioDevice, SpaprVioDeviceClass, + VIO_SPAPR_DEVICE) + +#define TYPE_SPAPR_VIO_BUS "spapr-vio-bus" +OBJECT_DECLARE_SIMPLE_TYPE(SpaprVioBus, SPAPR_VIO_BUS) + +#define TYPE_SPAPR_VIO_BRIDGE "spapr-vio-bridge" + +typedef struct SpaprVioCrq { + uint64_t qladdr; + uint32_t qsize; + uint32_t qnext; + int(*SendFunc)(struct SpaprVioDevice *vdev, uint8_t *crq); +} SpaprVioCrq; + + +struct SpaprVioDeviceClass { + DeviceClass parent_class; + + const char *dt_name, *dt_type, *dt_compatible; + target_ulong signal_mask; + uint32_t rtce_window_size; + void (*realize)(SpaprVioDevice *dev, Error **errp); + void (*reset)(SpaprVioDevice *dev); + int (*devnode)(SpaprVioDevice *dev, void *fdt, int node_off); + const char *(*get_dt_compatible)(SpaprVioDevice *dev); +}; + +struct SpaprVioDevice { + DeviceState qdev; + uint32_t reg; + uint32_t irq; + uint64_t signal_state; + SpaprVioCrq crq; + AddressSpace as; + MemoryRegion mrroot; + MemoryRegion mrbypass; + SpaprTceTable *tcet; +}; + +#define DEFINE_SPAPR_PROPERTIES(type, field) \ + DEFINE_PROP_UINT32("reg", type, field.reg, -1) + +struct SpaprVioBus { + BusState bus; + uint32_t next_reg; +}; + +SpaprVioBus *spapr_vio_bus_init(void); +SpaprVioDevice *spapr_vio_find_by_reg(SpaprVioBus *bus, uint32_t reg); +void spapr_dt_vdevice(SpaprVioBus *bus, void *fdt); +gchar *spapr_vio_stdout_path(SpaprVioBus *bus); + +static inline void spapr_vio_irq_pulse(SpaprVioDevice *dev) +{ + SpaprMachineState *spapr = SPAPR_MACHINE(qdev_get_machine()); + + qemu_irq_pulse(spapr_qirq(spapr, dev->irq)); +} + +static inline bool spapr_vio_dma_valid(SpaprVioDevice *dev, uint64_t taddr, + uint32_t size, DMADirection dir) +{ + return dma_memory_valid(&dev->as, taddr, size, dir); +} + +static inline int spapr_vio_dma_read(SpaprVioDevice *dev, uint64_t taddr, + void *buf, uint32_t size) +{ + return (dma_memory_read(&dev->as, taddr, buf, size) != 0) ? + H_DEST_PARM : H_SUCCESS; +} + +static inline int spapr_vio_dma_write(SpaprVioDevice *dev, uint64_t taddr, + const void *buf, uint32_t size) +{ + return (dma_memory_write(&dev->as, taddr, buf, size) != 0) ? + H_DEST_PARM : H_SUCCESS; +} + +static inline int spapr_vio_dma_set(SpaprVioDevice *dev, uint64_t taddr, + uint8_t c, uint32_t size) +{ + return (dma_memory_set(&dev->as, taddr, c, size) != 0) ? + H_DEST_PARM : H_SUCCESS; +} + +#define vio_stb(_dev, _addr, _val) (stb_dma(&(_dev)->as, (_addr), (_val))) +#define vio_sth(_dev, _addr, _val) (stw_be_dma(&(_dev)->as, (_addr), (_val))) +#define vio_stl(_dev, _addr, _val) (stl_be_dma(&(_dev)->as, (_addr), (_val))) +#define vio_stq(_dev, _addr, _val) (stq_be_dma(&(_dev)->as, (_addr), (_val))) +#define vio_ldq(_dev, _addr) (ldq_be_dma(&(_dev)->as, (_addr))) + +int spapr_vio_send_crq(SpaprVioDevice *dev, uint8_t *crq); + +SpaprVioDevice *vty_lookup(SpaprMachineState *spapr, target_ulong reg); +void vty_putchars(SpaprVioDevice *sdev, uint8_t *buf, int len); +void spapr_vty_create(SpaprVioBus *bus, Chardev *chardev); +void spapr_vlan_create(SpaprVioBus *bus, NICInfo *nd); +void spapr_vscsi_create(SpaprVioBus *bus); + +SpaprVioDevice *spapr_vty_get_default(SpaprVioBus *bus); + +extern const VMStateDescription vmstate_spapr_vio; + +#define VMSTATE_SPAPR_VIO(_f, _s) \ + VMSTATE_STRUCT(_f, _s, 0, vmstate_spapr_vio, SpaprVioDevice) + +void spapr_vio_set_bypass(SpaprVioDevice *dev, bool bypass); + +#endif /* HW_SPAPR_VIO_H */ diff --git a/include/hw/ppc/spapr_xive.h b/include/hw/ppc/spapr_xive.h new file mode 100644 index 000000000..26c8d90d7 --- /dev/null +++ b/include/hw/ppc/spapr_xive.h @@ -0,0 +1,101 @@ +/* + * QEMU PowerPC sPAPR XIVE interrupt controller model + * + * Copyright (c) 2017-2018, IBM Corporation. + * + * This code is licensed under the GPL version 2 or later. See the + * COPYING file in the top-level directory. + */ + +#ifndef PPC_SPAPR_XIVE_H +#define PPC_SPAPR_XIVE_H + +#include "hw/ppc/spapr_irq.h" +#include "hw/ppc/xive.h" + +#define TYPE_SPAPR_XIVE "spapr-xive" +#define SPAPR_XIVE(obj) OBJECT_CHECK(SpaprXive, (obj), TYPE_SPAPR_XIVE) +#define SPAPR_XIVE_CLASS(klass) \ + OBJECT_CLASS_CHECK(SpaprXiveClass, (klass), TYPE_SPAPR_XIVE) +#define SPAPR_XIVE_GET_CLASS(obj) \ + OBJECT_GET_CLASS(SpaprXiveClass, (obj), TYPE_SPAPR_XIVE) + +typedef struct SpaprXive { + XiveRouter parent; + + /* Internal interrupt source for IPIs and virtual devices */ + XiveSource source; + hwaddr vc_base; + + /* END ESB MMIOs */ + XiveENDSource end_source; + hwaddr end_base; + + /* DT */ + gchar *nodename; + + /* Routing table */ + XiveEAS *eat; + uint32_t nr_irqs; + XiveEND *endt; + uint32_t nr_ends; + + /* TIMA mapping address */ + hwaddr tm_base; + MemoryRegion tm_mmio; + + /* KVM support */ + int fd; + void *tm_mmap; + MemoryRegion tm_mmio_kvm; + VMChangeStateEntry *change; + + uint8_t hv_prio; +} SpaprXive; + +typedef struct SpaprXiveClass { + XiveRouterClass parent; + + DeviceRealize parent_realize; +} SpaprXiveClass; + +/* + * The sPAPR machine has a unique XIVE IC device. Assign a fixed value + * to the controller block id value. It can nevertheless be changed + * for testing purpose. + */ +#define SPAPR_XIVE_BLOCK_ID 0x0 + +void spapr_xive_pic_print_info(SpaprXive *xive, Monitor *mon); + +struct SpaprMachineState; +void spapr_xive_hcall_init(struct SpaprMachineState *spapr); +void spapr_xive_mmio_set_enabled(SpaprXive *xive, bool enable); +void spapr_xive_map_mmio(SpaprXive *xive); + +int spapr_xive_end_to_target(uint8_t end_blk, uint32_t end_idx, + uint32_t *out_server, uint8_t *out_prio); + +/* + * KVM XIVE device helpers + */ +int kvmppc_xive_connect(SpaprInterruptController *intc, uint32_t nr_servers, + Error **errp); +void kvmppc_xive_disconnect(SpaprInterruptController *intc); +void kvmppc_xive_reset(SpaprXive *xive, Error **errp); +int kvmppc_xive_set_source_config(SpaprXive *xive, uint32_t lisn, XiveEAS *eas, + Error **errp); +void kvmppc_xive_sync_source(SpaprXive *xive, uint32_t lisn, Error **errp); +uint64_t kvmppc_xive_esb_rw(XiveSource *xsrc, int srcno, uint32_t offset, + uint64_t data, bool write); +int kvmppc_xive_set_queue_config(SpaprXive *xive, uint8_t end_blk, + uint32_t end_idx, XiveEND *end, + Error **errp); +int kvmppc_xive_get_queue_config(SpaprXive *xive, uint8_t end_blk, + uint32_t end_idx, XiveEND *end, + Error **errp); +void kvmppc_xive_synchronize_state(SpaprXive *xive, Error **errp); +int kvmppc_xive_pre_save(SpaprXive *xive); +int kvmppc_xive_post_load(SpaprXive *xive, int version_id); + +#endif /* PPC_SPAPR_XIVE_H */ diff --git a/include/hw/ppc/xics.h b/include/hw/ppc/xics.h new file mode 100644 index 000000000..00b80b08c --- /dev/null +++ b/include/hw/ppc/xics.h @@ -0,0 +1,196 @@ +/* + * QEMU PowerPC pSeries Logical Partition (aka sPAPR) hardware System Emulator + * + * PAPR Virtualized Interrupt System, aka ICS/ICP aka xics + * + * Copyright (c) 2010,2011 David Gibson, IBM Corporation. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + * + */ + +#ifndef XICS_H +#define XICS_H + +#include "exec/memory.h" +#include "hw/qdev-core.h" +#include "qom/object.h" + +#define XICS_IPI 0x2 +#define XICS_BUID 0x1 +#define XICS_IRQ_BASE (XICS_BUID << 12) + +/* + * We currently only support one BUID which is our interrupt base + * (the kernel implementation supports more but we don't exploit + * that yet) + */ +typedef struct PnvICPState PnvICPState; +typedef struct ICSStateClass ICSStateClass; +typedef struct ICSState ICSState; +typedef struct ICSIRQState ICSIRQState; +typedef struct XICSFabric XICSFabric; + +#define TYPE_ICP "icp" +OBJECT_DECLARE_TYPE(ICPState, ICPStateClass, + ICP) + +#define TYPE_PNV_ICP "pnv-icp" +DECLARE_INSTANCE_CHECKER(PnvICPState, PNV_ICP, + TYPE_PNV_ICP) + + +struct ICPStateClass { + DeviceClass parent_class; + + DeviceRealize parent_realize; +}; + +struct ICPState { + /*< private >*/ + DeviceState parent_obj; + /*< public >*/ + CPUState *cs; + ICSState *xirr_owner; + uint32_t xirr; + uint8_t pending_priority; + uint8_t mfrr; + qemu_irq output; + + XICSFabric *xics; +}; + +#define ICP_PROP_XICS "xics" +#define ICP_PROP_CPU "cpu" + +struct PnvICPState { + ICPState parent_obj; + + MemoryRegion mmio; + uint32_t links[3]; +}; + +#define TYPE_ICS "ics" +DECLARE_OBJ_CHECKERS(ICSState, ICSStateClass, + ICS, TYPE_ICS) + + +struct ICSStateClass { + DeviceClass parent_class; + + DeviceRealize parent_realize; + DeviceReset parent_reset; + + void (*reject)(ICSState *s, uint32_t irq); + void (*resend)(ICSState *s); +}; + +struct ICSState { + /*< private >*/ + DeviceState parent_obj; + /*< public >*/ + uint32_t nr_irqs; + uint32_t offset; + ICSIRQState *irqs; + XICSFabric *xics; +}; + +#define ICS_PROP_XICS "xics" + +static inline bool ics_valid_irq(ICSState *ics, uint32_t nr) +{ + return (nr >= ics->offset) && (nr < (ics->offset + ics->nr_irqs)); +} + +struct ICSIRQState { + uint32_t server; + uint8_t priority; + uint8_t saved_priority; +#define XICS_STATUS_ASSERTED 0x1 +#define XICS_STATUS_SENT 0x2 +#define XICS_STATUS_REJECTED 0x4 +#define XICS_STATUS_MASKED_PENDING 0x8 +#define XICS_STATUS_PRESENTED 0x10 +#define XICS_STATUS_QUEUED 0x20 + uint8_t status; +/* (flags & XICS_FLAGS_IRQ_MASK) == 0 means the interrupt is not allocated */ +#define XICS_FLAGS_IRQ_LSI 0x1 +#define XICS_FLAGS_IRQ_MSI 0x2 +#define XICS_FLAGS_IRQ_MASK 0x3 + uint8_t flags; +}; + +#define TYPE_XICS_FABRIC "xics-fabric" +#define XICS_FABRIC(obj) \ + INTERFACE_CHECK(XICSFabric, (obj), TYPE_XICS_FABRIC) +typedef struct XICSFabricClass XICSFabricClass; +DECLARE_CLASS_CHECKERS(XICSFabricClass, XICS_FABRIC, + TYPE_XICS_FABRIC) + +struct XICSFabricClass { + InterfaceClass parent; + ICSState *(*ics_get)(XICSFabric *xi, int irq); + void (*ics_resend)(XICSFabric *xi); + ICPState *(*icp_get)(XICSFabric *xi, int server); +}; + +ICPState *xics_icp_get(XICSFabric *xi, int server); + +/* Internal XICS interfaces */ +void icp_set_cppr(ICPState *icp, uint8_t cppr); +void icp_set_mfrr(ICPState *icp, uint8_t mfrr); +uint32_t icp_accept(ICPState *ss); +uint32_t icp_ipoll(ICPState *ss, uint32_t *mfrr); +void icp_eoi(ICPState *icp, uint32_t xirr); +void icp_irq(ICSState *ics, int server, int nr, uint8_t priority); +void icp_reset(ICPState *icp); + +void ics_write_xive(ICSState *ics, int nr, int server, + uint8_t priority, uint8_t saved_priority); +void ics_set_irq(void *opaque, int srcno, int val); + +static inline bool ics_irq_free(ICSState *ics, uint32_t srcno) +{ + return !(ics->irqs[srcno].flags & XICS_FLAGS_IRQ_MASK); +} + +void ics_set_irq_type(ICSState *ics, int srcno, bool lsi); +void icp_pic_print_info(ICPState *icp, Monitor *mon); +void ics_pic_print_info(ICSState *ics, Monitor *mon); + +void ics_resend(ICSState *ics); +void icp_resend(ICPState *ss); + +Object *icp_create(Object *cpu, const char *type, XICSFabric *xi, + Error **errp); +void icp_destroy(ICPState *icp); + +/* KVM */ +void icp_get_kvm_state(ICPState *icp); +int icp_set_kvm_state(ICPState *icp, Error **errp); +void icp_synchronize_state(ICPState *icp); +void icp_kvm_realize(DeviceState *dev, Error **errp); + +void ics_get_kvm_state(ICSState *ics); +int ics_set_kvm_state_one(ICSState *ics, int srcno, Error **errp); +int ics_set_kvm_state(ICSState *ics, Error **errp); +void ics_synchronize_state(ICSState *ics); +void ics_kvm_set_irq(ICSState *ics, int srcno, int val); + +#endif /* XICS_H */ diff --git a/include/hw/ppc/xics_spapr.h b/include/hw/ppc/xics_spapr.h new file mode 100644 index 000000000..0b8182e40 --- /dev/null +++ b/include/hw/ppc/xics_spapr.h @@ -0,0 +1,43 @@ +/* + * QEMU PowerPC pSeries Logical Partition (aka sPAPR) hardware System Emulator + * + * PAPR Virtualized Interrupt System, aka ICS/ICP aka xics + * + * Copyright (c) 2010, 2011 David Gibson, IBM Corporation. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ + +#ifndef XICS_SPAPR_H +#define XICS_SPAPR_H + +#include "hw/ppc/spapr.h" +#include "qom/object.h" + +#define TYPE_ICS_SPAPR "ics-spapr" +/* This is reusing the ICSState typedef from TYPE_ICS */ +DECLARE_INSTANCE_CHECKER(ICSState, ICS_SPAPR, + TYPE_ICS_SPAPR) + +int xics_kvm_connect(SpaprInterruptController *intc, uint32_t nr_servers, + Error **errp); +void xics_kvm_disconnect(SpaprInterruptController *intc); +bool xics_kvm_has_broken_disconnect(SpaprMachineState *spapr); + +#endif /* XICS_SPAPR_H */ diff --git a/include/hw/ppc/xive.h b/include/hw/ppc/xive.h new file mode 100644 index 000000000..445eccfe6 --- /dev/null +++ b/include/hw/ppc/xive.h @@ -0,0 +1,488 @@ +/* + * QEMU PowerPC XIVE interrupt controller model + * + * + * The POWER9 processor comes with a new interrupt controller, called + * XIVE as "eXternal Interrupt Virtualization Engine". + * + * = Overall architecture + * + * + * XIVE Interrupt Controller + * +------------------------------------+ IPIs + * | +---------+ +---------+ +--------+ | +-------+ + * | |VC | |CQ | |PC |----> | CORES | + * | | esb | | | | |----> | | + * | | eas | | Bridge | | tctx |----> | | + * | |SC end | | | | nvt | | | | + * +------+ | +---------+ +----+----+ +--------+ | +-+-+-+-+ + * | RAM | +------------------|-----------------+ | | | + * | | | | | | + * | | | | | | + * | | +--------------------v------------------------v-v-v--+ other + * | <--+ Power Bus +--> chips + * | esb | +---------+-----------------------+------------------+ + * | eas | | | + * | end | +--|------+ | + * | nvt | +----+----+ | +----+----+ + * +------+ |SC | | |SC | + * | | | | | + * | PQ-bits | | | PQ-bits | + * | local |-+ | in VC | + * +---------+ +---------+ + * PCIe NX,NPU,CAPI + * + * SC: Source Controller (aka. IVSE) + * VC: Virtualization Controller (aka. IVRE) + * PC: Presentation Controller (aka. IVPE) + * CQ: Common Queue (Bridge) + * + * PQ-bits: 2 bits source state machine (P:pending Q:queued) + * esb: Event State Buffer (Array of PQ bits in an IVSE) + * eas: Event Assignment Structure + * end: Event Notification Descriptor + * nvt: Notification Virtual Target + * tctx: Thread interrupt Context + * + * + * The XIVE IC is composed of three sub-engines : + * + * - Interrupt Virtualization Source Engine (IVSE), or Source + * Controller (SC). These are found in PCI PHBs, in the PSI host + * bridge controller, but also inside the main controller for the + * core IPIs and other sub-chips (NX, CAP, NPU) of the + * chip/processor. They are configured to feed the IVRE with events. + * + * - Interrupt Virtualization Routing Engine (IVRE) or Virtualization + * Controller (VC). Its job is to match an event source with an + * Event Notification Descriptor (END). + * + * - Interrupt Virtualization Presentation Engine (IVPE) or + * Presentation Controller (PC). It maintains the interrupt context + * state of each thread and handles the delivery of the external + * exception to the thread. + * + * In XIVE 1.0, the sub-engines used to be referred as: + * + * SC Source Controller + * VC Virtualization Controller + * PC Presentation Controller + * CQ Common Queue (PowerBUS Bridge) + * + * + * = XIVE internal tables + * + * Each of the sub-engines uses a set of tables to redirect exceptions + * from event sources to CPU threads. + * + * +-------+ + * User or OS | EQ | + * or +------>|entries| + * Hypervisor | | .. | + * Memory | +-------+ + * | ^ + * | | + * +-------------------------------------------------+ + * | | + * Hypervisor +------+ +---+--+ +---+--+ +------+ + * Memory | ESB | | EAT | | ENDT | | NVTT | + * (skiboot) +----+-+ +----+-+ +----+-+ +------+ + * ^ | ^ | ^ | ^ + * | | | | | | | + * +-------------------------------------------------+ + * | | | | | | | + * | | | | | | | + * +----|--|--------|--|--------|--|-+ +-|-----+ +------+ + * | | | | | | | | | | tctx| |Thread| + * IPI or --> | + v + v + v |---| + .. |-----> | + * HW events --> | | | | | | + * IVSE | IVRE | | IVPE | +------+ + * +---------------------------------+ +-------+ + * + * + * + * The IVSE have a 2-bits state machine, P for pending and Q for queued, + * for each source that allows events to be triggered. They are stored in + * an Event State Buffer (ESB) array and can be controlled by MMIOs. + * + * If the event is let through, the IVRE looks up in the Event Assignment + * Structure (EAS) table for an Event Notification Descriptor (END) + * configured for the source. Each Event Notification Descriptor defines + * a notification path to a CPU and an in-memory Event Queue, in which + * will be enqueued an EQ data for the OS to pull. + * + * The IVPE determines if a Notification Virtual Target (NVT) can + * handle the event by scanning the thread contexts of the VCPUs + * dispatched on the processor HW threads. It maintains the state of + * the thread interrupt context (TCTX) of each thread in a NVT table. + * + * = Acronyms + * + * Description In XIVE 1.0, used to be referred as + * + * EAS Event Assignment Structure IVE Interrupt Virt. Entry + * EAT Event Assignment Table IVT Interrupt Virt. Table + * ENDT Event Notif. Descriptor Table EQDT Event Queue Desc. Table + * EQ Event Queue same + * ESB Event State Buffer SBE State Bit Entry + * NVT Notif. Virtual Target VPD Virtual Processor Desc. + * NVTT Notif. Virtual Target Table VPDT Virtual Processor Desc. Table + * TCTX Thread interrupt Context + * + * + * Copyright (c) 2017-2018, IBM Corporation. + * + * This code is licensed under the GPL version 2 or later. See the + * COPYING file in the top-level directory. + * + */ + +#ifndef PPC_XIVE_H +#define PPC_XIVE_H + +#include "sysemu/kvm.h" +#include "hw/sysbus.h" +#include "hw/ppc/xive_regs.h" +#include "qom/object.h" + +/* + * XIVE Notifier (Interface between Source and Router) + */ + +typedef struct XiveNotifier XiveNotifier; + +#define TYPE_XIVE_NOTIFIER "xive-notifier" +#define XIVE_NOTIFIER(obj) \ + INTERFACE_CHECK(XiveNotifier, (obj), TYPE_XIVE_NOTIFIER) +typedef struct XiveNotifierClass XiveNotifierClass; +DECLARE_CLASS_CHECKERS(XiveNotifierClass, XIVE_NOTIFIER, + TYPE_XIVE_NOTIFIER) + +struct XiveNotifierClass { + InterfaceClass parent; + void (*notify)(XiveNotifier *xn, uint32_t lisn); +}; + +/* + * XIVE Interrupt Source + */ + +#define TYPE_XIVE_SOURCE "xive-source" +OBJECT_DECLARE_SIMPLE_TYPE(XiveSource, XIVE_SOURCE) + +/* + * XIVE Interrupt Source characteristics, which define how the ESB are + * controlled. + */ +#define XIVE_SRC_H_INT_ESB 0x1 /* ESB managed with hcall H_INT_ESB */ +#define XIVE_SRC_STORE_EOI 0x2 /* Store EOI supported */ + +struct XiveSource { + DeviceState parent; + + /* IRQs */ + uint32_t nr_irqs; + unsigned long *lsi_map; + + /* PQ bits and LSI assertion bit */ + uint8_t *status; + + /* ESB memory region */ + uint64_t esb_flags; + uint32_t esb_shift; + MemoryRegion esb_mmio; + MemoryRegion esb_mmio_emulated; + + /* KVM support */ + void *esb_mmap; + MemoryRegion esb_mmio_kvm; + + XiveNotifier *xive; +}; + +/* + * ESB MMIO setting. Can be one page, for both source triggering and + * source management, or two different pages. See below for magic + * values. + */ +#define XIVE_ESB_4K 12 /* PSI HB only */ +#define XIVE_ESB_4K_2PAGE 13 +#define XIVE_ESB_64K 16 +#define XIVE_ESB_64K_2PAGE 17 + +static inline bool xive_source_esb_has_2page(XiveSource *xsrc) +{ + return xsrc->esb_shift == XIVE_ESB_64K_2PAGE || + xsrc->esb_shift == XIVE_ESB_4K_2PAGE; +} + +static inline size_t xive_source_esb_len(XiveSource *xsrc) +{ + return (1ull << xsrc->esb_shift) * xsrc->nr_irqs; +} + +/* The trigger page is always the first/even page */ +static inline hwaddr xive_source_esb_page(XiveSource *xsrc, uint32_t srcno) +{ + assert(srcno < xsrc->nr_irqs); + return (1ull << xsrc->esb_shift) * srcno; +} + +/* In a two pages ESB MMIO setting, the odd page is for management */ +static inline hwaddr xive_source_esb_mgmt(XiveSource *xsrc, int srcno) +{ + hwaddr addr = xive_source_esb_page(xsrc, srcno); + + if (xive_source_esb_has_2page(xsrc)) { + addr += (1 << (xsrc->esb_shift - 1)); + } + + return addr; +} + +/* + * Each interrupt source has a 2-bit state machine which can be + * controlled by MMIO. P indicates that an interrupt is pending (has + * been sent to a queue and is waiting for an EOI). Q indicates that + * the interrupt has been triggered while pending. + * + * This acts as a coalescing mechanism in order to guarantee that a + * given interrupt only occurs at most once in a queue. + * + * When doing an EOI, the Q bit will indicate if the interrupt + * needs to be re-triggered. + */ +#define XIVE_STATUS_ASSERTED 0x4 /* Extra bit for LSI */ +#define XIVE_ESB_VAL_P 0x2 +#define XIVE_ESB_VAL_Q 0x1 + +#define XIVE_ESB_RESET 0x0 +#define XIVE_ESB_PENDING XIVE_ESB_VAL_P +#define XIVE_ESB_QUEUED (XIVE_ESB_VAL_P | XIVE_ESB_VAL_Q) +#define XIVE_ESB_OFF XIVE_ESB_VAL_Q + +/* + * "magic" Event State Buffer (ESB) MMIO offsets. + * + * The following offsets into the ESB MMIO allow to read or manipulate + * the PQ bits. They must be used with an 8-byte load instruction. + * They all return the previous state of the interrupt (atomically). + * + * Additionally, some ESB pages support doing an EOI via a store and + * some ESBs support doing a trigger via a separate trigger page. + */ +#define XIVE_ESB_STORE_EOI 0x400 /* Store */ +#define XIVE_ESB_LOAD_EOI 0x000 /* Load */ +#define XIVE_ESB_GET 0x800 /* Load */ +#define XIVE_ESB_SET_PQ_00 0xc00 /* Load */ +#define XIVE_ESB_SET_PQ_01 0xd00 /* Load */ +#define XIVE_ESB_SET_PQ_10 0xe00 /* Load */ +#define XIVE_ESB_SET_PQ_11 0xf00 /* Load */ + +uint8_t xive_source_esb_get(XiveSource *xsrc, uint32_t srcno); +uint8_t xive_source_esb_set(XiveSource *xsrc, uint32_t srcno, uint8_t pq); + +void xive_source_pic_print_info(XiveSource *xsrc, uint32_t offset, + Monitor *mon); + +static inline bool xive_source_irq_is_lsi(XiveSource *xsrc, uint32_t srcno) +{ + assert(srcno < xsrc->nr_irqs); + return test_bit(srcno, xsrc->lsi_map); +} + +static inline void xive_source_irq_set_lsi(XiveSource *xsrc, uint32_t srcno) +{ + assert(srcno < xsrc->nr_irqs); + bitmap_set(xsrc->lsi_map, srcno, 1); +} + +void xive_source_set_irq(void *opaque, int srcno, int val); + +/* + * XIVE Thread interrupt Management (TM) context + */ + +#define TYPE_XIVE_TCTX "xive-tctx" +OBJECT_DECLARE_SIMPLE_TYPE(XiveTCTX, XIVE_TCTX) + +/* + * XIVE Thread interrupt Management register rings : + * + * QW-0 User event-based exception state + * QW-1 O/S OS context for priority management, interrupt acks + * QW-2 Pool hypervisor pool context for virtual processors dispatched + * QW-3 Physical physical thread context and security context + */ +#define XIVE_TM_RING_COUNT 4 +#define XIVE_TM_RING_SIZE 0x10 + +typedef struct XivePresenter XivePresenter; + +struct XiveTCTX { + DeviceState parent_obj; + + CPUState *cs; + qemu_irq hv_output; + qemu_irq os_output; + + uint8_t regs[XIVE_TM_RING_COUNT * XIVE_TM_RING_SIZE]; + + XivePresenter *xptr; +}; + +/* + * XIVE Router + */ +typedef struct XiveFabric XiveFabric; + +struct XiveRouter { + SysBusDevice parent; + + XiveFabric *xfb; +}; + +#define TYPE_XIVE_ROUTER "xive-router" +OBJECT_DECLARE_TYPE(XiveRouter, XiveRouterClass, + XIVE_ROUTER) + +struct XiveRouterClass { + SysBusDeviceClass parent; + + /* XIVE table accessors */ + int (*get_eas)(XiveRouter *xrtr, uint8_t eas_blk, uint32_t eas_idx, + XiveEAS *eas); + int (*get_end)(XiveRouter *xrtr, uint8_t end_blk, uint32_t end_idx, + XiveEND *end); + int (*write_end)(XiveRouter *xrtr, uint8_t end_blk, uint32_t end_idx, + XiveEND *end, uint8_t word_number); + int (*get_nvt)(XiveRouter *xrtr, uint8_t nvt_blk, uint32_t nvt_idx, + XiveNVT *nvt); + int (*write_nvt)(XiveRouter *xrtr, uint8_t nvt_blk, uint32_t nvt_idx, + XiveNVT *nvt, uint8_t word_number); + uint8_t (*get_block_id)(XiveRouter *xrtr); +}; + +int xive_router_get_eas(XiveRouter *xrtr, uint8_t eas_blk, uint32_t eas_idx, + XiveEAS *eas); +int xive_router_get_end(XiveRouter *xrtr, uint8_t end_blk, uint32_t end_idx, + XiveEND *end); +int xive_router_write_end(XiveRouter *xrtr, uint8_t end_blk, uint32_t end_idx, + XiveEND *end, uint8_t word_number); +int xive_router_get_nvt(XiveRouter *xrtr, uint8_t nvt_blk, uint32_t nvt_idx, + XiveNVT *nvt); +int xive_router_write_nvt(XiveRouter *xrtr, uint8_t nvt_blk, uint32_t nvt_idx, + XiveNVT *nvt, uint8_t word_number); +void xive_router_notify(XiveNotifier *xn, uint32_t lisn); + +/* + * XIVE Presenter + */ + +typedef struct XiveTCTXMatch { + XiveTCTX *tctx; + uint8_t ring; +} XiveTCTXMatch; + +#define TYPE_XIVE_PRESENTER "xive-presenter" +#define XIVE_PRESENTER(obj) \ + INTERFACE_CHECK(XivePresenter, (obj), TYPE_XIVE_PRESENTER) +typedef struct XivePresenterClass XivePresenterClass; +DECLARE_CLASS_CHECKERS(XivePresenterClass, XIVE_PRESENTER, + TYPE_XIVE_PRESENTER) + +struct XivePresenterClass { + InterfaceClass parent; + int (*match_nvt)(XivePresenter *xptr, uint8_t format, + uint8_t nvt_blk, uint32_t nvt_idx, + bool cam_ignore, uint8_t priority, + uint32_t logic_serv, XiveTCTXMatch *match); + bool (*in_kernel)(const XivePresenter *xptr); +}; + +int xive_presenter_tctx_match(XivePresenter *xptr, XiveTCTX *tctx, + uint8_t format, + uint8_t nvt_blk, uint32_t nvt_idx, + bool cam_ignore, uint32_t logic_serv); + +/* + * XIVE Fabric (Interface between Interrupt Controller and Machine) + */ + +#define TYPE_XIVE_FABRIC "xive-fabric" +#define XIVE_FABRIC(obj) \ + INTERFACE_CHECK(XiveFabric, (obj), TYPE_XIVE_FABRIC) +typedef struct XiveFabricClass XiveFabricClass; +DECLARE_CLASS_CHECKERS(XiveFabricClass, XIVE_FABRIC, + TYPE_XIVE_FABRIC) + +struct XiveFabricClass { + InterfaceClass parent; + int (*match_nvt)(XiveFabric *xfb, uint8_t format, + uint8_t nvt_blk, uint32_t nvt_idx, + bool cam_ignore, uint8_t priority, + uint32_t logic_serv, XiveTCTXMatch *match); +}; + +/* + * XIVE END ESBs + */ + +#define TYPE_XIVE_END_SOURCE "xive-end-source" +OBJECT_DECLARE_SIMPLE_TYPE(XiveENDSource, XIVE_END_SOURCE) + +struct XiveENDSource { + DeviceState parent; + + uint32_t nr_ends; + + /* ESB memory region */ + uint32_t esb_shift; + MemoryRegion esb_mmio; + + XiveRouter *xrtr; +}; + +/* + * For legacy compatibility, the exceptions define up to 256 different + * priorities. P9 implements only 9 levels : 8 active levels [0 - 7] + * and the least favored level 0xFF. + */ +#define XIVE_PRIORITY_MAX 7 + +/* + * XIVE Thread Interrupt Management Aera (TIMA) + * + * This region gives access to the registers of the thread interrupt + * management context. It is four page wide, each page providing a + * different view of the registers. The page with the lower offset is + * the most privileged and gives access to the entire context. + */ +#define XIVE_TM_HW_PAGE 0x0 +#define XIVE_TM_HV_PAGE 0x1 +#define XIVE_TM_OS_PAGE 0x2 +#define XIVE_TM_USER_PAGE 0x3 + +void xive_tctx_tm_write(XivePresenter *xptr, XiveTCTX *tctx, hwaddr offset, + uint64_t value, unsigned size); +uint64_t xive_tctx_tm_read(XivePresenter *xptr, XiveTCTX *tctx, hwaddr offset, + unsigned size); + +void xive_tctx_pic_print_info(XiveTCTX *tctx, Monitor *mon); +Object *xive_tctx_create(Object *cpu, XivePresenter *xptr, Error **errp); +void xive_tctx_reset(XiveTCTX *tctx); +void xive_tctx_destroy(XiveTCTX *tctx); +void xive_tctx_ipb_update(XiveTCTX *tctx, uint8_t ring, uint8_t ipb); + +/* + * KVM XIVE device helpers + */ + +int kvmppc_xive_source_reset_one(XiveSource *xsrc, int srcno, Error **errp); +void kvmppc_xive_source_set_irq(void *opaque, int srcno, int val); +int kvmppc_xive_cpu_connect(XiveTCTX *tctx, Error **errp); +int kvmppc_xive_cpu_synchronize_state(XiveTCTX *tctx, Error **errp); +int kvmppc_xive_cpu_get_state(XiveTCTX *tctx, Error **errp); +int kvmppc_xive_cpu_set_state(XiveTCTX *tctx, Error **errp); + +#endif /* PPC_XIVE_H */ diff --git a/include/hw/ppc/xive_regs.h b/include/hw/ppc/xive_regs.h new file mode 100644 index 000000000..787969282 --- /dev/null +++ b/include/hw/ppc/xive_regs.h @@ -0,0 +1,299 @@ +/* + * QEMU PowerPC XIVE internal structure definitions + * + * + * The XIVE structures are accessed by the HW and their format is + * architected to be big-endian. Some macros are provided to ease + * access to the different fields. + * + * + * Copyright (c) 2016-2018, IBM Corporation. + * + * This code is licensed under the GPL version 2 or later. See the + * COPYING file in the top-level directory. + */ + +#ifndef PPC_XIVE_REGS_H +#define PPC_XIVE_REGS_H + +#include "qemu/bswap.h" +#include "qemu/host-utils.h" + +/* + * Interrupt source number encoding on PowerBUS + */ +/* + * Trigger data definition + * + * The trigger definition is used for triggers both for HW source + * interrupts (PHB, PSI), as well as for rerouting interrupts between + * Interrupt Controller. + * + * HW source controllers set bit0 of word0 to ‘0’ as they provide EAS + * information (EAS block + EAS index) in the 8 byte data and not END + * information, which is use for rerouting interrupts. + * + * bit1 of word0 to ‘1’ signals that the state bit check has been + * performed. + */ +#define XIVE_TRIGGER_END PPC_BIT(0) +#define XIVE_TRIGGER_PQ PPC_BIT(1) + +/* + * QEMU macros to manipulate the trigger payload in native endian + */ +#define XIVE_EAS_BLOCK(n) (((n) >> 28) & 0xf) +#define XIVE_EAS_INDEX(n) ((n) & 0x0fffffff) +#define XIVE_EAS(blk, idx) ((uint32_t)(blk) << 28 | (idx)) + +#define TM_SHIFT 16 + +/* TM register offsets */ +#define TM_QW0_USER 0x000 /* All rings */ +#define TM_QW1_OS 0x010 /* Ring 0..2 */ +#define TM_QW2_HV_POOL 0x020 /* Ring 0..1 */ +#define TM_QW3_HV_PHYS 0x030 /* Ring 0..1 */ + +/* Byte offsets inside a QW QW0 QW1 QW2 QW3 */ +#define TM_NSR 0x0 /* + + - + */ +#define TM_CPPR 0x1 /* - + - + */ +#define TM_IPB 0x2 /* - + + + */ +#define TM_LSMFB 0x3 /* - + + + */ +#define TM_ACK_CNT 0x4 /* - + - - */ +#define TM_INC 0x5 /* - + - + */ +#define TM_AGE 0x6 /* - + - + */ +#define TM_PIPR 0x7 /* - + - + */ + +#define TM_WORD0 0x0 +#define TM_WORD1 0x4 + +/* + * QW word 2 contains the valid bit at the top and other fields + * depending on the QW. + */ +#define TM_WORD2 0x8 +#define TM_QW0W2_VU PPC_BIT32(0) +#define TM_QW0W2_LOGIC_SERV PPC_BITMASK32(1, 31) /* XX 2,31 ? */ +#define TM_QW1W2_VO PPC_BIT32(0) +#define TM_QW1W2_OS_CAM PPC_BITMASK32(8, 31) +#define TM_QW2W2_VP PPC_BIT32(0) +#define TM_QW2W2_POOL_CAM PPC_BITMASK32(8, 31) +#define TM_QW3W2_VT PPC_BIT32(0) +#define TM_QW3W2_LP PPC_BIT32(6) +#define TM_QW3W2_LE PPC_BIT32(7) +#define TM_QW3W2_T PPC_BIT32(31) + +/* + * In addition to normal loads to "peek" and writes (only when invalid) + * using 4 and 8 bytes accesses, the above registers support these + * "special" byte operations: + * + * - Byte load from QW0[NSR] - User level NSR (EBB) + * - Byte store to QW0[NSR] - User level NSR (EBB) + * - Byte load/store to QW1[CPPR] and QW3[CPPR] - CPPR access + * - Byte load from QW3[TM_WORD2] - Read VT||00000||LP||LE on thrd 0 + * otherwise VT||0000000 + * - Byte store to QW3[TM_WORD2] - Set VT bit (and LP/LE if present) + * + * Then we have all these "special" CI ops at these offset that trigger + * all sorts of side effects: + */ +#define TM_SPC_ACK_EBB 0x800 /* Load8 ack EBB to reg*/ +#define TM_SPC_ACK_OS_REG 0x810 /* Load16 ack OS irq to reg */ +#define TM_SPC_PUSH_USR_CTX 0x808 /* Store32 Push/Validate user context */ +#define TM_SPC_PULL_USR_CTX 0x808 /* Load32 Pull/Invalidate user + * context */ +#define TM_SPC_SET_OS_PENDING 0x812 /* Store8 Set OS irq pending bit */ +#define TM_SPC_PULL_OS_CTX 0x818 /* Load32/Load64 Pull/Invalidate OS + * context to reg */ +#define TM_SPC_PULL_POOL_CTX 0x828 /* Load32/Load64 Pull/Invalidate Pool + * context to reg*/ +#define TM_SPC_ACK_HV_REG 0x830 /* Load16 ack HV irq to reg */ +#define TM_SPC_PULL_USR_CTX_OL 0xc08 /* Store8 Pull/Inval usr ctx to odd + * line */ +#define TM_SPC_ACK_OS_EL 0xc10 /* Store8 ack OS irq to even line */ +#define TM_SPC_ACK_HV_POOL_EL 0xc20 /* Store8 ack HV evt pool to even + * line */ +#define TM_SPC_ACK_HV_EL 0xc30 /* Store8 ack HV irq to even line */ +/* XXX more... */ + +/* NSR fields for the various QW ack types */ +#define TM_QW0_NSR_EB PPC_BIT8(0) +#define TM_QW1_NSR_EO PPC_BIT8(0) +#define TM_QW3_NSR_HE PPC_BITMASK8(0, 1) +#define TM_QW3_NSR_HE_NONE 0 +#define TM_QW3_NSR_HE_POOL 1 +#define TM_QW3_NSR_HE_PHYS 2 +#define TM_QW3_NSR_HE_LSI 3 +#define TM_QW3_NSR_I PPC_BIT8(2) +#define TM_QW3_NSR_GRP_LVL PPC_BIT8(3, 7) + +/* + * EAS (Event Assignment Structure) + * + * One per interrupt source. Targets an interrupt to a given Event + * Notification Descriptor (END) and provides the corresponding + * logical interrupt number (END data) + */ +typedef struct XiveEAS { + /* + * Use a single 64-bit definition to make it easier to perform + * atomic updates + */ + uint64_t w; +#define EAS_VALID PPC_BIT(0) +#define EAS_END_BLOCK PPC_BITMASK(4, 7) /* Destination END block# */ +#define EAS_END_INDEX PPC_BITMASK(8, 31) /* Destination END index */ +#define EAS_MASKED PPC_BIT(32) /* Masked */ +#define EAS_END_DATA PPC_BITMASK(33, 63) /* Data written to the END */ +} XiveEAS; + +#define xive_eas_is_valid(eas) (be64_to_cpu((eas)->w) & EAS_VALID) +#define xive_eas_is_masked(eas) (be64_to_cpu((eas)->w) & EAS_MASKED) + +void xive_eas_pic_print_info(XiveEAS *eas, uint32_t lisn, Monitor *mon); + +static inline uint64_t xive_get_field64(uint64_t mask, uint64_t word) +{ + return (be64_to_cpu(word) & mask) >> ctz64(mask); +} + +static inline uint64_t xive_set_field64(uint64_t mask, uint64_t word, + uint64_t value) +{ + uint64_t tmp = + (be64_to_cpu(word) & ~mask) | ((value << ctz64(mask)) & mask); + return cpu_to_be64(tmp); +} + +static inline uint32_t xive_get_field32(uint32_t mask, uint32_t word) +{ + return (be32_to_cpu(word) & mask) >> ctz32(mask); +} + +static inline uint32_t xive_set_field32(uint32_t mask, uint32_t word, + uint32_t value) +{ + uint32_t tmp = + (be32_to_cpu(word) & ~mask) | ((value << ctz32(mask)) & mask); + return cpu_to_be32(tmp); +} + +/* Event Notification Descriptor (END) */ +typedef struct XiveEND { + uint32_t w0; +#define END_W0_VALID PPC_BIT32(0) /* "v" bit */ +#define END_W0_ENQUEUE PPC_BIT32(1) /* "q" bit */ +#define END_W0_UCOND_NOTIFY PPC_BIT32(2) /* "n" bit */ +#define END_W0_BACKLOG PPC_BIT32(3) /* "b" bit */ +#define END_W0_PRECL_ESC_CTL PPC_BIT32(4) /* "p" bit */ +#define END_W0_ESCALATE_CTL PPC_BIT32(5) /* "e" bit */ +#define END_W0_UNCOND_ESCALATE PPC_BIT32(6) /* "u" bit - DD2.0 */ +#define END_W0_SILENT_ESCALATE PPC_BIT32(7) /* "s" bit - DD2.0 */ +#define END_W0_QSIZE PPC_BITMASK32(12, 15) +#define END_W0_SW0 PPC_BIT32(16) +#define END_W0_FIRMWARE END_W0_SW0 /* Owned by FW */ +#define END_QSIZE_4K 0 +#define END_QSIZE_64K 4 +#define END_W0_HWDEP PPC_BITMASK32(24, 31) + uint32_t w1; +#define END_W1_ESn PPC_BITMASK32(0, 1) +#define END_W1_ESn_P PPC_BIT32(0) +#define END_W1_ESn_Q PPC_BIT32(1) +#define END_W1_ESe PPC_BITMASK32(2, 3) +#define END_W1_ESe_P PPC_BIT32(2) +#define END_W1_ESe_Q PPC_BIT32(3) +#define END_W1_GENERATION PPC_BIT32(9) +#define END_W1_PAGE_OFF PPC_BITMASK32(10, 31) + uint32_t w2; +#define END_W2_MIGRATION_REG PPC_BITMASK32(0, 3) +#define END_W2_OP_DESC_HI PPC_BITMASK32(4, 31) + uint32_t w3; +#define END_W3_OP_DESC_LO PPC_BITMASK32(0, 31) + uint32_t w4; +#define END_W4_ESC_END_BLOCK PPC_BITMASK32(4, 7) +#define END_W4_ESC_END_INDEX PPC_BITMASK32(8, 31) + uint32_t w5; +#define END_W5_ESC_END_DATA PPC_BITMASK32(1, 31) + uint32_t w6; +#define END_W6_FORMAT_BIT PPC_BIT32(8) +#define END_W6_NVT_BLOCK PPC_BITMASK32(9, 12) +#define END_W6_NVT_INDEX PPC_BITMASK32(13, 31) + uint32_t w7; +#define END_W7_F0_IGNORE PPC_BIT32(0) +#define END_W7_F0_BLK_GROUPING PPC_BIT32(1) +#define END_W7_F0_PRIORITY PPC_BITMASK32(8, 15) +#define END_W7_F1_WAKEZ PPC_BIT32(0) +#define END_W7_F1_LOG_SERVER_ID PPC_BITMASK32(1, 31) +} XiveEND; + +#define xive_end_is_valid(end) (be32_to_cpu((end)->w0) & END_W0_VALID) +#define xive_end_is_enqueue(end) (be32_to_cpu((end)->w0) & END_W0_ENQUEUE) +#define xive_end_is_notify(end) (be32_to_cpu((end)->w0) & END_W0_UCOND_NOTIFY) +#define xive_end_is_backlog(end) (be32_to_cpu((end)->w0) & END_W0_BACKLOG) +#define xive_end_is_escalate(end) (be32_to_cpu((end)->w0) & END_W0_ESCALATE_CTL) +#define xive_end_is_uncond_escalation(end) \ + (be32_to_cpu((end)->w0) & END_W0_UNCOND_ESCALATE) +#define xive_end_is_silent_escalation(end) \ + (be32_to_cpu((end)->w0) & END_W0_SILENT_ESCALATE) + +static inline uint64_t xive_end_qaddr(XiveEND *end) +{ + return ((uint64_t) be32_to_cpu(end->w2) & 0x0fffffff) << 32 | + be32_to_cpu(end->w3); +} + +void xive_end_pic_print_info(XiveEND *end, uint32_t end_idx, Monitor *mon); +void xive_end_queue_pic_print_info(XiveEND *end, uint32_t width, Monitor *mon); +void xive_end_eas_pic_print_info(XiveEND *end, uint32_t end_idx, Monitor *mon); + +/* Notification Virtual Target (NVT) */ +typedef struct XiveNVT { + uint32_t w0; +#define NVT_W0_VALID PPC_BIT32(0) + uint32_t w1; +#define NVT_W1_EQ_BLOCK PPC_BITMASK32(0, 3) +#define NVT_W1_EQ_INDEX PPC_BITMASK32(4, 31) + uint32_t w2; + uint32_t w3; + uint32_t w4; +#define NVT_W4_IPB PPC_BITMASK32(16, 23) + uint32_t w5; + uint32_t w6; + uint32_t w7; + uint32_t w8; +#define NVT_W8_GRP_VALID PPC_BIT32(0) + uint32_t w9; + uint32_t wa; + uint32_t wb; + uint32_t wc; + uint32_t wd; + uint32_t we; + uint32_t wf; +} XiveNVT; + +#define xive_nvt_is_valid(nvt) (be32_to_cpu((nvt)->w0) & NVT_W0_VALID) + +/* + * The VP number space in a block is defined by the END_W6_NVT_INDEX + * field of the XIVE END + */ +#define XIVE_NVT_SHIFT 19 +#define XIVE_NVT_COUNT (1 << XIVE_NVT_SHIFT) + +static inline uint32_t xive_nvt_cam_line(uint8_t nvt_blk, uint32_t nvt_idx) +{ + return (nvt_blk << XIVE_NVT_SHIFT) | nvt_idx; +} + +static inline uint32_t xive_nvt_idx(uint32_t cam_line) +{ + return cam_line & ((1 << XIVE_NVT_SHIFT) - 1); +} + +static inline uint32_t xive_nvt_blk(uint32_t cam_line) +{ + return (cam_line >> XIVE_NVT_SHIFT) & 0xf; +} + +#endif /* PPC_XIVE_REGS_H */ diff --git a/include/hw/ptimer.h b/include/hw/ptimer.h new file mode 100644 index 000000000..412763fff --- /dev/null +++ b/include/hw/ptimer.h @@ -0,0 +1,279 @@ +/* + * General purpose implementation of a simple periodic countdown timer. + * + * Copyright (c) 2007 CodeSourcery. + * + * This code is licensed under the GNU LGPL. + */ +#ifndef PTIMER_H +#define PTIMER_H + +#include "qemu/timer.h" + +/* + * The ptimer API implements a simple periodic countdown timer. + * The countdown timer has a value (which can be read and written via + * ptimer_get_count() and ptimer_set_count()). When it is enabled + * using ptimer_run(), the value will count downwards at the frequency + * which has been configured using ptimer_set_period() or ptimer_set_freq(). + * When it reaches zero it will trigger a callback function, and + * can be set to either reload itself from a specified limit value + * and keep counting down, or to stop (as a one-shot timer). + * + * A transaction-based API is used for modifying ptimer state: all calls + * to functions which modify ptimer state must be between matched calls to + * ptimer_transaction_begin() and ptimer_transaction_commit(). + * When ptimer_transaction_commit() is called it will evaluate the state + * of the timer after all the changes in the transaction, and call the + * callback if necessary. (See the ptimer_init() documentation for the full + * list of state-modifying functions and detailed semantics of the callback.) + * + * Forgetting to set the period/frequency (or setting it to zero) is a + * bug in the QEMU device and will cause warning messages to be printed + * to stderr when the guest attempts to enable the timer. + */ + +/* The default ptimer policy retains backward compatibility with the legacy + * timers. Custom policies are adjusting the default one. Consider providing + * a correct policy for your timer. + * + * The rough edges of the default policy: + * - Starting to run with a period = 0 emits error message and stops the + * timer without a trigger. + * + * - Setting period to 0 of the running timer emits error message and + * stops the timer without a trigger. + * + * - Starting to run with counter = 0 or setting it to "0" while timer + * is running causes a trigger and reloads counter with a limit value. + * If limit = 0, ptimer emits error message and stops the timer. + * + * - Counter value of the running timer is one less than the actual value. + * + * - Changing period/frequency of the running timer loses time elapsed + * since the last period, effectively restarting the timer with a + * counter = counter value at the moment of change (.i.e. one less). + */ +#define PTIMER_POLICY_DEFAULT 0 + +/* Periodic timer counter stays with "0" for a one period before wrapping + * around. */ +#define PTIMER_POLICY_WRAP_AFTER_ONE_PERIOD (1 << 0) + +/* Running periodic timer that has counter = limit = 0 would continuously + * re-trigger every period. */ +#define PTIMER_POLICY_CONTINUOUS_TRIGGER (1 << 1) + +/* Starting to run with/setting counter to "0" won't trigger immediately, + * but after a one period for both oneshot and periodic modes. */ +#define PTIMER_POLICY_NO_IMMEDIATE_TRIGGER (1 << 2) + +/* Starting to run with/setting counter to "0" won't re-load counter + * immediately, but after a one period. */ +#define PTIMER_POLICY_NO_IMMEDIATE_RELOAD (1 << 3) + +/* Make counter value of the running timer represent the actual value and + * not the one less. */ +#define PTIMER_POLICY_NO_COUNTER_ROUND_DOWN (1 << 4) + +/* + * Starting to run with a zero counter, or setting the counter to "0" via + * ptimer_set_count() or ptimer_set_limit() will not trigger the timer + * (though it will cause a reload). Only a counter decrement to "0" + * will cause a trigger. Not compatible with NO_IMMEDIATE_TRIGGER; + * ptimer_init() will assert() that you don't set both. + */ +#define PTIMER_POLICY_TRIGGER_ONLY_ON_DECREMENT (1 << 5) + +/* ptimer.c */ +typedef struct ptimer_state ptimer_state; +typedef void (*ptimer_cb)(void *opaque); + +/** + * ptimer_init - Allocate and return a new ptimer + * @callback: function to call on ptimer expiry + * @callback_opaque: opaque pointer passed to @callback + * @policy: PTIMER_POLICY_* bits specifying behaviour + * + * The ptimer returned must be freed using ptimer_free(). + * + * If a ptimer is created using this API then will use the + * transaction-based API for modifying ptimer state: all calls + * to functions which modify ptimer state: + * - ptimer_set_period() + * - ptimer_set_freq() + * - ptimer_set_limit() + * - ptimer_set_count() + * - ptimer_run() + * - ptimer_stop() + * must be between matched calls to ptimer_transaction_begin() + * and ptimer_transaction_commit(). When ptimer_transaction_commit() + * is called it will evaluate the state of the timer after all the + * changes in the transaction, and call the callback if necessary. + * + * The callback function is always called from within a transaction + * begin/commit block, so the callback should not call the + * ptimer_transaction_begin() function itself. If the callback changes + * the ptimer state such that another ptimer expiry is triggered, then + * the callback will be called a second time after the first call returns. + */ +ptimer_state *ptimer_init(ptimer_cb callback, + void *callback_opaque, + uint8_t policy_mask); + +/** + * ptimer_free - Free a ptimer + * @s: timer to free + * + * Free a ptimer created using ptimer_init(). + */ +void ptimer_free(ptimer_state *s); + +/** + * ptimer_transaction_begin() - Start a ptimer modification transaction + * + * This function must be called before making any calls to functions + * which modify the ptimer's state (see the ptimer_init() documentation + * for a list of these), and must always have a matched call to + * ptimer_transaction_commit(). + * It is an error to call this function for a BH-based ptimer; + * attempting to do this will trigger an assert. + */ +void ptimer_transaction_begin(ptimer_state *s); + +/** + * ptimer_transaction_commit() - Commit a ptimer modification transaction + * + * This function must be called after calls to functions which modify + * the ptimer's state, and completes the update of the ptimer. If the + * ptimer state now means that we should trigger the timer expiry + * callback, it will be called directly. + */ +void ptimer_transaction_commit(ptimer_state *s); + +/** + * ptimer_set_period - Set counter increment interval in nanoseconds + * @s: ptimer to configure + * @period: period of the counter in nanoseconds + * + * Note that if your counter behaviour is specified as having a + * particular frequency rather than a period then ptimer_set_freq() + * may be more appropriate. + * + * This function will assert if it is called outside a + * ptimer_transaction_begin/commit block. + */ +void ptimer_set_period(ptimer_state *s, int64_t period); + +/** + * ptimer_set_freq - Set counter frequency in Hz + * @s: ptimer to configure + * @freq: counter frequency in Hz + * + * This does the same thing as ptimer_set_period(), so you only + * need to call one of them. If the counter behaviour is specified + * as setting the frequency then this function is more appropriate, + * because it allows specifying an effective period which is + * precise to fractions of a nanosecond, avoiding rounding errors. + * + * This function will assert if it is called outside a + * ptimer_transaction_begin/commit block. + */ +void ptimer_set_freq(ptimer_state *s, uint32_t freq); + +/** + * ptimer_get_limit - Get the configured limit of the ptimer + * @s: ptimer to query + * + * This function returns the current limit (reload) value + * of the down-counter; that is, the value which it will be + * reset to when it hits zero. + * + * Generally timer devices using ptimers should be able to keep + * their reload register state inside the ptimer using the get + * and set limit functions rather than needing to also track it + * in their own state structure. + */ +uint64_t ptimer_get_limit(ptimer_state *s); + +/** + * ptimer_set_limit - Set the limit of the ptimer + * @s: ptimer + * @limit: initial countdown value + * @reload: if nonzero, then reset the counter to the new limit + * + * Set the limit value of the down-counter. The @reload flag can + * be used to emulate the behaviour of timers which immediately + * reload the counter when their reload register is written to. + * + * This function will assert if it is called outside a + * ptimer_transaction_begin/commit block. + */ +void ptimer_set_limit(ptimer_state *s, uint64_t limit, int reload); + +/** + * ptimer_get_count - Get the current value of the ptimer + * @s: ptimer + * + * Return the current value of the down-counter. This will + * return the correct value whether the counter is enabled or + * disabled. + */ +uint64_t ptimer_get_count(ptimer_state *s); + +/** + * ptimer_set_count - Set the current value of the ptimer + * @s: ptimer + * @count: count value to set + * + * Set the value of the down-counter. If the counter is currently + * enabled this will arrange for a timer callback at the appropriate + * point in the future. + * + * This function will assert if it is called outside a + * ptimer_transaction_begin/commit block. + */ +void ptimer_set_count(ptimer_state *s, uint64_t count); + +/** + * ptimer_run - Start a ptimer counting + * @s: ptimer + * @oneshot: non-zero if this timer should only count down once + * + * Start a ptimer counting down; when it reaches zero the callback function + * passed to ptimer_init() will be invoked. + * If the @oneshot argument is zero, + * the counter value will then be reloaded from the limit and it will + * start counting down again. If @oneshot is non-zero, then the counter + * will disable itself when it reaches zero. + * + * This function will assert if it is called outside a + * ptimer_transaction_begin/commit block. + */ +void ptimer_run(ptimer_state *s, int oneshot); + +/** + * ptimer_stop - Stop a ptimer counting + * @s: ptimer + * + * Pause a timer (the count stays at its current value until ptimer_run() + * is called to start it counting again). + * + * Note that this can cause it to "lose" time, even if it is immediately + * restarted. + * + * This function will assert if it is called outside a + * ptimer_transaction_begin/commit block. + */ +void ptimer_stop(ptimer_state *s); + +extern const VMStateDescription vmstate_ptimer; + +#define VMSTATE_PTIMER(_field, _state) \ + VMSTATE_STRUCT_POINTER_V(_field, _state, 1, vmstate_ptimer, ptimer_state) + +#define VMSTATE_PTIMER_ARRAY(_f, _s, _n) \ + VMSTATE_ARRAY_OF_POINTER_TO_STRUCT(_f, _s, _n, 0, \ + vmstate_ptimer, ptimer_state) + +#endif diff --git a/include/hw/qdev-clock.h b/include/hw/qdev-clock.h new file mode 100644 index 000000000..64ca4d266 --- /dev/null +++ b/include/hw/qdev-clock.h @@ -0,0 +1,157 @@ +/* + * Device's clock input and output + * + * Copyright GreenSocs 2016-2020 + * + * Authors: + * Frederic Konrad + * Damien Hedde + * + * This work is licensed under the terms of the GNU GPL, version 2 or later. + * See the COPYING file in the top-level directory. + */ + +#ifndef QDEV_CLOCK_H +#define QDEV_CLOCK_H + +#include "hw/clock.h" + +/** + * qdev_init_clock_in: + * @dev: the device to add an input clock to + * @name: the name of the clock (can't be NULL). + * @callback: optional callback to be called on update or NULL. + * @opaque: argument for the callback + * @returns: a pointer to the newly added clock + * + * Add an input clock to device @dev as a clock named @name. + * This adds a child<> property. + * The callback will be called with @opaque as opaque parameter. + */ +Clock *qdev_init_clock_in(DeviceState *dev, const char *name, + ClockCallback *callback, void *opaque); + +/** + * qdev_init_clock_out: + * @dev: the device to add an output clock to + * @name: the name of the clock (can't be NULL). + * @returns: a pointer to the newly added clock + * + * Add an output clock to device @dev as a clock named @name. + * This adds a child<> property. + */ +Clock *qdev_init_clock_out(DeviceState *dev, const char *name); + +/** + * qdev_get_clock_in: + * @dev: the device which has the clock + * @name: the name of the clock (can't be NULL). + * @returns: a pointer to the clock + * + * Get the input clock @name from @dev or NULL if does not exist. + */ +Clock *qdev_get_clock_in(DeviceState *dev, const char *name); + +/** + * qdev_get_clock_out: + * @dev: the device which has the clock + * @name: the name of the clock (can't be NULL). + * @returns: a pointer to the clock + * + * Get the output clock @name from @dev or NULL if does not exist. + */ +Clock *qdev_get_clock_out(DeviceState *dev, const char *name); + +/** + * qdev_connect_clock_in: + * @dev: a device + * @name: the name of an input clock in @dev + * @source: the source clock (an output clock of another device for example) + * + * Set the source clock of input clock @name of device @dev to @source. + * @source period update will be propagated to @name clock. + * + * Must be called before @dev is realized. + */ +void qdev_connect_clock_in(DeviceState *dev, const char *name, Clock *source); + +/** + * qdev_alias_clock: + * @dev: the device which has the clock + * @name: the name of the clock in @dev (can't be NULL) + * @alias_dev: the device to add the clock + * @alias_name: the name of the clock in @container + * @returns: a pointer to the clock + * + * Add a clock @alias_name in @alias_dev which is an alias of the clock @name + * in @dev. The direction _in_ or _out_ will the same as the original. + * An alias clock must not be modified or used by @alias_dev and should + * typically be only only for device composition purpose. + */ +Clock *qdev_alias_clock(DeviceState *dev, const char *name, + DeviceState *alias_dev, const char *alias_name); + +/** + * qdev_finalize_clocklist: + * @dev: the device being finalized + * + * Clear the clocklist from @dev. Only used internally in qdev. + */ +void qdev_finalize_clocklist(DeviceState *dev); + +/** + * ClockPortInitElem: + * @name: name of the clock (can't be NULL) + * @output: indicates whether the clock is input or output + * @callback: for inputs, optional callback to be called on clock's update + * with device as opaque + * @offset: optional offset to store the ClockIn or ClockOut pointer in device + * state structure (0 means unused) + */ +struct ClockPortInitElem { + const char *name; + bool is_output; + ClockCallback *callback; + size_t offset; +}; + +#define clock_offset_value(devstate, field) \ + (offsetof(devstate, field) + \ + type_check(Clock *, typeof_field(devstate, field))) + +#define QDEV_CLOCK(out_not_in, devstate, field, cb) { \ + .name = (stringify(field)), \ + .is_output = out_not_in, \ + .callback = cb, \ + .offset = clock_offset_value(devstate, field), \ +} + +/** + * QDEV_CLOCK_(IN|OUT): + * @devstate: structure type. @dev argument of qdev_init_clocks below must be + * a pointer to that same type. + * @field: a field in @_devstate (must be Clock*) + * @callback: (for input only) callback (or NULL) to be called with the device + * state as argument + * + * The name of the clock will be derived from @field + */ +#define QDEV_CLOCK_IN(devstate, field, callback) \ + QDEV_CLOCK(false, devstate, field, callback) + +#define QDEV_CLOCK_OUT(devstate, field) \ + QDEV_CLOCK(true, devstate, field, NULL) + +#define QDEV_CLOCK_END { .name = NULL } + +typedef struct ClockPortInitElem ClockPortInitArray[]; + +/** + * qdev_init_clocks: + * @dev: the device to add clocks to + * @clocks: a QDEV_CLOCK_END-terminated array which contains the + * clocks information. + */ +void qdev_init_clocks(DeviceState *dev, const ClockPortInitArray clocks); + +#endif /* QDEV_CLOCK_H */ diff --git a/include/hw/qdev-core.h b/include/hw/qdev-core.h new file mode 100644 index 000000000..5e737195b --- /dev/null +++ b/include/hw/qdev-core.h @@ -0,0 +1,857 @@ +#ifndef QDEV_CORE_H +#define QDEV_CORE_H + +#include "qemu/queue.h" +#include "qemu/bitmap.h" +#include "qemu/rcu.h" +#include "qemu/rcu_queue.h" +#include "qom/object.h" +#include "hw/hotplug.h" +#include "hw/resettable.h" + +enum { + DEV_NVECTORS_UNSPECIFIED = -1, +}; + +#define TYPE_DEVICE "device" +OBJECT_DECLARE_TYPE(DeviceState, DeviceClass, DEVICE) + +typedef enum DeviceCategory { + DEVICE_CATEGORY_BRIDGE, + DEVICE_CATEGORY_USB, + DEVICE_CATEGORY_STORAGE, + DEVICE_CATEGORY_NETWORK, + DEVICE_CATEGORY_INPUT, + DEVICE_CATEGORY_DISPLAY, + DEVICE_CATEGORY_SOUND, + DEVICE_CATEGORY_MISC, + DEVICE_CATEGORY_CPU, + DEVICE_CATEGORY_MAX +} DeviceCategory; + +typedef void (*DeviceRealize)(DeviceState *dev, Error **errp); +typedef void (*DeviceUnrealize)(DeviceState *dev); +typedef void (*DeviceReset)(DeviceState *dev); +typedef void (*BusRealize)(BusState *bus, Error **errp); +typedef void (*BusUnrealize)(BusState *bus); + +/** + * DeviceClass: + * @props: Properties accessing state fields. + * @realize: Callback function invoked when the #DeviceState:realized + * property is changed to %true. + * @unrealize: Callback function invoked when the #DeviceState:realized + * property is changed to %false. + * @hotpluggable: indicates if #DeviceClass is hotpluggable, available + * as readonly "hotpluggable" property of #DeviceState instance + * + * # Realization # + * Devices are constructed in two stages, + * 1) object instantiation via object_initialize() and + * 2) device realization via #DeviceState:realized property. + * The former may not fail (and must not abort or exit, since it is called + * during device introspection already), and the latter may return error + * information to the caller and must be re-entrant. + * Trivial field initializations should go into #TypeInfo.instance_init. + * Operations depending on @props static properties should go into @realize. + * After successful realization, setting static properties will fail. + * + * As an interim step, the #DeviceState:realized property can also be + * set with qdev_realize(). + * In the future, devices will propagate this state change to their children + * and along busses they expose. + * The point in time will be deferred to machine creation, so that values + * set in @realize will not be introspectable beforehand. Therefore devices + * must not create children during @realize; they should initialize them via + * object_initialize() in their own #TypeInfo.instance_init and forward the + * realization events appropriately. + * + * Any type may override the @realize and/or @unrealize callbacks but needs + * to call the parent type's implementation if keeping their functionality + * is desired. Refer to QOM documentation for further discussion and examples. + * + * + * + * Since TYPE_DEVICE doesn't implement @realize and @unrealize, types + * derived directly from it need not call their parent's @realize and + * @unrealize. + * For other types consult the documentation and implementation of the + * respective parent types. + * + * + * + * # Hiding a device # + * To hide a device, a DeviceListener function should_be_hidden() needs to + * be registered. + * It can be used to defer adding a device and therefore hide it from the + * guest. The handler registering to this DeviceListener can save the QOpts + * passed to it for re-using it later and must return that it wants the device + * to be/remain hidden or not. When the handler function decides the device + * shall not be hidden it will be added in qdev_device_add() and + * realized as any other device. Otherwise qdev_device_add() will return early + * without adding the device. The guest will not see a "hidden" device + * until it was marked don't hide and qdev_device_add called again. + * + */ +struct DeviceClass { + /*< private >*/ + ObjectClass parent_class; + /*< public >*/ + + DECLARE_BITMAP(categories, DEVICE_CATEGORY_MAX); + const char *fw_name; + const char *desc; + + /* + * The underscore at the end ensures a compile-time error if someone + * assigns to dc->props instead of using device_class_set_props. + */ + Property *props_; + + /* + * Can this device be instantiated with -device / device_add? + * All devices should support instantiation with device_add, and + * this flag should not exist. But we're not there, yet. Some + * devices fail to instantiate with cryptic error messages. + * Others instantiate, but don't work. Exposing users to such + * behavior would be cruel; clearing this flag will protect them. + * It should never be cleared without a comment explaining why it + * is cleared. + * TODO remove once we're there + */ + bool user_creatable; + bool hotpluggable; + + /* callbacks */ + /* + * Reset method here is deprecated and replaced by methods in the + * resettable class interface to implement a multi-phase reset. + * TODO: remove once every reset callback is unused + */ + DeviceReset reset; + DeviceRealize realize; + DeviceUnrealize unrealize; + + /* device state */ + const VMStateDescription *vmsd; + + /* Private to qdev / bus. */ + const char *bus_type; +}; + +typedef struct NamedGPIOList NamedGPIOList; + +struct NamedGPIOList { + char *name; + qemu_irq *in; + int num_in; + int num_out; + QLIST_ENTRY(NamedGPIOList) node; +}; + +typedef struct Clock Clock; +typedef struct NamedClockList NamedClockList; + +struct NamedClockList { + char *name; + Clock *clock; + bool output; + bool alias; + QLIST_ENTRY(NamedClockList) node; +}; + +/** + * DeviceState: + * @realized: Indicates whether the device has been fully constructed. + * When accessed outside big qemu lock, must be accessed with + * qatomic_load_acquire() + * @reset: ResettableState for the device; handled by Resettable interface. + * + * This structure should not be accessed directly. We declare it here + * so that it can be embedded in individual device state structures. + */ +struct DeviceState { + /*< private >*/ + Object parent_obj; + /*< public >*/ + + const char *id; + char *canonical_path; + bool realized; + bool pending_deleted_event; + QemuOpts *opts; + int hotplugged; + bool allow_unplug_during_migration; + BusState *parent_bus; + QLIST_HEAD(, NamedGPIOList) gpios; + QLIST_HEAD(, NamedClockList) clocks; + QLIST_HEAD(, BusState) child_bus; + int num_child_bus; + int instance_id_alias; + int alias_required_for_version; + ResettableState reset; +}; + +struct DeviceListener { + void (*realize)(DeviceListener *listener, DeviceState *dev); + void (*unrealize)(DeviceListener *listener, DeviceState *dev); + /* + * This callback is called upon init of the DeviceState and allows to + * inform qdev that a device should be hidden, depending on the device + * opts, for example, to hide a standby device. + */ + int (*should_be_hidden)(DeviceListener *listener, QemuOpts *device_opts); + QTAILQ_ENTRY(DeviceListener) link; +}; + +#define TYPE_BUS "bus" +DECLARE_OBJ_CHECKERS(BusState, BusClass, + BUS, TYPE_BUS) + +struct BusClass { + ObjectClass parent_class; + + /* FIXME first arg should be BusState */ + void (*print_dev)(Monitor *mon, DeviceState *dev, int indent); + char *(*get_dev_path)(DeviceState *dev); + + /* + * This callback is used to create Open Firmware device path in accordance + * with OF spec http://forthworks.com/standards/of1275.pdf. Individual bus + * bindings can be found at http://playground.sun.com/1275/bindings/. + */ + char *(*get_fw_dev_path)(DeviceState *dev); + + void (*reset)(BusState *bus); + + /* + * Return whether the device can be added to @bus, + * based on the address that was set (via device properties) + * before realize. If not, on return @errp contains the + * human-readable error message. + */ + bool (*check_address)(BusState *bus, DeviceState *dev, Error **errp); + + BusRealize realize; + BusUnrealize unrealize; + + /* maximum devices allowed on the bus, 0: no limit. */ + int max_dev; + /* number of automatically allocated bus ids (e.g. ide.0) */ + int automatic_ids; +}; + +typedef struct BusChild { + struct rcu_head rcu; + DeviceState *child; + int index; + QTAILQ_ENTRY(BusChild) sibling; +} BusChild; + +#define QDEV_HOTPLUG_HANDLER_PROPERTY "hotplug-handler" + +/** + * BusState: + * @hotplug_handler: link to a hotplug handler associated with bus. + * @reset: ResettableState for the bus; handled by Resettable interface. + */ +struct BusState { + Object obj; + DeviceState *parent; + char *name; + HotplugHandler *hotplug_handler; + int max_index; + bool realized; + int num_children; + + /* + * children is a RCU QTAILQ, thus readers must use RCU to access it, + * and writers must hold the big qemu lock + */ + + QTAILQ_HEAD(, BusChild) children; + QLIST_ENTRY(BusState) sibling; + ResettableState reset; +}; + +/** + * Property: + * @set_default: true if the default value should be set from @defval, + * in which case @info->set_default_value must not be NULL + * (if false then no default value is set by the property system + * and the field retains whatever value it was given by instance_init). + * @defval: default value for the property. This is used only if @set_default + * is true. + */ +struct Property { + const char *name; + const PropertyInfo *info; + ptrdiff_t offset; + uint8_t bitnr; + bool set_default; + union { + int64_t i; + uint64_t u; + } defval; + int arrayoffset; + const PropertyInfo *arrayinfo; + int arrayfieldsize; + const char *link_type; +}; + +struct PropertyInfo { + const char *name; + const char *description; + const QEnumLookup *enum_table; + int (*print)(DeviceState *dev, Property *prop, char *dest, size_t len); + void (*set_default_value)(ObjectProperty *op, const Property *prop); + void (*create)(ObjectClass *oc, Property *prop); + ObjectPropertyAccessor *get; + ObjectPropertyAccessor *set; + ObjectPropertyRelease *release; +}; + +/** + * GlobalProperty: + * @used: Set to true if property was used when initializing a device. + * @optional: If set to true, GlobalProperty will be skipped without errors + * if the property doesn't exist. + * + * An error is fatal for non-hotplugged devices, when the global is applied. + */ +typedef struct GlobalProperty { + const char *driver; + const char *property; + const char *value; + bool used; + bool optional; +} GlobalProperty; + +static inline void +compat_props_add(GPtrArray *arr, + GlobalProperty props[], size_t nelem) +{ + int i; + for (i = 0; i < nelem; i++) { + g_ptr_array_add(arr, (void *)&props[i]); + } +} + +/*** Board API. This should go away once we have a machine config file. ***/ + +/** + * qdev_new: Create a device on the heap + * @name: device type to create (we assert() that this type exists) + * + * This only allocates the memory and initializes the device state + * structure, ready for the caller to set properties if they wish. + * The device still needs to be realized. + * The returned object has a reference count of 1. + */ +DeviceState *qdev_new(const char *name); +/** + * qdev_try_new: Try to create a device on the heap + * @name: device type to create + * + * This is like qdev_new(), except it returns %NULL when type @name + * does not exist, rather than asserting. + */ +DeviceState *qdev_try_new(const char *name); +/** + * qdev_realize: Realize @dev. + * @dev: device to realize + * @bus: bus to plug it into (may be NULL) + * @errp: pointer to error object + * + * "Realize" the device, i.e. perform the second phase of device + * initialization. + * @dev must not be plugged into a bus already. + * If @bus, plug @dev into @bus. This takes a reference to @dev. + * If @dev has no QOM parent, make one up, taking another reference. + * On success, return true. + * On failure, store an error through @errp and return false. + * + * If you created @dev using qdev_new(), you probably want to use + * qdev_realize_and_unref() instead. + */ +bool qdev_realize(DeviceState *dev, BusState *bus, Error **errp); +/** + * qdev_realize_and_unref: Realize @dev and drop a reference + * @dev: device to realize + * @bus: bus to plug it into (may be NULL) + * @errp: pointer to error object + * + * Realize @dev and drop a reference. + * This is like qdev_realize(), except the caller must hold a + * (private) reference, which is dropped on return regardless of + * success or failure. Intended use:: + * + * dev = qdev_new(); + * [...] + * qdev_realize_and_unref(dev, bus, errp); + * + * Now @dev can go away without further ado. + * + * If you are embedding the device into some other QOM device and + * initialized it via some variant on object_initialize_child() then + * do not use this function, because that family of functions arrange + * for the only reference to the child device to be held by the parent + * via the child<> property, and so the reference-count-drop done here + * would be incorrect. For that use case you want qdev_realize(). + */ +bool qdev_realize_and_unref(DeviceState *dev, BusState *bus, Error **errp); +/** + * qdev_unrealize: Unrealize a device + * @dev: device to unrealize + * + * This function will "unrealize" a device, which is the first phase + * of correctly destroying a device that has been realized. It will: + * + * - unrealize any child buses by calling qbus_unrealize() + * (this will recursively unrealize any devices on those buses) + * - call the the unrealize method of @dev + * + * The device can then be freed by causing its reference count to go + * to zero. + * + * Warning: most devices in QEMU do not expect to be unrealized. Only + * devices which are hot-unpluggable should be unrealized (as part of + * the unplugging process); all other devices are expected to last for + * the life of the simulation and should not be unrealized and freed. + */ +void qdev_unrealize(DeviceState *dev); +void qdev_set_legacy_instance_id(DeviceState *dev, int alias_id, + int required_for_version); +HotplugHandler *qdev_get_bus_hotplug_handler(DeviceState *dev); +HotplugHandler *qdev_get_machine_hotplug_handler(DeviceState *dev); +bool qdev_hotplug_allowed(DeviceState *dev, Error **errp); +/** + * qdev_get_hotplug_handler: Get handler responsible for device wiring + * + * Find HOTPLUG_HANDLER for @dev that provides [pre|un]plug callbacks for it. + * + * Note: in case @dev has a parent bus, it will be returned as handler unless + * machine handler overrides it. + * + * Returns: pointer to object that implements TYPE_HOTPLUG_HANDLER interface + * or NULL if there aren't any. + */ +HotplugHandler *qdev_get_hotplug_handler(DeviceState *dev); +void qdev_unplug(DeviceState *dev, Error **errp); +void qdev_simple_device_unplug_cb(HotplugHandler *hotplug_dev, + DeviceState *dev, Error **errp); +void qdev_machine_creation_done(void); +bool qdev_machine_modified(void); + +/** + * GpioPolarity: Polarity of a GPIO line + * + * GPIO lines use either positive (active-high) logic, + * or negative (active-low) logic. + * + * In active-high logic (%GPIO_POLARITY_ACTIVE_HIGH), a pin is + * active when the voltage on the pin is high (relative to ground); + * whereas in active-low logic (%GPIO_POLARITY_ACTIVE_LOW), a pin + * is active when the voltage on the pin is low (or grounded). + */ +typedef enum { + GPIO_POLARITY_ACTIVE_LOW, + GPIO_POLARITY_ACTIVE_HIGH +} GpioPolarity; + +/** + * qdev_get_gpio_in: Get one of a device's anonymous input GPIO lines + * @dev: Device whose GPIO we want + * @n: Number of the anonymous GPIO line (which must be in range) + * + * Returns the qemu_irq corresponding to an anonymous input GPIO line + * (which the device has set up with qdev_init_gpio_in()). The index + * @n of the GPIO line must be valid (i.e. be at least 0 and less than + * the total number of anonymous input GPIOs the device has); this + * function will assert() if passed an invalid index. + * + * This function is intended to be used by board code or SoC "container" + * device models to wire up the GPIO lines; usually the return value + * will be passed to qdev_connect_gpio_out() or a similar function to + * connect another device's output GPIO line to this input. + * + * For named input GPIO lines, use qdev_get_gpio_in_named(). + */ +qemu_irq qdev_get_gpio_in(DeviceState *dev, int n); +/** + * qdev_get_gpio_in_named: Get one of a device's named input GPIO lines + * @dev: Device whose GPIO we want + * @name: Name of the input GPIO array + * @n: Number of the GPIO line in that array (which must be in range) + * + * Returns the qemu_irq corresponding to a named input GPIO line + * (which the device has set up with qdev_init_gpio_in_named()). + * The @name string must correspond to an input GPIO array which exists on + * the device, and the index @n of the GPIO line must be valid (i.e. + * be at least 0 and less than the total number of input GPIOs in that + * array); this function will assert() if passed an invalid name or index. + * + * For anonymous input GPIO lines, use qdev_get_gpio_in(). + */ +qemu_irq qdev_get_gpio_in_named(DeviceState *dev, const char *name, int n); + +/** + * qdev_connect_gpio_out: Connect one of a device's anonymous output GPIO lines + * @dev: Device whose GPIO to connect + * @n: Number of the anonymous output GPIO line (which must be in range) + * @pin: qemu_irq to connect the output line to + * + * This function connects an anonymous output GPIO line on a device + * up to an arbitrary qemu_irq, so that when the device asserts that + * output GPIO line, the qemu_irq's callback is invoked. + * The index @n of the GPIO line must be valid (i.e. be at least 0 and + * less than the total number of anonymous output GPIOs the device has + * created with qdev_init_gpio_out()); otherwise this function will assert(). + * + * Outbound GPIO lines can be connected to any qemu_irq, but the common + * case is connecting them to another device's inbound GPIO line, using + * the qemu_irq returned by qdev_get_gpio_in() or qdev_get_gpio_in_named(). + * + * It is not valid to try to connect one outbound GPIO to multiple + * qemu_irqs at once, or to connect multiple outbound GPIOs to the + * same qemu_irq. (Warning: there is no assertion or other guard to + * catch this error: the model will just not do the right thing.) + * Instead, for fan-out you can use the TYPE_IRQ_SPLIT device: connect + * a device's outbound GPIO to the splitter's input, and connect each + * of the splitter's outputs to a different device. For fan-in you + * can use the TYPE_OR_IRQ device, which is a model of a logical OR + * gate with multiple inputs and one output. + * + * For named output GPIO lines, use qdev_connect_gpio_out_named(). + */ +void qdev_connect_gpio_out(DeviceState *dev, int n, qemu_irq pin); +/** + * qdev_connect_gpio_out: Connect one of a device's anonymous output GPIO lines + * @dev: Device whose GPIO to connect + * @name: Name of the output GPIO array + * @n: Number of the anonymous output GPIO line (which must be in range) + * @pin: qemu_irq to connect the output line to + * + * This function connects an anonymous output GPIO line on a device + * up to an arbitrary qemu_irq, so that when the device asserts that + * output GPIO line, the qemu_irq's callback is invoked. + * The @name string must correspond to an output GPIO array which exists on + * the device, and the index @n of the GPIO line must be valid (i.e. + * be at least 0 and less than the total number of input GPIOs in that + * array); this function will assert() if passed an invalid name or index. + * + * Outbound GPIO lines can be connected to any qemu_irq, but the common + * case is connecting them to another device's inbound GPIO line, using + * the qemu_irq returned by qdev_get_gpio_in() or qdev_get_gpio_in_named(). + * + * It is not valid to try to connect one outbound GPIO to multiple + * qemu_irqs at once, or to connect multiple outbound GPIOs to the + * same qemu_irq; see qdev_connect_gpio_out() for details. + * + * For named output GPIO lines, use qdev_connect_gpio_out_named(). + */ +void qdev_connect_gpio_out_named(DeviceState *dev, const char *name, int n, + qemu_irq pin); +/** + * qdev_get_gpio_out_connector: Get the qemu_irq connected to an output GPIO + * @dev: Device whose output GPIO we are interested in + * @name: Name of the output GPIO array + * @n: Number of the output GPIO line within that array + * + * Returns whatever qemu_irq is currently connected to the specified + * output GPIO line of @dev. This will be NULL if the output GPIO line + * has never been wired up to the anything. Note that the qemu_irq + * returned does not belong to @dev -- it will be the input GPIO or + * IRQ of whichever device the board code has connected up to @dev's + * output GPIO. + * + * You probably don't need to use this function -- it is used only + * by the platform-bus subsystem. + */ +qemu_irq qdev_get_gpio_out_connector(DeviceState *dev, const char *name, int n); +/** + * qdev_intercept_gpio_out: Intercept an existing GPIO connection + * @dev: Device to intercept the outbound GPIO line from + * @icpt: New qemu_irq to connect instead + * @name: Name of the output GPIO array + * @n: Number of the GPIO line in the array + * + * This function is provided only for use by the qtest testing framework + * and is not suitable for use in non-testing parts of QEMU. + * + * This function breaks an existing connection of an outbound GPIO + * line from @dev, and replaces it with the new qemu_irq @icpt, as if + * ``qdev_connect_gpio_out_named(dev, icpt, name, n)`` had been called. + * The previously connected qemu_irq is returned, so it can be restored + * by a second call to qdev_intercept_gpio_out() if desired. + */ +qemu_irq qdev_intercept_gpio_out(DeviceState *dev, qemu_irq icpt, + const char *name, int n); + +BusState *qdev_get_child_bus(DeviceState *dev, const char *name); + +/*** Device API. ***/ + +/** + * qdev_init_gpio_in: create an array of anonymous input GPIO lines + * @dev: Device to create input GPIOs for + * @handler: Function to call when GPIO line value is set + * @n: Number of GPIO lines to create + * + * Devices should use functions in the qdev_init_gpio_in* family in + * their instance_init or realize methods to create any input GPIO + * lines they need. There is no functional difference between + * anonymous and named GPIO lines. Stylistically, named GPIOs are + * preferable (easier to understand at callsites) unless a device + * has exactly one uniform kind of GPIO input whose purpose is obvious. + * Note that input GPIO lines can serve as 'sinks' for IRQ lines. + * + * See qdev_get_gpio_in() for how code that uses such a device can get + * hold of an input GPIO line to manipulate it. + */ +void qdev_init_gpio_in(DeviceState *dev, qemu_irq_handler handler, int n); +/** + * qdev_init_gpio_out: create an array of anonymous output GPIO lines + * @dev: Device to create output GPIOs for + * @pins: Pointer to qemu_irq or qemu_irq array for the GPIO lines + * @n: Number of GPIO lines to create + * + * Devices should use functions in the qdev_init_gpio_out* family + * in their instance_init or realize methods to create any output + * GPIO lines they need. There is no functional difference between + * anonymous and named GPIO lines. Stylistically, named GPIOs are + * preferable (easier to understand at callsites) unless a device + * has exactly one uniform kind of GPIO output whose purpose is obvious. + * + * The @pins argument should be a pointer to either a "qemu_irq" + * (if @n == 1) or a "qemu_irq []" array (if @n > 1) in the device's + * state structure. The device implementation can then raise and + * lower the GPIO line by calling qemu_set_irq(). (If anything is + * connected to the other end of the GPIO this will cause the handler + * function for that input GPIO to be called.) + * + * See qdev_connect_gpio_out() for how code that uses such a device + * can connect to one of its output GPIO lines. + */ +void qdev_init_gpio_out(DeviceState *dev, qemu_irq *pins, int n); +/** + * qdev_init_gpio_out: create an array of named output GPIO lines + * @dev: Device to create output GPIOs for + * @pins: Pointer to qemu_irq or qemu_irq array for the GPIO lines + * @name: Name to give this array of GPIO lines + * @n: Number of GPIO lines to create + * + * Like qdev_init_gpio_out(), but creates an array of GPIO output lines + * with a name. Code using the device can then connect these GPIO lines + * using qdev_connect_gpio_out_named(). + */ +void qdev_init_gpio_out_named(DeviceState *dev, qemu_irq *pins, + const char *name, int n); +/** + * qdev_init_gpio_in_named_with_opaque: create an array of input GPIO lines + * for the specified device + * + * @dev: Device to create input GPIOs for + * @handler: Function to call when GPIO line value is set + * @opaque: Opaque data pointer to pass to @handler + * @name: Name of the GPIO input (must be unique for this device) + * @n: Number of GPIO lines in this input set + */ +void qdev_init_gpio_in_named_with_opaque(DeviceState *dev, + qemu_irq_handler handler, + void *opaque, + const char *name, int n); + +/** + * qdev_init_gpio_in_named: create an array of input GPIO lines + * for the specified device + * + * Like qdev_init_gpio_in_named_with_opaque(), but the opaque pointer + * passed to the handler is @dev (which is the most commonly desired behaviour). + */ +static inline void qdev_init_gpio_in_named(DeviceState *dev, + qemu_irq_handler handler, + const char *name, int n) +{ + qdev_init_gpio_in_named_with_opaque(dev, handler, dev, name, n); +} + +/** + * qdev_pass_gpios: create GPIO lines on container which pass through to device + * @dev: Device which has GPIO lines + * @container: Container device which needs to expose them + * @name: Name of GPIO array to pass through (NULL for the anonymous GPIO array) + * + * In QEMU, complicated devices like SoCs are often modelled with a + * "container" QOM device which itself contains other QOM devices and + * which wires them up appropriately. This function allows the container + * to create GPIO arrays on itself which simply pass through to a GPIO + * array of one of its internal devices. + * + * If @dev has both input and output GPIOs named @name then both will + * be passed through. It is not possible to pass a subset of the array + * with this function. + * + * To users of the container device, the GPIO array created on @container + * behaves exactly like any other. + */ +void qdev_pass_gpios(DeviceState *dev, DeviceState *container, + const char *name); + +BusState *qdev_get_parent_bus(DeviceState *dev); + +/*** BUS API. ***/ + +DeviceState *qdev_find_recursive(BusState *bus, const char *id); + +/* Returns 0 to walk children, > 0 to skip walk, < 0 to terminate walk. */ +typedef int (qbus_walkerfn)(BusState *bus, void *opaque); +typedef int (qdev_walkerfn)(DeviceState *dev, void *opaque); + +void qbus_create_inplace(void *bus, size_t size, const char *typename, + DeviceState *parent, const char *name); +BusState *qbus_create(const char *typename, DeviceState *parent, const char *name); +bool qbus_realize(BusState *bus, Error **errp); +void qbus_unrealize(BusState *bus); + +/* Returns > 0 if either devfn or busfn skip walk somewhere in cursion, + * < 0 if either devfn or busfn terminate walk somewhere in cursion, + * 0 otherwise. */ +int qbus_walk_children(BusState *bus, + qdev_walkerfn *pre_devfn, qbus_walkerfn *pre_busfn, + qdev_walkerfn *post_devfn, qbus_walkerfn *post_busfn, + void *opaque); +int qdev_walk_children(DeviceState *dev, + qdev_walkerfn *pre_devfn, qbus_walkerfn *pre_busfn, + qdev_walkerfn *post_devfn, qbus_walkerfn *post_busfn, + void *opaque); + +/** + * @qdev_reset_all: + * Reset @dev. See @qbus_reset_all() for more details. + * + * Note: This function is deprecated and will be removed when it becomes unused. + * Please use device_cold_reset() now. + */ +void qdev_reset_all(DeviceState *dev); +void qdev_reset_all_fn(void *opaque); + +/** + * @qbus_reset_all: + * @bus: Bus to be reset. + * + * Reset @bus and perform a bus-level ("hard") reset of all devices connected + * to it, including recursive processing of all buses below @bus itself. A + * hard reset means that qbus_reset_all will reset all state of the device. + * For PCI devices, for example, this will include the base address registers + * or configuration space. + * + * Note: This function is deprecated and will be removed when it becomes unused. + * Please use bus_cold_reset() now. + */ +void qbus_reset_all(BusState *bus); +void qbus_reset_all_fn(void *opaque); + +/** + * device_cold_reset: + * Reset device @dev and perform a recursive processing using the resettable + * interface. It triggers a RESET_TYPE_COLD. + */ +void device_cold_reset(DeviceState *dev); + +/** + * bus_cold_reset: + * + * Reset bus @bus and perform a recursive processing using the resettable + * interface. It triggers a RESET_TYPE_COLD. + */ +void bus_cold_reset(BusState *bus); + +/** + * device_is_in_reset: + * Return true if the device @dev is currently being reset. + */ +bool device_is_in_reset(DeviceState *dev); + +/** + * bus_is_in_reset: + * Return true if the bus @bus is currently being reset. + */ +bool bus_is_in_reset(BusState *bus); + +/* This should go away once we get rid of the NULL bus hack */ +BusState *sysbus_get_default(void); + +char *qdev_get_fw_dev_path(DeviceState *dev); +char *qdev_get_own_fw_dev_path_from_handler(BusState *bus, DeviceState *dev); + +/** + * @qdev_machine_init + * + * Initialize platform devices before machine init. This is a hack until full + * support for composition is added. + */ +void qdev_machine_init(void); + +/** + * device_legacy_reset: + * + * Reset a single device (by calling the reset method). + * Note: This function is deprecated and will be removed when it becomes unused. + * Please use device_cold_reset() now. + */ +void device_legacy_reset(DeviceState *dev); + +void device_class_set_props(DeviceClass *dc, Property *props); + +/** + * device_class_set_parent_reset: + * TODO: remove the function when DeviceClass's reset method + * is not used anymore. + */ +void device_class_set_parent_reset(DeviceClass *dc, + DeviceReset dev_reset, + DeviceReset *parent_reset); +void device_class_set_parent_realize(DeviceClass *dc, + DeviceRealize dev_realize, + DeviceRealize *parent_realize); +void device_class_set_parent_unrealize(DeviceClass *dc, + DeviceUnrealize dev_unrealize, + DeviceUnrealize *parent_unrealize); + +const VMStateDescription *qdev_get_vmsd(DeviceState *dev); + +const char *qdev_fw_name(DeviceState *dev); + +Object *qdev_get_machine(void); + +/* FIXME: make this a link<> */ +bool qdev_set_parent_bus(DeviceState *dev, BusState *bus, Error **errp); + +extern bool qdev_hotplug; +extern bool qdev_hot_removed; + +char *qdev_get_dev_path(DeviceState *dev); + +void qbus_set_hotplug_handler(BusState *bus, Object *handler); +void qbus_set_bus_hotplug_handler(BusState *bus); + +static inline bool qbus_is_hotpluggable(BusState *bus) +{ + return bus->hotplug_handler; +} + +void device_listener_register(DeviceListener *listener); +void device_listener_unregister(DeviceListener *listener); + +/** + * @qdev_should_hide_device: + * @opts: QemuOpts as passed on cmdline. + * + * Check if a device should be added. + * When a device is added via qdev_device_add() this will be called, + * and return if the device should be added now or not. + */ +bool qdev_should_hide_device(QemuOpts *opts); + +#endif diff --git a/include/hw/qdev-dma.h b/include/hw/qdev-dma.h new file mode 100644 index 000000000..b00391aa0 --- /dev/null +++ b/include/hw/qdev-dma.h @@ -0,0 +1,16 @@ +/* + * Support for dma_addr_t typed properties + * + * Copyright (C) 2012 David Gibson, IBM Corporation. + * + * This work is licensed under the terms of the GNU GPL, version 2 or later. + * See the COPYING file in the top-level directory. + */ + +#ifndef HW_QDEV_DMA_H +#define HW_QDEV_DMA_H + +#define DEFINE_PROP_DMAADDR(_n, _s, _f, _d) \ + DEFINE_PROP_UINT64(_n, _s, _f, _d) + +#endif diff --git a/include/hw/qdev-properties.h b/include/hw/qdev-properties.h new file mode 100644 index 000000000..443745006 --- /dev/null +++ b/include/hw/qdev-properties.h @@ -0,0 +1,327 @@ +#ifndef QEMU_QDEV_PROPERTIES_H +#define QEMU_QDEV_PROPERTIES_H + +#include "hw/qdev-core.h" + +/*** qdev-properties.c ***/ + +extern const PropertyInfo qdev_prop_bit; +extern const PropertyInfo qdev_prop_bit64; +extern const PropertyInfo qdev_prop_bool; +extern const PropertyInfo qdev_prop_enum; +extern const PropertyInfo qdev_prop_uint8; +extern const PropertyInfo qdev_prop_uint16; +extern const PropertyInfo qdev_prop_uint32; +extern const PropertyInfo qdev_prop_int32; +extern const PropertyInfo qdev_prop_uint64; +extern const PropertyInfo qdev_prop_int64; +extern const PropertyInfo qdev_prop_size; +extern const PropertyInfo qdev_prop_string; +extern const PropertyInfo qdev_prop_chr; +extern const PropertyInfo qdev_prop_tpm; +extern const PropertyInfo qdev_prop_macaddr; +extern const PropertyInfo qdev_prop_reserved_region; +extern const PropertyInfo qdev_prop_on_off_auto; +extern const PropertyInfo qdev_prop_multifd_compression; +extern const PropertyInfo qdev_prop_losttickpolicy; +extern const PropertyInfo qdev_prop_blockdev_on_error; +extern const PropertyInfo qdev_prop_bios_chs_trans; +extern const PropertyInfo qdev_prop_fdc_drive_type; +extern const PropertyInfo qdev_prop_drive; +extern const PropertyInfo qdev_prop_drive_iothread; +extern const PropertyInfo qdev_prop_netdev; +extern const PropertyInfo qdev_prop_pci_devfn; +extern const PropertyInfo qdev_prop_size32; +extern const PropertyInfo qdev_prop_blocksize; +extern const PropertyInfo qdev_prop_pci_host_devaddr; +extern const PropertyInfo qdev_prop_uuid; +extern const PropertyInfo qdev_prop_arraylen; +extern const PropertyInfo qdev_prop_audiodev; +extern const PropertyInfo qdev_prop_link; +extern const PropertyInfo qdev_prop_off_auto_pcibar; +extern const PropertyInfo qdev_prop_pcie_link_speed; +extern const PropertyInfo qdev_prop_pcie_link_width; + +#define DEFINE_PROP(_name, _state, _field, _prop, _type) { \ + .name = (_name), \ + .info = &(_prop), \ + .offset = offsetof(_state, _field) \ + + type_check(_type, typeof_field(_state, _field)), \ + } + +#define DEFINE_PROP_SIGNED(_name, _state, _field, _defval, _prop, _type) { \ + .name = (_name), \ + .info = &(_prop), \ + .offset = offsetof(_state, _field) \ + + type_check(_type,typeof_field(_state, _field)), \ + .set_default = true, \ + .defval.i = (_type)_defval, \ + } + +#define DEFINE_PROP_SIGNED_NODEFAULT(_name, _state, _field, _prop, _type) { \ + .name = (_name), \ + .info = &(_prop), \ + .offset = offsetof(_state, _field) \ + + type_check(_type, typeof_field(_state, _field)), \ + } + +#define DEFINE_PROP_BIT(_name, _state, _field, _bit, _defval) { \ + .name = (_name), \ + .info = &(qdev_prop_bit), \ + .bitnr = (_bit), \ + .offset = offsetof(_state, _field) \ + + type_check(uint32_t,typeof_field(_state, _field)), \ + .set_default = true, \ + .defval.u = (bool)_defval, \ + } + +#define DEFINE_PROP_UNSIGNED(_name, _state, _field, _defval, _prop, _type) { \ + .name = (_name), \ + .info = &(_prop), \ + .offset = offsetof(_state, _field) \ + + type_check(_type, typeof_field(_state, _field)), \ + .set_default = true, \ + .defval.u = (_type)_defval, \ + } + +#define DEFINE_PROP_UNSIGNED_NODEFAULT(_name, _state, _field, _prop, _type) { \ + .name = (_name), \ + .info = &(_prop), \ + .offset = offsetof(_state, _field) \ + + type_check(_type, typeof_field(_state, _field)), \ + } + +#define DEFINE_PROP_BIT64(_name, _state, _field, _bit, _defval) { \ + .name = (_name), \ + .info = &(qdev_prop_bit64), \ + .bitnr = (_bit), \ + .offset = offsetof(_state, _field) \ + + type_check(uint64_t, typeof_field(_state, _field)), \ + .set_default = true, \ + .defval.u = (bool)_defval, \ + } + +#define DEFINE_PROP_BOOL(_name, _state, _field, _defval) { \ + .name = (_name), \ + .info = &(qdev_prop_bool), \ + .offset = offsetof(_state, _field) \ + + type_check(bool, typeof_field(_state, _field)), \ + .set_default = true, \ + .defval.u = (bool)_defval, \ + } + +#define PROP_ARRAY_LEN_PREFIX "len-" + +/** + * DEFINE_PROP_ARRAY: + * @_name: name of the array + * @_state: name of the device state structure type + * @_field: uint32_t field in @_state to hold the array length + * @_arrayfield: field in @_state (of type '@_arraytype *') which + * will point to the array + * @_arrayprop: PropertyInfo defining what property the array elements have + * @_arraytype: C type of the array elements + * + * Define device properties for a variable-length array _name. A + * static property "len-arrayname" is defined. When the device creator + * sets this property to the desired length of array, further dynamic + * properties "arrayname[0]", "arrayname[1]", ... are defined so the + * device creator can set the array element values. Setting the + * "len-arrayname" property more than once is an error. + * + * When the array length is set, the @_field member of the device + * struct is set to the array length, and @_arrayfield is set to point + * to (zero-initialised) memory allocated for the array. For a zero + * length array, @_field will be set to 0 and @_arrayfield to NULL. + * It is the responsibility of the device deinit code to free the + * @_arrayfield memory. + */ +#define DEFINE_PROP_ARRAY(_name, _state, _field, \ + _arrayfield, _arrayprop, _arraytype) { \ + .name = (PROP_ARRAY_LEN_PREFIX _name), \ + .info = &(qdev_prop_arraylen), \ + .set_default = true, \ + .defval.u = 0, \ + .offset = offsetof(_state, _field) \ + + type_check(uint32_t, typeof_field(_state, _field)), \ + .arrayinfo = &(_arrayprop), \ + .arrayfieldsize = sizeof(_arraytype), \ + .arrayoffset = offsetof(_state, _arrayfield), \ + } + +#define DEFINE_PROP_LINK(_name, _state, _field, _type, _ptr_type) { \ + .name = (_name), \ + .info = &(qdev_prop_link), \ + .offset = offsetof(_state, _field) \ + + type_check(_ptr_type, typeof_field(_state, _field)), \ + .link_type = _type, \ + } + +#define DEFINE_PROP_UINT8(_n, _s, _f, _d) \ + DEFINE_PROP_UNSIGNED(_n, _s, _f, _d, qdev_prop_uint8, uint8_t) +#define DEFINE_PROP_UINT16(_n, _s, _f, _d) \ + DEFINE_PROP_UNSIGNED(_n, _s, _f, _d, qdev_prop_uint16, uint16_t) +#define DEFINE_PROP_UINT32(_n, _s, _f, _d) \ + DEFINE_PROP_UNSIGNED(_n, _s, _f, _d, qdev_prop_uint32, uint32_t) +#define DEFINE_PROP_INT32(_n, _s, _f, _d) \ + DEFINE_PROP_SIGNED(_n, _s, _f, _d, qdev_prop_int32, int32_t) +#define DEFINE_PROP_UINT64(_n, _s, _f, _d) \ + DEFINE_PROP_UNSIGNED(_n, _s, _f, _d, qdev_prop_uint64, uint64_t) +#define DEFINE_PROP_INT64(_n, _s, _f, _d) \ + DEFINE_PROP_SIGNED(_n, _s, _f, _d, qdev_prop_int64, int64_t) +#define DEFINE_PROP_SIZE(_n, _s, _f, _d) \ + DEFINE_PROP_UNSIGNED(_n, _s, _f, _d, qdev_prop_size, uint64_t) +#define DEFINE_PROP_PCI_DEVFN(_n, _s, _f, _d) \ + DEFINE_PROP_SIGNED(_n, _s, _f, _d, qdev_prop_pci_devfn, int32_t) + +#define DEFINE_PROP_CHR(_n, _s, _f) \ + DEFINE_PROP(_n, _s, _f, qdev_prop_chr, CharBackend) +#define DEFINE_PROP_STRING(_n, _s, _f) \ + DEFINE_PROP(_n, _s, _f, qdev_prop_string, char*) +#define DEFINE_PROP_NETDEV(_n, _s, _f) \ + DEFINE_PROP(_n, _s, _f, qdev_prop_netdev, NICPeers) +#define DEFINE_PROP_DRIVE(_n, _s, _f) \ + DEFINE_PROP(_n, _s, _f, qdev_prop_drive, BlockBackend *) +#define DEFINE_PROP_DRIVE_IOTHREAD(_n, _s, _f) \ + DEFINE_PROP(_n, _s, _f, qdev_prop_drive_iothread, BlockBackend *) +#define DEFINE_PROP_MACADDR(_n, _s, _f) \ + DEFINE_PROP(_n, _s, _f, qdev_prop_macaddr, MACAddr) +#define DEFINE_PROP_RESERVED_REGION(_n, _s, _f) \ + DEFINE_PROP(_n, _s, _f, qdev_prop_reserved_region, ReservedRegion) +#define DEFINE_PROP_ON_OFF_AUTO(_n, _s, _f, _d) \ + DEFINE_PROP_SIGNED(_n, _s, _f, _d, qdev_prop_on_off_auto, OnOffAuto) +#define DEFINE_PROP_MULTIFD_COMPRESSION(_n, _s, _f, _d) \ + DEFINE_PROP_SIGNED(_n, _s, _f, _d, qdev_prop_multifd_compression, \ + MultiFDCompression) +#define DEFINE_PROP_LOSTTICKPOLICY(_n, _s, _f, _d) \ + DEFINE_PROP_SIGNED(_n, _s, _f, _d, qdev_prop_losttickpolicy, \ + LostTickPolicy) +#define DEFINE_PROP_BLOCKDEV_ON_ERROR(_n, _s, _f, _d) \ + DEFINE_PROP_SIGNED(_n, _s, _f, _d, qdev_prop_blockdev_on_error, \ + BlockdevOnError) +#define DEFINE_PROP_BIOS_CHS_TRANS(_n, _s, _f, _d) \ + DEFINE_PROP_SIGNED(_n, _s, _f, _d, qdev_prop_bios_chs_trans, int) +#define DEFINE_PROP_SIZE32(_n, _s, _f, _d) \ + DEFINE_PROP_UNSIGNED(_n, _s, _f, _d, qdev_prop_size32, uint32_t) +#define DEFINE_PROP_BLOCKSIZE(_n, _s, _f) \ + DEFINE_PROP_UNSIGNED(_n, _s, _f, 0, qdev_prop_blocksize, uint32_t) +#define DEFINE_PROP_PCI_HOST_DEVADDR(_n, _s, _f) \ + DEFINE_PROP(_n, _s, _f, qdev_prop_pci_host_devaddr, PCIHostDeviceAddress) +#define DEFINE_PROP_OFF_AUTO_PCIBAR(_n, _s, _f, _d) \ + DEFINE_PROP_SIGNED(_n, _s, _f, _d, qdev_prop_off_auto_pcibar, \ + OffAutoPCIBAR) +#define DEFINE_PROP_PCIE_LINK_SPEED(_n, _s, _f, _d) \ + DEFINE_PROP_SIGNED(_n, _s, _f, _d, qdev_prop_pcie_link_speed, \ + PCIExpLinkSpeed) +#define DEFINE_PROP_PCIE_LINK_WIDTH(_n, _s, _f, _d) \ + DEFINE_PROP_SIGNED(_n, _s, _f, _d, qdev_prop_pcie_link_width, \ + PCIExpLinkWidth) + +#define DEFINE_PROP_UUID(_name, _state, _field) { \ + .name = (_name), \ + .info = &qdev_prop_uuid, \ + .offset = offsetof(_state, _field) \ + + type_check(QemuUUID, typeof_field(_state, _field)), \ + .set_default = true, \ + } +#define DEFINE_PROP_AUDIODEV(_n, _s, _f) \ + DEFINE_PROP(_n, _s, _f, qdev_prop_audiodev, QEMUSoundCard) + +#define DEFINE_PROP_UUID_NODEFAULT(_name, _state, _field) { \ + .name = (_name), \ + .info = &qdev_prop_uuid, \ + .offset = offsetof(_state, _field) \ + + type_check(QemuUUID, typeof_field(_state, _field)), \ + } + +#define DEFINE_PROP_END_OF_LIST() \ + {} + +/* + * Set properties between creation and realization. + * + * Returns: %true on success, %false on error. + */ +bool qdev_prop_set_drive_err(DeviceState *dev, const char *name, + BlockBackend *value, Error **errp); + +/* + * Set properties between creation and realization. + * @value must be valid. Each property may be set at most once. + */ +void qdev_prop_set_bit(DeviceState *dev, const char *name, bool value); +void qdev_prop_set_uint8(DeviceState *dev, const char *name, uint8_t value); +void qdev_prop_set_uint16(DeviceState *dev, const char *name, uint16_t value); +void qdev_prop_set_uint32(DeviceState *dev, const char *name, uint32_t value); +void qdev_prop_set_int32(DeviceState *dev, const char *name, int32_t value); +void qdev_prop_set_uint64(DeviceState *dev, const char *name, uint64_t value); +void qdev_prop_set_string(DeviceState *dev, const char *name, const char *value); +void qdev_prop_set_chr(DeviceState *dev, const char *name, Chardev *value); +void qdev_prop_set_netdev(DeviceState *dev, const char *name, NetClientState *value); +void qdev_prop_set_drive(DeviceState *dev, const char *name, + BlockBackend *value); +void qdev_prop_set_macaddr(DeviceState *dev, const char *name, + const uint8_t *value); +void qdev_prop_set_enum(DeviceState *dev, const char *name, int value); + +void *qdev_get_prop_ptr(DeviceState *dev, Property *prop); + +void qdev_prop_register_global(GlobalProperty *prop); +const GlobalProperty *qdev_find_global_prop(DeviceState *dev, + const char *name); +int qdev_prop_check_globals(void); +void qdev_prop_set_globals(DeviceState *dev); +void error_set_from_qdev_prop_error(Error **errp, int ret, DeviceState *dev, + Property *prop, const char *value); + +/** + * qdev_property_add_static: + * @dev: Device to add the property to. + * @prop: The qdev property definition. + * + * Add a static QOM property to @dev for qdev property @prop. + * On error, store error in @errp. Static properties access data in a struct. + * The type of the QOM property is derived from prop->info. + */ +void qdev_property_add_static(DeviceState *dev, Property *prop); + +/** + * qdev_alias_all_properties: Create aliases on source for all target properties + * @target: Device which has properties to be aliased + * @source: Object to add alias properties to + * + * Add alias properties to the @source object for all qdev properties on + * the @target DeviceState. + * + * This is useful when @target is an internal implementation object + * owned by @source, and you want to expose all the properties of that + * implementation object as properties on the @source object so that users + * of @source can set them. + */ +void qdev_alias_all_properties(DeviceState *target, Object *source); + +/** + * @qdev_prop_set_after_realize: + * @dev: device + * @name: name of property + * @errp: indirect pointer to Error to be set + * Set the Error object to report that an attempt was made to set a property + * on a device after it has already been realized. This is a utility function + * which allows property-setter functions to easily report the error in + * a friendly format identifying both the device and the property. + */ +void qdev_prop_set_after_realize(DeviceState *dev, const char *name, + Error **errp); + +/** + * qdev_prop_allow_set_link_before_realize: + * + * Set the #Error object if an attempt is made to set the link after realize. + * This function should be used as the check() argument to + * object_property_add_link(). + */ +void qdev_prop_allow_set_link_before_realize(const Object *obj, + const char *name, + Object *val, Error **errp); + +#endif diff --git a/include/hw/rdma/rdma.h b/include/hw/rdma/rdma.h new file mode 100644 index 000000000..e77e43a17 --- /dev/null +++ b/include/hw/rdma/rdma.h @@ -0,0 +1,37 @@ +/* + * RDMA device interface + * + * Copyright (C) 2019 Oracle + * Copyright (C) 2019 Red Hat Inc + * + * Authors: + * Yuval Shaia + * + * This work is licensed under the terms of the GNU GPL, version 2 or later. + * See the COPYING file in the top-level directory. + * + */ + +#ifndef RDMA_H +#define RDMA_H + +#include "qom/object.h" + +#define INTERFACE_RDMA_PROVIDER "rdma" + +typedef struct RdmaProviderClass RdmaProviderClass; +DECLARE_CLASS_CHECKERS(RdmaProviderClass, RDMA_PROVIDER, + INTERFACE_RDMA_PROVIDER) +#define RDMA_PROVIDER(obj) \ + INTERFACE_CHECK(RdmaProvider, (obj), \ + INTERFACE_RDMA_PROVIDER) + +typedef struct RdmaProvider RdmaProvider; + +struct RdmaProviderClass { + InterfaceClass parent; + + void (*print_statistics)(Monitor *mon, RdmaProvider *obj); +}; + +#endif diff --git a/include/hw/register.h b/include/hw/register.h new file mode 100644 index 000000000..03c8926d2 --- /dev/null +++ b/include/hw/register.h @@ -0,0 +1,221 @@ +/* + * Register Definition API + * + * Copyright (c) 2016 Xilinx Inc. + * Copyright (c) 2013 Peter Crosthwaite + * + * This work is licensed under the terms of the GNU GPL, version 2. See + * the COPYING file in the top-level directory. + */ + +#ifndef REGISTER_H +#define REGISTER_H + +#include "hw/qdev-core.h" +#include "exec/memory.h" +#include "hw/registerfields.h" +#include "qom/object.h" + +typedef struct RegisterInfo RegisterInfo; +typedef struct RegisterAccessInfo RegisterAccessInfo; +typedef struct RegisterInfoArray RegisterInfoArray; + +/** + * Access description for a register that is part of guest accessible device + * state. + * + * @name: String name of the register + * @ro: whether or not the bit is read-only + * @w1c: bits with the common write 1 to clear semantic. + * @reset: reset value. + * @cor: Bits that are clear on read + * @rsvd: Bits that are reserved and should not be changed + * + * @pre_write: Pre write callback. Passed the value that's to be written, + * immediately before the actual write. The returned value is what is written, + * giving the handler a chance to modify the written value. + * @post_write: Post write callback. Passed the written value. Most write side + * effects should be implemented here. This is called during device reset. + * + * @post_read: Post read callback. Passes the value that is about to be returned + * for a read. The return value from this function is what is ultimately read, + * allowing this function to modify the value before return to the client. + */ + +struct RegisterAccessInfo { + const char *name; + uint64_t ro; + uint64_t w1c; + uint64_t reset; + uint64_t cor; + uint64_t rsvd; + uint64_t unimp; + + uint64_t (*pre_write)(RegisterInfo *reg, uint64_t val); + void (*post_write)(RegisterInfo *reg, uint64_t val); + + uint64_t (*post_read)(RegisterInfo *reg, uint64_t val); + + hwaddr addr; +}; + +/** + * A register that is part of guest accessible state + * @data: pointer to the register data. Will be cast + * to the relevant uint type depending on data_size. + * @data_size: Size of the register in bytes. Must be + * 1, 2, 4 or 8 + * + * @access: Access description of this register + * + * @debug: Whether or not verbose debug is enabled + * @prefix: String prefix for log and debug messages + * + * @opaque: Opaque data for the register + */ + +struct RegisterInfo { + /* */ + DeviceState parent_obj; + + /* */ + void *data; + int data_size; + + const RegisterAccessInfo *access; + + void *opaque; +}; + +#define TYPE_REGISTER "qemu,register" +DECLARE_INSTANCE_CHECKER(RegisterInfo, REGISTER, + TYPE_REGISTER) + +/** + * This structure is used to group all of the individual registers which are + * modeled using the RegisterInfo structure. + * + * @r is an array containing of all the relevant RegisterInfo structures. + * + * @num_elements is the number of elements in the array r + * + * @mem: optional Memory region for the register + */ + +struct RegisterInfoArray { + MemoryRegion mem; + + int num_elements; + RegisterInfo **r; + + bool debug; + const char *prefix; +}; + +/** + * write a value to a register, subject to its restrictions + * @reg: register to write to + * @val: value to write + * @we: write enable mask + * @prefix: The device prefix that should be printed before the register name + * @debug: Should the write operation debug information be printed? + */ + +void register_write(RegisterInfo *reg, uint64_t val, uint64_t we, + const char *prefix, bool debug); + +/** + * read a value from a register, subject to its restrictions + * @reg: register to read from + * @re: read enable mask + * @prefix: The device prefix that should be printed before the register name + * @debug: Should the read operation debug information be printed? + * returns: value read + */ + +uint64_t register_read(RegisterInfo *reg, uint64_t re, const char* prefix, + bool debug); + +/** + * Resets a register. This will also call the post_write hook if it exists. + * @reg: The register to reset. + */ + +void register_reset(RegisterInfo *reg); + +/** + * Initialize a register. + * @reg: Register to initialize + */ + +void register_init(RegisterInfo *reg); + +/** + * Memory API MMIO write handler that will write to a Register API register. + * @opaque: RegisterInfo to write to + * @addr: Address to write + * @value: Value to write + * @size: Number of bytes to write + */ + +void register_write_memory(void *opaque, hwaddr addr, uint64_t value, + unsigned size); + +/** + * Memory API MMIO read handler that will read from a Register API register. + * @opaque: RegisterInfo to read from + * @addr: Address to read + * @size: Number of bytes to read + * returns: Value read from register + */ + +uint64_t register_read_memory(void *opaque, hwaddr addr, unsigned size); + +/** + * Init a block of registers into a container MemoryRegion. A + * number of constant register definitions are parsed to create a corresponding + * array of RegisterInfo's. + * + * @owner: device owning the registers + * @rae: Register definitions to init + * @num: number of registers to init (length of @rae) + * @ri: Register array to init, must already be allocated + * @data: Array to use for register data, must already be allocated + * @ops: Memory region ops to access registers. + * @debug enabled: turn on/off verbose debug information + * @memory_size: Size of the memory region + * returns: A structure containing all of the registers and an initialized + * memory region (r_array->mem) the caller should add to a container. + */ + +RegisterInfoArray *register_init_block8(DeviceState *owner, + const RegisterAccessInfo *rae, + int num, RegisterInfo *ri, + uint8_t *data, + const MemoryRegionOps *ops, + bool debug_enabled, + uint64_t memory_size); + +RegisterInfoArray *register_init_block32(DeviceState *owner, + const RegisterAccessInfo *rae, + int num, RegisterInfo *ri, + uint32_t *data, + const MemoryRegionOps *ops, + bool debug_enabled, + uint64_t memory_size); + +/** + * This function should be called to cleanup the registers that were initialized + * when calling register_init_block32(). This function should only be called + * from the device's instance_finalize function. + * + * Any memory operations that the device performed that require cleanup (such + * as creating subregions) need to be called before calling this function. + * + * @r_array: A structure containing all of the registers, as returned by + * register_init_block32() + */ + +void register_finalize_block(RegisterInfoArray *r_array); + +#endif diff --git a/include/hw/registerfields.h b/include/hw/registerfields.h new file mode 100644 index 000000000..93fa4a84c --- /dev/null +++ b/include/hw/registerfields.h @@ -0,0 +1,103 @@ +/* + * Register Definition API: field macros + * + * Copyright (c) 2016 Xilinx Inc. + * Copyright (c) 2013 Peter Crosthwaite + * + * This work is licensed under the terms of the GNU GPL, version 2. See + * the COPYING file in the top-level directory. + */ + +#ifndef REGISTERFIELDS_H +#define REGISTERFIELDS_H + +#include "qemu/bitops.h" + +/* Define constants for a 32 bit register */ + +/* This macro will define A_FOO, for the byte address of a register + * as well as R_FOO for the uint32_t[] register number (A_FOO / 4). + */ +#define REG32(reg, addr) \ + enum { A_ ## reg = (addr) }; \ + enum { R_ ## reg = (addr) / 4 }; + +#define REG8(reg, addr) \ + enum { A_ ## reg = (addr) }; \ + enum { R_ ## reg = (addr) }; + +#define REG16(reg, addr) \ + enum { A_ ## reg = (addr) }; \ + enum { R_ ## reg = (addr) / 2 }; + +/* Define SHIFT, LENGTH and MASK constants for a field within a register */ + +/* This macro will define R_FOO_BAR_MASK, R_FOO_BAR_SHIFT and R_FOO_BAR_LENGTH + * constants for field BAR in register FOO. + */ +#define FIELD(reg, field, shift, length) \ + enum { R_ ## reg ## _ ## field ## _SHIFT = (shift)}; \ + enum { R_ ## reg ## _ ## field ## _LENGTH = (length)}; \ + enum { R_ ## reg ## _ ## field ## _MASK = \ + MAKE_64BIT_MASK(shift, length)}; + +/* Extract a field from a register */ +#define FIELD_EX8(storage, reg, field) \ + extract8((storage), R_ ## reg ## _ ## field ## _SHIFT, \ + R_ ## reg ## _ ## field ## _LENGTH) +#define FIELD_EX16(storage, reg, field) \ + extract16((storage), R_ ## reg ## _ ## field ## _SHIFT, \ + R_ ## reg ## _ ## field ## _LENGTH) +#define FIELD_EX32(storage, reg, field) \ + extract32((storage), R_ ## reg ## _ ## field ## _SHIFT, \ + R_ ## reg ## _ ## field ## _LENGTH) +#define FIELD_EX64(storage, reg, field) \ + extract64((storage), R_ ## reg ## _ ## field ## _SHIFT, \ + R_ ## reg ## _ ## field ## _LENGTH) + +/* Extract a field from an array of registers */ +#define ARRAY_FIELD_EX32(regs, reg, field) \ + FIELD_EX32((regs)[R_ ## reg], reg, field) + +/* Deposit a register field. + * Assigning values larger then the target field will result in + * compilation warnings. + */ +#define FIELD_DP8(storage, reg, field, val) ({ \ + struct { \ + unsigned int v:R_ ## reg ## _ ## field ## _LENGTH; \ + } _v = { .v = val }; \ + uint8_t _d; \ + _d = deposit32((storage), R_ ## reg ## _ ## field ## _SHIFT, \ + R_ ## reg ## _ ## field ## _LENGTH, _v.v); \ + _d; }) +#define FIELD_DP16(storage, reg, field, val) ({ \ + struct { \ + unsigned int v:R_ ## reg ## _ ## field ## _LENGTH; \ + } _v = { .v = val }; \ + uint16_t _d; \ + _d = deposit32((storage), R_ ## reg ## _ ## field ## _SHIFT, \ + R_ ## reg ## _ ## field ## _LENGTH, _v.v); \ + _d; }) +#define FIELD_DP32(storage, reg, field, val) ({ \ + struct { \ + unsigned int v:R_ ## reg ## _ ## field ## _LENGTH; \ + } _v = { .v = val }; \ + uint32_t _d; \ + _d = deposit32((storage), R_ ## reg ## _ ## field ## _SHIFT, \ + R_ ## reg ## _ ## field ## _LENGTH, _v.v); \ + _d; }) +#define FIELD_DP64(storage, reg, field, val) ({ \ + struct { \ + unsigned int v:R_ ## reg ## _ ## field ## _LENGTH; \ + } _v = { .v = val }; \ + uint64_t _d; \ + _d = deposit64((storage), R_ ## reg ## _ ## field ## _SHIFT, \ + R_ ## reg ## _ ## field ## _LENGTH, _v.v); \ + _d; }) + +/* Deposit a field to array of registers. */ +#define ARRAY_FIELD_DP32(regs, reg, field, val) \ + (regs)[R_ ## reg] = FIELD_DP32((regs)[R_ ## reg], reg, field, val); + +#endif diff --git a/include/hw/resettable.h b/include/hw/resettable.h new file mode 100644 index 000000000..bdcd1276b --- /dev/null +++ b/include/hw/resettable.h @@ -0,0 +1,246 @@ +/* + * Resettable interface header. + * + * Copyright (c) 2019 GreenSocs SAS + * + * Authors: + * Damien Hedde + * + * This work is licensed under the terms of the GNU GPL, version 2 or later. + * See the COPYING file in the top-level directory. + */ + +#ifndef HW_RESETTABLE_H +#define HW_RESETTABLE_H + +#include "qom/object.h" + +#define TYPE_RESETTABLE_INTERFACE "resettable" + +typedef struct ResettableClass ResettableClass; +DECLARE_CLASS_CHECKERS(ResettableClass, RESETTABLE, + TYPE_RESETTABLE_INTERFACE) + + +typedef struct ResettableState ResettableState; + +/** + * ResetType: + * Types of reset. + * + * + Cold: reset resulting from a power cycle of the object. + * + * TODO: Support has to be added to handle more types. In particular, + * ResettableState structure needs to be expanded. + */ +typedef enum ResetType { + RESET_TYPE_COLD, +} ResetType; + +/* + * ResettableClass: + * Interface for resettable objects. + * + * See docs/devel/reset.rst for more detailed information about how QEMU models + * reset. This whole API must only be used when holding the iothread mutex. + * + * All objects which can be reset must implement this interface; + * it is usually provided by a base class such as DeviceClass or BusClass. + * Every Resettable object must maintain some state tracking the + * progress of a reset operation by providing a ResettableState structure. + * The functions defined in this module take care of updating the + * state of the reset. + * The base class implementation of the interface provides this + * state and implements the associated method: get_state. + * + * Concrete object implementations (typically specific devices + * such as a UART model) should provide the functions + * for the phases.enter, phases.hold and phases.exit methods, which + * they can set in their class init function, either directly or + * by calling resettable_class_set_parent_phases(). + * The phase methods are guaranteed to only only ever be called once + * for any reset event, in the order 'enter', 'hold', 'exit'. + * An object will always move quickly from 'enter' to 'hold' + * but might remain in 'hold' for an arbitrary period of time + * before eventually reset is deasserted and the 'exit' phase is called. + * Object implementations should be prepared for functions handling + * inbound connections from other devices (such as qemu_irq handler + * functions) to be called at any point during reset after their + * 'enter' method has been called. + * + * Users of a resettable object should not call these methods + * directly, but instead use the function resettable_reset(). + * + * @phases.enter: This phase is called when the object enters reset. It + * should reset local state of the object, but it must not do anything that + * has a side-effect on other objects, such as raising or lowering a qemu_irq + * line or reading or writing guest memory. It takes the reset's type as + * argument. + * + * @phases.hold: This phase is called for entry into reset, once every object + * in the system which is being reset has had its @phases.enter method called. + * At this point devices can do actions that affect other objects. + * + * @phases.exit: This phase is called when the object leaves the reset state. + * Actions affecting other objects are permitted. + * + * @get_state: Mandatory method which must return a pointer to a + * ResettableState. + * + * @get_transitional_function: transitional method to handle Resettable objects + * not yet fully moved to this interface. It will be removed as soon as it is + * not needed anymore. This method is optional and may return a pointer to a + * function to be used instead of the phases. If the method exists and returns + * a non-NULL function pointer then that function is executed as a replacement + * of the 'hold' phase method taking the object as argument. The two other phase + * methods are not executed. + * + * @child_foreach: Executes a given callback on every Resettable child. Child + * in this context means a child in the qbus tree, so the children of a qbus + * are the devices on it, and the children of a device are all the buses it + * owns. This is not the same as the QOM object hierarchy. The function takes + * additional opaque and ResetType arguments which must be passed unmodified to + * the callback. + */ +typedef void (*ResettableEnterPhase)(Object *obj, ResetType type); +typedef void (*ResettableHoldPhase)(Object *obj); +typedef void (*ResettableExitPhase)(Object *obj); +typedef ResettableState * (*ResettableGetState)(Object *obj); +typedef void (*ResettableTrFunction)(Object *obj); +typedef ResettableTrFunction (*ResettableGetTrFunction)(Object *obj); +typedef void (*ResettableChildCallback)(Object *, void *opaque, + ResetType type); +typedef void (*ResettableChildForeach)(Object *obj, + ResettableChildCallback cb, + void *opaque, ResetType type); +typedef struct ResettablePhases { + ResettableEnterPhase enter; + ResettableHoldPhase hold; + ResettableExitPhase exit; +} ResettablePhases; +struct ResettableClass { + InterfaceClass parent_class; + + /* Phase methods */ + ResettablePhases phases; + + /* State access method */ + ResettableGetState get_state; + + /* Transitional method for legacy reset compatibility */ + ResettableGetTrFunction get_transitional_function; + + /* Hierarchy handling method */ + ResettableChildForeach child_foreach; +}; + +/** + * ResettableState: + * Structure holding reset related state. The fields should not be accessed + * directly; the definition is here to allow further inclusion into other + * objects. + * + * @count: Number of reset level the object is into. It is incremented when + * the reset operation starts and decremented when it finishes. + * @hold_phase_pending: flag which indicates that we need to invoke the 'hold' + * phase handler for this object. + * @exit_phase_in_progress: true if we are currently in the exit phase + */ +struct ResettableState { + unsigned count; + bool hold_phase_pending; + bool exit_phase_in_progress; +}; + +/** + * resettable_state_clear: + * Clear the state. It puts the state to the initial (zeroed) state required + * to reuse an object. Typically used in realize step of base classes + * implementing the interface. + */ +static inline void resettable_state_clear(ResettableState *state) +{ + memset(state, 0, sizeof(ResettableState)); +} + +/** + * resettable_reset: + * Trigger a reset on an object @obj of type @type. @obj must implement + * Resettable interface. + * + * Calling this function is equivalent to calling @resettable_assert_reset() + * then @resettable_release_reset(). + */ +void resettable_reset(Object *obj, ResetType type); + +/** + * resettable_assert_reset: + * Put an object @obj into reset. @obj must implement Resettable interface. + * + * @resettable_release_reset() must eventually be called after this call. + * There must be one call to @resettable_release_reset() per call of + * @resettable_assert_reset(), with the same type argument. + * + * NOTE: Until support for migration is added, the @resettable_release_reset() + * must not be delayed. It must occur just after @resettable_assert_reset() so + * that migration cannot be triggered in between. Prefer using + * @resettable_reset() for now. + */ +void resettable_assert_reset(Object *obj, ResetType type); + +/** + * resettable_release_reset: + * Release the object @obj from reset. @obj must implement Resettable interface. + * + * See @resettable_assert_reset() description for details. + */ +void resettable_release_reset(Object *obj, ResetType type); + +/** + * resettable_is_in_reset: + * Return true if @obj is under reset. + * + * @obj must implement Resettable interface. + */ +bool resettable_is_in_reset(Object *obj); + +/** + * resettable_change_parent: + * Indicate that the parent of Ressettable @obj is changing from @oldp to @newp. + * All 3 objects must implement resettable interface. @oldp or @newp may be + * NULL. + * + * This function will adapt the reset state of @obj so that it is coherent + * with the reset state of @newp. It may trigger @resettable_assert_reset() + * or @resettable_release_reset(). It will do such things only if the reset + * state of @newp and @oldp are different. + * + * When using this function during reset, it must only be called during + * a hold phase method. Calling this during enter or exit phase is an error. + */ +void resettable_change_parent(Object *obj, Object *newp, Object *oldp); + +/** + * resettable_cold_reset_fn: + * Helper to call resettable_reset((Object *) opaque, RESET_TYPE_COLD). + * + * This function is typically useful to register a reset handler with + * qemu_register_reset. + */ +void resettable_cold_reset_fn(void *opaque); + +/** + * resettable_class_set_parent_phases: + * + * Save @rc current reset phases into @parent_phases and override @rc phases + * by the given new methods (@enter, @hold and @exit). + * Each phase is overridden only if the new one is not NULL allowing to + * override a subset of phases. + */ +void resettable_class_set_parent_phases(ResettableClass *rc, + ResettableEnterPhase enter, + ResettableHoldPhase hold, + ResettableExitPhase exit, + ResettablePhases *parent_phases); + +#endif diff --git a/include/hw/riscv/boot.h b/include/hw/riscv/boot.h new file mode 100644 index 000000000..0b0198872 --- /dev/null +++ b/include/hw/riscv/boot.h @@ -0,0 +1,51 @@ +/* + * QEMU RISC-V Boot Helper + * + * Copyright (c) 2017 SiFive, Inc. + * Copyright (c) 2019 Alistair Francis + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2 or later, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along with + * this program. If not, see . + */ + +#ifndef RISCV_BOOT_H +#define RISCV_BOOT_H + +#include "exec/cpu-defs.h" +#include "hw/loader.h" + +bool riscv_is_32_bit(MachineState *machine); + +target_ulong riscv_calc_kernel_start_addr(MachineState *machine, + target_ulong firmware_end_addr); +target_ulong riscv_find_and_load_firmware(MachineState *machine, + const char *default_machine_firmware, + hwaddr firmware_load_addr, + symbol_fn_t sym_cb); +char *riscv_find_firmware(const char *firmware_filename); +target_ulong riscv_load_firmware(const char *firmware_filename, + hwaddr firmware_load_addr, + symbol_fn_t sym_cb); +target_ulong riscv_load_kernel(const char *kernel_filename, + target_ulong firmware_end_addr, + symbol_fn_t sym_cb); +hwaddr riscv_load_initrd(const char *filename, uint64_t mem_size, + uint64_t kernel_entry, hwaddr *start); +uint32_t riscv_load_fdt(hwaddr dram_start, uint64_t dram_size, void *fdt); +void riscv_setup_rom_reset_vec(hwaddr saddr, hwaddr rom_base, + hwaddr rom_size, uint64_t kernel_entry, + uint32_t fdt_load_addr, void *fdt); +void riscv_rom_copy_firmware_info(hwaddr rom_base, hwaddr rom_size, + uint32_t reset_vec_size, + uint64_t kernel_entry); + +#endif /* RISCV_BOOT_H */ diff --git a/include/hw/riscv/boot_opensbi.h b/include/hw/riscv/boot_opensbi.h new file mode 100644 index 000000000..0d5ddd6c3 --- /dev/null +++ b/include/hw/riscv/boot_opensbi.h @@ -0,0 +1,58 @@ +/* SPDX-License-Identifier: BSD-2-Clause */ +/* + * Copyright (c) 2019 Western Digital Corporation or its affiliates. + * + * Based on include/sbi/{fw_dynamic.h,sbi_scratch.h} from the OpenSBI project. + */ +#ifndef OPENSBI_H +#define OPENSBI_H + +/** Expected value of info magic ('OSBI' ascii string in hex) */ +#define FW_DYNAMIC_INFO_MAGIC_VALUE 0x4942534f + +/** Maximum supported info version */ +#define FW_DYNAMIC_INFO_VERSION 0x2 + +/** Possible next mode values */ +#define FW_DYNAMIC_INFO_NEXT_MODE_U 0x0 +#define FW_DYNAMIC_INFO_NEXT_MODE_S 0x1 +#define FW_DYNAMIC_INFO_NEXT_MODE_M 0x3 + +enum sbi_scratch_options { + /** Disable prints during boot */ + SBI_SCRATCH_NO_BOOT_PRINTS = (1 << 0), + /** Enable runtime debug prints */ + SBI_SCRATCH_DEBUG_PRINTS = (1 << 1), +}; + +/** Representation dynamic info passed by previous booting stage */ +struct fw_dynamic_info { + /** Info magic */ + target_long magic; + /** Info version */ + target_long version; + /** Next booting stage address */ + target_long next_addr; + /** Next booting stage mode */ + target_long next_mode; + /** Options for OpenSBI library */ + target_long options; + /** + * Preferred boot HART id + * + * It is possible that the previous booting stage uses same link + * address as the FW_DYNAMIC firmware. In this case, the relocation + * lottery mechanism can potentially overwrite the previous booting + * stage while other HARTs are still running in the previous booting + * stage leading to boot-time crash. To avoid this boot-time crash, + * the previous booting stage can specify last HART that will jump + * to the FW_DYNAMIC firmware as the preferred boot HART. + * + * To avoid specifying a preferred boot HART, the previous booting + * stage can set it to -1UL which will force the FW_DYNAMIC firmware + * to use the relocation lottery mechanism. + */ + target_long boot_hart; +}; + +#endif diff --git a/include/hw/riscv/microchip_pfsoc.h b/include/hw/riscv/microchip_pfsoc.h new file mode 100644 index 000000000..51d44637d --- /dev/null +++ b/include/hw/riscv/microchip_pfsoc.h @@ -0,0 +1,147 @@ +/* + * Microchip PolarFire SoC machine interface + * + * Copyright (c) 2020 Wind River Systems, Inc. + * + * Author: + * Bin Meng + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2 or later, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along with + * this program. If not, see . + */ + +#ifndef HW_MICROCHIP_PFSOC_H +#define HW_MICROCHIP_PFSOC_H + +#include "hw/char/mchp_pfsoc_mmuart.h" +#include "hw/dma/sifive_pdma.h" +#include "hw/misc/mchp_pfsoc_dmc.h" +#include "hw/misc/mchp_pfsoc_ioscb.h" +#include "hw/misc/mchp_pfsoc_sysreg.h" +#include "hw/net/cadence_gem.h" +#include "hw/sd/cadence_sdhci.h" + +typedef struct MicrochipPFSoCState { + /*< private >*/ + DeviceState parent_obj; + + /*< public >*/ + CPUClusterState e_cluster; + CPUClusterState u_cluster; + RISCVHartArrayState e_cpus; + RISCVHartArrayState u_cpus; + DeviceState *plic; + MchpPfSoCDdrSgmiiPhyState ddr_sgmii_phy; + MchpPfSoCDdrCfgState ddr_cfg; + MchpPfSoCIoscbState ioscb; + MchpPfSoCMMUartState *serial0; + MchpPfSoCMMUartState *serial1; + MchpPfSoCMMUartState *serial2; + MchpPfSoCMMUartState *serial3; + MchpPfSoCMMUartState *serial4; + MchpPfSoCSysregState sysreg; + SiFivePDMAState dma; + CadenceGEMState gem0; + CadenceGEMState gem1; + CadenceSDHCIState sdhci; +} MicrochipPFSoCState; + +#define TYPE_MICROCHIP_PFSOC "microchip.pfsoc" +#define MICROCHIP_PFSOC(obj) \ + OBJECT_CHECK(MicrochipPFSoCState, (obj), TYPE_MICROCHIP_PFSOC) + +typedef struct MicrochipIcicleKitState { + /*< private >*/ + MachineState parent_obj; + + /*< public >*/ + MicrochipPFSoCState soc; +} MicrochipIcicleKitState; + +#define TYPE_MICROCHIP_ICICLE_KIT_MACHINE \ + MACHINE_TYPE_NAME("microchip-icicle-kit") +#define MICROCHIP_ICICLE_KIT_MACHINE(obj) \ + OBJECT_CHECK(MicrochipIcicleKitState, (obj), \ + TYPE_MICROCHIP_ICICLE_KIT_MACHINE) + +enum { + MICROCHIP_PFSOC_RSVD0, + MICROCHIP_PFSOC_DEBUG, + MICROCHIP_PFSOC_E51_DTIM, + MICROCHIP_PFSOC_BUSERR_UNIT0, + MICROCHIP_PFSOC_BUSERR_UNIT1, + MICROCHIP_PFSOC_BUSERR_UNIT2, + MICROCHIP_PFSOC_BUSERR_UNIT3, + MICROCHIP_PFSOC_BUSERR_UNIT4, + MICROCHIP_PFSOC_CLINT, + MICROCHIP_PFSOC_L2CC, + MICROCHIP_PFSOC_DMA, + MICROCHIP_PFSOC_L2LIM, + MICROCHIP_PFSOC_PLIC, + MICROCHIP_PFSOC_MMUART0, + MICROCHIP_PFSOC_SYSREG, + MICROCHIP_PFSOC_MPUCFG, + MICROCHIP_PFSOC_DDR_SGMII_PHY, + MICROCHIP_PFSOC_EMMC_SD, + MICROCHIP_PFSOC_DDR_CFG, + MICROCHIP_PFSOC_MMUART1, + MICROCHIP_PFSOC_MMUART2, + MICROCHIP_PFSOC_MMUART3, + MICROCHIP_PFSOC_MMUART4, + MICROCHIP_PFSOC_I2C1, + MICROCHIP_PFSOC_GEM0, + MICROCHIP_PFSOC_GEM1, + MICROCHIP_PFSOC_GPIO0, + MICROCHIP_PFSOC_GPIO1, + MICROCHIP_PFSOC_GPIO2, + MICROCHIP_PFSOC_ENVM_CFG, + MICROCHIP_PFSOC_ENVM_DATA, + MICROCHIP_PFSOC_IOSCB, + MICROCHIP_PFSOC_DRAM_LO, + MICROCHIP_PFSOC_DRAM_LO_ALIAS, + MICROCHIP_PFSOC_DRAM_HI, + MICROCHIP_PFSOC_DRAM_HI_ALIAS +}; + +enum { + MICROCHIP_PFSOC_DMA_IRQ0 = 5, + MICROCHIP_PFSOC_DMA_IRQ1 = 6, + MICROCHIP_PFSOC_DMA_IRQ2 = 7, + MICROCHIP_PFSOC_DMA_IRQ3 = 8, + MICROCHIP_PFSOC_DMA_IRQ4 = 9, + MICROCHIP_PFSOC_DMA_IRQ5 = 10, + MICROCHIP_PFSOC_DMA_IRQ6 = 11, + MICROCHIP_PFSOC_DMA_IRQ7 = 12, + MICROCHIP_PFSOC_GEM0_IRQ = 64, + MICROCHIP_PFSOC_GEM1_IRQ = 70, + MICROCHIP_PFSOC_EMMC_SD_IRQ = 88, + MICROCHIP_PFSOC_MMUART0_IRQ = 90, + MICROCHIP_PFSOC_MMUART1_IRQ = 91, + MICROCHIP_PFSOC_MMUART2_IRQ = 92, + MICROCHIP_PFSOC_MMUART3_IRQ = 93, + MICROCHIP_PFSOC_MMUART4_IRQ = 94, +}; + +#define MICROCHIP_PFSOC_MANAGEMENT_CPU_COUNT 1 +#define MICROCHIP_PFSOC_COMPUTE_CPU_COUNT 4 + +#define MICROCHIP_PFSOC_PLIC_HART_CONFIG "MS" +#define MICROCHIP_PFSOC_PLIC_NUM_SOURCES 185 +#define MICROCHIP_PFSOC_PLIC_NUM_PRIORITIES 7 +#define MICROCHIP_PFSOC_PLIC_PRIORITY_BASE 0x04 +#define MICROCHIP_PFSOC_PLIC_PENDING_BASE 0x1000 +#define MICROCHIP_PFSOC_PLIC_ENABLE_BASE 0x2000 +#define MICROCHIP_PFSOC_PLIC_ENABLE_STRIDE 0x80 +#define MICROCHIP_PFSOC_PLIC_CONTEXT_BASE 0x200000 +#define MICROCHIP_PFSOC_PLIC_CONTEXT_STRIDE 0x1000 + +#endif /* HW_MICROCHIP_PFSOC_H */ diff --git a/include/hw/riscv/numa.h b/include/hw/riscv/numa.h new file mode 100644 index 000000000..fcce942ce --- /dev/null +++ b/include/hw/riscv/numa.h @@ -0,0 +1,113 @@ +/* + * QEMU RISC-V NUMA Helper + * + * Copyright (c) 2020 Western Digital Corporation or its affiliates. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2 or later, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along with + * this program. If not, see . + */ + +#ifndef RISCV_NUMA_H +#define RISCV_NUMA_H + +#include "hw/sysbus.h" +#include "sysemu/numa.h" + +/** + * riscv_socket_count: + * @ms: pointer to machine state + * + * Returns: number of sockets for a numa system and 1 for a non-numa system + */ +int riscv_socket_count(const MachineState *ms); + +/** + * riscv_socket_first_hartid: + * @ms: pointer to machine state + * @socket_id: socket index + * + * Returns: first hartid for a valid socket and -1 for an invalid socket + */ +int riscv_socket_first_hartid(const MachineState *ms, int socket_id); + +/** + * riscv_socket_last_hartid: + * @ms: pointer to machine state + * @socket_id: socket index + * + * Returns: last hartid for a valid socket and -1 for an invalid socket + */ +int riscv_socket_last_hartid(const MachineState *ms, int socket_id); + +/** + * riscv_socket_hart_count: + * @ms: pointer to machine state + * @socket_id: socket index + * + * Returns: number of harts for a valid socket and -1 for an invalid socket + */ +int riscv_socket_hart_count(const MachineState *ms, int socket_id); + +/** + * riscv_socket_mem_offset: + * @ms: pointer to machine state + * @socket_id: socket index + * + * Returns: offset of ram belonging to given socket + */ +uint64_t riscv_socket_mem_offset(const MachineState *ms, int socket_id); + +/** + * riscv_socket_mem_size: + * @ms: pointer to machine state + * @socket_id: socket index + * + * Returns: size of ram belonging to given socket + */ +uint64_t riscv_socket_mem_size(const MachineState *ms, int socket_id); + +/** + * riscv_socket_check_hartids: + * @ms: pointer to machine state + * @socket_id: socket index + * + * Returns: true if hardids belonging to given socket are contiguous else false + */ +bool riscv_socket_check_hartids(const MachineState *ms, int socket_id); + +/** + * riscv_socket_fdt_write_id: + * @ms: pointer to machine state + * @socket_id: socket index + * + * Write NUMA node-id FDT property for given FDT node + */ +void riscv_socket_fdt_write_id(const MachineState *ms, void *fdt, + const char *node_name, int socket_id); + +/** + * riscv_socket_fdt_write_distance_matrix: + * @ms: pointer to machine state + * @socket_id: socket index + * + * Write NUMA distance matrix in FDT for given machine + */ +void riscv_socket_fdt_write_distance_matrix(const MachineState *ms, void *fdt); + +CpuInstanceProperties +riscv_numa_cpu_index_to_props(MachineState *ms, unsigned cpu_index); + +int64_t riscv_numa_get_default_cpu_node_id(const MachineState *ms, int idx); + +const CPUArchIdList *riscv_numa_possible_cpu_arch_ids(MachineState *ms); + +#endif /* RISCV_NUMA_H */ diff --git a/include/hw/riscv/opentitan.h b/include/hw/riscv/opentitan.h new file mode 100644 index 000000000..5ff0c0f85 --- /dev/null +++ b/include/hw/riscv/opentitan.h @@ -0,0 +1,84 @@ +/* + * QEMU RISC-V Board Compatible with OpenTitan FPGA platform + * + * Copyright (c) 2020 Western Digital + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2 or later, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along with + * this program. If not, see . + */ + +#ifndef HW_OPENTITAN_H +#define HW_OPENTITAN_H + +#include "hw/riscv/riscv_hart.h" +#include "hw/intc/ibex_plic.h" +#include "hw/char/ibex_uart.h" +#include "qom/object.h" + +#define TYPE_RISCV_IBEX_SOC "riscv.lowrisc.ibex.soc" +OBJECT_DECLARE_SIMPLE_TYPE(LowRISCIbexSoCState, RISCV_IBEX_SOC) + +struct LowRISCIbexSoCState { + /*< private >*/ + SysBusDevice parent_obj; + + /*< public >*/ + RISCVHartArrayState cpus; + IbexPlicState plic; + IbexUartState uart; + + MemoryRegion flash_mem; + MemoryRegion rom; +}; + +typedef struct OpenTitanState { + /*< private >*/ + SysBusDevice parent_obj; + + /*< public >*/ + LowRISCIbexSoCState soc; +} OpenTitanState; + +enum { + IBEX_DEV_ROM, + IBEX_DEV_RAM, + IBEX_DEV_FLASH, + IBEX_DEV_UART, + IBEX_DEV_GPIO, + IBEX_DEV_SPI, + IBEX_DEV_FLASH_CTRL, + IBEX_DEV_RV_TIMER, + IBEX_DEV_AES, + IBEX_DEV_HMAC, + IBEX_DEV_PLIC, + IBEX_DEV_PWRMGR, + IBEX_DEV_RSTMGR, + IBEX_DEV_CLKMGR, + IBEX_DEV_PINMUX, + IBEX_DEV_ALERT_HANDLER, + IBEX_DEV_NMI_GEN, + IBEX_DEV_USBDEV, + IBEX_DEV_PADCTRL, +}; + +enum { + IBEX_UART_RX_PARITY_ERR_IRQ = 0x28, + IBEX_UART_RX_TIMEOUT_IRQ = 0x27, + IBEX_UART_RX_BREAK_ERR_IRQ = 0x26, + IBEX_UART_RX_FRAME_ERR_IRQ = 0x25, + IBEX_UART_RX_OVERFLOW_IRQ = 0x24, + IBEX_UART_TX_EMPTY_IRQ = 0x23, + IBEX_UART_RX_WATERMARK_IRQ = 0x22, + IBEX_UART_TX_WATERMARK_IRQ = 0x21, +}; + +#endif diff --git a/include/hw/riscv/riscv_hart.h b/include/hw/riscv/riscv_hart.h new file mode 100644 index 000000000..bbc21cdc9 --- /dev/null +++ b/include/hw/riscv/riscv_hart.h @@ -0,0 +1,44 @@ +/* + * QEMU RISC-V Hart Array interface + * + * Copyright (c) 2017 SiFive, Inc. + * + * Holds the state of a heterogenous array of RISC-V harts + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2 or later, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along with + * this program. If not, see . + */ + +#ifndef HW_RISCV_HART_H +#define HW_RISCV_HART_H + +#include "hw/sysbus.h" +#include "target/riscv/cpu.h" +#include "qom/object.h" + +#define TYPE_RISCV_HART_ARRAY "riscv.hart_array" + +OBJECT_DECLARE_SIMPLE_TYPE(RISCVHartArrayState, RISCV_HART_ARRAY) + +struct RISCVHartArrayState { + /*< private >*/ + SysBusDevice parent_obj; + + /*< public >*/ + uint32_t num_harts; + uint32_t hartid_base; + char *cpu_type; + uint64_t resetvec; + RISCVCPU *harts; +}; + +#endif diff --git a/include/hw/riscv/sifive_cpu.h b/include/hw/riscv/sifive_cpu.h new file mode 100644 index 000000000..136799633 --- /dev/null +++ b/include/hw/riscv/sifive_cpu.h @@ -0,0 +1,31 @@ +/* + * SiFive CPU types + * + * Copyright (c) 2017 SiFive, Inc. + * Copyright (c) 2019 Bin Meng + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2 or later, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along with + * this program. If not, see . + */ + +#ifndef HW_SIFIVE_CPU_H +#define HW_SIFIVE_CPU_H + +#if defined(TARGET_RISCV32) +#define SIFIVE_E_CPU TYPE_RISCV_CPU_SIFIVE_E31 +#define SIFIVE_U_CPU TYPE_RISCV_CPU_SIFIVE_U34 +#elif defined(TARGET_RISCV64) +#define SIFIVE_E_CPU TYPE_RISCV_CPU_SIFIVE_E51 +#define SIFIVE_U_CPU TYPE_RISCV_CPU_SIFIVE_U54 +#endif + +#endif /* HW_SIFIVE_CPU_H */ diff --git a/include/hw/riscv/sifive_e.h b/include/hw/riscv/sifive_e.h new file mode 100644 index 000000000..83604da80 --- /dev/null +++ b/include/hw/riscv/sifive_e.h @@ -0,0 +1,93 @@ +/* + * SiFive E series machine interface + * + * Copyright (c) 2017 SiFive, Inc. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2 or later, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along with + * this program. If not, see . + */ + +#ifndef HW_SIFIVE_E_H +#define HW_SIFIVE_E_H + +#include "hw/riscv/riscv_hart.h" +#include "hw/riscv/sifive_cpu.h" +#include "hw/gpio/sifive_gpio.h" + +#define TYPE_RISCV_E_SOC "riscv.sifive.e.soc" +#define RISCV_E_SOC(obj) \ + OBJECT_CHECK(SiFiveESoCState, (obj), TYPE_RISCV_E_SOC) + +typedef struct SiFiveESoCState { + /*< private >*/ + DeviceState parent_obj; + + /*< public >*/ + RISCVHartArrayState cpus; + DeviceState *plic; + SIFIVEGPIOState gpio; + MemoryRegion xip_mem; + MemoryRegion mask_rom; +} SiFiveESoCState; + +typedef struct SiFiveEState { + /*< private >*/ + SysBusDevice parent_obj; + + /*< public >*/ + SiFiveESoCState soc; + bool revb; +} SiFiveEState; + +#define TYPE_RISCV_E_MACHINE MACHINE_TYPE_NAME("sifive_e") +#define RISCV_E_MACHINE(obj) \ + OBJECT_CHECK(SiFiveEState, (obj), TYPE_RISCV_E_MACHINE) + +enum { + SIFIVE_E_DEV_DEBUG, + SIFIVE_E_DEV_MROM, + SIFIVE_E_DEV_OTP, + SIFIVE_E_DEV_CLINT, + SIFIVE_E_DEV_PLIC, + SIFIVE_E_DEV_AON, + SIFIVE_E_DEV_PRCI, + SIFIVE_E_DEV_OTP_CTRL, + SIFIVE_E_DEV_GPIO0, + SIFIVE_E_DEV_UART0, + SIFIVE_E_DEV_QSPI0, + SIFIVE_E_DEV_PWM0, + SIFIVE_E_DEV_UART1, + SIFIVE_E_DEV_QSPI1, + SIFIVE_E_DEV_PWM1, + SIFIVE_E_DEV_QSPI2, + SIFIVE_E_DEV_PWM2, + SIFIVE_E_DEV_XIP, + SIFIVE_E_DEV_DTIM +}; + +enum { + SIFIVE_E_UART0_IRQ = 3, + SIFIVE_E_UART1_IRQ = 4, + SIFIVE_E_GPIO0_IRQ0 = 8 +}; + +#define SIFIVE_E_PLIC_HART_CONFIG "M" +#define SIFIVE_E_PLIC_NUM_SOURCES 127 +#define SIFIVE_E_PLIC_NUM_PRIORITIES 7 +#define SIFIVE_E_PLIC_PRIORITY_BASE 0x04 +#define SIFIVE_E_PLIC_PENDING_BASE 0x1000 +#define SIFIVE_E_PLIC_ENABLE_BASE 0x2000 +#define SIFIVE_E_PLIC_ENABLE_STRIDE 0x80 +#define SIFIVE_E_PLIC_CONTEXT_BASE 0x200000 +#define SIFIVE_E_PLIC_CONTEXT_STRIDE 0x1000 + +#endif diff --git a/include/hw/riscv/sifive_u.h b/include/hw/riscv/sifive_u.h new file mode 100644 index 000000000..a9f7b4a08 --- /dev/null +++ b/include/hw/riscv/sifive_u.h @@ -0,0 +1,150 @@ +/* + * SiFive U series machine interface + * + * Copyright (c) 2017 SiFive, Inc. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2 or later, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along with + * this program. If not, see . + */ + +#ifndef HW_SIFIVE_U_H +#define HW_SIFIVE_U_H + +#include "hw/dma/sifive_pdma.h" +#include "hw/net/cadence_gem.h" +#include "hw/riscv/riscv_hart.h" +#include "hw/riscv/sifive_cpu.h" +#include "hw/gpio/sifive_gpio.h" +#include "hw/misc/sifive_u_otp.h" +#include "hw/misc/sifive_u_prci.h" + +#define TYPE_RISCV_U_SOC "riscv.sifive.u.soc" +#define RISCV_U_SOC(obj) \ + OBJECT_CHECK(SiFiveUSoCState, (obj), TYPE_RISCV_U_SOC) + +typedef struct SiFiveUSoCState { + /*< private >*/ + DeviceState parent_obj; + + /*< public >*/ + CPUClusterState e_cluster; + CPUClusterState u_cluster; + RISCVHartArrayState e_cpus; + RISCVHartArrayState u_cpus; + DeviceState *plic; + SiFiveUPRCIState prci; + SIFIVEGPIOState gpio; + SiFiveUOTPState otp; + SiFivePDMAState dma; + CadenceGEMState gem; + + uint32_t serial; + char *cpu_type; +} SiFiveUSoCState; + +#define TYPE_RISCV_U_MACHINE MACHINE_TYPE_NAME("sifive_u") +#define RISCV_U_MACHINE(obj) \ + OBJECT_CHECK(SiFiveUState, (obj), TYPE_RISCV_U_MACHINE) + +typedef struct SiFiveUState { + /*< private >*/ + MachineState parent_obj; + + /*< public >*/ + SiFiveUSoCState soc; + + void *fdt; + int fdt_size; + + bool start_in_flash; + uint32_t msel; + uint32_t serial; +} SiFiveUState; + +enum { + SIFIVE_U_DEV_DEBUG, + SIFIVE_U_DEV_MROM, + SIFIVE_U_DEV_CLINT, + SIFIVE_U_DEV_L2CC, + SIFIVE_U_DEV_PDMA, + SIFIVE_U_DEV_L2LIM, + SIFIVE_U_DEV_PLIC, + SIFIVE_U_DEV_PRCI, + SIFIVE_U_DEV_UART0, + SIFIVE_U_DEV_UART1, + SIFIVE_U_DEV_GPIO, + SIFIVE_U_DEV_OTP, + SIFIVE_U_DEV_DMC, + SIFIVE_U_DEV_FLASH0, + SIFIVE_U_DEV_DRAM, + SIFIVE_U_DEV_GEM, + SIFIVE_U_DEV_GEM_MGMT +}; + +enum { + SIFIVE_U_L2CC_IRQ0 = 1, + SIFIVE_U_L2CC_IRQ1 = 2, + SIFIVE_U_L2CC_IRQ2 = 3, + SIFIVE_U_UART0_IRQ = 4, + SIFIVE_U_UART1_IRQ = 5, + SIFIVE_U_GPIO_IRQ0 = 7, + SIFIVE_U_GPIO_IRQ1 = 8, + SIFIVE_U_GPIO_IRQ2 = 9, + SIFIVE_U_GPIO_IRQ3 = 10, + SIFIVE_U_GPIO_IRQ4 = 11, + SIFIVE_U_GPIO_IRQ5 = 12, + SIFIVE_U_GPIO_IRQ6 = 13, + SIFIVE_U_GPIO_IRQ7 = 14, + SIFIVE_U_GPIO_IRQ8 = 15, + SIFIVE_U_GPIO_IRQ9 = 16, + SIFIVE_U_GPIO_IRQ10 = 17, + SIFIVE_U_GPIO_IRQ11 = 18, + SIFIVE_U_GPIO_IRQ12 = 19, + SIFIVE_U_GPIO_IRQ13 = 20, + SIFIVE_U_GPIO_IRQ14 = 21, + SIFIVE_U_GPIO_IRQ15 = 22, + SIFIVE_U_PDMA_IRQ0 = 23, + SIFIVE_U_PDMA_IRQ1 = 24, + SIFIVE_U_PDMA_IRQ2 = 25, + SIFIVE_U_PDMA_IRQ3 = 26, + SIFIVE_U_PDMA_IRQ4 = 27, + SIFIVE_U_PDMA_IRQ5 = 28, + SIFIVE_U_PDMA_IRQ6 = 29, + SIFIVE_U_PDMA_IRQ7 = 30, + SIFIVE_U_GEM_IRQ = 0x35 +}; + +enum { + SIFIVE_U_HFCLK_FREQ = 33333333, + SIFIVE_U_RTCCLK_FREQ = 1000000 +}; + +enum { + MSEL_MEMMAP_QSPI0_FLASH = 1, + MSEL_L2LIM_QSPI0_FLASH = 6, + MSEL_L2LIM_QSPI2_SD = 11 +}; + +#define SIFIVE_U_MANAGEMENT_CPU_COUNT 1 +#define SIFIVE_U_COMPUTE_CPU_COUNT 4 + +#define SIFIVE_U_PLIC_HART_CONFIG "MS" +#define SIFIVE_U_PLIC_NUM_SOURCES 54 +#define SIFIVE_U_PLIC_NUM_PRIORITIES 7 +#define SIFIVE_U_PLIC_PRIORITY_BASE 0x04 +#define SIFIVE_U_PLIC_PENDING_BASE 0x1000 +#define SIFIVE_U_PLIC_ENABLE_BASE 0x2000 +#define SIFIVE_U_PLIC_ENABLE_STRIDE 0x80 +#define SIFIVE_U_PLIC_CONTEXT_BASE 0x200000 +#define SIFIVE_U_PLIC_CONTEXT_STRIDE 0x1000 + +#endif diff --git a/include/hw/riscv/spike.h b/include/hw/riscv/spike.h new file mode 100644 index 000000000..cddeca2e7 --- /dev/null +++ b/include/hw/riscv/spike.h @@ -0,0 +1,56 @@ +/* + * Spike machine interface + * + * Copyright (c) 2017 SiFive, Inc. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2 or later, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along with + * this program. If not, see . + */ + +#ifndef HW_RISCV_SPIKE_H +#define HW_RISCV_SPIKE_H + +#include "hw/riscv/riscv_hart.h" +#include "hw/sysbus.h" +#include "qom/object.h" + +#define SPIKE_CPUS_MAX 8 +#define SPIKE_SOCKETS_MAX 8 + +#define TYPE_SPIKE_MACHINE MACHINE_TYPE_NAME("spike") +typedef struct SpikeState SpikeState; +DECLARE_INSTANCE_CHECKER(SpikeState, SPIKE_MACHINE, + TYPE_SPIKE_MACHINE) + +struct SpikeState { + /*< private >*/ + MachineState parent; + + /*< public >*/ + RISCVHartArrayState soc[SPIKE_SOCKETS_MAX]; + void *fdt; + int fdt_size; +}; + +enum { + SPIKE_MROM, + SPIKE_CLINT, + SPIKE_DRAM +}; + +#if defined(TARGET_RISCV32) +#define SPIKE_V1_10_0_CPU TYPE_RISCV_CPU_BASE32 +#elif defined(TARGET_RISCV64) +#define SPIKE_V1_10_0_CPU TYPE_RISCV_CPU_BASE64 +#endif + +#endif diff --git a/include/hw/riscv/virt.h b/include/hw/riscv/virt.h new file mode 100644 index 000000000..b4ed9a32e --- /dev/null +++ b/include/hw/riscv/virt.h @@ -0,0 +1,98 @@ +/* + * QEMU RISC-V VirtIO machine interface + * + * Copyright (c) 2017 SiFive, Inc. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2 or later, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along with + * this program. If not, see . + */ + +#ifndef HW_RISCV_VIRT_H +#define HW_RISCV_VIRT_H + +#include "hw/riscv/riscv_hart.h" +#include "hw/sysbus.h" +#include "hw/block/flash.h" +#include "qom/object.h" + +#define VIRT_CPUS_MAX 8 +#define VIRT_SOCKETS_MAX 8 + +#define TYPE_RISCV_VIRT_MACHINE MACHINE_TYPE_NAME("virt") +typedef struct RISCVVirtState RISCVVirtState; +DECLARE_INSTANCE_CHECKER(RISCVVirtState, RISCV_VIRT_MACHINE, + TYPE_RISCV_VIRT_MACHINE) + +struct RISCVVirtState { + /*< private >*/ + MachineState parent; + + /*< public >*/ + RISCVHartArrayState soc[VIRT_SOCKETS_MAX]; + DeviceState *plic[VIRT_SOCKETS_MAX]; + PFlashCFI01 *flash[2]; + + void *fdt; + int fdt_size; +}; + +enum { + VIRT_DEBUG, + VIRT_MROM, + VIRT_TEST, + VIRT_RTC, + VIRT_CLINT, + VIRT_PLIC, + VIRT_UART0, + VIRT_VIRTIO, + VIRT_FLASH, + VIRT_DRAM, + VIRT_PCIE_MMIO, + VIRT_PCIE_PIO, + VIRT_PCIE_ECAM +}; + +enum { + UART0_IRQ = 10, + RTC_IRQ = 11, + VIRTIO_IRQ = 1, /* 1 to 8 */ + VIRTIO_COUNT = 8, + PCIE_IRQ = 0x20, /* 32 to 35 */ + VIRTIO_NDEV = 0x35 /* Arbitrary maximum number of interrupts */ +}; + +#define VIRT_PLIC_HART_CONFIG "MS" +#define VIRT_PLIC_NUM_SOURCES 127 +#define VIRT_PLIC_NUM_PRIORITIES 7 +#define VIRT_PLIC_PRIORITY_BASE 0x04 +#define VIRT_PLIC_PENDING_BASE 0x1000 +#define VIRT_PLIC_ENABLE_BASE 0x2000 +#define VIRT_PLIC_ENABLE_STRIDE 0x80 +#define VIRT_PLIC_CONTEXT_BASE 0x200000 +#define VIRT_PLIC_CONTEXT_STRIDE 0x1000 +#define VIRT_PLIC_SIZE(__num_context) \ + (VIRT_PLIC_CONTEXT_BASE + (__num_context) * VIRT_PLIC_CONTEXT_STRIDE) + +#define FDT_PCI_ADDR_CELLS 3 +#define FDT_PCI_INT_CELLS 1 +#define FDT_PLIC_ADDR_CELLS 0 +#define FDT_PLIC_INT_CELLS 1 +#define FDT_INT_MAP_WIDTH (FDT_PCI_ADDR_CELLS + FDT_PCI_INT_CELLS + 1 + \ + FDT_PLIC_ADDR_CELLS + FDT_PLIC_INT_CELLS) + +#if defined(TARGET_RISCV32) +#define VIRT_CPU TYPE_RISCV_CPU_BASE32 +#elif defined(TARGET_RISCV64) +#define VIRT_CPU TYPE_RISCV_CPU_BASE64 +#endif + +#endif diff --git a/include/hw/rtc/allwinner-rtc.h b/include/hw/rtc/allwinner-rtc.h new file mode 100644 index 000000000..bf415431c --- /dev/null +++ b/include/hw/rtc/allwinner-rtc.h @@ -0,0 +1,129 @@ +/* + * Allwinner Real Time Clock emulation + * + * Copyright (C) 2019 Niek Linnenbank + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + */ + +#ifndef HW_MISC_ALLWINNER_RTC_H +#define HW_MISC_ALLWINNER_RTC_H + +#include "qom/object.h" +#include "hw/sysbus.h" + +/** + * Constants + * @{ + */ + +/** Highest register address used by RTC device */ +#define AW_RTC_REGS_MAXADDR (0x200) + +/** Total number of known registers */ +#define AW_RTC_REGS_NUM (AW_RTC_REGS_MAXADDR / sizeof(uint32_t)) + +/** @} */ + +/** + * Object model types + * @{ + */ + +/** Generic Allwinner RTC device (abstract) */ +#define TYPE_AW_RTC "allwinner-rtc" + +/** Allwinner RTC sun4i family (A10, A12) */ +#define TYPE_AW_RTC_SUN4I TYPE_AW_RTC "-sun4i" + +/** Allwinner RTC sun6i family and newer (A31, H2+, H3, etc) */ +#define TYPE_AW_RTC_SUN6I TYPE_AW_RTC "-sun6i" + +/** Allwinner RTC sun7i family (A20) */ +#define TYPE_AW_RTC_SUN7I TYPE_AW_RTC "-sun7i" + +/** @} */ + +/** + * Object model macros + * @{ + */ + +OBJECT_DECLARE_TYPE(AwRtcState, AwRtcClass, AW_RTC) + +/** @} */ + +/** + * Allwinner RTC per-object instance state. + */ +struct AwRtcState { + /*< private >*/ + SysBusDevice parent_obj; + /*< public >*/ + + /** + * Actual year represented by the device when year counter is zero + * + * Can be overridden by the user using the corresponding 'base-year' + * property. The base year used by the target OS driver can vary, for + * example the Linux driver for sun6i uses 1970 while NetBSD uses 2000. + */ + int base_year; + + /** Maps I/O registers in physical memory */ + MemoryRegion iomem; + + /** Array of hardware registers */ + uint32_t regs[AW_RTC_REGS_NUM]; + +}; + +/** + * Allwinner RTC class-level struct. + * + * This struct is filled by each sunxi device specific code + * such that the generic code can use this struct to support + * all devices. + */ +struct AwRtcClass { + /*< private >*/ + SysBusDeviceClass parent_class; + /*< public >*/ + + /** Defines device specific register map */ + const uint8_t *regmap; + + /** Size of the regmap in bytes */ + size_t regmap_size; + + /** + * Read device specific register + * + * @offset: register offset to read + * @return true if register read successful, false otherwise + */ + bool (*read)(AwRtcState *s, uint32_t offset); + + /** + * Write device specific register + * + * @offset: register offset to write + * @data: value to set in register + * @return true if register write successful, false otherwise + */ + bool (*write)(AwRtcState *s, uint32_t offset, uint32_t data); + +}; + +#endif /* HW_MISC_ALLWINNER_RTC_H */ diff --git a/include/hw/rtc/aspeed_rtc.h b/include/hw/rtc/aspeed_rtc.h new file mode 100644 index 000000000..df61e4605 --- /dev/null +++ b/include/hw/rtc/aspeed_rtc.h @@ -0,0 +1,28 @@ +/* + * ASPEED Real Time Clock + * Joel Stanley + * + * Copyright 2019 IBM Corp + * SPDX-License-Identifier: GPL-2.0-or-later + */ +#ifndef HW_RTC_ASPEED_RTC_H +#define HW_RTC_ASPEED_RTC_H + +#include "hw/sysbus.h" +#include "qom/object.h" + +struct AspeedRtcState { + SysBusDevice parent_obj; + + MemoryRegion iomem; + qemu_irq irq; + + uint32_t reg[0x18]; + int offset; + +}; + +#define TYPE_ASPEED_RTC "aspeed.rtc" +OBJECT_DECLARE_SIMPLE_TYPE(AspeedRtcState, ASPEED_RTC) + +#endif /* HW_RTC_ASPEED_RTC_H */ diff --git a/include/hw/rtc/goldfish_rtc.h b/include/hw/rtc/goldfish_rtc.h new file mode 100644 index 000000000..79ca7daf5 --- /dev/null +++ b/include/hw/rtc/goldfish_rtc.h @@ -0,0 +1,47 @@ +/* + * Goldfish virtual platform RTC + * + * Copyright (C) 2019 Western Digital Corporation or its affiliates. + * + * For more details on Google Goldfish virtual platform refer: + * https://android.googlesource.com/platform/external/qemu/+/master/docs/GOLDFISH-VIRTUAL-HARDWARE.TXT + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2 or later, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along with + * this program. If not, see . + */ + +#ifndef HW_RTC_GOLDFISH_RTC_H +#define HW_RTC_GOLDFISH_RTC_H + +#include "hw/sysbus.h" +#include "qom/object.h" + +#define TYPE_GOLDFISH_RTC "goldfish_rtc" +OBJECT_DECLARE_SIMPLE_TYPE(GoldfishRTCState, GOLDFISH_RTC) + +struct GoldfishRTCState { + SysBusDevice parent_obj; + + MemoryRegion iomem; + QEMUTimer *timer; + qemu_irq irq; + + uint64_t tick_offset; + uint64_t tick_offset_vmstate; + uint64_t alarm_next; + uint32_t alarm_running; + uint32_t irq_pending; + uint32_t irq_enabled; + uint32_t time_high; +}; + +#endif diff --git a/include/hw/rtc/m48t59.h b/include/hw/rtc/m48t59.h new file mode 100644 index 000000000..d9b45eb16 --- /dev/null +++ b/include/hw/rtc/m48t59.h @@ -0,0 +1,50 @@ +/* + * QEMU M48T59 and M48T08 NVRAM emulation + * + * Copyright (c) 2003-2005, 2007 Jocelyn Mayer + * Copyright (c) 2013 Hervé Poussineau + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ + +#ifndef HW_RTC_M48T59_H +#define HW_RTC_M48T59_H + +#include "exec/hwaddr.h" +#include "qom/object.h" + +#define TYPE_NVRAM "nvram" + +typedef struct NvramClass NvramClass; +DECLARE_CLASS_CHECKERS(NvramClass, NVRAM, + TYPE_NVRAM) +#define NVRAM(obj) \ + INTERFACE_CHECK(Nvram, (obj), TYPE_NVRAM) + +typedef struct Nvram Nvram; + +struct NvramClass { + InterfaceClass parent; + + uint32_t (*read)(Nvram *obj, uint32_t addr); + void (*write)(Nvram *obj, uint32_t addr, uint32_t val); + void (*toggle_lock)(Nvram *obj, int lock); +}; + +#endif /* HW_M48T59_H */ diff --git a/include/hw/rtc/mc146818rtc.h b/include/hw/rtc/mc146818rtc.h new file mode 100644 index 000000000..5b45b2292 --- /dev/null +++ b/include/hw/rtc/mc146818rtc.h @@ -0,0 +1,58 @@ +/* + * QEMU MC146818 RTC emulation + * + * Copyright (c) 2003-2004 Fabrice Bellard + * + * SPDX-License-Identifier: MIT + */ + +#ifndef HW_RTC_MC146818RTC_H +#define HW_RTC_MC146818RTC_H + +#include "qapi/qapi-types-machine.h" +#include "qemu/queue.h" +#include "qemu/timer.h" +#include "hw/isa/isa.h" +#include "qom/object.h" + +#define TYPE_MC146818_RTC "mc146818rtc" +OBJECT_DECLARE_SIMPLE_TYPE(RTCState, MC146818_RTC) + +struct RTCState { + ISADevice parent_obj; + + MemoryRegion io; + MemoryRegion coalesced_io; + uint8_t cmos_data[128]; + uint8_t cmos_index; + int32_t base_year; + uint64_t base_rtc; + uint64_t last_update; + int64_t offset; + qemu_irq irq; + int it_shift; + /* periodic timer */ + QEMUTimer *periodic_timer; + int64_t next_periodic_time; + /* update-ended timer */ + QEMUTimer *update_timer; + uint64_t next_alarm_time; + uint16_t irq_reinject_on_ack_count; + uint32_t irq_coalesced; + uint32_t period; + QEMUTimer *coalesced_timer; + Notifier clock_reset_notifier; + LostTickPolicy lost_tick_policy; + Notifier suspend_notifier; + QLIST_ENTRY(RTCState) link; +}; + +#define RTC_ISA_IRQ 8 +#define RTC_ISA_BASE 0x70 + +ISADevice *mc146818_rtc_init(ISABus *bus, int base_year, + qemu_irq intercept_irq); +void rtc_set_memory(ISADevice *dev, int addr, int val); +int rtc_get_memory(ISADevice *dev, int addr); + +#endif /* MC146818RTC_H */ diff --git a/include/hw/rtc/mc146818rtc_regs.h b/include/hw/rtc/mc146818rtc_regs.h new file mode 100644 index 000000000..12197e055 --- /dev/null +++ b/include/hw/rtc/mc146818rtc_regs.h @@ -0,0 +1,89 @@ +/* + * QEMU MC146818 RTC emulation + * + * Copyright (c) 2003-2004 Fabrice Bellard + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ + +#ifndef HW_RTC_MC146818RTC_REGS_H +#define HW_RTC_MC146818RTC_REGS_H + +#include "qemu/timer.h" +#include "qemu/host-utils.h" + +#define RTC_SECONDS 0 +#define RTC_SECONDS_ALARM 1 +#define RTC_MINUTES 2 +#define RTC_MINUTES_ALARM 3 +#define RTC_HOURS 4 +#define RTC_HOURS_ALARM 5 +#define RTC_ALARM_DONT_CARE 0xC0 + +#define RTC_DAY_OF_WEEK 6 +#define RTC_DAY_OF_MONTH 7 +#define RTC_MONTH 8 +#define RTC_YEAR 9 + +#define RTC_REG_A 10 +#define RTC_REG_B 11 +#define RTC_REG_C 12 +#define RTC_REG_D 13 + +/* PC cmos mappings */ +#define RTC_CENTURY 0x32 +#define RTC_IBM_PS2_CENTURY_BYTE 0x37 + +#define REG_A_UIP 0x80 + +#define REG_B_SET 0x80 +#define REG_B_PIE 0x40 +#define REG_B_AIE 0x20 +#define REG_B_UIE 0x10 +#define REG_B_SQWE 0x08 +#define REG_B_DM 0x04 +#define REG_B_24H 0x02 + +#define REG_C_UF 0x10 +#define REG_C_IRQF 0x80 +#define REG_C_PF 0x40 +#define REG_C_AF 0x20 +#define REG_C_MASK 0x70 + +static inline uint32_t periodic_period_to_clock(int period_code) +{ + if (!period_code) { + return 0; + } + + if (period_code <= 2) { + period_code += 7; + } + /* period in 32 Khz cycles */ + return 1 << (period_code - 1); +} + +#define RTC_CLOCK_RATE 32768 + +static inline int64_t periodic_clock_to_ns(int64_t clocks) +{ + return muldiv64(clocks, NANOSECONDS_PER_SECOND, RTC_CLOCK_RATE); +} + +#endif diff --git a/include/hw/rtc/pl031.h b/include/hw/rtc/pl031.h new file mode 100644 index 000000000..9fd4be1ab --- /dev/null +++ b/include/hw/rtc/pl031.h @@ -0,0 +1,48 @@ +/* + * ARM AMBA PrimeCell PL031 RTC + * + * Copyright (c) 2007 CodeSourcery + * + * This file is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * Contributions after 2012-01-13 are licensed under the terms of the + * GNU GPL, version 2 or (at your option) any later version. + */ + +#ifndef HW_RTC_PL031_H +#define HW_RTC_PL031_H + +#include "hw/sysbus.h" +#include "qemu/timer.h" +#include "qom/object.h" + +#define TYPE_PL031 "pl031" +OBJECT_DECLARE_SIMPLE_TYPE(PL031State, PL031) + +struct PL031State { + SysBusDevice parent_obj; + + MemoryRegion iomem; + QEMUTimer *timer; + qemu_irq irq; + + /* + * Needed to preserve the tick_count across migration, even if the + * absolute value of the rtc_clock is different on the source and + * destination. + */ + uint32_t tick_offset_vmstate; + uint32_t tick_offset; + bool tick_offset_migrated; + bool migrate_tick_offset; + + uint32_t mr; + uint32_t lr; + uint32_t cr; + uint32_t im; + uint32_t is; +}; + +#endif diff --git a/include/hw/rtc/sun4v-rtc.h b/include/hw/rtc/sun4v-rtc.h new file mode 100644 index 000000000..fd868f6ed --- /dev/null +++ b/include/hw/rtc/sun4v-rtc.h @@ -0,0 +1,19 @@ +/* + * QEMU sun4v Real Time Clock device + * + * The sun4v_rtc device (sun4v tod clock) + * + * Copyright (c) 2016 Artyom Tarasenko + * + * This code is licensed under the GNU GPL v3 or (at your option) any later + * version. + */ + +#ifndef HW_RTC_SUN4V +#define HW_RTC_SUN4V + +#include "exec/hwaddr.h" + +void sun4v_rtc_init(hwaddr addr); + +#endif diff --git a/include/hw/rtc/xlnx-zynqmp-rtc.h b/include/hw/rtc/xlnx-zynqmp-rtc.h new file mode 100644 index 000000000..5f1ad0a94 --- /dev/null +++ b/include/hw/rtc/xlnx-zynqmp-rtc.h @@ -0,0 +1,92 @@ +/* + * QEMU model of the Xilinx ZynqMP Real Time Clock (RTC). + * + * Copyright (c) 2017 Xilinx Inc. + * + * Written-by: Alistair Francis + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ + +#ifndef HW_RTC_XLNX_ZYNQMP_H +#define HW_RTC_XLNX_ZYNQMP_H + +#include "hw/register.h" +#include "hw/sysbus.h" +#include "qom/object.h" + +#define TYPE_XLNX_ZYNQMP_RTC "xlnx-zynmp.rtc" + +OBJECT_DECLARE_SIMPLE_TYPE(XlnxZynqMPRTC, XLNX_ZYNQMP_RTC) + +REG32(SET_TIME_WRITE, 0x0) +REG32(SET_TIME_READ, 0x4) +REG32(CALIB_WRITE, 0x8) + FIELD(CALIB_WRITE, FRACTION_EN, 20, 1) + FIELD(CALIB_WRITE, FRACTION_DATA, 16, 4) + FIELD(CALIB_WRITE, MAX_TICK, 0, 16) +REG32(CALIB_READ, 0xc) + FIELD(CALIB_READ, FRACTION_EN, 20, 1) + FIELD(CALIB_READ, FRACTION_DATA, 16, 4) + FIELD(CALIB_READ, MAX_TICK, 0, 16) +REG32(CURRENT_TIME, 0x10) +REG32(CURRENT_TICK, 0x14) + FIELD(CURRENT_TICK, VALUE, 0, 16) +REG32(ALARM, 0x18) +REG32(RTC_INT_STATUS, 0x20) + FIELD(RTC_INT_STATUS, ALARM, 1, 1) + FIELD(RTC_INT_STATUS, SECONDS, 0, 1) +REG32(RTC_INT_MASK, 0x24) + FIELD(RTC_INT_MASK, ALARM, 1, 1) + FIELD(RTC_INT_MASK, SECONDS, 0, 1) +REG32(RTC_INT_EN, 0x28) + FIELD(RTC_INT_EN, ALARM, 1, 1) + FIELD(RTC_INT_EN, SECONDS, 0, 1) +REG32(RTC_INT_DIS, 0x2c) + FIELD(RTC_INT_DIS, ALARM, 1, 1) + FIELD(RTC_INT_DIS, SECONDS, 0, 1) +REG32(ADDR_ERROR, 0x30) + FIELD(ADDR_ERROR, STATUS, 0, 1) +REG32(ADDR_ERROR_INT_MASK, 0x34) + FIELD(ADDR_ERROR_INT_MASK, MASK, 0, 1) +REG32(ADDR_ERROR_INT_EN, 0x38) + FIELD(ADDR_ERROR_INT_EN, MASK, 0, 1) +REG32(ADDR_ERROR_INT_DIS, 0x3c) + FIELD(ADDR_ERROR_INT_DIS, MASK, 0, 1) +REG32(CONTROL, 0x40) + FIELD(CONTROL, BATTERY_DISABLE, 31, 1) + FIELD(CONTROL, OSC_CNTRL, 24, 4) + FIELD(CONTROL, SLVERR_ENABLE, 0, 1) +REG32(SAFETY_CHK, 0x50) + +#define XLNX_ZYNQMP_RTC_R_MAX (R_SAFETY_CHK + 1) + +struct XlnxZynqMPRTC { + SysBusDevice parent_obj; + MemoryRegion iomem; + qemu_irq irq_rtc_int; + qemu_irq irq_addr_error_int; + + uint32_t tick_offset; + + uint32_t regs[XLNX_ZYNQMP_RTC_R_MAX]; + RegisterInfo regs_info[XLNX_ZYNQMP_RTC_R_MAX]; +}; + +#endif diff --git a/include/hw/rx/rx62n.h b/include/hw/rx/rx62n.h new file mode 100644 index 000000000..3ed80dba0 --- /dev/null +++ b/include/hw/rx/rx62n.h @@ -0,0 +1,79 @@ +/* + * RX62N MCU Object + * + * Datasheet: RX62N Group, RX621 Group User's Manual: Hardware + * (Rev.1.40 R01UH0033EJ0140) + * + * Copyright (c) 2019 Yoshinori Sato + * + * SPDX-License-Identifier: GPL-2.0-or-later + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2 or later, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along with + * this program. If not, see . + */ + +#ifndef HW_RX_RX62N_MCU_H +#define HW_RX_RX62N_MCU_H + +#include "target/rx/cpu.h" +#include "hw/intc/rx_icu.h" +#include "hw/timer/renesas_tmr.h" +#include "hw/timer/renesas_cmt.h" +#include "hw/char/renesas_sci.h" +#include "qemu/units.h" +#include "qom/object.h" + +#define TYPE_RX62N_MCU "rx62n-mcu" +typedef struct RX62NState RX62NState; +DECLARE_INSTANCE_CHECKER(RX62NState, RX62N_MCU, + TYPE_RX62N_MCU) + +#define TYPE_R5F562N7_MCU "r5f562n7-mcu" +#define TYPE_R5F562N8_MCU "r5f562n8-mcu" + +#define EXT_CS_BASE 0x01000000 +#define VECTOR_TABLE_BASE 0xffffff80 +#define RX62N_CFLASH_BASE 0xfff80000 + +#define RX62N_NR_TMR 2 +#define RX62N_NR_CMT 2 +#define RX62N_NR_SCI 6 + +struct RX62NState { + /*< private >*/ + DeviceState parent_obj; + /*< public >*/ + + RXCPU cpu; + RXICUState icu; + RTMRState tmr[RX62N_NR_TMR]; + RCMTState cmt[RX62N_NR_CMT]; + RSCIState sci[RX62N_NR_SCI]; + + MemoryRegion *sysmem; + bool kernel; + + MemoryRegion iram; + MemoryRegion iomem1; + MemoryRegion d_flash; + MemoryRegion iomem2; + MemoryRegion iomem3; + MemoryRegion c_flash; + qemu_irq irq[NR_IRQS]; + + /* Input Clock (XTAL) frequency */ + uint32_t xtal_freq_hz; + /* Peripheral Module Clock frequency */ + uint32_t pclk_freq_hz; +}; + +#endif diff --git a/include/hw/s390x/3270-ccw.h b/include/hw/s390x/3270-ccw.h new file mode 100644 index 000000000..143988229 --- /dev/null +++ b/include/hw/s390x/3270-ccw.h @@ -0,0 +1,48 @@ +/* + * Emulated ccw-attached 3270 definitions + * + * Copyright 2017 IBM Corp. + * Author(s): Yang Chen + * Jing Liu + * + * This work is licensed under the terms of the GNU GPL, version 2 or (at + * your option) any later version. See the COPYING file in the top-level + * directory. + */ + +#ifndef HW_S390X_3270_CCW_H +#define HW_S390X_3270_CCW_H + +#include "hw/sysbus.h" +#include "hw/s390x/css.h" +#include "hw/s390x/ccw-device.h" +#include "qom/object.h" + +#define EMULATED_CCW_3270_CU_TYPE 0x3270 +#define EMULATED_CCW_3270_CHPID_TYPE 0x1a + +#define TYPE_EMULATED_CCW_3270 "emulated-ccw-3270" + +/* Local Channel Commands */ +#define TC_WRITE 0x01 /* Write */ +#define TC_RDBUF 0x02 /* Read buffer */ +#define TC_EWRITE 0x05 /* Erase write */ +#define TC_READMOD 0x06 /* Read modified */ +#define TC_EWRITEA 0x0d /* Erase write alternate */ +#define TC_WRITESF 0x11 /* Write structured field */ + +OBJECT_DECLARE_TYPE(EmulatedCcw3270Device, EmulatedCcw3270Class, EMULATED_CCW_3270) + +struct EmulatedCcw3270Device { + CcwDevice parent_obj; +}; + +struct EmulatedCcw3270Class { + CCWDeviceClass parent_class; + + void (*init)(EmulatedCcw3270Device *, Error **); + int (*read_payload_3270)(EmulatedCcw3270Device *); + int (*write_payload_3270)(EmulatedCcw3270Device *, uint8_t); +}; + +#endif diff --git a/include/hw/s390x/adapter.h b/include/hw/s390x/adapter.h new file mode 100644 index 000000000..7f1703508 --- /dev/null +++ b/include/hw/s390x/adapter.h @@ -0,0 +1,23 @@ +/* + * s390 adapter definitions + * + * Copyright 2013,2014 IBM Corp. + * Author(s): Cornelia Huck + * + * This work is licensed under the terms of the GNU GPL, version 2 or (at + * your option) any later version. See the COPYING file in the top-level + * directory. + */ + +#ifndef S390X_ADAPTER_H +#define S390X_ADAPTER_H + +struct AdapterInfo { + uint64_t ind_addr; + uint64_t summary_addr; + uint64_t ind_offset; + uint32_t summary_offset; + uint32_t adapter_id; +}; + +#endif diff --git a/include/hw/s390x/ap-bridge.h b/include/hw/s390x/ap-bridge.h new file mode 100644 index 000000000..470e439a9 --- /dev/null +++ b/include/hw/s390x/ap-bridge.h @@ -0,0 +1,19 @@ +/* + * ap bridge + * + * Copyright 2018 IBM Corp. + * + * This work is licensed under the terms of the GNU GPL, version 2 or (at + * your option) any later version. See the COPYING file in the top-level + * directory. + */ + +#ifndef HW_S390X_AP_BRIDGE_H +#define HW_S390X_AP_BRIDGE_H + +#define TYPE_AP_BRIDGE "ap-bridge" +#define TYPE_AP_BUS "ap-bus" + +void s390_init_ap(void); + +#endif diff --git a/include/hw/s390x/ap-device.h b/include/hw/s390x/ap-device.h new file mode 100644 index 000000000..e502745de --- /dev/null +++ b/include/hw/s390x/ap-device.h @@ -0,0 +1,27 @@ +/* + * Adjunct Processor (AP) matrix device interfaces + * + * Copyright 2018 IBM Corp. + * + * This work is licensed under the terms of the GNU GPL, version 2 or (at + * your option) any later version. See the COPYING file in the top-level + * directory. + */ + +#ifndef HW_S390X_AP_DEVICE_H +#define HW_S390X_AP_DEVICE_H + +#include "hw/qdev-core.h" +#include "qom/object.h" + +#define TYPE_AP_DEVICE "ap-device" + +struct APDevice { + DeviceState parent_obj; +}; +typedef struct APDevice APDevice; + +DECLARE_INSTANCE_CHECKER(APDevice, AP_DEVICE, + TYPE_AP_DEVICE) + +#endif /* HW_S390X_AP_DEVICE_H */ diff --git a/include/hw/s390x/css-bridge.h b/include/hw/s390x/css-bridge.h new file mode 100644 index 000000000..deb606d71 --- /dev/null +++ b/include/hw/s390x/css-bridge.h @@ -0,0 +1,37 @@ +/* + * virtual css bridge definition + * + * Copyright 2012,2016 IBM Corp. + * Author(s): Cornelia Huck + * Pierre Morel + * + * This work is licensed under the terms of the GNU GPL, version 2 or (at + * your option) any later version. See the COPYING file in the top-level + * directory. + */ + +#ifndef HW_S390X_CSS_BRIDGE_H +#define HW_S390X_CSS_BRIDGE_H + +#include "qom/object.h" +#include "hw/sysbus.h" + +/* virtual css bridge */ +struct VirtualCssBridge { + SysBusDevice sysbus_dev; + bool css_dev_path; +}; + +#define TYPE_VIRTUAL_CSS_BRIDGE "virtual-css-bridge" +OBJECT_DECLARE_SIMPLE_TYPE(VirtualCssBridge, VIRTUAL_CSS_BRIDGE) + +/* virtual css bus type */ +struct VirtualCssBus { + BusState parent_obj; +}; + +#define TYPE_VIRTUAL_CSS_BUS "virtual-css-bus" +OBJECT_DECLARE_SIMPLE_TYPE(VirtualCssBus, VIRTUAL_CSS_BUS) +VirtualCssBus *virtual_css_bus_init(void); + +#endif diff --git a/include/hw/s390x/css.h b/include/hw/s390x/css.h new file mode 100644 index 000000000..785866630 --- /dev/null +++ b/include/hw/s390x/css.h @@ -0,0 +1,330 @@ +/* + * Channel subsystem structures and definitions. + * + * Copyright 2012 IBM Corp. + * Author(s): Cornelia Huck + * + * This work is licensed under the terms of the GNU GPL, version 2 or (at + * your option) any later version. See the COPYING file in the top-level + * directory. + */ + +#ifndef CSS_H +#define CSS_H + +#include "hw/s390x/adapter.h" +#include "hw/s390x/s390_flic.h" +#include "hw/s390x/ioinst.h" +#include "sysemu/kvm.h" +#include "target/s390x/cpu-qom.h" + +/* Channel subsystem constants. */ +#define MAX_DEVNO 65535 +#define MAX_SCHID 65535 +#define MAX_SSID 3 +#define MAX_CSSID 255 +#define MAX_CHPID 255 + +#define MAX_ISC 7 + +#define MAX_CIWS 62 + +#define VIRTUAL_CSSID 0xfe +#define VIRTIO_CCW_CHPID 0 /* used by convention */ + +typedef struct CIW { + uint8_t type; + uint8_t command; + uint16_t count; +} QEMU_PACKED CIW; + +typedef struct SenseId { + /* common part */ + uint8_t reserved; /* always 0x'FF' */ + uint16_t cu_type; /* control unit type */ + uint8_t cu_model; /* control unit model */ + uint16_t dev_type; /* device type */ + uint8_t dev_model; /* device model */ + uint8_t unused; /* padding byte */ + /* extended part */ + CIW ciw[MAX_CIWS]; /* variable # of CIWs */ +} SenseId; /* Note: No QEMU_PACKED due to unaligned members */ + +/* Channel measurements, from linux/drivers/s390/cio/cmf.c. */ +typedef struct CMB { + uint16_t ssch_rsch_count; + uint16_t sample_count; + uint32_t device_connect_time; + uint32_t function_pending_time; + uint32_t device_disconnect_time; + uint32_t control_unit_queuing_time; + uint32_t device_active_only_time; + uint32_t reserved[2]; +} QEMU_PACKED CMB; + +typedef struct CMBE { + uint32_t ssch_rsch_count; + uint32_t sample_count; + uint32_t device_connect_time; + uint32_t function_pending_time; + uint32_t device_disconnect_time; + uint32_t control_unit_queuing_time; + uint32_t device_active_only_time; + uint32_t device_busy_time; + uint32_t initial_command_response_time; + uint32_t reserved[7]; +} QEMU_PACKED CMBE; + +typedef enum CcwDataStreamOp { + CDS_OP_R = 0, /* read, false when used as is_write */ + CDS_OP_W = 1, /* write, true when used as is_write */ + CDS_OP_A = 2 /* advance, should not be used as is_write */ +} CcwDataStreamOp; + +/* normal usage is via SuchchDev.cds instead of instantiating */ +typedef struct CcwDataStream { +#define CDS_F_IDA 0x01 +#define CDS_F_MIDA 0x02 +#define CDS_F_I2K 0x04 +#define CDS_F_C64 0x08 +#define CDS_F_FMT 0x10 /* CCW format-1 */ +#define CDS_F_STREAM_BROKEN 0x80 + uint8_t flags; + uint8_t at_idaw; + uint16_t at_byte; + uint16_t count; + uint32_t cda_orig; + int (*op_handler)(struct CcwDataStream *cds, void *buff, int len, + CcwDataStreamOp op); + hwaddr cda; + bool do_skip; +} CcwDataStream; + +/* + * IO instructions conclude according to this. Currently we have only + * cc codes. Valid values are 0, 1, 2, 3 and the generic semantic for + * IO instructions is described briefly. For more details consult the PoP. + */ +typedef enum IOInstEnding { + /* produced expected result */ + IOINST_CC_EXPECTED = 0, + /* status conditions were present or produced alternate result */ + IOINST_CC_STATUS_PRESENT = 1, + /* inst. ineffective because busy with previously initiated function */ + IOINST_CC_BUSY = 2, + /* inst. ineffective because not operational */ + IOINST_CC_NOT_OPERATIONAL = 3 +} IOInstEnding; + +typedef struct SubchDev SubchDev; +struct SubchDev { + /* channel-subsystem related things: */ + SCHIB curr_status; /* Needs alignment and thus must come first */ + ORB orb; + uint8_t cssid; + uint8_t ssid; + uint16_t schid; + uint16_t devno; + uint8_t sense_data[32]; + hwaddr channel_prog; + CCW1 last_cmd; + bool last_cmd_valid; + bool ccw_fmt_1; + bool thinint_active; + uint8_t ccw_no_data_cnt; + uint16_t migrated_schid; /* used for missmatch detection */ + CcwDataStream cds; + /* transport-provided data: */ + int (*ccw_cb) (SubchDev *, CCW1); + void (*disable_cb)(SubchDev *); + IOInstEnding (*do_subchannel_work) (SubchDev *); + SenseId id; + void *driver_data; +}; + +static inline void sch_gen_unit_exception(SubchDev *sch) +{ + sch->curr_status.scsw.ctrl &= ~SCSW_ACTL_START_PEND; + sch->curr_status.scsw.ctrl |= SCSW_STCTL_PRIMARY | + SCSW_STCTL_SECONDARY | + SCSW_STCTL_ALERT | + SCSW_STCTL_STATUS_PEND; + sch->curr_status.scsw.cpa = sch->channel_prog + 8; + sch->curr_status.scsw.dstat = SCSW_DSTAT_UNIT_EXCEP; +} + +extern const VMStateDescription vmstate_subch_dev; + +/* + * Identify a device within the channel subsystem. + * Note that this can be used to identify either the subchannel or + * the attached I/O device, as there's always one I/O device per + * subchannel. + */ +typedef struct CssDevId { + uint8_t cssid; + uint8_t ssid; + uint16_t devid; + bool valid; +} CssDevId; + +extern const PropertyInfo css_devid_propinfo; + +#define DEFINE_PROP_CSS_DEV_ID(_n, _s, _f) \ + DEFINE_PROP(_n, _s, _f, css_devid_propinfo, CssDevId) + +typedef struct IndAddr { + hwaddr addr; + uint64_t map; + unsigned long refcnt; + int32_t len; + QTAILQ_ENTRY(IndAddr) sibling; +} IndAddr; + +extern const VMStateDescription vmstate_ind_addr; + +#define VMSTATE_PTR_TO_IND_ADDR(_f, _s) \ + VMSTATE_STRUCT(_f, _s, 1, vmstate_ind_addr, IndAddr*) + +IndAddr *get_indicator(hwaddr ind_addr, int len); +void release_indicator(AdapterInfo *adapter, IndAddr *indicator); +int map_indicator(AdapterInfo *adapter, IndAddr *indicator); + +typedef SubchDev *(*css_subch_cb_func)(uint8_t m, uint8_t cssid, uint8_t ssid, + uint16_t schid); +int css_create_css_image(uint8_t cssid, bool default_image); +bool css_devno_used(uint8_t cssid, uint8_t ssid, uint16_t devno); +void css_subch_assign(uint8_t cssid, uint8_t ssid, uint16_t schid, + uint16_t devno, SubchDev *sch); +void css_sch_build_virtual_schib(SubchDev *sch, uint8_t chpid, uint8_t type); +int css_sch_build_schib(SubchDev *sch, CssDevId *dev_id); +unsigned int css_find_free_chpid(uint8_t cssid); +uint16_t css_build_subchannel_id(SubchDev *sch); +void copy_scsw_to_guest(SCSW *dest, const SCSW *src); +void css_inject_io_interrupt(SubchDev *sch); +void css_reset(void); +void css_reset_sch(SubchDev *sch); +void css_crw_add_to_queue(CRW crw); +void css_queue_crw(uint8_t rsc, uint8_t erc, int solicited, + int chain, uint16_t rsid); +void css_generate_sch_crws(uint8_t cssid, uint8_t ssid, uint16_t schid, + int hotplugged, int add); +void css_generate_chp_crws(uint8_t cssid, uint8_t chpid); +void css_generate_css_crws(uint8_t cssid); +void css_clear_sei_pending(void); +IOInstEnding s390_ccw_cmd_request(SubchDev *sch); +IOInstEnding do_subchannel_work_virtual(SubchDev *sub); +IOInstEnding do_subchannel_work_passthrough(SubchDev *sub); + +int s390_ccw_halt(SubchDev *sch); +int s390_ccw_clear(SubchDev *sch); +IOInstEnding s390_ccw_store(SubchDev *sch); + +typedef enum { + CSS_IO_ADAPTER_VIRTIO = 0, + CSS_IO_ADAPTER_PCI = 1, + CSS_IO_ADAPTER_TYPE_NUMS, +} CssIoAdapterType; + +void css_adapter_interrupt(CssIoAdapterType type, uint8_t isc); +int css_do_sic(CPUS390XState *env, uint8_t isc, uint16_t mode); +uint32_t css_get_adapter_id(CssIoAdapterType type, uint8_t isc); +void css_register_io_adapters(CssIoAdapterType type, bool swap, bool maskable, + uint8_t flags, Error **errp); + +#ifndef CONFIG_USER_ONLY +SubchDev *css_find_subch(uint8_t m, uint8_t cssid, uint8_t ssid, + uint16_t schid); +bool css_subch_visible(SubchDev *sch); +void css_conditional_io_interrupt(SubchDev *sch); +IOInstEnding css_do_stsch(SubchDev *sch, SCHIB *schib); +bool css_schid_final(int m, uint8_t cssid, uint8_t ssid, uint16_t schid); +IOInstEnding css_do_msch(SubchDev *sch, const SCHIB *schib); +IOInstEnding css_do_xsch(SubchDev *sch); +IOInstEnding css_do_csch(SubchDev *sch); +IOInstEnding css_do_hsch(SubchDev *sch); +IOInstEnding css_do_ssch(SubchDev *sch, ORB *orb); +int css_do_tsch_get_irb(SubchDev *sch, IRB *irb, int *irb_len); +void css_do_tsch_update_subch(SubchDev *sch); +int css_do_stcrw(CRW *crw); +void css_undo_stcrw(CRW *crw); +int css_collect_chp_desc(int m, uint8_t cssid, uint8_t f_chpid, uint8_t l_chpid, + int rfmt, void *buf); +void css_do_schm(uint8_t mbk, int update, int dct, uint64_t mbo); +int css_enable_mcsse(void); +int css_enable_mss(void); +IOInstEnding css_do_rsch(SubchDev *sch); +int css_do_rchp(uint8_t cssid, uint8_t chpid); +bool css_present(uint8_t cssid); +#endif + +extern const PropertyInfo css_devid_ro_propinfo; + +#define DEFINE_PROP_CSS_DEV_ID_RO(_n, _s, _f) \ + DEFINE_PROP(_n, _s, _f, css_devid_ro_propinfo, CssDevId) + +/** + * Create a subchannel for the given bus id. + * + * If @p bus_id is valid, verify that it is not already in use, and find a + * free devno for it. + * If @p bus_id is not valid find a free subchannel id and device number + * across all subchannel sets and all css images starting from the default + * css image. + * + * If either of the former actions succeed, allocate a subchannel structure, + * initialise it with the bus id, subchannel id and device number, register + * it with the CSS and return it. Otherwise return NULL. + * + * The caller becomes owner of the returned subchannel structure and + * is responsible for unregistering and freeing it. + */ +SubchDev *css_create_sch(CssDevId bus_id, Error **errp); + +/** Turn on css migration */ +void css_register_vmstate(void); + + +void ccw_dstream_init(CcwDataStream *cds, CCW1 const *ccw, ORB const *orb); + +static inline void ccw_dstream_rewind(CcwDataStream *cds) +{ + cds->at_byte = 0; + cds->at_idaw = 0; + cds->cda = cds->cda_orig; +} + +static inline bool ccw_dstream_good(CcwDataStream *cds) +{ + return !(cds->flags & CDS_F_STREAM_BROKEN); +} + +static inline uint16_t ccw_dstream_residual_count(CcwDataStream *cds) +{ + return cds->count - cds->at_byte; +} + +static inline uint16_t ccw_dstream_avail(CcwDataStream *cds) +{ + return ccw_dstream_good(cds) ? ccw_dstream_residual_count(cds) : 0; +} + +static inline int ccw_dstream_advance(CcwDataStream *cds, int len) +{ + return cds->op_handler(cds, NULL, len, CDS_OP_A); +} + +static inline int ccw_dstream_write_buf(CcwDataStream *cds, void *buff, int len) +{ + return cds->op_handler(cds, buff, len, CDS_OP_W); +} + +static inline int ccw_dstream_read_buf(CcwDataStream *cds, void *buff, int len) +{ + return cds->op_handler(cds, buff, len, CDS_OP_R); +} + +#define ccw_dstream_read(cds, v) ccw_dstream_read_buf((cds), &(v), sizeof(v)) +#define ccw_dstream_write(cds, v) ccw_dstream_write_buf((cds), &(v), sizeof(v)) + +#endif diff --git a/include/hw/s390x/ebcdic.h b/include/hw/s390x/ebcdic.h new file mode 100644 index 000000000..69a04cab6 --- /dev/null +++ b/include/hw/s390x/ebcdic.h @@ -0,0 +1,104 @@ +/* + * EBCDIC/ASCII conversion Support + * + * Copyright (c) 2011 Alexander Graf + * Copyright IBM, Corp. 2013 + * + * This work is licensed under the terms of the GNU GPL, version 2 or (at your + * option) any later version. See the COPYING file in the top-level directory. + * + */ + +#ifndef EBCDIC_H +#define EBCDIC_H + +/* EBCDIC handling */ +static const uint8_t ebcdic2ascii[] = { + 0x00, 0x01, 0x02, 0x03, 0x07, 0x09, 0x07, 0x7F, + 0x07, 0x07, 0x07, 0x0B, 0x0C, 0x0D, 0x0E, 0x0F, + 0x10, 0x11, 0x12, 0x13, 0x07, 0x0A, 0x08, 0x07, + 0x18, 0x19, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, + 0x07, 0x07, 0x1C, 0x07, 0x07, 0x0A, 0x17, 0x1B, + 0x07, 0x07, 0x07, 0x07, 0x07, 0x05, 0x06, 0x07, + 0x07, 0x07, 0x16, 0x07, 0x07, 0x07, 0x07, 0x04, + 0x07, 0x07, 0x07, 0x07, 0x14, 0x15, 0x07, 0x1A, + 0x20, 0xFF, 0x83, 0x84, 0x85, 0xA0, 0x07, 0x86, + 0x87, 0xA4, 0x5B, 0x2E, 0x3C, 0x28, 0x2B, 0x21, + 0x26, 0x82, 0x88, 0x89, 0x8A, 0xA1, 0x8C, 0x07, + 0x8D, 0xE1, 0x5D, 0x24, 0x2A, 0x29, 0x3B, 0x5E, + 0x2D, 0x2F, 0x07, 0x8E, 0x07, 0x07, 0x07, 0x8F, + 0x80, 0xA5, 0x07, 0x2C, 0x25, 0x5F, 0x3E, 0x3F, + 0x07, 0x90, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, + 0x70, 0x60, 0x3A, 0x23, 0x40, 0x27, 0x3D, 0x22, + 0x07, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, + 0x68, 0x69, 0xAE, 0xAF, 0x07, 0x07, 0x07, 0xF1, + 0xF8, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F, 0x70, + 0x71, 0x72, 0xA6, 0xA7, 0x91, 0x07, 0x92, 0x07, + 0xE6, 0x7E, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78, + 0x79, 0x7A, 0xAD, 0xAB, 0x07, 0x07, 0x07, 0x07, + 0x9B, 0x9C, 0x9D, 0xFA, 0x07, 0x07, 0x07, 0xAC, + 0xAB, 0x07, 0xAA, 0x7C, 0x07, 0x07, 0x07, 0x07, + 0x7B, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47, + 0x48, 0x49, 0x07, 0x93, 0x94, 0x95, 0xA2, 0x07, + 0x7D, 0x4A, 0x4B, 0x4C, 0x4D, 0x4E, 0x4F, 0x50, + 0x51, 0x52, 0x07, 0x96, 0x81, 0x97, 0xA3, 0x98, + 0x5C, 0xF6, 0x53, 0x54, 0x55, 0x56, 0x57, 0x58, + 0x59, 0x5A, 0xFD, 0x07, 0x99, 0x07, 0x07, 0x07, + 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, + 0x38, 0x39, 0x07, 0x07, 0x9A, 0x07, 0x07, 0x07, +}; + +static const uint8_t ascii2ebcdic[] = { + 0x00, 0x01, 0x02, 0x03, 0x37, 0x2D, 0x2E, 0x2F, + 0x16, 0x05, 0x15, 0x0B, 0x0C, 0x0D, 0x0E, 0x0F, + 0x10, 0x11, 0x12, 0x13, 0x3C, 0x3D, 0x32, 0x26, + 0x18, 0x19, 0x3F, 0x27, 0x22, 0x1D, 0x1E, 0x1F, + 0x40, 0x5A, 0x7F, 0x7B, 0x5B, 0x6C, 0x50, 0x7D, + 0x4D, 0x5D, 0x5C, 0x4E, 0x6B, 0x60, 0x4B, 0x61, + 0xF0, 0xF1, 0xF2, 0xF3, 0xF4, 0xF5, 0xF6, 0xF7, + 0xF8, 0xF9, 0x7A, 0x5E, 0x4C, 0x7E, 0x6E, 0x6F, + 0x7C, 0xC1, 0xC2, 0xC3, 0xC4, 0xC5, 0xC6, 0xC7, + 0xC8, 0xC9, 0xD1, 0xD2, 0xD3, 0xD4, 0xD5, 0xD6, + 0xD7, 0xD8, 0xD9, 0xE2, 0xE3, 0xE4, 0xE5, 0xE6, + 0xE7, 0xE8, 0xE9, 0xBA, 0xE0, 0xBB, 0xB0, 0x6D, + 0x79, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87, + 0x88, 0x89, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, + 0x97, 0x98, 0x99, 0xA2, 0xA3, 0xA4, 0xA5, 0xA6, + 0xA7, 0xA8, 0xA9, 0xC0, 0x4F, 0xD0, 0xA1, 0x07, + 0x3F, 0x3F, 0x3F, 0x3F, 0x3F, 0x3F, 0x3F, 0x3F, + 0x3F, 0x3F, 0x3F, 0x3F, 0x3F, 0x3F, 0x3F, 0x3F, + 0x3F, 0x3F, 0x3F, 0x3F, 0x3F, 0x3F, 0x3F, 0x3F, + 0x3F, 0x3F, 0x3F, 0x3F, 0x3F, 0x3F, 0x3F, 0x3F, + 0x3F, 0x3F, 0x3F, 0x3F, 0x3F, 0x3F, 0x3F, 0x3F, + 0x3F, 0x3F, 0x3F, 0x3F, 0x3F, 0x3F, 0x3F, 0x3F, + 0x3F, 0x3F, 0x3F, 0x3F, 0x3F, 0x3F, 0x3F, 0x3F, + 0x3F, 0x3F, 0x3F, 0x3F, 0x3F, 0x3F, 0x3F, 0x3F, + 0x3F, 0x3F, 0x3F, 0x3F, 0x3F, 0x3F, 0x3F, 0x3F, + 0x3F, 0x3F, 0x3F, 0x3F, 0x3F, 0x3F, 0x3F, 0x3F, + 0x3F, 0x3F, 0x3F, 0x3F, 0x3F, 0x3F, 0x3F, 0x3F, + 0x3F, 0x3F, 0x3F, 0x3F, 0x3F, 0x3F, 0x3F, 0x3F, + 0x3F, 0x59, 0x3F, 0x3F, 0x3F, 0x3F, 0x3F, 0x3F, + 0x3F, 0x3F, 0x3F, 0x3F, 0x3F, 0x3F, 0x3F, 0x3F, + 0x3F, 0x3F, 0x3F, 0x3F, 0x3F, 0x3F, 0x3F, 0x3F, + 0x90, 0x3F, 0x3F, 0x3F, 0x3F, 0xEA, 0x3F, 0xFF +}; + +static inline void ebcdic_put(uint8_t *p, const char *ascii, int len) +{ + int i; + + for (i = 0; i < len; i++) { + p[i] = ascii2ebcdic[(uint8_t)ascii[i]]; + } +} + +static inline void ascii_put(uint8_t *p, const char *ebcdic, int len) +{ + int i; + + for (i = 0; i < len; i++) { + p[i] = ebcdic2ascii[(uint8_t)ebcdic[i]]; + } +} + +#endif /* EBCDIC_H */ diff --git a/include/hw/s390x/event-facility.h b/include/hw/s390x/event-facility.h new file mode 100644 index 000000000..3ffd575d8 --- /dev/null +++ b/include/hw/s390x/event-facility.h @@ -0,0 +1,208 @@ +/* + * SCLP + * Event Facility definitions + * + * Copyright IBM, Corp. 2012 + * + * Authors: + * Heinz Graalfs + * + * This work is licensed under the terms of the GNU GPL, version 2 or (at your + * option) any later version. See the COPYING file in the top-level directory. + * + */ + +#ifndef HW_S390_SCLP_EVENT_FACILITY_H +#define HW_S390_SCLP_EVENT_FACILITY_H + +#include "qemu/thread.h" +#include "hw/qdev-core.h" +#include "hw/s390x/sclp.h" +#include "qom/object.h" + +/* SCLP event types */ +#define SCLP_EVENT_OPRTNS_COMMAND 0x01 +#define SCLP_EVENT_MESSAGE 0x02 +#define SCLP_EVENT_CONFIG_MGT_DATA 0x04 +#define SCLP_EVENT_PMSGCMD 0x09 +#define SCLP_EVENT_ASCII_CONSOLE_DATA 0x1a +#define SCLP_EVENT_SIGNAL_QUIESCE 0x1d + +/* SCLP event masks */ +#define SCLP_EVMASK(T) (1ULL << (sizeof(sccb_mask_t) * 8 - (T))) + +#define SCLP_EVENT_MASK_OP_CMD SCLP_EVMASK(SCLP_EVENT_OPRTNS_COMMAND) +#define SCLP_EVENT_MASK_MSG SCLP_EVMASK(SCLP_EVENT_MESSAGE) +#define SCLP_EVENT_MASK_CONFIG_MGT_DATA SCLP_EVMASK(SCLP_EVENT_CONFIG_MGT_DATA) +#define SCLP_EVENT_MASK_PMSGCMD SCLP_EVMASK(SCLP_EVENT_PMSGCMD) +#define SCLP_EVENT_MASK_MSG_ASCII SCLP_EVMASK(SCLP_EVENT_ASCII_CONSOLE_DATA) +#define SCLP_EVENT_MASK_SIGNAL_QUIESCE SCLP_EVMASK(SCLP_EVENT_SIGNAL_QUIESCE) + +#define SCLP_UNCONDITIONAL_READ 0x00 +#define SCLP_SELECTIVE_READ 0x01 + +#define TYPE_SCLP_EVENT "s390-sclp-event-type" +OBJECT_DECLARE_TYPE(SCLPEvent, SCLPEventClass, + SCLP_EVENT) + +#define TYPE_SCLP_CPU_HOTPLUG "sclp-cpu-hotplug" +#define TYPE_SCLP_QUIESCE "sclpquiesce" + +#define SCLP_EVENT_MASK_LEN_MAX 1021 + +typedef struct WriteEventMask { + SCCBHeader h; + uint16_t _reserved; + uint16_t mask_length; + uint8_t masks[]; +/* + * Layout of the masks is + * uint8_t cp_receive_mask[mask_length]; + * uint8_t cp_send_mask[mask_length]; + * uint8_t receive_mask[mask_length]; + * uint8_t send_mask[mask_length]; + * where 1 <= mask_length <= SCLP_EVENT_MASK_LEN_MAX + */ +} QEMU_PACKED WriteEventMask; + +#define WEM_CP_RECEIVE_MASK(wem, mask_len) ((wem)->masks) +#define WEM_CP_SEND_MASK(wem, mask_len) ((wem)->masks + (mask_len)) +#define WEM_RECEIVE_MASK(wem, mask_len) ((wem)->masks + 2 * (mask_len)) +#define WEM_SEND_MASK(wem, mask_len) ((wem)->masks + 3 * (mask_len)) + +typedef uint64_t sccb_mask_t; + +typedef struct EventBufferHeader { + uint16_t length; + uint8_t type; + uint8_t flags; + uint16_t _reserved; +} QEMU_PACKED EventBufferHeader; + +typedef struct MdbHeader { + uint16_t length; + uint16_t type; + uint32_t tag; + uint32_t revision_code; +} QEMU_PACKED MdbHeader; + +typedef struct MTO { + uint16_t line_type_flags; + uint8_t alarm_control; + uint8_t _reserved[3]; + char message[]; +} QEMU_PACKED MTO; + +typedef struct GO { + uint32_t domid; + uint8_t hhmmss_time[8]; + uint8_t th_time[3]; + uint8_t _reserved_0; + uint8_t dddyyyy_date[7]; + uint8_t _reserved_1; + uint16_t general_msg_flags; + uint8_t _reserved_2[10]; + uint8_t originating_system_name[8]; + uint8_t job_guest_name[8]; +} QEMU_PACKED GO; + +#define MESSAGE_TEXT 0x0004 + +typedef struct MDBO { + uint16_t length; + uint16_t type; + union { + GO go; + MTO mto; + }; +} QEMU_PACKED MDBO; + +typedef struct MDB { + MdbHeader header; + MDBO mdbo[]; +} QEMU_PACKED MDB; + +typedef struct SclpMsg { + EventBufferHeader header; + MDB mdb; +} QEMU_PACKED SclpMsg; + +#define GDS_ID_MDSMU 0x1310 +#define GDS_ID_CPMSU 0x1212 +#define GDS_ID_TEXTCMD 0x1320 + +typedef struct GdsVector { + uint16_t length; + uint16_t gds_id; +} QEMU_PACKED GdsVector; + +#define GDS_KEY_SELFDEFTEXTMSG 0x31 +#define GDS_KEY_TEXTMSG 0x30 + +typedef struct GdsSubvector { + uint8_t length; + uint8_t key; +} QEMU_PACKED GdsSubvector; + +/* MDS Message Unit */ +typedef struct MDMSU { + GdsVector mdmsu; + GdsVector cpmsu; + GdsVector text_command; + GdsSubvector self_def_text_message; + GdsSubvector text_message; +} QEMU_PACKED MDMSU; + +typedef struct WriteEventData { + SCCBHeader h; + EventBufferHeader ebh; +} QEMU_PACKED WriteEventData; + +typedef struct ReadEventData { + SCCBHeader h; + union { + sccb_mask_t mask; + EventBufferHeader ebh; + }; +} QEMU_PACKED ReadEventData; + +struct SCLPEvent { + DeviceState qdev; + bool event_pending; + char *name; +}; + +struct SCLPEventClass { + DeviceClass parent_class; + int (*init)(SCLPEvent *event); + + /* get SCLP's send mask */ + sccb_mask_t (*get_send_mask)(void); + + /* get SCLP's receive mask */ + sccb_mask_t (*get_receive_mask)(void); + + int (*read_event_data)(SCLPEvent *event, EventBufferHeader *evt_buf_hdr, + int *slen); + + int (*write_event_data)(SCLPEvent *event, EventBufferHeader *evt_buf_hdr); + + /* can we handle this event type? */ + bool (*can_handle_event)(uint8_t type); +}; + +#define TYPE_SCLP_EVENT_FACILITY "s390-sclp-event-facility" +typedef struct SCLPEventFacility SCLPEventFacility; +typedef struct SCLPEventFacilityClass SCLPEventFacilityClass; +DECLARE_OBJ_CHECKERS(SCLPEventFacility, SCLPEventFacilityClass, + EVENT_FACILITY, TYPE_SCLP_EVENT_FACILITY) + +struct SCLPEventFacilityClass { + SysBusDeviceClass parent_class; + void (*command_handler)(SCLPEventFacility *ef, SCCB *sccb, uint64_t code); + bool (*event_pending)(SCLPEventFacility *ef); +}; + +BusState *sclp_get_event_facility_bus(void); + +#endif diff --git a/include/hw/s390x/ioinst.h b/include/hw/s390x/ioinst.h new file mode 100644 index 000000000..c6737a30d --- /dev/null +++ b/include/hw/s390x/ioinst.h @@ -0,0 +1,248 @@ +/* + * S/390 channel I/O instructions + * + * Copyright 2012 IBM Corp. + * Author(s): Cornelia Huck + * + * This work is licensed under the terms of the GNU GPL, version 2 or (at + * your option) any later version. See the COPYING file in the top-level + * directory. +*/ + +#ifndef S390X_IOINST_H +#define S390X_IOINST_H + +/* + * Channel I/O related definitions, as defined in the Principles + * Of Operation (and taken from the Linux implementation). + */ + +/* subchannel status word (command mode only) */ +typedef struct SCSW { + uint16_t flags; + uint16_t ctrl; + uint32_t cpa; + uint8_t dstat; + uint8_t cstat; + uint16_t count; +} SCSW; +QEMU_BUILD_BUG_MSG(sizeof(SCSW) != 12, "size of SCSW is wrong"); + +#define SCSW_FLAGS_MASK_KEY 0xf000 +#define SCSW_FLAGS_MASK_SCTL 0x0800 +#define SCSW_FLAGS_MASK_ESWF 0x0400 +#define SCSW_FLAGS_MASK_CC 0x0300 +#define SCSW_FLAGS_MASK_FMT 0x0080 +#define SCSW_FLAGS_MASK_PFCH 0x0040 +#define SCSW_FLAGS_MASK_ISIC 0x0020 +#define SCSW_FLAGS_MASK_ALCC 0x0010 +#define SCSW_FLAGS_MASK_SSI 0x0008 +#define SCSW_FLAGS_MASK_ZCC 0x0004 +#define SCSW_FLAGS_MASK_ECTL 0x0002 +#define SCSW_FLAGS_MASK_PNO 0x0001 + +#define SCSW_CTRL_MASK_FCTL 0x7000 +#define SCSW_CTRL_MASK_ACTL 0x0fe0 +#define SCSW_CTRL_MASK_STCTL 0x001f + +#define SCSW_FCTL_CLEAR_FUNC 0x1000 +#define SCSW_FCTL_HALT_FUNC 0x2000 +#define SCSW_FCTL_START_FUNC 0x4000 + +#define SCSW_ACTL_SUSP 0x0020 +#define SCSW_ACTL_DEVICE_ACTIVE 0x0040 +#define SCSW_ACTL_SUBCH_ACTIVE 0x0080 +#define SCSW_ACTL_CLEAR_PEND 0x0100 +#define SCSW_ACTL_HALT_PEND 0x0200 +#define SCSW_ACTL_START_PEND 0x0400 +#define SCSW_ACTL_RESUME_PEND 0x0800 + +#define SCSW_STCTL_STATUS_PEND 0x0001 +#define SCSW_STCTL_SECONDARY 0x0002 +#define SCSW_STCTL_PRIMARY 0x0004 +#define SCSW_STCTL_INTERMEDIATE 0x0008 +#define SCSW_STCTL_ALERT 0x0010 + +#define SCSW_DSTAT_ATTENTION 0x80 +#define SCSW_DSTAT_STAT_MOD 0x40 +#define SCSW_DSTAT_CU_END 0x20 +#define SCSW_DSTAT_BUSY 0x10 +#define SCSW_DSTAT_CHANNEL_END 0x08 +#define SCSW_DSTAT_DEVICE_END 0x04 +#define SCSW_DSTAT_UNIT_CHECK 0x02 +#define SCSW_DSTAT_UNIT_EXCEP 0x01 + +#define SCSW_CSTAT_PCI 0x80 +#define SCSW_CSTAT_INCORR_LEN 0x40 +#define SCSW_CSTAT_PROG_CHECK 0x20 +#define SCSW_CSTAT_PROT_CHECK 0x10 +#define SCSW_CSTAT_DATA_CHECK 0x08 +#define SCSW_CSTAT_CHN_CTRL_CHK 0x04 +#define SCSW_CSTAT_INTF_CTRL_CHK 0x02 +#define SCSW_CSTAT_CHAIN_CHECK 0x01 + +/* path management control word */ +typedef struct PMCW { + uint32_t intparm; + uint16_t flags; + uint16_t devno; + uint8_t lpm; + uint8_t pnom; + uint8_t lpum; + uint8_t pim; + uint16_t mbi; + uint8_t pom; + uint8_t pam; + uint8_t chpid[8]; + uint32_t chars; +} PMCW; +QEMU_BUILD_BUG_MSG(sizeof(PMCW) != 28, "size of PMCW is wrong"); + +#define PMCW_FLAGS_MASK_QF 0x8000 +#define PMCW_FLAGS_MASK_W 0x4000 +#define PMCW_FLAGS_MASK_ISC 0x3800 +#define PMCW_FLAGS_MASK_ENA 0x0080 +#define PMCW_FLAGS_MASK_LM 0x0060 +#define PMCW_FLAGS_MASK_MME 0x0018 +#define PMCW_FLAGS_MASK_MP 0x0004 +#define PMCW_FLAGS_MASK_TF 0x0002 +#define PMCW_FLAGS_MASK_DNV 0x0001 +#define PMCW_FLAGS_MASK_INVALID 0x0700 + +#define PMCW_CHARS_MASK_ST 0x00e00000 +#define PMCW_CHARS_MASK_MBFC 0x00000004 +#define PMCW_CHARS_MASK_XMWME 0x00000002 +#define PMCW_CHARS_MASK_CSENSE 0x00000001 +#define PMCW_CHARS_MASK_INVALID 0xff1ffff8 + +/* subchannel information block */ +typedef struct SCHIB { + PMCW pmcw; + SCSW scsw; + uint64_t mba; + uint8_t mda[4]; +} QEMU_PACKED SCHIB; + +/* interruption response block */ +typedef struct IRB { + SCSW scsw; + uint32_t esw[5]; + uint32_t ecw[8]; + uint32_t emw[8]; +} IRB; +QEMU_BUILD_BUG_MSG(sizeof(IRB) != 96, "size of IRB is wrong"); + +/* operation request block */ +typedef struct ORB { + uint32_t intparm; + uint16_t ctrl0; + uint8_t lpm; + uint8_t ctrl1; + uint32_t cpa; +} ORB; +QEMU_BUILD_BUG_MSG(sizeof(ORB) != 12, "size of ORB is wrong"); + +#define ORB_CTRL0_MASK_KEY 0xf000 +#define ORB_CTRL0_MASK_SPND 0x0800 +#define ORB_CTRL0_MASK_STR 0x0400 +#define ORB_CTRL0_MASK_MOD 0x0200 +#define ORB_CTRL0_MASK_SYNC 0x0100 +#define ORB_CTRL0_MASK_FMT 0x0080 +#define ORB_CTRL0_MASK_PFCH 0x0040 +#define ORB_CTRL0_MASK_ISIC 0x0020 +#define ORB_CTRL0_MASK_ALCC 0x0010 +#define ORB_CTRL0_MASK_SSIC 0x0008 +#define ORB_CTRL0_MASK_C64 0x0002 +#define ORB_CTRL0_MASK_I2K 0x0001 +#define ORB_CTRL0_MASK_INVALID 0x0004 + +#define ORB_CTRL1_MASK_ILS 0x80 +#define ORB_CTRL1_MASK_MIDAW 0x40 +#define ORB_CTRL1_MASK_ORBX 0x01 +#define ORB_CTRL1_MASK_INVALID 0x3e + +/* channel command word (type 0) */ +typedef struct CCW0 { + uint8_t cmd_code; + uint8_t cda0; + uint16_t cda1; + uint8_t flags; + uint8_t reserved; + uint16_t count; +} CCW0; +QEMU_BUILD_BUG_MSG(sizeof(CCW0) != 8, "size of CCW0 is wrong"); + +/* channel command word (type 1) */ +typedef struct CCW1 { + uint8_t cmd_code; + uint8_t flags; + uint16_t count; + uint32_t cda; +} CCW1; +QEMU_BUILD_BUG_MSG(sizeof(CCW1) != 8, "size of CCW1 is wrong"); + +#define CCW_FLAG_DC 0x80 +#define CCW_FLAG_CC 0x40 +#define CCW_FLAG_SLI 0x20 +#define CCW_FLAG_SKIP 0x10 +#define CCW_FLAG_PCI 0x08 +#define CCW_FLAG_IDA 0x04 +#define CCW_FLAG_SUSPEND 0x02 +#define CCW_FLAG_MIDA 0x01 + +#define CCW_CMD_NOOP 0x03 +#define CCW_CMD_BASIC_SENSE 0x04 +#define CCW_CMD_TIC 0x08 +#define CCW_CMD_SENSE_ID 0xe4 + +typedef struct CRW { + uint16_t flags; + uint16_t rsid; +} CRW; +QEMU_BUILD_BUG_MSG(sizeof(CRW) != 4, "size of CRW is wrong"); + +#define CRW_FLAGS_MASK_S 0x4000 +#define CRW_FLAGS_MASK_R 0x2000 +#define CRW_FLAGS_MASK_C 0x1000 +#define CRW_FLAGS_MASK_RSC 0x0f00 +#define CRW_FLAGS_MASK_A 0x0080 +#define CRW_FLAGS_MASK_ERC 0x003f + +#define CRW_ERC_EVENT 0x00 /* event information pending */ +#define CRW_ERC_AVAIL 0x01 /* available */ +#define CRW_ERC_INIT 0x02 /* initialized */ +#define CRW_ERC_TERROR 0x03 /* temporary error */ +#define CRW_ERC_IPI 0x04 /* installed parm initialized */ +#define CRW_ERC_TERM 0x05 /* terminal */ +#define CRW_ERC_PERRN 0x06 /* perm. error, facility not init */ +#define CRW_ERC_PERRI 0x07 /* perm. error, facility init */ +#define CRW_ERC_PMOD 0x08 /* installed parameters modified */ +#define CRW_ERC_IPR 0x0A /* installed parameters restored */ + +#define CRW_RSC_SUBCH 0x3 +#define CRW_RSC_CHP 0x4 +#define CRW_RSC_CSS 0xb + +/* I/O interruption code */ +typedef struct IOIntCode { + uint32_t subsys_id; + uint32_t intparm; + uint32_t interrupt_id; +} QEMU_PACKED IOIntCode; + +/* schid disintegration */ +#define IOINST_SCHID_ONE(_schid) ((_schid & 0x00010000) >> 16) +#define IOINST_SCHID_M(_schid) ((_schid & 0x00080000) >> 19) +#define IOINST_SCHID_CSSID(_schid) ((_schid & 0xff000000) >> 24) +#define IOINST_SCHID_SSID(_schid) ((_schid & 0x00060000) >> 17) +#define IOINST_SCHID_NR(_schid) (_schid & 0x0000ffff) + +#define IO_INT_WORD_ISC(_int_word) ((_int_word & 0x38000000) >> 27) +#define ISC_TO_ISC_BITS(_isc) ((0x80 >> _isc) << 24) + +#define IO_INT_WORD_AI 0x80000000 + +int ioinst_disassemble_sch_ident(uint32_t value, int *m, int *cssid, int *ssid, + int *schid); + +#endif diff --git a/include/hw/s390x/pv.h b/include/hw/s390x/pv.h new file mode 100644 index 000000000..aee758bc2 --- /dev/null +++ b/include/hw/s390x/pv.h @@ -0,0 +1,58 @@ +/* + * Protected Virtualization header + * + * Copyright IBM Corp. 2020 + * Author(s): + * Janosch Frank + * + * This work is licensed under the terms of the GNU GPL, version 2 or (at + * your option) any later version. See the COPYING file in the top-level + * directory. + */ +#ifndef HW_S390_PV_H +#define HW_S390_PV_H + +#ifdef CONFIG_KVM +#include "cpu.h" +#include "hw/s390x/s390-virtio-ccw.h" + +static inline bool s390_is_pv(void) +{ + static S390CcwMachineState *ccw; + Object *obj; + + if (ccw) { + return ccw->pv; + } + + /* we have to bail out for the "none" machine */ + obj = object_dynamic_cast(qdev_get_machine(), + TYPE_S390_CCW_MACHINE); + if (!obj) { + return false; + } + ccw = S390_CCW_MACHINE(obj); + return ccw->pv; +} + +int s390_pv_vm_enable(void); +void s390_pv_vm_disable(void); +int s390_pv_set_sec_parms(uint64_t origin, uint64_t length); +int s390_pv_unpack(uint64_t addr, uint64_t size, uint64_t tweak); +void s390_pv_prep_reset(void); +int s390_pv_verify(void); +void s390_pv_unshare(void); +void s390_pv_inject_reset_error(CPUState *cs); +#else /* CONFIG_KVM */ +static inline bool s390_is_pv(void) { return false; } +static inline int s390_pv_vm_enable(void) { return 0; } +static inline void s390_pv_vm_disable(void) {} +static inline int s390_pv_set_sec_parms(uint64_t origin, uint64_t length) { return 0; } +static inline int s390_pv_unpack(uint64_t addr, uint64_t size, uint64_t tweak) { return 0; } +static inline void s390_pv_prep_reset(void) {} +static inline int s390_pv_verify(void) { return 0; } +static inline void s390_pv_unshare(void) {} +static inline void s390_pv_inject_reset_error(CPUState *cs) {}; +#endif /* CONFIG_KVM */ + +#endif /* HW_S390_PV_H */ diff --git a/include/hw/s390x/s390-ccw.h b/include/hw/s390x/s390-ccw.h new file mode 100644 index 000000000..2c807ee3a --- /dev/null +++ b/include/hw/s390x/s390-ccw.h @@ -0,0 +1,42 @@ +/* + * s390 CCW Assignment Support + * + * Copyright 2017 IBM Corp. + * Author(s): Dong Jia Shi + * Xiao Feng Ren + * + * This work is licensed under the terms of the GNU GPL, version 2 or (at + * your option) any later version. See the COPYING file in the top-level + * directory. + */ + +#ifndef HW_S390_CCW_H +#define HW_S390_CCW_H + +#include "hw/s390x/ccw-device.h" +#include "qom/object.h" + +#define TYPE_S390_CCW "s390-ccw" +typedef struct S390CCWDevice S390CCWDevice; +typedef struct S390CCWDeviceClass S390CCWDeviceClass; +DECLARE_OBJ_CHECKERS(S390CCWDevice, S390CCWDeviceClass, + S390_CCW_DEVICE, TYPE_S390_CCW) + +struct S390CCWDevice { + CcwDevice parent_obj; + CssDevId hostid; + char *mdevid; + int32_t bootindex; +}; + +struct S390CCWDeviceClass { + CCWDeviceClass parent_class; + void (*realize)(S390CCWDevice *dev, char *sysfsdev, Error **errp); + void (*unrealize)(S390CCWDevice *dev); + IOInstEnding (*handle_request) (SubchDev *sch); + int (*handle_halt) (SubchDev *sch); + int (*handle_clear) (SubchDev *sch); + IOInstEnding (*handle_store) (SubchDev *sch); +}; + +#endif diff --git a/include/hw/s390x/s390-pci-bus.h b/include/hw/s390x/s390-pci-bus.h new file mode 100644 index 000000000..49ae9f03d --- /dev/null +++ b/include/hw/s390x/s390-pci-bus.h @@ -0,0 +1,394 @@ +/* + * s390 PCI BUS definitions + * + * Copyright 2014 IBM Corp. + * Author(s): Frank Blaschka + * Hong Bo Li + * Yi Min Zhao + * + * This work is licensed under the terms of the GNU GPL, version 2 or (at + * your option) any later version. See the COPYING file in the top-level + * directory. + */ + +#ifndef HW_S390_PCI_BUS_H +#define HW_S390_PCI_BUS_H + +#include "hw/pci/pci.h" +#include "hw/pci/pci_host.h" +#include "hw/s390x/sclp.h" +#include "hw/s390x/s390_flic.h" +#include "hw/s390x/css.h" +#include "hw/s390x/s390-pci-clp.h" +#include "qom/object.h" + +#define TYPE_S390_PCI_HOST_BRIDGE "s390-pcihost" +#define TYPE_S390_PCI_BUS "s390-pcibus" +#define TYPE_S390_PCI_DEVICE "zpci" +#define TYPE_S390_PCI_IOMMU "s390-pci-iommu" +#define TYPE_S390_IOMMU_MEMORY_REGION "s390-iommu-memory-region" +#define FH_MASK_ENABLE 0x80000000 +#define FH_MASK_INSTANCE 0x7f000000 +#define FH_MASK_SHM 0x00ff0000 +#define FH_MASK_INDEX 0x0000ffff +#define FH_SHM_VFIO 0x00010000 +#define FH_SHM_EMUL 0x00020000 +#define ZPCI_MAX_FID 0xffffffff +#define ZPCI_MAX_UID 0xffff +#define UID_UNDEFINED 0 +#define UID_CHECKING_ENABLED 0x01 + +OBJECT_DECLARE_SIMPLE_TYPE(S390pciState, S390_PCI_HOST_BRIDGE) +OBJECT_DECLARE_SIMPLE_TYPE(S390PCIBus, S390_PCI_BUS) +OBJECT_DECLARE_SIMPLE_TYPE(S390PCIBusDevice, S390_PCI_DEVICE) +OBJECT_DECLARE_SIMPLE_TYPE(S390PCIIOMMU, S390_PCI_IOMMU) + +#define HP_EVENT_TO_CONFIGURED 0x0301 +#define HP_EVENT_RESERVED_TO_STANDBY 0x0302 +#define HP_EVENT_DECONFIGURE_REQUEST 0x0303 +#define HP_EVENT_CONFIGURED_TO_STBRES 0x0304 +#define HP_EVENT_STANDBY_TO_RESERVED 0x0308 + +#define ERR_EVENT_INVALAS 0x1 +#define ERR_EVENT_OORANGE 0x2 +#define ERR_EVENT_INVALTF 0x3 +#define ERR_EVENT_TPROTE 0x4 +#define ERR_EVENT_APROTE 0x5 +#define ERR_EVENT_KEYE 0x6 +#define ERR_EVENT_INVALTE 0x7 +#define ERR_EVENT_INVALTL 0x8 +#define ERR_EVENT_TT 0x9 +#define ERR_EVENT_INVALMS 0xa +#define ERR_EVENT_SERR 0xb +#define ERR_EVENT_NOMSI 0x10 +#define ERR_EVENT_INVALBV 0x11 +#define ERR_EVENT_AIBV 0x12 +#define ERR_EVENT_AIRERR 0x13 +#define ERR_EVENT_FMBA 0x2a +#define ERR_EVENT_FMBUP 0x2b +#define ERR_EVENT_FMBPRO 0x2c +#define ERR_EVENT_CCONF 0x30 +#define ERR_EVENT_SERVAC 0x3a +#define ERR_EVENT_PERMERR 0x3b + +#define ERR_EVENT_Q_BIT 0x2 +#define ERR_EVENT_MVN_OFFSET 16 + +#define ZPCI_MSI_VEC_BITS 11 +#define ZPCI_MSI_VEC_MASK 0x7ff + +#define ZPCI_MSI_ADDR 0xfe00000000000000ULL +#define ZPCI_SDMA_ADDR 0x100000000ULL +#define ZPCI_EDMA_ADDR 0x1ffffffffffffffULL + +#define PAGE_SHIFT 12 +#define PAGE_SIZE (1 << PAGE_SHIFT) +#define PAGE_MASK (~(PAGE_SIZE-1)) +#define PAGE_DEFAULT_ACC 0 +#define PAGE_DEFAULT_KEY (PAGE_DEFAULT_ACC << 4) + +/* I/O Translation Anchor (IOTA) */ +enum ZpciIoatDtype { + ZPCI_IOTA_STO = 0, + ZPCI_IOTA_RTTO = 1, + ZPCI_IOTA_RSTO = 2, + ZPCI_IOTA_RFTO = 3, + ZPCI_IOTA_PFAA = 4, + ZPCI_IOTA_IOPFAA = 5, + ZPCI_IOTA_IOPTO = 7 +}; + +#define ZPCI_IOTA_IOT_ENABLED 0x800ULL +#define ZPCI_IOTA_DT_ST (ZPCI_IOTA_STO << 2) +#define ZPCI_IOTA_DT_RT (ZPCI_IOTA_RTTO << 2) +#define ZPCI_IOTA_DT_RS (ZPCI_IOTA_RSTO << 2) +#define ZPCI_IOTA_DT_RF (ZPCI_IOTA_RFTO << 2) +#define ZPCI_IOTA_DT_PF (ZPCI_IOTA_PFAA << 2) +#define ZPCI_IOTA_FS_4K 0 +#define ZPCI_IOTA_FS_1M 1 +#define ZPCI_IOTA_FS_2G 2 +#define ZPCI_KEY (PAGE_DEFAULT_KEY << 5) + +#define ZPCI_IOTA_STO_FLAG (ZPCI_IOTA_IOT_ENABLED | ZPCI_KEY | ZPCI_IOTA_DT_ST) +#define ZPCI_IOTA_RTTO_FLAG (ZPCI_IOTA_IOT_ENABLED | ZPCI_KEY | ZPCI_IOTA_DT_RT) +#define ZPCI_IOTA_RSTO_FLAG (ZPCI_IOTA_IOT_ENABLED | ZPCI_KEY | ZPCI_IOTA_DT_RS) +#define ZPCI_IOTA_RFTO_FLAG (ZPCI_IOTA_IOT_ENABLED | ZPCI_KEY | ZPCI_IOTA_DT_RF) +#define ZPCI_IOTA_RFAA_FLAG (ZPCI_IOTA_IOT_ENABLED | ZPCI_KEY |\ + ZPCI_IOTA_DT_PF | ZPCI_IOTA_FS_2G) + +/* I/O Region and segment tables */ +#define ZPCI_INDEX_MASK 0x7ffULL + +#define ZPCI_TABLE_TYPE_MASK 0xc +#define ZPCI_TABLE_TYPE_RFX 0xc +#define ZPCI_TABLE_TYPE_RSX 0x8 +#define ZPCI_TABLE_TYPE_RTX 0x4 +#define ZPCI_TABLE_TYPE_SX 0x0 + +#define ZPCI_TABLE_LEN_RFX 0x3 +#define ZPCI_TABLE_LEN_RSX 0x3 +#define ZPCI_TABLE_LEN_RTX 0x3 + +#define ZPCI_TABLE_OFFSET_MASK 0xc0 +#define ZPCI_TABLE_SIZE 0x4000 +#define ZPCI_TABLE_ALIGN ZPCI_TABLE_SIZE +#define ZPCI_TABLE_ENTRY_SIZE (sizeof(unsigned long)) +#define ZPCI_TABLE_ENTRIES (ZPCI_TABLE_SIZE / ZPCI_TABLE_ENTRY_SIZE) + +#define ZPCI_TABLE_BITS 11 +#define ZPCI_PT_BITS 8 +#define ZPCI_ST_SHIFT (ZPCI_PT_BITS + PAGE_SHIFT) +#define ZPCI_RT_SHIFT (ZPCI_ST_SHIFT + ZPCI_TABLE_BITS) + +#define ZPCI_RTE_FLAG_MASK 0x3fffULL +#define ZPCI_RTE_ADDR_MASK (~ZPCI_RTE_FLAG_MASK) +#define ZPCI_STE_FLAG_MASK 0x7ffULL +#define ZPCI_STE_ADDR_MASK (~ZPCI_STE_FLAG_MASK) + +#define ZPCI_SFAA_MASK (~((1ULL << 20) - 1)) + +/* I/O Page tables */ +#define ZPCI_PTE_VALID_MASK 0x400 +#define ZPCI_PTE_INVALID 0x400 +#define ZPCI_PTE_VALID 0x000 +#define ZPCI_PT_SIZE 0x800 +#define ZPCI_PT_ALIGN ZPCI_PT_SIZE +#define ZPCI_PT_ENTRIES (ZPCI_PT_SIZE / ZPCI_TABLE_ENTRY_SIZE) +#define ZPCI_PT_MASK (ZPCI_PT_ENTRIES - 1) + +#define ZPCI_PTE_FLAG_MASK 0xfffULL +#define ZPCI_PTE_ADDR_MASK (~ZPCI_PTE_FLAG_MASK) + +/* Shared bits */ +#define ZPCI_TABLE_VALID 0x00 +#define ZPCI_TABLE_INVALID 0x20 +#define ZPCI_TABLE_PROTECTED 0x200 +#define ZPCI_TABLE_UNPROTECTED 0x000 +#define ZPCI_TABLE_FC 0x400 + +#define ZPCI_TABLE_VALID_MASK 0x20 +#define ZPCI_TABLE_PROT_MASK 0x200 + +#define ZPCI_ETT_RT 1 +#define ZPCI_ETT_ST 0 +#define ZPCI_ETT_PT -1 + +/* PCI Function States + * + * reserved: default; device has just been plugged or is in progress of being + * unplugged + * standby: device is present but not configured; transition from any + * configured state/to this state via sclp configure/deconfigure + * + * The following states make up the "configured" meta-state: + * disabled: device is configured but not enabled; transition between this + * state and enabled via clp enable/disable + * enbaled: device is ready for use; transition to disabled via clp disable; + * may enter an error state + * blocked: ignore all DMA and interrupts; transition back to enabled or from + * error state via mpcifc + * error: an error occurred; transition back to enabled via mpcifc + * permanent error: an unrecoverable error occurred; transition to standby via + * sclp deconfigure + */ +typedef enum { + ZPCI_FS_RESERVED, + ZPCI_FS_STANDBY, + ZPCI_FS_DISABLED, + ZPCI_FS_ENABLED, + ZPCI_FS_BLOCKED, + ZPCI_FS_ERROR, + ZPCI_FS_PERMANENT_ERROR, +} ZpciState; + +typedef struct SeiContainer { + QTAILQ_ENTRY(SeiContainer) link; + uint32_t fid; + uint32_t fh; + uint8_t cc; + uint16_t pec; + uint64_t faddr; + uint32_t e; +} SeiContainer; + +typedef struct PciCcdfErr { + uint32_t reserved1; + uint32_t fh; + uint32_t fid; + uint32_t e; + uint64_t faddr; + uint32_t reserved3; + uint16_t reserved4; + uint16_t pec; +} QEMU_PACKED PciCcdfErr; + +typedef struct PciCcdfAvail { + uint32_t reserved1; + uint32_t fh; + uint32_t fid; + uint32_t reserved2; + uint32_t reserved3; + uint32_t reserved4; + uint32_t reserved5; + uint16_t reserved6; + uint16_t pec; +} QEMU_PACKED PciCcdfAvail; + +typedef struct ChscSeiNt2Res { + uint16_t length; + uint16_t code; + uint16_t reserved1; + uint8_t reserved2; + uint8_t nt; + uint8_t flags; + uint8_t reserved3; + uint8_t reserved4; + uint8_t cc; + uint32_t reserved5[13]; + uint8_t ccdf[4016]; +} QEMU_PACKED ChscSeiNt2Res; + +typedef struct S390MsixInfo { + uint8_t table_bar; + uint8_t pba_bar; + uint16_t entries; + uint32_t table_offset; + uint32_t pba_offset; +} S390MsixInfo; + +typedef struct S390IOTLBEntry { + uint64_t iova; + uint64_t translated_addr; + uint64_t len; + uint64_t perm; +} S390IOTLBEntry; + +typedef struct S390PCIDMACount { + int id; + int users; + uint32_t avail; + QTAILQ_ENTRY(S390PCIDMACount) link; +} S390PCIDMACount; + +struct S390PCIIOMMU { + Object parent_obj; + S390PCIBusDevice *pbdev; + AddressSpace as; + MemoryRegion mr; + IOMMUMemoryRegion iommu_mr; + bool enabled; + uint64_t g_iota; + uint64_t pba; + uint64_t pal; + GHashTable *iotlb; + S390PCIDMACount *dma_limit; +}; + +typedef struct S390PCIIOMMUTable { + uint64_t key; + S390PCIIOMMU *iommu[PCI_SLOT_MAX]; +} S390PCIIOMMUTable; + +/* Function Measurement Block */ +#define DEFAULT_MUI 4000 +#define UPDATE_U_BIT 0x1ULL +#define FMBK_MASK 0xfULL + +typedef struct ZpciFmbFmt0 { + uint64_t dma_rbytes; + uint64_t dma_wbytes; +} ZpciFmbFmt0; + +#define ZPCI_FMB_CNT_LD 0 +#define ZPCI_FMB_CNT_ST 1 +#define ZPCI_FMB_CNT_STB 2 +#define ZPCI_FMB_CNT_RPCIT 3 +#define ZPCI_FMB_CNT_MAX 4 + +#define ZPCI_FMB_FORMAT 0 + +typedef struct ZpciFmb { + uint32_t format; + uint32_t sample; + uint64_t last_update; + uint64_t counter[ZPCI_FMB_CNT_MAX]; + ZpciFmbFmt0 fmt0; +} ZpciFmb; +QEMU_BUILD_BUG_MSG(offsetof(ZpciFmb, fmt0) != 48, "padding in ZpciFmb"); + +#define ZPCI_DEFAULT_FN_GRP 0x20 +typedef struct S390PCIGroup { + ClpRspQueryPciGrp zpci_group; + int id; + QTAILQ_ENTRY(S390PCIGroup) link; +} S390PCIGroup; +S390PCIGroup *s390_group_create(int id); +S390PCIGroup *s390_group_find(int id); + +struct S390PCIBusDevice { + DeviceState qdev; + PCIDevice *pdev; + ZpciState state; + char *target; + uint16_t uid; + uint32_t idx; + uint32_t fh; + uint32_t fid; + bool fid_defined; + uint64_t fmb_addr; + ZpciFmb fmb; + QEMUTimer *fmb_timer; + uint8_t isc; + uint16_t noi; + uint16_t maxstbl; + uint8_t sum; + S390PCIGroup *pci_group; + ClpRspQueryPci zpci_fn; + S390MsixInfo msix; + AdapterRoutes routes; + S390PCIIOMMU *iommu; + MemoryRegion msix_notify_mr; + IndAddr *summary_ind; + IndAddr *indicator; + bool pci_unplug_request_processed; + bool unplug_requested; + QTAILQ_ENTRY(S390PCIBusDevice) link; +}; + +struct S390PCIBus { + BusState qbus; +}; + +struct S390pciState { + PCIHostState parent_obj; + uint32_t next_idx; + int bus_no; + S390PCIBus *bus; + GHashTable *iommu_table; + GHashTable *zpci_table; + QTAILQ_HEAD(, SeiContainer) pending_sei; + QTAILQ_HEAD(, S390PCIBusDevice) zpci_devs; + QTAILQ_HEAD(, S390PCIDMACount) zpci_dma_limit; + QTAILQ_HEAD(, S390PCIGroup) zpci_groups; +}; + +S390pciState *s390_get_phb(void); +int pci_chsc_sei_nt2_get_event(void *res); +int pci_chsc_sei_nt2_have_event(void); +void s390_pci_sclp_configure(SCCB *sccb); +void s390_pci_sclp_deconfigure(SCCB *sccb); +void s390_pci_iommu_enable(S390PCIIOMMU *iommu); +void s390_pci_iommu_disable(S390PCIIOMMU *iommu); +void s390_pci_generate_error_event(uint16_t pec, uint32_t fh, uint32_t fid, + uint64_t faddr, uint32_t e); +uint16_t s390_guest_io_table_walk(uint64_t g_iota, hwaddr addr, + S390IOTLBEntry *entry); +S390PCIBusDevice *s390_pci_find_dev_by_idx(S390pciState *s, uint32_t idx); +S390PCIBusDevice *s390_pci_find_dev_by_fh(S390pciState *s, uint32_t fh); +S390PCIBusDevice *s390_pci_find_dev_by_fid(S390pciState *s, uint32_t fid); +S390PCIBusDevice *s390_pci_find_dev_by_target(S390pciState *s, + const char *target); +S390PCIBusDevice *s390_pci_find_next_avail_dev(S390pciState *s, + S390PCIBusDevice *pbdev); + +#endif diff --git a/include/hw/s390x/s390-pci-clp.h b/include/hw/s390x/s390-pci-clp.h new file mode 100644 index 000000000..96b8e3f13 --- /dev/null +++ b/include/hw/s390x/s390-pci-clp.h @@ -0,0 +1,215 @@ +/* + * s390 CLP instruction definitions + * + * Copyright 2019 IBM Corp. + * Author(s): Pierre Morel + * + * This work is licensed under the terms of the GNU GPL, version 2 or (at + * your option) any later version. See the COPYING file in the top-level + * directory. + */ + +#ifndef HW_S390_PCI_CLP +#define HW_S390_PCI_CLP + +/* CLP common request & response block size */ +#define CLP_BLK_SIZE 4096 +#define PCI_BAR_COUNT 6 +#define PCI_MAX_FUNCTIONS 4096 + +typedef struct ClpReqHdr { + uint16_t len; + uint16_t cmd; +} QEMU_PACKED ClpReqHdr; + +typedef struct ClpRspHdr { + uint16_t len; + uint16_t rsp; +} QEMU_PACKED ClpRspHdr; + +/* CLP Response Codes */ +#define CLP_RC_OK 0x0010 /* Command request successfully */ +#define CLP_RC_CMD 0x0020 /* Command code not recognized */ +#define CLP_RC_PERM 0x0030 /* Command not authorized */ +#define CLP_RC_FMT 0x0040 /* Invalid command request format */ +#define CLP_RC_LEN 0x0050 /* Invalid command request length */ +#define CLP_RC_8K 0x0060 /* Command requires 8K LPCB */ +#define CLP_RC_RESNOT0 0x0070 /* Reserved field not zero */ +#define CLP_RC_NODATA 0x0080 /* No data available */ +#define CLP_RC_FC_UNKNOWN 0x0100 /* Function code not recognized */ + +/* + * Call Logical Processor - Command Codes + */ +#define CLP_LIST_PCI 0x0002 +#define CLP_QUERY_PCI_FN 0x0003 +#define CLP_QUERY_PCI_FNGRP 0x0004 +#define CLP_SET_PCI_FN 0x0005 + +/* PCI function handle list entry */ +typedef struct ClpFhListEntry { + uint16_t device_id; + uint16_t vendor_id; +#define CLP_FHLIST_MASK_CONFIG 0x80000000 + uint32_t config; + uint32_t fid; + uint32_t fh; +} QEMU_PACKED ClpFhListEntry; + +#define CLP_RC_SETPCIFN_FH 0x0101 /* Invalid PCI fn handle */ +#define CLP_RC_SETPCIFN_FHOP 0x0102 /* Fn handle not valid for op */ +#define CLP_RC_SETPCIFN_DMAAS 0x0103 /* Invalid DMA addr space */ +#define CLP_RC_SETPCIFN_RES 0x0104 /* Insufficient resources */ +#define CLP_RC_SETPCIFN_ALRDY 0x0105 /* Fn already in requested state */ +#define CLP_RC_SETPCIFN_ERR 0x0106 /* Fn in permanent error state */ +#define CLP_RC_SETPCIFN_RECPND 0x0107 /* Error recovery pending */ +#define CLP_RC_SETPCIFN_BUSY 0x0108 /* Fn busy */ +#define CLP_RC_LISTPCI_BADRT 0x010a /* Resume token not recognized */ +#define CLP_RC_QUERYPCIFG_PFGID 0x010b /* Unrecognized PFGID */ + +/* request or response block header length */ +#define LIST_PCI_HDR_LEN 32 + +/* Number of function handles fitting in response block */ +#define CLP_FH_LIST_NR_ENTRIES \ + ((CLP_BLK_SIZE - 2 * LIST_PCI_HDR_LEN) \ + / sizeof(ClpFhListEntry)) + +#define CLP_SET_ENABLE_PCI_FN 0 /* Yes, 0 enables it */ +#define CLP_SET_DISABLE_PCI_FN 1 /* Yes, 1 disables it */ + +#define CLP_UTIL_STR_LEN 64 +#define CLP_PFIP_NR_SEGMENTS 4 + +#define CLP_MASK_FMT 0xf0000000 + +/* List PCI functions request */ +typedef struct ClpReqListPci { + ClpReqHdr hdr; + uint32_t fmt; + uint64_t reserved1; + uint64_t resume_token; + uint64_t reserved2; +} QEMU_PACKED ClpReqListPci; + +/* List PCI functions response */ +typedef struct ClpRspListPci { + ClpRspHdr hdr; + uint32_t fmt; + uint64_t reserved1; + uint64_t resume_token; + uint32_t mdd; + uint16_t max_fn; + uint8_t flags; + uint8_t entry_size; + ClpFhListEntry fh_list[CLP_FH_LIST_NR_ENTRIES]; +} QEMU_PACKED ClpRspListPci; + +/* Query PCI function request */ +typedef struct ClpReqQueryPci { + ClpReqHdr hdr; + uint32_t fmt; + uint64_t reserved1; + uint32_t fh; /* function handle */ + uint32_t reserved2; + uint64_t reserved3; +} QEMU_PACKED ClpReqQueryPci; + +/* Query PCI function response */ +typedef struct ClpRspQueryPci { + ClpRspHdr hdr; + uint32_t fmt; + uint64_t reserved1; + uint16_t vfn; /* virtual fn number */ +#define CLP_RSP_QPCI_MASK_UTIL 0x01 + uint8_t flags; + uint8_t pfgid; + uint32_t fid; /* pci function id */ + uint8_t bar_size[PCI_BAR_COUNT]; + uint16_t pchid; + uint32_t bar[PCI_BAR_COUNT]; + uint8_t pfip[CLP_PFIP_NR_SEGMENTS]; + uint16_t reserved2; + uint8_t fmbl; + uint8_t pft; + uint64_t sdma; /* start dma as */ + uint64_t edma; /* end dma as */ + uint32_t reserved3[11]; + uint32_t uid; + uint8_t util_str[CLP_UTIL_STR_LEN]; /* utility string */ +} QEMU_PACKED ClpRspQueryPci; + +/* Query PCI function group request */ +typedef struct ClpReqQueryPciGrp { + ClpReqHdr hdr; + uint32_t fmt; + uint64_t reserved1; + uint8_t reserved2[3]; + uint8_t g; + uint32_t reserved3; + uint64_t reserved4; +} QEMU_PACKED ClpReqQueryPciGrp; + +/* Query PCI function group response */ +typedef struct ClpRspQueryPciGrp { + ClpRspHdr hdr; + uint32_t fmt; + uint64_t reserved1; +#define CLP_RSP_QPCIG_MASK_NOI 0xfff + uint16_t i; + uint8_t version; +#define CLP_RSP_QPCIG_MASK_FRAME 0x2 +#define CLP_RSP_QPCIG_MASK_REFRESH 0x1 + uint8_t fr; + uint16_t maxstbl; + uint16_t mui; + uint64_t reserved3; + uint64_t dasm; /* dma address space mask */ + uint64_t msia; /* MSI address */ + uint64_t reserved4; + uint64_t reserved5; +} QEMU_PACKED ClpRspQueryPciGrp; + +/* Set PCI function request */ +typedef struct ClpReqSetPci { + ClpReqHdr hdr; + uint32_t fmt; + uint64_t reserved1; + uint32_t fh; /* function handle */ + uint16_t reserved2; + uint8_t oc; /* operation controls */ + uint8_t ndas; /* number of dma spaces */ + uint64_t reserved3; +} QEMU_PACKED ClpReqSetPci; + +/* Set PCI function response */ +typedef struct ClpRspSetPci { + ClpRspHdr hdr; + uint32_t fmt; + uint64_t reserved1; + uint32_t fh; /* function handle */ + uint32_t reserved3; + uint64_t reserved4; +} QEMU_PACKED ClpRspSetPci; + +typedef struct ClpReqRspListPci { + ClpReqListPci request; + ClpRspListPci response; +} QEMU_PACKED ClpReqRspListPci; + +typedef struct ClpReqRspSetPci { + ClpReqSetPci request; + ClpRspSetPci response; +} QEMU_PACKED ClpReqRspSetPci; + +typedef struct ClpReqRspQueryPci { + ClpReqQueryPci request; + ClpRspQueryPci response; +} QEMU_PACKED ClpReqRspQueryPci; + +typedef struct ClpReqRspQueryPciGrp { + ClpReqQueryPciGrp request; + ClpRspQueryPciGrp response; +} QEMU_PACKED ClpReqRspQueryPciGrp; + +#endif diff --git a/include/hw/s390x/s390-pci-inst.h b/include/hw/s390x/s390-pci-inst.h new file mode 100644 index 000000000..a55c448aa --- /dev/null +++ b/include/hw/s390x/s390-pci-inst.h @@ -0,0 +1,119 @@ +/* + * s390 PCI instruction definitions + * + * Copyright 2014 IBM Corp. + * Author(s): Frank Blaschka + * Hong Bo Li + * Yi Min Zhao + * + * This work is licensed under the terms of the GNU GPL, version 2 or (at + * your option) any later version. See the COPYING file in the top-level + * directory. + */ + +#ifndef HW_S390_PCI_INST_H +#define HW_S390_PCI_INST_H + +#include "s390-pci-bus.h" +#include "sysemu/dma.h" + +/* Load/Store status codes */ +#define ZPCI_PCI_ST_FUNC_NOT_ENABLED 4 +#define ZPCI_PCI_ST_FUNC_IN_ERR 8 +#define ZPCI_PCI_ST_BLOCKED 12 +#define ZPCI_PCI_ST_INSUF_RES 16 +#define ZPCI_PCI_ST_INVAL_AS 20 +#define ZPCI_PCI_ST_FUNC_ALREADY_ENABLED 24 +#define ZPCI_PCI_ST_DMA_AS_NOT_ENABLED 28 +#define ZPCI_PCI_ST_2ND_OP_IN_INV_AS 36 +#define ZPCI_PCI_ST_FUNC_NOT_AVAIL 40 +#define ZPCI_PCI_ST_ALREADY_IN_RQ_STATE 44 + +/* Load/Store return codes */ +#define ZPCI_PCI_LS_OK 0 +#define ZPCI_PCI_LS_ERR 1 +#define ZPCI_PCI_LS_BUSY 2 +#define ZPCI_PCI_LS_INVAL_HANDLE 3 + +/* Modify PCI status codes */ +#define ZPCI_MOD_ST_RES_NOT_AVAIL 4 +#define ZPCI_MOD_ST_INSUF_RES 16 +#define ZPCI_MOD_ST_SEQUENCE 24 +#define ZPCI_MOD_ST_DMAAS_INVAL 28 +#define ZPCI_MOD_ST_FRAME_INVAL 32 +#define ZPCI_MOD_ST_ERROR_RECOVER 40 + +/* Modify PCI Function Controls */ +#define ZPCI_MOD_FC_REG_INT 2 +#define ZPCI_MOD_FC_DEREG_INT 3 +#define ZPCI_MOD_FC_REG_IOAT 4 +#define ZPCI_MOD_FC_DEREG_IOAT 5 +#define ZPCI_MOD_FC_REREG_IOAT 6 +#define ZPCI_MOD_FC_RESET_ERROR 7 +#define ZPCI_MOD_FC_RESET_BLOCK 9 +#define ZPCI_MOD_FC_SET_MEASURE 10 + +/* Store PCI Function Controls status codes */ +#define ZPCI_STPCIFC_ST_PERM_ERROR 8 +#define ZPCI_STPCIFC_ST_INVAL_DMAAS 28 +#define ZPCI_STPCIFC_ST_ERROR_RECOVER 40 + +/* Refresh PCI Translations status codes */ +#define ZPCI_RPCIT_ST_INSUFF_RES 16 + +/* FIB function controls */ +#define ZPCI_FIB_FC_ENABLED 0x80 +#define ZPCI_FIB_FC_ERROR 0x40 +#define ZPCI_FIB_FC_LS_BLOCKED 0x20 +#define ZPCI_FIB_FC_DMAAS_REG 0x10 + +/* FIB function controls */ +#define ZPCI_FIB_FC_ENABLED 0x80 +#define ZPCI_FIB_FC_ERROR 0x40 +#define ZPCI_FIB_FC_LS_BLOCKED 0x20 +#define ZPCI_FIB_FC_DMAAS_REG 0x10 + +/* Function Information Block */ +typedef struct ZpciFib { + uint8_t fmt; /* format */ + uint8_t reserved1[7]; + uint8_t fc; /* function controls */ + uint8_t reserved2; + uint16_t reserved3; + uint32_t reserved4; + uint64_t pba; /* PCI base address */ + uint64_t pal; /* PCI address limit */ + uint64_t iota; /* I/O Translation Anchor */ +#define FIB_DATA_ISC(x) (((x) >> 28) & 0x7) +#define FIB_DATA_NOI(x) (((x) >> 16) & 0xfff) +#define FIB_DATA_AIBVO(x) (((x) >> 8) & 0x3f) +#define FIB_DATA_SUM(x) (((x) >> 7) & 0x1) +#define FIB_DATA_AISBO(x) ((x) & 0x3f) + uint32_t data; + uint32_t reserved5; + uint64_t aibv; /* Adapter int bit vector address */ + uint64_t aisb; /* Adapter int summary bit address */ + uint64_t fmb_addr; /* Function measurement address and key */ + uint32_t reserved6; + uint32_t gd; +} QEMU_PACKED ZpciFib; + +int pci_dereg_irqs(S390PCIBusDevice *pbdev); +void pci_dereg_ioat(S390PCIIOMMU *iommu); +int clp_service_call(S390CPU *cpu, uint8_t r2, uintptr_t ra); +int pcilg_service_call(S390CPU *cpu, uint8_t r1, uint8_t r2, uintptr_t ra); +int pcistg_service_call(S390CPU *cpu, uint8_t r1, uint8_t r2, uintptr_t ra); +int rpcit_service_call(S390CPU *cpu, uint8_t r1, uint8_t r2, uintptr_t ra); +int pcistb_service_call(S390CPU *cpu, uint8_t r1, uint8_t r3, uint64_t gaddr, + uint8_t ar, uintptr_t ra); +int mpcifc_service_call(S390CPU *cpu, uint8_t r1, uint64_t fiba, uint8_t ar, + uintptr_t ra); +int stpcifc_service_call(S390CPU *cpu, uint8_t r1, uint64_t fiba, uint8_t ar, + uintptr_t ra); +void fmb_timer_free(S390PCIBusDevice *pbdev); + +#define ZPCI_IO_BAR_MIN 0 +#define ZPCI_IO_BAR_MAX 5 +#define ZPCI_CONFIG_BAR 15 + +#endif diff --git a/include/hw/s390x/s390-pci-vfio.h b/include/hw/s390x/s390-pci-vfio.h new file mode 100644 index 000000000..ff708aef5 --- /dev/null +++ b/include/hw/s390x/s390-pci-vfio.h @@ -0,0 +1,39 @@ +/* + * s390 vfio-pci interfaces + * + * Copyright 2020 IBM Corp. + * Author(s): Matthew Rosato + * + * This work is licensed under the terms of the GNU GPL, version 2 or (at + * your option) any later version. See the COPYING file in the top-level + * directory. + */ + +#ifndef HW_S390_PCI_VFIO_H +#define HW_S390_PCI_VFIO_H + +#include "hw/s390x/s390-pci-bus.h" +#include CONFIG_DEVICES + +#ifdef CONFIG_VFIO +bool s390_pci_update_dma_avail(int fd, unsigned int *avail); +S390PCIDMACount *s390_pci_start_dma_count(S390pciState *s, + S390PCIBusDevice *pbdev); +void s390_pci_end_dma_count(S390pciState *s, S390PCIDMACount *cnt); +void s390_pci_get_clp_info(S390PCIBusDevice *pbdev); +#else +static inline bool s390_pci_update_dma_avail(int fd, unsigned int *avail) +{ + return false; +} +static inline S390PCIDMACount *s390_pci_start_dma_count(S390pciState *s, + S390PCIBusDevice *pbdev) +{ + return NULL; +} +static inline void s390_pci_end_dma_count(S390pciState *s, + S390PCIDMACount *cnt) { } +static inline void s390_pci_get_clp_info(S390PCIBusDevice *pbdev) { } +#endif + +#endif diff --git a/include/hw/s390x/s390-virtio-ccw.h b/include/hw/s390x/s390-virtio-ccw.h new file mode 100644 index 000000000..3331990e0 --- /dev/null +++ b/include/hw/s390x/s390-virtio-ccw.h @@ -0,0 +1,57 @@ +/* + * virtio ccw machine definitions + * + * Copyright 2012, 2016 IBM Corp. + * Author(s): Cornelia Huck + * + * This work is licensed under the terms of the GNU GPL, version 2 or (at + * your option) any later version. See the COPYING file in the top-level + * directory. + */ +#ifndef HW_S390X_S390_VIRTIO_CCW_H +#define HW_S390X_S390_VIRTIO_CCW_H + +#include "hw/boards.h" +#include "qom/object.h" + +#define TYPE_S390_CCW_MACHINE "s390-ccw-machine" + +OBJECT_DECLARE_TYPE(S390CcwMachineState, S390CcwMachineClass, S390_CCW_MACHINE) + + +struct S390CcwMachineState { + /*< private >*/ + MachineState parent_obj; + + /*< public >*/ + bool aes_key_wrap; + bool dea_key_wrap; + bool pv; + uint8_t loadparm[8]; +}; + +struct S390CcwMachineClass { + /*< private >*/ + MachineClass parent_class; + + /*< public >*/ + bool ri_allowed; + bool cpu_model_allowed; + bool css_migration_enabled; + bool hpage_1m_allowed; +}; + +/* runtime-instrumentation allowed by the machine */ +bool ri_allowed(void); +/* cpu model allowed by the machine */ +bool cpu_model_allowed(void); +/* 1M huge page mappings allowed by the machine */ +bool hpage_1m_allowed(void); + +/** + * Returns true if (vmstate based) migration of the channel subsystem + * is enabled, false if it is disabled. + */ +bool css_migration_enabled(void); + +#endif diff --git a/include/hw/s390x/s390_flic.h b/include/hw/s390x/s390_flic.h new file mode 100644 index 000000000..3907a13d0 --- /dev/null +++ b/include/hw/s390x/s390_flic.h @@ -0,0 +1,142 @@ +/* + * QEMU S390x floating interrupt controller (flic) + * + * Copyright 2014 IBM Corp. + * Author(s): Jens Freimann + * Cornelia Huck + * + * This work is licensed under the terms of the GNU GPL, version 2 or (at + * your option) any later version. See the COPYING file in the top-level + * directory. + */ + +#ifndef HW_S390_FLIC_H +#define HW_S390_FLIC_H + +#include "hw/sysbus.h" +#include "hw/s390x/adapter.h" +#include "hw/virtio/virtio.h" +#include "qemu/queue.h" +#include "qom/object.h" + +/* + * Reserve enough gsis to accommodate all virtio devices. + * If any other user of adapter routes needs more of these, + * we need to bump the value; but virtio looks like the + * maximum right now. + */ +#define ADAPTER_ROUTES_MAX_GSI VIRTIO_QUEUE_MAX + +typedef struct AdapterRoutes { + AdapterInfo adapter; + int num_routes; + int gsi[ADAPTER_ROUTES_MAX_GSI]; +} AdapterRoutes; + +extern const VMStateDescription vmstate_adapter_routes; + +#define VMSTATE_ADAPTER_ROUTES(_f, _s) \ + VMSTATE_STRUCT(_f, _s, 1, vmstate_adapter_routes, AdapterRoutes) + +#define TYPE_S390_FLIC_COMMON "s390-flic" +OBJECT_DECLARE_TYPE(S390FLICState, S390FLICStateClass, + S390_FLIC_COMMON) + +struct S390FLICState { + SysBusDevice parent_obj; + /* to limit AdapterRoutes.num_routes for compat */ + uint32_t adapter_routes_max_batch; + bool ais_supported; +}; + + +struct S390FLICStateClass { + DeviceClass parent_class; + + int (*register_io_adapter)(S390FLICState *fs, uint32_t id, uint8_t isc, + bool swap, bool maskable, uint8_t flags); + int (*io_adapter_map)(S390FLICState *fs, uint32_t id, uint64_t map_addr, + bool do_map); + int (*add_adapter_routes)(S390FLICState *fs, AdapterRoutes *routes); + void (*release_adapter_routes)(S390FLICState *fs, AdapterRoutes *routes); + int (*clear_io_irq)(S390FLICState *fs, uint16_t subchannel_id, + uint16_t subchannel_nr); + int (*modify_ais_mode)(S390FLICState *fs, uint8_t isc, uint16_t mode); + int (*inject_airq)(S390FLICState *fs, uint8_t type, uint8_t isc, + uint8_t flags); + void (*inject_service)(S390FLICState *fs, uint32_t parm); + void (*inject_io)(S390FLICState *fs, uint16_t subchannel_id, + uint16_t subchannel_nr, uint32_t io_int_parm, + uint32_t io_int_word); + void (*inject_crw_mchk)(S390FLICState *fs); +}; + +#define TYPE_KVM_S390_FLIC "s390-flic-kvm" +typedef struct KVMS390FLICState KVMS390FLICState; +DECLARE_INSTANCE_CHECKER(KVMS390FLICState, KVM_S390_FLIC, + TYPE_KVM_S390_FLIC) + +#define TYPE_QEMU_S390_FLIC "s390-flic-qemu" +OBJECT_DECLARE_SIMPLE_TYPE(QEMUS390FLICState, QEMU_S390_FLIC) + +#define SIC_IRQ_MODE_ALL 0 +#define SIC_IRQ_MODE_SINGLE 1 +#define AIS_MODE_MASK(isc) (0x80 >> isc) + +#define ISC_TO_PENDING_IO(_isc) (0x80 >> (_isc)) +#define CR6_TO_PENDING_IO(_cr6) (((_cr6) >> 24) & 0xff) + +/* organize the ISC bits so that the macros above work */ +#define FLIC_PENDING_IO_ISC7 (1 << 0) +#define FLIC_PENDING_IO_ISC6 (1 << 1) +#define FLIC_PENDING_IO_ISC5 (1 << 2) +#define FLIC_PENDING_IO_ISC4 (1 << 3) +#define FLIC_PENDING_IO_ISC3 (1 << 4) +#define FLIC_PENDING_IO_ISC2 (1 << 5) +#define FLIC_PENDING_IO_ISC1 (1 << 6) +#define FLIC_PENDING_IO_ISC0 (1 << 7) +#define FLIC_PENDING_SERVICE (1 << 8) +#define FLIC_PENDING_MCHK_CR (1 << 9) + +#define FLIC_PENDING_IO (FLIC_PENDING_IO_ISC0 | FLIC_PENDING_IO_ISC1 | \ + FLIC_PENDING_IO_ISC2 | FLIC_PENDING_IO_ISC3 | \ + FLIC_PENDING_IO_ISC4 | FLIC_PENDING_IO_ISC5 | \ + FLIC_PENDING_IO_ISC6 | FLIC_PENDING_IO_ISC7) + +typedef struct QEMUS390FlicIO { + uint16_t id; + uint16_t nr; + uint32_t parm; + uint32_t word; + QLIST_ENTRY(QEMUS390FlicIO) next; +} QEMUS390FlicIO; + +struct QEMUS390FLICState { + S390FLICState parent_obj; + uint32_t pending; + uint32_t service_param; + uint8_t simm; + uint8_t nimm; + QLIST_HEAD(, QEMUS390FlicIO) io[8]; +}; + +uint32_t qemu_s390_flic_dequeue_service(QEMUS390FLICState *flic); +QEMUS390FlicIO *qemu_s390_flic_dequeue_io(QEMUS390FLICState *flic, + uint64_t cr6); +void qemu_s390_flic_dequeue_crw_mchk(QEMUS390FLICState *flic); +bool qemu_s390_flic_has_service(QEMUS390FLICState *flic); +bool qemu_s390_flic_has_io(QEMUS390FLICState *fs, uint64_t cr6); +bool qemu_s390_flic_has_crw_mchk(QEMUS390FLICState *flic); +bool qemu_s390_flic_has_any(QEMUS390FLICState *flic); + +void s390_flic_init(void); + +S390FLICState *s390_get_flic(void); +QEMUS390FLICState *s390_get_qemu_flic(S390FLICState *fs); +S390FLICStateClass *s390_get_flic_class(S390FLICState *fs); +void s390_crw_mchk(void); +void s390_io_interrupt(uint16_t subchannel_id, uint16_t subchannel_nr, + uint32_t io_int_parm, uint32_t io_int_word); +bool ais_needed(void *opaque); + +#endif /* HW_S390_FLIC_H */ diff --git a/include/hw/s390x/sclp.h b/include/hw/s390x/sclp.h new file mode 100644 index 000000000..d3ade40a5 --- /dev/null +++ b/include/hw/s390x/sclp.h @@ -0,0 +1,232 @@ +/* + * SCLP Support + * + * Copyright IBM, Corp. 2012 + * + * Authors: + * Christian Borntraeger + * + * This work is licensed under the terms of the GNU GPL, version 2 or (at your + * option) any later version. See the COPYING file in the top-level directory. + * + */ + +#ifndef HW_S390_SCLP_H +#define HW_S390_SCLP_H + +#include "hw/sysbus.h" +#include "target/s390x/cpu-qom.h" +#include "qom/object.h" + +#define SCLP_CMD_CODE_MASK 0xffff00ff + +/* SCLP command codes */ +#define SCLP_CMDW_READ_SCP_INFO 0x00020001 +#define SCLP_CMDW_READ_SCP_INFO_FORCED 0x00120001 +#define SCLP_READ_STORAGE_ELEMENT_INFO 0x00040001 +#define SCLP_ATTACH_STORAGE_ELEMENT 0x00080001 +#define SCLP_ASSIGN_STORAGE 0x000D0001 +#define SCLP_UNASSIGN_STORAGE 0x000C0001 +#define SCLP_CMD_READ_EVENT_DATA 0x00770005 +#define SCLP_CMD_WRITE_EVENT_DATA 0x00760005 +#define SCLP_CMD_WRITE_EVENT_MASK 0x00780005 + +/* SCLP Memory hotplug codes */ +#define SCLP_FC_ASSIGN_ATTACH_READ_STOR 0xE00000000000ULL +#define SCLP_STARTING_SUBINCREMENT_ID 0x10001 +#define SCLP_INCREMENT_UNIT 0x10000 +#define MAX_STORAGE_INCREMENTS 1020 + +/* CPU hotplug SCLP codes */ +#define SCLP_HAS_CPU_INFO 0x0C00000000000000ULL +#define SCLP_CMDW_READ_CPU_INFO 0x00010001 +#define SCLP_CMDW_CONFIGURE_CPU 0x00110001 +#define SCLP_CMDW_DECONFIGURE_CPU 0x00100001 + +/* SCLP PCI codes */ +#define SCLP_HAS_IOA_RECONFIG 0x0000000040000000ULL +#define SCLP_CMDW_CONFIGURE_IOA 0x001a0001 +#define SCLP_CMDW_DECONFIGURE_IOA 0x001b0001 +#define SCLP_RECONFIG_PCI_ATYPE 2 + +/* SCLP response codes */ +#define SCLP_RC_NORMAL_READ_COMPLETION 0x0010 +#define SCLP_RC_NORMAL_COMPLETION 0x0020 +#define SCLP_RC_SCCB_BOUNDARY_VIOLATION 0x0100 +#define SCLP_RC_NO_ACTION_REQUIRED 0x0120 +#define SCLP_RC_INVALID_SCLP_COMMAND 0x01f0 +#define SCLP_RC_CONTAINED_EQUIPMENT_CHECK 0x0340 +#define SCLP_RC_INSUFFICIENT_SCCB_LENGTH 0x0300 +#define SCLP_RC_STANDBY_READ_COMPLETION 0x0410 +#define SCLP_RC_ADAPTER_IN_RESERVED_STATE 0x05f0 +#define SCLP_RC_ADAPTER_TYPE_NOT_RECOGNIZED 0x06f0 +#define SCLP_RC_ADAPTER_ID_NOT_RECOGNIZED 0x09f0 +#define SCLP_RC_INVALID_FUNCTION 0x40f0 +#define SCLP_RC_NO_EVENT_BUFFERS_STORED 0x60f0 +#define SCLP_RC_INVALID_SELECTION_MASK 0x70f0 +#define SCLP_RC_INCONSISTENT_LENGTHS 0x72f0 +#define SCLP_RC_EVENT_BUFFER_SYNTAX_ERROR 0x73f0 +#define SCLP_RC_INVALID_MASK_LENGTH 0x74f0 + + +/* Service Call Control Block (SCCB) and its elements */ + +#define SCCB_SIZE 4096 + +#define SCLP_VARIABLE_LENGTH_RESPONSE 0x80 +#define SCLP_EVENT_BUFFER_ACCEPTED 0x80 + +#define SCLP_FC_NORMAL_WRITE 0 + +/* + * Normally packed structures are not the right thing to do, since all code + * must take care of endianness. We cannot use ldl_phys and friends for two + * reasons, though: + * - some of the embedded structures below the SCCB can appear multiple times + * at different locations, so there is no fixed offset + * - we work on a private copy of the SCCB, since there are several length + * fields, that would cause a security nightmare if we allow the guest to + * alter the structure while we parse it. We cannot use ldl_p and friends + * either without doing pointer arithmetics + * So we have to double check that all users of sclp data structures use the + * right endianness wrappers. + */ +typedef struct SCCBHeader { + uint16_t length; + uint8_t function_code; + uint8_t control_mask[3]; + uint16_t response_code; +} QEMU_PACKED SCCBHeader; + +#define SCCB_DATA_LEN (SCCB_SIZE - sizeof(SCCBHeader)) +#define SCCB_CPU_FEATURE_LEN 6 + +/* CPU information */ +typedef struct CPUEntry { + uint8_t address; + uint8_t reserved0; + uint8_t features[SCCB_CPU_FEATURE_LEN]; + uint8_t reserved2[6]; + uint8_t type; + uint8_t reserved1; +} QEMU_PACKED CPUEntry; + +#define SCLP_READ_SCP_INFO_FIXED_CPU_OFFSET 128 +typedef struct ReadInfo { + SCCBHeader h; + uint16_t rnmax; + uint8_t rnsize; + uint8_t _reserved1[16 - 11]; /* 11-15 */ + uint16_t entries_cpu; /* 16-17 */ + uint16_t offset_cpu; /* 18-19 */ + uint8_t _reserved2[24 - 20]; /* 20-23 */ + uint8_t loadparm[8]; /* 24-31 */ + uint8_t _reserved3[48 - 32]; /* 32-47 */ + uint64_t facilities; /* 48-55 */ + uint8_t _reserved0[76 - 56]; /* 56-75 */ + uint32_t ibc_val; + uint8_t conf_char[99 - 80]; /* 80-98 */ + uint8_t mha_pow; + uint32_t rnsize2; + uint64_t rnmax2; + uint8_t _reserved6[116 - 112]; /* 112-115 */ + uint8_t conf_char_ext[120 - 116]; /* 116-119 */ + uint16_t highest_cpu; + uint8_t _reserved5[124 - 122]; /* 122-123 */ + uint32_t hmfai; + uint8_t _reserved7[134 - 128]; /* 128-133 */ + uint8_t fac134; + uint8_t _reserved8[144 - 135]; /* 135-143 */ + struct CPUEntry entries[]; + /* + * When the Extended-Length SCCB (ELS) feature is enabled the + * start of the entries field begins at an offset denoted by the + * offset_cpu field, otherwise it's at an offset of 128. + */ +} QEMU_PACKED ReadInfo; + +typedef struct ReadCpuInfo { + SCCBHeader h; + uint16_t nr_configured; /* 8-9 */ + uint16_t offset_configured; /* 10-11 */ + uint16_t nr_standby; /* 12-13 */ + uint16_t offset_standby; /* 14-15 */ + uint8_t reserved0[24-16]; /* 16-23 */ + struct CPUEntry entries[]; +} QEMU_PACKED ReadCpuInfo; + +typedef struct ReadStorageElementInfo { + SCCBHeader h; + uint16_t max_id; + uint16_t assigned; + uint16_t standby; + uint8_t _reserved0[16 - 14]; /* 14-15 */ + uint32_t entries[]; +} QEMU_PACKED ReadStorageElementInfo; + +typedef struct AttachStorageElement { + SCCBHeader h; + uint8_t _reserved0[10 - 8]; /* 8-9 */ + uint16_t assigned; + uint8_t _reserved1[16 - 12]; /* 12-15 */ + uint32_t entries[]; +} QEMU_PACKED AttachStorageElement; + +typedef struct AssignStorage { + SCCBHeader h; + uint16_t rn; +} QEMU_PACKED AssignStorage; + +typedef struct IoaCfgSccb { + SCCBHeader header; + uint8_t atype; + uint8_t reserved1; + uint16_t reserved2; + uint32_t aid; +} QEMU_PACKED IoaCfgSccb; + +typedef struct SCCB { + SCCBHeader h; + char data[]; + } QEMU_PACKED SCCB; + +#define TYPE_SCLP "sclp" +OBJECT_DECLARE_TYPE(SCLPDevice, SCLPDeviceClass, + SCLP) + +struct SCLPEventFacility; + +struct SCLPDevice { + /* private */ + DeviceState parent_obj; + struct SCLPEventFacility *event_facility; + int increment_size; + + /* public */ +}; + +struct SCLPDeviceClass { + /* private */ + DeviceClass parent_class; + void (*read_SCP_info)(SCLPDevice *sclp, SCCB *sccb); + void (*read_cpu_info)(SCLPDevice *sclp, SCCB *sccb); + + /* public */ + void (*execute)(SCLPDevice *sclp, SCCB *sccb, uint32_t code); + void (*service_interrupt)(SCLPDevice *sclp, uint32_t sccb); +}; + +static inline int sccb_data_len(SCCB *sccb) +{ + return be16_to_cpu(sccb->h.length) - sizeof(sccb->h); +} + + +void s390_sclp_init(void); +void sclp_service_interrupt(uint32_t sccb); +void raise_irq_cpu_hotplug(void); +int sclp_service_call(CPUS390XState *env, uint64_t sccb, uint32_t code); +int sclp_service_call_protected(CPUS390XState *env, uint64_t sccb, + uint32_t code); + +#endif diff --git a/include/hw/s390x/storage-attributes.h b/include/hw/s390x/storage-attributes.h new file mode 100644 index 000000000..5239eb538 --- /dev/null +++ b/include/hw/s390x/storage-attributes.h @@ -0,0 +1,79 @@ +/* + * s390 storage attributes device + * + * Copyright 2016 IBM Corp. + * Author(s): Claudio Imbrenda + * + * This work is licensed under the terms of the GNU GPL, version 2 or (at + * your option) any later version. See the COPYING file in the top-level + * directory. + */ + +#ifndef S390_STORAGE_ATTRIBUTES_H +#define S390_STORAGE_ATTRIBUTES_H + +#include "hw/qdev-core.h" +#include "monitor/monitor.h" +#include "qom/object.h" + +#define TYPE_S390_STATTRIB "s390-storage_attributes" +#define TYPE_QEMU_S390_STATTRIB "s390-storage_attributes-qemu" +#define TYPE_KVM_S390_STATTRIB "s390-storage_attributes-kvm" + +OBJECT_DECLARE_TYPE(S390StAttribState, S390StAttribClass, S390_STATTRIB) + +struct S390StAttribState { + DeviceState parent_obj; + uint64_t migration_cur_gfn; + bool migration_enabled; +}; + + +struct S390StAttribClass { + DeviceClass parent_class; + /* Return value: < 0 on error, or new count */ + int (*get_stattr)(S390StAttribState *sa, uint64_t *start_gfn, + uint32_t count, uint8_t *values); + int (*peek_stattr)(S390StAttribState *sa, uint64_t start_gfn, + uint32_t count, uint8_t *values); + int (*set_stattr)(S390StAttribState *sa, uint64_t start_gfn, + uint32_t count, uint8_t *values); + void (*synchronize)(S390StAttribState *sa); + int (*set_migrationmode)(S390StAttribState *sa, bool value); + int (*get_active)(S390StAttribState *sa); + long long (*get_dirtycount)(S390StAttribState *sa); +}; + +typedef struct QEMUS390StAttribState QEMUS390StAttribState; +DECLARE_INSTANCE_CHECKER(QEMUS390StAttribState, QEMU_S390_STATTRIB, + TYPE_QEMU_S390_STATTRIB) + +struct QEMUS390StAttribState { + S390StAttribState parent_obj; +}; + +typedef struct KVMS390StAttribState KVMS390StAttribState; +DECLARE_INSTANCE_CHECKER(KVMS390StAttribState, KVM_S390_STATTRIB, + TYPE_KVM_S390_STATTRIB) + +struct KVMS390StAttribState { + S390StAttribState parent_obj; + uint64_t still_dirty; + uint8_t *incoming_buffer; +}; + +void s390_stattrib_init(void); + +#ifdef CONFIG_KVM +Object *kvm_s390_stattrib_create(void); +#else +static inline Object *kvm_s390_stattrib_create(void) +{ + return NULL; +} +#endif + +void hmp_info_cmma(Monitor *mon, const QDict *qdict); +void hmp_migrationmode(Monitor *mon, const QDict *qdict); + +#endif /* S390_STORAGE_ATTRIBUTES_H */ diff --git a/include/hw/s390x/storage-keys.h b/include/hw/s390x/storage-keys.h new file mode 100644 index 000000000..2888d42d0 --- /dev/null +++ b/include/hw/s390x/storage-keys.h @@ -0,0 +1,57 @@ +/* + * s390 storage key device + * + * Copyright 2015 IBM Corp. + * Author(s): Jason J. Herne + * + * This work is licensed under the terms of the GNU GPL, version 2 or (at + * your option) any later version. See the COPYING file in the top-level + * directory. + */ + +#ifndef S390_STORAGE_KEYS_H +#define S390_STORAGE_KEYS_H + +#include "hw/qdev-core.h" +#include "monitor/monitor.h" +#include "qom/object.h" + +#define TYPE_S390_SKEYS "s390-skeys" +OBJECT_DECLARE_TYPE(S390SKeysState, S390SKeysClass, S390_SKEYS) + +struct S390SKeysState { + DeviceState parent_obj; + bool migration_enabled; + +}; + + +struct S390SKeysClass { + DeviceClass parent_class; + int (*skeys_enabled)(S390SKeysState *ks); + int (*get_skeys)(S390SKeysState *ks, uint64_t start_gfn, uint64_t count, + uint8_t *keys); + int (*set_skeys)(S390SKeysState *ks, uint64_t start_gfn, uint64_t count, + uint8_t *keys); +}; + +#define TYPE_KVM_S390_SKEYS "s390-skeys-kvm" +#define TYPE_QEMU_S390_SKEYS "s390-skeys-qemu" +typedef struct QEMUS390SKeysState QEMUS390SKeysState; +DECLARE_INSTANCE_CHECKER(QEMUS390SKeysState, QEMU_S390_SKEYS, + TYPE_QEMU_S390_SKEYS) + +struct QEMUS390SKeysState { + S390SKeysState parent_obj; + uint8_t *keydata; + uint32_t key_count; +}; + +void s390_skeys_init(void); + +S390SKeysState *s390_get_skeys_device(void); + +void hmp_dump_skeys(Monitor *mon, const QDict *qdict); +void hmp_info_skeys(Monitor *mon, const QDict *qdict); + +#endif /* S390_STORAGE_KEYS_H */ diff --git a/include/hw/s390x/tod.h b/include/hw/s390x/tod.h new file mode 100644 index 000000000..ff3195a4b --- /dev/null +++ b/include/hw/s390x/tod.h @@ -0,0 +1,54 @@ +/* + * TOD (Time Of Day) clock + * + * Copyright 2018 Red Hat, Inc. + * Author(s): David Hildenbrand + * + * This work is licensed under the terms of the GNU GPL, version 2 or later. + * See the COPYING file in the top-level directory. + */ + +#ifndef HW_S390_TOD_H +#define HW_S390_TOD_H + +#include "hw/qdev-core.h" +#include "target/s390x/s390-tod.h" +#include "qom/object.h" + +typedef struct S390TOD { + uint8_t high; + uint64_t low; +} S390TOD; + +#define TYPE_S390_TOD "s390-tod" +OBJECT_DECLARE_TYPE(S390TODState, S390TODClass, S390_TOD) +#define TYPE_KVM_S390_TOD TYPE_S390_TOD "-kvm" +#define TYPE_QEMU_S390_TOD TYPE_S390_TOD "-qemu" + +struct S390TODState { + /* private */ + DeviceState parent_obj; + + /* + * Used by TCG to remember the time base. Used by KVM to backup the TOD + * while the TOD is stopped. + */ + S390TOD base; + /* Used by KVM to remember if the TOD is stopped and base is valid. */ + bool stopped; +}; + +struct S390TODClass { + /* private */ + DeviceClass parent_class; + void (*parent_realize)(DeviceState *dev, Error **errp); + + /* public */ + void (*get)(const S390TODState *td, S390TOD *tod, Error **errp); + void (*set)(S390TODState *td, const S390TOD *tod, Error **errp); +}; + +void s390_init_tod(void); +S390TODState *s390_get_todstate(void); + +#endif diff --git a/include/hw/s390x/vfio-ccw.h b/include/hw/s390x/vfio-ccw.h new file mode 100644 index 000000000..63a909eb7 --- /dev/null +++ b/include/hw/s390x/vfio-ccw.h @@ -0,0 +1,27 @@ +/* + * vfio based subchannel assignment support + * + * Copyright 2017, 2019 IBM Corp. + * Author(s): Dong Jia Shi + * Xiao Feng Ren + * Pierre Morel + * + * This work is licensed under the terms of the GNU GPL, version 2 or (at + * your option) any later version. See the COPYING file in the top-level + * directory. + */ + +#ifndef HW_VFIO_CCW_H +#define HW_VFIO_CCW_H + +#include "hw/vfio/vfio-common.h" +#include "hw/s390x/s390-ccw.h" +#include "hw/s390x/ccw-device.h" +#include "qom/object.h" + +#define TYPE_VFIO_CCW "vfio-ccw" +OBJECT_DECLARE_SIMPLE_TYPE(VFIOCCWDevice, VFIO_CCW) + +#define TYPE_VFIO_CCW "vfio-ccw" + +#endif diff --git a/include/hw/scsi/emulation.h b/include/hw/scsi/emulation.h new file mode 100644 index 000000000..952170432 --- /dev/null +++ b/include/hw/scsi/emulation.h @@ -0,0 +1,16 @@ +#ifndef HW_SCSI_EMULATION_H +#define HW_SCSI_EMULATION_H + +typedef struct SCSIBlockLimits { + bool wsnz; + uint16_t min_io_size; + uint32_t max_unmap_descr; + uint32_t opt_io_size; + uint32_t max_unmap_sectors; + uint32_t unmap_sectors; + uint32_t max_io_sectors; +} SCSIBlockLimits; + +int scsi_emulate_block_limits(uint8_t *outbuf, const SCSIBlockLimits *bl); + +#endif diff --git a/include/hw/scsi/esp.h b/include/hw/scsi/esp.h new file mode 100644 index 000000000..60cc3047a --- /dev/null +++ b/include/hw/scsi/esp.h @@ -0,0 +1,161 @@ +#ifndef QEMU_HW_ESP_H +#define QEMU_HW_ESP_H + +#include "hw/scsi/scsi.h" +#include "hw/sysbus.h" +#include "qom/object.h" + +/* esp.c */ +#define ESP_MAX_DEVS 7 +typedef void (*ESPDMAMemoryReadWriteFunc)(void *opaque, uint8_t *buf, int len); + +#define ESP_REGS 16 +#define TI_BUFSZ 16 +#define ESP_CMDBUF_SZ 32 + +typedef struct ESPState ESPState; + +enum pdma_origin_id { + PDMA, + TI, + CMD, + ASYNC, +}; + +struct ESPState { + uint8_t rregs[ESP_REGS]; + uint8_t wregs[ESP_REGS]; + qemu_irq irq; + qemu_irq irq_data; + uint8_t chip_id; + bool tchi_written; + int32_t ti_size; + uint32_t ti_rptr, ti_wptr; + uint32_t status; + uint32_t deferred_status; + bool deferred_complete; + uint32_t dma; + uint8_t ti_buf[TI_BUFSZ]; + SCSIBus bus; + SCSIDevice *current_dev; + SCSIRequest *current_req; + uint8_t cmdbuf[ESP_CMDBUF_SZ]; + uint32_t cmdlen; + uint32_t do_cmd; + + /* The amount of data left in the current DMA transfer. */ + uint32_t dma_left; + /* The size of the current DMA transfer. Zero if no transfer is in + progress. */ + uint32_t dma_counter; + int dma_enabled; + + uint32_t async_len; + uint8_t *async_buf; + + ESPDMAMemoryReadWriteFunc dma_memory_read; + ESPDMAMemoryReadWriteFunc dma_memory_write; + void *dma_opaque; + void (*dma_cb)(ESPState *s); + uint8_t pdma_buf[32]; + int pdma_origin; + uint32_t pdma_len; + uint32_t pdma_start; + uint32_t pdma_cur; + void (*pdma_cb)(ESPState *s); +}; + +#define TYPE_ESP "esp" +OBJECT_DECLARE_SIMPLE_TYPE(SysBusESPState, ESP) + +struct SysBusESPState { + /*< private >*/ + SysBusDevice parent_obj; + /*< public >*/ + + MemoryRegion iomem; + MemoryRegion pdma; + uint32_t it_shift; + ESPState esp; +}; + +#define ESP_TCLO 0x0 +#define ESP_TCMID 0x1 +#define ESP_FIFO 0x2 +#define ESP_CMD 0x3 +#define ESP_RSTAT 0x4 +#define ESP_WBUSID 0x4 +#define ESP_RINTR 0x5 +#define ESP_WSEL 0x5 +#define ESP_RSEQ 0x6 +#define ESP_WSYNTP 0x6 +#define ESP_RFLAGS 0x7 +#define ESP_WSYNO 0x7 +#define ESP_CFG1 0x8 +#define ESP_RRES1 0x9 +#define ESP_WCCF 0x9 +#define ESP_RRES2 0xa +#define ESP_WTEST 0xa +#define ESP_CFG2 0xb +#define ESP_CFG3 0xc +#define ESP_RES3 0xd +#define ESP_TCHI 0xe +#define ESP_RES4 0xf + +#define CMD_DMA 0x80 +#define CMD_CMD 0x7f + +#define CMD_NOP 0x00 +#define CMD_FLUSH 0x01 +#define CMD_RESET 0x02 +#define CMD_BUSRESET 0x03 +#define CMD_TI 0x10 +#define CMD_ICCS 0x11 +#define CMD_MSGACC 0x12 +#define CMD_PAD 0x18 +#define CMD_SATN 0x1a +#define CMD_RSTATN 0x1b +#define CMD_SEL 0x41 +#define CMD_SELATN 0x42 +#define CMD_SELATNS 0x43 +#define CMD_ENSEL 0x44 +#define CMD_DISSEL 0x45 + +#define STAT_DO 0x00 +#define STAT_DI 0x01 +#define STAT_CD 0x02 +#define STAT_ST 0x03 +#define STAT_MO 0x06 +#define STAT_MI 0x07 +#define STAT_PIO_MASK 0x06 + +#define STAT_TC 0x10 +#define STAT_PE 0x20 +#define STAT_GE 0x40 +#define STAT_INT 0x80 + +#define BUSID_DID 0x07 + +#define INTR_FC 0x08 +#define INTR_BS 0x10 +#define INTR_DC 0x20 +#define INTR_RST 0x80 + +#define SEQ_0 0x0 +#define SEQ_CD 0x4 + +#define CFG1_RESREPT 0x40 + +#define TCHI_FAS100A 0x4 +#define TCHI_AM53C974 0x12 + +void esp_dma_enable(ESPState *s, int irq, int level); +void esp_request_cancelled(SCSIRequest *req); +void esp_command_complete(SCSIRequest *req, uint32_t status, size_t resid); +void esp_transfer_data(SCSIRequest *req, uint32_t len); +void esp_hard_reset(ESPState *s); +uint64_t esp_reg_read(ESPState *s, uint32_t saddr); +void esp_reg_write(ESPState *s, uint32_t saddr, uint64_t val); +extern const VMStateDescription vmstate_esp; + +#endif diff --git a/include/hw/scsi/scsi.h b/include/hw/scsi/scsi.h new file mode 100644 index 000000000..09fa5c9d2 --- /dev/null +++ b/include/hw/scsi/scsi.h @@ -0,0 +1,198 @@ +#ifndef QEMU_HW_SCSI_H +#define QEMU_HW_SCSI_H + +#include "block/aio.h" +#include "hw/block/block.h" +#include "hw/qdev-core.h" +#include "scsi/utils.h" +#include "qemu/notify.h" +#include "qom/object.h" + +#define MAX_SCSI_DEVS 255 + +typedef struct SCSIBus SCSIBus; +typedef struct SCSIBusInfo SCSIBusInfo; +typedef struct SCSIDevice SCSIDevice; +typedef struct SCSIRequest SCSIRequest; +typedef struct SCSIReqOps SCSIReqOps; + +#define SCSI_SENSE_BUF_SIZE_OLD 96 +#define SCSI_SENSE_BUF_SIZE 252 + +struct SCSIRequest { + SCSIBus *bus; + SCSIDevice *dev; + const SCSIReqOps *ops; + uint32_t refcount; + uint32_t tag; + uint32_t lun; + uint32_t status; + void *hba_private; + size_t resid; + SCSICommand cmd; + NotifierList cancel_notifiers; + + /* Note: + * - fields before sense are initialized by scsi_req_alloc; + * - sense[] is uninitialized; + * - fields after sense are memset to 0 by scsi_req_alloc. + * */ + + uint8_t sense[SCSI_SENSE_BUF_SIZE]; + uint32_t sense_len; + bool enqueued; + bool io_canceled; + bool retry; + bool dma_started; + BlockAIOCB *aiocb; + QEMUSGList *sg; + QTAILQ_ENTRY(SCSIRequest) next; +}; + +#define TYPE_SCSI_DEVICE "scsi-device" +OBJECT_DECLARE_TYPE(SCSIDevice, SCSIDeviceClass, SCSI_DEVICE) + +struct SCSIDeviceClass { + DeviceClass parent_class; + void (*realize)(SCSIDevice *dev, Error **errp); + void (*unrealize)(SCSIDevice *dev); + int (*parse_cdb)(SCSIDevice *dev, SCSICommand *cmd, uint8_t *buf, + void *hba_private); + SCSIRequest *(*alloc_req)(SCSIDevice *s, uint32_t tag, uint32_t lun, + uint8_t *buf, void *hba_private); + void (*unit_attention_reported)(SCSIDevice *s); +}; + +struct SCSIDevice +{ + DeviceState qdev; + VMChangeStateEntry *vmsentry; + QEMUBH *bh; + uint32_t id; + BlockConf conf; + SCSISense unit_attention; + bool sense_is_ua; + uint8_t sense[SCSI_SENSE_BUF_SIZE]; + uint32_t sense_len; + QTAILQ_HEAD(, SCSIRequest) requests; + uint32_t channel; + uint32_t lun; + int blocksize; + int type; + uint64_t max_lba; + uint64_t wwn; + uint64_t port_wwn; + int scsi_version; + int default_scsi_version; + bool needs_vpd_bl_emulation; + bool hba_supports_iothread; +}; + +extern const VMStateDescription vmstate_scsi_device; + +#define VMSTATE_SCSI_DEVICE(_field, _state) { \ + .name = (stringify(_field)), \ + .size = sizeof(SCSIDevice), \ + .vmsd = &vmstate_scsi_device, \ + .flags = VMS_STRUCT, \ + .offset = vmstate_offset_value(_state, _field, SCSIDevice), \ +} + +/* cdrom.c */ +int cdrom_read_toc(int nb_sectors, uint8_t *buf, int msf, int start_track); +int cdrom_read_toc_raw(int nb_sectors, uint8_t *buf, int msf, int session_num); + +/* scsi-bus.c */ +struct SCSIReqOps { + size_t size; + void (*free_req)(SCSIRequest *req); + int32_t (*send_command)(SCSIRequest *req, uint8_t *buf); + void (*read_data)(SCSIRequest *req); + void (*write_data)(SCSIRequest *req); + uint8_t *(*get_buf)(SCSIRequest *req); + + void (*save_request)(QEMUFile *f, SCSIRequest *req); + void (*load_request)(QEMUFile *f, SCSIRequest *req); +}; + +struct SCSIBusInfo { + int tcq; + int max_channel, max_target, max_lun; + int (*parse_cdb)(SCSIDevice *dev, SCSICommand *cmd, uint8_t *buf, + void *hba_private); + void (*transfer_data)(SCSIRequest *req, uint32_t arg); + void (*complete)(SCSIRequest *req, uint32_t arg, size_t resid); + void (*cancel)(SCSIRequest *req); + void (*change)(SCSIBus *bus, SCSIDevice *dev, SCSISense sense); + QEMUSGList *(*get_sg_list)(SCSIRequest *req); + + void (*save_request)(QEMUFile *f, SCSIRequest *req); + void *(*load_request)(QEMUFile *f, SCSIRequest *req); + void (*free_request)(SCSIBus *bus, void *priv); +}; + +#define TYPE_SCSI_BUS "SCSI" +OBJECT_DECLARE_SIMPLE_TYPE(SCSIBus, SCSI_BUS) + +struct SCSIBus { + BusState qbus; + int busnr; + + SCSISense unit_attention; + const SCSIBusInfo *info; +}; + +void scsi_bus_new(SCSIBus *bus, size_t bus_size, DeviceState *host, + const SCSIBusInfo *info, const char *bus_name); + +static inline SCSIBus *scsi_bus_from_device(SCSIDevice *d) +{ + return DO_UPCAST(SCSIBus, qbus, d->qdev.parent_bus); +} + +SCSIDevice *scsi_bus_legacy_add_drive(SCSIBus *bus, BlockBackend *blk, + int unit, bool removable, int bootindex, + bool share_rw, + BlockdevOnError rerror, + BlockdevOnError werror, + const char *serial, Error **errp); +void scsi_bus_legacy_handle_cmdline(SCSIBus *bus); +void scsi_legacy_handle_cmdline(void); + +SCSIRequest *scsi_req_alloc(const SCSIReqOps *reqops, SCSIDevice *d, + uint32_t tag, uint32_t lun, void *hba_private); +SCSIRequest *scsi_req_new(SCSIDevice *d, uint32_t tag, uint32_t lun, + uint8_t *buf, void *hba_private); +int32_t scsi_req_enqueue(SCSIRequest *req); +SCSIRequest *scsi_req_ref(SCSIRequest *req); +void scsi_req_unref(SCSIRequest *req); + +int scsi_bus_parse_cdb(SCSIDevice *dev, SCSICommand *cmd, uint8_t *buf, + void *hba_private); +int scsi_req_parse_cdb(SCSIDevice *dev, SCSICommand *cmd, uint8_t *buf); +void scsi_req_build_sense(SCSIRequest *req, SCSISense sense); +void scsi_req_print(SCSIRequest *req); +void scsi_req_continue(SCSIRequest *req); +void scsi_req_data(SCSIRequest *req, int len); +void scsi_req_complete(SCSIRequest *req, int status); +uint8_t *scsi_req_get_buf(SCSIRequest *req); +int scsi_req_get_sense(SCSIRequest *req, uint8_t *buf, int len); +void scsi_req_cancel_complete(SCSIRequest *req); +void scsi_req_cancel(SCSIRequest *req); +void scsi_req_cancel_async(SCSIRequest *req, Notifier *notifier); +void scsi_req_retry(SCSIRequest *req); +void scsi_device_purge_requests(SCSIDevice *sdev, SCSISense sense); +void scsi_device_set_ua(SCSIDevice *sdev, SCSISense sense); +void scsi_device_report_change(SCSIDevice *dev, SCSISense sense); +void scsi_device_unit_attention_reported(SCSIDevice *dev); +void scsi_generic_read_device_inquiry(SCSIDevice *dev); +int scsi_device_get_sense(SCSIDevice *dev, uint8_t *buf, int len, bool fixed); +int scsi_SG_IO_FROM_DEV(BlockBackend *blk, uint8_t *cmd, uint8_t cmd_size, + uint8_t *buf, uint8_t buf_size); +SCSIDevice *scsi_device_find(SCSIBus *bus, int channel, int target, int lun); +SCSIDevice *scsi_device_get(SCSIBus *bus, int channel, int target, int lun); + +/* scsi-generic.c. */ +extern const SCSIReqOps scsi_generic_req_ops; + +#endif diff --git a/include/hw/sd/allwinner-sdhost.h b/include/hw/sd/allwinner-sdhost.h new file mode 100644 index 000000000..bfe08ff4e --- /dev/null +++ b/include/hw/sd/allwinner-sdhost.h @@ -0,0 +1,136 @@ +/* + * Allwinner (sun4i and above) SD Host Controller emulation + * + * Copyright (C) 2019 Niek Linnenbank + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + */ + +#ifndef HW_SD_ALLWINNER_SDHOST_H +#define HW_SD_ALLWINNER_SDHOST_H + +#include "qom/object.h" +#include "hw/sysbus.h" +#include "hw/sd/sd.h" + +/** + * Object model types + * @{ + */ + +/** Generic Allwinner SD Host Controller (abstract) */ +#define TYPE_AW_SDHOST "allwinner-sdhost" + +/** Allwinner sun4i family (A10, A12) */ +#define TYPE_AW_SDHOST_SUN4I TYPE_AW_SDHOST "-sun4i" + +/** Allwinner sun5i family and newer (A13, H2+, H3, etc) */ +#define TYPE_AW_SDHOST_SUN5I TYPE_AW_SDHOST "-sun5i" + +/** @} */ + +/** + * Object model macros + * @{ + */ + +OBJECT_DECLARE_TYPE(AwSdHostState, AwSdHostClass, AW_SDHOST) + +/** @} */ + +/** + * Allwinner SD Host Controller object instance state. + */ +struct AwSdHostState { + /*< private >*/ + SysBusDevice busdev; + /*< public >*/ + + /** Secure Digital (SD) bus, which connects to SD card (if present) */ + SDBus sdbus; + + /** Maps I/O registers in physical memory */ + MemoryRegion iomem; + + /** Interrupt output signal to notify CPU */ + qemu_irq irq; + + /** Memory region where DMA transfers are done */ + MemoryRegion *dma_mr; + + /** Address space used internally for DMA transfers */ + AddressSpace dma_as; + + /** Number of bytes left in current DMA transfer */ + uint32_t transfer_cnt; + + /** + * @name Hardware Registers + * @{ + */ + + uint32_t global_ctl; /**< Global Control */ + uint32_t clock_ctl; /**< Clock Control */ + uint32_t timeout; /**< Timeout */ + uint32_t bus_width; /**< Bus Width */ + uint32_t block_size; /**< Block Size */ + uint32_t byte_count; /**< Byte Count */ + + uint32_t command; /**< Command */ + uint32_t command_arg; /**< Command Argument */ + uint32_t response[4]; /**< Command Response */ + + uint32_t irq_mask; /**< Interrupt Mask */ + uint32_t irq_status; /**< Raw Interrupt Status */ + uint32_t status; /**< Status */ + + uint32_t fifo_wlevel; /**< FIFO Water Level */ + uint32_t fifo_func_sel; /**< FIFO Function Select */ + uint32_t debug_enable; /**< Debug Enable */ + uint32_t auto12_arg; /**< Auto Command 12 Argument */ + uint32_t newtiming_set; /**< SD New Timing Set */ + uint32_t newtiming_debug; /**< SD New Timing Debug */ + uint32_t hardware_rst; /**< Hardware Reset */ + uint32_t dmac; /**< Internal DMA Controller Control */ + uint32_t desc_base; /**< Descriptor List Base Address */ + uint32_t dmac_status; /**< Internal DMA Controller Status */ + uint32_t dmac_irq; /**< Internal DMA Controller IRQ Enable */ + uint32_t card_threshold; /**< Card Threshold Control */ + uint32_t startbit_detect; /**< eMMC DDR Start Bit Detection Control */ + uint32_t response_crc; /**< Response CRC */ + uint32_t data_crc[8]; /**< Data CRC */ + uint32_t status_crc; /**< Status CRC */ + + /** @} */ + +}; + +/** + * Allwinner SD Host Controller class-level struct. + * + * This struct is filled by each sunxi device specific code + * such that the generic code can use this struct to support + * all devices. + */ +struct AwSdHostClass { + /*< private >*/ + SysBusDeviceClass parent_class; + /*< public >*/ + + /** Maximum buffer size in bytes per DMA descriptor */ + size_t max_desc_size; + +}; + +#endif /* HW_SD_ALLWINNER_SDHOST_H */ diff --git a/include/hw/sd/aspeed_sdhci.h b/include/hw/sd/aspeed_sdhci.h new file mode 100644 index 000000000..b093d1b86 --- /dev/null +++ b/include/hw/sd/aspeed_sdhci.h @@ -0,0 +1,35 @@ +/* + * Aspeed SD Host Controller + * Eddie James + * + * Copyright (C) 2019 IBM Corp + * SPDX-License-Identifer: GPL-2.0-or-later + */ + +#ifndef ASPEED_SDHCI_H +#define ASPEED_SDHCI_H + +#include "hw/sd/sdhci.h" +#include "qom/object.h" + +#define TYPE_ASPEED_SDHCI "aspeed.sdhci" +OBJECT_DECLARE_SIMPLE_TYPE(AspeedSDHCIState, ASPEED_SDHCI) + +#define ASPEED_SDHCI_CAPABILITIES 0x01E80080 +#define ASPEED_SDHCI_NUM_SLOTS 2 +#define ASPEED_SDHCI_NUM_REGS (ASPEED_SDHCI_REG_SIZE / sizeof(uint32_t)) +#define ASPEED_SDHCI_REG_SIZE 0x100 + +struct AspeedSDHCIState { + SysBusDevice parent; + + SDHCIState slots[ASPEED_SDHCI_NUM_SLOTS]; + uint8_t num_slots; + + MemoryRegion iomem; + qemu_irq irq; + + uint32_t regs[ASPEED_SDHCI_NUM_REGS]; +}; + +#endif /* ASPEED_SDHCI_H */ diff --git a/include/hw/sd/bcm2835_sdhost.h b/include/hw/sd/bcm2835_sdhost.h new file mode 100644 index 000000000..f6bca5c39 --- /dev/null +++ b/include/hw/sd/bcm2835_sdhost.h @@ -0,0 +1,48 @@ +/* + * Raspberry Pi (BCM2835) SD Host Controller + * + * Copyright (c) 2017 Antfield SAS + * + * Authors: + * Clement Deschamps + * Luc Michel + * + * This work is licensed under the terms of the GNU GPL, version 2 or later. + * See the COPYING file in the top-level directory. + */ + +#ifndef BCM2835_SDHOST_H +#define BCM2835_SDHOST_H + +#include "hw/sysbus.h" +#include "hw/sd/sd.h" +#include "qom/object.h" + +#define TYPE_BCM2835_SDHOST "bcm2835-sdhost" +OBJECT_DECLARE_SIMPLE_TYPE(BCM2835SDHostState, BCM2835_SDHOST) + +#define BCM2835_SDHOST_FIFO_LEN 16 + +struct BCM2835SDHostState { + SysBusDevice busdev; + SDBus sdbus; + MemoryRegion iomem; + + uint32_t cmd; + uint32_t cmdarg; + uint32_t status; + uint32_t rsp[4]; + uint32_t config; + uint32_t edm; + uint32_t vdd; + uint32_t hbct; + uint32_t hblc; + int32_t fifo_pos; + int32_t fifo_len; + uint32_t fifo[BCM2835_SDHOST_FIFO_LEN]; + uint32_t datacnt; + + qemu_irq irq; +}; + +#endif diff --git a/include/hw/sd/cadence_sdhci.h b/include/hw/sd/cadence_sdhci.h new file mode 100644 index 000000000..cd8288b7d --- /dev/null +++ b/include/hw/sd/cadence_sdhci.h @@ -0,0 +1,47 @@ +/* + * Cadence SDHCI emulation + * + * Copyright (c) 2020 Wind River Systems, Inc. + * + * Author: + * Bin Meng + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation; either version 2 or + * (at your option) version 3 of the License. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, see . + */ + +#ifndef CADENCE_SDHCI_H +#define CADENCE_SDHCI_H + +#include "hw/sd/sdhci.h" + +#define CADENCE_SDHCI_REG_SIZE 0x100 +#define CADENCE_SDHCI_NUM_REGS (CADENCE_SDHCI_REG_SIZE / sizeof(uint32_t)) + +typedef struct CadenceSDHCIState { + SysBusDevice parent; + + MemoryRegion container; + MemoryRegion iomem; + BusState *bus; + + uint32_t regs[CADENCE_SDHCI_NUM_REGS]; + + SDHCIState sdhci; +} CadenceSDHCIState; + +#define TYPE_CADENCE_SDHCI "cadence.sdhci" +#define CADENCE_SDHCI(obj) OBJECT_CHECK(CadenceSDHCIState, (obj), \ + TYPE_CADENCE_SDHCI) + +#endif /* CADENCE_SDHCI_H */ diff --git a/include/hw/sd/sd.h b/include/hw/sd/sd.h new file mode 100644 index 000000000..59d108d45 --- /dev/null +++ b/include/hw/sd/sd.h @@ -0,0 +1,209 @@ +/* + * SD Memory Card emulation. Mostly correct for MMC too. + * + * Copyright (c) 2006 Andrzej Zaborowski + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, + * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A + * PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR + * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR + * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY + * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef HW_SD_H +#define HW_SD_H + +#include "hw/qdev-core.h" +#include "qom/object.h" + +#define OUT_OF_RANGE (1 << 31) +#define ADDRESS_ERROR (1 << 30) +#define BLOCK_LEN_ERROR (1 << 29) +#define ERASE_SEQ_ERROR (1 << 28) +#define ERASE_PARAM (1 << 27) +#define WP_VIOLATION (1 << 26) +#define CARD_IS_LOCKED (1 << 25) +#define LOCK_UNLOCK_FAILED (1 << 24) +#define COM_CRC_ERROR (1 << 23) +#define ILLEGAL_COMMAND (1 << 22) +#define CARD_ECC_FAILED (1 << 21) +#define CC_ERROR (1 << 20) +#define SD_ERROR (1 << 19) +#define CID_CSD_OVERWRITE (1 << 16) +#define WP_ERASE_SKIP (1 << 15) +#define CARD_ECC_DISABLED (1 << 14) +#define ERASE_RESET (1 << 13) +#define CURRENT_STATE (7 << 9) +#define READY_FOR_DATA (1 << 8) +#define APP_CMD (1 << 5) +#define AKE_SEQ_ERROR (1 << 3) + +enum SDPhySpecificationVersion { + SD_PHY_SPECv1_10_VERS = 1, + SD_PHY_SPECv2_00_VERS = 2, + SD_PHY_SPECv3_01_VERS = 3, +}; + +typedef enum { + SD_VOLTAGE_0_4V = 400, /* currently not supported */ + SD_VOLTAGE_1_8V = 1800, + SD_VOLTAGE_3_0V = 3000, + SD_VOLTAGE_3_3V = 3300, +} sd_voltage_mv_t; + +typedef enum { + UHS_NOT_SUPPORTED = 0, + UHS_I = 1, + UHS_II = 2, /* currently not supported */ + UHS_III = 3, /* currently not supported */ +} sd_uhs_mode_t; + +typedef enum { + sd_none = -1, + sd_bc = 0, /* broadcast -- no response */ + sd_bcr, /* broadcast with response */ + sd_ac, /* addressed -- no data transfer */ + sd_adtc, /* addressed with data transfer */ +} sd_cmd_type_t; + +typedef struct { + uint8_t cmd; + uint32_t arg; + uint8_t crc; +} SDRequest; + + +#define TYPE_SD_CARD "sd-card" +OBJECT_DECLARE_TYPE(SDState, SDCardClass, SD_CARD) + +struct SDCardClass { + /*< private >*/ + DeviceClass parent_class; + /*< public >*/ + + int (*do_command)(SDState *sd, SDRequest *req, uint8_t *response); + /** + * Write a byte to a SD card. + * @sd: card + * @value: byte to write + * + * Write a byte on the data lines of a SD card. + */ + void (*write_byte)(SDState *sd, uint8_t value); + /** + * Read a byte from a SD card. + * @sd: card + * + * Read a byte from the data lines of a SD card. + * + * Return: byte value read + */ + uint8_t (*read_byte)(SDState *sd); + bool (*data_ready)(SDState *sd); + void (*set_voltage)(SDState *sd, uint16_t millivolts); + uint8_t (*get_dat_lines)(SDState *sd); + bool (*get_cmd_line)(SDState *sd); + void (*enable)(SDState *sd, bool enable); + bool (*get_inserted)(SDState *sd); + bool (*get_readonly)(SDState *sd); +}; + +#define TYPE_SD_BUS "sd-bus" +OBJECT_DECLARE_TYPE(SDBus, SDBusClass, + SD_BUS) + +struct SDBus { + BusState qbus; +}; + +struct SDBusClass { + /*< private >*/ + BusClass parent_class; + /*< public >*/ + + /* These methods are called by the SD device to notify the controller + * when the card insertion or readonly status changes + */ + void (*set_inserted)(DeviceState *dev, bool inserted); + void (*set_readonly)(DeviceState *dev, bool readonly); +}; + +/* Functions to be used by qdevified callers (working via + * an SDBus rather than directly with SDState) + */ +void sdbus_set_voltage(SDBus *sdbus, uint16_t millivolts); +uint8_t sdbus_get_dat_lines(SDBus *sdbus); +bool sdbus_get_cmd_line(SDBus *sdbus); +int sdbus_do_command(SDBus *sd, SDRequest *req, uint8_t *response); +/** + * Write a byte to a SD bus. + * @sd: bus + * @value: byte to write + * + * Write a byte on the data lines of a SD bus. + */ +void sdbus_write_byte(SDBus *sd, uint8_t value); +/** + * Read a byte from a SD bus. + * @sd: bus + * + * Read a byte from the data lines of a SD bus. + * + * Return: byte value read + */ +uint8_t sdbus_read_byte(SDBus *sd); +/** + * Write data to a SD bus. + * @sdbus: bus + * @buf: data to write + * @length: number of bytes to write + * + * Write multiple bytes of data on the data lines of a SD bus. + */ +void sdbus_write_data(SDBus *sdbus, const void *buf, size_t length); +/** + * Read data from a SD bus. + * @sdbus: bus + * @buf: buffer to read data into + * @length: number of bytes to read + * + * Read multiple bytes of data on the data lines of a SD bus. + */ +void sdbus_read_data(SDBus *sdbus, void *buf, size_t length); +bool sdbus_data_ready(SDBus *sd); +bool sdbus_get_inserted(SDBus *sd); +bool sdbus_get_readonly(SDBus *sd); +/** + * sdbus_reparent_card: Reparent an SD card from one controller to another + * @from: controller bus to remove card from + * @to: controller bus to move card to + * + * Reparent an SD card, effectively unplugging it from one controller + * and inserting it into another. This is useful for SoCs like the + * bcm2835 which have two SD controllers and connect a single SD card + * to them, selected by the guest reprogramming GPIO line routing. + */ +void sdbus_reparent_card(SDBus *from, SDBus *to); + +/* Functions to be used by SD devices to report back to qdevified controllers */ +void sdbus_set_inserted(SDBus *sd, bool inserted); +void sdbus_set_readonly(SDBus *sd, bool inserted); + +#endif /* HW_SD_H */ diff --git a/include/hw/sd/sdcard_legacy.h b/include/hw/sd/sdcard_legacy.h new file mode 100644 index 000000000..0dc388955 --- /dev/null +++ b/include/hw/sd/sdcard_legacy.h @@ -0,0 +1,50 @@ +/* + * SD Memory Card emulation (deprecated legacy API) + * + * Copyright (c) 2006 Andrzej Zaborowski + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, + * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A + * PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR + * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR + * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY + * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ +#ifndef HW_SDCARD_LEGACY_H +#define HW_SDCARD_LEGACY_H + +#include "hw/sd/sd.h" + +/* Legacy functions to be used only by non-qdevified callers */ +SDState *sd_init(BlockBackend *blk, bool is_spi); +int sd_do_command(SDState *card, SDRequest *request, uint8_t *response); +void sd_write_byte(SDState *card, uint8_t value); +uint8_t sd_read_byte(SDState *card); +void sd_set_cb(SDState *card, qemu_irq readonly, qemu_irq insert); + +/* sd_enable should not be used -- it is only used on the nseries boards, + * where it is part of a broken implementation of the MMC card slot switch + * (there should be two card slots which are multiplexed to a single MMC + * controller, but instead we model it with one card and controller and + * disable the card when the second slot is selected, so it looks like the + * second slot is always empty). + */ +void sd_enable(SDState *card, bool enable); + +#endif /* HW_SDCARD_LEGACY_H */ diff --git a/include/hw/sd/sdhci.h b/include/hw/sd/sdhci.h new file mode 100644 index 000000000..01a64c544 --- /dev/null +++ b/include/hw/sd/sdhci.h @@ -0,0 +1,129 @@ +/* + * SD Association Host Standard Specification v2.0 controller emulation + * + * Copyright (c) 2011 Samsung Electronics Co., Ltd. + * Mitsyanko Igor + * Peter A.G. Crosthwaite + * + * Based on MMC controller for Samsung S5PC1xx-based board emulation + * by Alexey Merkulov and Vladimir Monakhov. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; either version 2 of the License, or (at your + * option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * See the GNU General Public License for more details. + * + * You should have received a copy of the GNU _General Public License along + * with this program; if not, see . + */ + +#ifndef SDHCI_H +#define SDHCI_H + +#include "hw/pci/pci.h" +#include "hw/sysbus.h" +#include "hw/sd/sd.h" +#include "qom/object.h" + +/* SD/MMC host controller state */ +struct SDHCIState { + /*< private >*/ + union { + PCIDevice pcidev; + SysBusDevice busdev; + }; + + /*< public >*/ + SDBus sdbus; + MemoryRegion iomem; + AddressSpace sysbus_dma_as; + AddressSpace *dma_as; + MemoryRegion *dma_mr; + const MemoryRegionOps *io_ops; + + QEMUTimer *insert_timer; /* timer for 'changing' sd card. */ + QEMUTimer *transfer_timer; + qemu_irq irq; + + /* Registers cleared on reset */ + uint32_t sdmasysad; /* SDMA System Address register */ + uint16_t blksize; /* Host DMA Buff Boundary and Transfer BlkSize Reg */ + uint16_t blkcnt; /* Blocks count for current transfer */ + uint32_t argument; /* Command Argument Register */ + uint16_t trnmod; /* Transfer Mode Setting Register */ + uint16_t cmdreg; /* Command Register */ + uint32_t rspreg[4]; /* Response Registers 0-3 */ + uint32_t prnsts; /* Present State Register */ + uint8_t hostctl1; /* Host Control Register */ + uint8_t pwrcon; /* Power control Register */ + uint8_t blkgap; /* Block Gap Control Register */ + uint8_t wakcon; /* WakeUp Control Register */ + uint16_t clkcon; /* Clock control Register */ + uint8_t timeoutcon; /* Timeout Control Register */ + uint8_t admaerr; /* ADMA Error Status Register */ + uint16_t norintsts; /* Normal Interrupt Status Register */ + uint16_t errintsts; /* Error Interrupt Status Register */ + uint16_t norintstsen; /* Normal Interrupt Status Enable Register */ + uint16_t errintstsen; /* Error Interrupt Status Enable Register */ + uint16_t norintsigen; /* Normal Interrupt Signal Enable Register */ + uint16_t errintsigen; /* Error Interrupt Signal Enable Register */ + uint16_t acmd12errsts; /* Auto CMD12 error status register */ + uint16_t hostctl2; /* Host Control 2 */ + uint64_t admasysaddr; /* ADMA System Address Register */ + uint16_t vendor_spec; /* Vendor specific register */ + + /* Read-only registers */ + uint64_t capareg; /* Capabilities Register */ + uint64_t maxcurr; /* Maximum Current Capabilities Register */ + uint16_t version; /* Host Controller Version Register */ + + uint8_t *fifo_buffer; /* SD host i/o FIFO buffer */ + uint32_t buf_maxsz; + uint16_t data_count; /* current element in FIFO buffer */ + uint8_t stopped_state;/* Current SDHC state */ + bool pending_insert_state; + /* Buffer Data Port Register - virtual access point to R and W buffers */ + /* Software Reset Register - always reads as 0 */ + /* Force Event Auto CMD12 Error Interrupt Reg - write only */ + /* Force Event Error Interrupt Register- write only */ + /* RO Host Controller Version Register always reads as 0x2401 */ + + /* Configurable properties */ + bool pending_insert_quirk; /* Quirk for Raspberry Pi card insert int */ + uint32_t quirks; + uint8_t sd_spec_version; + uint8_t uhs_mode; + uint8_t vendor; /* For vendor specific functionality */ +}; +typedef struct SDHCIState SDHCIState; + +#define SDHCI_VENDOR_NONE 0 +#define SDHCI_VENDOR_IMX 1 + +/* + * Controller does not provide transfer-complete interrupt when not + * busy. + * + * NOTE: This definition is taken out of Linux kernel and so the + * original bit number is preserved + */ +#define SDHCI_QUIRK_NO_BUSY_IRQ BIT(14) + +#define TYPE_PCI_SDHCI "sdhci-pci" +DECLARE_INSTANCE_CHECKER(SDHCIState, PCI_SDHCI, + TYPE_PCI_SDHCI) + +#define TYPE_SYSBUS_SDHCI "generic-sdhci" +DECLARE_INSTANCE_CHECKER(SDHCIState, SYSBUS_SDHCI, + TYPE_SYSBUS_SDHCI) + +#define TYPE_IMX_USDHC "imx-usdhc" + +#define TYPE_S3C_SDHCI "s3c-sdhci" + +#endif /* SDHCI_H */ diff --git a/include/hw/semihosting/console.h b/include/hw/semihosting/console.h new file mode 100644 index 000000000..0238f540f --- /dev/null +++ b/include/hw/semihosting/console.h @@ -0,0 +1,69 @@ +/* + * Semihosting Console + * + * Copyright (c) 2019 Linaro Ltd + * + * SPDX-License-Identifier: GPL-2.0-or-later + */ + +#ifndef SEMIHOST_CONSOLE_H +#define SEMIHOST_CONSOLE_H + +#include "cpu.h" + +/** + * qemu_semihosting_console_outs: + * @env: CPUArchState + * @s: host address of null terminated guest string + * + * Send a null terminated guest string to the debug console. This may + * be the remote gdb session if a softmmu guest is currently being + * debugged. + * + * Returns: number of bytes written. + */ +int qemu_semihosting_console_outs(CPUArchState *env, target_ulong s); + +/** + * qemu_semihosting_console_outc: + * @env: CPUArchState + * @s: host address of null terminated guest string + * + * Send single character from guest memory to the debug console. This + * may be the remote gdb session if a softmmu guest is currently being + * debugged. + * + * Returns: nothing + */ +void qemu_semihosting_console_outc(CPUArchState *env, target_ulong c); + +/** + * qemu_semihosting_console_inc: + * @env: CPUArchState + * + * Receive single character from debug console. This may be the remote + * gdb session if a softmmu guest is currently being debugged. As this + * call may block if no data is available we suspend the CPU and will + * re-execute the instruction when data is there. Therefore two + * conditions must be met: + * - CPUState is synchronized before calling this function + * - pc is only updated once the character is successfully returned + * + * Returns: character read OR cpu_loop_exit! + */ +target_ulong qemu_semihosting_console_inc(CPUArchState *env); + +/** + * qemu_semihosting_log_out: + * @s: pointer to string + * @len: length of string + * + * Send a string to the debug output. Unlike console_out these strings + * can't be sent to a remote gdb instance as they don't exist in guest + * memory. + * + * Returns: number of bytes written + */ +int qemu_semihosting_log_out(const char *s, int len); + +#endif /* SEMIHOST_CONSOLE_H */ diff --git a/include/hw/semihosting/semihost.h b/include/hw/semihosting/semihost.h new file mode 100644 index 000000000..0c55ade3a --- /dev/null +++ b/include/hw/semihosting/semihost.h @@ -0,0 +1,77 @@ +/* + * Semihosting support + * + * Copyright (c) 2015 Imagination Technologies + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, see . + */ + +#ifndef SEMIHOST_H +#define SEMIHOST_H + +typedef enum SemihostingTarget { + SEMIHOSTING_TARGET_AUTO = 0, + SEMIHOSTING_TARGET_NATIVE, + SEMIHOSTING_TARGET_GDB +} SemihostingTarget; + +#ifdef CONFIG_USER_ONLY +static inline bool semihosting_enabled(void) +{ + return true; +} + +static inline SemihostingTarget semihosting_get_target(void) +{ + return SEMIHOSTING_TARGET_AUTO; +} + +static inline const char *semihosting_get_arg(int i) +{ + return NULL; +} + +static inline int semihosting_get_argc(void) +{ + return 0; +} + +static inline const char *semihosting_get_cmdline(void) +{ + return NULL; +} + +static inline Chardev *semihosting_get_chardev(void) +{ + return NULL; +} +static inline void qemu_semihosting_console_init(void) +{ +} +#else /* !CONFIG_USER_ONLY */ +bool semihosting_enabled(void); +SemihostingTarget semihosting_get_target(void); +const char *semihosting_get_arg(int i); +int semihosting_get_argc(void); +const char *semihosting_get_cmdline(void); +void semihosting_arg_fallback(const char *file, const char *cmd); +Chardev *semihosting_get_chardev(void); +/* for vl.c hooks */ +void qemu_semihosting_enable(void); +int qemu_semihosting_config_options(const char *opt); +void qemu_semihosting_connect_chardevs(void); +void qemu_semihosting_console_init(void); +#endif /* CONFIG_USER_ONLY */ + +#endif /* SEMIHOST_H */ diff --git a/include/hw/sh4/sh.h b/include/hw/sh4/sh.h new file mode 100644 index 000000000..93f464bf4 --- /dev/null +++ b/include/hw/sh4/sh.h @@ -0,0 +1,48 @@ +#ifndef QEMU_SH_H +#define QEMU_SH_H +/* Definitions for SH board emulation. */ + +#include "hw/sh4/sh_intc.h" +#include "target/sh4/cpu-qom.h" + +#define A7ADDR(x) ((x) & 0x1fffffff) +#define P4ADDR(x) ((x) | 0xe0000000) + +/* sh7750.c */ +struct SH7750State; + +struct SH7750State *sh7750_init(SuperHCPU *cpu, MemoryRegion *sysmem); + +typedef struct { + /* The callback will be triggered if any of the designated lines change */ + uint16_t portamask_trigger; + uint16_t portbmask_trigger; + /* Return 0 if no action was taken */ + int (*port_change_cb) (uint16_t porta, uint16_t portb, + uint16_t * periph_pdtra, + uint16_t * periph_portdira, + uint16_t * periph_pdtrb, + uint16_t * periph_portdirb); +} sh7750_io_device; + +int sh7750_register_io_device(struct SH7750State *s, + sh7750_io_device * device); + +/* sh_serial.c */ +#define SH_SERIAL_FEAT_SCIF (1 << 0) +void sh_serial_init(MemoryRegion *sysmem, + hwaddr base, int feat, + uint32_t freq, Chardev *chr, + qemu_irq eri_source, + qemu_irq rxi_source, + qemu_irq txi_source, + qemu_irq tei_source, + qemu_irq bri_source); + +/* sh7750.c */ +qemu_irq sh7750_irl(struct SH7750State *s); + +/* tc58128.c */ +int tc58128_init(struct SH7750State *s, const char *zone1, const char *zone2); + +#endif diff --git a/include/hw/sh4/sh_intc.h b/include/hw/sh4/sh_intc.h new file mode 100644 index 000000000..65f342505 --- /dev/null +++ b/include/hw/sh4/sh_intc.h @@ -0,0 +1,81 @@ +#ifndef SH_INTC_H +#define SH_INTC_H + +#include "exec/memory.h" + +typedef unsigned char intc_enum; + +struct intc_vect { + intc_enum enum_id; + unsigned short vect; +}; + +#define INTC_VECT(enum_id, vect) { enum_id, vect } + +struct intc_group { + intc_enum enum_id; + intc_enum enum_ids[32]; +}; + +#define INTC_GROUP(enum_id, ...) { enum_id, { __VA_ARGS__ } } + +struct intc_mask_reg { + unsigned long set_reg, clr_reg, reg_width; + intc_enum enum_ids[32]; + unsigned long value; +}; + +struct intc_prio_reg { + unsigned long set_reg, clr_reg, reg_width, field_width; + intc_enum enum_ids[16]; + unsigned long value; +}; + +#define _INTC_ARRAY(a) a, ARRAY_SIZE(a) + +struct intc_source { + unsigned short vect; + intc_enum next_enum_id; + + int asserted; /* emulates the interrupt signal line from device to intc */ + int enable_count; + int enable_max; + int pending; /* emulates the result of signal and masking */ + struct intc_desc *parent; +}; + +struct intc_desc { + MemoryRegion iomem; + MemoryRegion *iomem_aliases; + qemu_irq *irqs; + struct intc_source *sources; + int nr_sources; + struct intc_mask_reg *mask_regs; + int nr_mask_regs; + struct intc_prio_reg *prio_regs; + int nr_prio_regs; + int pending; /* number of interrupt sources that has pending set */ +}; + +int sh_intc_get_pending_vector(struct intc_desc *desc, int imask); +struct intc_source *sh_intc_source(struct intc_desc *desc, intc_enum id); +void sh_intc_toggle_source(struct intc_source *source, + int enable_adj, int assert_adj); + +void sh_intc_register_sources(struct intc_desc *desc, + struct intc_vect *vectors, + int nr_vectors, + struct intc_group *groups, + int nr_groups); + +int sh_intc_init(MemoryRegion *sysmem, + struct intc_desc *desc, + int nr_sources, + struct intc_mask_reg *mask_regs, + int nr_mask_regs, + struct intc_prio_reg *prio_regs, + int nr_prio_regs); + +void sh_intc_set_irl(void *opaque, int n, int level); + +#endif /* SH_INTC_H */ diff --git a/include/hw/southbridge/piix.h b/include/hw/southbridge/piix.h new file mode 100644 index 000000000..6387f2b61 --- /dev/null +++ b/include/hw/southbridge/piix.h @@ -0,0 +1,79 @@ +/* + * QEMU PIIX South Bridge Emulation + * + * Copyright (c) 2006 Fabrice Bellard + * Copyright (c) 2018 Hervé Poussineau + * + * This work is licensed under the terms of the GNU GPL, version 2 or later. + * See the COPYING file in the top-level directory. + * + */ + +#ifndef HW_SOUTHBRIDGE_PIIX_H +#define HW_SOUTHBRIDGE_PIIX_H + +#include "hw/pci/pci.h" +#include "qom/object.h" + +#define TYPE_PIIX4_PM "PIIX4_PM" + +I2CBus *piix4_pm_init(PCIBus *bus, int devfn, uint32_t smb_io_base, + qemu_irq sci_irq, qemu_irq smi_irq, + int smm_enabled, DeviceState **piix4_pm); + +/* PIRQRC[A:D]: PIRQx Route Control Registers */ +#define PIIX_PIRQCA 0x60 +#define PIIX_PIRQCB 0x61 +#define PIIX_PIRQCC 0x62 +#define PIIX_PIRQCD 0x63 + +/* + * Reset Control Register: PCI-accessible ISA-Compatible Register at address + * 0xcf9, provided by the PCI/ISA bridge (PIIX3 PCI function 0, 8086:7000). + */ +#define PIIX_RCR_IOPORT 0xcf9 + +#define PIIX_NUM_PIC_IRQS 16 /* i8259 * 2 */ +#define PIIX_NUM_PIRQS 4ULL /* PIRQ[A-D] */ + +struct PIIXState { + PCIDevice dev; + + /* + * bitmap to track pic levels. + * The pic level is the logical OR of all the PCI irqs mapped to it + * So one PIC level is tracked by PIIX_NUM_PIRQS bits. + * + * PIRQ is mapped to PIC pins, we track it by + * PIIX_NUM_PIRQS * PIIX_NUM_PIC_IRQS = 64 bits with + * pic_irq * PIIX_NUM_PIRQS + pirq + */ +#if PIIX_NUM_PIC_IRQS * PIIX_NUM_PIRQS > 64 +#error "unable to encode pic state in 64bit in pic_levels." +#endif + uint64_t pic_levels; + + qemu_irq *pic; + + /* This member isn't used. Just for save/load compatibility */ + int32_t pci_irq_levels_vmstate[PIIX_NUM_PIRQS]; + + /* Reset Control Register contents */ + uint8_t rcr; + + /* IO memory region for Reset Control Register (PIIX_RCR_IOPORT) */ + MemoryRegion rcr_mem; +}; +typedef struct PIIXState PIIX3State; + +#define TYPE_PIIX3_PCI_DEVICE "pci-piix3" +DECLARE_INSTANCE_CHECKER(PIIX3State, PIIX3_PCI_DEVICE, + TYPE_PIIX3_PCI_DEVICE) + +extern PCIDevice *piix4_dev; + +PIIX3State *piix3_create(PCIBus *pci_bus, ISABus **isa_bus); + +DeviceState *piix4_create(PCIBus *pci_bus, ISABus **isa_bus, I2CBus **smbus); + +#endif diff --git a/include/hw/sparc/grlib.h b/include/hw/sparc/grlib.h new file mode 100644 index 000000000..78b6178fc --- /dev/null +++ b/include/hw/sparc/grlib.h @@ -0,0 +1,49 @@ +/* + * QEMU GRLIB Components + * + * Copyright (c) 2010-2019 AdaCore + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ + +#ifndef GRLIB_H +#define GRLIB_H + +#include "hw/sysbus.h" + +/* Emulation of GrLib device is base on the GRLIB IP Core User's Manual: + * http://www.gaisler.com/products/grlib/grip.pdf + */ + +/* IRQMP */ +#define TYPE_GRLIB_IRQMP "grlib,irqmp" + +typedef void (*set_pil_in_fn) (void *opaque, uint32_t pil_in); + +void grlib_irqmp_set_irq(void *opaque, int irq, int level); + +void grlib_irqmp_ack(DeviceState *dev, int intno); + +/* GPTimer */ +#define TYPE_GRLIB_GPTIMER "grlib,gptimer" + +/* APB UART */ +#define TYPE_GRLIB_APB_UART "grlib,apbuart" + +#endif /* GRLIB_H */ diff --git a/include/hw/sparc/sparc32_dma.h b/include/hw/sparc/sparc32_dma.h new file mode 100644 index 000000000..cde8ec02c --- /dev/null +++ b/include/hw/sparc/sparc32_dma.h @@ -0,0 +1,63 @@ +#ifndef SPARC32_DMA_H +#define SPARC32_DMA_H + +#include "hw/sysbus.h" +#include "hw/scsi/esp.h" +#include "hw/net/lance.h" +#include "qom/object.h" + +#define DMA_REGS 4 + +#define TYPE_SPARC32_DMA_DEVICE "sparc32-dma-device" +OBJECT_DECLARE_SIMPLE_TYPE(DMADeviceState, SPARC32_DMA_DEVICE) + + +struct DMADeviceState { + SysBusDevice parent_obj; + + MemoryRegion iomem; + uint32_t dmaregs[DMA_REGS]; + qemu_irq irq; + void *iommu; + qemu_irq gpio[2]; +}; + +#define TYPE_SPARC32_ESPDMA_DEVICE "sparc32-espdma" +OBJECT_DECLARE_SIMPLE_TYPE(ESPDMADeviceState, SPARC32_ESPDMA_DEVICE) + +struct ESPDMADeviceState { + DMADeviceState parent_obj; + + SysBusESPState esp; +}; + +#define TYPE_SPARC32_LEDMA_DEVICE "sparc32-ledma" +OBJECT_DECLARE_SIMPLE_TYPE(LEDMADeviceState, SPARC32_LEDMA_DEVICE) + +struct LEDMADeviceState { + DMADeviceState parent_obj; + + SysBusPCNetState lance; +}; + +#define TYPE_SPARC32_DMA "sparc32-dma" +OBJECT_DECLARE_SIMPLE_TYPE(SPARC32DMAState, SPARC32_DMA) + +struct SPARC32DMAState { + SysBusDevice parent_obj; + + MemoryRegion dmamem; + MemoryRegion ledma_alias; + ESPDMADeviceState espdma; + LEDMADeviceState ledma; +}; + +/* sparc32_dma.c */ +void ledma_memory_read(void *opaque, hwaddr addr, + uint8_t *buf, int len, int do_bswap); +void ledma_memory_write(void *opaque, hwaddr addr, + uint8_t *buf, int len, int do_bswap); +void espdma_memory_read(void *opaque, uint8_t *buf, int len); +void espdma_memory_write(void *opaque, uint8_t *buf, int len); + +#endif diff --git a/include/hw/sparc/sparc64.h b/include/hw/sparc/sparc64.h new file mode 100644 index 000000000..4ced36fb5 --- /dev/null +++ b/include/hw/sparc/sparc64.h @@ -0,0 +1,12 @@ +#ifndef HW_SPARC_SPARC64_H +#define HW_SPARC_SPARC64_H + +#include "target/sparc/cpu-qom.h" + +#define IVEC_MAX 0x40 + +SPARCCPU *sparc64_cpu_devinit(const char *cpu_type, uint64_t prom_addr); + +void sparc64_cpu_set_ivec_irq(void *opaque, int irq, int level); + +#endif diff --git a/include/hw/sparc/sun4m_iommu.h b/include/hw/sparc/sun4m_iommu.h new file mode 100644 index 000000000..4e2ab34cd --- /dev/null +++ b/include/hw/sparc/sun4m_iommu.h @@ -0,0 +1,53 @@ +/* + * QEMU Sun4m iommu emulation + * + * Copyright (c) 2003-2005 Fabrice Bellard + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ + +#ifndef SUN4M_IOMMU_H +#define SUN4M_IOMMU_H + +#include "hw/sysbus.h" +#include "qom/object.h" + +#define IOMMU_NREGS (4 * 4096 / 4) + +struct IOMMUState { + SysBusDevice parent_obj; + + AddressSpace iommu_as; + IOMMUMemoryRegion iommu; + + MemoryRegion iomem; + uint32_t regs[IOMMU_NREGS]; + hwaddr iostart; + qemu_irq irq; + uint32_t version; +}; +typedef struct IOMMUState IOMMUState; + +#define TYPE_SUN4M_IOMMU "sun4m-iommu" +DECLARE_INSTANCE_CHECKER(IOMMUState, SUN4M_IOMMU, + TYPE_SUN4M_IOMMU) + +#define TYPE_SUN4M_IOMMU_MEMORY_REGION "sun4m-iommu-memory-region" + +#endif diff --git a/include/hw/sparc/sun4u_iommu.h b/include/hw/sparc/sun4u_iommu.h new file mode 100644 index 000000000..f94566a72 --- /dev/null +++ b/include/hw/sparc/sun4u_iommu.h @@ -0,0 +1,52 @@ +/* + * QEMU sun4u IOMMU emulation + * + * Copyright (c) 2006 Fabrice Bellard + * Copyright (c) 2012,2013 Artyom Tarasenko + * Copyright (c) 2017 Mark Cave-Ayland + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ + +#ifndef SUN4U_IOMMU_H +#define SUN4U_IOMMU_H + +#include "hw/sysbus.h" +#include "qom/object.h" + +#define IOMMU_NREGS 3 + +struct IOMMUState { + SysBusDevice parent_obj; + + AddressSpace iommu_as; + IOMMUMemoryRegion iommu; + + MemoryRegion iomem; + uint64_t regs[IOMMU_NREGS]; +}; +typedef struct IOMMUState IOMMUState; + +#define TYPE_SUN4U_IOMMU "sun4u-iommu" +DECLARE_INSTANCE_CHECKER(IOMMUState, SUN4U_IOMMU, + TYPE_SUN4U_IOMMU) + +#define TYPE_SUN4U_IOMMU_MEMORY_REGION "sun4u-iommu-memory-region" + +#endif diff --git a/include/hw/ssi/aspeed_smc.h b/include/hw/ssi/aspeed_smc.h new file mode 100644 index 000000000..3dd354b52 --- /dev/null +++ b/include/hw/ssi/aspeed_smc.h @@ -0,0 +1,119 @@ +/* + * ASPEED AST2400 SMC Controller (SPI Flash Only) + * + * Copyright (C) 2016 IBM Corp. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ + +#ifndef ASPEED_SMC_H +#define ASPEED_SMC_H + +#include "hw/ssi/ssi.h" +#include "hw/sysbus.h" +#include "qom/object.h" + +typedef struct AspeedSegments { + hwaddr addr; + uint32_t size; +} AspeedSegments; + +struct AspeedSMCState; +typedef struct AspeedSMCController { + const char *name; + uint8_t r_conf; + uint8_t r_ce_ctrl; + uint8_t r_ctrl0; + uint8_t r_timings; + uint8_t nregs_timings; + uint8_t conf_enable_w0; + uint8_t max_slaves; + const AspeedSegments *segments; + hwaddr flash_window_base; + uint32_t flash_window_size; + bool has_dma; + hwaddr dma_flash_mask; + hwaddr dma_dram_mask; + uint32_t nregs; + uint32_t (*segment_to_reg)(const struct AspeedSMCState *s, + const AspeedSegments *seg); + void (*reg_to_segment)(const struct AspeedSMCState *s, uint32_t reg, + AspeedSegments *seg); +} AspeedSMCController; + +typedef struct AspeedSMCFlash { + struct AspeedSMCState *controller; + + uint8_t id; + uint32_t size; + + MemoryRegion mmio; + DeviceState *flash; +} AspeedSMCFlash; + +#define TYPE_ASPEED_SMC "aspeed.smc" +OBJECT_DECLARE_TYPE(AspeedSMCState, AspeedSMCClass, ASPEED_SMC) + +struct AspeedSMCClass { + SysBusDevice parent_obj; + const AspeedSMCController *ctrl; +}; + +#define ASPEED_SMC_R_MAX (0x100 / 4) + +struct AspeedSMCState { + SysBusDevice parent_obj; + + const AspeedSMCController *ctrl; + + MemoryRegion mmio; + MemoryRegion mmio_flash; + + qemu_irq irq; + int irqline; + + uint32_t num_cs; + qemu_irq *cs_lines; + bool inject_failure; + + SSIBus *spi; + + uint32_t regs[ASPEED_SMC_R_MAX]; + + /* depends on the controller type */ + uint8_t r_conf; + uint8_t r_ce_ctrl; + uint8_t r_ctrl0; + uint8_t r_timings; + uint8_t conf_enable_w0; + + /* for DMA support */ + uint64_t sdram_base; + + AddressSpace flash_as; + MemoryRegion *dram_mr; + AddressSpace dram_as; + + AspeedSMCFlash *flashes; + + uint8_t snoop_index; + uint8_t snoop_dummies; +}; + +#endif /* ASPEED_SMC_H */ diff --git a/include/hw/ssi/imx_spi.h b/include/hw/ssi/imx_spi.h new file mode 100644 index 000000000..b82b17f36 --- /dev/null +++ b/include/hw/ssi/imx_spi.h @@ -0,0 +1,104 @@ +/* + * IMX SPI Controller + * + * Copyright 2016 Jean-Christophe Dubois + * + * This work is licensed under the terms of the GNU GPL, version 2 or later. + * See the COPYING file in the top-level directory. + */ + +#ifndef IMX_SPI_H +#define IMX_SPI_H + +#include "hw/sysbus.h" +#include "hw/ssi/ssi.h" +#include "qemu/bitops.h" +#include "qemu/fifo32.h" +#include "qom/object.h" + +#define ECSPI_FIFO_SIZE 64 + +#define ECSPI_RXDATA 0 +#define ECSPI_TXDATA 1 +#define ECSPI_CONREG 2 +#define ECSPI_CONFIGREG 3 +#define ECSPI_INTREG 4 +#define ECSPI_DMAREG 5 +#define ECSPI_STATREG 6 +#define ECSPI_PERIODREG 7 +#define ECSPI_TESTREG 8 +#define ECSPI_MSGDATA 16 +#define ECSPI_MAX 17 + +/* ECSPI_CONREG */ +#define ECSPI_CONREG_EN (1 << 0) +#define ECSPI_CONREG_HT (1 << 1) +#define ECSPI_CONREG_XCH (1 << 2) +#define ECSPI_CONREG_SMC (1 << 3) +#define ECSPI_CONREG_CHANNEL_MODE_SHIFT 4 +#define ECSPI_CONREG_CHANNEL_MODE_LENGTH 4 +#define ECSPI_CONREG_DRCTL_SHIFT 16 +#define ECSPI_CONREG_DRCTL_LENGTH 2 +#define ECSPI_CONREG_CHANNEL_SELECT_SHIFT 18 +#define ECSPI_CONREG_CHANNEL_SELECT_LENGTH 2 +#define ECSPI_CONREG_BURST_LENGTH_SHIFT 20 +#define ECSPI_CONREG_BURST_LENGTH_LENGTH 12 + +/* ECSPI_CONFIGREG */ +#define ECSPI_CONFIGREG_SS_CTL_SHIFT 8 +#define ECSPI_CONFIGREG_SS_CTL_LENGTH 4 + +/* ECSPI_INTREG */ +#define ECSPI_INTREG_TEEN (1 << 0) +#define ECSPI_INTREG_TDREN (1 << 1) +#define ECSPI_INTREG_TFEN (1 << 2) +#define ECSPI_INTREG_RREN (1 << 3) +#define ECSPI_INTREG_RDREN (1 << 4) +#define ECSPI_INTREG_RFEN (1 << 5) +#define ECSPI_INTREG_ROEN (1 << 6) +#define ECSPI_INTREG_TCEN (1 << 7) + +/* ECSPI_DMAREG */ +#define ECSPI_DMAREG_RXTDEN (1 << 31) +#define ECSPI_DMAREG_RXDEN (1 << 23) +#define ECSPI_DMAREG_TEDEN (1 << 7) +#define ECSPI_DMAREG_RX_THRESHOLD_SHIFT 16 +#define ECSPI_DMAREG_RX_THRESHOLD_LENGTH 6 + +/* ECSPI_STATREG */ +#define ECSPI_STATREG_TE (1 << 0) +#define ECSPI_STATREG_TDR (1 << 1) +#define ECSPI_STATREG_TF (1 << 2) +#define ECSPI_STATREG_RR (1 << 3) +#define ECSPI_STATREG_RDR (1 << 4) +#define ECSPI_STATREG_RF (1 << 5) +#define ECSPI_STATREG_RO (1 << 6) +#define ECSPI_STATREG_TC (1 << 7) + +#define EXTRACT(value, name) extract32(value, name##_SHIFT, name##_LENGTH) + +#define TYPE_IMX_SPI "imx.spi" +OBJECT_DECLARE_SIMPLE_TYPE(IMXSPIState, IMX_SPI) + +struct IMXSPIState { + /* */ + SysBusDevice parent_obj; + + /* */ + MemoryRegion iomem; + + qemu_irq irq; + + qemu_irq cs_lines[4]; + + SSIBus *bus; + + uint32_t regs[ECSPI_MAX]; + + Fifo32 rx_fifo; + Fifo32 tx_fifo; + + int16_t burst_length; +}; + +#endif /* IMX_SPI_H */ diff --git a/include/hw/ssi/mss-spi.h b/include/hw/ssi/mss-spi.h new file mode 100644 index 000000000..ce6279c43 --- /dev/null +++ b/include/hw/ssi/mss-spi.h @@ -0,0 +1,59 @@ +/* + * Microsemi SmartFusion2 SPI + * + * Copyright (c) 2017 Subbaraya Sundeep + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ + +#ifndef HW_MSS_SPI_H +#define HW_MSS_SPI_H + +#include "hw/sysbus.h" +#include "hw/ssi/ssi.h" +#include "qemu/fifo32.h" +#include "qom/object.h" + +#define TYPE_MSS_SPI "mss-spi" +OBJECT_DECLARE_SIMPLE_TYPE(MSSSpiState, MSS_SPI) + +#define R_SPI_MAX 16 + +struct MSSSpiState { + SysBusDevice parent_obj; + + MemoryRegion mmio; + + qemu_irq irq; + + qemu_irq cs_line; + + SSIBus *spi; + + Fifo32 rx_fifo; + Fifo32 tx_fifo; + + int fifo_depth; + uint32_t frame_count; + bool enabled; + + uint32_t regs[R_SPI_MAX]; +}; + +#endif /* HW_MSS_SPI_H */ diff --git a/include/hw/ssi/npcm7xx_fiu.h b/include/hw/ssi/npcm7xx_fiu.h new file mode 100644 index 000000000..a3a170428 --- /dev/null +++ b/include/hw/ssi/npcm7xx_fiu.h @@ -0,0 +1,73 @@ +/* + * Nuvoton NPCM7xx Flash Interface Unit (FIU) + * + * Copyright 2020 Google LLC + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * for more details. + */ +#ifndef NPCM7XX_FIU_H +#define NPCM7XX_FIU_H + +#include "hw/ssi/ssi.h" +#include "hw/sysbus.h" + +/* + * Number of registers in our device state structure. Don't change this without + * incrementing the version_id in the vmstate. + */ +#define NPCM7XX_FIU_NR_REGS (0x7c / sizeof(uint32_t)) + +typedef struct NPCM7xxFIUState NPCM7xxFIUState; + +/** + * struct NPCM7xxFIUFlash - Per-chipselect flash controller state. + * @direct_access: Memory region for direct flash access. + * @fiu: Pointer to flash controller shared state. + */ +typedef struct NPCM7xxFIUFlash { + MemoryRegion direct_access; + NPCM7xxFIUState *fiu; +} NPCM7xxFIUFlash; + +/** + * NPCM7xxFIUState - Device state for one Flash Interface Unit. + * @parent: System bus device. + * @mmio: Memory region for register access. + * @cs_count: Number of flash chips that may be connected to this module. + * @active_cs: Currently active chip select, or -1 if no chip is selected. + * @cs_lines: GPIO lines that may be wired to flash chips. + * @flash: Array of @cs_count per-flash-chip state objects. + * @spi: The SPI bus mastered by this controller. + * @regs: Register contents. + * + * Each FIU has a shared bank of registers, and controls up to four chip + * selects. Each chip select has a dedicated memory region which may be used to + * read and write the flash connected to that chip select as if it were memory. + */ +struct NPCM7xxFIUState { + SysBusDevice parent; + + MemoryRegion mmio; + + int32_t cs_count; + int32_t active_cs; + qemu_irq *cs_lines; + NPCM7xxFIUFlash *flash; + + SSIBus *spi; + + uint32_t regs[NPCM7XX_FIU_NR_REGS]; +}; + +#define TYPE_NPCM7XX_FIU "npcm7xx-fiu" +#define NPCM7XX_FIU(obj) OBJECT_CHECK(NPCM7xxFIUState, (obj), TYPE_NPCM7XX_FIU) + +#endif /* NPCM7XX_FIU_H */ diff --git a/include/hw/ssi/pl022.h b/include/hw/ssi/pl022.h new file mode 100644 index 000000000..545b52689 --- /dev/null +++ b/include/hw/ssi/pl022.h @@ -0,0 +1,52 @@ +/* + * ARM PrimeCell PL022 Synchronous Serial Port + * + * Copyright (c) 2007 CodeSourcery. + * Written by Paul Brook + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 or + * (at your option) any later version. + */ + +/* This is a model of the Arm PrimeCell PL022 synchronous serial port. + * The PL022 TRM is: + * http://infocenter.arm.com/help/topic/com.arm.doc.ddi0194h/DDI0194H_ssp_pl022_trm.pdf + * + * QEMU interface: + * + sysbus IRQ: SSPINTR combined interrupt line + * + sysbus MMIO region 0: MemoryRegion for the device's registers + */ + +#ifndef HW_SSI_PL022_H +#define HW_SSI_PL022_H + +#include "hw/sysbus.h" +#include "qom/object.h" + +#define TYPE_PL022 "pl022" +OBJECT_DECLARE_SIMPLE_TYPE(PL022State, PL022) + +struct PL022State { + SysBusDevice parent_obj; + + MemoryRegion iomem; + uint32_t cr0; + uint32_t cr1; + uint32_t bitmask; + uint32_t sr; + uint32_t cpsr; + uint32_t is; + uint32_t im; + /* The FIFO head points to the next empty entry. */ + int tx_fifo_head; + int rx_fifo_head; + int tx_fifo_len; + int rx_fifo_len; + uint16_t tx_fifo[8]; + uint16_t rx_fifo[8]; + qemu_irq irq; + SSIBus *ssi; +}; + +#endif diff --git a/include/hw/ssi/ssi.h b/include/hw/ssi/ssi.h new file mode 100644 index 000000000..fe3028c39 --- /dev/null +++ b/include/hw/ssi/ssi.h @@ -0,0 +1,107 @@ +/* QEMU Synchronous Serial Interface support. */ + +/* In principle SSI is a point-point interface. As such the qemu + implementation has a single slave device on a "bus". + However it is fairly common for boards to have multiple slaves + connected to a single master, and select devices with an external + chip select. This is implemented in qemu by having an explicit mux device. + It is assumed that master and slave are both using the same transfer width. + */ + +#ifndef QEMU_SSI_H +#define QEMU_SSI_H + +#include "hw/qdev-core.h" +#include "qom/object.h" + +typedef enum SSICSMode SSICSMode; + +#define TYPE_SSI_SLAVE "ssi-slave" +OBJECT_DECLARE_TYPE(SSISlave, SSISlaveClass, + SSI_SLAVE) + +#define SSI_GPIO_CS "ssi-gpio-cs" + +enum SSICSMode { + SSI_CS_NONE = 0, + SSI_CS_LOW, + SSI_CS_HIGH, +}; + +/* Slave devices. */ +struct SSISlaveClass { + DeviceClass parent_class; + + void (*realize)(SSISlave *dev, Error **errp); + + /* if you have standard or no CS behaviour, just override transfer. + * This is called when the device cs is active (true by default). + */ + uint32_t (*transfer)(SSISlave *dev, uint32_t val); + /* called when the CS line changes. Optional, devices only need to implement + * this if they have side effects associated with the cs line (beyond + * tristating the txrx lines). + */ + int (*set_cs)(SSISlave *dev, bool select); + /* define whether or not CS exists and is active low/high */ + SSICSMode cs_polarity; + + /* if you have non-standard CS behaviour override this to take control + * of the CS behaviour at the device level. transfer, set_cs, and + * cs_polarity are unused if this is overwritten. Transfer_raw will + * always be called for the device for every txrx access to the parent bus + */ + uint32_t (*transfer_raw)(SSISlave *dev, uint32_t val); +}; + +struct SSISlave { + DeviceState parent_obj; + + /* Chip select state */ + bool cs; +}; + +extern const VMStateDescription vmstate_ssi_slave; + +#define VMSTATE_SSI_SLAVE(_field, _state) { \ + .name = (stringify(_field)), \ + .size = sizeof(SSISlave), \ + .vmsd = &vmstate_ssi_slave, \ + .flags = VMS_STRUCT, \ + .offset = vmstate_offset_value(_state, _field, SSISlave), \ +} + +DeviceState *ssi_create_slave(SSIBus *bus, const char *name); +/** + * ssi_realize_and_unref: realize and unref an SSI slave device + * @dev: SSI slave device to realize + * @bus: SSI bus to put it on + * @errp: error pointer + * + * Call 'realize' on @dev, put it on the specified @bus, and drop the + * reference to it. Errors are reported via @errp and by returning + * false. + * + * This function is useful if you have created @dev via qdev_new() + * (which takes a reference to the device it returns to you), so that + * you can set properties on it before realizing it. If you don't need + * to set properties then ssi_create_slave() is probably better (as it + * does the create, init and realize in one step). + * + * If you are embedding the SSI slave into another QOM device and + * initialized it via some variant on object_initialize_child() then + * do not use this function, because that family of functions arrange + * for the only reference to the child device to be held by the parent + * via the child<> property, and so the reference-count-drop done here + * would be incorrect. (Instead you would want ssi_realize(), which + * doesn't currently exist but would be trivial to create if we had + * any code that wanted it.) + */ +bool ssi_realize_and_unref(DeviceState *dev, SSIBus *bus, Error **errp); + +/* Master interface. */ +SSIBus *ssi_create_bus(DeviceState *parent, const char *name); + +uint32_t ssi_transfer(SSIBus *bus, uint32_t val); + +#endif diff --git a/include/hw/ssi/stm32f2xx_spi.h b/include/hw/ssi/stm32f2xx_spi.h new file mode 100644 index 000000000..3683b4ad3 --- /dev/null +++ b/include/hw/ssi/stm32f2xx_spi.h @@ -0,0 +1,71 @@ +/* + * STM32F2XX SPI + * + * Copyright (c) 2014 Alistair Francis + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ + +#ifndef HW_STM32F2XX_SPI_H +#define HW_STM32F2XX_SPI_H + +#include "hw/sysbus.h" +#include "hw/ssi/ssi.h" +#include "qom/object.h" + +#define STM_SPI_CR1 0x00 +#define STM_SPI_CR2 0x04 +#define STM_SPI_SR 0x08 +#define STM_SPI_DR 0x0C +#define STM_SPI_CRCPR 0x10 +#define STM_SPI_RXCRCR 0x14 +#define STM_SPI_TXCRCR 0x18 +#define STM_SPI_I2SCFGR 0x1C +#define STM_SPI_I2SPR 0x20 + +#define STM_SPI_CR1_SPE (1 << 6) +#define STM_SPI_CR1_MSTR (1 << 2) + +#define STM_SPI_SR_RXNE 1 + +#define TYPE_STM32F2XX_SPI "stm32f2xx-spi" +OBJECT_DECLARE_SIMPLE_TYPE(STM32F2XXSPIState, STM32F2XX_SPI) + +struct STM32F2XXSPIState { + /* */ + SysBusDevice parent_obj; + + /* */ + MemoryRegion mmio; + + uint32_t spi_cr1; + uint32_t spi_cr2; + uint32_t spi_sr; + uint32_t spi_dr; + uint32_t spi_crcpr; + uint32_t spi_rxcrcr; + uint32_t spi_txcrcr; + uint32_t spi_i2scfgr; + uint32_t spi_i2spr; + + qemu_irq irq; + SSIBus *ssi; +}; + +#endif /* HW_STM32F2XX_SPI_H */ diff --git a/include/hw/ssi/xilinx_spips.h b/include/hw/ssi/xilinx_spips.h new file mode 100644 index 000000000..b96de21b3 --- /dev/null +++ b/include/hw/ssi/xilinx_spips.h @@ -0,0 +1,143 @@ +/* + * Header file for the Xilinx Zynq SPI controller + * + * Copyright (C) 2015 Xilinx Inc + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ + +#ifndef XILINX_SPIPS_H +#define XILINX_SPIPS_H + +#include "hw/ssi/ssi.h" +#include "qemu/fifo32.h" +#include "hw/stream.h" +#include "hw/sysbus.h" +#include "qom/object.h" + +typedef struct XilinxSPIPS XilinxSPIPS; + +#define XLNX_SPIPS_R_MAX (0x100 / 4) +#define XLNX_ZYNQMP_SPIPS_R_MAX (0x830 / 4) + +/* Bite off 4k chunks at a time */ +#define LQSPI_CACHE_SIZE 1024 + +#define QSPI_DMA_MAX_BURST_SIZE 2048 + +typedef enum { + READ = 0x3, READ_4 = 0x13, + FAST_READ = 0xb, FAST_READ_4 = 0x0c, + DOR = 0x3b, DOR_4 = 0x3c, + QOR = 0x6b, QOR_4 = 0x6c, + DIOR = 0xbb, DIOR_4 = 0xbc, + QIOR = 0xeb, QIOR_4 = 0xec, + + PP = 0x2, PP_4 = 0x12, + DPP = 0xa2, + QPP = 0x32, QPP_4 = 0x34, +} FlashCMD; + +struct XilinxSPIPS { + SysBusDevice parent_obj; + + MemoryRegion iomem; + MemoryRegion mmlqspi; + + qemu_irq irq; + int irqline; + + uint8_t num_cs; + uint8_t num_busses; + + uint8_t snoop_state; + int cmd_dummies; + uint8_t link_state; + uint8_t link_state_next; + uint8_t link_state_next_when; + qemu_irq *cs_lines; + bool *cs_lines_state; + SSIBus **spi; + + Fifo8 rx_fifo; + Fifo8 tx_fifo; + + uint8_t num_txrx_bytes; + uint32_t rx_discard; + + uint32_t regs[XLNX_SPIPS_R_MAX]; + + bool man_start_com; +}; + +struct XilinxQSPIPS { + XilinxSPIPS parent_obj; + + uint8_t lqspi_buf[LQSPI_CACHE_SIZE]; + hwaddr lqspi_cached_addr; + Error *migration_blocker; + bool mmio_execution_enabled; +}; +typedef struct XilinxQSPIPS XilinxQSPIPS; + +struct XlnxZynqMPQSPIPS { + XilinxQSPIPS parent_obj; + + StreamSlave *dma; + int gqspi_irqline; + + uint32_t regs[XLNX_ZYNQMP_SPIPS_R_MAX]; + + /* GQSPI has seperate tx/rx fifos */ + Fifo8 rx_fifo_g; + Fifo8 tx_fifo_g; + Fifo32 fifo_g; + /* + * At the end of each generic command, misaligned extra bytes are discard + * or padded to tx and rx respectively to round it out (and avoid need for + * individual byte access. Since we use byte fifos, keep track of the + * alignment WRT to word access. + */ + uint8_t rx_fifo_g_align; + uint8_t tx_fifo_g_align; + bool man_start_com_g; + uint32_t dma_burst_size; + uint8_t dma_buf[QSPI_DMA_MAX_BURST_SIZE]; +}; + +struct XilinxSPIPSClass { + SysBusDeviceClass parent_class; + + const MemoryRegionOps *reg_ops; + + uint32_t rx_fifo_size; + uint32_t tx_fifo_size; +}; + +#define TYPE_XILINX_SPIPS "xlnx.ps7-spi" +#define TYPE_XILINX_QSPIPS "xlnx.ps7-qspi" +#define TYPE_XLNX_ZYNQMP_QSPIPS "xlnx.usmp-gqspi" + +OBJECT_DECLARE_TYPE(XilinxSPIPS, XilinxSPIPSClass, XILINX_SPIPS) + +OBJECT_DECLARE_SIMPLE_TYPE(XilinxQSPIPS, XILINX_QSPIPS) + +OBJECT_DECLARE_SIMPLE_TYPE(XlnxZynqMPQSPIPS, XLNX_ZYNQMP_QSPIPS) + +#endif /* XILINX_SPIPS_H */ diff --git a/include/hw/stream.h b/include/hw/stream.h new file mode 100644 index 000000000..e39d5a5b5 --- /dev/null +++ b/include/hw/stream.h @@ -0,0 +1,54 @@ +#ifndef STREAM_H +#define STREAM_H + +#include "qom/object.h" + +/* stream slave. Used until qdev provides a generic way. */ +#define TYPE_STREAM_SLAVE "stream-slave" + +typedef struct StreamSlaveClass StreamSlaveClass; +DECLARE_CLASS_CHECKERS(StreamSlaveClass, STREAM_SLAVE, + TYPE_STREAM_SLAVE) +#define STREAM_SLAVE(obj) \ + INTERFACE_CHECK(StreamSlave, (obj), TYPE_STREAM_SLAVE) + +typedef struct StreamSlave StreamSlave; + +typedef void (*StreamCanPushNotifyFn)(void *opaque); + +struct StreamSlaveClass { + InterfaceClass parent; + /** + * can push - determine if a stream slave is capable of accepting at least + * one byte of data. Returns false if cannot accept. If not implemented, the + * slave is assumed to always be capable of receiving. + * @notify: Optional callback that the slave will call when the slave is + * capable of receiving again. Only called if false is returned. + * @notify_opaque: opaque data to pass to notify call. + */ + bool (*can_push)(StreamSlave *obj, StreamCanPushNotifyFn notify, + void *notify_opaque); + /** + * push - push data to a Stream slave. The number of bytes pushed is + * returned. If the slave short returns, the master must wait before trying + * again, the slave may continue to just return 0 waiting for the vm time to + * advance. The can_push() function can be used to trap the point in time + * where the slave is ready to receive again, otherwise polling on a QEMU + * timer will work. + * @obj: Stream slave to push to + * @buf: Data to write + * @len: Maximum number of bytes to write + * @eop: End of packet flag + */ + size_t (*push)(StreamSlave *obj, unsigned char *buf, size_t len, bool eop); +}; + +size_t +stream_push(StreamSlave *sink, uint8_t *buf, size_t len, bool eop); + +bool +stream_can_push(StreamSlave *sink, StreamCanPushNotifyFn notify, + void *notify_opaque); + + +#endif /* STREAM_H */ diff --git a/include/hw/sysbus.h b/include/hw/sysbus.h new file mode 100644 index 000000000..3564b7b6a --- /dev/null +++ b/include/hw/sysbus.h @@ -0,0 +1,107 @@ +#ifndef HW_SYSBUS_H +#define HW_SYSBUS_H + +/* Devices attached directly to the main system bus. */ + +#include "hw/qdev-core.h" +#include "exec/memory.h" +#include "qom/object.h" + +#define QDEV_MAX_MMIO 32 +#define QDEV_MAX_PIO 32 + +#define TYPE_SYSTEM_BUS "System" +DECLARE_INSTANCE_CHECKER(BusState, SYSTEM_BUS, + TYPE_SYSTEM_BUS) + + +#define TYPE_SYS_BUS_DEVICE "sys-bus-device" +OBJECT_DECLARE_TYPE(SysBusDevice, SysBusDeviceClass, + SYS_BUS_DEVICE) + +/** + * SysBusDeviceClass: + * + * SysBusDeviceClass is not overriding #DeviceClass.realize, so derived + * classes overriding it are not required to invoke its implementation. + */ + +#define SYSBUS_DEVICE_GPIO_IRQ "sysbus-irq" + +struct SysBusDeviceClass { + /*< private >*/ + DeviceClass parent_class; + + /* + * Let the sysbus device format its own non-PIO, non-MMIO unit address. + * + * Sometimes a class of SysBusDevices has neither MMIO nor PIO resources, + * yet instances of it would like to distinguish themselves, in + * OpenFirmware device paths, from other instances of the same class on the + * sysbus. For that end we expose this callback. + * + * The implementation is not supposed to change *@dev, or incur other + * observable change. + * + * The function returns a dynamically allocated string. On error, NULL + * should be returned; the unit address portion of the OFW node will be + * omitted then. (This is not considered a fatal error.) + */ + char *(*explicit_ofw_unit_address)(const SysBusDevice *dev); + void (*connect_irq_notifier)(SysBusDevice *dev, qemu_irq irq); +}; + +struct SysBusDevice { + /*< private >*/ + DeviceState parent_obj; + /*< public >*/ + + int num_mmio; + struct { + hwaddr addr; + MemoryRegion *memory; + } mmio[QDEV_MAX_MMIO]; + int num_pio; + uint32_t pio[QDEV_MAX_PIO]; +}; + +typedef void FindSysbusDeviceFunc(SysBusDevice *sbdev, void *opaque); + +void sysbus_init_mmio(SysBusDevice *dev, MemoryRegion *memory); +MemoryRegion *sysbus_mmio_get_region(SysBusDevice *dev, int n); +void sysbus_init_irq(SysBusDevice *dev, qemu_irq *p); +void sysbus_pass_irq(SysBusDevice *dev, SysBusDevice *target); +void sysbus_init_ioports(SysBusDevice *dev, uint32_t ioport, uint32_t size); + + +bool sysbus_has_irq(SysBusDevice *dev, int n); +bool sysbus_has_mmio(SysBusDevice *dev, unsigned int n); +void sysbus_connect_irq(SysBusDevice *dev, int n, qemu_irq irq); +bool sysbus_is_irq_connected(SysBusDevice *dev, int n); +qemu_irq sysbus_get_connected_irq(SysBusDevice *dev, int n); +void sysbus_mmio_map(SysBusDevice *dev, int n, hwaddr addr); +void sysbus_mmio_map_overlap(SysBusDevice *dev, int n, hwaddr addr, + int priority); +void sysbus_mmio_unmap(SysBusDevice *dev, int n); +void sysbus_add_io(SysBusDevice *dev, hwaddr addr, + MemoryRegion *mem); +MemoryRegion *sysbus_address_space(SysBusDevice *dev); + +bool sysbus_realize(SysBusDevice *dev, Error **errp); +bool sysbus_realize_and_unref(SysBusDevice *dev, Error **errp); + +/* Call func for every dynamically created sysbus device in the system */ +void foreach_dynamic_sysbus_device(FindSysbusDeviceFunc *func, void *opaque); + +/* Legacy helper function for creating devices. */ +DeviceState *sysbus_create_varargs(const char *name, + hwaddr addr, ...); + +static inline DeviceState *sysbus_create_simple(const char *name, + hwaddr addr, + qemu_irq irq) +{ + return sysbus_create_varargs(name, addr, irq, NULL); +} + +#endif /* HW_SYSBUS_H */ diff --git a/include/hw/timer/a9gtimer.h b/include/hw/timer/a9gtimer.h new file mode 100644 index 000000000..6ae9122e4 --- /dev/null +++ b/include/hw/timer/a9gtimer.h @@ -0,0 +1,97 @@ +/* + * Global peripheral timer block for ARM A9MP + * + * (C) 2013 Xilinx Inc. + * + * Written by François LEGAL + * Written by Peter Crosthwaite + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, see . + */ + +#ifndef A9GTIMER_H +#define A9GTIMER_H + +#include "hw/sysbus.h" +#include "qom/object.h" + +#define A9_GTIMER_MAX_CPUS 4 + +#define TYPE_A9_GTIMER "arm.cortex-a9-global-timer" +OBJECT_DECLARE_SIMPLE_TYPE(A9GTimerState, A9_GTIMER) + +#define R_COUNTER_LO 0x00 +#define R_COUNTER_HI 0x04 + +#define R_CONTROL 0x08 +#define R_CONTROL_TIMER_ENABLE (1 << 0) +#define R_CONTROL_COMP_ENABLE (1 << 1) +#define R_CONTROL_IRQ_ENABLE (1 << 2) +#define R_CONTROL_AUTO_INCREMENT (1 << 3) +#define R_CONTROL_PRESCALER_SHIFT 8 +#define R_CONTROL_PRESCALER_LEN 8 +#define R_CONTROL_PRESCALER_MASK (((1 << R_CONTROL_PRESCALER_LEN) - 1) << \ + R_CONTROL_PRESCALER_SHIFT) + +#define R_CONTROL_BANKED (R_CONTROL_COMP_ENABLE | \ + R_CONTROL_IRQ_ENABLE | \ + R_CONTROL_AUTO_INCREMENT) +#define R_CONTROL_NEEDS_SYNC (R_CONTROL_TIMER_ENABLE | \ + R_CONTROL_PRESCALER_MASK) + +#define R_INTERRUPT_STATUS 0x0C +#define R_COMPARATOR_LO 0x10 +#define R_COMPARATOR_HI 0x14 +#define R_AUTO_INCREMENT 0x18 + +typedef struct A9GTimerPerCPU A9GTimerPerCPU; + +struct A9GTimerPerCPU { + A9GTimerState *parent; + + uint32_t control; /* only per cpu banked bits valid */ + uint64_t compare; + uint32_t status; + uint32_t inc; + + MemoryRegion iomem; + qemu_irq irq; /* PPI interrupts */ +}; + +struct A9GTimerState { + /*< private >*/ + SysBusDevice parent_obj; + /*< public >*/ + + MemoryRegion iomem; + /* static props */ + uint32_t num_cpu; + + QEMUTimer *timer; + + uint64_t counter; /* current timer value */ + + uint64_t ref_counter; + uint64_t cpu_ref_time; /* the cpu time as of last update of ref_counter */ + uint32_t control; /* only non per cpu banked bits valid */ + + A9GTimerPerCPU per_cpu[A9_GTIMER_MAX_CPUS]; +}; + +typedef struct A9GTimerUpdate { + uint64_t now; + uint64_t new; +} A9GTimerUpdate; + +#endif /* A9GTIMER_H */ diff --git a/include/hw/timer/allwinner-a10-pit.h b/include/hw/timer/allwinner-a10-pit.h new file mode 100644 index 000000000..8435758ad --- /dev/null +++ b/include/hw/timer/allwinner-a10-pit.h @@ -0,0 +1,68 @@ +#ifndef ALLWINNER_A10_PIT_H +#define ALLWINNER_A10_PIT_H + +#include "hw/ptimer.h" +#include "hw/sysbus.h" +#include "qom/object.h" + +#define TYPE_AW_A10_PIT "allwinner-A10-timer" +OBJECT_DECLARE_SIMPLE_TYPE(AwA10PITState, AW_A10_PIT) + +#define AW_A10_PIT_TIMER_NR 6 +#define AW_A10_PIT_TIMER_IRQ 0x1 +#define AW_A10_PIT_WDOG_IRQ 0x100 + +#define AW_A10_PIT_TIMER_IRQ_EN 0 +#define AW_A10_PIT_TIMER_IRQ_ST 0x4 + +#define AW_A10_PIT_TIMER_CONTROL 0x0 +#define AW_A10_PIT_TIMER_EN 0x1 +#define AW_A10_PIT_TIMER_RELOAD 0x2 +#define AW_A10_PIT_TIMER_MODE 0x80 + +#define AW_A10_PIT_TIMER_INTERVAL 0x4 +#define AW_A10_PIT_TIMER_COUNT 0x8 +#define AW_A10_PIT_WDOG_CONTROL 0x90 +#define AW_A10_PIT_WDOG_MODE 0x94 + +#define AW_A10_PIT_COUNT_CTL 0xa0 +#define AW_A10_PIT_COUNT_RL_EN 0x2 +#define AW_A10_PIT_COUNT_CLR_EN 0x1 +#define AW_A10_PIT_COUNT_LO 0xa4 +#define AW_A10_PIT_COUNT_HI 0xa8 + +#define AW_A10_PIT_TIMER_BASE 0x10 +#define AW_A10_PIT_TIMER_BASE_END \ + (AW_A10_PIT_TIMER_BASE * 6 + AW_A10_PIT_TIMER_COUNT) + +#define AW_A10_PIT_DEFAULT_CLOCK 0x4 + + +typedef struct AwA10TimerContext { + AwA10PITState *container; + int index; +} AwA10TimerContext; + +struct AwA10PITState { + /*< private >*/ + SysBusDevice parent_obj; + /*< public >*/ + qemu_irq irq[AW_A10_PIT_TIMER_NR]; + ptimer_state * timer[AW_A10_PIT_TIMER_NR]; + AwA10TimerContext timer_context[AW_A10_PIT_TIMER_NR]; + MemoryRegion iomem; + uint32_t clk_freq[4]; + + uint32_t irq_enable; + uint32_t irq_status; + uint32_t control[AW_A10_PIT_TIMER_NR]; + uint32_t interval[AW_A10_PIT_TIMER_NR]; + uint32_t count[AW_A10_PIT_TIMER_NR]; + uint32_t watch_dog_mode; + uint32_t watch_dog_control; + uint32_t count_lo; + uint32_t count_hi; + uint32_t count_ctl; +}; + +#endif diff --git a/include/hw/timer/arm_mptimer.h b/include/hw/timer/arm_mptimer.h new file mode 100644 index 000000000..65a96e2a0 --- /dev/null +++ b/include/hw/timer/arm_mptimer.h @@ -0,0 +1,51 @@ +/* + * Private peripheral timer/watchdog blocks for ARM 11MPCore and A9MP + * + * Copyright (c) 2006-2007 CodeSourcery. + * Copyright (c) 2011 Linaro Limited + * Written by Paul Brook, Peter Maydell + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, see . + */ +#ifndef HW_TIMER_ARM_MPTIMER_H +#define HW_TIMER_ARM_MPTIMER_H + +#include "hw/sysbus.h" +#include "qom/object.h" + +#define ARM_MPTIMER_MAX_CPUS 4 + +/* State of a single timer or watchdog block */ +typedef struct { + uint32_t control; + uint32_t status; + struct ptimer_state *timer; + qemu_irq irq; + MemoryRegion iomem; +} TimerBlock; + +#define TYPE_ARM_MPTIMER "arm_mptimer" +OBJECT_DECLARE_SIMPLE_TYPE(ARMMPTimerState, ARM_MPTIMER) + +struct ARMMPTimerState { + /*< private >*/ + SysBusDevice parent_obj; + /*< public >*/ + + uint32_t num_cpu; + TimerBlock timerblock[ARM_MPTIMER_MAX_CPUS]; + MemoryRegion iomem; +}; + +#endif diff --git a/include/hw/timer/armv7m_systick.h b/include/hw/timer/armv7m_systick.h new file mode 100644 index 000000000..84496faaf --- /dev/null +++ b/include/hw/timer/armv7m_systick.h @@ -0,0 +1,58 @@ +/* + * ARMv7M SysTick timer + * + * Copyright (c) 2006-2007 CodeSourcery. + * Written by Paul Brook + * Copyright (c) 2017 Linaro Ltd + * Written by Peter Maydell + * + * This code is licensed under the GPL (version 2 or later). + */ + +#ifndef HW_TIMER_ARMV7M_SYSTICK_H +#define HW_TIMER_ARMV7M_SYSTICK_H + +#include "hw/sysbus.h" +#include "qom/object.h" +#include "hw/ptimer.h" + +#define TYPE_SYSTICK "armv7m_systick" + +OBJECT_DECLARE_SIMPLE_TYPE(SysTickState, SYSTICK) + +struct SysTickState { + /*< private >*/ + SysBusDevice parent_obj; + /*< public >*/ + + uint32_t control; + uint32_t reload; + int64_t tick; + ptimer_state *ptimer; + MemoryRegion iomem; + qemu_irq irq; +}; + +/* + * Multiplication factor to convert from system clock ticks to qemu timer + * ticks. This should be set (by board code, usually) to a value + * equal to NANOSECONDS_PER_SECOND / frq, where frq is the clock frequency + * in Hz of the CPU. + * + * This value is used by the systick device when it is running in + * its "use the CPU clock" mode (ie when SYST_CSR.CLKSOURCE == 1) to + * set how fast the timer should tick. + * + * TODO: we should refactor this so that rather than using a global + * we use a device property or something similar. This is complicated + * because (a) the property would need to be plumbed through from the + * board code down through various layers to the systick device + * and (b) the property needs to be modifiable after realize, because + * the stellaris board uses this to implement the behaviour where the + * guest can reprogram the PLL registers to downclock the CPU, and the + * systick device needs to react accordingly. Possibly this should + * be deferred until we have a good API for modelling clock trees. + */ +extern int system_clock_scale; + +#endif diff --git a/include/hw/timer/aspeed_timer.h b/include/hw/timer/aspeed_timer.h new file mode 100644 index 000000000..d36034a10 --- /dev/null +++ b/include/hw/timer/aspeed_timer.h @@ -0,0 +1,77 @@ +/* + * ASPEED AST2400 Timer + * + * Andrew Jeffery + * + * Copyright (C) 2016 IBM Corp. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write to the Free Software Foundation, Inc., + * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + */ +#ifndef ASPEED_TIMER_H +#define ASPEED_TIMER_H + +#include "qemu/timer.h" +#include "hw/misc/aspeed_scu.h" +#include "qom/object.h" + +#define TYPE_ASPEED_TIMER "aspeed.timer" +OBJECT_DECLARE_TYPE(AspeedTimerCtrlState, AspeedTimerClass, ASPEED_TIMER) +#define TYPE_ASPEED_2400_TIMER TYPE_ASPEED_TIMER "-ast2400" +#define TYPE_ASPEED_2500_TIMER TYPE_ASPEED_TIMER "-ast2500" +#define TYPE_ASPEED_2600_TIMER TYPE_ASPEED_TIMER "-ast2600" + +#define ASPEED_TIMER_NR_TIMERS 8 + +typedef struct AspeedTimer { + qemu_irq irq; + + uint8_t id; + QEMUTimer timer; + + /** + * Track the line level as the ASPEED timers implement edge triggered + * interrupts, signalling with both the rising and falling edge. + */ + int32_t level; + uint32_t reload; + uint32_t match[2]; + uint64_t start; +} AspeedTimer; + +struct AspeedTimerCtrlState { + /*< private >*/ + SysBusDevice parent; + + /*< public >*/ + MemoryRegion iomem; + + uint32_t ctrl; + uint32_t ctrl2; + uint32_t ctrl3; + uint32_t irq_sts; + AspeedTimer timers[ASPEED_TIMER_NR_TIMERS]; + + AspeedSCUState *scu; +}; + + +struct AspeedTimerClass { + SysBusDeviceClass parent_class; + + uint64_t (*read)(AspeedTimerCtrlState *s, hwaddr offset); + void (*write)(AspeedTimerCtrlState *s, hwaddr offset, uint64_t value); +}; + +#endif /* ASPEED_TIMER_H */ diff --git a/include/hw/timer/avr_timer16.h b/include/hw/timer/avr_timer16.h new file mode 100644 index 000000000..053625433 --- /dev/null +++ b/include/hw/timer/avr_timer16.h @@ -0,0 +1,94 @@ +/* + * AVR 16-bit timer + * + * Copyright (c) 2018 University of Kent + * Author: Ed Robbins + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, see + * + */ + +/* + * Driver for 16 bit timers on 8 bit AVR devices. + * Note: + * On ATmega640/V-1280/V-1281/V-2560/V-2561/V timers 1, 3, 4 and 5 are 16 bit + */ + +#ifndef HW_TIMER_AVR_TIMER16_H +#define HW_TIMER_AVR_TIMER16_H + +#include "hw/sysbus.h" +#include "qemu/timer.h" +#include "hw/hw.h" +#include "qom/object.h" + +enum NextInterrupt { + OVERFLOW, + COMPA, + COMPB, + COMPC, + CAPT +}; + +#define TYPE_AVR_TIMER16 "avr-timer16" +OBJECT_DECLARE_SIMPLE_TYPE(AVRTimer16State, AVR_TIMER16) + +struct AVRTimer16State { + /* */ + SysBusDevice parent_obj; + + /* */ + MemoryRegion iomem; + MemoryRegion imsk_iomem; + MemoryRegion ifr_iomem; + QEMUTimer *timer; + qemu_irq capt_irq; + qemu_irq compa_irq; + qemu_irq compb_irq; + qemu_irq compc_irq; + qemu_irq ovf_irq; + + bool enabled; + + /* registers */ + uint8_t cra; + uint8_t crb; + uint8_t crc; + uint8_t cntl; + uint8_t cnth; + uint8_t icrl; + uint8_t icrh; + uint8_t ocral; + uint8_t ocrah; + uint8_t ocrbl; + uint8_t ocrbh; + uint8_t ocrcl; + uint8_t ocrch; + /* + * Reads and writes to CNT and ICR utilise a bizarre temporary + * register, which we emulate + */ + uint8_t rtmp; + uint8_t imsk; + uint8_t ifr; + + uint8_t id; + uint64_t cpu_freq_hz; + uint64_t freq_hz; + uint64_t period_ns; + uint64_t reset_time_ns; + enum NextInterrupt next_interrupt; +}; + +#endif /* HW_TIMER_AVR_TIMER16_H */ diff --git a/include/hw/timer/bcm2835_systmr.h b/include/hw/timer/bcm2835_systmr.h new file mode 100644 index 000000000..bd3097d74 --- /dev/null +++ b/include/hw/timer/bcm2835_systmr.h @@ -0,0 +1,42 @@ +/* + * BCM2835 SYS timer emulation + * + * Copyright (c) 2019 Philippe Mathieu-Daudé + * + * SPDX-License-Identifier: GPL-2.0-or-later + */ + +#ifndef BCM2835_SYSTIMER_H +#define BCM2835_SYSTIMER_H + +#include "hw/sysbus.h" +#include "hw/irq.h" +#include "qemu/timer.h" +#include "qom/object.h" + +#define TYPE_BCM2835_SYSTIMER "bcm2835-sys-timer" +OBJECT_DECLARE_SIMPLE_TYPE(BCM2835SystemTimerState, BCM2835_SYSTIMER) + +#define BCM2835_SYSTIMER_COUNT 4 + +typedef struct { + unsigned id; + QEMUTimer timer; + qemu_irq irq; + BCM2835SystemTimerState *state; +} BCM2835SystemTimerCompare; + +struct BCM2835SystemTimerState { + /*< private >*/ + SysBusDevice parent_obj; + + /*< public >*/ + MemoryRegion iomem; + struct { + uint32_t ctrl_status; + uint32_t compare[BCM2835_SYSTIMER_COUNT]; + } reg; + BCM2835SystemTimerCompare tmr[BCM2835_SYSTIMER_COUNT]; +}; + +#endif diff --git a/include/hw/timer/cmsdk-apb-dualtimer.h b/include/hw/timer/cmsdk-apb-dualtimer.h new file mode 100644 index 000000000..08d9e6fa3 --- /dev/null +++ b/include/hw/timer/cmsdk-apb-dualtimer.h @@ -0,0 +1,71 @@ +/* + * ARM CMSDK APB dual-timer emulation + * + * Copyright (c) 2018 Linaro Limited + * Written by Peter Maydell + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 or + * (at your option) any later version. + */ + +/* + * This is a model of the "APB dual-input timer" which is part of the Cortex-M + * System Design Kit (CMSDK) and documented in the Cortex-M System + * Design Kit Technical Reference Manual (ARM DDI0479C): + * https://developer.arm.com/products/system-design/system-design-kits/cortex-m-system-design-kit + * + * QEMU interface: + * + QOM property "pclk-frq": frequency at which the timer is clocked + * + sysbus MMIO region 0: the register bank + * + sysbus IRQ 0: combined timer interrupt TIMINTC + * + sysbus IRO 1: timer block 1 interrupt TIMINT1 + * + sysbus IRQ 2: timer block 2 interrupt TIMINT2 + */ + +#ifndef CMSDK_APB_DUALTIMER_H +#define CMSDK_APB_DUALTIMER_H + +#include "hw/sysbus.h" +#include "hw/ptimer.h" +#include "qom/object.h" + +#define TYPE_CMSDK_APB_DUALTIMER "cmsdk-apb-dualtimer" +OBJECT_DECLARE_SIMPLE_TYPE(CMSDKAPBDualTimer, CMSDK_APB_DUALTIMER) + + +/* One of the two identical timer modules in the dual-timer module */ +typedef struct CMSDKAPBDualTimerModule { + CMSDKAPBDualTimer *parent; + struct ptimer_state *timer; + qemu_irq timerint; + /* + * We must track the guest LOAD and VALUE register state by hand + * rather than leaving this state only in the ptimer limit/count, + * because if CONTROL.SIZE is 0 then only the low 16 bits of the + * counter actually counts, but the high half is still guest + * accessible. + */ + uint32_t load; + uint32_t value; + uint32_t control; + uint32_t intstatus; +} CMSDKAPBDualTimerModule; + +#define CMSDK_APB_DUALTIMER_NUM_MODULES 2 + +struct CMSDKAPBDualTimer { + /*< private >*/ + SysBusDevice parent_obj; + + /*< public >*/ + MemoryRegion iomem; + qemu_irq timerintc; + uint32_t pclk_frq; + + CMSDKAPBDualTimerModule timermod[CMSDK_APB_DUALTIMER_NUM_MODULES]; + uint32_t timeritcr; + uint32_t timeritop; +}; + +#endif diff --git a/include/hw/timer/cmsdk-apb-timer.h b/include/hw/timer/cmsdk-apb-timer.h new file mode 100644 index 000000000..0d80b2a48 --- /dev/null +++ b/include/hw/timer/cmsdk-apb-timer.h @@ -0,0 +1,60 @@ +/* + * ARM CMSDK APB timer emulation + * + * Copyright (c) 2017 Linaro Limited + * Written by Peter Maydell + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 or + * (at your option) any later version. + */ + +#ifndef CMSDK_APB_TIMER_H +#define CMSDK_APB_TIMER_H + +#include "hw/qdev-properties.h" +#include "hw/sysbus.h" +#include "hw/ptimer.h" +#include "qom/object.h" + +#define TYPE_CMSDK_APB_TIMER "cmsdk-apb-timer" +OBJECT_DECLARE_SIMPLE_TYPE(CMSDKAPBTIMER, CMSDK_APB_TIMER) + +struct CMSDKAPBTIMER { + /*< private >*/ + SysBusDevice parent_obj; + + /*< public >*/ + MemoryRegion iomem; + qemu_irq timerint; + uint32_t pclk_frq; + struct ptimer_state *timer; + + uint32_t ctrl; + uint32_t value; + uint32_t reload; + uint32_t intstatus; +}; + +/** + * cmsdk_apb_timer_create - convenience function to create TYPE_CMSDK_APB_TIMER + * @addr: location in system memory to map registers + * @pclk_frq: frequency in Hz of the PCLK clock (used for calculating baud rate) + */ +static inline DeviceState *cmsdk_apb_timer_create(hwaddr addr, + qemu_irq timerint, + uint32_t pclk_frq) +{ + DeviceState *dev; + SysBusDevice *s; + + dev = qdev_new(TYPE_CMSDK_APB_TIMER); + s = SYS_BUS_DEVICE(dev); + qdev_prop_set_uint32(dev, "pclk-frq", pclk_frq); + sysbus_realize_and_unref(s, &error_fatal); + sysbus_mmio_map(s, 0, addr); + sysbus_connect_irq(s, 0, timerint); + return dev; +} + +#endif diff --git a/include/hw/timer/digic-timer.h b/include/hw/timer/digic-timer.h new file mode 100644 index 000000000..da82fb466 --- /dev/null +++ b/include/hw/timer/digic-timer.h @@ -0,0 +1,46 @@ +/* + * Canon DIGIC timer block declarations. + * + * Copyright (C) 2013 Antony Pavlov + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + */ + +#ifndef HW_TIMER_DIGIC_TIMER_H +#define HW_TIMER_DIGIC_TIMER_H + +#include "hw/sysbus.h" +#include "hw/ptimer.h" +#include "qom/object.h" + +#define TYPE_DIGIC_TIMER "digic-timer" +OBJECT_DECLARE_SIMPLE_TYPE(DigicTimerState, DIGIC_TIMER) + +#define DIGIC_TIMER_CONTROL 0x00 +#define DIGIC_TIMER_CONTROL_RST 0x80000000 +#define DIGIC_TIMER_CONTROL_EN 0x00000001 +#define DIGIC_TIMER_RELVALUE 0x08 +#define DIGIC_TIMER_VALUE 0x0c + +struct DigicTimerState { + /*< private >*/ + SysBusDevice parent_obj; + /*< public >*/ + + MemoryRegion iomem; + ptimer_state *ptimer; + + uint32_t control; + uint32_t relvalue; +}; + +#endif /* HW_TIMER_DIGIC_TIMER_H */ diff --git a/include/hw/timer/hpet.h b/include/hw/timer/hpet.h new file mode 100644 index 000000000..f04c4d323 --- /dev/null +++ b/include/hw/timer/hpet.h @@ -0,0 +1,86 @@ +/* + * QEMU Emulated HPET support + * + * Copyright IBM, Corp. 2008 + * + * Authors: + * Beth Kon + * + * This work is licensed under the terms of the GNU GPL, version 2. See + * the COPYING file in the top-level directory. + * + */ + +#ifndef HW_HPET_H +#define HW_HPET_H + +#include "qom/object.h" + +#define HPET_BASE 0xfed00000 +#define HPET_LEN 0x400 +#define HPET_CLK_PERIOD 10 /* 10 ns*/ + +#define FS_PER_NS 1000000 /* 1000000 femtoseconds == 1 ns */ +#define HPET_MIN_TIMERS 3 +#define HPET_MAX_TIMERS 32 + +#define HPET_NUM_IRQ_ROUTES 32 + +#define HPET_LEGACY_PIT_INT 0 +#define HPET_LEGACY_RTC_INT 1 + +#define HPET_CFG_ENABLE 0x001 +#define HPET_CFG_LEGACY 0x002 + +#define HPET_ID 0x000 +#define HPET_PERIOD 0x004 +#define HPET_CFG 0x010 +#define HPET_STATUS 0x020 +#define HPET_COUNTER 0x0f0 +#define HPET_TN_CFG 0x000 +#define HPET_TN_CMP 0x008 +#define HPET_TN_ROUTE 0x010 +#define HPET_CFG_WRITE_MASK 0x3 + +#define HPET_ID_NUM_TIM_SHIFT 8 +#define HPET_ID_NUM_TIM_MASK 0x1f00 + +#define HPET_TN_TYPE_LEVEL 0x002 +#define HPET_TN_ENABLE 0x004 +#define HPET_TN_PERIODIC 0x008 +#define HPET_TN_PERIODIC_CAP 0x010 +#define HPET_TN_SIZE_CAP 0x020 +#define HPET_TN_SETVAL 0x040 +#define HPET_TN_32BIT 0x100 +#define HPET_TN_INT_ROUTE_MASK 0x3e00 +#define HPET_TN_FSB_ENABLE 0x4000 +#define HPET_TN_FSB_CAP 0x8000 +#define HPET_TN_CFG_WRITE_MASK 0x7f4e +#define HPET_TN_INT_ROUTE_SHIFT 9 +#define HPET_TN_INT_ROUTE_CAP_SHIFT 32 +#define HPET_TN_CFG_BITS_READONLY_OR_RESERVED 0xffff80b1U + +struct hpet_fw_entry +{ + uint32_t event_timer_block_id; + uint64_t address; + uint16_t min_tick; + uint8_t page_prot; +} QEMU_PACKED; + +struct hpet_fw_config +{ + uint8_t count; + struct hpet_fw_entry hpet[8]; +} QEMU_PACKED; + +extern struct hpet_fw_config hpet_cfg; + +#define TYPE_HPET "hpet" + +static inline bool hpet_find(void) +{ + return object_resolve_path_type("", TYPE_HPET, NULL); +} + +#endif diff --git a/include/hw/timer/i8254.h b/include/hw/timer/i8254.h new file mode 100644 index 000000000..3e569f42b --- /dev/null +++ b/include/hw/timer/i8254.h @@ -0,0 +1,80 @@ +/* + * QEMU 8253/8254 interval timer emulation + * + * Copyright (c) 2003-2004 Fabrice Bellard + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ + +#ifndef HW_I8254_H +#define HW_I8254_H + +#include "hw/qdev-properties.h" +#include "hw/isa/isa.h" +#include "qapi/error.h" +#include "qom/object.h" + +#define PIT_FREQ 1193182 + +typedef struct PITChannelInfo { + int gate; + int mode; + int initial_count; + int out; +} PITChannelInfo; + +#define TYPE_PIT_COMMON "pit-common" +OBJECT_DECLARE_TYPE(PITCommonState, PITCommonClass, PIT_COMMON) + +#define TYPE_I8254 "isa-pit" +#define TYPE_KVM_I8254 "kvm-pit" + +static inline ISADevice *i8254_pit_init(ISABus *bus, int base, int isa_irq, + qemu_irq alt_irq) +{ + DeviceState *dev; + ISADevice *d; + + d = isa_new(TYPE_I8254); + dev = DEVICE(d); + qdev_prop_set_uint32(dev, "iobase", base); + isa_realize_and_unref(d, bus, &error_fatal); + qdev_connect_gpio_out(dev, 0, + isa_irq >= 0 ? isa_get_irq(d, isa_irq) : alt_irq); + + return d; +} + +static inline ISADevice *kvm_pit_init(ISABus *bus, int base) +{ + DeviceState *dev; + ISADevice *d; + + d = isa_new(TYPE_KVM_I8254); + dev = DEVICE(d); + qdev_prop_set_uint32(dev, "iobase", base); + isa_realize_and_unref(d, bus, &error_fatal); + + return d; +} + +void pit_set_gate(ISADevice *dev, int channel, int val); +void pit_get_channel_info(ISADevice *dev, int channel, PITChannelInfo *info); + +#endif /* HW_I8254_H */ diff --git a/include/hw/timer/i8254_internal.h b/include/hw/timer/i8254_internal.h new file mode 100644 index 000000000..a9a600d94 --- /dev/null +++ b/include/hw/timer/i8254_internal.h @@ -0,0 +1,76 @@ +/* + * QEMU 8253/8254 - internal interfaces + * + * Copyright (c) 2011 Jan Kiszka, Siemens AG + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ + +#ifndef QEMU_I8254_INTERNAL_H +#define QEMU_I8254_INTERNAL_H + +#include "hw/isa/isa.h" +#include "hw/timer/i8254.h" +#include "qemu/timer.h" + +typedef struct PITChannelState { + int count; /* can be 65536 */ + uint16_t latched_count; + uint8_t count_latched; + uint8_t status_latched; + uint8_t status; + uint8_t read_state; + uint8_t write_state; + uint8_t write_latch; + uint8_t rw_mode; + uint8_t mode; + uint8_t bcd; /* not supported */ + uint8_t gate; /* timer start */ + int64_t count_load_time; + /* irq handling */ + int64_t next_transition_time; + QEMUTimer *irq_timer; + qemu_irq irq; + uint32_t irq_disabled; +} PITChannelState; + +struct PITCommonState { + ISADevice dev; + MemoryRegion ioports; + uint32_t iobase; + PITChannelState channels[3]; +}; + +struct PITCommonClass { + ISADeviceClass parent_class; + + void (*set_channel_gate)(PITCommonState *s, PITChannelState *sc, int val); + void (*get_channel_info)(PITCommonState *s, PITChannelState *sc, + PITChannelInfo *info); + void (*pre_save)(PITCommonState *s); + void (*post_load)(PITCommonState *s); +}; + +int pit_get_out(PITChannelState *s, int64_t current_time); +int64_t pit_get_next_transition_time(PITChannelState *s, int64_t current_time); +void pit_get_channel_info_common(PITCommonState *s, PITChannelState *sc, + PITChannelInfo *info); +void pit_reset_common(PITCommonState *s); + +#endif /* QEMU_I8254_INTERNAL_H */ diff --git a/include/hw/timer/imx_epit.h b/include/hw/timer/imx_epit.h new file mode 100644 index 000000000..2acc41e98 --- /dev/null +++ b/include/hw/timer/imx_epit.h @@ -0,0 +1,81 @@ +/* + * i.MX EPIT Timer + * + * Copyright (c) 2008 OK Labs + * Copyright (c) 2011 NICTA Pty Ltd + * Originally written by Hans Jiang + * Updated by Peter Chubb + * Updated by Jean-Christophe Dubois + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ + +#ifndef IMX_EPIT_H +#define IMX_EPIT_H + +#include "hw/sysbus.h" +#include "hw/ptimer.h" +#include "hw/misc/imx_ccm.h" +#include "qom/object.h" + +/* + * EPIT: Enhanced periodic interrupt timer + */ + +#define CR_EN (1 << 0) +#define CR_ENMOD (1 << 1) +#define CR_OCIEN (1 << 2) +#define CR_RLD (1 << 3) +#define CR_PRESCALE_SHIFT (4) +#define CR_PRESCALE_MASK (0xfff) +#define CR_SWR (1 << 16) +#define CR_IOVW (1 << 17) +#define CR_DBGEN (1 << 18) +#define CR_WAITEN (1 << 19) +#define CR_DOZEN (1 << 20) +#define CR_STOPEN (1 << 21) +#define CR_CLKSRC_SHIFT (24) +#define CR_CLKSRC_MASK (0x3 << CR_CLKSRC_SHIFT) + +#define EPIT_TIMER_MAX 0XFFFFFFFFUL + +#define TYPE_IMX_EPIT "imx.epit" +OBJECT_DECLARE_SIMPLE_TYPE(IMXEPITState, IMX_EPIT) + +struct IMXEPITState { + /*< private >*/ + SysBusDevice parent_obj; + + /*< public >*/ + ptimer_state *timer_reload; + ptimer_state *timer_cmp; + MemoryRegion iomem; + IMXCCMState *ccm; + + uint32_t cr; + uint32_t sr; + uint32_t lr; + uint32_t cmp; + uint32_t cnt; + + uint32_t freq; + qemu_irq irq; +}; + +#endif /* IMX_EPIT_H */ diff --git a/include/hw/timer/imx_gpt.h b/include/hw/timer/imx_gpt.h new file mode 100644 index 000000000..ff5c8a351 --- /dev/null +++ b/include/hw/timer/imx_gpt.h @@ -0,0 +1,119 @@ +/* + * i.MX GPT Timer + * + * Copyright (c) 2008 OK Labs + * Copyright (c) 2011 NICTA Pty Ltd + * Originally written by Hans Jiang + * Updated by Peter Chubb + * Updated by Jean-Christophe Dubois + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ + +#ifndef IMX_GPT_H +#define IMX_GPT_H + +#include "hw/sysbus.h" +#include "hw/ptimer.h" +#include "hw/misc/imx_ccm.h" +#include "qom/object.h" + +/* + * GPT : General purpose timer + * + * This timer counts up continuously while it is enabled, resetting itself + * to 0 when it reaches GPT_TIMER_MAX (in freerun mode) or when it + * reaches the value of one of the ocrX (in periodic mode). + */ + +#define GPT_TIMER_MAX 0XFFFFFFFFUL + +/* Control register. Not all of these bits have any effect (yet) */ +#define GPT_CR_EN (1 << 0) /* GPT Enable */ +#define GPT_CR_ENMOD (1 << 1) /* GPT Enable Mode */ +#define GPT_CR_DBGEN (1 << 2) /* GPT Debug mode enable */ +#define GPT_CR_WAITEN (1 << 3) /* GPT Wait Mode Enable */ +#define GPT_CR_DOZEN (1 << 4) /* GPT Doze mode enable */ +#define GPT_CR_STOPEN (1 << 5) /* GPT Stop Mode Enable */ +#define GPT_CR_CLKSRC_SHIFT (6) +#define GPT_CR_CLKSRC_MASK (0x7) + +#define GPT_CR_FRR (1 << 9) /* Freerun or Restart */ +#define GPT_CR_SWR (1 << 15) /* Software Reset */ +#define GPT_CR_IM1 (3 << 16) /* Input capture channel 1 mode (2 bits) */ +#define GPT_CR_IM2 (3 << 18) /* Input capture channel 2 mode (2 bits) */ +#define GPT_CR_OM1 (7 << 20) /* Output Compare Channel 1 Mode (3 bits) */ +#define GPT_CR_OM2 (7 << 23) /* Output Compare Channel 2 Mode (3 bits) */ +#define GPT_CR_OM3 (7 << 26) /* Output Compare Channel 3 Mode (3 bits) */ +#define GPT_CR_FO1 (1 << 29) /* Force Output Compare Channel 1 */ +#define GPT_CR_FO2 (1 << 30) /* Force Output Compare Channel 2 */ +#define GPT_CR_FO3 (1 << 31) /* Force Output Compare Channel 3 */ + +#define GPT_SR_OF1 (1 << 0) +#define GPT_SR_OF2 (1 << 1) +#define GPT_SR_OF3 (1 << 2) +#define GPT_SR_ROV (1 << 5) + +#define GPT_IR_OF1IE (1 << 0) +#define GPT_IR_OF2IE (1 << 1) +#define GPT_IR_OF3IE (1 << 2) +#define GPT_IR_ROVIE (1 << 5) + +#define TYPE_IMX25_GPT "imx25.gpt" +#define TYPE_IMX31_GPT "imx31.gpt" +#define TYPE_IMX6_GPT "imx6.gpt" +#define TYPE_IMX7_GPT "imx7.gpt" + +#define TYPE_IMX_GPT TYPE_IMX25_GPT + +typedef struct IMXGPTState IMXGPTState; +DECLARE_INSTANCE_CHECKER(IMXGPTState, IMX_GPT, + TYPE_IMX_GPT) + +struct IMXGPTState { + /*< private >*/ + SysBusDevice parent_obj; + + /*< public >*/ + ptimer_state *timer; + MemoryRegion iomem; + IMXCCMState *ccm; + + uint32_t cr; + uint32_t pr; + uint32_t sr; + uint32_t ir; + uint32_t ocr1; + uint32_t ocr2; + uint32_t ocr3; + uint32_t icr1; + uint32_t icr2; + uint32_t cnt; + + uint32_t next_timeout; + uint32_t next_int; + + uint32_t freq; + + qemu_irq irq; + + const IMXClk *clocks; +}; + +#endif /* IMX_GPT_H */ diff --git a/include/hw/timer/mips_gictimer.h b/include/hw/timer/mips_gictimer.h new file mode 100644 index 000000000..c7ca6c821 --- /dev/null +++ b/include/hw/timer/mips_gictimer.h @@ -0,0 +1,47 @@ +/* + * This file is subject to the terms and conditions of the GNU General Public + * License. See the file "COPYING" in the main directory of this archive + * for more details. + * + * Copyright (C) 2016 Imagination Technologies + * + */ + +#ifndef MIPS_GICTIMER_H +#define MIPS_GICTIMER_H + +typedef struct MIPSGICTimerVPState MIPSGICTimerVPState; +typedef struct MIPSGICTimerState MIPSGICTimerState; + +typedef void MIPSGICTimerCB(void *opaque, uint32_t vp_index); + +struct MIPSGICTimerVPState { + QEMUTimer *qtimer; + uint32_t vp_index; + uint32_t comparelo; + MIPSGICTimerState *gictimer; +}; + +struct MIPSGICTimerState { + void *opaque; + uint8_t countstop; + uint32_t sh_counterlo; + int32_t num_vps; + MIPSGICTimerVPState *vptimers; + MIPSGICTimerCB *cb; +}; + +uint32_t mips_gictimer_get_freq(MIPSGICTimerState *gic); +uint32_t mips_gictimer_get_sh_count(MIPSGICTimerState *gic); +void mips_gictimer_store_sh_count(MIPSGICTimerState *gic, uint64_t count); +uint32_t mips_gictimer_get_vp_compare(MIPSGICTimerState *gictimer, + uint32_t vp_index); +void mips_gictimer_store_vp_compare(MIPSGICTimerState *gic, uint32_t vp_index, + uint64_t compare); +uint8_t mips_gictimer_get_countstop(MIPSGICTimerState *gic); +void mips_gictimer_start_count(MIPSGICTimerState *gic); +void mips_gictimer_stop_count(MIPSGICTimerState *gic); +MIPSGICTimerState *mips_gictimer_init(void *opaque, uint32_t nvps, + MIPSGICTimerCB *cb); + +#endif /* MIPS_GICTIMER_H */ diff --git a/include/hw/timer/mss-timer.h b/include/hw/timer/mss-timer.h new file mode 100644 index 000000000..da3851290 --- /dev/null +++ b/include/hw/timer/mss-timer.h @@ -0,0 +1,63 @@ +/* + * Microsemi SmartFusion2 Timer. + * + * Copyright (c) 2017 Subbaraya Sundeep + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ + +#ifndef HW_MSS_TIMER_H +#define HW_MSS_TIMER_H + +#include "hw/sysbus.h" +#include "hw/ptimer.h" +#include "qom/object.h" + +#define TYPE_MSS_TIMER "mss-timer" +OBJECT_DECLARE_SIMPLE_TYPE(MSSTimerState, MSS_TIMER) + +/* + * There are two 32-bit down counting timers. + * Timers 1 and 2 can be concatenated into a single 64-bit Timer + * that operates either in Periodic mode or in One-shot mode. + * Writing 1 to the TIM64_MODE register bit 0 sets the Timers in 64-bit mode. + * In 64-bit mode, writing to the 32-bit registers has no effect. + * Similarly, in 32-bit mode, writing to the 64-bit mode registers + * has no effect. Only two 32-bit timers are supported currently. + */ +#define NUM_TIMERS 2 + +#define R_TIM1_MAX 6 + +struct Msf2Timer { + ptimer_state *ptimer; + + uint32_t regs[R_TIM1_MAX]; + qemu_irq irq; +}; + +struct MSSTimerState { + SysBusDevice parent_obj; + + MemoryRegion mmio; + uint32_t freq_hz; + struct Msf2Timer timers[NUM_TIMERS]; +}; + +#endif /* HW_MSS_TIMER_H */ diff --git a/include/hw/timer/npcm7xx_timer.h b/include/hw/timer/npcm7xx_timer.h new file mode 100644 index 000000000..6993fd723 --- /dev/null +++ b/include/hw/timer/npcm7xx_timer.h @@ -0,0 +1,112 @@ +/* + * Nuvoton NPCM7xx Timer Controller + * + * Copyright 2020 Google LLC + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * for more details. + */ +#ifndef NPCM7XX_TIMER_H +#define NPCM7XX_TIMER_H + +#include "exec/memory.h" +#include "hw/sysbus.h" +#include "qemu/timer.h" + +/* Each Timer Module (TIM) instance holds five 25 MHz timers. */ +#define NPCM7XX_TIMERS_PER_CTRL (5) + +/* + * Number of registers in our device state structure. Don't change this without + * incrementing the version_id in the vmstate. + */ +#define NPCM7XX_TIMER_NR_REGS (0x54 / sizeof(uint32_t)) + +/* The basic watchdog timer period is 2^14 clock cycles. */ +#define NPCM7XX_WATCHDOG_BASETIME_SHIFT 14 + +#define NPCM7XX_WATCHDOG_RESET_GPIO_OUT "npcm7xx-clk-watchdog-reset-gpio-out" + +typedef struct NPCM7xxTimerCtrlState NPCM7xxTimerCtrlState; + +/** + * struct NPCM7xxBaseTimer - Basic functionality that both regular timer and + * watchdog timer use. + * @qtimer: QEMU timer that notifies us on expiration. + * @expires_ns: Absolute virtual expiration time. + * @remaining_ns: Remaining time until expiration if timer is paused. + */ +typedef struct NPCM7xxBaseTimer { + QEMUTimer qtimer; + int64_t expires_ns; + int64_t remaining_ns; +} NPCM7xxBaseTimer; + +/** + * struct NPCM7xxTimer - Individual timer state. + * @ctrl: The timer module that owns this timer. + * @irq: GIC interrupt line to fire on expiration (if enabled). + * @base_timer: The basic timer functionality for this timer. + * @tcsr: The Timer Control and Status Register. + * @ticr: The Timer Initial Count Register. + */ +typedef struct NPCM7xxTimer { + NPCM7xxTimerCtrlState *ctrl; + + qemu_irq irq; + NPCM7xxBaseTimer base_timer; + + uint32_t tcsr; + uint32_t ticr; +} NPCM7xxTimer; + +/** + * struct NPCM7xxWatchdogTimer - The watchdog timer state. + * @ctrl: The timer module that owns this timer. + * @irq: GIC interrupt line to fire on expiration (if enabled). + * @reset_signal: The GPIO used to send a reset signal. + * @base_timer: The basic timer functionality for this timer. + * @wtcr: The Watchdog Timer Control Register. + */ +typedef struct NPCM7xxWatchdogTimer { + NPCM7xxTimerCtrlState *ctrl; + + qemu_irq irq; + qemu_irq reset_signal; + NPCM7xxBaseTimer base_timer; + + uint32_t wtcr; +} NPCM7xxWatchdogTimer; + +/** + * struct NPCM7xxTimerCtrlState - Timer Module device state. + * @parent: System bus device. + * @iomem: Memory region through which registers are accessed. + * @index: The index of this timer module. + * @tisr: The Timer Interrupt Status Register. + * @timer: The five individual timers managed by this module. + * @watchdog_timer: The watchdog timer managed by this module. + */ +struct NPCM7xxTimerCtrlState { + SysBusDevice parent; + + MemoryRegion iomem; + + uint32_t tisr; + + NPCM7xxTimer timer[NPCM7XX_TIMERS_PER_CTRL]; + NPCM7xxWatchdogTimer watchdog_timer; +}; + +#define TYPE_NPCM7XX_TIMER "npcm7xx-timer" +#define NPCM7XX_TIMER(obj) \ + OBJECT_CHECK(NPCM7xxTimerCtrlState, (obj), TYPE_NPCM7XX_TIMER) + +#endif /* NPCM7XX_TIMER_H */ diff --git a/include/hw/timer/nrf51_timer.h b/include/hw/timer/nrf51_timer.h new file mode 100644 index 000000000..76827c11d --- /dev/null +++ b/include/hw/timer/nrf51_timer.h @@ -0,0 +1,82 @@ +/* + * nRF51 System-on-Chip Timer peripheral + * + * QEMU interface: + * + sysbus MMIO regions 0: GPIO registers + * + sysbus irq + * + * Copyright 2018 Steffen Görtz + * + * This code is licensed under the GPL version 2 or later. See + * the COPYING file in the top-level directory. + */ +#ifndef NRF51_TIMER_H +#define NRF51_TIMER_H + +#include "hw/sysbus.h" +#include "qemu/timer.h" +#include "qom/object.h" +#define TYPE_NRF51_TIMER "nrf51_soc.timer" +OBJECT_DECLARE_SIMPLE_TYPE(NRF51TimerState, NRF51_TIMER) + +#define NRF51_TIMER_REG_COUNT 4 + +#define NRF51_TIMER_TASK_START 0x000 +#define NRF51_TIMER_TASK_STOP 0x004 +#define NRF51_TIMER_TASK_COUNT 0x008 +#define NRF51_TIMER_TASK_CLEAR 0x00C +#define NRF51_TIMER_TASK_SHUTDOWN 0x010 +#define NRF51_TIMER_TASK_CAPTURE_0 0x040 +#define NRF51_TIMER_TASK_CAPTURE_3 0x04C + +#define NRF51_TIMER_EVENT_COMPARE_0 0x140 +#define NRF51_TIMER_EVENT_COMPARE_1 0x144 +#define NRF51_TIMER_EVENT_COMPARE_2 0x148 +#define NRF51_TIMER_EVENT_COMPARE_3 0x14C + +#define NRF51_TIMER_REG_SHORTS 0x200 +#define NRF51_TIMER_REG_SHORTS_MASK 0xf0f +#define NRF51_TIMER_REG_INTENSET 0x304 +#define NRF51_TIMER_REG_INTENCLR 0x308 +#define NRF51_TIMER_REG_INTEN_MASK 0xf0000 +#define NRF51_TIMER_REG_MODE 0x504 +#define NRF51_TIMER_REG_MODE_MASK 0x01 +#define NRF51_TIMER_TIMER 0 +#define NRF51_TIMER_COUNTER 1 +#define NRF51_TIMER_REG_BITMODE 0x508 +#define NRF51_TIMER_REG_BITMODE_MASK 0x03 +#define NRF51_TIMER_WIDTH_16 0 +#define NRF51_TIMER_WIDTH_8 1 +#define NRF51_TIMER_WIDTH_24 2 +#define NRF51_TIMER_WIDTH_32 3 +#define NRF51_TIMER_REG_PRESCALER 0x510 +#define NRF51_TIMER_REG_PRESCALER_MASK 0x0F +#define NRF51_TIMER_REG_CC0 0x540 +#define NRF51_TIMER_REG_CC3 0x54C + +struct NRF51TimerState { + SysBusDevice parent_obj; + + MemoryRegion iomem; + qemu_irq irq; + + uint8_t id; + QEMUTimer timer; + int64_t timer_start_ns; + int64_t update_counter_ns; + uint32_t counter; + + bool running; + + uint8_t events_compare[NRF51_TIMER_REG_COUNT]; + uint32_t cc[NRF51_TIMER_REG_COUNT]; + uint32_t shorts; + uint32_t inten; + uint32_t mode; + uint32_t bitmode; + uint32_t prescaler; + +}; + + +#endif diff --git a/include/hw/timer/renesas_cmt.h b/include/hw/timer/renesas_cmt.h new file mode 100644 index 000000000..1c0b65c1d --- /dev/null +++ b/include/hw/timer/renesas_cmt.h @@ -0,0 +1,43 @@ +/* + * Renesas Compare-match timer Object + * + * Copyright (c) 2019 Yoshinori Sato + * + * SPDX-License-Identifier: GPL-2.0-or-later + */ + +#ifndef HW_TIMER_RENESAS_CMT_H +#define HW_TIMER_RENESAS_CMT_H + +#include "qemu/timer.h" +#include "hw/sysbus.h" +#include "qom/object.h" + +#define TYPE_RENESAS_CMT "renesas-cmt" +typedef struct RCMTState RCMTState; +DECLARE_INSTANCE_CHECKER(RCMTState, RCMT, + TYPE_RENESAS_CMT) + +enum { + CMT_CH = 2, + CMT_NR_IRQ = 1 * CMT_CH +}; + +struct RCMTState { + /*< private >*/ + SysBusDevice parent_obj; + /*< public >*/ + + uint64_t input_freq; + MemoryRegion memory; + + uint16_t cmstr; + uint16_t cmcr[CMT_CH]; + uint16_t cmcnt[CMT_CH]; + uint16_t cmcor[CMT_CH]; + int64_t tick[CMT_CH]; + qemu_irq cmi[CMT_CH]; + QEMUTimer timer[CMT_CH]; +}; + +#endif diff --git a/include/hw/timer/renesas_tmr.h b/include/hw/timer/renesas_tmr.h new file mode 100644 index 000000000..caf7eec0d --- /dev/null +++ b/include/hw/timer/renesas_tmr.h @@ -0,0 +1,58 @@ +/* + * Renesas 8bit timer Object + * + * Copyright (c) 2018 Yoshinori Sato + * + * SPDX-License-Identifier: GPL-2.0-or-later + */ + +#ifndef HW_TIMER_RENESAS_TMR_H +#define HW_TIMER_RENESAS_TMR_H + +#include "qemu/timer.h" +#include "hw/sysbus.h" +#include "qom/object.h" + +#define TYPE_RENESAS_TMR "renesas-tmr" +typedef struct RTMRState RTMRState; +DECLARE_INSTANCE_CHECKER(RTMRState, RTMR, + TYPE_RENESAS_TMR) + +enum timer_event { + cmia = 0, + cmib = 1, + ovi = 2, + none = 3, + TMR_NR_EVENTS = 4 +}; + +enum { + TMR_CH = 2, + TMR_NR_IRQ = 3 * TMR_CH +}; + +struct RTMRState { + /*< private >*/ + SysBusDevice parent_obj; + /*< public >*/ + + uint64_t input_freq; + MemoryRegion memory; + + int64_t tick; + uint8_t tcnt[TMR_CH]; + uint8_t tcora[TMR_CH]; + uint8_t tcorb[TMR_CH]; + uint8_t tcr[TMR_CH]; + uint8_t tccr[TMR_CH]; + uint8_t tcor[TMR_CH]; + uint8_t tcsr[TMR_CH]; + int64_t div_round[TMR_CH]; + uint8_t next[TMR_CH]; + qemu_irq cmia[TMR_CH]; + qemu_irq cmib[TMR_CH]; + qemu_irq ovi[TMR_CH]; + QEMUTimer timer[TMR_CH]; +}; + +#endif diff --git a/include/hw/timer/stm32f2xx_timer.h b/include/hw/timer/stm32f2xx_timer.h new file mode 100644 index 000000000..90f40f174 --- /dev/null +++ b/include/hw/timer/stm32f2xx_timer.h @@ -0,0 +1,102 @@ +/* + * STM32F2XX Timer + * + * Copyright (c) 2014 Alistair Francis + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ + +#ifndef HW_STM32F2XX_TIMER_H +#define HW_STM32F2XX_TIMER_H + +#include "hw/sysbus.h" +#include "qemu/timer.h" +#include "qom/object.h" + +#define TIM_CR1 0x00 +#define TIM_CR2 0x04 +#define TIM_SMCR 0x08 +#define TIM_DIER 0x0C +#define TIM_SR 0x10 +#define TIM_EGR 0x14 +#define TIM_CCMR1 0x18 +#define TIM_CCMR2 0x1C +#define TIM_CCER 0x20 +#define TIM_CNT 0x24 +#define TIM_PSC 0x28 +#define TIM_ARR 0x2C +#define TIM_CCR1 0x34 +#define TIM_CCR2 0x38 +#define TIM_CCR3 0x3C +#define TIM_CCR4 0x40 +#define TIM_DCR 0x48 +#define TIM_DMAR 0x4C +#define TIM_OR 0x50 + +#define TIM_CR1_CEN 1 + +#define TIM_EGR_UG 1 + +#define TIM_CCER_CC2E (1 << 4) +#define TIM_CCMR1_OC2M2 (1 << 14) +#define TIM_CCMR1_OC2M1 (1 << 13) +#define TIM_CCMR1_OC2M0 (1 << 12) +#define TIM_CCMR1_OC2PE (1 << 11) + +#define TIM_DIER_UIE 1 + +#define TYPE_STM32F2XX_TIMER "stm32f2xx-timer" +typedef struct STM32F2XXTimerState STM32F2XXTimerState; +DECLARE_INSTANCE_CHECKER(STM32F2XXTimerState, STM32F2XXTIMER, + TYPE_STM32F2XX_TIMER) + +struct STM32F2XXTimerState { + /* */ + SysBusDevice parent_obj; + + /* */ + MemoryRegion iomem; + QEMUTimer *timer; + qemu_irq irq; + + int64_t tick_offset; + uint64_t hit_time; + uint64_t freq_hz; + + uint32_t tim_cr1; + uint32_t tim_cr2; + uint32_t tim_smcr; + uint32_t tim_dier; + uint32_t tim_sr; + uint32_t tim_egr; + uint32_t tim_ccmr1; + uint32_t tim_ccmr2; + uint32_t tim_ccer; + uint32_t tim_psc; + uint32_t tim_arr; + uint32_t tim_ccr1; + uint32_t tim_ccr2; + uint32_t tim_ccr3; + uint32_t tim_ccr4; + uint32_t tim_dcr; + uint32_t tim_dmar; + uint32_t tim_or; +}; + +#endif /* HW_STM32F2XX_TIMER_H */ diff --git a/include/hw/timer/tmu012.h b/include/hw/timer/tmu012.h new file mode 100644 index 000000000..808ed8de1 --- /dev/null +++ b/include/hw/timer/tmu012.h @@ -0,0 +1,23 @@ +/* + * SuperH Timer + * + * Copyright (c) 2007 Magnus Damm + * + * This code is licensed under the GPL. + */ + +#ifndef HW_TIMER_TMU012_H +#define HW_TIMER_TMU012_H + +#include "exec/hwaddr.h" + +#define TMU012_FEAT_TOCR (1 << 0) +#define TMU012_FEAT_3CHAN (1 << 1) +#define TMU012_FEAT_EXTCLK (1 << 2) + +void tmu012_init(MemoryRegion *sysmem, hwaddr base, + int feat, uint32_t freq, + qemu_irq ch0_irq, qemu_irq ch1_irq, + qemu_irq ch2_irq0, qemu_irq ch2_irq1); + +#endif diff --git a/include/hw/tricore/tricore.h b/include/hw/tricore/tricore.h new file mode 100644 index 000000000..c19ed3f01 --- /dev/null +++ b/include/hw/tricore/tricore.h @@ -0,0 +1,10 @@ +#ifndef HW_TRICORE_H +#define HW_TRICORE_H + +#include "exec/memory.h" + +struct tricore_boot_info { + uint64_t ram_size; + const char *kernel_filename; +}; +#endif diff --git a/include/hw/unicore32/puv3.h b/include/hw/unicore32/puv3.h new file mode 100644 index 000000000..f587a1f62 --- /dev/null +++ b/include/hw/unicore32/puv3.h @@ -0,0 +1,40 @@ +/* + * Misc PKUnity SoC declarations + * + * Copyright (C) 2010-2012 Guan Xuetao + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation, or any later version. + * See the COPYING file in the top-level directory. + */ + +#ifndef QEMU_HW_PUV3_H +#define QEMU_HW_PUV3_H + +#define PUV3_REGS_OFFSET (0x1000) /* 4K is reasonable */ + +/* Hardware interrupts */ +#define PUV3_IRQS_NR (32) + +#define PUV3_IRQS_GPIOLOW0 (0) +#define PUV3_IRQS_GPIOLOW1 (1) +#define PUV3_IRQS_GPIOLOW2 (2) +#define PUV3_IRQS_GPIOLOW3 (3) +#define PUV3_IRQS_GPIOLOW4 (4) +#define PUV3_IRQS_GPIOLOW5 (5) +#define PUV3_IRQS_GPIOLOW6 (6) +#define PUV3_IRQS_GPIOLOW7 (7) +#define PUV3_IRQS_GPIOHIGH (8) +#define PUV3_IRQS_PS2_KBD (22) +#define PUV3_IRQS_PS2_AUX (23) +#define PUV3_IRQS_OST0 (26) + +/* All puv3_*.c use DPRINTF for debug. */ +#ifdef DEBUG_PUV3 +#define DPRINTF(fmt, ...) printf("%s: " fmt , __func__, ## __VA_ARGS__) +#else +#define DPRINTF(fmt, ...) do {} while (0) +#endif + +#endif /* QEMU_HW_PUV3_H */ diff --git a/include/hw/usb.h b/include/hw/usb.h new file mode 100644 index 000000000..a70a72e91 --- /dev/null +++ b/include/hw/usb.h @@ -0,0 +1,573 @@ +#ifndef QEMU_USB_H +#define QEMU_USB_H + +/* + * QEMU USB API + * + * Copyright (c) 2005 Fabrice Bellard + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ + +#include "exec/memory.h" +#include "hw/qdev-core.h" +#include "qemu/iov.h" +#include "qemu/queue.h" +#include "qom/object.h" + +/* Constants related to the USB / PCI interaction */ +#define USB_SBRN 0x60 /* Serial Bus Release Number Register */ +#define USB_RELEASE_1 0x10 /* USB 1.0 */ +#define USB_RELEASE_2 0x20 /* USB 2.0 */ +#define USB_RELEASE_3 0x30 /* USB 3.0 */ + +#define USB_TOKEN_SETUP 0x2d +#define USB_TOKEN_IN 0x69 /* device -> host */ +#define USB_TOKEN_OUT 0xe1 /* host -> device */ + +#define USB_RET_SUCCESS (0) +#define USB_RET_NODEV (-1) +#define USB_RET_NAK (-2) +#define USB_RET_STALL (-3) +#define USB_RET_BABBLE (-4) +#define USB_RET_IOERROR (-5) +#define USB_RET_ASYNC (-6) +#define USB_RET_ADD_TO_QUEUE (-7) +#define USB_RET_REMOVE_FROM_QUEUE (-8) + +#define USB_SPEED_LOW 0 +#define USB_SPEED_FULL 1 +#define USB_SPEED_HIGH 2 +#define USB_SPEED_SUPER 3 + +#define USB_SPEED_MASK_LOW (1 << USB_SPEED_LOW) +#define USB_SPEED_MASK_FULL (1 << USB_SPEED_FULL) +#define USB_SPEED_MASK_HIGH (1 << USB_SPEED_HIGH) +#define USB_SPEED_MASK_SUPER (1 << USB_SPEED_SUPER) + +#define USB_STATE_NOTATTACHED 0 +#define USB_STATE_ATTACHED 1 +//#define USB_STATE_POWERED 2 +#define USB_STATE_DEFAULT 3 +//#define USB_STATE_ADDRESS 4 +//#define USB_STATE_CONFIGURED 5 +#define USB_STATE_SUSPENDED 6 + +#define USB_CLASS_AUDIO 1 +#define USB_CLASS_COMM 2 +#define USB_CLASS_HID 3 +#define USB_CLASS_PHYSICAL 5 +#define USB_CLASS_STILL_IMAGE 6 +#define USB_CLASS_PRINTER 7 +#define USB_CLASS_MASS_STORAGE 8 +#define USB_CLASS_HUB 9 +#define USB_CLASS_CDC_DATA 0x0a +#define USB_CLASS_CSCID 0x0b +#define USB_CLASS_CONTENT_SEC 0x0d +#define USB_CLASS_APP_SPEC 0xfe +#define USB_CLASS_VENDOR_SPEC 0xff + +#define USB_SUBCLASS_UNDEFINED 0 +#define USB_SUBCLASS_AUDIO_CONTROL 1 +#define USB_SUBCLASS_AUDIO_STREAMING 2 +#define USB_SUBCLASS_AUDIO_MIDISTREAMING 3 + +#define USB_DIR_OUT 0 +#define USB_DIR_IN 0x80 + +#define USB_TYPE_MASK (0x03 << 5) +#define USB_TYPE_STANDARD (0x00 << 5) +#define USB_TYPE_CLASS (0x01 << 5) +#define USB_TYPE_VENDOR (0x02 << 5) +#define USB_TYPE_RESERVED (0x03 << 5) + +#define USB_RECIP_MASK 0x1f +#define USB_RECIP_DEVICE 0x00 +#define USB_RECIP_INTERFACE 0x01 +#define USB_RECIP_ENDPOINT 0x02 +#define USB_RECIP_OTHER 0x03 + +#define DeviceRequest ((USB_DIR_IN|USB_TYPE_STANDARD|USB_RECIP_DEVICE)<<8) +#define DeviceOutRequest ((USB_DIR_OUT|USB_TYPE_STANDARD|USB_RECIP_DEVICE)<<8) +#define VendorDeviceRequest ((USB_DIR_IN|USB_TYPE_VENDOR|USB_RECIP_DEVICE)<<8) +#define VendorDeviceOutRequest \ + ((USB_DIR_OUT|USB_TYPE_VENDOR|USB_RECIP_DEVICE)<<8) + +#define InterfaceRequest \ + ((USB_DIR_IN|USB_TYPE_STANDARD|USB_RECIP_INTERFACE)<<8) +#define InterfaceOutRequest \ + ((USB_DIR_OUT|USB_TYPE_STANDARD|USB_RECIP_INTERFACE)<<8) +#define ClassInterfaceRequest \ + ((USB_DIR_IN|USB_TYPE_CLASS|USB_RECIP_INTERFACE)<<8) +#define ClassInterfaceOutRequest \ + ((USB_DIR_OUT|USB_TYPE_CLASS|USB_RECIP_INTERFACE)<<8) +#define VendorInterfaceRequest \ + ((USB_DIR_IN|USB_TYPE_VENDOR|USB_RECIP_INTERFACE)<<8) +#define VendorInterfaceOutRequest \ + ((USB_DIR_OUT|USB_TYPE_VENDOR|USB_RECIP_INTERFACE)<<8) + +#define EndpointRequest ((USB_DIR_IN|USB_TYPE_STANDARD|USB_RECIP_ENDPOINT)<<8) +#define EndpointOutRequest \ + ((USB_DIR_OUT|USB_TYPE_STANDARD|USB_RECIP_ENDPOINT)<<8) + +#define USB_REQ_GET_STATUS 0x00 +#define USB_REQ_CLEAR_FEATURE 0x01 +#define USB_REQ_SET_FEATURE 0x03 +#define USB_REQ_SET_ADDRESS 0x05 +#define USB_REQ_GET_DESCRIPTOR 0x06 +#define USB_REQ_SET_DESCRIPTOR 0x07 +#define USB_REQ_GET_CONFIGURATION 0x08 +#define USB_REQ_SET_CONFIGURATION 0x09 +#define USB_REQ_GET_INTERFACE 0x0A +#define USB_REQ_SET_INTERFACE 0x0B +#define USB_REQ_SYNCH_FRAME 0x0C +#define USB_REQ_SET_SEL 0x30 +#define USB_REQ_SET_ISOCH_DELAY 0x31 + +#define USB_DEVICE_SELF_POWERED 0 +#define USB_DEVICE_REMOTE_WAKEUP 1 + +#define USB_DT_DEVICE 0x01 +#define USB_DT_CONFIG 0x02 +#define USB_DT_STRING 0x03 +#define USB_DT_INTERFACE 0x04 +#define USB_DT_ENDPOINT 0x05 +#define USB_DT_DEVICE_QUALIFIER 0x06 +#define USB_DT_OTHER_SPEED_CONFIG 0x07 +#define USB_DT_DEBUG 0x0A +#define USB_DT_INTERFACE_ASSOC 0x0B +#define USB_DT_BOS 0x0F +#define USB_DT_DEVICE_CAPABILITY 0x10 +#define USB_DT_CS_INTERFACE 0x24 +#define USB_DT_CS_ENDPOINT 0x25 +#define USB_DT_ENDPOINT_COMPANION 0x30 + +#define USB_DEV_CAP_WIRELESS 0x01 +#define USB_DEV_CAP_USB2_EXT 0x02 +#define USB_DEV_CAP_SUPERSPEED 0x03 + +#define USB_CFG_ATT_ONE (1 << 7) /* should always be set */ +#define USB_CFG_ATT_SELFPOWER (1 << 6) +#define USB_CFG_ATT_WAKEUP (1 << 5) +#define USB_CFG_ATT_BATTERY (1 << 4) + +#define USB_ENDPOINT_XFER_CONTROL 0 +#define USB_ENDPOINT_XFER_ISOC 1 +#define USB_ENDPOINT_XFER_BULK 2 +#define USB_ENDPOINT_XFER_INT 3 +#define USB_ENDPOINT_XFER_INVALID 255 + +#define USB_INTERFACE_INVALID 255 + +typedef struct USBBusOps USBBusOps; +typedef struct USBPort USBPort; +typedef struct USBDevice USBDevice; +typedef struct USBPacket USBPacket; +typedef struct USBCombinedPacket USBCombinedPacket; +typedef struct USBEndpoint USBEndpoint; + +typedef struct USBDesc USBDesc; +typedef struct USBDescID USBDescID; +typedef struct USBDescDevice USBDescDevice; +typedef struct USBDescConfig USBDescConfig; +typedef struct USBDescIfaceAssoc USBDescIfaceAssoc; +typedef struct USBDescIface USBDescIface; +typedef struct USBDescEndpoint USBDescEndpoint; +typedef struct USBDescOther USBDescOther; +typedef struct USBDescString USBDescString; +typedef struct USBDescMSOS USBDescMSOS; + +struct USBDescString { + uint8_t index; + char *str; + QLIST_ENTRY(USBDescString) next; +}; + +#define USB_MAX_ENDPOINTS 15 +#define USB_MAX_INTERFACES 16 + +struct USBEndpoint { + uint8_t nr; + uint8_t pid; + uint8_t type; + uint8_t ifnum; + int max_packet_size; + int max_streams; + bool pipeline; + bool halted; + USBDevice *dev; + QTAILQ_HEAD(, USBPacket) queue; +}; + +enum USBDeviceFlags { + USB_DEV_FLAG_FULL_PATH, + USB_DEV_FLAG_IS_HOST, + USB_DEV_FLAG_MSOS_DESC_ENABLE, + USB_DEV_FLAG_MSOS_DESC_IN_USE, +}; + +/* definition of a USB device */ +struct USBDevice { + DeviceState qdev; + USBPort *port; + char *port_path; + char *serial; + void *opaque; + uint32_t flags; + + /* Actual connected speed */ + int speed; + /* Supported speeds, not in info because it may be variable (hostdevs) */ + int speedmask; + uint8_t addr; + char product_desc[32]; + int auto_attach; + bool attached; + + int32_t state; + uint8_t setup_buf[8]; + uint8_t data_buf[4096]; + int32_t remote_wakeup; + int32_t setup_state; + int32_t setup_len; + int32_t setup_index; + + USBEndpoint ep_ctl; + USBEndpoint ep_in[USB_MAX_ENDPOINTS]; + USBEndpoint ep_out[USB_MAX_ENDPOINTS]; + + QLIST_HEAD(, USBDescString) strings; + const USBDesc *usb_desc; /* Overrides class usb_desc if not NULL */ + const USBDescDevice *device; + + int configuration; + int ninterfaces; + int altsetting[USB_MAX_INTERFACES]; + const USBDescConfig *config; + const USBDescIface *ifaces[USB_MAX_INTERFACES]; +}; + +#define TYPE_USB_DEVICE "usb-device" +OBJECT_DECLARE_TYPE(USBDevice, USBDeviceClass, USB_DEVICE) + +typedef void (*USBDeviceRealize)(USBDevice *dev, Error **errp); +typedef void (*USBDeviceUnrealize)(USBDevice *dev); + +struct USBDeviceClass { + DeviceClass parent_class; + + USBDeviceRealize realize; + USBDeviceUnrealize unrealize; + + /* + * Walk (enabled) downstream ports, check for a matching device. + * Only hubs implement this. + */ + USBDevice *(*find_device)(USBDevice *dev, uint8_t addr); + + /* + * Called when a packet is canceled. + */ + void (*cancel_packet)(USBDevice *dev, USBPacket *p); + + /* + * Attach the device + */ + void (*handle_attach)(USBDevice *dev); + + /* + * Reset the device + */ + void (*handle_reset)(USBDevice *dev); + + /* + * Process control request. + * Called from handle_packet(). + * + * Status gets stored in p->status, and if p->status == USB_RET_SUCCESS + * then the number of bytes transferred is stored in p->actual_length + */ + void (*handle_control)(USBDevice *dev, USBPacket *p, int request, int value, + int index, int length, uint8_t *data); + + /* + * Process data transfers (both BULK and ISOC). + * Called from handle_packet(). + * + * Status gets stored in p->status, and if p->status == USB_RET_SUCCESS + * then the number of bytes transferred is stored in p->actual_length + */ + void (*handle_data)(USBDevice *dev, USBPacket *p); + + void (*set_interface)(USBDevice *dev, int interface, + int alt_old, int alt_new); + + /* + * Called when the hcd is done queuing packets for an endpoint, only + * necessary for devices which can return USB_RET_ADD_TO_QUEUE. + */ + void (*flush_ep_queue)(USBDevice *dev, USBEndpoint *ep); + + /* + * Called by the hcd to let the device know the queue for an endpoint + * has been unlinked / stopped. Optional may be NULL. + */ + void (*ep_stopped)(USBDevice *dev, USBEndpoint *ep); + + /* + * Called by the hcd to alloc / free streams on a bulk endpoint. + * Optional may be NULL. + */ + int (*alloc_streams)(USBDevice *dev, USBEndpoint **eps, int nr_eps, + int streams); + void (*free_streams)(USBDevice *dev, USBEndpoint **eps, int nr_eps); + + const char *product_desc; + const USBDesc *usb_desc; + bool attached_settable; +}; + +typedef struct USBPortOps { + void (*attach)(USBPort *port); + void (*detach)(USBPort *port); + /* + * This gets called when a device downstream from the device attached to + * the port (iow attached through a hub) gets detached. + */ + void (*child_detach)(USBPort *port, USBDevice *child); + void (*wakeup)(USBPort *port); + /* + * Note that port->dev will be different then the device from which + * the packet originated when a hub is involved. + */ + void (*complete)(USBPort *port, USBPacket *p); +} USBPortOps; + +/* USB port on which a device can be connected */ +struct USBPort { + USBDevice *dev; + int speedmask; + int hubcount; + char path[16]; + USBPortOps *ops; + void *opaque; + int index; /* internal port index, may be used with the opaque */ + QTAILQ_ENTRY(USBPort) next; +}; + +typedef void USBCallback(USBPacket * packet, void *opaque); + +typedef enum USBPacketState { + USB_PACKET_UNDEFINED = 0, + USB_PACKET_SETUP, + USB_PACKET_QUEUED, + USB_PACKET_ASYNC, + USB_PACKET_COMPLETE, + USB_PACKET_CANCELED, +} USBPacketState; + +/* Structure used to hold information about an active USB packet. */ +struct USBPacket { + /* Data fields for use by the driver. */ + int pid; + uint64_t id; + USBEndpoint *ep; + unsigned int stream; + QEMUIOVector iov; + uint64_t parameter; /* control transfers */ + bool short_not_ok; + bool int_req; + int status; /* USB_RET_* status code */ + int actual_length; /* Number of bytes actually transferred */ + /* Internal use by the USB layer. */ + USBPacketState state; + USBCombinedPacket *combined; + QTAILQ_ENTRY(USBPacket) queue; + QTAILQ_ENTRY(USBPacket) combined_entry; +}; + +struct USBCombinedPacket { + USBPacket *first; + QTAILQ_HEAD(, USBPacket) packets; + QEMUIOVector iov; +}; + +void usb_packet_init(USBPacket *p); +void usb_packet_set_state(USBPacket *p, USBPacketState state); +void usb_packet_check_state(USBPacket *p, USBPacketState expected); +void usb_packet_setup(USBPacket *p, int pid, + USBEndpoint *ep, unsigned int stream, + uint64_t id, bool short_not_ok, bool int_req); +void usb_packet_addbuf(USBPacket *p, void *ptr, size_t len); +int usb_packet_map(USBPacket *p, QEMUSGList *sgl); +void usb_packet_unmap(USBPacket *p, QEMUSGList *sgl); +void usb_packet_copy(USBPacket *p, void *ptr, size_t bytes); +void usb_packet_skip(USBPacket *p, size_t bytes); +size_t usb_packet_size(USBPacket *p); +void usb_packet_cleanup(USBPacket *p); + +static inline bool usb_packet_is_inflight(USBPacket *p) +{ + return (p->state == USB_PACKET_QUEUED || + p->state == USB_PACKET_ASYNC); +} + +USBDevice *usb_find_device(USBPort *port, uint8_t addr); + +void usb_handle_packet(USBDevice *dev, USBPacket *p); +void usb_packet_complete(USBDevice *dev, USBPacket *p); +void usb_packet_complete_one(USBDevice *dev, USBPacket *p); +void usb_cancel_packet(USBPacket * p); + +void usb_ep_init(USBDevice *dev); +void usb_ep_reset(USBDevice *dev); +void usb_ep_dump(USBDevice *dev); +struct USBEndpoint *usb_ep_get(USBDevice *dev, int pid, int ep); +uint8_t usb_ep_get_type(USBDevice *dev, int pid, int ep); +void usb_ep_set_type(USBDevice *dev, int pid, int ep, uint8_t type); +void usb_ep_set_ifnum(USBDevice *dev, int pid, int ep, uint8_t ifnum); +void usb_ep_set_max_packet_size(USBDevice *dev, int pid, int ep, + uint16_t raw); +void usb_ep_set_max_streams(USBDevice *dev, int pid, int ep, uint8_t raw); +void usb_ep_set_halted(USBDevice *dev, int pid, int ep, bool halted); +USBPacket *usb_ep_find_packet_by_id(USBDevice *dev, int pid, int ep, + uint64_t id); + +void usb_ep_combine_input_packets(USBEndpoint *ep); +void usb_combined_input_packet_complete(USBDevice *dev, USBPacket *p); +void usb_combined_packet_cancel(USBDevice *dev, USBPacket *p); + +void usb_pick_speed(USBPort *port); +void usb_attach(USBPort *port); +void usb_detach(USBPort *port); +void usb_port_reset(USBPort *port); +void usb_device_reset(USBDevice *dev); +void usb_wakeup(USBEndpoint *ep, unsigned int stream); +void usb_generic_async_ctrl_complete(USBDevice *s, USBPacket *p); + +/* usb-linux.c */ +void hmp_info_usbhost(Monitor *mon, const QDict *qdict); +bool usb_host_dev_is_scsi_storage(USBDevice *usbdev); + +/* usb ports of the VM */ + +#define VM_USB_HUB_SIZE 8 + +/* usb-bus.c */ + +#define TYPE_USB_BUS "usb-bus" +OBJECT_DECLARE_SIMPLE_TYPE(USBBus, USB_BUS) + +struct USBBus { + BusState qbus; + USBBusOps *ops; + int busnr; + int nfree; + int nused; + QTAILQ_HEAD(, USBPort) free; + QTAILQ_HEAD(, USBPort) used; + QTAILQ_ENTRY(USBBus) next; +}; + +struct USBBusOps { + void (*register_companion)(USBBus *bus, USBPort *ports[], + uint32_t portcount, uint32_t firstport, + Error **errp); + void (*wakeup_endpoint)(USBBus *bus, USBEndpoint *ep, unsigned int stream); +}; + +void usb_bus_new(USBBus *bus, size_t bus_size, + USBBusOps *ops, DeviceState *host); +void usb_bus_release(USBBus *bus); +USBBus *usb_bus_find(int busnr); +void usb_legacy_register(const char *typename, const char *usbdevice_name, + USBDevice *(*usbdevice_init)(const char *params)); +USBDevice *usb_new(const char *name); +bool usb_realize_and_unref(USBDevice *dev, USBBus *bus, Error **errp); +USBDevice *usb_create_simple(USBBus *bus, const char *name); +USBDevice *usbdevice_create(const char *cmdline); +void usb_register_port(USBBus *bus, USBPort *port, void *opaque, int index, + USBPortOps *ops, int speedmask); +void usb_register_companion(const char *masterbus, USBPort *ports[], + uint32_t portcount, uint32_t firstport, + void *opaque, USBPortOps *ops, int speedmask, + Error **errp); +void usb_port_location(USBPort *downstream, USBPort *upstream, int portnr); +void usb_unregister_port(USBBus *bus, USBPort *port); +void usb_claim_port(USBDevice *dev, Error **errp); +void usb_release_port(USBDevice *dev); +void usb_device_attach(USBDevice *dev, Error **errp); +int usb_device_detach(USBDevice *dev); +void usb_check_attach(USBDevice *dev, Error **errp); + +static inline USBBus *usb_bus_from_device(USBDevice *d) +{ + return DO_UPCAST(USBBus, qbus, d->qdev.parent_bus); +} + +extern const VMStateDescription vmstate_usb_device; + +#define VMSTATE_USB_DEVICE(_field, _state) { \ + .name = (stringify(_field)), \ + .size = sizeof(USBDevice), \ + .vmsd = &vmstate_usb_device, \ + .flags = VMS_STRUCT, \ + .offset = vmstate_offset_value(_state, _field, USBDevice), \ +} + +USBDevice *usb_device_find_device(USBDevice *dev, uint8_t addr); + +void usb_device_cancel_packet(USBDevice *dev, USBPacket *p); + +void usb_device_handle_attach(USBDevice *dev); + +void usb_device_handle_reset(USBDevice *dev); + +void usb_device_handle_control(USBDevice *dev, USBPacket *p, int request, + int val, int index, int length, uint8_t *data); + +void usb_device_handle_data(USBDevice *dev, USBPacket *p); + +void usb_device_set_interface(USBDevice *dev, int interface, + int alt_old, int alt_new); + +void usb_device_flush_ep_queue(USBDevice *dev, USBEndpoint *ep); + +void usb_device_ep_stopped(USBDevice *dev, USBEndpoint *ep); + +int usb_device_alloc_streams(USBDevice *dev, USBEndpoint **eps, int nr_eps, + int streams); +void usb_device_free_streams(USBDevice *dev, USBEndpoint **eps, int nr_eps); + +const char *usb_device_get_product_desc(USBDevice *dev); + +const USBDesc *usb_device_get_usb_desc(USBDevice *dev); + +/* quirks.c */ + +/* In bulk endpoints are streaming data sources (iow behave like isoc eps) */ +#define USB_QUIRK_BUFFER_BULK_IN 0x01 +/* Bulk pkts in FTDI format, need special handling when combining packets */ +#define USB_QUIRK_IS_FTDI 0x02 + +int usb_get_quirks(uint16_t vendor_id, uint16_t product_id, + uint8_t interface_class, uint8_t interface_subclass, + uint8_t interface_protocol); + +#endif diff --git a/include/hw/usb/chipidea.h b/include/hw/usb/chipidea.h new file mode 100644 index 000000000..fe4113ee0 --- /dev/null +++ b/include/hw/usb/chipidea.h @@ -0,0 +1,17 @@ +#ifndef CHIPIDEA_H +#define CHIPIDEA_H + +#include "hw/usb/hcd-ehci.h" +#include "qom/object.h" + +struct ChipideaState { + /*< private >*/ + EHCISysBusState parent_obj; + + MemoryRegion iomem[3]; +}; + +#define TYPE_CHIPIDEA "usb-chipidea" +OBJECT_DECLARE_SIMPLE_TYPE(ChipideaState, CHIPIDEA) + +#endif /* CHIPIDEA_H */ diff --git a/include/hw/usb/dwc2-regs.h b/include/hw/usb/dwc2-regs.h new file mode 100644 index 000000000..40af23a0b --- /dev/null +++ b/include/hw/usb/dwc2-regs.h @@ -0,0 +1,899 @@ +/* SPDX-License-Identifier: (GPL-2.0+ OR BSD-3-Clause) */ +/* + * Imported from the Linux kernel file drivers/usb/dwc2/hw.h, commit + * a89bae709b3492b478480a2c9734e7e9393b279c ("usb: dwc2: Move + * UTMI_PHY_DATA defines closer") + * + * hw.h - DesignWare HS OTG Controller hardware definitions + * + * Copyright 2004-2013 Synopsys, Inc. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions, and the following disclaimer, + * without modification. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. The names of the above-listed copyright holders may not be used + * to endorse or promote products derived from this software without + * specific prior written permission. + * + * ALTERNATIVELY, this software may be distributed under the terms of the + * GNU General Public License ("GPL") as published by the Free Software + * Foundation; either version 2 of the License, or (at your option) any + * later version. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS + * IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, + * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR + * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR + * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF + * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING + * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef __DWC2_HW_H__ +#define __DWC2_HW_H__ + +#define HSOTG_REG(x) (x) + +#define GOTGCTL HSOTG_REG(0x000) +#define GOTGCTL_CHIRPEN BIT(27) +#define GOTGCTL_MULT_VALID_BC_MASK (0x1f << 22) +#define GOTGCTL_MULT_VALID_BC_SHIFT 22 +#define GOTGCTL_OTGVER BIT(20) +#define GOTGCTL_BSESVLD BIT(19) +#define GOTGCTL_ASESVLD BIT(18) +#define GOTGCTL_DBNC_SHORT BIT(17) +#define GOTGCTL_CONID_B BIT(16) +#define GOTGCTL_DBNCE_FLTR_BYPASS BIT(15) +#define GOTGCTL_DEVHNPEN BIT(11) +#define GOTGCTL_HSTSETHNPEN BIT(10) +#define GOTGCTL_HNPREQ BIT(9) +#define GOTGCTL_HSTNEGSCS BIT(8) +#define GOTGCTL_SESREQ BIT(1) +#define GOTGCTL_SESREQSCS BIT(0) + +#define GOTGINT HSOTG_REG(0x004) +#define GOTGINT_DBNCE_DONE BIT(19) +#define GOTGINT_A_DEV_TOUT_CHG BIT(18) +#define GOTGINT_HST_NEG_DET BIT(17) +#define GOTGINT_HST_NEG_SUC_STS_CHNG BIT(9) +#define GOTGINT_SES_REQ_SUC_STS_CHNG BIT(8) +#define GOTGINT_SES_END_DET BIT(2) + +#define GAHBCFG HSOTG_REG(0x008) +#define GAHBCFG_AHB_SINGLE BIT(23) +#define GAHBCFG_NOTI_ALL_DMA_WRIT BIT(22) +#define GAHBCFG_REM_MEM_SUPP BIT(21) +#define GAHBCFG_P_TXF_EMP_LVL BIT(8) +#define GAHBCFG_NP_TXF_EMP_LVL BIT(7) +#define GAHBCFG_DMA_EN BIT(5) +#define GAHBCFG_HBSTLEN_MASK (0xf << 1) +#define GAHBCFG_HBSTLEN_SHIFT 1 +#define GAHBCFG_HBSTLEN_SINGLE 0 +#define GAHBCFG_HBSTLEN_INCR 1 +#define GAHBCFG_HBSTLEN_INCR4 3 +#define GAHBCFG_HBSTLEN_INCR8 5 +#define GAHBCFG_HBSTLEN_INCR16 7 +#define GAHBCFG_GLBL_INTR_EN BIT(0) +#define GAHBCFG_CTRL_MASK (GAHBCFG_P_TXF_EMP_LVL | \ + GAHBCFG_NP_TXF_EMP_LVL | \ + GAHBCFG_DMA_EN | \ + GAHBCFG_GLBL_INTR_EN) + +#define GUSBCFG HSOTG_REG(0x00C) +#define GUSBCFG_FORCEDEVMODE BIT(30) +#define GUSBCFG_FORCEHOSTMODE BIT(29) +#define GUSBCFG_TXENDDELAY BIT(28) +#define GUSBCFG_ICTRAFFICPULLREMOVE BIT(27) +#define GUSBCFG_ICUSBCAP BIT(26) +#define GUSBCFG_ULPI_INT_PROT_DIS BIT(25) +#define GUSBCFG_INDICATORPASSTHROUGH BIT(24) +#define GUSBCFG_INDICATORCOMPLEMENT BIT(23) +#define GUSBCFG_TERMSELDLPULSE BIT(22) +#define GUSBCFG_ULPI_INT_VBUS_IND BIT(21) +#define GUSBCFG_ULPI_EXT_VBUS_DRV BIT(20) +#define GUSBCFG_ULPI_CLK_SUSP_M BIT(19) +#define GUSBCFG_ULPI_AUTO_RES BIT(18) +#define GUSBCFG_ULPI_FS_LS BIT(17) +#define GUSBCFG_OTG_UTMI_FS_SEL BIT(16) +#define GUSBCFG_PHY_LP_CLK_SEL BIT(15) +#define GUSBCFG_USBTRDTIM_MASK (0xf << 10) +#define GUSBCFG_USBTRDTIM_SHIFT 10 +#define GUSBCFG_HNPCAP BIT(9) +#define GUSBCFG_SRPCAP BIT(8) +#define GUSBCFG_DDRSEL BIT(7) +#define GUSBCFG_PHYSEL BIT(6) +#define GUSBCFG_FSINTF BIT(5) +#define GUSBCFG_ULPI_UTMI_SEL BIT(4) +#define GUSBCFG_PHYIF16 BIT(3) +#define GUSBCFG_PHYIF8 (0 << 3) +#define GUSBCFG_TOUTCAL_MASK (0x7 << 0) +#define GUSBCFG_TOUTCAL_SHIFT 0 +#define GUSBCFG_TOUTCAL_LIMIT 0x7 +#define GUSBCFG_TOUTCAL(_x) ((_x) << 0) + +#define GRSTCTL HSOTG_REG(0x010) +#define GRSTCTL_AHBIDLE BIT(31) +#define GRSTCTL_DMAREQ BIT(30) +#define GRSTCTL_TXFNUM_MASK (0x1f << 6) +#define GRSTCTL_TXFNUM_SHIFT 6 +#define GRSTCTL_TXFNUM_LIMIT 0x1f +#define GRSTCTL_TXFNUM(_x) ((_x) << 6) +#define GRSTCTL_TXFFLSH BIT(5) +#define GRSTCTL_RXFFLSH BIT(4) +#define GRSTCTL_IN_TKNQ_FLSH BIT(3) +#define GRSTCTL_FRMCNTRRST BIT(2) +#define GRSTCTL_HSFTRST BIT(1) +#define GRSTCTL_CSFTRST BIT(0) + +#define GINTSTS HSOTG_REG(0x014) +#define GINTMSK HSOTG_REG(0x018) +#define GINTSTS_WKUPINT BIT(31) +#define GINTSTS_SESSREQINT BIT(30) +#define GINTSTS_DISCONNINT BIT(29) +#define GINTSTS_CONIDSTSCHNG BIT(28) +#define GINTSTS_LPMTRANRCVD BIT(27) +#define GINTSTS_PTXFEMP BIT(26) +#define GINTSTS_HCHINT BIT(25) +#define GINTSTS_PRTINT BIT(24) +#define GINTSTS_RESETDET BIT(23) +#define GINTSTS_FET_SUSP BIT(22) +#define GINTSTS_INCOMPL_IP BIT(21) +#define GINTSTS_INCOMPL_SOOUT BIT(21) +#define GINTSTS_INCOMPL_SOIN BIT(20) +#define GINTSTS_OEPINT BIT(19) +#define GINTSTS_IEPINT BIT(18) +#define GINTSTS_EPMIS BIT(17) +#define GINTSTS_RESTOREDONE BIT(16) +#define GINTSTS_EOPF BIT(15) +#define GINTSTS_ISOUTDROP BIT(14) +#define GINTSTS_ENUMDONE BIT(13) +#define GINTSTS_USBRST BIT(12) +#define GINTSTS_USBSUSP BIT(11) +#define GINTSTS_ERLYSUSP BIT(10) +#define GINTSTS_I2CINT BIT(9) +#define GINTSTS_ULPI_CK_INT BIT(8) +#define GINTSTS_GOUTNAKEFF BIT(7) +#define GINTSTS_GINNAKEFF BIT(6) +#define GINTSTS_NPTXFEMP BIT(5) +#define GINTSTS_RXFLVL BIT(4) +#define GINTSTS_SOF BIT(3) +#define GINTSTS_OTGINT BIT(2) +#define GINTSTS_MODEMIS BIT(1) +#define GINTSTS_CURMODE_HOST BIT(0) + +#define GRXSTSR HSOTG_REG(0x01C) +#define GRXSTSP HSOTG_REG(0x020) +#define GRXSTS_FN_MASK (0x7f << 25) +#define GRXSTS_FN_SHIFT 25 +#define GRXSTS_PKTSTS_MASK (0xf << 17) +#define GRXSTS_PKTSTS_SHIFT 17 +#define GRXSTS_PKTSTS_GLOBALOUTNAK 1 +#define GRXSTS_PKTSTS_OUTRX 2 +#define GRXSTS_PKTSTS_HCHIN 2 +#define GRXSTS_PKTSTS_OUTDONE 3 +#define GRXSTS_PKTSTS_HCHIN_XFER_COMP 3 +#define GRXSTS_PKTSTS_SETUPDONE 4 +#define GRXSTS_PKTSTS_DATATOGGLEERR 5 +#define GRXSTS_PKTSTS_SETUPRX 6 +#define GRXSTS_PKTSTS_HCHHALTED 7 +#define GRXSTS_HCHNUM_MASK (0xf << 0) +#define GRXSTS_HCHNUM_SHIFT 0 +#define GRXSTS_DPID_MASK (0x3 << 15) +#define GRXSTS_DPID_SHIFT 15 +#define GRXSTS_BYTECNT_MASK (0x7ff << 4) +#define GRXSTS_BYTECNT_SHIFT 4 +#define GRXSTS_EPNUM_MASK (0xf << 0) +#define GRXSTS_EPNUM_SHIFT 0 + +#define GRXFSIZ HSOTG_REG(0x024) +#define GRXFSIZ_DEPTH_MASK (0xffff << 0) +#define GRXFSIZ_DEPTH_SHIFT 0 + +#define GNPTXFSIZ HSOTG_REG(0x028) +/* Use FIFOSIZE_* constants to access this register */ + +#define GNPTXSTS HSOTG_REG(0x02C) +#define GNPTXSTS_NP_TXQ_TOP_MASK (0x7f << 24) +#define GNPTXSTS_NP_TXQ_TOP_SHIFT 24 +#define GNPTXSTS_NP_TXQ_SPC_AVAIL_MASK (0xff << 16) +#define GNPTXSTS_NP_TXQ_SPC_AVAIL_SHIFT 16 +#define GNPTXSTS_NP_TXQ_SPC_AVAIL_GET(_v) (((_v) >> 16) & 0xff) +#define GNPTXSTS_NP_TXF_SPC_AVAIL_MASK (0xffff << 0) +#define GNPTXSTS_NP_TXF_SPC_AVAIL_SHIFT 0 +#define GNPTXSTS_NP_TXF_SPC_AVAIL_GET(_v) (((_v) >> 0) & 0xffff) + +#define GI2CCTL HSOTG_REG(0x0030) +#define GI2CCTL_BSYDNE BIT(31) +#define GI2CCTL_RW BIT(30) +#define GI2CCTL_I2CDATSE0 BIT(28) +#define GI2CCTL_I2CDEVADDR_MASK (0x3 << 26) +#define GI2CCTL_I2CDEVADDR_SHIFT 26 +#define GI2CCTL_I2CSUSPCTL BIT(25) +#define GI2CCTL_ACK BIT(24) +#define GI2CCTL_I2CEN BIT(23) +#define GI2CCTL_ADDR_MASK (0x7f << 16) +#define GI2CCTL_ADDR_SHIFT 16 +#define GI2CCTL_REGADDR_MASK (0xff << 8) +#define GI2CCTL_REGADDR_SHIFT 8 +#define GI2CCTL_RWDATA_MASK (0xff << 0) +#define GI2CCTL_RWDATA_SHIFT 0 + +#define GPVNDCTL HSOTG_REG(0x0034) +#define GGPIO HSOTG_REG(0x0038) +#define GGPIO_STM32_OTG_GCCFG_PWRDWN BIT(16) + +#define GUID HSOTG_REG(0x003c) +#define GSNPSID HSOTG_REG(0x0040) +#define GHWCFG1 HSOTG_REG(0x0044) +#define GSNPSID_ID_MASK GENMASK(31, 16) + +#define GHWCFG2 HSOTG_REG(0x0048) +#define GHWCFG2_OTG_ENABLE_IC_USB BIT(31) +#define GHWCFG2_DEV_TOKEN_Q_DEPTH_MASK (0x1f << 26) +#define GHWCFG2_DEV_TOKEN_Q_DEPTH_SHIFT 26 +#define GHWCFG2_HOST_PERIO_TX_Q_DEPTH_MASK (0x3 << 24) +#define GHWCFG2_HOST_PERIO_TX_Q_DEPTH_SHIFT 24 +#define GHWCFG2_NONPERIO_TX_Q_DEPTH_MASK (0x3 << 22) +#define GHWCFG2_NONPERIO_TX_Q_DEPTH_SHIFT 22 +#define GHWCFG2_MULTI_PROC_INT BIT(20) +#define GHWCFG2_DYNAMIC_FIFO BIT(19) +#define GHWCFG2_PERIO_EP_SUPPORTED BIT(18) +#define GHWCFG2_NUM_HOST_CHAN_MASK (0xf << 14) +#define GHWCFG2_NUM_HOST_CHAN_SHIFT 14 +#define GHWCFG2_NUM_DEV_EP_MASK (0xf << 10) +#define GHWCFG2_NUM_DEV_EP_SHIFT 10 +#define GHWCFG2_FS_PHY_TYPE_MASK (0x3 << 8) +#define GHWCFG2_FS_PHY_TYPE_SHIFT 8 +#define GHWCFG2_FS_PHY_TYPE_NOT_SUPPORTED 0 +#define GHWCFG2_FS_PHY_TYPE_DEDICATED 1 +#define GHWCFG2_FS_PHY_TYPE_SHARED_UTMI 2 +#define GHWCFG2_FS_PHY_TYPE_SHARED_ULPI 3 +#define GHWCFG2_HS_PHY_TYPE_MASK (0x3 << 6) +#define GHWCFG2_HS_PHY_TYPE_SHIFT 6 +#define GHWCFG2_HS_PHY_TYPE_NOT_SUPPORTED 0 +#define GHWCFG2_HS_PHY_TYPE_UTMI 1 +#define GHWCFG2_HS_PHY_TYPE_ULPI 2 +#define GHWCFG2_HS_PHY_TYPE_UTMI_ULPI 3 +#define GHWCFG2_POINT2POINT BIT(5) +#define GHWCFG2_ARCHITECTURE_MASK (0x3 << 3) +#define GHWCFG2_ARCHITECTURE_SHIFT 3 +#define GHWCFG2_SLAVE_ONLY_ARCH 0 +#define GHWCFG2_EXT_DMA_ARCH 1 +#define GHWCFG2_INT_DMA_ARCH 2 +#define GHWCFG2_OP_MODE_MASK (0x7 << 0) +#define GHWCFG2_OP_MODE_SHIFT 0 +#define GHWCFG2_OP_MODE_HNP_SRP_CAPABLE 0 +#define GHWCFG2_OP_MODE_SRP_ONLY_CAPABLE 1 +#define GHWCFG2_OP_MODE_NO_HNP_SRP_CAPABLE 2 +#define GHWCFG2_OP_MODE_SRP_CAPABLE_DEVICE 3 +#define GHWCFG2_OP_MODE_NO_SRP_CAPABLE_DEVICE 4 +#define GHWCFG2_OP_MODE_SRP_CAPABLE_HOST 5 +#define GHWCFG2_OP_MODE_NO_SRP_CAPABLE_HOST 6 +#define GHWCFG2_OP_MODE_UNDEFINED 7 + +#define GHWCFG3 HSOTG_REG(0x004c) +#define GHWCFG3_DFIFO_DEPTH_MASK (0xffff << 16) +#define GHWCFG3_DFIFO_DEPTH_SHIFT 16 +#define GHWCFG3_OTG_LPM_EN BIT(15) +#define GHWCFG3_BC_SUPPORT BIT(14) +#define GHWCFG3_OTG_ENABLE_HSIC BIT(13) +#define GHWCFG3_ADP_SUPP BIT(12) +#define GHWCFG3_SYNCH_RESET_TYPE BIT(11) +#define GHWCFG3_OPTIONAL_FEATURES BIT(10) +#define GHWCFG3_VENDOR_CTRL_IF BIT(9) +#define GHWCFG3_I2C BIT(8) +#define GHWCFG3_OTG_FUNC BIT(7) +#define GHWCFG3_PACKET_SIZE_CNTR_WIDTH_MASK (0x7 << 4) +#define GHWCFG3_PACKET_SIZE_CNTR_WIDTH_SHIFT 4 +#define GHWCFG3_XFER_SIZE_CNTR_WIDTH_MASK (0xf << 0) +#define GHWCFG3_XFER_SIZE_CNTR_WIDTH_SHIFT 0 + +#define GHWCFG4 HSOTG_REG(0x0050) +#define GHWCFG4_DESC_DMA_DYN BIT(31) +#define GHWCFG4_DESC_DMA BIT(30) +#define GHWCFG4_NUM_IN_EPS_MASK (0xf << 26) +#define GHWCFG4_NUM_IN_EPS_SHIFT 26 +#define GHWCFG4_DED_FIFO_EN BIT(25) +#define GHWCFG4_DED_FIFO_SHIFT 25 +#define GHWCFG4_SESSION_END_FILT_EN BIT(24) +#define GHWCFG4_B_VALID_FILT_EN BIT(23) +#define GHWCFG4_A_VALID_FILT_EN BIT(22) +#define GHWCFG4_VBUS_VALID_FILT_EN BIT(21) +#define GHWCFG4_IDDIG_FILT_EN BIT(20) +#define GHWCFG4_NUM_DEV_MODE_CTRL_EP_MASK (0xf << 16) +#define GHWCFG4_NUM_DEV_MODE_CTRL_EP_SHIFT 16 +#define GHWCFG4_UTMI_PHY_DATA_WIDTH_MASK (0x3 << 14) +#define GHWCFG4_UTMI_PHY_DATA_WIDTH_SHIFT 14 +#define GHWCFG4_UTMI_PHY_DATA_WIDTH_8 0 +#define GHWCFG4_UTMI_PHY_DATA_WIDTH_16 1 +#define GHWCFG4_UTMI_PHY_DATA_WIDTH_8_OR_16 2 +#define GHWCFG4_ACG_SUPPORTED BIT(12) +#define GHWCFG4_IPG_ISOC_SUPPORTED BIT(11) +#define GHWCFG4_SERVICE_INTERVAL_SUPPORTED BIT(10) +#define GHWCFG4_XHIBER BIT(7) +#define GHWCFG4_HIBER BIT(6) +#define GHWCFG4_MIN_AHB_FREQ BIT(5) +#define GHWCFG4_POWER_OPTIMIZ BIT(4) +#define GHWCFG4_NUM_DEV_PERIO_IN_EP_MASK (0xf << 0) +#define GHWCFG4_NUM_DEV_PERIO_IN_EP_SHIFT 0 + +#define GLPMCFG HSOTG_REG(0x0054) +#define GLPMCFG_INVSELHSIC BIT(31) +#define GLPMCFG_HSICCON BIT(30) +#define GLPMCFG_RSTRSLPSTS BIT(29) +#define GLPMCFG_ENBESL BIT(28) +#define GLPMCFG_LPM_RETRYCNT_STS_MASK (0x7 << 25) +#define GLPMCFG_LPM_RETRYCNT_STS_SHIFT 25 +#define GLPMCFG_SNDLPM BIT(24) +#define GLPMCFG_RETRY_CNT_MASK (0x7 << 21) +#define GLPMCFG_RETRY_CNT_SHIFT 21 +#define GLPMCFG_LPM_REJECT_CTRL_CONTROL BIT(21) +#define GLPMCFG_LPM_ACCEPT_CTRL_ISOC BIT(22) +#define GLPMCFG_LPM_CHNL_INDX_MASK (0xf << 17) +#define GLPMCFG_LPM_CHNL_INDX_SHIFT 17 +#define GLPMCFG_L1RESUMEOK BIT(16) +#define GLPMCFG_SLPSTS BIT(15) +#define GLPMCFG_COREL1RES_MASK (0x3 << 13) +#define GLPMCFG_COREL1RES_SHIFT 13 +#define GLPMCFG_HIRD_THRES_MASK (0x1f << 8) +#define GLPMCFG_HIRD_THRES_SHIFT 8 +#define GLPMCFG_HIRD_THRES_EN (0x10 << 8) +#define GLPMCFG_ENBLSLPM BIT(7) +#define GLPMCFG_BREMOTEWAKE BIT(6) +#define GLPMCFG_HIRD_MASK (0xf << 2) +#define GLPMCFG_HIRD_SHIFT 2 +#define GLPMCFG_APPL1RES BIT(1) +#define GLPMCFG_LPMCAP BIT(0) + +#define GPWRDN HSOTG_REG(0x0058) +#define GPWRDN_MULT_VAL_ID_BC_MASK (0x1f << 24) +#define GPWRDN_MULT_VAL_ID_BC_SHIFT 24 +#define GPWRDN_ADP_INT BIT(23) +#define GPWRDN_BSESSVLD BIT(22) +#define GPWRDN_IDSTS BIT(21) +#define GPWRDN_LINESTATE_MASK (0x3 << 19) +#define GPWRDN_LINESTATE_SHIFT 19 +#define GPWRDN_STS_CHGINT_MSK BIT(18) +#define GPWRDN_STS_CHGINT BIT(17) +#define GPWRDN_SRP_DET_MSK BIT(16) +#define GPWRDN_SRP_DET BIT(15) +#define GPWRDN_CONNECT_DET_MSK BIT(14) +#define GPWRDN_CONNECT_DET BIT(13) +#define GPWRDN_DISCONN_DET_MSK BIT(12) +#define GPWRDN_DISCONN_DET BIT(11) +#define GPWRDN_RST_DET_MSK BIT(10) +#define GPWRDN_RST_DET BIT(9) +#define GPWRDN_LNSTSCHG_MSK BIT(8) +#define GPWRDN_LNSTSCHG BIT(7) +#define GPWRDN_DIS_VBUS BIT(6) +#define GPWRDN_PWRDNSWTCH BIT(5) +#define GPWRDN_PWRDNRSTN BIT(4) +#define GPWRDN_PWRDNCLMP BIT(3) +#define GPWRDN_RESTORE BIT(2) +#define GPWRDN_PMUACTV BIT(1) +#define GPWRDN_PMUINTSEL BIT(0) + +#define GDFIFOCFG HSOTG_REG(0x005c) +#define GDFIFOCFG_EPINFOBASE_MASK (0xffff << 16) +#define GDFIFOCFG_EPINFOBASE_SHIFT 16 +#define GDFIFOCFG_GDFIFOCFG_MASK (0xffff << 0) +#define GDFIFOCFG_GDFIFOCFG_SHIFT 0 + +#define ADPCTL HSOTG_REG(0x0060) +#define ADPCTL_AR_MASK (0x3 << 27) +#define ADPCTL_AR_SHIFT 27 +#define ADPCTL_ADP_TMOUT_INT_MSK BIT(26) +#define ADPCTL_ADP_SNS_INT_MSK BIT(25) +#define ADPCTL_ADP_PRB_INT_MSK BIT(24) +#define ADPCTL_ADP_TMOUT_INT BIT(23) +#define ADPCTL_ADP_SNS_INT BIT(22) +#define ADPCTL_ADP_PRB_INT BIT(21) +#define ADPCTL_ADPENA BIT(20) +#define ADPCTL_ADPRES BIT(19) +#define ADPCTL_ENASNS BIT(18) +#define ADPCTL_ENAPRB BIT(17) +#define ADPCTL_RTIM_MASK (0x7ff << 6) +#define ADPCTL_RTIM_SHIFT 6 +#define ADPCTL_PRB_PER_MASK (0x3 << 4) +#define ADPCTL_PRB_PER_SHIFT 4 +#define ADPCTL_PRB_DELTA_MASK (0x3 << 2) +#define ADPCTL_PRB_DELTA_SHIFT 2 +#define ADPCTL_PRB_DSCHRG_MASK (0x3 << 0) +#define ADPCTL_PRB_DSCHRG_SHIFT 0 + +#define GREFCLK HSOTG_REG(0x0064) +#define GREFCLK_REFCLKPER_MASK (0x1ffff << 15) +#define GREFCLK_REFCLKPER_SHIFT 15 +#define GREFCLK_REF_CLK_MODE BIT(14) +#define GREFCLK_SOF_CNT_WKUP_ALERT_MASK (0x3ff) +#define GREFCLK_SOF_CNT_WKUP_ALERT_SHIFT 0 + +#define GINTMSK2 HSOTG_REG(0x0068) +#define GINTMSK2_WKUP_ALERT_INT_MSK BIT(0) + +#define GINTSTS2 HSOTG_REG(0x006c) +#define GINTSTS2_WKUP_ALERT_INT BIT(0) + +#define HPTXFSIZ HSOTG_REG(0x100) +/* Use FIFOSIZE_* constants to access this register */ + +#define DPTXFSIZN(_a) HSOTG_REG(0x104 + (((_a) - 1) * 4)) +/* Use FIFOSIZE_* constants to access this register */ + +/* These apply to the GNPTXFSIZ, HPTXFSIZ and DPTXFSIZN registers */ +#define FIFOSIZE_DEPTH_MASK (0xffff << 16) +#define FIFOSIZE_DEPTH_SHIFT 16 +#define FIFOSIZE_STARTADDR_MASK (0xffff << 0) +#define FIFOSIZE_STARTADDR_SHIFT 0 +#define FIFOSIZE_DEPTH_GET(_x) (((_x) >> 16) & 0xffff) + +/* Device mode registers */ + +#define DCFG HSOTG_REG(0x800) +#define DCFG_DESCDMA_EN BIT(23) +#define DCFG_EPMISCNT_MASK (0x1f << 18) +#define DCFG_EPMISCNT_SHIFT 18 +#define DCFG_EPMISCNT_LIMIT 0x1f +#define DCFG_EPMISCNT(_x) ((_x) << 18) +#define DCFG_IPG_ISOC_SUPPORDED BIT(17) +#define DCFG_PERFRINT_MASK (0x3 << 11) +#define DCFG_PERFRINT_SHIFT 11 +#define DCFG_PERFRINT_LIMIT 0x3 +#define DCFG_PERFRINT(_x) ((_x) << 11) +#define DCFG_DEVADDR_MASK (0x7f << 4) +#define DCFG_DEVADDR_SHIFT 4 +#define DCFG_DEVADDR_LIMIT 0x7f +#define DCFG_DEVADDR(_x) ((_x) << 4) +#define DCFG_NZ_STS_OUT_HSHK BIT(2) +#define DCFG_DEVSPD_MASK (0x3 << 0) +#define DCFG_DEVSPD_SHIFT 0 +#define DCFG_DEVSPD_HS 0 +#define DCFG_DEVSPD_FS 1 +#define DCFG_DEVSPD_LS 2 +#define DCFG_DEVSPD_FS48 3 + +#define DCTL HSOTG_REG(0x804) +#define DCTL_SERVICE_INTERVAL_SUPPORTED BIT(19) +#define DCTL_PWRONPRGDONE BIT(11) +#define DCTL_CGOUTNAK BIT(10) +#define DCTL_SGOUTNAK BIT(9) +#define DCTL_CGNPINNAK BIT(8) +#define DCTL_SGNPINNAK BIT(7) +#define DCTL_TSTCTL_MASK (0x7 << 4) +#define DCTL_TSTCTL_SHIFT 4 +#define DCTL_GOUTNAKSTS BIT(3) +#define DCTL_GNPINNAKSTS BIT(2) +#define DCTL_SFTDISCON BIT(1) +#define DCTL_RMTWKUPSIG BIT(0) + +#define DSTS HSOTG_REG(0x808) +#define DSTS_SOFFN_MASK (0x3fff << 8) +#define DSTS_SOFFN_SHIFT 8 +#define DSTS_SOFFN_LIMIT 0x3fff +#define DSTS_SOFFN(_x) ((_x) << 8) +#define DSTS_ERRATICERR BIT(3) +#define DSTS_ENUMSPD_MASK (0x3 << 1) +#define DSTS_ENUMSPD_SHIFT 1 +#define DSTS_ENUMSPD_HS 0 +#define DSTS_ENUMSPD_FS 1 +#define DSTS_ENUMSPD_LS 2 +#define DSTS_ENUMSPD_FS48 3 +#define DSTS_SUSPSTS BIT(0) + +#define DIEPMSK HSOTG_REG(0x810) +#define DIEPMSK_NAKMSK BIT(13) +#define DIEPMSK_BNAININTRMSK BIT(9) +#define DIEPMSK_TXFIFOUNDRNMSK BIT(8) +#define DIEPMSK_TXFIFOEMPTY BIT(7) +#define DIEPMSK_INEPNAKEFFMSK BIT(6) +#define DIEPMSK_INTKNEPMISMSK BIT(5) +#define DIEPMSK_INTKNTXFEMPMSK BIT(4) +#define DIEPMSK_TIMEOUTMSK BIT(3) +#define DIEPMSK_AHBERRMSK BIT(2) +#define DIEPMSK_EPDISBLDMSK BIT(1) +#define DIEPMSK_XFERCOMPLMSK BIT(0) + +#define DOEPMSK HSOTG_REG(0x814) +#define DOEPMSK_BNAMSK BIT(9) +#define DOEPMSK_BACK2BACKSETUP BIT(6) +#define DOEPMSK_STSPHSERCVDMSK BIT(5) +#define DOEPMSK_OUTTKNEPDISMSK BIT(4) +#define DOEPMSK_SETUPMSK BIT(3) +#define DOEPMSK_AHBERRMSK BIT(2) +#define DOEPMSK_EPDISBLDMSK BIT(1) +#define DOEPMSK_XFERCOMPLMSK BIT(0) + +#define DAINT HSOTG_REG(0x818) +#define DAINTMSK HSOTG_REG(0x81C) +#define DAINT_OUTEP_SHIFT 16 +#define DAINT_OUTEP(_x) (1 << ((_x) + 16)) +#define DAINT_INEP(_x) (1 << (_x)) + +#define DTKNQR1 HSOTG_REG(0x820) +#define DTKNQR2 HSOTG_REG(0x824) +#define DTKNQR3 HSOTG_REG(0x830) +#define DTKNQR4 HSOTG_REG(0x834) +#define DIEPEMPMSK HSOTG_REG(0x834) + +#define DVBUSDIS HSOTG_REG(0x828) +#define DVBUSPULSE HSOTG_REG(0x82C) + +#define DIEPCTL0 HSOTG_REG(0x900) +#define DIEPCTL(_a) HSOTG_REG(0x900 + ((_a) * 0x20)) + +#define DOEPCTL0 HSOTG_REG(0xB00) +#define DOEPCTL(_a) HSOTG_REG(0xB00 + ((_a) * 0x20)) + +/* EP0 specialness: + * bits[29..28] - reserved (no SetD0PID, SetD1PID) + * bits[25..22] - should always be zero, this isn't a periodic endpoint + * bits[10..0] - MPS setting different for EP0 + */ +#define D0EPCTL_MPS_MASK (0x3 << 0) +#define D0EPCTL_MPS_SHIFT 0 +#define D0EPCTL_MPS_64 0 +#define D0EPCTL_MPS_32 1 +#define D0EPCTL_MPS_16 2 +#define D0EPCTL_MPS_8 3 + +#define DXEPCTL_EPENA BIT(31) +#define DXEPCTL_EPDIS BIT(30) +#define DXEPCTL_SETD1PID BIT(29) +#define DXEPCTL_SETODDFR BIT(29) +#define DXEPCTL_SETD0PID BIT(28) +#define DXEPCTL_SETEVENFR BIT(28) +#define DXEPCTL_SNAK BIT(27) +#define DXEPCTL_CNAK BIT(26) +#define DXEPCTL_TXFNUM_MASK (0xf << 22) +#define DXEPCTL_TXFNUM_SHIFT 22 +#define DXEPCTL_TXFNUM_LIMIT 0xf +#define DXEPCTL_TXFNUM(_x) ((_x) << 22) +#define DXEPCTL_STALL BIT(21) +#define DXEPCTL_SNP BIT(20) +#define DXEPCTL_EPTYPE_MASK (0x3 << 18) +#define DXEPCTL_EPTYPE_CONTROL (0x0 << 18) +#define DXEPCTL_EPTYPE_ISO (0x1 << 18) +#define DXEPCTL_EPTYPE_BULK (0x2 << 18) +#define DXEPCTL_EPTYPE_INTERRUPT (0x3 << 18) + +#define DXEPCTL_NAKSTS BIT(17) +#define DXEPCTL_DPID BIT(16) +#define DXEPCTL_EOFRNUM BIT(16) +#define DXEPCTL_USBACTEP BIT(15) +#define DXEPCTL_NEXTEP_MASK (0xf << 11) +#define DXEPCTL_NEXTEP_SHIFT 11 +#define DXEPCTL_NEXTEP_LIMIT 0xf +#define DXEPCTL_NEXTEP(_x) ((_x) << 11) +#define DXEPCTL_MPS_MASK (0x7ff << 0) +#define DXEPCTL_MPS_SHIFT 0 +#define DXEPCTL_MPS_LIMIT 0x7ff +#define DXEPCTL_MPS(_x) ((_x) << 0) + +#define DIEPINT(_a) HSOTG_REG(0x908 + ((_a) * 0x20)) +#define DOEPINT(_a) HSOTG_REG(0xB08 + ((_a) * 0x20)) +#define DXEPINT_SETUP_RCVD BIT(15) +#define DXEPINT_NYETINTRPT BIT(14) +#define DXEPINT_NAKINTRPT BIT(13) +#define DXEPINT_BBLEERRINTRPT BIT(12) +#define DXEPINT_PKTDRPSTS BIT(11) +#define DXEPINT_BNAINTR BIT(9) +#define DXEPINT_TXFIFOUNDRN BIT(8) +#define DXEPINT_OUTPKTERR BIT(8) +#define DXEPINT_TXFEMP BIT(7) +#define DXEPINT_INEPNAKEFF BIT(6) +#define DXEPINT_BACK2BACKSETUP BIT(6) +#define DXEPINT_INTKNEPMIS BIT(5) +#define DXEPINT_STSPHSERCVD BIT(5) +#define DXEPINT_INTKNTXFEMP BIT(4) +#define DXEPINT_OUTTKNEPDIS BIT(4) +#define DXEPINT_TIMEOUT BIT(3) +#define DXEPINT_SETUP BIT(3) +#define DXEPINT_AHBERR BIT(2) +#define DXEPINT_EPDISBLD BIT(1) +#define DXEPINT_XFERCOMPL BIT(0) + +#define DIEPTSIZ0 HSOTG_REG(0x910) +#define DIEPTSIZ0_PKTCNT_MASK (0x3 << 19) +#define DIEPTSIZ0_PKTCNT_SHIFT 19 +#define DIEPTSIZ0_PKTCNT_LIMIT 0x3 +#define DIEPTSIZ0_PKTCNT(_x) ((_x) << 19) +#define DIEPTSIZ0_XFERSIZE_MASK (0x7f << 0) +#define DIEPTSIZ0_XFERSIZE_SHIFT 0 +#define DIEPTSIZ0_XFERSIZE_LIMIT 0x7f +#define DIEPTSIZ0_XFERSIZE(_x) ((_x) << 0) + +#define DOEPTSIZ0 HSOTG_REG(0xB10) +#define DOEPTSIZ0_SUPCNT_MASK (0x3 << 29) +#define DOEPTSIZ0_SUPCNT_SHIFT 29 +#define DOEPTSIZ0_SUPCNT_LIMIT 0x3 +#define DOEPTSIZ0_SUPCNT(_x) ((_x) << 29) +#define DOEPTSIZ0_PKTCNT BIT(19) +#define DOEPTSIZ0_XFERSIZE_MASK (0x7f << 0) +#define DOEPTSIZ0_XFERSIZE_SHIFT 0 + +#define DIEPTSIZ(_a) HSOTG_REG(0x910 + ((_a) * 0x20)) +#define DOEPTSIZ(_a) HSOTG_REG(0xB10 + ((_a) * 0x20)) +#define DXEPTSIZ_MC_MASK (0x3 << 29) +#define DXEPTSIZ_MC_SHIFT 29 +#define DXEPTSIZ_MC_LIMIT 0x3 +#define DXEPTSIZ_MC(_x) ((_x) << 29) +#define DXEPTSIZ_PKTCNT_MASK (0x3ff << 19) +#define DXEPTSIZ_PKTCNT_SHIFT 19 +#define DXEPTSIZ_PKTCNT_LIMIT 0x3ff +#define DXEPTSIZ_PKTCNT_GET(_v) (((_v) >> 19) & 0x3ff) +#define DXEPTSIZ_PKTCNT(_x) ((_x) << 19) +#define DXEPTSIZ_XFERSIZE_MASK (0x7ffff << 0) +#define DXEPTSIZ_XFERSIZE_SHIFT 0 +#define DXEPTSIZ_XFERSIZE_LIMIT 0x7ffff +#define DXEPTSIZ_XFERSIZE_GET(_v) (((_v) >> 0) & 0x7ffff) +#define DXEPTSIZ_XFERSIZE(_x) ((_x) << 0) + +#define DIEPDMA(_a) HSOTG_REG(0x914 + ((_a) * 0x20)) +#define DOEPDMA(_a) HSOTG_REG(0xB14 + ((_a) * 0x20)) + +#define DTXFSTS(_a) HSOTG_REG(0x918 + ((_a) * 0x20)) + +#define PCGCTL HSOTG_REG(0x0e00) +#define PCGCTL_IF_DEV_MODE BIT(31) +#define PCGCTL_P2HD_PRT_SPD_MASK (0x3 << 29) +#define PCGCTL_P2HD_PRT_SPD_SHIFT 29 +#define PCGCTL_P2HD_DEV_ENUM_SPD_MASK (0x3 << 27) +#define PCGCTL_P2HD_DEV_ENUM_SPD_SHIFT 27 +#define PCGCTL_MAC_DEV_ADDR_MASK (0x7f << 20) +#define PCGCTL_MAC_DEV_ADDR_SHIFT 20 +#define PCGCTL_MAX_TERMSEL BIT(19) +#define PCGCTL_MAX_XCVRSELECT_MASK (0x3 << 17) +#define PCGCTL_MAX_XCVRSELECT_SHIFT 17 +#define PCGCTL_PORT_POWER BIT(16) +#define PCGCTL_PRT_CLK_SEL_MASK (0x3 << 14) +#define PCGCTL_PRT_CLK_SEL_SHIFT 14 +#define PCGCTL_ESS_REG_RESTORED BIT(13) +#define PCGCTL_EXTND_HIBER_SWITCH BIT(12) +#define PCGCTL_EXTND_HIBER_PWRCLMP BIT(11) +#define PCGCTL_ENBL_EXTND_HIBER BIT(10) +#define PCGCTL_RESTOREMODE BIT(9) +#define PCGCTL_RESETAFTSUSP BIT(8) +#define PCGCTL_DEEP_SLEEP BIT(7) +#define PCGCTL_PHY_IN_SLEEP BIT(6) +#define PCGCTL_ENBL_SLEEP_GATING BIT(5) +#define PCGCTL_RSTPDWNMODULE BIT(3) +#define PCGCTL_PWRCLMP BIT(2) +#define PCGCTL_GATEHCLK BIT(1) +#define PCGCTL_STOPPCLK BIT(0) + +#define PCGCCTL1 HSOTG_REG(0xe04) +#define PCGCCTL1_TIMER (0x3 << 1) +#define PCGCCTL1_GATEEN BIT(0) + +#define EPFIFO(_a) HSOTG_REG(0x1000 + ((_a) * 0x1000)) + +/* Host Mode Registers */ + +#define HCFG HSOTG_REG(0x0400) +#define HCFG_MODECHTIMEN BIT(31) +#define HCFG_PERSCHEDENA BIT(26) +#define HCFG_FRLISTEN_MASK (0x3 << 24) +#define HCFG_FRLISTEN_SHIFT 24 +#define HCFG_FRLISTEN_8 (0 << 24) +#define FRLISTEN_8_SIZE 8 +#define HCFG_FRLISTEN_16 BIT(24) +#define FRLISTEN_16_SIZE 16 +#define HCFG_FRLISTEN_32 (2 << 24) +#define FRLISTEN_32_SIZE 32 +#define HCFG_FRLISTEN_64 (3 << 24) +#define FRLISTEN_64_SIZE 64 +#define HCFG_DESCDMA BIT(23) +#define HCFG_RESVALID_MASK (0xff << 8) +#define HCFG_RESVALID_SHIFT 8 +#define HCFG_ENA32KHZ BIT(7) +#define HCFG_FSLSSUPP BIT(2) +#define HCFG_FSLSPCLKSEL_MASK (0x3 << 0) +#define HCFG_FSLSPCLKSEL_SHIFT 0 +#define HCFG_FSLSPCLKSEL_30_60_MHZ 0 +#define HCFG_FSLSPCLKSEL_48_MHZ 1 +#define HCFG_FSLSPCLKSEL_6_MHZ 2 + +#define HFIR HSOTG_REG(0x0404) +#define HFIR_FRINT_MASK (0xffff << 0) +#define HFIR_FRINT_SHIFT 0 +#define HFIR_RLDCTRL BIT(16) + +#define HFNUM HSOTG_REG(0x0408) +#define HFNUM_FRREM_MASK (0xffff << 16) +#define HFNUM_FRREM_SHIFT 16 +#define HFNUM_FRNUM_MASK (0xffff << 0) +#define HFNUM_FRNUM_SHIFT 0 +#define HFNUM_MAX_FRNUM 0x3fff + +#define HPTXSTS HSOTG_REG(0x0410) +#define TXSTS_QTOP_ODD BIT(31) +#define TXSTS_QTOP_CHNEP_MASK (0xf << 27) +#define TXSTS_QTOP_CHNEP_SHIFT 27 +#define TXSTS_QTOP_TOKEN_MASK (0x3 << 25) +#define TXSTS_QTOP_TOKEN_SHIFT 25 +#define TXSTS_QTOP_TERMINATE BIT(24) +#define TXSTS_QSPCAVAIL_MASK (0xff << 16) +#define TXSTS_QSPCAVAIL_SHIFT 16 +#define TXSTS_FSPCAVAIL_MASK (0xffff << 0) +#define TXSTS_FSPCAVAIL_SHIFT 0 + +#define HAINT HSOTG_REG(0x0414) +#define HAINTMSK HSOTG_REG(0x0418) +#define HFLBADDR HSOTG_REG(0x041c) + +#define HPRT0 HSOTG_REG(0x0440) +#define HPRT0_SPD_MASK (0x3 << 17) +#define HPRT0_SPD_SHIFT 17 +#define HPRT0_SPD_HIGH_SPEED 0 +#define HPRT0_SPD_FULL_SPEED 1 +#define HPRT0_SPD_LOW_SPEED 2 +#define HPRT0_TSTCTL_MASK (0xf << 13) +#define HPRT0_TSTCTL_SHIFT 13 +#define HPRT0_PWR BIT(12) +#define HPRT0_LNSTS_MASK (0x3 << 10) +#define HPRT0_LNSTS_SHIFT 10 +#define HPRT0_RST BIT(8) +#define HPRT0_SUSP BIT(7) +#define HPRT0_RES BIT(6) +#define HPRT0_OVRCURRCHG BIT(5) +#define HPRT0_OVRCURRACT BIT(4) +#define HPRT0_ENACHG BIT(3) +#define HPRT0_ENA BIT(2) +#define HPRT0_CONNDET BIT(1) +#define HPRT0_CONNSTS BIT(0) + +#define HCCHAR(_ch) HSOTG_REG(0x0500 + 0x20 * (_ch)) +#define HCCHAR_CHENA BIT(31) +#define HCCHAR_CHDIS BIT(30) +#define HCCHAR_ODDFRM BIT(29) +#define HCCHAR_DEVADDR_MASK (0x7f << 22) +#define HCCHAR_DEVADDR_SHIFT 22 +#define HCCHAR_MULTICNT_MASK (0x3 << 20) +#define HCCHAR_MULTICNT_SHIFT 20 +#define HCCHAR_EPTYPE_MASK (0x3 << 18) +#define HCCHAR_EPTYPE_SHIFT 18 +#define HCCHAR_LSPDDEV BIT(17) +#define HCCHAR_EPDIR BIT(15) +#define HCCHAR_EPNUM_MASK (0xf << 11) +#define HCCHAR_EPNUM_SHIFT 11 +#define HCCHAR_MPS_MASK (0x7ff << 0) +#define HCCHAR_MPS_SHIFT 0 + +#define HCSPLT(_ch) HSOTG_REG(0x0504 + 0x20 * (_ch)) +#define HCSPLT_SPLTENA BIT(31) +#define HCSPLT_COMPSPLT BIT(16) +#define HCSPLT_XACTPOS_MASK (0x3 << 14) +#define HCSPLT_XACTPOS_SHIFT 14 +#define HCSPLT_XACTPOS_MID 0 +#define HCSPLT_XACTPOS_END 1 +#define HCSPLT_XACTPOS_BEGIN 2 +#define HCSPLT_XACTPOS_ALL 3 +#define HCSPLT_HUBADDR_MASK (0x7f << 7) +#define HCSPLT_HUBADDR_SHIFT 7 +#define HCSPLT_PRTADDR_MASK (0x7f << 0) +#define HCSPLT_PRTADDR_SHIFT 0 + +#define HCINT(_ch) HSOTG_REG(0x0508 + 0x20 * (_ch)) +#define HCINTMSK(_ch) HSOTG_REG(0x050c + 0x20 * (_ch)) +#define HCINTMSK_RESERVED14_31 (0x3ffff << 14) +#define HCINTMSK_FRM_LIST_ROLL BIT(13) +#define HCINTMSK_XCS_XACT BIT(12) +#define HCINTMSK_BNA BIT(11) +#define HCINTMSK_DATATGLERR BIT(10) +#define HCINTMSK_FRMOVRUN BIT(9) +#define HCINTMSK_BBLERR BIT(8) +#define HCINTMSK_XACTERR BIT(7) +#define HCINTMSK_NYET BIT(6) +#define HCINTMSK_ACK BIT(5) +#define HCINTMSK_NAK BIT(4) +#define HCINTMSK_STALL BIT(3) +#define HCINTMSK_AHBERR BIT(2) +#define HCINTMSK_CHHLTD BIT(1) +#define HCINTMSK_XFERCOMPL BIT(0) + +#define HCTSIZ(_ch) HSOTG_REG(0x0510 + 0x20 * (_ch)) +#define TSIZ_DOPNG BIT(31) +#define TSIZ_SC_MC_PID_MASK (0x3 << 29) +#define TSIZ_SC_MC_PID_SHIFT 29 +#define TSIZ_SC_MC_PID_DATA0 0 +#define TSIZ_SC_MC_PID_DATA2 1 +#define TSIZ_SC_MC_PID_DATA1 2 +#define TSIZ_SC_MC_PID_MDATA 3 +#define TSIZ_SC_MC_PID_SETUP 3 +#define TSIZ_PKTCNT_MASK (0x3ff << 19) +#define TSIZ_PKTCNT_SHIFT 19 +#define TSIZ_NTD_MASK (0xff << 8) +#define TSIZ_NTD_SHIFT 8 +#define TSIZ_SCHINFO_MASK (0xff << 0) +#define TSIZ_SCHINFO_SHIFT 0 +#define TSIZ_XFERSIZE_MASK (0x7ffff << 0) +#define TSIZ_XFERSIZE_SHIFT 0 + +#define HCDMA(_ch) HSOTG_REG(0x0514 + 0x20 * (_ch)) + +#define HCDMAB(_ch) HSOTG_REG(0x051c + 0x20 * (_ch)) + +#define HCFIFO(_ch) HSOTG_REG(0x1000 + 0x1000 * (_ch)) + +/** + * struct dwc2_dma_desc - DMA descriptor structure, + * used for both host and gadget modes + * + * @status: DMA descriptor status quadlet + * @buf: DMA descriptor data buffer pointer + * + * DMA Descriptor structure contains two quadlets: + * Status quadlet and Data buffer pointer. + */ +struct dwc2_dma_desc { + uint32_t status; + uint32_t buf; +} __packed; + +/* Host Mode DMA descriptor status quadlet */ + +#define HOST_DMA_A BIT(31) +#define HOST_DMA_STS_MASK (0x3 << 28) +#define HOST_DMA_STS_SHIFT 28 +#define HOST_DMA_STS_PKTERR BIT(28) +#define HOST_DMA_EOL BIT(26) +#define HOST_DMA_IOC BIT(25) +#define HOST_DMA_SUP BIT(24) +#define HOST_DMA_ALT_QTD BIT(23) +#define HOST_DMA_QTD_OFFSET_MASK (0x3f << 17) +#define HOST_DMA_QTD_OFFSET_SHIFT 17 +#define HOST_DMA_ISOC_NBYTES_MASK (0xfff << 0) +#define HOST_DMA_ISOC_NBYTES_SHIFT 0 +#define HOST_DMA_NBYTES_MASK (0x1ffff << 0) +#define HOST_DMA_NBYTES_SHIFT 0 +#define HOST_DMA_NBYTES_LIMIT 131071 + +/* Device Mode DMA descriptor status quadlet */ + +#define DEV_DMA_BUFF_STS_MASK (0x3 << 30) +#define DEV_DMA_BUFF_STS_SHIFT 30 +#define DEV_DMA_BUFF_STS_HREADY 0 +#define DEV_DMA_BUFF_STS_DMABUSY 1 +#define DEV_DMA_BUFF_STS_DMADONE 2 +#define DEV_DMA_BUFF_STS_HBUSY 3 +#define DEV_DMA_STS_MASK (0x3 << 28) +#define DEV_DMA_STS_SHIFT 28 +#define DEV_DMA_STS_SUCC 0 +#define DEV_DMA_STS_BUFF_FLUSH 1 +#define DEV_DMA_STS_BUFF_ERR 3 +#define DEV_DMA_L BIT(27) +#define DEV_DMA_SHORT BIT(26) +#define DEV_DMA_IOC BIT(25) +#define DEV_DMA_SR BIT(24) +#define DEV_DMA_MTRF BIT(23) +#define DEV_DMA_ISOC_PID_MASK (0x3 << 23) +#define DEV_DMA_ISOC_PID_SHIFT 23 +#define DEV_DMA_ISOC_PID_DATA0 0 +#define DEV_DMA_ISOC_PID_DATA2 1 +#define DEV_DMA_ISOC_PID_DATA1 2 +#define DEV_DMA_ISOC_PID_MDATA 3 +#define DEV_DMA_ISOC_FRNUM_MASK (0x7ff << 12) +#define DEV_DMA_ISOC_FRNUM_SHIFT 12 +#define DEV_DMA_ISOC_TX_NBYTES_MASK (0xfff << 0) +#define DEV_DMA_ISOC_TX_NBYTES_LIMIT 0xfff +#define DEV_DMA_ISOC_RX_NBYTES_MASK (0x7ff << 0) +#define DEV_DMA_ISOC_RX_NBYTES_LIMIT 0x7ff +#define DEV_DMA_ISOC_NBYTES_SHIFT 0 +#define DEV_DMA_NBYTES_MASK (0xffff << 0) +#define DEV_DMA_NBYTES_SHIFT 0 +#define DEV_DMA_NBYTES_LIMIT 0xffff + +#define MAX_DMA_DESC_NUM_GENERIC 64 +#define MAX_DMA_DESC_NUM_HS_ISOC 256 + +#endif /* __DWC2_HW_H__ */ diff --git a/include/hw/usb/ehci-regs.h b/include/hw/usb/ehci-regs.h new file mode 100644 index 000000000..3e91b8e61 --- /dev/null +++ b/include/hw/usb/ehci-regs.h @@ -0,0 +1,82 @@ +#ifndef HW_USB_EHCI_REGS_H +#define HW_USB_EHCI_REGS_H + +/* Capability Registers Base Address - section 2.2 */ +#define CAPLENGTH 0x0000 /* 1-byte, 0x0001 reserved */ +#define HCIVERSION 0x0002 /* 2-bytes, i/f version # */ +#define HCSPARAMS 0x0004 /* 4-bytes, structural params */ +#define HCCPARAMS 0x0008 /* 4-bytes, capability params */ +#define EECP HCCPARAMS + 1 +#define HCSPPORTROUTE1 0x000c +#define HCSPPORTROUTE2 0x0010 + +#define USBCMD 0x0000 +#define USBCMD_RUNSTOP (1 << 0) // run / Stop +#define USBCMD_HCRESET (1 << 1) // HC Reset +#define USBCMD_FLS (3 << 2) // Frame List Size +#define USBCMD_FLS_SH 2 // Frame List Size Shift +#define USBCMD_PSE (1 << 4) // Periodic Schedule Enable +#define USBCMD_ASE (1 << 5) // Asynch Schedule Enable +#define USBCMD_IAAD (1 << 6) // Int Asynch Advance Doorbell +#define USBCMD_LHCR (1 << 7) // Light Host Controller Reset +#define USBCMD_ASPMC (3 << 8) // Async Sched Park Mode Count +#define USBCMD_ASPME (1 << 11) // Async Sched Park Mode Enable +#define USBCMD_ITC (0x7f << 16) // Int Threshold Control +#define USBCMD_ITC_SH 16 // Int Threshold Control Shift + +#define USBSTS 0x0004 +#define USBSTS_RO_MASK 0x0000003f +#define USBSTS_INT (1 << 0) // USB Interrupt +#define USBSTS_ERRINT (1 << 1) // Error Interrupt +#define USBSTS_PCD (1 << 2) // Port Change Detect +#define USBSTS_FLR (1 << 3) // Frame List Rollover +#define USBSTS_HSE (1 << 4) // Host System Error +#define USBSTS_IAA (1 << 5) // Interrupt on Async Advance +#define USBSTS_HALT (1 << 12) // HC Halted +#define USBSTS_REC (1 << 13) // Reclamation +#define USBSTS_PSS (1 << 14) // Periodic Schedule Status +#define USBSTS_ASS (1 << 15) // Asynchronous Schedule Status + +/* + * Interrupt enable bits correspond to the interrupt active bits in USBSTS + * so no need to redefine here. + */ +#define USBINTR 0x0008 +#define USBINTR_MASK 0x0000003f + +#define FRINDEX 0x000c +#define CTRLDSSEGMENT 0x0010 +#define PERIODICLISTBASE 0x0014 +#define ASYNCLISTADDR 0x0018 +#define ASYNCLISTADDR_MASK 0xffffffe0 + +#define CONFIGFLAG 0x0040 + +/* + * Bits that are reserved or are read-only are masked out of values + * written to us by software + */ +#define PORTSC_RO_MASK 0x007001c0 +#define PORTSC_RWC_MASK 0x0000002a +#define PORTSC_WKOC_E (1 << 22) // Wake on Over Current Enable +#define PORTSC_WKDS_E (1 << 21) // Wake on Disconnect Enable +#define PORTSC_WKCN_E (1 << 20) // Wake on Connect Enable +#define PORTSC_PTC (15 << 16) // Port Test Control +#define PORTSC_PTC_SH 16 // Port Test Control shift +#define PORTSC_PIC (3 << 14) // Port Indicator Control +#define PORTSC_PIC_SH 14 // Port Indicator Control Shift +#define PORTSC_POWNER (1 << 13) // Port Owner +#define PORTSC_PPOWER (1 << 12) // Port Power +#define PORTSC_LINESTAT (3 << 10) // Port Line Status +#define PORTSC_LINESTAT_SH 10 // Port Line Status Shift +#define PORTSC_PRESET (1 << 8) // Port Reset +#define PORTSC_SUSPEND (1 << 7) // Port Suspend +#define PORTSC_FPRES (1 << 6) // Force Port Resume +#define PORTSC_OCC (1 << 5) // Over Current Change +#define PORTSC_OCA (1 << 4) // Over Current Active +#define PORTSC_PEDC (1 << 3) // Port Enable/Disable Change +#define PORTSC_PED (1 << 2) // Port Enable/Disable +#define PORTSC_CSC (1 << 1) // Connect Status Change +#define PORTSC_CONNECT (1 << 0) // Current Connect Status + +#endif /* HW_USB_EHCI_REGS_H */ diff --git a/include/hw/usb/hcd-musb.h b/include/hw/usb/hcd-musb.h new file mode 100644 index 000000000..c874b9f29 --- /dev/null +++ b/include/hw/usb/hcd-musb.h @@ -0,0 +1,47 @@ +/* + * "Inventra" High-speed Dual-Role Controller (MUSB-HDRC), Mentor Graphics, + * USB2.0 OTG compliant core used in various chips. + * + * Only host-mode and non-DMA accesses are currently supported. + * + * Copyright (C) 2008 Nokia Corporation + * Written by Andrzej Zaborowski + * + * SPDX-License-Identifier: GPL-2.0-or-later + */ + +#ifndef HW_USB_MUSB_H +#define HW_USB_MUSB_H + +enum musb_irq_source_e { + musb_irq_suspend = 0, + musb_irq_resume, + musb_irq_rst_babble, + musb_irq_sof, + musb_irq_connect, + musb_irq_disconnect, + musb_irq_vbus_request, + musb_irq_vbus_error, + musb_irq_rx, + musb_irq_tx, + musb_set_vbus, + musb_set_session, + /* Add new interrupts here */ + musb_irq_max /* total number of interrupts defined */ +}; + +/* TODO convert hcd-musb to QOM/qdev and remove MUSBReadFunc/MUSBWriteFunc */ +typedef void MUSBWriteFunc(void *opaque, hwaddr addr, uint32_t value); +typedef uint32_t MUSBReadFunc(void *opaque, hwaddr addr); +extern MUSBReadFunc * const musb_read[]; +extern MUSBWriteFunc * const musb_write[]; + +typedef struct MUSBState MUSBState; + +MUSBState *musb_init(DeviceState *parent_device, int gpio_base); +void musb_reset(MUSBState *s); +uint32_t musb_core_intr_get(MUSBState *s); +void musb_core_intr_clear(MUSBState *s, uint32_t mask); +void musb_set_size(MUSBState *s, int epnum, int size, int is_tx); + +#endif diff --git a/include/hw/usb/hid.h b/include/hw/usb/hid.h new file mode 100644 index 000000000..1c142584f --- /dev/null +++ b/include/hw/usb/hid.h @@ -0,0 +1,17 @@ +#ifndef HW_USB_HID_H +#define HW_USB_HID_H + +/* HID interface requests */ +#define HID_GET_REPORT 0xa101 +#define HID_GET_IDLE 0xa102 +#define HID_GET_PROTOCOL 0xa103 +#define HID_SET_REPORT 0x2109 +#define HID_SET_IDLE 0x210a +#define HID_SET_PROTOCOL 0x210b + +/* HID descriptor types */ +#define USB_DT_HID 0x21 +#define USB_DT_REPORT 0x22 +#define USB_DT_PHY 0x23 + +#endif diff --git a/include/hw/usb/imx-usb-phy.h b/include/hw/usb/imx-usb-phy.h new file mode 100644 index 000000000..d1e867b77 --- /dev/null +++ b/include/hw/usb/imx-usb-phy.h @@ -0,0 +1,54 @@ +#ifndef IMX_USB_PHY_H +#define IMX_USB_PHY_H + +#include "hw/sysbus.h" +#include "qemu/bitops.h" +#include "qom/object.h" + +enum IMXUsbPhyRegisters { + USBPHY_PWD, + USBPHY_PWD_SET, + USBPHY_PWD_CLR, + USBPHY_PWD_TOG, + USBPHY_TX, + USBPHY_TX_SET, + USBPHY_TX_CLR, + USBPHY_TX_TOG, + USBPHY_RX, + USBPHY_RX_SET, + USBPHY_RX_CLR, + USBPHY_RX_TOG, + USBPHY_CTRL, + USBPHY_CTRL_SET, + USBPHY_CTRL_CLR, + USBPHY_CTRL_TOG, + USBPHY_STATUS, + USBPHY_DEBUG = 0x14, + USBPHY_DEBUG_SET, + USBPHY_DEBUG_CLR, + USBPHY_DEBUG_TOG, + USBPHY_DEBUG0_STATUS, + USBPHY_DEBUG1 = 0x1c, + USBPHY_DEBUG1_SET, + USBPHY_DEBUG1_CLR, + USBPHY_DEBUG1_TOG, + USBPHY_VERSION, + USBPHY_MAX +}; + +#define USBPHY_CTRL_SFTRST BIT(31) + +#define TYPE_IMX_USBPHY "imx.usbphy" +OBJECT_DECLARE_SIMPLE_TYPE(IMXUSBPHYState, IMX_USBPHY) + +struct IMXUSBPHYState { + /* */ + SysBusDevice parent_obj; + + /* */ + MemoryRegion iomem; + + uint32_t usbphy[USBPHY_MAX]; +}; + +#endif /* IMX_USB_PHY_H */ diff --git a/include/hw/usb/uhci-regs.h b/include/hw/usb/uhci-regs.h new file mode 100644 index 000000000..fd45d29db --- /dev/null +++ b/include/hw/usb/uhci-regs.h @@ -0,0 +1,40 @@ +#ifndef HW_USB_UHCI_REGS_H +#define HW_USB_UHCI_REGS_H + +#define UHCI_CMD_FGR (1 << 4) +#define UHCI_CMD_EGSM (1 << 3) +#define UHCI_CMD_GRESET (1 << 2) +#define UHCI_CMD_HCRESET (1 << 1) +#define UHCI_CMD_RS (1 << 0) + +#define UHCI_STS_HCHALTED (1 << 5) +#define UHCI_STS_HCPERR (1 << 4) +#define UHCI_STS_HSERR (1 << 3) +#define UHCI_STS_RD (1 << 2) +#define UHCI_STS_USBERR (1 << 1) +#define UHCI_STS_USBINT (1 << 0) + +#define TD_CTRL_SPD (1 << 29) +#define TD_CTRL_ERROR_SHIFT 27 +#define TD_CTRL_IOS (1 << 25) +#define TD_CTRL_IOC (1 << 24) +#define TD_CTRL_ACTIVE (1 << 23) +#define TD_CTRL_STALL (1 << 22) +#define TD_CTRL_BABBLE (1 << 20) +#define TD_CTRL_NAK (1 << 19) +#define TD_CTRL_TIMEOUT (1 << 18) + +#define UHCI_PORT_SUSPEND (1 << 12) +#define UHCI_PORT_RESET (1 << 9) +#define UHCI_PORT_LSDA (1 << 8) +#define UHCI_PORT_RSVD1 (1 << 7) +#define UHCI_PORT_RD (1 << 6) +#define UHCI_PORT_ENC (1 << 3) +#define UHCI_PORT_EN (1 << 2) +#define UHCI_PORT_CSC (1 << 1) +#define UHCI_PORT_CCS (1 << 0) + +#define UHCI_PORT_READ_ONLY (0x1bb) +#define UHCI_PORT_WRITE_CLEAR (UHCI_PORT_CSC | UHCI_PORT_ENC) + +#endif /* HW_USB_UHCI_REGS_H */ diff --git a/include/hw/usb/xhci.h b/include/hw/usb/xhci.h new file mode 100644 index 000000000..5c90e1373 --- /dev/null +++ b/include/hw/usb/xhci.h @@ -0,0 +1,21 @@ +#ifndef HW_USB_XHCI_H +#define HW_USB_XHCI_H + +#define TYPE_XHCI "base-xhci" +#define TYPE_NEC_XHCI "nec-usb-xhci" +#define TYPE_QEMU_XHCI "qemu-xhci" +#define TYPE_XHCI_SYSBUS "sysbus-xhci" + +#define XHCI_MAXPORTS_2 15 +#define XHCI_MAXPORTS_3 15 + +#define XHCI_MAXPORTS (XHCI_MAXPORTS_2 + XHCI_MAXPORTS_3) +#define XHCI_MAXSLOTS 64 +#define XHCI_MAXINTRS 16 + +/* must be power of 2 */ +#define XHCI_LEN_REGS 0x4000 + +void xhci_sysbus_build_aml(Aml *scope, uint32_t mmio, unsigned int irq); + +#endif diff --git a/include/hw/vfio/vfio-amd-xgbe.h b/include/hw/vfio/vfio-amd-xgbe.h new file mode 100644 index 000000000..a894546c0 --- /dev/null +++ b/include/hw/vfio/vfio-amd-xgbe.h @@ -0,0 +1,46 @@ +/* + * VFIO AMD XGBE device + * + * Copyright Linaro Limited, 2015 + * + * Authors: + * Eric Auger + * + * This work is licensed under the terms of the GNU GPL, version 2. See + * the COPYING file in the top-level directory. + * + */ + +#ifndef HW_VFIO_VFIO_AMD_XGBE_H +#define HW_VFIO_VFIO_AMD_XGBE_H + +#include "hw/vfio/vfio-platform.h" +#include "qom/object.h" + +#define TYPE_VFIO_AMD_XGBE "vfio-amd-xgbe" + +/** + * This device exposes: + * - 5 MMIO regions: MAC, PCS, SerDes Rx/Tx regs, + SerDes Integration Registers 1/2 & 2/2 + * - 2 level sensitive IRQs and optional DMA channel IRQs + */ +struct VFIOAmdXgbeDevice { + VFIOPlatformDevice vdev; +}; + +typedef struct VFIOAmdXgbeDevice VFIOAmdXgbeDevice; + +struct VFIOAmdXgbeDeviceClass { + /*< private >*/ + VFIOPlatformDeviceClass parent_class; + /*< public >*/ + DeviceRealize parent_realize; +}; + +typedef struct VFIOAmdXgbeDeviceClass VFIOAmdXgbeDeviceClass; + +DECLARE_OBJ_CHECKERS(VFIOAmdXgbeDevice, VFIOAmdXgbeDeviceClass, + VFIO_AMD_XGBE_DEVICE, TYPE_VFIO_AMD_XGBE) + +#endif diff --git a/include/hw/vfio/vfio-calxeda-xgmac.h b/include/hw/vfio/vfio-calxeda-xgmac.h new file mode 100644 index 000000000..8482f151d --- /dev/null +++ b/include/hw/vfio/vfio-calxeda-xgmac.h @@ -0,0 +1,43 @@ +/* + * VFIO calxeda xgmac device + * + * Copyright Linaro Limited, 2014 + * + * Authors: + * Eric Auger + * + * This work is licensed under the terms of the GNU GPL, version 2. See + * the COPYING file in the top-level directory. + * + */ + +#ifndef HW_VFIO_VFIO_CALXEDA_XGMAC_H +#define HW_VFIO_VFIO_CALXEDA_XGMAC_H + +#include "hw/vfio/vfio-platform.h" +#include "qom/object.h" + +#define TYPE_VFIO_CALXEDA_XGMAC "vfio-calxeda-xgmac" + +/** + * This device exposes: + * - a single MMIO region corresponding to its register space + * - 3 IRQS (main and 2 power related IRQs) + */ +struct VFIOCalxedaXgmacDevice { + VFIOPlatformDevice vdev; +}; +typedef struct VFIOCalxedaXgmacDevice VFIOCalxedaXgmacDevice; + +struct VFIOCalxedaXgmacDeviceClass { + /*< private >*/ + VFIOPlatformDeviceClass parent_class; + /*< public >*/ + DeviceRealize parent_realize; +}; +typedef struct VFIOCalxedaXgmacDeviceClass VFIOCalxedaXgmacDeviceClass; + +DECLARE_OBJ_CHECKERS(VFIOCalxedaXgmacDevice, VFIOCalxedaXgmacDeviceClass, + VFIO_CALXEDA_XGMAC_DEVICE, TYPE_VFIO_CALXEDA_XGMAC) + +#endif diff --git a/include/hw/vfio/vfio-common.h b/include/hw/vfio/vfio-common.h new file mode 100644 index 000000000..6141162d7 --- /dev/null +++ b/include/hw/vfio/vfio-common.h @@ -0,0 +1,235 @@ +/* + * common header for vfio based device assignment support + * + * Copyright Red Hat, Inc. 2012 + * + * Authors: + * Alex Williamson + * + * This work is licensed under the terms of the GNU GPL, version 2. See + * the COPYING file in the top-level directory. + * + * Based on qemu-kvm device-assignment: + * Adapted for KVM by Qumranet. + * Copyright (c) 2007, Neocleus, Alex Novik (alex@neocleus.com) + * Copyright (c) 2007, Neocleus, Guy Zana (guy@neocleus.com) + * Copyright (C) 2008, Qumranet, Amit Shah (amit.shah@qumranet.com) + * Copyright (C) 2008, Red Hat, Amit Shah (amit.shah@redhat.com) + * Copyright (C) 2008, IBM, Muli Ben-Yehuda (muli@il.ibm.com) + */ + +#ifndef HW_VFIO_VFIO_COMMON_H +#define HW_VFIO_VFIO_COMMON_H + +#include "exec/memory.h" +#include "qemu/queue.h" +#include "qemu/notify.h" +#include "ui/console.h" +#include "hw/display/ramfb.h" +#ifdef CONFIG_LINUX +#include +#endif +#include "sysemu/sysemu.h" + +#define VFIO_MSG_PREFIX "vfio %s: " + +enum { + VFIO_DEVICE_TYPE_PCI = 0, + VFIO_DEVICE_TYPE_PLATFORM = 1, + VFIO_DEVICE_TYPE_CCW = 2, + VFIO_DEVICE_TYPE_AP = 3, +}; + +typedef struct VFIOMmap { + MemoryRegion mem; + void *mmap; + off_t offset; + size_t size; +} VFIOMmap; + +typedef struct VFIORegion { + struct VFIODevice *vbasedev; + off_t fd_offset; /* offset of region within device fd */ + MemoryRegion *mem; /* slow, read/write access */ + size_t size; + uint32_t flags; /* VFIO region flags (rd/wr/mmap) */ + uint32_t nr_mmaps; + VFIOMmap *mmaps; + uint8_t nr; /* cache the region number for debug */ +} VFIORegion; + +typedef struct VFIOMigration { + struct VFIODevice *vbasedev; + VMChangeStateEntry *vm_state; + VFIORegion region; + uint32_t device_state; + int vm_running; + Notifier migration_state; + uint64_t pending_bytes; +} VFIOMigration; + +typedef struct VFIOAddressSpace { + AddressSpace *as; + QLIST_HEAD(, VFIOContainer) containers; + QLIST_ENTRY(VFIOAddressSpace) list; +} VFIOAddressSpace; + +struct VFIOGroup; + +typedef struct VFIOContainer { + VFIOAddressSpace *space; + int fd; /* /dev/vfio/vfio, empowered by the attached groups */ + MemoryListener listener; + MemoryListener prereg_listener; + unsigned iommu_type; + Error *error; + bool initialized; + bool dirty_pages_supported; + uint64_t dirty_pgsizes; + uint64_t max_dirty_bitmap_size; + unsigned long pgsizes; + QLIST_HEAD(, VFIOGuestIOMMU) giommu_list; + QLIST_HEAD(, VFIOHostDMAWindow) hostwin_list; + QLIST_HEAD(, VFIOGroup) group_list; + QLIST_ENTRY(VFIOContainer) next; +} VFIOContainer; + +typedef struct VFIOGuestIOMMU { + VFIOContainer *container; + IOMMUMemoryRegion *iommu; + hwaddr iommu_offset; + IOMMUNotifier n; + QLIST_ENTRY(VFIOGuestIOMMU) giommu_next; +} VFIOGuestIOMMU; + +typedef struct VFIOHostDMAWindow { + hwaddr min_iova; + hwaddr max_iova; + uint64_t iova_pgsizes; + QLIST_ENTRY(VFIOHostDMAWindow) hostwin_next; +} VFIOHostDMAWindow; + +typedef struct VFIODeviceOps VFIODeviceOps; + +typedef struct VFIODevice { + QLIST_ENTRY(VFIODevice) next; + struct VFIOGroup *group; + char *sysfsdev; + char *name; + DeviceState *dev; + int fd; + int type; + bool reset_works; + bool needs_reset; + bool no_mmap; + bool ram_block_discard_allowed; + bool enable_migration; + VFIODeviceOps *ops; + unsigned int num_irqs; + unsigned int num_regions; + unsigned int flags; + VFIOMigration *migration; + Error *migration_blocker; + OnOffAuto pre_copy_dirty_page_tracking; +} VFIODevice; + +struct VFIODeviceOps { + void (*vfio_compute_needs_reset)(VFIODevice *vdev); + int (*vfio_hot_reset_multi)(VFIODevice *vdev); + void (*vfio_eoi)(VFIODevice *vdev); + Object *(*vfio_get_object)(VFIODevice *vdev); + void (*vfio_save_config)(VFIODevice *vdev, QEMUFile *f); + int (*vfio_load_config)(VFIODevice *vdev, QEMUFile *f); +}; + +typedef struct VFIOGroup { + int fd; + int groupid; + VFIOContainer *container; + QLIST_HEAD(, VFIODevice) device_list; + QLIST_ENTRY(VFIOGroup) next; + QLIST_ENTRY(VFIOGroup) container_next; + bool ram_block_discard_allowed; +} VFIOGroup; + +typedef struct VFIODMABuf { + QemuDmaBuf buf; + uint32_t pos_x, pos_y, pos_updates; + uint32_t hot_x, hot_y, hot_updates; + int dmabuf_id; + QTAILQ_ENTRY(VFIODMABuf) next; +} VFIODMABuf; + +typedef struct VFIODisplay { + QemuConsole *con; + RAMFBState *ramfb; + struct vfio_region_info *edid_info; + struct vfio_region_gfx_edid *edid_regs; + uint8_t *edid_blob; + QEMUTimer *edid_link_timer; + struct { + VFIORegion buffer; + DisplaySurface *surface; + } region; + struct { + QTAILQ_HEAD(, VFIODMABuf) bufs; + VFIODMABuf *primary; + VFIODMABuf *cursor; + } dmabuf; +} VFIODisplay; + +void vfio_put_base_device(VFIODevice *vbasedev); +void vfio_disable_irqindex(VFIODevice *vbasedev, int index); +void vfio_unmask_single_irqindex(VFIODevice *vbasedev, int index); +void vfio_mask_single_irqindex(VFIODevice *vbasedev, int index); +int vfio_set_irq_signaling(VFIODevice *vbasedev, int index, int subindex, + int action, int fd, Error **errp); +void vfio_region_write(void *opaque, hwaddr addr, + uint64_t data, unsigned size); +uint64_t vfio_region_read(void *opaque, + hwaddr addr, unsigned size); +int vfio_region_setup(Object *obj, VFIODevice *vbasedev, VFIORegion *region, + int index, const char *name); +int vfio_region_mmap(VFIORegion *region); +void vfio_region_mmaps_set_enabled(VFIORegion *region, bool enabled); +void vfio_region_unmap(VFIORegion *region); +void vfio_region_exit(VFIORegion *region); +void vfio_region_finalize(VFIORegion *region); +void vfio_reset_handler(void *opaque); +VFIOGroup *vfio_get_group(int groupid, AddressSpace *as, Error **errp); +void vfio_put_group(VFIOGroup *group); +int vfio_get_device(VFIOGroup *group, const char *name, + VFIODevice *vbasedev, Error **errp); + +extern const MemoryRegionOps vfio_region_ops; +typedef QLIST_HEAD(VFIOGroupList, VFIOGroup) VFIOGroupList; +extern VFIOGroupList vfio_group_list; + +bool vfio_mig_active(void); +int64_t vfio_mig_bytes_transferred(void); + +#ifdef CONFIG_LINUX +int vfio_get_region_info(VFIODevice *vbasedev, int index, + struct vfio_region_info **info); +int vfio_get_dev_region_info(VFIODevice *vbasedev, uint32_t type, + uint32_t subtype, struct vfio_region_info **info); +bool vfio_has_region_cap(VFIODevice *vbasedev, int region, uint16_t cap_type); +struct vfio_info_cap_header * +vfio_get_region_info_cap(struct vfio_region_info *info, uint16_t id); +bool vfio_get_info_dma_avail(struct vfio_iommu_type1_info *info, + unsigned int *avail); +struct vfio_info_cap_header * +vfio_get_device_info_cap(struct vfio_device_info *info, uint16_t id); +#endif +extern const MemoryListener vfio_prereg_listener; + +int vfio_spapr_create_window(VFIOContainer *container, + MemoryRegionSection *section, + hwaddr *pgsize); +int vfio_spapr_remove_window(VFIOContainer *container, + hwaddr offset_within_address_space); + +int vfio_migration_probe(VFIODevice *vbasedev, Error **errp); +void vfio_migration_finalize(VFIODevice *vbasedev); + +#endif /* HW_VFIO_VFIO_COMMON_H */ diff --git a/include/hw/vfio/vfio-platform.h b/include/hw/vfio/vfio-platform.h new file mode 100644 index 000000000..c414c3dff --- /dev/null +++ b/include/hw/vfio/vfio-platform.h @@ -0,0 +1,76 @@ +/* + * vfio based device assignment support - platform devices + * + * Copyright Linaro Limited, 2014 + * + * Authors: + * Kim Phillips + * + * This work is licensed under the terms of the GNU GPL, version 2. See + * the COPYING file in the top-level directory. + * + * Based on vfio based PCI device assignment support: + * Copyright Red Hat, Inc. 2012 + */ + +#ifndef HW_VFIO_VFIO_PLATFORM_H +#define HW_VFIO_VFIO_PLATFORM_H + +#include "hw/sysbus.h" +#include "hw/vfio/vfio-common.h" +#include "qemu/event_notifier.h" +#include "qemu/queue.h" +#include "qom/object.h" + +#define TYPE_VFIO_PLATFORM "vfio-platform" + +enum { + VFIO_IRQ_INACTIVE = 0, + VFIO_IRQ_PENDING = 1, + VFIO_IRQ_ACTIVE = 2, + /* VFIO_IRQ_ACTIVE_AND_PENDING cannot happen with VFIO */ +}; + +typedef struct VFIOINTp { + QLIST_ENTRY(VFIOINTp) next; /* entry for IRQ list */ + QSIMPLEQ_ENTRY(VFIOINTp) pqnext; /* entry for pending IRQ queue */ + EventNotifier *interrupt; /* eventfd triggered on interrupt */ + EventNotifier *unmask; /* eventfd for unmask on QEMU bypass */ + qemu_irq qemuirq; + struct VFIOPlatformDevice *vdev; /* back pointer to device */ + int state; /* inactive, pending, active */ + uint8_t pin; /* index */ + uint32_t flags; /* IRQ info flags */ + bool kvm_accel; /* set when QEMU bypass through KVM enabled */ +} VFIOINTp; + +/* function type for user side eventfd handler */ +typedef void (*eventfd_user_side_handler_t)(VFIOINTp *intp); + +struct VFIOPlatformDevice { + SysBusDevice sbdev; + VFIODevice vbasedev; /* not a QOM object */ + VFIORegion **regions; + QLIST_HEAD(, VFIOINTp) intp_list; /* list of IRQs */ + /* queue of pending IRQs */ + QSIMPLEQ_HEAD(, VFIOINTp) pending_intp_queue; + char *compat; /* DT compatible values, separated by NUL */ + unsigned int num_compat; /* number of compatible values */ + uint32_t mmap_timeout; /* delay to re-enable mmaps after interrupt */ + QEMUTimer *mmap_timer; /* allows fast-path resume after IRQ hit */ + QemuMutex intp_mutex; /* protect the intp_list IRQ state */ + bool irqfd_allowed; /* debug option to force irqfd on/off */ +}; +typedef struct VFIOPlatformDevice VFIOPlatformDevice; + +struct VFIOPlatformDeviceClass { + /*< private >*/ + SysBusDeviceClass parent_class; + /*< public >*/ +}; +typedef struct VFIOPlatformDeviceClass VFIOPlatformDeviceClass; + +DECLARE_OBJ_CHECKERS(VFIOPlatformDevice, VFIOPlatformDeviceClass, + VFIO_PLATFORM_DEVICE, TYPE_VFIO_PLATFORM) + +#endif /* HW_VFIO_VFIO_PLATFORM_H */ diff --git a/include/hw/vfio/vfio.h b/include/hw/vfio/vfio.h new file mode 100644 index 000000000..86248f543 --- /dev/null +++ b/include/hw/vfio/vfio.h @@ -0,0 +1,7 @@ +#ifndef HW_VFIO_H +#define HW_VFIO_H + +bool vfio_eeh_as_ok(AddressSpace *as); +int vfio_eeh_as_op(AddressSpace *as, uint32_t op); + +#endif diff --git a/include/hw/virtio/vhost-backend.h b/include/hw/virtio/vhost-backend.h new file mode 100644 index 000000000..8a6f8e2a7 --- /dev/null +++ b/include/hw/virtio/vhost-backend.h @@ -0,0 +1,194 @@ +/* + * vhost-backend + * + * Copyright (c) 2013 Virtual Open Systems Sarl. + * + * This work is licensed under the terms of the GNU GPL, version 2 or later. + * See the COPYING file in the top-level directory. + * + */ + +#ifndef VHOST_BACKEND_H +#define VHOST_BACKEND_H + +#include "exec/memory.h" + +typedef enum VhostBackendType { + VHOST_BACKEND_TYPE_NONE = 0, + VHOST_BACKEND_TYPE_KERNEL = 1, + VHOST_BACKEND_TYPE_USER = 2, + VHOST_BACKEND_TYPE_VDPA = 3, + VHOST_BACKEND_TYPE_MAX = 4, +} VhostBackendType; + +typedef enum VhostSetConfigType { + VHOST_SET_CONFIG_TYPE_MASTER = 0, + VHOST_SET_CONFIG_TYPE_MIGRATION = 1, +} VhostSetConfigType; + +struct vhost_inflight; +struct vhost_dev; +struct vhost_log; +struct vhost_memory; +struct vhost_vring_file; +struct vhost_vring_state; +struct vhost_vring_addr; +struct vhost_scsi_target; +struct vhost_iotlb_msg; +struct vhost_virtqueue; + +typedef int (*vhost_backend_init)(struct vhost_dev *dev, void *opaque); +typedef int (*vhost_backend_cleanup)(struct vhost_dev *dev); +typedef int (*vhost_backend_memslots_limit)(struct vhost_dev *dev); + +typedef int (*vhost_net_set_backend_op)(struct vhost_dev *dev, + struct vhost_vring_file *file); +typedef int (*vhost_net_set_mtu_op)(struct vhost_dev *dev, uint16_t mtu); +typedef int (*vhost_scsi_set_endpoint_op)(struct vhost_dev *dev, + struct vhost_scsi_target *target); +typedef int (*vhost_scsi_clear_endpoint_op)(struct vhost_dev *dev, + struct vhost_scsi_target *target); +typedef int (*vhost_scsi_get_abi_version_op)(struct vhost_dev *dev, + int *version); +typedef int (*vhost_set_log_base_op)(struct vhost_dev *dev, uint64_t base, + struct vhost_log *log); +typedef int (*vhost_set_mem_table_op)(struct vhost_dev *dev, + struct vhost_memory *mem); +typedef int (*vhost_set_vring_addr_op)(struct vhost_dev *dev, + struct vhost_vring_addr *addr); +typedef int (*vhost_set_vring_endian_op)(struct vhost_dev *dev, + struct vhost_vring_state *ring); +typedef int (*vhost_set_vring_num_op)(struct vhost_dev *dev, + struct vhost_vring_state *ring); +typedef int (*vhost_set_vring_base_op)(struct vhost_dev *dev, + struct vhost_vring_state *ring); +typedef int (*vhost_get_vring_base_op)(struct vhost_dev *dev, + struct vhost_vring_state *ring); +typedef int (*vhost_set_vring_kick_op)(struct vhost_dev *dev, + struct vhost_vring_file *file); +typedef int (*vhost_set_vring_call_op)(struct vhost_dev *dev, + struct vhost_vring_file *file); +typedef int (*vhost_set_vring_busyloop_timeout_op)(struct vhost_dev *dev, + struct vhost_vring_state *r); +typedef int (*vhost_set_features_op)(struct vhost_dev *dev, + uint64_t features); +typedef int (*vhost_get_features_op)(struct vhost_dev *dev, + uint64_t *features); +typedef int (*vhost_set_backend_cap_op)(struct vhost_dev *dev); +typedef int (*vhost_set_owner_op)(struct vhost_dev *dev); +typedef int (*vhost_reset_device_op)(struct vhost_dev *dev); +typedef int (*vhost_get_vq_index_op)(struct vhost_dev *dev, int idx); +typedef int (*vhost_set_vring_enable_op)(struct vhost_dev *dev, + int enable); +typedef bool (*vhost_requires_shm_log_op)(struct vhost_dev *dev); +typedef int (*vhost_migration_done_op)(struct vhost_dev *dev, + char *mac_addr); +typedef bool (*vhost_backend_can_merge_op)(struct vhost_dev *dev, + uint64_t start1, uint64_t size1, + uint64_t start2, uint64_t size2); +typedef int (*vhost_vsock_set_guest_cid_op)(struct vhost_dev *dev, + uint64_t guest_cid); +typedef int (*vhost_vsock_set_running_op)(struct vhost_dev *dev, int start); +typedef void (*vhost_set_iotlb_callback_op)(struct vhost_dev *dev, + int enabled); +typedef int (*vhost_send_device_iotlb_msg_op)(struct vhost_dev *dev, + struct vhost_iotlb_msg *imsg); +typedef int (*vhost_set_config_op)(struct vhost_dev *dev, const uint8_t *data, + uint32_t offset, uint32_t size, + uint32_t flags); +typedef int (*vhost_get_config_op)(struct vhost_dev *dev, uint8_t *config, + uint32_t config_len); + +typedef int (*vhost_crypto_create_session_op)(struct vhost_dev *dev, + void *session_info, + uint64_t *session_id); +typedef int (*vhost_crypto_close_session_op)(struct vhost_dev *dev, + uint64_t session_id); + +typedef bool (*vhost_backend_mem_section_filter_op)(struct vhost_dev *dev, + MemoryRegionSection *section); + +typedef int (*vhost_get_inflight_fd_op)(struct vhost_dev *dev, + uint16_t queue_size, + struct vhost_inflight *inflight); + +typedef int (*vhost_set_inflight_fd_op)(struct vhost_dev *dev, + struct vhost_inflight *inflight); + +typedef int (*vhost_dev_start_op)(struct vhost_dev *dev, bool started); + +typedef int (*vhost_vq_get_addr_op)(struct vhost_dev *dev, + struct vhost_vring_addr *addr, + struct vhost_virtqueue *vq); + +typedef int (*vhost_get_device_id_op)(struct vhost_dev *dev, uint32_t *dev_id); + +typedef bool (*vhost_force_iommu_op)(struct vhost_dev *dev); + +typedef struct VhostOps { + VhostBackendType backend_type; + vhost_backend_init vhost_backend_init; + vhost_backend_cleanup vhost_backend_cleanup; + vhost_backend_memslots_limit vhost_backend_memslots_limit; + vhost_net_set_backend_op vhost_net_set_backend; + vhost_net_set_mtu_op vhost_net_set_mtu; + vhost_scsi_set_endpoint_op vhost_scsi_set_endpoint; + vhost_scsi_clear_endpoint_op vhost_scsi_clear_endpoint; + vhost_scsi_get_abi_version_op vhost_scsi_get_abi_version; + vhost_set_log_base_op vhost_set_log_base; + vhost_set_mem_table_op vhost_set_mem_table; + vhost_set_vring_addr_op vhost_set_vring_addr; + vhost_set_vring_endian_op vhost_set_vring_endian; + vhost_set_vring_num_op vhost_set_vring_num; + vhost_set_vring_base_op vhost_set_vring_base; + vhost_get_vring_base_op vhost_get_vring_base; + vhost_set_vring_kick_op vhost_set_vring_kick; + vhost_set_vring_call_op vhost_set_vring_call; + vhost_set_vring_busyloop_timeout_op vhost_set_vring_busyloop_timeout; + vhost_set_features_op vhost_set_features; + vhost_get_features_op vhost_get_features; + vhost_set_backend_cap_op vhost_set_backend_cap; + vhost_set_owner_op vhost_set_owner; + vhost_reset_device_op vhost_reset_device; + vhost_get_vq_index_op vhost_get_vq_index; + vhost_set_vring_enable_op vhost_set_vring_enable; + vhost_requires_shm_log_op vhost_requires_shm_log; + vhost_migration_done_op vhost_migration_done; + vhost_backend_can_merge_op vhost_backend_can_merge; + vhost_vsock_set_guest_cid_op vhost_vsock_set_guest_cid; + vhost_vsock_set_running_op vhost_vsock_set_running; + vhost_set_iotlb_callback_op vhost_set_iotlb_callback; + vhost_send_device_iotlb_msg_op vhost_send_device_iotlb_msg; + vhost_get_config_op vhost_get_config; + vhost_set_config_op vhost_set_config; + vhost_crypto_create_session_op vhost_crypto_create_session; + vhost_crypto_close_session_op vhost_crypto_close_session; + vhost_backend_mem_section_filter_op vhost_backend_mem_section_filter; + vhost_get_inflight_fd_op vhost_get_inflight_fd; + vhost_set_inflight_fd_op vhost_set_inflight_fd; + vhost_dev_start_op vhost_dev_start; + vhost_vq_get_addr_op vhost_vq_get_addr; + vhost_get_device_id_op vhost_get_device_id; + vhost_force_iommu_op vhost_force_iommu; +} VhostOps; + +extern const VhostOps user_ops; +extern const VhostOps vdpa_ops; + +int vhost_set_backend_type(struct vhost_dev *dev, + VhostBackendType backend_type); + +int vhost_backend_update_device_iotlb(struct vhost_dev *dev, + uint64_t iova, uint64_t uaddr, + uint64_t len, + IOMMUAccessFlags perm); + +int vhost_backend_invalidate_device_iotlb(struct vhost_dev *dev, + uint64_t iova, uint64_t len); + +int vhost_backend_handle_iotlb_msg(struct vhost_dev *dev, + struct vhost_iotlb_msg *imsg); + +int vhost_user_gpu_set_socket(struct vhost_dev *dev, int fd); + +#endif /* VHOST_BACKEND_H */ diff --git a/include/hw/virtio/vhost-scsi-common.h b/include/hw/virtio/vhost-scsi-common.h new file mode 100644 index 000000000..18f115527 --- /dev/null +++ b/include/hw/virtio/vhost-scsi-common.h @@ -0,0 +1,50 @@ +/* + * vhost_scsi host device + * + * Copyright (c) 2016 Nutanix Inc. All rights reserved. + * + * Author: + * Felipe Franciosi + * + * This work is licensed under the terms of the GNU LGPL, version 2 or later. + * See the COPYING.LIB file in the top-level directory. + * + */ + +#ifndef VHOST_SCSI_COMMON_H +#define VHOST_SCSI_COMMON_H + +#include "hw/virtio/virtio-scsi.h" +#include "hw/virtio/vhost.h" +#include "hw/fw-path-provider.h" +#include "qom/object.h" + +#define TYPE_VHOST_SCSI_COMMON "vhost-scsi-common" +OBJECT_DECLARE_SIMPLE_TYPE(VHostSCSICommon, VHOST_SCSI_COMMON) + +struct VHostSCSICommon { + VirtIOSCSICommon parent_obj; + + Error *migration_blocker; + + struct vhost_dev dev; + const int *feature_bits; + int32_t bootindex; + int channel; + int target; + int lun; + uint64_t host_features; + bool migratable; + + struct vhost_inflight *inflight; +}; + +int vhost_scsi_common_start(VHostSCSICommon *vsc); +void vhost_scsi_common_stop(VHostSCSICommon *vsc); +char *vhost_scsi_common_get_fw_dev_path(FWPathProvider *p, BusState *bus, + DeviceState *dev); +void vhost_scsi_common_set_config(VirtIODevice *vdev, const uint8_t *config); +uint64_t vhost_scsi_common_get_features(VirtIODevice *vdev, uint64_t features, + Error **errp); + +#endif /* VHOST_SCSI_COMMON_H */ diff --git a/include/hw/virtio/vhost-scsi.h b/include/hw/virtio/vhost-scsi.h new file mode 100644 index 000000000..7dc2bdd69 --- /dev/null +++ b/include/hw/virtio/vhost-scsi.h @@ -0,0 +1,35 @@ +/* + * vhost_scsi host device + * + * Copyright IBM, Corp. 2011 + * + * Authors: + * Stefan Hajnoczi + * + * This work is licensed under the terms of the GNU LGPL, version 2 or later. + * See the COPYING.LIB file in the top-level directory. + * + */ + +#ifndef VHOST_SCSI_H +#define VHOST_SCSI_H + +#include "hw/virtio/virtio-scsi.h" +#include "hw/virtio/vhost.h" +#include "hw/virtio/vhost-scsi-common.h" +#include "qom/object.h" + +enum vhost_scsi_vq_list { + VHOST_SCSI_VQ_CONTROL = 0, + VHOST_SCSI_VQ_EVENT = 1, + VHOST_SCSI_VQ_NUM_FIXED = 2, +}; + +#define TYPE_VHOST_SCSI "vhost-scsi" +OBJECT_DECLARE_SIMPLE_TYPE(VHostSCSI, VHOST_SCSI) + +struct VHostSCSI { + VHostSCSICommon parent_obj; +}; + +#endif diff --git a/include/hw/virtio/vhost-user-blk.h b/include/hw/virtio/vhost-user-blk.h new file mode 100644 index 000000000..7c91f1504 --- /dev/null +++ b/include/hw/virtio/vhost-user-blk.h @@ -0,0 +1,56 @@ +/* + * vhost-user-blk host device + * Copyright(C) 2017 Intel Corporation. + * + * Authors: + * Changpeng Liu + * + * Based on vhost-scsi.h, Copyright IBM, Corp. 2011 + * + * This work is licensed under the terms of the GNU LGPL, version 2 or later. + * See the COPYING.LIB file in the top-level directory. + * + */ + +#ifndef VHOST_USER_BLK_H +#define VHOST_USER_BLK_H + +#include "standard-headers/linux/virtio_blk.h" +#include "hw/block/block.h" +#include "chardev/char-fe.h" +#include "hw/virtio/vhost.h" +#include "hw/virtio/vhost-user.h" +#include "qom/object.h" + +#define TYPE_VHOST_USER_BLK "vhost-user-blk" +OBJECT_DECLARE_SIMPLE_TYPE(VHostUserBlk, VHOST_USER_BLK) + +#define VHOST_USER_BLK_AUTO_NUM_QUEUES UINT16_MAX + +struct VHostUserBlk { + VirtIODevice parent_obj; + CharBackend chardev; + int32_t bootindex; + struct virtio_blk_config blkcfg; + uint16_t num_queues; + uint32_t queue_size; + uint32_t config_wce; + struct vhost_dev dev; + struct vhost_inflight *inflight; + VhostUserState vhost_user; + struct vhost_virtqueue *vhost_vqs; + VirtQueue **virtqs; + + /* + * There are at least two steps of initialization of the + * vhost-user device. The first is a "connect" step and + * second is a "start" step. Make a separation between + * those initialization phases by using two fields. + */ + /* vhost_user_blk_connect/vhost_user_blk_disconnect */ + bool connected; + /* vhost_user_blk_start/vhost_user_blk_stop */ + bool started_vu; +}; + +#endif diff --git a/include/hw/virtio/vhost-user-fs.h b/include/hw/virtio/vhost-user-fs.h new file mode 100644 index 000000000..698575277 --- /dev/null +++ b/include/hw/virtio/vhost-user-fs.h @@ -0,0 +1,46 @@ +/* + * Vhost-user filesystem virtio device + * + * Copyright 2018-2019 Red Hat, Inc. + * + * Authors: + * Stefan Hajnoczi + * + * This work is licensed under the terms of the GNU GPL, version 2 or + * (at your option) any later version. See the COPYING file in the + * top-level directory. + */ + +#ifndef _QEMU_VHOST_USER_FS_H +#define _QEMU_VHOST_USER_FS_H + +#include "hw/virtio/virtio.h" +#include "hw/virtio/vhost.h" +#include "hw/virtio/vhost-user.h" +#include "chardev/char-fe.h" +#include "qom/object.h" + +#define TYPE_VHOST_USER_FS "vhost-user-fs-device" +OBJECT_DECLARE_SIMPLE_TYPE(VHostUserFS, VHOST_USER_FS) + +typedef struct { + CharBackend chardev; + char *tag; + uint16_t num_request_queues; + uint16_t queue_size; +} VHostUserFSConf; + +struct VHostUserFS { + /*< private >*/ + VirtIODevice parent; + VHostUserFSConf conf; + struct vhost_virtqueue *vhost_vqs; + struct vhost_dev vhost_dev; + VhostUserState vhost_user; + VirtQueue **req_vqs; + VirtQueue *hiprio_vq; + + /*< public >*/ +}; + +#endif /* _QEMU_VHOST_USER_FS_H */ diff --git a/include/hw/virtio/vhost-user-scsi.h b/include/hw/virtio/vhost-user-scsi.h new file mode 100644 index 000000000..521b08e55 --- /dev/null +++ b/include/hw/virtio/vhost-user-scsi.h @@ -0,0 +1,34 @@ +/* + * vhost-user-scsi host device + * + * Copyright (c) 2016 Nutanix Inc. All rights reserved. + * + * Author: + * Felipe Franciosi + * + * This file is largely based on "vhost-scsi.h" by: + * Stefan Hajnoczi + * + * This work is licensed under the terms of the GNU LGPL, version 2 or later. + * See the COPYING.LIB file in the top-level directory. + * + */ + +#ifndef VHOST_USER_SCSI_H +#define VHOST_USER_SCSI_H + +#include "hw/virtio/virtio-scsi.h" +#include "hw/virtio/vhost.h" +#include "hw/virtio/vhost-user.h" +#include "hw/virtio/vhost-scsi-common.h" +#include "qom/object.h" + +#define TYPE_VHOST_USER_SCSI "vhost-user-scsi" +OBJECT_DECLARE_SIMPLE_TYPE(VHostUserSCSI, VHOST_USER_SCSI) + +struct VHostUserSCSI { + VHostSCSICommon parent_obj; + VhostUserState vhost_user; +}; + +#endif /* VHOST_USER_SCSI_H */ diff --git a/include/hw/virtio/vhost-user-vsock.h b/include/hw/virtio/vhost-user-vsock.h new file mode 100644 index 000000000..4cfd55824 --- /dev/null +++ b/include/hw/virtio/vhost-user-vsock.h @@ -0,0 +1,36 @@ +/* + * Vhost-user vsock virtio device + * + * Copyright 2020 Red Hat, Inc. + * + * This work is licensed under the terms of the GNU GPL, version 2 or + * (at your option) any later version. See the COPYING file in the + * top-level directory. + */ + +#ifndef _QEMU_VHOST_USER_VSOCK_H +#define _QEMU_VHOST_USER_VSOCK_H + +#include "hw/virtio/vhost-vsock-common.h" +#include "hw/virtio/vhost-user.h" +#include "standard-headers/linux/virtio_vsock.h" +#include "qom/object.h" + +#define TYPE_VHOST_USER_VSOCK "vhost-user-vsock-device" +OBJECT_DECLARE_SIMPLE_TYPE(VHostUserVSock, VHOST_USER_VSOCK) + +typedef struct { + CharBackend chardev; +} VHostUserVSockConf; + +struct VHostUserVSock { + /*< private >*/ + VHostVSockCommon parent; + VhostUserState vhost_user; + VHostUserVSockConf conf; + struct virtio_vsock_config vsockcfg; + + /*< public >*/ +}; + +#endif /* _QEMU_VHOST_USER_VSOCK_H */ diff --git a/include/hw/virtio/vhost-user.h b/include/hw/virtio/vhost-user.h new file mode 100644 index 000000000..a9abca328 --- /dev/null +++ b/include/hw/virtio/vhost-user.h @@ -0,0 +1,29 @@ +/* + * Copyright (c) 2017-2018 Intel Corporation + * + * This work is licensed under the terms of the GNU GPL, version 2. + * See the COPYING file in the top-level directory. + */ + +#ifndef HW_VIRTIO_VHOST_USER_H +#define HW_VIRTIO_VHOST_USER_H + +#include "chardev/char-fe.h" +#include "hw/virtio/virtio.h" + +typedef struct VhostUserHostNotifier { + MemoryRegion mr; + void *addr; + bool set; +} VhostUserHostNotifier; + +typedef struct VhostUserState { + CharBackend *chr; + VhostUserHostNotifier notifier[VIRTIO_QUEUE_MAX]; + int memory_slots; +} VhostUserState; + +bool vhost_user_init(VhostUserState *user, CharBackend *chr, Error **errp); +void vhost_user_cleanup(VhostUserState *user); + +#endif diff --git a/include/hw/virtio/vhost-vdpa.h b/include/hw/virtio/vhost-vdpa.h new file mode 100644 index 000000000..9b81a409d --- /dev/null +++ b/include/hw/virtio/vhost-vdpa.h @@ -0,0 +1,27 @@ +/* + * vhost-vdpa.h + * + * Copyright(c) 2017-2018 Intel Corporation. + * Copyright(c) 2020 Red Hat, Inc. + * + * This work is licensed under the terms of the GNU GPL, version 2 or later. + * See the COPYING file in the top-level directory. + * + */ + +#ifndef HW_VIRTIO_VHOST_VDPA_H +#define HW_VIRTIO_VHOST_VDPA_H + +#include "hw/virtio/virtio.h" + +typedef struct vhost_vdpa { + int device_fd; + uint32_t msg_type; + MemoryListener listener; + struct vhost_dev *dev; +} VhostVDPA; + +extern AddressSpace address_space_memory; +extern int vhost_vdpa_get_device_id(struct vhost_dev *dev, + uint32_t *device_id); +#endif diff --git a/include/hw/virtio/vhost-vsock-common.h b/include/hw/virtio/vhost-vsock-common.h new file mode 100644 index 000000000..e412b5ee9 --- /dev/null +++ b/include/hw/virtio/vhost-vsock-common.h @@ -0,0 +1,47 @@ +/* + * Parent class for vhost-vsock devices + * + * Copyright 2015-2020 Red Hat, Inc. + * + * This work is licensed under the terms of the GNU GPL, version 2 or + * (at your option) any later version. See the COPYING file in the + * top-level directory. + */ + +#ifndef _QEMU_VHOST_VSOCK_COMMON_H +#define _QEMU_VHOST_VSOCK_COMMON_H + +#include "hw/virtio/virtio.h" +#include "hw/virtio/vhost.h" +#include "qom/object.h" + +#define TYPE_VHOST_VSOCK_COMMON "vhost-vsock-common" +OBJECT_DECLARE_SIMPLE_TYPE(VHostVSockCommon, VHOST_VSOCK_COMMON) + +enum { + VHOST_VSOCK_SAVEVM_VERSION = 0, + + VHOST_VSOCK_QUEUE_SIZE = 128, +}; + +struct VHostVSockCommon { + VirtIODevice parent; + + struct vhost_virtqueue vhost_vqs[2]; + struct vhost_dev vhost_dev; + + VirtQueue *event_vq; + VirtQueue *recv_vq; + VirtQueue *trans_vq; + + QEMUTimer *post_load_timer; +}; + +int vhost_vsock_common_start(VirtIODevice *vdev); +void vhost_vsock_common_stop(VirtIODevice *vdev); +int vhost_vsock_common_pre_save(void *opaque); +int vhost_vsock_common_post_load(void *opaque, int version_id); +void vhost_vsock_common_realize(VirtIODevice *vdev, const char *name); +void vhost_vsock_common_unrealize(VirtIODevice *vdev); + +#endif /* _QEMU_VHOST_VSOCK_COMMON_H */ diff --git a/include/hw/virtio/vhost-vsock.h b/include/hw/virtio/vhost-vsock.h new file mode 100644 index 000000000..84f4e727c --- /dev/null +++ b/include/hw/virtio/vhost-vsock.h @@ -0,0 +1,36 @@ +/* + * Vhost vsock virtio device + * + * Copyright 2015 Red Hat, Inc. + * + * Authors: + * Stefan Hajnoczi + * + * This work is licensed under the terms of the GNU GPL, version 2 or + * (at your option) any later version. See the COPYING file in the + * top-level directory. + */ + +#ifndef QEMU_VHOST_VSOCK_H +#define QEMU_VHOST_VSOCK_H + +#include "hw/virtio/vhost-vsock-common.h" +#include "qom/object.h" + +#define TYPE_VHOST_VSOCK "vhost-vsock-device" +OBJECT_DECLARE_SIMPLE_TYPE(VHostVSock, VHOST_VSOCK) + +typedef struct { + uint64_t guest_cid; + char *vhostfd; +} VHostVSockConf; + +struct VHostVSock { + /*< private >*/ + VHostVSockCommon parent; + VHostVSockConf conf; + + /*< public >*/ +}; + +#endif /* QEMU_VHOST_VSOCK_H */ diff --git a/include/hw/virtio/vhost.h b/include/hw/virtio/vhost.h new file mode 100644 index 000000000..4a8bc7541 --- /dev/null +++ b/include/hw/virtio/vhost.h @@ -0,0 +1,149 @@ +#ifndef VHOST_H +#define VHOST_H + +#include "hw/virtio/vhost-backend.h" +#include "hw/virtio/virtio.h" +#include "exec/memory.h" + +/* Generic structures common for any vhost based device. */ + +struct vhost_inflight { + int fd; + void *addr; + uint64_t size; + uint64_t offset; + uint16_t queue_size; +}; + +struct vhost_virtqueue { + int kick; + int call; + void *desc; + void *avail; + void *used; + int num; + unsigned long long desc_phys; + unsigned desc_size; + unsigned long long avail_phys; + unsigned avail_size; + unsigned long long used_phys; + unsigned used_size; + EventNotifier masked_notifier; + struct vhost_dev *dev; +}; + +typedef unsigned long vhost_log_chunk_t; +#define VHOST_LOG_PAGE 0x1000 +#define VHOST_LOG_BITS (8 * sizeof(vhost_log_chunk_t)) +#define VHOST_LOG_CHUNK (VHOST_LOG_PAGE * VHOST_LOG_BITS) +#define VHOST_INVALID_FEATURE_BIT (0xff) + +struct vhost_log { + unsigned long long size; + int refcnt; + int fd; + vhost_log_chunk_t *log; +}; + +struct vhost_dev; +struct vhost_iommu { + struct vhost_dev *hdev; + MemoryRegion *mr; + hwaddr iommu_offset; + IOMMUNotifier n; + QLIST_ENTRY(vhost_iommu) iommu_next; +}; + +typedef struct VhostDevConfigOps { + /* Vhost device config space changed callback + */ + int (*vhost_dev_config_notifier)(struct vhost_dev *dev); +} VhostDevConfigOps; + +struct vhost_memory; +struct vhost_dev { + VirtIODevice *vdev; + MemoryListener memory_listener; + MemoryListener iommu_listener; + struct vhost_memory *mem; + int n_mem_sections; + MemoryRegionSection *mem_sections; + int n_tmp_sections; + MemoryRegionSection *tmp_sections; + struct vhost_virtqueue *vqs; + int nvqs; + /* the first virtqueue which would be used by this vhost dev */ + int vq_index; + uint64_t features; + uint64_t acked_features; + uint64_t backend_features; + uint64_t protocol_features; + uint64_t max_queues; + uint64_t backend_cap; + bool started; + bool log_enabled; + uint64_t log_size; + Error *migration_blocker; + const VhostOps *vhost_ops; + void *opaque; + struct vhost_log *log; + QLIST_ENTRY(vhost_dev) entry; + QLIST_HEAD(, vhost_iommu) iommu_list; + IOMMUNotifier n; + const VhostDevConfigOps *config_ops; +}; + +struct vhost_net { + struct vhost_dev dev; + struct vhost_virtqueue vqs[2]; + int backend; + NetClientState *nc; +}; + +int vhost_dev_init(struct vhost_dev *hdev, void *opaque, + VhostBackendType backend_type, + uint32_t busyloop_timeout); +void vhost_dev_cleanup(struct vhost_dev *hdev); +int vhost_dev_start(struct vhost_dev *hdev, VirtIODevice *vdev); +void vhost_dev_stop(struct vhost_dev *hdev, VirtIODevice *vdev); +int vhost_dev_enable_notifiers(struct vhost_dev *hdev, VirtIODevice *vdev); +void vhost_dev_disable_notifiers(struct vhost_dev *hdev, VirtIODevice *vdev); + +/* Test and clear masked event pending status. + * Should be called after unmask to avoid losing events. + */ +bool vhost_virtqueue_pending(struct vhost_dev *hdev, int n); + +/* Mask/unmask events from this vq. + */ +void vhost_virtqueue_mask(struct vhost_dev *hdev, VirtIODevice *vdev, int n, + bool mask); +uint64_t vhost_get_features(struct vhost_dev *hdev, const int *feature_bits, + uint64_t features); +void vhost_ack_features(struct vhost_dev *hdev, const int *feature_bits, + uint64_t features); +bool vhost_has_free_slot(void); + +int vhost_net_set_backend(struct vhost_dev *hdev, + struct vhost_vring_file *file); + +int vhost_device_iotlb_miss(struct vhost_dev *dev, uint64_t iova, int write); +int vhost_dev_get_config(struct vhost_dev *dev, uint8_t *config, + uint32_t config_len); +int vhost_dev_set_config(struct vhost_dev *dev, const uint8_t *data, + uint32_t offset, uint32_t size, uint32_t flags); +/* notifier callback in case vhost device config space changed + */ +void vhost_dev_set_config_notifier(struct vhost_dev *dev, + const VhostDevConfigOps *ops); + +void vhost_dev_reset_inflight(struct vhost_inflight *inflight); +void vhost_dev_free_inflight(struct vhost_inflight *inflight); +void vhost_dev_save_inflight(struct vhost_inflight *inflight, QEMUFile *f); +int vhost_dev_load_inflight(struct vhost_inflight *inflight, QEMUFile *f); +int vhost_dev_prepare_inflight(struct vhost_dev *hdev, VirtIODevice *vdev); +int vhost_dev_set_inflight(struct vhost_dev *dev, + struct vhost_inflight *inflight); +int vhost_dev_get_inflight(struct vhost_dev *dev, uint16_t queue_size, + struct vhost_inflight *inflight); +#endif diff --git a/include/hw/virtio/virtio-access.h b/include/hw/virtio/virtio-access.h new file mode 100644 index 000000000..6818a23a2 --- /dev/null +++ b/include/hw/virtio/virtio-access.h @@ -0,0 +1,243 @@ +/* + * Virtio Accessor Support: In case your target can change endian. + * + * Copyright IBM, Corp. 2013 + * + * Authors: + * Rusty Russell + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 2 of the License, or + * (at your option) any later version. + * + */ + +#ifndef QEMU_VIRTIO_ACCESS_H +#define QEMU_VIRTIO_ACCESS_H + +#include "exec/hwaddr.h" +#include "hw/virtio/virtio.h" +#include "hw/virtio/virtio-bus.h" + +#if defined(TARGET_PPC64) || defined(TARGET_ARM) +#define LEGACY_VIRTIO_IS_BIENDIAN 1 +#endif + +static inline bool virtio_access_is_big_endian(VirtIODevice *vdev) +{ +#if defined(LEGACY_VIRTIO_IS_BIENDIAN) + return virtio_is_big_endian(vdev); +#elif defined(TARGET_WORDS_BIGENDIAN) + if (virtio_vdev_has_feature(vdev, VIRTIO_F_VERSION_1)) { + /* Devices conforming to VIRTIO 1.0 or later are always LE. */ + return false; + } + return true; +#else + return false; +#endif +} + +static inline uint16_t virtio_lduw_phys(VirtIODevice *vdev, hwaddr pa) +{ + AddressSpace *dma_as = vdev->dma_as; + + if (virtio_access_is_big_endian(vdev)) { + return lduw_be_phys(dma_as, pa); + } + return lduw_le_phys(dma_as, pa); +} + +static inline uint32_t virtio_ldl_phys(VirtIODevice *vdev, hwaddr pa) +{ + AddressSpace *dma_as = vdev->dma_as; + + if (virtio_access_is_big_endian(vdev)) { + return ldl_be_phys(dma_as, pa); + } + return ldl_le_phys(dma_as, pa); +} + +static inline uint64_t virtio_ldq_phys(VirtIODevice *vdev, hwaddr pa) +{ + AddressSpace *dma_as = vdev->dma_as; + + if (virtio_access_is_big_endian(vdev)) { + return ldq_be_phys(dma_as, pa); + } + return ldq_le_phys(dma_as, pa); +} + +static inline void virtio_stw_phys(VirtIODevice *vdev, hwaddr pa, + uint16_t value) +{ + AddressSpace *dma_as = vdev->dma_as; + + if (virtio_access_is_big_endian(vdev)) { + stw_be_phys(dma_as, pa, value); + } else { + stw_le_phys(dma_as, pa, value); + } +} + +static inline void virtio_stl_phys(VirtIODevice *vdev, hwaddr pa, + uint32_t value) +{ + AddressSpace *dma_as = vdev->dma_as; + + if (virtio_access_is_big_endian(vdev)) { + stl_be_phys(dma_as, pa, value); + } else { + stl_le_phys(dma_as, pa, value); + } +} + +static inline void virtio_stw_p(VirtIODevice *vdev, void *ptr, uint16_t v) +{ + if (virtio_access_is_big_endian(vdev)) { + stw_be_p(ptr, v); + } else { + stw_le_p(ptr, v); + } +} + +static inline void virtio_stl_p(VirtIODevice *vdev, void *ptr, uint32_t v) +{ + if (virtio_access_is_big_endian(vdev)) { + stl_be_p(ptr, v); + } else { + stl_le_p(ptr, v); + } +} + +static inline void virtio_stq_p(VirtIODevice *vdev, void *ptr, uint64_t v) +{ + if (virtio_access_is_big_endian(vdev)) { + stq_be_p(ptr, v); + } else { + stq_le_p(ptr, v); + } +} + +static inline int virtio_lduw_p(VirtIODevice *vdev, const void *ptr) +{ + if (virtio_access_is_big_endian(vdev)) { + return lduw_be_p(ptr); + } else { + return lduw_le_p(ptr); + } +} + +static inline int virtio_ldl_p(VirtIODevice *vdev, const void *ptr) +{ + if (virtio_access_is_big_endian(vdev)) { + return ldl_be_p(ptr); + } else { + return ldl_le_p(ptr); + } +} + +static inline uint64_t virtio_ldq_p(VirtIODevice *vdev, const void *ptr) +{ + if (virtio_access_is_big_endian(vdev)) { + return ldq_be_p(ptr); + } else { + return ldq_le_p(ptr); + } +} + +static inline uint16_t virtio_tswap16(VirtIODevice *vdev, uint16_t s) +{ +#ifdef HOST_WORDS_BIGENDIAN + return virtio_access_is_big_endian(vdev) ? s : bswap16(s); +#else + return virtio_access_is_big_endian(vdev) ? bswap16(s) : s; +#endif +} + +static inline uint16_t virtio_lduw_phys_cached(VirtIODevice *vdev, + MemoryRegionCache *cache, + hwaddr pa) +{ + if (virtio_access_is_big_endian(vdev)) { + return lduw_be_phys_cached(cache, pa); + } + return lduw_le_phys_cached(cache, pa); +} + +static inline uint32_t virtio_ldl_phys_cached(VirtIODevice *vdev, + MemoryRegionCache *cache, + hwaddr pa) +{ + if (virtio_access_is_big_endian(vdev)) { + return ldl_be_phys_cached(cache, pa); + } + return ldl_le_phys_cached(cache, pa); +} + +static inline uint64_t virtio_ldq_phys_cached(VirtIODevice *vdev, + MemoryRegionCache *cache, + hwaddr pa) +{ + if (virtio_access_is_big_endian(vdev)) { + return ldq_be_phys_cached(cache, pa); + } + return ldq_le_phys_cached(cache, pa); +} + +static inline void virtio_stw_phys_cached(VirtIODevice *vdev, + MemoryRegionCache *cache, + hwaddr pa, uint16_t value) +{ + if (virtio_access_is_big_endian(vdev)) { + stw_be_phys_cached(cache, pa, value); + } else { + stw_le_phys_cached(cache, pa, value); + } +} + +static inline void virtio_stl_phys_cached(VirtIODevice *vdev, + MemoryRegionCache *cache, + hwaddr pa, uint32_t value) +{ + if (virtio_access_is_big_endian(vdev)) { + stl_be_phys_cached(cache, pa, value); + } else { + stl_le_phys_cached(cache, pa, value); + } +} + +static inline void virtio_tswap16s(VirtIODevice *vdev, uint16_t *s) +{ + *s = virtio_tswap16(vdev, *s); +} + +static inline uint32_t virtio_tswap32(VirtIODevice *vdev, uint32_t s) +{ +#ifdef HOST_WORDS_BIGENDIAN + return virtio_access_is_big_endian(vdev) ? s : bswap32(s); +#else + return virtio_access_is_big_endian(vdev) ? bswap32(s) : s; +#endif +} + +static inline void virtio_tswap32s(VirtIODevice *vdev, uint32_t *s) +{ + *s = virtio_tswap32(vdev, *s); +} + +static inline uint64_t virtio_tswap64(VirtIODevice *vdev, uint64_t s) +{ +#ifdef HOST_WORDS_BIGENDIAN + return virtio_access_is_big_endian(vdev) ? s : bswap64(s); +#else + return virtio_access_is_big_endian(vdev) ? bswap64(s) : s; +#endif +} + +static inline void virtio_tswap64s(VirtIODevice *vdev, uint64_t *s) +{ + *s = virtio_tswap64(vdev, *s); +} +#endif /* QEMU_VIRTIO_ACCESS_H */ diff --git a/include/hw/virtio/virtio-balloon.h b/include/hw/virtio/virtio-balloon.h new file mode 100644 index 000000000..5139cf8ab --- /dev/null +++ b/include/hw/virtio/virtio-balloon.h @@ -0,0 +1,76 @@ +/* + * Virtio Support + * + * Copyright IBM, Corp. 2007-2008 + * + * Authors: + * Anthony Liguori + * Rusty Russell + * + * This work is licensed under the terms of the GNU GPL, version 2. See + * the COPYING file in the top-level directory. + * + */ + +#ifndef QEMU_VIRTIO_BALLOON_H +#define QEMU_VIRTIO_BALLOON_H + +#include "standard-headers/linux/virtio_balloon.h" +#include "hw/virtio/virtio.h" +#include "sysemu/iothread.h" +#include "qom/object.h" + +#define TYPE_VIRTIO_BALLOON "virtio-balloon-device" +OBJECT_DECLARE_SIMPLE_TYPE(VirtIOBalloon, VIRTIO_BALLOON) + +#define VIRTIO_BALLOON_FREE_PAGE_HINT_CMD_ID_MIN 0x80000000 + +typedef struct virtio_balloon_stat VirtIOBalloonStat; + +typedef struct virtio_balloon_stat_modern { + uint16_t tag; + uint8_t reserved[6]; + uint64_t val; +} VirtIOBalloonStatModern; + +enum virtio_balloon_free_page_hint_status { + FREE_PAGE_HINT_S_STOP = 0, + FREE_PAGE_HINT_S_REQUESTED = 1, + FREE_PAGE_HINT_S_START = 2, + FREE_PAGE_HINT_S_DONE = 3, +}; + +struct VirtIOBalloon { + VirtIODevice parent_obj; + VirtQueue *ivq, *dvq, *svq, *free_page_vq, *reporting_vq; + uint32_t free_page_hint_status; + uint32_t num_pages; + uint32_t actual; + uint32_t free_page_hint_cmd_id; + uint64_t stats[VIRTIO_BALLOON_S_NR]; + VirtQueueElement *stats_vq_elem; + size_t stats_vq_offset; + QEMUTimer *stats_timer; + IOThread *iothread; + QEMUBH *free_page_bh; + /* + * Lock to synchronize threads to access the free page reporting related + * fields (e.g. free_page_hint_status). + */ + QemuMutex free_page_lock; + QemuCond free_page_cond; + /* + * Set to block iothread to continue reading free page hints as the VM is + * stopped. + */ + bool block_iothread; + NotifierWithReturn free_page_hint_notify; + int64_t stats_last_update; + int64_t stats_poll_interval; + uint32_t host_features; + + bool qemu_4_0_config_size; + uint32_t poison_val; +}; + +#endif diff --git a/include/hw/virtio/virtio-blk.h b/include/hw/virtio/virtio-blk.h new file mode 100644 index 000000000..214ab7482 --- /dev/null +++ b/include/hw/virtio/virtio-blk.h @@ -0,0 +1,95 @@ +/* + * Virtio Block Device + * + * Copyright IBM, Corp. 2007 + * + * Authors: + * Anthony Liguori + * + * This work is licensed under the terms of the GNU GPL, version 2. See + * the COPYING file in the top-level directory. + * + */ + +#ifndef QEMU_VIRTIO_BLK_H +#define QEMU_VIRTIO_BLK_H + +#include "standard-headers/linux/virtio_blk.h" +#include "hw/virtio/virtio.h" +#include "hw/block/block.h" +#include "sysemu/iothread.h" +#include "sysemu/block-backend.h" +#include "qom/object.h" + +#define TYPE_VIRTIO_BLK "virtio-blk-device" +OBJECT_DECLARE_SIMPLE_TYPE(VirtIOBlock, VIRTIO_BLK) + +/* This is the last element of the write scatter-gather list */ +struct virtio_blk_inhdr +{ + unsigned char status; +}; + +#define VIRTIO_BLK_AUTO_NUM_QUEUES UINT16_MAX + +struct VirtIOBlkConf +{ + BlockConf conf; + IOThread *iothread; + char *serial; + uint32_t request_merging; + uint16_t num_queues; + uint16_t queue_size; + bool seg_max_adjust; + uint32_t max_discard_sectors; + uint32_t max_write_zeroes_sectors; + bool x_enable_wce_if_config_wce; +}; + +struct VirtIOBlockDataPlane; + +struct VirtIOBlockReq; +struct VirtIOBlock { + VirtIODevice parent_obj; + BlockBackend *blk; + void *rq; + QEMUBH *bh; + VirtIOBlkConf conf; + unsigned short sector_mask; + bool original_wce; + VMChangeStateEntry *change; + bool dataplane_disabled; + bool dataplane_started; + struct VirtIOBlockDataPlane *dataplane; + uint64_t host_features; + size_t config_size; +}; + +typedef struct VirtIOBlockReq { + VirtQueueElement elem; + int64_t sector_num; + VirtIOBlock *dev; + VirtQueue *vq; + IOVDiscardUndo inhdr_undo; + IOVDiscardUndo outhdr_undo; + struct virtio_blk_inhdr *in; + struct virtio_blk_outhdr out; + QEMUIOVector qiov; + size_t in_len; + struct VirtIOBlockReq *next; + struct VirtIOBlockReq *mr_next; + BlockAcctCookie acct; +} VirtIOBlockReq; + +#define VIRTIO_BLK_MAX_MERGE_REQS 32 + +typedef struct MultiReqBuffer { + VirtIOBlockReq *reqs[VIRTIO_BLK_MAX_MERGE_REQS]; + unsigned int num_reqs; + bool is_write; +} MultiReqBuffer; + +bool virtio_blk_handle_vq(VirtIOBlock *s, VirtQueue *vq); +void virtio_blk_process_queued_requests(VirtIOBlock *s, bool is_bh); + +#endif diff --git a/include/hw/virtio/virtio-bus.h b/include/hw/virtio/virtio-bus.h new file mode 100644 index 000000000..ef8abe49c --- /dev/null +++ b/include/hw/virtio/virtio-bus.h @@ -0,0 +1,158 @@ +/* + * VirtioBus + * + * Copyright (C) 2012 : GreenSocs Ltd + * http://www.greensocs.com/ , email: info@greensocs.com + * + * Developed by : + * Frederic Konrad + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, see . + * + */ + +#ifndef VIRTIO_BUS_H +#define VIRTIO_BUS_H + +#include "hw/qdev-core.h" +#include "hw/virtio/virtio.h" +#include "qom/object.h" + +#define TYPE_VIRTIO_BUS "virtio-bus" +typedef struct VirtioBusClass VirtioBusClass; +typedef struct VirtioBusState VirtioBusState; +DECLARE_OBJ_CHECKERS(VirtioBusState, VirtioBusClass, + VIRTIO_BUS, TYPE_VIRTIO_BUS) + + +struct VirtioBusClass { + /* This is what a VirtioBus must implement */ + BusClass parent; + void (*notify)(DeviceState *d, uint16_t vector); + void (*save_config)(DeviceState *d, QEMUFile *f); + void (*save_queue)(DeviceState *d, int n, QEMUFile *f); + void (*save_extra_state)(DeviceState *d, QEMUFile *f); + int (*load_config)(DeviceState *d, QEMUFile *f); + int (*load_queue)(DeviceState *d, int n, QEMUFile *f); + int (*load_done)(DeviceState *d, QEMUFile *f); + int (*load_extra_state)(DeviceState *d, QEMUFile *f); + bool (*has_extra_state)(DeviceState *d); + bool (*query_guest_notifiers)(DeviceState *d); + int (*set_guest_notifiers)(DeviceState *d, int nvqs, bool assign); + int (*set_host_notifier_mr)(DeviceState *d, int n, + MemoryRegion *mr, bool assign); + void (*vmstate_change)(DeviceState *d, bool running); + /* + * Expose the features the transport layer supports before + * the negotiation takes place. + */ + void (*pre_plugged)(DeviceState *d, Error **errp); + /* + * transport independent init function. + * This is called by virtio-bus just after the device is plugged. + */ + void (*device_plugged)(DeviceState *d, Error **errp); + /* + * transport independent exit function. + * This is called by virtio-bus just before the device is unplugged. + */ + void (*device_unplugged)(DeviceState *d); + int (*query_nvectors)(DeviceState *d); + /* + * ioeventfd handling: if the transport implements ioeventfd_assign, + * it must implement ioeventfd_enabled as well. + */ + /* Returns true if the ioeventfd is enabled for the device. */ + bool (*ioeventfd_enabled)(DeviceState *d); + /* + * Assigns/deassigns the ioeventfd backing for the transport on + * the device for queue number n. Returns an error value on + * failure. + */ + int (*ioeventfd_assign)(DeviceState *d, EventNotifier *notifier, + int n, bool assign); + /* + * Whether queue number n is enabled. + */ + bool (*queue_enabled)(DeviceState *d, int n); + /* + * Does the transport have variable vring alignment? + * (ie can it ever call virtio_queue_set_align()?) + * Note that changing this will break migration for this transport. + */ + bool has_variable_vring_alignment; + AddressSpace *(*get_dma_as)(DeviceState *d); +}; + +struct VirtioBusState { + BusState parent_obj; + + /* + * Set if ioeventfd has been started. + */ + bool ioeventfd_started; + + /* + * Set if ioeventfd has been grabbed by vhost. When ioeventfd + * is grabbed by vhost, we track its started/stopped state (which + * depends in turn on the virtio status register), but do not + * register a handler for the ioeventfd. When ioeventfd is + * released, if ioeventfd_started is true we finally register + * the handler so that QEMU's device model can use ioeventfd. + */ + int ioeventfd_grabbed; +}; + +void virtio_bus_device_plugged(VirtIODevice *vdev, Error **errp); +void virtio_bus_reset(VirtioBusState *bus); +void virtio_bus_device_unplugged(VirtIODevice *bus); +/* Get the device id of the plugged device. */ +uint16_t virtio_bus_get_vdev_id(VirtioBusState *bus); +/* Get the config_len field of the plugged device. */ +size_t virtio_bus_get_vdev_config_len(VirtioBusState *bus); +/* Get bad features of the plugged device. */ +uint32_t virtio_bus_get_vdev_bad_features(VirtioBusState *bus); +/* Get config of the plugged device. */ +void virtio_bus_get_vdev_config(VirtioBusState *bus, uint8_t *config); +/* Set config of the plugged device. */ +void virtio_bus_set_vdev_config(VirtioBusState *bus, uint8_t *config); + +static inline VirtIODevice *virtio_bus_get_device(VirtioBusState *bus) +{ + BusState *qbus = &bus->parent_obj; + BusChild *kid = QTAILQ_FIRST(&qbus->children); + DeviceState *qdev = kid ? kid->child : NULL; + + /* This is used on the data path, the cast is guaranteed + * to succeed by the qdev machinery. + */ + return (VirtIODevice *)qdev; +} + +/* Return whether the proxy allows ioeventfd. */ +bool virtio_bus_ioeventfd_enabled(VirtioBusState *bus); +/* Start the ioeventfd. */ +int virtio_bus_start_ioeventfd(VirtioBusState *bus); +/* Stop the ioeventfd. */ +void virtio_bus_stop_ioeventfd(VirtioBusState *bus); +/* Tell the bus that vhost is grabbing the ioeventfd. */ +int virtio_bus_grab_ioeventfd(VirtioBusState *bus); +/* bus that vhost is not using the ioeventfd anymore. */ +void virtio_bus_release_ioeventfd(VirtioBusState *bus); +/* Switch from/to the generic ioeventfd handler */ +int virtio_bus_set_host_notifier(VirtioBusState *bus, int n, bool assign); +/* Tell the bus that the ioeventfd handler is no longer required. */ +void virtio_bus_cleanup_host_notifier(VirtioBusState *bus, int n); + +#endif /* VIRTIO_BUS_H */ diff --git a/include/hw/virtio/virtio-crypto.h b/include/hw/virtio/virtio-crypto.h new file mode 100644 index 000000000..a2228d7b2 --- /dev/null +++ b/include/hw/virtio/virtio-crypto.h @@ -0,0 +1,102 @@ +/* + * Virtio crypto Support + * + * Copyright (c) 2016 HUAWEI TECHNOLOGIES CO., LTD. + * + * Authors: + * Gonglei + * + * This work is licensed under the terms of the GNU GPL, version 2 or + * (at your option) any later version. See the COPYING file in the + * top-level directory. + */ + +#ifndef QEMU_VIRTIO_CRYPTO_H +#define QEMU_VIRTIO_CRYPTO_H + +#include "standard-headers/linux/virtio_crypto.h" +#include "hw/virtio/virtio.h" +#include "sysemu/iothread.h" +#include "sysemu/cryptodev.h" +#include "qom/object.h" + + +#define DEBUG_VIRTIO_CRYPTO 0 + +#define DPRINTF(fmt, ...) \ +do { \ + if (DEBUG_VIRTIO_CRYPTO) { \ + fprintf(stderr, "virtio_crypto: " fmt, ##__VA_ARGS__); \ + } \ +} while (0) + + +#define TYPE_VIRTIO_CRYPTO "virtio-crypto-device" +OBJECT_DECLARE_SIMPLE_TYPE(VirtIOCrypto, VIRTIO_CRYPTO) +#define VIRTIO_CRYPTO_GET_PARENT_CLASS(obj) \ + OBJECT_GET_PARENT_CLASS(obj, TYPE_VIRTIO_CRYPTO) + + +typedef struct VirtIOCryptoConf { + CryptoDevBackend *cryptodev; + + /* Supported service mask */ + uint32_t crypto_services; + + /* Detailed algorithms mask */ + uint32_t cipher_algo_l; + uint32_t cipher_algo_h; + uint32_t hash_algo; + uint32_t mac_algo_l; + uint32_t mac_algo_h; + uint32_t aead_algo; + + /* Maximum length of cipher key */ + uint32_t max_cipher_key_len; + /* Maximum length of authenticated key */ + uint32_t max_auth_key_len; + /* Maximum size of each crypto request's content */ + uint64_t max_size; +} VirtIOCryptoConf; + +struct VirtIOCrypto; + +typedef struct VirtIOCryptoReq { + VirtQueueElement elem; + /* flags of operation, such as type of algorithm */ + uint32_t flags; + struct virtio_crypto_inhdr *in; + struct iovec *in_iov; /* Head address of dest iovec */ + unsigned int in_num; /* Number of dest iovec */ + size_t in_len; + VirtQueue *vq; + struct VirtIOCrypto *vcrypto; + union { + CryptoDevBackendSymOpInfo *sym_op_info; + } u; +} VirtIOCryptoReq; + +typedef struct VirtIOCryptoQueue { + VirtQueue *dataq; + QEMUBH *dataq_bh; + struct VirtIOCrypto *vcrypto; +} VirtIOCryptoQueue; + +struct VirtIOCrypto { + VirtIODevice parent_obj; + + VirtQueue *ctrl_vq; + VirtIOCryptoQueue *vqs; + VirtIOCryptoConf conf; + CryptoDevBackend *cryptodev; + + uint32_t max_queues; + uint32_t status; + + int multiqueue; + uint32_t curr_queues; + size_t config_size; + uint8_t vhost_started; +}; + +#endif /* QEMU_VIRTIO_CRYPTO_H */ diff --git a/include/hw/virtio/virtio-gpu-bswap.h b/include/hw/virtio/virtio-gpu-bswap.h new file mode 100644 index 000000000..203f9e171 --- /dev/null +++ b/include/hw/virtio/virtio-gpu-bswap.h @@ -0,0 +1,62 @@ +/* + * Virtio GPU Device + * + * Copyright Red Hat, Inc. 2013-2014 + * + * Authors: + * Dave Airlie + * Gerd Hoffmann + * + * This work is licensed under the terms of the GNU GPL, version 2 or later. + * See the COPYING file in the top-level directory. + */ + +#ifndef HW_VIRTIO_GPU_BSWAP_H +#define HW_VIRTIO_GPU_BSWAP_H + +#include "qemu/bswap.h" +#include "standard-headers/linux/virtio_gpu.h" + +static inline void +virtio_gpu_ctrl_hdr_bswap(struct virtio_gpu_ctrl_hdr *hdr) +{ + le32_to_cpus(&hdr->type); + le32_to_cpus(&hdr->flags); + le64_to_cpus(&hdr->fence_id); + le32_to_cpus(&hdr->ctx_id); + le32_to_cpus(&hdr->padding); +} + +static inline void +virtio_gpu_bswap_32(void *ptr, size_t size) +{ +#ifdef HOST_WORDS_BIGENDIAN + + size_t i; + struct virtio_gpu_ctrl_hdr *hdr = (struct virtio_gpu_ctrl_hdr *) ptr; + + virtio_gpu_ctrl_hdr_bswap(hdr); + + i = sizeof(struct virtio_gpu_ctrl_hdr); + while (i < size) { + le32_to_cpus((uint32_t *)(ptr + i)); + i = i + sizeof(uint32_t); + } + +#endif +} + +static inline void +virtio_gpu_t2d_bswap(struct virtio_gpu_transfer_to_host_2d *t2d) +{ + virtio_gpu_ctrl_hdr_bswap(&t2d->hdr); + le32_to_cpus(&t2d->r.x); + le32_to_cpus(&t2d->r.y); + le32_to_cpus(&t2d->r.width); + le32_to_cpus(&t2d->r.height); + le64_to_cpus(&t2d->offset); + le32_to_cpus(&t2d->resource_id); + le32_to_cpus(&t2d->padding); +} + +#endif diff --git a/include/hw/virtio/virtio-gpu-pci.h b/include/hw/virtio/virtio-gpu-pci.h new file mode 100644 index 000000000..225cbbc2e --- /dev/null +++ b/include/hw/virtio/virtio-gpu-pci.h @@ -0,0 +1,39 @@ +/* + * Virtio GPU PCI Device + * + * Copyright Red Hat, Inc. 2013-2014 + * + * Authors: + * Dave Airlie + * Gerd Hoffmann + * + * This work is licensed under the terms of the GNU GPL, version 2. + * See the COPYING file in the top-level directory. + */ + +#ifndef HW_VIRTIO_GPU_PCI_H +#define HW_VIRTIO_GPU_PCI_H + +#include "hw/virtio/virtio-pci.h" +#include "hw/virtio/virtio-gpu.h" +#include "qom/object.h" + + +/* + * virtio-gpu-pci-base: This extends VirtioPCIProxy. + */ +#define TYPE_VIRTIO_GPU_PCI_BASE "virtio-gpu-pci-base" +OBJECT_DECLARE_SIMPLE_TYPE(VirtIOGPUPCIBase, VIRTIO_GPU_PCI_BASE) + +struct VirtIOGPUPCIBase { + VirtIOPCIProxy parent_obj; + VirtIOGPUBase *vgpu; +}; + +/* to share between PCI and VGA */ +#define DEFINE_VIRTIO_GPU_PCI_PROPERTIES(_state) \ + DEFINE_PROP_BIT("ioeventfd", _state, flags, \ + VIRTIO_PCI_FLAG_USE_IOEVENTFD_BIT, false), \ + DEFINE_PROP_UINT32("vectors", _state, nvectors, 3) + +#endif /* HW_VIRTIO_GPU_PCI_H */ diff --git a/include/hw/virtio/virtio-gpu-pixman.h b/include/hw/virtio/virtio-gpu-pixman.h new file mode 100644 index 000000000..4dba78275 --- /dev/null +++ b/include/hw/virtio/virtio-gpu-pixman.h @@ -0,0 +1,45 @@ +/* + * Virtio GPU Device + * + * Copyright Red Hat, Inc. 2013-2014 + * + * Authors: + * Dave Airlie + * Gerd Hoffmann + * + * This work is licensed under the terms of the GNU GPL, version 2 or later. + * See the COPYING file in the top-level directory. + */ + +#ifndef HW_VIRTIO_GPU_PIXMAN_H +#define HW_VIRTIO_GPU_PIXMAN_H + +#include "ui/qemu-pixman.h" +#include "standard-headers/linux/virtio_gpu.h" + +static inline pixman_format_code_t +virtio_gpu_get_pixman_format(uint32_t virtio_gpu_format) +{ + switch (virtio_gpu_format) { + case VIRTIO_GPU_FORMAT_B8G8R8X8_UNORM: + return PIXMAN_BE_b8g8r8x8; + case VIRTIO_GPU_FORMAT_B8G8R8A8_UNORM: + return PIXMAN_BE_b8g8r8a8; + case VIRTIO_GPU_FORMAT_X8R8G8B8_UNORM: + return PIXMAN_BE_x8r8g8b8; + case VIRTIO_GPU_FORMAT_A8R8G8B8_UNORM: + return PIXMAN_BE_a8r8g8b8; + case VIRTIO_GPU_FORMAT_R8G8B8X8_UNORM: + return PIXMAN_BE_r8g8b8x8; + case VIRTIO_GPU_FORMAT_R8G8B8A8_UNORM: + return PIXMAN_BE_r8g8b8a8; + case VIRTIO_GPU_FORMAT_X8B8G8R8_UNORM: + return PIXMAN_BE_x8b8g8r8; + case VIRTIO_GPU_FORMAT_A8B8G8R8_UNORM: + return PIXMAN_BE_a8b8g8r8; + default: + return 0; + } +} + +#endif diff --git a/include/hw/virtio/virtio-gpu.h b/include/hw/virtio/virtio-gpu.h new file mode 100644 index 000000000..1aed7275c --- /dev/null +++ b/include/hw/virtio/virtio-gpu.h @@ -0,0 +1,221 @@ +/* + * Virtio GPU Device + * + * Copyright Red Hat, Inc. 2013-2014 + * + * Authors: + * Dave Airlie + * Gerd Hoffmann + * + * This work is licensed under the terms of the GNU GPL, version 2. + * See the COPYING file in the top-level directory. + */ + +#ifndef HW_VIRTIO_GPU_H +#define HW_VIRTIO_GPU_H + +#include "qemu/queue.h" +#include "ui/qemu-pixman.h" +#include "ui/console.h" +#include "hw/virtio/virtio.h" +#include "qemu/log.h" +#include "sysemu/vhost-user-backend.h" + +#include "standard-headers/linux/virtio_gpu.h" +#include "qom/object.h" + +#define TYPE_VIRTIO_GPU_BASE "virtio-gpu-base" +OBJECT_DECLARE_TYPE(VirtIOGPUBase, VirtIOGPUBaseClass, + VIRTIO_GPU_BASE) + +#define TYPE_VIRTIO_GPU "virtio-gpu-device" +OBJECT_DECLARE_SIMPLE_TYPE(VirtIOGPU, VIRTIO_GPU) + +#define TYPE_VHOST_USER_GPU "vhost-user-gpu" +OBJECT_DECLARE_SIMPLE_TYPE(VhostUserGPU, VHOST_USER_GPU) + +#define VIRTIO_ID_GPU 16 + +struct virtio_gpu_simple_resource { + uint32_t resource_id; + uint32_t width; + uint32_t height; + uint32_t format; + uint64_t *addrs; + struct iovec *iov; + unsigned int iov_cnt; + uint32_t scanout_bitmask; + pixman_image_t *image; + uint64_t hostmem; + QTAILQ_ENTRY(virtio_gpu_simple_resource) next; +}; + +struct virtio_gpu_scanout { + QemuConsole *con; + DisplaySurface *ds; + uint32_t width, height; + int x, y; + int invalidate; + uint32_t resource_id; + struct virtio_gpu_update_cursor cursor; + QEMUCursor *current_cursor; +}; + +struct virtio_gpu_requested_state { + uint16_t width_mm, height_mm; + uint32_t width, height; + int x, y; +}; + +enum virtio_gpu_base_conf_flags { + VIRTIO_GPU_FLAG_VIRGL_ENABLED = 1, + VIRTIO_GPU_FLAG_STATS_ENABLED, + VIRTIO_GPU_FLAG_EDID_ENABLED, +}; + +#define virtio_gpu_virgl_enabled(_cfg) \ + (_cfg.flags & (1 << VIRTIO_GPU_FLAG_VIRGL_ENABLED)) +#define virtio_gpu_stats_enabled(_cfg) \ + (_cfg.flags & (1 << VIRTIO_GPU_FLAG_STATS_ENABLED)) +#define virtio_gpu_edid_enabled(_cfg) \ + (_cfg.flags & (1 << VIRTIO_GPU_FLAG_EDID_ENABLED)) + +struct virtio_gpu_base_conf { + uint32_t max_outputs; + uint32_t flags; + uint32_t xres; + uint32_t yres; +}; + +struct virtio_gpu_ctrl_command { + VirtQueueElement elem; + VirtQueue *vq; + struct virtio_gpu_ctrl_hdr cmd_hdr; + uint32_t error; + bool finished; + QTAILQ_ENTRY(virtio_gpu_ctrl_command) next; +}; + +struct VirtIOGPUBase { + VirtIODevice parent_obj; + + Error *migration_blocker; + + struct virtio_gpu_base_conf conf; + struct virtio_gpu_config virtio_config; + const GraphicHwOps *hw_ops; + + bool use_virgl_renderer; + int renderer_blocked; + int enable; + + struct virtio_gpu_scanout scanout[VIRTIO_GPU_MAX_SCANOUTS]; + + int enabled_output_bitmask; + struct virtio_gpu_requested_state req_state[VIRTIO_GPU_MAX_SCANOUTS]; +}; + +struct VirtIOGPUBaseClass { + VirtioDeviceClass parent; + + void (*gl_unblock)(VirtIOGPUBase *g); +}; + +#define VIRTIO_GPU_BASE_PROPERTIES(_state, _conf) \ + DEFINE_PROP_UINT32("max_outputs", _state, _conf.max_outputs, 1), \ + DEFINE_PROP_BIT("edid", _state, _conf.flags, \ + VIRTIO_GPU_FLAG_EDID_ENABLED, true), \ + DEFINE_PROP_UINT32("xres", _state, _conf.xres, 1024), \ + DEFINE_PROP_UINT32("yres", _state, _conf.yres, 768) + +struct VirtIOGPU { + VirtIOGPUBase parent_obj; + + uint64_t conf_max_hostmem; + + VirtQueue *ctrl_vq; + VirtQueue *cursor_vq; + + QEMUBH *ctrl_bh; + QEMUBH *cursor_bh; + + QTAILQ_HEAD(, virtio_gpu_simple_resource) reslist; + QTAILQ_HEAD(, virtio_gpu_ctrl_command) cmdq; + QTAILQ_HEAD(, virtio_gpu_ctrl_command) fenceq; + + uint64_t hostmem; + + bool renderer_inited; + bool renderer_reset; + QEMUTimer *fence_poll; + QEMUTimer *print_stats; + + uint32_t inflight; + struct { + uint32_t max_inflight; + uint32_t requests; + uint32_t req_3d; + uint32_t bytes_3d; + } stats; +}; + +struct VhostUserGPU { + VirtIOGPUBase parent_obj; + + VhostUserBackend *vhost; + int vhost_gpu_fd; /* closed by the chardev */ + CharBackend vhost_chr; + QemuDmaBuf dmabuf[VIRTIO_GPU_MAX_SCANOUTS]; + bool backend_blocked; +}; + +#define VIRTIO_GPU_FILL_CMD(out) do { \ + size_t s; \ + s = iov_to_buf(cmd->elem.out_sg, cmd->elem.out_num, 0, \ + &out, sizeof(out)); \ + if (s != sizeof(out)) { \ + qemu_log_mask(LOG_GUEST_ERROR, \ + "%s: command size incorrect %zu vs %zu\n", \ + __func__, s, sizeof(out)); \ + return; \ + } \ + } while (0) + +/* virtio-gpu-base.c */ +bool virtio_gpu_base_device_realize(DeviceState *qdev, + VirtIOHandleOutput ctrl_cb, + VirtIOHandleOutput cursor_cb, + Error **errp); +void virtio_gpu_base_reset(VirtIOGPUBase *g); +void virtio_gpu_base_fill_display_info(VirtIOGPUBase *g, + struct virtio_gpu_resp_display_info *dpy_info); + +/* virtio-gpu.c */ +void virtio_gpu_ctrl_response(VirtIOGPU *g, + struct virtio_gpu_ctrl_command *cmd, + struct virtio_gpu_ctrl_hdr *resp, + size_t resp_len); +void virtio_gpu_ctrl_response_nodata(VirtIOGPU *g, + struct virtio_gpu_ctrl_command *cmd, + enum virtio_gpu_ctrl_type type); +void virtio_gpu_get_display_info(VirtIOGPU *g, + struct virtio_gpu_ctrl_command *cmd); +void virtio_gpu_get_edid(VirtIOGPU *g, + struct virtio_gpu_ctrl_command *cmd); +int virtio_gpu_create_mapping_iov(VirtIOGPU *g, + struct virtio_gpu_resource_attach_backing *ab, + struct virtio_gpu_ctrl_command *cmd, + uint64_t **addr, struct iovec **iov); +void virtio_gpu_cleanup_mapping_iov(VirtIOGPU *g, + struct iovec *iov, uint32_t count); +void virtio_gpu_process_cmdq(VirtIOGPU *g); + +/* virtio-gpu-3d.c */ +void virtio_gpu_virgl_process_cmd(VirtIOGPU *g, + struct virtio_gpu_ctrl_command *cmd); +void virtio_gpu_virgl_fence_poll(VirtIOGPU *g); +void virtio_gpu_virgl_reset(VirtIOGPU *g); +int virtio_gpu_virgl_init(VirtIOGPU *g); +int virtio_gpu_virgl_get_num_capsets(VirtIOGPU *g); + +#endif diff --git a/include/hw/virtio/virtio-input.h b/include/hw/virtio/virtio-input.h new file mode 100644 index 000000000..f2da63d30 --- /dev/null +++ b/include/hw/virtio/virtio-input.h @@ -0,0 +1,115 @@ +#ifndef QEMU_VIRTIO_INPUT_H +#define QEMU_VIRTIO_INPUT_H + +#include "ui/input.h" +#include "sysemu/vhost-user-backend.h" + +/* ----------------------------------------------------------------- */ +/* virtio input protocol */ + +#include "standard-headers/linux/virtio_ids.h" +#include "standard-headers/linux/virtio_input.h" +#include "qom/object.h" + +typedef struct virtio_input_absinfo virtio_input_absinfo; +typedef struct virtio_input_config virtio_input_config; +typedef struct virtio_input_event virtio_input_event; + +/* ----------------------------------------------------------------- */ +/* qemu internals */ + +#define TYPE_VIRTIO_INPUT "virtio-input-device" +OBJECT_DECLARE_TYPE(VirtIOInput, VirtIOInputClass, + VIRTIO_INPUT) +#define VIRTIO_INPUT_GET_PARENT_CLASS(obj) \ + OBJECT_GET_PARENT_CLASS(obj, TYPE_VIRTIO_INPUT) + +#define TYPE_VIRTIO_INPUT_HID "virtio-input-hid-device" +#define TYPE_VIRTIO_KEYBOARD "virtio-keyboard-device" +#define TYPE_VIRTIO_MOUSE "virtio-mouse-device" +#define TYPE_VIRTIO_TABLET "virtio-tablet-device" + +OBJECT_DECLARE_SIMPLE_TYPE(VirtIOInputHID, VIRTIO_INPUT_HID) +#define VIRTIO_INPUT_HID_GET_PARENT_CLASS(obj) \ + OBJECT_GET_PARENT_CLASS(obj, TYPE_VIRTIO_INPUT_HID) + +#define TYPE_VIRTIO_INPUT_HOST "virtio-input-host-device" +OBJECT_DECLARE_SIMPLE_TYPE(VirtIOInputHost, VIRTIO_INPUT_HOST) +#define VIRTIO_INPUT_HOST_GET_PARENT_CLASS(obj) \ + OBJECT_GET_PARENT_CLASS(obj, TYPE_VIRTIO_INPUT_HOST) + +#define TYPE_VHOST_USER_INPUT "vhost-user-input" +OBJECT_DECLARE_SIMPLE_TYPE(VHostUserInput, VHOST_USER_INPUT) +#define VHOST_USER_INPUT_GET_PARENT_CLASS(obj) \ + OBJECT_GET_PARENT_CLASS(obj, TYPE_VHOST_USER_INPUT) + +typedef struct VirtIOInputConfig VirtIOInputConfig; + +struct VirtIOInputConfig { + virtio_input_config config; + QTAILQ_ENTRY(VirtIOInputConfig) node; +}; + +struct VirtIOInput { + VirtIODevice parent_obj; + uint8_t cfg_select; + uint8_t cfg_subsel; + uint32_t cfg_size; + QTAILQ_HEAD(, VirtIOInputConfig) cfg_list; + VirtQueue *evt, *sts; + char *serial; + + struct { + virtio_input_event event; + VirtQueueElement *elem; + } *queue; + uint32_t qindex, qsize; + + bool active; +}; + +struct VirtIOInputClass { + /*< private >*/ + VirtioDeviceClass parent; + /*< public >*/ + + DeviceRealize realize; + DeviceUnrealize unrealize; + void (*change_active)(VirtIOInput *vinput); + void (*handle_status)(VirtIOInput *vinput, virtio_input_event *event); +}; + +struct VirtIOInputHID { + VirtIOInput parent_obj; + char *display; + uint32_t head; + QemuInputHandler *handler; + QemuInputHandlerState *hs; + int ledstate; + bool wheel_axis; +}; + +struct VirtIOInputHost { + VirtIOInput parent_obj; + char *evdev; + int fd; +}; + +struct VHostUserInput { + VirtIOInput parent_obj; + + VhostUserBackend *vhost; +}; + +void virtio_input_send(VirtIOInput *vinput, virtio_input_event *event); +void virtio_input_init_config(VirtIOInput *vinput, + virtio_input_config *config); +virtio_input_config *virtio_input_find_config(VirtIOInput *vinput, + uint8_t select, + uint8_t subsel); +void virtio_input_add_config(VirtIOInput *vinput, + virtio_input_config *config); +void virtio_input_idstr_config(VirtIOInput *vinput, + uint8_t select, const char *string); + +#endif /* QEMU_VIRTIO_INPUT_H */ diff --git a/include/hw/virtio/virtio-iommu.h b/include/hw/virtio/virtio-iommu.h new file mode 100644 index 000000000..273e35c04 --- /dev/null +++ b/include/hw/virtio/virtio-iommu.h @@ -0,0 +1,63 @@ +/* + * virtio-iommu device + * + * Copyright (c) 2020 Red Hat, Inc. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2 or later, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along with + * this program. If not, see . + * + */ + +#ifndef QEMU_VIRTIO_IOMMU_H +#define QEMU_VIRTIO_IOMMU_H + +#include "standard-headers/linux/virtio_iommu.h" +#include "hw/virtio/virtio.h" +#include "hw/pci/pci.h" +#include "qom/object.h" + +#define TYPE_VIRTIO_IOMMU "virtio-iommu-device" +#define TYPE_VIRTIO_IOMMU_PCI "virtio-iommu-device-base" +OBJECT_DECLARE_SIMPLE_TYPE(VirtIOIOMMU, VIRTIO_IOMMU) + +#define TYPE_VIRTIO_IOMMU_MEMORY_REGION "virtio-iommu-memory-region" + +typedef struct IOMMUDevice { + void *viommu; + PCIBus *bus; + int devfn; + IOMMUMemoryRegion iommu_mr; + AddressSpace as; +} IOMMUDevice; + +typedef struct IOMMUPciBus { + PCIBus *bus; + IOMMUDevice *pbdev[]; /* Parent array is sparse, so dynamically alloc */ +} IOMMUPciBus; + +struct VirtIOIOMMU { + VirtIODevice parent_obj; + VirtQueue *req_vq; + VirtQueue *event_vq; + struct virtio_iommu_config config; + uint64_t features; + GHashTable *as_by_busptr; + IOMMUPciBus *iommu_pcibus_by_bus_num[PCI_BUS_MAX]; + PCIBus *primary_bus; + ReservedRegion *reserved_regions; + uint32_t nb_reserved_regions; + GTree *domains; + QemuMutex mutex; + GTree *endpoints; +}; + +#endif diff --git a/include/hw/virtio/virtio-mem.h b/include/hw/virtio/virtio-mem.h new file mode 100644 index 000000000..4eeb82d5d --- /dev/null +++ b/include/hw/virtio/virtio-mem.h @@ -0,0 +1,83 @@ +/* + * Virtio MEM device + * + * Copyright (C) 2020 Red Hat, Inc. + * + * Authors: + * David Hildenbrand + * + * This work is licensed under the terms of the GNU GPL, version 2. + * See the COPYING file in the top-level directory. + */ + +#ifndef HW_VIRTIO_MEM_H +#define HW_VIRTIO_MEM_H + +#include "standard-headers/linux/virtio_mem.h" +#include "hw/virtio/virtio.h" +#include "qapi/qapi-types-misc.h" +#include "sysemu/hostmem.h" +#include "qom/object.h" + +#define TYPE_VIRTIO_MEM "virtio-mem" + +OBJECT_DECLARE_TYPE(VirtIOMEM, VirtIOMEMClass, + VIRTIO_MEM) + +#define VIRTIO_MEM_MEMDEV_PROP "memdev" +#define VIRTIO_MEM_NODE_PROP "node" +#define VIRTIO_MEM_SIZE_PROP "size" +#define VIRTIO_MEM_REQUESTED_SIZE_PROP "requested-size" +#define VIRTIO_MEM_BLOCK_SIZE_PROP "block-size" +#define VIRTIO_MEM_ADDR_PROP "memaddr" + +struct VirtIOMEM { + VirtIODevice parent_obj; + + /* guest -> host request queue */ + VirtQueue *vq; + + /* bitmap used to track unplugged memory */ + int32_t bitmap_size; + unsigned long *bitmap; + + /* assigned memory backend and memory region */ + HostMemoryBackend *memdev; + + /* NUMA node */ + uint32_t node; + + /* assigned address of the region in guest physical memory */ + uint64_t addr; + + /* usable region size (<= region_size) */ + uint64_t usable_region_size; + + /* actual size (how much the guest plugged) */ + uint64_t size; + + /* requested size */ + uint64_t requested_size; + + /* block size and alignment */ + uint64_t block_size; + + /* notifiers to notify when "size" changes */ + NotifierList size_change_notifiers; + + /* don't migrate unplugged memory */ + NotifierWithReturn precopy_notifier; +}; + +struct VirtIOMEMClass { + /* private */ + VirtIODevice parent; + + /* public */ + void (*fill_device_info)(const VirtIOMEM *vmen, VirtioMEMDeviceInfo *vi); + MemoryRegion *(*get_memory_region)(VirtIOMEM *vmem, Error **errp); + void (*add_size_change_notifier)(VirtIOMEM *vmem, Notifier *notifier); + void (*remove_size_change_notifier)(VirtIOMEM *vmem, Notifier *notifier); +}; + +#endif diff --git a/include/hw/virtio/virtio-mmio.h b/include/hw/virtio/virtio-mmio.h new file mode 100644 index 000000000..d4c4c386a --- /dev/null +++ b/include/hw/virtio/virtio-mmio.h @@ -0,0 +1,70 @@ +/* + * Virtio MMIO bindings + * + * Copyright (c) 2011 Linaro Limited + * + * Author: + * Peter Maydell + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License; either version 2 + * of the License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, see . + */ + +#ifndef HW_VIRTIO_MMIO_H +#define HW_VIRTIO_MMIO_H + +#include "hw/virtio/virtio-bus.h" +#include "qom/object.h" + +/* QOM macros */ +/* virtio-mmio-bus */ +#define TYPE_VIRTIO_MMIO_BUS "virtio-mmio-bus" +/* This is reusing the VirtioBusState typedef from TYPE_VIRTIO_BUS */ +DECLARE_OBJ_CHECKERS(VirtioBusState, VirtioBusClass, + VIRTIO_MMIO_BUS, TYPE_VIRTIO_MMIO_BUS) + +/* virtio-mmio */ +#define TYPE_VIRTIO_MMIO "virtio-mmio" +OBJECT_DECLARE_SIMPLE_TYPE(VirtIOMMIOProxy, VIRTIO_MMIO) + +#define VIRT_MAGIC 0x74726976 /* 'virt' */ +#define VIRT_VERSION 2 +#define VIRT_VERSION_LEGACY 1 +#define VIRT_VENDOR 0x554D4551 /* 'QEMU' */ + +typedef struct VirtIOMMIOQueue { + uint16_t num; + bool enabled; + uint32_t desc[2]; + uint32_t avail[2]; + uint32_t used[2]; +} VirtIOMMIOQueue; + +struct VirtIOMMIOProxy { + /* Generic */ + SysBusDevice parent_obj; + MemoryRegion iomem; + qemu_irq irq; + bool legacy; + /* Guest accessible state needing migration and reset */ + uint32_t host_features_sel; + uint32_t guest_features_sel; + uint32_t guest_page_shift; + /* virtio-bus */ + VirtioBusState bus; + bool format_transport_address; + /* Fields only used for non-legacy (v2) devices */ + uint32_t guest_features[2]; + VirtIOMMIOQueue vqs[VIRTIO_QUEUE_MAX]; +}; + +#endif diff --git a/include/hw/virtio/virtio-net.h b/include/hw/virtio/virtio-net.h new file mode 100644 index 000000000..f4852ac27 --- /dev/null +++ b/include/hw/virtio/virtio-net.h @@ -0,0 +1,222 @@ +/* + * Virtio Network Device + * + * Copyright IBM, Corp. 2007 + * + * Authors: + * Anthony Liguori + * + * This work is licensed under the terms of the GNU GPL, version 2. See + * the COPYING file in the top-level directory. + * + */ + +#ifndef QEMU_VIRTIO_NET_H +#define QEMU_VIRTIO_NET_H + +#include "qemu/units.h" +#include "standard-headers/linux/virtio_net.h" +#include "hw/virtio/virtio.h" +#include "net/announce.h" +#include "qemu/option_int.h" +#include "qom/object.h" + +#define TYPE_VIRTIO_NET "virtio-net-device" +OBJECT_DECLARE_SIMPLE_TYPE(VirtIONet, VIRTIO_NET) + +#define TX_TIMER_INTERVAL 150000 /* 150 us */ + +/* Limit the number of packets that can be sent via a single flush + * of the TX queue. This gives us a guaranteed exit condition and + * ensures fairness in the io path. 256 conveniently matches the + * length of the TX queue and shows a good balance of performance + * and latency. */ +#define TX_BURST 256 + +typedef struct virtio_net_conf +{ + uint32_t txtimer; + int32_t txburst; + char *tx; + uint16_t rx_queue_size; + uint16_t tx_queue_size; + uint16_t mtu; + int32_t speed; + char *duplex_str; + uint8_t duplex; + char *primary_id_str; +} virtio_net_conf; + +/* Coalesced packets type & status */ +typedef enum { + RSC_COALESCE, /* Data been coalesced */ + RSC_FINAL, /* Will terminate current connection */ + RSC_NO_MATCH, /* No matched in the buffer pool */ + RSC_BYPASS, /* Packet to be bypass, not tcp, tcp ctrl, etc */ + RSC_CANDIDATE /* Data want to be coalesced */ +} CoalesceStatus; + +typedef struct VirtioNetRscStat { + uint32_t received; + uint32_t coalesced; + uint32_t over_size; + uint32_t cache; + uint32_t empty_cache; + uint32_t no_match_cache; + uint32_t win_update; + uint32_t no_match; + uint32_t tcp_syn; + uint32_t tcp_ctrl_drain; + uint32_t dup_ack; + uint32_t dup_ack1; + uint32_t dup_ack2; + uint32_t pure_ack; + uint32_t ack_out_of_win; + uint32_t data_out_of_win; + uint32_t data_out_of_order; + uint32_t data_after_pure_ack; + uint32_t bypass_not_tcp; + uint32_t tcp_option; + uint32_t tcp_all_opt; + uint32_t ip_frag; + uint32_t ip_ecn; + uint32_t ip_hacked; + uint32_t ip_option; + uint32_t purge_failed; + uint32_t drain_failed; + uint32_t final_failed; + int64_t timer; +} VirtioNetRscStat; + +/* Rsc unit general info used to checking if can coalescing */ +typedef struct VirtioNetRscUnit { + void *ip; /* ip header */ + uint16_t *ip_plen; /* data len pointer in ip header field */ + struct tcp_header *tcp; /* tcp header */ + uint16_t tcp_hdrlen; /* tcp header len */ + uint16_t payload; /* pure payload without virtio/eth/ip/tcp */ +} VirtioNetRscUnit; + +/* Coalesced segment */ +typedef struct VirtioNetRscSeg { + QTAILQ_ENTRY(VirtioNetRscSeg) next; + void *buf; + size_t size; + uint16_t packets; + uint16_t dup_ack; + bool is_coalesced; /* need recal ipv4 header checksum, mark here */ + VirtioNetRscUnit unit; + NetClientState *nc; +} VirtioNetRscSeg; + + +/* Chain is divided by protocol(ipv4/v6) and NetClientInfo */ +typedef struct VirtioNetRscChain { + QTAILQ_ENTRY(VirtioNetRscChain) next; + VirtIONet *n; /* VirtIONet */ + uint16_t proto; + uint8_t gso_type; + uint16_t max_payload; + QEMUTimer *drain_timer; + QTAILQ_HEAD(, VirtioNetRscSeg) buffers; + VirtioNetRscStat stat; +} VirtioNetRscChain; + +/* Maximum packet size we can receive from tap device: header + 64k */ +#define VIRTIO_NET_MAX_BUFSIZE (sizeof(struct virtio_net_hdr) + (64 * KiB)) + +#define VIRTIO_NET_RSS_MAX_KEY_SIZE 40 +#define VIRTIO_NET_RSS_MAX_TABLE_LEN 128 + +typedef struct VirtioNetRssData { + bool enabled; + bool redirect; + bool populate_hash; + uint32_t hash_types; + uint8_t key[VIRTIO_NET_RSS_MAX_KEY_SIZE]; + uint16_t indirections_len; + uint16_t *indirections_table; + uint16_t default_queue; +} VirtioNetRssData; + +typedef struct VirtIONetQueue { + VirtQueue *rx_vq; + VirtQueue *tx_vq; + QEMUTimer *tx_timer; + QEMUBH *tx_bh; + uint32_t tx_waiting; + struct { + VirtQueueElement *elem; + } async_tx; + struct VirtIONet *n; +} VirtIONetQueue; + +struct VirtIONet { + VirtIODevice parent_obj; + uint8_t mac[ETH_ALEN]; + uint16_t status; + VirtIONetQueue *vqs; + VirtQueue *ctrl_vq; + NICState *nic; + /* RSC Chains - temporary storage of coalesced data, + all these data are lost in case of migration */ + QTAILQ_HEAD(, VirtioNetRscChain) rsc_chains; + uint32_t tx_timeout; + int32_t tx_burst; + uint32_t has_vnet_hdr; + size_t host_hdr_len; + size_t guest_hdr_len; + uint64_t host_features; + uint32_t rsc_timeout; + uint8_t rsc4_enabled; + uint8_t rsc6_enabled; + uint8_t has_ufo; + uint32_t mergeable_rx_bufs; + uint8_t promisc; + uint8_t allmulti; + uint8_t alluni; + uint8_t nomulti; + uint8_t nouni; + uint8_t nobcast; + uint8_t vhost_started; + struct { + uint32_t in_use; + uint32_t first_multi; + uint8_t multi_overflow; + uint8_t uni_overflow; + uint8_t *macs; + } mac_table; + uint32_t *vlans; + virtio_net_conf net_conf; + NICConf nic_conf; + DeviceState *qdev; + int multiqueue; + uint16_t max_queues; + uint16_t curr_queues; + size_t config_size; + char *netclient_name; + char *netclient_type; + uint64_t curr_guest_offloads; + /* used on saved state restore phase to preserve the curr_guest_offloads */ + uint64_t saved_guest_offloads; + AnnounceTimer announce_timer; + bool needs_vnet_hdr_swap; + bool mtu_bypass_backend; + QemuOpts *primary_device_opts; + QDict *primary_device_dict; + DeviceState *primary_dev; + BusState *primary_bus; + char *primary_device_id; + char *standby_id; + bool primary_should_be_hidden; + bool failover; + DeviceListener primary_listener; + Notifier migration_state; + VirtioNetRssData rss_data; + struct NetRxPkt *rx_pkt; +}; + +void virtio_net_set_netclient_name(VirtIONet *n, const char *name, + const char *type); + +#endif diff --git a/include/hw/virtio/virtio-pmem.h b/include/hw/virtio/virtio-pmem.h new file mode 100644 index 000000000..fc4fd1f7f --- /dev/null +++ b/include/hw/virtio/virtio-pmem.h @@ -0,0 +1,46 @@ +/* + * Virtio PMEM device + * + * Copyright (C) 2018-2019 Red Hat, Inc. + * + * Authors: + * Pankaj Gupta + * David Hildenbrand + * + * This work is licensed under the terms of the GNU GPL, version 2. + * See the COPYING file in the top-level directory. + */ + +#ifndef HW_VIRTIO_PMEM_H +#define HW_VIRTIO_PMEM_H + +#include "hw/virtio/virtio.h" +#include "qapi/qapi-types-machine.h" +#include "qom/object.h" + +#define TYPE_VIRTIO_PMEM "virtio-pmem" + +OBJECT_DECLARE_TYPE(VirtIOPMEM, VirtIOPMEMClass, + VIRTIO_PMEM) + +#define VIRTIO_PMEM_ADDR_PROP "memaddr" +#define VIRTIO_PMEM_MEMDEV_PROP "memdev" + +struct VirtIOPMEM { + VirtIODevice parent_obj; + + VirtQueue *rq_vq; + uint64_t start; + HostMemoryBackend *memdev; +}; + +struct VirtIOPMEMClass { + /* private */ + VirtIODevice parent; + + /* public */ + void (*fill_device_info)(const VirtIOPMEM *pmem, VirtioPMEMDeviceInfo *vi); + MemoryRegion *(*get_memory_region)(VirtIOPMEM *pmem, Error **errp); +}; + +#endif diff --git a/include/hw/virtio/virtio-rng.h b/include/hw/virtio/virtio-rng.h new file mode 100644 index 000000000..82734255d --- /dev/null +++ b/include/hw/virtio/virtio-rng.h @@ -0,0 +1,51 @@ +/* + * Virtio RNG Support + * + * Copyright Red Hat, Inc. 2012 + * Copyright Amit Shah + * + * This work is licensed under the terms of the GNU GPL, version 2 or + * (at your option) any later version. See the COPYING file in the + * top-level directory. + */ + +#ifndef QEMU_VIRTIO_RNG_H +#define QEMU_VIRTIO_RNG_H + +#include "hw/virtio/virtio.h" +#include "sysemu/rng.h" +#include "standard-headers/linux/virtio_rng.h" +#include "qom/object.h" + +#define TYPE_VIRTIO_RNG "virtio-rng-device" +OBJECT_DECLARE_SIMPLE_TYPE(VirtIORNG, VIRTIO_RNG) +#define VIRTIO_RNG_GET_PARENT_CLASS(obj) \ + OBJECT_GET_PARENT_CLASS(obj, TYPE_VIRTIO_RNG) + +struct VirtIORNGConf { + RngBackend *rng; + uint64_t max_bytes; + uint32_t period_ms; +}; + +struct VirtIORNG { + VirtIODevice parent_obj; + + /* Only one vq - guest puts buffer(s) on it when it needs entropy */ + VirtQueue *vq; + + VirtIORNGConf conf; + + RngBackend *rng; + + /* We purposefully don't migrate this state. The quota will reset on the + * destination as a result. Rate limiting is host state, not guest state. + */ + QEMUTimer *rate_limit_timer; + int64_t quota_remaining; + bool activate_timer; + + VMChangeStateEntry *vmstate; +}; + +#endif diff --git a/include/hw/virtio/virtio-scsi.h b/include/hw/virtio/virtio-scsi.h new file mode 100644 index 000000000..543681bc1 --- /dev/null +++ b/include/hw/virtio/virtio-scsi.h @@ -0,0 +1,166 @@ +/* + * Virtio SCSI HBA + * + * Copyright IBM, Corp. 2010 + * + * Authors: + * Stefan Hajnoczi + * + * This work is licensed under the terms of the GNU GPL, version 2. See + * the COPYING file in the top-level directory. + * + */ + +#ifndef QEMU_VIRTIO_SCSI_H +#define QEMU_VIRTIO_SCSI_H +#include "qom/object.h" + +/* Override CDB/sense data size: they are dynamic (guest controlled) in QEMU */ +#define VIRTIO_SCSI_CDB_SIZE 0 +#define VIRTIO_SCSI_SENSE_SIZE 0 +#include "standard-headers/linux/virtio_scsi.h" +#include "hw/virtio/virtio.h" +#include "hw/pci/pci.h" +#include "hw/scsi/scsi.h" +#include "chardev/char-fe.h" +#include "sysemu/iothread.h" + +#define TYPE_VIRTIO_SCSI_COMMON "virtio-scsi-common" +OBJECT_DECLARE_SIMPLE_TYPE(VirtIOSCSICommon, VIRTIO_SCSI_COMMON) + +#define TYPE_VIRTIO_SCSI "virtio-scsi-device" +OBJECT_DECLARE_SIMPLE_TYPE(VirtIOSCSI, VIRTIO_SCSI) + +#define VIRTIO_SCSI_MAX_CHANNEL 0 +#define VIRTIO_SCSI_MAX_TARGET 255 +#define VIRTIO_SCSI_MAX_LUN 16383 + +/* Number of virtqueues that are always present */ +#define VIRTIO_SCSI_VQ_NUM_FIXED 2 + +#define VIRTIO_SCSI_AUTO_NUM_QUEUES UINT32_MAX + +typedef struct virtio_scsi_cmd_req VirtIOSCSICmdReq; +typedef struct virtio_scsi_cmd_resp VirtIOSCSICmdResp; +typedef struct virtio_scsi_ctrl_tmf_req VirtIOSCSICtrlTMFReq; +typedef struct virtio_scsi_ctrl_tmf_resp VirtIOSCSICtrlTMFResp; +typedef struct virtio_scsi_ctrl_an_req VirtIOSCSICtrlANReq; +typedef struct virtio_scsi_ctrl_an_resp VirtIOSCSICtrlANResp; +typedef struct virtio_scsi_event VirtIOSCSIEvent; +typedef struct virtio_scsi_config VirtIOSCSIConfig; + +struct VirtIOSCSIConf { + uint32_t num_queues; + uint32_t virtqueue_size; + bool seg_max_adjust; + uint32_t max_sectors; + uint32_t cmd_per_lun; +#ifdef CONFIG_VHOST_SCSI + char *vhostfd; + char *wwpn; +#endif + CharBackend chardev; + uint32_t boot_tpgt; + IOThread *iothread; +}; + +struct VirtIOSCSI; + +struct VirtIOSCSICommon { + VirtIODevice parent_obj; + VirtIOSCSIConf conf; + + uint32_t sense_size; + uint32_t cdb_size; + VirtQueue *ctrl_vq; + VirtQueue *event_vq; + VirtQueue **cmd_vqs; +}; + +struct VirtIOSCSI { + VirtIOSCSICommon parent_obj; + + SCSIBus bus; + int resetting; + bool events_dropped; + + /* Fields for dataplane below */ + AioContext *ctx; /* one iothread per virtio-scsi-pci for now */ + + bool dataplane_started; + bool dataplane_starting; + bool dataplane_stopping; + bool dataplane_fenced; + uint32_t host_features; +}; + +typedef struct VirtIOSCSIReq { + /* Note: + * - fields up to resp_iov are initialized by virtio_scsi_init_req; + * - fields starting at vring are zeroed by virtio_scsi_init_req. + * */ + VirtQueueElement elem; + + VirtIOSCSI *dev; + VirtQueue *vq; + QEMUSGList qsgl; + QEMUIOVector resp_iov; + + union { + /* Used for two-stage request submission */ + QTAILQ_ENTRY(VirtIOSCSIReq) next; + + /* Used for cancellation of request during TMFs */ + int remaining; + }; + + SCSIRequest *sreq; + size_t resp_size; + enum SCSIXferMode mode; + union { + VirtIOSCSICmdResp cmd; + VirtIOSCSICtrlTMFResp tmf; + VirtIOSCSICtrlANResp an; + VirtIOSCSIEvent event; + } resp; + union { + VirtIOSCSICmdReq cmd; + VirtIOSCSICtrlTMFReq tmf; + VirtIOSCSICtrlANReq an; + } req; +} VirtIOSCSIReq; + +static inline void virtio_scsi_acquire(VirtIOSCSI *s) +{ + if (s->ctx) { + aio_context_acquire(s->ctx); + } +} + +static inline void virtio_scsi_release(VirtIOSCSI *s) +{ + if (s->ctx) { + aio_context_release(s->ctx); + } +} + +void virtio_scsi_common_realize(DeviceState *dev, + VirtIOHandleOutput ctrl, + VirtIOHandleOutput evt, + VirtIOHandleOutput cmd, + Error **errp); + +void virtio_scsi_common_unrealize(DeviceState *dev); +bool virtio_scsi_handle_event_vq(VirtIOSCSI *s, VirtQueue *vq); +bool virtio_scsi_handle_cmd_vq(VirtIOSCSI *s, VirtQueue *vq); +bool virtio_scsi_handle_ctrl_vq(VirtIOSCSI *s, VirtQueue *vq); +void virtio_scsi_init_req(VirtIOSCSI *s, VirtQueue *vq, VirtIOSCSIReq *req); +void virtio_scsi_free_req(VirtIOSCSIReq *req); +void virtio_scsi_push_event(VirtIOSCSI *s, SCSIDevice *dev, + uint32_t event, uint32_t reason); + +void virtio_scsi_dataplane_setup(VirtIOSCSI *s, Error **errp); +int virtio_scsi_dataplane_start(VirtIODevice *s); +void virtio_scsi_dataplane_stop(VirtIODevice *s); + +#endif /* QEMU_VIRTIO_SCSI_H */ diff --git a/include/hw/virtio/virtio-serial.h b/include/hw/virtio/virtio-serial.h new file mode 100644 index 000000000..d87c62eab --- /dev/null +++ b/include/hw/virtio/virtio-serial.h @@ -0,0 +1,227 @@ +/* + * Virtio Serial / Console Support + * + * Copyright IBM, Corp. 2008 + * Copyright Red Hat, Inc. 2009, 2010 + * + * Authors: + * Christian Ehrhardt + * Amit Shah + * + * This work is licensed under the terms of the GNU GPL, version 2. See + * the COPYING file in the top-level directory. + * + */ + +#ifndef QEMU_VIRTIO_SERIAL_H +#define QEMU_VIRTIO_SERIAL_H + +#include "standard-headers/linux/virtio_console.h" +#include "hw/virtio/virtio.h" +#include "qom/object.h" + +struct virtio_serial_conf { + /* Max. number of ports we can have for a virtio-serial device */ + uint32_t max_virtserial_ports; +}; + +#define TYPE_VIRTIO_SERIAL_PORT "virtio-serial-port" +OBJECT_DECLARE_TYPE(VirtIOSerialPort, VirtIOSerialPortClass, + VIRTIO_SERIAL_PORT) + +typedef struct VirtIOSerial VirtIOSerial; + +#define TYPE_VIRTIO_SERIAL_BUS "virtio-serial-bus" +OBJECT_DECLARE_SIMPLE_TYPE(VirtIOSerialBus, VIRTIO_SERIAL_BUS) + + +struct VirtIOSerialPortClass { + DeviceClass parent_class; + + /* Is this a device that binds with hvc in the guest? */ + bool is_console; + + /* + * The per-port (or per-app) realize function that's called when a + * new device is found on the bus. + */ + DeviceRealize realize; + /* + * Per-port unrealize function that's called when a port gets + * hot-unplugged or removed. + */ + DeviceUnrealize unrealize; + + /* Callbacks for guest events */ + /* Guest opened/closed device. */ + void (*set_guest_connected)(VirtIOSerialPort *port, int guest_connected); + + /* Enable/disable backend for virtio serial port */ + void (*enable_backend)(VirtIOSerialPort *port, bool enable); + + /* Guest is now ready to accept data (virtqueues set up). */ + void (*guest_ready)(VirtIOSerialPort *port); + + /* + * Guest has enqueued a buffer for the host to write into. + * Called each time a buffer is enqueued by the guest; + * irrespective of whether there already were free buffers the + * host could have consumed. + * + * This is dependent on both the guest and host end being + * connected. + */ + void (*guest_writable)(VirtIOSerialPort *port); + + /* + * Guest wrote some data to the port. This data is handed over to + * the app via this callback. The app can return a size less than + * 'len'. In this case, throttling will be enabled for this port. + */ + ssize_t (*have_data)(VirtIOSerialPort *port, const uint8_t *buf, + ssize_t len); +}; + +/* + * This is the state that's shared between all the ports. Some of the + * state is configurable via command-line options. Some of it can be + * set by individual devices in their initfn routines. Some of the + * state is set by the generic qdev device init routine. + */ +struct VirtIOSerialPort { + DeviceState dev; + + QTAILQ_ENTRY(VirtIOSerialPort) next; + + /* + * This field gives us the virtio device as well as the qdev bus + * that we are associated with + */ + VirtIOSerial *vser; + + VirtQueue *ivq, *ovq; + + /* + * This name is sent to the guest and exported via sysfs. + * The guest could create symlinks based on this information. + * The name is in the reverse fqdn format, like org.qemu.console.0 + */ + char *name; + + /* + * This id helps identify ports between the guest and the host. + * The guest sends a "header" with this id with each data packet + * that it sends and the host can then find out which associated + * device to send out this data to + */ + uint32_t id; + + /* + * This is the elem that we pop from the virtqueue. A slow + * backend that consumes guest data (e.g. the file backend for + * qemu chardevs) can cause the guest to block till all the output + * is flushed. This isn't desired, so we keep a note of the last + * element popped and continue consuming it once the backend + * becomes writable again. + */ + VirtQueueElement *elem; + + /* + * The index and the offset into the iov buffer that was popped in + * elem above. + */ + uint32_t iov_idx; + uint64_t iov_offset; + + /* + * When unthrottling we use a bottom-half to call flush_queued_data. + */ + QEMUBH *bh; + + /* Is the corresponding guest device open? */ + bool guest_connected; + /* Is this device open for IO on the host? */ + bool host_connected; + /* Do apps not want to receive data? */ + bool throttled; +}; + +/* The virtio-serial bus on top of which the ports will ride as devices */ +struct VirtIOSerialBus { + BusState qbus; + + /* This is the parent device that provides the bus for ports. */ + VirtIOSerial *vser; + + /* The maximum number of ports that can ride on top of this bus */ + uint32_t max_nr_ports; +}; + +typedef struct VirtIOSerialPostLoad { + QEMUTimer *timer; + uint32_t nr_active_ports; + struct { + VirtIOSerialPort *port; + uint8_t host_connected; + } *connected; +} VirtIOSerialPostLoad; + +struct VirtIOSerial { + VirtIODevice parent_obj; + + VirtQueue *c_ivq, *c_ovq; + /* Arrays of ivqs and ovqs: one per port */ + VirtQueue **ivqs, **ovqs; + + VirtIOSerialBus bus; + + QTAILQ_HEAD(, VirtIOSerialPort) ports; + + QLIST_ENTRY(VirtIOSerial) next; + + /* bitmap for identifying active ports */ + uint32_t *ports_map; + + struct VirtIOSerialPostLoad *post_load; + + virtio_serial_conf serial; + + uint64_t host_features; +}; + +/* Interface to the virtio-serial bus */ + +/* + * Open a connection to the port + * Returns 0 on success (always). + */ +int virtio_serial_open(VirtIOSerialPort *port); + +/* + * Close the connection to the port + * Returns 0 on success (always). + */ +int virtio_serial_close(VirtIOSerialPort *port); + +/* + * Send data to Guest + */ +ssize_t virtio_serial_write(VirtIOSerialPort *port, const uint8_t *buf, + size_t size); + +/* + * Query whether a guest is ready to receive data. + */ +size_t virtio_serial_guest_ready(VirtIOSerialPort *port); + +/* + * Flow control: Ports can signal to the virtio-serial core to stop + * sending data or re-start sending data, depending on the 'throttle' + * value here. + */ +void virtio_serial_throttle_port(VirtIOSerialPort *port, bool throttle); + +#define TYPE_VIRTIO_SERIAL "virtio-serial-device" +OBJECT_DECLARE_SIMPLE_TYPE(VirtIOSerial, VIRTIO_SERIAL) + +#endif diff --git a/include/hw/virtio/virtio.h b/include/hw/virtio/virtio.h new file mode 100644 index 000000000..b7ece7a6a --- /dev/null +++ b/include/hw/virtio/virtio.h @@ -0,0 +1,400 @@ +/* + * Virtio Support + * + * Copyright IBM, Corp. 2007 + * + * Authors: + * Anthony Liguori + * + * This work is licensed under the terms of the GNU GPL, version 2. See + * the COPYING file in the top-level directory. + * + */ + +#ifndef QEMU_VIRTIO_H +#define QEMU_VIRTIO_H + +#include "exec/memory.h" +#include "hw/qdev-core.h" +#include "net/net.h" +#include "migration/vmstate.h" +#include "qemu/event_notifier.h" +#include "standard-headers/linux/virtio_config.h" +#include "standard-headers/linux/virtio_ring.h" +#include "qom/object.h" + +/* A guest should never accept this. It implies negotiation is broken. */ +#define VIRTIO_F_BAD_FEATURE 30 + +#define VIRTIO_LEGACY_FEATURES ((0x1ULL << VIRTIO_F_BAD_FEATURE) | \ + (0x1ULL << VIRTIO_F_NOTIFY_ON_EMPTY) | \ + (0x1ULL << VIRTIO_F_ANY_LAYOUT)) + +struct VirtQueue; + +static inline hwaddr vring_align(hwaddr addr, + unsigned long align) +{ + return QEMU_ALIGN_UP(addr, align); +} + +typedef struct VirtIOFeature { + uint64_t flags; + size_t end; +} VirtIOFeature; + +size_t virtio_feature_get_config_size(VirtIOFeature *features, + uint64_t host_features); + +typedef struct VirtQueue VirtQueue; + +#define VIRTQUEUE_MAX_SIZE 1024 + +typedef struct VirtQueueElement +{ + unsigned int index; + unsigned int len; + unsigned int ndescs; + unsigned int out_num; + unsigned int in_num; + hwaddr *in_addr; + hwaddr *out_addr; + struct iovec *in_sg; + struct iovec *out_sg; +} VirtQueueElement; + +#define VIRTIO_QUEUE_MAX 1024 + +#define VIRTIO_NO_VECTOR 0xffff + +#define TYPE_VIRTIO_DEVICE "virtio-device" +OBJECT_DECLARE_TYPE(VirtIODevice, VirtioDeviceClass, VIRTIO_DEVICE) + +enum virtio_device_endian { + VIRTIO_DEVICE_ENDIAN_UNKNOWN, + VIRTIO_DEVICE_ENDIAN_LITTLE, + VIRTIO_DEVICE_ENDIAN_BIG, +}; + +struct VirtIODevice +{ + DeviceState parent_obj; + const char *name; + uint8_t status; + uint8_t isr; + uint16_t queue_sel; + uint64_t guest_features; + uint64_t host_features; + uint64_t backend_features; + size_t config_len; + void *config; + uint16_t config_vector; + uint32_t generation; + int nvectors; + VirtQueue *vq; + MemoryListener listener; + uint16_t device_id; + bool vm_running; + bool broken; /* device in invalid state, needs reset */ + bool use_disabled_flag; /* allow use of 'disable' flag when needed */ + bool disabled; /* device in temporarily disabled state */ + bool use_started; + bool started; + bool start_on_kick; /* when virtio 1.0 feature has not been negotiated */ + bool disable_legacy_check; + VMChangeStateEntry *vmstate; + char *bus_name; + uint8_t device_endian; + bool use_guest_notifier_mask; + AddressSpace *dma_as; + QLIST_HEAD(, VirtQueue) *vector_queues; +}; + +struct VirtioDeviceClass { + /*< private >*/ + DeviceClass parent; + /*< public >*/ + + /* This is what a VirtioDevice must implement */ + DeviceRealize realize; + DeviceUnrealize unrealize; + uint64_t (*get_features)(VirtIODevice *vdev, + uint64_t requested_features, + Error **errp); + uint64_t (*bad_features)(VirtIODevice *vdev); + void (*set_features)(VirtIODevice *vdev, uint64_t val); + int (*validate_features)(VirtIODevice *vdev); + void (*get_config)(VirtIODevice *vdev, uint8_t *config); + void (*set_config)(VirtIODevice *vdev, const uint8_t *config); + void (*reset)(VirtIODevice *vdev); + void (*set_status)(VirtIODevice *vdev, uint8_t val); + /* For transitional devices, this is a bitmap of features + * that are only exposed on the legacy interface but not + * the modern one. + */ + uint64_t legacy_features; + /* Test and clear event pending status. + * Should be called after unmask to avoid losing events. + * If backend does not support masking, + * must check in frontend instead. + */ + bool (*guest_notifier_pending)(VirtIODevice *vdev, int n); + /* Mask/unmask events from this vq. Any events reported + * while masked will become pending. + * If backend does not support masking, + * must mask in frontend instead. + */ + void (*guest_notifier_mask)(VirtIODevice *vdev, int n, bool mask); + int (*start_ioeventfd)(VirtIODevice *vdev); + void (*stop_ioeventfd)(VirtIODevice *vdev); + /* Saving and loading of a device; trying to deprecate save/load + * use vmsd for new devices. + */ + void (*save)(VirtIODevice *vdev, QEMUFile *f); + int (*load)(VirtIODevice *vdev, QEMUFile *f, int version_id); + /* Post load hook in vmsd is called early while device is processed, and + * when VirtIODevice isn't fully initialized. Devices should use this instead, + * unless they specifically want to verify the migration stream as it's + * processed, e.g. for bounds checking. + */ + int (*post_load)(VirtIODevice *vdev); + const VMStateDescription *vmsd; + bool (*primary_unplug_pending)(void *opaque); +}; + +void virtio_instance_init_common(Object *proxy_obj, void *data, + size_t vdev_size, const char *vdev_name); + +void virtio_init(VirtIODevice *vdev, const char *name, + uint16_t device_id, size_t config_size); +void virtio_cleanup(VirtIODevice *vdev); + +void virtio_error(VirtIODevice *vdev, const char *fmt, ...) GCC_FMT_ATTR(2, 3); + +/* Set the child bus name. */ +void virtio_device_set_child_bus_name(VirtIODevice *vdev, char *bus_name); + +typedef void (*VirtIOHandleOutput)(VirtIODevice *, VirtQueue *); +typedef bool (*VirtIOHandleAIOOutput)(VirtIODevice *, VirtQueue *); + +VirtQueue *virtio_add_queue(VirtIODevice *vdev, int queue_size, + VirtIOHandleOutput handle_output); + +void virtio_del_queue(VirtIODevice *vdev, int n); + +void virtio_delete_queue(VirtQueue *vq); + +void virtqueue_push(VirtQueue *vq, const VirtQueueElement *elem, + unsigned int len); +void virtqueue_flush(VirtQueue *vq, unsigned int count); +void virtqueue_detach_element(VirtQueue *vq, const VirtQueueElement *elem, + unsigned int len); +void virtqueue_unpop(VirtQueue *vq, const VirtQueueElement *elem, + unsigned int len); +bool virtqueue_rewind(VirtQueue *vq, unsigned int num); +void virtqueue_fill(VirtQueue *vq, const VirtQueueElement *elem, + unsigned int len, unsigned int idx); + +void virtqueue_map(VirtIODevice *vdev, VirtQueueElement *elem); +void *virtqueue_pop(VirtQueue *vq, size_t sz); +unsigned int virtqueue_drop_all(VirtQueue *vq); +void *qemu_get_virtqueue_element(VirtIODevice *vdev, QEMUFile *f, size_t sz); +void qemu_put_virtqueue_element(VirtIODevice *vdev, QEMUFile *f, + VirtQueueElement *elem); +int virtqueue_avail_bytes(VirtQueue *vq, unsigned int in_bytes, + unsigned int out_bytes); +void virtqueue_get_avail_bytes(VirtQueue *vq, unsigned int *in_bytes, + unsigned int *out_bytes, + unsigned max_in_bytes, unsigned max_out_bytes); + +void virtio_notify_irqfd(VirtIODevice *vdev, VirtQueue *vq); +void virtio_notify(VirtIODevice *vdev, VirtQueue *vq); + +int virtio_save(VirtIODevice *vdev, QEMUFile *f); + +extern const VMStateInfo virtio_vmstate_info; + +#define VMSTATE_VIRTIO_DEVICE \ + { \ + .name = "virtio", \ + .info = &virtio_vmstate_info, \ + .flags = VMS_SINGLE, \ + } + +int virtio_load(VirtIODevice *vdev, QEMUFile *f, int version_id); + +void virtio_notify_config(VirtIODevice *vdev); + +bool virtio_queue_get_notification(VirtQueue *vq); +void virtio_queue_set_notification(VirtQueue *vq, int enable); + +int virtio_queue_ready(VirtQueue *vq); + +int virtio_queue_empty(VirtQueue *vq); + +/* Host binding interface. */ + +uint32_t virtio_config_readb(VirtIODevice *vdev, uint32_t addr); +uint32_t virtio_config_readw(VirtIODevice *vdev, uint32_t addr); +uint32_t virtio_config_readl(VirtIODevice *vdev, uint32_t addr); +void virtio_config_writeb(VirtIODevice *vdev, uint32_t addr, uint32_t data); +void virtio_config_writew(VirtIODevice *vdev, uint32_t addr, uint32_t data); +void virtio_config_writel(VirtIODevice *vdev, uint32_t addr, uint32_t data); +uint32_t virtio_config_modern_readb(VirtIODevice *vdev, uint32_t addr); +uint32_t virtio_config_modern_readw(VirtIODevice *vdev, uint32_t addr); +uint32_t virtio_config_modern_readl(VirtIODevice *vdev, uint32_t addr); +void virtio_config_modern_writeb(VirtIODevice *vdev, + uint32_t addr, uint32_t data); +void virtio_config_modern_writew(VirtIODevice *vdev, + uint32_t addr, uint32_t data); +void virtio_config_modern_writel(VirtIODevice *vdev, + uint32_t addr, uint32_t data); +void virtio_queue_set_addr(VirtIODevice *vdev, int n, hwaddr addr); +hwaddr virtio_queue_get_addr(VirtIODevice *vdev, int n); +void virtio_queue_set_num(VirtIODevice *vdev, int n, int num); +int virtio_queue_get_num(VirtIODevice *vdev, int n); +int virtio_queue_get_max_num(VirtIODevice *vdev, int n); +int virtio_get_num_queues(VirtIODevice *vdev); +void virtio_queue_set_rings(VirtIODevice *vdev, int n, hwaddr desc, + hwaddr avail, hwaddr used); +void virtio_queue_update_rings(VirtIODevice *vdev, int n); +void virtio_queue_set_align(VirtIODevice *vdev, int n, int align); +void virtio_queue_notify(VirtIODevice *vdev, int n); +uint16_t virtio_queue_vector(VirtIODevice *vdev, int n); +void virtio_queue_set_vector(VirtIODevice *vdev, int n, uint16_t vector); +int virtio_queue_set_host_notifier_mr(VirtIODevice *vdev, int n, + MemoryRegion *mr, bool assign); +int virtio_set_status(VirtIODevice *vdev, uint8_t val); +void virtio_reset(void *opaque); +void virtio_update_irq(VirtIODevice *vdev); +int virtio_set_features(VirtIODevice *vdev, uint64_t val); + +/* Base devices. */ +typedef struct VirtIOBlkConf VirtIOBlkConf; +struct virtio_net_conf; +typedef struct virtio_serial_conf virtio_serial_conf; +typedef struct virtio_input_conf virtio_input_conf; +typedef struct VirtIOSCSIConf VirtIOSCSIConf; +typedef struct VirtIORNGConf VirtIORNGConf; + +#define DEFINE_VIRTIO_COMMON_FEATURES(_state, _field) \ + DEFINE_PROP_BIT64("indirect_desc", _state, _field, \ + VIRTIO_RING_F_INDIRECT_DESC, true), \ + DEFINE_PROP_BIT64("event_idx", _state, _field, \ + VIRTIO_RING_F_EVENT_IDX, true), \ + DEFINE_PROP_BIT64("notify_on_empty", _state, _field, \ + VIRTIO_F_NOTIFY_ON_EMPTY, true), \ + DEFINE_PROP_BIT64("any_layout", _state, _field, \ + VIRTIO_F_ANY_LAYOUT, true), \ + DEFINE_PROP_BIT64("iommu_platform", _state, _field, \ + VIRTIO_F_IOMMU_PLATFORM, false), \ + DEFINE_PROP_BIT64("packed", _state, _field, \ + VIRTIO_F_RING_PACKED, false) + +hwaddr virtio_queue_get_desc_addr(VirtIODevice *vdev, int n); +bool virtio_queue_enabled_legacy(VirtIODevice *vdev, int n); +bool virtio_queue_enabled(VirtIODevice *vdev, int n); +hwaddr virtio_queue_get_avail_addr(VirtIODevice *vdev, int n); +hwaddr virtio_queue_get_used_addr(VirtIODevice *vdev, int n); +hwaddr virtio_queue_get_desc_size(VirtIODevice *vdev, int n); +hwaddr virtio_queue_get_avail_size(VirtIODevice *vdev, int n); +hwaddr virtio_queue_get_used_size(VirtIODevice *vdev, int n); +unsigned int virtio_queue_get_last_avail_idx(VirtIODevice *vdev, int n); +void virtio_queue_set_last_avail_idx(VirtIODevice *vdev, int n, + unsigned int idx); +void virtio_queue_restore_last_avail_idx(VirtIODevice *vdev, int n); +void virtio_queue_invalidate_signalled_used(VirtIODevice *vdev, int n); +void virtio_queue_update_used_idx(VirtIODevice *vdev, int n); +VirtQueue *virtio_get_queue(VirtIODevice *vdev, int n); +uint16_t virtio_get_queue_index(VirtQueue *vq); +EventNotifier *virtio_queue_get_guest_notifier(VirtQueue *vq); +void virtio_queue_set_guest_notifier_fd_handler(VirtQueue *vq, bool assign, + bool with_irqfd); +int virtio_device_start_ioeventfd(VirtIODevice *vdev); +int virtio_device_grab_ioeventfd(VirtIODevice *vdev); +void virtio_device_release_ioeventfd(VirtIODevice *vdev); +bool virtio_device_ioeventfd_enabled(VirtIODevice *vdev); +EventNotifier *virtio_queue_get_host_notifier(VirtQueue *vq); +void virtio_queue_set_host_notifier_enabled(VirtQueue *vq, bool enabled); +void virtio_queue_host_notifier_read(EventNotifier *n); +void virtio_queue_aio_set_host_notifier_handler(VirtQueue *vq, AioContext *ctx, + VirtIOHandleAIOOutput handle_output); +VirtQueue *virtio_vector_first_queue(VirtIODevice *vdev, uint16_t vector); +VirtQueue *virtio_vector_next_queue(VirtQueue *vq); + +static inline void virtio_add_feature(uint64_t *features, unsigned int fbit) +{ + assert(fbit < 64); + *features |= (1ULL << fbit); +} + +static inline void virtio_clear_feature(uint64_t *features, unsigned int fbit) +{ + assert(fbit < 64); + *features &= ~(1ULL << fbit); +} + +static inline bool virtio_has_feature(uint64_t features, unsigned int fbit) +{ + assert(fbit < 64); + return !!(features & (1ULL << fbit)); +} + +static inline bool virtio_vdev_has_feature(VirtIODevice *vdev, + unsigned int fbit) +{ + return virtio_has_feature(vdev->guest_features, fbit); +} + +static inline bool virtio_host_has_feature(VirtIODevice *vdev, + unsigned int fbit) +{ + return virtio_has_feature(vdev->host_features, fbit); +} + +static inline bool virtio_is_big_endian(VirtIODevice *vdev) +{ + if (!virtio_vdev_has_feature(vdev, VIRTIO_F_VERSION_1)) { + assert(vdev->device_endian != VIRTIO_DEVICE_ENDIAN_UNKNOWN); + return vdev->device_endian == VIRTIO_DEVICE_ENDIAN_BIG; + } + /* Devices conforming to VIRTIO 1.0 or later are always LE. */ + return false; +} + +static inline bool virtio_device_started(VirtIODevice *vdev, uint8_t status) +{ + if (vdev->use_started) { + return vdev->started; + } + + return status & VIRTIO_CONFIG_S_DRIVER_OK; +} + +static inline void virtio_set_started(VirtIODevice *vdev, bool started) +{ + if (started) { + vdev->start_on_kick = false; + } + + if (vdev->use_started) { + vdev->started = started; + } +} + +static inline void virtio_set_disabled(VirtIODevice *vdev, bool disable) +{ + if (vdev->use_disabled_flag) { + vdev->disabled = disable; + } +} + +static inline bool virtio_device_disabled(VirtIODevice *vdev) +{ + return unlikely(vdev->disabled || vdev->broken); +} + +bool virtio_legacy_allowed(VirtIODevice *vdev); +bool virtio_legacy_check_disabled(VirtIODevice *vdev); + +#endif diff --git a/include/hw/vmstate-if.h b/include/hw/vmstate-if.h new file mode 100644 index 000000000..52df571d1 --- /dev/null +++ b/include/hw/vmstate-if.h @@ -0,0 +1,39 @@ +/* + * VMState interface + * + * Copyright (c) 2009-2019 Red Hat Inc + * This work is licensed under the terms of the GNU GPL, version 2 or later. + * See the COPYING file in the top-level directory. + */ + +#ifndef VMSTATE_IF_H +#define VMSTATE_IF_H + +#include "qom/object.h" + +#define TYPE_VMSTATE_IF "vmstate-if" + +typedef struct VMStateIfClass VMStateIfClass; +DECLARE_CLASS_CHECKERS(VMStateIfClass, VMSTATE_IF, + TYPE_VMSTATE_IF) +#define VMSTATE_IF(obj) \ + INTERFACE_CHECK(VMStateIf, (obj), TYPE_VMSTATE_IF) + +typedef struct VMStateIf VMStateIf; + +struct VMStateIfClass { + InterfaceClass parent_class; + + char * (*get_id)(VMStateIf *obj); +}; + +static inline char *vmstate_if_get_id(VMStateIf *vmif) +{ + if (!vmif) { + return NULL; + } + + return VMSTATE_IF_GET_CLASS(vmif)->get_id(vmif); +} + +#endif /* VMSTATE_IF_H */ diff --git a/include/hw/watchdog/cmsdk-apb-watchdog.h b/include/hw/watchdog/cmsdk-apb-watchdog.h new file mode 100644 index 000000000..3da0d43e3 --- /dev/null +++ b/include/hw/watchdog/cmsdk-apb-watchdog.h @@ -0,0 +1,67 @@ +/* + * ARM CMSDK APB watchdog emulation + * + * Copyright (c) 2018 Linaro Limited + * Written by Peter Maydell + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 or + * (at your option) any later version. + */ + +/* + * This is a model of the "APB watchdog" which is part of the Cortex-M + * System Design Kit (CMSDK) and documented in the Cortex-M System + * Design Kit Technical Reference Manual (ARM DDI0479C): + * https://developer.arm.com/products/system-design/system-design-kits/cortex-m-system-design-kit + * + * QEMU interface: + * + QOM property "wdogclk-frq": frequency at which the watchdog is clocked + * + sysbus MMIO region 0: the register bank + * + sysbus IRQ 0: watchdog interrupt + * + * In real hardware the watchdog's reset output is just a GPIO line + * which can then be masked by the board or treated as a simple interrupt. + * (For instance the IoTKit does this with the non-secure watchdog, so that + * secure code can control whether non-secure code can perform a system + * reset via its watchdog.) In QEMU, we just wire up the watchdog reset + * to watchdog_perform_action(), at least for the moment. + */ + +#ifndef CMSDK_APB_WATCHDOG_H +#define CMSDK_APB_WATCHDOG_H + +#include "hw/sysbus.h" +#include "hw/ptimer.h" +#include "qom/object.h" + +#define TYPE_CMSDK_APB_WATCHDOG "cmsdk-apb-watchdog" +OBJECT_DECLARE_SIMPLE_TYPE(CMSDKAPBWatchdog, CMSDK_APB_WATCHDOG) + +/* + * This shares the same struct (and cast macro) as the base + * cmsdk-apb-watchdog device. + */ +#define TYPE_LUMINARY_WATCHDOG "luminary-watchdog" + +struct CMSDKAPBWatchdog { + /*< private >*/ + SysBusDevice parent_obj; + + /*< public >*/ + MemoryRegion iomem; + qemu_irq wdogint; + uint32_t wdogclk_frq; + bool is_luminary; + struct ptimer_state *timer; + + uint32_t control; + uint32_t intstatus; + uint32_t lock; + uint32_t itcr; + uint32_t itop; + uint32_t resetstatus; + const uint32_t *id; +}; + +#endif diff --git a/include/hw/watchdog/sbsa_gwdt.h b/include/hw/watchdog/sbsa_gwdt.h new file mode 100644 index 000000000..70b137de3 --- /dev/null +++ b/include/hw/watchdog/sbsa_gwdt.h @@ -0,0 +1,79 @@ +/* + * Copyright (c) 2020 Linaro Limited + * + * Authors: + * Shashi Mallela + * + * This work is licensed under the terms of the GNU GPL, version 2 or (at your + * option) any later version. See the COPYING file in the top-level directory. + * + */ + +#ifndef WDT_SBSA_GWDT_H +#define WDT_SBSA_GWDT_H + +#include "qemu/bitops.h" +#include "hw/sysbus.h" +#include "hw/irq.h" + +#define TYPE_WDT_SBSA "sbsa_gwdt" +#define SBSA_GWDT(obj) \ + OBJECT_CHECK(SBSA_GWDTState, (obj), TYPE_WDT_SBSA) +#define SBSA_GWDT_CLASS(klass) \ + OBJECT_CLASS_CHECK(SBSA_GWDTClass, (klass), TYPE_WDT_SBSA) +#define SBSA_GWDT_GET_CLASS(obj) \ + OBJECT_GET_CLASS(SBSA_GWDTClass, (obj), TYPE_WDT_SBSA) + +/* SBSA Generic Watchdog register definitions */ +/* refresh frame */ +#define SBSA_GWDT_WRR 0x000 + +/* control frame */ +#define SBSA_GWDT_WCS 0x000 +#define SBSA_GWDT_WOR 0x008 +#define SBSA_GWDT_WORU 0x00C +#define SBSA_GWDT_WCV 0x010 +#define SBSA_GWDT_WCVU 0x014 + +/* Watchdog Interface Identification Register */ +#define SBSA_GWDT_W_IIDR 0xFCC + +/* Watchdog Control and Status Register Bits */ +#define SBSA_GWDT_WCS_EN BIT(0) +#define SBSA_GWDT_WCS_WS0 BIT(1) +#define SBSA_GWDT_WCS_WS1 BIT(2) + +#define SBSA_GWDT_WOR_MASK 0x0000FFFF + +/* + * Watchdog Interface Identification Register definition + * considering JEP106 code for ARM in Bits [11:0] + */ +#define SBSA_GWDT_ID 0x1043B + +/* 2 Separate memory regions for each of refresh & control register frames */ +#define SBSA_GWDT_RMMIO_SIZE 0x1000 +#define SBSA_GWDT_CMMIO_SIZE 0x1000 + +#define SBSA_TIMER_FREQ 62500000 /* Hz */ + +typedef struct SBSA_GWDTState { + /* */ + SysBusDevice parent_obj; + + /*< public >*/ + MemoryRegion rmmio; + MemoryRegion cmmio; + qemu_irq irq; + + QEMUTimer *timer; + + uint32_t id; + uint32_t wcs; + uint32_t worl; + uint32_t woru; + uint32_t wcvl; + uint32_t wcvu; +} SBSA_GWDTState; + +#endif /* WDT_SBSA_GWDT_H */ diff --git a/include/hw/watchdog/wdt_aspeed.h b/include/hw/watchdog/wdt_aspeed.h new file mode 100644 index 000000000..80b03661e --- /dev/null +++ b/include/hw/watchdog/wdt_aspeed.h @@ -0,0 +1,49 @@ +/* + * ASPEED Watchdog Controller + * + * Copyright (C) 2016-2017 IBM Corp. + * + * This code is licensed under the GPL version 2 or later. See the + * COPYING file in the top-level directory. + */ + +#ifndef WDT_ASPEED_H +#define WDT_ASPEED_H + +#include "hw/misc/aspeed_scu.h" +#include "hw/sysbus.h" +#include "qom/object.h" + +#define TYPE_ASPEED_WDT "aspeed.wdt" +OBJECT_DECLARE_TYPE(AspeedWDTState, AspeedWDTClass, ASPEED_WDT) +#define TYPE_ASPEED_2400_WDT TYPE_ASPEED_WDT "-ast2400" +#define TYPE_ASPEED_2500_WDT TYPE_ASPEED_WDT "-ast2500" +#define TYPE_ASPEED_2600_WDT TYPE_ASPEED_WDT "-ast2600" + +#define ASPEED_WDT_REGS_MAX (0x20 / 4) + +struct AspeedWDTState { + /*< private >*/ + SysBusDevice parent_obj; + QEMUTimer *timer; + + /*< public >*/ + MemoryRegion iomem; + uint32_t regs[ASPEED_WDT_REGS_MAX]; + + AspeedSCUState *scu; + uint32_t pclk_freq; +}; + + +struct AspeedWDTClass { + SysBusDeviceClass parent_class; + + uint32_t offset; + uint32_t ext_pulse_width_mask; + uint32_t reset_ctrl_reg; + void (*reset_pulse)(AspeedWDTState *s, uint32_t property); + void (*wdt_reload)(AspeedWDTState *s); +}; + +#endif /* WDT_ASPEED_H */ diff --git a/include/hw/watchdog/wdt_diag288.h b/include/hw/watchdog/wdt_diag288.h new file mode 100644 index 000000000..f72c1d331 --- /dev/null +++ b/include/hw/watchdog/wdt_diag288.h @@ -0,0 +1,35 @@ +#ifndef WDT_DIAG288_H +#define WDT_DIAG288_H + +#include "hw/qdev-core.h" +#include "qom/object.h" + +#define TYPE_WDT_DIAG288 "diag288" +typedef struct DIAG288Class DIAG288Class; +typedef struct DIAG288State DIAG288State; +DECLARE_OBJ_CHECKERS(DIAG288State, DIAG288Class, + DIAG288, TYPE_WDT_DIAG288) + +#define WDT_DIAG288_INIT 0 +#define WDT_DIAG288_CHANGE 1 +#define WDT_DIAG288_CANCEL 2 + +struct DIAG288State { + /*< private >*/ + DeviceState parent_obj; + QEMUTimer *timer; + bool enabled; + + /*< public >*/ +}; + +struct DIAG288Class { + /*< private >*/ + DeviceClass parent_class; + + /*< public >*/ + int (*handle_timer)(DIAG288State *dev, + uint64_t func, uint64_t timeout); +}; + +#endif /* WDT_DIAG288_H */ diff --git a/include/hw/watchdog/wdt_imx2.h b/include/hw/watchdog/wdt_imx2.h new file mode 100644 index 000000000..023d83f48 --- /dev/null +++ b/include/hw/watchdog/wdt_imx2.h @@ -0,0 +1,91 @@ +/* + * Copyright (c) 2017, Impinj, Inc. + * + * i.MX2 Watchdog IP block + * + * Author: Andrey Smirnov + * + * This work is licensed under the terms of the GNU GPL, version 2 or later. + * See the COPYING file in the top-level directory. + */ + +#ifndef IMX2_WDT_H +#define IMX2_WDT_H + +#include "qemu/bitops.h" +#include "hw/sysbus.h" +#include "hw/irq.h" +#include "hw/ptimer.h" +#include "qom/object.h" + +#define TYPE_IMX2_WDT "imx2.wdt" +OBJECT_DECLARE_SIMPLE_TYPE(IMX2WdtState, IMX2_WDT) + +enum IMX2WdtRegisters { + IMX2_WDT_WCR = 0x0000, /* Control Register */ + IMX2_WDT_WSR = 0x0002, /* Service Register */ + IMX2_WDT_WRSR = 0x0004, /* Reset Status Register */ + IMX2_WDT_WICR = 0x0006, /* Interrupt Control Register */ + IMX2_WDT_WMCR = 0x0008, /* Misc Register */ +}; + +#define IMX2_WDT_MMIO_SIZE 0x000a + +/* Control Register definitions */ +#define IMX2_WDT_WCR_WT (0xFF << 8) /* Watchdog Timeout Field */ +#define IMX2_WDT_WCR_WDW BIT(7) /* WDOG Disable for Wait */ +#define IMX2_WDT_WCR_WDA BIT(5) /* WDOG Assertion */ +#define IMX2_WDT_WCR_SRS BIT(4) /* Software Reset Signal */ +#define IMX2_WDT_WCR_WDT BIT(3) /* WDOG Timeout Assertion */ +#define IMX2_WDT_WCR_WDE BIT(2) /* Watchdog Enable */ +#define IMX2_WDT_WCR_WDBG BIT(1) /* Watchdog Debug Enable */ +#define IMX2_WDT_WCR_WDZST BIT(0) /* Watchdog Timer Suspend */ + +#define IMX2_WDT_WCR_LOCK_MASK (IMX2_WDT_WCR_WDZST | IMX2_WDT_WCR_WDBG \ + | IMX2_WDT_WCR_WDW) + +/* Service Register definitions */ +#define IMX2_WDT_SEQ1 0x5555 /* service sequence 1 */ +#define IMX2_WDT_SEQ2 0xAAAA /* service sequence 2 */ + +/* Reset Status Register definitions */ +#define IMX2_WDT_WRSR_TOUT BIT(1) /* Reset due to Timeout */ +#define IMX2_WDT_WRSR_SFTW BIT(0) /* Reset due to software reset */ + +/* Interrupt Control Register definitions */ +#define IMX2_WDT_WICR_WIE BIT(15) /* Interrupt Enable */ +#define IMX2_WDT_WICR_WTIS BIT(14) /* Interrupt Status */ +#define IMX2_WDT_WICR_WICT 0xff /* Interrupt Timeout */ +#define IMX2_WDT_WICR_WICT_DEF 0x04 /* Default interrupt timeout (2s) */ + +#define IMX2_WDT_WICR_LOCK_MASK (IMX2_WDT_WICR_WIE | IMX2_WDT_WICR_WICT) + +/* Misc Control Register definitions */ +#define IMX2_WDT_WMCR_PDE BIT(0) /* Power-Down Enable */ + +struct IMX2WdtState { + /* */ + SysBusDevice parent_obj; + + /*< public >*/ + MemoryRegion mmio; + qemu_irq irq; + + struct ptimer_state *timer; + struct ptimer_state *itimer; + + bool pretimeout_support; + bool wicr_locked; + + uint16_t wcr; + uint16_t wsr; + uint16_t wrsr; + uint16_t wicr; + uint16_t wmcr; + + bool wcr_locked; /* affects WDZST, WDBG, and WDW */ + bool wcr_wde_locked; /* affects WDE */ + bool wcr_wdt_locked; /* affects WDT (never cleared) */ +}; + +#endif /* IMX2_WDT_H */ diff --git a/include/hw/xen/interface/grant_table.h b/include/hw/xen/interface/grant_table.h new file mode 100644 index 000000000..2af0cbdde --- /dev/null +++ b/include/hw/xen/interface/grant_table.h @@ -0,0 +1,36 @@ +/****************************************************************************** + * grant_table.h + * + * Interface for granting foreign access to page frames, and receiving + * page-ownership transfers. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + * + * Copyright (c) 2004, K A Fraser + */ + +#ifndef __XEN_PUBLIC_GRANT_TABLE_H__ +#define __XEN_PUBLIC_GRANT_TABLE_H__ + +/* + * Reference to a grant entry in a specified domain's grant table. + */ +typedef uint32_t grant_ref_t; + +#endif /* __XEN_PUBLIC_GRANT_TABLE_H__ */ diff --git a/include/hw/xen/interface/io/blkif.h b/include/hw/xen/interface/io/blkif.h new file mode 100644 index 000000000..d07fa1e07 --- /dev/null +++ b/include/hw/xen/interface/io/blkif.h @@ -0,0 +1,712 @@ +/****************************************************************************** + * blkif.h + * + * Unified block-device I/O interface for Xen guest OSes. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + * + * Copyright (c) 2003-2004, Keir Fraser + * Copyright (c) 2012, Spectra Logic Corporation + */ + +#ifndef __XEN_PUBLIC_IO_BLKIF_H__ +#define __XEN_PUBLIC_IO_BLKIF_H__ + +#include "ring.h" +#include "../grant_table.h" + +/* + * Front->back notifications: When enqueuing a new request, sending a + * notification can be made conditional on req_event (i.e., the generic + * hold-off mechanism provided by the ring macros). Backends must set + * req_event appropriately (e.g., using RING_FINAL_CHECK_FOR_REQUESTS()). + * + * Back->front notifications: When enqueuing a new response, sending a + * notification can be made conditional on rsp_event (i.e., the generic + * hold-off mechanism provided by the ring macros). Frontends must set + * rsp_event appropriately (e.g., using RING_FINAL_CHECK_FOR_RESPONSES()). + */ + +#ifndef blkif_vdev_t +#define blkif_vdev_t uint16_t +#endif +#define blkif_sector_t uint64_t + +/* + * Feature and Parameter Negotiation + * ================================= + * The two halves of a Xen block driver utilize nodes within the XenStore to + * communicate capabilities and to negotiate operating parameters. This + * section enumerates these nodes which reside in the respective front and + * backend portions of the XenStore, following the XenBus convention. + * + * All data in the XenStore is stored as strings. Nodes specifying numeric + * values are encoded in decimal. Integer value ranges listed below are + * expressed as fixed sized integer types capable of storing the conversion + * of a properly formated node string, without loss of information. + * + * Any specified default value is in effect if the corresponding XenBus node + * is not present in the XenStore. + * + * XenStore nodes in sections marked "PRIVATE" are solely for use by the + * driver side whose XenBus tree contains them. + * + * XenStore nodes marked "DEPRECATED" in their notes section should only be + * used to provide interoperability with legacy implementations. + * + * See the XenBus state transition diagram below for details on when XenBus + * nodes must be published and when they can be queried. + * + ***************************************************************************** + * Backend XenBus Nodes + ***************************************************************************** + * + *------------------ Backend Device Identification (PRIVATE) ------------------ + * + * mode + * Values: "r" (read only), "w" (writable) + * + * The read or write access permissions to the backing store to be + * granted to the frontend. + * + * params + * Values: string + * + * A free formatted string providing sufficient information for the + * hotplug script to attach the device and provide a suitable + * handler (ie: a block device) for blkback to use. + * + * physical-device + * Values: "MAJOR:MINOR" + * Notes: 11 + * + * MAJOR and MINOR are the major number and minor number of the + * backing device respectively. + * + * physical-device-path + * Values: path string + * + * A string that contains the absolute path to the disk image. On + * NetBSD and Linux this is always a block device, while on FreeBSD + * it can be either a block device or a regular file. + * + * type + * Values: "file", "phy", "tap" + * + * The type of the backing device/object. + * + * + * direct-io-safe + * Values: 0/1 (boolean) + * Default Value: 0 + * + * The underlying storage is not affected by the direct IO memory + * lifetime bug. See: + * http://lists.xen.org/archives/html/xen-devel/2012-12/msg01154.html + * + * Therefore this option gives the backend permission to use + * O_DIRECT, notwithstanding that bug. + * + * That is, if this option is enabled, use of O_DIRECT is safe, + * in circumstances where we would normally have avoided it as a + * workaround for that bug. This option is not relevant for all + * backends, and even not necessarily supported for those for + * which it is relevant. A backend which knows that it is not + * affected by the bug can ignore this option. + * + * This option doesn't require a backend to use O_DIRECT, so it + * should not be used to try to control the caching behaviour. + * + *--------------------------------- Features --------------------------------- + * + * feature-barrier + * Values: 0/1 (boolean) + * Default Value: 0 + * + * A value of "1" indicates that the backend can process requests + * containing the BLKIF_OP_WRITE_BARRIER request opcode. Requests + * of this type may still be returned at any time with the + * BLKIF_RSP_EOPNOTSUPP result code. + * + * feature-flush-cache + * Values: 0/1 (boolean) + * Default Value: 0 + * + * A value of "1" indicates that the backend can process requests + * containing the BLKIF_OP_FLUSH_DISKCACHE request opcode. Requests + * of this type may still be returned at any time with the + * BLKIF_RSP_EOPNOTSUPP result code. + * + * feature-discard + * Values: 0/1 (boolean) + * Default Value: 0 + * + * A value of "1" indicates that the backend can process requests + * containing the BLKIF_OP_DISCARD request opcode. Requests + * of this type may still be returned at any time with the + * BLKIF_RSP_EOPNOTSUPP result code. + * + * feature-persistent + * Values: 0/1 (boolean) + * Default Value: 0 + * Notes: 7 + * + * A value of "1" indicates that the backend can keep the grants used + * by the frontend driver mapped, so the same set of grants should be + * used in all transactions. The maximum number of grants the backend + * can map persistently depends on the implementation, but ideally it + * should be RING_SIZE * BLKIF_MAX_SEGMENTS_PER_REQUEST. Using this + * feature the backend doesn't need to unmap each grant, preventing + * costly TLB flushes. The backend driver should only map grants + * persistently if the frontend supports it. If a backend driver chooses + * to use the persistent protocol when the frontend doesn't support it, + * it will probably hit the maximum number of persistently mapped grants + * (due to the fact that the frontend won't be reusing the same grants), + * and fall back to non-persistent mode. Backend implementations may + * shrink or expand the number of persistently mapped grants without + * notifying the frontend depending on memory constraints (this might + * cause a performance degradation). + * + * If a backend driver wants to limit the maximum number of persistently + * mapped grants to a value less than RING_SIZE * + * BLKIF_MAX_SEGMENTS_PER_REQUEST a LRU strategy should be used to + * discard the grants that are less commonly used. Using a LRU in the + * backend driver paired with a LIFO queue in the frontend will + * allow us to have better performance in this scenario. + * + *----------------------- Request Transport Parameters ------------------------ + * + * max-ring-page-order + * Values: + * Default Value: 0 + * Notes: 1, 3 + * + * The maximum supported size of the request ring buffer in units of + * lb(machine pages). (e.g. 0 == 1 page, 1 = 2 pages, 2 == 4 pages, + * etc.). + * + * max-ring-pages + * Values: + * Default Value: 1 + * Notes: DEPRECATED, 2, 3 + * + * The maximum supported size of the request ring buffer in units of + * machine pages. The value must be a power of 2. + * + *------------------------- Backend Device Properties ------------------------- + * + * discard-enable + * Values: 0/1 (boolean) + * Default Value: 1 + * + * This optional property, set by the toolstack, instructs the backend + * to offer (or not to offer) discard to the frontend. If the property + * is missing the backend should offer discard if the backing storage + * actually supports it. + * + * discard-alignment + * Values: + * Default Value: 0 + * Notes: 4, 5 + * + * The offset, in bytes from the beginning of the virtual block device, + * to the first, addressable, discard extent on the underlying device. + * + * discard-granularity + * Values: + * Default Value: <"sector-size"> + * Notes: 4 + * + * The size, in bytes, of the individually addressable discard extents + * of the underlying device. + * + * discard-secure + * Values: 0/1 (boolean) + * Default Value: 0 + * Notes: 10 + * + * A value of "1" indicates that the backend can process BLKIF_OP_DISCARD + * requests with the BLKIF_DISCARD_SECURE flag set. + * + * info + * Values: (bitmap) + * + * A collection of bit flags describing attributes of the backing + * device. The VDISK_* macros define the meaning of each bit + * location. + * + * sector-size + * Values: + * + * The logical block size, in bytes, of the underlying storage. This + * must be a power of two with a minimum value of 512. + * + * NOTE: Because of implementation bugs in some frontends this must be + * set to 512, unless the frontend advertizes a non-zero value + * in its "feature-large-sector-size" xenbus node. (See below). + * + * physical-sector-size + * Values: + * Default Value: <"sector-size"> + * + * The physical block size, in bytes, of the backend storage. This + * must be an integer multiple of "sector-size". + * + * sectors + * Values: + * + * The size of the backend device, expressed in units of "sector-size". + * The product of "sector-size" and "sectors" must also be an integer + * multiple of "physical-sector-size", if that node is present. + * + ***************************************************************************** + * Frontend XenBus Nodes + ***************************************************************************** + * + *----------------------- Request Transport Parameters ----------------------- + * + * event-channel + * Values: + * + * The identifier of the Xen event channel used to signal activity + * in the ring buffer. + * + * ring-ref + * Values: + * Notes: 6 + * + * The Xen grant reference granting permission for the backend to map + * the sole page in a single page sized ring buffer. + * + * ring-ref%u + * Values: + * Notes: 6 + * + * For a frontend providing a multi-page ring, a "number of ring pages" + * sized list of nodes, each containing a Xen grant reference granting + * permission for the backend to map the page of the ring located + * at page index "%u". Page indexes are zero based. + * + * protocol + * Values: string (XEN_IO_PROTO_ABI_*) + * Default Value: XEN_IO_PROTO_ABI_NATIVE + * + * The machine ABI rules governing the format of all ring request and + * response structures. + * + * ring-page-order + * Values: + * Default Value: 0 + * Maximum Value: MAX(ffs(max-ring-pages) - 1, max-ring-page-order) + * Notes: 1, 3 + * + * The size of the frontend allocated request ring buffer in units + * of lb(machine pages). (e.g. 0 == 1 page, 1 = 2 pages, 2 == 4 pages, + * etc.). + * + * num-ring-pages + * Values: + * Default Value: 1 + * Maximum Value: MAX(max-ring-pages,(0x1 << max-ring-page-order)) + * Notes: DEPRECATED, 2, 3 + * + * The size of the frontend allocated request ring buffer in units of + * machine pages. The value must be a power of 2. + * + *--------------------------------- Features --------------------------------- + * + * feature-persistent + * Values: 0/1 (boolean) + * Default Value: 0 + * Notes: 7, 8, 9 + * + * A value of "1" indicates that the frontend will reuse the same grants + * for all transactions, allowing the backend to map them with write + * access (even when it should be read-only). If the frontend hits the + * maximum number of allowed persistently mapped grants, it can fallback + * to non persistent mode. This will cause a performance degradation, + * since the backend driver will still try to map those grants + * persistently. Since the persistent grants protocol is compatible with + * the previous protocol, a frontend driver can choose to work in + * persistent mode even when the backend doesn't support it. + * + * It is recommended that the frontend driver stores the persistently + * mapped grants in a LIFO queue, so a subset of all persistently mapped + * grants gets used commonly. This is done in case the backend driver + * decides to limit the maximum number of persistently mapped grants + * to a value less than RING_SIZE * BLKIF_MAX_SEGMENTS_PER_REQUEST. + * + * feature-large-sector-size + * Values: 0/1 (boolean) + * Default Value: 0 + * + * A value of "1" indicates that the frontend will correctly supply and + * interpret all sector-based quantities in terms of the "sector-size" + * value supplied in the backend info, whatever that may be set to. + * If this node is not present or its value is "0" then it is assumed + * that the frontend requires that the logical block size is 512 as it + * is hardcoded (which is the case in some frontend implementations). + * + *------------------------- Virtual Device Properties ------------------------- + * + * device-type + * Values: "disk", "cdrom", "floppy", etc. + * + * virtual-device + * Values: + * + * A value indicating the physical device to virtualize within the + * frontend's domain. (e.g. "The first ATA disk", "The third SCSI + * disk", etc.) + * + * See docs/misc/vbd-interface.txt for details on the format of this + * value. + * + * Notes + * ----- + * (1) Multi-page ring buffer scheme first developed in the Citrix XenServer + * PV drivers. + * (2) Multi-page ring buffer scheme first used in some RedHat distributions + * including a distribution deployed on certain nodes of the Amazon + * EC2 cluster. + * (3) Support for multi-page ring buffers was implemented independently, + * in slightly different forms, by both Citrix and RedHat/Amazon. + * For full interoperability, block front and backends should publish + * identical ring parameters, adjusted for unit differences, to the + * XenStore nodes used in both schemes. + * (4) Devices that support discard functionality may internally allocate space + * (discardable extents) in units that are larger than the exported logical + * block size. If the backing device has such discardable extents the + * backend should provide both discard-granularity and discard-alignment. + * Providing just one of the two may be considered an error by the frontend. + * Backends supporting discard should include discard-granularity and + * discard-alignment even if it supports discarding individual sectors. + * Frontends should assume discard-alignment == 0 and discard-granularity + * == sector size if these keys are missing. + * (5) The discard-alignment parameter allows a physical device to be + * partitioned into virtual devices that do not necessarily begin or + * end on a discardable extent boundary. + * (6) When there is only a single page allocated to the request ring, + * 'ring-ref' is used to communicate the grant reference for this + * page to the backend. When using a multi-page ring, the 'ring-ref' + * node is not created. Instead 'ring-ref0' - 'ring-refN' are used. + * (7) When using persistent grants data has to be copied from/to the page + * where the grant is currently mapped. The overhead of doing this copy + * however doesn't suppress the speed improvement of not having to unmap + * the grants. + * (8) The frontend driver has to allow the backend driver to map all grants + * with write access, even when they should be mapped read-only, since + * further requests may reuse these grants and require write permissions. + * (9) Linux implementation doesn't have a limit on the maximum number of + * grants that can be persistently mapped in the frontend driver, but + * due to the frontent driver implementation it should never be bigger + * than RING_SIZE * BLKIF_MAX_SEGMENTS_PER_REQUEST. + *(10) The discard-secure property may be present and will be set to 1 if the + * backing device supports secure discard. + *(11) Only used by Linux and NetBSD. + */ + +/* + * Multiple hardware queues/rings: + * If supported, the backend will write the key "multi-queue-max-queues" to + * the directory for that vbd, and set its value to the maximum supported + * number of queues. + * Frontends that are aware of this feature and wish to use it can write the + * key "multi-queue-num-queues" with the number they wish to use, which must be + * greater than zero, and no more than the value reported by the backend in + * "multi-queue-max-queues". + * + * For frontends requesting just one queue, the usual event-channel and + * ring-ref keys are written as before, simplifying the backend processing + * to avoid distinguishing between a frontend that doesn't understand the + * multi-queue feature, and one that does, but requested only one queue. + * + * Frontends requesting two or more queues must not write the toplevel + * event-channel and ring-ref keys, instead writing those keys under sub-keys + * having the name "queue-N" where N is the integer ID of the queue/ring for + * which those keys belong. Queues are indexed from zero. + * For example, a frontend with two queues must write the following set of + * queue-related keys: + * + * /local/domain/1/device/vbd/0/multi-queue-num-queues = "2" + * /local/domain/1/device/vbd/0/queue-0 = "" + * /local/domain/1/device/vbd/0/queue-0/ring-ref = "" + * /local/domain/1/device/vbd/0/queue-0/event-channel = "" + * /local/domain/1/device/vbd/0/queue-1 = "" + * /local/domain/1/device/vbd/0/queue-1/ring-ref = "" + * /local/domain/1/device/vbd/0/queue-1/event-channel = "" + * + * It is also possible to use multiple queues/rings together with + * feature multi-page ring buffer. + * For example, a frontend requests two queues/rings and the size of each ring + * buffer is two pages must write the following set of related keys: + * + * /local/domain/1/device/vbd/0/multi-queue-num-queues = "2" + * /local/domain/1/device/vbd/0/ring-page-order = "1" + * /local/domain/1/device/vbd/0/queue-0 = "" + * /local/domain/1/device/vbd/0/queue-0/ring-ref0 = "" + * /local/domain/1/device/vbd/0/queue-0/ring-ref1 = "" + * /local/domain/1/device/vbd/0/queue-0/event-channel = "" + * /local/domain/1/device/vbd/0/queue-1 = "" + * /local/domain/1/device/vbd/0/queue-1/ring-ref0 = "" + * /local/domain/1/device/vbd/0/queue-1/ring-ref1 = "" + * /local/domain/1/device/vbd/0/queue-1/event-channel = "" + * + */ + +/* + * STATE DIAGRAMS + * + ***************************************************************************** + * Startup * + ***************************************************************************** + * + * Tool stack creates front and back nodes with state XenbusStateInitialising. + * + * Front Back + * ================================= ===================================== + * XenbusStateInitialising XenbusStateInitialising + * o Query virtual device o Query backend device identification + * properties. data. + * o Setup OS device instance. o Open and validate backend device. + * o Publish backend features and + * transport parameters. + * | + * | + * V + * XenbusStateInitWait + * + * o Query backend features and + * transport parameters. + * o Allocate and initialize the + * request ring. + * o Publish transport parameters + * that will be in effect during + * this connection. + * | + * | + * V + * XenbusStateInitialised + * + * o Query frontend transport parameters. + * o Connect to the request ring and + * event channel. + * o Publish backend device properties. + * | + * | + * V + * XenbusStateConnected + * + * o Query backend device properties. + * o Finalize OS virtual device + * instance. + * | + * | + * V + * XenbusStateConnected + * + * Note: Drivers that do not support any optional features, or the negotiation + * of transport parameters, can skip certain states in the state machine: + * + * o A frontend may transition to XenbusStateInitialised without + * waiting for the backend to enter XenbusStateInitWait. In this + * case, default transport parameters are in effect and any + * transport parameters published by the frontend must contain + * their default values. + * + * o A backend may transition to XenbusStateInitialised, bypassing + * XenbusStateInitWait, without waiting for the frontend to first + * enter the XenbusStateInitialised state. In this case, default + * transport parameters are in effect and any transport parameters + * published by the backend must contain their default values. + * + * Drivers that support optional features and/or transport parameter + * negotiation must tolerate these additional state transition paths. + * In general this means performing the work of any skipped state + * transition, if it has not already been performed, in addition to the + * work associated with entry into the current state. + */ + +/* + * REQUEST CODES. + */ +#define BLKIF_OP_READ 0 +#define BLKIF_OP_WRITE 1 +/* + * All writes issued prior to a request with the BLKIF_OP_WRITE_BARRIER + * operation code ("barrier request") must be completed prior to the + * execution of the barrier request. All writes issued after the barrier + * request must not execute until after the completion of the barrier request. + * + * Optional. See "feature-barrier" XenBus node documentation above. + */ +#define BLKIF_OP_WRITE_BARRIER 2 +/* + * Commit any uncommitted contents of the backing device's volatile cache + * to stable storage. + * + * Optional. See "feature-flush-cache" XenBus node documentation above. + */ +#define BLKIF_OP_FLUSH_DISKCACHE 3 +/* + * Used in SLES sources for device specific command packet + * contained within the request. Reserved for that purpose. + */ +#define BLKIF_OP_RESERVED_1 4 +/* + * Indicate to the backend device that a region of storage is no longer in + * use, and may be discarded at any time without impact to the client. If + * the BLKIF_DISCARD_SECURE flag is set on the request, all copies of the + * discarded region on the device must be rendered unrecoverable before the + * command returns. + * + * This operation is analogous to performing a trim (ATA) or unamp (SCSI), + * command on a native device. + * + * More information about trim/unmap operations can be found at: + * http://t13.org/Documents/UploadedDocuments/docs2008/ + * e07154r6-Data_Set_Management_Proposal_for_ATA-ACS2.doc + * http://www.seagate.com/staticfiles/support/disc/manuals/ + * Interface%20manuals/100293068c.pdf + * + * Optional. See "feature-discard", "discard-alignment", + * "discard-granularity", and "discard-secure" in the XenBus node + * documentation above. + */ +#define BLKIF_OP_DISCARD 5 + +/* + * Recognized if "feature-max-indirect-segments" in present in the backend + * xenbus info. The "feature-max-indirect-segments" node contains the maximum + * number of segments allowed by the backend per request. If the node is + * present, the frontend might use blkif_request_indirect structs in order to + * issue requests with more than BLKIF_MAX_SEGMENTS_PER_REQUEST (11). The + * maximum number of indirect segments is fixed by the backend, but the + * frontend can issue requests with any number of indirect segments as long as + * it's less than the number provided by the backend. The indirect_grefs field + * in blkif_request_indirect should be filled by the frontend with the + * grant references of the pages that are holding the indirect segments. + * These pages are filled with an array of blkif_request_segment that hold the + * information about the segments. The number of indirect pages to use is + * determined by the number of segments an indirect request contains. Every + * indirect page can contain a maximum of + * (PAGE_SIZE / sizeof(struct blkif_request_segment)) segments, so to + * calculate the number of indirect pages to use we have to do + * ceil(indirect_segments / (PAGE_SIZE / sizeof(struct blkif_request_segment))). + * + * If a backend does not recognize BLKIF_OP_INDIRECT, it should *not* + * create the "feature-max-indirect-segments" node! + */ +#define BLKIF_OP_INDIRECT 6 + +/* + * Maximum scatter/gather segments per request. + * This is carefully chosen so that sizeof(blkif_ring_t) <= PAGE_SIZE. + * NB. This could be 12 if the ring indexes weren't stored in the same page. + */ +#define BLKIF_MAX_SEGMENTS_PER_REQUEST 11 + +/* + * Maximum number of indirect pages to use per request. + */ +#define BLKIF_MAX_INDIRECT_PAGES_PER_REQUEST 8 + +/* + * NB. 'first_sect' and 'last_sect' in blkif_request_segment, as well as + * 'sector_number' in blkif_request, blkif_request_discard and + * blkif_request_indirect are sector-based quantities. See the description + * of the "feature-large-sector-size" frontend xenbus node above for + * more information. + */ +struct blkif_request_segment { + grant_ref_t gref; /* reference to I/O buffer frame */ + /* @first_sect: first sector in frame to transfer (inclusive). */ + /* @last_sect: last sector in frame to transfer (inclusive). */ + uint8_t first_sect, last_sect; +}; + +/* + * Starting ring element for any I/O request. + */ +struct blkif_request { + uint8_t operation; /* BLKIF_OP_??? */ + uint8_t nr_segments; /* number of segments */ + blkif_vdev_t handle; /* only for read/write requests */ + uint64_t id; /* private guest value, echoed in resp */ + blkif_sector_t sector_number;/* start sector idx on disk (r/w only) */ + struct blkif_request_segment seg[BLKIF_MAX_SEGMENTS_PER_REQUEST]; +}; +typedef struct blkif_request blkif_request_t; + +/* + * Cast to this structure when blkif_request.operation == BLKIF_OP_DISCARD + * sizeof(struct blkif_request_discard) <= sizeof(struct blkif_request) + */ +struct blkif_request_discard { + uint8_t operation; /* BLKIF_OP_DISCARD */ + uint8_t flag; /* BLKIF_DISCARD_SECURE or zero */ +#define BLKIF_DISCARD_SECURE (1<<0) /* ignored if discard-secure=0 */ + blkif_vdev_t handle; /* same as for read/write requests */ + uint64_t id; /* private guest value, echoed in resp */ + blkif_sector_t sector_number;/* start sector idx on disk */ + uint64_t nr_sectors; /* number of contiguous sectors to discard*/ +}; +typedef struct blkif_request_discard blkif_request_discard_t; + +struct blkif_request_indirect { + uint8_t operation; /* BLKIF_OP_INDIRECT */ + uint8_t indirect_op; /* BLKIF_OP_{READ/WRITE} */ + uint16_t nr_segments; /* number of segments */ + uint64_t id; /* private guest value, echoed in resp */ + blkif_sector_t sector_number;/* start sector idx on disk (r/w only) */ + blkif_vdev_t handle; /* same as for read/write requests */ + grant_ref_t indirect_grefs[BLKIF_MAX_INDIRECT_PAGES_PER_REQUEST]; +#ifdef __i386__ + uint64_t pad; /* Make it 64 byte aligned on i386 */ +#endif +}; +typedef struct blkif_request_indirect blkif_request_indirect_t; + +struct blkif_response { + uint64_t id; /* copied from request */ + uint8_t operation; /* copied from request */ + int16_t status; /* BLKIF_RSP_??? */ +}; +typedef struct blkif_response blkif_response_t; + +/* + * STATUS RETURN CODES. + */ + /* Operation not supported (only happens on barrier writes). */ +#define BLKIF_RSP_EOPNOTSUPP -2 + /* Operation failed for some unspecified reason (-EIO). */ +#define BLKIF_RSP_ERROR -1 + /* Operation completed successfully. */ +#define BLKIF_RSP_OKAY 0 + +/* + * Generate blkif ring structures and types. + */ +DEFINE_RING_TYPES(blkif, struct blkif_request, struct blkif_response); + +#define VDISK_CDROM 0x1 +#define VDISK_REMOVABLE 0x2 +#define VDISK_READONLY 0x4 + +#endif /* __XEN_PUBLIC_IO_BLKIF_H__ */ diff --git a/include/hw/xen/interface/io/console.h b/include/hw/xen/interface/io/console.h new file mode 100644 index 000000000..e2155d1cf --- /dev/null +++ b/include/hw/xen/interface/io/console.h @@ -0,0 +1,46 @@ +/****************************************************************************** + * console.h + * + * Console I/O interface for Xen guest OSes. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + * + * Copyright (c) 2005, Keir Fraser + */ + +#ifndef __XEN_PUBLIC_IO_CONSOLE_H__ +#define __XEN_PUBLIC_IO_CONSOLE_H__ + +typedef uint32_t XENCONS_RING_IDX; + +#define MASK_XENCONS_IDX(idx, ring) ((idx) & (sizeof(ring)-1)) + +struct xencons_interface { + char in[1024]; + char out[2048]; + XENCONS_RING_IDX in_cons, in_prod; + XENCONS_RING_IDX out_cons, out_prod; +}; + +#ifdef XEN_WANT_FLEX_CONSOLE_RING +#include "ring.h" +DEFINE_XEN_FLEX_RING(xencons); +#endif + +#endif /* __XEN_PUBLIC_IO_CONSOLE_H__ */ diff --git a/include/hw/xen/interface/io/fbif.h b/include/hw/xen/interface/io/fbif.h new file mode 100644 index 000000000..ea87ebec0 --- /dev/null +++ b/include/hw/xen/interface/io/fbif.h @@ -0,0 +1,156 @@ +/* + * fbif.h -- Xen virtual frame buffer device + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + * + * Copyright (C) 2005 Anthony Liguori + * Copyright (C) 2006 Red Hat, Inc., Markus Armbruster + */ + +#ifndef __XEN_PUBLIC_IO_FBIF_H__ +#define __XEN_PUBLIC_IO_FBIF_H__ + +/* Out events (frontend -> backend) */ + +/* + * Out events may be sent only when requested by backend, and receipt + * of an unknown out event is an error. + */ + +/* Event type 1 currently not used */ +/* + * Framebuffer update notification event + * Capable frontend sets feature-update in xenstore. + * Backend requests it by setting request-update in xenstore. + */ +#define XENFB_TYPE_UPDATE 2 + +struct xenfb_update +{ + uint8_t type; /* XENFB_TYPE_UPDATE */ + int32_t x; /* source x */ + int32_t y; /* source y */ + int32_t width; /* rect width */ + int32_t height; /* rect height */ +}; + +/* + * Framebuffer resize notification event + * Capable backend sets feature-resize in xenstore. + */ +#define XENFB_TYPE_RESIZE 3 + +struct xenfb_resize +{ + uint8_t type; /* XENFB_TYPE_RESIZE */ + int32_t width; /* width in pixels */ + int32_t height; /* height in pixels */ + int32_t stride; /* stride in bytes */ + int32_t depth; /* depth in bits */ + int32_t offset; /* offset of the framebuffer in bytes */ +}; + +#define XENFB_OUT_EVENT_SIZE 40 + +union xenfb_out_event +{ + uint8_t type; + struct xenfb_update update; + struct xenfb_resize resize; + char pad[XENFB_OUT_EVENT_SIZE]; +}; + +/* In events (backend -> frontend) */ + +/* + * Frontends should ignore unknown in events. + */ + +/* + * Framebuffer refresh period advice + * Backend sends it to advise the frontend their preferred period of + * refresh. Frontends that keep the framebuffer constantly up-to-date + * just ignore it. Frontends that use the advice should immediately + * refresh the framebuffer (and send an update notification event if + * those have been requested), then use the update frequency to guide + * their periodical refreshs. + */ +#define XENFB_TYPE_REFRESH_PERIOD 1 +#define XENFB_NO_REFRESH 0 + +struct xenfb_refresh_period +{ + uint8_t type; /* XENFB_TYPE_UPDATE_PERIOD */ + uint32_t period; /* period of refresh, in ms, + * XENFB_NO_REFRESH if no refresh is needed */ +}; + +#define XENFB_IN_EVENT_SIZE 40 + +union xenfb_in_event +{ + uint8_t type; + struct xenfb_refresh_period refresh_period; + char pad[XENFB_IN_EVENT_SIZE]; +}; + +/* shared page */ + +#define XENFB_IN_RING_SIZE 1024 +#define XENFB_IN_RING_LEN (XENFB_IN_RING_SIZE / XENFB_IN_EVENT_SIZE) +#define XENFB_IN_RING_OFFS 1024 +#define XENFB_IN_RING(page) \ + ((union xenfb_in_event *)((char *)(page) + XENFB_IN_RING_OFFS)) +#define XENFB_IN_RING_REF(page, idx) \ + (XENFB_IN_RING((page))[(idx) % XENFB_IN_RING_LEN]) + +#define XENFB_OUT_RING_SIZE 2048 +#define XENFB_OUT_RING_LEN (XENFB_OUT_RING_SIZE / XENFB_OUT_EVENT_SIZE) +#define XENFB_OUT_RING_OFFS (XENFB_IN_RING_OFFS + XENFB_IN_RING_SIZE) +#define XENFB_OUT_RING(page) \ + ((union xenfb_out_event *)((char *)(page) + XENFB_OUT_RING_OFFS)) +#define XENFB_OUT_RING_REF(page, idx) \ + (XENFB_OUT_RING((page))[(idx) % XENFB_OUT_RING_LEN]) + +struct xenfb_page +{ + uint32_t in_cons, in_prod; + uint32_t out_cons, out_prod; + + int32_t width; /* the width of the framebuffer (in pixels) */ + int32_t height; /* the height of the framebuffer (in pixels) */ + uint32_t line_length; /* the length of a row of pixels (in bytes) */ + uint32_t mem_length; /* the length of the framebuffer (in bytes) */ + uint8_t depth; /* the depth of a pixel (in bits) */ + + /* + * Framebuffer page directory + * + * Each directory page holds PAGE_SIZE / sizeof(*pd) + * framebuffer pages, and can thus map up to PAGE_SIZE * + * PAGE_SIZE / sizeof(*pd) bytes. With PAGE_SIZE == 4096 and + * sizeof(unsigned long) == 4/8, that's 4 Megs 32 bit and 2 Megs + * 64 bit. 256 directories give enough room for a 512 Meg + * framebuffer with a max resolution of 12,800x10,240. Should + * be enough for a while with room leftover for expansion. + */ + unsigned long pd[256]; +}; + +#endif diff --git a/include/hw/xen/interface/io/kbdif.h b/include/hw/xen/interface/io/kbdif.h new file mode 100644 index 000000000..1d68cd458 --- /dev/null +++ b/include/hw/xen/interface/io/kbdif.h @@ -0,0 +1,566 @@ +/* + * kbdif.h -- Xen virtual keyboard/mouse + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + * + * Copyright (C) 2005 Anthony Liguori + * Copyright (C) 2006 Red Hat, Inc., Markus Armbruster + */ + +#ifndef __XEN_PUBLIC_IO_KBDIF_H__ +#define __XEN_PUBLIC_IO_KBDIF_H__ + +/* + ***************************************************************************** + * Feature and Parameter Negotiation + ***************************************************************************** + * + * The two halves of a para-virtual driver utilize nodes within + * XenStore to communicate capabilities and to negotiate operating parameters. + * This section enumerates these nodes which reside in the respective front and + * backend portions of XenStore, following XenBus convention. + * + * All data in XenStore is stored as strings. Nodes specifying numeric + * values are encoded in decimal. Integer value ranges listed below are + * expressed as fixed sized integer types capable of storing the conversion + * of a properly formated node string, without loss of information. + * + ***************************************************************************** + * Backend XenBus Nodes + ***************************************************************************** + * + *---------------------------- Features supported ---------------------------- + * + * Capable backend advertises supported features by publishing + * corresponding entries in XenStore and puts 1 as the value of the entry. + * If a feature is not supported then 0 must be set or feature entry omitted. + * + * feature-disable-keyboard + * Values: + * + * If there is no need to expose a virtual keyboard device by the + * frontend then this must be set to 1. + * + * feature-disable-pointer + * Values: + * + * If there is no need to expose a virtual pointer device by the + * frontend then this must be set to 1. + * + * feature-abs-pointer + * Values: + * + * Backends, which support reporting of absolute coordinates for pointer + * device should set this to 1. + * + * feature-multi-touch + * Values: + * + * Backends, which support reporting of multi-touch events + * should set this to 1. + * + * feature-raw-pointer + * Values: + * + * Backends, which support reporting raw (unscaled) absolute coordinates + * for pointer devices should set this to 1. Raw (unscaled) values have + * a range of [0, 0x7fff]. + * + *----------------------- Device Instance Parameters ------------------------ + * + * unique-id + * Values: + * + * After device instance initialization it is assigned a unique ID, + * so every instance of the frontend can be identified by the backend + * by this ID. This can be UUID or such. + * + *------------------------- Pointer Device Parameters ------------------------ + * + * width + * Values: + * + * Maximum X coordinate (width) to be used by the frontend + * while reporting input events, pixels, [0; UINT32_MAX]. + * + * height + * Values: + * + * Maximum Y coordinate (height) to be used by the frontend + * while reporting input events, pixels, [0; UINT32_MAX]. + * + *----------------------- Multi-touch Device Parameters ---------------------- + * + * multi-touch-num-contacts + * Values: + * + * Number of simultaneous touches reported. + * + * multi-touch-width + * Values: + * + * Width of the touch area to be used by the frontend + * while reporting input events, pixels, [0; UINT32_MAX]. + * + * multi-touch-height + * Values: + * + * Height of the touch area to be used by the frontend + * while reporting input events, pixels, [0; UINT32_MAX]. + * + ***************************************************************************** + * Frontend XenBus Nodes + ***************************************************************************** + * + *------------------------------ Feature request ----------------------------- + * + * Capable frontend requests features from backend via setting corresponding + * entries to 1 in XenStore. Requests for features not advertised as supported + * by the backend have no effect. + * + * request-abs-pointer + * Values: + * + * Request backend to report absolute pointer coordinates + * (XENKBD_TYPE_POS) instead of relative ones (XENKBD_TYPE_MOTION). + * + * request-multi-touch + * Values: + * + * Request backend to report multi-touch events. + * + * request-raw-pointer + * Values: + * + * Request backend to report raw unscaled absolute pointer coordinates. + * This option is only valid if request-abs-pointer is also set. + * Raw unscaled coordinates have the range [0, 0x7fff] + * + *----------------------- Request Transport Parameters ----------------------- + * + * event-channel + * Values: + * + * The identifier of the Xen event channel used to signal activity + * in the ring buffer. + * + * page-gref + * Values: + * + * The Xen grant reference granting permission for the backend to map + * a sole page in a single page sized event ring buffer. + * + * page-ref + * Values: + * + * OBSOLETE, not recommended for use. + * PFN of the shared page. + */ + +/* + * EVENT CODES. + */ + +#define XENKBD_TYPE_MOTION 1 +#define XENKBD_TYPE_RESERVED 2 +#define XENKBD_TYPE_KEY 3 +#define XENKBD_TYPE_POS 4 +#define XENKBD_TYPE_MTOUCH 5 + +/* Multi-touch event sub-codes */ + +#define XENKBD_MT_EV_DOWN 0 +#define XENKBD_MT_EV_UP 1 +#define XENKBD_MT_EV_MOTION 2 +#define XENKBD_MT_EV_SYN 3 +#define XENKBD_MT_EV_SHAPE 4 +#define XENKBD_MT_EV_ORIENT 5 + +/* + * CONSTANTS, XENSTORE FIELD AND PATH NAME STRINGS, HELPERS. + */ + +#define XENKBD_DRIVER_NAME "vkbd" + +#define XENKBD_FIELD_FEAT_DSBL_KEYBRD "feature-disable-keyboard" +#define XENKBD_FIELD_FEAT_DSBL_POINTER "feature-disable-pointer" +#define XENKBD_FIELD_FEAT_ABS_POINTER "feature-abs-pointer" +#define XENKBD_FIELD_FEAT_RAW_POINTER "feature-raw-pointer" +#define XENKBD_FIELD_FEAT_MTOUCH "feature-multi-touch" +#define XENKBD_FIELD_REQ_ABS_POINTER "request-abs-pointer" +#define XENKBD_FIELD_REQ_RAW_POINTER "request-raw-pointer" +#define XENKBD_FIELD_REQ_MTOUCH "request-multi-touch" +#define XENKBD_FIELD_RING_GREF "page-gref" +#define XENKBD_FIELD_EVT_CHANNEL "event-channel" +#define XENKBD_FIELD_WIDTH "width" +#define XENKBD_FIELD_HEIGHT "height" +#define XENKBD_FIELD_MT_WIDTH "multi-touch-width" +#define XENKBD_FIELD_MT_HEIGHT "multi-touch-height" +#define XENKBD_FIELD_MT_NUM_CONTACTS "multi-touch-num-contacts" +#define XENKBD_FIELD_UNIQUE_ID "unique-id" + +/* OBSOLETE, not recommended for use */ +#define XENKBD_FIELD_RING_REF "page-ref" + +/* + ***************************************************************************** + * Description of the protocol between frontend and backend driver. + ***************************************************************************** + * + * The two halves of a Para-virtual driver communicate with + * each other using a shared page and an event channel. + * Shared page contains a ring with event structures. + * + * All reserved fields in the structures below must be 0. + * + ***************************************************************************** + * Backend to frontend events + ***************************************************************************** + * + * Frontends should ignore unknown in events. + * All event packets have the same length (40 octets) + * All event packets have common header: + * + * 0 octet + * +-----------------+ + * | type | + * +-----------------+ + * type - uint8_t, event code, XENKBD_TYPE_??? + * + * + * Pointer relative movement event + * 0 1 2 3 octet + * +----------------+----------------+----------------+----------------+ + * | _TYPE_MOTION | reserved | 4 + * +----------------+----------------+----------------+----------------+ + * | rel_x | 8 + * +----------------+----------------+----------------+----------------+ + * | rel_y | 12 + * +----------------+----------------+----------------+----------------+ + * | rel_z | 16 + * +----------------+----------------+----------------+----------------+ + * | reserved | 20 + * +----------------+----------------+----------------+----------------+ + * |/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/| + * +----------------+----------------+----------------+----------------+ + * | reserved | 40 + * +----------------+----------------+----------------+----------------+ + * + * rel_x - int32_t, relative X motion + * rel_y - int32_t, relative Y motion + * rel_z - int32_t, relative Z motion (wheel) + */ + +struct xenkbd_motion +{ + uint8_t type; + int32_t rel_x; + int32_t rel_y; + int32_t rel_z; +}; + +/* + * Key event (includes pointer buttons) + * 0 1 2 3 octet + * +----------------+----------------+----------------+----------------+ + * | _TYPE_KEY | pressed | reserved | 4 + * +----------------+----------------+----------------+----------------+ + * | keycode | 8 + * +----------------+----------------+----------------+----------------+ + * | reserved | 12 + * +----------------+----------------+----------------+----------------+ + * |/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/| + * +----------------+----------------+----------------+----------------+ + * | reserved | 40 + * +----------------+----------------+----------------+----------------+ + * + * pressed - uint8_t, 1 if pressed; 0 otherwise + * keycode - uint32_t, KEY_* from linux/input.h + */ + +struct xenkbd_key +{ + uint8_t type; + uint8_t pressed; + uint32_t keycode; +}; + +/* + * Pointer absolute position event + * 0 1 2 3 octet + * +----------------+----------------+----------------+----------------+ + * | _TYPE_POS | reserved | 4 + * +----------------+----------------+----------------+----------------+ + * | abs_x | 8 + * +----------------+----------------+----------------+----------------+ + * | abs_y | 12 + * +----------------+----------------+----------------+----------------+ + * | rel_z | 16 + * +----------------+----------------+----------------+----------------+ + * | reserved | 20 + * +----------------+----------------+----------------+----------------+ + * |/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/| + * +----------------+----------------+----------------+----------------+ + * | reserved | 40 + * +----------------+----------------+----------------+----------------+ + * + * abs_x - int32_t, absolute X position (in FB pixels) + * abs_y - int32_t, absolute Y position (in FB pixels) + * rel_z - int32_t, relative Z motion (wheel) + */ + +struct xenkbd_position +{ + uint8_t type; + int32_t abs_x; + int32_t abs_y; + int32_t rel_z; +}; + +/* + * Multi-touch event and its sub-types + * + * All multi-touch event packets have common header: + * + * 0 1 2 3 octet + * +----------------+----------------+----------------+----------------+ + * | _TYPE_MTOUCH | event_type | contact_id | reserved | 4 + * +----------------+----------------+----------------+----------------+ + * | reserved | 8 + * +----------------+----------------+----------------+----------------+ + * + * event_type - unt8_t, multi-touch event sub-type, XENKBD_MT_EV_??? + * contact_id - unt8_t, ID of the contact + * + * Touch interactions can consist of one or more contacts. + * For each contact, a series of events is generated, starting + * with a down event, followed by zero or more motion events, + * and ending with an up event. Events relating to the same + * contact point can be identified by the ID of the sequence: contact ID. + * Contact ID may be reused after XENKBD_MT_EV_UP event and + * is in the [0; XENKBD_FIELD_NUM_CONTACTS - 1] range. + * + * For further information please refer to documentation on Wayland [1], + * Linux [2] and Windows [3] multi-touch support. + * + * [1] https://cgit.freedesktop.org/wayland/wayland/tree/protocol/wayland.xml + * [2] https://www.kernel.org/doc/Documentation/input/multi-touch-protocol.txt + * [3] https://msdn.microsoft.com/en-us/library/jj151564(v=vs.85).aspx + * + * + * Multi-touch down event - sent when a new touch is made: touch is assigned + * a unique contact ID, sent with this and consequent events related + * to this touch. + * 0 1 2 3 octet + * +----------------+----------------+----------------+----------------+ + * | _TYPE_MTOUCH | _MT_EV_DOWN | contact_id | reserved | 4 + * +----------------+----------------+----------------+----------------+ + * | reserved | 8 + * +----------------+----------------+----------------+----------------+ + * | abs_x | 12 + * +----------------+----------------+----------------+----------------+ + * | abs_y | 16 + * +----------------+----------------+----------------+----------------+ + * | reserved | 20 + * +----------------+----------------+----------------+----------------+ + * |/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/| + * +----------------+----------------+----------------+----------------+ + * | reserved | 40 + * +----------------+----------------+----------------+----------------+ + * + * abs_x - int32_t, absolute X position, in pixels + * abs_y - int32_t, absolute Y position, in pixels + * + * Multi-touch contact release event + * 0 1 2 3 octet + * +----------------+----------------+----------------+----------------+ + * | _TYPE_MTOUCH | _MT_EV_UP | contact_id | reserved | 4 + * +----------------+----------------+----------------+----------------+ + * | reserved | 8 + * +----------------+----------------+----------------+----------------+ + * |/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/| + * +----------------+----------------+----------------+----------------+ + * | reserved | 40 + * +----------------+----------------+----------------+----------------+ + * + * Multi-touch motion event + * 0 1 2 3 octet + * +----------------+----------------+----------------+----------------+ + * | _TYPE_MTOUCH | _MT_EV_MOTION | contact_id | reserved | 4 + * +----------------+----------------+----------------+----------------+ + * | reserved | 8 + * +----------------+----------------+----------------+----------------+ + * | abs_x | 12 + * +----------------+----------------+----------------+----------------+ + * | abs_y | 16 + * +----------------+----------------+----------------+----------------+ + * | reserved | 20 + * +----------------+----------------+----------------+----------------+ + * |/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/| + * +----------------+----------------+----------------+----------------+ + * | reserved | 40 + * +----------------+----------------+----------------+----------------+ + * + * abs_x - int32_t, absolute X position, in pixels, + * abs_y - int32_t, absolute Y position, in pixels, + * + * Multi-touch input synchronization event - shows end of a set of events + * which logically belong together. + * 0 1 2 3 octet + * +----------------+----------------+----------------+----------------+ + * | _TYPE_MTOUCH | _MT_EV_SYN | contact_id | reserved | 4 + * +----------------+----------------+----------------+----------------+ + * | reserved | 8 + * +----------------+----------------+----------------+----------------+ + * |/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/| + * +----------------+----------------+----------------+----------------+ + * | reserved | 40 + * +----------------+----------------+----------------+----------------+ + * + * Multi-touch shape event - touch point's shape has changed its shape. + * Shape is approximated by an ellipse through the major and minor axis + * lengths: major is the longer diameter of the ellipse and minor is the + * shorter one. Center of the ellipse is reported via + * XENKBD_MT_EV_DOWN/XENKBD_MT_EV_MOTION events. + * 0 1 2 3 octet + * +----------------+----------------+----------------+----------------+ + * | _TYPE_MTOUCH | _MT_EV_SHAPE | contact_id | reserved | 4 + * +----------------+----------------+----------------+----------------+ + * | reserved | 8 + * +----------------+----------------+----------------+----------------+ + * | major | 12 + * +----------------+----------------+----------------+----------------+ + * | minor | 16 + * +----------------+----------------+----------------+----------------+ + * | reserved | 20 + * +----------------+----------------+----------------+----------------+ + * |/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/| + * +----------------+----------------+----------------+----------------+ + * | reserved | 40 + * +----------------+----------------+----------------+----------------+ + * + * major - unt32_t, length of the major axis, pixels + * minor - unt32_t, length of the minor axis, pixels + * + * Multi-touch orientation event - touch point's shape has changed + * its orientation: calculated as a clockwise angle between the major axis + * of the ellipse and positive Y axis in degrees, [-180; +180]. + * 0 1 2 3 octet + * +----------------+----------------+----------------+----------------+ + * | _TYPE_MTOUCH | _MT_EV_ORIENT | contact_id | reserved | 4 + * +----------------+----------------+----------------+----------------+ + * | reserved | 8 + * +----------------+----------------+----------------+----------------+ + * | orientation | reserved | 12 + * +----------------+----------------+----------------+----------------+ + * | reserved | 16 + * +----------------+----------------+----------------+----------------+ + * |/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/| + * +----------------+----------------+----------------+----------------+ + * | reserved | 40 + * +----------------+----------------+----------------+----------------+ + * + * orientation - int16_t, clockwise angle of the major axis + */ + +struct xenkbd_mtouch { + uint8_t type; /* XENKBD_TYPE_MTOUCH */ + uint8_t event_type; /* XENKBD_MT_EV_??? */ + uint8_t contact_id; + uint8_t reserved[5]; /* reserved for the future use */ + union { + struct { + int32_t abs_x; /* absolute X position, pixels */ + int32_t abs_y; /* absolute Y position, pixels */ + } pos; + struct { + uint32_t major; /* length of the major axis, pixels */ + uint32_t minor; /* length of the minor axis, pixels */ + } shape; + int16_t orientation; /* clockwise angle of the major axis */ + } u; +}; + +#define XENKBD_IN_EVENT_SIZE 40 + +union xenkbd_in_event +{ + uint8_t type; + struct xenkbd_motion motion; + struct xenkbd_key key; + struct xenkbd_position pos; + struct xenkbd_mtouch mtouch; + char pad[XENKBD_IN_EVENT_SIZE]; +}; + +/* + ***************************************************************************** + * Frontend to backend events + ***************************************************************************** + * + * Out events may be sent only when requested by backend, and receipt + * of an unknown out event is an error. + * No out events currently defined. + + * All event packets have the same length (40 octets) + * All event packets have common header: + * 0 octet + * +-----------------+ + * | type | + * +-----------------+ + * type - uint8_t, event code + */ + +#define XENKBD_OUT_EVENT_SIZE 40 + +union xenkbd_out_event +{ + uint8_t type; + char pad[XENKBD_OUT_EVENT_SIZE]; +}; + +/* + ***************************************************************************** + * Shared page + ***************************************************************************** + */ + +#define XENKBD_IN_RING_SIZE 2048 +#define XENKBD_IN_RING_LEN (XENKBD_IN_RING_SIZE / XENKBD_IN_EVENT_SIZE) +#define XENKBD_IN_RING_OFFS 1024 +#define XENKBD_IN_RING(page) \ + ((union xenkbd_in_event *)((char *)(page) + XENKBD_IN_RING_OFFS)) +#define XENKBD_IN_RING_REF(page, idx) \ + (XENKBD_IN_RING((page))[(idx) % XENKBD_IN_RING_LEN]) + +#define XENKBD_OUT_RING_SIZE 1024 +#define XENKBD_OUT_RING_LEN (XENKBD_OUT_RING_SIZE / XENKBD_OUT_EVENT_SIZE) +#define XENKBD_OUT_RING_OFFS (XENKBD_IN_RING_OFFS + XENKBD_IN_RING_SIZE) +#define XENKBD_OUT_RING(page) \ + ((union xenkbd_out_event *)((char *)(page) + XENKBD_OUT_RING_OFFS)) +#define XENKBD_OUT_RING_REF(page, idx) \ + (XENKBD_OUT_RING((page))[(idx) % XENKBD_OUT_RING_LEN]) + +struct xenkbd_page +{ + uint32_t in_cons, in_prod; + uint32_t out_cons, out_prod; +}; + +#endif /* __XEN_PUBLIC_IO_KBDIF_H__ */ diff --git a/include/hw/xen/interface/io/netif.h b/include/hw/xen/interface/io/netif.h new file mode 100644 index 000000000..48fa53095 --- /dev/null +++ b/include/hw/xen/interface/io/netif.h @@ -0,0 +1,1010 @@ +/****************************************************************************** + * netif.h + * + * Unified network-device I/O interface for Xen guest OSes. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + * + * Copyright (c) 2003-2004, Keir Fraser + */ + +#ifndef __XEN_PUBLIC_IO_NETIF_H__ +#define __XEN_PUBLIC_IO_NETIF_H__ + +#include "ring.h" +#include "../grant_table.h" + +/* + * Older implementation of Xen network frontend / backend has an + * implicit dependency on the MAX_SKB_FRAGS as the maximum number of + * ring slots a skb can use. Netfront / netback may not work as + * expected when frontend and backend have different MAX_SKB_FRAGS. + * + * A better approach is to add mechanism for netfront / netback to + * negotiate this value. However we cannot fix all possible + * frontends, so we need to define a value which states the minimum + * slots backend must support. + * + * The minimum value derives from older Linux kernel's MAX_SKB_FRAGS + * (18), which is proved to work with most frontends. Any new backend + * which doesn't negotiate with frontend should expect frontend to + * send a valid packet using slots up to this value. + */ +#define XEN_NETIF_NR_SLOTS_MIN 18 + +/* + * Notifications after enqueuing any type of message should be conditional on + * the appropriate req_event or rsp_event field in the shared ring. + * If the client sends notification for rx requests then it should specify + * feature 'feature-rx-notify' via xenbus. Otherwise the backend will assume + * that it cannot safely queue packets (as it may not be kicked to send them). + */ + +/* + * "feature-split-event-channels" is introduced to separate guest TX + * and RX notification. Backend either doesn't support this feature or + * advertises it via xenstore as 0 (disabled) or 1 (enabled). + * + * To make use of this feature, frontend should allocate two event + * channels for TX and RX, advertise them to backend as + * "event-channel-tx" and "event-channel-rx" respectively. If frontend + * doesn't want to use this feature, it just writes "event-channel" + * node as before. + */ + +/* + * Multiple transmit and receive queues: + * If supported, the backend will write the key "multi-queue-max-queues" to + * the directory for that vif, and set its value to the maximum supported + * number of queues. + * Frontends that are aware of this feature and wish to use it can write the + * key "multi-queue-num-queues", set to the number they wish to use, which + * must be greater than zero, and no more than the value reported by the backend + * in "multi-queue-max-queues". + * + * Queues replicate the shared rings and event channels. + * "feature-split-event-channels" may optionally be used when using + * multiple queues, but is not mandatory. + * + * Each queue consists of one shared ring pair, i.e. there must be the same + * number of tx and rx rings. + * + * For frontends requesting just one queue, the usual event-channel and + * ring-ref keys are written as before, simplifying the backend processing + * to avoid distinguishing between a frontend that doesn't understand the + * multi-queue feature, and one that does, but requested only one queue. + * + * Frontends requesting two or more queues must not write the toplevel + * event-channel (or event-channel-{tx,rx}) and {tx,rx}-ring-ref keys, + * instead writing those keys under sub-keys having the name "queue-N" where + * N is the integer ID of the queue for which those keys belong. Queues + * are indexed from zero. For example, a frontend with two queues and split + * event channels must write the following set of queue-related keys: + * + * /local/domain/1/device/vif/0/multi-queue-num-queues = "2" + * /local/domain/1/device/vif/0/queue-0 = "" + * /local/domain/1/device/vif/0/queue-0/tx-ring-ref = "" + * /local/domain/1/device/vif/0/queue-0/rx-ring-ref = "" + * /local/domain/1/device/vif/0/queue-0/event-channel-tx = "" + * /local/domain/1/device/vif/0/queue-0/event-channel-rx = "" + * /local/domain/1/device/vif/0/queue-1 = "" + * /local/domain/1/device/vif/0/queue-1/tx-ring-ref = "" + * /local/domain/1/device/vif/0/queue-1/rx-ring-ref = "//feature-ctrl-ring = "1" + * + * The frontend provides a control ring to the backend by setting: + * + * /local/domain//device/vif//ctrl-ring-ref = + * /local/domain//device/vif//event-channel-ctrl = + * + * where is the grant reference of the shared page used to + * implement the control ring and is an event channel to be used + * as a mailbox interrupt. These keys must be set before the frontend + * moves into the connected state. + * + * The control ring uses a fixed request/response message size and is + * balanced (i.e. one request to one response), so operationally it is much + * the same as a transmit or receive ring. + * Note that there is no requirement that responses are issued in the same + * order as requests. + */ + +/* + * Hash types + * ========== + * + * For the purposes of the definitions below, 'Packet[]' is an array of + * octets containing an IP packet without options, 'Array[X..Y]' means a + * sub-array of 'Array' containing bytes X thru Y inclusive, and '+' is + * used to indicate concatenation of arrays. + */ + +/* + * A hash calculated over an IP version 4 header as follows: + * + * Buffer[0..8] = Packet[12..15] (source address) + + * Packet[16..19] (destination address) + * + * Result = Hash(Buffer, 8) + */ +#define _XEN_NETIF_CTRL_HASH_TYPE_IPV4 0 +#define XEN_NETIF_CTRL_HASH_TYPE_IPV4 \ + (1 << _XEN_NETIF_CTRL_HASH_TYPE_IPV4) + +/* + * A hash calculated over an IP version 4 header and TCP header as + * follows: + * + * Buffer[0..12] = Packet[12..15] (source address) + + * Packet[16..19] (destination address) + + * Packet[20..21] (source port) + + * Packet[22..23] (destination port) + * + * Result = Hash(Buffer, 12) + */ +#define _XEN_NETIF_CTRL_HASH_TYPE_IPV4_TCP 1 +#define XEN_NETIF_CTRL_HASH_TYPE_IPV4_TCP \ + (1 << _XEN_NETIF_CTRL_HASH_TYPE_IPV4_TCP) + +/* + * A hash calculated over an IP version 6 header as follows: + * + * Buffer[0..32] = Packet[8..23] (source address ) + + * Packet[24..39] (destination address) + * + * Result = Hash(Buffer, 32) + */ +#define _XEN_NETIF_CTRL_HASH_TYPE_IPV6 2 +#define XEN_NETIF_CTRL_HASH_TYPE_IPV6 \ + (1 << _XEN_NETIF_CTRL_HASH_TYPE_IPV6) + +/* + * A hash calculated over an IP version 6 header and TCP header as + * follows: + * + * Buffer[0..36] = Packet[8..23] (source address) + + * Packet[24..39] (destination address) + + * Packet[40..41] (source port) + + * Packet[42..43] (destination port) + * + * Result = Hash(Buffer, 36) + */ +#define _XEN_NETIF_CTRL_HASH_TYPE_IPV6_TCP 3 +#define XEN_NETIF_CTRL_HASH_TYPE_IPV6_TCP \ + (1 << _XEN_NETIF_CTRL_HASH_TYPE_IPV6_TCP) + +/* + * Hash algorithms + * =============== + */ + +#define XEN_NETIF_CTRL_HASH_ALGORITHM_NONE 0 + +/* + * Toeplitz hash: + */ + +#define XEN_NETIF_CTRL_HASH_ALGORITHM_TOEPLITZ 1 + +/* + * Control requests (struct xen_netif_ctrl_request) + * ================================================ + * + * All requests have the following format: + * + * 0 1 2 3 4 5 6 7 octet + * +-----+-----+-----+-----+-----+-----+-----+-----+ + * | id | type | data[0] | + * +-----+-----+-----+-----+-----+-----+-----+-----+ + * | data[1] | data[2] | + * +-----+-----+-----+-----+-----------------------+ + * + * id: the request identifier, echoed in response. + * type: the type of request (see below) + * data[]: any data associated with the request (determined by type) + */ + +struct xen_netif_ctrl_request { + uint16_t id; + uint16_t type; + +#define XEN_NETIF_CTRL_TYPE_INVALID 0 +#define XEN_NETIF_CTRL_TYPE_GET_HASH_FLAGS 1 +#define XEN_NETIF_CTRL_TYPE_SET_HASH_FLAGS 2 +#define XEN_NETIF_CTRL_TYPE_SET_HASH_KEY 3 +#define XEN_NETIF_CTRL_TYPE_GET_HASH_MAPPING_SIZE 4 +#define XEN_NETIF_CTRL_TYPE_SET_HASH_MAPPING_SIZE 5 +#define XEN_NETIF_CTRL_TYPE_SET_HASH_MAPPING 6 +#define XEN_NETIF_CTRL_TYPE_SET_HASH_ALGORITHM 7 +#define XEN_NETIF_CTRL_TYPE_GET_GREF_MAPPING_SIZE 8 +#define XEN_NETIF_CTRL_TYPE_ADD_GREF_MAPPING 9 +#define XEN_NETIF_CTRL_TYPE_DEL_GREF_MAPPING 10 + + uint32_t data[3]; +}; + +/* + * Control responses (struct xen_netif_ctrl_response) + * ================================================== + * + * All responses have the following format: + * + * 0 1 2 3 4 5 6 7 octet + * +-----+-----+-----+-----+-----+-----+-----+-----+ + * | id | type | status | + * +-----+-----+-----+-----+-----+-----+-----+-----+ + * | data | + * +-----+-----+-----+-----+ + * + * id: the corresponding request identifier + * type: the type of the corresponding request + * status: the status of request processing + * data: any data associated with the response (determined by type and + * status) + */ + +struct xen_netif_ctrl_response { + uint16_t id; + uint16_t type; + uint32_t status; + +#define XEN_NETIF_CTRL_STATUS_SUCCESS 0 +#define XEN_NETIF_CTRL_STATUS_NOT_SUPPORTED 1 +#define XEN_NETIF_CTRL_STATUS_INVALID_PARAMETER 2 +#define XEN_NETIF_CTRL_STATUS_BUFFER_OVERFLOW 3 + + uint32_t data; +}; + +/* + * Static Grants (struct xen_netif_gref) + * ===================================== + * + * A frontend may provide a fixed set of grant references to be mapped on + * the backend. The message of type XEN_NETIF_CTRL_TYPE_ADD_GREF_MAPPING + * prior its usage in the command ring allows for creation of these mappings. + * The backend will maintain a fixed amount of these mappings. + * + * XEN_NETIF_CTRL_TYPE_GET_GREF_MAPPING_SIZE lets a frontend query how many + * of these mappings can be kept. + * + * Each entry in the XEN_NETIF_CTRL_TYPE_{ADD,DEL}_GREF_MAPPING input table has + * the following format: + * + * 0 1 2 3 4 5 6 7 octet + * +-----+-----+-----+-----+-----+-----+-----+-----+ + * | grant ref | flags | status | + * +-----+-----+-----+-----+-----+-----+-----+-----+ + * + * grant ref: grant reference (IN) + * flags: flags describing the control operation (IN) + * status: XEN_NETIF_CTRL_STATUS_* (OUT) + * + * 'status' is an output parameter which does not require to be set to zero + * prior to its usage in the corresponding control messages. + */ + +struct xen_netif_gref { + grant_ref_t ref; + uint16_t flags; + +#define _XEN_NETIF_CTRLF_GREF_readonly 0 +#define XEN_NETIF_CTRLF_GREF_readonly (1U<<_XEN_NETIF_CTRLF_GREF_readonly) + + uint16_t status; +}; + +/* + * Control messages + * ================ + * + * XEN_NETIF_CTRL_TYPE_SET_HASH_ALGORITHM + * -------------------------------------- + * + * This is sent by the frontend to set the desired hash algorithm. + * + * Request: + * + * type = XEN_NETIF_CTRL_TYPE_SET_HASH_ALGORITHM + * data[0] = a XEN_NETIF_CTRL_HASH_ALGORITHM_* value + * data[1] = 0 + * data[2] = 0 + * + * Response: + * + * status = XEN_NETIF_CTRL_STATUS_NOT_SUPPORTED - Operation not + * supported + * XEN_NETIF_CTRL_STATUS_INVALID_PARAMETER - The algorithm is not + * supported + * XEN_NETIF_CTRL_STATUS_SUCCESS - Operation successful + * + * NOTE: Setting data[0] to XEN_NETIF_CTRL_HASH_ALGORITHM_NONE disables + * hashing and the backend is free to choose how it steers packets + * to queues (which is the default behaviour). + * + * XEN_NETIF_CTRL_TYPE_GET_HASH_FLAGS + * ---------------------------------- + * + * This is sent by the frontend to query the types of hash supported by + * the backend. + * + * Request: + * + * type = XEN_NETIF_CTRL_TYPE_GET_HASH_FLAGS + * data[0] = 0 + * data[1] = 0 + * data[2] = 0 + * + * Response: + * + * status = XEN_NETIF_CTRL_STATUS_NOT_SUPPORTED - Operation not supported + * XEN_NETIF_CTRL_STATUS_SUCCESS - Operation successful + * data = supported hash types (if operation was successful) + * + * NOTE: A valid hash algorithm must be selected before this operation can + * succeed. + * + * XEN_NETIF_CTRL_TYPE_SET_HASH_FLAGS + * ---------------------------------- + * + * This is sent by the frontend to set the types of hash that the backend + * should calculate. (See above for hash type definitions). + * Note that the 'maximal' type of hash should always be chosen. For + * example, if the frontend sets both IPV4 and IPV4_TCP hash types then + * the latter hash type should be calculated for any TCP packet and the + * former only calculated for non-TCP packets. + * + * Request: + * + * type = XEN_NETIF_CTRL_TYPE_SET_HASH_FLAGS + * data[0] = bitwise OR of XEN_NETIF_CTRL_HASH_TYPE_* values + * data[1] = 0 + * data[2] = 0 + * + * Response: + * + * status = XEN_NETIF_CTRL_STATUS_NOT_SUPPORTED - Operation not + * supported + * XEN_NETIF_CTRL_STATUS_INVALID_PARAMETER - One or more flag + * value is invalid or + * unsupported + * XEN_NETIF_CTRL_STATUS_SUCCESS - Operation successful + * data = 0 + * + * NOTE: A valid hash algorithm must be selected before this operation can + * succeed. + * Also, setting data[0] to zero disables hashing and the backend + * is free to choose how it steers packets to queues. + * + * XEN_NETIF_CTRL_TYPE_SET_HASH_KEY + * -------------------------------- + * + * This is sent by the frontend to set the key of the hash if the algorithm + * requires it. (See hash algorithms above). + * + * Request: + * + * type = XEN_NETIF_CTRL_TYPE_SET_HASH_KEY + * data[0] = grant reference of page containing the key (assumed to + * start at beginning of grant) + * data[1] = size of key in octets + * data[2] = 0 + * + * Response: + * + * status = XEN_NETIF_CTRL_STATUS_NOT_SUPPORTED - Operation not + * supported + * XEN_NETIF_CTRL_STATUS_INVALID_PARAMETER - Key size is invalid + * XEN_NETIF_CTRL_STATUS_BUFFER_OVERFLOW - Key size is larger + * than the backend + * supports + * XEN_NETIF_CTRL_STATUS_SUCCESS - Operation successful + * data = 0 + * + * NOTE: Any key octets not specified are assumed to be zero (the key + * is assumed to be empty by default) and specifying a new key + * invalidates any previous key, hence specifying a key size of + * zero will clear the key (which ensures that the calculated hash + * will always be zero). + * The maximum size of key is algorithm and backend specific, but + * is also limited by the single grant reference. + * The grant reference may be read-only and must remain valid until + * the response has been processed. + * + * XEN_NETIF_CTRL_TYPE_GET_HASH_MAPPING_SIZE + * ----------------------------------------- + * + * This is sent by the frontend to query the maximum size of mapping + * table supported by the backend. The size is specified in terms of + * table entries. + * + * Request: + * + * type = XEN_NETIF_CTRL_TYPE_GET_HASH_MAPPING_SIZE + * data[0] = 0 + * data[1] = 0 + * data[2] = 0 + * + * Response: + * + * status = XEN_NETIF_CTRL_STATUS_NOT_SUPPORTED - Operation not supported + * XEN_NETIF_CTRL_STATUS_SUCCESS - Operation successful + * data = maximum number of entries allowed in the mapping table + * (if operation was successful) or zero if a mapping table is + * not supported (i.e. hash mapping is done only by modular + * arithmetic). + * + * XEN_NETIF_CTRL_TYPE_SET_HASH_MAPPING_SIZE + * ------------------------------------- + * + * This is sent by the frontend to set the actual size of the mapping + * table to be used by the backend. The size is specified in terms of + * table entries. + * Any previous table is invalidated by this message and any new table + * is assumed to be zero filled. + * + * Request: + * + * type = XEN_NETIF_CTRL_TYPE_SET_HASH_MAPPING_SIZE + * data[0] = number of entries in mapping table + * data[1] = 0 + * data[2] = 0 + * + * Response: + * + * status = XEN_NETIF_CTRL_STATUS_NOT_SUPPORTED - Operation not + * supported + * XEN_NETIF_CTRL_STATUS_INVALID_PARAMETER - Table size is invalid + * XEN_NETIF_CTRL_STATUS_SUCCESS - Operation successful + * data = 0 + * + * NOTE: Setting data[0] to 0 means that hash mapping should be done + * using modular arithmetic. + * + * XEN_NETIF_CTRL_TYPE_SET_HASH_MAPPING + * ------------------------------------ + * + * This is sent by the frontend to set the content of the table mapping + * hash value to queue number. The backend should calculate the hash from + * the packet header, use it as an index into the table (modulo the size + * of the table) and then steer the packet to the queue number found at + * that index. + * + * Request: + * + * type = XEN_NETIF_CTRL_TYPE_SET_HASH_MAPPING + * data[0] = grant reference of page containing the mapping (sub-)table + * (assumed to start at beginning of grant) + * data[1] = size of (sub-)table in entries + * data[2] = offset, in entries, of sub-table within overall table + * + * Response: + * + * status = XEN_NETIF_CTRL_STATUS_NOT_SUPPORTED - Operation not + * supported + * XEN_NETIF_CTRL_STATUS_INVALID_PARAMETER - Table size or content + * is invalid + * XEN_NETIF_CTRL_STATUS_BUFFER_OVERFLOW - Table size is larger + * than the backend + * supports + * XEN_NETIF_CTRL_STATUS_SUCCESS - Operation successful + * data = 0 + * + * NOTE: The overall table has the following format: + * + * 0 1 2 3 4 5 6 7 octet + * +-----+-----+-----+-----+-----+-----+-----+-----+ + * | mapping[0] | mapping[1] | + * +-----+-----+-----+-----+-----+-----+-----+-----+ + * | . | + * | . | + * | . | + * +-----+-----+-----+-----+-----+-----+-----+-----+ + * | mapping[N-2] | mapping[N-1] | + * +-----+-----+-----+-----+-----+-----+-----+-----+ + * + * where N is specified by a XEN_NETIF_CTRL_TYPE_SET_HASH_MAPPING_SIZE + * message and each mapping must specifies a queue between 0 and + * "multi-queue-num-queues" (see above). + * The backend may support a mapping table larger than can be + * mapped by a single grant reference. Thus sub-tables within a + * larger table can be individually set by sending multiple messages + * with differing offset values. Specifying a new sub-table does not + * invalidate any table data outside that range. + * The grant reference may be read-only and must remain valid until + * the response has been processed. + * + * XEN_NETIF_CTRL_TYPE_GET_GREF_MAPPING_SIZE + * ----------------------------------------- + * + * This is sent by the frontend to fetch the number of grefs that can be kept + * mapped in the backend. + * + * Request: + * + * type = XEN_NETIF_CTRL_TYPE_GET_GREF_MAPPING_SIZE + * data[0] = queue index (assumed 0 for single queue) + * data[1] = 0 + * data[2] = 0 + * + * Response: + * + * status = XEN_NETIF_CTRL_STATUS_NOT_SUPPORTED - Operation not + * supported + * XEN_NETIF_CTRL_STATUS_INVALID_PARAMETER - The queue index is + * out of range + * XEN_NETIF_CTRL_STATUS_SUCCESS - Operation successful + * data = maximum number of entries allowed in the gref mapping table + * (if operation was successful) or zero if it is not supported. + * + * XEN_NETIF_CTRL_TYPE_ADD_GREF_MAPPING + * ------------------------------------ + * + * This is sent by the frontend for backend to map a list of grant + * references. + * + * Request: + * + * type = XEN_NETIF_CTRL_TYPE_ADD_GREF_MAPPING + * data[0] = queue index + * data[1] = grant reference of page containing the mapping list + * (r/w and assumed to start at beginning of page) + * data[2] = size of list in entries + * + * Response: + * + * status = XEN_NETIF_CTRL_STATUS_NOT_SUPPORTED - Operation not + * supported + * XEN_NETIF_CTRL_STATUS_INVALID_PARAMETER - Operation failed + * XEN_NETIF_CTRL_STATUS_SUCCESS - Operation successful + * + * NOTE: Each entry in the input table has the format outlined + * in struct xen_netif_gref. + * Contrary to XEN_NETIF_CTRL_TYPE_DEL_GREF_MAPPING, the struct + * xen_netif_gref 'status' field is not used and therefore the response + * 'status' determines the success of this operation. In case of + * failure none of grants mappings get added in the backend. + * + * XEN_NETIF_CTRL_TYPE_DEL_GREF_MAPPING + * ------------------------------------ + * + * This is sent by the frontend for backend to unmap a list of grant + * references. + * + * Request: + * + * type = XEN_NETIF_CTRL_TYPE_DEL_GREF_MAPPING + * data[0] = queue index + * data[1] = grant reference of page containing the mapping list + * (r/w and assumed to start at beginning of page) + * data[2] = size of list in entries + * + * Response: + * + * status = XEN_NETIF_CTRL_STATUS_NOT_SUPPORTED - Operation not + * supported + * XEN_NETIF_CTRL_STATUS_INVALID_PARAMETER - Operation failed + * XEN_NETIF_CTRL_STATUS_SUCCESS - Operation successful + * data = number of entries that were unmapped + * + * NOTE: Each entry in the input table has the format outlined in struct + * xen_netif_gref. + * The struct xen_netif_gref 'status' field determines if the entry + * was successfully removed. + * The entries used are only the ones representing grant references that + * were previously the subject of a XEN_NETIF_CTRL_TYPE_ADD_GREF_MAPPING + * operation. Any other entries will have their status set to + * XEN_NETIF_CTRL_STATUS_INVALID_PARAMETER upon completion. + */ + +DEFINE_RING_TYPES(xen_netif_ctrl, + struct xen_netif_ctrl_request, + struct xen_netif_ctrl_response); + +/* + * Guest transmit + * ============== + * + * This is the 'wire' format for transmit (frontend -> backend) packets: + * + * Fragment 1: netif_tx_request_t - flags = NETTXF_* + * size = total packet size + * [Extra 1: netif_extra_info_t] - (only if fragment 1 flags include + * NETTXF_extra_info) + * ... + * [Extra N: netif_extra_info_t] - (only if extra N-1 flags include + * XEN_NETIF_EXTRA_MORE) + * ... + * Fragment N: netif_tx_request_t - (only if fragment N-1 flags include + * NETTXF_more_data - flags on preceding + * extras are not relevant here) + * flags = 0 + * size = fragment size + * + * NOTE: + * + * This format slightly is different from that used for receive + * (backend -> frontend) packets. Specifically, in a multi-fragment + * packet the actual size of fragment 1 can only be determined by + * subtracting the sizes of fragments 2..N from the total packet size. + * + * Ring slot size is 12 octets, however not all request/response + * structs use the full size. + * + * tx request data (netif_tx_request_t) + * ------------------------------------ + * + * 0 1 2 3 4 5 6 7 octet + * +-----+-----+-----+-----+-----+-----+-----+-----+ + * | grant ref | offset | flags | + * +-----+-----+-----+-----+-----+-----+-----+-----+ + * | id | size | + * +-----+-----+-----+-----+ + * + * grant ref: Reference to buffer page. + * offset: Offset within buffer page. + * flags: NETTXF_*. + * id: request identifier, echoed in response. + * size: packet size in bytes. + * + * tx response (netif_tx_response_t) + * --------------------------------- + * + * 0 1 2 3 4 5 6 7 octet + * +-----+-----+-----+-----+-----+-----+-----+-----+ + * | id | status | unused | + * +-----+-----+-----+-----+-----+-----+-----+-----+ + * | unused | + * +-----+-----+-----+-----+ + * + * id: reflects id in transmit request + * status: NETIF_RSP_* + * + * Guest receive + * ============= + * + * This is the 'wire' format for receive (backend -> frontend) packets: + * + * Fragment 1: netif_rx_request_t - flags = NETRXF_* + * size = fragment size + * [Extra 1: netif_extra_info_t] - (only if fragment 1 flags include + * NETRXF_extra_info) + * ... + * [Extra N: netif_extra_info_t] - (only if extra N-1 flags include + * XEN_NETIF_EXTRA_MORE) + * ... + * Fragment N: netif_rx_request_t - (only if fragment N-1 flags include + * NETRXF_more_data - flags on preceding + * extras are not relevant here) + * flags = 0 + * size = fragment size + * + * NOTE: + * + * This format slightly is different from that used for transmit + * (frontend -> backend) packets. Specifically, in a multi-fragment + * packet the size of the packet can only be determined by summing the + * sizes of fragments 1..N. + * + * Ring slot size is 8 octets. + * + * rx request (netif_rx_request_t) + * ------------------------------- + * + * 0 1 2 3 4 5 6 7 octet + * +-----+-----+-----+-----+-----+-----+-----+-----+ + * | id | pad | gref | + * +-----+-----+-----+-----+-----+-----+-----+-----+ + * + * id: request identifier, echoed in response. + * gref: reference to incoming granted frame. + * + * rx response (netif_rx_response_t) + * --------------------------------- + * + * 0 1 2 3 4 5 6 7 octet + * +-----+-----+-----+-----+-----+-----+-----+-----+ + * | id | offset | flags | status | + * +-----+-----+-----+-----+-----+-----+-----+-----+ + * + * id: reflects id in receive request + * offset: offset in page of start of received packet + * flags: NETRXF_* + * status: -ve: NETIF_RSP_*; +ve: Rx'ed pkt size. + * + * NOTE: Historically, to support GSO on the frontend receive side, Linux + * netfront does not make use of the rx response id (because, as + * described below, extra info structures overlay the id field). + * Instead it assumes that responses always appear in the same ring + * slot as their corresponding request. Thus, to maintain + * compatibility, backends must make sure this is the case. + * + * Extra Info + * ========== + * + * Can be present if initial request or response has NET{T,R}XF_extra_info, + * or previous extra request has XEN_NETIF_EXTRA_MORE. + * + * The struct therefore needs to fit into either a tx or rx slot and + * is therefore limited to 8 octets. + * + * NOTE: Because extra info data overlays the usual request/response + * structures, there is no id information in the opposite direction. + * So, if an extra info overlays an rx response the frontend can + * assume that it is in the same ring slot as the request that was + * consumed to make the slot available, and the backend must ensure + * this assumption is true. + * + * extra info (netif_extra_info_t) + * ------------------------------- + * + * General format: + * + * 0 1 2 3 4 5 6 7 octet + * +-----+-----+-----+-----+-----+-----+-----+-----+ + * |type |flags| type specific data | + * +-----+-----+-----+-----+-----+-----+-----+-----+ + * | padding for tx | + * +-----+-----+-----+-----+ + * + * type: XEN_NETIF_EXTRA_TYPE_* + * flags: XEN_NETIF_EXTRA_FLAG_* + * padding for tx: present only in the tx case due to 8 octet limit + * from rx case. Not shown in type specific entries + * below. + * + * XEN_NETIF_EXTRA_TYPE_GSO: + * + * 0 1 2 3 4 5 6 7 octet + * +-----+-----+-----+-----+-----+-----+-----+-----+ + * |type |flags| size |type | pad | features | + * +-----+-----+-----+-----+-----+-----+-----+-----+ + * + * type: Must be XEN_NETIF_EXTRA_TYPE_GSO + * flags: XEN_NETIF_EXTRA_FLAG_* + * size: Maximum payload size of each segment. For example, + * for TCP this is just the path MSS. + * type: XEN_NETIF_GSO_TYPE_*: This determines the protocol of + * the packet and any extra features required to segment the + * packet properly. + * features: EN_NETIF_GSO_FEAT_*: This specifies any extra GSO + * features required to process this packet, such as ECN + * support for TCPv4. + * + * XEN_NETIF_EXTRA_TYPE_MCAST_{ADD,DEL}: + * + * 0 1 2 3 4 5 6 7 octet + * +-----+-----+-----+-----+-----+-----+-----+-----+ + * |type |flags| addr | + * +-----+-----+-----+-----+-----+-----+-----+-----+ + * + * type: Must be XEN_NETIF_EXTRA_TYPE_MCAST_{ADD,DEL} + * flags: XEN_NETIF_EXTRA_FLAG_* + * addr: address to add/remove + * + * XEN_NETIF_EXTRA_TYPE_HASH: + * + * A backend that supports teoplitz hashing is assumed to accept + * this type of extra info in transmit packets. + * A frontend that enables hashing is assumed to accept + * this type of extra info in receive packets. + * + * 0 1 2 3 4 5 6 7 octet + * +-----+-----+-----+-----+-----+-----+-----+-----+ + * |type |flags|htype| alg |LSB ---- value ---- MSB| + * +-----+-----+-----+-----+-----+-----+-----+-----+ + * + * type: Must be XEN_NETIF_EXTRA_TYPE_HASH + * flags: XEN_NETIF_EXTRA_FLAG_* + * htype: Hash type (one of _XEN_NETIF_CTRL_HASH_TYPE_* - see above) + * alg: The algorithm used to calculate the hash (one of + * XEN_NETIF_CTRL_HASH_TYPE_ALGORITHM_* - see above) + * value: Hash value + */ + +/* Protocol checksum field is blank in the packet (hardware offload)? */ +#define _NETTXF_csum_blank (0) +#define NETTXF_csum_blank (1U<<_NETTXF_csum_blank) + +/* Packet data has been validated against protocol checksum. */ +#define _NETTXF_data_validated (1) +#define NETTXF_data_validated (1U<<_NETTXF_data_validated) + +/* Packet continues in the next request descriptor. */ +#define _NETTXF_more_data (2) +#define NETTXF_more_data (1U<<_NETTXF_more_data) + +/* Packet to be followed by extra descriptor(s). */ +#define _NETTXF_extra_info (3) +#define NETTXF_extra_info (1U<<_NETTXF_extra_info) + +#define XEN_NETIF_MAX_TX_SIZE 0xFFFF +struct netif_tx_request { + grant_ref_t gref; + uint16_t offset; + uint16_t flags; + uint16_t id; + uint16_t size; +}; +typedef struct netif_tx_request netif_tx_request_t; + +/* Types of netif_extra_info descriptors. */ +#define XEN_NETIF_EXTRA_TYPE_NONE (0) /* Never used - invalid */ +#define XEN_NETIF_EXTRA_TYPE_GSO (1) /* u.gso */ +#define XEN_NETIF_EXTRA_TYPE_MCAST_ADD (2) /* u.mcast */ +#define XEN_NETIF_EXTRA_TYPE_MCAST_DEL (3) /* u.mcast */ +#define XEN_NETIF_EXTRA_TYPE_HASH (4) /* u.hash */ +#define XEN_NETIF_EXTRA_TYPE_MAX (5) + +/* netif_extra_info_t flags. */ +#define _XEN_NETIF_EXTRA_FLAG_MORE (0) +#define XEN_NETIF_EXTRA_FLAG_MORE (1U<<_XEN_NETIF_EXTRA_FLAG_MORE) + +/* GSO types */ +#define XEN_NETIF_GSO_TYPE_NONE (0) +#define XEN_NETIF_GSO_TYPE_TCPV4 (1) +#define XEN_NETIF_GSO_TYPE_TCPV6 (2) + +/* + * This structure needs to fit within both netif_tx_request_t and + * netif_rx_response_t for compatibility. + */ +struct netif_extra_info { + uint8_t type; + uint8_t flags; + union { + struct { + uint16_t size; + uint8_t type; + uint8_t pad; + uint16_t features; + } gso; + struct { + uint8_t addr[6]; + } mcast; + struct { + uint8_t type; + uint8_t algorithm; + uint8_t value[4]; + } hash; + uint16_t pad[3]; + } u; +}; +typedef struct netif_extra_info netif_extra_info_t; + +struct netif_tx_response { + uint16_t id; + int16_t status; +}; +typedef struct netif_tx_response netif_tx_response_t; + +struct netif_rx_request { + uint16_t id; /* Echoed in response message. */ + uint16_t pad; + grant_ref_t gref; +}; +typedef struct netif_rx_request netif_rx_request_t; + +/* Packet data has been validated against protocol checksum. */ +#define _NETRXF_data_validated (0) +#define NETRXF_data_validated (1U<<_NETRXF_data_validated) + +/* Protocol checksum field is blank in the packet (hardware offload)? */ +#define _NETRXF_csum_blank (1) +#define NETRXF_csum_blank (1U<<_NETRXF_csum_blank) + +/* Packet continues in the next request descriptor. */ +#define _NETRXF_more_data (2) +#define NETRXF_more_data (1U<<_NETRXF_more_data) + +/* Packet to be followed by extra descriptor(s). */ +#define _NETRXF_extra_info (3) +#define NETRXF_extra_info (1U<<_NETRXF_extra_info) + +/* Packet has GSO prefix. Deprecated but included for compatibility */ +#define _NETRXF_gso_prefix (4) +#define NETRXF_gso_prefix (1U<<_NETRXF_gso_prefix) + +struct netif_rx_response { + uint16_t id; + uint16_t offset; + uint16_t flags; + int16_t status; +}; +typedef struct netif_rx_response netif_rx_response_t; + +/* + * Generate netif ring structures and types. + */ + +DEFINE_RING_TYPES(netif_tx, struct netif_tx_request, struct netif_tx_response); +DEFINE_RING_TYPES(netif_rx, struct netif_rx_request, struct netif_rx_response); + +#define NETIF_RSP_DROPPED -2 +#define NETIF_RSP_ERROR -1 +#define NETIF_RSP_OKAY 0 +/* No response: used for auxiliary requests (e.g., netif_extra_info_t). */ +#define NETIF_RSP_NULL 1 + +#endif diff --git a/include/hw/xen/interface/io/protocols.h b/include/hw/xen/interface/io/protocols.h new file mode 100644 index 000000000..52b4de0f8 --- /dev/null +++ b/include/hw/xen/interface/io/protocols.h @@ -0,0 +1,42 @@ +/****************************************************************************** + * protocols.h + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + * + * Copyright (c) 2008, Keir Fraser + */ + +#ifndef __XEN_PROTOCOLS_H__ +#define __XEN_PROTOCOLS_H__ + +#define XEN_IO_PROTO_ABI_X86_32 "x86_32-abi" +#define XEN_IO_PROTO_ABI_X86_64 "x86_64-abi" +#define XEN_IO_PROTO_ABI_ARM "arm-abi" + +#if defined(__i386__) +# define XEN_IO_PROTO_ABI_NATIVE XEN_IO_PROTO_ABI_X86_32 +#elif defined(__x86_64__) +# define XEN_IO_PROTO_ABI_NATIVE XEN_IO_PROTO_ABI_X86_64 +#elif defined(__arm__) || defined(__aarch64__) +# define XEN_IO_PROTO_ABI_NATIVE XEN_IO_PROTO_ABI_ARM +#else +# error arch fixup needed here +#endif + +#endif diff --git a/include/hw/xen/interface/io/ring.h b/include/hw/xen/interface/io/ring.h new file mode 100644 index 000000000..5d048b335 --- /dev/null +++ b/include/hw/xen/interface/io/ring.h @@ -0,0 +1,483 @@ +/****************************************************************************** + * ring.h + * + * Shared producer-consumer ring macros. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + * + * Tim Deegan and Andrew Warfield November 2004. + */ + +#ifndef __XEN_PUBLIC_IO_RING_H__ +#define __XEN_PUBLIC_IO_RING_H__ + +/* + * When #include'ing this header, you need to provide the following + * declaration upfront: + * - standard integers types (uint8_t, uint16_t, etc) + * They are provided by stdint.h of the standard headers. + * + * Before using the different macros, you need to provide the following + * macros: + * - xen_mb() a memory barrier + * - xen_rmb() a read memory barrier + * - xen_wmb() a write memory barrier + * Example of those can be found in xenctrl.h. + * + * In addition, if you intend to use the FLEX macros, you also need to + * provide the following, before invoking the FLEX macros: + * - size_t + * - memcpy + * - grant_ref_t + * These declarations are provided by string.h of the standard headers, + * and grant_table.h from the Xen public headers. + */ + +typedef unsigned int RING_IDX; + +/* Round a 32-bit unsigned constant down to the nearest power of two. */ +#define __RD2(_x) (((_x) & 0x00000002) ? 0x2 : ((_x) & 0x1)) +#define __RD4(_x) (((_x) & 0x0000000c) ? __RD2((_x)>>2)<<2 : __RD2(_x)) +#define __RD8(_x) (((_x) & 0x000000f0) ? __RD4((_x)>>4)<<4 : __RD4(_x)) +#define __RD16(_x) (((_x) & 0x0000ff00) ? __RD8((_x)>>8)<<8 : __RD8(_x)) +#define __RD32(_x) (((_x) & 0xffff0000) ? __RD16((_x)>>16)<<16 : __RD16(_x)) + +/* + * Calculate size of a shared ring, given the total available space for the + * ring and indexes (_sz), and the name tag of the request/response structure. + * A ring contains as many entries as will fit, rounded down to the nearest + * power of two (so we can mask with (size-1) to loop around). + */ +#define __CONST_RING_SIZE(_s, _sz) \ + (__RD32(((_sz) - offsetof(struct _s##_sring, ring)) / \ + sizeof_field(struct _s##_sring, ring[0]))) +/* + * The same for passing in an actual pointer instead of a name tag. + */ +#define __RING_SIZE(_s, _sz) \ + (__RD32(((_sz) - (long)(_s)->ring + (long)(_s)) / sizeof((_s)->ring[0]))) + +/* + * Macros to make the correct C datatypes for a new kind of ring. + * + * To make a new ring datatype, you need to have two message structures, + * let's say request_t, and response_t already defined. + * + * In a header where you want the ring datatype declared, you then do: + * + * DEFINE_RING_TYPES(mytag, request_t, response_t); + * + * These expand out to give you a set of types, as you can see below. + * The most important of these are: + * + * mytag_sring_t - The shared ring. + * mytag_front_ring_t - The 'front' half of the ring. + * mytag_back_ring_t - The 'back' half of the ring. + * + * To initialize a ring in your code you need to know the location and size + * of the shared memory area (PAGE_SIZE, for instance). To initialise + * the front half: + * + * mytag_front_ring_t front_ring; + * SHARED_RING_INIT((mytag_sring_t *)shared_page); + * FRONT_RING_INIT(&front_ring, (mytag_sring_t *)shared_page, PAGE_SIZE); + * + * Initializing the back follows similarly (note that only the front + * initializes the shared ring): + * + * mytag_back_ring_t back_ring; + * BACK_RING_INIT(&back_ring, (mytag_sring_t *)shared_page, PAGE_SIZE); + */ + +#define DEFINE_RING_TYPES(__name, __req_t, __rsp_t) \ + \ +/* Shared ring entry */ \ +union __name##_sring_entry { \ + __req_t req; \ + __rsp_t rsp; \ +}; \ + \ +/* Shared ring page */ \ +struct __name##_sring { \ + RING_IDX req_prod, req_event; \ + RING_IDX rsp_prod, rsp_event; \ + union { \ + struct { \ + uint8_t smartpoll_active; \ + } netif; \ + struct { \ + uint8_t msg; \ + } tapif_user; \ + uint8_t pvt_pad[4]; \ + } pvt; \ + uint8_t __pad[44]; \ + union __name##_sring_entry ring[1]; /* variable-length */ \ +}; \ + \ +/* "Front" end's private variables */ \ +struct __name##_front_ring { \ + RING_IDX req_prod_pvt; \ + RING_IDX rsp_cons; \ + unsigned int nr_ents; \ + struct __name##_sring *sring; \ +}; \ + \ +/* "Back" end's private variables */ \ +struct __name##_back_ring { \ + RING_IDX rsp_prod_pvt; \ + RING_IDX req_cons; \ + unsigned int nr_ents; \ + struct __name##_sring *sring; \ +}; \ + \ +/* Syntactic sugar */ \ +typedef struct __name##_sring __name##_sring_t; \ +typedef struct __name##_front_ring __name##_front_ring_t; \ +typedef struct __name##_back_ring __name##_back_ring_t + +/* + * Macros for manipulating rings. + * + * FRONT_RING_whatever works on the "front end" of a ring: here + * requests are pushed on to the ring and responses taken off it. + * + * BACK_RING_whatever works on the "back end" of a ring: here + * requests are taken off the ring and responses put on. + * + * N.B. these macros do NO INTERLOCKS OR FLOW CONTROL. + * This is OK in 1-for-1 request-response situations where the + * requestor (front end) never has more than RING_SIZE()-1 + * outstanding requests. + */ + +/* Initialising empty rings */ +#define SHARED_RING_INIT(_s) do { \ + (_s)->req_prod = (_s)->rsp_prod = 0; \ + (_s)->req_event = (_s)->rsp_event = 1; \ + (void)memset((_s)->pvt.pvt_pad, 0, sizeof((_s)->pvt.pvt_pad)); \ + (void)memset((_s)->__pad, 0, sizeof((_s)->__pad)); \ +} while(0) + +#define FRONT_RING_INIT(_r, _s, __size) do { \ + (_r)->req_prod_pvt = 0; \ + (_r)->rsp_cons = 0; \ + (_r)->nr_ents = __RING_SIZE(_s, __size); \ + (_r)->sring = (_s); \ +} while (0) + +#define BACK_RING_INIT(_r, _s, __size) do { \ + (_r)->rsp_prod_pvt = 0; \ + (_r)->req_cons = 0; \ + (_r)->nr_ents = __RING_SIZE(_s, __size); \ + (_r)->sring = (_s); \ +} while (0) + +/* How big is this ring? */ +#define RING_SIZE(_r) \ + ((_r)->nr_ents) + +/* Number of free requests (for use on front side only). */ +#define RING_FREE_REQUESTS(_r) \ + (RING_SIZE(_r) - ((_r)->req_prod_pvt - (_r)->rsp_cons)) + +/* Test if there is an empty slot available on the front ring. + * (This is only meaningful from the front. ) + */ +#define RING_FULL(_r) \ + (RING_FREE_REQUESTS(_r) == 0) + +/* Test if there are outstanding messages to be processed on a ring. */ +#define RING_HAS_UNCONSUMED_RESPONSES(_r) \ + ((_r)->sring->rsp_prod - (_r)->rsp_cons) + +#ifdef __GNUC__ +#define RING_HAS_UNCONSUMED_REQUESTS(_r) ({ \ + unsigned int req = (_r)->sring->req_prod - (_r)->req_cons; \ + unsigned int rsp = RING_SIZE(_r) - \ + ((_r)->req_cons - (_r)->rsp_prod_pvt); \ + req < rsp ? req : rsp; \ +}) +#else +/* Same as above, but without the nice GCC ({ ... }) syntax. */ +#define RING_HAS_UNCONSUMED_REQUESTS(_r) \ + ((((_r)->sring->req_prod - (_r)->req_cons) < \ + (RING_SIZE(_r) - ((_r)->req_cons - (_r)->rsp_prod_pvt))) ? \ + ((_r)->sring->req_prod - (_r)->req_cons) : \ + (RING_SIZE(_r) - ((_r)->req_cons - (_r)->rsp_prod_pvt))) +#endif + +/* Direct access to individual ring elements, by index. */ +#define RING_GET_REQUEST(_r, _idx) \ + (&((_r)->sring->ring[((_idx) & (RING_SIZE(_r) - 1))].req)) + +/* + * Get a local copy of a request. + * + * Use this in preference to RING_GET_REQUEST() so all processing is + * done on a local copy that cannot be modified by the other end. + * + * Note that https://gcc.gnu.org/bugzilla/show_bug.cgi?id=58145 may cause this + * to be ineffective where _req is a struct which consists of only bitfields. + */ +#define RING_COPY_REQUEST(_r, _idx, _req) do { \ + /* Use volatile to force the copy into _req. */ \ + *(_req) = *(volatile typeof(_req))RING_GET_REQUEST(_r, _idx); \ +} while (0) + +#define RING_GET_RESPONSE(_r, _idx) \ + (&((_r)->sring->ring[((_idx) & (RING_SIZE(_r) - 1))].rsp)) + +/* Loop termination condition: Would the specified index overflow the ring? */ +#define RING_REQUEST_CONS_OVERFLOW(_r, _cons) \ + (((_cons) - (_r)->rsp_prod_pvt) >= RING_SIZE(_r)) + +/* Ill-behaved frontend determination: Can there be this many requests? */ +#define RING_REQUEST_PROD_OVERFLOW(_r, _prod) \ + (((_prod) - (_r)->rsp_prod_pvt) > RING_SIZE(_r)) + +#define RING_PUSH_REQUESTS(_r) do { \ + xen_wmb(); /* back sees requests /before/ updated producer index */ \ + (_r)->sring->req_prod = (_r)->req_prod_pvt; \ +} while (0) + +#define RING_PUSH_RESPONSES(_r) do { \ + xen_wmb(); /* front sees resps /before/ updated producer index */ \ + (_r)->sring->rsp_prod = (_r)->rsp_prod_pvt; \ +} while (0) + +/* + * Notification hold-off (req_event and rsp_event): + * + * When queueing requests or responses on a shared ring, it may not always be + * necessary to notify the remote end. For example, if requests are in flight + * in a backend, the front may be able to queue further requests without + * notifying the back (if the back checks for new requests when it queues + * responses). + * + * When enqueuing requests or responses: + * + * Use RING_PUSH_{REQUESTS,RESPONSES}_AND_CHECK_NOTIFY(). The second argument + * is a boolean return value. True indicates that the receiver requires an + * asynchronous notification. + * + * After dequeuing requests or responses (before sleeping the connection): + * + * Use RING_FINAL_CHECK_FOR_REQUESTS() or RING_FINAL_CHECK_FOR_RESPONSES(). + * The second argument is a boolean return value. True indicates that there + * are pending messages on the ring (i.e., the connection should not be put + * to sleep). + * + * These macros will set the req_event/rsp_event field to trigger a + * notification on the very next message that is enqueued. If you want to + * create batches of work (i.e., only receive a notification after several + * messages have been enqueued) then you will need to create a customised + * version of the FINAL_CHECK macro in your own code, which sets the event + * field appropriately. + */ + +#define RING_PUSH_REQUESTS_AND_CHECK_NOTIFY(_r, _notify) do { \ + RING_IDX __old = (_r)->sring->req_prod; \ + RING_IDX __new = (_r)->req_prod_pvt; \ + xen_wmb(); /* back sees requests /before/ updated producer index */ \ + (_r)->sring->req_prod = __new; \ + xen_mb(); /* back sees new requests /before/ we check req_event */ \ + (_notify) = ((RING_IDX)(__new - (_r)->sring->req_event) < \ + (RING_IDX)(__new - __old)); \ +} while (0) + +#define RING_PUSH_RESPONSES_AND_CHECK_NOTIFY(_r, _notify) do { \ + RING_IDX __old = (_r)->sring->rsp_prod; \ + RING_IDX __new = (_r)->rsp_prod_pvt; \ + xen_wmb(); /* front sees resps /before/ updated producer index */ \ + (_r)->sring->rsp_prod = __new; \ + xen_mb(); /* front sees new resps /before/ we check rsp_event */ \ + (_notify) = ((RING_IDX)(__new - (_r)->sring->rsp_event) < \ + (RING_IDX)(__new - __old)); \ +} while (0) + +#define RING_FINAL_CHECK_FOR_REQUESTS(_r, _work_to_do) do { \ + (_work_to_do) = RING_HAS_UNCONSUMED_REQUESTS(_r); \ + if (_work_to_do) break; \ + (_r)->sring->req_event = (_r)->req_cons + 1; \ + xen_mb(); \ + (_work_to_do) = RING_HAS_UNCONSUMED_REQUESTS(_r); \ +} while (0) + +#define RING_FINAL_CHECK_FOR_RESPONSES(_r, _work_to_do) do { \ + (_work_to_do) = RING_HAS_UNCONSUMED_RESPONSES(_r); \ + if (_work_to_do) break; \ + (_r)->sring->rsp_event = (_r)->rsp_cons + 1; \ + xen_mb(); \ + (_work_to_do) = RING_HAS_UNCONSUMED_RESPONSES(_r); \ +} while (0) + + +/* + * DEFINE_XEN_FLEX_RING_AND_INTF defines two monodirectional rings and + * functions to check if there is data on the ring, and to read and + * write to them. + * + * DEFINE_XEN_FLEX_RING is similar to DEFINE_XEN_FLEX_RING_AND_INTF, but + * does not define the indexes page. As different protocols can have + * extensions to the basic format, this macro allow them to define their + * own struct. + * + * XEN_FLEX_RING_SIZE + * Convenience macro to calculate the size of one of the two rings + * from the overall order. + * + * $NAME_mask + * Function to apply the size mask to an index, to reduce the index + * within the range [0-size]. + * + * $NAME_read_packet + * Function to read data from the ring. The amount of data to read is + * specified by the "size" argument. + * + * $NAME_write_packet + * Function to write data to the ring. The amount of data to write is + * specified by the "size" argument. + * + * $NAME_get_ring_ptr + * Convenience function that returns a pointer to read/write to the + * ring at the right location. + * + * $NAME_data_intf + * Indexes page, shared between frontend and backend. It also + * contains the array of grant refs. + * + * $NAME_queued + * Function to calculate how many bytes are currently on the ring, + * ready to be read. It can also be used to calculate how much free + * space is currently on the ring (XEN_FLEX_RING_SIZE() - + * $NAME_queued()). + */ + +#ifndef XEN_PAGE_SHIFT +/* The PAGE_SIZE for ring protocols and hypercall interfaces is always + * 4K, regardless of the architecture, and page granularity chosen by + * operating systems. + */ +#define XEN_PAGE_SHIFT 12 +#endif +#define XEN_FLEX_RING_SIZE(order) \ + (1UL << ((order) + XEN_PAGE_SHIFT - 1)) + +#define DEFINE_XEN_FLEX_RING(name) \ +static inline RING_IDX name##_mask(RING_IDX idx, RING_IDX ring_size) \ +{ \ + return idx & (ring_size - 1); \ +} \ + \ +static inline unsigned char *name##_get_ring_ptr(unsigned char *buf, \ + RING_IDX idx, \ + RING_IDX ring_size) \ +{ \ + return buf + name##_mask(idx, ring_size); \ +} \ + \ +static inline void name##_read_packet(void *opaque, \ + const unsigned char *buf, \ + size_t size, \ + RING_IDX masked_prod, \ + RING_IDX *masked_cons, \ + RING_IDX ring_size) \ +{ \ + if (*masked_cons < masked_prod || \ + size <= ring_size - *masked_cons) { \ + memcpy(opaque, buf + *masked_cons, size); \ + } else { \ + memcpy(opaque, buf + *masked_cons, ring_size - *masked_cons); \ + memcpy((unsigned char *)opaque + ring_size - *masked_cons, buf, \ + size - (ring_size - *masked_cons)); \ + } \ + *masked_cons = name##_mask(*masked_cons + size, ring_size); \ +} \ + \ +static inline void name##_write_packet(unsigned char *buf, \ + const void *opaque, \ + size_t size, \ + RING_IDX *masked_prod, \ + RING_IDX masked_cons, \ + RING_IDX ring_size) \ +{ \ + if (*masked_prod < masked_cons || \ + size <= ring_size - *masked_prod) { \ + memcpy(buf + *masked_prod, opaque, size); \ + } else { \ + memcpy(buf + *masked_prod, opaque, ring_size - *masked_prod); \ + memcpy(buf, (unsigned char *)opaque + (ring_size - *masked_prod), \ + size - (ring_size - *masked_prod)); \ + } \ + *masked_prod = name##_mask(*masked_prod + size, ring_size); \ +} \ + \ +static inline RING_IDX name##_queued(RING_IDX prod, \ + RING_IDX cons, \ + RING_IDX ring_size) \ +{ \ + RING_IDX size; \ + \ + if (prod == cons) \ + return 0; \ + \ + prod = name##_mask(prod, ring_size); \ + cons = name##_mask(cons, ring_size); \ + \ + if (prod == cons) \ + return ring_size; \ + \ + if (prod > cons) \ + size = prod - cons; \ + else \ + size = ring_size - (cons - prod); \ + return size; \ +} \ + \ +struct name##_data { \ + unsigned char *in; /* half of the allocation */ \ + unsigned char *out; /* half of the allocation */ \ +} + +#define DEFINE_XEN_FLEX_RING_AND_INTF(name) \ +struct name##_data_intf { \ + RING_IDX in_cons, in_prod; \ + \ + uint8_t pad1[56]; \ + \ + RING_IDX out_cons, out_prod; \ + \ + uint8_t pad2[56]; \ + \ + RING_IDX ring_order; \ + grant_ref_t ref[]; \ +}; \ +DEFINE_XEN_FLEX_RING(name) + +#endif /* __XEN_PUBLIC_IO_RING_H__ */ + +/* + * Local variables: + * mode: C + * c-file-style: "BSD" + * c-basic-offset: 4 + * tab-width: 4 + * indent-tabs-mode: nil + * End: + */ diff --git a/include/hw/xen/interface/io/usbif.h b/include/hw/xen/interface/io/usbif.h new file mode 100644 index 000000000..c6a58639d --- /dev/null +++ b/include/hw/xen/interface/io/usbif.h @@ -0,0 +1,254 @@ +/* + * usbif.h + * + * USB I/O interface for Xen guest OSes. + * + * Copyright (C) 2009, FUJITSU LABORATORIES LTD. + * Author: Noboru Iwamatsu + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +#ifndef __XEN_PUBLIC_IO_USBIF_H__ +#define __XEN_PUBLIC_IO_USBIF_H__ + +#include "ring.h" +#include "../grant_table.h" + +/* + * Feature and Parameter Negotiation + * ================================= + * The two halves of a Xen pvUSB driver utilize nodes within the XenStore to + * communicate capabilities and to negotiate operating parameters. This + * section enumerates these nodes which reside in the respective front and + * backend portions of the XenStore, following the XenBus convention. + * + * Any specified default value is in effect if the corresponding XenBus node + * is not present in the XenStore. + * + * XenStore nodes in sections marked "PRIVATE" are solely for use by the + * driver side whose XenBus tree contains them. + * + ***************************************************************************** + * Backend XenBus Nodes + ***************************************************************************** + * + *------------------ Backend Device Identification (PRIVATE) ------------------ + * + * num-ports + * Values: unsigned [1...31] + * + * Number of ports for this (virtual) USB host connector. + * + * usb-ver + * Values: unsigned [1...2] + * + * USB version of this host connector: 1 = USB 1.1, 2 = USB 2.0. + * + * port/[1...31] + * Values: string + * + * Physical USB device connected to the given port, e.g. "3-1.5". + * + ***************************************************************************** + * Frontend XenBus Nodes + ***************************************************************************** + * + *----------------------- Request Transport Parameters ----------------------- + * + * event-channel + * Values: unsigned + * + * The identifier of the Xen event channel used to signal activity + * in the ring buffer. + * + * urb-ring-ref + * Values: unsigned + * + * The Xen grant reference granting permission for the backend to map + * the sole page in a single page sized ring buffer. This is the ring + * buffer for urb requests. + * + * conn-ring-ref + * Values: unsigned + * + * The Xen grant reference granting permission for the backend to map + * the sole page in a single page sized ring buffer. This is the ring + * buffer for connection/disconnection requests. + * + * protocol + * Values: string (XEN_IO_PROTO_ABI_*) + * Default Value: XEN_IO_PROTO_ABI_NATIVE + * + * The machine ABI rules governing the format of all ring request and + * response structures. + * + */ + +enum usb_spec_version { + USB_VER_UNKNOWN = 0, + USB_VER_USB11, + USB_VER_USB20, + USB_VER_USB30, /* not supported yet */ +}; + +/* + * USB pipe in usbif_request + * + * - port number: bits 0-4 + * (USB_MAXCHILDREN is 31) + * + * - operation flag: bit 5 + * (0 = submit urb, + * 1 = unlink urb) + * + * - direction: bit 7 + * (0 = Host-to-Device [Out] + * 1 = Device-to-Host [In]) + * + * - device address: bits 8-14 + * + * - endpoint: bits 15-18 + * + * - pipe type: bits 30-31 + * (00 = isochronous, 01 = interrupt, + * 10 = control, 11 = bulk) + */ + +#define USBIF_PIPE_PORT_MASK 0x0000001f +#define USBIF_PIPE_UNLINK 0x00000020 +#define USBIF_PIPE_DIR 0x00000080 +#define USBIF_PIPE_DEV_MASK 0x0000007f +#define USBIF_PIPE_DEV_SHIFT 8 +#define USBIF_PIPE_EP_MASK 0x0000000f +#define USBIF_PIPE_EP_SHIFT 15 +#define USBIF_PIPE_TYPE_MASK 0x00000003 +#define USBIF_PIPE_TYPE_SHIFT 30 +#define USBIF_PIPE_TYPE_ISOC 0 +#define USBIF_PIPE_TYPE_INT 1 +#define USBIF_PIPE_TYPE_CTRL 2 +#define USBIF_PIPE_TYPE_BULK 3 + +#define usbif_pipeportnum(pipe) ((pipe) & USBIF_PIPE_PORT_MASK) +#define usbif_setportnum_pipe(pipe, portnum) ((pipe) | (portnum)) + +#define usbif_pipeunlink(pipe) ((pipe) & USBIF_PIPE_UNLINK) +#define usbif_pipesubmit(pipe) (!usbif_pipeunlink(pipe)) +#define usbif_setunlink_pipe(pipe) ((pipe) | USBIF_PIPE_UNLINK) + +#define usbif_pipein(pipe) ((pipe) & USBIF_PIPE_DIR) +#define usbif_pipeout(pipe) (!usbif_pipein(pipe)) + +#define usbif_pipedevice(pipe) \ + (((pipe) >> USBIF_PIPE_DEV_SHIFT) & USBIF_PIPE_DEV_MASK) + +#define usbif_pipeendpoint(pipe) \ + (((pipe) >> USBIF_PIPE_EP_SHIFT) & USBIF_PIPE_EP_MASK) + +#define usbif_pipetype(pipe) \ + (((pipe) >> USBIF_PIPE_TYPE_SHIFT) & USBIF_PIPE_TYPE_MASK) +#define usbif_pipeisoc(pipe) (usbif_pipetype(pipe) == USBIF_PIPE_TYPE_ISOC) +#define usbif_pipeint(pipe) (usbif_pipetype(pipe) == USBIF_PIPE_TYPE_INT) +#define usbif_pipectrl(pipe) (usbif_pipetype(pipe) == USBIF_PIPE_TYPE_CTRL) +#define usbif_pipebulk(pipe) (usbif_pipetype(pipe) == USBIF_PIPE_TYPE_BULK) + +#define USBIF_MAX_SEGMENTS_PER_REQUEST (16) +#define USBIF_MAX_PORTNR 31 +#define USBIF_RING_SIZE 4096 + +/* + * RING for transferring urbs. + */ +struct usbif_request_segment { + grant_ref_t gref; + uint16_t offset; + uint16_t length; +}; + +struct usbif_urb_request { + uint16_t id; /* request id */ + uint16_t nr_buffer_segs; /* number of urb->transfer_buffer segments */ + + /* basic urb parameter */ + uint32_t pipe; + uint16_t transfer_flags; +#define USBIF_SHORT_NOT_OK 0x0001 + uint16_t buffer_length; + union { + uint8_t ctrl[8]; /* setup_packet (Ctrl) */ + + struct { + uint16_t interval; /* maximum (1024*8) in usb core */ + uint16_t start_frame; /* start frame */ + uint16_t number_of_packets; /* number of ISO packet */ + uint16_t nr_frame_desc_segs; /* number of iso_frame_desc segments */ + } isoc; + + struct { + uint16_t interval; /* maximum (1024*8) in usb core */ + uint16_t pad[3]; + } intr; + + struct { + uint16_t unlink_id; /* unlink request id */ + uint16_t pad[3]; + } unlink; + + } u; + + /* urb data segments */ + struct usbif_request_segment seg[USBIF_MAX_SEGMENTS_PER_REQUEST]; +}; +typedef struct usbif_urb_request usbif_urb_request_t; + +struct usbif_urb_response { + uint16_t id; /* request id */ + uint16_t start_frame; /* start frame (ISO) */ + int32_t status; /* status (non-ISO) */ + int32_t actual_length; /* actual transfer length */ + int32_t error_count; /* number of ISO errors */ +}; +typedef struct usbif_urb_response usbif_urb_response_t; + +DEFINE_RING_TYPES(usbif_urb, struct usbif_urb_request, struct usbif_urb_response); +#define USB_URB_RING_SIZE __CONST_RING_SIZE(usbif_urb, USBIF_RING_SIZE) + +/* + * RING for notifying connect/disconnect events to frontend + */ +struct usbif_conn_request { + uint16_t id; +}; +typedef struct usbif_conn_request usbif_conn_request_t; + +struct usbif_conn_response { + uint16_t id; /* request id */ + uint8_t portnum; /* port number */ + uint8_t speed; /* usb_device_speed */ +#define USBIF_SPEED_NONE 0 +#define USBIF_SPEED_LOW 1 +#define USBIF_SPEED_FULL 2 +#define USBIF_SPEED_HIGH 3 +}; +typedef struct usbif_conn_response usbif_conn_response_t; + +DEFINE_RING_TYPES(usbif_conn, struct usbif_conn_request, struct usbif_conn_response); +#define USB_CONN_RING_SIZE __CONST_RING_SIZE(usbif_conn, USBIF_RING_SIZE) + +#endif /* __XEN_PUBLIC_IO_USBIF_H__ */ diff --git a/include/hw/xen/interface/io/xenbus.h b/include/hw/xen/interface/io/xenbus.h new file mode 100644 index 000000000..2fbf2a7fd --- /dev/null +++ b/include/hw/xen/interface/io/xenbus.h @@ -0,0 +1,70 @@ +/***************************************************************************** + * xenbus.h + * + * Xenbus protocol details. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + * + * Copyright (C) 2005 XenSource Ltd. + */ + +#ifndef _XEN_PUBLIC_IO_XENBUS_H +#define _XEN_PUBLIC_IO_XENBUS_H + +/* + * The state of either end of the Xenbus, i.e. the current communication + * status of initialisation across the bus. States here imply nothing about + * the state of the connection between the driver and the kernel's device + * layers. + */ +enum xenbus_state { + XenbusStateUnknown = 0, + + XenbusStateInitialising = 1, + + /* + * InitWait: Finished early initialisation but waiting for information + * from the peer or hotplug scripts. + */ + XenbusStateInitWait = 2, + + /* + * Initialised: Waiting for a connection from the peer. + */ + XenbusStateInitialised = 3, + + XenbusStateConnected = 4, + + /* + * Closing: The device is being closed due to an error or an unplug event. + */ + XenbusStateClosing = 5, + + XenbusStateClosed = 6, + + /* + * Reconfiguring: The device is being reconfigured. + */ + XenbusStateReconfiguring = 7, + + XenbusStateReconfigured = 8 +}; +typedef enum xenbus_state XenbusState; + +#endif /* _XEN_PUBLIC_IO_XENBUS_H */ diff --git a/include/hw/xen/start_info.h b/include/hw/xen/start_info.h new file mode 100644 index 000000000..6ed487779 --- /dev/null +++ b/include/hw/xen/start_info.h @@ -0,0 +1,146 @@ +/* + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + * + * Copyright (c) 2016, Citrix Systems, Inc. + */ + +#ifndef XEN_PUBLIC_ARCH_X86_HVM_START_INFO_H +#define XEN_PUBLIC_ARCH_X86_HVM_START_INFO_H + +/* + * Start of day structure passed to PVH guests and to HVM guests in %ebx. + * + * NOTE: nothing will be loaded at physical address 0, so a 0 value in any + * of the address fields should be treated as not present. + * + * 0 +----------------+ + * | magic | Contains the magic value XEN_HVM_START_MAGIC_VALUE + * | | ("xEn3" with the 0x80 bit of the "E" set). + * 4 +----------------+ + * | version | Version of this structure. Current version is 1. New + * | | versions are guaranteed to be backwards-compatible. + * 8 +----------------+ + * | flags | SIF_xxx flags. + * 12 +----------------+ + * | nr_modules | Number of modules passed to the kernel. + * 16 +----------------+ + * | modlist_paddr | Physical address of an array of modules + * | | (layout of the structure below). + * 24 +----------------+ + * | cmdline_paddr | Physical address of the command line, + * | | a zero-terminated ASCII string. + * 32 +----------------+ + * | rsdp_paddr | Physical address of the RSDP ACPI data structure. + * 40 +----------------+ + * | memmap_paddr | Physical address of the (optional) memory map. Only + * | | present in version 1 and newer of the structure. + * 48 +----------------+ + * | memmap_entries | Number of entries in the memory map table. Only + * | | present in version 1 and newer of the structure. + * | | Zero if there is no memory map being provided. + * 52 +----------------+ + * | reserved | Version 1 and newer only. + * 56 +----------------+ + * + * The layout of each entry in the module structure is the following: + * + * 0 +----------------+ + * | paddr | Physical address of the module. + * 8 +----------------+ + * | size | Size of the module in bytes. + * 16 +----------------+ + * | cmdline_paddr | Physical address of the command line, + * | | a zero-terminated ASCII string. + * 24 +----------------+ + * | reserved | + * 32 +----------------+ + * + * The layout of each entry in the memory map table is as follows: + * + * 0 +----------------+ + * | addr | Base address + * 8 +----------------+ + * | size | Size of mapping in bytes + * 16 +----------------+ + * | type | Type of mapping as defined between the hypervisor + * | | and guest it's starting. E820_TYPE_xxx, for example. + * 20 +----------------| + * | reserved | + * 24 +----------------+ + * + * The address and sizes are always a 64bit little endian unsigned integer. + * + * NB: Xen on x86 will always try to place all the data below the 4GiB + * boundary. + * + * Version numbers of the hvm_start_info structure have evolved like this: + * + * Version 0: + * + * Version 1: Added the memmap_paddr/memmap_entries fields (plus 4 bytes of + * padding) to the end of the hvm_start_info struct. These new + * fields can be used to pass a memory map to the guest. The + * memory map is optional and so guests that understand version 1 + * of the structure must check that memmap_entries is non-zero + * before trying to read the memory map. + */ +#define XEN_HVM_START_MAGIC_VALUE 0x336ec578 + +/* + * C representation of the x86/HVM start info layout. + * + * The canonical definition of this layout is above, this is just a way to + * represent the layout described there using C types. + */ +struct hvm_start_info { + uint32_t magic; /* Contains the magic value 0x336ec578 */ + /* ("xEn3" with the 0x80 bit of the "E" set).*/ + uint32_t version; /* Version of this structure. */ + uint32_t flags; /* SIF_xxx flags. */ + uint32_t nr_modules; /* Number of modules passed to the kernel. */ + uint64_t modlist_paddr; /* Physical address of an array of */ + /* hvm_modlist_entry. */ + uint64_t cmdline_paddr; /* Physical address of the command line. */ + uint64_t rsdp_paddr; /* Physical address of the RSDP ACPI data */ + /* structure. */ + uint64_t memmap_paddr; /* Physical address of an array of */ + /* hvm_memmap_table_entry. Only present in */ + /* version 1 and newer of the structure */ + uint32_t memmap_entries; /* Number of entries in the memmap table. */ + /* Only present in version 1 and newer of */ + /* the structure. Value will be zero if */ + /* there is no memory map being provided. */ + uint32_t reserved; +}; + +struct hvm_modlist_entry { + uint64_t paddr; /* Physical address of the module. */ + uint64_t size; /* Size of the module in bytes. */ + uint64_t cmdline_paddr; /* Physical address of the command line. */ + uint64_t reserved; +}; + +struct hvm_memmap_table_entry { + uint64_t addr; /* Base address of the memory region */ + uint64_t size; /* Size of the memory region in bytes */ + uint32_t type; /* Mapping type */ + uint32_t reserved; +}; + +#endif /* XEN_PUBLIC_ARCH_X86_HVM_START_INFO_H */ diff --git a/include/hw/xen/xen-backend.h b/include/hw/xen/xen-backend.h new file mode 100644 index 000000000..aac2fd454 --- /dev/null +++ b/include/hw/xen/xen-backend.h @@ -0,0 +1,40 @@ +/* + * Copyright (c) 2018 Citrix Systems Inc. + * + * This work is licensed under the terms of the GNU GPL, version 2 or later. + * See the COPYING file in the top-level directory. + */ + +#ifndef HW_XEN_BACKEND_H +#define HW_XEN_BACKEND_H + +#include "hw/xen/xen-bus.h" + +typedef struct XenBackendInstance XenBackendInstance; + +typedef void (*XenBackendDeviceCreate)(XenBackendInstance *backend, + QDict *opts, Error **errp); +typedef void (*XenBackendDeviceDestroy)(XenBackendInstance *backend, + Error **errp); + +typedef struct XenBackendInfo { + const char *type; + XenBackendDeviceCreate create; + XenBackendDeviceDestroy destroy; +} XenBackendInfo; + +XenBus *xen_backend_get_bus(XenBackendInstance *backend); +const char *xen_backend_get_name(XenBackendInstance *backend); + +void xen_backend_set_device(XenBackendInstance *backend, + XenDevice *xendevice); +XenDevice *xen_backend_get_device(XenBackendInstance *backend); + +void xen_backend_register(const XenBackendInfo *info); +const char **xen_backend_get_types(unsigned int *nr); + +void xen_backend_device_create(XenBus *xenbus, const char *type, + const char *name, QDict *opts, Error **errp); +bool xen_backend_try_device_destroy(XenDevice *xendev, Error **errp); + +#endif /* HW_XEN_BACKEND_H */ diff --git a/include/hw/xen/xen-block.h b/include/hw/xen/xen-block.h new file mode 100644 index 000000000..d692ea758 --- /dev/null +++ b/include/hw/xen/xen-block.h @@ -0,0 +1,89 @@ +/* + * Copyright (c) 2018 Citrix Systems Inc. + * + * This work is licensed under the terms of the GNU GPL, version 2 or later. + * See the COPYING file in the top-level directory. + */ + +#ifndef HW_XEN_BLOCK_H +#define HW_XEN_BLOCK_H + +#include "hw/xen/xen-bus.h" +#include "hw/block/block.h" +#include "hw/block/dataplane/xen-block.h" +#include "sysemu/iothread.h" +#include "qom/object.h" + +typedef enum XenBlockVdevType { + XEN_BLOCK_VDEV_TYPE_INVALID, + XEN_BLOCK_VDEV_TYPE_DP, + XEN_BLOCK_VDEV_TYPE_XVD, + XEN_BLOCK_VDEV_TYPE_HD, + XEN_BLOCK_VDEV_TYPE_SD, + XEN_BLOCK_VDEV_TYPE__MAX +} XenBlockVdevType; + +typedef struct XenBlockVdev { + XenBlockVdevType type; + unsigned long disk; + unsigned long partition; + unsigned long number; +} XenBlockVdev; + + +typedef struct XenBlockProperties { + XenBlockVdev vdev; + BlockConf conf; + unsigned int max_ring_page_order; + IOThread *iothread; +} XenBlockProperties; + +typedef struct XenBlockDrive { + char *id; + char *node_name; +} XenBlockDrive; + +typedef struct XenBlockIOThread { + char *id; +} XenBlockIOThread; + +struct XenBlockDevice { + XenDevice xendev; + XenBlockProperties props; + const char *device_type; + unsigned int info; + XenBlockDataPlane *dataplane; + XenBlockDrive *drive; + XenBlockIOThread *iothread; +}; +typedef struct XenBlockDevice XenBlockDevice; + +typedef void (*XenBlockDeviceRealize)(XenBlockDevice *blockdev, Error **errp); +typedef void (*XenBlockDeviceUnrealize)(XenBlockDevice *blockdev); + +struct XenBlockDeviceClass { + /*< private >*/ + XenDeviceClass parent_class; + /*< public >*/ + XenBlockDeviceRealize realize; + XenBlockDeviceUnrealize unrealize; +}; + +#define TYPE_XEN_BLOCK_DEVICE "xen-block" +OBJECT_DECLARE_TYPE(XenBlockDevice, XenBlockDeviceClass, XEN_BLOCK_DEVICE) + +struct XenDiskDevice { + XenBlockDevice blockdev; +}; + +#define TYPE_XEN_DISK_DEVICE "xen-disk" +OBJECT_DECLARE_SIMPLE_TYPE(XenDiskDevice, XEN_DISK_DEVICE) + +struct XenCDRomDevice { + XenBlockDevice blockdev; +}; + +#define TYPE_XEN_CDROM_DEVICE "xen-cdrom" +OBJECT_DECLARE_SIMPLE_TYPE(XenCDRomDevice, XEN_CDROM_DEVICE) + +#endif /* HW_XEN_BLOCK_H */ diff --git a/include/hw/xen/xen-bus-helper.h b/include/hw/xen/xen-bus-helper.h new file mode 100644 index 000000000..4c0f74744 --- /dev/null +++ b/include/hw/xen/xen-bus-helper.h @@ -0,0 +1,45 @@ +/* + * Copyright (c) 2018 Citrix Systems Inc. + * + * This work is licensed under the terms of the GNU GPL, version 2 or later. + * See the COPYING file in the top-level directory. + */ + +#ifndef HW_XEN_BUS_HELPER_H +#define HW_XEN_BUS_HELPER_H + +#include "hw/xen/xen_common.h" + +const char *xs_strstate(enum xenbus_state state); + +void xs_node_create(struct xs_handle *xsh, xs_transaction_t tid, + const char *node, struct xs_permissions perms[], + unsigned int nr_perms, Error **errp); +void xs_node_destroy(struct xs_handle *xsh, xs_transaction_t tid, + const char *node, Error **errp); + +/* Write to node/key unless node is empty, in which case write to key */ +void xs_node_vprintf(struct xs_handle *xsh, xs_transaction_t tid, + const char *node, const char *key, Error **errp, + const char *fmt, va_list ap) + GCC_FMT_ATTR(6, 0); +void xs_node_printf(struct xs_handle *xsh, xs_transaction_t tid, + const char *node, const char *key, Error **errp, + const char *fmt, ...) + GCC_FMT_ATTR(6, 7); + +/* Read from node/key unless node is empty, in which case read from key */ +int xs_node_vscanf(struct xs_handle *xsh, xs_transaction_t tid, + const char *node, const char *key, Error **errp, + const char *fmt, va_list ap); +int xs_node_scanf(struct xs_handle *xsh, xs_transaction_t tid, + const char *node, const char *key, Error **errp, + const char *fmt, ...); + +/* Watch node/key unless node is empty, in which case watch key */ +void xs_node_watch(struct xs_handle *xsh, const char *node, const char *key, + char *token, Error **errp); +void xs_node_unwatch(struct xs_handle *xsh, const char *node, const char *key, + const char *token, Error **errp); + +#endif /* HW_XEN_BUS_HELPER_H */ diff --git a/include/hw/xen/xen-bus.h b/include/hw/xen/xen-bus.h new file mode 100644 index 000000000..6bdbf3ff8 --- /dev/null +++ b/include/hw/xen/xen-bus.h @@ -0,0 +1,139 @@ +/* + * Copyright (c) 2018 Citrix Systems Inc. + * + * This work is licensed under the terms of the GNU GPL, version 2 or later. + * See the COPYING file in the top-level directory. + */ + +#ifndef HW_XEN_BUS_H +#define HW_XEN_BUS_H + +#include "hw/xen/xen_common.h" +#include "hw/sysbus.h" +#include "qemu/notify.h" +#include "qom/object.h" + +typedef void (*XenWatchHandler)(void *opaque); + +typedef struct XenWatchList XenWatchList; +typedef struct XenWatch XenWatch; +typedef struct XenEventChannel XenEventChannel; + +struct XenDevice { + DeviceState qdev; + domid_t frontend_id; + char *name; + struct xs_handle *xsh; + XenWatchList *watch_list; + char *backend_path, *frontend_path; + enum xenbus_state backend_state, frontend_state; + Notifier exit; + XenWatch *backend_state_watch, *frontend_state_watch; + bool backend_online; + XenWatch *backend_online_watch; + xengnttab_handle *xgth; + bool feature_grant_copy; + bool inactive; + QLIST_HEAD(, XenEventChannel) event_channels; + QLIST_ENTRY(XenDevice) list; +}; +typedef struct XenDevice XenDevice; + +typedef char *(*XenDeviceGetName)(XenDevice *xendev, Error **errp); +typedef void (*XenDeviceRealize)(XenDevice *xendev, Error **errp); +typedef void (*XenDeviceFrontendChanged)(XenDevice *xendev, + enum xenbus_state frontend_state, + Error **errp); +typedef void (*XenDeviceUnrealize)(XenDevice *xendev); + +struct XenDeviceClass { + /*< private >*/ + DeviceClass parent_class; + /*< public >*/ + const char *backend; + const char *device; + XenDeviceGetName get_name; + XenDeviceRealize realize; + XenDeviceFrontendChanged frontend_changed; + XenDeviceUnrealize unrealize; +}; + +#define TYPE_XEN_DEVICE "xen-device" +OBJECT_DECLARE_TYPE(XenDevice, XenDeviceClass, XEN_DEVICE) + +struct XenBus { + BusState qbus; + domid_t backend_id; + struct xs_handle *xsh; + XenWatchList *watch_list; + unsigned int backend_types; + XenWatch **backend_watch; + QLIST_HEAD(, XenDevice) inactive_devices; +}; + +struct XenBusClass { + /*< private >*/ + BusClass parent_class; +}; + +#define TYPE_XEN_BUS "xen-bus" +OBJECT_DECLARE_TYPE(XenBus, XenBusClass, + XEN_BUS) + +void xen_bus_init(void); + +void xen_device_backend_set_state(XenDevice *xendev, + enum xenbus_state state); +enum xenbus_state xen_device_backend_get_state(XenDevice *xendev); + +void xen_device_backend_printf(XenDevice *xendev, const char *key, + const char *fmt, ...) + GCC_FMT_ATTR(3, 4); +void xen_device_frontend_printf(XenDevice *xendev, const char *key, + const char *fmt, ...) + GCC_FMT_ATTR(3, 4); + +int xen_device_frontend_scanf(XenDevice *xendev, const char *key, + const char *fmt, ...); + +void xen_device_set_max_grant_refs(XenDevice *xendev, unsigned int nr_refs, + Error **errp); +void *xen_device_map_grant_refs(XenDevice *xendev, uint32_t *refs, + unsigned int nr_refs, int prot, + Error **errp); +void xen_device_unmap_grant_refs(XenDevice *xendev, void *map, + unsigned int nr_refs, Error **errp); + +typedef struct XenDeviceGrantCopySegment { + union { + void *virt; + struct { + uint32_t ref; + off_t offset; + } foreign; + } source, dest; + size_t len; +} XenDeviceGrantCopySegment; + +void xen_device_copy_grant_refs(XenDevice *xendev, bool to_domain, + XenDeviceGrantCopySegment segs[], + unsigned int nr_segs, Error **errp); + +typedef bool (*XenEventHandler)(void *opaque); + +XenEventChannel *xen_device_bind_event_channel(XenDevice *xendev, + unsigned int port, + XenEventHandler handler, + void *opaque, Error **errp); +void xen_device_set_event_channel_context(XenDevice *xendev, + XenEventChannel *channel, + AioContext *ctx, + Error **errp); +void xen_device_notify_event_channel(XenDevice *xendev, + XenEventChannel *channel, + Error **errp); +void xen_device_unbind_event_channel(XenDevice *xendev, + XenEventChannel *channel, + Error **errp); + +#endif /* HW_XEN_BUS_H */ diff --git a/include/hw/xen/xen-legacy-backend.h b/include/hw/xen/xen-legacy-backend.h new file mode 100644 index 000000000..be281e1f3 --- /dev/null +++ b/include/hw/xen/xen-legacy-backend.h @@ -0,0 +1,105 @@ +#ifndef HW_XEN_LEGACY_BACKEND_H +#define HW_XEN_LEGACY_BACKEND_H + +#include "hw/xen/xen_common.h" +#include "hw/xen/xen_pvdev.h" +#include "net/net.h" +#include "qom/object.h" + +#define TYPE_XENSYSDEV "xen-sysdev" +#define TYPE_XENSYSBUS "xen-sysbus" +#define TYPE_XENBACKEND "xen-backend" + +typedef struct XenLegacyDevice XenLegacyDevice; +DECLARE_INSTANCE_CHECKER(XenLegacyDevice, XENBACKEND, + TYPE_XENBACKEND) + +/* variables */ +extern struct xs_handle *xenstore; +extern const char *xen_protocol; +extern DeviceState *xen_sysdev; +extern BusState *xen_sysbus; + +int xenstore_mkdir(char *path, int p); +int xenstore_write_be_str(struct XenLegacyDevice *xendev, const char *node, + const char *val); +int xenstore_write_be_int(struct XenLegacyDevice *xendev, const char *node, + int ival); +int xenstore_write_be_int64(struct XenLegacyDevice *xendev, const char *node, + int64_t ival); +char *xenstore_read_be_str(struct XenLegacyDevice *xendev, const char *node); +int xenstore_read_be_int(struct XenLegacyDevice *xendev, const char *node, + int *ival); +void xenstore_update_fe(char *watch, struct XenLegacyDevice *xendev); +void xenstore_update_be(char *watch, char *type, int dom, + struct XenDevOps *ops); +char *xenstore_read_fe_str(struct XenLegacyDevice *xendev, const char *node); +int xenstore_read_fe_int(struct XenLegacyDevice *xendev, const char *node, + int *ival); +int xenstore_read_fe_uint64(struct XenLegacyDevice *xendev, const char *node, + uint64_t *uval); + +void xen_be_check_state(struct XenLegacyDevice *xendev); + +/* xen backend driver bits */ +int xen_be_init(void); +void xen_be_register_common(void); +int xen_be_register(const char *type, struct XenDevOps *ops); +int xen_be_set_state(struct XenLegacyDevice *xendev, enum xenbus_state state); +int xen_be_bind_evtchn(struct XenLegacyDevice *xendev); +void xen_be_set_max_grant_refs(struct XenLegacyDevice *xendev, + unsigned int nr_refs); +void *xen_be_map_grant_refs(struct XenLegacyDevice *xendev, uint32_t *refs, + unsigned int nr_refs, int prot); +void xen_be_unmap_grant_refs(struct XenLegacyDevice *xendev, void *ptr, + unsigned int nr_refs); + +typedef struct XenGrantCopySegment { + union { + void *virt; + struct { + uint32_t ref; + off_t offset; + } foreign; + } source, dest; + size_t len; +} XenGrantCopySegment; + +int xen_be_copy_grant_refs(struct XenLegacyDevice *xendev, + bool to_domain, XenGrantCopySegment segs[], + unsigned int nr_segs); + +static inline void *xen_be_map_grant_ref(struct XenLegacyDevice *xendev, + uint32_t ref, int prot) +{ + return xen_be_map_grant_refs(xendev, &ref, 1, prot); +} + +static inline void xen_be_unmap_grant_ref(struct XenLegacyDevice *xendev, + void *ptr) +{ + return xen_be_unmap_grant_refs(xendev, ptr, 1); +} + +/* actual backend drivers */ +extern struct XenDevOps xen_console_ops; /* xen_console.c */ +extern struct XenDevOps xen_kbdmouse_ops; /* xen_framebuffer.c */ +extern struct XenDevOps xen_framebuffer_ops; /* xen_framebuffer.c */ +extern struct XenDevOps xen_blkdev_ops; /* xen_disk.c */ +#ifdef CONFIG_VIRTFS +extern struct XenDevOps xen_9pfs_ops; /* xen-9p-backend.c */ +#endif +extern struct XenDevOps xen_netdev_ops; /* xen_nic.c */ +#ifdef CONFIG_USB_LIBUSB +extern struct XenDevOps xen_usb_ops; /* xen-usb.c */ +#endif + +/* configuration (aka xenbus setup) */ +void xen_config_cleanup(void); +int xen_config_dev_blk(DriveInfo *disk); +int xen_config_dev_nic(NICInfo *nic); +int xen_config_dev_vfb(int vdev, const char *type); +int xen_config_dev_vkbd(int vdev); +int xen_config_dev_console(int vdev); + +#endif /* HW_XEN_LEGACY_BACKEND_H */ diff --git a/include/hw/xen/xen-x86.h b/include/hw/xen/xen-x86.h new file mode 100644 index 000000000..85e3db1b8 --- /dev/null +++ b/include/hw/xen/xen-x86.h @@ -0,0 +1,15 @@ +/* + * Xen X86-specific + * + * Copyright 2020 Red Hat, Inc. + * + * SPDX-License-Identifier: GPL-2.0-or-later + */ +#ifndef QEMU_HW_XEN_X86_H +#define QEMU_HW_XEN_X86_H + +#include "hw/i386/pc.h" + +void xen_hvm_init_pc(PCMachineState *pcms, MemoryRegion **ram_memory); + +#endif /* QEMU_HW_XEN_X86_H */ diff --git a/include/hw/xen/xen.h b/include/hw/xen/xen.h new file mode 100644 index 000000000..0f9962b1c --- /dev/null +++ b/include/hw/xen/xen.h @@ -0,0 +1,35 @@ +#ifndef QEMU_HW_XEN_H +#define QEMU_HW_XEN_H + +/* + * public xen header + * stuff needed outside xen-*.c, i.e. interfaces to qemu. + * must not depend on any xen headers being present in + * /usr/include/xen, so it can be included unconditionally. + */ + +#include "exec/cpu-common.h" + +/* xen-machine.c */ +enum xen_mode { + XEN_EMULATE = 0, // xen emulation, using xenner (default) + XEN_ATTACH // attach to xen domain created by libxl +}; + +extern uint32_t xen_domid; +extern enum xen_mode xen_mode; +extern bool xen_domid_restrict; + +int xen_pci_slot_get_pirq(PCIDevice *pci_dev, int irq_num); +void xen_piix3_set_irq(void *opaque, int irq_num, int level); +void xen_piix_pci_write_config_client(uint32_t address, uint32_t val, int len); +void xen_hvm_inject_msi(uint64_t addr, uint32_t data); +int xen_is_pirq_msi(uint32_t msi_data); + +qemu_irq *xen_interrupt_controller_init(void); + +void xenstore_store_pv_console_info(int i, Chardev *chr); + +void xen_register_framebuffer(struct MemoryRegion *mr); + +#endif /* QEMU_HW_XEN_H */ diff --git a/include/hw/xen/xen_common.h b/include/hw/xen/xen_common.h new file mode 100644 index 000000000..82e56339d --- /dev/null +++ b/include/hw/xen/xen_common.h @@ -0,0 +1,690 @@ +#ifndef QEMU_HW_XEN_COMMON_H +#define QEMU_HW_XEN_COMMON_H + +/* + * If we have new enough libxenctrl then we do not want/need these compat + * interfaces, despite what the user supplied cflags might say. They + * must be undefined before including xenctrl.h + */ +#undef XC_WANT_COMPAT_EVTCHN_API +#undef XC_WANT_COMPAT_GNTTAB_API +#undef XC_WANT_COMPAT_MAP_FOREIGN_API + +#include +#include +#include "hw/xen/interface/io/xenbus.h" + +#include "hw/xen/xen.h" +#include "hw/pci/pci.h" +#include "hw/xen/trace.h" + +extern xc_interface *xen_xc; + +/* + * We don't support Xen prior to 4.2.0. + */ + +/* Xen 4.2 through 4.6 */ +#if CONFIG_XEN_CTRL_INTERFACE_VERSION < 40701 + +typedef xc_interface xenforeignmemory_handle; +typedef xc_evtchn xenevtchn_handle; +typedef xc_gnttab xengnttab_handle; +typedef evtchn_port_or_error_t xenevtchn_port_or_error_t; + +#define xenevtchn_open(l, f) xc_evtchn_open(l, f); +#define xenevtchn_close(h) xc_evtchn_close(h) +#define xenevtchn_fd(h) xc_evtchn_fd(h) +#define xenevtchn_pending(h) xc_evtchn_pending(h) +#define xenevtchn_notify(h, p) xc_evtchn_notify(h, p) +#define xenevtchn_bind_interdomain(h, d, p) xc_evtchn_bind_interdomain(h, d, p) +#define xenevtchn_unmask(h, p) xc_evtchn_unmask(h, p) +#define xenevtchn_unbind(h, p) xc_evtchn_unbind(h, p) + +#define xengnttab_open(l, f) xc_gnttab_open(l, f) +#define xengnttab_close(h) xc_gnttab_close(h) +#define xengnttab_set_max_grants(h, n) xc_gnttab_set_max_grants(h, n) +#define xengnttab_map_grant_ref(h, d, r, p) xc_gnttab_map_grant_ref(h, d, r, p) +#define xengnttab_unmap(h, a, n) xc_gnttab_munmap(h, a, n) +#define xengnttab_map_grant_refs(h, c, d, r, p) \ + xc_gnttab_map_grant_refs(h, c, d, r, p) +#define xengnttab_map_domain_grant_refs(h, c, d, r, p) \ + xc_gnttab_map_domain_grant_refs(h, c, d, r, p) + +#define xenforeignmemory_open(l, f) xen_xc +#define xenforeignmemory_close(h) + +static inline void *xenforeignmemory_map(xc_interface *h, uint32_t dom, + int prot, size_t pages, + const xen_pfn_t arr[/*pages*/], + int err[/*pages*/]) +{ + if (err) + return xc_map_foreign_bulk(h, dom, prot, arr, err, pages); + else + return xc_map_foreign_pages(h, dom, prot, arr, pages); +} + +#define xenforeignmemory_unmap(h, p, s) munmap(p, s * XC_PAGE_SIZE) + +#else /* CONFIG_XEN_CTRL_INTERFACE_VERSION >= 40701 */ + +#include +#include +#include + +#endif + +extern xenforeignmemory_handle *xen_fmem; + +#if CONFIG_XEN_CTRL_INTERFACE_VERSION < 40900 + +typedef xc_interface xendevicemodel_handle; + +#else /* CONFIG_XEN_CTRL_INTERFACE_VERSION >= 40900 */ + +#undef XC_WANT_COMPAT_DEVICEMODEL_API +#include + +#endif + +#if CONFIG_XEN_CTRL_INTERFACE_VERSION < 41100 + +static inline int xendevicemodel_relocate_memory( + xendevicemodel_handle *dmod, domid_t domid, uint32_t size, uint64_t src_gfn, + uint64_t dst_gfn) +{ + uint32_t i; + int rc; + + for (i = 0; i < size; i++) { + unsigned long idx = src_gfn + i; + xen_pfn_t gpfn = dst_gfn + i; + + rc = xc_domain_add_to_physmap(xen_xc, domid, XENMAPSPACE_gmfn, idx, + gpfn); + if (rc) { + return rc; + } + } + + return 0; +} + +static inline int xendevicemodel_pin_memory_cacheattr( + xendevicemodel_handle *dmod, domid_t domid, uint64_t start, uint64_t end, + uint32_t type) +{ + return xc_domain_pin_memory_cacheattr(xen_xc, domid, start, end, type); +} + +typedef void xenforeignmemory_resource_handle; + +#define XENMEM_resource_ioreq_server 0 + +#define XENMEM_resource_ioreq_server_frame_bufioreq 0 +#define XENMEM_resource_ioreq_server_frame_ioreq(n) (1 + (n)) + +static inline xenforeignmemory_resource_handle *xenforeignmemory_map_resource( + xenforeignmemory_handle *fmem, domid_t domid, unsigned int type, + unsigned int id, unsigned long frame, unsigned long nr_frames, + void **paddr, int prot, int flags) +{ + errno = EOPNOTSUPP; + return NULL; +} + +#endif /* CONFIG_XEN_CTRL_INTERFACE_VERSION < 41100 */ + +#if CONFIG_XEN_CTRL_INTERFACE_VERSION < 41000 + +#define XEN_COMPAT_PHYSMAP +static inline void *xenforeignmemory_map2(xenforeignmemory_handle *h, + uint32_t dom, void *addr, + int prot, int flags, size_t pages, + const xen_pfn_t arr[/*pages*/], + int err[/*pages*/]) +{ + assert(addr == NULL && flags == 0); + return xenforeignmemory_map(h, dom, prot, pages, arr, err); +} + +static inline int xentoolcore_restrict_all(domid_t domid) +{ + errno = ENOTTY; + return -1; +} + +static inline int xendevicemodel_shutdown(xendevicemodel_handle *dmod, + domid_t domid, unsigned int reason) +{ + errno = ENOTTY; + return -1; +} + +#else /* CONFIG_XEN_CTRL_INTERFACE_VERSION >= 41000 */ + +#include + +#endif + +#if CONFIG_XEN_CTRL_INTERFACE_VERSION < 40900 + +static inline xendevicemodel_handle *xendevicemodel_open( + struct xentoollog_logger *logger, unsigned int open_flags) +{ + return xen_xc; +} + +#if CONFIG_XEN_CTRL_INTERFACE_VERSION >= 40500 + +static inline int xendevicemodel_create_ioreq_server( + xendevicemodel_handle *dmod, domid_t domid, int handle_bufioreq, + ioservid_t *id) +{ + return xc_hvm_create_ioreq_server(dmod, domid, handle_bufioreq, + id); +} + +static inline int xendevicemodel_get_ioreq_server_info( + xendevicemodel_handle *dmod, domid_t domid, ioservid_t id, + xen_pfn_t *ioreq_pfn, xen_pfn_t *bufioreq_pfn, + evtchn_port_t *bufioreq_port) +{ + return xc_hvm_get_ioreq_server_info(dmod, domid, id, ioreq_pfn, + bufioreq_pfn, bufioreq_port); +} + +static inline int xendevicemodel_map_io_range_to_ioreq_server( + xendevicemodel_handle *dmod, domid_t domid, ioservid_t id, int is_mmio, + uint64_t start, uint64_t end) +{ + return xc_hvm_map_io_range_to_ioreq_server(dmod, domid, id, is_mmio, + start, end); +} + +static inline int xendevicemodel_unmap_io_range_from_ioreq_server( + xendevicemodel_handle *dmod, domid_t domid, ioservid_t id, int is_mmio, + uint64_t start, uint64_t end) +{ + return xc_hvm_unmap_io_range_from_ioreq_server(dmod, domid, id, is_mmio, + start, end); +} + +static inline int xendevicemodel_map_pcidev_to_ioreq_server( + xendevicemodel_handle *dmod, domid_t domid, ioservid_t id, + uint16_t segment, uint8_t bus, uint8_t device, uint8_t function) +{ + return xc_hvm_map_pcidev_to_ioreq_server(dmod, domid, id, segment, + bus, device, function); +} + +static inline int xendevicemodel_unmap_pcidev_from_ioreq_server( + xendevicemodel_handle *dmod, domid_t domid, ioservid_t id, + uint16_t segment, uint8_t bus, uint8_t device, uint8_t function) +{ + return xc_hvm_unmap_pcidev_from_ioreq_server(dmod, domid, id, segment, + bus, device, function); +} + +static inline int xendevicemodel_destroy_ioreq_server( + xendevicemodel_handle *dmod, domid_t domid, ioservid_t id) +{ + return xc_hvm_destroy_ioreq_server(dmod, domid, id); +} + +static inline int xendevicemodel_set_ioreq_server_state( + xendevicemodel_handle *dmod, domid_t domid, ioservid_t id, int enabled) +{ + return xc_hvm_set_ioreq_server_state(dmod, domid, id, enabled); +} + +#endif /* CONFIG_XEN_CTRL_INTERFACE_VERSION >= 40500 */ + +static inline int xendevicemodel_set_pci_intx_level( + xendevicemodel_handle *dmod, domid_t domid, uint16_t segment, + uint8_t bus, uint8_t device, uint8_t intx, unsigned int level) +{ + return xc_hvm_set_pci_intx_level(dmod, domid, segment, bus, device, + intx, level); +} + +static inline int xendevicemodel_set_isa_irq_level( + xendevicemodel_handle *dmod, domid_t domid, uint8_t irq, + unsigned int level) +{ + return xc_hvm_set_isa_irq_level(dmod, domid, irq, level); +} + +static inline int xendevicemodel_set_pci_link_route( + xendevicemodel_handle *dmod, domid_t domid, uint8_t link, uint8_t irq) +{ + return xc_hvm_set_pci_link_route(dmod, domid, link, irq); +} + +static inline int xendevicemodel_inject_msi( + xendevicemodel_handle *dmod, domid_t domid, uint64_t msi_addr, + uint32_t msi_data) +{ + return xc_hvm_inject_msi(dmod, domid, msi_addr, msi_data); +} + +static inline int xendevicemodel_track_dirty_vram( + xendevicemodel_handle *dmod, domid_t domid, uint64_t first_pfn, + uint32_t nr, unsigned long *dirty_bitmap) +{ + return xc_hvm_track_dirty_vram(dmod, domid, first_pfn, nr, + dirty_bitmap); +} + +static inline int xendevicemodel_modified_memory( + xendevicemodel_handle *dmod, domid_t domid, uint64_t first_pfn, + uint32_t nr) +{ + return xc_hvm_modified_memory(dmod, domid, first_pfn, nr); +} + +static inline int xendevicemodel_set_mem_type( + xendevicemodel_handle *dmod, domid_t domid, hvmmem_type_t mem_type, + uint64_t first_pfn, uint32_t nr) +{ + return xc_hvm_set_mem_type(dmod, domid, mem_type, first_pfn, nr); +} + +#endif + +extern xendevicemodel_handle *xen_dmod; + +static inline int xen_set_mem_type(domid_t domid, hvmmem_type_t type, + uint64_t first_pfn, uint32_t nr) +{ + return xendevicemodel_set_mem_type(xen_dmod, domid, type, first_pfn, + nr); +} + +static inline int xen_set_pci_intx_level(domid_t domid, uint16_t segment, + uint8_t bus, uint8_t device, + uint8_t intx, unsigned int level) +{ + return xendevicemodel_set_pci_intx_level(xen_dmod, domid, segment, bus, + device, intx, level); +} + +static inline int xen_set_pci_link_route(domid_t domid, uint8_t link, + uint8_t irq) +{ + return xendevicemodel_set_pci_link_route(xen_dmod, domid, link, irq); +} + +static inline int xen_inject_msi(domid_t domid, uint64_t msi_addr, + uint32_t msi_data) +{ + return xendevicemodel_inject_msi(xen_dmod, domid, msi_addr, msi_data); +} + +static inline int xen_set_isa_irq_level(domid_t domid, uint8_t irq, + unsigned int level) +{ + return xendevicemodel_set_isa_irq_level(xen_dmod, domid, irq, level); +} + +static inline int xen_track_dirty_vram(domid_t domid, uint64_t first_pfn, + uint32_t nr, unsigned long *bitmap) +{ + return xendevicemodel_track_dirty_vram(xen_dmod, domid, first_pfn, nr, + bitmap); +} + +static inline int xen_modified_memory(domid_t domid, uint64_t first_pfn, + uint32_t nr) +{ + return xendevicemodel_modified_memory(xen_dmod, domid, first_pfn, nr); +} + +static inline int xen_restrict(domid_t domid) +{ + int rc; + rc = xentoolcore_restrict_all(domid); + trace_xen_domid_restrict(rc ? errno : 0); + return rc; +} + +void destroy_hvm_domain(bool reboot); + +/* shutdown/destroy current domain because of an error */ +void xen_shutdown_fatal_error(const char *fmt, ...) GCC_FMT_ATTR(1, 2); + +#ifdef HVM_PARAM_VMPORT_REGS_PFN +static inline int xen_get_vmport_regs_pfn(xc_interface *xc, domid_t dom, + xen_pfn_t *vmport_regs_pfn) +{ + int rc; + uint64_t value; + rc = xc_hvm_param_get(xc, dom, HVM_PARAM_VMPORT_REGS_PFN, &value); + if (rc >= 0) { + *vmport_regs_pfn = (xen_pfn_t) value; + } + return rc; +} +#else +static inline int xen_get_vmport_regs_pfn(xc_interface *xc, domid_t dom, + xen_pfn_t *vmport_regs_pfn) +{ + return -ENOSYS; +} +#endif + +/* Xen before 4.6 */ +#if CONFIG_XEN_CTRL_INTERFACE_VERSION < 40600 + +#ifndef HVM_IOREQSRV_BUFIOREQ_ATOMIC +#define HVM_IOREQSRV_BUFIOREQ_ATOMIC 2 +#endif + +#endif + +static inline int xen_get_default_ioreq_server_info(domid_t dom, + xen_pfn_t *ioreq_pfn, + xen_pfn_t *bufioreq_pfn, + evtchn_port_t + *bufioreq_evtchn) +{ + unsigned long param; + int rc; + + rc = xc_get_hvm_param(xen_xc, dom, HVM_PARAM_IOREQ_PFN, ¶m); + if (rc < 0) { + fprintf(stderr, "failed to get HVM_PARAM_IOREQ_PFN\n"); + return -1; + } + + *ioreq_pfn = param; + + rc = xc_get_hvm_param(xen_xc, dom, HVM_PARAM_BUFIOREQ_PFN, ¶m); + if (rc < 0) { + fprintf(stderr, "failed to get HVM_PARAM_BUFIOREQ_PFN\n"); + return -1; + } + + *bufioreq_pfn = param; + + rc = xc_get_hvm_param(xen_xc, dom, HVM_PARAM_BUFIOREQ_EVTCHN, + ¶m); + if (rc < 0) { + fprintf(stderr, "failed to get HVM_PARAM_BUFIOREQ_EVTCHN\n"); + return -1; + } + + *bufioreq_evtchn = param; + + return 0; +} + +/* Xen before 4.5 */ +#if CONFIG_XEN_CTRL_INTERFACE_VERSION < 40500 + +#ifndef HVM_PARAM_BUFIOREQ_EVTCHN +#define HVM_PARAM_BUFIOREQ_EVTCHN 26 +#endif + +#define IOREQ_TYPE_PCI_CONFIG 2 + +typedef uint16_t ioservid_t; + +static inline void xen_map_memory_section(domid_t dom, + ioservid_t ioservid, + MemoryRegionSection *section) +{ +} + +static inline void xen_unmap_memory_section(domid_t dom, + ioservid_t ioservid, + MemoryRegionSection *section) +{ +} + +static inline void xen_map_io_section(domid_t dom, + ioservid_t ioservid, + MemoryRegionSection *section) +{ +} + +static inline void xen_unmap_io_section(domid_t dom, + ioservid_t ioservid, + MemoryRegionSection *section) +{ +} + +static inline void xen_map_pcidev(domid_t dom, + ioservid_t ioservid, + PCIDevice *pci_dev) +{ +} + +static inline void xen_unmap_pcidev(domid_t dom, + ioservid_t ioservid, + PCIDevice *pci_dev) +{ +} + +static inline void xen_create_ioreq_server(domid_t dom, + ioservid_t *ioservid) +{ +} + +static inline void xen_destroy_ioreq_server(domid_t dom, + ioservid_t ioservid) +{ +} + +static inline int xen_get_ioreq_server_info(domid_t dom, + ioservid_t ioservid, + xen_pfn_t *ioreq_pfn, + xen_pfn_t *bufioreq_pfn, + evtchn_port_t *bufioreq_evtchn) +{ + return xen_get_default_ioreq_server_info(dom, ioreq_pfn, + bufioreq_pfn, + bufioreq_evtchn); +} + +static inline int xen_set_ioreq_server_state(domid_t dom, + ioservid_t ioservid, + bool enable) +{ + return 0; +} + +/* Xen 4.5 */ +#else + +static bool use_default_ioreq_server; + +static inline void xen_map_memory_section(domid_t dom, + ioservid_t ioservid, + MemoryRegionSection *section) +{ + hwaddr start_addr = section->offset_within_address_space; + ram_addr_t size = int128_get64(section->size); + hwaddr end_addr = start_addr + size - 1; + + if (use_default_ioreq_server) { + return; + } + + trace_xen_map_mmio_range(ioservid, start_addr, end_addr); + xendevicemodel_map_io_range_to_ioreq_server(xen_dmod, dom, ioservid, 1, + start_addr, end_addr); +} + +static inline void xen_unmap_memory_section(domid_t dom, + ioservid_t ioservid, + MemoryRegionSection *section) +{ + hwaddr start_addr = section->offset_within_address_space; + ram_addr_t size = int128_get64(section->size); + hwaddr end_addr = start_addr + size - 1; + + if (use_default_ioreq_server) { + return; + } + + trace_xen_unmap_mmio_range(ioservid, start_addr, end_addr); + xendevicemodel_unmap_io_range_from_ioreq_server(xen_dmod, dom, ioservid, + 1, start_addr, end_addr); +} + +static inline void xen_map_io_section(domid_t dom, + ioservid_t ioservid, + MemoryRegionSection *section) +{ + hwaddr start_addr = section->offset_within_address_space; + ram_addr_t size = int128_get64(section->size); + hwaddr end_addr = start_addr + size - 1; + + if (use_default_ioreq_server) { + return; + } + + trace_xen_map_portio_range(ioservid, start_addr, end_addr); + xendevicemodel_map_io_range_to_ioreq_server(xen_dmod, dom, ioservid, 0, + start_addr, end_addr); +} + +static inline void xen_unmap_io_section(domid_t dom, + ioservid_t ioservid, + MemoryRegionSection *section) +{ + hwaddr start_addr = section->offset_within_address_space; + ram_addr_t size = int128_get64(section->size); + hwaddr end_addr = start_addr + size - 1; + + if (use_default_ioreq_server) { + return; + } + + trace_xen_unmap_portio_range(ioservid, start_addr, end_addr); + xendevicemodel_unmap_io_range_from_ioreq_server(xen_dmod, dom, ioservid, + 0, start_addr, end_addr); +} + +static inline void xen_map_pcidev(domid_t dom, + ioservid_t ioservid, + PCIDevice *pci_dev) +{ + if (use_default_ioreq_server) { + return; + } + + trace_xen_map_pcidev(ioservid, pci_dev_bus_num(pci_dev), + PCI_SLOT(pci_dev->devfn), PCI_FUNC(pci_dev->devfn)); + xendevicemodel_map_pcidev_to_ioreq_server(xen_dmod, dom, ioservid, 0, + pci_dev_bus_num(pci_dev), + PCI_SLOT(pci_dev->devfn), + PCI_FUNC(pci_dev->devfn)); +} + +static inline void xen_unmap_pcidev(domid_t dom, + ioservid_t ioservid, + PCIDevice *pci_dev) +{ + if (use_default_ioreq_server) { + return; + } + + trace_xen_unmap_pcidev(ioservid, pci_dev_bus_num(pci_dev), + PCI_SLOT(pci_dev->devfn), PCI_FUNC(pci_dev->devfn)); + xendevicemodel_unmap_pcidev_from_ioreq_server(xen_dmod, dom, ioservid, 0, + pci_dev_bus_num(pci_dev), + PCI_SLOT(pci_dev->devfn), + PCI_FUNC(pci_dev->devfn)); +} + +static inline void xen_create_ioreq_server(domid_t dom, + ioservid_t *ioservid) +{ + int rc = xendevicemodel_create_ioreq_server(xen_dmod, dom, + HVM_IOREQSRV_BUFIOREQ_ATOMIC, + ioservid); + + if (rc == 0) { + trace_xen_ioreq_server_create(*ioservid); + return; + } + + *ioservid = 0; + use_default_ioreq_server = true; + trace_xen_default_ioreq_server(); +} + +static inline void xen_destroy_ioreq_server(domid_t dom, + ioservid_t ioservid) +{ + if (use_default_ioreq_server) { + return; + } + + trace_xen_ioreq_server_destroy(ioservid); + xendevicemodel_destroy_ioreq_server(xen_dmod, dom, ioservid); +} + +static inline int xen_get_ioreq_server_info(domid_t dom, + ioservid_t ioservid, + xen_pfn_t *ioreq_pfn, + xen_pfn_t *bufioreq_pfn, + evtchn_port_t *bufioreq_evtchn) +{ + if (use_default_ioreq_server) { + return xen_get_default_ioreq_server_info(dom, ioreq_pfn, + bufioreq_pfn, + bufioreq_evtchn); + } + + return xendevicemodel_get_ioreq_server_info(xen_dmod, dom, ioservid, + ioreq_pfn, bufioreq_pfn, + bufioreq_evtchn); +} + +static inline int xen_set_ioreq_server_state(domid_t dom, + ioservid_t ioservid, + bool enable) +{ + if (use_default_ioreq_server) { + return 0; + } + + trace_xen_ioreq_server_state(ioservid, enable); + return xendevicemodel_set_ioreq_server_state(xen_dmod, dom, ioservid, + enable); +} + +#endif + +/* Xen before 4.8 */ + +#if CONFIG_XEN_CTRL_INTERFACE_VERSION < 40800 + +struct xengnttab_grant_copy_segment { + union xengnttab_copy_ptr { + void *virt; + struct { + uint32_t ref; + uint16_t offset; + uint16_t domid; + } foreign; + } source, dest; + uint16_t len; + uint16_t flags; + int16_t status; +}; + +typedef struct xengnttab_grant_copy_segment xengnttab_grant_copy_segment_t; + +static inline int xengnttab_grant_copy(xengnttab_handle *xgt, uint32_t count, + xengnttab_grant_copy_segment_t *segs) +{ + return -ENOSYS; +} +#endif + +#endif /* QEMU_HW_XEN_COMMON_H */ diff --git a/include/hw/xen/xen_pvdev.h b/include/hw/xen/xen_pvdev.h new file mode 100644 index 000000000..83e5174d9 --- /dev/null +++ b/include/hw/xen/xen_pvdev.h @@ -0,0 +1,81 @@ +#ifndef QEMU_HW_XEN_PVDEV_H +#define QEMU_HW_XEN_PVDEV_H + +#include "hw/xen/xen_common.h" +/* ------------------------------------------------------------- */ + +#define XEN_BUFSIZE 1024 + +struct XenLegacyDevice; + +/* driver uses grant tables -> open gntdev device (xendev->gnttabdev) */ +#define DEVOPS_FLAG_NEED_GNTDEV 1 +/* don't expect frontend doing correct state transitions (aka console quirk) */ +#define DEVOPS_FLAG_IGNORE_STATE 2 + +struct XenDevOps { + size_t size; + uint32_t flags; + void (*alloc)(struct XenLegacyDevice *xendev); + int (*init)(struct XenLegacyDevice *xendev); + int (*initialise)(struct XenLegacyDevice *xendev); + void (*connected)(struct XenLegacyDevice *xendev); + void (*event)(struct XenLegacyDevice *xendev); + void (*disconnect)(struct XenLegacyDevice *xendev); + int (*free)(struct XenLegacyDevice *xendev); + void (*backend_changed)(struct XenLegacyDevice *xendev, + const char *node); + void (*frontend_changed)(struct XenLegacyDevice *xendev, + const char *node); + int (*backend_register)(void); +}; + +struct XenLegacyDevice { + DeviceState qdev; + const char *type; + int dom; + int dev; + char name[64]; + int debug; + + enum xenbus_state be_state; + enum xenbus_state fe_state; + int online; + char be[XEN_BUFSIZE]; + char *fe; + char *protocol; + int remote_port; + int local_port; + + xenevtchn_handle *evtchndev; + xengnttab_handle *gnttabdev; + + struct XenDevOps *ops; + QTAILQ_ENTRY(XenLegacyDevice) next; +}; + +/* ------------------------------------------------------------- */ + +/* xenstore helper functions */ +int xenstore_write_str(const char *base, const char *node, const char *val); +int xenstore_write_int(const char *base, const char *node, int ival); +int xenstore_write_int64(const char *base, const char *node, int64_t ival); +char *xenstore_read_str(const char *base, const char *node); +int xenstore_read_int(const char *base, const char *node, int *ival); +int xenstore_read_uint64(const char *base, const char *node, uint64_t *uval); +void xenstore_update(void *unused); + +const char *xenbus_strstate(enum xenbus_state state); + +void xen_pv_evtchn_event(void *opaque); +void xen_pv_insert_xendev(struct XenLegacyDevice *xendev); +void xen_pv_del_xendev(struct XenLegacyDevice *xendev); +struct XenLegacyDevice *xen_pv_find_xendev(const char *type, int dom, int dev); + +void xen_pv_unbind_evtchn(struct XenLegacyDevice *xendev); +int xen_pv_send_notify(struct XenLegacyDevice *xendev); + +void xen_pv_printf(struct XenLegacyDevice *xendev, int msg_level, + const char *fmt, ...) GCC_FMT_ATTR(3, 4); + +#endif /* QEMU_HW_XEN_PVDEV_H */ diff --git a/include/hw/xtensa/mx_pic.h b/include/hw/xtensa/mx_pic.h new file mode 100644 index 000000000..500424c8d --- /dev/null +++ b/include/hw/xtensa/mx_pic.h @@ -0,0 +1,43 @@ +/* + * Copyright (c) 2013 - 2019, Max Filippov, Open Source and Linux Lab. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of the Open Source and Linux Lab nor the + * names of its contributors may be used to endorse or promote products + * derived from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY + * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef XTENSA_MX_PIC_H +#define XTENSA_MX_PIC_H + +#include "exec/memory.h" + +struct XtensaMxPic; +typedef struct XtensaMxPic XtensaMxPic; + +XtensaMxPic *xtensa_mx_pic_init(unsigned n_extint); +void xtensa_mx_pic_reset(void *opaque); +MemoryRegion *xtensa_mx_pic_register_cpu(XtensaMxPic *mx, + qemu_irq *irq, + qemu_irq runstall); +qemu_irq *xtensa_mx_pic_get_extints(XtensaMxPic *mx); + +#endif diff --git a/include/hw/xtensa/xtensa-isa.h b/include/hw/xtensa/xtensa-isa.h new file mode 100644 index 000000000..a289531bd --- /dev/null +++ b/include/hw/xtensa/xtensa-isa.h @@ -0,0 +1,836 @@ +/* Interface definition for configurable Xtensa ISA support. + * + * Copyright (c) 2001-2013 Tensilica Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be included + * in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY + * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + */ + +#ifndef HW_XTENSA_XTENSA_ISA_H +#define HW_XTENSA_XTENSA_ISA_H + +#ifdef __cplusplus +extern "C" { +#endif + +/* + * Version number: This is intended to help support code that works with + * versions of this library from multiple Xtensa releases. + */ + +#define XTENSA_ISA_VERSION 7000 + +/* + * This file defines the interface to the Xtensa ISA library. This + * library contains most of the ISA-specific information for a + * particular Xtensa processor. For example, the set of valid + * instructions, their opcode encodings and operand fields are all + * included here. + * + * This interface basically defines a number of abstract data types. + * + * . an instruction buffer - for holding the raw instruction bits + * . ISA info - information about the ISA as a whole + * . instruction formats - instruction size and slot structure + * . opcodes - information about individual instructions + * . operands - information about register and immediate instruction operands + * . stateOperands - information about processor state instruction operands + * . interfaceOperands - information about interface instruction operands + * . register files - register file information + * . processor states - internal processor state information + * . system registers - "special registers" and "user registers" + * . interfaces - TIE interfaces that are external to the processor + * . functional units - TIE shared functions + * + * The interface defines a set of functions to access each data type. + * With the exception of the instruction buffer, the internal + * representations of the data structures are hidden. All accesses must + * be made through the functions defined here. + */ + +typedef struct xtensa_isa_opaque { int unused; } *xtensa_isa; + + +/* + * Most of the Xtensa ISA entities (e.g., opcodes, regfiles, etc.) are + * represented here using sequential integers beginning with 0. The + * specific values are only fixed for a particular instantiation of an + * xtensa_isa structure, so these values should only be used + * internally. + */ + +typedef int xtensa_opcode; +typedef int xtensa_format; +typedef int xtensa_regfile; +typedef int xtensa_state; +typedef int xtensa_sysreg; +typedef int xtensa_interface; +typedef int xtensa_funcUnit; + + +/* Define a unique value for undefined items. */ + +#define XTENSA_UNDEFINED -1 + + +/* + * Overview of using this interface to decode/encode instructions: + * + * Each Xtensa instruction is associated with a particular instruction + * format, where the format defines a fixed number of slots for + * operations. The formats for the core Xtensa ISA have only one slot, + * but FLIX instructions may have multiple slots. Within each slot, + * there is a single opcode and some number of associated operands. + * + * The encoding and decoding functions operate on instruction buffers, + * not on the raw bytes of the instructions. The same instruction + * buffer data structure is used for both entire instructions and + * individual slots in those instructions -- the contents of a slot need + * to be extracted from or inserted into the buffer for the instruction + * as a whole. + * + * Decoding an instruction involves first finding the format, which + * identifies the number of slots, and then decoding each slot + * separately. A slot is decoded by finding the opcode and then using + * the opcode to determine how many operands there are. For example: + * + * xtensa_insnbuf_from_chars + * xtensa_format_decode + * for each slot { + * xtensa_format_get_slot + * xtensa_opcode_decode + * for each operand { + * xtensa_operand_get_field + * xtensa_operand_decode + * } + * } + * + * Encoding an instruction is roughly the same procedure in reverse: + * + * xtensa_format_encode + * for each slot { + * xtensa_opcode_encode + * for each operand { + * xtensa_operand_encode + * xtensa_operand_set_field + * } + * xtensa_format_set_slot + * } + * xtensa_insnbuf_to_chars + */ + + +/* Error handling. */ + +/* + * Error codes. The code for the most recent error condition can be + * retrieved with the "errno" function. For any result other than + * xtensa_isa_ok, an error message containing additional information + * about the problem can be retrieved using the "error_msg" function. + * The error messages are stored in an internal buffer, which should + * not be freed and may be overwritten by subsequent operations. + */ + +typedef enum xtensa_isa_status_enum { + xtensa_isa_ok = 0, + xtensa_isa_bad_format, + xtensa_isa_bad_slot, + xtensa_isa_bad_opcode, + xtensa_isa_bad_operand, + xtensa_isa_bad_field, + xtensa_isa_bad_iclass, + xtensa_isa_bad_regfile, + xtensa_isa_bad_sysreg, + xtensa_isa_bad_state, + xtensa_isa_bad_interface, + xtensa_isa_bad_funcUnit, + xtensa_isa_wrong_slot, + xtensa_isa_no_field, + xtensa_isa_out_of_memory, + xtensa_isa_buffer_overflow, + xtensa_isa_internal_error, + xtensa_isa_bad_value +} xtensa_isa_status; + +xtensa_isa_status xtensa_isa_errno(xtensa_isa isa); + +char *xtensa_isa_error_msg(xtensa_isa isa); + + + +/* Instruction buffers. */ + +typedef uint32_t xtensa_insnbuf_word; +typedef xtensa_insnbuf_word *xtensa_insnbuf; + + +/* Get the size in "insnbuf_words" of the xtensa_insnbuf array. */ + +int xtensa_insnbuf_size(xtensa_isa isa); + + +/* Allocate an xtensa_insnbuf of the right size. */ + +xtensa_insnbuf xtensa_insnbuf_alloc(xtensa_isa isa); + + +/* Release an xtensa_insnbuf. */ + +void xtensa_insnbuf_free(xtensa_isa isa, xtensa_insnbuf buf); + + +/* + * Conversion between raw memory (char arrays) and our internal + * instruction representation. This is complicated by the Xtensa ISA's + * variable instruction lengths. When converting to chars, the buffer + * must contain a valid instruction so we know how many bytes to copy; + * thus, the "to_chars" function returns the number of bytes copied or + * XTENSA_UNDEFINED on error. The "from_chars" function first reads the + * minimal number of bytes required to decode the instruction length and + * then proceeds to copy the entire instruction into the buffer; if the + * memory does not contain a valid instruction, it copies the maximum + * number of bytes required for the longest Xtensa instruction. The + * "num_chars" argument may be used to limit the number of bytes that + * can be read or written. Otherwise, if "num_chars" is zero, the + * functions may read or write past the end of the code. + */ + +int xtensa_insnbuf_to_chars(xtensa_isa isa, const xtensa_insnbuf insn, + unsigned char *cp, int num_chars); + +void xtensa_insnbuf_from_chars(xtensa_isa isa, xtensa_insnbuf insn, + const unsigned char *cp, int num_chars); + + + +/* ISA information. */ + +/* Initialize the ISA information. */ + +xtensa_isa xtensa_isa_init(void *xtensa_modules, xtensa_isa_status *errno_p, + char **error_msg_p); + + +/* Deallocate an xtensa_isa structure. */ + +void xtensa_isa_free(xtensa_isa isa); + + +/* Get the maximum instruction size in bytes. */ + +int xtensa_isa_maxlength(xtensa_isa isa); + + +/* + * Decode the length in bytes of an instruction in raw memory (not an + * insnbuf). This function reads only the minimal number of bytes + * required to decode the instruction length. Returns + * XTENSA_UNDEFINED on error. + */ + +int xtensa_isa_length_from_chars(xtensa_isa isa, const unsigned char *cp); + + +/* + * Get the number of stages in the processor's pipeline. The pipeline + * stage values returned by other functions in this library will range + * from 0 to N-1, where N is the value returned by this function. + * Note that the stage numbers used here may not correspond to the + * actual processor hardware, e.g., the hardware may have additional + * stages before stage 0. Returns XTENSA_UNDEFINED on error. + */ + +int xtensa_isa_num_pipe_stages(xtensa_isa isa); + + +/* Get the number of various entities that are defined for this processor. */ + +int xtensa_isa_num_formats(xtensa_isa isa); + +int xtensa_isa_num_opcodes(xtensa_isa isa); + +int xtensa_isa_num_regfiles(xtensa_isa isa); + +int xtensa_isa_num_states(xtensa_isa isa); + +int xtensa_isa_num_sysregs(xtensa_isa isa); + +int xtensa_isa_num_interfaces(xtensa_isa isa); + +int xtensa_isa_num_funcUnits(xtensa_isa isa); + + + +/* Instruction formats. */ + +/* Get the name of a format. Returns null on error. */ + +const char *xtensa_format_name(xtensa_isa isa, xtensa_format fmt); + + +/* + * Given a format name, return the format number. Returns + * XTENSA_UNDEFINED if the name is not a valid format. + */ + +xtensa_format xtensa_format_lookup(xtensa_isa isa, const char *fmtname); + + +/* + * Decode the instruction format from a binary instruction buffer. + * Returns XTENSA_UNDEFINED if the format is not recognized. + */ + +xtensa_format xtensa_format_decode(xtensa_isa isa, const xtensa_insnbuf insn); + + +/* + * Set the instruction format field(s) in a binary instruction buffer. + * All the other fields are set to zero. Returns non-zero on error. + */ + +int xtensa_format_encode(xtensa_isa isa, xtensa_format fmt, + xtensa_insnbuf insn); + + +/* + * Find the length (in bytes) of an instruction. Returns + * XTENSA_UNDEFINED on error. + */ + +int xtensa_format_length(xtensa_isa isa, xtensa_format fmt); + + +/* + * Get the number of slots in an instruction. Returns XTENSA_UNDEFINED + * on error. + */ + +int xtensa_format_num_slots(xtensa_isa isa, xtensa_format fmt); + + +/* + * Get the opcode for a no-op in a particular slot. + * Returns XTENSA_UNDEFINED on error. + */ + +xtensa_opcode xtensa_format_slot_nop_opcode(xtensa_isa isa, xtensa_format fmt, + int slot); + + +/* + * Get the bits for a specified slot out of an insnbuf for the + * instruction as a whole and put them into an insnbuf for that one + * slot, and do the opposite to set a slot. Return non-zero on error. + */ + +int xtensa_format_get_slot(xtensa_isa isa, xtensa_format fmt, int slot, + const xtensa_insnbuf insn, xtensa_insnbuf slotbuf); + +int xtensa_format_set_slot(xtensa_isa isa, xtensa_format fmt, int slot, + xtensa_insnbuf insn, const xtensa_insnbuf slotbuf); + + + +/* Opcode information. */ + +/* + * Translate a mnemonic name to an opcode. Returns XTENSA_UNDEFINED if + * the name is not a valid opcode mnemonic. + */ + +xtensa_opcode xtensa_opcode_lookup(xtensa_isa isa, const char *opname); + + +/* + * Decode the opcode for one instruction slot from a binary instruction + * buffer. Returns the opcode or XTENSA_UNDEFINED if the opcode is + * illegal. + */ + +xtensa_opcode xtensa_opcode_decode(xtensa_isa isa, xtensa_format fmt, int slot, + const xtensa_insnbuf slotbuf); + + +/* + * Set the opcode field(s) for an instruction slot. All other fields + * in the slot are set to zero. Returns non-zero if the opcode cannot + * be encoded. + */ + +int xtensa_opcode_encode(xtensa_isa isa, xtensa_format fmt, int slot, + xtensa_insnbuf slotbuf, xtensa_opcode opc); + + +/* Get the mnemonic name for an opcode. Returns null on error. */ + +const char *xtensa_opcode_name(xtensa_isa isa, xtensa_opcode opc); + + +/* Check various properties of opcodes. These functions return 0 if + * the condition is false, 1 if the condition is true, and + * XTENSA_UNDEFINED on error. The instructions are classified as + * follows: + * + * branch: conditional branch; may fall through to next instruction (B*) + * jump: unconditional branch (J, JX, RET*, RF*) + * loop: zero-overhead loop (LOOP*) + * call: unconditional call; control returns to next instruction (CALL*) + * + * For the opcodes that affect control flow in some way, the branch + * target may be specified by an immediate operand or it may be an + * address stored in a register. You can distinguish these by + * checking if the instruction has a PC-relative immediate + * operand. + */ + +int xtensa_opcode_is_branch(xtensa_isa isa, xtensa_opcode opc); + +int xtensa_opcode_is_jump(xtensa_isa isa, xtensa_opcode opc); + +int xtensa_opcode_is_loop(xtensa_isa isa, xtensa_opcode opc); + +int xtensa_opcode_is_call(xtensa_isa isa, xtensa_opcode opc); + + +/* + * Find the number of ordinary operands, state operands, and interface + * operands for an instruction. These return XTENSA_UNDEFINED on + * error. + */ + +int xtensa_opcode_num_operands(xtensa_isa isa, xtensa_opcode opc); + +int xtensa_opcode_num_stateOperands(xtensa_isa isa, xtensa_opcode opc); + +int xtensa_opcode_num_interfaceOperands(xtensa_isa isa, xtensa_opcode opc); + + +/* + * Get functional unit usage requirements for an opcode. Each "use" + * is identified by a pair. The + * "num_funcUnit_uses" function returns the number of these "uses" or + * XTENSA_UNDEFINED on error. The "funcUnit_use" function returns + * a pointer to a "use" pair or null on error. + */ + +typedef struct xtensa_funcUnit_use_struct { + xtensa_funcUnit unit; + int stage; +} xtensa_funcUnit_use; + +int xtensa_opcode_num_funcUnit_uses(xtensa_isa isa, xtensa_opcode opc); + +xtensa_funcUnit_use *xtensa_opcode_funcUnit_use(xtensa_isa isa, + xtensa_opcode opc, int u); + + + +/* Operand information. */ + +/* Get the name of an operand. Returns null on error. */ + +const char *xtensa_operand_name(xtensa_isa isa, xtensa_opcode opc, int opnd); + + +/* + * Some operands are "invisible", i.e., not explicitly specified in + * assembly language. When assembling an instruction, you need not set + * the values of invisible operands, since they are either hardwired or + * derived from other field values. The values of invisible operands + * can be examined in the same way as other operands, but remember that + * an invisible operand may get its value from another visible one, so + * the entire instruction must be available before examining the + * invisible operand values. This function returns 1 if an operand is + * visible, 0 if it is invisible, or XTENSA_UNDEFINED on error. Note + * that whether an operand is visible is orthogonal to whether it is + * "implicit", i.e., whether it is encoded in a field in the + * instruction. + */ + +int xtensa_operand_is_visible(xtensa_isa isa, xtensa_opcode opc, int opnd); + + +/* + * Check if an operand is an input ('i'), output ('o'), or inout ('m') + * operand. Note: The output operand of a conditional assignment + * (e.g., movnez) appears here as an inout ('m') even if it is declared + * in the TIE code as an output ('o'); this allows the compiler to + * properly handle register allocation for conditional assignments. + * Returns 0 on error. + */ + +char xtensa_operand_inout(xtensa_isa isa, xtensa_opcode opc, int opnd); + + +/* + * Get and set the raw (encoded) value of the field for the specified + * operand. The "set" function does not check if the value fits in the + * field; that is done by the "encode" function below. Both of these + * functions return non-zero on error, e.g., if the field is not defined + * for the specified slot. + */ + +int xtensa_operand_get_field(xtensa_isa isa, xtensa_opcode opc, int opnd, + xtensa_format fmt, int slot, + const xtensa_insnbuf slotbuf, uint32_t *valp); + +int xtensa_operand_set_field(xtensa_isa isa, xtensa_opcode opc, int opnd, + xtensa_format fmt, int slot, + xtensa_insnbuf slotbuf, uint32_t val); + + +/* + * Encode and decode operands. The raw bits in the operand field may + * be encoded in a variety of different ways. These functions hide + * the details of that encoding. The result values are returned through + * the argument pointer. The return value is non-zero on error. + */ + +int xtensa_operand_encode(xtensa_isa isa, xtensa_opcode opc, int opnd, + uint32_t *valp); + +int xtensa_operand_decode(xtensa_isa isa, xtensa_opcode opc, int opnd, + uint32_t *valp); + + +/* + * An operand may be either a register operand or an immediate of some + * sort (e.g., PC-relative or not). The "is_register" function returns + * 0 if the operand is an immediate, 1 if it is a register, and + * XTENSA_UNDEFINED on error. The "regfile" function returns the + * regfile for a register operand, or XTENSA_UNDEFINED on error. + */ + +int xtensa_operand_is_register(xtensa_isa isa, xtensa_opcode opc, int opnd); + +xtensa_regfile xtensa_operand_regfile(xtensa_isa isa, xtensa_opcode opc, + int opnd); + + +/* + * Register operands may span multiple consecutive registers, e.g., a + * 64-bit data type may occupy two 32-bit registers. Only the first + * register is encoded in the operand field. This function specifies + * the number of consecutive registers occupied by this operand. For + * non-register operands, the return value is undefined. Returns + * XTENSA_UNDEFINED on error. + */ + +int xtensa_operand_num_regs(xtensa_isa isa, xtensa_opcode opc, int opnd); + + +/* + * Some register operands do not completely identify the register being + * accessed. For example, the operand value may be added to an internal + * state value. By definition, this implies that the corresponding + * regfile is not allocatable. Unknown registers should generally be + * treated with worst-case assumptions. The function returns 0 if the + * register value is unknown, 1 if known, and XTENSA_UNDEFINED on + * error. + */ + +int xtensa_operand_is_known_reg(xtensa_isa isa, xtensa_opcode opc, int opnd); + + +/* + * Check if an immediate operand is PC-relative. Returns 0 for register + * operands and non-PC-relative immediates, 1 for PC-relative + * immediates, and XTENSA_UNDEFINED on error. + */ + +int xtensa_operand_is_PCrelative(xtensa_isa isa, xtensa_opcode opc, int opnd); + + +/* + * For PC-relative offset operands, the interpretation of the offset may + * vary between opcodes, e.g., is it relative to the current PC or that + * of the next instruction? The following functions are defined to + * perform PC-relative relocations and to undo them (as in the + * disassembler). The "do_reloc" function takes the desired address + * value and the PC of the current instruction and sets the value to the + * corresponding PC-relative offset (which can then be encoded and + * stored into the operand field). The "undo_reloc" function takes the + * unencoded offset value and the current PC and sets the value to the + * appropriate address. The return values are non-zero on error. Note + * that these functions do not replace the encode/decode functions; the + * operands must be encoded/decoded separately and the encode functions + * are responsible for detecting invalid operand values. + */ + +int xtensa_operand_do_reloc(xtensa_isa isa, xtensa_opcode opc, int opnd, + uint32_t *valp, uint32_t pc); + +int xtensa_operand_undo_reloc(xtensa_isa isa, xtensa_opcode opc, int opnd, + uint32_t *valp, uint32_t pc); + + + +/* State Operands. */ + +/* + * Get the state accessed by a state operand. Returns XTENSA_UNDEFINED + * on error. + */ + +xtensa_state xtensa_stateOperand_state(xtensa_isa isa, xtensa_opcode opc, + int stOp); + + +/* + * Check if a state operand is an input ('i'), output ('o'), or inout + * ('m') operand. Returns 0 on error. + */ + +char xtensa_stateOperand_inout(xtensa_isa isa, xtensa_opcode opc, int stOp); + + + +/* Interface Operands. */ + +/* + * Get the external interface accessed by an interface operand. + * Returns XTENSA_UNDEFINED on error. + */ + +xtensa_interface xtensa_interfaceOperand_interface(xtensa_isa isa, + xtensa_opcode opc, + int ifOp); + + + +/* Register Files. */ + +/* + * Regfiles include both "real" regfiles and "views", where a view + * allows a group of adjacent registers in a real "parent" regfile to be + * viewed as a single register. A regfile view has all the same + * properties as its parent except for its (long) name, bit width, number + * of entries, and default ctype. You can use the parent function to + * distinguish these two classes. + */ + +/* + * Look up a regfile by either its name or its abbreviated "short name". + * Returns XTENSA_UNDEFINED on error. The "lookup_shortname" function + * ignores "view" regfiles since they always have the same shortname as + * their parents. + */ + +xtensa_regfile xtensa_regfile_lookup(xtensa_isa isa, const char *name); + +xtensa_regfile xtensa_regfile_lookup_shortname(xtensa_isa isa, + const char *shortname); + + +/* + * Get the name or abbreviated "short name" of a regfile. + * Returns null on error. + */ + +const char *xtensa_regfile_name(xtensa_isa isa, xtensa_regfile rf); + +const char *xtensa_regfile_shortname(xtensa_isa isa, xtensa_regfile rf); + + +/* + * Get the parent regfile of a "view" regfile. If the regfile is not a + * view, the result is the same as the input parameter. Returns + * XTENSA_UNDEFINED on error. + */ + +xtensa_regfile xtensa_regfile_view_parent(xtensa_isa isa, xtensa_regfile rf); + + +/* + * Get the bit width of a regfile or regfile view. + * Returns XTENSA_UNDEFINED on error. + */ + +int xtensa_regfile_num_bits(xtensa_isa isa, xtensa_regfile rf); + + +/* + * Get the number of regfile entries. Returns XTENSA_UNDEFINED on + * error. + */ + +int xtensa_regfile_num_entries(xtensa_isa isa, xtensa_regfile rf); + + + +/* Processor States. */ + +/* Look up a state by name. Returns XTENSA_UNDEFINED on error. */ + +xtensa_state xtensa_state_lookup(xtensa_isa isa, const char *name); + + +/* Get the name for a processor state. Returns null on error. */ + +const char *xtensa_state_name(xtensa_isa isa, xtensa_state st); + + +/* + * Get the bit width for a processor state. + * Returns XTENSA_UNDEFINED on error. + */ + +int xtensa_state_num_bits(xtensa_isa isa, xtensa_state st); + + +/* + * Check if a state is exported from the processor core. Returns 0 if + * the condition is false, 1 if the condition is true, and + * XTENSA_UNDEFINED on error. + */ + +int xtensa_state_is_exported(xtensa_isa isa, xtensa_state st); + + +/* + * Check for a "shared_or" state. Returns 0 if the condition is false, + * 1 if the condition is true, and XTENSA_UNDEFINED on error. + */ + +int xtensa_state_is_shared_or(xtensa_isa isa, xtensa_state st); + + + +/* Sysregs ("special registers" and "user registers"). */ + +/* + * Look up a register by its number and whether it is a "user register" + * or a "special register". Returns XTENSA_UNDEFINED if the sysreg does + * not exist. + */ + +xtensa_sysreg xtensa_sysreg_lookup(xtensa_isa isa, int num, int is_user); + + +/* + * Check if there exists a sysreg with a given name. + * If not, this function returns XTENSA_UNDEFINED. + */ + +xtensa_sysreg xtensa_sysreg_lookup_name(xtensa_isa isa, const char *name); + + +/* Get the name of a sysreg. Returns null on error. */ + +const char *xtensa_sysreg_name(xtensa_isa isa, xtensa_sysreg sysreg); + + +/* Get the register number. Returns XTENSA_UNDEFINED on error. */ + +int xtensa_sysreg_number(xtensa_isa isa, xtensa_sysreg sysreg); + + +/* + * Check if a sysreg is a "special register" or a "user register". + * Returns 0 for special registers, 1 for user registers and + * XTENSA_UNDEFINED on error. + */ + +int xtensa_sysreg_is_user(xtensa_isa isa, xtensa_sysreg sysreg); + + + +/* Interfaces. */ + +/* + * Find an interface by name. The return value is XTENSA_UNDEFINED if + * the specified interface is not found. + */ + +xtensa_interface xtensa_interface_lookup(xtensa_isa isa, const char *ifname); + + +/* Get the name of an interface. Returns null on error. */ + +const char *xtensa_interface_name(xtensa_isa isa, xtensa_interface intf); + + +/* + * Get the bit width for an interface. + * Returns XTENSA_UNDEFINED on error. + */ + +int xtensa_interface_num_bits(xtensa_isa isa, xtensa_interface intf); + + +/* + * Check if an interface is an input ('i') or output ('o') with respect + * to the Xtensa processor core. Returns 0 on error. + */ + +char xtensa_interface_inout(xtensa_isa isa, xtensa_interface intf); + + +/* + * Check if accessing an interface has potential side effects. + * Currently "data" interfaces have side effects and "control" + * interfaces do not. Returns 1 if there are side effects, 0 if not, + * and XTENSA_UNDEFINED on error. + */ + +int xtensa_interface_has_side_effect(xtensa_isa isa, xtensa_interface intf); + + +/* + * Some interfaces may be related such that accessing one interface + * has side effects on a set of related interfaces. The interfaces + * are partitioned into equivalence classes of related interfaces, and + * each class is assigned a unique identifier number. This function + * returns the class identifier for an interface, or XTENSA_UNDEFINED + * on error. These identifiers can be compared to determine if two + * interfaces are related; the specific values of the identifiers have + * no particular meaning otherwise. + */ + +int xtensa_interface_class_id(xtensa_isa isa, xtensa_interface intf); + + +/* Functional Units. */ + +/* + * Find a functional unit by name. The return value is XTENSA_UNDEFINED if + * the specified unit is not found. + */ + +xtensa_funcUnit xtensa_funcUnit_lookup(xtensa_isa isa, const char *fname); + + +/* Get the name of a functional unit. Returns null on error. */ + +const char *xtensa_funcUnit_name(xtensa_isa isa, xtensa_funcUnit fun); + + +/* + * Functional units may be replicated. See how many instances of a + * particular function unit exist. Returns XTENSA_UNDEFINED on error. + */ + +int xtensa_funcUnit_num_copies(xtensa_isa isa, xtensa_funcUnit fun); + + +#ifdef __cplusplus +} +#endif +#endif /* HW_XTENSA_XTENSA_ISA_H */ diff --git a/include/io/channel-buffer.h b/include/io/channel-buffer.h new file mode 100644 index 000000000..c9463ee65 --- /dev/null +++ b/include/io/channel-buffer.h @@ -0,0 +1,59 @@ +/* + * QEMU I/O channels memory buffer driver + * + * Copyright (c) 2015 Red Hat, Inc. + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, see . + * + */ + +#ifndef QIO_CHANNEL_BUFFER_H +#define QIO_CHANNEL_BUFFER_H + +#include "io/channel.h" +#include "qom/object.h" + +#define TYPE_QIO_CHANNEL_BUFFER "qio-channel-buffer" +OBJECT_DECLARE_SIMPLE_TYPE(QIOChannelBuffer, QIO_CHANNEL_BUFFER) + + +/** + * QIOChannelBuffer: + * + * The QIOChannelBuffer object provides a channel implementation + * that is able to perform I/O to/from a memory buffer. + * + */ + +struct QIOChannelBuffer { + QIOChannel parent; + size_t capacity; /* Total allocated memory */ + size_t usage; /* Current size of data */ + size_t offset; /* Offset for future I/O ops */ + uint8_t *data; +}; + + +/** + * qio_channel_buffer_new: + * @capacity: the initial buffer capacity to allocate + * + * Allocate a new buffer which is initially empty + * + * Returns: the new channel object + */ +QIOChannelBuffer * +qio_channel_buffer_new(size_t capacity); + +#endif /* QIO_CHANNEL_BUFFER_H */ diff --git a/include/io/channel-command.h b/include/io/channel-command.h new file mode 100644 index 000000000..27e42bdad --- /dev/null +++ b/include/io/channel-command.h @@ -0,0 +1,90 @@ +/* + * QEMU I/O channels external command driver + * + * Copyright (c) 2015 Red Hat, Inc. + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, see . + * + */ + +#ifndef QIO_CHANNEL_COMMAND_H +#define QIO_CHANNEL_COMMAND_H + +#include "io/channel.h" +#include "qom/object.h" + +#define TYPE_QIO_CHANNEL_COMMAND "qio-channel-command" +OBJECT_DECLARE_SIMPLE_TYPE(QIOChannelCommand, QIO_CHANNEL_COMMAND) + + + +/** + * QIOChannelCommand: + * + * The QIOChannelCommand class provides a channel implementation + * that can transport data with an externally running command + * via its stdio streams. + */ + +struct QIOChannelCommand { + QIOChannel parent; + int writefd; + int readfd; + pid_t pid; +}; + + +/** + * qio_channel_command_new_pid: + * @writefd: the FD connected to the command's stdin + * @readfd: the FD connected to the command's stdout + * @pid: the PID of the running child command + * @errp: pointer to a NULL-initialized error object + * + * Create a channel for performing I/O with the + * previously spawned command identified by @pid. + * The two file descriptors provide the connection + * to command's stdio streams, either one or which + * may be -1 to indicate that stream is not open. + * + * The channel will take ownership of the process + * @pid and will kill it when closing the channel. + * Similarly it will take responsibility for + * closing the file descriptors @writefd and @readfd. + * + * Returns: the command channel object, or NULL on error + */ +QIOChannelCommand * +qio_channel_command_new_pid(int writefd, + int readfd, + pid_t pid); + +/** + * qio_channel_command_new_spawn: + * @argv: the NULL terminated list of command arguments + * @flags: the I/O mode, one of O_RDONLY, O_WRONLY, O_RDWR + * @errp: pointer to a NULL-initialized error object + * + * Create a channel for performing I/O with the + * command to be spawned with arguments @argv. + * + * Returns: the command channel object, or NULL on error + */ +QIOChannelCommand * +qio_channel_command_new_spawn(const char *const argv[], + int flags, + Error **errp); + + +#endif /* QIO_CHANNEL_COMMAND_H */ diff --git a/include/io/channel-file.h b/include/io/channel-file.h new file mode 100644 index 000000000..50e8eb113 --- /dev/null +++ b/include/io/channel-file.h @@ -0,0 +1,92 @@ +/* + * QEMU I/O channels files driver + * + * Copyright (c) 2015 Red Hat, Inc. + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, see . + * + */ + +#ifndef QIO_CHANNEL_FILE_H +#define QIO_CHANNEL_FILE_H + +#include "io/channel.h" +#include "qom/object.h" + +#define TYPE_QIO_CHANNEL_FILE "qio-channel-file" +OBJECT_DECLARE_SIMPLE_TYPE(QIOChannelFile, QIO_CHANNEL_FILE) + + +/** + * QIOChannelFile: + * + * The QIOChannelFile object provides a channel implementation + * that is able to perform I/O on block devices, character + * devices, FIFOs, pipes and plain files. While it is technically + * able to work on sockets too on the UNIX platform, this is not + * portable to Windows and lacks some extra sockets specific + * functionality. So the QIOChannelSocket object is recommended + * for that use case. + * + */ + +struct QIOChannelFile { + QIOChannel parent; + int fd; +}; + + +/** + * qio_channel_file_new_fd: + * @fd: the file descriptor + * + * Create a new IO channel object for a file represented + * by the @fd parameter. @fd can be associated with a + * block device, character device, fifo, pipe, or a + * regular file. For sockets, the QIOChannelSocket class + * should be used instead, as this provides greater + * functionality and cross platform portability. + * + * The channel will own the passed in file descriptor + * and will take responsibility for closing it, so the + * caller must not close it. If appropriate the caller + * should dup() its FD before opening the channel. + * + * Returns: the new channel object + */ +QIOChannelFile * +qio_channel_file_new_fd(int fd); + +/** + * qio_channel_file_new_path: + * @path: the file path + * @flags: the open flags (O_RDONLY|O_WRONLY|O_RDWR, etc) + * @mode: the file creation mode if O_CREAT is set in @flags + * @errp: pointer to initialized error object + * + * Create a new IO channel object for a file represented + * by the @path parameter. @path can point to any + * type of file on which sequential I/O can be + * performed, whether it be a plain file, character + * device or block device. + * + * Returns: the new channel object + */ +QIOChannelFile * +qio_channel_file_new_path(const char *path, + int flags, + mode_t mode, + Error **errp); + +#endif /* QIO_CHANNEL_FILE_H */ diff --git a/include/io/channel-socket.h b/include/io/channel-socket.h new file mode 100644 index 000000000..e747e6351 --- /dev/null +++ b/include/io/channel-socket.h @@ -0,0 +1,263 @@ +/* + * QEMU I/O channels sockets driver + * + * Copyright (c) 2015 Red Hat, Inc. + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, see . + * + */ + +#ifndef QIO_CHANNEL_SOCKET_H +#define QIO_CHANNEL_SOCKET_H + +#include "io/channel.h" +#include "io/task.h" +#include "qemu/sockets.h" +#include "qom/object.h" + +#define TYPE_QIO_CHANNEL_SOCKET "qio-channel-socket" +OBJECT_DECLARE_SIMPLE_TYPE(QIOChannelSocket, QIO_CHANNEL_SOCKET) + + +/** + * QIOChannelSocket: + * + * The QIOChannelSocket class provides a channel implementation + * that can transport data over a UNIX socket or TCP socket. + * Beyond the core channel API, it also provides functionality + * for accepting client connections, tuning some socket + * parameters and getting socket address strings. + */ + +struct QIOChannelSocket { + QIOChannel parent; + int fd; + struct sockaddr_storage localAddr; + socklen_t localAddrLen; + struct sockaddr_storage remoteAddr; + socklen_t remoteAddrLen; +}; + + +/** + * qio_channel_socket_new: + * + * Create a channel for performing I/O on a socket + * connection, that is initially closed. After + * creating the socket, it must be setup as a client + * connection or server. + * + * Returns: the socket channel object + */ +QIOChannelSocket * +qio_channel_socket_new(void); + +/** + * qio_channel_socket_new_fd: + * @fd: the socket file descriptor + * @errp: pointer to a NULL-initialized error object + * + * Create a channel for performing I/O on the socket + * connection represented by the file descriptor @fd. + * + * Returns: the socket channel object, or NULL on error + */ +QIOChannelSocket * +qio_channel_socket_new_fd(int fd, + Error **errp); + + +/** + * qio_channel_socket_connect_sync: + * @ioc: the socket channel object + * @addr: the address to connect to + * @errp: pointer to a NULL-initialized error object + * + * Attempt to connect to the address @addr. This method + * will run in the foreground so the caller will not regain + * execution control until the connection is established or + * an error occurs. + */ +int qio_channel_socket_connect_sync(QIOChannelSocket *ioc, + SocketAddress *addr, + Error **errp); + +/** + * qio_channel_socket_connect_async: + * @ioc: the socket channel object + * @addr: the address to connect to + * @callback: the function to invoke on completion + * @opaque: user data to pass to @callback + * @destroy: the function to free @opaque + * @context: the context to run the async task. If %NULL, the default + * context will be used. + * + * Attempt to connect to the address @addr. This method + * will run in the background so the caller will regain + * execution control immediately. The function @callback + * will be invoked on completion or failure. The @addr + * parameter will be copied, so may be freed as soon + * as this function returns without waiting for completion. + */ +void qio_channel_socket_connect_async(QIOChannelSocket *ioc, + SocketAddress *addr, + QIOTaskFunc callback, + gpointer opaque, + GDestroyNotify destroy, + GMainContext *context); + + +/** + * qio_channel_socket_listen_sync: + * @ioc: the socket channel object + * @addr: the address to listen to + * @num: the expected ammount of connections + * @errp: pointer to a NULL-initialized error object + * + * Attempt to listen to the address @addr. This method + * will run in the foreground so the caller will not regain + * execution control until the connection is established or + * an error occurs. + */ +int qio_channel_socket_listen_sync(QIOChannelSocket *ioc, + SocketAddress *addr, + int num, + Error **errp); + +/** + * qio_channel_socket_listen_async: + * @ioc: the socket channel object + * @addr: the address to listen to + * @num: the expected ammount of connections + * @callback: the function to invoke on completion + * @opaque: user data to pass to @callback + * @destroy: the function to free @opaque + * @context: the context to run the async task. If %NULL, the default + * context will be used. + * + * Attempt to listen to the address @addr. This method + * will run in the background so the caller will regain + * execution control immediately. The function @callback + * will be invoked on completion or failure. The @addr + * parameter will be copied, so may be freed as soon + * as this function returns without waiting for completion. + */ +void qio_channel_socket_listen_async(QIOChannelSocket *ioc, + SocketAddress *addr, + int num, + QIOTaskFunc callback, + gpointer opaque, + GDestroyNotify destroy, + GMainContext *context); + + +/** + * qio_channel_socket_dgram_sync: + * @ioc: the socket channel object + * @localAddr: the address to local bind address + * @remoteAddr: the address to remote peer address + * @errp: pointer to a NULL-initialized error object + * + * Attempt to initialize a datagram socket bound to + * @localAddr and communicating with peer @remoteAddr. + * This method will run in the foreground so the caller + * will not regain execution control until the socket + * is established or an error occurs. + */ +int qio_channel_socket_dgram_sync(QIOChannelSocket *ioc, + SocketAddress *localAddr, + SocketAddress *remoteAddr, + Error **errp); + +/** + * qio_channel_socket_dgram_async: + * @ioc: the socket channel object + * @localAddr: the address to local bind address + * @remoteAddr: the address to remote peer address + * @callback: the function to invoke on completion + * @opaque: user data to pass to @callback + * @destroy: the function to free @opaque + * @context: the context to run the async task. If %NULL, the default + * context will be used. + * + * Attempt to initialize a datagram socket bound to + * @localAddr and communicating with peer @remoteAddr. + * This method will run in the background so the caller + * will regain execution control immediately. The function + * @callback will be invoked on completion or failure. + * The @localAddr and @remoteAddr parameters will be copied, + * so may be freed as soon as this function returns without + * waiting for completion. + */ +void qio_channel_socket_dgram_async(QIOChannelSocket *ioc, + SocketAddress *localAddr, + SocketAddress *remoteAddr, + QIOTaskFunc callback, + gpointer opaque, + GDestroyNotify destroy, + GMainContext *context); + + +/** + * qio_channel_socket_get_local_address: + * @ioc: the socket channel object + * @errp: pointer to a NULL-initialized error object + * + * Get the string representation of the local socket + * address. A pointer to the allocated address information + * struct will be returned, which the caller is required to + * release with a call qapi_free_SocketAddress() when no + * longer required. + * + * Returns: 0 on success, -1 on error + */ +SocketAddress * +qio_channel_socket_get_local_address(QIOChannelSocket *ioc, + Error **errp); + +/** + * qio_channel_socket_get_remote_address: + * @ioc: the socket channel object + * @errp: pointer to a NULL-initialized error object + * + * Get the string representation of the local socket + * address. A pointer to the allocated address information + * struct will be returned, which the caller is required to + * release with a call qapi_free_SocketAddress() when no + * longer required. + * + * Returns: the socket address struct, or NULL on error + */ +SocketAddress * +qio_channel_socket_get_remote_address(QIOChannelSocket *ioc, + Error **errp); + + +/** + * qio_channel_socket_accept: + * @ioc: the socket channel object + * @errp: pointer to a NULL-initialized error object + * + * If the socket represents a server, then this accepts + * a new client connection. The returned channel will + * represent the connected client socket. + * + * Returns: the new client channel, or NULL on error + */ +QIOChannelSocket * +qio_channel_socket_accept(QIOChannelSocket *ioc, + Error **errp); + + +#endif /* QIO_CHANNEL_SOCKET_H */ diff --git a/include/io/channel-tls.h b/include/io/channel-tls.h new file mode 100644 index 000000000..5672479e9 --- /dev/null +++ b/include/io/channel-tls.h @@ -0,0 +1,145 @@ +/* + * QEMU I/O channels TLS driver + * + * Copyright (c) 2015 Red Hat, Inc. + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, see . + * + */ + +#ifndef QIO_CHANNEL_TLS_H +#define QIO_CHANNEL_TLS_H + +#include "io/channel.h" +#include "io/task.h" +#include "crypto/tlssession.h" +#include "qom/object.h" + +#define TYPE_QIO_CHANNEL_TLS "qio-channel-tls" +OBJECT_DECLARE_SIMPLE_TYPE(QIOChannelTLS, QIO_CHANNEL_TLS) + + +/** + * QIOChannelTLS + * + * The QIOChannelTLS class provides a channel wrapper which + * can transparently run the TLS encryption protocol. It is + * usually used over a TCP socket, but there is actually no + * technical restriction on which type of master channel is + * used as the transport. + * + * This channel object is capable of running as either a + * TLS server or TLS client. + */ + +struct QIOChannelTLS { + QIOChannel parent; + QIOChannel *master; + QCryptoTLSSession *session; + QIOChannelShutdown shutdown; +}; + +/** + * qio_channel_tls_new_server: + * @master: the underlying channel object + * @creds: the credentials to use for TLS handshake + * @aclname: the access control list for validating clients + * @errp: pointer to a NULL-initialized error object + * + * Create a new TLS channel that runs the server side of + * a TLS session. The TLS session handshake will use the + * credentials provided in @creds. If the @aclname parameter + * is non-NULL, then the client will have to provide + * credentials (ie a x509 client certificate) which will + * then be validated against the ACL. + * + * After creating the channel, it is mandatory to call + * the qio_channel_tls_handshake() method before attempting + * todo any I/O on the channel. + * + * Once the handshake has completed, all I/O should be done + * via the new TLS channel object and not the original + * master channel + * + * Returns: the new TLS channel object, or NULL + */ +QIOChannelTLS * +qio_channel_tls_new_server(QIOChannel *master, + QCryptoTLSCreds *creds, + const char *aclname, + Error **errp); + +/** + * qio_channel_tls_new_client: + * @master: the underlying channel object + * @creds: the credentials to use for TLS handshake + * @hostname: the user specified server hostname + * @errp: pointer to a NULL-initialized error object + * + * Create a new TLS channel that runs the client side of + * a TLS session. The TLS session handshake will use the + * credentials provided in @creds. The @hostname parameter + * should provide the user specified hostname of the server + * and will be validated against the server's credentials + * (ie CommonName of the x509 certificate) + * + * After creating the channel, it is mandatory to call + * the qio_channel_tls_handshake() method before attempting + * todo any I/O on the channel. + * + * Once the handshake has completed, all I/O should be done + * via the new TLS channel object and not the original + * master channel + * + * Returns: the new TLS channel object, or NULL + */ +QIOChannelTLS * +qio_channel_tls_new_client(QIOChannel *master, + QCryptoTLSCreds *creds, + const char *hostname, + Error **errp); + +/** + * qio_channel_tls_handshake: + * @ioc: the TLS channel object + * @func: the callback to invoke when completed + * @opaque: opaque data to pass to @func + * @destroy: optional callback to free @opaque + * @context: the context that TLS handshake will run with. If %NULL, + * the default context will be used + * + * Perform the TLS session handshake. This method + * will return immediately and the handshake will + * continue in the background, provided the main + * loop is running. When the handshake is complete, + * or fails, the @func callback will be invoked. + */ +void qio_channel_tls_handshake(QIOChannelTLS *ioc, + QIOTaskFunc func, + gpointer opaque, + GDestroyNotify destroy, + GMainContext *context); + +/** + * qio_channel_tls_get_session: + * @ioc: the TLS channel object + * + * Get the TLS session used by the channel. + * + * Returns: the TLS session + */ +QCryptoTLSSession * +qio_channel_tls_get_session(QIOChannelTLS *ioc); + +#endif /* QIO_CHANNEL_TLS_H */ diff --git a/include/io/channel-util.h b/include/io/channel-util.h new file mode 100644 index 000000000..a5d720d9a --- /dev/null +++ b/include/io/channel-util.h @@ -0,0 +1,52 @@ +/* + * QEMU I/O channels utility APIs + * + * Copyright (c) 2016 Red Hat, Inc. + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, see . + * + */ + +#ifndef QIO_CHANNEL_UTIL_H +#define QIO_CHANNEL_UTIL_H + +#include "io/channel.h" + +/* + * This module provides helper functions that are useful when dealing + * with QIOChannel objects + */ + + +/** + * qio_channel_new_fd: + * @fd: the file descriptor + * @errp: pointer to a NULL-initialized error object + * + * Create a channel for performing I/O on the file + * descriptor @fd. The particular subclass of QIOChannel + * that is returned will depend on what underlying object + * the file descriptor is associated with. It may be either + * a QIOChannelSocket or a QIOChannelFile instance. Upon + * success, the returned QIOChannel instance will own + * the @fd file descriptor, and take responsibility for + * closing it when no longer required. On failure, the + * caller is responsible for closing @fd. + * + * Returns: the channel object, or NULL on error + */ +QIOChannel *qio_channel_new_fd(int fd, + Error **errp); + +#endif /* QIO_CHANNEL_UTIL_H */ diff --git a/include/io/channel-watch.h b/include/io/channel-watch.h new file mode 100644 index 000000000..a36aab8f8 --- /dev/null +++ b/include/io/channel-watch.h @@ -0,0 +1,90 @@ +/* + * QEMU I/O channels watch helper APIs + * + * Copyright (c) 2015 Red Hat, Inc. + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, see . + * + */ + +#ifndef QIO_CHANNEL_WATCH_H +#define QIO_CHANNEL_WATCH_H + +#include "io/channel.h" + +/* + * This module provides helper functions that will be needed by + * the various QIOChannel implementations, for creating watches + * on file descriptors / sockets + */ + +/** + * qio_channel_create_fd_watch: + * @ioc: the channel object + * @fd: the file descriptor + * @condition: the I/O condition + * + * Create a new main loop source that is able to + * monitor the file descriptor @fd for the + * I/O conditions in @condition. This is able + * monitor block devices, character devices, + * pipes but not plain files or, on Win32, sockets. + * + * Returns: the new main loop source + */ +GSource *qio_channel_create_fd_watch(QIOChannel *ioc, + int fd, + GIOCondition condition); + +/** + * qio_channel_create_socket_watch: + * @ioc: the channel object + * @fd: the file descriptor + * @condition: the I/O condition + * + * Create a new main loop source that is able to + * monitor the file descriptor @fd for the + * I/O conditions in @condition. This is equivalent + * to qio_channel_create_fd_watch on POSIX systems + * but not on Windows. + * + * Returns: the new main loop source + */ +GSource *qio_channel_create_socket_watch(QIOChannel *ioc, + int fd, + GIOCondition condition); + +/** + * qio_channel_create_fd_pair_watch: + * @ioc: the channel object + * @fdread: the file descriptor for reading + * @fdwrite: the file descriptor for writing + * @condition: the I/O condition + * + * Create a new main loop source that is able to + * monitor the pair of file descriptors @fdread + * and @fdwrite for the I/O conditions in @condition. + * This is intended for monitoring unidirectional + * file descriptors such as pipes, where a pair + * of descriptors is required for bidirectional + * I/O + * + * Returns: the new main loop source + */ +GSource *qio_channel_create_fd_pair_watch(QIOChannel *ioc, + int fdread, + int fdwrite, + GIOCondition condition); + +#endif /* QIO_CHANNEL_WATCH_H */ diff --git a/include/io/channel-websock.h b/include/io/channel-websock.h new file mode 100644 index 000000000..e180827c5 --- /dev/null +++ b/include/io/channel-websock.h @@ -0,0 +1,108 @@ +/* + * QEMU I/O channels driver websockets + * + * Copyright (c) 2015 Red Hat, Inc. + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, see . + * + */ + +#ifndef QIO_CHANNEL_WEBSOCK_H +#define QIO_CHANNEL_WEBSOCK_H + +#include "io/channel.h" +#include "qemu/buffer.h" +#include "io/task.h" +#include "qom/object.h" + +#define TYPE_QIO_CHANNEL_WEBSOCK "qio-channel-websock" +OBJECT_DECLARE_SIMPLE_TYPE(QIOChannelWebsock, QIO_CHANNEL_WEBSOCK) + +typedef union QIOChannelWebsockMask QIOChannelWebsockMask; + +union QIOChannelWebsockMask { + char c[4]; + uint32_t u; +}; + +/** + * QIOChannelWebsock + * + * The QIOChannelWebsock class provides a channel wrapper which + * can transparently run the HTTP websockets protocol. This is + * usually used over a TCP socket, but there is actually no + * technical restriction on which type of master channel is + * used as the transport. + * + * This channel object is currently only capable of running as + * a websocket server and is a pretty crude implementation + * of it, not supporting the full websockets protocol feature + * set. It is sufficient to use with a simple websockets + * client for encapsulating VNC for noVNC in-browser client. + */ + +struct QIOChannelWebsock { + QIOChannel parent; + QIOChannel *master; + Buffer encinput; + Buffer encoutput; + Buffer rawinput; + size_t payload_remain; + size_t pong_remain; + QIOChannelWebsockMask mask; + guint io_tag; + Error *io_err; + gboolean io_eof; + uint8_t opcode; +}; + +/** + * qio_channel_websock_new_server: + * @master: the underlying channel object + * + * Create a new websockets channel that runs the server + * side of the protocol. + * + * After creating the channel, it is mandatory to call + * the qio_channel_websock_handshake() method before attempting + * todo any I/O on the channel. + * + * Once the handshake has completed, all I/O should be done + * via the new websocket channel object and not the original + * master channel + * + * Returns: the new websockets channel object + */ +QIOChannelWebsock * +qio_channel_websock_new_server(QIOChannel *master); + +/** + * qio_channel_websock_handshake: + * @ioc: the websocket channel object + * @func: the callback to invoke when completed + * @opaque: opaque data to pass to @func + * @destroy: optional callback to free @opaque + * + * Perform the websocket handshake. This method + * will return immediately and the handshake will + * continue in the background, provided the main + * loop is running. When the handshake is complete, + * or fails, the @func callback will be invoked. + */ +void qio_channel_websock_handshake(QIOChannelWebsock *ioc, + QIOTaskFunc func, + gpointer opaque, + GDestroyNotify destroy); + +#endif /* QIO_CHANNEL_WEBSOCK_H */ diff --git a/include/io/channel.h b/include/io/channel.h new file mode 100644 index 000000000..4d6fe45f6 --- /dev/null +++ b/include/io/channel.h @@ -0,0 +1,777 @@ +/* + * QEMU I/O channels + * + * Copyright (c) 2015 Red Hat, Inc. + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, see . + * + */ + +#ifndef QIO_CHANNEL_H +#define QIO_CHANNEL_H + +#include "qom/object.h" +#include "qemu/coroutine.h" +#include "block/aio.h" + +#define TYPE_QIO_CHANNEL "qio-channel" +OBJECT_DECLARE_TYPE(QIOChannel, QIOChannelClass, + QIO_CHANNEL) + + +#define QIO_CHANNEL_ERR_BLOCK -2 + +typedef enum QIOChannelFeature QIOChannelFeature; + +enum QIOChannelFeature { + QIO_CHANNEL_FEATURE_FD_PASS, + QIO_CHANNEL_FEATURE_SHUTDOWN, + QIO_CHANNEL_FEATURE_LISTEN, +}; + + +typedef enum QIOChannelShutdown QIOChannelShutdown; + +enum QIOChannelShutdown { + QIO_CHANNEL_SHUTDOWN_READ = 1, + QIO_CHANNEL_SHUTDOWN_WRITE = 2, + QIO_CHANNEL_SHUTDOWN_BOTH = 3, +}; + +typedef gboolean (*QIOChannelFunc)(QIOChannel *ioc, + GIOCondition condition, + gpointer data); + +/** + * QIOChannel: + * + * The QIOChannel defines the core API for a generic I/O channel + * class hierarchy. It is inspired by GIOChannel, but has the + * following differences + * + * - Use QOM to properly support arbitrary subclassing + * - Support use of iovecs for efficient I/O with multiple blocks + * - None of the character set translation, binary data exclusively + * - Direct support for QEMU Error object reporting + * - File descriptor passing + * + * This base class is abstract so cannot be instantiated. There + * will be subclasses for dealing with sockets, files, and higher + * level protocols such as TLS, WebSocket, etc. + */ + +struct QIOChannel { + Object parent; + unsigned int features; /* bitmask of QIOChannelFeatures */ + char *name; + AioContext *ctx; + Coroutine *read_coroutine; + Coroutine *write_coroutine; +#ifdef _WIN32 + HANDLE event; /* For use with GSource on Win32 */ +#endif +}; + +/** + * QIOChannelClass: + * + * This class defines the contract that all subclasses + * must follow to provide specific channel implementations. + * The first five callbacks are mandatory to support, others + * provide additional optional features. + * + * Consult the corresponding public API docs for a description + * of the semantics of each callback + */ +struct QIOChannelClass { + ObjectClass parent; + + /* Mandatory callbacks */ + ssize_t (*io_writev)(QIOChannel *ioc, + const struct iovec *iov, + size_t niov, + int *fds, + size_t nfds, + Error **errp); + ssize_t (*io_readv)(QIOChannel *ioc, + const struct iovec *iov, + size_t niov, + int **fds, + size_t *nfds, + Error **errp); + int (*io_close)(QIOChannel *ioc, + Error **errp); + GSource * (*io_create_watch)(QIOChannel *ioc, + GIOCondition condition); + int (*io_set_blocking)(QIOChannel *ioc, + bool enabled, + Error **errp); + + /* Optional callbacks */ + int (*io_shutdown)(QIOChannel *ioc, + QIOChannelShutdown how, + Error **errp); + void (*io_set_cork)(QIOChannel *ioc, + bool enabled); + void (*io_set_delay)(QIOChannel *ioc, + bool enabled); + off_t (*io_seek)(QIOChannel *ioc, + off_t offset, + int whence, + Error **errp); + void (*io_set_aio_fd_handler)(QIOChannel *ioc, + AioContext *ctx, + IOHandler *io_read, + IOHandler *io_write, + void *opaque); +}; + +/* General I/O handling functions */ + +/** + * qio_channel_has_feature: + * @ioc: the channel object + * @feature: the feature to check support of + * + * Determine whether the channel implementation supports + * the optional feature named in @feature. + * + * Returns: true if supported, false otherwise. + */ +bool qio_channel_has_feature(QIOChannel *ioc, + QIOChannelFeature feature); + +/** + * qio_channel_set_feature: + * @ioc: the channel object + * @feature: the feature to set support for + * + * Add channel support for the feature named in @feature. + */ +void qio_channel_set_feature(QIOChannel *ioc, + QIOChannelFeature feature); + +/** + * qio_channel_set_name: + * @ioc: the channel object + * @name: the name of the channel + * + * Sets the name of the channel, which serves as an aid + * to debugging. The name is used when creating GSource + * watches for this channel. + */ +void qio_channel_set_name(QIOChannel *ioc, + const char *name); + +/** + * qio_channel_readv_full: + * @ioc: the channel object + * @iov: the array of memory regions to read data into + * @niov: the length of the @iov array + * @fds: pointer to an array that will received file handles + * @nfds: pointer filled with number of elements in @fds on return + * @errp: pointer to a NULL-initialized error object + * + * Read data from the IO channel, storing it in the + * memory regions referenced by @iov. Each element + * in the @iov will be fully populated with data + * before the next one is used. The @niov parameter + * specifies the total number of elements in @iov. + * + * It is not required for all @iov to be filled with + * data. If the channel is in blocking mode, at least + * one byte of data will be read, but no more is + * guaranteed. If the channel is non-blocking and no + * data is available, it will return QIO_CHANNEL_ERR_BLOCK + * + * If the channel has passed any file descriptors, + * the @fds array pointer will be allocated and + * the elements filled with the received file + * descriptors. The @nfds pointer will be updated + * to indicate the size of the @fds array that + * was allocated. It is the callers responsibility + * to call close() on each file descriptor and to + * call g_free() on the array pointer in @fds. + * + * It is an error to pass a non-NULL @fds parameter + * unless qio_channel_has_feature() returns a true + * value for the QIO_CHANNEL_FEATURE_FD_PASS constant. + * + * Returns: the number of bytes read, or -1 on error, + * or QIO_CHANNEL_ERR_BLOCK if no data is available + * and the channel is non-blocking + */ +ssize_t qio_channel_readv_full(QIOChannel *ioc, + const struct iovec *iov, + size_t niov, + int **fds, + size_t *nfds, + Error **errp); + + +/** + * qio_channel_writev_full: + * @ioc: the channel object + * @iov: the array of memory regions to write data from + * @niov: the length of the @iov array + * @fds: an array of file handles to send + * @nfds: number of file handles in @fds + * @errp: pointer to a NULL-initialized error object + * + * Write data to the IO channel, reading it from the + * memory regions referenced by @iov. Each element + * in the @iov will be fully sent, before the next + * one is used. The @niov parameter specifies the + * total number of elements in @iov. + * + * It is not required for all @iov data to be fully + * sent. If the channel is in blocking mode, at least + * one byte of data will be sent, but no more is + * guaranteed. If the channel is non-blocking and no + * data can be sent, it will return QIO_CHANNEL_ERR_BLOCK + * + * If there are file descriptors to send, the @fds + * array should be non-NULL and provide the handles. + * All file descriptors will be sent if at least one + * byte of data was sent. + * + * It is an error to pass a non-NULL @fds parameter + * unless qio_channel_has_feature() returns a true + * value for the QIO_CHANNEL_FEATURE_FD_PASS constant. + * + * Returns: the number of bytes sent, or -1 on error, + * or QIO_CHANNEL_ERR_BLOCK if no data is can be sent + * and the channel is non-blocking + */ +ssize_t qio_channel_writev_full(QIOChannel *ioc, + const struct iovec *iov, + size_t niov, + int *fds, + size_t nfds, + Error **errp); + +/** + * qio_channel_readv_all_eof: + * @ioc: the channel object + * @iov: the array of memory regions to read data into + * @niov: the length of the @iov array + * @errp: pointer to a NULL-initialized error object + * + * Read data from the IO channel, storing it in the + * memory regions referenced by @iov. Each element + * in the @iov will be fully populated with data + * before the next one is used. The @niov parameter + * specifies the total number of elements in @iov. + * + * The function will wait for all requested data + * to be read, yielding from the current coroutine + * if required. + * + * If end-of-file occurs before any data is read, + * no error is reported; otherwise, if it occurs + * before all requested data has been read, an error + * will be reported. + * + * Returns: 1 if all bytes were read, 0 if end-of-file + * occurs without data, or -1 on error + */ +int qio_channel_readv_all_eof(QIOChannel *ioc, + const struct iovec *iov, + size_t niov, + Error **errp); + +/** + * qio_channel_readv_all: + * @ioc: the channel object + * @iov: the array of memory regions to read data into + * @niov: the length of the @iov array + * @errp: pointer to a NULL-initialized error object + * + * Read data from the IO channel, storing it in the + * memory regions referenced by @iov. Each element + * in the @iov will be fully populated with data + * before the next one is used. The @niov parameter + * specifies the total number of elements in @iov. + * + * The function will wait for all requested data + * to be read, yielding from the current coroutine + * if required. + * + * If end-of-file occurs before all requested data + * has been read, an error will be reported. + * + * Returns: 0 if all bytes were read, or -1 on error + */ +int qio_channel_readv_all(QIOChannel *ioc, + const struct iovec *iov, + size_t niov, + Error **errp); + + +/** + * qio_channel_writev_all: + * @ioc: the channel object + * @iov: the array of memory regions to write data from + * @niov: the length of the @iov array + * @errp: pointer to a NULL-initialized error object + * + * Write data to the IO channel, reading it from the + * memory regions referenced by @iov. Each element + * in the @iov will be fully sent, before the next + * one is used. The @niov parameter specifies the + * total number of elements in @iov. + * + * The function will wait for all requested data + * to be written, yielding from the current coroutine + * if required. + * + * Returns: 0 if all bytes were written, or -1 on error + */ +int qio_channel_writev_all(QIOChannel *ioc, + const struct iovec *iov, + size_t niov, + Error **erp); + +/** + * qio_channel_readv: + * @ioc: the channel object + * @iov: the array of memory regions to read data into + * @niov: the length of the @iov array + * @errp: pointer to a NULL-initialized error object + * + * Behaves as qio_channel_readv_full() but does not support + * receiving of file handles. + */ +ssize_t qio_channel_readv(QIOChannel *ioc, + const struct iovec *iov, + size_t niov, + Error **errp); + +/** + * qio_channel_writev: + * @ioc: the channel object + * @iov: the array of memory regions to write data from + * @niov: the length of the @iov array + * @errp: pointer to a NULL-initialized error object + * + * Behaves as qio_channel_writev_full() but does not support + * sending of file handles. + */ +ssize_t qio_channel_writev(QIOChannel *ioc, + const struct iovec *iov, + size_t niov, + Error **errp); + +/** + * qio_channel_read: + * @ioc: the channel object + * @buf: the memory region to read data into + * @buflen: the length of @buf + * @errp: pointer to a NULL-initialized error object + * + * Behaves as qio_channel_readv_full() but does not support + * receiving of file handles, and only supports reading into + * a single memory region. + */ +ssize_t qio_channel_read(QIOChannel *ioc, + char *buf, + size_t buflen, + Error **errp); + +/** + * qio_channel_write: + * @ioc: the channel object + * @buf: the memory regions to send data from + * @buflen: the length of @buf + * @errp: pointer to a NULL-initialized error object + * + * Behaves as qio_channel_writev_full() but does not support + * sending of file handles, and only supports writing from a + * single memory region. + */ +ssize_t qio_channel_write(QIOChannel *ioc, + const char *buf, + size_t buflen, + Error **errp); + +/** + * qio_channel_read_all_eof: + * @ioc: the channel object + * @buf: the memory region to read data into + * @buflen: the number of bytes to @buf + * @errp: pointer to a NULL-initialized error object + * + * Reads @buflen bytes into @buf, possibly blocking or (if the + * channel is non-blocking) yielding from the current coroutine + * multiple times until the entire content is read. If end-of-file + * occurs immediately it is not an error, but if it occurs after + * data has been read it will return an error rather than a + * short-read. Otherwise behaves as qio_channel_read(). + * + * Returns: 1 if all bytes were read, 0 if end-of-file occurs + * without data, or -1 on error + */ +int qio_channel_read_all_eof(QIOChannel *ioc, + char *buf, + size_t buflen, + Error **errp); + +/** + * qio_channel_read_all: + * @ioc: the channel object + * @buf: the memory region to read data into + * @buflen: the number of bytes to @buf + * @errp: pointer to a NULL-initialized error object + * + * Reads @buflen bytes into @buf, possibly blocking or (if the + * channel is non-blocking) yielding from the current coroutine + * multiple times until the entire content is read. If end-of-file + * occurs it will return an error rather than a short-read. Otherwise + * behaves as qio_channel_read(). + * + * Returns: 0 if all bytes were read, or -1 on error + */ +int qio_channel_read_all(QIOChannel *ioc, + char *buf, + size_t buflen, + Error **errp); + +/** + * qio_channel_write_all: + * @ioc: the channel object + * @buf: the memory region to write data into + * @buflen: the number of bytes to @buf + * @errp: pointer to a NULL-initialized error object + * + * Writes @buflen bytes from @buf, possibly blocking or (if the + * channel is non-blocking) yielding from the current coroutine + * multiple times until the entire content is written. Otherwise + * behaves as qio_channel_write(). + * + * Returns: 0 if all bytes were written, or -1 on error + */ +int qio_channel_write_all(QIOChannel *ioc, + const char *buf, + size_t buflen, + Error **errp); + +/** + * qio_channel_set_blocking: + * @ioc: the channel object + * @enabled: the blocking flag state + * @errp: pointer to a NULL-initialized error object + * + * If @enabled is true, then the channel is put into + * blocking mode, otherwise it will be non-blocking. + * + * In non-blocking mode, read/write operations may + * return QIO_CHANNEL_ERR_BLOCK if they would otherwise + * block on I/O + */ +int qio_channel_set_blocking(QIOChannel *ioc, + bool enabled, + Error **errp); + +/** + * qio_channel_close: + * @ioc: the channel object + * @errp: pointer to a NULL-initialized error object + * + * Close the channel, flushing any pending I/O + * + * Returns: 0 on success, -1 on error + */ +int qio_channel_close(QIOChannel *ioc, + Error **errp); + +/** + * qio_channel_shutdown: + * @ioc: the channel object + * @how: the direction to shutdown + * @errp: pointer to a NULL-initialized error object + * + * Shutdowns transmission and/or receiving of data + * without closing the underlying transport. + * + * Not all implementations will support this facility, + * so may report an error. To avoid errors, the + * caller may check for the feature flag + * QIO_CHANNEL_FEATURE_SHUTDOWN prior to calling + * this method. + * + * Returns: 0 on success, -1 on error + */ +int qio_channel_shutdown(QIOChannel *ioc, + QIOChannelShutdown how, + Error **errp); + +/** + * qio_channel_set_delay: + * @ioc: the channel object + * @enabled: the new flag state + * + * Controls whether the underlying transport is + * permitted to delay writes in order to merge + * small packets. If @enabled is true, then the + * writes may be delayed in order to opportunistically + * merge small packets into larger ones. If @enabled + * is false, writes are dispatched immediately with + * no delay. + * + * When @enabled is false, applications may wish to + * use the qio_channel_set_cork() method to explicitly + * control write merging. + * + * On channels which are backed by a socket, this + * API corresponds to the inverse of TCP_NODELAY flag, + * controlling whether the Nagle algorithm is active. + * + * This setting is merely a hint, so implementations are + * free to ignore this without it being considered an + * error. + */ +void qio_channel_set_delay(QIOChannel *ioc, + bool enabled); + +/** + * qio_channel_set_cork: + * @ioc: the channel object + * @enabled: the new flag state + * + * Controls whether the underlying transport is + * permitted to dispatch data that is written. + * If @enabled is true, then any data written will + * be queued in local buffers until @enabled is + * set to false once again. + * + * This feature is typically used when the automatic + * write coalescing facility is disabled via the + * qio_channel_set_delay() method. + * + * On channels which are backed by a socket, this + * API corresponds to the TCP_CORK flag. + * + * This setting is merely a hint, so implementations are + * free to ignore this without it being considered an + * error. + */ +void qio_channel_set_cork(QIOChannel *ioc, + bool enabled); + + +/** + * qio_channel_seek: + * @ioc: the channel object + * @offset: the position to seek to, relative to @whence + * @whence: one of the (POSIX) SEEK_* constants listed below + * @errp: pointer to a NULL-initialized error object + * + * Moves the current I/O position within the channel + * @ioc, to be @offset. The value of @offset is + * interpreted relative to @whence: + * + * SEEK_SET - the position is set to @offset bytes + * SEEK_CUR - the position is moved by @offset bytes + * SEEK_END - the position is set to end of the file plus @offset bytes + * + * Not all implementations will support this facility, + * so may report an error. + * + * Returns: the new position on success, (off_t)-1 on failure + */ +off_t qio_channel_io_seek(QIOChannel *ioc, + off_t offset, + int whence, + Error **errp); + + +/** + * qio_channel_create_watch: + * @ioc: the channel object + * @condition: the I/O condition to monitor + * + * Create a new main loop source that is used to watch + * for the I/O condition @condition. Typically the + * qio_channel_add_watch() method would be used instead + * of this, since it directly attaches a callback to + * the source + * + * Returns: the new main loop source. + */ +GSource *qio_channel_create_watch(QIOChannel *ioc, + GIOCondition condition); + +/** + * qio_channel_add_watch: + * @ioc: the channel object + * @condition: the I/O condition to monitor + * @func: callback to invoke when the source becomes ready + * @user_data: opaque data to pass to @func + * @notify: callback to free @user_data + * + * Create a new main loop source that is used to watch + * for the I/O condition @condition. The callback @func + * will be registered against the source, to be invoked + * when the source becomes ready. The optional @user_data + * will be passed to @func when it is invoked. The @notify + * callback will be used to free @user_data when the + * watch is deleted + * + * The returned source ID can be used with g_source_remove() + * to remove and free the source when no longer required. + * Alternatively the @func callback can return a FALSE + * value. + * + * Returns: the source ID + */ +guint qio_channel_add_watch(QIOChannel *ioc, + GIOCondition condition, + QIOChannelFunc func, + gpointer user_data, + GDestroyNotify notify); + +/** + * qio_channel_add_watch_full: + * @ioc: the channel object + * @condition: the I/O condition to monitor + * @func: callback to invoke when the source becomes ready + * @user_data: opaque data to pass to @func + * @notify: callback to free @user_data + * @context: the context to run the watch source + * + * Similar as qio_channel_add_watch(), but allows to specify context + * to run the watch source. + * + * Returns: the source ID + */ +guint qio_channel_add_watch_full(QIOChannel *ioc, + GIOCondition condition, + QIOChannelFunc func, + gpointer user_data, + GDestroyNotify notify, + GMainContext *context); + +/** + * qio_channel_add_watch_source: + * @ioc: the channel object + * @condition: the I/O condition to monitor + * @func: callback to invoke when the source becomes ready + * @user_data: opaque data to pass to @func + * @notify: callback to free @user_data + * @context: gcontext to bind the source to + * + * Similar as qio_channel_add_watch(), but allows to specify context + * to run the watch source, meanwhile return the GSource object + * instead of tag ID, with the GSource referenced already. + * + * Note: callers is responsible to unref the source when not needed. + * + * Returns: the source pointer + */ +GSource *qio_channel_add_watch_source(QIOChannel *ioc, + GIOCondition condition, + QIOChannelFunc func, + gpointer user_data, + GDestroyNotify notify, + GMainContext *context); + +/** + * qio_channel_attach_aio_context: + * @ioc: the channel object + * @ctx: the #AioContext to set the handlers on + * + * Request that qio_channel_yield() sets I/O handlers on + * the given #AioContext. If @ctx is %NULL, qio_channel_yield() + * uses QEMU's main thread event loop. + * + * You can move a #QIOChannel from one #AioContext to another even if + * I/O handlers are set for a coroutine. However, #QIOChannel provides + * no synchronization between the calls to qio_channel_yield() and + * qio_channel_attach_aio_context(). + * + * Therefore you should first call qio_channel_detach_aio_context() + * to ensure that the coroutine is not entered concurrently. Then, + * while the coroutine has yielded, call qio_channel_attach_aio_context(), + * and then aio_co_schedule() to place the coroutine on the new + * #AioContext. The calls to qio_channel_detach_aio_context() + * and qio_channel_attach_aio_context() should be protected with + * aio_context_acquire() and aio_context_release(). + */ +void qio_channel_attach_aio_context(QIOChannel *ioc, + AioContext *ctx); + +/** + * qio_channel_detach_aio_context: + * @ioc: the channel object + * + * Disable any I/O handlers set by qio_channel_yield(). With the + * help of aio_co_schedule(), this allows moving a coroutine that was + * paused by qio_channel_yield() to another context. + */ +void qio_channel_detach_aio_context(QIOChannel *ioc); + +/** + * qio_channel_yield: + * @ioc: the channel object + * @condition: the I/O condition to wait for + * + * Yields execution from the current coroutine until the condition + * indicated by @condition becomes available. @condition must + * be either %G_IO_IN or %G_IO_OUT; it cannot contain both. In + * addition, no two coroutine can be waiting on the same condition + * and channel at the same time. + * + * This must only be called from coroutine context. It is safe to + * reenter the coroutine externally while it is waiting; in this + * case the function will return even if @condition is not yet + * available. + */ +void coroutine_fn qio_channel_yield(QIOChannel *ioc, + GIOCondition condition); + +/** + * qio_channel_wait: + * @ioc: the channel object + * @condition: the I/O condition to wait for + * + * Block execution from the current thread until + * the condition indicated by @condition becomes + * available. + * + * This will enter a nested event loop to perform + * the wait. + */ +void qio_channel_wait(QIOChannel *ioc, + GIOCondition condition); + +/** + * qio_channel_set_aio_fd_handler: + * @ioc: the channel object + * @ctx: the AioContext to set the handlers on + * @io_read: the read handler + * @io_write: the write handler + * @opaque: the opaque value passed to the handler + * + * This is used internally by qio_channel_yield(). It can + * be used by channel implementations to forward the handlers + * to another channel (e.g. from #QIOChannelTLS to the + * underlying socket). + */ +void qio_channel_set_aio_fd_handler(QIOChannel *ioc, + AioContext *ctx, + IOHandler *io_read, + IOHandler *io_write, + void *opaque); + +#endif /* QIO_CHANNEL_H */ diff --git a/include/io/dns-resolver.h b/include/io/dns-resolver.h new file mode 100644 index 000000000..558ea517d --- /dev/null +++ b/include/io/dns-resolver.h @@ -0,0 +1,219 @@ +/* + * QEMU DNS resolver + * + * Copyright (c) 2016-2017 Red Hat, Inc. + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, see . + * + */ + +#ifndef QIO_DNS_RESOLVER_H +#define QIO_DNS_RESOLVER_H + +#include "qapi/qapi-types-sockets.h" +#include "qom/object.h" +#include "io/task.h" + +#define TYPE_QIO_DNS_RESOLVER "qio-dns-resolver" +OBJECT_DECLARE_SIMPLE_TYPE(QIODNSResolver, + QIO_DNS_RESOLVER) + + +/** + * QIODNSResolver: + * + * The QIODNSResolver class provides a framework for doing + * DNS resolution on SocketAddress objects, independently + * of socket creation. + * + * + * Resolving addresses synchronously + * + * int mylisten(SocketAddress *addr, Error **errp) { + * QIODNSResolver *resolver = qio_dns_resolver_get_instance(); + * SocketAddress **rawaddrs = NULL; + * size_t nrawaddrs = 0; + * Error *err = NULL; + * QIOChannel **socks = NULL; + * size_t nsocks = 0; + * + * if (qio_dns_resolver_lookup_sync(dns, addr, &nrawaddrs, + * &rawaddrs, errp) < 0) { + * return -1; + * } + * + * for (i = 0; i < nrawaddrs; i++) { + * QIOChannel *sock = qio_channel_new(); + * Error *local_err = NULL; + * qio_channel_listen_sync(sock, rawaddrs[i], &local_err); + * if (local_err) { + * error_propagate(&err, local_err); + * } else { + * socks = g_renew(QIOChannelSocket *, socks, nsocks + 1); + * socks[nsocks++] = sock; + * } + * qapi_free_SocketAddress(rawaddrs[i]); + * } + * g_free(rawaddrs); + * + * if (nsocks == 0) { + * error_propagate(errp, err); + * } else { + * error_free(err); + * } + * } + * + * + * + * + * Resolving addresses asynchronously + * + * typedef struct MyListenData { + * Error *err; + * QIOChannelSocket **socks; + * size_t nsocks; + * } MyListenData; + * + * void mylistenresult(QIOTask *task, void *opaque) { + * MyListenData *data = opaque; + * QIODNSResolver *resolver = + * QIO_DNS_RESOLVER(qio_task_get_source(task); + * SocketAddress **rawaddrs = NULL; + * size_t nrawaddrs = 0; + * Error *err = NULL; + * + * if (qio_task_propagate_error(task, &data->err)) { + * return; + * } + * + * qio_dns_resolver_lookup_result(resolver, task, + * &nrawaddrs, &rawaddrs); + * + * for (i = 0; i < nrawaddrs; i++) { + * QIOChannel *sock = qio_channel_new(); + * Error *local_err = NULL; + * qio_channel_listen_sync(sock, rawaddrs[i], &local_err); + * if (local_err) { + * error_propagate(&err, local_err); + * } else { + * socks = g_renew(QIOChannelSocket *, socks, nsocks + 1); + * socks[nsocks++] = sock; + * } + * qapi_free_SocketAddress(rawaddrs[i]); + * } + * g_free(rawaddrs); + * + * if (nsocks == 0) { + * error_propagate(&data->err, err); + * } else { + * error_free(err); + * } + * } + * + * void mylisten(SocketAddress *addr, MyListenData *data) { + * QIODNSResolver *resolver = qio_dns_resolver_get_instance(); + * qio_dns_resolver_lookup_async(dns, addr, + * mylistenresult, data, NULL); + * } + * + * + */ +struct QIODNSResolver { + Object parent; +}; + + + +/** + * qio_dns_resolver_get_instance: + * + * Get the singleton dns resolver instance. The caller + * does not own a reference on the returned object. + * + * Returns: the single dns resolver instance + */ +QIODNSResolver *qio_dns_resolver_get_instance(void); + +/** + * qio_dns_resolver_lookup_sync: + * @resolver: the DNS resolver instance + * @addr: the address to resolve + * @naddr: pointer to hold number of resolved addresses + * @addrs: pointer to hold resolved addresses + * @errp: pointer to NULL initialized error object + * + * This will attempt to resolve the address provided + * in @addr. If resolution succeeds, @addrs will be filled + * with all the resolved addresses. @naddrs will specify + * the number of entries allocated in @addrs. The caller + * is responsible for freeing each entry in @addrs, as + * well as @addrs itself. @naddrs is guaranteed to be + * greater than zero on success. + * + * DNS resolution will be done synchronously so execution + * of the caller may be blocked for an arbitrary length + * of time. + * + * Returns: 0 if resolution was successful, -1 on error + */ +int qio_dns_resolver_lookup_sync(QIODNSResolver *resolver, + SocketAddress *addr, + size_t *naddrs, + SocketAddress ***addrs, + Error **errp); + +/** + * qio_dns_resolver_lookup_async: + * @resolver: the DNS resolver instance + * @addr: the address to resolve + * @func: the callback to invoke on lookup completion + * @opaque: data blob to pass to @func + * @notify: the callback to free @opaque, or NULL + * + * This will attempt to resolve the address provided + * in @addr. The callback @func will be invoked when + * resolution has either completed or failed. On + * success, the @func should call the method + * qio_dns_resolver_lookup_result() to obtain the + * results. + * + * DNS resolution will be done asynchronously so execution + * of the caller will not be blocked. + */ +void qio_dns_resolver_lookup_async(QIODNSResolver *resolver, + SocketAddress *addr, + QIOTaskFunc func, + gpointer opaque, + GDestroyNotify notify); + +/** + * qio_dns_resolver_lookup_result: + * @resolver: the DNS resolver instance + * @task: the task object to get results for + * @naddr: pointer to hold number of resolved addresses + * @addrs: pointer to hold resolved addresses + * + * This method should be called from the callback passed + * to qio_dns_resolver_lookup_async() in order to obtain + * results. @addrs will be filled with all the resolved + * addresses. @naddrs will specify the number of entries + * allocated in @addrs. The caller is responsible for + * freeing each entry in @addrs, as well as @addrs itself. + */ +void qio_dns_resolver_lookup_result(QIODNSResolver *resolver, + QIOTask *task, + size_t *naddrs, + SocketAddress ***addrs); + +#endif /* QIO_DNS_RESOLVER_H */ diff --git a/include/io/net-listener.h b/include/io/net-listener.h new file mode 100644 index 000000000..ab9f291ed --- /dev/null +++ b/include/io/net-listener.h @@ -0,0 +1,186 @@ +/* + * QEMU network listener + * + * Copyright (c) 2016-2017 Red Hat, Inc. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, see . + * + */ + +#ifndef QIO_NET_LISTENER_H +#define QIO_NET_LISTENER_H + +#include "io/channel-socket.h" +#include "qom/object.h" + +#define TYPE_QIO_NET_LISTENER "qio-net-listener" +OBJECT_DECLARE_SIMPLE_TYPE(QIONetListener, + QIO_NET_LISTENER) + + +typedef void (*QIONetListenerClientFunc)(QIONetListener *listener, + QIOChannelSocket *sioc, + gpointer data); + +/** + * QIONetListener: + * + * The QIONetListener object encapsulates the management of a + * listening socket. It is able to listen on multiple sockets + * concurrently, to deal with the scenario where IPv4 / IPv6 + * needs separate sockets, or there is a need to listen on a + * subset of interface IP addresses, instead of the wildcard + * address. + */ +struct QIONetListener { + Object parent; + + char *name; + QIOChannelSocket **sioc; + GSource **io_source; + size_t nsioc; + + bool connected; + + QIONetListenerClientFunc io_func; + gpointer io_data; + GDestroyNotify io_notify; +}; + + + +/** + * qio_net_listener_new: + * + * Create a new network listener service, which is not + * listening on any sockets initially. + * + * Returns: the new listener + */ +QIONetListener *qio_net_listener_new(void); + + +/** + * qio_net_listener_set_name: + * @listener: the network listener object + * @name: the listener name + * + * Set the name of the listener. This is used as a debugging + * aid, to set names on any GSource instances associated + * with the listener + */ +void qio_net_listener_set_name(QIONetListener *listener, + const char *name); + +/** + * qio_net_listener_open_sync: + * @listener: the network listener object + * @addr: the address to listen on + * @num: the amount of expected connections + * @errp: pointer to a NULL initialized error object + * + * Synchronously open a listening connection on all + * addresses associated with @addr. This method may + * also be invoked multiple times, in order to have a + * single listener on multiple distinct addresses. + */ +int qio_net_listener_open_sync(QIONetListener *listener, + SocketAddress *addr, + int num, + Error **errp); + +/** + * qio_net_listener_add: + * @listener: the network listener object + * @sioc: the socket I/O channel + * + * Associate a listening socket I/O channel with the + * listener. The listener will acquire a new reference + * on @sioc, so the caller should release its own reference + * if it no longer requires the object. + */ +void qio_net_listener_add(QIONetListener *listener, + QIOChannelSocket *sioc); + +/** + * qio_net_listener_set_client_func_full: + * @listener: the network listener object + * @func: the callback function + * @data: opaque data to pass to @func + * @notify: callback to free @data + * @context: the context that the sources will be bound to. If %NULL, + * the default context will be used. + * + * Register @func to be invoked whenever a new client + * connects to the listener. @func will be invoked + * passing in the QIOChannelSocket instance for the + * client. + */ +void qio_net_listener_set_client_func_full(QIONetListener *listener, + QIONetListenerClientFunc func, + gpointer data, + GDestroyNotify notify, + GMainContext *context); + +/** + * qio_net_listener_set_client_func: + * @listener: the network listener object + * @func: the callback function + * @data: opaque data to pass to @func + * @notify: callback to free @data + * + * Wrapper of qio_net_listener_set_client_func_full(), only that the + * sources will always be bound to default main context. + */ +void qio_net_listener_set_client_func(QIONetListener *listener, + QIONetListenerClientFunc func, + gpointer data, + GDestroyNotify notify); + +/** + * qio_net_listener_wait_client: + * @listener: the network listener object + * + * Block execution of the caller until a new client arrives + * on one of the listening sockets. If there was previously + * a callback registered with qio_net_listener_set_client_func + * it will be temporarily disabled, and re-enabled afterwards. + * + * Returns: the new client socket + */ +QIOChannelSocket *qio_net_listener_wait_client(QIONetListener *listener); + + +/** + * qio_net_listener_disconnect: + * @listener: the network listener object + * + * Disconnect the listener, removing all I/O callback + * watches and closing the socket channels. + */ +void qio_net_listener_disconnect(QIONetListener *listener); + + +/** + * qio_net_listener_is_connected: + * @listener: the network listener object + * + * Determine if the listener is connected to any socket + * channels + * + * Returns: true if connected, false otherwise + */ +bool qio_net_listener_is_connected(QIONetListener *listener); + +#endif /* QIO_NET_LISTENER_H */ diff --git a/include/io/task.h b/include/io/task.h new file mode 100644 index 000000000..beec4f5cf --- /dev/null +++ b/include/io/task.h @@ -0,0 +1,350 @@ +/* + * QEMU I/O task + * + * Copyright (c) 2015 Red Hat, Inc. + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, see . + * + */ + +#ifndef QIO_TASK_H +#define QIO_TASK_H + +typedef struct QIOTask QIOTask; + +typedef void (*QIOTaskFunc)(QIOTask *task, + gpointer opaque); + +typedef void (*QIOTaskWorker)(QIOTask *task, + gpointer opaque); + +/** + * QIOTask: + * + * The QIOTask object provides a simple mechanism for reporting + * success / failure of long running background operations. + * + * A object on which the operation is to be performed could have + * a public API which accepts a task callback: + * + * + * Task function signature + * + * void myobject_operation(QMyObject *obj, + * QIOTaskFunc *func, + * gpointer opaque, + * GDestroyNotify notify); + * + * + * + * The 'func' parameter is the callback to be invoked, and 'opaque' + * is data to pass to it. The optional 'notify' function is used + * to free 'opaque' when no longer needed. + * + * When the operation completes, the 'func' callback will be + * invoked, allowing the calling code to determine the result + * of the operation. An example QIOTaskFunc implementation may + * look like + * + * + * Task callback implementation + * + * static void myobject_operation_notify(QIOTask *task, + * gpointer opaque) + * { + * Error *err = NULL; + * if (qio_task_propagate_error(task, &err)) { + * ...deal with the failure... + * error_free(err); + * } else { + * QMyObject *src = QMY_OBJECT(qio_task_get_source(task)); + * ...deal with the completion... + * } + * } + * + * + * + * Now, lets say the implementation of the method using the + * task wants to set a timer to run once a second checking + * for completion of some activity. It would do something + * like + * + * + * Task function implementation + * + * void myobject_operation(QMyObject *obj, + * QIOTaskFunc *func, + * gpointer opaque, + * GDestroyNotify notify) + * { + * QIOTask *task; + * + * task = qio_task_new(OBJECT(obj), func, opaque, notify); + * + * g_timeout_add_full(G_PRIORITY_DEFAULT, + * 1000, + * myobject_operation_timer, + * task, + * NULL); + * } + * + * + * + * It could equally have setup a watch on a file descriptor or + * created a background thread, or something else entirely. + * Notice that the source object is passed to the task, and + * QIOTask will hold a reference on that. This ensure that + * the QMyObject instance cannot be garbage collected while + * the async task is still in progress. + * + * In this case, myobject_operation_timer will fire after + * 3 secs and do + * + * + * Task timer function + * + * gboolean myobject_operation_timer(gpointer opaque) + * { + * QIOTask *task = QIO_TASK(opaque); + * Error *err = NULL; + * + * ...check something important... + * if (err) { + * qio_task_set_error(task, err); + * qio_task_complete(task); + * return FALSE; + * } else if (...work is completed ...) { + * qio_task_complete(task); + * return FALSE; + * } + * ...carry on polling ... + * return TRUE; + * } + * + * + * + * The 'qio_task_complete' call in this method will trigger + * the callback func 'myobject_operation_notify' shown + * earlier to deal with the results. + * + * Once this function returns false, object_unref will be called + * automatically on the task causing it to be released and the + * ref on QMyObject dropped too. + * + * The QIOTask module can also be used to perform operations + * in a background thread context, while still reporting the + * results in the main event thread. This allows code which + * cannot easily be rewritten to be asychronous (such as DNS + * lookups) to be easily run non-blocking. Reporting the + * results in the main thread context means that the caller + * typically does not need to be concerned about thread + * safety wrt the QEMU global mutex. + * + * For example, the socket_listen() method will block the caller + * while DNS lookups take place if given a name, instead of IP + * address. The C library often do not provide a practical async + * DNS API, so the to get non-blocking DNS lookups in a portable + * manner requires use of a thread. So achieve a non-blocking + * socket listen using QIOTask would require: + * + * + * static void myobject_listen_worker(QIOTask *task, + * gpointer opaque) + * { + * QMyObject obj = QMY_OBJECT(qio_task_get_source(task)); + * SocketAddress *addr = opaque; + * Error *err = NULL; + * + * obj->fd = socket_listen(addr, &err); + * + qio_task_set_error(task, err); + * } + * + * void myobject_listen_async(QMyObject *obj, + * SocketAddress *addr, + * QIOTaskFunc *func, + * gpointer opaque, + * GDestroyNotify notify) + * { + * QIOTask *task; + * SocketAddress *addrCopy; + * + * addrCopy = QAPI_CLONE(SocketAddress, addr); + * task = qio_task_new(OBJECT(obj), func, opaque, notify); + * + * qio_task_run_in_thread(task, myobject_listen_worker, + * addrCopy, + * qapi_free_SocketAddress); + * } + * + * + * NB, The 'func' callback passed into myobject_listen_async + * will be invoked from the main event thread, despite the + * actual operation being performed in a different thread. + */ + +/** + * qio_task_new: + * @source: the object on which the operation is invoked + * @func: the callback to invoke when the task completes + * @opaque: opaque data to pass to @func when invoked + * @destroy: optional callback to free @opaque + * + * Creates a new task struct to track completion of a + * background operation running on the object @source. + * When the operation completes or fails, the callback + * @func will be invoked. The callback can access the + * 'err' attribute in the task object to determine if + * the operation was successful or not. + * + * The returned task will be released when qio_task_complete() + * is invoked. + * + * Returns: the task struct + */ +QIOTask *qio_task_new(Object *source, + QIOTaskFunc func, + gpointer opaque, + GDestroyNotify destroy); + +/** + * qio_task_run_in_thread: + * @task: the task struct + * @worker: the function to invoke in a thread + * @opaque: opaque data to pass to @worker + * @destroy: function to free @opaque + * @context: the context to run the complete hook. If %NULL, the + * default context will be used. + * + * Run a task in a background thread. When @worker + * returns it will call qio_task_complete() in + * the thread that is running the main loop associated + * with @context. + */ +void qio_task_run_in_thread(QIOTask *task, + QIOTaskWorker worker, + gpointer opaque, + GDestroyNotify destroy, + GMainContext *context); + + +/** + * qio_task_wait_thread: + * @task: the task struct + * + * Wait for completion of a task that was previously + * invoked using qio_task_run_in_thread. This MUST + * ONLY be invoked if the task has not already + * completed, since after the completion callback + * is invoked, @task will have been freed. + * + * To avoid racing with execution of the completion + * callback provided with qio_task_new, this method + * MUST ONLY be invoked from the thread that is + * running the main loop associated with @context + * parameter to qio_task_run_in_thread. + * + * When the thread has completed, the completion + * callback provided to qio_task_new will be invoked. + * When that callback returns @task will be freed, + * so @task must not be referenced after this + * method completes. + */ +void qio_task_wait_thread(QIOTask *task); + + +/** + * qio_task_complete: + * @task: the task struct + * + * Invoke the completion callback for @task and + * then free its memory. + */ +void qio_task_complete(QIOTask *task); + + +/** + * qio_task_set_error: + * @task: the task struct + * @err: pointer to the error, or NULL + * + * Associate an error with the task, which can later + * be retrieved with the qio_task_propagate_error() + * method. This method takes ownership of @err, so + * it is not valid to access it after this call + * completes. If @err is NULL this is a no-op. If + * this is call multiple times, only the first + * provided @err will be recorded, later ones will + * be discarded and freed. + */ +void qio_task_set_error(QIOTask *task, + Error *err); + + +/** + * qio_task_propagate_error: + * @task: the task struct + * @errp: pointer to a NULL-initialized error object + * + * Propagate the error associated with @task + * into @errp. + * + * Returns: true if an error was propagated, false otherwise + */ +bool qio_task_propagate_error(QIOTask *task, + Error **errp); + + +/** + * qio_task_set_result_pointer: + * @task: the task struct + * @result: pointer to the result data + * + * Associate an opaque result with the task, + * which can later be retrieved with the + * qio_task_get_result_pointer() method + * + */ +void qio_task_set_result_pointer(QIOTask *task, + gpointer result, + GDestroyNotify notify); + + +/** + * qio_task_get_result_pointer: + * @task: the task struct + * + * Retrieve the opaque result data associated + * with the task, if any. + * + * Returns: the task result, or NULL + */ +gpointer qio_task_get_result_pointer(QIOTask *task); + + +/** + * qio_task_get_source: + * @task: the task struct + * + * Get the source object associated with the background + * task. The caller does not own a reference on the + * returned Object, and so should call object_ref() + * if it wants to keep the object pointer outside the + * lifetime of the QIOTask object. + * + * Returns: the source object + */ +Object *qio_task_get_source(QIOTask *task); + +#endif /* QIO_TASK_H */ diff --git a/include/libdecnumber/dconfig.h b/include/libdecnumber/dconfig.h new file mode 100644 index 000000000..0f7dccef1 --- /dev/null +++ b/include/libdecnumber/dconfig.h @@ -0,0 +1,39 @@ +/* Configure decNumber for either host or target. + Copyright (C) 2008 Free Software Foundation, Inc. + + This file is part of GCC. + + GCC is free software; you can redistribute it and/or modify it under + the terms of the GNU General Public License as published by the Free + Software Foundation; either version 2, or (at your option) any later + version. + + In addition to the permissions in the GNU General Public License, + the Free Software Foundation gives you unlimited permission to link + the compiled version of this file into combinations with other + programs, and to distribute those combinations without any + restriction coming from the use of this file. (The General Public + License restrictions do apply in other respects; for example, they + cover modification of the file, and distribution when not linked + into a combine executable.) + + GCC is distributed in the hope that it will be useful, but WITHOUT ANY + WARRANTY; without even the implied warranty of MERCHANTABILITY or + FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + for more details. + + You should have received a copy of the GNU General Public License + along with GCC; see the file COPYING. If not, write to the Free + Software Foundation, 51 Franklin Street, Fifth Floor, Boston, MA + 02110-1301, USA. */ + + +#if defined(HOST_WORDS_BIGENDIAN) +#define WORDS_BIGENDIAN 1 +#else +#define WORDS_BIGENDIAN 0 +#endif + +#ifndef DECDPUN +#define DECDPUN 3 +#endif diff --git a/include/libdecnumber/decContext.h b/include/libdecnumber/decContext.h new file mode 100644 index 000000000..cea6e4279 --- /dev/null +++ b/include/libdecnumber/decContext.h @@ -0,0 +1,255 @@ +/* Decimal context header module for the decNumber C Library. + Copyright (C) 2005, 2007 Free Software Foundation, Inc. + Contributed by IBM Corporation. Author Mike Cowlishaw. + + This file is part of GCC. + + GCC is free software; you can redistribute it and/or modify it under + the terms of the GNU General Public License as published by the Free + Software Foundation; either version 2, or (at your option) any later + version. + + In addition to the permissions in the GNU General Public License, + the Free Software Foundation gives you unlimited permission to link + the compiled version of this file into combinations with other + programs, and to distribute those combinations without any + restriction coming from the use of this file. (The General Public + License restrictions do apply in other respects; for example, they + cover modification of the file, and distribution when not linked + into a combine executable.) + + GCC is distributed in the hope that it will be useful, but WITHOUT ANY + WARRANTY; without even the implied warranty of MERCHANTABILITY or + FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + for more details. + + You should have received a copy of the GNU General Public License + along with GCC; see the file COPYING. If not, write to the Free + Software Foundation, 51 Franklin Street, Fifth Floor, Boston, MA + 02110-1301, USA. */ + +/* ------------------------------------------------------------------ */ +/* Decimal Context module header */ +/* ------------------------------------------------------------------ */ +/* */ +/* Context variables must always have valid values: */ +/* */ +/* status -- [any bits may be cleared, but not set, by user] */ +/* round -- must be one of the enumerated rounding modes */ +/* */ +/* The following variables are implied for fixed size formats (i.e., */ +/* they are ignored) but should still be set correctly in case used */ +/* with decNumber functions: */ +/* */ +/* clamp -- must be either 0 or 1 */ +/* digits -- must be in the range 1 through 999999999 */ +/* emax -- must be in the range 0 through 999999999 */ +/* emin -- must be in the range 0 through -999999999 */ +/* extended -- must be either 0 or 1 [present only if DECSUBSET] */ +/* traps -- only defined bits may be set */ +/* */ +/* ------------------------------------------------------------------ */ + +#ifndef DECCONTEXT_H +#define DECCONTEXT_H + + #define DECCNAME "decContext" /* Short name */ + #define DECCFULLNAME "Decimal Context Descriptor" /* Verbose name */ + #define DECCAUTHOR "Mike Cowlishaw" /* Who to blame */ + + + /* Extended flags setting -- set this to 0 to use only IEEE flags */ + #define DECEXTFLAG 1 /* 1=enable extended flags */ + + /* Conditional code flag -- set this to 0 for best performance */ + #define DECSUBSET 0 /* 1=enable subset arithmetic */ + + /* Context for operations, with associated constants */ + enum rounding { + DEC_ROUND_CEILING, /* round towards +infinity */ + DEC_ROUND_UP, /* round away from 0 */ + DEC_ROUND_HALF_UP, /* 0.5 rounds up */ + DEC_ROUND_HALF_EVEN, /* 0.5 rounds to nearest even */ + DEC_ROUND_HALF_DOWN, /* 0.5 rounds down */ + DEC_ROUND_DOWN, /* round towards 0 (truncate) */ + DEC_ROUND_FLOOR, /* round towards -infinity */ + DEC_ROUND_05UP, /* round for reround */ + DEC_ROUND_MAX /* enum must be less than this */ + }; + #define DEC_ROUND_DEFAULT DEC_ROUND_HALF_EVEN; + + typedef struct { + int32_t digits; /* working precision */ + int32_t emax; /* maximum positive exponent */ + int32_t emin; /* minimum negative exponent */ + enum rounding round; /* rounding mode */ + uint32_t traps; /* trap-enabler flags */ + uint32_t status; /* status flags */ + uint8_t clamp; /* flag: apply IEEE exponent clamp */ + #if DECSUBSET + uint8_t extended; /* flag: special-values allowed */ + #endif + } decContext; + + /* Maxima and Minima for context settings */ + #define DEC_MAX_DIGITS 999999999 + #define DEC_MIN_DIGITS 1 + #define DEC_MAX_EMAX 999999999 + #define DEC_MIN_EMAX 0 + #define DEC_MAX_EMIN 0 + #define DEC_MIN_EMIN -999999999 + #define DEC_MAX_MATH 999999 /* max emax, etc., for math funcs. */ + + /* Classifications for decimal numbers, aligned with 754r (note */ + /* that 'normal' and 'subnormal' are meaningful only with a */ + /* decContext or a fixed size format). */ + enum decClass { + DEC_CLASS_SNAN, + DEC_CLASS_QNAN, + DEC_CLASS_NEG_INF, + DEC_CLASS_NEG_NORMAL, + DEC_CLASS_NEG_SUBNORMAL, + DEC_CLASS_NEG_ZERO, + DEC_CLASS_POS_ZERO, + DEC_CLASS_POS_SUBNORMAL, + DEC_CLASS_POS_NORMAL, + DEC_CLASS_POS_INF + }; + /* Strings for the decClasses */ + #define DEC_ClassString_SN "sNaN" + #define DEC_ClassString_QN "NaN" + #define DEC_ClassString_NI "-Infinity" + #define DEC_ClassString_NN "-Normal" + #define DEC_ClassString_NS "-Subnormal" + #define DEC_ClassString_NZ "-Zero" + #define DEC_ClassString_PZ "+Zero" + #define DEC_ClassString_PS "+Subnormal" + #define DEC_ClassString_PN "+Normal" + #define DEC_ClassString_PI "+Infinity" + #define DEC_ClassString_UN "Invalid" + + /* Trap-enabler and Status flags (exceptional conditions), and */ + /* their names. The top byte is reserved for internal use */ + #if DECEXTFLAG + /* Extended flags */ + #define DEC_Conversion_syntax 0x00000001 + #define DEC_Division_by_zero 0x00000002 + #define DEC_Division_impossible 0x00000004 + #define DEC_Division_undefined 0x00000008 + #define DEC_Insufficient_storage 0x00000010 /* [when malloc fails] */ + #define DEC_Inexact 0x00000020 + #define DEC_Invalid_context 0x00000040 + #define DEC_Invalid_operation 0x00000080 + #if DECSUBSET + #define DEC_Lost_digits 0x00000100 + #endif + #define DEC_Overflow 0x00000200 + #define DEC_Clamped 0x00000400 + #define DEC_Rounded 0x00000800 + #define DEC_Subnormal 0x00001000 + #define DEC_Underflow 0x00002000 + #else + /* IEEE flags only */ + #define DEC_Conversion_syntax 0x00000010 + #define DEC_Division_by_zero 0x00000002 + #define DEC_Division_impossible 0x00000010 + #define DEC_Division_undefined 0x00000010 + #define DEC_Insufficient_storage 0x00000010 /* [when malloc fails] */ + #define DEC_Inexact 0x00000001 + #define DEC_Invalid_context 0x00000010 + #define DEC_Invalid_operation 0x00000010 + #if DECSUBSET + #define DEC_Lost_digits 0x00000000 + #endif + #define DEC_Overflow 0x00000008 + #define DEC_Clamped 0x00000000 + #define DEC_Rounded 0x00000000 + #define DEC_Subnormal 0x00000000 + #define DEC_Underflow 0x00000004 + #endif + + /* IEEE 854 groupings for the flags */ + /* [DEC_Clamped, DEC_Lost_digits, DEC_Rounded, and DEC_Subnormal */ + /* are not in IEEE 854] */ + #define DEC_IEEE_854_Division_by_zero (DEC_Division_by_zero) + #if DECSUBSET + #define DEC_IEEE_854_Inexact (DEC_Inexact | DEC_Lost_digits) + #else + #define DEC_IEEE_854_Inexact (DEC_Inexact) + #endif + #define DEC_IEEE_854_Invalid_operation (DEC_Conversion_syntax | \ + DEC_Division_impossible | \ + DEC_Division_undefined | \ + DEC_Insufficient_storage | \ + DEC_Invalid_context | \ + DEC_Invalid_operation) + #define DEC_IEEE_854_Overflow (DEC_Overflow) + #define DEC_IEEE_854_Underflow (DEC_Underflow) + + /* flags which are normally errors (result is qNaN, infinite, or 0) */ + #define DEC_Errors (DEC_IEEE_854_Division_by_zero | \ + DEC_IEEE_854_Invalid_operation | \ + DEC_IEEE_854_Overflow | DEC_IEEE_854_Underflow) + /* flags which cause a result to become qNaN */ + #define DEC_NaNs DEC_IEEE_854_Invalid_operation + + /* flags which are normally for information only (finite results) */ + #if DECSUBSET + #define DEC_Information (DEC_Clamped | DEC_Rounded | DEC_Inexact \ + | DEC_Lost_digits) + #else + #define DEC_Information (DEC_Clamped | DEC_Rounded | DEC_Inexact) + #endif + + /* Name strings for the exceptional conditions */ + #define DEC_Condition_CS "Conversion syntax" + #define DEC_Condition_DZ "Division by zero" + #define DEC_Condition_DI "Division impossible" + #define DEC_Condition_DU "Division undefined" + #define DEC_Condition_IE "Inexact" + #define DEC_Condition_IS "Insufficient storage" + #define DEC_Condition_IC "Invalid context" + #define DEC_Condition_IO "Invalid operation" + #if DECSUBSET + #define DEC_Condition_LD "Lost digits" + #endif + #define DEC_Condition_OV "Overflow" + #define DEC_Condition_PA "Clamped" + #define DEC_Condition_RO "Rounded" + #define DEC_Condition_SU "Subnormal" + #define DEC_Condition_UN "Underflow" + #define DEC_Condition_ZE "No status" + #define DEC_Condition_MU "Multiple status" + #define DEC_Condition_Length 21 /* length of the longest string, */ + /* including terminator */ + + /* Initialization descriptors, used by decContextDefault */ + #define DEC_INIT_BASE 0 + #define DEC_INIT_DECIMAL32 32 + #define DEC_INIT_DECIMAL64 64 + #define DEC_INIT_DECIMAL128 128 + /* Synonyms */ + #define DEC_INIT_DECSINGLE DEC_INIT_DECIMAL32 + #define DEC_INIT_DECDOUBLE DEC_INIT_DECIMAL64 + #define DEC_INIT_DECQUAD DEC_INIT_DECIMAL128 + + /* decContext routines */ + + + extern decContext * decContextClearStatus(decContext *, uint32_t); + extern decContext * decContextDefault(decContext *, int32_t); + extern enum rounding decContextGetRounding(decContext *); + extern uint32_t decContextGetStatus(decContext *); + extern decContext * decContextRestoreStatus(decContext *, uint32_t, uint32_t); + extern uint32_t decContextSaveStatus(decContext *, uint32_t); + extern decContext * decContextSetRounding(decContext *, enum rounding); + extern decContext * decContextSetStatus(decContext *, uint32_t); + extern decContext * decContextSetStatusFromString(decContext *, const char *); + extern decContext * decContextSetStatusFromStringQuiet(decContext *, const char *); + extern decContext * decContextSetStatusQuiet(decContext *, uint32_t); + extern const char * decContextStatusToString(const decContext *); + extern uint32_t decContextTestSavedStatus(uint32_t, uint32_t); + extern uint32_t decContextTestStatus(decContext *, uint32_t); + extern decContext * decContextZeroStatus(decContext *); + +#endif diff --git a/include/libdecnumber/decDPD.h b/include/libdecnumber/decDPD.h new file mode 100644 index 000000000..26a21ec8e --- /dev/null +++ b/include/libdecnumber/decDPD.h @@ -0,0 +1,1214 @@ +/* Conversion lookup tables for the decNumber C Library. + Copyright (C) 2007 Free Software Foundation, Inc. + Contributed by IBM Corporation. Author Mike Cowlishaw. + + This file is part of GCC. + + GCC is free software; you can redistribute it and/or modify it under + the terms of the GNU General Public License as published by the Free + Software Foundation; either version 2, or (at your option) any later + version. + + In addition to the permissions in the GNU General Public License, + the Free Software Foundation gives you unlimited permission to link + the compiled version of this file into combinations with other + programs, and to distribute those combinations without any + restriction coming from the use of this file. (The General Public + License restrictions do apply in other respects; for example, they + cover modification of the file, and distribution when not linked + into a combine executable.) + + GCC is distributed in the hope that it will be useful, but WITHOUT ANY + WARRANTY; without even the implied warranty of MERCHANTABILITY or + FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + for more details. + + You should have received a copy of the GNU General Public License + along with GCC; see the file COPYING. If not, write to the Free + Software Foundation, 51 Franklin Street, Fifth Floor, Boston, MA + 02110-1301, USA. */ + +/* ------------------------------------------------------------------------ */ +/* Binary Coded Decimal and Densely Packed Decimal conversion lookup tables */ +/* [Automatically generated -- do not edit. 2007.05.05] */ +/* ------------------------------------------------------------------------ */ +/* ------------------------------------------------------------------------ */ +/* For details, see: http://www2.hursley.ibm.com/decimal/DPDecimal.html */ + + +/* This include file defines several DPD and BCD conversion tables: */ +/* */ +/* uint16_t BCD2DPD[2458]; -- BCD -> DPD (0x999 => 2457) */ +/* uint16_t BIN2DPD[1000]; -- Bin -> DPD (999 => 2457) */ +/* uint8_t BIN2CHAR[4001]; -- Bin -> CHAR (999 => '\3' '9' '9' '9') */ +/* uint8_t BIN2BCD8[4000]; -- Bin -> bytes (999 => 9 9 9 3) */ +/* uint16_t DPD2BCD[1024]; -- DPD -> BCD (0x3FF => 0x999) */ +/* uint16_t DPD2BIN[1024]; -- DPD -> BIN (0x3FF => 999) */ +/* uint32_t DPD2BINK[1024]; -- DPD -> BIN * 1000 (0x3FF => 999000) */ +/* uint32_t DPD2BINM[1024]; -- DPD -> BIN * 1E+6 (0x3FF => 999000000) */ +/* uint8_t DPD2BCD8[4096]; -- DPD -> bytes (x3FF => 9 9 9 3) */ +/* */ +/* In all cases the result (10 bits or 12 bits, or binary) is right-aligned */ +/* in the table entry. BIN2CHAR entries are a single byte length (0 for */ +/* value 0) followed by three digit characters; a trailing terminator is */ +/* included to allow 4-char moves always. BIN2BCD8 and DPD2BCD8 entries */ +/* are similar with the three BCD8 digits followed by a one-byte length */ +/* (again, length=0 for value 0). */ +/* */ +/* To use a table, its name, prefixed with DEC_, must be defined with a */ +/* value of 1 before this header file is included. For example: */ +/* #define DEC_BCD2DPD 1 */ +/* This mechanism allows software to only include tables that are needed. */ +/* ------------------------------------------------------------------------ */ + +#if defined(DEC_BCD2DPD) && DEC_BCD2DPD==1 && !defined(DECBCD2DPD) +#define DECBCD2DPD + +const uint16_t BCD2DPD[2458]={ 0, 1, 2, 3, 4, 5, 6, 7, + 8, 9, 0, 0, 0, 0, 0, 0, 16, 17, 18, 19, 20, + 21, 22, 23, 24, 25, 0, 0, 0, 0, 0, 0, 32, 33, + 34, 35, 36, 37, 38, 39, 40, 41, 0, 0, 0, 0, 0, + 0, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 0, 0, + 0, 0, 0, 0, 64, 65, 66, 67, 68, 69, 70, 71, 72, + 73, 0, 0, 0, 0, 0, 0, 80, 81, 82, 83, 84, 85, + 86, 87, 88, 89, 0, 0, 0, 0, 0, 0, 96, 97, 98, + 99, 100, 101, 102, 103, 104, 105, 0, 0, 0, 0, 0, 0, + 112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 0, 0, 0, + 0, 0, 0, 10, 11, 42, 43, 74, 75, 106, 107, 78, 79, + 0, 0, 0, 0, 0, 0, 26, 27, 58, 59, 90, 91, 122, + 123, 94, 95, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 128, 129, 130, 131, 132, 133, 134, 135, 136, 137, 0, 0, + 0, 0, 0, 0, 144, 145, 146, 147, 148, 149, 150, 151, 152, + 153, 0, 0, 0, 0, 0, 0, 160, 161, 162, 163, 164, 165, + 166, 167, 168, 169, 0, 0, 0, 0, 0, 0, 176, 177, 178, + 179, 180, 181, 182, 183, 184, 185, 0, 0, 0, 0, 0, 0, + 192, 193, 194, 195, 196, 197, 198, 199, 200, 201, 0, 0, 0, + 0, 0, 0, 208, 209, 210, 211, 212, 213, 214, 215, 216, 217, + 0, 0, 0, 0, 0, 0, 224, 225, 226, 227, 228, 229, 230, + 231, 232, 233, 0, 0, 0, 0, 0, 0, 240, 241, 242, 243, + 244, 245, 246, 247, 248, 249, 0, 0, 0, 0, 0, 0, 138, + 139, 170, 171, 202, 203, 234, 235, 206, 207, 0, 0, 0, 0, + 0, 0, 154, 155, 186, 187, 218, 219, 250, 251, 222, 223, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 256, 257, 258, + 259, 260, 261, 262, 263, 264, 265, 0, 0, 0, 0, 0, 0, + 272, 273, 274, 275, 276, 277, 278, 279, 280, 281, 0, 0, 0, + 0, 0, 0, 288, 289, 290, 291, 292, 293, 294, 295, 296, 297, + 0, 0, 0, 0, 0, 0, 304, 305, 306, 307, 308, 309, 310, + 311, 312, 313, 0, 0, 0, 0, 0, 0, 320, 321, 322, 323, + 324, 325, 326, 327, 328, 329, 0, 0, 0, 0, 0, 0, 336, + 337, 338, 339, 340, 341, 342, 343, 344, 345, 0, 0, 0, 0, + 0, 0, 352, 353, 354, 355, 356, 357, 358, 359, 360, 361, 0, + 0, 0, 0, 0, 0, 368, 369, 370, 371, 372, 373, 374, 375, + 376, 377, 0, 0, 0, 0, 0, 0, 266, 267, 298, 299, 330, + 331, 362, 363, 334, 335, 0, 0, 0, 0, 0, 0, 282, 283, + 314, 315, 346, 347, 378, 379, 350, 351, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 384, 385, 386, 387, 388, 389, 390, + 391, 392, 393, 0, 0, 0, 0, 0, 0, 400, 401, 402, 403, + 404, 405, 406, 407, 408, 409, 0, 0, 0, 0, 0, 0, 416, + 417, 418, 419, 420, 421, 422, 423, 424, 425, 0, 0, 0, 0, + 0, 0, 432, 433, 434, 435, 436, 437, 438, 439, 440, 441, 0, + 0, 0, 0, 0, 0, 448, 449, 450, 451, 452, 453, 454, 455, + 456, 457, 0, 0, 0, 0, 0, 0, 464, 465, 466, 467, 468, + 469, 470, 471, 472, 473, 0, 0, 0, 0, 0, 0, 480, 481, + 482, 483, 484, 485, 486, 487, 488, 489, 0, 0, 0, 0, 0, + 0, 496, 497, 498, 499, 500, 501, 502, 503, 504, 505, 0, 0, + 0, 0, 0, 0, 394, 395, 426, 427, 458, 459, 490, 491, 462, + 463, 0, 0, 0, 0, 0, 0, 410, 411, 442, 443, 474, 475, + 506, 507, 478, 479, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 512, 513, 514, 515, 516, 517, 518, 519, 520, 521, 0, + 0, 0, 0, 0, 0, 528, 529, 530, 531, 532, 533, 534, 535, + 536, 537, 0, 0, 0, 0, 0, 0, 544, 545, 546, 547, 548, + 549, 550, 551, 552, 553, 0, 0, 0, 0, 0, 0, 560, 561, + 562, 563, 564, 565, 566, 567, 568, 569, 0, 0, 0, 0, 0, + 0, 576, 577, 578, 579, 580, 581, 582, 583, 584, 585, 0, 0, + 0, 0, 0, 0, 592, 593, 594, 595, 596, 597, 598, 599, 600, + 601, 0, 0, 0, 0, 0, 0, 608, 609, 610, 611, 612, 613, + 614, 615, 616, 617, 0, 0, 0, 0, 0, 0, 624, 625, 626, + 627, 628, 629, 630, 631, 632, 633, 0, 0, 0, 0, 0, 0, + 522, 523, 554, 555, 586, 587, 618, 619, 590, 591, 0, 0, 0, + 0, 0, 0, 538, 539, 570, 571, 602, 603, 634, 635, 606, 607, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 640, 641, + 642, 643, 644, 645, 646, 647, 648, 649, 0, 0, 0, 0, 0, + 0, 656, 657, 658, 659, 660, 661, 662, 663, 664, 665, 0, 0, + 0, 0, 0, 0, 672, 673, 674, 675, 676, 677, 678, 679, 680, + 681, 0, 0, 0, 0, 0, 0, 688, 689, 690, 691, 692, 693, + 694, 695, 696, 697, 0, 0, 0, 0, 0, 0, 704, 705, 706, + 707, 708, 709, 710, 711, 712, 713, 0, 0, 0, 0, 0, 0, + 720, 721, 722, 723, 724, 725, 726, 727, 728, 729, 0, 0, 0, + 0, 0, 0, 736, 737, 738, 739, 740, 741, 742, 743, 744, 745, + 0, 0, 0, 0, 0, 0, 752, 753, 754, 755, 756, 757, 758, + 759, 760, 761, 0, 0, 0, 0, 0, 0, 650, 651, 682, 683, + 714, 715, 746, 747, 718, 719, 0, 0, 0, 0, 0, 0, 666, + 667, 698, 699, 730, 731, 762, 763, 734, 735, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 768, 769, 770, 771, 772, 773, + 774, 775, 776, 777, 0, 0, 0, 0, 0, 0, 784, 785, 786, + 787, 788, 789, 790, 791, 792, 793, 0, 0, 0, 0, 0, 0, + 800, 801, 802, 803, 804, 805, 806, 807, 808, 809, 0, 0, 0, + 0, 0, 0, 816, 817, 818, 819, 820, 821, 822, 823, 824, 825, + 0, 0, 0, 0, 0, 0, 832, 833, 834, 835, 836, 837, 838, + 839, 840, 841, 0, 0, 0, 0, 0, 0, 848, 849, 850, 851, + 852, 853, 854, 855, 856, 857, 0, 0, 0, 0, 0, 0, 864, + 865, 866, 867, 868, 869, 870, 871, 872, 873, 0, 0, 0, 0, + 0, 0, 880, 881, 882, 883, 884, 885, 886, 887, 888, 889, 0, + 0, 0, 0, 0, 0, 778, 779, 810, 811, 842, 843, 874, 875, + 846, 847, 0, 0, 0, 0, 0, 0, 794, 795, 826, 827, 858, + 859, 890, 891, 862, 863, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 896, 897, 898, 899, 900, 901, 902, 903, 904, 905, + 0, 0, 0, 0, 0, 0, 912, 913, 914, 915, 916, 917, 918, + 919, 920, 921, 0, 0, 0, 0, 0, 0, 928, 929, 930, 931, + 932, 933, 934, 935, 936, 937, 0, 0, 0, 0, 0, 0, 944, + 945, 946, 947, 948, 949, 950, 951, 952, 953, 0, 0, 0, 0, + 0, 0, 960, 961, 962, 963, 964, 965, 966, 967, 968, 969, 0, + 0, 0, 0, 0, 0, 976, 977, 978, 979, 980, 981, 982, 983, + 984, 985, 0, 0, 0, 0, 0, 0, 992, 993, 994, 995, 996, + 997, 998, 999, 1000, 1001, 0, 0, 0, 0, 0, 0, 1008, 1009, + 1010, 1011, 1012, 1013, 1014, 1015, 1016, 1017, 0, 0, 0, 0, 0, + 0, 906, 907, 938, 939, 970, 971, 1002, 1003, 974, 975, 0, 0, + 0, 0, 0, 0, 922, 923, 954, 955, 986, 987, 1018, 1019, 990, + 991, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 12, + 13, 268, 269, 524, 525, 780, 781, 46, 47, 0, 0, 0, 0, + 0, 0, 28, 29, 284, 285, 540, 541, 796, 797, 62, 63, 0, + 0, 0, 0, 0, 0, 44, 45, 300, 301, 556, 557, 812, 813, + 302, 303, 0, 0, 0, 0, 0, 0, 60, 61, 316, 317, 572, + 573, 828, 829, 318, 319, 0, 0, 0, 0, 0, 0, 76, 77, + 332, 333, 588, 589, 844, 845, 558, 559, 0, 0, 0, 0, 0, + 0, 92, 93, 348, 349, 604, 605, 860, 861, 574, 575, 0, 0, + 0, 0, 0, 0, 108, 109, 364, 365, 620, 621, 876, 877, 814, + 815, 0, 0, 0, 0, 0, 0, 124, 125, 380, 381, 636, 637, + 892, 893, 830, 831, 0, 0, 0, 0, 0, 0, 14, 15, 270, + 271, 526, 527, 782, 783, 110, 111, 0, 0, 0, 0, 0, 0, + 30, 31, 286, 287, 542, 543, 798, 799, 126, 127, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 140, 141, 396, 397, 652, + 653, 908, 909, 174, 175, 0, 0, 0, 0, 0, 0, 156, 157, + 412, 413, 668, 669, 924, 925, 190, 191, 0, 0, 0, 0, 0, + 0, 172, 173, 428, 429, 684, 685, 940, 941, 430, 431, 0, 0, + 0, 0, 0, 0, 188, 189, 444, 445, 700, 701, 956, 957, 446, + 447, 0, 0, 0, 0, 0, 0, 204, 205, 460, 461, 716, 717, + 972, 973, 686, 687, 0, 0, 0, 0, 0, 0, 220, 221, 476, + 477, 732, 733, 988, 989, 702, 703, 0, 0, 0, 0, 0, 0, + 236, 237, 492, 493, 748, 749, 1004, 1005, 942, 943, 0, 0, 0, + 0, 0, 0, 252, 253, 508, 509, 764, 765, 1020, 1021, 958, 959, + 0, 0, 0, 0, 0, 0, 142, 143, 398, 399, 654, 655, 910, + 911, 238, 239, 0, 0, 0, 0, 0, 0, 158, 159, 414, 415, + 670, 671, 926, 927, 254, 255}; +#endif + +#if defined(DEC_DPD2BCD) && DEC_DPD2BCD==1 && !defined(DECDPD2BCD) +#define DECDPD2BCD + +const uint16_t DPD2BCD[1024]={ 0, 1, 2, 3, 4, 5, 6, 7, + 8, 9, 128, 129, 2048, 2049, 2176, 2177, 16, 17, 18, 19, 20, + 21, 22, 23, 24, 25, 144, 145, 2064, 2065, 2192, 2193, 32, 33, + 34, 35, 36, 37, 38, 39, 40, 41, 130, 131, 2080, 2081, 2056, + 2057, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 146, 147, + 2096, 2097, 2072, 2073, 64, 65, 66, 67, 68, 69, 70, 71, 72, + 73, 132, 133, 2112, 2113, 136, 137, 80, 81, 82, 83, 84, 85, + 86, 87, 88, 89, 148, 149, 2128, 2129, 152, 153, 96, 97, 98, + 99, 100, 101, 102, 103, 104, 105, 134, 135, 2144, 2145, 2184, 2185, + 112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 150, 151, 2160, + 2161, 2200, 2201, 256, 257, 258, 259, 260, 261, 262, 263, 264, 265, + 384, 385, 2304, 2305, 2432, 2433, 272, 273, 274, 275, 276, 277, 278, + 279, 280, 281, 400, 401, 2320, 2321, 2448, 2449, 288, 289, 290, 291, + 292, 293, 294, 295, 296, 297, 386, 387, 2336, 2337, 2312, 2313, 304, + 305, 306, 307, 308, 309, 310, 311, 312, 313, 402, 403, 2352, 2353, + 2328, 2329, 320, 321, 322, 323, 324, 325, 326, 327, 328, 329, 388, + 389, 2368, 2369, 392, 393, 336, 337, 338, 339, 340, 341, 342, 343, + 344, 345, 404, 405, 2384, 2385, 408, 409, 352, 353, 354, 355, 356, + 357, 358, 359, 360, 361, 390, 391, 2400, 2401, 2440, 2441, 368, 369, + 370, 371, 372, 373, 374, 375, 376, 377, 406, 407, 2416, 2417, 2456, + 2457, 512, 513, 514, 515, 516, 517, 518, 519, 520, 521, 640, 641, + 2050, 2051, 2178, 2179, 528, 529, 530, 531, 532, 533, 534, 535, 536, + 537, 656, 657, 2066, 2067, 2194, 2195, 544, 545, 546, 547, 548, 549, + 550, 551, 552, 553, 642, 643, 2082, 2083, 2088, 2089, 560, 561, 562, + 563, 564, 565, 566, 567, 568, 569, 658, 659, 2098, 2099, 2104, 2105, + 576, 577, 578, 579, 580, 581, 582, 583, 584, 585, 644, 645, 2114, + 2115, 648, 649, 592, 593, 594, 595, 596, 597, 598, 599, 600, 601, + 660, 661, 2130, 2131, 664, 665, 608, 609, 610, 611, 612, 613, 614, + 615, 616, 617, 646, 647, 2146, 2147, 2184, 2185, 624, 625, 626, 627, + 628, 629, 630, 631, 632, 633, 662, 663, 2162, 2163, 2200, 2201, 768, + 769, 770, 771, 772, 773, 774, 775, 776, 777, 896, 897, 2306, 2307, + 2434, 2435, 784, 785, 786, 787, 788, 789, 790, 791, 792, 793, 912, + 913, 2322, 2323, 2450, 2451, 800, 801, 802, 803, 804, 805, 806, 807, + 808, 809, 898, 899, 2338, 2339, 2344, 2345, 816, 817, 818, 819, 820, + 821, 822, 823, 824, 825, 914, 915, 2354, 2355, 2360, 2361, 832, 833, + 834, 835, 836, 837, 838, 839, 840, 841, 900, 901, 2370, 2371, 904, + 905, 848, 849, 850, 851, 852, 853, 854, 855, 856, 857, 916, 917, + 2386, 2387, 920, 921, 864, 865, 866, 867, 868, 869, 870, 871, 872, + 873, 902, 903, 2402, 2403, 2440, 2441, 880, 881, 882, 883, 884, 885, + 886, 887, 888, 889, 918, 919, 2418, 2419, 2456, 2457, 1024, 1025, 1026, + 1027, 1028, 1029, 1030, 1031, 1032, 1033, 1152, 1153, 2052, 2053, 2180, 2181, + 1040, 1041, 1042, 1043, 1044, 1045, 1046, 1047, 1048, 1049, 1168, 1169, 2068, + 2069, 2196, 2197, 1056, 1057, 1058, 1059, 1060, 1061, 1062, 1063, 1064, 1065, + 1154, 1155, 2084, 2085, 2120, 2121, 1072, 1073, 1074, 1075, 1076, 1077, 1078, + 1079, 1080, 1081, 1170, 1171, 2100, 2101, 2136, 2137, 1088, 1089, 1090, 1091, + 1092, 1093, 1094, 1095, 1096, 1097, 1156, 1157, 2116, 2117, 1160, 1161, 1104, + 1105, 1106, 1107, 1108, 1109, 1110, 1111, 1112, 1113, 1172, 1173, 2132, 2133, + 1176, 1177, 1120, 1121, 1122, 1123, 1124, 1125, 1126, 1127, 1128, 1129, 1158, + 1159, 2148, 2149, 2184, 2185, 1136, 1137, 1138, 1139, 1140, 1141, 1142, 1143, + 1144, 1145, 1174, 1175, 2164, 2165, 2200, 2201, 1280, 1281, 1282, 1283, 1284, + 1285, 1286, 1287, 1288, 1289, 1408, 1409, 2308, 2309, 2436, 2437, 1296, 1297, + 1298, 1299, 1300, 1301, 1302, 1303, 1304, 1305, 1424, 1425, 2324, 2325, 2452, + 2453, 1312, 1313, 1314, 1315, 1316, 1317, 1318, 1319, 1320, 1321, 1410, 1411, + 2340, 2341, 2376, 2377, 1328, 1329, 1330, 1331, 1332, 1333, 1334, 1335, 1336, + 1337, 1426, 1427, 2356, 2357, 2392, 2393, 1344, 1345, 1346, 1347, 1348, 1349, + 1350, 1351, 1352, 1353, 1412, 1413, 2372, 2373, 1416, 1417, 1360, 1361, 1362, + 1363, 1364, 1365, 1366, 1367, 1368, 1369, 1428, 1429, 2388, 2389, 1432, 1433, + 1376, 1377, 1378, 1379, 1380, 1381, 1382, 1383, 1384, 1385, 1414, 1415, 2404, + 2405, 2440, 2441, 1392, 1393, 1394, 1395, 1396, 1397, 1398, 1399, 1400, 1401, + 1430, 1431, 2420, 2421, 2456, 2457, 1536, 1537, 1538, 1539, 1540, 1541, 1542, + 1543, 1544, 1545, 1664, 1665, 2054, 2055, 2182, 2183, 1552, 1553, 1554, 1555, + 1556, 1557, 1558, 1559, 1560, 1561, 1680, 1681, 2070, 2071, 2198, 2199, 1568, + 1569, 1570, 1571, 1572, 1573, 1574, 1575, 1576, 1577, 1666, 1667, 2086, 2087, + 2152, 2153, 1584, 1585, 1586, 1587, 1588, 1589, 1590, 1591, 1592, 1593, 1682, + 1683, 2102, 2103, 2168, 2169, 1600, 1601, 1602, 1603, 1604, 1605, 1606, 1607, + 1608, 1609, 1668, 1669, 2118, 2119, 1672, 1673, 1616, 1617, 1618, 1619, 1620, + 1621, 1622, 1623, 1624, 1625, 1684, 1685, 2134, 2135, 1688, 1689, 1632, 1633, + 1634, 1635, 1636, 1637, 1638, 1639, 1640, 1641, 1670, 1671, 2150, 2151, 2184, + 2185, 1648, 1649, 1650, 1651, 1652, 1653, 1654, 1655, 1656, 1657, 1686, 1687, + 2166, 2167, 2200, 2201, 1792, 1793, 1794, 1795, 1796, 1797, 1798, 1799, 1800, + 1801, 1920, 1921, 2310, 2311, 2438, 2439, 1808, 1809, 1810, 1811, 1812, 1813, + 1814, 1815, 1816, 1817, 1936, 1937, 2326, 2327, 2454, 2455, 1824, 1825, 1826, + 1827, 1828, 1829, 1830, 1831, 1832, 1833, 1922, 1923, 2342, 2343, 2408, 2409, + 1840, 1841, 1842, 1843, 1844, 1845, 1846, 1847, 1848, 1849, 1938, 1939, 2358, + 2359, 2424, 2425, 1856, 1857, 1858, 1859, 1860, 1861, 1862, 1863, 1864, 1865, + 1924, 1925, 2374, 2375, 1928, 1929, 1872, 1873, 1874, 1875, 1876, 1877, 1878, + 1879, 1880, 1881, 1940, 1941, 2390, 2391, 1944, 1945, 1888, 1889, 1890, 1891, + 1892, 1893, 1894, 1895, 1896, 1897, 1926, 1927, 2406, 2407, 2440, 2441, 1904, + 1905, 1906, 1907, 1908, 1909, 1910, 1911, 1912, 1913, 1942, 1943, 2422, 2423, + 2456, 2457}; +#endif + +#if defined(DEC_BIN2DPD) && DEC_BIN2DPD==1 && !defined(DECBIN2DPD) +#define DECBIN2DPD + +const uint16_t BIN2DPD[1000]={ 0, 1, 2, 3, 4, 5, 6, 7, + 8, 9, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 32, + 33, 34, 35, 36, 37, 38, 39, 40, 41, 48, 49, 50, 51, + 52, 53, 54, 55, 56, 57, 64, 65, 66, 67, 68, 69, 70, + 71, 72, 73, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, + 96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 112, 113, 114, + 115, 116, 117, 118, 119, 120, 121, 10, 11, 42, 43, 74, 75, + 106, 107, 78, 79, 26, 27, 58, 59, 90, 91, 122, 123, 94, + 95, 128, 129, 130, 131, 132, 133, 134, 135, 136, 137, 144, 145, + 146, 147, 148, 149, 150, 151, 152, 153, 160, 161, 162, 163, 164, + 165, 166, 167, 168, 169, 176, 177, 178, 179, 180, 181, 182, 183, + 184, 185, 192, 193, 194, 195, 196, 197, 198, 199, 200, 201, 208, + 209, 210, 211, 212, 213, 214, 215, 216, 217, 224, 225, 226, 227, + 228, 229, 230, 231, 232, 233, 240, 241, 242, 243, 244, 245, 246, + 247, 248, 249, 138, 139, 170, 171, 202, 203, 234, 235, 206, 207, + 154, 155, 186, 187, 218, 219, 250, 251, 222, 223, 256, 257, 258, + 259, 260, 261, 262, 263, 264, 265, 272, 273, 274, 275, 276, 277, + 278, 279, 280, 281, 288, 289, 290, 291, 292, 293, 294, 295, 296, + 297, 304, 305, 306, 307, 308, 309, 310, 311, 312, 313, 320, 321, + 322, 323, 324, 325, 326, 327, 328, 329, 336, 337, 338, 339, 340, + 341, 342, 343, 344, 345, 352, 353, 354, 355, 356, 357, 358, 359, + 360, 361, 368, 369, 370, 371, 372, 373, 374, 375, 376, 377, 266, + 267, 298, 299, 330, 331, 362, 363, 334, 335, 282, 283, 314, 315, + 346, 347, 378, 379, 350, 351, 384, 385, 386, 387, 388, 389, 390, + 391, 392, 393, 400, 401, 402, 403, 404, 405, 406, 407, 408, 409, + 416, 417, 418, 419, 420, 421, 422, 423, 424, 425, 432, 433, 434, + 435, 436, 437, 438, 439, 440, 441, 448, 449, 450, 451, 452, 453, + 454, 455, 456, 457, 464, 465, 466, 467, 468, 469, 470, 471, 472, + 473, 480, 481, 482, 483, 484, 485, 486, 487, 488, 489, 496, 497, + 498, 499, 500, 501, 502, 503, 504, 505, 394, 395, 426, 427, 458, + 459, 490, 491, 462, 463, 410, 411, 442, 443, 474, 475, 506, 507, + 478, 479, 512, 513, 514, 515, 516, 517, 518, 519, 520, 521, 528, + 529, 530, 531, 532, 533, 534, 535, 536, 537, 544, 545, 546, 547, + 548, 549, 550, 551, 552, 553, 560, 561, 562, 563, 564, 565, 566, + 567, 568, 569, 576, 577, 578, 579, 580, 581, 582, 583, 584, 585, + 592, 593, 594, 595, 596, 597, 598, 599, 600, 601, 608, 609, 610, + 611, 612, 613, 614, 615, 616, 617, 624, 625, 626, 627, 628, 629, + 630, 631, 632, 633, 522, 523, 554, 555, 586, 587, 618, 619, 590, + 591, 538, 539, 570, 571, 602, 603, 634, 635, 606, 607, 640, 641, + 642, 643, 644, 645, 646, 647, 648, 649, 656, 657, 658, 659, 660, + 661, 662, 663, 664, 665, 672, 673, 674, 675, 676, 677, 678, 679, + 680, 681, 688, 689, 690, 691, 692, 693, 694, 695, 696, 697, 704, + 705, 706, 707, 708, 709, 710, 711, 712, 713, 720, 721, 722, 723, + 724, 725, 726, 727, 728, 729, 736, 737, 738, 739, 740, 741, 742, + 743, 744, 745, 752, 753, 754, 755, 756, 757, 758, 759, 760, 761, + 650, 651, 682, 683, 714, 715, 746, 747, 718, 719, 666, 667, 698, + 699, 730, 731, 762, 763, 734, 735, 768, 769, 770, 771, 772, 773, + 774, 775, 776, 777, 784, 785, 786, 787, 788, 789, 790, 791, 792, + 793, 800, 801, 802, 803, 804, 805, 806, 807, 808, 809, 816, 817, + 818, 819, 820, 821, 822, 823, 824, 825, 832, 833, 834, 835, 836, + 837, 838, 839, 840, 841, 848, 849, 850, 851, 852, 853, 854, 855, + 856, 857, 864, 865, 866, 867, 868, 869, 870, 871, 872, 873, 880, + 881, 882, 883, 884, 885, 886, 887, 888, 889, 778, 779, 810, 811, + 842, 843, 874, 875, 846, 847, 794, 795, 826, 827, 858, 859, 890, + 891, 862, 863, 896, 897, 898, 899, 900, 901, 902, 903, 904, 905, + 912, 913, 914, 915, 916, 917, 918, 919, 920, 921, 928, 929, 930, + 931, 932, 933, 934, 935, 936, 937, 944, 945, 946, 947, 948, 949, + 950, 951, 952, 953, 960, 961, 962, 963, 964, 965, 966, 967, 968, + 969, 976, 977, 978, 979, 980, 981, 982, 983, 984, 985, 992, 993, + 994, 995, 996, 997, 998, 999, 1000, 1001, 1008, 1009, 1010, 1011, 1012, + 1013, 1014, 1015, 1016, 1017, 906, 907, 938, 939, 970, 971, 1002, 1003, + 974, 975, 922, 923, 954, 955, 986, 987, 1018, 1019, 990, 991, 12, + 13, 268, 269, 524, 525, 780, 781, 46, 47, 28, 29, 284, 285, + 540, 541, 796, 797, 62, 63, 44, 45, 300, 301, 556, 557, 812, + 813, 302, 303, 60, 61, 316, 317, 572, 573, 828, 829, 318, 319, + 76, 77, 332, 333, 588, 589, 844, 845, 558, 559, 92, 93, 348, + 349, 604, 605, 860, 861, 574, 575, 108, 109, 364, 365, 620, 621, + 876, 877, 814, 815, 124, 125, 380, 381, 636, 637, 892, 893, 830, + 831, 14, 15, 270, 271, 526, 527, 782, 783, 110, 111, 30, 31, + 286, 287, 542, 543, 798, 799, 126, 127, 140, 141, 396, 397, 652, + 653, 908, 909, 174, 175, 156, 157, 412, 413, 668, 669, 924, 925, + 190, 191, 172, 173, 428, 429, 684, 685, 940, 941, 430, 431, 188, + 189, 444, 445, 700, 701, 956, 957, 446, 447, 204, 205, 460, 461, + 716, 717, 972, 973, 686, 687, 220, 221, 476, 477, 732, 733, 988, + 989, 702, 703, 236, 237, 492, 493, 748, 749, 1004, 1005, 942, 943, + 252, 253, 508, 509, 764, 765, 1020, 1021, 958, 959, 142, 143, 398, + 399, 654, 655, 910, 911, 238, 239, 158, 159, 414, 415, 670, 671, + 926, 927, 254, 255}; +#endif + +#if defined(DEC_DPD2BIN) && DEC_DPD2BIN==1 && !defined(DECDPD2BIN) +#define DECDPD2BIN + +const uint16_t DPD2BIN[1024]={ 0, 1, 2, 3, 4, 5, 6, 7, + 8, 9, 80, 81, 800, 801, 880, 881, 10, 11, 12, 13, 14, + 15, 16, 17, 18, 19, 90, 91, 810, 811, 890, 891, 20, 21, + 22, 23, 24, 25, 26, 27, 28, 29, 82, 83, 820, 821, 808, + 809, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 92, 93, + 830, 831, 818, 819, 40, 41, 42, 43, 44, 45, 46, 47, 48, + 49, 84, 85, 840, 841, 88, 89, 50, 51, 52, 53, 54, 55, + 56, 57, 58, 59, 94, 95, 850, 851, 98, 99, 60, 61, 62, + 63, 64, 65, 66, 67, 68, 69, 86, 87, 860, 861, 888, 889, + 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 96, 97, 870, + 871, 898, 899, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, + 180, 181, 900, 901, 980, 981, 110, 111, 112, 113, 114, 115, 116, + 117, 118, 119, 190, 191, 910, 911, 990, 991, 120, 121, 122, 123, + 124, 125, 126, 127, 128, 129, 182, 183, 920, 921, 908, 909, 130, + 131, 132, 133, 134, 135, 136, 137, 138, 139, 192, 193, 930, 931, + 918, 919, 140, 141, 142, 143, 144, 145, 146, 147, 148, 149, 184, + 185, 940, 941, 188, 189, 150, 151, 152, 153, 154, 155, 156, 157, + 158, 159, 194, 195, 950, 951, 198, 199, 160, 161, 162, 163, 164, + 165, 166, 167, 168, 169, 186, 187, 960, 961, 988, 989, 170, 171, + 172, 173, 174, 175, 176, 177, 178, 179, 196, 197, 970, 971, 998, + 999, 200, 201, 202, 203, 204, 205, 206, 207, 208, 209, 280, 281, + 802, 803, 882, 883, 210, 211, 212, 213, 214, 215, 216, 217, 218, + 219, 290, 291, 812, 813, 892, 893, 220, 221, 222, 223, 224, 225, + 226, 227, 228, 229, 282, 283, 822, 823, 828, 829, 230, 231, 232, + 233, 234, 235, 236, 237, 238, 239, 292, 293, 832, 833, 838, 839, + 240, 241, 242, 243, 244, 245, 246, 247, 248, 249, 284, 285, 842, + 843, 288, 289, 250, 251, 252, 253, 254, 255, 256, 257, 258, 259, + 294, 295, 852, 853, 298, 299, 260, 261, 262, 263, 264, 265, 266, + 267, 268, 269, 286, 287, 862, 863, 888, 889, 270, 271, 272, 273, + 274, 275, 276, 277, 278, 279, 296, 297, 872, 873, 898, 899, 300, + 301, 302, 303, 304, 305, 306, 307, 308, 309, 380, 381, 902, 903, + 982, 983, 310, 311, 312, 313, 314, 315, 316, 317, 318, 319, 390, + 391, 912, 913, 992, 993, 320, 321, 322, 323, 324, 325, 326, 327, + 328, 329, 382, 383, 922, 923, 928, 929, 330, 331, 332, 333, 334, + 335, 336, 337, 338, 339, 392, 393, 932, 933, 938, 939, 340, 341, + 342, 343, 344, 345, 346, 347, 348, 349, 384, 385, 942, 943, 388, + 389, 350, 351, 352, 353, 354, 355, 356, 357, 358, 359, 394, 395, + 952, 953, 398, 399, 360, 361, 362, 363, 364, 365, 366, 367, 368, + 369, 386, 387, 962, 963, 988, 989, 370, 371, 372, 373, 374, 375, + 376, 377, 378, 379, 396, 397, 972, 973, 998, 999, 400, 401, 402, + 403, 404, 405, 406, 407, 408, 409, 480, 481, 804, 805, 884, 885, + 410, 411, 412, 413, 414, 415, 416, 417, 418, 419, 490, 491, 814, + 815, 894, 895, 420, 421, 422, 423, 424, 425, 426, 427, 428, 429, + 482, 483, 824, 825, 848, 849, 430, 431, 432, 433, 434, 435, 436, + 437, 438, 439, 492, 493, 834, 835, 858, 859, 440, 441, 442, 443, + 444, 445, 446, 447, 448, 449, 484, 485, 844, 845, 488, 489, 450, + 451, 452, 453, 454, 455, 456, 457, 458, 459, 494, 495, 854, 855, + 498, 499, 460, 461, 462, 463, 464, 465, 466, 467, 468, 469, 486, + 487, 864, 865, 888, 889, 470, 471, 472, 473, 474, 475, 476, 477, + 478, 479, 496, 497, 874, 875, 898, 899, 500, 501, 502, 503, 504, + 505, 506, 507, 508, 509, 580, 581, 904, 905, 984, 985, 510, 511, + 512, 513, 514, 515, 516, 517, 518, 519, 590, 591, 914, 915, 994, + 995, 520, 521, 522, 523, 524, 525, 526, 527, 528, 529, 582, 583, + 924, 925, 948, 949, 530, 531, 532, 533, 534, 535, 536, 537, 538, + 539, 592, 593, 934, 935, 958, 959, 540, 541, 542, 543, 544, 545, + 546, 547, 548, 549, 584, 585, 944, 945, 588, 589, 550, 551, 552, + 553, 554, 555, 556, 557, 558, 559, 594, 595, 954, 955, 598, 599, + 560, 561, 562, 563, 564, 565, 566, 567, 568, 569, 586, 587, 964, + 965, 988, 989, 570, 571, 572, 573, 574, 575, 576, 577, 578, 579, + 596, 597, 974, 975, 998, 999, 600, 601, 602, 603, 604, 605, 606, + 607, 608, 609, 680, 681, 806, 807, 886, 887, 610, 611, 612, 613, + 614, 615, 616, 617, 618, 619, 690, 691, 816, 817, 896, 897, 620, + 621, 622, 623, 624, 625, 626, 627, 628, 629, 682, 683, 826, 827, + 868, 869, 630, 631, 632, 633, 634, 635, 636, 637, 638, 639, 692, + 693, 836, 837, 878, 879, 640, 641, 642, 643, 644, 645, 646, 647, + 648, 649, 684, 685, 846, 847, 688, 689, 650, 651, 652, 653, 654, + 655, 656, 657, 658, 659, 694, 695, 856, 857, 698, 699, 660, 661, + 662, 663, 664, 665, 666, 667, 668, 669, 686, 687, 866, 867, 888, + 889, 670, 671, 672, 673, 674, 675, 676, 677, 678, 679, 696, 697, + 876, 877, 898, 899, 700, 701, 702, 703, 704, 705, 706, 707, 708, + 709, 780, 781, 906, 907, 986, 987, 710, 711, 712, 713, 714, 715, + 716, 717, 718, 719, 790, 791, 916, 917, 996, 997, 720, 721, 722, + 723, 724, 725, 726, 727, 728, 729, 782, 783, 926, 927, 968, 969, + 730, 731, 732, 733, 734, 735, 736, 737, 738, 739, 792, 793, 936, + 937, 978, 979, 740, 741, 742, 743, 744, 745, 746, 747, 748, 749, + 784, 785, 946, 947, 788, 789, 750, 751, 752, 753, 754, 755, 756, + 757, 758, 759, 794, 795, 956, 957, 798, 799, 760, 761, 762, 763, + 764, 765, 766, 767, 768, 769, 786, 787, 966, 967, 988, 989, 770, + 771, 772, 773, 774, 775, 776, 777, 778, 779, 796, 797, 976, 977, + 998, 999}; +#endif + +#if defined(DEC_DPD2BINK) && DEC_DPD2BINK==1 && !defined(DECDPD2BINK) +#define DECDPD2BINK + +const uint32_t DPD2BINK[1024]={ 0, 1000, 2000, 3000, 4000, 5000, + 6000, 7000, 8000, 9000, 80000, 81000, 800000, 801000, 880000, 881000, + 10000, 11000, 12000, 13000, 14000, 15000, 16000, 17000, 18000, 19000, + 90000, 91000, 810000, 811000, 890000, 891000, 20000, 21000, 22000, 23000, + 24000, 25000, 26000, 27000, 28000, 29000, 82000, 83000, 820000, 821000, + 808000, 809000, 30000, 31000, 32000, 33000, 34000, 35000, 36000, 37000, + 38000, 39000, 92000, 93000, 830000, 831000, 818000, 819000, 40000, 41000, + 42000, 43000, 44000, 45000, 46000, 47000, 48000, 49000, 84000, 85000, + 840000, 841000, 88000, 89000, 50000, 51000, 52000, 53000, 54000, 55000, + 56000, 57000, 58000, 59000, 94000, 95000, 850000, 851000, 98000, 99000, + 60000, 61000, 62000, 63000, 64000, 65000, 66000, 67000, 68000, 69000, + 86000, 87000, 860000, 861000, 888000, 889000, 70000, 71000, 72000, 73000, + 74000, 75000, 76000, 77000, 78000, 79000, 96000, 97000, 870000, 871000, + 898000, 899000, 100000, 101000, 102000, 103000, 104000, 105000, 106000, 107000, + 108000, 109000, 180000, 181000, 900000, 901000, 980000, 981000, 110000, 111000, + 112000, 113000, 114000, 115000, 116000, 117000, 118000, 119000, 190000, 191000, + 910000, 911000, 990000, 991000, 120000, 121000, 122000, 123000, 124000, 125000, + 126000, 127000, 128000, 129000, 182000, 183000, 920000, 921000, 908000, 909000, + 130000, 131000, 132000, 133000, 134000, 135000, 136000, 137000, 138000, 139000, + 192000, 193000, 930000, 931000, 918000, 919000, 140000, 141000, 142000, 143000, + 144000, 145000, 146000, 147000, 148000, 149000, 184000, 185000, 940000, 941000, + 188000, 189000, 150000, 151000, 152000, 153000, 154000, 155000, 156000, 157000, + 158000, 159000, 194000, 195000, 950000, 951000, 198000, 199000, 160000, 161000, + 162000, 163000, 164000, 165000, 166000, 167000, 168000, 169000, 186000, 187000, + 960000, 961000, 988000, 989000, 170000, 171000, 172000, 173000, 174000, 175000, + 176000, 177000, 178000, 179000, 196000, 197000, 970000, 971000, 998000, 999000, + 200000, 201000, 202000, 203000, 204000, 205000, 206000, 207000, 208000, 209000, + 280000, 281000, 802000, 803000, 882000, 883000, 210000, 211000, 212000, 213000, + 214000, 215000, 216000, 217000, 218000, 219000, 290000, 291000, 812000, 813000, + 892000, 893000, 220000, 221000, 222000, 223000, 224000, 225000, 226000, 227000, + 228000, 229000, 282000, 283000, 822000, 823000, 828000, 829000, 230000, 231000, + 232000, 233000, 234000, 235000, 236000, 237000, 238000, 239000, 292000, 293000, + 832000, 833000, 838000, 839000, 240000, 241000, 242000, 243000, 244000, 245000, + 246000, 247000, 248000, 249000, 284000, 285000, 842000, 843000, 288000, 289000, + 250000, 251000, 252000, 253000, 254000, 255000, 256000, 257000, 258000, 259000, + 294000, 295000, 852000, 853000, 298000, 299000, 260000, 261000, 262000, 263000, + 264000, 265000, 266000, 267000, 268000, 269000, 286000, 287000, 862000, 863000, + 888000, 889000, 270000, 271000, 272000, 273000, 274000, 275000, 276000, 277000, + 278000, 279000, 296000, 297000, 872000, 873000, 898000, 899000, 300000, 301000, + 302000, 303000, 304000, 305000, 306000, 307000, 308000, 309000, 380000, 381000, + 902000, 903000, 982000, 983000, 310000, 311000, 312000, 313000, 314000, 315000, + 316000, 317000, 318000, 319000, 390000, 391000, 912000, 913000, 992000, 993000, + 320000, 321000, 322000, 323000, 324000, 325000, 326000, 327000, 328000, 329000, + 382000, 383000, 922000, 923000, 928000, 929000, 330000, 331000, 332000, 333000, + 334000, 335000, 336000, 337000, 338000, 339000, 392000, 393000, 932000, 933000, + 938000, 939000, 340000, 341000, 342000, 343000, 344000, 345000, 346000, 347000, + 348000, 349000, 384000, 385000, 942000, 943000, 388000, 389000, 350000, 351000, + 352000, 353000, 354000, 355000, 356000, 357000, 358000, 359000, 394000, 395000, + 952000, 953000, 398000, 399000, 360000, 361000, 362000, 363000, 364000, 365000, + 366000, 367000, 368000, 369000, 386000, 387000, 962000, 963000, 988000, 989000, + 370000, 371000, 372000, 373000, 374000, 375000, 376000, 377000, 378000, 379000, + 396000, 397000, 972000, 973000, 998000, 999000, 400000, 401000, 402000, 403000, + 404000, 405000, 406000, 407000, 408000, 409000, 480000, 481000, 804000, 805000, + 884000, 885000, 410000, 411000, 412000, 413000, 414000, 415000, 416000, 417000, + 418000, 419000, 490000, 491000, 814000, 815000, 894000, 895000, 420000, 421000, + 422000, 423000, 424000, 425000, 426000, 427000, 428000, 429000, 482000, 483000, + 824000, 825000, 848000, 849000, 430000, 431000, 432000, 433000, 434000, 435000, + 436000, 437000, 438000, 439000, 492000, 493000, 834000, 835000, 858000, 859000, + 440000, 441000, 442000, 443000, 444000, 445000, 446000, 447000, 448000, 449000, + 484000, 485000, 844000, 845000, 488000, 489000, 450000, 451000, 452000, 453000, + 454000, 455000, 456000, 457000, 458000, 459000, 494000, 495000, 854000, 855000, + 498000, 499000, 460000, 461000, 462000, 463000, 464000, 465000, 466000, 467000, + 468000, 469000, 486000, 487000, 864000, 865000, 888000, 889000, 470000, 471000, + 472000, 473000, 474000, 475000, 476000, 477000, 478000, 479000, 496000, 497000, + 874000, 875000, 898000, 899000, 500000, 501000, 502000, 503000, 504000, 505000, + 506000, 507000, 508000, 509000, 580000, 581000, 904000, 905000, 984000, 985000, + 510000, 511000, 512000, 513000, 514000, 515000, 516000, 517000, 518000, 519000, + 590000, 591000, 914000, 915000, 994000, 995000, 520000, 521000, 522000, 523000, + 524000, 525000, 526000, 527000, 528000, 529000, 582000, 583000, 924000, 925000, + 948000, 949000, 530000, 531000, 532000, 533000, 534000, 535000, 536000, 537000, + 538000, 539000, 592000, 593000, 934000, 935000, 958000, 959000, 540000, 541000, + 542000, 543000, 544000, 545000, 546000, 547000, 548000, 549000, 584000, 585000, + 944000, 945000, 588000, 589000, 550000, 551000, 552000, 553000, 554000, 555000, + 556000, 557000, 558000, 559000, 594000, 595000, 954000, 955000, 598000, 599000, + 560000, 561000, 562000, 563000, 564000, 565000, 566000, 567000, 568000, 569000, + 586000, 587000, 964000, 965000, 988000, 989000, 570000, 571000, 572000, 573000, + 574000, 575000, 576000, 577000, 578000, 579000, 596000, 597000, 974000, 975000, + 998000, 999000, 600000, 601000, 602000, 603000, 604000, 605000, 606000, 607000, + 608000, 609000, 680000, 681000, 806000, 807000, 886000, 887000, 610000, 611000, + 612000, 613000, 614000, 615000, 616000, 617000, 618000, 619000, 690000, 691000, + 816000, 817000, 896000, 897000, 620000, 621000, 622000, 623000, 624000, 625000, + 626000, 627000, 628000, 629000, 682000, 683000, 826000, 827000, 868000, 869000, + 630000, 631000, 632000, 633000, 634000, 635000, 636000, 637000, 638000, 639000, + 692000, 693000, 836000, 837000, 878000, 879000, 640000, 641000, 642000, 643000, + 644000, 645000, 646000, 647000, 648000, 649000, 684000, 685000, 846000, 847000, + 688000, 689000, 650000, 651000, 652000, 653000, 654000, 655000, 656000, 657000, + 658000, 659000, 694000, 695000, 856000, 857000, 698000, 699000, 660000, 661000, + 662000, 663000, 664000, 665000, 666000, 667000, 668000, 669000, 686000, 687000, + 866000, 867000, 888000, 889000, 670000, 671000, 672000, 673000, 674000, 675000, + 676000, 677000, 678000, 679000, 696000, 697000, 876000, 877000, 898000, 899000, + 700000, 701000, 702000, 703000, 704000, 705000, 706000, 707000, 708000, 709000, + 780000, 781000, 906000, 907000, 986000, 987000, 710000, 711000, 712000, 713000, + 714000, 715000, 716000, 717000, 718000, 719000, 790000, 791000, 916000, 917000, + 996000, 997000, 720000, 721000, 722000, 723000, 724000, 725000, 726000, 727000, + 728000, 729000, 782000, 783000, 926000, 927000, 968000, 969000, 730000, 731000, + 732000, 733000, 734000, 735000, 736000, 737000, 738000, 739000, 792000, 793000, + 936000, 937000, 978000, 979000, 740000, 741000, 742000, 743000, 744000, 745000, + 746000, 747000, 748000, 749000, 784000, 785000, 946000, 947000, 788000, 789000, + 750000, 751000, 752000, 753000, 754000, 755000, 756000, 757000, 758000, 759000, + 794000, 795000, 956000, 957000, 798000, 799000, 760000, 761000, 762000, 763000, + 764000, 765000, 766000, 767000, 768000, 769000, 786000, 787000, 966000, 967000, + 988000, 989000, 770000, 771000, 772000, 773000, 774000, 775000, 776000, 777000, + 778000, 779000, 796000, 797000, 976000, 977000, 998000, 999000}; +#endif + +#if defined(DEC_DPD2BINM) && DEC_DPD2BINM==1 && !defined(DECDPD2BINM) +#define DECDPD2BINM + +const uint32_t DPD2BINM[1024]={0, 1000000, 2000000, 3000000, 4000000, + 5000000, 6000000, 7000000, 8000000, 9000000, 80000000, 81000000, + 800000000, 801000000, 880000000, 881000000, 10000000, 11000000, 12000000, + 13000000, 14000000, 15000000, 16000000, 17000000, 18000000, 19000000, + 90000000, 91000000, 810000000, 811000000, 890000000, 891000000, 20000000, + 21000000, 22000000, 23000000, 24000000, 25000000, 26000000, 27000000, + 28000000, 29000000, 82000000, 83000000, 820000000, 821000000, 808000000, + 809000000, 30000000, 31000000, 32000000, 33000000, 34000000, 35000000, + 36000000, 37000000, 38000000, 39000000, 92000000, 93000000, 830000000, + 831000000, 818000000, 819000000, 40000000, 41000000, 42000000, 43000000, + 44000000, 45000000, 46000000, 47000000, 48000000, 49000000, 84000000, + 85000000, 840000000, 841000000, 88000000, 89000000, 50000000, 51000000, + 52000000, 53000000, 54000000, 55000000, 56000000, 57000000, 58000000, + 59000000, 94000000, 95000000, 850000000, 851000000, 98000000, 99000000, + 60000000, 61000000, 62000000, 63000000, 64000000, 65000000, 66000000, + 67000000, 68000000, 69000000, 86000000, 87000000, 860000000, 861000000, + 888000000, 889000000, 70000000, 71000000, 72000000, 73000000, 74000000, + 75000000, 76000000, 77000000, 78000000, 79000000, 96000000, 97000000, + 870000000, 871000000, 898000000, 899000000, 100000000, 101000000, 102000000, + 103000000, 104000000, 105000000, 106000000, 107000000, 108000000, 109000000, + 180000000, 181000000, 900000000, 901000000, 980000000, 981000000, 110000000, + 111000000, 112000000, 113000000, 114000000, 115000000, 116000000, 117000000, + 118000000, 119000000, 190000000, 191000000, 910000000, 911000000, 990000000, + 991000000, 120000000, 121000000, 122000000, 123000000, 124000000, 125000000, + 126000000, 127000000, 128000000, 129000000, 182000000, 183000000, 920000000, + 921000000, 908000000, 909000000, 130000000, 131000000, 132000000, 133000000, + 134000000, 135000000, 136000000, 137000000, 138000000, 139000000, 192000000, + 193000000, 930000000, 931000000, 918000000, 919000000, 140000000, 141000000, + 142000000, 143000000, 144000000, 145000000, 146000000, 147000000, 148000000, + 149000000, 184000000, 185000000, 940000000, 941000000, 188000000, 189000000, + 150000000, 151000000, 152000000, 153000000, 154000000, 155000000, 156000000, + 157000000, 158000000, 159000000, 194000000, 195000000, 950000000, 951000000, + 198000000, 199000000, 160000000, 161000000, 162000000, 163000000, 164000000, + 165000000, 166000000, 167000000, 168000000, 169000000, 186000000, 187000000, + 960000000, 961000000, 988000000, 989000000, 170000000, 171000000, 172000000, + 173000000, 174000000, 175000000, 176000000, 177000000, 178000000, 179000000, + 196000000, 197000000, 970000000, 971000000, 998000000, 999000000, 200000000, + 201000000, 202000000, 203000000, 204000000, 205000000, 206000000, 207000000, + 208000000, 209000000, 280000000, 281000000, 802000000, 803000000, 882000000, + 883000000, 210000000, 211000000, 212000000, 213000000, 214000000, 215000000, + 216000000, 217000000, 218000000, 219000000, 290000000, 291000000, 812000000, + 813000000, 892000000, 893000000, 220000000, 221000000, 222000000, 223000000, + 224000000, 225000000, 226000000, 227000000, 228000000, 229000000, 282000000, + 283000000, 822000000, 823000000, 828000000, 829000000, 230000000, 231000000, + 232000000, 233000000, 234000000, 235000000, 236000000, 237000000, 238000000, + 239000000, 292000000, 293000000, 832000000, 833000000, 838000000, 839000000, + 240000000, 241000000, 242000000, 243000000, 244000000, 245000000, 246000000, + 247000000, 248000000, 249000000, 284000000, 285000000, 842000000, 843000000, + 288000000, 289000000, 250000000, 251000000, 252000000, 253000000, 254000000, + 255000000, 256000000, 257000000, 258000000, 259000000, 294000000, 295000000, + 852000000, 853000000, 298000000, 299000000, 260000000, 261000000, 262000000, + 263000000, 264000000, 265000000, 266000000, 267000000, 268000000, 269000000, + 286000000, 287000000, 862000000, 863000000, 888000000, 889000000, 270000000, + 271000000, 272000000, 273000000, 274000000, 275000000, 276000000, 277000000, + 278000000, 279000000, 296000000, 297000000, 872000000, 873000000, 898000000, + 899000000, 300000000, 301000000, 302000000, 303000000, 304000000, 305000000, + 306000000, 307000000, 308000000, 309000000, 380000000, 381000000, 902000000, + 903000000, 982000000, 983000000, 310000000, 311000000, 312000000, 313000000, + 314000000, 315000000, 316000000, 317000000, 318000000, 319000000, 390000000, + 391000000, 912000000, 913000000, 992000000, 993000000, 320000000, 321000000, + 322000000, 323000000, 324000000, 325000000, 326000000, 327000000, 328000000, + 329000000, 382000000, 383000000, 922000000, 923000000, 928000000, 929000000, + 330000000, 331000000, 332000000, 333000000, 334000000, 335000000, 336000000, + 337000000, 338000000, 339000000, 392000000, 393000000, 932000000, 933000000, + 938000000, 939000000, 340000000, 341000000, 342000000, 343000000, 344000000, + 345000000, 346000000, 347000000, 348000000, 349000000, 384000000, 385000000, + 942000000, 943000000, 388000000, 389000000, 350000000, 351000000, 352000000, + 353000000, 354000000, 355000000, 356000000, 357000000, 358000000, 359000000, + 394000000, 395000000, 952000000, 953000000, 398000000, 399000000, 360000000, + 361000000, 362000000, 363000000, 364000000, 365000000, 366000000, 367000000, + 368000000, 369000000, 386000000, 387000000, 962000000, 963000000, 988000000, + 989000000, 370000000, 371000000, 372000000, 373000000, 374000000, 375000000, + 376000000, 377000000, 378000000, 379000000, 396000000, 397000000, 972000000, + 973000000, 998000000, 999000000, 400000000, 401000000, 402000000, 403000000, + 404000000, 405000000, 406000000, 407000000, 408000000, 409000000, 480000000, + 481000000, 804000000, 805000000, 884000000, 885000000, 410000000, 411000000, + 412000000, 413000000, 414000000, 415000000, 416000000, 417000000, 418000000, + 419000000, 490000000, 491000000, 814000000, 815000000, 894000000, 895000000, + 420000000, 421000000, 422000000, 423000000, 424000000, 425000000, 426000000, + 427000000, 428000000, 429000000, 482000000, 483000000, 824000000, 825000000, + 848000000, 849000000, 430000000, 431000000, 432000000, 433000000, 434000000, + 435000000, 436000000, 437000000, 438000000, 439000000, 492000000, 493000000, + 834000000, 835000000, 858000000, 859000000, 440000000, 441000000, 442000000, + 443000000, 444000000, 445000000, 446000000, 447000000, 448000000, 449000000, + 484000000, 485000000, 844000000, 845000000, 488000000, 489000000, 450000000, + 451000000, 452000000, 453000000, 454000000, 455000000, 456000000, 457000000, + 458000000, 459000000, 494000000, 495000000, 854000000, 855000000, 498000000, + 499000000, 460000000, 461000000, 462000000, 463000000, 464000000, 465000000, + 466000000, 467000000, 468000000, 469000000, 486000000, 487000000, 864000000, + 865000000, 888000000, 889000000, 470000000, 471000000, 472000000, 473000000, + 474000000, 475000000, 476000000, 477000000, 478000000, 479000000, 496000000, + 497000000, 874000000, 875000000, 898000000, 899000000, 500000000, 501000000, + 502000000, 503000000, 504000000, 505000000, 506000000, 507000000, 508000000, + 509000000, 580000000, 581000000, 904000000, 905000000, 984000000, 985000000, + 510000000, 511000000, 512000000, 513000000, 514000000, 515000000, 516000000, + 517000000, 518000000, 519000000, 590000000, 591000000, 914000000, 915000000, + 994000000, 995000000, 520000000, 521000000, 522000000, 523000000, 524000000, + 525000000, 526000000, 527000000, 528000000, 529000000, 582000000, 583000000, + 924000000, 925000000, 948000000, 949000000, 530000000, 531000000, 532000000, + 533000000, 534000000, 535000000, 536000000, 537000000, 538000000, 539000000, + 592000000, 593000000, 934000000, 935000000, 958000000, 959000000, 540000000, + 541000000, 542000000, 543000000, 544000000, 545000000, 546000000, 547000000, + 548000000, 549000000, 584000000, 585000000, 944000000, 945000000, 588000000, + 589000000, 550000000, 551000000, 552000000, 553000000, 554000000, 555000000, + 556000000, 557000000, 558000000, 559000000, 594000000, 595000000, 954000000, + 955000000, 598000000, 599000000, 560000000, 561000000, 562000000, 563000000, + 564000000, 565000000, 566000000, 567000000, 568000000, 569000000, 586000000, + 587000000, 964000000, 965000000, 988000000, 989000000, 570000000, 571000000, + 572000000, 573000000, 574000000, 575000000, 576000000, 577000000, 578000000, + 579000000, 596000000, 597000000, 974000000, 975000000, 998000000, 999000000, + 600000000, 601000000, 602000000, 603000000, 604000000, 605000000, 606000000, + 607000000, 608000000, 609000000, 680000000, 681000000, 806000000, 807000000, + 886000000, 887000000, 610000000, 611000000, 612000000, 613000000, 614000000, + 615000000, 616000000, 617000000, 618000000, 619000000, 690000000, 691000000, + 816000000, 817000000, 896000000, 897000000, 620000000, 621000000, 622000000, + 623000000, 624000000, 625000000, 626000000, 627000000, 628000000, 629000000, + 682000000, 683000000, 826000000, 827000000, 868000000, 869000000, 630000000, + 631000000, 632000000, 633000000, 634000000, 635000000, 636000000, 637000000, + 638000000, 639000000, 692000000, 693000000, 836000000, 837000000, 878000000, + 879000000, 640000000, 641000000, 642000000, 643000000, 644000000, 645000000, + 646000000, 647000000, 648000000, 649000000, 684000000, 685000000, 846000000, + 847000000, 688000000, 689000000, 650000000, 651000000, 652000000, 653000000, + 654000000, 655000000, 656000000, 657000000, 658000000, 659000000, 694000000, + 695000000, 856000000, 857000000, 698000000, 699000000, 660000000, 661000000, + 662000000, 663000000, 664000000, 665000000, 666000000, 667000000, 668000000, + 669000000, 686000000, 687000000, 866000000, 867000000, 888000000, 889000000, + 670000000, 671000000, 672000000, 673000000, 674000000, 675000000, 676000000, + 677000000, 678000000, 679000000, 696000000, 697000000, 876000000, 877000000, + 898000000, 899000000, 700000000, 701000000, 702000000, 703000000, 704000000, + 705000000, 706000000, 707000000, 708000000, 709000000, 780000000, 781000000, + 906000000, 907000000, 986000000, 987000000, 710000000, 711000000, 712000000, + 713000000, 714000000, 715000000, 716000000, 717000000, 718000000, 719000000, + 790000000, 791000000, 916000000, 917000000, 996000000, 997000000, 720000000, + 721000000, 722000000, 723000000, 724000000, 725000000, 726000000, 727000000, + 728000000, 729000000, 782000000, 783000000, 926000000, 927000000, 968000000, + 969000000, 730000000, 731000000, 732000000, 733000000, 734000000, 735000000, + 736000000, 737000000, 738000000, 739000000, 792000000, 793000000, 936000000, + 937000000, 978000000, 979000000, 740000000, 741000000, 742000000, 743000000, + 744000000, 745000000, 746000000, 747000000, 748000000, 749000000, 784000000, + 785000000, 946000000, 947000000, 788000000, 789000000, 750000000, 751000000, + 752000000, 753000000, 754000000, 755000000, 756000000, 757000000, 758000000, + 759000000, 794000000, 795000000, 956000000, 957000000, 798000000, 799000000, + 760000000, 761000000, 762000000, 763000000, 764000000, 765000000, 766000000, + 767000000, 768000000, 769000000, 786000000, 787000000, 966000000, 967000000, + 988000000, 989000000, 770000000, 771000000, 772000000, 773000000, 774000000, + 775000000, 776000000, 777000000, 778000000, 779000000, 796000000, 797000000, + 976000000, 977000000, 998000000, 999000000}; +#endif + +#if defined(DEC_BIN2CHAR) && DEC_BIN2CHAR==1 && !defined(DECBIN2CHAR) +#define DECBIN2CHAR + +const uint8_t BIN2CHAR[4001]={ + '\0','0','0','0', '\1','0','0','1', '\1','0','0','2', '\1','0','0','3', '\1','0','0','4', + '\1','0','0','5', '\1','0','0','6', '\1','0','0','7', '\1','0','0','8', '\1','0','0','9', + '\2','0','1','0', '\2','0','1','1', '\2','0','1','2', '\2','0','1','3', '\2','0','1','4', + '\2','0','1','5', '\2','0','1','6', '\2','0','1','7', '\2','0','1','8', '\2','0','1','9', + '\2','0','2','0', '\2','0','2','1', '\2','0','2','2', '\2','0','2','3', '\2','0','2','4', + '\2','0','2','5', '\2','0','2','6', '\2','0','2','7', '\2','0','2','8', '\2','0','2','9', + '\2','0','3','0', '\2','0','3','1', '\2','0','3','2', '\2','0','3','3', '\2','0','3','4', + '\2','0','3','5', '\2','0','3','6', '\2','0','3','7', '\2','0','3','8', '\2','0','3','9', + '\2','0','4','0', '\2','0','4','1', '\2','0','4','2', '\2','0','4','3', '\2','0','4','4', + '\2','0','4','5', '\2','0','4','6', '\2','0','4','7', '\2','0','4','8', '\2','0','4','9', + '\2','0','5','0', '\2','0','5','1', '\2','0','5','2', '\2','0','5','3', '\2','0','5','4', + '\2','0','5','5', '\2','0','5','6', '\2','0','5','7', '\2','0','5','8', '\2','0','5','9', + '\2','0','6','0', '\2','0','6','1', '\2','0','6','2', '\2','0','6','3', '\2','0','6','4', + '\2','0','6','5', '\2','0','6','6', '\2','0','6','7', '\2','0','6','8', '\2','0','6','9', + '\2','0','7','0', '\2','0','7','1', '\2','0','7','2', '\2','0','7','3', '\2','0','7','4', + '\2','0','7','5', '\2','0','7','6', '\2','0','7','7', '\2','0','7','8', '\2','0','7','9', + '\2','0','8','0', '\2','0','8','1', '\2','0','8','2', '\2','0','8','3', '\2','0','8','4', + '\2','0','8','5', '\2','0','8','6', '\2','0','8','7', '\2','0','8','8', '\2','0','8','9', + '\2','0','9','0', '\2','0','9','1', '\2','0','9','2', '\2','0','9','3', '\2','0','9','4', + '\2','0','9','5', '\2','0','9','6', '\2','0','9','7', '\2','0','9','8', '\2','0','9','9', + '\3','1','0','0', '\3','1','0','1', '\3','1','0','2', '\3','1','0','3', '\3','1','0','4', + '\3','1','0','5', '\3','1','0','6', '\3','1','0','7', '\3','1','0','8', '\3','1','0','9', + '\3','1','1','0', '\3','1','1','1', '\3','1','1','2', '\3','1','1','3', '\3','1','1','4', + '\3','1','1','5', '\3','1','1','6', '\3','1','1','7', '\3','1','1','8', '\3','1','1','9', + '\3','1','2','0', '\3','1','2','1', '\3','1','2','2', '\3','1','2','3', '\3','1','2','4', + '\3','1','2','5', '\3','1','2','6', '\3','1','2','7', '\3','1','2','8', '\3','1','2','9', + '\3','1','3','0', '\3','1','3','1', '\3','1','3','2', '\3','1','3','3', '\3','1','3','4', + '\3','1','3','5', '\3','1','3','6', '\3','1','3','7', '\3','1','3','8', '\3','1','3','9', + '\3','1','4','0', '\3','1','4','1', '\3','1','4','2', '\3','1','4','3', '\3','1','4','4', + '\3','1','4','5', '\3','1','4','6', '\3','1','4','7', '\3','1','4','8', '\3','1','4','9', + '\3','1','5','0', '\3','1','5','1', '\3','1','5','2', '\3','1','5','3', '\3','1','5','4', + '\3','1','5','5', '\3','1','5','6', '\3','1','5','7', '\3','1','5','8', '\3','1','5','9', + '\3','1','6','0', '\3','1','6','1', '\3','1','6','2', '\3','1','6','3', '\3','1','6','4', + '\3','1','6','5', '\3','1','6','6', '\3','1','6','7', '\3','1','6','8', '\3','1','6','9', + '\3','1','7','0', '\3','1','7','1', '\3','1','7','2', '\3','1','7','3', '\3','1','7','4', + '\3','1','7','5', '\3','1','7','6', '\3','1','7','7', '\3','1','7','8', '\3','1','7','9', + '\3','1','8','0', '\3','1','8','1', '\3','1','8','2', '\3','1','8','3', '\3','1','8','4', + '\3','1','8','5', '\3','1','8','6', '\3','1','8','7', '\3','1','8','8', '\3','1','8','9', + '\3','1','9','0', '\3','1','9','1', '\3','1','9','2', '\3','1','9','3', '\3','1','9','4', + '\3','1','9','5', '\3','1','9','6', '\3','1','9','7', '\3','1','9','8', '\3','1','9','9', + '\3','2','0','0', '\3','2','0','1', '\3','2','0','2', '\3','2','0','3', '\3','2','0','4', + '\3','2','0','5', '\3','2','0','6', '\3','2','0','7', '\3','2','0','8', '\3','2','0','9', + '\3','2','1','0', '\3','2','1','1', '\3','2','1','2', '\3','2','1','3', '\3','2','1','4', + '\3','2','1','5', '\3','2','1','6', '\3','2','1','7', '\3','2','1','8', '\3','2','1','9', + '\3','2','2','0', '\3','2','2','1', '\3','2','2','2', '\3','2','2','3', '\3','2','2','4', + '\3','2','2','5', '\3','2','2','6', '\3','2','2','7', '\3','2','2','8', '\3','2','2','9', + '\3','2','3','0', '\3','2','3','1', '\3','2','3','2', '\3','2','3','3', '\3','2','3','4', + '\3','2','3','5', '\3','2','3','6', '\3','2','3','7', '\3','2','3','8', '\3','2','3','9', + '\3','2','4','0', '\3','2','4','1', '\3','2','4','2', '\3','2','4','3', '\3','2','4','4', + '\3','2','4','5', '\3','2','4','6', '\3','2','4','7', '\3','2','4','8', '\3','2','4','9', + '\3','2','5','0', '\3','2','5','1', '\3','2','5','2', '\3','2','5','3', '\3','2','5','4', + '\3','2','5','5', '\3','2','5','6', '\3','2','5','7', '\3','2','5','8', '\3','2','5','9', + '\3','2','6','0', '\3','2','6','1', '\3','2','6','2', '\3','2','6','3', '\3','2','6','4', + '\3','2','6','5', '\3','2','6','6', '\3','2','6','7', '\3','2','6','8', '\3','2','6','9', + '\3','2','7','0', '\3','2','7','1', '\3','2','7','2', '\3','2','7','3', '\3','2','7','4', + '\3','2','7','5', '\3','2','7','6', '\3','2','7','7', '\3','2','7','8', '\3','2','7','9', + '\3','2','8','0', '\3','2','8','1', '\3','2','8','2', '\3','2','8','3', '\3','2','8','4', + '\3','2','8','5', '\3','2','8','6', '\3','2','8','7', '\3','2','8','8', '\3','2','8','9', + '\3','2','9','0', '\3','2','9','1', '\3','2','9','2', '\3','2','9','3', '\3','2','9','4', + '\3','2','9','5', '\3','2','9','6', '\3','2','9','7', '\3','2','9','8', '\3','2','9','9', + '\3','3','0','0', '\3','3','0','1', '\3','3','0','2', '\3','3','0','3', '\3','3','0','4', + '\3','3','0','5', '\3','3','0','6', '\3','3','0','7', '\3','3','0','8', '\3','3','0','9', + '\3','3','1','0', '\3','3','1','1', '\3','3','1','2', '\3','3','1','3', '\3','3','1','4', + '\3','3','1','5', '\3','3','1','6', '\3','3','1','7', '\3','3','1','8', '\3','3','1','9', + '\3','3','2','0', '\3','3','2','1', '\3','3','2','2', '\3','3','2','3', '\3','3','2','4', + '\3','3','2','5', '\3','3','2','6', '\3','3','2','7', '\3','3','2','8', '\3','3','2','9', + '\3','3','3','0', '\3','3','3','1', '\3','3','3','2', '\3','3','3','3', '\3','3','3','4', + '\3','3','3','5', '\3','3','3','6', '\3','3','3','7', '\3','3','3','8', '\3','3','3','9', + '\3','3','4','0', '\3','3','4','1', '\3','3','4','2', '\3','3','4','3', '\3','3','4','4', + '\3','3','4','5', '\3','3','4','6', '\3','3','4','7', '\3','3','4','8', '\3','3','4','9', + '\3','3','5','0', '\3','3','5','1', '\3','3','5','2', '\3','3','5','3', '\3','3','5','4', + '\3','3','5','5', '\3','3','5','6', '\3','3','5','7', '\3','3','5','8', '\3','3','5','9', + '\3','3','6','0', '\3','3','6','1', '\3','3','6','2', '\3','3','6','3', '\3','3','6','4', + '\3','3','6','5', '\3','3','6','6', '\3','3','6','7', '\3','3','6','8', '\3','3','6','9', + '\3','3','7','0', '\3','3','7','1', '\3','3','7','2', '\3','3','7','3', '\3','3','7','4', + '\3','3','7','5', '\3','3','7','6', '\3','3','7','7', '\3','3','7','8', '\3','3','7','9', + '\3','3','8','0', '\3','3','8','1', '\3','3','8','2', '\3','3','8','3', '\3','3','8','4', + '\3','3','8','5', '\3','3','8','6', '\3','3','8','7', '\3','3','8','8', '\3','3','8','9', + '\3','3','9','0', '\3','3','9','1', '\3','3','9','2', '\3','3','9','3', '\3','3','9','4', + '\3','3','9','5', '\3','3','9','6', '\3','3','9','7', '\3','3','9','8', '\3','3','9','9', + '\3','4','0','0', '\3','4','0','1', '\3','4','0','2', '\3','4','0','3', '\3','4','0','4', + '\3','4','0','5', '\3','4','0','6', '\3','4','0','7', '\3','4','0','8', '\3','4','0','9', + '\3','4','1','0', '\3','4','1','1', '\3','4','1','2', '\3','4','1','3', '\3','4','1','4', + '\3','4','1','5', '\3','4','1','6', '\3','4','1','7', '\3','4','1','8', '\3','4','1','9', + '\3','4','2','0', '\3','4','2','1', '\3','4','2','2', '\3','4','2','3', '\3','4','2','4', + '\3','4','2','5', '\3','4','2','6', '\3','4','2','7', '\3','4','2','8', '\3','4','2','9', + '\3','4','3','0', '\3','4','3','1', '\3','4','3','2', '\3','4','3','3', '\3','4','3','4', + '\3','4','3','5', '\3','4','3','6', '\3','4','3','7', '\3','4','3','8', '\3','4','3','9', + '\3','4','4','0', '\3','4','4','1', '\3','4','4','2', '\3','4','4','3', '\3','4','4','4', + '\3','4','4','5', '\3','4','4','6', '\3','4','4','7', '\3','4','4','8', '\3','4','4','9', + '\3','4','5','0', '\3','4','5','1', '\3','4','5','2', '\3','4','5','3', '\3','4','5','4', + '\3','4','5','5', '\3','4','5','6', '\3','4','5','7', '\3','4','5','8', '\3','4','5','9', + '\3','4','6','0', '\3','4','6','1', '\3','4','6','2', '\3','4','6','3', '\3','4','6','4', + '\3','4','6','5', '\3','4','6','6', '\3','4','6','7', '\3','4','6','8', '\3','4','6','9', + '\3','4','7','0', '\3','4','7','1', '\3','4','7','2', '\3','4','7','3', '\3','4','7','4', + '\3','4','7','5', '\3','4','7','6', '\3','4','7','7', '\3','4','7','8', '\3','4','7','9', + '\3','4','8','0', '\3','4','8','1', '\3','4','8','2', '\3','4','8','3', '\3','4','8','4', + '\3','4','8','5', '\3','4','8','6', '\3','4','8','7', '\3','4','8','8', '\3','4','8','9', + '\3','4','9','0', '\3','4','9','1', '\3','4','9','2', '\3','4','9','3', '\3','4','9','4', + '\3','4','9','5', '\3','4','9','6', '\3','4','9','7', '\3','4','9','8', '\3','4','9','9', + '\3','5','0','0', '\3','5','0','1', '\3','5','0','2', '\3','5','0','3', '\3','5','0','4', + '\3','5','0','5', '\3','5','0','6', '\3','5','0','7', '\3','5','0','8', '\3','5','0','9', + '\3','5','1','0', '\3','5','1','1', '\3','5','1','2', '\3','5','1','3', '\3','5','1','4', + '\3','5','1','5', '\3','5','1','6', '\3','5','1','7', '\3','5','1','8', '\3','5','1','9', + '\3','5','2','0', '\3','5','2','1', '\3','5','2','2', '\3','5','2','3', '\3','5','2','4', + '\3','5','2','5', '\3','5','2','6', '\3','5','2','7', '\3','5','2','8', '\3','5','2','9', + '\3','5','3','0', '\3','5','3','1', '\3','5','3','2', '\3','5','3','3', '\3','5','3','4', + '\3','5','3','5', '\3','5','3','6', '\3','5','3','7', '\3','5','3','8', '\3','5','3','9', + '\3','5','4','0', '\3','5','4','1', '\3','5','4','2', '\3','5','4','3', '\3','5','4','4', + '\3','5','4','5', '\3','5','4','6', '\3','5','4','7', '\3','5','4','8', '\3','5','4','9', + '\3','5','5','0', '\3','5','5','1', '\3','5','5','2', '\3','5','5','3', '\3','5','5','4', + '\3','5','5','5', '\3','5','5','6', '\3','5','5','7', '\3','5','5','8', '\3','5','5','9', + '\3','5','6','0', '\3','5','6','1', '\3','5','6','2', '\3','5','6','3', '\3','5','6','4', + '\3','5','6','5', '\3','5','6','6', '\3','5','6','7', '\3','5','6','8', '\3','5','6','9', + '\3','5','7','0', '\3','5','7','1', '\3','5','7','2', '\3','5','7','3', '\3','5','7','4', + '\3','5','7','5', '\3','5','7','6', '\3','5','7','7', '\3','5','7','8', '\3','5','7','9', + '\3','5','8','0', '\3','5','8','1', '\3','5','8','2', '\3','5','8','3', '\3','5','8','4', + '\3','5','8','5', '\3','5','8','6', '\3','5','8','7', '\3','5','8','8', '\3','5','8','9', + '\3','5','9','0', '\3','5','9','1', '\3','5','9','2', '\3','5','9','3', '\3','5','9','4', + '\3','5','9','5', '\3','5','9','6', '\3','5','9','7', '\3','5','9','8', '\3','5','9','9', + '\3','6','0','0', '\3','6','0','1', '\3','6','0','2', '\3','6','0','3', '\3','6','0','4', + '\3','6','0','5', '\3','6','0','6', '\3','6','0','7', '\3','6','0','8', '\3','6','0','9', + '\3','6','1','0', '\3','6','1','1', '\3','6','1','2', '\3','6','1','3', '\3','6','1','4', + '\3','6','1','5', '\3','6','1','6', '\3','6','1','7', '\3','6','1','8', '\3','6','1','9', + '\3','6','2','0', '\3','6','2','1', '\3','6','2','2', '\3','6','2','3', '\3','6','2','4', + '\3','6','2','5', '\3','6','2','6', '\3','6','2','7', '\3','6','2','8', '\3','6','2','9', + '\3','6','3','0', '\3','6','3','1', '\3','6','3','2', '\3','6','3','3', '\3','6','3','4', + '\3','6','3','5', '\3','6','3','6', '\3','6','3','7', '\3','6','3','8', '\3','6','3','9', + '\3','6','4','0', '\3','6','4','1', '\3','6','4','2', '\3','6','4','3', '\3','6','4','4', + '\3','6','4','5', '\3','6','4','6', '\3','6','4','7', '\3','6','4','8', '\3','6','4','9', + '\3','6','5','0', '\3','6','5','1', '\3','6','5','2', '\3','6','5','3', '\3','6','5','4', + '\3','6','5','5', '\3','6','5','6', '\3','6','5','7', '\3','6','5','8', '\3','6','5','9', + '\3','6','6','0', '\3','6','6','1', '\3','6','6','2', '\3','6','6','3', '\3','6','6','4', + '\3','6','6','5', '\3','6','6','6', '\3','6','6','7', '\3','6','6','8', '\3','6','6','9', + '\3','6','7','0', '\3','6','7','1', '\3','6','7','2', '\3','6','7','3', '\3','6','7','4', + '\3','6','7','5', '\3','6','7','6', '\3','6','7','7', '\3','6','7','8', '\3','6','7','9', + '\3','6','8','0', '\3','6','8','1', '\3','6','8','2', '\3','6','8','3', '\3','6','8','4', + '\3','6','8','5', '\3','6','8','6', '\3','6','8','7', '\3','6','8','8', '\3','6','8','9', + '\3','6','9','0', '\3','6','9','1', '\3','6','9','2', '\3','6','9','3', '\3','6','9','4', + '\3','6','9','5', '\3','6','9','6', '\3','6','9','7', '\3','6','9','8', '\3','6','9','9', + '\3','7','0','0', '\3','7','0','1', '\3','7','0','2', '\3','7','0','3', '\3','7','0','4', + '\3','7','0','5', '\3','7','0','6', '\3','7','0','7', '\3','7','0','8', '\3','7','0','9', + '\3','7','1','0', '\3','7','1','1', '\3','7','1','2', '\3','7','1','3', '\3','7','1','4', + '\3','7','1','5', '\3','7','1','6', '\3','7','1','7', '\3','7','1','8', '\3','7','1','9', + '\3','7','2','0', '\3','7','2','1', '\3','7','2','2', '\3','7','2','3', '\3','7','2','4', + '\3','7','2','5', '\3','7','2','6', '\3','7','2','7', '\3','7','2','8', '\3','7','2','9', + '\3','7','3','0', '\3','7','3','1', '\3','7','3','2', '\3','7','3','3', '\3','7','3','4', + '\3','7','3','5', '\3','7','3','6', '\3','7','3','7', '\3','7','3','8', '\3','7','3','9', + '\3','7','4','0', '\3','7','4','1', '\3','7','4','2', '\3','7','4','3', '\3','7','4','4', + '\3','7','4','5', '\3','7','4','6', '\3','7','4','7', '\3','7','4','8', '\3','7','4','9', + '\3','7','5','0', '\3','7','5','1', '\3','7','5','2', '\3','7','5','3', '\3','7','5','4', + '\3','7','5','5', '\3','7','5','6', '\3','7','5','7', '\3','7','5','8', '\3','7','5','9', + '\3','7','6','0', '\3','7','6','1', '\3','7','6','2', '\3','7','6','3', '\3','7','6','4', + '\3','7','6','5', '\3','7','6','6', '\3','7','6','7', '\3','7','6','8', '\3','7','6','9', + '\3','7','7','0', '\3','7','7','1', '\3','7','7','2', '\3','7','7','3', '\3','7','7','4', + '\3','7','7','5', '\3','7','7','6', '\3','7','7','7', '\3','7','7','8', '\3','7','7','9', + '\3','7','8','0', '\3','7','8','1', '\3','7','8','2', '\3','7','8','3', '\3','7','8','4', + '\3','7','8','5', '\3','7','8','6', '\3','7','8','7', '\3','7','8','8', '\3','7','8','9', + '\3','7','9','0', '\3','7','9','1', '\3','7','9','2', '\3','7','9','3', '\3','7','9','4', + '\3','7','9','5', '\3','7','9','6', '\3','7','9','7', '\3','7','9','8', '\3','7','9','9', + '\3','8','0','0', '\3','8','0','1', '\3','8','0','2', '\3','8','0','3', '\3','8','0','4', + '\3','8','0','5', '\3','8','0','6', '\3','8','0','7', '\3','8','0','8', '\3','8','0','9', + '\3','8','1','0', '\3','8','1','1', '\3','8','1','2', '\3','8','1','3', '\3','8','1','4', + '\3','8','1','5', '\3','8','1','6', '\3','8','1','7', '\3','8','1','8', '\3','8','1','9', + '\3','8','2','0', '\3','8','2','1', '\3','8','2','2', '\3','8','2','3', '\3','8','2','4', + '\3','8','2','5', '\3','8','2','6', '\3','8','2','7', '\3','8','2','8', '\3','8','2','9', + '\3','8','3','0', '\3','8','3','1', '\3','8','3','2', '\3','8','3','3', '\3','8','3','4', + '\3','8','3','5', '\3','8','3','6', '\3','8','3','7', '\3','8','3','8', '\3','8','3','9', + '\3','8','4','0', '\3','8','4','1', '\3','8','4','2', '\3','8','4','3', '\3','8','4','4', + '\3','8','4','5', '\3','8','4','6', '\3','8','4','7', '\3','8','4','8', '\3','8','4','9', + '\3','8','5','0', '\3','8','5','1', '\3','8','5','2', '\3','8','5','3', '\3','8','5','4', + '\3','8','5','5', '\3','8','5','6', '\3','8','5','7', '\3','8','5','8', '\3','8','5','9', + '\3','8','6','0', '\3','8','6','1', '\3','8','6','2', '\3','8','6','3', '\3','8','6','4', + '\3','8','6','5', '\3','8','6','6', '\3','8','6','7', '\3','8','6','8', '\3','8','6','9', + '\3','8','7','0', '\3','8','7','1', '\3','8','7','2', '\3','8','7','3', '\3','8','7','4', + '\3','8','7','5', '\3','8','7','6', '\3','8','7','7', '\3','8','7','8', '\3','8','7','9', + '\3','8','8','0', '\3','8','8','1', '\3','8','8','2', '\3','8','8','3', '\3','8','8','4', + '\3','8','8','5', '\3','8','8','6', '\3','8','8','7', '\3','8','8','8', '\3','8','8','9', + '\3','8','9','0', '\3','8','9','1', '\3','8','9','2', '\3','8','9','3', '\3','8','9','4', + '\3','8','9','5', '\3','8','9','6', '\3','8','9','7', '\3','8','9','8', '\3','8','9','9', + '\3','9','0','0', '\3','9','0','1', '\3','9','0','2', '\3','9','0','3', '\3','9','0','4', + '\3','9','0','5', '\3','9','0','6', '\3','9','0','7', '\3','9','0','8', '\3','9','0','9', + '\3','9','1','0', '\3','9','1','1', '\3','9','1','2', '\3','9','1','3', '\3','9','1','4', + '\3','9','1','5', '\3','9','1','6', '\3','9','1','7', '\3','9','1','8', '\3','9','1','9', + '\3','9','2','0', '\3','9','2','1', '\3','9','2','2', '\3','9','2','3', '\3','9','2','4', + '\3','9','2','5', '\3','9','2','6', '\3','9','2','7', '\3','9','2','8', '\3','9','2','9', + '\3','9','3','0', '\3','9','3','1', '\3','9','3','2', '\3','9','3','3', '\3','9','3','4', + '\3','9','3','5', '\3','9','3','6', '\3','9','3','7', '\3','9','3','8', '\3','9','3','9', + '\3','9','4','0', '\3','9','4','1', '\3','9','4','2', '\3','9','4','3', '\3','9','4','4', + '\3','9','4','5', '\3','9','4','6', '\3','9','4','7', '\3','9','4','8', '\3','9','4','9', + '\3','9','5','0', '\3','9','5','1', '\3','9','5','2', '\3','9','5','3', '\3','9','5','4', + '\3','9','5','5', '\3','9','5','6', '\3','9','5','7', '\3','9','5','8', '\3','9','5','9', + '\3','9','6','0', '\3','9','6','1', '\3','9','6','2', '\3','9','6','3', '\3','9','6','4', + '\3','9','6','5', '\3','9','6','6', '\3','9','6','7', '\3','9','6','8', '\3','9','6','9', + '\3','9','7','0', '\3','9','7','1', '\3','9','7','2', '\3','9','7','3', '\3','9','7','4', + '\3','9','7','5', '\3','9','7','6', '\3','9','7','7', '\3','9','7','8', '\3','9','7','9', + '\3','9','8','0', '\3','9','8','1', '\3','9','8','2', '\3','9','8','3', '\3','9','8','4', + '\3','9','8','5', '\3','9','8','6', '\3','9','8','7', '\3','9','8','8', '\3','9','8','9', + '\3','9','9','0', '\3','9','9','1', '\3','9','9','2', '\3','9','9','3', '\3','9','9','4', + '\3','9','9','5', '\3','9','9','6', '\3','9','9','7', '\3','9','9','8', '\3','9','9','9', '\0'}; +#endif + +#if defined(DEC_DPD2BCD8) && DEC_DPD2BCD8==1 && !defined(DECDPD2BCD8) +#define DECDPD2BCD8 + +const uint8_t DPD2BCD8[4096]={ + 0,0,0,0, 0,0,1,1, 0,0,2,1, 0,0,3,1, 0,0,4,1, 0,0,5,1, 0,0,6,1, 0,0,7,1, 0,0,8,1, + 0,0,9,1, 0,8,0,2, 0,8,1,2, 8,0,0,3, 8,0,1,3, 8,8,0,3, 8,8,1,3, 0,1,0,2, 0,1,1,2, + 0,1,2,2, 0,1,3,2, 0,1,4,2, 0,1,5,2, 0,1,6,2, 0,1,7,2, 0,1,8,2, 0,1,9,2, 0,9,0,2, + 0,9,1,2, 8,1,0,3, 8,1,1,3, 8,9,0,3, 8,9,1,3, 0,2,0,2, 0,2,1,2, 0,2,2,2, 0,2,3,2, + 0,2,4,2, 0,2,5,2, 0,2,6,2, 0,2,7,2, 0,2,8,2, 0,2,9,2, 0,8,2,2, 0,8,3,2, 8,2,0,3, + 8,2,1,3, 8,0,8,3, 8,0,9,3, 0,3,0,2, 0,3,1,2, 0,3,2,2, 0,3,3,2, 0,3,4,2, 0,3,5,2, + 0,3,6,2, 0,3,7,2, 0,3,8,2, 0,3,9,2, 0,9,2,2, 0,9,3,2, 8,3,0,3, 8,3,1,3, 8,1,8,3, + 8,1,9,3, 0,4,0,2, 0,4,1,2, 0,4,2,2, 0,4,3,2, 0,4,4,2, 0,4,5,2, 0,4,6,2, 0,4,7,2, + 0,4,8,2, 0,4,9,2, 0,8,4,2, 0,8,5,2, 8,4,0,3, 8,4,1,3, 0,8,8,2, 0,8,9,2, 0,5,0,2, + 0,5,1,2, 0,5,2,2, 0,5,3,2, 0,5,4,2, 0,5,5,2, 0,5,6,2, 0,5,7,2, 0,5,8,2, 0,5,9,2, + 0,9,4,2, 0,9,5,2, 8,5,0,3, 8,5,1,3, 0,9,8,2, 0,9,9,2, 0,6,0,2, 0,6,1,2, 0,6,2,2, + 0,6,3,2, 0,6,4,2, 0,6,5,2, 0,6,6,2, 0,6,7,2, 0,6,8,2, 0,6,9,2, 0,8,6,2, 0,8,7,2, + 8,6,0,3, 8,6,1,3, 8,8,8,3, 8,8,9,3, 0,7,0,2, 0,7,1,2, 0,7,2,2, 0,7,3,2, 0,7,4,2, + 0,7,5,2, 0,7,6,2, 0,7,7,2, 0,7,8,2, 0,7,9,2, 0,9,6,2, 0,9,7,2, 8,7,0,3, 8,7,1,3, + 8,9,8,3, 8,9,9,3, 1,0,0,3, 1,0,1,3, 1,0,2,3, 1,0,3,3, 1,0,4,3, 1,0,5,3, 1,0,6,3, + 1,0,7,3, 1,0,8,3, 1,0,9,3, 1,8,0,3, 1,8,1,3, 9,0,0,3, 9,0,1,3, 9,8,0,3, 9,8,1,3, + 1,1,0,3, 1,1,1,3, 1,1,2,3, 1,1,3,3, 1,1,4,3, 1,1,5,3, 1,1,6,3, 1,1,7,3, 1,1,8,3, + 1,1,9,3, 1,9,0,3, 1,9,1,3, 9,1,0,3, 9,1,1,3, 9,9,0,3, 9,9,1,3, 1,2,0,3, 1,2,1,3, + 1,2,2,3, 1,2,3,3, 1,2,4,3, 1,2,5,3, 1,2,6,3, 1,2,7,3, 1,2,8,3, 1,2,9,3, 1,8,2,3, + 1,8,3,3, 9,2,0,3, 9,2,1,3, 9,0,8,3, 9,0,9,3, 1,3,0,3, 1,3,1,3, 1,3,2,3, 1,3,3,3, + 1,3,4,3, 1,3,5,3, 1,3,6,3, 1,3,7,3, 1,3,8,3, 1,3,9,3, 1,9,2,3, 1,9,3,3, 9,3,0,3, + 9,3,1,3, 9,1,8,3, 9,1,9,3, 1,4,0,3, 1,4,1,3, 1,4,2,3, 1,4,3,3, 1,4,4,3, 1,4,5,3, + 1,4,6,3, 1,4,7,3, 1,4,8,3, 1,4,9,3, 1,8,4,3, 1,8,5,3, 9,4,0,3, 9,4,1,3, 1,8,8,3, + 1,8,9,3, 1,5,0,3, 1,5,1,3, 1,5,2,3, 1,5,3,3, 1,5,4,3, 1,5,5,3, 1,5,6,3, 1,5,7,3, + 1,5,8,3, 1,5,9,3, 1,9,4,3, 1,9,5,3, 9,5,0,3, 9,5,1,3, 1,9,8,3, 1,9,9,3, 1,6,0,3, + 1,6,1,3, 1,6,2,3, 1,6,3,3, 1,6,4,3, 1,6,5,3, 1,6,6,3, 1,6,7,3, 1,6,8,3, 1,6,9,3, + 1,8,6,3, 1,8,7,3, 9,6,0,3, 9,6,1,3, 9,8,8,3, 9,8,9,3, 1,7,0,3, 1,7,1,3, 1,7,2,3, + 1,7,3,3, 1,7,4,3, 1,7,5,3, 1,7,6,3, 1,7,7,3, 1,7,8,3, 1,7,9,3, 1,9,6,3, 1,9,7,3, + 9,7,0,3, 9,7,1,3, 9,9,8,3, 9,9,9,3, 2,0,0,3, 2,0,1,3, 2,0,2,3, 2,0,3,3, 2,0,4,3, + 2,0,5,3, 2,0,6,3, 2,0,7,3, 2,0,8,3, 2,0,9,3, 2,8,0,3, 2,8,1,3, 8,0,2,3, 8,0,3,3, + 8,8,2,3, 8,8,3,3, 2,1,0,3, 2,1,1,3, 2,1,2,3, 2,1,3,3, 2,1,4,3, 2,1,5,3, 2,1,6,3, + 2,1,7,3, 2,1,8,3, 2,1,9,3, 2,9,0,3, 2,9,1,3, 8,1,2,3, 8,1,3,3, 8,9,2,3, 8,9,3,3, + 2,2,0,3, 2,2,1,3, 2,2,2,3, 2,2,3,3, 2,2,4,3, 2,2,5,3, 2,2,6,3, 2,2,7,3, 2,2,8,3, + 2,2,9,3, 2,8,2,3, 2,8,3,3, 8,2,2,3, 8,2,3,3, 8,2,8,3, 8,2,9,3, 2,3,0,3, 2,3,1,3, + 2,3,2,3, 2,3,3,3, 2,3,4,3, 2,3,5,3, 2,3,6,3, 2,3,7,3, 2,3,8,3, 2,3,9,3, 2,9,2,3, + 2,9,3,3, 8,3,2,3, 8,3,3,3, 8,3,8,3, 8,3,9,3, 2,4,0,3, 2,4,1,3, 2,4,2,3, 2,4,3,3, + 2,4,4,3, 2,4,5,3, 2,4,6,3, 2,4,7,3, 2,4,8,3, 2,4,9,3, 2,8,4,3, 2,8,5,3, 8,4,2,3, + 8,4,3,3, 2,8,8,3, 2,8,9,3, 2,5,0,3, 2,5,1,3, 2,5,2,3, 2,5,3,3, 2,5,4,3, 2,5,5,3, + 2,5,6,3, 2,5,7,3, 2,5,8,3, 2,5,9,3, 2,9,4,3, 2,9,5,3, 8,5,2,3, 8,5,3,3, 2,9,8,3, + 2,9,9,3, 2,6,0,3, 2,6,1,3, 2,6,2,3, 2,6,3,3, 2,6,4,3, 2,6,5,3, 2,6,6,3, 2,6,7,3, + 2,6,8,3, 2,6,9,3, 2,8,6,3, 2,8,7,3, 8,6,2,3, 8,6,3,3, 8,8,8,3, 8,8,9,3, 2,7,0,3, + 2,7,1,3, 2,7,2,3, 2,7,3,3, 2,7,4,3, 2,7,5,3, 2,7,6,3, 2,7,7,3, 2,7,8,3, 2,7,9,3, + 2,9,6,3, 2,9,7,3, 8,7,2,3, 8,7,3,3, 8,9,8,3, 8,9,9,3, 3,0,0,3, 3,0,1,3, 3,0,2,3, + 3,0,3,3, 3,0,4,3, 3,0,5,3, 3,0,6,3, 3,0,7,3, 3,0,8,3, 3,0,9,3, 3,8,0,3, 3,8,1,3, + 9,0,2,3, 9,0,3,3, 9,8,2,3, 9,8,3,3, 3,1,0,3, 3,1,1,3, 3,1,2,3, 3,1,3,3, 3,1,4,3, + 3,1,5,3, 3,1,6,3, 3,1,7,3, 3,1,8,3, 3,1,9,3, 3,9,0,3, 3,9,1,3, 9,1,2,3, 9,1,3,3, + 9,9,2,3, 9,9,3,3, 3,2,0,3, 3,2,1,3, 3,2,2,3, 3,2,3,3, 3,2,4,3, 3,2,5,3, 3,2,6,3, + 3,2,7,3, 3,2,8,3, 3,2,9,3, 3,8,2,3, 3,8,3,3, 9,2,2,3, 9,2,3,3, 9,2,8,3, 9,2,9,3, + 3,3,0,3, 3,3,1,3, 3,3,2,3, 3,3,3,3, 3,3,4,3, 3,3,5,3, 3,3,6,3, 3,3,7,3, 3,3,8,3, + 3,3,9,3, 3,9,2,3, 3,9,3,3, 9,3,2,3, 9,3,3,3, 9,3,8,3, 9,3,9,3, 3,4,0,3, 3,4,1,3, + 3,4,2,3, 3,4,3,3, 3,4,4,3, 3,4,5,3, 3,4,6,3, 3,4,7,3, 3,4,8,3, 3,4,9,3, 3,8,4,3, + 3,8,5,3, 9,4,2,3, 9,4,3,3, 3,8,8,3, 3,8,9,3, 3,5,0,3, 3,5,1,3, 3,5,2,3, 3,5,3,3, + 3,5,4,3, 3,5,5,3, 3,5,6,3, 3,5,7,3, 3,5,8,3, 3,5,9,3, 3,9,4,3, 3,9,5,3, 9,5,2,3, + 9,5,3,3, 3,9,8,3, 3,9,9,3, 3,6,0,3, 3,6,1,3, 3,6,2,3, 3,6,3,3, 3,6,4,3, 3,6,5,3, + 3,6,6,3, 3,6,7,3, 3,6,8,3, 3,6,9,3, 3,8,6,3, 3,8,7,3, 9,6,2,3, 9,6,3,3, 9,8,8,3, + 9,8,9,3, 3,7,0,3, 3,7,1,3, 3,7,2,3, 3,7,3,3, 3,7,4,3, 3,7,5,3, 3,7,6,3, 3,7,7,3, + 3,7,8,3, 3,7,9,3, 3,9,6,3, 3,9,7,3, 9,7,2,3, 9,7,3,3, 9,9,8,3, 9,9,9,3, 4,0,0,3, + 4,0,1,3, 4,0,2,3, 4,0,3,3, 4,0,4,3, 4,0,5,3, 4,0,6,3, 4,0,7,3, 4,0,8,3, 4,0,9,3, + 4,8,0,3, 4,8,1,3, 8,0,4,3, 8,0,5,3, 8,8,4,3, 8,8,5,3, 4,1,0,3, 4,1,1,3, 4,1,2,3, + 4,1,3,3, 4,1,4,3, 4,1,5,3, 4,1,6,3, 4,1,7,3, 4,1,8,3, 4,1,9,3, 4,9,0,3, 4,9,1,3, + 8,1,4,3, 8,1,5,3, 8,9,4,3, 8,9,5,3, 4,2,0,3, 4,2,1,3, 4,2,2,3, 4,2,3,3, 4,2,4,3, + 4,2,5,3, 4,2,6,3, 4,2,7,3, 4,2,8,3, 4,2,9,3, 4,8,2,3, 4,8,3,3, 8,2,4,3, 8,2,5,3, + 8,4,8,3, 8,4,9,3, 4,3,0,3, 4,3,1,3, 4,3,2,3, 4,3,3,3, 4,3,4,3, 4,3,5,3, 4,3,6,3, + 4,3,7,3, 4,3,8,3, 4,3,9,3, 4,9,2,3, 4,9,3,3, 8,3,4,3, 8,3,5,3, 8,5,8,3, 8,5,9,3, + 4,4,0,3, 4,4,1,3, 4,4,2,3, 4,4,3,3, 4,4,4,3, 4,4,5,3, 4,4,6,3, 4,4,7,3, 4,4,8,3, + 4,4,9,3, 4,8,4,3, 4,8,5,3, 8,4,4,3, 8,4,5,3, 4,8,8,3, 4,8,9,3, 4,5,0,3, 4,5,1,3, + 4,5,2,3, 4,5,3,3, 4,5,4,3, 4,5,5,3, 4,5,6,3, 4,5,7,3, 4,5,8,3, 4,5,9,3, 4,9,4,3, + 4,9,5,3, 8,5,4,3, 8,5,5,3, 4,9,8,3, 4,9,9,3, 4,6,0,3, 4,6,1,3, 4,6,2,3, 4,6,3,3, + 4,6,4,3, 4,6,5,3, 4,6,6,3, 4,6,7,3, 4,6,8,3, 4,6,9,3, 4,8,6,3, 4,8,7,3, 8,6,4,3, + 8,6,5,3, 8,8,8,3, 8,8,9,3, 4,7,0,3, 4,7,1,3, 4,7,2,3, 4,7,3,3, 4,7,4,3, 4,7,5,3, + 4,7,6,3, 4,7,7,3, 4,7,8,3, 4,7,9,3, 4,9,6,3, 4,9,7,3, 8,7,4,3, 8,7,5,3, 8,9,8,3, + 8,9,9,3, 5,0,0,3, 5,0,1,3, 5,0,2,3, 5,0,3,3, 5,0,4,3, 5,0,5,3, 5,0,6,3, 5,0,7,3, + 5,0,8,3, 5,0,9,3, 5,8,0,3, 5,8,1,3, 9,0,4,3, 9,0,5,3, 9,8,4,3, 9,8,5,3, 5,1,0,3, + 5,1,1,3, 5,1,2,3, 5,1,3,3, 5,1,4,3, 5,1,5,3, 5,1,6,3, 5,1,7,3, 5,1,8,3, 5,1,9,3, + 5,9,0,3, 5,9,1,3, 9,1,4,3, 9,1,5,3, 9,9,4,3, 9,9,5,3, 5,2,0,3, 5,2,1,3, 5,2,2,3, + 5,2,3,3, 5,2,4,3, 5,2,5,3, 5,2,6,3, 5,2,7,3, 5,2,8,3, 5,2,9,3, 5,8,2,3, 5,8,3,3, + 9,2,4,3, 9,2,5,3, 9,4,8,3, 9,4,9,3, 5,3,0,3, 5,3,1,3, 5,3,2,3, 5,3,3,3, 5,3,4,3, + 5,3,5,3, 5,3,6,3, 5,3,7,3, 5,3,8,3, 5,3,9,3, 5,9,2,3, 5,9,3,3, 9,3,4,3, 9,3,5,3, + 9,5,8,3, 9,5,9,3, 5,4,0,3, 5,4,1,3, 5,4,2,3, 5,4,3,3, 5,4,4,3, 5,4,5,3, 5,4,6,3, + 5,4,7,3, 5,4,8,3, 5,4,9,3, 5,8,4,3, 5,8,5,3, 9,4,4,3, 9,4,5,3, 5,8,8,3, 5,8,9,3, + 5,5,0,3, 5,5,1,3, 5,5,2,3, 5,5,3,3, 5,5,4,3, 5,5,5,3, 5,5,6,3, 5,5,7,3, 5,5,8,3, + 5,5,9,3, 5,9,4,3, 5,9,5,3, 9,5,4,3, 9,5,5,3, 5,9,8,3, 5,9,9,3, 5,6,0,3, 5,6,1,3, + 5,6,2,3, 5,6,3,3, 5,6,4,3, 5,6,5,3, 5,6,6,3, 5,6,7,3, 5,6,8,3, 5,6,9,3, 5,8,6,3, + 5,8,7,3, 9,6,4,3, 9,6,5,3, 9,8,8,3, 9,8,9,3, 5,7,0,3, 5,7,1,3, 5,7,2,3, 5,7,3,3, + 5,7,4,3, 5,7,5,3, 5,7,6,3, 5,7,7,3, 5,7,8,3, 5,7,9,3, 5,9,6,3, 5,9,7,3, 9,7,4,3, + 9,7,5,3, 9,9,8,3, 9,9,9,3, 6,0,0,3, 6,0,1,3, 6,0,2,3, 6,0,3,3, 6,0,4,3, 6,0,5,3, + 6,0,6,3, 6,0,7,3, 6,0,8,3, 6,0,9,3, 6,8,0,3, 6,8,1,3, 8,0,6,3, 8,0,7,3, 8,8,6,3, + 8,8,7,3, 6,1,0,3, 6,1,1,3, 6,1,2,3, 6,1,3,3, 6,1,4,3, 6,1,5,3, 6,1,6,3, 6,1,7,3, + 6,1,8,3, 6,1,9,3, 6,9,0,3, 6,9,1,3, 8,1,6,3, 8,1,7,3, 8,9,6,3, 8,9,7,3, 6,2,0,3, + 6,2,1,3, 6,2,2,3, 6,2,3,3, 6,2,4,3, 6,2,5,3, 6,2,6,3, 6,2,7,3, 6,2,8,3, 6,2,9,3, + 6,8,2,3, 6,8,3,3, 8,2,6,3, 8,2,7,3, 8,6,8,3, 8,6,9,3, 6,3,0,3, 6,3,1,3, 6,3,2,3, + 6,3,3,3, 6,3,4,3, 6,3,5,3, 6,3,6,3, 6,3,7,3, 6,3,8,3, 6,3,9,3, 6,9,2,3, 6,9,3,3, + 8,3,6,3, 8,3,7,3, 8,7,8,3, 8,7,9,3, 6,4,0,3, 6,4,1,3, 6,4,2,3, 6,4,3,3, 6,4,4,3, + 6,4,5,3, 6,4,6,3, 6,4,7,3, 6,4,8,3, 6,4,9,3, 6,8,4,3, 6,8,5,3, 8,4,6,3, 8,4,7,3, + 6,8,8,3, 6,8,9,3, 6,5,0,3, 6,5,1,3, 6,5,2,3, 6,5,3,3, 6,5,4,3, 6,5,5,3, 6,5,6,3, + 6,5,7,3, 6,5,8,3, 6,5,9,3, 6,9,4,3, 6,9,5,3, 8,5,6,3, 8,5,7,3, 6,9,8,3, 6,9,9,3, + 6,6,0,3, 6,6,1,3, 6,6,2,3, 6,6,3,3, 6,6,4,3, 6,6,5,3, 6,6,6,3, 6,6,7,3, 6,6,8,3, + 6,6,9,3, 6,8,6,3, 6,8,7,3, 8,6,6,3, 8,6,7,3, 8,8,8,3, 8,8,9,3, 6,7,0,3, 6,7,1,3, + 6,7,2,3, 6,7,3,3, 6,7,4,3, 6,7,5,3, 6,7,6,3, 6,7,7,3, 6,7,8,3, 6,7,9,3, 6,9,6,3, + 6,9,7,3, 8,7,6,3, 8,7,7,3, 8,9,8,3, 8,9,9,3, 7,0,0,3, 7,0,1,3, 7,0,2,3, 7,0,3,3, + 7,0,4,3, 7,0,5,3, 7,0,6,3, 7,0,7,3, 7,0,8,3, 7,0,9,3, 7,8,0,3, 7,8,1,3, 9,0,6,3, + 9,0,7,3, 9,8,6,3, 9,8,7,3, 7,1,0,3, 7,1,1,3, 7,1,2,3, 7,1,3,3, 7,1,4,3, 7,1,5,3, + 7,1,6,3, 7,1,7,3, 7,1,8,3, 7,1,9,3, 7,9,0,3, 7,9,1,3, 9,1,6,3, 9,1,7,3, 9,9,6,3, + 9,9,7,3, 7,2,0,3, 7,2,1,3, 7,2,2,3, 7,2,3,3, 7,2,4,3, 7,2,5,3, 7,2,6,3, 7,2,7,3, + 7,2,8,3, 7,2,9,3, 7,8,2,3, 7,8,3,3, 9,2,6,3, 9,2,7,3, 9,6,8,3, 9,6,9,3, 7,3,0,3, + 7,3,1,3, 7,3,2,3, 7,3,3,3, 7,3,4,3, 7,3,5,3, 7,3,6,3, 7,3,7,3, 7,3,8,3, 7,3,9,3, + 7,9,2,3, 7,9,3,3, 9,3,6,3, 9,3,7,3, 9,7,8,3, 9,7,9,3, 7,4,0,3, 7,4,1,3, 7,4,2,3, + 7,4,3,3, 7,4,4,3, 7,4,5,3, 7,4,6,3, 7,4,7,3, 7,4,8,3, 7,4,9,3, 7,8,4,3, 7,8,5,3, + 9,4,6,3, 9,4,7,3, 7,8,8,3, 7,8,9,3, 7,5,0,3, 7,5,1,3, 7,5,2,3, 7,5,3,3, 7,5,4,3, + 7,5,5,3, 7,5,6,3, 7,5,7,3, 7,5,8,3, 7,5,9,3, 7,9,4,3, 7,9,5,3, 9,5,6,3, 9,5,7,3, + 7,9,8,3, 7,9,9,3, 7,6,0,3, 7,6,1,3, 7,6,2,3, 7,6,3,3, 7,6,4,3, 7,6,5,3, 7,6,6,3, + 7,6,7,3, 7,6,8,3, 7,6,9,3, 7,8,6,3, 7,8,7,3, 9,6,6,3, 9,6,7,3, 9,8,8,3, 9,8,9,3, + 7,7,0,3, 7,7,1,3, 7,7,2,3, 7,7,3,3, 7,7,4,3, 7,7,5,3, 7,7,6,3, 7,7,7,3, 7,7,8,3, + 7,7,9,3, 7,9,6,3, 7,9,7,3, 9,7,6,3, 9,7,7,3, 9,9,8,3, 9,9,9,3}; +#endif + +#if defined(DEC_BIN2BCD8) && DEC_BIN2BCD8==1 && !defined(DECBIN2BCD8) +#define DECBIN2BCD8 + +const uint8_t BIN2BCD8[4000]={ + 0,0,0,0, 0,0,1,1, 0,0,2,1, 0,0,3,1, 0,0,4,1, 0,0,5,1, 0,0,6,1, 0,0,7,1, 0,0,8,1, + 0,0,9,1, 0,1,0,2, 0,1,1,2, 0,1,2,2, 0,1,3,2, 0,1,4,2, 0,1,5,2, 0,1,6,2, 0,1,7,2, + 0,1,8,2, 0,1,9,2, 0,2,0,2, 0,2,1,2, 0,2,2,2, 0,2,3,2, 0,2,4,2, 0,2,5,2, 0,2,6,2, + 0,2,7,2, 0,2,8,2, 0,2,9,2, 0,3,0,2, 0,3,1,2, 0,3,2,2, 0,3,3,2, 0,3,4,2, 0,3,5,2, + 0,3,6,2, 0,3,7,2, 0,3,8,2, 0,3,9,2, 0,4,0,2, 0,4,1,2, 0,4,2,2, 0,4,3,2, 0,4,4,2, + 0,4,5,2, 0,4,6,2, 0,4,7,2, 0,4,8,2, 0,4,9,2, 0,5,0,2, 0,5,1,2, 0,5,2,2, 0,5,3,2, + 0,5,4,2, 0,5,5,2, 0,5,6,2, 0,5,7,2, 0,5,8,2, 0,5,9,2, 0,6,0,2, 0,6,1,2, 0,6,2,2, + 0,6,3,2, 0,6,4,2, 0,6,5,2, 0,6,6,2, 0,6,7,2, 0,6,8,2, 0,6,9,2, 0,7,0,2, 0,7,1,2, + 0,7,2,2, 0,7,3,2, 0,7,4,2, 0,7,5,2, 0,7,6,2, 0,7,7,2, 0,7,8,2, 0,7,9,2, 0,8,0,2, + 0,8,1,2, 0,8,2,2, 0,8,3,2, 0,8,4,2, 0,8,5,2, 0,8,6,2, 0,8,7,2, 0,8,8,2, 0,8,9,2, + 0,9,0,2, 0,9,1,2, 0,9,2,2, 0,9,3,2, 0,9,4,2, 0,9,5,2, 0,9,6,2, 0,9,7,2, 0,9,8,2, + 0,9,9,2, 1,0,0,3, 1,0,1,3, 1,0,2,3, 1,0,3,3, 1,0,4,3, 1,0,5,3, 1,0,6,3, 1,0,7,3, + 1,0,8,3, 1,0,9,3, 1,1,0,3, 1,1,1,3, 1,1,2,3, 1,1,3,3, 1,1,4,3, 1,1,5,3, 1,1,6,3, + 1,1,7,3, 1,1,8,3, 1,1,9,3, 1,2,0,3, 1,2,1,3, 1,2,2,3, 1,2,3,3, 1,2,4,3, 1,2,5,3, + 1,2,6,3, 1,2,7,3, 1,2,8,3, 1,2,9,3, 1,3,0,3, 1,3,1,3, 1,3,2,3, 1,3,3,3, 1,3,4,3, + 1,3,5,3, 1,3,6,3, 1,3,7,3, 1,3,8,3, 1,3,9,3, 1,4,0,3, 1,4,1,3, 1,4,2,3, 1,4,3,3, + 1,4,4,3, 1,4,5,3, 1,4,6,3, 1,4,7,3, 1,4,8,3, 1,4,9,3, 1,5,0,3, 1,5,1,3, 1,5,2,3, + 1,5,3,3, 1,5,4,3, 1,5,5,3, 1,5,6,3, 1,5,7,3, 1,5,8,3, 1,5,9,3, 1,6,0,3, 1,6,1,3, + 1,6,2,3, 1,6,3,3, 1,6,4,3, 1,6,5,3, 1,6,6,3, 1,6,7,3, 1,6,8,3, 1,6,9,3, 1,7,0,3, + 1,7,1,3, 1,7,2,3, 1,7,3,3, 1,7,4,3, 1,7,5,3, 1,7,6,3, 1,7,7,3, 1,7,8,3, 1,7,9,3, + 1,8,0,3, 1,8,1,3, 1,8,2,3, 1,8,3,3, 1,8,4,3, 1,8,5,3, 1,8,6,3, 1,8,7,3, 1,8,8,3, + 1,8,9,3, 1,9,0,3, 1,9,1,3, 1,9,2,3, 1,9,3,3, 1,9,4,3, 1,9,5,3, 1,9,6,3, 1,9,7,3, + 1,9,8,3, 1,9,9,3, 2,0,0,3, 2,0,1,3, 2,0,2,3, 2,0,3,3, 2,0,4,3, 2,0,5,3, 2,0,6,3, + 2,0,7,3, 2,0,8,3, 2,0,9,3, 2,1,0,3, 2,1,1,3, 2,1,2,3, 2,1,3,3, 2,1,4,3, 2,1,5,3, + 2,1,6,3, 2,1,7,3, 2,1,8,3, 2,1,9,3, 2,2,0,3, 2,2,1,3, 2,2,2,3, 2,2,3,3, 2,2,4,3, + 2,2,5,3, 2,2,6,3, 2,2,7,3, 2,2,8,3, 2,2,9,3, 2,3,0,3, 2,3,1,3, 2,3,2,3, 2,3,3,3, + 2,3,4,3, 2,3,5,3, 2,3,6,3, 2,3,7,3, 2,3,8,3, 2,3,9,3, 2,4,0,3, 2,4,1,3, 2,4,2,3, + 2,4,3,3, 2,4,4,3, 2,4,5,3, 2,4,6,3, 2,4,7,3, 2,4,8,3, 2,4,9,3, 2,5,0,3, 2,5,1,3, + 2,5,2,3, 2,5,3,3, 2,5,4,3, 2,5,5,3, 2,5,6,3, 2,5,7,3, 2,5,8,3, 2,5,9,3, 2,6,0,3, + 2,6,1,3, 2,6,2,3, 2,6,3,3, 2,6,4,3, 2,6,5,3, 2,6,6,3, 2,6,7,3, 2,6,8,3, 2,6,9,3, + 2,7,0,3, 2,7,1,3, 2,7,2,3, 2,7,3,3, 2,7,4,3, 2,7,5,3, 2,7,6,3, 2,7,7,3, 2,7,8,3, + 2,7,9,3, 2,8,0,3, 2,8,1,3, 2,8,2,3, 2,8,3,3, 2,8,4,3, 2,8,5,3, 2,8,6,3, 2,8,7,3, + 2,8,8,3, 2,8,9,3, 2,9,0,3, 2,9,1,3, 2,9,2,3, 2,9,3,3, 2,9,4,3, 2,9,5,3, 2,9,6,3, + 2,9,7,3, 2,9,8,3, 2,9,9,3, 3,0,0,3, 3,0,1,3, 3,0,2,3, 3,0,3,3, 3,0,4,3, 3,0,5,3, + 3,0,6,3, 3,0,7,3, 3,0,8,3, 3,0,9,3, 3,1,0,3, 3,1,1,3, 3,1,2,3, 3,1,3,3, 3,1,4,3, + 3,1,5,3, 3,1,6,3, 3,1,7,3, 3,1,8,3, 3,1,9,3, 3,2,0,3, 3,2,1,3, 3,2,2,3, 3,2,3,3, + 3,2,4,3, 3,2,5,3, 3,2,6,3, 3,2,7,3, 3,2,8,3, 3,2,9,3, 3,3,0,3, 3,3,1,3, 3,3,2,3, + 3,3,3,3, 3,3,4,3, 3,3,5,3, 3,3,6,3, 3,3,7,3, 3,3,8,3, 3,3,9,3, 3,4,0,3, 3,4,1,3, + 3,4,2,3, 3,4,3,3, 3,4,4,3, 3,4,5,3, 3,4,6,3, 3,4,7,3, 3,4,8,3, 3,4,9,3, 3,5,0,3, + 3,5,1,3, 3,5,2,3, 3,5,3,3, 3,5,4,3, 3,5,5,3, 3,5,6,3, 3,5,7,3, 3,5,8,3, 3,5,9,3, + 3,6,0,3, 3,6,1,3, 3,6,2,3, 3,6,3,3, 3,6,4,3, 3,6,5,3, 3,6,6,3, 3,6,7,3, 3,6,8,3, + 3,6,9,3, 3,7,0,3, 3,7,1,3, 3,7,2,3, 3,7,3,3, 3,7,4,3, 3,7,5,3, 3,7,6,3, 3,7,7,3, + 3,7,8,3, 3,7,9,3, 3,8,0,3, 3,8,1,3, 3,8,2,3, 3,8,3,3, 3,8,4,3, 3,8,5,3, 3,8,6,3, + 3,8,7,3, 3,8,8,3, 3,8,9,3, 3,9,0,3, 3,9,1,3, 3,9,2,3, 3,9,3,3, 3,9,4,3, 3,9,5,3, + 3,9,6,3, 3,9,7,3, 3,9,8,3, 3,9,9,3, 4,0,0,3, 4,0,1,3, 4,0,2,3, 4,0,3,3, 4,0,4,3, + 4,0,5,3, 4,0,6,3, 4,0,7,3, 4,0,8,3, 4,0,9,3, 4,1,0,3, 4,1,1,3, 4,1,2,3, 4,1,3,3, + 4,1,4,3, 4,1,5,3, 4,1,6,3, 4,1,7,3, 4,1,8,3, 4,1,9,3, 4,2,0,3, 4,2,1,3, 4,2,2,3, + 4,2,3,3, 4,2,4,3, 4,2,5,3, 4,2,6,3, 4,2,7,3, 4,2,8,3, 4,2,9,3, 4,3,0,3, 4,3,1,3, + 4,3,2,3, 4,3,3,3, 4,3,4,3, 4,3,5,3, 4,3,6,3, 4,3,7,3, 4,3,8,3, 4,3,9,3, 4,4,0,3, + 4,4,1,3, 4,4,2,3, 4,4,3,3, 4,4,4,3, 4,4,5,3, 4,4,6,3, 4,4,7,3, 4,4,8,3, 4,4,9,3, + 4,5,0,3, 4,5,1,3, 4,5,2,3, 4,5,3,3, 4,5,4,3, 4,5,5,3, 4,5,6,3, 4,5,7,3, 4,5,8,3, + 4,5,9,3, 4,6,0,3, 4,6,1,3, 4,6,2,3, 4,6,3,3, 4,6,4,3, 4,6,5,3, 4,6,6,3, 4,6,7,3, + 4,6,8,3, 4,6,9,3, 4,7,0,3, 4,7,1,3, 4,7,2,3, 4,7,3,3, 4,7,4,3, 4,7,5,3, 4,7,6,3, + 4,7,7,3, 4,7,8,3, 4,7,9,3, 4,8,0,3, 4,8,1,3, 4,8,2,3, 4,8,3,3, 4,8,4,3, 4,8,5,3, + 4,8,6,3, 4,8,7,3, 4,8,8,3, 4,8,9,3, 4,9,0,3, 4,9,1,3, 4,9,2,3, 4,9,3,3, 4,9,4,3, + 4,9,5,3, 4,9,6,3, 4,9,7,3, 4,9,8,3, 4,9,9,3, 5,0,0,3, 5,0,1,3, 5,0,2,3, 5,0,3,3, + 5,0,4,3, 5,0,5,3, 5,0,6,3, 5,0,7,3, 5,0,8,3, 5,0,9,3, 5,1,0,3, 5,1,1,3, 5,1,2,3, + 5,1,3,3, 5,1,4,3, 5,1,5,3, 5,1,6,3, 5,1,7,3, 5,1,8,3, 5,1,9,3, 5,2,0,3, 5,2,1,3, + 5,2,2,3, 5,2,3,3, 5,2,4,3, 5,2,5,3, 5,2,6,3, 5,2,7,3, 5,2,8,3, 5,2,9,3, 5,3,0,3, + 5,3,1,3, 5,3,2,3, 5,3,3,3, 5,3,4,3, 5,3,5,3, 5,3,6,3, 5,3,7,3, 5,3,8,3, 5,3,9,3, + 5,4,0,3, 5,4,1,3, 5,4,2,3, 5,4,3,3, 5,4,4,3, 5,4,5,3, 5,4,6,3, 5,4,7,3, 5,4,8,3, + 5,4,9,3, 5,5,0,3, 5,5,1,3, 5,5,2,3, 5,5,3,3, 5,5,4,3, 5,5,5,3, 5,5,6,3, 5,5,7,3, + 5,5,8,3, 5,5,9,3, 5,6,0,3, 5,6,1,3, 5,6,2,3, 5,6,3,3, 5,6,4,3, 5,6,5,3, 5,6,6,3, + 5,6,7,3, 5,6,8,3, 5,6,9,3, 5,7,0,3, 5,7,1,3, 5,7,2,3, 5,7,3,3, 5,7,4,3, 5,7,5,3, + 5,7,6,3, 5,7,7,3, 5,7,8,3, 5,7,9,3, 5,8,0,3, 5,8,1,3, 5,8,2,3, 5,8,3,3, 5,8,4,3, + 5,8,5,3, 5,8,6,3, 5,8,7,3, 5,8,8,3, 5,8,9,3, 5,9,0,3, 5,9,1,3, 5,9,2,3, 5,9,3,3, + 5,9,4,3, 5,9,5,3, 5,9,6,3, 5,9,7,3, 5,9,8,3, 5,9,9,3, 6,0,0,3, 6,0,1,3, 6,0,2,3, + 6,0,3,3, 6,0,4,3, 6,0,5,3, 6,0,6,3, 6,0,7,3, 6,0,8,3, 6,0,9,3, 6,1,0,3, 6,1,1,3, + 6,1,2,3, 6,1,3,3, 6,1,4,3, 6,1,5,3, 6,1,6,3, 6,1,7,3, 6,1,8,3, 6,1,9,3, 6,2,0,3, + 6,2,1,3, 6,2,2,3, 6,2,3,3, 6,2,4,3, 6,2,5,3, 6,2,6,3, 6,2,7,3, 6,2,8,3, 6,2,9,3, + 6,3,0,3, 6,3,1,3, 6,3,2,3, 6,3,3,3, 6,3,4,3, 6,3,5,3, 6,3,6,3, 6,3,7,3, 6,3,8,3, + 6,3,9,3, 6,4,0,3, 6,4,1,3, 6,4,2,3, 6,4,3,3, 6,4,4,3, 6,4,5,3, 6,4,6,3, 6,4,7,3, + 6,4,8,3, 6,4,9,3, 6,5,0,3, 6,5,1,3, 6,5,2,3, 6,5,3,3, 6,5,4,3, 6,5,5,3, 6,5,6,3, + 6,5,7,3, 6,5,8,3, 6,5,9,3, 6,6,0,3, 6,6,1,3, 6,6,2,3, 6,6,3,3, 6,6,4,3, 6,6,5,3, + 6,6,6,3, 6,6,7,3, 6,6,8,3, 6,6,9,3, 6,7,0,3, 6,7,1,3, 6,7,2,3, 6,7,3,3, 6,7,4,3, + 6,7,5,3, 6,7,6,3, 6,7,7,3, 6,7,8,3, 6,7,9,3, 6,8,0,3, 6,8,1,3, 6,8,2,3, 6,8,3,3, + 6,8,4,3, 6,8,5,3, 6,8,6,3, 6,8,7,3, 6,8,8,3, 6,8,9,3, 6,9,0,3, 6,9,1,3, 6,9,2,3, + 6,9,3,3, 6,9,4,3, 6,9,5,3, 6,9,6,3, 6,9,7,3, 6,9,8,3, 6,9,9,3, 7,0,0,3, 7,0,1,3, + 7,0,2,3, 7,0,3,3, 7,0,4,3, 7,0,5,3, 7,0,6,3, 7,0,7,3, 7,0,8,3, 7,0,9,3, 7,1,0,3, + 7,1,1,3, 7,1,2,3, 7,1,3,3, 7,1,4,3, 7,1,5,3, 7,1,6,3, 7,1,7,3, 7,1,8,3, 7,1,9,3, + 7,2,0,3, 7,2,1,3, 7,2,2,3, 7,2,3,3, 7,2,4,3, 7,2,5,3, 7,2,6,3, 7,2,7,3, 7,2,8,3, + 7,2,9,3, 7,3,0,3, 7,3,1,3, 7,3,2,3, 7,3,3,3, 7,3,4,3, 7,3,5,3, 7,3,6,3, 7,3,7,3, + 7,3,8,3, 7,3,9,3, 7,4,0,3, 7,4,1,3, 7,4,2,3, 7,4,3,3, 7,4,4,3, 7,4,5,3, 7,4,6,3, + 7,4,7,3, 7,4,8,3, 7,4,9,3, 7,5,0,3, 7,5,1,3, 7,5,2,3, 7,5,3,3, 7,5,4,3, 7,5,5,3, + 7,5,6,3, 7,5,7,3, 7,5,8,3, 7,5,9,3, 7,6,0,3, 7,6,1,3, 7,6,2,3, 7,6,3,3, 7,6,4,3, + 7,6,5,3, 7,6,6,3, 7,6,7,3, 7,6,8,3, 7,6,9,3, 7,7,0,3, 7,7,1,3, 7,7,2,3, 7,7,3,3, + 7,7,4,3, 7,7,5,3, 7,7,6,3, 7,7,7,3, 7,7,8,3, 7,7,9,3, 7,8,0,3, 7,8,1,3, 7,8,2,3, + 7,8,3,3, 7,8,4,3, 7,8,5,3, 7,8,6,3, 7,8,7,3, 7,8,8,3, 7,8,9,3, 7,9,0,3, 7,9,1,3, + 7,9,2,3, 7,9,3,3, 7,9,4,3, 7,9,5,3, 7,9,6,3, 7,9,7,3, 7,9,8,3, 7,9,9,3, 8,0,0,3, + 8,0,1,3, 8,0,2,3, 8,0,3,3, 8,0,4,3, 8,0,5,3, 8,0,6,3, 8,0,7,3, 8,0,8,3, 8,0,9,3, + 8,1,0,3, 8,1,1,3, 8,1,2,3, 8,1,3,3, 8,1,4,3, 8,1,5,3, 8,1,6,3, 8,1,7,3, 8,1,8,3, + 8,1,9,3, 8,2,0,3, 8,2,1,3, 8,2,2,3, 8,2,3,3, 8,2,4,3, 8,2,5,3, 8,2,6,3, 8,2,7,3, + 8,2,8,3, 8,2,9,3, 8,3,0,3, 8,3,1,3, 8,3,2,3, 8,3,3,3, 8,3,4,3, 8,3,5,3, 8,3,6,3, + 8,3,7,3, 8,3,8,3, 8,3,9,3, 8,4,0,3, 8,4,1,3, 8,4,2,3, 8,4,3,3, 8,4,4,3, 8,4,5,3, + 8,4,6,3, 8,4,7,3, 8,4,8,3, 8,4,9,3, 8,5,0,3, 8,5,1,3, 8,5,2,3, 8,5,3,3, 8,5,4,3, + 8,5,5,3, 8,5,6,3, 8,5,7,3, 8,5,8,3, 8,5,9,3, 8,6,0,3, 8,6,1,3, 8,6,2,3, 8,6,3,3, + 8,6,4,3, 8,6,5,3, 8,6,6,3, 8,6,7,3, 8,6,8,3, 8,6,9,3, 8,7,0,3, 8,7,1,3, 8,7,2,3, + 8,7,3,3, 8,7,4,3, 8,7,5,3, 8,7,6,3, 8,7,7,3, 8,7,8,3, 8,7,9,3, 8,8,0,3, 8,8,1,3, + 8,8,2,3, 8,8,3,3, 8,8,4,3, 8,8,5,3, 8,8,6,3, 8,8,7,3, 8,8,8,3, 8,8,9,3, 8,9,0,3, + 8,9,1,3, 8,9,2,3, 8,9,3,3, 8,9,4,3, 8,9,5,3, 8,9,6,3, 8,9,7,3, 8,9,8,3, 8,9,9,3, + 9,0,0,3, 9,0,1,3, 9,0,2,3, 9,0,3,3, 9,0,4,3, 9,0,5,3, 9,0,6,3, 9,0,7,3, 9,0,8,3, + 9,0,9,3, 9,1,0,3, 9,1,1,3, 9,1,2,3, 9,1,3,3, 9,1,4,3, 9,1,5,3, 9,1,6,3, 9,1,7,3, + 9,1,8,3, 9,1,9,3, 9,2,0,3, 9,2,1,3, 9,2,2,3, 9,2,3,3, 9,2,4,3, 9,2,5,3, 9,2,6,3, + 9,2,7,3, 9,2,8,3, 9,2,9,3, 9,3,0,3, 9,3,1,3, 9,3,2,3, 9,3,3,3, 9,3,4,3, 9,3,5,3, + 9,3,6,3, 9,3,7,3, 9,3,8,3, 9,3,9,3, 9,4,0,3, 9,4,1,3, 9,4,2,3, 9,4,3,3, 9,4,4,3, + 9,4,5,3, 9,4,6,3, 9,4,7,3, 9,4,8,3, 9,4,9,3, 9,5,0,3, 9,5,1,3, 9,5,2,3, 9,5,3,3, + 9,5,4,3, 9,5,5,3, 9,5,6,3, 9,5,7,3, 9,5,8,3, 9,5,9,3, 9,6,0,3, 9,6,1,3, 9,6,2,3, + 9,6,3,3, 9,6,4,3, 9,6,5,3, 9,6,6,3, 9,6,7,3, 9,6,8,3, 9,6,9,3, 9,7,0,3, 9,7,1,3, + 9,7,2,3, 9,7,3,3, 9,7,4,3, 9,7,5,3, 9,7,6,3, 9,7,7,3, 9,7,8,3, 9,7,9,3, 9,8,0,3, + 9,8,1,3, 9,8,2,3, 9,8,3,3, 9,8,4,3, 9,8,5,3, 9,8,6,3, 9,8,7,3, 9,8,8,3, 9,8,9,3, + 9,9,0,3, 9,9,1,3, 9,9,2,3, 9,9,3,3, 9,9,4,3, 9,9,5,3, 9,9,6,3, 9,9,7,3, 9,9,8,3, + 9,9,9,3}; +#endif diff --git a/include/libdecnumber/decNumber.h b/include/libdecnumber/decNumber.h new file mode 100644 index 000000000..aa115fed0 --- /dev/null +++ b/include/libdecnumber/decNumber.h @@ -0,0 +1,201 @@ +/* Decimal number arithmetic module header for the decNumber C Library. + Copyright (C) 2005, 2007 Free Software Foundation, Inc. + Contributed by IBM Corporation. Author Mike Cowlishaw. + + This file is part of GCC. + + GCC is free software; you can redistribute it and/or modify it under + the terms of the GNU General Public License as published by the Free + Software Foundation; either version 2, or (at your option) any later + version. + + In addition to the permissions in the GNU General Public License, + the Free Software Foundation gives you unlimited permission to link + the compiled version of this file into combinations with other + programs, and to distribute those combinations without any + restriction coming from the use of this file. (The General Public + License restrictions do apply in other respects; for example, they + cover modification of the file, and distribution when not linked + into a combine executable.) + + GCC is distributed in the hope that it will be useful, but WITHOUT ANY + WARRANTY; without even the implied warranty of MERCHANTABILITY or + FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + for more details. + + You should have received a copy of the GNU General Public License + along with GCC; see the file COPYING. If not, write to the Free + Software Foundation, 51 Franklin Street, Fifth Floor, Boston, MA + 02110-1301, USA. */ + +/* ------------------------------------------------------------------ */ +/* Decimal Number arithmetic module header */ +/* ------------------------------------------------------------------ */ + +#ifndef DECNUMBER_H +#define DECNUMBER_H + + #define DECNAME "decNumber" /* Short name */ + #define DECFULLNAME "Decimal Number Module" /* Verbose name */ + #define DECAUTHOR "Mike Cowlishaw" /* Who to blame */ + + #include "libdecnumber/decContext.h" + + /* Bit settings for decNumber.bits */ + #define DECNEG 0x80 /* Sign; 1=negative, 0=positive or zero */ + #define DECINF 0x40 /* 1=Infinity */ + #define DECNAN 0x20 /* 1=NaN */ + #define DECSNAN 0x10 /* 1=sNaN */ + /* The remaining bits are reserved; they must be 0 */ + #define DECSPECIAL (DECINF|DECNAN|DECSNAN) /* any special value */ + + /* Define the decNumber data structure. The size and shape of the */ + /* units array in the structure is determined by the following */ + /* constant. This must not be changed without recompiling the */ + /* decNumber library modules. */ + + #define DECDPUN 3 /* DECimal Digits Per UNit [must be >0 */ + /* and <10; 3 or powers of 2 are best]. */ + + /* DECNUMDIGITS is the default number of digits that can be held in */ + /* the structure. If undefined, 1 is assumed and it is assumed */ + /* that the structure will be immediately followed by extra space, */ + /* as required. DECNUMDIGITS is always >0. */ + #if !defined(DECNUMDIGITS) + #define DECNUMDIGITS 1 + #endif + + /* The size (integer data type) of each unit is determined by the */ + /* number of digits it will hold. */ + #if DECDPUN<=2 + #define decNumberUnit uint8_t + #elif DECDPUN<=4 + #define decNumberUnit uint16_t + #else + #define decNumberUnit uint32_t + #endif + /* The number of units needed is ceil(DECNUMDIGITS/DECDPUN) */ + #define DECNUMUNITS ((DECNUMDIGITS+DECDPUN-1)/DECDPUN) + + /* The data structure... */ + typedef struct { + int32_t digits; /* Count of digits in the coefficient; >0 */ + int32_t exponent; /* Unadjusted exponent, unbiased, in */ + /* range: -1999999997 through 999999999 */ + uint8_t bits; /* Indicator bits (see above) */ + /* Coefficient, from least significant unit */ + decNumberUnit lsu[DECNUMUNITS]; + } decNumber; + + /* Notes: */ + /* 1. If digits is > DECDPUN then there will one or more */ + /* decNumberUnits immediately following the first element of lsu.*/ + /* These contain the remaining (more significant) digits of the */ + /* number, and may be in the lsu array, or may be guaranteed by */ + /* some other mechanism (such as being contained in another */ + /* structure, or being overlaid on dynamically allocated */ + /* storage). */ + /* */ + /* Each integer of the coefficient (except potentially the last) */ + /* contains DECDPUN digits (e.g., a value in the range 0 through */ + /* 99999999 if DECDPUN is 8, or 0 through 999 if DECDPUN is 3). */ + /* */ + /* 2. A decNumber converted to a string may need up to digits+14 */ + /* characters. The worst cases (non-exponential and exponential */ + /* formats) are -0.00000{9...}# and -9.{9...}E+999999999# */ + /* (where # is '\0') */ + + + /* ---------------------------------------------------------------- */ + /* decNumber public functions and macros */ + /* ---------------------------------------------------------------- */ + + + /* Conversions */ + decNumber * decNumberFromInt32(decNumber *, int32_t); + decNumber * decNumberFromUInt32(decNumber *, uint32_t); + decNumber *decNumberFromInt64(decNumber *, int64_t); + decNumber *decNumberFromUInt64(decNumber *, uint64_t); + decNumber * decNumberFromString(decNumber *, const char *, decContext *); + char * decNumberToString(const decNumber *, char *); + char * decNumberToEngString(const decNumber *, char *); + uint32_t decNumberToUInt32(const decNumber *, decContext *); + int32_t decNumberToInt32(const decNumber *, decContext *); + int64_t decNumberIntegralToInt64(const decNumber *dn, decContext *set); + uint8_t * decNumberGetBCD(const decNumber *, uint8_t *); + decNumber * decNumberSetBCD(decNumber *, const uint8_t *, uint32_t); + + /* Operators and elementary functions */ + decNumber * decNumberAbs(decNumber *, const decNumber *, decContext *); + decNumber * decNumberAdd(decNumber *, const decNumber *, const decNumber *, decContext *); + decNumber * decNumberAnd(decNumber *, const decNumber *, const decNumber *, decContext *); + decNumber * decNumberCompare(decNumber *, const decNumber *, const decNumber *, decContext *); + decNumber * decNumberCompareSignal(decNumber *, const decNumber *, const decNumber *, decContext *); + decNumber * decNumberCompareTotal(decNumber *, const decNumber *, const decNumber *, decContext *); + decNumber * decNumberCompareTotalMag(decNumber *, const decNumber *, const decNumber *, decContext *); + decNumber * decNumberDivide(decNumber *, const decNumber *, const decNumber *, decContext *); + decNumber * decNumberDivideInteger(decNumber *, const decNumber *, const decNumber *, decContext *); + decNumber * decNumberExp(decNumber *, const decNumber *, decContext *); + decNumber * decNumberFMA(decNumber *, const decNumber *, const decNumber *, const decNumber *, decContext *); + decNumber * decNumberInvert(decNumber *, const decNumber *, decContext *); + decNumber * decNumberLn(decNumber *, const decNumber *, decContext *); + decNumber * decNumberLogB(decNumber *, const decNumber *, decContext *); + decNumber * decNumberLog10(decNumber *, const decNumber *, decContext *); + decNumber * decNumberMax(decNumber *, const decNumber *, const decNumber *, decContext *); + decNumber * decNumberMaxMag(decNumber *, const decNumber *, const decNumber *, decContext *); + decNumber * decNumberMin(decNumber *, const decNumber *, const decNumber *, decContext *); + decNumber * decNumberMinMag(decNumber *, const decNumber *, const decNumber *, decContext *); + decNumber * decNumberMinus(decNumber *, const decNumber *, decContext *); + decNumber * decNumberMultiply(decNumber *, const decNumber *, const decNumber *, decContext *); + decNumber * decNumberNormalize(decNumber *, const decNumber *, decContext *); + decNumber * decNumberOr(decNumber *, const decNumber *, const decNumber *, decContext *); + decNumber * decNumberPlus(decNumber *, const decNumber *, decContext *); + decNumber * decNumberPower(decNumber *, const decNumber *, const decNumber *, decContext *); + decNumber * decNumberQuantize(decNumber *, const decNumber *, const decNumber *, decContext *); + decNumber * decNumberReduce(decNumber *, const decNumber *, decContext *); + decNumber * decNumberRemainder(decNumber *, const decNumber *, const decNumber *, decContext *); + decNumber * decNumberRemainderNear(decNumber *, const decNumber *, const decNumber *, decContext *); + decNumber * decNumberRescale(decNumber *, const decNumber *, const decNumber *, decContext *); + decNumber * decNumberRotate(decNumber *, const decNumber *, const decNumber *, decContext *); + decNumber * decNumberSameQuantum(decNumber *, const decNumber *, const decNumber *); + decNumber * decNumberScaleB(decNumber *, const decNumber *, const decNumber *, decContext *); + decNumber * decNumberShift(decNumber *, const decNumber *, const decNumber *, decContext *); + decNumber * decNumberSquareRoot(decNumber *, const decNumber *, decContext *); + decNumber * decNumberSubtract(decNumber *, const decNumber *, const decNumber *, decContext *); + decNumber * decNumberToIntegralExact(decNumber *, const decNumber *, decContext *); + decNumber * decNumberToIntegralValue(decNumber *, const decNumber *, decContext *); + decNumber * decNumberXor(decNumber *, const decNumber *, const decNumber *, decContext *); + + /* Utilities */ + enum decClass decNumberClass(const decNumber *, decContext *); + const char * decNumberClassToString(enum decClass); + decNumber * decNumberCopy(decNumber *, const decNumber *); + decNumber * decNumberCopyAbs(decNumber *, const decNumber *); + decNumber * decNumberCopyNegate(decNumber *, const decNumber *); + decNumber * decNumberCopySign(decNumber *, const decNumber *, const decNumber *); + decNumber * decNumberNextMinus(decNumber *, const decNumber *, decContext *); + decNumber * decNumberNextPlus(decNumber *, const decNumber *, decContext *); + decNumber * decNumberNextToward(decNumber *, const decNumber *, const decNumber *, decContext *); + decNumber * decNumberTrim(decNumber *); + const char * decNumberVersion(void); + decNumber * decNumberZero(decNumber *); + + /* Functions for testing decNumbers (normality depends on context) */ + int32_t decNumberIsNormal(const decNumber *, decContext *); + int32_t decNumberIsSubnormal(const decNumber *, decContext *); + + /* Macros for testing decNumber *dn */ + #define decNumberIsCanonical(dn) (1) /* All decNumbers are saintly */ + #define decNumberIsFinite(dn) (((dn)->bits&DECSPECIAL)==0) + #define decNumberIsInfinite(dn) (((dn)->bits&DECINF)!=0) + #define decNumberIsNaN(dn) (((dn)->bits&(DECNAN|DECSNAN))!=0) + #define decNumberIsNegative(dn) (((dn)->bits&DECNEG)!=0) + #define decNumberIsQNaN(dn) (((dn)->bits&(DECNAN))!=0) + #define decNumberIsSNaN(dn) (((dn)->bits&(DECSNAN))!=0) + #define decNumberIsSpecial(dn) (((dn)->bits&DECSPECIAL)!=0) + #define decNumberIsZero(dn) (*(dn)->lsu==0 \ + && (dn)->digits==1 \ + && (((dn)->bits&DECSPECIAL)==0)) + #define decNumberRadix(dn) (10) + +#endif diff --git a/include/libdecnumber/decNumberLocal.h b/include/libdecnumber/decNumberLocal.h new file mode 100644 index 000000000..4d53c077f --- /dev/null +++ b/include/libdecnumber/decNumberLocal.h @@ -0,0 +1,663 @@ +/* Local definitions for the decNumber C Library. + Copyright (C) 2007 Free Software Foundation, Inc. + Contributed by IBM Corporation. Author Mike Cowlishaw. + + This file is part of GCC. + + GCC is free software; you can redistribute it and/or modify it under + the terms of the GNU General Public License as published by the Free + Software Foundation; either version 2, or (at your option) any later + version. + + In addition to the permissions in the GNU General Public License, + the Free Software Foundation gives you unlimited permission to link + the compiled version of this file into combinations with other + programs, and to distribute those combinations without any + restriction coming from the use of this file. (The General Public + License restrictions do apply in other respects; for example, they + cover modification of the file, and distribution when not linked + into a combine executable.) + + GCC is distributed in the hope that it will be useful, but WITHOUT ANY + WARRANTY; without even the implied warranty of MERCHANTABILITY or + FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + for more details. + + You should have received a copy of the GNU General Public License + along with GCC; see the file COPYING. If not, write to the Free + Software Foundation, 51 Franklin Street, Fifth Floor, Boston, MA + 02110-1301, USA. */ + +/* ------------------------------------------------------------------ */ +/* decNumber package local type, tuning, and macro definitions */ +/* ------------------------------------------------------------------ */ +/* This header file is included by all modules in the decNumber */ +/* library, and contains local type definitions, tuning parameters, */ +/* etc. It should not need to be used by application programs. */ +/* decNumber.h or one of decDouble (etc.) must be included first. */ +/* ------------------------------------------------------------------ */ + +#ifndef DECNUMBERLOCAL_H +#define DECNUMBERLOCAL_H + + #define DECVERSION "decNumber 3.53" /* Package Version [16 max.] */ + #define DECNLAUTHOR "Mike Cowlishaw" /* Who to blame */ + + #include "libdecnumber/dconfig.h" + #include "libdecnumber/decContext.h" + + /* Conditional code flag -- set this to match hardware platform */ + /* 1=little-endian, 0=big-endian */ + #if WORDS_BIGENDIAN + #define DECLITEND 0 + #else + #define DECLITEND 1 + #endif + + /* Conditional code flag -- set this to 1 for best performance */ + #define DECUSE64 1 /* 1=use int64s, 0=int32 & smaller only */ + + /* Conditional check flags -- set these to 0 for best performance */ + #define DECCHECK 0 /* 1 to enable robust checking */ + #define DECALLOC 0 /* 1 to enable memory accounting */ + #define DECTRACE 0 /* 1 to trace certain internals, etc. */ + + /* Tuning parameter for decNumber (arbitrary precision) module */ + #define DECBUFFER 36 /* Size basis for local buffers. This */ + /* should be a common maximum precision */ + /* rounded up to a multiple of 4; must */ + /* be zero or positive. */ + + /* ---------------------------------------------------------------- */ + /* Definitions for all modules (general-purpose) */ + /* ---------------------------------------------------------------- */ + + /* Local names for common types -- for safety, decNumber modules do */ + /* not use int or long directly. */ + #define Flag uint8_t + #define Byte int8_t + #define uByte uint8_t + #define Short int16_t + #define uShort uint16_t + #define Int int32_t + #define uInt uint32_t + #define Unit decNumberUnit + #if DECUSE64 + #define Long int64_t + #define uLong uint64_t + #endif + + /* Development-use definitions */ + typedef long int LI; /* for printf arguments only */ + #define DECNOINT 0 /* 1 to check no internal use of 'int' */ + #if DECNOINT + /* if these interfere with your C includes, do not set DECNOINT */ + #define int ? /* enable to ensure that plain C 'int' */ + #define long ?? /* .. or 'long' types are not used */ + #endif + + /* Shared lookup tables */ + extern const uByte DECSTICKYTAB[10]; /* re-round digits if sticky */ + extern const uLong DECPOWERS[19]; /* powers of ten table */ + /* The following are included from decDPD.h */ + extern const uShort DPD2BIN[1024]; /* DPD -> 0-999 */ + extern const uShort BIN2DPD[1000]; /* 0-999 -> DPD */ + extern const uInt DPD2BINK[1024]; /* DPD -> 0-999000 */ + extern const uInt DPD2BINM[1024]; /* DPD -> 0-999000000 */ + extern const uByte DPD2BCD8[4096]; /* DPD -> ddd + len */ + extern const uByte BIN2BCD8[4000]; /* 0-999 -> ddd + len */ + extern const uShort BCD2DPD[2458]; /* 0-0x999 -> DPD (0x999=2457)*/ + + /* LONGMUL32HI -- set w=(u*v)>>32, where w, u, and v are uInts */ + /* (that is, sets w to be the high-order word of the 64-bit result; */ + /* the low-order word is simply u*v.) */ + /* This version is derived from Knuth via Hacker's Delight; */ + /* it seems to optimize better than some others tried */ + #define LONGMUL32HI(w, u, v) { \ + uInt u0, u1, v0, v1, w0, w1, w2, t; \ + u0=u & 0xffff; u1=u>>16; \ + v0=v & 0xffff; v1=v>>16; \ + w0=u0*v0; \ + t=u1*v0 + (w0>>16); \ + w1=t & 0xffff; w2=t>>16; \ + w1=u0*v1 + w1; \ + (w)=u1*v1 + w2 + (w1>>16);} + + /* ROUNDUP -- round an integer up to a multiple of n */ + #define ROUNDUP(i, n) ((((i)+(n)-1)/n)*n) + + /* ROUNDDOWN -- round an integer down to a multiple of n */ + #define ROUNDDOWN(i, n) (((i)/n)*n) + #define ROUNDDOWN4(i) ((i)&~3) /* special for n=4 */ + + /* References to multi-byte sequences under different sizes */ + /* Refer to a uInt from four bytes starting at a char* or uByte*, */ + /* etc. */ + #define UINTAT(b) (*((uInt *)(b))) + #define USHORTAT(b) (*((uShort *)(b))) + #define UBYTEAT(b) (*((uByte *)(b))) + + /* X10 and X100 -- multiply integer i by 10 or 100 */ + /* [shifts are usually faster than multiply; could be conditional] */ + #define X10(i) (((i)<<1)+((i)<<3)) + #define X100(i) (((i)<<2)+((i)<<5)+((i)<<6)) + + /* MAXI and MINI -- general max & min (not in ANSI) for integers */ + #define MAXI(x,y) ((x)<(y)?(y):(x)) + #define MINI(x,y) ((x)>(y)?(y):(x)) + + /* Useful constants */ + #define BILLION 1000000000 /* 10**9 */ + /* CHARMASK: 0x30303030 for ASCII/UTF8; 0xF0F0F0F0 for EBCDIC */ + #define CHARMASK ((((((((uInt)'0')<<8)+'0')<<8)+'0')<<8)+'0') + + + /* ---------------------------------------------------------------- */ + /* Definitions for arbitrary-precision modules (only valid after */ + /* decNumber.h has been included) */ + /* ---------------------------------------------------------------- */ + + /* Limits and constants */ + #define DECNUMMAXP 999999999 /* maximum precision code can handle */ + #define DECNUMMAXE 999999999 /* maximum adjusted exponent ditto */ + #define DECNUMMINE -999999999 /* minimum adjusted exponent ditto */ + #if (DECNUMMAXP != DEC_MAX_DIGITS) + #error Maximum digits mismatch + #endif + #if (DECNUMMAXE != DEC_MAX_EMAX) + #error Maximum exponent mismatch + #endif + #if (DECNUMMINE != DEC_MIN_EMIN) + #error Minimum exponent mismatch + #endif + + /* Set DECDPUNMAX -- the maximum integer that fits in DECDPUN */ + /* digits, and D2UTABLE -- the initializer for the D2U table */ + #if DECDPUN==1 + #define DECDPUNMAX 9 + #define D2UTABLE {0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17, \ + 18,19,20,21,22,23,24,25,26,27,28,29,30,31,32, \ + 33,34,35,36,37,38,39,40,41,42,43,44,45,46,47, \ + 48,49} + #elif DECDPUN==2 + #define DECDPUNMAX 99 + #define D2UTABLE {0,1,1,2,2,3,3,4,4,5,5,6,6,7,7,8,8,9,9,10,10, \ + 11,11,12,12,13,13,14,14,15,15,16,16,17,17,18, \ + 18,19,19,20,20,21,21,22,22,23,23,24,24,25} + #elif DECDPUN==3 + #define DECDPUNMAX 999 + #define D2UTABLE {0,1,1,1,2,2,2,3,3,3,4,4,4,5,5,5,6,6,6,7,7,7, \ + 8,8,8,9,9,9,10,10,10,11,11,11,12,12,12,13,13, \ + 13,14,14,14,15,15,15,16,16,16,17} + #elif DECDPUN==4 + #define DECDPUNMAX 9999 + #define D2UTABLE {0,1,1,1,1,2,2,2,2,3,3,3,3,4,4,4,4,5,5,5,5,6, \ + 6,6,6,7,7,7,7,8,8,8,8,9,9,9,9,10,10,10,10,11, \ + 11,11,11,12,12,12,12,13} + #elif DECDPUN==5 + #define DECDPUNMAX 99999 + #define D2UTABLE {0,1,1,1,1,1,2,2,2,2,2,3,3,3,3,3,4,4,4,4,4,5, \ + 5,5,5,5,6,6,6,6,6,7,7,7,7,7,8,8,8,8,8,9,9,9, \ + 9,9,10,10,10,10} + #elif DECDPUN==6 + #define DECDPUNMAX 999999 + #define D2UTABLE {0,1,1,1,1,1,1,2,2,2,2,2,2,3,3,3,3,3,3,4,4,4, \ + 4,4,4,5,5,5,5,5,5,6,6,6,6,6,6,7,7,7,7,7,7,8, \ + 8,8,8,8,8,9} + #elif DECDPUN==7 + #define DECDPUNMAX 9999999 + #define D2UTABLE {0,1,1,1,1,1,1,1,2,2,2,2,2,2,2,3,3,3,3,3,3,3, \ + 4,4,4,4,4,4,4,5,5,5,5,5,5,5,6,6,6,6,6,6,6,7, \ + 7,7,7,7,7,7} + #elif DECDPUN==8 + #define DECDPUNMAX 99999999 + #define D2UTABLE {0,1,1,1,1,1,1,1,1,2,2,2,2,2,2,2,2,3,3,3,3,3, \ + 3,3,3,4,4,4,4,4,4,4,4,5,5,5,5,5,5,5,5,6,6,6, \ + 6,6,6,6,6,7} + #elif DECDPUN==9 + #define DECDPUNMAX 999999999 + #define D2UTABLE {0,1,1,1,1,1,1,1,1,1,2,2,2,2,2,2,2,2,2,3,3,3, \ + 3,3,3,3,3,3,4,4,4,4,4,4,4,4,4,5,5,5,5,5,5,5, \ + 5,5,6,6,6,6} + #elif defined(DECDPUN) + #error DECDPUN must be in the range 1-9 + #endif + + /* ----- Shared data (in decNumber.c) ----- */ + /* Public lookup table used by the D2U macro (see below) */ + #define DECMAXD2U 49 + extern const uByte d2utable[DECMAXD2U+1]; + + /* ----- Macros ----- */ + /* ISZERO -- return true if decNumber dn is a zero */ + /* [performance-critical in some situations] */ + #define ISZERO(dn) decNumberIsZero(dn) /* now just a local name */ + + /* D2U -- return the number of Units needed to hold d digits */ + /* (runtime version, with table lookaside for small d) */ + #if DECDPUN==8 + #define D2U(d) ((unsigned)((d)<=DECMAXD2U?d2utable[d]:((d)+7)>>3)) + #elif DECDPUN==4 + #define D2U(d) ((unsigned)((d)<=DECMAXD2U?d2utable[d]:((d)+3)>>2)) + #else + #define D2U(d) ((d)<=DECMAXD2U?d2utable[d]:((d)+DECDPUN-1)/DECDPUN) + #endif + /* SD2U -- static D2U macro (for compile-time calculation) */ + #define SD2U(d) (((d)+DECDPUN-1)/DECDPUN) + + /* MSUDIGITS -- returns digits in msu, from digits, calculated */ + /* using D2U */ + #define MSUDIGITS(d) ((d)-(D2U(d)-1)*DECDPUN) + + /* D2N -- return the number of decNumber structs that would be */ + /* needed to contain that number of digits (and the initial */ + /* decNumber struct) safely. Note that one Unit is included in the */ + /* initial structure. Used for allocating space that is aligned on */ + /* a decNumber struct boundary. */ + #define D2N(d) \ + ((((SD2U(d)-1)*sizeof(Unit))+sizeof(decNumber)*2-1)/sizeof(decNumber)) + + /* TODIGIT -- macro to remove the leading digit from the unsigned */ + /* integer u at column cut (counting from the right, LSD=0) and */ + /* place it as an ASCII character into the character pointed to by */ + /* c. Note that cut must be <= 9, and the maximum value for u is */ + /* 2,000,000,000 (as is needed for negative exponents of */ + /* subnormals). The unsigned integer pow is used as a temporary */ + /* variable. */ + #define TODIGIT(u, cut, c, pow) { \ + *(c)='0'; \ + pow=DECPOWERS[cut]*2; \ + if ((u)>pow) { \ + pow*=4; \ + if ((u)>=pow) {(u)-=pow; *(c)+=8;} \ + pow/=2; \ + if ((u)>=pow) {(u)-=pow; *(c)+=4;} \ + pow/=2; \ + } \ + if ((u)>=pow) {(u)-=pow; *(c)+=2;} \ + pow/=2; \ + if ((u)>=pow) {(u)-=pow; *(c)+=1;} \ + } + + /* ---------------------------------------------------------------- */ + /* Definitions for fixed-precision modules (only valid after */ + /* decSingle.h, decDouble.h, or decQuad.h has been included) */ + /* ---------------------------------------------------------------- */ + + /* bcdnum -- a structure describing a format-independent finite */ + /* number, whose coefficient is a string of bcd8 uBytes */ + typedef struct { + uByte *msd; /* -> most significant digit */ + uByte *lsd; /* -> least ditto */ + uInt sign; /* 0=positive, DECFLOAT_Sign=negative */ + Int exponent; /* Unadjusted signed exponent (q), or */ + /* DECFLOAT_NaN etc. for a special */ + } bcdnum; + + /* Test if exponent or bcdnum exponent must be a special, etc. */ + #define EXPISSPECIAL(exp) ((exp)>=DECFLOAT_MinSp) + #define EXPISINF(exp) (exp==DECFLOAT_Inf) + #define EXPISNAN(exp) (exp==DECFLOAT_qNaN || exp==DECFLOAT_sNaN) + #define NUMISSPECIAL(num) (EXPISSPECIAL((num)->exponent)) + + /* Refer to a 32-bit word or byte in a decFloat (df) by big-endian */ + /* (array) notation (the 0 word or byte contains the sign bit), */ + /* automatically adjusting for endianness; similarly address a word */ + /* in the next-wider format (decFloatWider, or dfw) */ + #define DECWORDS (DECBYTES/4) + #define DECWWORDS (DECWBYTES/4) + #if DECLITEND + #define DFWORD(df, off) ((df)->words[DECWORDS-1-(off)]) + #define DFBYTE(df, off) ((df)->bytes[DECBYTES-1-(off)]) + #define DFWWORD(dfw, off) ((dfw)->words[DECWWORDS-1-(off)]) + #else + #define DFWORD(df, off) ((df)->words[off]) + #define DFBYTE(df, off) ((df)->bytes[off]) + #define DFWWORD(dfw, off) ((dfw)->words[off]) + #endif + + /* Tests for sign or specials, directly on DECFLOATs */ + #define DFISSIGNED(df) (DFWORD(df, 0)&0x80000000) + #define DFISSPECIAL(df) ((DFWORD(df, 0)&0x78000000)==0x78000000) + #define DFISINF(df) ((DFWORD(df, 0)&0x7c000000)==0x78000000) + #define DFISNAN(df) ((DFWORD(df, 0)&0x7c000000)==0x7c000000) + #define DFISQNAN(df) ((DFWORD(df, 0)&0x7e000000)==0x7c000000) + #define DFISSNAN(df) ((DFWORD(df, 0)&0x7e000000)==0x7e000000) + + /* Shared lookup tables */ + extern const uInt DECCOMBMSD[64]; /* Combination field -> MSD */ + extern const uInt DECCOMBFROM[48]; /* exp+msd -> Combination */ + + /* Private generic (utility) routine */ + #if DECCHECK || DECTRACE + extern void decShowNum(const bcdnum *, const char *); + #endif + + /* Format-dependent macros and constants */ + #if defined(DECPMAX) + + /* Useful constants */ + #define DECPMAX9 (ROUNDUP(DECPMAX, 9)/9) /* 'Pmax' in 10**9s */ + /* Top words for a zero */ + #define SINGLEZERO 0x22500000 + #define DOUBLEZERO 0x22380000 + #define QUADZERO 0x22080000 + /* [ZEROWORD is defined to be one of these in the DFISZERO macro] */ + + /* Format-dependent common tests: */ + /* DFISZERO -- test for (any) zero */ + /* DFISCCZERO -- test for coefficient continuation being zero */ + /* DFISCC01 -- test for coefficient contains only 0s and 1s */ + /* DFISINT -- test for finite and exponent q=0 */ + /* DFISUINT01 -- test for sign=0, finite, exponent q=0, and */ + /* MSD=0 or 1 */ + /* ZEROWORD is also defined here. */ + /* In DFISZERO the first test checks the least-significant word */ + /* (most likely to be non-zero); the penultimate tests MSD and */ + /* DPDs in the signword, and the final test excludes specials and */ + /* MSD>7. DFISINT similarly has to allow for the two forms of */ + /* MSD codes. DFISUINT01 only has to allow for one form of MSD */ + /* code. */ + #if DECPMAX==7 + #define ZEROWORD SINGLEZERO + /* [test macros not needed except for Zero] */ + #define DFISZERO(df) ((DFWORD(df, 0)&0x1c0fffff)==0 \ + && (DFWORD(df, 0)&0x60000000)!=0x60000000) + #elif DECPMAX==16 + #define ZEROWORD DOUBLEZERO + #define DFISZERO(df) ((DFWORD(df, 1)==0 \ + && (DFWORD(df, 0)&0x1c03ffff)==0 \ + && (DFWORD(df, 0)&0x60000000)!=0x60000000)) + #define DFISINT(df) ((DFWORD(df, 0)&0x63fc0000)==0x22380000 \ + ||(DFWORD(df, 0)&0x7bfc0000)==0x6a380000) + #define DFISUINT01(df) ((DFWORD(df, 0)&0xfbfc0000)==0x22380000) + #define DFISCCZERO(df) (DFWORD(df, 1)==0 \ + && (DFWORD(df, 0)&0x0003ffff)==0) + #define DFISCC01(df) ((DFWORD(df, 0)&~0xfffc9124)==0 \ + && (DFWORD(df, 1)&~0x49124491)==0) + #elif DECPMAX==34 + #define ZEROWORD QUADZERO + #define DFISZERO(df) ((DFWORD(df, 3)==0 \ + && DFWORD(df, 2)==0 \ + && DFWORD(df, 1)==0 \ + && (DFWORD(df, 0)&0x1c003fff)==0 \ + && (DFWORD(df, 0)&0x60000000)!=0x60000000)) + #define DFISINT(df) ((DFWORD(df, 0)&0x63ffc000)==0x22080000 \ + ||(DFWORD(df, 0)&0x7bffc000)==0x6a080000) + #define DFISUINT01(df) ((DFWORD(df, 0)&0xfbffc000)==0x22080000) + #define DFISCCZERO(df) (DFWORD(df, 3)==0 \ + && DFWORD(df, 2)==0 \ + && DFWORD(df, 1)==0 \ + && (DFWORD(df, 0)&0x00003fff)==0) + + #define DFISCC01(df) ((DFWORD(df, 0)&~0xffffc912)==0 \ + && (DFWORD(df, 1)&~0x44912449)==0 \ + && (DFWORD(df, 2)&~0x12449124)==0 \ + && (DFWORD(df, 3)&~0x49124491)==0) + #endif + + /* Macros to test if a certain 10 bits of a uInt or pair of uInts */ + /* are a canonical declet [higher or lower bits are ignored]. */ + /* declet is at offset 0 (from the right) in a uInt: */ + #define CANONDPD(dpd) (((dpd)&0x300)==0 || ((dpd)&0x6e)!=0x6e) + /* declet is at offset k (a multiple of 2) in a uInt: */ + #define CANONDPDOFF(dpd, k) (((dpd)&(0x300<<(k)))==0 \ + || ((dpd)&(((uInt)0x6e)<<(k)))!=(((uInt)0x6e)<<(k))) + /* declet is at offset k (a multiple of 2) in a pair of uInts: */ + /* [the top 2 bits will always be in the more-significant uInt] */ + #define CANONDPDTWO(hi, lo, k) (((hi)&(0x300>>(32-(k))))==0 \ + || ((hi)&(0x6e>>(32-(k))))!=(0x6e>>(32-(k))) \ + || ((lo)&(((uInt)0x6e)<<(k)))!=(((uInt)0x6e)<<(k))) + + /* Macro to test whether a full-length (length DECPMAX) BCD8 */ + /* coefficient is zero */ + /* test just the LSWord first, then the remainder */ + #if DECPMAX==7 + #define ISCOEFFZERO(u) (UINTAT((u)+DECPMAX-4)==0 \ + && UINTAT((u)+DECPMAX-7)==0) + #elif DECPMAX==16 + #define ISCOEFFZERO(u) (UINTAT((u)+DECPMAX-4)==0 \ + && (UINTAT((u)+DECPMAX-8)+UINTAT((u)+DECPMAX-12) \ + +UINTAT((u)+DECPMAX-16))==0) + #elif DECPMAX==34 + #define ISCOEFFZERO(u) (UINTAT((u)+DECPMAX-4)==0 \ + && (UINTAT((u)+DECPMAX-8) +UINTAT((u)+DECPMAX-12) \ + +UINTAT((u)+DECPMAX-16)+UINTAT((u)+DECPMAX-20) \ + +UINTAT((u)+DECPMAX-24)+UINTAT((u)+DECPMAX-28) \ + +UINTAT((u)+DECPMAX-32)+USHORTAT((u)+DECPMAX-34))==0) + #endif + + /* Macros and masks for the exponent continuation field and MSD */ + /* Get the exponent continuation from a decFloat *df as an Int */ + #define GETECON(df) ((Int)((DFWORD((df), 0)&0x03ffffff)>>(32-6-DECECONL))) + /* Ditto, from the next-wider format */ + #define GETWECON(df) ((Int)((DFWWORD((df), 0)&0x03ffffff)>>(32-6-DECWECONL))) + /* Get the biased exponent similarly */ + #define GETEXP(df) ((Int)(DECCOMBEXP[DFWORD((df), 0)>>26]+GETECON(df))) + /* Get the unbiased exponent similarly */ + #define GETEXPUN(df) ((Int)GETEXP(df)-DECBIAS) + /* Get the MSD similarly (as uInt) */ + #define GETMSD(df) (DECCOMBMSD[DFWORD((df), 0)>>26]) + + /* Compile-time computes of the exponent continuation field masks */ + /* full exponent continuation field: */ + #define ECONMASK ((0x03ffffff>>(32-6-DECECONL))<<(32-6-DECECONL)) + /* same, not including its first digit (the qNaN/sNaN selector): */ + #define ECONNANMASK ((0x01ffffff>>(32-6-DECECONL))<<(32-6-DECECONL)) + + /* Macros to decode the coefficient in a finite decFloat *df into */ + /* a BCD string (uByte *bcdin) of length DECPMAX uBytes */ + + /* In-line sequence to convert 10 bits at right end of uInt dpd */ + /* to three BCD8 digits starting at uByte u. Note that an extra */ + /* byte is written to the right of the three digits because this */ + /* moves four at a time for speed; the alternative macro moves */ + /* exactly three bytes */ + #define dpd2bcd8(u, dpd) { \ + UINTAT(u)=UINTAT(&DPD2BCD8[((dpd)&0x3ff)*4]);} + + #define dpd2bcd83(u, dpd) { \ + *(u)=DPD2BCD8[((dpd)&0x3ff)*4]; \ + *(u+1)=DPD2BCD8[((dpd)&0x3ff)*4+1]; \ + *(u+2)=DPD2BCD8[((dpd)&0x3ff)*4+2];} + + /* Decode the declets. After extracting each one, it is decoded */ + /* to BCD8 using a table lookup (also used for variable-length */ + /* decode). Each DPD decode is 3 bytes BCD8 plus a one-byte */ + /* length which is not used, here). Fixed-length 4-byte moves */ + /* are fast, however, almost everywhere, and so are used except */ + /* for the final three bytes (to avoid overrun). The code below */ + /* is 36 instructions for Doubles and about 70 for Quads, even */ + /* on IA32. */ + + /* Two macros are defined for each format: */ + /* GETCOEFF extracts the coefficient of the current format */ + /* GETWCOEFF extracts the coefficient of the next-wider format. */ + /* The latter is a copy of the next-wider GETCOEFF using DFWWORD. */ + + #if DECPMAX==7 + #define GETCOEFF(df, bcd) { \ + uInt sourhi=DFWORD(df, 0); \ + *(bcd)=(uByte)DECCOMBMSD[sourhi>>26]; \ + dpd2bcd8(bcd+1, sourhi>>10); \ + dpd2bcd83(bcd+4, sourhi);} + #define GETWCOEFF(df, bcd) { \ + uInt sourhi=DFWWORD(df, 0); \ + uInt sourlo=DFWWORD(df, 1); \ + *(bcd)=(uByte)DECCOMBMSD[sourhi>>26]; \ + dpd2bcd8(bcd+1, sourhi>>8); \ + dpd2bcd8(bcd+4, (sourhi<<2) | (sourlo>>30)); \ + dpd2bcd8(bcd+7, sourlo>>20); \ + dpd2bcd8(bcd+10, sourlo>>10); \ + dpd2bcd83(bcd+13, sourlo);} + + #elif DECPMAX==16 + #define GETCOEFF(df, bcd) { \ + uInt sourhi=DFWORD(df, 0); \ + uInt sourlo=DFWORD(df, 1); \ + *(bcd)=(uByte)DECCOMBMSD[sourhi>>26]; \ + dpd2bcd8(bcd+1, sourhi>>8); \ + dpd2bcd8(bcd+4, (sourhi<<2) | (sourlo>>30)); \ + dpd2bcd8(bcd+7, sourlo>>20); \ + dpd2bcd8(bcd+10, sourlo>>10); \ + dpd2bcd83(bcd+13, sourlo);} + #define GETWCOEFF(df, bcd) { \ + uInt sourhi=DFWWORD(df, 0); \ + uInt sourmh=DFWWORD(df, 1); \ + uInt sourml=DFWWORD(df, 2); \ + uInt sourlo=DFWWORD(df, 3); \ + *(bcd)=(uByte)DECCOMBMSD[sourhi>>26]; \ + dpd2bcd8(bcd+1, sourhi>>4); \ + dpd2bcd8(bcd+4, ((sourhi)<<6) | (sourmh>>26)); \ + dpd2bcd8(bcd+7, sourmh>>16); \ + dpd2bcd8(bcd+10, sourmh>>6); \ + dpd2bcd8(bcd+13, ((sourmh)<<4) | (sourml>>28)); \ + dpd2bcd8(bcd+16, sourml>>18); \ + dpd2bcd8(bcd+19, sourml>>8); \ + dpd2bcd8(bcd+22, ((sourml)<<2) | (sourlo>>30)); \ + dpd2bcd8(bcd+25, sourlo>>20); \ + dpd2bcd8(bcd+28, sourlo>>10); \ + dpd2bcd83(bcd+31, sourlo);} + + #elif DECPMAX==34 + #define GETCOEFF(df, bcd) { \ + uInt sourhi=DFWORD(df, 0); \ + uInt sourmh=DFWORD(df, 1); \ + uInt sourml=DFWORD(df, 2); \ + uInt sourlo=DFWORD(df, 3); \ + *(bcd)=(uByte)DECCOMBMSD[sourhi>>26]; \ + dpd2bcd8(bcd+1, sourhi>>4); \ + dpd2bcd8(bcd+4, ((sourhi)<<6) | (sourmh>>26)); \ + dpd2bcd8(bcd+7, sourmh>>16); \ + dpd2bcd8(bcd+10, sourmh>>6); \ + dpd2bcd8(bcd+13, ((sourmh)<<4) | (sourml>>28)); \ + dpd2bcd8(bcd+16, sourml>>18); \ + dpd2bcd8(bcd+19, sourml>>8); \ + dpd2bcd8(bcd+22, ((sourml)<<2) | (sourlo>>30)); \ + dpd2bcd8(bcd+25, sourlo>>20); \ + dpd2bcd8(bcd+28, sourlo>>10); \ + dpd2bcd83(bcd+31, sourlo);} + + #define GETWCOEFF(df, bcd) {??} /* [should never be used] */ + #endif + + /* Macros to decode the coefficient in a finite decFloat *df into */ + /* a base-billion uInt array, with the least-significant */ + /* 0-999999999 'digit' at offset 0. */ + + /* Decode the declets. After extracting each one, it is decoded */ + /* to binary using a table lookup. Three tables are used; one */ + /* the usual DPD to binary, the other two pre-multiplied by 1000 */ + /* and 1000000 to avoid multiplication during decode. These */ + /* tables can also be used for multiplying up the MSD as the DPD */ + /* code for 0 through 9 is the identity. */ + #define DPD2BIN0 DPD2BIN /* for prettier code */ + + #if DECPMAX==7 + #define GETCOEFFBILL(df, buf) { \ + uInt sourhi=DFWORD(df, 0); \ + (buf)[0]=DPD2BIN0[sourhi&0x3ff] \ + +DPD2BINK[(sourhi>>10)&0x3ff] \ + +DPD2BINM[DECCOMBMSD[sourhi>>26]];} + + #elif DECPMAX==16 + #define GETCOEFFBILL(df, buf) { \ + uInt sourhi, sourlo; \ + sourlo=DFWORD(df, 1); \ + (buf)[0]=DPD2BIN0[sourlo&0x3ff] \ + +DPD2BINK[(sourlo>>10)&0x3ff] \ + +DPD2BINM[(sourlo>>20)&0x3ff]; \ + sourhi=DFWORD(df, 0); \ + (buf)[1]=DPD2BIN0[((sourhi<<2) | (sourlo>>30))&0x3ff] \ + +DPD2BINK[(sourhi>>8)&0x3ff] \ + +DPD2BINM[DECCOMBMSD[sourhi>>26]];} + + #elif DECPMAX==34 + #define GETCOEFFBILL(df, buf) { \ + uInt sourhi, sourmh, sourml, sourlo; \ + sourlo=DFWORD(df, 3); \ + (buf)[0]=DPD2BIN0[sourlo&0x3ff] \ + +DPD2BINK[(sourlo>>10)&0x3ff] \ + +DPD2BINM[(sourlo>>20)&0x3ff]; \ + sourml=DFWORD(df, 2); \ + (buf)[1]=DPD2BIN0[((sourml<<2) | (sourlo>>30))&0x3ff] \ + +DPD2BINK[(sourml>>8)&0x3ff] \ + +DPD2BINM[(sourml>>18)&0x3ff]; \ + sourmh=DFWORD(df, 1); \ + (buf)[2]=DPD2BIN0[((sourmh<<4) | (sourml>>28))&0x3ff] \ + +DPD2BINK[(sourmh>>6)&0x3ff] \ + +DPD2BINM[(sourmh>>16)&0x3ff]; \ + sourhi=DFWORD(df, 0); \ + (buf)[3]=DPD2BIN0[((sourhi<<6) | (sourmh>>26))&0x3ff] \ + +DPD2BINK[(sourhi>>4)&0x3ff] \ + +DPD2BINM[DECCOMBMSD[sourhi>>26]];} + + #endif + + /* Macros to decode the coefficient in a finite decFloat *df into */ + /* a base-thousand uInt array, with the least-significant 0-999 */ + /* 'digit' at offset 0. */ + + /* Decode the declets. After extracting each one, it is decoded */ + /* to binary using a table lookup. */ + #if DECPMAX==7 + #define GETCOEFFTHOU(df, buf) { \ + uInt sourhi=DFWORD(df, 0); \ + (buf)[0]=DPD2BIN[sourhi&0x3ff]; \ + (buf)[1]=DPD2BIN[(sourhi>>10)&0x3ff]; \ + (buf)[2]=DECCOMBMSD[sourhi>>26];} + + #elif DECPMAX==16 + #define GETCOEFFTHOU(df, buf) { \ + uInt sourhi, sourlo; \ + sourlo=DFWORD(df, 1); \ + (buf)[0]=DPD2BIN[sourlo&0x3ff]; \ + (buf)[1]=DPD2BIN[(sourlo>>10)&0x3ff]; \ + (buf)[2]=DPD2BIN[(sourlo>>20)&0x3ff]; \ + sourhi=DFWORD(df, 0); \ + (buf)[3]=DPD2BIN[((sourhi<<2) | (sourlo>>30))&0x3ff]; \ + (buf)[4]=DPD2BIN[(sourhi>>8)&0x3ff]; \ + (buf)[5]=DECCOMBMSD[sourhi>>26];} + + #elif DECPMAX==34 + #define GETCOEFFTHOU(df, buf) { \ + uInt sourhi, sourmh, sourml, sourlo; \ + sourlo=DFWORD(df, 3); \ + (buf)[0]=DPD2BIN[sourlo&0x3ff]; \ + (buf)[1]=DPD2BIN[(sourlo>>10)&0x3ff]; \ + (buf)[2]=DPD2BIN[(sourlo>>20)&0x3ff]; \ + sourml=DFWORD(df, 2); \ + (buf)[3]=DPD2BIN[((sourml<<2) | (sourlo>>30))&0x3ff]; \ + (buf)[4]=DPD2BIN[(sourml>>8)&0x3ff]; \ + (buf)[5]=DPD2BIN[(sourml>>18)&0x3ff]; \ + sourmh=DFWORD(df, 1); \ + (buf)[6]=DPD2BIN[((sourmh<<4) | (sourml>>28))&0x3ff]; \ + (buf)[7]=DPD2BIN[(sourmh>>6)&0x3ff]; \ + (buf)[8]=DPD2BIN[(sourmh>>16)&0x3ff]; \ + sourhi=DFWORD(df, 0); \ + (buf)[9]=DPD2BIN[((sourhi<<6) | (sourmh>>26))&0x3ff]; \ + (buf)[10]=DPD2BIN[(sourhi>>4)&0x3ff]; \ + (buf)[11]=DECCOMBMSD[sourhi>>26];} + + #endif + + /* Set a decFloat to the maximum positive finite number (Nmax) */ + #if DECPMAX==7 + #define DFSETNMAX(df) \ + {DFWORD(df, 0)=0x77f3fcff;} + #elif DECPMAX==16 + #define DFSETNMAX(df) \ + {DFWORD(df, 0)=0x77fcff3f; \ + DFWORD(df, 1)=0xcff3fcff;} + #elif DECPMAX==34 + #define DFSETNMAX(df) \ + {DFWORD(df, 0)=0x77ffcff3; \ + DFWORD(df, 1)=0xfcff3fcf; \ + DFWORD(df, 2)=0xf3fcff3f; \ + DFWORD(df, 3)=0xcff3fcff;} + #endif + + /* [end of format-dependent macros and constants] */ + #endif + +#endif diff --git a/include/libdecnumber/dpd/decimal128.h b/include/libdecnumber/dpd/decimal128.h new file mode 100644 index 000000000..aff261e55 --- /dev/null +++ b/include/libdecnumber/dpd/decimal128.h @@ -0,0 +1,99 @@ +/* Decimal 128-bit format module header for the decNumber C Library. + Copyright (C) 2005, 2007 Free Software Foundation, Inc. + Contributed by IBM Corporation. Author Mike Cowlishaw. + + This file is part of GCC. + + GCC is free software; you can redistribute it and/or modify it under + the terms of the GNU General Public License as published by the Free + Software Foundation; either version 2, or (at your option) any later + version. + + In addition to the permissions in the GNU General Public License, + the Free Software Foundation gives you unlimited permission to link + the compiled version of this file into combinations with other + programs, and to distribute those combinations without any + restriction coming from the use of this file. (The General Public + License restrictions do apply in other respects; for example, they + cover modification of the file, and distribution when not linked + into a combine executable.) + + GCC is distributed in the hope that it will be useful, but WITHOUT ANY + WARRANTY; without even the implied warranty of MERCHANTABILITY or + FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + for more details. + + You should have received a copy of the GNU General Public License + along with GCC; see the file COPYING. If not, write to the Free + Software Foundation, 51 Franklin Street, Fifth Floor, Boston, MA + 02110-1301, USA. */ + +/* ------------------------------------------------------------------ */ +/* Decimal 128-bit format module header */ +/* ------------------------------------------------------------------ */ + +#ifndef DECIMAL128_H +#define DECIMAL128_H + + #define DEC128NAME "decimal128" /* Short name */ + #define DEC128FULLNAME "Decimal 128-bit Number" /* Verbose name */ + #define DEC128AUTHOR "Mike Cowlishaw" /* Who to blame */ + + /* parameters for decimal128s */ + #define DECIMAL128_Bytes 16 /* length */ + #define DECIMAL128_Pmax 34 /* maximum precision (digits) */ + #define DECIMAL128_Emax 6144 /* maximum adjusted exponent */ + #define DECIMAL128_Emin -6143 /* minimum adjusted exponent */ + #define DECIMAL128_Bias 6176 /* bias for the exponent */ + #define DECIMAL128_String 43 /* maximum string length, +1 */ + #define DECIMAL128_EconL 12 /* exp. continuation length */ + /* highest biased exponent (Elimit-1) */ + #define DECIMAL128_Ehigh (DECIMAL128_Emax+DECIMAL128_Bias-DECIMAL128_Pmax+1) + + /* check enough digits, if pre-defined */ + #if defined(DECNUMDIGITS) + #if (DECNUMDIGITS=34 for safe use + #endif + #endif + + #ifndef DECNUMDIGITS + #define DECNUMDIGITS DECIMAL128_Pmax /* size if not already defined*/ + #endif + #include "libdecnumber/decNumber.h" + + /* Decimal 128-bit type, accessible by bytes */ + typedef struct { + uint8_t bytes[DECIMAL128_Bytes]; /* decimal128: 1, 5, 12, 110 bits*/ + } decimal128; + + /* special values [top byte excluding sign bit; last two bits are */ + /* don't-care for Infinity on input, last bit don't-care for NaN] */ + #if !defined(DECIMAL_NaN) + #define DECIMAL_NaN 0x7c /* 0 11111 00 NaN */ + #define DECIMAL_sNaN 0x7e /* 0 11111 10 sNaN */ + #define DECIMAL_Inf 0x78 /* 0 11110 00 Infinity */ + #endif + + #include "decimal128Local.h" + + /* ---------------------------------------------------------------- */ + /* Routines */ + /* ---------------------------------------------------------------- */ + + + /* String conversions */ + decimal128 * decimal128FromString(decimal128 *, const char *, decContext *); + char * decimal128ToString(const decimal128 *, char *); + char * decimal128ToEngString(const decimal128 *, char *); + + /* decNumber conversions */ + decimal128 * decimal128FromNumber(decimal128 *, const decNumber *, + decContext *); + decNumber * decimal128ToNumber(const decimal128 *, decNumber *); + + /* Format-dependent utilities */ + uint32_t decimal128IsCanonical(const decimal128 *); + decimal128 * decimal128Canonical(decimal128 *, const decimal128 *); + +#endif diff --git a/include/libdecnumber/dpd/decimal128Local.h b/include/libdecnumber/dpd/decimal128Local.h new file mode 100644 index 000000000..976542771 --- /dev/null +++ b/include/libdecnumber/dpd/decimal128Local.h @@ -0,0 +1,47 @@ +/* Local definitions for use with the decNumber C Library. + Copyright (C) 2007 Free Software Foundation, Inc. + + This file is part of GCC. + + GCC is free software; you can redistribute it and/or modify it under + the terms of the GNU General Public License as published by the Free + Software Foundation; either version 2, or (at your option) any later + version. + + In addition to the permissions in the GNU General Public License, + the Free Software Foundation gives you unlimited permission to link + the compiled version of this file into combinations with other + programs, and to distribute those combinations without any + restriction coming from the use of this file. (The General Public + License restrictions do apply in other respects; for example, they + cover modification of the file, and distribution when not linked + into a combine executable.) + + GCC is distributed in the hope that it will be useful, but WITHOUT ANY + WARRANTY; without even the implied warranty of MERCHANTABILITY or + FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + for more details. + + You should have received a copy of the GNU General Public License + along with GCC; see the file COPYING. If not, write to the Free + Software Foundation, 51 Franklin Street, Fifth Floor, Boston, MA + 02110-1301, USA. */ + +#if !defined(DECIMAL128LOCAL) + +/* The compiler needs sign manipulation functions for decimal128 which + are not part of the decNumber package. */ + +/* Set sign; this assumes the sign was previously zero. */ +#define decimal128SetSign(d,b) \ + { (d)->bytes[WORDS_BIGENDIAN ? 0 : 15] |= ((unsigned) (b) << 7); } + +/* Clear sign. */ +#define decimal128ClearSign(d) \ + { (d)->bytes[WORDS_BIGENDIAN ? 0 : 15] &= ~0x80; } + +/* Flip sign. */ +#define decimal128FlipSign(d) \ + { (d)->bytes[WORDS_BIGENDIAN ? 0 : 15] ^= 0x80; } + +#endif diff --git a/include/libdecnumber/dpd/decimal32.h b/include/libdecnumber/dpd/decimal32.h new file mode 100644 index 000000000..6cb9e4362 --- /dev/null +++ b/include/libdecnumber/dpd/decimal32.h @@ -0,0 +1,97 @@ +/* Decimal 32-bit format module header for the decNumber C Library. + Copyright (C) 2005, 2007 Free Software Foundation, Inc. + Contributed by IBM Corporation. Author Mike Cowlishaw. + + This file is part of GCC. + + GCC is free software; you can redistribute it and/or modify it under + the terms of the GNU General Public License as published by the Free + Software Foundation; either version 2, or (at your option) any later + version. + + In addition to the permissions in the GNU General Public License, + the Free Software Foundation gives you unlimited permission to link + the compiled version of this file into combinations with other + programs, and to distribute those combinations without any + restriction coming from the use of this file. (The General Public + License restrictions do apply in other respects; for example, they + cover modification of the file, and distribution when not linked + into a combine executable.) + + GCC is distributed in the hope that it will be useful, but WITHOUT ANY + WARRANTY; without even the implied warranty of MERCHANTABILITY or + FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + for more details. + + You should have received a copy of the GNU General Public License + along with GCC; see the file COPYING. If not, write to the Free + Software Foundation, 51 Franklin Street, Fifth Floor, Boston, MA + 02110-1301, USA. */ + +/* ------------------------------------------------------------------ */ +/* Decimal 32-bit format module header */ +/* ------------------------------------------------------------------ */ + +#ifndef DECIMAL32_H +#define DECIMAL32_H + + #define DEC32NAME "decimal32" /* Short name */ + #define DEC32FULLNAME "Decimal 32-bit Number" /* Verbose name */ + #define DEC32AUTHOR "Mike Cowlishaw" /* Who to blame */ + + /* parameters for decimal32s */ + #define DECIMAL32_Bytes 4 /* length */ + #define DECIMAL32_Pmax 7 /* maximum precision (digits) */ + #define DECIMAL32_Emax 96 /* maximum adjusted exponent */ + #define DECIMAL32_Emin -95 /* minimum adjusted exponent */ + #define DECIMAL32_Bias 101 /* bias for the exponent */ + #define DECIMAL32_String 15 /* maximum string length, +1 */ + #define DECIMAL32_EconL 6 /* exp. continuation length */ + /* highest biased exponent (Elimit-1) */ + #define DECIMAL32_Ehigh (DECIMAL32_Emax+DECIMAL32_Bias-DECIMAL32_Pmax+1) + + /* check enough digits, if pre-defined */ + #if defined(DECNUMDIGITS) + #if (DECNUMDIGITS=7 for safe use + #endif + #endif + + #ifndef DECNUMDIGITS + #define DECNUMDIGITS DECIMAL32_Pmax /* size if not already defined*/ + #endif + #include "libdecnumber/decNumber.h" + + /* Decimal 32-bit type, accessible by bytes */ + typedef struct { + uint8_t bytes[DECIMAL32_Bytes]; /* decimal32: 1, 5, 6, 20 bits*/ + } decimal32; + + /* special values [top byte excluding sign bit; last two bits are */ + /* don't-care for Infinity on input, last bit don't-care for NaN] */ + #if !defined(DECIMAL_NaN) + #define DECIMAL_NaN 0x7c /* 0 11111 00 NaN */ + #define DECIMAL_sNaN 0x7e /* 0 11111 10 sNaN */ + #define DECIMAL_Inf 0x78 /* 0 11110 00 Infinity */ + #endif + + /* ---------------------------------------------------------------- */ + /* Routines */ + /* ---------------------------------------------------------------- */ + + + /* String conversions */ + decimal32 * decimal32FromString(decimal32 *, const char *, decContext *); + char * decimal32ToString(const decimal32 *, char *); + char * decimal32ToEngString(const decimal32 *, char *); + + /* decNumber conversions */ + decimal32 * decimal32FromNumber(decimal32 *, const decNumber *, + decContext *); + decNumber * decimal32ToNumber(const decimal32 *, decNumber *); + + /* Format-dependent utilities */ + uint32_t decimal32IsCanonical(const decimal32 *); + decimal32 * decimal32Canonical(decimal32 *, const decimal32 *); + +#endif diff --git a/include/libdecnumber/dpd/decimal64.h b/include/libdecnumber/dpd/decimal64.h new file mode 100644 index 000000000..f29e57064 --- /dev/null +++ b/include/libdecnumber/dpd/decimal64.h @@ -0,0 +1,99 @@ +/* Decimal 64-bit format module header for the decNumber C Library. + Copyright (C) 2005, 2007 Free Software Foundation, Inc. + Contributed by IBM Corporation. Author Mike Cowlishaw. + + This file is part of GCC. + + GCC is free software; you can redistribute it and/or modify it under + the terms of the GNU General Public License as published by the Free + Software Foundation; either version 2, or (at your option) any later + version. + + In addition to the permissions in the GNU General Public License, + the Free Software Foundation gives you unlimited permission to link + the compiled version of this file into combinations with other + programs, and to distribute those combinations without any + restriction coming from the use of this file. (The General Public + License restrictions do apply in other respects; for example, they + cover modification of the file, and distribution when not linked + into a combine executable.) + + GCC is distributed in the hope that it will be useful, but WITHOUT ANY + WARRANTY; without even the implied warranty of MERCHANTABILITY or + FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + for more details. + + You should have received a copy of the GNU General Public License + along with GCC; see the file COPYING. If not, write to the Free + Software Foundation, 51 Franklin Street, Fifth Floor, Boston, MA + 02110-1301, USA. */ + +/* ------------------------------------------------------------------ */ +/* Decimal 64-bit format module header */ +/* ------------------------------------------------------------------ */ + +#ifndef DECIMAL64_H +#define DECIMAL64_H + + #define DEC64NAME "decimal64" /* Short name */ + #define DEC64FULLNAME "Decimal 64-bit Number" /* Verbose name */ + #define DEC64AUTHOR "Mike Cowlishaw" /* Who to blame */ + + + /* parameters for decimal64s */ + #define DECIMAL64_Bytes 8 /* length */ + #define DECIMAL64_Pmax 16 /* maximum precision (digits) */ + #define DECIMAL64_Emax 384 /* maximum adjusted exponent */ + #define DECIMAL64_Emin -383 /* minimum adjusted exponent */ + #define DECIMAL64_Bias 398 /* bias for the exponent */ + #define DECIMAL64_String 24 /* maximum string length, +1 */ + #define DECIMAL64_EconL 8 /* exp. continuation length */ + /* highest biased exponent (Elimit-1) */ + #define DECIMAL64_Ehigh (DECIMAL64_Emax+DECIMAL64_Bias-DECIMAL64_Pmax+1) + + /* check enough digits, if pre-defined */ + #if defined(DECNUMDIGITS) + #if (DECNUMDIGITS=16 for safe use + #endif + #endif + + + #ifndef DECNUMDIGITS + #define DECNUMDIGITS DECIMAL64_Pmax /* size if not already defined*/ + #endif + #include "libdecnumber/decNumber.h" + + /* Decimal 64-bit type, accessible by bytes */ + typedef struct { + uint8_t bytes[DECIMAL64_Bytes]; /* decimal64: 1, 5, 8, 50 bits*/ + } decimal64; + + /* special values [top byte excluding sign bit; last two bits are */ + /* don't-care for Infinity on input, last bit don't-care for NaN] */ + #if !defined(DECIMAL_NaN) + #define DECIMAL_NaN 0x7c /* 0 11111 00 NaN */ + #define DECIMAL_sNaN 0x7e /* 0 11111 10 sNaN */ + #define DECIMAL_Inf 0x78 /* 0 11110 00 Infinity */ + #endif + + /* ---------------------------------------------------------------- */ + /* Routines */ + /* ---------------------------------------------------------------- */ + + + /* String conversions */ + decimal64 * decimal64FromString(decimal64 *, const char *, decContext *); + char * decimal64ToString(const decimal64 *, char *); + char * decimal64ToEngString(const decimal64 *, char *); + + /* decNumber conversions */ + decimal64 * decimal64FromNumber(decimal64 *, const decNumber *, + decContext *); + decNumber * decimal64ToNumber(const decimal64 *, decNumber *); + + /* Format-dependent utilities */ + uint32_t decimal64IsCanonical(const decimal64 *); + decimal64 * decimal64Canonical(decimal64 *, const decimal64 *); + +#endif diff --git a/include/migration/blocker.h b/include/migration/blocker.h new file mode 100644 index 000000000..acd27018e --- /dev/null +++ b/include/migration/blocker.h @@ -0,0 +1,35 @@ +/* + * QEMU migration blockers + * + * Copyright IBM, Corp. 2008 + * + * Authors: + * Anthony Liguori + * + * This work is licensed under the terms of the GNU GPL, version 2. See + * the COPYING file in the top-level directory. + * + */ + +#ifndef MIGRATION_BLOCKER_H +#define MIGRATION_BLOCKER_H + +/** + * @migrate_add_blocker - prevent migration from proceeding + * + * @reason - an error to be returned whenever migration is attempted + * + * @errp - [out] The reason (if any) we cannot block migration right now. + * + * @returns - 0 on success, -EBUSY/-EACCES on failure, with errp set. + */ +int migrate_add_blocker(Error *reason, Error **errp); + +/** + * @migrate_del_blocker - remove a blocking error from migration + * + * @reason - the error blocking migration + */ +void migrate_del_blocker(Error *reason); + +#endif diff --git a/include/migration/colo.h b/include/migration/colo.h new file mode 100644 index 000000000..768e1f04c --- /dev/null +++ b/include/migration/colo.h @@ -0,0 +1,40 @@ +/* + * COarse-grain LOck-stepping Virtual Machines for Non-stop Service (COLO) + * (a.k.a. Fault Tolerance or Continuous Replication) + * + * Copyright (c) 2016 HUAWEI TECHNOLOGIES CO., LTD. + * Copyright (c) 2016 FUJITSU LIMITED + * Copyright (c) 2016 Intel Corporation + * + * This work is licensed under the terms of the GNU GPL, version 2 or + * later. See the COPYING file in the top-level directory. + */ + +#ifndef QEMU_COLO_H +#define QEMU_COLO_H + +#include "qapi/qapi-types-migration.h" + +enum colo_event { + COLO_EVENT_NONE, + COLO_EVENT_CHECKPOINT, + COLO_EVENT_FAILOVER, +}; + +void migrate_start_colo_process(MigrationState *s); +bool migration_in_colo_state(void); + +/* loadvm */ +int migration_incoming_enable_colo(void); +void migration_incoming_disable_colo(void); +bool migration_incoming_colo_enabled(void); +void *colo_process_incoming_thread(void *opaque); +bool migration_incoming_in_colo_state(void); + +COLOMode get_colo_mode(void); + +/* failover */ +void colo_do_failover(void); + +void colo_checkpoint_notify(void *opaque); +#endif diff --git a/include/migration/cpu.h b/include/migration/cpu.h new file mode 100644 index 000000000..65abe3c8c --- /dev/null +++ b/include/migration/cpu.h @@ -0,0 +1,64 @@ +/* Declarations for use for CPU state serialization. */ + +#ifndef MIGRATION_CPU_H +#define MIGRATION_CPU_H + +#include "exec/cpu-defs.h" +#include "migration/qemu-file-types.h" +#include "migration/vmstate.h" + +#if TARGET_LONG_BITS == 64 +#define qemu_put_betl qemu_put_be64 +#define qemu_get_betl qemu_get_be64 +#define qemu_put_betls qemu_put_be64s +#define qemu_get_betls qemu_get_be64s +#define qemu_put_sbetl qemu_put_sbe64 +#define qemu_get_sbetl qemu_get_sbe64 +#define qemu_put_sbetls qemu_put_sbe64s +#define qemu_get_sbetls qemu_get_sbe64s + +#define VMSTATE_UINTTL_V(_f, _s, _v) \ + VMSTATE_UINT64_V(_f, _s, _v) +#define VMSTATE_UINTTL_EQUAL_V(_f, _s, _v) \ + VMSTATE_UINT64_EQUAL_V(_f, _s, _v) +#define VMSTATE_UINTTL_ARRAY_V(_f, _s, _n, _v) \ + VMSTATE_UINT64_ARRAY_V(_f, _s, _n, _v) +#define VMSTATE_UINTTL_2DARRAY_V(_f, _s, _n1, _n2, _v) \ + VMSTATE_UINT64_2DARRAY_V(_f, _s, _n1, _n2, _v) +#define VMSTATE_UINTTL_TEST(_f, _s, _t) \ + VMSTATE_UINT64_TEST(_f, _s, _t) +#define vmstate_info_uinttl vmstate_info_uint64 +#else +#define qemu_put_betl qemu_put_be32 +#define qemu_get_betl qemu_get_be32 +#define qemu_put_betls qemu_put_be32s +#define qemu_get_betls qemu_get_be32s +#define qemu_put_sbetl qemu_put_sbe32 +#define qemu_get_sbetl qemu_get_sbe32 +#define qemu_put_sbetls qemu_put_sbe32s +#define qemu_get_sbetls qemu_get_sbe32s + +#define VMSTATE_UINTTL_V(_f, _s, _v) \ + VMSTATE_UINT32_V(_f, _s, _v) +#define VMSTATE_UINTTL_EQUAL_V(_f, _s, _v) \ + VMSTATE_UINT32_EQUAL_V(_f, _s, _v) +#define VMSTATE_UINTTL_ARRAY_V(_f, _s, _n, _v) \ + VMSTATE_UINT32_ARRAY_V(_f, _s, _n, _v) +#define VMSTATE_UINTTL_2DARRAY_V(_f, _s, _n1, _n2, _v) \ + VMSTATE_UINT32_2DARRAY_V(_f, _s, _n1, _n2, _v) +#define VMSTATE_UINTTL_TEST(_f, _s, _t) \ + VMSTATE_UINT32_TEST(_f, _s, _t) +#define vmstate_info_uinttl vmstate_info_uint32 +#endif + +#define VMSTATE_UINTTL(_f, _s) \ + VMSTATE_UINTTL_V(_f, _s, 0) +#define VMSTATE_UINTTL_EQUAL(_f, _s) \ + VMSTATE_UINTTL_EQUAL_V(_f, _s, 0) +#define VMSTATE_UINTTL_ARRAY(_f, _s, _n) \ + VMSTATE_UINTTL_ARRAY_V(_f, _s, _n, 0) +#define VMSTATE_UINTTL_2DARRAY(_f, _s, _n1, _n2) \ + VMSTATE_UINTTL_2DARRAY_V(_f, _s, _n1, _n2, 0) + + +#endif diff --git a/include/migration/failover.h b/include/migration/failover.h new file mode 100644 index 000000000..475f88f53 --- /dev/null +++ b/include/migration/failover.h @@ -0,0 +1,25 @@ +/* + * COarse-grain LOck-stepping Virtual Machines for Non-stop Service (COLO) + * (a.k.a. Fault Tolerance or Continuous Replication) + * + * Copyright (c) 2016 HUAWEI TECHNOLOGIES CO.,LTD. + * Copyright (c) 2016 FUJITSU LIMITED + * Copyright (c) 2016 Intel Corporation + * + * This work is licensed under the terms of the GNU GPL, version 2 or + * later. See the COPYING file in the top-level directory. + */ + +#ifndef QEMU_FAILOVER_H +#define QEMU_FAILOVER_H + +#include "qapi/qapi-types-migration.h" + +void failover_init_state(void); +FailoverStatus failover_set_state(FailoverStatus old_state, + FailoverStatus new_state); +FailoverStatus failover_get_state(void); +void failover_request_active(Error **errp); +bool failover_request_is_active(void); + +#endif diff --git a/include/migration/global_state.h b/include/migration/global_state.h new file mode 100644 index 000000000..945eb35d5 --- /dev/null +++ b/include/migration/global_state.h @@ -0,0 +1,24 @@ +/* + * Global State configuration + * + * Copyright (c) 2014-2017 Red Hat Inc + * + * Authors: + * Juan Quintela + * + * This work is licensed under the terms of the GNU GPL, version 2 or later. + * See the COPYING file in the top-level directory. + */ + +#ifndef QEMU_MIGRATION_GLOBAL_STATE_H +#define QEMU_MIGRATION_GLOBAL_STATE_H + +#include "qapi/qapi-types-run-state.h" + +void register_global_state(void); +int global_state_store(void); +void global_state_store_running(void); +bool global_state_received(void); +RunState global_state_get_runstate(void); + +#endif diff --git a/include/migration/misc.h b/include/migration/misc.h new file mode 100644 index 000000000..34e7d7571 --- /dev/null +++ b/include/migration/misc.h @@ -0,0 +1,78 @@ +/* + * QEMU migration miscellaneus exported functions + * + * Copyright IBM, Corp. 2008 + * + * Authors: + * Anthony Liguori + * + * This work is licensed under the terms of the GNU GPL, version 2. See + * the COPYING file in the top-level directory. + * + */ + +#ifndef MIGRATION_MISC_H +#define MIGRATION_MISC_H + +#include "qemu/notify.h" +#include "qapi/qapi-types-net.h" + +/* migration/ram.c */ + +typedef enum PrecopyNotifyReason { + PRECOPY_NOTIFY_SETUP = 0, + PRECOPY_NOTIFY_BEFORE_BITMAP_SYNC = 1, + PRECOPY_NOTIFY_AFTER_BITMAP_SYNC = 2, + PRECOPY_NOTIFY_COMPLETE = 3, + PRECOPY_NOTIFY_CLEANUP = 4, + PRECOPY_NOTIFY_MAX = 5, +} PrecopyNotifyReason; + +typedef struct PrecopyNotifyData { + enum PrecopyNotifyReason reason; + Error **errp; +} PrecopyNotifyData; + +void precopy_infrastructure_init(void); +void precopy_add_notifier(NotifierWithReturn *n); +void precopy_remove_notifier(NotifierWithReturn *n); +int precopy_notify(PrecopyNotifyReason reason, Error **errp); +void precopy_enable_free_page_optimization(void); + +void ram_mig_init(void); +void qemu_guest_free_page_hint(void *addr, size_t len); + +/* migration/block.c */ + +#ifdef CONFIG_LIVE_BLOCK_MIGRATION +void blk_mig_init(void); +#else +static inline void blk_mig_init(void) {} +#endif + +AnnounceParameters *migrate_announce_params(void); +/* migration/savevm.c */ + +void dump_vmstate_json_to_file(FILE *out_fp); + +/* migration/migration.c */ +void migration_object_init(void); +void migration_shutdown(void); +void qemu_start_incoming_migration(const char *uri, Error **errp); +bool migration_is_idle(void); +bool migration_is_active(MigrationState *); +void add_migration_state_change_notifier(Notifier *notify); +void remove_migration_state_change_notifier(Notifier *notify); +bool migration_in_setup(MigrationState *); +bool migration_has_finished(MigrationState *); +bool migration_has_failed(MigrationState *); +/* ...and after the device transmission */ +bool migration_in_postcopy_after_devices(MigrationState *); +void migration_global_dump(Monitor *mon); +/* True if incomming migration entered POSTCOPY_INCOMING_DISCARD */ +bool migration_in_incoming_postcopy(void); + +/* migration/block-dirty-bitmap.c */ +void dirty_bitmap_mig_init(void); + +#endif diff --git a/include/migration/qemu-file-types.h b/include/migration/qemu-file-types.h new file mode 100644 index 000000000..2867e3da8 --- /dev/null +++ b/include/migration/qemu-file-types.h @@ -0,0 +1,170 @@ +/* + * QEMU System Emulator + * + * Copyright (c) 2003-2008 Fabrice Bellard + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ + +#ifndef MIGRATION_QEMU_FILE_TYPES_H +#define MIGRATION_QEMU_FILE_TYPES_H + +int qemu_file_get_error(QEMUFile *f); + +void qemu_put_buffer(QEMUFile *f, const uint8_t *buf, size_t size); +void qemu_put_byte(QEMUFile *f, int v); + +#define qemu_put_sbyte qemu_put_byte + +void qemu_put_be16(QEMUFile *f, unsigned int v); +void qemu_put_be32(QEMUFile *f, unsigned int v); +void qemu_put_be64(QEMUFile *f, uint64_t v); +size_t qemu_get_buffer(QEMUFile *f, uint8_t *buf, size_t size); + +int qemu_get_byte(QEMUFile *f); + +static inline unsigned int qemu_get_ubyte(QEMUFile *f) +{ + return (unsigned int)qemu_get_byte(f); +} + +#define qemu_get_sbyte qemu_get_byte + +unsigned int qemu_get_be16(QEMUFile *f); +unsigned int qemu_get_be32(QEMUFile *f); +uint64_t qemu_get_be64(QEMUFile *f); + +static inline void qemu_put_be64s(QEMUFile *f, const uint64_t *pv) +{ + qemu_put_be64(f, *pv); +} + +static inline void qemu_put_be32s(QEMUFile *f, const uint32_t *pv) +{ + qemu_put_be32(f, *pv); +} + +static inline void qemu_put_be16s(QEMUFile *f, const uint16_t *pv) +{ + qemu_put_be16(f, *pv); +} + +static inline void qemu_put_8s(QEMUFile *f, const uint8_t *pv) +{ + qemu_put_byte(f, *pv); +} + +static inline void qemu_get_be64s(QEMUFile *f, uint64_t *pv) +{ + *pv = qemu_get_be64(f); +} + +static inline void qemu_get_be32s(QEMUFile *f, uint32_t *pv) +{ + *pv = qemu_get_be32(f); +} + +static inline void qemu_get_be16s(QEMUFile *f, uint16_t *pv) +{ + *pv = qemu_get_be16(f); +} + +static inline void qemu_get_8s(QEMUFile *f, uint8_t *pv) +{ + *pv = qemu_get_byte(f); +} + +/* Signed versions for type safety */ +static inline void qemu_put_sbe16(QEMUFile *f, int v) +{ + qemu_put_be16(f, (unsigned int)v); +} + +static inline void qemu_put_sbe32(QEMUFile *f, int v) +{ + qemu_put_be32(f, (unsigned int)v); +} + +static inline void qemu_put_sbe64(QEMUFile *f, int64_t v) +{ + qemu_put_be64(f, (uint64_t)v); +} + +static inline int qemu_get_sbe16(QEMUFile *f) +{ + return (int)qemu_get_be16(f); +} + +static inline int qemu_get_sbe32(QEMUFile *f) +{ + return (int)qemu_get_be32(f); +} + +static inline int64_t qemu_get_sbe64(QEMUFile *f) +{ + return (int64_t)qemu_get_be64(f); +} + +static inline void qemu_put_s8s(QEMUFile *f, const int8_t *pv) +{ + qemu_put_8s(f, (const uint8_t *)pv); +} + +static inline void qemu_put_sbe16s(QEMUFile *f, const int16_t *pv) +{ + qemu_put_be16s(f, (const uint16_t *)pv); +} + +static inline void qemu_put_sbe32s(QEMUFile *f, const int32_t *pv) +{ + qemu_put_be32s(f, (const uint32_t *)pv); +} + +static inline void qemu_put_sbe64s(QEMUFile *f, const int64_t *pv) +{ + qemu_put_be64s(f, (const uint64_t *)pv); +} + +static inline void qemu_get_s8s(QEMUFile *f, int8_t *pv) +{ + qemu_get_8s(f, (uint8_t *)pv); +} + +static inline void qemu_get_sbe16s(QEMUFile *f, int16_t *pv) +{ + qemu_get_be16s(f, (uint16_t *)pv); +} + +static inline void qemu_get_sbe32s(QEMUFile *f, int32_t *pv) +{ + qemu_get_be32s(f, (uint32_t *)pv); +} + +static inline void qemu_get_sbe64s(QEMUFile *f, int64_t *pv) +{ + qemu_get_be64s(f, (uint64_t *)pv); +} + +size_t qemu_get_counted_string(QEMUFile *f, char buf[256]); + +void qemu_put_counted_string(QEMUFile *f, const char *name); + +int qemu_file_rate_limit(QEMUFile *f); + +#endif diff --git a/include/migration/register.h b/include/migration/register.h new file mode 100644 index 000000000..c1dcff0f9 --- /dev/null +++ b/include/migration/register.h @@ -0,0 +1,81 @@ +/* + * QEMU migration vmstate registration + * + * Copyright IBM, Corp. 2008 + * + * Authors: + * Anthony Liguori + * + * This work is licensed under the terms of the GNU GPL, version 2. See + * the COPYING file in the top-level directory. + * + */ + +#ifndef MIGRATION_REGISTER_H +#define MIGRATION_REGISTER_H + +#include "hw/vmstate-if.h" + +typedef struct SaveVMHandlers { + /* This runs inside the iothread lock. */ + SaveStateHandler *save_state; + + void (*save_cleanup)(void *opaque); + int (*save_live_complete_postcopy)(QEMUFile *f, void *opaque); + int (*save_live_complete_precopy)(QEMUFile *f, void *opaque); + + /* This runs both outside and inside the iothread lock. */ + bool (*is_active)(void *opaque); + bool (*has_postcopy)(void *opaque); + + /* is_active_iterate + * If it is not NULL then qemu_savevm_state_iterate will skip iteration if + * it returns false. For example, it is needed for only-postcopy-states, + * which needs to be handled by qemu_savevm_state_setup and + * qemu_savevm_state_pending, but do not need iterations until not in + * postcopy stage. + */ + bool (*is_active_iterate)(void *opaque); + + /* This runs outside the iothread lock in the migration case, and + * within the lock in the savevm case. The callback had better only + * use data that is local to the migration thread or protected + * by other locks. + */ + int (*save_live_iterate)(QEMUFile *f, void *opaque); + + /* This runs outside the iothread lock! */ + int (*save_setup)(QEMUFile *f, void *opaque); + void (*save_live_pending)(QEMUFile *f, void *opaque, + uint64_t threshold_size, + uint64_t *res_precopy_only, + uint64_t *res_compatible, + uint64_t *res_postcopy_only); + /* Note for save_live_pending: + * - res_precopy_only is for data which must be migrated in precopy phase + * or in stopped state, in other words - before target vm start + * - res_compatible is for data which may be migrated in any phase + * - res_postcopy_only is for data which must be migrated in postcopy phase + * or in stopped state, in other words - after source vm stop + * + * Sum of res_postcopy_only, res_compatible and res_postcopy_only is the + * whole amount of pending data. + */ + + + LoadStateHandler *load_state; + int (*load_setup)(QEMUFile *f, void *opaque); + int (*load_cleanup)(void *opaque); + /* Called when postcopy migration wants to resume from failure */ + int (*resume_prepare)(MigrationState *s, void *opaque); +} SaveVMHandlers; + +int register_savevm_live(const char *idstr, + uint32_t instance_id, + int version_id, + const SaveVMHandlers *ops, + void *opaque); + +void unregister_savevm(VMStateIf *obj, const char *idstr, void *opaque); + +#endif diff --git a/include/migration/snapshot.h b/include/migration/snapshot.h new file mode 100644 index 000000000..c85b6ec75 --- /dev/null +++ b/include/migration/snapshot.h @@ -0,0 +1,21 @@ +/* + * QEMU snapshots + * + * Copyright (c) 2004-2008 Fabrice Bellard + * Copyright (c) 2009-2015 Red Hat Inc + * + * Authors: + * Juan Quintela + * + * + * This work is licensed under the terms of the GNU GPL, version 2 or later. + * See the COPYING file in the top-level directory. + */ + +#ifndef QEMU_MIGRATION_SNAPSHOT_H +#define QEMU_MIGRATION_SNAPSHOT_H + +int save_snapshot(const char *name, Error **errp); +int load_snapshot(const char *name, Error **errp); + +#endif diff --git a/include/migration/vmstate.h b/include/migration/vmstate.h new file mode 100644 index 000000000..4d71dc8fb --- /dev/null +++ b/include/migration/vmstate.h @@ -0,0 +1,1205 @@ +/* + * QEMU migration/snapshot declarations + * + * Copyright (c) 2009-2011 Red Hat, Inc. + * + * Original author: Juan Quintela + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ + +#ifndef QEMU_VMSTATE_H +#define QEMU_VMSTATE_H + +#include "hw/vmstate-if.h" + +typedef struct VMStateInfo VMStateInfo; +typedef struct VMStateField VMStateField; + +/* VMStateInfo allows customized migration of objects that don't fit in + * any category in VMStateFlags. Additional information is always passed + * into get and put in terms of field and vmdesc parameters. However + * these two parameters should only be used in cases when customized + * handling is needed, such as QTAILQ. For primitive data types such as + * integer, field and vmdesc parameters should be ignored inside get/put. + */ +struct VMStateInfo { + const char *name; + int (*get)(QEMUFile *f, void *pv, size_t size, const VMStateField *field); + int (*put)(QEMUFile *f, void *pv, size_t size, const VMStateField *field, + QJSON *vmdesc); +}; + +enum VMStateFlags { + /* Ignored */ + VMS_SINGLE = 0x001, + + /* The struct member at opaque + VMStateField.offset is a pointer + * to the actual field (e.g. struct a { uint8_t *b; + * }). Dereference the pointer before using it as basis for + * further pointer arithmetic (see e.g. VMS_ARRAY). Does not + * affect the meaning of VMStateField.num_offset or + * VMStateField.size_offset; see VMS_VARRAY* and VMS_VBUFFER for + * those. */ + VMS_POINTER = 0x002, + + /* The field is an array of fixed size. VMStateField.num contains + * the number of entries in the array. The size of each entry is + * given by VMStateField.size and / or opaque + + * VMStateField.size_offset; see VMS_VBUFFER and + * VMS_MULTIPLY. Each array entry will be processed individually + * (VMStateField.info.get()/put() if VMS_STRUCT is not set, + * recursion into VMStateField.vmsd if VMS_STRUCT is set). May not + * be combined with VMS_VARRAY*. */ + VMS_ARRAY = 0x004, + + /* The field is itself a struct, containing one or more + * fields. Recurse into VMStateField.vmsd. Most useful in + * combination with VMS_ARRAY / VMS_VARRAY*, recursing into each + * array entry. */ + VMS_STRUCT = 0x008, + + /* The field is an array of variable size. The int32_t at opaque + + * VMStateField.num_offset contains the number of entries in the + * array. See the VMS_ARRAY description regarding array handling + * in general. May not be combined with VMS_ARRAY or any other + * VMS_VARRAY*. */ + VMS_VARRAY_INT32 = 0x010, + + /* Ignored */ + VMS_BUFFER = 0x020, + + /* The field is a (fixed-size or variable-size) array of pointers + * (e.g. struct a { uint8_t *b[]; }). Dereference each array entry + * before using it. Note: Does not imply any one of VMS_ARRAY / + * VMS_VARRAY*; these need to be set explicitly. */ + VMS_ARRAY_OF_POINTER = 0x040, + + /* The field is an array of variable size. The uint16_t at opaque + * + VMStateField.num_offset (subject to VMS_MULTIPLY_ELEMENTS) + * contains the number of entries in the array. See the VMS_ARRAY + * description regarding array handling in general. May not be + * combined with VMS_ARRAY or any other VMS_VARRAY*. */ + VMS_VARRAY_UINT16 = 0x080, + + /* The size of the individual entries (a single array entry if + * VMS_ARRAY or any of VMS_VARRAY* are set, or the field itself if + * neither is set) is variable (i.e. not known at compile-time), + * but the same for all entries. Use the int32_t at opaque + + * VMStateField.size_offset (subject to VMS_MULTIPLY) to determine + * the size of each (and every) entry. */ + VMS_VBUFFER = 0x100, + + /* Multiply the entry size given by the int32_t at opaque + + * VMStateField.size_offset (see VMS_VBUFFER description) with + * VMStateField.size to determine the number of bytes to be + * allocated. Only valid in combination with VMS_VBUFFER. */ + VMS_MULTIPLY = 0x200, + + /* The field is an array of variable size. The uint8_t at opaque + + * VMStateField.num_offset (subject to VMS_MULTIPLY_ELEMENTS) + * contains the number of entries in the array. See the VMS_ARRAY + * description regarding array handling in general. May not be + * combined with VMS_ARRAY or any other VMS_VARRAY*. */ + VMS_VARRAY_UINT8 = 0x400, + + /* The field is an array of variable size. The uint32_t at opaque + * + VMStateField.num_offset (subject to VMS_MULTIPLY_ELEMENTS) + * contains the number of entries in the array. See the VMS_ARRAY + * description regarding array handling in general. May not be + * combined with VMS_ARRAY or any other VMS_VARRAY*. */ + VMS_VARRAY_UINT32 = 0x800, + + /* Fail loading the serialised VM state if this field is missing + * from the input. */ + VMS_MUST_EXIST = 0x1000, + + /* When loading serialised VM state, allocate memory for the + * (entire) field. Only valid in combination with + * VMS_POINTER. Note: Not all combinations with other flags are + * currently supported, e.g. VMS_ALLOC|VMS_ARRAY_OF_POINTER won't + * cause the individual entries to be allocated. */ + VMS_ALLOC = 0x2000, + + /* Multiply the number of entries given by the integer at opaque + + * VMStateField.num_offset (see VMS_VARRAY*) with VMStateField.num + * to determine the number of entries in the array. Only valid in + * combination with one of VMS_VARRAY*. */ + VMS_MULTIPLY_ELEMENTS = 0x4000, + + /* A structure field that is like VMS_STRUCT, but uses + * VMStateField.struct_version_id to tell which version of the + * structure we are referencing to use. */ + VMS_VSTRUCT = 0x8000, +}; + +typedef enum { + MIG_PRI_DEFAULT = 0, + MIG_PRI_IOMMU, /* Must happen before PCI devices */ + MIG_PRI_PCI_BUS, /* Must happen before IOMMU */ + MIG_PRI_GICV3_ITS, /* Must happen before PCI devices */ + MIG_PRI_GICV3, /* Must happen before the ITS */ + MIG_PRI_MAX, +} MigrationPriority; + +struct VMStateField { + const char *name; + const char *err_hint; + size_t offset; + size_t size; + size_t start; + int num; + size_t num_offset; + size_t size_offset; + const VMStateInfo *info; + enum VMStateFlags flags; + const VMStateDescription *vmsd; + int version_id; + int struct_version_id; + bool (*field_exists)(void *opaque, int version_id); +}; + +struct VMStateDescription { + const char *name; + int unmigratable; + int version_id; + int minimum_version_id; + int minimum_version_id_old; + MigrationPriority priority; + LoadStateHandler *load_state_old; + int (*pre_load)(void *opaque); + int (*post_load)(void *opaque, int version_id); + int (*pre_save)(void *opaque); + int (*post_save)(void *opaque); + bool (*needed)(void *opaque); + bool (*dev_unplug_pending)(void *opaque); + + const VMStateField *fields; + const VMStateDescription **subsections; +}; + +extern const VMStateDescription vmstate_dummy; + +extern const VMStateInfo vmstate_info_bool; + +extern const VMStateInfo vmstate_info_int8; +extern const VMStateInfo vmstate_info_int16; +extern const VMStateInfo vmstate_info_int32; +extern const VMStateInfo vmstate_info_int64; + +extern const VMStateInfo vmstate_info_uint8_equal; +extern const VMStateInfo vmstate_info_uint16_equal; +extern const VMStateInfo vmstate_info_int32_equal; +extern const VMStateInfo vmstate_info_uint32_equal; +extern const VMStateInfo vmstate_info_uint64_equal; +extern const VMStateInfo vmstate_info_int32_le; + +extern const VMStateInfo vmstate_info_uint8; +extern const VMStateInfo vmstate_info_uint16; +extern const VMStateInfo vmstate_info_uint32; +extern const VMStateInfo vmstate_info_uint64; + +/** Put this in the stream when migrating a null pointer.*/ +#define VMS_NULLPTR_MARKER (0x30U) /* '0' */ +extern const VMStateInfo vmstate_info_nullptr; + +extern const VMStateInfo vmstate_info_cpudouble; + +extern const VMStateInfo vmstate_info_timer; +extern const VMStateInfo vmstate_info_buffer; +extern const VMStateInfo vmstate_info_unused_buffer; +extern const VMStateInfo vmstate_info_tmp; +extern const VMStateInfo vmstate_info_bitmap; +extern const VMStateInfo vmstate_info_qtailq; +extern const VMStateInfo vmstate_info_gtree; +extern const VMStateInfo vmstate_info_qlist; + +#define type_check_2darray(t1,t2,n,m) ((t1(*)[n][m])0 - (t2*)0) +/* + * Check that type t2 is an array of type t1 of size n, + * e.g. if t1 is 'foo' and n is 32 then t2 must be 'foo[32]' + */ +#define type_check_array(t1,t2,n) ((t1(*)[n])0 - (t2*)0) +#define type_check_pointer(t1,t2) ((t1**)0 - (t2*)0) +/* + * type of element 0 of the specified (array) field of the type. + * Note that if the field is a pointer then this will return the + * pointed-to type rather than complaining. + */ +#define typeof_elt_of_field(type, field) typeof(((type *)0)->field[0]) +/* Check that field f in struct type t2 is an array of t1, of any size */ +#define type_check_varray(t1, t2, f) \ + (type_check(t1, typeof_elt_of_field(t2, f)) \ + + QEMU_BUILD_BUG_ON_ZERO(!QEMU_IS_ARRAY(((t2 *)0)->f))) + +#define vmstate_offset_value(_state, _field, _type) \ + (offsetof(_state, _field) + \ + type_check(_type, typeof_field(_state, _field))) + +#define vmstate_offset_pointer(_state, _field, _type) \ + (offsetof(_state, _field) + \ + type_check_pointer(_type, typeof_field(_state, _field))) + +#define vmstate_offset_array(_state, _field, _type, _num) \ + (offsetof(_state, _field) + \ + type_check_array(_type, typeof_field(_state, _field), _num)) + +#define vmstate_offset_2darray(_state, _field, _type, _n1, _n2) \ + (offsetof(_state, _field) + \ + type_check_2darray(_type, typeof_field(_state, _field), _n1, _n2)) + +#define vmstate_offset_sub_array(_state, _field, _type, _start) \ + vmstate_offset_value(_state, _field[_start], _type) + +#define vmstate_offset_buffer(_state, _field) \ + vmstate_offset_array(_state, _field, uint8_t, \ + sizeof(typeof_field(_state, _field))) + +#define vmstate_offset_varray(_state, _field, _type) \ + (offsetof(_state, _field) + \ + type_check_varray(_type, _state, _field)) + +/* In the macros below, if there is a _version, that means the macro's + * field will be processed only if the version being received is >= + * the _version specified. In general, if you add a new field, you + * would increment the structure's version and put that version + * number into the new field so it would only be processed with the + * new version. + * + * In particular, for VMSTATE_STRUCT() and friends the _version does + * *NOT* pick the version of the sub-structure. It works just as + * specified above. The version of the top-level structure received + * is passed down to all sub-structures. This means that the + * sub-structures must have version that are compatible with all the + * structures that use them. + * + * If you want to specify the version of the sub-structure, use + * VMSTATE_VSTRUCT(), which allows the specific sub-structure version + * to be directly specified. + */ + +#define VMSTATE_SINGLE_TEST(_field, _state, _test, _version, _info, _type) { \ + .name = (stringify(_field)), \ + .version_id = (_version), \ + .field_exists = (_test), \ + .size = sizeof(_type), \ + .info = &(_info), \ + .flags = VMS_SINGLE, \ + .offset = vmstate_offset_value(_state, _field, _type), \ +} + +#define VMSTATE_SINGLE_FULL(_field, _state, _test, _version, _info, \ + _type, _err_hint) { \ + .name = (stringify(_field)), \ + .err_hint = (_err_hint), \ + .version_id = (_version), \ + .field_exists = (_test), \ + .size = sizeof(_type), \ + .info = &(_info), \ + .flags = VMS_SINGLE, \ + .offset = vmstate_offset_value(_state, _field, _type), \ +} + +/* Validate state using a boolean predicate. */ +#define VMSTATE_VALIDATE(_name, _test) { \ + .name = (_name), \ + .field_exists = (_test), \ + .flags = VMS_ARRAY | VMS_MUST_EXIST, \ + .num = 0, /* 0 elements: no data, only run _test */ \ +} + +#define VMSTATE_POINTER(_field, _state, _version, _info, _type) { \ + .name = (stringify(_field)), \ + .version_id = (_version), \ + .info = &(_info), \ + .size = sizeof(_type), \ + .flags = VMS_SINGLE|VMS_POINTER, \ + .offset = vmstate_offset_value(_state, _field, _type), \ +} + +#define VMSTATE_POINTER_TEST(_field, _state, _test, _info, _type) { \ + .name = (stringify(_field)), \ + .info = &(_info), \ + .field_exists = (_test), \ + .size = sizeof(_type), \ + .flags = VMS_SINGLE|VMS_POINTER, \ + .offset = vmstate_offset_value(_state, _field, _type), \ +} + +#define VMSTATE_ARRAY(_field, _state, _num, _version, _info, _type) {\ + .name = (stringify(_field)), \ + .version_id = (_version), \ + .num = (_num), \ + .info = &(_info), \ + .size = sizeof(_type), \ + .flags = VMS_ARRAY, \ + .offset = vmstate_offset_array(_state, _field, _type, _num), \ +} + +#define VMSTATE_2DARRAY(_field, _state, _n1, _n2, _version, _info, _type) { \ + .name = (stringify(_field)), \ + .version_id = (_version), \ + .num = (_n1) * (_n2), \ + .info = &(_info), \ + .size = sizeof(_type), \ + .flags = VMS_ARRAY, \ + .offset = vmstate_offset_2darray(_state, _field, _type, _n1, _n2), \ +} + +#define VMSTATE_VARRAY_MULTIPLY(_field, _state, _field_num, _multiply, _info, _type) { \ + .name = (stringify(_field)), \ + .num_offset = vmstate_offset_value(_state, _field_num, uint32_t),\ + .num = (_multiply), \ + .info = &(_info), \ + .size = sizeof(_type), \ + .flags = VMS_VARRAY_UINT32|VMS_MULTIPLY_ELEMENTS, \ + .offset = vmstate_offset_varray(_state, _field, _type), \ +} + +#define VMSTATE_ARRAY_TEST(_field, _state, _num, _test, _info, _type) {\ + .name = (stringify(_field)), \ + .field_exists = (_test), \ + .num = (_num), \ + .info = &(_info), \ + .size = sizeof(_type), \ + .flags = VMS_ARRAY, \ + .offset = vmstate_offset_array(_state, _field, _type, _num),\ +} + +#define VMSTATE_SUB_ARRAY(_field, _state, _start, _num, _version, _info, _type) { \ + .name = (stringify(_field)), \ + .version_id = (_version), \ + .num = (_num), \ + .info = &(_info), \ + .size = sizeof(_type), \ + .flags = VMS_ARRAY, \ + .offset = vmstate_offset_sub_array(_state, _field, _type, _start), \ +} + +#define VMSTATE_ARRAY_INT32_UNSAFE(_field, _state, _field_num, _info, _type) {\ + .name = (stringify(_field)), \ + .num_offset = vmstate_offset_value(_state, _field_num, int32_t), \ + .info = &(_info), \ + .size = sizeof(_type), \ + .flags = VMS_VARRAY_INT32, \ + .offset = vmstate_offset_varray(_state, _field, _type), \ +} + +#define VMSTATE_VARRAY_INT32(_field, _state, _field_num, _version, _info, _type) {\ + .name = (stringify(_field)), \ + .version_id = (_version), \ + .num_offset = vmstate_offset_value(_state, _field_num, int32_t), \ + .info = &(_info), \ + .size = sizeof(_type), \ + .flags = VMS_VARRAY_INT32|VMS_POINTER, \ + .offset = vmstate_offset_pointer(_state, _field, _type), \ +} + +#define VMSTATE_VARRAY_UINT32(_field, _state, _field_num, _version, _info, _type) {\ + .name = (stringify(_field)), \ + .version_id = (_version), \ + .num_offset = vmstate_offset_value(_state, _field_num, uint32_t),\ + .info = &(_info), \ + .size = sizeof(_type), \ + .flags = VMS_VARRAY_UINT32|VMS_POINTER, \ + .offset = vmstate_offset_pointer(_state, _field, _type), \ +} + +#define VMSTATE_VARRAY_UINT32_ALLOC(_field, _state, _field_num, _version, _info, _type) {\ + .name = (stringify(_field)), \ + .version_id = (_version), \ + .num_offset = vmstate_offset_value(_state, _field_num, uint32_t),\ + .info = &(_info), \ + .size = sizeof(_type), \ + .flags = VMS_VARRAY_UINT32|VMS_POINTER|VMS_ALLOC, \ + .offset = vmstate_offset_pointer(_state, _field, _type), \ +} + +#define VMSTATE_VARRAY_UINT16_ALLOC(_field, _state, _field_num, _version, _info, _type) {\ + .name = (stringify(_field)), \ + .version_id = (_version), \ + .num_offset = vmstate_offset_value(_state, _field_num, uint16_t),\ + .info = &(_info), \ + .size = sizeof(_type), \ + .flags = VMS_VARRAY_UINT16 | VMS_POINTER | VMS_ALLOC, \ + .offset = vmstate_offset_pointer(_state, _field, _type), \ +} + +#define VMSTATE_VARRAY_UINT16_UNSAFE(_field, _state, _field_num, _version, _info, _type) {\ + .name = (stringify(_field)), \ + .version_id = (_version), \ + .num_offset = vmstate_offset_value(_state, _field_num, uint16_t),\ + .info = &(_info), \ + .size = sizeof(_type), \ + .flags = VMS_VARRAY_UINT16, \ + .offset = vmstate_offset_varray(_state, _field, _type), \ +} + +#define VMSTATE_VSTRUCT_TEST(_field, _state, _test, _version, _vmsd, _type, _struct_version) { \ + .name = (stringify(_field)), \ + .version_id = (_version), \ + .struct_version_id = (_struct_version), \ + .field_exists = (_test), \ + .vmsd = &(_vmsd), \ + .size = sizeof(_type), \ + .flags = VMS_VSTRUCT, \ + .offset = vmstate_offset_value(_state, _field, _type), \ +} + +#define VMSTATE_STRUCT_TEST(_field, _state, _test, _version, _vmsd, _type) { \ + .name = (stringify(_field)), \ + .version_id = (_version), \ + .field_exists = (_test), \ + .vmsd = &(_vmsd), \ + .size = sizeof(_type), \ + .flags = VMS_STRUCT, \ + .offset = vmstate_offset_value(_state, _field, _type), \ +} + +#define VMSTATE_STRUCT_POINTER_V(_field, _state, _version, _vmsd, _type) { \ + .name = (stringify(_field)), \ + .version_id = (_version), \ + .vmsd = &(_vmsd), \ + .size = sizeof(_type *), \ + .flags = VMS_STRUCT|VMS_POINTER, \ + .offset = vmstate_offset_pointer(_state, _field, _type), \ +} + +#define VMSTATE_STRUCT_POINTER_TEST_V(_field, _state, _test, _version, _vmsd, _type) { \ + .name = (stringify(_field)), \ + .version_id = (_version), \ + .field_exists = (_test), \ + .vmsd = &(_vmsd), \ + .size = sizeof(_type *), \ + .flags = VMS_STRUCT|VMS_POINTER, \ + .offset = vmstate_offset_pointer(_state, _field, _type), \ +} + +#define VMSTATE_ARRAY_OF_POINTER(_field, _state, _num, _version, _info, _type) {\ + .name = (stringify(_field)), \ + .version_id = (_version), \ + .num = (_num), \ + .info = &(_info), \ + .size = sizeof(_type), \ + .flags = VMS_ARRAY|VMS_ARRAY_OF_POINTER, \ + .offset = vmstate_offset_array(_state, _field, _type, _num), \ +} + +#define VMSTATE_ARRAY_OF_POINTER_TO_STRUCT(_f, _s, _n, _v, _vmsd, _type) { \ + .name = (stringify(_f)), \ + .version_id = (_v), \ + .num = (_n), \ + .vmsd = &(_vmsd), \ + .size = sizeof(_type *), \ + .flags = VMS_ARRAY|VMS_STRUCT|VMS_ARRAY_OF_POINTER, \ + .offset = vmstate_offset_array(_s, _f, _type*, _n), \ +} + +#define VMSTATE_STRUCT_SUB_ARRAY(_field, _state, _start, _num, _version, _vmsd, _type) { \ + .name = (stringify(_field)), \ + .version_id = (_version), \ + .num = (_num), \ + .vmsd = &(_vmsd), \ + .size = sizeof(_type), \ + .flags = VMS_STRUCT|VMS_ARRAY, \ + .offset = vmstate_offset_sub_array(_state, _field, _type, _start), \ +} + +#define VMSTATE_STRUCT_ARRAY_TEST(_field, _state, _num, _test, _version, _vmsd, _type) { \ + .name = (stringify(_field)), \ + .num = (_num), \ + .field_exists = (_test), \ + .version_id = (_version), \ + .vmsd = &(_vmsd), \ + .size = sizeof(_type), \ + .flags = VMS_STRUCT|VMS_ARRAY, \ + .offset = vmstate_offset_array(_state, _field, _type, _num),\ +} + +#define VMSTATE_STRUCT_2DARRAY_TEST(_field, _state, _n1, _n2, _test, \ + _version, _vmsd, _type) { \ + .name = (stringify(_field)), \ + .num = (_n1) * (_n2), \ + .field_exists = (_test), \ + .version_id = (_version), \ + .vmsd = &(_vmsd), \ + .size = sizeof(_type), \ + .flags = VMS_STRUCT | VMS_ARRAY, \ + .offset = vmstate_offset_2darray(_state, _field, _type, \ + _n1, _n2), \ +} + +#define VMSTATE_STRUCT_VARRAY_UINT8(_field, _state, _field_num, _version, _vmsd, _type) { \ + .name = (stringify(_field)), \ + .num_offset = vmstate_offset_value(_state, _field_num, uint8_t), \ + .version_id = (_version), \ + .vmsd = &(_vmsd), \ + .size = sizeof(_type), \ + .flags = VMS_STRUCT|VMS_VARRAY_UINT8, \ + .offset = vmstate_offset_varray(_state, _field, _type), \ +} + +/* a variable length array (i.e. _type *_field) but we know the + * length + */ +#define VMSTATE_STRUCT_VARRAY_POINTER_KNOWN(_field, _state, _num, _version, _vmsd, _type) { \ + .name = (stringify(_field)), \ + .num = (_num), \ + .version_id = (_version), \ + .vmsd = &(_vmsd), \ + .size = sizeof(_type), \ + .flags = VMS_STRUCT|VMS_ARRAY|VMS_POINTER, \ + .offset = offsetof(_state, _field), \ +} + +#define VMSTATE_STRUCT_VARRAY_POINTER_INT32(_field, _state, _field_num, _vmsd, _type) { \ + .name = (stringify(_field)), \ + .version_id = 0, \ + .num_offset = vmstate_offset_value(_state, _field_num, int32_t), \ + .size = sizeof(_type), \ + .vmsd = &(_vmsd), \ + .flags = VMS_POINTER | VMS_VARRAY_INT32 | VMS_STRUCT, \ + .offset = vmstate_offset_pointer(_state, _field, _type), \ +} + +#define VMSTATE_STRUCT_VARRAY_POINTER_UINT32(_field, _state, _field_num, _vmsd, _type) { \ + .name = (stringify(_field)), \ + .version_id = 0, \ + .num_offset = vmstate_offset_value(_state, _field_num, uint32_t),\ + .size = sizeof(_type), \ + .vmsd = &(_vmsd), \ + .flags = VMS_POINTER | VMS_VARRAY_INT32 | VMS_STRUCT, \ + .offset = vmstate_offset_pointer(_state, _field, _type), \ +} + +#define VMSTATE_STRUCT_VARRAY_POINTER_UINT16(_field, _state, _field_num, _vmsd, _type) { \ + .name = (stringify(_field)), \ + .version_id = 0, \ + .num_offset = vmstate_offset_value(_state, _field_num, uint16_t),\ + .size = sizeof(_type), \ + .vmsd = &(_vmsd), \ + .flags = VMS_POINTER | VMS_VARRAY_UINT16 | VMS_STRUCT, \ + .offset = vmstate_offset_pointer(_state, _field, _type), \ +} + +#define VMSTATE_STRUCT_VARRAY_INT32(_field, _state, _field_num, _version, _vmsd, _type) { \ + .name = (stringify(_field)), \ + .num_offset = vmstate_offset_value(_state, _field_num, int32_t), \ + .version_id = (_version), \ + .vmsd = &(_vmsd), \ + .size = sizeof(_type), \ + .flags = VMS_STRUCT|VMS_VARRAY_INT32, \ + .offset = vmstate_offset_varray(_state, _field, _type), \ +} + +#define VMSTATE_STRUCT_VARRAY_UINT32(_field, _state, _field_num, _version, _vmsd, _type) { \ + .name = (stringify(_field)), \ + .num_offset = vmstate_offset_value(_state, _field_num, uint32_t), \ + .version_id = (_version), \ + .vmsd = &(_vmsd), \ + .size = sizeof(_type), \ + .flags = VMS_STRUCT|VMS_VARRAY_UINT32, \ + .offset = vmstate_offset_varray(_state, _field, _type), \ +} + +#define VMSTATE_STRUCT_VARRAY_ALLOC(_field, _state, _field_num, _version, _vmsd, _type) {\ + .name = (stringify(_field)), \ + .version_id = (_version), \ + .vmsd = &(_vmsd), \ + .num_offset = vmstate_offset_value(_state, _field_num, int32_t), \ + .size = sizeof(_type), \ + .flags = VMS_STRUCT|VMS_VARRAY_INT32|VMS_ALLOC|VMS_POINTER, \ + .offset = vmstate_offset_pointer(_state, _field, _type), \ +} + +#define VMSTATE_STATIC_BUFFER(_field, _state, _version, _test, _start, _size) { \ + .name = (stringify(_field)), \ + .version_id = (_version), \ + .field_exists = (_test), \ + .size = (_size - _start), \ + .info = &vmstate_info_buffer, \ + .flags = VMS_BUFFER, \ + .offset = vmstate_offset_buffer(_state, _field) + _start, \ +} + +#define VMSTATE_VBUFFER_MULTIPLY(_field, _state, _version, _test, \ + _field_size, _multiply) { \ + .name = (stringify(_field)), \ + .version_id = (_version), \ + .field_exists = (_test), \ + .size_offset = vmstate_offset_value(_state, _field_size, uint32_t),\ + .size = (_multiply), \ + .info = &vmstate_info_buffer, \ + .flags = VMS_VBUFFER|VMS_POINTER|VMS_MULTIPLY, \ + .offset = offsetof(_state, _field), \ +} + +#define VMSTATE_VBUFFER(_field, _state, _version, _test, _field_size) { \ + .name = (stringify(_field)), \ + .version_id = (_version), \ + .field_exists = (_test), \ + .size_offset = vmstate_offset_value(_state, _field_size, int32_t),\ + .info = &vmstate_info_buffer, \ + .flags = VMS_VBUFFER|VMS_POINTER, \ + .offset = offsetof(_state, _field), \ +} + +#define VMSTATE_VBUFFER_UINT32(_field, _state, _version, _test, _field_size) { \ + .name = (stringify(_field)), \ + .version_id = (_version), \ + .field_exists = (_test), \ + .size_offset = vmstate_offset_value(_state, _field_size, uint32_t),\ + .info = &vmstate_info_buffer, \ + .flags = VMS_VBUFFER|VMS_POINTER, \ + .offset = offsetof(_state, _field), \ +} + +#define VMSTATE_VBUFFER_ALLOC_UINT32(_field, _state, _version, \ + _test, _field_size) { \ + .name = (stringify(_field)), \ + .version_id = (_version), \ + .field_exists = (_test), \ + .size_offset = vmstate_offset_value(_state, _field_size, uint32_t),\ + .info = &vmstate_info_buffer, \ + .flags = VMS_VBUFFER|VMS_POINTER|VMS_ALLOC, \ + .offset = offsetof(_state, _field), \ +} + +#define VMSTATE_BUFFER_UNSAFE_INFO_TEST(_field, _state, _test, _version, _info, _size) { \ + .name = (stringify(_field)), \ + .version_id = (_version), \ + .field_exists = (_test), \ + .size = (_size), \ + .info = &(_info), \ + .flags = VMS_BUFFER, \ + .offset = offsetof(_state, _field), \ +} + +#define VMSTATE_BUFFER_POINTER_UNSAFE(_field, _state, _version, _size) { \ + .name = (stringify(_field)), \ + .version_id = (_version), \ + .size = (_size), \ + .info = &vmstate_info_buffer, \ + .flags = VMS_BUFFER|VMS_POINTER, \ + .offset = offsetof(_state, _field), \ +} + +/* Allocate a temporary of type 'tmp_type', set tmp->parent to _state + * and execute the vmsd on the temporary. Note that we're working with + * the whole of _state here, not a field within it. + * We compile time check that: + * That _tmp_type contains a 'parent' member that's a pointer to the + * '_state' type + * That the pointer is right at the start of _tmp_type. + */ +#define VMSTATE_WITH_TMP(_state, _tmp_type, _vmsd) { \ + .name = "tmp", \ + .size = sizeof(_tmp_type) + \ + QEMU_BUILD_BUG_ON_ZERO(offsetof(_tmp_type, parent) != 0) + \ + type_check_pointer(_state, \ + typeof_field(_tmp_type, parent)), \ + .vmsd = &(_vmsd), \ + .info = &vmstate_info_tmp, \ +} + +#define VMSTATE_UNUSED_BUFFER(_test, _version, _size) { \ + .name = "unused", \ + .field_exists = (_test), \ + .version_id = (_version), \ + .size = (_size), \ + .info = &vmstate_info_unused_buffer, \ + .flags = VMS_BUFFER, \ +} + +/* Discard size * field_num bytes, where field_num is a uint32 member */ +#define VMSTATE_UNUSED_VARRAY_UINT32(_state, _test, _version, _field_num, _size) {\ + .name = "unused", \ + .field_exists = (_test), \ + .num_offset = vmstate_offset_value(_state, _field_num, uint32_t),\ + .version_id = (_version), \ + .size = (_size), \ + .info = &vmstate_info_unused_buffer, \ + .flags = VMS_VARRAY_UINT32 | VMS_BUFFER, \ +} + +/* _field_size should be a int32_t field in the _state struct giving the + * size of the bitmap _field in bits. + */ +#define VMSTATE_BITMAP(_field, _state, _version, _field_size) { \ + .name = (stringify(_field)), \ + .version_id = (_version), \ + .size_offset = vmstate_offset_value(_state, _field_size, int32_t),\ + .info = &vmstate_info_bitmap, \ + .flags = VMS_VBUFFER|VMS_POINTER, \ + .offset = offsetof(_state, _field), \ +} + +/* For migrating a QTAILQ. + * Target QTAILQ needs be properly initialized. + * _type: type of QTAILQ element + * _next: name of QTAILQ entry field in QTAILQ element + * _vmsd: VMSD for QTAILQ element + * size: size of QTAILQ element + * start: offset of QTAILQ entry in QTAILQ element + */ +#define VMSTATE_QTAILQ_V(_field, _state, _version, _vmsd, _type, _next) \ +{ \ + .name = (stringify(_field)), \ + .version_id = (_version), \ + .vmsd = &(_vmsd), \ + .size = sizeof(_type), \ + .info = &vmstate_info_qtailq, \ + .offset = offsetof(_state, _field), \ + .start = offsetof(_type, _next), \ +} + +/* + * For migrating a GTree whose key is a pointer to _key_type and the + * value, a pointer to _val_type + * The target tree must have been properly initialized + * _vmsd: Start address of the 2 element array containing the data vmsd + * and the key vmsd, in that order + * _key_type: type of the key + * _val_type: type of the value + */ +#define VMSTATE_GTREE_V(_field, _state, _version, _vmsd, \ + _key_type, _val_type) \ +{ \ + .name = (stringify(_field)), \ + .version_id = (_version), \ + .vmsd = (_vmsd), \ + .info = &vmstate_info_gtree, \ + .start = sizeof(_key_type), \ + .size = sizeof(_val_type), \ + .offset = offsetof(_state, _field), \ +} + +/* + * For migrating a GTree with direct key and the value a pointer + * to _val_type + * The target tree must have been properly initialized + * _vmsd: data vmsd + * _val_type: type of the value + */ +#define VMSTATE_GTREE_DIRECT_KEY_V(_field, _state, _version, _vmsd, _val_type) \ +{ \ + .name = (stringify(_field)), \ + .version_id = (_version), \ + .vmsd = (_vmsd), \ + .info = &vmstate_info_gtree, \ + .start = 0, \ + .size = sizeof(_val_type), \ + .offset = offsetof(_state, _field), \ +} + +/* + * For migrating a QLIST + * Target QLIST needs be properly initialized. + * _type: type of QLIST element + * _next: name of QLIST_ENTRY entry field in QLIST element + * _vmsd: VMSD for QLIST element + * size: size of QLIST element + * start: offset of QLIST_ENTRY in QTAILQ element + */ +#define VMSTATE_QLIST_V(_field, _state, _version, _vmsd, _type, _next) \ +{ \ + .name = (stringify(_field)), \ + .version_id = (_version), \ + .vmsd = &(_vmsd), \ + .size = sizeof(_type), \ + .info = &vmstate_info_qlist, \ + .offset = offsetof(_state, _field), \ + .start = offsetof(_type, _next), \ +} + +/* _f : field name + _f_n : num of elements field_name + _n : num of elements + _s : struct state name + _v : version +*/ + +#define VMSTATE_SINGLE(_field, _state, _version, _info, _type) \ + VMSTATE_SINGLE_TEST(_field, _state, NULL, _version, _info, _type) + +#define VMSTATE_VSTRUCT(_field, _state, _vmsd, _type, _struct_version)\ + VMSTATE_VSTRUCT_TEST(_field, _state, NULL, 0, _vmsd, _type, _struct_version) + +#define VMSTATE_VSTRUCT_V(_field, _state, _version, _vmsd, _type, _struct_version) \ + VMSTATE_VSTRUCT_TEST(_field, _state, NULL, _version, _vmsd, _type, \ + _struct_version) + +#define VMSTATE_STRUCT(_field, _state, _version, _vmsd, _type) \ + VMSTATE_STRUCT_TEST(_field, _state, NULL, _version, _vmsd, _type) + +#define VMSTATE_STRUCT_POINTER(_field, _state, _vmsd, _type) \ + VMSTATE_STRUCT_POINTER_V(_field, _state, 0, _vmsd, _type) + +#define VMSTATE_STRUCT_POINTER_TEST(_field, _state, _test, _vmsd, _type) \ + VMSTATE_STRUCT_POINTER_TEST_V(_field, _state, _test, 0, _vmsd, _type) + +#define VMSTATE_STRUCT_ARRAY(_field, _state, _num, _version, _vmsd, _type) \ + VMSTATE_STRUCT_ARRAY_TEST(_field, _state, _num, NULL, _version, \ + _vmsd, _type) + +#define VMSTATE_STRUCT_2DARRAY(_field, _state, _n1, _n2, _version, \ + _vmsd, _type) \ + VMSTATE_STRUCT_2DARRAY_TEST(_field, _state, _n1, _n2, NULL, \ + _version, _vmsd, _type) + +#define VMSTATE_BUFFER_UNSAFE_INFO(_field, _state, _version, _info, _size) \ + VMSTATE_BUFFER_UNSAFE_INFO_TEST(_field, _state, NULL, _version, _info, \ + _size) + +#define VMSTATE_BOOL_V(_f, _s, _v) \ + VMSTATE_SINGLE(_f, _s, _v, vmstate_info_bool, bool) + +#define VMSTATE_INT8_V(_f, _s, _v) \ + VMSTATE_SINGLE(_f, _s, _v, vmstate_info_int8, int8_t) +#define VMSTATE_INT16_V(_f, _s, _v) \ + VMSTATE_SINGLE(_f, _s, _v, vmstate_info_int16, int16_t) +#define VMSTATE_INT32_V(_f, _s, _v) \ + VMSTATE_SINGLE(_f, _s, _v, vmstate_info_int32, int32_t) +#define VMSTATE_INT64_V(_f, _s, _v) \ + VMSTATE_SINGLE(_f, _s, _v, vmstate_info_int64, int64_t) + +#define VMSTATE_UINT8_V(_f, _s, _v) \ + VMSTATE_SINGLE(_f, _s, _v, vmstate_info_uint8, uint8_t) +#define VMSTATE_UINT16_V(_f, _s, _v) \ + VMSTATE_SINGLE(_f, _s, _v, vmstate_info_uint16, uint16_t) +#define VMSTATE_UINT32_V(_f, _s, _v) \ + VMSTATE_SINGLE(_f, _s, _v, vmstate_info_uint32, uint32_t) +#define VMSTATE_UINT64_V(_f, _s, _v) \ + VMSTATE_SINGLE(_f, _s, _v, vmstate_info_uint64, uint64_t) + +#ifdef CONFIG_LINUX + +#define VMSTATE_U8_V(_f, _s, _v) \ + VMSTATE_SINGLE(_f, _s, _v, vmstate_info_uint8, __u8) +#define VMSTATE_U16_V(_f, _s, _v) \ + VMSTATE_SINGLE(_f, _s, _v, vmstate_info_uint16, __u16) +#define VMSTATE_U32_V(_f, _s, _v) \ + VMSTATE_SINGLE(_f, _s, _v, vmstate_info_uint32, __u32) +#define VMSTATE_U64_V(_f, _s, _v) \ + VMSTATE_SINGLE(_f, _s, _v, vmstate_info_uint64, __u64) + +#endif + +#define VMSTATE_BOOL(_f, _s) \ + VMSTATE_BOOL_V(_f, _s, 0) + +#define VMSTATE_INT8(_f, _s) \ + VMSTATE_INT8_V(_f, _s, 0) +#define VMSTATE_INT16(_f, _s) \ + VMSTATE_INT16_V(_f, _s, 0) +#define VMSTATE_INT32(_f, _s) \ + VMSTATE_INT32_V(_f, _s, 0) +#define VMSTATE_INT64(_f, _s) \ + VMSTATE_INT64_V(_f, _s, 0) + +#define VMSTATE_UINT8(_f, _s) \ + VMSTATE_UINT8_V(_f, _s, 0) +#define VMSTATE_UINT16(_f, _s) \ + VMSTATE_UINT16_V(_f, _s, 0) +#define VMSTATE_UINT32(_f, _s) \ + VMSTATE_UINT32_V(_f, _s, 0) +#define VMSTATE_UINT64(_f, _s) \ + VMSTATE_UINT64_V(_f, _s, 0) + +#ifdef CONFIG_LINUX + +#define VMSTATE_U8(_f, _s) \ + VMSTATE_U8_V(_f, _s, 0) +#define VMSTATE_U16(_f, _s) \ + VMSTATE_U16_V(_f, _s, 0) +#define VMSTATE_U32(_f, _s) \ + VMSTATE_U32_V(_f, _s, 0) +#define VMSTATE_U64(_f, _s) \ + VMSTATE_U64_V(_f, _s, 0) + +#endif + +#define VMSTATE_UINT8_EQUAL(_f, _s, _err_hint) \ + VMSTATE_SINGLE_FULL(_f, _s, 0, 0, \ + vmstate_info_uint8_equal, uint8_t, _err_hint) + +#define VMSTATE_UINT16_EQUAL(_f, _s, _err_hint) \ + VMSTATE_SINGLE_FULL(_f, _s, 0, 0, \ + vmstate_info_uint16_equal, uint16_t, _err_hint) + +#define VMSTATE_UINT16_EQUAL_V(_f, _s, _v, _err_hint) \ + VMSTATE_SINGLE_FULL(_f, _s, 0, _v, \ + vmstate_info_uint16_equal, uint16_t, _err_hint) + +#define VMSTATE_INT32_EQUAL(_f, _s, _err_hint) \ + VMSTATE_SINGLE_FULL(_f, _s, 0, 0, \ + vmstate_info_int32_equal, int32_t, _err_hint) + +#define VMSTATE_UINT32_EQUAL_V(_f, _s, _v, _err_hint) \ + VMSTATE_SINGLE_FULL(_f, _s, 0, _v, \ + vmstate_info_uint32_equal, uint32_t, _err_hint) + +#define VMSTATE_UINT32_EQUAL(_f, _s, _err_hint) \ + VMSTATE_UINT32_EQUAL_V(_f, _s, 0, _err_hint) + +#define VMSTATE_UINT64_EQUAL_V(_f, _s, _v, _err_hint) \ + VMSTATE_SINGLE_FULL(_f, _s, 0, _v, \ + vmstate_info_uint64_equal, uint64_t, _err_hint) + +#define VMSTATE_UINT64_EQUAL(_f, _s, _err_hint) \ + VMSTATE_UINT64_EQUAL_V(_f, _s, 0, _err_hint) + +#define VMSTATE_INT32_POSITIVE_LE(_f, _s) \ + VMSTATE_SINGLE(_f, _s, 0, vmstate_info_int32_le, int32_t) + +#define VMSTATE_BOOL_TEST(_f, _s, _t) \ + VMSTATE_SINGLE_TEST(_f, _s, _t, 0, vmstate_info_bool, bool) + +#define VMSTATE_INT8_TEST(_f, _s, _t) \ + VMSTATE_SINGLE_TEST(_f, _s, _t, 0, vmstate_info_int8, int8_t) + +#define VMSTATE_INT16_TEST(_f, _s, _t) \ + VMSTATE_SINGLE_TEST(_f, _s, _t, 0, vmstate_info_int16, int16_t) + +#define VMSTATE_INT32_TEST(_f, _s, _t) \ + VMSTATE_SINGLE_TEST(_f, _s, _t, 0, vmstate_info_int32, int32_t) + +#define VMSTATE_INT64_TEST(_f, _s, _t) \ + VMSTATE_SINGLE_TEST(_f, _s, _t, 0, vmstate_info_int64, int64_t) + +#define VMSTATE_UINT8_TEST(_f, _s, _t) \ + VMSTATE_SINGLE_TEST(_f, _s, _t, 0, vmstate_info_uint8, uint8_t) + +#define VMSTATE_UINT16_TEST(_f, _s, _t) \ + VMSTATE_SINGLE_TEST(_f, _s, _t, 0, vmstate_info_uint16, uint16_t) + +#define VMSTATE_UINT32_TEST(_f, _s, _t) \ + VMSTATE_SINGLE_TEST(_f, _s, _t, 0, vmstate_info_uint32, uint32_t) + +#define VMSTATE_UINT64_TEST(_f, _s, _t) \ + VMSTATE_SINGLE_TEST(_f, _s, _t, 0, vmstate_info_uint64, uint64_t) + + +#define VMSTATE_TIMER_PTR_TEST(_f, _s, _test) \ + VMSTATE_POINTER_TEST(_f, _s, _test, vmstate_info_timer, QEMUTimer *) + +#define VMSTATE_TIMER_PTR_V(_f, _s, _v) \ + VMSTATE_POINTER(_f, _s, _v, vmstate_info_timer, QEMUTimer *) + +#define VMSTATE_TIMER_PTR(_f, _s) \ + VMSTATE_TIMER_PTR_V(_f, _s, 0) + +#define VMSTATE_TIMER_PTR_ARRAY(_f, _s, _n) \ + VMSTATE_ARRAY_OF_POINTER(_f, _s, _n, 0, vmstate_info_timer, QEMUTimer *) + +#define VMSTATE_TIMER_TEST(_f, _s, _test) \ + VMSTATE_SINGLE_TEST(_f, _s, _test, 0, vmstate_info_timer, QEMUTimer) + +#define VMSTATE_TIMER_V(_f, _s, _v) \ + VMSTATE_SINGLE(_f, _s, _v, vmstate_info_timer, QEMUTimer) + +#define VMSTATE_TIMER(_f, _s) \ + VMSTATE_TIMER_V(_f, _s, 0) + +#define VMSTATE_TIMER_ARRAY(_f, _s, _n) \ + VMSTATE_ARRAY(_f, _s, _n, 0, vmstate_info_timer, QEMUTimer) + +#define VMSTATE_BOOL_ARRAY_V(_f, _s, _n, _v) \ + VMSTATE_ARRAY(_f, _s, _n, _v, vmstate_info_bool, bool) + +#define VMSTATE_BOOL_ARRAY(_f, _s, _n) \ + VMSTATE_BOOL_ARRAY_V(_f, _s, _n, 0) + +#define VMSTATE_BOOL_SUB_ARRAY(_f, _s, _start, _num) \ + VMSTATE_SUB_ARRAY(_f, _s, _start, _num, 0, vmstate_info_bool, bool) + +#define VMSTATE_UINT16_ARRAY_V(_f, _s, _n, _v) \ + VMSTATE_ARRAY(_f, _s, _n, _v, vmstate_info_uint16, uint16_t) + +#define VMSTATE_UINT16_2DARRAY_V(_f, _s, _n1, _n2, _v) \ + VMSTATE_2DARRAY(_f, _s, _n1, _n2, _v, vmstate_info_uint16, uint16_t) + +#define VMSTATE_UINT16_ARRAY(_f, _s, _n) \ + VMSTATE_UINT16_ARRAY_V(_f, _s, _n, 0) + +#define VMSTATE_UINT16_SUB_ARRAY(_f, _s, _start, _num) \ + VMSTATE_SUB_ARRAY(_f, _s, _start, _num, 0, vmstate_info_uint16, uint16_t) + +#define VMSTATE_UINT16_2DARRAY(_f, _s, _n1, _n2) \ + VMSTATE_UINT16_2DARRAY_V(_f, _s, _n1, _n2, 0) + +#define VMSTATE_UINT8_2DARRAY_V(_f, _s, _n1, _n2, _v) \ + VMSTATE_2DARRAY(_f, _s, _n1, _n2, _v, vmstate_info_uint8, uint8_t) + +#define VMSTATE_UINT8_ARRAY_V(_f, _s, _n, _v) \ + VMSTATE_ARRAY(_f, _s, _n, _v, vmstate_info_uint8, uint8_t) + +#define VMSTATE_UINT8_ARRAY(_f, _s, _n) \ + VMSTATE_UINT8_ARRAY_V(_f, _s, _n, 0) + +#define VMSTATE_UINT8_SUB_ARRAY(_f, _s, _start, _num) \ + VMSTATE_SUB_ARRAY(_f, _s, _start, _num, 0, vmstate_info_uint8, uint8_t) + +#define VMSTATE_UINT8_2DARRAY(_f, _s, _n1, _n2) \ + VMSTATE_UINT8_2DARRAY_V(_f, _s, _n1, _n2, 0) + +#define VMSTATE_UINT32_ARRAY_V(_f, _s, _n, _v) \ + VMSTATE_ARRAY(_f, _s, _n, _v, vmstate_info_uint32, uint32_t) + +#define VMSTATE_UINT32_2DARRAY_V(_f, _s, _n1, _n2, _v) \ + VMSTATE_2DARRAY(_f, _s, _n1, _n2, _v, vmstate_info_uint32, uint32_t) + +#define VMSTATE_UINT32_ARRAY(_f, _s, _n) \ + VMSTATE_UINT32_ARRAY_V(_f, _s, _n, 0) + +#define VMSTATE_UINT32_SUB_ARRAY(_f, _s, _start, _num) \ + VMSTATE_SUB_ARRAY(_f, _s, _start, _num, 0, vmstate_info_uint32, uint32_t) + +#define VMSTATE_UINT32_2DARRAY(_f, _s, _n1, _n2) \ + VMSTATE_UINT32_2DARRAY_V(_f, _s, _n1, _n2, 0) + +#define VMSTATE_UINT64_ARRAY_V(_f, _s, _n, _v) \ + VMSTATE_ARRAY(_f, _s, _n, _v, vmstate_info_uint64, uint64_t) + +#define VMSTATE_UINT64_ARRAY(_f, _s, _n) \ + VMSTATE_UINT64_ARRAY_V(_f, _s, _n, 0) + +#define VMSTATE_UINT64_SUB_ARRAY(_f, _s, _start, _num) \ + VMSTATE_SUB_ARRAY(_f, _s, _start, _num, 0, vmstate_info_uint64, uint64_t) + +#define VMSTATE_UINT64_2DARRAY(_f, _s, _n1, _n2) \ + VMSTATE_UINT64_2DARRAY_V(_f, _s, _n1, _n2, 0) + +#define VMSTATE_UINT64_2DARRAY_V(_f, _s, _n1, _n2, _v) \ + VMSTATE_2DARRAY(_f, _s, _n1, _n2, _v, vmstate_info_uint64, uint64_t) + +#define VMSTATE_INT16_ARRAY_V(_f, _s, _n, _v) \ + VMSTATE_ARRAY(_f, _s, _n, _v, vmstate_info_int16, int16_t) + +#define VMSTATE_INT16_ARRAY(_f, _s, _n) \ + VMSTATE_INT16_ARRAY_V(_f, _s, _n, 0) + +#define VMSTATE_INT32_ARRAY_V(_f, _s, _n, _v) \ + VMSTATE_ARRAY(_f, _s, _n, _v, vmstate_info_int32, int32_t) + +#define VMSTATE_INT32_ARRAY(_f, _s, _n) \ + VMSTATE_INT32_ARRAY_V(_f, _s, _n, 0) + +#define VMSTATE_INT64_ARRAY_V(_f, _s, _n, _v) \ + VMSTATE_ARRAY(_f, _s, _n, _v, vmstate_info_int64, int64_t) + +#define VMSTATE_INT64_ARRAY(_f, _s, _n) \ + VMSTATE_INT64_ARRAY_V(_f, _s, _n, 0) + +#define VMSTATE_CPUDOUBLE_ARRAY_V(_f, _s, _n, _v) \ + VMSTATE_ARRAY(_f, _s, _n, _v, vmstate_info_cpudouble, CPU_DoubleU) + +#define VMSTATE_CPUDOUBLE_ARRAY(_f, _s, _n) \ + VMSTATE_CPUDOUBLE_ARRAY_V(_f, _s, _n, 0) + +#define VMSTATE_BUFFER_V(_f, _s, _v) \ + VMSTATE_STATIC_BUFFER(_f, _s, _v, NULL, 0, sizeof(typeof_field(_s, _f))) + +#define VMSTATE_BUFFER(_f, _s) \ + VMSTATE_BUFFER_V(_f, _s, 0) + +#define VMSTATE_PARTIAL_BUFFER(_f, _s, _size) \ + VMSTATE_STATIC_BUFFER(_f, _s, 0, NULL, 0, _size) + +#define VMSTATE_BUFFER_START_MIDDLE_V(_f, _s, _start, _v) \ + VMSTATE_STATIC_BUFFER(_f, _s, _v, NULL, _start, sizeof(typeof_field(_s, _f))) + +#define VMSTATE_BUFFER_START_MIDDLE(_f, _s, _start) \ + VMSTATE_BUFFER_START_MIDDLE_V(_f, _s, _start, 0) + +#define VMSTATE_PARTIAL_VBUFFER(_f, _s, _size) \ + VMSTATE_VBUFFER(_f, _s, 0, NULL, _size) + +#define VMSTATE_PARTIAL_VBUFFER_UINT32(_f, _s, _size) \ + VMSTATE_VBUFFER_UINT32(_f, _s, 0, NULL, _size) + +#define VMSTATE_BUFFER_TEST(_f, _s, _test) \ + VMSTATE_STATIC_BUFFER(_f, _s, 0, _test, 0, sizeof(typeof_field(_s, _f))) + +#define VMSTATE_BUFFER_UNSAFE(_field, _state, _version, _size) \ + VMSTATE_BUFFER_UNSAFE_INFO(_field, _state, _version, vmstate_info_buffer, _size) + +/* + * These VMSTATE_UNUSED*() macros can be used to fill in the holes + * when some of the vmstate fields are obsolete to be compatible with + * migrations between new/old binaries. + * + * CAUTION: when using any of the VMSTATE_UNUSED*() macros please be + * sure that the size passed in is the size that was actually *sent* + * rather than the size of the *structure*. One example is the + * boolean type - the size of the structure can vary depending on the + * definition of boolean, however the size we actually sent is always + * 1 byte (please refer to implementation of VMSTATE_BOOL_V and + * vmstate_info_bool). So here we should always pass in size==1 + * rather than size==sizeof(bool). + */ +#define VMSTATE_UNUSED_V(_v, _size) \ + VMSTATE_UNUSED_BUFFER(NULL, _v, _size) + +#define VMSTATE_UNUSED(_size) \ + VMSTATE_UNUSED_V(0, _size) + +#define VMSTATE_UNUSED_TEST(_test, _size) \ + VMSTATE_UNUSED_BUFFER(_test, 0, _size) + +#define VMSTATE_END_OF_LIST() \ + {} + +int vmstate_load_state(QEMUFile *f, const VMStateDescription *vmsd, + void *opaque, int version_id); +int vmstate_save_state(QEMUFile *f, const VMStateDescription *vmsd, + void *opaque, QJSON *vmdesc); +int vmstate_save_state_v(QEMUFile *f, const VMStateDescription *vmsd, + void *opaque, QJSON *vmdesc, int version_id); + +bool vmstate_save_needed(const VMStateDescription *vmsd, void *opaque); + +#define VMSTATE_INSTANCE_ID_ANY -1 + +/* Returns: 0 on success, -1 on failure */ +int vmstate_register_with_alias_id(VMStateIf *obj, uint32_t instance_id, + const VMStateDescription *vmsd, + void *base, int alias_id, + int required_for_version, + Error **errp); + +/* Returns: 0 on success, -1 on failure */ +static inline int vmstate_register(VMStateIf *obj, int instance_id, + const VMStateDescription *vmsd, + void *opaque) +{ + return vmstate_register_with_alias_id(obj, instance_id, vmsd, + opaque, -1, 0, NULL); +} + +void vmstate_unregister(VMStateIf *obj, const VMStateDescription *vmsd, + void *opaque); + +void vmstate_register_ram(struct MemoryRegion *memory, DeviceState *dev); +void vmstate_unregister_ram(struct MemoryRegion *memory, DeviceState *dev); +void vmstate_register_ram_global(struct MemoryRegion *memory); + +bool vmstate_check_only_migratable(const VMStateDescription *vmsd); + +#endif diff --git a/include/monitor/hmp-target.h b/include/monitor/hmp-target.h new file mode 100644 index 000000000..60fc92722 --- /dev/null +++ b/include/monitor/hmp-target.h @@ -0,0 +1,53 @@ +/* + * QEMU monitor + * + * Copyright (c) 2003-2004 Fabrice Bellard + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ + +#ifndef MONITOR_HMP_TARGET_H +#define MONITOR_HMP_TARGET_H + +#include "cpu.h" + +#define MD_TLONG 0 +#define MD_I32 1 + +struct MonitorDef { + const char *name; + int offset; + target_long (*get_value)(Monitor *mon, const struct MonitorDef *md, + int val); + int type; +}; + +const MonitorDef *target_monitor_defs(void); +int target_get_monitor_def(CPUState *cs, const char *name, uint64_t *pval); + +CPUArchState *mon_get_cpu_env(Monitor *mon); +CPUState *mon_get_cpu(Monitor *mon); + +void hmp_info_mem(Monitor *mon, const QDict *qdict); +void hmp_info_tlb(Monitor *mon, const QDict *qdict); +void hmp_mce(Monitor *mon, const QDict *qdict); +void hmp_info_local_apic(Monitor *mon, const QDict *qdict); +void hmp_info_io_apic(Monitor *mon, const QDict *qdict); + +#endif /* MONITOR_HMP_TARGET_H */ diff --git a/include/monitor/hmp.h b/include/monitor/hmp.h new file mode 100644 index 000000000..ed2913fd1 --- /dev/null +++ b/include/monitor/hmp.h @@ -0,0 +1,137 @@ +/* + * Human Monitor Interface + * + * Copyright IBM, Corp. 2011 + * + * Authors: + * Anthony Liguori + * + * This work is licensed under the terms of the GNU GPL, version 2. See + * the COPYING file in the top-level directory. + * + */ + +#ifndef HMP_H +#define HMP_H + +#include "qemu/readline.h" + +void hmp_handle_error(Monitor *mon, Error *err); + +void hmp_info_name(Monitor *mon, const QDict *qdict); +void hmp_info_version(Monitor *mon, const QDict *qdict); +void hmp_info_kvm(Monitor *mon, const QDict *qdict); +void hmp_info_status(Monitor *mon, const QDict *qdict); +void hmp_info_uuid(Monitor *mon, const QDict *qdict); +void hmp_info_chardev(Monitor *mon, const QDict *qdict); +void hmp_info_mice(Monitor *mon, const QDict *qdict); +void hmp_info_migrate(Monitor *mon, const QDict *qdict); +void hmp_info_migrate_capabilities(Monitor *mon, const QDict *qdict); +void hmp_info_migrate_parameters(Monitor *mon, const QDict *qdict); +void hmp_info_migrate_cache_size(Monitor *mon, const QDict *qdict); +void hmp_info_cpus(Monitor *mon, const QDict *qdict); +void hmp_info_vnc(Monitor *mon, const QDict *qdict); +void hmp_info_spice(Monitor *mon, const QDict *qdict); +void hmp_info_balloon(Monitor *mon, const QDict *qdict); +void hmp_info_irq(Monitor *mon, const QDict *qdict); +void hmp_info_pic(Monitor *mon, const QDict *qdict); +void hmp_info_rdma(Monitor *mon, const QDict *qdict); +void hmp_info_pci(Monitor *mon, const QDict *qdict); +void hmp_info_tpm(Monitor *mon, const QDict *qdict); +void hmp_info_iothreads(Monitor *mon, const QDict *qdict); +void hmp_quit(Monitor *mon, const QDict *qdict); +void hmp_stop(Monitor *mon, const QDict *qdict); +void hmp_sync_profile(Monitor *mon, const QDict *qdict); +void hmp_system_reset(Monitor *mon, const QDict *qdict); +void hmp_system_powerdown(Monitor *mon, const QDict *qdict); +void hmp_exit_preconfig(Monitor *mon, const QDict *qdict); +void hmp_announce_self(Monitor *mon, const QDict *qdict); +void hmp_cpu(Monitor *mon, const QDict *qdict); +void hmp_memsave(Monitor *mon, const QDict *qdict); +void hmp_pmemsave(Monitor *mon, const QDict *qdict); +void hmp_ringbuf_write(Monitor *mon, const QDict *qdict); +void hmp_ringbuf_read(Monitor *mon, const QDict *qdict); +void hmp_cont(Monitor *mon, const QDict *qdict); +void hmp_system_wakeup(Monitor *mon, const QDict *qdict); +void hmp_nmi(Monitor *mon, const QDict *qdict); +void hmp_set_link(Monitor *mon, const QDict *qdict); +void hmp_balloon(Monitor *mon, const QDict *qdict); +void hmp_loadvm(Monitor *mon, const QDict *qdict); +void hmp_savevm(Monitor *mon, const QDict *qdict); +void hmp_delvm(Monitor *mon, const QDict *qdict); +void hmp_migrate_cancel(Monitor *mon, const QDict *qdict); +void hmp_migrate_continue(Monitor *mon, const QDict *qdict); +void hmp_migrate_incoming(Monitor *mon, const QDict *qdict); +void hmp_migrate_recover(Monitor *mon, const QDict *qdict); +void hmp_migrate_pause(Monitor *mon, const QDict *qdict); +void hmp_migrate_set_downtime(Monitor *mon, const QDict *qdict); +void hmp_migrate_set_speed(Monitor *mon, const QDict *qdict); +void hmp_migrate_set_capability(Monitor *mon, const QDict *qdict); +void hmp_migrate_set_parameter(Monitor *mon, const QDict *qdict); +void hmp_migrate_set_cache_size(Monitor *mon, const QDict *qdict); +void hmp_client_migrate_info(Monitor *mon, const QDict *qdict); +void hmp_migrate_start_postcopy(Monitor *mon, const QDict *qdict); +void hmp_x_colo_lost_heartbeat(Monitor *mon, const QDict *qdict); +void hmp_set_password(Monitor *mon, const QDict *qdict); +void hmp_expire_password(Monitor *mon, const QDict *qdict); +void hmp_change(Monitor *mon, const QDict *qdict); +void hmp_migrate(Monitor *mon, const QDict *qdict); +void hmp_device_add(Monitor *mon, const QDict *qdict); +void hmp_device_del(Monitor *mon, const QDict *qdict); +void hmp_dump_guest_memory(Monitor *mon, const QDict *qdict); +void hmp_netdev_add(Monitor *mon, const QDict *qdict); +void hmp_netdev_del(Monitor *mon, const QDict *qdict); +void hmp_getfd(Monitor *mon, const QDict *qdict); +void hmp_closefd(Monitor *mon, const QDict *qdict); +void hmp_sendkey(Monitor *mon, const QDict *qdict); +void hmp_screendump(Monitor *mon, const QDict *qdict); +void hmp_chardev_add(Monitor *mon, const QDict *qdict); +void hmp_chardev_change(Monitor *mon, const QDict *qdict); +void hmp_chardev_remove(Monitor *mon, const QDict *qdict); +void hmp_chardev_send_break(Monitor *mon, const QDict *qdict); +void hmp_object_add(Monitor *mon, const QDict *qdict); +void hmp_object_del(Monitor *mon, const QDict *qdict); +void hmp_info_memdev(Monitor *mon, const QDict *qdict); +void hmp_info_numa(Monitor *mon, const QDict *qdict); +void hmp_info_memory_devices(Monitor *mon, const QDict *qdict); +void hmp_qom_list(Monitor *mon, const QDict *qdict); +void hmp_qom_get(Monitor *mon, const QDict *qdict); +void hmp_qom_set(Monitor *mon, const QDict *qdict); +void hmp_info_qom_tree(Monitor *mon, const QDict *dict); +void object_add_completion(ReadLineState *rs, int nb_args, const char *str); +void object_del_completion(ReadLineState *rs, int nb_args, const char *str); +void device_add_completion(ReadLineState *rs, int nb_args, const char *str); +void device_del_completion(ReadLineState *rs, int nb_args, const char *str); +void sendkey_completion(ReadLineState *rs, int nb_args, const char *str); +void chardev_remove_completion(ReadLineState *rs, int nb_args, const char *str); +void chardev_add_completion(ReadLineState *rs, int nb_args, const char *str); +void set_link_completion(ReadLineState *rs, int nb_args, const char *str); +void netdev_add_completion(ReadLineState *rs, int nb_args, const char *str); +void netdev_del_completion(ReadLineState *rs, int nb_args, const char *str); +void ringbuf_write_completion(ReadLineState *rs, int nb_args, const char *str); +void info_trace_events_completion(ReadLineState *rs, int nb_args, const char *str); +void trace_event_completion(ReadLineState *rs, int nb_args, const char *str); +void watchdog_action_completion(ReadLineState *rs, int nb_args, + const char *str); +void migrate_set_capability_completion(ReadLineState *rs, int nb_args, + const char *str); +void migrate_set_parameter_completion(ReadLineState *rs, int nb_args, + const char *str); +void delvm_completion(ReadLineState *rs, int nb_args, const char *str); +void loadvm_completion(ReadLineState *rs, int nb_args, const char *str); +void hmp_rocker(Monitor *mon, const QDict *qdict); +void hmp_rocker_ports(Monitor *mon, const QDict *qdict); +void hmp_rocker_of_dpa_flows(Monitor *mon, const QDict *qdict); +void hmp_rocker_of_dpa_groups(Monitor *mon, const QDict *qdict); +void hmp_info_dump(Monitor *mon, const QDict *qdict); +void hmp_info_ramblock(Monitor *mon, const QDict *qdict); +void hmp_hotpluggable_cpus(Monitor *mon, const QDict *qdict); +void hmp_info_vm_generation_id(Monitor *mon, const QDict *qdict); +void hmp_info_memory_size_summary(Monitor *mon, const QDict *qdict); +void hmp_info_sev(Monitor *mon, const QDict *qdict); +void hmp_info_replay(Monitor *mon, const QDict *qdict); +void hmp_replay_break(Monitor *mon, const QDict *qdict); +void hmp_replay_delete_break(Monitor *mon, const QDict *qdict); +void hmp_replay_seek(Monitor *mon, const QDict *qdict); + +#endif diff --git a/include/monitor/monitor.h b/include/monitor/monitor.h new file mode 100644 index 000000000..348bfad3d --- /dev/null +++ b/include/monitor/monitor.h @@ -0,0 +1,51 @@ +#ifndef MONITOR_H +#define MONITOR_H + +#include "block/block.h" +#include "qapi/qapi-types-misc.h" +#include "qemu/readline.h" + +typedef struct MonitorHMP MonitorHMP; +typedef struct MonitorOptions MonitorOptions; + +#define QMP_REQ_QUEUE_LEN_MAX 8 + +extern QemuOptsList qemu_mon_opts; + +Monitor *monitor_cur(void); +Monitor *monitor_set_cur(Coroutine *co, Monitor *mon); +bool monitor_cur_is_qmp(void); + +void monitor_init_globals(void); +void monitor_init_globals_core(void); +void monitor_init_qmp(Chardev *chr, bool pretty, Error **errp); +void monitor_init_hmp(Chardev *chr, bool use_readline, Error **errp); +int monitor_init(MonitorOptions *opts, bool allow_hmp, Error **errp); +int monitor_init_opts(QemuOpts *opts, Error **errp); +void monitor_cleanup(void); + +int monitor_suspend(Monitor *mon); +void monitor_resume(Monitor *mon); + +int monitor_get_fd(Monitor *mon, const char *fdname, Error **errp); +int monitor_fd_param(Monitor *mon, const char *fdname, Error **errp); + +int monitor_vprintf(Monitor *mon, const char *fmt, va_list ap) + GCC_FMT_ATTR(2, 0); +int monitor_printf(Monitor *mon, const char *fmt, ...) GCC_FMT_ATTR(2, 3); +void monitor_flush(Monitor *mon); +int monitor_set_cpu(Monitor *mon, int cpu_index); +int monitor_get_cpu_index(Monitor *mon); + +void monitor_read_command(MonitorHMP *mon, int show_prompt); +int monitor_read_password(MonitorHMP *mon, ReadLineFunc *readline_func, + void *opaque); + +AddfdInfo *monitor_fdset_add_fd(int fd, bool has_fdset_id, int64_t fdset_id, + bool has_opaque, const char *opaque, + Error **errp); +int monitor_fdset_dup_fd_add(int64_t fdset_id, int flags); +void monitor_fdset_dup_fd_remove(int dup_fd); +int64_t monitor_fdset_dup_fd_find(int dup_fd); + +#endif /* MONITOR_H */ diff --git a/include/monitor/qdev.h b/include/monitor/qdev.h new file mode 100644 index 000000000..eaa947d73 --- /dev/null +++ b/include/monitor/qdev.h @@ -0,0 +1,14 @@ +#ifndef MONITOR_QDEV_H +#define MONITOR_QDEV_H + +/*** monitor commands ***/ + +void hmp_info_qtree(Monitor *mon, const QDict *qdict); +void hmp_info_qdm(Monitor *mon, const QDict *qdict); +void qmp_device_add(QDict *qdict, QObject **ret_data, Error **errp); + +int qdev_device_help(QemuOpts *opts); +DeviceState *qdev_device_add(QemuOpts *opts, Error **errp); +void qdev_set_id(DeviceState *dev, const char *id); + +#endif diff --git a/include/net/announce.h b/include/net/announce.h new file mode 100644 index 000000000..3d90c83c2 --- /dev/null +++ b/include/net/announce.h @@ -0,0 +1,44 @@ +/* + * Self-announce facility + * (c) 2017-2019 Red Hat, Inc. + * + * This work is licensed under the terms of the GNU GPL, version 2 or later. + * See the COPYING file in the top-level directory. + */ + +#ifndef QEMU_NET_ANNOUNCE_H +#define QEMU_NET_ANNOUNCE_H + +#include "qapi/qapi-types-net.h" +#include "qemu/timer.h" + +struct AnnounceTimer { + QEMUTimer *tm; + AnnounceParameters params; + QEMUClockType type; + int round; +}; + +/* Returns: update the timer to the next time point */ +int64_t qemu_announce_timer_step(AnnounceTimer *timer); + +/* + * Delete the underlying timer and other data + * If 'free_named' true and the timer is a named timer, then remove + * it from the list of named timers and free the AnnounceTimer itself. + */ +void qemu_announce_timer_del(AnnounceTimer *timer, bool free_named); + +/* + * Under BQL/main thread + * Reset the timer to the given parameters/type/notifier. + */ +void qemu_announce_timer_reset(AnnounceTimer *timer, + AnnounceParameters *params, + QEMUClockType type, + QEMUTimerCB *cb, + void *opaque); + +void qemu_announce_self(AnnounceTimer *timer, AnnounceParameters *params); + +#endif diff --git a/include/net/can_emu.h b/include/net/can_emu.h new file mode 100644 index 000000000..6f9b206bb --- /dev/null +++ b/include/net/can_emu.h @@ -0,0 +1,129 @@ +/* + * CAN common CAN bus emulation support + * + * Copyright (c) 2013-2014 Jin Yang + * Copyright (c) 2014-2018 Pavel Pisa + * + * Initial development supported by Google GSoC 2013 from RTEMS project slot + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ + +#ifndef NET_CAN_EMU_H +#define NET_CAN_EMU_H + +#include "qemu/queue.h" +#include "qom/object.h" + +/* NOTE: the following two structures is copied from . */ + +/* + * Controller Area Network Identifier structure + * + * bit 0-28 : CAN identifier (11/29 bit) + * bit 29 : error frame flag (0 = data frame, 1 = error frame) + * bit 30 : remote transmission request flag (1 = rtr frame) + * bit 31 : frame format flag (0 = standard 11 bit, 1 = extended 29 bit) + */ +typedef uint32_t qemu_canid_t; + +typedef struct qemu_can_frame { + qemu_canid_t can_id; /* 32 bit CAN_ID + EFF/RTR/ERR flags */ + uint8_t can_dlc; /* data length code: 0 .. 8 */ + uint8_t flags; + uint8_t data[64] QEMU_ALIGNED(8); +} qemu_can_frame; + +/* Keep defines for QEMU separate from Linux ones for now */ + +#define QEMU_CAN_EFF_FLAG 0x80000000U /* EFF/SFF is set in the MSB */ +#define QEMU_CAN_RTR_FLAG 0x40000000U /* remote transmission request */ +#define QEMU_CAN_ERR_FLAG 0x20000000U /* error message frame */ + +#define QEMU_CAN_SFF_MASK 0x000007FFU /* standard frame format (SFF) */ +#define QEMU_CAN_EFF_MASK 0x1FFFFFFFU /* extended frame format (EFF) */ + +#define QEMU_CAN_FRMF_BRS 0x01 /* bit rate switch (2nd bitrate for data) */ +#define QEMU_CAN_FRMF_ESI 0x02 /* error state ind. of transmitting node */ +#define QEMU_CAN_FRMF_TYPE_FD 0x10 /* internal bit ind. of CAN FD frame */ + +/** + * struct qemu_can_filter - CAN ID based filter in can_register(). + * @can_id: relevant bits of CAN ID which are not masked out. + * @can_mask: CAN mask (see description) + * + * Description: + * A filter matches, when + * + * & mask == can_id & mask + * + * The filter can be inverted (QEMU_CAN_INV_FILTER bit set in can_id) or it can + * filter for error message frames (QEMU_CAN_ERR_FLAG bit set in mask). + */ +typedef struct qemu_can_filter { + qemu_canid_t can_id; + qemu_canid_t can_mask; +} qemu_can_filter; + +/* QEMU_CAN_INV_FILTER can be set in qemu_can_filter.can_id */ +#define QEMU_CAN_INV_FILTER 0x20000000U + +typedef struct CanBusClientState CanBusClientState; +typedef struct CanBusState CanBusState; + +typedef struct CanBusClientInfo { + bool (*can_receive)(CanBusClientState *); + ssize_t (*receive)(CanBusClientState *, + const struct qemu_can_frame *frames, size_t frames_cnt); +} CanBusClientInfo; + +struct CanBusClientState { + CanBusClientInfo *info; + CanBusState *bus; + int link_down; + QTAILQ_ENTRY(CanBusClientState) next; + CanBusClientState *peer; + char *model; + char *name; + void (*destructor)(CanBusClientState *); + bool fd_mode; +}; + +#define TYPE_CAN_BUS "can-bus" +OBJECT_DECLARE_SIMPLE_TYPE(CanBusState, CAN_BUS) + +int can_bus_filter_match(struct qemu_can_filter *filter, qemu_canid_t can_id); + +int can_bus_insert_client(CanBusState *bus, CanBusClientState *client); + +int can_bus_remove_client(CanBusClientState *client); + +ssize_t can_bus_client_send(CanBusClientState *, + const struct qemu_can_frame *frames, + size_t frames_cnt); + +int can_bus_client_set_filters(CanBusClientState *, + const struct qemu_can_filter *filters, + size_t filters_cnt); + +uint8_t can_dlc2len(uint8_t can_dlc); + +uint8_t can_len2dlc(uint8_t len); + +#endif diff --git a/include/net/can_host.h b/include/net/can_host.h new file mode 100644 index 000000000..caab71bdd --- /dev/null +++ b/include/net/can_host.h @@ -0,0 +1,51 @@ +/* + * CAN common CAN bus emulation support + * + * Copyright (c) 2013-2014 Jin Yang + * Copyright (c) 2014-2018 Pavel Pisa + * + * Initial development supported by Google GSoC 2013 from RTEMS project slot + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ + +#ifndef NET_CAN_HOST_H +#define NET_CAN_HOST_H + +#include "net/can_emu.h" +#include "qom/object.h" + +#define TYPE_CAN_HOST "can-host" +OBJECT_DECLARE_TYPE(CanHostState, CanHostClass, CAN_HOST) + +struct CanHostState { + Object oc; + + CanBusState *bus; + CanBusClientState bus_client; +}; + +struct CanHostClass { + ObjectClass oc; + + void (*connect)(CanHostState *ch, Error **errp); + void (*disconnect)(CanHostState *ch); +}; + +#endif diff --git a/include/net/checksum.h b/include/net/checksum.h new file mode 100644 index 000000000..05a0d273f --- /dev/null +++ b/include/net/checksum.h @@ -0,0 +1,106 @@ +/* + * IP checksumming functions. + * (c) 2008 Gerd Hoffmann + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; under version 2 of the License. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, see . + */ + +#ifndef QEMU_NET_CHECKSUM_H +#define QEMU_NET_CHECKSUM_H + +#include "qemu/bswap.h" +struct iovec; + +uint32_t net_checksum_add_cont(int len, uint8_t *buf, int seq); +uint16_t net_checksum_finish(uint32_t sum); +uint16_t net_checksum_tcpudp(uint16_t length, uint16_t proto, + uint8_t *addrs, uint8_t *buf); +void net_checksum_calculate(uint8_t *data, int length); + +static inline uint32_t +net_checksum_add(int len, uint8_t *buf) +{ + return net_checksum_add_cont(len, buf, 0); +} + +static inline uint16_t +net_checksum_finish_nozero(uint32_t sum) +{ + return net_checksum_finish(sum) ?: 0xFFFF; +} + +static inline uint16_t +net_raw_checksum(uint8_t *data, int length) +{ + return net_checksum_finish(net_checksum_add(length, data)); +} + +/** + * net_checksum_add_iov: scatter-gather vector checksumming + * + * @iov: input scatter-gather array + * @iov_cnt: number of array elements + * @iov_off: starting iov offset for checksumming + * @size: length of data to be checksummed + * @csum_offset: offset of the checksum chunk + */ +uint32_t net_checksum_add_iov(const struct iovec *iov, + const unsigned int iov_cnt, + uint32_t iov_off, uint32_t size, + uint32_t csum_offset); + +typedef struct toeplitz_key_st { + uint32_t leftmost_32_bits; + uint8_t *next_byte; +} net_toeplitz_key; + +static inline +void net_toeplitz_key_init(net_toeplitz_key *key, uint8_t *key_bytes) +{ + key->leftmost_32_bits = be32_to_cpu(*(uint32_t *)key_bytes); + key->next_byte = key_bytes + sizeof(uint32_t); +} + +static inline +void net_toeplitz_add(uint32_t *result, + uint8_t *input, + uint32_t len, + net_toeplitz_key *key) +{ + register uint32_t accumulator = *result; + register uint32_t leftmost_32_bits = key->leftmost_32_bits; + register uint32_t byte; + + for (byte = 0; byte < len; byte++) { + register uint8_t input_byte = input[byte]; + register uint8_t key_byte = *(key->next_byte++); + register uint8_t bit; + + for (bit = 0; bit < 8; bit++) { + if (input_byte & (1 << 7)) { + accumulator ^= leftmost_32_bits; + } + + leftmost_32_bits = + (leftmost_32_bits << 1) | ((key_byte & (1 << 7)) >> 7); + + input_byte <<= 1; + key_byte <<= 1; + } + } + + key->leftmost_32_bits = leftmost_32_bits; + *result = accumulator; +} + +#endif /* QEMU_NET_CHECKSUM_H */ diff --git a/include/net/eth.h b/include/net/eth.h new file mode 100644 index 000000000..0671be691 --- /dev/null +++ b/include/net/eth.h @@ -0,0 +1,425 @@ +/* + * QEMU network structures definitions and helper functions + * + * Copyright (c) 2012 Ravello Systems LTD (http://ravellosystems.com) + * + * Developed by Daynix Computing LTD (http://www.daynix.com) + * + * Portions developed by Free Software Foundation, Inc + * Copyright (C) 1991-1997, 2001, 2003, 2006 Free Software Foundation, Inc. + * See netinet/ip6.h and netinet/in.h (GNU C Library) + * + * Portions developed by Igor Kovalenko + * Copyright (c) 2006 Igor Kovalenko + * See hw/rtl8139.c (QEMU) + * + * Authors: + * Dmitry Fleytman + * Tamir Shomer + * Yan Vugenfirer + * + * This work is licensed under the terms of the GNU GPL, version 2 or later. + * See the COPYING file in the top-level directory. + * + */ + +#ifndef QEMU_ETH_H +#define QEMU_ETH_H + +#include "qemu/bswap.h" +#include "qemu/iov.h" + +#define ETH_ALEN 6 +#define ETH_HLEN 14 + +struct eth_header { + uint8_t h_dest[ETH_ALEN]; /* destination eth addr */ + uint8_t h_source[ETH_ALEN]; /* source ether addr */ + uint16_t h_proto; /* packet type ID field */ +}; + +struct vlan_header { + uint16_t h_tci; /* priority and VLAN ID */ + uint16_t h_proto; /* encapsulated protocol */ +}; + +struct ip_header { + uint8_t ip_ver_len; /* version and header length */ + uint8_t ip_tos; /* type of service */ + uint16_t ip_len; /* total length */ + uint16_t ip_id; /* identification */ + uint16_t ip_off; /* fragment offset field */ + uint8_t ip_ttl; /* time to live */ + uint8_t ip_p; /* protocol */ + uint16_t ip_sum; /* checksum */ + uint32_t ip_src, ip_dst; /* source and destination address */ +}; + +typedef struct tcp_header { + uint16_t th_sport; /* source port */ + uint16_t th_dport; /* destination port */ + uint32_t th_seq; /* sequence number */ + uint32_t th_ack; /* acknowledgment number */ + uint16_t th_offset_flags; /* data offset, reserved 6 bits, */ + /* TCP protocol flags */ + uint16_t th_win; /* window */ + uint16_t th_sum; /* checksum */ + uint16_t th_urp; /* urgent pointer */ +} tcp_header; + +#define TCP_FLAGS_ONLY(flags) ((flags) & 0x3f) + +#define TCP_HEADER_FLAGS(tcp) \ + TCP_FLAGS_ONLY(be16_to_cpu((tcp)->th_offset_flags)) + +#define TCP_FLAG_ACK 0x10 + +#define TCP_HEADER_DATA_OFFSET(tcp) \ + (((be16_to_cpu((tcp)->th_offset_flags) >> 12) & 0xf) << 2) + +typedef struct udp_header { + uint16_t uh_sport; /* source port */ + uint16_t uh_dport; /* destination port */ + uint16_t uh_ulen; /* udp length */ + uint16_t uh_sum; /* udp checksum */ +} udp_header; + +typedef struct ip_pseudo_header { + uint32_t ip_src; + uint32_t ip_dst; + uint8_t zeros; + uint8_t ip_proto; + uint16_t ip_payload; +} ip_pseudo_header; + +/* IPv6 address */ +struct in6_address { + union { + uint8_t __u6_addr8[16]; + } __in6_u; +}; + +struct ip6_header { + union { + struct ip6_hdrctl { + uint32_t ip6_un1_flow; /* 4 bits version, 8 bits TC, + 20 bits flow-ID */ + uint16_t ip6_un1_plen; /* payload length */ + uint8_t ip6_un1_nxt; /* next header */ + uint8_t ip6_un1_hlim; /* hop limit */ + } ip6_un1; + uint8_t ip6_un2_vfc; /* 4 bits version, top 4 bits tclass */ + struct ip6_ecn_access { + uint8_t ip6_un3_vfc; /* 4 bits version, top 4 bits tclass */ + uint8_t ip6_un3_ecn; /* 2 bits ECN, top 6 bits payload length */ + } ip6_un3; + } ip6_ctlun; + struct in6_address ip6_src; /* source address */ + struct in6_address ip6_dst; /* destination address */ +}; + +typedef struct ip6_pseudo_header { + struct in6_address ip6_src; + struct in6_address ip6_dst; + uint32_t len; + uint8_t zero[3]; + uint8_t next_hdr; +} ip6_pseudo_header; + +struct ip6_ext_hdr { + uint8_t ip6r_nxt; /* next header */ + uint8_t ip6r_len; /* length in units of 8 octets */ +}; + +struct ip6_ext_hdr_routing { + uint8_t nxt; + uint8_t len; + uint8_t rtype; + uint8_t segleft; + uint8_t rsvd[4]; +}; + +struct ip6_option_hdr { +#define IP6_OPT_PAD1 (0x00) +#define IP6_OPT_HOME (0xC9) + uint8_t type; + uint8_t len; +}; + +struct udp_hdr { + uint16_t uh_sport; /* source port */ + uint16_t uh_dport; /* destination port */ + uint16_t uh_ulen; /* udp length */ + uint16_t uh_sum; /* udp checksum */ +}; + +struct tcp_hdr { + u_short th_sport; /* source port */ + u_short th_dport; /* destination port */ + uint32_t th_seq; /* sequence number */ + uint32_t th_ack; /* acknowledgment number */ +#ifdef HOST_WORDS_BIGENDIAN + u_char th_off : 4, /* data offset */ + th_x2:4; /* (unused) */ +#else + u_char th_x2 : 4, /* (unused) */ + th_off:4; /* data offset */ +#endif + +#define TH_ELN 0x1 /* explicit loss notification */ +#define TH_ECN 0x2 /* explicit congestion notification */ +#define TH_FS 0x4 /* fast start */ + + u_char th_flags; +#define TH_FIN 0x01 +#define TH_SYN 0x02 +#define TH_RST 0x04 +#define TH_PUSH 0x08 +#define TH_ACK 0x10 +#define TH_URG 0x20 +#define TH_ECE 0x40 +#define TH_CWR 0x80 + u_short th_win; /* window */ + u_short th_sum; /* checksum */ + u_short th_urp; /* urgent pointer */ +}; + +#define ip6_nxt ip6_ctlun.ip6_un1.ip6_un1_nxt +#define ip6_ecn_acc ip6_ctlun.ip6_un3.ip6_un3_ecn +#define ip6_plen ip6_ctlun.ip6_un1.ip6_un1_plen + +#define PKT_GET_ETH_HDR(p) \ + ((struct eth_header *)(p)) +#define PKT_GET_VLAN_HDR(p) \ + ((struct vlan_header *) (((uint8_t *)(p)) + sizeof(struct eth_header))) +#define PKT_GET_DVLAN_HDR(p) \ + (PKT_GET_VLAN_HDR(p) + 1) +#define PKT_GET_IP_HDR(p) \ + ((struct ip_header *)(((uint8_t *)(p)) + eth_get_l2_hdr_length(p))) +#define IP_HDR_GET_LEN(p) \ + ((ldub_p(p + offsetof(struct ip_header, ip_ver_len)) & 0x0F) << 2) +#define IP_HDR_GET_P(p) \ + (ldub_p(p + offsetof(struct ip_header, ip_p))) +#define PKT_GET_IP_HDR_LEN(p) \ + (IP_HDR_GET_LEN(PKT_GET_IP_HDR(p))) +#define PKT_GET_IP6_HDR(p) \ + ((struct ip6_header *) (((uint8_t *)(p)) + eth_get_l2_hdr_length(p))) +#define IP_HEADER_VERSION(ip) \ + (((ip)->ip_ver_len >> 4) & 0xf) +#define IP4_IS_FRAGMENT(ip) \ + ((be16_to_cpu((ip)->ip_off) & (IP_OFFMASK | IP_MF)) != 0) + +#define ETH_P_IP (0x0800) /* Internet Protocol packet */ +#define ETH_P_ARP (0x0806) /* Address Resolution packet */ +#define ETH_P_IPV6 (0x86dd) +#define ETH_P_VLAN (0x8100) +#define ETH_P_DVLAN (0x88a8) +#define ETH_P_NCSI (0x88f8) +#define ETH_P_UNKNOWN (0xffff) +#define VLAN_VID_MASK 0x0fff +#define IP_HEADER_VERSION_4 (4) +#define IP_HEADER_VERSION_6 (6) +#define IP_PROTO_TCP (6) +#define IP_PROTO_UDP (17) +#define IPTOS_ECN_MASK 0x03 +#define IPTOS_ECN(x) ((x) & IPTOS_ECN_MASK) +#define IPTOS_ECN_CE 0x03 +#define IP6_ECN_MASK 0xC0 +#define IP6_ECN(x) ((x) & IP6_ECN_MASK) +#define IP6_ECN_CE 0xC0 +#define IP4_DONT_FRAGMENT_FLAG (1 << 14) + +#define IS_SPECIAL_VLAN_ID(x) \ + (((x) == 0) || ((x) == 0xFFF)) + +#define ETH_MAX_L2_HDR_LEN \ + (sizeof(struct eth_header) + 2 * sizeof(struct vlan_header)) + +#define ETH_MAX_IP4_HDR_LEN (60) +#define ETH_MAX_IP_DGRAM_LEN (0xFFFF) + +#define IP_FRAG_UNIT_SIZE (8) +#define IP_FRAG_ALIGN_SIZE(x) ((x) & ~0x7) +#define IP_RF 0x8000 /* reserved fragment flag */ +#define IP_DF 0x4000 /* don't fragment flag */ +#define IP_MF 0x2000 /* more fragments flag */ +#define IP_OFFMASK 0x1fff /* mask for fragmenting bits */ + +#define IP6_EXT_GRANULARITY (8) /* Size granularity for + IPv6 extension headers */ + +/* IP6 extension header types */ +#define IP6_HOP_BY_HOP (0) +#define IP6_ROUTING (43) +#define IP6_FRAGMENT (44) +#define IP6_ESP (50) +#define IP6_AUTHENTICATION (51) +#define IP6_NONE (59) +#define IP6_DESTINATON (60) +#define IP6_MOBILITY (135) + +static inline int is_multicast_ether_addr(const uint8_t *addr) +{ + return 0x01 & addr[0]; +} + +static inline int is_broadcast_ether_addr(const uint8_t *addr) +{ + return (addr[0] & addr[1] & addr[2] & addr[3] & addr[4] & addr[5]) == 0xff; +} + +static inline int is_unicast_ether_addr(const uint8_t *addr) +{ + return !is_multicast_ether_addr(addr); +} + +typedef enum { + ETH_PKT_UCAST = 0xAABBCC00, + ETH_PKT_BCAST, + ETH_PKT_MCAST +} eth_pkt_types_e; + +static inline eth_pkt_types_e +get_eth_packet_type(const struct eth_header *ehdr) +{ + if (is_broadcast_ether_addr(ehdr->h_dest)) { + return ETH_PKT_BCAST; + } else if (is_multicast_ether_addr(ehdr->h_dest)) { + return ETH_PKT_MCAST; + } else { /* unicast */ + return ETH_PKT_UCAST; + } +} + +static inline uint32_t +eth_get_l2_hdr_length(const void *p) +{ + uint16_t proto = be16_to_cpu(PKT_GET_ETH_HDR(p)->h_proto); + struct vlan_header *hvlan = PKT_GET_VLAN_HDR(p); + switch (proto) { + case ETH_P_VLAN: + return sizeof(struct eth_header) + sizeof(struct vlan_header); + case ETH_P_DVLAN: + if (be16_to_cpu(hvlan->h_proto) == ETH_P_VLAN) { + return sizeof(struct eth_header) + 2 * sizeof(struct vlan_header); + } else { + return sizeof(struct eth_header) + sizeof(struct vlan_header); + } + default: + return sizeof(struct eth_header); + } +} + +static inline uint32_t +eth_get_l2_hdr_length_iov(const struct iovec *iov, int iovcnt) +{ + uint8_t p[sizeof(struct eth_header) + sizeof(struct vlan_header)]; + size_t copied = iov_to_buf(iov, iovcnt, 0, p, ARRAY_SIZE(p)); + + if (copied < ARRAY_SIZE(p)) { + return copied; + } + + return eth_get_l2_hdr_length(p); +} + +static inline uint16_t +eth_get_pkt_tci(const void *p) +{ + uint16_t proto = be16_to_cpu(PKT_GET_ETH_HDR(p)->h_proto); + struct vlan_header *hvlan = PKT_GET_VLAN_HDR(p); + switch (proto) { + case ETH_P_VLAN: + case ETH_P_DVLAN: + return be16_to_cpu(hvlan->h_tci); + default: + return 0; + } +} + +size_t +eth_strip_vlan(const struct iovec *iov, int iovcnt, size_t iovoff, + uint8_t *new_ehdr_buf, + uint16_t *payload_offset, uint16_t *tci); + +size_t +eth_strip_vlan_ex(const struct iovec *iov, int iovcnt, size_t iovoff, + uint16_t vet, uint8_t *new_ehdr_buf, + uint16_t *payload_offset, uint16_t *tci); + +uint16_t +eth_get_l3_proto(const struct iovec *l2hdr_iov, int iovcnt, size_t l2hdr_len); + +void eth_setup_vlan_headers_ex(struct eth_header *ehdr, uint16_t vlan_tag, + uint16_t vlan_ethtype, bool *is_new); + +static inline void +eth_setup_vlan_headers(struct eth_header *ehdr, uint16_t vlan_tag, + bool *is_new) +{ + eth_setup_vlan_headers_ex(ehdr, vlan_tag, ETH_P_VLAN, is_new); +} + + +uint8_t eth_get_gso_type(uint16_t l3_proto, uint8_t *l3_hdr, uint8_t l4proto); + +typedef struct eth_ip6_hdr_info_st { + uint8_t l4proto; + size_t full_hdr_len; + struct ip6_header ip6_hdr; + bool has_ext_hdrs; + bool rss_ex_src_valid; + struct in6_address rss_ex_src; + bool rss_ex_dst_valid; + struct in6_address rss_ex_dst; + bool fragment; +} eth_ip6_hdr_info; + +typedef struct eth_ip4_hdr_info_st { + struct ip_header ip4_hdr; + bool fragment; +} eth_ip4_hdr_info; + +typedef struct eth_l4_hdr_info_st { + union { + struct tcp_header tcp; + struct udp_header udp; + } hdr; + + bool has_tcp_data; +} eth_l4_hdr_info; + +void eth_get_protocols(const struct iovec *iov, int iovcnt, + bool *isip4, bool *isip6, + bool *isudp, bool *istcp, + size_t *l3hdr_off, + size_t *l4hdr_off, + size_t *l5hdr_off, + eth_ip6_hdr_info *ip6hdr_info, + eth_ip4_hdr_info *ip4hdr_info, + eth_l4_hdr_info *l4hdr_info); + +void eth_setup_ip4_fragmentation(const void *l2hdr, size_t l2hdr_len, + void *l3hdr, size_t l3hdr_len, + size_t l3payload_len, + size_t frag_offset, bool more_frags); + +void +eth_fix_ip4_checksum(void *l3hdr, size_t l3hdr_len); + +uint32_t +eth_calc_ip4_pseudo_hdr_csum(struct ip_header *iphdr, + uint16_t csl, + uint32_t *cso); + +uint32_t +eth_calc_ip6_pseudo_hdr_csum(struct ip6_header *iphdr, + uint16_t csl, + uint8_t l4_proto, + uint32_t *cso); + +bool +eth_parse_ipv6_hdr(const struct iovec *pkt, int pkt_frags, + size_t ip6hdr_off, eth_ip6_hdr_info *info); + +#endif diff --git a/include/net/filter.h b/include/net/filter.h new file mode 100644 index 000000000..27ffc630d --- /dev/null +++ b/include/net/filter.h @@ -0,0 +1,82 @@ +/* + * Copyright (c) 2015 FUJITSU LIMITED + * Author: Yang Hongyang + * + * This work is licensed under the terms of the GNU GPL, version 2 or + * later. See the COPYING file in the top-level directory. + */ + +#ifndef QEMU_NET_FILTER_H +#define QEMU_NET_FILTER_H + +#include "qapi/qapi-types-net.h" +#include "qemu/queue.h" +#include "qom/object.h" +#include "net/queue.h" + +#define TYPE_NETFILTER "netfilter" +OBJECT_DECLARE_TYPE(NetFilterState, NetFilterClass, NETFILTER) + +typedef void (FilterSetup) (NetFilterState *nf, Error **errp); +typedef void (FilterCleanup) (NetFilterState *nf); +/* + * Return: + * 0: finished handling the packet, we should continue + * size: filter stolen this packet, we stop pass this packet further + */ +typedef ssize_t (FilterReceiveIOV)(NetFilterState *nc, + NetClientState *sender, + unsigned flags, + const struct iovec *iov, + int iovcnt, + NetPacketSent *sent_cb); + +typedef void (FilterStatusChanged) (NetFilterState *nf, Error **errp); + +typedef void (FilterHandleEvent) (NetFilterState *nf, int event, Error **errp); + +struct NetFilterClass { + ObjectClass parent_class; + + /* optional */ + FilterSetup *setup; + FilterCleanup *cleanup; + FilterStatusChanged *status_changed; + FilterHandleEvent *handle_event; + /* mandatory */ + FilterReceiveIOV *receive_iov; +}; + + +struct NetFilterState { + /* private */ + Object parent; + + /* protected */ + char *netdev_id; + NetClientState *netdev; + NetFilterDirection direction; + bool on; + char *position; + bool insert_before_flag; + QTAILQ_ENTRY(NetFilterState) next; +}; + +ssize_t qemu_netfilter_receive(NetFilterState *nf, + NetFilterDirection direction, + NetClientState *sender, + unsigned flags, + const struct iovec *iov, + int iovcnt, + NetPacketSent *sent_cb); + +/* pass the packet to the next filter */ +ssize_t qemu_netfilter_pass_to_next(NetClientState *sender, + unsigned flags, + const struct iovec *iov, + int iovcnt, + void *opaque); + +void colo_notify_filters_event(int event, Error **errp); + +#endif /* QEMU_NET_FILTER_H */ diff --git a/include/net/net.h b/include/net/net.h new file mode 100644 index 000000000..03f058ecb --- /dev/null +++ b/include/net/net.h @@ -0,0 +1,242 @@ +#ifndef QEMU_NET_H +#define QEMU_NET_H + +#include "qemu/queue.h" +#include "qapi/qapi-types-net.h" +#include "net/queue.h" + +#define MAC_FMT "%02X:%02X:%02X:%02X:%02X:%02X" +#define MAC_ARG(x) ((uint8_t *)(x))[0], ((uint8_t *)(x))[1], \ + ((uint8_t *)(x))[2], ((uint8_t *)(x))[3], \ + ((uint8_t *)(x))[4], ((uint8_t *)(x))[5] + +#define MAX_QUEUE_NUM 1024 + +/* Maximum GSO packet size (64k) plus plenty of room for + * the ethernet and virtio_net headers + */ +#define NET_BUFSIZE (4096 + 65536) + +struct MACAddr { + uint8_t a[6]; +}; + +/* qdev nic properties */ + +typedef struct NICPeers { + NetClientState *ncs[MAX_QUEUE_NUM]; + int32_t queues; +} NICPeers; + +typedef struct NICConf { + MACAddr macaddr; + NICPeers peers; + int32_t bootindex; +} NICConf; + +#define DEFINE_NIC_PROPERTIES(_state, _conf) \ + DEFINE_PROP_MACADDR("mac", _state, _conf.macaddr), \ + DEFINE_PROP_NETDEV("netdev", _state, _conf.peers) + + +/* Net clients */ + +typedef void (NetPoll)(NetClientState *, bool enable); +typedef bool (NetCanReceive)(NetClientState *); +typedef ssize_t (NetReceive)(NetClientState *, const uint8_t *, size_t); +typedef ssize_t (NetReceiveIOV)(NetClientState *, const struct iovec *, int); +typedef void (NetCleanup) (NetClientState *); +typedef void (LinkStatusChanged)(NetClientState *); +typedef void (NetClientDestructor)(NetClientState *); +typedef RxFilterInfo *(QueryRxFilter)(NetClientState *); +typedef bool (HasUfo)(NetClientState *); +typedef bool (HasVnetHdr)(NetClientState *); +typedef bool (HasVnetHdrLen)(NetClientState *, int); +typedef void (UsingVnetHdr)(NetClientState *, bool); +typedef void (SetOffload)(NetClientState *, int, int, int, int, int); +typedef void (SetVnetHdrLen)(NetClientState *, int); +typedef int (SetVnetLE)(NetClientState *, bool); +typedef int (SetVnetBE)(NetClientState *, bool); +typedef struct SocketReadState SocketReadState; +typedef void (SocketReadStateFinalize)(SocketReadState *rs); +typedef void (NetAnnounce)(NetClientState *); + +typedef struct NetClientInfo { + NetClientDriver type; + size_t size; + NetReceive *receive; + NetReceive *receive_raw; + NetReceiveIOV *receive_iov; + NetCanReceive *can_receive; + NetCleanup *cleanup; + LinkStatusChanged *link_status_changed; + QueryRxFilter *query_rx_filter; + NetPoll *poll; + HasUfo *has_ufo; + HasVnetHdr *has_vnet_hdr; + HasVnetHdrLen *has_vnet_hdr_len; + UsingVnetHdr *using_vnet_hdr; + SetOffload *set_offload; + SetVnetHdrLen *set_vnet_hdr_len; + SetVnetLE *set_vnet_le; + SetVnetBE *set_vnet_be; + NetAnnounce *announce; +} NetClientInfo; + +struct NetClientState { + NetClientInfo *info; + int link_down; + QTAILQ_ENTRY(NetClientState) next; + NetClientState *peer; + NetQueue *incoming_queue; + char *model; + char *name; + char info_str[256]; + unsigned receive_disabled : 1; + NetClientDestructor *destructor; + unsigned int queue_index; + unsigned rxfilter_notify_enabled:1; + int vring_enable; + int vnet_hdr_len; + bool is_netdev; + QTAILQ_HEAD(, NetFilterState) filters; +}; + +typedef struct NICState { + NetClientState *ncs; + NICConf *conf; + void *opaque; + bool peer_deleted; +} NICState; + +struct SocketReadState { + /* 0 = getting length, 1 = getting vnet header length, 2 = getting data */ + int state; + /* This flag decide whether to read the vnet_hdr_len field */ + bool vnet_hdr; + uint32_t index; + uint32_t packet_len; + uint32_t vnet_hdr_len; + uint8_t buf[NET_BUFSIZE]; + SocketReadStateFinalize *finalize; +}; + +int net_fill_rstate(SocketReadState *rs, const uint8_t *buf, int size); +char *qemu_mac_strdup_printf(const uint8_t *macaddr); +NetClientState *qemu_find_netdev(const char *id); +int qemu_find_net_clients_except(const char *id, NetClientState **ncs, + NetClientDriver type, int max); +NetClientState *qemu_new_net_client(NetClientInfo *info, + NetClientState *peer, + const char *model, + const char *name); +NICState *qemu_new_nic(NetClientInfo *info, + NICConf *conf, + const char *model, + const char *name, + void *opaque); +void qemu_del_nic(NICState *nic); +NetClientState *qemu_get_subqueue(NICState *nic, int queue_index); +NetClientState *qemu_get_queue(NICState *nic); +NICState *qemu_get_nic(NetClientState *nc); +void *qemu_get_nic_opaque(NetClientState *nc); +void qemu_del_net_client(NetClientState *nc); +typedef void (*qemu_nic_foreach)(NICState *nic, void *opaque); +void qemu_foreach_nic(qemu_nic_foreach func, void *opaque); +int qemu_can_receive_packet(NetClientState *nc); +int qemu_can_send_packet(NetClientState *nc); +ssize_t qemu_sendv_packet(NetClientState *nc, const struct iovec *iov, + int iovcnt); +ssize_t qemu_sendv_packet_async(NetClientState *nc, const struct iovec *iov, + int iovcnt, NetPacketSent *sent_cb); +ssize_t qemu_send_packet(NetClientState *nc, const uint8_t *buf, int size); +ssize_t qemu_receive_packet(NetClientState *nc, const uint8_t *buf, int size); +ssize_t qemu_receive_packet_iov(NetClientState *nc, + const struct iovec *iov, + int iovcnt); +ssize_t qemu_send_packet_raw(NetClientState *nc, const uint8_t *buf, int size); +ssize_t qemu_send_packet_async(NetClientState *nc, const uint8_t *buf, + int size, NetPacketSent *sent_cb); +void qemu_purge_queued_packets(NetClientState *nc); +void qemu_flush_queued_packets(NetClientState *nc); +void qemu_flush_or_purge_queued_packets(NetClientState *nc, bool purge); +void qemu_format_nic_info_str(NetClientState *nc, uint8_t macaddr[6]); +bool qemu_has_ufo(NetClientState *nc); +bool qemu_has_vnet_hdr(NetClientState *nc); +bool qemu_has_vnet_hdr_len(NetClientState *nc, int len); +void qemu_using_vnet_hdr(NetClientState *nc, bool enable); +void qemu_set_offload(NetClientState *nc, int csum, int tso4, int tso6, + int ecn, int ufo); +void qemu_set_vnet_hdr_len(NetClientState *nc, int len); +int qemu_set_vnet_le(NetClientState *nc, bool is_le); +int qemu_set_vnet_be(NetClientState *nc, bool is_be); +void qemu_macaddr_default_if_unset(MACAddr *macaddr); +int qemu_show_nic_models(const char *arg, const char *const *models); +void qemu_check_nic_model(NICInfo *nd, const char *model); +int qemu_find_nic_model(NICInfo *nd, const char * const *models, + const char *default_model); + +void print_net_client(Monitor *mon, NetClientState *nc); +void hmp_info_network(Monitor *mon, const QDict *qdict); +void net_socket_rs_init(SocketReadState *rs, + SocketReadStateFinalize *finalize, + bool vnet_hdr); +NetClientState *qemu_get_peer(NetClientState *nc, int queue_index); + +/* NIC info */ + +#define MAX_NICS 8 + +struct NICInfo { + MACAddr macaddr; + char *model; + char *name; + char *devaddr; + NetClientState *netdev; + int used; /* is this slot in nd_table[] being used? */ + int instantiated; /* does this NICInfo correspond to an instantiated NIC? */ + int nvectors; +}; + +extern int nb_nics; +extern NICInfo nd_table[MAX_NICS]; +extern const char *host_net_devices[]; + +/* from net.c */ +int net_client_parse(QemuOptsList *opts_list, const char *str); +void show_netdevs(void); +int net_init_clients(Error **errp); +void net_check_clients(void); +void net_cleanup(void); +void hmp_host_net_add(Monitor *mon, const QDict *qdict); +void hmp_host_net_remove(Monitor *mon, const QDict *qdict); +void netdev_add(QemuOpts *opts, Error **errp); + +int net_hub_id_for_client(NetClientState *nc, int *id); +NetClientState *net_hub_port_find(int hub_id); + +#define DEFAULT_NETWORK_SCRIPT CONFIG_SYSCONFDIR "/qemu-ifup" +#define DEFAULT_NETWORK_DOWN_SCRIPT CONFIG_SYSCONFDIR "/qemu-ifdown" +#define DEFAULT_BRIDGE_HELPER CONFIG_QEMU_HELPERDIR "/qemu-bridge-helper" +#define DEFAULT_BRIDGE_INTERFACE "br0" + +void qdev_set_nic_properties(DeviceState *dev, NICInfo *nd); + +#define POLYNOMIAL_BE 0x04c11db6 +#define POLYNOMIAL_LE 0xedb88320 +uint32_t net_crc32(const uint8_t *p, int len); +uint32_t net_crc32_le(const uint8_t *p, int len); + +#define vmstate_offset_macaddr(_state, _field) \ + vmstate_offset_array(_state, _field.a, uint8_t, \ + sizeof(typeof_field(_state, _field))) + +#define VMSTATE_MACADDR(_field, _state) { \ + .name = (stringify(_field)), \ + .size = sizeof(MACAddr), \ + .info = &vmstate_info_buffer, \ + .flags = VMS_BUFFER, \ + .offset = vmstate_offset_macaddr(_state, _field), \ +} + +#endif diff --git a/include/net/queue.h b/include/net/queue.h new file mode 100644 index 000000000..9f2f289d7 --- /dev/null +++ b/include/net/queue.h @@ -0,0 +1,83 @@ +/* + * Copyright (c) 2003-2008 Fabrice Bellard + * Copyright (c) 2009 Red Hat, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ + +#ifndef QEMU_NET_QUEUE_H +#define QEMU_NET_QUEUE_H + + +typedef struct NetPacket NetPacket; +typedef struct NetQueue NetQueue; + +typedef void (NetPacketSent) (NetClientState *sender, ssize_t ret); + +#define QEMU_NET_PACKET_FLAG_NONE 0 +#define QEMU_NET_PACKET_FLAG_RAW (1<<0) + +/* Returns: + * >0 - success + * 0 - queue packet for future redelivery + * <0 - failure (discard packet) + */ +typedef ssize_t (NetQueueDeliverFunc)(NetClientState *sender, + unsigned flags, + const struct iovec *iov, + int iovcnt, + void *opaque); + +NetQueue *qemu_new_net_queue(NetQueueDeliverFunc *deliver, void *opaque); + +void qemu_net_queue_append_iov(NetQueue *queue, + NetClientState *sender, + unsigned flags, + const struct iovec *iov, + int iovcnt, + NetPacketSent *sent_cb); + +void qemu_del_net_queue(NetQueue *queue); + +ssize_t qemu_net_queue_receive(NetQueue *queue, + const uint8_t *data, + size_t size); + +ssize_t qemu_net_queue_receive_iov(NetQueue *queue, + const struct iovec *iov, + int iovcnt); + +ssize_t qemu_net_queue_send(NetQueue *queue, + NetClientState *sender, + unsigned flags, + const uint8_t *data, + size_t size, + NetPacketSent *sent_cb); + +ssize_t qemu_net_queue_send_iov(NetQueue *queue, + NetClientState *sender, + unsigned flags, + const struct iovec *iov, + int iovcnt, + NetPacketSent *sent_cb); + +void qemu_net_queue_purge(NetQueue *queue, NetClientState *from); +bool qemu_net_queue_flush(NetQueue *queue); + +#endif /* QEMU_NET_QUEUE_H */ diff --git a/include/net/slirp.h b/include/net/slirp.h new file mode 100644 index 000000000..bad3e1e24 --- /dev/null +++ b/include/net/slirp.h @@ -0,0 +1,37 @@ +/* + * QEMU System Emulator + * + * Copyright (c) 2003-2008 Fabrice Bellard + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ +#ifndef QEMU_NET_SLIRP_H +#define QEMU_NET_SLIRP_H + + +#ifdef CONFIG_SLIRP + +void hmp_hostfwd_add(Monitor *mon, const QDict *qdict); +void hmp_hostfwd_remove(Monitor *mon, const QDict *qdict); + +void hmp_info_usernet(Monitor *mon, const QDict *qdict); + +#endif + +#endif /* QEMU_NET_SLIRP_H */ diff --git a/include/net/tap.h b/include/net/tap.h new file mode 100644 index 000000000..5d585515f --- /dev/null +++ b/include/net/tap.h @@ -0,0 +1,39 @@ +/* + * QEMU System Emulator + * + * Copyright (c) 2003-2008 Fabrice Bellard + * Copyright (c) 2009 Red Hat, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ + +#ifndef QEMU_NET_TAP_H +#define QEMU_NET_TAP_H + +#include "standard-headers/linux/virtio_net.h" + +int tap_enable(NetClientState *nc); +int tap_disable(NetClientState *nc); + +int tap_get_fd(NetClientState *nc); + +struct vhost_net; +struct vhost_net *tap_get_vhost_net(NetClientState *nc); + +#endif /* QEMU_NET_TAP_H */ diff --git a/include/net/vhost-user.h b/include/net/vhost-user.h new file mode 100644 index 000000000..5bcd8a628 --- /dev/null +++ b/include/net/vhost-user.h @@ -0,0 +1,18 @@ +/* + * vhost-user.h + * + * Copyright (c) 2013 Virtual Open Systems Sarl. + * + * This work is licensed under the terms of the GNU GPL, version 2 or later. + * See the COPYING file in the top-level directory. + * + */ + +#ifndef VHOST_USER_H +#define VHOST_USER_H + +struct vhost_net; +struct vhost_net *vhost_user_get_vhost_net(NetClientState *nc); +uint64_t vhost_user_get_acked_features(NetClientState *nc); + +#endif /* VHOST_USER_H */ diff --git a/include/net/vhost-vdpa.h b/include/net/vhost-vdpa.h new file mode 100644 index 000000000..45e34b7cf --- /dev/null +++ b/include/net/vhost-vdpa.h @@ -0,0 +1,22 @@ +/* + * vhost-vdpa.h + * + * Copyright(c) 2017-2018 Intel Corporation. + * Copyright(c) 2020 Red Hat, Inc. + * + * This work is licensed under the terms of the GNU GPL, version 2 or later. + * See the COPYING file in the top-level directory. + * + */ + +#ifndef VHOST_VDPA_H +#define VHOST_VDPA_H + +#define TYPE_VHOST_VDPA "vhost-vdpa" + +struct vhost_net *vhost_vdpa_get_vhost_net(NetClientState *nc); +uint64_t vhost_vdpa_get_acked_features(NetClientState *nc); + +extern const int vdpa_feature_bits[]; + +#endif /* VHOST_VDPA_H */ diff --git a/include/net/vhost_net.h b/include/net/vhost_net.h new file mode 100644 index 000000000..172b0051d --- /dev/null +++ b/include/net/vhost_net.h @@ -0,0 +1,48 @@ +#ifndef VHOST_NET_H +#define VHOST_NET_H + +#include "net/net.h" +#include "hw/virtio/vhost-backend.h" + +#define VHOST_NET_INIT_FAILED \ + "vhost-net requested but could not be initialized" + +struct vhost_net; +typedef struct vhost_net VHostNetState; + +typedef struct VhostNetOptions { + VhostBackendType backend_type; + NetClientState *net_backend; + uint32_t busyloop_timeout; + void *opaque; +} VhostNetOptions; + +uint64_t vhost_net_get_max_queues(VHostNetState *net); +struct vhost_net *vhost_net_init(VhostNetOptions *options); + +int vhost_net_start(VirtIODevice *dev, NetClientState *ncs, int total_queues); +void vhost_net_stop(VirtIODevice *dev, NetClientState *ncs, int total_queues); + +void vhost_net_cleanup(VHostNetState *net); + +uint64_t vhost_net_get_features(VHostNetState *net, uint64_t features); +void vhost_net_ack_features(VHostNetState *net, uint64_t features); + +int vhost_net_get_config(struct vhost_net *net, uint8_t *config, + uint32_t config_len); + +int vhost_net_set_config(struct vhost_net *net, const uint8_t *data, + uint32_t offset, uint32_t size, uint32_t flags); +bool vhost_net_virtqueue_pending(VHostNetState *net, int n); +void vhost_net_virtqueue_mask(VHostNetState *net, VirtIODevice *dev, + int idx, bool mask); +int vhost_net_notify_migration_done(VHostNetState *net, char* mac_addr); +VHostNetState *get_vhost_net(NetClientState *nc); + +int vhost_set_vring_enable(NetClientState * nc, int enable); + +uint64_t vhost_net_get_acked_features(VHostNetState *net); + +int vhost_net_set_mtu(struct vhost_net *net, uint16_t mtu); + +#endif diff --git a/include/qapi/clone-visitor.h b/include/qapi/clone-visitor.h new file mode 100644 index 000000000..adf9a788e --- /dev/null +++ b/include/qapi/clone-visitor.h @@ -0,0 +1,51 @@ +/* + * Clone Visitor + * + * Copyright (C) 2016 Red Hat, Inc. + * + * This work is licensed under the terms of the GNU GPL, version 2 or later. + * See the COPYING file in the top-level directory. + * + */ + +#ifndef QAPI_CLONE_VISITOR_H +#define QAPI_CLONE_VISITOR_H + +#include "qapi/visitor.h" + +/* + * The clone visitor is for direct use only by the QAPI_CLONE() macro; + * it requires that the root visit occur on an object, list, or + * alternate, and is not usable directly on built-in QAPI types. + */ +typedef struct QapiCloneVisitor QapiCloneVisitor; + +void *qapi_clone(const void *src, bool (*visit_type)(Visitor *, const char *, + void **, Error **)); +void qapi_clone_members(void *dst, const void *src, size_t sz, + bool (*visit_type_members)(Visitor *, void *, + Error **)); + +/* + * Deep-clone QAPI object @src of the given @type, and return the result. + * + * Not usable on QAPI scalars (integers, strings, enums), nor on a + * QAPI object that references the 'any' type. Safe when @src is NULL. + */ +#define QAPI_CLONE(type, src) \ + ((type *)qapi_clone(src, \ + (bool (*)(Visitor *, const char *, void **, \ + Error **))visit_type_ ## type)) + +/* + * Copy deep clones of @type members from @src to @dst. + * + * Not usable on QAPI scalars (integers, strings, enums), nor on a + * QAPI object that references the 'any' type. + */ +#define QAPI_CLONE_MEMBERS(type, dst, src) \ + qapi_clone_members(dst, src, sizeof(type), \ + (bool (*)(Visitor *, void *, \ + Error **))visit_type_ ## type ## _members) + +#endif diff --git a/include/qapi/dealloc-visitor.h b/include/qapi/dealloc-visitor.h new file mode 100644 index 000000000..c36715fdf --- /dev/null +++ b/include/qapi/dealloc-visitor.h @@ -0,0 +1,28 @@ +/* + * Dealloc Visitor + * + * Copyright IBM, Corp. 2011 + * + * Authors: + * Michael Roth + * + * This work is licensed under the terms of the GNU LGPL, version 2.1 or later. + * See the COPYING.LIB file in the top-level directory. + * + */ + +#ifndef QAPI_DEALLOC_VISITOR_H +#define QAPI_DEALLOC_VISITOR_H + +#include "qapi/visitor.h" + +typedef struct QapiDeallocVisitor QapiDeallocVisitor; + +/* + * The dealloc visitor is primarily used only by generated + * qapi_free_FOO() functions, and is the only visitor designed to work + * correctly in the face of a partially-constructed QAPI tree. + */ +Visitor *qapi_dealloc_visitor_new(void); + +#endif diff --git a/include/qapi/error.h b/include/qapi/error.h new file mode 100644 index 000000000..eaa05c483 --- /dev/null +++ b/include/qapi/error.h @@ -0,0 +1,534 @@ +/* + * QEMU Error Objects + * + * Copyright IBM, Corp. 2011 + * Copyright (C) 2011-2015 Red Hat, Inc. + * + * Authors: + * Anthony Liguori + * Markus Armbruster + * + * This work is licensed under the terms of the GNU LGPL, version 2. See + * the COPYING.LIB file in the top-level directory. + */ + +/* + * Error reporting system loosely patterned after Glib's GError. + * + * = Rules = + * + * - Functions that use Error to report errors have an Error **errp + * parameter. It should be the last parameter, except for functions + * taking variable arguments. + * + * - You may pass NULL to not receive the error, &error_abort to abort + * on error, &error_fatal to exit(1) on error, or a pointer to a + * variable containing NULL to receive the error. + * + * - Separation of concerns: the function is responsible for detecting + * errors and failing cleanly; handling the error is its caller's + * job. Since the value of @errp is about handling the error, the + * function should not examine it. + * + * - The function may pass @errp to functions it calls to pass on + * their errors to its caller. If it dereferences @errp to check + * for errors, it must use ERRP_GUARD(). + * + * - On success, the function should not touch *errp. On failure, it + * should set a new error, e.g. with error_setg(errp, ...), or + * propagate an existing one, e.g. with error_propagate(errp, ...). + * + * - Whenever practical, also return a value that indicates success / + * failure. This can make the error checking more concise, and can + * avoid useless error object creation and destruction. Note that + * we still have many functions returning void. We recommend + * • bool-valued functions return true on success / false on failure, + * • pointer-valued functions return non-null / null pointer, and + * • integer-valued functions return non-negative / negative. + * + * = Creating errors = + * + * Create an error: + * error_setg(errp, "situation normal, all fouled up"); + * where @errp points to the location to receive the error. + * + * Create an error and add additional explanation: + * error_setg(errp, "invalid quark"); + * error_append_hint(errp, "Valid quarks are up, down, strange, " + * "charm, top, bottom.\n"); + * This may require use of ERRP_GUARD(); more on that below. + * + * Do *not* contract this to + * error_setg(errp, "invalid quark\n" // WRONG! + * "Valid quarks are up, down, strange, charm, top, bottom."); + * + * = Reporting and destroying errors = + * + * Report an error to the current monitor if we have one, else stderr: + * error_report_err(err); + * This frees the error object. + * + * Likewise, but with additional text prepended: + * error_reportf_err(err, "Could not frobnicate '%s': ", name); + * + * Report an error somewhere else: + * const char *msg = error_get_pretty(err); + * do with msg what needs to be done... + * error_free(err); + * Note that this loses hints added with error_append_hint(). + * + * Call a function ignoring errors: + * foo(arg, NULL); + * This is more concise than + * Error *err = NULL; + * foo(arg, &err); + * error_free(err); // don't do this + * + * Call a function aborting on errors: + * foo(arg, &error_abort); + * This is more concise and fails more nicely than + * Error *err = NULL; + * foo(arg, &err); + * assert(!err); // don't do this + * + * Call a function treating errors as fatal: + * foo(arg, &error_fatal); + * This is more concise than + * Error *err = NULL; + * foo(arg, &err); + * if (err) { // don't do this + * error_report_err(err); + * exit(1); + * } + * + * Handle an error without reporting it (just for completeness): + * error_free(err); + * + * Assert that an expected error occurred, but clean it up without + * reporting it (primarily useful in testsuites): + * error_free_or_abort(&err); + * + * = Passing errors around = + * + * Errors get passed to the caller through the conventional @errp + * parameter. + * + * Create a new error and pass it to the caller: + * error_setg(errp, "situation normal, all fouled up"); + * + * Call a function, receive an error from it, and pass it to the caller + * - when the function returns a value that indicates failure, say + * false: + * if (!foo(arg, errp)) { + * handle the error... + * } + * - when it does not, say because it is a void function: + * ERRP_GUARD(); + * foo(arg, errp); + * if (*errp) { + * handle the error... + * } + * More on ERRP_GUARD() below. + * + * Code predating ERRP_GUARD() still exists, and looks like this: + * Error *err = NULL; + * foo(arg, &err); + * if (err) { + * handle the error... + * error_propagate(errp, err); // deprecated + * } + * Avoid in new code. Do *not* "optimize" it to + * foo(arg, errp); + * if (*errp) { // WRONG! + * handle the error... + * } + * because errp may be NULL without the ERRP_GUARD() guard. + * + * But when all you do with the error is pass it on, please use + * foo(arg, errp); + * for readability. + * + * Receive an error, and handle it locally + * - when the function returns a value that indicates failure, say + * false: + * Error *err = NULL; + * if (!foo(arg, &err)) { + * handle the error... + * } + * - when it does not, say because it is a void function: + * Error *err = NULL; + * foo(arg, &err); + * if (err) { + * handle the error... + * } + * + * Pass an existing error to the caller: + * error_propagate(errp, err); + * This is rarely needed. When @err is a local variable, use of + * ERRP_GUARD() commonly results in more readable code. + * + * Pass an existing error to the caller with the message modified: + * error_propagate_prepend(errp, err, + * "Could not frobnicate '%s': ", name); + * This is more concise than + * error_propagate(errp, err); // don't do this + * error_prepend(errp, "Could not frobnicate '%s': ", name); + * and works even when @errp is &error_fatal. + * + * Receive and accumulate multiple errors (first one wins): + * Error *err = NULL, *local_err = NULL; + * foo(arg, &err); + * bar(arg, &local_err); + * error_propagate(&err, local_err); + * if (err) { + * handle the error... + * } + * + * Do *not* "optimize" this to + * Error *err = NULL; + * foo(arg, &err); + * bar(arg, &err); // WRONG! + * if (err) { + * handle the error... + * } + * because this may pass a non-null err to bar(). + * + * Likewise, do *not* + * Error *err = NULL; + * if (cond1) { + * error_setg(&err, ...); + * } + * if (cond2) { + * error_setg(&err, ...); // WRONG! + * } + * because this may pass a non-null err to error_setg(). + * + * = Why, when and how to use ERRP_GUARD() = + * + * Without ERRP_GUARD(), use of the @errp parameter is restricted: + * - It must not be dereferenced, because it may be null. + * - It should not be passed to error_prepend() or + * error_append_hint(), because that doesn't work with &error_fatal. + * ERRP_GUARD() lifts these restrictions. + * + * To use ERRP_GUARD(), add it right at the beginning of the function. + * @errp can then be used without worrying about the argument being + * NULL or &error_fatal. + * + * Using it when it's not needed is safe, but please avoid cluttering + * the source with useless code. + * + * = Converting to ERRP_GUARD() = + * + * To convert a function to use ERRP_GUARD(): + * + * 0. If the Error ** parameter is not named @errp, rename it to + * @errp. + * + * 1. Add an ERRP_GUARD() invocation, by convention right at the + * beginning of the function. This makes @errp safe to use. + * + * 2. Replace &err by errp, and err by *errp. Delete local variable + * @err. + * + * 3. Delete error_propagate(errp, *errp), replace + * error_propagate_prepend(errp, *errp, ...) by error_prepend(errp, ...) + * + * 4. Ensure @errp is valid at return: when you destroy *errp, set + * errp = NULL. + * + * Example: + * + * bool fn(..., Error **errp) + * { + * Error *err = NULL; + * + * foo(arg, &err); + * if (err) { + * handle the error... + * error_propagate(errp, err); + * return false; + * } + * ... + * } + * + * becomes + * + * bool fn(..., Error **errp) + * { + * ERRP_GUARD(); + * + * foo(arg, errp); + * if (*errp) { + * handle the error... + * return false; + * } + * ... + * } + * + * For mass-conversion, use scripts/coccinelle/errp-guard.cocci. + */ + +#ifndef ERROR_H +#define ERROR_H + +#include "qapi/qapi-types-error.h" + +/* + * Overall category of an error. + * Based on the qapi type QapiErrorClass, but reproduced here for nicer + * enum names. + */ +typedef enum ErrorClass { + ERROR_CLASS_GENERIC_ERROR = QAPI_ERROR_CLASS_GENERICERROR, + ERROR_CLASS_COMMAND_NOT_FOUND = QAPI_ERROR_CLASS_COMMANDNOTFOUND, + ERROR_CLASS_DEVICE_NOT_ACTIVE = QAPI_ERROR_CLASS_DEVICENOTACTIVE, + ERROR_CLASS_DEVICE_NOT_FOUND = QAPI_ERROR_CLASS_DEVICENOTFOUND, + ERROR_CLASS_KVM_MISSING_CAP = QAPI_ERROR_CLASS_KVMMISSINGCAP, +} ErrorClass; + +/* + * Get @err's human-readable error message. + */ +const char *error_get_pretty(const Error *err); + +/* + * Get @err's error class. + * Note: use of error classes other than ERROR_CLASS_GENERIC_ERROR is + * strongly discouraged. + */ +ErrorClass error_get_class(const Error *err); + +/* + * Create a new error object and assign it to *@errp. + * If @errp is NULL, the error is ignored. Don't bother creating one + * then. + * If @errp is &error_abort, print a suitable message and abort(). + * If @errp is &error_fatal, print a suitable message and exit(1). + * If @errp is anything else, *@errp must be NULL. + * The new error's class is ERROR_CLASS_GENERIC_ERROR, and its + * human-readable error message is made from printf-style @fmt, ... + * The resulting message should be a single phrase, with no newline or + * trailing punctuation. + * Please don't error_setg(&error_fatal, ...), use error_report() and + * exit(), because that's more obvious. + * Likewise, don't error_setg(&error_abort, ...), use assert(). + */ +#define error_setg(errp, fmt, ...) \ + error_setg_internal((errp), __FILE__, __LINE__, __func__, \ + (fmt), ## __VA_ARGS__) +void error_setg_internal(Error **errp, + const char *src, int line, const char *func, + const char *fmt, ...) + GCC_FMT_ATTR(5, 6); + +/* + * Just like error_setg(), with @os_error info added to the message. + * If @os_error is non-zero, ": " + strerror(os_error) is appended to + * the human-readable error message. + * + * The value of errno (which usually can get clobbered by almost any + * function call) will be preserved. + */ +#define error_setg_errno(errp, os_error, fmt, ...) \ + error_setg_errno_internal((errp), __FILE__, __LINE__, __func__, \ + (os_error), (fmt), ## __VA_ARGS__) +void error_setg_errno_internal(Error **errp, + const char *fname, int line, const char *func, + int os_error, const char *fmt, ...) + GCC_FMT_ATTR(6, 7); + +#ifdef _WIN32 +/* + * Just like error_setg(), with @win32_error info added to the message. + * If @win32_error is non-zero, ": " + g_win32_error_message(win32_err) + * is appended to the human-readable error message. + */ +#define error_setg_win32(errp, win32_err, fmt, ...) \ + error_setg_win32_internal((errp), __FILE__, __LINE__, __func__, \ + (win32_err), (fmt), ## __VA_ARGS__) +void error_setg_win32_internal(Error **errp, + const char *src, int line, const char *func, + int win32_err, const char *fmt, ...) + GCC_FMT_ATTR(6, 7); +#endif + +/* + * Propagate error object (if any) from @local_err to @dst_errp. + * If @local_err is NULL, do nothing (because there's nothing to + * propagate). + * Else, if @dst_errp is NULL, errors are being ignored. Free the + * error object. + * Else, if @dst_errp is &error_abort, print a suitable message and + * abort(). + * Else, if @dst_errp is &error_fatal, print a suitable message and + * exit(1). + * Else, if @dst_errp already contains an error, ignore this one: free + * the error object. + * Else, move the error object from @local_err to *@dst_errp. + * On return, @local_err is invalid. + * Please use ERRP_GUARD() instead when possible. + * Please don't error_propagate(&error_fatal, ...), use + * error_report_err() and exit(), because that's more obvious. + */ +void error_propagate(Error **dst_errp, Error *local_err); + + +/* + * Propagate error object (if any) with some text prepended. + * Behaves like + * error_prepend(&local_err, fmt, ...); + * error_propagate(dst_errp, local_err); + * Please use ERRP_GUARD() and error_prepend() instead when possible. + */ +void error_propagate_prepend(Error **dst_errp, Error *local_err, + const char *fmt, ...) + GCC_FMT_ATTR(3, 4); + +/* + * Prepend some text to @errp's human-readable error message. + * The text is made by formatting @fmt, @ap like vprintf(). + */ +void error_vprepend(Error *const *errp, const char *fmt, va_list ap) + GCC_FMT_ATTR(2, 0); + +/* + * Prepend some text to @errp's human-readable error message. + * The text is made by formatting @fmt, ... like printf(). + */ +void error_prepend(Error *const *errp, const char *fmt, ...) + GCC_FMT_ATTR(2, 3); + +/* + * Append a printf-style human-readable explanation to an existing error. + * If the error is later reported to a human user with + * error_report_err() or warn_report_err(), the hints will be shown, + * too. If it's reported via QMP, the hints will be ignored. + * Intended use is adding helpful hints on the human user interface, + * e.g. a list of valid values. It's not for clarifying a confusing + * error message. + * @errp may be NULL, but not &error_fatal or &error_abort. + * Trivially the case if you call it only after error_setg() or + * error_propagate(). + * May be called multiple times. The resulting hint should end with a + * newline. + */ +void error_append_hint(Error *const *errp, const char *fmt, ...) + GCC_FMT_ATTR(2, 3); + +/* + * Convenience function to report open() failure. + */ +#define error_setg_file_open(errp, os_errno, filename) \ + error_setg_file_open_internal((errp), __FILE__, __LINE__, __func__, \ + (os_errno), (filename)) +void error_setg_file_open_internal(Error **errp, + const char *src, int line, const char *func, + int os_errno, const char *filename); + +/* + * Return an exact copy of @err. + */ +Error *error_copy(const Error *err); + +/* + * Free @err. + * @err may be NULL. + */ +void error_free(Error *err); + +/* + * Convenience function to assert that *@errp is set, then silently free it. + */ +void error_free_or_abort(Error **errp); + +/* + * Convenience function to warn_report() and free @err. + * The report includes hints added with error_append_hint(). + */ +void warn_report_err(Error *err); + +/* + * Convenience function to error_report() and free @err. + * The report includes hints added with error_append_hint(). + */ +void error_report_err(Error *err); + +/* + * Convenience function to error_prepend(), warn_report() and free @err. + */ +void warn_reportf_err(Error *err, const char *fmt, ...) + GCC_FMT_ATTR(2, 3); + +/* + * Convenience function to error_prepend(), error_report() and free @err. + */ +void error_reportf_err(Error *err, const char *fmt, ...) + GCC_FMT_ATTR(2, 3); + +/* + * Just like error_setg(), except you get to specify the error class. + * Note: use of error classes other than ERROR_CLASS_GENERIC_ERROR is + * strongly discouraged. + */ +#define error_set(errp, err_class, fmt, ...) \ + error_set_internal((errp), __FILE__, __LINE__, __func__, \ + (err_class), (fmt), ## __VA_ARGS__) +void error_set_internal(Error **errp, + const char *src, int line, const char *func, + ErrorClass err_class, const char *fmt, ...) + GCC_FMT_ATTR(6, 7); + +/* + * Make @errp parameter easier to use regardless of argument value + * + * This macro is for use right at the beginning of a function that + * takes an Error **errp parameter to pass errors to its caller. The + * parameter must be named @errp. + * + * It must be used when the function dereferences @errp or passes + * @errp to error_prepend(), error_vprepend(), or error_append_hint(). + * It is safe to use even when it's not needed, but please avoid + * cluttering the source with useless code. + * + * If @errp is NULL or &error_fatal, rewrite it to point to a local + * Error variable, which will be automatically propagated to the + * original @errp on function exit. + * + * Note: &error_abort is not rewritten, because that would move the + * abort from the place where the error is created to the place where + * it's propagated. + */ +#define ERRP_GUARD() \ + g_auto(ErrorPropagator) _auto_errp_prop = {.errp = errp}; \ + do { \ + if (!errp || errp == &error_fatal) { \ + errp = &_auto_errp_prop.local_err; \ + } \ + } while (0) + +typedef struct ErrorPropagator { + Error *local_err; + Error **errp; +} ErrorPropagator; + +static inline void error_propagator_cleanup(ErrorPropagator *prop) +{ + error_propagate(prop->errp, prop->local_err); +} + +G_DEFINE_AUTO_CLEANUP_CLEAR_FUNC(ErrorPropagator, error_propagator_cleanup); + +/* + * Special error destination to abort on error. + * See error_setg() and error_propagate() for details. + */ +extern Error *error_abort; + +/* + * Special error destination to exit(1) on error. + * See error_setg() and error_propagate() for details. + */ +extern Error *error_fatal; + +#endif diff --git a/include/qapi/opts-visitor.h b/include/qapi/opts-visitor.h new file mode 100644 index 000000000..9b989e7e0 --- /dev/null +++ b/include/qapi/opts-visitor.h @@ -0,0 +1,39 @@ +/* + * Options Visitor + * + * Copyright Red Hat, Inc. 2012 + * + * Author: Laszlo Ersek + * + * This work is licensed under the terms of the GNU LGPL, version 2.1 or later. + * See the COPYING.LIB file in the top-level directory. + * + */ + +#ifndef OPTS_VISITOR_H +#define OPTS_VISITOR_H + +#include "qapi/visitor.h" + +/* Inclusive upper bound on the size of any flattened range. This is a safety + * (= anti-annoyance) measure; wrong ranges should not cause long startup + * delays nor exhaust virtual memory. + */ +#define OPTS_VISITOR_RANGE_MAX 65536 + +typedef struct OptsVisitor OptsVisitor; + +/* Contrarily to qemu-option.c::parse_option_number(), OptsVisitor's "int" + * parser relies on strtoll() instead of strtoull(). Consequences: + * - string representations of negative numbers yield negative values, + * - values below INT64_MIN or LLONG_MIN are rejected, + * - values above INT64_MAX or LLONG_MAX are rejected. + * + * The Opts input visitor does not implement support for visiting QAPI + * alternates, numbers (other than integers), null, or arbitrary + * QTypes. It also requires a non-null list argument to + * visit_start_list(). + */ +Visitor *opts_visitor_new(const QemuOpts *opts); + +#endif diff --git a/include/qapi/qmp-event.h b/include/qapi/qmp-event.h new file mode 100644 index 000000000..b60f1d3a8 --- /dev/null +++ b/include/qapi/qmp-event.h @@ -0,0 +1,18 @@ +/* + * QMP Event related + * + * Copyright (c) 2014 Wenchao Xia + * + * Authors: + * Wenchao Xia + * + * This work is licensed under the terms of the GNU LGPL, version 2.1 or later. + * See the COPYING.LIB file in the top-level directory. + * + */ + +#ifndef QMP_EVENT_H +#define QMP_EVENT_H + +QDict *qmp_event_build_dict(const char *event_name); +#endif diff --git a/include/qapi/qmp/dispatch.h b/include/qapi/qmp/dispatch.h new file mode 100644 index 000000000..af8d96c57 --- /dev/null +++ b/include/qapi/qmp/dispatch.h @@ -0,0 +1,63 @@ +/* + * Core Definitions for QAPI/QMP Dispatch + * + * Copyright IBM, Corp. 2011 + * + * Authors: + * Anthony Liguori + * + * This work is licensed under the terms of the GNU LGPL, version 2.1 or later. + * See the COPYING.LIB file in the top-level directory. + * + */ + +#ifndef QAPI_QMP_DISPATCH_H +#define QAPI_QMP_DISPATCH_H + +#include "monitor/monitor.h" +#include "qemu/queue.h" + +typedef void (QmpCommandFunc)(QDict *, QObject **, Error **); + +typedef enum QmpCommandOptions +{ + QCO_NO_OPTIONS = 0x0, + QCO_NO_SUCCESS_RESP = (1U << 0), + QCO_ALLOW_OOB = (1U << 1), + QCO_ALLOW_PRECONFIG = (1U << 2), + QCO_COROUTINE = (1U << 3), +} QmpCommandOptions; + +typedef struct QmpCommand +{ + const char *name; + /* Runs in coroutine context if QCO_COROUTINE is set */ + QmpCommandFunc *fn; + QmpCommandOptions options; + QTAILQ_ENTRY(QmpCommand) node; + bool enabled; +} QmpCommand; + +typedef QTAILQ_HEAD(QmpCommandList, QmpCommand) QmpCommandList; + +void qmp_register_command(QmpCommandList *cmds, const char *name, + QmpCommandFunc *fn, QmpCommandOptions options); +const QmpCommand *qmp_find_command(const QmpCommandList *cmds, + const char *name); +void qmp_disable_command(QmpCommandList *cmds, const char *name); +void qmp_enable_command(QmpCommandList *cmds, const char *name); + +bool qmp_command_is_enabled(const QmpCommand *cmd); +const char *qmp_command_name(const QmpCommand *cmd); +bool qmp_has_success_response(const QmpCommand *cmd); +QDict *qmp_error_response(Error *err); +QDict *qmp_dispatch(const QmpCommandList *cmds, QObject *request, + bool allow_oob, Monitor *cur_mon); +bool qmp_is_oob(const QDict *dict); + +typedef void (*qmp_cmd_callback_fn)(const QmpCommand *cmd, void *opaque); + +void qmp_for_each_command(const QmpCommandList *cmds, qmp_cmd_callback_fn fn, + void *opaque); + +#endif diff --git a/include/qapi/qmp/json-parser.h b/include/qapi/qmp/json-parser.h new file mode 100644 index 000000000..7345a9bd5 --- /dev/null +++ b/include/qapi/qmp/json-parser.h @@ -0,0 +1,46 @@ +/* + * JSON Parser + * + * Copyright IBM, Corp. 2009 + * + * Authors: + * Anthony Liguori + * + * This work is licensed under the terms of the GNU LGPL, version 2.1 or later. + * See the COPYING.LIB file in the top-level directory. + * + */ + +#ifndef QAPI_QMP_JSON_PARSER_H +#define QAPI_QMP_JSON_PARSER_H + +typedef struct JSONLexer { + int start_state, state; + GString *token; + int x, y; +} JSONLexer; + +typedef struct JSONMessageParser { + void (*emit)(void *opaque, QObject *json, Error *err); + void *opaque; + va_list *ap; + JSONLexer lexer; + int brace_count; + int bracket_count; + GQueue tokens; + uint64_t token_size; +} JSONMessageParser; + +void json_message_parser_init(JSONMessageParser *parser, + void (*emit)(void *opaque, QObject *json, + Error *err), + void *opaque, va_list *ap); + +void json_message_parser_feed(JSONMessageParser *parser, + const char *buffer, size_t size); + +void json_message_parser_flush(JSONMessageParser *parser); + +void json_message_parser_destroy(JSONMessageParser *parser); + +#endif diff --git a/include/qapi/qmp/qbool.h b/include/qapi/qmp/qbool.h new file mode 100644 index 000000000..5f61e38e6 --- /dev/null +++ b/include/qapi/qmp/qbool.h @@ -0,0 +1,29 @@ +/* + * QBool Module + * + * Copyright IBM, Corp. 2009 + * + * Authors: + * Anthony Liguori + * + * This work is licensed under the terms of the GNU LGPL, version 2.1 or later. + * See the COPYING.LIB file in the top-level directory. + * + */ + +#ifndef QBOOL_H +#define QBOOL_H + +#include "qapi/qmp/qobject.h" + +struct QBool { + struct QObjectBase_ base; + bool value; +}; + +QBool *qbool_from_bool(bool value); +bool qbool_get_bool(const QBool *qb); +bool qbool_is_equal(const QObject *x, const QObject *y); +void qbool_destroy_obj(QObject *obj); + +#endif /* QBOOL_H */ diff --git a/include/qapi/qmp/qdict.h b/include/qapi/qmp/qdict.h new file mode 100644 index 000000000..da942347a --- /dev/null +++ b/include/qapi/qmp/qdict.h @@ -0,0 +1,69 @@ +/* + * QDict Module + * + * Copyright (C) 2009 Red Hat Inc. + * + * Authors: + * Luiz Capitulino + * + * This work is licensed under the terms of the GNU LGPL, version 2.1 or later. + * See the COPYING.LIB file in the top-level directory. + */ + +#ifndef QDICT_H +#define QDICT_H + +#include "qapi/qmp/qobject.h" +#include "qemu/queue.h" + +#define QDICT_BUCKET_MAX 512 + +typedef struct QDictEntry { + char *key; + QObject *value; + QLIST_ENTRY(QDictEntry) next; +} QDictEntry; + +struct QDict { + struct QObjectBase_ base; + size_t size; + QLIST_HEAD(,QDictEntry) table[QDICT_BUCKET_MAX]; +}; + +/* Object API */ +QDict *qdict_new(void); +const char *qdict_entry_key(const QDictEntry *entry); +QObject *qdict_entry_value(const QDictEntry *entry); +size_t qdict_size(const QDict *qdict); +void qdict_put_obj(QDict *qdict, const char *key, QObject *value); +void qdict_del(QDict *qdict, const char *key); +int qdict_haskey(const QDict *qdict, const char *key); +QObject *qdict_get(const QDict *qdict, const char *key); +bool qdict_is_equal(const QObject *x, const QObject *y); +const QDictEntry *qdict_first(const QDict *qdict); +const QDictEntry *qdict_next(const QDict *qdict, const QDictEntry *entry); +void qdict_destroy_obj(QObject *obj); + +/* Helper to qdict_put_obj(), accepts any object */ +#define qdict_put(qdict, key, obj) \ + qdict_put_obj(qdict, key, QOBJECT(obj)) + +void qdict_put_bool(QDict *qdict, const char *key, bool value); +void qdict_put_int(QDict *qdict, const char *key, int64_t value); +void qdict_put_null(QDict *qdict, const char *key); +void qdict_put_str(QDict *qdict, const char *key, const char *value); + +double qdict_get_double(const QDict *qdict, const char *key); +int64_t qdict_get_int(const QDict *qdict, const char *key); +bool qdict_get_bool(const QDict *qdict, const char *key); +QList *qdict_get_qlist(const QDict *qdict, const char *key); +QDict *qdict_get_qdict(const QDict *qdict, const char *key); +const char *qdict_get_str(const QDict *qdict, const char *key); +int64_t qdict_get_try_int(const QDict *qdict, const char *key, + int64_t def_value); +bool qdict_get_try_bool(const QDict *qdict, const char *key, bool def_value); +const char *qdict_get_try_str(const QDict *qdict, const char *key); + +QDict *qdict_clone_shallow(const QDict *src); + +#endif /* QDICT_H */ diff --git a/include/qapi/qmp/qerror.h b/include/qapi/qmp/qerror.h new file mode 100644 index 000000000..7c76e24aa --- /dev/null +++ b/include/qapi/qmp/qerror.h @@ -0,0 +1,94 @@ +/* + * QError Module + * + * Copyright (C) 2009 Red Hat Inc. + * + * Authors: + * Luiz Capitulino + * + * This work is licensed under the terms of the GNU LGPL, version 2.1 or later. + * See the COPYING.LIB file in the top-level directory. + */ +#ifndef QERROR_H +#define QERROR_H + +/* + * These macros will go away, please don't use in new code, and do not + * add new ones! + */ +#define QERR_BASE_NOT_FOUND \ + "Base '%s' not found" + +#define QERR_BUS_NO_HOTPLUG \ + "Bus '%s' does not support hotplugging" + +#define QERR_DEVICE_HAS_NO_MEDIUM \ + "Device '%s' has no medium" + +#define QERR_DEVICE_INIT_FAILED \ + "Device '%s' could not be initialized" + +#define QERR_DEVICE_IN_USE \ + "Device '%s' is in use" + +#define QERR_DEVICE_NO_HOTPLUG \ + "Device '%s' does not support hotplugging" + +#define QERR_FD_NOT_FOUND \ + "File descriptor named '%s' not found" + +#define QERR_FD_NOT_SUPPLIED \ + "No file descriptor supplied via SCM_RIGHTS" + +#define QERR_FEATURE_DISABLED \ + "The feature '%s' is not enabled" + +#define QERR_INVALID_BLOCK_FORMAT \ + "Invalid block format '%s'" + +#define QERR_INVALID_PARAMETER \ + "Invalid parameter '%s'" + +#define QERR_INVALID_PARAMETER_TYPE \ + "Invalid parameter type for '%s', expected: %s" + +#define QERR_INVALID_PARAMETER_VALUE \ + "Parameter '%s' expects %s" + +#define QERR_INVALID_PASSWORD \ + "Password incorrect" + +#define QERR_IO_ERROR \ + "An IO error has occurred" + +#define QERR_MIGRATION_ACTIVE \ + "There's a migration process in progress" + +#define QERR_MISSING_PARAMETER \ + "Parameter '%s' is missing" + +#define QERR_PERMISSION_DENIED \ + "Insufficient permission to perform this operation" + +#define QERR_PROPERTY_VALUE_BAD \ + "Property '%s.%s' doesn't take value '%s'" + +#define QERR_PROPERTY_VALUE_OUT_OF_RANGE \ + "Property %s.%s doesn't take value %" PRId64 " (minimum: %" PRId64 ", maximum: %" PRId64 ")" + +#define QERR_QGA_COMMAND_FAILED \ + "Guest agent command failed, error was '%s'" + +#define QERR_REPLAY_NOT_SUPPORTED \ + "Record/replay feature is not supported for '%s'" + +#define QERR_SET_PASSWD_FAILED \ + "Could not set password" + +#define QERR_UNDEFINED_ERROR \ + "An undefined error has occurred" + +#define QERR_UNSUPPORTED \ + "this feature or command is not currently supported" + +#endif /* QERROR_H */ diff --git a/include/qapi/qmp/qjson.h b/include/qapi/qmp/qjson.h new file mode 100644 index 000000000..5ebbe5a11 --- /dev/null +++ b/include/qapi/qmp/qjson.h @@ -0,0 +1,31 @@ +/* + * QObject JSON integration + * + * Copyright IBM, Corp. 2009 + * + * Authors: + * Anthony Liguori + * + * This work is licensed under the terms of the GNU LGPL, version 2.1 or later. + * See the COPYING.LIB file in the top-level directory. + * + */ + +#ifndef QJSON_H +#define QJSON_H + +QObject *qobject_from_json(const char *string, Error **errp); + +QObject *qobject_from_vjsonf_nofail(const char *string, va_list ap) + GCC_FMT_ATTR(1, 0); +QObject *qobject_from_jsonf_nofail(const char *string, ...) + GCC_FMT_ATTR(1, 2); +QDict *qdict_from_vjsonf_nofail(const char *string, va_list ap) + GCC_FMT_ATTR(1, 0); +QDict *qdict_from_jsonf_nofail(const char *string, ...) + GCC_FMT_ATTR(1, 2); + +QString *qobject_to_json(const QObject *obj); +QString *qobject_to_json_pretty(const QObject *obj); + +#endif /* QJSON_H */ diff --git a/include/qapi/qmp/qlist.h b/include/qapi/qmp/qlist.h new file mode 100644 index 000000000..595b7943e --- /dev/null +++ b/include/qapi/qmp/qlist.h @@ -0,0 +1,67 @@ +/* + * QList Module + * + * Copyright (C) 2009 Red Hat Inc. + * + * Authors: + * Luiz Capitulino + * + * This work is licensed under the terms of the GNU LGPL, version 2.1 or later. + * See the COPYING.LIB file in the top-level directory. + */ + +#ifndef QLIST_H +#define QLIST_H + +#include "qapi/qmp/qobject.h" +#include "qemu/queue.h" + +typedef struct QListEntry { + QObject *value; + QTAILQ_ENTRY(QListEntry) next; +} QListEntry; + +struct QList { + struct QObjectBase_ base; + QTAILQ_HEAD(,QListEntry) head; +}; + +#define qlist_append(qlist, obj) \ + qlist_append_obj(qlist, QOBJECT(obj)) + +void qlist_append_bool(QList *qlist, bool value); +void qlist_append_int(QList *qlist, int64_t value); +void qlist_append_null(QList *qlist); +void qlist_append_str(QList *qlist, const char *value); + +#define QLIST_FOREACH_ENTRY(qlist, var) \ + for ((var) = QTAILQ_FIRST(&(qlist)->head); \ + (var); \ + (var) = QTAILQ_NEXT((var), next)) + +static inline QObject *qlist_entry_obj(const QListEntry *entry) +{ + return entry->value; +} + +QList *qlist_new(void); +QList *qlist_copy(QList *src); +void qlist_append_obj(QList *qlist, QObject *obj); +QObject *qlist_pop(QList *qlist); +QObject *qlist_peek(QList *qlist); +int qlist_empty(const QList *qlist); +size_t qlist_size(const QList *qlist); +bool qlist_is_equal(const QObject *x, const QObject *y); +void qlist_destroy_obj(QObject *obj); + +static inline const QListEntry *qlist_first(const QList *qlist) +{ + return QTAILQ_FIRST(&qlist->head); +} + +static inline const QListEntry *qlist_next(const QListEntry *entry) +{ + return QTAILQ_NEXT(entry, next); +} + +#endif /* QLIST_H */ diff --git a/include/qapi/qmp/qlit.h b/include/qapi/qmp/qlit.h new file mode 100644 index 000000000..c0676d5da --- /dev/null +++ b/include/qapi/qmp/qlit.h @@ -0,0 +1,55 @@ +/* + * Copyright IBM, Corp. 2009 + * Copyright (c) 2013, 2015, 2017 Red Hat Inc. + * + * Authors: + * Anthony Liguori + * Markus Armbruster + * Marc-André Lureau + * + * This work is licensed under the terms of the GNU LGPL, version 2.1 or later. + * See the COPYING.LIB file in the top-level directory. + * + */ +#ifndef QLIT_H +#define QLIT_H + +#include "qobject.h" + +typedef struct QLitDictEntry QLitDictEntry; +typedef struct QLitObject QLitObject; + +struct QLitObject { + QType type; + union { + bool qbool; + int64_t qnum; + const char *qstr; + QLitDictEntry *qdict; + QLitObject *qlist; + } value; +}; + +struct QLitDictEntry { + const char *key; + QLitObject value; +}; + +#define QLIT_QNULL \ + { .type = QTYPE_QNULL } +#define QLIT_QBOOL(val) \ + { .type = QTYPE_QBOOL, .value.qbool = (val) } +#define QLIT_QNUM(val) \ + { .type = QTYPE_QNUM, .value.qnum = (val) } +#define QLIT_QSTR(val) \ + { .type = QTYPE_QSTRING, .value.qstr = (val) } +#define QLIT_QDICT(val) \ + { .type = QTYPE_QDICT, .value.qdict = (val) } +#define QLIT_QLIST(val) \ + { .type = QTYPE_QLIST, .value.qlist = (val) } + +bool qlit_equal_qobject(const QLitObject *lhs, const QObject *rhs); + +QObject *qobject_from_qlit(const QLitObject *qlit); + +#endif /* QLIT_H */ diff --git a/include/qapi/qmp/qnull.h b/include/qapi/qmp/qnull.h new file mode 100644 index 000000000..c1426882c --- /dev/null +++ b/include/qapi/qmp/qnull.h @@ -0,0 +1,31 @@ +/* + * QNull + * + * Copyright (C) 2015 Red Hat, Inc. + * + * Authors: + * Markus Armbruster + * + * This work is licensed under the terms of the GNU LGPL, version 2.1 + * or later. See the COPYING.LIB file in the top-level directory. + */ + +#ifndef QNULL_H +#define QNULL_H + +#include "qapi/qmp/qobject.h" + +struct QNull { + struct QObjectBase_ base; +}; + +extern QNull qnull_; + +static inline QNull *qnull(void) +{ + return qobject_ref(&qnull_); +} + +bool qnull_is_equal(const QObject *x, const QObject *y); + +#endif /* QNULL_H */ diff --git a/include/qapi/qmp/qnum.h b/include/qapi/qmp/qnum.h new file mode 100644 index 000000000..bbae0a5ec --- /dev/null +++ b/include/qapi/qmp/qnum.h @@ -0,0 +1,74 @@ +/* + * QNum Module + * + * Copyright (C) 2009 Red Hat Inc. + * + * Authors: + * Luiz Capitulino + * Anthony Liguori + * Marc-André Lureau + * + * This work is licensed under the terms of the GNU LGPL, version 2.1 or later. + * See the COPYING.LIB file in the top-level directory. + */ + +#ifndef QNUM_H +#define QNUM_H + +#include "qapi/qmp/qobject.h" + +typedef enum { + QNUM_I64, + QNUM_U64, + QNUM_DOUBLE +} QNumKind; + +/* + * QNum encapsulates how our dialect of JSON fills in the blanks left + * by the JSON specification (RFC 8259) regarding numbers. + * + * Conceptually, we treat number as an abstract type with three + * concrete subtypes: floating-point, signed integer, unsigned + * integer. QNum implements this as a discriminated union of double, + * int64_t, uint64_t. + * + * The JSON parser picks the subtype as follows. If the number has a + * decimal point or an exponent, it is floating-point. Else if it + * fits into int64_t, it's signed integer. Else if it fits into + * uint64_t, it's unsigned integer. Else it's floating-point. + * + * Any number can serve as double: qnum_get_double() converts under + * the hood. + * + * An integer can serve as signed / unsigned integer as long as it is + * in range: qnum_get_try_int() / qnum_get_try_uint() check range and + * convert under the hood. + */ +struct QNum { + struct QObjectBase_ base; + QNumKind kind; + union { + int64_t i64; + uint64_t u64; + double dbl; + } u; +}; + +QNum *qnum_from_int(int64_t value); +QNum *qnum_from_uint(uint64_t value); +QNum *qnum_from_double(double value); + +bool qnum_get_try_int(const QNum *qn, int64_t *val); +int64_t qnum_get_int(const QNum *qn); + +bool qnum_get_try_uint(const QNum *qn, uint64_t *val); +uint64_t qnum_get_uint(const QNum *qn); + +double qnum_get_double(QNum *qn); + +char *qnum_to_string(QNum *qn); + +bool qnum_is_equal(const QObject *x, const QObject *y); +void qnum_destroy_obj(QObject *obj); + +#endif /* QNUM_H */ diff --git a/include/qapi/qmp/qobject.h b/include/qapi/qmp/qobject.h new file mode 100644 index 000000000..fcfd54922 --- /dev/null +++ b/include/qapi/qmp/qobject.h @@ -0,0 +1,145 @@ +/* + * QEMU Object Model. + * + * Based on ideas by Avi Kivity + * + * Copyright (C) 2009, 2015 Red Hat Inc. + * + * Authors: + * Luiz Capitulino + * + * This work is licensed under the terms of the GNU LGPL, version 2.1 or later. + * See the COPYING.LIB file in the top-level directory. + * + * QObject Reference Counts Terminology + * ------------------------------------ + * + * - Returning references: A function that returns an object may + * return it as either a weak or a strong reference. If the + * reference is strong, you are responsible for calling + * qobject_unref() on the reference when you are done. + * + * If the reference is weak, the owner of the reference may free it at + * any time in the future. Before storing the reference anywhere, you + * should call qobject_ref() to make the reference strong. + * + * - Transferring ownership: when you transfer ownership of a reference + * by calling a function, you are no longer responsible for calling + * qobject_unref() when the reference is no longer needed. In other words, + * when the function returns you must behave as if the reference to the + * passed object was weak. + */ +#ifndef QOBJECT_H +#define QOBJECT_H + +#include "qapi/qapi-builtin-types.h" + +/* Not for use outside include/qapi/qmp/ */ +struct QObjectBase_ { + QType type; + size_t refcnt; +}; + +/* this struct must have no other members than base */ +struct QObject { + struct QObjectBase_ base; +}; + +#define QOBJECT(obj) ({ \ + typeof(obj) _obj = (obj); \ + _obj ? container_of(&(_obj)->base, QObject, base) : NULL; \ +}) + +/* Required for qobject_to() */ +#define QTYPE_CAST_TO_QNull QTYPE_QNULL +#define QTYPE_CAST_TO_QNum QTYPE_QNUM +#define QTYPE_CAST_TO_QString QTYPE_QSTRING +#define QTYPE_CAST_TO_QDict QTYPE_QDICT +#define QTYPE_CAST_TO_QList QTYPE_QLIST +#define QTYPE_CAST_TO_QBool QTYPE_QBOOL + +QEMU_BUILD_BUG_MSG(QTYPE__MAX != 7, + "The QTYPE_CAST_TO_* list needs to be extended"); + +#define qobject_to(type, obj) \ + ((type *)qobject_check_type(obj, glue(QTYPE_CAST_TO_, type))) + +/* Initialize an object to default values */ +static inline void qobject_init(QObject *obj, QType type) +{ + assert(QTYPE_NONE < type && type < QTYPE__MAX); + obj->base.refcnt = 1; + obj->base.type = type; +} + +static inline void qobject_ref_impl(QObject *obj) +{ + if (obj) { + obj->base.refcnt++; + } +} + +/** + * qobject_is_equal(): Return whether the two objects are equal. + * + * Any of the pointers may be NULL; return true if both are. Always + * return false if only one is (therefore a QNull object is not + * considered equal to a NULL pointer). + */ +bool qobject_is_equal(const QObject *x, const QObject *y); + +/** + * qobject_destroy(): Free resources used by the object + */ +void qobject_destroy(QObject *obj); + +static inline void qobject_unref_impl(QObject *obj) +{ + assert(!obj || obj->base.refcnt); + if (obj && --obj->base.refcnt == 0) { + qobject_destroy(obj); + } +} + +/** + * qobject_ref(): Increment QObject's reference count + * + * Returns: the same @obj. The type of @obj will be propagated to the + * return type. + */ +#define qobject_ref(obj) ({ \ + typeof(obj) _o = (obj); \ + qobject_ref_impl(QOBJECT(_o)); \ + _o; \ +}) + +/** + * qobject_unref(): Decrement QObject's reference count, deallocate + * when it reaches zero + */ +#define qobject_unref(obj) qobject_unref_impl(QOBJECT(obj)) + +/** + * qobject_type(): Return the QObject's type + */ +static inline QType qobject_type(const QObject *obj) +{ + assert(QTYPE_NONE < obj->base.type && obj->base.type < QTYPE__MAX); + return obj->base.type; +} + +/** + * qobject_check_type(): Helper function for the qobject_to() macro. + * Return @obj, but only if @obj is not NULL and @type is equal to + * @obj's type. Return NULL otherwise. + */ +static inline QObject *qobject_check_type(const QObject *obj, QType type) +{ + if (obj && qobject_type(obj) == type) { + return (QObject *)obj; + } else { + return NULL; + } +} + +#endif /* QOBJECT_H */ diff --git a/include/qapi/qmp/qstring.h b/include/qapi/qmp/qstring.h new file mode 100644 index 000000000..e2e356e5e --- /dev/null +++ b/include/qapi/qmp/qstring.h @@ -0,0 +1,39 @@ +/* + * QString Module + * + * Copyright (C) 2009 Red Hat Inc. + * + * Authors: + * Luiz Capitulino + * + * This work is licensed under the terms of the GNU LGPL, version 2.1 or later. + * See the COPYING.LIB file in the top-level directory. + */ + +#ifndef QSTRING_H +#define QSTRING_H + +#include "qapi/qmp/qobject.h" + +struct QString { + struct QObjectBase_ base; + char *string; + size_t length; + size_t capacity; +}; + +QString *qstring_new(void); +QString *qstring_from_str(const char *str); +QString *qstring_from_substr(const char *str, size_t start, size_t end); +size_t qstring_get_length(const QString *qstring); +const char *qstring_get_str(const QString *qstring); +const char *qstring_get_try_str(const QString *qstring); +const char *qobject_get_try_str(const QObject *qstring); +void qstring_append_int(QString *qstring, int64_t value); +void qstring_append(QString *qstring, const char *str); +void qstring_append_chr(QString *qstring, int c); +bool qstring_is_equal(const QObject *x, const QObject *y); +char *qstring_free(QString *qstring, bool return_str); +void qstring_destroy_obj(QObject *obj); + +#endif /* QSTRING_H */ diff --git a/include/qapi/qobject-input-visitor.h b/include/qapi/qobject-input-visitor.h new file mode 100644 index 000000000..95985e25e --- /dev/null +++ b/include/qapi/qobject-input-visitor.h @@ -0,0 +1,82 @@ +/* + * Input Visitor + * + * Copyright (C) 2017 Red Hat, Inc. + * Copyright IBM, Corp. 2011 + * + * Authors: + * Anthony Liguori + * + * This work is licensed under the terms of the GNU LGPL, version 2.1 or later. + * See the COPYING.LIB file in the top-level directory. + * + */ + +#ifndef QOBJECT_INPUT_VISITOR_H +#define QOBJECT_INPUT_VISITOR_H + +#include "qapi/visitor.h" + +typedef struct QObjectInputVisitor QObjectInputVisitor; + +/* + * Create a QObject input visitor for @obj + * + * A QObject input visitor visit builds a QAPI object from a QObject. + * This simultaneously walks the QAPI object being built and the + * QObject. The latter walk starts at @obj. + * + * visit_type_FOO() creates an instance of QAPI type FOO. The visited + * QObject must match FOO. QDict matches struct/union types, QList + * matches list types, QString matches type 'str' and enumeration + * types, QNum matches integer and float types, QBool matches type + * 'bool'. Type 'any' is matched by QObject. A QAPI alternate type + * is matched when one of its member types is. + * + * visit_start_struct() ... visit_end_struct() visits a QDict and + * creates a QAPI struct/union. Visits in between visit the + * dictionary members. visit_optional() is true when the QDict has + * this member. visit_check_struct() fails if unvisited members + * remain. + * + * visit_start_list() ... visit_end_list() visits a QList and creates + * a QAPI list. Visits in between visit list members, one after the + * other. visit_next_list() returns NULL when all QList members have + * been visited. visit_check_list() fails if unvisited members + * remain. + * + * visit_start_alternate() ... visit_end_alternate() visits a QObject + * and creates a QAPI alternate. The visit in between visits the same + * QObject and initializes the alternate member that is in use. + * + * Error messages refer to parts of @obj in JavaScript/Python syntax. + * For example, 'a.b[2]' refers to the second member of the QList + * member 'b' of the QDict member 'a' of QDict @obj. + * + * The caller is responsible for freeing the visitor with + * visit_free(). + */ +Visitor *qobject_input_visitor_new(QObject *obj); + +/* + * Create a QObject input visitor for @obj for use with keyval_parse() + * + * This is like qobject_input_visitor_new(), except scalars are all + * QString, and error messages refer to parts of @obj in the syntax + * keyval_parse() uses for KEYs. + */ +Visitor *qobject_input_visitor_new_keyval(QObject *obj); + +/* + * Create a QObject input visitor for parsing @str. + * + * If @str looks like JSON, parse it as JSON, else as KEY=VALUE,... + * @implied_key applies to KEY=VALUE, and works as in keyval_parse(). + * On failure, store an error through @errp and return NULL. + * On success, return a new QObject input visitor for the parse. + */ +Visitor *qobject_input_visitor_new_str(const char *str, + const char *implied_key, + Error **errp); + +#endif diff --git a/include/qapi/qobject-output-visitor.h b/include/qapi/qobject-output-visitor.h new file mode 100644 index 000000000..2b1726baf --- /dev/null +++ b/include/qapi/qobject-output-visitor.h @@ -0,0 +1,56 @@ +/* + * Output Visitor + * + * Copyright IBM, Corp. 2011 + * + * Authors: + * Anthony Liguori + * + * This work is licensed under the terms of the GNU LGPL, version 2.1 or later. + * See the COPYING.LIB file in the top-level directory. + * + */ + +#ifndef QOBJECT_OUTPUT_VISITOR_H +#define QOBJECT_OUTPUT_VISITOR_H + +#include "qapi/visitor.h" + +typedef struct QObjectOutputVisitor QObjectOutputVisitor; + +/** + * Create a QObject output visitor for @obj + * + * A QObject output visitor visit builds a QObject from QAPI Object. + * This simultaneously walks the QAPI object and the QObject being + * built. The latter walk starts at @obj. + * + * visit_type_FOO() creates a QObject for QAPI type FOO. It creates a + * QDict for struct/union types, a QList for list types, QString for + * type 'str' and enumeration types, QNum for integer and float + * types, QBool for type 'bool'. For type 'any', it increments the + * QObject's reference count. For QAPI alternate types, it creates + * the QObject for the member that is in use. + * + * visit_start_struct() ... visit_end_struct() visits a QAPI + * struct/union and creates a QDict. Visits in between visit the + * members. visit_optional() is true when the struct/union has this + * member. visit_check_struct() does nothing. + * + * visit_start_list() ... visit_end_list() visits a QAPI list and + * creates a QList. Visits in between visit list members, one after + * the other. visit_next_list() returns NULL when all QAPI list + * members have been visited. visit_check_list() does nothing. + * + * visit_start_alternate() ... visit_end_alternate() visits a QAPI + * alternate. The visit in between creates the QObject for the + * alternate member that is in use. + * + * Errors are not expected to happen. + * + * The caller is responsible for freeing the visitor with + * visit_free(). + */ +Visitor *qobject_output_visitor_new(QObject **result); + +#endif diff --git a/include/qapi/string-input-visitor.h b/include/qapi/string-input-visitor.h new file mode 100644 index 000000000..921f3875b --- /dev/null +++ b/include/qapi/string-input-visitor.h @@ -0,0 +1,27 @@ +/* + * String parsing Visitor + * + * Copyright Red Hat, Inc. 2012 + * + * Author: Paolo Bonzini + * + * This work is licensed under the terms of the GNU LGPL, version 2.1 or later. + * See the COPYING.LIB file in the top-level directory. + * + */ + +#ifndef STRING_INPUT_VISITOR_H +#define STRING_INPUT_VISITOR_H + +#include "qapi/visitor.h" + +typedef struct StringInputVisitor StringInputVisitor; + +/* + * The string input visitor does not implement support for visiting + * QAPI structs, alternates, null, or arbitrary QTypes. Only flat lists + * of integers (except type "size") are supported. + */ +Visitor *string_input_visitor_new(const char *str); + +#endif diff --git a/include/qapi/string-output-visitor.h b/include/qapi/string-output-visitor.h new file mode 100644 index 000000000..268dfe998 --- /dev/null +++ b/include/qapi/string-output-visitor.h @@ -0,0 +1,35 @@ +/* + * String printing Visitor + * + * Copyright Red Hat, Inc. 2012 + * + * Author: Paolo Bonzini + * + * This work is licensed under the terms of the GNU LGPL, version 2.1 or later. + * See the COPYING.LIB file in the top-level directory. + * + */ + +#ifndef STRING_OUTPUT_VISITOR_H +#define STRING_OUTPUT_VISITOR_H + +#include "qapi/visitor.h" + +typedef struct StringOutputVisitor StringOutputVisitor; + +/* + * Create a new string output visitor. + * + * Using @human creates output that is a bit easier for humans to read + * (for example, showing integer values in both decimal and hex). + * + * If everything else succeeds, pass @result to visit_complete() to + * collect the result of the visit. + * + * The string output visitor does not implement support for visiting + * QAPI structs, alternates, null, or arbitrary QTypes. It also + * requires a non-null list argument to visit_start_list(). + */ +Visitor *string_output_visitor_new(bool human, char **result); + +#endif diff --git a/include/qapi/util.h b/include/qapi/util.h new file mode 100644 index 000000000..6178e98e9 --- /dev/null +++ b/include/qapi/util.h @@ -0,0 +1,40 @@ +/* + * QAPI util functions + * + * Copyright Fujitsu, Inc. 2014 + * + * This work is licensed under the terms of the GNU LGPL, version 2.1 or later. + * See the COPYING.LIB file in the top-level directory. + * + */ + +#ifndef QAPI_UTIL_H +#define QAPI_UTIL_H + +typedef struct QEnumLookup { + const char *const *array; + int size; +} QEnumLookup; + +const char *qapi_enum_lookup(const QEnumLookup *lookup, int val); +int qapi_enum_parse(const QEnumLookup *lookup, const char *buf, + int def, Error **errp); +bool qapi_bool_parse(const char *name, const char *value, bool *obj, + Error **errp); + +int parse_qapi_name(const char *name, bool complete); + +/* + * For any GenericList @list, insert @element at the front. + * + * Note that this macro evaluates @element exactly once, so it is safe + * to have side-effects with that argument. + */ +#define QAPI_LIST_PREPEND(list, element) do { \ + typeof(list) _tmp = g_malloc(sizeof(*(list))); \ + _tmp->value = (element); \ + _tmp->next = (list); \ + (list) = _tmp; \ +} while (0) + +#endif diff --git a/include/qapi/visitor-impl.h b/include/qapi/visitor-impl.h new file mode 100644 index 000000000..7362c043b --- /dev/null +++ b/include/qapi/visitor-impl.h @@ -0,0 +1,126 @@ +/* + * Core Definitions for QAPI Visitor implementations + * + * Copyright (C) 2012-2016 Red Hat, Inc. + * + * Author: Paolo Bonizni + * + * This work is licensed under the terms of the GNU LGPL, version 2.1 or later. + * See the COPYING.LIB file in the top-level directory. + * + */ +#ifndef QAPI_VISITOR_IMPL_H +#define QAPI_VISITOR_IMPL_H + +#include "qapi/visitor.h" + +/* + * This file describes the callback interface for implementing a QAPI + * visitor. For the client interface, see visitor.h. When + * implementing the callbacks, it is easiest to declare a struct with + * 'Visitor visitor;' as the first member. A callback's contract + * matches the corresponding public functions' contract unless stated + * otherwise. In the comments below, some callbacks are marked "must + * be set for $TYPE visits to work"; if a visitor implementation omits + * that callback, it should also document that it is only useful for a + * subset of QAPI. + */ + +/* + * There are four classes of visitors; setting the class determines + * how QAPI enums are visited, as well as what additional restrictions + * can be asserted. The values are intentionally chosen so as to + * permit some assertions based on whether a given bit is set (that + * is, some assertions apply to input and clone visitors, some + * assertions apply to output and clone visitors). + */ +typedef enum VisitorType { + VISITOR_INPUT = 1, + VISITOR_OUTPUT = 2, + VISITOR_CLONE = 3, + VISITOR_DEALLOC = 4, +} VisitorType; + +struct Visitor +{ + /* + * Only input visitors may fail! + */ + + /* Must be set to visit structs */ + bool (*start_struct)(Visitor *v, const char *name, void **obj, + size_t size, Error **errp); + + /* Optional; intended for input visitors */ + bool (*check_struct)(Visitor *v, Error **errp); + + /* Must be set to visit structs */ + void (*end_struct)(Visitor *v, void **obj); + + /* Must be set; implementations may require @list to be non-null, + * but must document it. */ + bool (*start_list)(Visitor *v, const char *name, GenericList **list, + size_t size, Error **errp); + + /* Must be set */ + GenericList *(*next_list)(Visitor *v, GenericList *tail, size_t size); + + /* Optional; intended for input visitors */ + bool (*check_list)(Visitor *v, Error **errp); + + /* Must be set */ + void (*end_list)(Visitor *v, void **list); + + /* Must be set by input and clone visitors to visit alternates */ + bool (*start_alternate)(Visitor *v, const char *name, + GenericAlternate **obj, size_t size, + Error **errp); + + /* Optional */ + void (*end_alternate)(Visitor *v, void **obj); + + /* Must be set */ + bool (*type_int64)(Visitor *v, const char *name, int64_t *obj, + Error **errp); + + /* Must be set */ + bool (*type_uint64)(Visitor *v, const char *name, uint64_t *obj, + Error **errp); + + /* Optional; fallback is type_uint64() */ + bool (*type_size)(Visitor *v, const char *name, uint64_t *obj, + Error **errp); + + /* Must be set */ + bool (*type_bool)(Visitor *v, const char *name, bool *obj, Error **errp); + + /* Must be set */ + bool (*type_str)(Visitor *v, const char *name, char **obj, Error **errp); + + /* Must be set to visit numbers */ + bool (*type_number)(Visitor *v, const char *name, double *obj, + Error **errp); + + /* Must be set to visit arbitrary QTypes */ + bool (*type_any)(Visitor *v, const char *name, QObject **obj, + Error **errp); + + /* Must be set to visit explicit null values. */ + bool (*type_null)(Visitor *v, const char *name, QNull **obj, + Error **errp); + + /* Must be set for input visitors to visit structs, optional otherwise. + The core takes care of the return type in the public interface. */ + void (*optional)(Visitor *v, const char *name, bool *present); + + /* Must be set */ + VisitorType type; + + /* Must be set for output visitors, optional otherwise. */ + void (*complete)(Visitor *v, void *opaque); + + /* Must be set */ + void (*free)(Visitor *v); +}; + +#endif diff --git a/include/qapi/visitor.h b/include/qapi/visitor.h new file mode 100644 index 000000000..ebc19ede7 --- /dev/null +++ b/include/qapi/visitor.h @@ -0,0 +1,677 @@ +/* + * Core Definitions for QAPI Visitor Classes + * + * Copyright (C) 2012-2016 Red Hat, Inc. + * Copyright IBM, Corp. 2011 + * + * Authors: + * Anthony Liguori + * + * This work is licensed under the terms of the GNU LGPL, version 2.1 or later. + * See the COPYING.LIB file in the top-level directory. + * + */ + +#ifndef QAPI_VISITOR_H +#define QAPI_VISITOR_H + +#include "qapi/qapi-builtin-types.h" + +/* + * The QAPI schema defines both a set of C data types, and a QMP wire + * format. QAPI objects can contain references to other QAPI objects, + * resulting in a directed acyclic graph. QAPI also generates visitor + * functions to walk these graphs. This file represents the interface + * for doing work at each node of a QAPI graph; it can also be used + * for a virtual walk, where there is no actual QAPI C struct. + * + * There are four kinds of visitors: input visitors (QObject, string, + * and QemuOpts) parse an external representation and build the + * corresponding QAPI object, output visitors (QObject and string) + * take a QAPI object and generate an external representation, the + * dealloc visitor takes a QAPI object (possibly partially + * constructed) and recursively frees it, and the clone visitor + * performs a deep clone of a QAPI object. + * + * While the dealloc and QObject input/output visitors are general, + * the string, QemuOpts, and clone visitors have some implementation + * limitations; see the documentation for each visitor for more + * details on what it supports. Also, see visitor-impl.h for the + * callback contracts implemented by each visitor, and + * docs/devel/qapi-code-gen.txt for more about the QAPI code + * generator. + * + * All of the visitors are created via: + * + * Visitor *subtype_visitor_new(parameters...); + * + * A visitor should be used for exactly one top-level visit_type_FOO() + * or virtual walk; if that is successful, the caller can optionally + * call visit_complete() (useful only for output visits, but safe to + * call on all visits). Then, regardless of success or failure, the + * user should call visit_free() to clean up resources. It is okay to + * free the visitor without completing the visit, if some other error + * is detected in the meantime. + * + * The clone and dealloc visitor should not be used directly outside + * of QAPI code. Use the qapi_free_FOO() and QAPI_CLONE() instead, + * described below. + * + * All QAPI types have a corresponding function with a signature + * roughly compatible with this: + * + * bool visit_type_FOO(Visitor *v, const char *name, T obj, Error **errp); + * + * where T is FOO for scalar types, and FOO * otherwise. The scalar + * visitors are declared here; the remaining visitors are generated in + * qapi-visit-MODULE.h. + * + * The @name parameter of visit_type_FOO() describes the relation + * between this QAPI value and its parent container. When visiting + * the root of a tree, @name is ignored; when visiting a member of an + * object, @name is the key associated with the value; when visiting a + * member of a list, @name is NULL; and when visiting the member of an + * alternate, @name should equal the name used for visiting the + * alternate. + * + * The visit_type_FOO() functions take a non-null @obj argument; they + * allocate *@obj during input visits, leave it unchanged during + * output and clone visits, and free it (recursively) during a dealloc + * visit. + * + * Each function also takes the customary @errp argument (see + * qapi/error.h for details), for reporting any errors (such as if a + * member @name is not present, or is present but not the specified + * type). Only input visitors can fail. + * + * If an error is detected during visit_type_FOO() with an input + * visitor, then *@obj will be set to NULL for pointer types, and left + * unchanged for scalar types. + * + * Using an output or clone visitor with an incomplete object has + * undefined behavior (other than a special case for visit_type_str() + * treating NULL like ""), while the dealloc visitor safely handles + * incomplete objects. Since input visitors never produce an + * incomplete object, such an object is possible only by manual + * construction. + * + * visit_type_FOO() returns true on success, false on error. + * + * For the QAPI object types (structs, unions, and alternates), there + * is an additional generated function in qapi-visit-MODULE.h + * compatible with: + * + * bool visit_type_FOO_members(Visitor *v, FOO *obj, Error **errp); + * + * for visiting the members of a type without also allocating the QAPI + * struct. It also returns true on success, false on error. + * + * Additionally, QAPI pointer types (structs, unions, alternates, and + * lists) have a generated function in qapi-types-MODULE.h compatible + * with: + * + * void qapi_free_FOO(FOO *obj); + * + * Does nothing when @obj is NULL. + * + * Such objects may also be used with macro + * + * Type *QAPI_CLONE(Type, src); + * + * in order to perform a deep clone of @src. + * + * For QAPI types can that inherit from a base type, a function is + * generated for going from the derived type to the base type: + * + * BASE *qapi_CHILD_base(CHILD *obj); + * + * Typical input visitor usage involves: + * + * + * Foo *f; + * Error *err = NULL; + * Visitor *v; + * + * v = FOO_visitor_new(...); + * if (!visit_type_Foo(v, NULL, &f, &err)) { + * ...handle error... + * } else { + * ...use f... + * } + * visit_free(v); + * qapi_free_Foo(f); + * + * + * For a list, it is: + * + * FooList *l; + * Error *err = NULL; + * Visitor *v; + * + * v = FOO_visitor_new(...); + * if (!visit_type_FooList(v, NULL, &l, &err)) { + * ...handle error... + * } else { + * for ( ; l; l = l->next) { + * ...use l->value... + * } + * } + * visit_free(v); + * qapi_free_FooList(l); + * + * + * Typical output visitor usage: + * + * + * Foo *f = ...obtain populated object... + * Visitor *v; + * Type *result; + * + * v = FOO_visitor_new(..., &result); + * visit_type_Foo(v, NULL, &f, &error_abort); + * visit_complete(v, &result); + * visit_free(v); + * ...use result... + * + * + * It is also possible to use the visitors to do a virtual walk, where + * no actual QAPI object is present. In this situation, decisions + * about what needs to be walked are made by the calling code, and + * structured visits are split between pairs of start and end methods + * (where the end method must be called if the start function + * succeeded, even if an intermediate visit encounters an error). + * Thus, a virtual walk corresponding to '{ "list": [1, 2] }' looks + * like: + * + * + * Visitor *v; + * Error *err = NULL; + * bool ok = false; + * int value; + * + * v = FOO_visitor_new(...); + * if (!visit_start_struct(v, NULL, NULL, 0, &err)) { + * goto out; + * } + * if (!visit_start_list(v, "list", NULL, 0, &err)) { + * goto outobj; + * } + * value = 1; + * if (!visit_type_int(v, NULL, &value, &err)) { + * goto outlist; + * } + * value = 2; + * if (!visit_type_int(v, NULL, &value, &err)) { + * goto outlist; + * } + * ok = true; + * outlist: + * if (ok) { + * ok = visit_check_list(v, &err); + * } + * visit_end_list(v, NULL); + * if (ok) { + * ok = visit_check_struct(v, &err); + * } + * outobj: + * visit_end_struct(v, NULL); + * out: + * visit_free(v); + * + * + * This file provides helpers for use by the generated + * visit_type_FOO(): visit_optional() for the 'has_member' field + * associated with optional 'member' in the C struct, + * visit_next_list() for advancing through a FooList linked list, and + * visit_is_input() for cleaning up on failure. + */ + +/*** Useful types ***/ + +/* This struct is layout-compatible with all other *List structs + * created by the QAPI generator. It is used as a typical + * singly-linked list. */ +typedef struct GenericList { + struct GenericList *next; + char padding[]; +} GenericList; + +/* This struct is layout-compatible with all Alternate types + * created by the QAPI generator. */ +typedef struct GenericAlternate { + QType type; + char padding[]; +} GenericAlternate; + +/*** Visitor cleanup ***/ + +/* + * Complete the visit, collecting any output. + * + * May only be called only once after a successful top-level + * visit_type_FOO() or visit_end_ITEM(), and marks the end of the + * visit. The @opaque pointer should match the output parameter + * passed to the subtype_visitor_new() used to create an output + * visitor, or NULL for any other visitor. Needed for output + * visitors, but may also be called with other visitors. + */ +void visit_complete(Visitor *v, void *opaque); + +/* + * Free @v and any resources it has tied up. + * + * May be called whether or not the visit has been successfully + * completed, but should not be called until a top-level + * visit_type_FOO() or visit_start_ITEM() has been performed on the + * visitor. Safe if @v is NULL. + */ +void visit_free(Visitor *v); + + +/*** Visiting structures ***/ + +/* + * Start visiting an object @obj (struct or union). + * + * @name expresses the relationship of this object to its parent + * container; see the general description of @name above. + * + * @obj must be non-NULL for a real walk, in which case @size + * determines how much memory an input or clone visitor will allocate + * into *@obj. @obj may also be NULL for a virtual walk, in which + * case @size is ignored. + * + * On failure, set *@obj to NULL and store an error through @errp. + * Can happen only when @v is an input visitor. + * + * Return true on success, false on failure. + * + * After visit_start_struct() succeeds, the caller may visit its + * members one after the other, passing the member's name and address + * within the struct. Finally, visit_end_struct() needs to be called + * with the same @obj to clean up, even if intermediate visits fail. + * See the examples above. + * + * FIXME Should this be named visit_start_object, since it is also + * used for QAPI unions, and maps to JSON objects? + */ +bool visit_start_struct(Visitor *v, const char *name, void **obj, + size_t size, Error **errp); + +/* + * Prepare for completing an object visit. + * + * On failure, store an error through @errp. Can happen only when @v + * is an input visitor. + * + * Return true on success, false on failure. + * + * Should be called prior to visit_end_struct() if all other + * intermediate visit steps were successful, to allow the visitor one + * last chance to report errors. May be skipped on a cleanup path, + * where there is no need to check for further errors. + */ +bool visit_check_struct(Visitor *v, Error **errp); + +/* + * Complete an object visit started earlier. + * + * @obj must match what was passed to the paired visit_start_struct(). + * + * Must be called after any successful use of visit_start_struct(), + * even if intermediate processing was skipped due to errors, to allow + * the backend to release any resources. Destroying the visitor early + * with visit_free() behaves as if this was implicitly called. + */ +void visit_end_struct(Visitor *v, void **obj); + + +/*** Visiting lists ***/ + +/* + * Start visiting a list. + * + * @name expresses the relationship of this list to its parent + * container; see the general description of @name above. + * + * @list must be non-NULL for a real walk, in which case @size + * determines how much memory an input or clone visitor will allocate + * into *@list (at least sizeof(GenericList)). Some visitors also + * allow @list to be NULL for a virtual walk, in which case @size is + * ignored. + * + * On failure, set *@list to NULL and store an error through @errp. + * Can happen only when @v is an input visitor. + * + * Return true on success, false on failure. + * + * After visit_start_list() succeeds, the caller may visit its members + * one after the other. A real visit (where @list is non-NULL) uses + * visit_next_list() for traversing the linked list, while a virtual + * visit (where @list is NULL) uses other means. For each list + * element, call the appropriate visit_type_FOO() with name set to + * NULL and obj set to the address of the value member of the list + * element. Finally, visit_end_list() needs to be called with the + * same @list to clean up, even if intermediate visits fail. See the + * examples above. + */ +bool visit_start_list(Visitor *v, const char *name, GenericList **list, + size_t size, Error **errp); + +/* + * Iterate over a GenericList during a non-virtual list visit. + * + * @size represents the size of a linked list node (at least + * sizeof(GenericList)). + * + * @tail must not be NULL; on the first call, @tail is the value of + * *list after visit_start_list(), and on subsequent calls @tail must + * be the previously returned value. Should be called in a loop until + * a NULL return; for each non-NULL return, the caller then calls the + * appropriate visit_type_*() for the element type of the list, with + * that function's name parameter set to NULL and obj set to the + * address of @tail->value. + */ +GenericList *visit_next_list(Visitor *v, GenericList *tail, size_t size); + +/* + * Prepare for completing a list visit. + * + * On failure, store an error through @errp. Can happen only when @v + * is an input visitor. + * + * Return true on success, false on failure. + * + * Should be called prior to visit_end_list() if all other + * intermediate visit steps were successful, to allow the visitor one + * last chance to report errors. May be skipped on a cleanup path, + * where there is no need to check for further errors. + */ +bool visit_check_list(Visitor *v, Error **errp); + +/* + * Complete a list visit started earlier. + * + * @list must match what was passed to the paired visit_start_list(). + * + * Must be called after any successful use of visit_start_list(), even + * if intermediate processing was skipped due to errors, to allow the + * backend to release any resources. Destroying the visitor early + * with visit_free() behaves as if this was implicitly called. + */ +void visit_end_list(Visitor *v, void **list); + + +/*** Visiting alternates ***/ + +/* + * Start the visit of an alternate @obj. + * + * @name expresses the relationship of this alternate to its parent + * container; see the general description of @name above. + * + * @obj must not be NULL. Input and clone visitors use @size to + * determine how much memory to allocate into *@obj, then determine + * the qtype of the next thing to be visited, and store it in + * (*@obj)->type. Other visitors leave @obj unchanged. + * + * On failure, set *@obj to NULL and store an error through @errp. + * Can happen only when @v is an input visitor. + * + * Return true on success, false on failure. + * + * If successful, this must be paired with visit_end_alternate() with + * the same @obj to clean up, even if visiting the contents of the + * alternate fails. + */ +bool visit_start_alternate(Visitor *v, const char *name, + GenericAlternate **obj, size_t size, + Error **errp); + +/* + * Finish visiting an alternate type. + * + * @obj must match what was passed to the paired visit_start_alternate(). + * + * Must be called after any successful use of visit_start_alternate(), + * even if intermediate processing was skipped due to errors, to allow + * the backend to release any resources. Destroying the visitor early + * with visit_free() behaves as if this was implicitly called. + * + */ +void visit_end_alternate(Visitor *v, void **obj); + + +/*** Other helpers ***/ + +/* + * Does optional struct member @name need visiting? + * + * @name must not be NULL. This function is only useful between + * visit_start_struct() and visit_end_struct(), since only objects + * have optional keys. + * + * @present points to the address of the optional member's has_ flag. + * + * Input visitors set *@present according to input; other visitors + * leave it unchanged. In either case, return *@present for + * convenience. + */ +bool visit_optional(Visitor *v, const char *name, bool *present); + +/* + * Visit an enum value. + * + * @name expresses the relationship of this enum to its parent + * container; see the general description of @name above. + * + * @obj must be non-NULL. Input visitors parse input and set *@obj to + * the enumeration value, leaving @obj unchanged on error; other + * visitors use *@obj but leave it unchanged. + * + * Currently, all input visitors parse text input, and all output + * visitors produce text output. The mapping between enumeration + * values and strings is done by the visitor core, using @lookup. + * + * On failure, store an error through @errp. Can happen only when @v + * is an input visitor. + * + * Return true on success, false on failure. + * + * May call visit_type_str() under the hood, and the enum visit may + * fail even if the corresponding string visit succeeded; this implies + * that an input visitor's visit_type_str() must have no unwelcome + * side effects. + */ +bool visit_type_enum(Visitor *v, const char *name, int *obj, + const QEnumLookup *lookup, Error **errp); + +/* + * Check if visitor is an input visitor. + */ +bool visit_is_input(Visitor *v); + +/* + * Check if visitor is a dealloc visitor. + */ +bool visit_is_dealloc(Visitor *v); + +/*** Visiting built-in types ***/ + +/* + * Visit an integer value. + * + * @name expresses the relationship of this integer to its parent + * container; see the general description of @name above. + * + * @obj must be non-NULL. Input visitors set *@obj to the value; + * other visitors will leave *@obj unchanged. + * + * On failure, store an error through @errp. Can happen only when @v + * is an input visitor. + * + * Return true on success, false on failure. + */ +bool visit_type_int(Visitor *v, const char *name, int64_t *obj, Error **errp); + +/* + * Visit a uint8_t value. + * Like visit_type_int(), except clamps the value to uint8_t range. + */ +bool visit_type_uint8(Visitor *v, const char *name, uint8_t *obj, + Error **errp); + +/* + * Visit a uint16_t value. + * Like visit_type_int(), except clamps the value to uint16_t range. + */ +bool visit_type_uint16(Visitor *v, const char *name, uint16_t *obj, + Error **errp); + +/* + * Visit a uint32_t value. + * Like visit_type_int(), except clamps the value to uint32_t range. + */ +bool visit_type_uint32(Visitor *v, const char *name, uint32_t *obj, + Error **errp); + +/* + * Visit a uint64_t value. + * Like visit_type_int(), except clamps the value to uint64_t range, + * that is, ensures it is unsigned. + */ +bool visit_type_uint64(Visitor *v, const char *name, uint64_t *obj, + Error **errp); + +/* + * Visit an int8_t value. + * Like visit_type_int(), except clamps the value to int8_t range. + */ +bool visit_type_int8(Visitor *v, const char *name, int8_t *obj, Error **errp); + +/* + * Visit an int16_t value. + * Like visit_type_int(), except clamps the value to int16_t range. + */ +bool visit_type_int16(Visitor *v, const char *name, int16_t *obj, + Error **errp); + +/* + * Visit an int32_t value. + * Like visit_type_int(), except clamps the value to int32_t range. + */ +bool visit_type_int32(Visitor *v, const char *name, int32_t *obj, + Error **errp); + +/* + * Visit an int64_t value. + * Identical to visit_type_int(). + */ +bool visit_type_int64(Visitor *v, const char *name, int64_t *obj, + Error **errp); + +/* + * Visit a uint64_t value. + * Like visit_type_uint64(), except that some visitors may choose to + * recognize additional syntax, such as suffixes for easily scaling + * values. + */ +bool visit_type_size(Visitor *v, const char *name, uint64_t *obj, + Error **errp); + +/* + * Visit a boolean value. + * + * @name expresses the relationship of this boolean to its parent + * container; see the general description of @name above. + * + * @obj must be non-NULL. Input visitors set *@obj to the value; + * other visitors will leave *@obj unchanged. + * + * On failure, store an error through @errp. Can happen only when @v + * is an input visitor. + * + * Return true on success, false on failure. + */ +bool visit_type_bool(Visitor *v, const char *name, bool *obj, Error **errp); + +/* + * Visit a string value. + * + * @name expresses the relationship of this string to its parent + * container; see the general description of @name above. + * + * @obj must be non-NULL. Input and clone visitors set *@obj to the + * value (always using "" rather than NULL for an empty string). + * Other visitors leave *@obj unchanged, and commonly treat NULL like + * "". + * + * It is safe to cast away const when preparing a (const char *) value + * into @obj for use by an output visitor. + * + * On failure, set *@obj to NULL and store an error through @errp. + * Can happen only when @v is an input visitor. + * + * Return true on success, false on failure. + * + * FIXME: Callers that try to output NULL *obj should not be allowed. + */ +bool visit_type_str(Visitor *v, const char *name, char **obj, Error **errp); + +/* + * Visit a number (i.e. double) value. + * + * @name expresses the relationship of this number to its parent + * container; see the general description of @name above. + * + * @obj must be non-NULL. Input visitors set *@obj to the value; + * other visitors will leave *@obj unchanged. Visitors should + * document if infinity or NaN are not permitted. + * + * On failure, store an error through @errp. Can happen only when @v + * is an input visitor. + * + * Return true on success, false on failure. + */ +bool visit_type_number(Visitor *v, const char *name, double *obj, + Error **errp); + +/* + * Visit an arbitrary value. + * + * @name expresses the relationship of this value to its parent + * container; see the general description of @name above. + * + * @obj must be non-NULL. Input visitors set *@obj to the value; + * other visitors will leave *@obj unchanged. *@obj must be non-NULL + * for output visitors. + * + * On failure, set *@obj to NULL and store an error through @errp. + * Can happen only when @v is an input visitor. + * + * Return true on success, false on failure. + * + * Note that some kinds of input can't express arbitrary QObject. + * E.g. the visitor returned by qobject_input_visitor_new_keyval() + * can't create numbers or booleans, only strings. + */ +bool visit_type_any(Visitor *v, const char *name, QObject **obj, Error **errp); + +/* + * Visit a JSON null value. + * + * @name expresses the relationship of the null value to its parent + * container; see the general description of @name above. + * + * @obj must be non-NULL. Input visitors set *@obj to the value; + * other visitors ignore *@obj. + * + * On failure, set *@obj to NULL and store an error through @errp. + * Can happen only when @v is an input visitor. + * + * Return true on success, false on failure. + */ +bool visit_type_null(Visitor *v, const char *name, QNull **obj, + Error **errp); + +#endif diff --git a/include/qemu-common.h b/include/qemu-common.h new file mode 100644 index 000000000..fda7dc6ca --- /dev/null +++ b/include/qemu-common.h @@ -0,0 +1,163 @@ +/* + * This file is supposed to be included only by .c files. No header file should + * depend on qemu-common.h, as this would easily lead to circular header + * dependencies. + * + * If a header file uses a definition from qemu-common.h, that definition + * must be moved to a separate header file, and the header that uses it + * must include that header. + */ +#ifndef QEMU_COMMON_H +#define QEMU_COMMON_H + +#define TFR(expr) do { if ((expr) != -1) break; } while (errno == EINTR) + +/* Copyright string for -version arguments, About dialogs, etc */ +#define QEMU_COPYRIGHT "Copyright (c) 2003-2020 " \ + "Fabrice Bellard and the QEMU Project developers" + +/* Bug reporting information for --help arguments, About dialogs, etc */ +#define QEMU_HELP_BOTTOM \ + "See for how to report bugs.\n" \ + "More information on the QEMU project at ." + +/* main function, renamed */ +#if defined(CONFIG_COCOA) +int qemu_main(int argc, char **argv, char **envp); +#endif + +void qemu_get_timedate(struct tm *tm, int offset); +int qemu_timedate_diff(struct tm *tm); + +void *qemu_oom_check(void *ptr); + +ssize_t qemu_write_full(int fd, const void *buf, size_t count) + QEMU_WARN_UNUSED_RESULT; + +#ifndef _WIN32 +int qemu_pipe(int pipefd[2]); +/* like openpty() but also makes it raw; return master fd */ +int qemu_openpty_raw(int *aslave, char *pty_name); +#endif + +#ifdef _WIN32 +/* MinGW needs type casts for the 'buf' and 'optval' arguments. */ +#define qemu_getsockopt(sockfd, level, optname, optval, optlen) \ + getsockopt(sockfd, level, optname, (void *)optval, optlen) +#define qemu_setsockopt(sockfd, level, optname, optval, optlen) \ + setsockopt(sockfd, level, optname, (const void *)optval, optlen) +#define qemu_recv(sockfd, buf, len, flags) recv(sockfd, (void *)buf, len, flags) +#define qemu_sendto(sockfd, buf, len, flags, destaddr, addrlen) \ + sendto(sockfd, (const void *)buf, len, flags, destaddr, addrlen) +#else +#define qemu_getsockopt(sockfd, level, optname, optval, optlen) \ + getsockopt(sockfd, level, optname, optval, optlen) +#define qemu_setsockopt(sockfd, level, optname, optval, optlen) \ + setsockopt(sockfd, level, optname, optval, optlen) +#define qemu_recv(sockfd, buf, len, flags) recv(sockfd, buf, len, flags) +#define qemu_sendto(sockfd, buf, len, flags, destaddr, addrlen) \ + sendto(sockfd, buf, len, flags, destaddr, addrlen) +#endif + +void cpu_exec_init_all(void); +void cpu_exec_step_atomic(CPUState *cpu); + +/** + * set_preferred_target_page_bits: + * @bits: number of bits needed to represent an address within the page + * + * Set the preferred target page size (the actual target page + * size may be smaller than any given CPU's preference). + * Returns true on success, false on failure (which can only happen + * if this is called after the system has already finalized its + * choice of page size and the requested page size is smaller than that). + */ +bool set_preferred_target_page_bits(int bits); + +/** + * finalize_target_page_bits: + * Commit the final value set by set_preferred_target_page_bits. + */ +void finalize_target_page_bits(void); + +/** + * Sends a (part of) iovec down a socket, yielding when the socket is full, or + * Receives data into a (part of) iovec from a socket, + * yielding when there is no data in the socket. + * The same interface as qemu_sendv_recvv(), with added yielding. + * XXX should mark these as coroutine_fn + */ +ssize_t qemu_co_sendv_recvv(int sockfd, struct iovec *iov, unsigned iov_cnt, + size_t offset, size_t bytes, bool do_send); +#define qemu_co_recvv(sockfd, iov, iov_cnt, offset, bytes) \ + qemu_co_sendv_recvv(sockfd, iov, iov_cnt, offset, bytes, false) +#define qemu_co_sendv(sockfd, iov, iov_cnt, offset, bytes) \ + qemu_co_sendv_recvv(sockfd, iov, iov_cnt, offset, bytes, true) + +/** + * The same as above, but with just a single buffer + */ +ssize_t qemu_co_send_recv(int sockfd, void *buf, size_t bytes, bool do_send); +#define qemu_co_recv(sockfd, buf, bytes) \ + qemu_co_send_recv(sockfd, buf, bytes, false) +#define qemu_co_send(sockfd, buf, bytes) \ + qemu_co_send_recv(sockfd, buf, bytes, true) + +void qemu_progress_init(int enabled, float min_skip); +void qemu_progress_end(void); +void qemu_progress_print(float delta, int max); +const char *qemu_get_vm_name(void); + +#define QEMU_FILE_TYPE_BIOS 0 +#define QEMU_FILE_TYPE_KEYMAP 1 +/** + * qemu_find_file: + * @type: QEMU_FILE_TYPE_BIOS (for BIOS, VGA BIOS) + * or QEMU_FILE_TYPE_KEYMAP (for keymaps). + * @name: Relative or absolute file name + * + * If @name exists on disk as an absolute path, or a path relative + * to the current directory, then returns @name unchanged. + * Otherwise searches for @name file in the data directories, either + * configured at build time (DATADIR) or registered with the -L command + * line option. + * + * The caller must use g_free() to free the returned data when it is + * no longer required. + * + * Returns: a path that can access @name, or NULL if no matching file exists. + */ +char *qemu_find_file(int type, const char *name); + +/* OS specific functions */ +void os_setup_early_signal_handling(void); +int os_parse_cmd_args(int index, const char *optarg); + +/* + * Hexdump a line of a byte buffer into a hexadecimal/ASCII buffer + */ +#define QEMU_HEXDUMP_LINE_BYTES 16 /* Number of bytes to dump */ +#define QEMU_HEXDUMP_LINE_LEN 75 /* Number of characters in line */ +void qemu_hexdump_line(char *line, unsigned int b, const void *bufptr, + unsigned int len, bool ascii); + +/* + * Hexdump a buffer to a file. An optional string prefix is added to every line + */ + +void qemu_hexdump(FILE *fp, const char *prefix, + const void *bufptr, size_t size); + +/* + * helper to parse debug environment variables + */ +int parse_debug_env(const char *name, int max, int initial); + +const char *qemu_ether_ntoa(const MACAddr *mac); +void page_size_init(void); + +/* returns non-zero if dump is in progress, otherwise zero is + * returned. */ +bool dump_in_progress(void); + +#endif diff --git a/include/qemu-io.h b/include/qemu-io.h new file mode 100644 index 000000000..3af513004 --- /dev/null +++ b/include/qemu-io.h @@ -0,0 +1,56 @@ +/* + * Copyright (c) 2000-2005 Silicon Graphics, Inc. + * All Rights Reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it would be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, see . + */ + +#ifndef QEMU_IO_H +#define QEMU_IO_H + + +#define CMD_FLAG_GLOBAL ((int)0x80000000) /* don't iterate "args" */ + +/* Implement a qemu-io command. + * Operate on @blk using @argc/@argv as the command's arguments, and + * return 0 on success or negative errno on failure. + */ +typedef int (*cfunc_t)(BlockBackend *blk, int argc, char **argv); + +typedef void (*helpfunc_t)(void); + +typedef struct cmdinfo { + const char* name; + const char* altname; + cfunc_t cfunc; + int argmin; + int argmax; + int canpush; + int flags; + const char *args; + const char *oneline; + helpfunc_t help; + uint64_t perm; +} cmdinfo_t; + +extern bool qemuio_misalign; + +int qemuio_command(BlockBackend *blk, const char *cmd); + +void qemuio_add_command(const cmdinfo_t *ci); +void qemuio_command_usage(const cmdinfo_t *ci); +void qemuio_complete_command(const char *input, + void (*fn)(const char *cmd, void *opaque), + void *opaque); + +#endif /* QEMU_IO_H */ diff --git a/include/qemu/atomic.h b/include/qemu/atomic.h new file mode 100644 index 000000000..c1d211a35 --- /dev/null +++ b/include/qemu/atomic.h @@ -0,0 +1,509 @@ +/* + * Simple interface for atomic operations. + * + * Copyright (C) 2013 Red Hat, Inc. + * + * Author: Paolo Bonzini + * + * This work is licensed under the terms of the GNU GPL, version 2 or later. + * See the COPYING file in the top-level directory. + * + * See docs/devel/atomics.txt for discussion about the guarantees each + * atomic primitive is meant to provide. + */ + +#ifndef QEMU_ATOMIC_H +#define QEMU_ATOMIC_H + +/* Compiler barrier */ +#define barrier() ({ asm volatile("" ::: "memory"); (void)0; }) + +/* The variable that receives the old value of an atomically-accessed + * variable must be non-qualified, because atomic builtins return values + * through a pointer-type argument as in __atomic_load(&var, &old, MODEL). + * + * This macro has to handle types smaller than int manually, because of + * implicit promotion. int and larger types, as well as pointers, can be + * converted to a non-qualified type just by applying a binary operator. + */ +#define typeof_strip_qual(expr) \ + typeof( \ + __builtin_choose_expr( \ + __builtin_types_compatible_p(typeof(expr), bool) || \ + __builtin_types_compatible_p(typeof(expr), const bool) || \ + __builtin_types_compatible_p(typeof(expr), volatile bool) || \ + __builtin_types_compatible_p(typeof(expr), const volatile bool), \ + (bool)1, \ + __builtin_choose_expr( \ + __builtin_types_compatible_p(typeof(expr), signed char) || \ + __builtin_types_compatible_p(typeof(expr), const signed char) || \ + __builtin_types_compatible_p(typeof(expr), volatile signed char) || \ + __builtin_types_compatible_p(typeof(expr), const volatile signed char), \ + (signed char)1, \ + __builtin_choose_expr( \ + __builtin_types_compatible_p(typeof(expr), unsigned char) || \ + __builtin_types_compatible_p(typeof(expr), const unsigned char) || \ + __builtin_types_compatible_p(typeof(expr), volatile unsigned char) || \ + __builtin_types_compatible_p(typeof(expr), const volatile unsigned char), \ + (unsigned char)1, \ + __builtin_choose_expr( \ + __builtin_types_compatible_p(typeof(expr), signed short) || \ + __builtin_types_compatible_p(typeof(expr), const signed short) || \ + __builtin_types_compatible_p(typeof(expr), volatile signed short) || \ + __builtin_types_compatible_p(typeof(expr), const volatile signed short), \ + (signed short)1, \ + __builtin_choose_expr( \ + __builtin_types_compatible_p(typeof(expr), unsigned short) || \ + __builtin_types_compatible_p(typeof(expr), const unsigned short) || \ + __builtin_types_compatible_p(typeof(expr), volatile unsigned short) || \ + __builtin_types_compatible_p(typeof(expr), const volatile unsigned short), \ + (unsigned short)1, \ + (expr)+0)))))) + +#ifdef __ATOMIC_RELAXED +/* For C11 atomic ops */ + +/* Manual memory barriers + * + *__atomic_thread_fence does not include a compiler barrier; instead, + * the barrier is part of __atomic_load/__atomic_store's "volatile-like" + * semantics. If smp_wmb() is a no-op, absence of the barrier means that + * the compiler is free to reorder stores on each side of the barrier. + * Add one here, and similarly in smp_rmb() and smp_read_barrier_depends(). + */ + +#define smp_mb() ({ barrier(); __atomic_thread_fence(__ATOMIC_SEQ_CST); }) +#define smp_mb_release() ({ barrier(); __atomic_thread_fence(__ATOMIC_RELEASE); }) +#define smp_mb_acquire() ({ barrier(); __atomic_thread_fence(__ATOMIC_ACQUIRE); }) + +/* Most compilers currently treat consume and acquire the same, but really + * no processors except Alpha need a barrier here. Leave it in if + * using Thread Sanitizer to avoid warnings, otherwise optimize it away. + */ +#if defined(__SANITIZE_THREAD__) +#define smp_read_barrier_depends() ({ barrier(); __atomic_thread_fence(__ATOMIC_CONSUME); }) +#elif defined(__alpha__) +#define smp_read_barrier_depends() asm volatile("mb":::"memory") +#else +#define smp_read_barrier_depends() barrier() +#endif + +/* + * A signal barrier forces all pending local memory ops to be observed before + * a SIGSEGV is delivered to the *same* thread. In practice this is exactly + * the same as barrier(), but since we have the correct builtin, use it. + */ +#define signal_barrier() __atomic_signal_fence(__ATOMIC_SEQ_CST) + +/* Sanity check that the size of an atomic operation isn't "overly large". + * Despite the fact that e.g. i686 has 64-bit atomic operations, we do not + * want to use them because we ought not need them, and this lets us do a + * bit of sanity checking that other 32-bit hosts might build. + * + * That said, we have a problem on 64-bit ILP32 hosts in that in order to + * sync with TCG_OVERSIZED_GUEST, this must match TCG_TARGET_REG_BITS. + * We'd prefer not want to pull in everything else TCG related, so handle + * those few cases by hand. + * + * Note that x32 is fully detected with __x86_64__ + _ILP32, and that for + * Sparc we always force the use of sparcv9 in configure. MIPS n32 (ILP32) & + * n64 (LP64) ABIs are both detected using __mips64. + */ +#if defined(__x86_64__) || defined(__sparc__) || defined(__mips64) +# define ATOMIC_REG_SIZE 8 +#else +# define ATOMIC_REG_SIZE sizeof(void *) +#endif + +/* Weak atomic operations prevent the compiler moving other + * loads/stores past the atomic operation load/store. However there is + * no explicit memory barrier for the processor. + * + * The C11 memory model says that variables that are accessed from + * different threads should at least be done with __ATOMIC_RELAXED + * primitives or the result is undefined. Generally this has little to + * no effect on the generated code but not using the atomic primitives + * will get flagged by sanitizers as a violation. + */ +#define qatomic_read__nocheck(ptr) \ + __atomic_load_n(ptr, __ATOMIC_RELAXED) + +#define qatomic_read(ptr) \ + ({ \ + QEMU_BUILD_BUG_ON(sizeof(*ptr) > ATOMIC_REG_SIZE); \ + qatomic_read__nocheck(ptr); \ + }) + +#define qatomic_set__nocheck(ptr, i) \ + __atomic_store_n(ptr, i, __ATOMIC_RELAXED) + +#define qatomic_set(ptr, i) do { \ + QEMU_BUILD_BUG_ON(sizeof(*ptr) > ATOMIC_REG_SIZE); \ + qatomic_set__nocheck(ptr, i); \ +} while(0) + +/* See above: most compilers currently treat consume and acquire the + * same, but this slows down qatomic_rcu_read unnecessarily. + */ +#ifdef __SANITIZE_THREAD__ +#define qatomic_rcu_read__nocheck(ptr, valptr) \ + __atomic_load(ptr, valptr, __ATOMIC_CONSUME); +#else +#define qatomic_rcu_read__nocheck(ptr, valptr) \ + __atomic_load(ptr, valptr, __ATOMIC_RELAXED); \ + smp_read_barrier_depends(); +#endif + +#define qatomic_rcu_read(ptr) \ + ({ \ + QEMU_BUILD_BUG_ON(sizeof(*ptr) > ATOMIC_REG_SIZE); \ + typeof_strip_qual(*ptr) _val; \ + qatomic_rcu_read__nocheck(ptr, &_val); \ + _val; \ + }) + +#define qatomic_rcu_set(ptr, i) do { \ + QEMU_BUILD_BUG_ON(sizeof(*ptr) > ATOMIC_REG_SIZE); \ + __atomic_store_n(ptr, i, __ATOMIC_RELEASE); \ +} while(0) + +#define qatomic_load_acquire(ptr) \ + ({ \ + QEMU_BUILD_BUG_ON(sizeof(*ptr) > ATOMIC_REG_SIZE); \ + typeof_strip_qual(*ptr) _val; \ + __atomic_load(ptr, &_val, __ATOMIC_ACQUIRE); \ + _val; \ + }) + +#define qatomic_store_release(ptr, i) do { \ + QEMU_BUILD_BUG_ON(sizeof(*ptr) > ATOMIC_REG_SIZE); \ + __atomic_store_n(ptr, i, __ATOMIC_RELEASE); \ +} while(0) + + +/* All the remaining operations are fully sequentially consistent */ + +#define qatomic_xchg__nocheck(ptr, i) ({ \ + __atomic_exchange_n(ptr, (i), __ATOMIC_SEQ_CST); \ +}) + +#define qatomic_xchg(ptr, i) ({ \ + QEMU_BUILD_BUG_ON(sizeof(*ptr) > ATOMIC_REG_SIZE); \ + qatomic_xchg__nocheck(ptr, i); \ +}) + +/* Returns the eventual value, failed or not */ +#define qatomic_cmpxchg__nocheck(ptr, old, new) ({ \ + typeof_strip_qual(*ptr) _old = (old); \ + (void)__atomic_compare_exchange_n(ptr, &_old, new, false, \ + __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST); \ + _old; \ +}) + +#define qatomic_cmpxchg(ptr, old, new) ({ \ + QEMU_BUILD_BUG_ON(sizeof(*ptr) > ATOMIC_REG_SIZE); \ + qatomic_cmpxchg__nocheck(ptr, old, new); \ +}) + +/* Provide shorter names for GCC atomic builtins, return old value */ +#define qatomic_fetch_inc(ptr) __atomic_fetch_add(ptr, 1, __ATOMIC_SEQ_CST) +#define qatomic_fetch_dec(ptr) __atomic_fetch_sub(ptr, 1, __ATOMIC_SEQ_CST) + +#define qatomic_fetch_add(ptr, n) __atomic_fetch_add(ptr, n, __ATOMIC_SEQ_CST) +#define qatomic_fetch_sub(ptr, n) __atomic_fetch_sub(ptr, n, __ATOMIC_SEQ_CST) +#define qatomic_fetch_and(ptr, n) __atomic_fetch_and(ptr, n, __ATOMIC_SEQ_CST) +#define qatomic_fetch_or(ptr, n) __atomic_fetch_or(ptr, n, __ATOMIC_SEQ_CST) +#define qatomic_fetch_xor(ptr, n) __atomic_fetch_xor(ptr, n, __ATOMIC_SEQ_CST) + +#define qatomic_inc_fetch(ptr) __atomic_add_fetch(ptr, 1, __ATOMIC_SEQ_CST) +#define qatomic_dec_fetch(ptr) __atomic_sub_fetch(ptr, 1, __ATOMIC_SEQ_CST) +#define qatomic_add_fetch(ptr, n) __atomic_add_fetch(ptr, n, __ATOMIC_SEQ_CST) +#define qatomic_sub_fetch(ptr, n) __atomic_sub_fetch(ptr, n, __ATOMIC_SEQ_CST) +#define qatomic_and_fetch(ptr, n) __atomic_and_fetch(ptr, n, __ATOMIC_SEQ_CST) +#define qatomic_or_fetch(ptr, n) __atomic_or_fetch(ptr, n, __ATOMIC_SEQ_CST) +#define qatomic_xor_fetch(ptr, n) __atomic_xor_fetch(ptr, n, __ATOMIC_SEQ_CST) + +/* And even shorter names that return void. */ +#define qatomic_inc(ptr) \ + ((void) __atomic_fetch_add(ptr, 1, __ATOMIC_SEQ_CST)) +#define qatomic_dec(ptr) \ + ((void) __atomic_fetch_sub(ptr, 1, __ATOMIC_SEQ_CST)) +#define qatomic_add(ptr, n) \ + ((void) __atomic_fetch_add(ptr, n, __ATOMIC_SEQ_CST)) +#define qatomic_sub(ptr, n) \ + ((void) __atomic_fetch_sub(ptr, n, __ATOMIC_SEQ_CST)) +#define qatomic_and(ptr, n) \ + ((void) __atomic_fetch_and(ptr, n, __ATOMIC_SEQ_CST)) +#define qatomic_or(ptr, n) \ + ((void) __atomic_fetch_or(ptr, n, __ATOMIC_SEQ_CST)) +#define qatomic_xor(ptr, n) \ + ((void) __atomic_fetch_xor(ptr, n, __ATOMIC_SEQ_CST)) + +#else /* __ATOMIC_RELAXED */ + +/* + * We use GCC builtin if it's available, as that can use mfence on + * 32-bit as well, e.g. if built with -march=pentium-m. However, on + * i386 the spec is buggy, and the implementation followed it until + * 4.3 (http://gcc.gnu.org/bugzilla/show_bug.cgi?id=36793). + */ +#if defined(__i386__) || defined(__x86_64__) +#if !QEMU_GNUC_PREREQ(4, 4) +#if defined __x86_64__ +#define smp_mb() ({ asm volatile("mfence" ::: "memory"); (void)0; }) +#else +#define smp_mb() ({ asm volatile("lock; addl $0,0(%%esp) " ::: "memory"); (void)0; }) +#endif +#endif +#endif + + +#ifdef __alpha__ +#define smp_read_barrier_depends() asm volatile("mb":::"memory") +#endif + +#if defined(__i386__) || defined(__x86_64__) || defined(__s390x__) + +/* + * Because of the strongly ordered storage model, wmb() and rmb() are nops + * here (a compiler barrier only). QEMU doesn't do accesses to write-combining + * qemu memory or non-temporal load/stores from C code. + */ +#define smp_mb_release() barrier() +#define smp_mb_acquire() barrier() + +/* + * __sync_lock_test_and_set() is documented to be an acquire barrier only, + * but it is a full barrier at the hardware level. Add a compiler barrier + * to make it a full barrier also at the compiler level. + */ +#define qatomic_xchg(ptr, i) (barrier(), __sync_lock_test_and_set(ptr, i)) + +#elif defined(_ARCH_PPC) + +/* + * We use an eieio() for wmb() on powerpc. This assumes we don't + * need to order cacheable and non-cacheable stores with respect to + * each other. + * + * smp_mb has the same problem as on x86 for not-very-new GCC + * (http://patchwork.ozlabs.org/patch/126184/, Nov 2011). + */ +#define smp_wmb() ({ asm volatile("eieio" ::: "memory"); (void)0; }) +#if defined(__powerpc64__) +#define smp_mb_release() ({ asm volatile("lwsync" ::: "memory"); (void)0; }) +#define smp_mb_acquire() ({ asm volatile("lwsync" ::: "memory"); (void)0; }) +#else +#define smp_mb_release() ({ asm volatile("sync" ::: "memory"); (void)0; }) +#define smp_mb_acquire() ({ asm volatile("sync" ::: "memory"); (void)0; }) +#endif +#define smp_mb() ({ asm volatile("sync" ::: "memory"); (void)0; }) + +#endif /* _ARCH_PPC */ + +/* + * For (host) platforms we don't have explicit barrier definitions + * for, we use the gcc __sync_synchronize() primitive to generate a + * full barrier. This should be safe on all platforms, though it may + * be overkill for smp_mb_acquire() and smp_mb_release(). + */ +#ifndef smp_mb +#define smp_mb() __sync_synchronize() +#endif + +#ifndef smp_mb_acquire +#define smp_mb_acquire() __sync_synchronize() +#endif + +#ifndef smp_mb_release +#define smp_mb_release() __sync_synchronize() +#endif + +#ifndef smp_read_barrier_depends +#define smp_read_barrier_depends() barrier() +#endif + +#ifndef signal_barrier +#define signal_barrier() barrier() +#endif + +/* These will only be atomic if the processor does the fetch or store + * in a single issue memory operation + */ +#define qatomic_read__nocheck(p) (*(__typeof__(*(p)) volatile*) (p)) +#define qatomic_set__nocheck(p, i) ((*(__typeof__(*(p)) volatile*) (p)) = (i)) + +#define qatomic_read(ptr) qatomic_read__nocheck(ptr) +#define qatomic_set(ptr, i) qatomic_set__nocheck(ptr,i) + +/** + * qatomic_rcu_read - reads a RCU-protected pointer to a local variable + * into a RCU read-side critical section. The pointer can later be safely + * dereferenced within the critical section. + * + * This ensures that the pointer copy is invariant thorough the whole critical + * section. + * + * Inserts memory barriers on architectures that require them (currently only + * Alpha) and documents which pointers are protected by RCU. + * + * qatomic_rcu_read also includes a compiler barrier to ensure that + * value-speculative optimizations (e.g. VSS: Value Speculation + * Scheduling) does not perform the data read before the pointer read + * by speculating the value of the pointer. + * + * Should match qatomic_rcu_set(), qatomic_xchg(), qatomic_cmpxchg(). + */ +#define qatomic_rcu_read(ptr) ({ \ + typeof(*ptr) _val = qatomic_read(ptr); \ + smp_read_barrier_depends(); \ + _val; \ +}) + +/** + * qatomic_rcu_set - assigns (publicizes) a pointer to a new data structure + * meant to be read by RCU read-side critical sections. + * + * Documents which pointers will be dereferenced by RCU read-side critical + * sections and adds the required memory barriers on architectures requiring + * them. It also makes sure the compiler does not reorder code initializing the + * data structure before its publication. + * + * Should match qatomic_rcu_read(). + */ +#define qatomic_rcu_set(ptr, i) do { \ + smp_wmb(); \ + qatomic_set(ptr, i); \ +} while (0) + +#define qatomic_load_acquire(ptr) ({ \ + typeof(*ptr) _val = qatomic_read(ptr); \ + smp_mb_acquire(); \ + _val; \ +}) + +#define qatomic_store_release(ptr, i) do { \ + smp_mb_release(); \ + qatomic_set(ptr, i); \ +} while (0) + +#ifndef qatomic_xchg +#if defined(__clang__) +#define qatomic_xchg(ptr, i) __sync_swap(ptr, i) +#else +/* __sync_lock_test_and_set() is documented to be an acquire barrier only. */ +#define qatomic_xchg(ptr, i) (smp_mb(), __sync_lock_test_and_set(ptr, i)) +#endif +#endif +#define qatomic_xchg__nocheck qatomic_xchg + +/* Provide shorter names for GCC atomic builtins. */ +#define qatomic_fetch_inc(ptr) __sync_fetch_and_add(ptr, 1) +#define qatomic_fetch_dec(ptr) __sync_fetch_and_add(ptr, -1) + +#define qatomic_fetch_add(ptr, n) __sync_fetch_and_add(ptr, n) +#define qatomic_fetch_sub(ptr, n) __sync_fetch_and_sub(ptr, n) +#define qatomic_fetch_and(ptr, n) __sync_fetch_and_and(ptr, n) +#define qatomic_fetch_or(ptr, n) __sync_fetch_and_or(ptr, n) +#define qatomic_fetch_xor(ptr, n) __sync_fetch_and_xor(ptr, n) + +#define qatomic_inc_fetch(ptr) __sync_add_and_fetch(ptr, 1) +#define qatomic_dec_fetch(ptr) __sync_add_and_fetch(ptr, -1) +#define qatomic_add_fetch(ptr, n) __sync_add_and_fetch(ptr, n) +#define qatomic_sub_fetch(ptr, n) __sync_sub_and_fetch(ptr, n) +#define qatomic_and_fetch(ptr, n) __sync_and_and_fetch(ptr, n) +#define qatomic_or_fetch(ptr, n) __sync_or_and_fetch(ptr, n) +#define qatomic_xor_fetch(ptr, n) __sync_xor_and_fetch(ptr, n) + +#define qatomic_cmpxchg(ptr, old, new) \ + __sync_val_compare_and_swap(ptr, old, new) +#define qatomic_cmpxchg__nocheck(ptr, old, new) qatomic_cmpxchg(ptr, old, new) + +/* And even shorter names that return void. */ +#define qatomic_inc(ptr) ((void) __sync_fetch_and_add(ptr, 1)) +#define qatomic_dec(ptr) ((void) __sync_fetch_and_add(ptr, -1)) +#define qatomic_add(ptr, n) ((void) __sync_fetch_and_add(ptr, n)) +#define qatomic_sub(ptr, n) ((void) __sync_fetch_and_sub(ptr, n)) +#define qatomic_and(ptr, n) ((void) __sync_fetch_and_and(ptr, n)) +#define qatomic_or(ptr, n) ((void) __sync_fetch_and_or(ptr, n)) +#define qatomic_xor(ptr, n) ((void) __sync_fetch_and_xor(ptr, n)) + +#endif /* __ATOMIC_RELAXED */ + +#ifndef smp_wmb +#define smp_wmb() smp_mb_release() +#endif +#ifndef smp_rmb +#define smp_rmb() smp_mb_acquire() +#endif + +/* This is more efficient than a store plus a fence. */ +#if !defined(__SANITIZE_THREAD__) +#if defined(__i386__) || defined(__x86_64__) || defined(__s390x__) +#define qatomic_mb_set(ptr, i) ((void)qatomic_xchg(ptr, i)) +#endif +#endif + +/* qatomic_mb_read/set semantics map Java volatile variables. They are + * less expensive on some platforms (notably POWER) than fully + * sequentially consistent operations. + * + * As long as they are used as paired operations they are safe to + * use. See docs/devel/atomics.txt for more discussion. + */ + +#ifndef qatomic_mb_read +#define qatomic_mb_read(ptr) \ + qatomic_load_acquire(ptr) +#endif + +#ifndef qatomic_mb_set +#define qatomic_mb_set(ptr, i) do { \ + qatomic_store_release(ptr, i); \ + smp_mb(); \ +} while(0) +#endif + +#define qatomic_fetch_inc_nonzero(ptr) ({ \ + typeof_strip_qual(*ptr) _oldn = qatomic_read(ptr); \ + while (_oldn && qatomic_cmpxchg(ptr, _oldn, _oldn + 1) != _oldn) { \ + _oldn = qatomic_read(ptr); \ + } \ + _oldn; \ +}) + +/* Abstractions to access atomically (i.e. "once") i64/u64 variables */ +#ifdef CONFIG_ATOMIC64 +static inline int64_t qatomic_read_i64(const int64_t *ptr) +{ + /* use __nocheck because sizeof(void *) might be < sizeof(u64) */ + return qatomic_read__nocheck(ptr); +} + +static inline uint64_t qatomic_read_u64(const uint64_t *ptr) +{ + return qatomic_read__nocheck(ptr); +} + +static inline void qatomic_set_i64(int64_t *ptr, int64_t val) +{ + qatomic_set__nocheck(ptr, val); +} + +static inline void qatomic_set_u64(uint64_t *ptr, uint64_t val) +{ + qatomic_set__nocheck(ptr, val); +} + +static inline void qatomic64_init(void) +{ +} +#else /* !CONFIG_ATOMIC64 */ +int64_t qatomic_read_i64(const int64_t *ptr); +uint64_t qatomic_read_u64(const uint64_t *ptr); +void qatomic_set_i64(int64_t *ptr, int64_t val); +void qatomic_set_u64(uint64_t *ptr, uint64_t val); +void qatomic64_init(void); +#endif /* !CONFIG_ATOMIC64 */ + +#endif /* QEMU_ATOMIC_H */ diff --git a/include/qemu/atomic128.h b/include/qemu/atomic128.h new file mode 100644 index 000000000..ad2bcf45b --- /dev/null +++ b/include/qemu/atomic128.h @@ -0,0 +1,155 @@ +/* + * Simple interface for 128-bit atomic operations. + * + * Copyright (C) 2018 Linaro, Ltd. + * + * This work is licensed under the terms of the GNU GPL, version 2 or later. + * See the COPYING file in the top-level directory. + * + * See docs/devel/atomics.txt for discussion about the guarantees each + * atomic primitive is meant to provide. + */ + +#ifndef QEMU_ATOMIC128_H +#define QEMU_ATOMIC128_H + +#include "qemu/int128.h" + +/* + * GCC is a house divided about supporting large atomic operations. + * + * For hosts that only have large compare-and-swap, a legalistic reading + * of the C++ standard means that one cannot implement __atomic_read on + * read-only memory, and thus all atomic operations must synchronize + * through libatomic. + * + * See https://gcc.gnu.org/bugzilla/show_bug.cgi?id=80878 + * + * This interpretation is not especially helpful for QEMU. + * For softmmu, all RAM is always read/write from the hypervisor. + * For user-only, if the guest doesn't implement such an __atomic_read + * then the host need not worry about it either. + * + * Moreover, using libatomic is not an option, because its interface is + * built for std::atomic, and requires that *all* accesses to such an + * object go through the library. In our case we do not have an object + * in the C/C++ sense, but a view of memory as seen by the guest. + * The guest may issue a large atomic operation and then access those + * pieces using word-sized accesses. From the hypervisor, we have no + * way to connect those two actions. + * + * Therefore, special case each platform. + */ + +#if defined(CONFIG_ATOMIC128) +static inline Int128 atomic16_cmpxchg(Int128 *ptr, Int128 cmp, Int128 new) +{ + return qatomic_cmpxchg__nocheck(ptr, cmp, new); +} +# define HAVE_CMPXCHG128 1 +#elif defined(CONFIG_CMPXCHG128) +static inline Int128 atomic16_cmpxchg(Int128 *ptr, Int128 cmp, Int128 new) +{ + return __sync_val_compare_and_swap_16(ptr, cmp, new); +} +# define HAVE_CMPXCHG128 1 +#elif defined(__aarch64__) +/* Through gcc 8, aarch64 has no support for 128-bit at all. */ +static inline Int128 atomic16_cmpxchg(Int128 *ptr, Int128 cmp, Int128 new) +{ + uint64_t cmpl = int128_getlo(cmp), cmph = int128_gethi(cmp); + uint64_t newl = int128_getlo(new), newh = int128_gethi(new); + uint64_t oldl, oldh; + uint32_t tmp; + + asm("0: ldaxp %[oldl], %[oldh], %[mem]\n\t" + "cmp %[oldl], %[cmpl]\n\t" + "ccmp %[oldh], %[cmph], #0, eq\n\t" + "b.ne 1f\n\t" + "stlxp %w[tmp], %[newl], %[newh], %[mem]\n\t" + "cbnz %w[tmp], 0b\n" + "1:" + : [mem] "+m"(*ptr), [tmp] "=&r"(tmp), + [oldl] "=&r"(oldl), [oldh] "=&r"(oldh) + : [cmpl] "r"(cmpl), [cmph] "r"(cmph), + [newl] "r"(newl), [newh] "r"(newh) + : "memory", "cc"); + + return int128_make128(oldl, oldh); +} +# define HAVE_CMPXCHG128 1 +#else +/* Fallback definition that must be optimized away, or error. */ +Int128 QEMU_ERROR("unsupported atomic") + atomic16_cmpxchg(Int128 *ptr, Int128 cmp, Int128 new); +# define HAVE_CMPXCHG128 0 +#endif /* Some definition for HAVE_CMPXCHG128 */ + + +#if defined(CONFIG_ATOMIC128) +static inline Int128 atomic16_read(Int128 *ptr) +{ + return qatomic_read__nocheck(ptr); +} + +static inline void atomic16_set(Int128 *ptr, Int128 val) +{ + qatomic_set__nocheck(ptr, val); +} + +# define HAVE_ATOMIC128 1 +#elif !defined(CONFIG_USER_ONLY) && defined(__aarch64__) +/* We can do better than cmpxchg for AArch64. */ +static inline Int128 atomic16_read(Int128 *ptr) +{ + uint64_t l, h; + uint32_t tmp; + + /* The load must be paired with the store to guarantee not tearing. */ + asm("0: ldxp %[l], %[h], %[mem]\n\t" + "stxp %w[tmp], %[l], %[h], %[mem]\n\t" + "cbnz %w[tmp], 0b" + : [mem] "+m"(*ptr), [tmp] "=r"(tmp), [l] "=r"(l), [h] "=r"(h)); + + return int128_make128(l, h); +} + +static inline void atomic16_set(Int128 *ptr, Int128 val) +{ + uint64_t l = int128_getlo(val), h = int128_gethi(val); + uint64_t t1, t2; + + /* Load into temporaries to acquire the exclusive access lock. */ + asm("0: ldxp %[t1], %[t2], %[mem]\n\t" + "stxp %w[t1], %[l], %[h], %[mem]\n\t" + "cbnz %w[t1], 0b" + : [mem] "+m"(*ptr), [t1] "=&r"(t1), [t2] "=&r"(t2) + : [l] "r"(l), [h] "r"(h)); +} + +# define HAVE_ATOMIC128 1 +#elif !defined(CONFIG_USER_ONLY) && HAVE_CMPXCHG128 +static inline Int128 atomic16_read(Int128 *ptr) +{ + /* Maybe replace 0 with 0, returning the old value. */ + return atomic16_cmpxchg(ptr, 0, 0); +} + +static inline void atomic16_set(Int128 *ptr, Int128 val) +{ + Int128 old = *ptr, cmp; + do { + cmp = old; + old = atomic16_cmpxchg(ptr, cmp, val); + } while (old != cmp); +} + +# define HAVE_ATOMIC128 1 +#else +/* Fallback definitions that must be optimized away, or error. */ +Int128 QEMU_ERROR("unsupported atomic") atomic16_read(Int128 *ptr); +void QEMU_ERROR("unsupported atomic") atomic16_set(Int128 *ptr, Int128 val); +# define HAVE_ATOMIC128 0 +#endif /* Some definition for HAVE_ATOMIC128 */ + +#endif /* QEMU_ATOMIC128_H */ diff --git a/include/qemu/base64.h b/include/qemu/base64.h new file mode 100644 index 000000000..46a75fbbe --- /dev/null +++ b/include/qemu/base64.h @@ -0,0 +1,57 @@ +/* + * QEMU base64 helpers + * + * Copyright (c) 2015 Red Hat, Inc. + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, see . + * + */ + +#ifndef QEMU_BASE64_H +#define QEMU_BASE64_H + + + +/** + * qbase64_decode: + * @input: the (possibly) base64 encoded text + * @in_len: length of @input or -1 if NUL terminated + * @out_len: filled with length of decoded data + * @errp: pointer to a NULL-initialized error object + * + * Attempt to decode the (possibly) base64 encoded + * text provided in @input. If the @input text may + * contain embedded NUL characters, or may not be + * NUL terminated, then @in_len must be set to the + * known size of the @input buffer. + * + * Note that embedded NULs, or lack of a NUL terminator + * are considered invalid base64 data and errors + * will be reported to this effect. + * + * If decoding is successful, the decoded data will + * be returned and @out_len set to indicate the + * number of bytes in the decoded data. The caller + * must use g_free() to free the returned data when + * it is no longer required. + * + * Returns: the decoded data or NULL + */ +uint8_t *qbase64_decode(const char *input, + size_t in_len, + size_t *out_len, + Error **errp); + + +#endif /* QEMU_BASE64_H */ diff --git a/include/qemu/bcd.h b/include/qemu/bcd.h new file mode 100644 index 000000000..dfebacf1f --- /dev/null +++ b/include/qemu/bcd.h @@ -0,0 +1,15 @@ +#ifndef QEMU_BCD_H +#define QEMU_BCD_H + +/* Convert a byte between binary and BCD. */ +static inline uint8_t to_bcd(uint8_t val) +{ + return ((val / 10) << 4) | (val % 10); +} + +static inline uint8_t from_bcd(uint8_t val) +{ + return ((val >> 4) * 10) + (val & 0x0f); +} + +#endif diff --git a/include/qemu/bitmap.h b/include/qemu/bitmap.h new file mode 100644 index 000000000..82a1d2f41 --- /dev/null +++ b/include/qemu/bitmap.h @@ -0,0 +1,283 @@ +/* + * Bitmap Module + * + * Copyright (C) 2010 Corentin Chary + * + * Mostly inspired by (stolen from) linux/bitmap.h and linux/bitops.h + * + * This work is licensed under the terms of the GNU LGPL, version 2.1 or later. + * See the COPYING.LIB file in the top-level directory. + */ + +#ifndef BITMAP_H +#define BITMAP_H + + +#include "qemu/bitops.h" + +/* + * The available bitmap operations and their rough meaning in the + * case that the bitmap is a single unsigned long are thus: + * + * Note that nbits should be always a compile time evaluable constant. + * Otherwise many inlines will generate horrible code. + * + * bitmap_zero(dst, nbits) *dst = 0UL + * bitmap_fill(dst, nbits) *dst = ~0UL + * bitmap_copy(dst, src, nbits) *dst = *src + * bitmap_and(dst, src1, src2, nbits) *dst = *src1 & *src2 + * bitmap_or(dst, src1, src2, nbits) *dst = *src1 | *src2 + * bitmap_xor(dst, src1, src2, nbits) *dst = *src1 ^ *src2 + * bitmap_andnot(dst, src1, src2, nbits) *dst = *src1 & ~(*src2) + * bitmap_complement(dst, src, nbits) *dst = ~(*src) + * bitmap_equal(src1, src2, nbits) Are *src1 and *src2 equal? + * bitmap_intersects(src1, src2, nbits) Do *src1 and *src2 overlap? + * bitmap_empty(src, nbits) Are all bits zero in *src? + * bitmap_full(src, nbits) Are all bits set in *src? + * bitmap_set(dst, pos, nbits) Set specified bit area + * bitmap_set_atomic(dst, pos, nbits) Set specified bit area with atomic ops + * bitmap_clear(dst, pos, nbits) Clear specified bit area + * bitmap_test_and_clear_atomic(dst, pos, nbits) Test and clear area + * bitmap_find_next_zero_area(buf, len, pos, n, mask) Find bit free area + * bitmap_to_le(dst, src, nbits) Convert bitmap to little endian + * bitmap_from_le(dst, src, nbits) Convert bitmap from little endian + * bitmap_copy_with_src_offset(dst, src, offset, nbits) + * *dst = *src (with an offset into src) + * bitmap_copy_with_dst_offset(dst, src, offset, nbits) + * *dst = *src (with an offset into dst) + */ + +/* + * Also the following operations apply to bitmaps. + * + * set_bit(bit, addr) *addr |= bit + * clear_bit(bit, addr) *addr &= ~bit + * change_bit(bit, addr) *addr ^= bit + * test_bit(bit, addr) Is bit set in *addr? + * test_and_set_bit(bit, addr) Set bit and return old value + * test_and_clear_bit(bit, addr) Clear bit and return old value + * test_and_change_bit(bit, addr) Change bit and return old value + * find_first_zero_bit(addr, nbits) Position first zero bit in *addr + * find_first_bit(addr, nbits) Position first set bit in *addr + * find_next_zero_bit(addr, nbits, bit) Position next zero bit in *addr >= bit + * find_next_bit(addr, nbits, bit) Position next set bit in *addr >= bit + */ + +#define BITMAP_FIRST_WORD_MASK(start) (~0UL << ((start) & (BITS_PER_LONG - 1))) +#define BITMAP_LAST_WORD_MASK(nbits) (~0UL >> (-(nbits) & (BITS_PER_LONG - 1))) + +#define DECLARE_BITMAP(name,bits) \ + unsigned long name[BITS_TO_LONGS(bits)] + +#define small_nbits(nbits) \ + ((nbits) <= BITS_PER_LONG) + +int slow_bitmap_empty(const unsigned long *bitmap, long bits); +int slow_bitmap_full(const unsigned long *bitmap, long bits); +int slow_bitmap_equal(const unsigned long *bitmap1, + const unsigned long *bitmap2, long bits); +void slow_bitmap_complement(unsigned long *dst, const unsigned long *src, + long bits); +int slow_bitmap_and(unsigned long *dst, const unsigned long *bitmap1, + const unsigned long *bitmap2, long bits); +void slow_bitmap_or(unsigned long *dst, const unsigned long *bitmap1, + const unsigned long *bitmap2, long bits); +void slow_bitmap_xor(unsigned long *dst, const unsigned long *bitmap1, + const unsigned long *bitmap2, long bits); +int slow_bitmap_andnot(unsigned long *dst, const unsigned long *bitmap1, + const unsigned long *bitmap2, long bits); +int slow_bitmap_intersects(const unsigned long *bitmap1, + const unsigned long *bitmap2, long bits); +long slow_bitmap_count_one(const unsigned long *bitmap, long nbits); + +static inline unsigned long *bitmap_try_new(long nbits) +{ + long len = BITS_TO_LONGS(nbits) * sizeof(unsigned long); + return g_try_malloc0(len); +} + +static inline unsigned long *bitmap_new(long nbits) +{ + unsigned long *ptr = bitmap_try_new(nbits); + if (ptr == NULL) { + abort(); + } + return ptr; +} + +static inline void bitmap_zero(unsigned long *dst, long nbits) +{ + if (small_nbits(nbits)) { + *dst = 0UL; + } else { + long len = BITS_TO_LONGS(nbits) * sizeof(unsigned long); + memset(dst, 0, len); + } +} + +static inline void bitmap_fill(unsigned long *dst, long nbits) +{ + size_t nlongs = BITS_TO_LONGS(nbits); + if (!small_nbits(nbits)) { + long len = (nlongs - 1) * sizeof(unsigned long); + memset(dst, 0xff, len); + } + dst[nlongs - 1] = BITMAP_LAST_WORD_MASK(nbits); +} + +static inline void bitmap_copy(unsigned long *dst, const unsigned long *src, + long nbits) +{ + if (small_nbits(nbits)) { + *dst = *src; + } else { + long len = BITS_TO_LONGS(nbits) * sizeof(unsigned long); + memcpy(dst, src, len); + } +} + +static inline int bitmap_and(unsigned long *dst, const unsigned long *src1, + const unsigned long *src2, long nbits) +{ + if (small_nbits(nbits)) { + return (*dst = *src1 & *src2) != 0; + } + return slow_bitmap_and(dst, src1, src2, nbits); +} + +static inline void bitmap_or(unsigned long *dst, const unsigned long *src1, + const unsigned long *src2, long nbits) +{ + if (small_nbits(nbits)) { + *dst = *src1 | *src2; + } else { + slow_bitmap_or(dst, src1, src2, nbits); + } +} + +static inline void bitmap_xor(unsigned long *dst, const unsigned long *src1, + const unsigned long *src2, long nbits) +{ + if (small_nbits(nbits)) { + *dst = *src1 ^ *src2; + } else { + slow_bitmap_xor(dst, src1, src2, nbits); + } +} + +static inline int bitmap_andnot(unsigned long *dst, const unsigned long *src1, + const unsigned long *src2, long nbits) +{ + if (small_nbits(nbits)) { + return (*dst = *src1 & ~(*src2)) != 0; + } + return slow_bitmap_andnot(dst, src1, src2, nbits); +} + +static inline void bitmap_complement(unsigned long *dst, + const unsigned long *src, + long nbits) +{ + if (small_nbits(nbits)) { + *dst = ~(*src) & BITMAP_LAST_WORD_MASK(nbits); + } else { + slow_bitmap_complement(dst, src, nbits); + } +} + +static inline int bitmap_equal(const unsigned long *src1, + const unsigned long *src2, long nbits) +{ + if (small_nbits(nbits)) { + return ! ((*src1 ^ *src2) & BITMAP_LAST_WORD_MASK(nbits)); + } else { + return slow_bitmap_equal(src1, src2, nbits); + } +} + +static inline int bitmap_empty(const unsigned long *src, long nbits) +{ + if (small_nbits(nbits)) { + return ! (*src & BITMAP_LAST_WORD_MASK(nbits)); + } else { + return slow_bitmap_empty(src, nbits); + } +} + +static inline int bitmap_full(const unsigned long *src, long nbits) +{ + if (small_nbits(nbits)) { + return ! (~(*src) & BITMAP_LAST_WORD_MASK(nbits)); + } else { + return slow_bitmap_full(src, nbits); + } +} + +static inline int bitmap_intersects(const unsigned long *src1, + const unsigned long *src2, long nbits) +{ + if (small_nbits(nbits)) { + return ((*src1 & *src2) & BITMAP_LAST_WORD_MASK(nbits)) != 0; + } else { + return slow_bitmap_intersects(src1, src2, nbits); + } +} + +static inline long bitmap_count_one(const unsigned long *bitmap, long nbits) +{ + if (unlikely(!nbits)) { + return 0; + } + + if (small_nbits(nbits)) { + return ctpopl(*bitmap & BITMAP_LAST_WORD_MASK(nbits)); + } else { + return slow_bitmap_count_one(bitmap, nbits); + } +} + +static inline long bitmap_count_one_with_offset(const unsigned long *bitmap, + long offset, long nbits) +{ + long aligned_offset = QEMU_ALIGN_DOWN(offset, BITS_PER_LONG); + long redundant_bits = offset - aligned_offset; + long bits_to_count = nbits + redundant_bits; + const unsigned long *bitmap_start = bitmap + + aligned_offset / BITS_PER_LONG; + + return bitmap_count_one(bitmap_start, bits_to_count) - + bitmap_count_one(bitmap_start, redundant_bits); +} + +void bitmap_set(unsigned long *map, long i, long len); +void bitmap_set_atomic(unsigned long *map, long i, long len); +void bitmap_clear(unsigned long *map, long start, long nr); +bool bitmap_test_and_clear_atomic(unsigned long *map, long start, long nr); +void bitmap_copy_and_clear_atomic(unsigned long *dst, unsigned long *src, + long nr); +unsigned long bitmap_find_next_zero_area(unsigned long *map, + unsigned long size, + unsigned long start, + unsigned long nr, + unsigned long align_mask); + +static inline unsigned long *bitmap_zero_extend(unsigned long *old, + long old_nbits, long new_nbits) +{ + long new_len = BITS_TO_LONGS(new_nbits) * sizeof(unsigned long); + unsigned long *new = g_realloc(old, new_len); + bitmap_clear(new, old_nbits, new_nbits - old_nbits); + return new; +} + +void bitmap_to_le(unsigned long *dst, const unsigned long *src, + long nbits); +void bitmap_from_le(unsigned long *dst, const unsigned long *src, + long nbits); + +void bitmap_copy_with_src_offset(unsigned long *dst, const unsigned long *src, + unsigned long offset, unsigned long nbits); +void bitmap_copy_with_dst_offset(unsigned long *dst, const unsigned long *src, + unsigned long shift, unsigned long nbits); + +#endif /* BITMAP_H */ diff --git a/include/qemu/bitops.h b/include/qemu/bitops.h new file mode 100644 index 000000000..3acbf3384 --- /dev/null +++ b/include/qemu/bitops.h @@ -0,0 +1,583 @@ +/* + * Bitops Module + * + * Copyright (C) 2010 Corentin Chary + * + * Mostly inspired by (stolen from) linux/bitmap.h and linux/bitops.h + * + * This work is licensed under the terms of the GNU LGPL, version 2.1 or later. + * See the COPYING.LIB file in the top-level directory. + */ + +#ifndef BITOPS_H +#define BITOPS_H + + +#include "host-utils.h" +#include "atomic.h" + +#define BITS_PER_BYTE CHAR_BIT +#define BITS_PER_LONG (sizeof (unsigned long) * BITS_PER_BYTE) + +#define BIT(nr) (1UL << (nr)) +#define BIT_ULL(nr) (1ULL << (nr)) +#define BIT_MASK(nr) (1UL << ((nr) % BITS_PER_LONG)) +#define BIT_WORD(nr) ((nr) / BITS_PER_LONG) +#define BITS_TO_LONGS(nr) DIV_ROUND_UP(nr, BITS_PER_BYTE * sizeof(long)) + +#define MAKE_64BIT_MASK(shift, length) \ + (((~0ULL) >> (64 - (length))) << (shift)) + +/** + * set_bit - Set a bit in memory + * @nr: the bit to set + * @addr: the address to start counting from + */ +static inline void set_bit(long nr, unsigned long *addr) +{ + unsigned long mask = BIT_MASK(nr); + unsigned long *p = addr + BIT_WORD(nr); + + *p |= mask; +} + +/** + * set_bit_atomic - Set a bit in memory atomically + * @nr: the bit to set + * @addr: the address to start counting from + */ +static inline void set_bit_atomic(long nr, unsigned long *addr) +{ + unsigned long mask = BIT_MASK(nr); + unsigned long *p = addr + BIT_WORD(nr); + + qatomic_or(p, mask); +} + +/** + * clear_bit - Clears a bit in memory + * @nr: Bit to clear + * @addr: Address to start counting from + */ +static inline void clear_bit(long nr, unsigned long *addr) +{ + unsigned long mask = BIT_MASK(nr); + unsigned long *p = addr + BIT_WORD(nr); + + *p &= ~mask; +} + +/** + * change_bit - Toggle a bit in memory + * @nr: Bit to change + * @addr: Address to start counting from + */ +static inline void change_bit(long nr, unsigned long *addr) +{ + unsigned long mask = BIT_MASK(nr); + unsigned long *p = addr + BIT_WORD(nr); + + *p ^= mask; +} + +/** + * test_and_set_bit - Set a bit and return its old value + * @nr: Bit to set + * @addr: Address to count from + */ +static inline int test_and_set_bit(long nr, unsigned long *addr) +{ + unsigned long mask = BIT_MASK(nr); + unsigned long *p = addr + BIT_WORD(nr); + unsigned long old = *p; + + *p = old | mask; + return (old & mask) != 0; +} + +/** + * test_and_clear_bit - Clear a bit and return its old value + * @nr: Bit to clear + * @addr: Address to count from + */ +static inline int test_and_clear_bit(long nr, unsigned long *addr) +{ + unsigned long mask = BIT_MASK(nr); + unsigned long *p = addr + BIT_WORD(nr); + unsigned long old = *p; + + *p = old & ~mask; + return (old & mask) != 0; +} + +/** + * test_and_change_bit - Change a bit and return its old value + * @nr: Bit to change + * @addr: Address to count from + */ +static inline int test_and_change_bit(long nr, unsigned long *addr) +{ + unsigned long mask = BIT_MASK(nr); + unsigned long *p = addr + BIT_WORD(nr); + unsigned long old = *p; + + *p = old ^ mask; + return (old & mask) != 0; +} + +/** + * test_bit - Determine whether a bit is set + * @nr: bit number to test + * @addr: Address to start counting from + */ +static inline int test_bit(long nr, const unsigned long *addr) +{ + return 1UL & (addr[BIT_WORD(nr)] >> (nr & (BITS_PER_LONG-1))); +} + +/** + * find_last_bit - find the last set bit in a memory region + * @addr: The address to start the search at + * @size: The maximum size to search + * + * Returns the bit number of the first set bit, or size. + */ +unsigned long find_last_bit(const unsigned long *addr, + unsigned long size); + +/** + * find_next_bit - find the next set bit in a memory region + * @addr: The address to base the search on + * @offset: The bitnumber to start searching at + * @size: The bitmap size in bits + */ +unsigned long find_next_bit(const unsigned long *addr, + unsigned long size, + unsigned long offset); + +/** + * find_next_zero_bit - find the next cleared bit in a memory region + * @addr: The address to base the search on + * @offset: The bitnumber to start searching at + * @size: The bitmap size in bits + */ + +unsigned long find_next_zero_bit(const unsigned long *addr, + unsigned long size, + unsigned long offset); + +/** + * find_first_bit - find the first set bit in a memory region + * @addr: The address to start the search at + * @size: The maximum size to search + * + * Returns the bit number of the first set bit. + */ +static inline unsigned long find_first_bit(const unsigned long *addr, + unsigned long size) +{ + unsigned long result, tmp; + + for (result = 0; result < size; result += BITS_PER_LONG) { + tmp = *addr++; + if (tmp) { + result += ctzl(tmp); + return result < size ? result : size; + } + } + /* Not found */ + return size; +} + +/** + * find_first_zero_bit - find the first cleared bit in a memory region + * @addr: The address to start the search at + * @size: The maximum size to search + * + * Returns the bit number of the first cleared bit. + */ +static inline unsigned long find_first_zero_bit(const unsigned long *addr, + unsigned long size) +{ + return find_next_zero_bit(addr, size, 0); +} + +/** + * rol8 - rotate an 8-bit value left + * @word: value to rotate + * @shift: bits to roll + */ +static inline uint8_t rol8(uint8_t word, unsigned int shift) +{ + return (word << shift) | (word >> ((8 - shift) & 7)); +} + +/** + * ror8 - rotate an 8-bit value right + * @word: value to rotate + * @shift: bits to roll + */ +static inline uint8_t ror8(uint8_t word, unsigned int shift) +{ + return (word >> shift) | (word << ((8 - shift) & 7)); +} + +/** + * rol16 - rotate a 16-bit value left + * @word: value to rotate + * @shift: bits to roll + */ +static inline uint16_t rol16(uint16_t word, unsigned int shift) +{ + return (word << shift) | (word >> ((16 - shift) & 15)); +} + +/** + * ror16 - rotate a 16-bit value right + * @word: value to rotate + * @shift: bits to roll + */ +static inline uint16_t ror16(uint16_t word, unsigned int shift) +{ + return (word >> shift) | (word << ((16 - shift) & 15)); +} + +/** + * rol32 - rotate a 32-bit value left + * @word: value to rotate + * @shift: bits to roll + */ +static inline uint32_t rol32(uint32_t word, unsigned int shift) +{ + return (word << shift) | (word >> ((32 - shift) & 31)); +} + +/** + * ror32 - rotate a 32-bit value right + * @word: value to rotate + * @shift: bits to roll + */ +static inline uint32_t ror32(uint32_t word, unsigned int shift) +{ + return (word >> shift) | (word << ((32 - shift) & 31)); +} + +/** + * rol64 - rotate a 64-bit value left + * @word: value to rotate + * @shift: bits to roll + */ +static inline uint64_t rol64(uint64_t word, unsigned int shift) +{ + return (word << shift) | (word >> ((64 - shift) & 63)); +} + +/** + * ror64 - rotate a 64-bit value right + * @word: value to rotate + * @shift: bits to roll + */ +static inline uint64_t ror64(uint64_t word, unsigned int shift) +{ + return (word >> shift) | (word << ((64 - shift) & 63)); +} + +/** + * extract32: + * @value: the value to extract the bit field from + * @start: the lowest bit in the bit field (numbered from 0) + * @length: the length of the bit field + * + * Extract from the 32 bit input @value the bit field specified by the + * @start and @length parameters, and return it. The bit field must + * lie entirely within the 32 bit word. It is valid to request that + * all 32 bits are returned (ie @length 32 and @start 0). + * + * Returns: the value of the bit field extracted from the input value. + */ +static inline uint32_t extract32(uint32_t value, int start, int length) +{ + assert(start >= 0 && length > 0 && length <= 32 - start); + return (value >> start) & (~0U >> (32 - length)); +} + +/** + * extract8: + * @value: the value to extract the bit field from + * @start: the lowest bit in the bit field (numbered from 0) + * @length: the length of the bit field + * + * Extract from the 8 bit input @value the bit field specified by the + * @start and @length parameters, and return it. The bit field must + * lie entirely within the 8 bit word. It is valid to request that + * all 8 bits are returned (ie @length 8 and @start 0). + * + * Returns: the value of the bit field extracted from the input value. + */ +static inline uint8_t extract8(uint8_t value, int start, int length) +{ + assert(start >= 0 && length > 0 && length <= 8 - start); + return extract32(value, start, length); +} + +/** + * extract16: + * @value: the value to extract the bit field from + * @start: the lowest bit in the bit field (numbered from 0) + * @length: the length of the bit field + * + * Extract from the 16 bit input @value the bit field specified by the + * @start and @length parameters, and return it. The bit field must + * lie entirely within the 16 bit word. It is valid to request that + * all 16 bits are returned (ie @length 16 and @start 0). + * + * Returns: the value of the bit field extracted from the input value. + */ +static inline uint16_t extract16(uint16_t value, int start, int length) +{ + assert(start >= 0 && length > 0 && length <= 16 - start); + return extract32(value, start, length); +} + +/** + * extract64: + * @value: the value to extract the bit field from + * @start: the lowest bit in the bit field (numbered from 0) + * @length: the length of the bit field + * + * Extract from the 64 bit input @value the bit field specified by the + * @start and @length parameters, and return it. The bit field must + * lie entirely within the 64 bit word. It is valid to request that + * all 64 bits are returned (ie @length 64 and @start 0). + * + * Returns: the value of the bit field extracted from the input value. + */ +static inline uint64_t extract64(uint64_t value, int start, int length) +{ + assert(start >= 0 && length > 0 && length <= 64 - start); + return (value >> start) & (~0ULL >> (64 - length)); +} + +/** + * sextract32: + * @value: the value to extract the bit field from + * @start: the lowest bit in the bit field (numbered from 0) + * @length: the length of the bit field + * + * Extract from the 32 bit input @value the bit field specified by the + * @start and @length parameters, and return it, sign extended to + * an int32_t (ie with the most significant bit of the field propagated + * to all the upper bits of the return value). The bit field must lie + * entirely within the 32 bit word. It is valid to request that + * all 32 bits are returned (ie @length 32 and @start 0). + * + * Returns: the sign extended value of the bit field extracted from the + * input value. + */ +static inline int32_t sextract32(uint32_t value, int start, int length) +{ + assert(start >= 0 && length > 0 && length <= 32 - start); + /* Note that this implementation relies on right shift of signed + * integers being an arithmetic shift. + */ + return ((int32_t)(value << (32 - length - start))) >> (32 - length); +} + +/** + * sextract64: + * @value: the value to extract the bit field from + * @start: the lowest bit in the bit field (numbered from 0) + * @length: the length of the bit field + * + * Extract from the 64 bit input @value the bit field specified by the + * @start and @length parameters, and return it, sign extended to + * an int64_t (ie with the most significant bit of the field propagated + * to all the upper bits of the return value). The bit field must lie + * entirely within the 64 bit word. It is valid to request that + * all 64 bits are returned (ie @length 64 and @start 0). + * + * Returns: the sign extended value of the bit field extracted from the + * input value. + */ +static inline int64_t sextract64(uint64_t value, int start, int length) +{ + assert(start >= 0 && length > 0 && length <= 64 - start); + /* Note that this implementation relies on right shift of signed + * integers being an arithmetic shift. + */ + return ((int64_t)(value << (64 - length - start))) >> (64 - length); +} + +/** + * deposit32: + * @value: initial value to insert bit field into + * @start: the lowest bit in the bit field (numbered from 0) + * @length: the length of the bit field + * @fieldval: the value to insert into the bit field + * + * Deposit @fieldval into the 32 bit @value at the bit field specified + * by the @start and @length parameters, and return the modified + * @value. Bits of @value outside the bit field are not modified. + * Bits of @fieldval above the least significant @length bits are + * ignored. The bit field must lie entirely within the 32 bit word. + * It is valid to request that all 32 bits are modified (ie @length + * 32 and @start 0). + * + * Returns: the modified @value. + */ +static inline uint32_t deposit32(uint32_t value, int start, int length, + uint32_t fieldval) +{ + uint32_t mask; + assert(start >= 0 && length > 0 && length <= 32 - start); + mask = (~0U >> (32 - length)) << start; + return (value & ~mask) | ((fieldval << start) & mask); +} + +/** + * deposit64: + * @value: initial value to insert bit field into + * @start: the lowest bit in the bit field (numbered from 0) + * @length: the length of the bit field + * @fieldval: the value to insert into the bit field + * + * Deposit @fieldval into the 64 bit @value at the bit field specified + * by the @start and @length parameters, and return the modified + * @value. Bits of @value outside the bit field are not modified. + * Bits of @fieldval above the least significant @length bits are + * ignored. The bit field must lie entirely within the 64 bit word. + * It is valid to request that all 64 bits are modified (ie @length + * 64 and @start 0). + * + * Returns: the modified @value. + */ +static inline uint64_t deposit64(uint64_t value, int start, int length, + uint64_t fieldval) +{ + uint64_t mask; + assert(start >= 0 && length > 0 && length <= 64 - start); + mask = (~0ULL >> (64 - length)) << start; + return (value & ~mask) | ((fieldval << start) & mask); +} + +/** + * half_shuffle32: + * @x: 32-bit value (of which only the bottom 16 bits are of interest) + * + * Given an input value:: + * + * xxxx xxxx xxxx xxxx ABCD EFGH IJKL MNOP + * + * return the value where the bottom 16 bits are spread out into + * the odd bits in the word, and the even bits are zeroed:: + * + * 0A0B 0C0D 0E0F 0G0H 0I0J 0K0L 0M0N 0O0P + * + * Any bits set in the top half of the input are ignored. + * + * Returns: the shuffled bits. + */ +static inline uint32_t half_shuffle32(uint32_t x) +{ + /* This algorithm is from _Hacker's Delight_ section 7-2 "Shuffling Bits". + * It ignores any bits set in the top half of the input. + */ + x = ((x & 0xFF00) << 8) | (x & 0x00FF); + x = ((x << 4) | x) & 0x0F0F0F0F; + x = ((x << 2) | x) & 0x33333333; + x = ((x << 1) | x) & 0x55555555; + return x; +} + +/** + * half_shuffle64: + * @x: 64-bit value (of which only the bottom 32 bits are of interest) + * + * Given an input value:: + * + * xxxx xxxx xxxx .... xxxx xxxx ABCD EFGH IJKL MNOP QRST UVWX YZab cdef + * + * return the value where the bottom 32 bits are spread out into + * the odd bits in the word, and the even bits are zeroed:: + * + * 0A0B 0C0D 0E0F 0G0H 0I0J 0K0L 0M0N .... 0U0V 0W0X 0Y0Z 0a0b 0c0d 0e0f + * + * Any bits set in the top half of the input are ignored. + * + * Returns: the shuffled bits. + */ +static inline uint64_t half_shuffle64(uint64_t x) +{ + /* This algorithm is from _Hacker's Delight_ section 7-2 "Shuffling Bits". + * It ignores any bits set in the top half of the input. + */ + x = ((x & 0xFFFF0000ULL) << 16) | (x & 0xFFFF); + x = ((x << 8) | x) & 0x00FF00FF00FF00FFULL; + x = ((x << 4) | x) & 0x0F0F0F0F0F0F0F0FULL; + x = ((x << 2) | x) & 0x3333333333333333ULL; + x = ((x << 1) | x) & 0x5555555555555555ULL; + return x; +} + +/** + * half_unshuffle32: + * @x: 32-bit value (of which only the odd bits are of interest) + * + * Given an input value:: + * + * xAxB xCxD xExF xGxH xIxJ xKxL xMxN xOxP + * + * return the value where all the odd bits are compressed down + * into the low half of the word, and the high half is zeroed:: + * + * 0000 0000 0000 0000 ABCD EFGH IJKL MNOP + * + * Any even bits set in the input are ignored. + * + * Returns: the unshuffled bits. + */ +static inline uint32_t half_unshuffle32(uint32_t x) +{ + /* This algorithm is from _Hacker's Delight_ section 7-2 "Shuffling Bits". + * where it is called an inverse half shuffle. + */ + x &= 0x55555555; + x = ((x >> 1) | x) & 0x33333333; + x = ((x >> 2) | x) & 0x0F0F0F0F; + x = ((x >> 4) | x) & 0x00FF00FF; + x = ((x >> 8) | x) & 0x0000FFFF; + return x; +} + +/** + * half_unshuffle64: + * @x: 64-bit value (of which only the odd bits are of interest) + * + * Given an input value:: + * + * xAxB xCxD xExF xGxH xIxJ xKxL xMxN .... xUxV xWxX xYxZ xaxb xcxd xexf + * + * return the value where all the odd bits are compressed down + * into the low half of the word, and the high half is zeroed:: + * + * 0000 0000 0000 .... 0000 0000 ABCD EFGH IJKL MNOP QRST UVWX YZab cdef + * + * Any even bits set in the input are ignored. + * + * Returns: the unshuffled bits. + */ +static inline uint64_t half_unshuffle64(uint64_t x) +{ + /* This algorithm is from _Hacker's Delight_ section 7-2 "Shuffling Bits". + * where it is called an inverse half shuffle. + */ + x &= 0x5555555555555555ULL; + x = ((x >> 1) | x) & 0x3333333333333333ULL; + x = ((x >> 2) | x) & 0x0F0F0F0F0F0F0F0FULL; + x = ((x >> 4) | x) & 0x00FF00FF00FF00FFULL; + x = ((x >> 8) | x) & 0x0000FFFF0000FFFFULL; + x = ((x >> 16) | x) & 0x00000000FFFFFFFFULL; + return x; +} + +#endif diff --git a/include/qemu/bswap.h b/include/qemu/bswap.h new file mode 100644 index 000000000..8b01c3804 --- /dev/null +++ b/include/qemu/bswap.h @@ -0,0 +1,557 @@ +#ifndef BSWAP_H +#define BSWAP_H + +#include "fpu/softfloat-types.h" + +#ifdef CONFIG_MACHINE_BSWAP_H +# include +# include +#elif defined(__FreeBSD__) +# include +#elif defined(__HAIKU__) +# include +#elif defined(CONFIG_BYTESWAP_H) +# include + +static inline uint16_t bswap16(uint16_t x) +{ + return bswap_16(x); +} + +static inline uint32_t bswap32(uint32_t x) +{ + return bswap_32(x); +} + +static inline uint64_t bswap64(uint64_t x) +{ + return bswap_64(x); +} +# else +static inline uint16_t bswap16(uint16_t x) +{ + return (((x & 0x00ff) << 8) | + ((x & 0xff00) >> 8)); +} + +static inline uint32_t bswap32(uint32_t x) +{ + return (((x & 0x000000ffU) << 24) | + ((x & 0x0000ff00U) << 8) | + ((x & 0x00ff0000U) >> 8) | + ((x & 0xff000000U) >> 24)); +} + +static inline uint64_t bswap64(uint64_t x) +{ + return (((x & 0x00000000000000ffULL) << 56) | + ((x & 0x000000000000ff00ULL) << 40) | + ((x & 0x0000000000ff0000ULL) << 24) | + ((x & 0x00000000ff000000ULL) << 8) | + ((x & 0x000000ff00000000ULL) >> 8) | + ((x & 0x0000ff0000000000ULL) >> 24) | + ((x & 0x00ff000000000000ULL) >> 40) | + ((x & 0xff00000000000000ULL) >> 56)); +} +#endif /* ! CONFIG_MACHINE_BSWAP_H */ + +static inline void bswap16s(uint16_t *s) +{ + *s = bswap16(*s); +} + +static inline void bswap32s(uint32_t *s) +{ + *s = bswap32(*s); +} + +static inline void bswap64s(uint64_t *s) +{ + *s = bswap64(*s); +} + +#if defined(HOST_WORDS_BIGENDIAN) +#define be_bswap(v, size) (v) +#define le_bswap(v, size) glue(bswap, size)(v) +#define be_bswaps(v, size) +#define le_bswaps(p, size) do { *p = glue(bswap, size)(*p); } while(0) +#else +#define le_bswap(v, size) (v) +#define be_bswap(v, size) glue(bswap, size)(v) +#define le_bswaps(v, size) +#define be_bswaps(p, size) do { *p = glue(bswap, size)(*p); } while(0) +#endif + +/** + * Endianness conversion functions between host cpu and specified endianness. + * (We list the complete set of prototypes produced by the macros below + * to assist people who search the headers to find their definitions.) + * + * uint16_t le16_to_cpu(uint16_t v); + * uint32_t le32_to_cpu(uint32_t v); + * uint64_t le64_to_cpu(uint64_t v); + * uint16_t be16_to_cpu(uint16_t v); + * uint32_t be32_to_cpu(uint32_t v); + * uint64_t be64_to_cpu(uint64_t v); + * + * Convert the value @v from the specified format to the native + * endianness of the host CPU by byteswapping if necessary, and + * return the converted value. + * + * uint16_t cpu_to_le16(uint16_t v); + * uint32_t cpu_to_le32(uint32_t v); + * uint64_t cpu_to_le64(uint64_t v); + * uint16_t cpu_to_be16(uint16_t v); + * uint32_t cpu_to_be32(uint32_t v); + * uint64_t cpu_to_be64(uint64_t v); + * + * Convert the value @v from the native endianness of the host CPU to + * the specified format by byteswapping if necessary, and return + * the converted value. + * + * void le16_to_cpus(uint16_t *v); + * void le32_to_cpus(uint32_t *v); + * void le64_to_cpus(uint64_t *v); + * void be16_to_cpus(uint16_t *v); + * void be32_to_cpus(uint32_t *v); + * void be64_to_cpus(uint64_t *v); + * + * Do an in-place conversion of the value pointed to by @v from the + * specified format to the native endianness of the host CPU. + * + * void cpu_to_le16s(uint16_t *v); + * void cpu_to_le32s(uint32_t *v); + * void cpu_to_le64s(uint64_t *v); + * void cpu_to_be16s(uint16_t *v); + * void cpu_to_be32s(uint32_t *v); + * void cpu_to_be64s(uint64_t *v); + * + * Do an in-place conversion of the value pointed to by @v from the + * native endianness of the host CPU to the specified format. + * + * Both X_to_cpu() and cpu_to_X() perform the same operation; you + * should use whichever one is better documenting of the function your + * code is performing. + * + * Do not use these functions for conversion of values which are in guest + * memory, since the data may not be sufficiently aligned for the host CPU's + * load and store instructions. Instead you should use the ld*_p() and + * st*_p() functions, which perform loads and stores of data of any + * required size and endianness and handle possible misalignment. + */ + +#define CPU_CONVERT(endian, size, type)\ +static inline type endian ## size ## _to_cpu(type v)\ +{\ + return glue(endian, _bswap)(v, size);\ +}\ +\ +static inline type cpu_to_ ## endian ## size(type v)\ +{\ + return glue(endian, _bswap)(v, size);\ +}\ +\ +static inline void endian ## size ## _to_cpus(type *p)\ +{\ + glue(endian, _bswaps)(p, size);\ +}\ +\ +static inline void cpu_to_ ## endian ## size ## s(type *p)\ +{\ + glue(endian, _bswaps)(p, size);\ +} + +CPU_CONVERT(be, 16, uint16_t) +CPU_CONVERT(be, 32, uint32_t) +CPU_CONVERT(be, 64, uint64_t) + +CPU_CONVERT(le, 16, uint16_t) +CPU_CONVERT(le, 32, uint32_t) +CPU_CONVERT(le, 64, uint64_t) + +/* + * Same as cpu_to_le{16,32}, except that gcc will figure the result is + * a compile-time constant if you pass in a constant. So this can be + * used to initialize static variables. + */ +#if defined(HOST_WORDS_BIGENDIAN) +# define const_le32(_x) \ + ((((_x) & 0x000000ffU) << 24) | \ + (((_x) & 0x0000ff00U) << 8) | \ + (((_x) & 0x00ff0000U) >> 8) | \ + (((_x) & 0xff000000U) >> 24)) +# define const_le16(_x) \ + ((((_x) & 0x00ff) << 8) | \ + (((_x) & 0xff00) >> 8)) +#else +# define const_le32(_x) (_x) +# define const_le16(_x) (_x) +#endif + +/* Unions for reinterpreting between floats and integers. */ + +typedef union { + float32 f; + uint32_t l; +} CPU_FloatU; + +typedef union { + float64 d; +#if defined(HOST_WORDS_BIGENDIAN) + struct { + uint32_t upper; + uint32_t lower; + } l; +#else + struct { + uint32_t lower; + uint32_t upper; + } l; +#endif + uint64_t ll; +} CPU_DoubleU; + +typedef union { + floatx80 d; + struct { + uint64_t lower; + uint16_t upper; + } l; +} CPU_LDoubleU; + +typedef union { + float128 q; +#if defined(HOST_WORDS_BIGENDIAN) + struct { + uint32_t upmost; + uint32_t upper; + uint32_t lower; + uint32_t lowest; + } l; + struct { + uint64_t upper; + uint64_t lower; + } ll; +#else + struct { + uint32_t lowest; + uint32_t lower; + uint32_t upper; + uint32_t upmost; + } l; + struct { + uint64_t lower; + uint64_t upper; + } ll; +#endif +} CPU_QuadU; + +/* unaligned/endian-independent pointer access */ + +/* + * the generic syntax is: + * + * load: ld{type}{sign}{size}_{endian}_p(ptr) + * + * store: st{type}{size}_{endian}_p(ptr, val) + * + * Note there are small differences with the softmmu access API! + * + * type is: + * (empty): integer access + * f : float access + * + * sign is: + * (empty): for 32 or 64 bit sizes (including floats and doubles) + * u : unsigned + * s : signed + * + * size is: + * b: 8 bits + * w: 16 bits + * l: 32 bits + * q: 64 bits + * + * endian is: + * he : host endian + * be : big endian + * le : little endian + * te : target endian + * (except for byte accesses, which have no endian infix). + * + * The target endian accessors are obviously only available to source + * files which are built per-target; they are defined in cpu-all.h. + * + * In all cases these functions take a host pointer. + * For accessors that take a guest address rather than a + * host address, see the cpu_{ld,st}_* accessors defined in + * cpu_ldst.h. + * + * For cases where the size to be used is not fixed at compile time, + * there are + * stn_{endian}_p(ptr, sz, val) + * which stores @val to @ptr as an @endian-order number @sz bytes in size + * and + * ldn_{endian}_p(ptr, sz) + * which loads @sz bytes from @ptr as an unsigned @endian-order number + * and returns it in a uint64_t. + */ + +static inline int ldub_p(const void *ptr) +{ + return *(uint8_t *)ptr; +} + +static inline int ldsb_p(const void *ptr) +{ + return *(int8_t *)ptr; +} + +static inline void stb_p(void *ptr, uint8_t v) +{ + *(uint8_t *)ptr = v; +} + +/* + * Any compiler worth its salt will turn these memcpy into native unaligned + * operations. Thus we don't need to play games with packed attributes, or + * inline byte-by-byte stores. + * Some compilation environments (eg some fortify-source implementations) + * may intercept memcpy() in a way that defeats the compiler optimization, + * though, so we use __builtin_memcpy() to give ourselves the best chance + * of good performance. + */ + +static inline int lduw_he_p(const void *ptr) +{ + uint16_t r; + __builtin_memcpy(&r, ptr, sizeof(r)); + return r; +} + +static inline int ldsw_he_p(const void *ptr) +{ + int16_t r; + __builtin_memcpy(&r, ptr, sizeof(r)); + return r; +} + +static inline void stw_he_p(void *ptr, uint16_t v) +{ + __builtin_memcpy(ptr, &v, sizeof(v)); +} + +static inline int ldl_he_p(const void *ptr) +{ + int32_t r; + __builtin_memcpy(&r, ptr, sizeof(r)); + return r; +} + +static inline void stl_he_p(void *ptr, uint32_t v) +{ + __builtin_memcpy(ptr, &v, sizeof(v)); +} + +static inline uint64_t ldq_he_p(const void *ptr) +{ + uint64_t r; + __builtin_memcpy(&r, ptr, sizeof(r)); + return r; +} + +static inline void stq_he_p(void *ptr, uint64_t v) +{ + __builtin_memcpy(ptr, &v, sizeof(v)); +} + +static inline int lduw_le_p(const void *ptr) +{ + return (uint16_t)le_bswap(lduw_he_p(ptr), 16); +} + +static inline int ldsw_le_p(const void *ptr) +{ + return (int16_t)le_bswap(lduw_he_p(ptr), 16); +} + +static inline int ldl_le_p(const void *ptr) +{ + return le_bswap(ldl_he_p(ptr), 32); +} + +static inline uint64_t ldq_le_p(const void *ptr) +{ + return le_bswap(ldq_he_p(ptr), 64); +} + +static inline void stw_le_p(void *ptr, uint16_t v) +{ + stw_he_p(ptr, le_bswap(v, 16)); +} + +static inline void stl_le_p(void *ptr, uint32_t v) +{ + stl_he_p(ptr, le_bswap(v, 32)); +} + +static inline void stq_le_p(void *ptr, uint64_t v) +{ + stq_he_p(ptr, le_bswap(v, 64)); +} + +/* float access */ + +static inline float32 ldfl_le_p(const void *ptr) +{ + CPU_FloatU u; + u.l = ldl_le_p(ptr); + return u.f; +} + +static inline void stfl_le_p(void *ptr, float32 v) +{ + CPU_FloatU u; + u.f = v; + stl_le_p(ptr, u.l); +} + +static inline float64 ldfq_le_p(const void *ptr) +{ + CPU_DoubleU u; + u.ll = ldq_le_p(ptr); + return u.d; +} + +static inline void stfq_le_p(void *ptr, float64 v) +{ + CPU_DoubleU u; + u.d = v; + stq_le_p(ptr, u.ll); +} + +static inline int lduw_be_p(const void *ptr) +{ + return (uint16_t)be_bswap(lduw_he_p(ptr), 16); +} + +static inline int ldsw_be_p(const void *ptr) +{ + return (int16_t)be_bswap(lduw_he_p(ptr), 16); +} + +static inline int ldl_be_p(const void *ptr) +{ + return be_bswap(ldl_he_p(ptr), 32); +} + +static inline uint64_t ldq_be_p(const void *ptr) +{ + return be_bswap(ldq_he_p(ptr), 64); +} + +static inline void stw_be_p(void *ptr, uint16_t v) +{ + stw_he_p(ptr, be_bswap(v, 16)); +} + +static inline void stl_be_p(void *ptr, uint32_t v) +{ + stl_he_p(ptr, be_bswap(v, 32)); +} + +static inline void stq_be_p(void *ptr, uint64_t v) +{ + stq_he_p(ptr, be_bswap(v, 64)); +} + +/* float access */ + +static inline float32 ldfl_be_p(const void *ptr) +{ + CPU_FloatU u; + u.l = ldl_be_p(ptr); + return u.f; +} + +static inline void stfl_be_p(void *ptr, float32 v) +{ + CPU_FloatU u; + u.f = v; + stl_be_p(ptr, u.l); +} + +static inline float64 ldfq_be_p(const void *ptr) +{ + CPU_DoubleU u; + u.ll = ldq_be_p(ptr); + return u.d; +} + +static inline void stfq_be_p(void *ptr, float64 v) +{ + CPU_DoubleU u; + u.d = v; + stq_be_p(ptr, u.ll); +} + +static inline unsigned long leul_to_cpu(unsigned long v) +{ +#if HOST_LONG_BITS == 32 + return le_bswap(v, 32); +#elif HOST_LONG_BITS == 64 + return le_bswap(v, 64); +#else +# error Unknown sizeof long +#endif +} + +/* Store v to p as a sz byte value in host order */ +#define DO_STN_LDN_P(END) \ + static inline void stn_## END ## _p(void *ptr, int sz, uint64_t v) \ + { \ + switch (sz) { \ + case 1: \ + stb_p(ptr, v); \ + break; \ + case 2: \ + stw_ ## END ## _p(ptr, v); \ + break; \ + case 4: \ + stl_ ## END ## _p(ptr, v); \ + break; \ + case 8: \ + stq_ ## END ## _p(ptr, v); \ + break; \ + default: \ + g_assert_not_reached(); \ + } \ + } \ + static inline uint64_t ldn_## END ## _p(const void *ptr, int sz) \ + { \ + switch (sz) { \ + case 1: \ + return ldub_p(ptr); \ + case 2: \ + return lduw_ ## END ## _p(ptr); \ + case 4: \ + return (uint32_t)ldl_ ## END ## _p(ptr); \ + case 8: \ + return ldq_ ## END ## _p(ptr); \ + default: \ + g_assert_not_reached(); \ + } \ + } + +DO_STN_LDN_P(he) +DO_STN_LDN_P(le) +DO_STN_LDN_P(be) + +#undef DO_STN_LDN_P + +#undef le_bswap +#undef be_bswap +#undef le_bswaps +#undef be_bswaps + +#endif /* BSWAP_H */ diff --git a/include/qemu/buffer.h b/include/qemu/buffer.h new file mode 100644 index 000000000..d34d2c857 --- /dev/null +++ b/include/qemu/buffer.h @@ -0,0 +1,160 @@ +/* + * QEMU generic buffers + * + * Copyright (c) 2015 Red Hat, Inc. + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, see . + * + */ + +#ifndef QEMU_BUFFER_H +#define QEMU_BUFFER_H + + +typedef struct Buffer Buffer; + +/** + * Buffer: + * + * The Buffer object provides a simple dynamically resizing + * array, with separate tracking of capacity and usage. This + * is typically useful when buffering I/O or processing data. + */ + +struct Buffer { + char *name; + size_t capacity; + size_t offset; + uint64_t avg_size; + uint8_t *buffer; +}; + +/** + * buffer_init: + * @buffer: the buffer object + * @name: buffer name + * + * Optionally attach a name to the buffer, to make it easier + * to identify in debug traces. + */ +void buffer_init(Buffer *buffer, const char *name, ...) + GCC_FMT_ATTR(2, 3); + +/** + * buffer_shrink: + * @buffer: the buffer object + * + * Try to shrink the buffer. Checks current buffer capacity and size + * and reduces capacity in case only a fraction of the buffer is + * actually used. + */ +void buffer_shrink(Buffer *buffer); + +/** + * buffer_reserve: + * @buffer: the buffer object + * @len: the minimum required free space + * + * Ensure that the buffer has space allocated for at least + * @len bytes. If the current buffer is too small, it will + * be reallocated, possibly to a larger size than requested. + */ +void buffer_reserve(Buffer *buffer, size_t len); + +/** + * buffer_reset: + * @buffer: the buffer object + * + * Reset the length of the stored data to zero, but do + * not free / reallocate the memory buffer + */ +void buffer_reset(Buffer *buffer); + +/** + * buffer_free: + * @buffer: the buffer object + * + * Reset the length of the stored data to zero and also + * free the internal memory buffer + */ +void buffer_free(Buffer *buffer); + +/** + * buffer_append: + * @buffer: the buffer object + * @data: the data block to append + * @len: the length of @data in bytes + * + * Append the contents of @data to the end of the buffer. + * The caller must ensure that the buffer has sufficient + * free space for @len bytes, typically by calling the + * buffer_reserve() method prior to appending. + */ +void buffer_append(Buffer *buffer, const void *data, size_t len); + +/** + * buffer_advance: + * @buffer: the buffer object + * @len: the number of bytes to skip + * + * Remove @len bytes of data from the head of the buffer. + * The internal buffer will not be reallocated, so will + * have at least @len bytes of free space after this + * call completes + */ +void buffer_advance(Buffer *buffer, size_t len); + +/** + * buffer_end: + * @buffer: the buffer object + * + * Get a pointer to the tail end of the internal buffer + * The returned pointer is only valid until the next + * call to buffer_reserve(). + * + * Returns: the tail of the buffer + */ +uint8_t *buffer_end(Buffer *buffer); + +/** + * buffer_empty: + * @buffer: the buffer object + * + * Determine if the buffer contains any current data + * + * Returns: true if the buffer holds data, false otherwise + */ +gboolean buffer_empty(Buffer *buffer); + +/** + * buffer_move_empty: + * @to: destination buffer object + * @from: source buffer object + * + * Moves buffer, without copying data. 'to' buffer must be empty. + * 'from' buffer is empty and zero-sized on return. + */ +void buffer_move_empty(Buffer *to, Buffer *from); + +/** + * buffer_move: + * @to: destination buffer object + * @from: source buffer object + * + * Moves buffer, copying data (unless 'to' buffer happens to be empty). + * 'from' buffer is empty and zero-sized on return. + */ +void buffer_move(Buffer *to, Buffer *from); + +#endif /* QEMU_BUFFER_H */ diff --git a/include/qemu/co-shared-resource.h b/include/qemu/co-shared-resource.h new file mode 100644 index 000000000..4e4503004 --- /dev/null +++ b/include/qemu/co-shared-resource.h @@ -0,0 +1,71 @@ +/* + * Helper functionality for distributing a fixed total amount of + * an abstract resource among multiple coroutines. + * + * Copyright (c) 2019 Virtuozzo International GmbH + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ + +#ifndef QEMU_CO_SHARED_RESOURCE_H +#define QEMU_CO_SHARED_RESOURCE_H + + +typedef struct SharedResource SharedResource; + +/* + * Create SharedResource structure + * + * @total: total amount of some resource to be shared between clients + * + * Note: this API is not thread-safe. + */ +SharedResource *shres_create(uint64_t total); + +/* + * Release SharedResource structure + * + * This function may only be called once everything allocated by all + * clients has been deallocated. + */ +void shres_destroy(SharedResource *s); + +/* + * Try to allocate an amount of @n. Return true on success, and false + * if there is too little left of the collective resource to fulfill + * the request. + */ +bool co_try_get_from_shres(SharedResource *s, uint64_t n); + +/* + * Allocate an amount of @n, and, if necessary, yield until + * that becomes possible. + */ +void coroutine_fn co_get_from_shres(SharedResource *s, uint64_t n); + +/* + * Deallocate an amount of @n. The total amount allocated by a caller + * does not need to be deallocated/released with a single call, but may + * be split over several calls. For example, get(4), get(3), and then + * put(5), put(2). + */ +void coroutine_fn co_put_to_shres(SharedResource *s, uint64_t n); + + +#endif /* QEMU_CO_SHARED_RESOURCE_H */ diff --git a/include/qemu/compiler.h b/include/qemu/compiler.h new file mode 100644 index 000000000..c76281f35 --- /dev/null +++ b/include/qemu/compiler.h @@ -0,0 +1,246 @@ +/* compiler.h: macros to abstract away compiler specifics + * + * This work is licensed under the terms of the GNU GPL, version 2 or later. + * See the COPYING file in the top-level directory. + */ + +#ifndef COMPILER_H +#define COMPILER_H + +#if defined __clang_analyzer__ || defined __COVERITY__ +#define QEMU_STATIC_ANALYSIS 1 +#endif + +/*---------------------------------------------------------------------------- +| The macro QEMU_GNUC_PREREQ tests for minimum version of the GNU C compiler. +| The code is a copy of SOFTFLOAT_GNUC_PREREQ, see softfloat-macros.h. +*----------------------------------------------------------------------------*/ +#if defined(__GNUC__) && defined(__GNUC_MINOR__) +# define QEMU_GNUC_PREREQ(maj, min) \ + ((__GNUC__ << 16) + __GNUC_MINOR__ >= ((maj) << 16) + (min)) +#else +# define QEMU_GNUC_PREREQ(maj, min) 0 +#endif + +#define QEMU_NORETURN __attribute__ ((__noreturn__)) + +#define QEMU_WARN_UNUSED_RESULT __attribute__((warn_unused_result)) + +#define QEMU_SENTINEL __attribute__((sentinel)) + +#if defined(_WIN32) && (defined(__x86_64__) || defined(__i386__)) +# define QEMU_PACKED __attribute__((gcc_struct, packed)) +#else +# define QEMU_PACKED __attribute__((packed)) +#endif + +#define QEMU_ALIGNED(X) __attribute__((aligned(X))) + +#ifndef glue +#define xglue(x, y) x ## y +#define glue(x, y) xglue(x, y) +#define stringify(s) tostring(s) +#define tostring(s) #s +#endif + +#ifndef likely +#if __GNUC__ < 3 +#define __builtin_expect(x, n) (x) +#endif + +#define likely(x) __builtin_expect(!!(x), 1) +#define unlikely(x) __builtin_expect(!!(x), 0) +#endif + +#ifndef container_of +#define container_of(ptr, type, member) ({ \ + const typeof(((type *) 0)->member) *__mptr = (ptr); \ + (type *) ((char *) __mptr - offsetof(type, member));}) +#endif + +#define sizeof_field(type, field) sizeof(((type *)0)->field) + +/* + * Calculate the number of bytes up to and including the given 'field' of + * 'container'. + */ +#define endof(container, field) \ + (offsetof(container, field) + sizeof_field(container, field)) + +/* Convert from a base type to a parent type, with compile time checking. */ +#ifdef __GNUC__ +#define DO_UPCAST(type, field, dev) ( __extension__ ( { \ + char __attribute__((unused)) offset_must_be_zero[ \ + -offsetof(type, field)]; \ + container_of(dev, type, field);})) +#else +#define DO_UPCAST(type, field, dev) container_of(dev, type, field) +#endif + +#define typeof_field(type, field) typeof(((type *)0)->field) +#define type_check(t1,t2) ((t1*)0 - (t2*)0) + +#define QEMU_BUILD_BUG_ON_STRUCT(x) \ + struct { \ + int:(x) ? -1 : 1; \ + } + +/* QEMU_BUILD_BUG_MSG() emits the message given if _Static_assert is + * supported; otherwise, it will be omitted from the compiler error + * message (but as it remains present in the source code, it can still + * be useful when debugging). */ +#if defined(CONFIG_STATIC_ASSERT) +#define QEMU_BUILD_BUG_MSG(x, msg) _Static_assert(!(x), msg) +#elif defined(__COUNTER__) +#define QEMU_BUILD_BUG_MSG(x, msg) typedef QEMU_BUILD_BUG_ON_STRUCT(x) \ + glue(qemu_build_bug_on__, __COUNTER__) __attribute__((unused)) +#else +#define QEMU_BUILD_BUG_MSG(x, msg) +#endif + +#define QEMU_BUILD_BUG_ON(x) QEMU_BUILD_BUG_MSG(x, "not expecting: " #x) + +#define QEMU_BUILD_BUG_ON_ZERO(x) (sizeof(QEMU_BUILD_BUG_ON_STRUCT(x)) - \ + sizeof(QEMU_BUILD_BUG_ON_STRUCT(x))) + +#if defined __GNUC__ +# if !QEMU_GNUC_PREREQ(4, 4) + /* gcc versions before 4.4.x don't support gnu_printf, so use printf. */ +# define GCC_FMT_ATTR(n, m) __attribute__((format(printf, n, m))) +# else + /* Use gnu_printf when supported (qemu uses standard format strings). */ +# define GCC_FMT_ATTR(n, m) __attribute__((format(gnu_printf, n, m))) +# if defined(_WIN32) + /* Map __printf__ to __gnu_printf__ because we want standard format strings + * even when MinGW or GLib include files use __printf__. */ +# define __printf__ __gnu_printf__ +# endif +# endif +#else +#define GCC_FMT_ATTR(n, m) +#endif + +#ifndef __has_warning +#define __has_warning(x) 0 /* compatibility with non-clang compilers */ +#endif + +#ifndef __has_feature +#define __has_feature(x) 0 /* compatibility with non-clang compilers */ +#endif + +#ifndef __has_builtin +#define __has_builtin(x) 0 /* compatibility with non-clang compilers */ +#endif + +#if __has_builtin(__builtin_assume_aligned) || !defined(__clang__) +#define HAS_ASSUME_ALIGNED +#endif + +#ifndef __has_attribute +#define __has_attribute(x) 0 /* compatibility with older GCC */ +#endif + +/* + * GCC doesn't provide __has_attribute() until GCC 5, but we know all the GCC + * versions we support have the "flatten" attribute. Clang may not have the + * "flatten" attribute but always has __has_attribute() to check for it. + */ +#if __has_attribute(flatten) || !defined(__clang__) +# define QEMU_FLATTEN __attribute__((flatten)) +#else +# define QEMU_FLATTEN +#endif + +/* + * If __attribute__((error)) is present, use it to produce an error at + * compile time. Otherwise, one must wait for the linker to diagnose + * the missing symbol. + */ +#if __has_attribute(error) +# define QEMU_ERROR(X) __attribute__((error(X))) +#else +# define QEMU_ERROR(X) +#endif + +/* + * The nonstring variable attribute specifies that an object or member + * declaration with type array of char or pointer to char is intended + * to store character arrays that do not necessarily contain a terminating + * NUL character. This is useful in detecting uses of such arrays or pointers + * with functions that expect NUL-terminated strings, and to avoid warnings + * when such an array or pointer is used as an argument to a bounded string + * manipulation function such as strncpy. + */ +#if __has_attribute(nonstring) +# define QEMU_NONSTRING __attribute__((nonstring)) +#else +# define QEMU_NONSTRING +#endif + +/* + * Forced inlining may be desired to encourage constant propagation + * of function parameters. However, it can also make debugging harder, + * so disable it for a non-optimizing build. + */ +#if defined(__OPTIMIZE__) +#define QEMU_ALWAYS_INLINE __attribute__((always_inline)) +#else +#define QEMU_ALWAYS_INLINE +#endif + +/* Implement C11 _Generic via GCC builtins. Example: + * + * QEMU_GENERIC(x, (float, sinf), (long double, sinl), sin) (x) + * + * The first argument is the discriminator. The last is the default value. + * The middle ones are tuples in "(type, expansion)" format. + */ + +/* First, find out the number of generic cases. */ +#define QEMU_GENERIC(x, ...) \ + QEMU_GENERIC_(typeof(x), __VA_ARGS__, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0) + +/* There will be extra arguments, but they are not used. */ +#define QEMU_GENERIC_(x, a0, a1, a2, a3, a4, a5, a6, a7, a8, a9, count, ...) \ + QEMU_GENERIC##count(x, a0, a1, a2, a3, a4, a5, a6, a7, a8, a9) + +/* Two more helper macros, this time to extract items from a parenthesized + * list. + */ +#define QEMU_FIRST_(a, b) a +#define QEMU_SECOND_(a, b) b + +/* ... and a final one for the common part of the "recursion". */ +#define QEMU_GENERIC_IF(x, type_then, else_) \ + __builtin_choose_expr(__builtin_types_compatible_p(x, \ + QEMU_FIRST_ type_then), \ + QEMU_SECOND_ type_then, else_) + +/* CPP poor man's "recursion". */ +#define QEMU_GENERIC1(x, a0, ...) (a0) +#define QEMU_GENERIC2(x, a0, ...) QEMU_GENERIC_IF(x, a0, QEMU_GENERIC1(x, __VA_ARGS__)) +#define QEMU_GENERIC3(x, a0, ...) QEMU_GENERIC_IF(x, a0, QEMU_GENERIC2(x, __VA_ARGS__)) +#define QEMU_GENERIC4(x, a0, ...) QEMU_GENERIC_IF(x, a0, QEMU_GENERIC3(x, __VA_ARGS__)) +#define QEMU_GENERIC5(x, a0, ...) QEMU_GENERIC_IF(x, a0, QEMU_GENERIC4(x, __VA_ARGS__)) +#define QEMU_GENERIC6(x, a0, ...) QEMU_GENERIC_IF(x, a0, QEMU_GENERIC5(x, __VA_ARGS__)) +#define QEMU_GENERIC7(x, a0, ...) QEMU_GENERIC_IF(x, a0, QEMU_GENERIC6(x, __VA_ARGS__)) +#define QEMU_GENERIC8(x, a0, ...) QEMU_GENERIC_IF(x, a0, QEMU_GENERIC7(x, __VA_ARGS__)) +#define QEMU_GENERIC9(x, a0, ...) QEMU_GENERIC_IF(x, a0, QEMU_GENERIC8(x, __VA_ARGS__)) +#define QEMU_GENERIC10(x, a0, ...) QEMU_GENERIC_IF(x, a0, QEMU_GENERIC9(x, __VA_ARGS__)) + +/** + * qemu_build_not_reached() + * + * The compiler, during optimization, is expected to prove that a call + * to this function cannot be reached and remove it. If the compiler + * supports QEMU_ERROR, this will be reported at compile time; otherwise + * this will be reported at link time due to the missing symbol. + */ +#if defined(__OPTIMIZE__) && !defined(__NO_INLINE__) +extern void QEMU_NORETURN QEMU_ERROR("code path is reachable") + qemu_build_not_reached(void); +#else +#define qemu_build_not_reached() g_assert_not_reached() +#endif + +#endif /* COMPILER_H */ diff --git a/include/qemu/config-file.h b/include/qemu/config-file.h new file mode 100644 index 000000000..d74f92015 --- /dev/null +++ b/include/qemu/config-file.h @@ -0,0 +1,24 @@ +#ifndef QEMU_CONFIG_FILE_H +#define QEMU_CONFIG_FILE_H + + +QemuOptsList *qemu_find_opts(const char *group); +QemuOptsList *qemu_find_opts_err(const char *group, Error **errp); +QemuOpts *qemu_find_opts_singleton(const char *group); + +void qemu_add_opts(QemuOptsList *list); +void qemu_add_drive_opts(QemuOptsList *list); +int qemu_set_option(const char *str); +int qemu_global_option(const char *str); + +void qemu_config_write(FILE *fp); +int qemu_config_parse(FILE *fp, QemuOptsList **lists, const char *fname); + +int qemu_read_config_file(const char *filename); + +/* Parse QDict options as a replacement for a config file (allowing multiple + enumerated (0..(n-1)) configuration "sections") */ +void qemu_config_parse_qdict(QDict *options, QemuOptsList **lists, + Error **errp); + +#endif /* QEMU_CONFIG_FILE_H */ diff --git a/include/qemu/coroutine.h b/include/qemu/coroutine.h new file mode 100644 index 000000000..84eab6e3b --- /dev/null +++ b/include/qemu/coroutine.h @@ -0,0 +1,324 @@ +/* + * QEMU coroutine implementation + * + * Copyright IBM, Corp. 2011 + * + * Authors: + * Stefan Hajnoczi + * Kevin Wolf + * + * This work is licensed under the terms of the GNU LGPL, version 2 or later. + * See the COPYING.LIB file in the top-level directory. + * + */ + +#ifndef QEMU_COROUTINE_H +#define QEMU_COROUTINE_H + +#include "qemu/queue.h" +#include "qemu/timer.h" + +/** + * Coroutines are a mechanism for stack switching and can be used for + * cooperative userspace threading. These functions provide a simple but + * useful flavor of coroutines that is suitable for writing sequential code, + * rather than callbacks, for operations that need to give up control while + * waiting for events to complete. + * + * These functions are re-entrant and may be used outside the global mutex. + */ + +/** + * Mark a function that executes in coroutine context + * + * Functions that execute in coroutine context cannot be called directly from + * normal functions. In the future it would be nice to enable compiler or + * static checker support for catching such errors. This annotation might make + * it possible and in the meantime it serves as documentation. + * + * For example: + * + * static void coroutine_fn foo(void) { + * .... + * } + */ +#define coroutine_fn + +typedef struct Coroutine Coroutine; + +/** + * Coroutine entry point + * + * When the coroutine is entered for the first time, opaque is passed in as an + * argument. + * + * When this function returns, the coroutine is destroyed automatically and + * execution continues in the caller who last entered the coroutine. + */ +typedef void coroutine_fn CoroutineEntry(void *opaque); + +/** + * Create a new coroutine + * + * Use qemu_coroutine_enter() to actually transfer control to the coroutine. + * The opaque argument is passed as the argument to the entry point. + */ +Coroutine *qemu_coroutine_create(CoroutineEntry *entry, void *opaque); + +/** + * Transfer control to a coroutine + */ +void qemu_coroutine_enter(Coroutine *coroutine); + +/** + * Transfer control to a coroutine if it's not active (i.e. part of the call + * stack of the running coroutine). Otherwise, do nothing. + */ +void qemu_coroutine_enter_if_inactive(Coroutine *co); + +/** + * Transfer control to a coroutine and associate it with ctx + */ +void qemu_aio_coroutine_enter(AioContext *ctx, Coroutine *co); + +/** + * Transfer control back to a coroutine's caller + * + * This function does not return until the coroutine is re-entered using + * qemu_coroutine_enter(). + */ +void coroutine_fn qemu_coroutine_yield(void); + +/** + * Get the AioContext of the given coroutine + */ +AioContext *coroutine_fn qemu_coroutine_get_aio_context(Coroutine *co); + +/** + * Get the currently executing coroutine + */ +Coroutine *coroutine_fn qemu_coroutine_self(void); + +/** + * Return whether or not currently inside a coroutine + * + * This can be used to write functions that work both when in coroutine context + * and when not in coroutine context. Note that such functions cannot use the + * coroutine_fn annotation since they work outside coroutine context. + */ +bool qemu_in_coroutine(void); + +/** + * Return true if the coroutine is currently entered + * + * A coroutine is "entered" if it has not yielded from the current + * qemu_coroutine_enter() call used to run it. This does not mean that the + * coroutine is currently executing code since it may have transferred control + * to another coroutine using qemu_coroutine_enter(). + * + * When several coroutines enter each other there may be no way to know which + * ones have already been entered. In such situations this function can be + * used to avoid recursively entering coroutines. + */ +bool qemu_coroutine_entered(Coroutine *co); + +/** + * Provides a mutex that can be used to synchronise coroutines + */ +struct CoWaitRecord; +struct CoMutex { + /* Count of pending lockers; 0 for a free mutex, 1 for an + * uncontended mutex. + */ + unsigned locked; + + /* Context that is holding the lock. Useful to avoid spinning + * when two coroutines on the same AioContext try to get the lock. :) + */ + AioContext *ctx; + + /* A queue of waiters. Elements are added atomically in front of + * from_push. to_pop is only populated, and popped from, by whoever + * is in charge of the next wakeup. This can be an unlocker or, + * through the handoff protocol, a locker that is about to go to sleep. + */ + QSLIST_HEAD(, CoWaitRecord) from_push, to_pop; + + unsigned handoff, sequence; + + Coroutine *holder; +}; + +/** + * Initialises a CoMutex. This must be called before any other operation is used + * on the CoMutex. + */ +void qemu_co_mutex_init(CoMutex *mutex); + +/** + * Locks the mutex. If the lock cannot be taken immediately, control is + * transferred to the caller of the current coroutine. + */ +void coroutine_fn qemu_co_mutex_lock(CoMutex *mutex); + +/** + * Unlocks the mutex and schedules the next coroutine that was waiting for this + * lock to be run. + */ +void coroutine_fn qemu_co_mutex_unlock(CoMutex *mutex); + +/** + * Assert that the current coroutine holds @mutex. + */ +static inline coroutine_fn void qemu_co_mutex_assert_locked(CoMutex *mutex) +{ + /* + * mutex->holder doesn't need any synchronisation if the assertion holds + * true because the mutex protects it. If it doesn't hold true, we still + * don't mind if another thread takes or releases mutex behind our back, + * because the condition will be false no matter whether we read NULL or + * the pointer for any other coroutine. + */ + assert(qatomic_read(&mutex->locked) && + mutex->holder == qemu_coroutine_self()); +} + +/** + * CoQueues are a mechanism to queue coroutines in order to continue executing + * them later. They are similar to condition variables, but they need help + * from an external mutex in order to maintain thread-safety. + */ +typedef struct CoQueue { + QSIMPLEQ_HEAD(, Coroutine) entries; +} CoQueue; + +/** + * Initialise a CoQueue. This must be called before any other operation is used + * on the CoQueue. + */ +void qemu_co_queue_init(CoQueue *queue); + +/** + * Adds the current coroutine to the CoQueue and transfers control to the + * caller of the coroutine. The mutex is unlocked during the wait and + * locked again afterwards. + */ +#define qemu_co_queue_wait(queue, lock) \ + qemu_co_queue_wait_impl(queue, QEMU_MAKE_LOCKABLE(lock)) +void coroutine_fn qemu_co_queue_wait_impl(CoQueue *queue, QemuLockable *lock); + +/** + * Removes the next coroutine from the CoQueue, and wake it up. + * Returns true if a coroutine was removed, false if the queue is empty. + */ +bool coroutine_fn qemu_co_queue_next(CoQueue *queue); + +/** + * Empties the CoQueue; all coroutines are woken up. + */ +void coroutine_fn qemu_co_queue_restart_all(CoQueue *queue); + +/** + * Removes the next coroutine from the CoQueue, and wake it up. Unlike + * qemu_co_queue_next, this function releases the lock during aio_co_wake + * because it is meant to be used outside coroutine context; in that case, the + * coroutine is entered immediately, before qemu_co_enter_next returns. + * + * If used in coroutine context, qemu_co_enter_next is equivalent to + * qemu_co_queue_next. + */ +#define qemu_co_enter_next(queue, lock) \ + qemu_co_enter_next_impl(queue, QEMU_MAKE_LOCKABLE(lock)) +bool qemu_co_enter_next_impl(CoQueue *queue, QemuLockable *lock); + +/** + * Checks if the CoQueue is empty. + */ +bool qemu_co_queue_empty(CoQueue *queue); + + +typedef struct CoRwlock { + int pending_writer; + int reader; + CoMutex mutex; + CoQueue queue; +} CoRwlock; + +/** + * Initialises a CoRwlock. This must be called before any other operation + * is used on the CoRwlock + */ +void qemu_co_rwlock_init(CoRwlock *lock); + +/** + * Read locks the CoRwlock. If the lock cannot be taken immediately because + * of a parallel writer, control is transferred to the caller of the current + * coroutine. + */ +void qemu_co_rwlock_rdlock(CoRwlock *lock); + +/** + * Write Locks the CoRwlock from a reader. This is a bit more efficient than + * @qemu_co_rwlock_unlock followed by a separate @qemu_co_rwlock_wrlock. + * However, if the lock cannot be upgraded immediately, control is transferred + * to the caller of the current coroutine. Also, @qemu_co_rwlock_upgrade + * only overrides CoRwlock fairness if there are no concurrent readers, so + * another writer might run while @qemu_co_rwlock_upgrade blocks. + */ +void qemu_co_rwlock_upgrade(CoRwlock *lock); + +/** + * Downgrades a write-side critical section to a reader. Downgrading with + * @qemu_co_rwlock_downgrade never blocks, unlike @qemu_co_rwlock_unlock + * followed by @qemu_co_rwlock_rdlock. This makes it more efficient, but + * may also sometimes be necessary for correctness. + */ +void qemu_co_rwlock_downgrade(CoRwlock *lock); + +/** + * Write Locks the mutex. If the lock cannot be taken immediately because + * of a parallel reader, control is transferred to the caller of the current + * coroutine. + */ +void qemu_co_rwlock_wrlock(CoRwlock *lock); + +/** + * Unlocks the read/write lock and schedules the next coroutine that was + * waiting for this lock to be run. + */ +void qemu_co_rwlock_unlock(CoRwlock *lock); + +typedef struct QemuCoSleepState QemuCoSleepState; + +/** + * Yield the coroutine for a given duration. During this yield, @sleep_state + * (if not NULL) is set to an opaque pointer, which may be used for + * qemu_co_sleep_wake(). Be careful, the pointer is set back to zero when the + * timer fires. Don't save the obtained value to other variables and don't call + * qemu_co_sleep_wake from another aio context. + */ +void coroutine_fn qemu_co_sleep_ns_wakeable(QEMUClockType type, int64_t ns, + QemuCoSleepState **sleep_state); +static inline void coroutine_fn qemu_co_sleep_ns(QEMUClockType type, int64_t ns) +{ + qemu_co_sleep_ns_wakeable(type, ns, NULL); +} + +/** + * Wake a coroutine if it is sleeping in qemu_co_sleep_ns. The timer will be + * deleted. @sleep_state must be the variable whose address was given to + * qemu_co_sleep_ns() and should be checked to be non-NULL before calling + * qemu_co_sleep_wake(). + */ +void qemu_co_sleep_wake(QemuCoSleepState *sleep_state); + +/** + * Yield until a file descriptor becomes readable + * + * Note that this function clobbers the handlers for the file descriptor. + */ +void coroutine_fn yield_until_fd_readable(int fd); + +#include "qemu/lockable.h" + +#endif /* QEMU_COROUTINE_H */ diff --git a/include/qemu/coroutine_int.h b/include/qemu/coroutine_int.h new file mode 100644 index 000000000..1da148552 --- /dev/null +++ b/include/qemu/coroutine_int.h @@ -0,0 +1,77 @@ +/* + * Coroutine internals + * + * Copyright (c) 2011 Kevin Wolf + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ + +#ifndef QEMU_COROUTINE_INT_H +#define QEMU_COROUTINE_INT_H + +#include "qemu/queue.h" +#include "qemu/coroutine.h" + +#ifdef CONFIG_SAFESTACK +/* Pointer to the unsafe stack, defined by the compiler */ +extern __thread void *__safestack_unsafe_stack_ptr; +#endif + +#define COROUTINE_STACK_SIZE (1 << 20) + +typedef enum { + COROUTINE_YIELD = 1, + COROUTINE_TERMINATE = 2, + COROUTINE_ENTER = 3, +} CoroutineAction; + +struct Coroutine { + CoroutineEntry *entry; + void *entry_arg; + Coroutine *caller; + + /* Only used when the coroutine has terminated. */ + QSLIST_ENTRY(Coroutine) pool_next; + + size_t locks_held; + + /* Only used when the coroutine has yielded. */ + AioContext *ctx; + + /* Used to catch and abort on illegal co-routine entry. + * Will contain the name of the function that had first + * scheduled the coroutine. */ + const char *scheduled; + + QSIMPLEQ_ENTRY(Coroutine) co_queue_next; + + /* Coroutines that should be woken up when we yield or terminate. + * Only used when the coroutine is running. + */ + QSIMPLEQ_HEAD(, Coroutine) co_queue_wakeup; + + QSLIST_ENTRY(Coroutine) co_scheduled_next; +}; + +Coroutine *qemu_coroutine_new(void); +void qemu_coroutine_delete(Coroutine *co); +CoroutineAction qemu_coroutine_switch(Coroutine *from, Coroutine *to, + CoroutineAction action); + +#endif diff --git a/include/qemu/cpuid.h b/include/qemu/cpuid.h new file mode 100644 index 000000000..09fc245b9 --- /dev/null +++ b/include/qemu/cpuid.h @@ -0,0 +1,60 @@ +/* cpuid.h: Macros to identify the properties of an x86 host. + * + * This work is licensed under the terms of the GNU GPL, version 2 or later. + * See the COPYING file in the top-level directory. + */ + +#ifndef QEMU_CPUID_H +#define QEMU_CPUID_H + +#ifndef CONFIG_CPUID_H +# error " is unusable with this compiler" +#endif + +#include + +/* Cover the uses that we have within qemu. */ +/* ??? Irritating that we have the same information in target/i386/. */ + +/* Leaf 1, %edx */ +#ifndef bit_CMOV +#define bit_CMOV (1 << 15) +#endif +#ifndef bit_SSE2 +#define bit_SSE2 (1 << 26) +#endif + +/* Leaf 1, %ecx */ +#ifndef bit_SSE4_1 +#define bit_SSE4_1 (1 << 19) +#endif +#ifndef bit_MOVBE +#define bit_MOVBE (1 << 22) +#endif +#ifndef bit_OSXSAVE +#define bit_OSXSAVE (1 << 27) +#endif +#ifndef bit_AVX +#define bit_AVX (1 << 28) +#endif + +/* Leaf 7, %ebx */ +#ifndef bit_BMI +#define bit_BMI (1 << 3) +#endif +#ifndef bit_AVX2 +#define bit_AVX2 (1 << 5) +#endif +#ifndef bit_AVX512F +#define bit_AVX512F (1 << 16) +#endif +#ifndef bit_BMI2 +#define bit_BMI2 (1 << 8) +#endif + +/* Leaf 0x80000001, %ecx */ +#ifndef bit_LZCNT +#define bit_LZCNT (1 << 5) +#endif + +#endif /* QEMU_CPUID_H */ diff --git a/include/qemu/crc32c.h b/include/qemu/crc32c.h new file mode 100644 index 000000000..5b78884c3 --- /dev/null +++ b/include/qemu/crc32c.h @@ -0,0 +1,34 @@ +/* + * Castagnoli CRC32C Checksum Algorithm + * + * Polynomial: 0x11EDC6F41 + * + * Castagnoli93: Guy Castagnoli and Stefan Braeuer and Martin Herrman + * "Optimization of Cyclic Redundancy-Check Codes with 24 + * and 32 Parity Bits",IEEE Transactions on Communication, + * Volume 41, Number 6, June 1993 + * + * Copyright (c) 2013 Red Hat, Inc., + * + * Authors: + * Jeff Cody + * + * Based on the Linux kernel cryptographic crc32c module, + * + * Copyright (c) 2004 Cisco Systems, Inc. + * Copyright (c) 2008 Herbert Xu + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the Free + * Software Foundation; either version 2 of the License, or (at your option) + * any later version. + * + */ + +#ifndef QEMU_CRC32C_H +#define QEMU_CRC32C_H + + +uint32_t crc32c(uint32_t crc, const uint8_t *data, unsigned int length); + +#endif diff --git a/include/qemu/ctype.h b/include/qemu/ctype.h new file mode 100644 index 000000000..3691f0984 --- /dev/null +++ b/include/qemu/ctype.h @@ -0,0 +1,27 @@ +/* + * QEMU TCG support + * + * This work is licensed under the terms of the GNU GPL, version 2 or later. + * See the COPYING file in the top-level directory. + */ + +#ifndef QEMU_CTYPE_H +#define QEMU_CTYPE_H + +#define qemu_isalnum(c) isalnum((unsigned char)(c)) +#define qemu_isalpha(c) isalpha((unsigned char)(c)) +#define qemu_iscntrl(c) iscntrl((unsigned char)(c)) +#define qemu_isdigit(c) isdigit((unsigned char)(c)) +#define qemu_isgraph(c) isgraph((unsigned char)(c)) +#define qemu_islower(c) islower((unsigned char)(c)) +#define qemu_isprint(c) isprint((unsigned char)(c)) +#define qemu_ispunct(c) ispunct((unsigned char)(c)) +#define qemu_isspace(c) isspace((unsigned char)(c)) +#define qemu_isupper(c) isupper((unsigned char)(c)) +#define qemu_isxdigit(c) isxdigit((unsigned char)(c)) +#define qemu_tolower(c) tolower((unsigned char)(c)) +#define qemu_toupper(c) toupper((unsigned char)(c)) +#define qemu_isascii(c) isascii((unsigned char)(c)) +#define qemu_toascii(c) toascii((unsigned char)(c)) + +#endif diff --git a/include/qemu/cutils.h b/include/qemu/cutils.h new file mode 100644 index 000000000..986ed8e15 --- /dev/null +++ b/include/qemu/cutils.h @@ -0,0 +1,212 @@ +#ifndef QEMU_CUTILS_H +#define QEMU_CUTILS_H + +/** + * pstrcpy: + * @buf: buffer to copy string into + * @buf_size: size of @buf in bytes + * @str: string to copy + * + * Copy @str into @buf, including the trailing NUL, but do not + * write more than @buf_size bytes. The resulting buffer is + * always NUL terminated (even if the source string was too long). + * If @buf_size is zero or negative then no bytes are copied. + * + * This function is similar to strncpy(), but avoids two of that + * function's problems: + * * if @str fits in the buffer, pstrcpy() does not zero-fill the + * remaining space at the end of @buf + * * if @str is too long, pstrcpy() will copy the first @buf_size-1 + * bytes and then add a NUL + */ +void pstrcpy(char *buf, int buf_size, const char *str); +/** + * strpadcpy: + * @buf: buffer to copy string into + * @buf_size: size of @buf in bytes + * @str: string to copy + * @pad: character to pad the remainder of @buf with + * + * Copy @str into @buf (but *not* its trailing NUL!), and then pad the + * rest of the buffer with the @pad character. If @str is too large + * for the buffer then it is truncated, so that @buf contains the + * first @buf_size characters of @str, with no terminator. + */ +void strpadcpy(char *buf, int buf_size, const char *str, char pad); +/** + * pstrcat: + * @buf: buffer containing existing string + * @buf_size: size of @buf in bytes + * @s: string to concatenate to @buf + * + * Append a copy of @s to the string already in @buf, but do not + * allow the buffer to overflow. If the existing contents of @buf + * plus @str would total more than @buf_size bytes, then write + * as much of @str as will fit followed by a NUL terminator. + * + * @buf must already contain a NUL-terminated string, or the + * behaviour is undefined. + * + * Returns: @buf. + */ +char *pstrcat(char *buf, int buf_size, const char *s); +/** + * strstart: + * @str: string to test + * @val: prefix string to look for + * @ptr: NULL, or pointer to be written to indicate start of + * the remainder of the string + * + * Test whether @str starts with the prefix @val. + * If it does (including the degenerate case where @str and @val + * are equal) then return true. If @ptr is not NULL then a + * pointer to the first character following the prefix is written + * to it. If @val is not a prefix of @str then return false (and + * @ptr is not written to). + * + * Returns: true if @str starts with prefix @val, false otherwise. + */ +int strstart(const char *str, const char *val, const char **ptr); +/** + * stristart: + * @str: string to test + * @val: prefix string to look for + * @ptr: NULL, or pointer to be written to indicate start of + * the remainder of the string + * + * Test whether @str starts with the case-insensitive prefix @val. + * This function behaves identically to strstart(), except that the + * comparison is made after calling qemu_toupper() on each pair of + * characters. + * + * Returns: true if @str starts with case-insensitive prefix @val, + * false otherwise. + */ +int stristart(const char *str, const char *val, const char **ptr); +/** + * qemu_strnlen: + * @s: string + * @max_len: maximum number of bytes in @s to scan + * + * Return the length of the string @s, like strlen(), but do not + * examine more than @max_len bytes of the memory pointed to by @s. + * If no NUL terminator is found within @max_len bytes, then return + * @max_len instead. + * + * This function has the same behaviour as the POSIX strnlen() + * function. + * + * Returns: length of @s in bytes, or @max_len, whichever is smaller. + */ +int qemu_strnlen(const char *s, int max_len); +/** + * qemu_strsep: + * @input: pointer to string to parse + * @delim: string containing delimiter characters to search for + * + * Locate the first occurrence of any character in @delim within + * the string referenced by @input, and replace it with a NUL. + * The location of the next character after the delimiter character + * is stored into @input. + * If the end of the string was reached without finding a delimiter + * character, then NULL is stored into @input. + * If @input points to a NULL pointer on entry, return NULL. + * The return value is always the original value of *@input (and + * so now points to a NUL-terminated string corresponding to the + * part of the input up to the first delimiter). + * + * This function has the same behaviour as the BSD strsep() function. + * + * Returns: the pointer originally in @input. + */ +char *qemu_strsep(char **input, const char *delim); +#ifdef HAVE_STRCHRNUL +static inline const char *qemu_strchrnul(const char *s, int c) +{ + return strchrnul(s, c); +} +#else +const char *qemu_strchrnul(const char *s, int c); +#endif +time_t mktimegm(struct tm *tm); +int qemu_fdatasync(int fd); +int qemu_msync(void *addr, size_t length, int fd); +int fcntl_setfl(int fd, int flag); +int qemu_parse_fd(const char *param); +int qemu_strtoi(const char *nptr, const char **endptr, int base, + int *result); +int qemu_strtoui(const char *nptr, const char **endptr, int base, + unsigned int *result); +int qemu_strtol(const char *nptr, const char **endptr, int base, + long *result); +int qemu_strtoul(const char *nptr, const char **endptr, int base, + unsigned long *result); +int qemu_strtoi64(const char *nptr, const char **endptr, int base, + int64_t *result); +int qemu_strtou64(const char *nptr, const char **endptr, int base, + uint64_t *result); +int qemu_strtod(const char *nptr, const char **endptr, double *result); +int qemu_strtod_finite(const char *nptr, const char **endptr, double *result); + +int parse_uint(const char *s, unsigned long long *value, char **endptr, + int base); +int parse_uint_full(const char *s, unsigned long long *value, int base); + +int qemu_strtosz(const char *nptr, const char **end, uint64_t *result); +int qemu_strtosz_MiB(const char *nptr, const char **end, uint64_t *result); +int qemu_strtosz_metric(const char *nptr, const char **end, uint64_t *result); + +char *size_to_str(uint64_t val); + +/** + * freq_to_str: + * @freq_hz: frequency to stringify + * + * Return human readable string for frequency @freq_hz. + * Use SI units like KHz, MHz, and so forth. + * + * The caller is responsible for releasing the value returned + * with g_free() after use. + */ +char *freq_to_str(uint64_t freq_hz); + +/* used to print char* safely */ +#define STR_OR_NULL(str) ((str) ? (str) : "null") + +bool buffer_is_zero(const void *buf, size_t len); +bool test_buffer_is_zero_next_accel(void); + +/* + * Implementation of ULEB128 (http://en.wikipedia.org/wiki/LEB128) + * Input is limited to 14-bit numbers + */ + +int uleb128_encode_small(uint8_t *out, uint32_t n); +int uleb128_decode_small(const uint8_t *in, uint32_t *n); + +/** + * qemu_pstrcmp0: + * @str1: a non-NULL pointer to a C string (*str1 can be NULL) + * @str2: a non-NULL pointer to a C string (*str2 can be NULL) + * + * Compares *str1 and *str2 with g_strcmp0(). + * + * Returns: an integer less than, equal to, or greater than zero, if + * *str1 is <, == or > than *str2. + */ +int qemu_pstrcmp0(const char **str1, const char **str2); + + +/** + * get_relocated_path: + * @dir: the directory (typically a `CONFIG_*DIR` variable) to be relocated. + * + * Returns a path for @dir that uses the directory of the running executable + * as the prefix. For example, if `bindir` is `/usr/bin` and @dir is + * `/usr/share/qemu`, the function will append `../share/qemu` to the + * directory that contains the running executable and return the result. + * The returned string should be freed by the caller. + */ +char *get_relocated_path(const char *dir); + +#endif diff --git a/include/qemu/dbus.h b/include/qemu/dbus.h new file mode 100644 index 000000000..9d591f9ee --- /dev/null +++ b/include/qemu/dbus.h @@ -0,0 +1,19 @@ +/* + * Helpers for using D-Bus + * + * Copyright (C) 2019 Red Hat, Inc. + * + * This work is licensed under the terms of the GNU GPL, version 2. See + * the COPYING file in the top-level directory. + */ + +#ifndef DBUS_H +#define DBUS_H + +#include + +GStrv qemu_dbus_get_queued_owners(GDBusConnection *connection, + const char *name, + Error **errp); + +#endif /* DBUS_H */ diff --git a/include/qemu/drm.h b/include/qemu/drm.h new file mode 100644 index 000000000..fab74d4be --- /dev/null +++ b/include/qemu/drm.h @@ -0,0 +1,6 @@ +#ifndef QEMU_DRM_H +#define QEMU_DRM_H + +int qemu_drm_rendernode_open(const char *rendernode); + +#endif diff --git a/include/qemu/envlist.h b/include/qemu/envlist.h new file mode 100644 index 000000000..b9addcc11 --- /dev/null +++ b/include/qemu/envlist.h @@ -0,0 +1,22 @@ +#ifndef ENVLIST_H +#define ENVLIST_H + +#ifdef __cplusplus +extern "C" { +#endif + +typedef struct envlist envlist_t; + +envlist_t *envlist_create(void); +void envlist_free(envlist_t *); +int envlist_setenv(envlist_t *, const char *); +int envlist_unsetenv(envlist_t *, const char *); +int envlist_parse_set(envlist_t *, const char *); +int envlist_parse_unset(envlist_t *, const char *); +char **envlist_to_environ(const envlist_t *, size_t *); + +#ifdef __cplusplus +} +#endif + +#endif /* ENVLIST_H */ diff --git a/include/qemu/error-report.h b/include/qemu/error-report.h new file mode 100644 index 000000000..a5ad95ff1 --- /dev/null +++ b/include/qemu/error-report.h @@ -0,0 +1,81 @@ +/* + * Error reporting + * + * Copyright (C) 2010 Red Hat Inc. + * + * Authors: + * Markus Armbruster , + * + * This work is licensed under the terms of the GNU GPL, version 2 or later. + * See the COPYING file in the top-level directory. + */ + +#ifndef QEMU_ERROR_REPORT_H +#define QEMU_ERROR_REPORT_H + +typedef struct Location { + /* all members are private to qemu-error.c */ + enum { LOC_NONE, LOC_CMDLINE, LOC_FILE } kind; + int num; + const void *ptr; + struct Location *prev; +} Location; + +Location *loc_push_restore(Location *loc); +Location *loc_push_none(Location *loc); +Location *loc_pop(Location *loc); +Location *loc_save(Location *loc); +void loc_restore(Location *loc); +void loc_set_none(void); +void loc_set_cmdline(char **argv, int idx, int cnt); +void loc_set_file(const char *fname, int lno); + +int error_vprintf(const char *fmt, va_list ap) GCC_FMT_ATTR(1, 0); +int error_printf(const char *fmt, ...) GCC_FMT_ATTR(1, 2); +int error_vprintf_unless_qmp(const char *fmt, va_list ap) GCC_FMT_ATTR(1, 0); +int error_printf_unless_qmp(const char *fmt, ...) GCC_FMT_ATTR(1, 2); + +void error_vreport(const char *fmt, va_list ap) GCC_FMT_ATTR(1, 0); +void warn_vreport(const char *fmt, va_list ap) GCC_FMT_ATTR(1, 0); +void info_vreport(const char *fmt, va_list ap) GCC_FMT_ATTR(1, 0); + +void error_report(const char *fmt, ...) GCC_FMT_ATTR(1, 2); +void warn_report(const char *fmt, ...) GCC_FMT_ATTR(1, 2); +void info_report(const char *fmt, ...) GCC_FMT_ATTR(1, 2); + +bool error_report_once_cond(bool *printed, const char *fmt, ...) + GCC_FMT_ATTR(2, 3); +bool warn_report_once_cond(bool *printed, const char *fmt, ...) + GCC_FMT_ATTR(2, 3); + +void error_init(const char *argv0); + +/* + * Similar to error_report(), except it prints the message just once. + * Return true when it prints, false otherwise. + */ +#define error_report_once(fmt, ...) \ + ({ \ + static bool print_once_; \ + error_report_once_cond(&print_once_, \ + fmt, ##__VA_ARGS__); \ + }) + +/* + * Similar to warn_report(), except it prints the message just once. + * Return true when it prints, false otherwise. + */ +#define warn_report_once(fmt, ...) \ + ({ \ + static bool print_once_; \ + warn_report_once_cond(&print_once_, \ + fmt, ##__VA_ARGS__); \ + }) + +const char *error_get_progname(void); + +extern bool error_with_timestamp; +extern bool error_with_guestname; +extern const char *error_guest_name; + +#endif diff --git a/include/qemu/event_notifier.h b/include/qemu/event_notifier.h new file mode 100644 index 000000000..3380b662f --- /dev/null +++ b/include/qemu/event_notifier.h @@ -0,0 +1,44 @@ +/* + * event notifier support + * + * Copyright Red Hat, Inc. 2010 + * + * Authors: + * Michael S. Tsirkin + * + * This work is licensed under the terms of the GNU GPL, version 2 or later. + * See the COPYING file in the top-level directory. + */ + +#ifndef QEMU_EVENT_NOTIFIER_H +#define QEMU_EVENT_NOTIFIER_H + + +#ifdef _WIN32 +#include +#endif + +struct EventNotifier { +#ifdef _WIN32 + HANDLE event; +#else + int rfd; + int wfd; +#endif +}; + +typedef void EventNotifierHandler(EventNotifier *); + +int event_notifier_init(EventNotifier *, int active); +void event_notifier_cleanup(EventNotifier *); +int event_notifier_set(EventNotifier *); +int event_notifier_test_and_clear(EventNotifier *); + +#ifdef CONFIG_POSIX +void event_notifier_init_fd(EventNotifier *, int fd); +int event_notifier_get_fd(const EventNotifier *); +#else +HANDLE event_notifier_get_handle(EventNotifier *); +#endif + +#endif diff --git a/include/qemu/fifo32.h b/include/qemu/fifo32.h new file mode 100644 index 000000000..4e9fd1b5e --- /dev/null +++ b/include/qemu/fifo32.h @@ -0,0 +1,190 @@ +/* + * Generic FIFO32 component, based on FIFO8. + * + * Copyright (c) 2016 Jean-Christophe Dubois + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, see . + */ + +#ifndef FIFO32_H +#define FIFO32_H + +#include "qemu/fifo8.h" + +typedef struct { + Fifo8 fifo; +} Fifo32; + +/** + * fifo32_create: + * @fifo: struct Fifo32 to initialise with new FIFO + * @capacity: capacity of the newly created FIFO expressed in 32 bit words + * + * Create a FIFO of the specified size. Clients should call fifo32_destroy() + * when finished using the fifo. The FIFO is initially empty. + */ + +static inline void fifo32_create(Fifo32 *fifo, uint32_t capacity) +{ + fifo8_create(&fifo->fifo, capacity * sizeof(uint32_t)); +} + +/** + * fifo32_destroy: + * @fifo: FIFO to cleanup + * + * Cleanup a FIFO created with fifo32_create(). Frees memory created for FIFO + * storage. The FIFO is no longer usable after this has been called. + */ + +static inline void fifo32_destroy(Fifo32 *fifo) +{ + fifo8_destroy(&fifo->fifo); +} + +/** + * fifo32_num_free: + * @fifo: FIFO to check + * + * Return the number of free uint32_t slots in the FIFO. + * + * Returns: Number of free 32 bit words. + */ + +static inline uint32_t fifo32_num_free(Fifo32 *fifo) +{ + return DIV_ROUND_UP(fifo8_num_free(&fifo->fifo), sizeof(uint32_t)); +} + +/** + * fifo32_num_used: + * @fifo: FIFO to check + * + * Return the number of used uint32_t slots in the FIFO. + * + * Returns: Number of used 32 bit words. + */ + +static inline uint32_t fifo32_num_used(Fifo32 *fifo) +{ + return DIV_ROUND_UP(fifo8_num_used(&fifo->fifo), sizeof(uint32_t)); +} + +/** + * fifo32_push: + * @fifo: FIFO to push to + * @data: 32 bits data word to push + * + * Push a 32 bits data word to the FIFO. Behaviour is undefined if the FIFO + * is full. Clients are responsible for checking for fullness using + * fifo32_is_full(). + */ + +static inline void fifo32_push(Fifo32 *fifo, uint32_t data) +{ + int i; + + for (i = 0; i < sizeof(data); i++) { + fifo8_push(&fifo->fifo, data & 0xff); + data >>= 8; + } +} + +/** + * fifo32_push_all: + * @fifo: FIFO to push to + * @data: data to push + * @size: number of 32 bit words to push + * + * Push a 32 bit word array to the FIFO. Behaviour is undefined if the FIFO + * is full. Clients are responsible for checking the space left in the FIFO + * using fifo32_num_free(). + */ + +static inline void fifo32_push_all(Fifo32 *fifo, const uint32_t *data, + uint32_t num) +{ + int i; + + for (i = 0; i < num; i++) { + fifo32_push(fifo, data[i]); + } +} + +/** + * fifo32_pop: + * @fifo: fifo to pop from + * + * Pop a 32 bits data word from the FIFO. Behaviour is undefined if the FIFO + * is empty. Clients are responsible for checking for emptiness using + * fifo32_is_empty(). + * + * Returns: The popped 32 bits data word. + */ + +static inline uint32_t fifo32_pop(Fifo32 *fifo) +{ + uint32_t ret = 0; + int i; + + for (i = 0; i < sizeof(uint32_t); i++) { + ret |= (fifo8_pop(&fifo->fifo) << (i * 8)); + } + + return ret; +} + +/** + * There is no fifo32_pop_buf() because the data is not stored in the buffer + * as a set of native-order words. + */ + +/** + * fifo32_reset: + * @fifo: FIFO to reset + * + * Reset a FIFO. All data is discarded and the FIFO is emptied. + */ + +static inline void fifo32_reset(Fifo32 *fifo) +{ + fifo8_reset(&fifo->fifo); +} + +/** + * fifo32_is_empty: + * @fifo: FIFO to check + * + * Check if a FIFO is empty. + * + * Returns: True if the fifo is empty, false otherwise. + */ + +static inline bool fifo32_is_empty(Fifo32 *fifo) +{ + return fifo8_is_empty(&fifo->fifo); +} + +/** + * fifo32_is_full: + * @fifo: FIFO to check + * + * Check if a FIFO is full. + * + * Returns: True if the fifo is full, false otherwise. + */ + +static inline bool fifo32_is_full(Fifo32 *fifo) +{ + return fifo8_num_free(&fifo->fifo) < sizeof(uint32_t); +} + +#define VMSTATE_FIFO32(_field, _state) VMSTATE_FIFO8(_field.fifo, _state) + +#endif /* FIFO32_H */ diff --git a/include/qemu/fifo8.h b/include/qemu/fifo8.h new file mode 100644 index 000000000..489c35429 --- /dev/null +++ b/include/qemu/fifo8.h @@ -0,0 +1,159 @@ +#ifndef QEMU_FIFO8_H +#define QEMU_FIFO8_H + + +typedef struct { + /* All fields are private */ + uint8_t *data; + uint32_t capacity; + uint32_t head; + uint32_t num; +} Fifo8; + +/** + * fifo8_create: + * @fifo: struct Fifo8 to initialise with new FIFO + * @capacity: capacity of the newly created FIFO + * + * Create a FIFO of the specified size. Clients should call fifo8_destroy() + * when finished using the fifo. The FIFO is initially empty. + */ + +void fifo8_create(Fifo8 *fifo, uint32_t capacity); + +/** + * fifo8_destroy: + * @fifo: FIFO to cleanup + * + * Cleanup a FIFO created with fifo8_create(). Frees memory created for FIFO + *storage. The FIFO is no longer usable after this has been called. + */ + +void fifo8_destroy(Fifo8 *fifo); + +/** + * fifo8_push: + * @fifo: FIFO to push to + * @data: data byte to push + * + * Push a data byte to the FIFO. Behaviour is undefined if the FIFO is full. + * Clients are responsible for checking for fullness using fifo8_is_full(). + */ + +void fifo8_push(Fifo8 *fifo, uint8_t data); + +/** + * fifo8_push_all: + * @fifo: FIFO to push to + * @data: data to push + * @size: number of bytes to push + * + * Push a byte array to the FIFO. Behaviour is undefined if the FIFO is full. + * Clients are responsible for checking the space left in the FIFO using + * fifo8_num_free(). + */ + +void fifo8_push_all(Fifo8 *fifo, const uint8_t *data, uint32_t num); + +/** + * fifo8_pop: + * @fifo: fifo to pop from + * + * Pop a data byte from the FIFO. Behaviour is undefined if the FIFO is empty. + * Clients are responsible for checking for emptyness using fifo8_is_empty(). + * + * Returns: The popped data byte. + */ + +uint8_t fifo8_pop(Fifo8 *fifo); + +/** + * fifo8_pop_buf: + * @fifo: FIFO to pop from + * @max: maximum number of bytes to pop + * @num: actual number of returned bytes + * + * Pop a number of elements from the FIFO up to a maximum of max. The buffer + * containing the popped data is returned. This buffer points directly into + * the FIFO backing store and data is invalidated once any of the fifo8_* APIs + * are called on the FIFO. + * + * The function may return fewer bytes than requested when the data wraps + * around in the ring buffer; in this case only a contiguous part of the data + * is returned. + * + * The number of valid bytes returned is populated in *num; will always return + * at least 1 byte. max must not be 0 or greater than the number of bytes in + * the FIFO. + * + * Clients are responsible for checking the availability of requested data + * using fifo8_num_used(). + * + * Returns: A pointer to popped data. + */ +const uint8_t *fifo8_pop_buf(Fifo8 *fifo, uint32_t max, uint32_t *num); + +/** + * fifo8_reset: + * @fifo: FIFO to reset + * + * Reset a FIFO. All data is discarded and the FIFO is emptied. + */ + +void fifo8_reset(Fifo8 *fifo); + +/** + * fifo8_is_empty: + * @fifo: FIFO to check + * + * Check if a FIFO is empty. + * + * Returns: True if the fifo is empty, false otherwise. + */ + +bool fifo8_is_empty(Fifo8 *fifo); + +/** + * fifo8_is_full: + * @fifo: FIFO to check + * + * Check if a FIFO is full. + * + * Returns: True if the fifo is full, false otherwise. + */ + +bool fifo8_is_full(Fifo8 *fifo); + +/** + * fifo8_num_free: + * @fifo: FIFO to check + * + * Return the number of free bytes in the FIFO. + * + * Returns: Number of free bytes. + */ + +uint32_t fifo8_num_free(Fifo8 *fifo); + +/** + * fifo8_num_used: + * @fifo: FIFO to check + * + * Return the number of used bytes in the FIFO. + * + * Returns: Number of used bytes. + */ + +uint32_t fifo8_num_used(Fifo8 *fifo); + +extern const VMStateDescription vmstate_fifo8; + +#define VMSTATE_FIFO8(_field, _state) { \ + .name = (stringify(_field)), \ + .size = sizeof(Fifo8), \ + .vmsd = &vmstate_fifo8, \ + .flags = VMS_STRUCT, \ + .offset = vmstate_offset_value(_state, _field, Fifo8), \ +} + +#endif /* QEMU_FIFO8_H */ diff --git a/include/qemu/filemonitor.h b/include/qemu/filemonitor.h new file mode 100644 index 000000000..8715d5c04 --- /dev/null +++ b/include/qemu/filemonitor.h @@ -0,0 +1,127 @@ +/* + * QEMU file monitor helper + * + * Copyright (c) 2018 Red Hat, Inc. + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, see . + * + */ + +#ifndef QEMU_FILEMONITOR_H +#define QEMU_FILEMONITOR_H + + + +typedef struct QFileMonitor QFileMonitor; + +typedef enum { + /* File has been created in a dir */ + QFILE_MONITOR_EVENT_CREATED, + /* File has been modified in a dir */ + QFILE_MONITOR_EVENT_MODIFIED, + /* File has been deleted in a dir */ + QFILE_MONITOR_EVENT_DELETED, + /* File has attributes changed */ + QFILE_MONITOR_EVENT_ATTRIBUTES, + /* Dir is no longer being monitored (due to deletion) */ + QFILE_MONITOR_EVENT_IGNORED, +} QFileMonitorEvent; + + +/** + * QFileMonitorHandler: + * @id: id from qemu_file_monitor_add_watch() + * @event: the file change that occurred + * @filename: the name of the file affected + * @opaque: opaque data provided to qemu_file_monitor_add_watch() + * + * Invoked whenever a file changes. If @event is + * QFILE_MONITOR_EVENT_IGNORED, @filename will be + * empty. + * + */ +typedef void (*QFileMonitorHandler)(int64_t id, + QFileMonitorEvent event, + const char *filename, + void *opaque); + +/** + * qemu_file_monitor_new: + * @errp: pointer to a NULL-initialized error object + * + * Create a handle for a file monitoring object. + * + * This object does locking internally to enable it to be + * safe to use from multiple threads + * + * If the platform does not support file monitoring, an + * error will be reported. Likewise if file monitoring + * is supported, but cannot be initialized + * + * Currently this is implemented on Linux platforms with + * the inotify subsystem. + * + * Returns: the new monitoring object, or NULL on error + */ +QFileMonitor *qemu_file_monitor_new(Error **errp); + +/** + * qemu_file_monitor_free: + * @mon: the file monitor context + * + * Free resources associated with the file monitor, + * including any currently registered watches. + */ +void qemu_file_monitor_free(QFileMonitor *mon); + +/** + * qemu_file_monitor_add_watch: + * @mon: the file monitor context + * @dirpath: the directory whose contents to watch + * @filename: optional filename to filter on + * @cb: the function to invoke when @dirpath has changes + * @opaque: data to pass to @cb + * @errp: pointer to a NULL-initialized error object + * + * Register to receive notifications of changes + * in the directory @dirpath. All files in the + * directory will be monitored. If the caller is + * only interested in one specific file, @filename + * can be used to filter events. + * + * Returns: a positive integer watch ID, or -1 on error + */ +int64_t qemu_file_monitor_add_watch(QFileMonitor *mon, + const char *dirpath, + const char *filename, + QFileMonitorHandler cb, + void *opaque, + Error **errp); + +/** + * qemu_file_monitor_remove_watch: + * @mon: the file monitor context + * @dirpath: the directory whose contents to unwatch + * @id: id of the watch to remove + * + * Removes the file monitoring watch @id, associated + * with the directory @dirpath. This must never be + * called from a QFileMonitorHandler callback, or a + * deadlock will result. + */ +void qemu_file_monitor_remove_watch(QFileMonitor *mon, + const char *dirpath, + int64_t id); + +#endif /* QEMU_FILEMONITOR_H */ diff --git a/include/qemu/futex.h b/include/qemu/futex.h new file mode 100644 index 000000000..91ae88966 --- /dev/null +++ b/include/qemu/futex.h @@ -0,0 +1,41 @@ +/* + * Wrappers around Linux futex syscall + * + * Copyright Red Hat, Inc. 2017 + * + * Author: + * Paolo Bonzini + * + * This work is licensed under the terms of the GNU GPL, version 2 or later. + * See the COPYING file in the top-level directory. + * + */ + +#ifndef QEMU_FUTEX_H +#define QEMU_FUTEX_H + +#include +#include + +#define qemu_futex(...) syscall(__NR_futex, __VA_ARGS__) + +static inline void qemu_futex_wake(void *f, int n) +{ + qemu_futex(f, FUTEX_WAKE, n, NULL, NULL, 0); +} + +static inline void qemu_futex_wait(void *f, unsigned val) +{ + while (qemu_futex(f, FUTEX_WAIT, (int) val, NULL, NULL, 0)) { + switch (errno) { + case EWOULDBLOCK: + return; + case EINTR: + break; /* get out of switch and retry */ + default: + abort(); + } + } +} + +#endif /* QEMU_FUTEX_H */ diff --git a/include/qemu/guest-random.h b/include/qemu/guest-random.h new file mode 100644 index 000000000..09ff9c223 --- /dev/null +++ b/include/qemu/guest-random.h @@ -0,0 +1,68 @@ +/* + * QEMU guest-visible random functions + * + * Copyright 2019 Linaro, Ltd. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the Free + * Software Foundation; either version 2 of the License, or (at your option) + * any later version. + */ + +#ifndef QEMU_GUEST_RANDOM_H +#define QEMU_GUEST_RANDOM_H + +/** + * qemu_guest_random_seed_main(const char *optarg, Error **errp) + * @optarg: a non-NULL pointer to a C string + * @errp: an error indicator + * + * The @optarg value is that which accompanies the -seed argument. + * This forces qemu_guest_getrandom into deterministic mode. + * + * Returns 0 on success, < 0 on failure while setting *errp. + */ +int qemu_guest_random_seed_main(const char *optarg, Error **errp); + +/** + * qemu_guest_random_seed_thread_part1(void) + * + * If qemu_getrandom is in deterministic mode, returns an + * independent seed for the new thread. Otherwise returns 0. + */ +uint64_t qemu_guest_random_seed_thread_part1(void); + +/** + * qemu_guest_random_seed_thread_part2(uint64_t seed) + * @seed: a value for the new thread. + * + * If qemu_guest_getrandom is in deterministic mode, this stores an + * independent seed for the new thread. Otherwise a no-op. + */ +void qemu_guest_random_seed_thread_part2(uint64_t seed); + +/** + * qemu_guest_getrandom(void *buf, size_t len, Error **errp) + * @buf: a buffer of bytes to be written + * @len: the number of bytes in @buf + * @errp: an error indicator + * + * Fills len bytes in buf with random data. This should only be used + * for data presented to the guest. Host-side crypto services should + * use qcrypto_random_bytes. + * + * Returns 0 on success, < 0 on failure while setting *errp. + */ +int qemu_guest_getrandom(void *buf, size_t len, Error **errp); + +/** + * qemu_guest_getrandom_nofail(void *buf, size_t len) + * @buf: a buffer of bytes to be written + * @len: the number of bytes in @buf + * + * Like qemu_guest_getrandom, but will assert for failure. + * Use this when there is no reasonable recovery. + */ +void qemu_guest_getrandom_nofail(void *buf, size_t len); + +#endif /* QEMU_GUEST_RANDOM_H */ diff --git a/include/qemu/hbitmap.h b/include/qemu/hbitmap.h new file mode 100644 index 000000000..5e71b6d6f --- /dev/null +++ b/include/qemu/hbitmap.h @@ -0,0 +1,352 @@ +/* + * Hierarchical Bitmap Data Type + * + * Copyright Red Hat, Inc., 2012 + * + * Author: Paolo Bonzini + * + * This work is licensed under the terms of the GNU GPL, version 2 or + * later. See the COPYING file in the top-level directory. + */ + +#ifndef HBITMAP_H +#define HBITMAP_H + +#include "bitops.h" +#include "host-utils.h" + +typedef struct HBitmap HBitmap; +typedef struct HBitmapIter HBitmapIter; + +#define BITS_PER_LEVEL (BITS_PER_LONG == 32 ? 5 : 6) + +/* For 32-bit, the largest that fits in a 4 GiB address space. + * For 64-bit, the number of sectors in 1 PiB. Good luck, in + * either case... :) + */ +#define HBITMAP_LOG_MAX_SIZE (BITS_PER_LONG == 32 ? 34 : 41) + +/* We need to place a sentinel in level 0 to speed up iteration. Thus, + * we do this instead of HBITMAP_LOG_MAX_SIZE / BITS_PER_LEVEL. The + * difference is that it allocates an extra level when HBITMAP_LOG_MAX_SIZE + * is an exact multiple of BITS_PER_LEVEL. + */ +#define HBITMAP_LEVELS ((HBITMAP_LOG_MAX_SIZE / BITS_PER_LEVEL) + 1) + +struct HBitmapIter { + const HBitmap *hb; + + /* Copied from hb for access in the inline functions (hb is opaque). */ + int granularity; + + /* Entry offset into the last-level array of longs. */ + size_t pos; + + /* The currently-active path in the tree. Each item of cur[i] stores + * the bits (i.e. the subtrees) yet to be processed under that node. + */ + unsigned long cur[HBITMAP_LEVELS]; +}; + +/** + * hbitmap_alloc: + * @size: Number of bits in the bitmap. + * @granularity: Granularity of the bitmap. Aligned groups of 2^@granularity + * bits will be represented by a single bit. Each operation on a + * range of bits first rounds the bits to determine which group they land + * in, and then affect the entire set; iteration will only visit the first + * bit of each group. + * + * Allocate a new HBitmap. + */ +HBitmap *hbitmap_alloc(uint64_t size, int granularity); + +/** + * hbitmap_truncate: + * @hb: The bitmap to change the size of. + * @size: The number of elements to change the bitmap to accommodate. + * + * truncate or grow an existing bitmap to accommodate a new number of elements. + * This may invalidate existing HBitmapIterators. + */ +void hbitmap_truncate(HBitmap *hb, uint64_t size); + +/** + * hbitmap_merge: + * + * Store result of merging @a and @b into @result. + * @result is allowed to be equal to @a or @b. + * + * Return true if the merge was successful, + * false if it was not attempted. + */ +bool hbitmap_merge(const HBitmap *a, const HBitmap *b, HBitmap *result); + +/** + * hbitmap_can_merge: + * + * hbitmap_can_merge(a, b) && hbitmap_can_merge(a, result) is sufficient and + * necessary for hbitmap_merge will not fail. + * + */ +bool hbitmap_can_merge(const HBitmap *a, const HBitmap *b); + +/** + * hbitmap_empty: + * @hb: HBitmap to operate on. + * + * Return whether the bitmap is empty. + */ +bool hbitmap_empty(const HBitmap *hb); + +/** + * hbitmap_granularity: + * @hb: HBitmap to operate on. + * + * Return the granularity of the HBitmap. + */ +int hbitmap_granularity(const HBitmap *hb); + +/** + * hbitmap_count: + * @hb: HBitmap to operate on. + * + * Return the number of bits set in the HBitmap. + */ +uint64_t hbitmap_count(const HBitmap *hb); + +/** + * hbitmap_set: + * @hb: HBitmap to operate on. + * @start: First bit to set (0-based). + * @count: Number of bits to set. + * + * Set a consecutive range of bits in an HBitmap. + */ +void hbitmap_set(HBitmap *hb, uint64_t start, uint64_t count); + +/** + * hbitmap_reset: + * @hb: HBitmap to operate on. + * @start: First bit to reset (0-based). + * @count: Number of bits to reset. + * + * Reset a consecutive range of bits in an HBitmap. + * @start and @count must be aligned to bitmap granularity. The only exception + * is resetting the tail of the bitmap: @count may be equal to hb->orig_size - + * @start, in this case @count may be not aligned. The sum of @start + @count is + * allowed to be greater than hb->orig_size, but only if @start < hb->orig_size + * and @start + @count = ALIGN_UP(hb->orig_size, granularity). + */ +void hbitmap_reset(HBitmap *hb, uint64_t start, uint64_t count); + +/** + * hbitmap_reset_all: + * @hb: HBitmap to operate on. + * + * Reset all bits in an HBitmap. + */ +void hbitmap_reset_all(HBitmap *hb); + +/** + * hbitmap_get: + * @hb: HBitmap to operate on. + * @item: Bit to query (0-based). + * + * Return whether the @item-th bit in an HBitmap is set. + */ +bool hbitmap_get(const HBitmap *hb, uint64_t item); + +/** + * hbitmap_is_serializable: + * @hb: HBitmap which should be (de-)serialized. + * + * Returns whether the bitmap can actually be (de-)serialized. Other + * (de-)serialization functions may only be invoked if this function returns + * true. + * + * Calling (de-)serialization functions does not affect a bitmap's + * (de-)serializability. + */ +bool hbitmap_is_serializable(const HBitmap *hb); + +/** + * hbitmap_serialization_align: + * @hb: HBitmap to operate on. + * + * Required alignment of serialization chunks, used by other serialization + * functions. For every chunk: + * 1. Chunk start should be aligned to this granularity. + * 2. Chunk size should be aligned too, except for last chunk (for which + * start + count == hb->size) + */ +uint64_t hbitmap_serialization_align(const HBitmap *hb); + +/** + * hbitmap_serialization_size: + * @hb: HBitmap to operate on. + * @start: Starting bit + * @count: Number of bits + * + * Return number of bytes hbitmap_(de)serialize_part needs + */ +uint64_t hbitmap_serialization_size(const HBitmap *hb, + uint64_t start, uint64_t count); + +/** + * hbitmap_serialize_part + * @hb: HBitmap to operate on. + * @buf: Buffer to store serialized bitmap. + * @start: First bit to store. + * @count: Number of bits to store. + * + * Stores HBitmap data corresponding to given region. The format of saved data + * is linear sequence of bits, so it can be used by hbitmap_deserialize_part + * independently of endianness and size of HBitmap level array elements + */ +void hbitmap_serialize_part(const HBitmap *hb, uint8_t *buf, + uint64_t start, uint64_t count); + +/** + * hbitmap_deserialize_part + * @hb: HBitmap to operate on. + * @buf: Buffer to restore bitmap data from. + * @start: First bit to restore. + * @count: Number of bits to restore. + * @finish: Whether to call hbitmap_deserialize_finish automatically. + * + * Restores HBitmap data corresponding to given region. The format is the same + * as for hbitmap_serialize_part. + * + * If @finish is false, caller must call hbitmap_serialize_finish before using + * the bitmap. + */ +void hbitmap_deserialize_part(HBitmap *hb, uint8_t *buf, + uint64_t start, uint64_t count, + bool finish); + +/** + * hbitmap_deserialize_zeroes + * @hb: HBitmap to operate on. + * @start: First bit to restore. + * @count: Number of bits to restore. + * @finish: Whether to call hbitmap_deserialize_finish automatically. + * + * Fills the bitmap with zeroes. + * + * If @finish is false, caller must call hbitmap_serialize_finish before using + * the bitmap. + */ +void hbitmap_deserialize_zeroes(HBitmap *hb, uint64_t start, uint64_t count, + bool finish); + +/** + * hbitmap_deserialize_ones + * @hb: HBitmap to operate on. + * @start: First bit to restore. + * @count: Number of bits to restore. + * @finish: Whether to call hbitmap_deserialize_finish automatically. + * + * Fills the bitmap with ones. + * + * If @finish is false, caller must call hbitmap_serialize_finish before using + * the bitmap. + */ +void hbitmap_deserialize_ones(HBitmap *hb, uint64_t start, uint64_t count, + bool finish); + +/** + * hbitmap_deserialize_finish + * @hb: HBitmap to operate on. + * + * Repair HBitmap after calling hbitmap_deserialize_data. Actually, all HBitmap + * layers are restored here. + */ +void hbitmap_deserialize_finish(HBitmap *hb); + +/** + * hbitmap_sha256: + * @bitmap: HBitmap to operate on. + * + * Returns SHA256 hash of the last level. + */ +char *hbitmap_sha256(const HBitmap *bitmap, Error **errp); + +/** + * hbitmap_free: + * @hb: HBitmap to operate on. + * + * Free an HBitmap and all of its associated memory. + */ +void hbitmap_free(HBitmap *hb); + +/** + * hbitmap_iter_init: + * @hbi: HBitmapIter to initialize. + * @hb: HBitmap to iterate on. + * @first: First bit to visit (0-based, must be strictly less than the + * size of the bitmap). + * + * Set up @hbi to iterate on the HBitmap @hb. hbitmap_iter_next will return + * the lowest-numbered bit that is set in @hb, starting at @first. + * + * Concurrent setting of bits is acceptable, and will at worst cause the + * iteration to miss some of those bits. + * + * The concurrent resetting of bits is OK. + */ +void hbitmap_iter_init(HBitmapIter *hbi, const HBitmap *hb, uint64_t first); + +/* + * hbitmap_next_dirty: + * + * Find next dirty bit within selected range. If not found, return -1. + * + * @hb: The HBitmap to operate on + * @start: The bit to start from. + * @count: Number of bits to proceed. If @start+@count > bitmap size, the whole + * bitmap is looked through. You can use INT64_MAX as @count to search up to + * the bitmap end. + */ +int64_t hbitmap_next_dirty(const HBitmap *hb, int64_t start, int64_t count); + +/* hbitmap_next_zero: + * + * Find next not dirty bit within selected range. If not found, return -1. + * + * @hb: The HBitmap to operate on + * @start: The bit to start from. + * @count: Number of bits to proceed. If @start+@count > bitmap size, the whole + * bitmap is looked through. You can use INT64_MAX as @count to search up to + * the bitmap end. + */ +int64_t hbitmap_next_zero(const HBitmap *hb, int64_t start, int64_t count); + +/* hbitmap_next_dirty_area: + * @hb: The HBitmap to operate on + * @start: the offset to start from + * @end: end of requested area + * @max_dirty_count: limit for out parameter dirty_count + * @dirty_start: on success: start of found area + * @dirty_count: on success: length of found area + * + * If dirty area found within [@start, @end), returns true and sets + * @dirty_start and @dirty_count appropriately. @dirty_count will not exceed + * @max_dirty_count. + * If dirty area was not found, returns false and leaves @dirty_start and + * @dirty_count unchanged. + */ +bool hbitmap_next_dirty_area(const HBitmap *hb, int64_t start, int64_t end, + int64_t max_dirty_count, + int64_t *dirty_start, int64_t *dirty_count); + +/** + * hbitmap_iter_next: + * @hbi: HBitmapIter to operate on. + * + * Return the next bit that is set in @hbi's associated HBitmap, + * or -1 if all remaining bits are zero. + */ +int64_t hbitmap_iter_next(HBitmapIter *hbi); + +#endif diff --git a/include/qemu/help_option.h b/include/qemu/help_option.h new file mode 100644 index 000000000..ca6389a15 --- /dev/null +++ b/include/qemu/help_option.h @@ -0,0 +1,33 @@ +#ifndef QEMU_HELP_OPTION_H +#define QEMU_HELP_OPTION_H + +/** + * is_help_option: + * @s: string to test + * + * Check whether @s is one of the standard strings which indicate + * that the user is asking for a list of the valid values for a + * command option like -cpu or -M. The current accepted strings + * are 'help' and '?'. '?' is deprecated (it is a shell wildcard + * which makes it annoying to use in a reliable way) but provided + * for backwards compatibility. + * + * Returns: true if @s is a request for a list. + */ +static inline bool is_help_option(const char *s) +{ + return !strcmp(s, "?") || !strcmp(s, "help"); +} + +static inline int starts_with_help_option(const char *s) +{ + if (*s == '?') { + return 1; + } + if (g_str_has_prefix(s, "help")) { + return 4; + } + return 0; +} + +#endif diff --git a/include/qemu/host-utils.h b/include/qemu/host-utils.h new file mode 100644 index 000000000..cdca2991d --- /dev/null +++ b/include/qemu/host-utils.h @@ -0,0 +1,440 @@ +/* + * Utility compute operations used by translated code. + * + * Copyright (c) 2007 Thiemo Seufer + * Copyright (c) 2007 Jocelyn Mayer + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ + +#ifndef HOST_UTILS_H +#define HOST_UTILS_H + +#include "qemu/bswap.h" + +#ifdef CONFIG_INT128 +static inline void mulu64(uint64_t *plow, uint64_t *phigh, + uint64_t a, uint64_t b) +{ + __uint128_t r = (__uint128_t)a * b; + *plow = r; + *phigh = r >> 64; +} + +static inline void muls64(uint64_t *plow, uint64_t *phigh, + int64_t a, int64_t b) +{ + __int128_t r = (__int128_t)a * b; + *plow = r; + *phigh = r >> 64; +} + +/* compute with 96 bit intermediate result: (a*b)/c */ +static inline uint64_t muldiv64(uint64_t a, uint32_t b, uint32_t c) +{ + return (__int128_t)a * b / c; +} + +static inline int divu128(uint64_t *plow, uint64_t *phigh, uint64_t divisor) +{ + if (divisor == 0) { + return 1; + } else { + __uint128_t dividend = ((__uint128_t)*phigh << 64) | *plow; + __uint128_t result = dividend / divisor; + *plow = result; + *phigh = dividend % divisor; + return result > UINT64_MAX; + } +} + +static inline int divs128(int64_t *plow, int64_t *phigh, int64_t divisor) +{ + if (divisor == 0) { + return 1; + } else { + __int128_t dividend = ((__int128_t)*phigh << 64) | *plow; + __int128_t result = dividend / divisor; + *plow = result; + *phigh = dividend % divisor; + return result != *plow; + } +} +#else +void muls64(uint64_t *plow, uint64_t *phigh, int64_t a, int64_t b); +void mulu64(uint64_t *plow, uint64_t *phigh, uint64_t a, uint64_t b); +int divu128(uint64_t *plow, uint64_t *phigh, uint64_t divisor); +int divs128(int64_t *plow, int64_t *phigh, int64_t divisor); + +static inline uint64_t muldiv64(uint64_t a, uint32_t b, uint32_t c) +{ + union { + uint64_t ll; + struct { +#ifdef HOST_WORDS_BIGENDIAN + uint32_t high, low; +#else + uint32_t low, high; +#endif + } l; + } u, res; + uint64_t rl, rh; + + u.ll = a; + rl = (uint64_t)u.l.low * (uint64_t)b; + rh = (uint64_t)u.l.high * (uint64_t)b; + rh += (rl >> 32); + res.l.high = rh / c; + res.l.low = (((rh % c) << 32) + (rl & 0xffffffff)) / c; + return res.ll; +} +#endif + +/** + * clz32 - count leading zeros in a 32-bit value. + * @val: The value to search + * + * Returns 32 if the value is zero. Note that the GCC builtin is + * undefined if the value is zero. + */ +static inline int clz32(uint32_t val) +{ + return val ? __builtin_clz(val) : 32; +} + +/** + * clo32 - count leading ones in a 32-bit value. + * @val: The value to search + * + * Returns 32 if the value is -1. + */ +static inline int clo32(uint32_t val) +{ + return clz32(~val); +} + +/** + * clz64 - count leading zeros in a 64-bit value. + * @val: The value to search + * + * Returns 64 if the value is zero. Note that the GCC builtin is + * undefined if the value is zero. + */ +static inline int clz64(uint64_t val) +{ + return val ? __builtin_clzll(val) : 64; +} + +/** + * clo64 - count leading ones in a 64-bit value. + * @val: The value to search + * + * Returns 64 if the value is -1. + */ +static inline int clo64(uint64_t val) +{ + return clz64(~val); +} + +/** + * ctz32 - count trailing zeros in a 32-bit value. + * @val: The value to search + * + * Returns 32 if the value is zero. Note that the GCC builtin is + * undefined if the value is zero. + */ +static inline int ctz32(uint32_t val) +{ + return val ? __builtin_ctz(val) : 32; +} + +/** + * cto32 - count trailing ones in a 32-bit value. + * @val: The value to search + * + * Returns 32 if the value is -1. + */ +static inline int cto32(uint32_t val) +{ + return ctz32(~val); +} + +/** + * ctz64 - count trailing zeros in a 64-bit value. + * @val: The value to search + * + * Returns 64 if the value is zero. Note that the GCC builtin is + * undefined if the value is zero. + */ +static inline int ctz64(uint64_t val) +{ + return val ? __builtin_ctzll(val) : 64; +} + +/** + * cto64 - count trailing ones in a 64-bit value. + * @val: The value to search + * + * Returns 64 if the value is -1. + */ +static inline int cto64(uint64_t val) +{ + return ctz64(~val); +} + +/** + * clrsb32 - count leading redundant sign bits in a 32-bit value. + * @val: The value to search + * + * Returns the number of bits following the sign bit that are equal to it. + * No special cases; output range is [0-31]. + */ +static inline int clrsb32(uint32_t val) +{ +#if __has_builtin(__builtin_clrsb) || !defined(__clang__) + return __builtin_clrsb(val); +#else + return clz32(val ^ ((int32_t)val >> 1)) - 1; +#endif +} + +/** + * clrsb64 - count leading redundant sign bits in a 64-bit value. + * @val: The value to search + * + * Returns the number of bits following the sign bit that are equal to it. + * No special cases; output range is [0-63]. + */ +static inline int clrsb64(uint64_t val) +{ +#if __has_builtin(__builtin_clrsbll) || !defined(__clang__) + return __builtin_clrsbll(val); +#else + return clz64(val ^ ((int64_t)val >> 1)) - 1; +#endif +} + +/** + * ctpop8 - count the population of one bits in an 8-bit value. + * @val: The value to search + */ +static inline int ctpop8(uint8_t val) +{ + return __builtin_popcount(val); +} + +/** + * ctpop16 - count the population of one bits in a 16-bit value. + * @val: The value to search + */ +static inline int ctpop16(uint16_t val) +{ + return __builtin_popcount(val); +} + +/** + * ctpop32 - count the population of one bits in a 32-bit value. + * @val: The value to search + */ +static inline int ctpop32(uint32_t val) +{ + return __builtin_popcount(val); +} + +/** + * ctpop64 - count the population of one bits in a 64-bit value. + * @val: The value to search + */ +static inline int ctpop64(uint64_t val) +{ + return __builtin_popcountll(val); +} + +/** + * revbit8 - reverse the bits in an 8-bit value. + * @x: The value to modify. + */ +static inline uint8_t revbit8(uint8_t x) +{ + /* Assign the correct nibble position. */ + x = ((x & 0xf0) >> 4) + | ((x & 0x0f) << 4); + /* Assign the correct bit position. */ + x = ((x & 0x88) >> 3) + | ((x & 0x44) >> 1) + | ((x & 0x22) << 1) + | ((x & 0x11) << 3); + return x; +} + +/** + * revbit16 - reverse the bits in a 16-bit value. + * @x: The value to modify. + */ +static inline uint16_t revbit16(uint16_t x) +{ + /* Assign the correct byte position. */ + x = bswap16(x); + /* Assign the correct nibble position. */ + x = ((x & 0xf0f0) >> 4) + | ((x & 0x0f0f) << 4); + /* Assign the correct bit position. */ + x = ((x & 0x8888) >> 3) + | ((x & 0x4444) >> 1) + | ((x & 0x2222) << 1) + | ((x & 0x1111) << 3); + return x; +} + +/** + * revbit32 - reverse the bits in a 32-bit value. + * @x: The value to modify. + */ +static inline uint32_t revbit32(uint32_t x) +{ + /* Assign the correct byte position. */ + x = bswap32(x); + /* Assign the correct nibble position. */ + x = ((x & 0xf0f0f0f0u) >> 4) + | ((x & 0x0f0f0f0fu) << 4); + /* Assign the correct bit position. */ + x = ((x & 0x88888888u) >> 3) + | ((x & 0x44444444u) >> 1) + | ((x & 0x22222222u) << 1) + | ((x & 0x11111111u) << 3); + return x; +} + +/** + * revbit64 - reverse the bits in a 64-bit value. + * @x: The value to modify. + */ +static inline uint64_t revbit64(uint64_t x) +{ + /* Assign the correct byte position. */ + x = bswap64(x); + /* Assign the correct nibble position. */ + x = ((x & 0xf0f0f0f0f0f0f0f0ull) >> 4) + | ((x & 0x0f0f0f0f0f0f0f0full) << 4); + /* Assign the correct bit position. */ + x = ((x & 0x8888888888888888ull) >> 3) + | ((x & 0x4444444444444444ull) >> 1) + | ((x & 0x2222222222222222ull) << 1) + | ((x & 0x1111111111111111ull) << 3); + return x; +} + +/* Host type specific sizes of these routines. */ + +#if ULONG_MAX == UINT32_MAX +# define clzl clz32 +# define ctzl ctz32 +# define clol clo32 +# define ctol cto32 +# define ctpopl ctpop32 +# define revbitl revbit32 +#elif ULONG_MAX == UINT64_MAX +# define clzl clz64 +# define ctzl ctz64 +# define clol clo64 +# define ctol cto64 +# define ctpopl ctpop64 +# define revbitl revbit64 +#else +# error Unknown sizeof long +#endif + +static inline bool is_power_of_2(uint64_t value) +{ + if (!value) { + return false; + } + + return !(value & (value - 1)); +} + +/** + * Return @value rounded down to the nearest power of two or zero. + */ +static inline uint64_t pow2floor(uint64_t value) +{ + if (!value) { + /* Avoid undefined shift by 64 */ + return 0; + } + return 0x8000000000000000ull >> clz64(value); +} + +/* + * Return @value rounded up to the nearest power of two modulo 2^64. + * This is *zero* for @value > 2^63, so be careful. + */ +static inline uint64_t pow2ceil(uint64_t value) +{ + int n = clz64(value - 1); + + if (!n) { + /* + * @value - 1 has no leading zeroes, thus @value - 1 >= 2^63 + * Therefore, either @value == 0 or @value > 2^63. + * If it's 0, return 1, else return 0. + */ + return !value; + } + return 0x8000000000000000ull >> (n - 1); +} + +static inline uint32_t pow2roundup32(uint32_t x) +{ + x |= (x >> 1); + x |= (x >> 2); + x |= (x >> 4); + x |= (x >> 8); + x |= (x >> 16); + return x + 1; +} + +/** + * urshift - 128-bit Unsigned Right Shift. + * @plow: in/out - lower 64-bit integer. + * @phigh: in/out - higher 64-bit integer. + * @shift: in - bytes to shift, between 0 and 127. + * + * Result is zero-extended and stored in plow/phigh, which are + * input/output variables. Shift values outside the range will + * be mod to 128. In other words, the caller is responsible to + * verify/assert both the shift range and plow/phigh pointers. + */ +void urshift(uint64_t *plow, uint64_t *phigh, int32_t shift); + +/** + * ulshift - 128-bit Unsigned Left Shift. + * @plow: in/out - lower 64-bit integer. + * @phigh: in/out - higher 64-bit integer. + * @shift: in - bytes to shift, between 0 and 127. + * @overflow: out - true if any 1-bit is shifted out. + * + * Result is zero-extended and stored in plow/phigh, which are + * input/output variables. Shift values outside the range will + * be mod to 128. In other words, the caller is responsible to + * verify/assert both the shift range and plow/phigh pointers. + */ +void ulshift(uint64_t *plow, uint64_t *phigh, int32_t shift, bool *overflow); + +#endif diff --git a/include/qemu/id.h b/include/qemu/id.h new file mode 100644 index 000000000..b55c406e6 --- /dev/null +++ b/include/qemu/id.h @@ -0,0 +1,14 @@ +#ifndef QEMU_ID_H +#define QEMU_ID_H + +typedef enum IdSubSystems { + ID_QDEV, + ID_BLOCK, + ID_CHR, + ID_MAX /* last element, used as array size */ +} IdSubSystems; + +char *id_generate(IdSubSystems id); +bool id_wellformed(const char *id); + +#endif diff --git a/include/qemu/int128.h b/include/qemu/int128.h new file mode 100644 index 000000000..76ea40592 --- /dev/null +++ b/include/qemu/int128.h @@ -0,0 +1,321 @@ +#ifndef INT128_H +#define INT128_H + +#ifdef CONFIG_INT128 +#include "qemu/bswap.h" + +typedef __int128_t Int128; + +static inline Int128 int128_make64(uint64_t a) +{ + return a; +} + +static inline Int128 int128_make128(uint64_t lo, uint64_t hi) +{ + return (__uint128_t)hi << 64 | lo; +} + +static inline uint64_t int128_get64(Int128 a) +{ + uint64_t r = a; + assert(r == a); + return r; +} + +static inline uint64_t int128_getlo(Int128 a) +{ + return a; +} + +static inline int64_t int128_gethi(Int128 a) +{ + return a >> 64; +} + +static inline Int128 int128_zero(void) +{ + return 0; +} + +static inline Int128 int128_one(void) +{ + return 1; +} + +static inline Int128 int128_2_64(void) +{ + return (Int128)1 << 64; +} + +static inline Int128 int128_exts64(int64_t a) +{ + return a; +} + +static inline Int128 int128_and(Int128 a, Int128 b) +{ + return a & b; +} + +static inline Int128 int128_rshift(Int128 a, int n) +{ + return a >> n; +} + +static inline Int128 int128_lshift(Int128 a, int n) +{ + return a << n; +} + +static inline Int128 int128_add(Int128 a, Int128 b) +{ + return a + b; +} + +static inline Int128 int128_neg(Int128 a) +{ + return -a; +} + +static inline Int128 int128_sub(Int128 a, Int128 b) +{ + return a - b; +} + +static inline bool int128_nonneg(Int128 a) +{ + return a >= 0; +} + +static inline bool int128_eq(Int128 a, Int128 b) +{ + return a == b; +} + +static inline bool int128_ne(Int128 a, Int128 b) +{ + return a != b; +} + +static inline bool int128_ge(Int128 a, Int128 b) +{ + return a >= b; +} + +static inline bool int128_lt(Int128 a, Int128 b) +{ + return a < b; +} + +static inline bool int128_le(Int128 a, Int128 b) +{ + return a <= b; +} + +static inline bool int128_gt(Int128 a, Int128 b) +{ + return a > b; +} + +static inline bool int128_nz(Int128 a) +{ + return a != 0; +} + +static inline Int128 int128_min(Int128 a, Int128 b) +{ + return a < b ? a : b; +} + +static inline Int128 int128_max(Int128 a, Int128 b) +{ + return a > b ? a : b; +} + +static inline void int128_addto(Int128 *a, Int128 b) +{ + *a += b; +} + +static inline void int128_subfrom(Int128 *a, Int128 b) +{ + *a -= b; +} + +static inline Int128 bswap128(Int128 a) +{ + return int128_make128(bswap64(int128_gethi(a)), bswap64(int128_getlo(a))); +} + +#else /* !CONFIG_INT128 */ + +typedef struct Int128 Int128; + +struct Int128 { + uint64_t lo; + int64_t hi; +}; + +static inline Int128 int128_make64(uint64_t a) +{ + return (Int128) { a, 0 }; +} + +static inline Int128 int128_make128(uint64_t lo, uint64_t hi) +{ + return (Int128) { lo, hi }; +} + +static inline uint64_t int128_get64(Int128 a) +{ + assert(!a.hi); + return a.lo; +} + +static inline uint64_t int128_getlo(Int128 a) +{ + return a.lo; +} + +static inline int64_t int128_gethi(Int128 a) +{ + return a.hi; +} + +static inline Int128 int128_zero(void) +{ + return int128_make64(0); +} + +static inline Int128 int128_one(void) +{ + return int128_make64(1); +} + +static inline Int128 int128_2_64(void) +{ + return (Int128) { 0, 1 }; +} + +static inline Int128 int128_exts64(int64_t a) +{ + return (Int128) { .lo = a, .hi = (a < 0) ? -1 : 0 }; +} + +static inline Int128 int128_and(Int128 a, Int128 b) +{ + return (Int128) { a.lo & b.lo, a.hi & b.hi }; +} + +static inline Int128 int128_rshift(Int128 a, int n) +{ + int64_t h; + if (!n) { + return a; + } + h = a.hi >> (n & 63); + if (n >= 64) { + return int128_make128(h, h >> 63); + } else { + return int128_make128((a.lo >> n) | ((uint64_t)a.hi << (64 - n)), h); + } +} + +static inline Int128 int128_lshift(Int128 a, int n) +{ + uint64_t l = a.lo << (n & 63); + if (n >= 64) { + return int128_make128(0, l); + } else if (n > 0) { + return int128_make128(l, (a.hi << n) | (a.lo >> (64 - n))); + } + return a; +} + +static inline Int128 int128_add(Int128 a, Int128 b) +{ + uint64_t lo = a.lo + b.lo; + + /* a.lo <= a.lo + b.lo < a.lo + k (k is the base, 2^64). Hence, + * a.lo + b.lo >= k implies 0 <= lo = a.lo + b.lo - k < a.lo. + * Similarly, a.lo + b.lo < k implies a.lo <= lo = a.lo + b.lo < k. + * + * So the carry is lo < a.lo. + */ + return int128_make128(lo, (uint64_t)a.hi + b.hi + (lo < a.lo)); +} + +static inline Int128 int128_neg(Int128 a) +{ + uint64_t lo = -a.lo; + return int128_make128(lo, ~(uint64_t)a.hi + !lo); +} + +static inline Int128 int128_sub(Int128 a, Int128 b) +{ + return int128_make128(a.lo - b.lo, (uint64_t)a.hi - b.hi - (a.lo < b.lo)); +} + +static inline bool int128_nonneg(Int128 a) +{ + return a.hi >= 0; +} + +static inline bool int128_eq(Int128 a, Int128 b) +{ + return a.lo == b.lo && a.hi == b.hi; +} + +static inline bool int128_ne(Int128 a, Int128 b) +{ + return !int128_eq(a, b); +} + +static inline bool int128_ge(Int128 a, Int128 b) +{ + return a.hi > b.hi || (a.hi == b.hi && a.lo >= b.lo); +} + +static inline bool int128_lt(Int128 a, Int128 b) +{ + return !int128_ge(a, b); +} + +static inline bool int128_le(Int128 a, Int128 b) +{ + return int128_ge(b, a); +} + +static inline bool int128_gt(Int128 a, Int128 b) +{ + return !int128_le(a, b); +} + +static inline bool int128_nz(Int128 a) +{ + return a.lo || a.hi; +} + +static inline Int128 int128_min(Int128 a, Int128 b) +{ + return int128_le(a, b) ? a : b; +} + +static inline Int128 int128_max(Int128 a, Int128 b) +{ + return int128_ge(a, b) ? a : b; +} + +static inline void int128_addto(Int128 *a, Int128 b) +{ + *a = int128_add(*a, b); +} + +static inline void int128_subfrom(Int128 *a, Int128 b) +{ + *a = int128_sub(*a, b); +} + +#endif /* CONFIG_INT128 */ +#endif /* INT128_H */ diff --git a/include/qemu/iov.h b/include/qemu/iov.h new file mode 100644 index 000000000..b6b283a5e --- /dev/null +++ b/include/qemu/iov.h @@ -0,0 +1,252 @@ +/* + * Helpers for using (partial) iovecs. + * + * Copyright (C) 2010 Red Hat, Inc. + * + * Author(s): + * Amit Shah + * Michael Tokarev + * + * This work is licensed under the terms of the GNU GPL, version 2. See + * the COPYING file in the top-level directory. + */ + +#ifndef IOV_H +#define IOV_H + +/** + * count and return data size, in bytes, of an iovec + * starting at `iov' of `iov_cnt' number of elements. + */ +size_t iov_size(const struct iovec *iov, const unsigned int iov_cnt); + +/** + * Copy from single continuous buffer to scatter-gather vector of buffers + * (iovec) and back like memcpy() between two continuous memory regions. + * Data in single continuous buffer starting at address `buf' and + * `bytes' bytes long will be copied to/from an iovec `iov' with + * `iov_cnt' number of elements, starting at byte position `offset' + * within the iovec. If the iovec does not contain enough space, + * only part of data will be copied, up to the end of the iovec. + * Number of bytes actually copied will be returned, which is + * min(bytes, iov_size(iov)-offset) + * `Offset' must point to the inside of iovec. + */ +size_t iov_from_buf_full(const struct iovec *iov, unsigned int iov_cnt, + size_t offset, const void *buf, size_t bytes); +size_t iov_to_buf_full(const struct iovec *iov, const unsigned int iov_cnt, + size_t offset, void *buf, size_t bytes); + +static inline size_t +iov_from_buf(const struct iovec *iov, unsigned int iov_cnt, + size_t offset, const void *buf, size_t bytes) +{ + if (__builtin_constant_p(bytes) && iov_cnt && + offset <= iov[0].iov_len && bytes <= iov[0].iov_len - offset) { + memcpy(iov[0].iov_base + offset, buf, bytes); + return bytes; + } else { + return iov_from_buf_full(iov, iov_cnt, offset, buf, bytes); + } +} + +static inline size_t +iov_to_buf(const struct iovec *iov, const unsigned int iov_cnt, + size_t offset, void *buf, size_t bytes) +{ + if (__builtin_constant_p(bytes) && iov_cnt && + offset <= iov[0].iov_len && bytes <= iov[0].iov_len - offset) { + memcpy(buf, iov[0].iov_base + offset, bytes); + return bytes; + } else { + return iov_to_buf_full(iov, iov_cnt, offset, buf, bytes); + } +} + +/** + * Set data bytes pointed out by iovec `iov' of size `iov_cnt' elements, + * starting at byte offset `start', to value `fillc', repeating it + * `bytes' number of times. `Offset' must point to the inside of iovec. + * If `bytes' is large enough, only last bytes portion of iovec, + * up to the end of it, will be filled with the specified value. + * Function return actual number of bytes processed, which is + * min(size, iov_size(iov) - offset). + */ +size_t iov_memset(const struct iovec *iov, const unsigned int iov_cnt, + size_t offset, int fillc, size_t bytes); + +/* + * Send/recv data from/to iovec buffers directly + * + * `offset' bytes in the beginning of iovec buffer are skipped and + * next `bytes' bytes are used, which must be within data of iovec. + * + * r = iov_send_recv(sockfd, iov, iovcnt, offset, bytes, true); + * + * is logically equivalent to + * + * char *buf = malloc(bytes); + * iov_to_buf(iov, iovcnt, offset, buf, bytes); + * r = send(sockfd, buf, bytes, 0); + * free(buf); + * + * For iov_send_recv() _whole_ area being sent or received + * should be within the iovec, not only beginning of it. + */ +ssize_t iov_send_recv(int sockfd, const struct iovec *iov, unsigned iov_cnt, + size_t offset, size_t bytes, bool do_send); +#define iov_recv(sockfd, iov, iov_cnt, offset, bytes) \ + iov_send_recv(sockfd, iov, iov_cnt, offset, bytes, false) +#define iov_send(sockfd, iov, iov_cnt, offset, bytes) \ + iov_send_recv(sockfd, iov, iov_cnt, offset, bytes, true) + +/** + * Produce a text hexdump of iovec `iov' with `iov_cnt' number of elements + * in file `fp', prefixing each line with `prefix' and processing not more + * than `limit' data bytes. + */ +void iov_hexdump(const struct iovec *iov, const unsigned int iov_cnt, + FILE *fp, const char *prefix, size_t limit); + +/* + * Partial copy of vector from iov to dst_iov (data is not copied). + * dst_iov overlaps iov at a specified offset. + * size of dst_iov is at most bytes. dst vector count is returned. + */ +unsigned iov_copy(struct iovec *dst_iov, unsigned int dst_iov_cnt, + const struct iovec *iov, unsigned int iov_cnt, + size_t offset, size_t bytes); + +/* + * Remove a given number of bytes from the front or back of a vector. + * This may update iov and/or iov_cnt to exclude iovec elements that are + * no longer required. + * + * The number of bytes actually discarded is returned. This number may be + * smaller than requested if the vector is too small. + */ +size_t iov_discard_front(struct iovec **iov, unsigned int *iov_cnt, + size_t bytes); +size_t iov_discard_back(struct iovec *iov, unsigned int *iov_cnt, + size_t bytes); + +/* Information needed to undo an iov_discard_*() operation */ +typedef struct { + struct iovec *modified_iov; + struct iovec orig; +} IOVDiscardUndo; + +/* + * Undo an iov_discard_front_undoable() or iov_discard_back_undoable() + * operation. If multiple operations are made then each one needs a separate + * IOVDiscardUndo and iov_discard_undo() must be called in the reverse order + * that the operations were made. + */ +void iov_discard_undo(IOVDiscardUndo *undo); + +/* + * Undoable versions of iov_discard_front() and iov_discard_back(). Use + * iov_discard_undo() to reset to the state before the discard operations. + */ +size_t iov_discard_front_undoable(struct iovec **iov, unsigned int *iov_cnt, + size_t bytes, IOVDiscardUndo *undo); +size_t iov_discard_back_undoable(struct iovec *iov, unsigned int *iov_cnt, + size_t bytes, IOVDiscardUndo *undo); + +typedef struct QEMUIOVector { + struct iovec *iov; + int niov; + + /* + * For external @iov (qemu_iovec_init_external()) or allocated @iov + * (qemu_iovec_init()), @size is the cumulative size of iovecs and + * @local_iov is invalid and unused. + * + * For embedded @iov (QEMU_IOVEC_INIT_BUF() or qemu_iovec_init_buf()), + * @iov is equal to &@local_iov, and @size is valid, as it has same + * offset and type as @local_iov.iov_len, which is guaranteed by + * static assertion below. + * + * @nalloc is always valid and is -1 both for embedded and external + * cases. It is included in the union only to ensure the padding prior + * to the @size field will not result in a 0-length array. + */ + union { + struct { + int nalloc; + struct iovec local_iov; + }; + struct { + char __pad[sizeof(int) + offsetof(struct iovec, iov_len)]; + size_t size; + }; + }; +} QEMUIOVector; + +QEMU_BUILD_BUG_ON(offsetof(QEMUIOVector, size) != + offsetof(QEMUIOVector, local_iov.iov_len)); + +#define QEMU_IOVEC_INIT_BUF(self, buf, len) \ +{ \ + .iov = &(self).local_iov, \ + .niov = 1, \ + .nalloc = -1, \ + .local_iov = { \ + .iov_base = (void *)(buf), /* cast away const */ \ + .iov_len = (len), \ + }, \ +} + +/* + * qemu_iovec_init_buf + * + * Initialize embedded QEMUIOVector. + * + * Note: "const" is used over @buf pointer to make it simple to pass + * const pointers, appearing in read functions. Then this "const" is + * cast away by QEMU_IOVEC_INIT_BUF(). + */ +static inline void qemu_iovec_init_buf(QEMUIOVector *qiov, + const void *buf, size_t len) +{ + *qiov = (QEMUIOVector) QEMU_IOVEC_INIT_BUF(*qiov, buf, len); +} + +static inline void *qemu_iovec_buf(QEMUIOVector *qiov) +{ + /* Only supports embedded iov */ + assert(qiov->nalloc == -1 && qiov->iov == &qiov->local_iov); + + return qiov->local_iov.iov_base; +} + +void qemu_iovec_init(QEMUIOVector *qiov, int alloc_hint); +void qemu_iovec_init_external(QEMUIOVector *qiov, struct iovec *iov, int niov); +void qemu_iovec_init_extended( + QEMUIOVector *qiov, + void *head_buf, size_t head_len, + QEMUIOVector *mid_qiov, size_t mid_offset, size_t mid_len, + void *tail_buf, size_t tail_len); +void qemu_iovec_init_slice(QEMUIOVector *qiov, QEMUIOVector *source, + size_t offset, size_t len); +int qemu_iovec_subvec_niov(QEMUIOVector *qiov, size_t offset, size_t len); +void qemu_iovec_add(QEMUIOVector *qiov, void *base, size_t len); +void qemu_iovec_concat(QEMUIOVector *dst, + QEMUIOVector *src, size_t soffset, size_t sbytes); +size_t qemu_iovec_concat_iov(QEMUIOVector *dst, + struct iovec *src_iov, unsigned int src_cnt, + size_t soffset, size_t sbytes); +bool qemu_iovec_is_zero(QEMUIOVector *qiov, size_t qiov_offeset, size_t bytes); +void qemu_iovec_destroy(QEMUIOVector *qiov); +void qemu_iovec_reset(QEMUIOVector *qiov); +size_t qemu_iovec_to_buf(QEMUIOVector *qiov, size_t offset, + void *buf, size_t bytes); +size_t qemu_iovec_from_buf(QEMUIOVector *qiov, size_t offset, + const void *buf, size_t bytes); +size_t qemu_iovec_memset(QEMUIOVector *qiov, size_t offset, + int fillc, size_t bytes); +ssize_t qemu_iovec_compare(QEMUIOVector *a, QEMUIOVector *b); +void qemu_iovec_clone(QEMUIOVector *dest, const QEMUIOVector *src, void *buf); +void qemu_iovec_discard_back(QEMUIOVector *qiov, size_t bytes); + +#endif diff --git a/include/qemu/iova-tree.h b/include/qemu/iova-tree.h new file mode 100644 index 000000000..b66cf93c4 --- /dev/null +++ b/include/qemu/iova-tree.h @@ -0,0 +1,133 @@ +/* + * An very simplified iova tree implementation based on GTree. + * + * Copyright 2018 Red Hat, Inc. + * + * Authors: + * Peter Xu + * + * This work is licensed under the terms of the GNU GPL, version 2 or later. + */ +#ifndef IOVA_TREE_H +#define IOVA_TREE_H + +/* + * Currently the iova tree will only allow to keep ranges + * information, and no extra user data is allowed for each element. A + * benefit is that we can merge adjacent ranges internally within the + * tree. It can save a lot of memory when the ranges are splitted but + * mostly continuous. + * + * Note that current implementation does not provide any thread + * protections. Callers of the iova tree should be responsible + * for the thread safety issue. + */ + +#include "exec/memory.h" +#include "exec/hwaddr.h" + +#define IOVA_OK (0) +#define IOVA_ERR_INVALID (-1) /* Invalid parameters */ +#define IOVA_ERR_OVERLAP (-2) /* IOVA range overlapped */ + +typedef struct IOVATree IOVATree; +typedef struct DMAMap { + hwaddr iova; + hwaddr translated_addr; + hwaddr size; /* Inclusive */ + IOMMUAccessFlags perm; +} QEMU_PACKED DMAMap; +typedef gboolean (*iova_tree_iterator)(DMAMap *map); + +/** + * iova_tree_new: + * + * Create a new iova tree. + * + * Returns: the tree pointer when succeeded, or NULL if error. + */ +IOVATree *iova_tree_new(void); + +/** + * iova_tree_insert: + * + * @tree: the iova tree to insert + * @map: the mapping to insert + * + * Insert an iova range to the tree. If there is overlapped + * ranges, IOVA_ERR_OVERLAP will be returned. + * + * Return: 0 if succeeded, or <0 if error. + */ +int iova_tree_insert(IOVATree *tree, DMAMap *map); + +/** + * iova_tree_remove: + * + * @tree: the iova tree to remove range from + * @map: the map range to remove + * + * Remove mappings from the tree that are covered by the map range + * provided. The range does not need to be exactly what has inserted, + * all the mappings that are included in the provided range will be + * removed from the tree. Here map->translated_addr is meaningless. + * + * Return: 0 if succeeded, or <0 if error. + */ +int iova_tree_remove(IOVATree *tree, DMAMap *map); + +/** + * iova_tree_find: + * + * @tree: the iova tree to search from + * @map: the mapping to search + * + * Search for a mapping in the iova tree that overlaps with the + * mapping range specified. Only the first found mapping will be + * returned. + * + * Return: DMAMap pointer if found, or NULL if not found. Note that + * the returned DMAMap pointer is maintained internally. User should + * only read the content but never modify or free the content. Also, + * user is responsible to make sure the pointer is valid (say, no + * concurrent deletion in progress). + */ +DMAMap *iova_tree_find(IOVATree *tree, DMAMap *map); + +/** + * iova_tree_find_address: + * + * @tree: the iova tree to search from + * @iova: the iova address to find + * + * Similar to iova_tree_find(), but it tries to find mapping with + * range iova=iova & size=0. + * + * Return: same as iova_tree_find(). + */ +DMAMap *iova_tree_find_address(IOVATree *tree, hwaddr iova); + +/** + * iova_tree_foreach: + * + * @tree: the iova tree to iterate on + * @iterator: the interator for the mappings, return true to stop + * + * Iterate over the iova tree. + * + * Return: 1 if found any overlap, 0 if not, <0 if error. + */ +void iova_tree_foreach(IOVATree *tree, iova_tree_iterator iterator); + +/** + * iova_tree_destroy: + * + * @tree: the iova tree to destroy + * + * Destroy an existing iova tree. + * + * Return: None. + */ +void iova_tree_destroy(IOVATree *tree); + +#endif diff --git a/include/qemu/jhash.h b/include/qemu/jhash.h new file mode 100644 index 000000000..84d14dc7f --- /dev/null +++ b/include/qemu/jhash.h @@ -0,0 +1,59 @@ +/* jhash.h: Jenkins hash support. + * + * Copyright (C) 2006. Bob Jenkins (bob_jenkins@burtleburtle.net) + * + * http://burtleburtle.net/bob/hash/ + * + * These are the credits from Bob's sources: + * + * lookup3.c, by Bob Jenkins, May 2006, Public Domain. + * + * These are functions for producing 32-bit hashes for hash table lookup. + * hashword(), hashlittle(), hashlittle2(), hashbig(), mix(), and final() + * are externally useful functions. Routines to test the hash are included + * if SELF_TEST is defined. You can use this free for any purpose. It's in + * the public domain. It has no warranty. + * + * Copyright (C) 2009-2010 Jozsef Kadlecsik (kadlec@blackhole.kfki.hu) + * + * I've modified Bob's hash to be useful in the Linux kernel, and + * any bugs present are my fault. + * Jozsef + */ + +#ifndef QEMU_JHASH_H +#define QEMU_JHASH_H + +#include "qemu/bitops.h" + +/* + * hashtable relation copy from linux kernel jhash + */ + +/* __jhash_mix -- mix 3 32-bit values reversibly. */ +#define __jhash_mix(a, b, c) \ +{ \ + a -= c; a ^= rol32(c, 4); c += b; \ + b -= a; b ^= rol32(a, 6); a += c; \ + c -= b; c ^= rol32(b, 8); b += a; \ + a -= c; a ^= rol32(c, 16); c += b; \ + b -= a; b ^= rol32(a, 19); a += c; \ + c -= b; c ^= rol32(b, 4); b += a; \ +} + +/* __jhash_final - final mixing of 3 32-bit values (a,b,c) into c */ +#define __jhash_final(a, b, c) \ +{ \ + c ^= b; c -= rol32(b, 14); \ + a ^= c; a -= rol32(c, 11); \ + b ^= a; b -= rol32(a, 25); \ + c ^= b; c -= rol32(b, 16); \ + a ^= c; a -= rol32(c, 4); \ + b ^= a; b -= rol32(a, 14); \ + c ^= b; c -= rol32(b, 24); \ +} + +/* An arbitrary initial parameter */ +#define JHASH_INITVAL 0xdeadbeef + +#endif /* QEMU_JHASH_H */ diff --git a/include/qemu/job.h b/include/qemu/job.h new file mode 100644 index 000000000..32aabb1c6 --- /dev/null +++ b/include/qemu/job.h @@ -0,0 +1,561 @@ +/* + * Declarations for background jobs + * + * Copyright (c) 2011 IBM Corp. + * Copyright (c) 2012, 2018 Red Hat, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ + +#ifndef JOB_H +#define JOB_H + +#include "qapi/qapi-types-job.h" +#include "qemu/queue.h" +#include "qemu/progress_meter.h" +#include "qemu/coroutine.h" +#include "block/aio.h" + +typedef struct JobDriver JobDriver; +typedef struct JobTxn JobTxn; + + +/** + * Long-running operation. + */ +typedef struct Job { + /** The ID of the job. May be NULL for internal jobs. */ + char *id; + + /** The type of this job. */ + const JobDriver *driver; + + /** Reference count of the block job */ + int refcnt; + + /** Current state; See @JobStatus for details. */ + JobStatus status; + + /** AioContext to run the job coroutine in */ + AioContext *aio_context; + + /** + * The coroutine that executes the job. If not NULL, it is reentered when + * busy is false and the job is cancelled. + */ + Coroutine *co; + + /** + * Timer that is used by @job_sleep_ns. Accessed under job_mutex (in + * job.c). + */ + QEMUTimer sleep_timer; + + /** + * Counter for pause request. If non-zero, the block job is either paused, + * or if busy == true will pause itself as soon as possible. + */ + int pause_count; + + /** + * Set to false by the job while the coroutine has yielded and may be + * re-entered by job_enter(). There may still be I/O or event loop activity + * pending. Accessed under block_job_mutex (in blockjob.c). + * + * When the job is deferred to the main loop, busy is true as long as the + * bottom half is still pending. + */ + bool busy; + + /** + * Set to true by the job while it is in a quiescent state, where + * no I/O or event loop activity is pending. + */ + bool paused; + + /** + * Set to true if the job is paused by user. Can be unpaused with the + * block-job-resume QMP command. + */ + bool user_paused; + + /** + * Set to true if the job should cancel itself. The flag must + * always be tested just before toggling the busy flag from false + * to true. After a job has been cancelled, it should only yield + * if #aio_poll will ("sooner or later") reenter the coroutine. + */ + bool cancelled; + + /** + * Set to true if the job should abort immediately without waiting + * for data to be in sync. + */ + bool force_cancel; + + /** Set to true when the job has deferred work to the main loop. */ + bool deferred_to_main_loop; + + /** True if this job should automatically finalize itself */ + bool auto_finalize; + + /** True if this job should automatically dismiss itself */ + bool auto_dismiss; + + ProgressMeter progress; + + /** + * Return code from @run and/or @prepare callback(s). + * Not final until the job has reached the CONCLUDED status. + * 0 on success, -errno on failure. + */ + int ret; + + /** + * Error object for a failed job. + * If job->ret is nonzero and an error object was not set, it will be set + * to strerror(-job->ret) during job_completed. + */ + Error *err; + + /** The completion function that will be called when the job completes. */ + BlockCompletionFunc *cb; + + /** The opaque value that is passed to the completion function. */ + void *opaque; + + /** Notifiers called when a cancelled job is finalised */ + NotifierList on_finalize_cancelled; + + /** Notifiers called when a successfully completed job is finalised */ + NotifierList on_finalize_completed; + + /** Notifiers called when the job transitions to PENDING */ + NotifierList on_pending; + + /** Notifiers called when the job transitions to READY */ + NotifierList on_ready; + + /** Notifiers called when the job coroutine yields or terminates */ + NotifierList on_idle; + + /** Element of the list of jobs */ + QLIST_ENTRY(Job) job_list; + + /** Transaction this job is part of */ + JobTxn *txn; + + /** Element of the list of jobs in a job transaction */ + QLIST_ENTRY(Job) txn_list; +} Job; + +/** + * Callbacks and other information about a Job driver. + */ +struct JobDriver { + /** Derived Job struct size */ + size_t instance_size; + + /** Enum describing the operation */ + JobType job_type; + + /** + * Mandatory: Entrypoint for the Coroutine. + * + * This callback will be invoked when moving from CREATED to RUNNING. + * + * If this callback returns nonzero, the job transaction it is part of is + * aborted. If it returns zero, the job moves into the WAITING state. If it + * is the last job to complete in its transaction, all jobs in the + * transaction move from WAITING to PENDING. + */ + int coroutine_fn (*run)(Job *job, Error **errp); + + /** + * If the callback is not NULL, it will be invoked when the job transitions + * into the paused state. Paused jobs must not perform any asynchronous + * I/O or event loop activity. This callback is used to quiesce jobs. + */ + void coroutine_fn (*pause)(Job *job); + + /** + * If the callback is not NULL, it will be invoked when the job transitions + * out of the paused state. Any asynchronous I/O or event loop activity + * should be restarted from this callback. + */ + void coroutine_fn (*resume)(Job *job); + + /** + * Called when the job is resumed by the user (i.e. user_paused becomes + * false). .user_resume is called before .resume. + */ + void (*user_resume)(Job *job); + + /** + * Optional callback for job types whose completion must be triggered + * manually. + */ + void (*complete)(Job *job, Error **errp); + + /** + * If the callback is not NULL, prepare will be invoked when all the jobs + * belonging to the same transaction complete; or upon this job's completion + * if it is not in a transaction. + * + * This callback will not be invoked if the job has already failed. + * If it fails, abort and then clean will be called. + */ + int (*prepare)(Job *job); + + /** + * If the callback is not NULL, it will be invoked when all the jobs + * belonging to the same transaction complete; or upon this job's + * completion if it is not in a transaction. Skipped if NULL. + * + * All jobs will complete with a call to either .commit() or .abort() but + * never both. + */ + void (*commit)(Job *job); + + /** + * If the callback is not NULL, it will be invoked when any job in the + * same transaction fails; or upon this job's failure (due to error or + * cancellation) if it is not in a transaction. Skipped if NULL. + * + * All jobs will complete with a call to either .commit() or .abort() but + * never both. + */ + void (*abort)(Job *job); + + /** + * If the callback is not NULL, it will be invoked after a call to either + * .commit() or .abort(). Regardless of which callback is invoked after + * completion, .clean() will always be called, even if the job does not + * belong to a transaction group. + */ + void (*clean)(Job *job); + + + /** Called when the job is freed */ + void (*free)(Job *job); +}; + +typedef enum JobCreateFlags { + /* Default behavior */ + JOB_DEFAULT = 0x00, + /* Job is not QMP-created and should not send QMP events */ + JOB_INTERNAL = 0x01, + /* Job requires manual finalize step */ + JOB_MANUAL_FINALIZE = 0x02, + /* Job requires manual dismiss step */ + JOB_MANUAL_DISMISS = 0x04, +} JobCreateFlags; + +/** + * Allocate and return a new job transaction. Jobs can be added to the + * transaction using job_txn_add_job(). + * + * The transaction is automatically freed when the last job completes or is + * cancelled. + * + * All jobs in the transaction either complete successfully or fail/cancel as a + * group. Jobs wait for each other before completing. Cancelling one job + * cancels all jobs in the transaction. + */ +JobTxn *job_txn_new(void); + +/** + * Release a reference that was previously acquired with job_txn_add_job or + * job_txn_new. If it's the last reference to the object, it will be freed. + */ +void job_txn_unref(JobTxn *txn); + +/** + * @txn: The transaction (may be NULL) + * @job: Job to add to the transaction + * + * Add @job to the transaction. The @job must not already be in a transaction. + * The caller must call either job_txn_unref() or job_completed() to release + * the reference that is automatically grabbed here. + * + * If @txn is NULL, the function does nothing. + */ +void job_txn_add_job(JobTxn *txn, Job *job); + +/** + * Create a new long-running job and return it. + * + * @job_id: The id of the newly-created job, or %NULL for internal jobs + * @driver: The class object for the newly-created job. + * @txn: The transaction this job belongs to, if any. %NULL otherwise. + * @ctx: The AioContext to run the job coroutine in. + * @flags: Creation flags for the job. See @JobCreateFlags. + * @cb: Completion function for the job. + * @opaque: Opaque pointer value passed to @cb. + * @errp: Error object. + */ +void *job_create(const char *job_id, const JobDriver *driver, JobTxn *txn, + AioContext *ctx, int flags, BlockCompletionFunc *cb, + void *opaque, Error **errp); + +/** + * Add a reference to Job refcnt, it will be decreased with job_unref, and then + * be freed if it comes to be the last reference. + */ +void job_ref(Job *job); + +/** + * Release a reference that was previously acquired with job_ref() or + * job_create(). If it's the last reference to the object, it will be freed. + */ +void job_unref(Job *job); + +/** + * @job: The job that has made progress + * @done: How much progress the job made since the last call + * + * Updates the progress counter of the job. + */ +void job_progress_update(Job *job, uint64_t done); + +/** + * @job: The job whose expected progress end value is set + * @remaining: Missing progress (on top of the current progress counter value) + * until the new expected end value is reached + * + * Sets the expected end value of the progress counter of a job so that a + * completion percentage can be calculated when the progress is updated. + */ +void job_progress_set_remaining(Job *job, uint64_t remaining); + +/** + * @job: The job whose expected progress end value is updated + * @delta: Value which is to be added to the current expected end + * value + * + * Increases the expected end value of the progress counter of a job. + * This is useful for parenthesis operations: If a job has to + * conditionally perform a high-priority operation as part of its + * progress, it calls this function with the expected operation's + * length before, and job_progress_update() afterwards. + * (So the operation acts as a parenthesis in regards to the main job + * operation running in background.) + */ +void job_progress_increase_remaining(Job *job, uint64_t delta); + +/** To be called when a cancelled job is finalised. */ +void job_event_cancelled(Job *job); + +/** To be called when a successfully completed job is finalised. */ +void job_event_completed(Job *job); + +/** + * Conditionally enter the job coroutine if the job is ready to run, not + * already busy and fn() returns true. fn() is called while under the job_lock + * critical section. + */ +void job_enter_cond(Job *job, bool(*fn)(Job *job)); + +/** + * @job: A job that has not yet been started. + * + * Begins execution of a job. + * Takes ownership of one reference to the job object. + */ +void job_start(Job *job); + +/** + * @job: The job to enter. + * + * Continue the specified job by entering the coroutine. + */ +void job_enter(Job *job); + +/** + * @job: The job that is ready to pause. + * + * Pause now if job_pause() has been called. Jobs that perform lots of I/O + * must call this between requests so that the job can be paused. + */ +void coroutine_fn job_pause_point(Job *job); + +/** + * @job: The job that calls the function. + * + * Yield the job coroutine. + */ +void job_yield(Job *job); + +/** + * @job: The job that calls the function. + * @ns: How many nanoseconds to stop for. + * + * Put the job to sleep (assuming that it wasn't canceled) for @ns + * %QEMU_CLOCK_REALTIME nanoseconds. Canceling the job will immediately + * interrupt the wait. + */ +void coroutine_fn job_sleep_ns(Job *job, int64_t ns); + + +/** Returns the JobType of a given Job. */ +JobType job_type(const Job *job); + +/** Returns the enum string for the JobType of a given Job. */ +const char *job_type_str(const Job *job); + +/** Returns true if the job should not be visible to the management layer. */ +bool job_is_internal(Job *job); + +/** Returns whether the job is scheduled for cancellation. */ +bool job_is_cancelled(Job *job); + +/** Returns whether the job is in a completed state. */ +bool job_is_completed(Job *job); + +/** Returns whether the job is ready to be completed. */ +bool job_is_ready(Job *job); + +/** + * Request @job to pause at the next pause point. Must be paired with + * job_resume(). If the job is supposed to be resumed by user action, call + * job_user_pause() instead. + */ +void job_pause(Job *job); + +/** Resumes a @job paused with job_pause. */ +void job_resume(Job *job); + +/** + * Asynchronously pause the specified @job. + * Do not allow a resume until a matching call to job_user_resume. + */ +void job_user_pause(Job *job, Error **errp); + +/** Returns true if the job is user-paused. */ +bool job_user_paused(Job *job); + +/** + * Resume the specified @job. + * Must be paired with a preceding job_user_pause. + */ +void job_user_resume(Job *job, Error **errp); + +/** + * Get the next element from the list of block jobs after @job, or the + * first one if @job is %NULL. + * + * Returns the requested job, or %NULL if there are no more jobs left. + */ +Job *job_next(Job *job); + +/** + * Get the job identified by @id (which must not be %NULL). + * + * Returns the requested job, or %NULL if it doesn't exist. + */ +Job *job_get(const char *id); + +/** + * Check whether the verb @verb can be applied to @job in its current state. + * Returns 0 if the verb can be applied; otherwise errp is set and -EPERM + * returned. + */ +int job_apply_verb(Job *job, JobVerb verb, Error **errp); + +/** The @job could not be started, free it. */ +void job_early_fail(Job *job); + +/** Moves the @job from RUNNING to READY */ +void job_transition_to_ready(Job *job); + +/** Asynchronously complete the specified @job. */ +void job_complete(Job *job, Error **errp); + +/** + * Asynchronously cancel the specified @job. If @force is true, the job should + * be cancelled immediately without waiting for a consistent state. + */ +void job_cancel(Job *job, bool force); + +/** + * Cancels the specified job like job_cancel(), but may refuse to do so if the + * operation isn't meaningful in the current state of the job. + */ +void job_user_cancel(Job *job, bool force, Error **errp); + +/** + * Synchronously cancel the @job. The completion callback is called + * before the function returns. The job may actually complete + * instead of canceling itself; the circumstances under which this + * happens depend on the kind of job that is active. + * + * Returns the return value from the job if the job actually completed + * during the call, or -ECANCELED if it was canceled. + * + * Callers must hold the AioContext lock of job->aio_context. + */ +int job_cancel_sync(Job *job); + +/** Synchronously cancels all jobs using job_cancel_sync(). */ +void job_cancel_sync_all(void); + +/** + * @job: The job to be completed. + * @errp: Error object which may be set by job_complete(); this is not + * necessarily set on every error, the job return value has to be + * checked as well. + * + * Synchronously complete the job. The completion callback is called before the + * function returns, unless it is NULL (which is permissible when using this + * function). + * + * Returns the return value from the job. + * + * Callers must hold the AioContext lock of job->aio_context. + */ +int job_complete_sync(Job *job, Error **errp); + +/** + * For a @job that has finished its work and is pending awaiting explicit + * acknowledgement to commit its work, this will commit that work. + * + * FIXME: Make the below statement universally true: + * For jobs that support the manual workflow mode, all graph changes that occur + * as a result will occur after this command and before a successful reply. + */ +void job_finalize(Job *job, Error **errp); + +/** + * Remove the concluded @job from the query list and resets the passed pointer + * to %NULL. Returns an error if the job is not actually concluded. + */ +void job_dismiss(Job **job, Error **errp); + +/** + * Synchronously finishes the given @job. If @finish is given, it is called to + * trigger completion or cancellation of the job. + * + * Returns 0 if the job is successfully completed, -ECANCELED if the job was + * cancelled before completing, and -errno in other error cases. + * + * Callers must hold the AioContext lock of job->aio_context. + */ +int job_finish_sync(Job *job, void (*finish)(Job *, Error **errp), Error **errp); + +#endif diff --git a/include/qemu/lockable.h b/include/qemu/lockable.h new file mode 100644 index 000000000..b62002314 --- /dev/null +++ b/include/qemu/lockable.h @@ -0,0 +1,177 @@ +/* + * Polymorphic locking functions (aka poor man templates) + * + * Copyright Red Hat, Inc. 2017, 2018 + * + * Author: Paolo Bonzini + * + * This work is licensed under the terms of the GNU LGPL, version 2 or later. + * See the COPYING.LIB file in the top-level directory. + * + */ + +#ifndef QEMU_LOCKABLE_H +#define QEMU_LOCKABLE_H + +#include "qemu/coroutine.h" +#include "qemu/thread.h" + +typedef void QemuLockUnlockFunc(void *); + +struct QemuLockable { + void *object; + QemuLockUnlockFunc *lock; + QemuLockUnlockFunc *unlock; +}; + +/* This function gives an error if an invalid, non-NULL pointer type is passed + * to QEMU_MAKE_LOCKABLE. For optimized builds, we can rely on dead-code elimination + * from the compiler, and give the errors already at link time. + */ +#if defined(__OPTIMIZE__) && !defined(__SANITIZE_ADDRESS__) +void unknown_lock_type(void *); +#else +static inline void unknown_lock_type(void *unused) +{ + abort(); +} +#endif + +static inline __attribute__((__always_inline__)) QemuLockable * +qemu_make_lockable(void *x, QemuLockable *lockable) +{ + /* We cannot test this in a macro, otherwise we get compiler + * warnings like "the address of 'm' will always evaluate as 'true'". + */ + return x ? lockable : NULL; +} + +/* Auxiliary macros to simplify QEMU_MAKE_LOCABLE. */ +#define QEMU_LOCK_FUNC(x) ((QemuLockUnlockFunc *) \ + QEMU_GENERIC(x, \ + (QemuMutex *, qemu_mutex_lock), \ + (QemuRecMutex *, qemu_rec_mutex_lock), \ + (CoMutex *, qemu_co_mutex_lock), \ + (QemuSpin *, qemu_spin_lock), \ + unknown_lock_type)) + +#define QEMU_UNLOCK_FUNC(x) ((QemuLockUnlockFunc *) \ + QEMU_GENERIC(x, \ + (QemuMutex *, qemu_mutex_unlock), \ + (QemuRecMutex *, qemu_rec_mutex_unlock), \ + (CoMutex *, qemu_co_mutex_unlock), \ + (QemuSpin *, qemu_spin_unlock), \ + unknown_lock_type)) + +/* In C, compound literals have the lifetime of an automatic variable. + * In C++ it would be different, but then C++ wouldn't need QemuLockable + * either... + */ +#define QEMU_MAKE_LOCKABLE_(x) (&(QemuLockable) { \ + .object = (x), \ + .lock = QEMU_LOCK_FUNC(x), \ + .unlock = QEMU_UNLOCK_FUNC(x), \ + }) + +/* QEMU_MAKE_LOCKABLE - Make a polymorphic QemuLockable + * + * @x: a lock object (currently one of QemuMutex, QemuRecMutex, CoMutex, QemuSpin). + * + * Returns a QemuLockable object that can be passed around + * to a function that can operate with locks of any kind, or + * NULL if @x is %NULL. + */ +#define QEMU_MAKE_LOCKABLE(x) \ + QEMU_GENERIC(x, \ + (QemuLockable *, (x)), \ + qemu_make_lockable((x), QEMU_MAKE_LOCKABLE_(x))) + +/* QEMU_MAKE_LOCKABLE_NONNULL - Make a polymorphic QemuLockable + * + * @x: a lock object (currently one of QemuMutex, QemuRecMutex, CoMutex, QemuSpin). + * + * Returns a QemuLockable object that can be passed around + * to a function that can operate with locks of any kind. + */ +#define QEMU_MAKE_LOCKABLE_NONNULL(x) \ + QEMU_GENERIC(x, \ + (QemuLockable *, (x)), \ + QEMU_MAKE_LOCKABLE_(x)) + +static inline void qemu_lockable_lock(QemuLockable *x) +{ + x->lock(x->object); +} + +static inline void qemu_lockable_unlock(QemuLockable *x) +{ + x->unlock(x->object); +} + +static inline QemuLockable *qemu_lockable_auto_lock(QemuLockable *x) +{ + qemu_lockable_lock(x); + return x; +} + +static inline void qemu_lockable_auto_unlock(QemuLockable *x) +{ + if (x) { + qemu_lockable_unlock(x); + } +} + +G_DEFINE_AUTOPTR_CLEANUP_FUNC(QemuLockable, qemu_lockable_auto_unlock) + +#define WITH_QEMU_LOCK_GUARD_(x, var) \ + for (g_autoptr(QemuLockable) var = \ + qemu_lockable_auto_lock(QEMU_MAKE_LOCKABLE_NONNULL((x))); \ + var; \ + qemu_lockable_auto_unlock(var), var = NULL) + +/** + * WITH_QEMU_LOCK_GUARD - Lock a lock object for scope + * + * @x: a lock object (currently one of QemuMutex, CoMutex, QemuSpin). + * + * This macro defines a lock scope such that entering the scope takes the lock + * and leaving the scope releases the lock. Return statements are allowed + * within the scope and release the lock. Break and continue statements leave + * the scope early and release the lock. + * + * WITH_QEMU_LOCK_GUARD(&mutex) { + * ... + * if (error) { + * return; <-- mutex is automatically unlocked + * } + * + * if (early_exit) { + * break; <-- leave this scope early + * } + * ... + * } + */ +#define WITH_QEMU_LOCK_GUARD(x) \ + WITH_QEMU_LOCK_GUARD_((x), glue(qemu_lockable_auto, __COUNTER__)) + +/** + * QEMU_LOCK_GUARD - Lock an object until the end of the scope + * + * @x: a lock object (currently one of QemuMutex, CoMutex, QemuSpin). + * + * This macro takes a lock until the end of the scope. Return statements + * release the lock. + * + * ... <-- mutex not locked + * QEMU_LOCK_GUARD(&mutex); <-- mutex locked from here onwards + * ... + * if (error) { + * return; <-- mutex is automatically unlocked + * } + */ +#define QEMU_LOCK_GUARD(x) \ + g_autoptr(QemuLockable) \ + glue(qemu_lockable_auto, __COUNTER__) G_GNUC_UNUSED = \ + qemu_lockable_auto_lock(QEMU_MAKE_LOCKABLE((x))) + +#endif diff --git a/include/qemu/log-for-trace.h b/include/qemu/log-for-trace.h new file mode 100644 index 000000000..2f0a5b080 --- /dev/null +++ b/include/qemu/log-for-trace.h @@ -0,0 +1,35 @@ +/* log-for-trace.h: logging basics required by the trace.h generated + * by the log trace backend. + * + * This should not be included directly by any .c file: if you + * need to use the logging functions include "qemu/log.h". + * + * The purpose of splitting these parts out into their own header + * is to catch the easy mistake where a .c file includes trace.h + * but forgets to include qemu/log.h. Without this split, that + * would result in the .c file compiling fine when the default + * trace backend is in use but failing to compile with any other + * backend. + * + * This code is licensed under the GNU General Public License, + * version 2 or (at your option) any later version. + */ + +#ifndef QEMU_LOG_FOR_TRACE_H +#define QEMU_LOG_FOR_TRACE_H + +/* Private global variable, don't use */ +extern int qemu_loglevel; + +#define LOG_TRACE (1 << 15) + +/* Returns true if a bit is set in the current loglevel mask */ +static inline bool qemu_loglevel_mask(int mask) +{ + return (qemu_loglevel & mask) != 0; +} + +/* main logging function */ +int GCC_FMT_ATTR(1, 2) qemu_log(const char *fmt, ...); + +#endif diff --git a/include/qemu/log.h b/include/qemu/log.h new file mode 100644 index 000000000..9b8066020 --- /dev/null +++ b/include/qemu/log.h @@ -0,0 +1,167 @@ +#ifndef QEMU_LOG_H +#define QEMU_LOG_H + +/* A small part of this API is split into its own header */ +#include "qemu/log-for-trace.h" +#include "qemu/rcu.h" + +typedef struct QemuLogFile { + struct rcu_head rcu; + FILE *fd; +} QemuLogFile; + +/* Private global variable, don't use */ +extern QemuLogFile *qemu_logfile; + + +/* + * The new API: + * + */ + +/* Log settings checking macros: */ + +/* Returns true if qemu_log() will really write somewhere + */ +static inline bool qemu_log_enabled(void) +{ + return qemu_logfile != NULL; +} + +/* Returns true if qemu_log() will write somewhere else than stderr + */ +static inline bool qemu_log_separate(void) +{ + QemuLogFile *logfile; + bool res = false; + + rcu_read_lock(); + logfile = qatomic_rcu_read(&qemu_logfile); + if (logfile && logfile->fd != stderr) { + res = true; + } + rcu_read_unlock(); + return res; +} + +#define CPU_LOG_TB_OUT_ASM (1 << 0) +#define CPU_LOG_TB_IN_ASM (1 << 1) +#define CPU_LOG_TB_OP (1 << 2) +#define CPU_LOG_TB_OP_OPT (1 << 3) +#define CPU_LOG_INT (1 << 4) +#define CPU_LOG_EXEC (1 << 5) +#define CPU_LOG_PCALL (1 << 6) +#define CPU_LOG_TB_CPU (1 << 8) +#define CPU_LOG_RESET (1 << 9) +#define LOG_UNIMP (1 << 10) +#define LOG_GUEST_ERROR (1 << 11) +#define CPU_LOG_MMU (1 << 12) +#define CPU_LOG_TB_NOCHAIN (1 << 13) +#define CPU_LOG_PAGE (1 << 14) +/* LOG_TRACE (1 << 15) is defined in log-for-trace.h */ +#define CPU_LOG_TB_OP_IND (1 << 16) +#define CPU_LOG_TB_FPU (1 << 17) +#define CPU_LOG_PLUGIN (1 << 18) +/* LOG_STRACE is used for user-mode strace logging. */ +#define LOG_STRACE (1 << 19) + +/* Lock output for a series of related logs. Since this is not needed + * for a single qemu_log / qemu_log_mask / qemu_log_mask_and_addr, we + * assume that qemu_loglevel_mask has already been tested, and that + * qemu_loglevel is never set when qemu_logfile is unset. + */ + +static inline FILE *qemu_log_lock(void) +{ + QemuLogFile *logfile; + rcu_read_lock(); + logfile = qatomic_rcu_read(&qemu_logfile); + if (logfile) { + qemu_flockfile(logfile->fd); + return logfile->fd; + } else { + return NULL; + } +} + +static inline void qemu_log_unlock(FILE *fd) +{ + if (fd) { + qemu_funlockfile(fd); + } + rcu_read_unlock(); +} + +/* Logging functions: */ + +/* vfprintf-like logging function + */ +static inline void GCC_FMT_ATTR(1, 0) +qemu_log_vprintf(const char *fmt, va_list va) +{ + QemuLogFile *logfile; + + rcu_read_lock(); + logfile = qatomic_rcu_read(&qemu_logfile); + if (logfile) { + vfprintf(logfile->fd, fmt, va); + } + rcu_read_unlock(); +} + +/* log only if a bit is set on the current loglevel mask: + * @mask: bit to check in the mask + * @fmt: printf-style format string + * @args: optional arguments for format string + */ +#define qemu_log_mask(MASK, FMT, ...) \ + do { \ + if (unlikely(qemu_loglevel_mask(MASK))) { \ + qemu_log(FMT, ## __VA_ARGS__); \ + } \ + } while (0) + +/* log only if a bit is set on the current loglevel mask + * and we are in the address range we care about: + * @mask: bit to check in the mask + * @addr: address to check in dfilter + * @fmt: printf-style format string + * @args: optional arguments for format string + */ +#define qemu_log_mask_and_addr(MASK, ADDR, FMT, ...) \ + do { \ + if (unlikely(qemu_loglevel_mask(MASK)) && \ + qemu_log_in_addr_range(ADDR)) { \ + qemu_log(FMT, ## __VA_ARGS__); \ + } \ + } while (0) + +/* Maintenance: */ + +/* define log items */ +typedef struct QEMULogItem { + int mask; + const char *name; + const char *help; +} QEMULogItem; + +extern const QEMULogItem qemu_log_items[]; + +void qemu_set_log(int log_flags); +void qemu_log_needs_buffers(void); +void qemu_set_log_filename(const char *filename, Error **errp); +void qemu_set_dfilter_ranges(const char *ranges, Error **errp); +bool qemu_log_in_addr_range(uint64_t addr); +int qemu_str_to_log_mask(const char *str); + +/* Print a usage message listing all the valid logging categories + * to the specified FILE*. + */ +void qemu_print_log_usage(FILE *f); + +/* fflush() the log file */ +void qemu_log_flush(void); +/* Close the log file */ +void qemu_log_close(void); + +#endif diff --git a/include/qemu/main-loop.h b/include/qemu/main-loop.h new file mode 100644 index 000000000..d6892fd20 --- /dev/null +++ b/include/qemu/main-loop.h @@ -0,0 +1,333 @@ +/* + * QEMU System Emulator + * + * Copyright (c) 2003-2008 Fabrice Bellard + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ + +#ifndef QEMU_MAIN_LOOP_H +#define QEMU_MAIN_LOOP_H + +#include "block/aio.h" + +#define SIG_IPI SIGUSR1 + +/** + * qemu_init_main_loop: Set up the process so that it can run the main loop. + * + * This includes setting up signal handlers. It should be called before + * any other threads are created. In addition, threads other than the + * main one should block signals that are trapped by the main loop. + * For simplicity, you can consider these signals to be safe: SIGUSR1, + * SIGUSR2, thread signals (SIGFPE, SIGILL, SIGSEGV, SIGBUS) and real-time + * signals if available. Remember that Windows in practice does not have + * signals, though. + * + * In the case of QEMU tools, this will also start/initialize timers. + */ +int qemu_init_main_loop(Error **errp); + +/** + * main_loop_wait: Run one iteration of the main loop. + * + * If @nonblocking is true, poll for events, otherwise suspend until + * one actually occurs. The main loop usually consists of a loop that + * repeatedly calls main_loop_wait(false). + * + * Main loop services include file descriptor callbacks, bottom halves + * and timers (defined in qemu/timer.h). Bottom halves are similar to timers + * that execute immediately, but have a lower overhead and scheduling them + * is wait-free, thread-safe and signal-safe. + * + * It is sometimes useful to put a whole program in a coroutine. In this + * case, the coroutine actually should be started from within the main loop, + * so that the main loop can run whenever the coroutine yields. To do this, + * you can use a bottom half to enter the coroutine as soon as the main loop + * starts: + * + * void enter_co_bh(void *opaque) { + * QEMUCoroutine *co = opaque; + * qemu_coroutine_enter(co); + * } + * + * ... + * QEMUCoroutine *co = qemu_coroutine_create(coroutine_entry, NULL); + * QEMUBH *start_bh = qemu_bh_new(enter_co_bh, co); + * qemu_bh_schedule(start_bh); + * while (...) { + * main_loop_wait(false); + * } + * + * (In the future we may provide a wrapper for this). + * + * @nonblocking: Whether the caller should block until an event occurs. + */ +void main_loop_wait(int nonblocking); + +/** + * qemu_get_aio_context: Return the main loop's AioContext + */ +AioContext *qemu_get_aio_context(void); + +/** + * qemu_notify_event: Force processing of pending events. + * + * Similar to signaling a condition variable, qemu_notify_event forces + * main_loop_wait to look at pending events and exit. The caller of + * main_loop_wait will usually call it again very soon, so qemu_notify_event + * also has the side effect of recalculating the sets of file descriptors + * that the main loop waits for. + * + * Calling qemu_notify_event is rarely necessary, because main loop + * services (bottom halves and timers) call it themselves. + */ +void qemu_notify_event(void); + +#ifdef _WIN32 +/* return TRUE if no sleep should be done afterwards */ +typedef int PollingFunc(void *opaque); + +/** + * qemu_add_polling_cb: Register a Windows-specific polling callback + * + * Currently, under Windows some events are polled rather than waited for. + * Polling callbacks do not ensure that @func is called timely, because + * the main loop might wait for an arbitrarily long time. If possible, + * you should instead create a separate thread that does a blocking poll + * and set a Win32 event object. The event can then be passed to + * qemu_add_wait_object. + * + * Polling callbacks really have nothing Windows specific in them, but + * as they are a hack and are currently not necessary under POSIX systems, + * they are only available when QEMU is running under Windows. + * + * @func: The function that does the polling, and returns 1 to force + * immediate completion of main_loop_wait. + * @opaque: A pointer-size value that is passed to @func. + */ +int qemu_add_polling_cb(PollingFunc *func, void *opaque); + +/** + * qemu_del_polling_cb: Unregister a Windows-specific polling callback + * + * This function removes a callback that was registered with + * qemu_add_polling_cb. + * + * @func: The function that was passed to qemu_add_polling_cb. + * @opaque: A pointer-size value that was passed to qemu_add_polling_cb. + */ +void qemu_del_polling_cb(PollingFunc *func, void *opaque); + +/* Wait objects handling */ +typedef void WaitObjectFunc(void *opaque); + +/** + * qemu_add_wait_object: Register a callback for a Windows handle + * + * Under Windows, the iohandler mechanism can only be used with sockets. + * QEMU must use the WaitForMultipleObjects API to wait on other handles. + * This function registers a #HANDLE with QEMU, so that it will be included + * in the main loop's calls to WaitForMultipleObjects. When the handle + * is in a signaled state, QEMU will call @func. + * + * @handle: The Windows handle to be observed. + * @func: A function to be called when @handle is in a signaled state. + * @opaque: A pointer-size value that is passed to @func. + */ +int qemu_add_wait_object(HANDLE handle, WaitObjectFunc *func, void *opaque); + +/** + * qemu_del_wait_object: Unregister a callback for a Windows handle + * + * This function removes a callback that was registered with + * qemu_add_wait_object. + * + * @func: The function that was passed to qemu_add_wait_object. + * @opaque: A pointer-size value that was passed to qemu_add_wait_object. + */ +void qemu_del_wait_object(HANDLE handle, WaitObjectFunc *func, void *opaque); +#endif + +/* async I/O support */ + +typedef void IOReadHandler(void *opaque, const uint8_t *buf, int size); + +/** + * IOCanReadHandler: Return the number of bytes that #IOReadHandler can accept + * + * This function reports how many bytes #IOReadHandler is prepared to accept. + * #IOReadHandler may be invoked with up to this number of bytes. If this + * function returns 0 then #IOReadHandler is not invoked. + * + * This function is typically called from an event loop. If the number of + * bytes changes outside the event loop (e.g. because a vcpu thread drained the + * buffer), then it is necessary to kick the event loop so that this function + * is called again. aio_notify() or qemu_notify_event() can be used to kick + * the event loop. + */ +typedef int IOCanReadHandler(void *opaque); + +/** + * qemu_set_fd_handler: Register a file descriptor with the main loop + * + * This function tells the main loop to wake up whenever one of the + * following conditions is true: + * + * 1) if @fd_write is not %NULL, when the file descriptor is writable; + * + * 2) if @fd_read is not %NULL, when the file descriptor is readable. + * + * The callbacks that are set up by qemu_set_fd_handler are level-triggered. + * If @fd_read does not read from @fd, or @fd_write does not write to @fd + * until its buffers are full, they will be called again on the next + * iteration. + * + * @fd: The file descriptor to be observed. Under Windows it must be + * a #SOCKET. + * + * @fd_read: A level-triggered callback that is fired if @fd is readable + * at the beginning of a main loop iteration, or if it becomes readable + * during one. + * + * @fd_write: A level-triggered callback that is fired when @fd is writable + * at the beginning of a main loop iteration, or if it becomes writable + * during one. + * + * @opaque: A pointer-sized value that is passed to @fd_read and @fd_write. + */ +void qemu_set_fd_handler(int fd, + IOHandler *fd_read, + IOHandler *fd_write, + void *opaque); + + +/** + * event_notifier_set_handler: Register an EventNotifier with the main loop + * + * This function tells the main loop to wake up whenever the + * #EventNotifier was set. + * + * @e: The #EventNotifier to be observed. + * + * @handler: A level-triggered callback that is fired when @e + * has been set. @e is passed to it as a parameter. + */ +void event_notifier_set_handler(EventNotifier *e, + EventNotifierHandler *handler); + +GSource *iohandler_get_g_source(void); +AioContext *iohandler_get_aio_context(void); +#ifdef CONFIG_POSIX +/** + * qemu_add_child_watch: Register a child process for reaping. + * + * Under POSIX systems, a parent process must read the exit status of + * its child processes using waitpid, or the operating system will not + * free some of the resources attached to that process. + * + * This function directs the QEMU main loop to observe a child process + * and call waitpid as soon as it exits; the watch is then removed + * automatically. It is useful whenever QEMU forks a child process + * but will find out about its termination by other means such as a + * "broken pipe". + * + * @pid: The pid that QEMU should observe. + */ +int qemu_add_child_watch(pid_t pid); +#endif + +/** + * qemu_mutex_iothread_locked: Return lock status of the main loop mutex. + * + * The main loop mutex is the coarsest lock in QEMU, and as such it + * must always be taken outside other locks. This function helps + * functions take different paths depending on whether the current + * thread is running within the main loop mutex. + */ +bool qemu_mutex_iothread_locked(void); + +/** + * qemu_mutex_lock_iothread: Lock the main loop mutex. + * + * This function locks the main loop mutex. The mutex is taken by + * main() in vl.c and always taken except while waiting on + * external events (such as with select). The mutex should be taken + * by threads other than the main loop thread when calling + * qemu_bh_new(), qemu_set_fd_handler() and basically all other + * functions documented in this file. + * + * NOTE: tools currently are single-threaded and qemu_mutex_lock_iothread + * is a no-op there. + */ +#define qemu_mutex_lock_iothread() \ + qemu_mutex_lock_iothread_impl(__FILE__, __LINE__) +void qemu_mutex_lock_iothread_impl(const char *file, int line); + +/** + * qemu_mutex_unlock_iothread: Unlock the main loop mutex. + * + * This function unlocks the main loop mutex. The mutex is taken by + * main() in vl.c and always taken except while waiting on + * external events (such as with select). The mutex should be unlocked + * as soon as possible by threads other than the main loop thread, + * because it prevents the main loop from processing callbacks, + * including timers and bottom halves. + * + * NOTE: tools currently are single-threaded and qemu_mutex_unlock_iothread + * is a no-op there. + */ +void qemu_mutex_unlock_iothread(void); + +/* + * qemu_cond_wait_iothread: Wait on condition for the main loop mutex + * + * This function atomically releases the main loop mutex and causes + * the calling thread to block on the condition. + */ +void qemu_cond_wait_iothread(QemuCond *cond); + +/* + * qemu_cond_timedwait_iothread: like the previous, but with timeout + */ +void qemu_cond_timedwait_iothread(QemuCond *cond, int ms); + +/* internal interfaces */ + +void qemu_fd_register(int fd); + +QEMUBH *qemu_bh_new(QEMUBHFunc *cb, void *opaque); +void qemu_bh_schedule_idle(QEMUBH *bh); + +enum { + MAIN_LOOP_POLL_FILL, + MAIN_LOOP_POLL_ERR, + MAIN_LOOP_POLL_OK, +}; + +typedef struct MainLoopPoll { + int state; + uint32_t timeout; + GArray *pollfds; +} MainLoopPoll; + +void main_loop_poll_add_notifier(Notifier *notify); +void main_loop_poll_remove_notifier(Notifier *notify); + +#endif diff --git a/include/qemu/memfd.h b/include/qemu/memfd.h new file mode 100644 index 000000000..975b6bdb7 --- /dev/null +++ b/include/qemu/memfd.h @@ -0,0 +1,47 @@ +#ifndef QEMU_MEMFD_H +#define QEMU_MEMFD_H + + +#ifndef F_LINUX_SPECIFIC_BASE +#define F_LINUX_SPECIFIC_BASE 1024 +#endif + +#ifndef F_ADD_SEALS +#define F_ADD_SEALS (F_LINUX_SPECIFIC_BASE + 9) +#define F_GET_SEALS (F_LINUX_SPECIFIC_BASE + 10) + +#define F_SEAL_SEAL 0x0001 /* prevent further seals from being set */ +#define F_SEAL_SHRINK 0x0002 /* prevent file from shrinking */ +#define F_SEAL_GROW 0x0004 /* prevent file from growing */ +#define F_SEAL_WRITE 0x0008 /* prevent writes */ +#endif + +#ifndef MFD_CLOEXEC +#define MFD_CLOEXEC 0x0001U +#endif + +#ifndef MFD_ALLOW_SEALING +#define MFD_ALLOW_SEALING 0x0002U +#endif + +#ifndef MFD_HUGETLB +#define MFD_HUGETLB 0x0004U +#endif + +#ifndef MFD_HUGE_SHIFT +#define MFD_HUGE_SHIFT 26 +#endif + +#if defined CONFIG_LINUX && !defined CONFIG_MEMFD +int memfd_create(const char *name, unsigned int flags); +#endif + +int qemu_memfd_create(const char *name, size_t size, bool hugetlb, + uint64_t hugetlbsize, unsigned int seals, Error **errp); +bool qemu_memfd_alloc_check(void); +void *qemu_memfd_alloc(const char *name, size_t size, unsigned int seals, + int *fd, Error **errp); +void qemu_memfd_free(void *ptr, size_t size, int fd); +bool qemu_memfd_check(unsigned int flags); + +#endif /* QEMU_MEMFD_H */ diff --git a/include/qemu/mmap-alloc.h b/include/qemu/mmap-alloc.h new file mode 100644 index 000000000..e786266b9 --- /dev/null +++ b/include/qemu/mmap-alloc.h @@ -0,0 +1,32 @@ +#ifndef QEMU_MMAP_ALLOC_H +#define QEMU_MMAP_ALLOC_H + + +size_t qemu_fd_getpagesize(int fd); + +size_t qemu_mempath_getpagesize(const char *mem_path); + +/** + * qemu_ram_mmap: mmap the specified file or device. + * + * Parameters: + * @fd: the file or the device to mmap + * @size: the number of bytes to be mmaped + * @align: if not zero, specify the alignment of the starting mapping address; + * otherwise, the alignment in use will be determined by QEMU. + * @shared: map has RAM_SHARED flag. + * @is_pmem: map has RAM_PMEM flag. + * + * Return: + * On success, return a pointer to the mapped area. + * On failure, return MAP_FAILED. + */ +void *qemu_ram_mmap(int fd, + size_t size, + size_t align, + bool shared, + bool is_pmem); + +void qemu_ram_munmap(int fd, void *ptr, size_t size); + +#endif diff --git a/include/qemu/module.h b/include/qemu/module.h new file mode 100644 index 000000000..4b42dd285 --- /dev/null +++ b/include/qemu/module.h @@ -0,0 +1,77 @@ +/* + * QEMU Module Infrastructure + * + * Copyright IBM, Corp. 2009 + * + * Authors: + * Anthony Liguori + * + * This work is licensed under the terms of the GNU GPL, version 2. See + * the COPYING file in the top-level directory. + * + */ + +#ifndef QEMU_MODULE_H +#define QEMU_MODULE_H + + +#define DSO_STAMP_FUN glue(qemu_stamp, CONFIG_STAMP) +#define DSO_STAMP_FUN_STR stringify(DSO_STAMP_FUN) + +#ifdef BUILD_DSO +void DSO_STAMP_FUN(void); +/* This is a dummy symbol to identify a loaded DSO as a QEMU module, so we can + * distinguish "version mismatch" from "not a QEMU module", when the stamp + * check fails during module loading */ +void qemu_module_dummy(void); + +#define module_init(function, type) \ +static void __attribute__((constructor)) do_qemu_init_ ## function(void) \ +{ \ + register_dso_module_init(function, type); \ +} +#else +/* This should not be used directly. Use block_init etc. instead. */ +#define module_init(function, type) \ +static void __attribute__((constructor)) do_qemu_init_ ## function(void) \ +{ \ + register_module_init(function, type); \ +} +#endif + +typedef enum { + MODULE_INIT_MIGRATION, + MODULE_INIT_BLOCK, + MODULE_INIT_OPTS, + MODULE_INIT_QOM, + MODULE_INIT_TRACE, + MODULE_INIT_XEN_BACKEND, + MODULE_INIT_LIBQOS, + MODULE_INIT_FUZZ_TARGET, + MODULE_INIT_MAX +} module_init_type; + +#define block_init(function) module_init(function, MODULE_INIT_BLOCK) +#define opts_init(function) module_init(function, MODULE_INIT_OPTS) +#define type_init(function) module_init(function, MODULE_INIT_QOM) +#define trace_init(function) module_init(function, MODULE_INIT_TRACE) +#define xen_backend_init(function) module_init(function, \ + MODULE_INIT_XEN_BACKEND) +#define libqos_init(function) module_init(function, MODULE_INIT_LIBQOS) +#define fuzz_target_init(function) module_init(function, \ + MODULE_INIT_FUZZ_TARGET) +#define migration_init(function) module_init(function, MODULE_INIT_MIGRATION) +#define block_module_load_one(lib) module_load_one("block-", lib, false) +#define ui_module_load_one(lib) module_load_one("ui-", lib, false) +#define audio_module_load_one(lib) module_load_one("audio-", lib, false) + +void register_module_init(void (*fn)(void), module_init_type type); +void register_dso_module_init(void (*fn)(void), module_init_type type); + +void module_call_init(module_init_type type); +bool module_load_one(const char *prefix, const char *lib_name, bool mayfail); +void module_load_qom_one(const char *type); +void module_load_qom_all(void); +int module_load_check(const char *name); + +#endif diff --git a/include/qemu/notify.h b/include/qemu/notify.h new file mode 100644 index 000000000..bcfa70fb2 --- /dev/null +++ b/include/qemu/notify.h @@ -0,0 +1,74 @@ +/* + * Notifier lists + * + * Copyright IBM, Corp. 2010 + * + * Authors: + * Anthony Liguori + * + * This work is licensed under the terms of the GNU GPL, version 2. See + * the COPYING file in the top-level directory. + * + */ + +#ifndef QEMU_NOTIFY_H +#define QEMU_NOTIFY_H + +#include "qemu/queue.h" + +typedef struct Notifier Notifier; + +struct Notifier +{ + void (*notify)(Notifier *notifier, void *data); + QLIST_ENTRY(Notifier) node; +}; + +typedef struct NotifierList +{ + QLIST_HEAD(, Notifier) notifiers; +} NotifierList; + +#define NOTIFIER_LIST_INITIALIZER(head) \ + { QLIST_HEAD_INITIALIZER((head).notifiers) } + +void notifier_list_init(NotifierList *list); + +void notifier_list_add(NotifierList *list, Notifier *notifier); + +void notifier_remove(Notifier *notifier); + +void notifier_list_notify(NotifierList *list, void *data); + +bool notifier_list_empty(NotifierList *list); + +/* Same as Notifier but allows .notify() to return errors */ +typedef struct NotifierWithReturn NotifierWithReturn; + +struct NotifierWithReturn { + /** + * Return 0 on success (next notifier will be invoked), otherwise + * notifier_with_return_list_notify() will stop and return the value. + */ + int (*notify)(NotifierWithReturn *notifier, void *data); + QLIST_ENTRY(NotifierWithReturn) node; +}; + +typedef struct NotifierWithReturnList { + QLIST_HEAD(, NotifierWithReturn) notifiers; +} NotifierWithReturnList; + +#define NOTIFIER_WITH_RETURN_LIST_INITIALIZER(head) \ + { QLIST_HEAD_INITIALIZER((head).notifiers) } + +void notifier_with_return_list_init(NotifierWithReturnList *list); + +void notifier_with_return_list_add(NotifierWithReturnList *list, + NotifierWithReturn *notifier); + +void notifier_with_return_remove(NotifierWithReturn *notifier); + +int notifier_with_return_list_notify(NotifierWithReturnList *list, + void *data); + +#endif diff --git a/include/qemu/nvdimm-utils.h b/include/qemu/nvdimm-utils.h new file mode 100644 index 000000000..4b8b198ba --- /dev/null +++ b/include/qemu/nvdimm-utils.h @@ -0,0 +1,7 @@ +#ifndef NVDIMM_UTILS_H +#define NVDIMM_UTILS_H + +#include "qemu/osdep.h" + +GSList *nvdimm_get_device_list(void); +#endif diff --git a/include/qemu/option.h b/include/qemu/option.h new file mode 100644 index 000000000..ac69352e0 --- /dev/null +++ b/include/qemu/option.h @@ -0,0 +1,154 @@ +/* + * Commandline option parsing functions + * + * Copyright (c) 2003-2008 Fabrice Bellard + * Copyright (c) 2009 Kevin Wolf + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ + +#ifndef QEMU_OPTION_H +#define QEMU_OPTION_H + +#include "qemu/queue.h" + +/** + * get_opt_value + * @p: a pointer to the option name, delimited by commas + * @value: a non-NULL pointer that will received the delimited options + * + * The @value char pointer will be allocated and filled with + * the delimited options. + * + * Returns the position of the comma delimiter/zero byte after the + * option name in @p. + * The memory pointer in @value must be released with a call to g_free() + * when no longer required. + */ +const char *get_opt_value(const char *p, char **value); + +bool parse_option_size(const char *name, const char *value, + uint64_t *ret, Error **errp); +bool has_help_option(const char *param); + +enum QemuOptType { + QEMU_OPT_STRING = 0, /* no parsing (use string as-is) */ + QEMU_OPT_BOOL, /* on/off */ + QEMU_OPT_NUMBER, /* simple number */ + QEMU_OPT_SIZE, /* size, accepts (K)ilo, (M)ega, (G)iga, (T)era postfix */ +}; + +typedef struct QemuOptDesc { + const char *name; + enum QemuOptType type; + const char *help; + const char *def_value_str; +} QemuOptDesc; + +struct QemuOptsList { + const char *name; + const char *implied_opt_name; + bool merge_lists; /* Merge multiple uses of option into a single list? */ + QTAILQ_HEAD(, QemuOpts) head; + QemuOptDesc desc[]; +}; + +const char *qemu_opt_get(QemuOpts *opts, const char *name); +char *qemu_opt_get_del(QemuOpts *opts, const char *name); +/** + * qemu_opt_has_help_opt: + * @opts: options to search for a help request + * + * Check whether the options specified by @opts include one of the + * standard strings which indicate that the user is asking for a + * list of the valid values for a command line option (as defined + * by is_help_option()). + * + * Returns: true if @opts includes 'help' or equivalent. + */ +bool qemu_opt_has_help_opt(QemuOpts *opts); +QemuOpt *qemu_opt_find(QemuOpts *opts, const char *name); +bool qemu_opt_get_bool(QemuOpts *opts, const char *name, bool defval); +uint64_t qemu_opt_get_number(QemuOpts *opts, const char *name, uint64_t defval); +uint64_t qemu_opt_get_size(QemuOpts *opts, const char *name, uint64_t defval); +bool qemu_opt_get_bool_del(QemuOpts *opts, const char *name, bool defval); +uint64_t qemu_opt_get_number_del(QemuOpts *opts, const char *name, + uint64_t defval); +uint64_t qemu_opt_get_size_del(QemuOpts *opts, const char *name, + uint64_t defval); +int qemu_opt_unset(QemuOpts *opts, const char *name); +bool qemu_opt_set(QemuOpts *opts, const char *name, const char *value, + Error **errp); +bool qemu_opt_set_bool(QemuOpts *opts, const char *name, bool val, + Error **errp); +bool qemu_opt_set_number(QemuOpts *opts, const char *name, int64_t val, + Error **errp); +typedef int (*qemu_opt_loopfunc)(void *opaque, + const char *name, const char *value, + Error **errp); +int qemu_opt_foreach(QemuOpts *opts, qemu_opt_loopfunc func, void *opaque, + Error **errp); + +typedef struct { + QemuOpts *opts; + QemuOpt *opt; + const char *name; +} QemuOptsIter; + +void qemu_opt_iter_init(QemuOptsIter *iter, QemuOpts *opts, const char *name); +const char *qemu_opt_iter_next(QemuOptsIter *iter); + +QemuOpts *qemu_opts_find(QemuOptsList *list, const char *id); +QemuOpts *qemu_opts_create(QemuOptsList *list, const char *id, + int fail_if_exists, Error **errp); +void qemu_opts_reset(QemuOptsList *list); +void qemu_opts_loc_restore(QemuOpts *opts); +bool qemu_opts_set(QemuOptsList *list, const char *id, + const char *name, const char *value, Error **errp); +const char *qemu_opts_id(QemuOpts *opts); +void qemu_opts_set_id(QemuOpts *opts, char *id); +void qemu_opts_del(QemuOpts *opts); +bool qemu_opts_validate(QemuOpts *opts, const QemuOptDesc *desc, Error **errp); +bool qemu_opts_do_parse(QemuOpts *opts, const char *params, + const char *firstname, Error **errp); +QemuOpts *qemu_opts_parse_noisily(QemuOptsList *list, const char *params, + bool permit_abbrev); +QemuOpts *qemu_opts_parse(QemuOptsList *list, const char *params, + bool permit_abbrev, Error **errp); +void qemu_opts_set_defaults(QemuOptsList *list, const char *params, + int permit_abbrev); +QemuOpts *qemu_opts_from_qdict(QemuOptsList *list, const QDict *qdict, + Error **errp); +QDict *qemu_opts_to_qdict_filtered(QemuOpts *opts, QDict *qdict, + QemuOptsList *list, bool del); +QDict *qemu_opts_to_qdict(QemuOpts *opts, QDict *qdict); +bool qemu_opts_absorb_qdict(QemuOpts *opts, QDict *qdict, Error **errp); + +typedef int (*qemu_opts_loopfunc)(void *opaque, QemuOpts *opts, Error **errp); +int qemu_opts_foreach(QemuOptsList *list, qemu_opts_loopfunc func, + void *opaque, Error **errp); +void qemu_opts_print(QemuOpts *opts, const char *sep); +void qemu_opts_print_help(QemuOptsList *list, bool print_caption); +void qemu_opts_free(QemuOptsList *list); +QemuOptsList *qemu_opts_append(QemuOptsList *dst, QemuOptsList *list); + +QDict *keyval_parse(const char *params, const char *implied_key, + bool *help, Error **errp); + +#endif diff --git a/include/qemu/option_int.h b/include/qemu/option_int.h new file mode 100644 index 000000000..5dd9a5162 --- /dev/null +++ b/include/qemu/option_int.h @@ -0,0 +1,54 @@ +/* + * Commandline option parsing functions + * + * Copyright (c) 2003-2008 Fabrice Bellard + * Copyright (c) 2009 Kevin Wolf + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ + +#ifndef QEMU_OPTION_INT_H +#define QEMU_OPTION_INT_H + +#include "qemu/option.h" +#include "qemu/error-report.h" + +struct QemuOpt { + char *name; + char *str; + + const QemuOptDesc *desc; + union { + bool boolean; + uint64_t uint; + } value; + + QemuOpts *opts; + QTAILQ_ENTRY(QemuOpt) next; +}; + +struct QemuOpts { + char *id; + QemuOptsList *list; + Location loc; + QTAILQ_HEAD(, QemuOpt) head; + QTAILQ_ENTRY(QemuOpts) next; +}; + +#endif diff --git a/include/qemu/osdep.h b/include/qemu/osdep.h new file mode 100644 index 000000000..f9ec8c84e --- /dev/null +++ b/include/qemu/osdep.h @@ -0,0 +1,689 @@ +/* + * OS includes and handling of OS dependencies + * + * This header exists to pull in some common system headers that + * most code in QEMU will want, and to fix up some possible issues with + * it (missing defines, Windows weirdness, and so on). + * + * To avoid getting into possible circular include dependencies, this + * file should not include any other QEMU headers, with the exceptions + * of config-host.h, config-target.h, qemu/compiler.h, + * sysemu/os-posix.h, sysemu/os-win32.h, glib-compat.h and + * qemu/typedefs.h, all of which are doing a similar job to this file + * and are under similar constraints. + * + * This header also contains prototypes for functions defined in + * os-*.c and util/oslib-*.c; those would probably be better split + * out into separate header files. + * + * In an ideal world this header would contain only: + * (1) things which everybody needs + * (2) things without which code would work on most platforms but + * fail to compile or misbehave on a minority of host OSes + * + * This work is licensed under the terms of the GNU GPL, version 2 or later. + * See the COPYING file in the top-level directory. + */ +#ifndef QEMU_OSDEP_H +#define QEMU_OSDEP_H + +#include "config-host.h" +#ifdef NEED_CPU_H +#include CONFIG_TARGET +#else +#include "exec/poison.h" +#endif + +#include "qemu/compiler.h" + +/* Older versions of C++ don't get definitions of various macros from + * stdlib.h unless we define these macros before first inclusion of + * that system header. + */ +#ifndef __STDC_CONSTANT_MACROS +#define __STDC_CONSTANT_MACROS +#endif +#ifndef __STDC_LIMIT_MACROS +#define __STDC_LIMIT_MACROS +#endif +#ifndef __STDC_FORMAT_MACROS +#define __STDC_FORMAT_MACROS +#endif + +/* The following block of code temporarily renames the daemon() function so the + * compiler does not see the warning associated with it in stdlib.h on OSX + */ +#ifdef __APPLE__ +#define daemon qemu_fake_daemon_function +#include +#undef daemon +extern int daemon(int, int); +#endif + +#ifdef _WIN32 +/* as defined in sdkddkver.h */ +#ifndef _WIN32_WINNT +#define _WIN32_WINNT 0x0600 /* Vista */ +#endif +/* reduces the number of implicitly included headers */ +#ifndef WIN32_LEAN_AND_MEAN +#define WIN32_LEAN_AND_MEAN +#endif +#endif + +/* enable C99/POSIX format strings (needs mingw32-runtime 3.15 or later) */ +#ifdef __MINGW32__ +#define __USE_MINGW_ANSI_STDIO 1 +#endif + +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +/* Put unistd.h before time.h as that triggers localtime_r/gmtime_r + * function availability on recentish Mingw-w64 platforms. */ +#include +#include +#include +#include +#include +#include +#include +#include +#include +/* setjmp must be declared before sysemu/os-win32.h + * because it is redefined there. */ +#include +#include + +#ifdef HAVE_SYS_SIGNAL_H +#include +#endif + +#ifndef _WIN32 +#include +#else +#define WIFEXITED(x) 1 +#define WEXITSTATUS(x) (x) +#endif + +#ifdef _WIN32 +#include "sysemu/os-win32.h" +#endif + +#ifdef CONFIG_POSIX +#include "sysemu/os-posix.h" +#endif + +#include "glib-compat.h" +#include "qemu/typedefs.h" + +/* + * For mingw, as of v6.0.0, the function implementing the assert macro is + * not marked as noreturn, so the compiler cannot delete code following an + * assert(false) as unused. We rely on this within the code base to delete + * code that is unreachable when features are disabled. + * All supported versions of Glib's g_assert() satisfy this requirement. + */ +#ifdef __MINGW32__ +#undef assert +#define assert(x) g_assert(x) +#endif + +/* + * According to waitpid man page: + * WCOREDUMP + * This macro is not specified in POSIX.1-2001 and is not + * available on some UNIX implementations (e.g., AIX, SunOS). + * Therefore, enclose its use inside #ifdef WCOREDUMP ... #endif. + */ +#ifndef WCOREDUMP +#define WCOREDUMP(status) 0 +#endif +/* + * We have a lot of unaudited code that may fail in strange ways, or + * even be a security risk during migration, if you disable assertions + * at compile-time. You may comment out these safety checks if you + * absolutely want to disable assertion overhead, but it is not + * supported upstream so the risk is all yours. Meanwhile, please + * submit patches to remove any side-effects inside an assertion, or + * fixing error handling that should use Error instead of assert. + */ +#ifdef NDEBUG +#error building with NDEBUG is not supported +#endif +#ifdef G_DISABLE_ASSERT +#error building with G_DISABLE_ASSERT is not supported +#endif + +#ifndef O_LARGEFILE +#define O_LARGEFILE 0 +#endif +#ifndef O_BINARY +#define O_BINARY 0 +#endif +#ifndef MAP_ANONYMOUS +#define MAP_ANONYMOUS MAP_ANON +#endif +#ifndef MAP_FIXED_NOREPLACE +#define MAP_FIXED_NOREPLACE 0 +#endif +#ifndef ENOMEDIUM +#define ENOMEDIUM ENODEV +#endif +#if !defined(ENOTSUP) +#define ENOTSUP 4096 +#endif +#if !defined(ECANCELED) +#define ECANCELED 4097 +#endif +#if !defined(EMEDIUMTYPE) +#define EMEDIUMTYPE 4098 +#endif +#if !defined(ESHUTDOWN) +#define ESHUTDOWN 4099 +#endif + +/* time_t may be either 32 or 64 bits depending on the host OS, and + * can be either signed or unsigned, so we can't just hardcode a + * specific maximum value. This is not a C preprocessor constant, + * so you can't use TIME_MAX in an #ifdef, but for our purposes + * this isn't a problem. + */ + +/* The macros TYPE_SIGNED, TYPE_WIDTH, and TYPE_MAXIMUM are from + * Gnulib, and are under the LGPL v2.1 or (at your option) any + * later version. + */ + +/* True if the real type T is signed. */ +#define TYPE_SIGNED(t) (!((t)0 < (t)-1)) + +/* The width in bits of the integer type or expression T. + * Padding bits are not supported. + */ +#define TYPE_WIDTH(t) (sizeof(t) * CHAR_BIT) + +/* The maximum and minimum values for the integer type T. */ +#define TYPE_MAXIMUM(t) \ + ((t) (!TYPE_SIGNED(t) \ + ? (t)-1 \ + : ((((t)1 << (TYPE_WIDTH(t) - 2)) - 1) * 2 + 1))) + +#ifndef TIME_MAX +#define TIME_MAX TYPE_MAXIMUM(time_t) +#endif + +/* HOST_LONG_BITS is the size of a native pointer in bits. */ +#if UINTPTR_MAX == UINT32_MAX +# define HOST_LONG_BITS 32 +#elif UINTPTR_MAX == UINT64_MAX +# define HOST_LONG_BITS 64 +#else +# error Unknown pointer size +#endif + +/* Mac OSX has a bug that incorrectly defines SIZE_MAX with + * the wrong type. Our replacement isn't usable in preprocessor + * expressions, but it is sufficient for our needs. */ +#if defined(HAVE_BROKEN_SIZE_MAX) && HAVE_BROKEN_SIZE_MAX +#undef SIZE_MAX +#define SIZE_MAX ((size_t)-1) +#endif + +/* + * Two variations of MIN/MAX macros. The first is for runtime use, and + * evaluates arguments only once (so it is safe even with side + * effects), but will not work in constant contexts (such as array + * size declarations) because of the '{}'. The second is for constant + * expression use, where evaluating arguments twice is safe because + * the result is going to be constant anyway, but will not work in a + * runtime context because of a void expression where a value is + * expected. Thus, both gcc and clang will fail to compile if you use + * the wrong macro (even if the error may seem a bit cryptic). + * + * Note that neither form is usable as an #if condition; if you truly + * need to write conditional code that depends on a minimum or maximum + * determined by the pre-processor instead of the compiler, you'll + * have to open-code it. Sadly, Coverity is severely confused by the + * constant variants, so we have to dumb things down there. + */ +#undef MIN +#define MIN(a, b) \ + ({ \ + typeof(1 ? (a) : (b)) _a = (a), _b = (b); \ + _a < _b ? _a : _b; \ + }) +#undef MAX +#define MAX(a, b) \ + ({ \ + typeof(1 ? (a) : (b)) _a = (a), _b = (b); \ + _a > _b ? _a : _b; \ + }) + +#ifdef __COVERITY__ +# define MIN_CONST(a, b) ((a) < (b) ? (a) : (b)) +# define MAX_CONST(a, b) ((a) > (b) ? (a) : (b)) +#else +# define MIN_CONST(a, b) \ + __builtin_choose_expr( \ + __builtin_constant_p(a) && __builtin_constant_p(b), \ + (a) < (b) ? (a) : (b), \ + ((void)0)) +# define MAX_CONST(a, b) \ + __builtin_choose_expr( \ + __builtin_constant_p(a) && __builtin_constant_p(b), \ + (a) > (b) ? (a) : (b), \ + ((void)0)) +#endif + +/* + * Minimum function that returns zero only if both values are zero. + * Intended for use with unsigned values only. + */ +#ifndef MIN_NON_ZERO +#define MIN_NON_ZERO(a, b) \ + ({ \ + typeof(1 ? (a) : (b)) _a = (a), _b = (b); \ + _a == 0 ? _b : (_b == 0 || _b > _a) ? _a : _b; \ + }) +#endif + +/* Round number down to multiple */ +#define QEMU_ALIGN_DOWN(n, m) ((n) / (m) * (m)) + +/* Round number up to multiple. Safe when m is not a power of 2 (see + * ROUND_UP for a faster version when a power of 2 is guaranteed) */ +#define QEMU_ALIGN_UP(n, m) QEMU_ALIGN_DOWN((n) + (m) - 1, (m)) + +/* Check if n is a multiple of m */ +#define QEMU_IS_ALIGNED(n, m) (((n) % (m)) == 0) + +/* n-byte align pointer down */ +#define QEMU_ALIGN_PTR_DOWN(p, n) \ + ((typeof(p))QEMU_ALIGN_DOWN((uintptr_t)(p), (n))) + +/* n-byte align pointer up */ +#define QEMU_ALIGN_PTR_UP(p, n) \ + ((typeof(p))QEMU_ALIGN_UP((uintptr_t)(p), (n))) + +/* Check if pointer p is n-bytes aligned */ +#define QEMU_PTR_IS_ALIGNED(p, n) QEMU_IS_ALIGNED((uintptr_t)(p), (n)) + +/* Round number up to multiple. Requires that d be a power of 2 (see + * QEMU_ALIGN_UP for a safer but slower version on arbitrary + * numbers); works even if d is a smaller type than n. */ +#ifndef ROUND_UP +#define ROUND_UP(n, d) (((n) + (d) - 1) & -(0 ? (n) : (d))) +#endif + +#ifndef DIV_ROUND_UP +#define DIV_ROUND_UP(n, d) (((n) + (d) - 1) / (d)) +#endif + +/* + * &(x)[0] is always a pointer - if it's same type as x then the argument is a + * pointer, not an array. + */ +#define QEMU_IS_ARRAY(x) (!__builtin_types_compatible_p(typeof(x), \ + typeof(&(x)[0]))) +#ifndef ARRAY_SIZE +#define ARRAY_SIZE(x) ((sizeof(x) / sizeof((x)[0])) + \ + QEMU_BUILD_BUG_ON_ZERO(!QEMU_IS_ARRAY(x))) +#endif + +int qemu_daemon(int nochdir, int noclose); +void *qemu_try_memalign(size_t alignment, size_t size); +void *qemu_memalign(size_t alignment, size_t size); +void *qemu_anon_ram_alloc(size_t size, uint64_t *align, bool shared); +void qemu_vfree(void *ptr); +void qemu_anon_ram_free(void *ptr, size_t size); + +#define QEMU_MADV_INVALID -1 + +#if defined(CONFIG_MADVISE) + +#define QEMU_MADV_WILLNEED MADV_WILLNEED +#define QEMU_MADV_DONTNEED MADV_DONTNEED +#ifdef MADV_DONTFORK +#define QEMU_MADV_DONTFORK MADV_DONTFORK +#else +#define QEMU_MADV_DONTFORK QEMU_MADV_INVALID +#endif +#ifdef MADV_MERGEABLE +#define QEMU_MADV_MERGEABLE MADV_MERGEABLE +#else +#define QEMU_MADV_MERGEABLE QEMU_MADV_INVALID +#endif +#ifdef MADV_UNMERGEABLE +#define QEMU_MADV_UNMERGEABLE MADV_UNMERGEABLE +#else +#define QEMU_MADV_UNMERGEABLE QEMU_MADV_INVALID +#endif +#ifdef MADV_DODUMP +#define QEMU_MADV_DODUMP MADV_DODUMP +#else +#define QEMU_MADV_DODUMP QEMU_MADV_INVALID +#endif +#ifdef MADV_DONTDUMP +#define QEMU_MADV_DONTDUMP MADV_DONTDUMP +#else +#define QEMU_MADV_DONTDUMP QEMU_MADV_INVALID +#endif +#ifdef MADV_HUGEPAGE +#define QEMU_MADV_HUGEPAGE MADV_HUGEPAGE +#else +#define QEMU_MADV_HUGEPAGE QEMU_MADV_INVALID +#endif +#ifdef MADV_NOHUGEPAGE +#define QEMU_MADV_NOHUGEPAGE MADV_NOHUGEPAGE +#else +#define QEMU_MADV_NOHUGEPAGE QEMU_MADV_INVALID +#endif +#ifdef MADV_REMOVE +#define QEMU_MADV_REMOVE MADV_REMOVE +#else +#define QEMU_MADV_REMOVE QEMU_MADV_INVALID +#endif + +#elif defined(CONFIG_POSIX_MADVISE) + +#define QEMU_MADV_WILLNEED POSIX_MADV_WILLNEED +#define QEMU_MADV_DONTNEED POSIX_MADV_DONTNEED +#define QEMU_MADV_DONTFORK QEMU_MADV_INVALID +#define QEMU_MADV_MERGEABLE QEMU_MADV_INVALID +#define QEMU_MADV_UNMERGEABLE QEMU_MADV_INVALID +#define QEMU_MADV_DODUMP QEMU_MADV_INVALID +#define QEMU_MADV_DONTDUMP QEMU_MADV_INVALID +#define QEMU_MADV_HUGEPAGE QEMU_MADV_INVALID +#define QEMU_MADV_NOHUGEPAGE QEMU_MADV_INVALID +#define QEMU_MADV_REMOVE QEMU_MADV_INVALID + +#else /* no-op */ + +#define QEMU_MADV_WILLNEED QEMU_MADV_INVALID +#define QEMU_MADV_DONTNEED QEMU_MADV_INVALID +#define QEMU_MADV_DONTFORK QEMU_MADV_INVALID +#define QEMU_MADV_MERGEABLE QEMU_MADV_INVALID +#define QEMU_MADV_UNMERGEABLE QEMU_MADV_INVALID +#define QEMU_MADV_DODUMP QEMU_MADV_INVALID +#define QEMU_MADV_DONTDUMP QEMU_MADV_INVALID +#define QEMU_MADV_HUGEPAGE QEMU_MADV_INVALID +#define QEMU_MADV_NOHUGEPAGE QEMU_MADV_INVALID +#define QEMU_MADV_REMOVE QEMU_MADV_INVALID + +#endif + +#ifdef _WIN32 +#define HAVE_CHARDEV_SERIAL 1 +#elif defined(__linux__) || defined(__sun__) || defined(__FreeBSD__) \ + || defined(__NetBSD__) || defined(__OpenBSD__) || defined(__DragonFly__) \ + || defined(__GLIBC__) || defined(__APPLE__) +#define HAVE_CHARDEV_SERIAL 1 +#endif + +#if defined(__linux__) || defined(__FreeBSD__) || \ + defined(__FreeBSD_kernel__) || defined(__DragonFly__) +#define HAVE_CHARDEV_PARPORT 1 +#endif + +#if defined(__HAIKU__) +#define SIGIO SIGPOLL +#endif + +#if defined(CONFIG_LINUX) +#ifndef BUS_MCEERR_AR +#define BUS_MCEERR_AR 4 +#endif +#ifndef BUS_MCEERR_AO +#define BUS_MCEERR_AO 5 +#endif +#endif + +#if defined(__linux__) && \ + (defined(__x86_64__) || defined(__arm__) || defined(__aarch64__) \ + || defined(__powerpc64__)) + /* Use 2 MiB alignment so transparent hugepages can be used by KVM. + Valgrind does not support alignments larger than 1 MiB, + therefore we need special code which handles running on Valgrind. */ +# define QEMU_VMALLOC_ALIGN (512 * 4096) +#elif defined(__linux__) && defined(__s390x__) + /* Use 1 MiB (segment size) alignment so gmap can be used by KVM. */ +# define QEMU_VMALLOC_ALIGN (256 * 4096) +#elif defined(__linux__) && defined(__sparc__) +#include +# define QEMU_VMALLOC_ALIGN MAX(qemu_real_host_page_size, SHMLBA) +#else +# define QEMU_VMALLOC_ALIGN qemu_real_host_page_size +#endif + +#ifdef CONFIG_POSIX +struct qemu_signalfd_siginfo { + uint32_t ssi_signo; /* Signal number */ + int32_t ssi_errno; /* Error number (unused) */ + int32_t ssi_code; /* Signal code */ + uint32_t ssi_pid; /* PID of sender */ + uint32_t ssi_uid; /* Real UID of sender */ + int32_t ssi_fd; /* File descriptor (SIGIO) */ + uint32_t ssi_tid; /* Kernel timer ID (POSIX timers) */ + uint32_t ssi_band; /* Band event (SIGIO) */ + uint32_t ssi_overrun; /* POSIX timer overrun count */ + uint32_t ssi_trapno; /* Trap number that caused signal */ + int32_t ssi_status; /* Exit status or signal (SIGCHLD) */ + int32_t ssi_int; /* Integer sent by sigqueue(2) */ + uint64_t ssi_ptr; /* Pointer sent by sigqueue(2) */ + uint64_t ssi_utime; /* User CPU time consumed (SIGCHLD) */ + uint64_t ssi_stime; /* System CPU time consumed (SIGCHLD) */ + uint64_t ssi_addr; /* Address that generated signal + (for hardware-generated signals) */ + uint8_t pad[48]; /* Pad size to 128 bytes (allow for + additional fields in the future) */ +}; + +int qemu_signalfd(const sigset_t *mask); +void sigaction_invoke(struct sigaction *action, + struct qemu_signalfd_siginfo *info); +#endif + +int qemu_madvise(void *addr, size_t len, int advice); +int qemu_mprotect_rwx(void *addr, size_t size); +int qemu_mprotect_none(void *addr, size_t size); + +/* + * Don't introduce new usage of this function, prefer the following + * qemu_open/qemu_create that take an "Error **errp" + */ +int qemu_open_old(const char *name, int flags, ...); +int qemu_open(const char *name, int flags, Error **errp); +int qemu_create(const char *name, int flags, mode_t mode, Error **errp); +int qemu_close(int fd); +int qemu_unlink(const char *name); +#ifndef _WIN32 +int qemu_dup_flags(int fd, int flags); +int qemu_dup(int fd); +int qemu_lock_fd(int fd, int64_t start, int64_t len, bool exclusive); +int qemu_unlock_fd(int fd, int64_t start, int64_t len); +int qemu_lock_fd_test(int fd, int64_t start, int64_t len, bool exclusive); +bool qemu_has_ofd_lock(void); +#endif + +#if defined(__HAIKU__) && defined(__i386__) +#define FMT_pid "%ld" +#elif defined(WIN64) +#define FMT_pid "%" PRId64 +#else +#define FMT_pid "%d" +#endif + +bool qemu_write_pidfile(const char *pidfile, Error **errp); + +int qemu_get_thread_id(void); + +#ifndef CONFIG_IOVEC +struct iovec { + void *iov_base; + size_t iov_len; +}; +/* + * Use the same value as Linux for now. + */ +#define IOV_MAX 1024 + +ssize_t readv(int fd, const struct iovec *iov, int iov_cnt); +ssize_t writev(int fd, const struct iovec *iov, int iov_cnt); +#else +#include +#endif + +#ifdef _WIN32 +static inline void qemu_timersub(const struct timeval *val1, + const struct timeval *val2, + struct timeval *res) +{ + res->tv_sec = val1->tv_sec - val2->tv_sec; + if (val1->tv_usec < val2->tv_usec) { + res->tv_sec--; + res->tv_usec = val1->tv_usec - val2->tv_usec + 1000 * 1000; + } else { + res->tv_usec = val1->tv_usec - val2->tv_usec; + } +} +#else +#define qemu_timersub timersub +#endif + +void qemu_set_cloexec(int fd); + +/* Starting on QEMU 2.5, qemu_hw_version() returns "2.5+" by default + * instead of QEMU_VERSION, so setting hw_version on MachineClass + * is no longer mandatory. + * + * Do NOT change this string, or it will break compatibility on all + * machine classes that don't set hw_version. + */ +#define QEMU_HW_VERSION "2.5+" + +/* QEMU "hardware version" setting. Used to replace code that exposed + * QEMU_VERSION to guests in the past and need to keep compatibility. + * Do not use qemu_hw_version() in new code. + */ +void qemu_set_hw_version(const char *); +const char *qemu_hw_version(void); + +void fips_set_state(bool requested); +bool fips_get_state(void); + +/* Return a dynamically allocated pathname denoting a file or directory that is + * appropriate for storing local state. + * + * @relative_pathname need not start with a directory separator; one will be + * added automatically. + * + * The caller is responsible for releasing the value returned with g_free() + * after use. + */ +char *qemu_get_local_state_pathname(const char *relative_pathname); + +/* Find program directory, and save it for later usage with + * qemu_get_exec_dir(). + * Try OS specific API first, if not working, parse from argv0. */ +void qemu_init_exec_dir(const char *argv0); + +/* Get the saved exec dir. */ +const char *qemu_get_exec_dir(void); + +/** + * qemu_getauxval: + * @type: the auxiliary vector key to lookup + * + * Search the auxiliary vector for @type, returning the value + * or 0 if @type is not present. + */ +unsigned long qemu_getauxval(unsigned long type); + +void qemu_set_tty_echo(int fd, bool echo); + +void os_mem_prealloc(int fd, char *area, size_t sz, int smp_cpus, + Error **errp); + +/** + * qemu_get_pid_name: + * @pid: pid of a process + * + * For given @pid fetch its name. Caller is responsible for + * freeing the string when no longer needed. + * Returns allocated string on success, NULL on failure. + */ +char *qemu_get_pid_name(pid_t pid); + +/** + * qemu_fork: + * + * A version of fork that avoids signal handler race + * conditions that can lead to child process getting + * signals that are otherwise only expected by the + * parent. It also resets all signal handlers to the + * default settings. + * + * Returns 0 to child process, pid number to parent + * or -1 on failure. + */ +pid_t qemu_fork(Error **errp); + +/* Using intptr_t ensures that qemu_*_page_mask is sign-extended even + * when intptr_t is 32-bit and we are aligning a long long. + */ +extern uintptr_t qemu_real_host_page_size; +extern intptr_t qemu_real_host_page_mask; + +extern int qemu_icache_linesize; +extern int qemu_icache_linesize_log; +extern int qemu_dcache_linesize; +extern int qemu_dcache_linesize_log; + +/* + * After using getopt or getopt_long, if you need to parse another set + * of options, then you must reset optind. Unfortunately the way to + * do this varies between implementations of getopt. + */ +static inline void qemu_reset_optind(void) +{ +#ifdef HAVE_OPTRESET + optind = 1; + optreset = 1; +#else + optind = 0; +#endif +} + +/** + * qemu_get_host_name: + * @errp: Error object + * + * Operating system agnostic way of querying host name. + * + * Returns allocated hostname (caller should free), NULL on failure. + */ +char *qemu_get_host_name(Error **errp); + +/** + * qemu_get_host_physmem: + * + * Operating system agnostic way of querying host memory. + * + * Returns amount of physical memory on the system. This is purely + * advisery and may return 0 if we can't work it out. At the other + * end we saturate to SIZE_MAX if you are lucky enough to have that + * much memory. + */ +size_t qemu_get_host_physmem(void); + +#endif diff --git a/include/qemu/path.h b/include/qemu/path.h new file mode 100644 index 000000000..c6292a970 --- /dev/null +++ b/include/qemu/path.h @@ -0,0 +1,7 @@ +#ifndef QEMU_PATH_H +#define QEMU_PATH_H + +void init_paths(const char *prefix); +const char *path(const char *pathname); + +#endif diff --git a/include/qemu/plugin-memory.h b/include/qemu/plugin-memory.h new file mode 100644 index 000000000..fbbe99474 --- /dev/null +++ b/include/qemu/plugin-memory.h @@ -0,0 +1,40 @@ +/* + * Plugin Memory API + * + * Copyright (c) 2019 Linaro Ltd + * + * SPDX-License-Identifier: GPL-2.0-or-later + */ + +#ifndef _PLUGIN_MEMORY_H_ +#define _PLUGIN_MEMORY_H_ + +struct qemu_plugin_hwaddr { + bool is_io; + bool is_store; + union { + struct { + MemoryRegionSection *section; + hwaddr offset; + } io; + struct { + uint64_t hostaddr; + } ram; + } v; +}; + +/** + * tlb_plugin_lookup: query last TLB lookup + * @cpu: cpu environment + * + * This function can be used directly after a memory operation to + * query information about the access. It is used by the plugin + * infrastructure to expose more information about the address. + * + * It would only fail if not called from an instrumented memory access + * which would be an abuse of the API. + */ +bool tlb_plugin_lookup(CPUState *cpu, target_ulong addr, int mmu_idx, + bool is_store, struct qemu_plugin_hwaddr *data); + +#endif /* _PLUGIN_MEMORY_H_ */ diff --git a/include/qemu/plugin.h b/include/qemu/plugin.h new file mode 100644 index 000000000..ab790ad10 --- /dev/null +++ b/include/qemu/plugin.h @@ -0,0 +1,252 @@ +/* + * Copyright (C) 2017, Emilio G. Cota + * + * License: GNU GPL, version 2 or later. + * See the COPYING file in the top-level directory. + */ +#ifndef QEMU_PLUGIN_H +#define QEMU_PLUGIN_H + +#include "qemu/config-file.h" +#include "qemu/qemu-plugin.h" +#include "qemu/error-report.h" +#include "qemu/queue.h" +#include "qemu/option.h" + +/* + * Events that plugins can subscribe to. + */ +enum qemu_plugin_event { + QEMU_PLUGIN_EV_VCPU_INIT, + QEMU_PLUGIN_EV_VCPU_EXIT, + QEMU_PLUGIN_EV_VCPU_TB_TRANS, + QEMU_PLUGIN_EV_VCPU_IDLE, + QEMU_PLUGIN_EV_VCPU_RESUME, + QEMU_PLUGIN_EV_VCPU_SYSCALL, + QEMU_PLUGIN_EV_VCPU_SYSCALL_RET, + QEMU_PLUGIN_EV_FLUSH, + QEMU_PLUGIN_EV_ATEXIT, + QEMU_PLUGIN_EV_MAX, /* total number of plugin events we support */ +}; + +/* + * Option parsing/processing. + * Note that we can load an arbitrary number of plugins. + */ +struct qemu_plugin_desc; +typedef QTAILQ_HEAD(, qemu_plugin_desc) QemuPluginList; + +#ifdef CONFIG_PLUGIN +extern QemuOptsList qemu_plugin_opts; + +static inline void qemu_plugin_add_opts(void) +{ + qemu_add_opts(&qemu_plugin_opts); +} + +void qemu_plugin_opt_parse(const char *optarg, QemuPluginList *head); +int qemu_plugin_load_list(QemuPluginList *head); + +union qemu_plugin_cb_sig { + qemu_plugin_simple_cb_t simple; + qemu_plugin_udata_cb_t udata; + qemu_plugin_vcpu_simple_cb_t vcpu_simple; + qemu_plugin_vcpu_udata_cb_t vcpu_udata; + qemu_plugin_vcpu_tb_trans_cb_t vcpu_tb_trans; + qemu_plugin_vcpu_mem_cb_t vcpu_mem; + qemu_plugin_vcpu_syscall_cb_t vcpu_syscall; + qemu_plugin_vcpu_syscall_ret_cb_t vcpu_syscall_ret; + void *generic; +}; + +enum plugin_dyn_cb_type { + PLUGIN_CB_INSN, + PLUGIN_CB_MEM, + PLUGIN_N_CB_TYPES, +}; + +enum plugin_dyn_cb_subtype { + PLUGIN_CB_REGULAR, + PLUGIN_CB_INLINE, + PLUGIN_N_CB_SUBTYPES, +}; + +/* + * A dynamic callback has an insertion point that is determined at run-time. + * Usually the insertion point is somewhere in the code cache; think for + * instance of a callback to be called upon the execution of a particular TB. + */ +struct qemu_plugin_dyn_cb { + union qemu_plugin_cb_sig f; + void *userp; + unsigned tcg_flags; + enum plugin_dyn_cb_subtype type; + /* @rw applies to mem callbacks only (both regular and inline) */ + enum qemu_plugin_mem_rw rw; + /* fields specific to each dyn_cb type go here */ + union { + struct { + enum qemu_plugin_op op; + uint64_t imm; + } inline_insn; + }; +}; + +struct qemu_plugin_insn { + GByteArray *data; + uint64_t vaddr; + void *haddr; + GArray *cbs[PLUGIN_N_CB_TYPES][PLUGIN_N_CB_SUBTYPES]; + bool calls_helpers; + bool mem_helper; +}; + +/* + * qemu_plugin_insn allocate and cleanup functions. We don't expect to + * cleanup many of these structures. They are reused for each fresh + * translation. + */ + +static inline void qemu_plugin_insn_cleanup_fn(gpointer data) +{ + struct qemu_plugin_insn *insn = (struct qemu_plugin_insn *) data; + g_byte_array_free(insn->data, true); +} + +static inline struct qemu_plugin_insn *qemu_plugin_insn_alloc(void) +{ + int i, j; + struct qemu_plugin_insn *insn = g_new0(struct qemu_plugin_insn, 1); + insn->data = g_byte_array_sized_new(4); + + for (i = 0; i < PLUGIN_N_CB_TYPES; i++) { + for (j = 0; j < PLUGIN_N_CB_SUBTYPES; j++) { + insn->cbs[i][j] = g_array_new(false, false, + sizeof(struct qemu_plugin_dyn_cb)); + } + } + return insn; +} + +struct qemu_plugin_tb { + GPtrArray *insns; + size_t n; + uint64_t vaddr; + uint64_t vaddr2; + void *haddr1; + void *haddr2; + GArray *cbs[PLUGIN_N_CB_SUBTYPES]; +}; + +/** + * qemu_plugin_tb_insn_get(): get next plugin record for translation. + * + */ +static inline +struct qemu_plugin_insn *qemu_plugin_tb_insn_get(struct qemu_plugin_tb *tb) +{ + struct qemu_plugin_insn *insn; + int i, j; + + if (unlikely(tb->n == tb->insns->len)) { + struct qemu_plugin_insn *new_insn = qemu_plugin_insn_alloc(); + g_ptr_array_add(tb->insns, new_insn); + } + insn = g_ptr_array_index(tb->insns, tb->n++); + g_byte_array_set_size(insn->data, 0); + insn->calls_helpers = false; + insn->mem_helper = false; + + for (i = 0; i < PLUGIN_N_CB_TYPES; i++) { + for (j = 0; j < PLUGIN_N_CB_SUBTYPES; j++) { + g_array_set_size(insn->cbs[i][j], 0); + } + } + + return insn; +} + +void qemu_plugin_vcpu_init_hook(CPUState *cpu); +void qemu_plugin_vcpu_exit_hook(CPUState *cpu); +void qemu_plugin_tb_trans_cb(CPUState *cpu, struct qemu_plugin_tb *tb); +void qemu_plugin_vcpu_idle_cb(CPUState *cpu); +void qemu_plugin_vcpu_resume_cb(CPUState *cpu); +void +qemu_plugin_vcpu_syscall(CPUState *cpu, int64_t num, uint64_t a1, + uint64_t a2, uint64_t a3, uint64_t a4, uint64_t a5, + uint64_t a6, uint64_t a7, uint64_t a8); +void qemu_plugin_vcpu_syscall_ret(CPUState *cpu, int64_t num, int64_t ret); + +void qemu_plugin_vcpu_mem_cb(CPUState *cpu, uint64_t vaddr, uint32_t meminfo); + +void qemu_plugin_flush_cb(void); + +void qemu_plugin_atexit_cb(void); + +void qemu_plugin_add_dyn_cb_arr(GArray *arr); + +void qemu_plugin_disable_mem_helpers(CPUState *cpu); + +#else /* !CONFIG_PLUGIN */ + +static inline void qemu_plugin_add_opts(void) +{ } + +static inline void qemu_plugin_opt_parse(const char *optarg, + QemuPluginList *head) +{ + error_report("plugin interface not enabled in this build"); + exit(1); +} + +static inline int qemu_plugin_load_list(QemuPluginList *head) +{ + return 0; +} + +static inline void qemu_plugin_vcpu_init_hook(CPUState *cpu) +{ } + +static inline void qemu_plugin_vcpu_exit_hook(CPUState *cpu) +{ } + +static inline void qemu_plugin_tb_trans_cb(CPUState *cpu, + struct qemu_plugin_tb *tb) +{ } + +static inline void qemu_plugin_vcpu_idle_cb(CPUState *cpu) +{ } + +static inline void qemu_plugin_vcpu_resume_cb(CPUState *cpu) +{ } + +static inline void +qemu_plugin_vcpu_syscall(CPUState *cpu, int64_t num, uint64_t a1, uint64_t a2, + uint64_t a3, uint64_t a4, uint64_t a5, uint64_t a6, + uint64_t a7, uint64_t a8) +{ } + +static inline +void qemu_plugin_vcpu_syscall_ret(CPUState *cpu, int64_t num, int64_t ret) +{ } + +static inline void qemu_plugin_vcpu_mem_cb(CPUState *cpu, uint64_t vaddr, + uint32_t meminfo) +{ } + +static inline void qemu_plugin_flush_cb(void) +{ } + +static inline void qemu_plugin_atexit_cb(void) +{ } + +static inline +void qemu_plugin_add_dyn_cb_arr(GArray *arr) +{ } + +static inline void qemu_plugin_disable_mem_helpers(CPUState *cpu) +{ } + +#endif /* !CONFIG_PLUGIN */ + +#endif /* QEMU_PLUGIN_H */ diff --git a/include/qemu/pmem.h b/include/qemu/pmem.h new file mode 100644 index 000000000..d2d7ad085 --- /dev/null +++ b/include/qemu/pmem.h @@ -0,0 +1,36 @@ +/* + * QEMU header file for libpmem. + * + * Copyright (c) 2018 Intel Corporation. + * + * Author: Haozhong Zhang + * + * This work is licensed under the terms of the GNU GPL, version 2 or later. + * See the COPYING file in the top-level directory. + */ + +#ifndef QEMU_PMEM_H +#define QEMU_PMEM_H + +#ifdef CONFIG_LIBPMEM +#include +#else /* !CONFIG_LIBPMEM */ + +static inline void * +pmem_memcpy_persist(void *pmemdest, const void *src, size_t len) +{ + /* If 'pmem' option is 'on', we should always have libpmem support, + or qemu will report a error and exit, never come here. */ + g_assert_not_reached(); + return NULL; +} + +static inline void +pmem_persist(const void *addr, size_t len) +{ + g_assert_not_reached(); +} + +#endif /* CONFIG_LIBPMEM */ + +#endif /* QEMU_PMEM_H */ diff --git a/include/qemu/processor.h b/include/qemu/processor.h new file mode 100644 index 000000000..8e16c9277 --- /dev/null +++ b/include/qemu/processor.h @@ -0,0 +1,27 @@ +/* + * Copyright (C) 2016, Emilio G. Cota + * + * License: GNU GPL, version 2. + * See the COPYING file in the top-level directory. + */ +#ifndef QEMU_PROCESSOR_H +#define QEMU_PROCESSOR_H + +#include "qemu/atomic.h" + +#if defined(__i386__) || defined(__x86_64__) +# define cpu_relax() asm volatile("rep; nop" ::: "memory") + +#elif defined(__aarch64__) +# define cpu_relax() asm volatile("yield" ::: "memory") + +#elif defined(__powerpc64__) +/* set Hardware Multi-Threading (HMT) priority to low; then back to medium */ +# define cpu_relax() asm volatile("or 1, 1, 1;" \ + "or 2, 2, 2;" ::: "memory") + +#else +# define cpu_relax() barrier() +#endif + +#endif /* QEMU_PROCESSOR_H */ diff --git a/include/qemu/progress_meter.h b/include/qemu/progress_meter.h new file mode 100644 index 000000000..9a23ff071 --- /dev/null +++ b/include/qemu/progress_meter.h @@ -0,0 +1,58 @@ +/* + * Helper functionality for some process progress tracking. + * + * Copyright (c) 2011 IBM Corp. + * Copyright (c) 2012, 2018 Red Hat, Inc. + * Copyright (c) 2020 Virtuozzo International GmbH + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ + +#ifndef QEMU_PROGRESS_METER_H +#define QEMU_PROGRESS_METER_H + +typedef struct ProgressMeter { + /** + * Current progress. The unit is arbitrary as long as the ratio between + * current and total represents the estimated percentage + * of work already done. + */ + uint64_t current; + + /** Estimated current value at the completion of the process */ + uint64_t total; +} ProgressMeter; + +static inline void progress_work_done(ProgressMeter *pm, uint64_t done) +{ + pm->current += done; +} + +static inline void progress_set_remaining(ProgressMeter *pm, uint64_t remaining) +{ + pm->total = pm->current + remaining; +} + +static inline void progress_increase_remaining(ProgressMeter *pm, + uint64_t delta) +{ + pm->total += delta; +} + +#endif /* QEMU_PROGRESS_METER_H */ diff --git a/include/qemu/qdist.h b/include/qemu/qdist.h new file mode 100644 index 000000000..bfb321153 --- /dev/null +++ b/include/qemu/qdist.h @@ -0,0 +1,61 @@ +/* + * Copyright (C) 2016, Emilio G. Cota + * + * License: GNU GPL, version 2 or later. + * See the COPYING file in the top-level directory. + */ +#ifndef QEMU_QDIST_H +#define QEMU_QDIST_H + +#include "qemu/bitops.h" + +/* + * Samples with the same 'x value' end up in the same qdist_entry, + * e.g. inc(0.1) and inc(0.1) end up as {x=0.1, count=2}. + * + * Binning happens only at print time, so that we retain the flexibility to + * choose the binning. This might not be ideal for workloads that do not care + * much about precision and insert many samples all with different x values; + * in that case, pre-binning (e.g. entering both 0.115 and 0.097 as 0.1) + * should be considered. + */ +struct qdist_entry { + double x; + unsigned long count; +}; + +struct qdist { + struct qdist_entry *entries; + size_t n; + size_t size; +}; + +#define QDIST_PR_BORDER BIT(0) +#define QDIST_PR_LABELS BIT(1) +/* the remaining options only work if PR_LABELS is set */ +#define QDIST_PR_NODECIMAL BIT(2) +#define QDIST_PR_PERCENT BIT(3) +#define QDIST_PR_100X BIT(4) +#define QDIST_PR_NOBINRANGE BIT(5) + +void qdist_init(struct qdist *dist); +void qdist_destroy(struct qdist *dist); + +void qdist_add(struct qdist *dist, double x, long count); +void qdist_inc(struct qdist *dist, double x); +double qdist_xmin(const struct qdist *dist); +double qdist_xmax(const struct qdist *dist); +double qdist_avg(const struct qdist *dist); +unsigned long qdist_sample_count(const struct qdist *dist); +size_t qdist_unique_entries(const struct qdist *dist); + +/* callers must free the returned string with g_free() */ +char *qdist_pr_plain(const struct qdist *dist, size_t n_groups); + +/* callers must free the returned string with g_free() */ +char *qdist_pr(const struct qdist *dist, size_t n_groups, uint32_t opt); + +/* Only qdist code and test code should ever call this function */ +void qdist_bin__internal(struct qdist *to, const struct qdist *from, size_t n); + +#endif /* QEMU_QDIST_H */ diff --git a/include/qemu/qemu-plugin.h b/include/qemu/qemu-plugin.h new file mode 100644 index 000000000..bab8b0d4b --- /dev/null +++ b/include/qemu/qemu-plugin.h @@ -0,0 +1,413 @@ +/* + * Copyright (C) 2017, Emilio G. Cota + * Copyright (C) 2019, Linaro + * + * License: GNU GPL, version 2 or later. + * See the COPYING file in the top-level directory. + * + * SPDX-License-Identifier: GPL-2.0-or-later + */ +#ifndef QEMU_PLUGIN_API_H +#define QEMU_PLUGIN_API_H + +#include +#include +#include + +/* + * For best performance, build the plugin with -fvisibility=hidden so that + * QEMU_PLUGIN_LOCAL is implicit. Then, just mark qemu_plugin_install with + * QEMU_PLUGIN_EXPORT. For more info, see + * https://gcc.gnu.org/wiki/Visibility + */ +#if defined _WIN32 || defined __CYGWIN__ + #ifdef BUILDING_DLL + #define QEMU_PLUGIN_EXPORT __declspec(dllexport) + #else + #define QEMU_PLUGIN_EXPORT __declspec(dllimport) + #endif + #define QEMU_PLUGIN_LOCAL +#else + #if __GNUC__ >= 4 + #define QEMU_PLUGIN_EXPORT __attribute__((visibility("default"))) + #define QEMU_PLUGIN_LOCAL __attribute__((visibility("hidden"))) + #else + #define QEMU_PLUGIN_EXPORT + #define QEMU_PLUGIN_LOCAL + #endif +#endif + +typedef uint64_t qemu_plugin_id_t; + +/* + * Versioning plugins: + * + * The plugin API will pass a minimum and current API version that + * QEMU currently supports. The minimum API will be incremented if an + * API needs to be deprecated. + * + * The plugins export the API they were built against by exposing the + * symbol qemu_plugin_version which can be checked. + */ + +extern QEMU_PLUGIN_EXPORT int qemu_plugin_version; + +#define QEMU_PLUGIN_VERSION 0 + +typedef struct { + /* string describing architecture */ + const char *target_name; + struct { + int min; + int cur; + } version; + /* is this a full system emulation? */ + bool system_emulation; + union { + /* + * smp_vcpus may change if vCPUs can be hot-plugged, max_vcpus + * is the system-wide limit. + */ + struct { + int smp_vcpus; + int max_vcpus; + } system; + }; +} qemu_info_t; + +/** + * qemu_plugin_install() - Install a plugin + * @id: this plugin's opaque ID + * @info: a block describing some details about the guest + * @argc: number of arguments + * @argv: array of arguments (@argc elements) + * + * All plugins must export this symbol. + * + * Note: Calling qemu_plugin_uninstall() from this function is a bug. To raise + * an error during install, return !0. + * + * Note: @info is only live during the call. Copy any information we + * want to keep. + * + * Note: @argv remains valid throughout the lifetime of the loaded plugin. + */ +QEMU_PLUGIN_EXPORT int qemu_plugin_install(qemu_plugin_id_t id, + const qemu_info_t *info, + int argc, char **argv); + +/* + * Prototypes for the various callback styles we will be registering + * in the following functions. + */ +typedef void (*qemu_plugin_simple_cb_t)(qemu_plugin_id_t id); + +typedef void (*qemu_plugin_udata_cb_t)(qemu_plugin_id_t id, void *userdata); + +typedef void (*qemu_plugin_vcpu_simple_cb_t)(qemu_plugin_id_t id, + unsigned int vcpu_index); + +typedef void (*qemu_plugin_vcpu_udata_cb_t)(unsigned int vcpu_index, + void *userdata); + +/** + * qemu_plugin_uninstall() - Uninstall a plugin + * @id: this plugin's opaque ID + * @cb: callback to be called once the plugin has been removed + * + * Do NOT assume that the plugin has been uninstalled once this function + * returns. Plugins are uninstalled asynchronously, and therefore the given + * plugin receives callbacks until @cb is called. + * + * Note: Calling this function from qemu_plugin_install() is a bug. + */ +void qemu_plugin_uninstall(qemu_plugin_id_t id, qemu_plugin_simple_cb_t cb); + +/** + * qemu_plugin_reset() - Reset a plugin + * @id: this plugin's opaque ID + * @cb: callback to be called once the plugin has been reset + * + * Unregisters all callbacks for the plugin given by @id. + * + * Do NOT assume that the plugin has been reset once this function returns. + * Plugins are reset asynchronously, and therefore the given plugin receives + * callbacks until @cb is called. + */ +void qemu_plugin_reset(qemu_plugin_id_t id, qemu_plugin_simple_cb_t cb); + +/** + * qemu_plugin_register_vcpu_init_cb() - register a vCPU initialization callback + * @id: plugin ID + * @cb: callback function + * + * The @cb function is called every time a vCPU is initialized. + * + * See also: qemu_plugin_register_vcpu_exit_cb() + */ +void qemu_plugin_register_vcpu_init_cb(qemu_plugin_id_t id, + qemu_plugin_vcpu_simple_cb_t cb); + +/** + * qemu_plugin_register_vcpu_exit_cb() - register a vCPU exit callback + * @id: plugin ID + * @cb: callback function + * + * The @cb function is called every time a vCPU exits. + * + * See also: qemu_plugin_register_vcpu_init_cb() + */ +void qemu_plugin_register_vcpu_exit_cb(qemu_plugin_id_t id, + qemu_plugin_vcpu_simple_cb_t cb); + +/** + * qemu_plugin_register_vcpu_idle_cb() - register a vCPU idle callback + * @id: plugin ID + * @cb: callback function + * + * The @cb function is called every time a vCPU idles. + */ +void qemu_plugin_register_vcpu_idle_cb(qemu_plugin_id_t id, + qemu_plugin_vcpu_simple_cb_t cb); + +/** + * qemu_plugin_register_vcpu_resume_cb() - register a vCPU resume callback + * @id: plugin ID + * @cb: callback function + * + * The @cb function is called every time a vCPU resumes execution. + */ +void qemu_plugin_register_vcpu_resume_cb(qemu_plugin_id_t id, + qemu_plugin_vcpu_simple_cb_t cb); + +/* + * Opaque types that the plugin is given during the translation and + * instrumentation phase. + */ +struct qemu_plugin_tb; +struct qemu_plugin_insn; + +enum qemu_plugin_cb_flags { + QEMU_PLUGIN_CB_NO_REGS, /* callback does not access the CPU's regs */ + QEMU_PLUGIN_CB_R_REGS, /* callback reads the CPU's regs */ + QEMU_PLUGIN_CB_RW_REGS, /* callback reads and writes the CPU's regs */ +}; + +enum qemu_plugin_mem_rw { + QEMU_PLUGIN_MEM_R = 1, + QEMU_PLUGIN_MEM_W, + QEMU_PLUGIN_MEM_RW, +}; + +/** + * qemu_plugin_register_vcpu_tb_trans_cb() - register a translate cb + * @id: plugin ID + * @cb: callback function + * + * The @cb function is called every time a translation occurs. The @cb + * function is passed an opaque qemu_plugin_type which it can query + * for additional information including the list of translated + * instructions. At this point the plugin can register further + * callbacks to be triggered when the block or individual instruction + * executes. + */ +typedef void (*qemu_plugin_vcpu_tb_trans_cb_t)(qemu_plugin_id_t id, + struct qemu_plugin_tb *tb); + +void qemu_plugin_register_vcpu_tb_trans_cb(qemu_plugin_id_t id, + qemu_plugin_vcpu_tb_trans_cb_t cb); + +/** + * qemu_plugin_register_vcpu_tb_trans_exec_cb() - register execution callback + * @tb: the opaque qemu_plugin_tb handle for the translation + * @cb: callback function + * @flags: does the plugin read or write the CPU's registers? + * @userdata: any plugin data to pass to the @cb? + * + * The @cb function is called every time a translated unit executes. + */ +void qemu_plugin_register_vcpu_tb_exec_cb(struct qemu_plugin_tb *tb, + qemu_plugin_vcpu_udata_cb_t cb, + enum qemu_plugin_cb_flags flags, + void *userdata); + +enum qemu_plugin_op { + QEMU_PLUGIN_INLINE_ADD_U64, +}; + +/** + * qemu_plugin_register_vcpu_tb_trans_exec_inline() - execution inline op + * @tb: the opaque qemu_plugin_tb handle for the translation + * @op: the type of qemu_plugin_op (e.g. ADD_U64) + * @ptr: the target memory location for the op + * @imm: the op data (e.g. 1) + * + * Insert an inline op to every time a translated unit executes. + * Useful if you just want to increment a single counter somewhere in + * memory. + */ +void qemu_plugin_register_vcpu_tb_exec_inline(struct qemu_plugin_tb *tb, + enum qemu_plugin_op op, + void *ptr, uint64_t imm); + +/** + * qemu_plugin_register_vcpu_insn_exec_cb() - register insn execution cb + * @insn: the opaque qemu_plugin_insn handle for an instruction + * @cb: callback function + * @flags: does the plugin read or write the CPU's registers? + * @userdata: any plugin data to pass to the @cb? + * + * The @cb function is called every time an instruction is executed + */ +void qemu_plugin_register_vcpu_insn_exec_cb(struct qemu_plugin_insn *insn, + qemu_plugin_vcpu_udata_cb_t cb, + enum qemu_plugin_cb_flags flags, + void *userdata); + +/** + * qemu_plugin_register_vcpu_insn_exec_inline() - insn execution inline op + * @insn: the opaque qemu_plugin_insn handle for an instruction + * @cb: callback function + * @op: the type of qemu_plugin_op (e.g. ADD_U64) + * @ptr: the target memory location for the op + * @imm: the op data (e.g. 1) + * + * Insert an inline op to every time an instruction executes. Useful + * if you just want to increment a single counter somewhere in memory. + */ +void qemu_plugin_register_vcpu_insn_exec_inline(struct qemu_plugin_insn *insn, + enum qemu_plugin_op op, + void *ptr, uint64_t imm); + +/* + * Helpers to query information about the instructions in a block + */ +size_t qemu_plugin_tb_n_insns(const struct qemu_plugin_tb *tb); + +uint64_t qemu_plugin_tb_vaddr(const struct qemu_plugin_tb *tb); + +struct qemu_plugin_insn * +qemu_plugin_tb_get_insn(const struct qemu_plugin_tb *tb, size_t idx); + +const void *qemu_plugin_insn_data(const struct qemu_plugin_insn *insn); + +size_t qemu_plugin_insn_size(const struct qemu_plugin_insn *insn); + +uint64_t qemu_plugin_insn_vaddr(const struct qemu_plugin_insn *insn); +void *qemu_plugin_insn_haddr(const struct qemu_plugin_insn *insn); + +/* + * Memory Instrumentation + * + * The anonymous qemu_plugin_meminfo_t and qemu_plugin_hwaddr types + * can be used in queries to QEMU to get more information about a + * given memory access. + */ +typedef uint32_t qemu_plugin_meminfo_t; +struct qemu_plugin_hwaddr; + +/* meminfo queries */ +unsigned int qemu_plugin_mem_size_shift(qemu_plugin_meminfo_t info); +bool qemu_plugin_mem_is_sign_extended(qemu_plugin_meminfo_t info); +bool qemu_plugin_mem_is_big_endian(qemu_plugin_meminfo_t info); +bool qemu_plugin_mem_is_store(qemu_plugin_meminfo_t info); + +/* + * qemu_plugin_get_hwaddr(): + * @vaddr: the virtual address of the memory operation + * + * For system emulation returns a qemu_plugin_hwaddr handle to query + * details about the actual physical address backing the virtual + * address. For linux-user guests it just returns NULL. + * + * This handle is *only* valid for the duration of the callback. Any + * information about the handle should be recovered before the + * callback returns. + */ +struct qemu_plugin_hwaddr *qemu_plugin_get_hwaddr(qemu_plugin_meminfo_t info, + uint64_t vaddr); + +/* + * The following additional queries can be run on the hwaddr structure + * to return information about it. For non-IO accesses the device + * offset will be into the appropriate block of RAM. + */ +bool qemu_plugin_hwaddr_is_io(const struct qemu_plugin_hwaddr *haddr); +uint64_t qemu_plugin_hwaddr_device_offset(const struct qemu_plugin_hwaddr *haddr); + +typedef void +(*qemu_plugin_vcpu_mem_cb_t)(unsigned int vcpu_index, + qemu_plugin_meminfo_t info, uint64_t vaddr, + void *userdata); + +void qemu_plugin_register_vcpu_mem_cb(struct qemu_plugin_insn *insn, + qemu_plugin_vcpu_mem_cb_t cb, + enum qemu_plugin_cb_flags flags, + enum qemu_plugin_mem_rw rw, + void *userdata); + +void qemu_plugin_register_vcpu_mem_inline(struct qemu_plugin_insn *insn, + enum qemu_plugin_mem_rw rw, + enum qemu_plugin_op op, void *ptr, + uint64_t imm); + + + +typedef void +(*qemu_plugin_vcpu_syscall_cb_t)(qemu_plugin_id_t id, unsigned int vcpu_index, + int64_t num, uint64_t a1, uint64_t a2, + uint64_t a3, uint64_t a4, uint64_t a5, + uint64_t a6, uint64_t a7, uint64_t a8); + +void qemu_plugin_register_vcpu_syscall_cb(qemu_plugin_id_t id, + qemu_plugin_vcpu_syscall_cb_t cb); + +typedef void +(*qemu_plugin_vcpu_syscall_ret_cb_t)(qemu_plugin_id_t id, unsigned int vcpu_idx, + int64_t num, int64_t ret); + +void +qemu_plugin_register_vcpu_syscall_ret_cb(qemu_plugin_id_t id, + qemu_plugin_vcpu_syscall_ret_cb_t cb); + + +/** + * qemu_plugin_insn_disas() - return disassembly string for instruction + * @insn: instruction reference + * + * Returns an allocated string containing the disassembly + */ + +char *qemu_plugin_insn_disas(const struct qemu_plugin_insn *insn); + +/** + * qemu_plugin_vcpu_for_each() - iterate over the existing vCPU + * @id: plugin ID + * @cb: callback function + * + * The @cb function is called once for each existing vCPU. + * + * See also: qemu_plugin_register_vcpu_init_cb() + */ +void qemu_plugin_vcpu_for_each(qemu_plugin_id_t id, + qemu_plugin_vcpu_simple_cb_t cb); + +void qemu_plugin_register_flush_cb(qemu_plugin_id_t id, + qemu_plugin_simple_cb_t cb); + +void qemu_plugin_register_atexit_cb(qemu_plugin_id_t id, + qemu_plugin_udata_cb_t cb, void *userdata); + +/* returns -1 in user-mode */ +int qemu_plugin_n_vcpus(void); + +/* returns -1 in user-mode */ +int qemu_plugin_n_max_vcpus(void); + +/** + * qemu_plugin_outs() - output string via QEMU's logging system + * @string: a string + */ +void qemu_plugin_outs(const char *string); + +#endif /* QEMU_PLUGIN_API_H */ diff --git a/include/qemu/qemu-print.h b/include/qemu/qemu-print.h new file mode 100644 index 000000000..40b596262 --- /dev/null +++ b/include/qemu/qemu-print.h @@ -0,0 +1,23 @@ +/* + * Print to stream or current monitor + * + * Copyright (C) 2019 Red Hat Inc. + * + * Authors: + * Markus Armbruster , + * + * This work is licensed under the terms of the GNU GPL, version 2 or later. + * See the COPYING file in the top-level directory. + */ + +#ifndef QEMU_PRINT_H +#define QEMU_PRINT_H + +int qemu_vprintf(const char *fmt, va_list ap) GCC_FMT_ATTR(1, 0); +int qemu_printf(const char *fmt, ...) GCC_FMT_ATTR(1, 2); + +int qemu_vfprintf(FILE *stream, const char *fmt, va_list ap) + GCC_FMT_ATTR(2, 0); +int qemu_fprintf(FILE *stream, const char *fmt, ...) GCC_FMT_ATTR(2, 3); + +#endif diff --git a/include/qemu/qht.h b/include/qemu/qht.h new file mode 100644 index 000000000..648483748 --- /dev/null +++ b/include/qemu/qht.h @@ -0,0 +1,224 @@ +/* + * Copyright (C) 2016, Emilio G. Cota + * + * License: GNU GPL, version 2 or later. + * See the COPYING file in the top-level directory. + */ +#ifndef QEMU_QHT_H +#define QEMU_QHT_H + +#include "qemu/seqlock.h" +#include "qemu/thread.h" +#include "qemu/qdist.h" + +typedef bool (*qht_cmp_func_t)(const void *a, const void *b); + +struct qht { + struct qht_map *map; + qht_cmp_func_t cmp; + QemuMutex lock; /* serializes setters of ht->map */ + unsigned int mode; +}; + +/** + * struct qht_stats - Statistics of a QHT + * @head_buckets: number of head buckets + * @used_head_buckets: number of non-empty head buckets + * @entries: total number of entries + * @chain: frequency distribution representing the number of buckets in each + * chain, excluding empty chains. + * @occupancy: frequency distribution representing chain occupancy rate. + * Valid range: from 0.0 (empty) to 1.0 (full occupancy). + * + * An entry is a pointer-hash pair. + * Each bucket can host several entries. + * Chains are chains of buckets, whose first link is always a head bucket. + */ +struct qht_stats { + size_t head_buckets; + size_t used_head_buckets; + size_t entries; + struct qdist chain; + struct qdist occupancy; +}; + +typedef bool (*qht_lookup_func_t)(const void *obj, const void *userp); +typedef void (*qht_iter_func_t)(void *p, uint32_t h, void *up); +typedef bool (*qht_iter_bool_func_t)(void *p, uint32_t h, void *up); + +#define QHT_MODE_AUTO_RESIZE 0x1 /* auto-resize when heavily loaded */ +#define QHT_MODE_RAW_MUTEXES 0x2 /* bypass the profiler (QSP) */ + +/** + * qht_init - Initialize a QHT + * @ht: QHT to be initialized + * @cmp: default comparison function. Cannot be NULL. + * @n_elems: number of entries the hash table should be optimized for. + * @mode: bitmask with OR'ed QHT_MODE_* + */ +void qht_init(struct qht *ht, qht_cmp_func_t cmp, size_t n_elems, + unsigned int mode); + +/** + * qht_destroy - destroy a previously initialized QHT + * @ht: QHT to be destroyed + * + * Call only when there are no readers/writers left. + */ +void qht_destroy(struct qht *ht); + +/** + * qht_insert - Insert a pointer into the hash table + * @ht: QHT to insert to + * @p: pointer to be inserted + * @hash: hash corresponding to @p + * @existing: address where the pointer to an existing entry can be copied to + * + * Attempting to insert a NULL @p is a bug. + * Inserting the same pointer @p with different @hash values is a bug. + * + * In case of successful operation, smp_wmb() is implied before the pointer is + * inserted into the hash table. + * + * Returns true on success. + * Returns false if there is an existing entry in the table that is equivalent + * (i.e. ht->cmp matches and the hash is the same) to @p-@h. If @existing + * is !NULL, a pointer to this existing entry is copied to it. + */ +bool qht_insert(struct qht *ht, void *p, uint32_t hash, void **existing); + +/** + * qht_lookup_custom - Look up a pointer using a custom comparison function. + * @ht: QHT to be looked up + * @userp: pointer to pass to @func + * @hash: hash of the pointer to be looked up + * @func: function to compare existing pointers against @userp + * + * Needs to be called under an RCU read-critical section. + * + * smp_read_barrier_depends() is implied before the call to @func. + * + * The user-provided @func compares pointers in QHT against @userp. + * If the function returns true, a match has been found. + * + * Returns the corresponding pointer when a match is found. + * Returns NULL otherwise. + */ +void *qht_lookup_custom(struct qht *ht, const void *userp, uint32_t hash, + qht_lookup_func_t func); + +/** + * qht_lookup - Look up a pointer in a QHT + * @ht: QHT to be looked up + * @userp: pointer to pass to the comparison function + * @hash: hash of the pointer to be looked up + * + * Calls qht_lookup_custom() using @ht's default comparison function. + */ +void *qht_lookup(struct qht *ht, const void *userp, uint32_t hash); + +/** + * qht_remove - remove a pointer from the hash table + * @ht: QHT to remove from + * @p: pointer to be removed + * @hash: hash corresponding to @p + * + * Attempting to remove a NULL @p is a bug. + * + * Just-removed @p pointers cannot be immediately freed; they need to remain + * valid until the end of the RCU grace period in which qht_remove() is called. + * This guarantees that concurrent lookups will always compare against valid + * data. + * + * Returns true on success. + * Returns false if the @p-@hash pair was not found. + */ +bool qht_remove(struct qht *ht, const void *p, uint32_t hash); + +/** + * qht_reset - reset a QHT + * @ht: QHT to be reset + * + * All entries in the hash table are reset. No resizing is performed. + * + * If concurrent readers may exist, the objects pointed to by the hash table + * must remain valid for the existing RCU grace period -- see qht_remove(). + * See also: qht_reset_size() + */ +void qht_reset(struct qht *ht); + +/** + * qht_reset_size - reset and resize a QHT + * @ht: QHT to be reset and resized + * @n_elems: number of entries the resized hash table should be optimized for. + * + * Returns true if the resize was necessary and therefore performed. + * Returns false otherwise. + * + * If concurrent readers may exist, the objects pointed to by the hash table + * must remain valid for the existing RCU grace period -- see qht_remove(). + * See also: qht_reset(), qht_resize(). + */ +bool qht_reset_size(struct qht *ht, size_t n_elems); + +/** + * qht_resize - resize a QHT + * @ht: QHT to be resized + * @n_elems: number of entries the resized hash table should be optimized for + * + * Returns true on success. + * Returns false if the resize was not necessary and therefore not performed. + * See also: qht_reset_size(). + */ +bool qht_resize(struct qht *ht, size_t n_elems); + +/** + * qht_iter - Iterate over a QHT + * @ht: QHT to be iterated over + * @func: function to be called for each entry in QHT + * @userp: additional pointer to be passed to @func + * + * Each time it is called, user-provided @func is passed a pointer-hash pair, + * plus @userp. + * + * Note: @ht cannot be accessed from @func + * See also: qht_iter_remove() + */ +void qht_iter(struct qht *ht, qht_iter_func_t func, void *userp); + +/** + * qht_iter_remove - Iterate over a QHT, optionally removing entries + * @ht: QHT to be iterated over + * @func: function to be called for each entry in QHT + * @userp: additional pointer to be passed to @func + * + * Each time it is called, user-provided @func is passed a pointer-hash pair, + * plus @userp. If @func returns true, the pointer-hash pair is removed. + * + * Note: @ht cannot be accessed from @func + * See also: qht_iter() + */ +void qht_iter_remove(struct qht *ht, qht_iter_bool_func_t func, void *userp); + +/** + * qht_statistics_init - Gather statistics from a QHT + * @ht: QHT to gather statistics from + * @stats: pointer to a &struct qht_stats to be filled in + * + * Does NOT need to be called under an RCU read-critical section, + * since it does not dereference any pointers stored in the hash table. + * + * When done with @stats, pass the struct to qht_statistics_destroy(). + * Failing to do this will leak memory. + */ +void qht_statistics_init(struct qht *ht, struct qht_stats *stats); + +/** + * qht_statistics_destroy - Destroy a &struct qht_stats + * @stats: &struct qht_stats to be destroyed + * + * See also: qht_statistics_init(). + */ +void qht_statistics_destroy(struct qht_stats *stats); + +#endif /* QEMU_QHT_H */ diff --git a/include/qemu/qsp.h b/include/qemu/qsp.h new file mode 100644 index 000000000..bf36aabfa --- /dev/null +++ b/include/qemu/qsp.h @@ -0,0 +1,27 @@ +/* + * qsp.c - QEMU Synchronization Profiler + * + * Copyright (C) 2018, Emilio G. Cota + * + * License: GNU GPL, version 2 or later. + * See the COPYING file in the top-level directory. + * + * Note: this header file can *only* be included from thread.h. + */ +#ifndef QEMU_QSP_H +#define QEMU_QSP_H + +enum QSPSortBy { + QSP_SORT_BY_TOTAL_WAIT_TIME, + QSP_SORT_BY_AVG_WAIT_TIME, +}; + +void qsp_report(size_t max, enum QSPSortBy sort_by, + bool callsite_coalesce); + +bool qsp_is_enabled(void); +void qsp_enable(void); +void qsp_disable(void); +void qsp_reset(void); + +#endif /* QEMU_QSP_H */ diff --git a/include/qemu/qtree.h b/include/qemu/qtree.h new file mode 100644 index 000000000..d65f35350 --- /dev/null +++ b/include/qemu/qtree.h @@ -0,0 +1,202 @@ +/* + * GLIB - Library of useful routines for C programming + * Copyright (C) 1995-1997 Peter Mattis, Spencer Kimball and Josh MacDonald + * + * SPDX-License-Identifier: LGPL-2.1-or-later + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, see . + */ + +/* + * Modified by the GLib Team and others 1997-2000. See the AUTHORS + * file for a list of people on the GLib Team. See the ChangeLog + * files for a list of changes. These files are distributed with + * GLib at ftp://ftp.gtk.org/pub/gtk/. + */ + +/* + * QTree is a partial import of Glib's GTree. The parts excluded correspond + * to API calls either deprecated (e.g. g_tree_traverse) or recently added + * (e.g. g_tree_search_node, added in 2.68); neither have callers in QEMU. + * + * The reason for this import is to allow us to control the memory allocator + * used by the tree implementation. Until Glib 2.75.3, GTree uses Glib's + * slice allocator, which causes problems when forking in user-mode; + * see https://gitlab.com/qemu-project/qemu/-/issues/285 and glib's + * "45b5a6c1e gslice: Remove slice allocator and use malloc() instead". + * + * TODO: remove QTree when QEMU's minimum Glib version is >= 2.75.3. + */ + +#ifndef QEMU_QTREE_H +#define QEMU_QTREE_H + +#include "qemu/osdep.h" + +#ifdef HAVE_GLIB_WITH_SLICE_ALLOCATOR + +typedef struct _QTree QTree; + +typedef struct _QTreeNode QTreeNode; + +typedef gboolean (*QTraverseNodeFunc)(QTreeNode *node, + gpointer user_data); + +/* + * Balanced binary trees + */ +QTree *q_tree_new(GCompareFunc key_compare_func); +QTree *q_tree_new_with_data(GCompareDataFunc key_compare_func, + gpointer key_compare_data); +QTree *q_tree_new_full(GCompareDataFunc key_compare_func, + gpointer key_compare_data, + GDestroyNotify key_destroy_func, + GDestroyNotify value_destroy_func); +QTree *q_tree_ref(QTree *tree); +void q_tree_unref(QTree *tree); +void q_tree_destroy(QTree *tree); +void q_tree_insert(QTree *tree, + gpointer key, + gpointer value); +void q_tree_replace(QTree *tree, + gpointer key, + gpointer value); +gboolean q_tree_remove(QTree *tree, + gconstpointer key); +gboolean q_tree_steal(QTree *tree, + gconstpointer key); +gpointer q_tree_lookup(QTree *tree, + gconstpointer key); +gboolean q_tree_lookup_extended(QTree *tree, + gconstpointer lookup_key, + gpointer *orig_key, + gpointer *value); +void q_tree_foreach(QTree *tree, + GTraverseFunc func, + gpointer user_data); +gpointer q_tree_search(QTree *tree, + GCompareFunc search_func, + gconstpointer user_data); +gint q_tree_height(QTree *tree); +gint q_tree_nnodes(QTree *tree); + +#else /* !HAVE_GLIB_WITH_SLICE_ALLOCATOR */ + +typedef GTree QTree; +typedef GTreeNode QTreeNode; +typedef GTraverseNodeFunc QTraverseNodeFunc; + +static inline QTree *q_tree_new(GCompareFunc key_compare_func) +{ + return g_tree_new(key_compare_func); +} + +static inline QTree *q_tree_new_with_data(GCompareDataFunc key_compare_func, + gpointer key_compare_data) +{ + return g_tree_new_with_data(key_compare_func, key_compare_data); +} + +static inline QTree *q_tree_new_full(GCompareDataFunc key_compare_func, + gpointer key_compare_data, + GDestroyNotify key_destroy_func, + GDestroyNotify value_destroy_func) +{ + return g_tree_new_full(key_compare_func, key_compare_data, + key_destroy_func, value_destroy_func); +} + +static inline QTree *q_tree_ref(QTree *tree) +{ + return g_tree_ref(tree); +} + +static inline void q_tree_unref(QTree *tree) +{ + g_tree_unref(tree); +} + +static inline void q_tree_destroy(QTree *tree) +{ + g_tree_destroy(tree); +} + +static inline void q_tree_insert(QTree *tree, + gpointer key, + gpointer value) +{ + g_tree_insert(tree, key, value); +} + +static inline void q_tree_replace(QTree *tree, + gpointer key, + gpointer value) +{ + g_tree_replace(tree, key, value); +} + +static inline gboolean q_tree_remove(QTree *tree, + gconstpointer key) +{ + return g_tree_remove(tree, key); +} + +static inline gboolean q_tree_steal(QTree *tree, + gconstpointer key) +{ + return g_tree_steal(tree, key); +} + +static inline gpointer q_tree_lookup(QTree *tree, + gconstpointer key) +{ + return g_tree_lookup(tree, key); +} + +static inline gboolean q_tree_lookup_extended(QTree *tree, + gconstpointer lookup_key, + gpointer *orig_key, + gpointer *value) +{ + return g_tree_lookup_extended(tree, lookup_key, orig_key, value); +} + +static inline void q_tree_foreach(QTree *tree, + GTraverseFunc func, + gpointer user_data) +{ + return g_tree_foreach(tree, func, user_data); +} + +static inline gpointer q_tree_search(QTree *tree, + GCompareFunc search_func, + gconstpointer user_data) +{ + return g_tree_search(tree, search_func, user_data); +} + +static inline gint q_tree_height(QTree *tree) +{ + return g_tree_height(tree); +} + +static inline gint q_tree_nnodes(QTree *tree) +{ + return g_tree_nnodes(tree); +} + +#endif /* HAVE_GLIB_WITH_SLICE_ALLOCATOR */ + +#endif /* QEMU_QTREE_H */ + diff --git a/include/qemu/queue.h b/include/qemu/queue.h new file mode 100644 index 000000000..e029e7bf6 --- /dev/null +++ b/include/qemu/queue.h @@ -0,0 +1,576 @@ +/* $NetBSD: queue.h,v 1.52 2009/04/20 09:56:08 mschuett Exp $ */ + +/* + * QEMU version: Copy from netbsd, removed debug code, removed some of + * the implementations. Left in singly-linked lists, lists, simple + * queues, and tail queues. + */ + +/* + * Copyright (c) 1991, 1993 + * The Regents of the University of California. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * @(#)queue.h 8.5 (Berkeley) 8/20/94 + */ + +#ifndef QEMU_SYS_QUEUE_H +#define QEMU_SYS_QUEUE_H + +/* + * This file defines four types of data structures: singly-linked lists, + * lists, simple queues, and tail queues. + * + * A singly-linked list is headed by a single forward pointer. The + * elements are singly linked for minimum space and pointer manipulation + * overhead at the expense of O(n) removal for arbitrary elements. New + * elements can be added to the list after an existing element or at the + * head of the list. Elements being removed from the head of the list + * should use the explicit macro for this purpose for optimum + * efficiency. A singly-linked list may only be traversed in the forward + * direction. Singly-linked lists are ideal for applications with large + * datasets and few or no removals or for implementing a LIFO queue. + * + * A list is headed by a single forward pointer (or an array of forward + * pointers for a hash table header). The elements are doubly linked + * so that an arbitrary element can be removed without a need to + * traverse the list. New elements can be added to the list before + * or after an existing element or at the head of the list. A list + * may only be traversed in the forward direction. + * + * A simple queue is headed by a pair of pointers, one the head of the + * list and the other to the tail of the list. The elements are singly + * linked to save space, so elements can only be removed from the + * head of the list. New elements can be added to the list after + * an existing element, at the head of the list, or at the end of the + * list. A simple queue may only be traversed in the forward direction. + * + * A tail queue is headed by a pair of pointers, one to the head of the + * list and the other to the tail of the list. The elements are doubly + * linked so that an arbitrary element can be removed without a need to + * traverse the list. New elements can be added to the list before or + * after an existing element, at the head of the list, or at the end of + * the list. A tail queue may be traversed in either direction. + * + * For details on the use of these macros, see the queue(3) manual page. + */ + +/* + * List definitions. + */ +#define QLIST_HEAD(name, type) \ +struct name { \ + struct type *lh_first; /* first element */ \ +} + +#define QLIST_HEAD_INITIALIZER(head) \ + { NULL } + +#define QLIST_ENTRY(type) \ +struct { \ + struct type *le_next; /* next element */ \ + struct type **le_prev; /* address of previous next element */ \ +} + +/* + * List functions. + */ +#define QLIST_INIT(head) do { \ + (head)->lh_first = NULL; \ +} while (/*CONSTCOND*/0) + +#define QLIST_SWAP(dstlist, srclist, field) do { \ + void *tmplist; \ + tmplist = (srclist)->lh_first; \ + (srclist)->lh_first = (dstlist)->lh_first; \ + if ((srclist)->lh_first != NULL) { \ + (srclist)->lh_first->field.le_prev = &(srclist)->lh_first; \ + } \ + (dstlist)->lh_first = tmplist; \ + if ((dstlist)->lh_first != NULL) { \ + (dstlist)->lh_first->field.le_prev = &(dstlist)->lh_first; \ + } \ +} while (/*CONSTCOND*/0) + +#define QLIST_INSERT_AFTER(listelm, elm, field) do { \ + if (((elm)->field.le_next = (listelm)->field.le_next) != NULL) \ + (listelm)->field.le_next->field.le_prev = \ + &(elm)->field.le_next; \ + (listelm)->field.le_next = (elm); \ + (elm)->field.le_prev = &(listelm)->field.le_next; \ +} while (/*CONSTCOND*/0) + +#define QLIST_INSERT_BEFORE(listelm, elm, field) do { \ + (elm)->field.le_prev = (listelm)->field.le_prev; \ + (elm)->field.le_next = (listelm); \ + *(listelm)->field.le_prev = (elm); \ + (listelm)->field.le_prev = &(elm)->field.le_next; \ +} while (/*CONSTCOND*/0) + +#define QLIST_INSERT_HEAD(head, elm, field) do { \ + if (((elm)->field.le_next = (head)->lh_first) != NULL) \ + (head)->lh_first->field.le_prev = &(elm)->field.le_next;\ + (head)->lh_first = (elm); \ + (elm)->field.le_prev = &(head)->lh_first; \ +} while (/*CONSTCOND*/0) + +#define QLIST_REMOVE(elm, field) do { \ + if ((elm)->field.le_next != NULL) \ + (elm)->field.le_next->field.le_prev = \ + (elm)->field.le_prev; \ + *(elm)->field.le_prev = (elm)->field.le_next; \ + (elm)->field.le_next = NULL; \ + (elm)->field.le_prev = NULL; \ +} while (/*CONSTCOND*/0) + +/* + * Like QLIST_REMOVE() but safe to call when elm is not in a list + */ +#define QLIST_SAFE_REMOVE(elm, field) do { \ + if ((elm)->field.le_prev != NULL) { \ + if ((elm)->field.le_next != NULL) \ + (elm)->field.le_next->field.le_prev = \ + (elm)->field.le_prev; \ + *(elm)->field.le_prev = (elm)->field.le_next; \ + (elm)->field.le_next = NULL; \ + (elm)->field.le_prev = NULL; \ + } \ +} while (/*CONSTCOND*/0) + +/* Is elm in a list? */ +#define QLIST_IS_INSERTED(elm, field) ((elm)->field.le_prev != NULL) + +#define QLIST_FOREACH(var, head, field) \ + for ((var) = ((head)->lh_first); \ + (var); \ + (var) = ((var)->field.le_next)) + +#define QLIST_FOREACH_SAFE(var, head, field, next_var) \ + for ((var) = ((head)->lh_first); \ + (var) && ((next_var) = ((var)->field.le_next), 1); \ + (var) = (next_var)) + +/* + * List access methods. + */ +#define QLIST_EMPTY(head) ((head)->lh_first == NULL) +#define QLIST_FIRST(head) ((head)->lh_first) +#define QLIST_NEXT(elm, field) ((elm)->field.le_next) + + +/* + * Singly-linked List definitions. + */ +#define QSLIST_HEAD(name, type) \ +struct name { \ + struct type *slh_first; /* first element */ \ +} + +#define QSLIST_HEAD_INITIALIZER(head) \ + { NULL } + +#define QSLIST_ENTRY(type) \ +struct { \ + struct type *sle_next; /* next element */ \ +} + +/* + * Singly-linked List functions. + */ +#define QSLIST_INIT(head) do { \ + (head)->slh_first = NULL; \ +} while (/*CONSTCOND*/0) + +#define QSLIST_INSERT_AFTER(slistelm, elm, field) do { \ + (elm)->field.sle_next = (slistelm)->field.sle_next; \ + (slistelm)->field.sle_next = (elm); \ +} while (/*CONSTCOND*/0) + +#define QSLIST_INSERT_HEAD(head, elm, field) do { \ + (elm)->field.sle_next = (head)->slh_first; \ + (head)->slh_first = (elm); \ +} while (/*CONSTCOND*/0) + +#define QSLIST_INSERT_HEAD_ATOMIC(head, elm, field) do { \ + typeof(elm) save_sle_next; \ + do { \ + save_sle_next = (elm)->field.sle_next = (head)->slh_first; \ + } while (qatomic_cmpxchg(&(head)->slh_first, save_sle_next, (elm)) !=\ + save_sle_next); \ +} while (/*CONSTCOND*/0) + +#define QSLIST_MOVE_ATOMIC(dest, src) do { \ + (dest)->slh_first = qatomic_xchg(&(src)->slh_first, NULL); \ +} while (/*CONSTCOND*/0) + +#define QSLIST_REMOVE_HEAD(head, field) do { \ + typeof((head)->slh_first) elm = (head)->slh_first; \ + (head)->slh_first = elm->field.sle_next; \ + elm->field.sle_next = NULL; \ +} while (/*CONSTCOND*/0) + +#define QSLIST_REMOVE_AFTER(slistelm, field) do { \ + typeof(slistelm) next = (slistelm)->field.sle_next; \ + (slistelm)->field.sle_next = next->field.sle_next; \ + next->field.sle_next = NULL; \ +} while (/*CONSTCOND*/0) + +#define QSLIST_REMOVE(head, elm, type, field) do { \ + if ((head)->slh_first == (elm)) { \ + QSLIST_REMOVE_HEAD((head), field); \ + } else { \ + struct type *curelm = (head)->slh_first; \ + while (curelm->field.sle_next != (elm)) \ + curelm = curelm->field.sle_next; \ + curelm->field.sle_next = curelm->field.sle_next->field.sle_next; \ + (elm)->field.sle_next = NULL; \ + } \ +} while (/*CONSTCOND*/0) + +#define QSLIST_FOREACH(var, head, field) \ + for((var) = (head)->slh_first; (var); (var) = (var)->field.sle_next) + +#define QSLIST_FOREACH_SAFE(var, head, field, tvar) \ + for ((var) = QSLIST_FIRST((head)); \ + (var) && ((tvar) = QSLIST_NEXT((var), field), 1); \ + (var) = (tvar)) + +/* + * Singly-linked List access methods. + */ +#define QSLIST_EMPTY(head) ((head)->slh_first == NULL) +#define QSLIST_FIRST(head) ((head)->slh_first) +#define QSLIST_NEXT(elm, field) ((elm)->field.sle_next) + + +/* + * Simple queue definitions. + */ +#define QSIMPLEQ_HEAD(name, type) \ +struct name { \ + struct type *sqh_first; /* first element */ \ + struct type **sqh_last; /* addr of last next element */ \ +} + +#define QSIMPLEQ_HEAD_INITIALIZER(head) \ + { NULL, &(head).sqh_first } + +#define QSIMPLEQ_ENTRY(type) \ +struct { \ + struct type *sqe_next; /* next element */ \ +} + +/* + * Simple queue functions. + */ +#define QSIMPLEQ_INIT(head) do { \ + (head)->sqh_first = NULL; \ + (head)->sqh_last = &(head)->sqh_first; \ +} while (/*CONSTCOND*/0) + +#define QSIMPLEQ_INSERT_HEAD(head, elm, field) do { \ + if (((elm)->field.sqe_next = (head)->sqh_first) == NULL) \ + (head)->sqh_last = &(elm)->field.sqe_next; \ + (head)->sqh_first = (elm); \ +} while (/*CONSTCOND*/0) + +#define QSIMPLEQ_INSERT_TAIL(head, elm, field) do { \ + (elm)->field.sqe_next = NULL; \ + *(head)->sqh_last = (elm); \ + (head)->sqh_last = &(elm)->field.sqe_next; \ +} while (/*CONSTCOND*/0) + +#define QSIMPLEQ_INSERT_AFTER(head, listelm, elm, field) do { \ + if (((elm)->field.sqe_next = (listelm)->field.sqe_next) == NULL) \ + (head)->sqh_last = &(elm)->field.sqe_next; \ + (listelm)->field.sqe_next = (elm); \ +} while (/*CONSTCOND*/0) + +#define QSIMPLEQ_REMOVE_HEAD(head, field) do { \ + typeof((head)->sqh_first) elm = (head)->sqh_first; \ + if (((head)->sqh_first = elm->field.sqe_next) == NULL) \ + (head)->sqh_last = &(head)->sqh_first; \ + elm->field.sqe_next = NULL; \ +} while (/*CONSTCOND*/0) + +#define QSIMPLEQ_SPLIT_AFTER(head, elm, field, removed) do { \ + QSIMPLEQ_INIT(removed); \ + if (((removed)->sqh_first = (head)->sqh_first) != NULL) { \ + if (((head)->sqh_first = (elm)->field.sqe_next) == NULL) { \ + (head)->sqh_last = &(head)->sqh_first; \ + } \ + (removed)->sqh_last = &(elm)->field.sqe_next; \ + (elm)->field.sqe_next = NULL; \ + } \ +} while (/*CONSTCOND*/0) + +#define QSIMPLEQ_REMOVE(head, elm, type, field) do { \ + if ((head)->sqh_first == (elm)) { \ + QSIMPLEQ_REMOVE_HEAD((head), field); \ + } else { \ + struct type *curelm = (head)->sqh_first; \ + while (curelm->field.sqe_next != (elm)) \ + curelm = curelm->field.sqe_next; \ + if ((curelm->field.sqe_next = \ + curelm->field.sqe_next->field.sqe_next) == NULL) \ + (head)->sqh_last = &(curelm)->field.sqe_next; \ + (elm)->field.sqe_next = NULL; \ + } \ +} while (/*CONSTCOND*/0) + +#define QSIMPLEQ_FOREACH(var, head, field) \ + for ((var) = ((head)->sqh_first); \ + (var); \ + (var) = ((var)->field.sqe_next)) + +#define QSIMPLEQ_FOREACH_SAFE(var, head, field, next) \ + for ((var) = ((head)->sqh_first); \ + (var) && ((next = ((var)->field.sqe_next)), 1); \ + (var) = (next)) + +#define QSIMPLEQ_CONCAT(head1, head2) do { \ + if (!QSIMPLEQ_EMPTY((head2))) { \ + *(head1)->sqh_last = (head2)->sqh_first; \ + (head1)->sqh_last = (head2)->sqh_last; \ + QSIMPLEQ_INIT((head2)); \ + } \ +} while (/*CONSTCOND*/0) + +#define QSIMPLEQ_PREPEND(head1, head2) do { \ + if (!QSIMPLEQ_EMPTY((head2))) { \ + *(head2)->sqh_last = (head1)->sqh_first; \ + (head1)->sqh_first = (head2)->sqh_first; \ + QSIMPLEQ_INIT((head2)); \ + } \ +} while (/*CONSTCOND*/0) + +#define QSIMPLEQ_LAST(head, type, field) \ + (QSIMPLEQ_EMPTY((head)) ? \ + NULL : \ + ((struct type *)(void *) \ + ((char *)((head)->sqh_last) - offsetof(struct type, field)))) + +/* + * Simple queue access methods. + */ +#define QSIMPLEQ_EMPTY_ATOMIC(head) \ + (qatomic_read(&((head)->sqh_first)) == NULL) +#define QSIMPLEQ_EMPTY(head) ((head)->sqh_first == NULL) +#define QSIMPLEQ_FIRST(head) ((head)->sqh_first) +#define QSIMPLEQ_NEXT(elm, field) ((elm)->field.sqe_next) + +typedef struct QTailQLink { + void *tql_next; + struct QTailQLink *tql_prev; +} QTailQLink; + +/* + * Tail queue definitions. The union acts as a poor man template, as if + * it were QTailQLink. + */ +#define QTAILQ_HEAD(name, type) \ +union name { \ + struct type *tqh_first; /* first element */ \ + QTailQLink tqh_circ; /* link for circular backwards list */ \ +} + +#define QTAILQ_HEAD_INITIALIZER(head) \ + { .tqh_circ = { NULL, &(head).tqh_circ } } + +#define QTAILQ_ENTRY(type) \ +union { \ + struct type *tqe_next; /* next element */ \ + QTailQLink tqe_circ; /* link for circular backwards list */ \ +} + +/* + * Tail queue functions. + */ +#define QTAILQ_INIT(head) do { \ + (head)->tqh_first = NULL; \ + (head)->tqh_circ.tql_prev = &(head)->tqh_circ; \ +} while (/*CONSTCOND*/0) + +#define QTAILQ_INSERT_HEAD(head, elm, field) do { \ + if (((elm)->field.tqe_next = (head)->tqh_first) != NULL) \ + (head)->tqh_first->field.tqe_circ.tql_prev = \ + &(elm)->field.tqe_circ; \ + else \ + (head)->tqh_circ.tql_prev = &(elm)->field.tqe_circ; \ + (head)->tqh_first = (elm); \ + (elm)->field.tqe_circ.tql_prev = &(head)->tqh_circ; \ +} while (/*CONSTCOND*/0) + +#define QTAILQ_INSERT_TAIL(head, elm, field) do { \ + (elm)->field.tqe_next = NULL; \ + (elm)->field.tqe_circ.tql_prev = (head)->tqh_circ.tql_prev; \ + (head)->tqh_circ.tql_prev->tql_next = (elm); \ + (head)->tqh_circ.tql_prev = &(elm)->field.tqe_circ; \ +} while (/*CONSTCOND*/0) + +#define QTAILQ_INSERT_AFTER(head, listelm, elm, field) do { \ + if (((elm)->field.tqe_next = (listelm)->field.tqe_next) != NULL)\ + (elm)->field.tqe_next->field.tqe_circ.tql_prev = \ + &(elm)->field.tqe_circ; \ + else \ + (head)->tqh_circ.tql_prev = &(elm)->field.tqe_circ; \ + (listelm)->field.tqe_next = (elm); \ + (elm)->field.tqe_circ.tql_prev = &(listelm)->field.tqe_circ; \ +} while (/*CONSTCOND*/0) + +#define QTAILQ_INSERT_BEFORE(listelm, elm, field) do { \ + (elm)->field.tqe_circ.tql_prev = (listelm)->field.tqe_circ.tql_prev; \ + (elm)->field.tqe_next = (listelm); \ + (listelm)->field.tqe_circ.tql_prev->tql_next = (elm); \ + (listelm)->field.tqe_circ.tql_prev = &(elm)->field.tqe_circ; \ +} while (/*CONSTCOND*/0) + +#define QTAILQ_REMOVE(head, elm, field) do { \ + if (((elm)->field.tqe_next) != NULL) \ + (elm)->field.tqe_next->field.tqe_circ.tql_prev = \ + (elm)->field.tqe_circ.tql_prev; \ + else \ + (head)->tqh_circ.tql_prev = (elm)->field.tqe_circ.tql_prev; \ + (elm)->field.tqe_circ.tql_prev->tql_next = (elm)->field.tqe_next; \ + (elm)->field.tqe_circ.tql_prev = NULL; \ + (elm)->field.tqe_circ.tql_next = NULL; \ + (elm)->field.tqe_next = NULL; \ +} while (/*CONSTCOND*/0) + +/* remove @left, @right and all elements in between from @head */ +#define QTAILQ_REMOVE_SEVERAL(head, left, right, field) do { \ + if (((right)->field.tqe_next) != NULL) \ + (right)->field.tqe_next->field.tqe_circ.tql_prev = \ + (left)->field.tqe_circ.tql_prev; \ + else \ + (head)->tqh_circ.tql_prev = (left)->field.tqe_circ.tql_prev; \ + (left)->field.tqe_circ.tql_prev->tql_next = (right)->field.tqe_next; \ + } while (/*CONSTCOND*/0) + +#define QTAILQ_FOREACH(var, head, field) \ + for ((var) = ((head)->tqh_first); \ + (var); \ + (var) = ((var)->field.tqe_next)) + +#define QTAILQ_FOREACH_SAFE(var, head, field, next_var) \ + for ((var) = ((head)->tqh_first); \ + (var) && ((next_var) = ((var)->field.tqe_next), 1); \ + (var) = (next_var)) + +#define QTAILQ_FOREACH_REVERSE(var, head, field) \ + for ((var) = QTAILQ_LAST(head); \ + (var); \ + (var) = QTAILQ_PREV(var, field)) + +#define QTAILQ_FOREACH_REVERSE_SAFE(var, head, field, prev_var) \ + for ((var) = QTAILQ_LAST(head); \ + (var) && ((prev_var) = QTAILQ_PREV(var, field), 1); \ + (var) = (prev_var)) + +/* + * Tail queue access methods. + */ +#define QTAILQ_EMPTY(head) ((head)->tqh_first == NULL) +#define QTAILQ_FIRST(head) ((head)->tqh_first) +#define QTAILQ_NEXT(elm, field) ((elm)->field.tqe_next) +#define QTAILQ_IN_USE(elm, field) ((elm)->field.tqe_circ.tql_prev != NULL) + +#define QTAILQ_LINK_PREV(link) \ + ((link).tql_prev->tql_prev->tql_next) +#define QTAILQ_LAST(head) \ + ((typeof((head)->tqh_first)) QTAILQ_LINK_PREV((head)->tqh_circ)) +#define QTAILQ_PREV(elm, field) \ + ((typeof((elm)->field.tqe_next)) QTAILQ_LINK_PREV((elm)->field.tqe_circ)) + +#define field_at_offset(base, offset, type) \ + ((type *) (((char *) (base)) + (offset))) + +/* + * Raw access of elements of a tail queue head. Offsets are all zero + * because it's a union. + */ +#define QTAILQ_RAW_FIRST(head) \ + field_at_offset(head, 0, void *) +#define QTAILQ_RAW_TQH_CIRC(head) \ + field_at_offset(head, 0, QTailQLink) + +/* + * Raw access of elements of a tail entry + */ +#define QTAILQ_RAW_NEXT(elm, entry) \ + field_at_offset(elm, entry, void *) +#define QTAILQ_RAW_TQE_CIRC(elm, entry) \ + field_at_offset(elm, entry, QTailQLink) +/* + * Tail queue traversal using pointer arithmetic. + */ +#define QTAILQ_RAW_FOREACH(elm, head, entry) \ + for ((elm) = *QTAILQ_RAW_FIRST(head); \ + (elm); \ + (elm) = *QTAILQ_RAW_NEXT(elm, entry)) +/* + * Tail queue insertion using pointer arithmetic. + */ +#define QTAILQ_RAW_INSERT_TAIL(head, elm, entry) do { \ + *QTAILQ_RAW_NEXT(elm, entry) = NULL; \ + QTAILQ_RAW_TQE_CIRC(elm, entry)->tql_prev = QTAILQ_RAW_TQH_CIRC(head)->tql_prev; \ + QTAILQ_RAW_TQH_CIRC(head)->tql_prev->tql_next = (elm); \ + QTAILQ_RAW_TQH_CIRC(head)->tql_prev = QTAILQ_RAW_TQE_CIRC(elm, entry); \ +} while (/*CONSTCOND*/0) + +#define QLIST_RAW_FIRST(head) \ + field_at_offset(head, 0, void *) + +#define QLIST_RAW_NEXT(elm, entry) \ + field_at_offset(elm, entry, void *) + +#define QLIST_RAW_PREVIOUS(elm, entry) \ + field_at_offset(elm, entry + sizeof(void *), void *) + +#define QLIST_RAW_FOREACH(elm, head, entry) \ + for ((elm) = *QLIST_RAW_FIRST(head); \ + (elm); \ + (elm) = *QLIST_RAW_NEXT(elm, entry)) + +#define QLIST_RAW_INSERT_AFTER(head, prev, elem, entry) do { \ + *QLIST_RAW_NEXT(prev, entry) = elem; \ + *QLIST_RAW_PREVIOUS(elem, entry) = QLIST_RAW_NEXT(prev, entry); \ + *QLIST_RAW_NEXT(elem, entry) = NULL; \ +} while (0) + +#define QLIST_RAW_INSERT_HEAD(head, elm, entry) do { \ + void *first = *QLIST_RAW_FIRST(head); \ + *QLIST_RAW_FIRST(head) = elm; \ + *QLIST_RAW_PREVIOUS(elm, entry) = QLIST_RAW_FIRST(head); \ + if (first) { \ + *QLIST_RAW_NEXT(elm, entry) = first; \ + *QLIST_RAW_PREVIOUS(first, entry) = QLIST_RAW_NEXT(elm, entry); \ + } else { \ + *QLIST_RAW_NEXT(elm, entry) = NULL; \ + } \ +} while (0) + +#endif /* QEMU_SYS_QUEUE_H */ diff --git a/include/qemu/range.h b/include/qemu/range.h new file mode 100644 index 000000000..f62b363e0 --- /dev/null +++ b/include/qemu/range.h @@ -0,0 +1,222 @@ +/* + * QEMU 64-bit address ranges + * + * Copyright (c) 2015-2016 Red Hat, Inc. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public + * License as published by the Free Software Foundation; either + * version 2 of the License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, see . + */ + +#ifndef QEMU_RANGE_H +#define QEMU_RANGE_H + +/* + * Operations on 64 bit address ranges. + * Notes: + * - Ranges must not wrap around 0, but can include UINT64_MAX. + */ + +struct Range { + /* + * Do not access members directly, use the functions! + * A non-empty range has @lob <= @upb. + * An empty range has @lob == @upb + 1. + */ + uint64_t lob; /* inclusive lower bound */ + uint64_t upb; /* inclusive upper bound */ +}; + +static inline void range_invariant(const Range *range) +{ + assert(range->lob <= range->upb || range->lob == range->upb + 1); +} + +/* Compound literal encoding the empty range */ +#define range_empty ((Range){ .lob = 1, .upb = 0 }) + +/* Is @range empty? */ +static inline bool range_is_empty(const Range *range) +{ + range_invariant(range); + return range->lob > range->upb; +} + +/* Does @range contain @val? */ +static inline bool range_contains(const Range *range, uint64_t val) +{ + return val >= range->lob && val <= range->upb; +} + +/* Initialize @range to the empty range */ +static inline void range_make_empty(Range *range) +{ + *range = range_empty; + assert(range_is_empty(range)); +} + +/* + * Initialize @range to span the interval [@lob,@upb]. + * Both bounds are inclusive. + * The interval must not be empty, i.e. @lob must be less than or + * equal @upb. + */ +static inline void range_set_bounds(Range *range, uint64_t lob, uint64_t upb) +{ + range->lob = lob; + range->upb = upb; + assert(!range_is_empty(range)); +} + +/* + * Initialize @range to span the interval [@lob,@upb_plus1). + * The lower bound is inclusive, the upper bound is exclusive. + * Zero @upb_plus1 is special: if @lob is also zero, set @range to the + * empty range. Else, set @range to [@lob,UINT64_MAX]. + */ +static inline void range_set_bounds1(Range *range, + uint64_t lob, uint64_t upb_plus1) +{ + if (!lob && !upb_plus1) { + *range = range_empty; + } else { + range->lob = lob; + range->upb = upb_plus1 - 1; + } + range_invariant(range); +} + +/* Return @range's lower bound. @range must not be empty. */ +static inline uint64_t range_lob(Range *range) +{ + assert(!range_is_empty(range)); + return range->lob; +} + +/* Return @range's upper bound. @range must not be empty. */ +static inline uint64_t range_upb(Range *range) +{ + assert(!range_is_empty(range)); + return range->upb; +} + +/* + * Initialize @range to span the interval [@lob,@lob + @size - 1]. + * @size may be 0. If the range would overflow, returns -ERANGE, otherwise + * 0. + */ +static inline int QEMU_WARN_UNUSED_RESULT range_init(Range *range, uint64_t lob, + uint64_t size) +{ + if (lob + size < lob) { + return -ERANGE; + } + range->lob = lob; + range->upb = lob + size - 1; + range_invariant(range); + return 0; +} + +/* + * Initialize @range to span the interval [@lob,@lob + @size - 1]. + * @size may be 0. Range must not overflow. + */ +static inline void range_init_nofail(Range *range, uint64_t lob, uint64_t size) +{ + range->lob = lob; + range->upb = lob + size - 1; + range_invariant(range); +} + +/* + * Get the size of @range. + */ +static inline uint64_t range_size(const Range *range) +{ + return range->upb - range->lob + 1; +} + +/* + * Check if @range1 overlaps with @range2. If one of the ranges is empty, + * the result is always "false". + */ +static inline bool range_overlaps_range(const Range *range1, + const Range *range2) +{ + if (range_is_empty(range1) || range_is_empty(range2)) { + return false; + } + return !(range2->upb < range1->lob || range1->upb < range2->lob); +} + +/* + * Check if @range1 contains @range2. If one of the ranges is empty, + * the result is always "false". + */ +static inline bool range_contains_range(const Range *range1, + const Range *range2) +{ + if (range_is_empty(range1) || range_is_empty(range2)) { + return false; + } + return range1->lob <= range2->lob && range1->upb >= range2->upb; +} + +/* + * Extend @range to the smallest interval that includes @extend_by, too. + */ +static inline void range_extend(Range *range, Range *extend_by) +{ + if (range_is_empty(extend_by)) { + return; + } + if (range_is_empty(range)) { + *range = *extend_by; + return; + } + if (range->lob > extend_by->lob) { + range->lob = extend_by->lob; + } + if (range->upb < extend_by->upb) { + range->upb = extend_by->upb; + } + range_invariant(range); +} + +/* Get last byte of a range from offset + length. + * Undefined for ranges that wrap around 0. */ +static inline uint64_t range_get_last(uint64_t offset, uint64_t len) +{ + return offset + len - 1; +} + +/* Check whether a given range covers a given byte. */ +static inline int range_covers_byte(uint64_t offset, uint64_t len, + uint64_t byte) +{ + return offset <= byte && byte <= range_get_last(offset, len); +} + +/* Check whether 2 given ranges overlap. + * Undefined if ranges that wrap around 0. */ +static inline int ranges_overlap(uint64_t first1, uint64_t len1, + uint64_t first2, uint64_t len2) +{ + uint64_t last1 = range_get_last(first1, len1); + uint64_t last2 = range_get_last(first2, len2); + + return !(last2 < first1 || last1 < first2); +} + +GList *range_list_insert(GList *list, Range *data); + +#endif diff --git a/include/qemu/ratelimit.h b/include/qemu/ratelimit.h new file mode 100644 index 000000000..01da8d63f --- /dev/null +++ b/include/qemu/ratelimit.h @@ -0,0 +1,75 @@ +/* + * Ratelimiting calculations + * + * Copyright IBM, Corp. 2011 + * + * Authors: + * Stefan Hajnoczi + * + * This work is licensed under the terms of the GNU LGPL, version 2 or later. + * See the COPYING.LIB file in the top-level directory. + * + */ + +#ifndef QEMU_RATELIMIT_H +#define QEMU_RATELIMIT_H + +#include "qemu/timer.h" + +typedef struct { + int64_t slice_start_time; + int64_t slice_end_time; + uint64_t slice_quota; + uint64_t slice_ns; + uint64_t dispatched; +} RateLimit; + +/** Calculate and return delay for next request in ns + * + * Record that we sent @n data units (where @n matches the scale chosen + * during ratelimit_set_speed). If we may send more data units + * in the current time slice, return 0 (i.e. no delay). Otherwise + * return the amount of time (in ns) until the start of the next time + * slice that will permit sending the next chunk of data. + * + * Recording sent data units even after exceeding the quota is + * permitted; the time slice will be extended accordingly. + */ +static inline int64_t ratelimit_calculate_delay(RateLimit *limit, uint64_t n) +{ + int64_t now = qemu_clock_get_ns(QEMU_CLOCK_REALTIME); + double delay_slices; + + assert(limit->slice_quota && limit->slice_ns); + + if (limit->slice_end_time < now) { + /* Previous, possibly extended, time slice finished; reset the + * accounting. */ + limit->slice_start_time = now; + limit->slice_end_time = now + limit->slice_ns; + limit->dispatched = 0; + } + + limit->dispatched += n; + if (limit->dispatched < limit->slice_quota) { + /* We may send further data within the current time slice, no + * need to delay the next request. */ + return 0; + } + + /* Quota exceeded. Wait based on the excess amount and then start a new + * slice. */ + delay_slices = (double)limit->dispatched / limit->slice_quota; + limit->slice_end_time = limit->slice_start_time + + (uint64_t)(delay_slices * limit->slice_ns); + return limit->slice_end_time - now; +} + +static inline void ratelimit_set_speed(RateLimit *limit, uint64_t speed, + uint64_t slice_ns) +{ + limit->slice_ns = slice_ns; + limit->slice_quota = MAX(((double)speed * slice_ns) / 1000000000ULL, 1); +} + +#endif diff --git a/include/qemu/rcu.h b/include/qemu/rcu.h new file mode 100644 index 000000000..515d327cf --- /dev/null +++ b/include/qemu/rcu.h @@ -0,0 +1,187 @@ +#ifndef QEMU_RCU_H +#define QEMU_RCU_H + +/* + * urcu-mb.h + * + * Userspace RCU header with explicit memory barrier. + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + * IBM's contributions to this file may be relicensed under LGPLv2 or later. + */ + + +#include "qemu/thread.h" +#include "qemu/queue.h" +#include "qemu/atomic.h" +#include "qemu/sys_membarrier.h" + +#ifdef __cplusplus +extern "C" { +#endif + +/* + * Important ! + * + * Each thread containing read-side critical sections must be registered + * with rcu_register_thread() before calling rcu_read_lock(). + * rcu_unregister_thread() should be called before the thread exits. + */ + +#ifdef DEBUG_RCU +#define rcu_assert(args...) assert(args) +#else +#define rcu_assert(args...) +#endif + +/* + * Global quiescent period counter with low-order bits unused. + * Using a int rather than a char to eliminate false register dependencies + * causing stalls on some architectures. + */ +extern unsigned long rcu_gp_ctr; + +extern QemuEvent rcu_gp_event; + +struct rcu_reader_data { + /* Data used by both reader and synchronize_rcu() */ + unsigned long ctr; + bool waiting; + + /* Data used by reader only */ + unsigned depth; + + /* Data used for registry, protected by rcu_registry_lock */ + QLIST_ENTRY(rcu_reader_data) node; +}; + +extern __thread struct rcu_reader_data rcu_reader; + +static inline void rcu_read_lock(void) +{ + struct rcu_reader_data *p_rcu_reader = &rcu_reader; + unsigned ctr; + + if (p_rcu_reader->depth++ > 0) { + return; + } + + ctr = qatomic_read(&rcu_gp_ctr); + qatomic_set(&p_rcu_reader->ctr, ctr); + + /* Write p_rcu_reader->ctr before reading RCU-protected pointers. */ + smp_mb_placeholder(); +} + +static inline void rcu_read_unlock(void) +{ + struct rcu_reader_data *p_rcu_reader = &rcu_reader; + + assert(p_rcu_reader->depth != 0); + if (--p_rcu_reader->depth > 0) { + return; + } + + /* Ensure that the critical section is seen to precede the + * store to p_rcu_reader->ctr. Together with the following + * smp_mb_placeholder(), this ensures writes to p_rcu_reader->ctr + * are sequentially consistent. + */ + qatomic_store_release(&p_rcu_reader->ctr, 0); + + /* Write p_rcu_reader->ctr before reading p_rcu_reader->waiting. */ + smp_mb_placeholder(); + if (unlikely(qatomic_read(&p_rcu_reader->waiting))) { + qatomic_set(&p_rcu_reader->waiting, false); + qemu_event_set(&rcu_gp_event); + } +} + +extern void synchronize_rcu(void); + +/* + * Reader thread registration. + */ +extern void rcu_register_thread(void); +extern void rcu_unregister_thread(void); + +/* + * Support for fork(). fork() support is enabled at startup. + */ +extern void rcu_enable_atfork(void); +extern void rcu_disable_atfork(void); + +struct rcu_head; +typedef void RCUCBFunc(struct rcu_head *head); + +struct rcu_head { + struct rcu_head *next; + RCUCBFunc *func; +}; + +extern void call_rcu1(struct rcu_head *head, RCUCBFunc *func); +extern void drain_call_rcu(void); + +/* The operands of the minus operator must have the same type, + * which must be the one that we specify in the cast. + */ +#define call_rcu(head, func, field) \ + call_rcu1(({ \ + char __attribute__((unused)) \ + offset_must_be_zero[-offsetof(typeof(*(head)), field)], \ + func_type_invalid = (func) - (void (*)(typeof(head)))(func); \ + &(head)->field; \ + }), \ + (RCUCBFunc *)(func)) + +#define g_free_rcu(obj, field) \ + call_rcu1(({ \ + char __attribute__((unused)) \ + offset_must_be_zero[-offsetof(typeof(*(obj)), field)]; \ + &(obj)->field; \ + }), \ + (RCUCBFunc *)g_free); + +typedef void RCUReadAuto; +static inline RCUReadAuto *rcu_read_auto_lock(void) +{ + rcu_read_lock(); + /* Anything non-NULL causes the cleanup function to be called */ + return (void *)(uintptr_t)0x1; +} + +static inline void rcu_read_auto_unlock(RCUReadAuto *r) +{ + rcu_read_unlock(); +} + +G_DEFINE_AUTOPTR_CLEANUP_FUNC(RCUReadAuto, rcu_read_auto_unlock) + +#define WITH_RCU_READ_LOCK_GUARD() \ + WITH_RCU_READ_LOCK_GUARD_(glue(_rcu_read_auto, __COUNTER__)) + +#define WITH_RCU_READ_LOCK_GUARD_(var) \ + for (g_autoptr(RCUReadAuto) var = rcu_read_auto_lock(); \ + (var); rcu_read_auto_unlock(var), (var) = NULL) + +#define RCU_READ_LOCK_GUARD() \ + g_autoptr(RCUReadAuto) _rcu_read_auto __attribute__((unused)) = rcu_read_auto_lock() + +#ifdef __cplusplus +} +#endif + +#endif /* QEMU_RCU_H */ diff --git a/include/qemu/rcu_queue.h b/include/qemu/rcu_queue.h new file mode 100644 index 000000000..0e53ddd53 --- /dev/null +++ b/include/qemu/rcu_queue.h @@ -0,0 +1,317 @@ +#ifndef QEMU_RCU_QUEUE_H +#define QEMU_RCU_QUEUE_H + +/* + * rcu_queue.h + * + * RCU-friendly versions of the queue.h primitives. + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + * Copyright (c) 2013 Mike D. Day, IBM Corporation. + * + * IBM's contributions to this file may be relicensed under LGPLv2 or later. + */ + +#include "qemu/queue.h" +#include "qemu/atomic.h" + +#ifdef __cplusplus +extern "C" { +#endif + + +/* + * List access methods. + */ +#define QLIST_EMPTY_RCU(head) (qatomic_read(&(head)->lh_first) == NULL) +#define QLIST_FIRST_RCU(head) (qatomic_rcu_read(&(head)->lh_first)) +#define QLIST_NEXT_RCU(elm, field) (qatomic_rcu_read(&(elm)->field.le_next)) + +/* + * List functions. + */ + + +/* + * The difference between qatomic_read/set and qatomic_rcu_read/set + * is in the including of a read/write memory barrier to the volatile + * access. atomic_rcu_* macros include the memory barrier, the + * plain atomic macros do not. Therefore, it should be correct to + * issue a series of reads or writes to the same element using only + * the atomic_* macro, until the last read or write, which should be + * atomic_rcu_* to introduce a read or write memory barrier as + * appropriate. + */ + +/* Upon publication of the listelm->next value, list readers + * will see the new node when following next pointers from + * antecedent nodes, but may not see the new node when following + * prev pointers from subsequent nodes until after the RCU grace + * period expires. + * see linux/include/rculist.h __list_add_rcu(new, prev, next) + */ +#define QLIST_INSERT_AFTER_RCU(listelm, elm, field) do { \ + (elm)->field.le_next = (listelm)->field.le_next; \ + (elm)->field.le_prev = &(listelm)->field.le_next; \ + qatomic_rcu_set(&(listelm)->field.le_next, (elm)); \ + if ((elm)->field.le_next != NULL) { \ + (elm)->field.le_next->field.le_prev = \ + &(elm)->field.le_next; \ + } \ +} while (/*CONSTCOND*/0) + +/* Upon publication of the listelm->prev->next value, list + * readers will see the new element when following prev pointers + * from subsequent elements, but may not see the new element + * when following next pointers from antecedent elements + * until after the RCU grace period expires. + */ +#define QLIST_INSERT_BEFORE_RCU(listelm, elm, field) do { \ + (elm)->field.le_prev = (listelm)->field.le_prev; \ + (elm)->field.le_next = (listelm); \ + qatomic_rcu_set((listelm)->field.le_prev, (elm)); \ + (listelm)->field.le_prev = &(elm)->field.le_next; \ +} while (/*CONSTCOND*/0) + +/* Upon publication of the head->first value, list readers + * will see the new element when following the head, but may + * not see the new element when following prev pointers from + * subsequent elements until after the RCU grace period has + * expired. + */ +#define QLIST_INSERT_HEAD_RCU(head, elm, field) do { \ + (elm)->field.le_prev = &(head)->lh_first; \ + (elm)->field.le_next = (head)->lh_first; \ + qatomic_rcu_set((&(head)->lh_first), (elm)); \ + if ((elm)->field.le_next != NULL) { \ + (elm)->field.le_next->field.le_prev = \ + &(elm)->field.le_next; \ + } \ +} while (/*CONSTCOND*/0) + + +/* prior to publication of the elm->prev->next value, some list + * readers may still see the removed element when following + * the antecedent's next pointer. + */ +#define QLIST_REMOVE_RCU(elm, field) do { \ + if ((elm)->field.le_next != NULL) { \ + (elm)->field.le_next->field.le_prev = \ + (elm)->field.le_prev; \ + } \ + qatomic_set((elm)->field.le_prev, (elm)->field.le_next); \ +} while (/*CONSTCOND*/0) + +/* List traversal must occur within an RCU critical section. */ +#define QLIST_FOREACH_RCU(var, head, field) \ + for ((var) = qatomic_rcu_read(&(head)->lh_first); \ + (var); \ + (var) = qatomic_rcu_read(&(var)->field.le_next)) + +/* List traversal must occur within an RCU critical section. */ +#define QLIST_FOREACH_SAFE_RCU(var, head, field, next_var) \ + for ((var) = (qatomic_rcu_read(&(head)->lh_first)); \ + (var) && \ + ((next_var) = qatomic_rcu_read(&(var)->field.le_next), 1); \ + (var) = (next_var)) + +/* + * RCU simple queue + */ + +/* Simple queue access methods */ +#define QSIMPLEQ_EMPTY_RCU(head) \ + (qatomic_read(&(head)->sqh_first) == NULL) +#define QSIMPLEQ_FIRST_RCU(head) qatomic_rcu_read(&(head)->sqh_first) +#define QSIMPLEQ_NEXT_RCU(elm, field) qatomic_rcu_read(&(elm)->field.sqe_next) + +/* Simple queue functions */ +#define QSIMPLEQ_INSERT_HEAD_RCU(head, elm, field) do { \ + (elm)->field.sqe_next = (head)->sqh_first; \ + if ((elm)->field.sqe_next == NULL) { \ + (head)->sqh_last = &(elm)->field.sqe_next; \ + } \ + qatomic_rcu_set(&(head)->sqh_first, (elm)); \ +} while (/*CONSTCOND*/0) + +#define QSIMPLEQ_INSERT_TAIL_RCU(head, elm, field) do { \ + (elm)->field.sqe_next = NULL; \ + qatomic_rcu_set((head)->sqh_last, (elm)); \ + (head)->sqh_last = &(elm)->field.sqe_next; \ +} while (/*CONSTCOND*/0) + +#define QSIMPLEQ_INSERT_AFTER_RCU(head, listelm, elm, field) do { \ + (elm)->field.sqe_next = (listelm)->field.sqe_next; \ + if ((elm)->field.sqe_next == NULL) { \ + (head)->sqh_last = &(elm)->field.sqe_next; \ + } \ + qatomic_rcu_set(&(listelm)->field.sqe_next, (elm)); \ +} while (/*CONSTCOND*/0) + +#define QSIMPLEQ_REMOVE_HEAD_RCU(head, field) do { \ + qatomic_set(&(head)->sqh_first, (head)->sqh_first->field.sqe_next);\ + if ((head)->sqh_first == NULL) { \ + (head)->sqh_last = &(head)->sqh_first; \ + } \ +} while (/*CONSTCOND*/0) + +#define QSIMPLEQ_REMOVE_RCU(head, elm, type, field) do { \ + if ((head)->sqh_first == (elm)) { \ + QSIMPLEQ_REMOVE_HEAD_RCU((head), field); \ + } else { \ + struct type *curr = (head)->sqh_first; \ + while (curr->field.sqe_next != (elm)) { \ + curr = curr->field.sqe_next; \ + } \ + qatomic_set(&curr->field.sqe_next, \ + curr->field.sqe_next->field.sqe_next); \ + if (curr->field.sqe_next == NULL) { \ + (head)->sqh_last = &(curr)->field.sqe_next; \ + } \ + } \ +} while (/*CONSTCOND*/0) + +#define QSIMPLEQ_FOREACH_RCU(var, head, field) \ + for ((var) = qatomic_rcu_read(&(head)->sqh_first); \ + (var); \ + (var) = qatomic_rcu_read(&(var)->field.sqe_next)) + +#define QSIMPLEQ_FOREACH_SAFE_RCU(var, head, field, next) \ + for ((var) = qatomic_rcu_read(&(head)->sqh_first); \ + (var) && ((next) = qatomic_rcu_read(&(var)->field.sqe_next), 1);\ + (var) = (next)) + +/* + * RCU tail queue + */ + +/* Tail queue access methods */ +#define QTAILQ_EMPTY_RCU(head) (qatomic_read(&(head)->tqh_first) == NULL) +#define QTAILQ_FIRST_RCU(head) qatomic_rcu_read(&(head)->tqh_first) +#define QTAILQ_NEXT_RCU(elm, field) qatomic_rcu_read(&(elm)->field.tqe_next) + +/* Tail queue functions */ +#define QTAILQ_INSERT_HEAD_RCU(head, elm, field) do { \ + (elm)->field.tqe_next = (head)->tqh_first; \ + if ((elm)->field.tqe_next != NULL) { \ + (head)->tqh_first->field.tqe_circ.tql_prev = \ + &(elm)->field.tqe_circ; \ + } else { \ + (head)->tqh_circ.tql_prev = &(elm)->field.tqe_circ; \ + } \ + qatomic_rcu_set(&(head)->tqh_first, (elm)); \ + (elm)->field.tqe_circ.tql_prev = &(head)->tqh_circ; \ +} while (/*CONSTCOND*/0) + +#define QTAILQ_INSERT_TAIL_RCU(head, elm, field) do { \ + (elm)->field.tqe_next = NULL; \ + (elm)->field.tqe_circ.tql_prev = (head)->tqh_circ.tql_prev; \ + qatomic_rcu_set(&(head)->tqh_circ.tql_prev->tql_next, (elm)); \ + (head)->tqh_circ.tql_prev = &(elm)->field.tqe_circ; \ +} while (/*CONSTCOND*/0) + +#define QTAILQ_INSERT_AFTER_RCU(head, listelm, elm, field) do { \ + (elm)->field.tqe_next = (listelm)->field.tqe_next; \ + if ((elm)->field.tqe_next != NULL) { \ + (elm)->field.tqe_next->field.tqe_circ.tql_prev = \ + &(elm)->field.tqe_circ; \ + } else { \ + (head)->tqh_circ.tql_prev = &(elm)->field.tqe_circ; \ + } \ + qatomic_rcu_set(&(listelm)->field.tqe_next, (elm)); \ + (elm)->field.tqe_circ.tql_prev = &(listelm)->field.tqe_circ; \ +} while (/*CONSTCOND*/0) + +#define QTAILQ_INSERT_BEFORE_RCU(listelm, elm, field) do { \ + (elm)->field.tqe_circ.tql_prev = (listelm)->field.tqe_circ.tql_prev; \ + (elm)->field.tqe_next = (listelm); \ + qatomic_rcu_set(&(listelm)->field.tqe_circ.tql_prev->tql_next, (elm));\ + (listelm)->field.tqe_circ.tql_prev = &(elm)->field.tqe_circ; \ +} while (/*CONSTCOND*/0) + +#define QTAILQ_REMOVE_RCU(head, elm, field) do { \ + if (((elm)->field.tqe_next) != NULL) { \ + (elm)->field.tqe_next->field.tqe_circ.tql_prev = \ + (elm)->field.tqe_circ.tql_prev; \ + } else { \ + (head)->tqh_circ.tql_prev = (elm)->field.tqe_circ.tql_prev; \ + } \ + qatomic_set(&(elm)->field.tqe_circ.tql_prev->tql_next, \ + (elm)->field.tqe_next); \ + (elm)->field.tqe_circ.tql_prev = NULL; \ +} while (/*CONSTCOND*/0) + +#define QTAILQ_FOREACH_RCU(var, head, field) \ + for ((var) = qatomic_rcu_read(&(head)->tqh_first); \ + (var); \ + (var) = qatomic_rcu_read(&(var)->field.tqe_next)) + +#define QTAILQ_FOREACH_SAFE_RCU(var, head, field, next) \ + for ((var) = qatomic_rcu_read(&(head)->tqh_first); \ + (var) && ((next) = qatomic_rcu_read(&(var)->field.tqe_next), 1);\ + (var) = (next)) + +/* + * RCU singly-linked list + */ + +/* Singly-linked list access methods */ +#define QSLIST_EMPTY_RCU(head) (qatomic_read(&(head)->slh_first) == NULL) +#define QSLIST_FIRST_RCU(head) qatomic_rcu_read(&(head)->slh_first) +#define QSLIST_NEXT_RCU(elm, field) qatomic_rcu_read(&(elm)->field.sle_next) + +/* Singly-linked list functions */ +#define QSLIST_INSERT_HEAD_RCU(head, elm, field) do { \ + (elm)->field.sle_next = (head)->slh_first; \ + qatomic_rcu_set(&(head)->slh_first, (elm)); \ +} while (/*CONSTCOND*/0) + +#define QSLIST_INSERT_AFTER_RCU(head, listelm, elm, field) do { \ + (elm)->field.sle_next = (listelm)->field.sle_next; \ + qatomic_rcu_set(&(listelm)->field.sle_next, (elm)); \ +} while (/*CONSTCOND*/0) + +#define QSLIST_REMOVE_HEAD_RCU(head, field) do { \ + qatomic_set(&(head)->slh_first, (head)->slh_first->field.sle_next);\ +} while (/*CONSTCOND*/0) + +#define QSLIST_REMOVE_RCU(head, elm, type, field) do { \ + if ((head)->slh_first == (elm)) { \ + QSLIST_REMOVE_HEAD_RCU((head), field); \ + } else { \ + struct type *curr = (head)->slh_first; \ + while (curr->field.sle_next != (elm)) { \ + curr = curr->field.sle_next; \ + } \ + qatomic_set(&curr->field.sle_next, \ + curr->field.sle_next->field.sle_next); \ + } \ +} while (/*CONSTCOND*/0) + +#define QSLIST_FOREACH_RCU(var, head, field) \ + for ((var) = qatomic_rcu_read(&(head)->slh_first); \ + (var); \ + (var) = qatomic_rcu_read(&(var)->field.sle_next)) + +#define QSLIST_FOREACH_SAFE_RCU(var, head, field, next) \ + for ((var) = qatomic_rcu_read(&(head)->slh_first); \ + (var) && ((next) = qatomic_rcu_read(&(var)->field.sle_next), 1); \ + (var) = (next)) + +#ifdef __cplusplus +} +#endif +#endif /* QEMU_RCU_QUEUE_H */ diff --git a/include/qemu/readline.h b/include/qemu/readline.h new file mode 100644 index 000000000..e81258322 --- /dev/null +++ b/include/qemu/readline.h @@ -0,0 +1,64 @@ +#ifndef READLINE_H +#define READLINE_H + +#define READLINE_CMD_BUF_SIZE 4095 +#define READLINE_MAX_CMDS 64 +#define READLINE_MAX_COMPLETIONS 256 + +typedef void GCC_FMT_ATTR(2, 3) ReadLinePrintfFunc(void *opaque, + const char *fmt, ...); +typedef void ReadLineFlushFunc(void *opaque); +typedef void ReadLineFunc(void *opaque, const char *str, + void *readline_opaque); +typedef void ReadLineCompletionFunc(void *opaque, + const char *cmdline); + +typedef struct ReadLineState { + char cmd_buf[READLINE_CMD_BUF_SIZE + 1]; + int cmd_buf_index; + int cmd_buf_size; + + char last_cmd_buf[READLINE_CMD_BUF_SIZE + 1]; + int last_cmd_buf_index; + int last_cmd_buf_size; + + int esc_state; + int esc_param; + + char *history[READLINE_MAX_CMDS]; + int hist_entry; + + ReadLineCompletionFunc *completion_finder; + char *completions[READLINE_MAX_COMPLETIONS]; + int nb_completions; + int completion_index; + + ReadLineFunc *readline_func; + void *readline_opaque; + int read_password; + char prompt[256]; + + ReadLinePrintfFunc *printf_func; + ReadLineFlushFunc *flush_func; + void *opaque; +} ReadLineState; + +void readline_add_completion(ReadLineState *rs, const char *str); +void readline_set_completion_index(ReadLineState *rs, int completion_index); + +const char *readline_get_history(ReadLineState *rs, unsigned int index); + +void readline_handle_byte(ReadLineState *rs, int ch); + +void readline_start(ReadLineState *rs, const char *prompt, int read_password, + ReadLineFunc *readline_func, void *readline_opaque); +void readline_restart(ReadLineState *rs); +void readline_show_prompt(ReadLineState *rs); + +ReadLineState *readline_init(ReadLinePrintfFunc *printf_func, + ReadLineFlushFunc *flush_func, + void *opaque, + ReadLineCompletionFunc *completion_finder); +void readline_free(ReadLineState *rs); + +#endif /* READLINE_H */ diff --git a/include/qemu/selfmap.h b/include/qemu/selfmap.h new file mode 100644 index 000000000..8382c4c77 --- /dev/null +++ b/include/qemu/selfmap.h @@ -0,0 +1,44 @@ +/* + * Utility functions to read our own memory map + * + * Copyright (c) 2020 Linaro Ltd + * + * SPDX-License-Identifier: GPL-2.0-or-later + */ + +#ifndef _SELFMAP_H_ +#define _SELFMAP_H_ + +typedef struct { + unsigned long start; + unsigned long end; + + /* flags */ + bool is_read; + bool is_write; + bool is_exec; + bool is_priv; + + unsigned long offset; + gchar *dev; + uint64_t inode; + gchar *path; +} MapInfo; + + +/** + * read_self_maps: + * + * Read /proc/self/maps and return a list of MapInfo structures. + */ +GSList *read_self_maps(void); + +/** + * free_self_maps: + * @info: a GSlist + * + * Free a list of MapInfo structures. + */ +void free_self_maps(GSList *info); + +#endif /* _SELFMAP_H_ */ diff --git a/include/qemu/seqlock.h b/include/qemu/seqlock.h new file mode 100644 index 000000000..ecb7d2c86 --- /dev/null +++ b/include/qemu/seqlock.h @@ -0,0 +1,85 @@ +/* + * Seqlock implementation for QEMU + * + * Copyright Red Hat, Inc. 2013 + * + * Author: + * Paolo Bonzini + * + * This work is licensed under the terms of the GNU GPL, version 2 or later. + * See the COPYING file in the top-level directory. + * + */ + +#ifndef QEMU_SEQLOCK_H +#define QEMU_SEQLOCK_H + +#include "qemu/atomic.h" +#include "qemu/thread.h" +#include "qemu/lockable.h" + +typedef struct QemuSeqLock QemuSeqLock; + +struct QemuSeqLock { + unsigned sequence; +}; + +static inline void seqlock_init(QemuSeqLock *sl) +{ + sl->sequence = 0; +} + +/* Lock out other writers and update the count. */ +static inline void seqlock_write_begin(QemuSeqLock *sl) +{ + qatomic_set(&sl->sequence, sl->sequence + 1); + + /* Write sequence before updating other fields. */ + smp_wmb(); +} + +static inline void seqlock_write_end(QemuSeqLock *sl) +{ + /* Write other fields before finalizing sequence. */ + smp_wmb(); + + qatomic_set(&sl->sequence, sl->sequence + 1); +} + +/* Lock out other writers and update the count. */ +static inline void seqlock_write_lock_impl(QemuSeqLock *sl, QemuLockable *lock) +{ + qemu_lockable_lock(lock); + seqlock_write_begin(sl); +} +#define seqlock_write_lock(sl, lock) \ + seqlock_write_lock_impl(sl, QEMU_MAKE_LOCKABLE(lock)) + +/* Update the count and release the lock. */ +static inline void seqlock_write_unlock_impl(QemuSeqLock *sl, QemuLockable *lock) +{ + seqlock_write_end(sl); + qemu_lockable_unlock(lock); +} +#define seqlock_write_unlock(sl, lock) \ + seqlock_write_unlock_impl(sl, QEMU_MAKE_LOCKABLE(lock)) + + +static inline unsigned seqlock_read_begin(const QemuSeqLock *sl) +{ + /* Always fail if a write is in progress. */ + unsigned ret = qatomic_read(&sl->sequence); + + /* Read sequence before reading other fields. */ + smp_rmb(); + return ret & ~1; +} + +static inline int seqlock_read_retry(const QemuSeqLock *sl, unsigned start) +{ + /* Read other fields before reading final sequence. */ + smp_rmb(); + return unlikely(qatomic_read(&sl->sequence) != start); +} + +#endif diff --git a/include/qemu/sockets.h b/include/qemu/sockets.h new file mode 100644 index 000000000..7d1f81357 --- /dev/null +++ b/include/qemu/sockets.h @@ -0,0 +1,114 @@ +/* headers to use the BSD sockets */ + +#ifndef QEMU_SOCKETS_H +#define QEMU_SOCKETS_H + +#ifdef _WIN32 + +int inet_aton(const char *cp, struct in_addr *ia); + +#endif /* !_WIN32 */ + +#include "qapi/qapi-types-sockets.h" + +/* misc helpers */ +bool fd_is_socket(int fd); +int qemu_socket(int domain, int type, int protocol); +int qemu_accept(int s, struct sockaddr *addr, socklen_t *addrlen); +int socket_set_cork(int fd, int v); +int socket_set_nodelay(int fd); +void qemu_set_block(int fd); +int qemu_try_set_nonblock(int fd); +void qemu_set_nonblock(int fd); +int socket_set_fast_reuse(int fd); + +#ifdef WIN32 +/* Windows has different names for the same constants with the same values */ +#define SHUT_RD 0 +#define SHUT_WR 1 +#define SHUT_RDWR 2 +#endif + +int inet_ai_family_from_address(InetSocketAddress *addr, + Error **errp); +int inet_parse(InetSocketAddress *addr, const char *str, Error **errp); +int inet_connect(const char *str, Error **errp); +int inet_connect_saddr(InetSocketAddress *saddr, Error **errp); + +NetworkAddressFamily inet_netfamily(int family); + +int unix_listen(const char *path, Error **errp); +int unix_connect(const char *path, Error **errp); + +SocketAddress *socket_parse(const char *str, Error **errp); +int socket_connect(SocketAddress *addr, Error **errp); +int socket_listen(SocketAddress *addr, int num, Error **errp); +void socket_listen_cleanup(int fd, Error **errp); +int socket_dgram(SocketAddress *remote, SocketAddress *local, Error **errp); + +/* Old, ipv4 only bits. Don't use for new code. */ +int parse_host_port(struct sockaddr_in *saddr, const char *str, + Error **errp); +int socket_init(void); + +/** + * socket_sockaddr_to_address: + * @sa: socket address struct + * @salen: size of @sa struct + * @errp: pointer to uninitialized error object + * + * Get the string representation of the socket + * address. A pointer to the allocated address information + * struct will be returned, which the caller is required to + * release with a call qapi_free_SocketAddress() when no + * longer required. + * + * Returns: the socket address struct, or NULL on error + */ +SocketAddress * +socket_sockaddr_to_address(struct sockaddr_storage *sa, + socklen_t salen, + Error **errp); + +/** + * socket_local_address: + * @fd: the socket file handle + * @errp: pointer to uninitialized error object + * + * Get the string representation of the local socket + * address. A pointer to the allocated address information + * struct will be returned, which the caller is required to + * release with a call qapi_free_SocketAddress() when no + * longer required. + * + * Returns: the socket address struct, or NULL on error + */ +SocketAddress *socket_local_address(int fd, Error **errp); + +/** + * socket_remote_address: + * @fd: the socket file handle + * @errp: pointer to uninitialized error object + * + * Get the string representation of the remote socket + * address. A pointer to the allocated address information + * struct will be returned, which the caller is required to + * release with a call qapi_free_SocketAddress() when no + * longer required. + * + * Returns: the socket address struct, or NULL on error + */ +SocketAddress *socket_remote_address(int fd, Error **errp); + +/** + * socket_address_flatten: + * @addr: the socket address to flatten + * + * Convert SocketAddressLegacy to SocketAddress. Caller is responsible + * for freeing with qapi_free_SocketAddress(). + * + * Returns: the argument converted to SocketAddress. + */ +SocketAddress *socket_address_flatten(SocketAddressLegacy *addr); + +#endif /* QEMU_SOCKETS_H */ diff --git a/include/qemu/stats64.h b/include/qemu/stats64.h new file mode 100644 index 000000000..fdd3d1b8f --- /dev/null +++ b/include/qemu/stats64.h @@ -0,0 +1,193 @@ +/* + * Atomic operations on 64-bit quantities. + * + * Copyright (C) 2017 Red Hat, Inc. + * + * Author: Paolo Bonzini + * + * This work is licensed under the terms of the GNU GPL, version 2 or later. + * See the COPYING file in the top-level directory. + */ + +#ifndef QEMU_STATS64_H +#define QEMU_STATS64_H + +#include "qemu/atomic.h" + +/* This provides atomic operations on 64-bit type, using a reader-writer + * spinlock on architectures that do not have 64-bit accesses. Even on + * those architectures, it tries hard not to take the lock. + */ + +typedef struct Stat64 { +#ifdef CONFIG_ATOMIC64 + uint64_t value; +#else + uint32_t low, high; + uint32_t lock; +#endif +} Stat64; + +#ifdef CONFIG_ATOMIC64 +static inline void stat64_init(Stat64 *s, uint64_t value) +{ + /* This is not guaranteed to be atomic! */ + *s = (Stat64) { value }; +} + +static inline uint64_t stat64_get(const Stat64 *s) +{ + return qatomic_read__nocheck(&s->value); +} + +static inline void stat64_add(Stat64 *s, uint64_t value) +{ + qatomic_add(&s->value, value); +} + +static inline void stat64_min(Stat64 *s, uint64_t value) +{ + uint64_t orig = qatomic_read__nocheck(&s->value); + while (orig > value) { + orig = qatomic_cmpxchg__nocheck(&s->value, orig, value); + } +} + +static inline void stat64_max(Stat64 *s, uint64_t value) +{ + uint64_t orig = qatomic_read__nocheck(&s->value); + while (orig < value) { + orig = qatomic_cmpxchg__nocheck(&s->value, orig, value); + } +} +#else +uint64_t stat64_get(const Stat64 *s); +bool stat64_min_slow(Stat64 *s, uint64_t value); +bool stat64_max_slow(Stat64 *s, uint64_t value); +bool stat64_add32_carry(Stat64 *s, uint32_t low, uint32_t high); + +static inline void stat64_init(Stat64 *s, uint64_t value) +{ + /* This is not guaranteed to be atomic! */ + *s = (Stat64) { .low = value, .high = value >> 32, .lock = 0 }; +} + +static inline void stat64_add(Stat64 *s, uint64_t value) +{ + uint32_t low, high; + high = value >> 32; + low = (uint32_t) value; + if (!low) { + if (high) { + qatomic_add(&s->high, high); + } + return; + } + + for (;;) { + uint32_t orig = s->low; + uint32_t result = orig + low; + uint32_t old; + + if (result < low || high) { + /* If the high part is affected, take the lock. */ + if (stat64_add32_carry(s, low, high)) { + return; + } + continue; + } + + /* No carry, try with a 32-bit cmpxchg. The result is independent of + * the high 32 bits, so it can race just fine with stat64_add32_carry + * and even stat64_get! + */ + old = qatomic_cmpxchg(&s->low, orig, result); + if (orig == old) { + return; + } + } +} + +static inline void stat64_min(Stat64 *s, uint64_t value) +{ + uint32_t low, high; + uint32_t orig_low, orig_high; + + high = value >> 32; + low = (uint32_t) value; + do { + orig_high = qatomic_read(&s->high); + if (orig_high < high) { + return; + } + + if (orig_high == high) { + /* High 32 bits are equal. Read low after high, otherwise we + * can get a false positive (e.g. 0x1235,0x0000 changes to + * 0x1234,0x8000 and we read it as 0x1234,0x0000). Pairs with + * the write barrier in stat64_min_slow. + */ + smp_rmb(); + orig_low = qatomic_read(&s->low); + if (orig_low <= low) { + return; + } + + /* See if we were lucky and a writer raced against us. The + * barrier is theoretically unnecessary, but if we remove it + * we may miss being lucky. + */ + smp_rmb(); + orig_high = qatomic_read(&s->high); + if (orig_high < high) { + return; + } + } + + /* If the value changes in any way, we have to take the lock. */ + } while (!stat64_min_slow(s, value)); +} + +static inline void stat64_max(Stat64 *s, uint64_t value) +{ + uint32_t low, high; + uint32_t orig_low, orig_high; + + high = value >> 32; + low = (uint32_t) value; + do { + orig_high = qatomic_read(&s->high); + if (orig_high > high) { + return; + } + + if (orig_high == high) { + /* High 32 bits are equal. Read low after high, otherwise we + * can get a false positive (e.g. 0x1234,0x8000 changes to + * 0x1235,0x0000 and we read it as 0x1235,0x8000). Pairs with + * the write barrier in stat64_max_slow. + */ + smp_rmb(); + orig_low = qatomic_read(&s->low); + if (orig_low >= low) { + return; + } + + /* See if we were lucky and a writer raced against us. The + * barrier is theoretically unnecessary, but if we remove it + * we may miss being lucky. + */ + smp_rmb(); + orig_high = qatomic_read(&s->high); + if (orig_high > high) { + return; + } + } + + /* If the value changes in any way, we have to take the lock. */ + } while (!stat64_max_slow(s, value)); +} + +#endif + +#endif diff --git a/include/qemu/sys_membarrier.h b/include/qemu/sys_membarrier.h new file mode 100644 index 000000000..b5bfa21d5 --- /dev/null +++ b/include/qemu/sys_membarrier.h @@ -0,0 +1,27 @@ +/* + * Process-global memory barriers + * + * Copyright (c) 2018 Red Hat, Inc. + * + * Author: Paolo Bonzini + */ + +#ifndef QEMU_SYS_MEMBARRIER_H +#define QEMU_SYS_MEMBARRIER_H + +#ifdef CONFIG_MEMBARRIER +/* Only block reordering at the compiler level in the performance-critical + * side. The slow side forces processor-level ordering on all other cores + * through a system call. + */ +extern void smp_mb_global_init(void); +extern void smp_mb_global(void); +#define smp_mb_placeholder() barrier() +#else +/* Keep it simple, execute a real memory barrier on both sides. */ +static inline void smp_mb_global_init(void) {} +#define smp_mb_global() smp_mb() +#define smp_mb_placeholder() smp_mb() +#endif + +#endif diff --git a/include/qemu/systemd.h b/include/qemu/systemd.h new file mode 100644 index 000000000..f0ea1266d --- /dev/null +++ b/include/qemu/systemd.h @@ -0,0 +1,26 @@ +/* + * systemd socket activation support + * + * Copyright 2017 Red Hat, Inc. and/or its affiliates + * + * Authors: + * Richard W.M. Jones + * + * This work is licensed under the terms of the GNU GPL, version 2 or later. + * See the COPYING file in the top-level directory. + */ + +#ifndef QEMU_SYSTEMD_H +#define QEMU_SYSTEMD_H + +#define FIRST_SOCKET_ACTIVATION_FD 3 /* defined by systemd ABI */ + +/* + * Check if socket activation was requested via use of the + * LISTEN_FDS and LISTEN_PID environment variables. + * + * Returns 0 if no socket activation, or the number of FDs. + */ +unsigned int check_socket_activation(void); + +#endif diff --git a/include/qemu/thread-posix.h b/include/qemu/thread-posix.h new file mode 100644 index 000000000..c90352506 --- /dev/null +++ b/include/qemu/thread-posix.h @@ -0,0 +1,51 @@ +#ifndef QEMU_THREAD_POSIX_H +#define QEMU_THREAD_POSIX_H + +#include +#include + +typedef QemuMutex QemuRecMutex; +#define qemu_rec_mutex_destroy qemu_mutex_destroy +#define qemu_rec_mutex_lock_impl qemu_mutex_lock_impl +#define qemu_rec_mutex_trylock_impl qemu_mutex_trylock_impl +#define qemu_rec_mutex_unlock qemu_mutex_unlock + +struct QemuMutex { + pthread_mutex_t lock; +#ifdef CONFIG_DEBUG_MUTEX + const char *file; + int line; +#endif + bool initialized; +}; + +struct QemuCond { + pthread_cond_t cond; + bool initialized; +}; + +struct QemuSemaphore { +#ifndef CONFIG_SEM_TIMEDWAIT + pthread_mutex_t lock; + pthread_cond_t cond; + unsigned int count; +#else + sem_t sem; +#endif + bool initialized; +}; + +struct QemuEvent { +#ifndef __linux__ + pthread_mutex_t lock; + pthread_cond_t cond; +#endif + unsigned value; + bool initialized; +}; + +struct QemuThread { + pthread_t thread; +}; + +#endif diff --git a/include/qemu/thread-win32.h b/include/qemu/thread-win32.h new file mode 100644 index 000000000..d0a1a9597 --- /dev/null +++ b/include/qemu/thread-win32.h @@ -0,0 +1,52 @@ +#ifndef QEMU_THREAD_WIN32_H +#define QEMU_THREAD_WIN32_H + +#include + +struct QemuMutex { + SRWLOCK lock; +#ifdef CONFIG_DEBUG_MUTEX + const char *file; + int line; +#endif + bool initialized; +}; + +typedef struct QemuRecMutex QemuRecMutex; +struct QemuRecMutex { + CRITICAL_SECTION lock; + bool initialized; +}; + +void qemu_rec_mutex_destroy(QemuRecMutex *mutex); +void qemu_rec_mutex_lock_impl(QemuRecMutex *mutex, const char *file, int line); +int qemu_rec_mutex_trylock_impl(QemuRecMutex *mutex, const char *file, + int line); +void qemu_rec_mutex_unlock(QemuRecMutex *mutex); + +struct QemuCond { + CONDITION_VARIABLE var; + bool initialized; +}; + +struct QemuSemaphore { + HANDLE sema; + bool initialized; +}; + +struct QemuEvent { + int value; + HANDLE event; + bool initialized; +}; + +typedef struct QemuThreadData QemuThreadData; +struct QemuThread { + QemuThreadData *data; + unsigned tid; +}; + +/* Only valid for joinable threads. */ +HANDLE qemu_thread_get_handle(struct QemuThread *thread); + +#endif diff --git a/include/qemu/thread.h b/include/qemu/thread.h new file mode 100644 index 000000000..543576318 --- /dev/null +++ b/include/qemu/thread.h @@ -0,0 +1,389 @@ +#ifndef QEMU_THREAD_H +#define QEMU_THREAD_H + +#include "qemu/processor.h" +#include "qemu/atomic.h" + +typedef struct QemuCond QemuCond; +typedef struct QemuSemaphore QemuSemaphore; +typedef struct QemuEvent QemuEvent; +typedef struct QemuLockCnt QemuLockCnt; +typedef struct QemuThread QemuThread; + +#ifdef _WIN32 +#include "qemu/thread-win32.h" +#else +#include "qemu/thread-posix.h" +#endif + +/* include QSP header once QemuMutex, QemuCond etc. are defined */ +#include "qemu/qsp.h" + +#define QEMU_THREAD_JOINABLE 0 +#define QEMU_THREAD_DETACHED 1 + +void qemu_mutex_init(QemuMutex *mutex); +void qemu_mutex_destroy(QemuMutex *mutex); +int qemu_mutex_trylock_impl(QemuMutex *mutex, const char *file, const int line); +void qemu_mutex_lock_impl(QemuMutex *mutex, const char *file, const int line); +void qemu_mutex_unlock_impl(QemuMutex *mutex, const char *file, const int line); + +typedef void (*QemuMutexLockFunc)(QemuMutex *m, const char *f, int l); +typedef int (*QemuMutexTrylockFunc)(QemuMutex *m, const char *f, int l); +typedef void (*QemuRecMutexLockFunc)(QemuRecMutex *m, const char *f, int l); +typedef int (*QemuRecMutexTrylockFunc)(QemuRecMutex *m, const char *f, int l); +typedef void (*QemuCondWaitFunc)(QemuCond *c, QemuMutex *m, const char *f, + int l); +typedef bool (*QemuCondTimedWaitFunc)(QemuCond *c, QemuMutex *m, int ms, + const char *f, int l); + +extern QemuMutexLockFunc qemu_bql_mutex_lock_func; +extern QemuMutexLockFunc qemu_mutex_lock_func; +extern QemuMutexTrylockFunc qemu_mutex_trylock_func; +extern QemuRecMutexLockFunc qemu_rec_mutex_lock_func; +extern QemuRecMutexTrylockFunc qemu_rec_mutex_trylock_func; +extern QemuCondWaitFunc qemu_cond_wait_func; +extern QemuCondTimedWaitFunc qemu_cond_timedwait_func; + +/* convenience macros to bypass the profiler */ +#define qemu_mutex_lock__raw(m) \ + qemu_mutex_lock_impl(m, __FILE__, __LINE__) +#define qemu_mutex_trylock__raw(m) \ + qemu_mutex_trylock_impl(m, __FILE__, __LINE__) + +#ifdef __COVERITY__ +/* + * Coverity is severely confused by the indirect function calls, + * hide them. + */ +#define qemu_mutex_lock(m) \ + qemu_mutex_lock_impl(m, __FILE__, __LINE__) +#define qemu_mutex_trylock(m) \ + qemu_mutex_trylock_impl(m, __FILE__, __LINE__) +#define qemu_rec_mutex_lock(m) \ + qemu_rec_mutex_lock_impl(m, __FILE__, __LINE__) +#define qemu_rec_mutex_trylock(m) \ + qemu_rec_mutex_trylock_impl(m, __FILE__, __LINE__) +#define qemu_cond_wait(c, m) \ + qemu_cond_wait_impl(c, m, __FILE__, __LINE__) +#define qemu_cond_timedwait(c, m, ms) \ + qemu_cond_timedwait_impl(c, m, ms, __FILE__, __LINE__) +#else +#define qemu_mutex_lock(m) ({ \ + QemuMutexLockFunc _f = qatomic_read(&qemu_mutex_lock_func); \ + _f(m, __FILE__, __LINE__); \ + }) + +#define qemu_mutex_trylock(m) ({ \ + QemuMutexTrylockFunc _f = qatomic_read(&qemu_mutex_trylock_func); \ + _f(m, __FILE__, __LINE__); \ + }) + +#define qemu_rec_mutex_lock(m) ({ \ + QemuRecMutexLockFunc _f = qatomic_read(&qemu_rec_mutex_lock_func);\ + _f(m, __FILE__, __LINE__); \ + }) + +#define qemu_rec_mutex_trylock(m) ({ \ + QemuRecMutexTrylockFunc _f; \ + _f = qatomic_read(&qemu_rec_mutex_trylock_func); \ + _f(m, __FILE__, __LINE__); \ + }) + +#define qemu_cond_wait(c, m) ({ \ + QemuCondWaitFunc _f = qatomic_read(&qemu_cond_wait_func); \ + _f(c, m, __FILE__, __LINE__); \ + }) + +#define qemu_cond_timedwait(c, m, ms) ({ \ + QemuCondTimedWaitFunc _f = qatomic_read(&qemu_cond_timedwait_func);\ + _f(c, m, ms, __FILE__, __LINE__); \ + }) +#endif + +#define qemu_mutex_unlock(mutex) \ + qemu_mutex_unlock_impl(mutex, __FILE__, __LINE__) + +static inline void (qemu_mutex_lock)(QemuMutex *mutex) +{ + qemu_mutex_lock(mutex); +} + +static inline int (qemu_mutex_trylock)(QemuMutex *mutex) +{ + return qemu_mutex_trylock(mutex); +} + +static inline void (qemu_mutex_unlock)(QemuMutex *mutex) +{ + qemu_mutex_unlock(mutex); +} + +static inline void (qemu_rec_mutex_lock)(QemuRecMutex *mutex) +{ + qemu_rec_mutex_lock(mutex); +} + +static inline int (qemu_rec_mutex_trylock)(QemuRecMutex *mutex) +{ + return qemu_rec_mutex_trylock(mutex); +} + +/* Prototypes for other functions are in thread-posix.h/thread-win32.h. */ +void qemu_rec_mutex_init(QemuRecMutex *mutex); + +void qemu_cond_init(QemuCond *cond); +void qemu_cond_destroy(QemuCond *cond); + +/* + * IMPORTANT: The implementation does not guarantee that pthread_cond_signal + * and pthread_cond_broadcast can be called except while the same mutex is + * held as in the corresponding pthread_cond_wait calls! + */ +void qemu_cond_signal(QemuCond *cond); +void qemu_cond_broadcast(QemuCond *cond); +void qemu_cond_wait_impl(QemuCond *cond, QemuMutex *mutex, + const char *file, const int line); +bool qemu_cond_timedwait_impl(QemuCond *cond, QemuMutex *mutex, int ms, + const char *file, const int line); + +static inline void (qemu_cond_wait)(QemuCond *cond, QemuMutex *mutex) +{ + qemu_cond_wait(cond, mutex); +} + +/* Returns true if timeout has not expired, and false otherwise */ +static inline bool (qemu_cond_timedwait)(QemuCond *cond, QemuMutex *mutex, + int ms) +{ + return qemu_cond_timedwait(cond, mutex, ms); +} + +void qemu_sem_init(QemuSemaphore *sem, int init); +void qemu_sem_post(QemuSemaphore *sem); +void qemu_sem_wait(QemuSemaphore *sem); +int qemu_sem_timedwait(QemuSemaphore *sem, int ms); +void qemu_sem_destroy(QemuSemaphore *sem); + +void qemu_event_init(QemuEvent *ev, bool init); +void qemu_event_set(QemuEvent *ev); +void qemu_event_reset(QemuEvent *ev); +void qemu_event_wait(QemuEvent *ev); +void qemu_event_destroy(QemuEvent *ev); + +void qemu_thread_create(QemuThread *thread, const char *name, + void *(*start_routine)(void *), + void *arg, int mode); +void *qemu_thread_join(QemuThread *thread); +void qemu_thread_get_self(QemuThread *thread); +bool qemu_thread_is_self(QemuThread *thread); +void qemu_thread_exit(void *retval) QEMU_NORETURN; +void qemu_thread_naming(bool enable); + +struct Notifier; +/** + * qemu_thread_atexit_add: + * @notifier: Notifier to add + * + * Add the specified notifier to a list which will be run via + * notifier_list_notify() when this thread exits (either by calling + * qemu_thread_exit() or by returning from its start_routine). + * The usual usage is that the caller passes a Notifier which is + * a per-thread variable; it can then use the callback to free + * other per-thread data. + * + * If the thread exits as part of the entire process exiting, + * it is unspecified whether notifiers are called or not. + */ +void qemu_thread_atexit_add(struct Notifier *notifier); +/** + * qemu_thread_atexit_remove: + * @notifier: Notifier to remove + * + * Remove the specified notifier from the thread-exit notification + * list. It is not valid to try to remove a notifier which is not + * on the list. + */ +void qemu_thread_atexit_remove(struct Notifier *notifier); + +#ifdef CONFIG_TSAN +#include +#endif + +struct QemuSpin { + int value; +}; + +static inline void qemu_spin_init(QemuSpin *spin) +{ + __sync_lock_release(&spin->value); +#ifdef CONFIG_TSAN + __tsan_mutex_create(spin, __tsan_mutex_not_static); +#endif +} + +/* const parameter because the only purpose here is the TSAN annotation */ +static inline void qemu_spin_destroy(const QemuSpin *spin) +{ +#ifdef CONFIG_TSAN + __tsan_mutex_destroy((void *)spin, __tsan_mutex_not_static); +#endif +} + +static inline void qemu_spin_lock(QemuSpin *spin) +{ +#ifdef CONFIG_TSAN + __tsan_mutex_pre_lock(spin, 0); +#endif + while (unlikely(__sync_lock_test_and_set(&spin->value, true))) { + while (qatomic_read(&spin->value)) { + cpu_relax(); + } + } +#ifdef CONFIG_TSAN + __tsan_mutex_post_lock(spin, 0, 0); +#endif +} + +static inline bool qemu_spin_trylock(QemuSpin *spin) +{ +#ifdef CONFIG_TSAN + __tsan_mutex_pre_lock(spin, __tsan_mutex_try_lock); +#endif + bool busy = __sync_lock_test_and_set(&spin->value, true); +#ifdef CONFIG_TSAN + unsigned flags = __tsan_mutex_try_lock; + flags |= busy ? __tsan_mutex_try_lock_failed : 0; + __tsan_mutex_post_lock(spin, flags, 0); +#endif + return busy; +} + +static inline bool qemu_spin_locked(QemuSpin *spin) +{ + return qatomic_read(&spin->value); +} + +static inline void qemu_spin_unlock(QemuSpin *spin) +{ +#ifdef CONFIG_TSAN + __tsan_mutex_pre_unlock(spin, 0); +#endif + __sync_lock_release(&spin->value); +#ifdef CONFIG_TSAN + __tsan_mutex_post_unlock(spin, 0); +#endif +} + +struct QemuLockCnt { +#ifndef CONFIG_LINUX + QemuMutex mutex; +#endif + unsigned count; +}; + +/** + * qemu_lockcnt_init: initialize a QemuLockcnt + * @lockcnt: the lockcnt to initialize + * + * Initialize lockcnt's counter to zero and prepare its mutex + * for usage. + */ +void qemu_lockcnt_init(QemuLockCnt *lockcnt); + +/** + * qemu_lockcnt_destroy: destroy a QemuLockcnt + * @lockcnt: the lockcnt to destruct + * + * Destroy lockcnt's mutex. + */ +void qemu_lockcnt_destroy(QemuLockCnt *lockcnt); + +/** + * qemu_lockcnt_inc: increment a QemuLockCnt's counter + * @lockcnt: the lockcnt to operate on + * + * If the lockcnt's count is zero, wait for critical sections + * to finish and increment lockcnt's count to 1. If the count + * is not zero, just increment it. + * + * Because this function can wait on the mutex, it must not be + * called while the lockcnt's mutex is held by the current thread. + * For the same reason, qemu_lockcnt_inc can also contribute to + * AB-BA deadlocks. This is a sample deadlock scenario: + * + * thread 1 thread 2 + * ------------------------------------------------------- + * qemu_lockcnt_lock(&lc1); + * qemu_lockcnt_lock(&lc2); + * qemu_lockcnt_inc(&lc2); + * qemu_lockcnt_inc(&lc1); + */ +void qemu_lockcnt_inc(QemuLockCnt *lockcnt); + +/** + * qemu_lockcnt_dec: decrement a QemuLockCnt's counter + * @lockcnt: the lockcnt to operate on + */ +void qemu_lockcnt_dec(QemuLockCnt *lockcnt); + +/** + * qemu_lockcnt_dec_and_lock: decrement a QemuLockCnt's counter and + * possibly lock it. + * @lockcnt: the lockcnt to operate on + * + * Decrement lockcnt's count. If the new count is zero, lock + * the mutex and return true. Otherwise, return false. + */ +bool qemu_lockcnt_dec_and_lock(QemuLockCnt *lockcnt); + +/** + * qemu_lockcnt_dec_if_lock: possibly decrement a QemuLockCnt's counter and + * lock it. + * @lockcnt: the lockcnt to operate on + * + * If the count is 1, decrement the count to zero, lock + * the mutex and return true. Otherwise, return false. + */ +bool qemu_lockcnt_dec_if_lock(QemuLockCnt *lockcnt); + +/** + * qemu_lockcnt_lock: lock a QemuLockCnt's mutex. + * @lockcnt: the lockcnt to operate on + * + * Remember that concurrent visits are not blocked unless the count is + * also zero. You can use qemu_lockcnt_count to check for this inside a + * critical section. + */ +void qemu_lockcnt_lock(QemuLockCnt *lockcnt); + +/** + * qemu_lockcnt_unlock: release a QemuLockCnt's mutex. + * @lockcnt: the lockcnt to operate on. + */ +void qemu_lockcnt_unlock(QemuLockCnt *lockcnt); + +/** + * qemu_lockcnt_inc_and_unlock: combined unlock/increment on a QemuLockCnt. + * @lockcnt: the lockcnt to operate on. + * + * This is the same as + * + * qemu_lockcnt_unlock(lockcnt); + * qemu_lockcnt_inc(lockcnt); + * + * but more efficient. + */ +void qemu_lockcnt_inc_and_unlock(QemuLockCnt *lockcnt); + +/** + * qemu_lockcnt_count: query a LockCnt's count. + * @lockcnt: the lockcnt to query. + * + * Note that the count can change at any time. Still, while the + * lockcnt is locked, one can usefully check whether the count + * is non-zero. + */ +unsigned qemu_lockcnt_count(QemuLockCnt *lockcnt); + +#endif diff --git a/include/qemu/throttle-options.h b/include/qemu/throttle-options.h new file mode 100644 index 000000000..3528a8f4a --- /dev/null +++ b/include/qemu/throttle-options.h @@ -0,0 +1,114 @@ +/* + * QEMU throttling command line options + * + * This work is licensed under the terms of the GNU GPL, version 2 or + * (at your option) any later version. + * + * See the COPYING file in the top-level directory for details. + * + */ +#ifndef THROTTLE_OPTIONS_H +#define THROTTLE_OPTIONS_H + +#define QEMU_OPT_IOPS_TOTAL "iops-total" +#define QEMU_OPT_IOPS_TOTAL_MAX "iops-total-max" +#define QEMU_OPT_IOPS_TOTAL_MAX_LENGTH "iops-total-max-length" +#define QEMU_OPT_IOPS_READ "iops-read" +#define QEMU_OPT_IOPS_READ_MAX "iops-read-max" +#define QEMU_OPT_IOPS_READ_MAX_LENGTH "iops-read-max-length" +#define QEMU_OPT_IOPS_WRITE "iops-write" +#define QEMU_OPT_IOPS_WRITE_MAX "iops-write-max" +#define QEMU_OPT_IOPS_WRITE_MAX_LENGTH "iops-write-max-length" +#define QEMU_OPT_BPS_TOTAL "bps-total" +#define QEMU_OPT_BPS_TOTAL_MAX "bps-total-max" +#define QEMU_OPT_BPS_TOTAL_MAX_LENGTH "bps-total-max-length" +#define QEMU_OPT_BPS_READ "bps-read" +#define QEMU_OPT_BPS_READ_MAX "bps-read-max" +#define QEMU_OPT_BPS_READ_MAX_LENGTH "bps-read-max-length" +#define QEMU_OPT_BPS_WRITE "bps-write" +#define QEMU_OPT_BPS_WRITE_MAX "bps-write-max" +#define QEMU_OPT_BPS_WRITE_MAX_LENGTH "bps-write-max-length" +#define QEMU_OPT_IOPS_SIZE "iops-size" +#define QEMU_OPT_THROTTLE_GROUP_NAME "throttle-group" + +#define THROTTLE_OPT_PREFIX "throttling." +#define THROTTLE_OPTS \ + { \ + .name = THROTTLE_OPT_PREFIX QEMU_OPT_IOPS_TOTAL,\ + .type = QEMU_OPT_NUMBER,\ + .help = "limit total I/O operations per second",\ + },{ \ + .name = THROTTLE_OPT_PREFIX QEMU_OPT_IOPS_READ,\ + .type = QEMU_OPT_NUMBER,\ + .help = "limit read operations per second",\ + },{ \ + .name = THROTTLE_OPT_PREFIX QEMU_OPT_IOPS_WRITE,\ + .type = QEMU_OPT_NUMBER,\ + .help = "limit write operations per second",\ + },{ \ + .name = THROTTLE_OPT_PREFIX QEMU_OPT_BPS_TOTAL,\ + .type = QEMU_OPT_NUMBER,\ + .help = "limit total bytes per second",\ + },{ \ + .name = THROTTLE_OPT_PREFIX QEMU_OPT_BPS_READ,\ + .type = QEMU_OPT_NUMBER,\ + .help = "limit read bytes per second",\ + },{ \ + .name = THROTTLE_OPT_PREFIX QEMU_OPT_BPS_WRITE,\ + .type = QEMU_OPT_NUMBER,\ + .help = "limit write bytes per second",\ + },{ \ + .name = THROTTLE_OPT_PREFIX QEMU_OPT_IOPS_TOTAL_MAX,\ + .type = QEMU_OPT_NUMBER,\ + .help = "I/O operations burst",\ + },{ \ + .name = THROTTLE_OPT_PREFIX QEMU_OPT_IOPS_READ_MAX,\ + .type = QEMU_OPT_NUMBER,\ + .help = "I/O operations read burst",\ + },{ \ + .name = THROTTLE_OPT_PREFIX QEMU_OPT_IOPS_WRITE_MAX,\ + .type = QEMU_OPT_NUMBER,\ + .help = "I/O operations write burst",\ + },{ \ + .name = THROTTLE_OPT_PREFIX QEMU_OPT_BPS_TOTAL_MAX,\ + .type = QEMU_OPT_NUMBER,\ + .help = "total bytes burst",\ + },{ \ + .name = THROTTLE_OPT_PREFIX QEMU_OPT_BPS_READ_MAX,\ + .type = QEMU_OPT_NUMBER,\ + .help = "total bytes read burst",\ + },{ \ + .name = THROTTLE_OPT_PREFIX QEMU_OPT_BPS_WRITE_MAX,\ + .type = QEMU_OPT_NUMBER,\ + .help = "total bytes write burst",\ + },{ \ + .name = THROTTLE_OPT_PREFIX QEMU_OPT_IOPS_TOTAL_MAX_LENGTH,\ + .type = QEMU_OPT_NUMBER,\ + .help = "length of the iops-total-max burst period, in seconds",\ + },{ \ + .name = THROTTLE_OPT_PREFIX QEMU_OPT_IOPS_READ_MAX_LENGTH,\ + .type = QEMU_OPT_NUMBER,\ + .help = "length of the iops-read-max burst period, in seconds",\ + },{ \ + .name = THROTTLE_OPT_PREFIX QEMU_OPT_IOPS_WRITE_MAX_LENGTH,\ + .type = QEMU_OPT_NUMBER,\ + .help = "length of the iops-write-max burst period, in seconds",\ + },{ \ + .name = THROTTLE_OPT_PREFIX QEMU_OPT_BPS_TOTAL_MAX_LENGTH,\ + .type = QEMU_OPT_NUMBER,\ + .help = "length of the bps-total-max burst period, in seconds",\ + },{ \ + .name = THROTTLE_OPT_PREFIX QEMU_OPT_BPS_READ_MAX_LENGTH,\ + .type = QEMU_OPT_NUMBER,\ + .help = "length of the bps-read-max burst period, in seconds",\ + },{ \ + .name = THROTTLE_OPT_PREFIX QEMU_OPT_BPS_WRITE_MAX_LENGTH,\ + .type = QEMU_OPT_NUMBER,\ + .help = "length of the bps-write-max burst period, in seconds",\ + },{ \ + .name = THROTTLE_OPT_PREFIX QEMU_OPT_IOPS_SIZE,\ + .type = QEMU_OPT_NUMBER,\ + .help = "when limiting by iops max size of an I/O in bytes",\ + } + +#endif diff --git a/include/qemu/throttle.h b/include/qemu/throttle.h new file mode 100644 index 000000000..05f634613 --- /dev/null +++ b/include/qemu/throttle.h @@ -0,0 +1,159 @@ +/* + * QEMU throttling infrastructure + * + * Copyright (C) Nodalink, EURL. 2013-2014 + * Copyright (C) Igalia, S.L. 2015-2016 + * + * Authors: + * Benoît Canet + * Alberto Garcia + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation; either version 2 or + * (at your option) version 3 of the License. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, see . + */ + +#ifndef THROTTLE_H +#define THROTTLE_H + +#include "qapi/qapi-types-block-core.h" +#include "qemu/timer.h" + +#define THROTTLE_VALUE_MAX 1000000000000000LL + +typedef enum { + THROTTLE_BPS_TOTAL, + THROTTLE_BPS_READ, + THROTTLE_BPS_WRITE, + THROTTLE_OPS_TOTAL, + THROTTLE_OPS_READ, + THROTTLE_OPS_WRITE, + BUCKETS_COUNT, +} BucketType; + +/* + * This module implements I/O limits using the leaky bucket + * algorithm. The code is independent of the I/O units, but it is + * currently used for bytes per second and operations per second. + * + * Three parameters can be set by the user: + * + * - avg: the desired I/O limits in units per second. + * - max: the limit during bursts, also in units per second. + * - burst_length: the maximum length of the burst period, in seconds. + * + * Here's how it works: + * + * - The bucket level (number of performed I/O units) is kept in + * bkt.level and leaks at a rate of bkt.avg units per second. + * + * - The size of the bucket is bkt.max * bkt.burst_length. Once the + * bucket is full no more I/O is performed until the bucket leaks + * again. This is what makes the I/O rate bkt.avg. + * + * - The bkt.avg rate does not apply until the bucket is full, + * allowing the user to do bursts until then. The I/O limit during + * bursts is bkt.max. To enforce this limit we keep an additional + * bucket in bkt.burst_level that leaks at a rate of bkt.max units + * per second. + * + * - Because of all of the above, the user can perform I/O at a + * maximum of bkt.max units per second for at most bkt.burst_length + * seconds in a row. After that the bucket will be full and the I/O + * rate will go down to bkt.avg. + * + * - Since the bucket always leaks at a rate of bkt.avg, this also + * determines how much the user needs to wait before being able to + * do bursts again. + */ + +typedef struct LeakyBucket { + uint64_t avg; /* average goal in units per second */ + uint64_t max; /* leaky bucket max burst in units */ + double level; /* bucket level in units */ + double burst_level; /* bucket level in units (for computing bursts) */ + uint64_t burst_length; /* max length of the burst period, in seconds */ +} LeakyBucket; + +/* The following structure is used to configure a ThrottleState + * It contains a bit of state: the bucket field of the LeakyBucket structure. + * However it allows to keep the code clean and the bucket field is reset to + * zero at the right time. + */ +typedef struct ThrottleConfig { + LeakyBucket buckets[BUCKETS_COUNT]; /* leaky buckets */ + uint64_t op_size; /* size of an operation in bytes */ +} ThrottleConfig; + +typedef struct ThrottleState { + ThrottleConfig cfg; /* configuration */ + int64_t previous_leak; /* timestamp of the last leak done */ +} ThrottleState; + +typedef struct ThrottleTimers { + QEMUTimer *timers[2]; /* timers used to do the throttling */ + QEMUClockType clock_type; /* the clock used */ + + /* Callbacks */ + QEMUTimerCB *read_timer_cb; + QEMUTimerCB *write_timer_cb; + void *timer_opaque; +} ThrottleTimers; + +/* operations on single leaky buckets */ +void throttle_leak_bucket(LeakyBucket *bkt, int64_t delta); + +int64_t throttle_compute_wait(LeakyBucket *bkt); + +/* init/destroy cycle */ +void throttle_init(ThrottleState *ts); + +void throttle_timers_init(ThrottleTimers *tt, + AioContext *aio_context, + QEMUClockType clock_type, + QEMUTimerCB *read_timer_cb, + QEMUTimerCB *write_timer_cb, + void *timer_opaque); + +void throttle_timers_destroy(ThrottleTimers *tt); + +void throttle_timers_detach_aio_context(ThrottleTimers *tt); + +void throttle_timers_attach_aio_context(ThrottleTimers *tt, + AioContext *new_context); + +bool throttle_timers_are_initialized(ThrottleTimers *tt); + +/* configuration */ +bool throttle_enabled(ThrottleConfig *cfg); + +bool throttle_is_valid(ThrottleConfig *cfg, Error **errp); + +void throttle_config(ThrottleState *ts, + QEMUClockType clock_type, + ThrottleConfig *cfg); + +void throttle_get_config(ThrottleState *ts, ThrottleConfig *cfg); + +void throttle_config_init(ThrottleConfig *cfg); + +/* usage */ +bool throttle_schedule_timer(ThrottleState *ts, + ThrottleTimers *tt, + bool is_write); + +void throttle_account(ThrottleState *ts, bool is_write, uint64_t size); +void throttle_limits_to_config(ThrottleLimits *arg, ThrottleConfig *cfg, + Error **errp); +void throttle_config_to_limits(ThrottleConfig *cfg, ThrottleLimits *var); + +#endif diff --git a/include/qemu/timed-average.h b/include/qemu/timed-average.h new file mode 100644 index 000000000..08245e7a1 --- /dev/null +++ b/include/qemu/timed-average.h @@ -0,0 +1,63 @@ +/* + * QEMU timed average computation + * + * Copyright (C) Nodalink, EURL. 2014 + * Copyright (C) Igalia, S.L. 2015 + * + * Authors: + * Benoît Canet + * Alberto Garcia + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 2 of the License, or + * (at your option) version 3 or any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + */ + +#ifndef TIMED_AVERAGE_H +#define TIMED_AVERAGE_H + + +#include "qemu/timer.h" + +typedef struct TimedAverageWindow TimedAverageWindow; +typedef struct TimedAverage TimedAverage; + +/* All fields of both structures are private */ + +struct TimedAverageWindow { + uint64_t min; /* minimum value accounted in the window */ + uint64_t max; /* maximum value accounted in the window */ + uint64_t sum; /* sum of all values */ + uint64_t count; /* number of values */ + int64_t expiration; /* the end of the current window in ns */ +}; + +struct TimedAverage { + uint64_t period; /* period in nanoseconds */ + TimedAverageWindow windows[2]; /* two overlapping windows of with + * an offset of period / 2 between them */ + unsigned current; /* the current window index: it's also the + * oldest window index */ + QEMUClockType clock_type; /* the clock used */ +}; + +void timed_average_init(TimedAverage *ta, QEMUClockType clock_type, + uint64_t period); + +void timed_average_account(TimedAverage *ta, uint64_t value); + +uint64_t timed_average_min(TimedAverage *ta); +uint64_t timed_average_avg(TimedAverage *ta); +uint64_t timed_average_max(TimedAverage *ta); +uint64_t timed_average_sum(TimedAverage *ta, uint64_t *elapsed); + +#endif diff --git a/include/qemu/timer.h b/include/qemu/timer.h new file mode 100644 index 000000000..bdecc5b41 --- /dev/null +++ b/include/qemu/timer.h @@ -0,0 +1,995 @@ +#ifndef QEMU_TIMER_H +#define QEMU_TIMER_H + +#include "qemu/bitops.h" +#include "qemu/notify.h" +#include "qemu/host-utils.h" + +#define NANOSECONDS_PER_SECOND 1000000000LL + +/* timers */ + +#define SCALE_MS 1000000 +#define SCALE_US 1000 +#define SCALE_NS 1 + +/** + * QEMUClockType: + * + * The following clock types are available: + * + * @QEMU_CLOCK_REALTIME: Real time clock + * + * The real time clock should be used only for stuff which does not + * change the virtual machine state, as it runs even if the virtual + * machine is stopped. + * + * @QEMU_CLOCK_VIRTUAL: virtual clock + * + * The virtual clock only runs during the emulation. It stops + * when the virtual machine is stopped. + * + * @QEMU_CLOCK_HOST: host clock + * + * The host clock should be used for device models that emulate accurate + * real time sources. It will continue to run when the virtual machine + * is suspended, and it will reflect system time changes the host may + * undergo (e.g. due to NTP). + * + * @QEMU_CLOCK_VIRTUAL_RT: realtime clock used for icount warp + * + * Outside icount mode, this clock is the same as @QEMU_CLOCK_VIRTUAL. + * In icount mode, this clock counts nanoseconds while the virtual + * machine is running. It is used to increase @QEMU_CLOCK_VIRTUAL + * while the CPUs are sleeping and thus not executing instructions. + */ + +typedef enum { + QEMU_CLOCK_REALTIME = 0, + QEMU_CLOCK_VIRTUAL = 1, + QEMU_CLOCK_HOST = 2, + QEMU_CLOCK_VIRTUAL_RT = 3, + QEMU_CLOCK_MAX +} QEMUClockType; + +/** + * QEMU Timer attributes: + * + * An individual timer may be given one or multiple attributes when initialized. + * Each attribute corresponds to one bit. Attributes modify the processing + * of timers when they fire. + * + * The following attributes are available: + * + * QEMU_TIMER_ATTR_EXTERNAL: drives external subsystem + * QEMU_TIMER_ATTR_ALL: mask for all existing attributes + * + * Timers with this attribute do not recorded in rr mode, therefore it could be + * used for the subsystems that operate outside the guest core. Applicable only + * with virtual clock type. + */ + +#define QEMU_TIMER_ATTR_EXTERNAL ((int)BIT(0)) +#define QEMU_TIMER_ATTR_ALL 0xffffffff + +typedef struct QEMUTimerList QEMUTimerList; + +struct QEMUTimerListGroup { + QEMUTimerList *tl[QEMU_CLOCK_MAX]; +}; + +typedef void QEMUTimerCB(void *opaque); +typedef void QEMUTimerListNotifyCB(void *opaque, QEMUClockType type); + +struct QEMUTimer { + int64_t expire_time; /* in nanoseconds */ + QEMUTimerList *timer_list; + QEMUTimerCB *cb; + void *opaque; + QEMUTimer *next; + int attributes; + int scale; +}; + +extern QEMUTimerListGroup main_loop_tlg; + +/* + * qemu_clock_get_ns; + * @type: the clock type + * + * Get the nanosecond value of a clock with + * type @type + * + * Returns: the clock value in nanoseconds + */ +int64_t qemu_clock_get_ns(QEMUClockType type); + +/** + * qemu_clock_get_ms; + * @type: the clock type + * + * Get the millisecond value of a clock with + * type @type + * + * Returns: the clock value in milliseconds + */ +static inline int64_t qemu_clock_get_ms(QEMUClockType type) +{ + return qemu_clock_get_ns(type) / SCALE_MS; +} + +/** + * qemu_clock_get_us; + * @type: the clock type + * + * Get the microsecond value of a clock with + * type @type + * + * Returns: the clock value in microseconds + */ +static inline int64_t qemu_clock_get_us(QEMUClockType type) +{ + return qemu_clock_get_ns(type) / SCALE_US; +} + +/** + * qemu_clock_has_timers: + * @type: the clock type + * + * Determines whether a clock's default timer list + * has timers attached + * + * Note that this function should not be used when other threads also access + * the timer list. The return value may be outdated by the time it is acted + * upon. + * + * Returns: true if the clock's default timer list + * has timers attached + */ +bool qemu_clock_has_timers(QEMUClockType type); + +/** + * qemu_clock_expired: + * @type: the clock type + * + * Determines whether a clock's default timer list + * has an expired timer. + * + * Returns: true if the clock's default timer list has + * an expired timer + */ +bool qemu_clock_expired(QEMUClockType type); + +/** + * qemu_clock_use_for_deadline: + * @type: the clock type + * + * Determine whether a clock should be used for deadline + * calculations. Some clocks, for instance vm_clock with + * icount_enabled() set, do not count in nanoseconds. + * Such clocks are not used for deadline calculations, and are presumed + * to interrupt any poll using qemu_notify/aio_notify + * etc. + * + * Returns: true if the clock runs in nanoseconds and + * should be used for a deadline. + */ +bool qemu_clock_use_for_deadline(QEMUClockType type); + +/** + * qemu_clock_deadline_ns_all: + * @type: the clock type + * @attr_mask: mask for the timer attributes that are included + * in deadline calculation + * + * Calculate the deadline across all timer lists associated + * with a clock (as opposed to just the default one) + * in nanoseconds, or -1 if no timer is set to expire. + * + * Returns: time until expiry in nanoseconds or -1 + */ +int64_t qemu_clock_deadline_ns_all(QEMUClockType type, int attr_mask); + +/** + * qemu_clock_get_main_loop_timerlist: + * @type: the clock type + * + * Return the default timer list associated with a clock. + * + * Returns: the default timer list + */ +QEMUTimerList *qemu_clock_get_main_loop_timerlist(QEMUClockType type); + +/** + * qemu_clock_nofify: + * @type: the clock type + * + * Call the notifier callback connected with the default timer + * list linked to the clock, or qemu_notify() if none. + */ +void qemu_clock_notify(QEMUClockType type); + +/** + * qemu_clock_enable: + * @type: the clock type + * @enabled: true to enable, false to disable + * + * Enable or disable a clock + * Disabling the clock will wait for related timerlists to stop + * executing qemu_run_timers. Thus, this functions should not + * be used from the callback of a timer that is based on @clock. + * Doing so would cause a deadlock. + * + * Caller should hold BQL. + */ +void qemu_clock_enable(QEMUClockType type, bool enabled); + +/** + * qemu_clock_run_timers: + * @type: clock on which to operate + * + * Run all the timers associated with the default timer list + * of a clock. + * + * Returns: true if any timer ran. + */ +bool qemu_clock_run_timers(QEMUClockType type); + +/** + * qemu_clock_run_all_timers: + * + * Run all the timers associated with the default timer list + * of every clock. + * + * Returns: true if any timer ran. + */ +bool qemu_clock_run_all_timers(void); + + +/* + * QEMUTimerList + */ + +/** + * timerlist_new: + * @type: the clock type to associate with the timerlist + * @cb: the callback to call on notification + * @opaque: the opaque pointer to pass to the callback + * + * Create a new timerlist associated with the clock of + * type @type. + * + * Returns: a pointer to the QEMUTimerList created + */ +QEMUTimerList *timerlist_new(QEMUClockType type, + QEMUTimerListNotifyCB *cb, void *opaque); + +/** + * timerlist_free: + * @timer_list: the timer list to free + * + * Frees a timer_list. It must have no active timers. + */ +void timerlist_free(QEMUTimerList *timer_list); + +/** + * timerlist_has_timers: + * @timer_list: the timer list to operate on + * + * Determine whether a timer list has active timers + * + * Note that this function should not be used when other threads also access + * the timer list. The return value may be outdated by the time it is acted + * upon. + * + * Returns: true if the timer list has timers. + */ +bool timerlist_has_timers(QEMUTimerList *timer_list); + +/** + * timerlist_expired: + * @timer_list: the timer list to operate on + * + * Determine whether a timer list has any timers which + * are expired. + * + * Returns: true if the timer list has timers which + * have expired. + */ +bool timerlist_expired(QEMUTimerList *timer_list); + +/** + * timerlist_deadline_ns: + * @timer_list: the timer list to operate on + * + * Determine the deadline for a timer_list, i.e. + * the number of nanoseconds until the first timer + * expires. Return -1 if there are no timers. + * + * Returns: the number of nanoseconds until the earliest + * timer expires -1 if none + */ +int64_t timerlist_deadline_ns(QEMUTimerList *timer_list); + +/** + * timerlist_get_clock: + * @timer_list: the timer list to operate on + * + * Determine the clock type associated with a timer list. + * + * Returns: the clock type associated with the + * timer list. + */ +QEMUClockType timerlist_get_clock(QEMUTimerList *timer_list); + +/** + * timerlist_run_timers: + * @timer_list: the timer list to use + * + * Call all expired timers associated with the timer list. + * + * Returns: true if any timer expired + */ +bool timerlist_run_timers(QEMUTimerList *timer_list); + +/** + * timerlist_notify: + * @timer_list: the timer list to use + * + * call the notifier callback associated with the timer list. + */ +void timerlist_notify(QEMUTimerList *timer_list); + +/* + * QEMUTimerListGroup + */ + +/** + * timerlistgroup_init: + * @tlg: the timer list group + * @cb: the callback to call when a notify is required + * @opaque: the opaque pointer to be passed to the callback. + * + * Initialise a timer list group. This must already be + * allocated in memory and zeroed. The notifier callback is + * called whenever a clock in the timer list group is + * reenabled or whenever a timer associated with any timer + * list is modified. If @cb is specified as null, qemu_notify() + * is used instead. + */ +void timerlistgroup_init(QEMUTimerListGroup *tlg, + QEMUTimerListNotifyCB *cb, void *opaque); + +/** + * timerlistgroup_deinit: + * @tlg: the timer list group + * + * Deinitialise a timer list group. This must already be + * initialised. Note the memory is not freed. + */ +void timerlistgroup_deinit(QEMUTimerListGroup *tlg); + +/** + * timerlistgroup_run_timers: + * @tlg: the timer list group + * + * Run the timers associated with a timer list group. + * This will run timers on multiple clocks. + * + * Returns: true if any timer callback ran + */ +bool timerlistgroup_run_timers(QEMUTimerListGroup *tlg); + +/** + * timerlistgroup_deadline_ns: + * @tlg: the timer list group + * + * Determine the deadline of the soonest timer to + * expire associated with any timer list linked to + * the timer list group. Only clocks suitable for + * deadline calculation are included. + * + * Returns: the deadline in nanoseconds or -1 if no + * timers are to expire. + */ +int64_t timerlistgroup_deadline_ns(QEMUTimerListGroup *tlg); + +/* + * QEMUTimer + */ + +/** + * timer_init_full: + * @ts: the timer to be initialised + * @timer_list_group: (optional) the timer list group to attach the timer to + * @type: the clock type to use + * @scale: the scale value for the timer + * @attributes: 0, or one or more OR'ed QEMU_TIMER_ATTR_ values + * @cb: the callback to be called when the timer expires + * @opaque: the opaque pointer to be passed to the callback + * + * Initialise a timer with the given scale and attributes, + * and associate it with timer list for given clock @type in @timer_list_group + * (or default timer list group, if NULL). + * The caller is responsible for allocating the memory. + * + * You need not call an explicit deinit call. Simply make + * sure it is not on a list with timer_del. + */ +void timer_init_full(QEMUTimer *ts, + QEMUTimerListGroup *timer_list_group, QEMUClockType type, + int scale, int attributes, + QEMUTimerCB *cb, void *opaque); + +/** + * timer_init: + * @ts: the timer to be initialised + * @type: the clock to associate with the timer + * @scale: the scale value for the timer + * @cb: the callback to call when the timer expires + * @opaque: the opaque pointer to pass to the callback + * + * Initialize a timer with the given scale on the default timer list + * associated with the clock. + * See timer_init_full for details. + */ +static inline void timer_init(QEMUTimer *ts, QEMUClockType type, int scale, + QEMUTimerCB *cb, void *opaque) +{ + timer_init_full(ts, NULL, type, scale, 0, cb, opaque); +} + +/** + * timer_init_ns: + * @ts: the timer to be initialised + * @type: the clock to associate with the timer + * @cb: the callback to call when the timer expires + * @opaque: the opaque pointer to pass to the callback + * + * Initialize a timer with nanosecond scale on the default timer list + * associated with the clock. + * See timer_init_full for details. + */ +static inline void timer_init_ns(QEMUTimer *ts, QEMUClockType type, + QEMUTimerCB *cb, void *opaque) +{ + timer_init(ts, type, SCALE_NS, cb, opaque); +} + +/** + * timer_init_us: + * @ts: the timer to be initialised + * @type: the clock to associate with the timer + * @cb: the callback to call when the timer expires + * @opaque: the opaque pointer to pass to the callback + * + * Initialize a timer with microsecond scale on the default timer list + * associated with the clock. + * See timer_init_full for details. + */ +static inline void timer_init_us(QEMUTimer *ts, QEMUClockType type, + QEMUTimerCB *cb, void *opaque) +{ + timer_init(ts, type, SCALE_US, cb, opaque); +} + +/** + * timer_init_ms: + * @ts: the timer to be initialised + * @type: the clock to associate with the timer + * @cb: the callback to call when the timer expires + * @opaque: the opaque pointer to pass to the callback + * + * Initialize a timer with millisecond scale on the default timer list + * associated with the clock. + * See timer_init_full for details. + */ +static inline void timer_init_ms(QEMUTimer *ts, QEMUClockType type, + QEMUTimerCB *cb, void *opaque) +{ + timer_init(ts, type, SCALE_MS, cb, opaque); +} + +/** + * timer_new_full: + * @timer_list_group: (optional) the timer list group to attach the timer to + * @type: the clock type to use + * @scale: the scale value for the timer + * @attributes: 0, or one or more OR'ed QEMU_TIMER_ATTR_ values + * @cb: the callback to be called when the timer expires + * @opaque: the opaque pointer to be passed to the callback + * + * Create a new timer with the given scale and attributes, + * and associate it with timer list for given clock @type in @timer_list_group + * (or default timer list group, if NULL). + * The memory is allocated by the function. + * + * This is not the preferred interface unless you know you + * are going to call timer_free. Use timer_init or timer_init_full instead. + * + * The default timer list has one special feature: in icount mode, + * %QEMU_CLOCK_VIRTUAL timers are run in the vCPU thread. This is + * not true of other timer lists, which are typically associated + * with an AioContext---each of them runs its timer callbacks in its own + * AioContext thread. + * + * Returns: a pointer to the timer + */ +static inline QEMUTimer *timer_new_full(QEMUTimerListGroup *timer_list_group, + QEMUClockType type, + int scale, int attributes, + QEMUTimerCB *cb, void *opaque) +{ + QEMUTimer *ts = g_malloc0(sizeof(QEMUTimer)); + timer_init_full(ts, timer_list_group, type, scale, attributes, cb, opaque); + return ts; +} + +/** + * timer_new: + * @type: the clock type to use + * @scale: the scale value for the timer + * @cb: the callback to be called when the timer expires + * @opaque: the opaque pointer to be passed to the callback + * + * Create a new timer with the given scale, + * and associate it with the default timer list for the clock type @type. + * See timer_new_full for details. + * + * Returns: a pointer to the timer + */ +static inline QEMUTimer *timer_new(QEMUClockType type, int scale, + QEMUTimerCB *cb, void *opaque) +{ + return timer_new_full(NULL, type, scale, 0, cb, opaque); +} + +/** + * timer_new_ns: + * @type: the clock type to associate with the timer + * @cb: the callback to call when the timer expires + * @opaque: the opaque pointer to pass to the callback + * + * Create a new timer with nanosecond scale on the default timer list + * associated with the clock. + * See timer_new_full for details. + * + * Returns: a pointer to the newly created timer + */ +static inline QEMUTimer *timer_new_ns(QEMUClockType type, QEMUTimerCB *cb, + void *opaque) +{ + return timer_new(type, SCALE_NS, cb, opaque); +} + +/** + * timer_new_us: + * @type: the clock type to associate with the timer + * @cb: the callback to call when the timer expires + * @opaque: the opaque pointer to pass to the callback + * + * Create a new timer with microsecond scale on the default timer list + * associated with the clock. + * See timer_new_full for details. + * + * Returns: a pointer to the newly created timer + */ +static inline QEMUTimer *timer_new_us(QEMUClockType type, QEMUTimerCB *cb, + void *opaque) +{ + return timer_new(type, SCALE_US, cb, opaque); +} + +/** + * timer_new_ms: + * @type: the clock type to associate with the timer + * @cb: the callback to call when the timer expires + * @opaque: the opaque pointer to pass to the callback + * + * Create a new timer with millisecond scale on the default timer list + * associated with the clock. + * See timer_new_full for details. + * + * Returns: a pointer to the newly created timer + */ +static inline QEMUTimer *timer_new_ms(QEMUClockType type, QEMUTimerCB *cb, + void *opaque) +{ + return timer_new(type, SCALE_MS, cb, opaque); +} + +/** + * timer_deinit: + * @ts: the timer to be de-initialised + * + * Deassociate the timer from any timerlist. You should + * call timer_del before. After this call, any further + * timer_del call cannot cause dangling pointer accesses + * even if the previously used timerlist is freed. + */ +void timer_deinit(QEMUTimer *ts); + +/** + * timer_free: + * @ts: the timer + * + * Free a timer (it must not be on the active list) + */ +static inline void timer_free(QEMUTimer *ts) +{ + g_free(ts); +} + +/** + * timer_del: + * @ts: the timer + * + * Delete a timer from the active list. + * + * This function is thread-safe but the timer and its timer list must not be + * freed while this function is running. + */ +void timer_del(QEMUTimer *ts); + +/** + * timer_mod_ns: + * @ts: the timer + * @expire_time: the expiry time in nanoseconds + * + * Modify a timer to expire at @expire_time + * + * This function is thread-safe but the timer and its timer list must not be + * freed while this function is running. + */ +void timer_mod_ns(QEMUTimer *ts, int64_t expire_time); + +/** + * timer_mod_anticipate_ns: + * @ts: the timer + * @expire_time: the expiry time in nanoseconds + * + * Modify a timer to expire at @expire_time or the current time, + * whichever comes earlier. + * + * This function is thread-safe but the timer and its timer list must not be + * freed while this function is running. + */ +void timer_mod_anticipate_ns(QEMUTimer *ts, int64_t expire_time); + +/** + * timer_mod: + * @ts: the timer + * @expire_time: the expire time in the units associated with the timer + * + * Modify a timer to expiry at @expire_time, taking into + * account the scale associated with the timer. + * + * This function is thread-safe but the timer and its timer list must not be + * freed while this function is running. + */ +void timer_mod(QEMUTimer *ts, int64_t expire_timer); + +/** + * timer_mod_anticipate: + * @ts: the timer + * @expire_time: the expire time in the units associated with the timer + * + * Modify a timer to expire at @expire_time or the current time, whichever + * comes earlier, taking into account the scale associated with the timer. + * + * This function is thread-safe but the timer and its timer list must not be + * freed while this function is running. + */ +void timer_mod_anticipate(QEMUTimer *ts, int64_t expire_time); + +/** + * timer_pending: + * @ts: the timer + * + * Determines whether a timer is pending (i.e. is on the + * active list of timers, whether or not it has not yet expired). + * + * Returns: true if the timer is pending + */ +bool timer_pending(QEMUTimer *ts); + +/** + * timer_expired: + * @ts: the timer + * @current_time: the current time + * + * Determines whether a timer has expired. + * + * Returns: true if the timer has expired + */ +bool timer_expired(QEMUTimer *timer_head, int64_t current_time); + +/** + * timer_expire_time_ns: + * @ts: the timer + * + * Determine the expiry time of a timer + * + * Returns: the expiry time in nanoseconds + */ +uint64_t timer_expire_time_ns(QEMUTimer *ts); + +/** + * timer_get: + * @f: the file + * @ts: the timer + * + * Read a timer @ts from a file @f + */ +void timer_get(QEMUFile *f, QEMUTimer *ts); + +/** + * timer_put: + * @f: the file + * @ts: the timer + */ +void timer_put(QEMUFile *f, QEMUTimer *ts); + +/* + * General utility functions + */ + +/** + * qemu_timeout_ns_to_ms: + * @ns: nanosecond timeout value + * + * Convert a nanosecond timeout value (or -1) to + * a millisecond value (or -1), always rounding up. + * + * Returns: millisecond timeout value + */ +int qemu_timeout_ns_to_ms(int64_t ns); + +/** + * qemu_poll_ns: + * @fds: Array of file descriptors + * @nfds: number of file descriptors + * @timeout: timeout in nanoseconds + * + * Perform a poll like g_poll but with a timeout in nanoseconds. + * See g_poll documentation for further details. + * + * Returns: number of fds ready + */ +int qemu_poll_ns(GPollFD *fds, guint nfds, int64_t timeout); + +/** + * qemu_soonest_timeout: + * @timeout1: first timeout in nanoseconds (or -1 for infinite) + * @timeout2: second timeout in nanoseconds (or -1 for infinite) + * + * Calculates the soonest of two timeout values. -1 means infinite, which + * is later than any other value. + * + * Returns: soonest timeout value in nanoseconds (or -1 for infinite) + */ +static inline int64_t qemu_soonest_timeout(int64_t timeout1, int64_t timeout2) +{ + /* we can abuse the fact that -1 (which means infinite) is a maximal + * value when cast to unsigned. As this is disgusting, it's kept in + * one inline function. + */ + return ((uint64_t) timeout1 < (uint64_t) timeout2) ? timeout1 : timeout2; +} + +/** + * initclocks: + * + * Initialise the clock & timer infrastructure + */ +void init_clocks(QEMUTimerListNotifyCB *notify_cb); + +static inline int64_t get_max_clock_jump(void) +{ + /* This should be small enough to prevent excessive interrupts from being + * generated by the RTC on clock jumps, but large enough to avoid frequent + * unnecessary resets in idle VMs. + */ + return 60 * NANOSECONDS_PER_SECOND; +} + +/* + * Low level clock functions + */ + +/* get host real time in nanosecond */ +static inline int64_t get_clock_realtime(void) +{ + struct timeval tv; + + gettimeofday(&tv, NULL); + return tv.tv_sec * 1000000000LL + (tv.tv_usec * 1000); +} + +/* Warning: don't insert tracepoints into these functions, they are + also used by simpletrace backend and tracepoints would cause + an infinite recursion! */ +#ifdef _WIN32 +extern int64_t clock_freq; + +static inline int64_t get_clock(void) +{ + LARGE_INTEGER ti; + QueryPerformanceCounter(&ti); + return muldiv64(ti.QuadPart, NANOSECONDS_PER_SECOND, clock_freq); +} + +#else + +extern int use_rt_clock; + +static inline int64_t get_clock(void) +{ + if (use_rt_clock) { + struct timespec ts; + clock_gettime(CLOCK_MONOTONIC, &ts); + return ts.tv_sec * 1000000000LL + ts.tv_nsec; + } else { + /* XXX: using gettimeofday leads to problems if the date + changes, so it should be avoided. */ + return get_clock_realtime(); + } +} +#endif + +/*******************************************/ +/* host CPU ticks (if available) */ + +#if defined(_ARCH_PPC) + +static inline int64_t cpu_get_host_ticks(void) +{ + int64_t retval; +#ifdef _ARCH_PPC64 + /* This reads timebase in one 64bit go and includes Cell workaround from: + http://ozlabs.org/pipermail/linuxppc-dev/2006-October/027052.html + */ + __asm__ __volatile__ ("mftb %0\n\t" + "cmpwi %0,0\n\t" + "beq- $-8" + : "=r" (retval)); +#else + /* http://ozlabs.org/pipermail/linuxppc-dev/1999-October/003889.html */ + unsigned long junk; + __asm__ __volatile__ ("mfspr %1,269\n\t" /* mftbu */ + "mfspr %L0,268\n\t" /* mftb */ + "mfspr %0,269\n\t" /* mftbu */ + "cmpw %0,%1\n\t" + "bne $-16" + : "=r" (retval), "=r" (junk)); +#endif + return retval; +} + +#elif defined(__i386__) + +static inline int64_t cpu_get_host_ticks(void) +{ + int64_t val; + asm volatile ("rdtsc" : "=A" (val)); + return val; +} + +#elif defined(__x86_64__) + +static inline int64_t cpu_get_host_ticks(void) +{ + uint32_t low,high; + int64_t val; + asm volatile("rdtsc" : "=a" (low), "=d" (high)); + val = high; + val <<= 32; + val |= low; + return val; +} + +#elif defined(__hppa__) + +static inline int64_t cpu_get_host_ticks(void) +{ + int val; + asm volatile ("mfctl %%cr16, %0" : "=r"(val)); + return val; +} + +#elif defined(__s390__) + +static inline int64_t cpu_get_host_ticks(void) +{ + int64_t val; + asm volatile("stck 0(%1)" : "=m" (val) : "a" (&val) : "cc"); + return val; +} + +#elif defined(__sparc__) + +static inline int64_t cpu_get_host_ticks (void) +{ +#if defined(_LP64) + uint64_t rval; + asm volatile("rd %%tick,%0" : "=r"(rval)); + return rval; +#else + /* We need an %o or %g register for this. For recent enough gcc + there is an "h" constraint for that. Don't bother with that. */ + union { + uint64_t i64; + struct { + uint32_t high; + uint32_t low; + } i32; + } rval; + asm volatile("rd %%tick,%%g1; srlx %%g1,32,%0; mov %%g1,%1" + : "=r"(rval.i32.high), "=r"(rval.i32.low) : : "g1"); + return rval.i64; +#endif +} + +#elif defined(__mips__) && \ + ((defined(__mips_isa_rev) && __mips_isa_rev >= 2) || defined(__linux__)) +/* + * binutils wants to use rdhwr only on mips32r2 + * but as linux kernel emulate it, it's fine + * to use it. + * + */ +#define MIPS_RDHWR(rd, value) { \ + __asm__ __volatile__ (".set push\n\t" \ + ".set mips32r2\n\t" \ + "rdhwr %0, "rd"\n\t" \ + ".set pop" \ + : "=r" (value)); \ + } + +static inline int64_t cpu_get_host_ticks(void) +{ + /* On kernels >= 2.6.25 rdhwr , $2 and $3 are emulated */ + uint32_t count; + static uint32_t cyc_per_count = 0; + + if (!cyc_per_count) { + MIPS_RDHWR("$3", cyc_per_count); + } + + MIPS_RDHWR("$2", count); + return (int64_t)(count * cyc_per_count); +} + +#elif defined(__alpha__) + +static inline int64_t cpu_get_host_ticks(void) +{ + uint64_t cc; + uint32_t cur, ofs; + + asm volatile("rpcc %0" : "=r"(cc)); + cur = cc; + ofs = cc >> 32; + return cur - ofs; +} + +#else +/* The host CPU doesn't have an easily accessible cycle counter. + Just return a monotonically increasing value. This will be + totally wrong, but hopefully better than nothing. */ +static inline int64_t cpu_get_host_ticks(void) +{ + return get_clock(); +} +#endif + +#ifdef CONFIG_PROFILER +static inline int64_t profile_getclock(void) +{ + return get_clock(); +} + +extern int64_t dev_time; +#endif + +#endif diff --git a/include/qemu/tsan.h b/include/qemu/tsan.h new file mode 100644 index 000000000..09cc665f9 --- /dev/null +++ b/include/qemu/tsan.h @@ -0,0 +1,71 @@ +#ifndef QEMU_TSAN_H +#define QEMU_TSAN_H +/* + * tsan.h + * + * This file defines macros used to give ThreadSanitizer + * additional information to help suppress warnings. + * This is necessary since TSan does not provide a header file + * for these annotations. The standard way to include these + * is via the below macros. + * + * Annotation examples can be found here: + * https://github.com/llvm/llvm-project/tree/master/compiler-rt/test/tsan + * annotate_happens_before.cpp or ignore_race.cpp are good places to start. + * + * The full set of annotations can be found here in tsan_interface_ann.cpp. + * https://github.com/llvm/llvm-project/blob/master/compiler-rt/lib/tsan/rtl/ + * + * This work is licensed under the terms of the GNU GPL, version 2 or later. + * See the COPYING file in the top-level directory. + */ + +#ifdef CONFIG_TSAN +/* + * Informs TSan of a happens before/after relationship. + */ +#define QEMU_TSAN_ANNOTATE_HAPPENS_BEFORE(addr) \ + AnnotateHappensBefore(__FILE__, __LINE__, (void *)(addr)) +#define QEMU_TSAN_ANNOTATE_HAPPENS_AFTER(addr) \ + AnnotateHappensAfter(__FILE__, __LINE__, (void *)(addr)) +/* + * Gives TSan more information about thread names it can report the + * name of the thread in the warning report. + */ +#define QEMU_TSAN_ANNOTATE_THREAD_NAME(name) \ + AnnotateThreadName(__FILE__, __LINE__, (void *)(name)) +/* + * Allows defining a region of code on which TSan will not record memory READS. + * This has the effect of disabling race detection for this section of code. + */ +#define QEMU_TSAN_ANNOTATE_IGNORE_READS_BEGIN() \ + AnnotateIgnoreReadsBegin(__FILE__, __LINE__) +#define QEMU_TSAN_ANNOTATE_IGNORE_READS_END() \ + AnnotateIgnoreReadsEnd(__FILE__, __LINE__) +/* + * Allows defining a region of code on which TSan will not record memory + * WRITES. This has the effect of disabling race detection for this + * section of code. + */ +#define QEMU_TSAN_ANNOTATE_IGNORE_WRITES_BEGIN() \ + AnnotateIgnoreWritesBegin(__FILE__, __LINE__) +#define QEMU_TSAN_ANNOTATE_IGNORE_WRITES_END() \ + AnnotateIgnoreWritesEnd(__FILE__, __LINE__) +#else +#define QEMU_TSAN_ANNOTATE_HAPPENS_BEFORE(addr) +#define QEMU_TSAN_ANNOTATE_HAPPENS_AFTER(addr) +#define QEMU_TSAN_ANNOTATE_THREAD_NAME(name) +#define QEMU_TSAN_ANNOTATE_IGNORE_READS_BEGIN() +#define QEMU_TSAN_ANNOTATE_IGNORE_READS_END() +#define QEMU_TSAN_ANNOTATE_IGNORE_WRITES_BEGIN() +#define QEMU_TSAN_ANNOTATE_IGNORE_WRITES_END() +#endif + +void AnnotateHappensBefore(const char *f, int l, void *addr); +void AnnotateHappensAfter(const char *f, int l, void *addr); +void AnnotateThreadName(const char *f, int l, char *name); +void AnnotateIgnoreReadsBegin(const char *f, int l); +void AnnotateIgnoreReadsEnd(const char *f, int l); +void AnnotateIgnoreWritesBegin(const char *f, int l); +void AnnotateIgnoreWritesEnd(const char *f, int l); +#endif diff --git a/include/qemu/typedefs.h b/include/qemu/typedefs.h new file mode 100644 index 000000000..6281eae3b --- /dev/null +++ b/include/qemu/typedefs.h @@ -0,0 +1,141 @@ +#ifndef QEMU_TYPEDEFS_H +#define QEMU_TYPEDEFS_H + +/* + * This header is for selectively avoiding #include just to get a + * typedef name. + * + * Declaring a typedef name in its "obvious" place can result in + * inclusion cycles, in particular for complete struct and union + * types that need more types for their members. It can also result + * in headers pulling in many more headers, slowing down builds. + * + * You can break such cycles and unwanted dependencies by declaring + * the typedef name here. + * + * For struct types used in only a few headers, judicious use of the + * struct tag instead of the typedef name is commonly preferable. + */ + +/* + * Incomplete struct types + * Please keep this list in case-insensitive alphabetical order. + */ +typedef struct AdapterInfo AdapterInfo; +typedef struct AddressSpace AddressSpace; +typedef struct AioContext AioContext; +typedef struct Aml Aml; +typedef struct AnnounceTimer AnnounceTimer; +typedef struct BdrvDirtyBitmap BdrvDirtyBitmap; +typedef struct BdrvDirtyBitmapIter BdrvDirtyBitmapIter; +typedef struct BlockBackend BlockBackend; +typedef struct BlockBackendRootState BlockBackendRootState; +typedef struct BlockDriverState BlockDriverState; +typedef struct BusClass BusClass; +typedef struct BusState BusState; +typedef struct Chardev Chardev; +typedef struct CompatProperty CompatProperty; +typedef struct CoMutex CoMutex; +typedef struct CPUAddressSpace CPUAddressSpace; +typedef struct CPUState CPUState; +typedef struct DeviceListener DeviceListener; +typedef struct DeviceState DeviceState; +typedef struct DirtyBitmapSnapshot DirtyBitmapSnapshot; +typedef struct DisplayChangeListener DisplayChangeListener; +typedef struct DriveInfo DriveInfo; +typedef struct Error Error; +typedef struct EventNotifier EventNotifier; +typedef struct FlatView FlatView; +typedef struct FWCfgEntry FWCfgEntry; +typedef struct FWCfgIoState FWCfgIoState; +typedef struct FWCfgMemState FWCfgMemState; +typedef struct FWCfgState FWCfgState; +typedef struct HostMemoryBackend HostMemoryBackend; +typedef struct I2CBus I2CBus; +typedef struct I2SCodec I2SCodec; +typedef struct IOMMUMemoryRegion IOMMUMemoryRegion; +typedef struct ISABus ISABus; +typedef struct ISADevice ISADevice; +typedef struct IsaDma IsaDma; +typedef struct MACAddr MACAddr; +typedef struct ReservedRegion ReservedRegion; +typedef struct MachineClass MachineClass; +typedef struct MachineState MachineState; +typedef struct MemoryListener MemoryListener; +typedef struct MemoryMappingList MemoryMappingList; +typedef struct MemoryRegion MemoryRegion; +typedef struct MemoryRegionCache MemoryRegionCache; +typedef struct MemoryRegionSection MemoryRegionSection; +typedef struct MigrationIncomingState MigrationIncomingState; +typedef struct MigrationState MigrationState; +typedef struct Monitor Monitor; +typedef struct MonitorDef MonitorDef; +typedef struct MSIMessage MSIMessage; +typedef struct NetClientState NetClientState; +typedef struct NetFilterState NetFilterState; +typedef struct NICInfo NICInfo; +typedef struct NodeInfo NodeInfo; +typedef struct NumaNodeMem NumaNodeMem; +typedef struct Object Object; +typedef struct ObjectClass ObjectClass; +typedef struct PCIBridge PCIBridge; +typedef struct PCIBus PCIBus; +typedef struct PCIDevice PCIDevice; +typedef struct PCIEAERErr PCIEAERErr; +typedef struct PCIEAERLog PCIEAERLog; +typedef struct PCIEAERMsg PCIEAERMsg; +typedef struct PCIEPort PCIEPort; +typedef struct PCIESlot PCIESlot; +typedef struct PCIExpressDevice PCIExpressDevice; +typedef struct PCIExpressHost PCIExpressHost; +typedef struct PCIHostDeviceAddress PCIHostDeviceAddress; +typedef struct PCIHostState PCIHostState; +typedef struct PostcopyDiscardState PostcopyDiscardState; +typedef struct Property Property; +typedef struct PropertyInfo PropertyInfo; +typedef struct QBool QBool; +typedef struct QDict QDict; +typedef struct QEMUBH QEMUBH; +typedef struct QemuConsole QemuConsole; +typedef struct QEMUFile QEMUFile; +typedef struct QemuLockable QemuLockable; +typedef struct QemuMutex QemuMutex; +typedef struct QemuOpt QemuOpt; +typedef struct QemuOpts QemuOpts; +typedef struct QemuOptsList QemuOptsList; +typedef struct QEMUSGList QEMUSGList; +typedef struct QemuSpin QemuSpin; +typedef struct QEMUTimer QEMUTimer; +typedef struct QEMUTimerListGroup QEMUTimerListGroup; +typedef struct QJSON QJSON; +typedef struct QList QList; +typedef struct QNull QNull; +typedef struct QNum QNum; +typedef struct QObject QObject; +typedef struct QString QString; +typedef struct RAMBlock RAMBlock; +typedef struct Range Range; +typedef struct SavedIOTLB SavedIOTLB; +typedef struct SHPCDevice SHPCDevice; +typedef struct SSIBus SSIBus; +typedef struct VirtIODevice VirtIODevice; +typedef struct Visitor Visitor; +typedef struct VMChangeStateEntry VMChangeStateEntry; +typedef struct VMStateDescription VMStateDescription; + +/* + * Pointer types + * Such typedefs should be limited to cases where the typedef's users + * are oblivious of its "pointer-ness". + * Please keep this list in case-insensitive alphabetical order. + */ +typedef struct IRQState *qemu_irq; + +/* + * Function types + */ +typedef void SaveStateHandler(QEMUFile *f, void *opaque); +typedef int LoadStateHandler(QEMUFile *f, void *opaque, int version_id); +typedef void (*qemu_irq_handler)(void *opaque, int n, int level); + +#endif /* QEMU_TYPEDEFS_H */ diff --git a/include/qemu/unicode.h b/include/qemu/unicode.h new file mode 100644 index 000000000..7fa10b8e6 --- /dev/null +++ b/include/qemu/unicode.h @@ -0,0 +1,7 @@ +#ifndef QEMU_UNICODE_H +#define QEMU_UNICODE_H + +int mod_utf8_codepoint(const char *s, size_t n, char **end); +ssize_t mod_utf8_encode(char buf[], size_t bufsz, int codepoint); + +#endif diff --git a/include/qemu/units.h b/include/qemu/units.h new file mode 100644 index 000000000..692db3fbb --- /dev/null +++ b/include/qemu/units.h @@ -0,0 +1,20 @@ +/* + * IEC binary prefixes definitions + * + * Copyright (C) 2015 Nikunj A Dadhania, IBM Corporation + * Copyright (C) 2018 Philippe Mathieu-Daudé + * + * SPDX-License-Identifier: GPL-2.0-or-later + */ + +#ifndef QEMU_UNITS_H +#define QEMU_UNITS_H + +#define KiB (INT64_C(1) << 10) +#define MiB (INT64_C(1) << 20) +#define GiB (INT64_C(1) << 30) +#define TiB (INT64_C(1) << 40) +#define PiB (INT64_C(1) << 50) +#define EiB (INT64_C(1) << 60) + +#endif diff --git a/include/qemu/uri.h b/include/qemu/uri.h new file mode 100644 index 000000000..d201c6126 --- /dev/null +++ b/include/qemu/uri.h @@ -0,0 +1,111 @@ +/** + * Summary: library of generic URI related routines + * Description: library of generic URI related routines + * Implements RFC 2396 + * + * Copyright (C) 1998-2003 Daniel Veillard. All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * DANIEL VEILLARD BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER + * IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + * Except as contained in this notice, the name of Daniel Veillard shall not + * be used in advertising or otherwise to promote the sale, use or other + * dealings in this Software without prior written authorization from him. + * + * Author: Daniel Veillard + ** + * Copyright (C) 2007 Red Hat, Inc. + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * + * Authors: + * Richard W.M. Jones + * + * Utility functions to help parse and assemble query strings. + */ + +#ifndef QEMU_URI_H +#define QEMU_URI_H + +#ifdef __cplusplus +extern "C" { +#endif + +/** + * URI: + * + * A parsed URI reference. This is a struct containing the various fields + * as described in RFC 2396 but separated for further processing. + */ +typedef struct URI { + char *scheme; /* the URI scheme */ + char *opaque; /* opaque part */ + char *authority; /* the authority part */ + char *server; /* the server part */ + char *user; /* the user part */ + int port; /* the port number */ + char *path; /* the path string */ + char *fragment; /* the fragment identifier */ + int cleanup; /* parsing potentially unclean URI */ + char *query; /* the query string (as it appears in the URI) */ +} URI; + +URI *uri_new(void); +char *uri_resolve(const char *URI, const char *base); +char *uri_resolve_relative(const char *URI, const char *base); +URI *uri_parse(const char *str); +URI *uri_parse_raw(const char *str, int raw); +int uri_parse_into(URI *uri, const char *str); +char *uri_to_string(URI *uri); +char *uri_string_escape(const char *str, const char *list); +char *uri_string_unescape(const char *str, int len, char *target); +void uri_free(URI *uri); + +/* Single web service query parameter 'name=value'. */ +typedef struct QueryParam { + char *name; /* Name (unescaped). */ + char *value; /* Value (unescaped). */ + int ignore; /* Ignore this field in qparam_get_query */ +} QueryParam; + +/* Set of parameters. */ +typedef struct QueryParams { + int n; /* number of parameters used */ + int alloc; /* allocated space */ + QueryParam *p; /* array of parameters */ +} QueryParams; + +struct QueryParams *query_params_new (int init_alloc); +extern QueryParams *query_params_parse (const char *query); +extern void query_params_free (QueryParams *ps); + +#ifdef __cplusplus +} +#endif +#endif /* QEMU_URI_H */ diff --git a/include/qemu/uuid.h b/include/qemu/uuid.h new file mode 100644 index 000000000..9925febfa --- /dev/null +++ b/include/qemu/uuid.h @@ -0,0 +1,87 @@ +/* + * QEMU UUID functions + * + * Copyright 2016 Red Hat, Inc. + * + * Authors: + * Fam Zheng + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the Free + * Software Foundation; either version 2 of the License, or (at your option) + * any later version. + * + */ + +#ifndef QEMU_UUID_H +#define QEMU_UUID_H + + +/* Version 4 UUID (pseudo random numbers), RFC4122 4.4. */ + +typedef struct { + union { + unsigned char data[16]; + struct { + /* Generated in BE endian, can be swapped with qemu_uuid_bswap. */ + uint32_t time_low; + uint16_t time_mid; + uint16_t time_high_and_version; + uint8_t clock_seq_and_reserved; + uint8_t clock_seq_low; + uint8_t node[6]; + } fields; + }; +} QemuUUID; + +/** + * UUID_LE - converts the fields of UUID to little-endian array, + * each of parameters is the filed of UUID. + * + * @time_low: The low field of the timestamp + * @time_mid: The middle field of the timestamp + * @time_hi_and_version: The high field of the timestamp + * multiplexed with the version number + * @clock_seq_hi_and_reserved: The high field of the clock + * sequence multiplexed with the variant + * @clock_seq_low: The low field of the clock sequence + * @node0: The spatially unique node0 identifier + * @node1: The spatially unique node1 identifier + * @node2: The spatially unique node2 identifier + * @node3: The spatially unique node3 identifier + * @node4: The spatially unique node4 identifier + * @node5: The spatially unique node5 identifier + */ +#define UUID_LE(time_low, time_mid, time_hi_and_version, \ + clock_seq_hi_and_reserved, clock_seq_low, node0, node1, node2, \ + node3, node4, node5) \ + { (time_low) & 0xff, ((time_low) >> 8) & 0xff, ((time_low) >> 16) & 0xff, \ + ((time_low) >> 24) & 0xff, (time_mid) & 0xff, ((time_mid) >> 8) & 0xff, \ + (time_hi_and_version) & 0xff, ((time_hi_and_version) >> 8) & 0xff, \ + (clock_seq_hi_and_reserved), (clock_seq_low), (node0), (node1), (node2),\ + (node3), (node4), (node5) } + +#define UUID_FMT "%02hhx%02hhx%02hhx%02hhx-" \ + "%02hhx%02hhx-%02hhx%02hhx-" \ + "%02hhx%02hhx-" \ + "%02hhx%02hhx%02hhx%02hhx%02hhx%02hhx" + +#define UUID_FMT_LEN 36 + +#define UUID_NONE "00000000-0000-0000-0000-000000000000" + +void qemu_uuid_generate(QemuUUID *out); + +int qemu_uuid_is_null(const QemuUUID *uu); + +int qemu_uuid_is_equal(const QemuUUID *lhv, const QemuUUID *rhv); + +void qemu_uuid_unparse(const QemuUUID *uuid, char *out); + +char *qemu_uuid_unparse_strdup(const QemuUUID *uuid); + +int qemu_uuid_parse(const char *str, QemuUUID *uuid); + +QemuUUID qemu_uuid_bswap(QemuUUID uuid); + +#endif diff --git a/include/qemu/vfio-helpers.h b/include/qemu/vfio-helpers.h new file mode 100644 index 000000000..4491c8e1a --- /dev/null +++ b/include/qemu/vfio-helpers.h @@ -0,0 +1,32 @@ +/* + * QEMU VFIO helpers + * + * Copyright 2016 - 2018 Red Hat, Inc. + * + * Authors: + * Fam Zheng + * + * This work is licensed under the terms of the GNU GPL, version 2 or later. + * See the COPYING file in the top-level directory. + */ + +#ifndef QEMU_VFIO_HELPERS_H +#define QEMU_VFIO_HELPERS_H + +typedef struct QEMUVFIOState QEMUVFIOState; + +QEMUVFIOState *qemu_vfio_open_pci(const char *device, Error **errp); +void qemu_vfio_close(QEMUVFIOState *s); +int qemu_vfio_dma_map(QEMUVFIOState *s, void *host, size_t size, + bool temporary, uint64_t *iova_list); +int qemu_vfio_dma_reset_temporary(QEMUVFIOState *s); +void qemu_vfio_dma_unmap(QEMUVFIOState *s, void *host); +void *qemu_vfio_pci_map_bar(QEMUVFIOState *s, int index, + uint64_t offset, uint64_t size, int prot, + Error **errp); +void qemu_vfio_pci_unmap_bar(QEMUVFIOState *s, int index, void *bar, + uint64_t offset, uint64_t size); +int qemu_vfio_pci_init_irq(QEMUVFIOState *s, EventNotifier *e, + int irq_type, Error **errp); + +#endif diff --git a/include/qemu/vhost-user-server.h b/include/qemu/vhost-user-server.h new file mode 100644 index 000000000..0da4c2cc4 --- /dev/null +++ b/include/qemu/vhost-user-server.h @@ -0,0 +1,65 @@ +/* + * Sharing QEMU devices via vhost-user protocol + * + * Copyright (c) Coiby Xu . + * Copyright (c) 2020 Red Hat, Inc. + * + * This work is licensed under the terms of the GNU GPL, version 2 or + * later. See the COPYING file in the top-level directory. + */ + +#ifndef VHOST_USER_SERVER_H +#define VHOST_USER_SERVER_H + +#include "contrib/libvhost-user/libvhost-user.h" +#include "io/channel-socket.h" +#include "io/channel-file.h" +#include "io/net-listener.h" +#include "qemu/error-report.h" +#include "qapi/error.h" +#include "standard-headers/linux/virtio_blk.h" + +/* A kick fd that we monitor on behalf of libvhost-user */ +typedef struct VuFdWatch { + VuDev *vu_dev; + int fd; /*kick fd*/ + void *pvt; + vu_watch_cb cb; + QTAILQ_ENTRY(VuFdWatch) next; +} VuFdWatch; + +/** + * VuServer: + * A vhost-user server instance with user-defined VuDevIface callbacks. + * Vhost-user device backends can be implemented using VuServer. VuDevIface + * callbacks and virtqueue kicks run in the given AioContext. + */ +typedef struct { + QIONetListener *listener; + QEMUBH *restart_listener_bh; + AioContext *ctx; + int max_queues; + const VuDevIface *vu_iface; + + /* Protected by ctx lock */ + VuDev vu_dev; + QIOChannel *ioc; /* The I/O channel with the client */ + QIOChannelSocket *sioc; /* The underlying data channel with the client */ + QTAILQ_HEAD(, VuFdWatch) vu_fd_watches; + + Coroutine *co_trip; /* coroutine for processing VhostUserMsg */ +} VuServer; + +bool vhost_user_server_start(VuServer *server, + SocketAddress *unix_socket, + AioContext *ctx, + uint16_t max_queues, + const VuDevIface *vu_iface, + Error **errp); + +void vhost_user_server_stop(VuServer *server); + +void vhost_user_server_attach_aio_context(VuServer *server, AioContext *ctx); +void vhost_user_server_detach_aio_context(VuServer *server); + +#endif /* VHOST_USER_SERVER_H */ diff --git a/include/qemu/win_dump_defs.h b/include/qemu/win_dump_defs.h new file mode 100644 index 000000000..145096e8e --- /dev/null +++ b/include/qemu/win_dump_defs.h @@ -0,0 +1,179 @@ +/* + * Windows crashdump definitions + * + * Copyright (c) 2018 Virtuozzo International GmbH + * + * This work is licensed under the terms of the GNU GPL, version 2 or later. + * See the COPYING file in the top-level directory. + * + */ + +#ifndef QEMU_WIN_DUMP_DEFS_H +#define QEMU_WIN_DUMP_DEFS_H + +typedef struct WinDumpPhyMemRun64 { + uint64_t BasePage; + uint64_t PageCount; +} QEMU_PACKED WinDumpPhyMemRun64; + +typedef struct WinDumpPhyMemDesc64 { + uint32_t NumberOfRuns; + uint32_t unused; + uint64_t NumberOfPages; + WinDumpPhyMemRun64 Run[43]; +} QEMU_PACKED WinDumpPhyMemDesc64; + +typedef struct WinDumpExceptionRecord { + uint32_t ExceptionCode; + uint32_t ExceptionFlags; + uint64_t ExceptionRecord; + uint64_t ExceptionAddress; + uint32_t NumberParameters; + uint32_t unused; + uint64_t ExceptionInformation[15]; +} QEMU_PACKED WinDumpExceptionRecord; + +typedef struct WinDumpHeader64 { + char Signature[4]; + char ValidDump[4]; + uint32_t MajorVersion; + uint32_t MinorVersion; + uint64_t DirectoryTableBase; + uint64_t PfnDatabase; + uint64_t PsLoadedModuleList; + uint64_t PsActiveProcessHead; + uint32_t MachineImageType; + uint32_t NumberProcessors; + union { + struct { + uint32_t BugcheckCode; + uint32_t unused0; + uint64_t BugcheckParameter1; + uint64_t BugcheckParameter2; + uint64_t BugcheckParameter3; + uint64_t BugcheckParameter4; + }; + uint8_t BugcheckData[40]; + }; + uint8_t VersionUser[32]; + uint64_t KdDebuggerDataBlock; + union { + WinDumpPhyMemDesc64 PhysicalMemoryBlock; + uint8_t PhysicalMemoryBlockBuffer[704]; + }; + union { + uint8_t ContextBuffer[3000]; + }; + WinDumpExceptionRecord Exception; + uint32_t DumpType; + uint32_t unused1; + uint64_t RequiredDumpSpace; + uint64_t SystemTime; + char Comment[128]; + uint64_t SystemUpTime; + uint32_t MiniDumpFields; + uint32_t SecondaryDataState; + uint32_t ProductType; + uint32_t SuiteMask; + uint32_t WriterStatus; + uint8_t unused2; + uint8_t KdSecondaryVersion; + uint8_t reserved[4018]; +} QEMU_PACKED WinDumpHeader64; + +#define KDBG_OWNER_TAG_OFFSET64 0x10 +#define KDBG_MM_PFN_DATABASE_OFFSET64 0xC0 +#define KDBG_KI_BUGCHECK_DATA_OFFSET64 0x88 +#define KDBG_KI_PROCESSOR_BLOCK_OFFSET64 0x218 +#define KDBG_OFFSET_PRCB_CONTEXT_OFFSET64 0x338 + +#define VMCOREINFO_ELF_NOTE_HDR_SIZE 24 + +#define WIN_CTX_X64 0x00100000L + +#define WIN_CTX_CTL 0x00000001L +#define WIN_CTX_INT 0x00000002L +#define WIN_CTX_SEG 0x00000004L +#define WIN_CTX_FP 0x00000008L +#define WIN_CTX_DBG 0x00000010L + +#define WIN_CTX_FULL (WIN_CTX_X64 | WIN_CTX_CTL | WIN_CTX_INT | WIN_CTX_FP) +#define WIN_CTX_ALL (WIN_CTX_FULL | WIN_CTX_SEG | WIN_CTX_DBG) + +#define LIVE_SYSTEM_DUMP 0x00000161 + +typedef struct WinM128A { + uint64_t low; + int64_t high; +} QEMU_ALIGNED(16) WinM128A; + +typedef struct WinContext { + uint64_t PHome[6]; + + uint32_t ContextFlags; + uint32_t MxCsr; + + uint16_t SegCs; + uint16_t SegDs; + uint16_t SegEs; + uint16_t SegFs; + uint16_t SegGs; + uint16_t SegSs; + uint32_t EFlags; + + uint64_t Dr0; + uint64_t Dr1; + uint64_t Dr2; + uint64_t Dr3; + uint64_t Dr6; + uint64_t Dr7; + + uint64_t Rax; + uint64_t Rcx; + uint64_t Rdx; + uint64_t Rbx; + uint64_t Rsp; + uint64_t Rbp; + uint64_t Rsi; + uint64_t Rdi; + uint64_t R8; + uint64_t R9; + uint64_t R10; + uint64_t R11; + uint64_t R12; + uint64_t R13; + uint64_t R14; + uint64_t R15; + + uint64_t Rip; + + struct { + uint16_t ControlWord; + uint16_t StatusWord; + uint8_t TagWord; + uint8_t Reserved1; + uint16_t ErrorOpcode; + uint32_t ErrorOffset; + uint16_t ErrorSelector; + uint16_t Reserved2; + uint32_t DataOffset; + uint16_t DataSelector; + uint16_t Reserved3; + uint32_t MxCsr; + uint32_t MxCsr_Mask; + WinM128A FloatRegisters[8]; + WinM128A XmmRegisters[16]; + uint8_t Reserved4[96]; + } FltSave; + + WinM128A VectorRegister[26]; + uint64_t VectorControl; + + uint64_t DebugControl; + uint64_t LastBranchToRip; + uint64_t LastBranchFromRip; + uint64_t LastExceptionToRip; + uint64_t LastExceptionFromRip; +} QEMU_ALIGNED(16) WinContext; + +#endif /* QEMU_WIN_DUMP_DEFS_H */ diff --git a/include/qemu/xattr.h b/include/qemu/xattr.h new file mode 100644 index 000000000..a83fe8e74 --- /dev/null +++ b/include/qemu/xattr.h @@ -0,0 +1,29 @@ +/* + * Host xattr.h abstraction + * + * Copyright 2011 Red Hat Inc. and/or its affiliates + * + * Authors: + * Avi Kivity + * + * This work is licensed under the terms of the GNU GPL, version 2, or any + * later version. See the COPYING file in the top-level directory. + * + */ +#ifndef QEMU_XATTR_H +#define QEMU_XATTR_H + +/* + * Modern distributions (e.g. Fedora 15), have no libattr.so, place attr.h + * in /usr/include/sys, and don't have ENOATTR. + */ + + +#ifdef CONFIG_LIBATTR +# include +#else +# define ENOATTR ENODATA +# include +#endif + +#endif diff --git a/include/qemu/xxhash.h b/include/qemu/xxhash.h new file mode 100644 index 000000000..076f1f605 --- /dev/null +++ b/include/qemu/xxhash.h @@ -0,0 +1,122 @@ +/* + * xxHash - Fast Hash algorithm + * Copyright (C) 2012-2016, Yann Collet + * + * BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php) + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: + * + * + Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following disclaimer + * in the documentation and/or other materials provided with the + * distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * You can contact the author at : + * - xxHash source repository : https://github.com/Cyan4973/xxHash + */ + +#ifndef QEMU_XXHASH_H +#define QEMU_XXHASH_H + +#include "qemu/bitops.h" + +#define PRIME32_1 2654435761U +#define PRIME32_2 2246822519U +#define PRIME32_3 3266489917U +#define PRIME32_4 668265263U +#define PRIME32_5 374761393U + +#define QEMU_XXHASH_SEED 1 + +/* + * xxhash32, customized for input variables that are not guaranteed to be + * contiguous in memory. + */ +static inline uint32_t +qemu_xxhash7(uint64_t ab, uint64_t cd, uint32_t e, uint32_t f, uint32_t g) +{ + uint32_t v1 = QEMU_XXHASH_SEED + PRIME32_1 + PRIME32_2; + uint32_t v2 = QEMU_XXHASH_SEED + PRIME32_2; + uint32_t v3 = QEMU_XXHASH_SEED + 0; + uint32_t v4 = QEMU_XXHASH_SEED - PRIME32_1; + uint32_t a = ab; + uint32_t b = ab >> 32; + uint32_t c = cd; + uint32_t d = cd >> 32; + uint32_t h32; + + v1 += a * PRIME32_2; + v1 = rol32(v1, 13); + v1 *= PRIME32_1; + + v2 += b * PRIME32_2; + v2 = rol32(v2, 13); + v2 *= PRIME32_1; + + v3 += c * PRIME32_2; + v3 = rol32(v3, 13); + v3 *= PRIME32_1; + + v4 += d * PRIME32_2; + v4 = rol32(v4, 13); + v4 *= PRIME32_1; + + h32 = rol32(v1, 1) + rol32(v2, 7) + rol32(v3, 12) + rol32(v4, 18); + h32 += 28; + + h32 += e * PRIME32_3; + h32 = rol32(h32, 17) * PRIME32_4; + + h32 += f * PRIME32_3; + h32 = rol32(h32, 17) * PRIME32_4; + + h32 += g * PRIME32_3; + h32 = rol32(h32, 17) * PRIME32_4; + + h32 ^= h32 >> 15; + h32 *= PRIME32_2; + h32 ^= h32 >> 13; + h32 *= PRIME32_3; + h32 ^= h32 >> 16; + + return h32; +} + +static inline uint32_t qemu_xxhash2(uint64_t ab) +{ + return qemu_xxhash7(ab, 0, 0, 0, 0); +} + +static inline uint32_t qemu_xxhash4(uint64_t ab, uint64_t cd) +{ + return qemu_xxhash7(ab, cd, 0, 0, 0); +} + +static inline uint32_t qemu_xxhash5(uint64_t ab, uint64_t cd, uint32_t e) +{ + return qemu_xxhash7(ab, cd, e, 0, 0); +} + +static inline uint32_t qemu_xxhash6(uint64_t ab, uint64_t cd, uint32_t e, + uint32_t f) +{ + return qemu_xxhash7(ab, cd, e, f, 0); +} + +#endif /* QEMU_XXHASH_H */ diff --git a/include/qom/object.h b/include/qom/object.h new file mode 100644 index 000000000..d378f13a1 --- /dev/null +++ b/include/qom/object.h @@ -0,0 +1,1940 @@ +/* + * QEMU Object Model + * + * Copyright IBM, Corp. 2011 + * + * Authors: + * Anthony Liguori + * + * This work is licensed under the terms of the GNU GPL, version 2 or later. + * See the COPYING file in the top-level directory. + * + */ + +#ifndef QEMU_OBJECT_H +#define QEMU_OBJECT_H + +#include "qapi/qapi-builtin-types.h" +#include "qemu/module.h" +#include "qom/object.h" + +struct TypeImpl; +typedef struct TypeImpl *Type; + +typedef struct TypeInfo TypeInfo; + +typedef struct InterfaceClass InterfaceClass; +typedef struct InterfaceInfo InterfaceInfo; + +#define TYPE_OBJECT "object" + +typedef struct ObjectProperty ObjectProperty; + +/** + * typedef ObjectPropertyAccessor: + * @obj: the object that owns the property + * @v: the visitor that contains the property data + * @name: the name of the property + * @opaque: the object property opaque + * @errp: a pointer to an Error that is filled if getting/setting fails. + * + * Called when trying to get/set a property. + */ +typedef void (ObjectPropertyAccessor)(Object *obj, + Visitor *v, + const char *name, + void *opaque, + Error **errp); + +/** + * typedef ObjectPropertyResolve: + * @obj: the object that owns the property + * @opaque: the opaque registered with the property + * @part: the name of the property + * + * Resolves the #Object corresponding to property @part. + * + * The returned object can also be used as a starting point + * to resolve a relative path starting with "@part". + * + * Returns: If @path is the path that led to @obj, the function + * returns the #Object corresponding to "@path/@part". + * If "@path/@part" is not a valid object path, it returns #NULL. + */ +typedef Object *(ObjectPropertyResolve)(Object *obj, + void *opaque, + const char *part); + +/** + * typedef ObjectPropertyRelease: + * @obj: the object that owns the property + * @name: the name of the property + * @opaque: the opaque registered with the property + * + * Called when a property is removed from a object. + */ +typedef void (ObjectPropertyRelease)(Object *obj, + const char *name, + void *opaque); + +/** + * typedef ObjectPropertyInit: + * @obj: the object that owns the property + * @prop: the property to set + * + * Called when a property is initialized. + */ +typedef void (ObjectPropertyInit)(Object *obj, ObjectProperty *prop); + +struct ObjectProperty +{ + char *name; + char *type; + char *description; + ObjectPropertyAccessor *get; + ObjectPropertyAccessor *set; + ObjectPropertyResolve *resolve; + ObjectPropertyRelease *release; + ObjectPropertyInit *init; + void *opaque; + QObject *defval; +}; + +/** + * typedef ObjectUnparent: + * @obj: the object that is being removed from the composition tree + * + * Called when an object is being removed from the QOM composition tree. + * The function should remove any backlinks from children objects to @obj. + */ +typedef void (ObjectUnparent)(Object *obj); + +/** + * typedef ObjectFree: + * @obj: the object being freed + * + * Called when an object's last reference is removed. + */ +typedef void (ObjectFree)(void *obj); + +#define OBJECT_CLASS_CAST_CACHE 4 + +/** + * struct ObjectClass: + * + * The base for all classes. The only thing that #ObjectClass contains is an + * integer type handle. + */ +struct ObjectClass +{ + /* private: */ + Type type; + GSList *interfaces; + + const char *object_cast_cache[OBJECT_CLASS_CAST_CACHE]; + const char *class_cast_cache[OBJECT_CLASS_CAST_CACHE]; + + ObjectUnparent *unparent; + + GHashTable *properties; +}; + +/** + * struct Object: + * + * The base for all objects. The first member of this object is a pointer to + * a #ObjectClass. Since C guarantees that the first member of a structure + * always begins at byte 0 of that structure, as long as any sub-object places + * its parent as the first member, we can cast directly to a #Object. + * + * As a result, #Object contains a reference to the objects type as its + * first member. This allows identification of the real type of the object at + * run time. + */ +struct Object +{ + /* private: */ + ObjectClass *class; + ObjectFree *free; + GHashTable *properties; + uint32_t ref; + Object *parent; +}; + +/** + * DECLARE_INSTANCE_CHECKER: + * @InstanceType: instance struct name + * @OBJ_NAME: the object name in uppercase with underscore separators + * @TYPENAME: type name + * + * Direct usage of this macro should be avoided, and the complete + * OBJECT_DECLARE_TYPE macro is recommended instead. + * + * This macro will provide the instance type cast functions for a + * QOM type. + */ +#define DECLARE_INSTANCE_CHECKER(InstanceType, OBJ_NAME, TYPENAME) \ + static inline G_GNUC_UNUSED InstanceType * \ + OBJ_NAME(const void *obj) \ + { return OBJECT_CHECK(InstanceType, obj, TYPENAME); } + +/** + * DECLARE_CLASS_CHECKERS: + * @ClassType: class struct name + * @OBJ_NAME: the object name in uppercase with underscore separators + * @TYPENAME: type name + * + * Direct usage of this macro should be avoided, and the complete + * OBJECT_DECLARE_TYPE macro is recommended instead. + * + * This macro will provide the class type cast functions for a + * QOM type. + */ +#define DECLARE_CLASS_CHECKERS(ClassType, OBJ_NAME, TYPENAME) \ + static inline G_GNUC_UNUSED ClassType * \ + OBJ_NAME##_GET_CLASS(const void *obj) \ + { return OBJECT_GET_CLASS(ClassType, obj, TYPENAME); } \ + \ + static inline G_GNUC_UNUSED ClassType * \ + OBJ_NAME##_CLASS(const void *klass) \ + { return OBJECT_CLASS_CHECK(ClassType, klass, TYPENAME); } + +/** + * DECLARE_OBJ_CHECKERS: + * @InstanceType: instance struct name + * @ClassType: class struct name + * @OBJ_NAME: the object name in uppercase with underscore separators + * @TYPENAME: type name + * + * Direct usage of this macro should be avoided, and the complete + * OBJECT_DECLARE_TYPE macro is recommended instead. + * + * This macro will provide the three standard type cast functions for a + * QOM type. + */ +#define DECLARE_OBJ_CHECKERS(InstanceType, ClassType, OBJ_NAME, TYPENAME) \ + DECLARE_INSTANCE_CHECKER(InstanceType, OBJ_NAME, TYPENAME) \ + \ + DECLARE_CLASS_CHECKERS(ClassType, OBJ_NAME, TYPENAME) + +/** + * OBJECT_DECLARE_TYPE: + * @InstanceType: instance struct name + * @ClassType: class struct name + * @MODULE_OBJ_NAME: the object name in uppercase with underscore separators + * + * This macro is typically used in a header file, and will: + * + * - create the typedefs for the object and class structs + * - register the type for use with g_autoptr + * - provide three standard type cast functions + * + * The object struct and class struct need to be declared manually. + */ +#define OBJECT_DECLARE_TYPE(InstanceType, ClassType, MODULE_OBJ_NAME) \ + typedef struct InstanceType InstanceType; \ + typedef struct ClassType ClassType; \ + \ + G_DEFINE_AUTOPTR_CLEANUP_FUNC(InstanceType, object_unref) \ + \ + DECLARE_OBJ_CHECKERS(InstanceType, ClassType, \ + MODULE_OBJ_NAME, TYPE_##MODULE_OBJ_NAME) + +/** + * OBJECT_DECLARE_SIMPLE_TYPE: + * @InstanceType: instance struct name + * @MODULE_OBJ_NAME: the object name in uppercase with underscore separators + * + * This does the same as OBJECT_DECLARE_TYPE(), but with no class struct + * declared. + * + * This macro should be used unless the class struct needs to have + * virtual methods declared. + */ +#define OBJECT_DECLARE_SIMPLE_TYPE(InstanceType, MODULE_OBJ_NAME) \ + typedef struct InstanceType InstanceType; \ + \ + G_DEFINE_AUTOPTR_CLEANUP_FUNC(InstanceType, object_unref) \ + \ + DECLARE_INSTANCE_CHECKER(InstanceType, MODULE_OBJ_NAME, TYPE_##MODULE_OBJ_NAME) + + +/** + * OBJECT_DEFINE_TYPE_EXTENDED: + * @ModuleObjName: the object name with initial caps + * @module_obj_name: the object name in lowercase with underscore separators + * @MODULE_OBJ_NAME: the object name in uppercase with underscore separators + * @PARENT_MODULE_OBJ_NAME: the parent object name in uppercase with underscore + * separators + * @ABSTRACT: boolean flag to indicate whether the object can be instantiated + * @...: list of initializers for "InterfaceInfo" to declare implemented interfaces + * + * This macro is typically used in a source file, and will: + * + * - declare prototypes for _finalize, _class_init and _init methods + * - declare the TypeInfo struct instance + * - provide the constructor to register the type + * + * After using this macro, implementations of the _finalize, _class_init, + * and _init methods need to be written. Any of these can be zero-line + * no-op impls if no special logic is required for a given type. + * + * This macro should rarely be used, instead one of the more specialized + * macros is usually a better choice. + */ +#define OBJECT_DEFINE_TYPE_EXTENDED(ModuleObjName, module_obj_name, \ + MODULE_OBJ_NAME, PARENT_MODULE_OBJ_NAME, \ + ABSTRACT, ...) \ + static void \ + module_obj_name##_finalize(Object *obj); \ + static void \ + module_obj_name##_class_init(ObjectClass *oc, void *data); \ + static void \ + module_obj_name##_init(Object *obj); \ + \ + static const TypeInfo module_obj_name##_info = { \ + .parent = TYPE_##PARENT_MODULE_OBJ_NAME, \ + .name = TYPE_##MODULE_OBJ_NAME, \ + .instance_size = sizeof(ModuleObjName), \ + .instance_align = __alignof__(ModuleObjName), \ + .instance_init = module_obj_name##_init, \ + .instance_finalize = module_obj_name##_finalize, \ + .class_size = sizeof(ModuleObjName##Class), \ + .class_init = module_obj_name##_class_init, \ + .abstract = ABSTRACT, \ + .interfaces = (InterfaceInfo[]) { __VA_ARGS__ } , \ + }; \ + \ + static void \ + module_obj_name##_register_types(void) \ + { \ + type_register_static(&module_obj_name##_info); \ + } \ + type_init(module_obj_name##_register_types); + +/** + * OBJECT_DEFINE_TYPE: + * @ModuleObjName: the object name with initial caps + * @module_obj_name: the object name in lowercase with underscore separators + * @MODULE_OBJ_NAME: the object name in uppercase with underscore separators + * @PARENT_MODULE_OBJ_NAME: the parent object name in uppercase with underscore + * separators + * + * This is a specialization of OBJECT_DEFINE_TYPE_EXTENDED, which is suitable + * for the common case of a non-abstract type, without any interfaces. + */ +#define OBJECT_DEFINE_TYPE(ModuleObjName, module_obj_name, MODULE_OBJ_NAME, \ + PARENT_MODULE_OBJ_NAME) \ + OBJECT_DEFINE_TYPE_EXTENDED(ModuleObjName, module_obj_name, \ + MODULE_OBJ_NAME, PARENT_MODULE_OBJ_NAME, \ + false, { NULL }) + +/** + * OBJECT_DEFINE_TYPE_WITH_INTERFACES: + * @ModuleObjName: the object name with initial caps + * @module_obj_name: the object name in lowercase with underscore separators + * @MODULE_OBJ_NAME: the object name in uppercase with underscore separators + * @PARENT_MODULE_OBJ_NAME: the parent object name in uppercase with underscore + * separators + * @...: list of initializers for "InterfaceInfo" to declare implemented interfaces + * + * This is a specialization of OBJECT_DEFINE_TYPE_EXTENDED, which is suitable + * for the common case of a non-abstract type, with one or more implemented + * interfaces. + * + * Note when passing the list of interfaces, be sure to include the final + * NULL entry, e.g. { TYPE_USER_CREATABLE }, { NULL } + */ +#define OBJECT_DEFINE_TYPE_WITH_INTERFACES(ModuleObjName, module_obj_name, \ + MODULE_OBJ_NAME, \ + PARENT_MODULE_OBJ_NAME, ...) \ + OBJECT_DEFINE_TYPE_EXTENDED(ModuleObjName, module_obj_name, \ + MODULE_OBJ_NAME, PARENT_MODULE_OBJ_NAME, \ + false, __VA_ARGS__) + +/** + * OBJECT_DEFINE_ABSTRACT_TYPE: + * @ModuleObjName: the object name with initial caps + * @module_obj_name: the object name in lowercase with underscore separators + * @MODULE_OBJ_NAME: the object name in uppercase with underscore separators + * @PARENT_MODULE_OBJ_NAME: the parent object name in uppercase with underscore + * separators + * + * This is a specialization of OBJECT_DEFINE_TYPE_EXTENDED, which is suitable + * for defining an abstract type, without any interfaces. + */ +#define OBJECT_DEFINE_ABSTRACT_TYPE(ModuleObjName, module_obj_name, \ + MODULE_OBJ_NAME, PARENT_MODULE_OBJ_NAME) \ + OBJECT_DEFINE_TYPE_EXTENDED(ModuleObjName, module_obj_name, \ + MODULE_OBJ_NAME, PARENT_MODULE_OBJ_NAME, \ + true, { NULL }) + +/** + * struct TypeInfo: + * @name: The name of the type. + * @parent: The name of the parent type. + * @instance_size: The size of the object (derivative of #Object). If + * @instance_size is 0, then the size of the object will be the size of the + * parent object. + * @instance_align: The required alignment of the object. If @instance_align + * is 0, then normal malloc alignment is sufficient; if non-zero, then we + * must use qemu_memalign for allocation. + * @instance_init: This function is called to initialize an object. The parent + * class will have already been initialized so the type is only responsible + * for initializing its own members. + * @instance_post_init: This function is called to finish initialization of + * an object, after all @instance_init functions were called. + * @instance_finalize: This function is called during object destruction. This + * is called before the parent @instance_finalize function has been called. + * An object should only free the members that are unique to its type in this + * function. + * @abstract: If this field is true, then the class is considered abstract and + * cannot be directly instantiated. + * @class_size: The size of the class object (derivative of #ObjectClass) + * for this object. If @class_size is 0, then the size of the class will be + * assumed to be the size of the parent class. This allows a type to avoid + * implementing an explicit class type if they are not adding additional + * virtual functions. + * @class_init: This function is called after all parent class initialization + * has occurred to allow a class to set its default virtual method pointers. + * This is also the function to use to override virtual methods from a parent + * class. + * @class_base_init: This function is called for all base classes after all + * parent class initialization has occurred, but before the class itself + * is initialized. This is the function to use to undo the effects of + * memcpy from the parent class to the descendants. + * @class_data: Data to pass to the @class_init, + * @class_base_init. This can be useful when building dynamic + * classes. + * @interfaces: The list of interfaces associated with this type. This + * should point to a static array that's terminated with a zero filled + * element. + */ +struct TypeInfo +{ + const char *name; + const char *parent; + + size_t instance_size; + size_t instance_align; + void (*instance_init)(Object *obj); + void (*instance_post_init)(Object *obj); + void (*instance_finalize)(Object *obj); + + bool abstract; + size_t class_size; + + void (*class_init)(ObjectClass *klass, void *data); + void (*class_base_init)(ObjectClass *klass, void *data); + void *class_data; + + InterfaceInfo *interfaces; +}; + +/** + * OBJECT: + * @obj: A derivative of #Object + * + * Converts an object to a #Object. Since all objects are #Objects, + * this function will always succeed. + */ +#define OBJECT(obj) \ + ((Object *)(obj)) + +/** + * OBJECT_CLASS: + * @class: A derivative of #ObjectClass. + * + * Converts a class to an #ObjectClass. Since all objects are #Objects, + * this function will always succeed. + */ +#define OBJECT_CLASS(class) \ + ((ObjectClass *)(class)) + +/** + * OBJECT_CHECK: + * @type: The C type to use for the return value. + * @obj: A derivative of @type to cast. + * @name: The QOM typename of @type + * + * A type safe version of @object_dynamic_cast_assert. Typically each class + * will define a macro based on this type to perform type safe dynamic_casts to + * this object type. + * + * If an invalid object is passed to this function, a run time assert will be + * generated. + */ +#define OBJECT_CHECK(type, obj, name) \ + ((type *)object_dynamic_cast_assert(OBJECT(obj), (name), \ + __FILE__, __LINE__, __func__)) + +/** + * OBJECT_CLASS_CHECK: + * @class_type: The C type to use for the return value. + * @class: A derivative class of @class_type to cast. + * @name: the QOM typename of @class_type. + * + * A type safe version of @object_class_dynamic_cast_assert. This macro is + * typically wrapped by each type to perform type safe casts of a class to a + * specific class type. + */ +#define OBJECT_CLASS_CHECK(class_type, class, name) \ + ((class_type *)object_class_dynamic_cast_assert(OBJECT_CLASS(class), (name), \ + __FILE__, __LINE__, __func__)) + +/** + * OBJECT_GET_CLASS: + * @class: The C type to use for the return value. + * @obj: The object to obtain the class for. + * @name: The QOM typename of @obj. + * + * This function will return a specific class for a given object. Its generally + * used by each type to provide a type safe macro to get a specific class type + * from an object. + */ +#define OBJECT_GET_CLASS(class, obj, name) \ + OBJECT_CLASS_CHECK(class, object_get_class(OBJECT(obj)), name) + +/** + * struct InterfaceInfo: + * @type: The name of the interface. + * + * The information associated with an interface. + */ +struct InterfaceInfo { + const char *type; +}; + +/** + * struct InterfaceClass: + * @parent_class: the base class + * + * The class for all interfaces. Subclasses of this class should only add + * virtual methods. + */ +struct InterfaceClass +{ + ObjectClass parent_class; + /* private: */ + ObjectClass *concrete_class; + Type interface_type; +}; + +#define TYPE_INTERFACE "interface" + +/** + * INTERFACE_CLASS: + * @klass: class to cast from + * Returns: An #InterfaceClass or raise an error if cast is invalid + */ +#define INTERFACE_CLASS(klass) \ + OBJECT_CLASS_CHECK(InterfaceClass, klass, TYPE_INTERFACE) + +/** + * INTERFACE_CHECK: + * @interface: the type to return + * @obj: the object to convert to an interface + * @name: the interface type name + * + * Returns: @obj casted to @interface if cast is valid, otherwise raise error. + */ +#define INTERFACE_CHECK(interface, obj, name) \ + ((interface *)object_dynamic_cast_assert(OBJECT((obj)), (name), \ + __FILE__, __LINE__, __func__)) + +/** + * object_new_with_class: + * @klass: The class to instantiate. + * + * This function will initialize a new object using heap allocated memory. + * The returned object has a reference count of 1, and will be freed when + * the last reference is dropped. + * + * Returns: The newly allocated and instantiated object. + */ +Object *object_new_with_class(ObjectClass *klass); + +/** + * object_new: + * @typename: The name of the type of the object to instantiate. + * + * This function will initialize a new object using heap allocated memory. + * The returned object has a reference count of 1, and will be freed when + * the last reference is dropped. + * + * Returns: The newly allocated and instantiated object. + */ +Object *object_new(const char *typename); + +/** + * object_new_with_props: + * @typename: The name of the type of the object to instantiate. + * @parent: the parent object + * @id: The unique ID of the object + * @errp: pointer to error object + * @...: list of property names and values + * + * This function will initialize a new object using heap allocated memory. + * The returned object has a reference count of 1, and will be freed when + * the last reference is dropped. + * + * The @id parameter will be used when registering the object as a + * child of @parent in the composition tree. + * + * The variadic parameters are a list of pairs of (propname, propvalue) + * strings. The propname of %NULL indicates the end of the property + * list. If the object implements the user creatable interface, the + * object will be marked complete once all the properties have been + * processed. + * + * .. code-block:: c + * :caption: Creating an object with properties + * + * Error *err = NULL; + * Object *obj; + * + * obj = object_new_with_props(TYPE_MEMORY_BACKEND_FILE, + * object_get_objects_root(), + * "hostmem0", + * &err, + * "share", "yes", + * "mem-path", "/dev/shm/somefile", + * "prealloc", "yes", + * "size", "1048576", + * NULL); + * + * if (!obj) { + * error_reportf_err(err, "Cannot create memory backend: "); + * } + * + * The returned object will have one stable reference maintained + * for as long as it is present in the object hierarchy. + * + * Returns: The newly allocated, instantiated & initialized object. + */ +Object *object_new_with_props(const char *typename, + Object *parent, + const char *id, + Error **errp, + ...) QEMU_SENTINEL; + +/** + * object_new_with_propv: + * @typename: The name of the type of the object to instantiate. + * @parent: the parent object + * @id: The unique ID of the object + * @errp: pointer to error object + * @vargs: list of property names and values + * + * See object_new_with_props() for documentation. + */ +Object *object_new_with_propv(const char *typename, + Object *parent, + const char *id, + Error **errp, + va_list vargs); + +bool object_apply_global_props(Object *obj, const GPtrArray *props, + Error **errp); +void object_set_machine_compat_props(GPtrArray *compat_props); +void object_set_accelerator_compat_props(GPtrArray *compat_props); +void object_register_sugar_prop(const char *driver, const char *prop, const char *value); +void object_apply_compat_props(Object *obj); + +/** + * object_set_props: + * @obj: the object instance to set properties on + * @errp: pointer to error object + * @...: list of property names and values + * + * This function will set a list of properties on an existing object + * instance. + * + * The variadic parameters are a list of pairs of (propname, propvalue) + * strings. The propname of %NULL indicates the end of the property + * list. + * + * .. code-block:: c + * :caption: Update an object's properties + * + * Error *err = NULL; + * Object *obj = ...get / create object...; + * + * if (!object_set_props(obj, + * &err, + * "share", "yes", + * "mem-path", "/dev/shm/somefile", + * "prealloc", "yes", + * "size", "1048576", + * NULL)) { + * error_reportf_err(err, "Cannot set properties: "); + * } + * + * The returned object will have one stable reference maintained + * for as long as it is present in the object hierarchy. + * + * Returns: %true on success, %false on error. + */ +bool object_set_props(Object *obj, Error **errp, ...) QEMU_SENTINEL; + +/** + * object_set_propv: + * @obj: the object instance to set properties on + * @errp: pointer to error object + * @vargs: list of property names and values + * + * See object_set_props() for documentation. + * + * Returns: %true on success, %false on error. + */ +bool object_set_propv(Object *obj, Error **errp, va_list vargs); + +/** + * object_initialize: + * @obj: A pointer to the memory to be used for the object. + * @size: The maximum size available at @obj for the object. + * @typename: The name of the type of the object to instantiate. + * + * This function will initialize an object. The memory for the object should + * have already been allocated. The returned object has a reference count of 1, + * and will be finalized when the last reference is dropped. + */ +void object_initialize(void *obj, size_t size, const char *typename); + +/** + * object_initialize_child_with_props: + * @parentobj: The parent object to add a property to + * @propname: The name of the property + * @childobj: A pointer to the memory to be used for the object. + * @size: The maximum size available at @childobj for the object. + * @type: The name of the type of the object to instantiate. + * @errp: If an error occurs, a pointer to an area to store the error + * @...: list of property names and values + * + * This function will initialize an object. The memory for the object should + * have already been allocated. The object will then be added as child property + * to a parent with object_property_add_child() function. The returned object + * has a reference count of 1 (for the "child<...>" property from the parent), + * so the object will be finalized automatically when the parent gets removed. + * + * The variadic parameters are a list of pairs of (propname, propvalue) + * strings. The propname of %NULL indicates the end of the property list. + * If the object implements the user creatable interface, the object will + * be marked complete once all the properties have been processed. + * + * Returns: %true on success, %false on failure. + */ +bool object_initialize_child_with_props(Object *parentobj, + const char *propname, + void *childobj, size_t size, const char *type, + Error **errp, ...) QEMU_SENTINEL; + +/** + * object_initialize_child_with_propsv: + * @parentobj: The parent object to add a property to + * @propname: The name of the property + * @childobj: A pointer to the memory to be used for the object. + * @size: The maximum size available at @childobj for the object. + * @type: The name of the type of the object to instantiate. + * @errp: If an error occurs, a pointer to an area to store the error + * @vargs: list of property names and values + * + * See object_initialize_child() for documentation. + * + * Returns: %true on success, %false on failure. + */ +bool object_initialize_child_with_propsv(Object *parentobj, + const char *propname, + void *childobj, size_t size, const char *type, + Error **errp, va_list vargs); + +/** + * object_initialize_child: + * @parent: The parent object to add a property to + * @propname: The name of the property + * @child: A precisely typed pointer to the memory to be used for the + * object. + * @type: The name of the type of the object to instantiate. + * + * This is like:: + * + * object_initialize_child_with_props(parent, propname, + * child, sizeof(*child), type, + * &error_abort, NULL) + */ +#define object_initialize_child(parent, propname, child, type) \ + object_initialize_child_internal((parent), (propname), \ + (child), sizeof(*(child)), (type)) +void object_initialize_child_internal(Object *parent, const char *propname, + void *child, size_t size, + const char *type); + +/** + * object_dynamic_cast: + * @obj: The object to cast. + * @typename: The @typename to cast to. + * + * This function will determine if @obj is-a @typename. @obj can refer to an + * object or an interface associated with an object. + * + * Returns: This function returns @obj on success or #NULL on failure. + */ +Object *object_dynamic_cast(Object *obj, const char *typename); + +/** + * object_dynamic_cast_assert: + * @obj: The object to cast. + * @typename: The @typename to cast to. + * @file: Source code file where function was called + * @line: Source code line where function was called + * @func: Name of function where this function was called + * + * See object_dynamic_cast() for a description of the parameters of this + * function. The only difference in behavior is that this function asserts + * instead of returning #NULL on failure if QOM cast debugging is enabled. + * This function is not meant to be called directly, but only through + * the wrapper macro OBJECT_CHECK. + */ +Object *object_dynamic_cast_assert(Object *obj, const char *typename, + const char *file, int line, const char *func); + +/** + * object_get_class: + * @obj: A derivative of #Object + * + * Returns: The #ObjectClass of the type associated with @obj. + */ +ObjectClass *object_get_class(Object *obj); + +/** + * object_get_typename: + * @obj: A derivative of #Object. + * + * Returns: The QOM typename of @obj. + */ +const char *object_get_typename(const Object *obj); + +/** + * type_register_static: + * @info: The #TypeInfo of the new type. + * + * @info and all of the strings it points to should exist for the life time + * that the type is registered. + * + * Returns: the new #Type. + */ +Type type_register_static(const TypeInfo *info); + +/** + * type_register: + * @info: The #TypeInfo of the new type + * + * Unlike type_register_static(), this call does not require @info or its + * string members to continue to exist after the call returns. + * + * Returns: the new #Type. + */ +Type type_register(const TypeInfo *info); + +/** + * type_register_static_array: + * @infos: The array of the new type #TypeInfo structures. + * @nr_infos: number of entries in @infos + * + * @infos and all of the strings it points to should exist for the life time + * that the type is registered. + */ +void type_register_static_array(const TypeInfo *infos, int nr_infos); + +/** + * DEFINE_TYPES: + * @type_array: The array containing #TypeInfo structures to register + * + * @type_array should be static constant that exists for the life time + * that the type is registered. + */ +#define DEFINE_TYPES(type_array) \ +static void do_qemu_init_ ## type_array(void) \ +{ \ + type_register_static_array(type_array, ARRAY_SIZE(type_array)); \ +} \ +type_init(do_qemu_init_ ## type_array) + +/** + * object_class_dynamic_cast_assert: + * @klass: The #ObjectClass to attempt to cast. + * @typename: The QOM typename of the class to cast to. + * @file: Source code file where function was called + * @line: Source code line where function was called + * @func: Name of function where this function was called + * + * See object_class_dynamic_cast() for a description of the parameters + * of this function. The only difference in behavior is that this function + * asserts instead of returning #NULL on failure if QOM cast debugging is + * enabled. This function is not meant to be called directly, but only through + * the wrapper macro OBJECT_CLASS_CHECK. + */ +ObjectClass *object_class_dynamic_cast_assert(ObjectClass *klass, + const char *typename, + const char *file, int line, + const char *func); + +/** + * object_class_dynamic_cast: + * @klass: The #ObjectClass to attempt to cast. + * @typename: The QOM typename of the class to cast to. + * + * Returns: If @typename is a class, this function returns @klass if + * @typename is a subtype of @klass, else returns #NULL. + * + * If @typename is an interface, this function returns the interface + * definition for @klass if @klass implements it unambiguously; #NULL + * is returned if @klass does not implement the interface or if multiple + * classes or interfaces on the hierarchy leading to @klass implement + * it. (FIXME: perhaps this can be detected at type definition time?) + */ +ObjectClass *object_class_dynamic_cast(ObjectClass *klass, + const char *typename); + +/** + * object_class_get_parent: + * @klass: The class to obtain the parent for. + * + * Returns: The parent for @klass or %NULL if none. + */ +ObjectClass *object_class_get_parent(ObjectClass *klass); + +/** + * object_class_get_name: + * @klass: The class to obtain the QOM typename for. + * + * Returns: The QOM typename for @klass. + */ +const char *object_class_get_name(ObjectClass *klass); + +/** + * object_class_is_abstract: + * @klass: The class to obtain the abstractness for. + * + * Returns: %true if @klass is abstract, %false otherwise. + */ +bool object_class_is_abstract(ObjectClass *klass); + +/** + * object_class_by_name: + * @typename: The QOM typename to obtain the class for. + * + * Returns: The class for @typename or %NULL if not found. + */ +ObjectClass *object_class_by_name(const char *typename); + +/** + * module_object_class_by_name: + * @typename: The QOM typename to obtain the class for. + * + * For objects which might be provided by a module. Behaves like + * object_class_by_name, but additionally tries to load the module + * needed in case the class is not available. + * + * Returns: The class for @typename or %NULL if not found. + */ +ObjectClass *module_object_class_by_name(const char *typename); + +void object_class_foreach(void (*fn)(ObjectClass *klass, void *opaque), + const char *implements_type, bool include_abstract, + void *opaque); + +/** + * object_class_get_list: + * @implements_type: The type to filter for, including its derivatives. + * @include_abstract: Whether to include abstract classes. + * + * Returns: A singly-linked list of the classes in reverse hashtable order. + */ +GSList *object_class_get_list(const char *implements_type, + bool include_abstract); + +/** + * object_class_get_list_sorted: + * @implements_type: The type to filter for, including its derivatives. + * @include_abstract: Whether to include abstract classes. + * + * Returns: A singly-linked list of the classes in alphabetical + * case-insensitive order. + */ +GSList *object_class_get_list_sorted(const char *implements_type, + bool include_abstract); + +/** + * object_ref: + * @obj: the object + * + * Increase the reference count of a object. A object cannot be freed as long + * as its reference count is greater than zero. + * Returns: @obj + */ +Object *object_ref(void *obj); + +/** + * object_unref: + * @obj: the object + * + * Decrease the reference count of a object. A object cannot be freed as long + * as its reference count is greater than zero. + */ +void object_unref(void *obj); + +/** + * object_property_try_add: + * @obj: the object to add a property to + * @name: the name of the property. This can contain any character except for + * a forward slash. In general, you should use hyphens '-' instead of + * underscores '_' when naming properties. + * @type: the type name of the property. This namespace is pretty loosely + * defined. Sub namespaces are constructed by using a prefix and then + * to angle brackets. For instance, the type 'virtio-net-pci' in the + * 'link' namespace would be 'link'. + * @get: The getter to be called to read a property. If this is NULL, then + * the property cannot be read. + * @set: the setter to be called to write a property. If this is NULL, + * then the property cannot be written. + * @release: called when the property is removed from the object. This is + * meant to allow a property to free its opaque upon object + * destruction. This may be NULL. + * @opaque: an opaque pointer to pass to the callbacks for the property + * @errp: pointer to error object + * + * Returns: The #ObjectProperty; this can be used to set the @resolve + * callback for child and link properties. + */ +ObjectProperty *object_property_try_add(Object *obj, const char *name, + const char *type, + ObjectPropertyAccessor *get, + ObjectPropertyAccessor *set, + ObjectPropertyRelease *release, + void *opaque, Error **errp); + +/** + * object_property_add: + * Same as object_property_try_add() with @errp hardcoded to + * &error_abort. + * + * @obj: the object to add a property to + * @name: the name of the property. This can contain any character except for + * a forward slash. In general, you should use hyphens '-' instead of + * underscores '_' when naming properties. + * @type: the type name of the property. This namespace is pretty loosely + * defined. Sub namespaces are constructed by using a prefix and then + * to angle brackets. For instance, the type 'virtio-net-pci' in the + * 'link' namespace would be 'link'. + * @get: The getter to be called to read a property. If this is NULL, then + * the property cannot be read. + * @set: the setter to be called to write a property. If this is NULL, + * then the property cannot be written. + * @release: called when the property is removed from the object. This is + * meant to allow a property to free its opaque upon object + * destruction. This may be NULL. + * @opaque: an opaque pointer to pass to the callbacks for the property + */ +ObjectProperty *object_property_add(Object *obj, const char *name, + const char *type, + ObjectPropertyAccessor *get, + ObjectPropertyAccessor *set, + ObjectPropertyRelease *release, + void *opaque); + +void object_property_del(Object *obj, const char *name); + +ObjectProperty *object_class_property_add(ObjectClass *klass, const char *name, + const char *type, + ObjectPropertyAccessor *get, + ObjectPropertyAccessor *set, + ObjectPropertyRelease *release, + void *opaque); + +/** + * object_property_set_default_bool: + * @prop: the property to set + * @value: the value to be written to the property + * + * Set the property default value. + */ +void object_property_set_default_bool(ObjectProperty *prop, bool value); + +/** + * object_property_set_default_str: + * @prop: the property to set + * @value: the value to be written to the property + * + * Set the property default value. + */ +void object_property_set_default_str(ObjectProperty *prop, const char *value); + +/** + * object_property_set_default_int: + * @prop: the property to set + * @value: the value to be written to the property + * + * Set the property default value. + */ +void object_property_set_default_int(ObjectProperty *prop, int64_t value); + +/** + * object_property_set_default_uint: + * @prop: the property to set + * @value: the value to be written to the property + * + * Set the property default value. + */ +void object_property_set_default_uint(ObjectProperty *prop, uint64_t value); + +/** + * object_property_find: + * @obj: the object + * @name: the name of the property + * + * Look up a property for an object. + * + * Return its #ObjectProperty if found, or NULL. + */ +ObjectProperty *object_property_find(Object *obj, const char *name); + +/** + * object_property_find_err: + * @obj: the object + * @name: the name of the property + * @errp: returns an error if this function fails + * + * Look up a property for an object. + * + * Return its #ObjectProperty if found, or NULL. + */ +ObjectProperty *object_property_find_err(Object *obj, + const char *name, + Error **errp); + +/** + * object_class_property_find: + * @klass: the object class + * @name: the name of the property + * + * Look up a property for an object class. + * + * Return its #ObjectProperty if found, or NULL. + */ +ObjectProperty *object_class_property_find(ObjectClass *klass, + const char *name); + +/** + * object_class_property_find_err: + * @klass: the object class + * @name: the name of the property + * @errp: returns an error if this function fails + * + * Look up a property for an object class. + * + * Return its #ObjectProperty if found, or NULL. + */ +ObjectProperty *object_class_property_find_err(ObjectClass *klass, + const char *name, + Error **errp); + +typedef struct ObjectPropertyIterator { + ObjectClass *nextclass; + GHashTableIter iter; +} ObjectPropertyIterator; + +/** + * object_property_iter_init: + * @iter: the iterator instance + * @obj: the object + * + * Initializes an iterator for traversing all properties + * registered against an object instance, its class and all parent classes. + * + * It is forbidden to modify the property list while iterating, + * whether removing or adding properties. + * + * Typical usage pattern would be + * + * .. code-block:: c + * :caption: Using object property iterators + * + * ObjectProperty *prop; + * ObjectPropertyIterator iter; + * + * object_property_iter_init(&iter, obj); + * while ((prop = object_property_iter_next(&iter))) { + * ... do something with prop ... + * } + */ +void object_property_iter_init(ObjectPropertyIterator *iter, + Object *obj); + +/** + * object_class_property_iter_init: + * @iter: the iterator instance + * @klass: the class + * + * Initializes an iterator for traversing all properties + * registered against an object class and all parent classes. + * + * It is forbidden to modify the property list while iterating, + * whether removing or adding properties. + * + * This can be used on abstract classes as it does not create a temporary + * instance. + */ +void object_class_property_iter_init(ObjectPropertyIterator *iter, + ObjectClass *klass); + +/** + * object_property_iter_next: + * @iter: the iterator instance + * + * Return the next available property. If no further properties + * are available, a %NULL value will be returned and the @iter + * pointer should not be used again after this point without + * re-initializing it. + * + * Returns: the next property, or %NULL when all properties + * have been traversed. + */ +ObjectProperty *object_property_iter_next(ObjectPropertyIterator *iter); + +void object_unparent(Object *obj); + +/** + * object_property_get: + * @obj: the object + * @name: the name of the property + * @v: the visitor that will receive the property value. This should be an + * Output visitor and the data will be written with @name as the name. + * @errp: returns an error if this function fails + * + * Reads a property from a object. + * + * Returns: %true on success, %false on failure. + */ +bool object_property_get(Object *obj, const char *name, Visitor *v, + Error **errp); + +/** + * object_property_set_str: + * @obj: the object + * @name: the name of the property + * @value: the value to be written to the property + * @errp: returns an error if this function fails + * + * Writes a string value to a property. + * + * Returns: %true on success, %false on failure. + */ +bool object_property_set_str(Object *obj, const char *name, + const char *value, Error **errp); + +/** + * object_property_get_str: + * @obj: the object + * @name: the name of the property + * @errp: returns an error if this function fails + * + * Returns: the value of the property, converted to a C string, or NULL if + * an error occurs (including when the property value is not a string). + * The caller should free the string. + */ +char *object_property_get_str(Object *obj, const char *name, + Error **errp); + +/** + * object_property_set_link: + * @obj: the object + * @name: the name of the property + * @value: the value to be written to the property + * @errp: returns an error if this function fails + * + * Writes an object's canonical path to a property. + * + * If the link property was created with + * %OBJ_PROP_LINK_STRONG bit, the old target object is + * unreferenced, and a reference is added to the new target object. + * + * Returns: %true on success, %false on failure. + */ +bool object_property_set_link(Object *obj, const char *name, + Object *value, Error **errp); + +/** + * object_property_get_link: + * @obj: the object + * @name: the name of the property + * @errp: returns an error if this function fails + * + * Returns: the value of the property, resolved from a path to an Object, + * or NULL if an error occurs (including when the property value is not a + * string or not a valid object path). + */ +Object *object_property_get_link(Object *obj, const char *name, + Error **errp); + +/** + * object_property_set_bool: + * @obj: the object + * @name: the name of the property + * @value: the value to be written to the property + * @errp: returns an error if this function fails + * + * Writes a bool value to a property. + * + * Returns: %true on success, %false on failure. + */ +bool object_property_set_bool(Object *obj, const char *name, + bool value, Error **errp); + +/** + * object_property_get_bool: + * @obj: the object + * @name: the name of the property + * @errp: returns an error if this function fails + * + * Returns: the value of the property, converted to a boolean, or false if + * an error occurs (including when the property value is not a bool). + */ +bool object_property_get_bool(Object *obj, const char *name, + Error **errp); + +/** + * object_property_set_int: + * @obj: the object + * @name: the name of the property + * @value: the value to be written to the property + * @errp: returns an error if this function fails + * + * Writes an integer value to a property. + * + * Returns: %true on success, %false on failure. + */ +bool object_property_set_int(Object *obj, const char *name, + int64_t value, Error **errp); + +/** + * object_property_get_int: + * @obj: the object + * @name: the name of the property + * @errp: returns an error if this function fails + * + * Returns: the value of the property, converted to an integer, or -1 if + * an error occurs (including when the property value is not an integer). + */ +int64_t object_property_get_int(Object *obj, const char *name, + Error **errp); + +/** + * object_property_set_uint: + * @obj: the object + * @name: the name of the property + * @value: the value to be written to the property + * @errp: returns an error if this function fails + * + * Writes an unsigned integer value to a property. + * + * Returns: %true on success, %false on failure. + */ +bool object_property_set_uint(Object *obj, const char *name, + uint64_t value, Error **errp); + +/** + * object_property_get_uint: + * @obj: the object + * @name: the name of the property + * @errp: returns an error if this function fails + * + * Returns: the value of the property, converted to an unsigned integer, or 0 + * an error occurs (including when the property value is not an integer). + */ +uint64_t object_property_get_uint(Object *obj, const char *name, + Error **errp); + +/** + * object_property_get_enum: + * @obj: the object + * @name: the name of the property + * @typename: the name of the enum data type + * @errp: returns an error if this function fails + * + * Returns: the value of the property, converted to an integer (which + * can't be negative), or -1 on error (including when the property + * value is not an enum). + */ +int object_property_get_enum(Object *obj, const char *name, + const char *typename, Error **errp); + +/** + * object_property_set: + * @obj: the object + * @name: the name of the property + * @v: the visitor that will be used to write the property value. This should + * be an Input visitor and the data will be first read with @name as the + * name and then written as the property value. + * @errp: returns an error if this function fails + * + * Writes a property to a object. + * + * Returns: %true on success, %false on failure. + */ +bool object_property_set(Object *obj, const char *name, Visitor *v, + Error **errp); + +/** + * object_property_parse: + * @obj: the object + * @name: the name of the property + * @string: the string that will be used to parse the property value. + * @errp: returns an error if this function fails + * + * Parses a string and writes the result into a property of an object. + * + * Returns: %true on success, %false on failure. + */ +bool object_property_parse(Object *obj, const char *name, + const char *string, Error **errp); + +/** + * object_property_print: + * @obj: the object + * @name: the name of the property + * @human: if true, print for human consumption + * @errp: returns an error if this function fails + * + * Returns a string representation of the value of the property. The + * caller shall free the string. + */ +char *object_property_print(Object *obj, const char *name, bool human, + Error **errp); + +/** + * object_property_get_type: + * @obj: the object + * @name: the name of the property + * @errp: returns an error if this function fails + * + * Returns: The type name of the property. + */ +const char *object_property_get_type(Object *obj, const char *name, + Error **errp); + +/** + * object_get_root: + * + * Returns: the root object of the composition tree + */ +Object *object_get_root(void); + + +/** + * object_get_objects_root: + * + * Get the container object that holds user created + * object instances. This is the object at path + * "/objects" + * + * Returns: the user object container + */ +Object *object_get_objects_root(void); + +/** + * object_get_internal_root: + * + * Get the container object that holds internally used object + * instances. Any object which is put into this container must not be + * user visible, and it will not be exposed in the QOM tree. + * + * Returns: the internal object container + */ +Object *object_get_internal_root(void); + +/** + * object_get_canonical_path_component: + * @obj: the object + * + * Returns: The final component in the object's canonical path. The canonical + * path is the path within the composition tree starting from the root. + * %NULL if the object doesn't have a parent (and thus a canonical path). + */ +const char *object_get_canonical_path_component(const Object *obj); + +/** + * object_get_canonical_path: + * @obj: the object + * + * Returns: The canonical path for a object, newly allocated. This is + * the path within the composition tree starting from the root. Use + * g_free() to free it. + */ +char *object_get_canonical_path(const Object *obj); + +/** + * object_resolve_path: + * @path: the path to resolve + * @ambiguous: returns true if the path resolution failed because of an + * ambiguous match + * + * There are two types of supported paths--absolute paths and partial paths. + * + * Absolute paths are derived from the root object and can follow child<> or + * link<> properties. Since they can follow link<> properties, they can be + * arbitrarily long. Absolute paths look like absolute filenames and are + * prefixed with a leading slash. + * + * Partial paths look like relative filenames. They do not begin with a + * prefix. The matching rules for partial paths are subtle but designed to make + * specifying objects easy. At each level of the composition tree, the partial + * path is matched as an absolute path. The first match is not returned. At + * least two matches are searched for. A successful result is only returned if + * only one match is found. If more than one match is found, a flag is + * returned to indicate that the match was ambiguous. + * + * Returns: The matched object or NULL on path lookup failure. + */ +Object *object_resolve_path(const char *path, bool *ambiguous); + +/** + * object_resolve_path_type: + * @path: the path to resolve + * @typename: the type to look for. + * @ambiguous: returns true if the path resolution failed because of an + * ambiguous match + * + * This is similar to object_resolve_path. However, when looking for a + * partial path only matches that implement the given type are considered. + * This restricts the search and avoids spuriously flagging matches as + * ambiguous. + * + * For both partial and absolute paths, the return value goes through + * a dynamic cast to @typename. This is important if either the link, + * or the typename itself are of interface types. + * + * Returns: The matched object or NULL on path lookup failure. + */ +Object *object_resolve_path_type(const char *path, const char *typename, + bool *ambiguous); + +/** + * object_resolve_path_component: + * @parent: the object in which to resolve the path + * @part: the component to resolve. + * + * This is similar to object_resolve_path with an absolute path, but it + * only resolves one element (@part) and takes the others from @parent. + * + * Returns: The resolved object or NULL on path lookup failure. + */ +Object *object_resolve_path_component(Object *parent, const char *part); + +/** + * object_property_try_add_child: + * @obj: the object to add a property to + * @name: the name of the property + * @child: the child object + * @errp: pointer to error object + * + * Child properties form the composition tree. All objects need to be a child + * of another object. Objects can only be a child of one object. + * + * There is no way for a child to determine what its parent is. It is not + * a bidirectional relationship. This is by design. + * + * The value of a child property as a C string will be the child object's + * canonical path. It can be retrieved using object_property_get_str(). + * The child object itself can be retrieved using object_property_get_link(). + * + * Returns: The newly added property on success, or %NULL on failure. + */ +ObjectProperty *object_property_try_add_child(Object *obj, const char *name, + Object *child, Error **errp); + +/** + * object_property_add_child: + * @obj: the object to add a property to + * @name: the name of the property + * @child: the child object + * + * Same as object_property_try_add_child() with @errp hardcoded to + * &error_abort + */ +ObjectProperty *object_property_add_child(Object *obj, const char *name, + Object *child); + +typedef enum { + /* Unref the link pointer when the property is deleted */ + OBJ_PROP_LINK_STRONG = 0x1, + + /* private */ + OBJ_PROP_LINK_DIRECT = 0x2, + OBJ_PROP_LINK_CLASS = 0x4, +} ObjectPropertyLinkFlags; + +/** + * object_property_allow_set_link: + * @obj: the object to add a property to + * @name: the name of the property + * @child: the child object + * @errp: pointer to error object + * + * The default implementation of the object_property_add_link() check() + * callback function. It allows the link property to be set and never returns + * an error. + */ +void object_property_allow_set_link(const Object *obj, const char *name, + Object *child, Error **errp); + +/** + * object_property_add_link: + * @obj: the object to add a property to + * @name: the name of the property + * @type: the qobj type of the link + * @targetp: a pointer to where the link object reference is stored + * @check: callback to veto setting or NULL if the property is read-only + * @flags: additional options for the link + * + * Links establish relationships between objects. Links are unidirectional + * although two links can be combined to form a bidirectional relationship + * between objects. + * + * Links form the graph in the object model. + * + * The @check() callback is invoked when + * object_property_set_link() is called and can raise an error to prevent the + * link being set. If @check is NULL, the property is read-only + * and cannot be set. + * + * Ownership of the pointer that @child points to is transferred to the + * link property. The reference count for *@child is + * managed by the property from after the function returns till the + * property is deleted with object_property_del(). If the + * @flags %OBJ_PROP_LINK_STRONG bit is set, + * the reference count is decremented when the property is deleted or + * modified. + * + * Returns: The newly added property on success, or %NULL on failure. + */ +ObjectProperty *object_property_add_link(Object *obj, const char *name, + const char *type, Object **targetp, + void (*check)(const Object *obj, const char *name, + Object *val, Error **errp), + ObjectPropertyLinkFlags flags); + +ObjectProperty *object_class_property_add_link(ObjectClass *oc, + const char *name, + const char *type, ptrdiff_t offset, + void (*check)(const Object *obj, const char *name, + Object *val, Error **errp), + ObjectPropertyLinkFlags flags); + +/** + * object_property_add_str: + * @obj: the object to add a property to + * @name: the name of the property + * @get: the getter or NULL if the property is write-only. This function must + * return a string to be freed by g_free(). + * @set: the setter or NULL if the property is read-only + * + * Add a string property using getters/setters. This function will add a + * property of type 'string'. + * + * Returns: The newly added property on success, or %NULL on failure. + */ +ObjectProperty *object_property_add_str(Object *obj, const char *name, + char *(*get)(Object *, Error **), + void (*set)(Object *, const char *, Error **)); + +ObjectProperty *object_class_property_add_str(ObjectClass *klass, + const char *name, + char *(*get)(Object *, Error **), + void (*set)(Object *, const char *, + Error **)); + +/** + * object_property_add_bool: + * @obj: the object to add a property to + * @name: the name of the property + * @get: the getter or NULL if the property is write-only. + * @set: the setter or NULL if the property is read-only + * + * Add a bool property using getters/setters. This function will add a + * property of type 'bool'. + * + * Returns: The newly added property on success, or %NULL on failure. + */ +ObjectProperty *object_property_add_bool(Object *obj, const char *name, + bool (*get)(Object *, Error **), + void (*set)(Object *, bool, Error **)); + +ObjectProperty *object_class_property_add_bool(ObjectClass *klass, + const char *name, + bool (*get)(Object *, Error **), + void (*set)(Object *, bool, Error **)); + +/** + * object_property_add_enum: + * @obj: the object to add a property to + * @name: the name of the property + * @typename: the name of the enum data type + * @lookup: enum value namelookup table + * @get: the getter or %NULL if the property is write-only. + * @set: the setter or %NULL if the property is read-only + * + * Add an enum property using getters/setters. This function will add a + * property of type '@typename'. + * + * Returns: The newly added property on success, or %NULL on failure. + */ +ObjectProperty *object_property_add_enum(Object *obj, const char *name, + const char *typename, + const QEnumLookup *lookup, + int (*get)(Object *, Error **), + void (*set)(Object *, int, Error **)); + +ObjectProperty *object_class_property_add_enum(ObjectClass *klass, + const char *name, + const char *typename, + const QEnumLookup *lookup, + int (*get)(Object *, Error **), + void (*set)(Object *, int, Error **)); + +/** + * object_property_add_tm: + * @obj: the object to add a property to + * @name: the name of the property + * @get: the getter or NULL if the property is write-only. + * + * Add a read-only struct tm valued property using a getter function. + * This function will add a property of type 'struct tm'. + * + * Returns: The newly added property on success, or %NULL on failure. + */ +ObjectProperty *object_property_add_tm(Object *obj, const char *name, + void (*get)(Object *, struct tm *, Error **)); + +ObjectProperty *object_class_property_add_tm(ObjectClass *klass, + const char *name, + void (*get)(Object *, struct tm *, Error **)); + +typedef enum { + /* Automatically add a getter to the property */ + OBJ_PROP_FLAG_READ = 1 << 0, + /* Automatically add a setter to the property */ + OBJ_PROP_FLAG_WRITE = 1 << 1, + /* Automatically add a getter and a setter to the property */ + OBJ_PROP_FLAG_READWRITE = (OBJ_PROP_FLAG_READ | OBJ_PROP_FLAG_WRITE), +} ObjectPropertyFlags; + +/** + * object_property_add_uint8_ptr: + * @obj: the object to add a property to + * @name: the name of the property + * @v: pointer to value + * @flags: bitwise-or'd ObjectPropertyFlags + * + * Add an integer property in memory. This function will add a + * property of type 'uint8'. + * + * Returns: The newly added property on success, or %NULL on failure. + */ +ObjectProperty *object_property_add_uint8_ptr(Object *obj, const char *name, + const uint8_t *v, + ObjectPropertyFlags flags); + +ObjectProperty *object_class_property_add_uint8_ptr(ObjectClass *klass, + const char *name, + const uint8_t *v, + ObjectPropertyFlags flags); + +/** + * object_property_add_uint16_ptr: + * @obj: the object to add a property to + * @name: the name of the property + * @v: pointer to value + * @flags: bitwise-or'd ObjectPropertyFlags + * + * Add an integer property in memory. This function will add a + * property of type 'uint16'. + * + * Returns: The newly added property on success, or %NULL on failure. + */ +ObjectProperty *object_property_add_uint16_ptr(Object *obj, const char *name, + const uint16_t *v, + ObjectPropertyFlags flags); + +ObjectProperty *object_class_property_add_uint16_ptr(ObjectClass *klass, + const char *name, + const uint16_t *v, + ObjectPropertyFlags flags); + +/** + * object_property_add_uint32_ptr: + * @obj: the object to add a property to + * @name: the name of the property + * @v: pointer to value + * @flags: bitwise-or'd ObjectPropertyFlags + * + * Add an integer property in memory. This function will add a + * property of type 'uint32'. + * + * Returns: The newly added property on success, or %NULL on failure. + */ +ObjectProperty *object_property_add_uint32_ptr(Object *obj, const char *name, + const uint32_t *v, + ObjectPropertyFlags flags); + +ObjectProperty *object_class_property_add_uint32_ptr(ObjectClass *klass, + const char *name, + const uint32_t *v, + ObjectPropertyFlags flags); + +/** + * object_property_add_uint64_ptr: + * @obj: the object to add a property to + * @name: the name of the property + * @v: pointer to value + * @flags: bitwise-or'd ObjectPropertyFlags + * + * Add an integer property in memory. This function will add a + * property of type 'uint64'. + * + * Returns: The newly added property on success, or %NULL on failure. + */ +ObjectProperty *object_property_add_uint64_ptr(Object *obj, const char *name, + const uint64_t *v, + ObjectPropertyFlags flags); + +ObjectProperty *object_class_property_add_uint64_ptr(ObjectClass *klass, + const char *name, + const uint64_t *v, + ObjectPropertyFlags flags); + +/** + * object_property_add_alias: + * @obj: the object to add a property to + * @name: the name of the property + * @target_obj: the object to forward property access to + * @target_name: the name of the property on the forwarded object + * + * Add an alias for a property on an object. This function will add a property + * of the same type as the forwarded property. + * + * The caller must ensure that @target_obj stays alive as long as + * this property exists. In the case of a child object or an alias on the same + * object this will be the case. For aliases to other objects the caller is + * responsible for taking a reference. + * + * Returns: The newly added property on success, or %NULL on failure. + */ +ObjectProperty *object_property_add_alias(Object *obj, const char *name, + Object *target_obj, const char *target_name); + +/** + * object_property_add_const_link: + * @obj: the object to add a property to + * @name: the name of the property + * @target: the object to be referred by the link + * + * Add an unmodifiable link for a property on an object. This function will + * add a property of type link where TYPE is the type of @target. + * + * The caller must ensure that @target stays alive as long as + * this property exists. In the case @target is a child of @obj, + * this will be the case. Otherwise, the caller is responsible for + * taking a reference. + * + * Returns: The newly added property on success, or %NULL on failure. + */ +ObjectProperty *object_property_add_const_link(Object *obj, const char *name, + Object *target); + +/** + * object_property_set_description: + * @obj: the object owning the property + * @name: the name of the property + * @description: the description of the property on the object + * + * Set an object property's description. + * + * Returns: %true on success, %false on failure. + */ +void object_property_set_description(Object *obj, const char *name, + const char *description); +void object_class_property_set_description(ObjectClass *klass, const char *name, + const char *description); + +/** + * object_child_foreach: + * @obj: the object whose children will be navigated + * @fn: the iterator function to be called + * @opaque: an opaque value that will be passed to the iterator + * + * Call @fn passing each child of @obj and @opaque to it, until @fn returns + * non-zero. + * + * It is forbidden to add or remove children from @obj from the @fn + * callback. + * + * Returns: The last value returned by @fn, or 0 if there is no child. + */ +int object_child_foreach(Object *obj, int (*fn)(Object *child, void *opaque), + void *opaque); + +/** + * object_child_foreach_recursive: + * @obj: the object whose children will be navigated + * @fn: the iterator function to be called + * @opaque: an opaque value that will be passed to the iterator + * + * Call @fn passing each child of @obj and @opaque to it, until @fn returns + * non-zero. Calls recursively, all child nodes of @obj will also be passed + * all the way down to the leaf nodes of the tree. Depth first ordering. + * + * It is forbidden to add or remove children from @obj (or its + * child nodes) from the @fn callback. + * + * Returns: The last value returned by @fn, or 0 if there is no child. + */ +int object_child_foreach_recursive(Object *obj, + int (*fn)(Object *child, void *opaque), + void *opaque); +/** + * container_get: + * @root: root of the #path, e.g., object_get_root() + * @path: path to the container + * + * Return a container object whose path is @path. Create more containers + * along the path if necessary. + * + * Returns: the container object. + */ +Object *container_get(Object *root, const char *path); + +/** + * object_type_get_instance_size: + * @typename: Name of the Type whose instance_size is required + * + * Returns the instance_size of the given @typename. + */ +size_t object_type_get_instance_size(const char *typename); + +/** + * object_property_help: + * @name: the name of the property + * @type: the type of the property + * @defval: the default value + * @description: description of the property + * + * Returns: a user-friendly formatted string describing the property + * for help purposes. + */ +char *object_property_help(const char *name, const char *type, + QObject *defval, const char *description); + +G_DEFINE_AUTOPTR_CLEANUP_FUNC(Object, object_unref) + +#endif diff --git a/include/qom/object_interfaces.h b/include/qom/object_interfaces.h new file mode 100644 index 000000000..07d5cc883 --- /dev/null +++ b/include/qom/object_interfaces.h @@ -0,0 +1,206 @@ +#ifndef OBJECT_INTERFACES_H +#define OBJECT_INTERFACES_H + +#include "qom/object.h" +#include "qapi/visitor.h" + +#define TYPE_USER_CREATABLE "user-creatable" + +typedef struct UserCreatableClass UserCreatableClass; +DECLARE_CLASS_CHECKERS(UserCreatableClass, USER_CREATABLE, + TYPE_USER_CREATABLE) +#define USER_CREATABLE(obj) \ + INTERFACE_CHECK(UserCreatable, (obj), \ + TYPE_USER_CREATABLE) + +typedef struct UserCreatable UserCreatable; + +/** + * UserCreatableClass: + * @parent_class: the base class + * @complete: callback to be called after @obj's properties are set. + * @can_be_deleted: callback to be called before an object is removed + * to check if @obj can be removed safely. + * + * Interface is designed to work with -object/object-add/object_add + * commands. + * Interface is mandatory for objects that are designed to be user + * creatable (i.e. -object/object-add/object_add, will accept only + * objects that inherit this interface). + * + * Interface also provides an optional ability to do the second + * stage * initialization of the object after its properties were + * set. + * + * For objects created without using -object/object-add/object_add, + * @user_creatable_complete() wrapper should be called manually if + * object's type implements USER_CREATABLE interface and needs + * complete() callback to be called. + */ +struct UserCreatableClass { + /* */ + InterfaceClass parent_class; + + /* */ + void (*complete)(UserCreatable *uc, Error **errp); + bool (*can_be_deleted)(UserCreatable *uc); +}; + +/** + * user_creatable_complete: + * @uc: the user-creatable object whose complete() method is called if defined + * @errp: if an error occurs, a pointer to an area to store the error + * + * Wrapper to call complete() method if one of types it's inherited + * from implements USER_CREATABLE interface, otherwise the call does + * nothing. + * + * Returns: %true on success, %false on failure. + */ +bool user_creatable_complete(UserCreatable *uc, Error **errp); + +/** + * user_creatable_can_be_deleted: + * @uc: the object whose can_be_deleted() method is called if implemented + * + * Wrapper to call can_be_deleted() method if one of types it's inherited + * from implements USER_CREATABLE interface. + */ +bool user_creatable_can_be_deleted(UserCreatable *uc); + +/** + * user_creatable_add_type: + * @type: the object type name + * @id: the unique ID for the object + * @qdict: the object properties + * @v: the visitor + * @errp: if an error occurs, a pointer to an area to store the error + * + * Create an instance of the user creatable object @type, placing + * it in the object composition tree with name @id, initializing + * it with properties from @qdict + * + * Returns: the newly created object or NULL on error + */ +Object *user_creatable_add_type(const char *type, const char *id, + const QDict *qdict, + Visitor *v, Error **errp); + +/** + * user_creatable_add_dict: + * @qdict: the object definition + * @keyval: if true, use a keyval visitor for processing @qdict (i.e. + * assume that all @qdict values are strings); otherwise, use + * the normal QObject visitor (i.e. assume all @qdict values + * have the QType expected by the QOM object type) + * @errp: if an error occurs, a pointer to an area to store the error + * + * Create an instance of the user creatable object that is defined by + * @qdict. The object type is taken from the QDict key 'qom-type', its + * ID from the key 'id'. The remaining entries in @qdict are used to + * initialize the object properties. + * + * Returns: %true on success, %false on failure. + */ +bool user_creatable_add_dict(QDict *qdict, bool keyval, Error **errp); + +/** + * user_creatable_add_opts: + * @opts: the object definition + * @errp: if an error occurs, a pointer to an area to store the error + * + * Create an instance of the user creatable object whose type + * is defined in @opts by the 'qom-type' option, placing it + * in the object composition tree with name provided by the + * 'id' field. The remaining options in @opts are used to + * initialize the object properties. + * + * Returns: the newly created object or NULL on error + */ +Object *user_creatable_add_opts(QemuOpts *opts, Error **errp); + + +/** + * user_creatable_add_opts_predicate: + * @type: the QOM type to be added + * + * A callback function to determine whether an object + * of type @type should be created. Instances of this + * callback should be passed to user_creatable_add_opts_foreach + */ +typedef bool (*user_creatable_add_opts_predicate)(const char *type); + +/** + * user_creatable_add_opts_foreach: + * @opaque: a user_creatable_add_opts_predicate callback or NULL + * @opts: options to create + * @errp: unused + * + * An iterator callback to be used in conjunction with + * the qemu_opts_foreach() method for creating a list of + * objects from a set of QemuOpts + * + * The @opaque parameter can be passed a user_creatable_add_opts_predicate + * callback to filter which types of object are created during iteration. + * When it fails, report the error. + * + * Returns: 0 on success, -1 when an error was reported. + */ +int user_creatable_add_opts_foreach(void *opaque, + QemuOpts *opts, Error **errp); + +/** + * user_creatable_print_help: + * @type: the QOM type to be added + * @opts: options to create + * + * Prints help if requested in @type or @opts. Note that if @type is neither + * "help"/"?" nor a valid user creatable type, no help will be printed + * regardless of @opts. + * + * Returns: true if a help option was found and help was printed, false + * otherwise. + */ +bool user_creatable_print_help(const char *type, QemuOpts *opts); + +/** + * user_creatable_print_help_from_qdict: + * @args: options to create + * + * Prints help considering the other options given in @args (if "qom-type" is + * given and valid, print properties for the type, otherwise print valid types) + * + * In contrast to user_creatable_print_help(), this function can't return that + * no help was requested. It should only be called if we know that help is + * requested and it will always print some help. + */ +void user_creatable_print_help_from_qdict(QDict *args); + +/** + * user_creatable_del: + * @id: the unique ID for the object + * @errp: if an error occurs, a pointer to an area to store the error + * + * Delete an instance of the user creatable object identified + * by @id. + * + * Returns: %true on success, %false on failure. + */ +bool user_creatable_del(const char *id, Error **errp); + +/** + * user_creatable_cleanup: + * + * Delete all user-creatable objects and the user-creatable + * objects container. + */ +void user_creatable_cleanup(void); + +/** + * qmp_object_add: + * + * QMP command handler for object-add. See the QAPI schema for documentation. + */ +void qmp_object_add(QDict *qdict, QObject **ret_data, Error **errp); + +#endif diff --git a/include/qom/qom-qobject.h b/include/qom/qom-qobject.h new file mode 100644 index 000000000..73e4e0e47 --- /dev/null +++ b/include/qom/qom-qobject.h @@ -0,0 +1,43 @@ +/* + * QEMU Object Model - QObject wrappers + * + * Copyright (C) 2012 Red Hat, Inc. + * + * Author: Paolo Bonzini + * + * This work is licensed under the terms of the GNU GPL, version 2 or later. + * See the COPYING file in the top-level directory. + * + */ + +#ifndef QEMU_QOM_QOBJECT_H +#define QEMU_QOM_QOBJECT_H + +/* + * object_property_get_qobject: + * @obj: the object + * @name: the name of the property + * @errp: returns an error if this function fails + * + * Returns: the value of the property, converted to QObject, or NULL if + * an error occurs. + */ +struct QObject *object_property_get_qobject(Object *obj, const char *name, + struct Error **errp); + +/** + * object_property_set_qobject: + * @obj: the object + * @name: the name of the property + * @value: The value that will be written to the property. + * @errp: returns an error if this function fails + * + * Writes a property to a object. + * + * Returns: %true on success, %false on failure. + */ +bool object_property_set_qobject(Object *obj, + const char *name, struct QObject *value, + struct Error **errp); + +#endif diff --git a/include/scsi/constants.h b/include/scsi/constants.h new file mode 100644 index 000000000..2a32c08b5 --- /dev/null +++ b/include/scsi/constants.h @@ -0,0 +1,319 @@ +/* Copyright (C) 1998, 1999 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with this library; if not, see . +*/ + +/* + * This header file contains public constants and structures used by + * the scsi code for linux. + */ + +#ifndef SCSI_CONSTANTS_H +#define SCSI_CONSTANTS_H + +/* + * SCSI opcodes + */ + +#define TEST_UNIT_READY 0x00 +#define REWIND 0x01 +#define REQUEST_SENSE 0x03 +#define FORMAT_UNIT 0x04 +#define READ_BLOCK_LIMITS 0x05 +#define INITIALIZE_ELEMENT_STATUS 0x07 +#define REASSIGN_BLOCKS 0x07 +#define READ_6 0x08 +#define WRITE_6 0x0a +#define SET_CAPACITY 0x0b +#define READ_REVERSE 0x0f +#define WRITE_FILEMARKS 0x10 +#define SPACE 0x11 +#define INQUIRY 0x12 +#define RECOVER_BUFFERED_DATA 0x14 +#define MODE_SELECT 0x15 +#define RESERVE 0x16 +#define RELEASE 0x17 +#define COPY 0x18 +#define ERASE 0x19 +#define MODE_SENSE 0x1a +#define LOAD_UNLOAD 0x1b +#define SCAN 0x1b +#define START_STOP 0x1b +#define RECEIVE_DIAGNOSTIC 0x1c +#define SEND_DIAGNOSTIC 0x1d +#define ALLOW_MEDIUM_REMOVAL 0x1e +#define SET_WINDOW 0x24 +#define READ_CAPACITY_10 0x25 +#define GET_WINDOW 0x25 +#define READ_10 0x28 +#define WRITE_10 0x2a +#define SEND 0x2a +#define SEEK_10 0x2b +#define LOCATE_10 0x2b +#define POSITION_TO_ELEMENT 0x2b +#define WRITE_VERIFY_10 0x2e +#define VERIFY_10 0x2f +#define SEARCH_HIGH 0x30 +#define SEARCH_EQUAL 0x31 +#define OBJECT_POSITION 0x31 +#define SEARCH_LOW 0x32 +#define SET_LIMITS 0x33 +#define PRE_FETCH 0x34 +#define READ_POSITION 0x34 +#define GET_DATA_BUFFER_STATUS 0x34 +#define SYNCHRONIZE_CACHE 0x35 +#define LOCK_UNLOCK_CACHE 0x36 +#define INITIALIZE_ELEMENT_STATUS_WITH_RANGE 0x37 +#define READ_DEFECT_DATA 0x37 +#define MEDIUM_SCAN 0x38 +#define COMPARE 0x39 +#define COPY_VERIFY 0x3a +#define WRITE_BUFFER 0x3b +#define READ_BUFFER 0x3c +#define UPDATE_BLOCK 0x3d +#define READ_LONG_10 0x3e +#define WRITE_LONG_10 0x3f +#define CHANGE_DEFINITION 0x40 +#define WRITE_SAME_10 0x41 +#define UNMAP 0x42 +#define READ_TOC 0x43 +#define REPORT_DENSITY_SUPPORT 0x44 +#define GET_CONFIGURATION 0x46 +#define SANITIZE 0x48 +#define GET_EVENT_STATUS_NOTIFICATION 0x4a +#define LOG_SELECT 0x4c +#define LOG_SENSE 0x4d +#define READ_DISC_INFORMATION 0x51 +#define RESERVE_TRACK 0x53 +#define MODE_SELECT_10 0x55 +#define RESERVE_10 0x56 +#define RELEASE_10 0x57 +#define MODE_SENSE_10 0x5a +#define SEND_CUE_SHEET 0x5d +#define PERSISTENT_RESERVE_IN 0x5e +#define PERSISTENT_RESERVE_OUT 0x5f +#define VARLENGTH_CDB 0x7f +#define WRITE_FILEMARKS_16 0x80 +#define READ_REVERSE_16 0x81 +#define ALLOW_OVERWRITE 0x82 +#define EXTENDED_COPY 0x83 +#define ATA_PASSTHROUGH_16 0x85 +#define ACCESS_CONTROL_IN 0x86 +#define ACCESS_CONTROL_OUT 0x87 +#define READ_16 0x88 +#define COMPARE_AND_WRITE 0x89 +#define WRITE_16 0x8a +#define WRITE_VERIFY_16 0x8e +#define VERIFY_16 0x8f +#define PRE_FETCH_16 0x90 +#define SPACE_16 0x91 +#define SYNCHRONIZE_CACHE_16 0x91 +#define LOCATE_16 0x92 +#define WRITE_SAME_16 0x93 +#define ERASE_16 0x93 +#define SERVICE_ACTION_IN_16 0x9e +#define WRITE_LONG_16 0x9f +#define REPORT_LUNS 0xa0 +#define ATA_PASSTHROUGH_12 0xa1 +#define MAINTENANCE_IN 0xa3 +#define MAINTENANCE_OUT 0xa4 +#define MOVE_MEDIUM 0xa5 +#define EXCHANGE_MEDIUM 0xa6 +#define SET_READ_AHEAD 0xa7 +#define READ_12 0xa8 +#define WRITE_12 0xaa +#define SERVICE_ACTION_IN_12 0xab +#define ERASE_12 0xac +#define READ_DVD_STRUCTURE 0xad +#define WRITE_VERIFY_12 0xae +#define VERIFY_12 0xaf +#define SEARCH_HIGH_12 0xb0 +#define SEARCH_EQUAL_12 0xb1 +#define SEARCH_LOW_12 0xb2 +#define READ_ELEMENT_STATUS 0xb8 +#define SEND_VOLUME_TAG 0xb6 +#define READ_DEFECT_DATA_12 0xb7 +#define SET_CD_SPEED 0xbb +#define MECHANISM_STATUS 0xbd +#define READ_CD 0xbe +#define SEND_DVD_STRUCTURE 0xbf + +/* + * SERVICE ACTION IN subcodes + */ +#define SAI_READ_CAPACITY_16 0x10 + +/* + * READ POSITION service action codes + */ +#define SHORT_FORM_BLOCK_ID 0x00 +#define SHORT_FORM_VENDOR_SPECIFIC 0x01 +#define LONG_FORM 0x06 +#define EXTENDED_FORM 0x08 + +/* + * SAM Status codes + */ + +#define GOOD 0x00 +#define CHECK_CONDITION 0x02 +#define CONDITION_GOOD 0x04 +#define BUSY 0x08 +#define INTERMEDIATE_GOOD 0x10 +#define INTERMEDIATE_C_GOOD 0x14 +#define RESERVATION_CONFLICT 0x18 +#define COMMAND_TERMINATED 0x22 +#define TASK_SET_FULL 0x28 +#define ACA_ACTIVE 0x30 +#define TASK_ABORTED 0x40 + +#define STATUS_MASK 0x3e + +/* + * SENSE KEYS + */ + +#define NO_SENSE 0x00 +#define RECOVERED_ERROR 0x01 +#define NOT_READY 0x02 +#define MEDIUM_ERROR 0x03 +#define HARDWARE_ERROR 0x04 +#define ILLEGAL_REQUEST 0x05 +#define UNIT_ATTENTION 0x06 +#define DATA_PROTECT 0x07 +#define BLANK_CHECK 0x08 +#define COPY_ABORTED 0x0a +#define ABORTED_COMMAND 0x0b +#define VOLUME_OVERFLOW 0x0d +#define MISCOMPARE 0x0e + + +/* + * DEVICE TYPES + */ + +#define TYPE_DISK 0x00 +#define TYPE_TAPE 0x01 +#define TYPE_PRINTER 0x02 +#define TYPE_PROCESSOR 0x03 /* HP scanners use this */ +#define TYPE_WORM 0x04 /* Treated as ROM by our system */ +#define TYPE_ROM 0x05 +#define TYPE_SCANNER 0x06 +#define TYPE_MOD 0x07 /* Magneto-optical disk - + * - treated as TYPE_DISK */ +#define TYPE_MEDIUM_CHANGER 0x08 +#define TYPE_STORAGE_ARRAY 0x0c /* Storage array device */ +#define TYPE_ENCLOSURE 0x0d /* Enclosure Services Device */ +#define TYPE_RBC 0x0e /* Simplified Direct-Access Device */ +#define TYPE_OSD 0x11 /* Object-storage Device */ +#define TYPE_ZBC 0x14 /* Host-managed Zoned SCSI Device */ +#define TYPE_WLUN 0x1e /* Well known LUN */ +#define TYPE_NOT_PRESENT 0x1f +#define TYPE_INACTIVE 0x20 +#define TYPE_NO_LUN 0x7f + +/* Mode page codes for mode sense/set */ +#define MODE_PAGE_R_W_ERROR 0x01 +#define MODE_PAGE_HD_GEOMETRY 0x04 +#define MODE_PAGE_FLEXIBLE_DISK_GEOMETRY 0x05 +#define MODE_PAGE_CACHING 0x08 +#define MODE_PAGE_AUDIO_CTL 0x0e +#define MODE_PAGE_POWER 0x1a +#define MODE_PAGE_FAULT_FAIL 0x1c +#define MODE_PAGE_TO_PROTECT 0x1d +#define MODE_PAGE_CAPABILITIES 0x2a +#define MODE_PAGE_ALLS 0x3f +/* Not in Mt. Fuji, but in ATAPI 2.6 -- deprecated now in favor + * of MODE_PAGE_SENSE_POWER */ +#define MODE_PAGE_CDROM 0x0d + +/* Event notification classes for GET EVENT STATUS NOTIFICATION */ +#define GESN_NO_EVENTS 0 +#define GESN_OPERATIONAL_CHANGE 1 +#define GESN_POWER_MANAGEMENT 2 +#define GESN_EXTERNAL_REQUEST 3 +#define GESN_MEDIA 4 +#define GESN_MULTIPLE_HOSTS 5 +#define GESN_DEVICE_BUSY 6 + +/* Event codes for MEDIA event status notification */ +#define MEC_NO_CHANGE 0 +#define MEC_EJECT_REQUESTED 1 +#define MEC_NEW_MEDIA 2 +#define MEC_MEDIA_REMOVAL 3 /* only for media changers */ +#define MEC_MEDIA_CHANGED 4 /* only for media changers */ +#define MEC_BG_FORMAT_COMPLETED 5 /* MRW or DVD+RW b/g format completed */ +#define MEC_BG_FORMAT_RESTARTED 6 /* MRW or DVD+RW b/g format restarted */ + +#define MS_TRAY_OPEN 1 +#define MS_MEDIA_PRESENT 2 + +/* + * Based on values from but extending CD_MINS + * to the maximum common size allowed by the Orange's Book ATIP + * + * 90 and 99 min CDs are also available but using them as the + * upper limit reduces the effectiveness of the heuristic to + * detect DVDs burned to less than 25% of their maximum capacity + */ + +/* Some generally useful CD-ROM information */ +#define CD_MINS 80 /* max. minutes per CD */ +#define CD_SECS 60 /* seconds per minute */ +#define CD_FRAMES 75 /* frames per second */ +#define CD_FRAMESIZE 2048 /* bytes per frame, "cooked" mode */ +#define CD_MAX_BYTES (CD_MINS * CD_SECS * CD_FRAMES * CD_FRAMESIZE) +#define CD_MAX_SECTORS (CD_MAX_BYTES / 512) + +/* + * The MMC values are not IDE specific and might need to be moved + * to a common header if they are also needed for the SCSI emulation + */ + +/* Profile list from MMC-6 revision 1 table 91 */ +#define MMC_PROFILE_NONE 0x0000 +#define MMC_PROFILE_CD_ROM 0x0008 +#define MMC_PROFILE_CD_R 0x0009 +#define MMC_PROFILE_CD_RW 0x000A +#define MMC_PROFILE_DVD_ROM 0x0010 +#define MMC_PROFILE_DVD_R_SR 0x0011 +#define MMC_PROFILE_DVD_RAM 0x0012 +#define MMC_PROFILE_DVD_RW_RO 0x0013 +#define MMC_PROFILE_DVD_RW_SR 0x0014 +#define MMC_PROFILE_DVD_R_DL_SR 0x0015 +#define MMC_PROFILE_DVD_R_DL_JR 0x0016 +#define MMC_PROFILE_DVD_RW_DL 0x0017 +#define MMC_PROFILE_DVD_DDR 0x0018 +#define MMC_PROFILE_DVD_PLUS_RW 0x001A +#define MMC_PROFILE_DVD_PLUS_R 0x001B +#define MMC_PROFILE_DVD_PLUS_RW_DL 0x002A +#define MMC_PROFILE_DVD_PLUS_R_DL 0x002B +#define MMC_PROFILE_BD_ROM 0x0040 +#define MMC_PROFILE_BD_R_SRM 0x0041 +#define MMC_PROFILE_BD_R_RRM 0x0042 +#define MMC_PROFILE_BD_RE 0x0043 +#define MMC_PROFILE_HDDVD_ROM 0x0050 +#define MMC_PROFILE_HDDVD_R 0x0051 +#define MMC_PROFILE_HDDVD_RAM 0x0052 +#define MMC_PROFILE_HDDVD_RW 0x0053 +#define MMC_PROFILE_HDDVD_R_DL 0x0058 +#define MMC_PROFILE_HDDVD_RW_DL 0x005A +#define MMC_PROFILE_INVALID 0xFFFF + +#define XCOPY_DESC_OFFSET 16 +#define IDENT_DESCR_TGT_DESCR_SIZE 32 +#define XCOPY_BLK2BLK_SEG_DESC_SIZE 28 + +#endif diff --git a/include/scsi/pr-manager.h b/include/scsi/pr-manager.h new file mode 100644 index 000000000..e4ecbe00f --- /dev/null +++ b/include/scsi/pr-manager.h @@ -0,0 +1,42 @@ +#ifndef PR_MANAGER_H +#define PR_MANAGER_H + +#include "qom/object.h" +#include "qapi/visitor.h" +#include "qom/object_interfaces.h" +#include "block/aio.h" +#include "qemu/coroutine.h" + +#define TYPE_PR_MANAGER "pr-manager" + +OBJECT_DECLARE_TYPE(PRManager, PRManagerClass, + PR_MANAGER) + +struct sg_io_hdr; + +struct PRManager { + /* */ + Object parent; +}; + +/** + * PRManagerClass: + * @parent_class: the base class + * @run: callback invoked in thread pool context + */ +struct PRManagerClass { + /* */ + ObjectClass parent_class; + + /* */ + int (*run)(PRManager *pr_mgr, int fd, struct sg_io_hdr *hdr); + bool (*is_connected)(PRManager *pr_mgr); +}; + +bool pr_manager_is_connected(PRManager *pr_mgr); +int coroutine_fn pr_manager_execute(PRManager *pr_mgr, AioContext *ctx, int fd, + struct sg_io_hdr *hdr); + +PRManager *pr_manager_lookup(const char *id, Error **errp); + +#endif diff --git a/include/scsi/utils.h b/include/scsi/utils.h new file mode 100644 index 000000000..fbc558827 --- /dev/null +++ b/include/scsi/utils.h @@ -0,0 +1,136 @@ +#ifndef SCSI_UTILS_H +#define SCSI_UTILS_H + +#ifdef CONFIG_LINUX +#include +#endif + +#define SCSI_CMD_BUF_SIZE 16 +#define SCSI_SENSE_LEN 18 +#define SCSI_SENSE_LEN_SCANNER 32 +#define SCSI_INQUIRY_LEN 36 + +enum SCSIXferMode { + SCSI_XFER_NONE, /* TEST_UNIT_READY, ... */ + SCSI_XFER_FROM_DEV, /* READ, INQUIRY, MODE_SENSE, ... */ + SCSI_XFER_TO_DEV, /* WRITE, MODE_SELECT, ... */ +}; + +typedef struct SCSICommand { + uint8_t buf[SCSI_CMD_BUF_SIZE]; + int len; + size_t xfer; + uint64_t lba; + enum SCSIXferMode mode; +} SCSICommand; + +typedef struct SCSISense { + uint8_t key; + uint8_t asc; + uint8_t ascq; +} SCSISense; + +int scsi_build_sense(uint8_t *buf, SCSISense sense); +SCSISense scsi_parse_sense_buf(const uint8_t *in_buf, int in_len); +int scsi_build_sense_buf(uint8_t *buf, size_t max_size, SCSISense sense, + bool fixed_sense); + +/* + * Predefined sense codes + */ + +/* No sense data available */ +extern const struct SCSISense sense_code_NO_SENSE; +/* LUN not ready, Manual intervention required */ +extern const struct SCSISense sense_code_LUN_NOT_READY; +/* LUN not ready, Medium not present */ +extern const struct SCSISense sense_code_NO_MEDIUM; +/* LUN not ready, medium removal prevented */ +extern const struct SCSISense sense_code_NOT_READY_REMOVAL_PREVENTED; +/* Hardware error, internal target failure */ +extern const struct SCSISense sense_code_TARGET_FAILURE; +/* Illegal request, invalid command operation code */ +extern const struct SCSISense sense_code_INVALID_OPCODE; +/* Illegal request, LBA out of range */ +extern const struct SCSISense sense_code_LBA_OUT_OF_RANGE; +/* Illegal request, Invalid field in CDB */ +extern const struct SCSISense sense_code_INVALID_FIELD; +/* Illegal request, Invalid field in parameter list */ +extern const struct SCSISense sense_code_INVALID_PARAM; +/* Illegal request, Parameter list length error */ +extern const struct SCSISense sense_code_INVALID_PARAM_LEN; +/* Illegal request, LUN not supported */ +extern const struct SCSISense sense_code_LUN_NOT_SUPPORTED; +/* Illegal request, Saving parameters not supported */ +extern const struct SCSISense sense_code_SAVING_PARAMS_NOT_SUPPORTED; +/* Illegal request, Incompatible format */ +extern const struct SCSISense sense_code_INCOMPATIBLE_FORMAT; +/* Illegal request, medium removal prevented */ +extern const struct SCSISense sense_code_ILLEGAL_REQ_REMOVAL_PREVENTED; +/* Illegal request, Invalid Transfer Tag */ +extern const struct SCSISense sense_code_INVALID_TAG; +/* Command aborted, I/O process terminated */ +extern const struct SCSISense sense_code_IO_ERROR; +/* Command aborted, I_T Nexus loss occurred */ +extern const struct SCSISense sense_code_I_T_NEXUS_LOSS; +/* Command aborted, Logical Unit failure */ +extern const struct SCSISense sense_code_LUN_FAILURE; +/* Command aborted, LUN Communication failure */ +extern const struct SCSISense sense_code_LUN_COMM_FAILURE; +/* Command aborted, Overlapped Commands Attempted */ +extern const struct SCSISense sense_code_OVERLAPPED_COMMANDS; +/* Medium error, Unrecovered read error */ +extern const struct SCSISense sense_code_READ_ERROR; +/* LUN not ready, Cause not reportable */ +extern const struct SCSISense sense_code_NOT_READY; +/* Unit attention, Capacity data has changed */ +extern const struct SCSISense sense_code_CAPACITY_CHANGED; +/* Unit attention, SCSI bus reset */ +extern const struct SCSISense sense_code_SCSI_BUS_RESET; +/* LUN not ready, Medium not present */ +extern const struct SCSISense sense_code_UNIT_ATTENTION_NO_MEDIUM; +/* Unit attention, Power on, reset or bus device reset occurred */ +extern const struct SCSISense sense_code_RESET; +/* Unit attention, Medium may have changed*/ +extern const struct SCSISense sense_code_MEDIUM_CHANGED; +/* Unit attention, Reported LUNs data has changed */ +extern const struct SCSISense sense_code_REPORTED_LUNS_CHANGED; +/* Unit attention, Device internal reset */ +extern const struct SCSISense sense_code_DEVICE_INTERNAL_RESET; +/* Data Protection, Write Protected */ +extern const struct SCSISense sense_code_WRITE_PROTECTED; +/* Data Protection, Space Allocation Failed Write Protect */ +extern const struct SCSISense sense_code_SPACE_ALLOC_FAILED; + +#define SENSE_CODE(x) sense_code_ ## x + +int scsi_sense_to_errno(int key, int asc, int ascq); +int scsi_sense_buf_to_errno(const uint8_t *sense, size_t sense_size); +bool scsi_sense_buf_is_guest_recoverable(const uint8_t *sense, size_t sense_size); + +int scsi_convert_sense(uint8_t *in_buf, int in_len, + uint8_t *buf, int len, bool fixed); +const char *scsi_command_name(uint8_t cmd); + +uint64_t scsi_cmd_lba(SCSICommand *cmd); +uint32_t scsi_data_cdb_xfer(uint8_t *buf); +uint32_t scsi_cdb_xfer(uint8_t *buf); +int scsi_cdb_length(uint8_t *buf); + +/* Linux SG_IO interface. */ +#ifdef CONFIG_LINUX +#define SG_ERR_DRIVER_TIMEOUT 0x06 +#define SG_ERR_DRIVER_SENSE 0x08 + +#define SG_ERR_DID_OK 0x00 +#define SG_ERR_DID_NO_CONNECT 0x01 +#define SG_ERR_DID_BUS_BUSY 0x02 +#define SG_ERR_DID_TIME_OUT 0x03 + +#define SG_ERR_DRIVER_SENSE 0x08 + +int sg_io_sense_from_errno(int errno_value, struct sg_io_hdr *io_hdr, + SCSISense *sense); +#endif + +#endif diff --git a/include/standard-headers/asm-s390/virtio-ccw.h b/include/standard-headers/asm-s390/virtio-ccw.h new file mode 100644 index 000000000..2b605f7e8 --- /dev/null +++ b/include/standard-headers/asm-s390/virtio-ccw.h @@ -0,0 +1,18 @@ +/* SPDX-License-Identifier: ((GPL-2.0 WITH Linux-syscall-note) OR BSD-3-Clause) */ +/* + * Definitions for virtio-ccw devices. + * + * Copyright IBM Corp. 2013 + * + * Author(s): Cornelia Huck + */ +#ifndef __KVM_VIRTIO_CCW_H +#define __KVM_VIRTIO_CCW_H + +/* Alignment of vring buffers. */ +#define KVM_VIRTIO_CCW_RING_ALIGN 4096 + +/* Subcode for diagnose 500 (virtio hypercall). */ +#define KVM_S390_VIRTIO_CCW_NOTIFY 3 + +#endif diff --git a/include/standard-headers/asm-x86/bootparam.h b/include/standard-headers/asm-x86/bootparam.h new file mode 100644 index 000000000..072e2ed54 --- /dev/null +++ b/include/standard-headers/asm-x86/bootparam.h @@ -0,0 +1,41 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ +#ifndef _ASM_X86_BOOTPARAM_H +#define _ASM_X86_BOOTPARAM_H + +/* setup_data/setup_indirect types */ +#define SETUP_NONE 0 +#define SETUP_E820_EXT 1 +#define SETUP_DTB 2 +#define SETUP_PCI 3 +#define SETUP_EFI 4 +#define SETUP_APPLE_PROPERTIES 5 +#define SETUP_JAILHOUSE 6 + +#define SETUP_INDIRECT (1<<31) + +/* SETUP_INDIRECT | max(SETUP_*) */ +#define SETUP_TYPE_MAX (SETUP_INDIRECT | SETUP_JAILHOUSE) + +/* ram_size flags */ +#define RAMDISK_IMAGE_START_MASK 0x07FF +#define RAMDISK_PROMPT_FLAG 0x8000 +#define RAMDISK_LOAD_FLAG 0x4000 + +/* loadflags */ +#define LOADED_HIGH (1<<0) +#define KASLR_FLAG (1<<1) +#define QUIET_FLAG (1<<5) +#define KEEP_SEGMENTS (1<<6) +#define CAN_USE_HEAP (1<<7) + +/* xloadflags */ +#define XLF_KERNEL_64 (1<<0) +#define XLF_CAN_BE_LOADED_ABOVE_4G (1<<1) +#define XLF_EFI_HANDOVER_32 (1<<2) +#define XLF_EFI_HANDOVER_64 (1<<3) +#define XLF_EFI_KEXEC (1<<4) +#define XLF_5LEVEL (1<<5) +#define XLF_5LEVEL_ENABLED (1<<6) + + +#endif /* _ASM_X86_BOOTPARAM_H */ diff --git a/include/standard-headers/asm-x86/kvm_para.h b/include/standard-headers/asm-x86/kvm_para.h new file mode 100644 index 000000000..07877d329 --- /dev/null +++ b/include/standard-headers/asm-x86/kvm_para.h @@ -0,0 +1,138 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ +#ifndef _ASM_X86_KVM_PARA_H +#define _ASM_X86_KVM_PARA_H + +#include "standard-headers/linux/types.h" + +/* This CPUID returns the signature 'KVMKVMKVM' in ebx, ecx, and edx. It + * should be used to determine that a VM is running under KVM. + */ +#define KVM_CPUID_SIGNATURE 0x40000000 + +/* This CPUID returns two feature bitmaps in eax, edx. Before enabling + * a particular paravirtualization, the appropriate feature bit should + * be checked in eax. The performance hint feature bit should be checked + * in edx. + */ +#define KVM_CPUID_FEATURES 0x40000001 +#define KVM_FEATURE_CLOCKSOURCE 0 +#define KVM_FEATURE_NOP_IO_DELAY 1 +#define KVM_FEATURE_MMU_OP 2 +/* This indicates that the new set of kvmclock msrs + * are available. The use of 0x11 and 0x12 is deprecated + */ +#define KVM_FEATURE_CLOCKSOURCE2 3 +#define KVM_FEATURE_ASYNC_PF 4 +#define KVM_FEATURE_STEAL_TIME 5 +#define KVM_FEATURE_PV_EOI 6 +#define KVM_FEATURE_PV_UNHALT 7 +#define KVM_FEATURE_PV_TLB_FLUSH 9 +#define KVM_FEATURE_ASYNC_PF_VMEXIT 10 +#define KVM_FEATURE_PV_SEND_IPI 11 +#define KVM_FEATURE_POLL_CONTROL 12 +#define KVM_FEATURE_PV_SCHED_YIELD 13 +#define KVM_FEATURE_ASYNC_PF_INT 14 + +#define KVM_HINTS_REALTIME 0 + +/* The last 8 bits are used to indicate how to interpret the flags field + * in pvclock structure. If no bits are set, all flags are ignored. + */ +#define KVM_FEATURE_CLOCKSOURCE_STABLE_BIT 24 + +#define MSR_KVM_WALL_CLOCK 0x11 +#define MSR_KVM_SYSTEM_TIME 0x12 + +#define KVM_MSR_ENABLED 1 +/* Custom MSRs falls in the range 0x4b564d00-0x4b564dff */ +#define MSR_KVM_WALL_CLOCK_NEW 0x4b564d00 +#define MSR_KVM_SYSTEM_TIME_NEW 0x4b564d01 +#define MSR_KVM_ASYNC_PF_EN 0x4b564d02 +#define MSR_KVM_STEAL_TIME 0x4b564d03 +#define MSR_KVM_PV_EOI_EN 0x4b564d04 +#define MSR_KVM_POLL_CONTROL 0x4b564d05 +#define MSR_KVM_ASYNC_PF_INT 0x4b564d06 +#define MSR_KVM_ASYNC_PF_ACK 0x4b564d07 + +struct kvm_steal_time { + uint64_t steal; + uint32_t version; + uint32_t flags; + uint8_t preempted; + uint8_t uint8_t_pad[3]; + uint32_t pad[11]; +}; + +#define KVM_VCPU_PREEMPTED (1 << 0) +#define KVM_VCPU_FLUSH_TLB (1 << 1) + +#define KVM_CLOCK_PAIRING_WALLCLOCK 0 +struct kvm_clock_pairing { + int64_t sec; + int64_t nsec; + uint64_t tsc; + uint32_t flags; + uint32_t pad[9]; +}; + +#define KVM_STEAL_ALIGNMENT_BITS 5 +#define KVM_STEAL_VALID_BITS ((-1ULL << (KVM_STEAL_ALIGNMENT_BITS + 1))) +#define KVM_STEAL_RESERVED_MASK (((1 << KVM_STEAL_ALIGNMENT_BITS) - 1 ) << 1) + +#define KVM_MAX_MMU_OP_BATCH 32 + +#define KVM_ASYNC_PF_ENABLED (1 << 0) +#define KVM_ASYNC_PF_SEND_ALWAYS (1 << 1) +#define KVM_ASYNC_PF_DELIVERY_AS_PF_VMEXIT (1 << 2) +#define KVM_ASYNC_PF_DELIVERY_AS_INT (1 << 3) + +/* MSR_KVM_ASYNC_PF_INT */ +#define KVM_ASYNC_PF_VEC_MASK GENMASK(7, 0) + + +/* Operations for KVM_HC_MMU_OP */ +#define KVM_MMU_OP_WRITE_PTE 1 +#define KVM_MMU_OP_FLUSH_TLB 2 +#define KVM_MMU_OP_RELEASE_PT 3 + +/* Payload for KVM_HC_MMU_OP */ +struct kvm_mmu_op_header { + uint32_t op; + uint32_t pad; +}; + +struct kvm_mmu_op_write_pte { + struct kvm_mmu_op_header header; + uint64_t pte_phys; + uint64_t pte_val; +}; + +struct kvm_mmu_op_flush_tlb { + struct kvm_mmu_op_header header; +}; + +struct kvm_mmu_op_release_pt { + struct kvm_mmu_op_header header; + uint64_t pt_phys; +}; + +#define KVM_PV_REASON_PAGE_NOT_PRESENT 1 +#define KVM_PV_REASON_PAGE_READY 2 + +struct kvm_vcpu_pv_apf_data { + /* Used for 'page not present' events delivered via #PF */ + uint32_t flags; + + /* Used for 'page ready' events delivered via interrupt notification */ + uint32_t token; + + uint8_t pad[56]; + uint32_t enabled; +}; + +#define KVM_PV_EOI_BIT 0 +#define KVM_PV_EOI_MASK (0x1 << KVM_PV_EOI_BIT) +#define KVM_PV_EOI_ENABLED KVM_PV_EOI_MASK +#define KVM_PV_EOI_DISABLED 0x0 + +#endif /* _ASM_X86_KVM_PARA_H */ diff --git a/include/standard-headers/drivers/infiniband/hw/vmw_pvrdma/pvrdma_dev_api.h b/include/standard-headers/drivers/infiniband/hw/vmw_pvrdma/pvrdma_dev_api.h new file mode 100644 index 000000000..a5a1c8234 --- /dev/null +++ b/include/standard-headers/drivers/infiniband/hw/vmw_pvrdma/pvrdma_dev_api.h @@ -0,0 +1,685 @@ +/* + * Copyright (c) 2012-2016 VMware, Inc. All rights reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of EITHER the GNU General Public License + * version 2 as published by the Free Software Foundation or the BSD + * 2-Clause License. This program is distributed in the hope that it + * will be useful, but WITHOUT ANY WARRANTY; WITHOUT EVEN THE IMPLIED + * WARRANTY OF MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE. + * See the GNU General Public License version 2 for more details at + * http://www.gnu.org/licenses/old-licenses/gpl-2.0.en.html. + * + * You should have received a copy of the GNU General Public License + * along with this program available in the file COPYING in the main + * directory of this source tree. + * + * The BSD 2-Clause License + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS + * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE + * COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, + * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR + * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, + * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED + * OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef __PVRDMA_DEV_API_H__ +#define __PVRDMA_DEV_API_H__ + +#include "standard-headers/linux/types.h" + +#include "pvrdma_verbs.h" + +/* + * PVRDMA version macros. Some new features require updates to PVRDMA_VERSION. + * These macros allow us to check for different features if necessary. + */ + +#define PVRDMA_ROCEV1_VERSION 17 +#define PVRDMA_ROCEV2_VERSION 18 +#define PVRDMA_PPN64_VERSION 19 +#define PVRDMA_QPHANDLE_VERSION 20 +#define PVRDMA_VERSION PVRDMA_QPHANDLE_VERSION + +#define PVRDMA_BOARD_ID 1 +#define PVRDMA_REV_ID 1 + +/* + * Masks and accessors for page directory, which is a two-level lookup: + * page directory -> page table -> page. Only one directory for now, but we + * could expand that easily. 9 bits for tables, 9 bits for pages, gives one + * gigabyte for memory regions and so forth. + */ + +#define PVRDMA_PDIR_SHIFT 18 +#define PVRDMA_PTABLE_SHIFT 9 +#define PVRDMA_PAGE_DIR_DIR(x) (((x) >> PVRDMA_PDIR_SHIFT) & 0x1) +#define PVRDMA_PAGE_DIR_TABLE(x) (((x) >> PVRDMA_PTABLE_SHIFT) & 0x1ff) +#define PVRDMA_PAGE_DIR_PAGE(x) ((x) & 0x1ff) +#define PVRDMA_PAGE_DIR_MAX_PAGES (1 * 512 * 512) +#define PVRDMA_MAX_FAST_REG_PAGES 128 + +/* + * Max MSI-X vectors. + */ + +#define PVRDMA_MAX_INTERRUPTS 3 + +/* Register offsets within PCI resource on BAR1. */ +#define PVRDMA_REG_VERSION 0x00 /* R: Version of device. */ +#define PVRDMA_REG_DSRLOW 0x04 /* W: Device shared region low PA. */ +#define PVRDMA_REG_DSRHIGH 0x08 /* W: Device shared region high PA. */ +#define PVRDMA_REG_CTL 0x0c /* W: PVRDMA_DEVICE_CTL */ +#define PVRDMA_REG_REQUEST 0x10 /* W: Indicate device request. */ +#define PVRDMA_REG_ERR 0x14 /* R: Device error. */ +#define PVRDMA_REG_ICR 0x18 /* R: Interrupt cause. */ +#define PVRDMA_REG_IMR 0x1c /* R/W: Interrupt mask. */ +#define PVRDMA_REG_MACL 0x20 /* R/W: MAC address low. */ +#define PVRDMA_REG_MACH 0x24 /* R/W: MAC address high. */ + +/* Object flags. */ +#define PVRDMA_CQ_FLAG_ARMED_SOL BIT(0) /* Armed for solicited-only. */ +#define PVRDMA_CQ_FLAG_ARMED BIT(1) /* Armed. */ +#define PVRDMA_MR_FLAG_DMA BIT(0) /* DMA region. */ +#define PVRDMA_MR_FLAG_FRMR BIT(1) /* Fast reg memory region. */ + +/* + * Atomic operation capability (masked versions are extended atomic + * operations. + */ + +#define PVRDMA_ATOMIC_OP_COMP_SWAP BIT(0) /* Compare and swap. */ +#define PVRDMA_ATOMIC_OP_FETCH_ADD BIT(1) /* Fetch and add. */ +#define PVRDMA_ATOMIC_OP_MASK_COMP_SWAP BIT(2) /* Masked compare and swap. */ +#define PVRDMA_ATOMIC_OP_MASK_FETCH_ADD BIT(3) /* Masked fetch and add. */ + +/* + * Base Memory Management Extension flags to support Fast Reg Memory Regions + * and Fast Reg Work Requests. Each flag represents a verb operation and we + * must support all of them to qualify for the BMME device cap. + */ + +#define PVRDMA_BMME_FLAG_LOCAL_INV BIT(0) /* Local Invalidate. */ +#define PVRDMA_BMME_FLAG_REMOTE_INV BIT(1) /* Remote Invalidate. */ +#define PVRDMA_BMME_FLAG_FAST_REG_WR BIT(2) /* Fast Reg Work Request. */ + +/* + * GID types. The interpretation of the gid_types bit field in the device + * capabilities will depend on the device mode. For now, the device only + * supports RoCE as mode, so only the different GID types for RoCE are + * defined. + */ + +#define PVRDMA_GID_TYPE_FLAG_ROCE_V1 BIT(0) +#define PVRDMA_GID_TYPE_FLAG_ROCE_V2 BIT(1) + +/* + * Version checks. This checks whether each version supports specific + * capabilities from the device. + */ + +#define PVRDMA_IS_VERSION17(_dev) \ + (_dev->dsr_version == PVRDMA_ROCEV1_VERSION && \ + _dev->dsr->caps.gid_types == PVRDMA_GID_TYPE_FLAG_ROCE_V1) + +#define PVRDMA_IS_VERSION18(_dev) \ + (_dev->dsr_version >= PVRDMA_ROCEV2_VERSION && \ + (_dev->dsr->caps.gid_types == PVRDMA_GID_TYPE_FLAG_ROCE_V1 || \ + _dev->dsr->caps.gid_types == PVRDMA_GID_TYPE_FLAG_ROCE_V2)) \ + +#define PVRDMA_SUPPORTED(_dev) \ + ((_dev->dsr->caps.mode == PVRDMA_DEVICE_MODE_ROCE) && \ + (PVRDMA_IS_VERSION17(_dev) || PVRDMA_IS_VERSION18(_dev))) + +/* + * Get capability values based on device version. + */ + +#define PVRDMA_GET_CAP(_dev, _old_val, _val) \ + ((PVRDMA_IS_VERSION18(_dev)) ? _val : _old_val) + +enum pvrdma_pci_resource { + PVRDMA_PCI_RESOURCE_MSIX, /* BAR0: MSI-X, MMIO. */ + PVRDMA_PCI_RESOURCE_REG, /* BAR1: Registers, MMIO. */ + PVRDMA_PCI_RESOURCE_UAR, /* BAR2: UAR pages, MMIO, 64-bit. */ + PVRDMA_PCI_RESOURCE_LAST, /* Last. */ +}; + +enum pvrdma_device_ctl { + PVRDMA_DEVICE_CTL_ACTIVATE, /* Activate device. */ + PVRDMA_DEVICE_CTL_UNQUIESCE, /* Unquiesce device. */ + PVRDMA_DEVICE_CTL_RESET, /* Reset device. */ +}; + +enum pvrdma_intr_vector { + PVRDMA_INTR_VECTOR_RESPONSE, /* Command response. */ + PVRDMA_INTR_VECTOR_ASYNC, /* Async events. */ + PVRDMA_INTR_VECTOR_CQ, /* CQ notification. */ + /* Additional CQ notification vectors. */ +}; + +enum pvrdma_intr_cause { + PVRDMA_INTR_CAUSE_RESPONSE = (1 << PVRDMA_INTR_VECTOR_RESPONSE), + PVRDMA_INTR_CAUSE_ASYNC = (1 << PVRDMA_INTR_VECTOR_ASYNC), + PVRDMA_INTR_CAUSE_CQ = (1 << PVRDMA_INTR_VECTOR_CQ), +}; + +enum pvrdma_gos_bits { + PVRDMA_GOS_BITS_UNK, /* Unknown. */ + PVRDMA_GOS_BITS_32, /* 32-bit. */ + PVRDMA_GOS_BITS_64, /* 64-bit. */ +}; + +enum pvrdma_gos_type { + PVRDMA_GOS_TYPE_UNK, /* Unknown. */ + PVRDMA_GOS_TYPE_LINUX, /* Linux. */ +}; + +enum pvrdma_device_mode { + PVRDMA_DEVICE_MODE_ROCE, /* RoCE. */ + PVRDMA_DEVICE_MODE_IWARP, /* iWarp. */ + PVRDMA_DEVICE_MODE_IB, /* InfiniBand. */ +}; + +struct pvrdma_gos_info { + uint32_t gos_bits:2; /* W: PVRDMA_GOS_BITS_ */ + uint32_t gos_type:4; /* W: PVRDMA_GOS_TYPE_ */ + uint32_t gos_ver:16; /* W: Guest OS version. */ + uint32_t gos_misc:10; /* W: Other. */ + uint32_t pad; /* Pad to 8-byte alignment. */ +}; + +struct pvrdma_device_caps { + uint64_t fw_ver; /* R: Query device. */ + uint64_t node_guid; + uint64_t sys_image_guid; + uint64_t max_mr_size; + uint64_t page_size_cap; + uint64_t atomic_arg_sizes; /* EX verbs. */ + uint32_t ex_comp_mask; /* EX verbs. */ + uint32_t device_cap_flags2; /* EX verbs. */ + uint32_t max_fa_bit_boundary; /* EX verbs. */ + uint32_t log_max_atomic_inline_arg; /* EX verbs. */ + uint32_t vendor_id; + uint32_t vendor_part_id; + uint32_t hw_ver; + uint32_t max_qp; + uint32_t max_qp_wr; + uint32_t device_cap_flags; + uint32_t max_sge; + uint32_t max_sge_rd; + uint32_t max_cq; + uint32_t max_cqe; + uint32_t max_mr; + uint32_t max_pd; + uint32_t max_qp_rd_atom; + uint32_t max_ee_rd_atom; + uint32_t max_res_rd_atom; + uint32_t max_qp_init_rd_atom; + uint32_t max_ee_init_rd_atom; + uint32_t max_ee; + uint32_t max_rdd; + uint32_t max_mw; + uint32_t max_raw_ipv6_qp; + uint32_t max_raw_ethy_qp; + uint32_t max_mcast_grp; + uint32_t max_mcast_qp_attach; + uint32_t max_total_mcast_qp_attach; + uint32_t max_ah; + uint32_t max_fmr; + uint32_t max_map_per_fmr; + uint32_t max_srq; + uint32_t max_srq_wr; + uint32_t max_srq_sge; + uint32_t max_uar; + uint32_t gid_tbl_len; + uint16_t max_pkeys; + uint8_t local_ca_ack_delay; + uint8_t phys_port_cnt; + uint8_t mode; /* PVRDMA_DEVICE_MODE_ */ + uint8_t atomic_ops; /* PVRDMA_ATOMIC_OP_* bits */ + uint8_t bmme_flags; /* FRWR Mem Mgmt Extensions */ + uint8_t gid_types; /* PVRDMA_GID_TYPE_FLAG_ */ + uint32_t max_fast_reg_page_list_len; +}; + +struct pvrdma_ring_page_info { + uint32_t num_pages; /* Num pages incl. header. */ + uint32_t reserved; /* Reserved. */ + uint64_t pdir_dma; /* Page directory PA. */ +}; + +#pragma pack(push, 1) + +struct pvrdma_device_shared_region { + uint32_t driver_version; /* W: Driver version. */ + uint32_t pad; /* Pad to 8-byte align. */ + struct pvrdma_gos_info gos_info; /* W: Guest OS information. */ + uint64_t cmd_slot_dma; /* W: Command slot address. */ + uint64_t resp_slot_dma; /* W: Response slot address. */ + struct pvrdma_ring_page_info async_ring_pages; + /* W: Async ring page info. */ + struct pvrdma_ring_page_info cq_ring_pages; + /* W: CQ ring page info. */ + union { + uint32_t uar_pfn; /* W: UAR pageframe. */ + uint64_t uar_pfn64; /* W: 64-bit UAR page frame. */ + }; + struct pvrdma_device_caps caps; /* R: Device capabilities. */ +}; + +#pragma pack(pop) + +/* Event types. Currently a 1:1 mapping with enum ib_event. */ +enum pvrdma_eqe_type { + PVRDMA_EVENT_CQ_ERR, + PVRDMA_EVENT_QP_FATAL, + PVRDMA_EVENT_QP_REQ_ERR, + PVRDMA_EVENT_QP_ACCESS_ERR, + PVRDMA_EVENT_COMM_EST, + PVRDMA_EVENT_SQ_DRAINED, + PVRDMA_EVENT_PATH_MIG, + PVRDMA_EVENT_PATH_MIG_ERR, + PVRDMA_EVENT_DEVICE_FATAL, + PVRDMA_EVENT_PORT_ACTIVE, + PVRDMA_EVENT_PORT_ERR, + PVRDMA_EVENT_LID_CHANGE, + PVRDMA_EVENT_PKEY_CHANGE, + PVRDMA_EVENT_SM_CHANGE, + PVRDMA_EVENT_SRQ_ERR, + PVRDMA_EVENT_SRQ_LIMIT_REACHED, + PVRDMA_EVENT_QP_LAST_WQE_REACHED, + PVRDMA_EVENT_CLIENT_REREGISTER, + PVRDMA_EVENT_GID_CHANGE, +}; + +/* Event queue element. */ +struct pvrdma_eqe { + uint32_t type; /* Event type. */ + uint32_t info; /* Handle, other. */ +}; + +/* CQ notification queue element. */ +struct pvrdma_cqne { + uint32_t info; /* Handle */ +}; + +enum { + PVRDMA_CMD_FIRST, + PVRDMA_CMD_QUERY_PORT = PVRDMA_CMD_FIRST, + PVRDMA_CMD_QUERY_PKEY, + PVRDMA_CMD_CREATE_PD, + PVRDMA_CMD_DESTROY_PD, + PVRDMA_CMD_CREATE_MR, + PVRDMA_CMD_DESTROY_MR, + PVRDMA_CMD_CREATE_CQ, + PVRDMA_CMD_RESIZE_CQ, + PVRDMA_CMD_DESTROY_CQ, + PVRDMA_CMD_CREATE_QP, + PVRDMA_CMD_MODIFY_QP, + PVRDMA_CMD_QUERY_QP, + PVRDMA_CMD_DESTROY_QP, + PVRDMA_CMD_CREATE_UC, + PVRDMA_CMD_DESTROY_UC, + PVRDMA_CMD_CREATE_BIND, + PVRDMA_CMD_DESTROY_BIND, + PVRDMA_CMD_CREATE_SRQ, + PVRDMA_CMD_MODIFY_SRQ, + PVRDMA_CMD_QUERY_SRQ, + PVRDMA_CMD_DESTROY_SRQ, + PVRDMA_CMD_MAX, +}; + +enum { + PVRDMA_CMD_FIRST_RESP = (1 << 31), + PVRDMA_CMD_QUERY_PORT_RESP = PVRDMA_CMD_FIRST_RESP, + PVRDMA_CMD_QUERY_PKEY_RESP, + PVRDMA_CMD_CREATE_PD_RESP, + PVRDMA_CMD_DESTROY_PD_RESP_NOOP, + PVRDMA_CMD_CREATE_MR_RESP, + PVRDMA_CMD_DESTROY_MR_RESP_NOOP, + PVRDMA_CMD_CREATE_CQ_RESP, + PVRDMA_CMD_RESIZE_CQ_RESP, + PVRDMA_CMD_DESTROY_CQ_RESP_NOOP, + PVRDMA_CMD_CREATE_QP_RESP, + PVRDMA_CMD_MODIFY_QP_RESP, + PVRDMA_CMD_QUERY_QP_RESP, + PVRDMA_CMD_DESTROY_QP_RESP, + PVRDMA_CMD_CREATE_UC_RESP, + PVRDMA_CMD_DESTROY_UC_RESP_NOOP, + PVRDMA_CMD_CREATE_BIND_RESP_NOOP, + PVRDMA_CMD_DESTROY_BIND_RESP_NOOP, + PVRDMA_CMD_CREATE_SRQ_RESP, + PVRDMA_CMD_MODIFY_SRQ_RESP, + PVRDMA_CMD_QUERY_SRQ_RESP, + PVRDMA_CMD_DESTROY_SRQ_RESP, + PVRDMA_CMD_MAX_RESP, +}; + +struct pvrdma_cmd_hdr { + uint64_t response; /* Key for response lookup. */ + uint32_t cmd; /* PVRDMA_CMD_ */ + uint32_t reserved; /* Reserved. */ +}; + +struct pvrdma_cmd_resp_hdr { + uint64_t response; /* From cmd hdr. */ + uint32_t ack; /* PVRDMA_CMD_XXX_RESP */ + uint8_t err; /* Error. */ + uint8_t reserved[3]; /* Reserved. */ +}; + +struct pvrdma_cmd_query_port { + struct pvrdma_cmd_hdr hdr; + uint8_t port_num; + uint8_t reserved[7]; +}; + +struct pvrdma_cmd_query_port_resp { + struct pvrdma_cmd_resp_hdr hdr; + struct pvrdma_port_attr attrs; +}; + +struct pvrdma_cmd_query_pkey { + struct pvrdma_cmd_hdr hdr; + uint8_t port_num; + uint8_t index; + uint8_t reserved[6]; +}; + +struct pvrdma_cmd_query_pkey_resp { + struct pvrdma_cmd_resp_hdr hdr; + uint16_t pkey; + uint8_t reserved[6]; +}; + +struct pvrdma_cmd_create_uc { + struct pvrdma_cmd_hdr hdr; + union { + uint32_t pfn; /* UAR page frame number */ + uint64_t pfn64; /* 64-bit UAR page frame number */ + }; +}; + +struct pvrdma_cmd_create_uc_resp { + struct pvrdma_cmd_resp_hdr hdr; + uint32_t ctx_handle; + uint8_t reserved[4]; +}; + +struct pvrdma_cmd_destroy_uc { + struct pvrdma_cmd_hdr hdr; + uint32_t ctx_handle; + uint8_t reserved[4]; +}; + +struct pvrdma_cmd_create_pd { + struct pvrdma_cmd_hdr hdr; + uint32_t ctx_handle; + uint8_t reserved[4]; +}; + +struct pvrdma_cmd_create_pd_resp { + struct pvrdma_cmd_resp_hdr hdr; + uint32_t pd_handle; + uint8_t reserved[4]; +}; + +struct pvrdma_cmd_destroy_pd { + struct pvrdma_cmd_hdr hdr; + uint32_t pd_handle; + uint8_t reserved[4]; +}; + +struct pvrdma_cmd_create_mr { + struct pvrdma_cmd_hdr hdr; + uint64_t start; + uint64_t length; + uint64_t pdir_dma; + uint32_t pd_handle; + uint32_t access_flags; + uint32_t flags; + uint32_t nchunks; +}; + +struct pvrdma_cmd_create_mr_resp { + struct pvrdma_cmd_resp_hdr hdr; + uint32_t mr_handle; + uint32_t lkey; + uint32_t rkey; + uint8_t reserved[4]; +}; + +struct pvrdma_cmd_destroy_mr { + struct pvrdma_cmd_hdr hdr; + uint32_t mr_handle; + uint8_t reserved[4]; +}; + +struct pvrdma_cmd_create_cq { + struct pvrdma_cmd_hdr hdr; + uint64_t pdir_dma; + uint32_t ctx_handle; + uint32_t cqe; + uint32_t nchunks; + uint8_t reserved[4]; +}; + +struct pvrdma_cmd_create_cq_resp { + struct pvrdma_cmd_resp_hdr hdr; + uint32_t cq_handle; + uint32_t cqe; +}; + +struct pvrdma_cmd_resize_cq { + struct pvrdma_cmd_hdr hdr; + uint32_t cq_handle; + uint32_t cqe; +}; + +struct pvrdma_cmd_resize_cq_resp { + struct pvrdma_cmd_resp_hdr hdr; + uint32_t cqe; + uint8_t reserved[4]; +}; + +struct pvrdma_cmd_destroy_cq { + struct pvrdma_cmd_hdr hdr; + uint32_t cq_handle; + uint8_t reserved[4]; +}; + +struct pvrdma_cmd_create_srq { + struct pvrdma_cmd_hdr hdr; + uint64_t pdir_dma; + uint32_t pd_handle; + uint32_t nchunks; + struct pvrdma_srq_attr attrs; + uint8_t srq_type; + uint8_t reserved[7]; +}; + +struct pvrdma_cmd_create_srq_resp { + struct pvrdma_cmd_resp_hdr hdr; + uint32_t srqn; + uint8_t reserved[4]; +}; + +struct pvrdma_cmd_modify_srq { + struct pvrdma_cmd_hdr hdr; + uint32_t srq_handle; + uint32_t attr_mask; + struct pvrdma_srq_attr attrs; +}; + +struct pvrdma_cmd_query_srq { + struct pvrdma_cmd_hdr hdr; + uint32_t srq_handle; + uint8_t reserved[4]; +}; + +struct pvrdma_cmd_query_srq_resp { + struct pvrdma_cmd_resp_hdr hdr; + struct pvrdma_srq_attr attrs; +}; + +struct pvrdma_cmd_destroy_srq { + struct pvrdma_cmd_hdr hdr; + uint32_t srq_handle; + uint8_t reserved[4]; +}; + +struct pvrdma_cmd_create_qp { + struct pvrdma_cmd_hdr hdr; + uint64_t pdir_dma; + uint32_t pd_handle; + uint32_t send_cq_handle; + uint32_t recv_cq_handle; + uint32_t srq_handle; + uint32_t max_send_wr; + uint32_t max_recv_wr; + uint32_t max_send_sge; + uint32_t max_recv_sge; + uint32_t max_inline_data; + uint32_t lkey; + uint32_t access_flags; + uint16_t total_chunks; + uint16_t send_chunks; + uint16_t max_atomic_arg; + uint8_t sq_sig_all; + uint8_t qp_type; + uint8_t is_srq; + uint8_t reserved[3]; +}; + +struct pvrdma_cmd_create_qp_resp { + struct pvrdma_cmd_resp_hdr hdr; + uint32_t qpn; + uint32_t max_send_wr; + uint32_t max_recv_wr; + uint32_t max_send_sge; + uint32_t max_recv_sge; + uint32_t max_inline_data; +}; + +struct pvrdma_cmd_create_qp_resp_v2 { + struct pvrdma_cmd_resp_hdr hdr; + uint32_t qpn; + uint32_t qp_handle; + uint32_t max_send_wr; + uint32_t max_recv_wr; + uint32_t max_send_sge; + uint32_t max_recv_sge; + uint32_t max_inline_data; +}; + +struct pvrdma_cmd_modify_qp { + struct pvrdma_cmd_hdr hdr; + uint32_t qp_handle; + uint32_t attr_mask; + struct pvrdma_qp_attr attrs; +}; + +struct pvrdma_cmd_query_qp { + struct pvrdma_cmd_hdr hdr; + uint32_t qp_handle; + uint32_t attr_mask; +}; + +struct pvrdma_cmd_query_qp_resp { + struct pvrdma_cmd_resp_hdr hdr; + struct pvrdma_qp_attr attrs; +}; + +struct pvrdma_cmd_destroy_qp { + struct pvrdma_cmd_hdr hdr; + uint32_t qp_handle; + uint8_t reserved[4]; +}; + +struct pvrdma_cmd_destroy_qp_resp { + struct pvrdma_cmd_resp_hdr hdr; + uint32_t events_reported; + uint8_t reserved[4]; +}; + +struct pvrdma_cmd_create_bind { + struct pvrdma_cmd_hdr hdr; + uint32_t mtu; + uint32_t vlan; + uint32_t index; + uint8_t new_gid[16]; + uint8_t gid_type; + uint8_t reserved[3]; +}; + +struct pvrdma_cmd_destroy_bind { + struct pvrdma_cmd_hdr hdr; + uint32_t index; + uint8_t dest_gid[16]; + uint8_t reserved[4]; +}; + +union pvrdma_cmd_req { + struct pvrdma_cmd_hdr hdr; + struct pvrdma_cmd_query_port query_port; + struct pvrdma_cmd_query_pkey query_pkey; + struct pvrdma_cmd_create_uc create_uc; + struct pvrdma_cmd_destroy_uc destroy_uc; + struct pvrdma_cmd_create_pd create_pd; + struct pvrdma_cmd_destroy_pd destroy_pd; + struct pvrdma_cmd_create_mr create_mr; + struct pvrdma_cmd_destroy_mr destroy_mr; + struct pvrdma_cmd_create_cq create_cq; + struct pvrdma_cmd_resize_cq resize_cq; + struct pvrdma_cmd_destroy_cq destroy_cq; + struct pvrdma_cmd_create_qp create_qp; + struct pvrdma_cmd_modify_qp modify_qp; + struct pvrdma_cmd_query_qp query_qp; + struct pvrdma_cmd_destroy_qp destroy_qp; + struct pvrdma_cmd_create_bind create_bind; + struct pvrdma_cmd_destroy_bind destroy_bind; + struct pvrdma_cmd_create_srq create_srq; + struct pvrdma_cmd_modify_srq modify_srq; + struct pvrdma_cmd_query_srq query_srq; + struct pvrdma_cmd_destroy_srq destroy_srq; +}; + +union pvrdma_cmd_resp { + struct pvrdma_cmd_resp_hdr hdr; + struct pvrdma_cmd_query_port_resp query_port_resp; + struct pvrdma_cmd_query_pkey_resp query_pkey_resp; + struct pvrdma_cmd_create_uc_resp create_uc_resp; + struct pvrdma_cmd_create_pd_resp create_pd_resp; + struct pvrdma_cmd_create_mr_resp create_mr_resp; + struct pvrdma_cmd_create_cq_resp create_cq_resp; + struct pvrdma_cmd_resize_cq_resp resize_cq_resp; + struct pvrdma_cmd_create_qp_resp create_qp_resp; + struct pvrdma_cmd_create_qp_resp_v2 create_qp_resp_v2; + struct pvrdma_cmd_query_qp_resp query_qp_resp; + struct pvrdma_cmd_destroy_qp_resp destroy_qp_resp; + struct pvrdma_cmd_create_srq_resp create_srq_resp; + struct pvrdma_cmd_query_srq_resp query_srq_resp; +}; + +#endif /* __PVRDMA_DEV_API_H__ */ diff --git a/include/standard-headers/drivers/infiniband/hw/vmw_pvrdma/pvrdma_ring.h b/include/standard-headers/drivers/infiniband/hw/vmw_pvrdma/pvrdma_ring.h new file mode 100644 index 000000000..7b4062a1a --- /dev/null +++ b/include/standard-headers/drivers/infiniband/hw/vmw_pvrdma/pvrdma_ring.h @@ -0,0 +1,114 @@ +/* + * Copyright (c) 2012-2016 VMware, Inc. All rights reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of EITHER the GNU General Public License + * version 2 as published by the Free Software Foundation or the BSD + * 2-Clause License. This program is distributed in the hope that it + * will be useful, but WITHOUT ANY WARRANTY; WITHOUT EVEN THE IMPLIED + * WARRANTY OF MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE. + * See the GNU General Public License version 2 for more details at + * http://www.gnu.org/licenses/old-licenses/gpl-2.0.en.html. + * + * You should have received a copy of the GNU General Public License + * along with this program available in the file COPYING in the main + * directory of this source tree. + * + * The BSD 2-Clause License + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS + * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE + * COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, + * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR + * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, + * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED + * OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef __PVRDMA_RING_H__ +#define __PVRDMA_RING_H__ + +#include "standard-headers/linux/types.h" + +#define PVRDMA_INVALID_IDX -1 /* Invalid index. */ + +struct pvrdma_ring { + int prod_tail; /* Producer tail. */ + int cons_head; /* Consumer head. */ +}; + +struct pvrdma_ring_state { + struct pvrdma_ring tx; /* Tx ring. */ + struct pvrdma_ring rx; /* Rx ring. */ +}; + +static inline int pvrdma_idx_valid(uint32_t idx, uint32_t max_elems) +{ + /* Generates fewer instructions than a less-than. */ + return (idx & ~((max_elems << 1) - 1)) == 0; +} + +static inline int32_t pvrdma_idx(int *var, uint32_t max_elems) +{ + const unsigned int idx = qatomic_read(var); + + if (pvrdma_idx_valid(idx, max_elems)) + return idx & (max_elems - 1); + return PVRDMA_INVALID_IDX; +} + +static inline void pvrdma_idx_ring_inc(int *var, uint32_t max_elems) +{ + uint32_t idx = qatomic_read(var) + 1; /* Increment. */ + + idx &= (max_elems << 1) - 1; /* Modulo size, flip gen. */ + qatomic_set(var, idx); +} + +static inline int32_t pvrdma_idx_ring_has_space(const struct pvrdma_ring *r, + uint32_t max_elems, uint32_t *out_tail) +{ + const uint32_t tail = qatomic_read(&r->prod_tail); + const uint32_t head = qatomic_read(&r->cons_head); + + if (pvrdma_idx_valid(tail, max_elems) && + pvrdma_idx_valid(head, max_elems)) { + *out_tail = tail & (max_elems - 1); + return tail != (head ^ max_elems); + } + return PVRDMA_INVALID_IDX; +} + +static inline int32_t pvrdma_idx_ring_has_data(const struct pvrdma_ring *r, + uint32_t max_elems, uint32_t *out_head) +{ + const uint32_t tail = qatomic_read(&r->prod_tail); + const uint32_t head = qatomic_read(&r->cons_head); + + if (pvrdma_idx_valid(tail, max_elems) && + pvrdma_idx_valid(head, max_elems)) { + *out_head = head & (max_elems - 1); + return tail != head; + } + return PVRDMA_INVALID_IDX; +} + +#endif /* __PVRDMA_RING_H__ */ diff --git a/include/standard-headers/drivers/infiniband/hw/vmw_pvrdma/pvrdma_verbs.h b/include/standard-headers/drivers/infiniband/hw/vmw_pvrdma/pvrdma_verbs.h new file mode 100644 index 000000000..1677208a4 --- /dev/null +++ b/include/standard-headers/drivers/infiniband/hw/vmw_pvrdma/pvrdma_verbs.h @@ -0,0 +1,383 @@ +/* + * Copyright (c) 2012-2016 VMware, Inc. All rights reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of EITHER the GNU General Public License + * version 2 as published by the Free Software Foundation or the BSD + * 2-Clause License. This program is distributed in the hope that it + * will be useful, but WITHOUT ANY WARRANTY; WITHOUT EVEN THE IMPLIED + * WARRANTY OF MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE. + * See the GNU General Public License version 2 for more details at + * http://www.gnu.org/licenses/old-licenses/gpl-2.0.en.html. + * + * You should have received a copy of the GNU General Public License + * along with this program available in the file COPYING in the main + * directory of this source tree. + * + * The BSD 2-Clause License + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS + * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE + * COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, + * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR + * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, + * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED + * OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef __PVRDMA_VERBS_H__ +#define __PVRDMA_VERBS_H__ + +#include "standard-headers/linux/types.h" + +union pvrdma_gid { + uint8_t raw[16]; + struct { + uint64_t subnet_prefix; + uint64_t interface_id; + } global; +}; + +enum pvrdma_link_layer { + PVRDMA_LINK_LAYER_UNSPECIFIED, + PVRDMA_LINK_LAYER_INFINIBAND, + PVRDMA_LINK_LAYER_ETHERNET, +}; + +enum pvrdma_mtu { + PVRDMA_MTU_256 = 1, + PVRDMA_MTU_512 = 2, + PVRDMA_MTU_1024 = 3, + PVRDMA_MTU_2048 = 4, + PVRDMA_MTU_4096 = 5, +}; + +static inline int pvrdma_mtu_enum_to_int(enum pvrdma_mtu mtu) +{ + switch (mtu) { + case PVRDMA_MTU_256: return 256; + case PVRDMA_MTU_512: return 512; + case PVRDMA_MTU_1024: return 1024; + case PVRDMA_MTU_2048: return 2048; + case PVRDMA_MTU_4096: return 4096; + default: return -1; + } +} + +static inline enum pvrdma_mtu pvrdma_mtu_int_to_enum(int mtu) +{ + switch (mtu) { + case 256: return PVRDMA_MTU_256; + case 512: return PVRDMA_MTU_512; + case 1024: return PVRDMA_MTU_1024; + case 2048: return PVRDMA_MTU_2048; + case 4096: + default: return PVRDMA_MTU_4096; + } +} + +enum pvrdma_port_state { + PVRDMA_PORT_NOP = 0, + PVRDMA_PORT_DOWN = 1, + PVRDMA_PORT_INIT = 2, + PVRDMA_PORT_ARMED = 3, + PVRDMA_PORT_ACTIVE = 4, + PVRDMA_PORT_ACTIVE_DEFER = 5, +}; + +enum pvrdma_port_cap_flags { + PVRDMA_PORT_SM = 1 << 1, + PVRDMA_PORT_NOTICE_SUP = 1 << 2, + PVRDMA_PORT_TRAP_SUP = 1 << 3, + PVRDMA_PORT_OPT_IPD_SUP = 1 << 4, + PVRDMA_PORT_AUTO_MIGR_SUP = 1 << 5, + PVRDMA_PORT_SL_MAP_SUP = 1 << 6, + PVRDMA_PORT_MKEY_NVRAM = 1 << 7, + PVRDMA_PORT_PKEY_NVRAM = 1 << 8, + PVRDMA_PORT_LED_INFO_SUP = 1 << 9, + PVRDMA_PORT_SM_DISABLED = 1 << 10, + PVRDMA_PORT_SYS_IMAGE_GUID_SUP = 1 << 11, + PVRDMA_PORT_PKEY_SW_EXT_PORT_TRAP_SUP = 1 << 12, + PVRDMA_PORT_EXTENDED_SPEEDS_SUP = 1 << 14, + PVRDMA_PORT_CM_SUP = 1 << 16, + PVRDMA_PORT_SNMP_TUNNEL_SUP = 1 << 17, + PVRDMA_PORT_REINIT_SUP = 1 << 18, + PVRDMA_PORT_DEVICE_MGMT_SUP = 1 << 19, + PVRDMA_PORT_VENDOR_CLASS_SUP = 1 << 20, + PVRDMA_PORT_DR_NOTICE_SUP = 1 << 21, + PVRDMA_PORT_CAP_MASK_NOTICE_SUP = 1 << 22, + PVRDMA_PORT_BOOT_MGMT_SUP = 1 << 23, + PVRDMA_PORT_LINK_LATENCY_SUP = 1 << 24, + PVRDMA_PORT_CLIENT_REG_SUP = 1 << 25, + PVRDMA_PORT_IP_BASED_GIDS = 1 << 26, + PVRDMA_PORT_CAP_FLAGS_MAX = PVRDMA_PORT_IP_BASED_GIDS, +}; + +enum pvrdma_port_width { + PVRDMA_WIDTH_1X = 1, + PVRDMA_WIDTH_4X = 2, + PVRDMA_WIDTH_8X = 4, + PVRDMA_WIDTH_12X = 8, +}; + +static inline int pvrdma_width_enum_to_int(enum pvrdma_port_width width) +{ + switch (width) { + case PVRDMA_WIDTH_1X: return 1; + case PVRDMA_WIDTH_4X: return 4; + case PVRDMA_WIDTH_8X: return 8; + case PVRDMA_WIDTH_12X: return 12; + default: return -1; + } +} + +enum pvrdma_port_speed { + PVRDMA_SPEED_SDR = 1, + PVRDMA_SPEED_DDR = 2, + PVRDMA_SPEED_QDR = 4, + PVRDMA_SPEED_FDR10 = 8, + PVRDMA_SPEED_FDR = 16, + PVRDMA_SPEED_EDR = 32, +}; + +struct pvrdma_port_attr { + enum pvrdma_port_state state; + enum pvrdma_mtu max_mtu; + enum pvrdma_mtu active_mtu; + uint32_t gid_tbl_len; + uint32_t port_cap_flags; + uint32_t max_msg_sz; + uint32_t bad_pkey_cntr; + uint32_t qkey_viol_cntr; + uint16_t pkey_tbl_len; + uint16_t lid; + uint16_t sm_lid; + uint8_t lmc; + uint8_t max_vl_num; + uint8_t sm_sl; + uint8_t subnet_timeout; + uint8_t init_type_reply; + uint8_t active_width; + uint8_t active_speed; + uint8_t phys_state; + uint8_t reserved[2]; +}; + +struct pvrdma_global_route { + union pvrdma_gid dgid; + uint32_t flow_label; + uint8_t sgid_index; + uint8_t hop_limit; + uint8_t traffic_class; + uint8_t reserved; +}; + +struct pvrdma_grh { + uint32_t version_tclass_flow; + uint16_t paylen; + uint8_t next_hdr; + uint8_t hop_limit; + union pvrdma_gid sgid; + union pvrdma_gid dgid; +}; + +enum pvrdma_ah_flags { + PVRDMA_AH_GRH = 1, +}; + +enum pvrdma_rate { + PVRDMA_RATE_PORT_CURRENT = 0, + PVRDMA_RATE_2_5_GBPS = 2, + PVRDMA_RATE_5_GBPS = 5, + PVRDMA_RATE_10_GBPS = 3, + PVRDMA_RATE_20_GBPS = 6, + PVRDMA_RATE_30_GBPS = 4, + PVRDMA_RATE_40_GBPS = 7, + PVRDMA_RATE_60_GBPS = 8, + PVRDMA_RATE_80_GBPS = 9, + PVRDMA_RATE_120_GBPS = 10, + PVRDMA_RATE_14_GBPS = 11, + PVRDMA_RATE_56_GBPS = 12, + PVRDMA_RATE_112_GBPS = 13, + PVRDMA_RATE_168_GBPS = 14, + PVRDMA_RATE_25_GBPS = 15, + PVRDMA_RATE_100_GBPS = 16, + PVRDMA_RATE_200_GBPS = 17, + PVRDMA_RATE_300_GBPS = 18, +}; + +struct pvrdma_ah_attr { + struct pvrdma_global_route grh; + uint16_t dlid; + uint16_t vlan_id; + uint8_t sl; + uint8_t src_path_bits; + uint8_t static_rate; + uint8_t ah_flags; + uint8_t port_num; + uint8_t dmac[6]; + uint8_t reserved; +}; + +enum pvrdma_cq_notify_flags { + PVRDMA_CQ_SOLICITED = 1 << 0, + PVRDMA_CQ_NEXT_COMP = 1 << 1, + PVRDMA_CQ_SOLICITED_MASK = PVRDMA_CQ_SOLICITED | + PVRDMA_CQ_NEXT_COMP, + PVRDMA_CQ_REPORT_MISSED_EVENTS = 1 << 2, +}; + +struct pvrdma_qp_cap { + uint32_t max_send_wr; + uint32_t max_recv_wr; + uint32_t max_send_sge; + uint32_t max_recv_sge; + uint32_t max_inline_data; + uint32_t reserved; +}; + +enum pvrdma_sig_type { + PVRDMA_SIGNAL_ALL_WR, + PVRDMA_SIGNAL_REQ_WR, +}; + +enum pvrdma_qp_type { + PVRDMA_QPT_SMI, + PVRDMA_QPT_GSI, + PVRDMA_QPT_RC, + PVRDMA_QPT_UC, + PVRDMA_QPT_UD, + PVRDMA_QPT_RAW_IPV6, + PVRDMA_QPT_RAW_ETHERTYPE, + PVRDMA_QPT_RAW_PACKET = 8, + PVRDMA_QPT_XRC_INI = 9, + PVRDMA_QPT_XRC_TGT, + PVRDMA_QPT_MAX, +}; + +enum pvrdma_qp_create_flags { + PVRDMA_QP_CREATE_IPOPVRDMA_UD_LSO = 1 << 0, + PVRDMA_QP_CREATE_BLOCK_MULTICAST_LOOPBACK = 1 << 1, +}; + +enum pvrdma_qp_attr_mask { + PVRDMA_QP_STATE = 1 << 0, + PVRDMA_QP_CUR_STATE = 1 << 1, + PVRDMA_QP_EN_SQD_ASYNC_NOTIFY = 1 << 2, + PVRDMA_QP_ACCESS_FLAGS = 1 << 3, + PVRDMA_QP_PKEY_INDEX = 1 << 4, + PVRDMA_QP_PORT = 1 << 5, + PVRDMA_QP_QKEY = 1 << 6, + PVRDMA_QP_AV = 1 << 7, + PVRDMA_QP_PATH_MTU = 1 << 8, + PVRDMA_QP_TIMEOUT = 1 << 9, + PVRDMA_QP_RETRY_CNT = 1 << 10, + PVRDMA_QP_RNR_RETRY = 1 << 11, + PVRDMA_QP_RQ_PSN = 1 << 12, + PVRDMA_QP_MAX_QP_RD_ATOMIC = 1 << 13, + PVRDMA_QP_ALT_PATH = 1 << 14, + PVRDMA_QP_MIN_RNR_TIMER = 1 << 15, + PVRDMA_QP_SQ_PSN = 1 << 16, + PVRDMA_QP_MAX_DEST_RD_ATOMIC = 1 << 17, + PVRDMA_QP_PATH_MIG_STATE = 1 << 18, + PVRDMA_QP_CAP = 1 << 19, + PVRDMA_QP_DEST_QPN = 1 << 20, + PVRDMA_QP_ATTR_MASK_MAX = PVRDMA_QP_DEST_QPN, +}; + +enum pvrdma_qp_state { + PVRDMA_QPS_RESET, + PVRDMA_QPS_INIT, + PVRDMA_QPS_RTR, + PVRDMA_QPS_RTS, + PVRDMA_QPS_SQD, + PVRDMA_QPS_SQE, + PVRDMA_QPS_ERR, +}; + +enum pvrdma_mig_state { + PVRDMA_MIG_MIGRATED, + PVRDMA_MIG_REARM, + PVRDMA_MIG_ARMED, +}; + +enum pvrdma_mw_type { + PVRDMA_MW_TYPE_1 = 1, + PVRDMA_MW_TYPE_2 = 2, +}; + +struct pvrdma_srq_attr { + uint32_t max_wr; + uint32_t max_sge; + uint32_t srq_limit; + uint32_t reserved; +}; + +struct pvrdma_qp_attr { + enum pvrdma_qp_state qp_state; + enum pvrdma_qp_state cur_qp_state; + enum pvrdma_mtu path_mtu; + enum pvrdma_mig_state path_mig_state; + uint32_t qkey; + uint32_t rq_psn; + uint32_t sq_psn; + uint32_t dest_qp_num; + uint32_t qp_access_flags; + uint16_t pkey_index; + uint16_t alt_pkey_index; + uint8_t en_sqd_async_notify; + uint8_t sq_draining; + uint8_t max_rd_atomic; + uint8_t max_dest_rd_atomic; + uint8_t min_rnr_timer; + uint8_t port_num; + uint8_t timeout; + uint8_t retry_cnt; + uint8_t rnr_retry; + uint8_t alt_port_num; + uint8_t alt_timeout; + uint8_t reserved[5]; + struct pvrdma_qp_cap cap; + struct pvrdma_ah_attr ah_attr; + struct pvrdma_ah_attr alt_ah_attr; +}; + +enum pvrdma_send_flags { + PVRDMA_SEND_FENCE = 1 << 0, + PVRDMA_SEND_SIGNALED = 1 << 1, + PVRDMA_SEND_SOLICITED = 1 << 2, + PVRDMA_SEND_INLINE = 1 << 3, + PVRDMA_SEND_IP_CSUM = 1 << 4, + PVRDMA_SEND_FLAGS_MAX = PVRDMA_SEND_IP_CSUM, +}; + +enum pvrdma_access_flags { + PVRDMA_ACCESS_LOCAL_WRITE = 1 << 0, + PVRDMA_ACCESS_REMOTE_WRITE = 1 << 1, + PVRDMA_ACCESS_REMOTE_READ = 1 << 2, + PVRDMA_ACCESS_REMOTE_ATOMIC = 1 << 3, + PVRDMA_ACCESS_MW_BIND = 1 << 4, + PVRDMA_ZERO_BASED = 1 << 5, + PVRDMA_ACCESS_ON_DEMAND = 1 << 6, + PVRDMA_ACCESS_FLAGS_MAX = PVRDMA_ACCESS_ON_DEMAND, +}; + +#endif /* __PVRDMA_VERBS_H__ */ diff --git a/include/standard-headers/drm/drm_fourcc.h b/include/standard-headers/drm/drm_fourcc.h new file mode 100644 index 000000000..c47e19810 --- /dev/null +++ b/include/standard-headers/drm/drm_fourcc.h @@ -0,0 +1,1235 @@ +/* + * Copyright 2011 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * VA LINUX SYSTEMS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ + +#ifndef DRM_FOURCC_H +#define DRM_FOURCC_H + + +#if defined(__cplusplus) +extern "C" { +#endif + +/** + * DOC: overview + * + * In the DRM subsystem, framebuffer pixel formats are described using the + * fourcc codes defined in `include/uapi/drm/drm_fourcc.h`. In addition to the + * fourcc code, a Format Modifier may optionally be provided, in order to + * further describe the buffer's format - for example tiling or compression. + * + * Format Modifiers + * ---------------- + * + * Format modifiers are used in conjunction with a fourcc code, forming a + * unique fourcc:modifier pair. This format:modifier pair must fully define the + * format and data layout of the buffer, and should be the only way to describe + * that particular buffer. + * + * Having multiple fourcc:modifier pairs which describe the same layout should + * be avoided, as such aliases run the risk of different drivers exposing + * different names for the same data format, forcing userspace to understand + * that they are aliases. + * + * Format modifiers may change any property of the buffer, including the number + * of planes and/or the required allocation size. Format modifiers are + * vendor-namespaced, and as such the relationship between a fourcc code and a + * modifier is specific to the modifer being used. For example, some modifiers + * may preserve meaning - such as number of planes - from the fourcc code, + * whereas others may not. + * + * Modifiers must uniquely encode buffer layout. In other words, a buffer must + * match only a single modifier. A modifier must not be a subset of layouts of + * another modifier. For instance, it's incorrect to encode pitch alignment in + * a modifier: a buffer may match a 64-pixel aligned modifier and a 32-pixel + * aligned modifier. That said, modifiers can have implicit minimal + * requirements. + * + * For modifiers where the combination of fourcc code and modifier can alias, + * a canonical pair needs to be defined and used by all drivers. Preferred + * combinations are also encouraged where all combinations might lead to + * confusion and unnecessarily reduced interoperability. An example for the + * latter is AFBC, where the ABGR layouts are preferred over ARGB layouts. + * + * There are two kinds of modifier users: + * + * - Kernel and user-space drivers: for drivers it's important that modifiers + * don't alias, otherwise two drivers might support the same format but use + * different aliases, preventing them from sharing buffers in an efficient + * format. + * - Higher-level programs interfacing with KMS/GBM/EGL/Vulkan/etc: these users + * see modifiers as opaque tokens they can check for equality and intersect. + * These users musn't need to know to reason about the modifier value + * (i.e. they are not expected to extract information out of the modifier). + * + * Vendors should document their modifier usage in as much detail as + * possible, to ensure maximum compatibility across devices, drivers and + * applications. + * + * The authoritative list of format modifier codes is found in + * `include/uapi/drm/drm_fourcc.h` + */ + +#define fourcc_code(a, b, c, d) ((uint32_t)(a) | ((uint32_t)(b) << 8) | \ + ((uint32_t)(c) << 16) | ((uint32_t)(d) << 24)) + +#define DRM_FORMAT_BIG_ENDIAN (1U<<31) /* format is big endian instead of little endian */ + +/* Reserve 0 for the invalid format specifier */ +#define DRM_FORMAT_INVALID 0 + +/* color index */ +#define DRM_FORMAT_C8 fourcc_code('C', '8', ' ', ' ') /* [7:0] C */ + +/* 8 bpp Red */ +#define DRM_FORMAT_R8 fourcc_code('R', '8', ' ', ' ') /* [7:0] R */ + +/* 16 bpp Red */ +#define DRM_FORMAT_R16 fourcc_code('R', '1', '6', ' ') /* [15:0] R little endian */ + +/* 16 bpp RG */ +#define DRM_FORMAT_RG88 fourcc_code('R', 'G', '8', '8') /* [15:0] R:G 8:8 little endian */ +#define DRM_FORMAT_GR88 fourcc_code('G', 'R', '8', '8') /* [15:0] G:R 8:8 little endian */ + +/* 32 bpp RG */ +#define DRM_FORMAT_RG1616 fourcc_code('R', 'G', '3', '2') /* [31:0] R:G 16:16 little endian */ +#define DRM_FORMAT_GR1616 fourcc_code('G', 'R', '3', '2') /* [31:0] G:R 16:16 little endian */ + +/* 8 bpp RGB */ +#define DRM_FORMAT_RGB332 fourcc_code('R', 'G', 'B', '8') /* [7:0] R:G:B 3:3:2 */ +#define DRM_FORMAT_BGR233 fourcc_code('B', 'G', 'R', '8') /* [7:0] B:G:R 2:3:3 */ + +/* 16 bpp RGB */ +#define DRM_FORMAT_XRGB4444 fourcc_code('X', 'R', '1', '2') /* [15:0] x:R:G:B 4:4:4:4 little endian */ +#define DRM_FORMAT_XBGR4444 fourcc_code('X', 'B', '1', '2') /* [15:0] x:B:G:R 4:4:4:4 little endian */ +#define DRM_FORMAT_RGBX4444 fourcc_code('R', 'X', '1', '2') /* [15:0] R:G:B:x 4:4:4:4 little endian */ +#define DRM_FORMAT_BGRX4444 fourcc_code('B', 'X', '1', '2') /* [15:0] B:G:R:x 4:4:4:4 little endian */ + +#define DRM_FORMAT_ARGB4444 fourcc_code('A', 'R', '1', '2') /* [15:0] A:R:G:B 4:4:4:4 little endian */ +#define DRM_FORMAT_ABGR4444 fourcc_code('A', 'B', '1', '2') /* [15:0] A:B:G:R 4:4:4:4 little endian */ +#define DRM_FORMAT_RGBA4444 fourcc_code('R', 'A', '1', '2') /* [15:0] R:G:B:A 4:4:4:4 little endian */ +#define DRM_FORMAT_BGRA4444 fourcc_code('B', 'A', '1', '2') /* [15:0] B:G:R:A 4:4:4:4 little endian */ + +#define DRM_FORMAT_XRGB1555 fourcc_code('X', 'R', '1', '5') /* [15:0] x:R:G:B 1:5:5:5 little endian */ +#define DRM_FORMAT_XBGR1555 fourcc_code('X', 'B', '1', '5') /* [15:0] x:B:G:R 1:5:5:5 little endian */ +#define DRM_FORMAT_RGBX5551 fourcc_code('R', 'X', '1', '5') /* [15:0] R:G:B:x 5:5:5:1 little endian */ +#define DRM_FORMAT_BGRX5551 fourcc_code('B', 'X', '1', '5') /* [15:0] B:G:R:x 5:5:5:1 little endian */ + +#define DRM_FORMAT_ARGB1555 fourcc_code('A', 'R', '1', '5') /* [15:0] A:R:G:B 1:5:5:5 little endian */ +#define DRM_FORMAT_ABGR1555 fourcc_code('A', 'B', '1', '5') /* [15:0] A:B:G:R 1:5:5:5 little endian */ +#define DRM_FORMAT_RGBA5551 fourcc_code('R', 'A', '1', '5') /* [15:0] R:G:B:A 5:5:5:1 little endian */ +#define DRM_FORMAT_BGRA5551 fourcc_code('B', 'A', '1', '5') /* [15:0] B:G:R:A 5:5:5:1 little endian */ + +#define DRM_FORMAT_RGB565 fourcc_code('R', 'G', '1', '6') /* [15:0] R:G:B 5:6:5 little endian */ +#define DRM_FORMAT_BGR565 fourcc_code('B', 'G', '1', '6') /* [15:0] B:G:R 5:6:5 little endian */ + +/* 24 bpp RGB */ +#define DRM_FORMAT_RGB888 fourcc_code('R', 'G', '2', '4') /* [23:0] R:G:B little endian */ +#define DRM_FORMAT_BGR888 fourcc_code('B', 'G', '2', '4') /* [23:0] B:G:R little endian */ + +/* 32 bpp RGB */ +#define DRM_FORMAT_XRGB8888 fourcc_code('X', 'R', '2', '4') /* [31:0] x:R:G:B 8:8:8:8 little endian */ +#define DRM_FORMAT_XBGR8888 fourcc_code('X', 'B', '2', '4') /* [31:0] x:B:G:R 8:8:8:8 little endian */ +#define DRM_FORMAT_RGBX8888 fourcc_code('R', 'X', '2', '4') /* [31:0] R:G:B:x 8:8:8:8 little endian */ +#define DRM_FORMAT_BGRX8888 fourcc_code('B', 'X', '2', '4') /* [31:0] B:G:R:x 8:8:8:8 little endian */ + +#define DRM_FORMAT_ARGB8888 fourcc_code('A', 'R', '2', '4') /* [31:0] A:R:G:B 8:8:8:8 little endian */ +#define DRM_FORMAT_ABGR8888 fourcc_code('A', 'B', '2', '4') /* [31:0] A:B:G:R 8:8:8:8 little endian */ +#define DRM_FORMAT_RGBA8888 fourcc_code('R', 'A', '2', '4') /* [31:0] R:G:B:A 8:8:8:8 little endian */ +#define DRM_FORMAT_BGRA8888 fourcc_code('B', 'A', '2', '4') /* [31:0] B:G:R:A 8:8:8:8 little endian */ + +#define DRM_FORMAT_XRGB2101010 fourcc_code('X', 'R', '3', '0') /* [31:0] x:R:G:B 2:10:10:10 little endian */ +#define DRM_FORMAT_XBGR2101010 fourcc_code('X', 'B', '3', '0') /* [31:0] x:B:G:R 2:10:10:10 little endian */ +#define DRM_FORMAT_RGBX1010102 fourcc_code('R', 'X', '3', '0') /* [31:0] R:G:B:x 10:10:10:2 little endian */ +#define DRM_FORMAT_BGRX1010102 fourcc_code('B', 'X', '3', '0') /* [31:0] B:G:R:x 10:10:10:2 little endian */ + +#define DRM_FORMAT_ARGB2101010 fourcc_code('A', 'R', '3', '0') /* [31:0] A:R:G:B 2:10:10:10 little endian */ +#define DRM_FORMAT_ABGR2101010 fourcc_code('A', 'B', '3', '0') /* [31:0] A:B:G:R 2:10:10:10 little endian */ +#define DRM_FORMAT_RGBA1010102 fourcc_code('R', 'A', '3', '0') /* [31:0] R:G:B:A 10:10:10:2 little endian */ +#define DRM_FORMAT_BGRA1010102 fourcc_code('B', 'A', '3', '0') /* [31:0] B:G:R:A 10:10:10:2 little endian */ + +/* + * Floating point 64bpp RGB + * IEEE 754-2008 binary16 half-precision float + * [15:0] sign:exponent:mantissa 1:5:10 + */ +#define DRM_FORMAT_XRGB16161616F fourcc_code('X', 'R', '4', 'H') /* [63:0] x:R:G:B 16:16:16:16 little endian */ +#define DRM_FORMAT_XBGR16161616F fourcc_code('X', 'B', '4', 'H') /* [63:0] x:B:G:R 16:16:16:16 little endian */ + +#define DRM_FORMAT_ARGB16161616F fourcc_code('A', 'R', '4', 'H') /* [63:0] A:R:G:B 16:16:16:16 little endian */ +#define DRM_FORMAT_ABGR16161616F fourcc_code('A', 'B', '4', 'H') /* [63:0] A:B:G:R 16:16:16:16 little endian */ + +/* + * RGBA format with 10-bit components packed in 64-bit per pixel, with 6 bits + * of unused padding per component: + */ +#define DRM_FORMAT_AXBXGXRX106106106106 fourcc_code('A', 'B', '1', '0') /* [63:0] A:x:B:x:G:x:R:x 10:6:10:6:10:6:10:6 little endian */ + +/* packed YCbCr */ +#define DRM_FORMAT_YUYV fourcc_code('Y', 'U', 'Y', 'V') /* [31:0] Cr0:Y1:Cb0:Y0 8:8:8:8 little endian */ +#define DRM_FORMAT_YVYU fourcc_code('Y', 'V', 'Y', 'U') /* [31:0] Cb0:Y1:Cr0:Y0 8:8:8:8 little endian */ +#define DRM_FORMAT_UYVY fourcc_code('U', 'Y', 'V', 'Y') /* [31:0] Y1:Cr0:Y0:Cb0 8:8:8:8 little endian */ +#define DRM_FORMAT_VYUY fourcc_code('V', 'Y', 'U', 'Y') /* [31:0] Y1:Cb0:Y0:Cr0 8:8:8:8 little endian */ + +#define DRM_FORMAT_AYUV fourcc_code('A', 'Y', 'U', 'V') /* [31:0] A:Y:Cb:Cr 8:8:8:8 little endian */ +#define DRM_FORMAT_XYUV8888 fourcc_code('X', 'Y', 'U', 'V') /* [31:0] X:Y:Cb:Cr 8:8:8:8 little endian */ +#define DRM_FORMAT_VUY888 fourcc_code('V', 'U', '2', '4') /* [23:0] Cr:Cb:Y 8:8:8 little endian */ +#define DRM_FORMAT_VUY101010 fourcc_code('V', 'U', '3', '0') /* Y followed by U then V, 10:10:10. Non-linear modifier only */ + +/* + * packed Y2xx indicate for each component, xx valid data occupy msb + * 16-xx padding occupy lsb + */ +#define DRM_FORMAT_Y210 fourcc_code('Y', '2', '1', '0') /* [63:0] Cr0:0:Y1:0:Cb0:0:Y0:0 10:6:10:6:10:6:10:6 little endian per 2 Y pixels */ +#define DRM_FORMAT_Y212 fourcc_code('Y', '2', '1', '2') /* [63:0] Cr0:0:Y1:0:Cb0:0:Y0:0 12:4:12:4:12:4:12:4 little endian per 2 Y pixels */ +#define DRM_FORMAT_Y216 fourcc_code('Y', '2', '1', '6') /* [63:0] Cr0:Y1:Cb0:Y0 16:16:16:16 little endian per 2 Y pixels */ + +/* + * packed Y4xx indicate for each component, xx valid data occupy msb + * 16-xx padding occupy lsb except Y410 + */ +#define DRM_FORMAT_Y410 fourcc_code('Y', '4', '1', '0') /* [31:0] A:Cr:Y:Cb 2:10:10:10 little endian */ +#define DRM_FORMAT_Y412 fourcc_code('Y', '4', '1', '2') /* [63:0] A:0:Cr:0:Y:0:Cb:0 12:4:12:4:12:4:12:4 little endian */ +#define DRM_FORMAT_Y416 fourcc_code('Y', '4', '1', '6') /* [63:0] A:Cr:Y:Cb 16:16:16:16 little endian */ + +#define DRM_FORMAT_XVYU2101010 fourcc_code('X', 'V', '3', '0') /* [31:0] X:Cr:Y:Cb 2:10:10:10 little endian */ +#define DRM_FORMAT_XVYU12_16161616 fourcc_code('X', 'V', '3', '6') /* [63:0] X:0:Cr:0:Y:0:Cb:0 12:4:12:4:12:4:12:4 little endian */ +#define DRM_FORMAT_XVYU16161616 fourcc_code('X', 'V', '4', '8') /* [63:0] X:Cr:Y:Cb 16:16:16:16 little endian */ + +/* + * packed YCbCr420 2x2 tiled formats + * first 64 bits will contain Y,Cb,Cr components for a 2x2 tile + */ +/* [63:0] A3:A2:Y3:0:Cr0:0:Y2:0:A1:A0:Y1:0:Cb0:0:Y0:0 1:1:8:2:8:2:8:2:1:1:8:2:8:2:8:2 little endian */ +#define DRM_FORMAT_Y0L0 fourcc_code('Y', '0', 'L', '0') +/* [63:0] X3:X2:Y3:0:Cr0:0:Y2:0:X1:X0:Y1:0:Cb0:0:Y0:0 1:1:8:2:8:2:8:2:1:1:8:2:8:2:8:2 little endian */ +#define DRM_FORMAT_X0L0 fourcc_code('X', '0', 'L', '0') + +/* [63:0] A3:A2:Y3:Cr0:Y2:A1:A0:Y1:Cb0:Y0 1:1:10:10:10:1:1:10:10:10 little endian */ +#define DRM_FORMAT_Y0L2 fourcc_code('Y', '0', 'L', '2') +/* [63:0] X3:X2:Y3:Cr0:Y2:X1:X0:Y1:Cb0:Y0 1:1:10:10:10:1:1:10:10:10 little endian */ +#define DRM_FORMAT_X0L2 fourcc_code('X', '0', 'L', '2') + +/* + * 1-plane YUV 4:2:0 + * In these formats, the component ordering is specified (Y, followed by U + * then V), but the exact Linear layout is undefined. + * These formats can only be used with a non-Linear modifier. + */ +#define DRM_FORMAT_YUV420_8BIT fourcc_code('Y', 'U', '0', '8') +#define DRM_FORMAT_YUV420_10BIT fourcc_code('Y', 'U', '1', '0') + +/* + * 2 plane RGB + A + * index 0 = RGB plane, same format as the corresponding non _A8 format has + * index 1 = A plane, [7:0] A + */ +#define DRM_FORMAT_XRGB8888_A8 fourcc_code('X', 'R', 'A', '8') +#define DRM_FORMAT_XBGR8888_A8 fourcc_code('X', 'B', 'A', '8') +#define DRM_FORMAT_RGBX8888_A8 fourcc_code('R', 'X', 'A', '8') +#define DRM_FORMAT_BGRX8888_A8 fourcc_code('B', 'X', 'A', '8') +#define DRM_FORMAT_RGB888_A8 fourcc_code('R', '8', 'A', '8') +#define DRM_FORMAT_BGR888_A8 fourcc_code('B', '8', 'A', '8') +#define DRM_FORMAT_RGB565_A8 fourcc_code('R', '5', 'A', '8') +#define DRM_FORMAT_BGR565_A8 fourcc_code('B', '5', 'A', '8') + +/* + * 2 plane YCbCr + * index 0 = Y plane, [7:0] Y + * index 1 = Cr:Cb plane, [15:0] Cr:Cb little endian + * or + * index 1 = Cb:Cr plane, [15:0] Cb:Cr little endian + */ +#define DRM_FORMAT_NV12 fourcc_code('N', 'V', '1', '2') /* 2x2 subsampled Cr:Cb plane */ +#define DRM_FORMAT_NV21 fourcc_code('N', 'V', '2', '1') /* 2x2 subsampled Cb:Cr plane */ +#define DRM_FORMAT_NV16 fourcc_code('N', 'V', '1', '6') /* 2x1 subsampled Cr:Cb plane */ +#define DRM_FORMAT_NV61 fourcc_code('N', 'V', '6', '1') /* 2x1 subsampled Cb:Cr plane */ +#define DRM_FORMAT_NV24 fourcc_code('N', 'V', '2', '4') /* non-subsampled Cr:Cb plane */ +#define DRM_FORMAT_NV42 fourcc_code('N', 'V', '4', '2') /* non-subsampled Cb:Cr plane */ +/* + * 2 plane YCbCr + * index 0 = Y plane, [39:0] Y3:Y2:Y1:Y0 little endian + * index 1 = Cr:Cb plane, [39:0] Cr1:Cb1:Cr0:Cb0 little endian + */ +#define DRM_FORMAT_NV15 fourcc_code('N', 'V', '1', '5') /* 2x2 subsampled Cr:Cb plane */ + +/* + * 2 plane YCbCr MSB aligned + * index 0 = Y plane, [15:0] Y:x [10:6] little endian + * index 1 = Cr:Cb plane, [31:0] Cr:x:Cb:x [10:6:10:6] little endian + */ +#define DRM_FORMAT_P210 fourcc_code('P', '2', '1', '0') /* 2x1 subsampled Cr:Cb plane, 10 bit per channel */ + +/* + * 2 plane YCbCr MSB aligned + * index 0 = Y plane, [15:0] Y:x [10:6] little endian + * index 1 = Cr:Cb plane, [31:0] Cr:x:Cb:x [10:6:10:6] little endian + */ +#define DRM_FORMAT_P010 fourcc_code('P', '0', '1', '0') /* 2x2 subsampled Cr:Cb plane 10 bits per channel */ + +/* + * 2 plane YCbCr MSB aligned + * index 0 = Y plane, [15:0] Y:x [12:4] little endian + * index 1 = Cr:Cb plane, [31:0] Cr:x:Cb:x [12:4:12:4] little endian + */ +#define DRM_FORMAT_P012 fourcc_code('P', '0', '1', '2') /* 2x2 subsampled Cr:Cb plane 12 bits per channel */ + +/* + * 2 plane YCbCr MSB aligned + * index 0 = Y plane, [15:0] Y little endian + * index 1 = Cr:Cb plane, [31:0] Cr:Cb [16:16] little endian + */ +#define DRM_FORMAT_P016 fourcc_code('P', '0', '1', '6') /* 2x2 subsampled Cr:Cb plane 16 bits per channel */ + +/* 3 plane non-subsampled (444) YCbCr + * 16 bits per component, but only 10 bits are used and 6 bits are padded + * index 0: Y plane, [15:0] Y:x [10:6] little endian + * index 1: Cb plane, [15:0] Cb:x [10:6] little endian + * index 2: Cr plane, [15:0] Cr:x [10:6] little endian + */ +#define DRM_FORMAT_Q410 fourcc_code('Q', '4', '1', '0') + +/* 3 plane non-subsampled (444) YCrCb + * 16 bits per component, but only 10 bits are used and 6 bits are padded + * index 0: Y plane, [15:0] Y:x [10:6] little endian + * index 1: Cr plane, [15:0] Cr:x [10:6] little endian + * index 2: Cb plane, [15:0] Cb:x [10:6] little endian + */ +#define DRM_FORMAT_Q401 fourcc_code('Q', '4', '0', '1') + +/* + * 3 plane YCbCr + * index 0: Y plane, [7:0] Y + * index 1: Cb plane, [7:0] Cb + * index 2: Cr plane, [7:0] Cr + * or + * index 1: Cr plane, [7:0] Cr + * index 2: Cb plane, [7:0] Cb + */ +#define DRM_FORMAT_YUV410 fourcc_code('Y', 'U', 'V', '9') /* 4x4 subsampled Cb (1) and Cr (2) planes */ +#define DRM_FORMAT_YVU410 fourcc_code('Y', 'V', 'U', '9') /* 4x4 subsampled Cr (1) and Cb (2) planes */ +#define DRM_FORMAT_YUV411 fourcc_code('Y', 'U', '1', '1') /* 4x1 subsampled Cb (1) and Cr (2) planes */ +#define DRM_FORMAT_YVU411 fourcc_code('Y', 'V', '1', '1') /* 4x1 subsampled Cr (1) and Cb (2) planes */ +#define DRM_FORMAT_YUV420 fourcc_code('Y', 'U', '1', '2') /* 2x2 subsampled Cb (1) and Cr (2) planes */ +#define DRM_FORMAT_YVU420 fourcc_code('Y', 'V', '1', '2') /* 2x2 subsampled Cr (1) and Cb (2) planes */ +#define DRM_FORMAT_YUV422 fourcc_code('Y', 'U', '1', '6') /* 2x1 subsampled Cb (1) and Cr (2) planes */ +#define DRM_FORMAT_YVU422 fourcc_code('Y', 'V', '1', '6') /* 2x1 subsampled Cr (1) and Cb (2) planes */ +#define DRM_FORMAT_YUV444 fourcc_code('Y', 'U', '2', '4') /* non-subsampled Cb (1) and Cr (2) planes */ +#define DRM_FORMAT_YVU444 fourcc_code('Y', 'V', '2', '4') /* non-subsampled Cr (1) and Cb (2) planes */ + + +/* + * Format Modifiers: + * + * Format modifiers describe, typically, a re-ordering or modification + * of the data in a plane of an FB. This can be used to express tiled/ + * swizzled formats, or compression, or a combination of the two. + * + * The upper 8 bits of the format modifier are a vendor-id as assigned + * below. The lower 56 bits are assigned as vendor sees fit. + */ + +/* Vendor Ids: */ +#define DRM_FORMAT_MOD_VENDOR_NONE 0 +#define DRM_FORMAT_MOD_VENDOR_INTEL 0x01 +#define DRM_FORMAT_MOD_VENDOR_AMD 0x02 +#define DRM_FORMAT_MOD_VENDOR_NVIDIA 0x03 +#define DRM_FORMAT_MOD_VENDOR_SAMSUNG 0x04 +#define DRM_FORMAT_MOD_VENDOR_QCOM 0x05 +#define DRM_FORMAT_MOD_VENDOR_VIVANTE 0x06 +#define DRM_FORMAT_MOD_VENDOR_BROADCOM 0x07 +#define DRM_FORMAT_MOD_VENDOR_ARM 0x08 +#define DRM_FORMAT_MOD_VENDOR_ALLWINNER 0x09 +#define DRM_FORMAT_MOD_VENDOR_AMLOGIC 0x0a + +/* add more to the end as needed */ + +#define DRM_FORMAT_RESERVED ((1ULL << 56) - 1) + +#define fourcc_mod_code(vendor, val) \ + ((((uint64_t)DRM_FORMAT_MOD_VENDOR_## vendor) << 56) | ((val) & 0x00ffffffffffffffULL)) + +/* + * Format Modifier tokens: + * + * When adding a new token please document the layout with a code comment, + * similar to the fourcc codes above. drm_fourcc.h is considered the + * authoritative source for all of these. + * + * Generic modifier names: + * + * DRM_FORMAT_MOD_GENERIC_* definitions are used to provide vendor-neutral names + * for layouts which are common across multiple vendors. To preserve + * compatibility, in cases where a vendor-specific definition already exists and + * a generic name for it is desired, the common name is a purely symbolic alias + * and must use the same numerical value as the original definition. + * + * Note that generic names should only be used for modifiers which describe + * generic layouts (such as pixel re-ordering), which may have + * independently-developed support across multiple vendors. + * + * In future cases where a generic layout is identified before merging with a + * vendor-specific modifier, a new 'GENERIC' vendor or modifier using vendor + * 'NONE' could be considered. This should only be for obvious, exceptional + * cases to avoid polluting the 'GENERIC' namespace with modifiers which only + * apply to a single vendor. + * + * Generic names should not be used for cases where multiple hardware vendors + * have implementations of the same standardised compression scheme (such as + * AFBC). In those cases, all implementations should use the same format + * modifier(s), reflecting the vendor of the standard. + */ + +#define DRM_FORMAT_MOD_GENERIC_16_16_TILE DRM_FORMAT_MOD_SAMSUNG_16_16_TILE + +/* + * Invalid Modifier + * + * This modifier can be used as a sentinel to terminate the format modifiers + * list, or to initialize a variable with an invalid modifier. It might also be + * used to report an error back to userspace for certain APIs. + */ +#define DRM_FORMAT_MOD_INVALID fourcc_mod_code(NONE, DRM_FORMAT_RESERVED) + +/* + * Linear Layout + * + * Just plain linear layout. Note that this is different from no specifying any + * modifier (e.g. not setting DRM_MODE_FB_MODIFIERS in the DRM_ADDFB2 ioctl), + * which tells the driver to also take driver-internal information into account + * and so might actually result in a tiled framebuffer. + */ +#define DRM_FORMAT_MOD_LINEAR fourcc_mod_code(NONE, 0) + +/* + * Deprecated: use DRM_FORMAT_MOD_LINEAR instead + * + * The "none" format modifier doesn't actually mean that the modifier is + * implicit, instead it means that the layout is linear. Whether modifiers are + * used is out-of-band information carried in an API-specific way (e.g. in a + * flag for drm_mode_fb_cmd2). + */ +#define DRM_FORMAT_MOD_NONE 0 + +/* Intel framebuffer modifiers */ + +/* + * Intel X-tiling layout + * + * This is a tiled layout using 4Kb tiles (except on gen2 where the tiles 2Kb) + * in row-major layout. Within the tile bytes are laid out row-major, with + * a platform-dependent stride. On top of that the memory can apply + * platform-depending swizzling of some higher address bits into bit6. + * + * Note that this layout is only accurate on intel gen 8+ or valleyview chipsets. + * On earlier platforms the is highly platforms specific and not useful for + * cross-driver sharing. It exists since on a given platform it does uniquely + * identify the layout in a simple way for i915-specific userspace, which + * facilitated conversion of userspace to modifiers. Additionally the exact + * format on some really old platforms is not known. + */ +#define I915_FORMAT_MOD_X_TILED fourcc_mod_code(INTEL, 1) + +/* + * Intel Y-tiling layout + * + * This is a tiled layout using 4Kb tiles (except on gen2 where the tiles 2Kb) + * in row-major layout. Within the tile bytes are laid out in OWORD (16 bytes) + * chunks column-major, with a platform-dependent height. On top of that the + * memory can apply platform-depending swizzling of some higher address bits + * into bit6. + * + * Note that this layout is only accurate on intel gen 8+ or valleyview chipsets. + * On earlier platforms the is highly platforms specific and not useful for + * cross-driver sharing. It exists since on a given platform it does uniquely + * identify the layout in a simple way for i915-specific userspace, which + * facilitated conversion of userspace to modifiers. Additionally the exact + * format on some really old platforms is not known. + */ +#define I915_FORMAT_MOD_Y_TILED fourcc_mod_code(INTEL, 2) + +/* + * Intel Yf-tiling layout + * + * This is a tiled layout using 4Kb tiles in row-major layout. + * Within the tile pixels are laid out in 16 256 byte units / sub-tiles which + * are arranged in four groups (two wide, two high) with column-major layout. + * Each group therefore consits out of four 256 byte units, which are also laid + * out as 2x2 column-major. + * 256 byte units are made out of four 64 byte blocks of pixels, producing + * either a square block or a 2:1 unit. + * 64 byte blocks of pixels contain four pixel rows of 16 bytes, where the width + * in pixel depends on the pixel depth. + */ +#define I915_FORMAT_MOD_Yf_TILED fourcc_mod_code(INTEL, 3) + +/* + * Intel color control surface (CCS) for render compression + * + * The framebuffer format must be one of the 8:8:8:8 RGB formats. + * The main surface will be plane index 0 and must be Y/Yf-tiled, + * the CCS will be plane index 1. + * + * Each CCS tile matches a 1024x512 pixel area of the main surface. + * To match certain aspects of the 3D hardware the CCS is + * considered to be made up of normal 128Bx32 Y tiles, Thus + * the CCS pitch must be specified in multiples of 128 bytes. + * + * In reality the CCS tile appears to be a 64Bx64 Y tile, composed + * of QWORD (8 bytes) chunks instead of OWORD (16 bytes) chunks. + * But that fact is not relevant unless the memory is accessed + * directly. + */ +#define I915_FORMAT_MOD_Y_TILED_CCS fourcc_mod_code(INTEL, 4) +#define I915_FORMAT_MOD_Yf_TILED_CCS fourcc_mod_code(INTEL, 5) + +/* + * Intel color control surfaces (CCS) for Gen-12 render compression. + * + * The main surface is Y-tiled and at plane index 0, the CCS is linear and + * at index 1. A 64B CCS cache line corresponds to an area of 4x1 tiles in + * main surface. In other words, 4 bits in CCS map to a main surface cache + * line pair. The main surface pitch is required to be a multiple of four + * Y-tile widths. + */ +#define I915_FORMAT_MOD_Y_TILED_GEN12_RC_CCS fourcc_mod_code(INTEL, 6) + +/* + * Intel color control surfaces (CCS) for Gen-12 media compression + * + * The main surface is Y-tiled and at plane index 0, the CCS is linear and + * at index 1. A 64B CCS cache line corresponds to an area of 4x1 tiles in + * main surface. In other words, 4 bits in CCS map to a main surface cache + * line pair. The main surface pitch is required to be a multiple of four + * Y-tile widths. For semi-planar formats like NV12, CCS planes follow the + * Y and UV planes i.e., planes 0 and 1 are used for Y and UV surfaces, + * planes 2 and 3 for the respective CCS. + */ +#define I915_FORMAT_MOD_Y_TILED_GEN12_MC_CCS fourcc_mod_code(INTEL, 7) + +/* + * Tiled, NV12MT, grouped in 64 (pixels) x 32 (lines) -sized macroblocks + * + * Macroblocks are laid in a Z-shape, and each pixel data is following the + * standard NV12 style. + * As for NV12, an image is the result of two frame buffers: one for Y, + * one for the interleaved Cb/Cr components (1/2 the height of the Y buffer). + * Alignment requirements are (for each buffer): + * - multiple of 128 pixels for the width + * - multiple of 32 pixels for the height + * + * For more information: see https://linuxtv.org/downloads/v4l-dvb-apis/re32.html + */ +#define DRM_FORMAT_MOD_SAMSUNG_64_32_TILE fourcc_mod_code(SAMSUNG, 1) + +/* + * Tiled, 16 (pixels) x 16 (lines) - sized macroblocks + * + * This is a simple tiled layout using tiles of 16x16 pixels in a row-major + * layout. For YCbCr formats Cb/Cr components are taken in such a way that + * they correspond to their 16x16 luma block. + */ +#define DRM_FORMAT_MOD_SAMSUNG_16_16_TILE fourcc_mod_code(SAMSUNG, 2) + +/* + * Qualcomm Compressed Format + * + * Refers to a compressed variant of the base format that is compressed. + * Implementation may be platform and base-format specific. + * + * Each macrotile consists of m x n (mostly 4 x 4) tiles. + * Pixel data pitch/stride is aligned with macrotile width. + * Pixel data height is aligned with macrotile height. + * Entire pixel data buffer is aligned with 4k(bytes). + */ +#define DRM_FORMAT_MOD_QCOM_COMPRESSED fourcc_mod_code(QCOM, 1) + +/* Vivante framebuffer modifiers */ + +/* + * Vivante 4x4 tiling layout + * + * This is a simple tiled layout using tiles of 4x4 pixels in a row-major + * layout. + */ +#define DRM_FORMAT_MOD_VIVANTE_TILED fourcc_mod_code(VIVANTE, 1) + +/* + * Vivante 64x64 super-tiling layout + * + * This is a tiled layout using 64x64 pixel super-tiles, where each super-tile + * contains 8x4 groups of 2x4 tiles of 4x4 pixels (like above) each, all in row- + * major layout. + * + * For more information: see + * https://github.com/etnaviv/etna_viv/blob/master/doc/hardware.md#texture-tiling + */ +#define DRM_FORMAT_MOD_VIVANTE_SUPER_TILED fourcc_mod_code(VIVANTE, 2) + +/* + * Vivante 4x4 tiling layout for dual-pipe + * + * Same as the 4x4 tiling layout, except every second 4x4 pixel tile starts at a + * different base address. Offsets from the base addresses are therefore halved + * compared to the non-split tiled layout. + */ +#define DRM_FORMAT_MOD_VIVANTE_SPLIT_TILED fourcc_mod_code(VIVANTE, 3) + +/* + * Vivante 64x64 super-tiling layout for dual-pipe + * + * Same as the 64x64 super-tiling layout, except every second 4x4 pixel tile + * starts at a different base address. Offsets from the base addresses are + * therefore halved compared to the non-split super-tiled layout. + */ +#define DRM_FORMAT_MOD_VIVANTE_SPLIT_SUPER_TILED fourcc_mod_code(VIVANTE, 4) + +/* NVIDIA frame buffer modifiers */ + +/* + * Tegra Tiled Layout, used by Tegra 2, 3 and 4. + * + * Pixels are arranged in simple tiles of 16 x 16 bytes. + */ +#define DRM_FORMAT_MOD_NVIDIA_TEGRA_TILED fourcc_mod_code(NVIDIA, 1) + +/* + * Generalized Block Linear layout, used by desktop GPUs starting with NV50/G80, + * and Tegra GPUs starting with Tegra K1. + * + * Pixels are arranged in Groups of Bytes (GOBs). GOB size and layout varies + * based on the architecture generation. GOBs themselves are then arranged in + * 3D blocks, with the block dimensions (in terms of GOBs) always being a power + * of two, and hence expressible as their log2 equivalent (E.g., "2" represents + * a block depth or height of "4"). + * + * Chapter 20 "Pixel Memory Formats" of the Tegra X1 TRM describes this format + * in full detail. + * + * Macro + * Bits Param Description + * ---- ----- ----------------------------------------------------------------- + * + * 3:0 h log2(height) of each block, in GOBs. Placed here for + * compatibility with the existing + * DRM_FORMAT_MOD_NVIDIA_16BX2_BLOCK()-based modifiers. + * + * 4:4 - Must be 1, to indicate block-linear layout. Necessary for + * compatibility with the existing + * DRM_FORMAT_MOD_NVIDIA_16BX2_BLOCK()-based modifiers. + * + * 8:5 - Reserved (To support 3D-surfaces with variable log2(depth) block + * size). Must be zero. + * + * Note there is no log2(width) parameter. Some portions of the + * hardware support a block width of two gobs, but it is impractical + * to use due to lack of support elsewhere, and has no known + * benefits. + * + * 11:9 - Reserved (To support 2D-array textures with variable array stride + * in blocks, specified via log2(tile width in blocks)). Must be + * zero. + * + * 19:12 k Page Kind. This value directly maps to a field in the page + * tables of all GPUs >= NV50. It affects the exact layout of bits + * in memory and can be derived from the tuple + * + * (format, GPU model, compression type, samples per pixel) + * + * Where compression type is defined below. If GPU model were + * implied by the format modifier, format, or memory buffer, page + * kind would not need to be included in the modifier itself, but + * since the modifier should define the layout of the associated + * memory buffer independent from any device or other context, it + * must be included here. + * + * 21:20 g GOB Height and Page Kind Generation. The height of a GOB changed + * starting with Fermi GPUs. Additionally, the mapping between page + * kind and bit layout has changed at various points. + * + * 0 = Gob Height 8, Fermi - Volta, Tegra K1+ Page Kind mapping + * 1 = Gob Height 4, G80 - GT2XX Page Kind mapping + * 2 = Gob Height 8, Turing+ Page Kind mapping + * 3 = Reserved for future use. + * + * 22:22 s Sector layout. On Tegra GPUs prior to Xavier, there is a further + * bit remapping step that occurs at an even lower level than the + * page kind and block linear swizzles. This causes the layout of + * surfaces mapped in those SOC's GPUs to be incompatible with the + * equivalent mapping on other GPUs in the same system. + * + * 0 = Tegra K1 - Tegra Parker/TX2 Layout. + * 1 = Desktop GPU and Tegra Xavier+ Layout + * + * 25:23 c Lossless Framebuffer Compression type. + * + * 0 = none + * 1 = ROP/3D, layout 1, exact compression format implied by Page + * Kind field + * 2 = ROP/3D, layout 2, exact compression format implied by Page + * Kind field + * 3 = CDE horizontal + * 4 = CDE vertical + * 5 = Reserved for future use + * 6 = Reserved for future use + * 7 = Reserved for future use + * + * 55:25 - Reserved for future use. Must be zero. + */ +#define DRM_FORMAT_MOD_NVIDIA_BLOCK_LINEAR_2D(c, s, g, k, h) \ + fourcc_mod_code(NVIDIA, (0x10 | \ + ((h) & 0xf) | \ + (((k) & 0xff) << 12) | \ + (((g) & 0x3) << 20) | \ + (((s) & 0x1) << 22) | \ + (((c) & 0x7) << 23))) + +/* To grandfather in prior block linear format modifiers to the above layout, + * the page kind "0", which corresponds to "pitch/linear" and hence is unusable + * with block-linear layouts, is remapped within drivers to the value 0xfe, + * which corresponds to the "generic" kind used for simple single-sample + * uncompressed color formats on Fermi - Volta GPUs. + */ +static inline uint64_t +drm_fourcc_canonicalize_nvidia_format_mod(uint64_t modifier) +{ + if (!(modifier & 0x10) || (modifier & (0xff << 12))) + return modifier; + else + return modifier | (0xfe << 12); +} + +/* + * 16Bx2 Block Linear layout, used by Tegra K1 and later + * + * Pixels are arranged in 64x8 Groups Of Bytes (GOBs). GOBs are then stacked + * vertically by a power of 2 (1 to 32 GOBs) to form a block. + * + * Within a GOB, data is ordered as 16B x 2 lines sectors laid in Z-shape. + * + * Parameter 'v' is the log2 encoding of the number of GOBs stacked vertically. + * Valid values are: + * + * 0 == ONE_GOB + * 1 == TWO_GOBS + * 2 == FOUR_GOBS + * 3 == EIGHT_GOBS + * 4 == SIXTEEN_GOBS + * 5 == THIRTYTWO_GOBS + * + * Chapter 20 "Pixel Memory Formats" of the Tegra X1 TRM describes this format + * in full detail. + */ +#define DRM_FORMAT_MOD_NVIDIA_16BX2_BLOCK(v) \ + DRM_FORMAT_MOD_NVIDIA_BLOCK_LINEAR_2D(0, 0, 0, 0, (v)) + +#define DRM_FORMAT_MOD_NVIDIA_16BX2_BLOCK_ONE_GOB \ + DRM_FORMAT_MOD_NVIDIA_16BX2_BLOCK(0) +#define DRM_FORMAT_MOD_NVIDIA_16BX2_BLOCK_TWO_GOB \ + DRM_FORMAT_MOD_NVIDIA_16BX2_BLOCK(1) +#define DRM_FORMAT_MOD_NVIDIA_16BX2_BLOCK_FOUR_GOB \ + DRM_FORMAT_MOD_NVIDIA_16BX2_BLOCK(2) +#define DRM_FORMAT_MOD_NVIDIA_16BX2_BLOCK_EIGHT_GOB \ + DRM_FORMAT_MOD_NVIDIA_16BX2_BLOCK(3) +#define DRM_FORMAT_MOD_NVIDIA_16BX2_BLOCK_SIXTEEN_GOB \ + DRM_FORMAT_MOD_NVIDIA_16BX2_BLOCK(4) +#define DRM_FORMAT_MOD_NVIDIA_16BX2_BLOCK_THIRTYTWO_GOB \ + DRM_FORMAT_MOD_NVIDIA_16BX2_BLOCK(5) + +/* + * Some Broadcom modifiers take parameters, for example the number of + * vertical lines in the image. Reserve the lower 32 bits for modifier + * type, and the next 24 bits for parameters. Top 8 bits are the + * vendor code. + */ +#define __fourcc_mod_broadcom_param_shift 8 +#define __fourcc_mod_broadcom_param_bits 48 +#define fourcc_mod_broadcom_code(val, params) \ + fourcc_mod_code(BROADCOM, ((((uint64_t)params) << __fourcc_mod_broadcom_param_shift) | val)) +#define fourcc_mod_broadcom_param(m) \ + ((int)(((m) >> __fourcc_mod_broadcom_param_shift) & \ + ((1ULL << __fourcc_mod_broadcom_param_bits) - 1))) +#define fourcc_mod_broadcom_mod(m) \ + ((m) & ~(((1ULL << __fourcc_mod_broadcom_param_bits) - 1) << \ + __fourcc_mod_broadcom_param_shift)) + +/* + * Broadcom VC4 "T" format + * + * This is the primary layout that the V3D GPU can texture from (it + * can't do linear). The T format has: + * + * - 64b utiles of pixels in a raster-order grid according to cpp. It's 4x4 + * pixels at 32 bit depth. + * + * - 1k subtiles made of a 4x4 raster-order grid of 64b utiles (so usually + * 16x16 pixels). + * + * - 4k tiles made of a 2x2 grid of 1k subtiles (so usually 32x32 pixels). On + * even 4k tile rows, they're arranged as (BL, TL, TR, BR), and on odd rows + * they're (TR, BR, BL, TL), where bottom left is start of memory. + * + * - an image made of 4k tiles in rows either left-to-right (even rows of 4k + * tiles) or right-to-left (odd rows of 4k tiles). + */ +#define DRM_FORMAT_MOD_BROADCOM_VC4_T_TILED fourcc_mod_code(BROADCOM, 1) + +/* + * Broadcom SAND format + * + * This is the native format that the H.264 codec block uses. For VC4 + * HVS, it is only valid for H.264 (NV12/21) and RGBA modes. + * + * The image can be considered to be split into columns, and the + * columns are placed consecutively into memory. The width of those + * columns can be either 32, 64, 128, or 256 pixels, but in practice + * only 128 pixel columns are used. + * + * The pitch between the start of each column is set to optimally + * switch between SDRAM banks. This is passed as the number of lines + * of column width in the modifier (we can't use the stride value due + * to various core checks that look at it , so you should set the + * stride to width*cpp). + * + * Note that the column height for this format modifier is the same + * for all of the planes, assuming that each column contains both Y + * and UV. Some SAND-using hardware stores UV in a separate tiled + * image from Y to reduce the column height, which is not supported + * with these modifiers. + */ + +#define DRM_FORMAT_MOD_BROADCOM_SAND32_COL_HEIGHT(v) \ + fourcc_mod_broadcom_code(2, v) +#define DRM_FORMAT_MOD_BROADCOM_SAND64_COL_HEIGHT(v) \ + fourcc_mod_broadcom_code(3, v) +#define DRM_FORMAT_MOD_BROADCOM_SAND128_COL_HEIGHT(v) \ + fourcc_mod_broadcom_code(4, v) +#define DRM_FORMAT_MOD_BROADCOM_SAND256_COL_HEIGHT(v) \ + fourcc_mod_broadcom_code(5, v) + +#define DRM_FORMAT_MOD_BROADCOM_SAND32 \ + DRM_FORMAT_MOD_BROADCOM_SAND32_COL_HEIGHT(0) +#define DRM_FORMAT_MOD_BROADCOM_SAND64 \ + DRM_FORMAT_MOD_BROADCOM_SAND64_COL_HEIGHT(0) +#define DRM_FORMAT_MOD_BROADCOM_SAND128 \ + DRM_FORMAT_MOD_BROADCOM_SAND128_COL_HEIGHT(0) +#define DRM_FORMAT_MOD_BROADCOM_SAND256 \ + DRM_FORMAT_MOD_BROADCOM_SAND256_COL_HEIGHT(0) + +/* Broadcom UIF format + * + * This is the common format for the current Broadcom multimedia + * blocks, including V3D 3.x and newer, newer video codecs, and + * displays. + * + * The image consists of utiles (64b blocks), UIF blocks (2x2 utiles), + * and macroblocks (4x4 UIF blocks). Those 4x4 UIF block groups are + * stored in columns, with padding between the columns to ensure that + * moving from one column to the next doesn't hit the same SDRAM page + * bank. + * + * To calculate the padding, it is assumed that each hardware block + * and the software driving it knows the platform's SDRAM page size, + * number of banks, and XOR address, and that it's identical between + * all blocks using the format. This tiling modifier will use XOR as + * necessary to reduce the padding. If a hardware block can't do XOR, + * the assumption is that a no-XOR tiling modifier will be created. + */ +#define DRM_FORMAT_MOD_BROADCOM_UIF fourcc_mod_code(BROADCOM, 6) + +/* + * Arm Framebuffer Compression (AFBC) modifiers + * + * AFBC is a proprietary lossless image compression protocol and format. + * It provides fine-grained random access and minimizes the amount of data + * transferred between IP blocks. + * + * AFBC has several features which may be supported and/or used, which are + * represented using bits in the modifier. Not all combinations are valid, + * and different devices or use-cases may support different combinations. + * + * Further information on the use of AFBC modifiers can be found in + * Documentation/gpu/afbc.rst + */ + +/* + * The top 4 bits (out of the 56 bits alloted for specifying vendor specific + * modifiers) denote the category for modifiers. Currently we have only two + * categories of modifiers ie AFBC and MISC. We can have a maximum of sixteen + * different categories. + */ +#define DRM_FORMAT_MOD_ARM_CODE(__type, __val) \ + fourcc_mod_code(ARM, ((uint64_t)(__type) << 52) | ((__val) & 0x000fffffffffffffULL)) + +#define DRM_FORMAT_MOD_ARM_TYPE_AFBC 0x00 +#define DRM_FORMAT_MOD_ARM_TYPE_MISC 0x01 + +#define DRM_FORMAT_MOD_ARM_AFBC(__afbc_mode) \ + DRM_FORMAT_MOD_ARM_CODE(DRM_FORMAT_MOD_ARM_TYPE_AFBC, __afbc_mode) + +/* + * AFBC superblock size + * + * Indicates the superblock size(s) used for the AFBC buffer. The buffer + * size (in pixels) must be aligned to a multiple of the superblock size. + * Four lowest significant bits(LSBs) are reserved for block size. + * + * Where one superblock size is specified, it applies to all planes of the + * buffer (e.g. 16x16, 32x8). When multiple superblock sizes are specified, + * the first applies to the Luma plane and the second applies to the Chroma + * plane(s). e.g. (32x8_64x4 means 32x8 Luma, with 64x4 Chroma). + * Multiple superblock sizes are only valid for multi-plane YCbCr formats. + */ +#define AFBC_FORMAT_MOD_BLOCK_SIZE_MASK 0xf +#define AFBC_FORMAT_MOD_BLOCK_SIZE_16x16 (1ULL) +#define AFBC_FORMAT_MOD_BLOCK_SIZE_32x8 (2ULL) +#define AFBC_FORMAT_MOD_BLOCK_SIZE_64x4 (3ULL) +#define AFBC_FORMAT_MOD_BLOCK_SIZE_32x8_64x4 (4ULL) + +/* + * AFBC lossless colorspace transform + * + * Indicates that the buffer makes use of the AFBC lossless colorspace + * transform. + */ +#define AFBC_FORMAT_MOD_YTR (1ULL << 4) + +/* + * AFBC block-split + * + * Indicates that the payload of each superblock is split. The second + * half of the payload is positioned at a predefined offset from the start + * of the superblock payload. + */ +#define AFBC_FORMAT_MOD_SPLIT (1ULL << 5) + +/* + * AFBC sparse layout + * + * This flag indicates that the payload of each superblock must be stored at a + * predefined position relative to the other superblocks in the same AFBC + * buffer. This order is the same order used by the header buffer. In this mode + * each superblock is given the same amount of space as an uncompressed + * superblock of the particular format would require, rounding up to the next + * multiple of 128 bytes in size. + */ +#define AFBC_FORMAT_MOD_SPARSE (1ULL << 6) + +/* + * AFBC copy-block restrict + * + * Buffers with this flag must obey the copy-block restriction. The restriction + * is such that there are no copy-blocks referring across the border of 8x8 + * blocks. For the subsampled data the 8x8 limitation is also subsampled. + */ +#define AFBC_FORMAT_MOD_CBR (1ULL << 7) + +/* + * AFBC tiled layout + * + * The tiled layout groups superblocks in 8x8 or 4x4 tiles, where all + * superblocks inside a tile are stored together in memory. 8x8 tiles are used + * for pixel formats up to and including 32 bpp while 4x4 tiles are used for + * larger bpp formats. The order between the tiles is scan line. + * When the tiled layout is used, the buffer size (in pixels) must be aligned + * to the tile size. + */ +#define AFBC_FORMAT_MOD_TILED (1ULL << 8) + +/* + * AFBC solid color blocks + * + * Indicates that the buffer makes use of solid-color blocks, whereby bandwidth + * can be reduced if a whole superblock is a single color. + */ +#define AFBC_FORMAT_MOD_SC (1ULL << 9) + +/* + * AFBC double-buffer + * + * Indicates that the buffer is allocated in a layout safe for front-buffer + * rendering. + */ +#define AFBC_FORMAT_MOD_DB (1ULL << 10) + +/* + * AFBC buffer content hints + * + * Indicates that the buffer includes per-superblock content hints. + */ +#define AFBC_FORMAT_MOD_BCH (1ULL << 11) + +/* AFBC uncompressed storage mode + * + * Indicates that the buffer is using AFBC uncompressed storage mode. + * In this mode all superblock payloads in the buffer use the uncompressed + * storage mode, which is usually only used for data which cannot be compressed. + * The buffer layout is the same as for AFBC buffers without USM set, this only + * affects the storage mode of the individual superblocks. Note that even a + * buffer without USM set may use uncompressed storage mode for some or all + * superblocks, USM just guarantees it for all. + */ +#define AFBC_FORMAT_MOD_USM (1ULL << 12) + +/* + * Arm 16x16 Block U-Interleaved modifier + * + * This is used by Arm Mali Utgard and Midgard GPUs. It divides the image + * into 16x16 pixel blocks. Blocks are stored linearly in order, but pixels + * in the block are reordered. + */ +#define DRM_FORMAT_MOD_ARM_16X16_BLOCK_U_INTERLEAVED \ + DRM_FORMAT_MOD_ARM_CODE(DRM_FORMAT_MOD_ARM_TYPE_MISC, 1ULL) + +/* + * Allwinner tiled modifier + * + * This tiling mode is implemented by the VPU found on all Allwinner platforms, + * codenamed sunxi. It is associated with a YUV format that uses either 2 or 3 + * planes. + * + * With this tiling, the luminance samples are disposed in tiles representing + * 32x32 pixels and the chrominance samples in tiles representing 32x64 pixels. + * The pixel order in each tile is linear and the tiles are disposed linearly, + * both in row-major order. + */ +#define DRM_FORMAT_MOD_ALLWINNER_TILED fourcc_mod_code(ALLWINNER, 1) + +/* + * Amlogic Video Framebuffer Compression modifiers + * + * Amlogic uses a proprietary lossless image compression protocol and format + * for their hardware video codec accelerators, either video decoders or + * video input encoders. + * + * It considerably reduces memory bandwidth while writing and reading + * frames in memory. + * + * The underlying storage is considered to be 3 components, 8bit or 10-bit + * per component YCbCr 420, single plane : + * - DRM_FORMAT_YUV420_8BIT + * - DRM_FORMAT_YUV420_10BIT + * + * The first 8 bits of the mode defines the layout, then the following 8 bits + * defines the options changing the layout. + * + * Not all combinations are valid, and different SoCs may support different + * combinations of layout and options. + */ +#define __fourcc_mod_amlogic_layout_mask 0xf +#define __fourcc_mod_amlogic_options_shift 8 +#define __fourcc_mod_amlogic_options_mask 0xf + +#define DRM_FORMAT_MOD_AMLOGIC_FBC(__layout, __options) \ + fourcc_mod_code(AMLOGIC, \ + ((__layout) & __fourcc_mod_amlogic_layout_mask) | \ + (((__options) & __fourcc_mod_amlogic_options_mask) \ + << __fourcc_mod_amlogic_options_shift)) + +/* Amlogic FBC Layouts */ + +/* + * Amlogic FBC Basic Layout + * + * The basic layout is composed of: + * - a body content organized in 64x32 superblocks with 4096 bytes per + * superblock in default mode. + * - a 32 bytes per 128x64 header block + * + * This layout is transferrable between Amlogic SoCs supporting this modifier. + */ +#define AMLOGIC_FBC_LAYOUT_BASIC (1ULL) + +/* + * Amlogic FBC Scatter Memory layout + * + * Indicates the header contains IOMMU references to the compressed + * frames content to optimize memory access and layout. + * + * In this mode, only the header memory address is needed, thus the + * content memory organization is tied to the current producer + * execution and cannot be saved/dumped neither transferrable between + * Amlogic SoCs supporting this modifier. + * + * Due to the nature of the layout, these buffers are not expected to + * be accessible by the user-space clients, but only accessible by the + * hardware producers and consumers. + * + * The user-space clients should expect a failure while trying to mmap + * the DMA-BUF handle returned by the producer. + */ +#define AMLOGIC_FBC_LAYOUT_SCATTER (2ULL) + +/* Amlogic FBC Layout Options Bit Mask */ + +/* + * Amlogic FBC Memory Saving mode + * + * Indicates the storage is packed when pixel size is multiple of word + * boudaries, i.e. 8bit should be stored in this mode to save allocation + * memory. + * + * This mode reduces body layout to 3072 bytes per 64x32 superblock with + * the basic layout and 3200 bytes per 64x32 superblock combined with + * the scatter layout. + */ +#define AMLOGIC_FBC_OPTION_MEM_SAVING (1ULL << 0) + +/* + * AMD modifiers + * + * Memory layout: + * + * without DCC: + * - main surface + * + * with DCC & without DCC_RETILE: + * - main surface in plane 0 + * - DCC surface in plane 1 (RB-aligned, pipe-aligned if DCC_PIPE_ALIGN is set) + * + * with DCC & DCC_RETILE: + * - main surface in plane 0 + * - displayable DCC surface in plane 1 (not RB-aligned & not pipe-aligned) + * - pipe-aligned DCC surface in plane 2 (RB-aligned & pipe-aligned) + * + * For multi-plane formats the above surfaces get merged into one plane for + * each format plane, based on the required alignment only. + * + * Bits Parameter Notes + * ----- ------------------------ --------------------------------------------- + * + * 7:0 TILE_VERSION Values are AMD_FMT_MOD_TILE_VER_* + * 12:8 TILE Values are AMD_FMT_MOD_TILE__* + * 13 DCC + * 14 DCC_RETILE + * 15 DCC_PIPE_ALIGN + * 16 DCC_INDEPENDENT_64B + * 17 DCC_INDEPENDENT_128B + * 19:18 DCC_MAX_COMPRESSED_BLOCK Values are AMD_FMT_MOD_DCC_BLOCK_* + * 20 DCC_CONSTANT_ENCODE + * 23:21 PIPE_XOR_BITS Only for some chips + * 26:24 BANK_XOR_BITS Only for some chips + * 29:27 PACKERS Only for some chips + * 32:30 RB Only for some chips + * 35:33 PIPE Only for some chips + * 55:36 - Reserved for future use, must be zero + */ +#define AMD_FMT_MOD fourcc_mod_code(AMD, 0) + +#define IS_AMD_FMT_MOD(val) (((val) >> 56) == DRM_FORMAT_MOD_VENDOR_AMD) + +/* Reserve 0 for GFX8 and older */ +#define AMD_FMT_MOD_TILE_VER_GFX9 1 +#define AMD_FMT_MOD_TILE_VER_GFX10 2 +#define AMD_FMT_MOD_TILE_VER_GFX10_RBPLUS 3 + +/* + * 64K_S is the same for GFX9/GFX10/GFX10_RBPLUS and hence has GFX9 as canonical + * version. + */ +#define AMD_FMT_MOD_TILE_GFX9_64K_S 9 + +/* + * 64K_D for non-32 bpp is the same for GFX9/GFX10/GFX10_RBPLUS and hence has + * GFX9 as canonical version. + */ +#define AMD_FMT_MOD_TILE_GFX9_64K_D 10 +#define AMD_FMT_MOD_TILE_GFX9_64K_S_X 25 +#define AMD_FMT_MOD_TILE_GFX9_64K_D_X 26 +#define AMD_FMT_MOD_TILE_GFX9_64K_R_X 27 + +#define AMD_FMT_MOD_DCC_BLOCK_64B 0 +#define AMD_FMT_MOD_DCC_BLOCK_128B 1 +#define AMD_FMT_MOD_DCC_BLOCK_256B 2 + +#define AMD_FMT_MOD_TILE_VERSION_SHIFT 0 +#define AMD_FMT_MOD_TILE_VERSION_MASK 0xFF +#define AMD_FMT_MOD_TILE_SHIFT 8 +#define AMD_FMT_MOD_TILE_MASK 0x1F + +/* Whether DCC compression is enabled. */ +#define AMD_FMT_MOD_DCC_SHIFT 13 +#define AMD_FMT_MOD_DCC_MASK 0x1 + +/* + * Whether to include two DCC surfaces, one which is rb & pipe aligned, and + * one which is not-aligned. + */ +#define AMD_FMT_MOD_DCC_RETILE_SHIFT 14 +#define AMD_FMT_MOD_DCC_RETILE_MASK 0x1 + +/* Only set if DCC_RETILE = false */ +#define AMD_FMT_MOD_DCC_PIPE_ALIGN_SHIFT 15 +#define AMD_FMT_MOD_DCC_PIPE_ALIGN_MASK 0x1 + +#define AMD_FMT_MOD_DCC_INDEPENDENT_64B_SHIFT 16 +#define AMD_FMT_MOD_DCC_INDEPENDENT_64B_MASK 0x1 +#define AMD_FMT_MOD_DCC_INDEPENDENT_128B_SHIFT 17 +#define AMD_FMT_MOD_DCC_INDEPENDENT_128B_MASK 0x1 +#define AMD_FMT_MOD_DCC_MAX_COMPRESSED_BLOCK_SHIFT 18 +#define AMD_FMT_MOD_DCC_MAX_COMPRESSED_BLOCK_MASK 0x3 + +/* + * DCC supports embedding some clear colors directly in the DCC surface. + * However, on older GPUs the rendering HW ignores the embedded clear color + * and prefers the driver provided color. This necessitates doing a fastclear + * eliminate operation before a process transfers control. + * + * If this bit is set that means the fastclear eliminate is not needed for these + * embeddable colors. + */ +#define AMD_FMT_MOD_DCC_CONSTANT_ENCODE_SHIFT 20 +#define AMD_FMT_MOD_DCC_CONSTANT_ENCODE_MASK 0x1 + +/* + * The below fields are for accounting for per GPU differences. These are only + * relevant for GFX9 and later and if the tile field is *_X/_T. + * + * PIPE_XOR_BITS = always needed + * BANK_XOR_BITS = only for TILE_VER_GFX9 + * PACKERS = only for TILE_VER_GFX10_RBPLUS + * RB = only for TILE_VER_GFX9 & DCC + * PIPE = only for TILE_VER_GFX9 & DCC & (DCC_RETILE | DCC_PIPE_ALIGN) + */ +#define AMD_FMT_MOD_PIPE_XOR_BITS_SHIFT 21 +#define AMD_FMT_MOD_PIPE_XOR_BITS_MASK 0x7 +#define AMD_FMT_MOD_BANK_XOR_BITS_SHIFT 24 +#define AMD_FMT_MOD_BANK_XOR_BITS_MASK 0x7 +#define AMD_FMT_MOD_PACKERS_SHIFT 27 +#define AMD_FMT_MOD_PACKERS_MASK 0x7 +#define AMD_FMT_MOD_RB_SHIFT 30 +#define AMD_FMT_MOD_RB_MASK 0x7 +#define AMD_FMT_MOD_PIPE_SHIFT 33 +#define AMD_FMT_MOD_PIPE_MASK 0x7 + +#define AMD_FMT_MOD_SET(field, value) \ + ((uint64_t)(value) << AMD_FMT_MOD_##field##_SHIFT) +#define AMD_FMT_MOD_GET(field, value) \ + (((value) >> AMD_FMT_MOD_##field##_SHIFT) & AMD_FMT_MOD_##field##_MASK) +#define AMD_FMT_MOD_CLEAR(field) \ + (~((uint64_t)AMD_FMT_MOD_##field##_MASK << AMD_FMT_MOD_##field##_SHIFT)) + +#if defined(__cplusplus) +} +#endif + +#endif /* DRM_FOURCC_H */ diff --git a/include/standard-headers/linux/const.h b/include/standard-headers/linux/const.h new file mode 100644 index 000000000..5e4898725 --- /dev/null +++ b/include/standard-headers/linux/const.h @@ -0,0 +1,36 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ +/* const.h: Macros for dealing with constants. */ + +#ifndef _LINUX_CONST_H +#define _LINUX_CONST_H + +/* Some constant macros are used in both assembler and + * C code. Therefore we cannot annotate them always with + * 'UL' and other type specifiers unilaterally. We + * use the following macros to deal with this. + * + * Similarly, _AT() will cast an expression with a type in C, but + * leave it unchanged in asm. + */ + +#ifdef __ASSEMBLY__ +#define _AC(X,Y) X +#define _AT(T,X) X +#else +#define __AC(X,Y) (X##Y) +#define _AC(X,Y) __AC(X,Y) +#define _AT(T,X) ((T)(X)) +#endif + +#define _UL(x) (_AC(x, UL)) +#define _ULL(x) (_AC(x, ULL)) + +#define _BITUL(x) (_UL(1) << (x)) +#define _BITULL(x) (_ULL(1) << (x)) + +#define __ALIGN_KERNEL(x, a) __ALIGN_KERNEL_MASK(x, (typeof(x))(a) - 1) +#define __ALIGN_KERNEL_MASK(x, mask) (((x) + (mask)) & ~(mask)) + +#define __KERNEL_DIV_ROUND_UP(n, d) (((n) + (d) - 1) / (d)) + +#endif /* _LINUX_CONST_H */ diff --git a/include/standard-headers/linux/ethtool.h b/include/standard-headers/linux/ethtool.h new file mode 100644 index 000000000..8bfd01d23 --- /dev/null +++ b/include/standard-headers/linux/ethtool.h @@ -0,0 +1,2019 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ +/* + * ethtool.h: Defines for Linux ethtool. + * + * Copyright (C) 1998 David S. Miller (davem@redhat.com) + * Copyright 2001 Jeff Garzik + * Portions Copyright 2001 Sun Microsystems (thockin@sun.com) + * Portions Copyright 2002 Intel (eli.kupermann@intel.com, + * christopher.leech@intel.com, + * scott.feldman@intel.com) + * Portions Copyright (C) Sun Microsystems 2008 + */ + +#ifndef _LINUX_ETHTOOL_H +#define _LINUX_ETHTOOL_H + +#include "net/eth.h" + +#include "standard-headers/linux/const.h" +#include "standard-headers/linux/types.h" +#include "standard-headers/linux/if_ether.h" + +#include /* for INT_MAX */ + +/* All structures exposed to userland should be defined such that they + * have the same layout for 32-bit and 64-bit userland. + */ + +/** + * struct ethtool_cmd - DEPRECATED, link control and status + * This structure is DEPRECATED, please use struct ethtool_link_settings. + * @cmd: Command number = %ETHTOOL_GSET or %ETHTOOL_SSET + * @supported: Bitmask of %SUPPORTED_* flags for the link modes, + * physical connectors and other link features for which the + * interface supports autonegotiation or auto-detection. + * Read-only. + * @advertising: Bitmask of %ADVERTISED_* flags for the link modes, + * physical connectors and other link features that are + * advertised through autonegotiation or enabled for + * auto-detection. + * @speed: Low bits of the speed, 1Mb units, 0 to INT_MAX or SPEED_UNKNOWN + * @duplex: Duplex mode; one of %DUPLEX_* + * @port: Physical connector type; one of %PORT_* + * @phy_address: MDIO address of PHY (transceiver); 0 or 255 if not + * applicable. For clause 45 PHYs this is the PRTAD. + * @transceiver: Historically used to distinguish different possible + * PHY types, but not in a consistent way. Deprecated. + * @autoneg: Enable/disable autonegotiation and auto-detection; + * either %AUTONEG_DISABLE or %AUTONEG_ENABLE + * @mdio_support: Bitmask of %ETH_MDIO_SUPPORTS_* flags for the MDIO + * protocols supported by the interface; 0 if unknown. + * Read-only. + * @maxtxpkt: Historically used to report TX IRQ coalescing; now + * obsoleted by &struct ethtool_coalesce. Read-only; deprecated. + * @maxrxpkt: Historically used to report RX IRQ coalescing; now + * obsoleted by &struct ethtool_coalesce. Read-only; deprecated. + * @speed_hi: High bits of the speed, 1Mb units, 0 to INT_MAX or SPEED_UNKNOWN + * @eth_tp_mdix: Ethernet twisted-pair MDI(-X) status; one of + * %ETH_TP_MDI_*. If the status is unknown or not applicable, the + * value will be %ETH_TP_MDI_INVALID. Read-only. + * @eth_tp_mdix_ctrl: Ethernet twisted pair MDI(-X) control; one of + * %ETH_TP_MDI_*. If MDI(-X) control is not implemented, reads + * yield %ETH_TP_MDI_INVALID and writes may be ignored or rejected. + * When written successfully, the link should be renegotiated if + * necessary. + * @lp_advertising: Bitmask of %ADVERTISED_* flags for the link modes + * and other link features that the link partner advertised + * through autonegotiation; 0 if unknown or not applicable. + * Read-only. + * + * The link speed in Mbps is split between @speed and @speed_hi. Use + * the ethtool_cmd_speed() and ethtool_cmd_speed_set() functions to + * access it. + * + * If autonegotiation is disabled, the speed and @duplex represent the + * fixed link mode and are writable if the driver supports multiple + * link modes. If it is enabled then they are read-only; if the link + * is up they represent the negotiated link mode; if the link is down, + * the speed is 0, %SPEED_UNKNOWN or the highest enabled speed and + * @duplex is %DUPLEX_UNKNOWN or the best enabled duplex mode. + * + * Some hardware interfaces may have multiple PHYs and/or physical + * connectors fitted or do not allow the driver to detect which are + * fitted. For these interfaces @port and/or @phy_address may be + * writable, possibly dependent on @autoneg being %AUTONEG_DISABLE. + * Otherwise, attempts to write different values may be ignored or + * rejected. + * + * Users should assume that all fields not marked read-only are + * writable and subject to validation by the driver. They should use + * %ETHTOOL_GSET to get the current values before making specific + * changes and then applying them with %ETHTOOL_SSET. + * + * Deprecated fields should be ignored by both users and drivers. + */ +struct ethtool_cmd { + uint32_t cmd; + uint32_t supported; + uint32_t advertising; + uint16_t speed; + uint8_t duplex; + uint8_t port; + uint8_t phy_address; + uint8_t transceiver; + uint8_t autoneg; + uint8_t mdio_support; + uint32_t maxtxpkt; + uint32_t maxrxpkt; + uint16_t speed_hi; + uint8_t eth_tp_mdix; + uint8_t eth_tp_mdix_ctrl; + uint32_t lp_advertising; + uint32_t reserved[2]; +}; + +static inline void ethtool_cmd_speed_set(struct ethtool_cmd *ep, + uint32_t speed) +{ + ep->speed = (uint16_t)(speed & 0xFFFF); + ep->speed_hi = (uint16_t)(speed >> 16); +} + +static inline uint32_t ethtool_cmd_speed(const struct ethtool_cmd *ep) +{ + return (ep->speed_hi << 16) | ep->speed; +} + +/* Device supports clause 22 register access to PHY or peripherals + * using the interface defined in "standard-headers/linux/mii.h". This should not be + * set if there are known to be no such peripherals present or if + * the driver only emulates clause 22 registers for compatibility. + */ +#define ETH_MDIO_SUPPORTS_C22 1 + +/* Device supports clause 45 register access to PHY or peripherals + * using the interface defined in "standard-headers/linux/mii.h" and . + * This should not be set if there are known to be no such peripherals + * present. + */ +#define ETH_MDIO_SUPPORTS_C45 2 + +#define ETHTOOL_FWVERS_LEN 32 +#define ETHTOOL_BUSINFO_LEN 32 +#define ETHTOOL_EROMVERS_LEN 32 + +/** + * struct ethtool_drvinfo - general driver and device information + * @cmd: Command number = %ETHTOOL_GDRVINFO + * @driver: Driver short name. This should normally match the name + * in its bus driver structure (e.g. pci_driver::name). Must + * not be an empty string. + * @version: Driver version string; may be an empty string + * @fw_version: Firmware version string; may be an empty string + * @erom_version: Expansion ROM version string; may be an empty string + * @bus_info: Device bus address. This should match the dev_name() + * string for the underlying bus device, if there is one. May be + * an empty string. + * @n_priv_flags: Number of flags valid for %ETHTOOL_GPFLAGS and + * %ETHTOOL_SPFLAGS commands; also the number of strings in the + * %ETH_SS_PRIV_FLAGS set + * @n_stats: Number of uint64_t statistics returned by the %ETHTOOL_GSTATS + * command; also the number of strings in the %ETH_SS_STATS set + * @testinfo_len: Number of results returned by the %ETHTOOL_TEST + * command; also the number of strings in the %ETH_SS_TEST set + * @eedump_len: Size of EEPROM accessible through the %ETHTOOL_GEEPROM + * and %ETHTOOL_SEEPROM commands, in bytes + * @regdump_len: Size of register dump returned by the %ETHTOOL_GREGS + * command, in bytes + * + * Users can use the %ETHTOOL_GSSET_INFO command to get the number of + * strings in any string set (from Linux 2.6.34). + * + * Drivers should set at most @driver, @version, @fw_version and + * @bus_info in their get_drvinfo() implementation. The ethtool + * core fills in the other fields using other driver operations. + */ +struct ethtool_drvinfo { + uint32_t cmd; + char driver[32]; + char version[32]; + char fw_version[ETHTOOL_FWVERS_LEN]; + char bus_info[ETHTOOL_BUSINFO_LEN]; + char erom_version[ETHTOOL_EROMVERS_LEN]; + char reserved2[12]; + uint32_t n_priv_flags; + uint32_t n_stats; + uint32_t testinfo_len; + uint32_t eedump_len; + uint32_t regdump_len; +}; + +#define SOPASS_MAX 6 + +/** + * struct ethtool_wolinfo - Wake-On-Lan configuration + * @cmd: Command number = %ETHTOOL_GWOL or %ETHTOOL_SWOL + * @supported: Bitmask of %WAKE_* flags for supported Wake-On-Lan modes. + * Read-only. + * @wolopts: Bitmask of %WAKE_* flags for enabled Wake-On-Lan modes. + * @sopass: SecureOn(tm) password; meaningful only if %WAKE_MAGICSECURE + * is set in @wolopts. + */ +struct ethtool_wolinfo { + uint32_t cmd; + uint32_t supported; + uint32_t wolopts; + uint8_t sopass[SOPASS_MAX]; +}; + +/* for passing single values */ +struct ethtool_value { + uint32_t cmd; + uint32_t data; +}; + +#define PFC_STORM_PREVENTION_AUTO 0xffff +#define PFC_STORM_PREVENTION_DISABLE 0 + +enum tunable_id { + ETHTOOL_ID_UNSPEC, + ETHTOOL_RX_COPYBREAK, + ETHTOOL_TX_COPYBREAK, + ETHTOOL_PFC_PREVENTION_TOUT, /* timeout in msecs */ + /* + * Add your fresh new tunable attribute above and remember to update + * tunable_strings[] in net/core/ethtool.c + */ + __ETHTOOL_TUNABLE_COUNT, +}; + +enum tunable_type_id { + ETHTOOL_TUNABLE_UNSPEC, + ETHTOOL_TUNABLE_U8, + ETHTOOL_TUNABLE_U16, + ETHTOOL_TUNABLE_U32, + ETHTOOL_TUNABLE_U64, + ETHTOOL_TUNABLE_STRING, + ETHTOOL_TUNABLE_S8, + ETHTOOL_TUNABLE_S16, + ETHTOOL_TUNABLE_S32, + ETHTOOL_TUNABLE_S64, +}; + +struct ethtool_tunable { + uint32_t cmd; + uint32_t id; + uint32_t type_id; + uint32_t len; + void *data[0]; +}; + +#define DOWNSHIFT_DEV_DEFAULT_COUNT 0xff +#define DOWNSHIFT_DEV_DISABLE 0 + +/* Time in msecs after which link is reported as down + * 0 = lowest time supported by the PHY + * 0xff = off, link down detection according to standard + */ +#define ETHTOOL_PHY_FAST_LINK_DOWN_ON 0 +#define ETHTOOL_PHY_FAST_LINK_DOWN_OFF 0xff + +/* Energy Detect Power Down (EDPD) is a feature supported by some PHYs, where + * the PHY's RX & TX blocks are put into a low-power mode when there is no + * link detected (typically cable is un-plugged). For RX, only a minimal + * link-detection is available, and for TX the PHY wakes up to send link pulses + * to avoid any lock-ups in case the peer PHY may also be running in EDPD mode. + * + * Some PHYs may support configuration of the wake-up interval for TX pulses, + * and some PHYs may support only disabling TX pulses entirely. For the latter + * a special value is required (ETHTOOL_PHY_EDPD_NO_TX) so that this can be + * configured from userspace (should the user want it). + * + * The interval units for TX wake-up are in milliseconds, since this should + * cover a reasonable range of intervals: + * - from 1 millisecond, which does not sound like much of a power-saver + * - to ~65 seconds which is quite a lot to wait for a link to come up when + * plugging a cable + */ +#define ETHTOOL_PHY_EDPD_DFLT_TX_MSECS 0xffff +#define ETHTOOL_PHY_EDPD_NO_TX 0xfffe +#define ETHTOOL_PHY_EDPD_DISABLE 0 + +enum phy_tunable_id { + ETHTOOL_PHY_ID_UNSPEC, + ETHTOOL_PHY_DOWNSHIFT, + ETHTOOL_PHY_FAST_LINK_DOWN, + ETHTOOL_PHY_EDPD, + /* + * Add your fresh new phy tunable attribute above and remember to update + * phy_tunable_strings[] in net/core/ethtool.c + */ + __ETHTOOL_PHY_TUNABLE_COUNT, +}; + +/** + * struct ethtool_regs - hardware register dump + * @cmd: Command number = %ETHTOOL_GREGS + * @version: Dump format version. This is driver-specific and may + * distinguish different chips/revisions. Drivers must use new + * version numbers whenever the dump format changes in an + * incompatible way. + * @len: On entry, the real length of @data. On return, the number of + * bytes used. + * @data: Buffer for the register dump + * + * Users should use %ETHTOOL_GDRVINFO to find the maximum length of + * a register dump for the interface. They must allocate the buffer + * immediately following this structure. + */ +struct ethtool_regs { + uint32_t cmd; + uint32_t version; + uint32_t len; + uint8_t data[0]; +}; + +/** + * struct ethtool_eeprom - EEPROM dump + * @cmd: Command number = %ETHTOOL_GEEPROM, %ETHTOOL_GMODULEEEPROM or + * %ETHTOOL_SEEPROM + * @magic: A 'magic cookie' value to guard against accidental changes. + * The value passed in to %ETHTOOL_SEEPROM must match the value + * returned by %ETHTOOL_GEEPROM for the same device. This is + * unused when @cmd is %ETHTOOL_GMODULEEEPROM. + * @offset: Offset within the EEPROM to begin reading/writing, in bytes + * @len: On entry, number of bytes to read/write. On successful + * return, number of bytes actually read/written. In case of + * error, this may indicate at what point the error occurred. + * @data: Buffer to read/write from + * + * Users may use %ETHTOOL_GDRVINFO or %ETHTOOL_GMODULEINFO to find + * the length of an on-board or module EEPROM, respectively. They + * must allocate the buffer immediately following this structure. + */ +struct ethtool_eeprom { + uint32_t cmd; + uint32_t magic; + uint32_t offset; + uint32_t len; + uint8_t data[0]; +}; + +/** + * struct ethtool_eee - Energy Efficient Ethernet information + * @cmd: ETHTOOL_{G,S}EEE + * @supported: Mask of %SUPPORTED_* flags for the speed/duplex combinations + * for which there is EEE support. + * @advertised: Mask of %ADVERTISED_* flags for the speed/duplex combinations + * advertised as eee capable. + * @lp_advertised: Mask of %ADVERTISED_* flags for the speed/duplex + * combinations advertised by the link partner as eee capable. + * @eee_active: Result of the eee auto negotiation. + * @eee_enabled: EEE configured mode (enabled/disabled). + * @tx_lpi_enabled: Whether the interface should assert its tx lpi, given + * that eee was negotiated. + * @tx_lpi_timer: Time in microseconds the interface delays prior to asserting + * its tx lpi (after reaching 'idle' state). Effective only when eee + * was negotiated and tx_lpi_enabled was set. + */ +struct ethtool_eee { + uint32_t cmd; + uint32_t supported; + uint32_t advertised; + uint32_t lp_advertised; + uint32_t eee_active; + uint32_t eee_enabled; + uint32_t tx_lpi_enabled; + uint32_t tx_lpi_timer; + uint32_t reserved[2]; +}; + +/** + * struct ethtool_modinfo - plugin module eeprom information + * @cmd: %ETHTOOL_GMODULEINFO + * @type: Standard the module information conforms to %ETH_MODULE_SFF_xxxx + * @eeprom_len: Length of the eeprom + * + * This structure is used to return the information to + * properly size memory for a subsequent call to %ETHTOOL_GMODULEEEPROM. + * The type code indicates the eeprom data format + */ +struct ethtool_modinfo { + uint32_t cmd; + uint32_t type; + uint32_t eeprom_len; + uint32_t reserved[8]; +}; + +/** + * struct ethtool_coalesce - coalescing parameters for IRQs and stats updates + * @cmd: ETHTOOL_{G,S}COALESCE + * @rx_coalesce_usecs: How many usecs to delay an RX interrupt after + * a packet arrives. + * @rx_max_coalesced_frames: Maximum number of packets to receive + * before an RX interrupt. + * @rx_coalesce_usecs_irq: Same as @rx_coalesce_usecs, except that + * this value applies while an IRQ is being serviced by the host. + * @rx_max_coalesced_frames_irq: Same as @rx_max_coalesced_frames, + * except that this value applies while an IRQ is being serviced + * by the host. + * @tx_coalesce_usecs: How many usecs to delay a TX interrupt after + * a packet is sent. + * @tx_max_coalesced_frames: Maximum number of packets to be sent + * before a TX interrupt. + * @tx_coalesce_usecs_irq: Same as @tx_coalesce_usecs, except that + * this value applies while an IRQ is being serviced by the host. + * @tx_max_coalesced_frames_irq: Same as @tx_max_coalesced_frames, + * except that this value applies while an IRQ is being serviced + * by the host. + * @stats_block_coalesce_usecs: How many usecs to delay in-memory + * statistics block updates. Some drivers do not have an + * in-memory statistic block, and in such cases this value is + * ignored. This value must not be zero. + * @use_adaptive_rx_coalesce: Enable adaptive RX coalescing. + * @use_adaptive_tx_coalesce: Enable adaptive TX coalescing. + * @pkt_rate_low: Threshold for low packet rate (packets per second). + * @rx_coalesce_usecs_low: How many usecs to delay an RX interrupt after + * a packet arrives, when the packet rate is below @pkt_rate_low. + * @rx_max_coalesced_frames_low: Maximum number of packets to be received + * before an RX interrupt, when the packet rate is below @pkt_rate_low. + * @tx_coalesce_usecs_low: How many usecs to delay a TX interrupt after + * a packet is sent, when the packet rate is below @pkt_rate_low. + * @tx_max_coalesced_frames_low: Maximum nuumber of packets to be sent before + * a TX interrupt, when the packet rate is below @pkt_rate_low. + * @pkt_rate_high: Threshold for high packet rate (packets per second). + * @rx_coalesce_usecs_high: How many usecs to delay an RX interrupt after + * a packet arrives, when the packet rate is above @pkt_rate_high. + * @rx_max_coalesced_frames_high: Maximum number of packets to be received + * before an RX interrupt, when the packet rate is above @pkt_rate_high. + * @tx_coalesce_usecs_high: How many usecs to delay a TX interrupt after + * a packet is sent, when the packet rate is above @pkt_rate_high. + * @tx_max_coalesced_frames_high: Maximum number of packets to be sent before + * a TX interrupt, when the packet rate is above @pkt_rate_high. + * @rate_sample_interval: How often to do adaptive coalescing packet rate + * sampling, measured in seconds. Must not be zero. + * + * Each pair of (usecs, max_frames) fields specifies that interrupts + * should be coalesced until + * (usecs > 0 && time_since_first_completion >= usecs) || + * (max_frames > 0 && completed_frames >= max_frames) + * + * It is illegal to set both usecs and max_frames to zero as this + * would cause interrupts to never be generated. To disable + * coalescing, set usecs = 0 and max_frames = 1. + * + * Some implementations ignore the value of max_frames and use the + * condition time_since_first_completion >= usecs + * + * This is deprecated. Drivers for hardware that does not support + * counting completions should validate that max_frames == !rx_usecs. + * + * Adaptive RX/TX coalescing is an algorithm implemented by some + * drivers to improve latency under low packet rates and improve + * throughput under high packet rates. Some drivers only implement + * one of RX or TX adaptive coalescing. Anything not implemented by + * the driver causes these values to be silently ignored. + * + * When the packet rate is below @pkt_rate_high but above + * @pkt_rate_low (both measured in packets per second) the + * normal {rx,tx}_* coalescing parameters are used. + */ +struct ethtool_coalesce { + uint32_t cmd; + uint32_t rx_coalesce_usecs; + uint32_t rx_max_coalesced_frames; + uint32_t rx_coalesce_usecs_irq; + uint32_t rx_max_coalesced_frames_irq; + uint32_t tx_coalesce_usecs; + uint32_t tx_max_coalesced_frames; + uint32_t tx_coalesce_usecs_irq; + uint32_t tx_max_coalesced_frames_irq; + uint32_t stats_block_coalesce_usecs; + uint32_t use_adaptive_rx_coalesce; + uint32_t use_adaptive_tx_coalesce; + uint32_t pkt_rate_low; + uint32_t rx_coalesce_usecs_low; + uint32_t rx_max_coalesced_frames_low; + uint32_t tx_coalesce_usecs_low; + uint32_t tx_max_coalesced_frames_low; + uint32_t pkt_rate_high; + uint32_t rx_coalesce_usecs_high; + uint32_t rx_max_coalesced_frames_high; + uint32_t tx_coalesce_usecs_high; + uint32_t tx_max_coalesced_frames_high; + uint32_t rate_sample_interval; +}; + +/** + * struct ethtool_ringparam - RX/TX ring parameters + * @cmd: Command number = %ETHTOOL_GRINGPARAM or %ETHTOOL_SRINGPARAM + * @rx_max_pending: Maximum supported number of pending entries per + * RX ring. Read-only. + * @rx_mini_max_pending: Maximum supported number of pending entries + * per RX mini ring. Read-only. + * @rx_jumbo_max_pending: Maximum supported number of pending entries + * per RX jumbo ring. Read-only. + * @tx_max_pending: Maximum supported number of pending entries per + * TX ring. Read-only. + * @rx_pending: Current maximum number of pending entries per RX ring + * @rx_mini_pending: Current maximum number of pending entries per RX + * mini ring + * @rx_jumbo_pending: Current maximum number of pending entries per RX + * jumbo ring + * @tx_pending: Current maximum supported number of pending entries + * per TX ring + * + * If the interface does not have separate RX mini and/or jumbo rings, + * @rx_mini_max_pending and/or @rx_jumbo_max_pending will be 0. + * + * There may also be driver-dependent minimum values for the number + * of entries per ring. + */ +struct ethtool_ringparam { + uint32_t cmd; + uint32_t rx_max_pending; + uint32_t rx_mini_max_pending; + uint32_t rx_jumbo_max_pending; + uint32_t tx_max_pending; + uint32_t rx_pending; + uint32_t rx_mini_pending; + uint32_t rx_jumbo_pending; + uint32_t tx_pending; +}; + +/** + * struct ethtool_channels - configuring number of network channel + * @cmd: ETHTOOL_{G,S}CHANNELS + * @max_rx: Read only. Maximum number of receive channel the driver support. + * @max_tx: Read only. Maximum number of transmit channel the driver support. + * @max_other: Read only. Maximum number of other channel the driver support. + * @max_combined: Read only. Maximum number of combined channel the driver + * support. Set of queues RX, TX or other. + * @rx_count: Valid values are in the range 1 to the max_rx. + * @tx_count: Valid values are in the range 1 to the max_tx. + * @other_count: Valid values are in the range 1 to the max_other. + * @combined_count: Valid values are in the range 1 to the max_combined. + * + * This can be used to configure RX, TX and other channels. + */ + +struct ethtool_channels { + uint32_t cmd; + uint32_t max_rx; + uint32_t max_tx; + uint32_t max_other; + uint32_t max_combined; + uint32_t rx_count; + uint32_t tx_count; + uint32_t other_count; + uint32_t combined_count; +}; + +/** + * struct ethtool_pauseparam - Ethernet pause (flow control) parameters + * @cmd: Command number = %ETHTOOL_GPAUSEPARAM or %ETHTOOL_SPAUSEPARAM + * @autoneg: Flag to enable autonegotiation of pause frame use + * @rx_pause: Flag to enable reception of pause frames + * @tx_pause: Flag to enable transmission of pause frames + * + * Drivers should reject a non-zero setting of @autoneg when + * autoneogotiation is disabled (or not supported) for the link. + * + * If the link is autonegotiated, drivers should use + * mii_advertise_flowctrl() or similar code to set the advertised + * pause frame capabilities based on the @rx_pause and @tx_pause flags, + * even if @autoneg is zero. They should also allow the advertised + * pause frame capabilities to be controlled directly through the + * advertising field of &struct ethtool_cmd. + * + * If @autoneg is non-zero, the MAC is configured to send and/or + * receive pause frames according to the result of autonegotiation. + * Otherwise, it is configured directly based on the @rx_pause and + * @tx_pause flags. + */ +struct ethtool_pauseparam { + uint32_t cmd; + uint32_t autoneg; + uint32_t rx_pause; + uint32_t tx_pause; +}; + +/** + * enum ethtool_link_ext_state - link extended state + */ +enum ethtool_link_ext_state { + ETHTOOL_LINK_EXT_STATE_AUTONEG, + ETHTOOL_LINK_EXT_STATE_LINK_TRAINING_FAILURE, + ETHTOOL_LINK_EXT_STATE_LINK_LOGICAL_MISMATCH, + ETHTOOL_LINK_EXT_STATE_BAD_SIGNAL_INTEGRITY, + ETHTOOL_LINK_EXT_STATE_NO_CABLE, + ETHTOOL_LINK_EXT_STATE_CABLE_ISSUE, + ETHTOOL_LINK_EXT_STATE_EEPROM_ISSUE, + ETHTOOL_LINK_EXT_STATE_CALIBRATION_FAILURE, + ETHTOOL_LINK_EXT_STATE_POWER_BUDGET_EXCEEDED, + ETHTOOL_LINK_EXT_STATE_OVERHEAT, +}; + +/** + * enum ethtool_link_ext_substate_autoneg - more information in addition to + * ETHTOOL_LINK_EXT_STATE_AUTONEG. + */ +enum ethtool_link_ext_substate_autoneg { + ETHTOOL_LINK_EXT_SUBSTATE_AN_NO_PARTNER_DETECTED = 1, + ETHTOOL_LINK_EXT_SUBSTATE_AN_ACK_NOT_RECEIVED, + ETHTOOL_LINK_EXT_SUBSTATE_AN_NEXT_PAGE_EXCHANGE_FAILED, + ETHTOOL_LINK_EXT_SUBSTATE_AN_NO_PARTNER_DETECTED_FORCE_MODE, + ETHTOOL_LINK_EXT_SUBSTATE_AN_FEC_MISMATCH_DURING_OVERRIDE, + ETHTOOL_LINK_EXT_SUBSTATE_AN_NO_HCD, +}; + +/** + * enum ethtool_link_ext_substate_link_training - more information in addition to + * ETHTOOL_LINK_EXT_STATE_LINK_TRAINING_FAILURE. + */ +enum ethtool_link_ext_substate_link_training { + ETHTOOL_LINK_EXT_SUBSTATE_LT_KR_FRAME_LOCK_NOT_ACQUIRED = 1, + ETHTOOL_LINK_EXT_SUBSTATE_LT_KR_LINK_INHIBIT_TIMEOUT, + ETHTOOL_LINK_EXT_SUBSTATE_LT_KR_LINK_PARTNER_DID_NOT_SET_RECEIVER_READY, + ETHTOOL_LINK_EXT_SUBSTATE_LT_REMOTE_FAULT, +}; + +/** + * enum ethtool_link_ext_substate_logical_mismatch - more information in addition + * to ETHTOOL_LINK_EXT_STATE_LINK_LOGICAL_MISMATCH. + */ +enum ethtool_link_ext_substate_link_logical_mismatch { + ETHTOOL_LINK_EXT_SUBSTATE_LLM_PCS_DID_NOT_ACQUIRE_BLOCK_LOCK = 1, + ETHTOOL_LINK_EXT_SUBSTATE_LLM_PCS_DID_NOT_ACQUIRE_AM_LOCK, + ETHTOOL_LINK_EXT_SUBSTATE_LLM_PCS_DID_NOT_GET_ALIGN_STATUS, + ETHTOOL_LINK_EXT_SUBSTATE_LLM_FC_FEC_IS_NOT_LOCKED, + ETHTOOL_LINK_EXT_SUBSTATE_LLM_RS_FEC_IS_NOT_LOCKED, +}; + +/** + * enum ethtool_link_ext_substate_bad_signal_integrity - more information in + * addition to ETHTOOL_LINK_EXT_STATE_BAD_SIGNAL_INTEGRITY. + */ +enum ethtool_link_ext_substate_bad_signal_integrity { + ETHTOOL_LINK_EXT_SUBSTATE_BSI_LARGE_NUMBER_OF_PHYSICAL_ERRORS = 1, + ETHTOOL_LINK_EXT_SUBSTATE_BSI_UNSUPPORTED_RATE, +}; + +/** + * enum ethtool_link_ext_substate_cable_issue - more information in + * addition to ETHTOOL_LINK_EXT_STATE_CABLE_ISSUE. + */ +enum ethtool_link_ext_substate_cable_issue { + ETHTOOL_LINK_EXT_SUBSTATE_CI_UNSUPPORTED_CABLE = 1, + ETHTOOL_LINK_EXT_SUBSTATE_CI_CABLE_TEST_FAILURE, +}; + +#define ETH_GSTRING_LEN 32 + +/** + * enum ethtool_stringset - string set ID + * @ETH_SS_TEST: Self-test result names, for use with %ETHTOOL_TEST + * @ETH_SS_STATS: Statistic names, for use with %ETHTOOL_GSTATS + * @ETH_SS_PRIV_FLAGS: Driver private flag names, for use with + * %ETHTOOL_GPFLAGS and %ETHTOOL_SPFLAGS + * @ETH_SS_NTUPLE_FILTERS: Previously used with %ETHTOOL_GRXNTUPLE; + * now deprecated + * @ETH_SS_FEATURES: Device feature names + * @ETH_SS_RSS_HASH_FUNCS: RSS hush function names + * @ETH_SS_PHY_STATS: Statistic names, for use with %ETHTOOL_GPHYSTATS + * @ETH_SS_PHY_TUNABLES: PHY tunable names + * @ETH_SS_LINK_MODES: link mode names + * @ETH_SS_MSG_CLASSES: debug message class names + * @ETH_SS_WOL_MODES: wake-on-lan modes + * @ETH_SS_SOF_TIMESTAMPING: SOF_TIMESTAMPING_* flags + * @ETH_SS_TS_TX_TYPES: timestamping Tx types + * @ETH_SS_TS_RX_FILTERS: timestamping Rx filters + * @ETH_SS_UDP_TUNNEL_TYPES: UDP tunnel types + */ +enum ethtool_stringset { + ETH_SS_TEST = 0, + ETH_SS_STATS, + ETH_SS_PRIV_FLAGS, + ETH_SS_NTUPLE_FILTERS, + ETH_SS_FEATURES, + ETH_SS_RSS_HASH_FUNCS, + ETH_SS_TUNABLES, + ETH_SS_PHY_STATS, + ETH_SS_PHY_TUNABLES, + ETH_SS_LINK_MODES, + ETH_SS_MSG_CLASSES, + ETH_SS_WOL_MODES, + ETH_SS_SOF_TIMESTAMPING, + ETH_SS_TS_TX_TYPES, + ETH_SS_TS_RX_FILTERS, + ETH_SS_UDP_TUNNEL_TYPES, + + /* add new constants above here */ + ETH_SS_COUNT +}; + +/** + * struct ethtool_gstrings - string set for data tagging + * @cmd: Command number = %ETHTOOL_GSTRINGS + * @string_set: String set ID; one of &enum ethtool_stringset + * @len: On return, the number of strings in the string set + * @data: Buffer for strings. Each string is null-padded to a size of + * %ETH_GSTRING_LEN. + * + * Users must use %ETHTOOL_GSSET_INFO to find the number of strings in + * the string set. They must allocate a buffer of the appropriate + * size immediately following this structure. + */ +struct ethtool_gstrings { + uint32_t cmd; + uint32_t string_set; + uint32_t len; + uint8_t data[0]; +}; + +/** + * struct ethtool_sset_info - string set information + * @cmd: Command number = %ETHTOOL_GSSET_INFO + * @sset_mask: On entry, a bitmask of string sets to query, with bits + * numbered according to &enum ethtool_stringset. On return, a + * bitmask of those string sets queried that are supported. + * @data: Buffer for string set sizes. On return, this contains the + * size of each string set that was queried and supported, in + * order of ID. + * + * Example: The user passes in @sset_mask = 0x7 (sets 0, 1, 2) and on + * return @sset_mask == 0x6 (sets 1, 2). Then @data[0] contains the + * size of set 1 and @data[1] contains the size of set 2. + * + * Users must allocate a buffer of the appropriate size (4 * number of + * sets queried) immediately following this structure. + */ +struct ethtool_sset_info { + uint32_t cmd; + uint32_t reserved; + uint64_t sset_mask; + uint32_t data[0]; +}; + +/** + * enum ethtool_test_flags - flags definition of ethtool_test + * @ETH_TEST_FL_OFFLINE: if set perform online and offline tests, otherwise + * only online tests. + * @ETH_TEST_FL_FAILED: Driver set this flag if test fails. + * @ETH_TEST_FL_EXTERNAL_LB: Application request to perform external loopback + * test. + * @ETH_TEST_FL_EXTERNAL_LB_DONE: Driver performed the external loopback test + */ + +enum ethtool_test_flags { + ETH_TEST_FL_OFFLINE = (1 << 0), + ETH_TEST_FL_FAILED = (1 << 1), + ETH_TEST_FL_EXTERNAL_LB = (1 << 2), + ETH_TEST_FL_EXTERNAL_LB_DONE = (1 << 3), +}; + +/** + * struct ethtool_test - device self-test invocation + * @cmd: Command number = %ETHTOOL_TEST + * @flags: A bitmask of flags from &enum ethtool_test_flags. Some + * flags may be set by the user on entry; others may be set by + * the driver on return. + * @len: On return, the number of test results + * @data: Array of test results + * + * Users must use %ETHTOOL_GSSET_INFO or %ETHTOOL_GDRVINFO to find the + * number of test results that will be returned. They must allocate a + * buffer of the appropriate size (8 * number of results) immediately + * following this structure. + */ +struct ethtool_test { + uint32_t cmd; + uint32_t flags; + uint32_t reserved; + uint32_t len; + uint64_t data[0]; +}; + +/** + * struct ethtool_stats - device-specific statistics + * @cmd: Command number = %ETHTOOL_GSTATS + * @n_stats: On return, the number of statistics + * @data: Array of statistics + * + * Users must use %ETHTOOL_GSSET_INFO or %ETHTOOL_GDRVINFO to find the + * number of statistics that will be returned. They must allocate a + * buffer of the appropriate size (8 * number of statistics) + * immediately following this structure. + */ +struct ethtool_stats { + uint32_t cmd; + uint32_t n_stats; + uint64_t data[0]; +}; + +/** + * struct ethtool_perm_addr - permanent hardware address + * @cmd: Command number = %ETHTOOL_GPERMADDR + * @size: On entry, the size of the buffer. On return, the size of the + * address. The command fails if the buffer is too small. + * @data: Buffer for the address + * + * Users must allocate the buffer immediately following this structure. + * A buffer size of %MAX_ADDR_LEN should be sufficient for any address + * type. + */ +struct ethtool_perm_addr { + uint32_t cmd; + uint32_t size; + uint8_t data[0]; +}; + +/* boolean flags controlling per-interface behavior characteristics. + * When reading, the flag indicates whether or not a certain behavior + * is enabled/present. When writing, the flag indicates whether + * or not the driver should turn on (set) or off (clear) a behavior. + * + * Some behaviors may read-only (unconditionally absent or present). + * If such is the case, return EINVAL in the set-flags operation if the + * flag differs from the read-only value. + */ +enum ethtool_flags { + ETH_FLAG_TXVLAN = (1 << 7), /* TX VLAN offload enabled */ + ETH_FLAG_RXVLAN = (1 << 8), /* RX VLAN offload enabled */ + ETH_FLAG_LRO = (1 << 15), /* LRO is enabled */ + ETH_FLAG_NTUPLE = (1 << 27), /* N-tuple filters enabled */ + ETH_FLAG_RXHASH = (1 << 28), +}; + +/* The following structures are for supporting RX network flow + * classification and RX n-tuple configuration. Note, all multibyte + * fields, e.g., ip4src, ip4dst, psrc, pdst, spi, etc. are expected to + * be in network byte order. + */ + +/** + * struct ethtool_tcpip4_spec - flow specification for TCP/IPv4 etc. + * @ip4src: Source host + * @ip4dst: Destination host + * @psrc: Source port + * @pdst: Destination port + * @tos: Type-of-service + * + * This can be used to specify a TCP/IPv4, UDP/IPv4 or SCTP/IPv4 flow. + */ +struct ethtool_tcpip4_spec { + uint32_t ip4src; + uint32_t ip4dst; + uint16_t psrc; + uint16_t pdst; + uint8_t tos; +}; + +/** + * struct ethtool_ah_espip4_spec - flow specification for IPsec/IPv4 + * @ip4src: Source host + * @ip4dst: Destination host + * @spi: Security parameters index + * @tos: Type-of-service + * + * This can be used to specify an IPsec transport or tunnel over IPv4. + */ +struct ethtool_ah_espip4_spec { + uint32_t ip4src; + uint32_t ip4dst; + uint32_t spi; + uint8_t tos; +}; + +#define ETH_RX_NFC_IP4 1 + +/** + * struct ethtool_usrip4_spec - general flow specification for IPv4 + * @ip4src: Source host + * @ip4dst: Destination host + * @l4_4_bytes: First 4 bytes of transport (layer 4) header + * @tos: Type-of-service + * @ip_ver: Value must be %ETH_RX_NFC_IP4; mask must be 0 + * @proto: Transport protocol number; mask must be 0 + */ +struct ethtool_usrip4_spec { + uint32_t ip4src; + uint32_t ip4dst; + uint32_t l4_4_bytes; + uint8_t tos; + uint8_t ip_ver; + uint8_t proto; +}; + +/** + * struct ethtool_tcpip6_spec - flow specification for TCP/IPv6 etc. + * @ip6src: Source host + * @ip6dst: Destination host + * @psrc: Source port + * @pdst: Destination port + * @tclass: Traffic Class + * + * This can be used to specify a TCP/IPv6, UDP/IPv6 or SCTP/IPv6 flow. + */ +struct ethtool_tcpip6_spec { + uint32_t ip6src[4]; + uint32_t ip6dst[4]; + uint16_t psrc; + uint16_t pdst; + uint8_t tclass; +}; + +/** + * struct ethtool_ah_espip6_spec - flow specification for IPsec/IPv6 + * @ip6src: Source host + * @ip6dst: Destination host + * @spi: Security parameters index + * @tclass: Traffic Class + * + * This can be used to specify an IPsec transport or tunnel over IPv6. + */ +struct ethtool_ah_espip6_spec { + uint32_t ip6src[4]; + uint32_t ip6dst[4]; + uint32_t spi; + uint8_t tclass; +}; + +/** + * struct ethtool_usrip6_spec - general flow specification for IPv6 + * @ip6src: Source host + * @ip6dst: Destination host + * @l4_4_bytes: First 4 bytes of transport (layer 4) header + * @tclass: Traffic Class + * @l4_proto: Transport protocol number (nexthdr after any Extension Headers) + */ +struct ethtool_usrip6_spec { + uint32_t ip6src[4]; + uint32_t ip6dst[4]; + uint32_t l4_4_bytes; + uint8_t tclass; + uint8_t l4_proto; +}; + +union ethtool_flow_union { + struct ethtool_tcpip4_spec tcp_ip4_spec; + struct ethtool_tcpip4_spec udp_ip4_spec; + struct ethtool_tcpip4_spec sctp_ip4_spec; + struct ethtool_ah_espip4_spec ah_ip4_spec; + struct ethtool_ah_espip4_spec esp_ip4_spec; + struct ethtool_usrip4_spec usr_ip4_spec; + struct ethtool_tcpip6_spec tcp_ip6_spec; + struct ethtool_tcpip6_spec udp_ip6_spec; + struct ethtool_tcpip6_spec sctp_ip6_spec; + struct ethtool_ah_espip6_spec ah_ip6_spec; + struct ethtool_ah_espip6_spec esp_ip6_spec; + struct ethtool_usrip6_spec usr_ip6_spec; + struct eth_header ether_spec; + uint8_t hdata[52]; +}; + +/** + * struct ethtool_flow_ext - additional RX flow fields + * @h_dest: destination MAC address + * @vlan_etype: VLAN EtherType + * @vlan_tci: VLAN tag control information + * @data: user defined data + * + * Note, @vlan_etype, @vlan_tci, and @data are only valid if %FLOW_EXT + * is set in &struct ethtool_rx_flow_spec @flow_type. + * @h_dest is valid if %FLOW_MAC_EXT is set. + */ +struct ethtool_flow_ext { + uint8_t padding[2]; + unsigned char h_dest[ETH_ALEN]; + uint16_t vlan_etype; + uint16_t vlan_tci; + uint32_t data[2]; +}; + +/** + * struct ethtool_rx_flow_spec - classification rule for RX flows + * @flow_type: Type of match to perform, e.g. %TCP_V4_FLOW + * @h_u: Flow fields to match (dependent on @flow_type) + * @h_ext: Additional fields to match + * @m_u: Masks for flow field bits to be matched + * @m_ext: Masks for additional field bits to be matched + * Note, all additional fields must be ignored unless @flow_type + * includes the %FLOW_EXT or %FLOW_MAC_EXT flag + * (see &struct ethtool_flow_ext description). + * @ring_cookie: RX ring/queue index to deliver to, or %RX_CLS_FLOW_DISC + * if packets should be discarded, or %RX_CLS_FLOW_WAKE if the + * packets should be used for Wake-on-LAN with %WAKE_FILTER + * @location: Location of rule in the table. Locations must be + * numbered such that a flow matching multiple rules will be + * classified according to the first (lowest numbered) rule. + */ +struct ethtool_rx_flow_spec { + uint32_t flow_type; + union ethtool_flow_union h_u; + struct ethtool_flow_ext h_ext; + union ethtool_flow_union m_u; + struct ethtool_flow_ext m_ext; + uint64_t ring_cookie; + uint32_t location; +}; + +/* How rings are laid out when accessing virtual functions or + * offloaded queues is device specific. To allow users to do flow + * steering and specify these queues the ring cookie is partitioned + * into a 32bit queue index with an 8 bit virtual function id. + * This also leaves the 3bytes for further specifiers. It is possible + * future devices may support more than 256 virtual functions if + * devices start supporting PCIe w/ARI. However at the moment I + * do not know of any devices that support this so I do not reserve + * space for this at this time. If a future patch consumes the next + * byte it should be aware of this possibility. + */ +#define ETHTOOL_RX_FLOW_SPEC_RING 0x00000000FFFFFFFFLL +#define ETHTOOL_RX_FLOW_SPEC_RING_VF 0x000000FF00000000LL +#define ETHTOOL_RX_FLOW_SPEC_RING_VF_OFF 32 +static inline uint64_t ethtool_get_flow_spec_ring(uint64_t ring_cookie) +{ + return ETHTOOL_RX_FLOW_SPEC_RING & ring_cookie; +} + +static inline uint64_t ethtool_get_flow_spec_ring_vf(uint64_t ring_cookie) +{ + return (ETHTOOL_RX_FLOW_SPEC_RING_VF & ring_cookie) >> + ETHTOOL_RX_FLOW_SPEC_RING_VF_OFF; +} + +/** + * struct ethtool_rxnfc - command to get or set RX flow classification rules + * @cmd: Specific command number - %ETHTOOL_GRXFH, %ETHTOOL_SRXFH, + * %ETHTOOL_GRXRINGS, %ETHTOOL_GRXCLSRLCNT, %ETHTOOL_GRXCLSRULE, + * %ETHTOOL_GRXCLSRLALL, %ETHTOOL_SRXCLSRLDEL or %ETHTOOL_SRXCLSRLINS + * @flow_type: Type of flow to be affected, e.g. %TCP_V4_FLOW + * @data: Command-dependent value + * @fs: Flow classification rule + * @rss_context: RSS context to be affected + * @rule_cnt: Number of rules to be affected + * @rule_locs: Array of used rule locations + * + * For %ETHTOOL_GRXFH and %ETHTOOL_SRXFH, @data is a bitmask indicating + * the fields included in the flow hash, e.g. %RXH_IP_SRC. The following + * structure fields must not be used, except that if @flow_type includes + * the %FLOW_RSS flag, then @rss_context determines which RSS context to + * act on. + * + * For %ETHTOOL_GRXRINGS, @data is set to the number of RX rings/queues + * on return. + * + * For %ETHTOOL_GRXCLSRLCNT, @rule_cnt is set to the number of defined + * rules on return. If @data is non-zero on return then it is the + * size of the rule table, plus the flag %RX_CLS_LOC_SPECIAL if the + * driver supports any special location values. If that flag is not + * set in @data then special location values should not be used. + * + * For %ETHTOOL_GRXCLSRULE, @fs.@location specifies the location of an + * existing rule on entry and @fs contains the rule on return; if + * @fs.@flow_type includes the %FLOW_RSS flag, then @rss_context is + * filled with the RSS context ID associated with the rule. + * + * For %ETHTOOL_GRXCLSRLALL, @rule_cnt specifies the array size of the + * user buffer for @rule_locs on entry. On return, @data is the size + * of the rule table, @rule_cnt is the number of defined rules, and + * @rule_locs contains the locations of the defined rules. Drivers + * must use the second parameter to get_rxnfc() instead of @rule_locs. + * + * For %ETHTOOL_SRXCLSRLINS, @fs specifies the rule to add or update. + * @fs.@location either specifies the location to use or is a special + * location value with %RX_CLS_LOC_SPECIAL flag set. On return, + * @fs.@location is the actual rule location. If @fs.@flow_type + * includes the %FLOW_RSS flag, @rss_context is the RSS context ID to + * use for flow spreading traffic which matches this rule. The value + * from the rxfh indirection table will be added to @fs.@ring_cookie + * to choose which ring to deliver to. + * + * For %ETHTOOL_SRXCLSRLDEL, @fs.@location specifies the location of an + * existing rule on entry. + * + * A driver supporting the special location values for + * %ETHTOOL_SRXCLSRLINS may add the rule at any suitable unused + * location, and may remove a rule at a later location (lower + * priority) that matches exactly the same set of flows. The special + * values are %RX_CLS_LOC_ANY, selecting any location; + * %RX_CLS_LOC_FIRST, selecting the first suitable location (maximum + * priority); and %RX_CLS_LOC_LAST, selecting the last suitable + * location (minimum priority). Additional special values may be + * defined in future and drivers must return -%EINVAL for any + * unrecognised value. + */ +struct ethtool_rxnfc { + uint32_t cmd; + uint32_t flow_type; + uint64_t data; + struct ethtool_rx_flow_spec fs; + union { + uint32_t rule_cnt; + uint32_t rss_context; + }; + uint32_t rule_locs[0]; +}; + + +/** + * struct ethtool_rxfh_indir - command to get or set RX flow hash indirection + * @cmd: Specific command number - %ETHTOOL_GRXFHINDIR or %ETHTOOL_SRXFHINDIR + * @size: On entry, the array size of the user buffer, which may be zero. + * On return from %ETHTOOL_GRXFHINDIR, the array size of the hardware + * indirection table. + * @ring_index: RX ring/queue index for each hash value + * + * For %ETHTOOL_GRXFHINDIR, a @size of zero means that only the size + * should be returned. For %ETHTOOL_SRXFHINDIR, a @size of zero means + * the table should be reset to default values. This last feature + * is not supported by the original implementations. + */ +struct ethtool_rxfh_indir { + uint32_t cmd; + uint32_t size; + uint32_t ring_index[0]; +}; + +/** + * struct ethtool_rxfh - command to get/set RX flow hash indir or/and hash key. + * @cmd: Specific command number - %ETHTOOL_GRSSH or %ETHTOOL_SRSSH + * @rss_context: RSS context identifier. Context 0 is the default for normal + * traffic; other contexts can be referenced as the destination for RX flow + * classification rules. %ETH_RXFH_CONTEXT_ALLOC is used with command + * %ETHTOOL_SRSSH to allocate a new RSS context; on return this field will + * contain the ID of the newly allocated context. + * @indir_size: On entry, the array size of the user buffer for the + * indirection table, which may be zero, or (for %ETHTOOL_SRSSH), + * %ETH_RXFH_INDIR_NO_CHANGE. On return from %ETHTOOL_GRSSH, + * the array size of the hardware indirection table. + * @key_size: On entry, the array size of the user buffer for the hash key, + * which may be zero. On return from %ETHTOOL_GRSSH, the size of the + * hardware hash key. + * @hfunc: Defines the current RSS hash function used by HW (or to be set to). + * Valid values are one of the %ETH_RSS_HASH_*. + * @rsvd: Reserved for future extensions. + * @rss_config: RX ring/queue index for each hash value i.e., indirection table + * of @indir_size uint32_t elements, followed by hash key of @key_size + * bytes. + * + * For %ETHTOOL_GRSSH, a @indir_size and key_size of zero means that only the + * size should be returned. For %ETHTOOL_SRSSH, an @indir_size of + * %ETH_RXFH_INDIR_NO_CHANGE means that indir table setting is not requested + * and a @indir_size of zero means the indir table should be reset to default + * values (if @rss_context == 0) or that the RSS context should be deleted. + * An hfunc of zero means that hash function setting is not requested. + */ +struct ethtool_rxfh { + uint32_t cmd; + uint32_t rss_context; + uint32_t indir_size; + uint32_t key_size; + uint8_t hfunc; + uint8_t rsvd8[3]; + uint32_t rsvd32; + uint32_t rss_config[0]; +}; +#define ETH_RXFH_CONTEXT_ALLOC 0xffffffff +#define ETH_RXFH_INDIR_NO_CHANGE 0xffffffff + +/** + * struct ethtool_rx_ntuple_flow_spec - specification for RX flow filter + * @flow_type: Type of match to perform, e.g. %TCP_V4_FLOW + * @h_u: Flow field values to match (dependent on @flow_type) + * @m_u: Masks for flow field value bits to be ignored + * @vlan_tag: VLAN tag to match + * @vlan_tag_mask: Mask for VLAN tag bits to be ignored + * @data: Driver-dependent data to match + * @data_mask: Mask for driver-dependent data bits to be ignored + * @action: RX ring/queue index to deliver to (non-negative) or other action + * (negative, e.g. %ETHTOOL_RXNTUPLE_ACTION_DROP) + * + * For flow types %TCP_V4_FLOW, %UDP_V4_FLOW and %SCTP_V4_FLOW, where + * a field value and mask are both zero this is treated as if all mask + * bits are set i.e. the field is ignored. + */ +struct ethtool_rx_ntuple_flow_spec { + uint32_t flow_type; + union { + struct ethtool_tcpip4_spec tcp_ip4_spec; + struct ethtool_tcpip4_spec udp_ip4_spec; + struct ethtool_tcpip4_spec sctp_ip4_spec; + struct ethtool_ah_espip4_spec ah_ip4_spec; + struct ethtool_ah_espip4_spec esp_ip4_spec; + struct ethtool_usrip4_spec usr_ip4_spec; + struct eth_header ether_spec; + uint8_t hdata[72]; + } h_u, m_u; + + uint16_t vlan_tag; + uint16_t vlan_tag_mask; + uint64_t data; + uint64_t data_mask; + + int32_t action; +#define ETHTOOL_RXNTUPLE_ACTION_DROP (-1) /* drop packet */ +#define ETHTOOL_RXNTUPLE_ACTION_CLEAR (-2) /* clear filter */ +}; + +/** + * struct ethtool_rx_ntuple - command to set or clear RX flow filter + * @cmd: Command number - %ETHTOOL_SRXNTUPLE + * @fs: Flow filter specification + */ +struct ethtool_rx_ntuple { + uint32_t cmd; + struct ethtool_rx_ntuple_flow_spec fs; +}; + +#define ETHTOOL_FLASH_MAX_FILENAME 128 +enum ethtool_flash_op_type { + ETHTOOL_FLASH_ALL_REGIONS = 0, +}; + +/* for passing firmware flashing related parameters */ +struct ethtool_flash { + uint32_t cmd; + uint32_t region; + char data[ETHTOOL_FLASH_MAX_FILENAME]; +}; + +/** + * struct ethtool_dump - used for retrieving, setting device dump + * @cmd: Command number - %ETHTOOL_GET_DUMP_FLAG, %ETHTOOL_GET_DUMP_DATA, or + * %ETHTOOL_SET_DUMP + * @version: FW version of the dump, filled in by driver + * @flag: driver dependent flag for dump setting, filled in by driver during + * get and filled in by ethtool for set operation. + * flag must be initialized by macro ETH_FW_DUMP_DISABLE value when + * firmware dump is disabled. + * @len: length of dump data, used as the length of the user buffer on entry to + * %ETHTOOL_GET_DUMP_DATA and this is returned as dump length by driver + * for %ETHTOOL_GET_DUMP_FLAG command + * @data: data collected for get dump data operation + */ +struct ethtool_dump { + uint32_t cmd; + uint32_t version; + uint32_t flag; + uint32_t len; + uint8_t data[0]; +}; + +#define ETH_FW_DUMP_DISABLE 0 + +/* for returning and changing feature sets */ + +/** + * struct ethtool_get_features_block - block with state of 32 features + * @available: mask of changeable features + * @requested: mask of features requested to be enabled if possible + * @active: mask of currently enabled features + * @never_changed: mask of features not changeable for any device + */ +struct ethtool_get_features_block { + uint32_t available; + uint32_t requested; + uint32_t active; + uint32_t never_changed; +}; + +/** + * struct ethtool_gfeatures - command to get state of device's features + * @cmd: command number = %ETHTOOL_GFEATURES + * @size: On entry, the number of elements in the features[] array; + * on return, the number of elements in features[] needed to hold + * all features + * @features: state of features + */ +struct ethtool_gfeatures { + uint32_t cmd; + uint32_t size; + struct ethtool_get_features_block features[0]; +}; + +/** + * struct ethtool_set_features_block - block with request for 32 features + * @valid: mask of features to be changed + * @requested: values of features to be changed + */ +struct ethtool_set_features_block { + uint32_t valid; + uint32_t requested; +}; + +/** + * struct ethtool_sfeatures - command to request change in device's features + * @cmd: command number = %ETHTOOL_SFEATURES + * @size: array size of the features[] array + * @features: feature change masks + */ +struct ethtool_sfeatures { + uint32_t cmd; + uint32_t size; + struct ethtool_set_features_block features[0]; +}; + +/** + * struct ethtool_ts_info - holds a device's timestamping and PHC association + * @cmd: command number = %ETHTOOL_GET_TS_INFO + * @so_timestamping: bit mask of the sum of the supported SO_TIMESTAMPING flags + * @phc_index: device index of the associated PHC, or -1 if there is none + * @tx_types: bit mask of the supported hwtstamp_tx_types enumeration values + * @rx_filters: bit mask of the supported hwtstamp_rx_filters enumeration values + * + * The bits in the 'tx_types' and 'rx_filters' fields correspond to + * the 'hwtstamp_tx_types' and 'hwtstamp_rx_filters' enumeration values, + * respectively. For example, if the device supports HWTSTAMP_TX_ON, + * then (1 << HWTSTAMP_TX_ON) in 'tx_types' will be set. + * + * Drivers should only report the filters they actually support without + * upscaling in the SIOCSHWTSTAMP ioctl. If the SIOCSHWSTAMP request for + * HWTSTAMP_FILTER_V1_SYNC is supported by HWTSTAMP_FILTER_V1_EVENT, then the + * driver should only report HWTSTAMP_FILTER_V1_EVENT in this op. + */ +struct ethtool_ts_info { + uint32_t cmd; + uint32_t so_timestamping; + int32_t phc_index; + uint32_t tx_types; + uint32_t tx_reserved[3]; + uint32_t rx_filters; + uint32_t rx_reserved[3]; +}; + +/* + * %ETHTOOL_SFEATURES changes features present in features[].valid to the + * values of corresponding bits in features[].requested. Bits in .requested + * not set in .valid or not changeable are ignored. + * + * Returns %EINVAL when .valid contains undefined or never-changeable bits + * or size is not equal to required number of features words (32-bit blocks). + * Returns >= 0 if request was completed; bits set in the value mean: + * %ETHTOOL_F_UNSUPPORTED - there were bits set in .valid that are not + * changeable (not present in %ETHTOOL_GFEATURES' features[].available) + * those bits were ignored. + * %ETHTOOL_F_WISH - some or all changes requested were recorded but the + * resulting state of bits masked by .valid is not equal to .requested. + * Probably there are other device-specific constraints on some features + * in the set. When %ETHTOOL_F_UNSUPPORTED is set, .valid is considered + * here as though ignored bits were cleared. + * %ETHTOOL_F_COMPAT - some or all changes requested were made by calling + * compatibility functions. Requested offload state cannot be properly + * managed by kernel. + * + * Meaning of bits in the masks are obtained by %ETHTOOL_GSSET_INFO (number of + * bits in the arrays - always multiple of 32) and %ETHTOOL_GSTRINGS commands + * for ETH_SS_FEATURES string set. First entry in the table corresponds to least + * significant bit in features[0] fields. Empty strings mark undefined features. + */ +enum ethtool_sfeatures_retval_bits { + ETHTOOL_F_UNSUPPORTED__BIT, + ETHTOOL_F_WISH__BIT, + ETHTOOL_F_COMPAT__BIT, +}; + +#define ETHTOOL_F_UNSUPPORTED (1 << ETHTOOL_F_UNSUPPORTED__BIT) +#define ETHTOOL_F_WISH (1 << ETHTOOL_F_WISH__BIT) +#define ETHTOOL_F_COMPAT (1 << ETHTOOL_F_COMPAT__BIT) + +#define MAX_NUM_QUEUE 4096 + +/** + * struct ethtool_per_queue_op - apply sub command to the queues in mask. + * @cmd: ETHTOOL_PERQUEUE + * @sub_command: the sub command which apply to each queues + * @queue_mask: Bitmap of the queues which sub command apply to + * @data: A complete command structure following for each of the queues addressed + */ +struct ethtool_per_queue_op { + uint32_t cmd; + uint32_t sub_command; + uint32_t queue_mask[__KERNEL_DIV_ROUND_UP(MAX_NUM_QUEUE, 32)]; + char data[]; +}; + +/** + * struct ethtool_fecparam - Ethernet forward error correction(fec) parameters + * @cmd: Command number = %ETHTOOL_GFECPARAM or %ETHTOOL_SFECPARAM + * @active_fec: FEC mode which is active on porte + * @fec: Bitmask of supported/configured FEC modes + * @rsvd: Reserved for future extensions. i.e FEC bypass feature. + * + * Drivers should reject a non-zero setting of @autoneg when + * autoneogotiation is disabled (or not supported) for the link. + * + */ +struct ethtool_fecparam { + uint32_t cmd; + /* bitmask of FEC modes */ + uint32_t active_fec; + uint32_t fec; + uint32_t reserved; +}; + +/** + * enum ethtool_fec_config_bits - flags definition of ethtool_fec_configuration + * @ETHTOOL_FEC_NONE: FEC mode configuration is not supported + * @ETHTOOL_FEC_AUTO: Default/Best FEC mode provided by driver + * @ETHTOOL_FEC_OFF: No FEC Mode + * @ETHTOOL_FEC_RS: Reed-Solomon Forward Error Detection mode + * @ETHTOOL_FEC_BASER: Base-R/Reed-Solomon Forward Error Detection mode + */ +enum ethtool_fec_config_bits { + ETHTOOL_FEC_NONE_BIT, + ETHTOOL_FEC_AUTO_BIT, + ETHTOOL_FEC_OFF_BIT, + ETHTOOL_FEC_RS_BIT, + ETHTOOL_FEC_BASER_BIT, + ETHTOOL_FEC_LLRS_BIT, +}; + +#define ETHTOOL_FEC_NONE (1 << ETHTOOL_FEC_NONE_BIT) +#define ETHTOOL_FEC_AUTO (1 << ETHTOOL_FEC_AUTO_BIT) +#define ETHTOOL_FEC_OFF (1 << ETHTOOL_FEC_OFF_BIT) +#define ETHTOOL_FEC_RS (1 << ETHTOOL_FEC_RS_BIT) +#define ETHTOOL_FEC_BASER (1 << ETHTOOL_FEC_BASER_BIT) +#define ETHTOOL_FEC_LLRS (1 << ETHTOOL_FEC_LLRS_BIT) + +/* CMDs currently supported */ +#define ETHTOOL_GSET 0x00000001 /* DEPRECATED, Get settings. + * Please use ETHTOOL_GLINKSETTINGS + */ +#define ETHTOOL_SSET 0x00000002 /* DEPRECATED, Set settings. + * Please use ETHTOOL_SLINKSETTINGS + */ +#define ETHTOOL_GDRVINFO 0x00000003 /* Get driver info. */ +#define ETHTOOL_GREGS 0x00000004 /* Get NIC registers. */ +#define ETHTOOL_GWOL 0x00000005 /* Get wake-on-lan options. */ +#define ETHTOOL_SWOL 0x00000006 /* Set wake-on-lan options. */ +#define ETHTOOL_GMSGLVL 0x00000007 /* Get driver message level */ +#define ETHTOOL_SMSGLVL 0x00000008 /* Set driver msg level. */ +#define ETHTOOL_NWAY_RST 0x00000009 /* Restart autonegotiation. */ +/* Get link status for host, i.e. whether the interface *and* the + * physical port (if there is one) are up (ethtool_value). */ +#define ETHTOOL_GLINK 0x0000000a +#define ETHTOOL_GEEPROM 0x0000000b /* Get EEPROM data */ +#define ETHTOOL_SEEPROM 0x0000000c /* Set EEPROM data. */ +#define ETHTOOL_GCOALESCE 0x0000000e /* Get coalesce config */ +#define ETHTOOL_SCOALESCE 0x0000000f /* Set coalesce config. */ +#define ETHTOOL_GRINGPARAM 0x00000010 /* Get ring parameters */ +#define ETHTOOL_SRINGPARAM 0x00000011 /* Set ring parameters. */ +#define ETHTOOL_GPAUSEPARAM 0x00000012 /* Get pause parameters */ +#define ETHTOOL_SPAUSEPARAM 0x00000013 /* Set pause parameters. */ +#define ETHTOOL_GRXCSUM 0x00000014 /* Get RX hw csum enable (ethtool_value) */ +#define ETHTOOL_SRXCSUM 0x00000015 /* Set RX hw csum enable (ethtool_value) */ +#define ETHTOOL_GTXCSUM 0x00000016 /* Get TX hw csum enable (ethtool_value) */ +#define ETHTOOL_STXCSUM 0x00000017 /* Set TX hw csum enable (ethtool_value) */ +#define ETHTOOL_GSG 0x00000018 /* Get scatter-gather enable + * (ethtool_value) */ +#define ETHTOOL_SSG 0x00000019 /* Set scatter-gather enable + * (ethtool_value). */ +#define ETHTOOL_TEST 0x0000001a /* execute NIC self-test. */ +#define ETHTOOL_GSTRINGS 0x0000001b /* get specified string set */ +#define ETHTOOL_PHYS_ID 0x0000001c /* identify the NIC */ +#define ETHTOOL_GSTATS 0x0000001d /* get NIC-specific statistics */ +#define ETHTOOL_GTSO 0x0000001e /* Get TSO enable (ethtool_value) */ +#define ETHTOOL_STSO 0x0000001f /* Set TSO enable (ethtool_value) */ +#define ETHTOOL_GPERMADDR 0x00000020 /* Get permanent hardware address */ +#define ETHTOOL_GUFO 0x00000021 /* Get UFO enable (ethtool_value) */ +#define ETHTOOL_SUFO 0x00000022 /* Set UFO enable (ethtool_value) */ +#define ETHTOOL_GGSO 0x00000023 /* Get GSO enable (ethtool_value) */ +#define ETHTOOL_SGSO 0x00000024 /* Set GSO enable (ethtool_value) */ +#define ETHTOOL_GFLAGS 0x00000025 /* Get flags bitmap(ethtool_value) */ +#define ETHTOOL_SFLAGS 0x00000026 /* Set flags bitmap(ethtool_value) */ +#define ETHTOOL_GPFLAGS 0x00000027 /* Get driver-private flags bitmap */ +#define ETHTOOL_SPFLAGS 0x00000028 /* Set driver-private flags bitmap */ + +#define ETHTOOL_GRXFH 0x00000029 /* Get RX flow hash configuration */ +#define ETHTOOL_SRXFH 0x0000002a /* Set RX flow hash configuration */ +#define ETHTOOL_GGRO 0x0000002b /* Get GRO enable (ethtool_value) */ +#define ETHTOOL_SGRO 0x0000002c /* Set GRO enable (ethtool_value) */ +#define ETHTOOL_GRXRINGS 0x0000002d /* Get RX rings available for LB */ +#define ETHTOOL_GRXCLSRLCNT 0x0000002e /* Get RX class rule count */ +#define ETHTOOL_GRXCLSRULE 0x0000002f /* Get RX classification rule */ +#define ETHTOOL_GRXCLSRLALL 0x00000030 /* Get all RX classification rule */ +#define ETHTOOL_SRXCLSRLDEL 0x00000031 /* Delete RX classification rule */ +#define ETHTOOL_SRXCLSRLINS 0x00000032 /* Insert RX classification rule */ +#define ETHTOOL_FLASHDEV 0x00000033 /* Flash firmware to device */ +#define ETHTOOL_RESET 0x00000034 /* Reset hardware */ +#define ETHTOOL_SRXNTUPLE 0x00000035 /* Add an n-tuple filter to device */ +#define ETHTOOL_GRXNTUPLE 0x00000036 /* deprecated */ +#define ETHTOOL_GSSET_INFO 0x00000037 /* Get string set info */ +#define ETHTOOL_GRXFHINDIR 0x00000038 /* Get RX flow hash indir'n table */ +#define ETHTOOL_SRXFHINDIR 0x00000039 /* Set RX flow hash indir'n table */ + +#define ETHTOOL_GFEATURES 0x0000003a /* Get device offload settings */ +#define ETHTOOL_SFEATURES 0x0000003b /* Change device offload settings */ +#define ETHTOOL_GCHANNELS 0x0000003c /* Get no of channels */ +#define ETHTOOL_SCHANNELS 0x0000003d /* Set no of channels */ +#define ETHTOOL_SET_DUMP 0x0000003e /* Set dump settings */ +#define ETHTOOL_GET_DUMP_FLAG 0x0000003f /* Get dump settings */ +#define ETHTOOL_GET_DUMP_DATA 0x00000040 /* Get dump data */ +#define ETHTOOL_GET_TS_INFO 0x00000041 /* Get time stamping and PHC info */ +#define ETHTOOL_GMODULEINFO 0x00000042 /* Get plug-in module information */ +#define ETHTOOL_GMODULEEEPROM 0x00000043 /* Get plug-in module eeprom */ +#define ETHTOOL_GEEE 0x00000044 /* Get EEE settings */ +#define ETHTOOL_SEEE 0x00000045 /* Set EEE settings */ + +#define ETHTOOL_GRSSH 0x00000046 /* Get RX flow hash configuration */ +#define ETHTOOL_SRSSH 0x00000047 /* Set RX flow hash configuration */ +#define ETHTOOL_GTUNABLE 0x00000048 /* Get tunable configuration */ +#define ETHTOOL_STUNABLE 0x00000049 /* Set tunable configuration */ +#define ETHTOOL_GPHYSTATS 0x0000004a /* get PHY-specific statistics */ + +#define ETHTOOL_PERQUEUE 0x0000004b /* Set per queue options */ + +#define ETHTOOL_GLINKSETTINGS 0x0000004c /* Get ethtool_link_settings */ +#define ETHTOOL_SLINKSETTINGS 0x0000004d /* Set ethtool_link_settings */ +#define ETHTOOL_PHY_GTUNABLE 0x0000004e /* Get PHY tunable configuration */ +#define ETHTOOL_PHY_STUNABLE 0x0000004f /* Set PHY tunable configuration */ +#define ETHTOOL_GFECPARAM 0x00000050 /* Get FEC settings */ +#define ETHTOOL_SFECPARAM 0x00000051 /* Set FEC settings */ + +/* compatibility with older code */ +#define SPARC_ETH_GSET ETHTOOL_GSET +#define SPARC_ETH_SSET ETHTOOL_SSET + +/* Link mode bit indices */ +enum ethtool_link_mode_bit_indices { + ETHTOOL_LINK_MODE_10baseT_Half_BIT = 0, + ETHTOOL_LINK_MODE_10baseT_Full_BIT = 1, + ETHTOOL_LINK_MODE_100baseT_Half_BIT = 2, + ETHTOOL_LINK_MODE_100baseT_Full_BIT = 3, + ETHTOOL_LINK_MODE_1000baseT_Half_BIT = 4, + ETHTOOL_LINK_MODE_1000baseT_Full_BIT = 5, + ETHTOOL_LINK_MODE_Autoneg_BIT = 6, + ETHTOOL_LINK_MODE_TP_BIT = 7, + ETHTOOL_LINK_MODE_AUI_BIT = 8, + ETHTOOL_LINK_MODE_MII_BIT = 9, + ETHTOOL_LINK_MODE_FIBRE_BIT = 10, + ETHTOOL_LINK_MODE_BNC_BIT = 11, + ETHTOOL_LINK_MODE_10000baseT_Full_BIT = 12, + ETHTOOL_LINK_MODE_Pause_BIT = 13, + ETHTOOL_LINK_MODE_Asym_Pause_BIT = 14, + ETHTOOL_LINK_MODE_2500baseX_Full_BIT = 15, + ETHTOOL_LINK_MODE_Backplane_BIT = 16, + ETHTOOL_LINK_MODE_1000baseKX_Full_BIT = 17, + ETHTOOL_LINK_MODE_10000baseKX4_Full_BIT = 18, + ETHTOOL_LINK_MODE_10000baseKR_Full_BIT = 19, + ETHTOOL_LINK_MODE_10000baseR_FEC_BIT = 20, + ETHTOOL_LINK_MODE_20000baseMLD2_Full_BIT = 21, + ETHTOOL_LINK_MODE_20000baseKR2_Full_BIT = 22, + ETHTOOL_LINK_MODE_40000baseKR4_Full_BIT = 23, + ETHTOOL_LINK_MODE_40000baseCR4_Full_BIT = 24, + ETHTOOL_LINK_MODE_40000baseSR4_Full_BIT = 25, + ETHTOOL_LINK_MODE_40000baseLR4_Full_BIT = 26, + ETHTOOL_LINK_MODE_56000baseKR4_Full_BIT = 27, + ETHTOOL_LINK_MODE_56000baseCR4_Full_BIT = 28, + ETHTOOL_LINK_MODE_56000baseSR4_Full_BIT = 29, + ETHTOOL_LINK_MODE_56000baseLR4_Full_BIT = 30, + ETHTOOL_LINK_MODE_25000baseCR_Full_BIT = 31, + + /* Last allowed bit for __ETHTOOL_LINK_MODE_LEGACY_MASK is bit + * 31. Please do NOT define any SUPPORTED_* or ADVERTISED_* + * macro for bits > 31. The only way to use indices > 31 is to + * use the new ETHTOOL_GLINKSETTINGS/ETHTOOL_SLINKSETTINGS API. + */ + + ETHTOOL_LINK_MODE_25000baseKR_Full_BIT = 32, + ETHTOOL_LINK_MODE_25000baseSR_Full_BIT = 33, + ETHTOOL_LINK_MODE_50000baseCR2_Full_BIT = 34, + ETHTOOL_LINK_MODE_50000baseKR2_Full_BIT = 35, + ETHTOOL_LINK_MODE_100000baseKR4_Full_BIT = 36, + ETHTOOL_LINK_MODE_100000baseSR4_Full_BIT = 37, + ETHTOOL_LINK_MODE_100000baseCR4_Full_BIT = 38, + ETHTOOL_LINK_MODE_100000baseLR4_ER4_Full_BIT = 39, + ETHTOOL_LINK_MODE_50000baseSR2_Full_BIT = 40, + ETHTOOL_LINK_MODE_1000baseX_Full_BIT = 41, + ETHTOOL_LINK_MODE_10000baseCR_Full_BIT = 42, + ETHTOOL_LINK_MODE_10000baseSR_Full_BIT = 43, + ETHTOOL_LINK_MODE_10000baseLR_Full_BIT = 44, + ETHTOOL_LINK_MODE_10000baseLRM_Full_BIT = 45, + ETHTOOL_LINK_MODE_10000baseER_Full_BIT = 46, + ETHTOOL_LINK_MODE_2500baseT_Full_BIT = 47, + ETHTOOL_LINK_MODE_5000baseT_Full_BIT = 48, + + ETHTOOL_LINK_MODE_FEC_NONE_BIT = 49, + ETHTOOL_LINK_MODE_FEC_RS_BIT = 50, + ETHTOOL_LINK_MODE_FEC_BASER_BIT = 51, + ETHTOOL_LINK_MODE_50000baseKR_Full_BIT = 52, + ETHTOOL_LINK_MODE_50000baseSR_Full_BIT = 53, + ETHTOOL_LINK_MODE_50000baseCR_Full_BIT = 54, + ETHTOOL_LINK_MODE_50000baseLR_ER_FR_Full_BIT = 55, + ETHTOOL_LINK_MODE_50000baseDR_Full_BIT = 56, + ETHTOOL_LINK_MODE_100000baseKR2_Full_BIT = 57, + ETHTOOL_LINK_MODE_100000baseSR2_Full_BIT = 58, + ETHTOOL_LINK_MODE_100000baseCR2_Full_BIT = 59, + ETHTOOL_LINK_MODE_100000baseLR2_ER2_FR2_Full_BIT = 60, + ETHTOOL_LINK_MODE_100000baseDR2_Full_BIT = 61, + ETHTOOL_LINK_MODE_200000baseKR4_Full_BIT = 62, + ETHTOOL_LINK_MODE_200000baseSR4_Full_BIT = 63, + ETHTOOL_LINK_MODE_200000baseLR4_ER4_FR4_Full_BIT = 64, + ETHTOOL_LINK_MODE_200000baseDR4_Full_BIT = 65, + ETHTOOL_LINK_MODE_200000baseCR4_Full_BIT = 66, + ETHTOOL_LINK_MODE_100baseT1_Full_BIT = 67, + ETHTOOL_LINK_MODE_1000baseT1_Full_BIT = 68, + ETHTOOL_LINK_MODE_400000baseKR8_Full_BIT = 69, + ETHTOOL_LINK_MODE_400000baseSR8_Full_BIT = 70, + ETHTOOL_LINK_MODE_400000baseLR8_ER8_FR8_Full_BIT = 71, + ETHTOOL_LINK_MODE_400000baseDR8_Full_BIT = 72, + ETHTOOL_LINK_MODE_400000baseCR8_Full_BIT = 73, + ETHTOOL_LINK_MODE_FEC_LLRS_BIT = 74, + ETHTOOL_LINK_MODE_100000baseKR_Full_BIT = 75, + ETHTOOL_LINK_MODE_100000baseSR_Full_BIT = 76, + ETHTOOL_LINK_MODE_100000baseLR_ER_FR_Full_BIT = 77, + ETHTOOL_LINK_MODE_100000baseCR_Full_BIT = 78, + ETHTOOL_LINK_MODE_100000baseDR_Full_BIT = 79, + ETHTOOL_LINK_MODE_200000baseKR2_Full_BIT = 80, + ETHTOOL_LINK_MODE_200000baseSR2_Full_BIT = 81, + ETHTOOL_LINK_MODE_200000baseLR2_ER2_FR2_Full_BIT = 82, + ETHTOOL_LINK_MODE_200000baseDR2_Full_BIT = 83, + ETHTOOL_LINK_MODE_200000baseCR2_Full_BIT = 84, + ETHTOOL_LINK_MODE_400000baseKR4_Full_BIT = 85, + ETHTOOL_LINK_MODE_400000baseSR4_Full_BIT = 86, + ETHTOOL_LINK_MODE_400000baseLR4_ER4_FR4_Full_BIT = 87, + ETHTOOL_LINK_MODE_400000baseDR4_Full_BIT = 88, + ETHTOOL_LINK_MODE_400000baseCR4_Full_BIT = 89, + ETHTOOL_LINK_MODE_100baseFX_Half_BIT = 90, + ETHTOOL_LINK_MODE_100baseFX_Full_BIT = 91, + /* must be last entry */ + __ETHTOOL_LINK_MODE_MASK_NBITS +}; + +#define __ETHTOOL_LINK_MODE_LEGACY_MASK(base_name) \ + (1UL << (ETHTOOL_LINK_MODE_ ## base_name ## _BIT)) + +/* DEPRECATED macros. Please migrate to + * ETHTOOL_GLINKSETTINGS/ETHTOOL_SLINKSETTINGS API. Please do NOT + * define any new SUPPORTED_* macro for bits > 31. + */ +#define SUPPORTED_10baseT_Half __ETHTOOL_LINK_MODE_LEGACY_MASK(10baseT_Half) +#define SUPPORTED_10baseT_Full __ETHTOOL_LINK_MODE_LEGACY_MASK(10baseT_Full) +#define SUPPORTED_100baseT_Half __ETHTOOL_LINK_MODE_LEGACY_MASK(100baseT_Half) +#define SUPPORTED_100baseT_Full __ETHTOOL_LINK_MODE_LEGACY_MASK(100baseT_Full) +#define SUPPORTED_1000baseT_Half __ETHTOOL_LINK_MODE_LEGACY_MASK(1000baseT_Half) +#define SUPPORTED_1000baseT_Full __ETHTOOL_LINK_MODE_LEGACY_MASK(1000baseT_Full) +#define SUPPORTED_Autoneg __ETHTOOL_LINK_MODE_LEGACY_MASK(Autoneg) +#define SUPPORTED_TP __ETHTOOL_LINK_MODE_LEGACY_MASK(TP) +#define SUPPORTED_AUI __ETHTOOL_LINK_MODE_LEGACY_MASK(AUI) +#define SUPPORTED_MII __ETHTOOL_LINK_MODE_LEGACY_MASK(MII) +#define SUPPORTED_FIBRE __ETHTOOL_LINK_MODE_LEGACY_MASK(FIBRE) +#define SUPPORTED_BNC __ETHTOOL_LINK_MODE_LEGACY_MASK(BNC) +#define SUPPORTED_10000baseT_Full __ETHTOOL_LINK_MODE_LEGACY_MASK(10000baseT_Full) +#define SUPPORTED_Pause __ETHTOOL_LINK_MODE_LEGACY_MASK(Pause) +#define SUPPORTED_Asym_Pause __ETHTOOL_LINK_MODE_LEGACY_MASK(Asym_Pause) +#define SUPPORTED_2500baseX_Full __ETHTOOL_LINK_MODE_LEGACY_MASK(2500baseX_Full) +#define SUPPORTED_Backplane __ETHTOOL_LINK_MODE_LEGACY_MASK(Backplane) +#define SUPPORTED_1000baseKX_Full __ETHTOOL_LINK_MODE_LEGACY_MASK(1000baseKX_Full) +#define SUPPORTED_10000baseKX4_Full __ETHTOOL_LINK_MODE_LEGACY_MASK(10000baseKX4_Full) +#define SUPPORTED_10000baseKR_Full __ETHTOOL_LINK_MODE_LEGACY_MASK(10000baseKR_Full) +#define SUPPORTED_10000baseR_FEC __ETHTOOL_LINK_MODE_LEGACY_MASK(10000baseR_FEC) +#define SUPPORTED_20000baseMLD2_Full __ETHTOOL_LINK_MODE_LEGACY_MASK(20000baseMLD2_Full) +#define SUPPORTED_20000baseKR2_Full __ETHTOOL_LINK_MODE_LEGACY_MASK(20000baseKR2_Full) +#define SUPPORTED_40000baseKR4_Full __ETHTOOL_LINK_MODE_LEGACY_MASK(40000baseKR4_Full) +#define SUPPORTED_40000baseCR4_Full __ETHTOOL_LINK_MODE_LEGACY_MASK(40000baseCR4_Full) +#define SUPPORTED_40000baseSR4_Full __ETHTOOL_LINK_MODE_LEGACY_MASK(40000baseSR4_Full) +#define SUPPORTED_40000baseLR4_Full __ETHTOOL_LINK_MODE_LEGACY_MASK(40000baseLR4_Full) +#define SUPPORTED_56000baseKR4_Full __ETHTOOL_LINK_MODE_LEGACY_MASK(56000baseKR4_Full) +#define SUPPORTED_56000baseCR4_Full __ETHTOOL_LINK_MODE_LEGACY_MASK(56000baseCR4_Full) +#define SUPPORTED_56000baseSR4_Full __ETHTOOL_LINK_MODE_LEGACY_MASK(56000baseSR4_Full) +#define SUPPORTED_56000baseLR4_Full __ETHTOOL_LINK_MODE_LEGACY_MASK(56000baseLR4_Full) +/* Please do not define any new SUPPORTED_* macro for bits > 31, see + * notice above. + */ + +/* + * DEPRECATED macros. Please migrate to + * ETHTOOL_GLINKSETTINGS/ETHTOOL_SLINKSETTINGS API. Please do NOT + * define any new ADERTISE_* macro for bits > 31. + */ +#define ADVERTISED_10baseT_Half __ETHTOOL_LINK_MODE_LEGACY_MASK(10baseT_Half) +#define ADVERTISED_10baseT_Full __ETHTOOL_LINK_MODE_LEGACY_MASK(10baseT_Full) +#define ADVERTISED_100baseT_Half __ETHTOOL_LINK_MODE_LEGACY_MASK(100baseT_Half) +#define ADVERTISED_100baseT_Full __ETHTOOL_LINK_MODE_LEGACY_MASK(100baseT_Full) +#define ADVERTISED_1000baseT_Half __ETHTOOL_LINK_MODE_LEGACY_MASK(1000baseT_Half) +#define ADVERTISED_1000baseT_Full __ETHTOOL_LINK_MODE_LEGACY_MASK(1000baseT_Full) +#define ADVERTISED_Autoneg __ETHTOOL_LINK_MODE_LEGACY_MASK(Autoneg) +#define ADVERTISED_TP __ETHTOOL_LINK_MODE_LEGACY_MASK(TP) +#define ADVERTISED_AUI __ETHTOOL_LINK_MODE_LEGACY_MASK(AUI) +#define ADVERTISED_MII __ETHTOOL_LINK_MODE_LEGACY_MASK(MII) +#define ADVERTISED_FIBRE __ETHTOOL_LINK_MODE_LEGACY_MASK(FIBRE) +#define ADVERTISED_BNC __ETHTOOL_LINK_MODE_LEGACY_MASK(BNC) +#define ADVERTISED_10000baseT_Full __ETHTOOL_LINK_MODE_LEGACY_MASK(10000baseT_Full) +#define ADVERTISED_Pause __ETHTOOL_LINK_MODE_LEGACY_MASK(Pause) +#define ADVERTISED_Asym_Pause __ETHTOOL_LINK_MODE_LEGACY_MASK(Asym_Pause) +#define ADVERTISED_2500baseX_Full __ETHTOOL_LINK_MODE_LEGACY_MASK(2500baseX_Full) +#define ADVERTISED_Backplane __ETHTOOL_LINK_MODE_LEGACY_MASK(Backplane) +#define ADVERTISED_1000baseKX_Full __ETHTOOL_LINK_MODE_LEGACY_MASK(1000baseKX_Full) +#define ADVERTISED_10000baseKX4_Full __ETHTOOL_LINK_MODE_LEGACY_MASK(10000baseKX4_Full) +#define ADVERTISED_10000baseKR_Full __ETHTOOL_LINK_MODE_LEGACY_MASK(10000baseKR_Full) +#define ADVERTISED_10000baseR_FEC __ETHTOOL_LINK_MODE_LEGACY_MASK(10000baseR_FEC) +#define ADVERTISED_20000baseMLD2_Full __ETHTOOL_LINK_MODE_LEGACY_MASK(20000baseMLD2_Full) +#define ADVERTISED_20000baseKR2_Full __ETHTOOL_LINK_MODE_LEGACY_MASK(20000baseKR2_Full) +#define ADVERTISED_40000baseKR4_Full __ETHTOOL_LINK_MODE_LEGACY_MASK(40000baseKR4_Full) +#define ADVERTISED_40000baseCR4_Full __ETHTOOL_LINK_MODE_LEGACY_MASK(40000baseCR4_Full) +#define ADVERTISED_40000baseSR4_Full __ETHTOOL_LINK_MODE_LEGACY_MASK(40000baseSR4_Full) +#define ADVERTISED_40000baseLR4_Full __ETHTOOL_LINK_MODE_LEGACY_MASK(40000baseLR4_Full) +#define ADVERTISED_56000baseKR4_Full __ETHTOOL_LINK_MODE_LEGACY_MASK(56000baseKR4_Full) +#define ADVERTISED_56000baseCR4_Full __ETHTOOL_LINK_MODE_LEGACY_MASK(56000baseCR4_Full) +#define ADVERTISED_56000baseSR4_Full __ETHTOOL_LINK_MODE_LEGACY_MASK(56000baseSR4_Full) +#define ADVERTISED_56000baseLR4_Full __ETHTOOL_LINK_MODE_LEGACY_MASK(56000baseLR4_Full) +/* Please do not define any new ADVERTISED_* macro for bits > 31, see + * notice above. + */ + +/* The following are all involved in forcing a particular link + * mode for the device for setting things. When getting the + * devices settings, these indicate the current mode and whether + * it was forced up into this mode or autonegotiated. + */ + +/* The forced speed, in units of 1Mb. All values 0 to INT_MAX are legal. + * Update drivers/net/phy/phy.c:phy_speed_to_str() and + * drivers/net/bonding/bond_3ad.c:__get_link_speed() when adding new values. + */ +#define SPEED_10 10 +#define SPEED_100 100 +#define SPEED_1000 1000 +#define SPEED_2500 2500 +#define SPEED_5000 5000 +#define SPEED_10000 10000 +#define SPEED_14000 14000 +#define SPEED_20000 20000 +#define SPEED_25000 25000 +#define SPEED_40000 40000 +#define SPEED_50000 50000 +#define SPEED_56000 56000 +#define SPEED_100000 100000 +#define SPEED_200000 200000 +#define SPEED_400000 400000 + +#define SPEED_UNKNOWN -1 + +static inline int ethtool_validate_speed(uint32_t speed) +{ + return speed <= INT_MAX || speed == (uint32_t)SPEED_UNKNOWN; +} + +/* Duplex, half or full. */ +#define DUPLEX_HALF 0x00 +#define DUPLEX_FULL 0x01 +#define DUPLEX_UNKNOWN 0xff + +static inline int ethtool_validate_duplex(uint8_t duplex) +{ + switch (duplex) { + case DUPLEX_HALF: + case DUPLEX_FULL: + case DUPLEX_UNKNOWN: + return 1; + } + + return 0; +} + +#define MASTER_SLAVE_CFG_UNSUPPORTED 0 +#define MASTER_SLAVE_CFG_UNKNOWN 1 +#define MASTER_SLAVE_CFG_MASTER_PREFERRED 2 +#define MASTER_SLAVE_CFG_SLAVE_PREFERRED 3 +#define MASTER_SLAVE_CFG_MASTER_FORCE 4 +#define MASTER_SLAVE_CFG_SLAVE_FORCE 5 +#define MASTER_SLAVE_STATE_UNSUPPORTED 0 +#define MASTER_SLAVE_STATE_UNKNOWN 1 +#define MASTER_SLAVE_STATE_MASTER 2 +#define MASTER_SLAVE_STATE_SLAVE 3 +#define MASTER_SLAVE_STATE_ERR 4 + +/* Which connector port. */ +#define PORT_TP 0x00 +#define PORT_AUI 0x01 +#define PORT_MII 0x02 +#define PORT_FIBRE 0x03 +#define PORT_BNC 0x04 +#define PORT_DA 0x05 +#define PORT_NONE 0xef +#define PORT_OTHER 0xff + +/* Which transceiver to use. */ +#define XCVR_INTERNAL 0x00 /* PHY and MAC are in the same package */ +#define XCVR_EXTERNAL 0x01 /* PHY and MAC are in different packages */ +#define XCVR_DUMMY1 0x02 +#define XCVR_DUMMY2 0x03 +#define XCVR_DUMMY3 0x04 + +/* Enable or disable autonegotiation. */ +#define AUTONEG_DISABLE 0x00 +#define AUTONEG_ENABLE 0x01 + +/* MDI or MDI-X status/control - if MDI/MDI_X/AUTO is set then + * the driver is required to renegotiate link + */ +#define ETH_TP_MDI_INVALID 0x00 /* status: unknown; control: unsupported */ +#define ETH_TP_MDI 0x01 /* status: MDI; control: force MDI */ +#define ETH_TP_MDI_X 0x02 /* status: MDI-X; control: force MDI-X */ +#define ETH_TP_MDI_AUTO 0x03 /* control: auto-select */ + +/* Wake-On-Lan options. */ +#define WAKE_PHY (1 << 0) +#define WAKE_UCAST (1 << 1) +#define WAKE_MCAST (1 << 2) +#define WAKE_BCAST (1 << 3) +#define WAKE_ARP (1 << 4) +#define WAKE_MAGIC (1 << 5) +#define WAKE_MAGICSECURE (1 << 6) /* only meaningful if WAKE_MAGIC */ +#define WAKE_FILTER (1 << 7) + +#define WOL_MODE_COUNT 8 + +/* L2-L4 network traffic flow types */ +#define TCP_V4_FLOW 0x01 /* hash or spec (tcp_ip4_spec) */ +#define UDP_V4_FLOW 0x02 /* hash or spec (udp_ip4_spec) */ +#define SCTP_V4_FLOW 0x03 /* hash or spec (sctp_ip4_spec) */ +#define AH_ESP_V4_FLOW 0x04 /* hash only */ +#define TCP_V6_FLOW 0x05 /* hash or spec (tcp_ip6_spec; nfc only) */ +#define UDP_V6_FLOW 0x06 /* hash or spec (udp_ip6_spec; nfc only) */ +#define SCTP_V6_FLOW 0x07 /* hash or spec (sctp_ip6_spec; nfc only) */ +#define AH_ESP_V6_FLOW 0x08 /* hash only */ +#define AH_V4_FLOW 0x09 /* hash or spec (ah_ip4_spec) */ +#define ESP_V4_FLOW 0x0a /* hash or spec (esp_ip4_spec) */ +#define AH_V6_FLOW 0x0b /* hash or spec (ah_ip6_spec; nfc only) */ +#define ESP_V6_FLOW 0x0c /* hash or spec (esp_ip6_spec; nfc only) */ +#define IPV4_USER_FLOW 0x0d /* spec only (usr_ip4_spec) */ +#define IP_USER_FLOW IPV4_USER_FLOW +#define IPV6_USER_FLOW 0x0e /* spec only (usr_ip6_spec; nfc only) */ +#define IPV4_FLOW 0x10 /* hash only */ +#define IPV6_FLOW 0x11 /* hash only */ +#define ETHER_FLOW 0x12 /* spec only (ether_spec) */ +/* Flag to enable additional fields in struct ethtool_rx_flow_spec */ +#define FLOW_EXT 0x80000000 +#define FLOW_MAC_EXT 0x40000000 +/* Flag to enable RSS spreading of traffic matching rule (nfc only) */ +#define FLOW_RSS 0x20000000 + +/* L3-L4 network traffic flow hash options */ +#define RXH_L2DA (1 << 1) +#define RXH_VLAN (1 << 2) +#define RXH_L3_PROTO (1 << 3) +#define RXH_IP_SRC (1 << 4) +#define RXH_IP_DST (1 << 5) +#define RXH_L4_B_0_1 (1 << 6) /* src port in case of TCP/UDP/SCTP */ +#define RXH_L4_B_2_3 (1 << 7) /* dst port in case of TCP/UDP/SCTP */ +#define RXH_DISCARD (1 << 31) + +#define RX_CLS_FLOW_DISC 0xffffffffffffffffULL +#define RX_CLS_FLOW_WAKE 0xfffffffffffffffeULL + +/* Special RX classification rule insert location values */ +#define RX_CLS_LOC_SPECIAL 0x80000000 /* flag */ +#define RX_CLS_LOC_ANY 0xffffffff +#define RX_CLS_LOC_FIRST 0xfffffffe +#define RX_CLS_LOC_LAST 0xfffffffd + +/* EEPROM Standards for plug in modules */ +#define ETH_MODULE_SFF_8079 0x1 +#define ETH_MODULE_SFF_8079_LEN 256 +#define ETH_MODULE_SFF_8472 0x2 +#define ETH_MODULE_SFF_8472_LEN 512 +#define ETH_MODULE_SFF_8636 0x3 +#define ETH_MODULE_SFF_8636_LEN 256 +#define ETH_MODULE_SFF_8436 0x4 +#define ETH_MODULE_SFF_8436_LEN 256 + +#define ETH_MODULE_SFF_8636_MAX_LEN 640 +#define ETH_MODULE_SFF_8436_MAX_LEN 640 + +/* Reset flags */ +/* The reset() operation must clear the flags for the components which + * were actually reset. On successful return, the flags indicate the + * components which were not reset, either because they do not exist + * in the hardware or because they cannot be reset independently. The + * driver must never reset any components that were not requested. + */ +enum ethtool_reset_flags { + /* These flags represent components dedicated to the interface + * the command is addressed to. Shift any flag left by + * ETH_RESET_SHARED_SHIFT to reset a shared component of the + * same type. + */ + ETH_RESET_MGMT = 1 << 0, /* Management processor */ + ETH_RESET_IRQ = 1 << 1, /* Interrupt requester */ + ETH_RESET_DMA = 1 << 2, /* DMA engine */ + ETH_RESET_FILTER = 1 << 3, /* Filtering/flow direction */ + ETH_RESET_OFFLOAD = 1 << 4, /* Protocol offload */ + ETH_RESET_MAC = 1 << 5, /* Media access controller */ + ETH_RESET_PHY = 1 << 6, /* Transceiver/PHY */ + ETH_RESET_RAM = 1 << 7, /* RAM shared between + * multiple components */ + ETH_RESET_AP = 1 << 8, /* Application processor */ + + ETH_RESET_DEDICATED = 0x0000ffff, /* All components dedicated to + * this interface */ + ETH_RESET_ALL = 0xffffffff, /* All components used by this + * interface, even if shared */ +}; +#define ETH_RESET_SHARED_SHIFT 16 + + +/** + * struct ethtool_link_settings - link control and status + * + * IMPORTANT, Backward compatibility notice: When implementing new + * user-space tools, please first try %ETHTOOL_GLINKSETTINGS, and + * if it succeeds use %ETHTOOL_SLINKSETTINGS to change link + * settings; do not use %ETHTOOL_SSET if %ETHTOOL_GLINKSETTINGS + * succeeded: stick to %ETHTOOL_GLINKSETTINGS/%SLINKSETTINGS in + * that case. Conversely, if %ETHTOOL_GLINKSETTINGS fails, use + * %ETHTOOL_GSET to query and %ETHTOOL_SSET to change link + * settings; do not use %ETHTOOL_SLINKSETTINGS if + * %ETHTOOL_GLINKSETTINGS failed: stick to + * %ETHTOOL_GSET/%ETHTOOL_SSET in that case. + * + * @cmd: Command number = %ETHTOOL_GLINKSETTINGS or %ETHTOOL_SLINKSETTINGS + * @speed: Link speed (Mbps) + * @duplex: Duplex mode; one of %DUPLEX_* + * @port: Physical connector type; one of %PORT_* + * @phy_address: MDIO address of PHY (transceiver); 0 or 255 if not + * applicable. For clause 45 PHYs this is the PRTAD. + * @autoneg: Enable/disable autonegotiation and auto-detection; + * either %AUTONEG_DISABLE or %AUTONEG_ENABLE + * @mdio_support: Bitmask of %ETH_MDIO_SUPPORTS_* flags for the MDIO + * protocols supported by the interface; 0 if unknown. + * Read-only. + * @eth_tp_mdix: Ethernet twisted-pair MDI(-X) status; one of + * %ETH_TP_MDI_*. If the status is unknown or not applicable, the + * value will be %ETH_TP_MDI_INVALID. Read-only. + * @eth_tp_mdix_ctrl: Ethernet twisted pair MDI(-X) control; one of + * %ETH_TP_MDI_*. If MDI(-X) control is not implemented, reads + * yield %ETH_TP_MDI_INVALID and writes may be ignored or rejected. + * When written successfully, the link should be renegotiated if + * necessary. + * @link_mode_masks_nwords: Number of 32-bit words for each of the + * supported, advertising, lp_advertising link mode bitmaps. For + * %ETHTOOL_GLINKSETTINGS: on entry, number of words passed by user + * (>= 0); on return, if handshake in progress, negative if + * request size unsupported by kernel: absolute value indicates + * kernel expected size and all the other fields but cmd + * are 0; otherwise (handshake completed), strictly positive + * to indicate size used by kernel and cmd field stays + * %ETHTOOL_GLINKSETTINGS, all other fields populated by driver. For + * %ETHTOOL_SLINKSETTINGS: must be valid on entry, ie. a positive + * value returned previously by %ETHTOOL_GLINKSETTINGS, otherwise + * refused. For drivers: ignore this field (use kernel's + * __ETHTOOL_LINK_MODE_MASK_NBITS instead), any change to it will + * be overwritten by kernel. + * @supported: Bitmap with each bit meaning given by + * %ethtool_link_mode_bit_indices for the link modes, physical + * connectors and other link features for which the interface + * supports autonegotiation or auto-detection. Read-only. + * @advertising: Bitmap with each bit meaning given by + * %ethtool_link_mode_bit_indices for the link modes, physical + * connectors and other link features that are advertised through + * autonegotiation or enabled for auto-detection. + * @lp_advertising: Bitmap with each bit meaning given by + * %ethtool_link_mode_bit_indices for the link modes, and other + * link features that the link partner advertised through + * autonegotiation; 0 if unknown or not applicable. Read-only. + * @transceiver: Used to distinguish different possible PHY types, + * reported consistently by PHYLIB. Read-only. + * + * If autonegotiation is disabled, the speed and @duplex represent the + * fixed link mode and are writable if the driver supports multiple + * link modes. If it is enabled then they are read-only; if the link + * is up they represent the negotiated link mode; if the link is down, + * the speed is 0, %SPEED_UNKNOWN or the highest enabled speed and + * @duplex is %DUPLEX_UNKNOWN or the best enabled duplex mode. + * + * Some hardware interfaces may have multiple PHYs and/or physical + * connectors fitted or do not allow the driver to detect which are + * fitted. For these interfaces @port and/or @phy_address may be + * writable, possibly dependent on @autoneg being %AUTONEG_DISABLE. + * Otherwise, attempts to write different values may be ignored or + * rejected. + * + * Deprecated %ethtool_cmd fields transceiver, maxtxpkt and maxrxpkt + * are not available in %ethtool_link_settings. These fields will be + * always set to zero in %ETHTOOL_GSET reply and %ETHTOOL_SSET will + * fail if any of them is set to non-zero value. + * + * Users should assume that all fields not marked read-only are + * writable and subject to validation by the driver. They should use + * %ETHTOOL_GLINKSETTINGS to get the current values before making specific + * changes and then applying them with %ETHTOOL_SLINKSETTINGS. + * + * Drivers that implement %get_link_ksettings and/or + * %set_link_ksettings should ignore the @cmd + * and @link_mode_masks_nwords fields (any change to them overwritten + * by kernel), and rely only on kernel's internal + * %__ETHTOOL_LINK_MODE_MASK_NBITS and + * %ethtool_link_mode_mask_t. Drivers that implement + * %set_link_ksettings() should validate all fields other than @cmd + * and @link_mode_masks_nwords that are not described as read-only or + * deprecated, and must ignore all fields described as read-only. + */ +struct ethtool_link_settings { + uint32_t cmd; + uint32_t speed; + uint8_t duplex; + uint8_t port; + uint8_t phy_address; + uint8_t autoneg; + uint8_t mdio_support; + uint8_t eth_tp_mdix; + uint8_t eth_tp_mdix_ctrl; + int8_t link_mode_masks_nwords; + uint8_t transceiver; + uint8_t master_slave_cfg; + uint8_t master_slave_state; + uint8_t reserved1[1]; + uint32_t reserved[7]; + uint32_t link_mode_masks[0]; + /* layout of link_mode_masks fields: + * uint32_t map_supported[link_mode_masks_nwords]; + * uint32_t map_advertising[link_mode_masks_nwords]; + * uint32_t map_lp_advertising[link_mode_masks_nwords]; + */ +}; +#endif /* _LINUX_ETHTOOL_H */ diff --git a/include/standard-headers/linux/fuse.h b/include/standard-headers/linux/fuse.h new file mode 100644 index 000000000..950d7edb7 --- /dev/null +++ b/include/standard-headers/linux/fuse.h @@ -0,0 +1,955 @@ +/* SPDX-License-Identifier: ((GPL-2.0 WITH Linux-syscall-note) OR BSD-2-Clause) */ +/* + This file defines the kernel interface of FUSE + Copyright (C) 2001-2008 Miklos Szeredi + + This program can be distributed under the terms of the GNU GPL. + See the file COPYING. + + This -- and only this -- header file may also be distributed under + the terms of the BSD Licence as follows: + + Copyright (C) 2001-2007 Miklos Szeredi. All rights reserved. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions + are met: + 1. Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + 2. Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + + THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS ``AS IS'' AND + ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + ARE DISCLAIMED. IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE + FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + SUCH DAMAGE. +*/ + +/* + * This file defines the kernel interface of FUSE + * + * Protocol changelog: + * + * 7.1: + * - add the following messages: + * FUSE_SETATTR, FUSE_SYMLINK, FUSE_MKNOD, FUSE_MKDIR, FUSE_UNLINK, + * FUSE_RMDIR, FUSE_RENAME, FUSE_LINK, FUSE_OPEN, FUSE_READ, FUSE_WRITE, + * FUSE_RELEASE, FUSE_FSYNC, FUSE_FLUSH, FUSE_SETXATTR, FUSE_GETXATTR, + * FUSE_LISTXATTR, FUSE_REMOVEXATTR, FUSE_OPENDIR, FUSE_READDIR, + * FUSE_RELEASEDIR + * - add padding to messages to accommodate 32-bit servers on 64-bit kernels + * + * 7.2: + * - add FOPEN_DIRECT_IO and FOPEN_KEEP_CACHE flags + * - add FUSE_FSYNCDIR message + * + * 7.3: + * - add FUSE_ACCESS message + * - add FUSE_CREATE message + * - add filehandle to fuse_setattr_in + * + * 7.4: + * - add frsize to fuse_kstatfs + * - clean up request size limit checking + * + * 7.5: + * - add flags and max_write to fuse_init_out + * + * 7.6: + * - add max_readahead to fuse_init_in and fuse_init_out + * + * 7.7: + * - add FUSE_INTERRUPT message + * - add POSIX file lock support + * + * 7.8: + * - add lock_owner and flags fields to fuse_release_in + * - add FUSE_BMAP message + * - add FUSE_DESTROY message + * + * 7.9: + * - new fuse_getattr_in input argument of GETATTR + * - add lk_flags in fuse_lk_in + * - add lock_owner field to fuse_setattr_in, fuse_read_in and fuse_write_in + * - add blksize field to fuse_attr + * - add file flags field to fuse_read_in and fuse_write_in + * - Add ATIME_NOW and MTIME_NOW flags to fuse_setattr_in + * + * 7.10 + * - add nonseekable open flag + * + * 7.11 + * - add IOCTL message + * - add unsolicited notification support + * - add POLL message and NOTIFY_POLL notification + * + * 7.12 + * - add umask flag to input argument of create, mknod and mkdir + * - add notification messages for invalidation of inodes and + * directory entries + * + * 7.13 + * - make max number of background requests and congestion threshold + * tunables + * + * 7.14 + * - add splice support to fuse device + * + * 7.15 + * - add store notify + * - add retrieve notify + * + * 7.16 + * - add BATCH_FORGET request + * - FUSE_IOCTL_UNRESTRICTED shall now return with array of 'struct + * fuse_ioctl_iovec' instead of ambiguous 'struct iovec' + * - add FUSE_IOCTL_32BIT flag + * + * 7.17 + * - add FUSE_FLOCK_LOCKS and FUSE_RELEASE_FLOCK_UNLOCK + * + * 7.18 + * - add FUSE_IOCTL_DIR flag + * - add FUSE_NOTIFY_DELETE + * + * 7.19 + * - add FUSE_FALLOCATE + * + * 7.20 + * - add FUSE_AUTO_INVAL_DATA + * + * 7.21 + * - add FUSE_READDIRPLUS + * - send the requested events in POLL request + * + * 7.22 + * - add FUSE_ASYNC_DIO + * + * 7.23 + * - add FUSE_WRITEBACK_CACHE + * - add time_gran to fuse_init_out + * - add reserved space to fuse_init_out + * - add FATTR_CTIME + * - add ctime and ctimensec to fuse_setattr_in + * - add FUSE_RENAME2 request + * - add FUSE_NO_OPEN_SUPPORT flag + * + * 7.24 + * - add FUSE_LSEEK for SEEK_HOLE and SEEK_DATA support + * + * 7.25 + * - add FUSE_PARALLEL_DIROPS + * + * 7.26 + * - add FUSE_HANDLE_KILLPRIV + * - add FUSE_POSIX_ACL + * + * 7.27 + * - add FUSE_ABORT_ERROR + * + * 7.28 + * - add FUSE_COPY_FILE_RANGE + * - add FOPEN_CACHE_DIR + * - add FUSE_MAX_PAGES, add max_pages to init_out + * - add FUSE_CACHE_SYMLINKS + * + * 7.29 + * - add FUSE_NO_OPENDIR_SUPPORT flag + * + * 7.30 + * - add FUSE_EXPLICIT_INVAL_DATA + * - add FUSE_IOCTL_COMPAT_X32 + * + * 7.31 + * - add FUSE_WRITE_KILL_PRIV flag + * - add FUSE_SETUPMAPPING and FUSE_REMOVEMAPPING + * - add map_alignment to fuse_init_out, add FUSE_MAP_ALIGNMENT flag + * + * 7.32 + * - add flags to fuse_attr, add FUSE_ATTR_SUBMOUNT, add FUSE_SUBMOUNTS + * + * 7.33 + * - add FUSE_HANDLE_KILLPRIV_V2, FUSE_WRITE_KILL_SUIDGID, FATTR_KILL_SUIDGID + * - add FUSE_OPEN_KILL_SUIDGID + */ + +#ifndef _LINUX_FUSE_H +#define _LINUX_FUSE_H + +#include + +/* + * Version negotiation: + * + * Both the kernel and userspace send the version they support in the + * INIT request and reply respectively. + * + * If the major versions match then both shall use the smallest + * of the two minor versions for communication. + * + * If the kernel supports a larger major version, then userspace shall + * reply with the major version it supports, ignore the rest of the + * INIT message and expect a new INIT message from the kernel with a + * matching major version. + * + * If the library supports a larger major version, then it shall fall + * back to the major protocol version sent by the kernel for + * communication and reply with that major version (and an arbitrary + * supported minor version). + */ + +/** Version number of this interface */ +#define FUSE_KERNEL_VERSION 7 + +/** Minor version number of this interface */ +#define FUSE_KERNEL_MINOR_VERSION 33 + +/** The node ID of the root inode */ +#define FUSE_ROOT_ID 1 + +/* Make sure all structures are padded to 64bit boundary, so 32bit + userspace works under 64bit kernels */ + +struct fuse_attr { + uint64_t ino; + uint64_t size; + uint64_t blocks; + uint64_t atime; + uint64_t mtime; + uint64_t ctime; + uint32_t atimensec; + uint32_t mtimensec; + uint32_t ctimensec; + uint32_t mode; + uint32_t nlink; + uint32_t uid; + uint32_t gid; + uint32_t rdev; + uint32_t blksize; + uint32_t flags; +}; + +struct fuse_kstatfs { + uint64_t blocks; + uint64_t bfree; + uint64_t bavail; + uint64_t files; + uint64_t ffree; + uint32_t bsize; + uint32_t namelen; + uint32_t frsize; + uint32_t padding; + uint32_t spare[6]; +}; + +struct fuse_file_lock { + uint64_t start; + uint64_t end; + uint32_t type; + uint32_t pid; /* tgid */ +}; + +/** + * Bitmasks for fuse_setattr_in.valid + */ +#define FATTR_MODE (1 << 0) +#define FATTR_UID (1 << 1) +#define FATTR_GID (1 << 2) +#define FATTR_SIZE (1 << 3) +#define FATTR_ATIME (1 << 4) +#define FATTR_MTIME (1 << 5) +#define FATTR_FH (1 << 6) +#define FATTR_ATIME_NOW (1 << 7) +#define FATTR_MTIME_NOW (1 << 8) +#define FATTR_LOCKOWNER (1 << 9) +#define FATTR_CTIME (1 << 10) +#define FATTR_KILL_SUIDGID (1 << 11) + +/** + * Flags returned by the OPEN request + * + * FOPEN_DIRECT_IO: bypass page cache for this open file + * FOPEN_KEEP_CACHE: don't invalidate the data cache on open + * FOPEN_NONSEEKABLE: the file is not seekable + * FOPEN_CACHE_DIR: allow caching this directory + * FOPEN_STREAM: the file is stream-like (no file position at all) + */ +#define FOPEN_DIRECT_IO (1 << 0) +#define FOPEN_KEEP_CACHE (1 << 1) +#define FOPEN_NONSEEKABLE (1 << 2) +#define FOPEN_CACHE_DIR (1 << 3) +#define FOPEN_STREAM (1 << 4) + +/** + * INIT request/reply flags + * + * FUSE_ASYNC_READ: asynchronous read requests + * FUSE_POSIX_LOCKS: remote locking for POSIX file locks + * FUSE_FILE_OPS: kernel sends file handle for fstat, etc... (not yet supported) + * FUSE_ATOMIC_O_TRUNC: handles the O_TRUNC open flag in the filesystem + * FUSE_EXPORT_SUPPORT: filesystem handles lookups of "." and ".." + * FUSE_BIG_WRITES: filesystem can handle write size larger than 4kB + * FUSE_DONT_MASK: don't apply umask to file mode on create operations + * FUSE_SPLICE_WRITE: kernel supports splice write on the device + * FUSE_SPLICE_MOVE: kernel supports splice move on the device + * FUSE_SPLICE_READ: kernel supports splice read on the device + * FUSE_FLOCK_LOCKS: remote locking for BSD style file locks + * FUSE_HAS_IOCTL_DIR: kernel supports ioctl on directories + * FUSE_AUTO_INVAL_DATA: automatically invalidate cached pages + * FUSE_DO_READDIRPLUS: do READDIRPLUS (READDIR+LOOKUP in one) + * FUSE_READDIRPLUS_AUTO: adaptive readdirplus + * FUSE_ASYNC_DIO: asynchronous direct I/O submission + * FUSE_WRITEBACK_CACHE: use writeback cache for buffered writes + * FUSE_NO_OPEN_SUPPORT: kernel supports zero-message opens + * FUSE_PARALLEL_DIROPS: allow parallel lookups and readdir + * FUSE_HANDLE_KILLPRIV: fs handles killing suid/sgid/cap on write/chown/trunc + * FUSE_POSIX_ACL: filesystem supports posix acls + * FUSE_ABORT_ERROR: reading the device after abort returns ECONNABORTED + * FUSE_MAX_PAGES: init_out.max_pages contains the max number of req pages + * FUSE_CACHE_SYMLINKS: cache READLINK responses + * FUSE_NO_OPENDIR_SUPPORT: kernel supports zero-message opendir + * FUSE_EXPLICIT_INVAL_DATA: only invalidate cached pages on explicit request + * FUSE_MAP_ALIGNMENT: init_out.map_alignment contains log2(byte alignment) for + * foffset and moffset fields in struct + * fuse_setupmapping_out and fuse_removemapping_one. + * FUSE_SUBMOUNTS: kernel supports auto-mounting directory submounts + * FUSE_HANDLE_KILLPRIV_V2: fs kills suid/sgid/cap on write/chown/trunc. + * Upon write/truncate suid/sgid is only killed if caller + * does not have CAP_FSETID. Additionally upon + * write/truncate sgid is killed only if file has group + * execute permission. (Same as Linux VFS behavior). + */ +#define FUSE_ASYNC_READ (1 << 0) +#define FUSE_POSIX_LOCKS (1 << 1) +#define FUSE_FILE_OPS (1 << 2) +#define FUSE_ATOMIC_O_TRUNC (1 << 3) +#define FUSE_EXPORT_SUPPORT (1 << 4) +#define FUSE_BIG_WRITES (1 << 5) +#define FUSE_DONT_MASK (1 << 6) +#define FUSE_SPLICE_WRITE (1 << 7) +#define FUSE_SPLICE_MOVE (1 << 8) +#define FUSE_SPLICE_READ (1 << 9) +#define FUSE_FLOCK_LOCKS (1 << 10) +#define FUSE_HAS_IOCTL_DIR (1 << 11) +#define FUSE_AUTO_INVAL_DATA (1 << 12) +#define FUSE_DO_READDIRPLUS (1 << 13) +#define FUSE_READDIRPLUS_AUTO (1 << 14) +#define FUSE_ASYNC_DIO (1 << 15) +#define FUSE_WRITEBACK_CACHE (1 << 16) +#define FUSE_NO_OPEN_SUPPORT (1 << 17) +#define FUSE_PARALLEL_DIROPS (1 << 18) +#define FUSE_HANDLE_KILLPRIV (1 << 19) +#define FUSE_POSIX_ACL (1 << 20) +#define FUSE_ABORT_ERROR (1 << 21) +#define FUSE_MAX_PAGES (1 << 22) +#define FUSE_CACHE_SYMLINKS (1 << 23) +#define FUSE_NO_OPENDIR_SUPPORT (1 << 24) +#define FUSE_EXPLICIT_INVAL_DATA (1 << 25) +#define FUSE_MAP_ALIGNMENT (1 << 26) +#define FUSE_SUBMOUNTS (1 << 27) +#define FUSE_HANDLE_KILLPRIV_V2 (1 << 28) + +/** + * CUSE INIT request/reply flags + * + * CUSE_UNRESTRICTED_IOCTL: use unrestricted ioctl + */ +#define CUSE_UNRESTRICTED_IOCTL (1 << 0) + +/** + * Release flags + */ +#define FUSE_RELEASE_FLUSH (1 << 0) +#define FUSE_RELEASE_FLOCK_UNLOCK (1 << 1) + +/** + * Getattr flags + */ +#define FUSE_GETATTR_FH (1 << 0) + +/** + * Lock flags + */ +#define FUSE_LK_FLOCK (1 << 0) + +/** + * WRITE flags + * + * FUSE_WRITE_CACHE: delayed write from page cache, file handle is guessed + * FUSE_WRITE_LOCKOWNER: lock_owner field is valid + * FUSE_WRITE_KILL_SUIDGID: kill suid and sgid bits + */ +#define FUSE_WRITE_CACHE (1 << 0) +#define FUSE_WRITE_LOCKOWNER (1 << 1) +#define FUSE_WRITE_KILL_SUIDGID (1 << 2) + +/* Obsolete alias; this flag implies killing suid/sgid only. */ +#define FUSE_WRITE_KILL_PRIV FUSE_WRITE_KILL_SUIDGID + +/** + * Read flags + */ +#define FUSE_READ_LOCKOWNER (1 << 1) + +/** + * Ioctl flags + * + * FUSE_IOCTL_COMPAT: 32bit compat ioctl on 64bit machine + * FUSE_IOCTL_UNRESTRICTED: not restricted to well-formed ioctls, retry allowed + * FUSE_IOCTL_RETRY: retry with new iovecs + * FUSE_IOCTL_32BIT: 32bit ioctl + * FUSE_IOCTL_DIR: is a directory + * FUSE_IOCTL_COMPAT_X32: x32 compat ioctl on 64bit machine (64bit time_t) + * + * FUSE_IOCTL_MAX_IOV: maximum of in_iovecs + out_iovecs + */ +#define FUSE_IOCTL_COMPAT (1 << 0) +#define FUSE_IOCTL_UNRESTRICTED (1 << 1) +#define FUSE_IOCTL_RETRY (1 << 2) +#define FUSE_IOCTL_32BIT (1 << 3) +#define FUSE_IOCTL_DIR (1 << 4) +#define FUSE_IOCTL_COMPAT_X32 (1 << 5) + +#define FUSE_IOCTL_MAX_IOV 256 + +/** + * Poll flags + * + * FUSE_POLL_SCHEDULE_NOTIFY: request poll notify + */ +#define FUSE_POLL_SCHEDULE_NOTIFY (1 << 0) + +/** + * Fsync flags + * + * FUSE_FSYNC_FDATASYNC: Sync data only, not metadata + */ +#define FUSE_FSYNC_FDATASYNC (1 << 0) + +/** + * fuse_attr flags + * + * FUSE_ATTR_SUBMOUNT: Object is a submount root + */ +#define FUSE_ATTR_SUBMOUNT (1 << 0) + +/** + * Open flags + * FUSE_OPEN_KILL_SUIDGID: Kill suid and sgid if executable + */ +#define FUSE_OPEN_KILL_SUIDGID (1 << 0) + +enum fuse_opcode { + FUSE_LOOKUP = 1, + FUSE_FORGET = 2, /* no reply */ + FUSE_GETATTR = 3, + FUSE_SETATTR = 4, + FUSE_READLINK = 5, + FUSE_SYMLINK = 6, + FUSE_MKNOD = 8, + FUSE_MKDIR = 9, + FUSE_UNLINK = 10, + FUSE_RMDIR = 11, + FUSE_RENAME = 12, + FUSE_LINK = 13, + FUSE_OPEN = 14, + FUSE_READ = 15, + FUSE_WRITE = 16, + FUSE_STATFS = 17, + FUSE_RELEASE = 18, + FUSE_FSYNC = 20, + FUSE_SETXATTR = 21, + FUSE_GETXATTR = 22, + FUSE_LISTXATTR = 23, + FUSE_REMOVEXATTR = 24, + FUSE_FLUSH = 25, + FUSE_INIT = 26, + FUSE_OPENDIR = 27, + FUSE_READDIR = 28, + FUSE_RELEASEDIR = 29, + FUSE_FSYNCDIR = 30, + FUSE_GETLK = 31, + FUSE_SETLK = 32, + FUSE_SETLKW = 33, + FUSE_ACCESS = 34, + FUSE_CREATE = 35, + FUSE_INTERRUPT = 36, + FUSE_BMAP = 37, + FUSE_DESTROY = 38, + FUSE_IOCTL = 39, + FUSE_POLL = 40, + FUSE_NOTIFY_REPLY = 41, + FUSE_BATCH_FORGET = 42, + FUSE_FALLOCATE = 43, + FUSE_READDIRPLUS = 44, + FUSE_RENAME2 = 45, + FUSE_LSEEK = 46, + FUSE_COPY_FILE_RANGE = 47, + FUSE_SETUPMAPPING = 48, + FUSE_REMOVEMAPPING = 49, + + /* CUSE specific operations */ + CUSE_INIT = 4096, + + /* Reserved opcodes: helpful to detect structure endian-ness */ + CUSE_INIT_BSWAP_RESERVED = 1048576, /* CUSE_INIT << 8 */ + FUSE_INIT_BSWAP_RESERVED = 436207616, /* FUSE_INIT << 24 */ +}; + +enum fuse_notify_code { + FUSE_NOTIFY_POLL = 1, + FUSE_NOTIFY_INVAL_INODE = 2, + FUSE_NOTIFY_INVAL_ENTRY = 3, + FUSE_NOTIFY_STORE = 4, + FUSE_NOTIFY_RETRIEVE = 5, + FUSE_NOTIFY_DELETE = 6, + FUSE_NOTIFY_CODE_MAX, +}; + +/* The read buffer is required to be at least 8k, but may be much larger */ +#define FUSE_MIN_READ_BUFFER 8192 + +#define FUSE_COMPAT_ENTRY_OUT_SIZE 120 + +struct fuse_entry_out { + uint64_t nodeid; /* Inode ID */ + uint64_t generation; /* Inode generation: nodeid:gen must + be unique for the fs's lifetime */ + uint64_t entry_valid; /* Cache timeout for the name */ + uint64_t attr_valid; /* Cache timeout for the attributes */ + uint32_t entry_valid_nsec; + uint32_t attr_valid_nsec; + struct fuse_attr attr; +}; + +struct fuse_forget_in { + uint64_t nlookup; +}; + +struct fuse_forget_one { + uint64_t nodeid; + uint64_t nlookup; +}; + +struct fuse_batch_forget_in { + uint32_t count; + uint32_t dummy; +}; + +struct fuse_getattr_in { + uint32_t getattr_flags; + uint32_t dummy; + uint64_t fh; +}; + +#define FUSE_COMPAT_ATTR_OUT_SIZE 96 + +struct fuse_attr_out { + uint64_t attr_valid; /* Cache timeout for the attributes */ + uint32_t attr_valid_nsec; + uint32_t dummy; + struct fuse_attr attr; +}; + +#define FUSE_COMPAT_MKNOD_IN_SIZE 8 + +struct fuse_mknod_in { + uint32_t mode; + uint32_t rdev; + uint32_t umask; + uint32_t padding; +}; + +struct fuse_mkdir_in { + uint32_t mode; + uint32_t umask; +}; + +struct fuse_rename_in { + uint64_t newdir; +}; + +struct fuse_rename2_in { + uint64_t newdir; + uint32_t flags; + uint32_t padding; +}; + +struct fuse_link_in { + uint64_t oldnodeid; +}; + +struct fuse_setattr_in { + uint32_t valid; + uint32_t padding; + uint64_t fh; + uint64_t size; + uint64_t lock_owner; + uint64_t atime; + uint64_t mtime; + uint64_t ctime; + uint32_t atimensec; + uint32_t mtimensec; + uint32_t ctimensec; + uint32_t mode; + uint32_t unused4; + uint32_t uid; + uint32_t gid; + uint32_t unused5; +}; + +struct fuse_open_in { + uint32_t flags; + uint32_t open_flags; /* FUSE_OPEN_... */ +}; + +struct fuse_create_in { + uint32_t flags; + uint32_t mode; + uint32_t umask; + uint32_t open_flags; /* FUSE_OPEN_... */ +}; + +struct fuse_open_out { + uint64_t fh; + uint32_t open_flags; + uint32_t padding; +}; + +struct fuse_release_in { + uint64_t fh; + uint32_t flags; + uint32_t release_flags; + uint64_t lock_owner; +}; + +struct fuse_flush_in { + uint64_t fh; + uint32_t unused; + uint32_t padding; + uint64_t lock_owner; +}; + +struct fuse_read_in { + uint64_t fh; + uint64_t offset; + uint32_t size; + uint32_t read_flags; + uint64_t lock_owner; + uint32_t flags; + uint32_t padding; +}; + +#define FUSE_COMPAT_WRITE_IN_SIZE 24 + +struct fuse_write_in { + uint64_t fh; + uint64_t offset; + uint32_t size; + uint32_t write_flags; + uint64_t lock_owner; + uint32_t flags; + uint32_t padding; +}; + +struct fuse_write_out { + uint32_t size; + uint32_t padding; +}; + +#define FUSE_COMPAT_STATFS_SIZE 48 + +struct fuse_statfs_out { + struct fuse_kstatfs st; +}; + +struct fuse_fsync_in { + uint64_t fh; + uint32_t fsync_flags; + uint32_t padding; +}; + +struct fuse_setxattr_in { + uint32_t size; + uint32_t flags; +}; + +struct fuse_getxattr_in { + uint32_t size; + uint32_t padding; +}; + +struct fuse_getxattr_out { + uint32_t size; + uint32_t padding; +}; + +struct fuse_lk_in { + uint64_t fh; + uint64_t owner; + struct fuse_file_lock lk; + uint32_t lk_flags; + uint32_t padding; +}; + +struct fuse_lk_out { + struct fuse_file_lock lk; +}; + +struct fuse_access_in { + uint32_t mask; + uint32_t padding; +}; + +struct fuse_init_in { + uint32_t major; + uint32_t minor; + uint32_t max_readahead; + uint32_t flags; +}; + +#define FUSE_COMPAT_INIT_OUT_SIZE 8 +#define FUSE_COMPAT_22_INIT_OUT_SIZE 24 + +struct fuse_init_out { + uint32_t major; + uint32_t minor; + uint32_t max_readahead; + uint32_t flags; + uint16_t max_background; + uint16_t congestion_threshold; + uint32_t max_write; + uint32_t time_gran; + uint16_t max_pages; + uint16_t map_alignment; + uint32_t unused[8]; +}; + +#define CUSE_INIT_INFO_MAX 4096 + +struct cuse_init_in { + uint32_t major; + uint32_t minor; + uint32_t unused; + uint32_t flags; +}; + +struct cuse_init_out { + uint32_t major; + uint32_t minor; + uint32_t unused; + uint32_t flags; + uint32_t max_read; + uint32_t max_write; + uint32_t dev_major; /* chardev major */ + uint32_t dev_minor; /* chardev minor */ + uint32_t spare[10]; +}; + +struct fuse_interrupt_in { + uint64_t unique; +}; + +struct fuse_bmap_in { + uint64_t block; + uint32_t blocksize; + uint32_t padding; +}; + +struct fuse_bmap_out { + uint64_t block; +}; + +struct fuse_ioctl_in { + uint64_t fh; + uint32_t flags; + uint32_t cmd; + uint64_t arg; + uint32_t in_size; + uint32_t out_size; +}; + +struct fuse_ioctl_iovec { + uint64_t base; + uint64_t len; +}; + +struct fuse_ioctl_out { + int32_t result; + uint32_t flags; + uint32_t in_iovs; + uint32_t out_iovs; +}; + +struct fuse_poll_in { + uint64_t fh; + uint64_t kh; + uint32_t flags; + uint32_t events; +}; + +struct fuse_poll_out { + uint32_t revents; + uint32_t padding; +}; + +struct fuse_notify_poll_wakeup_out { + uint64_t kh; +}; + +struct fuse_fallocate_in { + uint64_t fh; + uint64_t offset; + uint64_t length; + uint32_t mode; + uint32_t padding; +}; + +struct fuse_in_header { + uint32_t len; + uint32_t opcode; + uint64_t unique; + uint64_t nodeid; + uint32_t uid; + uint32_t gid; + uint32_t pid; + uint32_t padding; +}; + +struct fuse_out_header { + uint32_t len; + int32_t error; + uint64_t unique; +}; + +struct fuse_dirent { + uint64_t ino; + uint64_t off; + uint32_t namelen; + uint32_t type; + char name[]; +}; + +#define FUSE_NAME_OFFSET offsetof(struct fuse_dirent, name) +#define FUSE_DIRENT_ALIGN(x) \ + (((x) + sizeof(uint64_t) - 1) & ~(sizeof(uint64_t) - 1)) +#define FUSE_DIRENT_SIZE(d) \ + FUSE_DIRENT_ALIGN(FUSE_NAME_OFFSET + (d)->namelen) + +struct fuse_direntplus { + struct fuse_entry_out entry_out; + struct fuse_dirent dirent; +}; + +#define FUSE_NAME_OFFSET_DIRENTPLUS \ + offsetof(struct fuse_direntplus, dirent.name) +#define FUSE_DIRENTPLUS_SIZE(d) \ + FUSE_DIRENT_ALIGN(FUSE_NAME_OFFSET_DIRENTPLUS + (d)->dirent.namelen) + +struct fuse_notify_inval_inode_out { + uint64_t ino; + int64_t off; + int64_t len; +}; + +struct fuse_notify_inval_entry_out { + uint64_t parent; + uint32_t namelen; + uint32_t padding; +}; + +struct fuse_notify_delete_out { + uint64_t parent; + uint64_t child; + uint32_t namelen; + uint32_t padding; +}; + +struct fuse_notify_store_out { + uint64_t nodeid; + uint64_t offset; + uint32_t size; + uint32_t padding; +}; + +struct fuse_notify_retrieve_out { + uint64_t notify_unique; + uint64_t nodeid; + uint64_t offset; + uint32_t size; + uint32_t padding; +}; + +/* Matches the size of fuse_write_in */ +struct fuse_notify_retrieve_in { + uint64_t dummy1; + uint64_t offset; + uint32_t size; + uint32_t dummy2; + uint64_t dummy3; + uint64_t dummy4; +}; + +/* Device ioctls: */ +#define FUSE_DEV_IOC_CLONE _IOR(229, 0, uint32_t) + +struct fuse_lseek_in { + uint64_t fh; + uint64_t offset; + uint32_t whence; + uint32_t padding; +}; + +struct fuse_lseek_out { + uint64_t offset; +}; + +struct fuse_copy_file_range_in { + uint64_t fh_in; + uint64_t off_in; + uint64_t nodeid_out; + uint64_t fh_out; + uint64_t off_out; + uint64_t len; + uint64_t flags; +}; + +#define FUSE_SETUPMAPPING_FLAG_WRITE (1ull << 0) +#define FUSE_SETUPMAPPING_FLAG_READ (1ull << 1) +struct fuse_setupmapping_in { + /* An already open handle */ + uint64_t fh; + /* Offset into the file to start the mapping */ + uint64_t foffset; + /* Length of mapping required */ + uint64_t len; + /* Flags, FUSE_SETUPMAPPING_FLAG_* */ + uint64_t flags; + /* Offset in Memory Window */ + uint64_t moffset; +}; + +struct fuse_removemapping_in { + /* number of fuse_removemapping_one follows */ + uint32_t count; +}; + +struct fuse_removemapping_one { + /* Offset into the dax window start the unmapping */ + uint64_t moffset; + /* Length of mapping required */ + uint64_t len; +}; + +#define FUSE_REMOVEMAPPING_MAX_ENTRY \ + (PAGE_SIZE / sizeof(struct fuse_removemapping_one)) + +#endif /* _LINUX_FUSE_H */ diff --git a/include/standard-headers/linux/if_ether.h b/include/standard-headers/linux/if_ether.h new file mode 100644 index 000000000..91cf735fb --- /dev/null +++ b/include/standard-headers/linux/if_ether.h @@ -0,0 +1 @@ +#define ETH_ALEN 6 diff --git a/include/standard-headers/linux/input-event-codes.h b/include/standard-headers/linux/input-event-codes.h new file mode 100644 index 000000000..c403b9cb0 --- /dev/null +++ b/include/standard-headers/linux/input-event-codes.h @@ -0,0 +1,949 @@ +/* SPDX-License-Identifier: GPL-2.0-only WITH Linux-syscall-note */ +/* + * Input event codes + * + * *** IMPORTANT *** + * This file is not only included from C-code but also from devicetree source + * files. As such this file MUST only contain comments and defines. + * + * Copyright (c) 1999-2002 Vojtech Pavlik + * Copyright (c) 2015 Hans de Goede + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 as published by + * the Free Software Foundation. + */ +#ifndef _INPUT_EVENT_CODES_H +#define _INPUT_EVENT_CODES_H + +/* + * Device properties and quirks + */ + +#define INPUT_PROP_POINTER 0x00 /* needs a pointer */ +#define INPUT_PROP_DIRECT 0x01 /* direct input devices */ +#define INPUT_PROP_BUTTONPAD 0x02 /* has button(s) under pad */ +#define INPUT_PROP_SEMI_MT 0x03 /* touch rectangle only */ +#define INPUT_PROP_TOPBUTTONPAD 0x04 /* softbuttons at top of pad */ +#define INPUT_PROP_POINTING_STICK 0x05 /* is a pointing stick */ +#define INPUT_PROP_ACCELEROMETER 0x06 /* has accelerometer */ + +#define INPUT_PROP_MAX 0x1f +#define INPUT_PROP_CNT (INPUT_PROP_MAX + 1) + +/* + * Event types + */ + +#define EV_SYN 0x00 +#define EV_KEY 0x01 +#define EV_REL 0x02 +#define EV_ABS 0x03 +#define EV_MSC 0x04 +#define EV_SW 0x05 +#define EV_LED 0x11 +#define EV_SND 0x12 +#define EV_REP 0x14 +#define EV_FF 0x15 +#define EV_PWR 0x16 +#define EV_FF_STATUS 0x17 +#define EV_MAX 0x1f +#define EV_CNT (EV_MAX+1) + +/* + * Synchronization events. + */ + +#define SYN_REPORT 0 +#define SYN_CONFIG 1 +#define SYN_MT_REPORT 2 +#define SYN_DROPPED 3 +#define SYN_MAX 0xf +#define SYN_CNT (SYN_MAX+1) + +/* + * Keys and buttons + * + * Most of the keys/buttons are modeled after USB HUT 1.12 + * (see http://www.usb.org/developers/hidpage). + * Abbreviations in the comments: + * AC - Application Control + * AL - Application Launch Button + * SC - System Control + */ + +#define KEY_RESERVED 0 +#define KEY_ESC 1 +#define KEY_1 2 +#define KEY_2 3 +#define KEY_3 4 +#define KEY_4 5 +#define KEY_5 6 +#define KEY_6 7 +#define KEY_7 8 +#define KEY_8 9 +#define KEY_9 10 +#define KEY_0 11 +#define KEY_MINUS 12 +#define KEY_EQUAL 13 +#define KEY_BACKSPACE 14 +#define KEY_TAB 15 +#define KEY_Q 16 +#define KEY_W 17 +#define KEY_E 18 +#define KEY_R 19 +#define KEY_T 20 +#define KEY_Y 21 +#define KEY_U 22 +#define KEY_I 23 +#define KEY_O 24 +#define KEY_P 25 +#define KEY_LEFTBRACE 26 +#define KEY_RIGHTBRACE 27 +#define KEY_ENTER 28 +#define KEY_LEFTCTRL 29 +#define KEY_A 30 +#define KEY_S 31 +#define KEY_D 32 +#define KEY_F 33 +#define KEY_G 34 +#define KEY_H 35 +#define KEY_J 36 +#define KEY_K 37 +#define KEY_L 38 +#define KEY_SEMICOLON 39 +#define KEY_APOSTROPHE 40 +#define KEY_GRAVE 41 +#define KEY_LEFTSHIFT 42 +#define KEY_BACKSLASH 43 +#define KEY_Z 44 +#define KEY_X 45 +#define KEY_C 46 +#define KEY_V 47 +#define KEY_B 48 +#define KEY_N 49 +#define KEY_M 50 +#define KEY_COMMA 51 +#define KEY_DOT 52 +#define KEY_SLASH 53 +#define KEY_RIGHTSHIFT 54 +#define KEY_KPASTERISK 55 +#define KEY_LEFTALT 56 +#define KEY_SPACE 57 +#define KEY_CAPSLOCK 58 +#define KEY_F1 59 +#define KEY_F2 60 +#define KEY_F3 61 +#define KEY_F4 62 +#define KEY_F5 63 +#define KEY_F6 64 +#define KEY_F7 65 +#define KEY_F8 66 +#define KEY_F9 67 +#define KEY_F10 68 +#define KEY_NUMLOCK 69 +#define KEY_SCROLLLOCK 70 +#define KEY_KP7 71 +#define KEY_KP8 72 +#define KEY_KP9 73 +#define KEY_KPMINUS 74 +#define KEY_KP4 75 +#define KEY_KP5 76 +#define KEY_KP6 77 +#define KEY_KPPLUS 78 +#define KEY_KP1 79 +#define KEY_KP2 80 +#define KEY_KP3 81 +#define KEY_KP0 82 +#define KEY_KPDOT 83 + +#define KEY_ZENKAKUHANKAKU 85 +#define KEY_102ND 86 +#define KEY_F11 87 +#define KEY_F12 88 +#define KEY_RO 89 +#define KEY_KATAKANA 90 +#define KEY_HIRAGANA 91 +#define KEY_HENKAN 92 +#define KEY_KATAKANAHIRAGANA 93 +#define KEY_MUHENKAN 94 +#define KEY_KPJPCOMMA 95 +#define KEY_KPENTER 96 +#define KEY_RIGHTCTRL 97 +#define KEY_KPSLASH 98 +#define KEY_SYSRQ 99 +#define KEY_RIGHTALT 100 +#define KEY_LINEFEED 101 +#define KEY_HOME 102 +#define KEY_UP 103 +#define KEY_PAGEUP 104 +#define KEY_LEFT 105 +#define KEY_RIGHT 106 +#define KEY_END 107 +#define KEY_DOWN 108 +#define KEY_PAGEDOWN 109 +#define KEY_INSERT 110 +#define KEY_DELETE 111 +#define KEY_MACRO 112 +#define KEY_MUTE 113 +#define KEY_VOLUMEDOWN 114 +#define KEY_VOLUMEUP 115 +#define KEY_POWER 116 /* SC System Power Down */ +#define KEY_KPEQUAL 117 +#define KEY_KPPLUSMINUS 118 +#define KEY_PAUSE 119 +#define KEY_SCALE 120 /* AL Compiz Scale (Expose) */ + +#define KEY_KPCOMMA 121 +#define KEY_HANGEUL 122 +#define KEY_HANGUEL KEY_HANGEUL +#define KEY_HANJA 123 +#define KEY_YEN 124 +#define KEY_LEFTMETA 125 +#define KEY_RIGHTMETA 126 +#define KEY_COMPOSE 127 + +#define KEY_STOP 128 /* AC Stop */ +#define KEY_AGAIN 129 +#define KEY_PROPS 130 /* AC Properties */ +#define KEY_UNDO 131 /* AC Undo */ +#define KEY_FRONT 132 +#define KEY_COPY 133 /* AC Copy */ +#define KEY_OPEN 134 /* AC Open */ +#define KEY_PASTE 135 /* AC Paste */ +#define KEY_FIND 136 /* AC Search */ +#define KEY_CUT 137 /* AC Cut */ +#define KEY_HELP 138 /* AL Integrated Help Center */ +#define KEY_MENU 139 /* Menu (show menu) */ +#define KEY_CALC 140 /* AL Calculator */ +#define KEY_SETUP 141 +#define KEY_SLEEP 142 /* SC System Sleep */ +#define KEY_WAKEUP 143 /* System Wake Up */ +#define KEY_FILE 144 /* AL Local Machine Browser */ +#define KEY_SENDFILE 145 +#define KEY_DELETEFILE 146 +#define KEY_XFER 147 +#define KEY_PROG1 148 +#define KEY_PROG2 149 +#define KEY_WWW 150 /* AL Internet Browser */ +#define KEY_MSDOS 151 +#define KEY_COFFEE 152 /* AL Terminal Lock/Screensaver */ +#define KEY_SCREENLOCK KEY_COFFEE +#define KEY_ROTATE_DISPLAY 153 /* Display orientation for e.g. tablets */ +#define KEY_DIRECTION KEY_ROTATE_DISPLAY +#define KEY_CYCLEWINDOWS 154 +#define KEY_MAIL 155 +#define KEY_BOOKMARKS 156 /* AC Bookmarks */ +#define KEY_COMPUTER 157 +#define KEY_BACK 158 /* AC Back */ +#define KEY_FORWARD 159 /* AC Forward */ +#define KEY_CLOSECD 160 +#define KEY_EJECTCD 161 +#define KEY_EJECTCLOSECD 162 +#define KEY_NEXTSONG 163 +#define KEY_PLAYPAUSE 164 +#define KEY_PREVIOUSSONG 165 +#define KEY_STOPCD 166 +#define KEY_RECORD 167 +#define KEY_REWIND 168 +#define KEY_PHONE 169 /* Media Select Telephone */ +#define KEY_ISO 170 +#define KEY_CONFIG 171 /* AL Consumer Control Configuration */ +#define KEY_HOMEPAGE 172 /* AC Home */ +#define KEY_REFRESH 173 /* AC Refresh */ +#define KEY_EXIT 174 /* AC Exit */ +#define KEY_MOVE 175 +#define KEY_EDIT 176 +#define KEY_SCROLLUP 177 +#define KEY_SCROLLDOWN 178 +#define KEY_KPLEFTPAREN 179 +#define KEY_KPRIGHTPAREN 180 +#define KEY_NEW 181 /* AC New */ +#define KEY_REDO 182 /* AC Redo/Repeat */ + +#define KEY_F13 183 +#define KEY_F14 184 +#define KEY_F15 185 +#define KEY_F16 186 +#define KEY_F17 187 +#define KEY_F18 188 +#define KEY_F19 189 +#define KEY_F20 190 +#define KEY_F21 191 +#define KEY_F22 192 +#define KEY_F23 193 +#define KEY_F24 194 + +#define KEY_PLAYCD 200 +#define KEY_PAUSECD 201 +#define KEY_PROG3 202 +#define KEY_PROG4 203 +#define KEY_DASHBOARD 204 /* AL Dashboard */ +#define KEY_SUSPEND 205 +#define KEY_CLOSE 206 /* AC Close */ +#define KEY_PLAY 207 +#define KEY_FASTFORWARD 208 +#define KEY_BASSBOOST 209 +#define KEY_PRINT 210 /* AC Print */ +#define KEY_HP 211 +#define KEY_CAMERA 212 +#define KEY_SOUND 213 +#define KEY_QUESTION 214 +#define KEY_EMAIL 215 +#define KEY_CHAT 216 +#define KEY_SEARCH 217 +#define KEY_CONNECT 218 +#define KEY_FINANCE 219 /* AL Checkbook/Finance */ +#define KEY_SPORT 220 +#define KEY_SHOP 221 +#define KEY_ALTERASE 222 +#define KEY_CANCEL 223 /* AC Cancel */ +#define KEY_BRIGHTNESSDOWN 224 +#define KEY_BRIGHTNESSUP 225 +#define KEY_MEDIA 226 + +#define KEY_SWITCHVIDEOMODE 227 /* Cycle between available video + outputs (Monitor/LCD/TV-out/etc) */ +#define KEY_KBDILLUMTOGGLE 228 +#define KEY_KBDILLUMDOWN 229 +#define KEY_KBDILLUMUP 230 + +#define KEY_SEND 231 /* AC Send */ +#define KEY_REPLY 232 /* AC Reply */ +#define KEY_FORWARDMAIL 233 /* AC Forward Msg */ +#define KEY_SAVE 234 /* AC Save */ +#define KEY_DOCUMENTS 235 + +#define KEY_BATTERY 236 + +#define KEY_BLUETOOTH 237 +#define KEY_WLAN 238 +#define KEY_UWB 239 + +#define KEY_UNKNOWN 240 + +#define KEY_VIDEO_NEXT 241 /* drive next video source */ +#define KEY_VIDEO_PREV 242 /* drive previous video source */ +#define KEY_BRIGHTNESS_CYCLE 243 /* brightness up, after max is min */ +#define KEY_BRIGHTNESS_AUTO 244 /* Set Auto Brightness: manual + brightness control is off, + rely on ambient */ +#define KEY_BRIGHTNESS_ZERO KEY_BRIGHTNESS_AUTO +#define KEY_DISPLAY_OFF 245 /* display device to off state */ + +#define KEY_WWAN 246 /* Wireless WAN (LTE, UMTS, GSM, etc.) */ +#define KEY_WIMAX KEY_WWAN +#define KEY_RFKILL 247 /* Key that controls all radios */ + +#define KEY_MICMUTE 248 /* Mute / unmute the microphone */ + +/* Code 255 is reserved for special needs of AT keyboard driver */ + +#define BTN_MISC 0x100 +#define BTN_0 0x100 +#define BTN_1 0x101 +#define BTN_2 0x102 +#define BTN_3 0x103 +#define BTN_4 0x104 +#define BTN_5 0x105 +#define BTN_6 0x106 +#define BTN_7 0x107 +#define BTN_8 0x108 +#define BTN_9 0x109 + +#define BTN_MOUSE 0x110 +#define BTN_LEFT 0x110 +#define BTN_RIGHT 0x111 +#define BTN_MIDDLE 0x112 +#define BTN_SIDE 0x113 +#define BTN_EXTRA 0x114 +#define BTN_FORWARD 0x115 +#define BTN_BACK 0x116 +#define BTN_TASK 0x117 + +#define BTN_JOYSTICK 0x120 +#define BTN_TRIGGER 0x120 +#define BTN_THUMB 0x121 +#define BTN_THUMB2 0x122 +#define BTN_TOP 0x123 +#define BTN_TOP2 0x124 +#define BTN_PINKIE 0x125 +#define BTN_BASE 0x126 +#define BTN_BASE2 0x127 +#define BTN_BASE3 0x128 +#define BTN_BASE4 0x129 +#define BTN_BASE5 0x12a +#define BTN_BASE6 0x12b +#define BTN_DEAD 0x12f + +#define BTN_GAMEPAD 0x130 +#define BTN_SOUTH 0x130 +#define BTN_A BTN_SOUTH +#define BTN_EAST 0x131 +#define BTN_B BTN_EAST +#define BTN_C 0x132 +#define BTN_NORTH 0x133 +#define BTN_X BTN_NORTH +#define BTN_WEST 0x134 +#define BTN_Y BTN_WEST +#define BTN_Z 0x135 +#define BTN_TL 0x136 +#define BTN_TR 0x137 +#define BTN_TL2 0x138 +#define BTN_TR2 0x139 +#define BTN_SELECT 0x13a +#define BTN_START 0x13b +#define BTN_MODE 0x13c +#define BTN_THUMBL 0x13d +#define BTN_THUMBR 0x13e + +#define BTN_DIGI 0x140 +#define BTN_TOOL_PEN 0x140 +#define BTN_TOOL_RUBBER 0x141 +#define BTN_TOOL_BRUSH 0x142 +#define BTN_TOOL_PENCIL 0x143 +#define BTN_TOOL_AIRBRUSH 0x144 +#define BTN_TOOL_FINGER 0x145 +#define BTN_TOOL_MOUSE 0x146 +#define BTN_TOOL_LENS 0x147 +#define BTN_TOOL_QUINTTAP 0x148 /* Five fingers on trackpad */ +#define BTN_STYLUS3 0x149 +#define BTN_TOUCH 0x14a +#define BTN_STYLUS 0x14b +#define BTN_STYLUS2 0x14c +#define BTN_TOOL_DOUBLETAP 0x14d +#define BTN_TOOL_TRIPLETAP 0x14e +#define BTN_TOOL_QUADTAP 0x14f /* Four fingers on trackpad */ + +#define BTN_WHEEL 0x150 +#define BTN_GEAR_DOWN 0x150 +#define BTN_GEAR_UP 0x151 + +#define KEY_OK 0x160 +#define KEY_SELECT 0x161 +#define KEY_GOTO 0x162 +#define KEY_CLEAR 0x163 +#define KEY_POWER2 0x164 +#define KEY_OPTION 0x165 +#define KEY_INFO 0x166 /* AL OEM Features/Tips/Tutorial */ +#define KEY_TIME 0x167 +#define KEY_VENDOR 0x168 +#define KEY_ARCHIVE 0x169 +#define KEY_PROGRAM 0x16a /* Media Select Program Guide */ +#define KEY_CHANNEL 0x16b +#define KEY_FAVORITES 0x16c +#define KEY_EPG 0x16d +#define KEY_PVR 0x16e /* Media Select Home */ +#define KEY_MHP 0x16f +#define KEY_LANGUAGE 0x170 +#define KEY_TITLE 0x171 +#define KEY_SUBTITLE 0x172 +#define KEY_ANGLE 0x173 +#define KEY_FULL_SCREEN 0x174 /* AC View Toggle */ +#define KEY_ZOOM KEY_FULL_SCREEN +#define KEY_MODE 0x175 +#define KEY_KEYBOARD 0x176 +#define KEY_ASPECT_RATIO 0x177 /* HUTRR37: Aspect */ +#define KEY_SCREEN KEY_ASPECT_RATIO +#define KEY_PC 0x178 /* Media Select Computer */ +#define KEY_TV 0x179 /* Media Select TV */ +#define KEY_TV2 0x17a /* Media Select Cable */ +#define KEY_VCR 0x17b /* Media Select VCR */ +#define KEY_VCR2 0x17c /* VCR Plus */ +#define KEY_SAT 0x17d /* Media Select Satellite */ +#define KEY_SAT2 0x17e +#define KEY_CD 0x17f /* Media Select CD */ +#define KEY_TAPE 0x180 /* Media Select Tape */ +#define KEY_RADIO 0x181 +#define KEY_TUNER 0x182 /* Media Select Tuner */ +#define KEY_PLAYER 0x183 +#define KEY_TEXT 0x184 +#define KEY_DVD 0x185 /* Media Select DVD */ +#define KEY_AUX 0x186 +#define KEY_MP3 0x187 +#define KEY_AUDIO 0x188 /* AL Audio Browser */ +#define KEY_VIDEO 0x189 /* AL Movie Browser */ +#define KEY_DIRECTORY 0x18a +#define KEY_LIST 0x18b +#define KEY_MEMO 0x18c /* Media Select Messages */ +#define KEY_CALENDAR 0x18d +#define KEY_RED 0x18e +#define KEY_GREEN 0x18f +#define KEY_YELLOW 0x190 +#define KEY_BLUE 0x191 +#define KEY_CHANNELUP 0x192 /* Channel Increment */ +#define KEY_CHANNELDOWN 0x193 /* Channel Decrement */ +#define KEY_FIRST 0x194 +#define KEY_LAST 0x195 /* Recall Last */ +#define KEY_AB 0x196 +#define KEY_NEXT 0x197 +#define KEY_RESTART 0x198 +#define KEY_SLOW 0x199 +#define KEY_SHUFFLE 0x19a +#define KEY_BREAK 0x19b +#define KEY_PREVIOUS 0x19c +#define KEY_DIGITS 0x19d +#define KEY_TEEN 0x19e +#define KEY_TWEN 0x19f +#define KEY_VIDEOPHONE 0x1a0 /* Media Select Video Phone */ +#define KEY_GAMES 0x1a1 /* Media Select Games */ +#define KEY_ZOOMIN 0x1a2 /* AC Zoom In */ +#define KEY_ZOOMOUT 0x1a3 /* AC Zoom Out */ +#define KEY_ZOOMRESET 0x1a4 /* AC Zoom */ +#define KEY_WORDPROCESSOR 0x1a5 /* AL Word Processor */ +#define KEY_EDITOR 0x1a6 /* AL Text Editor */ +#define KEY_SPREADSHEET 0x1a7 /* AL Spreadsheet */ +#define KEY_GRAPHICSEDITOR 0x1a8 /* AL Graphics Editor */ +#define KEY_PRESENTATION 0x1a9 /* AL Presentation App */ +#define KEY_DATABASE 0x1aa /* AL Database App */ +#define KEY_NEWS 0x1ab /* AL Newsreader */ +#define KEY_VOICEMAIL 0x1ac /* AL Voicemail */ +#define KEY_ADDRESSBOOK 0x1ad /* AL Contacts/Address Book */ +#define KEY_MESSENGER 0x1ae /* AL Instant Messaging */ +#define KEY_DISPLAYTOGGLE 0x1af /* Turn display (LCD) on and off */ +#define KEY_BRIGHTNESS_TOGGLE KEY_DISPLAYTOGGLE +#define KEY_SPELLCHECK 0x1b0 /* AL Spell Check */ +#define KEY_LOGOFF 0x1b1 /* AL Logoff */ + +#define KEY_DOLLAR 0x1b2 +#define KEY_EURO 0x1b3 + +#define KEY_FRAMEBACK 0x1b4 /* Consumer - transport controls */ +#define KEY_FRAMEFORWARD 0x1b5 +#define KEY_CONTEXT_MENU 0x1b6 /* GenDesc - system context menu */ +#define KEY_MEDIA_REPEAT 0x1b7 /* Consumer - transport control */ +#define KEY_10CHANNELSUP 0x1b8 /* 10 channels up (10+) */ +#define KEY_10CHANNELSDOWN 0x1b9 /* 10 channels down (10-) */ +#define KEY_IMAGES 0x1ba /* AL Image Browser */ +#define KEY_NOTIFICATION_CENTER 0x1bc /* Show/hide the notification center */ +#define KEY_PICKUP_PHONE 0x1bd /* Answer incoming call */ +#define KEY_HANGUP_PHONE 0x1be /* Decline incoming call */ + +#define KEY_DEL_EOL 0x1c0 +#define KEY_DEL_EOS 0x1c1 +#define KEY_INS_LINE 0x1c2 +#define KEY_DEL_LINE 0x1c3 + +#define KEY_FN 0x1d0 +#define KEY_FN_ESC 0x1d1 +#define KEY_FN_F1 0x1d2 +#define KEY_FN_F2 0x1d3 +#define KEY_FN_F3 0x1d4 +#define KEY_FN_F4 0x1d5 +#define KEY_FN_F5 0x1d6 +#define KEY_FN_F6 0x1d7 +#define KEY_FN_F7 0x1d8 +#define KEY_FN_F8 0x1d9 +#define KEY_FN_F9 0x1da +#define KEY_FN_F10 0x1db +#define KEY_FN_F11 0x1dc +#define KEY_FN_F12 0x1dd +#define KEY_FN_1 0x1de +#define KEY_FN_2 0x1df +#define KEY_FN_D 0x1e0 +#define KEY_FN_E 0x1e1 +#define KEY_FN_F 0x1e2 +#define KEY_FN_S 0x1e3 +#define KEY_FN_B 0x1e4 +#define KEY_FN_RIGHT_SHIFT 0x1e5 + +#define KEY_BRL_DOT1 0x1f1 +#define KEY_BRL_DOT2 0x1f2 +#define KEY_BRL_DOT3 0x1f3 +#define KEY_BRL_DOT4 0x1f4 +#define KEY_BRL_DOT5 0x1f5 +#define KEY_BRL_DOT6 0x1f6 +#define KEY_BRL_DOT7 0x1f7 +#define KEY_BRL_DOT8 0x1f8 +#define KEY_BRL_DOT9 0x1f9 +#define KEY_BRL_DOT10 0x1fa + +#define KEY_NUMERIC_0 0x200 /* used by phones, remote controls, */ +#define KEY_NUMERIC_1 0x201 /* and other keypads */ +#define KEY_NUMERIC_2 0x202 +#define KEY_NUMERIC_3 0x203 +#define KEY_NUMERIC_4 0x204 +#define KEY_NUMERIC_5 0x205 +#define KEY_NUMERIC_6 0x206 +#define KEY_NUMERIC_7 0x207 +#define KEY_NUMERIC_8 0x208 +#define KEY_NUMERIC_9 0x209 +#define KEY_NUMERIC_STAR 0x20a +#define KEY_NUMERIC_POUND 0x20b +#define KEY_NUMERIC_A 0x20c /* Phone key A - HUT Telephony 0xb9 */ +#define KEY_NUMERIC_B 0x20d +#define KEY_NUMERIC_C 0x20e +#define KEY_NUMERIC_D 0x20f + +#define KEY_CAMERA_FOCUS 0x210 +#define KEY_WPS_BUTTON 0x211 /* WiFi Protected Setup key */ + +#define KEY_TOUCHPAD_TOGGLE 0x212 /* Request switch touchpad on or off */ +#define KEY_TOUCHPAD_ON 0x213 +#define KEY_TOUCHPAD_OFF 0x214 + +#define KEY_CAMERA_ZOOMIN 0x215 +#define KEY_CAMERA_ZOOMOUT 0x216 +#define KEY_CAMERA_UP 0x217 +#define KEY_CAMERA_DOWN 0x218 +#define KEY_CAMERA_LEFT 0x219 +#define KEY_CAMERA_RIGHT 0x21a + +#define KEY_ATTENDANT_ON 0x21b +#define KEY_ATTENDANT_OFF 0x21c +#define KEY_ATTENDANT_TOGGLE 0x21d /* Attendant call on or off */ +#define KEY_LIGHTS_TOGGLE 0x21e /* Reading light on or off */ + +#define BTN_DPAD_UP 0x220 +#define BTN_DPAD_DOWN 0x221 +#define BTN_DPAD_LEFT 0x222 +#define BTN_DPAD_RIGHT 0x223 + +#define KEY_ALS_TOGGLE 0x230 /* Ambient light sensor */ +#define KEY_ROTATE_LOCK_TOGGLE 0x231 /* Display rotation lock */ + +#define KEY_BUTTONCONFIG 0x240 /* AL Button Configuration */ +#define KEY_TASKMANAGER 0x241 /* AL Task/Project Manager */ +#define KEY_JOURNAL 0x242 /* AL Log/Journal/Timecard */ +#define KEY_CONTROLPANEL 0x243 /* AL Control Panel */ +#define KEY_APPSELECT 0x244 /* AL Select Task/Application */ +#define KEY_SCREENSAVER 0x245 /* AL Screen Saver */ +#define KEY_VOICECOMMAND 0x246 /* Listening Voice Command */ +#define KEY_ASSISTANT 0x247 /* AL Context-aware desktop assistant */ +#define KEY_KBD_LAYOUT_NEXT 0x248 /* AC Next Keyboard Layout Select */ + +#define KEY_BRIGHTNESS_MIN 0x250 /* Set Brightness to Minimum */ +#define KEY_BRIGHTNESS_MAX 0x251 /* Set Brightness to Maximum */ + +#define KEY_KBDINPUTASSIST_PREV 0x260 +#define KEY_KBDINPUTASSIST_NEXT 0x261 +#define KEY_KBDINPUTASSIST_PREVGROUP 0x262 +#define KEY_KBDINPUTASSIST_NEXTGROUP 0x263 +#define KEY_KBDINPUTASSIST_ACCEPT 0x264 +#define KEY_KBDINPUTASSIST_CANCEL 0x265 + +/* Diagonal movement keys */ +#define KEY_RIGHT_UP 0x266 +#define KEY_RIGHT_DOWN 0x267 +#define KEY_LEFT_UP 0x268 +#define KEY_LEFT_DOWN 0x269 + +#define KEY_ROOT_MENU 0x26a /* Show Device's Root Menu */ +/* Show Top Menu of the Media (e.g. DVD) */ +#define KEY_MEDIA_TOP_MENU 0x26b +#define KEY_NUMERIC_11 0x26c +#define KEY_NUMERIC_12 0x26d +/* + * Toggle Audio Description: refers to an audio service that helps blind and + * visually impaired consumers understand the action in a program. Note: in + * some countries this is referred to as "Video Description". + */ +#define KEY_AUDIO_DESC 0x26e +#define KEY_3D_MODE 0x26f +#define KEY_NEXT_FAVORITE 0x270 +#define KEY_STOP_RECORD 0x271 +#define KEY_PAUSE_RECORD 0x272 +#define KEY_VOD 0x273 /* Video on Demand */ +#define KEY_UNMUTE 0x274 +#define KEY_FASTREVERSE 0x275 +#define KEY_SLOWREVERSE 0x276 +/* + * Control a data application associated with the currently viewed channel, + * e.g. teletext or data broadcast application (MHEG, MHP, HbbTV, etc.) + */ +#define KEY_DATA 0x277 +#define KEY_ONSCREEN_KEYBOARD 0x278 +/* Electronic privacy screen control */ +#define KEY_PRIVACY_SCREEN_TOGGLE 0x279 + +/* Select an area of screen to be copied */ +#define KEY_SELECTIVE_SCREENSHOT 0x27a + +/* + * Some keyboards have keys which do not have a defined meaning, these keys + * are intended to be programmed / bound to macros by the user. For most + * keyboards with these macro-keys the key-sequence to inject, or action to + * take, is all handled by software on the host side. So from the kernel's + * point of view these are just normal keys. + * + * The KEY_MACRO# codes below are intended for such keys, which may be labeled + * e.g. G1-G18, or S1 - S30. The KEY_MACRO# codes MUST NOT be used for keys + * where the marking on the key does indicate a defined meaning / purpose. + * + * The KEY_MACRO# codes MUST also NOT be used as fallback for when no existing + * KEY_FOO define matches the marking / purpose. In this case a new KEY_FOO + * define MUST be added. + */ +#define KEY_MACRO1 0x290 +#define KEY_MACRO2 0x291 +#define KEY_MACRO3 0x292 +#define KEY_MACRO4 0x293 +#define KEY_MACRO5 0x294 +#define KEY_MACRO6 0x295 +#define KEY_MACRO7 0x296 +#define KEY_MACRO8 0x297 +#define KEY_MACRO9 0x298 +#define KEY_MACRO10 0x299 +#define KEY_MACRO11 0x29a +#define KEY_MACRO12 0x29b +#define KEY_MACRO13 0x29c +#define KEY_MACRO14 0x29d +#define KEY_MACRO15 0x29e +#define KEY_MACRO16 0x29f +#define KEY_MACRO17 0x2a0 +#define KEY_MACRO18 0x2a1 +#define KEY_MACRO19 0x2a2 +#define KEY_MACRO20 0x2a3 +#define KEY_MACRO21 0x2a4 +#define KEY_MACRO22 0x2a5 +#define KEY_MACRO23 0x2a6 +#define KEY_MACRO24 0x2a7 +#define KEY_MACRO25 0x2a8 +#define KEY_MACRO26 0x2a9 +#define KEY_MACRO27 0x2aa +#define KEY_MACRO28 0x2ab +#define KEY_MACRO29 0x2ac +#define KEY_MACRO30 0x2ad + +/* + * Some keyboards with the macro-keys described above have some extra keys + * for controlling the host-side software responsible for the macro handling: + * -A macro recording start/stop key. Note that not all keyboards which emit + * KEY_MACRO_RECORD_START will also emit KEY_MACRO_RECORD_STOP if + * KEY_MACRO_RECORD_STOP is not advertised, then KEY_MACRO_RECORD_START + * should be interpreted as a recording start/stop toggle; + * -Keys for switching between different macro (pre)sets, either a key for + * cycling through the configured presets or keys to directly select a preset. + */ +#define KEY_MACRO_RECORD_START 0x2b0 +#define KEY_MACRO_RECORD_STOP 0x2b1 +#define KEY_MACRO_PRESET_CYCLE 0x2b2 +#define KEY_MACRO_PRESET1 0x2b3 +#define KEY_MACRO_PRESET2 0x2b4 +#define KEY_MACRO_PRESET3 0x2b5 + +/* + * Some keyboards have a buildin LCD panel where the contents are controlled + * by the host. Often these have a number of keys directly below the LCD + * intended for controlling a menu shown on the LCD. These keys often don't + * have any labeling so we just name them KEY_KBD_LCD_MENU# + */ +#define KEY_KBD_LCD_MENU1 0x2b8 +#define KEY_KBD_LCD_MENU2 0x2b9 +#define KEY_KBD_LCD_MENU3 0x2ba +#define KEY_KBD_LCD_MENU4 0x2bb +#define KEY_KBD_LCD_MENU5 0x2bc + +#define BTN_TRIGGER_HAPPY 0x2c0 +#define BTN_TRIGGER_HAPPY1 0x2c0 +#define BTN_TRIGGER_HAPPY2 0x2c1 +#define BTN_TRIGGER_HAPPY3 0x2c2 +#define BTN_TRIGGER_HAPPY4 0x2c3 +#define BTN_TRIGGER_HAPPY5 0x2c4 +#define BTN_TRIGGER_HAPPY6 0x2c5 +#define BTN_TRIGGER_HAPPY7 0x2c6 +#define BTN_TRIGGER_HAPPY8 0x2c7 +#define BTN_TRIGGER_HAPPY9 0x2c8 +#define BTN_TRIGGER_HAPPY10 0x2c9 +#define BTN_TRIGGER_HAPPY11 0x2ca +#define BTN_TRIGGER_HAPPY12 0x2cb +#define BTN_TRIGGER_HAPPY13 0x2cc +#define BTN_TRIGGER_HAPPY14 0x2cd +#define BTN_TRIGGER_HAPPY15 0x2ce +#define BTN_TRIGGER_HAPPY16 0x2cf +#define BTN_TRIGGER_HAPPY17 0x2d0 +#define BTN_TRIGGER_HAPPY18 0x2d1 +#define BTN_TRIGGER_HAPPY19 0x2d2 +#define BTN_TRIGGER_HAPPY20 0x2d3 +#define BTN_TRIGGER_HAPPY21 0x2d4 +#define BTN_TRIGGER_HAPPY22 0x2d5 +#define BTN_TRIGGER_HAPPY23 0x2d6 +#define BTN_TRIGGER_HAPPY24 0x2d7 +#define BTN_TRIGGER_HAPPY25 0x2d8 +#define BTN_TRIGGER_HAPPY26 0x2d9 +#define BTN_TRIGGER_HAPPY27 0x2da +#define BTN_TRIGGER_HAPPY28 0x2db +#define BTN_TRIGGER_HAPPY29 0x2dc +#define BTN_TRIGGER_HAPPY30 0x2dd +#define BTN_TRIGGER_HAPPY31 0x2de +#define BTN_TRIGGER_HAPPY32 0x2df +#define BTN_TRIGGER_HAPPY33 0x2e0 +#define BTN_TRIGGER_HAPPY34 0x2e1 +#define BTN_TRIGGER_HAPPY35 0x2e2 +#define BTN_TRIGGER_HAPPY36 0x2e3 +#define BTN_TRIGGER_HAPPY37 0x2e4 +#define BTN_TRIGGER_HAPPY38 0x2e5 +#define BTN_TRIGGER_HAPPY39 0x2e6 +#define BTN_TRIGGER_HAPPY40 0x2e7 + +/* We avoid low common keys in module aliases so they don't get huge. */ +#define KEY_MIN_INTERESTING KEY_MUTE +#define KEY_MAX 0x2ff +#define KEY_CNT (KEY_MAX+1) + +/* + * Relative axes + */ + +#define REL_X 0x00 +#define REL_Y 0x01 +#define REL_Z 0x02 +#define REL_RX 0x03 +#define REL_RY 0x04 +#define REL_RZ 0x05 +#define REL_HWHEEL 0x06 +#define REL_DIAL 0x07 +#define REL_WHEEL 0x08 +#define REL_MISC 0x09 +/* + * 0x0a is reserved and should not be used in input drivers. + * It was used by HID as REL_MISC+1 and userspace needs to detect if + * the next REL_* event is correct or is just REL_MISC + n. + * We define here REL_RESERVED so userspace can rely on it and detect + * the situation described above. + */ +#define REL_RESERVED 0x0a +#define REL_WHEEL_HI_RES 0x0b +#define REL_HWHEEL_HI_RES 0x0c +#define REL_MAX 0x0f +#define REL_CNT (REL_MAX+1) + +/* + * Absolute axes + */ + +#define ABS_X 0x00 +#define ABS_Y 0x01 +#define ABS_Z 0x02 +#define ABS_RX 0x03 +#define ABS_RY 0x04 +#define ABS_RZ 0x05 +#define ABS_THROTTLE 0x06 +#define ABS_RUDDER 0x07 +#define ABS_WHEEL 0x08 +#define ABS_GAS 0x09 +#define ABS_BRAKE 0x0a +#define ABS_HAT0X 0x10 +#define ABS_HAT0Y 0x11 +#define ABS_HAT1X 0x12 +#define ABS_HAT1Y 0x13 +#define ABS_HAT2X 0x14 +#define ABS_HAT2Y 0x15 +#define ABS_HAT3X 0x16 +#define ABS_HAT3Y 0x17 +#define ABS_PRESSURE 0x18 +#define ABS_DISTANCE 0x19 +#define ABS_TILT_X 0x1a +#define ABS_TILT_Y 0x1b +#define ABS_TOOL_WIDTH 0x1c + +#define ABS_VOLUME 0x20 + +#define ABS_MISC 0x28 + +/* + * 0x2e is reserved and should not be used in input drivers. + * It was used by HID as ABS_MISC+6 and userspace needs to detect if + * the next ABS_* event is correct or is just ABS_MISC + n. + * We define here ABS_RESERVED so userspace can rely on it and detect + * the situation described above. + */ +#define ABS_RESERVED 0x2e + +#define ABS_MT_SLOT 0x2f /* MT slot being modified */ +#define ABS_MT_TOUCH_MAJOR 0x30 /* Major axis of touching ellipse */ +#define ABS_MT_TOUCH_MINOR 0x31 /* Minor axis (omit if circular) */ +#define ABS_MT_WIDTH_MAJOR 0x32 /* Major axis of approaching ellipse */ +#define ABS_MT_WIDTH_MINOR 0x33 /* Minor axis (omit if circular) */ +#define ABS_MT_ORIENTATION 0x34 /* Ellipse orientation */ +#define ABS_MT_POSITION_X 0x35 /* Center X touch position */ +#define ABS_MT_POSITION_Y 0x36 /* Center Y touch position */ +#define ABS_MT_TOOL_TYPE 0x37 /* Type of touching device */ +#define ABS_MT_BLOB_ID 0x38 /* Group a set of packets as a blob */ +#define ABS_MT_TRACKING_ID 0x39 /* Unique ID of initiated contact */ +#define ABS_MT_PRESSURE 0x3a /* Pressure on contact area */ +#define ABS_MT_DISTANCE 0x3b /* Contact hover distance */ +#define ABS_MT_TOOL_X 0x3c /* Center X tool position */ +#define ABS_MT_TOOL_Y 0x3d /* Center Y tool position */ + + +#define ABS_MAX 0x3f +#define ABS_CNT (ABS_MAX+1) + +/* + * Switch events + */ + +#define SW_LID 0x00 /* set = lid shut */ +#define SW_TABLET_MODE 0x01 /* set = tablet mode */ +#define SW_HEADPHONE_INSERT 0x02 /* set = inserted */ +#define SW_RFKILL_ALL 0x03 /* rfkill master switch, type "any" + set = radio enabled */ +#define SW_RADIO SW_RFKILL_ALL /* deprecated */ +#define SW_MICROPHONE_INSERT 0x04 /* set = inserted */ +#define SW_DOCK 0x05 /* set = plugged into dock */ +#define SW_LINEOUT_INSERT 0x06 /* set = inserted */ +#define SW_JACK_PHYSICAL_INSERT 0x07 /* set = mechanical switch set */ +#define SW_VIDEOOUT_INSERT 0x08 /* set = inserted */ +#define SW_CAMERA_LENS_COVER 0x09 /* set = lens covered */ +#define SW_KEYPAD_SLIDE 0x0a /* set = keypad slide out */ +#define SW_FRONT_PROXIMITY 0x0b /* set = front proximity sensor active */ +#define SW_ROTATE_LOCK 0x0c /* set = rotate locked/disabled */ +#define SW_LINEIN_INSERT 0x0d /* set = inserted */ +#define SW_MUTE_DEVICE 0x0e /* set = device disabled */ +#define SW_PEN_INSERTED 0x0f /* set = pen inserted */ +#define SW_MACHINE_COVER 0x10 /* set = cover closed */ +#define SW_MAX_ 0x10 +#define SW_CNT (SW_MAX_+1) + +/* + * Misc events + */ + +#define MSC_SERIAL 0x00 +#define MSC_PULSELED 0x01 +#define MSC_GESTURE 0x02 +#define MSC_RAW 0x03 +#define MSC_SCAN 0x04 +#define MSC_TIMESTAMP 0x05 +#define MSC_MAX 0x07 +#define MSC_CNT (MSC_MAX+1) + +/* + * LEDs + */ + +#define LED_NUML 0x00 +#define LED_CAPSL 0x01 +#define LED_SCROLLL 0x02 +#define LED_COMPOSE 0x03 +#define LED_KANA 0x04 +#define LED_SLEEP 0x05 +#define LED_SUSPEND 0x06 +#define LED_MUTE 0x07 +#define LED_MISC 0x08 +#define LED_MAIL 0x09 +#define LED_CHARGING 0x0a +#define LED_MAX 0x0f +#define LED_CNT (LED_MAX+1) + +/* + * Autorepeat values + */ + +#define REP_DELAY 0x00 +#define REP_PERIOD 0x01 +#define REP_MAX 0x01 +#define REP_CNT (REP_MAX+1) + +/* + * Sounds + */ + +#define SND_CLICK 0x00 +#define SND_BELL 0x01 +#define SND_TONE 0x02 +#define SND_MAX 0x07 +#define SND_CNT (SND_MAX+1) + +#endif diff --git a/include/standard-headers/linux/input.h b/include/standard-headers/linux/input.h new file mode 100644 index 000000000..f89c98619 --- /dev/null +++ b/include/standard-headers/linux/input.h @@ -0,0 +1,511 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ +/* + * Copyright (c) 1999-2002 Vojtech Pavlik + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 as published by + * the Free Software Foundation. + */ +#ifndef _INPUT_H +#define _INPUT_H + + +#include +#include +#include "standard-headers/linux/types.h" + +#include "standard-headers/linux/input-event-codes.h" + +/* + * The event structure itself + * Note that __USE_TIME_BITS64 is defined by libc based on + * application's request to use 64 bit time_t. + */ + +struct input_event { +#if (HOST_LONG_BITS != 32 || !defined(__USE_TIME_BITS64)) && !defined(__KERNEL__) + struct timeval time; +#define input_event_sec time.tv_sec +#define input_event_usec time.tv_usec +#else + unsigned long __sec; +#if defined(__sparc__) && defined(__arch64__) + unsigned int __usec; + unsigned int __pad; +#else + unsigned long __usec; +#endif +#define input_event_sec __sec +#define input_event_usec __usec +#endif + uint16_t type; + uint16_t code; + int32_t value; +}; + +/* + * Protocol version. + */ + +#define EV_VERSION 0x010001 + +/* + * IOCTLs (0x00 - 0x7f) + */ + +struct input_id { + uint16_t bustype; + uint16_t vendor; + uint16_t product; + uint16_t version; +}; + +/** + * struct input_absinfo - used by EVIOCGABS/EVIOCSABS ioctls + * @value: latest reported value for the axis. + * @minimum: specifies minimum value for the axis. + * @maximum: specifies maximum value for the axis. + * @fuzz: specifies fuzz value that is used to filter noise from + * the event stream. + * @flat: values that are within this value will be discarded by + * joydev interface and reported as 0 instead. + * @resolution: specifies resolution for the values reported for + * the axis. + * + * Note that input core does not clamp reported values to the + * [minimum, maximum] limits, such task is left to userspace. + * + * The default resolution for main axes (ABS_X, ABS_Y, ABS_Z) + * is reported in units per millimeter (units/mm), resolution + * for rotational axes (ABS_RX, ABS_RY, ABS_RZ) is reported + * in units per radian. + * When INPUT_PROP_ACCELEROMETER is set the resolution changes. + * The main axes (ABS_X, ABS_Y, ABS_Z) are then reported in + * in units per g (units/g) and in units per degree per second + * (units/deg/s) for rotational axes (ABS_RX, ABS_RY, ABS_RZ). + */ +struct input_absinfo { + int32_t value; + int32_t minimum; + int32_t maximum; + int32_t fuzz; + int32_t flat; + int32_t resolution; +}; + +/** + * struct input_keymap_entry - used by EVIOCGKEYCODE/EVIOCSKEYCODE ioctls + * @scancode: scancode represented in machine-endian form. + * @len: length of the scancode that resides in @scancode buffer. + * @index: index in the keymap, may be used instead of scancode + * @flags: allows to specify how kernel should handle the request. For + * example, setting INPUT_KEYMAP_BY_INDEX flag indicates that kernel + * should perform lookup in keymap by @index instead of @scancode + * @keycode: key code assigned to this scancode + * + * The structure is used to retrieve and modify keymap data. Users have + * option of performing lookup either by @scancode itself or by @index + * in keymap entry. EVIOCGKEYCODE will also return scancode or index + * (depending on which element was used to perform lookup). + */ +struct input_keymap_entry { +#define INPUT_KEYMAP_BY_INDEX (1 << 0) + uint8_t flags; + uint8_t len; + uint16_t index; + uint32_t keycode; + uint8_t scancode[32]; +}; + +struct input_mask { + uint32_t type; + uint32_t codes_size; + uint64_t codes_ptr; +}; + +#define EVIOCGVERSION _IOR('E', 0x01, int) /* get driver version */ +#define EVIOCGID _IOR('E', 0x02, struct input_id) /* get device ID */ +#define EVIOCGREP _IOR('E', 0x03, unsigned int[2]) /* get repeat settings */ +#define EVIOCSREP _IOW('E', 0x03, unsigned int[2]) /* set repeat settings */ + +#define EVIOCGKEYCODE _IOR('E', 0x04, unsigned int[2]) /* get keycode */ +#define EVIOCGKEYCODE_V2 _IOR('E', 0x04, struct input_keymap_entry) +#define EVIOCSKEYCODE _IOW('E', 0x04, unsigned int[2]) /* set keycode */ +#define EVIOCSKEYCODE_V2 _IOW('E', 0x04, struct input_keymap_entry) + +#define EVIOCGNAME(len) _IOC(_IOC_READ, 'E', 0x06, len) /* get device name */ +#define EVIOCGPHYS(len) _IOC(_IOC_READ, 'E', 0x07, len) /* get physical location */ +#define EVIOCGUNIQ(len) _IOC(_IOC_READ, 'E', 0x08, len) /* get unique identifier */ +#define EVIOCGPROP(len) _IOC(_IOC_READ, 'E', 0x09, len) /* get device properties */ + +/** + * EVIOCGMTSLOTS(len) - get MT slot values + * @len: size of the data buffer in bytes + * + * The ioctl buffer argument should be binary equivalent to + * + * struct input_mt_request_layout { + * uint32_t code; + * int32_t values[num_slots]; + * }; + * + * where num_slots is the (arbitrary) number of MT slots to extract. + * + * The ioctl size argument (len) is the size of the buffer, which + * should satisfy len = (num_slots + 1) * sizeof(int32_t). If len is + * too small to fit all available slots, the first num_slots are + * returned. + * + * Before the call, code is set to the wanted ABS_MT event type. On + * return, values[] is filled with the slot values for the specified + * ABS_MT code. + * + * If the request code is not an ABS_MT value, -EINVAL is returned. + */ +#define EVIOCGMTSLOTS(len) _IOC(_IOC_READ, 'E', 0x0a, len) + +#define EVIOCGKEY(len) _IOC(_IOC_READ, 'E', 0x18, len) /* get global key state */ +#define EVIOCGLED(len) _IOC(_IOC_READ, 'E', 0x19, len) /* get all LEDs */ +#define EVIOCGSND(len) _IOC(_IOC_READ, 'E', 0x1a, len) /* get all sounds status */ +#define EVIOCGSW(len) _IOC(_IOC_READ, 'E', 0x1b, len) /* get all switch states */ + +#define EVIOCGBIT(ev,len) _IOC(_IOC_READ, 'E', 0x20 + (ev), len) /* get event bits */ +#define EVIOCGABS(abs) _IOR('E', 0x40 + (abs), struct input_absinfo) /* get abs value/limits */ +#define EVIOCSABS(abs) _IOW('E', 0xc0 + (abs), struct input_absinfo) /* set abs value/limits */ + +#define EVIOCSFF _IOW('E', 0x80, struct ff_effect) /* send a force effect to a force feedback device */ +#define EVIOCRMFF _IOW('E', 0x81, int) /* Erase a force effect */ +#define EVIOCGEFFECTS _IOR('E', 0x84, int) /* Report number of effects playable at the same time */ + +#define EVIOCGRAB _IOW('E', 0x90, int) /* Grab/Release device */ +#define EVIOCREVOKE _IOW('E', 0x91, int) /* Revoke device access */ + +/** + * EVIOCGMASK - Retrieve current event mask + * + * This ioctl allows user to retrieve the current event mask for specific + * event type. The argument must be of type "struct input_mask" and + * specifies the event type to query, the address of the receive buffer and + * the size of the receive buffer. + * + * The event mask is a per-client mask that specifies which events are + * forwarded to the client. Each event code is represented by a single bit + * in the event mask. If the bit is set, the event is passed to the client + * normally. Otherwise, the event is filtered and will never be queued on + * the client's receive buffer. + * + * Event masks do not affect global state of the input device. They only + * affect the file descriptor they are applied to. + * + * The default event mask for a client has all bits set, i.e. all events + * are forwarded to the client. If the kernel is queried for an unknown + * event type or if the receive buffer is larger than the number of + * event codes known to the kernel, the kernel returns all zeroes for those + * codes. + * + * At maximum, codes_size bytes are copied. + * + * This ioctl may fail with ENODEV in case the file is revoked, EFAULT + * if the receive-buffer points to invalid memory, or EINVAL if the kernel + * does not implement the ioctl. + */ +#define EVIOCGMASK _IOR('E', 0x92, struct input_mask) /* Get event-masks */ + +/** + * EVIOCSMASK - Set event mask + * + * This ioctl is the counterpart to EVIOCGMASK. Instead of receiving the + * current event mask, this changes the client's event mask for a specific + * type. See EVIOCGMASK for a description of event-masks and the + * argument-type. + * + * This ioctl provides full forward compatibility. If the passed event type + * is unknown to the kernel, or if the number of event codes specified in + * the mask is bigger than what is known to the kernel, the ioctl is still + * accepted and applied. However, any unknown codes are left untouched and + * stay cleared. That means, the kernel always filters unknown codes + * regardless of what the client requests. If the new mask doesn't cover + * all known event-codes, all remaining codes are automatically cleared and + * thus filtered. + * + * This ioctl may fail with ENODEV in case the file is revoked. EFAULT is + * returned if the receive-buffer points to invalid memory. EINVAL is returned + * if the kernel does not implement the ioctl. + */ +#define EVIOCSMASK _IOW('E', 0x93, struct input_mask) /* Set event-masks */ + +#define EVIOCSCLOCKID _IOW('E', 0xa0, int) /* Set clockid to be used for timestamps */ + +/* + * IDs. + */ + +#define ID_BUS 0 +#define ID_VENDOR 1 +#define ID_PRODUCT 2 +#define ID_VERSION 3 + +#define BUS_PCI 0x01 +#define BUS_ISAPNP 0x02 +#define BUS_USB 0x03 +#define BUS_HIL 0x04 +#define BUS_BLUETOOTH 0x05 +#define BUS_VIRTUAL 0x06 + +#define BUS_ISA 0x10 +#define BUS_I8042 0x11 +#define BUS_XTKBD 0x12 +#define BUS_RS232 0x13 +#define BUS_GAMEPORT 0x14 +#define BUS_PARPORT 0x15 +#define BUS_AMIGA 0x16 +#define BUS_ADB 0x17 +#define BUS_I2C 0x18 +#define BUS_HOST 0x19 +#define BUS_GSC 0x1A +#define BUS_ATARI 0x1B +#define BUS_SPI 0x1C +#define BUS_RMI 0x1D +#define BUS_CEC 0x1E +#define BUS_INTEL_ISHTP 0x1F + +/* + * MT_TOOL types + */ +#define MT_TOOL_FINGER 0x00 +#define MT_TOOL_PEN 0x01 +#define MT_TOOL_PALM 0x02 +#define MT_TOOL_DIAL 0x0a +#define MT_TOOL_MAX 0x0f + +/* + * Values describing the status of a force-feedback effect + */ +#define FF_STATUS_STOPPED 0x00 +#define FF_STATUS_PLAYING 0x01 +#define FF_STATUS_MAX 0x01 + +/* + * Structures used in ioctls to upload effects to a device + * They are pieces of a bigger structure (called ff_effect) + */ + +/* + * All duration values are expressed in ms. Values above 32767 ms (0x7fff) + * should not be used and have unspecified results. + */ + +/** + * struct ff_replay - defines scheduling of the force-feedback effect + * @length: duration of the effect + * @delay: delay before effect should start playing + */ +struct ff_replay { + uint16_t length; + uint16_t delay; +}; + +/** + * struct ff_trigger - defines what triggers the force-feedback effect + * @button: number of the button triggering the effect + * @interval: controls how soon the effect can be re-triggered + */ +struct ff_trigger { + uint16_t button; + uint16_t interval; +}; + +/** + * struct ff_envelope - generic force-feedback effect envelope + * @attack_length: duration of the attack (ms) + * @attack_level: level at the beginning of the attack + * @fade_length: duration of fade (ms) + * @fade_level: level at the end of fade + * + * The @attack_level and @fade_level are absolute values; when applying + * envelope force-feedback core will convert to positive/negative + * value based on polarity of the default level of the effect. + * Valid range for the attack and fade levels is 0x0000 - 0x7fff + */ +struct ff_envelope { + uint16_t attack_length; + uint16_t attack_level; + uint16_t fade_length; + uint16_t fade_level; +}; + +/** + * struct ff_constant_effect - defines parameters of a constant force-feedback effect + * @level: strength of the effect; may be negative + * @envelope: envelope data + */ +struct ff_constant_effect { + int16_t level; + struct ff_envelope envelope; +}; + +/** + * struct ff_ramp_effect - defines parameters of a ramp force-feedback effect + * @start_level: beginning strength of the effect; may be negative + * @end_level: final strength of the effect; may be negative + * @envelope: envelope data + */ +struct ff_ramp_effect { + int16_t start_level; + int16_t end_level; + struct ff_envelope envelope; +}; + +/** + * struct ff_condition_effect - defines a spring or friction force-feedback effect + * @right_saturation: maximum level when joystick moved all way to the right + * @left_saturation: same for the left side + * @right_coeff: controls how fast the force grows when the joystick moves + * to the right + * @left_coeff: same for the left side + * @deadband: size of the dead zone, where no force is produced + * @center: position of the dead zone + */ +struct ff_condition_effect { + uint16_t right_saturation; + uint16_t left_saturation; + + int16_t right_coeff; + int16_t left_coeff; + + uint16_t deadband; + int16_t center; +}; + +/** + * struct ff_periodic_effect - defines parameters of a periodic force-feedback effect + * @waveform: kind of the effect (wave) + * @period: period of the wave (ms) + * @magnitude: peak value + * @offset: mean value of the wave (roughly) + * @phase: 'horizontal' shift + * @envelope: envelope data + * @custom_len: number of samples (FF_CUSTOM only) + * @custom_data: buffer of samples (FF_CUSTOM only) + * + * Known waveforms - FF_SQUARE, FF_TRIANGLE, FF_SINE, FF_SAW_UP, + * FF_SAW_DOWN, FF_CUSTOM. The exact syntax FF_CUSTOM is undefined + * for the time being as no driver supports it yet. + * + * Note: the data pointed by custom_data is copied by the driver. + * You can therefore dispose of the memory after the upload/update. + */ +struct ff_periodic_effect { + uint16_t waveform; + uint16_t period; + int16_t magnitude; + int16_t offset; + uint16_t phase; + + struct ff_envelope envelope; + + uint32_t custom_len; + int16_t *custom_data; +}; + +/** + * struct ff_rumble_effect - defines parameters of a periodic force-feedback effect + * @strong_magnitude: magnitude of the heavy motor + * @weak_magnitude: magnitude of the light one + * + * Some rumble pads have two motors of different weight. Strong_magnitude + * represents the magnitude of the vibration generated by the heavy one. + */ +struct ff_rumble_effect { + uint16_t strong_magnitude; + uint16_t weak_magnitude; +}; + +/** + * struct ff_effect - defines force feedback effect + * @type: type of the effect (FF_CONSTANT, FF_PERIODIC, FF_RAMP, FF_SPRING, + * FF_FRICTION, FF_DAMPER, FF_RUMBLE, FF_INERTIA, or FF_CUSTOM) + * @id: an unique id assigned to an effect + * @direction: direction of the effect + * @trigger: trigger conditions (struct ff_trigger) + * @replay: scheduling of the effect (struct ff_replay) + * @u: effect-specific structure (one of ff_constant_effect, ff_ramp_effect, + * ff_periodic_effect, ff_condition_effect, ff_rumble_effect) further + * defining effect parameters + * + * This structure is sent through ioctl from the application to the driver. + * To create a new effect application should set its @id to -1; the kernel + * will return assigned @id which can later be used to update or delete + * this effect. + * + * Direction of the effect is encoded as follows: + * 0 deg -> 0x0000 (down) + * 90 deg -> 0x4000 (left) + * 180 deg -> 0x8000 (up) + * 270 deg -> 0xC000 (right) + */ +struct ff_effect { + uint16_t type; + int16_t id; + uint16_t direction; + struct ff_trigger trigger; + struct ff_replay replay; + + union { + struct ff_constant_effect constant; + struct ff_ramp_effect ramp; + struct ff_periodic_effect periodic; + struct ff_condition_effect condition[2]; /* One for each axis */ + struct ff_rumble_effect rumble; + } u; +}; + +/* + * Force feedback effect types + */ + +#define FF_RUMBLE 0x50 +#define FF_PERIODIC 0x51 +#define FF_CONSTANT 0x52 +#define FF_SPRING 0x53 +#define FF_FRICTION 0x54 +#define FF_DAMPER 0x55 +#define FF_INERTIA 0x56 +#define FF_RAMP 0x57 + +#define FF_EFFECT_MIN FF_RUMBLE +#define FF_EFFECT_MAX FF_RAMP + +/* + * Force feedback periodic effect types + */ + +#define FF_SQUARE 0x58 +#define FF_TRIANGLE 0x59 +#define FF_SINE 0x5a +#define FF_SAW_UP 0x5b +#define FF_SAW_DOWN 0x5c +#define FF_CUSTOM 0x5d + +#define FF_WAVEFORM_MIN FF_SQUARE +#define FF_WAVEFORM_MAX FF_CUSTOM + +/* + * Set ff device properties + */ + +#define FF_GAIN 0x60 +#define FF_AUTOCENTER 0x61 + +/* + * ff->playback(effect_id = FF_GAIN) is the first effect_id to + * cause a collision with another ff method, in this case ff->set_gain(). + * Therefore the greatest safe value for effect_id is FF_GAIN - 1, + * and thus the total number of effects should never exceed FF_GAIN. + */ +#define FF_MAX_EFFECTS FF_GAIN + +#define FF_MAX 0x7f +#define FF_CNT (FF_MAX+1) + +#endif /* _INPUT_H */ diff --git a/include/standard-headers/linux/kernel.h b/include/standard-headers/linux/kernel.h new file mode 100644 index 000000000..7848c5ae2 --- /dev/null +++ b/include/standard-headers/linux/kernel.h @@ -0,0 +1,8 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ +#ifndef _LINUX_KERNEL_H +#define _LINUX_KERNEL_H + +#include "standard-headers/linux/sysinfo.h" +#include "standard-headers/linux/const.h" + +#endif /* _LINUX_KERNEL_H */ diff --git a/include/standard-headers/linux/pci_regs.h b/include/standard-headers/linux/pci_regs.h new file mode 100644 index 000000000..e709ae823 --- /dev/null +++ b/include/standard-headers/linux/pci_regs.h @@ -0,0 +1,1095 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ +/* + * PCI standard defines + * Copyright 1994, Drew Eckhardt + * Copyright 1997--1999 Martin Mares + * + * For more information, please consult the following manuals (look at + * http://www.pcisig.com/ for how to get them): + * + * PCI BIOS Specification + * PCI Local Bus Specification + * PCI to PCI Bridge Specification + * PCI System Design Guide + * + * For HyperTransport information, please consult the following manuals + * from http://www.hypertransport.org : + * + * The HyperTransport I/O Link Specification + */ + +#ifndef LINUX_PCI_REGS_H +#define LINUX_PCI_REGS_H + +/* + * Conventional PCI and PCI-X Mode 1 devices have 256 bytes of + * configuration space. PCI-X Mode 2 and PCIe devices have 4096 bytes of + * configuration space. + */ +#define PCI_CFG_SPACE_SIZE 256 +#define PCI_CFG_SPACE_EXP_SIZE 4096 + +/* + * Under PCI, each device has 256 bytes of configuration address space, + * of which the first 64 bytes are standardized as follows: + */ +#define PCI_STD_HEADER_SIZEOF 64 +#define PCI_STD_NUM_BARS 6 /* Number of standard BARs */ +#define PCI_VENDOR_ID 0x00 /* 16 bits */ +#define PCI_DEVICE_ID 0x02 /* 16 bits */ +#define PCI_COMMAND 0x04 /* 16 bits */ +#define PCI_COMMAND_IO 0x1 /* Enable response in I/O space */ +#define PCI_COMMAND_MEMORY 0x2 /* Enable response in Memory space */ +#define PCI_COMMAND_MASTER 0x4 /* Enable bus mastering */ +#define PCI_COMMAND_SPECIAL 0x8 /* Enable response to special cycles */ +#define PCI_COMMAND_INVALIDATE 0x10 /* Use memory write and invalidate */ +#define PCI_COMMAND_VGA_PALETTE 0x20 /* Enable palette snooping */ +#define PCI_COMMAND_PARITY 0x40 /* Enable parity checking */ +#define PCI_COMMAND_WAIT 0x80 /* Enable address/data stepping */ +#define PCI_COMMAND_SERR 0x100 /* Enable SERR */ +#define PCI_COMMAND_FAST_BACK 0x200 /* Enable back-to-back writes */ +#define PCI_COMMAND_INTX_DISABLE 0x400 /* INTx Emulation Disable */ + +#define PCI_STATUS 0x06 /* 16 bits */ +#define PCI_STATUS_IMM_READY 0x01 /* Immediate Readiness */ +#define PCI_STATUS_INTERRUPT 0x08 /* Interrupt status */ +#define PCI_STATUS_CAP_LIST 0x10 /* Support Capability List */ +#define PCI_STATUS_66MHZ 0x20 /* Support 66 MHz PCI 2.1 bus */ +#define PCI_STATUS_UDF 0x40 /* Support User Definable Features [obsolete] */ +#define PCI_STATUS_FAST_BACK 0x80 /* Accept fast-back to back */ +#define PCI_STATUS_PARITY 0x100 /* Detected parity error */ +#define PCI_STATUS_DEVSEL_MASK 0x600 /* DEVSEL timing */ +#define PCI_STATUS_DEVSEL_FAST 0x000 +#define PCI_STATUS_DEVSEL_MEDIUM 0x200 +#define PCI_STATUS_DEVSEL_SLOW 0x400 +#define PCI_STATUS_SIG_TARGET_ABORT 0x800 /* Set on target abort */ +#define PCI_STATUS_REC_TARGET_ABORT 0x1000 /* Master ack of " */ +#define PCI_STATUS_REC_MASTER_ABORT 0x2000 /* Set on master abort */ +#define PCI_STATUS_SIG_SYSTEM_ERROR 0x4000 /* Set when we drive SERR */ +#define PCI_STATUS_DETECTED_PARITY 0x8000 /* Set on parity error */ + +#define PCI_CLASS_REVISION 0x08 /* High 24 bits are class, low 8 revision */ +#define PCI_REVISION_ID 0x08 /* Revision ID */ +#define PCI_CLASS_PROG 0x09 /* Reg. Level Programming Interface */ +#define PCI_CLASS_DEVICE 0x0a /* Device class */ + +#define PCI_CACHE_LINE_SIZE 0x0c /* 8 bits */ +#define PCI_LATENCY_TIMER 0x0d /* 8 bits */ +#define PCI_HEADER_TYPE 0x0e /* 8 bits */ +#define PCI_HEADER_TYPE_MASK 0x7f +#define PCI_HEADER_TYPE_NORMAL 0 +#define PCI_HEADER_TYPE_BRIDGE 1 +#define PCI_HEADER_TYPE_CARDBUS 2 + +#define PCI_BIST 0x0f /* 8 bits */ +#define PCI_BIST_CODE_MASK 0x0f /* Return result */ +#define PCI_BIST_START 0x40 /* 1 to start BIST, 2 secs or less */ +#define PCI_BIST_CAPABLE 0x80 /* 1 if BIST capable */ + +/* + * Base addresses specify locations in memory or I/O space. + * Decoded size can be determined by writing a value of + * 0xffffffff to the register, and reading it back. Only + * 1 bits are decoded. + */ +#define PCI_BASE_ADDRESS_0 0x10 /* 32 bits */ +#define PCI_BASE_ADDRESS_1 0x14 /* 32 bits [htype 0,1 only] */ +#define PCI_BASE_ADDRESS_2 0x18 /* 32 bits [htype 0 only] */ +#define PCI_BASE_ADDRESS_3 0x1c /* 32 bits */ +#define PCI_BASE_ADDRESS_4 0x20 /* 32 bits */ +#define PCI_BASE_ADDRESS_5 0x24 /* 32 bits */ +#define PCI_BASE_ADDRESS_SPACE 0x01 /* 0 = memory, 1 = I/O */ +#define PCI_BASE_ADDRESS_SPACE_IO 0x01 +#define PCI_BASE_ADDRESS_SPACE_MEMORY 0x00 +#define PCI_BASE_ADDRESS_MEM_TYPE_MASK 0x06 +#define PCI_BASE_ADDRESS_MEM_TYPE_32 0x00 /* 32 bit address */ +#define PCI_BASE_ADDRESS_MEM_TYPE_1M 0x02 /* Below 1M [obsolete] */ +#define PCI_BASE_ADDRESS_MEM_TYPE_64 0x04 /* 64 bit address */ +#define PCI_BASE_ADDRESS_MEM_PREFETCH 0x08 /* prefetchable? */ +#define PCI_BASE_ADDRESS_MEM_MASK (~0x0fUL) +#define PCI_BASE_ADDRESS_IO_MASK (~0x03UL) +/* bit 1 is reserved if address_space = 1 */ + +/* Header type 0 (normal devices) */ +#define PCI_CARDBUS_CIS 0x28 +#define PCI_SUBSYSTEM_VENDOR_ID 0x2c +#define PCI_SUBSYSTEM_ID 0x2e +#define PCI_ROM_ADDRESS 0x30 /* Bits 31..11 are address, 10..1 reserved */ +#define PCI_ROM_ADDRESS_ENABLE 0x01 +#define PCI_ROM_ADDRESS_MASK (~0x7ffU) + +#define PCI_CAPABILITY_LIST 0x34 /* Offset of first capability list entry */ + +/* 0x35-0x3b are reserved */ +#define PCI_INTERRUPT_LINE 0x3c /* 8 bits */ +#define PCI_INTERRUPT_PIN 0x3d /* 8 bits */ +#define PCI_MIN_GNT 0x3e /* 8 bits */ +#define PCI_MAX_LAT 0x3f /* 8 bits */ + +/* Header type 1 (PCI-to-PCI bridges) */ +#define PCI_PRIMARY_BUS 0x18 /* Primary bus number */ +#define PCI_SECONDARY_BUS 0x19 /* Secondary bus number */ +#define PCI_SUBORDINATE_BUS 0x1a /* Highest bus number behind the bridge */ +#define PCI_SEC_LATENCY_TIMER 0x1b /* Latency timer for secondary interface */ +#define PCI_IO_BASE 0x1c /* I/O range behind the bridge */ +#define PCI_IO_LIMIT 0x1d +#define PCI_IO_RANGE_TYPE_MASK 0x0fUL /* I/O bridging type */ +#define PCI_IO_RANGE_TYPE_16 0x00 +#define PCI_IO_RANGE_TYPE_32 0x01 +#define PCI_IO_RANGE_MASK (~0x0fUL) /* Standard 4K I/O windows */ +#define PCI_IO_1K_RANGE_MASK (~0x03UL) /* Intel 1K I/O windows */ +#define PCI_SEC_STATUS 0x1e /* Secondary status register, only bit 14 used */ +#define PCI_MEMORY_BASE 0x20 /* Memory range behind */ +#define PCI_MEMORY_LIMIT 0x22 +#define PCI_MEMORY_RANGE_TYPE_MASK 0x0fUL +#define PCI_MEMORY_RANGE_MASK (~0x0fUL) +#define PCI_PREF_MEMORY_BASE 0x24 /* Prefetchable memory range behind */ +#define PCI_PREF_MEMORY_LIMIT 0x26 +#define PCI_PREF_RANGE_TYPE_MASK 0x0fUL +#define PCI_PREF_RANGE_TYPE_32 0x00 +#define PCI_PREF_RANGE_TYPE_64 0x01 +#define PCI_PREF_RANGE_MASK (~0x0fUL) +#define PCI_PREF_BASE_UPPER32 0x28 /* Upper half of prefetchable memory range */ +#define PCI_PREF_LIMIT_UPPER32 0x2c +#define PCI_IO_BASE_UPPER16 0x30 /* Upper half of I/O addresses */ +#define PCI_IO_LIMIT_UPPER16 0x32 +/* 0x34 same as for htype 0 */ +/* 0x35-0x3b is reserved */ +#define PCI_ROM_ADDRESS1 0x38 /* Same as PCI_ROM_ADDRESS, but for htype 1 */ +/* 0x3c-0x3d are same as for htype 0 */ +#define PCI_BRIDGE_CONTROL 0x3e +#define PCI_BRIDGE_CTL_PARITY 0x01 /* Enable parity detection on secondary interface */ +#define PCI_BRIDGE_CTL_SERR 0x02 /* The same for SERR forwarding */ +#define PCI_BRIDGE_CTL_ISA 0x04 /* Enable ISA mode */ +#define PCI_BRIDGE_CTL_VGA 0x08 /* Forward VGA addresses */ +#define PCI_BRIDGE_CTL_MASTER_ABORT 0x20 /* Report master aborts */ +#define PCI_BRIDGE_CTL_BUS_RESET 0x40 /* Secondary bus reset */ +#define PCI_BRIDGE_CTL_FAST_BACK 0x80 /* Fast Back2Back enabled on secondary interface */ + +/* Header type 2 (CardBus bridges) */ +#define PCI_CB_CAPABILITY_LIST 0x14 +/* 0x15 reserved */ +#define PCI_CB_SEC_STATUS 0x16 /* Secondary status */ +#define PCI_CB_PRIMARY_BUS 0x18 /* PCI bus number */ +#define PCI_CB_CARD_BUS 0x19 /* CardBus bus number */ +#define PCI_CB_SUBORDINATE_BUS 0x1a /* Subordinate bus number */ +#define PCI_CB_LATENCY_TIMER 0x1b /* CardBus latency timer */ +#define PCI_CB_MEMORY_BASE_0 0x1c +#define PCI_CB_MEMORY_LIMIT_0 0x20 +#define PCI_CB_MEMORY_BASE_1 0x24 +#define PCI_CB_MEMORY_LIMIT_1 0x28 +#define PCI_CB_IO_BASE_0 0x2c +#define PCI_CB_IO_BASE_0_HI 0x2e +#define PCI_CB_IO_LIMIT_0 0x30 +#define PCI_CB_IO_LIMIT_0_HI 0x32 +#define PCI_CB_IO_BASE_1 0x34 +#define PCI_CB_IO_BASE_1_HI 0x36 +#define PCI_CB_IO_LIMIT_1 0x38 +#define PCI_CB_IO_LIMIT_1_HI 0x3a +#define PCI_CB_IO_RANGE_MASK (~0x03UL) +/* 0x3c-0x3d are same as for htype 0 */ +#define PCI_CB_BRIDGE_CONTROL 0x3e +#define PCI_CB_BRIDGE_CTL_PARITY 0x01 /* Similar to standard bridge control register */ +#define PCI_CB_BRIDGE_CTL_SERR 0x02 +#define PCI_CB_BRIDGE_CTL_ISA 0x04 +#define PCI_CB_BRIDGE_CTL_VGA 0x08 +#define PCI_CB_BRIDGE_CTL_MASTER_ABORT 0x20 +#define PCI_CB_BRIDGE_CTL_CB_RESET 0x40 /* CardBus reset */ +#define PCI_CB_BRIDGE_CTL_16BIT_INT 0x80 /* Enable interrupt for 16-bit cards */ +#define PCI_CB_BRIDGE_CTL_PREFETCH_MEM0 0x100 /* Prefetch enable for both memory regions */ +#define PCI_CB_BRIDGE_CTL_PREFETCH_MEM1 0x200 +#define PCI_CB_BRIDGE_CTL_POST_WRITES 0x400 +#define PCI_CB_SUBSYSTEM_VENDOR_ID 0x40 +#define PCI_CB_SUBSYSTEM_ID 0x42 +#define PCI_CB_LEGACY_MODE_BASE 0x44 /* 16-bit PC Card legacy mode base address (ExCa) */ +/* 0x48-0x7f reserved */ + +/* Capability lists */ + +#define PCI_CAP_LIST_ID 0 /* Capability ID */ +#define PCI_CAP_ID_PM 0x01 /* Power Management */ +#define PCI_CAP_ID_AGP 0x02 /* Accelerated Graphics Port */ +#define PCI_CAP_ID_VPD 0x03 /* Vital Product Data */ +#define PCI_CAP_ID_SLOTID 0x04 /* Slot Identification */ +#define PCI_CAP_ID_MSI 0x05 /* Message Signalled Interrupts */ +#define PCI_CAP_ID_CHSWP 0x06 /* CompactPCI HotSwap */ +#define PCI_CAP_ID_PCIX 0x07 /* PCI-X */ +#define PCI_CAP_ID_HT 0x08 /* HyperTransport */ +#define PCI_CAP_ID_VNDR 0x09 /* Vendor-Specific */ +#define PCI_CAP_ID_DBG 0x0A /* Debug port */ +#define PCI_CAP_ID_CCRC 0x0B /* CompactPCI Central Resource Control */ +#define PCI_CAP_ID_SHPC 0x0C /* PCI Standard Hot-Plug Controller */ +#define PCI_CAP_ID_SSVID 0x0D /* Bridge subsystem vendor/device ID */ +#define PCI_CAP_ID_AGP3 0x0E /* AGP Target PCI-PCI bridge */ +#define PCI_CAP_ID_SECDEV 0x0F /* Secure Device */ +#define PCI_CAP_ID_EXP 0x10 /* PCI Express */ +#define PCI_CAP_ID_MSIX 0x11 /* MSI-X */ +#define PCI_CAP_ID_SATA 0x12 /* SATA Data/Index Conf. */ +#define PCI_CAP_ID_AF 0x13 /* PCI Advanced Features */ +#define PCI_CAP_ID_EA 0x14 /* PCI Enhanced Allocation */ +#define PCI_CAP_ID_MAX PCI_CAP_ID_EA +#define PCI_CAP_LIST_NEXT 1 /* Next capability in the list */ +#define PCI_CAP_FLAGS 2 /* Capability defined flags (16 bits) */ +#define PCI_CAP_SIZEOF 4 + +/* Power Management Registers */ + +#define PCI_PM_PMC 2 /* PM Capabilities Register */ +#define PCI_PM_CAP_VER_MASK 0x0007 /* Version */ +#define PCI_PM_CAP_PME_CLOCK 0x0008 /* PME clock required */ +#define PCI_PM_CAP_RESERVED 0x0010 /* Reserved field */ +#define PCI_PM_CAP_DSI 0x0020 /* Device specific initialization */ +#define PCI_PM_CAP_AUX_POWER 0x01C0 /* Auxiliary power support mask */ +#define PCI_PM_CAP_D1 0x0200 /* D1 power state support */ +#define PCI_PM_CAP_D2 0x0400 /* D2 power state support */ +#define PCI_PM_CAP_PME 0x0800 /* PME pin supported */ +#define PCI_PM_CAP_PME_MASK 0xF800 /* PME Mask of all supported states */ +#define PCI_PM_CAP_PME_D0 0x0800 /* PME# from D0 */ +#define PCI_PM_CAP_PME_D1 0x1000 /* PME# from D1 */ +#define PCI_PM_CAP_PME_D2 0x2000 /* PME# from D2 */ +#define PCI_PM_CAP_PME_D3hot 0x4000 /* PME# from D3 (hot) */ +#define PCI_PM_CAP_PME_D3cold 0x8000 /* PME# from D3 (cold) */ +#define PCI_PM_CAP_PME_SHIFT 11 /* Start of the PME Mask in PMC */ +#define PCI_PM_CTRL 4 /* PM control and status register */ +#define PCI_PM_CTRL_STATE_MASK 0x0003 /* Current power state (D0 to D3) */ +#define PCI_PM_CTRL_NO_SOFT_RESET 0x0008 /* No reset for D3hot->D0 */ +#define PCI_PM_CTRL_PME_ENABLE 0x0100 /* PME pin enable */ +#define PCI_PM_CTRL_DATA_SEL_MASK 0x1e00 /* Data select (??) */ +#define PCI_PM_CTRL_DATA_SCALE_MASK 0x6000 /* Data scale (??) */ +#define PCI_PM_CTRL_PME_STATUS 0x8000 /* PME pin status */ +#define PCI_PM_PPB_EXTENSIONS 6 /* PPB support extensions (??) */ +#define PCI_PM_PPB_B2_B3 0x40 /* Stop clock when in D3hot (??) */ +#define PCI_PM_BPCC_ENABLE 0x80 /* Bus power/clock control enable (??) */ +#define PCI_PM_DATA_REGISTER 7 /* (??) */ +#define PCI_PM_SIZEOF 8 + +/* AGP registers */ + +#define PCI_AGP_VERSION 2 /* BCD version number */ +#define PCI_AGP_RFU 3 /* Rest of capability flags */ +#define PCI_AGP_STATUS 4 /* Status register */ +#define PCI_AGP_STATUS_RQ_MASK 0xff000000 /* Maximum number of requests - 1 */ +#define PCI_AGP_STATUS_SBA 0x0200 /* Sideband addressing supported */ +#define PCI_AGP_STATUS_64BIT 0x0020 /* 64-bit addressing supported */ +#define PCI_AGP_STATUS_FW 0x0010 /* FW transfers supported */ +#define PCI_AGP_STATUS_RATE4 0x0004 /* 4x transfer rate supported */ +#define PCI_AGP_STATUS_RATE2 0x0002 /* 2x transfer rate supported */ +#define PCI_AGP_STATUS_RATE1 0x0001 /* 1x transfer rate supported */ +#define PCI_AGP_COMMAND 8 /* Control register */ +#define PCI_AGP_COMMAND_RQ_MASK 0xff000000 /* Master: Maximum number of requests */ +#define PCI_AGP_COMMAND_SBA 0x0200 /* Sideband addressing enabled */ +#define PCI_AGP_COMMAND_AGP 0x0100 /* Allow processing of AGP transactions */ +#define PCI_AGP_COMMAND_64BIT 0x0020 /* Allow processing of 64-bit addresses */ +#define PCI_AGP_COMMAND_FW 0x0010 /* Force FW transfers */ +#define PCI_AGP_COMMAND_RATE4 0x0004 /* Use 4x rate */ +#define PCI_AGP_COMMAND_RATE2 0x0002 /* Use 2x rate */ +#define PCI_AGP_COMMAND_RATE1 0x0001 /* Use 1x rate */ +#define PCI_AGP_SIZEOF 12 + +/* Vital Product Data */ + +#define PCI_VPD_ADDR 2 /* Address to access (15 bits!) */ +#define PCI_VPD_ADDR_MASK 0x7fff /* Address mask */ +#define PCI_VPD_ADDR_F 0x8000 /* Write 0, 1 indicates completion */ +#define PCI_VPD_DATA 4 /* 32-bits of data returned here */ +#define PCI_CAP_VPD_SIZEOF 8 + +/* Slot Identification */ + +#define PCI_SID_ESR 2 /* Expansion Slot Register */ +#define PCI_SID_ESR_NSLOTS 0x1f /* Number of expansion slots available */ +#define PCI_SID_ESR_FIC 0x20 /* First In Chassis Flag */ +#define PCI_SID_CHASSIS_NR 3 /* Chassis Number */ + +/* Message Signalled Interrupt registers */ + +#define PCI_MSI_FLAGS 2 /* Message Control */ +#define PCI_MSI_FLAGS_ENABLE 0x0001 /* MSI feature enabled */ +#define PCI_MSI_FLAGS_QMASK 0x000e /* Maximum queue size available */ +#define PCI_MSI_FLAGS_QSIZE 0x0070 /* Message queue size configured */ +#define PCI_MSI_FLAGS_64BIT 0x0080 /* 64-bit addresses allowed */ +#define PCI_MSI_FLAGS_MASKBIT 0x0100 /* Per-vector masking capable */ +#define PCI_MSI_RFU 3 /* Rest of capability flags */ +#define PCI_MSI_ADDRESS_LO 4 /* Lower 32 bits */ +#define PCI_MSI_ADDRESS_HI 8 /* Upper 32 bits (if PCI_MSI_FLAGS_64BIT set) */ +#define PCI_MSI_DATA_32 8 /* 16 bits of data for 32-bit devices */ +#define PCI_MSI_MASK_32 12 /* Mask bits register for 32-bit devices */ +#define PCI_MSI_PENDING_32 16 /* Pending intrs for 32-bit devices */ +#define PCI_MSI_DATA_64 12 /* 16 bits of data for 64-bit devices */ +#define PCI_MSI_MASK_64 16 /* Mask bits register for 64-bit devices */ +#define PCI_MSI_PENDING_64 20 /* Pending intrs for 64-bit devices */ + +/* MSI-X registers (in MSI-X capability) */ +#define PCI_MSIX_FLAGS 2 /* Message Control */ +#define PCI_MSIX_FLAGS_QSIZE 0x07FF /* Table size */ +#define PCI_MSIX_FLAGS_MASKALL 0x4000 /* Mask all vectors for this function */ +#define PCI_MSIX_FLAGS_ENABLE 0x8000 /* MSI-X enable */ +#define PCI_MSIX_TABLE 4 /* Table offset */ +#define PCI_MSIX_TABLE_BIR 0x00000007 /* BAR index */ +#define PCI_MSIX_TABLE_OFFSET 0xfffffff8 /* Offset into specified BAR */ +#define PCI_MSIX_PBA 8 /* Pending Bit Array offset */ +#define PCI_MSIX_PBA_BIR 0x00000007 /* BAR index */ +#define PCI_MSIX_PBA_OFFSET 0xfffffff8 /* Offset into specified BAR */ +#define PCI_MSIX_FLAGS_BIRMASK PCI_MSIX_PBA_BIR /* deprecated */ +#define PCI_CAP_MSIX_SIZEOF 12 /* size of MSIX registers */ + +/* MSI-X Table entry format (in memory mapped by a BAR) */ +#define PCI_MSIX_ENTRY_SIZE 16 +#define PCI_MSIX_ENTRY_LOWER_ADDR 0 /* Message Address */ +#define PCI_MSIX_ENTRY_UPPER_ADDR 4 /* Message Upper Address */ +#define PCI_MSIX_ENTRY_DATA 8 /* Message Data */ +#define PCI_MSIX_ENTRY_VECTOR_CTRL 12 /* Vector Control */ +#define PCI_MSIX_ENTRY_CTRL_MASKBIT 0x00000001 + +/* CompactPCI Hotswap Register */ + +#define PCI_CHSWP_CSR 2 /* Control and Status Register */ +#define PCI_CHSWP_DHA 0x01 /* Device Hiding Arm */ +#define PCI_CHSWP_EIM 0x02 /* ENUM# Signal Mask */ +#define PCI_CHSWP_PIE 0x04 /* Pending Insert or Extract */ +#define PCI_CHSWP_LOO 0x08 /* LED On / Off */ +#define PCI_CHSWP_PI 0x30 /* Programming Interface */ +#define PCI_CHSWP_EXT 0x40 /* ENUM# status - extraction */ +#define PCI_CHSWP_INS 0x80 /* ENUM# status - insertion */ + +/* PCI Advanced Feature registers */ + +#define PCI_AF_LENGTH 2 +#define PCI_AF_CAP 3 +#define PCI_AF_CAP_TP 0x01 +#define PCI_AF_CAP_FLR 0x02 +#define PCI_AF_CTRL 4 +#define PCI_AF_CTRL_FLR 0x01 +#define PCI_AF_STATUS 5 +#define PCI_AF_STATUS_TP 0x01 +#define PCI_CAP_AF_SIZEOF 6 /* size of AF registers */ + +/* PCI Enhanced Allocation registers */ + +#define PCI_EA_NUM_ENT 2 /* Number of Capability Entries */ +#define PCI_EA_NUM_ENT_MASK 0x3f /* Num Entries Mask */ +#define PCI_EA_FIRST_ENT 4 /* First EA Entry in List */ +#define PCI_EA_FIRST_ENT_BRIDGE 8 /* First EA Entry for Bridges */ +#define PCI_EA_ES 0x00000007 /* Entry Size */ +#define PCI_EA_BEI 0x000000f0 /* BAR Equivalent Indicator */ + +/* EA fixed Secondary and Subordinate bus numbers for Bridge */ +#define PCI_EA_SEC_BUS_MASK 0xff +#define PCI_EA_SUB_BUS_MASK 0xff00 +#define PCI_EA_SUB_BUS_SHIFT 8 + +/* 0-5 map to BARs 0-5 respectively */ +#define PCI_EA_BEI_BAR0 0 +#define PCI_EA_BEI_BAR5 5 +#define PCI_EA_BEI_BRIDGE 6 /* Resource behind bridge */ +#define PCI_EA_BEI_ENI 7 /* Equivalent Not Indicated */ +#define PCI_EA_BEI_ROM 8 /* Expansion ROM */ +/* 9-14 map to VF BARs 0-5 respectively */ +#define PCI_EA_BEI_VF_BAR0 9 +#define PCI_EA_BEI_VF_BAR5 14 +#define PCI_EA_BEI_RESERVED 15 /* Reserved - Treat like ENI */ +#define PCI_EA_PP 0x0000ff00 /* Primary Properties */ +#define PCI_EA_SP 0x00ff0000 /* Secondary Properties */ +#define PCI_EA_P_MEM 0x00 /* Non-Prefetch Memory */ +#define PCI_EA_P_MEM_PREFETCH 0x01 /* Prefetchable Memory */ +#define PCI_EA_P_IO 0x02 /* I/O Space */ +#define PCI_EA_P_VF_MEM_PREFETCH 0x03 /* VF Prefetchable Memory */ +#define PCI_EA_P_VF_MEM 0x04 /* VF Non-Prefetch Memory */ +#define PCI_EA_P_BRIDGE_MEM 0x05 /* Bridge Non-Prefetch Memory */ +#define PCI_EA_P_BRIDGE_MEM_PREFETCH 0x06 /* Bridge Prefetchable Memory */ +#define PCI_EA_P_BRIDGE_IO 0x07 /* Bridge I/O Space */ +/* 0x08-0xfc reserved */ +#define PCI_EA_P_MEM_RESERVED 0xfd /* Reserved Memory */ +#define PCI_EA_P_IO_RESERVED 0xfe /* Reserved I/O Space */ +#define PCI_EA_P_UNAVAILABLE 0xff /* Entry Unavailable */ +#define PCI_EA_WRITABLE 0x40000000 /* Writable: 1 = RW, 0 = HwInit */ +#define PCI_EA_ENABLE 0x80000000 /* Enable for this entry */ +#define PCI_EA_BASE 4 /* Base Address Offset */ +#define PCI_EA_MAX_OFFSET 8 /* MaxOffset (resource length) */ +/* bit 0 is reserved */ +#define PCI_EA_IS_64 0x00000002 /* 64-bit field flag */ +#define PCI_EA_FIELD_MASK 0xfffffffc /* For Base & Max Offset */ + +/* PCI-X registers (Type 0 (non-bridge) devices) */ + +#define PCI_X_CMD 2 /* Modes & Features */ +#define PCI_X_CMD_DPERR_E 0x0001 /* Data Parity Error Recovery Enable */ +#define PCI_X_CMD_ERO 0x0002 /* Enable Relaxed Ordering */ +#define PCI_X_CMD_READ_512 0x0000 /* 512 byte maximum read byte count */ +#define PCI_X_CMD_READ_1K 0x0004 /* 1Kbyte maximum read byte count */ +#define PCI_X_CMD_READ_2K 0x0008 /* 2Kbyte maximum read byte count */ +#define PCI_X_CMD_READ_4K 0x000c /* 4Kbyte maximum read byte count */ +#define PCI_X_CMD_MAX_READ 0x000c /* Max Memory Read Byte Count */ + /* Max # of outstanding split transactions */ +#define PCI_X_CMD_SPLIT_1 0x0000 /* Max 1 */ +#define PCI_X_CMD_SPLIT_2 0x0010 /* Max 2 */ +#define PCI_X_CMD_SPLIT_3 0x0020 /* Max 3 */ +#define PCI_X_CMD_SPLIT_4 0x0030 /* Max 4 */ +#define PCI_X_CMD_SPLIT_8 0x0040 /* Max 8 */ +#define PCI_X_CMD_SPLIT_12 0x0050 /* Max 12 */ +#define PCI_X_CMD_SPLIT_16 0x0060 /* Max 16 */ +#define PCI_X_CMD_SPLIT_32 0x0070 /* Max 32 */ +#define PCI_X_CMD_MAX_SPLIT 0x0070 /* Max Outstanding Split Transactions */ +#define PCI_X_CMD_VERSION(x) (((x) >> 12) & 3) /* Version */ +#define PCI_X_STATUS 4 /* PCI-X capabilities */ +#define PCI_X_STATUS_DEVFN 0x000000ff /* A copy of devfn */ +#define PCI_X_STATUS_BUS 0x0000ff00 /* A copy of bus nr */ +#define PCI_X_STATUS_64BIT 0x00010000 /* 64-bit device */ +#define PCI_X_STATUS_133MHZ 0x00020000 /* 133 MHz capable */ +#define PCI_X_STATUS_SPL_DISC 0x00040000 /* Split Completion Discarded */ +#define PCI_X_STATUS_UNX_SPL 0x00080000 /* Unexpected Split Completion */ +#define PCI_X_STATUS_COMPLEX 0x00100000 /* Device Complexity */ +#define PCI_X_STATUS_MAX_READ 0x00600000 /* Designed Max Memory Read Count */ +#define PCI_X_STATUS_MAX_SPLIT 0x03800000 /* Designed Max Outstanding Split Transactions */ +#define PCI_X_STATUS_MAX_CUM 0x1c000000 /* Designed Max Cumulative Read Size */ +#define PCI_X_STATUS_SPL_ERR 0x20000000 /* Rcvd Split Completion Error Msg */ +#define PCI_X_STATUS_266MHZ 0x40000000 /* 266 MHz capable */ +#define PCI_X_STATUS_533MHZ 0x80000000 /* 533 MHz capable */ +#define PCI_X_ECC_CSR 8 /* ECC control and status */ +#define PCI_CAP_PCIX_SIZEOF_V0 8 /* size of registers for Version 0 */ +#define PCI_CAP_PCIX_SIZEOF_V1 24 /* size for Version 1 */ +#define PCI_CAP_PCIX_SIZEOF_V2 PCI_CAP_PCIX_SIZEOF_V1 /* Same for v2 */ + +/* PCI-X registers (Type 1 (bridge) devices) */ + +#define PCI_X_BRIDGE_SSTATUS 2 /* Secondary Status */ +#define PCI_X_SSTATUS_64BIT 0x0001 /* Secondary AD interface is 64 bits */ +#define PCI_X_SSTATUS_133MHZ 0x0002 /* 133 MHz capable */ +#define PCI_X_SSTATUS_FREQ 0x03c0 /* Secondary Bus Mode and Frequency */ +#define PCI_X_SSTATUS_VERS 0x3000 /* PCI-X Capability Version */ +#define PCI_X_SSTATUS_V1 0x1000 /* Mode 2, not Mode 1 */ +#define PCI_X_SSTATUS_V2 0x2000 /* Mode 1 or Modes 1 and 2 */ +#define PCI_X_SSTATUS_266MHZ 0x4000 /* 266 MHz capable */ +#define PCI_X_SSTATUS_533MHZ 0x8000 /* 533 MHz capable */ +#define PCI_X_BRIDGE_STATUS 4 /* Bridge Status */ + +/* PCI Bridge Subsystem ID registers */ + +#define PCI_SSVID_VENDOR_ID 4 /* PCI Bridge subsystem vendor ID */ +#define PCI_SSVID_DEVICE_ID 6 /* PCI Bridge subsystem device ID */ + +/* PCI Express capability registers */ + +#define PCI_EXP_FLAGS 2 /* Capabilities register */ +#define PCI_EXP_FLAGS_VERS 0x000f /* Capability version */ +#define PCI_EXP_FLAGS_TYPE 0x00f0 /* Device/Port type */ +#define PCI_EXP_TYPE_ENDPOINT 0x0 /* Express Endpoint */ +#define PCI_EXP_TYPE_LEG_END 0x1 /* Legacy Endpoint */ +#define PCI_EXP_TYPE_ROOT_PORT 0x4 /* Root Port */ +#define PCI_EXP_TYPE_UPSTREAM 0x5 /* Upstream Port */ +#define PCI_EXP_TYPE_DOWNSTREAM 0x6 /* Downstream Port */ +#define PCI_EXP_TYPE_PCI_BRIDGE 0x7 /* PCIe to PCI/PCI-X Bridge */ +#define PCI_EXP_TYPE_PCIE_BRIDGE 0x8 /* PCI/PCI-X to PCIe Bridge */ +#define PCI_EXP_TYPE_RC_END 0x9 /* Root Complex Integrated Endpoint */ +#define PCI_EXP_TYPE_RC_EC 0xa /* Root Complex Event Collector */ +#define PCI_EXP_FLAGS_SLOT 0x0100 /* Slot implemented */ +#define PCI_EXP_FLAGS_IRQ 0x3e00 /* Interrupt message number */ +#define PCI_EXP_DEVCAP 4 /* Device capabilities */ +#define PCI_EXP_DEVCAP_PAYLOAD 0x00000007 /* Max_Payload_Size */ +#define PCI_EXP_DEVCAP_PHANTOM 0x00000018 /* Phantom functions */ +#define PCI_EXP_DEVCAP_EXT_TAG 0x00000020 /* Extended tags */ +#define PCI_EXP_DEVCAP_L0S 0x000001c0 /* L0s Acceptable Latency */ +#define PCI_EXP_DEVCAP_L1 0x00000e00 /* L1 Acceptable Latency */ +#define PCI_EXP_DEVCAP_ATN_BUT 0x00001000 /* Attention Button Present */ +#define PCI_EXP_DEVCAP_ATN_IND 0x00002000 /* Attention Indicator Present */ +#define PCI_EXP_DEVCAP_PWR_IND 0x00004000 /* Power Indicator Present */ +#define PCI_EXP_DEVCAP_RBER 0x00008000 /* Role-Based Error Reporting */ +#define PCI_EXP_DEVCAP_PWR_VAL 0x03fc0000 /* Slot Power Limit Value */ +#define PCI_EXP_DEVCAP_PWR_SCL 0x0c000000 /* Slot Power Limit Scale */ +#define PCI_EXP_DEVCAP_FLR 0x10000000 /* Function Level Reset */ +#define PCI_EXP_DEVCTL 8 /* Device Control */ +#define PCI_EXP_DEVCTL_CERE 0x0001 /* Correctable Error Reporting En. */ +#define PCI_EXP_DEVCTL_NFERE 0x0002 /* Non-Fatal Error Reporting Enable */ +#define PCI_EXP_DEVCTL_FERE 0x0004 /* Fatal Error Reporting Enable */ +#define PCI_EXP_DEVCTL_URRE 0x0008 /* Unsupported Request Reporting En. */ +#define PCI_EXP_DEVCTL_RELAX_EN 0x0010 /* Enable relaxed ordering */ +#define PCI_EXP_DEVCTL_PAYLOAD 0x00e0 /* Max_Payload_Size */ +#define PCI_EXP_DEVCTL_EXT_TAG 0x0100 /* Extended Tag Field Enable */ +#define PCI_EXP_DEVCTL_PHANTOM 0x0200 /* Phantom Functions Enable */ +#define PCI_EXP_DEVCTL_AUX_PME 0x0400 /* Auxiliary Power PM Enable */ +#define PCI_EXP_DEVCTL_NOSNOOP_EN 0x0800 /* Enable No Snoop */ +#define PCI_EXP_DEVCTL_READRQ 0x7000 /* Max_Read_Request_Size */ +#define PCI_EXP_DEVCTL_READRQ_128B 0x0000 /* 128 Bytes */ +#define PCI_EXP_DEVCTL_READRQ_256B 0x1000 /* 256 Bytes */ +#define PCI_EXP_DEVCTL_READRQ_512B 0x2000 /* 512 Bytes */ +#define PCI_EXP_DEVCTL_READRQ_1024B 0x3000 /* 1024 Bytes */ +#define PCI_EXP_DEVCTL_READRQ_2048B 0x4000 /* 2048 Bytes */ +#define PCI_EXP_DEVCTL_READRQ_4096B 0x5000 /* 4096 Bytes */ +#define PCI_EXP_DEVCTL_BCR_FLR 0x8000 /* Bridge Configuration Retry / FLR */ +#define PCI_EXP_DEVSTA 10 /* Device Status */ +#define PCI_EXP_DEVSTA_CED 0x0001 /* Correctable Error Detected */ +#define PCI_EXP_DEVSTA_NFED 0x0002 /* Non-Fatal Error Detected */ +#define PCI_EXP_DEVSTA_FED 0x0004 /* Fatal Error Detected */ +#define PCI_EXP_DEVSTA_URD 0x0008 /* Unsupported Request Detected */ +#define PCI_EXP_DEVSTA_AUXPD 0x0010 /* AUX Power Detected */ +#define PCI_EXP_DEVSTA_TRPND 0x0020 /* Transactions Pending */ +#define PCI_CAP_EXP_RC_ENDPOINT_SIZEOF_V1 12 /* v1 endpoints without link end here */ +#define PCI_EXP_LNKCAP 12 /* Link Capabilities */ +#define PCI_EXP_LNKCAP_SLS 0x0000000f /* Supported Link Speeds */ +#define PCI_EXP_LNKCAP_SLS_2_5GB 0x00000001 /* LNKCAP2 SLS Vector bit 0 */ +#define PCI_EXP_LNKCAP_SLS_5_0GB 0x00000002 /* LNKCAP2 SLS Vector bit 1 */ +#define PCI_EXP_LNKCAP_SLS_8_0GB 0x00000003 /* LNKCAP2 SLS Vector bit 2 */ +#define PCI_EXP_LNKCAP_SLS_16_0GB 0x00000004 /* LNKCAP2 SLS Vector bit 3 */ +#define PCI_EXP_LNKCAP_SLS_32_0GB 0x00000005 /* LNKCAP2 SLS Vector bit 4 */ +#define PCI_EXP_LNKCAP_SLS_64_0GB 0x00000006 /* LNKCAP2 SLS Vector bit 5 */ +#define PCI_EXP_LNKCAP_MLW 0x000003f0 /* Maximum Link Width */ +#define PCI_EXP_LNKCAP_ASPMS 0x00000c00 /* ASPM Support */ +#define PCI_EXP_LNKCAP_ASPM_L0S 0x00000400 /* ASPM L0s Support */ +#define PCI_EXP_LNKCAP_ASPM_L1 0x00000800 /* ASPM L1 Support */ +#define PCI_EXP_LNKCAP_L0SEL 0x00007000 /* L0s Exit Latency */ +#define PCI_EXP_LNKCAP_L1EL 0x00038000 /* L1 Exit Latency */ +#define PCI_EXP_LNKCAP_CLKPM 0x00040000 /* Clock Power Management */ +#define PCI_EXP_LNKCAP_SDERC 0x00080000 /* Surprise Down Error Reporting Capable */ +#define PCI_EXP_LNKCAP_DLLLARC 0x00100000 /* Data Link Layer Link Active Reporting Capable */ +#define PCI_EXP_LNKCAP_LBNC 0x00200000 /* Link Bandwidth Notification Capability */ +#define PCI_EXP_LNKCAP_PN 0xff000000 /* Port Number */ +#define PCI_EXP_LNKCTL 16 /* Link Control */ +#define PCI_EXP_LNKCTL_ASPMC 0x0003 /* ASPM Control */ +#define PCI_EXP_LNKCTL_ASPM_L0S 0x0001 /* L0s Enable */ +#define PCI_EXP_LNKCTL_ASPM_L1 0x0002 /* L1 Enable */ +#define PCI_EXP_LNKCTL_RCB 0x0008 /* Read Completion Boundary */ +#define PCI_EXP_LNKCTL_LD 0x0010 /* Link Disable */ +#define PCI_EXP_LNKCTL_RL 0x0020 /* Retrain Link */ +#define PCI_EXP_LNKCTL_CCC 0x0040 /* Common Clock Configuration */ +#define PCI_EXP_LNKCTL_ES 0x0080 /* Extended Synch */ +#define PCI_EXP_LNKCTL_CLKREQ_EN 0x0100 /* Enable clkreq */ +#define PCI_EXP_LNKCTL_HAWD 0x0200 /* Hardware Autonomous Width Disable */ +#define PCI_EXP_LNKCTL_LBMIE 0x0400 /* Link Bandwidth Management Interrupt Enable */ +#define PCI_EXP_LNKCTL_LABIE 0x0800 /* Link Autonomous Bandwidth Interrupt Enable */ +#define PCI_EXP_LNKSTA 18 /* Link Status */ +#define PCI_EXP_LNKSTA_CLS 0x000f /* Current Link Speed */ +#define PCI_EXP_LNKSTA_CLS_2_5GB 0x0001 /* Current Link Speed 2.5GT/s */ +#define PCI_EXP_LNKSTA_CLS_5_0GB 0x0002 /* Current Link Speed 5.0GT/s */ +#define PCI_EXP_LNKSTA_CLS_8_0GB 0x0003 /* Current Link Speed 8.0GT/s */ +#define PCI_EXP_LNKSTA_CLS_16_0GB 0x0004 /* Current Link Speed 16.0GT/s */ +#define PCI_EXP_LNKSTA_CLS_32_0GB 0x0005 /* Current Link Speed 32.0GT/s */ +#define PCI_EXP_LNKSTA_CLS_64_0GB 0x0006 /* Current Link Speed 64.0GT/s */ +#define PCI_EXP_LNKSTA_NLW 0x03f0 /* Negotiated Link Width */ +#define PCI_EXP_LNKSTA_NLW_X1 0x0010 /* Current Link Width x1 */ +#define PCI_EXP_LNKSTA_NLW_X2 0x0020 /* Current Link Width x2 */ +#define PCI_EXP_LNKSTA_NLW_X4 0x0040 /* Current Link Width x4 */ +#define PCI_EXP_LNKSTA_NLW_X8 0x0080 /* Current Link Width x8 */ +#define PCI_EXP_LNKSTA_NLW_SHIFT 4 /* start of NLW mask in link status */ +#define PCI_EXP_LNKSTA_LT 0x0800 /* Link Training */ +#define PCI_EXP_LNKSTA_SLC 0x1000 /* Slot Clock Configuration */ +#define PCI_EXP_LNKSTA_DLLLA 0x2000 /* Data Link Layer Link Active */ +#define PCI_EXP_LNKSTA_LBMS 0x4000 /* Link Bandwidth Management Status */ +#define PCI_EXP_LNKSTA_LABS 0x8000 /* Link Autonomous Bandwidth Status */ +#define PCI_CAP_EXP_ENDPOINT_SIZEOF_V1 20 /* v1 endpoints with link end here */ +#define PCI_EXP_SLTCAP 20 /* Slot Capabilities */ +#define PCI_EXP_SLTCAP_ABP 0x00000001 /* Attention Button Present */ +#define PCI_EXP_SLTCAP_PCP 0x00000002 /* Power Controller Present */ +#define PCI_EXP_SLTCAP_MRLSP 0x00000004 /* MRL Sensor Present */ +#define PCI_EXP_SLTCAP_AIP 0x00000008 /* Attention Indicator Present */ +#define PCI_EXP_SLTCAP_PIP 0x00000010 /* Power Indicator Present */ +#define PCI_EXP_SLTCAP_HPS 0x00000020 /* Hot-Plug Surprise */ +#define PCI_EXP_SLTCAP_HPC 0x00000040 /* Hot-Plug Capable */ +#define PCI_EXP_SLTCAP_SPLV 0x00007f80 /* Slot Power Limit Value */ +#define PCI_EXP_SLTCAP_SPLS 0x00018000 /* Slot Power Limit Scale */ +#define PCI_EXP_SLTCAP_EIP 0x00020000 /* Electromechanical Interlock Present */ +#define PCI_EXP_SLTCAP_NCCS 0x00040000 /* No Command Completed Support */ +#define PCI_EXP_SLTCAP_PSN 0xfff80000 /* Physical Slot Number */ +#define PCI_EXP_SLTCTL 24 /* Slot Control */ +#define PCI_EXP_SLTCTL_ABPE 0x0001 /* Attention Button Pressed Enable */ +#define PCI_EXP_SLTCTL_PFDE 0x0002 /* Power Fault Detected Enable */ +#define PCI_EXP_SLTCTL_MRLSCE 0x0004 /* MRL Sensor Changed Enable */ +#define PCI_EXP_SLTCTL_PDCE 0x0008 /* Presence Detect Changed Enable */ +#define PCI_EXP_SLTCTL_CCIE 0x0010 /* Command Completed Interrupt Enable */ +#define PCI_EXP_SLTCTL_HPIE 0x0020 /* Hot-Plug Interrupt Enable */ +#define PCI_EXP_SLTCTL_AIC 0x00c0 /* Attention Indicator Control */ +#define PCI_EXP_SLTCTL_ATTN_IND_SHIFT 6 /* Attention Indicator shift */ +#define PCI_EXP_SLTCTL_ATTN_IND_ON 0x0040 /* Attention Indicator on */ +#define PCI_EXP_SLTCTL_ATTN_IND_BLINK 0x0080 /* Attention Indicator blinking */ +#define PCI_EXP_SLTCTL_ATTN_IND_OFF 0x00c0 /* Attention Indicator off */ +#define PCI_EXP_SLTCTL_PIC 0x0300 /* Power Indicator Control */ +#define PCI_EXP_SLTCTL_PWR_IND_ON 0x0100 /* Power Indicator on */ +#define PCI_EXP_SLTCTL_PWR_IND_BLINK 0x0200 /* Power Indicator blinking */ +#define PCI_EXP_SLTCTL_PWR_IND_OFF 0x0300 /* Power Indicator off */ +#define PCI_EXP_SLTCTL_PCC 0x0400 /* Power Controller Control */ +#define PCI_EXP_SLTCTL_PWR_ON 0x0000 /* Power On */ +#define PCI_EXP_SLTCTL_PWR_OFF 0x0400 /* Power Off */ +#define PCI_EXP_SLTCTL_EIC 0x0800 /* Electromechanical Interlock Control */ +#define PCI_EXP_SLTCTL_DLLSCE 0x1000 /* Data Link Layer State Changed Enable */ +#define PCI_EXP_SLTCTL_IBPD_DISABLE 0x4000 /* In-band PD disable */ +#define PCI_EXP_SLTSTA 26 /* Slot Status */ +#define PCI_EXP_SLTSTA_ABP 0x0001 /* Attention Button Pressed */ +#define PCI_EXP_SLTSTA_PFD 0x0002 /* Power Fault Detected */ +#define PCI_EXP_SLTSTA_MRLSC 0x0004 /* MRL Sensor Changed */ +#define PCI_EXP_SLTSTA_PDC 0x0008 /* Presence Detect Changed */ +#define PCI_EXP_SLTSTA_CC 0x0010 /* Command Completed */ +#define PCI_EXP_SLTSTA_MRLSS 0x0020 /* MRL Sensor State */ +#define PCI_EXP_SLTSTA_PDS 0x0040 /* Presence Detect State */ +#define PCI_EXP_SLTSTA_EIS 0x0080 /* Electromechanical Interlock Status */ +#define PCI_EXP_SLTSTA_DLLSC 0x0100 /* Data Link Layer State Changed */ +#define PCI_EXP_RTCTL 28 /* Root Control */ +#define PCI_EXP_RTCTL_SECEE 0x0001 /* System Error on Correctable Error */ +#define PCI_EXP_RTCTL_SENFEE 0x0002 /* System Error on Non-Fatal Error */ +#define PCI_EXP_RTCTL_SEFEE 0x0004 /* System Error on Fatal Error */ +#define PCI_EXP_RTCTL_PMEIE 0x0008 /* PME Interrupt Enable */ +#define PCI_EXP_RTCTL_CRSSVE 0x0010 /* CRS Software Visibility Enable */ +#define PCI_EXP_RTCAP 30 /* Root Capabilities */ +#define PCI_EXP_RTCAP_CRSVIS 0x0001 /* CRS Software Visibility capability */ +#define PCI_EXP_RTSTA 32 /* Root Status */ +#define PCI_EXP_RTSTA_PME 0x00010000 /* PME status */ +#define PCI_EXP_RTSTA_PENDING 0x00020000 /* PME pending */ +/* + * The Device Capabilities 2, Device Status 2, Device Control 2, + * Link Capabilities 2, Link Status 2, Link Control 2, + * Slot Capabilities 2, Slot Status 2, and Slot Control 2 registers + * are only present on devices with PCIe Capability version 2. + * Use pcie_capability_read_word() and similar interfaces to use them + * safely. + */ +#define PCI_EXP_DEVCAP2 36 /* Device Capabilities 2 */ +#define PCI_EXP_DEVCAP2_COMP_TMOUT_DIS 0x00000010 /* Completion Timeout Disable supported */ +#define PCI_EXP_DEVCAP2_ARI 0x00000020 /* Alternative Routing-ID */ +#define PCI_EXP_DEVCAP2_ATOMIC_ROUTE 0x00000040 /* Atomic Op routing */ +#define PCI_EXP_DEVCAP2_ATOMIC_COMP32 0x00000080 /* 32b AtomicOp completion */ +#define PCI_EXP_DEVCAP2_ATOMIC_COMP64 0x00000100 /* 64b AtomicOp completion */ +#define PCI_EXP_DEVCAP2_ATOMIC_COMP128 0x00000200 /* 128b AtomicOp completion */ +#define PCI_EXP_DEVCAP2_LTR 0x00000800 /* Latency tolerance reporting */ +#define PCI_EXP_DEVCAP2_OBFF_MASK 0x000c0000 /* OBFF support mechanism */ +#define PCI_EXP_DEVCAP2_OBFF_MSG 0x00040000 /* New message signaling */ +#define PCI_EXP_DEVCAP2_OBFF_WAKE 0x00080000 /* Re-use WAKE# for OBFF */ +#define PCI_EXP_DEVCAP2_EE_PREFIX 0x00200000 /* End-End TLP Prefix */ +#define PCI_EXP_DEVCTL2 40 /* Device Control 2 */ +#define PCI_EXP_DEVCTL2_COMP_TIMEOUT 0x000f /* Completion Timeout Value */ +#define PCI_EXP_DEVCTL2_COMP_TMOUT_DIS 0x0010 /* Completion Timeout Disable */ +#define PCI_EXP_DEVCTL2_ARI 0x0020 /* Alternative Routing-ID */ +#define PCI_EXP_DEVCTL2_ATOMIC_REQ 0x0040 /* Set Atomic requests */ +#define PCI_EXP_DEVCTL2_ATOMIC_EGRESS_BLOCK 0x0080 /* Block atomic egress */ +#define PCI_EXP_DEVCTL2_IDO_REQ_EN 0x0100 /* Allow IDO for requests */ +#define PCI_EXP_DEVCTL2_IDO_CMP_EN 0x0200 /* Allow IDO for completions */ +#define PCI_EXP_DEVCTL2_LTR_EN 0x0400 /* Enable LTR mechanism */ +#define PCI_EXP_DEVCTL2_OBFF_MSGA_EN 0x2000 /* Enable OBFF Message type A */ +#define PCI_EXP_DEVCTL2_OBFF_MSGB_EN 0x4000 /* Enable OBFF Message type B */ +#define PCI_EXP_DEVCTL2_OBFF_WAKE_EN 0x6000 /* OBFF using WAKE# signaling */ +#define PCI_EXP_DEVSTA2 42 /* Device Status 2 */ +#define PCI_CAP_EXP_RC_ENDPOINT_SIZEOF_V2 44 /* v2 endpoints without link end here */ +#define PCI_EXP_LNKCAP2 44 /* Link Capabilities 2 */ +#define PCI_EXP_LNKCAP2_SLS_2_5GB 0x00000002 /* Supported Speed 2.5GT/s */ +#define PCI_EXP_LNKCAP2_SLS_5_0GB 0x00000004 /* Supported Speed 5GT/s */ +#define PCI_EXP_LNKCAP2_SLS_8_0GB 0x00000008 /* Supported Speed 8GT/s */ +#define PCI_EXP_LNKCAP2_SLS_16_0GB 0x00000010 /* Supported Speed 16GT/s */ +#define PCI_EXP_LNKCAP2_SLS_32_0GB 0x00000020 /* Supported Speed 32GT/s */ +#define PCI_EXP_LNKCAP2_SLS_64_0GB 0x00000040 /* Supported Speed 64GT/s */ +#define PCI_EXP_LNKCAP2_CROSSLINK 0x00000100 /* Crosslink supported */ +#define PCI_EXP_LNKCTL2 48 /* Link Control 2 */ +#define PCI_EXP_LNKCTL2_TLS 0x000f +#define PCI_EXP_LNKCTL2_TLS_2_5GT 0x0001 /* Supported Speed 2.5GT/s */ +#define PCI_EXP_LNKCTL2_TLS_5_0GT 0x0002 /* Supported Speed 5GT/s */ +#define PCI_EXP_LNKCTL2_TLS_8_0GT 0x0003 /* Supported Speed 8GT/s */ +#define PCI_EXP_LNKCTL2_TLS_16_0GT 0x0004 /* Supported Speed 16GT/s */ +#define PCI_EXP_LNKCTL2_TLS_32_0GT 0x0005 /* Supported Speed 32GT/s */ +#define PCI_EXP_LNKCTL2_TLS_64_0GT 0x0006 /* Supported Speed 64GT/s */ +#define PCI_EXP_LNKCTL2_ENTER_COMP 0x0010 /* Enter Compliance */ +#define PCI_EXP_LNKCTL2_TX_MARGIN 0x0380 /* Transmit Margin */ +#define PCI_EXP_LNKCTL2_HASD 0x0020 /* HW Autonomous Speed Disable */ +#define PCI_EXP_LNKSTA2 50 /* Link Status 2 */ +#define PCI_CAP_EXP_ENDPOINT_SIZEOF_V2 52 /* v2 endpoints with link end here */ +#define PCI_EXP_SLTCAP2 52 /* Slot Capabilities 2 */ +#define PCI_EXP_SLTCAP2_IBPD 0x00000001 /* In-band PD Disable Supported */ +#define PCI_EXP_SLTCTL2 56 /* Slot Control 2 */ +#define PCI_EXP_SLTSTA2 58 /* Slot Status 2 */ + +/* Extended Capabilities (PCI-X 2.0 and Express) */ +#define PCI_EXT_CAP_ID(header) (header & 0x0000ffff) +#define PCI_EXT_CAP_VER(header) ((header >> 16) & 0xf) +#define PCI_EXT_CAP_NEXT(header) ((header >> 20) & 0xffc) + +#define PCI_EXT_CAP_ID_ERR 0x01 /* Advanced Error Reporting */ +#define PCI_EXT_CAP_ID_VC 0x02 /* Virtual Channel Capability */ +#define PCI_EXT_CAP_ID_DSN 0x03 /* Device Serial Number */ +#define PCI_EXT_CAP_ID_PWR 0x04 /* Power Budgeting */ +#define PCI_EXT_CAP_ID_RCLD 0x05 /* Root Complex Link Declaration */ +#define PCI_EXT_CAP_ID_RCILC 0x06 /* Root Complex Internal Link Control */ +#define PCI_EXT_CAP_ID_RCEC 0x07 /* Root Complex Event Collector */ +#define PCI_EXT_CAP_ID_MFVC 0x08 /* Multi-Function VC Capability */ +#define PCI_EXT_CAP_ID_VC9 0x09 /* same as _VC */ +#define PCI_EXT_CAP_ID_RCRB 0x0A /* Root Complex RB? */ +#define PCI_EXT_CAP_ID_VNDR 0x0B /* Vendor-Specific */ +#define PCI_EXT_CAP_ID_CAC 0x0C /* Config Access - obsolete */ +#define PCI_EXT_CAP_ID_ACS 0x0D /* Access Control Services */ +#define PCI_EXT_CAP_ID_ARI 0x0E /* Alternate Routing ID */ +#define PCI_EXT_CAP_ID_ATS 0x0F /* Address Translation Services */ +#define PCI_EXT_CAP_ID_SRIOV 0x10 /* Single Root I/O Virtualization */ +#define PCI_EXT_CAP_ID_MRIOV 0x11 /* Multi Root I/O Virtualization */ +#define PCI_EXT_CAP_ID_MCAST 0x12 /* Multicast */ +#define PCI_EXT_CAP_ID_PRI 0x13 /* Page Request Interface */ +#define PCI_EXT_CAP_ID_AMD_XXX 0x14 /* Reserved for AMD */ +#define PCI_EXT_CAP_ID_REBAR 0x15 /* Resizable BAR */ +#define PCI_EXT_CAP_ID_DPA 0x16 /* Dynamic Power Allocation */ +#define PCI_EXT_CAP_ID_TPH 0x17 /* TPH Requester */ +#define PCI_EXT_CAP_ID_LTR 0x18 /* Latency Tolerance Reporting */ +#define PCI_EXT_CAP_ID_SECPCI 0x19 /* Secondary PCIe Capability */ +#define PCI_EXT_CAP_ID_PMUX 0x1A /* Protocol Multiplexing */ +#define PCI_EXT_CAP_ID_PASID 0x1B /* Process Address Space ID */ +#define PCI_EXT_CAP_ID_DPC 0x1D /* Downstream Port Containment */ +#define PCI_EXT_CAP_ID_L1SS 0x1E /* L1 PM Substates */ +#define PCI_EXT_CAP_ID_PTM 0x1F /* Precision Time Measurement */ +#define PCI_EXT_CAP_ID_DVSEC 0x23 /* Designated Vendor-Specific */ +#define PCI_EXT_CAP_ID_DLF 0x25 /* Data Link Feature */ +#define PCI_EXT_CAP_ID_PL_16GT 0x26 /* Physical Layer 16.0 GT/s */ +#define PCI_EXT_CAP_ID_MAX PCI_EXT_CAP_ID_PL_16GT + +#define PCI_EXT_CAP_DSN_SIZEOF 12 +#define PCI_EXT_CAP_MCAST_ENDPOINT_SIZEOF 40 + +/* Advanced Error Reporting */ +#define PCI_ERR_UNCOR_STATUS 4 /* Uncorrectable Error Status */ +#define PCI_ERR_UNC_UND 0x00000001 /* Undefined */ +#define PCI_ERR_UNC_DLP 0x00000010 /* Data Link Protocol */ +#define PCI_ERR_UNC_SURPDN 0x00000020 /* Surprise Down */ +#define PCI_ERR_UNC_POISON_TLP 0x00001000 /* Poisoned TLP */ +#define PCI_ERR_UNC_FCP 0x00002000 /* Flow Control Protocol */ +#define PCI_ERR_UNC_COMP_TIME 0x00004000 /* Completion Timeout */ +#define PCI_ERR_UNC_COMP_ABORT 0x00008000 /* Completer Abort */ +#define PCI_ERR_UNC_UNX_COMP 0x00010000 /* Unexpected Completion */ +#define PCI_ERR_UNC_RX_OVER 0x00020000 /* Receiver Overflow */ +#define PCI_ERR_UNC_MALF_TLP 0x00040000 /* Malformed TLP */ +#define PCI_ERR_UNC_ECRC 0x00080000 /* ECRC Error Status */ +#define PCI_ERR_UNC_UNSUP 0x00100000 /* Unsupported Request */ +#define PCI_ERR_UNC_ACSV 0x00200000 /* ACS Violation */ +#define PCI_ERR_UNC_INTN 0x00400000 /* internal error */ +#define PCI_ERR_UNC_MCBTLP 0x00800000 /* MC blocked TLP */ +#define PCI_ERR_UNC_ATOMEG 0x01000000 /* Atomic egress blocked */ +#define PCI_ERR_UNC_TLPPRE 0x02000000 /* TLP prefix blocked */ +#define PCI_ERR_UNCOR_MASK 8 /* Uncorrectable Error Mask */ + /* Same bits as above */ +#define PCI_ERR_UNCOR_SEVER 12 /* Uncorrectable Error Severity */ + /* Same bits as above */ +#define PCI_ERR_COR_STATUS 16 /* Correctable Error Status */ +#define PCI_ERR_COR_RCVR 0x00000001 /* Receiver Error Status */ +#define PCI_ERR_COR_BAD_TLP 0x00000040 /* Bad TLP Status */ +#define PCI_ERR_COR_BAD_DLLP 0x00000080 /* Bad DLLP Status */ +#define PCI_ERR_COR_REP_ROLL 0x00000100 /* REPLAY_NUM Rollover */ +#define PCI_ERR_COR_REP_TIMER 0x00001000 /* Replay Timer Timeout */ +#define PCI_ERR_COR_ADV_NFAT 0x00002000 /* Advisory Non-Fatal */ +#define PCI_ERR_COR_INTERNAL 0x00004000 /* Corrected Internal */ +#define PCI_ERR_COR_LOG_OVER 0x00008000 /* Header Log Overflow */ +#define PCI_ERR_COR_MASK 20 /* Correctable Error Mask */ + /* Same bits as above */ +#define PCI_ERR_CAP 24 /* Advanced Error Capabilities */ +#define PCI_ERR_CAP_FEP(x) ((x) & 31) /* First Error Pointer */ +#define PCI_ERR_CAP_ECRC_GENC 0x00000020 /* ECRC Generation Capable */ +#define PCI_ERR_CAP_ECRC_GENE 0x00000040 /* ECRC Generation Enable */ +#define PCI_ERR_CAP_ECRC_CHKC 0x00000080 /* ECRC Check Capable */ +#define PCI_ERR_CAP_ECRC_CHKE 0x00000100 /* ECRC Check Enable */ +#define PCI_ERR_HEADER_LOG 28 /* Header Log Register (16 bytes) */ +#define PCI_ERR_ROOT_COMMAND 44 /* Root Error Command */ +#define PCI_ERR_ROOT_CMD_COR_EN 0x00000001 /* Correctable Err Reporting Enable */ +#define PCI_ERR_ROOT_CMD_NONFATAL_EN 0x00000002 /* Non-Fatal Err Reporting Enable */ +#define PCI_ERR_ROOT_CMD_FATAL_EN 0x00000004 /* Fatal Err Reporting Enable */ +#define PCI_ERR_ROOT_STATUS 48 +#define PCI_ERR_ROOT_COR_RCV 0x00000001 /* ERR_COR Received */ +#define PCI_ERR_ROOT_MULTI_COR_RCV 0x00000002 /* Multiple ERR_COR */ +#define PCI_ERR_ROOT_UNCOR_RCV 0x00000004 /* ERR_FATAL/NONFATAL */ +#define PCI_ERR_ROOT_MULTI_UNCOR_RCV 0x00000008 /* Multiple FATAL/NONFATAL */ +#define PCI_ERR_ROOT_FIRST_FATAL 0x00000010 /* First UNC is Fatal */ +#define PCI_ERR_ROOT_NONFATAL_RCV 0x00000020 /* Non-Fatal Received */ +#define PCI_ERR_ROOT_FATAL_RCV 0x00000040 /* Fatal Received */ +#define PCI_ERR_ROOT_AER_IRQ 0xf8000000 /* Advanced Error Interrupt Message Number */ +#define PCI_ERR_ROOT_ERR_SRC 52 /* Error Source Identification */ + +/* Virtual Channel */ +#define PCI_VC_PORT_CAP1 4 +#define PCI_VC_CAP1_EVCC 0x00000007 /* extended VC count */ +#define PCI_VC_CAP1_LPEVCC 0x00000070 /* low prio extended VC count */ +#define PCI_VC_CAP1_ARB_SIZE 0x00000c00 +#define PCI_VC_PORT_CAP2 8 +#define PCI_VC_CAP2_32_PHASE 0x00000002 +#define PCI_VC_CAP2_64_PHASE 0x00000004 +#define PCI_VC_CAP2_128_PHASE 0x00000008 +#define PCI_VC_CAP2_ARB_OFF 0xff000000 +#define PCI_VC_PORT_CTRL 12 +#define PCI_VC_PORT_CTRL_LOAD_TABLE 0x00000001 +#define PCI_VC_PORT_STATUS 14 +#define PCI_VC_PORT_STATUS_TABLE 0x00000001 +#define PCI_VC_RES_CAP 16 +#define PCI_VC_RES_CAP_32_PHASE 0x00000002 +#define PCI_VC_RES_CAP_64_PHASE 0x00000004 +#define PCI_VC_RES_CAP_128_PHASE 0x00000008 +#define PCI_VC_RES_CAP_128_PHASE_TB 0x00000010 +#define PCI_VC_RES_CAP_256_PHASE 0x00000020 +#define PCI_VC_RES_CAP_ARB_OFF 0xff000000 +#define PCI_VC_RES_CTRL 20 +#define PCI_VC_RES_CTRL_LOAD_TABLE 0x00010000 +#define PCI_VC_RES_CTRL_ARB_SELECT 0x000e0000 +#define PCI_VC_RES_CTRL_ID 0x07000000 +#define PCI_VC_RES_CTRL_ENABLE 0x80000000 +#define PCI_VC_RES_STATUS 26 +#define PCI_VC_RES_STATUS_TABLE 0x00000001 +#define PCI_VC_RES_STATUS_NEGO 0x00000002 +#define PCI_CAP_VC_BASE_SIZEOF 0x10 +#define PCI_CAP_VC_PER_VC_SIZEOF 0x0C + +/* Power Budgeting */ +#define PCI_PWR_DSR 4 /* Data Select Register */ +#define PCI_PWR_DATA 8 /* Data Register */ +#define PCI_PWR_DATA_BASE(x) ((x) & 0xff) /* Base Power */ +#define PCI_PWR_DATA_SCALE(x) (((x) >> 8) & 3) /* Data Scale */ +#define PCI_PWR_DATA_PM_SUB(x) (((x) >> 10) & 7) /* PM Sub State */ +#define PCI_PWR_DATA_PM_STATE(x) (((x) >> 13) & 3) /* PM State */ +#define PCI_PWR_DATA_TYPE(x) (((x) >> 15) & 7) /* Type */ +#define PCI_PWR_DATA_RAIL(x) (((x) >> 18) & 7) /* Power Rail */ +#define PCI_PWR_CAP 12 /* Capability */ +#define PCI_PWR_CAP_BUDGET(x) ((x) & 1) /* Included in system budget */ +#define PCI_EXT_CAP_PWR_SIZEOF 16 + +/* Root Complex Event Collector Endpoint Association */ +#define PCI_RCEC_RCIEP_BITMAP 4 /* Associated Bitmap for RCiEPs */ +#define PCI_RCEC_BUSN 8 /* RCEC Associated Bus Numbers */ +#define PCI_RCEC_BUSN_REG_VER 0x02 /* Least version with BUSN present */ +#define PCI_RCEC_BUSN_NEXT(x) (((x) >> 8) & 0xff) +#define PCI_RCEC_BUSN_LAST(x) (((x) >> 16) & 0xff) + +/* Vendor-Specific (VSEC, PCI_EXT_CAP_ID_VNDR) */ +#define PCI_VNDR_HEADER 4 /* Vendor-Specific Header */ +#define PCI_VNDR_HEADER_ID(x) ((x) & 0xffff) +#define PCI_VNDR_HEADER_REV(x) (((x) >> 16) & 0xf) +#define PCI_VNDR_HEADER_LEN(x) (((x) >> 20) & 0xfff) + +/* + * HyperTransport sub capability types + * + * Unfortunately there are both 3 bit and 5 bit capability types defined + * in the HT spec, catering for that is a little messy. You probably don't + * want to use these directly, just use pci_find_ht_capability() and it + * will do the right thing for you. + */ +#define HT_3BIT_CAP_MASK 0xE0 +#define HT_CAPTYPE_SLAVE 0x00 /* Slave/Primary link configuration */ +#define HT_CAPTYPE_HOST 0x20 /* Host/Secondary link configuration */ + +#define HT_5BIT_CAP_MASK 0xF8 +#define HT_CAPTYPE_IRQ 0x80 /* IRQ Configuration */ +#define HT_CAPTYPE_REMAPPING_40 0xA0 /* 40 bit address remapping */ +#define HT_CAPTYPE_REMAPPING_64 0xA2 /* 64 bit address remapping */ +#define HT_CAPTYPE_UNITID_CLUMP 0x90 /* Unit ID clumping */ +#define HT_CAPTYPE_EXTCONF 0x98 /* Extended Configuration Space Access */ +#define HT_CAPTYPE_MSI_MAPPING 0xA8 /* MSI Mapping Capability */ +#define HT_MSI_FLAGS 0x02 /* Offset to flags */ +#define HT_MSI_FLAGS_ENABLE 0x1 /* Mapping enable */ +#define HT_MSI_FLAGS_FIXED 0x2 /* Fixed mapping only */ +#define HT_MSI_FIXED_ADDR 0x00000000FEE00000ULL /* Fixed addr */ +#define HT_MSI_ADDR_LO 0x04 /* Offset to low addr bits */ +#define HT_MSI_ADDR_LO_MASK 0xFFF00000 /* Low address bit mask */ +#define HT_MSI_ADDR_HI 0x08 /* Offset to high addr bits */ +#define HT_CAPTYPE_DIRECT_ROUTE 0xB0 /* Direct routing configuration */ +#define HT_CAPTYPE_VCSET 0xB8 /* Virtual Channel configuration */ +#define HT_CAPTYPE_ERROR_RETRY 0xC0 /* Retry on error configuration */ +#define HT_CAPTYPE_GEN3 0xD0 /* Generation 3 HyperTransport configuration */ +#define HT_CAPTYPE_PM 0xE0 /* HyperTransport power management configuration */ +#define HT_CAP_SIZEOF_LONG 28 /* slave & primary */ +#define HT_CAP_SIZEOF_SHORT 24 /* host & secondary */ + +/* Alternative Routing-ID Interpretation */ +#define PCI_ARI_CAP 0x04 /* ARI Capability Register */ +#define PCI_ARI_CAP_MFVC 0x0001 /* MFVC Function Groups Capability */ +#define PCI_ARI_CAP_ACS 0x0002 /* ACS Function Groups Capability */ +#define PCI_ARI_CAP_NFN(x) (((x) >> 8) & 0xff) /* Next Function Number */ +#define PCI_ARI_CTRL 0x06 /* ARI Control Register */ +#define PCI_ARI_CTRL_MFVC 0x0001 /* MFVC Function Groups Enable */ +#define PCI_ARI_CTRL_ACS 0x0002 /* ACS Function Groups Enable */ +#define PCI_ARI_CTRL_FG(x) (((x) >> 4) & 7) /* Function Group */ +#define PCI_EXT_CAP_ARI_SIZEOF 8 + +/* Address Translation Service */ +#define PCI_ATS_CAP 0x04 /* ATS Capability Register */ +#define PCI_ATS_CAP_QDEP(x) ((x) & 0x1f) /* Invalidate Queue Depth */ +#define PCI_ATS_MAX_QDEP 32 /* Max Invalidate Queue Depth */ +#define PCI_ATS_CAP_PAGE_ALIGNED 0x0020 /* Page Aligned Request */ +#define PCI_ATS_CTRL 0x06 /* ATS Control Register */ +#define PCI_ATS_CTRL_ENABLE 0x8000 /* ATS Enable */ +#define PCI_ATS_CTRL_STU(x) ((x) & 0x1f) /* Smallest Translation Unit */ +#define PCI_ATS_MIN_STU 12 /* shift of minimum STU block */ +#define PCI_EXT_CAP_ATS_SIZEOF 8 + +/* Page Request Interface */ +#define PCI_PRI_CTRL 0x04 /* PRI control register */ +#define PCI_PRI_CTRL_ENABLE 0x0001 /* Enable */ +#define PCI_PRI_CTRL_RESET 0x0002 /* Reset */ +#define PCI_PRI_STATUS 0x06 /* PRI status register */ +#define PCI_PRI_STATUS_RF 0x0001 /* Response Failure */ +#define PCI_PRI_STATUS_UPRGI 0x0002 /* Unexpected PRG index */ +#define PCI_PRI_STATUS_STOPPED 0x0100 /* PRI Stopped */ +#define PCI_PRI_STATUS_PASID 0x8000 /* PRG Response PASID Required */ +#define PCI_PRI_MAX_REQ 0x08 /* PRI max reqs supported */ +#define PCI_PRI_ALLOC_REQ 0x0c /* PRI max reqs allowed */ +#define PCI_EXT_CAP_PRI_SIZEOF 16 + +/* Process Address Space ID */ +#define PCI_PASID_CAP 0x04 /* PASID feature register */ +#define PCI_PASID_CAP_EXEC 0x02 /* Exec permissions Supported */ +#define PCI_PASID_CAP_PRIV 0x04 /* Privilege Mode Supported */ +#define PCI_PASID_CTRL 0x06 /* PASID control register */ +#define PCI_PASID_CTRL_ENABLE 0x01 /* Enable bit */ +#define PCI_PASID_CTRL_EXEC 0x02 /* Exec permissions Enable */ +#define PCI_PASID_CTRL_PRIV 0x04 /* Privilege Mode Enable */ +#define PCI_EXT_CAP_PASID_SIZEOF 8 + +/* Single Root I/O Virtualization */ +#define PCI_SRIOV_CAP 0x04 /* SR-IOV Capabilities */ +#define PCI_SRIOV_CAP_VFM 0x00000001 /* VF Migration Capable */ +#define PCI_SRIOV_CAP_INTR(x) ((x) >> 21) /* Interrupt Message Number */ +#define PCI_SRIOV_CTRL 0x08 /* SR-IOV Control */ +#define PCI_SRIOV_CTRL_VFE 0x0001 /* VF Enable */ +#define PCI_SRIOV_CTRL_VFM 0x0002 /* VF Migration Enable */ +#define PCI_SRIOV_CTRL_INTR 0x0004 /* VF Migration Interrupt Enable */ +#define PCI_SRIOV_CTRL_MSE 0x0008 /* VF Memory Space Enable */ +#define PCI_SRIOV_CTRL_ARI 0x0010 /* ARI Capable Hierarchy */ +#define PCI_SRIOV_STATUS 0x0a /* SR-IOV Status */ +#define PCI_SRIOV_STATUS_VFM 0x0001 /* VF Migration Status */ +#define PCI_SRIOV_INITIAL_VF 0x0c /* Initial VFs */ +#define PCI_SRIOV_TOTAL_VF 0x0e /* Total VFs */ +#define PCI_SRIOV_NUM_VF 0x10 /* Number of VFs */ +#define PCI_SRIOV_FUNC_LINK 0x12 /* Function Dependency Link */ +#define PCI_SRIOV_VF_OFFSET 0x14 /* First VF Offset */ +#define PCI_SRIOV_VF_STRIDE 0x16 /* Following VF Stride */ +#define PCI_SRIOV_VF_DID 0x1a /* VF Device ID */ +#define PCI_SRIOV_SUP_PGSIZE 0x1c /* Supported Page Sizes */ +#define PCI_SRIOV_SYS_PGSIZE 0x20 /* System Page Size */ +#define PCI_SRIOV_BAR 0x24 /* VF BAR0 */ +#define PCI_SRIOV_NUM_BARS 6 /* Number of VF BARs */ +#define PCI_SRIOV_VFM 0x3c /* VF Migration State Array Offset*/ +#define PCI_SRIOV_VFM_BIR(x) ((x) & 7) /* State BIR */ +#define PCI_SRIOV_VFM_OFFSET(x) ((x) & ~7) /* State Offset */ +#define PCI_SRIOV_VFM_UA 0x0 /* Inactive.Unavailable */ +#define PCI_SRIOV_VFM_MI 0x1 /* Dormant.MigrateIn */ +#define PCI_SRIOV_VFM_MO 0x2 /* Active.MigrateOut */ +#define PCI_SRIOV_VFM_AV 0x3 /* Active.Available */ +#define PCI_EXT_CAP_SRIOV_SIZEOF 64 + +#define PCI_LTR_MAX_SNOOP_LAT 0x4 +#define PCI_LTR_MAX_NOSNOOP_LAT 0x6 +#define PCI_LTR_VALUE_MASK 0x000003ff +#define PCI_LTR_SCALE_MASK 0x00001c00 +#define PCI_LTR_SCALE_SHIFT 10 +#define PCI_EXT_CAP_LTR_SIZEOF 8 + +/* Access Control Service */ +#define PCI_ACS_CAP 0x04 /* ACS Capability Register */ +#define PCI_ACS_SV 0x0001 /* Source Validation */ +#define PCI_ACS_TB 0x0002 /* Translation Blocking */ +#define PCI_ACS_RR 0x0004 /* P2P Request Redirect */ +#define PCI_ACS_CR 0x0008 /* P2P Completion Redirect */ +#define PCI_ACS_UF 0x0010 /* Upstream Forwarding */ +#define PCI_ACS_EC 0x0020 /* P2P Egress Control */ +#define PCI_ACS_DT 0x0040 /* Direct Translated P2P */ +#define PCI_ACS_EGRESS_BITS 0x05 /* ACS Egress Control Vector Size */ +#define PCI_ACS_CTRL 0x06 /* ACS Control Register */ +#define PCI_ACS_EGRESS_CTL_V 0x08 /* ACS Egress Control Vector */ + +#define PCI_VSEC_HDR 4 /* extended cap - vendor-specific */ +#define PCI_VSEC_HDR_LEN_SHIFT 20 /* shift for length field */ + +/* SATA capability */ +#define PCI_SATA_REGS 4 /* SATA REGs specifier */ +#define PCI_SATA_REGS_MASK 0xF /* location - BAR#/inline */ +#define PCI_SATA_REGS_INLINE 0xF /* REGS in config space */ +#define PCI_SATA_SIZEOF_SHORT 8 +#define PCI_SATA_SIZEOF_LONG 16 + +/* Resizable BARs */ +#define PCI_REBAR_CAP 4 /* capability register */ +#define PCI_REBAR_CAP_SIZES 0x00FFFFF0 /* supported BAR sizes */ +#define PCI_REBAR_CTRL 8 /* control register */ +#define PCI_REBAR_CTRL_BAR_IDX 0x00000007 /* BAR index */ +#define PCI_REBAR_CTRL_NBAR_MASK 0x000000E0 /* # of resizable BARs */ +#define PCI_REBAR_CTRL_NBAR_SHIFT 5 /* shift for # of BARs */ +#define PCI_REBAR_CTRL_BAR_SIZE 0x00001F00 /* BAR size */ +#define PCI_REBAR_CTRL_BAR_SHIFT 8 /* shift for BAR size */ + +/* Dynamic Power Allocation */ +#define PCI_DPA_CAP 4 /* capability register */ +#define PCI_DPA_CAP_SUBSTATE_MASK 0x1F /* # substates - 1 */ +#define PCI_DPA_BASE_SIZEOF 16 /* size with 0 substates */ + +/* TPH Requester */ +#define PCI_TPH_CAP 4 /* capability register */ +#define PCI_TPH_CAP_LOC_MASK 0x600 /* location mask */ +#define PCI_TPH_LOC_NONE 0x000 /* no location */ +#define PCI_TPH_LOC_CAP 0x200 /* in capability */ +#define PCI_TPH_LOC_MSIX 0x400 /* in MSI-X */ +#define PCI_TPH_CAP_ST_MASK 0x07FF0000 /* st table mask */ +#define PCI_TPH_CAP_ST_SHIFT 16 /* st table shift */ +#define PCI_TPH_BASE_SIZEOF 12 /* size with no st table */ + +/* Downstream Port Containment */ +#define PCI_EXP_DPC_CAP 4 /* DPC Capability */ +#define PCI_EXP_DPC_IRQ 0x001F /* Interrupt Message Number */ +#define PCI_EXP_DPC_CAP_RP_EXT 0x0020 /* Root Port Extensions */ +#define PCI_EXP_DPC_CAP_POISONED_TLP 0x0040 /* Poisoned TLP Egress Blocking Supported */ +#define PCI_EXP_DPC_CAP_SW_TRIGGER 0x0080 /* Software Triggering Supported */ +#define PCI_EXP_DPC_RP_PIO_LOG_SIZE 0x0F00 /* RP PIO Log Size */ +#define PCI_EXP_DPC_CAP_DL_ACTIVE 0x1000 /* ERR_COR signal on DL_Active supported */ + +#define PCI_EXP_DPC_CTL 6 /* DPC control */ +#define PCI_EXP_DPC_CTL_EN_FATAL 0x0001 /* Enable trigger on ERR_FATAL message */ +#define PCI_EXP_DPC_CTL_EN_NONFATAL 0x0002 /* Enable trigger on ERR_NONFATAL message */ +#define PCI_EXP_DPC_CTL_INT_EN 0x0008 /* DPC Interrupt Enable */ + +#define PCI_EXP_DPC_STATUS 8 /* DPC Status */ +#define PCI_EXP_DPC_STATUS_TRIGGER 0x0001 /* Trigger Status */ +#define PCI_EXP_DPC_STATUS_TRIGGER_RSN 0x0006 /* Trigger Reason */ +#define PCI_EXP_DPC_STATUS_INTERRUPT 0x0008 /* Interrupt Status */ +#define PCI_EXP_DPC_RP_BUSY 0x0010 /* Root Port Busy */ +#define PCI_EXP_DPC_STATUS_TRIGGER_RSN_EXT 0x0060 /* Trig Reason Extension */ + +#define PCI_EXP_DPC_SOURCE_ID 10 /* DPC Source Identifier */ + +#define PCI_EXP_DPC_RP_PIO_STATUS 0x0C /* RP PIO Status */ +#define PCI_EXP_DPC_RP_PIO_MASK 0x10 /* RP PIO Mask */ +#define PCI_EXP_DPC_RP_PIO_SEVERITY 0x14 /* RP PIO Severity */ +#define PCI_EXP_DPC_RP_PIO_SYSERROR 0x18 /* RP PIO SysError */ +#define PCI_EXP_DPC_RP_PIO_EXCEPTION 0x1C /* RP PIO Exception */ +#define PCI_EXP_DPC_RP_PIO_HEADER_LOG 0x20 /* RP PIO Header Log */ +#define PCI_EXP_DPC_RP_PIO_IMPSPEC_LOG 0x30 /* RP PIO ImpSpec Log */ +#define PCI_EXP_DPC_RP_PIO_TLPPREFIX_LOG 0x34 /* RP PIO TLP Prefix Log */ + +/* Precision Time Measurement */ +#define PCI_PTM_CAP 0x04 /* PTM Capability */ +#define PCI_PTM_CAP_REQ 0x00000001 /* Requester capable */ +#define PCI_PTM_CAP_ROOT 0x00000004 /* Root capable */ +#define PCI_PTM_GRANULARITY_MASK 0x0000FF00 /* Clock granularity */ +#define PCI_PTM_CTRL 0x08 /* PTM Control */ +#define PCI_PTM_CTRL_ENABLE 0x00000001 /* PTM enable */ +#define PCI_PTM_CTRL_ROOT 0x00000002 /* Root select */ + +/* ASPM L1 PM Substates */ +#define PCI_L1SS_CAP 0x04 /* Capabilities Register */ +#define PCI_L1SS_CAP_PCIPM_L1_2 0x00000001 /* PCI-PM L1.2 Supported */ +#define PCI_L1SS_CAP_PCIPM_L1_1 0x00000002 /* PCI-PM L1.1 Supported */ +#define PCI_L1SS_CAP_ASPM_L1_2 0x00000004 /* ASPM L1.2 Supported */ +#define PCI_L1SS_CAP_ASPM_L1_1 0x00000008 /* ASPM L1.1 Supported */ +#define PCI_L1SS_CAP_L1_PM_SS 0x00000010 /* L1 PM Substates Supported */ +#define PCI_L1SS_CAP_CM_RESTORE_TIME 0x0000ff00 /* Port Common_Mode_Restore_Time */ +#define PCI_L1SS_CAP_P_PWR_ON_SCALE 0x00030000 /* Port T_POWER_ON scale */ +#define PCI_L1SS_CAP_P_PWR_ON_VALUE 0x00f80000 /* Port T_POWER_ON value */ +#define PCI_L1SS_CTL1 0x08 /* Control 1 Register */ +#define PCI_L1SS_CTL1_PCIPM_L1_2 0x00000001 /* PCI-PM L1.2 Enable */ +#define PCI_L1SS_CTL1_PCIPM_L1_1 0x00000002 /* PCI-PM L1.1 Enable */ +#define PCI_L1SS_CTL1_ASPM_L1_2 0x00000004 /* ASPM L1.2 Enable */ +#define PCI_L1SS_CTL1_ASPM_L1_1 0x00000008 /* ASPM L1.1 Enable */ +#define PCI_L1SS_CTL1_L1_2_MASK 0x00000005 +#define PCI_L1SS_CTL1_L1SS_MASK 0x0000000f +#define PCI_L1SS_CTL1_CM_RESTORE_TIME 0x0000ff00 /* Common_Mode_Restore_Time */ +#define PCI_L1SS_CTL1_LTR_L12_TH_VALUE 0x03ff0000 /* LTR_L1.2_THRESHOLD_Value */ +#define PCI_L1SS_CTL1_LTR_L12_TH_SCALE 0xe0000000 /* LTR_L1.2_THRESHOLD_Scale */ +#define PCI_L1SS_CTL2 0x0c /* Control 2 Register */ + +/* Designated Vendor-Specific (DVSEC, PCI_EXT_CAP_ID_DVSEC) */ +#define PCI_DVSEC_HEADER1 0x4 /* Designated Vendor-Specific Header1 */ +#define PCI_DVSEC_HEADER2 0x8 /* Designated Vendor-Specific Header2 */ + +/* Data Link Feature */ +#define PCI_DLF_CAP 0x04 /* Capabilities Register */ +#define PCI_DLF_EXCHANGE_ENABLE 0x80000000 /* Data Link Feature Exchange Enable */ + +/* Physical Layer 16.0 GT/s */ +#define PCI_PL_16GT_LE_CTRL 0x20 /* Lane Equalization Control Register */ +#define PCI_PL_16GT_LE_CTRL_DSP_TX_PRESET_MASK 0x0000000F +#define PCI_PL_16GT_LE_CTRL_USP_TX_PRESET_MASK 0x000000F0 +#define PCI_PL_16GT_LE_CTRL_USP_TX_PRESET_SHIFT 4 + +#endif /* LINUX_PCI_REGS_H */ diff --git a/include/standard-headers/linux/qemu_fw_cfg.h b/include/standard-headers/linux/qemu_fw_cfg.h new file mode 100644 index 000000000..cb93f6678 --- /dev/null +++ b/include/standard-headers/linux/qemu_fw_cfg.h @@ -0,0 +1,97 @@ +/* SPDX-License-Identifier: BSD-3-Clause */ +#ifndef _LINUX_FW_CFG_H +#define _LINUX_FW_CFG_H + +#include "standard-headers/linux/types.h" + +#define FW_CFG_ACPI_DEVICE_ID "QEMU0002" + +/* selector key values for "well-known" fw_cfg entries */ +#define FW_CFG_SIGNATURE 0x00 +#define FW_CFG_ID 0x01 +#define FW_CFG_UUID 0x02 +#define FW_CFG_RAM_SIZE 0x03 +#define FW_CFG_NOGRAPHIC 0x04 +#define FW_CFG_NB_CPUS 0x05 +#define FW_CFG_MACHINE_ID 0x06 +#define FW_CFG_KERNEL_ADDR 0x07 +#define FW_CFG_KERNEL_SIZE 0x08 +#define FW_CFG_KERNEL_CMDLINE 0x09 +#define FW_CFG_INITRD_ADDR 0x0a +#define FW_CFG_INITRD_SIZE 0x0b +#define FW_CFG_BOOT_DEVICE 0x0c +#define FW_CFG_NUMA 0x0d +#define FW_CFG_BOOT_MENU 0x0e +#define FW_CFG_MAX_CPUS 0x0f +#define FW_CFG_KERNEL_ENTRY 0x10 +#define FW_CFG_KERNEL_DATA 0x11 +#define FW_CFG_INITRD_DATA 0x12 +#define FW_CFG_CMDLINE_ADDR 0x13 +#define FW_CFG_CMDLINE_SIZE 0x14 +#define FW_CFG_CMDLINE_DATA 0x15 +#define FW_CFG_SETUP_ADDR 0x16 +#define FW_CFG_SETUP_SIZE 0x17 +#define FW_CFG_SETUP_DATA 0x18 +#define FW_CFG_FILE_DIR 0x19 + +#define FW_CFG_FILE_FIRST 0x20 +#define FW_CFG_FILE_SLOTS_MIN 0x10 + +#define FW_CFG_WRITE_CHANNEL 0x4000 +#define FW_CFG_ARCH_LOCAL 0x8000 +#define FW_CFG_ENTRY_MASK (~(FW_CFG_WRITE_CHANNEL | FW_CFG_ARCH_LOCAL)) + +#define FW_CFG_INVALID 0xffff + +/* width in bytes of fw_cfg control register */ +#define FW_CFG_CTL_SIZE 0x02 + +/* fw_cfg "file name" is up to 56 characters (including terminating nul) */ +#define FW_CFG_MAX_FILE_PATH 56 + +/* size in bytes of fw_cfg signature */ +#define FW_CFG_SIG_SIZE 4 + +/* FW_CFG_ID bits */ +#define FW_CFG_VERSION 0x01 +#define FW_CFG_VERSION_DMA 0x02 + +/* fw_cfg file directory entry type */ +struct fw_cfg_file { + uint32_t size; + uint16_t select; + uint16_t reserved; + char name[FW_CFG_MAX_FILE_PATH]; +}; + +/* FW_CFG_DMA_CONTROL bits */ +#define FW_CFG_DMA_CTL_ERROR 0x01 +#define FW_CFG_DMA_CTL_READ 0x02 +#define FW_CFG_DMA_CTL_SKIP 0x04 +#define FW_CFG_DMA_CTL_SELECT 0x08 +#define FW_CFG_DMA_CTL_WRITE 0x10 + +#define FW_CFG_DMA_SIGNATURE 0x51454d5520434647ULL /* "QEMU CFG" */ + +/* Control as first field allows for different structures selected by this + * field, which might be useful in the future + */ +struct fw_cfg_dma_access { + uint32_t control; + uint32_t length; + uint64_t address; +}; + +#define FW_CFG_VMCOREINFO_FILENAME "etc/vmcoreinfo" + +#define FW_CFG_VMCOREINFO_FORMAT_NONE 0x0 +#define FW_CFG_VMCOREINFO_FORMAT_ELF 0x1 + +struct fw_cfg_vmcoreinfo { + uint16_t host_format; + uint16_t guest_format; + uint32_t size; + uint64_t paddr; +}; + +#endif diff --git a/include/standard-headers/linux/sysinfo.h b/include/standard-headers/linux/sysinfo.h new file mode 100644 index 000000000..e3c06aca8 --- /dev/null +++ b/include/standard-headers/linux/sysinfo.h @@ -0,0 +1,25 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ +#ifndef _LINUX_SYSINFO_H +#define _LINUX_SYSINFO_H + +#include "standard-headers/linux/types.h" + +#define SI_LOAD_SHIFT 16 +struct sysinfo { + long uptime; /* Seconds since boot */ + unsigned long loads[3]; /* 1, 5, and 15 minute load averages */ + unsigned long totalram; /* Total usable main memory size */ + unsigned long freeram; /* Available memory size */ + unsigned long sharedram; /* Amount of shared memory */ + unsigned long bufferram; /* Memory used by buffers */ + unsigned long totalswap; /* Total swap space size */ + unsigned long freeswap; /* swap space still available */ + uint16_t procs; /* Number of current processes */ + uint16_t pad; /* Explicit padding for m68k */ + unsigned long totalhigh; /* Total high memory size */ + unsigned long freehigh; /* Available high memory size */ + uint32_t mem_unit; /* Memory unit size in bytes */ + char _f[20-2*sizeof(unsigned long)-sizeof(uint32_t)]; /* Padding: libc5 uses this.. */ +}; + +#endif /* _LINUX_SYSINFO_H */ diff --git a/include/standard-headers/linux/types.h b/include/standard-headers/linux/types.h new file mode 100644 index 000000000..9dbbc73e4 --- /dev/null +++ b/include/standard-headers/linux/types.h @@ -0,0 +1,3 @@ +/* For QEMU all types are already defined via osdep.h, so this + * header does not need to do anything. + */ diff --git a/include/standard-headers/linux/vhost_types.h b/include/standard-headers/linux/vhost_types.h new file mode 100644 index 000000000..0bd2684a2 --- /dev/null +++ b/include/standard-headers/linux/vhost_types.h @@ -0,0 +1,156 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ +#ifndef _LINUX_VHOST_TYPES_H +#define _LINUX_VHOST_TYPES_H +/* Userspace interface for in-kernel virtio accelerators. */ + +/* vhost is used to reduce the number of system calls involved in virtio. + * + * Existing virtio net code is used in the guest without modification. + * + * This header includes interface used by userspace hypervisor for + * device configuration. + */ + +#include "standard-headers/linux/types.h" + +#include "standard-headers/linux/virtio_config.h" +#include "standard-headers/linux/virtio_ring.h" + +struct vhost_vring_state { + unsigned int index; + unsigned int num; +}; + +struct vhost_vring_file { + unsigned int index; + int fd; /* Pass -1 to unbind from file. */ + +}; + +struct vhost_vring_addr { + unsigned int index; + /* Option flags. */ + unsigned int flags; + /* Flag values: */ + /* Whether log address is valid. If set enables logging. */ +#define VHOST_VRING_F_LOG 0 + + /* Start of array of descriptors (virtually contiguous) */ + uint64_t desc_user_addr; + /* Used structure address. Must be 32 bit aligned */ + uint64_t used_user_addr; + /* Available structure address. Must be 16 bit aligned */ + uint64_t avail_user_addr; + /* Logging support. */ + /* Log writes to used structure, at offset calculated from specified + * address. Address must be 32 bit aligned. */ + uint64_t log_guest_addr; +}; + +/* no alignment requirement */ +struct vhost_iotlb_msg { + uint64_t iova; + uint64_t size; + uint64_t uaddr; +#define VHOST_ACCESS_RO 0x1 +#define VHOST_ACCESS_WO 0x2 +#define VHOST_ACCESS_RW 0x3 + uint8_t perm; +#define VHOST_IOTLB_MISS 1 +#define VHOST_IOTLB_UPDATE 2 +#define VHOST_IOTLB_INVALIDATE 3 +#define VHOST_IOTLB_ACCESS_FAIL 4 +/* + * VHOST_IOTLB_BATCH_BEGIN and VHOST_IOTLB_BATCH_END allow modifying + * multiple mappings in one go: beginning with + * VHOST_IOTLB_BATCH_BEGIN, followed by any number of + * VHOST_IOTLB_UPDATE messages, and ending with VHOST_IOTLB_BATCH_END. + * When one of these two values is used as the message type, the rest + * of the fields in the message are ignored. There's no guarantee that + * these changes take place automatically in the device. + */ +#define VHOST_IOTLB_BATCH_BEGIN 5 +#define VHOST_IOTLB_BATCH_END 6 + uint8_t type; +}; + +#define VHOST_IOTLB_MSG 0x1 +#define VHOST_IOTLB_MSG_V2 0x2 + +struct vhost_msg { + int type; + union { + struct vhost_iotlb_msg iotlb; + uint8_t padding[64]; + }; +}; + +struct vhost_msg_v2 { + uint32_t type; + uint32_t reserved; + union { + struct vhost_iotlb_msg iotlb; + uint8_t padding[64]; + }; +}; + +struct vhost_memory_region { + uint64_t guest_phys_addr; + uint64_t memory_size; /* bytes */ + uint64_t userspace_addr; + uint64_t flags_padding; /* No flags are currently specified. */ +}; + +/* All region addresses and sizes must be 4K aligned. */ +#define VHOST_PAGE_SIZE 0x1000 + +struct vhost_memory { + uint32_t nregions; + uint32_t padding; + struct vhost_memory_region regions[0]; +}; + +/* VHOST_SCSI specific definitions */ + +/* + * Used by QEMU userspace to ensure a consistent vhost-scsi ABI. + * + * ABI Rev 0: July 2012 version starting point for v3.6-rc merge candidate + + * RFC-v2 vhost-scsi userspace. Add GET_ABI_VERSION ioctl usage + * ABI Rev 1: January 2013. Ignore vhost_tpgt field in struct vhost_scsi_target. + * All the targets under vhost_wwpn can be seen and used by guset. + */ + +#define VHOST_SCSI_ABI_VERSION 1 + +struct vhost_scsi_target { + int abi_version; + char vhost_wwpn[224]; /* TRANSPORT_IQN_LEN */ + unsigned short vhost_tpgt; + unsigned short reserved; +}; + +/* VHOST_VDPA specific definitions */ + +struct vhost_vdpa_config { + uint32_t off; + uint32_t len; + uint8_t buf[0]; +}; + +/* vhost vdpa IOVA range + * @first: First address that can be mapped by vhost-vDPA + * @last: Last address that can be mapped by vhost-vDPA + */ +struct vhost_vdpa_iova_range { + uint64_t first; + uint64_t last; +}; + +/* Feature bits */ +/* Log all write descriptors. Can be changed while device is active. */ +#define VHOST_F_LOG_ALL 26 +/* vhost-net should add virtio_net_hdr for RX, and strip for TX packets. */ +#define VHOST_NET_F_VIRTIO_NET_HDR 27 + +#endif diff --git a/include/standard-headers/linux/virtio_9p.h b/include/standard-headers/linux/virtio_9p.h new file mode 100644 index 000000000..f5604fc5f --- /dev/null +++ b/include/standard-headers/linux/virtio_9p.h @@ -0,0 +1,44 @@ +#ifndef _LINUX_VIRTIO_9P_H +#define _LINUX_VIRTIO_9P_H +/* This header is BSD licensed so anyone can use the definitions to implement + * compatible drivers/servers. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of IBM nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL IBM OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. */ +#include "standard-headers/linux/virtio_types.h" +#include "standard-headers/linux/virtio_ids.h" +#include "standard-headers/linux/virtio_config.h" + +/* The feature bitmap for virtio 9P */ + +/* The mount point is specified in a config variable */ +#define VIRTIO_9P_MOUNT_TAG 0 + +struct virtio_9p_config { + /* length of the tag name */ + __virtio16 tag_len; + /* non-NULL terminated tag name */ + uint8_t tag[0]; +} QEMU_PACKED; + +#endif /* _LINUX_VIRTIO_9P_H */ diff --git a/include/standard-headers/linux/virtio_balloon.h b/include/standard-headers/linux/virtio_balloon.h new file mode 100644 index 000000000..f343bfefd --- /dev/null +++ b/include/standard-headers/linux/virtio_balloon.h @@ -0,0 +1,119 @@ +#ifndef _LINUX_VIRTIO_BALLOON_H +#define _LINUX_VIRTIO_BALLOON_H +/* This header is BSD licensed so anyone can use the definitions to implement + * compatible drivers/servers. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of IBM nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL IBM OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. */ +#include "standard-headers/linux/types.h" +#include "standard-headers/linux/virtio_types.h" +#include "standard-headers/linux/virtio_ids.h" +#include "standard-headers/linux/virtio_config.h" + +/* The feature bitmap for virtio balloon */ +#define VIRTIO_BALLOON_F_MUST_TELL_HOST 0 /* Tell before reclaiming pages */ +#define VIRTIO_BALLOON_F_STATS_VQ 1 /* Memory Stats virtqueue */ +#define VIRTIO_BALLOON_F_DEFLATE_ON_OOM 2 /* Deflate balloon on OOM */ +#define VIRTIO_BALLOON_F_FREE_PAGE_HINT 3 /* VQ to report free pages */ +#define VIRTIO_BALLOON_F_PAGE_POISON 4 /* Guest is using page poisoning */ +#define VIRTIO_BALLOON_F_REPORTING 5 /* Page reporting virtqueue */ + +/* Size of a PFN in the balloon interface. */ +#define VIRTIO_BALLOON_PFN_SHIFT 12 + +#define VIRTIO_BALLOON_CMD_ID_STOP 0 +#define VIRTIO_BALLOON_CMD_ID_DONE 1 +struct virtio_balloon_config { + /* Number of pages host wants Guest to give up. */ + uint32_t num_pages; + /* Number of pages we've actually got in balloon. */ + uint32_t actual; + /* + * Free page hint command id, readonly by guest. + * Was previously named free_page_report_cmd_id so we + * need to carry that name for legacy support. + */ + union { + uint32_t free_page_hint_cmd_id; + uint32_t free_page_report_cmd_id; /* deprecated */ + }; + /* Stores PAGE_POISON if page poisoning is in use */ + uint32_t poison_val; +}; + +#define VIRTIO_BALLOON_S_SWAP_IN 0 /* Amount of memory swapped in */ +#define VIRTIO_BALLOON_S_SWAP_OUT 1 /* Amount of memory swapped out */ +#define VIRTIO_BALLOON_S_MAJFLT 2 /* Number of major faults */ +#define VIRTIO_BALLOON_S_MINFLT 3 /* Number of minor faults */ +#define VIRTIO_BALLOON_S_MEMFREE 4 /* Total amount of free memory */ +#define VIRTIO_BALLOON_S_MEMTOT 5 /* Total amount of memory */ +#define VIRTIO_BALLOON_S_AVAIL 6 /* Available memory as in /proc */ +#define VIRTIO_BALLOON_S_CACHES 7 /* Disk caches */ +#define VIRTIO_BALLOON_S_HTLB_PGALLOC 8 /* Hugetlb page allocations */ +#define VIRTIO_BALLOON_S_HTLB_PGFAIL 9 /* Hugetlb page allocation failures */ +#define VIRTIO_BALLOON_S_NR 10 + +#define VIRTIO_BALLOON_S_NAMES_WITH_PREFIX(VIRTIO_BALLOON_S_NAMES_prefix) { \ + VIRTIO_BALLOON_S_NAMES_prefix "swap-in", \ + VIRTIO_BALLOON_S_NAMES_prefix "swap-out", \ + VIRTIO_BALLOON_S_NAMES_prefix "major-faults", \ + VIRTIO_BALLOON_S_NAMES_prefix "minor-faults", \ + VIRTIO_BALLOON_S_NAMES_prefix "free-memory", \ + VIRTIO_BALLOON_S_NAMES_prefix "total-memory", \ + VIRTIO_BALLOON_S_NAMES_prefix "available-memory", \ + VIRTIO_BALLOON_S_NAMES_prefix "disk-caches", \ + VIRTIO_BALLOON_S_NAMES_prefix "hugetlb-allocations", \ + VIRTIO_BALLOON_S_NAMES_prefix "hugetlb-failures" \ +} + +#define VIRTIO_BALLOON_S_NAMES VIRTIO_BALLOON_S_NAMES_WITH_PREFIX("") + +/* + * Memory statistics structure. + * Driver fills an array of these structures and passes to device. + * + * NOTE: fields are laid out in a way that would make compiler add padding + * between and after fields, so we have to use compiler-specific attributes to + * pack it, to disable this padding. This also often causes compiler to + * generate suboptimal code. + * + * We maintain this statistics structure format for backwards compatibility, + * but don't follow this example. + * + * If implementing a similar structure, do something like the below instead: + * struct virtio_balloon_stat { + * __virtio16 tag; + * uint8_t reserved[6]; + * __virtio64 val; + * }; + * + * In other words, add explicit reserved fields to align field and + * structure boundaries at field size, avoiding compiler padding + * without the packed attribute. + */ +struct virtio_balloon_stat { + __virtio16 tag; + __virtio64 val; +} QEMU_PACKED; + +#endif /* _LINUX_VIRTIO_BALLOON_H */ diff --git a/include/standard-headers/linux/virtio_blk.h b/include/standard-headers/linux/virtio_blk.h new file mode 100644 index 000000000..2dcc90826 --- /dev/null +++ b/include/standard-headers/linux/virtio_blk.h @@ -0,0 +1,201 @@ +#ifndef _LINUX_VIRTIO_BLK_H +#define _LINUX_VIRTIO_BLK_H +/* This header is BSD licensed so anyone can use the definitions to implement + * compatible drivers/servers. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of IBM nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL IBM OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. */ +#include "standard-headers/linux/types.h" +#include "standard-headers/linux/virtio_ids.h" +#include "standard-headers/linux/virtio_config.h" +#include "standard-headers/linux/virtio_types.h" + +/* Feature bits */ +#define VIRTIO_BLK_F_SIZE_MAX 1 /* Indicates maximum segment size */ +#define VIRTIO_BLK_F_SEG_MAX 2 /* Indicates maximum # of segments */ +#define VIRTIO_BLK_F_GEOMETRY 4 /* Legacy geometry available */ +#define VIRTIO_BLK_F_RO 5 /* Disk is read-only */ +#define VIRTIO_BLK_F_BLK_SIZE 6 /* Block size of disk is available*/ +#define VIRTIO_BLK_F_TOPOLOGY 10 /* Topology information is available */ +#define VIRTIO_BLK_F_MQ 12 /* support more than one vq */ +#define VIRTIO_BLK_F_DISCARD 13 /* DISCARD is supported */ +#define VIRTIO_BLK_F_WRITE_ZEROES 14 /* WRITE ZEROES is supported */ + +/* Legacy feature bits */ +#ifndef VIRTIO_BLK_NO_LEGACY +#define VIRTIO_BLK_F_BARRIER 0 /* Does host support barriers? */ +#define VIRTIO_BLK_F_SCSI 7 /* Supports scsi command passthru */ +#define VIRTIO_BLK_F_FLUSH 9 /* Flush command supported */ +#define VIRTIO_BLK_F_CONFIG_WCE 11 /* Writeback mode available in config */ +/* Old (deprecated) name for VIRTIO_BLK_F_FLUSH. */ +#define VIRTIO_BLK_F_WCE VIRTIO_BLK_F_FLUSH +#endif /* !VIRTIO_BLK_NO_LEGACY */ + +#define VIRTIO_BLK_ID_BYTES 20 /* ID string length */ + +struct virtio_blk_config { + /* The capacity (in 512-byte sectors). */ + __virtio64 capacity; + /* The maximum segment size (if VIRTIO_BLK_F_SIZE_MAX) */ + __virtio32 size_max; + /* The maximum number of segments (if VIRTIO_BLK_F_SEG_MAX) */ + __virtio32 seg_max; + /* geometry of the device (if VIRTIO_BLK_F_GEOMETRY) */ + struct virtio_blk_geometry { + __virtio16 cylinders; + uint8_t heads; + uint8_t sectors; + } geometry; + + /* block size of device (if VIRTIO_BLK_F_BLK_SIZE) */ + __virtio32 blk_size; + + /* the next 4 entries are guarded by VIRTIO_BLK_F_TOPOLOGY */ + /* exponent for physical block per logical block. */ + uint8_t physical_block_exp; + /* alignment offset in logical blocks. */ + uint8_t alignment_offset; + /* minimum I/O size without performance penalty in logical blocks. */ + __virtio16 min_io_size; + /* optimal sustained I/O size in logical blocks. */ + __virtio32 opt_io_size; + + /* writeback mode (if VIRTIO_BLK_F_CONFIG_WCE) */ + uint8_t wce; + uint8_t unused; + + /* number of vqs, only available when VIRTIO_BLK_F_MQ is set */ + __virtio16 num_queues; + + /* the next 3 entries are guarded by VIRTIO_BLK_F_DISCARD */ + /* + * The maximum discard sectors (in 512-byte sectors) for + * one segment. + */ + __virtio32 max_discard_sectors; + /* + * The maximum number of discard segments in a + * discard command. + */ + __virtio32 max_discard_seg; + /* Discard commands must be aligned to this number of sectors. */ + __virtio32 discard_sector_alignment; + + /* the next 3 entries are guarded by VIRTIO_BLK_F_WRITE_ZEROES */ + /* + * The maximum number of write zeroes sectors (in 512-byte sectors) in + * one segment. + */ + __virtio32 max_write_zeroes_sectors; + /* + * The maximum number of segments in a write zeroes + * command. + */ + __virtio32 max_write_zeroes_seg; + /* + * Set if a VIRTIO_BLK_T_WRITE_ZEROES request may result in the + * deallocation of one or more of the sectors. + */ + uint8_t write_zeroes_may_unmap; + + uint8_t unused1[3]; +} QEMU_PACKED; + +/* + * Command types + * + * Usage is a bit tricky as some bits are used as flags and some are not. + * + * Rules: + * VIRTIO_BLK_T_OUT may be combined with VIRTIO_BLK_T_SCSI_CMD or + * VIRTIO_BLK_T_BARRIER. VIRTIO_BLK_T_FLUSH is a command of its own + * and may not be combined with any of the other flags. + */ + +/* These two define direction. */ +#define VIRTIO_BLK_T_IN 0 +#define VIRTIO_BLK_T_OUT 1 + +#ifndef VIRTIO_BLK_NO_LEGACY +/* This bit says it's a scsi command, not an actual read or write. */ +#define VIRTIO_BLK_T_SCSI_CMD 2 +#endif /* VIRTIO_BLK_NO_LEGACY */ + +/* Cache flush command */ +#define VIRTIO_BLK_T_FLUSH 4 + +/* Get device ID command */ +#define VIRTIO_BLK_T_GET_ID 8 + +/* Discard command */ +#define VIRTIO_BLK_T_DISCARD 11 + +/* Write zeroes command */ +#define VIRTIO_BLK_T_WRITE_ZEROES 13 + +#ifndef VIRTIO_BLK_NO_LEGACY +/* Barrier before this op. */ +#define VIRTIO_BLK_T_BARRIER 0x80000000 +#endif /* !VIRTIO_BLK_NO_LEGACY */ + +/* + * This comes first in the read scatter-gather list. + * For legacy virtio, if VIRTIO_F_ANY_LAYOUT is not negotiated, + * this is the first element of the read scatter-gather list. + */ +struct virtio_blk_outhdr { + /* VIRTIO_BLK_T* */ + __virtio32 type; + /* io priority. */ + __virtio32 ioprio; + /* Sector (ie. 512 byte offset) */ + __virtio64 sector; +}; + +/* Unmap this range (only valid for write zeroes command) */ +#define VIRTIO_BLK_WRITE_ZEROES_FLAG_UNMAP 0x00000001 + +/* Discard/write zeroes range for each request. */ +struct virtio_blk_discard_write_zeroes { + /* discard/write zeroes start sector */ + uint64_t sector; + /* number of discard/write zeroes sectors */ + uint32_t num_sectors; + /* flags for this range */ + uint32_t flags; +}; + +#ifndef VIRTIO_BLK_NO_LEGACY +struct virtio_scsi_inhdr { + __virtio32 errors; + __virtio32 data_len; + __virtio32 sense_len; + __virtio32 residual; +}; +#endif /* !VIRTIO_BLK_NO_LEGACY */ + +/* And this is the final byte of the write scatter-gather list. */ +#define VIRTIO_BLK_S_OK 0 +#define VIRTIO_BLK_S_IOERR 1 +#define VIRTIO_BLK_S_UNSUPP 2 +#endif /* _LINUX_VIRTIO_BLK_H */ diff --git a/include/standard-headers/linux/virtio_config.h b/include/standard-headers/linux/virtio_config.h new file mode 100644 index 000000000..22e3a85f6 --- /dev/null +++ b/include/standard-headers/linux/virtio_config.h @@ -0,0 +1,93 @@ +#ifndef _LINUX_VIRTIO_CONFIG_H +#define _LINUX_VIRTIO_CONFIG_H +/* This header, excluding the #ifdef __KERNEL__ part, is BSD licensed so + * anyone can use the definitions to implement compatible drivers/servers. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of IBM nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL IBM OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. */ + +/* Virtio devices use a standardized configuration space to define their + * features and pass configuration information, but each implementation can + * store and access that space differently. */ +#include "standard-headers/linux/types.h" + +/* Status byte for guest to report progress, and synchronize features. */ +/* We have seen device and processed generic fields (VIRTIO_CONFIG_F_VIRTIO) */ +#define VIRTIO_CONFIG_S_ACKNOWLEDGE 1 +/* We have found a driver for the device. */ +#define VIRTIO_CONFIG_S_DRIVER 2 +/* Driver has used its parts of the config, and is happy */ +#define VIRTIO_CONFIG_S_DRIVER_OK 4 +/* Driver has finished configuring features */ +#define VIRTIO_CONFIG_S_FEATURES_OK 8 +/* Device entered invalid state, driver must reset it */ +#define VIRTIO_CONFIG_S_NEEDS_RESET 0x40 +/* We've given up on this device. */ +#define VIRTIO_CONFIG_S_FAILED 0x80 + +/* + * Virtio feature bits VIRTIO_TRANSPORT_F_START through + * VIRTIO_TRANSPORT_F_END are reserved for the transport + * being used (e.g. virtio_ring, virtio_pci etc.), the + * rest are per-device feature bits. + */ +#define VIRTIO_TRANSPORT_F_START 28 +#define VIRTIO_TRANSPORT_F_END 38 + +#ifndef VIRTIO_CONFIG_NO_LEGACY +/* Do we get callbacks when the ring is completely used, even if we've + * suppressed them? */ +#define VIRTIO_F_NOTIFY_ON_EMPTY 24 + +/* Can the device handle any descriptor layout? */ +#define VIRTIO_F_ANY_LAYOUT 27 +#endif /* VIRTIO_CONFIG_NO_LEGACY */ + +/* v1.0 compliant. */ +#define VIRTIO_F_VERSION_1 32 + +/* + * If clear - device has the platform DMA (e.g. IOMMU) bypass quirk feature. + * If set - use platform DMA tools to access the memory. + * + * Note the reverse polarity (compared to most other features), + * this is for compatibility with legacy systems. + */ +#define VIRTIO_F_ACCESS_PLATFORM 33 +/* Legacy name for VIRTIO_F_ACCESS_PLATFORM (for compatibility with old userspace) */ +#define VIRTIO_F_IOMMU_PLATFORM VIRTIO_F_ACCESS_PLATFORM + +/* This feature indicates support for the packed virtqueue layout. */ +#define VIRTIO_F_RING_PACKED 34 + +/* + * This feature indicates that memory accesses by the driver and the + * device are ordered in a way described by the platform. + */ +#define VIRTIO_F_ORDER_PLATFORM 36 + +/* + * Does the device support Single Root I/O Virtualization? + */ +#define VIRTIO_F_SR_IOV 37 +#endif /* _LINUX_VIRTIO_CONFIG_H */ diff --git a/include/standard-headers/linux/virtio_console.h b/include/standard-headers/linux/virtio_console.h new file mode 100644 index 000000000..71f5f648e --- /dev/null +++ b/include/standard-headers/linux/virtio_console.h @@ -0,0 +1,78 @@ +/* + * This header, excluding the #ifdef __KERNEL__ part, is BSD licensed so + * anyone can use the definitions to implement compatible drivers/servers: + * + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of IBM nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL IBM OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * Copyright (C) Red Hat, Inc., 2009, 2010, 2011 + * Copyright (C) Amit Shah , 2009, 2010, 2011 + */ +#ifndef _LINUX_VIRTIO_CONSOLE_H +#define _LINUX_VIRTIO_CONSOLE_H +#include "standard-headers/linux/types.h" +#include "standard-headers/linux/virtio_types.h" +#include "standard-headers/linux/virtio_ids.h" +#include "standard-headers/linux/virtio_config.h" + +/* Feature bits */ +#define VIRTIO_CONSOLE_F_SIZE 0 /* Does host provide console size? */ +#define VIRTIO_CONSOLE_F_MULTIPORT 1 /* Does host provide multiple ports? */ +#define VIRTIO_CONSOLE_F_EMERG_WRITE 2 /* Does host support emergency write? */ + +#define VIRTIO_CONSOLE_BAD_ID (~(uint32_t)0) + +struct virtio_console_config { + /* colums of the screens */ + __virtio16 cols; + /* rows of the screens */ + __virtio16 rows; + /* max. number of ports this device can hold */ + __virtio32 max_nr_ports; + /* emergency write register */ + __virtio32 emerg_wr; +} QEMU_PACKED; + +/* + * A message that's passed between the Host and the Guest for a + * particular port. + */ +struct virtio_console_control { + __virtio32 id; /* Port number */ + __virtio16 event; /* The kind of control event (see below) */ + __virtio16 value; /* Extra information for the key */ +}; + +/* Some events for control messages */ +#define VIRTIO_CONSOLE_DEVICE_READY 0 +#define VIRTIO_CONSOLE_PORT_ADD 1 +#define VIRTIO_CONSOLE_PORT_REMOVE 2 +#define VIRTIO_CONSOLE_PORT_READY 3 +#define VIRTIO_CONSOLE_CONSOLE_PORT 4 +#define VIRTIO_CONSOLE_RESIZE 5 +#define VIRTIO_CONSOLE_PORT_OPEN 6 +#define VIRTIO_CONSOLE_PORT_NAME 7 + + +#endif /* _LINUX_VIRTIO_CONSOLE_H */ diff --git a/include/standard-headers/linux/virtio_crypto.h b/include/standard-headers/linux/virtio_crypto.h new file mode 100644 index 000000000..5ff0b4ee5 --- /dev/null +++ b/include/standard-headers/linux/virtio_crypto.h @@ -0,0 +1,450 @@ +#ifndef _VIRTIO_CRYPTO_H +#define _VIRTIO_CRYPTO_H +/* This header is BSD licensed so anyone can use the definitions to implement + * compatible drivers/servers. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of IBM nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS + * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL IBM OR + * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF + * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, + * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT + * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ +#include "standard-headers/linux/types.h" +#include "standard-headers/linux/virtio_types.h" +#include "standard-headers/linux/virtio_ids.h" +#include "standard-headers/linux/virtio_config.h" + + +#define VIRTIO_CRYPTO_SERVICE_CIPHER 0 +#define VIRTIO_CRYPTO_SERVICE_HASH 1 +#define VIRTIO_CRYPTO_SERVICE_MAC 2 +#define VIRTIO_CRYPTO_SERVICE_AEAD 3 + +#define VIRTIO_CRYPTO_OPCODE(service, op) (((service) << 8) | (op)) + +struct virtio_crypto_ctrl_header { +#define VIRTIO_CRYPTO_CIPHER_CREATE_SESSION \ + VIRTIO_CRYPTO_OPCODE(VIRTIO_CRYPTO_SERVICE_CIPHER, 0x02) +#define VIRTIO_CRYPTO_CIPHER_DESTROY_SESSION \ + VIRTIO_CRYPTO_OPCODE(VIRTIO_CRYPTO_SERVICE_CIPHER, 0x03) +#define VIRTIO_CRYPTO_HASH_CREATE_SESSION \ + VIRTIO_CRYPTO_OPCODE(VIRTIO_CRYPTO_SERVICE_HASH, 0x02) +#define VIRTIO_CRYPTO_HASH_DESTROY_SESSION \ + VIRTIO_CRYPTO_OPCODE(VIRTIO_CRYPTO_SERVICE_HASH, 0x03) +#define VIRTIO_CRYPTO_MAC_CREATE_SESSION \ + VIRTIO_CRYPTO_OPCODE(VIRTIO_CRYPTO_SERVICE_MAC, 0x02) +#define VIRTIO_CRYPTO_MAC_DESTROY_SESSION \ + VIRTIO_CRYPTO_OPCODE(VIRTIO_CRYPTO_SERVICE_MAC, 0x03) +#define VIRTIO_CRYPTO_AEAD_CREATE_SESSION \ + VIRTIO_CRYPTO_OPCODE(VIRTIO_CRYPTO_SERVICE_AEAD, 0x02) +#define VIRTIO_CRYPTO_AEAD_DESTROY_SESSION \ + VIRTIO_CRYPTO_OPCODE(VIRTIO_CRYPTO_SERVICE_AEAD, 0x03) + uint32_t opcode; + uint32_t algo; + uint32_t flag; + /* data virtqueue id */ + uint32_t queue_id; +}; + +struct virtio_crypto_cipher_session_para { +#define VIRTIO_CRYPTO_NO_CIPHER 0 +#define VIRTIO_CRYPTO_CIPHER_ARC4 1 +#define VIRTIO_CRYPTO_CIPHER_AES_ECB 2 +#define VIRTIO_CRYPTO_CIPHER_AES_CBC 3 +#define VIRTIO_CRYPTO_CIPHER_AES_CTR 4 +#define VIRTIO_CRYPTO_CIPHER_DES_ECB 5 +#define VIRTIO_CRYPTO_CIPHER_DES_CBC 6 +#define VIRTIO_CRYPTO_CIPHER_3DES_ECB 7 +#define VIRTIO_CRYPTO_CIPHER_3DES_CBC 8 +#define VIRTIO_CRYPTO_CIPHER_3DES_CTR 9 +#define VIRTIO_CRYPTO_CIPHER_KASUMI_F8 10 +#define VIRTIO_CRYPTO_CIPHER_SNOW3G_UEA2 11 +#define VIRTIO_CRYPTO_CIPHER_AES_F8 12 +#define VIRTIO_CRYPTO_CIPHER_AES_XTS 13 +#define VIRTIO_CRYPTO_CIPHER_ZUC_EEA3 14 + uint32_t algo; + /* length of key */ + uint32_t keylen; + +#define VIRTIO_CRYPTO_OP_ENCRYPT 1 +#define VIRTIO_CRYPTO_OP_DECRYPT 2 + /* encrypt or decrypt */ + uint32_t op; + uint32_t padding; +}; + +struct virtio_crypto_session_input { + /* Device-writable part */ + uint64_t session_id; + uint32_t status; + uint32_t padding; +}; + +struct virtio_crypto_cipher_session_req { + struct virtio_crypto_cipher_session_para para; + uint8_t padding[32]; +}; + +struct virtio_crypto_hash_session_para { +#define VIRTIO_CRYPTO_NO_HASH 0 +#define VIRTIO_CRYPTO_HASH_MD5 1 +#define VIRTIO_CRYPTO_HASH_SHA1 2 +#define VIRTIO_CRYPTO_HASH_SHA_224 3 +#define VIRTIO_CRYPTO_HASH_SHA_256 4 +#define VIRTIO_CRYPTO_HASH_SHA_384 5 +#define VIRTIO_CRYPTO_HASH_SHA_512 6 +#define VIRTIO_CRYPTO_HASH_SHA3_224 7 +#define VIRTIO_CRYPTO_HASH_SHA3_256 8 +#define VIRTIO_CRYPTO_HASH_SHA3_384 9 +#define VIRTIO_CRYPTO_HASH_SHA3_512 10 +#define VIRTIO_CRYPTO_HASH_SHA3_SHAKE128 11 +#define VIRTIO_CRYPTO_HASH_SHA3_SHAKE256 12 + uint32_t algo; + /* hash result length */ + uint32_t hash_result_len; + uint8_t padding[8]; +}; + +struct virtio_crypto_hash_create_session_req { + struct virtio_crypto_hash_session_para para; + uint8_t padding[40]; +}; + +struct virtio_crypto_mac_session_para { +#define VIRTIO_CRYPTO_NO_MAC 0 +#define VIRTIO_CRYPTO_MAC_HMAC_MD5 1 +#define VIRTIO_CRYPTO_MAC_HMAC_SHA1 2 +#define VIRTIO_CRYPTO_MAC_HMAC_SHA_224 3 +#define VIRTIO_CRYPTO_MAC_HMAC_SHA_256 4 +#define VIRTIO_CRYPTO_MAC_HMAC_SHA_384 5 +#define VIRTIO_CRYPTO_MAC_HMAC_SHA_512 6 +#define VIRTIO_CRYPTO_MAC_CMAC_3DES 25 +#define VIRTIO_CRYPTO_MAC_CMAC_AES 26 +#define VIRTIO_CRYPTO_MAC_KASUMI_F9 27 +#define VIRTIO_CRYPTO_MAC_SNOW3G_UIA2 28 +#define VIRTIO_CRYPTO_MAC_GMAC_AES 41 +#define VIRTIO_CRYPTO_MAC_GMAC_TWOFISH 42 +#define VIRTIO_CRYPTO_MAC_CBCMAC_AES 49 +#define VIRTIO_CRYPTO_MAC_CBCMAC_KASUMI_F9 50 +#define VIRTIO_CRYPTO_MAC_XCBC_AES 53 + uint32_t algo; + /* hash result length */ + uint32_t hash_result_len; + /* length of authenticated key */ + uint32_t auth_key_len; + uint32_t padding; +}; + +struct virtio_crypto_mac_create_session_req { + struct virtio_crypto_mac_session_para para; + uint8_t padding[40]; +}; + +struct virtio_crypto_aead_session_para { +#define VIRTIO_CRYPTO_NO_AEAD 0 +#define VIRTIO_CRYPTO_AEAD_GCM 1 +#define VIRTIO_CRYPTO_AEAD_CCM 2 +#define VIRTIO_CRYPTO_AEAD_CHACHA20_POLY1305 3 + uint32_t algo; + /* length of key */ + uint32_t key_len; + /* hash result length */ + uint32_t hash_result_len; + /* length of the additional authenticated data (AAD) in bytes */ + uint32_t aad_len; + /* encrypt or decrypt, See above VIRTIO_CRYPTO_OP_* */ + uint32_t op; + uint32_t padding; +}; + +struct virtio_crypto_aead_create_session_req { + struct virtio_crypto_aead_session_para para; + uint8_t padding[32]; +}; + +struct virtio_crypto_alg_chain_session_para { +#define VIRTIO_CRYPTO_SYM_ALG_CHAIN_ORDER_HASH_THEN_CIPHER 1 +#define VIRTIO_CRYPTO_SYM_ALG_CHAIN_ORDER_CIPHER_THEN_HASH 2 + uint32_t alg_chain_order; +/* Plain hash */ +#define VIRTIO_CRYPTO_SYM_HASH_MODE_PLAIN 1 +/* Authenticated hash (mac) */ +#define VIRTIO_CRYPTO_SYM_HASH_MODE_AUTH 2 +/* Nested hash */ +#define VIRTIO_CRYPTO_SYM_HASH_MODE_NESTED 3 + uint32_t hash_mode; + struct virtio_crypto_cipher_session_para cipher_param; + union { + struct virtio_crypto_hash_session_para hash_param; + struct virtio_crypto_mac_session_para mac_param; + uint8_t padding[16]; + } u; + /* length of the additional authenticated data (AAD) in bytes */ + uint32_t aad_len; + uint32_t padding; +}; + +struct virtio_crypto_alg_chain_session_req { + struct virtio_crypto_alg_chain_session_para para; +}; + +struct virtio_crypto_sym_create_session_req { + union { + struct virtio_crypto_cipher_session_req cipher; + struct virtio_crypto_alg_chain_session_req chain; + uint8_t padding[48]; + } u; + + /* Device-readable part */ + +/* No operation */ +#define VIRTIO_CRYPTO_SYM_OP_NONE 0 +/* Cipher only operation on the data */ +#define VIRTIO_CRYPTO_SYM_OP_CIPHER 1 +/* + * Chain any cipher with any hash or mac operation. The order + * depends on the value of alg_chain_order param + */ +#define VIRTIO_CRYPTO_SYM_OP_ALGORITHM_CHAINING 2 + uint32_t op_type; + uint32_t padding; +}; + +struct virtio_crypto_destroy_session_req { + /* Device-readable part */ + uint64_t session_id; + uint8_t padding[48]; +}; + +/* The request of the control virtqueue's packet */ +struct virtio_crypto_op_ctrl_req { + struct virtio_crypto_ctrl_header header; + + union { + struct virtio_crypto_sym_create_session_req + sym_create_session; + struct virtio_crypto_hash_create_session_req + hash_create_session; + struct virtio_crypto_mac_create_session_req + mac_create_session; + struct virtio_crypto_aead_create_session_req + aead_create_session; + struct virtio_crypto_destroy_session_req + destroy_session; + uint8_t padding[56]; + } u; +}; + +struct virtio_crypto_op_header { +#define VIRTIO_CRYPTO_CIPHER_ENCRYPT \ + VIRTIO_CRYPTO_OPCODE(VIRTIO_CRYPTO_SERVICE_CIPHER, 0x00) +#define VIRTIO_CRYPTO_CIPHER_DECRYPT \ + VIRTIO_CRYPTO_OPCODE(VIRTIO_CRYPTO_SERVICE_CIPHER, 0x01) +#define VIRTIO_CRYPTO_HASH \ + VIRTIO_CRYPTO_OPCODE(VIRTIO_CRYPTO_SERVICE_HASH, 0x00) +#define VIRTIO_CRYPTO_MAC \ + VIRTIO_CRYPTO_OPCODE(VIRTIO_CRYPTO_SERVICE_MAC, 0x00) +#define VIRTIO_CRYPTO_AEAD_ENCRYPT \ + VIRTIO_CRYPTO_OPCODE(VIRTIO_CRYPTO_SERVICE_AEAD, 0x00) +#define VIRTIO_CRYPTO_AEAD_DECRYPT \ + VIRTIO_CRYPTO_OPCODE(VIRTIO_CRYPTO_SERVICE_AEAD, 0x01) + uint32_t opcode; + /* algo should be service-specific algorithms */ + uint32_t algo; + /* session_id should be service-specific algorithms */ + uint64_t session_id; + /* control flag to control the request */ + uint32_t flag; + uint32_t padding; +}; + +struct virtio_crypto_cipher_para { + /* + * Byte Length of valid IV/Counter + * + * For block ciphers in CBC or F8 mode, or for Kasumi in F8 mode, or for + * SNOW3G in UEA2 mode, this is the length of the IV (which + * must be the same as the block length of the cipher). + * For block ciphers in CTR mode, this is the length of the counter + * (which must be the same as the block length of the cipher). + * For AES-XTS, this is the 128bit tweak, i, from IEEE Std 1619-2007. + * + * The IV/Counter will be updated after every partial cryptographic + * operation. + */ + uint32_t iv_len; + /* length of source data */ + uint32_t src_data_len; + /* length of dst data */ + uint32_t dst_data_len; + uint32_t padding; +}; + +struct virtio_crypto_hash_para { + /* length of source data */ + uint32_t src_data_len; + /* hash result length */ + uint32_t hash_result_len; +}; + +struct virtio_crypto_mac_para { + struct virtio_crypto_hash_para hash; +}; + +struct virtio_crypto_aead_para { + /* + * Byte Length of valid IV data pointed to by the below iv_addr + * parameter. + * + * For GCM mode, this is either 12 (for 96-bit IVs) or 16, in which + * case iv_addr points to J0. + * For CCM mode, this is the length of the nonce, which can be in the + * range 7 to 13 inclusive. + */ + uint32_t iv_len; + /* length of additional auth data */ + uint32_t aad_len; + /* length of source data */ + uint32_t src_data_len; + /* length of dst data */ + uint32_t dst_data_len; +}; + +struct virtio_crypto_cipher_data_req { + /* Device-readable part */ + struct virtio_crypto_cipher_para para; + uint8_t padding[24]; +}; + +struct virtio_crypto_hash_data_req { + /* Device-readable part */ + struct virtio_crypto_hash_para para; + uint8_t padding[40]; +}; + +struct virtio_crypto_mac_data_req { + /* Device-readable part */ + struct virtio_crypto_mac_para para; + uint8_t padding[40]; +}; + +struct virtio_crypto_alg_chain_data_para { + uint32_t iv_len; + /* Length of source data */ + uint32_t src_data_len; + /* Length of destination data */ + uint32_t dst_data_len; + /* Starting point for cipher processing in source data */ + uint32_t cipher_start_src_offset; + /* Length of the source data that the cipher will be computed on */ + uint32_t len_to_cipher; + /* Starting point for hash processing in source data */ + uint32_t hash_start_src_offset; + /* Length of the source data that the hash will be computed on */ + uint32_t len_to_hash; + /* Length of the additional auth data */ + uint32_t aad_len; + /* Length of the hash result */ + uint32_t hash_result_len; + uint32_t reserved; +}; + +struct virtio_crypto_alg_chain_data_req { + /* Device-readable part */ + struct virtio_crypto_alg_chain_data_para para; +}; + +struct virtio_crypto_sym_data_req { + union { + struct virtio_crypto_cipher_data_req cipher; + struct virtio_crypto_alg_chain_data_req chain; + uint8_t padding[40]; + } u; + + /* See above VIRTIO_CRYPTO_SYM_OP_* */ + uint32_t op_type; + uint32_t padding; +}; + +struct virtio_crypto_aead_data_req { + /* Device-readable part */ + struct virtio_crypto_aead_para para; + uint8_t padding[32]; +}; + +/* The request of the data virtqueue's packet */ +struct virtio_crypto_op_data_req { + struct virtio_crypto_op_header header; + + union { + struct virtio_crypto_sym_data_req sym_req; + struct virtio_crypto_hash_data_req hash_req; + struct virtio_crypto_mac_data_req mac_req; + struct virtio_crypto_aead_data_req aead_req; + uint8_t padding[48]; + } u; +}; + +#define VIRTIO_CRYPTO_OK 0 +#define VIRTIO_CRYPTO_ERR 1 +#define VIRTIO_CRYPTO_BADMSG 2 +#define VIRTIO_CRYPTO_NOTSUPP 3 +#define VIRTIO_CRYPTO_INVSESS 4 /* Invalid session id */ + +/* The accelerator hardware is ready */ +#define VIRTIO_CRYPTO_S_HW_READY (1 << 0) + +struct virtio_crypto_config { + /* See VIRTIO_CRYPTO_OP_* above */ + uint32_t status; + + /* + * Maximum number of data queue + */ + uint32_t max_dataqueues; + + /* + * Specifies the services mask which the device support, + * see VIRTIO_CRYPTO_SERVICE_* above + */ + uint32_t crypto_services; + + /* Detailed algorithms mask */ + uint32_t cipher_algo_l; + uint32_t cipher_algo_h; + uint32_t hash_algo; + uint32_t mac_algo_l; + uint32_t mac_algo_h; + uint32_t aead_algo; + /* Maximum length of cipher key */ + uint32_t max_cipher_key_len; + /* Maximum length of authenticated key */ + uint32_t max_auth_key_len; + uint32_t reserve; + /* Maximum size of each crypto request's content */ + uint64_t max_size; +}; + +struct virtio_crypto_inhdr { + /* See VIRTIO_CRYPTO_* above */ + uint8_t status; +}; +#endif diff --git a/include/standard-headers/linux/virtio_fs.h b/include/standard-headers/linux/virtio_fs.h new file mode 100644 index 000000000..a32fe8a64 --- /dev/null +++ b/include/standard-headers/linux/virtio_fs.h @@ -0,0 +1,22 @@ +/* SPDX-License-Identifier: ((GPL-2.0 WITH Linux-syscall-note) OR BSD-3-Clause) */ + +#ifndef _LINUX_VIRTIO_FS_H +#define _LINUX_VIRTIO_FS_H + +#include "standard-headers/linux/types.h" +#include "standard-headers/linux/virtio_ids.h" +#include "standard-headers/linux/virtio_config.h" +#include "standard-headers/linux/virtio_types.h" + +struct virtio_fs_config { + /* Filesystem name (UTF-8, not NUL-terminated, padded with NULs) */ + uint8_t tag[36]; + + /* Number of request queues */ + uint32_t num_request_queues; +} QEMU_PACKED; + +/* For the id field in virtio_pci_shm_cap */ +#define VIRTIO_FS_SHMCAP_ID_CACHE 0 + +#endif /* _LINUX_VIRTIO_FS_H */ diff --git a/include/standard-headers/linux/virtio_gpu.h b/include/standard-headers/linux/virtio_gpu.h new file mode 100644 index 000000000..1357e4774 --- /dev/null +++ b/include/standard-headers/linux/virtio_gpu.h @@ -0,0 +1,444 @@ +/* + * Virtio GPU Device + * + * Copyright Red Hat, Inc. 2013-2014 + * + * Authors: + * Dave Airlie + * Gerd Hoffmann + * + * This header is BSD licensed so anyone can use the definitions + * to implement compatible drivers/servers: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of IBM nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS + * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL IBM OR + * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF + * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, + * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT + * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#ifndef VIRTIO_GPU_HW_H +#define VIRTIO_GPU_HW_H + +#include "standard-headers/linux/types.h" + +/* + * VIRTIO_GPU_CMD_CTX_* + * VIRTIO_GPU_CMD_*_3D + */ +#define VIRTIO_GPU_F_VIRGL 0 + +/* + * VIRTIO_GPU_CMD_GET_EDID + */ +#define VIRTIO_GPU_F_EDID 1 +/* + * VIRTIO_GPU_CMD_RESOURCE_ASSIGN_UUID + */ +#define VIRTIO_GPU_F_RESOURCE_UUID 2 + +/* + * VIRTIO_GPU_CMD_RESOURCE_CREATE_BLOB + */ +#define VIRTIO_GPU_F_RESOURCE_BLOB 3 + +enum virtio_gpu_ctrl_type { + VIRTIO_GPU_UNDEFINED = 0, + + /* 2d commands */ + VIRTIO_GPU_CMD_GET_DISPLAY_INFO = 0x0100, + VIRTIO_GPU_CMD_RESOURCE_CREATE_2D, + VIRTIO_GPU_CMD_RESOURCE_UNREF, + VIRTIO_GPU_CMD_SET_SCANOUT, + VIRTIO_GPU_CMD_RESOURCE_FLUSH, + VIRTIO_GPU_CMD_TRANSFER_TO_HOST_2D, + VIRTIO_GPU_CMD_RESOURCE_ATTACH_BACKING, + VIRTIO_GPU_CMD_RESOURCE_DETACH_BACKING, + VIRTIO_GPU_CMD_GET_CAPSET_INFO, + VIRTIO_GPU_CMD_GET_CAPSET, + VIRTIO_GPU_CMD_GET_EDID, + VIRTIO_GPU_CMD_RESOURCE_ASSIGN_UUID, + VIRTIO_GPU_CMD_RESOURCE_CREATE_BLOB, + VIRTIO_GPU_CMD_SET_SCANOUT_BLOB, + + /* 3d commands */ + VIRTIO_GPU_CMD_CTX_CREATE = 0x0200, + VIRTIO_GPU_CMD_CTX_DESTROY, + VIRTIO_GPU_CMD_CTX_ATTACH_RESOURCE, + VIRTIO_GPU_CMD_CTX_DETACH_RESOURCE, + VIRTIO_GPU_CMD_RESOURCE_CREATE_3D, + VIRTIO_GPU_CMD_TRANSFER_TO_HOST_3D, + VIRTIO_GPU_CMD_TRANSFER_FROM_HOST_3D, + VIRTIO_GPU_CMD_SUBMIT_3D, + VIRTIO_GPU_CMD_RESOURCE_MAP_BLOB, + VIRTIO_GPU_CMD_RESOURCE_UNMAP_BLOB, + + /* cursor commands */ + VIRTIO_GPU_CMD_UPDATE_CURSOR = 0x0300, + VIRTIO_GPU_CMD_MOVE_CURSOR, + + /* success responses */ + VIRTIO_GPU_RESP_OK_NODATA = 0x1100, + VIRTIO_GPU_RESP_OK_DISPLAY_INFO, + VIRTIO_GPU_RESP_OK_CAPSET_INFO, + VIRTIO_GPU_RESP_OK_CAPSET, + VIRTIO_GPU_RESP_OK_EDID, + VIRTIO_GPU_RESP_OK_RESOURCE_UUID, + VIRTIO_GPU_RESP_OK_MAP_INFO, + + /* error responses */ + VIRTIO_GPU_RESP_ERR_UNSPEC = 0x1200, + VIRTIO_GPU_RESP_ERR_OUT_OF_MEMORY, + VIRTIO_GPU_RESP_ERR_INVALID_SCANOUT_ID, + VIRTIO_GPU_RESP_ERR_INVALID_RESOURCE_ID, + VIRTIO_GPU_RESP_ERR_INVALID_CONTEXT_ID, + VIRTIO_GPU_RESP_ERR_INVALID_PARAMETER, +}; + +enum virtio_gpu_shm_id { + VIRTIO_GPU_SHM_ID_UNDEFINED = 0, + /* + * VIRTIO_GPU_CMD_RESOURCE_MAP_BLOB + * VIRTIO_GPU_CMD_RESOURCE_UNMAP_BLOB + */ + VIRTIO_GPU_SHM_ID_HOST_VISIBLE = 1 +}; + +#define VIRTIO_GPU_FLAG_FENCE (1 << 0) + +struct virtio_gpu_ctrl_hdr { + uint32_t type; + uint32_t flags; + uint64_t fence_id; + uint32_t ctx_id; + uint32_t padding; +}; + +/* data passed in the cursor vq */ + +struct virtio_gpu_cursor_pos { + uint32_t scanout_id; + uint32_t x; + uint32_t y; + uint32_t padding; +}; + +/* VIRTIO_GPU_CMD_UPDATE_CURSOR, VIRTIO_GPU_CMD_MOVE_CURSOR */ +struct virtio_gpu_update_cursor { + struct virtio_gpu_ctrl_hdr hdr; + struct virtio_gpu_cursor_pos pos; /* update & move */ + uint32_t resource_id; /* update only */ + uint32_t hot_x; /* update only */ + uint32_t hot_y; /* update only */ + uint32_t padding; +}; + +/* data passed in the control vq, 2d related */ + +struct virtio_gpu_rect { + uint32_t x; + uint32_t y; + uint32_t width; + uint32_t height; +}; + +/* VIRTIO_GPU_CMD_RESOURCE_UNREF */ +struct virtio_gpu_resource_unref { + struct virtio_gpu_ctrl_hdr hdr; + uint32_t resource_id; + uint32_t padding; +}; + +/* VIRTIO_GPU_CMD_RESOURCE_CREATE_2D: create a 2d resource with a format */ +struct virtio_gpu_resource_create_2d { + struct virtio_gpu_ctrl_hdr hdr; + uint32_t resource_id; + uint32_t format; + uint32_t width; + uint32_t height; +}; + +/* VIRTIO_GPU_CMD_SET_SCANOUT */ +struct virtio_gpu_set_scanout { + struct virtio_gpu_ctrl_hdr hdr; + struct virtio_gpu_rect r; + uint32_t scanout_id; + uint32_t resource_id; +}; + +/* VIRTIO_GPU_CMD_RESOURCE_FLUSH */ +struct virtio_gpu_resource_flush { + struct virtio_gpu_ctrl_hdr hdr; + struct virtio_gpu_rect r; + uint32_t resource_id; + uint32_t padding; +}; + +/* VIRTIO_GPU_CMD_TRANSFER_TO_HOST_2D: simple transfer to_host */ +struct virtio_gpu_transfer_to_host_2d { + struct virtio_gpu_ctrl_hdr hdr; + struct virtio_gpu_rect r; + uint64_t offset; + uint32_t resource_id; + uint32_t padding; +}; + +struct virtio_gpu_mem_entry { + uint64_t addr; + uint32_t length; + uint32_t padding; +}; + +/* VIRTIO_GPU_CMD_RESOURCE_ATTACH_BACKING */ +struct virtio_gpu_resource_attach_backing { + struct virtio_gpu_ctrl_hdr hdr; + uint32_t resource_id; + uint32_t nr_entries; +}; + +/* VIRTIO_GPU_CMD_RESOURCE_DETACH_BACKING */ +struct virtio_gpu_resource_detach_backing { + struct virtio_gpu_ctrl_hdr hdr; + uint32_t resource_id; + uint32_t padding; +}; + +/* VIRTIO_GPU_RESP_OK_DISPLAY_INFO */ +#define VIRTIO_GPU_MAX_SCANOUTS 16 +struct virtio_gpu_resp_display_info { + struct virtio_gpu_ctrl_hdr hdr; + struct virtio_gpu_display_one { + struct virtio_gpu_rect r; + uint32_t enabled; + uint32_t flags; + } pmodes[VIRTIO_GPU_MAX_SCANOUTS]; +}; + +/* data passed in the control vq, 3d related */ + +struct virtio_gpu_box { + uint32_t x, y, z; + uint32_t w, h, d; +}; + +/* VIRTIO_GPU_CMD_TRANSFER_TO_HOST_3D, VIRTIO_GPU_CMD_TRANSFER_FROM_HOST_3D */ +struct virtio_gpu_transfer_host_3d { + struct virtio_gpu_ctrl_hdr hdr; + struct virtio_gpu_box box; + uint64_t offset; + uint32_t resource_id; + uint32_t level; + uint32_t stride; + uint32_t layer_stride; +}; + +/* VIRTIO_GPU_CMD_RESOURCE_CREATE_3D */ +#define VIRTIO_GPU_RESOURCE_FLAG_Y_0_TOP (1 << 0) +struct virtio_gpu_resource_create_3d { + struct virtio_gpu_ctrl_hdr hdr; + uint32_t resource_id; + uint32_t target; + uint32_t format; + uint32_t bind; + uint32_t width; + uint32_t height; + uint32_t depth; + uint32_t array_size; + uint32_t last_level; + uint32_t nr_samples; + uint32_t flags; + uint32_t padding; +}; + +/* VIRTIO_GPU_CMD_CTX_CREATE */ +struct virtio_gpu_ctx_create { + struct virtio_gpu_ctrl_hdr hdr; + uint32_t nlen; + uint32_t padding; + char debug_name[64]; +}; + +/* VIRTIO_GPU_CMD_CTX_DESTROY */ +struct virtio_gpu_ctx_destroy { + struct virtio_gpu_ctrl_hdr hdr; +}; + +/* VIRTIO_GPU_CMD_CTX_ATTACH_RESOURCE, VIRTIO_GPU_CMD_CTX_DETACH_RESOURCE */ +struct virtio_gpu_ctx_resource { + struct virtio_gpu_ctrl_hdr hdr; + uint32_t resource_id; + uint32_t padding; +}; + +/* VIRTIO_GPU_CMD_SUBMIT_3D */ +struct virtio_gpu_cmd_submit { + struct virtio_gpu_ctrl_hdr hdr; + uint32_t size; + uint32_t padding; +}; + +#define VIRTIO_GPU_CAPSET_VIRGL 1 +#define VIRTIO_GPU_CAPSET_VIRGL2 2 + +/* VIRTIO_GPU_CMD_GET_CAPSET_INFO */ +struct virtio_gpu_get_capset_info { + struct virtio_gpu_ctrl_hdr hdr; + uint32_t capset_index; + uint32_t padding; +}; + +/* VIRTIO_GPU_RESP_OK_CAPSET_INFO */ +struct virtio_gpu_resp_capset_info { + struct virtio_gpu_ctrl_hdr hdr; + uint32_t capset_id; + uint32_t capset_max_version; + uint32_t capset_max_size; + uint32_t padding; +}; + +/* VIRTIO_GPU_CMD_GET_CAPSET */ +struct virtio_gpu_get_capset { + struct virtio_gpu_ctrl_hdr hdr; + uint32_t capset_id; + uint32_t capset_version; +}; + +/* VIRTIO_GPU_RESP_OK_CAPSET */ +struct virtio_gpu_resp_capset { + struct virtio_gpu_ctrl_hdr hdr; + uint8_t capset_data[]; +}; + +/* VIRTIO_GPU_CMD_GET_EDID */ +struct virtio_gpu_cmd_get_edid { + struct virtio_gpu_ctrl_hdr hdr; + uint32_t scanout; + uint32_t padding; +}; + +/* VIRTIO_GPU_RESP_OK_EDID */ +struct virtio_gpu_resp_edid { + struct virtio_gpu_ctrl_hdr hdr; + uint32_t size; + uint32_t padding; + uint8_t edid[1024]; +}; + +#define VIRTIO_GPU_EVENT_DISPLAY (1 << 0) + +struct virtio_gpu_config { + uint32_t events_read; + uint32_t events_clear; + uint32_t num_scanouts; + uint32_t num_capsets; +}; + +/* simple formats for fbcon/X use */ +enum virtio_gpu_formats { + VIRTIO_GPU_FORMAT_B8G8R8A8_UNORM = 1, + VIRTIO_GPU_FORMAT_B8G8R8X8_UNORM = 2, + VIRTIO_GPU_FORMAT_A8R8G8B8_UNORM = 3, + VIRTIO_GPU_FORMAT_X8R8G8B8_UNORM = 4, + + VIRTIO_GPU_FORMAT_R8G8B8A8_UNORM = 67, + VIRTIO_GPU_FORMAT_X8B8G8R8_UNORM = 68, + + VIRTIO_GPU_FORMAT_A8B8G8R8_UNORM = 121, + VIRTIO_GPU_FORMAT_R8G8B8X8_UNORM = 134, +}; + +/* VIRTIO_GPU_CMD_RESOURCE_ASSIGN_UUID */ +struct virtio_gpu_resource_assign_uuid { + struct virtio_gpu_ctrl_hdr hdr; + uint32_t resource_id; + uint32_t padding; +}; + +/* VIRTIO_GPU_RESP_OK_RESOURCE_UUID */ +struct virtio_gpu_resp_resource_uuid { + struct virtio_gpu_ctrl_hdr hdr; + uint8_t uuid[16]; +}; + +/* VIRTIO_GPU_CMD_RESOURCE_CREATE_BLOB */ +struct virtio_gpu_resource_create_blob { + struct virtio_gpu_ctrl_hdr hdr; + uint32_t resource_id; +#define VIRTIO_GPU_BLOB_MEM_GUEST 0x0001 +#define VIRTIO_GPU_BLOB_MEM_HOST3D 0x0002 +#define VIRTIO_GPU_BLOB_MEM_HOST3D_GUEST 0x0003 + +#define VIRTIO_GPU_BLOB_FLAG_USE_MAPPABLE 0x0001 +#define VIRTIO_GPU_BLOB_FLAG_USE_SHAREABLE 0x0002 +#define VIRTIO_GPU_BLOB_FLAG_USE_CROSS_DEVICE 0x0004 + /* zero is invalid blob mem */ + uint32_t blob_mem; + uint32_t blob_flags; + uint32_t nr_entries; + uint64_t blob_id; + uint64_t size; + /* + * sizeof(nr_entries * virtio_gpu_mem_entry) bytes follow + */ +}; + +/* VIRTIO_GPU_CMD_SET_SCANOUT_BLOB */ +struct virtio_gpu_set_scanout_blob { + struct virtio_gpu_ctrl_hdr hdr; + struct virtio_gpu_rect r; + uint32_t scanout_id; + uint32_t resource_id; + uint32_t width; + uint32_t height; + uint32_t format; + uint32_t padding; + uint32_t strides[4]; + uint32_t offsets[4]; +}; + +/* VIRTIO_GPU_CMD_RESOURCE_MAP_BLOB */ +struct virtio_gpu_resource_map_blob { + struct virtio_gpu_ctrl_hdr hdr; + uint32_t resource_id; + uint32_t padding; + uint64_t offset; +}; + +/* VIRTIO_GPU_RESP_OK_MAP_INFO */ +#define VIRTIO_GPU_MAP_CACHE_MASK 0x0f +#define VIRTIO_GPU_MAP_CACHE_NONE 0x00 +#define VIRTIO_GPU_MAP_CACHE_CACHED 0x01 +#define VIRTIO_GPU_MAP_CACHE_UNCACHED 0x02 +#define VIRTIO_GPU_MAP_CACHE_WC 0x03 +struct virtio_gpu_resp_map_info { + struct virtio_gpu_ctrl_hdr hdr; + uint32_t map_info; + uint32_t padding; +}; + +/* VIRTIO_GPU_CMD_RESOURCE_UNMAP_BLOB */ +struct virtio_gpu_resource_unmap_blob { + struct virtio_gpu_ctrl_hdr hdr; + uint32_t resource_id; + uint32_t padding; +}; + +#endif diff --git a/include/standard-headers/linux/virtio_ids.h b/include/standard-headers/linux/virtio_ids.h new file mode 100644 index 000000000..bc1c0621f --- /dev/null +++ b/include/standard-headers/linux/virtio_ids.h @@ -0,0 +1,58 @@ +#ifndef _LINUX_VIRTIO_IDS_H +#define _LINUX_VIRTIO_IDS_H +/* + * Virtio IDs + * + * This header is BSD licensed so anyone can use the definitions to implement + * compatible drivers/servers. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of IBM nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL IBM OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. */ + +#define VIRTIO_ID_NET 1 /* virtio net */ +#define VIRTIO_ID_BLOCK 2 /* virtio block */ +#define VIRTIO_ID_CONSOLE 3 /* virtio console */ +#define VIRTIO_ID_RNG 4 /* virtio rng */ +#define VIRTIO_ID_BALLOON 5 /* virtio balloon */ +#define VIRTIO_ID_IOMEM 6 /* virtio ioMemory */ +#define VIRTIO_ID_RPMSG 7 /* virtio remote processor messaging */ +#define VIRTIO_ID_SCSI 8 /* virtio scsi */ +#define VIRTIO_ID_9P 9 /* 9p virtio console */ +#define VIRTIO_ID_MAC80211_WLAN 10 /* virtio WLAN MAC */ +#define VIRTIO_ID_RPROC_SERIAL 11 /* virtio remoteproc serial link */ +#define VIRTIO_ID_CAIF 12 /* Virtio caif */ +#define VIRTIO_ID_MEMORY_BALLOON 13 /* virtio memory balloon */ +#define VIRTIO_ID_GPU 16 /* virtio GPU */ +#define VIRTIO_ID_CLOCK 17 /* virtio clock/timer */ +#define VIRTIO_ID_INPUT 18 /* virtio input */ +#define VIRTIO_ID_VSOCK 19 /* virtio vsock transport */ +#define VIRTIO_ID_CRYPTO 20 /* virtio crypto */ +#define VIRTIO_ID_SIGNAL_DIST 21 /* virtio signal distribution device */ +#define VIRTIO_ID_PSTORE 22 /* virtio pstore device */ +#define VIRTIO_ID_IOMMU 23 /* virtio IOMMU */ +#define VIRTIO_ID_MEM 24 /* virtio mem */ +#define VIRTIO_ID_FS 26 /* virtio filesystem */ +#define VIRTIO_ID_PMEM 27 /* virtio pmem */ +#define VIRTIO_ID_MAC80211_HWSIM 29 /* virtio mac80211-hwsim */ + +#endif /* _LINUX_VIRTIO_IDS_H */ diff --git a/include/standard-headers/linux/virtio_input.h b/include/standard-headers/linux/virtio_input.h new file mode 100644 index 000000000..a98a7974c --- /dev/null +++ b/include/standard-headers/linux/virtio_input.h @@ -0,0 +1,76 @@ +#ifndef _LINUX_VIRTIO_INPUT_H +#define _LINUX_VIRTIO_INPUT_H +/* This header is BSD licensed so anyone can use the definitions to implement + * compatible drivers/servers. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of IBM nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS + * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL IBM OR + * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF + * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, + * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT + * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. */ + +#include "standard-headers/linux/types.h" + +enum virtio_input_config_select { + VIRTIO_INPUT_CFG_UNSET = 0x00, + VIRTIO_INPUT_CFG_ID_NAME = 0x01, + VIRTIO_INPUT_CFG_ID_SERIAL = 0x02, + VIRTIO_INPUT_CFG_ID_DEVIDS = 0x03, + VIRTIO_INPUT_CFG_PROP_BITS = 0x10, + VIRTIO_INPUT_CFG_EV_BITS = 0x11, + VIRTIO_INPUT_CFG_ABS_INFO = 0x12, +}; + +struct virtio_input_absinfo { + uint32_t min; + uint32_t max; + uint32_t fuzz; + uint32_t flat; + uint32_t res; +}; + +struct virtio_input_devids { + uint16_t bustype; + uint16_t vendor; + uint16_t product; + uint16_t version; +}; + +struct virtio_input_config { + uint8_t select; + uint8_t subsel; + uint8_t size; + uint8_t reserved[5]; + union { + char string[128]; + uint8_t bitmap[128]; + struct virtio_input_absinfo abs; + struct virtio_input_devids ids; + } u; +}; + +struct virtio_input_event { + uint16_t type; + uint16_t code; + uint32_t value; +}; + +#endif /* _LINUX_VIRTIO_INPUT_H */ diff --git a/include/standard-headers/linux/virtio_iommu.h b/include/standard-headers/linux/virtio_iommu.h new file mode 100644 index 000000000..b9443b83a --- /dev/null +++ b/include/standard-headers/linux/virtio_iommu.h @@ -0,0 +1,165 @@ +/* SPDX-License-Identifier: BSD-3-Clause */ +/* + * Virtio-iommu definition v0.12 + * + * Copyright (C) 2019 Arm Ltd. + */ +#ifndef _LINUX_VIRTIO_IOMMU_H +#define _LINUX_VIRTIO_IOMMU_H + +#include "standard-headers/linux/types.h" + +/* Feature bits */ +#define VIRTIO_IOMMU_F_INPUT_RANGE 0 +#define VIRTIO_IOMMU_F_DOMAIN_RANGE 1 +#define VIRTIO_IOMMU_F_MAP_UNMAP 2 +#define VIRTIO_IOMMU_F_BYPASS 3 +#define VIRTIO_IOMMU_F_PROBE 4 +#define VIRTIO_IOMMU_F_MMIO 5 + +struct virtio_iommu_range_64 { + uint64_t start; + uint64_t end; +}; + +struct virtio_iommu_range_32 { + uint32_t start; + uint32_t end; +}; + +struct virtio_iommu_config { + /* Supported page sizes */ + uint64_t page_size_mask; + /* Supported IOVA range */ + struct virtio_iommu_range_64 input_range; + /* Max domain ID size */ + struct virtio_iommu_range_32 domain_range; + /* Probe buffer size */ + uint32_t probe_size; +}; + +/* Request types */ +#define VIRTIO_IOMMU_T_ATTACH 0x01 +#define VIRTIO_IOMMU_T_DETACH 0x02 +#define VIRTIO_IOMMU_T_MAP 0x03 +#define VIRTIO_IOMMU_T_UNMAP 0x04 +#define VIRTIO_IOMMU_T_PROBE 0x05 + +/* Status types */ +#define VIRTIO_IOMMU_S_OK 0x00 +#define VIRTIO_IOMMU_S_IOERR 0x01 +#define VIRTIO_IOMMU_S_UNSUPP 0x02 +#define VIRTIO_IOMMU_S_DEVERR 0x03 +#define VIRTIO_IOMMU_S_INVAL 0x04 +#define VIRTIO_IOMMU_S_RANGE 0x05 +#define VIRTIO_IOMMU_S_NOENT 0x06 +#define VIRTIO_IOMMU_S_FAULT 0x07 +#define VIRTIO_IOMMU_S_NOMEM 0x08 + +struct virtio_iommu_req_head { + uint8_t type; + uint8_t reserved[3]; +}; + +struct virtio_iommu_req_tail { + uint8_t status; + uint8_t reserved[3]; +}; + +struct virtio_iommu_req_attach { + struct virtio_iommu_req_head head; + uint32_t domain; + uint32_t endpoint; + uint8_t reserved[8]; + struct virtio_iommu_req_tail tail; +}; + +struct virtio_iommu_req_detach { + struct virtio_iommu_req_head head; + uint32_t domain; + uint32_t endpoint; + uint8_t reserved[8]; + struct virtio_iommu_req_tail tail; +}; + +#define VIRTIO_IOMMU_MAP_F_READ (1 << 0) +#define VIRTIO_IOMMU_MAP_F_WRITE (1 << 1) +#define VIRTIO_IOMMU_MAP_F_MMIO (1 << 2) + +#define VIRTIO_IOMMU_MAP_F_MASK (VIRTIO_IOMMU_MAP_F_READ | \ + VIRTIO_IOMMU_MAP_F_WRITE | \ + VIRTIO_IOMMU_MAP_F_MMIO) + +struct virtio_iommu_req_map { + struct virtio_iommu_req_head head; + uint32_t domain; + uint64_t virt_start; + uint64_t virt_end; + uint64_t phys_start; + uint32_t flags; + struct virtio_iommu_req_tail tail; +}; + +struct virtio_iommu_req_unmap { + struct virtio_iommu_req_head head; + uint32_t domain; + uint64_t virt_start; + uint64_t virt_end; + uint8_t reserved[4]; + struct virtio_iommu_req_tail tail; +}; + +#define VIRTIO_IOMMU_PROBE_T_NONE 0 +#define VIRTIO_IOMMU_PROBE_T_RESV_MEM 1 + +#define VIRTIO_IOMMU_PROBE_T_MASK 0xfff + +struct virtio_iommu_probe_property { + uint16_t type; + uint16_t length; +}; + +#define VIRTIO_IOMMU_RESV_MEM_T_RESERVED 0 +#define VIRTIO_IOMMU_RESV_MEM_T_MSI 1 + +struct virtio_iommu_probe_resv_mem { + struct virtio_iommu_probe_property head; + uint8_t subtype; + uint8_t reserved[3]; + uint64_t start; + uint64_t end; +}; + +struct virtio_iommu_req_probe { + struct virtio_iommu_req_head head; + uint32_t endpoint; + uint8_t reserved[64]; + + uint8_t properties[]; + + /* + * Tail follows the variable-length properties array. No padding, + * property lengths are all aligned on 8 bytes. + */ +}; + +/* Fault types */ +#define VIRTIO_IOMMU_FAULT_R_UNKNOWN 0 +#define VIRTIO_IOMMU_FAULT_R_DOMAIN 1 +#define VIRTIO_IOMMU_FAULT_R_MAPPING 2 + +#define VIRTIO_IOMMU_FAULT_F_READ (1 << 0) +#define VIRTIO_IOMMU_FAULT_F_WRITE (1 << 1) +#define VIRTIO_IOMMU_FAULT_F_EXEC (1 << 2) +#define VIRTIO_IOMMU_FAULT_F_ADDRESS (1 << 8) + +struct virtio_iommu_fault { + uint8_t reason; + uint8_t reserved[3]; + uint32_t flags; + uint32_t endpoint; + uint8_t reserved2[4]; + uint64_t address; +}; + +#endif diff --git a/include/standard-headers/linux/virtio_mem.h b/include/standard-headers/linux/virtio_mem.h new file mode 100644 index 000000000..05e5ade75 --- /dev/null +++ b/include/standard-headers/linux/virtio_mem.h @@ -0,0 +1,211 @@ +/* SPDX-License-Identifier: BSD-3-Clause */ +/* + * Virtio Mem Device + * + * Copyright Red Hat, Inc. 2020 + * + * Authors: + * David Hildenbrand + * + * This header is BSD licensed so anyone can use the definitions + * to implement compatible drivers/servers: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of IBM nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS + * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL IBM OR + * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF + * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, + * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT + * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#ifndef _LINUX_VIRTIO_MEM_H +#define _LINUX_VIRTIO_MEM_H + +#include "standard-headers/linux/types.h" +#include "standard-headers/linux/virtio_types.h" +#include "standard-headers/linux/virtio_ids.h" +#include "standard-headers/linux/virtio_config.h" + +/* + * Each virtio-mem device manages a dedicated region in physical address + * space. Each device can belong to a single NUMA node, multiple devices + * for a single NUMA node are possible. A virtio-mem device is like a + * "resizable DIMM" consisting of small memory blocks that can be plugged + * or unplugged. The device driver is responsible for (un)plugging memory + * blocks on demand. + * + * Virtio-mem devices can only operate on their assigned memory region in + * order to (un)plug memory. A device cannot (un)plug memory belonging to + * other devices. + * + * The "region_size" corresponds to the maximum amount of memory that can + * be provided by a device. The "size" corresponds to the amount of memory + * that is currently plugged. "requested_size" corresponds to a request + * from the device to the device driver to (un)plug blocks. The + * device driver should try to (un)plug blocks in order to reach the + * "requested_size". It is impossible to plug more memory than requested. + * + * The "usable_region_size" represents the memory region that can actually + * be used to (un)plug memory. It is always at least as big as the + * "requested_size" and will grow dynamically. It will only shrink when + * explicitly triggered (VIRTIO_MEM_REQ_UNPLUG). + * + * There are no guarantees what will happen if unplugged memory is + * read/written. Such memory should, in general, not be touched. E.g., + * even writing might succeed, but the values will simply be discarded at + * random points in time. + * + * It can happen that the device cannot process a request, because it is + * busy. The device driver has to retry later. + * + * Usually, during system resets all memory will get unplugged, so the + * device driver can start with a clean state. However, in specific + * scenarios (if the device is busy) it can happen that the device still + * has memory plugged. The device driver can request to unplug all memory + * (VIRTIO_MEM_REQ_UNPLUG) - which might take a while to succeed if the + * device is busy. + */ + +/* --- virtio-mem: feature bits --- */ + +/* node_id is an ACPI PXM and is valid */ +#define VIRTIO_MEM_F_ACPI_PXM 0 + + +/* --- virtio-mem: guest -> host requests --- */ + +/* request to plug memory blocks */ +#define VIRTIO_MEM_REQ_PLUG 0 +/* request to unplug memory blocks */ +#define VIRTIO_MEM_REQ_UNPLUG 1 +/* request to unplug all blocks and shrink the usable size */ +#define VIRTIO_MEM_REQ_UNPLUG_ALL 2 +/* request information about the plugged state of memory blocks */ +#define VIRTIO_MEM_REQ_STATE 3 + +struct virtio_mem_req_plug { + __virtio64 addr; + __virtio16 nb_blocks; + __virtio16 padding[3]; +}; + +struct virtio_mem_req_unplug { + __virtio64 addr; + __virtio16 nb_blocks; + __virtio16 padding[3]; +}; + +struct virtio_mem_req_state { + __virtio64 addr; + __virtio16 nb_blocks; + __virtio16 padding[3]; +}; + +struct virtio_mem_req { + __virtio16 type; + __virtio16 padding[3]; + + union { + struct virtio_mem_req_plug plug; + struct virtio_mem_req_unplug unplug; + struct virtio_mem_req_state state; + } u; +}; + + +/* --- virtio-mem: host -> guest response --- */ + +/* + * Request processed successfully, applicable for + * - VIRTIO_MEM_REQ_PLUG + * - VIRTIO_MEM_REQ_UNPLUG + * - VIRTIO_MEM_REQ_UNPLUG_ALL + * - VIRTIO_MEM_REQ_STATE + */ +#define VIRTIO_MEM_RESP_ACK 0 +/* + * Request denied - e.g. trying to plug more than requested, applicable for + * - VIRTIO_MEM_REQ_PLUG + */ +#define VIRTIO_MEM_RESP_NACK 1 +/* + * Request cannot be processed right now, try again later, applicable for + * - VIRTIO_MEM_REQ_PLUG + * - VIRTIO_MEM_REQ_UNPLUG + * - VIRTIO_MEM_REQ_UNPLUG_ALL + */ +#define VIRTIO_MEM_RESP_BUSY 2 +/* + * Error in request (e.g. addresses/alignment), applicable for + * - VIRTIO_MEM_REQ_PLUG + * - VIRTIO_MEM_REQ_UNPLUG + * - VIRTIO_MEM_REQ_STATE + */ +#define VIRTIO_MEM_RESP_ERROR 3 + + +/* State of memory blocks is "plugged" */ +#define VIRTIO_MEM_STATE_PLUGGED 0 +/* State of memory blocks is "unplugged" */ +#define VIRTIO_MEM_STATE_UNPLUGGED 1 +/* State of memory blocks is "mixed" */ +#define VIRTIO_MEM_STATE_MIXED 2 + +struct virtio_mem_resp_state { + __virtio16 state; +}; + +struct virtio_mem_resp { + __virtio16 type; + __virtio16 padding[3]; + + union { + struct virtio_mem_resp_state state; + } u; +}; + +/* --- virtio-mem: configuration --- */ + +struct virtio_mem_config { + /* Block size and alignment. Cannot change. */ + uint64_t block_size; + /* Valid with VIRTIO_MEM_F_ACPI_PXM. Cannot change. */ + uint16_t node_id; + uint8_t padding[6]; + /* Start address of the memory region. Cannot change. */ + uint64_t addr; + /* Region size (maximum). Cannot change. */ + uint64_t region_size; + /* + * Currently usable region size. Can grow up to region_size. Can + * shrink due to VIRTIO_MEM_REQ_UNPLUG_ALL (in which case no config + * update will be sent). + */ + uint64_t usable_region_size; + /* + * Currently used size. Changes due to plug/unplug requests, but no + * config updates will be sent. + */ + uint64_t plugged_size; + /* Requested size. New plug requests cannot exceed it. Can change. */ + uint64_t requested_size; +}; + +#endif /* _LINUX_VIRTIO_MEM_H */ diff --git a/include/standard-headers/linux/virtio_mmio.h b/include/standard-headers/linux/virtio_mmio.h new file mode 100644 index 000000000..0650f91be --- /dev/null +++ b/include/standard-headers/linux/virtio_mmio.h @@ -0,0 +1,152 @@ +/* + * Virtio platform device driver + * + * Copyright 2011, ARM Ltd. + * + * Based on Virtio PCI driver by Anthony Liguori, copyright IBM Corp. 2007 + * + * This header is BSD licensed so anyone can use the definitions to implement + * compatible drivers/servers. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of IBM nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL IBM OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#ifndef _LINUX_VIRTIO_MMIO_H +#define _LINUX_VIRTIO_MMIO_H + +/* + * Control registers + */ + +/* Magic value ("virt" string) - Read Only */ +#define VIRTIO_MMIO_MAGIC_VALUE 0x000 + +/* Virtio device version - Read Only */ +#define VIRTIO_MMIO_VERSION 0x004 + +/* Virtio device ID - Read Only */ +#define VIRTIO_MMIO_DEVICE_ID 0x008 + +/* Virtio vendor ID - Read Only */ +#define VIRTIO_MMIO_VENDOR_ID 0x00c + +/* Bitmask of the features supported by the device (host) + * (32 bits per set) - Read Only */ +#define VIRTIO_MMIO_DEVICE_FEATURES 0x010 + +/* Device (host) features set selector - Write Only */ +#define VIRTIO_MMIO_DEVICE_FEATURES_SEL 0x014 + +/* Bitmask of features activated by the driver (guest) + * (32 bits per set) - Write Only */ +#define VIRTIO_MMIO_DRIVER_FEATURES 0x020 + +/* Activated features set selector - Write Only */ +#define VIRTIO_MMIO_DRIVER_FEATURES_SEL 0x024 + + +#ifndef VIRTIO_MMIO_NO_LEGACY /* LEGACY DEVICES ONLY! */ + +/* Guest's memory page size in bytes - Write Only */ +#define VIRTIO_MMIO_GUEST_PAGE_SIZE 0x028 + +#endif + + +/* Queue selector - Write Only */ +#define VIRTIO_MMIO_QUEUE_SEL 0x030 + +/* Maximum size of the currently selected queue - Read Only */ +#define VIRTIO_MMIO_QUEUE_NUM_MAX 0x034 + +/* Queue size for the currently selected queue - Write Only */ +#define VIRTIO_MMIO_QUEUE_NUM 0x038 + + +#ifndef VIRTIO_MMIO_NO_LEGACY /* LEGACY DEVICES ONLY! */ + +/* Used Ring alignment for the currently selected queue - Write Only */ +#define VIRTIO_MMIO_QUEUE_ALIGN 0x03c + +/* Guest's PFN for the currently selected queue - Read Write */ +#define VIRTIO_MMIO_QUEUE_PFN 0x040 + +#endif + + +/* Ready bit for the currently selected queue - Read Write */ +#define VIRTIO_MMIO_QUEUE_READY 0x044 + +/* Queue notifier - Write Only */ +#define VIRTIO_MMIO_QUEUE_NOTIFY 0x050 + +/* Interrupt status - Read Only */ +#define VIRTIO_MMIO_INTERRUPT_STATUS 0x060 + +/* Interrupt acknowledge - Write Only */ +#define VIRTIO_MMIO_INTERRUPT_ACK 0x064 + +/* Device status register - Read Write */ +#define VIRTIO_MMIO_STATUS 0x070 + +/* Selected queue's Descriptor Table address, 64 bits in two halves */ +#define VIRTIO_MMIO_QUEUE_DESC_LOW 0x080 +#define VIRTIO_MMIO_QUEUE_DESC_HIGH 0x084 + +/* Selected queue's Available Ring address, 64 bits in two halves */ +#define VIRTIO_MMIO_QUEUE_AVAIL_LOW 0x090 +#define VIRTIO_MMIO_QUEUE_AVAIL_HIGH 0x094 + +/* Selected queue's Used Ring address, 64 bits in two halves */ +#define VIRTIO_MMIO_QUEUE_USED_LOW 0x0a0 +#define VIRTIO_MMIO_QUEUE_USED_HIGH 0x0a4 + +/* Shared memory region id */ +#define VIRTIO_MMIO_SHM_SEL 0x0ac + +/* Shared memory region length, 64 bits in two halves */ +#define VIRTIO_MMIO_SHM_LEN_LOW 0x0b0 +#define VIRTIO_MMIO_SHM_LEN_HIGH 0x0b4 + +/* Shared memory region base address, 64 bits in two halves */ +#define VIRTIO_MMIO_SHM_BASE_LOW 0x0b8 +#define VIRTIO_MMIO_SHM_BASE_HIGH 0x0bc + +/* Configuration atomicity value */ +#define VIRTIO_MMIO_CONFIG_GENERATION 0x0fc + +/* The config space is defined by each driver as + * the per-driver configuration space - Read Write */ +#define VIRTIO_MMIO_CONFIG 0x100 + + + +/* + * Interrupt flags (re: interrupt status & acknowledge registers) + */ + +#define VIRTIO_MMIO_INT_VRING (1 << 0) +#define VIRTIO_MMIO_INT_CONFIG (1 << 1) + +#endif diff --git a/include/standard-headers/linux/virtio_net.h b/include/standard-headers/linux/virtio_net.h new file mode 100644 index 000000000..e0a070518 --- /dev/null +++ b/include/standard-headers/linux/virtio_net.h @@ -0,0 +1,358 @@ +#ifndef _LINUX_VIRTIO_NET_H +#define _LINUX_VIRTIO_NET_H +/* This header is BSD licensed so anyone can use the definitions to implement + * compatible drivers/servers. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of IBM nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL IBM OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. */ +#include "standard-headers/linux/types.h" +#include "standard-headers/linux/virtio_ids.h" +#include "standard-headers/linux/virtio_config.h" +#include "standard-headers/linux/virtio_types.h" +#include "standard-headers/linux/if_ether.h" + +/* The feature bitmap for virtio net */ +#define VIRTIO_NET_F_CSUM 0 /* Host handles pkts w/ partial csum */ +#define VIRTIO_NET_F_GUEST_CSUM 1 /* Guest handles pkts w/ partial csum */ +#define VIRTIO_NET_F_CTRL_GUEST_OFFLOADS 2 /* Dynamic offload configuration. */ +#define VIRTIO_NET_F_MTU 3 /* Initial MTU advice */ +#define VIRTIO_NET_F_MAC 5 /* Host has given MAC address. */ +#define VIRTIO_NET_F_GUEST_TSO4 7 /* Guest can handle TSOv4 in. */ +#define VIRTIO_NET_F_GUEST_TSO6 8 /* Guest can handle TSOv6 in. */ +#define VIRTIO_NET_F_GUEST_ECN 9 /* Guest can handle TSO[6] w/ ECN in. */ +#define VIRTIO_NET_F_GUEST_UFO 10 /* Guest can handle UFO in. */ +#define VIRTIO_NET_F_HOST_TSO4 11 /* Host can handle TSOv4 in. */ +#define VIRTIO_NET_F_HOST_TSO6 12 /* Host can handle TSOv6 in. */ +#define VIRTIO_NET_F_HOST_ECN 13 /* Host can handle TSO[6] w/ ECN in. */ +#define VIRTIO_NET_F_HOST_UFO 14 /* Host can handle UFO in. */ +#define VIRTIO_NET_F_MRG_RXBUF 15 /* Host can merge receive buffers. */ +#define VIRTIO_NET_F_STATUS 16 /* virtio_net_config.status available */ +#define VIRTIO_NET_F_CTRL_VQ 17 /* Control channel available */ +#define VIRTIO_NET_F_CTRL_RX 18 /* Control channel RX mode support */ +#define VIRTIO_NET_F_CTRL_VLAN 19 /* Control channel VLAN filtering */ +#define VIRTIO_NET_F_CTRL_RX_EXTRA 20 /* Extra RX mode control support */ +#define VIRTIO_NET_F_GUEST_ANNOUNCE 21 /* Guest can announce device on the + * network */ +#define VIRTIO_NET_F_MQ 22 /* Device supports Receive Flow + * Steering */ +#define VIRTIO_NET_F_CTRL_MAC_ADDR 23 /* Set MAC address */ + +#define VIRTIO_NET_F_HASH_REPORT 57 /* Supports hash report */ +#define VIRTIO_NET_F_RSS 60 /* Supports RSS RX steering */ +#define VIRTIO_NET_F_RSC_EXT 61 /* extended coalescing info */ +#define VIRTIO_NET_F_STANDBY 62 /* Act as standby for another device + * with the same MAC. + */ +#define VIRTIO_NET_F_SPEED_DUPLEX 63 /* Device set linkspeed and duplex */ + +#ifndef VIRTIO_NET_NO_LEGACY +#define VIRTIO_NET_F_GSO 6 /* Host handles pkts w/ any GSO type */ +#endif /* VIRTIO_NET_NO_LEGACY */ + +#define VIRTIO_NET_S_LINK_UP 1 /* Link is up */ +#define VIRTIO_NET_S_ANNOUNCE 2 /* Announcement is needed */ + +/* supported/enabled hash types */ +#define VIRTIO_NET_RSS_HASH_TYPE_IPv4 (1 << 0) +#define VIRTIO_NET_RSS_HASH_TYPE_TCPv4 (1 << 1) +#define VIRTIO_NET_RSS_HASH_TYPE_UDPv4 (1 << 2) +#define VIRTIO_NET_RSS_HASH_TYPE_IPv6 (1 << 3) +#define VIRTIO_NET_RSS_HASH_TYPE_TCPv6 (1 << 4) +#define VIRTIO_NET_RSS_HASH_TYPE_UDPv6 (1 << 5) +#define VIRTIO_NET_RSS_HASH_TYPE_IP_EX (1 << 6) +#define VIRTIO_NET_RSS_HASH_TYPE_TCP_EX (1 << 7) +#define VIRTIO_NET_RSS_HASH_TYPE_UDP_EX (1 << 8) + +struct virtio_net_config { + /* The config defining mac address (if VIRTIO_NET_F_MAC) */ + uint8_t mac[ETH_ALEN]; + /* See VIRTIO_NET_F_STATUS and VIRTIO_NET_S_* above */ + __virtio16 status; + /* Maximum number of each of transmit and receive queues; + * see VIRTIO_NET_F_MQ and VIRTIO_NET_CTRL_MQ. + * Legal values are between 1 and 0x8000 + */ + __virtio16 max_virtqueue_pairs; + /* Default maximum transmit unit advice */ + __virtio16 mtu; + /* + * speed, in units of 1Mb. All values 0 to INT_MAX are legal. + * Any other value stands for unknown. + */ + uint32_t speed; + /* + * 0x00 - half duplex + * 0x01 - full duplex + * Any other value stands for unknown. + */ + uint8_t duplex; + /* maximum size of RSS key */ + uint8_t rss_max_key_size; + /* maximum number of indirection table entries */ + uint16_t rss_max_indirection_table_length; + /* bitmask of supported VIRTIO_NET_RSS_HASH_ types */ + uint32_t supported_hash_types; +} QEMU_PACKED; + +/* + * This header comes first in the scatter-gather list. If you don't + * specify GSO or CSUM features, you can simply ignore the header. + * + * This is bitwise-equivalent to the legacy struct virtio_net_hdr_mrg_rxbuf, + * only flattened. + */ +struct virtio_net_hdr_v1 { +#define VIRTIO_NET_HDR_F_NEEDS_CSUM 1 /* Use csum_start, csum_offset */ +#define VIRTIO_NET_HDR_F_DATA_VALID 2 /* Csum is valid */ +#define VIRTIO_NET_HDR_F_RSC_INFO 4 /* rsc info in csum_ fields */ + uint8_t flags; +#define VIRTIO_NET_HDR_GSO_NONE 0 /* Not a GSO frame */ +#define VIRTIO_NET_HDR_GSO_TCPV4 1 /* GSO frame, IPv4 TCP (TSO) */ +#define VIRTIO_NET_HDR_GSO_UDP 3 /* GSO frame, IPv4 UDP (UFO) */ +#define VIRTIO_NET_HDR_GSO_TCPV6 4 /* GSO frame, IPv6 TCP */ +#define VIRTIO_NET_HDR_GSO_ECN 0x80 /* TCP has ECN set */ + uint8_t gso_type; + __virtio16 hdr_len; /* Ethernet + IP + tcp/udp hdrs */ + __virtio16 gso_size; /* Bytes to append to hdr_len per frame */ + union { + struct { + __virtio16 csum_start; + __virtio16 csum_offset; + }; + /* Checksum calculation */ + struct { + /* Position to start checksumming from */ + __virtio16 start; + /* Offset after that to place checksum */ + __virtio16 offset; + } csum; + /* Receive Segment Coalescing */ + struct { + /* Number of coalesced segments */ + uint16_t segments; + /* Number of duplicated acks */ + uint16_t dup_acks; + } rsc; + }; + __virtio16 num_buffers; /* Number of merged rx buffers */ +}; + +struct virtio_net_hdr_v1_hash { + struct virtio_net_hdr_v1 hdr; + uint32_t hash_value; +#define VIRTIO_NET_HASH_REPORT_NONE 0 +#define VIRTIO_NET_HASH_REPORT_IPv4 1 +#define VIRTIO_NET_HASH_REPORT_TCPv4 2 +#define VIRTIO_NET_HASH_REPORT_UDPv4 3 +#define VIRTIO_NET_HASH_REPORT_IPv6 4 +#define VIRTIO_NET_HASH_REPORT_TCPv6 5 +#define VIRTIO_NET_HASH_REPORT_UDPv6 6 +#define VIRTIO_NET_HASH_REPORT_IPv6_EX 7 +#define VIRTIO_NET_HASH_REPORT_TCPv6_EX 8 +#define VIRTIO_NET_HASH_REPORT_UDPv6_EX 9 + uint16_t hash_report; + uint16_t padding; +}; + +#ifndef VIRTIO_NET_NO_LEGACY +/* This header comes first in the scatter-gather list. + * For legacy virtio, if VIRTIO_F_ANY_LAYOUT is not negotiated, it must + * be the first element of the scatter-gather list. If you don't + * specify GSO or CSUM features, you can simply ignore the header. */ +struct virtio_net_hdr { + /* See VIRTIO_NET_HDR_F_* */ + uint8_t flags; + /* See VIRTIO_NET_HDR_GSO_* */ + uint8_t gso_type; + __virtio16 hdr_len; /* Ethernet + IP + tcp/udp hdrs */ + __virtio16 gso_size; /* Bytes to append to hdr_len per frame */ + __virtio16 csum_start; /* Position to start checksumming from */ + __virtio16 csum_offset; /* Offset after that to place checksum */ +}; + +/* This is the version of the header to use when the MRG_RXBUF + * feature has been negotiated. */ +struct virtio_net_hdr_mrg_rxbuf { + struct virtio_net_hdr hdr; + __virtio16 num_buffers; /* Number of merged rx buffers */ +}; +#endif /* ...VIRTIO_NET_NO_LEGACY */ + +/* + * Control virtqueue data structures + * + * The control virtqueue expects a header in the first sg entry + * and an ack/status response in the last entry. Data for the + * command goes in between. + */ +struct virtio_net_ctrl_hdr { + uint8_t class; + uint8_t cmd; +} QEMU_PACKED; + +typedef uint8_t virtio_net_ctrl_ack; + +#define VIRTIO_NET_OK 0 +#define VIRTIO_NET_ERR 1 + +/* + * Control the RX mode, ie. promisucous, allmulti, etc... + * All commands require an "out" sg entry containing a 1 byte + * state value, zero = disable, non-zero = enable. Commands + * 0 and 1 are supported with the VIRTIO_NET_F_CTRL_RX feature. + * Commands 2-5 are added with VIRTIO_NET_F_CTRL_RX_EXTRA. + */ +#define VIRTIO_NET_CTRL_RX 0 + #define VIRTIO_NET_CTRL_RX_PROMISC 0 + #define VIRTIO_NET_CTRL_RX_ALLMULTI 1 + #define VIRTIO_NET_CTRL_RX_ALLUNI 2 + #define VIRTIO_NET_CTRL_RX_NOMULTI 3 + #define VIRTIO_NET_CTRL_RX_NOUNI 4 + #define VIRTIO_NET_CTRL_RX_NOBCAST 5 + +/* + * Control the MAC + * + * The MAC filter table is managed by the hypervisor, the guest should + * assume the size is infinite. Filtering should be considered + * non-perfect, ie. based on hypervisor resources, the guest may + * received packets from sources not specified in the filter list. + * + * In addition to the class/cmd header, the TABLE_SET command requires + * two out scatterlists. Each contains a 4 byte count of entries followed + * by a concatenated byte stream of the ETH_ALEN MAC addresses. The + * first sg list contains unicast addresses, the second is for multicast. + * This functionality is present if the VIRTIO_NET_F_CTRL_RX feature + * is available. + * + * The ADDR_SET command requests one out scatterlist, it contains a + * 6 bytes MAC address. This functionality is present if the + * VIRTIO_NET_F_CTRL_MAC_ADDR feature is available. + */ +struct virtio_net_ctrl_mac { + __virtio32 entries; + uint8_t macs[][ETH_ALEN]; +} QEMU_PACKED; + +#define VIRTIO_NET_CTRL_MAC 1 + #define VIRTIO_NET_CTRL_MAC_TABLE_SET 0 + #define VIRTIO_NET_CTRL_MAC_ADDR_SET 1 + +/* + * Control VLAN filtering + * + * The VLAN filter table is controlled via a simple ADD/DEL interface. + * VLAN IDs not added may be filterd by the hypervisor. Del is the + * opposite of add. Both commands expect an out entry containing a 2 + * byte VLAN ID. VLAN filterting is available with the + * VIRTIO_NET_F_CTRL_VLAN feature bit. + */ +#define VIRTIO_NET_CTRL_VLAN 2 + #define VIRTIO_NET_CTRL_VLAN_ADD 0 + #define VIRTIO_NET_CTRL_VLAN_DEL 1 + +/* + * Control link announce acknowledgement + * + * The command VIRTIO_NET_CTRL_ANNOUNCE_ACK is used to indicate that + * driver has recevied the notification; device would clear the + * VIRTIO_NET_S_ANNOUNCE bit in the status field after it receives + * this command. + */ +#define VIRTIO_NET_CTRL_ANNOUNCE 3 + #define VIRTIO_NET_CTRL_ANNOUNCE_ACK 0 + +/* + * Control Receive Flow Steering + */ +#define VIRTIO_NET_CTRL_MQ 4 +/* + * The command VIRTIO_NET_CTRL_MQ_VQ_PAIRS_SET + * enables Receive Flow Steering, specifying the number of the transmit and + * receive queues that will be used. After the command is consumed and acked by + * the device, the device will not steer new packets on receive virtqueues + * other than specified nor read from transmit virtqueues other than specified. + * Accordingly, driver should not transmit new packets on virtqueues other than + * specified. + */ +struct virtio_net_ctrl_mq { + __virtio16 virtqueue_pairs; +}; + + #define VIRTIO_NET_CTRL_MQ_VQ_PAIRS_SET 0 + #define VIRTIO_NET_CTRL_MQ_VQ_PAIRS_MIN 1 + #define VIRTIO_NET_CTRL_MQ_VQ_PAIRS_MAX 0x8000 + +/* + * The command VIRTIO_NET_CTRL_MQ_RSS_CONFIG has the same effect as + * VIRTIO_NET_CTRL_MQ_VQ_PAIRS_SET does and additionally configures + * the receive steering to use a hash calculated for incoming packet + * to decide on receive virtqueue to place the packet. The command + * also provides parameters to calculate a hash and receive virtqueue. + */ +struct virtio_net_rss_config { + uint32_t hash_types; + uint16_t indirection_table_mask; + uint16_t unclassified_queue; + uint16_t indirection_table[1/* + indirection_table_mask */]; + uint16_t max_tx_vq; + uint8_t hash_key_length; + uint8_t hash_key_data[/* hash_key_length */]; +}; + + #define VIRTIO_NET_CTRL_MQ_RSS_CONFIG 1 + +/* + * The command VIRTIO_NET_CTRL_MQ_HASH_CONFIG requests the device + * to include in the virtio header of the packet the value of the + * calculated hash and the report type of hash. It also provides + * parameters for hash calculation. The command requires feature + * VIRTIO_NET_F_HASH_REPORT to be negotiated to extend the + * layout of virtio header as defined in virtio_net_hdr_v1_hash. + */ +struct virtio_net_hash_config { + uint32_t hash_types; + /* for compatibility with virtio_net_rss_config */ + uint16_t reserved[4]; + uint8_t hash_key_length; + uint8_t hash_key_data[/* hash_key_length */]; +}; + + #define VIRTIO_NET_CTRL_MQ_HASH_CONFIG 2 + +/* + * Control network offloads + * + * Reconfigures the network offloads that Guest can handle. + * + * Available with the VIRTIO_NET_F_CTRL_GUEST_OFFLOADS feature bit. + * + * Command data format matches the feature bit mask exactly. + * + * See VIRTIO_NET_F_GUEST_* for the list of offloads + * that can be enabled/disabled. + */ +#define VIRTIO_NET_CTRL_GUEST_OFFLOADS 5 +#define VIRTIO_NET_CTRL_GUEST_OFFLOADS_SET 0 + +#endif /* _LINUX_VIRTIO_NET_H */ diff --git a/include/standard-headers/linux/virtio_pci.h b/include/standard-headers/linux/virtio_pci.h new file mode 100644 index 000000000..db7a8e2fc --- /dev/null +++ b/include/standard-headers/linux/virtio_pci.h @@ -0,0 +1,208 @@ +/* + * Virtio PCI driver + * + * This module allows virtio devices to be used over a virtual PCI device. + * This can be used with QEMU based VMMs like KVM or Xen. + * + * Copyright IBM Corp. 2007 + * + * Authors: + * Anthony Liguori + * + * This header is BSD licensed so anyone can use the definitions to implement + * compatible drivers/servers. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of IBM nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL IBM OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#ifndef _LINUX_VIRTIO_PCI_H +#define _LINUX_VIRTIO_PCI_H + +#include "standard-headers/linux/types.h" + +#ifndef VIRTIO_PCI_NO_LEGACY + +/* A 32-bit r/o bitmask of the features supported by the host */ +#define VIRTIO_PCI_HOST_FEATURES 0 + +/* A 32-bit r/w bitmask of features activated by the guest */ +#define VIRTIO_PCI_GUEST_FEATURES 4 + +/* A 32-bit r/w PFN for the currently selected queue */ +#define VIRTIO_PCI_QUEUE_PFN 8 + +/* A 16-bit r/o queue size for the currently selected queue */ +#define VIRTIO_PCI_QUEUE_NUM 12 + +/* A 16-bit r/w queue selector */ +#define VIRTIO_PCI_QUEUE_SEL 14 + +/* A 16-bit r/w queue notifier */ +#define VIRTIO_PCI_QUEUE_NOTIFY 16 + +/* An 8-bit device status register. */ +#define VIRTIO_PCI_STATUS 18 + +/* An 8-bit r/o interrupt status register. Reading the value will return the + * current contents of the ISR and will also clear it. This is effectively + * a read-and-acknowledge. */ +#define VIRTIO_PCI_ISR 19 + +/* MSI-X registers: only enabled if MSI-X is enabled. */ +/* A 16-bit vector for configuration changes. */ +#define VIRTIO_MSI_CONFIG_VECTOR 20 +/* A 16-bit vector for selected queue notifications. */ +#define VIRTIO_MSI_QUEUE_VECTOR 22 + +/* The remaining space is defined by each driver as the per-driver + * configuration space */ +#define VIRTIO_PCI_CONFIG_OFF(msix_enabled) ((msix_enabled) ? 24 : 20) +/* Deprecated: please use VIRTIO_PCI_CONFIG_OFF instead */ +#define VIRTIO_PCI_CONFIG(dev) VIRTIO_PCI_CONFIG_OFF((dev)->msix_enabled) + +/* Virtio ABI version, this must match exactly */ +#define VIRTIO_PCI_ABI_VERSION 0 + +/* How many bits to shift physical queue address written to QUEUE_PFN. + * 12 is historical, and due to x86 page size. */ +#define VIRTIO_PCI_QUEUE_ADDR_SHIFT 12 + +/* The alignment to use between consumer and producer parts of vring. + * x86 pagesize again. */ +#define VIRTIO_PCI_VRING_ALIGN 4096 + +#endif /* VIRTIO_PCI_NO_LEGACY */ + +/* The bit of the ISR which indicates a device configuration change. */ +#define VIRTIO_PCI_ISR_CONFIG 0x2 +/* Vector value used to disable MSI for queue */ +#define VIRTIO_MSI_NO_VECTOR 0xffff + +#ifndef VIRTIO_PCI_NO_MODERN + +/* IDs for different capabilities. Must all exist. */ + +/* Common configuration */ +#define VIRTIO_PCI_CAP_COMMON_CFG 1 +/* Notifications */ +#define VIRTIO_PCI_CAP_NOTIFY_CFG 2 +/* ISR access */ +#define VIRTIO_PCI_CAP_ISR_CFG 3 +/* Device specific configuration */ +#define VIRTIO_PCI_CAP_DEVICE_CFG 4 +/* PCI configuration access */ +#define VIRTIO_PCI_CAP_PCI_CFG 5 +/* Additional shared memory capability */ +#define VIRTIO_PCI_CAP_SHARED_MEMORY_CFG 8 + +/* This is the PCI capability header: */ +struct virtio_pci_cap { + uint8_t cap_vndr; /* Generic PCI field: PCI_CAP_ID_VNDR */ + uint8_t cap_next; /* Generic PCI field: next ptr. */ + uint8_t cap_len; /* Generic PCI field: capability length */ + uint8_t cfg_type; /* Identifies the structure. */ + uint8_t bar; /* Where to find it. */ + uint8_t id; /* Multiple capabilities of the same type */ + uint8_t padding[2]; /* Pad to full dword. */ + uint32_t offset; /* Offset within bar. */ + uint32_t length; /* Length of the structure, in bytes. */ +}; + +struct virtio_pci_cap64 { + struct virtio_pci_cap cap; + uint32_t offset_hi; /* Most sig 32 bits of offset */ + uint32_t length_hi; /* Most sig 32 bits of length */ +}; + +struct virtio_pci_notify_cap { + struct virtio_pci_cap cap; + uint32_t notify_off_multiplier; /* Multiplier for queue_notify_off. */ +}; + +/* Fields in VIRTIO_PCI_CAP_COMMON_CFG: */ +struct virtio_pci_common_cfg { + /* About the whole device. */ + uint32_t device_feature_select; /* read-write */ + uint32_t device_feature; /* read-only */ + uint32_t guest_feature_select; /* read-write */ + uint32_t guest_feature; /* read-write */ + uint16_t msix_config; /* read-write */ + uint16_t num_queues; /* read-only */ + uint8_t device_status; /* read-write */ + uint8_t config_generation; /* read-only */ + + /* About a specific virtqueue. */ + uint16_t queue_select; /* read-write */ + uint16_t queue_size; /* read-write, power of 2. */ + uint16_t queue_msix_vector; /* read-write */ + uint16_t queue_enable; /* read-write */ + uint16_t queue_notify_off; /* read-only */ + uint32_t queue_desc_lo; /* read-write */ + uint32_t queue_desc_hi; /* read-write */ + uint32_t queue_avail_lo; /* read-write */ + uint32_t queue_avail_hi; /* read-write */ + uint32_t queue_used_lo; /* read-write */ + uint32_t queue_used_hi; /* read-write */ +}; + +/* Fields in VIRTIO_PCI_CAP_PCI_CFG: */ +struct virtio_pci_cfg_cap { + struct virtio_pci_cap cap; + uint8_t pci_cfg_data[4]; /* Data for BAR access. */ +}; + +/* Macro versions of offsets for the Old Timers! */ +#define VIRTIO_PCI_CAP_VNDR 0 +#define VIRTIO_PCI_CAP_NEXT 1 +#define VIRTIO_PCI_CAP_LEN 2 +#define VIRTIO_PCI_CAP_CFG_TYPE 3 +#define VIRTIO_PCI_CAP_BAR 4 +#define VIRTIO_PCI_CAP_OFFSET 8 +#define VIRTIO_PCI_CAP_LENGTH 12 + +#define VIRTIO_PCI_NOTIFY_CAP_MULT 16 + +#define VIRTIO_PCI_COMMON_DFSELECT 0 +#define VIRTIO_PCI_COMMON_DF 4 +#define VIRTIO_PCI_COMMON_GFSELECT 8 +#define VIRTIO_PCI_COMMON_GF 12 +#define VIRTIO_PCI_COMMON_MSIX 16 +#define VIRTIO_PCI_COMMON_NUMQ 18 +#define VIRTIO_PCI_COMMON_STATUS 20 +#define VIRTIO_PCI_COMMON_CFGGENERATION 21 +#define VIRTIO_PCI_COMMON_Q_SELECT 22 +#define VIRTIO_PCI_COMMON_Q_SIZE 24 +#define VIRTIO_PCI_COMMON_Q_MSIX 26 +#define VIRTIO_PCI_COMMON_Q_ENABLE 28 +#define VIRTIO_PCI_COMMON_Q_NOFF 30 +#define VIRTIO_PCI_COMMON_Q_DESCLO 32 +#define VIRTIO_PCI_COMMON_Q_DESCHI 36 +#define VIRTIO_PCI_COMMON_Q_AVAILLO 40 +#define VIRTIO_PCI_COMMON_Q_AVAILHI 44 +#define VIRTIO_PCI_COMMON_Q_USEDLO 48 +#define VIRTIO_PCI_COMMON_Q_USEDHI 52 + +#endif /* VIRTIO_PCI_NO_MODERN */ + +#endif diff --git a/include/standard-headers/linux/virtio_pmem.h b/include/standard-headers/linux/virtio_pmem.h new file mode 100644 index 000000000..fc029de79 --- /dev/null +++ b/include/standard-headers/linux/virtio_pmem.h @@ -0,0 +1,34 @@ +/* SPDX-License-Identifier: (GPL-2.0 WITH Linux-syscall-note) OR BSD-3-Clause */ +/* + * Definitions for virtio-pmem devices. + * + * Copyright (C) 2019 Red Hat, Inc. + * + * Author(s): Pankaj Gupta + */ + +#ifndef _LINUX_VIRTIO_PMEM_H +#define _LINUX_VIRTIO_PMEM_H + +#include "standard-headers/linux/types.h" +#include "standard-headers/linux/virtio_ids.h" +#include "standard-headers/linux/virtio_config.h" + +struct virtio_pmem_config { + uint64_t start; + uint64_t size; +}; + +#define VIRTIO_PMEM_REQ_TYPE_FLUSH 0 + +struct virtio_pmem_resp { + /* Host return status corresponding to flush request */ + uint32_t ret; +}; + +struct virtio_pmem_req { + /* command type */ + uint32_t type; +}; + +#endif diff --git a/include/standard-headers/linux/virtio_ring.h b/include/standard-headers/linux/virtio_ring.h new file mode 100644 index 000000000..0fa0e1067 --- /dev/null +++ b/include/standard-headers/linux/virtio_ring.h @@ -0,0 +1,242 @@ +#ifndef _LINUX_VIRTIO_RING_H +#define _LINUX_VIRTIO_RING_H +/* An interface for efficient virtio implementation, currently for use by KVM, + * but hopefully others soon. Do NOT change this since it will + * break existing servers and clients. + * + * This header is BSD licensed so anyone can use the definitions to implement + * compatible drivers/servers. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of IBM nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL IBM OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * Copyright Rusty Russell IBM Corporation 2007. */ +#include +#include "standard-headers/linux/types.h" +#include "standard-headers/linux/virtio_types.h" + +/* This marks a buffer as continuing via the next field. */ +#define VRING_DESC_F_NEXT 1 +/* This marks a buffer as write-only (otherwise read-only). */ +#define VRING_DESC_F_WRITE 2 +/* This means the buffer contains a list of buffer descriptors. */ +#define VRING_DESC_F_INDIRECT 4 + +/* + * Mark a descriptor as available or used in packed ring. + * Notice: they are defined as shifts instead of shifted values. + */ +#define VRING_PACKED_DESC_F_AVAIL 7 +#define VRING_PACKED_DESC_F_USED 15 + +/* The Host uses this in used->flags to advise the Guest: don't kick me when + * you add a buffer. It's unreliable, so it's simply an optimization. Guest + * will still kick if it's out of buffers. */ +#define VRING_USED_F_NO_NOTIFY 1 +/* The Guest uses this in avail->flags to advise the Host: don't interrupt me + * when you consume a buffer. It's unreliable, so it's simply an + * optimization. */ +#define VRING_AVAIL_F_NO_INTERRUPT 1 + +/* Enable events in packed ring. */ +#define VRING_PACKED_EVENT_FLAG_ENABLE 0x0 +/* Disable events in packed ring. */ +#define VRING_PACKED_EVENT_FLAG_DISABLE 0x1 +/* + * Enable events for a specific descriptor in packed ring. + * (as specified by Descriptor Ring Change Event Offset/Wrap Counter). + * Only valid if VIRTIO_RING_F_EVENT_IDX has been negotiated. + */ +#define VRING_PACKED_EVENT_FLAG_DESC 0x2 + +/* + * Wrap counter bit shift in event suppression structure + * of packed ring. + */ +#define VRING_PACKED_EVENT_F_WRAP_CTR 15 + +/* We support indirect buffer descriptors */ +#define VIRTIO_RING_F_INDIRECT_DESC 28 + +/* The Guest publishes the used index for which it expects an interrupt + * at the end of the avail ring. Host should ignore the avail->flags field. */ +/* The Host publishes the avail index for which it expects a kick + * at the end of the used ring. Guest should ignore the used->flags field. */ +#define VIRTIO_RING_F_EVENT_IDX 29 + +/* Alignment requirements for vring elements. + * When using pre-virtio 1.0 layout, these fall out naturally. + */ +#define VRING_AVAIL_ALIGN_SIZE 2 +#define VRING_USED_ALIGN_SIZE 4 +#define VRING_DESC_ALIGN_SIZE 16 + +/* Virtio ring descriptors: 16 bytes. These can chain together via "next". */ +struct vring_desc { + /* Address (guest-physical). */ + __virtio64 addr; + /* Length. */ + __virtio32 len; + /* The flags as indicated above. */ + __virtio16 flags; + /* We chain unused descriptors via this, too */ + __virtio16 next; +}; + +struct vring_avail { + __virtio16 flags; + __virtio16 idx; + __virtio16 ring[]; +}; + +/* uint32_t is used here for ids for padding reasons. */ +struct vring_used_elem { + /* Index of start of used descriptor chain. */ + __virtio32 id; + /* Total length of the descriptor chain which was used (written to) */ + __virtio32 len; +}; + +typedef struct vring_used_elem __attribute__((aligned(VRING_USED_ALIGN_SIZE))) + vring_used_elem_t; + +struct vring_used { + __virtio16 flags; + __virtio16 idx; + vring_used_elem_t ring[]; +}; + +/* + * The ring element addresses are passed between components with different + * alignments assumptions. Thus, we might need to decrease the compiler-selected + * alignment, and so must use a typedef to make sure the aligned attribute + * actually takes hold: + * + * https://gcc.gnu.org/onlinedocs//gcc/Common-Type-Attributes.html#Common-Type-Attributes + * + * When used on a struct, or struct member, the aligned attribute can only + * increase the alignment; in order to decrease it, the packed attribute must + * be specified as well. When used as part of a typedef, the aligned attribute + * can both increase and decrease alignment, and specifying the packed + * attribute generates a warning. + */ +typedef struct vring_desc __attribute__((aligned(VRING_DESC_ALIGN_SIZE))) + vring_desc_t; +typedef struct vring_avail __attribute__((aligned(VRING_AVAIL_ALIGN_SIZE))) + vring_avail_t; +typedef struct vring_used __attribute__((aligned(VRING_USED_ALIGN_SIZE))) + vring_used_t; + +struct vring { + unsigned int num; + + vring_desc_t *desc; + + vring_avail_t *avail; + + vring_used_t *used; +}; + +#ifndef VIRTIO_RING_NO_LEGACY + +/* The standard layout for the ring is a continuous chunk of memory which looks + * like this. We assume num is a power of 2. + * + * struct vring + * { + * // The actual descriptors (16 bytes each) + * struct vring_desc desc[num]; + * + * // A ring of available descriptor heads with free-running index. + * __virtio16 avail_flags; + * __virtio16 avail_idx; + * __virtio16 available[num]; + * __virtio16 used_event_idx; + * + * // Padding to the next align boundary. + * char pad[]; + * + * // A ring of used descriptor heads with free-running index. + * __virtio16 used_flags; + * __virtio16 used_idx; + * struct vring_used_elem used[num]; + * __virtio16 avail_event_idx; + * }; + */ +/* We publish the used event index at the end of the available ring, and vice + * versa. They are at the end for backwards compatibility. */ +#define vring_used_event(vr) ((vr)->avail->ring[(vr)->num]) +#define vring_avail_event(vr) (*(__virtio16 *)&(vr)->used->ring[(vr)->num]) + +static inline void vring_init(struct vring *vr, unsigned int num, void *p, + unsigned long align) +{ + vr->num = num; + vr->desc = p; + vr->avail = (struct vring_avail *)((char *)p + num * sizeof(struct vring_desc)); + vr->used = (void *)(((uintptr_t)&vr->avail->ring[num] + sizeof(__virtio16) + + align-1) & ~(align - 1)); +} + +static inline unsigned vring_size(unsigned int num, unsigned long align) +{ + return ((sizeof(struct vring_desc) * num + sizeof(__virtio16) * (3 + num) + + align - 1) & ~(align - 1)) + + sizeof(__virtio16) * 3 + sizeof(struct vring_used_elem) * num; +} + +#endif /* VIRTIO_RING_NO_LEGACY */ + +/* The following is used with USED_EVENT_IDX and AVAIL_EVENT_IDX */ +/* Assuming a given event_idx value from the other side, if + * we have just incremented index from old to new_idx, + * should we trigger an event? */ +static inline int vring_need_event(uint16_t event_idx, uint16_t new_idx, uint16_t old) +{ + /* Note: Xen has similar logic for notification hold-off + * in include/xen/interface/io/ring.h with req_event and req_prod + * corresponding to event_idx + 1 and new_idx respectively. + * Note also that req_event and req_prod in Xen start at 1, + * event indexes in virtio start at 0. */ + return (uint16_t)(new_idx - event_idx - 1) < (uint16_t)(new_idx - old); +} + +struct vring_packed_desc_event { + /* Descriptor Ring Change Event Offset/Wrap Counter. */ + uint16_t off_wrap; + /* Descriptor Ring Change Event Flags. */ + uint16_t flags; +}; + +struct vring_packed_desc { + /* Buffer Address. */ + uint64_t addr; + /* Buffer Length. */ + uint32_t len; + /* Buffer ID. */ + uint16_t id; + /* The flags depending on descriptor type. */ + uint16_t flags; +}; + +#endif /* _LINUX_VIRTIO_RING_H */ diff --git a/include/standard-headers/linux/virtio_rng.h b/include/standard-headers/linux/virtio_rng.h new file mode 100644 index 000000000..60fc798bd --- /dev/null +++ b/include/standard-headers/linux/virtio_rng.h @@ -0,0 +1,8 @@ +#ifndef _LINUX_VIRTIO_RNG_H +#define _LINUX_VIRTIO_RNG_H +/* This header is BSD licensed so anyone can use the definitions to implement + * compatible drivers/servers. */ +#include "standard-headers/linux/virtio_ids.h" +#include "standard-headers/linux/virtio_config.h" + +#endif /* _LINUX_VIRTIO_RNG_H */ diff --git a/include/standard-headers/linux/virtio_scsi.h b/include/standard-headers/linux/virtio_scsi.h new file mode 100644 index 000000000..663f36cbb --- /dev/null +++ b/include/standard-headers/linux/virtio_scsi.h @@ -0,0 +1,172 @@ +/* + * This header is BSD licensed so anyone can use the definitions to implement + * compatible drivers/servers. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#ifndef _LINUX_VIRTIO_SCSI_H +#define _LINUX_VIRTIO_SCSI_H + +#include "standard-headers/linux/virtio_types.h" + +/* Default values of the CDB and sense data size configuration fields */ +#define VIRTIO_SCSI_CDB_DEFAULT_SIZE 32 +#define VIRTIO_SCSI_SENSE_DEFAULT_SIZE 96 + +#ifndef VIRTIO_SCSI_CDB_SIZE +#define VIRTIO_SCSI_CDB_SIZE VIRTIO_SCSI_CDB_DEFAULT_SIZE +#endif +#ifndef VIRTIO_SCSI_SENSE_SIZE +#define VIRTIO_SCSI_SENSE_SIZE VIRTIO_SCSI_SENSE_DEFAULT_SIZE +#endif + +/* SCSI command request, followed by data-out */ +struct virtio_scsi_cmd_req { + uint8_t lun[8]; /* Logical Unit Number */ + __virtio64 tag; /* Command identifier */ + uint8_t task_attr; /* Task attribute */ + uint8_t prio; /* SAM command priority field */ + uint8_t crn; + uint8_t cdb[VIRTIO_SCSI_CDB_SIZE]; +} QEMU_PACKED; + +/* SCSI command request, followed by protection information */ +struct virtio_scsi_cmd_req_pi { + uint8_t lun[8]; /* Logical Unit Number */ + __virtio64 tag; /* Command identifier */ + uint8_t task_attr; /* Task attribute */ + uint8_t prio; /* SAM command priority field */ + uint8_t crn; + __virtio32 pi_bytesout; /* DataOUT PI Number of bytes */ + __virtio32 pi_bytesin; /* DataIN PI Number of bytes */ + uint8_t cdb[VIRTIO_SCSI_CDB_SIZE]; +} QEMU_PACKED; + +/* Response, followed by sense data and data-in */ +struct virtio_scsi_cmd_resp { + __virtio32 sense_len; /* Sense data length */ + __virtio32 resid; /* Residual bytes in data buffer */ + __virtio16 status_qualifier; /* Status qualifier */ + uint8_t status; /* Command completion status */ + uint8_t response; /* Response values */ + uint8_t sense[VIRTIO_SCSI_SENSE_SIZE]; +} QEMU_PACKED; + +/* Task Management Request */ +struct virtio_scsi_ctrl_tmf_req { + __virtio32 type; + __virtio32 subtype; + uint8_t lun[8]; + __virtio64 tag; +} QEMU_PACKED; + +struct virtio_scsi_ctrl_tmf_resp { + uint8_t response; +} QEMU_PACKED; + +/* Asynchronous notification query/subscription */ +struct virtio_scsi_ctrl_an_req { + __virtio32 type; + uint8_t lun[8]; + __virtio32 event_requested; +} QEMU_PACKED; + +struct virtio_scsi_ctrl_an_resp { + __virtio32 event_actual; + uint8_t response; +} QEMU_PACKED; + +struct virtio_scsi_event { + __virtio32 event; + uint8_t lun[8]; + __virtio32 reason; +} QEMU_PACKED; + +struct virtio_scsi_config { + __virtio32 num_queues; + __virtio32 seg_max; + __virtio32 max_sectors; + __virtio32 cmd_per_lun; + __virtio32 event_info_size; + __virtio32 sense_size; + __virtio32 cdb_size; + __virtio16 max_channel; + __virtio16 max_target; + __virtio32 max_lun; +} QEMU_PACKED; + +/* Feature Bits */ +#define VIRTIO_SCSI_F_INOUT 0 +#define VIRTIO_SCSI_F_HOTPLUG 1 +#define VIRTIO_SCSI_F_CHANGE 2 +#define VIRTIO_SCSI_F_T10_PI 3 + +/* Response codes */ +#define VIRTIO_SCSI_S_OK 0 +#define VIRTIO_SCSI_S_OVERRUN 1 +#define VIRTIO_SCSI_S_ABORTED 2 +#define VIRTIO_SCSI_S_BAD_TARGET 3 +#define VIRTIO_SCSI_S_RESET 4 +#define VIRTIO_SCSI_S_BUSY 5 +#define VIRTIO_SCSI_S_TRANSPORT_FAILURE 6 +#define VIRTIO_SCSI_S_TARGET_FAILURE 7 +#define VIRTIO_SCSI_S_NEXUS_FAILURE 8 +#define VIRTIO_SCSI_S_FAILURE 9 +#define VIRTIO_SCSI_S_FUNCTION_SUCCEEDED 10 +#define VIRTIO_SCSI_S_FUNCTION_REJECTED 11 +#define VIRTIO_SCSI_S_INCORRECT_LUN 12 + +/* Controlq type codes. */ +#define VIRTIO_SCSI_T_TMF 0 +#define VIRTIO_SCSI_T_AN_QUERY 1 +#define VIRTIO_SCSI_T_AN_SUBSCRIBE 2 + +/* Valid TMF subtypes. */ +#define VIRTIO_SCSI_T_TMF_ABORT_TASK 0 +#define VIRTIO_SCSI_T_TMF_ABORT_TASK_SET 1 +#define VIRTIO_SCSI_T_TMF_CLEAR_ACA 2 +#define VIRTIO_SCSI_T_TMF_CLEAR_TASK_SET 3 +#define VIRTIO_SCSI_T_TMF_I_T_NEXUS_RESET 4 +#define VIRTIO_SCSI_T_TMF_LOGICAL_UNIT_RESET 5 +#define VIRTIO_SCSI_T_TMF_QUERY_TASK 6 +#define VIRTIO_SCSI_T_TMF_QUERY_TASK_SET 7 + +/* Events. */ +#define VIRTIO_SCSI_T_EVENTS_MISSED 0x80000000 +#define VIRTIO_SCSI_T_NO_EVENT 0 +#define VIRTIO_SCSI_T_TRANSPORT_RESET 1 +#define VIRTIO_SCSI_T_ASYNC_NOTIFY 2 +#define VIRTIO_SCSI_T_PARAM_CHANGE 3 + +/* Reasons of transport reset event */ +#define VIRTIO_SCSI_EVT_RESET_HARD 0 +#define VIRTIO_SCSI_EVT_RESET_RESCAN 1 +#define VIRTIO_SCSI_EVT_RESET_REMOVED 2 + +#define VIRTIO_SCSI_S_SIMPLE 0 +#define VIRTIO_SCSI_S_ORDERED 1 +#define VIRTIO_SCSI_S_HEAD 2 +#define VIRTIO_SCSI_S_ACA 3 + + +#endif /* _LINUX_VIRTIO_SCSI_H */ diff --git a/include/standard-headers/linux/virtio_types.h b/include/standard-headers/linux/virtio_types.h new file mode 100644 index 000000000..fd0d3511f --- /dev/null +++ b/include/standard-headers/linux/virtio_types.h @@ -0,0 +1,46 @@ +#ifndef _LINUX_VIRTIO_TYPES_H +#define _LINUX_VIRTIO_TYPES_H +/* Type definitions for virtio implementations. + * + * This header is BSD licensed so anyone can use the definitions to implement + * compatible drivers/servers. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of IBM nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL IBM OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * Copyright (C) 2014 Red Hat, Inc. + * Author: Michael S. Tsirkin + */ +#include "standard-headers/linux/types.h" + +/* + * __virtio{16,32,64} have the following meaning: + * - __u{16,32,64} for virtio devices in legacy mode, accessed in native endian + * - __le{16,32,64} for standard-compliant virtio devices + */ + +typedef uint16_t __virtio16; +typedef uint32_t __virtio32; +typedef uint64_t __virtio64; + +#endif /* _LINUX_VIRTIO_TYPES_H */ diff --git a/include/standard-headers/linux/virtio_vsock.h b/include/standard-headers/linux/virtio_vsock.h new file mode 100644 index 000000000..be443211c --- /dev/null +++ b/include/standard-headers/linux/virtio_vsock.h @@ -0,0 +1,94 @@ +/* + * This header, excluding the #ifdef __KERNEL__ part, is BSD licensed so + * anyone can use the definitions to implement compatible drivers/servers: + * + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of IBM nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS ``AS IS'' + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL IBM OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * Copyright (C) Red Hat, Inc., 2013-2015 + * Copyright (C) Asias He , 2013 + * Copyright (C) Stefan Hajnoczi , 2015 + */ + +#ifndef _LINUX_VIRTIO_VSOCK_H +#define _LINUX_VIRTIO_VSOCK_H + +#include "standard-headers/linux/types.h" +#include "standard-headers/linux/virtio_ids.h" +#include "standard-headers/linux/virtio_config.h" + +struct virtio_vsock_config { + uint64_t guest_cid; +} QEMU_PACKED; + +enum virtio_vsock_event_id { + VIRTIO_VSOCK_EVENT_TRANSPORT_RESET = 0, +}; + +struct virtio_vsock_event { + uint32_t id; +} QEMU_PACKED; + +struct virtio_vsock_hdr { + uint64_t src_cid; + uint64_t dst_cid; + uint32_t src_port; + uint32_t dst_port; + uint32_t len; + uint16_t type; /* enum virtio_vsock_type */ + uint16_t op; /* enum virtio_vsock_op */ + uint32_t flags; + uint32_t buf_alloc; + uint32_t fwd_cnt; +} QEMU_PACKED; + +enum virtio_vsock_type { + VIRTIO_VSOCK_TYPE_STREAM = 1, +}; + +enum virtio_vsock_op { + VIRTIO_VSOCK_OP_INVALID = 0, + + /* Connect operations */ + VIRTIO_VSOCK_OP_REQUEST = 1, + VIRTIO_VSOCK_OP_RESPONSE = 2, + VIRTIO_VSOCK_OP_RST = 3, + VIRTIO_VSOCK_OP_SHUTDOWN = 4, + + /* To send payload */ + VIRTIO_VSOCK_OP_RW = 5, + + /* Tell the peer our credit info */ + VIRTIO_VSOCK_OP_CREDIT_UPDATE = 6, + /* Request the peer to send the credit info to us */ + VIRTIO_VSOCK_OP_CREDIT_REQUEST = 7, +}; + +/* VIRTIO_VSOCK_OP_SHUTDOWN flags values */ +enum virtio_vsock_shutdown { + VIRTIO_VSOCK_SHUTDOWN_RCV = 1, + VIRTIO_VSOCK_SHUTDOWN_SEND = 2, +}; + +#endif /* _LINUX_VIRTIO_VSOCK_H */ diff --git a/include/standard-headers/rdma/vmw_pvrdma-abi.h b/include/standard-headers/rdma/vmw_pvrdma-abi.h new file mode 100644 index 000000000..0989426a3 --- /dev/null +++ b/include/standard-headers/rdma/vmw_pvrdma-abi.h @@ -0,0 +1,303 @@ +/* SPDX-License-Identifier: ((GPL-2.0 WITH Linux-syscall-note) OR BSD-2-Clause) */ +/* + * Copyright (c) 2012-2016 VMware, Inc. All rights reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of EITHER the GNU General Public License + * version 2 as published by the Free Software Foundation or the BSD + * 2-Clause License. This program is distributed in the hope that it + * will be useful, but WITHOUT ANY WARRANTY; WITHOUT EVEN THE IMPLIED + * WARRANTY OF MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE. + * See the GNU General Public License version 2 for more details at + * http://www.gnu.org/licenses/old-licenses/gpl-2.0.en.html. + * + * You should have received a copy of the GNU General Public License + * along with this program available in the file COPYING in the main + * directory of this source tree. + * + * The BSD 2-Clause License + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS + * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE + * COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, + * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR + * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, + * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED + * OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef __VMW_PVRDMA_ABI_H__ +#define __VMW_PVRDMA_ABI_H__ + +#include "standard-headers/linux/types.h" + +#define PVRDMA_UVERBS_ABI_VERSION 3 /* ABI Version. */ +#define PVRDMA_UAR_HANDLE_MASK 0x00FFFFFF /* Bottom 24 bits. */ +#define PVRDMA_UAR_QP_OFFSET 0 /* QP doorbell. */ +#define PVRDMA_UAR_QP_SEND (1 << 30) /* Send bit. */ +#define PVRDMA_UAR_QP_RECV (1 << 31) /* Recv bit. */ +#define PVRDMA_UAR_CQ_OFFSET 4 /* CQ doorbell. */ +#define PVRDMA_UAR_CQ_ARM_SOL (1 << 29) /* Arm solicited bit. */ +#define PVRDMA_UAR_CQ_ARM (1 << 30) /* Arm bit. */ +#define PVRDMA_UAR_CQ_POLL (1 << 31) /* Poll bit. */ +#define PVRDMA_UAR_SRQ_OFFSET 8 /* SRQ doorbell. */ +#define PVRDMA_UAR_SRQ_RECV (1 << 30) /* Recv bit. */ + +enum pvrdma_wr_opcode { + PVRDMA_WR_RDMA_WRITE, + PVRDMA_WR_RDMA_WRITE_WITH_IMM, + PVRDMA_WR_SEND, + PVRDMA_WR_SEND_WITH_IMM, + PVRDMA_WR_RDMA_READ, + PVRDMA_WR_ATOMIC_CMP_AND_SWP, + PVRDMA_WR_ATOMIC_FETCH_AND_ADD, + PVRDMA_WR_LSO, + PVRDMA_WR_SEND_WITH_INV, + PVRDMA_WR_RDMA_READ_WITH_INV, + PVRDMA_WR_LOCAL_INV, + PVRDMA_WR_FAST_REG_MR, + PVRDMA_WR_MASKED_ATOMIC_CMP_AND_SWP, + PVRDMA_WR_MASKED_ATOMIC_FETCH_AND_ADD, + PVRDMA_WR_BIND_MW, + PVRDMA_WR_REG_SIG_MR, + PVRDMA_WR_ERROR, +}; + +enum pvrdma_wc_status { + PVRDMA_WC_SUCCESS, + PVRDMA_WC_LOC_LEN_ERR, + PVRDMA_WC_LOC_QP_OP_ERR, + PVRDMA_WC_LOC_EEC_OP_ERR, + PVRDMA_WC_LOC_PROT_ERR, + PVRDMA_WC_WR_FLUSH_ERR, + PVRDMA_WC_MW_BIND_ERR, + PVRDMA_WC_BAD_RESP_ERR, + PVRDMA_WC_LOC_ACCESS_ERR, + PVRDMA_WC_REM_INV_REQ_ERR, + PVRDMA_WC_REM_ACCESS_ERR, + PVRDMA_WC_REM_OP_ERR, + PVRDMA_WC_RETRY_EXC_ERR, + PVRDMA_WC_RNR_RETRY_EXC_ERR, + PVRDMA_WC_LOC_RDD_VIOL_ERR, + PVRDMA_WC_REM_INV_RD_REQ_ERR, + PVRDMA_WC_REM_ABORT_ERR, + PVRDMA_WC_INV_EECN_ERR, + PVRDMA_WC_INV_EEC_STATE_ERR, + PVRDMA_WC_FATAL_ERR, + PVRDMA_WC_RESP_TIMEOUT_ERR, + PVRDMA_WC_GENERAL_ERR, +}; + +enum pvrdma_wc_opcode { + PVRDMA_WC_SEND, + PVRDMA_WC_RDMA_WRITE, + PVRDMA_WC_RDMA_READ, + PVRDMA_WC_COMP_SWAP, + PVRDMA_WC_FETCH_ADD, + PVRDMA_WC_BIND_MW, + PVRDMA_WC_LSO, + PVRDMA_WC_LOCAL_INV, + PVRDMA_WC_FAST_REG_MR, + PVRDMA_WC_MASKED_COMP_SWAP, + PVRDMA_WC_MASKED_FETCH_ADD, + PVRDMA_WC_RECV = 1 << 7, + PVRDMA_WC_RECV_RDMA_WITH_IMM, +}; + +enum pvrdma_wc_flags { + PVRDMA_WC_GRH = 1 << 0, + PVRDMA_WC_WITH_IMM = 1 << 1, + PVRDMA_WC_WITH_INVALIDATE = 1 << 2, + PVRDMA_WC_IP_CSUM_OK = 1 << 3, + PVRDMA_WC_WITH_SMAC = 1 << 4, + PVRDMA_WC_WITH_VLAN = 1 << 5, + PVRDMA_WC_WITH_NETWORK_HDR_TYPE = 1 << 6, + PVRDMA_WC_FLAGS_MAX = PVRDMA_WC_WITH_NETWORK_HDR_TYPE, +}; + +struct pvrdma_alloc_ucontext_resp { + uint32_t qp_tab_size; + uint32_t reserved; +}; + +struct pvrdma_alloc_pd_resp { + uint32_t pdn; + uint32_t reserved; +}; + +struct pvrdma_create_cq { + uint64_t __attribute__((aligned(8))) buf_addr; + uint32_t buf_size; + uint32_t reserved; +}; + +struct pvrdma_create_cq_resp { + uint32_t cqn; + uint32_t reserved; +}; + +struct pvrdma_resize_cq { + uint64_t __attribute__((aligned(8))) buf_addr; + uint32_t buf_size; + uint32_t reserved; +}; + +struct pvrdma_create_srq { + uint64_t __attribute__((aligned(8))) buf_addr; + uint32_t buf_size; + uint32_t reserved; +}; + +struct pvrdma_create_srq_resp { + uint32_t srqn; + uint32_t reserved; +}; + +struct pvrdma_create_qp { + uint64_t __attribute__((aligned(8))) rbuf_addr; + uint64_t __attribute__((aligned(8))) sbuf_addr; + uint32_t rbuf_size; + uint32_t sbuf_size; + uint64_t __attribute__((aligned(8))) qp_addr; +}; + +struct pvrdma_create_qp_resp { + uint32_t qpn; + uint32_t qp_handle; +}; + +/* PVRDMA masked atomic compare and swap */ +struct pvrdma_ex_cmp_swap { + uint64_t __attribute__((aligned(8))) swap_val; + uint64_t __attribute__((aligned(8))) compare_val; + uint64_t __attribute__((aligned(8))) swap_mask; + uint64_t __attribute__((aligned(8))) compare_mask; +}; + +/* PVRDMA masked atomic fetch and add */ +struct pvrdma_ex_fetch_add { + uint64_t __attribute__((aligned(8))) add_val; + uint64_t __attribute__((aligned(8))) field_boundary; +}; + +/* PVRDMA address vector. */ +struct pvrdma_av { + uint32_t port_pd; + uint32_t sl_tclass_flowlabel; + uint8_t dgid[16]; + uint8_t src_path_bits; + uint8_t gid_index; + uint8_t stat_rate; + uint8_t hop_limit; + uint8_t dmac[6]; + uint8_t reserved[6]; +}; + +/* PVRDMA scatter/gather entry */ +struct pvrdma_sge { + uint64_t __attribute__((aligned(8))) addr; + uint32_t length; + uint32_t lkey; +}; + +/* PVRDMA receive queue work request */ +struct pvrdma_rq_wqe_hdr { + uint64_t __attribute__((aligned(8))) wr_id; /* wr id */ + uint32_t num_sge; /* size of s/g array */ + uint32_t total_len; /* reserved */ +}; +/* Use pvrdma_sge (ib_sge) for receive queue s/g array elements. */ + +/* PVRDMA send queue work request */ +struct pvrdma_sq_wqe_hdr { + uint64_t __attribute__((aligned(8))) wr_id; /* wr id */ + uint32_t num_sge; /* size of s/g array */ + uint32_t total_len; /* reserved */ + uint32_t opcode; /* operation type */ + uint32_t send_flags; /* wr flags */ + union { + uint32_t imm_data; + uint32_t invalidate_rkey; + } ex; + uint32_t reserved; + union { + struct { + uint64_t __attribute__((aligned(8))) remote_addr; + uint32_t rkey; + uint8_t reserved[4]; + } rdma; + struct { + uint64_t __attribute__((aligned(8))) remote_addr; + uint64_t __attribute__((aligned(8))) compare_add; + uint64_t __attribute__((aligned(8))) swap; + uint32_t rkey; + uint32_t reserved; + } atomic; + struct { + uint64_t __attribute__((aligned(8))) remote_addr; + uint32_t log_arg_sz; + uint32_t rkey; + union { + struct pvrdma_ex_cmp_swap cmp_swap; + struct pvrdma_ex_fetch_add fetch_add; + } wr_data; + } masked_atomics; + struct { + uint64_t __attribute__((aligned(8))) iova_start; + uint64_t __attribute__((aligned(8))) pl_pdir_dma; + uint32_t page_shift; + uint32_t page_list_len; + uint32_t length; + uint32_t access_flags; + uint32_t rkey; + uint32_t reserved; + } fast_reg; + struct { + uint32_t remote_qpn; + uint32_t remote_qkey; + struct pvrdma_av av; + } ud; + } wr; +}; +/* Use pvrdma_sge (ib_sge) for send queue s/g array elements. */ + +/* Completion queue element. */ +struct pvrdma_cqe { + uint64_t __attribute__((aligned(8))) wr_id; + uint64_t __attribute__((aligned(8))) qp; + uint32_t opcode; + uint32_t status; + uint32_t byte_len; + uint32_t imm_data; + uint32_t src_qp; + uint32_t wc_flags; + uint32_t vendor_err; + uint16_t pkey_index; + uint16_t slid; + uint8_t sl; + uint8_t dlid_path_bits; + uint8_t port_num; + uint8_t smac[6]; + uint8_t network_hdr_type; + uint8_t reserved2[6]; /* Pad to next power of 2 (64). */ +}; + +#endif /* __VMW_PVRDMA_ABI_H__ */ diff --git a/include/sysemu/accel.h b/include/sysemu/accel.h new file mode 100644 index 000000000..e08b8ab8f --- /dev/null +++ b/include/sysemu/accel.h @@ -0,0 +1,77 @@ +/* QEMU accelerator interfaces + * + * Copyright (c) 2014 Red Hat Inc + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ +#ifndef HW_ACCEL_H +#define HW_ACCEL_H + +#include "qom/object.h" +#include "exec/hwaddr.h" + +typedef struct AccelState { + /*< private >*/ + Object parent_obj; +} AccelState; + +typedef struct AccelClass { + /*< private >*/ + ObjectClass parent_class; + /*< public >*/ + + const char *name; +#ifndef CONFIG_USER_ONLY + int (*init_machine)(MachineState *ms); + void (*setup_post)(MachineState *ms, AccelState *accel); + bool (*has_memory)(MachineState *ms, AddressSpace *as, + hwaddr start_addr, hwaddr size); +#endif + bool *allowed; + /* + * Array of global properties that would be applied when specific + * accelerator is chosen. It works like MachineClass.compat_props + * but it's for accelerators not machines. Accelerator-provided + * global properties may be overridden by machine-type + * compat_props or user-provided global properties. + */ + GPtrArray *compat_props; +} AccelClass; + +#define TYPE_ACCEL "accel" + +#define ACCEL_CLASS_SUFFIX "-" TYPE_ACCEL +#define ACCEL_CLASS_NAME(a) (a ACCEL_CLASS_SUFFIX) + +#define ACCEL_CLASS(klass) \ + OBJECT_CLASS_CHECK(AccelClass, (klass), TYPE_ACCEL) +#define ACCEL(obj) \ + OBJECT_CHECK(AccelState, (obj), TYPE_ACCEL) +#define ACCEL_GET_CLASS(obj) \ + OBJECT_GET_CLASS(AccelClass, (obj), TYPE_ACCEL) + +AccelClass *accel_find(const char *opt_name); +int accel_init_machine(AccelState *accel, MachineState *ms); + +/* Called just before os_setup_post (ie just before drop OS privs) */ +void accel_setup_post(MachineState *ms); + +AccelState *current_accel(void); + +#endif diff --git a/include/sysemu/arch_init.h b/include/sysemu/arch_init.h new file mode 100644 index 000000000..54f069d49 --- /dev/null +++ b/include/sysemu/arch_init.h @@ -0,0 +1,38 @@ +#ifndef QEMU_ARCH_INIT_H +#define QEMU_ARCH_INIT_H + + +enum { + QEMU_ARCH_ALL = -1, + QEMU_ARCH_ALPHA = (1 << 0), + QEMU_ARCH_ARM = (1 << 1), + QEMU_ARCH_CRIS = (1 << 2), + QEMU_ARCH_I386 = (1 << 3), + QEMU_ARCH_M68K = (1 << 4), + QEMU_ARCH_LM32 = (1 << 5), + QEMU_ARCH_MICROBLAZE = (1 << 6), + QEMU_ARCH_MIPS = (1 << 7), + QEMU_ARCH_PPC = (1 << 8), + QEMU_ARCH_S390X = (1 << 9), + QEMU_ARCH_SH4 = (1 << 10), + QEMU_ARCH_SPARC = (1 << 11), + QEMU_ARCH_XTENSA = (1 << 12), + QEMU_ARCH_OPENRISC = (1 << 13), + QEMU_ARCH_UNICORE32 = (1 << 14), + QEMU_ARCH_MOXIE = (1 << 15), + QEMU_ARCH_TRICORE = (1 << 16), + QEMU_ARCH_NIOS2 = (1 << 17), + QEMU_ARCH_HPPA = (1 << 18), + QEMU_ARCH_RISCV = (1 << 19), + QEMU_ARCH_RX = (1 << 20), + QEMU_ARCH_AVR = (1 << 21), + + QEMU_ARCH_NONE = (1 << 31), +}; + +extern const uint32_t arch_type; + +int kvm_available(void); +int xen_available(void); + +#endif diff --git a/include/sysemu/balloon.h b/include/sysemu/balloon.h new file mode 100644 index 000000000..867687b73 --- /dev/null +++ b/include/sysemu/balloon.h @@ -0,0 +1,27 @@ +/* + * Balloon + * + * Copyright IBM, Corp. 2008 + * + * Authors: + * Anthony Liguori + * + * This work is licensed under the terms of the GNU GPL, version 2. See + * the COPYING file in the top-level directory. + * + */ + +#ifndef QEMU_BALLOON_H +#define QEMU_BALLOON_H + +#include "exec/cpu-common.h" +#include "qapi/qapi-types-machine.h" + +typedef void (QEMUBalloonEvent)(void *opaque, ram_addr_t target); +typedef void (QEMUBalloonStatus)(void *opaque, BalloonInfo *info); + +int qemu_add_balloon_handler(QEMUBalloonEvent *event_func, + QEMUBalloonStatus *stat_func, void *opaque); +void qemu_remove_balloon_handler(void *opaque); + +#endif diff --git a/include/sysemu/block-backend.h b/include/sysemu/block-backend.h new file mode 100644 index 000000000..880e90329 --- /dev/null +++ b/include/sysemu/block-backend.h @@ -0,0 +1,272 @@ +/* + * QEMU Block backends + * + * Copyright (C) 2014-2016 Red Hat, Inc. + * + * Authors: + * Markus Armbruster , + * + * This work is licensed under the terms of the GNU LGPL, version 2.1 + * or later. See the COPYING.LIB file in the top-level directory. + */ + +#ifndef BLOCK_BACKEND_H +#define BLOCK_BACKEND_H + +#include "qemu/iov.h" +#include "block/throttle-groups.h" + +/* + * TODO Have to include block/block.h for a bunch of block layer + * types. Unfortunately, this pulls in the whole BlockDriverState + * API, which we don't want used by many BlockBackend users. Some of + * the types belong here, and the rest should be split into a common + * header and one for the BlockDriverState API. + */ +#include "block/block.h" + +/* Callbacks for block device models */ +typedef struct BlockDevOps { + /* + * Runs when virtual media changed (monitor commands eject, change) + * Argument load is true on load and false on eject. + * Beware: doesn't run when a host device's physical media + * changes. Sure would be useful if it did. + * Device models with removable media must implement this callback. + */ + void (*change_media_cb)(void *opaque, bool load, Error **errp); + /* + * Runs when an eject request is issued from the monitor, the tray + * is closed, and the medium is locked. + * Device models that do not implement is_medium_locked will not need + * this callback. Device models that can lock the medium or tray might + * want to implement the callback and unlock the tray when "force" is + * true, even if they do not support eject requests. + */ + void (*eject_request_cb)(void *opaque, bool force); + /* + * Is the virtual tray open? + * Device models implement this only when the device has a tray. + */ + bool (*is_tray_open)(void *opaque); + /* + * Is the virtual medium locked into the device? + * Device models implement this only when device has such a lock. + */ + bool (*is_medium_locked)(void *opaque); + /* + * Runs when the size changed (e.g. monitor command block_resize) + */ + void (*resize_cb)(void *opaque); + /* + * Runs when the backend receives a drain request. + */ + void (*drained_begin)(void *opaque); + /* + * Runs when the backend's last drain request ends. + */ + void (*drained_end)(void *opaque); +} BlockDevOps; + +/* This struct is embedded in (the private) BlockBackend struct and contains + * fields that must be public. This is in particular for QLIST_ENTRY() and + * friends so that BlockBackends can be kept in lists outside block-backend.c + * */ +typedef struct BlockBackendPublic { + ThrottleGroupMember throttle_group_member; +} BlockBackendPublic; + +BlockBackend *blk_new(AioContext *ctx, uint64_t perm, uint64_t shared_perm); +BlockBackend *blk_new_with_bs(BlockDriverState *bs, uint64_t perm, + uint64_t shared_perm, Error **errp); +BlockBackend *blk_new_open(const char *filename, const char *reference, + QDict *options, int flags, Error **errp); +int blk_get_refcnt(BlockBackend *blk); +void blk_ref(BlockBackend *blk); +void blk_unref(BlockBackend *blk); +void blk_remove_all_bs(void); +const char *blk_name(const BlockBackend *blk); +BlockBackend *blk_by_name(const char *name); +BlockBackend *blk_next(BlockBackend *blk); +BlockBackend *blk_all_next(BlockBackend *blk); +bool monitor_add_blk(BlockBackend *blk, const char *name, Error **errp); +void monitor_remove_blk(BlockBackend *blk); + +BlockBackendPublic *blk_get_public(BlockBackend *blk); +BlockBackend *blk_by_public(BlockBackendPublic *public); + +BlockDriverState *blk_bs(BlockBackend *blk); +void blk_remove_bs(BlockBackend *blk); +int blk_insert_bs(BlockBackend *blk, BlockDriverState *bs, Error **errp); +bool bdrv_has_blk(BlockDriverState *bs); +bool bdrv_is_root_node(BlockDriverState *bs); +int blk_set_perm(BlockBackend *blk, uint64_t perm, uint64_t shared_perm, + Error **errp); +void blk_get_perm(BlockBackend *blk, uint64_t *perm, uint64_t *shared_perm); + +void blk_set_allow_write_beyond_eof(BlockBackend *blk, bool allow); +void blk_set_allow_aio_context_change(BlockBackend *blk, bool allow); +void blk_set_disable_request_queuing(BlockBackend *blk, bool disable); +void blk_iostatus_enable(BlockBackend *blk); +bool blk_iostatus_is_enabled(const BlockBackend *blk); +BlockDeviceIoStatus blk_iostatus(const BlockBackend *blk); +void blk_iostatus_disable(BlockBackend *blk); +void blk_iostatus_reset(BlockBackend *blk); +void blk_iostatus_set_err(BlockBackend *blk, int error); +int blk_attach_dev(BlockBackend *blk, DeviceState *dev); +void blk_detach_dev(BlockBackend *blk, DeviceState *dev); +DeviceState *blk_get_attached_dev(BlockBackend *blk); +char *blk_get_attached_dev_id(BlockBackend *blk); +BlockBackend *blk_by_dev(void *dev); +BlockBackend *blk_by_qdev_id(const char *id, Error **errp); +void blk_set_dev_ops(BlockBackend *blk, const BlockDevOps *ops, void *opaque); +int coroutine_fn blk_co_preadv(BlockBackend *blk, int64_t offset, + unsigned int bytes, QEMUIOVector *qiov, + BdrvRequestFlags flags); +int coroutine_fn blk_co_pwritev_part(BlockBackend *blk, int64_t offset, + unsigned int bytes, + QEMUIOVector *qiov, size_t qiov_offset, + BdrvRequestFlags flags); +int coroutine_fn blk_co_pwritev(BlockBackend *blk, int64_t offset, + unsigned int bytes, QEMUIOVector *qiov, + BdrvRequestFlags flags); + +static inline int coroutine_fn blk_co_pread(BlockBackend *blk, int64_t offset, + unsigned int bytes, void *buf, + BdrvRequestFlags flags) +{ + QEMUIOVector qiov = QEMU_IOVEC_INIT_BUF(qiov, buf, bytes); + + return blk_co_preadv(blk, offset, bytes, &qiov, flags); +} + +static inline int coroutine_fn blk_co_pwrite(BlockBackend *blk, int64_t offset, + unsigned int bytes, void *buf, + BdrvRequestFlags flags) +{ + QEMUIOVector qiov = QEMU_IOVEC_INIT_BUF(qiov, buf, bytes); + + return blk_co_pwritev(blk, offset, bytes, &qiov, flags); +} + +int blk_pwrite_zeroes(BlockBackend *blk, int64_t offset, + int bytes, BdrvRequestFlags flags); +BlockAIOCB *blk_aio_pwrite_zeroes(BlockBackend *blk, int64_t offset, + int bytes, BdrvRequestFlags flags, + BlockCompletionFunc *cb, void *opaque); +int blk_make_zero(BlockBackend *blk, BdrvRequestFlags flags); +int blk_pread(BlockBackend *blk, int64_t offset, void *buf, int bytes); +int blk_pwrite(BlockBackend *blk, int64_t offset, const void *buf, int bytes, + BdrvRequestFlags flags); +int64_t blk_getlength(BlockBackend *blk); +void blk_get_geometry(BlockBackend *blk, uint64_t *nb_sectors_ptr); +int64_t blk_nb_sectors(BlockBackend *blk); +BlockAIOCB *blk_aio_preadv(BlockBackend *blk, int64_t offset, + QEMUIOVector *qiov, BdrvRequestFlags flags, + BlockCompletionFunc *cb, void *opaque); +BlockAIOCB *blk_aio_pwritev(BlockBackend *blk, int64_t offset, + QEMUIOVector *qiov, BdrvRequestFlags flags, + BlockCompletionFunc *cb, void *opaque); +BlockAIOCB *blk_aio_flush(BlockBackend *blk, + BlockCompletionFunc *cb, void *opaque); +BlockAIOCB *blk_aio_pdiscard(BlockBackend *blk, int64_t offset, int bytes, + BlockCompletionFunc *cb, void *opaque); +void blk_aio_cancel(BlockAIOCB *acb); +void blk_aio_cancel_async(BlockAIOCB *acb); +int blk_ioctl(BlockBackend *blk, unsigned long int req, void *buf); +BlockAIOCB *blk_aio_ioctl(BlockBackend *blk, unsigned long int req, void *buf, + BlockCompletionFunc *cb, void *opaque); +int blk_co_pdiscard(BlockBackend *blk, int64_t offset, int bytes); +int blk_co_flush(BlockBackend *blk); +int blk_flush(BlockBackend *blk); +int blk_commit_all(void); +void blk_inc_in_flight(BlockBackend *blk); +void blk_dec_in_flight(BlockBackend *blk); +void blk_drain(BlockBackend *blk); +void blk_drain_all(void); +void blk_set_on_error(BlockBackend *blk, BlockdevOnError on_read_error, + BlockdevOnError on_write_error); +BlockdevOnError blk_get_on_error(BlockBackend *blk, bool is_read); +BlockErrorAction blk_get_error_action(BlockBackend *blk, bool is_read, + int error); +void blk_error_action(BlockBackend *blk, BlockErrorAction action, + bool is_read, int error); +bool blk_supports_write_perm(BlockBackend *blk); +bool blk_is_writable(BlockBackend *blk); +bool blk_is_sg(BlockBackend *blk); +bool blk_enable_write_cache(BlockBackend *blk); +void blk_set_enable_write_cache(BlockBackend *blk, bool wce); +void blk_invalidate_cache(BlockBackend *blk, Error **errp); +bool blk_is_inserted(BlockBackend *blk); +bool blk_is_available(BlockBackend *blk); +void blk_lock_medium(BlockBackend *blk, bool locked); +void blk_eject(BlockBackend *blk, bool eject_flag); +int blk_get_flags(BlockBackend *blk); +uint32_t blk_get_request_alignment(BlockBackend *blk); +uint32_t blk_get_max_transfer(BlockBackend *blk); +int blk_get_max_iov(BlockBackend *blk); +void blk_set_guest_block_size(BlockBackend *blk, int align); +void *blk_try_blockalign(BlockBackend *blk, size_t size); +void *blk_blockalign(BlockBackend *blk, size_t size); +bool blk_op_is_blocked(BlockBackend *blk, BlockOpType op, Error **errp); +void blk_op_unblock(BlockBackend *blk, BlockOpType op, Error *reason); +void blk_op_block_all(BlockBackend *blk, Error *reason); +void blk_op_unblock_all(BlockBackend *blk, Error *reason); +AioContext *blk_get_aio_context(BlockBackend *blk); +int blk_set_aio_context(BlockBackend *blk, AioContext *new_context, + Error **errp); +void blk_add_aio_context_notifier(BlockBackend *blk, + void (*attached_aio_context)(AioContext *new_context, void *opaque), + void (*detach_aio_context)(void *opaque), void *opaque); +void blk_remove_aio_context_notifier(BlockBackend *blk, + void (*attached_aio_context)(AioContext *, + void *), + void (*detach_aio_context)(void *), + void *opaque); +void blk_add_remove_bs_notifier(BlockBackend *blk, Notifier *notify); +void blk_add_insert_bs_notifier(BlockBackend *blk, Notifier *notify); +void blk_io_plug(BlockBackend *blk); +void blk_io_unplug(BlockBackend *blk); +BlockAcctStats *blk_get_stats(BlockBackend *blk); +BlockBackendRootState *blk_get_root_state(BlockBackend *blk); +void blk_update_root_state(BlockBackend *blk); +bool blk_get_detect_zeroes_from_root_state(BlockBackend *blk); +int blk_get_open_flags_from_root_state(BlockBackend *blk); + +void *blk_aio_get(const AIOCBInfo *aiocb_info, BlockBackend *blk, + BlockCompletionFunc *cb, void *opaque); +int coroutine_fn blk_co_pwrite_zeroes(BlockBackend *blk, int64_t offset, + int bytes, BdrvRequestFlags flags); +int blk_pwrite_compressed(BlockBackend *blk, int64_t offset, const void *buf, + int bytes); +int blk_truncate(BlockBackend *blk, int64_t offset, bool exact, + PreallocMode prealloc, BdrvRequestFlags flags, Error **errp); +int blk_pdiscard(BlockBackend *blk, int64_t offset, int bytes); +int blk_save_vmstate(BlockBackend *blk, const uint8_t *buf, + int64_t pos, int size); +int blk_load_vmstate(BlockBackend *blk, uint8_t *buf, int64_t pos, int size); +int blk_probe_blocksizes(BlockBackend *blk, BlockSizes *bsz); +int blk_probe_geometry(BlockBackend *blk, HDGeometry *geo); +BlockAIOCB *blk_abort_aio_request(BlockBackend *blk, + BlockCompletionFunc *cb, + void *opaque, int ret); + +void blk_set_io_limits(BlockBackend *blk, ThrottleConfig *cfg); +void blk_io_limits_disable(BlockBackend *blk); +void blk_io_limits_enable(BlockBackend *blk, const char *group); +void blk_io_limits_update_group(BlockBackend *blk, const char *group); +void blk_set_force_allow_inactivate(BlockBackend *blk); + +void blk_register_buf(BlockBackend *blk, void *host, size_t size); +void blk_unregister_buf(BlockBackend *blk, void *host); + +int coroutine_fn blk_co_copy_range(BlockBackend *blk_in, int64_t off_in, + BlockBackend *blk_out, int64_t off_out, + int bytes, BdrvRequestFlags read_flags, + BdrvRequestFlags write_flags); + +const BdrvChild *blk_root(BlockBackend *blk); + +int blk_make_empty(BlockBackend *blk, Error **errp); + +#endif diff --git a/include/sysemu/blockdev.h b/include/sysemu/blockdev.h new file mode 100644 index 000000000..3b5fcda08 --- /dev/null +++ b/include/sysemu/blockdev.h @@ -0,0 +1,62 @@ +/* + * QEMU host block devices + * + * Copyright (c) 2003-2008 Fabrice Bellard + * + * This work is licensed under the terms of the GNU GPL, version 2 or + * later. See the COPYING file in the top-level directory. + */ + +#ifndef BLOCKDEV_H +#define BLOCKDEV_H + +#include "block/block.h" +#include "qemu/queue.h" + +void blockdev_mark_auto_del(BlockBackend *blk); +void blockdev_auto_del(BlockBackend *blk); + +typedef enum { + IF_DEFAULT = -1, /* for use with drive_add() only */ + /* + * IF_NONE must be zero, because we want MachineClass member + * block_default_type to default-initialize to IF_NONE + */ + IF_NONE = 0, + IF_IDE, IF_SCSI, IF_FLOPPY, IF_PFLASH, IF_MTD, IF_SD, IF_VIRTIO, IF_XEN, + IF_COUNT +} BlockInterfaceType; + +struct DriveInfo { + BlockInterfaceType type; + int bus; + int unit; + int auto_del; /* see blockdev_mark_auto_del() */ + bool is_default; /* Added by default_drive() ? */ + int media_cd; + QemuOpts *opts; + bool claimed_by_board; + QTAILQ_ENTRY(DriveInfo) next; +}; + +DriveInfo *blk_legacy_dinfo(BlockBackend *blk); +DriveInfo *blk_set_legacy_dinfo(BlockBackend *blk, DriveInfo *dinfo); +BlockBackend *blk_by_legacy_dinfo(DriveInfo *dinfo); + +void override_max_devs(BlockInterfaceType type, int max_devs); + +DriveInfo *drive_get(BlockInterfaceType type, int bus, int unit); +void drive_mark_claimed_by_board(void); +void drive_check_orphaned(void); +DriveInfo *drive_get_by_index(BlockInterfaceType type, int index); +int drive_get_max_bus(BlockInterfaceType type); +int drive_get_max_devs(BlockInterfaceType type); +DriveInfo *drive_get_next(BlockInterfaceType type); + +QemuOpts *drive_def(const char *optstr); +QemuOpts *drive_add(BlockInterfaceType type, int index, const char *file, + const char *optstr); +DriveInfo *drive_new(QemuOpts *arg, BlockInterfaceType block_default_type, + Error **errp); + +#endif diff --git a/include/sysemu/cpu-throttle.h b/include/sysemu/cpu-throttle.h new file mode 100644 index 000000000..d65bdef6d --- /dev/null +++ b/include/sysemu/cpu-throttle.h @@ -0,0 +1,68 @@ +/* + * Copyright (c) 2012 SUSE LINUX Products GmbH + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version 2 + * of the License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, see + * + */ + +#ifndef SYSEMU_CPU_THROTTLE_H +#define SYSEMU_CPU_THROTTLE_H + +#include "qemu/timer.h" + +/** + * cpu_throttle_init: + * + * Initialize the CPU throttling API. + */ +void cpu_throttle_init(void); + +/** + * cpu_throttle_set: + * @new_throttle_pct: Percent of sleep time. Valid range is 1 to 99. + * + * Throttles all vcpus by forcing them to sleep for the given percentage of + * time. A throttle_percentage of 25 corresponds to a 75% duty cycle roughly. + * (example: 10ms sleep for every 30ms awake). + * + * cpu_throttle_set can be called as needed to adjust new_throttle_pct. + * Once the throttling starts, it will remain in effect until cpu_throttle_stop + * is called. + */ +void cpu_throttle_set(int new_throttle_pct); + +/** + * cpu_throttle_stop: + * + * Stops the vcpu throttling started by cpu_throttle_set. + */ +void cpu_throttle_stop(void); + +/** + * cpu_throttle_active: + * + * Returns: %true if the vcpus are currently being throttled, %false otherwise. + */ +bool cpu_throttle_active(void); + +/** + * cpu_throttle_get_percentage: + * + * Returns the vcpu throttle percentage. See cpu_throttle_set for details. + * + * Returns: The throttle percentage in range 1 to 99. + */ +int cpu_throttle_get_percentage(void); + +#endif /* SYSEMU_CPU_THROTTLE_H */ diff --git a/include/sysemu/cpu-timers.h b/include/sysemu/cpu-timers.h new file mode 100644 index 000000000..ed6ee5c46 --- /dev/null +++ b/include/sysemu/cpu-timers.h @@ -0,0 +1,90 @@ +/* + * CPU timers state API + * + * Copyright 2020 SUSE LLC + * + * This work is licensed under the terms of the GNU GPL, version 2 or later. + * See the COPYING file in the top-level directory. + * + */ +#ifndef SYSEMU_CPU_TIMERS_H +#define SYSEMU_CPU_TIMERS_H + +#include "qemu/timer.h" + +/* init the whole cpu timers API, including icount, ticks, and cpu_throttle */ +void cpu_timers_init(void); + +/* icount - Instruction Counter API */ + +/* + * icount enablement state: + * + * 0 = Disabled - Do not count executed instructions. + * 1 = Enabled - Fixed conversion of insn to ns via "shift" option + * 2 = Enabled - Runtime adaptive algorithm to compute shift + */ +#ifdef CONFIG_TCG +extern int use_icount; +#define icount_enabled() (use_icount) +#else +#define icount_enabled() 0 +#endif + +/* + * Update the icount with the executed instructions. Called by + * cpus-tcg vCPU thread so the main-loop can see time has moved forward. + */ +void icount_update(CPUState *cpu); + +/* get raw icount value */ +int64_t icount_get_raw(void); + +/* return the virtual CPU time in ns, based on the instruction counter. */ +int64_t icount_get(void); +/* + * convert an instruction counter value to ns, based on the icount shift. + * This shift is set as a fixed value with the icount "shift" option + * (precise mode), or it is constantly approximated and corrected at + * runtime in adaptive mode. + */ +int64_t icount_to_ns(int64_t icount); + +/* configure the icount options, including "shift" */ +void icount_configure(QemuOpts *opts, Error **errp); + +/* used by tcg vcpu thread to calc icount budget */ +int64_t icount_round(int64_t count); + +/* if the CPUs are idle, start accounting real time to virtual clock. */ +void icount_start_warp_timer(void); +void icount_account_warp_timer(void); + +/* + * CPU Ticks and Clock + */ + +/* Caller must hold BQL */ +void cpu_enable_ticks(void); +/* Caller must hold BQL */ +void cpu_disable_ticks(void); + +/* + * return the time elapsed in VM between vm_start and vm_stop. + * cpu_get_ticks() uses units of the host CPU cycle counter. + */ +int64_t cpu_get_ticks(void); + +/* + * Returns the monotonic time elapsed in VM, i.e., + * the time between vm_start and vm_stop + */ +int64_t cpu_get_clock(void); + +void qemu_timer_notify_cb(void *opaque, QEMUClockType type); + +/* get the VIRTUAL clock and VM elapsed ticks via the cpus accel interface */ +int64_t cpus_get_virtual_clock(void); +int64_t cpus_get_elapsed_ticks(void); + +#endif /* SYSEMU_CPU_TIMERS_H */ diff --git a/include/sysemu/cpus.h b/include/sysemu/cpus.h new file mode 100644 index 000000000..e8156728c --- /dev/null +++ b/include/sysemu/cpus.h @@ -0,0 +1,74 @@ +#ifndef QEMU_CPUS_H +#define QEMU_CPUS_H + +#include "qemu/timer.h" + +/* cpus.c */ + +/* CPU execution threads */ + +typedef struct CpusAccel { + void (*create_vcpu_thread)(CPUState *cpu); /* MANDATORY */ + void (*kick_vcpu_thread)(CPUState *cpu); + + void (*synchronize_post_reset)(CPUState *cpu); + void (*synchronize_post_init)(CPUState *cpu); + void (*synchronize_state)(CPUState *cpu); + void (*synchronize_pre_loadvm)(CPUState *cpu); + + void (*handle_interrupt)(CPUState *cpu, int mask); + + int64_t (*get_virtual_clock)(void); + int64_t (*get_elapsed_ticks)(void); +} CpusAccel; + +/* register accel-specific cpus interface implementation */ +void cpus_register_accel(const CpusAccel *i); + +/* Create a dummy vcpu for CpusAccel->create_vcpu_thread */ +void dummy_start_vcpu_thread(CPUState *); + +/* interface available for cpus accelerator threads */ + +/* For temporary buffers for forming a name */ +#define VCPU_THREAD_NAME_SIZE 16 + +void cpus_kick_thread(CPUState *cpu); +bool cpu_work_list_empty(CPUState *cpu); +bool cpu_thread_is_idle(CPUState *cpu); +bool all_cpu_threads_idle(void); +bool cpu_can_run(CPUState *cpu); +void qemu_wait_io_event_common(CPUState *cpu); +void qemu_wait_io_event(CPUState *cpu); +void cpu_thread_signal_created(CPUState *cpu); +void cpu_thread_signal_destroyed(CPUState *cpu); +void cpu_handle_guest_debug(CPUState *cpu); + +/* end interface for cpus accelerator threads */ + +bool qemu_in_vcpu_thread(void); +void qemu_init_cpu_loop(void); +void resume_all_vcpus(void); +void pause_all_vcpus(void); +void cpu_stop_current(void); + +extern int icount_align_option; + +/* Unblock cpu */ +void qemu_cpu_kick_self(void); + +void cpu_synchronize_all_states(void); +void cpu_synchronize_all_post_reset(void); +void cpu_synchronize_all_post_init(void); +void cpu_synchronize_all_pre_loadvm(void); + +#ifndef CONFIG_USER_ONLY +/* vl.c */ +/* *-user doesn't have configurable SMP topology */ +extern int smp_cores; +extern int smp_threads; +#endif + +void list_cpus(const char *optarg); + +#endif diff --git a/include/sysemu/cryptodev-vhost-user.h b/include/sysemu/cryptodev-vhost-user.h new file mode 100644 index 000000000..60710502c --- /dev/null +++ b/include/sysemu/cryptodev-vhost-user.h @@ -0,0 +1,50 @@ +/* + * QEMU Crypto Device Common Vhost User Implement + * + * Copyright (c) 2016 HUAWEI TECHNOLOGIES CO., LTD. + * + * Authors: + * Gonglei + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, see . + * + */ + +#ifndef CRYPTODEV_VHOST_USER_H +#define CRYPTODEV_VHOST_USER_H + +#include "sysemu/cryptodev-vhost.h" + +#define VHOST_USER_MAX_AUTH_KEY_LEN 512 +#define VHOST_USER_MAX_CIPHER_KEY_LEN 64 + + +/** + * cryptodev_vhost_user_get_vhost: + * @cc: the client object for each queue + * @b: the cryptodev backend common vhost object + * @queue: the queue index + * + * Gets a new cryptodev backend common vhost object based on + * @b and @queue + * + * Returns: the cryptodev backend common vhost object + */ +CryptoDevBackendVhost * +cryptodev_vhost_user_get_vhost( + CryptoDevBackendClient *cc, + CryptoDevBackend *b, + uint16_t queue); + +#endif /* CRYPTODEV_VHOST_USER_H */ diff --git a/include/sysemu/cryptodev-vhost.h b/include/sysemu/cryptodev-vhost.h new file mode 100644 index 000000000..e8cab1356 --- /dev/null +++ b/include/sysemu/cryptodev-vhost.h @@ -0,0 +1,153 @@ +/* + * QEMU Crypto Device Common Vhost Implement + * + * Copyright (c) 2016 HUAWEI TECHNOLOGIES CO., LTD. + * + * Authors: + * Gonglei + * Jay Zhou + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, see . + * + */ +#ifndef CRYPTODEV_VHOST_H +#define CRYPTODEV_VHOST_H + +#include "hw/virtio/vhost.h" +#include "hw/virtio/vhost-backend.h" +#include "chardev/char.h" + +#include "sysemu/cryptodev.h" + + +typedef struct CryptoDevBackendVhostOptions { + VhostBackendType backend_type; + void *opaque; + int total_queues; + CryptoDevBackendClient *cc; +} CryptoDevBackendVhostOptions; + +typedef struct CryptoDevBackendVhost { + struct vhost_dev dev; + struct vhost_virtqueue vqs[1]; + int backend; + CryptoDevBackendClient *cc; +} CryptoDevBackendVhost; + +/** + * cryptodev_vhost_get_max_queues: + * @crypto: the cryptodev backend common vhost object + * + * Get the maximum queue number of @crypto. + * + * + * Returns: the maximum queue number + */ +uint64_t +cryptodev_vhost_get_max_queues( + CryptoDevBackendVhost *crypto); + + +/** + * cryptodev_vhost_init: + * @options: the common vhost object's option + * + * Creates a new cryptodev backend common vhost object + * + ** The returned object must be released with + * cryptodev_vhost_cleanup() when no + * longer required + * + * Returns: the cryptodev backend common vhost object + */ +struct CryptoDevBackendVhost * +cryptodev_vhost_init( + CryptoDevBackendVhostOptions *options); + +/** + * cryptodev_vhost_cleanup: + * @crypto: the cryptodev backend common vhost object + * + * Clean the resouce associated with @crypto that realizaed + * by cryptodev_vhost_init() + * + */ +void cryptodev_vhost_cleanup( + CryptoDevBackendVhost *crypto); + +/** + * cryptodev_get_vhost: + * @cc: the client object for each queue + * @b: the cryptodev backend common vhost object + * @queue: the cryptodev backend queue index + * + * Gets a new cryptodev backend common vhost object based on + * @b and @queue + * + * Returns: the cryptodev backend common vhost object + */ +CryptoDevBackendVhost * +cryptodev_get_vhost(CryptoDevBackendClient *cc, + CryptoDevBackend *b, + uint16_t queue); +/** + * cryptodev_vhost_start: + * @dev: the virtio crypto object + * @total_queues: the total count of queue + * + * Starts the vhost crypto logic + * + * Returns: 0 for success, negative for errors + */ +int cryptodev_vhost_start(VirtIODevice *dev, int total_queues); + +/** + * cryptodev_vhost_stop: + * @dev: the virtio crypto object + * @total_queues: the total count of queue + * + * Stops the vhost crypto logic + * + */ +void cryptodev_vhost_stop(VirtIODevice *dev, int total_queues); + +/** + * cryptodev_vhost_virtqueue_mask: + * @dev: the virtio crypto object + * @queue: the cryptodev backend queue index + * @idx: the virtqueue index + * @mask: mask or not (true or false) + * + * Mask/unmask events for @idx virtqueue on @dev device + * + */ +void cryptodev_vhost_virtqueue_mask(VirtIODevice *dev, + int queue, + int idx, bool mask); + +/** + * cryptodev_vhost_virtqueue_pending: + * @dev: the virtio crypto object + * @queue: the cryptodev backend queue index + * @idx: the virtqueue index + * + * Test and clear event pending status for @idx virtqueue on @dev device. + * Should be called after unmask to avoid losing events. + * + * Returns: true for success, false for errors + */ +bool cryptodev_vhost_virtqueue_pending(VirtIODevice *dev, + int queue, int idx); + +#endif /* CRYPTODEV_VHOST_H */ diff --git a/include/sysemu/cryptodev.h b/include/sysemu/cryptodev.h new file mode 100644 index 000000000..f4d4057d4 --- /dev/null +++ b/include/sysemu/cryptodev.h @@ -0,0 +1,340 @@ +/* + * QEMU Crypto Device Implementation + * + * Copyright (c) 2016 HUAWEI TECHNOLOGIES CO., LTD. + * + * Authors: + * Gonglei + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, see . + * + */ +#ifndef CRYPTODEV_H +#define CRYPTODEV_H + +#include "qemu/queue.h" +#include "qom/object.h" + +/** + * CryptoDevBackend: + * + * The CryptoDevBackend object is an interface + * for different cryptodev backends, which provides crypto + * operation wrapper. + * + */ + +#define TYPE_CRYPTODEV_BACKEND "cryptodev-backend" + +OBJECT_DECLARE_TYPE(CryptoDevBackend, CryptoDevBackendClass, + CRYPTODEV_BACKEND) + + +#define MAX_CRYPTO_QUEUE_NUM 64 + +typedef struct CryptoDevBackendConf CryptoDevBackendConf; +typedef struct CryptoDevBackendPeers CryptoDevBackendPeers; +typedef struct CryptoDevBackendClient + CryptoDevBackendClient; + +enum CryptoDevBackendAlgType { + CRYPTODEV_BACKEND_ALG_SYM, + CRYPTODEV_BACKEND_ALG__MAX, +}; + +/** + * CryptoDevBackendSymSessionInfo: + * + * @op_code: operation code (refer to virtio_crypto.h) + * @cipher_alg: algorithm type of CIPHER + * @key_len: byte length of cipher key + * @hash_alg: algorithm type of HASH/MAC + * @hash_result_len: byte length of HASH operation result + * @auth_key_len: byte length of authenticated key + * @add_len: byte length of additional authenticated data + * @op_type: operation type (refer to virtio_crypto.h) + * @direction: encryption or direction for CIPHER + * @hash_mode: HASH mode for HASH operation (refer to virtio_crypto.h) + * @alg_chain_order: order of algorithm chaining (CIPHER then HASH, + * or HASH then CIPHER) + * @cipher_key: point to a key of CIPHER + * @auth_key: point to an authenticated key of MAC + * + */ +typedef struct CryptoDevBackendSymSessionInfo { + /* corresponding with virtio crypto spec */ + uint32_t op_code; + uint32_t cipher_alg; + uint32_t key_len; + uint32_t hash_alg; + uint32_t hash_result_len; + uint32_t auth_key_len; + uint32_t add_len; + uint8_t op_type; + uint8_t direction; + uint8_t hash_mode; + uint8_t alg_chain_order; + uint8_t *cipher_key; + uint8_t *auth_key; +} CryptoDevBackendSymSessionInfo; + +/** + * CryptoDevBackendSymOpInfo: + * + * @session_id: session index which was previously + * created by cryptodev_backend_sym_create_session() + * @aad_len: byte length of additional authenticated data + * @iv_len: byte length of initialization vector or counter + * @src_len: byte length of source data + * @dst_len: byte length of destination data + * @digest_result_len: byte length of hash digest result + * @hash_start_src_offset: Starting point for hash processing, specified + * as number of bytes from start of packet in source data, only used for + * algorithm chain + * @cipher_start_src_offset: Starting point for cipher processing, specified + * as number of bytes from start of packet in source data, only used for + * algorithm chain + * @len_to_hash: byte length of source data on which the hash + * operation will be computed, only used for algorithm chain + * @len_to_cipher: byte length of source data on which the cipher + * operation will be computed, only used for algorithm chain + * @op_type: operation type (refer to virtio_crypto.h) + * @iv: point to the initialization vector or counter + * @src: point to the source data + * @dst: point to the destination data + * @aad_data: point to the additional authenticated data + * @digest_result: point to the digest result data + * @data[0]: point to the extensional memory by one memory allocation + * + */ +typedef struct CryptoDevBackendSymOpInfo { + uint64_t session_id; + uint32_t aad_len; + uint32_t iv_len; + uint32_t src_len; + uint32_t dst_len; + uint32_t digest_result_len; + uint32_t hash_start_src_offset; + uint32_t cipher_start_src_offset; + uint32_t len_to_hash; + uint32_t len_to_cipher; + uint8_t op_type; + uint8_t *iv; + uint8_t *src; + uint8_t *dst; + uint8_t *aad_data; + uint8_t *digest_result; + uint8_t data[]; +} CryptoDevBackendSymOpInfo; + +struct CryptoDevBackendClass { + ObjectClass parent_class; + + void (*init)(CryptoDevBackend *backend, Error **errp); + void (*cleanup)(CryptoDevBackend *backend, Error **errp); + + int64_t (*create_session)(CryptoDevBackend *backend, + CryptoDevBackendSymSessionInfo *sess_info, + uint32_t queue_index, Error **errp); + int (*close_session)(CryptoDevBackend *backend, + uint64_t session_id, + uint32_t queue_index, Error **errp); + int (*do_sym_op)(CryptoDevBackend *backend, + CryptoDevBackendSymOpInfo *op_info, + uint32_t queue_index, Error **errp); +}; + +typedef enum CryptoDevBackendOptionsType { + CRYPTODEV_BACKEND_TYPE_NONE = 0, + CRYPTODEV_BACKEND_TYPE_BUILTIN = 1, + CRYPTODEV_BACKEND_TYPE_VHOST_USER = 2, + CRYPTODEV_BACKEND_TYPE__MAX, +} CryptoDevBackendOptionsType; + +struct CryptoDevBackendClient { + CryptoDevBackendOptionsType type; + char *model; + char *name; + char *info_str; + unsigned int queue_index; + int vring_enable; + QTAILQ_ENTRY(CryptoDevBackendClient) next; +}; + +struct CryptoDevBackendPeers { + CryptoDevBackendClient *ccs[MAX_CRYPTO_QUEUE_NUM]; + uint32_t queues; +}; + +struct CryptoDevBackendConf { + CryptoDevBackendPeers peers; + + /* Supported service mask */ + uint32_t crypto_services; + + /* Detailed algorithms mask */ + uint32_t cipher_algo_l; + uint32_t cipher_algo_h; + uint32_t hash_algo; + uint32_t mac_algo_l; + uint32_t mac_algo_h; + uint32_t aead_algo; + /* Maximum length of cipher key */ + uint32_t max_cipher_key_len; + /* Maximum length of authenticated key */ + uint32_t max_auth_key_len; + /* Maximum size of each crypto request's content */ + uint64_t max_size; +}; + +struct CryptoDevBackend { + Object parent_obj; + + bool ready; + /* Tag the cryptodev backend is used by virtio-crypto or not */ + bool is_used; + CryptoDevBackendConf conf; +}; + +/** + * cryptodev_backend_new_client: + * @model: the cryptodev backend model + * @name: the cryptodev backend name, can be NULL + * + * Creates a new cryptodev backend client object + * with the @name in the model @model. + * + * The returned object must be released with + * cryptodev_backend_free_client() when no + * longer required + * + * Returns: a new cryptodev backend client object + */ +CryptoDevBackendClient * +cryptodev_backend_new_client(const char *model, + const char *name); +/** + * cryptodev_backend_free_client: + * @cc: the cryptodev backend client object + * + * Release the memory associated with @cc that + * was previously allocated by cryptodev_backend_new_client() + */ +void cryptodev_backend_free_client( + CryptoDevBackendClient *cc); + +/** + * cryptodev_backend_cleanup: + * @backend: the cryptodev backend object + * @errp: pointer to a NULL-initialized error object + * + * Clean the resouce associated with @backend that realizaed + * by the specific backend's init() callback + */ +void cryptodev_backend_cleanup( + CryptoDevBackend *backend, + Error **errp); + +/** + * cryptodev_backend_sym_create_session: + * @backend: the cryptodev backend object + * @sess_info: parameters needed by session creating + * @queue_index: queue index of cryptodev backend client + * @errp: pointer to a NULL-initialized error object + * + * Create a session for symmetric algorithms + * + * Returns: session id on success, or -1 on error + */ +int64_t cryptodev_backend_sym_create_session( + CryptoDevBackend *backend, + CryptoDevBackendSymSessionInfo *sess_info, + uint32_t queue_index, Error **errp); + +/** + * cryptodev_backend_sym_close_session: + * @backend: the cryptodev backend object + * @session_id: the session id + * @queue_index: queue index of cryptodev backend client + * @errp: pointer to a NULL-initialized error object + * + * Close a session for symmetric algorithms which was previously + * created by cryptodev_backend_sym_create_session() + * + * Returns: 0 on success, or Negative on error + */ +int cryptodev_backend_sym_close_session( + CryptoDevBackend *backend, + uint64_t session_id, + uint32_t queue_index, Error **errp); + +/** + * cryptodev_backend_crypto_operation: + * @backend: the cryptodev backend object + * @opaque: pointer to a VirtIOCryptoReq object + * @queue_index: queue index of cryptodev backend client + * @errp: pointer to a NULL-initialized error object + * + * Do crypto operation, such as encryption and + * decryption + * + * Returns: VIRTIO_CRYPTO_OK on success, + * or -VIRTIO_CRYPTO_* on error + */ +int cryptodev_backend_crypto_operation( + CryptoDevBackend *backend, + void *opaque, + uint32_t queue_index, Error **errp); + +/** + * cryptodev_backend_set_used: + * @backend: the cryptodev backend object + * @used: ture or false + * + * Set the cryptodev backend is used by virtio-crypto or not + */ +void cryptodev_backend_set_used(CryptoDevBackend *backend, bool used); + +/** + * cryptodev_backend_is_used: + * @backend: the cryptodev backend object + * + * Return the status that the cryptodev backend is used + * by virtio-crypto or not + * + * Returns: true on used, or false on not used + */ +bool cryptodev_backend_is_used(CryptoDevBackend *backend); + +/** + * cryptodev_backend_set_ready: + * @backend: the cryptodev backend object + * @ready: ture or false + * + * Set the cryptodev backend is ready or not, which is called + * by the children of the cryptodev banckend interface. + */ +void cryptodev_backend_set_ready(CryptoDevBackend *backend, bool ready); + +/** + * cryptodev_backend_is_ready: + * @backend: the cryptodev backend object + * + * Return the status that the cryptodev backend is ready or not + * + * Returns: true on ready, or false on not ready + */ +bool cryptodev_backend_is_ready(CryptoDevBackend *backend); + +#endif /* CRYPTODEV_H */ diff --git a/include/sysemu/device_tree.h b/include/sysemu/device_tree.h new file mode 100644 index 000000000..982c89345 --- /dev/null +++ b/include/sysemu/device_tree.h @@ -0,0 +1,190 @@ +/* + * Header with function prototypes to help device tree manipulation using + * libfdt. It also provides functions to read entries from device tree proc + * interface. + * + * Copyright 2008 IBM Corporation. + * Authors: Jerone Young + * Hollis Blanchard + * + * This work is licensed under the GNU GPL license version 2 or later. + * + */ + +#ifndef DEVICE_TREE_H +#define DEVICE_TREE_H + +void *create_device_tree(int *sizep); +void *load_device_tree(const char *filename_path, int *sizep); +#ifdef CONFIG_LINUX +/** + * load_device_tree_from_sysfs: reads the device tree information in the + * /proc/device-tree directory and return the corresponding binary blob + * buffer pointer. Asserts in case of error. + */ +void *load_device_tree_from_sysfs(void); +#endif + +/** + * qemu_fdt_node_path: return the paths of nodes matching a given + * name and compat string + * @fdt: pointer to the dt blob + * @name: node name + * @compat: compatibility string + * @errp: handle to an error object + * + * returns a newly allocated NULL-terminated array of node paths. + * Use g_strfreev() to free it. If one or more nodes were found, the + * array contains the path of each node and the last element equals to + * NULL. If there is no error but no matching node was found, the + * returned array contains a single element equal to NULL. If an error + * was encountered when parsing the blob, the function returns NULL + * + * @name may be NULL to wildcard names and only match compatibility + * strings. + */ +char **qemu_fdt_node_path(void *fdt, const char *name, const char *compat, + Error **errp); + +/** + * qemu_fdt_node_unit_path: return the paths of nodes matching a given + * node-name, ie. node-name and node-name@unit-address + * @fdt: pointer to the dt blob + * @name: node name + * @errp: handle to an error object + * + * returns a newly allocated NULL-terminated array of node paths. + * Use g_strfreev() to free it. If one or more nodes were found, the + * array contains the path of each node and the last element equals to + * NULL. If there is no error but no matching node was found, the + * returned array contains a single element equal to NULL. If an error + * was encountered when parsing the blob, the function returns NULL + */ +char **qemu_fdt_node_unit_path(void *fdt, const char *name, Error **errp); + +int qemu_fdt_setprop(void *fdt, const char *node_path, + const char *property, const void *val, int size); +int qemu_fdt_setprop_cell(void *fdt, const char *node_path, + const char *property, uint32_t val); +int qemu_fdt_setprop_u64(void *fdt, const char *node_path, + const char *property, uint64_t val); +int qemu_fdt_setprop_string(void *fdt, const char *node_path, + const char *property, const char *string); +int qemu_fdt_setprop_phandle(void *fdt, const char *node_path, + const char *property, + const char *target_node_path); +/** + * qemu_fdt_getprop: retrieve the value of a given property + * @fdt: pointer to the device tree blob + * @node_path: node path + * @property: name of the property to find + * @lenp: fdt error if any or length of the property on success + * @errp: handle to an error object + * + * returns a pointer to the property on success and NULL on failure + */ +const void *qemu_fdt_getprop(void *fdt, const char *node_path, + const char *property, int *lenp, + Error **errp); +/** + * qemu_fdt_getprop_cell: retrieve the value of a given 4 byte property + * @fdt: pointer to the device tree blob + * @node_path: node path + * @property: name of the property to find + * @lenp: fdt error if any or -EINVAL if the property size is different from + * 4 bytes, or 4 (expected length of the property) upon success. + * @errp: handle to an error object + * + * returns the property value on success + */ +uint32_t qemu_fdt_getprop_cell(void *fdt, const char *node_path, + const char *property, int *lenp, + Error **errp); +uint32_t qemu_fdt_get_phandle(void *fdt, const char *path); +uint32_t qemu_fdt_alloc_phandle(void *fdt); +int qemu_fdt_nop_node(void *fdt, const char *node_path); +int qemu_fdt_add_subnode(void *fdt, const char *name); + +#define qemu_fdt_setprop_cells(fdt, node_path, property, ...) \ + do { \ + uint32_t qdt_tmp[] = { __VA_ARGS__ }; \ + int i; \ + \ + for (i = 0; i < ARRAY_SIZE(qdt_tmp); i++) { \ + qdt_tmp[i] = cpu_to_be32(qdt_tmp[i]); \ + } \ + qemu_fdt_setprop(fdt, node_path, property, qdt_tmp, \ + sizeof(qdt_tmp)); \ + } while (0) + +void qemu_fdt_dumpdtb(void *fdt, int size); + +/** + * qemu_fdt_setprop_sized_cells_from_array: + * @fdt: device tree blob + * @node_path: node to set property on + * @property: property to set + * @numvalues: number of values + * @values: array of number-of-cells, value pairs + * + * Set the specified property on the specified node in the device tree + * to be an array of cells. The values of the cells are specified via + * the values list, which alternates between "number of cells used by + * this value" and "value". + * number-of-cells must be either 1 or 2 (other values will result in + * an error being returned). If a value is too large to fit in the + * number of cells specified for it, an error is returned. + * + * This function is useful because device tree nodes often have cell arrays + * which are either lists of addresses or lists of address,size tuples, but + * the number of cells used for each element vary depending on the + * #address-cells and #size-cells properties of their parent node. + * If you know all your cell elements are one cell wide you can use the + * simpler qemu_fdt_setprop_cells(). If you're not setting up the + * array programmatically, qemu_fdt_setprop_sized_cells may be more + * convenient. + * + * Return value: 0 on success, <0 on error. + */ +int qemu_fdt_setprop_sized_cells_from_array(void *fdt, + const char *node_path, + const char *property, + int numvalues, + uint64_t *values); + +/** + * qemu_fdt_setprop_sized_cells: + * @fdt: device tree blob + * @node_path: node to set property on + * @property: property to set + * @...: list of number-of-cells, value pairs + * + * Set the specified property on the specified node in the device tree + * to be an array of cells. The values of the cells are specified via + * the variable arguments, which alternates between "number of cells + * used by this value" and "value". + * + * This is a convenience wrapper for the function + * qemu_fdt_setprop_sized_cells_from_array(). + * + * Return value: 0 on success, <0 on error. + */ +#define qemu_fdt_setprop_sized_cells(fdt, node_path, property, ...) \ + ({ \ + uint64_t qdt_tmp[] = { __VA_ARGS__ }; \ + qemu_fdt_setprop_sized_cells_from_array(fdt, node_path, \ + property, \ + ARRAY_SIZE(qdt_tmp) / 2, \ + qdt_tmp); \ + }) + +#define FDT_PCI_RANGE_RELOCATABLE 0x80000000 +#define FDT_PCI_RANGE_PREFETCHABLE 0x40000000 +#define FDT_PCI_RANGE_ALIASED 0x20000000 +#define FDT_PCI_RANGE_TYPE_MASK 0x03000000 +#define FDT_PCI_RANGE_MMIO_64BIT 0x03000000 +#define FDT_PCI_RANGE_MMIO 0x02000000 +#define FDT_PCI_RANGE_IOPORT 0x01000000 +#define FDT_PCI_RANGE_CONFIG 0x00000000 + +#endif /* DEVICE_TREE_H */ diff --git a/include/sysemu/dma.h b/include/sysemu/dma.h new file mode 100644 index 000000000..80c5bc3e0 --- /dev/null +++ b/include/sysemu/dma.h @@ -0,0 +1,218 @@ +/* + * DMA helper functions + * + * Copyright (c) 2009 Red Hat + * + * This work is licensed under the terms of the GNU General Public License + * (GNU GPL), version 2 or later. + */ + +#ifndef DMA_H +#define DMA_H + +#include "exec/memory.h" +#include "exec/address-spaces.h" +#include "block/block.h" +#include "block/accounting.h" + +typedef struct ScatterGatherEntry ScatterGatherEntry; + +typedef enum { + DMA_DIRECTION_TO_DEVICE = 0, + DMA_DIRECTION_FROM_DEVICE = 1, +} DMADirection; + +struct QEMUSGList { + ScatterGatherEntry *sg; + int nsg; + int nalloc; + size_t size; + DeviceState *dev; + AddressSpace *as; +}; + +#ifndef CONFIG_USER_ONLY + +/* + * When an IOMMU is present, bus addresses become distinct from + * CPU/memory physical addresses and may be a different size. Because + * the IOVA size depends more on the bus than on the platform, we more + * or less have to treat these as 64-bit always to cover all (or at + * least most) cases. + */ +typedef uint64_t dma_addr_t; + +#define DMA_ADDR_BITS 64 +#define DMA_ADDR_FMT "%" PRIx64 + +static inline void dma_barrier(AddressSpace *as, DMADirection dir) +{ + /* + * This is called before DMA read and write operations + * unless the _relaxed form is used and is responsible + * for providing some sane ordering of accesses vs + * concurrently running VCPUs. + * + * Users of map(), unmap() or lower level st/ld_* + * operations are responsible for providing their own + * ordering via barriers. + * + * This primitive implementation does a simple smp_mb() + * before each operation which provides pretty much full + * ordering. + * + * A smarter implementation can be devised if needed to + * use lighter barriers based on the direction of the + * transfer, the DMA context, etc... + */ + smp_mb(); +} + +/* Checks that the given range of addresses is valid for DMA. This is + * useful for certain cases, but usually you should just use + * dma_memory_{read,write}() and check for errors */ +static inline bool dma_memory_valid(AddressSpace *as, + dma_addr_t addr, dma_addr_t len, + DMADirection dir) +{ + return address_space_access_valid(as, addr, len, + dir == DMA_DIRECTION_FROM_DEVICE, + MEMTXATTRS_UNSPECIFIED); +} + +static inline int dma_memory_rw_relaxed(AddressSpace *as, dma_addr_t addr, + void *buf, dma_addr_t len, + DMADirection dir) +{ + return (bool)address_space_rw(as, addr, MEMTXATTRS_UNSPECIFIED, + buf, len, dir == DMA_DIRECTION_FROM_DEVICE); +} + +static inline int dma_memory_read_relaxed(AddressSpace *as, dma_addr_t addr, + void *buf, dma_addr_t len) +{ + return dma_memory_rw_relaxed(as, addr, buf, len, DMA_DIRECTION_TO_DEVICE); +} + +static inline int dma_memory_write_relaxed(AddressSpace *as, dma_addr_t addr, + const void *buf, dma_addr_t len) +{ + return dma_memory_rw_relaxed(as, addr, (void *)buf, len, + DMA_DIRECTION_FROM_DEVICE); +} + +static inline int dma_memory_rw(AddressSpace *as, dma_addr_t addr, + void *buf, dma_addr_t len, + DMADirection dir) +{ + dma_barrier(as, dir); + + return dma_memory_rw_relaxed(as, addr, buf, len, dir); +} + +static inline int dma_memory_read(AddressSpace *as, dma_addr_t addr, + void *buf, dma_addr_t len) +{ + return dma_memory_rw(as, addr, buf, len, DMA_DIRECTION_TO_DEVICE); +} + +static inline int dma_memory_write(AddressSpace *as, dma_addr_t addr, + const void *buf, dma_addr_t len) +{ + return dma_memory_rw(as, addr, (void *)buf, len, + DMA_DIRECTION_FROM_DEVICE); +} + +int dma_memory_set(AddressSpace *as, dma_addr_t addr, uint8_t c, dma_addr_t len); + +static inline void *dma_memory_map(AddressSpace *as, + dma_addr_t addr, dma_addr_t *len, + DMADirection dir) +{ + hwaddr xlen = *len; + void *p; + + p = address_space_map(as, addr, &xlen, dir == DMA_DIRECTION_FROM_DEVICE, + MEMTXATTRS_UNSPECIFIED); + *len = xlen; + return p; +} + +static inline void dma_memory_unmap(AddressSpace *as, + void *buffer, dma_addr_t len, + DMADirection dir, dma_addr_t access_len) +{ + address_space_unmap(as, buffer, (hwaddr)len, + dir == DMA_DIRECTION_FROM_DEVICE, access_len); +} + +#define DEFINE_LDST_DMA(_lname, _sname, _bits, _end) \ + static inline uint##_bits##_t ld##_lname##_##_end##_dma(AddressSpace *as, \ + dma_addr_t addr) \ + { \ + uint##_bits##_t val; \ + dma_memory_read(as, addr, &val, (_bits) / 8); \ + return _end##_bits##_to_cpu(val); \ + } \ + static inline void st##_sname##_##_end##_dma(AddressSpace *as, \ + dma_addr_t addr, \ + uint##_bits##_t val) \ + { \ + val = cpu_to_##_end##_bits(val); \ + dma_memory_write(as, addr, &val, (_bits) / 8); \ + } + +static inline uint8_t ldub_dma(AddressSpace *as, dma_addr_t addr) +{ + uint8_t val; + + dma_memory_read(as, addr, &val, 1); + return val; +} + +static inline void stb_dma(AddressSpace *as, dma_addr_t addr, uint8_t val) +{ + dma_memory_write(as, addr, &val, 1); +} + +DEFINE_LDST_DMA(uw, w, 16, le); +DEFINE_LDST_DMA(l, l, 32, le); +DEFINE_LDST_DMA(q, q, 64, le); +DEFINE_LDST_DMA(uw, w, 16, be); +DEFINE_LDST_DMA(l, l, 32, be); +DEFINE_LDST_DMA(q, q, 64, be); + +#undef DEFINE_LDST_DMA + +struct ScatterGatherEntry { + dma_addr_t base; + dma_addr_t len; +}; + +void qemu_sglist_init(QEMUSGList *qsg, DeviceState *dev, int alloc_hint, + AddressSpace *as); +void qemu_sglist_add(QEMUSGList *qsg, dma_addr_t base, dma_addr_t len); +void qemu_sglist_destroy(QEMUSGList *qsg); +#endif + +typedef BlockAIOCB *DMAIOFunc(int64_t offset, QEMUIOVector *iov, + BlockCompletionFunc *cb, void *cb_opaque, + void *opaque); + +BlockAIOCB *dma_blk_io(AioContext *ctx, + QEMUSGList *sg, uint64_t offset, uint32_t align, + DMAIOFunc *io_func, void *io_func_opaque, + BlockCompletionFunc *cb, void *opaque, DMADirection dir); +BlockAIOCB *dma_blk_read(BlockBackend *blk, + QEMUSGList *sg, uint64_t offset, uint32_t align, + BlockCompletionFunc *cb, void *opaque); +BlockAIOCB *dma_blk_write(BlockBackend *blk, + QEMUSGList *sg, uint64_t offset, uint32_t align, + BlockCompletionFunc *cb, void *opaque); +uint64_t dma_buf_read(uint8_t *ptr, int32_t len, QEMUSGList *sg); +uint64_t dma_buf_write(uint8_t *ptr, int32_t len, QEMUSGList *sg); + +void dma_acct_start(BlockBackend *blk, BlockAcctCookie *cookie, + QEMUSGList *sg, enum BlockAcctType type); + +#endif diff --git a/include/sysemu/dump-arch.h b/include/sysemu/dump-arch.h new file mode 100644 index 000000000..e25b02e99 --- /dev/null +++ b/include/sysemu/dump-arch.h @@ -0,0 +1,31 @@ +/* + * QEMU dump + * + * Copyright Fujitsu, Corp. 2011, 2012 + * + * Authors: + * Wen Congyang + * + * This work is licensed under the terms of the GNU GPL, version 2 or later. + * See the COPYING file in the top-level directory. + * + */ + +#ifndef DUMP_ARCH_H +#define DUMP_ARCH_H + +typedef struct ArchDumpInfo { + int d_machine; /* Architecture */ + int d_endian; /* ELFDATA2LSB or ELFDATA2MSB */ + int d_class; /* ELFCLASS32 or ELFCLASS64 */ + uint32_t page_size; /* The target's page size. If it's variable and + * unknown, then this should be the maximum. */ + uint64_t phys_base; /* The target's physmem base. */ +} ArchDumpInfo; + +struct GuestPhysBlockList; /* memory_mapping.h */ +int cpu_get_dump_info(ArchDumpInfo *info, + const struct GuestPhysBlockList *guest_phys_blocks); +ssize_t cpu_get_note_size(int class, int machine, int nr_cpus); + +#endif diff --git a/include/sysemu/dump.h b/include/sysemu/dump.h new file mode 100644 index 000000000..250143cb5 --- /dev/null +++ b/include/sysemu/dump.h @@ -0,0 +1,203 @@ +/* + * QEMU dump + * + * Copyright Fujitsu, Corp. 2011, 2012 + * + * Authors: + * Wen Congyang + * + * This work is licensed under the terms of the GNU GPL, version 2 or later. + * See the COPYING file in the top-level directory. + * + */ + +#ifndef DUMP_H +#define DUMP_H + +#include "qapi/qapi-types-dump.h" + +#define MAKEDUMPFILE_SIGNATURE "makedumpfile" +#define MAX_SIZE_MDF_HEADER (4096) /* max size of makedumpfile_header */ +#define TYPE_FLAT_HEADER (1) /* type of flattened format */ +#define VERSION_FLAT_HEADER (1) /* version of flattened format */ +#define END_FLAG_FLAT_HEADER (-1) + +#ifndef ARCH_PFN_OFFSET +#define ARCH_PFN_OFFSET (0) +#endif + +/* + * flag for compressed format + */ +#define DUMP_DH_COMPRESSED_ZLIB (0x1) +#define DUMP_DH_COMPRESSED_LZO (0x2) +#define DUMP_DH_COMPRESSED_SNAPPY (0x4) + +#define KDUMP_SIGNATURE "KDUMP " +#define SIG_LEN (sizeof(KDUMP_SIGNATURE) - 1) +#define DUMP_LEVEL (1) +#define DISKDUMP_HEADER_BLOCKS (1) + +#include "sysemu/dump-arch.h" +#include "sysemu/memory_mapping.h" + +typedef struct QEMU_PACKED MakedumpfileHeader { + char signature[16]; /* = "makedumpfile" */ + int64_t type; + int64_t version; +} MakedumpfileHeader; + +typedef struct QEMU_PACKED MakedumpfileDataHeader { + int64_t offset; + int64_t buf_size; +} MakedumpfileDataHeader; + +typedef struct QEMU_PACKED NewUtsname { + char sysname[65]; + char nodename[65]; + char release[65]; + char version[65]; + char machine[65]; + char domainname[65]; +} NewUtsname; + +typedef struct QEMU_PACKED DiskDumpHeader32 { + char signature[SIG_LEN]; /* = "KDUMP " */ + uint32_t header_version; /* Dump header version */ + NewUtsname utsname; /* copy of system_utsname */ + char timestamp[10]; /* Time stamp */ + uint32_t status; /* Above flags */ + uint32_t block_size; /* Size of a block in byte */ + uint32_t sub_hdr_size; /* Size of arch dependent header in block */ + uint32_t bitmap_blocks; /* Size of Memory bitmap in block */ + uint32_t max_mapnr; /* = max_mapnr , + obsoleted in header_version 6 */ + uint32_t total_ram_blocks; /* Number of blocks should be written */ + uint32_t device_blocks; /* Number of total blocks in dump device */ + uint32_t written_blocks; /* Number of written blocks */ + uint32_t current_cpu; /* CPU# which handles dump */ + uint32_t nr_cpus; /* Number of CPUs */ +} DiskDumpHeader32; + +typedef struct QEMU_PACKED DiskDumpHeader64 { + char signature[SIG_LEN]; /* = "KDUMP " */ + uint32_t header_version; /* Dump header version */ + NewUtsname utsname; /* copy of system_utsname */ + char timestamp[22]; /* Time stamp */ + uint32_t status; /* Above flags */ + uint32_t block_size; /* Size of a block in byte */ + uint32_t sub_hdr_size; /* Size of arch dependent header in block */ + uint32_t bitmap_blocks; /* Size of Memory bitmap in block */ + uint32_t max_mapnr; /* = max_mapnr, + obsoleted in header_version 6 */ + uint32_t total_ram_blocks; /* Number of blocks should be written */ + uint32_t device_blocks; /* Number of total blocks in dump device */ + uint32_t written_blocks; /* Number of written blocks */ + uint32_t current_cpu; /* CPU# which handles dump */ + uint32_t nr_cpus; /* Number of CPUs */ +} DiskDumpHeader64; + +typedef struct QEMU_PACKED KdumpSubHeader32 { + uint32_t phys_base; + uint32_t dump_level; /* header_version 1 and later */ + uint32_t split; /* header_version 2 and later */ + uint32_t start_pfn; /* header_version 2 and later, + obsoleted in header_version 6 */ + uint32_t end_pfn; /* header_version 2 and later, + obsoleted in header_version 6 */ + uint64_t offset_vmcoreinfo; /* header_version 3 and later */ + uint32_t size_vmcoreinfo; /* header_version 3 and later */ + uint64_t offset_note; /* header_version 4 and later */ + uint32_t note_size; /* header_version 4 and later */ + uint64_t offset_eraseinfo; /* header_version 5 and later */ + uint32_t size_eraseinfo; /* header_version 5 and later */ + uint64_t start_pfn_64; /* header_version 6 and later */ + uint64_t end_pfn_64; /* header_version 6 and later */ + uint64_t max_mapnr_64; /* header_version 6 and later */ +} KdumpSubHeader32; + +typedef struct QEMU_PACKED KdumpSubHeader64 { + uint64_t phys_base; + uint32_t dump_level; /* header_version 1 and later */ + uint32_t split; /* header_version 2 and later */ + uint64_t start_pfn; /* header_version 2 and later, + obsoleted in header_version 6 */ + uint64_t end_pfn; /* header_version 2 and later, + obsoleted in header_version 6 */ + uint64_t offset_vmcoreinfo; /* header_version 3 and later */ + uint64_t size_vmcoreinfo; /* header_version 3 and later */ + uint64_t offset_note; /* header_version 4 and later */ + uint64_t note_size; /* header_version 4 and later */ + uint64_t offset_eraseinfo; /* header_version 5 and later */ + uint64_t size_eraseinfo; /* header_version 5 and later */ + uint64_t start_pfn_64; /* header_version 6 and later */ + uint64_t end_pfn_64; /* header_version 6 and later */ + uint64_t max_mapnr_64; /* header_version 6 and later */ +} KdumpSubHeader64; + +typedef struct DataCache { + int fd; /* fd of the file where to write the cached data */ + uint8_t *buf; /* buffer for cached data */ + size_t buf_size; /* size of the buf */ + size_t data_size; /* size of cached data in buf */ + off_t offset; /* offset of the file */ +} DataCache; + +typedef struct QEMU_PACKED PageDescriptor { + uint64_t offset; /* the offset of the page data*/ + uint32_t size; /* the size of this dump page */ + uint32_t flags; /* flags */ + uint64_t page_flags; /* page flags */ +} PageDescriptor; + +typedef struct DumpState { + GuestPhysBlockList guest_phys_blocks; + ArchDumpInfo dump_info; + MemoryMappingList list; + uint16_t phdr_num; + uint32_t sh_info; + bool have_section; + bool resume; + bool detached; + ssize_t note_size; + hwaddr memory_offset; + int fd; + + GuestPhysBlock *next_block; + ram_addr_t start; + bool has_filter; + int64_t begin; + int64_t length; + + uint8_t *note_buf; /* buffer for notes */ + size_t note_buf_offset; /* the writing place in note_buf */ + uint32_t nr_cpus; /* number of guest's cpu */ + uint64_t max_mapnr; /* the biggest guest's phys-mem's number */ + size_t len_dump_bitmap; /* the size of the place used to store + dump_bitmap in vmcore */ + off_t offset_dump_bitmap; /* offset of dump_bitmap part in vmcore */ + off_t offset_page; /* offset of page part in vmcore */ + size_t num_dumpable; /* number of page that can be dumped */ + uint32_t flag_compress; /* indicate the compression format */ + DumpStatus status; /* current dump status */ + + bool has_format; /* whether format is provided */ + DumpGuestMemoryFormat format; /* valid only if has_format == true */ + QemuThread dump_thread; /* thread for detached dump */ + + int64_t total_size; /* total memory size (in bytes) to + * be dumped. When filter is + * enabled, this will only count + * those to be written. */ + int64_t written_size; /* written memory size (in bytes), + * this could be used to calculate + * how much work we have + * finished. */ + uint8_t *guest_note; /* ELF note content */ + size_t guest_note_size; +} DumpState; + +uint16_t cpu_to_dump16(DumpState *s, uint16_t val); +uint32_t cpu_to_dump32(DumpState *s, uint32_t val); +uint64_t cpu_to_dump64(DumpState *s, uint64_t val); +#endif diff --git a/include/sysemu/hax.h b/include/sysemu/hax.h new file mode 100644 index 000000000..12fb54f99 --- /dev/null +++ b/include/sysemu/hax.h @@ -0,0 +1,37 @@ +/* + * QEMU HAXM support + * + * Copyright IBM, Corp. 2008 + * + * Authors: + * Anthony Liguori + * + * Copyright (c) 2011 Intel Corporation + * Written by: + * Jiang Yunhong + * Xin Xiaohui + * Zhang Xiantao + * + * Copyright 2016 Google, Inc. + * + * This work is licensed under the terms of the GNU GPL, version 2 or later. + * See the COPYING file in the top-level directory. + * + */ + +#ifndef QEMU_HAX_H +#define QEMU_HAX_H + +int hax_sync_vcpus(void); + +#ifdef CONFIG_HAX + +int hax_enabled(void); + +#else /* CONFIG_HAX */ + +#define hax_enabled() (0) + +#endif /* CONFIG_HAX */ + +#endif /* QEMU_HAX_H */ diff --git a/include/sysemu/hostmem.h b/include/sysemu/hostmem.h new file mode 100644 index 000000000..df5644723 --- /dev/null +++ b/include/sysemu/hostmem.h @@ -0,0 +1,83 @@ +/* + * QEMU Host Memory Backend + * + * Copyright (C) 2013-2014 Red Hat Inc + * + * Authors: + * Igor Mammedov + * + * This work is licensed under the terms of the GNU GPL, version 2 or later. + * See the COPYING file in the top-level directory. + */ + +#ifndef SYSEMU_HOSTMEM_H +#define SYSEMU_HOSTMEM_H + +#include "sysemu/numa.h" +#include "qapi/qapi-types-machine.h" +#include "qom/object.h" +#include "exec/memory.h" +#include "qemu/bitmap.h" + +#define TYPE_MEMORY_BACKEND "memory-backend" +OBJECT_DECLARE_TYPE(HostMemoryBackend, HostMemoryBackendClass, + MEMORY_BACKEND) + +/* hostmem-ram.c */ +/** + * @TYPE_MEMORY_BACKEND_RAM: + * name of backend that uses mmap on the anonymous RAM + */ + +#define TYPE_MEMORY_BACKEND_RAM "memory-backend-ram" + +/* hostmem-file.c */ +/** + * @TYPE_MEMORY_BACKEND_FILE: + * name of backend that uses mmap on a file descriptor + */ +#define TYPE_MEMORY_BACKEND_FILE "memory-backend-file" + + +/** + * HostMemoryBackendClass: + * @parent_class: opaque parent class container + */ +struct HostMemoryBackendClass { + ObjectClass parent_class; + + void (*alloc)(HostMemoryBackend *backend, Error **errp); +}; + +/** + * @HostMemoryBackend + * + * @parent: opaque parent object container + * @size: amount of memory backend provides + * @mr: MemoryRegion representing host memory belonging to backend + * @prealloc_threads: number of threads to be used for preallocatining RAM + */ +struct HostMemoryBackend { + /* private */ + Object parent; + + /* protected */ + uint64_t size; + bool merge, dump, use_canonical_path; + bool prealloc, is_mapped, share; + uint32_t prealloc_threads; + DECLARE_BITMAP(host_nodes, MAX_NODES + 1); + HostMemPolicy policy; + + MemoryRegion mr; +}; + +bool host_memory_backend_mr_inited(HostMemoryBackend *backend); +MemoryRegion *host_memory_backend_get_memory(HostMemoryBackend *backend); + +void host_memory_backend_set_mapped(HostMemoryBackend *backend, bool mapped); +bool host_memory_backend_is_mapped(HostMemoryBackend *backend); +size_t host_memory_backend_pagesize(HostMemoryBackend *memdev); +char *host_memory_backend_get_name(HostMemoryBackend *backend); + +#endif diff --git a/include/sysemu/hvf.h b/include/sysemu/hvf.h new file mode 100644 index 000000000..f893768df --- /dev/null +++ b/include/sysemu/hvf.h @@ -0,0 +1,35 @@ +/* + * QEMU Hypervisor.framework (HVF) support + * + * Copyright Google Inc., 2017 + * + * This work is licensed under the terms of the GNU GPL, version 2 or later. + * See the COPYING file in the top-level directory. + * + */ + +/* header to be included in non-HVF-specific code */ + +#ifndef HVF_H +#define HVF_H + +#include "sysemu/accel.h" +#include "qom/object.h" + +#ifdef CONFIG_HVF +uint32_t hvf_get_supported_cpuid(uint32_t func, uint32_t idx, + int reg); +extern bool hvf_allowed; +#define hvf_enabled() (hvf_allowed) +#else /* !CONFIG_HVF */ +#define hvf_enabled() 0 +#define hvf_get_supported_cpuid(func, idx, reg) 0 +#endif /* !CONFIG_HVF */ + +#define TYPE_HVF_ACCEL ACCEL_CLASS_NAME("hvf") + +typedef struct HVFState HVFState; +DECLARE_INSTANCE_CHECKER(HVFState, HVF_STATE, + TYPE_HVF_ACCEL) + +#endif diff --git a/include/sysemu/hw_accel.h b/include/sysemu/hw_accel.h new file mode 100644 index 000000000..ffed6192a --- /dev/null +++ b/include/sysemu/hw_accel.h @@ -0,0 +1,25 @@ +/* + * QEMU Hardware accelerators support + * + * Copyright 2016 Google, Inc. + * + * This work is licensed under the terms of the GNU GPL, version 2 or later. + * See the COPYING file in the top-level directory. + * + */ + +#ifndef QEMU_HW_ACCEL_H +#define QEMU_HW_ACCEL_H + +#include "hw/core/cpu.h" +#include "sysemu/hax.h" +#include "sysemu/kvm.h" +#include "sysemu/hvf.h" +#include "sysemu/whpx.h" + +void cpu_synchronize_state(CPUState *cpu); +void cpu_synchronize_post_reset(CPUState *cpu); +void cpu_synchronize_post_init(CPUState *cpu); +void cpu_synchronize_pre_loadvm(CPUState *cpu); + +#endif /* QEMU_HW_ACCEL_H */ diff --git a/include/sysemu/iothread.h b/include/sysemu/iothread.h new file mode 100644 index 000000000..0c5284dbb --- /dev/null +++ b/include/sysemu/iothread.h @@ -0,0 +1,60 @@ +/* + * Event loop thread + * + * Copyright Red Hat Inc., 2013 + * + * Authors: + * Stefan Hajnoczi + * + * This work is licensed under the terms of the GNU GPL, version 2 or later. + * See the COPYING file in the top-level directory. + * + */ + +#ifndef IOTHREAD_H +#define IOTHREAD_H + +#include "block/aio.h" +#include "qemu/thread.h" +#include "qom/object.h" + +#define TYPE_IOTHREAD "iothread" + +struct IOThread { + Object parent_obj; + + QemuThread thread; + AioContext *ctx; + bool run_gcontext; /* whether we should run gcontext */ + GMainContext *worker_context; + GMainLoop *main_loop; + QemuSemaphore init_done_sem; /* is thread init done? */ + bool stopping; /* has iothread_stop() been called? */ + bool running; /* should iothread_run() continue? */ + int thread_id; + + /* AioContext poll parameters */ + int64_t poll_max_ns; + int64_t poll_grow; + int64_t poll_shrink; +}; +typedef struct IOThread IOThread; + +DECLARE_INSTANCE_CHECKER(IOThread, IOTHREAD, + TYPE_IOTHREAD) + +char *iothread_get_id(IOThread *iothread); +IOThread *iothread_by_id(const char *id); +AioContext *iothread_get_aio_context(IOThread *iothread); +GMainContext *iothread_get_g_main_context(IOThread *iothread); + +/* + * Helpers used to allocate iothreads for internal use. These + * iothreads will not be seen by monitor clients when query using + * "query-iothreads". + */ +IOThread *iothread_create(const char *id, Error **errp); +void iothread_stop(IOThread *iothread); +void iothread_destroy(IOThread *iothread); + +#endif /* IOTHREAD_H */ diff --git a/include/sysemu/kvm.h b/include/sysemu/kvm.h new file mode 100644 index 000000000..bb5d5cf49 --- /dev/null +++ b/include/sysemu/kvm.h @@ -0,0 +1,560 @@ +/* + * QEMU KVM support + * + * Copyright IBM, Corp. 2008 + * + * Authors: + * Anthony Liguori + * + * This work is licensed under the terms of the GNU GPL, version 2 or later. + * See the COPYING file in the top-level directory. + * + */ + +#ifndef QEMU_KVM_H +#define QEMU_KVM_H + +#include "qemu/queue.h" +#include "hw/core/cpu.h" +#include "exec/memattrs.h" +#include "sysemu/accel.h" +#include "qom/object.h" + +#ifdef NEED_CPU_H +# ifdef CONFIG_KVM +# include +# define CONFIG_KVM_IS_POSSIBLE +# endif +#else +# define CONFIG_KVM_IS_POSSIBLE +#endif + +#ifdef CONFIG_KVM_IS_POSSIBLE + +extern bool kvm_allowed; +extern bool kvm_kernel_irqchip; +extern bool kvm_split_irqchip; +extern bool kvm_async_interrupts_allowed; +extern bool kvm_halt_in_kernel_allowed; +extern bool kvm_eventfds_allowed; +extern bool kvm_irqfds_allowed; +extern bool kvm_resamplefds_allowed; +extern bool kvm_msi_via_irqfd_allowed; +extern bool kvm_gsi_routing_allowed; +extern bool kvm_gsi_direct_mapping; +extern bool kvm_readonly_mem_allowed; +extern bool kvm_direct_msi_allowed; +extern bool kvm_ioeventfd_any_length_allowed; +extern bool kvm_msi_use_devid; + +#define kvm_enabled() (kvm_allowed) +/** + * kvm_irqchip_in_kernel: + * + * Returns: true if an in-kernel irqchip was created. + * What this actually means is architecture and machine model + * specific: on PC, for instance, it means that the LAPIC + * is in kernel. This function should never be used from generic + * target-independent code: use one of the following functions or + * some other specific check instead. + */ +#define kvm_irqchip_in_kernel() (kvm_kernel_irqchip) + +/** + * kvm_irqchip_is_split: + * + * Returns: true if the irqchip implementation is split between + * user and kernel space. The details are architecture and + * machine specific. On PC, it means that the PIC, IOAPIC, and + * PIT are in user space while the LAPIC is in the kernel. + */ +#define kvm_irqchip_is_split() (kvm_split_irqchip) + +/** + * kvm_async_interrupts_enabled: + * + * Returns: true if we can deliver interrupts to KVM + * asynchronously (ie by ioctl from any thread at any time) + * rather than having to do interrupt delivery synchronously + * (where the vcpu must be stopped at a suitable point first). + */ +#define kvm_async_interrupts_enabled() (kvm_async_interrupts_allowed) + +/** + * kvm_halt_in_kernel + * + * Returns: true if halted cpus should still get a KVM_RUN ioctl to run + * inside of kernel space. This only works if MP state is implemented. + */ +#define kvm_halt_in_kernel() (kvm_halt_in_kernel_allowed) + +/** + * kvm_eventfds_enabled: + * + * Returns: true if we can use eventfds to receive notifications + * from a KVM CPU (ie the kernel supports eventds and we are running + * with a configuration where it is meaningful to use them). + */ +#define kvm_eventfds_enabled() (kvm_eventfds_allowed) + +/** + * kvm_irqfds_enabled: + * + * Returns: true if we can use irqfds to inject interrupts into + * a KVM CPU (ie the kernel supports irqfds and we are running + * with a configuration where it is meaningful to use them). + */ +#define kvm_irqfds_enabled() (kvm_irqfds_allowed) + +/** + * kvm_resamplefds_enabled: + * + * Returns: true if we can use resamplefds to inject interrupts into + * a KVM CPU (ie the kernel supports resamplefds and we are running + * with a configuration where it is meaningful to use them). + */ +#define kvm_resamplefds_enabled() (kvm_resamplefds_allowed) + +/** + * kvm_msi_via_irqfd_enabled: + * + * Returns: true if we can route a PCI MSI (Message Signaled Interrupt) + * to a KVM CPU via an irqfd. This requires that the kernel supports + * this and that we're running in a configuration that permits it. + */ +#define kvm_msi_via_irqfd_enabled() (kvm_msi_via_irqfd_allowed) + +/** + * kvm_gsi_routing_enabled: + * + * Returns: true if GSI routing is enabled (ie the kernel supports + * it and we're running in a configuration that permits it). + */ +#define kvm_gsi_routing_enabled() (kvm_gsi_routing_allowed) + +/** + * kvm_gsi_direct_mapping: + * + * Returns: true if GSI direct mapping is enabled. + */ +#define kvm_gsi_direct_mapping() (kvm_gsi_direct_mapping) + +/** + * kvm_readonly_mem_enabled: + * + * Returns: true if KVM readonly memory is enabled (ie the kernel + * supports it and we're running in a configuration that permits it). + */ +#define kvm_readonly_mem_enabled() (kvm_readonly_mem_allowed) + +/** + * kvm_direct_msi_enabled: + * + * Returns: true if KVM allows direct MSI injection. + */ +#define kvm_direct_msi_enabled() (kvm_direct_msi_allowed) + +/** + * kvm_ioeventfd_any_length_enabled: + * Returns: true if KVM allows any length io eventfd. + */ +#define kvm_ioeventfd_any_length_enabled() (kvm_ioeventfd_any_length_allowed) + +/** + * kvm_msi_devid_required: + * Returns: true if KVM requires a device id to be provided while + * defining an MSI routing entry. + */ +#define kvm_msi_devid_required() (kvm_msi_use_devid) + +#else + +#define kvm_enabled() (0) +#define kvm_irqchip_in_kernel() (false) +#define kvm_irqchip_is_split() (false) +#define kvm_async_interrupts_enabled() (false) +#define kvm_halt_in_kernel() (false) +#define kvm_eventfds_enabled() (false) +#define kvm_irqfds_enabled() (false) +#define kvm_resamplefds_enabled() (false) +#define kvm_msi_via_irqfd_enabled() (false) +#define kvm_gsi_routing_allowed() (false) +#define kvm_gsi_direct_mapping() (false) +#define kvm_readonly_mem_enabled() (false) +#define kvm_direct_msi_enabled() (false) +#define kvm_ioeventfd_any_length_enabled() (false) +#define kvm_msi_devid_required() (false) + +#endif /* CONFIG_KVM_IS_POSSIBLE */ + +struct kvm_run; +struct kvm_lapic_state; +struct kvm_irq_routing_entry; + +typedef struct KVMCapabilityInfo { + const char *name; + int value; +} KVMCapabilityInfo; + +#define KVM_CAP_INFO(CAP) { "KVM_CAP_" stringify(CAP), KVM_CAP_##CAP } +#define KVM_CAP_LAST_INFO { NULL, 0 } + +struct KVMState; + +#define TYPE_KVM_ACCEL ACCEL_CLASS_NAME("kvm") +typedef struct KVMState KVMState; +DECLARE_INSTANCE_CHECKER(KVMState, KVM_STATE, + TYPE_KVM_ACCEL) + +extern KVMState *kvm_state; +typedef struct Notifier Notifier; + +/* external API */ + +bool kvm_has_free_slot(MachineState *ms); +bool kvm_has_sync_mmu(void); +int kvm_has_vcpu_events(void); +int kvm_has_robust_singlestep(void); +int kvm_has_debugregs(void); +int kvm_max_nested_state_length(void); +int kvm_has_pit_state2(void); +int kvm_has_many_ioeventfds(void); +int kvm_has_gsi_routing(void); +int kvm_has_intx_set_mask(void); + +/** + * kvm_arm_supports_user_irq + * + * Not all KVM implementations support notifications for kernel generated + * interrupt events to user space. This function indicates whether the current + * KVM implementation does support them. + * + * Returns: true if KVM supports using kernel generated IRQs from user space + */ +bool kvm_arm_supports_user_irq(void); + +/** + * kvm_memcrypt_enabled - return boolean indicating whether memory encryption + * is enabled + * Returns: 1 memory encryption is enabled + * 0 memory encryption is disabled + */ +bool kvm_memcrypt_enabled(void); + +/** + * kvm_memcrypt_encrypt_data: encrypt the memory range + * + * Return: 1 failed to encrypt the range + * 0 succesfully encrypted memory region + */ +int kvm_memcrypt_encrypt_data(uint8_t *ptr, uint64_t len); + + +#ifdef NEED_CPU_H +#include "cpu.h" + +void kvm_flush_coalesced_mmio_buffer(void); + +int kvm_insert_breakpoint(CPUState *cpu, target_ulong addr, + target_ulong len, int type); +int kvm_remove_breakpoint(CPUState *cpu, target_ulong addr, + target_ulong len, int type); +void kvm_remove_all_breakpoints(CPUState *cpu); +int kvm_update_guest_debug(CPUState *cpu, unsigned long reinject_trap); + +int kvm_on_sigbus_vcpu(CPUState *cpu, int code, void *addr); +int kvm_on_sigbus(int code, void *addr); + +/* interface with exec.c */ + +void phys_mem_set_alloc(void *(*alloc)(size_t, uint64_t *align, bool shared)); + +/* internal API */ + +int kvm_ioctl(KVMState *s, int type, ...); + +int kvm_vm_ioctl(KVMState *s, int type, ...); + +int kvm_vcpu_ioctl(CPUState *cpu, int type, ...); + +/** + * kvm_device_ioctl - call an ioctl on a kvm device + * @fd: The KVM device file descriptor as returned from KVM_CREATE_DEVICE + * @type: The device-ctrl ioctl number + * + * Returns: -errno on error, nonnegative on success + */ +int kvm_device_ioctl(int fd, int type, ...); + +/** + * kvm_vm_check_attr - check for existence of a specific vm attribute + * @s: The KVMState pointer + * @group: the group + * @attr: the attribute of that group to query for + * + * Returns: 1 if the attribute exists + * 0 if the attribute either does not exist or if the vm device + * interface is unavailable + */ +int kvm_vm_check_attr(KVMState *s, uint32_t group, uint64_t attr); + +/** + * kvm_device_check_attr - check for existence of a specific device attribute + * @fd: The device file descriptor + * @group: the group + * @attr: the attribute of that group to query for + * + * Returns: 1 if the attribute exists + * 0 if the attribute either does not exist or if the vm device + * interface is unavailable + */ +int kvm_device_check_attr(int fd, uint32_t group, uint64_t attr); + +/** + * kvm_device_access - set or get value of a specific device attribute + * @fd: The device file descriptor + * @group: the group + * @attr: the attribute of that group to set or get + * @val: pointer to a storage area for the value + * @write: true for set and false for get operation + * @errp: error object handle + * + * Returns: 0 on success + * < 0 on error + * Use kvm_device_check_attr() in order to check for the availability + * of optional attributes. + */ +int kvm_device_access(int fd, int group, uint64_t attr, + void *val, bool write, Error **errp); + +/** + * kvm_create_device - create a KVM device for the device control API + * @KVMState: The KVMState pointer + * @type: The KVM device type (see Documentation/virtual/kvm/devices in the + * kernel source) + * @test: If true, only test if device can be created, but don't actually + * create the device. + * + * Returns: -errno on error, nonnegative on success: @test ? 0 : device fd; + */ +int kvm_create_device(KVMState *s, uint64_t type, bool test); + +/** + * kvm_device_supported - probe whether KVM supports specific device + * + * @vmfd: The fd handler for VM + * @type: type of device + * + * @return: true if supported, otherwise false. + */ +bool kvm_device_supported(int vmfd, uint64_t type); + +/* Arch specific hooks */ + +extern const KVMCapabilityInfo kvm_arch_required_capabilities[]; + +void kvm_arch_pre_run(CPUState *cpu, struct kvm_run *run); +MemTxAttrs kvm_arch_post_run(CPUState *cpu, struct kvm_run *run); + +int kvm_arch_handle_exit(CPUState *cpu, struct kvm_run *run); + +int kvm_arch_process_async_events(CPUState *cpu); + +int kvm_arch_get_registers(CPUState *cpu); + +/* state subset only touched by the VCPU itself during runtime */ +#define KVM_PUT_RUNTIME_STATE 1 +/* state subset modified during VCPU reset */ +#define KVM_PUT_RESET_STATE 2 +/* full state set, modified during initialization or on vmload */ +#define KVM_PUT_FULL_STATE 3 + +int kvm_arch_put_registers(CPUState *cpu, int level); + +int kvm_arch_init(MachineState *ms, KVMState *s); + +int kvm_arch_init_vcpu(CPUState *cpu); +int kvm_arch_destroy_vcpu(CPUState *cpu); + +bool kvm_vcpu_id_is_valid(int vcpu_id); + +/* Returns VCPU ID to be used on KVM_CREATE_VCPU ioctl() */ +unsigned long kvm_arch_vcpu_id(CPUState *cpu); + +#ifdef KVM_HAVE_MCE_INJECTION +void kvm_arch_on_sigbus_vcpu(CPUState *cpu, int code, void *addr); +#endif + +void kvm_arch_init_irq_routing(KVMState *s); + +int kvm_arch_fixup_msi_route(struct kvm_irq_routing_entry *route, + uint64_t address, uint32_t data, PCIDevice *dev); + +/* Notify arch about newly added MSI routes */ +int kvm_arch_add_msi_route_post(struct kvm_irq_routing_entry *route, + int vector, PCIDevice *dev); +/* Notify arch about released MSI routes */ +int kvm_arch_release_virq_post(int virq); + +int kvm_arch_msi_data_to_gsi(uint32_t data); + +int kvm_set_irq(KVMState *s, int irq, int level); +int kvm_irqchip_send_msi(KVMState *s, MSIMessage msg); + +void kvm_irqchip_add_irq_route(KVMState *s, int gsi, int irqchip, int pin); + +void kvm_irqchip_add_change_notifier(Notifier *n); +void kvm_irqchip_remove_change_notifier(Notifier *n); +void kvm_irqchip_change_notify(void); + +void kvm_get_apic_state(DeviceState *d, struct kvm_lapic_state *kapic); + +struct kvm_guest_debug; +struct kvm_debug_exit_arch; + +struct kvm_sw_breakpoint { + target_ulong pc; + target_ulong saved_insn; + int use_count; + QTAILQ_ENTRY(kvm_sw_breakpoint) entry; +}; + +struct kvm_sw_breakpoint *kvm_find_sw_breakpoint(CPUState *cpu, + target_ulong pc); + +int kvm_sw_breakpoints_active(CPUState *cpu); + +int kvm_arch_insert_sw_breakpoint(CPUState *cpu, + struct kvm_sw_breakpoint *bp); +int kvm_arch_remove_sw_breakpoint(CPUState *cpu, + struct kvm_sw_breakpoint *bp); +int kvm_arch_insert_hw_breakpoint(target_ulong addr, + target_ulong len, int type); +int kvm_arch_remove_hw_breakpoint(target_ulong addr, + target_ulong len, int type); +void kvm_arch_remove_all_hw_breakpoints(void); + +void kvm_arch_update_guest_debug(CPUState *cpu, struct kvm_guest_debug *dbg); + +bool kvm_arch_stop_on_emulation_error(CPUState *cpu); + +int kvm_check_extension(KVMState *s, unsigned int extension); + +int kvm_vm_check_extension(KVMState *s, unsigned int extension); + +#define kvm_vm_enable_cap(s, capability, cap_flags, ...) \ + ({ \ + struct kvm_enable_cap cap = { \ + .cap = capability, \ + .flags = cap_flags, \ + }; \ + uint64_t args_tmp[] = { __VA_ARGS__ }; \ + size_t n = MIN(ARRAY_SIZE(args_tmp), ARRAY_SIZE(cap.args)); \ + memcpy(cap.args, args_tmp, n * sizeof(cap.args[0])); \ + kvm_vm_ioctl(s, KVM_ENABLE_CAP, &cap); \ + }) + +#define kvm_vcpu_enable_cap(cpu, capability, cap_flags, ...) \ + ({ \ + struct kvm_enable_cap cap = { \ + .cap = capability, \ + .flags = cap_flags, \ + }; \ + uint64_t args_tmp[] = { __VA_ARGS__ }; \ + size_t n = MIN(ARRAY_SIZE(args_tmp), ARRAY_SIZE(cap.args)); \ + memcpy(cap.args, args_tmp, n * sizeof(cap.args[0])); \ + kvm_vcpu_ioctl(cpu, KVM_ENABLE_CAP, &cap); \ + }) + +uint32_t kvm_arch_get_supported_cpuid(KVMState *env, uint32_t function, + uint32_t index, int reg); +uint64_t kvm_arch_get_supported_msr_feature(KVMState *s, uint32_t index); + + +void kvm_set_sigmask_len(KVMState *s, unsigned int sigmask_len); + +#if !defined(CONFIG_USER_ONLY) +int kvm_physical_memory_addr_from_host(KVMState *s, void *ram_addr, + hwaddr *phys_addr); +#endif + +#endif /* NEED_CPU_H */ + +void kvm_cpu_synchronize_state(CPUState *cpu); + +void kvm_init_cpu_signals(CPUState *cpu); + +/** + * kvm_irqchip_add_msi_route - Add MSI route for specific vector + * @s: KVM state + * @vector: which vector to add. This can be either MSI/MSIX + * vector. The function will automatically detect whether + * MSI/MSIX is enabled, and fetch corresponding MSI + * message. + * @dev: Owner PCI device to add the route. If @dev is specified + * as @NULL, an empty MSI message will be inited. + * @return: virq (>=0) when success, errno (<0) when failed. + */ +int kvm_irqchip_add_msi_route(KVMState *s, int vector, PCIDevice *dev); +int kvm_irqchip_update_msi_route(KVMState *s, int virq, MSIMessage msg, + PCIDevice *dev); +void kvm_irqchip_commit_routes(KVMState *s); +void kvm_irqchip_release_virq(KVMState *s, int virq); + +int kvm_irqchip_add_adapter_route(KVMState *s, AdapterInfo *adapter); +int kvm_irqchip_add_hv_sint_route(KVMState *s, uint32_t vcpu, uint32_t sint); + +int kvm_irqchip_add_irqfd_notifier_gsi(KVMState *s, EventNotifier *n, + EventNotifier *rn, int virq); +int kvm_irqchip_remove_irqfd_notifier_gsi(KVMState *s, EventNotifier *n, + int virq); +int kvm_irqchip_add_irqfd_notifier(KVMState *s, EventNotifier *n, + EventNotifier *rn, qemu_irq irq); +int kvm_irqchip_remove_irqfd_notifier(KVMState *s, EventNotifier *n, + qemu_irq irq); +void kvm_irqchip_set_qemuirq_gsi(KVMState *s, qemu_irq irq, int gsi); +void kvm_pc_setup_irq_routing(bool pci_enabled); +void kvm_init_irq_routing(KVMState *s); + +bool kvm_kernel_irqchip_allowed(void); +bool kvm_kernel_irqchip_required(void); +bool kvm_kernel_irqchip_split(void); + +/** + * kvm_arch_irqchip_create: + * @KVMState: The KVMState pointer + * + * Allow architectures to create an in-kernel irq chip themselves. + * + * Returns: < 0: error + * 0: irq chip was not created + * > 0: irq chip was created + */ +int kvm_arch_irqchip_create(KVMState *s); + +/** + * kvm_set_one_reg - set a register value in KVM via KVM_SET_ONE_REG ioctl + * @id: The register ID + * @source: The pointer to the value to be set. It must point to a variable + * of the correct type/size for the register being accessed. + * + * Returns: 0 on success, or a negative errno on failure. + */ +int kvm_set_one_reg(CPUState *cs, uint64_t id, void *source); + +/** + * kvm_get_one_reg - get a register value from KVM via KVM_GET_ONE_REG ioctl + * @id: The register ID + * @target: The pointer where the value is to be stored. It must point to a + * variable of the correct type/size for the register being accessed. + * + * Returns: 0 on success, or a negative errno on failure. + */ +int kvm_get_one_reg(CPUState *cs, uint64_t id, void *target); +struct ppc_radix_page_info *kvm_get_radix_page_info(void); +int kvm_get_max_memslots(void); + +/* Notify resamplefd for EOI of specific interrupts. */ +void kvm_resample_fd_notify(int gsi); + +#endif diff --git a/include/sysemu/kvm_int.h b/include/sysemu/kvm_int.h new file mode 100644 index 000000000..65740806d --- /dev/null +++ b/include/sysemu/kvm_int.h @@ -0,0 +1,52 @@ +/* + * Internal definitions for a target's KVM support + * + * This work is licensed under the terms of the GNU GPL, version 2 or later. + * See the COPYING file in the top-level directory. + * + */ + +#ifndef QEMU_KVM_INT_H +#define QEMU_KVM_INT_H + +#include "exec/memory.h" +#include "sysemu/accel.h" +#include "sysemu/kvm.h" + +typedef struct KVMSlot +{ + hwaddr start_addr; + ram_addr_t memory_size; + void *ram; + int slot; + int flags; + int old_flags; + /* Dirty bitmap cache for the slot */ + unsigned long *dirty_bmap; +} KVMSlot; + +typedef struct KVMMemoryListener { + MemoryListener listener; + /* Protects the slots and all inside them */ + QemuMutex slots_lock; + KVMSlot *slots; + int as_id; +} KVMMemoryListener; + +void kvm_memory_listener_register(KVMState *s, KVMMemoryListener *kml, + AddressSpace *as, int as_id); + +void kvm_set_max_memslot_size(hwaddr max_slot_size); + +/** + * kvm_hwpoison_page_add: + * + * Parameters: + * @ram_addr: the address in the RAM for the poisoned page + * + * Add a poisoned page to the list + * + * Return: None. + */ +void kvm_hwpoison_page_add(ram_addr_t ram_addr); +#endif diff --git a/include/sysemu/memory_mapping.h b/include/sysemu/memory_mapping.h new file mode 100644 index 000000000..4b20f1a63 --- /dev/null +++ b/include/sysemu/memory_mapping.h @@ -0,0 +1,86 @@ +/* + * QEMU memory mapping + * + * Copyright Fujitsu, Corp. 2011, 2012 + * + * Authors: + * Wen Congyang + * + * This work is licensed under the terms of the GNU GPL, version 2 or later. + * See the COPYING file in the top-level directory. + * + */ + +#ifndef MEMORY_MAPPING_H +#define MEMORY_MAPPING_H + +#include "qemu/queue.h" +#include "exec/cpu-defs.h" +#include "exec/memory.h" + +typedef struct GuestPhysBlock { + /* visible to guest, reflects PCI hole, etc */ + hwaddr target_start; + + /* implies size */ + hwaddr target_end; + + /* points into host memory */ + uint8_t *host_addr; + + /* points to the MemoryRegion that this block belongs to */ + MemoryRegion *mr; + + QTAILQ_ENTRY(GuestPhysBlock) next; +} GuestPhysBlock; + +/* point-in-time snapshot of guest-visible physical mappings */ +typedef struct GuestPhysBlockList { + unsigned num; + QTAILQ_HEAD(, GuestPhysBlock) head; +} GuestPhysBlockList; + +/* The physical and virtual address in the memory mapping are contiguous. */ +typedef struct MemoryMapping { + hwaddr phys_addr; + target_ulong virt_addr; + ram_addr_t length; + QTAILQ_ENTRY(MemoryMapping) next; +} MemoryMapping; + +struct MemoryMappingList { + unsigned int num; + MemoryMapping *last_mapping; + QTAILQ_HEAD(, MemoryMapping) head; +}; + +/* + * add or merge the memory region [phys_addr, phys_addr + length) into the + * memory mapping's list. The region's virtual address starts with virt_addr, + * and is contiguous. The list is sorted by phys_addr. + */ +void memory_mapping_list_add_merge_sorted(MemoryMappingList *list, + hwaddr phys_addr, + hwaddr virt_addr, + ram_addr_t length); + +void memory_mapping_list_free(MemoryMappingList *list); + +void memory_mapping_list_init(MemoryMappingList *list); + +void guest_phys_blocks_free(GuestPhysBlockList *list); +void guest_phys_blocks_init(GuestPhysBlockList *list); +void guest_phys_blocks_append(GuestPhysBlockList *list); + +void qemu_get_guest_memory_mapping(MemoryMappingList *list, + const GuestPhysBlockList *guest_phys_blocks, + Error **errp); + +/* get guest's memory mapping without do paging(virtual address is 0). */ +void qemu_get_guest_simple_memory_mapping(MemoryMappingList *list, + const GuestPhysBlockList *guest_phys_blocks); + +void memory_mapping_filter(MemoryMappingList *list, int64_t begin, + int64_t length); + +#endif diff --git a/include/sysemu/numa.h b/include/sysemu/numa.h new file mode 100644 index 000000000..4173ef2af --- /dev/null +++ b/include/sysemu/numa.h @@ -0,0 +1,113 @@ +#ifndef SYSEMU_NUMA_H +#define SYSEMU_NUMA_H + +#include "qemu/bitmap.h" +#include "qapi/qapi-types-machine.h" +#include "exec/cpu-common.h" + +struct CPUArchId; + +#define MAX_NODES 128 +#define NUMA_NODE_UNASSIGNED MAX_NODES +#define NUMA_DISTANCE_MIN 10 +#define NUMA_DISTANCE_DEFAULT 20 +#define NUMA_DISTANCE_MAX 254 +#define NUMA_DISTANCE_UNREACHABLE 255 + +/* the value of AcpiHmatLBInfo flags */ +enum { + HMAT_LB_MEM_MEMORY = 0, + HMAT_LB_MEM_CACHE_1ST_LEVEL = 1, + HMAT_LB_MEM_CACHE_2ND_LEVEL = 2, + HMAT_LB_MEM_CACHE_3RD_LEVEL = 3, + HMAT_LB_LEVELS /* must be the last entry */ +}; + +/* the value of AcpiHmatLBInfo data type */ +enum { + HMAT_LB_DATA_ACCESS_LATENCY = 0, + HMAT_LB_DATA_READ_LATENCY = 1, + HMAT_LB_DATA_WRITE_LATENCY = 2, + HMAT_LB_DATA_ACCESS_BANDWIDTH = 3, + HMAT_LB_DATA_READ_BANDWIDTH = 4, + HMAT_LB_DATA_WRITE_BANDWIDTH = 5, + HMAT_LB_TYPES /* must be the last entry */ +}; + +#define UINT16_BITS 16 + +struct NodeInfo { + uint64_t node_mem; + struct HostMemoryBackend *node_memdev; + bool present; + bool has_cpu; + uint8_t lb_info_provided; + uint16_t initiator; + uint8_t distance[MAX_NODES]; +}; + +struct NumaNodeMem { + uint64_t node_mem; + uint64_t node_plugged_mem; +}; + +struct HMAT_LB_Data { + uint8_t initiator; + uint8_t target; + uint64_t data; +}; +typedef struct HMAT_LB_Data HMAT_LB_Data; + +struct HMAT_LB_Info { + /* Indicates it's memory or the specified level memory side cache. */ + uint8_t hierarchy; + + /* Present the type of data, access/read/write latency or bandwidth. */ + uint8_t data_type; + + /* The range bitmap of bandwidth for calculating common base */ + uint64_t range_bitmap; + + /* The common base unit for latencies or bandwidths */ + uint64_t base; + + /* Array to store the latencies or bandwidths */ + GArray *list; +}; +typedef struct HMAT_LB_Info HMAT_LB_Info; + +struct NumaState { + /* Number of NUMA nodes */ + int num_nodes; + + /* Allow setting NUMA distance for different NUMA nodes */ + bool have_numa_distance; + + /* Detect if HMAT support is enabled. */ + bool hmat_enabled; + + /* NUMA nodes information */ + NodeInfo nodes[MAX_NODES]; + + /* NUMA nodes HMAT Locality Latency and Bandwidth Information */ + HMAT_LB_Info *hmat_lb[HMAT_LB_LEVELS][HMAT_LB_TYPES]; + + /* Memory Side Cache Information Structure */ + NumaHmatCacheOptions *hmat_cache[MAX_NODES][HMAT_LB_LEVELS]; +}; +typedef struct NumaState NumaState; + +void set_numa_options(MachineState *ms, NumaOptions *object, Error **errp); +void parse_numa_opts(MachineState *ms); +void parse_numa_hmat_lb(NumaState *numa_state, NumaHmatLBOptions *node, + Error **errp); +void parse_numa_hmat_cache(MachineState *ms, NumaHmatCacheOptions *node, + Error **errp); +void numa_complete_configuration(MachineState *ms); +void query_numa_node_mem(NumaNodeMem node_mem[], MachineState *ms); +extern QemuOptsList qemu_numa_opts; +void numa_cpu_pre_plug(const struct CPUArchId *slot, DeviceState *dev, + Error **errp); +bool numa_uses_legacy_mem(void); + +#endif diff --git a/include/sysemu/os-posix.h b/include/sysemu/os-posix.h new file mode 100644 index 000000000..629c8c648 --- /dev/null +++ b/include/sysemu/os-posix.h @@ -0,0 +1,95 @@ +/* + * posix specific declarations + * + * Copyright (c) 2003-2008 Fabrice Bellard + * Copyright (c) 2010 Jes Sorensen + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ + +#ifndef QEMU_OS_POSIX_H +#define QEMU_OS_POSIX_H + +#include +#include +#include +#include +#include +#include +#include + +#ifdef CONFIG_SYSMACROS +#include +#endif + +void os_set_line_buffering(void); +void os_set_proc_name(const char *s); +void os_setup_signal_handling(void); +void os_daemonize(void); +void os_setup_post(void); +int os_mlock(void); + +#define closesocket(s) close(s) +#define ioctlsocket(s, r, v) ioctl(s, r, v) + +typedef struct timeval qemu_timeval; +#define qemu_gettimeofday(tp) gettimeofday(tp, NULL) + +bool is_daemonized(void); + +/** + * qemu_alloc_stack: + * @sz: pointer to a size_t holding the requested usable stack size + * + * Allocate memory that can be used as a stack, for instance for + * coroutines. If the memory cannot be allocated, this function + * will abort (like g_malloc()). This function also inserts an + * additional guard page to catch a potential stack overflow. + * Note that the memory required for the guard page and alignment + * and minimal stack size restrictions will increase the value of sz. + * + * The allocated stack must be freed with qemu_free_stack(). + * + * Returns: pointer to (the lowest address of) the stack memory. + */ +void *qemu_alloc_stack(size_t *sz); + +/** + * qemu_free_stack: + * @stack: stack to free + * @sz: size of stack in bytes + * + * Free a stack allocated via qemu_alloc_stack(). Note that sz must + * be exactly the adjusted stack size returned by qemu_alloc_stack. + */ +void qemu_free_stack(void *stack, size_t sz); + +/* POSIX and Mingw32 differ in the name of the stdio lock functions. */ + +static inline void qemu_flockfile(FILE *f) +{ + flockfile(f); +} + +static inline void qemu_funlockfile(FILE *f) +{ + funlockfile(f); +} + +#endif diff --git a/include/sysemu/os-win32.h b/include/sysemu/os-win32.h new file mode 100644 index 000000000..5346d51e8 --- /dev/null +++ b/include/sysemu/os-win32.h @@ -0,0 +1,197 @@ +/* + * win32 specific declarations + * + * Copyright (c) 2003-2008 Fabrice Bellard + * Copyright (c) 2010 Jes Sorensen + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ + +#ifndef QEMU_OS_WIN32_H +#define QEMU_OS_WIN32_H + +#include +#include +#include + +#if defined(_WIN64) +/* On w64, setjmp is implemented by _setjmp which needs a second parameter. + * If this parameter is NULL, longjump does no stack unwinding. + * That is what we need for QEMU. Passing the value of register rsp (default) + * lets longjmp try a stack unwinding which will crash with generated code. */ +# undef setjmp +# define setjmp(env) _setjmp(env, NULL) +#endif +/* QEMU uses sigsetjmp()/siglongjmp() as the portable way to specify + * "longjmp and don't touch the signal masks". Since we know that the + * savemask parameter will always be zero we can safely define these + * in terms of setjmp/longjmp on Win32. + */ +#define sigjmp_buf jmp_buf +#define sigsetjmp(env, savemask) setjmp(env) +#define siglongjmp(env, val) longjmp(env, val) + +/* Missing POSIX functions. Don't use MinGW-w64 macros. */ +#ifndef _POSIX_THREAD_SAFE_FUNCTIONS +#undef gmtime_r +struct tm *gmtime_r(const time_t *timep, struct tm *result); +#undef localtime_r +struct tm *localtime_r(const time_t *timep, struct tm *result); +#endif /* _POSIX_THREAD_SAFE_FUNCTIONS */ + +static inline void os_setup_signal_handling(void) {} +static inline void os_daemonize(void) {} +static inline void os_setup_post(void) {} +void os_set_line_buffering(void); +static inline void os_set_proc_name(const char *dummy) {} + +int getpagesize(void); + +#if !defined(EPROTONOSUPPORT) +# define EPROTONOSUPPORT EINVAL +#endif + +typedef struct { + long tv_sec; + long tv_usec; +} qemu_timeval; +int qemu_gettimeofday(qemu_timeval *tp); + +static inline bool is_daemonized(void) +{ + return false; +} + +static inline int os_mlock(void) +{ + return -ENOSYS; +} + +#define fsync _commit + +#if !defined(lseek) +# define lseek _lseeki64 +#endif + +int qemu_ftruncate64(int, int64_t); + +#if !defined(ftruncate) +# define ftruncate qemu_ftruncate64 +#endif + +static inline char *realpath(const char *path, char *resolved_path) +{ + _fullpath(resolved_path, path, _MAX_PATH); + return resolved_path; +} + +/* ??? Mingw appears to export _lock_file and _unlock_file as the functions + * with which to lock a stdio handle. But something is wrong in the markup, + * either in the header or the library, such that we get undefined references + * to "_imp___lock_file" etc when linking. Since we seem to have no other + * alternative, and the usage within the logging functions isn't critical, + * ignore FILE locking. + */ + +static inline void qemu_flockfile(FILE *f) +{ +} + +static inline void qemu_funlockfile(FILE *f) +{ +} + +/* We wrap all the sockets functions so that we can + * set errno based on WSAGetLastError() + */ + +#undef connect +#define connect qemu_connect_wrap +int qemu_connect_wrap(int sockfd, const struct sockaddr *addr, + socklen_t addrlen); + +#undef listen +#define listen qemu_listen_wrap +int qemu_listen_wrap(int sockfd, int backlog); + +#undef bind +#define bind qemu_bind_wrap +int qemu_bind_wrap(int sockfd, const struct sockaddr *addr, + socklen_t addrlen); + +#undef socket +#define socket qemu_socket_wrap +int qemu_socket_wrap(int domain, int type, int protocol); + +#undef accept +#define accept qemu_accept_wrap +int qemu_accept_wrap(int sockfd, struct sockaddr *addr, + socklen_t *addrlen); + +#undef shutdown +#define shutdown qemu_shutdown_wrap +int qemu_shutdown_wrap(int sockfd, int how); + +#undef ioctlsocket +#define ioctlsocket qemu_ioctlsocket_wrap +int qemu_ioctlsocket_wrap(int fd, int req, void *val); + +#undef closesocket +#define closesocket qemu_closesocket_wrap +int qemu_closesocket_wrap(int fd); + +#undef getsockopt +#define getsockopt qemu_getsockopt_wrap +int qemu_getsockopt_wrap(int sockfd, int level, int optname, + void *optval, socklen_t *optlen); + +#undef setsockopt +#define setsockopt qemu_setsockopt_wrap +int qemu_setsockopt_wrap(int sockfd, int level, int optname, + const void *optval, socklen_t optlen); + +#undef getpeername +#define getpeername qemu_getpeername_wrap +int qemu_getpeername_wrap(int sockfd, struct sockaddr *addr, + socklen_t *addrlen); + +#undef getsockname +#define getsockname qemu_getsockname_wrap +int qemu_getsockname_wrap(int sockfd, struct sockaddr *addr, + socklen_t *addrlen); + +#undef send +#define send qemu_send_wrap +ssize_t qemu_send_wrap(int sockfd, const void *buf, size_t len, int flags); + +#undef sendto +#define sendto qemu_sendto_wrap +ssize_t qemu_sendto_wrap(int sockfd, const void *buf, size_t len, int flags, + const struct sockaddr *addr, socklen_t addrlen); + +#undef recv +#define recv qemu_recv_wrap +ssize_t qemu_recv_wrap(int sockfd, void *buf, size_t len, int flags); + +#undef recvfrom +#define recvfrom qemu_recvfrom_wrap +ssize_t qemu_recvfrom_wrap(int sockfd, void *buf, size_t len, int flags, + struct sockaddr *addr, socklen_t *addrlen); + +#endif diff --git a/include/sysemu/qtest.h b/include/sysemu/qtest.h new file mode 100644 index 000000000..4c53537ef --- /dev/null +++ b/include/sysemu/qtest.h @@ -0,0 +1,35 @@ +/* + * Test Server + * + * Copyright IBM, Corp. 2011 + * + * Authors: + * Anthony Liguori + * + * This work is licensed under the terms of the GNU GPL, version 2 or later. + * See the COPYING file in the top-level directory. + * + */ + +#ifndef QTEST_H +#define QTEST_H + + +extern bool qtest_allowed; + +static inline bool qtest_enabled(void) +{ + return qtest_allowed; +} + +bool qtest_driver(void); + +void qtest_server_init(const char *qtest_chrdev, const char *qtest_log, Error **errp); + +void qtest_server_set_send_handler(void (*send)(void *, const char *), + void *opaque); +void qtest_server_inproc_recv(void *opaque, const char *buf); + +int64_t qtest_get_virtual_clock(void); + +#endif diff --git a/include/sysemu/replay.h b/include/sysemu/replay.h new file mode 100644 index 000000000..56c0c17c3 --- /dev/null +++ b/include/sysemu/replay.h @@ -0,0 +1,232 @@ +#ifndef REPLAY_H +#define REPLAY_H + +/* + * replay.h + * + * Copyright (c) 2010-2015 Institute for System Programming + * of the Russian Academy of Sciences. + * + * This work is licensed under the terms of the GNU GPL, version 2 or later. + * See the COPYING file in the top-level directory. + * + */ + +#include "qapi/qapi-types-misc.h" +#include "qapi/qapi-types-run-state.h" +#include "qapi/qapi-types-replay.h" +#include "qapi/qapi-types-ui.h" +#include "block/aio.h" + +/* replay clock kinds */ +enum ReplayClockKind { + /* host_clock */ + REPLAY_CLOCK_HOST, + /* virtual_rt_clock */ + REPLAY_CLOCK_VIRTUAL_RT, + REPLAY_CLOCK_COUNT +}; +typedef enum ReplayClockKind ReplayClockKind; + +/* IDs of the checkpoints */ +enum ReplayCheckpoint { + CHECKPOINT_CLOCK_WARP_START, + CHECKPOINT_CLOCK_WARP_ACCOUNT, + CHECKPOINT_RESET_REQUESTED, + CHECKPOINT_SUSPEND_REQUESTED, + CHECKPOINT_CLOCK_VIRTUAL, + CHECKPOINT_CLOCK_HOST, + CHECKPOINT_CLOCK_VIRTUAL_RT, + CHECKPOINT_INIT, + CHECKPOINT_RESET, + CHECKPOINT_COUNT +}; +typedef enum ReplayCheckpoint ReplayCheckpoint; + +typedef struct ReplayNetState ReplayNetState; + +extern ReplayMode replay_mode; + +/* Name of the initial VM snapshot */ +extern char *replay_snapshot; + +/* Replay locking + * + * The locks are needed to protect the shared structures and log file + * when doing record/replay. They also are the main sync-point between + * the main-loop thread and the vCPU thread. This was a role + * previously filled by the BQL which has been busy trying to reduce + * its impact across the code. This ensures blocks of events stay + * sequential and reproducible. + */ + +void replay_mutex_lock(void); +void replay_mutex_unlock(void); + +/* Replay process control functions */ + +/*! Enables recording or saving event log with specified parameters */ +void replay_configure(struct QemuOpts *opts); +/*! Initializes timers used for snapshotting and enables events recording */ +void replay_start(void); +/*! Closes replay log file and frees other resources. */ +void replay_finish(void); +/*! Adds replay blocker with the specified error description */ +void replay_add_blocker(Error *reason); +/* Returns name of the replay log file */ +const char *replay_get_filename(void); +/* + * Start making one step in backward direction. + * Used by gdbstub for backwards debugging. + * Returns true on success. + */ +bool replay_reverse_step(void); +/* + * Start searching the last breakpoint/watchpoint. + * Used by gdbstub for backwards debugging. + * Returns true if the process successfully started. + */ +bool replay_reverse_continue(void); +/* + * Returns true if replay module is processing + * reverse_continue or reverse_step request + */ +bool replay_running_debug(void); +/* Called in reverse debugging mode to collect breakpoint information */ +void replay_breakpoint(void); +/* Called when gdb is attached to gdbstub */ +void replay_gdb_attached(void); + +/* Processing the instructions */ + +/*! Returns number of executed instructions. */ +uint64_t replay_get_current_icount(void); +/*! Returns number of instructions to execute in replay mode. */ +int replay_get_instructions(void); +/*! Updates instructions counter in replay mode. */ +void replay_account_executed_instructions(void); + +/* Interrupts and exceptions */ + +/*! Called by exception handler to write or read + exception processing events. */ +bool replay_exception(void); +/*! Used to determine that exception is pending. + Does not proceed to the next event in the log. */ +bool replay_has_exception(void); +/*! Called by interrupt handlers to write or read + interrupt processing events. + \return true if interrupt should be processed */ +bool replay_interrupt(void); +/*! Tries to read interrupt event from the file. + Returns true, when interrupt request is pending */ +bool replay_has_interrupt(void); + +/* Processing clocks and other time sources */ + +/*! Save the specified clock */ +int64_t replay_save_clock(ReplayClockKind kind, int64_t clock, + int64_t raw_icount); +/*! Read the specified clock from the log or return cached data */ +int64_t replay_read_clock(ReplayClockKind kind); +/*! Saves or reads the clock depending on the current replay mode. */ +#define REPLAY_CLOCK(clock, value) \ + (replay_mode == REPLAY_MODE_PLAY ? replay_read_clock((clock)) \ + : replay_mode == REPLAY_MODE_RECORD \ + ? replay_save_clock((clock), (value), icount_get_raw()) \ + : (value)) +#define REPLAY_CLOCK_LOCKED(clock, value) \ + (replay_mode == REPLAY_MODE_PLAY ? replay_read_clock((clock)) \ + : replay_mode == REPLAY_MODE_RECORD \ + ? replay_save_clock((clock), (value), icount_get_raw_locked()) \ + : (value)) + +/* Processing data from random generators */ + +/* Saves the values from the random number generator */ +void replay_save_random(int ret, void *buf, size_t len); +/* Loads the saved values for the random number generator */ +int replay_read_random(void *buf, size_t len); + +/* Events */ + +/*! Called when qemu shutdown is requested. */ +void replay_shutdown_request(ShutdownCause cause); +/*! Should be called at check points in the execution. + These check points are skipped, if they were not met. + Saves checkpoint in the SAVE mode and validates in the PLAY mode. + Returns 0 in PLAY mode if checkpoint was not found. + Returns 1 in all other cases. */ +bool replay_checkpoint(ReplayCheckpoint checkpoint); +/*! Used to determine that checkpoint is pending. + Does not proceed to the next event in the log. */ +bool replay_has_checkpoint(void); + +/* Asynchronous events queue */ + +/*! Disables storing events in the queue */ +void replay_disable_events(void); +/*! Enables storing events in the queue */ +void replay_enable_events(void); +/*! Returns true when saving events is enabled */ +bool replay_events_enabled(void); +/* Flushes events queue */ +void replay_flush_events(void); +/*! Adds bottom half event to the queue */ +void replay_bh_schedule_event(QEMUBH *bh); +/* Adds oneshot bottom half event to the queue */ +void replay_bh_schedule_oneshot_event(AioContext *ctx, + QEMUBHFunc *cb, void *opaque); +/*! Adds input event to the queue */ +void replay_input_event(QemuConsole *src, InputEvent *evt); +/*! Adds input sync event to the queue */ +void replay_input_sync_event(void); +/*! Adds block layer event to the queue */ +void replay_block_event(QEMUBH *bh, uint64_t id); +/*! Returns ID for the next block event */ +uint64_t blkreplay_next_id(void); + +/* Character device */ + +/*! Registers char driver to save it's events */ +void replay_register_char_driver(struct Chardev *chr); +/*! Saves write to char device event to the log */ +void replay_chr_be_write(struct Chardev *s, uint8_t *buf, int len); +/*! Writes char write return value to the replay log. */ +void replay_char_write_event_save(int res, int offset); +/*! Reads char write return value from the replay log. */ +void replay_char_write_event_load(int *res, int *offset); +/*! Reads information about read_all character event. */ +int replay_char_read_all_load(uint8_t *buf); +/*! Writes character read_all error code into the replay log. */ +void replay_char_read_all_save_error(int res); +/*! Writes character read_all execution result into the replay log. */ +void replay_char_read_all_save_buf(uint8_t *buf, int offset); + +/* Network */ + +/*! Registers replay network filter attached to some backend. */ +ReplayNetState *replay_register_net(NetFilterState *nfs); +/*! Unregisters replay network filter. */ +void replay_unregister_net(ReplayNetState *rns); +/*! Called to write network packet to the replay log. */ +void replay_net_packet_event(ReplayNetState *rns, unsigned flags, + const struct iovec *iov, int iovcnt); + +/* Audio */ + +/*! Saves/restores number of played samples of audio out operation. */ +void replay_audio_out(size_t *played); +/*! Saves/restores recorded samples of audio in operation. */ +void replay_audio_in(size_t *recorded, void *samples, size_t *wpos, size_t size); + +/* VM state operations */ + +/*! Called at the start of execution. + Loads or saves initial vmstate depending on execution mode. */ +void replay_vmstate_init(void); +/*! Called to ensure that replay state is consistent and VM snapshot + can be created */ +bool replay_can_snapshot(void); + +#endif diff --git a/include/sysemu/reset.h b/include/sysemu/reset.h new file mode 100644 index 000000000..0b0d6d759 --- /dev/null +++ b/include/sysemu/reset.h @@ -0,0 +1,10 @@ +#ifndef QEMU_SYSEMU_RESET_H +#define QEMU_SYSEMU_RESET_H + +typedef void QEMUResetHandler(void *opaque); + +void qemu_register_reset(QEMUResetHandler *func, void *opaque); +void qemu_unregister_reset(QEMUResetHandler *func, void *opaque); +void qemu_devices_reset(void); + +#endif diff --git a/include/sysemu/rng-random.h b/include/sysemu/rng-random.h new file mode 100644 index 000000000..0fdc6c697 --- /dev/null +++ b/include/sysemu/rng-random.h @@ -0,0 +1,21 @@ +/* + * QEMU Random Number Generator Backend + * + * Copyright IBM, Corp. 2012 + * + * Authors: + * Anthony Liguori + * + * This work is licensed under the terms of the GNU GPL, version 2 or later. + * See the COPYING file in the top-level directory. + */ +#ifndef QEMU_RNG_RANDOM_H +#define QEMU_RNG_RANDOM_H + +#include "qom/object.h" + +#define TYPE_RNG_RANDOM "rng-random" +OBJECT_DECLARE_SIMPLE_TYPE(RngRandom, RNG_RANDOM) + + +#endif diff --git a/include/sysemu/rng.h b/include/sysemu/rng.h new file mode 100644 index 000000000..e383f87d2 --- /dev/null +++ b/include/sysemu/rng.h @@ -0,0 +1,89 @@ +/* + * QEMU Random Number Generator Backend + * + * Copyright IBM, Corp. 2012 + * + * Authors: + * Anthony Liguori + * + * This work is licensed under the terms of the GNU GPL, version 2 or later. + * See the COPYING file in the top-level directory. + */ + +#ifndef QEMU_RNG_H +#define QEMU_RNG_H + +#include "qemu/queue.h" +#include "qom/object.h" + +#define TYPE_RNG_BACKEND "rng-backend" +OBJECT_DECLARE_TYPE(RngBackend, RngBackendClass, + RNG_BACKEND) + +#define TYPE_RNG_BUILTIN "rng-builtin" + +typedef struct RngRequest RngRequest; + +typedef void (EntropyReceiveFunc)(void *opaque, + const void *data, + size_t size); + +struct RngRequest +{ + EntropyReceiveFunc *receive_entropy; + uint8_t *data; + void *opaque; + size_t offset; + size_t size; + QSIMPLEQ_ENTRY(RngRequest) next; +}; + +struct RngBackendClass +{ + ObjectClass parent_class; + + void (*request_entropy)(RngBackend *s, RngRequest *req); + + void (*opened)(RngBackend *s, Error **errp); +}; + +struct RngBackend +{ + Object parent; + + /*< protected >*/ + bool opened; + QSIMPLEQ_HEAD(, RngRequest) requests; +}; + + +/** + * rng_backend_request_entropy: + * @s: the backend to request entropy from + * @size: the number of bytes of data to request + * @receive_entropy: a function to be invoked when entropy is available + * @opaque: data that should be passed to @receive_entropy + * + * This function is used by the front-end to request entropy from an entropy + * source. This function can be called multiple times before @receive_entropy + * is invoked with different values of @receive_entropy and @opaque. The + * backend will queue each request and handle appropriately. + * + * The backend does not need to pass the full amount of data to @receive_entropy + * but will pass a value greater than 0. + */ +void rng_backend_request_entropy(RngBackend *s, size_t size, + EntropyReceiveFunc *receive_entropy, + void *opaque); + +/** + * rng_backend_free_request: + * @s: the backend that created the request + * @req: the request to finalize + * + * Used by child rng backend classes to finalize requests once they've been + * processed. The request is removed from the list of active requests and + * deleted. + */ +void rng_backend_finalize_request(RngBackend *s, RngRequest *req); +#endif diff --git a/include/sysemu/runstate.h b/include/sysemu/runstate.h new file mode 100644 index 000000000..f76009485 --- /dev/null +++ b/include/sysemu/runstate.h @@ -0,0 +1,69 @@ +#ifndef SYSEMU_RUNSTATE_H +#define SYSEMU_RUNSTATE_H + +#include "qapi/qapi-types-run-state.h" +#include "qemu/notify.h" + +bool runstate_check(RunState state); +void runstate_set(RunState new_state); +int runstate_is_running(void); +bool runstate_needs_reset(void); +bool runstate_store(char *str, size_t size); + +typedef void VMChangeStateHandler(void *opaque, int running, RunState state); + +VMChangeStateEntry *qemu_add_vm_change_state_handler(VMChangeStateHandler *cb, + void *opaque); +VMChangeStateEntry *qemu_add_vm_change_state_handler_prio( + VMChangeStateHandler *cb, void *opaque, int priority); +VMChangeStateEntry *qdev_add_vm_change_state_handler(DeviceState *dev, + VMChangeStateHandler *cb, + void *opaque); +void qemu_del_vm_change_state_handler(VMChangeStateEntry *e); +void vm_state_notify(int running, RunState state); + +static inline bool shutdown_caused_by_guest(ShutdownCause cause) +{ + return cause >= SHUTDOWN_CAUSE_GUEST_SHUTDOWN; +} + +void vm_start(void); +int vm_prepare_start(void); +int vm_stop(RunState state); +int vm_stop_force_state(RunState state); +int vm_shutdown(void); + +typedef enum WakeupReason { + /* Always keep QEMU_WAKEUP_REASON_NONE = 0 */ + QEMU_WAKEUP_REASON_NONE = 0, + QEMU_WAKEUP_REASON_RTC, + QEMU_WAKEUP_REASON_PMTIMER, + QEMU_WAKEUP_REASON_OTHER, +} WakeupReason; + +void qemu_exit_preconfig_request(void); +void qemu_system_reset_request(ShutdownCause reason); +void qemu_system_suspend_request(void); +void qemu_register_suspend_notifier(Notifier *notifier); +bool qemu_wakeup_suspend_enabled(void); +void qemu_system_wakeup_request(WakeupReason reason, Error **errp); +void qemu_system_wakeup_enable(WakeupReason reason, bool enabled); +void qemu_register_wakeup_notifier(Notifier *notifier); +void qemu_register_wakeup_support(void); +void qemu_system_shutdown_request(ShutdownCause reason); +void qemu_system_powerdown_request(void); +void qemu_register_powerdown_notifier(Notifier *notifier); +void qemu_register_shutdown_notifier(Notifier *notifier); +void qemu_system_debug_request(void); +void qemu_system_vmstop_request(RunState reason); +void qemu_system_vmstop_request_prepare(void); +bool qemu_vmstop_requested(RunState *r); +ShutdownCause qemu_shutdown_requested_get(void); +ShutdownCause qemu_reset_requested_get(void); +void qemu_system_killed(int signal, pid_t pid); +void qemu_system_reset(ShutdownCause reason); +void qemu_system_guest_panicked(GuestPanicInformation *info); +void qemu_system_guest_crashloaded(GuestPanicInformation *info); + +#endif + diff --git a/include/sysemu/seccomp.h b/include/sysemu/seccomp.h new file mode 100644 index 000000000..fe859894f --- /dev/null +++ b/include/sysemu/seccomp.h @@ -0,0 +1,26 @@ +/* + * QEMU seccomp mode 2 support with libseccomp + * + * Copyright IBM, Corp. 2012 + * + * Authors: + * Eduardo Otubo + * + * This work is licensed under the terms of the GNU GPL, version 2. See + * the COPYING file in the top-level directory. + * + * Contributions after 2012-01-13 are licensed under the terms of the + * GNU GPL, version 2 or (at your option) any later version. + */ +#ifndef QEMU_SECCOMP_H +#define QEMU_SECCOMP_H + +#define QEMU_SECCOMP_SET_DEFAULT (1 << 0) +#define QEMU_SECCOMP_SET_OBSOLETE (1 << 1) +#define QEMU_SECCOMP_SET_PRIVILEGED (1 << 2) +#define QEMU_SECCOMP_SET_SPAWN (1 << 3) +#define QEMU_SECCOMP_SET_RESOURCECTL (1 << 4) + +int parse_sandbox(void *opaque, QemuOpts *opts, Error **errp); + +#endif diff --git a/include/sysemu/sev.h b/include/sysemu/sev.h new file mode 100644 index 000000000..98c1ec8d3 --- /dev/null +++ b/include/sysemu/sev.h @@ -0,0 +1,21 @@ +/* + * QEMU Secure Encrypted Virutualization (SEV) support + * + * Copyright: Advanced Micro Devices, 2016-2018 + * + * Authors: + * Brijesh Singh + * + * This work is licensed under the terms of the GNU GPL, version 2 or later. + * See the COPYING file in the top-level directory. + * + */ + +#ifndef QEMU_SEV_H +#define QEMU_SEV_H + +#include "sysemu/kvm.h" + +void *sev_guest_init(const char *id); +int sev_encrypt_data(void *handle, uint8_t *ptr, uint64_t len); +#endif diff --git a/include/sysemu/sysemu.h b/include/sysemu/sysemu.h new file mode 100644 index 000000000..817ff4cf7 --- /dev/null +++ b/include/sysemu/sysemu.h @@ -0,0 +1,130 @@ +#ifndef SYSEMU_H +#define SYSEMU_H +/* Misc. things related to the system emulator. */ + +#include "qemu/timer.h" +#include "qemu/notify.h" +#include "qemu/uuid.h" + +/* vl.c */ + +extern const char *bios_name; +extern int only_migratable; +extern const char *qemu_name; +extern QemuUUID qemu_uuid; +extern bool qemu_uuid_set; + +void qemu_add_data_dir(char *path); + +void qemu_add_exit_notifier(Notifier *notify); +void qemu_remove_exit_notifier(Notifier *notify); + +extern bool machine_init_done; + +void qemu_add_machine_init_done_notifier(Notifier *notify); +void qemu_remove_machine_init_done_notifier(Notifier *notify); + +extern int autostart; + +typedef enum { + VGA_NONE, VGA_STD, VGA_CIRRUS, VGA_VMWARE, VGA_XENFB, VGA_QXL, + VGA_TCX, VGA_CG3, VGA_DEVICE, VGA_VIRTIO, + VGA_TYPE_MAX, +} VGAInterfaceType; + +extern int vga_interface_type; + +extern int graphic_width; +extern int graphic_height; +extern int graphic_depth; +extern int display_opengl; +extern const char *keyboard_layout; +extern int win2k_install_hack; +extern int alt_grab; +extern int ctrl_grab; +extern int graphic_rotate; +extern int no_shutdown; +extern int old_param; +extern int boot_menu; +extern bool boot_strict; +extern uint8_t *boot_splash_filedata; +extern bool enable_mlock; +extern bool enable_cpu_pm; +extern QEMUClockType rtc_clock; + +#define MAX_OPTION_ROMS 16 +typedef struct QEMUOptionRom { + const char *name; + int32_t bootindex; +} QEMUOptionRom; +extern QEMUOptionRom option_rom[MAX_OPTION_ROMS]; +extern int nb_option_roms; + +#define MAX_PROM_ENVS 128 +extern const char *prom_envs[MAX_PROM_ENVS]; +extern unsigned int nb_prom_envs; + +/* pcie aer error injection */ +void hmp_pcie_aer_inject_error(Monitor *mon, const QDict *qdict); + +/* serial ports */ + +/* Return the Chardev for serial port i, or NULL if none */ +Chardev *serial_hd(int i); +/* return the number of serial ports defined by the user. serial_hd(i) + * will always return NULL for any i which is greater than or equal to this. + */ +int serial_max_hds(void); + +/* parallel ports */ + +#define MAX_PARALLEL_PORTS 3 + +extern Chardev *parallel_hds[MAX_PARALLEL_PORTS]; + +void hmp_info_usb(Monitor *mon, const QDict *qdict); + +void add_boot_device_path(int32_t bootindex, DeviceState *dev, + const char *suffix); +char *get_boot_devices_list(size_t *size); + +DeviceState *get_boot_device(uint32_t position); +void check_boot_index(int32_t bootindex, Error **errp); +void del_boot_device_path(DeviceState *dev, const char *suffix); +void device_add_bootindex_property(Object *obj, int32_t *bootindex, + const char *name, const char *suffix, + DeviceState *dev); +void restore_boot_order(void *opaque); +void validate_bootdevices(const char *devices, Error **errp); +void add_boot_device_lchs(DeviceState *dev, const char *suffix, + uint32_t lcyls, uint32_t lheads, uint32_t lsecs); +void del_boot_device_lchs(DeviceState *dev, const char *suffix); +char *get_boot_devices_lchs_list(size_t *size); + +/* handler to set the boot_device order for a specific type of MachineClass */ +typedef void QEMUBootSetHandler(void *opaque, const char *boot_order, + Error **errp); +void qemu_register_boot_set(QEMUBootSetHandler *func, void *opaque); +void qemu_boot_set(const char *boot_order, Error **errp); + +QemuOpts *qemu_get_machine_opts(void); + +bool defaults_enabled(void); + +void qemu_init(int argc, char **argv, char **envp); +void qemu_main_loop(void); +void qemu_cleanup(void); + +extern QemuOptsList qemu_legacy_drive_opts; +extern QemuOptsList qemu_common_drive_opts; +extern QemuOptsList qemu_drive_opts; +extern QemuOptsList bdrv_runtime_opts; +extern QemuOptsList qemu_chardev_opts; +extern QemuOptsList qemu_device_opts; +extern QemuOptsList qemu_netdev_opts; +extern QemuOptsList qemu_nic_opts; +extern QemuOptsList qemu_net_opts; +extern QemuOptsList qemu_global_opts; +extern QemuOptsList qemu_semihosting_config_opts; + +#endif diff --git a/include/sysemu/tcg.h b/include/sysemu/tcg.h new file mode 100644 index 000000000..d9d3ca855 --- /dev/null +++ b/include/sysemu/tcg.h @@ -0,0 +1,19 @@ +/* + * QEMU TCG support + * + * This work is licensed under the terms of the GNU GPL, version 2 or later. + * See the COPYING file in the top-level directory. + */ + +#ifndef SYSEMU_TCG_H +#define SYSEMU_TCG_H + +void tcg_exec_init(unsigned long tb_size); +#ifdef CONFIG_TCG +extern bool tcg_allowed; +#define tcg_enabled() (tcg_allowed) +#else +#define tcg_enabled() 0 +#endif + +#endif diff --git a/include/sysemu/tpm.h b/include/sysemu/tpm.h new file mode 100644 index 000000000..1a85564e4 --- /dev/null +++ b/include/sysemu/tpm.h @@ -0,0 +1,76 @@ +/* + * Public TPM functions + * + * Copyright (C) 2011-2013 IBM Corporation + * + * Authors: + * Stefan Berger + * + * This work is licensed under the terms of the GNU GPL, version 2 or later. + * See the COPYING file in the top-level directory. + */ +#ifndef QEMU_TPM_H +#define QEMU_TPM_H + +#include "qapi/qapi-types-tpm.h" +#include "qom/object.h" + +int tpm_config_parse(QemuOptsList *opts_list, const char *optarg); +int tpm_init(void); +void tpm_cleanup(void); + +typedef enum TPMVersion { + TPM_VERSION_UNSPEC = 0, + TPM_VERSION_1_2 = 1, + TPM_VERSION_2_0 = 2, +} TPMVersion; + +#define TYPE_TPM_IF "tpm-if" +typedef struct TPMIfClass TPMIfClass; +DECLARE_CLASS_CHECKERS(TPMIfClass, TPM_IF, + TYPE_TPM_IF) +#define TPM_IF(obj) \ + INTERFACE_CHECK(TPMIf, (obj), TYPE_TPM_IF) + +typedef struct TPMIf TPMIf; + +struct TPMIfClass { + InterfaceClass parent_class; + + enum TpmModel model; + void (*request_completed)(TPMIf *obj, int ret); + enum TPMVersion (*get_version)(TPMIf *obj); +}; + +#define TYPE_TPM_TIS_ISA "tpm-tis" +#define TYPE_TPM_TIS_SYSBUS "tpm-tis-device" +#define TYPE_TPM_CRB "tpm-crb" +#define TYPE_TPM_SPAPR "tpm-spapr" + +#define TPM_IS_TIS_ISA(chr) \ + object_dynamic_cast(OBJECT(chr), TYPE_TPM_TIS_ISA) +#define TPM_IS_TIS_SYSBUS(chr) \ + object_dynamic_cast(OBJECT(chr), TYPE_TPM_TIS_SYSBUS) +#define TPM_IS_CRB(chr) \ + object_dynamic_cast(OBJECT(chr), TYPE_TPM_CRB) +#define TPM_IS_SPAPR(chr) \ + object_dynamic_cast(OBJECT(chr), TYPE_TPM_SPAPR) + +/* returns NULL unless there is exactly one TPM device */ +static inline TPMIf *tpm_find(void) +{ + Object *obj = object_resolve_path_type("", TYPE_TPM_IF, NULL); + + return TPM_IF(obj); +} + +static inline TPMVersion tpm_get_version(TPMIf *ti) +{ + if (!ti) { + return TPM_VERSION_UNSPEC; + } + + return TPM_IF_GET_CLASS(ti)->get_version(ti); +} + +#endif /* QEMU_TPM_H */ diff --git a/include/sysemu/tpm_backend.h b/include/sysemu/tpm_backend.h new file mode 100644 index 000000000..6f078f5f4 --- /dev/null +++ b/include/sysemu/tpm_backend.h @@ -0,0 +1,212 @@ +/* + * QEMU TPM Backend + * + * Copyright IBM, Corp. 2013 + * + * Authors: + * Stefan Berger + * + * This work is licensed under the terms of the GNU GPL, version 2 or later. + * See the COPYING file in the top-level directory. + */ + +#ifndef TPM_BACKEND_H +#define TPM_BACKEND_H + +#include "qom/object.h" +#include "qemu/option.h" +#include "sysemu/tpm.h" +#include "qapi/error.h" + +#define TYPE_TPM_BACKEND "tpm-backend" +OBJECT_DECLARE_TYPE(TPMBackend, TPMBackendClass, + TPM_BACKEND) + + +typedef struct TPMBackendCmd { + uint8_t locty; + const uint8_t *in; + uint32_t in_len; + uint8_t *out; + uint32_t out_len; + bool selftest_done; +} TPMBackendCmd; + +struct TPMBackend { + Object parent; + + /*< protected >*/ + TPMIf *tpmif; + bool opened; + bool had_startup_error; + TPMBackendCmd *cmd; + + /* */ + char *id; + + QLIST_ENTRY(TPMBackend) list; +}; + +struct TPMBackendClass { + ObjectClass parent_class; + + enum TpmType type; + const QemuOptDesc *opts; + /* get a descriptive text of the backend to display to the user */ + const char *desc; + + TPMBackend *(*create)(QemuOpts *opts); + + /* start up the TPM on the backend - optional */ + int (*startup_tpm)(TPMBackend *t, size_t buffersize); + + /* optional */ + void (*reset)(TPMBackend *t); + + void (*cancel_cmd)(TPMBackend *t); + + /* optional */ + bool (*get_tpm_established_flag)(TPMBackend *t); + + /* optional */ + int (*reset_tpm_established_flag)(TPMBackend *t, uint8_t locty); + + TPMVersion (*get_tpm_version)(TPMBackend *t); + + size_t (*get_buffer_size)(TPMBackend *t); + + TpmTypeOptions *(*get_tpm_options)(TPMBackend *t); + + void (*handle_request)(TPMBackend *s, TPMBackendCmd *cmd, Error **errp); +}; + +/** + * tpm_backend_get_type: + * @s: the backend + * + * Returns the TpmType of the backend. + */ +enum TpmType tpm_backend_get_type(TPMBackend *s); + +/** + * tpm_backend_init: + * @s: the backend to initialized + * @tpmif: TPM interface + * @datacb: callback for sending data to frontend + * @errp: a pointer to return the #Error object if an error occurs. + * + * Initialize the backend with the given variables. + * + * Returns 0 on success. + */ +int tpm_backend_init(TPMBackend *s, TPMIf *tpmif, Error **errp); + +/** + * tpm_backend_startup_tpm: + * @s: the backend whose TPM support is to be started + * @buffersize: the buffer size the TPM is supposed to use, + * 0 to leave it as-is + * + * Returns 0 on success. + */ +int tpm_backend_startup_tpm(TPMBackend *s, size_t buffersize); + +/** + * tpm_backend_had_startup_error: + * @s: the backend to query for a statup error + * + * Check whether the backend had an error during startup. Returns + * false if no error occurred and the backend can be used, true + * otherwise. + */ +bool tpm_backend_had_startup_error(TPMBackend *s); + +/** + * tpm_backend_deliver_request: + * @s: the backend to send the request to + * @cmd: the command to deliver + * + * Send a request to the backend. The backend will then send the request + * to the TPM implementation. + */ +void tpm_backend_deliver_request(TPMBackend *s, TPMBackendCmd *cmd); + +/** + * tpm_backend_reset: + * @s: the backend to reset + * + * Reset the backend into a well defined state with all previous errors + * reset. + */ +void tpm_backend_reset(TPMBackend *s); + +/** + * tpm_backend_cancel_cmd: + * @s: the backend + * + * Cancel any ongoing command being processed by the TPM implementation + * on behalf of the QEMU guest. + */ +void tpm_backend_cancel_cmd(TPMBackend *s); + +/** + * tpm_backend_get_tpm_established_flag: + * @s: the backend + * + * Get the TPM establishment flag. This function may be called very + * frequently by the frontend since for example in the TIS implementation + * this flag is part of a register. + */ +bool tpm_backend_get_tpm_established_flag(TPMBackend *s); + +/** + * tpm_backend_reset_tpm_established_flag: + * @s: the backend + * @locty: the locality number + * + * Reset the TPM establishment flag. + */ +int tpm_backend_reset_tpm_established_flag(TPMBackend *s, uint8_t locty); + +/** + * tpm_backend_get_tpm_version: + * @s: the backend to call into + * + * Get the TPM Version that is emulated at the backend. + * + * Returns TPMVersion. + */ +TPMVersion tpm_backend_get_tpm_version(TPMBackend *s); + +/** + * tpm_backend_get_buffer_size: + * @s: the backend to call into + * + * Get the TPM's buffer size. + * + * Returns buffer size. + */ +size_t tpm_backend_get_buffer_size(TPMBackend *s); + +/** + * tpm_backend_finish_sync: + * @s: the backend to call into + * + * Finish the pending command synchronously (this will call aio_poll() + * on qemu main AIOContext until it ends) + */ +void tpm_backend_finish_sync(TPMBackend *s); + +/** + * tpm_backend_query_tpm: + * @s: the backend + * + * Query backend tpm info + * + * Returns newly allocated TPMInfo + */ +TPMInfo *tpm_backend_query_tpm(TPMBackend *s); + +TPMBackend *qemu_find_tpm_be(const char *id); + +#endif diff --git a/include/sysemu/tpm_util.h b/include/sysemu/tpm_util.h new file mode 100644 index 000000000..08f05172a --- /dev/null +++ b/include/sysemu/tpm_util.h @@ -0,0 +1,72 @@ +/* + * TPM utility functions + * + * Copyright (c) 2010 - 2015 IBM Corporation + * Authors: + * Stefan Berger + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, see + */ + +#ifndef SYSEMU_TPM_UTIL_H +#define SYSEMU_TPM_UTIL_H + +#include "sysemu/tpm.h" +#include "qemu/bswap.h" + +void tpm_util_write_fatal_error_response(uint8_t *out, uint32_t out_len); + +bool tpm_util_is_selftest(const uint8_t *in, uint32_t in_len); + +int tpm_util_test_tpmdev(int tpm_fd, TPMVersion *tpm_version); + +static inline uint16_t tpm_cmd_get_tag(const void *b) +{ + return lduw_be_p(b); +} + +static inline void tpm_cmd_set_tag(void *b, uint16_t tag) +{ + stw_be_p(b, tag); +} + +static inline uint32_t tpm_cmd_get_size(const void *b) +{ + return ldl_be_p(b + 2); +} + +static inline void tpm_cmd_set_size(void *b, uint32_t size) +{ + stl_be_p(b + 2, size); +} + +static inline uint32_t tpm_cmd_get_ordinal(const void *b) +{ + return ldl_be_p(b + 6); +} + +static inline uint32_t tpm_cmd_get_errcode(const void *b) +{ + return ldl_be_p(b + 6); +} + +static inline void tpm_cmd_set_error(void *b, uint32_t error) +{ + stl_be_p(b + 6, error); +} + +void tpm_util_show_buffer(const unsigned char *buffer, + size_t buffer_size, const char *string); + +#endif /* SYSEMU_TPM_UTIL_H */ diff --git a/include/sysemu/vhost-user-backend.h b/include/sysemu/vhost-user-backend.h new file mode 100644 index 000000000..327b0b84f --- /dev/null +++ b/include/sysemu/vhost-user-backend.h @@ -0,0 +1,48 @@ +/* + * QEMU vhost-user backend + * + * Copyright (C) 2018 Red Hat Inc + * + * Authors: + * Marc-André Lureau + * + * This work is licensed under the terms of the GNU GPL, version 2 or later. + * See the COPYING file in the top-level directory. + */ +#ifndef QEMU_VHOST_USER_BACKEND_H +#define QEMU_VHOST_USER_BACKEND_H + +#include "qom/object.h" +#include "exec/memory.h" +#include "qemu/option.h" +#include "qemu/bitmap.h" +#include "hw/virtio/vhost.h" +#include "hw/virtio/vhost-user.h" +#include "chardev/char-fe.h" +#include "io/channel.h" + +#define TYPE_VHOST_USER_BACKEND "vhost-user-backend" +OBJECT_DECLARE_SIMPLE_TYPE(VhostUserBackend, + VHOST_USER_BACKEND) + + + +struct VhostUserBackend { + /* private */ + Object parent; + + char *chr_name; + CharBackend chr; + VhostUserState vhost_user; + struct vhost_dev dev; + VirtIODevice *vdev; + bool started; + bool completed; +}; + +int vhost_user_backend_dev_init(VhostUserBackend *b, VirtIODevice *vdev, + unsigned nvqs, Error **errp); +void vhost_user_backend_start(VhostUserBackend *b); +void vhost_user_backend_stop(VhostUserBackend *b); + +#endif diff --git a/include/sysemu/watchdog.h b/include/sysemu/watchdog.h new file mode 100644 index 000000000..a08d16380 --- /dev/null +++ b/include/sysemu/watchdog.h @@ -0,0 +1,45 @@ +/* + * Virtual hardware watchdog. + * + * Copyright (C) 2009 Red Hat Inc. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version 2 + * of the License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, see . + * + * By Richard W.M. Jones (rjones@redhat.com). + */ + +#ifndef QEMU_WATCHDOG_H +#define QEMU_WATCHDOG_H + +#include "qemu/queue.h" +#include "qapi/qapi-types-run-state.h" + +struct WatchdogTimerModel { + QLIST_ENTRY(WatchdogTimerModel) entry; + + /* Short name of the device - used to select it on the command line. */ + const char *wdt_name; + /* Longer description (eg. manufacturer and full model number). */ + const char *wdt_description; +}; +typedef struct WatchdogTimerModel WatchdogTimerModel; + +/* in hw/watchdog.c */ +int select_watchdog(const char *p); +int select_watchdog_action(const char *action); +WatchdogAction get_watchdog_action(void); +void watchdog_add_model(WatchdogTimerModel *model); +void watchdog_perform_action(void); + +#endif /* QEMU_WATCHDOG_H */ diff --git a/include/sysemu/whpx.h b/include/sysemu/whpx.h new file mode 100644 index 000000000..59edf1374 --- /dev/null +++ b/include/sysemu/whpx.h @@ -0,0 +1,26 @@ +/* + * QEMU Windows Hypervisor Platform accelerator (WHPX) support + * + * Copyright Microsoft, Corp. 2017 + * + * Authors: + * + * This work is licensed under the terms of the GNU GPL, version 2 or later. + * See the COPYING file in the top-level directory. + * + */ + +#ifndef QEMU_WHPX_H +#define QEMU_WHPX_H + +#ifdef CONFIG_WHPX + +int whpx_enabled(void); + +#else /* CONFIG_WHPX */ + +#define whpx_enabled() (0) + +#endif /* CONFIG_WHPX */ + +#endif /* QEMU_WHPX_H */ diff --git a/include/sysemu/xen-mapcache.h b/include/sysemu/xen-mapcache.h new file mode 100644 index 000000000..c8e7c2f6c --- /dev/null +++ b/include/sysemu/xen-mapcache.h @@ -0,0 +1,65 @@ +/* + * Copyright (C) 2011 Citrix Ltd. + * + * This work is licensed under the terms of the GNU GPL, version 2. See + * the COPYING file in the top-level directory. + * + */ + +#ifndef XEN_MAPCACHE_H +#define XEN_MAPCACHE_H + +#include "exec/cpu-common.h" + +typedef hwaddr (*phys_offset_to_gaddr_t)(hwaddr phys_offset, + ram_addr_t size); +#ifdef CONFIG_XEN + +void xen_map_cache_init(phys_offset_to_gaddr_t f, + void *opaque); +uint8_t *xen_map_cache(hwaddr phys_addr, hwaddr size, + uint8_t lock, bool dma); +ram_addr_t xen_ram_addr_from_mapcache(void *ptr); +void xen_invalidate_map_cache_entry(uint8_t *buffer); +void xen_invalidate_map_cache(void); +uint8_t *xen_replace_cache_entry(hwaddr old_phys_addr, + hwaddr new_phys_addr, + hwaddr size); +#else + +static inline void xen_map_cache_init(phys_offset_to_gaddr_t f, + void *opaque) +{ +} + +static inline uint8_t *xen_map_cache(hwaddr phys_addr, + hwaddr size, + uint8_t lock, + bool dma) +{ + abort(); +} + +static inline ram_addr_t xen_ram_addr_from_mapcache(void *ptr) +{ + abort(); +} + +static inline void xen_invalidate_map_cache_entry(uint8_t *buffer) +{ +} + +static inline void xen_invalidate_map_cache(void) +{ +} + +static inline uint8_t *xen_replace_cache_entry(hwaddr old_phys_addr, + hwaddr new_phys_addr, + hwaddr size) +{ + abort(); +} + +#endif + +#endif /* XEN_MAPCACHE_H */ diff --git a/include/sysemu/xen.h b/include/sysemu/xen.h new file mode 100644 index 000000000..0ca25697e --- /dev/null +++ b/include/sysemu/xen.h @@ -0,0 +1,50 @@ +/* + * QEMU Xen support + * + * This work is licensed under the terms of the GNU GPL, version 2 or later. + * See the COPYING file in the top-level directory. + */ + +#ifndef SYSEMU_XEN_H +#define SYSEMU_XEN_H + +#include "exec/cpu-common.h" + +#ifdef NEED_CPU_H +# ifdef CONFIG_XEN +# define CONFIG_XEN_IS_POSSIBLE +# endif +#else +# define CONFIG_XEN_IS_POSSIBLE +#endif + +#ifdef CONFIG_XEN_IS_POSSIBLE + +extern bool xen_allowed; + +#define xen_enabled() (xen_allowed) + +#ifndef CONFIG_USER_ONLY +void xen_hvm_modified_memory(ram_addr_t start, ram_addr_t length); +void xen_ram_alloc(ram_addr_t ram_addr, ram_addr_t size, + struct MemoryRegion *mr, Error **errp); +#endif + +#else /* !CONFIG_XEN_IS_POSSIBLE */ + +#define xen_enabled() 0 +#ifndef CONFIG_USER_ONLY +static inline void xen_hvm_modified_memory(ram_addr_t start, ram_addr_t length) +{ + /* nothing */ +} +static inline void xen_ram_alloc(ram_addr_t ram_addr, ram_addr_t size, + MemoryRegion *mr, Error **errp) +{ + g_assert_not_reached(); +} +#endif + +#endif /* CONFIG_XEN_IS_POSSIBLE */ + +#endif diff --git a/include/tcg/tcg-gvec-desc.h b/include/tcg/tcg-gvec-desc.h new file mode 100644 index 000000000..704bd8645 --- /dev/null +++ b/include/tcg/tcg-gvec-desc.h @@ -0,0 +1,66 @@ +/* + * Generic vector operation descriptor + * + * Copyright (c) 2018 Linaro + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, see . + */ + +#ifndef TCG_TCG_GVEC_DESC_H +#define TCG_TCG_GVEC_DESC_H + +/* + * This configuration allows MAXSZ to represent 2048 bytes, and + * OPRSZ to match MAXSZ, or represent the smaller values 8, 16, or 32. + * + * Encode this with: + * 0, 1, 3 -> 8, 16, 32 + * 2 -> maxsz + * + * This steals the input that would otherwise map to 24 to match maxsz. + */ +#define SIMD_MAXSZ_SHIFT 0 +#define SIMD_MAXSZ_BITS 8 + +#define SIMD_OPRSZ_SHIFT (SIMD_MAXSZ_SHIFT + SIMD_MAXSZ_BITS) +#define SIMD_OPRSZ_BITS 2 + +#define SIMD_DATA_SHIFT (SIMD_OPRSZ_SHIFT + SIMD_OPRSZ_BITS) +#define SIMD_DATA_BITS (32 - SIMD_DATA_SHIFT) + +/* Create a descriptor from components. */ +uint32_t simd_desc(uint32_t oprsz, uint32_t maxsz, int32_t data); + +/* Extract the max vector size from a descriptor. */ +static inline intptr_t simd_maxsz(uint32_t desc) +{ + return extract32(desc, SIMD_MAXSZ_SHIFT, SIMD_MAXSZ_BITS) * 8 + 8; +} + +/* Extract the operation size from a descriptor. */ +static inline intptr_t simd_oprsz(uint32_t desc) +{ + uint32_t f = extract32(desc, SIMD_OPRSZ_SHIFT, SIMD_OPRSZ_BITS); + intptr_t o = f * 8 + 8; + intptr_t m = simd_maxsz(desc); + return f == 2 ? m : o; +} + +/* Extract the operation-specific data from a descriptor. */ +static inline int32_t simd_data(uint32_t desc) +{ + return sextract32(desc, SIMD_DATA_SHIFT, SIMD_DATA_BITS); +} + +#endif diff --git a/include/tcg/tcg-mo.h b/include/tcg/tcg-mo.h new file mode 100644 index 000000000..c2c55704e --- /dev/null +++ b/include/tcg/tcg-mo.h @@ -0,0 +1,48 @@ +/* + * Tiny Code Generator for QEMU + * + * Copyright (c) 2008 Fabrice Bellard + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ + +#ifndef TCG_MO_H +#define TCG_MO_H + +typedef enum { + /* Used to indicate the type of accesses on which ordering + is to be ensured. Modeled after SPARC barriers. + + This is of the form TCG_MO_A_B where A is before B in program order. + */ + TCG_MO_LD_LD = 0x01, + TCG_MO_ST_LD = 0x02, + TCG_MO_LD_ST = 0x04, + TCG_MO_ST_ST = 0x08, + TCG_MO_ALL = 0x0F, /* OR of the above */ + + /* Used to indicate the kind of ordering which is to be ensured by the + instruction. These types are derived from x86/aarch64 instructions. + It should be noted that these are different from C11 semantics. */ + TCG_BAR_LDAQ = 0x10, /* Following ops will not come forward */ + TCG_BAR_STRL = 0x20, /* Previous ops will not be delayed */ + TCG_BAR_SC = 0x30, /* No ops cross barrier; OR of the above */ +} TCGBar; + +#endif /* TCG_MO_H */ diff --git a/include/tcg/tcg-op-gvec.h b/include/tcg/tcg-op-gvec.h new file mode 100644 index 000000000..c69a7de98 --- /dev/null +++ b/include/tcg/tcg-op-gvec.h @@ -0,0 +1,404 @@ +/* + * Generic vector operation expansion + * + * Copyright (c) 2018 Linaro + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, see . + */ + +#ifndef TCG_TCG_OP_GVEC_H +#define TCG_TCG_OP_GVEC_H + +/* + * "Generic" vectors. All operands are given as offsets from ENV, + * and therefore cannot also be allocated via tcg_global_mem_new_*. + * OPRSZ is the byte size of the vector upon which the operation is performed. + * MAXSZ is the byte size of the full vector; bytes beyond OPSZ are cleared. + * + * All sizes must be 8 or any multiple of 16. + * When OPRSZ is 8, the alignment may be 8, otherwise must be 16. + * Operands may completely, but not partially, overlap. + */ + +/* Expand a call to a gvec-style helper, with pointers to two vector + operands, and a descriptor (see tcg-gvec-desc.h). */ +typedef void gen_helper_gvec_2(TCGv_ptr, TCGv_ptr, TCGv_i32); +void tcg_gen_gvec_2_ool(uint32_t dofs, uint32_t aofs, + uint32_t oprsz, uint32_t maxsz, int32_t data, + gen_helper_gvec_2 *fn); + +/* Similarly, passing an extra data value. */ +typedef void gen_helper_gvec_2i(TCGv_ptr, TCGv_ptr, TCGv_i64, TCGv_i32); +void tcg_gen_gvec_2i_ool(uint32_t dofs, uint32_t aofs, TCGv_i64 c, + uint32_t oprsz, uint32_t maxsz, int32_t data, + gen_helper_gvec_2i *fn); + +/* Similarly, passing an extra pointer (e.g. env or float_status). */ +typedef void gen_helper_gvec_2_ptr(TCGv_ptr, TCGv_ptr, TCGv_ptr, TCGv_i32); +void tcg_gen_gvec_2_ptr(uint32_t dofs, uint32_t aofs, + TCGv_ptr ptr, uint32_t oprsz, uint32_t maxsz, + int32_t data, gen_helper_gvec_2_ptr *fn); + +/* Similarly, with three vector operands. */ +typedef void gen_helper_gvec_3(TCGv_ptr, TCGv_ptr, TCGv_ptr, TCGv_i32); +void tcg_gen_gvec_3_ool(uint32_t dofs, uint32_t aofs, uint32_t bofs, + uint32_t oprsz, uint32_t maxsz, int32_t data, + gen_helper_gvec_3 *fn); + +/* Similarly, with four vector operands. */ +typedef void gen_helper_gvec_4(TCGv_ptr, TCGv_ptr, TCGv_ptr, + TCGv_ptr, TCGv_i32); +void tcg_gen_gvec_4_ool(uint32_t dofs, uint32_t aofs, uint32_t bofs, + uint32_t cofs, uint32_t oprsz, uint32_t maxsz, + int32_t data, gen_helper_gvec_4 *fn); + +/* Similarly, with five vector operands. */ +typedef void gen_helper_gvec_5(TCGv_ptr, TCGv_ptr, TCGv_ptr, TCGv_ptr, + TCGv_ptr, TCGv_i32); +void tcg_gen_gvec_5_ool(uint32_t dofs, uint32_t aofs, uint32_t bofs, + uint32_t cofs, uint32_t xofs, uint32_t oprsz, + uint32_t maxsz, int32_t data, gen_helper_gvec_5 *fn); + +typedef void gen_helper_gvec_3_ptr(TCGv_ptr, TCGv_ptr, TCGv_ptr, + TCGv_ptr, TCGv_i32); +void tcg_gen_gvec_3_ptr(uint32_t dofs, uint32_t aofs, uint32_t bofs, + TCGv_ptr ptr, uint32_t oprsz, uint32_t maxsz, + int32_t data, gen_helper_gvec_3_ptr *fn); + +typedef void gen_helper_gvec_4_ptr(TCGv_ptr, TCGv_ptr, TCGv_ptr, + TCGv_ptr, TCGv_ptr, TCGv_i32); +void tcg_gen_gvec_4_ptr(uint32_t dofs, uint32_t aofs, uint32_t bofs, + uint32_t cofs, TCGv_ptr ptr, uint32_t oprsz, + uint32_t maxsz, int32_t data, + gen_helper_gvec_4_ptr *fn); + +typedef void gen_helper_gvec_5_ptr(TCGv_ptr, TCGv_ptr, TCGv_ptr, TCGv_ptr, + TCGv_ptr, TCGv_ptr, TCGv_i32); +void tcg_gen_gvec_5_ptr(uint32_t dofs, uint32_t aofs, uint32_t bofs, + uint32_t cofs, uint32_t eofs, TCGv_ptr ptr, + uint32_t oprsz, uint32_t maxsz, int32_t data, + gen_helper_gvec_5_ptr *fn); + +/* Expand a gvec operation. Either inline or out-of-line depending on + the actual vector size and the operations supported by the host. */ +typedef struct { + /* Expand inline as a 64-bit or 32-bit integer. + Only one of these will be non-NULL. */ + void (*fni8)(TCGv_i64, TCGv_i64); + void (*fni4)(TCGv_i32, TCGv_i32); + /* Expand inline with a host vector type. */ + void (*fniv)(unsigned, TCGv_vec, TCGv_vec); + /* Expand out-of-line helper w/descriptor. */ + gen_helper_gvec_2 *fno; + /* The optional opcodes, if any, utilized by .fniv. */ + const TCGOpcode *opt_opc; + /* The data argument to the out-of-line helper. */ + int32_t data; + /* The vector element size, if applicable. */ + uint8_t vece; + /* Prefer i64 to v64. */ + bool prefer_i64; + /* Load dest as a 2nd source operand. */ + bool load_dest; +} GVecGen2; + +typedef struct { + /* Expand inline as a 64-bit or 32-bit integer. + Only one of these will be non-NULL. */ + void (*fni8)(TCGv_i64, TCGv_i64, int64_t); + void (*fni4)(TCGv_i32, TCGv_i32, int32_t); + /* Expand inline with a host vector type. */ + void (*fniv)(unsigned, TCGv_vec, TCGv_vec, int64_t); + /* Expand out-of-line helper w/descriptor, data in descriptor. */ + gen_helper_gvec_2 *fno; + /* Expand out-of-line helper w/descriptor, data as argument. */ + gen_helper_gvec_2i *fnoi; + /* The optional opcodes, if any, utilized by .fniv. */ + const TCGOpcode *opt_opc; + /* The vector element size, if applicable. */ + uint8_t vece; + /* Prefer i64 to v64. */ + bool prefer_i64; + /* Load dest as a 3rd source operand. */ + bool load_dest; +} GVecGen2i; + +typedef struct { + /* Expand inline as a 64-bit or 32-bit integer. + Only one of these will be non-NULL. */ + void (*fni8)(TCGv_i64, TCGv_i64, TCGv_i64); + void (*fni4)(TCGv_i32, TCGv_i32, TCGv_i32); + /* Expand inline with a host vector type. */ + void (*fniv)(unsigned, TCGv_vec, TCGv_vec, TCGv_vec); + /* Expand out-of-line helper w/descriptor. */ + gen_helper_gvec_2i *fno; + /* The optional opcodes, if any, utilized by .fniv. */ + const TCGOpcode *opt_opc; + /* The data argument to the out-of-line helper. */ + uint32_t data; + /* The vector element size, if applicable. */ + uint8_t vece; + /* Prefer i64 to v64. */ + bool prefer_i64; + /* Load scalar as 1st source operand. */ + bool scalar_first; +} GVecGen2s; + +typedef struct { + /* Expand inline as a 64-bit or 32-bit integer. + Only one of these will be non-NULL. */ + void (*fni8)(TCGv_i64, TCGv_i64, TCGv_i64); + void (*fni4)(TCGv_i32, TCGv_i32, TCGv_i32); + /* Expand inline with a host vector type. */ + void (*fniv)(unsigned, TCGv_vec, TCGv_vec, TCGv_vec); + /* Expand out-of-line helper w/descriptor. */ + gen_helper_gvec_3 *fno; + /* The optional opcodes, if any, utilized by .fniv. */ + const TCGOpcode *opt_opc; + /* The data argument to the out-of-line helper. */ + int32_t data; + /* The vector element size, if applicable. */ + uint8_t vece; + /* Prefer i64 to v64. */ + bool prefer_i64; + /* Load dest as a 3rd source operand. */ + bool load_dest; +} GVecGen3; + +typedef struct { + /* + * Expand inline as a 64-bit or 32-bit integer. Only one of these will be + * non-NULL. + */ + void (*fni8)(TCGv_i64, TCGv_i64, TCGv_i64, int64_t); + void (*fni4)(TCGv_i32, TCGv_i32, TCGv_i32, int32_t); + /* Expand inline with a host vector type. */ + void (*fniv)(unsigned, TCGv_vec, TCGv_vec, TCGv_vec, int64_t); + /* Expand out-of-line helper w/descriptor, data in descriptor. */ + gen_helper_gvec_3 *fno; + /* The optional opcodes, if any, utilized by .fniv. */ + const TCGOpcode *opt_opc; + /* The vector element size, if applicable. */ + uint8_t vece; + /* Prefer i64 to v64. */ + bool prefer_i64; + /* Load dest as a 3rd source operand. */ + bool load_dest; +} GVecGen3i; + +typedef struct { + /* Expand inline as a 64-bit or 32-bit integer. + Only one of these will be non-NULL. */ + void (*fni8)(TCGv_i64, TCGv_i64, TCGv_i64, TCGv_i64); + void (*fni4)(TCGv_i32, TCGv_i32, TCGv_i32, TCGv_i32); + /* Expand inline with a host vector type. */ + void (*fniv)(unsigned, TCGv_vec, TCGv_vec, TCGv_vec, TCGv_vec); + /* Expand out-of-line helper w/descriptor. */ + gen_helper_gvec_4 *fno; + /* The optional opcodes, if any, utilized by .fniv. */ + const TCGOpcode *opt_opc; + /* The data argument to the out-of-line helper. */ + int32_t data; + /* The vector element size, if applicable. */ + uint8_t vece; + /* Prefer i64 to v64. */ + bool prefer_i64; + /* Write aofs as a 2nd dest operand. */ + bool write_aofs; +} GVecGen4; + +void tcg_gen_gvec_2(uint32_t dofs, uint32_t aofs, + uint32_t oprsz, uint32_t maxsz, const GVecGen2 *); +void tcg_gen_gvec_2i(uint32_t dofs, uint32_t aofs, uint32_t oprsz, + uint32_t maxsz, int64_t c, const GVecGen2i *); +void tcg_gen_gvec_2s(uint32_t dofs, uint32_t aofs, uint32_t oprsz, + uint32_t maxsz, TCGv_i64 c, const GVecGen2s *); +void tcg_gen_gvec_3(uint32_t dofs, uint32_t aofs, uint32_t bofs, + uint32_t oprsz, uint32_t maxsz, const GVecGen3 *); +void tcg_gen_gvec_3i(uint32_t dofs, uint32_t aofs, uint32_t bofs, + uint32_t oprsz, uint32_t maxsz, int64_t c, + const GVecGen3i *); +void tcg_gen_gvec_4(uint32_t dofs, uint32_t aofs, uint32_t bofs, uint32_t cofs, + uint32_t oprsz, uint32_t maxsz, const GVecGen4 *); + +/* Expand a specific vector operation. */ + +void tcg_gen_gvec_mov(unsigned vece, uint32_t dofs, uint32_t aofs, + uint32_t oprsz, uint32_t maxsz); +void tcg_gen_gvec_not(unsigned vece, uint32_t dofs, uint32_t aofs, + uint32_t oprsz, uint32_t maxsz); +void tcg_gen_gvec_neg(unsigned vece, uint32_t dofs, uint32_t aofs, + uint32_t oprsz, uint32_t maxsz); +void tcg_gen_gvec_abs(unsigned vece, uint32_t dofs, uint32_t aofs, + uint32_t oprsz, uint32_t maxsz); + +void tcg_gen_gvec_add(unsigned vece, uint32_t dofs, uint32_t aofs, + uint32_t bofs, uint32_t oprsz, uint32_t maxsz); +void tcg_gen_gvec_sub(unsigned vece, uint32_t dofs, uint32_t aofs, + uint32_t bofs, uint32_t oprsz, uint32_t maxsz); +void tcg_gen_gvec_mul(unsigned vece, uint32_t dofs, uint32_t aofs, + uint32_t bofs, uint32_t oprsz, uint32_t maxsz); + +void tcg_gen_gvec_addi(unsigned vece, uint32_t dofs, uint32_t aofs, + int64_t c, uint32_t oprsz, uint32_t maxsz); +void tcg_gen_gvec_muli(unsigned vece, uint32_t dofs, uint32_t aofs, + int64_t c, uint32_t oprsz, uint32_t maxsz); + +void tcg_gen_gvec_adds(unsigned vece, uint32_t dofs, uint32_t aofs, + TCGv_i64 c, uint32_t oprsz, uint32_t maxsz); +void tcg_gen_gvec_subs(unsigned vece, uint32_t dofs, uint32_t aofs, + TCGv_i64 c, uint32_t oprsz, uint32_t maxsz); +void tcg_gen_gvec_muls(unsigned vece, uint32_t dofs, uint32_t aofs, + TCGv_i64 c, uint32_t oprsz, uint32_t maxsz); + +/* Saturated arithmetic. */ +void tcg_gen_gvec_ssadd(unsigned vece, uint32_t dofs, uint32_t aofs, + uint32_t bofs, uint32_t oprsz, uint32_t maxsz); +void tcg_gen_gvec_sssub(unsigned vece, uint32_t dofs, uint32_t aofs, + uint32_t bofs, uint32_t oprsz, uint32_t maxsz); +void tcg_gen_gvec_usadd(unsigned vece, uint32_t dofs, uint32_t aofs, + uint32_t bofs, uint32_t oprsz, uint32_t maxsz); +void tcg_gen_gvec_ussub(unsigned vece, uint32_t dofs, uint32_t aofs, + uint32_t bofs, uint32_t oprsz, uint32_t maxsz); + +/* Min/max. */ +void tcg_gen_gvec_smin(unsigned vece, uint32_t dofs, uint32_t aofs, + uint32_t bofs, uint32_t oprsz, uint32_t maxsz); +void tcg_gen_gvec_umin(unsigned vece, uint32_t dofs, uint32_t aofs, + uint32_t bofs, uint32_t oprsz, uint32_t maxsz); +void tcg_gen_gvec_smax(unsigned vece, uint32_t dofs, uint32_t aofs, + uint32_t bofs, uint32_t oprsz, uint32_t maxsz); +void tcg_gen_gvec_umax(unsigned vece, uint32_t dofs, uint32_t aofs, + uint32_t bofs, uint32_t oprsz, uint32_t maxsz); + +void tcg_gen_gvec_and(unsigned vece, uint32_t dofs, uint32_t aofs, + uint32_t bofs, uint32_t oprsz, uint32_t maxsz); +void tcg_gen_gvec_or(unsigned vece, uint32_t dofs, uint32_t aofs, + uint32_t bofs, uint32_t oprsz, uint32_t maxsz); +void tcg_gen_gvec_xor(unsigned vece, uint32_t dofs, uint32_t aofs, + uint32_t bofs, uint32_t oprsz, uint32_t maxsz); +void tcg_gen_gvec_andc(unsigned vece, uint32_t dofs, uint32_t aofs, + uint32_t bofs, uint32_t oprsz, uint32_t maxsz); +void tcg_gen_gvec_orc(unsigned vece, uint32_t dofs, uint32_t aofs, + uint32_t bofs, uint32_t oprsz, uint32_t maxsz); +void tcg_gen_gvec_nand(unsigned vece, uint32_t dofs, uint32_t aofs, + uint32_t bofs, uint32_t oprsz, uint32_t maxsz); +void tcg_gen_gvec_nor(unsigned vece, uint32_t dofs, uint32_t aofs, + uint32_t bofs, uint32_t oprsz, uint32_t maxsz); +void tcg_gen_gvec_eqv(unsigned vece, uint32_t dofs, uint32_t aofs, + uint32_t bofs, uint32_t oprsz, uint32_t maxsz); + +void tcg_gen_gvec_andi(unsigned vece, uint32_t dofs, uint32_t aofs, + int64_t c, uint32_t oprsz, uint32_t maxsz); +void tcg_gen_gvec_xori(unsigned vece, uint32_t dofs, uint32_t aofs, + int64_t c, uint32_t oprsz, uint32_t maxsz); +void tcg_gen_gvec_ori(unsigned vece, uint32_t dofs, uint32_t aofs, + int64_t c, uint32_t oprsz, uint32_t maxsz); + +void tcg_gen_gvec_ands(unsigned vece, uint32_t dofs, uint32_t aofs, + TCGv_i64 c, uint32_t oprsz, uint32_t maxsz); +void tcg_gen_gvec_xors(unsigned vece, uint32_t dofs, uint32_t aofs, + TCGv_i64 c, uint32_t oprsz, uint32_t maxsz); +void tcg_gen_gvec_ors(unsigned vece, uint32_t dofs, uint32_t aofs, + TCGv_i64 c, uint32_t oprsz, uint32_t maxsz); + +void tcg_gen_gvec_dup_mem(unsigned vece, uint32_t dofs, uint32_t aofs, + uint32_t s, uint32_t m); +void tcg_gen_gvec_dup_imm(unsigned vece, uint32_t dofs, uint32_t s, + uint32_t m, uint64_t imm); +void tcg_gen_gvec_dup_i32(unsigned vece, uint32_t dofs, uint32_t s, + uint32_t m, TCGv_i32); +void tcg_gen_gvec_dup_i64(unsigned vece, uint32_t dofs, uint32_t s, + uint32_t m, TCGv_i64); + +#if TARGET_LONG_BITS == 64 +# define tcg_gen_gvec_dup_tl tcg_gen_gvec_dup_i64 +#else +# define tcg_gen_gvec_dup_tl tcg_gen_gvec_dup_i32 +#endif + +void tcg_gen_gvec_shli(unsigned vece, uint32_t dofs, uint32_t aofs, + int64_t shift, uint32_t oprsz, uint32_t maxsz); +void tcg_gen_gvec_shri(unsigned vece, uint32_t dofs, uint32_t aofs, + int64_t shift, uint32_t oprsz, uint32_t maxsz); +void tcg_gen_gvec_sari(unsigned vece, uint32_t dofs, uint32_t aofs, + int64_t shift, uint32_t oprsz, uint32_t maxsz); +void tcg_gen_gvec_rotli(unsigned vece, uint32_t dofs, uint32_t aofs, + int64_t shift, uint32_t oprsz, uint32_t maxsz); +void tcg_gen_gvec_rotri(unsigned vece, uint32_t dofs, uint32_t aofs, + int64_t shift, uint32_t oprsz, uint32_t maxsz); + +void tcg_gen_gvec_shls(unsigned vece, uint32_t dofs, uint32_t aofs, + TCGv_i32 shift, uint32_t oprsz, uint32_t maxsz); +void tcg_gen_gvec_shrs(unsigned vece, uint32_t dofs, uint32_t aofs, + TCGv_i32 shift, uint32_t oprsz, uint32_t maxsz); +void tcg_gen_gvec_sars(unsigned vece, uint32_t dofs, uint32_t aofs, + TCGv_i32 shift, uint32_t oprsz, uint32_t maxsz); +void tcg_gen_gvec_rotls(unsigned vece, uint32_t dofs, uint32_t aofs, + TCGv_i32 shift, uint32_t oprsz, uint32_t maxsz); + +/* + * Perform vector shift by vector element, modulo the element size. + * E.g. D[i] = A[i] << (B[i] % (8 << vece)). + */ +void tcg_gen_gvec_shlv(unsigned vece, uint32_t dofs, uint32_t aofs, + uint32_t bofs, uint32_t oprsz, uint32_t maxsz); +void tcg_gen_gvec_shrv(unsigned vece, uint32_t dofs, uint32_t aofs, + uint32_t bofs, uint32_t oprsz, uint32_t maxsz); +void tcg_gen_gvec_sarv(unsigned vece, uint32_t dofs, uint32_t aofs, + uint32_t bofs, uint32_t oprsz, uint32_t maxsz); +void tcg_gen_gvec_rotlv(unsigned vece, uint32_t dofs, uint32_t aofs, + uint32_t bofs, uint32_t oprsz, uint32_t maxsz); +void tcg_gen_gvec_rotrv(unsigned vece, uint32_t dofs, uint32_t aofs, + uint32_t bofs, uint32_t oprsz, uint32_t maxsz); + +void tcg_gen_gvec_cmp(TCGCond cond, unsigned vece, uint32_t dofs, + uint32_t aofs, uint32_t bofs, + uint32_t oprsz, uint32_t maxsz); + +/* + * Perform vector bit select: d = (b & a) | (c & ~a). + */ +void tcg_gen_gvec_bitsel(unsigned vece, uint32_t dofs, uint32_t aofs, + uint32_t bofs, uint32_t cofs, + uint32_t oprsz, uint32_t maxsz); + +/* + * 64-bit vector operations. Use these when the register has been allocated + * with tcg_global_mem_new_i64, and so we cannot also address it via pointer. + * OPRSZ = MAXSZ = 8. + */ + +void tcg_gen_vec_neg8_i64(TCGv_i64 d, TCGv_i64 a); +void tcg_gen_vec_neg16_i64(TCGv_i64 d, TCGv_i64 a); +void tcg_gen_vec_neg32_i64(TCGv_i64 d, TCGv_i64 a); + +void tcg_gen_vec_add8_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b); +void tcg_gen_vec_add16_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b); +void tcg_gen_vec_add32_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b); + +void tcg_gen_vec_sub8_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b); +void tcg_gen_vec_sub16_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b); +void tcg_gen_vec_sub32_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b); + +void tcg_gen_vec_shl8i_i64(TCGv_i64 d, TCGv_i64 a, int64_t); +void tcg_gen_vec_shl16i_i64(TCGv_i64 d, TCGv_i64 a, int64_t); +void tcg_gen_vec_shr8i_i64(TCGv_i64 d, TCGv_i64 a, int64_t); +void tcg_gen_vec_shr16i_i64(TCGv_i64 d, TCGv_i64 a, int64_t); +void tcg_gen_vec_sar8i_i64(TCGv_i64 d, TCGv_i64 a, int64_t); +void tcg_gen_vec_sar16i_i64(TCGv_i64 d, TCGv_i64 a, int64_t); +void tcg_gen_vec_rotl8i_i64(TCGv_i64 d, TCGv_i64 a, int64_t c); +void tcg_gen_vec_rotl16i_i64(TCGv_i64 d, TCGv_i64 a, int64_t c); + +#endif diff --git a/include/tcg/tcg-op.h b/include/tcg/tcg-op.h new file mode 100644 index 000000000..5abf17fec --- /dev/null +++ b/include/tcg/tcg-op.h @@ -0,0 +1,1333 @@ +/* + * Tiny Code Generator for QEMU + * + * Copyright (c) 2008 Fabrice Bellard + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ + +#ifndef TCG_TCG_OP_H +#define TCG_TCG_OP_H + +#include "tcg/tcg.h" +#include "exec/helper-proto.h" +#include "exec/helper-gen.h" + +/* Basic output routines. Not for general consumption. */ + +void tcg_gen_op1(TCGOpcode, TCGArg); +void tcg_gen_op2(TCGOpcode, TCGArg, TCGArg); +void tcg_gen_op3(TCGOpcode, TCGArg, TCGArg, TCGArg); +void tcg_gen_op4(TCGOpcode, TCGArg, TCGArg, TCGArg, TCGArg); +void tcg_gen_op5(TCGOpcode, TCGArg, TCGArg, TCGArg, TCGArg, TCGArg); +void tcg_gen_op6(TCGOpcode, TCGArg, TCGArg, TCGArg, TCGArg, TCGArg, TCGArg); + +void vec_gen_2(TCGOpcode, TCGType, unsigned, TCGArg, TCGArg); +void vec_gen_3(TCGOpcode, TCGType, unsigned, TCGArg, TCGArg, TCGArg); +void vec_gen_4(TCGOpcode, TCGType, unsigned, TCGArg, TCGArg, TCGArg, TCGArg); + +static inline void tcg_gen_op1_i32(TCGOpcode opc, TCGv_i32 a1) +{ + tcg_gen_op1(opc, tcgv_i32_arg(a1)); +} + +static inline void tcg_gen_op1_i64(TCGOpcode opc, TCGv_i64 a1) +{ + tcg_gen_op1(opc, tcgv_i64_arg(a1)); +} + +static inline void tcg_gen_op1i(TCGOpcode opc, TCGArg a1) +{ + tcg_gen_op1(opc, a1); +} + +static inline void tcg_gen_op2_i32(TCGOpcode opc, TCGv_i32 a1, TCGv_i32 a2) +{ + tcg_gen_op2(opc, tcgv_i32_arg(a1), tcgv_i32_arg(a2)); +} + +static inline void tcg_gen_op2_i64(TCGOpcode opc, TCGv_i64 a1, TCGv_i64 a2) +{ + tcg_gen_op2(opc, tcgv_i64_arg(a1), tcgv_i64_arg(a2)); +} + +static inline void tcg_gen_op2i_i32(TCGOpcode opc, TCGv_i32 a1, TCGArg a2) +{ + tcg_gen_op2(opc, tcgv_i32_arg(a1), a2); +} + +static inline void tcg_gen_op2i_i64(TCGOpcode opc, TCGv_i64 a1, TCGArg a2) +{ + tcg_gen_op2(opc, tcgv_i64_arg(a1), a2); +} + +static inline void tcg_gen_op2ii(TCGOpcode opc, TCGArg a1, TCGArg a2) +{ + tcg_gen_op2(opc, a1, a2); +} + +static inline void tcg_gen_op3_i32(TCGOpcode opc, TCGv_i32 a1, + TCGv_i32 a2, TCGv_i32 a3) +{ + tcg_gen_op3(opc, tcgv_i32_arg(a1), tcgv_i32_arg(a2), tcgv_i32_arg(a3)); +} + +static inline void tcg_gen_op3_i64(TCGOpcode opc, TCGv_i64 a1, + TCGv_i64 a2, TCGv_i64 a3) +{ + tcg_gen_op3(opc, tcgv_i64_arg(a1), tcgv_i64_arg(a2), tcgv_i64_arg(a3)); +} + +static inline void tcg_gen_op3i_i32(TCGOpcode opc, TCGv_i32 a1, + TCGv_i32 a2, TCGArg a3) +{ + tcg_gen_op3(opc, tcgv_i32_arg(a1), tcgv_i32_arg(a2), a3); +} + +static inline void tcg_gen_op3i_i64(TCGOpcode opc, TCGv_i64 a1, + TCGv_i64 a2, TCGArg a3) +{ + tcg_gen_op3(opc, tcgv_i64_arg(a1), tcgv_i64_arg(a2), a3); +} + +static inline void tcg_gen_ldst_op_i32(TCGOpcode opc, TCGv_i32 val, + TCGv_ptr base, TCGArg offset) +{ + tcg_gen_op3(opc, tcgv_i32_arg(val), tcgv_ptr_arg(base), offset); +} + +static inline void tcg_gen_ldst_op_i64(TCGOpcode opc, TCGv_i64 val, + TCGv_ptr base, TCGArg offset) +{ + tcg_gen_op3(opc, tcgv_i64_arg(val), tcgv_ptr_arg(base), offset); +} + +static inline void tcg_gen_op4_i32(TCGOpcode opc, TCGv_i32 a1, TCGv_i32 a2, + TCGv_i32 a3, TCGv_i32 a4) +{ + tcg_gen_op4(opc, tcgv_i32_arg(a1), tcgv_i32_arg(a2), + tcgv_i32_arg(a3), tcgv_i32_arg(a4)); +} + +static inline void tcg_gen_op4_i64(TCGOpcode opc, TCGv_i64 a1, TCGv_i64 a2, + TCGv_i64 a3, TCGv_i64 a4) +{ + tcg_gen_op4(opc, tcgv_i64_arg(a1), tcgv_i64_arg(a2), + tcgv_i64_arg(a3), tcgv_i64_arg(a4)); +} + +static inline void tcg_gen_op4i_i32(TCGOpcode opc, TCGv_i32 a1, TCGv_i32 a2, + TCGv_i32 a3, TCGArg a4) +{ + tcg_gen_op4(opc, tcgv_i32_arg(a1), tcgv_i32_arg(a2), + tcgv_i32_arg(a3), a4); +} + +static inline void tcg_gen_op4i_i64(TCGOpcode opc, TCGv_i64 a1, TCGv_i64 a2, + TCGv_i64 a3, TCGArg a4) +{ + tcg_gen_op4(opc, tcgv_i64_arg(a1), tcgv_i64_arg(a2), + tcgv_i64_arg(a3), a4); +} + +static inline void tcg_gen_op4ii_i32(TCGOpcode opc, TCGv_i32 a1, TCGv_i32 a2, + TCGArg a3, TCGArg a4) +{ + tcg_gen_op4(opc, tcgv_i32_arg(a1), tcgv_i32_arg(a2), a3, a4); +} + +static inline void tcg_gen_op4ii_i64(TCGOpcode opc, TCGv_i64 a1, TCGv_i64 a2, + TCGArg a3, TCGArg a4) +{ + tcg_gen_op4(opc, tcgv_i64_arg(a1), tcgv_i64_arg(a2), a3, a4); +} + +static inline void tcg_gen_op5_i32(TCGOpcode opc, TCGv_i32 a1, TCGv_i32 a2, + TCGv_i32 a3, TCGv_i32 a4, TCGv_i32 a5) +{ + tcg_gen_op5(opc, tcgv_i32_arg(a1), tcgv_i32_arg(a2), + tcgv_i32_arg(a3), tcgv_i32_arg(a4), tcgv_i32_arg(a5)); +} + +static inline void tcg_gen_op5_i64(TCGOpcode opc, TCGv_i64 a1, TCGv_i64 a2, + TCGv_i64 a3, TCGv_i64 a4, TCGv_i64 a5) +{ + tcg_gen_op5(opc, tcgv_i64_arg(a1), tcgv_i64_arg(a2), + tcgv_i64_arg(a3), tcgv_i64_arg(a4), tcgv_i64_arg(a5)); +} + +static inline void tcg_gen_op5i_i32(TCGOpcode opc, TCGv_i32 a1, TCGv_i32 a2, + TCGv_i32 a3, TCGv_i32 a4, TCGArg a5) +{ + tcg_gen_op5(opc, tcgv_i32_arg(a1), tcgv_i32_arg(a2), + tcgv_i32_arg(a3), tcgv_i32_arg(a4), a5); +} + +static inline void tcg_gen_op5i_i64(TCGOpcode opc, TCGv_i64 a1, TCGv_i64 a2, + TCGv_i64 a3, TCGv_i64 a4, TCGArg a5) +{ + tcg_gen_op5(opc, tcgv_i64_arg(a1), tcgv_i64_arg(a2), + tcgv_i64_arg(a3), tcgv_i64_arg(a4), a5); +} + +static inline void tcg_gen_op5ii_i32(TCGOpcode opc, TCGv_i32 a1, TCGv_i32 a2, + TCGv_i32 a3, TCGArg a4, TCGArg a5) +{ + tcg_gen_op5(opc, tcgv_i32_arg(a1), tcgv_i32_arg(a2), + tcgv_i32_arg(a3), a4, a5); +} + +static inline void tcg_gen_op5ii_i64(TCGOpcode opc, TCGv_i64 a1, TCGv_i64 a2, + TCGv_i64 a3, TCGArg a4, TCGArg a5) +{ + tcg_gen_op5(opc, tcgv_i64_arg(a1), tcgv_i64_arg(a2), + tcgv_i64_arg(a3), a4, a5); +} + +static inline void tcg_gen_op6_i32(TCGOpcode opc, TCGv_i32 a1, TCGv_i32 a2, + TCGv_i32 a3, TCGv_i32 a4, + TCGv_i32 a5, TCGv_i32 a6) +{ + tcg_gen_op6(opc, tcgv_i32_arg(a1), tcgv_i32_arg(a2), + tcgv_i32_arg(a3), tcgv_i32_arg(a4), tcgv_i32_arg(a5), + tcgv_i32_arg(a6)); +} + +static inline void tcg_gen_op6_i64(TCGOpcode opc, TCGv_i64 a1, TCGv_i64 a2, + TCGv_i64 a3, TCGv_i64 a4, + TCGv_i64 a5, TCGv_i64 a6) +{ + tcg_gen_op6(opc, tcgv_i64_arg(a1), tcgv_i64_arg(a2), + tcgv_i64_arg(a3), tcgv_i64_arg(a4), tcgv_i64_arg(a5), + tcgv_i64_arg(a6)); +} + +static inline void tcg_gen_op6i_i32(TCGOpcode opc, TCGv_i32 a1, TCGv_i32 a2, + TCGv_i32 a3, TCGv_i32 a4, + TCGv_i32 a5, TCGArg a6) +{ + tcg_gen_op6(opc, tcgv_i32_arg(a1), tcgv_i32_arg(a2), + tcgv_i32_arg(a3), tcgv_i32_arg(a4), tcgv_i32_arg(a5), a6); +} + +static inline void tcg_gen_op6i_i64(TCGOpcode opc, TCGv_i64 a1, TCGv_i64 a2, + TCGv_i64 a3, TCGv_i64 a4, + TCGv_i64 a5, TCGArg a6) +{ + tcg_gen_op6(opc, tcgv_i64_arg(a1), tcgv_i64_arg(a2), + tcgv_i64_arg(a3), tcgv_i64_arg(a4), tcgv_i64_arg(a5), a6); +} + +static inline void tcg_gen_op6ii_i32(TCGOpcode opc, TCGv_i32 a1, TCGv_i32 a2, + TCGv_i32 a3, TCGv_i32 a4, + TCGArg a5, TCGArg a6) +{ + tcg_gen_op6(opc, tcgv_i32_arg(a1), tcgv_i32_arg(a2), + tcgv_i32_arg(a3), tcgv_i32_arg(a4), a5, a6); +} + +static inline void tcg_gen_op6ii_i64(TCGOpcode opc, TCGv_i64 a1, TCGv_i64 a2, + TCGv_i64 a3, TCGv_i64 a4, + TCGArg a5, TCGArg a6) +{ + tcg_gen_op6(opc, tcgv_i64_arg(a1), tcgv_i64_arg(a2), + tcgv_i64_arg(a3), tcgv_i64_arg(a4), a5, a6); +} + + +/* Generic ops. */ + +static inline void gen_set_label(TCGLabel *l) +{ + l->present = 1; + tcg_gen_op1(INDEX_op_set_label, label_arg(l)); +} + +static inline void tcg_gen_br(TCGLabel *l) +{ + l->refs++; + tcg_gen_op1(INDEX_op_br, label_arg(l)); +} + +void tcg_gen_mb(TCGBar); + +/* Helper calls. */ + +/* 32 bit ops */ + +void tcg_gen_addi_i32(TCGv_i32 ret, TCGv_i32 arg1, int32_t arg2); +void tcg_gen_subfi_i32(TCGv_i32 ret, int32_t arg1, TCGv_i32 arg2); +void tcg_gen_subi_i32(TCGv_i32 ret, TCGv_i32 arg1, int32_t arg2); +void tcg_gen_andi_i32(TCGv_i32 ret, TCGv_i32 arg1, int32_t arg2); +void tcg_gen_ori_i32(TCGv_i32 ret, TCGv_i32 arg1, int32_t arg2); +void tcg_gen_xori_i32(TCGv_i32 ret, TCGv_i32 arg1, int32_t arg2); +void tcg_gen_shli_i32(TCGv_i32 ret, TCGv_i32 arg1, int32_t arg2); +void tcg_gen_shri_i32(TCGv_i32 ret, TCGv_i32 arg1, int32_t arg2); +void tcg_gen_sari_i32(TCGv_i32 ret, TCGv_i32 arg1, int32_t arg2); +void tcg_gen_muli_i32(TCGv_i32 ret, TCGv_i32 arg1, int32_t arg2); +void tcg_gen_div_i32(TCGv_i32 ret, TCGv_i32 arg1, TCGv_i32 arg2); +void tcg_gen_rem_i32(TCGv_i32 ret, TCGv_i32 arg1, TCGv_i32 arg2); +void tcg_gen_divu_i32(TCGv_i32 ret, TCGv_i32 arg1, TCGv_i32 arg2); +void tcg_gen_remu_i32(TCGv_i32 ret, TCGv_i32 arg1, TCGv_i32 arg2); +void tcg_gen_andc_i32(TCGv_i32 ret, TCGv_i32 arg1, TCGv_i32 arg2); +void tcg_gen_eqv_i32(TCGv_i32 ret, TCGv_i32 arg1, TCGv_i32 arg2); +void tcg_gen_nand_i32(TCGv_i32 ret, TCGv_i32 arg1, TCGv_i32 arg2); +void tcg_gen_nor_i32(TCGv_i32 ret, TCGv_i32 arg1, TCGv_i32 arg2); +void tcg_gen_orc_i32(TCGv_i32 ret, TCGv_i32 arg1, TCGv_i32 arg2); +void tcg_gen_clz_i32(TCGv_i32 ret, TCGv_i32 arg1, TCGv_i32 arg2); +void tcg_gen_ctz_i32(TCGv_i32 ret, TCGv_i32 arg1, TCGv_i32 arg2); +void tcg_gen_clzi_i32(TCGv_i32 ret, TCGv_i32 arg1, uint32_t arg2); +void tcg_gen_ctzi_i32(TCGv_i32 ret, TCGv_i32 arg1, uint32_t arg2); +void tcg_gen_clrsb_i32(TCGv_i32 ret, TCGv_i32 arg); +void tcg_gen_ctpop_i32(TCGv_i32 a1, TCGv_i32 a2); +void tcg_gen_rotl_i32(TCGv_i32 ret, TCGv_i32 arg1, TCGv_i32 arg2); +void tcg_gen_rotli_i32(TCGv_i32 ret, TCGv_i32 arg1, int32_t arg2); +void tcg_gen_rotr_i32(TCGv_i32 ret, TCGv_i32 arg1, TCGv_i32 arg2); +void tcg_gen_rotri_i32(TCGv_i32 ret, TCGv_i32 arg1, int32_t arg2); +void tcg_gen_deposit_i32(TCGv_i32 ret, TCGv_i32 arg1, TCGv_i32 arg2, + unsigned int ofs, unsigned int len); +void tcg_gen_deposit_z_i32(TCGv_i32 ret, TCGv_i32 arg, + unsigned int ofs, unsigned int len); +void tcg_gen_extract_i32(TCGv_i32 ret, TCGv_i32 arg, + unsigned int ofs, unsigned int len); +void tcg_gen_sextract_i32(TCGv_i32 ret, TCGv_i32 arg, + unsigned int ofs, unsigned int len); +void tcg_gen_extract2_i32(TCGv_i32 ret, TCGv_i32 al, TCGv_i32 ah, + unsigned int ofs); +void tcg_gen_brcond_i32(TCGCond cond, TCGv_i32 arg1, TCGv_i32 arg2, TCGLabel *); +void tcg_gen_brcondi_i32(TCGCond cond, TCGv_i32 arg1, int32_t arg2, TCGLabel *); +void tcg_gen_setcond_i32(TCGCond cond, TCGv_i32 ret, + TCGv_i32 arg1, TCGv_i32 arg2); +void tcg_gen_setcondi_i32(TCGCond cond, TCGv_i32 ret, + TCGv_i32 arg1, int32_t arg2); +void tcg_gen_movcond_i32(TCGCond cond, TCGv_i32 ret, TCGv_i32 c1, + TCGv_i32 c2, TCGv_i32 v1, TCGv_i32 v2); +void tcg_gen_add2_i32(TCGv_i32 rl, TCGv_i32 rh, TCGv_i32 al, + TCGv_i32 ah, TCGv_i32 bl, TCGv_i32 bh); +void tcg_gen_sub2_i32(TCGv_i32 rl, TCGv_i32 rh, TCGv_i32 al, + TCGv_i32 ah, TCGv_i32 bl, TCGv_i32 bh); +void tcg_gen_mulu2_i32(TCGv_i32 rl, TCGv_i32 rh, TCGv_i32 arg1, TCGv_i32 arg2); +void tcg_gen_muls2_i32(TCGv_i32 rl, TCGv_i32 rh, TCGv_i32 arg1, TCGv_i32 arg2); +void tcg_gen_mulsu2_i32(TCGv_i32 rl, TCGv_i32 rh, TCGv_i32 arg1, TCGv_i32 arg2); +void tcg_gen_ext8s_i32(TCGv_i32 ret, TCGv_i32 arg); +void tcg_gen_ext16s_i32(TCGv_i32 ret, TCGv_i32 arg); +void tcg_gen_ext8u_i32(TCGv_i32 ret, TCGv_i32 arg); +void tcg_gen_ext16u_i32(TCGv_i32 ret, TCGv_i32 arg); +void tcg_gen_bswap16_i32(TCGv_i32 ret, TCGv_i32 arg); +void tcg_gen_bswap32_i32(TCGv_i32 ret, TCGv_i32 arg); +void tcg_gen_smin_i32(TCGv_i32, TCGv_i32 arg1, TCGv_i32 arg2); +void tcg_gen_smax_i32(TCGv_i32, TCGv_i32 arg1, TCGv_i32 arg2); +void tcg_gen_umin_i32(TCGv_i32, TCGv_i32 arg1, TCGv_i32 arg2); +void tcg_gen_umax_i32(TCGv_i32, TCGv_i32 arg1, TCGv_i32 arg2); +void tcg_gen_abs_i32(TCGv_i32, TCGv_i32); + +static inline void tcg_gen_discard_i32(TCGv_i32 arg) +{ + tcg_gen_op1_i32(INDEX_op_discard, arg); +} + +static inline void tcg_gen_mov_i32(TCGv_i32 ret, TCGv_i32 arg) +{ + if (ret != arg) { + tcg_gen_op2_i32(INDEX_op_mov_i32, ret, arg); + } +} + +static inline void tcg_gen_movi_i32(TCGv_i32 ret, int32_t arg) +{ + tcg_gen_op2i_i32(INDEX_op_movi_i32, ret, arg); +} + +static inline void tcg_gen_ld8u_i32(TCGv_i32 ret, TCGv_ptr arg2, + tcg_target_long offset) +{ + tcg_gen_ldst_op_i32(INDEX_op_ld8u_i32, ret, arg2, offset); +} + +static inline void tcg_gen_ld8s_i32(TCGv_i32 ret, TCGv_ptr arg2, + tcg_target_long offset) +{ + tcg_gen_ldst_op_i32(INDEX_op_ld8s_i32, ret, arg2, offset); +} + +static inline void tcg_gen_ld16u_i32(TCGv_i32 ret, TCGv_ptr arg2, + tcg_target_long offset) +{ + tcg_gen_ldst_op_i32(INDEX_op_ld16u_i32, ret, arg2, offset); +} + +static inline void tcg_gen_ld16s_i32(TCGv_i32 ret, TCGv_ptr arg2, + tcg_target_long offset) +{ + tcg_gen_ldst_op_i32(INDEX_op_ld16s_i32, ret, arg2, offset); +} + +static inline void tcg_gen_ld_i32(TCGv_i32 ret, TCGv_ptr arg2, + tcg_target_long offset) +{ + tcg_gen_ldst_op_i32(INDEX_op_ld_i32, ret, arg2, offset); +} + +static inline void tcg_gen_st8_i32(TCGv_i32 arg1, TCGv_ptr arg2, + tcg_target_long offset) +{ + tcg_gen_ldst_op_i32(INDEX_op_st8_i32, arg1, arg2, offset); +} + +static inline void tcg_gen_st16_i32(TCGv_i32 arg1, TCGv_ptr arg2, + tcg_target_long offset) +{ + tcg_gen_ldst_op_i32(INDEX_op_st16_i32, arg1, arg2, offset); +} + +static inline void tcg_gen_st_i32(TCGv_i32 arg1, TCGv_ptr arg2, + tcg_target_long offset) +{ + tcg_gen_ldst_op_i32(INDEX_op_st_i32, arg1, arg2, offset); +} + +static inline void tcg_gen_add_i32(TCGv_i32 ret, TCGv_i32 arg1, TCGv_i32 arg2) +{ + tcg_gen_op3_i32(INDEX_op_add_i32, ret, arg1, arg2); +} + +static inline void tcg_gen_sub_i32(TCGv_i32 ret, TCGv_i32 arg1, TCGv_i32 arg2) +{ + tcg_gen_op3_i32(INDEX_op_sub_i32, ret, arg1, arg2); +} + +static inline void tcg_gen_and_i32(TCGv_i32 ret, TCGv_i32 arg1, TCGv_i32 arg2) +{ + tcg_gen_op3_i32(INDEX_op_and_i32, ret, arg1, arg2); +} + +static inline void tcg_gen_or_i32(TCGv_i32 ret, TCGv_i32 arg1, TCGv_i32 arg2) +{ + tcg_gen_op3_i32(INDEX_op_or_i32, ret, arg1, arg2); +} + +static inline void tcg_gen_xor_i32(TCGv_i32 ret, TCGv_i32 arg1, TCGv_i32 arg2) +{ + tcg_gen_op3_i32(INDEX_op_xor_i32, ret, arg1, arg2); +} + +static inline void tcg_gen_shl_i32(TCGv_i32 ret, TCGv_i32 arg1, TCGv_i32 arg2) +{ + tcg_gen_op3_i32(INDEX_op_shl_i32, ret, arg1, arg2); +} + +static inline void tcg_gen_shr_i32(TCGv_i32 ret, TCGv_i32 arg1, TCGv_i32 arg2) +{ + tcg_gen_op3_i32(INDEX_op_shr_i32, ret, arg1, arg2); +} + +static inline void tcg_gen_sar_i32(TCGv_i32 ret, TCGv_i32 arg1, TCGv_i32 arg2) +{ + tcg_gen_op3_i32(INDEX_op_sar_i32, ret, arg1, arg2); +} + +static inline void tcg_gen_mul_i32(TCGv_i32 ret, TCGv_i32 arg1, TCGv_i32 arg2) +{ + tcg_gen_op3_i32(INDEX_op_mul_i32, ret, arg1, arg2); +} + +static inline void tcg_gen_neg_i32(TCGv_i32 ret, TCGv_i32 arg) +{ + if (TCG_TARGET_HAS_neg_i32) { + tcg_gen_op2_i32(INDEX_op_neg_i32, ret, arg); + } else { + tcg_gen_subfi_i32(ret, 0, arg); + } +} + +static inline void tcg_gen_not_i32(TCGv_i32 ret, TCGv_i32 arg) +{ + if (TCG_TARGET_HAS_not_i32) { + tcg_gen_op2_i32(INDEX_op_not_i32, ret, arg); + } else { + tcg_gen_xori_i32(ret, arg, -1); + } +} + +/* 64 bit ops */ + +void tcg_gen_addi_i64(TCGv_i64 ret, TCGv_i64 arg1, int64_t arg2); +void tcg_gen_subfi_i64(TCGv_i64 ret, int64_t arg1, TCGv_i64 arg2); +void tcg_gen_subi_i64(TCGv_i64 ret, TCGv_i64 arg1, int64_t arg2); +void tcg_gen_andi_i64(TCGv_i64 ret, TCGv_i64 arg1, int64_t arg2); +void tcg_gen_ori_i64(TCGv_i64 ret, TCGv_i64 arg1, int64_t arg2); +void tcg_gen_xori_i64(TCGv_i64 ret, TCGv_i64 arg1, int64_t arg2); +void tcg_gen_shli_i64(TCGv_i64 ret, TCGv_i64 arg1, int64_t arg2); +void tcg_gen_shri_i64(TCGv_i64 ret, TCGv_i64 arg1, int64_t arg2); +void tcg_gen_sari_i64(TCGv_i64 ret, TCGv_i64 arg1, int64_t arg2); +void tcg_gen_muli_i64(TCGv_i64 ret, TCGv_i64 arg1, int64_t arg2); +void tcg_gen_div_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2); +void tcg_gen_rem_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2); +void tcg_gen_divu_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2); +void tcg_gen_remu_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2); +void tcg_gen_andc_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2); +void tcg_gen_eqv_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2); +void tcg_gen_nand_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2); +void tcg_gen_nor_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2); +void tcg_gen_orc_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2); +void tcg_gen_clz_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2); +void tcg_gen_ctz_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2); +void tcg_gen_clzi_i64(TCGv_i64 ret, TCGv_i64 arg1, uint64_t arg2); +void tcg_gen_ctzi_i64(TCGv_i64 ret, TCGv_i64 arg1, uint64_t arg2); +void tcg_gen_clrsb_i64(TCGv_i64 ret, TCGv_i64 arg); +void tcg_gen_ctpop_i64(TCGv_i64 a1, TCGv_i64 a2); +void tcg_gen_rotl_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2); +void tcg_gen_rotli_i64(TCGv_i64 ret, TCGv_i64 arg1, int64_t arg2); +void tcg_gen_rotr_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2); +void tcg_gen_rotri_i64(TCGv_i64 ret, TCGv_i64 arg1, int64_t arg2); +void tcg_gen_deposit_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2, + unsigned int ofs, unsigned int len); +void tcg_gen_deposit_z_i64(TCGv_i64 ret, TCGv_i64 arg, + unsigned int ofs, unsigned int len); +void tcg_gen_extract_i64(TCGv_i64 ret, TCGv_i64 arg, + unsigned int ofs, unsigned int len); +void tcg_gen_sextract_i64(TCGv_i64 ret, TCGv_i64 arg, + unsigned int ofs, unsigned int len); +void tcg_gen_extract2_i64(TCGv_i64 ret, TCGv_i64 al, TCGv_i64 ah, + unsigned int ofs); +void tcg_gen_brcond_i64(TCGCond cond, TCGv_i64 arg1, TCGv_i64 arg2, TCGLabel *); +void tcg_gen_brcondi_i64(TCGCond cond, TCGv_i64 arg1, int64_t arg2, TCGLabel *); +void tcg_gen_setcond_i64(TCGCond cond, TCGv_i64 ret, + TCGv_i64 arg1, TCGv_i64 arg2); +void tcg_gen_setcondi_i64(TCGCond cond, TCGv_i64 ret, + TCGv_i64 arg1, int64_t arg2); +void tcg_gen_movcond_i64(TCGCond cond, TCGv_i64 ret, TCGv_i64 c1, + TCGv_i64 c2, TCGv_i64 v1, TCGv_i64 v2); +void tcg_gen_add2_i64(TCGv_i64 rl, TCGv_i64 rh, TCGv_i64 al, + TCGv_i64 ah, TCGv_i64 bl, TCGv_i64 bh); +void tcg_gen_sub2_i64(TCGv_i64 rl, TCGv_i64 rh, TCGv_i64 al, + TCGv_i64 ah, TCGv_i64 bl, TCGv_i64 bh); +void tcg_gen_mulu2_i64(TCGv_i64 rl, TCGv_i64 rh, TCGv_i64 arg1, TCGv_i64 arg2); +void tcg_gen_muls2_i64(TCGv_i64 rl, TCGv_i64 rh, TCGv_i64 arg1, TCGv_i64 arg2); +void tcg_gen_mulsu2_i64(TCGv_i64 rl, TCGv_i64 rh, TCGv_i64 arg1, TCGv_i64 arg2); +void tcg_gen_not_i64(TCGv_i64 ret, TCGv_i64 arg); +void tcg_gen_ext8s_i64(TCGv_i64 ret, TCGv_i64 arg); +void tcg_gen_ext16s_i64(TCGv_i64 ret, TCGv_i64 arg); +void tcg_gen_ext32s_i64(TCGv_i64 ret, TCGv_i64 arg); +void tcg_gen_ext8u_i64(TCGv_i64 ret, TCGv_i64 arg); +void tcg_gen_ext16u_i64(TCGv_i64 ret, TCGv_i64 arg); +void tcg_gen_ext32u_i64(TCGv_i64 ret, TCGv_i64 arg); +void tcg_gen_bswap16_i64(TCGv_i64 ret, TCGv_i64 arg); +void tcg_gen_bswap32_i64(TCGv_i64 ret, TCGv_i64 arg); +void tcg_gen_bswap64_i64(TCGv_i64 ret, TCGv_i64 arg); +void tcg_gen_smin_i64(TCGv_i64, TCGv_i64 arg1, TCGv_i64 arg2); +void tcg_gen_smax_i64(TCGv_i64, TCGv_i64 arg1, TCGv_i64 arg2); +void tcg_gen_umin_i64(TCGv_i64, TCGv_i64 arg1, TCGv_i64 arg2); +void tcg_gen_umax_i64(TCGv_i64, TCGv_i64 arg1, TCGv_i64 arg2); +void tcg_gen_abs_i64(TCGv_i64, TCGv_i64); + +#if TCG_TARGET_REG_BITS == 64 +static inline void tcg_gen_discard_i64(TCGv_i64 arg) +{ + tcg_gen_op1_i64(INDEX_op_discard, arg); +} + +static inline void tcg_gen_mov_i64(TCGv_i64 ret, TCGv_i64 arg) +{ + if (ret != arg) { + tcg_gen_op2_i64(INDEX_op_mov_i64, ret, arg); + } +} + +static inline void tcg_gen_movi_i64(TCGv_i64 ret, int64_t arg) +{ + tcg_gen_op2i_i64(INDEX_op_movi_i64, ret, arg); +} + +static inline void tcg_gen_ld8u_i64(TCGv_i64 ret, TCGv_ptr arg2, + tcg_target_long offset) +{ + tcg_gen_ldst_op_i64(INDEX_op_ld8u_i64, ret, arg2, offset); +} + +static inline void tcg_gen_ld8s_i64(TCGv_i64 ret, TCGv_ptr arg2, + tcg_target_long offset) +{ + tcg_gen_ldst_op_i64(INDEX_op_ld8s_i64, ret, arg2, offset); +} + +static inline void tcg_gen_ld16u_i64(TCGv_i64 ret, TCGv_ptr arg2, + tcg_target_long offset) +{ + tcg_gen_ldst_op_i64(INDEX_op_ld16u_i64, ret, arg2, offset); +} + +static inline void tcg_gen_ld16s_i64(TCGv_i64 ret, TCGv_ptr arg2, + tcg_target_long offset) +{ + tcg_gen_ldst_op_i64(INDEX_op_ld16s_i64, ret, arg2, offset); +} + +static inline void tcg_gen_ld32u_i64(TCGv_i64 ret, TCGv_ptr arg2, + tcg_target_long offset) +{ + tcg_gen_ldst_op_i64(INDEX_op_ld32u_i64, ret, arg2, offset); +} + +static inline void tcg_gen_ld32s_i64(TCGv_i64 ret, TCGv_ptr arg2, + tcg_target_long offset) +{ + tcg_gen_ldst_op_i64(INDEX_op_ld32s_i64, ret, arg2, offset); +} + +static inline void tcg_gen_ld_i64(TCGv_i64 ret, TCGv_ptr arg2, + tcg_target_long offset) +{ + tcg_gen_ldst_op_i64(INDEX_op_ld_i64, ret, arg2, offset); +} + +static inline void tcg_gen_st8_i64(TCGv_i64 arg1, TCGv_ptr arg2, + tcg_target_long offset) +{ + tcg_gen_ldst_op_i64(INDEX_op_st8_i64, arg1, arg2, offset); +} + +static inline void tcg_gen_st16_i64(TCGv_i64 arg1, TCGv_ptr arg2, + tcg_target_long offset) +{ + tcg_gen_ldst_op_i64(INDEX_op_st16_i64, arg1, arg2, offset); +} + +static inline void tcg_gen_st32_i64(TCGv_i64 arg1, TCGv_ptr arg2, + tcg_target_long offset) +{ + tcg_gen_ldst_op_i64(INDEX_op_st32_i64, arg1, arg2, offset); +} + +static inline void tcg_gen_st_i64(TCGv_i64 arg1, TCGv_ptr arg2, + tcg_target_long offset) +{ + tcg_gen_ldst_op_i64(INDEX_op_st_i64, arg1, arg2, offset); +} + +static inline void tcg_gen_add_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2) +{ + tcg_gen_op3_i64(INDEX_op_add_i64, ret, arg1, arg2); +} + +static inline void tcg_gen_sub_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2) +{ + tcg_gen_op3_i64(INDEX_op_sub_i64, ret, arg1, arg2); +} + +static inline void tcg_gen_and_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2) +{ + tcg_gen_op3_i64(INDEX_op_and_i64, ret, arg1, arg2); +} + +static inline void tcg_gen_or_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2) +{ + tcg_gen_op3_i64(INDEX_op_or_i64, ret, arg1, arg2); +} + +static inline void tcg_gen_xor_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2) +{ + tcg_gen_op3_i64(INDEX_op_xor_i64, ret, arg1, arg2); +} + +static inline void tcg_gen_shl_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2) +{ + tcg_gen_op3_i64(INDEX_op_shl_i64, ret, arg1, arg2); +} + +static inline void tcg_gen_shr_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2) +{ + tcg_gen_op3_i64(INDEX_op_shr_i64, ret, arg1, arg2); +} + +static inline void tcg_gen_sar_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2) +{ + tcg_gen_op3_i64(INDEX_op_sar_i64, ret, arg1, arg2); +} + +static inline void tcg_gen_mul_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2) +{ + tcg_gen_op3_i64(INDEX_op_mul_i64, ret, arg1, arg2); +} +#else /* TCG_TARGET_REG_BITS == 32 */ +static inline void tcg_gen_st8_i64(TCGv_i64 arg1, TCGv_ptr arg2, + tcg_target_long offset) +{ + tcg_gen_st8_i32(TCGV_LOW(arg1), arg2, offset); +} + +static inline void tcg_gen_st16_i64(TCGv_i64 arg1, TCGv_ptr arg2, + tcg_target_long offset) +{ + tcg_gen_st16_i32(TCGV_LOW(arg1), arg2, offset); +} + +static inline void tcg_gen_st32_i64(TCGv_i64 arg1, TCGv_ptr arg2, + tcg_target_long offset) +{ + tcg_gen_st_i32(TCGV_LOW(arg1), arg2, offset); +} + +static inline void tcg_gen_add_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2) +{ + tcg_gen_add2_i32(TCGV_LOW(ret), TCGV_HIGH(ret), TCGV_LOW(arg1), + TCGV_HIGH(arg1), TCGV_LOW(arg2), TCGV_HIGH(arg2)); +} + +static inline void tcg_gen_sub_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2) +{ + tcg_gen_sub2_i32(TCGV_LOW(ret), TCGV_HIGH(ret), TCGV_LOW(arg1), + TCGV_HIGH(arg1), TCGV_LOW(arg2), TCGV_HIGH(arg2)); +} + +void tcg_gen_discard_i64(TCGv_i64 arg); +void tcg_gen_mov_i64(TCGv_i64 ret, TCGv_i64 arg); +void tcg_gen_movi_i64(TCGv_i64 ret, int64_t arg); +void tcg_gen_ld8u_i64(TCGv_i64 ret, TCGv_ptr arg2, tcg_target_long offset); +void tcg_gen_ld8s_i64(TCGv_i64 ret, TCGv_ptr arg2, tcg_target_long offset); +void tcg_gen_ld16u_i64(TCGv_i64 ret, TCGv_ptr arg2, tcg_target_long offset); +void tcg_gen_ld16s_i64(TCGv_i64 ret, TCGv_ptr arg2, tcg_target_long offset); +void tcg_gen_ld32u_i64(TCGv_i64 ret, TCGv_ptr arg2, tcg_target_long offset); +void tcg_gen_ld32s_i64(TCGv_i64 ret, TCGv_ptr arg2, tcg_target_long offset); +void tcg_gen_ld_i64(TCGv_i64 ret, TCGv_ptr arg2, tcg_target_long offset); +void tcg_gen_st_i64(TCGv_i64 arg1, TCGv_ptr arg2, tcg_target_long offset); +void tcg_gen_and_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2); +void tcg_gen_or_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2); +void tcg_gen_xor_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2); +void tcg_gen_shl_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2); +void tcg_gen_shr_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2); +void tcg_gen_sar_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2); +void tcg_gen_mul_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2); +#endif /* TCG_TARGET_REG_BITS */ + +static inline void tcg_gen_neg_i64(TCGv_i64 ret, TCGv_i64 arg) +{ + if (TCG_TARGET_HAS_neg_i64) { + tcg_gen_op2_i64(INDEX_op_neg_i64, ret, arg); + } else { + tcg_gen_subfi_i64(ret, 0, arg); + } +} + +/* Size changing operations. */ + +void tcg_gen_extu_i32_i64(TCGv_i64 ret, TCGv_i32 arg); +void tcg_gen_ext_i32_i64(TCGv_i64 ret, TCGv_i32 arg); +void tcg_gen_concat_i32_i64(TCGv_i64 dest, TCGv_i32 low, TCGv_i32 high); +void tcg_gen_extrl_i64_i32(TCGv_i32 ret, TCGv_i64 arg); +void tcg_gen_extrh_i64_i32(TCGv_i32 ret, TCGv_i64 arg); +void tcg_gen_extr_i64_i32(TCGv_i32 lo, TCGv_i32 hi, TCGv_i64 arg); +void tcg_gen_extr32_i64(TCGv_i64 lo, TCGv_i64 hi, TCGv_i64 arg); + +static inline void tcg_gen_concat32_i64(TCGv_i64 ret, TCGv_i64 lo, TCGv_i64 hi) +{ + tcg_gen_deposit_i64(ret, lo, hi, 32, 32); +} + +/* QEMU specific operations. */ + +#ifndef TARGET_LONG_BITS +#error must include QEMU headers +#endif + +#if TARGET_INSN_START_WORDS == 1 +# if TARGET_LONG_BITS <= TCG_TARGET_REG_BITS +static inline void tcg_gen_insn_start(target_ulong pc) +{ + tcg_gen_op1(INDEX_op_insn_start, pc); +} +# else +static inline void tcg_gen_insn_start(target_ulong pc) +{ + tcg_gen_op2(INDEX_op_insn_start, (uint32_t)pc, (uint32_t)(pc >> 32)); +} +# endif +#elif TARGET_INSN_START_WORDS == 2 +# if TARGET_LONG_BITS <= TCG_TARGET_REG_BITS +static inline void tcg_gen_insn_start(target_ulong pc, target_ulong a1) +{ + tcg_gen_op2(INDEX_op_insn_start, pc, a1); +} +# else +static inline void tcg_gen_insn_start(target_ulong pc, target_ulong a1) +{ + tcg_gen_op4(INDEX_op_insn_start, + (uint32_t)pc, (uint32_t)(pc >> 32), + (uint32_t)a1, (uint32_t)(a1 >> 32)); +} +# endif +#elif TARGET_INSN_START_WORDS == 3 +# if TARGET_LONG_BITS <= TCG_TARGET_REG_BITS +static inline void tcg_gen_insn_start(target_ulong pc, target_ulong a1, + target_ulong a2) +{ + tcg_gen_op3(INDEX_op_insn_start, pc, a1, a2); +} +# else +static inline void tcg_gen_insn_start(target_ulong pc, target_ulong a1, + target_ulong a2) +{ + tcg_gen_op6(INDEX_op_insn_start, + (uint32_t)pc, (uint32_t)(pc >> 32), + (uint32_t)a1, (uint32_t)(a1 >> 32), + (uint32_t)a2, (uint32_t)(a2 >> 32)); +} +# endif +#else +# error "Unhandled number of operands to insn_start" +#endif + +/** + * tcg_gen_exit_tb() - output exit_tb TCG operation + * @tb: The TranslationBlock from which we are exiting + * @idx: Direct jump slot index, or exit request + * + * See tcg/README for more info about this TCG operation. + * See also tcg.h and the block comment above TB_EXIT_MASK. + * + * For a normal exit from the TB, back to the main loop, @tb should + * be NULL and @idx should be 0. Otherwise, @tb should be valid and + * @idx should be one of the TB_EXIT_ values. + */ +void tcg_gen_exit_tb(TranslationBlock *tb, unsigned idx); + +/** + * tcg_gen_goto_tb() - output goto_tb TCG operation + * @idx: Direct jump slot index (0 or 1) + * + * See tcg/README for more info about this TCG operation. + * + * NOTE: In softmmu emulation, direct jumps with goto_tb are only safe within + * the pages this TB resides in because we don't take care of direct jumps when + * address mapping changes, e.g. in tlb_flush(). In user mode, there's only a + * static address translation, so the destination address is always valid, TBs + * are always invalidated properly, and direct jumps are reset when mapping + * changes. + */ +void tcg_gen_goto_tb(unsigned idx); + +/** + * tcg_gen_lookup_and_goto_ptr() - look up the current TB, jump to it if valid + * @addr: Guest address of the target TB + * + * If the TB is not valid, jump to the epilogue. + * + * This operation is optional. If the TCG backend does not implement goto_ptr, + * this op is equivalent to calling tcg_gen_exit_tb() with 0 as the argument. + */ +void tcg_gen_lookup_and_goto_ptr(void); + +static inline void tcg_gen_plugin_cb_start(unsigned from, unsigned type, + unsigned wr) +{ + tcg_gen_op3(INDEX_op_plugin_cb_start, from, type, wr); +} + +static inline void tcg_gen_plugin_cb_end(void) +{ + tcg_emit_op(INDEX_op_plugin_cb_end); +} + +#if TARGET_LONG_BITS == 32 +#define tcg_temp_new() tcg_temp_new_i32() +#define tcg_global_reg_new tcg_global_reg_new_i32 +#define tcg_global_mem_new tcg_global_mem_new_i32 +#define tcg_temp_local_new() tcg_temp_local_new_i32() +#define tcg_temp_free tcg_temp_free_i32 +#define tcg_gen_qemu_ld_tl tcg_gen_qemu_ld_i32 +#define tcg_gen_qemu_st_tl tcg_gen_qemu_st_i32 +#else +#define tcg_temp_new() tcg_temp_new_i64() +#define tcg_global_reg_new tcg_global_reg_new_i64 +#define tcg_global_mem_new tcg_global_mem_new_i64 +#define tcg_temp_local_new() tcg_temp_local_new_i64() +#define tcg_temp_free tcg_temp_free_i64 +#define tcg_gen_qemu_ld_tl tcg_gen_qemu_ld_i64 +#define tcg_gen_qemu_st_tl tcg_gen_qemu_st_i64 +#endif + +void tcg_gen_qemu_ld_i32(TCGv_i32, TCGv, TCGArg, MemOp); +void tcg_gen_qemu_st_i32(TCGv_i32, TCGv, TCGArg, MemOp); +void tcg_gen_qemu_ld_i64(TCGv_i64, TCGv, TCGArg, MemOp); +void tcg_gen_qemu_st_i64(TCGv_i64, TCGv, TCGArg, MemOp); + +static inline void tcg_gen_qemu_ld8u(TCGv ret, TCGv addr, int mem_index) +{ + tcg_gen_qemu_ld_tl(ret, addr, mem_index, MO_UB); +} + +static inline void tcg_gen_qemu_ld8s(TCGv ret, TCGv addr, int mem_index) +{ + tcg_gen_qemu_ld_tl(ret, addr, mem_index, MO_SB); +} + +static inline void tcg_gen_qemu_ld16u(TCGv ret, TCGv addr, int mem_index) +{ + tcg_gen_qemu_ld_tl(ret, addr, mem_index, MO_TEUW); +} + +static inline void tcg_gen_qemu_ld16s(TCGv ret, TCGv addr, int mem_index) +{ + tcg_gen_qemu_ld_tl(ret, addr, mem_index, MO_TESW); +} + +static inline void tcg_gen_qemu_ld32u(TCGv ret, TCGv addr, int mem_index) +{ + tcg_gen_qemu_ld_tl(ret, addr, mem_index, MO_TEUL); +} + +static inline void tcg_gen_qemu_ld32s(TCGv ret, TCGv addr, int mem_index) +{ + tcg_gen_qemu_ld_tl(ret, addr, mem_index, MO_TESL); +} + +static inline void tcg_gen_qemu_ld64(TCGv_i64 ret, TCGv addr, int mem_index) +{ + tcg_gen_qemu_ld_i64(ret, addr, mem_index, MO_TEQ); +} + +static inline void tcg_gen_qemu_st8(TCGv arg, TCGv addr, int mem_index) +{ + tcg_gen_qemu_st_tl(arg, addr, mem_index, MO_UB); +} + +static inline void tcg_gen_qemu_st16(TCGv arg, TCGv addr, int mem_index) +{ + tcg_gen_qemu_st_tl(arg, addr, mem_index, MO_TEUW); +} + +static inline void tcg_gen_qemu_st32(TCGv arg, TCGv addr, int mem_index) +{ + tcg_gen_qemu_st_tl(arg, addr, mem_index, MO_TEUL); +} + +static inline void tcg_gen_qemu_st64(TCGv_i64 arg, TCGv addr, int mem_index) +{ + tcg_gen_qemu_st_i64(arg, addr, mem_index, MO_TEQ); +} + +void tcg_gen_atomic_cmpxchg_i32(TCGv_i32, TCGv, TCGv_i32, TCGv_i32, + TCGArg, MemOp); +void tcg_gen_atomic_cmpxchg_i64(TCGv_i64, TCGv, TCGv_i64, TCGv_i64, + TCGArg, MemOp); + +void tcg_gen_atomic_xchg_i32(TCGv_i32, TCGv, TCGv_i32, TCGArg, MemOp); +void tcg_gen_atomic_xchg_i64(TCGv_i64, TCGv, TCGv_i64, TCGArg, MemOp); + +void tcg_gen_atomic_fetch_add_i32(TCGv_i32, TCGv, TCGv_i32, TCGArg, MemOp); +void tcg_gen_atomic_fetch_add_i64(TCGv_i64, TCGv, TCGv_i64, TCGArg, MemOp); +void tcg_gen_atomic_fetch_and_i32(TCGv_i32, TCGv, TCGv_i32, TCGArg, MemOp); +void tcg_gen_atomic_fetch_and_i64(TCGv_i64, TCGv, TCGv_i64, TCGArg, MemOp); +void tcg_gen_atomic_fetch_or_i32(TCGv_i32, TCGv, TCGv_i32, TCGArg, MemOp); +void tcg_gen_atomic_fetch_or_i64(TCGv_i64, TCGv, TCGv_i64, TCGArg, MemOp); +void tcg_gen_atomic_fetch_xor_i32(TCGv_i32, TCGv, TCGv_i32, TCGArg, MemOp); +void tcg_gen_atomic_fetch_xor_i64(TCGv_i64, TCGv, TCGv_i64, TCGArg, MemOp); +void tcg_gen_atomic_fetch_smin_i32(TCGv_i32, TCGv, TCGv_i32, TCGArg, MemOp); +void tcg_gen_atomic_fetch_smin_i64(TCGv_i64, TCGv, TCGv_i64, TCGArg, MemOp); +void tcg_gen_atomic_fetch_umin_i32(TCGv_i32, TCGv, TCGv_i32, TCGArg, MemOp); +void tcg_gen_atomic_fetch_umin_i64(TCGv_i64, TCGv, TCGv_i64, TCGArg, MemOp); +void tcg_gen_atomic_fetch_smax_i32(TCGv_i32, TCGv, TCGv_i32, TCGArg, MemOp); +void tcg_gen_atomic_fetch_smax_i64(TCGv_i64, TCGv, TCGv_i64, TCGArg, MemOp); +void tcg_gen_atomic_fetch_umax_i32(TCGv_i32, TCGv, TCGv_i32, TCGArg, MemOp); +void tcg_gen_atomic_fetch_umax_i64(TCGv_i64, TCGv, TCGv_i64, TCGArg, MemOp); + +void tcg_gen_atomic_add_fetch_i32(TCGv_i32, TCGv, TCGv_i32, TCGArg, MemOp); +void tcg_gen_atomic_add_fetch_i64(TCGv_i64, TCGv, TCGv_i64, TCGArg, MemOp); +void tcg_gen_atomic_and_fetch_i32(TCGv_i32, TCGv, TCGv_i32, TCGArg, MemOp); +void tcg_gen_atomic_and_fetch_i64(TCGv_i64, TCGv, TCGv_i64, TCGArg, MemOp); +void tcg_gen_atomic_or_fetch_i32(TCGv_i32, TCGv, TCGv_i32, TCGArg, MemOp); +void tcg_gen_atomic_or_fetch_i64(TCGv_i64, TCGv, TCGv_i64, TCGArg, MemOp); +void tcg_gen_atomic_xor_fetch_i32(TCGv_i32, TCGv, TCGv_i32, TCGArg, MemOp); +void tcg_gen_atomic_xor_fetch_i64(TCGv_i64, TCGv, TCGv_i64, TCGArg, MemOp); +void tcg_gen_atomic_smin_fetch_i32(TCGv_i32, TCGv, TCGv_i32, TCGArg, MemOp); +void tcg_gen_atomic_smin_fetch_i64(TCGv_i64, TCGv, TCGv_i64, TCGArg, MemOp); +void tcg_gen_atomic_umin_fetch_i32(TCGv_i32, TCGv, TCGv_i32, TCGArg, MemOp); +void tcg_gen_atomic_umin_fetch_i64(TCGv_i64, TCGv, TCGv_i64, TCGArg, MemOp); +void tcg_gen_atomic_smax_fetch_i32(TCGv_i32, TCGv, TCGv_i32, TCGArg, MemOp); +void tcg_gen_atomic_smax_fetch_i64(TCGv_i64, TCGv, TCGv_i64, TCGArg, MemOp); +void tcg_gen_atomic_umax_fetch_i32(TCGv_i32, TCGv, TCGv_i32, TCGArg, MemOp); +void tcg_gen_atomic_umax_fetch_i64(TCGv_i64, TCGv, TCGv_i64, TCGArg, MemOp); + +void tcg_gen_mov_vec(TCGv_vec, TCGv_vec); +void tcg_gen_dup_i32_vec(unsigned vece, TCGv_vec, TCGv_i32); +void tcg_gen_dup_i64_vec(unsigned vece, TCGv_vec, TCGv_i64); +void tcg_gen_dup_mem_vec(unsigned vece, TCGv_vec, TCGv_ptr, tcg_target_long); +void tcg_gen_dup8i_vec(TCGv_vec, uint32_t); +void tcg_gen_dup16i_vec(TCGv_vec, uint32_t); +void tcg_gen_dup32i_vec(TCGv_vec, uint32_t); +void tcg_gen_dup64i_vec(TCGv_vec, uint64_t); +void tcg_gen_dupi_vec(unsigned vece, TCGv_vec, uint64_t); +void tcg_gen_add_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_vec b); +void tcg_gen_sub_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_vec b); +void tcg_gen_mul_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_vec b); +void tcg_gen_and_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_vec b); +void tcg_gen_or_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_vec b); +void tcg_gen_xor_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_vec b); +void tcg_gen_andc_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_vec b); +void tcg_gen_orc_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_vec b); +void tcg_gen_nand_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_vec b); +void tcg_gen_nor_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_vec b); +void tcg_gen_eqv_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_vec b); +void tcg_gen_not_vec(unsigned vece, TCGv_vec r, TCGv_vec a); +void tcg_gen_neg_vec(unsigned vece, TCGv_vec r, TCGv_vec a); +void tcg_gen_abs_vec(unsigned vece, TCGv_vec r, TCGv_vec a); +void tcg_gen_ssadd_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_vec b); +void tcg_gen_usadd_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_vec b); +void tcg_gen_sssub_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_vec b); +void tcg_gen_ussub_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_vec b); +void tcg_gen_smin_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_vec b); +void tcg_gen_umin_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_vec b); +void tcg_gen_smax_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_vec b); +void tcg_gen_umax_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_vec b); + +void tcg_gen_shli_vec(unsigned vece, TCGv_vec r, TCGv_vec a, int64_t i); +void tcg_gen_shri_vec(unsigned vece, TCGv_vec r, TCGv_vec a, int64_t i); +void tcg_gen_sari_vec(unsigned vece, TCGv_vec r, TCGv_vec a, int64_t i); +void tcg_gen_rotli_vec(unsigned vece, TCGv_vec r, TCGv_vec a, int64_t i); +void tcg_gen_rotri_vec(unsigned vece, TCGv_vec r, TCGv_vec a, int64_t i); + +void tcg_gen_shls_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_i32 s); +void tcg_gen_shrs_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_i32 s); +void tcg_gen_sars_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_i32 s); +void tcg_gen_rotls_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_i32 s); + +void tcg_gen_shlv_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_vec s); +void tcg_gen_shrv_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_vec s); +void tcg_gen_sarv_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_vec s); +void tcg_gen_rotlv_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_vec s); +void tcg_gen_rotrv_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_vec s); + +void tcg_gen_cmp_vec(TCGCond cond, unsigned vece, TCGv_vec r, + TCGv_vec a, TCGv_vec b); + +void tcg_gen_bitsel_vec(unsigned vece, TCGv_vec r, TCGv_vec a, + TCGv_vec b, TCGv_vec c); +void tcg_gen_cmpsel_vec(TCGCond cond, unsigned vece, TCGv_vec r, + TCGv_vec a, TCGv_vec b, TCGv_vec c, TCGv_vec d); + +void tcg_gen_ld_vec(TCGv_vec r, TCGv_ptr base, TCGArg offset); +void tcg_gen_st_vec(TCGv_vec r, TCGv_ptr base, TCGArg offset); +void tcg_gen_stl_vec(TCGv_vec r, TCGv_ptr base, TCGArg offset, TCGType t); + +#if TARGET_LONG_BITS == 64 +#define tcg_gen_movi_tl tcg_gen_movi_i64 +#define tcg_gen_mov_tl tcg_gen_mov_i64 +#define tcg_gen_ld8u_tl tcg_gen_ld8u_i64 +#define tcg_gen_ld8s_tl tcg_gen_ld8s_i64 +#define tcg_gen_ld16u_tl tcg_gen_ld16u_i64 +#define tcg_gen_ld16s_tl tcg_gen_ld16s_i64 +#define tcg_gen_ld32u_tl tcg_gen_ld32u_i64 +#define tcg_gen_ld32s_tl tcg_gen_ld32s_i64 +#define tcg_gen_ld_tl tcg_gen_ld_i64 +#define tcg_gen_st8_tl tcg_gen_st8_i64 +#define tcg_gen_st16_tl tcg_gen_st16_i64 +#define tcg_gen_st32_tl tcg_gen_st32_i64 +#define tcg_gen_st_tl tcg_gen_st_i64 +#define tcg_gen_add_tl tcg_gen_add_i64 +#define tcg_gen_addi_tl tcg_gen_addi_i64 +#define tcg_gen_sub_tl tcg_gen_sub_i64 +#define tcg_gen_neg_tl tcg_gen_neg_i64 +#define tcg_gen_abs_tl tcg_gen_abs_i64 +#define tcg_gen_subfi_tl tcg_gen_subfi_i64 +#define tcg_gen_subi_tl tcg_gen_subi_i64 +#define tcg_gen_and_tl tcg_gen_and_i64 +#define tcg_gen_andi_tl tcg_gen_andi_i64 +#define tcg_gen_or_tl tcg_gen_or_i64 +#define tcg_gen_ori_tl tcg_gen_ori_i64 +#define tcg_gen_xor_tl tcg_gen_xor_i64 +#define tcg_gen_xori_tl tcg_gen_xori_i64 +#define tcg_gen_not_tl tcg_gen_not_i64 +#define tcg_gen_shl_tl tcg_gen_shl_i64 +#define tcg_gen_shli_tl tcg_gen_shli_i64 +#define tcg_gen_shr_tl tcg_gen_shr_i64 +#define tcg_gen_shri_tl tcg_gen_shri_i64 +#define tcg_gen_sar_tl tcg_gen_sar_i64 +#define tcg_gen_sari_tl tcg_gen_sari_i64 +#define tcg_gen_brcond_tl tcg_gen_brcond_i64 +#define tcg_gen_brcondi_tl tcg_gen_brcondi_i64 +#define tcg_gen_setcond_tl tcg_gen_setcond_i64 +#define tcg_gen_setcondi_tl tcg_gen_setcondi_i64 +#define tcg_gen_mul_tl tcg_gen_mul_i64 +#define tcg_gen_muli_tl tcg_gen_muli_i64 +#define tcg_gen_div_tl tcg_gen_div_i64 +#define tcg_gen_rem_tl tcg_gen_rem_i64 +#define tcg_gen_divu_tl tcg_gen_divu_i64 +#define tcg_gen_remu_tl tcg_gen_remu_i64 +#define tcg_gen_discard_tl tcg_gen_discard_i64 +#define tcg_gen_trunc_tl_i32 tcg_gen_extrl_i64_i32 +#define tcg_gen_trunc_i64_tl tcg_gen_mov_i64 +#define tcg_gen_extu_i32_tl tcg_gen_extu_i32_i64 +#define tcg_gen_ext_i32_tl tcg_gen_ext_i32_i64 +#define tcg_gen_extu_tl_i64 tcg_gen_mov_i64 +#define tcg_gen_ext_tl_i64 tcg_gen_mov_i64 +#define tcg_gen_ext8u_tl tcg_gen_ext8u_i64 +#define tcg_gen_ext8s_tl tcg_gen_ext8s_i64 +#define tcg_gen_ext16u_tl tcg_gen_ext16u_i64 +#define tcg_gen_ext16s_tl tcg_gen_ext16s_i64 +#define tcg_gen_ext32u_tl tcg_gen_ext32u_i64 +#define tcg_gen_ext32s_tl tcg_gen_ext32s_i64 +#define tcg_gen_bswap16_tl tcg_gen_bswap16_i64 +#define tcg_gen_bswap32_tl tcg_gen_bswap32_i64 +#define tcg_gen_bswap64_tl tcg_gen_bswap64_i64 +#define tcg_gen_concat_tl_i64 tcg_gen_concat32_i64 +#define tcg_gen_extr_i64_tl tcg_gen_extr32_i64 +#define tcg_gen_andc_tl tcg_gen_andc_i64 +#define tcg_gen_eqv_tl tcg_gen_eqv_i64 +#define tcg_gen_nand_tl tcg_gen_nand_i64 +#define tcg_gen_nor_tl tcg_gen_nor_i64 +#define tcg_gen_orc_tl tcg_gen_orc_i64 +#define tcg_gen_clz_tl tcg_gen_clz_i64 +#define tcg_gen_ctz_tl tcg_gen_ctz_i64 +#define tcg_gen_clzi_tl tcg_gen_clzi_i64 +#define tcg_gen_ctzi_tl tcg_gen_ctzi_i64 +#define tcg_gen_clrsb_tl tcg_gen_clrsb_i64 +#define tcg_gen_ctpop_tl tcg_gen_ctpop_i64 +#define tcg_gen_rotl_tl tcg_gen_rotl_i64 +#define tcg_gen_rotli_tl tcg_gen_rotli_i64 +#define tcg_gen_rotr_tl tcg_gen_rotr_i64 +#define tcg_gen_rotri_tl tcg_gen_rotri_i64 +#define tcg_gen_deposit_tl tcg_gen_deposit_i64 +#define tcg_gen_deposit_z_tl tcg_gen_deposit_z_i64 +#define tcg_gen_extract_tl tcg_gen_extract_i64 +#define tcg_gen_sextract_tl tcg_gen_sextract_i64 +#define tcg_gen_extract2_tl tcg_gen_extract2_i64 +#define tcg_const_tl tcg_const_i64 +#define tcg_const_local_tl tcg_const_local_i64 +#define tcg_gen_movcond_tl tcg_gen_movcond_i64 +#define tcg_gen_add2_tl tcg_gen_add2_i64 +#define tcg_gen_sub2_tl tcg_gen_sub2_i64 +#define tcg_gen_mulu2_tl tcg_gen_mulu2_i64 +#define tcg_gen_muls2_tl tcg_gen_muls2_i64 +#define tcg_gen_mulsu2_tl tcg_gen_mulsu2_i64 +#define tcg_gen_smin_tl tcg_gen_smin_i64 +#define tcg_gen_umin_tl tcg_gen_umin_i64 +#define tcg_gen_smax_tl tcg_gen_smax_i64 +#define tcg_gen_umax_tl tcg_gen_umax_i64 +#define tcg_gen_atomic_cmpxchg_tl tcg_gen_atomic_cmpxchg_i64 +#define tcg_gen_atomic_xchg_tl tcg_gen_atomic_xchg_i64 +#define tcg_gen_atomic_fetch_add_tl tcg_gen_atomic_fetch_add_i64 +#define tcg_gen_atomic_fetch_and_tl tcg_gen_atomic_fetch_and_i64 +#define tcg_gen_atomic_fetch_or_tl tcg_gen_atomic_fetch_or_i64 +#define tcg_gen_atomic_fetch_xor_tl tcg_gen_atomic_fetch_xor_i64 +#define tcg_gen_atomic_fetch_smin_tl tcg_gen_atomic_fetch_smin_i64 +#define tcg_gen_atomic_fetch_umin_tl tcg_gen_atomic_fetch_umin_i64 +#define tcg_gen_atomic_fetch_smax_tl tcg_gen_atomic_fetch_smax_i64 +#define tcg_gen_atomic_fetch_umax_tl tcg_gen_atomic_fetch_umax_i64 +#define tcg_gen_atomic_add_fetch_tl tcg_gen_atomic_add_fetch_i64 +#define tcg_gen_atomic_and_fetch_tl tcg_gen_atomic_and_fetch_i64 +#define tcg_gen_atomic_or_fetch_tl tcg_gen_atomic_or_fetch_i64 +#define tcg_gen_atomic_xor_fetch_tl tcg_gen_atomic_xor_fetch_i64 +#define tcg_gen_atomic_smin_fetch_tl tcg_gen_atomic_smin_fetch_i64 +#define tcg_gen_atomic_umin_fetch_tl tcg_gen_atomic_umin_fetch_i64 +#define tcg_gen_atomic_smax_fetch_tl tcg_gen_atomic_smax_fetch_i64 +#define tcg_gen_atomic_umax_fetch_tl tcg_gen_atomic_umax_fetch_i64 +#define tcg_gen_dup_tl_vec tcg_gen_dup_i64_vec +#else +#define tcg_gen_movi_tl tcg_gen_movi_i32 +#define tcg_gen_mov_tl tcg_gen_mov_i32 +#define tcg_gen_ld8u_tl tcg_gen_ld8u_i32 +#define tcg_gen_ld8s_tl tcg_gen_ld8s_i32 +#define tcg_gen_ld16u_tl tcg_gen_ld16u_i32 +#define tcg_gen_ld16s_tl tcg_gen_ld16s_i32 +#define tcg_gen_ld32u_tl tcg_gen_ld_i32 +#define tcg_gen_ld32s_tl tcg_gen_ld_i32 +#define tcg_gen_ld_tl tcg_gen_ld_i32 +#define tcg_gen_st8_tl tcg_gen_st8_i32 +#define tcg_gen_st16_tl tcg_gen_st16_i32 +#define tcg_gen_st32_tl tcg_gen_st_i32 +#define tcg_gen_st_tl tcg_gen_st_i32 +#define tcg_gen_add_tl tcg_gen_add_i32 +#define tcg_gen_addi_tl tcg_gen_addi_i32 +#define tcg_gen_sub_tl tcg_gen_sub_i32 +#define tcg_gen_neg_tl tcg_gen_neg_i32 +#define tcg_gen_abs_tl tcg_gen_abs_i32 +#define tcg_gen_subfi_tl tcg_gen_subfi_i32 +#define tcg_gen_subi_tl tcg_gen_subi_i32 +#define tcg_gen_and_tl tcg_gen_and_i32 +#define tcg_gen_andi_tl tcg_gen_andi_i32 +#define tcg_gen_or_tl tcg_gen_or_i32 +#define tcg_gen_ori_tl tcg_gen_ori_i32 +#define tcg_gen_xor_tl tcg_gen_xor_i32 +#define tcg_gen_xori_tl tcg_gen_xori_i32 +#define tcg_gen_not_tl tcg_gen_not_i32 +#define tcg_gen_shl_tl tcg_gen_shl_i32 +#define tcg_gen_shli_tl tcg_gen_shli_i32 +#define tcg_gen_shr_tl tcg_gen_shr_i32 +#define tcg_gen_shri_tl tcg_gen_shri_i32 +#define tcg_gen_sar_tl tcg_gen_sar_i32 +#define tcg_gen_sari_tl tcg_gen_sari_i32 +#define tcg_gen_brcond_tl tcg_gen_brcond_i32 +#define tcg_gen_brcondi_tl tcg_gen_brcondi_i32 +#define tcg_gen_setcond_tl tcg_gen_setcond_i32 +#define tcg_gen_setcondi_tl tcg_gen_setcondi_i32 +#define tcg_gen_mul_tl tcg_gen_mul_i32 +#define tcg_gen_muli_tl tcg_gen_muli_i32 +#define tcg_gen_div_tl tcg_gen_div_i32 +#define tcg_gen_rem_tl tcg_gen_rem_i32 +#define tcg_gen_divu_tl tcg_gen_divu_i32 +#define tcg_gen_remu_tl tcg_gen_remu_i32 +#define tcg_gen_discard_tl tcg_gen_discard_i32 +#define tcg_gen_trunc_tl_i32 tcg_gen_mov_i32 +#define tcg_gen_trunc_i64_tl tcg_gen_extrl_i64_i32 +#define tcg_gen_extu_i32_tl tcg_gen_mov_i32 +#define tcg_gen_ext_i32_tl tcg_gen_mov_i32 +#define tcg_gen_extu_tl_i64 tcg_gen_extu_i32_i64 +#define tcg_gen_ext_tl_i64 tcg_gen_ext_i32_i64 +#define tcg_gen_ext8u_tl tcg_gen_ext8u_i32 +#define tcg_gen_ext8s_tl tcg_gen_ext8s_i32 +#define tcg_gen_ext16u_tl tcg_gen_ext16u_i32 +#define tcg_gen_ext16s_tl tcg_gen_ext16s_i32 +#define tcg_gen_ext32u_tl tcg_gen_mov_i32 +#define tcg_gen_ext32s_tl tcg_gen_mov_i32 +#define tcg_gen_bswap16_tl tcg_gen_bswap16_i32 +#define tcg_gen_bswap32_tl tcg_gen_bswap32_i32 +#define tcg_gen_concat_tl_i64 tcg_gen_concat_i32_i64 +#define tcg_gen_extr_i64_tl tcg_gen_extr_i64_i32 +#define tcg_gen_andc_tl tcg_gen_andc_i32 +#define tcg_gen_eqv_tl tcg_gen_eqv_i32 +#define tcg_gen_nand_tl tcg_gen_nand_i32 +#define tcg_gen_nor_tl tcg_gen_nor_i32 +#define tcg_gen_orc_tl tcg_gen_orc_i32 +#define tcg_gen_clz_tl tcg_gen_clz_i32 +#define tcg_gen_ctz_tl tcg_gen_ctz_i32 +#define tcg_gen_clzi_tl tcg_gen_clzi_i32 +#define tcg_gen_ctzi_tl tcg_gen_ctzi_i32 +#define tcg_gen_clrsb_tl tcg_gen_clrsb_i32 +#define tcg_gen_ctpop_tl tcg_gen_ctpop_i32 +#define tcg_gen_rotl_tl tcg_gen_rotl_i32 +#define tcg_gen_rotli_tl tcg_gen_rotli_i32 +#define tcg_gen_rotr_tl tcg_gen_rotr_i32 +#define tcg_gen_rotri_tl tcg_gen_rotri_i32 +#define tcg_gen_deposit_tl tcg_gen_deposit_i32 +#define tcg_gen_deposit_z_tl tcg_gen_deposit_z_i32 +#define tcg_gen_extract_tl tcg_gen_extract_i32 +#define tcg_gen_sextract_tl tcg_gen_sextract_i32 +#define tcg_gen_extract2_tl tcg_gen_extract2_i32 +#define tcg_const_tl tcg_const_i32 +#define tcg_const_local_tl tcg_const_local_i32 +#define tcg_gen_movcond_tl tcg_gen_movcond_i32 +#define tcg_gen_add2_tl tcg_gen_add2_i32 +#define tcg_gen_sub2_tl tcg_gen_sub2_i32 +#define tcg_gen_mulu2_tl tcg_gen_mulu2_i32 +#define tcg_gen_muls2_tl tcg_gen_muls2_i32 +#define tcg_gen_mulsu2_tl tcg_gen_mulsu2_i32 +#define tcg_gen_smin_tl tcg_gen_smin_i32 +#define tcg_gen_umin_tl tcg_gen_umin_i32 +#define tcg_gen_smax_tl tcg_gen_smax_i32 +#define tcg_gen_umax_tl tcg_gen_umax_i32 +#define tcg_gen_atomic_cmpxchg_tl tcg_gen_atomic_cmpxchg_i32 +#define tcg_gen_atomic_xchg_tl tcg_gen_atomic_xchg_i32 +#define tcg_gen_atomic_fetch_add_tl tcg_gen_atomic_fetch_add_i32 +#define tcg_gen_atomic_fetch_and_tl tcg_gen_atomic_fetch_and_i32 +#define tcg_gen_atomic_fetch_or_tl tcg_gen_atomic_fetch_or_i32 +#define tcg_gen_atomic_fetch_xor_tl tcg_gen_atomic_fetch_xor_i32 +#define tcg_gen_atomic_fetch_smin_tl tcg_gen_atomic_fetch_smin_i32 +#define tcg_gen_atomic_fetch_umin_tl tcg_gen_atomic_fetch_umin_i32 +#define tcg_gen_atomic_fetch_smax_tl tcg_gen_atomic_fetch_smax_i32 +#define tcg_gen_atomic_fetch_umax_tl tcg_gen_atomic_fetch_umax_i32 +#define tcg_gen_atomic_add_fetch_tl tcg_gen_atomic_add_fetch_i32 +#define tcg_gen_atomic_and_fetch_tl tcg_gen_atomic_and_fetch_i32 +#define tcg_gen_atomic_or_fetch_tl tcg_gen_atomic_or_fetch_i32 +#define tcg_gen_atomic_xor_fetch_tl tcg_gen_atomic_xor_fetch_i32 +#define tcg_gen_atomic_smin_fetch_tl tcg_gen_atomic_smin_fetch_i32 +#define tcg_gen_atomic_umin_fetch_tl tcg_gen_atomic_umin_fetch_i32 +#define tcg_gen_atomic_smax_fetch_tl tcg_gen_atomic_smax_fetch_i32 +#define tcg_gen_atomic_umax_fetch_tl tcg_gen_atomic_umax_fetch_i32 +#define tcg_gen_dup_tl_vec tcg_gen_dup_i32_vec +#endif + +#if UINTPTR_MAX == UINT32_MAX +# define PTR i32 +# define NAT TCGv_i32 +#else +# define PTR i64 +# define NAT TCGv_i64 +#endif + +static inline void tcg_gen_ld_ptr(TCGv_ptr r, TCGv_ptr a, intptr_t o) +{ + glue(tcg_gen_ld_,PTR)((NAT)r, a, o); +} + +static inline void tcg_gen_st_ptr(TCGv_ptr r, TCGv_ptr a, intptr_t o) +{ + glue(tcg_gen_st_, PTR)((NAT)r, a, o); +} + +static inline void tcg_gen_discard_ptr(TCGv_ptr a) +{ + glue(tcg_gen_discard_,PTR)((NAT)a); +} + +static inline void tcg_gen_add_ptr(TCGv_ptr r, TCGv_ptr a, TCGv_ptr b) +{ + glue(tcg_gen_add_,PTR)((NAT)r, (NAT)a, (NAT)b); +} + +static inline void tcg_gen_addi_ptr(TCGv_ptr r, TCGv_ptr a, intptr_t b) +{ + glue(tcg_gen_addi_,PTR)((NAT)r, (NAT)a, b); +} + +static inline void tcg_gen_brcondi_ptr(TCGCond cond, TCGv_ptr a, + intptr_t b, TCGLabel *label) +{ + glue(tcg_gen_brcondi_,PTR)(cond, (NAT)a, b, label); +} + +static inline void tcg_gen_ext_i32_ptr(TCGv_ptr r, TCGv_i32 a) +{ +#if UINTPTR_MAX == UINT32_MAX + tcg_gen_mov_i32((NAT)r, a); +#else + tcg_gen_ext_i32_i64((NAT)r, a); +#endif +} + +static inline void tcg_gen_trunc_i64_ptr(TCGv_ptr r, TCGv_i64 a) +{ +#if UINTPTR_MAX == UINT32_MAX + tcg_gen_extrl_i64_i32((NAT)r, a); +#else + tcg_gen_mov_i64((NAT)r, a); +#endif +} + +static inline void tcg_gen_extu_ptr_i64(TCGv_i64 r, TCGv_ptr a) +{ +#if UINTPTR_MAX == UINT32_MAX + tcg_gen_extu_i32_i64(r, (NAT)a); +#else + tcg_gen_mov_i64(r, (NAT)a); +#endif +} + +static inline void tcg_gen_trunc_ptr_i32(TCGv_i32 r, TCGv_ptr a) +{ +#if UINTPTR_MAX == UINT32_MAX + tcg_gen_mov_i32(r, (NAT)a); +#else + tcg_gen_extrl_i64_i32(r, (NAT)a); +#endif +} + +#undef PTR +#undef NAT + +#endif /* TCG_TCG_OP_H */ diff --git a/include/tcg/tcg-opc.h b/include/tcg/tcg-opc.h new file mode 100644 index 000000000..67092e82c --- /dev/null +++ b/include/tcg/tcg-opc.h @@ -0,0 +1,281 @@ +/* + * Tiny Code Generator for QEMU + * + * Copyright (c) 2008 Fabrice Bellard + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ + +/* + * DEF(name, oargs, iargs, cargs, flags) + */ + +/* predefined ops */ +DEF(discard, 1, 0, 0, TCG_OPF_NOT_PRESENT) +DEF(set_label, 0, 0, 1, TCG_OPF_BB_END | TCG_OPF_NOT_PRESENT) + +/* variable number of parameters */ +DEF(call, 0, 0, 3, TCG_OPF_CALL_CLOBBER | TCG_OPF_NOT_PRESENT) + +DEF(br, 0, 0, 1, TCG_OPF_BB_END) + +#define IMPL(X) (__builtin_constant_p(X) && (X) <= 0 ? TCG_OPF_NOT_PRESENT : 0) +#if TCG_TARGET_REG_BITS == 32 +# define IMPL64 TCG_OPF_64BIT | TCG_OPF_NOT_PRESENT +#else +# define IMPL64 TCG_OPF_64BIT +#endif + +DEF(mb, 0, 0, 1, 0) + +DEF(mov_i32, 1, 1, 0, TCG_OPF_NOT_PRESENT) +DEF(movi_i32, 1, 0, 1, TCG_OPF_NOT_PRESENT) +DEF(setcond_i32, 1, 2, 1, 0) +DEF(movcond_i32, 1, 4, 1, IMPL(TCG_TARGET_HAS_movcond_i32)) +/* load/store */ +DEF(ld8u_i32, 1, 1, 1, 0) +DEF(ld8s_i32, 1, 1, 1, 0) +DEF(ld16u_i32, 1, 1, 1, 0) +DEF(ld16s_i32, 1, 1, 1, 0) +DEF(ld_i32, 1, 1, 1, 0) +DEF(st8_i32, 0, 2, 1, 0) +DEF(st16_i32, 0, 2, 1, 0) +DEF(st_i32, 0, 2, 1, 0) +/* arith */ +DEF(add_i32, 1, 2, 0, 0) +DEF(sub_i32, 1, 2, 0, 0) +DEF(mul_i32, 1, 2, 0, 0) +DEF(div_i32, 1, 2, 0, IMPL(TCG_TARGET_HAS_div_i32)) +DEF(divu_i32, 1, 2, 0, IMPL(TCG_TARGET_HAS_div_i32)) +DEF(rem_i32, 1, 2, 0, IMPL(TCG_TARGET_HAS_rem_i32)) +DEF(remu_i32, 1, 2, 0, IMPL(TCG_TARGET_HAS_rem_i32)) +DEF(div2_i32, 2, 3, 0, IMPL(TCG_TARGET_HAS_div2_i32)) +DEF(divu2_i32, 2, 3, 0, IMPL(TCG_TARGET_HAS_div2_i32)) +DEF(and_i32, 1, 2, 0, 0) +DEF(or_i32, 1, 2, 0, 0) +DEF(xor_i32, 1, 2, 0, 0) +/* shifts/rotates */ +DEF(shl_i32, 1, 2, 0, 0) +DEF(shr_i32, 1, 2, 0, 0) +DEF(sar_i32, 1, 2, 0, 0) +DEF(rotl_i32, 1, 2, 0, IMPL(TCG_TARGET_HAS_rot_i32)) +DEF(rotr_i32, 1, 2, 0, IMPL(TCG_TARGET_HAS_rot_i32)) +DEF(deposit_i32, 1, 2, 2, IMPL(TCG_TARGET_HAS_deposit_i32)) +DEF(extract_i32, 1, 1, 2, IMPL(TCG_TARGET_HAS_extract_i32)) +DEF(sextract_i32, 1, 1, 2, IMPL(TCG_TARGET_HAS_sextract_i32)) +DEF(extract2_i32, 1, 2, 1, IMPL(TCG_TARGET_HAS_extract2_i32)) + +DEF(brcond_i32, 0, 2, 2, TCG_OPF_BB_END | TCG_OPF_COND_BRANCH) + +DEF(add2_i32, 2, 4, 0, IMPL(TCG_TARGET_HAS_add2_i32)) +DEF(sub2_i32, 2, 4, 0, IMPL(TCG_TARGET_HAS_sub2_i32)) +DEF(mulu2_i32, 2, 2, 0, IMPL(TCG_TARGET_HAS_mulu2_i32)) +DEF(muls2_i32, 2, 2, 0, IMPL(TCG_TARGET_HAS_muls2_i32)) +DEF(muluh_i32, 1, 2, 0, IMPL(TCG_TARGET_HAS_muluh_i32)) +DEF(mulsh_i32, 1, 2, 0, IMPL(TCG_TARGET_HAS_mulsh_i32)) +DEF(brcond2_i32, 0, 4, 2, + TCG_OPF_BB_END | TCG_OPF_COND_BRANCH | IMPL(TCG_TARGET_REG_BITS == 32)) +DEF(setcond2_i32, 1, 4, 1, IMPL(TCG_TARGET_REG_BITS == 32)) + +DEF(ext8s_i32, 1, 1, 0, IMPL(TCG_TARGET_HAS_ext8s_i32)) +DEF(ext16s_i32, 1, 1, 0, IMPL(TCG_TARGET_HAS_ext16s_i32)) +DEF(ext8u_i32, 1, 1, 0, IMPL(TCG_TARGET_HAS_ext8u_i32)) +DEF(ext16u_i32, 1, 1, 0, IMPL(TCG_TARGET_HAS_ext16u_i32)) +DEF(bswap16_i32, 1, 1, 0, IMPL(TCG_TARGET_HAS_bswap16_i32)) +DEF(bswap32_i32, 1, 1, 0, IMPL(TCG_TARGET_HAS_bswap32_i32)) +DEF(not_i32, 1, 1, 0, IMPL(TCG_TARGET_HAS_not_i32)) +DEF(neg_i32, 1, 1, 0, IMPL(TCG_TARGET_HAS_neg_i32)) +DEF(andc_i32, 1, 2, 0, IMPL(TCG_TARGET_HAS_andc_i32)) +DEF(orc_i32, 1, 2, 0, IMPL(TCG_TARGET_HAS_orc_i32)) +DEF(eqv_i32, 1, 2, 0, IMPL(TCG_TARGET_HAS_eqv_i32)) +DEF(nand_i32, 1, 2, 0, IMPL(TCG_TARGET_HAS_nand_i32)) +DEF(nor_i32, 1, 2, 0, IMPL(TCG_TARGET_HAS_nor_i32)) +DEF(clz_i32, 1, 2, 0, IMPL(TCG_TARGET_HAS_clz_i32)) +DEF(ctz_i32, 1, 2, 0, IMPL(TCG_TARGET_HAS_ctz_i32)) +DEF(ctpop_i32, 1, 1, 0, IMPL(TCG_TARGET_HAS_ctpop_i32)) + +DEF(mov_i64, 1, 1, 0, TCG_OPF_64BIT | TCG_OPF_NOT_PRESENT) +DEF(movi_i64, 1, 0, 1, TCG_OPF_64BIT | TCG_OPF_NOT_PRESENT) +DEF(setcond_i64, 1, 2, 1, IMPL64) +DEF(movcond_i64, 1, 4, 1, IMPL64 | IMPL(TCG_TARGET_HAS_movcond_i64)) +/* load/store */ +DEF(ld8u_i64, 1, 1, 1, IMPL64) +DEF(ld8s_i64, 1, 1, 1, IMPL64) +DEF(ld16u_i64, 1, 1, 1, IMPL64) +DEF(ld16s_i64, 1, 1, 1, IMPL64) +DEF(ld32u_i64, 1, 1, 1, IMPL64) +DEF(ld32s_i64, 1, 1, 1, IMPL64) +DEF(ld_i64, 1, 1, 1, IMPL64) +DEF(st8_i64, 0, 2, 1, IMPL64) +DEF(st16_i64, 0, 2, 1, IMPL64) +DEF(st32_i64, 0, 2, 1, IMPL64) +DEF(st_i64, 0, 2, 1, IMPL64) +/* arith */ +DEF(add_i64, 1, 2, 0, IMPL64) +DEF(sub_i64, 1, 2, 0, IMPL64) +DEF(mul_i64, 1, 2, 0, IMPL64) +DEF(div_i64, 1, 2, 0, IMPL64 | IMPL(TCG_TARGET_HAS_div_i64)) +DEF(divu_i64, 1, 2, 0, IMPL64 | IMPL(TCG_TARGET_HAS_div_i64)) +DEF(rem_i64, 1, 2, 0, IMPL64 | IMPL(TCG_TARGET_HAS_rem_i64)) +DEF(remu_i64, 1, 2, 0, IMPL64 | IMPL(TCG_TARGET_HAS_rem_i64)) +DEF(div2_i64, 2, 3, 0, IMPL64 | IMPL(TCG_TARGET_HAS_div2_i64)) +DEF(divu2_i64, 2, 3, 0, IMPL64 | IMPL(TCG_TARGET_HAS_div2_i64)) +DEF(and_i64, 1, 2, 0, IMPL64) +DEF(or_i64, 1, 2, 0, IMPL64) +DEF(xor_i64, 1, 2, 0, IMPL64) +/* shifts/rotates */ +DEF(shl_i64, 1, 2, 0, IMPL64) +DEF(shr_i64, 1, 2, 0, IMPL64) +DEF(sar_i64, 1, 2, 0, IMPL64) +DEF(rotl_i64, 1, 2, 0, IMPL64 | IMPL(TCG_TARGET_HAS_rot_i64)) +DEF(rotr_i64, 1, 2, 0, IMPL64 | IMPL(TCG_TARGET_HAS_rot_i64)) +DEF(deposit_i64, 1, 2, 2, IMPL64 | IMPL(TCG_TARGET_HAS_deposit_i64)) +DEF(extract_i64, 1, 1, 2, IMPL64 | IMPL(TCG_TARGET_HAS_extract_i64)) +DEF(sextract_i64, 1, 1, 2, IMPL64 | IMPL(TCG_TARGET_HAS_sextract_i64)) +DEF(extract2_i64, 1, 2, 1, IMPL64 | IMPL(TCG_TARGET_HAS_extract2_i64)) + +/* size changing ops */ +DEF(ext_i32_i64, 1, 1, 0, IMPL64) +DEF(extu_i32_i64, 1, 1, 0, IMPL64) +DEF(extrl_i64_i32, 1, 1, 0, + IMPL(TCG_TARGET_HAS_extrl_i64_i32) + | (TCG_TARGET_REG_BITS == 32 ? TCG_OPF_NOT_PRESENT : 0)) +DEF(extrh_i64_i32, 1, 1, 0, + IMPL(TCG_TARGET_HAS_extrh_i64_i32) + | (TCG_TARGET_REG_BITS == 32 ? TCG_OPF_NOT_PRESENT : 0)) + +DEF(brcond_i64, 0, 2, 2, TCG_OPF_BB_END | TCG_OPF_COND_BRANCH | IMPL64) +DEF(ext8s_i64, 1, 1, 0, IMPL64 | IMPL(TCG_TARGET_HAS_ext8s_i64)) +DEF(ext16s_i64, 1, 1, 0, IMPL64 | IMPL(TCG_TARGET_HAS_ext16s_i64)) +DEF(ext32s_i64, 1, 1, 0, IMPL64 | IMPL(TCG_TARGET_HAS_ext32s_i64)) +DEF(ext8u_i64, 1, 1, 0, IMPL64 | IMPL(TCG_TARGET_HAS_ext8u_i64)) +DEF(ext16u_i64, 1, 1, 0, IMPL64 | IMPL(TCG_TARGET_HAS_ext16u_i64)) +DEF(ext32u_i64, 1, 1, 0, IMPL64 | IMPL(TCG_TARGET_HAS_ext32u_i64)) +DEF(bswap16_i64, 1, 1, 0, IMPL64 | IMPL(TCG_TARGET_HAS_bswap16_i64)) +DEF(bswap32_i64, 1, 1, 0, IMPL64 | IMPL(TCG_TARGET_HAS_bswap32_i64)) +DEF(bswap64_i64, 1, 1, 0, IMPL64 | IMPL(TCG_TARGET_HAS_bswap64_i64)) +DEF(not_i64, 1, 1, 0, IMPL64 | IMPL(TCG_TARGET_HAS_not_i64)) +DEF(neg_i64, 1, 1, 0, IMPL64 | IMPL(TCG_TARGET_HAS_neg_i64)) +DEF(andc_i64, 1, 2, 0, IMPL64 | IMPL(TCG_TARGET_HAS_andc_i64)) +DEF(orc_i64, 1, 2, 0, IMPL64 | IMPL(TCG_TARGET_HAS_orc_i64)) +DEF(eqv_i64, 1, 2, 0, IMPL64 | IMPL(TCG_TARGET_HAS_eqv_i64)) +DEF(nand_i64, 1, 2, 0, IMPL64 | IMPL(TCG_TARGET_HAS_nand_i64)) +DEF(nor_i64, 1, 2, 0, IMPL64 | IMPL(TCG_TARGET_HAS_nor_i64)) +DEF(clz_i64, 1, 2, 0, IMPL64 | IMPL(TCG_TARGET_HAS_clz_i64)) +DEF(ctz_i64, 1, 2, 0, IMPL64 | IMPL(TCG_TARGET_HAS_ctz_i64)) +DEF(ctpop_i64, 1, 1, 0, IMPL64 | IMPL(TCG_TARGET_HAS_ctpop_i64)) + +DEF(add2_i64, 2, 4, 0, IMPL64 | IMPL(TCG_TARGET_HAS_add2_i64)) +DEF(sub2_i64, 2, 4, 0, IMPL64 | IMPL(TCG_TARGET_HAS_sub2_i64)) +DEF(mulu2_i64, 2, 2, 0, IMPL64 | IMPL(TCG_TARGET_HAS_mulu2_i64)) +DEF(muls2_i64, 2, 2, 0, IMPL64 | IMPL(TCG_TARGET_HAS_muls2_i64)) +DEF(muluh_i64, 1, 2, 0, IMPL64 | IMPL(TCG_TARGET_HAS_muluh_i64)) +DEF(mulsh_i64, 1, 2, 0, IMPL64 | IMPL(TCG_TARGET_HAS_mulsh_i64)) + +#define TLADDR_ARGS (TARGET_LONG_BITS <= TCG_TARGET_REG_BITS ? 1 : 2) +#define DATA64_ARGS (TCG_TARGET_REG_BITS == 64 ? 1 : 2) + +/* QEMU specific */ +DEF(insn_start, 0, 0, TLADDR_ARGS * TARGET_INSN_START_WORDS, + TCG_OPF_NOT_PRESENT) +DEF(exit_tb, 0, 0, 1, TCG_OPF_BB_EXIT | TCG_OPF_BB_END) +DEF(goto_tb, 0, 0, 1, TCG_OPF_BB_EXIT | TCG_OPF_BB_END) +DEF(goto_ptr, 0, 1, 0, + TCG_OPF_BB_EXIT | TCG_OPF_BB_END | IMPL(TCG_TARGET_HAS_goto_ptr)) + +DEF(plugin_cb_start, 0, 0, 3, TCG_OPF_NOT_PRESENT) +DEF(plugin_cb_end, 0, 0, 0, TCG_OPF_NOT_PRESENT) + +DEF(qemu_ld_i32, 1, TLADDR_ARGS, 1, + TCG_OPF_CALL_CLOBBER | TCG_OPF_SIDE_EFFECTS) +DEF(qemu_st_i32, 0, TLADDR_ARGS + 1, 1, + TCG_OPF_CALL_CLOBBER | TCG_OPF_SIDE_EFFECTS) +DEF(qemu_ld_i64, DATA64_ARGS, TLADDR_ARGS, 1, + TCG_OPF_CALL_CLOBBER | TCG_OPF_SIDE_EFFECTS | TCG_OPF_64BIT) +DEF(qemu_st_i64, 0, TLADDR_ARGS + DATA64_ARGS, 1, + TCG_OPF_CALL_CLOBBER | TCG_OPF_SIDE_EFFECTS | TCG_OPF_64BIT) + +/* Host vector support. */ + +#define IMPLVEC TCG_OPF_VECTOR | IMPL(TCG_TARGET_MAYBE_vec) + +DEF(mov_vec, 1, 1, 0, TCG_OPF_VECTOR | TCG_OPF_NOT_PRESENT) +DEF(dupi_vec, 1, 0, 1, TCG_OPF_VECTOR | TCG_OPF_NOT_PRESENT) + +DEF(dup_vec, 1, 1, 0, IMPLVEC) +DEF(dup2_vec, 1, 2, 0, IMPLVEC | IMPL(TCG_TARGET_REG_BITS == 32)) + +DEF(ld_vec, 1, 1, 1, IMPLVEC) +DEF(st_vec, 0, 2, 1, IMPLVEC) +DEF(dupm_vec, 1, 1, 1, IMPLVEC) + +DEF(add_vec, 1, 2, 0, IMPLVEC) +DEF(sub_vec, 1, 2, 0, IMPLVEC) +DEF(mul_vec, 1, 2, 0, IMPLVEC | IMPL(TCG_TARGET_HAS_mul_vec)) +DEF(neg_vec, 1, 1, 0, IMPLVEC | IMPL(TCG_TARGET_HAS_neg_vec)) +DEF(abs_vec, 1, 1, 0, IMPLVEC | IMPL(TCG_TARGET_HAS_abs_vec)) +DEF(ssadd_vec, 1, 2, 0, IMPLVEC | IMPL(TCG_TARGET_HAS_sat_vec)) +DEF(usadd_vec, 1, 2, 0, IMPLVEC | IMPL(TCG_TARGET_HAS_sat_vec)) +DEF(sssub_vec, 1, 2, 0, IMPLVEC | IMPL(TCG_TARGET_HAS_sat_vec)) +DEF(ussub_vec, 1, 2, 0, IMPLVEC | IMPL(TCG_TARGET_HAS_sat_vec)) +DEF(smin_vec, 1, 2, 0, IMPLVEC | IMPL(TCG_TARGET_HAS_minmax_vec)) +DEF(umin_vec, 1, 2, 0, IMPLVEC | IMPL(TCG_TARGET_HAS_minmax_vec)) +DEF(smax_vec, 1, 2, 0, IMPLVEC | IMPL(TCG_TARGET_HAS_minmax_vec)) +DEF(umax_vec, 1, 2, 0, IMPLVEC | IMPL(TCG_TARGET_HAS_minmax_vec)) + +DEF(and_vec, 1, 2, 0, IMPLVEC) +DEF(or_vec, 1, 2, 0, IMPLVEC) +DEF(xor_vec, 1, 2, 0, IMPLVEC) +DEF(andc_vec, 1, 2, 0, IMPLVEC | IMPL(TCG_TARGET_HAS_andc_vec)) +DEF(orc_vec, 1, 2, 0, IMPLVEC | IMPL(TCG_TARGET_HAS_orc_vec)) +DEF(not_vec, 1, 1, 0, IMPLVEC | IMPL(TCG_TARGET_HAS_not_vec)) + +DEF(shli_vec, 1, 1, 1, IMPLVEC | IMPL(TCG_TARGET_HAS_shi_vec)) +DEF(shri_vec, 1, 1, 1, IMPLVEC | IMPL(TCG_TARGET_HAS_shi_vec)) +DEF(sari_vec, 1, 1, 1, IMPLVEC | IMPL(TCG_TARGET_HAS_shi_vec)) +DEF(rotli_vec, 1, 1, 1, IMPLVEC | IMPL(TCG_TARGET_HAS_roti_vec)) + +DEF(shls_vec, 1, 2, 0, IMPLVEC | IMPL(TCG_TARGET_HAS_shs_vec)) +DEF(shrs_vec, 1, 2, 0, IMPLVEC | IMPL(TCG_TARGET_HAS_shs_vec)) +DEF(sars_vec, 1, 2, 0, IMPLVEC | IMPL(TCG_TARGET_HAS_shs_vec)) +DEF(rotls_vec, 1, 2, 0, IMPLVEC | IMPL(TCG_TARGET_HAS_rots_vec)) + +DEF(shlv_vec, 1, 2, 0, IMPLVEC | IMPL(TCG_TARGET_HAS_shv_vec)) +DEF(shrv_vec, 1, 2, 0, IMPLVEC | IMPL(TCG_TARGET_HAS_shv_vec)) +DEF(sarv_vec, 1, 2, 0, IMPLVEC | IMPL(TCG_TARGET_HAS_shv_vec)) +DEF(rotlv_vec, 1, 2, 0, IMPLVEC | IMPL(TCG_TARGET_HAS_rotv_vec)) +DEF(rotrv_vec, 1, 2, 0, IMPLVEC | IMPL(TCG_TARGET_HAS_rotv_vec)) + +DEF(cmp_vec, 1, 2, 1, IMPLVEC) + +DEF(bitsel_vec, 1, 3, 0, IMPLVEC | IMPL(TCG_TARGET_HAS_bitsel_vec)) +DEF(cmpsel_vec, 1, 4, 1, IMPLVEC | IMPL(TCG_TARGET_HAS_cmpsel_vec)) + +DEF(last_generic, 0, 0, 0, TCG_OPF_NOT_PRESENT) + +#if TCG_TARGET_MAYBE_vec +#include "tcg-target.opc.h" +#endif + +#undef TLADDR_ARGS +#undef DATA64_ARGS +#undef IMPL +#undef IMPL64 +#undef IMPLVEC +#undef DEF diff --git a/include/tcg/tcg.h b/include/tcg/tcg.h new file mode 100644 index 000000000..8ff9dad4e --- /dev/null +++ b/include/tcg/tcg.h @@ -0,0 +1,1445 @@ +/* + * Tiny Code Generator for QEMU + * + * Copyright (c) 2008 Fabrice Bellard + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ + +#ifndef TCG_H +#define TCG_H + +#include "cpu.h" +#include "exec/memop.h" +#include "exec/tb-context.h" +#include "qemu/bitops.h" +#include "qemu/plugin.h" +#include "qemu/queue.h" +#include "tcg/tcg-mo.h" +#include "tcg-target.h" +#include "qemu/int128.h" + +/* XXX: make safe guess about sizes */ +#define MAX_OP_PER_INSTR 266 + +#if HOST_LONG_BITS == 32 +#define MAX_OPC_PARAM_PER_ARG 2 +#else +#define MAX_OPC_PARAM_PER_ARG 1 +#endif +#define MAX_OPC_PARAM_IARGS 6 +#define MAX_OPC_PARAM_OARGS 1 +#define MAX_OPC_PARAM_ARGS (MAX_OPC_PARAM_IARGS + MAX_OPC_PARAM_OARGS) + +/* A Call op needs up to 4 + 2N parameters on 32-bit archs, + * and up to 4 + N parameters on 64-bit archs + * (N = number of input arguments + output arguments). */ +#define MAX_OPC_PARAM (4 + (MAX_OPC_PARAM_PER_ARG * MAX_OPC_PARAM_ARGS)) + +#define CPU_TEMP_BUF_NLONGS 128 + +/* Default target word size to pointer size. */ +#ifndef TCG_TARGET_REG_BITS +# if UINTPTR_MAX == UINT32_MAX +# define TCG_TARGET_REG_BITS 32 +# elif UINTPTR_MAX == UINT64_MAX +# define TCG_TARGET_REG_BITS 64 +# else +# error Unknown pointer size for tcg target +# endif +#endif + +#if TCG_TARGET_REG_BITS == 32 +typedef int32_t tcg_target_long; +typedef uint32_t tcg_target_ulong; +#define TCG_PRIlx PRIx32 +#define TCG_PRIld PRId32 +#elif TCG_TARGET_REG_BITS == 64 +typedef int64_t tcg_target_long; +typedef uint64_t tcg_target_ulong; +#define TCG_PRIlx PRIx64 +#define TCG_PRIld PRId64 +#else +#error unsupported +#endif + +/* Oversized TCG guests make things like MTTCG hard + * as we can't use atomics for cputlb updates. + */ +#if TARGET_LONG_BITS > TCG_TARGET_REG_BITS +#define TCG_OVERSIZED_GUEST 1 +#else +#define TCG_OVERSIZED_GUEST 0 +#endif + +#if TCG_TARGET_NB_REGS <= 32 +typedef uint32_t TCGRegSet; +#elif TCG_TARGET_NB_REGS <= 64 +typedef uint64_t TCGRegSet; +#else +#error unsupported +#endif + +#if TCG_TARGET_REG_BITS == 32 +/* Turn some undef macros into false macros. */ +#define TCG_TARGET_HAS_extrl_i64_i32 0 +#define TCG_TARGET_HAS_extrh_i64_i32 0 +#define TCG_TARGET_HAS_div_i64 0 +#define TCG_TARGET_HAS_rem_i64 0 +#define TCG_TARGET_HAS_div2_i64 0 +#define TCG_TARGET_HAS_rot_i64 0 +#define TCG_TARGET_HAS_ext8s_i64 0 +#define TCG_TARGET_HAS_ext16s_i64 0 +#define TCG_TARGET_HAS_ext32s_i64 0 +#define TCG_TARGET_HAS_ext8u_i64 0 +#define TCG_TARGET_HAS_ext16u_i64 0 +#define TCG_TARGET_HAS_ext32u_i64 0 +#define TCG_TARGET_HAS_bswap16_i64 0 +#define TCG_TARGET_HAS_bswap32_i64 0 +#define TCG_TARGET_HAS_bswap64_i64 0 +#define TCG_TARGET_HAS_neg_i64 0 +#define TCG_TARGET_HAS_not_i64 0 +#define TCG_TARGET_HAS_andc_i64 0 +#define TCG_TARGET_HAS_orc_i64 0 +#define TCG_TARGET_HAS_eqv_i64 0 +#define TCG_TARGET_HAS_nand_i64 0 +#define TCG_TARGET_HAS_nor_i64 0 +#define TCG_TARGET_HAS_clz_i64 0 +#define TCG_TARGET_HAS_ctz_i64 0 +#define TCG_TARGET_HAS_ctpop_i64 0 +#define TCG_TARGET_HAS_deposit_i64 0 +#define TCG_TARGET_HAS_extract_i64 0 +#define TCG_TARGET_HAS_sextract_i64 0 +#define TCG_TARGET_HAS_extract2_i64 0 +#define TCG_TARGET_HAS_movcond_i64 0 +#define TCG_TARGET_HAS_add2_i64 0 +#define TCG_TARGET_HAS_sub2_i64 0 +#define TCG_TARGET_HAS_mulu2_i64 0 +#define TCG_TARGET_HAS_muls2_i64 0 +#define TCG_TARGET_HAS_muluh_i64 0 +#define TCG_TARGET_HAS_mulsh_i64 0 +/* Turn some undef macros into true macros. */ +#define TCG_TARGET_HAS_add2_i32 1 +#define TCG_TARGET_HAS_sub2_i32 1 +#endif + +#ifndef TCG_TARGET_deposit_i32_valid +#define TCG_TARGET_deposit_i32_valid(ofs, len) 1 +#endif +#ifndef TCG_TARGET_deposit_i64_valid +#define TCG_TARGET_deposit_i64_valid(ofs, len) 1 +#endif +#ifndef TCG_TARGET_extract_i32_valid +#define TCG_TARGET_extract_i32_valid(ofs, len) 1 +#endif +#ifndef TCG_TARGET_extract_i64_valid +#define TCG_TARGET_extract_i64_valid(ofs, len) 1 +#endif + +/* Only one of DIV or DIV2 should be defined. */ +#if defined(TCG_TARGET_HAS_div_i32) +#define TCG_TARGET_HAS_div2_i32 0 +#elif defined(TCG_TARGET_HAS_div2_i32) +#define TCG_TARGET_HAS_div_i32 0 +#define TCG_TARGET_HAS_rem_i32 0 +#endif +#if defined(TCG_TARGET_HAS_div_i64) +#define TCG_TARGET_HAS_div2_i64 0 +#elif defined(TCG_TARGET_HAS_div2_i64) +#define TCG_TARGET_HAS_div_i64 0 +#define TCG_TARGET_HAS_rem_i64 0 +#endif + +/* For 32-bit targets, some sort of unsigned widening multiply is required. */ +#if TCG_TARGET_REG_BITS == 32 \ + && !(defined(TCG_TARGET_HAS_mulu2_i32) \ + || defined(TCG_TARGET_HAS_muluh_i32)) +# error "Missing unsigned widening multiply" +#endif + +#if !defined(TCG_TARGET_HAS_v64) \ + && !defined(TCG_TARGET_HAS_v128) \ + && !defined(TCG_TARGET_HAS_v256) +#define TCG_TARGET_MAYBE_vec 0 +#define TCG_TARGET_HAS_abs_vec 0 +#define TCG_TARGET_HAS_neg_vec 0 +#define TCG_TARGET_HAS_not_vec 0 +#define TCG_TARGET_HAS_andc_vec 0 +#define TCG_TARGET_HAS_orc_vec 0 +#define TCG_TARGET_HAS_roti_vec 0 +#define TCG_TARGET_HAS_rots_vec 0 +#define TCG_TARGET_HAS_rotv_vec 0 +#define TCG_TARGET_HAS_shi_vec 0 +#define TCG_TARGET_HAS_shs_vec 0 +#define TCG_TARGET_HAS_shv_vec 0 +#define TCG_TARGET_HAS_mul_vec 0 +#define TCG_TARGET_HAS_sat_vec 0 +#define TCG_TARGET_HAS_minmax_vec 0 +#define TCG_TARGET_HAS_bitsel_vec 0 +#define TCG_TARGET_HAS_cmpsel_vec 0 +#else +#define TCG_TARGET_MAYBE_vec 1 +#endif +#ifndef TCG_TARGET_HAS_v64 +#define TCG_TARGET_HAS_v64 0 +#endif +#ifndef TCG_TARGET_HAS_v128 +#define TCG_TARGET_HAS_v128 0 +#endif +#ifndef TCG_TARGET_HAS_v256 +#define TCG_TARGET_HAS_v256 0 +#endif + +#ifndef TARGET_INSN_START_EXTRA_WORDS +# define TARGET_INSN_START_WORDS 1 +#else +# define TARGET_INSN_START_WORDS (1 + TARGET_INSN_START_EXTRA_WORDS) +#endif + +typedef enum TCGOpcode { +#define DEF(name, oargs, iargs, cargs, flags) INDEX_op_ ## name, +#include "tcg/tcg-opc.h" +#undef DEF + NB_OPS, +} TCGOpcode; + +#define tcg_regset_set_reg(d, r) ((d) |= (TCGRegSet)1 << (r)) +#define tcg_regset_reset_reg(d, r) ((d) &= ~((TCGRegSet)1 << (r))) +#define tcg_regset_test_reg(d, r) (((d) >> (r)) & 1) + +#ifndef TCG_TARGET_INSN_UNIT_SIZE +# error "Missing TCG_TARGET_INSN_UNIT_SIZE" +#elif TCG_TARGET_INSN_UNIT_SIZE == 1 +typedef uint8_t tcg_insn_unit; +#elif TCG_TARGET_INSN_UNIT_SIZE == 2 +typedef uint16_t tcg_insn_unit; +#elif TCG_TARGET_INSN_UNIT_SIZE == 4 +typedef uint32_t tcg_insn_unit; +#elif TCG_TARGET_INSN_UNIT_SIZE == 8 +typedef uint64_t tcg_insn_unit; +#else +/* The port better have done this. */ +#endif + + +#if defined CONFIG_DEBUG_TCG || defined QEMU_STATIC_ANALYSIS +# define tcg_debug_assert(X) do { assert(X); } while (0) +#else +# define tcg_debug_assert(X) \ + do { if (!(X)) { __builtin_unreachable(); } } while (0) +#endif + +typedef struct TCGRelocation TCGRelocation; +struct TCGRelocation { + QSIMPLEQ_ENTRY(TCGRelocation) next; + tcg_insn_unit *ptr; + intptr_t addend; + int type; +}; + +typedef struct TCGLabel TCGLabel; +struct TCGLabel { + unsigned present : 1; + unsigned has_value : 1; + unsigned id : 14; + unsigned refs : 16; + union { + uintptr_t value; + tcg_insn_unit *value_ptr; + } u; + QSIMPLEQ_HEAD(, TCGRelocation) relocs; + QSIMPLEQ_ENTRY(TCGLabel) next; +}; + +typedef struct TCGPool { + struct TCGPool *next; + int size; + uint8_t data[] __attribute__ ((aligned)); +} TCGPool; + +#define TCG_POOL_CHUNK_SIZE 32768 + +#define TCG_MAX_TEMPS 512 +#define TCG_MAX_INSNS 512 + +/* when the size of the arguments of a called function is smaller than + this value, they are statically allocated in the TB stack frame */ +#define TCG_STATIC_CALL_ARGS_SIZE 128 + +typedef enum TCGType { + TCG_TYPE_I32, + TCG_TYPE_I64, + + TCG_TYPE_V64, + TCG_TYPE_V128, + TCG_TYPE_V256, + + TCG_TYPE_COUNT, /* number of different types */ + + /* An alias for the size of the host register. */ +#if TCG_TARGET_REG_BITS == 32 + TCG_TYPE_REG = TCG_TYPE_I32, +#else + TCG_TYPE_REG = TCG_TYPE_I64, +#endif + + /* An alias for the size of the native pointer. */ +#if UINTPTR_MAX == UINT32_MAX + TCG_TYPE_PTR = TCG_TYPE_I32, +#else + TCG_TYPE_PTR = TCG_TYPE_I64, +#endif + + /* An alias for the size of the target "long", aka register. */ +#if TARGET_LONG_BITS == 64 + TCG_TYPE_TL = TCG_TYPE_I64, +#else + TCG_TYPE_TL = TCG_TYPE_I32, +#endif +} TCGType; + +/** + * get_alignment_bits + * @memop: MemOp value + * + * Extract the alignment size from the memop. + */ +static inline unsigned get_alignment_bits(MemOp memop) +{ + unsigned a = memop & MO_AMASK; + + if (a == MO_UNALN) { + /* No alignment required. */ + a = 0; + } else if (a == MO_ALIGN) { + /* A natural alignment requirement. */ + a = memop & MO_SIZE; + } else { + /* A specific alignment requirement. */ + a = a >> MO_ASHIFT; + } +#if defined(CONFIG_SOFTMMU) + /* The requested alignment cannot overlap the TLB flags. */ + tcg_debug_assert((TLB_FLAGS_MASK & ((1 << a) - 1)) == 0); +#endif + return a; +} + +typedef tcg_target_ulong TCGArg; + +/* Define type and accessor macros for TCG variables. + + TCG variables are the inputs and outputs of TCG ops, as described + in tcg/README. Target CPU front-end code uses these types to deal + with TCG variables as it emits TCG code via the tcg_gen_* functions. + They come in several flavours: + * TCGv_i32 : 32 bit integer type + * TCGv_i64 : 64 bit integer type + * TCGv_ptr : a host pointer type + * TCGv_vec : a host vector type; the exact size is not exposed + to the CPU front-end code. + * TCGv : an integer type the same size as target_ulong + (an alias for either TCGv_i32 or TCGv_i64) + The compiler's type checking will complain if you mix them + up and pass the wrong sized TCGv to a function. + + Users of tcg_gen_* don't need to know about any of the internal + details of these, and should treat them as opaque types. + You won't be able to look inside them in a debugger either. + + Internal implementation details follow: + + Note that there is no definition of the structs TCGv_i32_d etc anywhere. + This is deliberate, because the values we store in variables of type + TCGv_i32 are not really pointers-to-structures. They're just small + integers, but keeping them in pointer types like this means that the + compiler will complain if you accidentally pass a TCGv_i32 to a + function which takes a TCGv_i64, and so on. Only the internals of + TCG need to care about the actual contents of the types. */ + +typedef struct TCGv_i32_d *TCGv_i32; +typedef struct TCGv_i64_d *TCGv_i64; +typedef struct TCGv_ptr_d *TCGv_ptr; +typedef struct TCGv_vec_d *TCGv_vec; +typedef TCGv_ptr TCGv_env; +#if TARGET_LONG_BITS == 32 +#define TCGv TCGv_i32 +#elif TARGET_LONG_BITS == 64 +#define TCGv TCGv_i64 +#else +#error Unhandled TARGET_LONG_BITS value +#endif + +/* call flags */ +/* Helper does not read globals (either directly or through an exception). It + implies TCG_CALL_NO_WRITE_GLOBALS. */ +#define TCG_CALL_NO_READ_GLOBALS 0x0001 +/* Helper does not write globals */ +#define TCG_CALL_NO_WRITE_GLOBALS 0x0002 +/* Helper can be safely suppressed if the return value is not used. */ +#define TCG_CALL_NO_SIDE_EFFECTS 0x0004 +/* Helper is QEMU_NORETURN. */ +#define TCG_CALL_NO_RETURN 0x0008 + +/* convenience version of most used call flags */ +#define TCG_CALL_NO_RWG TCG_CALL_NO_READ_GLOBALS +#define TCG_CALL_NO_WG TCG_CALL_NO_WRITE_GLOBALS +#define TCG_CALL_NO_SE TCG_CALL_NO_SIDE_EFFECTS +#define TCG_CALL_NO_RWG_SE (TCG_CALL_NO_RWG | TCG_CALL_NO_SE) +#define TCG_CALL_NO_WG_SE (TCG_CALL_NO_WG | TCG_CALL_NO_SE) + +/* Used to align parameters. See the comment before tcgv_i32_temp. */ +#define TCG_CALL_DUMMY_ARG ((TCGArg)0) + +/* Conditions. Note that these are laid out for easy manipulation by + the functions below: + bit 0 is used for inverting; + bit 1 is signed, + bit 2 is unsigned, + bit 3 is used with bit 0 for swapping signed/unsigned. */ +typedef enum { + /* non-signed */ + TCG_COND_NEVER = 0 | 0 | 0 | 0, + TCG_COND_ALWAYS = 0 | 0 | 0 | 1, + TCG_COND_EQ = 8 | 0 | 0 | 0, + TCG_COND_NE = 8 | 0 | 0 | 1, + /* signed */ + TCG_COND_LT = 0 | 0 | 2 | 0, + TCG_COND_GE = 0 | 0 | 2 | 1, + TCG_COND_LE = 8 | 0 | 2 | 0, + TCG_COND_GT = 8 | 0 | 2 | 1, + /* unsigned */ + TCG_COND_LTU = 0 | 4 | 0 | 0, + TCG_COND_GEU = 0 | 4 | 0 | 1, + TCG_COND_LEU = 8 | 4 | 0 | 0, + TCG_COND_GTU = 8 | 4 | 0 | 1, +} TCGCond; + +/* Invert the sense of the comparison. */ +static inline TCGCond tcg_invert_cond(TCGCond c) +{ + return (TCGCond)(c ^ 1); +} + +/* Swap the operands in a comparison. */ +static inline TCGCond tcg_swap_cond(TCGCond c) +{ + return c & 6 ? (TCGCond)(c ^ 9) : c; +} + +/* Create an "unsigned" version of a "signed" comparison. */ +static inline TCGCond tcg_unsigned_cond(TCGCond c) +{ + return c & 2 ? (TCGCond)(c ^ 6) : c; +} + +/* Create a "signed" version of an "unsigned" comparison. */ +static inline TCGCond tcg_signed_cond(TCGCond c) +{ + return c & 4 ? (TCGCond)(c ^ 6) : c; +} + +/* Must a comparison be considered unsigned? */ +static inline bool is_unsigned_cond(TCGCond c) +{ + return (c & 4) != 0; +} + +/* Create a "high" version of a double-word comparison. + This removes equality from a LTE or GTE comparison. */ +static inline TCGCond tcg_high_cond(TCGCond c) +{ + switch (c) { + case TCG_COND_GE: + case TCG_COND_LE: + case TCG_COND_GEU: + case TCG_COND_LEU: + return (TCGCond)(c ^ 8); + default: + return c; + } +} + +typedef enum TCGTempVal { + TEMP_VAL_DEAD, + TEMP_VAL_REG, + TEMP_VAL_MEM, + TEMP_VAL_CONST, +} TCGTempVal; + +typedef struct TCGTemp { + TCGReg reg:8; + TCGTempVal val_type:8; + TCGType base_type:8; + TCGType type:8; + unsigned int fixed_reg:1; + unsigned int indirect_reg:1; + unsigned int indirect_base:1; + unsigned int mem_coherent:1; + unsigned int mem_allocated:1; + /* If true, the temp is saved across both basic blocks and + translation blocks. */ + unsigned int temp_global:1; + /* If true, the temp is saved across basic blocks but dead + at the end of translation blocks. If false, the temp is + dead at the end of basic blocks. */ + unsigned int temp_local:1; + unsigned int temp_allocated:1; + + tcg_target_long val; + struct TCGTemp *mem_base; + intptr_t mem_offset; + const char *name; + + /* Pass-specific information that can be stored for a temporary. + One word worth of integer data, and one pointer to data + allocated separately. */ + uintptr_t state; + void *state_ptr; +} TCGTemp; + +typedef struct TCGContext TCGContext; + +typedef struct TCGTempSet { + unsigned long l[BITS_TO_LONGS(TCG_MAX_TEMPS)]; +} TCGTempSet; + +/* While we limit helpers to 6 arguments, for 32-bit hosts, with padding, + this imples a max of 6*2 (64-bit in) + 2 (64-bit out) = 14 operands. + There are never more than 2 outputs, which means that we can store all + dead + sync data within 16 bits. */ +#define DEAD_ARG 4 +#define SYNC_ARG 1 +typedef uint16_t TCGLifeData; + +/* The layout here is designed to avoid a bitfield crossing of + a 32-bit boundary, which would cause GCC to add extra padding. */ +typedef struct TCGOp { + TCGOpcode opc : 8; /* 8 */ + + /* Parameters for this opcode. See below. */ + unsigned param1 : 4; /* 12 */ + unsigned param2 : 4; /* 16 */ + + /* Lifetime data of the operands. */ + unsigned life : 16; /* 32 */ + + /* Next and previous opcodes. */ + QTAILQ_ENTRY(TCGOp) link; +#ifdef CONFIG_PLUGIN + QSIMPLEQ_ENTRY(TCGOp) plugin_link; +#endif + + /* Arguments for the opcode. */ + TCGArg args[MAX_OPC_PARAM]; + + /* Register preferences for the output(s). */ + TCGRegSet output_pref[2]; +} TCGOp; + +#define TCGOP_CALLI(X) (X)->param1 +#define TCGOP_CALLO(X) (X)->param2 + +#define TCGOP_VECL(X) (X)->param1 +#define TCGOP_VECE(X) (X)->param2 + +/* Make sure operands fit in the bitfields above. */ +QEMU_BUILD_BUG_ON(NB_OPS > (1 << 8)); + +typedef struct TCGProfile { + int64_t cpu_exec_time; + int64_t tb_count1; + int64_t tb_count; + int64_t op_count; /* total insn count */ + int op_count_max; /* max insn per TB */ + int temp_count_max; + int64_t temp_count; + int64_t del_op_count; + int64_t code_in_len; + int64_t code_out_len; + int64_t search_out_len; + int64_t interm_time; + int64_t code_time; + int64_t la_time; + int64_t opt_time; + int64_t restore_count; + int64_t restore_time; + int64_t table_op_count[NB_OPS]; +} TCGProfile; + +struct TCGContext { + uint8_t *pool_cur, *pool_end; + TCGPool *pool_first, *pool_current, *pool_first_large; + int nb_labels; + int nb_globals; + int nb_temps; + int nb_indirects; + int nb_ops; + + /* goto_tb support */ + tcg_insn_unit *code_buf; + uint16_t *tb_jmp_reset_offset; /* tb->jmp_reset_offset */ + uintptr_t *tb_jmp_insn_offset; /* tb->jmp_target_arg if direct_jump */ + uintptr_t *tb_jmp_target_addr; /* tb->jmp_target_arg if !direct_jump */ + + TCGRegSet reserved_regs; + uint32_t tb_cflags; /* cflags of the current TB */ + intptr_t current_frame_offset; + intptr_t frame_start; + intptr_t frame_end; + TCGTemp *frame_temp; + + tcg_insn_unit *code_ptr; + +#ifdef CONFIG_PROFILER + TCGProfile prof; +#endif + +#ifdef CONFIG_DEBUG_TCG + int temps_in_use; + int goto_tb_issue_mask; + const TCGOpcode *vecop_list; +#endif + + /* Code generation. Note that we specifically do not use tcg_insn_unit + here, because there's too much arithmetic throughout that relies + on addition and subtraction working on bytes. Rely on the GCC + extension that allows arithmetic on void*. */ + void *code_gen_prologue; + void *code_gen_epilogue; + void *code_gen_buffer; + size_t code_gen_buffer_size; + void *code_gen_ptr; + void *data_gen_ptr; + + /* Threshold to flush the translated code buffer. */ + void *code_gen_highwater; + + size_t tb_phys_invalidate_count; + + /* Track which vCPU triggers events */ + CPUState *cpu; /* *_trans */ + + /* These structures are private to tcg-target.c.inc. */ +#ifdef TCG_TARGET_NEED_LDST_LABELS + QSIMPLEQ_HEAD(, TCGLabelQemuLdst) ldst_labels; +#endif +#ifdef TCG_TARGET_NEED_POOL_LABELS + struct TCGLabelPoolData *pool_labels; +#endif + + TCGLabel *exitreq_label; + +#ifdef CONFIG_PLUGIN + /* + * We keep one plugin_tb struct per TCGContext. Note that on every TB + * translation we clear but do not free its contents; this way we + * avoid a lot of malloc/free churn, since after a few TB's it's + * unlikely that we'll need to allocate either more instructions or more + * space for instructions (for variable-instruction-length ISAs). + */ + struct qemu_plugin_tb *plugin_tb; + + /* descriptor of the instruction being translated */ + struct qemu_plugin_insn *plugin_insn; + + /* list to quickly access the injected ops */ + QSIMPLEQ_HEAD(, TCGOp) plugin_ops; +#endif + + TCGTempSet free_temps[TCG_TYPE_COUNT * 2]; + TCGTemp temps[TCG_MAX_TEMPS]; /* globals first, temps after */ + + QTAILQ_HEAD(, TCGOp) ops, free_ops; + QSIMPLEQ_HEAD(, TCGLabel) labels; + + /* Tells which temporary holds a given register. + It does not take into account fixed registers */ + TCGTemp *reg_to_temp[TCG_TARGET_NB_REGS]; + + uint16_t gen_insn_end_off[TCG_MAX_INSNS]; + target_ulong gen_insn_data[TCG_MAX_INSNS][TARGET_INSN_START_WORDS]; +}; + +extern TCGContext tcg_init_ctx; +extern __thread TCGContext *tcg_ctx; +extern TCGv_env cpu_env; + +static inline size_t temp_idx(TCGTemp *ts) +{ + ptrdiff_t n = ts - tcg_ctx->temps; + tcg_debug_assert(n >= 0 && n < tcg_ctx->nb_temps); + return n; +} + +static inline TCGArg temp_arg(TCGTemp *ts) +{ + return (uintptr_t)ts; +} + +static inline TCGTemp *arg_temp(TCGArg a) +{ + return (TCGTemp *)(uintptr_t)a; +} + +/* Using the offset of a temporary, relative to TCGContext, rather than + its index means that we don't use 0. That leaves offset 0 free for + a NULL representation without having to leave index 0 unused. */ +static inline TCGTemp *tcgv_i32_temp(TCGv_i32 v) +{ + uintptr_t o = (uintptr_t)v; + TCGTemp *t = (void *)tcg_ctx + o; + tcg_debug_assert(offsetof(TCGContext, temps[temp_idx(t)]) == o); + return t; +} + +static inline TCGTemp *tcgv_i64_temp(TCGv_i64 v) +{ + return tcgv_i32_temp((TCGv_i32)v); +} + +static inline TCGTemp *tcgv_ptr_temp(TCGv_ptr v) +{ + return tcgv_i32_temp((TCGv_i32)v); +} + +static inline TCGTemp *tcgv_vec_temp(TCGv_vec v) +{ + return tcgv_i32_temp((TCGv_i32)v); +} + +static inline TCGArg tcgv_i32_arg(TCGv_i32 v) +{ + return temp_arg(tcgv_i32_temp(v)); +} + +static inline TCGArg tcgv_i64_arg(TCGv_i64 v) +{ + return temp_arg(tcgv_i64_temp(v)); +} + +static inline TCGArg tcgv_ptr_arg(TCGv_ptr v) +{ + return temp_arg(tcgv_ptr_temp(v)); +} + +static inline TCGArg tcgv_vec_arg(TCGv_vec v) +{ + return temp_arg(tcgv_vec_temp(v)); +} + +static inline TCGv_i32 temp_tcgv_i32(TCGTemp *t) +{ + (void)temp_idx(t); /* trigger embedded assert */ + return (TCGv_i32)((void *)t - (void *)tcg_ctx); +} + +static inline TCGv_i64 temp_tcgv_i64(TCGTemp *t) +{ + return (TCGv_i64)temp_tcgv_i32(t); +} + +static inline TCGv_ptr temp_tcgv_ptr(TCGTemp *t) +{ + return (TCGv_ptr)temp_tcgv_i32(t); +} + +static inline TCGv_vec temp_tcgv_vec(TCGTemp *t) +{ + return (TCGv_vec)temp_tcgv_i32(t); +} + +#if TCG_TARGET_REG_BITS == 32 +static inline TCGv_i32 TCGV_LOW(TCGv_i64 t) +{ + return temp_tcgv_i32(tcgv_i64_temp(t)); +} + +static inline TCGv_i32 TCGV_HIGH(TCGv_i64 t) +{ + return temp_tcgv_i32(tcgv_i64_temp(t) + 1); +} +#endif + +static inline TCGArg tcg_get_insn_param(TCGOp *op, int arg) +{ + return op->args[arg]; +} + +static inline void tcg_set_insn_param(TCGOp *op, int arg, TCGArg v) +{ + op->args[arg] = v; +} + +static inline target_ulong tcg_get_insn_start_param(TCGOp *op, int arg) +{ +#if TARGET_LONG_BITS <= TCG_TARGET_REG_BITS + return tcg_get_insn_param(op, arg); +#else + return tcg_get_insn_param(op, arg * 2) | + ((uint64_t)tcg_get_insn_param(op, arg * 2 + 1) << 32); +#endif +} + +static inline void tcg_set_insn_start_param(TCGOp *op, int arg, target_ulong v) +{ +#if TARGET_LONG_BITS <= TCG_TARGET_REG_BITS + tcg_set_insn_param(op, arg, v); +#else + tcg_set_insn_param(op, arg * 2, v); + tcg_set_insn_param(op, arg * 2 + 1, v >> 32); +#endif +} + +/* The last op that was emitted. */ +static inline TCGOp *tcg_last_op(void) +{ + return QTAILQ_LAST(&tcg_ctx->ops); +} + +/* Test for whether to terminate the TB for using too many opcodes. */ +static inline bool tcg_op_buf_full(void) +{ + /* This is not a hard limit, it merely stops translation when + * we have produced "enough" opcodes. We want to limit TB size + * such that a RISC host can reasonably use a 16-bit signed + * branch within the TB. We also need to be mindful of the + * 16-bit unsigned offsets, TranslationBlock.jmp_reset_offset[] + * and TCGContext.gen_insn_end_off[]. + */ + return tcg_ctx->nb_ops >= 4000; +} + +/* pool based memory allocation */ + +/* user-mode: mmap_lock must be held for tcg_malloc_internal. */ +void *tcg_malloc_internal(TCGContext *s, int size); +void tcg_pool_reset(TCGContext *s); +TranslationBlock *tcg_tb_alloc(TCGContext *s); + +void tcg_region_init(void); +void tb_destroy(TranslationBlock *tb); +void tcg_region_reset_all(void); + +size_t tcg_code_size(void); +size_t tcg_code_capacity(void); + +void tcg_tb_insert(TranslationBlock *tb); +void tcg_tb_remove(TranslationBlock *tb); +size_t tcg_tb_phys_invalidate_count(void); +TranslationBlock *tcg_tb_lookup(uintptr_t tc_ptr); +void tcg_tb_foreach(GTraverseFunc func, gpointer user_data); +size_t tcg_nb_tbs(void); + +/* user-mode: Called with mmap_lock held. */ +static inline void *tcg_malloc(int size) +{ + TCGContext *s = tcg_ctx; + uint8_t *ptr, *ptr_end; + + /* ??? This is a weak placeholder for minimum malloc alignment. */ + size = QEMU_ALIGN_UP(size, 8); + + ptr = s->pool_cur; + ptr_end = ptr + size; + if (unlikely(ptr_end > s->pool_end)) { + return tcg_malloc_internal(tcg_ctx, size); + } else { + s->pool_cur = ptr_end; + return ptr; + } +} + +void tcg_context_init(TCGContext *s); +void tcg_register_thread(void); +void tcg_prologue_init(TCGContext *s); +void tcg_func_start(TCGContext *s); + +int tcg_gen_code(TCGContext *s, TranslationBlock *tb); + +void tcg_set_frame(TCGContext *s, TCGReg reg, intptr_t start, intptr_t size); + +TCGTemp *tcg_global_mem_new_internal(TCGType, TCGv_ptr, + intptr_t, const char *); +TCGTemp *tcg_temp_new_internal(TCGType, bool); +void tcg_temp_free_internal(TCGTemp *); +TCGv_vec tcg_temp_new_vec(TCGType type); +TCGv_vec tcg_temp_new_vec_matching(TCGv_vec match); + +static inline void tcg_temp_free_i32(TCGv_i32 arg) +{ + tcg_temp_free_internal(tcgv_i32_temp(arg)); +} + +static inline void tcg_temp_free_i64(TCGv_i64 arg) +{ + tcg_temp_free_internal(tcgv_i64_temp(arg)); +} + +static inline void tcg_temp_free_ptr(TCGv_ptr arg) +{ + tcg_temp_free_internal(tcgv_ptr_temp(arg)); +} + +static inline void tcg_temp_free_vec(TCGv_vec arg) +{ + tcg_temp_free_internal(tcgv_vec_temp(arg)); +} + +static inline TCGv_i32 tcg_global_mem_new_i32(TCGv_ptr reg, intptr_t offset, + const char *name) +{ + TCGTemp *t = tcg_global_mem_new_internal(TCG_TYPE_I32, reg, offset, name); + return temp_tcgv_i32(t); +} + +static inline TCGv_i32 tcg_temp_new_i32(void) +{ + TCGTemp *t = tcg_temp_new_internal(TCG_TYPE_I32, false); + return temp_tcgv_i32(t); +} + +static inline TCGv_i32 tcg_temp_local_new_i32(void) +{ + TCGTemp *t = tcg_temp_new_internal(TCG_TYPE_I32, true); + return temp_tcgv_i32(t); +} + +static inline TCGv_i64 tcg_global_mem_new_i64(TCGv_ptr reg, intptr_t offset, + const char *name) +{ + TCGTemp *t = tcg_global_mem_new_internal(TCG_TYPE_I64, reg, offset, name); + return temp_tcgv_i64(t); +} + +static inline TCGv_i64 tcg_temp_new_i64(void) +{ + TCGTemp *t = tcg_temp_new_internal(TCG_TYPE_I64, false); + return temp_tcgv_i64(t); +} + +static inline TCGv_i64 tcg_temp_local_new_i64(void) +{ + TCGTemp *t = tcg_temp_new_internal(TCG_TYPE_I64, true); + return temp_tcgv_i64(t); +} + +static inline TCGv_ptr tcg_global_mem_new_ptr(TCGv_ptr reg, intptr_t offset, + const char *name) +{ + TCGTemp *t = tcg_global_mem_new_internal(TCG_TYPE_PTR, reg, offset, name); + return temp_tcgv_ptr(t); +} + +static inline TCGv_ptr tcg_temp_new_ptr(void) +{ + TCGTemp *t = tcg_temp_new_internal(TCG_TYPE_PTR, false); + return temp_tcgv_ptr(t); +} + +static inline TCGv_ptr tcg_temp_local_new_ptr(void) +{ + TCGTemp *t = tcg_temp_new_internal(TCG_TYPE_PTR, true); + return temp_tcgv_ptr(t); +} + +#if defined(CONFIG_DEBUG_TCG) +/* If you call tcg_clear_temp_count() at the start of a section of + * code which is not supposed to leak any TCG temporaries, then + * calling tcg_check_temp_count() at the end of the section will + * return 1 if the section did in fact leak a temporary. + */ +void tcg_clear_temp_count(void); +int tcg_check_temp_count(void); +#else +#define tcg_clear_temp_count() do { } while (0) +#define tcg_check_temp_count() 0 +#endif + +int64_t tcg_cpu_exec_time(void); +void tcg_dump_info(void); +void tcg_dump_op_count(void); + +#define TCG_CT_CONST 1 /* any constant of register size */ + +typedef struct TCGArgConstraint { + unsigned ct : 16; + unsigned alias_index : 4; + unsigned sort_index : 4; + bool oalias : 1; + bool ialias : 1; + bool newreg : 1; + TCGRegSet regs; +} TCGArgConstraint; + +#define TCG_MAX_OP_ARGS 16 + +/* Bits for TCGOpDef->flags, 8 bits available, all used. */ +enum { + /* Instruction exits the translation block. */ + TCG_OPF_BB_EXIT = 0x01, + /* Instruction defines the end of a basic block. */ + TCG_OPF_BB_END = 0x02, + /* Instruction clobbers call registers and potentially update globals. */ + TCG_OPF_CALL_CLOBBER = 0x04, + /* Instruction has side effects: it cannot be removed if its outputs + are not used, and might trigger exceptions. */ + TCG_OPF_SIDE_EFFECTS = 0x08, + /* Instruction operands are 64-bits (otherwise 32-bits). */ + TCG_OPF_64BIT = 0x10, + /* Instruction is optional and not implemented by the host, or insn + is generic and should not be implemened by the host. */ + TCG_OPF_NOT_PRESENT = 0x20, + /* Instruction operands are vectors. */ + TCG_OPF_VECTOR = 0x40, + /* Instruction is a conditional branch. */ + TCG_OPF_COND_BRANCH = 0x80 +}; + +typedef struct TCGOpDef { + const char *name; + uint8_t nb_oargs, nb_iargs, nb_cargs, nb_args; + uint8_t flags; + TCGArgConstraint *args_ct; +} TCGOpDef; + +extern TCGOpDef tcg_op_defs[]; +extern const size_t tcg_op_defs_max; + +typedef struct TCGTargetOpDef { + TCGOpcode op; + const char *args_ct_str[TCG_MAX_OP_ARGS]; +} TCGTargetOpDef; + +#define tcg_abort() \ +do {\ + fprintf(stderr, "%s:%d: tcg fatal error\n", __FILE__, __LINE__);\ + abort();\ +} while (0) + +bool tcg_op_supported(TCGOpcode op); + +void tcg_gen_callN(void *func, TCGTemp *ret, int nargs, TCGTemp **args); + +TCGOp *tcg_emit_op(TCGOpcode opc); +void tcg_op_remove(TCGContext *s, TCGOp *op); +TCGOp *tcg_op_insert_before(TCGContext *s, TCGOp *op, TCGOpcode opc); +TCGOp *tcg_op_insert_after(TCGContext *s, TCGOp *op, TCGOpcode opc); + +void tcg_optimize(TCGContext *s); + +TCGv_i32 tcg_const_i32(int32_t val); +TCGv_i64 tcg_const_i64(int64_t val); +TCGv_i32 tcg_const_local_i32(int32_t val); +TCGv_i64 tcg_const_local_i64(int64_t val); +TCGv_vec tcg_const_zeros_vec(TCGType); +TCGv_vec tcg_const_ones_vec(TCGType); +TCGv_vec tcg_const_zeros_vec_matching(TCGv_vec); +TCGv_vec tcg_const_ones_vec_matching(TCGv_vec); + +#if UINTPTR_MAX == UINT32_MAX +# define tcg_const_ptr(x) ((TCGv_ptr)tcg_const_i32((intptr_t)(x))) +# define tcg_const_local_ptr(x) ((TCGv_ptr)tcg_const_local_i32((intptr_t)(x))) +#else +# define tcg_const_ptr(x) ((TCGv_ptr)tcg_const_i64((intptr_t)(x))) +# define tcg_const_local_ptr(x) ((TCGv_ptr)tcg_const_local_i64((intptr_t)(x))) +#endif + +TCGLabel *gen_new_label(void); + +/** + * label_arg + * @l: label + * + * Encode a label for storage in the TCG opcode stream. + */ + +static inline TCGArg label_arg(TCGLabel *l) +{ + return (uintptr_t)l; +} + +/** + * arg_label + * @i: value + * + * The opposite of label_arg. Retrieve a label from the + * encoding of the TCG opcode stream. + */ + +static inline TCGLabel *arg_label(TCGArg i) +{ + return (TCGLabel *)(uintptr_t)i; +} + +/** + * tcg_ptr_byte_diff + * @a, @b: addresses to be differenced + * + * There are many places within the TCG backends where we need a byte + * difference between two pointers. While this can be accomplished + * with local casting, it's easy to get wrong -- especially if one is + * concerned with the signedness of the result. + * + * This version relies on GCC's void pointer arithmetic to get the + * correct result. + */ + +static inline ptrdiff_t tcg_ptr_byte_diff(void *a, void *b) +{ + return a - b; +} + +/** + * tcg_pcrel_diff + * @s: the tcg context + * @target: address of the target + * + * Produce a pc-relative difference, from the current code_ptr + * to the destination address. + */ + +static inline ptrdiff_t tcg_pcrel_diff(TCGContext *s, void *target) +{ + return tcg_ptr_byte_diff(target, s->code_ptr); +} + +/** + * tcg_current_code_size + * @s: the tcg context + * + * Compute the current code size within the translation block. + * This is used to fill in qemu's data structures for goto_tb. + */ + +static inline size_t tcg_current_code_size(TCGContext *s) +{ + return tcg_ptr_byte_diff(s->code_ptr, s->code_buf); +} + +/* Combine the MemOp and mmu_idx parameters into a single value. */ +typedef uint32_t TCGMemOpIdx; + +/** + * make_memop_idx + * @op: memory operation + * @idx: mmu index + * + * Encode these values into a single parameter. + */ +static inline TCGMemOpIdx make_memop_idx(MemOp op, unsigned idx) +{ + tcg_debug_assert(idx <= 15); + return (op << 4) | idx; +} + +/** + * get_memop + * @oi: combined op/idx parameter + * + * Extract the memory operation from the combined value. + */ +static inline MemOp get_memop(TCGMemOpIdx oi) +{ + return oi >> 4; +} + +/** + * get_mmuidx + * @oi: combined op/idx parameter + * + * Extract the mmu index from the combined value. + */ +static inline unsigned get_mmuidx(TCGMemOpIdx oi) +{ + return oi & 15; +} + +/** + * tcg_qemu_tb_exec: + * @env: pointer to CPUArchState for the CPU + * @tb_ptr: address of generated code for the TB to execute + * + * Start executing code from a given translation block. + * Where translation blocks have been linked, execution + * may proceed from the given TB into successive ones. + * Control eventually returns only when some action is needed + * from the top-level loop: either control must pass to a TB + * which has not yet been directly linked, or an asynchronous + * event such as an interrupt needs handling. + * + * Return: The return value is the value passed to the corresponding + * tcg_gen_exit_tb() at translation time of the last TB attempted to execute. + * The value is either zero or a 4-byte aligned pointer to that TB combined + * with additional information in its two least significant bits. The + * additional information is encoded as follows: + * 0, 1: the link between this TB and the next is via the specified + * TB index (0 or 1). That is, we left the TB via (the equivalent + * of) "goto_tb ". The main loop uses this to determine + * how to link the TB just executed to the next. + * 2: we are using instruction counting code generation, and we + * did not start executing this TB because the instruction counter + * would hit zero midway through it. In this case the pointer + * returned is the TB we were about to execute, and the caller must + * arrange to execute the remaining count of instructions. + * 3: we stopped because the CPU's exit_request flag was set + * (usually meaning that there is an interrupt that needs to be + * handled). The pointer returned is the TB we were about to execute + * when we noticed the pending exit request. + * + * If the bottom two bits indicate an exit-via-index then the CPU + * state is correctly synchronised and ready for execution of the next + * TB (and in particular the guest PC is the address to execute next). + * Otherwise, we gave up on execution of this TB before it started, and + * the caller must fix up the CPU state by calling the CPU's + * synchronize_from_tb() method with the TB pointer we return (falling + * back to calling the CPU's set_pc method with tb->pb if no + * synchronize_from_tb() method exists). + * + * Note that TCG targets may use a different definition of tcg_qemu_tb_exec + * to this default (which just calls the prologue.code emitted by + * tcg_target_qemu_prologue()). + */ +#define TB_EXIT_MASK 3 +#define TB_EXIT_IDX0 0 +#define TB_EXIT_IDX1 1 +#define TB_EXIT_IDXMAX 1 +#define TB_EXIT_REQUESTED 3 + +#ifdef HAVE_TCG_QEMU_TB_EXEC +uintptr_t tcg_qemu_tb_exec(CPUArchState *env, uint8_t *tb_ptr); +#else +# define tcg_qemu_tb_exec(env, tb_ptr) \ + ((uintptr_t (*)(void *, void *))tcg_ctx->code_gen_prologue)(env, tb_ptr) +#endif + +void tcg_register_jit(void *buf, size_t buf_size); + +#if TCG_TARGET_MAYBE_vec +/* Return zero if the tuple (opc, type, vece) is unsupportable; + return > 0 if it is directly supportable; + return < 0 if we must call tcg_expand_vec_op. */ +int tcg_can_emit_vec_op(TCGOpcode, TCGType, unsigned); +#else +static inline int tcg_can_emit_vec_op(TCGOpcode o, TCGType t, unsigned ve) +{ + return 0; +} +#endif + +/* Expand the tuple (opc, type, vece) on the given arguments. */ +void tcg_expand_vec_op(TCGOpcode, TCGType, unsigned, TCGArg, ...); + +/* Replicate a constant C accoring to the log2 of the element size. */ +uint64_t dup_const(unsigned vece, uint64_t c); + +#define dup_const(VECE, C) \ + (__builtin_constant_p(VECE) \ + ? ( (VECE) == MO_8 ? 0x0101010101010101ull * (uint8_t)(C) \ + : (VECE) == MO_16 ? 0x0001000100010001ull * (uint16_t)(C) \ + : (VECE) == MO_32 ? 0x0000000100000001ull * (uint32_t)(C) \ + : dup_const(VECE, C)) \ + : dup_const(VECE, C)) + + +/* + * Memory helpers that will be used by TCG generated code. + */ +#ifdef CONFIG_SOFTMMU +/* Value zero-extended to tcg register size. */ +tcg_target_ulong helper_ret_ldub_mmu(CPUArchState *env, target_ulong addr, + TCGMemOpIdx oi, uintptr_t retaddr); +tcg_target_ulong helper_le_lduw_mmu(CPUArchState *env, target_ulong addr, + TCGMemOpIdx oi, uintptr_t retaddr); +tcg_target_ulong helper_le_ldul_mmu(CPUArchState *env, target_ulong addr, + TCGMemOpIdx oi, uintptr_t retaddr); +uint64_t helper_le_ldq_mmu(CPUArchState *env, target_ulong addr, + TCGMemOpIdx oi, uintptr_t retaddr); +tcg_target_ulong helper_be_lduw_mmu(CPUArchState *env, target_ulong addr, + TCGMemOpIdx oi, uintptr_t retaddr); +tcg_target_ulong helper_be_ldul_mmu(CPUArchState *env, target_ulong addr, + TCGMemOpIdx oi, uintptr_t retaddr); +uint64_t helper_be_ldq_mmu(CPUArchState *env, target_ulong addr, + TCGMemOpIdx oi, uintptr_t retaddr); + +/* Value sign-extended to tcg register size. */ +tcg_target_ulong helper_ret_ldsb_mmu(CPUArchState *env, target_ulong addr, + TCGMemOpIdx oi, uintptr_t retaddr); +tcg_target_ulong helper_le_ldsw_mmu(CPUArchState *env, target_ulong addr, + TCGMemOpIdx oi, uintptr_t retaddr); +tcg_target_ulong helper_le_ldsl_mmu(CPUArchState *env, target_ulong addr, + TCGMemOpIdx oi, uintptr_t retaddr); +tcg_target_ulong helper_be_ldsw_mmu(CPUArchState *env, target_ulong addr, + TCGMemOpIdx oi, uintptr_t retaddr); +tcg_target_ulong helper_be_ldsl_mmu(CPUArchState *env, target_ulong addr, + TCGMemOpIdx oi, uintptr_t retaddr); + +void helper_ret_stb_mmu(CPUArchState *env, target_ulong addr, uint8_t val, + TCGMemOpIdx oi, uintptr_t retaddr); +void helper_le_stw_mmu(CPUArchState *env, target_ulong addr, uint16_t val, + TCGMemOpIdx oi, uintptr_t retaddr); +void helper_le_stl_mmu(CPUArchState *env, target_ulong addr, uint32_t val, + TCGMemOpIdx oi, uintptr_t retaddr); +void helper_le_stq_mmu(CPUArchState *env, target_ulong addr, uint64_t val, + TCGMemOpIdx oi, uintptr_t retaddr); +void helper_be_stw_mmu(CPUArchState *env, target_ulong addr, uint16_t val, + TCGMemOpIdx oi, uintptr_t retaddr); +void helper_be_stl_mmu(CPUArchState *env, target_ulong addr, uint32_t val, + TCGMemOpIdx oi, uintptr_t retaddr); +void helper_be_stq_mmu(CPUArchState *env, target_ulong addr, uint64_t val, + TCGMemOpIdx oi, uintptr_t retaddr); + +/* Temporary aliases until backends are converted. */ +#ifdef TARGET_WORDS_BIGENDIAN +# define helper_ret_ldsw_mmu helper_be_ldsw_mmu +# define helper_ret_lduw_mmu helper_be_lduw_mmu +# define helper_ret_ldsl_mmu helper_be_ldsl_mmu +# define helper_ret_ldul_mmu helper_be_ldul_mmu +# define helper_ret_ldl_mmu helper_be_ldul_mmu +# define helper_ret_ldq_mmu helper_be_ldq_mmu +# define helper_ret_stw_mmu helper_be_stw_mmu +# define helper_ret_stl_mmu helper_be_stl_mmu +# define helper_ret_stq_mmu helper_be_stq_mmu +#else +# define helper_ret_ldsw_mmu helper_le_ldsw_mmu +# define helper_ret_lduw_mmu helper_le_lduw_mmu +# define helper_ret_ldsl_mmu helper_le_ldsl_mmu +# define helper_ret_ldul_mmu helper_le_ldul_mmu +# define helper_ret_ldl_mmu helper_le_ldul_mmu +# define helper_ret_ldq_mmu helper_le_ldq_mmu +# define helper_ret_stw_mmu helper_le_stw_mmu +# define helper_ret_stl_mmu helper_le_stl_mmu +# define helper_ret_stq_mmu helper_le_stq_mmu +#endif + +uint32_t helper_atomic_cmpxchgb_mmu(CPUArchState *env, target_ulong addr, + uint32_t cmpv, uint32_t newv, + TCGMemOpIdx oi, uintptr_t retaddr); +uint32_t helper_atomic_cmpxchgw_le_mmu(CPUArchState *env, target_ulong addr, + uint32_t cmpv, uint32_t newv, + TCGMemOpIdx oi, uintptr_t retaddr); +uint32_t helper_atomic_cmpxchgl_le_mmu(CPUArchState *env, target_ulong addr, + uint32_t cmpv, uint32_t newv, + TCGMemOpIdx oi, uintptr_t retaddr); +uint64_t helper_atomic_cmpxchgq_le_mmu(CPUArchState *env, target_ulong addr, + uint64_t cmpv, uint64_t newv, + TCGMemOpIdx oi, uintptr_t retaddr); +uint32_t helper_atomic_cmpxchgw_be_mmu(CPUArchState *env, target_ulong addr, + uint32_t cmpv, uint32_t newv, + TCGMemOpIdx oi, uintptr_t retaddr); +uint32_t helper_atomic_cmpxchgl_be_mmu(CPUArchState *env, target_ulong addr, + uint32_t cmpv, uint32_t newv, + TCGMemOpIdx oi, uintptr_t retaddr); +uint64_t helper_atomic_cmpxchgq_be_mmu(CPUArchState *env, target_ulong addr, + uint64_t cmpv, uint64_t newv, + TCGMemOpIdx oi, uintptr_t retaddr); + +#define GEN_ATOMIC_HELPER(NAME, TYPE, SUFFIX) \ +TYPE helper_atomic_ ## NAME ## SUFFIX ## _mmu \ + (CPUArchState *env, target_ulong addr, TYPE val, \ + TCGMemOpIdx oi, uintptr_t retaddr); + +#ifdef CONFIG_ATOMIC64 +#define GEN_ATOMIC_HELPER_ALL(NAME) \ + GEN_ATOMIC_HELPER(NAME, uint32_t, b) \ + GEN_ATOMIC_HELPER(NAME, uint32_t, w_le) \ + GEN_ATOMIC_HELPER(NAME, uint32_t, w_be) \ + GEN_ATOMIC_HELPER(NAME, uint32_t, l_le) \ + GEN_ATOMIC_HELPER(NAME, uint32_t, l_be) \ + GEN_ATOMIC_HELPER(NAME, uint64_t, q_le) \ + GEN_ATOMIC_HELPER(NAME, uint64_t, q_be) +#else +#define GEN_ATOMIC_HELPER_ALL(NAME) \ + GEN_ATOMIC_HELPER(NAME, uint32_t, b) \ + GEN_ATOMIC_HELPER(NAME, uint32_t, w_le) \ + GEN_ATOMIC_HELPER(NAME, uint32_t, w_be) \ + GEN_ATOMIC_HELPER(NAME, uint32_t, l_le) \ + GEN_ATOMIC_HELPER(NAME, uint32_t, l_be) +#endif + +GEN_ATOMIC_HELPER_ALL(fetch_add) +GEN_ATOMIC_HELPER_ALL(fetch_sub) +GEN_ATOMIC_HELPER_ALL(fetch_and) +GEN_ATOMIC_HELPER_ALL(fetch_or) +GEN_ATOMIC_HELPER_ALL(fetch_xor) +GEN_ATOMIC_HELPER_ALL(fetch_smin) +GEN_ATOMIC_HELPER_ALL(fetch_umin) +GEN_ATOMIC_HELPER_ALL(fetch_smax) +GEN_ATOMIC_HELPER_ALL(fetch_umax) + +GEN_ATOMIC_HELPER_ALL(add_fetch) +GEN_ATOMIC_HELPER_ALL(sub_fetch) +GEN_ATOMIC_HELPER_ALL(and_fetch) +GEN_ATOMIC_HELPER_ALL(or_fetch) +GEN_ATOMIC_HELPER_ALL(xor_fetch) +GEN_ATOMIC_HELPER_ALL(smin_fetch) +GEN_ATOMIC_HELPER_ALL(umin_fetch) +GEN_ATOMIC_HELPER_ALL(smax_fetch) +GEN_ATOMIC_HELPER_ALL(umax_fetch) + +GEN_ATOMIC_HELPER_ALL(xchg) + +#undef GEN_ATOMIC_HELPER_ALL +#undef GEN_ATOMIC_HELPER +#endif /* CONFIG_SOFTMMU */ + +/* + * These aren't really a "proper" helpers because TCG cannot manage Int128. + * However, use the same format as the others, for use by the backends. + * + * The cmpxchg functions are only defined if HAVE_CMPXCHG128; + * the ld/st functions are only defined if HAVE_ATOMIC128, + * as defined by . + */ +Int128 helper_atomic_cmpxchgo_le_mmu(CPUArchState *env, target_ulong addr, + Int128 cmpv, Int128 newv, + TCGMemOpIdx oi, uintptr_t retaddr); +Int128 helper_atomic_cmpxchgo_be_mmu(CPUArchState *env, target_ulong addr, + Int128 cmpv, Int128 newv, + TCGMemOpIdx oi, uintptr_t retaddr); + +Int128 helper_atomic_ldo_le_mmu(CPUArchState *env, target_ulong addr, + TCGMemOpIdx oi, uintptr_t retaddr); +Int128 helper_atomic_ldo_be_mmu(CPUArchState *env, target_ulong addr, + TCGMemOpIdx oi, uintptr_t retaddr); +void helper_atomic_sto_le_mmu(CPUArchState *env, target_ulong addr, Int128 val, + TCGMemOpIdx oi, uintptr_t retaddr); +void helper_atomic_sto_be_mmu(CPUArchState *env, target_ulong addr, Int128 val, + TCGMemOpIdx oi, uintptr_t retaddr); + +#ifdef CONFIG_DEBUG_TCG +void tcg_assert_listed_vecop(TCGOpcode); +#else +static inline void tcg_assert_listed_vecop(TCGOpcode op) { } +#endif + +static inline const TCGOpcode *tcg_swap_vecop_list(const TCGOpcode *n) +{ +#ifdef CONFIG_DEBUG_TCG + const TCGOpcode *o = tcg_ctx->vecop_list; + tcg_ctx->vecop_list = n; + return o; +#else + return NULL; +#endif +} + +bool tcg_can_emit_vecop_list(const TCGOpcode *, TCGType, unsigned); + +#endif /* TCG_H */ diff --git a/include/trace-tcg.h b/include/trace-tcg.h new file mode 100644 index 000000000..da68608c8 --- /dev/null +++ b/include/trace-tcg.h @@ -0,0 +1,6 @@ +#ifndef TRACE_TCG_H +#define TRACE_TCG_H + +#include "trace/generated-tcg-tracers.h" + +#endif /* TRACE_TCG_H */ diff --git a/include/ui/console.h b/include/ui/console.h new file mode 100644 index 000000000..e7303d8b9 --- /dev/null +++ b/include/ui/console.h @@ -0,0 +1,449 @@ +#ifndef CONSOLE_H +#define CONSOLE_H + +#include "ui/qemu-pixman.h" +#include "qom/object.h" +#include "qemu/notify.h" +#include "qemu/error-report.h" +#include "qapi/qapi-types-ui.h" + +#ifdef CONFIG_OPENGL +# include +# include "ui/shader.h" +#endif + +/* keyboard/mouse support */ + +#define MOUSE_EVENT_LBUTTON 0x01 +#define MOUSE_EVENT_RBUTTON 0x02 +#define MOUSE_EVENT_MBUTTON 0x04 +#define MOUSE_EVENT_WHEELUP 0x08 +#define MOUSE_EVENT_WHEELDN 0x10 + +/* identical to the ps/2 keyboard bits */ +#define QEMU_SCROLL_LOCK_LED (1 << 0) +#define QEMU_NUM_LOCK_LED (1 << 1) +#define QEMU_CAPS_LOCK_LED (1 << 2) + +/* in ms */ +#define GUI_REFRESH_INTERVAL_DEFAULT 30 +#define GUI_REFRESH_INTERVAL_IDLE 3000 + +/* Color number is match to standard vga palette */ +enum qemu_color_names { + QEMU_COLOR_BLACK = 0, + QEMU_COLOR_BLUE = 1, + QEMU_COLOR_GREEN = 2, + QEMU_COLOR_CYAN = 3, + QEMU_COLOR_RED = 4, + QEMU_COLOR_MAGENTA = 5, + QEMU_COLOR_YELLOW = 6, + QEMU_COLOR_WHITE = 7 +}; +/* Convert to curses char attributes */ +#define ATTR2CHTYPE(c, fg, bg, bold) \ + ((bold) << 21 | (bg) << 11 | (fg) << 8 | (c)) + +typedef void QEMUPutKBDEvent(void *opaque, int keycode); +typedef void QEMUPutLEDEvent(void *opaque, int ledstate); +typedef void QEMUPutMouseEvent(void *opaque, int dx, int dy, int dz, int buttons_state); + +typedef struct QEMUPutMouseEntry QEMUPutMouseEntry; +typedef struct QEMUPutKbdEntry QEMUPutKbdEntry; +typedef struct QEMUPutLEDEntry QEMUPutLEDEntry; + +QEMUPutKbdEntry *qemu_add_kbd_event_handler(QEMUPutKBDEvent *func, + void *opaque); +QEMUPutMouseEntry *qemu_add_mouse_event_handler(QEMUPutMouseEvent *func, + void *opaque, int absolute, + const char *name); +void qemu_remove_mouse_event_handler(QEMUPutMouseEntry *entry); +void qemu_activate_mouse_event_handler(QEMUPutMouseEntry *entry); + +QEMUPutLEDEntry *qemu_add_led_event_handler(QEMUPutLEDEvent *func, void *opaque); +void qemu_remove_led_event_handler(QEMUPutLEDEntry *entry); + +void kbd_put_ledstate(int ledstate); + +void hmp_mouse_set(Monitor *mon, const QDict *qdict); + +/* keysym is a unicode code except for special keys (see QEMU_KEY_xxx + constants) */ +#define QEMU_KEY_ESC1(c) ((c) | 0xe100) +#define QEMU_KEY_BACKSPACE 0x007f +#define QEMU_KEY_UP QEMU_KEY_ESC1('A') +#define QEMU_KEY_DOWN QEMU_KEY_ESC1('B') +#define QEMU_KEY_RIGHT QEMU_KEY_ESC1('C') +#define QEMU_KEY_LEFT QEMU_KEY_ESC1('D') +#define QEMU_KEY_HOME QEMU_KEY_ESC1(1) +#define QEMU_KEY_END QEMU_KEY_ESC1(4) +#define QEMU_KEY_PAGEUP QEMU_KEY_ESC1(5) +#define QEMU_KEY_PAGEDOWN QEMU_KEY_ESC1(6) +#define QEMU_KEY_DELETE QEMU_KEY_ESC1(3) + +#define QEMU_KEY_CTRL_UP 0xe400 +#define QEMU_KEY_CTRL_DOWN 0xe401 +#define QEMU_KEY_CTRL_LEFT 0xe402 +#define QEMU_KEY_CTRL_RIGHT 0xe403 +#define QEMU_KEY_CTRL_HOME 0xe404 +#define QEMU_KEY_CTRL_END 0xe405 +#define QEMU_KEY_CTRL_PAGEUP 0xe406 +#define QEMU_KEY_CTRL_PAGEDOWN 0xe407 + +void kbd_put_keysym_console(QemuConsole *s, int keysym); +bool kbd_put_qcode_console(QemuConsole *s, int qcode, bool ctrl); +void kbd_put_string_console(QemuConsole *s, const char *str, int len); +void kbd_put_keysym(int keysym); + +/* consoles */ + +#define TYPE_QEMU_CONSOLE "qemu-console" +OBJECT_DECLARE_TYPE(QemuConsole, QemuConsoleClass, QEMU_CONSOLE) + + +struct QemuConsoleClass { + ObjectClass parent_class; +}; + +#define QEMU_ALLOCATED_FLAG 0x01 + +typedef struct DisplaySurface { + pixman_format_code_t format; + pixman_image_t *image; + uint8_t flags; +#ifdef CONFIG_OPENGL + GLenum glformat; + GLenum gltype; + GLuint texture; +#endif +} DisplaySurface; + +typedef struct QemuUIInfo { + /* physical dimension */ + uint16_t width_mm; + uint16_t height_mm; + /* geometry */ + int xoff; + int yoff; + uint32_t width; + uint32_t height; +} QemuUIInfo; + +/* cursor data format is 32bit RGBA */ +typedef struct QEMUCursor { + int width, height; + int hot_x, hot_y; + int refcount; + uint32_t data[]; +} QEMUCursor; + +QEMUCursor *cursor_alloc(int width, int height); +void cursor_get(QEMUCursor *c); +void cursor_put(QEMUCursor *c); +QEMUCursor *cursor_builtin_hidden(void); +QEMUCursor *cursor_builtin_left_ptr(void); +void cursor_print_ascii_art(QEMUCursor *c, const char *prefix); +int cursor_get_mono_bpl(QEMUCursor *c); +void cursor_set_mono(QEMUCursor *c, + uint32_t foreground, uint32_t background, uint8_t *image, + int transparent, uint8_t *mask); +void cursor_get_mono_image(QEMUCursor *c, int foreground, uint8_t *mask); +void cursor_get_mono_mask(QEMUCursor *c, int transparent, uint8_t *mask); + +typedef void *QEMUGLContext; +typedef struct QEMUGLParams QEMUGLParams; + +struct QEMUGLParams { + int major_ver; + int minor_ver; +}; + +typedef struct QemuDmaBuf { + int fd; + uint32_t width; + uint32_t height; + uint32_t stride; + uint32_t fourcc; + uint64_t modifier; + uint32_t texture; + bool y0_top; +} QemuDmaBuf; + +typedef struct DisplayState DisplayState; + +typedef struct DisplayChangeListenerOps { + const char *dpy_name; + + void (*dpy_refresh)(DisplayChangeListener *dcl); + + void (*dpy_gfx_update)(DisplayChangeListener *dcl, + int x, int y, int w, int h); + void (*dpy_gfx_switch)(DisplayChangeListener *dcl, + struct DisplaySurface *new_surface); + bool (*dpy_gfx_check_format)(DisplayChangeListener *dcl, + pixman_format_code_t format); + + void (*dpy_text_cursor)(DisplayChangeListener *dcl, + int x, int y); + void (*dpy_text_resize)(DisplayChangeListener *dcl, + int w, int h); + void (*dpy_text_update)(DisplayChangeListener *dcl, + int x, int y, int w, int h); + + void (*dpy_mouse_set)(DisplayChangeListener *dcl, + int x, int y, int on); + void (*dpy_cursor_define)(DisplayChangeListener *dcl, + QEMUCursor *cursor); + + QEMUGLContext (*dpy_gl_ctx_create)(DisplayChangeListener *dcl, + QEMUGLParams *params); + void (*dpy_gl_ctx_destroy)(DisplayChangeListener *dcl, + QEMUGLContext ctx); + int (*dpy_gl_ctx_make_current)(DisplayChangeListener *dcl, + QEMUGLContext ctx); + QEMUGLContext (*dpy_gl_ctx_get_current)(DisplayChangeListener *dcl); + + void (*dpy_gl_scanout_disable)(DisplayChangeListener *dcl); + void (*dpy_gl_scanout_texture)(DisplayChangeListener *dcl, + uint32_t backing_id, + bool backing_y_0_top, + uint32_t backing_width, + uint32_t backing_height, + uint32_t x, uint32_t y, + uint32_t w, uint32_t h); + void (*dpy_gl_scanout_dmabuf)(DisplayChangeListener *dcl, + QemuDmaBuf *dmabuf); + void (*dpy_gl_cursor_dmabuf)(DisplayChangeListener *dcl, + QemuDmaBuf *dmabuf, bool have_hot, + uint32_t hot_x, uint32_t hot_y); + void (*dpy_gl_cursor_position)(DisplayChangeListener *dcl, + uint32_t pos_x, uint32_t pos_y); + void (*dpy_gl_release_dmabuf)(DisplayChangeListener *dcl, + QemuDmaBuf *dmabuf); + void (*dpy_gl_update)(DisplayChangeListener *dcl, + uint32_t x, uint32_t y, uint32_t w, uint32_t h); + +} DisplayChangeListenerOps; + +struct DisplayChangeListener { + uint64_t update_interval; + const DisplayChangeListenerOps *ops; + DisplayState *ds; + QemuConsole *con; + + QLIST_ENTRY(DisplayChangeListener) next; +}; + +DisplayState *init_displaystate(void); +DisplaySurface *qemu_create_displaysurface_from(int width, int height, + pixman_format_code_t format, + int linesize, uint8_t *data); +DisplaySurface *qemu_create_displaysurface_pixman(pixman_image_t *image); +DisplaySurface *qemu_create_message_surface(int w, int h, + const char *msg); +PixelFormat qemu_default_pixelformat(int bpp); + +DisplaySurface *qemu_create_displaysurface(int width, int height); +void qemu_free_displaysurface(DisplaySurface *surface); + +static inline int is_surface_bgr(DisplaySurface *surface) +{ + if (PIXMAN_FORMAT_BPP(surface->format) == 32 && + PIXMAN_FORMAT_TYPE(surface->format) == PIXMAN_TYPE_ABGR) { + return 1; + } else { + return 0; + } +} + +static inline int is_buffer_shared(DisplaySurface *surface) +{ + return !(surface->flags & QEMU_ALLOCATED_FLAG); +} + +void register_displaychangelistener(DisplayChangeListener *dcl); +void update_displaychangelistener(DisplayChangeListener *dcl, + uint64_t interval); +void unregister_displaychangelistener(DisplayChangeListener *dcl); + +bool dpy_ui_info_supported(QemuConsole *con); +const QemuUIInfo *dpy_get_ui_info(const QemuConsole *con); +int dpy_set_ui_info(QemuConsole *con, QemuUIInfo *info); + +void dpy_gfx_update(QemuConsole *con, int x, int y, int w, int h); +void dpy_gfx_update_full(QemuConsole *con); +void dpy_gfx_replace_surface(QemuConsole *con, + DisplaySurface *surface); +void dpy_text_cursor(QemuConsole *con, int x, int y); +void dpy_text_update(QemuConsole *con, int x, int y, int w, int h); +void dpy_text_resize(QemuConsole *con, int w, int h); +void dpy_mouse_set(QemuConsole *con, int x, int y, int on); +void dpy_cursor_define(QemuConsole *con, QEMUCursor *cursor); +bool dpy_cursor_define_supported(QemuConsole *con); +bool dpy_gfx_check_format(QemuConsole *con, + pixman_format_code_t format); + +void dpy_gl_scanout_disable(QemuConsole *con); +void dpy_gl_scanout_texture(QemuConsole *con, + uint32_t backing_id, bool backing_y_0_top, + uint32_t backing_width, uint32_t backing_height, + uint32_t x, uint32_t y, uint32_t w, uint32_t h); +void dpy_gl_scanout_dmabuf(QemuConsole *con, + QemuDmaBuf *dmabuf); +void dpy_gl_cursor_dmabuf(QemuConsole *con, QemuDmaBuf *dmabuf, + bool have_hot, uint32_t hot_x, uint32_t hot_y); +void dpy_gl_cursor_position(QemuConsole *con, + uint32_t pos_x, uint32_t pos_y); +void dpy_gl_release_dmabuf(QemuConsole *con, + QemuDmaBuf *dmabuf); +void dpy_gl_update(QemuConsole *con, + uint32_t x, uint32_t y, uint32_t w, uint32_t h); + +QEMUGLContext dpy_gl_ctx_create(QemuConsole *con, + QEMUGLParams *params); +void dpy_gl_ctx_destroy(QemuConsole *con, QEMUGLContext ctx); +int dpy_gl_ctx_make_current(QemuConsole *con, QEMUGLContext ctx); +QEMUGLContext dpy_gl_ctx_get_current(QemuConsole *con); + +bool console_has_gl(QemuConsole *con); +bool console_has_gl_dmabuf(QemuConsole *con); + +static inline int surface_stride(DisplaySurface *s) +{ + return pixman_image_get_stride(s->image); +} + +static inline void *surface_data(DisplaySurface *s) +{ + return pixman_image_get_data(s->image); +} + +static inline int surface_width(DisplaySurface *s) +{ + return pixman_image_get_width(s->image); +} + +static inline int surface_height(DisplaySurface *s) +{ + return pixman_image_get_height(s->image); +} + +static inline int surface_bits_per_pixel(DisplaySurface *s) +{ + int bits = PIXMAN_FORMAT_BPP(s->format); + return bits; +} + +static inline int surface_bytes_per_pixel(DisplaySurface *s) +{ + int bits = PIXMAN_FORMAT_BPP(s->format); + return DIV_ROUND_UP(bits, 8); +} + +static inline pixman_format_code_t surface_format(DisplaySurface *s) +{ + return s->format; +} + +typedef uint32_t console_ch_t; + +static inline void console_write_ch(console_ch_t *dest, uint32_t ch) +{ + *dest = ch; +} + +typedef struct GraphicHwOps { + void (*invalidate)(void *opaque); + void (*gfx_update)(void *opaque); + bool gfx_update_async; /* if true, calls graphic_hw_update_done() */ + void (*text_update)(void *opaque, console_ch_t *text); + void (*update_interval)(void *opaque, uint64_t interval); + int (*ui_info)(void *opaque, uint32_t head, QemuUIInfo *info); + void (*gl_block)(void *opaque, bool block); +} GraphicHwOps; + +QemuConsole *graphic_console_init(DeviceState *dev, uint32_t head, + const GraphicHwOps *ops, + void *opaque); +void graphic_console_set_hwops(QemuConsole *con, + const GraphicHwOps *hw_ops, + void *opaque); +void graphic_console_close(QemuConsole *con); + +void graphic_hw_update(QemuConsole *con); +void graphic_hw_update_done(QemuConsole *con); +void graphic_hw_invalidate(QemuConsole *con); +void graphic_hw_text_update(QemuConsole *con, console_ch_t *chardata); +void graphic_hw_gl_block(QemuConsole *con, bool block); + +void qemu_console_early_init(void); + +QemuConsole *qemu_console_lookup_by_index(unsigned int index); +QemuConsole *qemu_console_lookup_by_device(DeviceState *dev, uint32_t head); +QemuConsole *qemu_console_lookup_by_device_name(const char *device_id, + uint32_t head, Error **errp); +QemuConsole *qemu_console_lookup_unused(void); +bool qemu_console_is_visible(QemuConsole *con); +bool qemu_console_is_graphic(QemuConsole *con); +bool qemu_console_is_fixedsize(QemuConsole *con); +bool qemu_console_is_gl_blocked(QemuConsole *con); +char *qemu_console_get_label(QemuConsole *con); +int qemu_console_get_index(QemuConsole *con); +uint32_t qemu_console_get_head(QemuConsole *con); +QemuUIInfo *qemu_console_get_ui_info(QemuConsole *con); +int qemu_console_get_width(QemuConsole *con, int fallback); +int qemu_console_get_height(QemuConsole *con, int fallback); +/* Return the low-level window id for the console */ +int qemu_console_get_window_id(QemuConsole *con); +/* Set the low-level window id for the console */ +void qemu_console_set_window_id(QemuConsole *con, int window_id); + +void console_select(unsigned int index); +void qemu_console_resize(QemuConsole *con, int width, int height); +DisplaySurface *qemu_console_surface(QemuConsole *con); + +/* console-gl.c */ +#ifdef CONFIG_OPENGL +bool console_gl_check_format(DisplayChangeListener *dcl, + pixman_format_code_t format); +void surface_gl_create_texture(QemuGLShader *gls, + DisplaySurface *surface); +void surface_gl_update_texture(QemuGLShader *gls, + DisplaySurface *surface, + int x, int y, int w, int h); +void surface_gl_render_texture(QemuGLShader *gls, + DisplaySurface *surface); +void surface_gl_destroy_texture(QemuGLShader *gls, + DisplaySurface *surface); +void surface_gl_setup_viewport(QemuGLShader *gls, + DisplaySurface *surface, + int ww, int wh); +#endif + +typedef struct QemuDisplay QemuDisplay; + +struct QemuDisplay { + DisplayType type; + void (*early_init)(DisplayOptions *opts); + void (*init)(DisplayState *ds, DisplayOptions *opts); +}; + +void qemu_display_register(QemuDisplay *ui); +bool qemu_display_find_default(DisplayOptions *opts); +void qemu_display_early_init(DisplayOptions *opts); +void qemu_display_init(DisplayState *ds, DisplayOptions *opts); +void qemu_display_help(void); + +/* vnc.c */ +void vnc_display_init(const char *id, Error **errp); +void vnc_display_open(const char *id, Error **errp); +void vnc_display_add_client(const char *id, int csock, bool skipauth); +int vnc_display_password(const char *id, const char *password); +int vnc_display_pw_expire(const char *id, time_t expires); +QemuOpts *vnc_parse(const char *str, Error **errp); +int vnc_init_func(void *opaque, QemuOpts *opts, Error **errp); + +/* input.c */ +int index_from_key(const char *key, size_t key_length); + +#endif diff --git a/include/ui/egl-context.h b/include/ui/egl-context.h new file mode 100644 index 000000000..f004ce11a --- /dev/null +++ b/include/ui/egl-context.h @@ -0,0 +1,14 @@ +#ifndef EGL_CONTEXT_H +#define EGL_CONTEXT_H + +#include "ui/console.h" +#include "ui/egl-helpers.h" + +QEMUGLContext qemu_egl_create_context(DisplayChangeListener *dcl, + QEMUGLParams *params); +void qemu_egl_destroy_context(DisplayChangeListener *dcl, QEMUGLContext ctx); +int qemu_egl_make_context_current(DisplayChangeListener *dcl, + QEMUGLContext ctx); +QEMUGLContext qemu_egl_get_current_context(DisplayChangeListener *dcl); + +#endif /* EGL_CONTEXT_H */ diff --git a/include/ui/egl-helpers.h b/include/ui/egl-helpers.h new file mode 100644 index 000000000..94a4b3e6f --- /dev/null +++ b/include/ui/egl-helpers.h @@ -0,0 +1,55 @@ +#ifndef EGL_HELPERS_H +#define EGL_HELPERS_H + +#include +#include +#include +#include "ui/console.h" +#include "ui/shader.h" + +extern EGLDisplay *qemu_egl_display; +extern EGLConfig qemu_egl_config; +extern DisplayGLMode qemu_egl_mode; + +typedef struct egl_fb { + int width; + int height; + GLuint texture; + GLuint framebuffer; + bool delete_texture; +} egl_fb; + +void egl_fb_destroy(egl_fb *fb); +void egl_fb_setup_default(egl_fb *fb, int width, int height); +void egl_fb_setup_for_tex(egl_fb *fb, int width, int height, + GLuint texture, bool delete); +void egl_fb_setup_new_tex(egl_fb *fb, int width, int height); +void egl_fb_blit(egl_fb *dst, egl_fb *src, bool flip); +void egl_fb_read(DisplaySurface *dst, egl_fb *src); + +void egl_texture_blit(QemuGLShader *gls, egl_fb *dst, egl_fb *src, bool flip); +void egl_texture_blend(QemuGLShader *gls, egl_fb *dst, egl_fb *src, bool flip, + int x, int y, double scale_x, double scale_y); + +#ifdef CONFIG_OPENGL_DMABUF + +extern int qemu_egl_rn_fd; +extern struct gbm_device *qemu_egl_rn_gbm_dev; +extern EGLContext qemu_egl_rn_ctx; + +int egl_rendernode_init(const char *rendernode, DisplayGLMode mode); +int egl_get_fd_for_texture(uint32_t tex_id, EGLint *stride, EGLint *fourcc, + EGLuint64KHR *modifier); + +void egl_dmabuf_import_texture(QemuDmaBuf *dmabuf); +void egl_dmabuf_release_texture(QemuDmaBuf *dmabuf); + +#endif + +EGLSurface qemu_egl_init_surface_x11(EGLContext ectx, EGLNativeWindowType win); + +int qemu_egl_init_dpy_x11(EGLNativeDisplayType dpy, DisplayGLMode mode); +int qemu_egl_init_dpy_mesa(EGLNativeDisplayType dpy, DisplayGLMode mode); +EGLContext qemu_egl_init_ctx(void); + +#endif /* EGL_HELPERS_H */ diff --git a/include/ui/gtk.h b/include/ui/gtk.h new file mode 100644 index 000000000..eaeb450f9 --- /dev/null +++ b/include/ui/gtk.h @@ -0,0 +1,151 @@ +#ifndef UI_GTK_H +#define UI_GTK_H + +/* Work around an -Wstrict-prototypes warning in GTK headers */ +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wstrict-prototypes" +#include +#pragma GCC diagnostic pop + +#include + +#ifdef GDK_WINDOWING_X11 +#include +#include +#endif + +#ifdef GDK_WINDOWING_WAYLAND +#include +#endif + +#include "ui/kbd-state.h" +#if defined(CONFIG_OPENGL) +#include "ui/egl-helpers.h" +#include "ui/egl-context.h" +#endif + +#define MILLISEC_PER_SEC 1000000 + +typedef struct GtkDisplayState GtkDisplayState; + +typedef struct VirtualGfxConsole { + GtkWidget *drawing_area; + DisplayChangeListener dcl; + QKbdState *kbd; + DisplaySurface *ds; + pixman_image_t *convert; + cairo_surface_t *surface; + double scale_x; + double scale_y; +#if defined(CONFIG_OPENGL) + QemuGLShader *gls; + EGLContext ectx; + EGLSurface esurface; + int glupdates; + int x, y, w, h; + egl_fb guest_fb; + egl_fb win_fb; + egl_fb cursor_fb; + int cursor_x; + int cursor_y; + bool y0_top; + bool scanout_mode; +#endif +} VirtualGfxConsole; + +#if defined(CONFIG_VTE) +typedef struct VirtualVteConsole { + GtkWidget *box; + GtkWidget *scrollbar; + GtkWidget *terminal; + Chardev *chr; + bool echo; +} VirtualVteConsole; +#endif + +typedef enum VirtualConsoleType { + GD_VC_GFX, + GD_VC_VTE, +} VirtualConsoleType; + +typedef struct VirtualConsole { + GtkDisplayState *s; + char *label; + GtkWidget *window; + GtkWidget *menu_item; + GtkWidget *tab_item; + GtkWidget *focus; + VirtualConsoleType type; + union { + VirtualGfxConsole gfx; +#if defined(CONFIG_VTE) + VirtualVteConsole vte; +#endif + }; +} VirtualConsole; + +extern bool gtk_use_gl_area; + +/* ui/gtk.c */ +void gd_update_windowsize(VirtualConsole *vc); + +/* ui/gtk-egl.c */ +void gd_egl_init(VirtualConsole *vc); +void gd_egl_draw(VirtualConsole *vc); +void gd_egl_update(DisplayChangeListener *dcl, + int x, int y, int w, int h); +void gd_egl_refresh(DisplayChangeListener *dcl); +void gd_egl_switch(DisplayChangeListener *dcl, + DisplaySurface *surface); +QEMUGLContext gd_egl_create_context(DisplayChangeListener *dcl, + QEMUGLParams *params); +void gd_egl_scanout_disable(DisplayChangeListener *dcl); +void gd_egl_scanout_texture(DisplayChangeListener *dcl, + uint32_t backing_id, + bool backing_y_0_top, + uint32_t backing_width, + uint32_t backing_height, + uint32_t x, uint32_t y, + uint32_t w, uint32_t h); +void gd_egl_scanout_dmabuf(DisplayChangeListener *dcl, + QemuDmaBuf *dmabuf); +void gd_egl_cursor_dmabuf(DisplayChangeListener *dcl, + QemuDmaBuf *dmabuf, bool have_hot, + uint32_t hot_x, uint32_t hot_y); +void gd_egl_cursor_position(DisplayChangeListener *dcl, + uint32_t pos_x, uint32_t pos_y); +void gd_egl_release_dmabuf(DisplayChangeListener *dcl, + QemuDmaBuf *dmabuf); +void gd_egl_scanout_flush(DisplayChangeListener *dcl, + uint32_t x, uint32_t y, uint32_t w, uint32_t h); +void gtk_egl_init(DisplayGLMode mode); +int gd_egl_make_current(DisplayChangeListener *dcl, + QEMUGLContext ctx); + +/* ui/gtk-gl-area.c */ +void gd_gl_area_init(VirtualConsole *vc); +void gd_gl_area_draw(VirtualConsole *vc); +void gd_gl_area_update(DisplayChangeListener *dcl, + int x, int y, int w, int h); +void gd_gl_area_refresh(DisplayChangeListener *dcl); +void gd_gl_area_switch(DisplayChangeListener *dcl, + DisplaySurface *surface); +QEMUGLContext gd_gl_area_create_context(DisplayChangeListener *dcl, + QEMUGLParams *params); +void gd_gl_area_destroy_context(DisplayChangeListener *dcl, + QEMUGLContext ctx); +void gd_gl_area_scanout_texture(DisplayChangeListener *dcl, + uint32_t backing_id, + bool backing_y_0_top, + uint32_t backing_width, + uint32_t backing_height, + uint32_t x, uint32_t y, + uint32_t w, uint32_t h); +void gd_gl_area_scanout_flush(DisplayChangeListener *dcl, + uint32_t x, uint32_t y, uint32_t w, uint32_t h); +void gtk_gl_area_init(void); +QEMUGLContext gd_gl_area_get_current_context(DisplayChangeListener *dcl); +int gd_gl_area_make_current(DisplayChangeListener *dcl, + QEMUGLContext ctx); + +#endif /* UI_GTK_H */ diff --git a/include/ui/input.h b/include/ui/input.h new file mode 100644 index 000000000..c86219a1c --- /dev/null +++ b/include/ui/input.h @@ -0,0 +1,120 @@ +#ifndef INPUT_H +#define INPUT_H + +#include "qapi/qapi-types-ui.h" +#include "qemu/notify.h" + +#define INPUT_EVENT_MASK_KEY (1<> 5) << 5) | ((g >> 5) << 2) | (b >> 6); +} + +static inline unsigned int rgb_to_pixel15(unsigned int r, unsigned int g, + unsigned int b) +{ + return ((r >> 3) << 10) | ((g >> 3) << 5) | (b >> 3); +} + +static inline unsigned int rgb_to_pixel15bgr(unsigned int r, unsigned int g, + unsigned int b) +{ + return ((b >> 3) << 10) | ((g >> 3) << 5) | (r >> 3); +} + +static inline unsigned int rgb_to_pixel16(unsigned int r, unsigned int g, + unsigned int b) +{ + return ((r >> 3) << 11) | ((g >> 2) << 5) | (b >> 3); +} + +static inline unsigned int rgb_to_pixel16bgr(unsigned int r, unsigned int g, + unsigned int b) +{ + return ((b >> 3) << 11) | ((g >> 2) << 5) | (r >> 3); +} + +static inline unsigned int rgb_to_pixel24(unsigned int r, unsigned int g, + unsigned int b) +{ + return (r << 16) | (g << 8) | b; +} + +static inline unsigned int rgb_to_pixel24bgr(unsigned int r, unsigned int g, + unsigned int b) +{ + return (b << 16) | (g << 8) | r; +} + +static inline unsigned int rgb_to_pixel32(unsigned int r, unsigned int g, + unsigned int b) +{ + return (r << 16) | (g << 8) | b; +} + +static inline unsigned int rgb_to_pixel32bgr(unsigned int r, unsigned int g, + unsigned int b) +{ + return (b << 16) | (g << 8) | r; +} diff --git a/include/ui/qemu-pixman.h b/include/ui/qemu-pixman.h new file mode 100644 index 000000000..87737a6f1 --- /dev/null +++ b/include/ui/qemu-pixman.h @@ -0,0 +1,91 @@ +/* + * This work is licensed under the terms of the GNU GPL, version 2 or later. + * See the COPYING file in the top-level directory. + */ + +#ifndef QEMU_PIXMAN_H +#define QEMU_PIXMAN_H + +/* pixman-0.16.0 headers have a redundant declaration */ +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wredundant-decls" +#include +#pragma GCC diagnostic pop + +/* + * pixman image formats are defined to be native endian, + * that means host byte order on qemu. So we go define + * fixed formats here for cases where it is needed, like + * feeding libjpeg / libpng and writing screenshots. + */ + +#ifdef HOST_WORDS_BIGENDIAN +# define PIXMAN_BE_r8g8b8 PIXMAN_r8g8b8 +# define PIXMAN_BE_x8r8g8b8 PIXMAN_x8r8g8b8 +# define PIXMAN_BE_a8r8g8b8 PIXMAN_a8r8g8b8 +# define PIXMAN_BE_b8g8r8x8 PIXMAN_b8g8r8x8 +# define PIXMAN_BE_b8g8r8a8 PIXMAN_b8g8r8a8 +# define PIXMAN_BE_r8g8b8x8 PIXMAN_r8g8b8x8 +# define PIXMAN_BE_r8g8b8a8 PIXMAN_r8g8b8a8 +# define PIXMAN_BE_x8b8g8r8 PIXMAN_x8b8g8r8 +# define PIXMAN_BE_a8b8g8r8 PIXMAN_a8b8g8r8 +# define PIXMAN_LE_r8g8b8 PIXMAN_b8g8r8 +# define PIXMAN_LE_a8r8g8b8 PIXMAN_b8g8r8a8 +# define PIXMAN_LE_x8r8g8b8 PIXMAN_b8g8r8x8 +#else +# define PIXMAN_BE_r8g8b8 PIXMAN_b8g8r8 +# define PIXMAN_BE_x8r8g8b8 PIXMAN_b8g8r8x8 +# define PIXMAN_BE_a8r8g8b8 PIXMAN_b8g8r8a8 +# define PIXMAN_BE_b8g8r8x8 PIXMAN_x8r8g8b8 +# define PIXMAN_BE_b8g8r8a8 PIXMAN_a8r8g8b8 +# define PIXMAN_BE_r8g8b8x8 PIXMAN_x8b8g8r8 +# define PIXMAN_BE_r8g8b8a8 PIXMAN_a8b8g8r8 +# define PIXMAN_BE_x8b8g8r8 PIXMAN_r8g8b8x8 +# define PIXMAN_BE_a8b8g8r8 PIXMAN_r8g8b8a8 +# define PIXMAN_LE_r8g8b8 PIXMAN_r8g8b8 +# define PIXMAN_LE_a8r8g8b8 PIXMAN_a8r8g8b8 +# define PIXMAN_LE_x8r8g8b8 PIXMAN_x8r8g8b8 +#endif + +/* -------------------------------------------------------------------- */ + +typedef struct PixelFormat { + uint8_t bits_per_pixel; + uint8_t bytes_per_pixel; + uint8_t depth; /* color depth in bits */ + uint32_t rmask, gmask, bmask, amask; + uint8_t rshift, gshift, bshift, ashift; + uint8_t rmax, gmax, bmax, amax; + uint8_t rbits, gbits, bbits, abits; +} PixelFormat; + +PixelFormat qemu_pixelformat_from_pixman(pixman_format_code_t format); +pixman_format_code_t qemu_default_pixman_format(int bpp, bool native_endian); +pixman_format_code_t qemu_drm_format_to_pixman(uint32_t drm_format); +int qemu_pixman_get_type(int rshift, int gshift, int bshift); +pixman_format_code_t qemu_pixman_get_format(PixelFormat *pf); +bool qemu_pixman_check_format(DisplayChangeListener *dcl, + pixman_format_code_t format); + +pixman_image_t *qemu_pixman_linebuf_create(pixman_format_code_t format, + int width); +void qemu_pixman_linebuf_fill(pixman_image_t *linebuf, pixman_image_t *fb, + int width, int x, int y); +void qemu_pixman_linebuf_copy(pixman_image_t *fb, int width, int x, int y, + pixman_image_t *linebuf); +pixman_image_t *qemu_pixman_mirror_create(pixman_format_code_t format, + pixman_image_t *image); +void qemu_pixman_image_unref(pixman_image_t *image); + +pixman_color_t qemu_pixman_color(PixelFormat *pf, uint32_t color); +pixman_image_t *qemu_pixman_glyph_from_vgafont(int height, const uint8_t *font, + unsigned int ch); +void qemu_pixman_glyph_render(pixman_image_t *glyph, + pixman_image_t *surface, + pixman_color_t *fgcol, + pixman_color_t *bgcol, + int x, int y, int cw, int ch); + +G_DEFINE_AUTOPTR_CLEANUP_FUNC(pixman_image_t, qemu_pixman_image_unref) + +#endif /* QEMU_PIXMAN_H */ diff --git a/include/ui/qemu-spice-module.h b/include/ui/qemu-spice-module.h new file mode 100644 index 000000000..1f22d557e --- /dev/null +++ b/include/ui/qemu-spice-module.h @@ -0,0 +1,44 @@ +/* + * Copyright (C) 2010 Red Hat, Inc. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation; either version 2 or + * (at your option) version 3 of the License. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, see . + */ + +#ifndef QEMU_SPICE_MODULE_H +#define QEMU_SPICE_MODULE_H + +#ifdef CONFIG_SPICE +#include +#endif + +typedef struct SpiceInfo SpiceInfo; + +struct QemuSpiceOps { + void (*init)(void); + void (*display_init)(void); + int (*migrate_info)(const char *h, int p, int t, const char *s); + int (*set_passwd)(const char *passwd, + bool fail_if_connected, bool disconnect_if_connected); + int (*set_pw_expire)(time_t expires); + int (*display_add_client)(int csock, int skipauth, int tls); +#ifdef CONFIG_SPICE + int (*add_interface)(SpiceBaseInstance *sin); + SpiceInfo* (*qmp_query)(Error **errp); +#endif +}; + +extern int using_spice; +extern struct QemuSpiceOps qemu_spice; + +#endif diff --git a/include/ui/qemu-spice.h b/include/ui/qemu-spice.h new file mode 100644 index 000000000..2beb79297 --- /dev/null +++ b/include/ui/qemu-spice.h @@ -0,0 +1,60 @@ +/* + * Copyright (C) 2010 Red Hat, Inc. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation; either version 2 or + * (at your option) version 3 of the License. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, see . + */ + +#ifndef QEMU_SPICE_H +#define QEMU_SPICE_H + +#include "qapi/error.h" +#include "ui/qemu-spice-module.h" + +#ifdef CONFIG_SPICE + +#include +#include "qemu/config-file.h" + +void qemu_spice_input_init(void); +void qemu_spice_display_init(void); +bool qemu_spice_have_display_interface(QemuConsole *con); +int qemu_spice_add_display_interface(QXLInstance *qxlin, QemuConsole *con); +int qemu_spice_migrate_info(const char *hostname, int port, int tls_port, + const char *subject); + +#if !defined(SPICE_SERVER_VERSION) || (SPICE_SERVER_VERSION < 0xc06) +#define SPICE_NEEDS_SET_MM_TIME 1 +#else +#define SPICE_NEEDS_SET_MM_TIME 0 +#endif + +#else /* CONFIG_SPICE */ + +#include "qemu/error-report.h" + +#define spice_displays 0 + +#endif /* CONFIG_SPICE */ + +static inline bool qemu_using_spice(Error **errp) +{ + if (!using_spice) { + error_set(errp, ERROR_CLASS_DEVICE_NOT_ACTIVE, + "SPICE is not in use"); + return false; + } + return true; +} + +#endif /* QEMU_SPICE_H */ diff --git a/include/ui/sdl2.h b/include/ui/sdl2.h new file mode 100644 index 000000000..0875b8d56 --- /dev/null +++ b/include/ui/sdl2.h @@ -0,0 +1,86 @@ +#ifndef SDL2_H +#define SDL2_H + +/* Avoid compiler warning because macro is redefined in SDL_syswm.h. */ +#undef WIN32_LEAN_AND_MEAN + +#include +#include +#ifdef CONFIG_SDL_IMAGE +# include +#endif + +#include "ui/kbd-state.h" +#ifdef CONFIG_OPENGL +# include "ui/egl-helpers.h" +#endif + +struct sdl2_console { + DisplayChangeListener dcl; + DisplaySurface *surface; + DisplayOptions *opts; + SDL_Texture *texture; + SDL_Window *real_window; + SDL_Renderer *real_renderer; + int idx; + int last_vm_running; /* per console for caption reasons */ + int x, y, w, h; + int hidden; + int opengl; + int updates; + int idle_counter; + int ignore_hotkeys; + SDL_GLContext winctx; + QKbdState *kbd; +#ifdef CONFIG_OPENGL + QemuGLShader *gls; + egl_fb guest_fb; + egl_fb win_fb; + bool y0_top; + bool scanout_mode; +#endif +}; + +void sdl2_window_create(struct sdl2_console *scon); +void sdl2_window_destroy(struct sdl2_console *scon); +void sdl2_window_resize(struct sdl2_console *scon); +void sdl2_poll_events(struct sdl2_console *scon); + +void sdl2_process_key(struct sdl2_console *scon, + SDL_KeyboardEvent *ev); + +void sdl2_2d_update(DisplayChangeListener *dcl, + int x, int y, int w, int h); +void sdl2_2d_switch(DisplayChangeListener *dcl, + DisplaySurface *new_surface); +void sdl2_2d_refresh(DisplayChangeListener *dcl); +void sdl2_2d_redraw(struct sdl2_console *scon); +bool sdl2_2d_check_format(DisplayChangeListener *dcl, + pixman_format_code_t format); + +void sdl2_gl_update(DisplayChangeListener *dcl, + int x, int y, int w, int h); +void sdl2_gl_switch(DisplayChangeListener *dcl, + DisplaySurface *new_surface); +void sdl2_gl_refresh(DisplayChangeListener *dcl); +void sdl2_gl_redraw(struct sdl2_console *scon); + +QEMUGLContext sdl2_gl_create_context(DisplayChangeListener *dcl, + QEMUGLParams *params); +void sdl2_gl_destroy_context(DisplayChangeListener *dcl, QEMUGLContext ctx); +int sdl2_gl_make_context_current(DisplayChangeListener *dcl, + QEMUGLContext ctx); +QEMUGLContext sdl2_gl_get_current_context(DisplayChangeListener *dcl); + +void sdl2_gl_scanout_disable(DisplayChangeListener *dcl); +void sdl2_gl_scanout_texture(DisplayChangeListener *dcl, + uint32_t backing_id, + bool backing_y_0_top, + uint32_t backing_width, + uint32_t backing_height, + uint32_t x, uint32_t y, + uint32_t w, uint32_t h); +void sdl2_gl_scanout_flush(DisplayChangeListener *dcl, + uint32_t x, uint32_t y, uint32_t w, uint32_t h); + +#endif /* SDL2_H */ diff --git a/include/ui/shader.h b/include/ui/shader.h new file mode 100644 index 000000000..4c5acb2ce --- /dev/null +++ b/include/ui/shader.h @@ -0,0 +1,13 @@ +#ifndef QEMU_SHADER_H +#define QEMU_SHADER_H + +#include + +typedef struct QemuGLShader QemuGLShader; + +void qemu_gl_run_texture_blit(QemuGLShader *gls, bool flip); + +QemuGLShader *qemu_gl_init_shader(void); +void qemu_gl_fini_shader(QemuGLShader *gls); + +#endif /* QEMU_SHADER_H */ diff --git a/include/ui/spice-display.h b/include/ui/spice-display.h new file mode 100644 index 000000000..4a47ffdd4 --- /dev/null +++ b/include/ui/spice-display.h @@ -0,0 +1,190 @@ +/* + * Copyright (C) 2010 Red Hat, Inc. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation; either version 2 or + * (at your option) version 3 of the License. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, see . + */ + +#ifndef UI_SPICE_DISPLAY_H +#define UI_SPICE_DISPLAY_H + +#include +#include +#include +#include + +#include "qemu/thread.h" +#include "ui/qemu-pixman.h" +#include "ui/console.h" + +#if defined(CONFIG_OPENGL_DMABUF) +# if SPICE_SERVER_VERSION >= 0x000d01 /* release 0.13.1 */ +# define HAVE_SPICE_GL 1 +# include "ui/egl-helpers.h" +# include "ui/egl-context.h" +# endif +#endif + +#define NUM_MEMSLOTS 8 +#define MEMSLOT_GENERATION_BITS 8 +#define MEMSLOT_SLOT_BITS 8 + +#define MEMSLOT_GROUP_HOST 0 +#define MEMSLOT_GROUP_GUEST 1 +#define NUM_MEMSLOTS_GROUPS 2 + +/* + * Internal enum to differenciate between options for + * io calls that have a sync (old) version and an _async (new) + * version: + * QXL_SYNC: use the old version + * QXL_ASYNC: use the new version and make sure there are no two + * happening at the same time. This is used for guest initiated + * calls + */ +typedef enum qxl_async_io { + QXL_SYNC, + QXL_ASYNC, +} qxl_async_io; + +enum { + QXL_COOKIE_TYPE_IO, + QXL_COOKIE_TYPE_RENDER_UPDATE_AREA, + QXL_COOKIE_TYPE_POST_LOAD_MONITORS_CONFIG, + QXL_COOKIE_TYPE_GL_DRAW_DONE, +}; + +typedef struct QXLCookie { + int type; + uint64_t io; + union { + uint32_t surface_id; + QXLRect area; + struct { + QXLRect area; + int redraw; + } render; + void *data; + } u; +} QXLCookie; + +QXLCookie *qxl_cookie_new(int type, uint64_t io); + +typedef struct SimpleSpiceDisplay SimpleSpiceDisplay; +typedef struct SimpleSpiceUpdate SimpleSpiceUpdate; +typedef struct SimpleSpiceCursor SimpleSpiceCursor; + +struct SimpleSpiceDisplay { + DisplaySurface *ds; + DisplayChangeListener dcl; + void *buf; + int bufsize; + QXLInstance qxl; + uint32_t unique; + pixman_image_t *surface; + pixman_image_t *mirror; + int32_t num_surfaces; + + QXLRect dirty; + int notify; + + /* + * All struct members below this comment can be accessed from + * both spice server and qemu (iothread) context and any access + * to them must be protected by the lock. + */ + QemuMutex lock; + QTAILQ_HEAD(, SimpleSpiceUpdate) updates; + + /* cursor (without qxl): displaychangelistener -> spice server */ + SimpleSpiceCursor *ptr_define; + SimpleSpiceCursor *ptr_move; + int16_t ptr_x, ptr_y; + int16_t hot_x, hot_y; + + /* cursor (with qxl): qxl local renderer -> displaychangelistener */ + QEMUCursor *cursor; + int mouse_x, mouse_y; + QEMUBH *cursor_bh; + +#ifdef HAVE_SPICE_GL + /* opengl rendering */ + QEMUBH *gl_unblock_bh; + QEMUTimer *gl_unblock_timer; + QemuGLShader *gls; + int gl_updates; + bool have_scanout; + bool have_surface; + + QemuDmaBuf *guest_dmabuf; + bool guest_dmabuf_refresh; + bool render_cursor; + + egl_fb guest_fb; + egl_fb blit_fb; + egl_fb cursor_fb; + bool have_hot; +#endif +}; + +struct SimpleSpiceUpdate { + QXLDrawable drawable; + QXLImage image; + QXLCommandExt ext; + uint8_t *bitmap; + QTAILQ_ENTRY(SimpleSpiceUpdate) next; +}; + +struct SimpleSpiceCursor { + QXLCursorCmd cmd; + QXLCommandExt ext; + QXLCursor cursor; +}; + +extern bool spice_opengl; + +int qemu_spice_rect_is_empty(const QXLRect* r); +void qemu_spice_rect_union(QXLRect *dest, const QXLRect *r); + +void qemu_spice_destroy_update(SimpleSpiceDisplay *sdpy, SimpleSpiceUpdate *update); +void qemu_spice_create_host_memslot(SimpleSpiceDisplay *ssd); +void qemu_spice_create_host_primary(SimpleSpiceDisplay *ssd); +void qemu_spice_destroy_host_primary(SimpleSpiceDisplay *ssd); +void qemu_spice_display_init_common(SimpleSpiceDisplay *ssd); + +void qemu_spice_display_update(SimpleSpiceDisplay *ssd, + int x, int y, int w, int h); +void qemu_spice_display_switch(SimpleSpiceDisplay *ssd, + DisplaySurface *surface); +void qemu_spice_display_refresh(SimpleSpiceDisplay *ssd); +void qemu_spice_cursor_refresh_bh(void *opaque); + +void qemu_spice_add_memslot(SimpleSpiceDisplay *ssd, QXLDevMemSlot *memslot, + qxl_async_io async); +void qemu_spice_del_memslot(SimpleSpiceDisplay *ssd, uint32_t gid, + uint32_t sid); +void qemu_spice_create_primary_surface(SimpleSpiceDisplay *ssd, uint32_t id, + QXLDevSurfaceCreate *surface, + qxl_async_io async); +void qemu_spice_destroy_primary_surface(SimpleSpiceDisplay *ssd, + uint32_t id, qxl_async_io async); +void qemu_spice_wakeup(SimpleSpiceDisplay *ssd); +void qemu_spice_display_start(void); +void qemu_spice_display_stop(void); +int qemu_spice_display_is_running(SimpleSpiceDisplay *ssd); + +bool qemu_spice_fill_device_address(QemuConsole *con, + char *device_address, + size_t size); + +#endif diff --git a/include/ui/win32-kbd-hook.h b/include/ui/win32-kbd-hook.h new file mode 100644 index 000000000..4bd9f00f9 --- /dev/null +++ b/include/ui/win32-kbd-hook.h @@ -0,0 +1,14 @@ +/* + * SPDX-License-Identifier: GPL-2.0-or-later + * + * This work is licensed under the terms of the GNU GPL, version 2 or later. + * See the COPYING file in the top-level directory. + */ + +#ifndef UI_WIN32_KBD_HOOK_H +#define UI_WIN32_KBD_HOOK_H + +void win32_kbd_set_window(void *hwnd); +void win32_kbd_set_grab(bool grab); + +#endif diff --git a/include/user/syscall-trace.h b/include/user/syscall-trace.h new file mode 100644 index 000000000..42e3b48b0 --- /dev/null +++ b/include/user/syscall-trace.h @@ -0,0 +1,42 @@ +/* + * Common System Call Tracing Wrappers for *-user + * + * Copyright (c) 2019 Linaro + * Written by Alex Bennée + * + * SPDX-License-Identifier: GPL-2.0-or-later + */ + +#ifndef _SYSCALL_TRACE_H_ +#define _SYSCALL_TRACE_H_ + +#include "trace/trace-root.h" + +/* + * These helpers just provide a common place for the various + * subsystems that want to track syscalls to put their hooks in. We + * could potentially unify the -strace code here as well. + */ + +static inline void record_syscall_start(void *cpu, int num, + abi_long arg1, abi_long arg2, + abi_long arg3, abi_long arg4, + abi_long arg5, abi_long arg6, + abi_long arg7, abi_long arg8) +{ + trace_guest_user_syscall(cpu, num, + arg1, arg2, arg3, arg4, + arg5, arg6, arg7, arg8); + qemu_plugin_vcpu_syscall(cpu, num, + arg1, arg2, arg3, arg4, + arg5, arg6, arg7, arg8); +} + +static inline void record_syscall_return(void *cpu, int num, abi_long ret) +{ + trace_guest_user_syscall_ret(cpu, num, ret); + qemu_plugin_vcpu_syscall_ret(cpu, num, ret); +} + + +#endif /* _SYSCALL_TRACE_H_ */ diff --git a/io/channel-buffer.c b/io/channel-buffer.c new file mode 100644 index 000000000..baa4e2b08 --- /dev/null +++ b/io/channel-buffer.c @@ -0,0 +1,251 @@ +/* + * QEMU I/O channels memory buffer driver + * + * Copyright (c) 2015 Red Hat, Inc. + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, see . + * + */ + +#include "qemu/osdep.h" +#include "io/channel-buffer.h" +#include "io/channel-watch.h" +#include "qemu/module.h" +#include "qemu/sockets.h" +#include "trace.h" + +QIOChannelBuffer * +qio_channel_buffer_new(size_t capacity) +{ + QIOChannelBuffer *ioc; + + ioc = QIO_CHANNEL_BUFFER(object_new(TYPE_QIO_CHANNEL_BUFFER)); + + if (capacity) { + ioc->data = g_new0(uint8_t, capacity); + ioc->capacity = capacity; + } + + return ioc; +} + + +static void qio_channel_buffer_finalize(Object *obj) +{ + QIOChannelBuffer *ioc = QIO_CHANNEL_BUFFER(obj); + g_free(ioc->data); + ioc->capacity = ioc->usage = ioc->offset = 0; +} + + +static ssize_t qio_channel_buffer_readv(QIOChannel *ioc, + const struct iovec *iov, + size_t niov, + int **fds, + size_t *nfds, + Error **errp) +{ + QIOChannelBuffer *bioc = QIO_CHANNEL_BUFFER(ioc); + ssize_t ret = 0; + size_t i; + + for (i = 0; i < niov; i++) { + size_t want = iov[i].iov_len; + if (bioc->offset >= bioc->usage) { + break; + } + if ((bioc->offset + want) > bioc->usage) { + want = bioc->usage - bioc->offset; + } + memcpy(iov[i].iov_base, bioc->data + bioc->offset, want); + ret += want; + bioc->offset += want; + } + + return ret; +} + +static ssize_t qio_channel_buffer_writev(QIOChannel *ioc, + const struct iovec *iov, + size_t niov, + int *fds, + size_t nfds, + Error **errp) +{ + QIOChannelBuffer *bioc = QIO_CHANNEL_BUFFER(ioc); + ssize_t ret = 0; + size_t i; + size_t towrite = 0; + + for (i = 0; i < niov; i++) { + towrite += iov[i].iov_len; + } + + if ((bioc->offset + towrite) > bioc->capacity) { + bioc->capacity = bioc->offset + towrite; + bioc->data = g_realloc(bioc->data, bioc->capacity); + } + + if (bioc->offset > bioc->usage) { + memset(bioc->data, 0, bioc->offset - bioc->usage); + bioc->usage = bioc->offset; + } + + for (i = 0; i < niov; i++) { + memcpy(bioc->data + bioc->usage, + iov[i].iov_base, + iov[i].iov_len); + bioc->usage += iov[i].iov_len; + bioc->offset += iov[i].iov_len; + ret += iov[i].iov_len; + } + + return ret; +} + +static int qio_channel_buffer_set_blocking(QIOChannel *ioc G_GNUC_UNUSED, + bool enabled G_GNUC_UNUSED, + Error **errp G_GNUC_UNUSED) +{ + return 0; +} + + +static off_t qio_channel_buffer_seek(QIOChannel *ioc, + off_t offset, + int whence, + Error **errp) +{ + QIOChannelBuffer *bioc = QIO_CHANNEL_BUFFER(ioc); + + bioc->offset = offset; + + return offset; +} + + +static int qio_channel_buffer_close(QIOChannel *ioc, + Error **errp) +{ + QIOChannelBuffer *bioc = QIO_CHANNEL_BUFFER(ioc); + + g_free(bioc->data); + bioc->data = NULL; + bioc->capacity = bioc->usage = bioc->offset = 0; + + return 0; +} + + +typedef struct QIOChannelBufferSource QIOChannelBufferSource; +struct QIOChannelBufferSource { + GSource parent; + QIOChannelBuffer *bioc; + GIOCondition condition; +}; + +static gboolean +qio_channel_buffer_source_prepare(GSource *source, + gint *timeout) +{ + QIOChannelBufferSource *bsource = (QIOChannelBufferSource *)source; + + *timeout = -1; + + return (G_IO_IN | G_IO_OUT) & bsource->condition; +} + +static gboolean +qio_channel_buffer_source_check(GSource *source) +{ + QIOChannelBufferSource *bsource = (QIOChannelBufferSource *)source; + + return (G_IO_IN | G_IO_OUT) & bsource->condition; +} + +static gboolean +qio_channel_buffer_source_dispatch(GSource *source, + GSourceFunc callback, + gpointer user_data) +{ + QIOChannelFunc func = (QIOChannelFunc)callback; + QIOChannelBufferSource *bsource = (QIOChannelBufferSource *)source; + + return (*func)(QIO_CHANNEL(bsource->bioc), + ((G_IO_IN | G_IO_OUT) & bsource->condition), + user_data); +} + +static void +qio_channel_buffer_source_finalize(GSource *source) +{ + QIOChannelBufferSource *ssource = (QIOChannelBufferSource *)source; + + object_unref(OBJECT(ssource->bioc)); +} + +GSourceFuncs qio_channel_buffer_source_funcs = { + qio_channel_buffer_source_prepare, + qio_channel_buffer_source_check, + qio_channel_buffer_source_dispatch, + qio_channel_buffer_source_finalize +}; + +static GSource *qio_channel_buffer_create_watch(QIOChannel *ioc, + GIOCondition condition) +{ + QIOChannelBuffer *bioc = QIO_CHANNEL_BUFFER(ioc); + QIOChannelBufferSource *ssource; + GSource *source; + + source = g_source_new(&qio_channel_buffer_source_funcs, + sizeof(QIOChannelBufferSource)); + ssource = (QIOChannelBufferSource *)source; + + ssource->bioc = bioc; + object_ref(OBJECT(bioc)); + + ssource->condition = condition; + + return source; +} + + +static void qio_channel_buffer_class_init(ObjectClass *klass, + void *class_data G_GNUC_UNUSED) +{ + QIOChannelClass *ioc_klass = QIO_CHANNEL_CLASS(klass); + + ioc_klass->io_writev = qio_channel_buffer_writev; + ioc_klass->io_readv = qio_channel_buffer_readv; + ioc_klass->io_set_blocking = qio_channel_buffer_set_blocking; + ioc_klass->io_seek = qio_channel_buffer_seek; + ioc_klass->io_close = qio_channel_buffer_close; + ioc_klass->io_create_watch = qio_channel_buffer_create_watch; +} + +static const TypeInfo qio_channel_buffer_info = { + .parent = TYPE_QIO_CHANNEL, + .name = TYPE_QIO_CHANNEL_BUFFER, + .instance_size = sizeof(QIOChannelBuffer), + .instance_finalize = qio_channel_buffer_finalize, + .class_init = qio_channel_buffer_class_init, +}; + +static void qio_channel_buffer_register_types(void) +{ + type_register_static(&qio_channel_buffer_info); +} + +type_init(qio_channel_buffer_register_types); diff --git a/io/channel-command.c b/io/channel-command.c new file mode 100644 index 000000000..b2a9e2713 --- /dev/null +++ b/io/channel-command.c @@ -0,0 +1,392 @@ +/* + * QEMU I/O channels external command driver + * + * Copyright (c) 2015 Red Hat, Inc. + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, see . + * + */ + +#include "qemu/osdep.h" +#include "io/channel-command.h" +#include "io/channel-watch.h" +#include "qapi/error.h" +#include "qemu/module.h" +#include "qemu/sockets.h" +#include "trace.h" + + +QIOChannelCommand * +qio_channel_command_new_pid(int writefd, + int readfd, + pid_t pid) +{ + QIOChannelCommand *ioc; + + ioc = QIO_CHANNEL_COMMAND(object_new(TYPE_QIO_CHANNEL_COMMAND)); + + ioc->readfd = readfd; + ioc->writefd = writefd; + ioc->pid = pid; + + trace_qio_channel_command_new_pid(ioc, writefd, readfd, pid); + return ioc; +} + + +#ifndef WIN32 +QIOChannelCommand * +qio_channel_command_new_spawn(const char *const argv[], + int flags, + Error **errp) +{ + pid_t pid = -1; + int stdinfd[2] = { -1, -1 }; + int stdoutfd[2] = { -1, -1 }; + int devnull = -1; + bool stdinnull = false, stdoutnull = false; + QIOChannelCommand *ioc; + + flags = flags & O_ACCMODE; + + if (flags == O_RDONLY) { + stdinnull = true; + } + if (flags == O_WRONLY) { + stdoutnull = true; + } + + if (stdinnull || stdoutnull) { + devnull = open("/dev/null", O_RDWR); + if (devnull < 0) { + error_setg_errno(errp, errno, + "Unable to open /dev/null"); + goto error; + } + } + + if ((!stdinnull && pipe(stdinfd) < 0) || + (!stdoutnull && pipe(stdoutfd) < 0)) { + error_setg_errno(errp, errno, + "Unable to open pipe"); + goto error; + } + + pid = qemu_fork(errp); + if (pid < 0) { + goto error; + } + + if (pid == 0) { /* child */ + dup2(stdinnull ? devnull : stdinfd[0], STDIN_FILENO); + dup2(stdoutnull ? devnull : stdoutfd[1], STDOUT_FILENO); + /* Leave stderr connected to qemu's stderr */ + + if (!stdinnull) { + close(stdinfd[0]); + close(stdinfd[1]); + } + if (!stdoutnull) { + close(stdoutfd[0]); + close(stdoutfd[1]); + } + if (devnull != -1) { + close(devnull); + } + + execv(argv[0], (char * const *)argv); + _exit(1); + } + + if (!stdinnull) { + close(stdinfd[0]); + } + if (!stdoutnull) { + close(stdoutfd[1]); + } + + ioc = qio_channel_command_new_pid(stdinnull ? devnull : stdinfd[1], + stdoutnull ? devnull : stdoutfd[0], + pid); + trace_qio_channel_command_new_spawn(ioc, argv[0], flags); + return ioc; + + error: + if (devnull != -1) { + close(devnull); + } + if (stdinfd[0] != -1) { + close(stdinfd[0]); + } + if (stdinfd[1] != -1) { + close(stdinfd[1]); + } + if (stdoutfd[0] != -1) { + close(stdoutfd[0]); + } + if (stdoutfd[1] != -1) { + close(stdoutfd[1]); + } + return NULL; +} + +#else /* WIN32 */ +QIOChannelCommand * +qio_channel_command_new_spawn(const char *const argv[], + int flags, + Error **errp) +{ + error_setg_errno(errp, ENOSYS, + "Command spawn not supported on this platform"); + return NULL; +} +#endif /* WIN32 */ + +#ifndef WIN32 +static int qio_channel_command_abort(QIOChannelCommand *ioc, + Error **errp) +{ + pid_t ret; + int status; + int step = 0; + + /* See if intermediate process has exited; if not, try a nice + * SIGTERM followed by a more severe SIGKILL. + */ + rewait: + trace_qio_channel_command_abort(ioc, ioc->pid); + ret = waitpid(ioc->pid, &status, WNOHANG); + trace_qio_channel_command_wait(ioc, ioc->pid, ret, status); + if (ret == (pid_t)-1) { + if (errno == EINTR) { + goto rewait; + } else { + error_setg_errno(errp, errno, + "Cannot wait on pid %llu", + (unsigned long long)ioc->pid); + return -1; + } + } else if (ret == 0) { + if (step == 0) { + kill(ioc->pid, SIGTERM); + } else if (step == 1) { + kill(ioc->pid, SIGKILL); + } else { + error_setg(errp, + "Process %llu refused to die", + (unsigned long long)ioc->pid); + return -1; + } + step++; + usleep(10 * 1000); + goto rewait; + } + + return 0; +} +#endif /* ! WIN32 */ + + +static void qio_channel_command_init(Object *obj) +{ + QIOChannelCommand *ioc = QIO_CHANNEL_COMMAND(obj); + ioc->readfd = -1; + ioc->writefd = -1; + ioc->pid = -1; +} + +static void qio_channel_command_finalize(Object *obj) +{ + QIOChannelCommand *ioc = QIO_CHANNEL_COMMAND(obj); + if (ioc->readfd != -1) { + close(ioc->readfd); + } + if (ioc->writefd != -1 && + ioc->writefd != ioc->readfd) { + close(ioc->writefd); + } + ioc->writefd = ioc->readfd = -1; + if (ioc->pid > 0) { +#ifndef WIN32 + qio_channel_command_abort(ioc, NULL); +#endif + } +} + + +static ssize_t qio_channel_command_readv(QIOChannel *ioc, + const struct iovec *iov, + size_t niov, + int **fds, + size_t *nfds, + Error **errp) +{ + QIOChannelCommand *cioc = QIO_CHANNEL_COMMAND(ioc); + ssize_t ret; + + retry: + ret = readv(cioc->readfd, iov, niov); + if (ret < 0) { + if (errno == EAGAIN) { + return QIO_CHANNEL_ERR_BLOCK; + } + if (errno == EINTR) { + goto retry; + } + + error_setg_errno(errp, errno, + "Unable to read from command"); + return -1; + } + + return ret; +} + +static ssize_t qio_channel_command_writev(QIOChannel *ioc, + const struct iovec *iov, + size_t niov, + int *fds, + size_t nfds, + Error **errp) +{ + QIOChannelCommand *cioc = QIO_CHANNEL_COMMAND(ioc); + ssize_t ret; + + retry: + ret = writev(cioc->writefd, iov, niov); + if (ret <= 0) { + if (errno == EAGAIN) { + return QIO_CHANNEL_ERR_BLOCK; + } + if (errno == EINTR) { + goto retry; + } + error_setg_errno(errp, errno, "%s", + "Unable to write to command"); + return -1; + } + return ret; +} + +static int qio_channel_command_set_blocking(QIOChannel *ioc, + bool enabled, + Error **errp) +{ + QIOChannelCommand *cioc = QIO_CHANNEL_COMMAND(ioc); + + if (enabled) { + qemu_set_block(cioc->writefd); + qemu_set_block(cioc->readfd); + } else { + qemu_set_nonblock(cioc->writefd); + qemu_set_nonblock(cioc->readfd); + } + + return 0; +} + + +static int qio_channel_command_close(QIOChannel *ioc, + Error **errp) +{ + QIOChannelCommand *cioc = QIO_CHANNEL_COMMAND(ioc); + int rv = 0; +#ifndef WIN32 + pid_t wp; +#endif + + /* We close FDs before killing, because that + * gives a better chance of clean shutdown + */ + if (cioc->readfd != -1 && + close(cioc->readfd) < 0) { + rv = -1; + } + if (cioc->writefd != -1 && + cioc->writefd != cioc->readfd && + close(cioc->writefd) < 0) { + rv = -1; + } + cioc->writefd = cioc->readfd = -1; + +#ifndef WIN32 + do { + wp = waitpid(cioc->pid, NULL, 0); + } while (wp == (pid_t)-1 && errno == EINTR); + if (wp == (pid_t)-1) { + error_setg_errno(errp, errno, "Failed to wait for pid %llu", + (unsigned long long)cioc->pid); + return -1; + } +#endif + + if (rv < 0) { + error_setg_errno(errp, errno, "%s", + "Unable to close command"); + } + return rv; +} + + +static void qio_channel_command_set_aio_fd_handler(QIOChannel *ioc, + AioContext *ctx, + IOHandler *io_read, + IOHandler *io_write, + void *opaque) +{ + QIOChannelCommand *cioc = QIO_CHANNEL_COMMAND(ioc); + aio_set_fd_handler(ctx, cioc->readfd, false, io_read, NULL, NULL, opaque); + aio_set_fd_handler(ctx, cioc->writefd, false, NULL, io_write, NULL, opaque); +} + + +static GSource *qio_channel_command_create_watch(QIOChannel *ioc, + GIOCondition condition) +{ + QIOChannelCommand *cioc = QIO_CHANNEL_COMMAND(ioc); + return qio_channel_create_fd_pair_watch(ioc, + cioc->readfd, + cioc->writefd, + condition); +} + + +static void qio_channel_command_class_init(ObjectClass *klass, + void *class_data G_GNUC_UNUSED) +{ + QIOChannelClass *ioc_klass = QIO_CHANNEL_CLASS(klass); + + ioc_klass->io_writev = qio_channel_command_writev; + ioc_klass->io_readv = qio_channel_command_readv; + ioc_klass->io_set_blocking = qio_channel_command_set_blocking; + ioc_klass->io_close = qio_channel_command_close; + ioc_klass->io_create_watch = qio_channel_command_create_watch; + ioc_klass->io_set_aio_fd_handler = qio_channel_command_set_aio_fd_handler; +} + +static const TypeInfo qio_channel_command_info = { + .parent = TYPE_QIO_CHANNEL, + .name = TYPE_QIO_CHANNEL_COMMAND, + .instance_size = sizeof(QIOChannelCommand), + .instance_init = qio_channel_command_init, + .instance_finalize = qio_channel_command_finalize, + .class_init = qio_channel_command_class_init, +}; + +static void qio_channel_command_register_types(void) +{ + type_register_static(&qio_channel_command_info); +} + +type_init(qio_channel_command_register_types); diff --git a/io/channel-file.c b/io/channel-file.c new file mode 100644 index 000000000..c4bf799a8 --- /dev/null +++ b/io/channel-file.c @@ -0,0 +1,234 @@ +/* + * QEMU I/O channels files driver + * + * Copyright (c) 2015 Red Hat, Inc. + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, see . + * + */ + +#include "qemu/osdep.h" +#include "io/channel-file.h" +#include "io/channel-watch.h" +#include "qapi/error.h" +#include "qemu/module.h" +#include "qemu/sockets.h" +#include "trace.h" + +QIOChannelFile * +qio_channel_file_new_fd(int fd) +{ + QIOChannelFile *ioc; + + ioc = QIO_CHANNEL_FILE(object_new(TYPE_QIO_CHANNEL_FILE)); + + ioc->fd = fd; + + trace_qio_channel_file_new_fd(ioc, fd); + + return ioc; +} + + +QIOChannelFile * +qio_channel_file_new_path(const char *path, + int flags, + mode_t mode, + Error **errp) +{ + QIOChannelFile *ioc; + + ioc = QIO_CHANNEL_FILE(object_new(TYPE_QIO_CHANNEL_FILE)); + + ioc->fd = qemu_open_old(path, flags, mode); + if (ioc->fd < 0) { + object_unref(OBJECT(ioc)); + error_setg_errno(errp, errno, + "Unable to open %s", path); + return NULL; + } + + trace_qio_channel_file_new_path(ioc, path, flags, mode, ioc->fd); + + return ioc; +} + + +static void qio_channel_file_init(Object *obj) +{ + QIOChannelFile *ioc = QIO_CHANNEL_FILE(obj); + ioc->fd = -1; +} + +static void qio_channel_file_finalize(Object *obj) +{ + QIOChannelFile *ioc = QIO_CHANNEL_FILE(obj); + if (ioc->fd != -1) { + qemu_close(ioc->fd); + ioc->fd = -1; + } +} + + +static ssize_t qio_channel_file_readv(QIOChannel *ioc, + const struct iovec *iov, + size_t niov, + int **fds, + size_t *nfds, + Error **errp) +{ + QIOChannelFile *fioc = QIO_CHANNEL_FILE(ioc); + ssize_t ret; + + retry: + ret = readv(fioc->fd, iov, niov); + if (ret < 0) { + if (errno == EAGAIN) { + return QIO_CHANNEL_ERR_BLOCK; + } + if (errno == EINTR) { + goto retry; + } + + error_setg_errno(errp, errno, + "Unable to read from file"); + return -1; + } + + return ret; +} + +static ssize_t qio_channel_file_writev(QIOChannel *ioc, + const struct iovec *iov, + size_t niov, + int *fds, + size_t nfds, + Error **errp) +{ + QIOChannelFile *fioc = QIO_CHANNEL_FILE(ioc); + ssize_t ret; + + retry: + ret = writev(fioc->fd, iov, niov); + if (ret <= 0) { + if (errno == EAGAIN) { + return QIO_CHANNEL_ERR_BLOCK; + } + if (errno == EINTR) { + goto retry; + } + error_setg_errno(errp, errno, + "Unable to write to file"); + return -1; + } + return ret; +} + +static int qio_channel_file_set_blocking(QIOChannel *ioc, + bool enabled, + Error **errp) +{ + QIOChannelFile *fioc = QIO_CHANNEL_FILE(ioc); + + if (enabled) { + qemu_set_block(fioc->fd); + } else { + qemu_set_nonblock(fioc->fd); + } + return 0; +} + + +static off_t qio_channel_file_seek(QIOChannel *ioc, + off_t offset, + int whence, + Error **errp) +{ + QIOChannelFile *fioc = QIO_CHANNEL_FILE(ioc); + off_t ret; + + ret = lseek(fioc->fd, offset, whence); + if (ret == (off_t)-1) { + error_setg_errno(errp, errno, + "Unable to seek to offset %lld whence %d in file", + (long long int)offset, whence); + return -1; + } + return ret; +} + + +static int qio_channel_file_close(QIOChannel *ioc, + Error **errp) +{ + QIOChannelFile *fioc = QIO_CHANNEL_FILE(ioc); + + if (qemu_close(fioc->fd) < 0) { + error_setg_errno(errp, errno, + "Unable to close file"); + return -1; + } + fioc->fd = -1; + return 0; +} + + +static void qio_channel_file_set_aio_fd_handler(QIOChannel *ioc, + AioContext *ctx, + IOHandler *io_read, + IOHandler *io_write, + void *opaque) +{ + QIOChannelFile *fioc = QIO_CHANNEL_FILE(ioc); + aio_set_fd_handler(ctx, fioc->fd, false, io_read, io_write, NULL, opaque); +} + +static GSource *qio_channel_file_create_watch(QIOChannel *ioc, + GIOCondition condition) +{ + QIOChannelFile *fioc = QIO_CHANNEL_FILE(ioc); + return qio_channel_create_fd_watch(ioc, + fioc->fd, + condition); +} + +static void qio_channel_file_class_init(ObjectClass *klass, + void *class_data G_GNUC_UNUSED) +{ + QIOChannelClass *ioc_klass = QIO_CHANNEL_CLASS(klass); + + ioc_klass->io_writev = qio_channel_file_writev; + ioc_klass->io_readv = qio_channel_file_readv; + ioc_klass->io_set_blocking = qio_channel_file_set_blocking; + ioc_klass->io_seek = qio_channel_file_seek; + ioc_klass->io_close = qio_channel_file_close; + ioc_klass->io_create_watch = qio_channel_file_create_watch; + ioc_klass->io_set_aio_fd_handler = qio_channel_file_set_aio_fd_handler; +} + +static const TypeInfo qio_channel_file_info = { + .parent = TYPE_QIO_CHANNEL, + .name = TYPE_QIO_CHANNEL_FILE, + .instance_size = sizeof(QIOChannelFile), + .instance_init = qio_channel_file_init, + .instance_finalize = qio_channel_file_finalize, + .class_init = qio_channel_file_class_init, +}; + +static void qio_channel_file_register_types(void) +{ + type_register_static(&qio_channel_file_info); +} + +type_init(qio_channel_file_register_types); diff --git a/io/channel-socket.c b/io/channel-socket.c new file mode 100644 index 000000000..de259f7ee --- /dev/null +++ b/io/channel-socket.c @@ -0,0 +1,806 @@ +/* + * QEMU I/O channels sockets driver + * + * Copyright (c) 2015 Red Hat, Inc. + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, see . + */ + +#include "qemu/osdep.h" +#include "qemu-common.h" +#include "qapi/error.h" +#include "qapi/qapi-visit-sockets.h" +#include "qemu/module.h" +#include "io/channel-socket.h" +#include "io/channel-watch.h" +#include "trace.h" +#include "qapi/clone-visitor.h" + +#define SOCKET_MAX_FDS 16 + +SocketAddress * +qio_channel_socket_get_local_address(QIOChannelSocket *ioc, + Error **errp) +{ + return socket_sockaddr_to_address(&ioc->localAddr, + ioc->localAddrLen, + errp); +} + +SocketAddress * +qio_channel_socket_get_remote_address(QIOChannelSocket *ioc, + Error **errp) +{ + return socket_sockaddr_to_address(&ioc->remoteAddr, + ioc->remoteAddrLen, + errp); +} + +QIOChannelSocket * +qio_channel_socket_new(void) +{ + QIOChannelSocket *sioc; + QIOChannel *ioc; + + sioc = QIO_CHANNEL_SOCKET(object_new(TYPE_QIO_CHANNEL_SOCKET)); + sioc->fd = -1; + + ioc = QIO_CHANNEL(sioc); + qio_channel_set_feature(ioc, QIO_CHANNEL_FEATURE_SHUTDOWN); + +#ifdef WIN32 + ioc->event = CreateEvent(NULL, FALSE, FALSE, NULL); +#endif + + trace_qio_channel_socket_new(sioc); + + return sioc; +} + + +static int +qio_channel_socket_set_fd(QIOChannelSocket *sioc, + int fd, + Error **errp) +{ + if (sioc->fd != -1) { + error_setg(errp, "Socket is already open"); + return -1; + } + + sioc->fd = fd; + sioc->remoteAddrLen = sizeof(sioc->remoteAddr); + sioc->localAddrLen = sizeof(sioc->localAddr); + + + if (getpeername(fd, (struct sockaddr *)&sioc->remoteAddr, + &sioc->remoteAddrLen) < 0) { + if (errno == ENOTCONN) { + memset(&sioc->remoteAddr, 0, sizeof(sioc->remoteAddr)); + sioc->remoteAddrLen = sizeof(sioc->remoteAddr); + } else { + error_setg_errno(errp, errno, + "Unable to query remote socket address"); + goto error; + } + } + + if (getsockname(fd, (struct sockaddr *)&sioc->localAddr, + &sioc->localAddrLen) < 0) { + error_setg_errno(errp, errno, + "Unable to query local socket address"); + goto error; + } + +#ifndef WIN32 + if (sioc->localAddr.ss_family == AF_UNIX) { + QIOChannel *ioc = QIO_CHANNEL(sioc); + qio_channel_set_feature(ioc, QIO_CHANNEL_FEATURE_FD_PASS); + } +#endif /* WIN32 */ + + return 0; + + error: + sioc->fd = -1; /* Let the caller close FD on failure */ + return -1; +} + +QIOChannelSocket * +qio_channel_socket_new_fd(int fd, + Error **errp) +{ + QIOChannelSocket *ioc; + + ioc = qio_channel_socket_new(); + if (qio_channel_socket_set_fd(ioc, fd, errp) < 0) { + object_unref(OBJECT(ioc)); + return NULL; + } + + trace_qio_channel_socket_new_fd(ioc, fd); + + return ioc; +} + + +int qio_channel_socket_connect_sync(QIOChannelSocket *ioc, + SocketAddress *addr, + Error **errp) +{ + int fd; + + trace_qio_channel_socket_connect_sync(ioc, addr); + fd = socket_connect(addr, errp); + if (fd < 0) { + trace_qio_channel_socket_connect_fail(ioc); + return -1; + } + + trace_qio_channel_socket_connect_complete(ioc, fd); + if (qio_channel_socket_set_fd(ioc, fd, errp) < 0) { + close(fd); + return -1; + } + + return 0; +} + + +static void qio_channel_socket_connect_worker(QIOTask *task, + gpointer opaque) +{ + QIOChannelSocket *ioc = QIO_CHANNEL_SOCKET(qio_task_get_source(task)); + SocketAddress *addr = opaque; + Error *err = NULL; + + qio_channel_socket_connect_sync(ioc, addr, &err); + + qio_task_set_error(task, err); +} + + +void qio_channel_socket_connect_async(QIOChannelSocket *ioc, + SocketAddress *addr, + QIOTaskFunc callback, + gpointer opaque, + GDestroyNotify destroy, + GMainContext *context) +{ + QIOTask *task = qio_task_new( + OBJECT(ioc), callback, opaque, destroy); + SocketAddress *addrCopy; + + addrCopy = QAPI_CLONE(SocketAddress, addr); + + /* socket_connect() does a non-blocking connect(), but it + * still blocks in DNS lookups, so we must use a thread */ + trace_qio_channel_socket_connect_async(ioc, addr); + qio_task_run_in_thread(task, + qio_channel_socket_connect_worker, + addrCopy, + (GDestroyNotify)qapi_free_SocketAddress, + context); +} + + +int qio_channel_socket_listen_sync(QIOChannelSocket *ioc, + SocketAddress *addr, + int num, + Error **errp) +{ + int fd; + + trace_qio_channel_socket_listen_sync(ioc, addr, num); + fd = socket_listen(addr, num, errp); + if (fd < 0) { + trace_qio_channel_socket_listen_fail(ioc); + return -1; + } + + trace_qio_channel_socket_listen_complete(ioc, fd); + if (qio_channel_socket_set_fd(ioc, fd, errp) < 0) { + close(fd); + return -1; + } + qio_channel_set_feature(QIO_CHANNEL(ioc), QIO_CHANNEL_FEATURE_LISTEN); + + return 0; +} + + +struct QIOChannelListenWorkerData { + SocketAddress *addr; + int num; /* amount of expected connections */ +}; + +static void qio_channel_listen_worker_free(gpointer opaque) +{ + struct QIOChannelListenWorkerData *data = opaque; + + qapi_free_SocketAddress(data->addr); + g_free(data); +} + +static void qio_channel_socket_listen_worker(QIOTask *task, + gpointer opaque) +{ + QIOChannelSocket *ioc = QIO_CHANNEL_SOCKET(qio_task_get_source(task)); + struct QIOChannelListenWorkerData *data = opaque; + Error *err = NULL; + + qio_channel_socket_listen_sync(ioc, data->addr, data->num, &err); + + qio_task_set_error(task, err); +} + + +void qio_channel_socket_listen_async(QIOChannelSocket *ioc, + SocketAddress *addr, + int num, + QIOTaskFunc callback, + gpointer opaque, + GDestroyNotify destroy, + GMainContext *context) +{ + QIOTask *task = qio_task_new( + OBJECT(ioc), callback, opaque, destroy); + struct QIOChannelListenWorkerData *data; + + data = g_new0(struct QIOChannelListenWorkerData, 1); + data->addr = QAPI_CLONE(SocketAddress, addr); + data->num = num; + + /* socket_listen() blocks in DNS lookups, so we must use a thread */ + trace_qio_channel_socket_listen_async(ioc, addr, num); + qio_task_run_in_thread(task, + qio_channel_socket_listen_worker, + data, + qio_channel_listen_worker_free, + context); +} + + +int qio_channel_socket_dgram_sync(QIOChannelSocket *ioc, + SocketAddress *localAddr, + SocketAddress *remoteAddr, + Error **errp) +{ + int fd; + + trace_qio_channel_socket_dgram_sync(ioc, localAddr, remoteAddr); + fd = socket_dgram(remoteAddr, localAddr, errp); + if (fd < 0) { + trace_qio_channel_socket_dgram_fail(ioc); + return -1; + } + + trace_qio_channel_socket_dgram_complete(ioc, fd); + if (qio_channel_socket_set_fd(ioc, fd, errp) < 0) { + close(fd); + return -1; + } + + return 0; +} + + +struct QIOChannelSocketDGramWorkerData { + SocketAddress *localAddr; + SocketAddress *remoteAddr; +}; + + +static void qio_channel_socket_dgram_worker_free(gpointer opaque) +{ + struct QIOChannelSocketDGramWorkerData *data = opaque; + qapi_free_SocketAddress(data->localAddr); + qapi_free_SocketAddress(data->remoteAddr); + g_free(data); +} + +static void qio_channel_socket_dgram_worker(QIOTask *task, + gpointer opaque) +{ + QIOChannelSocket *ioc = QIO_CHANNEL_SOCKET(qio_task_get_source(task)); + struct QIOChannelSocketDGramWorkerData *data = opaque; + Error *err = NULL; + + /* socket_dgram() blocks in DNS lookups, so we must use a thread */ + qio_channel_socket_dgram_sync(ioc, data->localAddr, + data->remoteAddr, &err); + + qio_task_set_error(task, err); +} + + +void qio_channel_socket_dgram_async(QIOChannelSocket *ioc, + SocketAddress *localAddr, + SocketAddress *remoteAddr, + QIOTaskFunc callback, + gpointer opaque, + GDestroyNotify destroy, + GMainContext *context) +{ + QIOTask *task = qio_task_new( + OBJECT(ioc), callback, opaque, destroy); + struct QIOChannelSocketDGramWorkerData *data = g_new0( + struct QIOChannelSocketDGramWorkerData, 1); + + data->localAddr = QAPI_CLONE(SocketAddress, localAddr); + data->remoteAddr = QAPI_CLONE(SocketAddress, remoteAddr); + + trace_qio_channel_socket_dgram_async(ioc, localAddr, remoteAddr); + qio_task_run_in_thread(task, + qio_channel_socket_dgram_worker, + data, + qio_channel_socket_dgram_worker_free, + context); +} + + +QIOChannelSocket * +qio_channel_socket_accept(QIOChannelSocket *ioc, + Error **errp) +{ + QIOChannelSocket *cioc; + + cioc = qio_channel_socket_new(); + cioc->remoteAddrLen = sizeof(ioc->remoteAddr); + cioc->localAddrLen = sizeof(ioc->localAddr); + + retry: + trace_qio_channel_socket_accept(ioc); + cioc->fd = qemu_accept(ioc->fd, (struct sockaddr *)&cioc->remoteAddr, + &cioc->remoteAddrLen); + if (cioc->fd < 0) { + if (errno == EINTR) { + goto retry; + } + error_setg_errno(errp, errno, "Unable to accept connection"); + trace_qio_channel_socket_accept_fail(ioc); + goto error; + } + + if (getsockname(cioc->fd, (struct sockaddr *)&cioc->localAddr, + &cioc->localAddrLen) < 0) { + error_setg_errno(errp, errno, + "Unable to query local socket address"); + goto error; + } + +#ifndef WIN32 + if (cioc->localAddr.ss_family == AF_UNIX) { + QIOChannel *ioc_local = QIO_CHANNEL(cioc); + qio_channel_set_feature(ioc_local, QIO_CHANNEL_FEATURE_FD_PASS); + } +#endif /* WIN32 */ + + trace_qio_channel_socket_accept_complete(ioc, cioc, cioc->fd); + return cioc; + + error: + object_unref(OBJECT(cioc)); + return NULL; +} + +static void qio_channel_socket_init(Object *obj) +{ + QIOChannelSocket *ioc = QIO_CHANNEL_SOCKET(obj); + ioc->fd = -1; +} + +static void qio_channel_socket_finalize(Object *obj) +{ + QIOChannelSocket *ioc = QIO_CHANNEL_SOCKET(obj); + + if (ioc->fd != -1) { + QIOChannel *ioc_local = QIO_CHANNEL(ioc); + if (qio_channel_has_feature(ioc_local, QIO_CHANNEL_FEATURE_LISTEN)) { + Error *err = NULL; + + socket_listen_cleanup(ioc->fd, &err); + if (err) { + error_report_err(err); + err = NULL; + } + } +#ifdef WIN32 + WSAEventSelect(ioc->fd, NULL, 0); +#endif + closesocket(ioc->fd); + ioc->fd = -1; + } +} + + +#ifndef WIN32 +static void qio_channel_socket_copy_fds(struct msghdr *msg, + int **fds, size_t *nfds) +{ + struct cmsghdr *cmsg; + + *nfds = 0; + *fds = NULL; + + for (cmsg = CMSG_FIRSTHDR(msg); cmsg; cmsg = CMSG_NXTHDR(msg, cmsg)) { + int fd_size, i; + int gotfds; + + if (cmsg->cmsg_len < CMSG_LEN(sizeof(int)) || + cmsg->cmsg_level != SOL_SOCKET || + cmsg->cmsg_type != SCM_RIGHTS) { + continue; + } + + fd_size = cmsg->cmsg_len - CMSG_LEN(0); + + if (!fd_size) { + continue; + } + + gotfds = fd_size / sizeof(int); + *fds = g_renew(int, *fds, *nfds + gotfds); + memcpy(*fds + *nfds, CMSG_DATA(cmsg), fd_size); + + for (i = 0; i < gotfds; i++) { + int fd = (*fds)[*nfds + i]; + if (fd < 0) { + continue; + } + + /* O_NONBLOCK is preserved across SCM_RIGHTS so reset it */ + qemu_set_block(fd); + +#ifndef MSG_CMSG_CLOEXEC + qemu_set_cloexec(fd); +#endif + } + *nfds += gotfds; + } +} + + +static ssize_t qio_channel_socket_readv(QIOChannel *ioc, + const struct iovec *iov, + size_t niov, + int **fds, + size_t *nfds, + Error **errp) +{ + QIOChannelSocket *sioc = QIO_CHANNEL_SOCKET(ioc); + ssize_t ret; + struct msghdr msg = { NULL, }; + char control[CMSG_SPACE(sizeof(int) * SOCKET_MAX_FDS)]; + int sflags = 0; + + memset(control, 0, CMSG_SPACE(sizeof(int) * SOCKET_MAX_FDS)); + +#ifdef MSG_CMSG_CLOEXEC + sflags |= MSG_CMSG_CLOEXEC; +#endif + + msg.msg_iov = (struct iovec *)iov; + msg.msg_iovlen = niov; + if (fds && nfds) { + msg.msg_control = control; + msg.msg_controllen = sizeof(control); + } + + retry: + ret = recvmsg(sioc->fd, &msg, sflags); + if (ret < 0) { + if (errno == EAGAIN) { + return QIO_CHANNEL_ERR_BLOCK; + } + if (errno == EINTR) { + goto retry; + } + + error_setg_errno(errp, errno, + "Unable to read from socket"); + return -1; + } + + if (fds && nfds) { + qio_channel_socket_copy_fds(&msg, fds, nfds); + } + + return ret; +} + +static ssize_t qio_channel_socket_writev(QIOChannel *ioc, + const struct iovec *iov, + size_t niov, + int *fds, + size_t nfds, + Error **errp) +{ + QIOChannelSocket *sioc = QIO_CHANNEL_SOCKET(ioc); + ssize_t ret; + struct msghdr msg = { NULL, }; + char control[CMSG_SPACE(sizeof(int) * SOCKET_MAX_FDS)]; + size_t fdsize = sizeof(int) * nfds; + struct cmsghdr *cmsg; + + memset(control, 0, CMSG_SPACE(sizeof(int) * SOCKET_MAX_FDS)); + + msg.msg_iov = (struct iovec *)iov; + msg.msg_iovlen = niov; + + if (nfds) { + if (nfds > SOCKET_MAX_FDS) { + error_setg_errno(errp, EINVAL, + "Only %d FDs can be sent, got %zu", + SOCKET_MAX_FDS, nfds); + return -1; + } + + msg.msg_control = control; + msg.msg_controllen = CMSG_SPACE(sizeof(int) * nfds); + + cmsg = CMSG_FIRSTHDR(&msg); + cmsg->cmsg_len = CMSG_LEN(fdsize); + cmsg->cmsg_level = SOL_SOCKET; + cmsg->cmsg_type = SCM_RIGHTS; + memcpy(CMSG_DATA(cmsg), fds, fdsize); + } + + retry: + ret = sendmsg(sioc->fd, &msg, 0); + if (ret <= 0) { + if (errno == EAGAIN) { + return QIO_CHANNEL_ERR_BLOCK; + } + if (errno == EINTR) { + goto retry; + } + error_setg_errno(errp, errno, + "Unable to write to socket"); + return -1; + } + return ret; +} +#else /* WIN32 */ +static ssize_t qio_channel_socket_readv(QIOChannel *ioc, + const struct iovec *iov, + size_t niov, + int **fds, + size_t *nfds, + Error **errp) +{ + QIOChannelSocket *sioc = QIO_CHANNEL_SOCKET(ioc); + ssize_t done = 0; + ssize_t i; + + for (i = 0; i < niov; i++) { + ssize_t ret; + retry: + ret = recv(sioc->fd, + iov[i].iov_base, + iov[i].iov_len, + 0); + if (ret < 0) { + if (errno == EAGAIN) { + if (done) { + return done; + } else { + return QIO_CHANNEL_ERR_BLOCK; + } + } else if (errno == EINTR) { + goto retry; + } else { + error_setg_errno(errp, errno, + "Unable to read from socket"); + return -1; + } + } + done += ret; + if (ret < iov[i].iov_len) { + return done; + } + } + + return done; +} + +static ssize_t qio_channel_socket_writev(QIOChannel *ioc, + const struct iovec *iov, + size_t niov, + int *fds, + size_t nfds, + Error **errp) +{ + QIOChannelSocket *sioc = QIO_CHANNEL_SOCKET(ioc); + ssize_t done = 0; + ssize_t i; + + for (i = 0; i < niov; i++) { + ssize_t ret; + retry: + ret = send(sioc->fd, + iov[i].iov_base, + iov[i].iov_len, + 0); + if (ret < 0) { + if (errno == EAGAIN) { + if (done) { + return done; + } else { + return QIO_CHANNEL_ERR_BLOCK; + } + } else if (errno == EINTR) { + goto retry; + } else { + error_setg_errno(errp, errno, + "Unable to write to socket"); + return -1; + } + } + done += ret; + if (ret < iov[i].iov_len) { + return done; + } + } + + return done; +} +#endif /* WIN32 */ + +static int +qio_channel_socket_set_blocking(QIOChannel *ioc, + bool enabled, + Error **errp) +{ + QIOChannelSocket *sioc = QIO_CHANNEL_SOCKET(ioc); + + if (enabled) { + qemu_set_block(sioc->fd); + } else { + qemu_set_nonblock(sioc->fd); + } + return 0; +} + + +static void +qio_channel_socket_set_delay(QIOChannel *ioc, + bool enabled) +{ + QIOChannelSocket *sioc = QIO_CHANNEL_SOCKET(ioc); + int v = enabled ? 0 : 1; + + qemu_setsockopt(sioc->fd, + IPPROTO_TCP, TCP_NODELAY, + &v, sizeof(v)); +} + + +static void +qio_channel_socket_set_cork(QIOChannel *ioc, + bool enabled) +{ + QIOChannelSocket *sioc = QIO_CHANNEL_SOCKET(ioc); + int v = enabled ? 1 : 0; + + socket_set_cork(sioc->fd, v); +} + + +static int +qio_channel_socket_close(QIOChannel *ioc, + Error **errp) +{ + QIOChannelSocket *sioc = QIO_CHANNEL_SOCKET(ioc); + int rc = 0; + Error *err = NULL; + + if (sioc->fd != -1) { +#ifdef WIN32 + WSAEventSelect(sioc->fd, NULL, 0); +#endif + if (qio_channel_has_feature(ioc, QIO_CHANNEL_FEATURE_LISTEN)) { + socket_listen_cleanup(sioc->fd, errp); + } + + if (closesocket(sioc->fd) < 0) { + sioc->fd = -1; + error_setg_errno(&err, errno, "Unable to close socket"); + error_propagate(errp, err); + return -1; + } + sioc->fd = -1; + } + return rc; +} + +static int +qio_channel_socket_shutdown(QIOChannel *ioc, + QIOChannelShutdown how, + Error **errp) +{ + QIOChannelSocket *sioc = QIO_CHANNEL_SOCKET(ioc); + int sockhow; + + switch (how) { + case QIO_CHANNEL_SHUTDOWN_READ: + sockhow = SHUT_RD; + break; + case QIO_CHANNEL_SHUTDOWN_WRITE: + sockhow = SHUT_WR; + break; + case QIO_CHANNEL_SHUTDOWN_BOTH: + default: + sockhow = SHUT_RDWR; + break; + } + + if (shutdown(sioc->fd, sockhow) < 0) { + error_setg_errno(errp, errno, + "Unable to shutdown socket"); + return -1; + } + return 0; +} + +static void qio_channel_socket_set_aio_fd_handler(QIOChannel *ioc, + AioContext *ctx, + IOHandler *io_read, + IOHandler *io_write, + void *opaque) +{ + QIOChannelSocket *sioc = QIO_CHANNEL_SOCKET(ioc); + aio_set_fd_handler(ctx, sioc->fd, false, io_read, io_write, NULL, opaque); +} + +static GSource *qio_channel_socket_create_watch(QIOChannel *ioc, + GIOCondition condition) +{ + QIOChannelSocket *sioc = QIO_CHANNEL_SOCKET(ioc); + return qio_channel_create_socket_watch(ioc, + sioc->fd, + condition); +} + +static void qio_channel_socket_class_init(ObjectClass *klass, + void *class_data G_GNUC_UNUSED) +{ + QIOChannelClass *ioc_klass = QIO_CHANNEL_CLASS(klass); + + ioc_klass->io_writev = qio_channel_socket_writev; + ioc_klass->io_readv = qio_channel_socket_readv; + ioc_klass->io_set_blocking = qio_channel_socket_set_blocking; + ioc_klass->io_close = qio_channel_socket_close; + ioc_klass->io_shutdown = qio_channel_socket_shutdown; + ioc_klass->io_set_cork = qio_channel_socket_set_cork; + ioc_klass->io_set_delay = qio_channel_socket_set_delay; + ioc_klass->io_create_watch = qio_channel_socket_create_watch; + ioc_klass->io_set_aio_fd_handler = qio_channel_socket_set_aio_fd_handler; +} + +static const TypeInfo qio_channel_socket_info = { + .parent = TYPE_QIO_CHANNEL, + .name = TYPE_QIO_CHANNEL_SOCKET, + .instance_size = sizeof(QIOChannelSocket), + .instance_init = qio_channel_socket_init, + .instance_finalize = qio_channel_socket_finalize, + .class_init = qio_channel_socket_class_init, +}; + +static void qio_channel_socket_register_types(void) +{ + type_register_static(&qio_channel_socket_info); +} + +type_init(qio_channel_socket_register_types); diff --git a/io/channel-tls.c b/io/channel-tls.c new file mode 100644 index 000000000..388f01997 --- /dev/null +++ b/io/channel-tls.c @@ -0,0 +1,432 @@ +/* + * QEMU I/O channels TLS driver + * + * Copyright (c) 2015 Red Hat, Inc. + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, see . + * + */ + +#include "qemu/osdep.h" +#include "qapi/error.h" +#include "qemu/module.h" +#include "io/channel-tls.h" +#include "trace.h" + + +static ssize_t qio_channel_tls_write_handler(const char *buf, + size_t len, + void *opaque) +{ + QIOChannelTLS *tioc = QIO_CHANNEL_TLS(opaque); + ssize_t ret; + + ret = qio_channel_write(tioc->master, buf, len, NULL); + if (ret == QIO_CHANNEL_ERR_BLOCK) { + errno = EAGAIN; + return -1; + } else if (ret < 0) { + errno = EIO; + return -1; + } + return ret; +} + +static ssize_t qio_channel_tls_read_handler(char *buf, + size_t len, + void *opaque) +{ + QIOChannelTLS *tioc = QIO_CHANNEL_TLS(opaque); + ssize_t ret; + + ret = qio_channel_read(tioc->master, buf, len, NULL); + if (ret == QIO_CHANNEL_ERR_BLOCK) { + errno = EAGAIN; + return -1; + } else if (ret < 0) { + errno = EIO; + return -1; + } + return ret; +} + + +QIOChannelTLS * +qio_channel_tls_new_server(QIOChannel *master, + QCryptoTLSCreds *creds, + const char *aclname, + Error **errp) +{ + QIOChannelTLS *ioc; + + ioc = QIO_CHANNEL_TLS(object_new(TYPE_QIO_CHANNEL_TLS)); + + ioc->master = master; + object_ref(OBJECT(master)); + + ioc->session = qcrypto_tls_session_new( + creds, + NULL, + aclname, + QCRYPTO_TLS_CREDS_ENDPOINT_SERVER, + errp); + if (!ioc->session) { + goto error; + } + + qcrypto_tls_session_set_callbacks( + ioc->session, + qio_channel_tls_write_handler, + qio_channel_tls_read_handler, + ioc); + + trace_qio_channel_tls_new_server(ioc, master, creds, aclname); + return ioc; + + error: + object_unref(OBJECT(ioc)); + return NULL; +} + +QIOChannelTLS * +qio_channel_tls_new_client(QIOChannel *master, + QCryptoTLSCreds *creds, + const char *hostname, + Error **errp) +{ + QIOChannelTLS *tioc; + QIOChannel *ioc; + + tioc = QIO_CHANNEL_TLS(object_new(TYPE_QIO_CHANNEL_TLS)); + ioc = QIO_CHANNEL(tioc); + + tioc->master = master; + if (qio_channel_has_feature(master, QIO_CHANNEL_FEATURE_SHUTDOWN)) { + qio_channel_set_feature(ioc, QIO_CHANNEL_FEATURE_SHUTDOWN); + } + object_ref(OBJECT(master)); + + tioc->session = qcrypto_tls_session_new( + creds, + hostname, + NULL, + QCRYPTO_TLS_CREDS_ENDPOINT_CLIENT, + errp); + if (!tioc->session) { + goto error; + } + + qcrypto_tls_session_set_callbacks( + tioc->session, + qio_channel_tls_write_handler, + qio_channel_tls_read_handler, + tioc); + + trace_qio_channel_tls_new_client(tioc, master, creds, hostname); + return tioc; + + error: + object_unref(OBJECT(tioc)); + return NULL; +} + +struct QIOChannelTLSData { + QIOTask *task; + GMainContext *context; +}; +typedef struct QIOChannelTLSData QIOChannelTLSData; + +static gboolean qio_channel_tls_handshake_io(QIOChannel *ioc, + GIOCondition condition, + gpointer user_data); + +static void qio_channel_tls_handshake_task(QIOChannelTLS *ioc, + QIOTask *task, + GMainContext *context) +{ + Error *err = NULL; + QCryptoTLSSessionHandshakeStatus status; + + if (qcrypto_tls_session_handshake(ioc->session, &err) < 0) { + trace_qio_channel_tls_handshake_fail(ioc); + qio_task_set_error(task, err); + qio_task_complete(task); + return; + } + + status = qcrypto_tls_session_get_handshake_status(ioc->session); + if (status == QCRYPTO_TLS_HANDSHAKE_COMPLETE) { + trace_qio_channel_tls_handshake_complete(ioc); + if (qcrypto_tls_session_check_credentials(ioc->session, + &err) < 0) { + trace_qio_channel_tls_credentials_deny(ioc); + qio_task_set_error(task, err); + } else { + trace_qio_channel_tls_credentials_allow(ioc); + } + qio_task_complete(task); + } else { + GIOCondition condition; + QIOChannelTLSData *data = g_new0(typeof(*data), 1); + + data->task = task; + data->context = context; + + if (context) { + g_main_context_ref(context); + } + + if (status == QCRYPTO_TLS_HANDSHAKE_SENDING) { + condition = G_IO_OUT; + } else { + condition = G_IO_IN; + } + + trace_qio_channel_tls_handshake_pending(ioc, status); + qio_channel_add_watch_full(ioc->master, + condition, + qio_channel_tls_handshake_io, + data, + NULL, + context); + } +} + + +static gboolean qio_channel_tls_handshake_io(QIOChannel *ioc, + GIOCondition condition, + gpointer user_data) +{ + QIOChannelTLSData *data = user_data; + QIOTask *task = data->task; + GMainContext *context = data->context; + QIOChannelTLS *tioc = QIO_CHANNEL_TLS( + qio_task_get_source(task)); + + g_free(data); + qio_channel_tls_handshake_task(tioc, task, context); + + if (context) { + g_main_context_unref(context); + } + + return FALSE; +} + +void qio_channel_tls_handshake(QIOChannelTLS *ioc, + QIOTaskFunc func, + gpointer opaque, + GDestroyNotify destroy, + GMainContext *context) +{ + QIOTask *task; + + task = qio_task_new(OBJECT(ioc), + func, opaque, destroy); + + trace_qio_channel_tls_handshake_start(ioc); + qio_channel_tls_handshake_task(ioc, task, context); +} + + +static void qio_channel_tls_init(Object *obj G_GNUC_UNUSED) +{ +} + + +static void qio_channel_tls_finalize(Object *obj) +{ + QIOChannelTLS *ioc = QIO_CHANNEL_TLS(obj); + + object_unref(OBJECT(ioc->master)); + qcrypto_tls_session_free(ioc->session); +} + + +static ssize_t qio_channel_tls_readv(QIOChannel *ioc, + const struct iovec *iov, + size_t niov, + int **fds, + size_t *nfds, + Error **errp) +{ + QIOChannelTLS *tioc = QIO_CHANNEL_TLS(ioc); + size_t i; + ssize_t got = 0; + + for (i = 0 ; i < niov ; i++) { + ssize_t ret = qcrypto_tls_session_read(tioc->session, + iov[i].iov_base, + iov[i].iov_len); + if (ret < 0) { + if (errno == EAGAIN) { + if (got) { + return got; + } else { + return QIO_CHANNEL_ERR_BLOCK; + } + } else if (errno == ECONNABORTED && + (tioc->shutdown & QIO_CHANNEL_SHUTDOWN_READ)) { + return 0; + } + + error_setg_errno(errp, errno, + "Cannot read from TLS channel"); + return -1; + } + got += ret; + if (ret < iov[i].iov_len) { + break; + } + } + return got; +} + + +static ssize_t qio_channel_tls_writev(QIOChannel *ioc, + const struct iovec *iov, + size_t niov, + int *fds, + size_t nfds, + Error **errp) +{ + QIOChannelTLS *tioc = QIO_CHANNEL_TLS(ioc); + size_t i; + ssize_t done = 0; + + for (i = 0 ; i < niov ; i++) { + ssize_t ret = qcrypto_tls_session_write(tioc->session, + iov[i].iov_base, + iov[i].iov_len); + if (ret <= 0) { + if (errno == EAGAIN) { + if (done) { + return done; + } else { + return QIO_CHANNEL_ERR_BLOCK; + } + } + + error_setg_errno(errp, errno, + "Cannot write to TLS channel"); + return -1; + } + done += ret; + if (ret < iov[i].iov_len) { + break; + } + } + return done; +} + +static int qio_channel_tls_set_blocking(QIOChannel *ioc, + bool enabled, + Error **errp) +{ + QIOChannelTLS *tioc = QIO_CHANNEL_TLS(ioc); + + return qio_channel_set_blocking(tioc->master, enabled, errp); +} + +static void qio_channel_tls_set_delay(QIOChannel *ioc, + bool enabled) +{ + QIOChannelTLS *tioc = QIO_CHANNEL_TLS(ioc); + + qio_channel_set_delay(tioc->master, enabled); +} + +static void qio_channel_tls_set_cork(QIOChannel *ioc, + bool enabled) +{ + QIOChannelTLS *tioc = QIO_CHANNEL_TLS(ioc); + + qio_channel_set_cork(tioc->master, enabled); +} + +static int qio_channel_tls_shutdown(QIOChannel *ioc, + QIOChannelShutdown how, + Error **errp) +{ + QIOChannelTLS *tioc = QIO_CHANNEL_TLS(ioc); + + tioc->shutdown |= how; + + return qio_channel_shutdown(tioc->master, how, errp); +} + +static int qio_channel_tls_close(QIOChannel *ioc, + Error **errp) +{ + QIOChannelTLS *tioc = QIO_CHANNEL_TLS(ioc); + + return qio_channel_close(tioc->master, errp); +} + +static void qio_channel_tls_set_aio_fd_handler(QIOChannel *ioc, + AioContext *ctx, + IOHandler *io_read, + IOHandler *io_write, + void *opaque) +{ + QIOChannelTLS *tioc = QIO_CHANNEL_TLS(ioc); + + qio_channel_set_aio_fd_handler(tioc->master, ctx, io_read, io_write, opaque); +} + +static GSource *qio_channel_tls_create_watch(QIOChannel *ioc, + GIOCondition condition) +{ + QIOChannelTLS *tioc = QIO_CHANNEL_TLS(ioc); + + return qio_channel_create_watch(tioc->master, condition); +} + +QCryptoTLSSession * +qio_channel_tls_get_session(QIOChannelTLS *ioc) +{ + return ioc->session; +} + +static void qio_channel_tls_class_init(ObjectClass *klass, + void *class_data G_GNUC_UNUSED) +{ + QIOChannelClass *ioc_klass = QIO_CHANNEL_CLASS(klass); + + ioc_klass->io_writev = qio_channel_tls_writev; + ioc_klass->io_readv = qio_channel_tls_readv; + ioc_klass->io_set_blocking = qio_channel_tls_set_blocking; + ioc_klass->io_set_delay = qio_channel_tls_set_delay; + ioc_klass->io_set_cork = qio_channel_tls_set_cork; + ioc_klass->io_close = qio_channel_tls_close; + ioc_klass->io_shutdown = qio_channel_tls_shutdown; + ioc_klass->io_create_watch = qio_channel_tls_create_watch; + ioc_klass->io_set_aio_fd_handler = qio_channel_tls_set_aio_fd_handler; +} + +static const TypeInfo qio_channel_tls_info = { + .parent = TYPE_QIO_CHANNEL, + .name = TYPE_QIO_CHANNEL_TLS, + .instance_size = sizeof(QIOChannelTLS), + .instance_init = qio_channel_tls_init, + .instance_finalize = qio_channel_tls_finalize, + .class_init = qio_channel_tls_class_init, +}; + +static void qio_channel_tls_register_types(void) +{ + type_register_static(&qio_channel_tls_info); +} + +type_init(qio_channel_tls_register_types); diff --git a/io/channel-util.c b/io/channel-util.c new file mode 100644 index 000000000..848a7a43d --- /dev/null +++ b/io/channel-util.c @@ -0,0 +1,38 @@ +/* + * QEMU I/O channels utility APIs + * + * Copyright (c) 2016 Red Hat, Inc. + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, see . + * + */ + +#include "qemu/osdep.h" +#include "io/channel-util.h" +#include "io/channel-file.h" +#include "io/channel-socket.h" + + +QIOChannel *qio_channel_new_fd(int fd, + Error **errp) +{ + QIOChannel *ioc; + + if (fd_is_socket(fd)) { + ioc = QIO_CHANNEL(qio_channel_socket_new_fd(fd, errp)); + } else { + ioc = QIO_CHANNEL(qio_channel_file_new_fd(fd)); + } + return ioc; +} diff --git a/io/channel-watch.c b/io/channel-watch.c new file mode 100644 index 000000000..0289b3647 --- /dev/null +++ b/io/channel-watch.c @@ -0,0 +1,353 @@ +/* + * QEMU I/O channels watch helper APIs + * + * Copyright (c) 2015 Red Hat, Inc. + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, see . + * + */ + +#include "qemu/osdep.h" +#include "io/channel-watch.h" + +typedef struct QIOChannelFDSource QIOChannelFDSource; +struct QIOChannelFDSource { + GSource parent; + GPollFD fd; + QIOChannel *ioc; + GIOCondition condition; +}; + + +#ifdef CONFIG_WIN32 +typedef struct QIOChannelSocketSource QIOChannelSocketSource; +struct QIOChannelSocketSource { + GSource parent; + GPollFD fd; + QIOChannel *ioc; + SOCKET socket; + int revents; + GIOCondition condition; +}; + +#endif + + +typedef struct QIOChannelFDPairSource QIOChannelFDPairSource; +struct QIOChannelFDPairSource { + GSource parent; + GPollFD fdread; + GPollFD fdwrite; + QIOChannel *ioc; + GIOCondition condition; +}; + + +static gboolean +qio_channel_fd_source_prepare(GSource *source G_GNUC_UNUSED, + gint *timeout) +{ + *timeout = -1; + + return FALSE; +} + + +static gboolean +qio_channel_fd_source_check(GSource *source) +{ + QIOChannelFDSource *ssource = (QIOChannelFDSource *)source; + + return ssource->fd.revents & ssource->condition; +} + + +static gboolean +qio_channel_fd_source_dispatch(GSource *source, + GSourceFunc callback, + gpointer user_data) +{ + QIOChannelFunc func = (QIOChannelFunc)callback; + QIOChannelFDSource *ssource = (QIOChannelFDSource *)source; + + return (*func)(ssource->ioc, + ssource->fd.revents & ssource->condition, + user_data); +} + + +static void +qio_channel_fd_source_finalize(GSource *source) +{ + QIOChannelFDSource *ssource = (QIOChannelFDSource *)source; + + object_unref(OBJECT(ssource->ioc)); +} + + +#ifdef CONFIG_WIN32 +static gboolean +qio_channel_socket_source_prepare(GSource *source G_GNUC_UNUSED, + gint *timeout) +{ + *timeout = -1; + + return FALSE; +} + + +/* + * NB, this impl only works when the socket is in non-blocking + * mode on Win32 + */ +static gboolean +qio_channel_socket_source_check(GSource *source) +{ + static struct timeval tv0; + + QIOChannelSocketSource *ssource = (QIOChannelSocketSource *)source; + WSANETWORKEVENTS ev; + fd_set rfds, wfds, xfds; + + if (!ssource->condition) { + return 0; + } + + WSAEnumNetworkEvents(ssource->socket, ssource->ioc->event, &ev); + + FD_ZERO(&rfds); + FD_ZERO(&wfds); + FD_ZERO(&xfds); + if (ssource->condition & G_IO_IN) { + FD_SET((SOCKET)ssource->socket, &rfds); + } + if (ssource->condition & G_IO_OUT) { + FD_SET((SOCKET)ssource->socket, &wfds); + } + if (ssource->condition & G_IO_PRI) { + FD_SET((SOCKET)ssource->socket, &xfds); + } + ssource->revents = 0; + if (select(0, &rfds, &wfds, &xfds, &tv0) == 0) { + return 0; + } + + if (FD_ISSET(ssource->socket, &rfds)) { + ssource->revents |= G_IO_IN; + } + if (FD_ISSET(ssource->socket, &wfds)) { + ssource->revents |= G_IO_OUT; + } + if (FD_ISSET(ssource->socket, &xfds)) { + ssource->revents |= G_IO_PRI; + } + + return ssource->revents; +} + + +static gboolean +qio_channel_socket_source_dispatch(GSource *source, + GSourceFunc callback, + gpointer user_data) +{ + QIOChannelFunc func = (QIOChannelFunc)callback; + QIOChannelSocketSource *ssource = (QIOChannelSocketSource *)source; + + return (*func)(ssource->ioc, ssource->revents, user_data); +} + + +static void +qio_channel_socket_source_finalize(GSource *source) +{ + QIOChannelSocketSource *ssource = (QIOChannelSocketSource *)source; + + object_unref(OBJECT(ssource->ioc)); +} + + +GSourceFuncs qio_channel_socket_source_funcs = { + qio_channel_socket_source_prepare, + qio_channel_socket_source_check, + qio_channel_socket_source_dispatch, + qio_channel_socket_source_finalize +}; +#endif + + +static gboolean +qio_channel_fd_pair_source_prepare(GSource *source G_GNUC_UNUSED, + gint *timeout) +{ + *timeout = -1; + + return FALSE; +} + + +static gboolean +qio_channel_fd_pair_source_check(GSource *source) +{ + QIOChannelFDPairSource *ssource = (QIOChannelFDPairSource *)source; + GIOCondition poll_condition = ssource->fdread.revents | + ssource->fdwrite.revents; + + return poll_condition & ssource->condition; +} + + +static gboolean +qio_channel_fd_pair_source_dispatch(GSource *source, + GSourceFunc callback, + gpointer user_data) +{ + QIOChannelFunc func = (QIOChannelFunc)callback; + QIOChannelFDPairSource *ssource = (QIOChannelFDPairSource *)source; + GIOCondition poll_condition = ssource->fdread.revents | + ssource->fdwrite.revents; + + return (*func)(ssource->ioc, + poll_condition & ssource->condition, + user_data); +} + + +static void +qio_channel_fd_pair_source_finalize(GSource *source) +{ + QIOChannelFDPairSource *ssource = (QIOChannelFDPairSource *)source; + + object_unref(OBJECT(ssource->ioc)); +} + + +GSourceFuncs qio_channel_fd_source_funcs = { + qio_channel_fd_source_prepare, + qio_channel_fd_source_check, + qio_channel_fd_source_dispatch, + qio_channel_fd_source_finalize +}; + + +GSourceFuncs qio_channel_fd_pair_source_funcs = { + qio_channel_fd_pair_source_prepare, + qio_channel_fd_pair_source_check, + qio_channel_fd_pair_source_dispatch, + qio_channel_fd_pair_source_finalize +}; + + +GSource *qio_channel_create_fd_watch(QIOChannel *ioc, + int fd, + GIOCondition condition) +{ + GSource *source; + QIOChannelFDSource *ssource; + + source = g_source_new(&qio_channel_fd_source_funcs, + sizeof(QIOChannelFDSource)); + ssource = (QIOChannelFDSource *)source; + + ssource->ioc = ioc; + object_ref(OBJECT(ioc)); + + ssource->condition = condition; + +#ifdef CONFIG_WIN32 + ssource->fd.fd = (gint64)_get_osfhandle(fd); +#else + ssource->fd.fd = fd; +#endif + ssource->fd.events = condition; + + g_source_add_poll(source, &ssource->fd); + + return source; +} + +#ifdef CONFIG_WIN32 +GSource *qio_channel_create_socket_watch(QIOChannel *ioc, + int socket, + GIOCondition condition) +{ + GSource *source; + QIOChannelSocketSource *ssource; + +#ifdef WIN32 + WSAEventSelect(socket, ioc->event, + FD_READ | FD_ACCEPT | FD_CLOSE | + FD_CONNECT | FD_WRITE | FD_OOB); +#endif + + source = g_source_new(&qio_channel_socket_source_funcs, + sizeof(QIOChannelSocketSource)); + ssource = (QIOChannelSocketSource *)source; + + ssource->ioc = ioc; + object_ref(OBJECT(ioc)); + + ssource->condition = condition; + ssource->socket = socket; + ssource->revents = 0; + + ssource->fd.fd = (gintptr)ioc->event; + ssource->fd.events = G_IO_IN; + + g_source_add_poll(source, &ssource->fd); + + return source; +} +#else +GSource *qio_channel_create_socket_watch(QIOChannel *ioc, + int socket, + GIOCondition condition) +{ + return qio_channel_create_fd_watch(ioc, socket, condition); +} +#endif + +GSource *qio_channel_create_fd_pair_watch(QIOChannel *ioc, + int fdread, + int fdwrite, + GIOCondition condition) +{ + GSource *source; + QIOChannelFDPairSource *ssource; + + source = g_source_new(&qio_channel_fd_pair_source_funcs, + sizeof(QIOChannelFDPairSource)); + ssource = (QIOChannelFDPairSource *)source; + + ssource->ioc = ioc; + object_ref(OBJECT(ioc)); + + ssource->condition = condition; + +#ifdef CONFIG_WIN32 + ssource->fdread.fd = (gint64)_get_osfhandle(fdread); + ssource->fdwrite.fd = (gint64)_get_osfhandle(fdwrite); +#else + ssource->fdread.fd = fdread; + ssource->fdwrite.fd = fdwrite; +#endif + + ssource->fdread.events = condition & G_IO_IN; + ssource->fdwrite.events = condition & G_IO_OUT; + + g_source_add_poll(source, &ssource->fdread); + g_source_add_poll(source, &ssource->fdwrite); + + return source; +} diff --git a/io/channel-websock.c b/io/channel-websock.c new file mode 100644 index 000000000..03c1f7cb6 --- /dev/null +++ b/io/channel-websock.c @@ -0,0 +1,1341 @@ +/* + * QEMU I/O channels driver websockets + * + * Copyright (c) 2015 Red Hat, Inc. + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, see . + * + */ + +#include "qemu/osdep.h" +#include "qapi/error.h" +#include "qemu/bswap.h" +#include "io/channel-websock.h" +#include "crypto/hash.h" +#include "trace.h" +#include "qemu/iov.h" +#include "qemu/module.h" + +/* Max amount to allow in rawinput/encoutput buffers */ +#define QIO_CHANNEL_WEBSOCK_MAX_BUFFER 8192 + +#define QIO_CHANNEL_WEBSOCK_CLIENT_KEY_LEN 24 +#define QIO_CHANNEL_WEBSOCK_GUID "258EAFA5-E914-47DA-95CA-C5AB0DC85B11" +#define QIO_CHANNEL_WEBSOCK_GUID_LEN strlen(QIO_CHANNEL_WEBSOCK_GUID) + +#define QIO_CHANNEL_WEBSOCK_HEADER_PROTOCOL "sec-websocket-protocol" +#define QIO_CHANNEL_WEBSOCK_HEADER_VERSION "sec-websocket-version" +#define QIO_CHANNEL_WEBSOCK_HEADER_KEY "sec-websocket-key" +#define QIO_CHANNEL_WEBSOCK_HEADER_UPGRADE "upgrade" +#define QIO_CHANNEL_WEBSOCK_HEADER_HOST "host" +#define QIO_CHANNEL_WEBSOCK_HEADER_CONNECTION "connection" + +#define QIO_CHANNEL_WEBSOCK_PROTOCOL_BINARY "binary" +#define QIO_CHANNEL_WEBSOCK_CONNECTION_UPGRADE "Upgrade" +#define QIO_CHANNEL_WEBSOCK_UPGRADE_WEBSOCKET "websocket" + +#define QIO_CHANNEL_WEBSOCK_HANDSHAKE_RES_COMMON \ + "Server: QEMU VNC\r\n" \ + "Date: %s\r\n" + +#define QIO_CHANNEL_WEBSOCK_HANDSHAKE_WITH_PROTO_RES_OK \ + "HTTP/1.1 101 Switching Protocols\r\n" \ + QIO_CHANNEL_WEBSOCK_HANDSHAKE_RES_COMMON \ + "Upgrade: websocket\r\n" \ + "Connection: Upgrade\r\n" \ + "Sec-WebSocket-Accept: %s\r\n" \ + "Sec-WebSocket-Protocol: binary\r\n" \ + "\r\n" +#define QIO_CHANNEL_WEBSOCK_HANDSHAKE_RES_OK \ + "HTTP/1.1 101 Switching Protocols\r\n" \ + QIO_CHANNEL_WEBSOCK_HANDSHAKE_RES_COMMON \ + "Upgrade: websocket\r\n" \ + "Connection: Upgrade\r\n" \ + "Sec-WebSocket-Accept: %s\r\n" \ + "\r\n" +#define QIO_CHANNEL_WEBSOCK_HANDSHAKE_RES_NOT_FOUND \ + "HTTP/1.1 404 Not Found\r\n" \ + QIO_CHANNEL_WEBSOCK_HANDSHAKE_RES_COMMON \ + "Connection: close\r\n" \ + "\r\n" +#define QIO_CHANNEL_WEBSOCK_HANDSHAKE_RES_BAD_REQUEST \ + "HTTP/1.1 400 Bad Request\r\n" \ + QIO_CHANNEL_WEBSOCK_HANDSHAKE_RES_COMMON \ + "Connection: close\r\n" \ + "Sec-WebSocket-Version: " \ + QIO_CHANNEL_WEBSOCK_SUPPORTED_VERSION \ + "\r\n" +#define QIO_CHANNEL_WEBSOCK_HANDSHAKE_RES_SERVER_ERR \ + "HTTP/1.1 500 Internal Server Error\r\n" \ + QIO_CHANNEL_WEBSOCK_HANDSHAKE_RES_COMMON \ + "Connection: close\r\n" \ + "\r\n" +#define QIO_CHANNEL_WEBSOCK_HANDSHAKE_RES_TOO_LARGE \ + "HTTP/1.1 403 Request Entity Too Large\r\n" \ + QIO_CHANNEL_WEBSOCK_HANDSHAKE_RES_COMMON \ + "Connection: close\r\n" \ + "\r\n" +#define QIO_CHANNEL_WEBSOCK_HANDSHAKE_DELIM "\r\n" +#define QIO_CHANNEL_WEBSOCK_HANDSHAKE_END "\r\n\r\n" +#define QIO_CHANNEL_WEBSOCK_SUPPORTED_VERSION "13" +#define QIO_CHANNEL_WEBSOCK_HTTP_METHOD "GET" +#define QIO_CHANNEL_WEBSOCK_HTTP_PATH "/" +#define QIO_CHANNEL_WEBSOCK_HTTP_VERSION "HTTP/1.1" + +/* The websockets packet header is variable length + * depending on the size of the payload... */ + +/* ...length when using 7-bit payload length */ +#define QIO_CHANNEL_WEBSOCK_HEADER_LEN_7_BIT 6 +/* ...length when using 16-bit payload length */ +#define QIO_CHANNEL_WEBSOCK_HEADER_LEN_16_BIT 8 +/* ...length when using 64-bit payload length */ +#define QIO_CHANNEL_WEBSOCK_HEADER_LEN_64_BIT 14 + +/* Length of the optional data mask field in header */ +#define QIO_CHANNEL_WEBSOCK_HEADER_LEN_MASK 4 + +/* Maximum length that can fit in 7-bit payload size */ +#define QIO_CHANNEL_WEBSOCK_PAYLOAD_LEN_THRESHOLD_7_BIT 126 +/* Maximum length that can fit in 16-bit payload size */ +#define QIO_CHANNEL_WEBSOCK_PAYLOAD_LEN_THRESHOLD_16_BIT 65536 + +/* Magic 7-bit length to indicate use of 16-bit payload length */ +#define QIO_CHANNEL_WEBSOCK_PAYLOAD_LEN_MAGIC_16_BIT 126 +/* Magic 7-bit length to indicate use of 64-bit payload length */ +#define QIO_CHANNEL_WEBSOCK_PAYLOAD_LEN_MAGIC_64_BIT 127 + +/* Bitmasks for accessing header fields */ +#define QIO_CHANNEL_WEBSOCK_HEADER_FIELD_FIN 0x80 +#define QIO_CHANNEL_WEBSOCK_HEADER_FIELD_OPCODE 0x0f +#define QIO_CHANNEL_WEBSOCK_HEADER_FIELD_HAS_MASK 0x80 +#define QIO_CHANNEL_WEBSOCK_HEADER_FIELD_PAYLOAD_LEN 0x7f +#define QIO_CHANNEL_WEBSOCK_CONTROL_OPCODE_MASK 0x8 + +typedef struct QIOChannelWebsockHeader QIOChannelWebsockHeader; + +struct QEMU_PACKED QIOChannelWebsockHeader { + unsigned char b0; + unsigned char b1; + union { + struct QEMU_PACKED { + uint16_t l16; + QIOChannelWebsockMask m16; + } s16; + struct QEMU_PACKED { + uint64_t l64; + QIOChannelWebsockMask m64; + } s64; + QIOChannelWebsockMask m; + } u; +}; + +typedef struct QIOChannelWebsockHTTPHeader QIOChannelWebsockHTTPHeader; + +struct QIOChannelWebsockHTTPHeader { + char *name; + char *value; +}; + +enum { + QIO_CHANNEL_WEBSOCK_OPCODE_CONTINUATION = 0x0, + QIO_CHANNEL_WEBSOCK_OPCODE_TEXT_FRAME = 0x1, + QIO_CHANNEL_WEBSOCK_OPCODE_BINARY_FRAME = 0x2, + QIO_CHANNEL_WEBSOCK_OPCODE_CLOSE = 0x8, + QIO_CHANNEL_WEBSOCK_OPCODE_PING = 0x9, + QIO_CHANNEL_WEBSOCK_OPCODE_PONG = 0xA +}; + +static void GCC_FMT_ATTR(2, 3) +qio_channel_websock_handshake_send_res(QIOChannelWebsock *ioc, + const char *resmsg, + ...) +{ + va_list vargs; + char *response; + size_t responselen; + + va_start(vargs, resmsg); + response = g_strdup_vprintf(resmsg, vargs); + responselen = strlen(response); + buffer_reserve(&ioc->encoutput, responselen); + buffer_append(&ioc->encoutput, response, responselen); + g_free(response); + va_end(vargs); +} + +static gchar *qio_channel_websock_date_str(void) +{ + struct tm tm; + time_t now = time(NULL); + char datebuf[128]; + + gmtime_r(&now, &tm); + + strftime(datebuf, sizeof(datebuf), "%a, %d %b %Y %H:%M:%S GMT", &tm); + + return g_strdup(datebuf); +} + +static void qio_channel_websock_handshake_send_res_err(QIOChannelWebsock *ioc, + const char *resdata) +{ + char *date = qio_channel_websock_date_str(); + qio_channel_websock_handshake_send_res(ioc, resdata, date); + g_free(date); +} + +enum { + QIO_CHANNEL_WEBSOCK_STATUS_NORMAL = 1000, + QIO_CHANNEL_WEBSOCK_STATUS_PROTOCOL_ERR = 1002, + QIO_CHANNEL_WEBSOCK_STATUS_INVALID_DATA = 1003, + QIO_CHANNEL_WEBSOCK_STATUS_POLICY = 1008, + QIO_CHANNEL_WEBSOCK_STATUS_TOO_LARGE = 1009, + QIO_CHANNEL_WEBSOCK_STATUS_SERVER_ERR = 1011, +}; + +static size_t +qio_channel_websock_extract_headers(QIOChannelWebsock *ioc, + char *buffer, + QIOChannelWebsockHTTPHeader *hdrs, + size_t nhdrsalloc, + Error **errp) +{ + char *nl, *sep, *tmp; + size_t nhdrs = 0; + + /* + * First parse the HTTP protocol greeting of format: + * + * $METHOD $PATH $VERSION + * + * e.g. + * + * GET / HTTP/1.1 + */ + + nl = strstr(buffer, QIO_CHANNEL_WEBSOCK_HANDSHAKE_DELIM); + if (!nl) { + error_setg(errp, "Missing HTTP header delimiter"); + goto bad_request; + } + *nl = '\0'; + trace_qio_channel_websock_http_greeting(ioc, buffer); + + tmp = strchr(buffer, ' '); + if (!tmp) { + error_setg(errp, "Missing HTTP path delimiter"); + return 0; + } + *tmp = '\0'; + + if (!g_str_equal(buffer, QIO_CHANNEL_WEBSOCK_HTTP_METHOD)) { + error_setg(errp, "Unsupported HTTP method %s", buffer); + goto bad_request; + } + + buffer = tmp + 1; + tmp = strchr(buffer, ' '); + if (!tmp) { + error_setg(errp, "Missing HTTP version delimiter"); + goto bad_request; + } + *tmp = '\0'; + + if (!g_str_equal(buffer, QIO_CHANNEL_WEBSOCK_HTTP_PATH)) { + qio_channel_websock_handshake_send_res_err( + ioc, QIO_CHANNEL_WEBSOCK_HANDSHAKE_RES_NOT_FOUND); + error_setg(errp, "Unexpected HTTP path %s", buffer); + return 0; + } + + buffer = tmp + 1; + + if (!g_str_equal(buffer, QIO_CHANNEL_WEBSOCK_HTTP_VERSION)) { + error_setg(errp, "Unsupported HTTP version %s", buffer); + goto bad_request; + } + + buffer = nl + strlen(QIO_CHANNEL_WEBSOCK_HANDSHAKE_DELIM); + + /* + * Now parse all the header fields of format + * + * $NAME: $VALUE + * + * e.g. + * + * Cache-control: no-cache + */ + do { + QIOChannelWebsockHTTPHeader *hdr; + + nl = strstr(buffer, QIO_CHANNEL_WEBSOCK_HANDSHAKE_DELIM); + if (nl) { + *nl = '\0'; + } + + sep = strchr(buffer, ':'); + if (!sep) { + error_setg(errp, "Malformed HTTP header"); + goto bad_request; + } + *sep = '\0'; + sep++; + while (*sep == ' ') { + sep++; + } + + if (nhdrs >= nhdrsalloc) { + error_setg(errp, "Too many HTTP headers"); + goto bad_request; + } + + hdr = &hdrs[nhdrs++]; + hdr->name = buffer; + hdr->value = sep; + + /* Canonicalize header name for easier identification later */ + for (tmp = hdr->name; *tmp; tmp++) { + *tmp = g_ascii_tolower(*tmp); + } + + if (nl) { + buffer = nl + strlen(QIO_CHANNEL_WEBSOCK_HANDSHAKE_DELIM); + } + } while (nl != NULL); + + return nhdrs; + + bad_request: + qio_channel_websock_handshake_send_res_err( + ioc, QIO_CHANNEL_WEBSOCK_HANDSHAKE_RES_BAD_REQUEST); + return 0; +} + +static const char * +qio_channel_websock_find_header(QIOChannelWebsockHTTPHeader *hdrs, + size_t nhdrs, + const char *name) +{ + size_t i; + + for (i = 0; i < nhdrs; i++) { + if (g_str_equal(hdrs[i].name, name)) { + return hdrs[i].value; + } + } + + return NULL; +} + + +static void qio_channel_websock_handshake_send_res_ok(QIOChannelWebsock *ioc, + const char *key, + const bool use_protocols, + Error **errp) +{ + char combined_key[QIO_CHANNEL_WEBSOCK_CLIENT_KEY_LEN + + QIO_CHANNEL_WEBSOCK_GUID_LEN + 1]; + char *accept = NULL; + char *date = NULL; + + g_strlcpy(combined_key, key, QIO_CHANNEL_WEBSOCK_CLIENT_KEY_LEN + 1); + g_strlcat(combined_key, QIO_CHANNEL_WEBSOCK_GUID, + QIO_CHANNEL_WEBSOCK_CLIENT_KEY_LEN + + QIO_CHANNEL_WEBSOCK_GUID_LEN + 1); + + /* hash and encode it */ + if (qcrypto_hash_base64(QCRYPTO_HASH_ALG_SHA1, + combined_key, + QIO_CHANNEL_WEBSOCK_CLIENT_KEY_LEN + + QIO_CHANNEL_WEBSOCK_GUID_LEN, + &accept, + errp) < 0) { + qio_channel_websock_handshake_send_res_err( + ioc, QIO_CHANNEL_WEBSOCK_HANDSHAKE_RES_SERVER_ERR); + return; + } + + date = qio_channel_websock_date_str(); + if (use_protocols) { + qio_channel_websock_handshake_send_res( + ioc, QIO_CHANNEL_WEBSOCK_HANDSHAKE_WITH_PROTO_RES_OK, + date, accept); + } else { + qio_channel_websock_handshake_send_res( + ioc, QIO_CHANNEL_WEBSOCK_HANDSHAKE_RES_OK, date, accept); + } + + g_free(date); + g_free(accept); +} + +static void qio_channel_websock_handshake_process(QIOChannelWebsock *ioc, + char *buffer, + Error **errp) +{ + QIOChannelWebsockHTTPHeader hdrs[32]; + size_t nhdrs = G_N_ELEMENTS(hdrs); + const char *protocols = NULL, *version = NULL, *key = NULL, + *host = NULL, *connection = NULL, *upgrade = NULL; + char **connectionv; + bool upgraded = false; + size_t i; + + nhdrs = qio_channel_websock_extract_headers(ioc, buffer, hdrs, nhdrs, errp); + if (!nhdrs) { + return; + } + + protocols = qio_channel_websock_find_header( + hdrs, nhdrs, QIO_CHANNEL_WEBSOCK_HEADER_PROTOCOL); + + version = qio_channel_websock_find_header( + hdrs, nhdrs, QIO_CHANNEL_WEBSOCK_HEADER_VERSION); + if (!version) { + error_setg(errp, "Missing websocket version header data"); + goto bad_request; + } + + key = qio_channel_websock_find_header( + hdrs, nhdrs, QIO_CHANNEL_WEBSOCK_HEADER_KEY); + if (!key) { + error_setg(errp, "Missing websocket key header data"); + goto bad_request; + } + + host = qio_channel_websock_find_header( + hdrs, nhdrs, QIO_CHANNEL_WEBSOCK_HEADER_HOST); + if (!host) { + error_setg(errp, "Missing websocket host header data"); + goto bad_request; + } + + connection = qio_channel_websock_find_header( + hdrs, nhdrs, QIO_CHANNEL_WEBSOCK_HEADER_CONNECTION); + if (!connection) { + error_setg(errp, "Missing websocket connection header data"); + goto bad_request; + } + + upgrade = qio_channel_websock_find_header( + hdrs, nhdrs, QIO_CHANNEL_WEBSOCK_HEADER_UPGRADE); + if (!upgrade) { + error_setg(errp, "Missing websocket upgrade header data"); + goto bad_request; + } + + trace_qio_channel_websock_http_request(ioc, protocols, version, + host, connection, upgrade, key); + + if (protocols) { + if (!g_strrstr(protocols, QIO_CHANNEL_WEBSOCK_PROTOCOL_BINARY)) { + error_setg(errp, "No '%s' protocol is supported by client '%s'", + QIO_CHANNEL_WEBSOCK_PROTOCOL_BINARY, protocols); + goto bad_request; + } + } + + if (!g_str_equal(version, QIO_CHANNEL_WEBSOCK_SUPPORTED_VERSION)) { + error_setg(errp, "Version '%s' is not supported by client '%s'", + QIO_CHANNEL_WEBSOCK_SUPPORTED_VERSION, version); + goto bad_request; + } + + if (strlen(key) != QIO_CHANNEL_WEBSOCK_CLIENT_KEY_LEN) { + error_setg(errp, "Key length '%zu' was not as expected '%d'", + strlen(key), QIO_CHANNEL_WEBSOCK_CLIENT_KEY_LEN); + goto bad_request; + } + + connectionv = g_strsplit(connection, ",", 0); + for (i = 0; connectionv != NULL && connectionv[i] != NULL; i++) { + g_strstrip(connectionv[i]); + if (strcasecmp(connectionv[i], + QIO_CHANNEL_WEBSOCK_CONNECTION_UPGRADE) == 0) { + upgraded = true; + } + } + g_strfreev(connectionv); + if (!upgraded) { + error_setg(errp, "No connection upgrade requested '%s'", connection); + goto bad_request; + } + + if (strcasecmp(upgrade, QIO_CHANNEL_WEBSOCK_UPGRADE_WEBSOCKET) != 0) { + error_setg(errp, "Incorrect upgrade method '%s'", upgrade); + goto bad_request; + } + + qio_channel_websock_handshake_send_res_ok(ioc, key, !!protocols, errp); + return; + + bad_request: + qio_channel_websock_handshake_send_res_err( + ioc, QIO_CHANNEL_WEBSOCK_HANDSHAKE_RES_BAD_REQUEST); +} + +static int qio_channel_websock_handshake_read(QIOChannelWebsock *ioc, + Error **errp) +{ + char *handshake_end; + ssize_t ret; + /* Typical HTTP headers from novnc are 512 bytes, so limiting + * total header size to 4096 is easily enough. */ + size_t want = 4096 - ioc->encinput.offset; + buffer_reserve(&ioc->encinput, want); + ret = qio_channel_read(ioc->master, + (char *)buffer_end(&ioc->encinput), want, errp); + if (ret < 0) { + return -1; + } + ioc->encinput.offset += ret; + + handshake_end = g_strstr_len((char *)ioc->encinput.buffer, + ioc->encinput.offset, + QIO_CHANNEL_WEBSOCK_HANDSHAKE_END); + if (!handshake_end) { + if (ioc->encinput.offset >= 4096) { + qio_channel_websock_handshake_send_res_err( + ioc, QIO_CHANNEL_WEBSOCK_HANDSHAKE_RES_TOO_LARGE); + error_setg(errp, + "End of headers not found in first 4096 bytes"); + return 1; + } else if (ret == 0) { + error_setg(errp, + "End of headers not found before connection closed"); + return -1; + } + return 0; + } + *handshake_end = '\0'; + + qio_channel_websock_handshake_process(ioc, + (char *)ioc->encinput.buffer, + errp); + + buffer_advance(&ioc->encinput, + handshake_end - (char *)ioc->encinput.buffer + + strlen(QIO_CHANNEL_WEBSOCK_HANDSHAKE_END)); + return 1; +} + +static gboolean qio_channel_websock_handshake_send(QIOChannel *ioc, + GIOCondition condition, + gpointer user_data) +{ + QIOTask *task = user_data; + QIOChannelWebsock *wioc = QIO_CHANNEL_WEBSOCK( + qio_task_get_source(task)); + Error *err = NULL; + ssize_t ret; + + ret = qio_channel_write(wioc->master, + (char *)wioc->encoutput.buffer, + wioc->encoutput.offset, + &err); + + if (ret < 0) { + trace_qio_channel_websock_handshake_fail(ioc, error_get_pretty(err)); + qio_task_set_error(task, err); + qio_task_complete(task); + return FALSE; + } + + buffer_advance(&wioc->encoutput, ret); + if (wioc->encoutput.offset == 0) { + if (wioc->io_err) { + trace_qio_channel_websock_handshake_fail( + ioc, error_get_pretty(wioc->io_err)); + qio_task_set_error(task, wioc->io_err); + wioc->io_err = NULL; + qio_task_complete(task); + } else { + trace_qio_channel_websock_handshake_complete(ioc); + qio_task_complete(task); + } + return FALSE; + } + trace_qio_channel_websock_handshake_pending(ioc, G_IO_OUT); + return TRUE; +} + +static gboolean qio_channel_websock_handshake_io(QIOChannel *ioc, + GIOCondition condition, + gpointer user_data) +{ + QIOTask *task = user_data; + QIOChannelWebsock *wioc = QIO_CHANNEL_WEBSOCK( + qio_task_get_source(task)); + Error *err = NULL; + int ret; + + ret = qio_channel_websock_handshake_read(wioc, &err); + if (ret < 0) { + /* + * We only take this path on a fatal I/O error reading from + * client connection, as most of the time we have an + * HTTP 4xx err response to send instead + */ + trace_qio_channel_websock_handshake_fail(ioc, error_get_pretty(err)); + qio_task_set_error(task, err); + qio_task_complete(task); + return FALSE; + } + if (ret == 0) { + trace_qio_channel_websock_handshake_pending(ioc, G_IO_IN); + /* need more data still */ + return TRUE; + } + + error_propagate(&wioc->io_err, err); + + trace_qio_channel_websock_handshake_reply(ioc); + qio_channel_add_watch( + wioc->master, + G_IO_OUT, + qio_channel_websock_handshake_send, + task, + NULL); + return FALSE; +} + + +static void qio_channel_websock_encode(QIOChannelWebsock *ioc, + uint8_t opcode, + const struct iovec *iov, + size_t niov, + size_t size) +{ + size_t header_size; + size_t i; + union { + char buf[QIO_CHANNEL_WEBSOCK_HEADER_LEN_64_BIT]; + QIOChannelWebsockHeader ws; + } header; + + assert(size <= iov_size(iov, niov)); + + header.ws.b0 = QIO_CHANNEL_WEBSOCK_HEADER_FIELD_FIN | + (opcode & QIO_CHANNEL_WEBSOCK_HEADER_FIELD_OPCODE); + if (size < QIO_CHANNEL_WEBSOCK_PAYLOAD_LEN_THRESHOLD_7_BIT) { + header.ws.b1 = (uint8_t)size; + header_size = QIO_CHANNEL_WEBSOCK_HEADER_LEN_7_BIT; + } else if (size < QIO_CHANNEL_WEBSOCK_PAYLOAD_LEN_THRESHOLD_16_BIT) { + header.ws.b1 = QIO_CHANNEL_WEBSOCK_PAYLOAD_LEN_MAGIC_16_BIT; + header.ws.u.s16.l16 = cpu_to_be16((uint16_t)size); + header_size = QIO_CHANNEL_WEBSOCK_HEADER_LEN_16_BIT; + } else { + header.ws.b1 = QIO_CHANNEL_WEBSOCK_PAYLOAD_LEN_MAGIC_64_BIT; + header.ws.u.s64.l64 = cpu_to_be64(size); + header_size = QIO_CHANNEL_WEBSOCK_HEADER_LEN_64_BIT; + } + header_size -= QIO_CHANNEL_WEBSOCK_HEADER_LEN_MASK; + + trace_qio_channel_websock_encode(ioc, opcode, header_size, size); + buffer_reserve(&ioc->encoutput, header_size + size); + buffer_append(&ioc->encoutput, header.buf, header_size); + for (i = 0; i < niov && size != 0; i++) { + size_t want = iov[i].iov_len; + if (want > size) { + want = size; + } + buffer_append(&ioc->encoutput, iov[i].iov_base, want); + size -= want; + } +} + + +static ssize_t qio_channel_websock_write_wire(QIOChannelWebsock *, Error **); + + +static void qio_channel_websock_write_close(QIOChannelWebsock *ioc, + uint16_t code, const char *reason) +{ + struct iovec iov[2] = { + { .iov_base = &code, .iov_len = sizeof(code) }, + }; + size_t niov = 1; + size_t size = iov[0].iov_len; + + cpu_to_be16s(&code); + + if (reason) { + iov[1].iov_base = (void *)reason; + iov[1].iov_len = strlen(reason); + size += iov[1].iov_len; + niov++; + } + qio_channel_websock_encode(ioc, QIO_CHANNEL_WEBSOCK_OPCODE_CLOSE, + iov, niov, size); + qio_channel_websock_write_wire(ioc, NULL); + qio_channel_shutdown(ioc->master, QIO_CHANNEL_SHUTDOWN_BOTH, NULL); +} + + +static int qio_channel_websock_decode_header(QIOChannelWebsock *ioc, + Error **errp) +{ + unsigned char opcode, fin, has_mask; + size_t header_size; + size_t payload_len; + QIOChannelWebsockHeader *header = + (QIOChannelWebsockHeader *)ioc->encinput.buffer; + + if (ioc->payload_remain) { + error_setg(errp, + "Decoding header but %zu bytes of payload remain", + ioc->payload_remain); + qio_channel_websock_write_close( + ioc, QIO_CHANNEL_WEBSOCK_STATUS_SERVER_ERR, + "internal server error"); + return -1; + } + if (ioc->encinput.offset < QIO_CHANNEL_WEBSOCK_HEADER_LEN_7_BIT) { + /* header not complete */ + return QIO_CHANNEL_ERR_BLOCK; + } + + fin = header->b0 & QIO_CHANNEL_WEBSOCK_HEADER_FIELD_FIN; + opcode = header->b0 & QIO_CHANNEL_WEBSOCK_HEADER_FIELD_OPCODE; + has_mask = header->b1 & QIO_CHANNEL_WEBSOCK_HEADER_FIELD_HAS_MASK; + payload_len = header->b1 & QIO_CHANNEL_WEBSOCK_HEADER_FIELD_PAYLOAD_LEN; + + /* Save or restore opcode. */ + if (opcode) { + ioc->opcode = opcode; + } else { + opcode = ioc->opcode; + } + + trace_qio_channel_websock_header_partial_decode(ioc, payload_len, + fin, opcode, (int)has_mask); + + if (opcode == QIO_CHANNEL_WEBSOCK_OPCODE_CLOSE) { + /* disconnect */ + return 0; + } + + /* Websocket frame sanity check: + * * Fragmentation is only supported for binary frames. + * * All frames sent by a client MUST be masked. + * * Only binary and ping/pong encoding is supported. + */ + if (!fin) { + if (opcode != QIO_CHANNEL_WEBSOCK_OPCODE_BINARY_FRAME) { + error_setg(errp, "only binary websocket frames may be fragmented"); + qio_channel_websock_write_close( + ioc, QIO_CHANNEL_WEBSOCK_STATUS_POLICY , + "only binary frames may be fragmented"); + return -1; + } + } else { + if (opcode != QIO_CHANNEL_WEBSOCK_OPCODE_BINARY_FRAME && + opcode != QIO_CHANNEL_WEBSOCK_OPCODE_CLOSE && + opcode != QIO_CHANNEL_WEBSOCK_OPCODE_PING && + opcode != QIO_CHANNEL_WEBSOCK_OPCODE_PONG) { + error_setg(errp, "unsupported opcode: 0x%04x; only binary, close, " + "ping, and pong websocket frames are supported", opcode); + qio_channel_websock_write_close( + ioc, QIO_CHANNEL_WEBSOCK_STATUS_INVALID_DATA , + "only binary, close, ping, and pong frames are supported"); + return -1; + } + } + if (!has_mask) { + error_setg(errp, "client websocket frames must be masked"); + qio_channel_websock_write_close( + ioc, QIO_CHANNEL_WEBSOCK_STATUS_PROTOCOL_ERR, + "client frames must be masked"); + return -1; + } + + if (payload_len < QIO_CHANNEL_WEBSOCK_PAYLOAD_LEN_MAGIC_16_BIT) { + ioc->payload_remain = payload_len; + header_size = QIO_CHANNEL_WEBSOCK_HEADER_LEN_7_BIT; + ioc->mask = header->u.m; + } else if (opcode & QIO_CHANNEL_WEBSOCK_CONTROL_OPCODE_MASK) { + error_setg(errp, "websocket control frame is too large"); + qio_channel_websock_write_close( + ioc, QIO_CHANNEL_WEBSOCK_STATUS_PROTOCOL_ERR, + "control frame is too large"); + return -1; + } else if (payload_len == QIO_CHANNEL_WEBSOCK_PAYLOAD_LEN_MAGIC_16_BIT && + ioc->encinput.offset >= QIO_CHANNEL_WEBSOCK_HEADER_LEN_16_BIT) { + ioc->payload_remain = be16_to_cpu(header->u.s16.l16); + header_size = QIO_CHANNEL_WEBSOCK_HEADER_LEN_16_BIT; + ioc->mask = header->u.s16.m16; + } else if (payload_len == QIO_CHANNEL_WEBSOCK_PAYLOAD_LEN_MAGIC_64_BIT && + ioc->encinput.offset >= QIO_CHANNEL_WEBSOCK_HEADER_LEN_64_BIT) { + ioc->payload_remain = be64_to_cpu(header->u.s64.l64); + header_size = QIO_CHANNEL_WEBSOCK_HEADER_LEN_64_BIT; + ioc->mask = header->u.s64.m64; + } else { + /* header not complete */ + return QIO_CHANNEL_ERR_BLOCK; + } + + trace_qio_channel_websock_header_full_decode( + ioc, header_size, ioc->payload_remain, ioc->mask.u); + buffer_advance(&ioc->encinput, header_size); + return 0; +} + + +static int qio_channel_websock_decode_payload(QIOChannelWebsock *ioc, + Error **errp) +{ + size_t i; + size_t payload_len = 0; + uint32_t *payload32; + + if (ioc->payload_remain) { + /* If we aren't at the end of the payload, then drop + * off the last bytes, so we're always multiple of 4 + * for purpose of unmasking, except at end of payload + */ + if (ioc->encinput.offset < ioc->payload_remain) { + /* Wait for the entire payload before processing control frames + * because the payload will most likely be echoed back. */ + if (ioc->opcode & QIO_CHANNEL_WEBSOCK_CONTROL_OPCODE_MASK) { + return QIO_CHANNEL_ERR_BLOCK; + } + payload_len = ioc->encinput.offset - (ioc->encinput.offset % 4); + } else { + payload_len = ioc->payload_remain; + } + if (payload_len == 0) { + return QIO_CHANNEL_ERR_BLOCK; + } + + ioc->payload_remain -= payload_len; + + /* unmask frame */ + /* process 1 frame (32 bit op) */ + payload32 = (uint32_t *)ioc->encinput.buffer; + for (i = 0; i < payload_len / 4; i++) { + payload32[i] ^= ioc->mask.u; + } + /* process the remaining bytes (if any) */ + for (i *= 4; i < payload_len; i++) { + ioc->encinput.buffer[i] ^= ioc->mask.c[i % 4]; + } + } + + trace_qio_channel_websock_payload_decode( + ioc, ioc->opcode, ioc->payload_remain); + + if (ioc->opcode == QIO_CHANNEL_WEBSOCK_OPCODE_BINARY_FRAME) { + if (payload_len) { + /* binary frames are passed on */ + buffer_reserve(&ioc->rawinput, payload_len); + buffer_append(&ioc->rawinput, ioc->encinput.buffer, payload_len); + } + } else if (ioc->opcode == QIO_CHANNEL_WEBSOCK_OPCODE_CLOSE) { + /* close frames are echoed back */ + error_setg(errp, "websocket closed by peer"); + if (payload_len) { + /* echo client status */ + struct iovec iov = { .iov_base = ioc->encinput.buffer, + .iov_len = ioc->encinput.offset }; + qio_channel_websock_encode(ioc, QIO_CHANNEL_WEBSOCK_OPCODE_CLOSE, + &iov, 1, iov.iov_len); + qio_channel_websock_write_wire(ioc, NULL); + qio_channel_shutdown(ioc->master, QIO_CHANNEL_SHUTDOWN_BOTH, NULL); + } else { + /* send our own status */ + qio_channel_websock_write_close( + ioc, QIO_CHANNEL_WEBSOCK_STATUS_NORMAL, "peer requested close"); + } + return -1; + } else if (ioc->opcode == QIO_CHANNEL_WEBSOCK_OPCODE_PING) { + /* ping frames produce an immediate reply, as long as we've not still + * got a previous pong queued, in which case we drop the new pong */ + if (ioc->pong_remain == 0) { + struct iovec iov = { .iov_base = ioc->encinput.buffer, + .iov_len = ioc->encinput.offset }; + qio_channel_websock_encode(ioc, QIO_CHANNEL_WEBSOCK_OPCODE_PONG, + &iov, 1, iov.iov_len); + ioc->pong_remain = ioc->encoutput.offset; + } + } /* pong frames are ignored */ + + if (payload_len) { + buffer_advance(&ioc->encinput, payload_len); + } + return 0; +} + + +QIOChannelWebsock * +qio_channel_websock_new_server(QIOChannel *master) +{ + QIOChannelWebsock *wioc; + QIOChannel *ioc; + + wioc = QIO_CHANNEL_WEBSOCK(object_new(TYPE_QIO_CHANNEL_WEBSOCK)); + ioc = QIO_CHANNEL(wioc); + + wioc->master = master; + if (qio_channel_has_feature(master, QIO_CHANNEL_FEATURE_SHUTDOWN)) { + qio_channel_set_feature(ioc, QIO_CHANNEL_FEATURE_SHUTDOWN); + } + object_ref(OBJECT(master)); + + trace_qio_channel_websock_new_server(wioc, master); + return wioc; +} + +void qio_channel_websock_handshake(QIOChannelWebsock *ioc, + QIOTaskFunc func, + gpointer opaque, + GDestroyNotify destroy) +{ + QIOTask *task; + + task = qio_task_new(OBJECT(ioc), + func, + opaque, + destroy); + + trace_qio_channel_websock_handshake_start(ioc); + trace_qio_channel_websock_handshake_pending(ioc, G_IO_IN); + qio_channel_add_watch(ioc->master, + G_IO_IN, + qio_channel_websock_handshake_io, + task, + NULL); +} + + +static void qio_channel_websock_finalize(Object *obj) +{ + QIOChannelWebsock *ioc = QIO_CHANNEL_WEBSOCK(obj); + + buffer_free(&ioc->encinput); + buffer_free(&ioc->encoutput); + buffer_free(&ioc->rawinput); + object_unref(OBJECT(ioc->master)); + if (ioc->io_tag) { + g_source_remove(ioc->io_tag); + } + if (ioc->io_err) { + error_free(ioc->io_err); + } +} + + +static ssize_t qio_channel_websock_read_wire(QIOChannelWebsock *ioc, + Error **errp) +{ + ssize_t ret; + + if (ioc->encinput.offset < 4096) { + size_t want = 4096 - ioc->encinput.offset; + + buffer_reserve(&ioc->encinput, want); + ret = qio_channel_read(ioc->master, + (char *)ioc->encinput.buffer + + ioc->encinput.offset, + want, + errp); + if (ret < 0) { + return ret; + } + if (ret == 0 && ioc->encinput.offset == 0) { + ioc->io_eof = TRUE; + return 0; + } + ioc->encinput.offset += ret; + } + + while (ioc->encinput.offset != 0) { + if (ioc->payload_remain == 0) { + ret = qio_channel_websock_decode_header(ioc, errp); + if (ret < 0) { + return ret; + } + } + + ret = qio_channel_websock_decode_payload(ioc, errp); + if (ret < 0) { + return ret; + } + } + return 1; +} + + +static ssize_t qio_channel_websock_write_wire(QIOChannelWebsock *ioc, + Error **errp) +{ + ssize_t ret; + ssize_t done = 0; + + while (ioc->encoutput.offset > 0) { + ret = qio_channel_write(ioc->master, + (char *)ioc->encoutput.buffer, + ioc->encoutput.offset, + errp); + if (ret < 0) { + if (ret == QIO_CHANNEL_ERR_BLOCK && + done > 0) { + return done; + } else { + return ret; + } + } + buffer_advance(&ioc->encoutput, ret); + done += ret; + if (ioc->pong_remain < ret) { + ioc->pong_remain = 0; + } else { + ioc->pong_remain -= ret; + } + } + return done; +} + + +static void qio_channel_websock_flush_free(gpointer user_data) +{ + QIOChannelWebsock *wioc = QIO_CHANNEL_WEBSOCK(user_data); + object_unref(OBJECT(wioc)); +} + +static void qio_channel_websock_set_watch(QIOChannelWebsock *ioc); + +static gboolean qio_channel_websock_flush(QIOChannel *ioc, + GIOCondition condition, + gpointer user_data) +{ + QIOChannelWebsock *wioc = QIO_CHANNEL_WEBSOCK(user_data); + ssize_t ret; + + if (condition & G_IO_OUT) { + ret = qio_channel_websock_write_wire(wioc, &wioc->io_err); + if (ret < 0) { + goto cleanup; + } + } + + if (condition & G_IO_IN) { + ret = qio_channel_websock_read_wire(wioc, &wioc->io_err); + if (ret < 0) { + goto cleanup; + } + } + + cleanup: + qio_channel_websock_set_watch(wioc); + return FALSE; +} + + +static void qio_channel_websock_unset_watch(QIOChannelWebsock *ioc) +{ + if (ioc->io_tag) { + g_source_remove(ioc->io_tag); + ioc->io_tag = 0; + } +} + +static void qio_channel_websock_set_watch(QIOChannelWebsock *ioc) +{ + GIOCondition cond = 0; + + qio_channel_websock_unset_watch(ioc); + + if (ioc->io_err) { + return; + } + + if (ioc->encoutput.offset) { + cond |= G_IO_OUT; + } + if (ioc->encinput.offset < QIO_CHANNEL_WEBSOCK_MAX_BUFFER && + !ioc->io_eof) { + cond |= G_IO_IN; + } + + if (cond) { + object_ref(OBJECT(ioc)); + ioc->io_tag = + qio_channel_add_watch(ioc->master, + cond, + qio_channel_websock_flush, + ioc, + qio_channel_websock_flush_free); + } +} + + +static ssize_t qio_channel_websock_readv(QIOChannel *ioc, + const struct iovec *iov, + size_t niov, + int **fds, + size_t *nfds, + Error **errp) +{ + QIOChannelWebsock *wioc = QIO_CHANNEL_WEBSOCK(ioc); + size_t i; + ssize_t got = 0; + ssize_t ret; + + if (wioc->io_err) { + error_propagate(errp, error_copy(wioc->io_err)); + return -1; + } + + if (!wioc->rawinput.offset) { + ret = qio_channel_websock_read_wire(QIO_CHANNEL_WEBSOCK(ioc), errp); + if (ret < 0) { + return ret; + } + } + + for (i = 0 ; i < niov ; i++) { + size_t want = iov[i].iov_len; + if (want > (wioc->rawinput.offset - got)) { + want = (wioc->rawinput.offset - got); + } + + memcpy(iov[i].iov_base, + wioc->rawinput.buffer + got, + want); + got += want; + + if (want < iov[i].iov_len) { + break; + } + } + + buffer_advance(&wioc->rawinput, got); + qio_channel_websock_set_watch(wioc); + return got; +} + + +static ssize_t qio_channel_websock_writev(QIOChannel *ioc, + const struct iovec *iov, + size_t niov, + int *fds, + size_t nfds, + Error **errp) +{ + QIOChannelWebsock *wioc = QIO_CHANNEL_WEBSOCK(ioc); + ssize_t want = iov_size(iov, niov); + ssize_t avail; + ssize_t ret; + + if (wioc->io_err) { + error_propagate(errp, error_copy(wioc->io_err)); + return -1; + } + + if (wioc->io_eof) { + error_setg(errp, "%s", "Broken pipe"); + return -1; + } + + avail = wioc->encoutput.offset >= QIO_CHANNEL_WEBSOCK_MAX_BUFFER ? + 0 : (QIO_CHANNEL_WEBSOCK_MAX_BUFFER - wioc->encoutput.offset); + if (want > avail) { + want = avail; + } + + if (want) { + qio_channel_websock_encode(wioc, + QIO_CHANNEL_WEBSOCK_OPCODE_BINARY_FRAME, + iov, niov, want); + } + + /* Even if want == 0, we'll try write_wire in case there's + * pending data we could usefully flush out + */ + ret = qio_channel_websock_write_wire(wioc, errp); + if (ret < 0 && + ret != QIO_CHANNEL_ERR_BLOCK) { + qio_channel_websock_unset_watch(wioc); + return -1; + } + + qio_channel_websock_set_watch(wioc); + + if (want == 0) { + return QIO_CHANNEL_ERR_BLOCK; + } + + return want; +} + +static int qio_channel_websock_set_blocking(QIOChannel *ioc, + bool enabled, + Error **errp) +{ + QIOChannelWebsock *wioc = QIO_CHANNEL_WEBSOCK(ioc); + + qio_channel_set_blocking(wioc->master, enabled, errp); + return 0; +} + +static void qio_channel_websock_set_delay(QIOChannel *ioc, + bool enabled) +{ + QIOChannelWebsock *tioc = QIO_CHANNEL_WEBSOCK(ioc); + + qio_channel_set_delay(tioc->master, enabled); +} + +static void qio_channel_websock_set_cork(QIOChannel *ioc, + bool enabled) +{ + QIOChannelWebsock *tioc = QIO_CHANNEL_WEBSOCK(ioc); + + qio_channel_set_cork(tioc->master, enabled); +} + +static int qio_channel_websock_shutdown(QIOChannel *ioc, + QIOChannelShutdown how, + Error **errp) +{ + QIOChannelWebsock *tioc = QIO_CHANNEL_WEBSOCK(ioc); + + return qio_channel_shutdown(tioc->master, how, errp); +} + +static int qio_channel_websock_close(QIOChannel *ioc, + Error **errp) +{ + QIOChannelWebsock *wioc = QIO_CHANNEL_WEBSOCK(ioc); + + trace_qio_channel_websock_close(ioc); + return qio_channel_close(wioc->master, errp); +} + +typedef struct QIOChannelWebsockSource QIOChannelWebsockSource; +struct QIOChannelWebsockSource { + GSource parent; + QIOChannelWebsock *wioc; + GIOCondition condition; +}; + +static gboolean +qio_channel_websock_source_check(GSource *source) +{ + QIOChannelWebsockSource *wsource = (QIOChannelWebsockSource *)source; + GIOCondition cond = 0; + + if (wsource->wioc->rawinput.offset) { + cond |= G_IO_IN; + } + if (wsource->wioc->encoutput.offset < QIO_CHANNEL_WEBSOCK_MAX_BUFFER) { + cond |= G_IO_OUT; + } + if (wsource->wioc->io_eof) { + cond |= G_IO_HUP; + } + if (wsource->wioc->io_err) { + cond |= G_IO_ERR; + } + + return cond & wsource->condition; +} + +static gboolean +qio_channel_websock_source_prepare(GSource *source, + gint *timeout) +{ + *timeout = -1; + return qio_channel_websock_source_check(source); +} + +static gboolean +qio_channel_websock_source_dispatch(GSource *source, + GSourceFunc callback, + gpointer user_data) +{ + QIOChannelFunc func = (QIOChannelFunc)callback; + QIOChannelWebsockSource *wsource = (QIOChannelWebsockSource *)source; + + return (*func)(QIO_CHANNEL(wsource->wioc), + qio_channel_websock_source_check(source), + user_data); +} + +static void +qio_channel_websock_source_finalize(GSource *source) +{ + QIOChannelWebsockSource *ssource = (QIOChannelWebsockSource *)source; + + object_unref(OBJECT(ssource->wioc)); +} + +GSourceFuncs qio_channel_websock_source_funcs = { + qio_channel_websock_source_prepare, + qio_channel_websock_source_check, + qio_channel_websock_source_dispatch, + qio_channel_websock_source_finalize +}; + +static GSource *qio_channel_websock_create_watch(QIOChannel *ioc, + GIOCondition condition) +{ + QIOChannelWebsock *wioc = QIO_CHANNEL_WEBSOCK(ioc); + QIOChannelWebsockSource *ssource; + GSource *source; + + source = g_source_new(&qio_channel_websock_source_funcs, + sizeof(QIOChannelWebsockSource)); + ssource = (QIOChannelWebsockSource *)source; + + ssource->wioc = wioc; + object_ref(OBJECT(wioc)); + + ssource->condition = condition; + + qio_channel_websock_set_watch(wioc); + return source; +} + +static void qio_channel_websock_class_init(ObjectClass *klass, + void *class_data G_GNUC_UNUSED) +{ + QIOChannelClass *ioc_klass = QIO_CHANNEL_CLASS(klass); + + ioc_klass->io_writev = qio_channel_websock_writev; + ioc_klass->io_readv = qio_channel_websock_readv; + ioc_klass->io_set_blocking = qio_channel_websock_set_blocking; + ioc_klass->io_set_cork = qio_channel_websock_set_cork; + ioc_klass->io_set_delay = qio_channel_websock_set_delay; + ioc_klass->io_close = qio_channel_websock_close; + ioc_klass->io_shutdown = qio_channel_websock_shutdown; + ioc_klass->io_create_watch = qio_channel_websock_create_watch; +} + +static const TypeInfo qio_channel_websock_info = { + .parent = TYPE_QIO_CHANNEL, + .name = TYPE_QIO_CHANNEL_WEBSOCK, + .instance_size = sizeof(QIOChannelWebsock), + .instance_finalize = qio_channel_websock_finalize, + .class_init = qio_channel_websock_class_init, +}; + +static void qio_channel_websock_register_types(void) +{ + type_register_static(&qio_channel_websock_info); +} + +type_init(qio_channel_websock_register_types); diff --git a/io/channel.c b/io/channel.c new file mode 100644 index 000000000..93d449dee --- /dev/null +++ b/io/channel.c @@ -0,0 +1,550 @@ +/* + * QEMU I/O channels + * + * Copyright (c) 2015 Red Hat, Inc. + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, see . + * + */ + +#include "qemu/osdep.h" +#include "io/channel.h" +#include "qapi/error.h" +#include "qemu/main-loop.h" +#include "qemu/module.h" +#include "qemu/iov.h" + +bool qio_channel_has_feature(QIOChannel *ioc, + QIOChannelFeature feature) +{ + return ioc->features & (1 << feature); +} + + +void qio_channel_set_feature(QIOChannel *ioc, + QIOChannelFeature feature) +{ + ioc->features |= (1 << feature); +} + + +void qio_channel_set_name(QIOChannel *ioc, + const char *name) +{ + g_free(ioc->name); + ioc->name = g_strdup(name); +} + + +ssize_t qio_channel_readv_full(QIOChannel *ioc, + const struct iovec *iov, + size_t niov, + int **fds, + size_t *nfds, + Error **errp) +{ + QIOChannelClass *klass = QIO_CHANNEL_GET_CLASS(ioc); + + if ((fds || nfds) && + !qio_channel_has_feature(ioc, QIO_CHANNEL_FEATURE_FD_PASS)) { + error_setg_errno(errp, EINVAL, + "Channel does not support file descriptor passing"); + return -1; + } + + return klass->io_readv(ioc, iov, niov, fds, nfds, errp); +} + + +ssize_t qio_channel_writev_full(QIOChannel *ioc, + const struct iovec *iov, + size_t niov, + int *fds, + size_t nfds, + Error **errp) +{ + QIOChannelClass *klass = QIO_CHANNEL_GET_CLASS(ioc); + + if ((fds || nfds) && + !qio_channel_has_feature(ioc, QIO_CHANNEL_FEATURE_FD_PASS)) { + error_setg_errno(errp, EINVAL, + "Channel does not support file descriptor passing"); + return -1; + } + + return klass->io_writev(ioc, iov, niov, fds, nfds, errp); +} + + +int qio_channel_readv_all_eof(QIOChannel *ioc, + const struct iovec *iov, + size_t niov, + Error **errp) +{ + int ret = -1; + struct iovec *local_iov = g_new(struct iovec, niov); + struct iovec *local_iov_head = local_iov; + unsigned int nlocal_iov = niov; + bool partial = false; + + nlocal_iov = iov_copy(local_iov, nlocal_iov, + iov, niov, + 0, iov_size(iov, niov)); + + while (nlocal_iov > 0) { + ssize_t len; + len = qio_channel_readv(ioc, local_iov, nlocal_iov, errp); + if (len == QIO_CHANNEL_ERR_BLOCK) { + if (qemu_in_coroutine()) { + qio_channel_yield(ioc, G_IO_IN); + } else { + qio_channel_wait(ioc, G_IO_IN); + } + continue; + } else if (len < 0) { + goto cleanup; + } else if (len == 0) { + if (partial) { + error_setg(errp, + "Unexpected end-of-file before all bytes were read"); + } else { + ret = 0; + } + goto cleanup; + } + + partial = true; + iov_discard_front(&local_iov, &nlocal_iov, len); + } + + ret = 1; + + cleanup: + g_free(local_iov_head); + return ret; +} + +int qio_channel_readv_all(QIOChannel *ioc, + const struct iovec *iov, + size_t niov, + Error **errp) +{ + int ret = qio_channel_readv_all_eof(ioc, iov, niov, errp); + + if (ret == 0) { + ret = -1; + error_setg(errp, + "Unexpected end-of-file before all bytes were read"); + } else if (ret == 1) { + ret = 0; + } + return ret; +} + +int qio_channel_writev_all(QIOChannel *ioc, + const struct iovec *iov, + size_t niov, + Error **errp) +{ + int ret = -1; + struct iovec *local_iov = g_new(struct iovec, niov); + struct iovec *local_iov_head = local_iov; + unsigned int nlocal_iov = niov; + + nlocal_iov = iov_copy(local_iov, nlocal_iov, + iov, niov, + 0, iov_size(iov, niov)); + + while (nlocal_iov > 0) { + ssize_t len; + len = qio_channel_writev(ioc, local_iov, nlocal_iov, errp); + if (len == QIO_CHANNEL_ERR_BLOCK) { + if (qemu_in_coroutine()) { + qio_channel_yield(ioc, G_IO_OUT); + } else { + qio_channel_wait(ioc, G_IO_OUT); + } + continue; + } + if (len < 0) { + goto cleanup; + } + + iov_discard_front(&local_iov, &nlocal_iov, len); + } + + ret = 0; + cleanup: + g_free(local_iov_head); + return ret; +} + +ssize_t qio_channel_readv(QIOChannel *ioc, + const struct iovec *iov, + size_t niov, + Error **errp) +{ + return qio_channel_readv_full(ioc, iov, niov, NULL, NULL, errp); +} + + +ssize_t qio_channel_writev(QIOChannel *ioc, + const struct iovec *iov, + size_t niov, + Error **errp) +{ + return qio_channel_writev_full(ioc, iov, niov, NULL, 0, errp); +} + + +ssize_t qio_channel_read(QIOChannel *ioc, + char *buf, + size_t buflen, + Error **errp) +{ + struct iovec iov = { .iov_base = buf, .iov_len = buflen }; + return qio_channel_readv_full(ioc, &iov, 1, NULL, NULL, errp); +} + + +ssize_t qio_channel_write(QIOChannel *ioc, + const char *buf, + size_t buflen, + Error **errp) +{ + struct iovec iov = { .iov_base = (char *)buf, .iov_len = buflen }; + return qio_channel_writev_full(ioc, &iov, 1, NULL, 0, errp); +} + + +int qio_channel_read_all_eof(QIOChannel *ioc, + char *buf, + size_t buflen, + Error **errp) +{ + struct iovec iov = { .iov_base = buf, .iov_len = buflen }; + return qio_channel_readv_all_eof(ioc, &iov, 1, errp); +} + + +int qio_channel_read_all(QIOChannel *ioc, + char *buf, + size_t buflen, + Error **errp) +{ + struct iovec iov = { .iov_base = buf, .iov_len = buflen }; + return qio_channel_readv_all(ioc, &iov, 1, errp); +} + + +int qio_channel_write_all(QIOChannel *ioc, + const char *buf, + size_t buflen, + Error **errp) +{ + struct iovec iov = { .iov_base = (char *)buf, .iov_len = buflen }; + return qio_channel_writev_all(ioc, &iov, 1, errp); +} + + +int qio_channel_set_blocking(QIOChannel *ioc, + bool enabled, + Error **errp) +{ + QIOChannelClass *klass = QIO_CHANNEL_GET_CLASS(ioc); + return klass->io_set_blocking(ioc, enabled, errp); +} + + +int qio_channel_close(QIOChannel *ioc, + Error **errp) +{ + QIOChannelClass *klass = QIO_CHANNEL_GET_CLASS(ioc); + return klass->io_close(ioc, errp); +} + + +GSource *qio_channel_create_watch(QIOChannel *ioc, + GIOCondition condition) +{ + QIOChannelClass *klass = QIO_CHANNEL_GET_CLASS(ioc); + GSource *ret = klass->io_create_watch(ioc, condition); + + if (ioc->name) { + g_source_set_name(ret, ioc->name); + } + + return ret; +} + + +void qio_channel_set_aio_fd_handler(QIOChannel *ioc, + AioContext *ctx, + IOHandler *io_read, + IOHandler *io_write, + void *opaque) +{ + QIOChannelClass *klass = QIO_CHANNEL_GET_CLASS(ioc); + + klass->io_set_aio_fd_handler(ioc, ctx, io_read, io_write, opaque); +} + +guint qio_channel_add_watch_full(QIOChannel *ioc, + GIOCondition condition, + QIOChannelFunc func, + gpointer user_data, + GDestroyNotify notify, + GMainContext *context) +{ + GSource *source; + guint id; + + source = qio_channel_create_watch(ioc, condition); + + g_source_set_callback(source, (GSourceFunc)func, user_data, notify); + + id = g_source_attach(source, context); + g_source_unref(source); + + return id; +} + +guint qio_channel_add_watch(QIOChannel *ioc, + GIOCondition condition, + QIOChannelFunc func, + gpointer user_data, + GDestroyNotify notify) +{ + return qio_channel_add_watch_full(ioc, condition, func, + user_data, notify, NULL); +} + +GSource *qio_channel_add_watch_source(QIOChannel *ioc, + GIOCondition condition, + QIOChannelFunc func, + gpointer user_data, + GDestroyNotify notify, + GMainContext *context) +{ + GSource *source; + guint id; + + id = qio_channel_add_watch_full(ioc, condition, func, + user_data, notify, context); + source = g_main_context_find_source_by_id(context, id); + g_source_ref(source); + return source; +} + + +int qio_channel_shutdown(QIOChannel *ioc, + QIOChannelShutdown how, + Error **errp) +{ + QIOChannelClass *klass = QIO_CHANNEL_GET_CLASS(ioc); + + if (!klass->io_shutdown) { + error_setg(errp, "Data path shutdown not supported"); + return -1; + } + + return klass->io_shutdown(ioc, how, errp); +} + + +void qio_channel_set_delay(QIOChannel *ioc, + bool enabled) +{ + QIOChannelClass *klass = QIO_CHANNEL_GET_CLASS(ioc); + + if (klass->io_set_delay) { + klass->io_set_delay(ioc, enabled); + } +} + + +void qio_channel_set_cork(QIOChannel *ioc, + bool enabled) +{ + QIOChannelClass *klass = QIO_CHANNEL_GET_CLASS(ioc); + + if (klass->io_set_cork) { + klass->io_set_cork(ioc, enabled); + } +} + + +off_t qio_channel_io_seek(QIOChannel *ioc, + off_t offset, + int whence, + Error **errp) +{ + QIOChannelClass *klass = QIO_CHANNEL_GET_CLASS(ioc); + + if (!klass->io_seek) { + error_setg(errp, "Channel does not support random access"); + return -1; + } + + return klass->io_seek(ioc, offset, whence, errp); +} + + +static void qio_channel_restart_read(void *opaque) +{ + QIOChannel *ioc = opaque; + Coroutine *co = ioc->read_coroutine; + + /* Assert that aio_co_wake() reenters the coroutine directly */ + assert(qemu_get_current_aio_context() == + qemu_coroutine_get_aio_context(co)); + aio_co_wake(co); +} + +static void qio_channel_restart_write(void *opaque) +{ + QIOChannel *ioc = opaque; + Coroutine *co = ioc->write_coroutine; + + /* Assert that aio_co_wake() reenters the coroutine directly */ + assert(qemu_get_current_aio_context() == + qemu_coroutine_get_aio_context(co)); + aio_co_wake(co); +} + +static void qio_channel_set_aio_fd_handlers(QIOChannel *ioc) +{ + IOHandler *rd_handler = NULL, *wr_handler = NULL; + AioContext *ctx; + + if (ioc->read_coroutine) { + rd_handler = qio_channel_restart_read; + } + if (ioc->write_coroutine) { + wr_handler = qio_channel_restart_write; + } + + ctx = ioc->ctx ? ioc->ctx : iohandler_get_aio_context(); + qio_channel_set_aio_fd_handler(ioc, ctx, rd_handler, wr_handler, ioc); +} + +void qio_channel_attach_aio_context(QIOChannel *ioc, + AioContext *ctx) +{ + assert(!ioc->read_coroutine); + assert(!ioc->write_coroutine); + ioc->ctx = ctx; +} + +void qio_channel_detach_aio_context(QIOChannel *ioc) +{ + ioc->read_coroutine = NULL; + ioc->write_coroutine = NULL; + qio_channel_set_aio_fd_handlers(ioc); + ioc->ctx = NULL; +} + +void coroutine_fn qio_channel_yield(QIOChannel *ioc, + GIOCondition condition) +{ + assert(qemu_in_coroutine()); + if (condition == G_IO_IN) { + assert(!ioc->read_coroutine); + ioc->read_coroutine = qemu_coroutine_self(); + } else if (condition == G_IO_OUT) { + assert(!ioc->write_coroutine); + ioc->write_coroutine = qemu_coroutine_self(); + } else { + abort(); + } + qio_channel_set_aio_fd_handlers(ioc); + qemu_coroutine_yield(); + + /* Allow interrupting the operation by reentering the coroutine other than + * through the aio_fd_handlers. */ + if (condition == G_IO_IN && ioc->read_coroutine) { + ioc->read_coroutine = NULL; + qio_channel_set_aio_fd_handlers(ioc); + } else if (condition == G_IO_OUT && ioc->write_coroutine) { + ioc->write_coroutine = NULL; + qio_channel_set_aio_fd_handlers(ioc); + } +} + + +static gboolean qio_channel_wait_complete(QIOChannel *ioc, + GIOCondition condition, + gpointer opaque) +{ + GMainLoop *loop = opaque; + + g_main_loop_quit(loop); + return FALSE; +} + + +void qio_channel_wait(QIOChannel *ioc, + GIOCondition condition) +{ + GMainContext *ctxt = g_main_context_new(); + GMainLoop *loop = g_main_loop_new(ctxt, TRUE); + GSource *source; + + source = qio_channel_create_watch(ioc, condition); + + g_source_set_callback(source, + (GSourceFunc)qio_channel_wait_complete, + loop, + NULL); + + g_source_attach(source, ctxt); + + g_main_loop_run(loop); + + g_source_unref(source); + g_main_loop_unref(loop); + g_main_context_unref(ctxt); +} + + +static void qio_channel_finalize(Object *obj) +{ + QIOChannel *ioc = QIO_CHANNEL(obj); + + g_free(ioc->name); + +#ifdef _WIN32 + if (ioc->event) { + CloseHandle(ioc->event); + } +#endif +} + +static const TypeInfo qio_channel_info = { + .parent = TYPE_OBJECT, + .name = TYPE_QIO_CHANNEL, + .instance_size = sizeof(QIOChannel), + .instance_finalize = qio_channel_finalize, + .abstract = true, + .class_size = sizeof(QIOChannelClass), +}; + + +static void qio_channel_register_types(void) +{ + type_register_static(&qio_channel_info); +} + + +type_init(qio_channel_register_types); diff --git a/io/dns-resolver.c b/io/dns-resolver.c new file mode 100644 index 000000000..743a0efc8 --- /dev/null +++ b/io/dns-resolver.c @@ -0,0 +1,279 @@ +/* + * QEMU DNS resolver + * + * Copyright (c) 2016 Red Hat, Inc. + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, see . + * + */ + +#include "qemu/osdep.h" +#include "io/dns-resolver.h" +#include "qapi/clone-visitor.h" +#include "qapi/qapi-visit-sockets.h" +#include "qemu/sockets.h" +#include "qapi/error.h" +#include "qemu/cutils.h" +#include "qemu/module.h" + +#ifndef AI_NUMERICSERV +# define AI_NUMERICSERV 0 +#endif + +static QIODNSResolver *instance; +static GOnce instance_init = G_ONCE_INIT; + +static gpointer qio_dns_resolve_init_instance(gpointer unused G_GNUC_UNUSED) +{ + instance = QIO_DNS_RESOLVER(object_new(TYPE_QIO_DNS_RESOLVER)); + return NULL; +} + +QIODNSResolver *qio_dns_resolver_get_instance(void) +{ + g_once(&instance_init, qio_dns_resolve_init_instance, NULL); + return instance; +} + +static int qio_dns_resolver_lookup_sync_inet(QIODNSResolver *resolver, + SocketAddress *addr, + size_t *naddrs, + SocketAddress ***addrs, + Error **errp) +{ + struct addrinfo ai, *res, *e; + InetSocketAddress *iaddr = &addr->u.inet; + char port[33]; + char uaddr[INET6_ADDRSTRLEN + 1]; + char uport[33]; + int rc; + Error *err = NULL; + size_t i; + + *naddrs = 0; + *addrs = NULL; + + memset(&ai, 0, sizeof(ai)); + ai.ai_flags = AI_PASSIVE; + if (iaddr->has_numeric && iaddr->numeric) { + ai.ai_flags |= AI_NUMERICHOST | AI_NUMERICSERV; + } + ai.ai_family = inet_ai_family_from_address(iaddr, &err); + ai.ai_socktype = SOCK_STREAM; + + if (err) { + error_propagate(errp, err); + return -1; + } + + if (iaddr->host == NULL) { + error_setg(errp, "host not specified"); + return -1; + } + if (iaddr->port != NULL) { + pstrcpy(port, sizeof(port), iaddr->port); + } else { + port[0] = '\0'; + } + + rc = getaddrinfo(strlen(iaddr->host) ? iaddr->host : NULL, + strlen(port) ? port : NULL, &ai, &res); + if (rc != 0) { + error_setg(errp, "address resolution failed for %s:%s: %s", + iaddr->host, port, gai_strerror(rc)); + return -1; + } + + for (e = res; e != NULL; e = e->ai_next) { + (*naddrs)++; + } + + *addrs = g_new0(SocketAddress *, *naddrs); + + /* create socket + bind */ + for (i = 0, e = res; e != NULL; i++, e = e->ai_next) { + SocketAddress *newaddr = g_new0(SocketAddress, 1); + + newaddr->type = SOCKET_ADDRESS_TYPE_INET; + + getnameinfo((struct sockaddr *)e->ai_addr, e->ai_addrlen, + uaddr, INET6_ADDRSTRLEN, uport, 32, + NI_NUMERICHOST | NI_NUMERICSERV); + + newaddr->u.inet = (InetSocketAddress){ + .host = g_strdup(uaddr), + .port = g_strdup(uport), + .has_numeric = true, + .numeric = true, + .has_to = iaddr->has_to, + .to = iaddr->to, + .has_ipv4 = iaddr->has_ipv4, + .ipv4 = iaddr->ipv4, + .has_ipv6 = iaddr->has_ipv6, + .ipv6 = iaddr->ipv6, + }; + + (*addrs)[i] = newaddr; + } + freeaddrinfo(res); + return 0; +} + + +static int qio_dns_resolver_lookup_sync_nop(QIODNSResolver *resolver, + SocketAddress *addr, + size_t *naddrs, + SocketAddress ***addrs, + Error **errp) +{ + *naddrs = 1; + *addrs = g_new0(SocketAddress *, 1); + (*addrs)[0] = QAPI_CLONE(SocketAddress, addr); + + return 0; +} + + +int qio_dns_resolver_lookup_sync(QIODNSResolver *resolver, + SocketAddress *addr, + size_t *naddrs, + SocketAddress ***addrs, + Error **errp) +{ + switch (addr->type) { + case SOCKET_ADDRESS_TYPE_INET: + return qio_dns_resolver_lookup_sync_inet(resolver, + addr, + naddrs, + addrs, + errp); + + case SOCKET_ADDRESS_TYPE_UNIX: + case SOCKET_ADDRESS_TYPE_VSOCK: + case SOCKET_ADDRESS_TYPE_FD: + return qio_dns_resolver_lookup_sync_nop(resolver, + addr, + naddrs, + addrs, + errp); + + default: + abort(); + } +} + + +struct QIODNSResolverLookupData { + SocketAddress *addr; + SocketAddress **addrs; + size_t naddrs; +}; + + +static void qio_dns_resolver_lookup_data_free(gpointer opaque) +{ + struct QIODNSResolverLookupData *data = opaque; + size_t i; + + qapi_free_SocketAddress(data->addr); + for (i = 0; i < data->naddrs; i++) { + qapi_free_SocketAddress(data->addrs[i]); + } + + g_free(data->addrs); + g_free(data); +} + + +static void qio_dns_resolver_lookup_worker(QIOTask *task, + gpointer opaque) +{ + QIODNSResolver *resolver = QIO_DNS_RESOLVER(qio_task_get_source(task)); + struct QIODNSResolverLookupData *data = opaque; + Error *err = NULL; + + qio_dns_resolver_lookup_sync(resolver, + data->addr, + &data->naddrs, + &data->addrs, + &err); + if (err) { + qio_task_set_error(task, err); + } else { + qio_task_set_result_pointer(task, opaque, NULL); + } + + object_unref(OBJECT(resolver)); +} + + +void qio_dns_resolver_lookup_async(QIODNSResolver *resolver, + SocketAddress *addr, + QIOTaskFunc func, + gpointer opaque, + GDestroyNotify notify) +{ + QIOTask *task; + struct QIODNSResolverLookupData *data = + g_new0(struct QIODNSResolverLookupData, 1); + + data->addr = QAPI_CLONE(SocketAddress, addr); + + task = qio_task_new(OBJECT(resolver), func, opaque, notify); + + qio_task_run_in_thread(task, + qio_dns_resolver_lookup_worker, + data, + qio_dns_resolver_lookup_data_free, + NULL); +} + + +void qio_dns_resolver_lookup_result(QIODNSResolver *resolver, + QIOTask *task, + size_t *naddrs, + SocketAddress ***addrs) +{ + struct QIODNSResolverLookupData *data = + qio_task_get_result_pointer(task); + size_t i; + + *naddrs = 0; + *addrs = NULL; + if (!data) { + return; + } + + *naddrs = data->naddrs; + *addrs = g_new0(SocketAddress *, data->naddrs); + for (i = 0; i < data->naddrs; i++) { + (*addrs)[i] = QAPI_CLONE(SocketAddress, data->addrs[i]); + } +} + + +static const TypeInfo qio_dns_resolver_info = { + .parent = TYPE_OBJECT, + .name = TYPE_QIO_DNS_RESOLVER, + .instance_size = sizeof(QIODNSResolver), +}; + + +static void qio_dns_resolver_register_types(void) +{ + type_register_static(&qio_dns_resolver_info); +} + + +type_init(qio_dns_resolver_register_types); diff --git a/io/meson.build b/io/meson.build new file mode 100644 index 000000000..bcd8b1e73 --- /dev/null +++ b/io/meson.build @@ -0,0 +1,15 @@ +io_ss.add(genh) +io_ss.add(files( + 'channel-buffer.c', + 'channel-command.c', + 'channel-file.c', + 'channel-socket.c', + 'channel-tls.c', + 'channel-util.c', + 'channel-watch.c', + 'channel-websock.c', + 'channel.c', + 'dns-resolver.c', + 'net-listener.c', + 'task.c', +)) diff --git a/io/net-listener.c b/io/net-listener.c new file mode 100644 index 000000000..46c2643d0 --- /dev/null +++ b/io/net-listener.c @@ -0,0 +1,319 @@ +/* + * QEMU network listener + * + * Copyright (c) 2016-2017 Red Hat, Inc. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, see . + * + */ + +#include "qemu/osdep.h" +#include "io/net-listener.h" +#include "io/dns-resolver.h" +#include "qapi/error.h" +#include "qemu/module.h" + +QIONetListener *qio_net_listener_new(void) +{ + return QIO_NET_LISTENER(object_new(TYPE_QIO_NET_LISTENER)); +} + +void qio_net_listener_set_name(QIONetListener *listener, + const char *name) +{ + g_free(listener->name); + listener->name = g_strdup(name); +} + + +static gboolean qio_net_listener_channel_func(QIOChannel *ioc, + GIOCondition condition, + gpointer opaque) +{ + QIONetListener *listener = QIO_NET_LISTENER(opaque); + QIOChannelSocket *sioc; + + sioc = qio_channel_socket_accept(QIO_CHANNEL_SOCKET(ioc), + NULL); + if (!sioc) { + return TRUE; + } + + if (listener->io_func) { + listener->io_func(listener, sioc, listener->io_data); + } + + object_unref(OBJECT(sioc)); + + return TRUE; +} + + +int qio_net_listener_open_sync(QIONetListener *listener, + SocketAddress *addr, + int num, + Error **errp) +{ + QIODNSResolver *resolver = qio_dns_resolver_get_instance(); + SocketAddress **resaddrs; + size_t nresaddrs; + size_t i; + Error *err = NULL; + bool success = false; + + if (qio_dns_resolver_lookup_sync(resolver, + addr, + &nresaddrs, + &resaddrs, + errp) < 0) { + return -1; + } + + for (i = 0; i < nresaddrs; i++) { + QIOChannelSocket *sioc = qio_channel_socket_new(); + + if (qio_channel_socket_listen_sync(sioc, resaddrs[i], num, + err ? NULL : &err) == 0) { + success = true; + + qio_net_listener_add(listener, sioc); + } + + qapi_free_SocketAddress(resaddrs[i]); + object_unref(OBJECT(sioc)); + } + g_free(resaddrs); + + if (success) { + error_free(err); + return 0; + } else { + error_propagate(errp, err); + return -1; + } +} + + +void qio_net_listener_add(QIONetListener *listener, + QIOChannelSocket *sioc) +{ + if (listener->name) { + char *name = g_strdup_printf("%s-listen", listener->name); + qio_channel_set_name(QIO_CHANNEL(sioc), name); + g_free(name); + } + + listener->sioc = g_renew(QIOChannelSocket *, listener->sioc, + listener->nsioc + 1); + listener->io_source = g_renew(typeof(listener->io_source[0]), + listener->io_source, + listener->nsioc + 1); + listener->sioc[listener->nsioc] = sioc; + listener->io_source[listener->nsioc] = NULL; + + object_ref(OBJECT(sioc)); + listener->connected = true; + + if (listener->io_func != NULL) { + object_ref(OBJECT(listener)); + listener->io_source[listener->nsioc] = qio_channel_add_watch_source( + QIO_CHANNEL(listener->sioc[listener->nsioc]), G_IO_IN, + qio_net_listener_channel_func, + listener, (GDestroyNotify)object_unref, NULL); + } + + listener->nsioc++; +} + + +void qio_net_listener_set_client_func_full(QIONetListener *listener, + QIONetListenerClientFunc func, + gpointer data, + GDestroyNotify notify, + GMainContext *context) +{ + size_t i; + + if (listener->io_notify) { + listener->io_notify(listener->io_data); + } + listener->io_func = func; + listener->io_data = data; + listener->io_notify = notify; + + for (i = 0; i < listener->nsioc; i++) { + if (listener->io_source[i]) { + g_source_destroy(listener->io_source[i]); + g_source_unref(listener->io_source[i]); + listener->io_source[i] = NULL; + } + } + + if (listener->io_func != NULL) { + for (i = 0; i < listener->nsioc; i++) { + object_ref(OBJECT(listener)); + listener->io_source[i] = qio_channel_add_watch_source( + QIO_CHANNEL(listener->sioc[i]), G_IO_IN, + qio_net_listener_channel_func, + listener, (GDestroyNotify)object_unref, context); + } + } +} + +void qio_net_listener_set_client_func(QIONetListener *listener, + QIONetListenerClientFunc func, + gpointer data, + GDestroyNotify notify) +{ + qio_net_listener_set_client_func_full(listener, func, data, + notify, NULL); +} + +struct QIONetListenerClientWaitData { + QIOChannelSocket *sioc; + GMainLoop *loop; +}; + + +static gboolean qio_net_listener_wait_client_func(QIOChannel *ioc, + GIOCondition condition, + gpointer opaque) +{ + struct QIONetListenerClientWaitData *data = opaque; + QIOChannelSocket *sioc; + + sioc = qio_channel_socket_accept(QIO_CHANNEL_SOCKET(ioc), + NULL); + if (!sioc) { + return TRUE; + } + + if (data->sioc) { + object_unref(OBJECT(sioc)); + } else { + data->sioc = sioc; + g_main_loop_quit(data->loop); + } + + return TRUE; +} + +QIOChannelSocket *qio_net_listener_wait_client(QIONetListener *listener) +{ + GMainContext *ctxt = g_main_context_new(); + GMainLoop *loop = g_main_loop_new(ctxt, TRUE); + GSource **sources; + struct QIONetListenerClientWaitData data = { + .sioc = NULL, + .loop = loop + }; + size_t i; + + for (i = 0; i < listener->nsioc; i++) { + if (listener->io_source[i]) { + g_source_destroy(listener->io_source[i]); + g_source_unref(listener->io_source[i]); + listener->io_source[i] = NULL; + } + } + + sources = g_new0(GSource *, listener->nsioc); + for (i = 0; i < listener->nsioc; i++) { + sources[i] = qio_channel_create_watch(QIO_CHANNEL(listener->sioc[i]), + G_IO_IN); + + g_source_set_callback(sources[i], + (GSourceFunc)qio_net_listener_wait_client_func, + &data, + NULL); + g_source_attach(sources[i], ctxt); + } + + g_main_loop_run(loop); + + for (i = 0; i < listener->nsioc; i++) { + g_source_unref(sources[i]); + } + g_free(sources); + g_main_loop_unref(loop); + g_main_context_unref(ctxt); + + if (listener->io_func != NULL) { + for (i = 0; i < listener->nsioc; i++) { + object_ref(OBJECT(listener)); + listener->io_source[i] = qio_channel_add_watch_source( + QIO_CHANNEL(listener->sioc[i]), G_IO_IN, + qio_net_listener_channel_func, + listener, (GDestroyNotify)object_unref, NULL); + } + } + + return data.sioc; +} + +void qio_net_listener_disconnect(QIONetListener *listener) +{ + size_t i; + + if (!listener->connected) { + return; + } + + for (i = 0; i < listener->nsioc; i++) { + if (listener->io_source[i]) { + g_source_destroy(listener->io_source[i]); + g_source_unref(listener->io_source[i]); + listener->io_source[i] = NULL; + } + qio_channel_close(QIO_CHANNEL(listener->sioc[i]), NULL); + } + listener->connected = false; +} + + +bool qio_net_listener_is_connected(QIONetListener *listener) +{ + return listener->connected; +} + +static void qio_net_listener_finalize(Object *obj) +{ + QIONetListener *listener = QIO_NET_LISTENER(obj); + size_t i; + + qio_net_listener_disconnect(listener); + + for (i = 0; i < listener->nsioc; i++) { + object_unref(OBJECT(listener->sioc[i])); + } + g_free(listener->io_source); + g_free(listener->sioc); + g_free(listener->name); +} + +static const TypeInfo qio_net_listener_info = { + .parent = TYPE_OBJECT, + .name = TYPE_QIO_NET_LISTENER, + .instance_size = sizeof(QIONetListener), + .instance_finalize = qio_net_listener_finalize, +}; + + +static void qio_net_listener_register_types(void) +{ + type_register_static(&qio_net_listener_info); +} + + +type_init(qio_net_listener_register_types); diff --git a/io/task.c b/io/task.c new file mode 100644 index 000000000..451f26f8b --- /dev/null +++ b/io/task.c @@ -0,0 +1,241 @@ +/* + * QEMU I/O task + * + * Copyright (c) 2015 Red Hat, Inc. + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, see . + * + */ + +#include "qemu/osdep.h" +#include "io/task.h" +#include "qapi/error.h" +#include "qemu/thread.h" +#include "qom/object.h" +#include "trace.h" + +struct QIOTaskThreadData { + QIOTaskWorker worker; + gpointer opaque; + GDestroyNotify destroy; + GMainContext *context; + GSource *completion; +}; + + +struct QIOTask { + Object *source; + QIOTaskFunc func; + gpointer opaque; + GDestroyNotify destroy; + Error *err; + gpointer result; + GDestroyNotify destroyResult; + QemuMutex thread_lock; + QemuCond thread_cond; + struct QIOTaskThreadData *thread; +}; + + +QIOTask *qio_task_new(Object *source, + QIOTaskFunc func, + gpointer opaque, + GDestroyNotify destroy) +{ + QIOTask *task; + + task = g_new0(QIOTask, 1); + + task->source = source; + object_ref(source); + task->func = func; + task->opaque = opaque; + task->destroy = destroy; + qemu_mutex_init(&task->thread_lock); + qemu_cond_init(&task->thread_cond); + + trace_qio_task_new(task, source, func, opaque); + + return task; +} + +static void qio_task_free(QIOTask *task) +{ + qemu_mutex_lock(&task->thread_lock); + if (task->thread) { + if (task->thread->destroy) { + task->thread->destroy(task->thread->opaque); + } + + if (task->thread->context) { + g_main_context_unref(task->thread->context); + } + + g_free(task->thread); + } + + if (task->destroy) { + task->destroy(task->opaque); + } + if (task->destroyResult) { + task->destroyResult(task->result); + } + if (task->err) { + error_free(task->err); + } + object_unref(task->source); + + qemu_mutex_unlock(&task->thread_lock); + qemu_mutex_destroy(&task->thread_lock); + qemu_cond_destroy(&task->thread_cond); + + g_free(task); +} + + +static gboolean qio_task_thread_result(gpointer opaque) +{ + QIOTask *task = opaque; + + trace_qio_task_thread_result(task); + qio_task_complete(task); + + return FALSE; +} + + +static gpointer qio_task_thread_worker(gpointer opaque) +{ + QIOTask *task = opaque; + + trace_qio_task_thread_run(task); + + task->thread->worker(task, task->thread->opaque); + + /* We're running in the background thread, and must only + * ever report the task results in the main event loop + * thread. So we schedule an idle callback to report + * the worker results + */ + trace_qio_task_thread_exit(task); + + qemu_mutex_lock(&task->thread_lock); + + task->thread->completion = g_idle_source_new(); + g_source_set_callback(task->thread->completion, + qio_task_thread_result, task, NULL); + g_source_attach(task->thread->completion, + task->thread->context); + g_source_unref(task->thread->completion); + trace_qio_task_thread_source_attach(task, task->thread->completion); + + qemu_cond_signal(&task->thread_cond); + qemu_mutex_unlock(&task->thread_lock); + + return NULL; +} + + +void qio_task_run_in_thread(QIOTask *task, + QIOTaskWorker worker, + gpointer opaque, + GDestroyNotify destroy, + GMainContext *context) +{ + struct QIOTaskThreadData *data = g_new0(struct QIOTaskThreadData, 1); + QemuThread thread; + + if (context) { + g_main_context_ref(context); + } + + data->worker = worker; + data->opaque = opaque; + data->destroy = destroy; + data->context = context; + + task->thread = data; + + trace_qio_task_thread_start(task, worker, opaque); + qemu_thread_create(&thread, + "io-task-worker", + qio_task_thread_worker, + task, + QEMU_THREAD_DETACHED); +} + + +void qio_task_wait_thread(QIOTask *task) +{ + qemu_mutex_lock(&task->thread_lock); + g_assert(task->thread != NULL); + while (task->thread->completion == NULL) { + qemu_cond_wait(&task->thread_cond, &task->thread_lock); + } + + trace_qio_task_thread_source_cancel(task, task->thread->completion); + g_source_destroy(task->thread->completion); + qemu_mutex_unlock(&task->thread_lock); + + qio_task_thread_result(task); +} + + +void qio_task_complete(QIOTask *task) +{ + task->func(task, task->opaque); + trace_qio_task_complete(task); + qio_task_free(task); +} + + +void qio_task_set_error(QIOTask *task, + Error *err) +{ + error_propagate(&task->err, err); +} + + +bool qio_task_propagate_error(QIOTask *task, + Error **errp) +{ + if (task->err) { + error_propagate(errp, task->err); + task->err = NULL; + return true; + } + + return false; +} + + +void qio_task_set_result_pointer(QIOTask *task, + gpointer result, + GDestroyNotify destroy) +{ + task->result = result; + task->destroyResult = destroy; +} + + +gpointer qio_task_get_result_pointer(QIOTask *task) +{ + return task->result; +} + + +Object *qio_task_get_source(QIOTask *task) +{ + return task->source; +} diff --git a/io/trace-events b/io/trace-events new file mode 100644 index 000000000..d7bc70b96 --- /dev/null +++ b/io/trace-events @@ -0,0 +1,65 @@ +# See docs/devel/tracing.txt for syntax documentation. + +# task.c +qio_task_new(void *task, void *source, void *func, void *opaque) "Task new task=%p source=%p func=%p opaque=%p" +qio_task_complete(void *task) "Task complete task=%p" +qio_task_thread_start(void *task, void *worker, void *opaque) "Task thread start task=%p worker=%p opaque=%p" +qio_task_thread_run(void *task) "Task thread run task=%p" +qio_task_thread_exit(void *task) "Task thread exit task=%p" +qio_task_thread_result(void *task) "Task thread result task=%p" +qio_task_thread_source_attach(void *task, void *source) "Task thread source attach task=%p source=%p" +qio_task_thread_source_cancel(void *task, void *source) "Task thread source cancel task=%p source=%p" + +# channel-socket.c +qio_channel_socket_new(void *ioc) "Socket new ioc=%p" +qio_channel_socket_new_fd(void *ioc, int fd) "Socket new ioc=%p fd=%d" +qio_channel_socket_connect_sync(void *ioc, void *addr) "Socket connect sync ioc=%p addr=%p" +qio_channel_socket_connect_async(void *ioc, void *addr) "Socket connect async ioc=%p addr=%p" +qio_channel_socket_connect_fail(void *ioc) "Socket connect fail ioc=%p" +qio_channel_socket_connect_complete(void *ioc, int fd) "Socket connect complete ioc=%p fd=%d" +qio_channel_socket_listen_sync(void *ioc, void *addr, int num) "Socket listen sync ioc=%p addr=%p num=%d" +qio_channel_socket_listen_async(void *ioc, void *addr, int num) "Socket listen async ioc=%p addr=%p num=%d" +qio_channel_socket_listen_fail(void *ioc) "Socket listen fail ioc=%p" +qio_channel_socket_listen_complete(void *ioc, int fd) "Socket listen complete ioc=%p fd=%d" +qio_channel_socket_dgram_sync(void *ioc, void *localAddr, void *remoteAddr) "Socket dgram sync ioc=%p localAddr=%p remoteAddr=%p" +qio_channel_socket_dgram_async(void *ioc, void *localAddr, void *remoteAddr) "Socket dgram async ioc=%p localAddr=%p remoteAddr=%p" +qio_channel_socket_dgram_fail(void *ioc) "Socket dgram fail ioc=%p" +qio_channel_socket_dgram_complete(void *ioc, int fd) "Socket dgram complete ioc=%p fd=%d" +qio_channel_socket_accept(void *ioc) "Socket accept start ioc=%p" +qio_channel_socket_accept_fail(void *ioc) "Socket accept fail ioc=%p" +qio_channel_socket_accept_complete(void *ioc, void *cioc, int fd) "Socket accept complete ioc=%p cioc=%p fd=%d" + +# channel-file.c +qio_channel_file_new_fd(void *ioc, int fd) "File new fd ioc=%p fd=%d" +qio_channel_file_new_path(void *ioc, const char *path, int flags, int mode, int fd) "File new fd ioc=%p path=%s flags=%d mode=%d fd=%d" + +# channel-tls.c +qio_channel_tls_new_client(void *ioc, void *master, void *creds, const char *hostname) "TLS new client ioc=%p master=%p creds=%p hostname=%s" +qio_channel_tls_new_server(void *ioc, void *master, void *creds, const char *aclname) "TLS new client ioc=%p master=%p creds=%p acltname=%s" +qio_channel_tls_handshake_start(void *ioc) "TLS handshake start ioc=%p" +qio_channel_tls_handshake_pending(void *ioc, int status) "TLS handshake pending ioc=%p status=%d" +qio_channel_tls_handshake_fail(void *ioc) "TLS handshake fail ioc=%p" +qio_channel_tls_handshake_complete(void *ioc) "TLS handshake complete ioc=%p" +qio_channel_tls_credentials_allow(void *ioc) "TLS credentials allow ioc=%p" +qio_channel_tls_credentials_deny(void *ioc) "TLS credentials deny ioc=%p" + +# channel-websock.c +qio_channel_websock_new_server(void *ioc, void *master) "Websock new client ioc=%p master=%p" +qio_channel_websock_handshake_start(void *ioc) "Websock handshake start ioc=%p" +qio_channel_websock_handshake_pending(void *ioc, int status) "Websock handshake pending ioc=%p status=%d" +qio_channel_websock_handshake_reply(void *ioc) "Websock handshake reply ioc=%p" +qio_channel_websock_handshake_fail(void *ioc, const char *msg) "Websock handshake fail ioc=%p err=%s" +qio_channel_websock_handshake_complete(void *ioc) "Websock handshake complete ioc=%p" +qio_channel_websock_http_greeting(void *ioc, const char *greeting) "Websocket HTTP request ioc=%p greeting='%s'" +qio_channel_websock_http_request(void *ioc, const char *protocols, const char *version, const char *host, const char *connection, const char *upgrade, const char *key) "Websocket HTTP request ioc=%p protocols='%s' version='%s' host='%s' connection='%s' upgrade='%s' key='%s'" +qio_channel_websock_header_partial_decode(void *ioc, size_t payloadlen, unsigned char fin, unsigned char opcode, unsigned char has_mask) "Websocket header decoded ioc=%p payload-len=%zu fin=0x%x opcode=0x%x has_mask=0x%x" +qio_channel_websock_header_full_decode(void *ioc, size_t headerlen, size_t payloadlen, uint32_t mask) "Websocket header decoded ioc=%p header-len=%zu payload-len=%zu mask=0x%x" +qio_channel_websock_payload_decode(void *ioc, uint8_t opcode, size_t payload_remain) "Websocket header decoded ioc=%p opcode=0x%x payload-remain=%zu" +qio_channel_websock_encode(void *ioc, uint8_t opcode, size_t payloadlen, size_t headerlen) "Websocket encoded ioc=%p opcode=0x%x header-len=%zu payload-len=%zu" +qio_channel_websock_close(void *ioc) "Websocket close ioc=%p" + +# channel-command.c +qio_channel_command_new_pid(void *ioc, int writefd, int readfd, int pid) "Command new pid ioc=%p writefd=%d readfd=%d pid=%d" +qio_channel_command_new_spawn(void *ioc, const char *binary, int flags) "Command new spawn ioc=%p binary=%s flags=%d" +qio_channel_command_abort(void *ioc, int pid) "Command abort ioc=%p pid=%d" +qio_channel_command_wait(void *ioc, int pid, int ret, int status) "Command abort ioc=%p pid=%d ret=%d status=%d" diff --git a/io/trace.h b/io/trace.h new file mode 100644 index 000000000..92d63a5bf --- /dev/null +++ b/io/trace.h @@ -0,0 +1 @@ +#include "trace/trace-io.h" diff --git a/iothread.c b/iothread.c new file mode 100644 index 000000000..69eff9efb --- /dev/null +++ b/iothread.c @@ -0,0 +1,377 @@ +/* + * Event loop thread + * + * Copyright Red Hat Inc., 2013 + * + * Authors: + * Stefan Hajnoczi + * + * This work is licensed under the terms of the GNU GPL, version 2 or later. + * See the COPYING file in the top-level directory. + * + */ + +#include "qemu/osdep.h" +#include "qom/object.h" +#include "qom/object_interfaces.h" +#include "qemu/module.h" +#include "block/aio.h" +#include "block/block.h" +#include "sysemu/iothread.h" +#include "qapi/error.h" +#include "qapi/qapi-commands-misc.h" +#include "qemu/error-report.h" +#include "qemu/rcu.h" +#include "qemu/main-loop.h" + +typedef ObjectClass IOThreadClass; + +DECLARE_CLASS_CHECKERS(IOThreadClass, IOTHREAD, + TYPE_IOTHREAD) + +#ifdef CONFIG_POSIX +/* Benchmark results from 2016 on NVMe SSD drives show max polling times around + * 16-32 microseconds yield IOPS improvements for both iodepth=1 and iodepth=32 + * workloads. + */ +#define IOTHREAD_POLL_MAX_NS_DEFAULT 32768ULL +#else +#define IOTHREAD_POLL_MAX_NS_DEFAULT 0ULL +#endif + +static __thread IOThread *my_iothread; + +AioContext *qemu_get_current_aio_context(void) +{ + return my_iothread ? my_iothread->ctx : qemu_get_aio_context(); +} + +static void *iothread_run(void *opaque) +{ + IOThread *iothread = opaque; + + rcu_register_thread(); + /* + * g_main_context_push_thread_default() must be called before anything + * in this new thread uses glib. + */ + g_main_context_push_thread_default(iothread->worker_context); + my_iothread = iothread; + iothread->thread_id = qemu_get_thread_id(); + qemu_sem_post(&iothread->init_done_sem); + + while (iothread->running) { + /* + * Note: from functional-wise the g_main_loop_run() below can + * already cover the aio_poll() events, but we can't run the + * main loop unconditionally because explicit aio_poll() here + * is faster than g_main_loop_run() when we do not need the + * gcontext at all (e.g., pure block layer iothreads). In + * other words, when we want to run the gcontext with the + * iothread we need to pay some performance for functionality. + */ + aio_poll(iothread->ctx, true); + + /* + * We must check the running state again in case it was + * changed in previous aio_poll() + */ + if (iothread->running && qatomic_read(&iothread->run_gcontext)) { + g_main_loop_run(iothread->main_loop); + } + } + + g_main_context_pop_thread_default(iothread->worker_context); + rcu_unregister_thread(); + return NULL; +} + +/* Runs in iothread_run() thread */ +static void iothread_stop_bh(void *opaque) +{ + IOThread *iothread = opaque; + + iothread->running = false; /* stop iothread_run() */ + + if (iothread->main_loop) { + g_main_loop_quit(iothread->main_loop); + } +} + +void iothread_stop(IOThread *iothread) +{ + if (!iothread->ctx || iothread->stopping) { + return; + } + iothread->stopping = true; + aio_bh_schedule_oneshot(iothread->ctx, iothread_stop_bh, iothread); + qemu_thread_join(&iothread->thread); +} + +static void iothread_instance_init(Object *obj) +{ + IOThread *iothread = IOTHREAD(obj); + + iothread->poll_max_ns = IOTHREAD_POLL_MAX_NS_DEFAULT; + iothread->thread_id = -1; + qemu_sem_init(&iothread->init_done_sem, 0); + /* By default, we don't run gcontext */ + qatomic_set(&iothread->run_gcontext, 0); +} + +static void iothread_instance_finalize(Object *obj) +{ + IOThread *iothread = IOTHREAD(obj); + + iothread_stop(iothread); + + /* + * Before glib2 2.33.10, there is a glib2 bug that GSource context + * pointer may not be cleared even if the context has already been + * destroyed (while it should). Here let's free the AIO context + * earlier to bypass that glib bug. + * + * We can remove this comment after the minimum supported glib2 + * version boosts to 2.33.10. Before that, let's free the + * GSources first before destroying any GMainContext. + */ + if (iothread->ctx) { + aio_context_unref(iothread->ctx); + iothread->ctx = NULL; + } + if (iothread->worker_context) { + g_main_context_unref(iothread->worker_context); + iothread->worker_context = NULL; + g_main_loop_unref(iothread->main_loop); + iothread->main_loop = NULL; + } + qemu_sem_destroy(&iothread->init_done_sem); +} + +static void iothread_init_gcontext(IOThread *iothread) +{ + GSource *source; + + iothread->worker_context = g_main_context_new(); + source = aio_get_g_source(iothread_get_aio_context(iothread)); + g_source_attach(source, iothread->worker_context); + g_source_unref(source); + iothread->main_loop = g_main_loop_new(iothread->worker_context, TRUE); +} + +static void iothread_complete(UserCreatable *obj, Error **errp) +{ + Error *local_error = NULL; + IOThread *iothread = IOTHREAD(obj); + char *thread_name; + + iothread->stopping = false; + iothread->running = true; + iothread->ctx = aio_context_new(errp); + if (!iothread->ctx) { + return; + } + + /* + * Init one GMainContext for the iothread unconditionally, even if + * it's not used + */ + iothread_init_gcontext(iothread); + + aio_context_set_poll_params(iothread->ctx, + iothread->poll_max_ns, + iothread->poll_grow, + iothread->poll_shrink, + &local_error); + if (local_error) { + error_propagate(errp, local_error); + aio_context_unref(iothread->ctx); + iothread->ctx = NULL; + return; + } + + /* This assumes we are called from a thread with useful CPU affinity for us + * to inherit. + */ + thread_name = g_strdup_printf("IO %s", + object_get_canonical_path_component(OBJECT(obj))); + qemu_thread_create(&iothread->thread, thread_name, iothread_run, + iothread, QEMU_THREAD_JOINABLE); + g_free(thread_name); + + /* Wait for initialization to complete */ + while (iothread->thread_id == -1) { + qemu_sem_wait(&iothread->init_done_sem); + } +} + +typedef struct { + const char *name; + ptrdiff_t offset; /* field's byte offset in IOThread struct */ +} PollParamInfo; + +static PollParamInfo poll_max_ns_info = { + "poll-max-ns", offsetof(IOThread, poll_max_ns), +}; +static PollParamInfo poll_grow_info = { + "poll-grow", offsetof(IOThread, poll_grow), +}; +static PollParamInfo poll_shrink_info = { + "poll-shrink", offsetof(IOThread, poll_shrink), +}; + +static void iothread_get_poll_param(Object *obj, Visitor *v, + const char *name, void *opaque, Error **errp) +{ + IOThread *iothread = IOTHREAD(obj); + PollParamInfo *info = opaque; + int64_t *field = (void *)iothread + info->offset; + + visit_type_int64(v, name, field, errp); +} + +static void iothread_set_poll_param(Object *obj, Visitor *v, + const char *name, void *opaque, Error **errp) +{ + IOThread *iothread = IOTHREAD(obj); + PollParamInfo *info = opaque; + int64_t *field = (void *)iothread + info->offset; + int64_t value; + + if (!visit_type_int64(v, name, &value, errp)) { + return; + } + + if (value < 0) { + error_setg(errp, "%s value must be in range [0, %" PRId64 "]", + info->name, INT64_MAX); + return; + } + + *field = value; + + if (iothread->ctx) { + aio_context_set_poll_params(iothread->ctx, + iothread->poll_max_ns, + iothread->poll_grow, + iothread->poll_shrink, + errp); + } +} + +static void iothread_class_init(ObjectClass *klass, void *class_data) +{ + UserCreatableClass *ucc = USER_CREATABLE_CLASS(klass); + ucc->complete = iothread_complete; + + object_class_property_add(klass, "poll-max-ns", "int", + iothread_get_poll_param, + iothread_set_poll_param, + NULL, &poll_max_ns_info); + object_class_property_add(klass, "poll-grow", "int", + iothread_get_poll_param, + iothread_set_poll_param, + NULL, &poll_grow_info); + object_class_property_add(klass, "poll-shrink", "int", + iothread_get_poll_param, + iothread_set_poll_param, + NULL, &poll_shrink_info); +} + +static const TypeInfo iothread_info = { + .name = TYPE_IOTHREAD, + .parent = TYPE_OBJECT, + .class_init = iothread_class_init, + .instance_size = sizeof(IOThread), + .instance_init = iothread_instance_init, + .instance_finalize = iothread_instance_finalize, + .interfaces = (InterfaceInfo[]) { + {TYPE_USER_CREATABLE}, + {} + }, +}; + +static void iothread_register_types(void) +{ + type_register_static(&iothread_info); +} + +type_init(iothread_register_types) + +char *iothread_get_id(IOThread *iothread) +{ + return g_strdup(object_get_canonical_path_component(OBJECT(iothread))); +} + +AioContext *iothread_get_aio_context(IOThread *iothread) +{ + return iothread->ctx; +} + +static int query_one_iothread(Object *object, void *opaque) +{ + IOThreadInfoList ***prev = opaque; + IOThreadInfoList *elem; + IOThreadInfo *info; + IOThread *iothread; + + iothread = (IOThread *)object_dynamic_cast(object, TYPE_IOTHREAD); + if (!iothread) { + return 0; + } + + info = g_new0(IOThreadInfo, 1); + info->id = iothread_get_id(iothread); + info->thread_id = iothread->thread_id; + info->poll_max_ns = iothread->poll_max_ns; + info->poll_grow = iothread->poll_grow; + info->poll_shrink = iothread->poll_shrink; + + elem = g_new0(IOThreadInfoList, 1); + elem->value = info; + elem->next = NULL; + + **prev = elem; + *prev = &elem->next; + return 0; +} + +IOThreadInfoList *qmp_query_iothreads(Error **errp) +{ + IOThreadInfoList *head = NULL; + IOThreadInfoList **prev = &head; + Object *container = object_get_objects_root(); + + object_child_foreach(container, query_one_iothread, &prev); + return head; +} + +GMainContext *iothread_get_g_main_context(IOThread *iothread) +{ + qatomic_set(&iothread->run_gcontext, 1); + aio_notify(iothread->ctx); + return iothread->worker_context; +} + +IOThread *iothread_create(const char *id, Error **errp) +{ + Object *obj; + + obj = object_new_with_props(TYPE_IOTHREAD, + object_get_internal_root(), + id, errp, NULL); + + return IOTHREAD(obj); +} + +void iothread_destroy(IOThread *iothread) +{ + object_unparent(OBJECT(iothread)); +} + +/* Lookup IOThread by its id. Only finds user-created objects, not internal + * iothread_create() objects. */ +IOThread *iothread_by_id(const char *id) +{ + return IOTHREAD(object_resolve_path_type(id, TYPE_IOTHREAD, NULL)); +} diff --git a/job-qmp.c b/job-qmp.c new file mode 100644 index 000000000..645601b2c --- /dev/null +++ b/job-qmp.c @@ -0,0 +1,192 @@ +/* + * QMP interface for background jobs + * + * Copyright (c) 2011 IBM Corp. + * Copyright (c) 2012, 2018 Red Hat, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ + +#include "qemu/osdep.h" +#include "qemu/job.h" +#include "qapi/qapi-commands-job.h" +#include "qapi/error.h" +#include "trace/trace-root.h" + +/* Get a job using its ID and acquire its AioContext */ +static Job *find_job(const char *id, AioContext **aio_context, Error **errp) +{ + Job *job; + + *aio_context = NULL; + + job = job_get(id); + if (!job) { + error_setg(errp, "Job not found"); + return NULL; + } + + *aio_context = job->aio_context; + aio_context_acquire(*aio_context); + + return job; +} + +void qmp_job_cancel(const char *id, Error **errp) +{ + AioContext *aio_context; + Job *job = find_job(id, &aio_context, errp); + + if (!job) { + return; + } + + trace_qmp_job_cancel(job); + job_user_cancel(job, true, errp); + aio_context_release(aio_context); +} + +void qmp_job_pause(const char *id, Error **errp) +{ + AioContext *aio_context; + Job *job = find_job(id, &aio_context, errp); + + if (!job) { + return; + } + + trace_qmp_job_pause(job); + job_user_pause(job, errp); + aio_context_release(aio_context); +} + +void qmp_job_resume(const char *id, Error **errp) +{ + AioContext *aio_context; + Job *job = find_job(id, &aio_context, errp); + + if (!job) { + return; + } + + trace_qmp_job_resume(job); + job_user_resume(job, errp); + aio_context_release(aio_context); +} + +void qmp_job_complete(const char *id, Error **errp) +{ + AioContext *aio_context; + Job *job = find_job(id, &aio_context, errp); + + if (!job) { + return; + } + + trace_qmp_job_complete(job); + job_complete(job, errp); + aio_context_release(aio_context); +} + +void qmp_job_finalize(const char *id, Error **errp) +{ + AioContext *aio_context; + Job *job = find_job(id, &aio_context, errp); + + if (!job) { + return; + } + + trace_qmp_job_finalize(job); + job_ref(job); + job_finalize(job, errp); + + /* + * Job's context might have changed via job_finalize (and job_txn_apply + * automatically acquires the new one), so make sure we release the correct + * one. + */ + aio_context = job->aio_context; + job_unref(job); + aio_context_release(aio_context); +} + +void qmp_job_dismiss(const char *id, Error **errp) +{ + AioContext *aio_context; + Job *job = find_job(id, &aio_context, errp); + + if (!job) { + return; + } + + trace_qmp_job_dismiss(job); + job_dismiss(&job, errp); + aio_context_release(aio_context); +} + +static JobInfo *job_query_single(Job *job, Error **errp) +{ + JobInfo *info; + + assert(!job_is_internal(job)); + + info = g_new(JobInfo, 1); + *info = (JobInfo) { + .id = g_strdup(job->id), + .type = job_type(job), + .status = job->status, + .current_progress = job->progress.current, + .total_progress = job->progress.total, + .has_error = !!job->err, + .error = job->err ? \ + g_strdup(error_get_pretty(job->err)) : NULL, + }; + + return info; +} + +JobInfoList *qmp_query_jobs(Error **errp) +{ + JobInfoList *head = NULL, **p_next = &head; + Job *job; + + for (job = job_next(NULL); job; job = job_next(job)) { + JobInfoList *elem; + AioContext *aio_context; + + if (job_is_internal(job)) { + continue; + } + elem = g_new0(JobInfoList, 1); + aio_context = job->aio_context; + aio_context_acquire(aio_context); + elem->value = job_query_single(job, errp); + aio_context_release(aio_context); + if (!elem->value) { + g_free(elem); + qapi_free_JobInfoList(head); + return NULL; + } + *p_next = elem; + p_next = &elem->next; + } + + return head; +} diff --git a/job.c b/job.c new file mode 100644 index 000000000..8fecf3896 --- /dev/null +++ b/job.c @@ -0,0 +1,1019 @@ +/* + * Background jobs (long-running operations) + * + * Copyright (c) 2011 IBM Corp. + * Copyright (c) 2012, 2018 Red Hat, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ + +#include "qemu/osdep.h" +#include "qapi/error.h" +#include "qemu/job.h" +#include "qemu/id.h" +#include "qemu/main-loop.h" +#include "block/aio-wait.h" +#include "trace/trace-root.h" +#include "qapi/qapi-events-job.h" + +static QLIST_HEAD(, Job) jobs = QLIST_HEAD_INITIALIZER(jobs); + +/* Job State Transition Table */ +bool JobSTT[JOB_STATUS__MAX][JOB_STATUS__MAX] = { + /* U, C, R, P, Y, S, W, D, X, E, N */ + /* U: */ [JOB_STATUS_UNDEFINED] = {0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0}, + /* C: */ [JOB_STATUS_CREATED] = {0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 1}, + /* R: */ [JOB_STATUS_RUNNING] = {0, 0, 0, 1, 1, 0, 1, 0, 1, 0, 0}, + /* P: */ [JOB_STATUS_PAUSED] = {0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0}, + /* Y: */ [JOB_STATUS_READY] = {0, 0, 0, 0, 0, 1, 1, 0, 1, 0, 0}, + /* S: */ [JOB_STATUS_STANDBY] = {0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0}, + /* W: */ [JOB_STATUS_WAITING] = {0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0}, + /* D: */ [JOB_STATUS_PENDING] = {0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0}, + /* X: */ [JOB_STATUS_ABORTING] = {0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0}, + /* E: */ [JOB_STATUS_CONCLUDED] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1}, + /* N: */ [JOB_STATUS_NULL] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, +}; + +bool JobVerbTable[JOB_VERB__MAX][JOB_STATUS__MAX] = { + /* U, C, R, P, Y, S, W, D, X, E, N */ + [JOB_VERB_CANCEL] = {0, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0}, + [JOB_VERB_PAUSE] = {0, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0}, + [JOB_VERB_RESUME] = {0, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0}, + [JOB_VERB_SET_SPEED] = {0, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0}, + [JOB_VERB_COMPLETE] = {0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0}, + [JOB_VERB_FINALIZE] = {0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0}, + [JOB_VERB_DISMISS] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0}, +}; + +/* Transactional group of jobs */ +struct JobTxn { + + /* Is this txn being cancelled? */ + bool aborting; + + /* List of jobs */ + QLIST_HEAD(, Job) jobs; + + /* Reference count */ + int refcnt; +}; + +/* Right now, this mutex is only needed to synchronize accesses to job->busy + * and job->sleep_timer, such as concurrent calls to job_do_yield and + * job_enter. */ +static QemuMutex job_mutex; + +static void job_lock(void) +{ + qemu_mutex_lock(&job_mutex); +} + +static void job_unlock(void) +{ + qemu_mutex_unlock(&job_mutex); +} + +static void __attribute__((__constructor__)) job_init(void) +{ + qemu_mutex_init(&job_mutex); +} + +JobTxn *job_txn_new(void) +{ + JobTxn *txn = g_new0(JobTxn, 1); + QLIST_INIT(&txn->jobs); + txn->refcnt = 1; + return txn; +} + +static void job_txn_ref(JobTxn *txn) +{ + txn->refcnt++; +} + +void job_txn_unref(JobTxn *txn) +{ + if (txn && --txn->refcnt == 0) { + g_free(txn); + } +} + +void job_txn_add_job(JobTxn *txn, Job *job) +{ + if (!txn) { + return; + } + + assert(!job->txn); + job->txn = txn; + + QLIST_INSERT_HEAD(&txn->jobs, job, txn_list); + job_txn_ref(txn); +} + +static void job_txn_del_job(Job *job) +{ + if (job->txn) { + QLIST_REMOVE(job, txn_list); + job_txn_unref(job->txn); + job->txn = NULL; + } +} + +static int job_txn_apply(Job *job, int fn(Job *)) +{ + AioContext *inner_ctx; + Job *other_job, *next; + JobTxn *txn = job->txn; + int rc = 0; + + /* + * Similar to job_completed_txn_abort, we take each job's lock before + * applying fn, but since we assume that outer_ctx is held by the caller, + * we need to release it here to avoid holding the lock twice - which would + * break AIO_WAIT_WHILE from within fn. + */ + job_ref(job); + aio_context_release(job->aio_context); + + QLIST_FOREACH_SAFE(other_job, &txn->jobs, txn_list, next) { + inner_ctx = other_job->aio_context; + aio_context_acquire(inner_ctx); + rc = fn(other_job); + aio_context_release(inner_ctx); + if (rc) { + break; + } + } + + /* + * Note that job->aio_context might have been changed by calling fn, so we + * can't use a local variable to cache it. + */ + aio_context_acquire(job->aio_context); + job_unref(job); + return rc; +} + +bool job_is_internal(Job *job) +{ + return (job->id == NULL); +} + +static void job_state_transition(Job *job, JobStatus s1) +{ + JobStatus s0 = job->status; + assert(s1 >= 0 && s1 < JOB_STATUS__MAX); + trace_job_state_transition(job, job->ret, + JobSTT[s0][s1] ? "allowed" : "disallowed", + JobStatus_str(s0), JobStatus_str(s1)); + assert(JobSTT[s0][s1]); + job->status = s1; + + if (!job_is_internal(job) && s1 != s0) { + qapi_event_send_job_status_change(job->id, job->status); + } +} + +int job_apply_verb(Job *job, JobVerb verb, Error **errp) +{ + JobStatus s0 = job->status; + assert(verb >= 0 && verb < JOB_VERB__MAX); + trace_job_apply_verb(job, JobStatus_str(s0), JobVerb_str(verb), + JobVerbTable[verb][s0] ? "allowed" : "prohibited"); + if (JobVerbTable[verb][s0]) { + return 0; + } + error_setg(errp, "Job '%s' in state '%s' cannot accept command verb '%s'", + job->id, JobStatus_str(s0), JobVerb_str(verb)); + return -EPERM; +} + +JobType job_type(const Job *job) +{ + return job->driver->job_type; +} + +const char *job_type_str(const Job *job) +{ + return JobType_str(job_type(job)); +} + +bool job_is_cancelled(Job *job) +{ + return job->cancelled; +} + +bool job_is_ready(Job *job) +{ + switch (job->status) { + case JOB_STATUS_UNDEFINED: + case JOB_STATUS_CREATED: + case JOB_STATUS_RUNNING: + case JOB_STATUS_PAUSED: + case JOB_STATUS_WAITING: + case JOB_STATUS_PENDING: + case JOB_STATUS_ABORTING: + case JOB_STATUS_CONCLUDED: + case JOB_STATUS_NULL: + return false; + case JOB_STATUS_READY: + case JOB_STATUS_STANDBY: + return true; + default: + g_assert_not_reached(); + } + return false; +} + +bool job_is_completed(Job *job) +{ + switch (job->status) { + case JOB_STATUS_UNDEFINED: + case JOB_STATUS_CREATED: + case JOB_STATUS_RUNNING: + case JOB_STATUS_PAUSED: + case JOB_STATUS_READY: + case JOB_STATUS_STANDBY: + return false; + case JOB_STATUS_WAITING: + case JOB_STATUS_PENDING: + case JOB_STATUS_ABORTING: + case JOB_STATUS_CONCLUDED: + case JOB_STATUS_NULL: + return true; + default: + g_assert_not_reached(); + } + return false; +} + +static bool job_started(Job *job) +{ + return job->co; +} + +static bool job_should_pause(Job *job) +{ + return job->pause_count > 0; +} + +Job *job_next(Job *job) +{ + if (!job) { + return QLIST_FIRST(&jobs); + } + return QLIST_NEXT(job, job_list); +} + +Job *job_get(const char *id) +{ + Job *job; + + QLIST_FOREACH(job, &jobs, job_list) { + if (job->id && !strcmp(id, job->id)) { + return job; + } + } + + return NULL; +} + +static void job_sleep_timer_cb(void *opaque) +{ + Job *job = opaque; + + job_enter(job); +} + +void *job_create(const char *job_id, const JobDriver *driver, JobTxn *txn, + AioContext *ctx, int flags, BlockCompletionFunc *cb, + void *opaque, Error **errp) +{ + Job *job; + + if (job_id) { + if (flags & JOB_INTERNAL) { + error_setg(errp, "Cannot specify job ID for internal job"); + return NULL; + } + if (!id_wellformed(job_id)) { + error_setg(errp, "Invalid job ID '%s'", job_id); + return NULL; + } + if (job_get(job_id)) { + error_setg(errp, "Job ID '%s' already in use", job_id); + return NULL; + } + } else if (!(flags & JOB_INTERNAL)) { + error_setg(errp, "An explicit job ID is required"); + return NULL; + } + + job = g_malloc0(driver->instance_size); + job->driver = driver; + job->id = g_strdup(job_id); + job->refcnt = 1; + job->aio_context = ctx; + job->busy = false; + job->paused = true; + job->pause_count = 1; + job->auto_finalize = !(flags & JOB_MANUAL_FINALIZE); + job->auto_dismiss = !(flags & JOB_MANUAL_DISMISS); + job->cb = cb; + job->opaque = opaque; + + notifier_list_init(&job->on_finalize_cancelled); + notifier_list_init(&job->on_finalize_completed); + notifier_list_init(&job->on_pending); + notifier_list_init(&job->on_ready); + + job_state_transition(job, JOB_STATUS_CREATED); + aio_timer_init(qemu_get_aio_context(), &job->sleep_timer, + QEMU_CLOCK_REALTIME, SCALE_NS, + job_sleep_timer_cb, job); + + QLIST_INSERT_HEAD(&jobs, job, job_list); + + /* Single jobs are modeled as single-job transactions for sake of + * consolidating the job management logic */ + if (!txn) { + txn = job_txn_new(); + job_txn_add_job(txn, job); + job_txn_unref(txn); + } else { + job_txn_add_job(txn, job); + } + + return job; +} + +void job_ref(Job *job) +{ + ++job->refcnt; +} + +void job_unref(Job *job) +{ + if (--job->refcnt == 0) { + assert(job->status == JOB_STATUS_NULL); + assert(!timer_pending(&job->sleep_timer)); + assert(!job->txn); + + if (job->driver->free) { + job->driver->free(job); + } + + QLIST_REMOVE(job, job_list); + + error_free(job->err); + g_free(job->id); + g_free(job); + } +} + +void job_progress_update(Job *job, uint64_t done) +{ + progress_work_done(&job->progress, done); +} + +void job_progress_set_remaining(Job *job, uint64_t remaining) +{ + progress_set_remaining(&job->progress, remaining); +} + +void job_progress_increase_remaining(Job *job, uint64_t delta) +{ + progress_increase_remaining(&job->progress, delta); +} + +void job_event_cancelled(Job *job) +{ + notifier_list_notify(&job->on_finalize_cancelled, job); +} + +void job_event_completed(Job *job) +{ + notifier_list_notify(&job->on_finalize_completed, job); +} + +static void job_event_pending(Job *job) +{ + notifier_list_notify(&job->on_pending, job); +} + +static void job_event_ready(Job *job) +{ + notifier_list_notify(&job->on_ready, job); +} + +static void job_event_idle(Job *job) +{ + notifier_list_notify(&job->on_idle, job); +} + +void job_enter_cond(Job *job, bool(*fn)(Job *job)) +{ + if (!job_started(job)) { + return; + } + if (job->deferred_to_main_loop) { + return; + } + + job_lock(); + if (job->busy) { + job_unlock(); + return; + } + + if (fn && !fn(job)) { + job_unlock(); + return; + } + + assert(!job->deferred_to_main_loop); + timer_del(&job->sleep_timer); + job->busy = true; + job_unlock(); + aio_co_enter(job->aio_context, job->co); +} + +void job_enter(Job *job) +{ + job_enter_cond(job, NULL); +} + +/* Yield, and schedule a timer to reenter the coroutine after @ns nanoseconds. + * Reentering the job coroutine with job_enter() before the timer has expired + * is allowed and cancels the timer. + * + * If @ns is (uint64_t) -1, no timer is scheduled and job_enter() must be + * called explicitly. */ +static void coroutine_fn job_do_yield(Job *job, uint64_t ns) +{ + job_lock(); + if (ns != -1) { + timer_mod(&job->sleep_timer, ns); + } + job->busy = false; + job_event_idle(job); + job_unlock(); + qemu_coroutine_yield(); + + /* Set by job_enter_cond() before re-entering the coroutine. */ + assert(job->busy); +} + +void coroutine_fn job_pause_point(Job *job) +{ + assert(job && job_started(job)); + + if (!job_should_pause(job)) { + return; + } + if (job_is_cancelled(job)) { + return; + } + + if (job->driver->pause) { + job->driver->pause(job); + } + + if (job_should_pause(job) && !job_is_cancelled(job)) { + JobStatus status = job->status; + job_state_transition(job, status == JOB_STATUS_READY + ? JOB_STATUS_STANDBY + : JOB_STATUS_PAUSED); + job->paused = true; + job_do_yield(job, -1); + job->paused = false; + job_state_transition(job, status); + } + + if (job->driver->resume) { + job->driver->resume(job); + } +} + +void job_yield(Job *job) +{ + assert(job->busy); + + /* Check cancellation *before* setting busy = false, too! */ + if (job_is_cancelled(job)) { + return; + } + + if (!job_should_pause(job)) { + job_do_yield(job, -1); + } + + job_pause_point(job); +} + +void coroutine_fn job_sleep_ns(Job *job, int64_t ns) +{ + assert(job->busy); + + /* Check cancellation *before* setting busy = false, too! */ + if (job_is_cancelled(job)) { + return; + } + + if (!job_should_pause(job)) { + job_do_yield(job, qemu_clock_get_ns(QEMU_CLOCK_REALTIME) + ns); + } + + job_pause_point(job); +} + +/* Assumes the block_job_mutex is held */ +static bool job_timer_not_pending(Job *job) +{ + return !timer_pending(&job->sleep_timer); +} + +void job_pause(Job *job) +{ + job->pause_count++; +} + +void job_resume(Job *job) +{ + assert(job->pause_count > 0); + job->pause_count--; + if (job->pause_count) { + return; + } + + /* kick only if no timer is pending */ + job_enter_cond(job, job_timer_not_pending); +} + +void job_user_pause(Job *job, Error **errp) +{ + if (job_apply_verb(job, JOB_VERB_PAUSE, errp)) { + return; + } + if (job->user_paused) { + error_setg(errp, "Job is already paused"); + return; + } + job->user_paused = true; + job_pause(job); +} + +bool job_user_paused(Job *job) +{ + return job->user_paused; +} + +void job_user_resume(Job *job, Error **errp) +{ + assert(job); + if (!job->user_paused || job->pause_count <= 0) { + error_setg(errp, "Can't resume a job that was not paused"); + return; + } + if (job_apply_verb(job, JOB_VERB_RESUME, errp)) { + return; + } + if (job->driver->user_resume) { + job->driver->user_resume(job); + } + job->user_paused = false; + job_resume(job); +} + +static void job_do_dismiss(Job *job) +{ + assert(job); + job->busy = false; + job->paused = false; + job->deferred_to_main_loop = true; + + job_txn_del_job(job); + + job_state_transition(job, JOB_STATUS_NULL); + job_unref(job); +} + +void job_dismiss(Job **jobptr, Error **errp) +{ + Job *job = *jobptr; + /* similarly to _complete, this is QMP-interface only. */ + assert(job->id); + if (job_apply_verb(job, JOB_VERB_DISMISS, errp)) { + return; + } + + job_do_dismiss(job); + *jobptr = NULL; +} + +void job_early_fail(Job *job) +{ + assert(job->status == JOB_STATUS_CREATED); + job_do_dismiss(job); +} + +static void job_conclude(Job *job) +{ + job_state_transition(job, JOB_STATUS_CONCLUDED); + if (job->auto_dismiss || !job_started(job)) { + job_do_dismiss(job); + } +} + +static void job_update_rc(Job *job) +{ + if (!job->ret && job_is_cancelled(job)) { + job->ret = -ECANCELED; + } + if (job->ret) { + if (!job->err) { + error_setg(&job->err, "%s", strerror(-job->ret)); + } + job_state_transition(job, JOB_STATUS_ABORTING); + } +} + +static void job_commit(Job *job) +{ + assert(!job->ret); + if (job->driver->commit) { + job->driver->commit(job); + } +} + +static void job_abort(Job *job) +{ + assert(job->ret); + if (job->driver->abort) { + job->driver->abort(job); + } +} + +static void job_clean(Job *job) +{ + if (job->driver->clean) { + job->driver->clean(job); + } +} + +static int job_finalize_single(Job *job) +{ + assert(job_is_completed(job)); + + /* Ensure abort is called for late-transactional failures */ + job_update_rc(job); + + if (!job->ret) { + job_commit(job); + } else { + job_abort(job); + } + job_clean(job); + + if (job->cb) { + job->cb(job->opaque, job->ret); + } + + /* Emit events only if we actually started */ + if (job_started(job)) { + if (job_is_cancelled(job)) { + job_event_cancelled(job); + } else { + job_event_completed(job); + } + } + + job_txn_del_job(job); + job_conclude(job); + return 0; +} + +static void job_cancel_async(Job *job, bool force) +{ + if (job->user_paused) { + /* Do not call job_enter here, the caller will handle it. */ + if (job->driver->user_resume) { + job->driver->user_resume(job); + } + job->user_paused = false; + assert(job->pause_count > 0); + job->pause_count--; + } + job->cancelled = true; + /* To prevent 'force == false' overriding a previous 'force == true' */ + job->force_cancel |= force; +} + +static void job_completed_txn_abort(Job *job) +{ + AioContext *outer_ctx = job->aio_context; + AioContext *ctx; + JobTxn *txn = job->txn; + Job *other_job; + + if (txn->aborting) { + /* + * We are cancelled by another job, which will handle everything. + */ + return; + } + txn->aborting = true; + job_txn_ref(txn); + + /* We can only hold the single job's AioContext lock while calling + * job_finalize_single() because the finalization callbacks can involve + * calls of AIO_WAIT_WHILE(), which could deadlock otherwise. */ + aio_context_release(outer_ctx); + + /* Other jobs are effectively cancelled by us, set the status for + * them; this job, however, may or may not be cancelled, depending + * on the caller, so leave it. */ + QLIST_FOREACH(other_job, &txn->jobs, txn_list) { + if (other_job != job) { + ctx = other_job->aio_context; + aio_context_acquire(ctx); + job_cancel_async(other_job, false); + aio_context_release(ctx); + } + } + while (!QLIST_EMPTY(&txn->jobs)) { + other_job = QLIST_FIRST(&txn->jobs); + ctx = other_job->aio_context; + aio_context_acquire(ctx); + if (!job_is_completed(other_job)) { + assert(job_is_cancelled(other_job)); + job_finish_sync(other_job, NULL, NULL); + } + job_finalize_single(other_job); + aio_context_release(ctx); + } + + aio_context_acquire(outer_ctx); + + job_txn_unref(txn); +} + +static int job_prepare(Job *job) +{ + if (job->ret == 0 && job->driver->prepare) { + job->ret = job->driver->prepare(job); + job_update_rc(job); + } + return job->ret; +} + +static int job_needs_finalize(Job *job) +{ + return !job->auto_finalize; +} + +static void job_do_finalize(Job *job) +{ + int rc; + assert(job && job->txn); + + /* prepare the transaction to complete */ + rc = job_txn_apply(job, job_prepare); + if (rc) { + job_completed_txn_abort(job); + } else { + job_txn_apply(job, job_finalize_single); + } +} + +void job_finalize(Job *job, Error **errp) +{ + assert(job && job->id); + if (job_apply_verb(job, JOB_VERB_FINALIZE, errp)) { + return; + } + job_do_finalize(job); +} + +static int job_transition_to_pending(Job *job) +{ + job_state_transition(job, JOB_STATUS_PENDING); + if (!job->auto_finalize) { + job_event_pending(job); + } + return 0; +} + +void job_transition_to_ready(Job *job) +{ + job_state_transition(job, JOB_STATUS_READY); + job_event_ready(job); +} + +static void job_completed_txn_success(Job *job) +{ + JobTxn *txn = job->txn; + Job *other_job; + + job_state_transition(job, JOB_STATUS_WAITING); + + /* + * Successful completion, see if there are other running jobs in this + * txn. + */ + QLIST_FOREACH(other_job, &txn->jobs, txn_list) { + if (!job_is_completed(other_job)) { + return; + } + assert(other_job->ret == 0); + } + + job_txn_apply(job, job_transition_to_pending); + + /* If no jobs need manual finalization, automatically do so */ + if (job_txn_apply(job, job_needs_finalize) == 0) { + job_do_finalize(job); + } +} + +static void job_completed(Job *job) +{ + assert(job && job->txn && !job_is_completed(job)); + + job_update_rc(job); + trace_job_completed(job, job->ret); + if (job->ret) { + job_completed_txn_abort(job); + } else { + job_completed_txn_success(job); + } +} + +/** Useful only as a type shim for aio_bh_schedule_oneshot. */ +static void job_exit(void *opaque) +{ + Job *job = (Job *)opaque; + AioContext *ctx; + + job_ref(job); + aio_context_acquire(job->aio_context); + + /* This is a lie, we're not quiescent, but still doing the completion + * callbacks. However, completion callbacks tend to involve operations that + * drain block nodes, and if .drained_poll still returned true, we would + * deadlock. */ + job->busy = false; + job_event_idle(job); + + job_completed(job); + + /* + * Note that calling job_completed can move the job to a different + * aio_context, so we cannot cache from above. job_txn_apply takes care of + * acquiring the new lock, and we ref/unref to avoid job_completed freeing + * the job underneath us. + */ + ctx = job->aio_context; + job_unref(job); + aio_context_release(ctx); +} + +/** + * All jobs must allow a pause point before entering their job proper. This + * ensures that jobs can be paused prior to being started, then resumed later. + */ +static void coroutine_fn job_co_entry(void *opaque) +{ + Job *job = opaque; + + assert(job && job->driver && job->driver->run); + job_pause_point(job); + job->ret = job->driver->run(job, &job->err); + job->deferred_to_main_loop = true; + job->busy = true; + aio_bh_schedule_oneshot(qemu_get_aio_context(), job_exit, job); +} + +void job_start(Job *job) +{ + assert(job && !job_started(job) && job->paused && + job->driver && job->driver->run); + job->co = qemu_coroutine_create(job_co_entry, job); + job->pause_count--; + job->busy = true; + job->paused = false; + job_state_transition(job, JOB_STATUS_RUNNING); + aio_co_enter(job->aio_context, job->co); +} + +void job_cancel(Job *job, bool force) +{ + if (job->status == JOB_STATUS_CONCLUDED) { + job_do_dismiss(job); + return; + } + job_cancel_async(job, force); + if (!job_started(job)) { + job_completed(job); + } else if (job->deferred_to_main_loop) { + job_completed_txn_abort(job); + } else { + job_enter(job); + } +} + +void job_user_cancel(Job *job, bool force, Error **errp) +{ + if (job_apply_verb(job, JOB_VERB_CANCEL, errp)) { + return; + } + job_cancel(job, force); +} + +/* A wrapper around job_cancel() taking an Error ** parameter so it may be + * used with job_finish_sync() without the need for (rather nasty) function + * pointer casts there. */ +static void job_cancel_err(Job *job, Error **errp) +{ + job_cancel(job, false); +} + +int job_cancel_sync(Job *job) +{ + return job_finish_sync(job, &job_cancel_err, NULL); +} + +void job_cancel_sync_all(void) +{ + Job *job; + AioContext *aio_context; + + while ((job = job_next(NULL))) { + aio_context = job->aio_context; + aio_context_acquire(aio_context); + job_cancel_sync(job); + aio_context_release(aio_context); + } +} + +int job_complete_sync(Job *job, Error **errp) +{ + return job_finish_sync(job, job_complete, errp); +} + +void job_complete(Job *job, Error **errp) +{ + /* Should not be reachable via external interface for internal jobs */ + assert(job->id); + if (job_apply_verb(job, JOB_VERB_COMPLETE, errp)) { + return; + } + if (job->pause_count || job_is_cancelled(job) || !job->driver->complete) { + error_setg(errp, "The active block job '%s' cannot be completed", + job->id); + return; + } + + job->driver->complete(job, errp); +} + +int job_finish_sync(Job *job, void (*finish)(Job *, Error **errp), Error **errp) +{ + Error *local_err = NULL; + int ret; + + job_ref(job); + + if (finish) { + finish(job, &local_err); + } + if (local_err) { + error_propagate(errp, local_err); + job_unref(job); + return -EBUSY; + } + + AIO_WAIT_WHILE(job->aio_context, + (job_enter(job), !job_is_completed(job))); + + ret = (job_is_cancelled(job) && job->ret == 0) ? -ECANCELED : job->ret; + job_unref(job); + return ret; +} diff --git a/meson.build b/meson.build new file mode 100644 index 000000000..135b40a1e --- /dev/null +++ b/meson.build @@ -0,0 +1,2246 @@ +project('qemu', ['c'], meson_version: '>=0.55.0', + default_options: ['warning_level=1', 'c_std=gnu99', 'cpp_std=gnu++11', 'b_colorout=auto'] + + (meson.version().version_compare('>=0.56.0') ? [ 'b_staticpic=false' ] : []), + version: run_command('head', meson.source_root() / 'VERSION').stdout().strip()) + +not_found = dependency('', required: false) +if meson.version().version_compare('>=0.56.0') + keyval = import('keyval') +else + keyval = import('unstable-keyval') +endif +ss = import('sourceset') +fs = import('fs') + +sh = find_program('sh') +cc = meson.get_compiler('c') +config_host = keyval.load(meson.current_build_dir() / 'config-host.mak') +enable_modules = 'CONFIG_MODULES' in config_host +enable_static = 'CONFIG_STATIC' in config_host + +# Temporary directory used for files created while +# configure runs. Since it is in the build directory +# we can safely blow away any previous version of it +# (and we need not jump through hoops to try to delete +# it when configure exits.) +tmpdir = meson.current_build_dir() / 'meson-private/temp' + +if get_option('qemu_suffix').startswith('/') + error('qemu_suffix cannot start with a /') +endif + +qemu_confdir = get_option('sysconfdir') / get_option('qemu_suffix') +qemu_datadir = get_option('datadir') / get_option('qemu_suffix') +qemu_docdir = get_option('docdir') / get_option('qemu_suffix') +qemu_moddir = get_option('libdir') / get_option('qemu_suffix') + +qemu_desktopdir = get_option('datadir') / 'applications' +qemu_icondir = get_option('datadir') / 'icons' + +config_host_data = configuration_data() +genh = [] + +target_dirs = config_host['TARGET_DIRS'].split() +have_user = false +have_system = false +foreach target : target_dirs + have_user = have_user or target.endswith('-user') + have_system = have_system or target.endswith('-softmmu') +endforeach +have_tools = 'CONFIG_TOOLS' in config_host +have_block = have_system or have_tools + +python = import('python').find_installation() + +supported_oses = ['windows', 'freebsd', 'netbsd', 'openbsd', 'darwin', 'sunos', 'linux'] +supported_cpus = ['ppc', 'ppc64', 's390x', 'riscv32', 'riscv64', 'x86', 'x86_64', + 'arm', 'aarch64', 'mips', 'mips64', 'sparc', 'sparc64'] + +cpu = host_machine.cpu_family() +targetos = host_machine.system() + +if cpu in ['x86', 'x86_64'] + kvm_targets = ['i386-softmmu', 'x86_64-softmmu'] +elif cpu == 'aarch64' + kvm_targets = ['aarch64-softmmu'] +elif cpu == 's390x' + kvm_targets = ['s390x-softmmu'] +elif cpu in ['ppc', 'ppc64'] + kvm_targets = ['ppc-softmmu', 'ppc64-softmmu'] +elif cpu in ['mips', 'mips64'] + kvm_targets = ['mips-softmmu', 'mipsel-softmmu', 'mips64-softmmu', 'mips64el-softmmu'] +else + kvm_targets = [] +endif + +accelerator_targets = { 'CONFIG_KVM': kvm_targets } +if cpu in ['x86', 'x86_64', 'arm', 'aarch64'] + # i368 emulator provides xenpv machine type for multiple architectures + accelerator_targets += { + 'CONFIG_XEN': ['i386-softmmu', 'x86_64-softmmu'], + } +endif +if cpu in ['x86', 'x86_64'] + accelerator_targets += { + 'CONFIG_HAX': ['i386-softmmu', 'x86_64-softmmu'], + 'CONFIG_HVF': ['x86_64-softmmu'], + 'CONFIG_WHPX': ['i386-softmmu', 'x86_64-softmmu'], + } +endif + +################## +# Compiler flags # +################## + +# Specify linker-script with add_project_link_arguments so that it is not placed +# within a linker --start-group/--end-group pair +if 'CONFIG_FUZZ' in config_host + add_project_link_arguments(['-Wl,-T,', + (meson.current_source_dir() / 'tests/qtest/fuzz/fork_fuzz.ld')], + native: false, language: ['c', 'cpp', 'objc']) +endif + +add_project_arguments(config_host['QEMU_CFLAGS'].split(), + native: false, language: ['c', 'objc']) +add_project_arguments(config_host['QEMU_CXXFLAGS'].split(), + native: false, language: 'cpp') +add_project_link_arguments(config_host['QEMU_LDFLAGS'].split(), + native: false, language: ['c', 'cpp', 'objc']) + +if targetos == 'linux' + add_project_arguments('-isystem', meson.current_source_dir() / 'linux-headers', + '-isystem', 'linux-headers', + language: ['c', 'cpp']) +endif + +if 'CONFIG_TCG_INTERPRETER' in config_host + tcg_arch = 'tci' +elif config_host['ARCH'] == 'sparc64' + tcg_arch = 'sparc' +elif config_host['ARCH'] == 's390x' + tcg_arch = 's390' +elif config_host['ARCH'] in ['x86_64', 'x32'] + tcg_arch = 'i386' +elif config_host['ARCH'] == 'ppc64' + tcg_arch = 'ppc' +elif config_host['ARCH'] in ['riscv32', 'riscv64'] + tcg_arch = 'riscv' +else + tcg_arch = config_host['ARCH'] +endif +add_project_arguments('-iquote', meson.current_source_dir() / 'tcg' / tcg_arch, + '-iquote', '.', + '-iquote', meson.current_source_dir(), + '-iquote', meson.current_source_dir() / 'accel/tcg', + '-iquote', meson.current_source_dir() / 'include', + '-iquote', meson.current_source_dir() / 'disas/libvixl', + language: ['c', 'cpp', 'objc']) + +link_language = meson.get_external_property('link_language', 'cpp') +if link_language == 'cpp' + add_languages('cpp', required: true, native: false) +endif +if host_machine.system() == 'darwin' + add_languages('objc', required: false, native: false) +endif + +sparse = find_program('cgcc', required: get_option('sparse')) +if sparse.found() + run_target('sparse', + command: [find_program('scripts/check_sparse.py'), + 'compile_commands.json', sparse.full_path(), '-Wbitwise', + '-Wno-transparent-union', '-Wno-old-initializer', + '-Wno-non-pointer-null']) +endif + +########################################### +# Target-specific checks and dependencies # +########################################### + +if targetos != 'linux' and get_option('mpath').enabled() + error('Multipath is supported only on Linux') +endif + +m = cc.find_library('m', required: false) +util = cc.find_library('util', required: false) +winmm = [] +socket = [] +version_res = [] +coref = [] +iokit = [] +emulator_link_args = [] +cocoa = not_found +hvf = not_found +if targetos == 'windows' + socket = cc.find_library('ws2_32') + winmm = cc.find_library('winmm') + + win = import('windows') + version_res = win.compile_resources('version.rc', + depend_files: files('pc-bios/qemu-nsis.ico'), + include_directories: include_directories('.')) +elif targetos == 'darwin' + coref = dependency('appleframeworks', modules: 'CoreFoundation') + iokit = dependency('appleframeworks', modules: 'IOKit') + cocoa = dependency('appleframeworks', modules: 'Cocoa', required: get_option('cocoa')) +elif targetos == 'sunos' + socket = [cc.find_library('socket'), + cc.find_library('nsl'), + cc.find_library('resolv')] +elif targetos == 'haiku' + socket = [cc.find_library('posix_error_mapper'), + cc.find_library('network'), + cc.find_library('bsd')] +elif targetos == 'openbsd' + if not get_option('tcg').disabled() and target_dirs.length() > 0 + # Disable OpenBSD W^X if available + emulator_link_args = cc.get_supported_link_arguments('-Wl,-z,wxneeded') + endif +endif + +accelerators = [] +if not get_option('kvm').disabled() and targetos == 'linux' + accelerators += 'CONFIG_KVM' +endif +if not get_option('xen').disabled() and 'CONFIG_XEN_BACKEND' in config_host + accelerators += 'CONFIG_XEN' + have_xen_pci_passthrough = not get_option('xen_pci_passthrough').disabled() and targetos == 'linux' +else + have_xen_pci_passthrough = false +endif +if not get_option('whpx').disabled() and targetos == 'windows' + if get_option('whpx').enabled() and host_machine.cpu() != 'x86_64' + error('WHPX requires 64-bit host') + elif cc.has_header('WinHvPlatform.h', required: get_option('whpx')) and \ + cc.has_header('WinHvEmulation.h', required: get_option('whpx')) + accelerators += 'CONFIG_WHPX' + endif +endif +if not get_option('hvf').disabled() + hvf = dependency('appleframeworks', modules: 'Hypervisor', + required: get_option('hvf')) + if hvf.found() + accelerators += 'CONFIG_HVF' + endif +endif +if not get_option('hax').disabled() + if get_option('hax').enabled() or targetos in ['windows', 'darwin', 'netbsd'] + accelerators += 'CONFIG_HAX' + endif +endif +if not get_option('tcg').disabled() + if cpu not in supported_cpus + if 'CONFIG_TCG_INTERPRETER' in config_host + warning('Unsupported CPU @0@, will use TCG with TCI (experimental)'.format(cpu)) + else + error('Unsupported CPU @0@, try --enable-tcg-interpreter'.format(cpu)) + endif + endif + accelerators += 'CONFIG_TCG' + config_host += { 'CONFIG_TCG': 'y' } +endif + +if 'CONFIG_KVM' not in accelerators and get_option('kvm').enabled() + error('KVM not available on this platform') +endif +if 'CONFIG_HVF' not in accelerators and get_option('hvf').enabled() + error('HVF not available on this platform') +endif +if 'CONFIG_WHPX' not in accelerators and get_option('whpx').enabled() + error('WHPX not available on this platform') +endif +if not have_xen_pci_passthrough and get_option('xen_pci_passthrough').enabled() + if 'CONFIG_XEN' in accelerators + error('Xen PCI passthrough not available on this platform') + else + error('Xen PCI passthrough requested but Xen not enabled') + endif +endif +if not cocoa.found() and get_option('cocoa').enabled() + error('Cocoa not available on this platform') +endif + +################ +# Dependencies # +################ + +# The path to glib.h is added to all compilation commands. This was +# grandfathered in from the QEMU Makefiles. +add_project_arguments(config_host['GLIB_CFLAGS'].split(), + native: false, language: ['c', 'cpp', 'objc']) +glib = declare_dependency(link_args: config_host['GLIB_LIBS'].split()) + +# pass down whether Glib has the slice allocator +if config_host.has_key('HAVE_GLIB_WITH_SLICE_ALLOCATOR') + config_host_data.set('HAVE_GLIB_WITH_SLICE_ALLOCATOR', true) +endif + +gio = not_found +if 'CONFIG_GIO' in config_host + gio = declare_dependency(compile_args: config_host['GIO_CFLAGS'].split(), + link_args: config_host['GIO_LIBS'].split()) +endif +lttng = not_found +if 'CONFIG_TRACE_UST' in config_host + lttng = declare_dependency(link_args: config_host['LTTNG_UST_LIBS'].split()) +endif +urcubp = not_found +if 'CONFIG_TRACE_UST' in config_host + urcubp = declare_dependency(link_args: config_host['URCU_BP_LIBS'].split()) +endif +gcrypt = not_found +if 'CONFIG_GCRYPT' in config_host + gcrypt = declare_dependency(compile_args: config_host['GCRYPT_CFLAGS'].split(), + link_args: config_host['GCRYPT_LIBS'].split()) +endif +nettle = not_found +if 'CONFIG_NETTLE' in config_host + nettle = declare_dependency(compile_args: config_host['NETTLE_CFLAGS'].split(), + link_args: config_host['NETTLE_LIBS'].split()) +endif +gnutls = not_found +if 'CONFIG_GNUTLS' in config_host + gnutls = declare_dependency(compile_args: config_host['GNUTLS_CFLAGS'].split(), + link_args: config_host['GNUTLS_LIBS'].split()) +endif +pixman = not_found +if have_system or have_tools + pixman = dependency('pixman-1', required: have_system, version:'>=0.21.8', + method: 'pkg-config', static: enable_static) +endif +pam = not_found +if 'CONFIG_AUTH_PAM' in config_host + pam = cc.find_library('pam') +endif +libaio = cc.find_library('aio', required: false) +zlib = dependency('zlib', required: true, static: enable_static) +linux_io_uring = not_found +if 'CONFIG_LINUX_IO_URING' in config_host + linux_io_uring = declare_dependency(compile_args: config_host['LINUX_IO_URING_CFLAGS'].split(), + link_args: config_host['LINUX_IO_URING_LIBS'].split()) +endif +libxml2 = not_found +if 'CONFIG_LIBXML2' in config_host + libxml2 = declare_dependency(compile_args: config_host['LIBXML2_CFLAGS'].split(), + link_args: config_host['LIBXML2_LIBS'].split()) +endif +libnfs = not_found +if 'CONFIG_LIBNFS' in config_host + libnfs = declare_dependency(link_args: config_host['LIBNFS_LIBS'].split()) +endif +libattr = not_found +if 'CONFIG_ATTR' in config_host + libattr = declare_dependency(link_args: config_host['LIBATTR_LIBS'].split()) +endif +seccomp = not_found +if 'CONFIG_SECCOMP' in config_host + seccomp = declare_dependency(compile_args: config_host['SECCOMP_CFLAGS'].split(), + link_args: config_host['SECCOMP_LIBS'].split()) +endif +libcap_ng = not_found +if 'CONFIG_LIBCAP_NG' in config_host + libcap_ng = declare_dependency(link_args: config_host['LIBCAP_NG_LIBS'].split()) +endif +if get_option('xkbcommon').auto() and not have_system and not have_tools + xkbcommon = not_found +else + xkbcommon = dependency('xkbcommon', required: get_option('xkbcommon'), + method: 'pkg-config', static: enable_static) +endif +vde = not_found +if config_host.has_key('CONFIG_VDE') + vde = declare_dependency(link_args: config_host['VDE_LIBS'].split()) +endif +pulse = not_found +if 'CONFIG_LIBPULSE' in config_host + pulse = declare_dependency(compile_args: config_host['PULSE_CFLAGS'].split(), + link_args: config_host['PULSE_LIBS'].split()) +endif +alsa = not_found +if 'CONFIG_ALSA' in config_host + alsa = declare_dependency(compile_args: config_host['ALSA_CFLAGS'].split(), + link_args: config_host['ALSA_LIBS'].split()) +endif +jack = not_found +if 'CONFIG_LIBJACK' in config_host + jack = declare_dependency(link_args: config_host['JACK_LIBS'].split()) +endif +spice = not_found +spice_headers = not_found +if 'CONFIG_SPICE' in config_host + spice = declare_dependency(compile_args: config_host['SPICE_CFLAGS'].split(), + link_args: config_host['SPICE_LIBS'].split()) + spice_headers = declare_dependency(compile_args: config_host['SPICE_CFLAGS'].split()) +endif +rt = cc.find_library('rt', required: false) +libdl = not_found +if 'CONFIG_PLUGIN' in config_host + libdl = cc.find_library('dl', required: true) +endif +libiscsi = not_found +if 'CONFIG_LIBISCSI' in config_host + libiscsi = declare_dependency(compile_args: config_host['LIBISCSI_CFLAGS'].split(), + link_args: config_host['LIBISCSI_LIBS'].split()) +endif +zstd = not_found +if 'CONFIG_ZSTD' in config_host + zstd = declare_dependency(compile_args: config_host['ZSTD_CFLAGS'].split(), + link_args: config_host['ZSTD_LIBS'].split()) +endif +gbm = not_found +if 'CONFIG_GBM' in config_host + gbm = declare_dependency(compile_args: config_host['GBM_CFLAGS'].split(), + link_args: config_host['GBM_LIBS'].split()) +endif +virgl = not_found +if 'CONFIG_VIRGL' in config_host + virgl = declare_dependency(compile_args: config_host['VIRGL_CFLAGS'].split(), + link_args: config_host['VIRGL_LIBS'].split()) +endif +curl = not_found +if 'CONFIG_CURL' in config_host + curl = declare_dependency(compile_args: config_host['CURL_CFLAGS'].split(), + link_args: config_host['CURL_LIBS'].split()) +endif +libudev = not_found +if targetos == 'linux' and (have_system or have_tools) + libudev = dependency('libudev', + required: get_option('libudev'), + static: enable_static) +endif + +mpathlibs = [libudev] +mpathpersist = not_found +mpathpersist_new_api = false +if targetos == 'linux' and have_tools and not get_option('mpath').disabled() + mpath_test_source_new = ''' + #include + #include + unsigned mpath_mx_alloc_len = 1024; + int logsink; + static struct config *multipath_conf; + extern struct udev *udev; + extern struct config *get_multipath_config(void); + extern void put_multipath_config(struct config *conf); + struct udev *udev; + struct config *get_multipath_config(void) { return multipath_conf; } + void put_multipath_config(struct config *conf) { } + int main(void) { + udev = udev_new(); + multipath_conf = mpath_lib_init(); + return 0; + }''' + mpath_test_source_old = ''' + #include + #include + unsigned mpath_mx_alloc_len = 1024; + int logsink; + int main(void) { + struct udev *udev = udev_new(); + mpath_lib_init(udev); + return 0; + }''' + libmpathpersist = cc.find_library('mpathpersist', + required: get_option('mpath'), + static: enable_static) + if libmpathpersist.found() + mpathlibs += libmpathpersist + if enable_static + mpathlibs += cc.find_library('devmapper', + required: get_option('mpath'), + static: enable_static) + endif + mpathlibs += cc.find_library('multipath', + required: get_option('mpath'), + static: enable_static) + foreach lib: mpathlibs + if not lib.found() + mpathlibs = [] + break + endif + endforeach + if mpathlibs.length() == 0 + msg = 'Dependencies missing for libmpathpersist' + elif cc.links(mpath_test_source_new, dependencies: mpathlibs) + mpathpersist = declare_dependency(dependencies: mpathlibs) + mpathpersist_new_api = true + elif cc.links(mpath_test_source_old, dependencies: mpathlibs) + mpathpersist = declare_dependency(dependencies: mpathlibs) + else + msg = 'Cannot detect libmpathpersist API' + endif + if not mpathpersist.found() + if get_option('mpath').enabled() + error(msg) + else + warning(msg + ', disabling') + endif + endif + endif +endif + +iconv = not_found +curses = not_found +if have_system and not get_option('curses').disabled() + curses_test = ''' + #include + #include + #include + int main(void) { + wchar_t wch = L'w'; + setlocale(LC_ALL, ""); + resize_term(0, 0); + addwstr(L"wide chars\n"); + addnwstr(&wch, 1); + add_wch(WACS_DEGREE); + return 0; + }''' + + curses_dep_list = targetos == 'windows' ? ['ncurses', 'ncursesw'] : ['ncursesw'] + foreach curses_dep : curses_dep_list + if not curses.found() + curses = dependency(curses_dep, + required: false, + method: 'pkg-config', + static: enable_static) + endif + endforeach + msg = get_option('curses').enabled() ? 'curses library not found' : '' + if curses.found() + if cc.links(curses_test, dependencies: [curses]) + curses = declare_dependency(compile_args: '-DNCURSES_WIDECHAR', dependencies: [curses]) + else + msg = 'curses package not usable' + curses = not_found + endif + endif + if not curses.found() + curses_compile_args = ['-DNCURSES_WIDECHAR'] + has_curses_h = cc.has_header('curses.h', args: curses_compile_args) + if targetos != 'windows' and not has_curses_h + message('Trying with /usr/include/ncursesw') + curses_compile_args += ['-I/usr/include/ncursesw'] + has_curses_h = cc.has_header('curses.h', args: curses_compile_args) + endif + if has_curses_h + curses_libname_list = (targetos == 'windows' ? ['pdcurses'] : ['ncursesw', 'cursesw']) + foreach curses_libname : curses_libname_list + libcurses = cc.find_library(curses_libname, + required: false, + static: enable_static) + if libcurses.found() + if cc.links(curses_test, args: curses_compile_args, dependencies: libcurses) + curses = declare_dependency(compile_args: curses_compile_args, + dependencies: [libcurses]) + break + else + msg = 'curses library not usable' + endif + endif + endforeach + endif + endif + if not get_option('iconv').disabled() + foreach link_args : [ ['-liconv'], [] ] + # Programs will be linked with glib and this will bring in libiconv on FreeBSD. + # We need to use libiconv if available because mixing libiconv's headers with + # the system libc does not work. + # However, without adding glib to the dependencies -L/usr/local/lib will not be + # included in the command line and libiconv will not be found. + if cc.links(''' + #include + int main(void) { + iconv_t conv = iconv_open("WCHAR_T", "UCS-2"); + return conv != (iconv_t) -1; + }''', args: config_host['GLIB_CFLAGS'].split() + config_host['GLIB_LIBS'].split() + link_args) + iconv = declare_dependency(link_args: link_args, dependencies: glib) + break + endif + endforeach + endif + if curses.found() and not iconv.found() + if get_option('iconv').enabled() + error('iconv not available') + endif + msg = 'iconv required for curses UI but not available' + curses = not_found + endif + if not curses.found() and msg != '' + if get_option('curses').enabled() + error(msg) + else + warning(msg + ', disabling') + endif + endif +endif + +brlapi = not_found +if 'CONFIG_BRLAPI' in config_host + brlapi = declare_dependency(link_args: config_host['BRLAPI_LIBS'].split()) +endif + +sdl = not_found +if have_system + sdl = dependency('sdl2', required: get_option('sdl'), static: enable_static) + sdl_image = not_found +endif +if sdl.found() + # work around 2.0.8 bug + sdl = declare_dependency(compile_args: '-Wno-undef', + dependencies: sdl) + sdl_image = dependency('SDL2_image', required: get_option('sdl_image'), + method: 'pkg-config', static: enable_static) +else + if get_option('sdl_image').enabled() + error('sdl-image required, but SDL was @0@'.format( + get_option('sdl').disabled() ? 'disabled' : 'not found')) + endif + sdl_image = not_found +endif + +rbd = not_found +if 'CONFIG_RBD' in config_host + rbd = declare_dependency(link_args: config_host['RBD_LIBS'].split()) +endif +glusterfs = not_found +if 'CONFIG_GLUSTERFS' in config_host + glusterfs = declare_dependency(compile_args: config_host['GLUSTERFS_CFLAGS'].split(), + link_args: config_host['GLUSTERFS_LIBS'].split()) +endif +libssh = not_found +if 'CONFIG_LIBSSH' in config_host + libssh = declare_dependency(compile_args: config_host['LIBSSH_CFLAGS'].split(), + link_args: config_host['LIBSSH_LIBS'].split()) +endif +libbzip2 = not_found +if 'CONFIG_BZIP2' in config_host + libbzip2 = declare_dependency(link_args: config_host['BZIP2_LIBS'].split()) +endif +liblzfse = not_found +if 'CONFIG_LZFSE' in config_host + liblzfse = declare_dependency(link_args: config_host['LZFSE_LIBS'].split()) +endif +oss = not_found +if 'CONFIG_AUDIO_OSS' in config_host + oss = declare_dependency(link_args: config_host['OSS_LIBS'].split()) +endif +dsound = not_found +if 'CONFIG_AUDIO_DSOUND' in config_host + dsound = declare_dependency(link_args: config_host['DSOUND_LIBS'].split()) +endif +coreaudio = not_found +if 'CONFIG_AUDIO_COREAUDIO' in config_host + coreaudio = declare_dependency(link_args: config_host['COREAUDIO_LIBS'].split()) +endif +opengl = not_found +if 'CONFIG_OPENGL' in config_host + opengl = declare_dependency(compile_args: config_host['OPENGL_CFLAGS'].split(), + link_args: config_host['OPENGL_LIBS'].split()) +endif +gtk = not_found +if 'CONFIG_GTK' in config_host + gtk = declare_dependency(compile_args: config_host['GTK_CFLAGS'].split(), + link_args: config_host['GTK_LIBS'].split()) +endif +vte = not_found +if 'CONFIG_VTE' in config_host + vte = declare_dependency(compile_args: config_host['VTE_CFLAGS'].split(), + link_args: config_host['VTE_LIBS'].split()) +endif +x11 = not_found +if 'CONFIG_X11' in config_host + x11 = declare_dependency(compile_args: config_host['X11_CFLAGS'].split(), + link_args: config_host['X11_LIBS'].split()) +endif +vnc = not_found +png = not_found +jpeg = not_found +sasl = not_found +if get_option('vnc').enabled() + vnc = declare_dependency() # dummy dependency + png = dependency('libpng', required: get_option('vnc_png'), + method: 'pkg-config', static: enable_static) + jpeg = dependency('libjpeg', required: get_option('vnc_jpeg'), + method: 'pkg-config', static: enable_static) + sasl = cc.find_library('sasl2', has_headers: ['sasl/sasl.h'], + required: get_option('vnc_sasl'), + static: enable_static) + if sasl.found() + sasl = declare_dependency(dependencies: sasl, + compile_args: '-DSTRUCT_IOVEC_DEFINED') + endif +endif +snappy = not_found +if 'CONFIG_SNAPPY' in config_host + snappy = declare_dependency(link_args: config_host['SNAPPY_LIBS'].split()) +endif +lzo = not_found +if 'CONFIG_LZO' in config_host + lzo = declare_dependency(link_args: config_host['LZO_LIBS'].split()) +endif +rdma = not_found +if 'CONFIG_RDMA' in config_host + rdma = declare_dependency(link_args: config_host['RDMA_LIBS'].split()) +endif +numa = not_found +if 'CONFIG_NUMA' in config_host + numa = declare_dependency(link_args: config_host['NUMA_LIBS'].split()) +endif +xen = not_found +if 'CONFIG_XEN_BACKEND' in config_host + xen = declare_dependency(compile_args: config_host['XEN_CFLAGS'].split(), + link_args: config_host['XEN_LIBS'].split()) +endif +cacard = not_found +if 'CONFIG_SMARTCARD' in config_host + cacard = declare_dependency(compile_args: config_host['SMARTCARD_CFLAGS'].split(), + link_args: config_host['SMARTCARD_LIBS'].split()) +endif +u2f = not_found +if have_system + u2f = dependency('u2f-emu', required: get_option('u2f'), + method: 'pkg-config', + static: enable_static) +endif +usbredir = not_found +if 'CONFIG_USB_REDIR' in config_host + usbredir = declare_dependency(compile_args: config_host['USB_REDIR_CFLAGS'].split(), + link_args: config_host['USB_REDIR_LIBS'].split()) +endif +libusb = not_found +if 'CONFIG_USB_LIBUSB' in config_host + libusb = declare_dependency(compile_args: config_host['LIBUSB_CFLAGS'].split(), + link_args: config_host['LIBUSB_LIBS'].split()) +endif +libpmem = not_found +if 'CONFIG_LIBPMEM' in config_host + libpmem = declare_dependency(compile_args: config_host['LIBPMEM_CFLAGS'].split(), + link_args: config_host['LIBPMEM_LIBS'].split()) +endif +libdaxctl = not_found +if 'CONFIG_LIBDAXCTL' in config_host + libdaxctl = declare_dependency(link_args: config_host['LIBDAXCTL_LIBS'].split()) +endif +tasn1 = not_found +if 'CONFIG_TASN1' in config_host + tasn1 = declare_dependency(compile_args: config_host['TASN1_CFLAGS'].split(), + link_args: config_host['TASN1_LIBS'].split()) +endif +keyutils = dependency('libkeyutils', required: false, + method: 'pkg-config', static: enable_static) + +has_gettid = cc.has_function('gettid') + +# Malloc tests + +malloc = [] +if get_option('malloc') == 'system' + has_malloc_trim = \ + not get_option('malloc_trim').disabled() and \ + cc.links('''#include + int main(void) { malloc_trim(0); return 0; }''') +else + has_malloc_trim = false + malloc = cc.find_library(get_option('malloc'), required: true) +endif +if not has_malloc_trim and get_option('malloc_trim').enabled() + if get_option('malloc') == 'system' + error('malloc_trim not available on this platform.') + else + error('malloc_trim not available with non-libc memory allocator') + endif +endif + +# Check whether the glibc provides statx() + +statx_test = ''' + #ifndef _GNU_SOURCE + #define _GNU_SOURCE + #endif + #include + int main(void) { + struct statx statxbuf; + statx(0, "", 0, STATX_BASIC_STATS, &statxbuf); + return 0; + }''' + +has_statx = cc.links(statx_test) + +have_vhost_user_blk_server = (targetos == 'linux' and + 'CONFIG_VHOST_USER' in config_host) + +if get_option('vhost_user_blk_server').enabled() + if targetos != 'linux' + error('vhost_user_blk_server requires linux') + elif 'CONFIG_VHOST_USER' not in config_host + error('vhost_user_blk_server requires vhost-user support') + endif +elif get_option('vhost_user_blk_server').disabled() or not have_system + have_vhost_user_blk_server = false +endif + +################# +# config-host.h # +################# + +config_host_data.set_quoted('CONFIG_BINDIR', get_option('prefix') / get_option('bindir')) +config_host_data.set_quoted('CONFIG_PREFIX', get_option('prefix')) +config_host_data.set_quoted('CONFIG_QEMU_CONFDIR', get_option('prefix') / qemu_confdir) +config_host_data.set_quoted('CONFIG_QEMU_DATADIR', get_option('prefix') / qemu_datadir) +config_host_data.set_quoted('CONFIG_QEMU_DESKTOPDIR', get_option('prefix') / qemu_desktopdir) +config_host_data.set_quoted('CONFIG_QEMU_FIRMWAREPATH', get_option('qemu_firmwarepath')) +config_host_data.set_quoted('CONFIG_QEMU_HELPERDIR', get_option('prefix') / get_option('libexecdir')) +config_host_data.set_quoted('CONFIG_QEMU_ICONDIR', get_option('prefix') / qemu_icondir) +config_host_data.set_quoted('CONFIG_QEMU_LOCALEDIR', get_option('prefix') / get_option('localedir')) +config_host_data.set_quoted('CONFIG_QEMU_LOCALSTATEDIR', get_option('prefix') / get_option('localstatedir')) +config_host_data.set_quoted('CONFIG_QEMU_MODDIR', get_option('prefix') / qemu_moddir) +config_host_data.set_quoted('CONFIG_SYSCONFDIR', get_option('prefix') / get_option('sysconfdir')) + +config_host_data.set('CONFIG_COCOA', cocoa.found()) +config_host_data.set('CONFIG_LIBUDEV', libudev.found()) +config_host_data.set('CONFIG_MPATH', mpathpersist.found()) +config_host_data.set('CONFIG_MPATH_NEW_API', mpathpersist_new_api) +config_host_data.set('CONFIG_CURSES', curses.found()) +config_host_data.set('CONFIG_SDL', sdl.found()) +config_host_data.set('CONFIG_SDL_IMAGE', sdl_image.found()) +config_host_data.set('CONFIG_VHOST_USER_BLK_SERVER', have_vhost_user_blk_server) +config_host_data.set('CONFIG_VNC', vnc.found()) +config_host_data.set('CONFIG_VNC_JPEG', jpeg.found()) +config_host_data.set('CONFIG_VNC_PNG', png.found()) +config_host_data.set('CONFIG_VNC_SASL', sasl.found()) +config_host_data.set('CONFIG_XKBCOMMON', xkbcommon.found()) +config_host_data.set('CONFIG_KEYUTILS', keyutils.found()) +config_host_data.set('CONFIG_GETTID', has_gettid) +config_host_data.set('CONFIG_MALLOC_TRIM', has_malloc_trim) +config_host_data.set('CONFIG_STATX', has_statx) +config_host_data.set('QEMU_VERSION', '"@0@"'.format(meson.project_version())) +config_host_data.set('QEMU_VERSION_MAJOR', meson.project_version().split('.')[0]) +config_host_data.set('QEMU_VERSION_MINOR', meson.project_version().split('.')[1]) +config_host_data.set('QEMU_VERSION_MICRO', meson.project_version().split('.')[2]) + +config_host_data.set('HAVE_SYS_IOCCOM_H', cc.has_header('sys/ioccom.h')) + +ignored = ['CONFIG_QEMU_INTERP_PREFIX'] # actually per-target +arrays = ['CONFIG_AUDIO_DRIVERS', 'CONFIG_BDRV_RW_WHITELIST', 'CONFIG_BDRV_RO_WHITELIST'] +strings = ['HOST_DSOSUF', 'CONFIG_IASL'] +foreach k, v: config_host + if ignored.contains(k) + # do nothing + elif arrays.contains(k) + if v != '' + v = '"' + '", "'.join(v.split()) + '", ' + endif + config_host_data.set(k, v) + elif k == 'ARCH' + config_host_data.set('HOST_' + v.to_upper(), 1) + elif strings.contains(k) + if not k.startswith('CONFIG_') + k = 'CONFIG_' + k.to_upper() + endif + config_host_data.set_quoted(k, v) + elif k.startswith('CONFIG_') or k.startswith('HAVE_') or k.startswith('HOST_') + config_host_data.set(k, v == 'y' ? 1 : v) + endif +endforeach + +######################## +# Target configuration # +######################## + +minikconf = find_program('scripts/minikconf.py') +config_all = {} +config_all_devices = {} +config_all_disas = {} +config_devices_mak_list = [] +config_target = {} +config_devices_h = {} +config_target_h = {} +config_target_mak = {} + +disassemblers = { + 'alpha' : ['CONFIG_ALPHA_DIS'], + 'arm' : ['CONFIG_ARM_DIS'], + 'avr' : ['CONFIG_AVR_DIS'], + 'cris' : ['CONFIG_CRIS_DIS'], + 'hppa' : ['CONFIG_HPPA_DIS'], + 'i386' : ['CONFIG_I386_DIS'], + 'x86_64' : ['CONFIG_I386_DIS'], + 'x32' : ['CONFIG_I386_DIS'], + 'lm32' : ['CONFIG_LM32_DIS'], + 'm68k' : ['CONFIG_M68K_DIS'], + 'microblaze' : ['CONFIG_MICROBLAZE_DIS'], + 'mips' : ['CONFIG_MIPS_DIS'], + 'moxie' : ['CONFIG_MOXIE_DIS'], + 'nios2' : ['CONFIG_NIOS2_DIS'], + 'or1k' : ['CONFIG_OPENRISC_DIS'], + 'ppc' : ['CONFIG_PPC_DIS'], + 'riscv' : ['CONFIG_RISCV_DIS'], + 'rx' : ['CONFIG_RX_DIS'], + 's390' : ['CONFIG_S390_DIS'], + 'sh4' : ['CONFIG_SH4_DIS'], + 'sparc' : ['CONFIG_SPARC_DIS'], + 'xtensa' : ['CONFIG_XTENSA_DIS'], +} +if link_language == 'cpp' + disassemblers += { + 'aarch64' : [ 'CONFIG_ARM_A64_DIS'], + 'arm' : [ 'CONFIG_ARM_DIS', 'CONFIG_ARM_A64_DIS'], + 'mips' : [ 'CONFIG_MIPS_DIS', 'CONFIG_NANOMIPS_DIS'], + } +endif + +kconfig_external_symbols = [ + 'CONFIG_KVM', + 'CONFIG_XEN', + 'CONFIG_TPM', + 'CONFIG_SPICE', + 'CONFIG_IVSHMEM', + 'CONFIG_OPENGL', + 'CONFIG_X11', + 'CONFIG_VHOST_USER', + 'CONFIG_VHOST_VDPA', + 'CONFIG_VHOST_KERNEL', + 'CONFIG_VIRTFS', + 'CONFIG_LINUX', + 'CONFIG_PVRDMA', +] +ignored = [ 'TARGET_XML_FILES', 'TARGET_ABI_DIR', 'TARGET_ARCH' ] + +default_targets = 'CONFIG_DEFAULT_TARGETS' in config_host +actual_target_dirs = [] +fdt_required = [] +foreach target : target_dirs + config_target = { 'TARGET_NAME': target.split('-')[0] } + if target.endswith('linux-user') + if targetos != 'linux' + if default_targets + continue + endif + error('Target @0@ is only available on a Linux host'.format(target)) + endif + config_target += { 'CONFIG_LINUX_USER': 'y' } + elif target.endswith('bsd-user') + if 'CONFIG_BSD' not in config_host + if default_targets + continue + endif + error('Target @0@ is only available on a BSD host'.format(target)) + endif + config_target += { 'CONFIG_BSD_USER': 'y' } + elif target.endswith('softmmu') + config_target += { 'CONFIG_SOFTMMU': 'y' } + endif + if target.endswith('-user') + config_target += { + 'CONFIG_USER_ONLY': 'y', + 'CONFIG_QEMU_INTERP_PREFIX': + config_host['CONFIG_QEMU_INTERP_PREFIX'].format(config_target['TARGET_NAME']) + } + endif + + have_accel = false + foreach sym: accelerators + if sym == 'CONFIG_TCG' or target in accelerator_targets.get(sym, []) + config_target += { sym: 'y' } + config_all += { sym: 'y' } + if sym == 'CONFIG_XEN' and have_xen_pci_passthrough + config_target += { 'CONFIG_XEN_PCI_PASSTHROUGH': 'y' } + endif + have_accel = true + endif + endforeach + if not have_accel + if default_targets + continue + endif + error('No accelerator available for target @0@'.format(target)) + endif + + actual_target_dirs += target + config_target += keyval.load('default-configs/targets' / target + '.mak') + config_target += { 'TARGET_' + config_target['TARGET_ARCH'].to_upper(): 'y' } + + if 'TARGET_NEED_FDT' in config_target + fdt_required += target + endif + + # Add default keys + if 'TARGET_BASE_ARCH' not in config_target + config_target += {'TARGET_BASE_ARCH': config_target['TARGET_ARCH']} + endif + if 'TARGET_ABI_DIR' not in config_target + config_target += {'TARGET_ABI_DIR': config_target['TARGET_ARCH']} + endif + + foreach k, v: disassemblers + if config_host['ARCH'].startswith(k) or config_target['TARGET_BASE_ARCH'].startswith(k) + foreach sym: v + config_target += { sym: 'y' } + config_all_disas += { sym: 'y' } + endforeach + endif + endforeach + + config_target_data = configuration_data() + foreach k, v: config_target + if not k.startswith('TARGET_') and not k.startswith('CONFIG_') + # do nothing + elif ignored.contains(k) + # do nothing + elif k == 'TARGET_BASE_ARCH' + # Note that TARGET_BASE_ARCH ends up in config-target.h but it is + # not used to select files from sourcesets. + config_target_data.set('TARGET_' + v.to_upper(), 1) + elif k == 'TARGET_NAME' or k == 'CONFIG_QEMU_INTERP_PREFIX' + config_target_data.set_quoted(k, v) + elif v == 'y' + config_target_data.set(k, 1) + else + config_target_data.set(k, v) + endif + endforeach + config_target_h += {target: configure_file(output: target + '-config-target.h', + configuration: config_target_data)} + + if target.endswith('-softmmu') + base_kconfig = [] + foreach sym : kconfig_external_symbols + if sym in config_target or sym in config_host + base_kconfig += '@0@=y'.format(sym) + endif + endforeach + + config_devices_mak = target + '-config-devices.mak' + config_devices_mak = configure_file( + input: ['default-configs/devices' / target + '.mak', 'Kconfig'], + output: config_devices_mak, + depfile: config_devices_mak + '.d', + capture: true, + command: [minikconf, config_host['CONFIG_MINIKCONF_MODE'], + config_devices_mak, '@DEPFILE@', '@INPUT@', + base_kconfig]) + + config_devices_data = configuration_data() + config_devices = keyval.load(config_devices_mak) + foreach k, v: config_devices + config_devices_data.set(k, 1) + endforeach + config_devices_mak_list += config_devices_mak + config_devices_h += {target: configure_file(output: target + '-config-devices.h', + configuration: config_devices_data)} + config_target += config_devices + config_all_devices += config_devices + endif + config_target_mak += {target: config_target} +endforeach +target_dirs = actual_target_dirs + +# This configuration is used to build files that are shared by +# multiple binaries, and then extracted out of the "common" +# static_library target. +# +# We do not use all_sources()/all_dependencies(), because it would +# build literally all source files, including devices only used by +# targets that are not built for this compilation. The CONFIG_ALL +# pseudo symbol replaces it. + +config_all += config_all_devices +config_all += config_host +config_all += config_all_disas +config_all += { + 'CONFIG_XEN': config_host.has_key('CONFIG_XEN_BACKEND'), + 'CONFIG_SOFTMMU': have_system, + 'CONFIG_USER_ONLY': have_user, + 'CONFIG_ALL': true, +} + +############## +# Submodules # +############## + +capstone = not_found +capstone_opt = get_option('capstone') +if capstone_opt in ['enabled', 'auto', 'system'] + have_internal = fs.exists(meson.current_source_dir() / 'capstone/Makefile') + capstone = dependency('capstone', version: '>=4.0', + static: enable_static, method: 'pkg-config', + required: capstone_opt == 'system' or + capstone_opt == 'enabled' and not have_internal) + if capstone.found() + capstone_opt = 'system' + elif have_internal + capstone_opt = 'internal' + else + capstone_opt = 'disabled' + endif +endif +if capstone_opt == 'internal' + capstone_data = configuration_data() + capstone_data.set('CAPSTONE_USE_SYS_DYN_MEM', '1') + + capstone_files = files( + 'capstone/cs.c', + 'capstone/MCInst.c', + 'capstone/MCInstrDesc.c', + 'capstone/MCRegisterInfo.c', + 'capstone/SStream.c', + 'capstone/utils.c' + ) + + if 'CONFIG_ARM_DIS' in config_all_disas + capstone_data.set('CAPSTONE_HAS_ARM', '1') + capstone_files += files( + 'capstone/arch/ARM/ARMDisassembler.c', + 'capstone/arch/ARM/ARMInstPrinter.c', + 'capstone/arch/ARM/ARMMapping.c', + 'capstone/arch/ARM/ARMModule.c' + ) + endif + + # FIXME: This config entry currently depends on a c++ compiler. + # Which is needed for building libvixl, but not for capstone. + if 'CONFIG_ARM_A64_DIS' in config_all_disas + capstone_data.set('CAPSTONE_HAS_ARM64', '1') + capstone_files += files( + 'capstone/arch/AArch64/AArch64BaseInfo.c', + 'capstone/arch/AArch64/AArch64Disassembler.c', + 'capstone/arch/AArch64/AArch64InstPrinter.c', + 'capstone/arch/AArch64/AArch64Mapping.c', + 'capstone/arch/AArch64/AArch64Module.c' + ) + endif + + if 'CONFIG_PPC_DIS' in config_all_disas + capstone_data.set('CAPSTONE_HAS_POWERPC', '1') + capstone_files += files( + 'capstone/arch/PowerPC/PPCDisassembler.c', + 'capstone/arch/PowerPC/PPCInstPrinter.c', + 'capstone/arch/PowerPC/PPCMapping.c', + 'capstone/arch/PowerPC/PPCModule.c' + ) + endif + + if 'CONFIG_S390_DIS' in config_all_disas + capstone_data.set('CAPSTONE_HAS_SYSZ', '1') + capstone_files += files( + 'capstone/arch/SystemZ/SystemZDisassembler.c', + 'capstone/arch/SystemZ/SystemZInstPrinter.c', + 'capstone/arch/SystemZ/SystemZMapping.c', + 'capstone/arch/SystemZ/SystemZModule.c', + 'capstone/arch/SystemZ/SystemZMCTargetDesc.c' + ) + endif + + if 'CONFIG_I386_DIS' in config_all_disas + capstone_data.set('CAPSTONE_HAS_X86', 1) + capstone_files += files( + 'capstone/arch/X86/X86Disassembler.c', + 'capstone/arch/X86/X86DisassemblerDecoder.c', + 'capstone/arch/X86/X86ATTInstPrinter.c', + 'capstone/arch/X86/X86IntelInstPrinter.c', + 'capstone/arch/X86/X86InstPrinterCommon.c', + 'capstone/arch/X86/X86Mapping.c', + 'capstone/arch/X86/X86Module.c' + ) + endif + + configure_file(output: 'capstone-defs.h', configuration: capstone_data) + + capstone_cargs = [ + # FIXME: There does not seem to be a way to completely replace the c_args + # that come from add_project_arguments() -- we can only add to them. + # So: disable all warnings with a big hammer. + '-Wno-error', '-w', + + # Include all configuration defines via a header file, which will wind up + # as a dependency on the object file, and thus changes here will result + # in a rebuild. + '-include', 'capstone-defs.h' + ] + + libcapstone = static_library('capstone', + sources: capstone_files, + c_args: capstone_cargs, + include_directories: 'capstone/include') + capstone = declare_dependency(link_with: libcapstone, + include_directories: 'capstone/include/capstone') +endif + +slirp = not_found +slirp_opt = 'disabled' +if have_system + slirp_opt = get_option('slirp') + if slirp_opt in ['enabled', 'auto', 'system'] + have_internal = fs.exists(meson.current_source_dir() / 'slirp/meson.build') + slirp = dependency('slirp', static: enable_static, + method: 'pkg-config', + required: slirp_opt == 'system' or + slirp_opt == 'enabled' and not have_internal) + if slirp.found() + slirp_opt = 'system' + elif have_internal + slirp_opt = 'internal' + else + slirp_opt = 'disabled' + endif + endif + if slirp_opt == 'internal' + slirp_deps = [] + if targetos == 'windows' + slirp_deps = cc.find_library('iphlpapi') + endif + slirp_conf = configuration_data() + slirp_conf.set('SLIRP_MAJOR_VERSION', meson.project_version().split('.')[0]) + slirp_conf.set('SLIRP_MINOR_VERSION', meson.project_version().split('.')[1]) + slirp_conf.set('SLIRP_MICRO_VERSION', meson.project_version().split('.')[2]) + slirp_conf.set_quoted('SLIRP_VERSION_STRING', meson.project_version()) + slirp_cargs = ['-DG_LOG_DOMAIN="Slirp"'] + slirp_files = [ + 'slirp/src/arp_table.c', + 'slirp/src/bootp.c', + 'slirp/src/cksum.c', + 'slirp/src/dhcpv6.c', + 'slirp/src/dnssearch.c', + 'slirp/src/if.c', + 'slirp/src/ip6_icmp.c', + 'slirp/src/ip6_input.c', + 'slirp/src/ip6_output.c', + 'slirp/src/ip_icmp.c', + 'slirp/src/ip_input.c', + 'slirp/src/ip_output.c', + 'slirp/src/mbuf.c', + 'slirp/src/misc.c', + 'slirp/src/ncsi.c', + 'slirp/src/ndp_table.c', + 'slirp/src/sbuf.c', + 'slirp/src/slirp.c', + 'slirp/src/socket.c', + 'slirp/src/state.c', + 'slirp/src/stream.c', + 'slirp/src/tcp_input.c', + 'slirp/src/tcp_output.c', + 'slirp/src/tcp_subr.c', + 'slirp/src/tcp_timer.c', + 'slirp/src/tftp.c', + 'slirp/src/udp.c', + 'slirp/src/udp6.c', + 'slirp/src/util.c', + 'slirp/src/version.c', + 'slirp/src/vmstate.c', + ] + + configure_file( + input : 'slirp/src/libslirp-version.h.in', + output : 'libslirp-version.h', + configuration: slirp_conf) + + slirp_inc = include_directories('slirp', 'slirp/src') + libslirp = static_library('slirp', + sources: slirp_files, + c_args: slirp_cargs, + include_directories: slirp_inc) + slirp = declare_dependency(link_with: libslirp, + dependencies: slirp_deps, + include_directories: slirp_inc) + endif +endif + +fdt = not_found +fdt_opt = get_option('fdt') +if have_system + if fdt_opt in ['enabled', 'auto', 'system'] + have_internal = fs.exists(meson.current_source_dir() / 'dtc/libfdt/Makefile.libfdt') + fdt = cc.find_library('fdt', static: enable_static, + required: fdt_opt == 'system' or + fdt_opt == 'enabled' and not have_internal) + if fdt.found() and cc.links(''' + #include + #include + int main(void) { fdt_check_full(NULL, 0); return 0; }''', + dependencies: fdt) + fdt_opt = 'system' + elif have_internal + fdt_opt = 'internal' + else + fdt_opt = 'disabled' + endif + endif + if fdt_opt == 'internal' + fdt_files = files( + 'dtc/libfdt/fdt.c', + 'dtc/libfdt/fdt_ro.c', + 'dtc/libfdt/fdt_wip.c', + 'dtc/libfdt/fdt_sw.c', + 'dtc/libfdt/fdt_rw.c', + 'dtc/libfdt/fdt_strerror.c', + 'dtc/libfdt/fdt_empty_tree.c', + 'dtc/libfdt/fdt_addresses.c', + 'dtc/libfdt/fdt_overlay.c', + 'dtc/libfdt/fdt_check.c', + ) + + fdt_inc = include_directories('dtc/libfdt') + libfdt = static_library('fdt', + sources: fdt_files, + include_directories: fdt_inc) + fdt = declare_dependency(link_with: libfdt, + include_directories: fdt_inc) + endif +endif +if not fdt.found() and fdt_required.length() > 0 + error('fdt not available but required by targets ' + ', '.join(fdt_required)) +endif + +config_host_data.set('CONFIG_CAPSTONE', capstone.found()) +config_host_data.set('CONFIG_FDT', fdt.found()) +config_host_data.set('CONFIG_SLIRP', slirp.found()) + +##################### +# Generated sources # +##################### + +genh += configure_file(output: 'config-host.h', configuration: config_host_data) + +hxtool = find_program('scripts/hxtool') +shaderinclude = find_program('scripts/shaderinclude.pl') +qapi_gen = find_program('scripts/qapi-gen.py') +qapi_gen_depends = [ meson.source_root() / 'scripts/qapi/__init__.py', + meson.source_root() / 'scripts/qapi/commands.py', + meson.source_root() / 'scripts/qapi/common.py', + meson.source_root() / 'scripts/qapi/error.py', + meson.source_root() / 'scripts/qapi/events.py', + meson.source_root() / 'scripts/qapi/expr.py', + meson.source_root() / 'scripts/qapi/gen.py', + meson.source_root() / 'scripts/qapi/introspect.py', + meson.source_root() / 'scripts/qapi/parser.py', + meson.source_root() / 'scripts/qapi/schema.py', + meson.source_root() / 'scripts/qapi/source.py', + meson.source_root() / 'scripts/qapi/types.py', + meson.source_root() / 'scripts/qapi/visit.py', + meson.source_root() / 'scripts/qapi/common.py', + meson.source_root() / 'scripts/qapi-gen.py' +] + +tracetool = [ + python, files('scripts/tracetool.py'), + '--backend=' + config_host['TRACE_BACKENDS'] +] + +qemu_version_cmd = [find_program('scripts/qemu-version.sh'), + meson.current_source_dir(), + config_host['PKGVERSION'], meson.project_version()] +qemu_version = custom_target('qemu-version.h', + output: 'qemu-version.h', + command: qemu_version_cmd, + capture: true, + build_by_default: true, + build_always_stale: true) +genh += qemu_version + +hxdep = [] +hx_headers = [ + ['qemu-options.hx', 'qemu-options.def'], + ['qemu-img-cmds.hx', 'qemu-img-cmds.h'], +] +if have_system + hx_headers += [ + ['hmp-commands.hx', 'hmp-commands.h'], + ['hmp-commands-info.hx', 'hmp-commands-info.h'], + ] +endif +foreach d : hx_headers + hxdep += custom_target(d[1], + input: files(d[0]), + output: d[1], + capture: true, + build_by_default: true, # to be removed when added to a target + command: [hxtool, '-h', '@INPUT0@']) +endforeach +genh += hxdep + +################### +# Collect sources # +################### + +authz_ss = ss.source_set() +blockdev_ss = ss.source_set() +block_ss = ss.source_set() +bsd_user_ss = ss.source_set() +chardev_ss = ss.source_set() +common_ss = ss.source_set() +crypto_ss = ss.source_set() +io_ss = ss.source_set() +linux_user_ss = ss.source_set() +qmp_ss = ss.source_set() +qom_ss = ss.source_set() +softmmu_ss = ss.source_set() +specific_fuzz_ss = ss.source_set() +specific_ss = ss.source_set() +stub_ss = ss.source_set() +trace_ss = ss.source_set() +user_ss = ss.source_set() +util_ss = ss.source_set() + +modules = {} +hw_arch = {} +target_arch = {} +target_softmmu_arch = {} + +############### +# Trace files # +############### + +# TODO: add each directory to the subdirs from its own meson.build, once +# we have those +trace_events_subdirs = [ +# 'accel/kvm', +# 'accel/tcg', + 'crypto', + 'monitor', +] +if have_user + trace_events_subdirs += [ 'linux-user' ] +endif +if have_block + trace_events_subdirs += [ + 'authz', + 'block', + 'io', + 'nbd', + 'scsi', + ] +endif +if have_system + trace_events_subdirs += [ + 'audio', + 'backends', + 'backends/tpm', + 'chardev', + 'hw/9pfs', + 'hw/acpi', + 'hw/alpha', + 'hw/arm', + 'hw/audio', + 'hw/block', + 'hw/block/dataplane', + 'hw/char', + 'hw/display', + 'hw/dma', + 'hw/hppa', + 'hw/hyperv', + 'hw/i2c', + 'hw/i386', + 'hw/i386/xen', + 'hw/ide', + 'hw/input', + 'hw/intc', + 'hw/isa', + 'hw/mem', + 'hw/mips', + 'hw/misc', + 'hw/misc/macio', + 'hw/net', + 'hw/nvram', + 'hw/pci', + 'hw/pci-host', + 'hw/ppc', + 'hw/rdma', + 'hw/rdma/vmw', + 'hw/rtc', + 'hw/s390x', + 'hw/scsi', + 'hw/sd', + 'hw/sparc', + 'hw/sparc64', + 'hw/ssi', + 'hw/timer', + 'hw/tpm', + 'hw/usb', + 'hw/vfio', + 'hw/virtio', + 'hw/watchdog', + 'hw/xen', + 'hw/gpio', + 'migration', + 'net', + 'softmmu', + 'ui', + ] +endif +trace_events_subdirs += [ +# 'hw/core', + 'qapi', + 'qom', +# 'target/arm', +# 'target/hppa', +# 'target/i386', +# 'target/mips', +# 'target/ppc', +# 'target/riscv', +# 'target/s390x', +# 'target/sparc', + 'util', +] + +#subdir('contrib/libvhost-user') +subdir('qapi') +subdir('qobject') +subdir('stubs') +subdir('trace') +subdir('util') +subdir('qom') +subdir('authz') +subdir('crypto') +#subdir('ui') + + +if enable_modules + libmodulecommon = static_library('module-common', files('module-common.c') + genh, pic: true, c_args: '-DBUILD_DSO') + modulecommon = declare_dependency(link_whole: libmodulecommon, compile_args: '-DBUILD_DSO') +endif + +stub_ss = stub_ss.apply(config_all, strict: false) + +util_ss.add_all(trace_ss) +util_ss = util_ss.apply(config_all, strict: false) +libqemuutil = static_library('qemuutil', + sources: util_ss.sources() + stub_ss.sources() + genh, + dependencies: [util_ss.dependencies(), m, glib, socket, malloc]) +qemuutil = declare_dependency(link_with: libqemuutil, + sources: genh + version_res) + +decodetree = generator(find_program('scripts/decodetree.py'), + output: 'decode-@BASENAME@.c.inc', + arguments: ['@INPUT@', '@EXTRA_ARGS@', '-o', '@OUTPUT@']) + +#subdir('audio') +subdir('io') +#subdir('chardev') +#subdir('fsdev') +#subdir('libdecnumber') +#subdir('target') +#subdir('dump') + +block_ss.add(files( + 'block.c', + 'blockjob.c', + 'job.c', + 'qemu-io-cmds.c', +)) +#block_ss.add(when: 'CONFIG_REPLICATION', if_true: files('replication.c')) + +subdir('nbd') +subdir('scsi') +subdir('block') + +blockdev_ss.add(files( + 'blockdev.c', + 'blockdev-nbd.c', + 'iothread.c', + 'job-qmp.c', +)) + +# os-posix.c contains POSIX-specific functions used by qemu-storage-daemon, +# os-win32.c does not +blockdev_ss.add(when: 'CONFIG_POSIX', if_true: files('os-posix.c')) +#softmmu_ss.add(when: 'CONFIG_WIN32', if_true: [files('os-win32.c')]) + +#common_ss.add(files('cpus-common.c')) + +#subdir('softmmu') + +#common_ss.add(capstone) +#specific_ss.add(files('cpu.c', 'disas.c', 'gdbstub.c'), capstone) +#specific_ss.add(files('exec-vary.c')) +#specific_ss.add(when: 'CONFIG_TCG', if_true: files( +# 'fpu/softfloat.c', +# 'tcg/optimize.c', +# 'tcg/tcg-common.c', +# 'tcg/tcg-op-gvec.c', +# 'tcg/tcg-op-vec.c', +# 'tcg/tcg-op.c', +# 'tcg/tcg.c', +#)) +#specific_ss.add(when: 'CONFIG_TCG_INTERPRETER', if_true: files('disas/tci.c', 'tcg/tci.c')) + +#subdir('backends') +#subdir('disas') +#subdir('migration') +subdir('monitor') +#subdir('net') +#subdir('replay') +#subdir('hw') +#subdir('accel') +#subdir('plugins') +#subdir('bsd-user') +#subdir('linux-user') + +#bsd_user_ss.add(files('gdbstub.c')) +#specific_ss.add_all(when: 'CONFIG_BSD_USER', if_true: bsd_user_ss) + +#linux_user_ss.add(files('gdbstub.c', 'thunk.c')) +#specific_ss.add_all(when: 'CONFIG_LINUX_USER', if_true: linux_user_ss) + +# needed for fuzzing binaries +#subdir('tests/qtest/libqos') +#subdir('tests/qtest/fuzz') + +######################## +# Library dependencies # +######################## + +block_mods = [] +softmmu_mods = [] +foreach d, list : modules + foreach m, module_ss : list + if enable_modules and targetos != 'windows' + module_ss = module_ss.apply(config_all, strict: false) + sl = static_library(d + '-' + m, [genh, module_ss.sources()], + dependencies: [modulecommon, module_ss.dependencies()], pic: true) + if d == 'block' + block_mods += sl + else + softmmu_mods += sl + endif + else + if d == 'block' + block_ss.add_all(module_ss) + else + softmmu_ss.add_all(module_ss) + endif + endif + endforeach +endforeach + +nm = find_program('nm') +undefsym = find_program('scripts/undefsym.py') +block_syms = custom_target('block.syms', output: 'block.syms', + input: [libqemuutil, block_mods], + capture: true, + command: [undefsym, nm, '@INPUT@']) +qemu_syms = custom_target('qemu.syms', output: 'qemu.syms', + input: [libqemuutil, softmmu_mods], + capture: true, + command: [undefsym, nm, '@INPUT@']) + +qom_ss = qom_ss.apply(config_host, strict: false) +libqom = static_library('qom', qom_ss.sources() + genh, + dependencies: [qom_ss.dependencies()], + name_suffix: 'fa') + +qom = declare_dependency(link_whole: libqom) + +authz_ss = authz_ss.apply(config_host, strict: false) +libauthz = static_library('authz', authz_ss.sources() + genh, + dependencies: [authz_ss.dependencies()], + name_suffix: 'fa', + build_by_default: false) + +authz = declare_dependency(link_whole: libauthz, + dependencies: qom) + +crypto_ss = crypto_ss.apply(config_host, strict: false) +libcrypto = static_library('crypto', crypto_ss.sources() + genh, + dependencies: [crypto_ss.dependencies()], + name_suffix: 'fa', + build_by_default: false) + +crypto = declare_dependency(link_whole: libcrypto, + dependencies: [authz, qom]) + +io_ss = io_ss.apply(config_host, strict: false) +libio = static_library('io', io_ss.sources() + genh, + dependencies: [io_ss.dependencies()], + link_with: libqemuutil, + name_suffix: 'fa', + build_by_default: false) + +io = declare_dependency(link_whole: libio, dependencies: [crypto, qom]) + +#libmigration = static_library('migration', sources: migration_files + genh, +# name_suffix: 'fa', +# build_by_default: false) +#migration = declare_dependency(link_with: libmigration, +# dependencies: [zlib, qom, io]) +#softmmu_ss.add(migration) + +block_ss = block_ss.apply(config_host, strict: false) +libblock = static_library('block', block_ss.sources() + genh, + dependencies: block_ss.dependencies(), + link_depends: block_syms, + name_suffix: 'fa', + build_by_default: false) + +block = declare_dependency(link_whole: [libblock], + link_args: '@block.syms', + dependencies: [crypto, io]) + +blockdev_ss = blockdev_ss.apply(config_host, strict: false) +libblockdev = static_library('blockdev', blockdev_ss.sources() + genh, + dependencies: blockdev_ss.dependencies(), + name_suffix: 'fa', + build_by_default: false) + +blockdev = declare_dependency(link_whole: [libblockdev], + dependencies: [block]) + +#qmp_ss = qmp_ss.apply(config_host, strict: false) +#libqmp = static_library('qmp', qmp_ss.sources() + genh, +# dependencies: qmp_ss.dependencies(), +# name_suffix: 'fa', +# build_by_default: false) + +#qmp = declare_dependency(link_whole: [libqmp]) + +#libchardev = static_library('chardev', chardev_ss.sources() + genh, +# name_suffix: 'fa', +# build_by_default: false) + +#chardev = declare_dependency(link_whole: libchardev) + +#libhwcore = static_library('hwcore', sources: hwcore_files + genh, +# name_suffix: 'fa', +# build_by_default: false) +#hwcore = declare_dependency(link_whole: libhwcore) +#common_ss.add(hwcore) + +########### +# Targets # +########### + +foreach m : block_mods + softmmu_mods + shared_module(m.name(), + name_prefix: '', + link_whole: m, + install: true, + install_dir: qemu_moddir) +endforeach + +#softmmu_ss.add(authz, blockdev, chardev, crypto, io, qmp) +#common_ss.add(qom, qemuutil) + +#common_ss.add_all(when: 'CONFIG_SOFTMMU', if_true: [softmmu_ss]) +#common_ss.add_all(when: 'CONFIG_USER_ONLY', if_true: user_ss) + +#common_all = common_ss.apply(config_all, strict: false) +#common_all = static_library('common', +# build_by_default: false, +# sources: common_all.sources() + genh, +# dependencies: common_all.dependencies(), +# name_suffix: 'fa') + +feature_to_c = find_program('scripts/feature_to_c.sh') + +emulators = {} +foreach target : target_dirs + config_target = config_target_mak[target] + target_name = config_target['TARGET_NAME'] + arch = config_target['TARGET_BASE_ARCH'] + arch_srcs = [config_target_h[target]] + arch_deps = [] + c_args = ['-DNEED_CPU_H', + '-DCONFIG_TARGET="@0@-config-target.h"'.format(target), + '-DCONFIG_DEVICES="@0@-config-devices.h"'.format(target)] + link_args = emulator_link_args + + config_target += config_host + target_inc = [include_directories('target' / config_target['TARGET_BASE_ARCH'])] + if targetos == 'linux' + target_inc += include_directories('linux-headers', is_system: true) + endif + if target.endswith('-softmmu') + qemu_target_name = 'qemu-system-' + target_name + target_type='system' + t = target_softmmu_arch[arch].apply(config_target, strict: false) + arch_srcs += t.sources() + arch_deps += t.dependencies() + + hw_dir = target_name == 'sparc64' ? 'sparc64' : arch + hw = hw_arch[hw_dir].apply(config_target, strict: false) + arch_srcs += hw.sources() + arch_deps += hw.dependencies() + + arch_srcs += config_devices_h[target] + link_args += ['@block.syms', '@qemu.syms'] + else + abi = config_target['TARGET_ABI_DIR'] + target_type='user' + qemu_target_name = 'qemu-' + target_name + if 'CONFIG_LINUX_USER' in config_target + base_dir = 'linux-user' + target_inc += include_directories('linux-user/host/' / config_host['ARCH']) + else + base_dir = 'bsd-user' + endif + target_inc += include_directories( + base_dir, + base_dir / abi, + ) + if 'CONFIG_LINUX_USER' in config_target + dir = base_dir / abi + arch_srcs += files(dir / 'signal.c', dir / 'cpu_loop.c') + if config_target.has_key('TARGET_SYSTBL_ABI') + arch_srcs += \ + syscall_nr_generators[abi].process(base_dir / abi / config_target['TARGET_SYSTBL'], + extra_args : config_target['TARGET_SYSTBL_ABI']) + endif + endif + endif + + if 'TARGET_XML_FILES' in config_target + gdbstub_xml = custom_target(target + '-gdbstub-xml.c', + output: target + '-gdbstub-xml.c', + input: files(config_target['TARGET_XML_FILES'].split()), + command: [feature_to_c, '@INPUT@'], + capture: true) + arch_srcs += gdbstub_xml + endif + + t = target_arch[arch].apply(config_target, strict: false) + arch_srcs += t.sources() + arch_deps += t.dependencies() + + target_common = common_ss.apply(config_target, strict: false) + objects = common_all.extract_objects(target_common.sources()) + deps = target_common.dependencies() + + target_specific = specific_ss.apply(config_target, strict: false) + arch_srcs += target_specific.sources() + arch_deps += target_specific.dependencies() + + lib = static_library('qemu-' + target, + sources: arch_srcs + genh, + dependencies: arch_deps, + objects: objects, + include_directories: target_inc, + c_args: c_args, + build_by_default: false, + name_suffix: 'fa') + + if target.endswith('-softmmu') + execs = [{ + 'name': 'qemu-system-' + target_name, + 'gui': false, + 'sources': files('softmmu/main.c'), + 'dependencies': [] + }] + if targetos == 'windows' and (sdl.found() or gtk.found()) + execs += [{ + 'name': 'qemu-system-' + target_name + 'w', + 'gui': true, + 'sources': files('softmmu/main.c'), + 'dependencies': [] + }] + endif + if config_host.has_key('CONFIG_FUZZ') + specific_fuzz = specific_fuzz_ss.apply(config_target, strict: false) + execs += [{ + 'name': 'qemu-fuzz-' + target_name, + 'gui': false, + 'sources': specific_fuzz.sources(), + 'dependencies': specific_fuzz.dependencies(), + }] + endif + else + execs = [{ + 'name': 'qemu-' + target_name, + 'gui': false, + 'sources': [], + 'dependencies': [] + }] + endif + foreach exe: execs + emulators += {exe['name']: + executable(exe['name'], exe['sources'], + install: true, + c_args: c_args, + dependencies: arch_deps + deps + exe['dependencies'], + objects: lib.extract_all_objects(recursive: true), + link_language: link_language, + link_depends: [block_syms, qemu_syms] + exe.get('link_depends', []), + link_args: link_args, + gui_app: exe['gui']) + } + + if 'CONFIG_TRACE_SYSTEMTAP' in config_host + foreach stp: [ + {'ext': '.stp-build', 'fmt': 'stap', 'bin': meson.current_build_dir() / exe['name'], 'install': false}, + {'ext': '.stp', 'fmt': 'stap', 'bin': get_option('prefix') / get_option('bindir') / exe['name'], 'install': true}, + {'ext': '-simpletrace.stp', 'fmt': 'simpletrace-stap', 'bin': '', 'install': true}, + {'ext': '-log.stp', 'fmt': 'log-stap', 'bin': '', 'install': true}, + ] + custom_target(exe['name'] + stp['ext'], + input: trace_events_all, + output: exe['name'] + stp['ext'], + capture: true, + install: stp['install'], + install_dir: get_option('datadir') / 'systemtap/tapset', + command: [ + tracetool, '--group=all', '--format=' + stp['fmt'], + '--binary=' + stp['bin'], + '--target-name=' + target_name, + '--target-type=' + target_type, + '--probe-prefix=qemu.' + target_type + '.' + target_name, + '@INPUT@', + ]) + endforeach + endif + endforeach +endforeach + +# Other build targets + +if 'CONFIG_LINUX_USER' in config_target + executable('qemu-binfmt', files('linux-user/binfmt.c'), + install: true) +endif + +if 'CONFIG_PLUGIN' in config_host + install_headers('include/qemu/qemu-plugin.h') +endif + +if 'CONFIG_GUEST_AGENT' in config_host + subdir('qga') +endif + +# Don't build qemu-keymap if xkbcommon is not explicitly enabled +# when we don't build tools or system +if xkbcommon.found() + # used for the update-keymaps target, so include rules even if !have_tools + qemu_keymap = executable('qemu-keymap', files('qemu-keymap.c', 'ui/input-keymap.c') + genh, + dependencies: [qemuutil, xkbcommon], install: have_tools) +endif + +if have_tools + qemu_img = executable('qemu-img', [files('qemu-img.c'), hxdep], + dependencies: [authz, block, crypto, io, qom, qemuutil], install: true) + qemu_io = executable('qemu-io', files('qemu-io.c'), + dependencies: [block, qemuutil], install: true) + qemu_nbd = executable('qemu-nbd', files('qemu-nbd.c'), + dependencies: [blockdev, qemuutil], install: true) + +# subdir('storage-daemon') +# subdir('contrib/rdmacm-mux') +# subdir('contrib/elf2dmp') + +# executable('qemu-edid', files('qemu-edid.c', 'hw/display/edid-generate.c'), +# dependencies: qemuutil, +# install: true) + + if 'CONFIG_VHOST_USER' in config_host + subdir('contrib/vhost-user-blk') + subdir('contrib/vhost-user-gpu') + subdir('contrib/vhost-user-input') + subdir('contrib/vhost-user-scsi') + endif + +# if targetos == 'linux' +# executable('qemu-bridge-helper', files('qemu-bridge-helper.c'), +# dependencies: [qemuutil, libcap_ng], +# install: true, +# install_dir: get_option('libexecdir')) + +# executable('qemu-pr-helper', files('scsi/qemu-pr-helper.c', 'scsi/utils.c'), +# dependencies: [authz, crypto, io, qom, qemuutil, +# libcap_ng, mpathpersist], +# install: true) +# endif + +# if 'CONFIG_IVSHMEM' in config_host +# subdir('contrib/ivshmem-client') +# subdir('contrib/ivshmem-server') +# endif +endif + +subdir('scripts') +#subdir('tools') +#subdir('pc-bios') +#subdir('docs') +#subdir('tests') +#if 'CONFIG_GTK' in config_host +# subdir('po') +#endif + +if host_machine.system() == 'windows' + nsis_cmd = [ + find_program('scripts/nsis.py'), + '@OUTPUT@', + get_option('prefix'), + meson.current_source_dir(), + host_machine.cpu(), + '--', + '-DDISPLAYVERSION=' + meson.project_version(), + ] + if build_docs + nsis_cmd += '-DCONFIG_DOCUMENTATION=y' + endif + if 'CONFIG_GTK' in config_host + nsis_cmd += '-DCONFIG_GTK=y' + endif + + nsis = custom_target('nsis', + output: 'qemu-setup-' + meson.project_version() + '.exe', + input: files('qemu.nsi'), + build_always_stale: true, + command: nsis_cmd + ['@INPUT@']) + alias_target('installer', nsis) +endif + +######################### +# Configuration summary # +######################### + +summary_info = {} +summary_info += {'Install prefix': get_option('prefix')} +summary_info += {'BIOS directory': qemu_datadir} +summary_info += {'firmware path': get_option('qemu_firmwarepath')} +summary_info += {'binary directory': get_option('bindir')} +summary_info += {'library directory': get_option('libdir')} +summary_info += {'module directory': qemu_moddir} +summary_info += {'libexec directory': get_option('libexecdir')} +summary_info += {'include directory': get_option('includedir')} +summary_info += {'config directory': get_option('sysconfdir')} +if targetos != 'windows' + summary_info += {'local state directory': get_option('localstatedir')} + summary_info += {'Manual directory': get_option('mandir')} +else + summary_info += {'local state directory': 'queried at runtime'} +endif +summary_info += {'Doc directory': get_option('docdir')} +summary_info += {'Build directory': meson.current_build_dir()} +summary_info += {'Source path': meson.current_source_dir()} +summary_info += {'GIT binary': config_host['GIT']} +summary_info += {'GIT submodules': config_host['GIT_SUBMODULES']} +summary_info += {'C compiler': meson.get_compiler('c').cmd_array()[0]} +summary_info += {'Host C compiler': meson.get_compiler('c', native: true).cmd_array()[0]} +if link_language == 'cpp' + summary_info += {'C++ compiler': meson.get_compiler('cpp').cmd_array()[0]} +else + summary_info += {'C++ compiler': false} +endif +if targetos == 'darwin' + summary_info += {'Objective-C compiler': meson.get_compiler('objc').cmd_array()[0]} +endif +summary_info += {'ARFLAGS': config_host['ARFLAGS']} +summary_info += {'CFLAGS': ' '.join(get_option('c_args') + + ['-O' + get_option('optimization')] + + (get_option('debug') ? ['-g'] : []))} +if link_language == 'cpp' + summary_info += {'CXXFLAGS': ' '.join(get_option('cpp_args') + + ['-O' + get_option('optimization')] + + (get_option('debug') ? ['-g'] : []))} +endif +link_args = get_option(link_language + '_link_args') +if link_args.length() > 0 + summary_info += {'LDFLAGS': ' '.join(link_args)} +endif +summary_info += {'QEMU_CFLAGS': config_host['QEMU_CFLAGS']} +summary_info += {'QEMU_LDFLAGS': config_host['QEMU_LDFLAGS']} +summary_info += {'make': config_host['MAKE']} +summary_info += {'python': '@0@ (version: @1@)'.format(python.full_path(), python.language_version())} +#summary_info += {'sphinx-build': sphinx_build.found()} +summary_info += {'sphinx-build': false} +summary_info += {'genisoimage': config_host['GENISOIMAGE']} +# TODO: add back version +summary_info += {'slirp support': slirp_opt == 'disabled' ? false : slirp_opt} +if slirp_opt != 'disabled' + summary_info += {'smbd': config_host['CONFIG_SMBD_COMMAND']} +endif +summary_info += {'module support': config_host.has_key('CONFIG_MODULES')} +if config_host.has_key('CONFIG_MODULES') + summary_info += {'alternative module path': config_host.has_key('CONFIG_MODULE_UPGRADES')} +endif +summary_info += {'host CPU': cpu} +summary_info += {'host endianness': build_machine.endian()} +summary_info += {'target list': ' '.join(target_dirs)} +summary_info += {'gprof enabled': config_host.has_key('CONFIG_GPROF')} +summary_info += {'sparse enabled': sparse.found()} +summary_info += {'strip binaries': get_option('strip')} +summary_info += {'profiler': config_host.has_key('CONFIG_PROFILER')} +summary_info += {'static build': config_host.has_key('CONFIG_STATIC')} +if targetos == 'darwin' + summary_info += {'Cocoa support': config_host.has_key('CONFIG_COCOA')} +endif +# TODO: add back version +summary_info += {'SDL support': sdl.found()} +summary_info += {'SDL image support': sdl_image.found()} +# TODO: add back version +summary_info += {'GTK support': config_host.has_key('CONFIG_GTK')} +summary_info += {'GTK GL support': config_host.has_key('CONFIG_GTK_GL')} +summary_info += {'pixman': pixman.found()} +# TODO: add back version +summary_info += {'VTE support': config_host.has_key('CONFIG_VTE')} +summary_info += {'TLS priority': config_host['CONFIG_TLS_PRIORITY']} +summary_info += {'GNUTLS support': config_host.has_key('CONFIG_GNUTLS')} +# TODO: add back version +summary_info += {'libgcrypt': config_host.has_key('CONFIG_GCRYPT')} +if config_host.has_key('CONFIG_GCRYPT') + summary_info += {' hmac': config_host.has_key('CONFIG_GCRYPT_HMAC')} + summary_info += {' XTS': not config_host.has_key('CONFIG_QEMU_PRIVATE_XTS')} +endif +# TODO: add back version +summary_info += {'nettle': config_host.has_key('CONFIG_NETTLE')} +if config_host.has_key('CONFIG_NETTLE') + summary_info += {' XTS': not config_host.has_key('CONFIG_QEMU_PRIVATE_XTS')} +endif +summary_info += {'libtasn1': config_host.has_key('CONFIG_TASN1')} +summary_info += {'PAM': config_host.has_key('CONFIG_AUTH_PAM')} +summary_info += {'iconv support': iconv.found()} +summary_info += {'curses support': curses.found()} +# TODO: add back version +summary_info += {'virgl support': config_host.has_key('CONFIG_VIRGL')} +summary_info += {'curl support': config_host.has_key('CONFIG_CURL')} +summary_info += {'mingw32 support': targetos == 'windows'} +summary_info += {'Audio drivers': config_host['CONFIG_AUDIO_DRIVERS']} +summary_info += {'Block whitelist (rw)': config_host['CONFIG_BDRV_RW_WHITELIST']} +summary_info += {'Block whitelist (ro)': config_host['CONFIG_BDRV_RO_WHITELIST']} +summary_info += {'VirtFS support': config_host.has_key('CONFIG_VIRTFS')} +#summary_info += {'build virtiofs daemon': have_virtiofsd} +summary_info += {'build virtiofs daemon': false} +summary_info += {'Multipath support': mpathpersist.found()} +summary_info += {'VNC support': vnc.found()} +if vnc.found() + summary_info += {'VNC SASL support': sasl.found()} + summary_info += {'VNC JPEG support': jpeg.found()} + summary_info += {'VNC PNG support': png.found()} +endif +summary_info += {'xen support': config_host.has_key('CONFIG_XEN_BACKEND')} +if config_host.has_key('CONFIG_XEN_BACKEND') + summary_info += {'xen ctrl version': config_host['CONFIG_XEN_CTRL_INTERFACE_VERSION']} +endif +summary_info += {'brlapi support': config_host.has_key('CONFIG_BRLAPI')} +#summary_info += {'Documentation': build_docs} +summary_info += {'Documentation': false} +summary_info += {'PIE': get_option('b_pie')} +summary_info += {'vde support': config_host.has_key('CONFIG_VDE')} +summary_info += {'netmap support': config_host.has_key('CONFIG_NETMAP')} +summary_info += {'Linux AIO support': config_host.has_key('CONFIG_LINUX_AIO')} +summary_info += {'Linux io_uring support': config_host.has_key('CONFIG_LINUX_IO_URING')} +summary_info += {'ATTR/XATTR support': config_host.has_key('CONFIG_ATTR')} +summary_info += {'Install blobs': get_option('install_blobs')} +summary_info += {'KVM support': config_all.has_key('CONFIG_KVM')} +summary_info += {'HAX support': config_all.has_key('CONFIG_HAX')} +summary_info += {'HVF support': config_all.has_key('CONFIG_HVF')} +summary_info += {'WHPX support': config_all.has_key('CONFIG_WHPX')} +summary_info += {'TCG support': config_all.has_key('CONFIG_TCG')} +if config_all.has_key('CONFIG_TCG') + summary_info += {'TCG debug enabled': config_host.has_key('CONFIG_DEBUG_TCG')} + summary_info += {'TCG interpreter': config_host.has_key('CONFIG_TCG_INTERPRETER')} +endif +summary_info += {'malloc trim support': has_malloc_trim} +summary_info += {'RDMA support': config_host.has_key('CONFIG_RDMA')} +summary_info += {'PVRDMA support': config_host.has_key('CONFIG_PVRDMA')} +summary_info += {'fdt support': fdt_opt == 'disabled' ? false : fdt_opt} +summary_info += {'membarrier': config_host.has_key('CONFIG_MEMBARRIER')} +summary_info += {'preadv support': config_host.has_key('CONFIG_PREADV')} +summary_info += {'fdatasync': config_host.has_key('CONFIG_FDATASYNC')} +summary_info += {'madvise': config_host.has_key('CONFIG_MADVISE')} +summary_info += {'posix_madvise': config_host.has_key('CONFIG_POSIX_MADVISE')} +summary_info += {'posix_memalign': config_host.has_key('CONFIG_POSIX_MEMALIGN')} +summary_info += {'libcap-ng support': config_host.has_key('CONFIG_LIBCAP_NG')} +summary_info += {'vhost-kernel support': config_host.has_key('CONFIG_VHOST_KERNEL')} +summary_info += {'vhost-net support': config_host.has_key('CONFIG_VHOST_NET')} +summary_info += {'vhost-crypto support': config_host.has_key('CONFIG_VHOST_CRYPTO')} +summary_info += {'vhost-scsi support': config_host.has_key('CONFIG_VHOST_SCSI')} +summary_info += {'vhost-vsock support': config_host.has_key('CONFIG_VHOST_VSOCK')} +summary_info += {'vhost-user support': config_host.has_key('CONFIG_VHOST_USER')} +summary_info += {'vhost-user-blk server support': have_vhost_user_blk_server} +summary_info += {'vhost-user-fs support': config_host.has_key('CONFIG_VHOST_USER_FS')} +summary_info += {'vhost-vdpa support': config_host.has_key('CONFIG_VHOST_VDPA')} +summary_info += {'Trace backends': config_host['TRACE_BACKENDS']} +if config_host['TRACE_BACKENDS'].split().contains('simple') + summary_info += {'Trace output file': config_host['CONFIG_TRACE_FILE'] + '-'} +endif +# TODO: add back protocol and server version +summary_info += {'spice support': config_host.has_key('CONFIG_SPICE')} +summary_info += {'rbd support': config_host.has_key('CONFIG_RBD')} +summary_info += {'xfsctl support': config_host.has_key('CONFIG_XFS')} +summary_info += {'smartcard support': config_host.has_key('CONFIG_SMARTCARD')} +summary_info += {'U2F support': u2f.found()} +summary_info += {'libusb': config_host.has_key('CONFIG_USB_LIBUSB')} +summary_info += {'usb net redir': config_host.has_key('CONFIG_USB_REDIR')} +summary_info += {'OpenGL support': config_host.has_key('CONFIG_OPENGL')} +summary_info += {'OpenGL dmabufs': config_host.has_key('CONFIG_OPENGL_DMABUF')} +summary_info += {'libiscsi support': config_host.has_key('CONFIG_LIBISCSI')} +summary_info += {'libnfs support': config_host.has_key('CONFIG_LIBNFS')} +summary_info += {'build guest agent': config_host.has_key('CONFIG_GUEST_AGENT')} +if targetos == 'windows' + if 'WIN_SDK' in config_host + summary_info += {'Windows SDK': config_host['WIN_SDK']} + endif + summary_info += {'QGA VSS support': config_host.has_key('CONFIG_QGA_VSS')} + summary_info += {'QGA w32 disk info': config_host.has_key('CONFIG_QGA_NTDDSCSI')} + summary_info += {'QGA MSI support': config_host.has_key('CONFIG_QGA_MSI')} +endif +summary_info += {'seccomp support': config_host.has_key('CONFIG_SECCOMP')} +summary_info += {'coroutine backend': config_host['CONFIG_COROUTINE_BACKEND']} +summary_info += {'coroutine pool': config_host['CONFIG_COROUTINE_POOL'] == '1'} +summary_info += {'debug stack usage': config_host.has_key('CONFIG_DEBUG_STACK_USAGE')} +summary_info += {'mutex debugging': config_host.has_key('CONFIG_DEBUG_MUTEX')} +summary_info += {'crypto afalg': config_host.has_key('CONFIG_AF_ALG')} +summary_info += {'GlusterFS support': config_host.has_key('CONFIG_GLUSTERFS')} +summary_info += {'gcov': get_option('b_coverage')} +summary_info += {'TPM support': config_host.has_key('CONFIG_TPM')} +summary_info += {'libssh support': config_host.has_key('CONFIG_LIBSSH')} +summary_info += {'QOM debugging': config_host.has_key('CONFIG_QOM_CAST_DEBUG')} +summary_info += {'Live block migration': config_host.has_key('CONFIG_LIVE_BLOCK_MIGRATION')} +summary_info += {'lzo support': config_host.has_key('CONFIG_LZO')} +summary_info += {'snappy support': config_host.has_key('CONFIG_SNAPPY')} +summary_info += {'bzip2 support': config_host.has_key('CONFIG_BZIP2')} +summary_info += {'lzfse support': config_host.has_key('CONFIG_LZFSE')} +summary_info += {'zstd support': config_host.has_key('CONFIG_ZSTD')} +summary_info += {'NUMA host support': config_host.has_key('CONFIG_NUMA')} +summary_info += {'libxml2': config_host.has_key('CONFIG_LIBXML2')} +summary_info += {'memory allocator': get_option('malloc')} +summary_info += {'avx2 optimization': config_host.has_key('CONFIG_AVX2_OPT')} +summary_info += {'avx512f optimization': config_host.has_key('CONFIG_AVX512F_OPT')} +summary_info += {'replication support': config_host.has_key('CONFIG_REPLICATION')} +summary_info += {'bochs support': config_host.has_key('CONFIG_BOCHS')} +summary_info += {'cloop support': config_host.has_key('CONFIG_CLOOP')} +summary_info += {'dmg support': config_host.has_key('CONFIG_DMG')} +summary_info += {'qcow v1 support': config_host.has_key('CONFIG_QCOW1')} +summary_info += {'vdi support': config_host.has_key('CONFIG_VDI')} +summary_info += {'vvfat support': config_host.has_key('CONFIG_VVFAT')} +summary_info += {'qed support': config_host.has_key('CONFIG_QED')} +summary_info += {'parallels support': config_host.has_key('CONFIG_PARALLELS')} +summary_info += {'sheepdog support': config_host.has_key('CONFIG_SHEEPDOG')} +summary_info += {'capstone': capstone_opt == 'disabled' ? false : capstone_opt} +summary_info += {'libpmem support': config_host.has_key('CONFIG_LIBPMEM')} +summary_info += {'libdaxctl support': config_host.has_key('CONFIG_LIBDAXCTL')} +summary_info += {'libudev': libudev.found()} +summary_info += {'default devices': config_host['CONFIG_MINIKCONF_MODE'] == '--defconfig'} +summary_info += {'plugin support': config_host.has_key('CONFIG_PLUGIN')} +summary_info += {'fuzzing support': config_host.has_key('CONFIG_FUZZ')} +if config_host.has_key('HAVE_GDB_BIN') + summary_info += {'gdb': config_host['HAVE_GDB_BIN']} +endif +summary_info += {'thread sanitizer': config_host.has_key('CONFIG_TSAN')} +summary_info += {'rng-none': config_host.has_key('CONFIG_RNG_NONE')} +summary_info += {'Linux keyring': config_host.has_key('CONFIG_SECRET_KEYRING')} +summary(summary_info, bool_yn: true) + +if not supported_cpus.contains(cpu) + message() + warning('SUPPORT FOR THIS HOST CPU WILL GO AWAY IN FUTURE RELEASES!') + message() + message('CPU host architecture ' + cpu + ' support is not currently maintained.') + message('The QEMU project intends to remove support for this host CPU in') + message('a future release if nobody volunteers to maintain it and to') + message('provide a build host for our continuous integration setup.') + message('configure has succeeded and you can continue to build, but') + message('if you care about QEMU on this platform you should contact') + message('us upstream at qemu-devel@nongnu.org.') +endif + +if not supported_oses.contains(targetos) + message() + warning('WARNING: SUPPORT FOR THIS HOST OS WILL GO AWAY IN FUTURE RELEASES!') + message() + message('Host OS ' + targetos + 'support is not currently maintained.') + message('The QEMU project intends to remove support for this host OS in') + message('a future release if nobody volunteers to maintain it and to') + message('provide a build host for our continuous integration setup.') + message('configure has succeeded and you can continue to build, but') + message('if you care about QEMU on this platform you should contact') + message('us upstream at qemu-devel@nongnu.org.') +endif diff --git a/meson/.coveragerc b/meson/.coveragerc new file mode 100644 index 000000000..c3d41d4b8 --- /dev/null +++ b/meson/.coveragerc @@ -0,0 +1,50 @@ +[run] +branch = True +concurrency = + multiprocessing +data_file = .coverage/coverage +parallel = True +source = + meson.py + mesonbuild/ + mesonconf.py + mesonintrospect.py + mesonrewriter.py + mesontest.py + run_cross_test.py + run_project_tests.py + run_tests.py + run_unittests.py + +# Aliases to /root/ are needed because Travis runs in docker at /root/. +[paths] +meson = + meson.py + /root/meson.py +mesonbuild = + mesonbuild/ + /root/mesonbuild/ +mesonconf = + mesonconf.py + /root/mesonconf.py +mesonintrospect = + mesonintrospect.py + /root/mesonintrospect.py +mesonrewriter = + mesonrewriter.py + /root/mesonrewriter.py +mesontest = + mesontest.py + /root/mesontest.py +run_cross_test = + run_cross_test.py + /root/run_cross_test.py +run_project_tests = + run_project_tests.py + /root/run_project_tests.py +run_tests = + run_tests.py + /root/run_tests.py +run_unittests = + run_unittests.py + /root/run_unittests.py diff --git a/meson/.editorconfig b/meson/.editorconfig new file mode 100644 index 000000000..c2dd5d06a --- /dev/null +++ b/meson/.editorconfig @@ -0,0 +1,27 @@ +root = true + +[*] +charset = utf-8 +end_of_line = lf +indent_style = space + +[*.[ch]] +indent_size = 4 + +[*.cpp] +indent_size = 4 + +[*.py] +indent_size = 4 + +[*.vala] +indent_size = 4 + +[*.xml] +indent_size = 2 + +[meson.build] +indent_size = 2 + +[*.json] +indent_size = 2 diff --git a/meson/.flake8 b/meson/.flake8 new file mode 100644 index 000000000..ff38c0ff1 --- /dev/null +++ b/meson/.flake8 @@ -0,0 +1,33 @@ +[flake8] +ignore = + # E241: multiple spaces after ':' + E241, + # E251: unexpected spaces around keyword / parameter equals + E251, + # E261: at least two spaces before inline comment + E261, + # E265: block comment should start with '# ' + E265, + # E501: line too long + E501, + # E302: expected 2 blank lines, found 1 + E302, + # E305: expected 2 blank lines after class or function definition, found 1 + E305, + # E401: multiple imports on one line + E401, + # E266: too many leading '#' for block comment + E266, + # E402: module level import not at top of file + E402, + # E731: do not assign a lambda expression, use a def (too many false positives) + E731, + # E741: ambiguous variable name 'l' + E741, + # E722: do not use bare except' + E722, + # W504: line break after binary operator + W504, + # A003: builtin class attribute + A003 +max-line-length = 120 diff --git a/meson/.gitattributes b/meson/.gitattributes new file mode 100644 index 000000000..7fd80e241 --- /dev/null +++ b/meson/.gitattributes @@ -0,0 +1,5 @@ +.gitignore export-ignore +.gitattributes export-ignore +* text eol=lf +*.png binary +*.icns binary diff --git a/meson/.github/ISSUE_TEMPLATE/bug_report.md b/meson/.github/ISSUE_TEMPLATE/bug_report.md new file mode 100644 index 000000000..4bc3fe6e0 --- /dev/null +++ b/meson/.github/ISSUE_TEMPLATE/bug_report.md @@ -0,0 +1,25 @@ +--- +name: Bug report +about: Meson bug report +title: '' +labels: '' +assignees: '' + +--- + +**Describe the bug** +A clear and concise description of what the bug is. + +**To Reproduce** +Please include your `meson.build` files, preferably as a minimal toy example showing the issue. +You may need to create simple source code files (don't include private/proprietary code). + +**Expected behavior** +A clear and concise description of what you expected to happen. + +**system parameters** +* Is this a [cross build](https://mesonbuild.com/Cross-compilation.html) or just a plain native build (for the same computer)? +* what operating system (e.g. MacOS Catalina, Windows 10, CentOS 8.0, Ubuntu 18.04, etc.) +* what Python version are you using e.g. 3.8.0 +* what `meson --version` +* what `ninja --version` if it's a Ninja build diff --git a/meson/.github/workflows/ci_frameworks.yml b/meson/.github/workflows/ci_frameworks.yml new file mode 100644 index 000000000..12d41f859 --- /dev/null +++ b/meson/.github/workflows/ci_frameworks.yml @@ -0,0 +1,86 @@ +name: ci_frameworks + +on: + push: + paths: + - "mesonbuild/dependencies/**" + - "test cases/frameworks/**" + - ".github/workflows/ci_frameworks.yml" + pull_request: + paths: + - "mesonbuild/dependencies/**" + - "test cases/frameworks/**" + - ".github/workflows/ci_frameworks.yml" + +jobs: + + scalapackMacOS: + runs-on: macos-latest + steps: + - uses: actions/checkout@v1 + - uses: actions/setup-python@v1 + with: + python-version: '3.x' + - run: python -m pip install -e . + - run: brew install pkg-config ninja gcc openmpi lapack scalapack + - run: meson setup "test cases/frameworks/30 scalapack" build + - run: ninja -C build + - uses: actions/upload-artifact@v1 + if: failure() + with: + name: Scalapack_Mac_build + path: build/meson-logs/meson-log.txt + - run: meson test -C build -v + - uses: actions/upload-artifact@v1 + if: failure() + with: + name: Scalapack_Mac_test + path: build/meson-logs/testlog.txt + + HDF5macos: + runs-on: macos-latest + steps: + - uses: actions/checkout@v1 + - uses: actions/setup-python@v1 + with: + python-version: '3.x' + - run: python -m pip install -e . + - run: brew install pkg-config ninja gcc hdf5 + - run: meson setup "test cases/frameworks/25 hdf5" build + - run: ninja -C build + - uses: actions/upload-artifact@v1 + if: failure() + with: + name: HDF5_Mac_build + path: build/meson-logs/meson-log.txt + - run: meson test -C build -v + - uses: actions/upload-artifact@v1 + if: failure() + with: + name: HDF5_Mac_test + path: build/meson-logs/testlog.txt + + Qt4macos: + runs-on: macos-latest + steps: + - uses: actions/checkout@v1 + - uses: actions/setup-python@v1 + with: + python-version: '3.x' + - run: python -m pip install -e . + - run: brew install pkg-config ninja gcc + - run: brew tap cartr/qt4 + - run: brew install qt@4 + - run: meson setup "test cases/frameworks/4 qt" build -Drequired=qt4 + - run: ninja -C build + - uses: actions/upload-artifact@v1 + if: failure() + with: + name: Qt4_Mac_build + path: build/meson-logs/meson-log.txt + - run: meson test -C build -v + - uses: actions/upload-artifact@v1 + if: failure() + with: + name: Qt4_Mac_test + path: build/meson-logs/testlog.txt diff --git a/meson/.github/workflows/images.yml b/meson/.github/workflows/images.yml new file mode 100644 index 000000000..0cf9156f9 --- /dev/null +++ b/meson/.github/workflows/images.yml @@ -0,0 +1,56 @@ +name: CI image builder + +on: + push: + branches: + - master + paths: + - 'ci/ciimage/**' + - '.github/workflows/images.yml' + + pull_request: + branches: + - master + paths: + - 'ci/ciimage/**' + - '.github/workflows/images.yml' + + # Rebuild the images every week (Sunday) + schedule: + - cron: '0 0 * * 0' + +jobs: + build: + name: ${{ matrix.cfg.name }} + runs-on: ubuntu-latest + strategy: + fail-fast: false + matrix: + cfg: + - { name: Arch Linux, id: arch } + - { name: CUDA (on Arch), id: cuda } + - { name: Fedora, id: fedora } + - { name: OpenSUSE, id: opensuse } + - { name: Ubuntu Bionic, id: bionic } + - { name: Ubuntu Eoan, id: eoan } + steps: + - uses: actions/checkout@v2 + + # Login to dockerhub + - name: Docker login + if: github.event_name == 'push' || github.event_name == 'schedule' + uses: azure/docker-login@v1 + with: + username: ${{ secrets.DOCKER_USERNAME }} + password: ${{ secrets.DOCKER_PASSWORD }} + + # Build and test + - name: Building the ${{ matrix.cfg.id }} image + run: ./ci/ciimage/build.py -t build ${{ matrix.cfg.id }} + - name: Testing the ${{ matrix.cfg.id }} image + run: ./ci/ciimage/build.py -t test ${{ matrix.cfg.id }} + + # Publish + - name: Push the ${{ matrix.cfg.id }} image + if: github.event_name == 'push' || github.event_name == 'schedule' + run: docker push mesonbuild/${{ matrix.cfg.id }} diff --git a/meson/.github/workflows/lint_mypy.yml b/meson/.github/workflows/lint_mypy.yml new file mode 100644 index 000000000..056f96e39 --- /dev/null +++ b/meson/.github/workflows/lint_mypy.yml @@ -0,0 +1,34 @@ +name: LintMypy + +on: + push: + paths: + - "**.py" + - ".github/workflows/lint_mypy.yml" + pull_request: + paths: + - "**.py" + - ".github/workflows/lint_mypy.yml" + +jobs: + + lint: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v1 + - uses: actions/setup-python@v1 + with: + python-version: '3.x' + # pylint version constraint can be removed when https://github.com/PyCQA/pylint/issues/3524 is resolved + - run: python -m pip install pylint==2.4.4 + - run: pylint mesonbuild + + mypy: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v1 + - uses: actions/setup-python@v1 + with: + python-version: '3.x' + - run: python -m pip install mypy + - run: mypy --follow-imports=skip mesonbuild/interpreterbase.py mesonbuild/mtest.py mesonbuild/minit.py mesonbuild/mintro.py mesonbuild/mparser.py mesonbuild/msetup.py mesonbuild/ast mesonbuild/wrap tools/ mesonbuild/modules/fs.py mesonbuild/dependencies/boost.py mesonbuild/dependencies/mpi.py mesonbuild/dependencies/hdf5.py mesonbuild/compilers/mixins/intel.py mesonbuild/mlog.py mesonbuild/mcompile.py mesonbuild/mesonlib.py mesonbuild/arglist.py diff --git a/meson/.github/workflows/os_comp.yml b/meson/.github/workflows/os_comp.yml new file mode 100644 index 000000000..a5abf7da1 --- /dev/null +++ b/meson/.github/workflows/os_comp.yml @@ -0,0 +1,50 @@ +name: OS Comp Tests + +on: + push: + branches: + - master + pull_request: + +jobs: + xenial: + name: Ubuntu 16.04 + runs-on: ubuntu-16.04 + steps: + - uses: actions/checkout@v2 + - name: Install Dependencies + run: | + sudo apt update -yq + sudo apt install -yq --no-install-recommends python3-setuptools python3-pip g++ gfortran gobjc gobjc++ zlib1g-dev python-dev python3-dev python3-jsonschema + - name: Install ninja-build tool + uses: seanmiddleditch/gha-setup-ninja@v1 + - name: Python version + run: python3 --version + - name: Ninja version + run: ninja --version + - name: Run tests + run: LD_LIBRARY_PATH=/usr/local/share/boost/1.69.0/lib/:$(rustc --print sysroot)/lib:$LD_LIBRARY_PATH python3 run_tests.py + env: + CI: '1' + XENIAL: '1' + + arch: + name: ${{ matrix.cfg.name }} + runs-on: ubuntu-latest + strategy: + fail-fast: false + matrix: + cfg: + - { name: Arch Linux, id: arch } + - { name: CUDA (on Arch), id: cuda } + - { name: Fedora, id: fedora } + - { name: OpenSUSE, id: opensuse } + - { name: Ubuntu Bionic, id: bionic } + container: mesonbuild/${{ matrix.cfg.id }}:latest + steps: + - uses: actions/checkout@v2 + - name: Run tests + # All environment variables are stored inside the docker image in /ci/env_vars.sh + # They are defined in the `env` section in each image.json. CI_ARGS should be set + # via the `args` array ub the image.json + run: bash -c 'source /ci/env_vars.sh; cd $GITHUB_WORKSPACE; ./run_tests.py $CI_ARGS' diff --git a/meson/.github/workflows/unusedargs_missingreturn.yml b/meson/.github/workflows/unusedargs_missingreturn.yml new file mode 100644 index 000000000..859dec2df --- /dev/null +++ b/meson/.github/workflows/unusedargs_missingreturn.yml @@ -0,0 +1,71 @@ +name: UnusedMissingReturn +# this workflow checks for +# * unused input arguments +# * missing return values +# * strict prototypes +# some users have default configs that will needlessly fail Meson self-tests due to these syntax. + +on: + push: + paths: + - ".github/workflows/unusedargs_missingreturn.yml" + - "test cases/cmake/**" + - "test cases/common/**" + - "test cases/fortran/**" + - "test cases/linuxlike/**" + - "test cases/objc/**" + - "test cases/objcpp/**" + - "test caes/windows/**" + + pull_request: + paths: + - ".github/workflows/unusedargs_missingreturn.yml" + - "test cases/cmake/**" + - "test cases/common/**" + - "test cases/fortran/**" + - "test cases/linuxlike/**" + - "test cases/objc/**" + - "test cases/objcpp/**" + - "test caes/windows/**" + +jobs: + + linux: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v1 + - uses: actions/setup-python@v1 + with: + python-version: '3.x' + - name: Install Compilers + run: | + sudo apt update -yq + sudo apt install -yq --no-install-recommends g++ gfortran ninja-build gobjc gobjc++ + - run: python run_project_tests.py --only cmake common fortran platform-linux "objective c" "objective c++" + env: + CI: "1" + CFLAGS: "-Werror=unused-parameter -Werror=return-type -Werror=strict-prototypes" + CPPFLAGS: "-Werror=unused-parameter -Werror=return-type" + FFLAGS: "-fimplicit-none" + + windows: + runs-on: windows-latest + steps: + - uses: actions/checkout@v1 + - uses: actions/setup-python@v1 + with: + python-version: '3.x' + # ninja==1.10 pypi release didn't ship with windows binaries, which causes + # pip to try to build it which fails on Windows. Pin the previous version + # for now. We can update once that's fixed. + # https://pypi.org/project/ninja/1.10.0/#files + - run: pip install ninja==1.9.0.post1 pefile + - run: python run_project_tests.py --only platform-windows + env: + CI: "1" + CFLAGS: "-Werror=unused-parameter -Werror=return-type -Werror=strict-prototypes" + CPPFLAGS: "-Werror=unused-parameter -Werror=return-type" + FFLAGS: "-fimplicit-none" + CC: gcc + CXX: g++ + FC: gfortran diff --git a/meson/.gitignore b/meson/.gitignore new file mode 100644 index 000000000..fea337e97 --- /dev/null +++ b/meson/.gitignore @@ -0,0 +1,33 @@ +.mypy_cache/ +.pytest_cache/ +/.project +/.pydevproject +/.settings +/.cproject +/.idea +/.vscode + +__pycache__ +.coverage +/install dir +/work area + +/meson-test-run.txt +/meson-test-run.xml +/meson-cross-test-run.txt +/meson-cross-test-run.xml + +.DS_Store +*~ +*.swp +packagecache +/MANIFEST +/build +/dist +/meson.egg-info + +/docs/built_docs +/docs/hotdoc-private* + +*.pyc +/*venv* diff --git a/meson/.mailmap b/meson/.mailmap new file mode 100644 index 000000000..8298afb0b --- /dev/null +++ b/meson/.mailmap @@ -0,0 +1,10 @@ +Alexandre Foley AlexandreFoley +Igor Gnatenko Igor Gnatenko +Jussi Pakkanen Jussi Pakkanen +Jussi Pakkanen jpakkane +Nirbheek Chauhan Nirbheek Chauhan +Nicolas Schneider Nicolas Schneider +Patrick Griffis TingPing +Thibault Saunier Thibault Saunier +Thibault Saunier Saunier Thibault +Tim-Philipp Müller Tim-Philipp Müller diff --git a/meson/.mypy.ini b/meson/.mypy.ini new file mode 100644 index 000000000..b8dad03fc --- /dev/null +++ b/meson/.mypy.ini @@ -0,0 +1,5 @@ +[mypy] +strict_optional = False +show_error_context = False +show_column_numbers = True +ignore_missing_imports = True diff --git a/meson/.pylintrc b/meson/.pylintrc new file mode 100644 index 000000000..10483d33d --- /dev/null +++ b/meson/.pylintrc @@ -0,0 +1,9 @@ +[MASTER] +jobs=0 + +[REPORTS] +score=no + +[MESSAGES CONTROL] +disable=all +enable=unreachable \ No newline at end of file diff --git a/meson/.travis.yml b/meson/.travis.yml new file mode 100644 index 000000000..22d76e7ea --- /dev/null +++ b/meson/.travis.yml @@ -0,0 +1,47 @@ +sudo: false + +branches: + only: + - master + # Release branches + - /^[0-9]+\.[0-9]+$/ + +os: + - linux + - osx + +compiler: + - gcc + - clang + +env: + - MESON_ARGS="" + - RUN_TESTS_ARGS="--no-unittests" MESON_ARGS="--unity=on" + +language: + - cpp + +services: + - docker + +matrix: + exclude: + # On OS X gcc is just a wrapper around clang, so don't waste time testing that + - os: osx + compiler: gcc + include: + # Test cross builds separately, they do not use the global compiler + # Also hijack one cross build to test long commandline handling codepath (and avoid overloading Travis) + - os: linux + compiler: gcc + env: RUN_TESTS_ARGS="--cross ubuntu-armhf.txt --cross linux-mingw-w64-64bit.txt" MESON_RSP_THRESHOLD=0 + - os: linux + compiler: gcc + env: RUN_TESTS_ARGS="--cross ubuntu-armhf.txt --cross linux-mingw-w64-64bit.txt" MESON_ARGS="--unity=on" + +before_install: + - python ./skip_ci.py --base-branch-env=TRAVIS_BRANCH --is-pull-env=TRAVIS_PULL_REQUEST + - ./ci/travis_install.sh + +script: + - ./ci/travis_script.sh diff --git a/meson/CODEOWNERS b/meson/CODEOWNERS new file mode 100644 index 000000000..e361e8344 --- /dev/null +++ b/meson/CODEOWNERS @@ -0,0 +1,5 @@ +* @jpakkane +/mesonbuild/modules/pkgconfig.py @xclaesse +/mesonbuild/modules/cmake.py @mensinda +/mesonbuild/ast/* @mensinda +/mesonbuild/cmake/* @mensinda diff --git a/meson/COPYING b/meson/COPYING new file mode 100644 index 000000000..d64569567 --- /dev/null +++ b/meson/COPYING @@ -0,0 +1,202 @@ + + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. + + END OF TERMS AND CONDITIONS + + APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "[]" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. + + Copyright [yyyy] [name of copyright owner] + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. diff --git a/meson/MANIFEST.in b/meson/MANIFEST.in new file mode 100644 index 000000000..11c804a38 --- /dev/null +++ b/meson/MANIFEST.in @@ -0,0 +1,19 @@ +graft test?cases +graft manual?tests +graft cross +graft data +graft graphics +graft man +graft tools + +include contributing.md +include COPYING +include README.md +include run_cross_test.py +include run_tests.py +include run_unittests.py +include run_meson_command_tests.py +include run_project_tests.py +include ghwt.py +include __main__.py +include meson.py diff --git a/meson/README.md b/meson/README.md new file mode 100644 index 000000000..9df015c11 --- /dev/null +++ b/meson/README.md @@ -0,0 +1,95 @@ +

+ +

+Meson® is a project to create the best possible next-generation +build system. + +#### Status + +[![PyPI](https://img.shields.io/pypi/v/meson.svg)](https://pypi.python.org/pypi/meson) +[![Travis](https://travis-ci.org/mesonbuild/meson.svg?branch=master)](https://travis-ci.org/mesonbuild/meson) +[![Build Status](https://dev.azure.com/jussi0947/jussi/_apis/build/status/mesonbuild.meson)](https://dev.azure.com/jussi0947/jussi/_build/latest?definitionId=1) +[![Codecov](https://codecov.io/gh/mesonbuild/meson/coverage.svg?branch=master)](https://codecov.io/gh/mesonbuild/meson/branch/master) +[![Code Quality: Python](https://img.shields.io/lgtm/grade/python/g/mesonbuild/meson.svg?logo=lgtm&logoWidth=18)](https://lgtm.com/projects/g/mesonbuild/meson/context:python) +[![Total Alerts](https://img.shields.io/lgtm/alerts/g/mesonbuild/meson.svg?logo=lgtm&logoWidth=18)](https://lgtm.com/projects/g/mesonbuild/meson/alerts) + +#### Dependencies + + - [Python](https://python.org) (version 3.5 or newer) + - [Ninja](https://ninja-build.org) (version 1.7 or newer) + +#### Installing from source + +Meson is available on [PyPi](https://pypi.python.org/pypi/meson), so +it can be installed with `pip3 install meson`. The exact command to +type to install with `pip` can vary between systems, be sure to use +the Python 3 version of `pip`. + +If you wish you can install it locally with the standard Python command: + +```console +python3 -m pip install meson +``` + +For builds using Ninja, Ninja can be downloaded directly from Ninja +[GitHub release page](https://github.com/ninja-build/ninja/releases) +or via [PyPi](https://pypi.python.org/pypi/ninja) + +```console +python3 -m pip install ninja +``` + +More on Installing Meson build can be found at the +[getting meson page](https://mesonbuild.com/Getting-meson.html). + +#### Running + +Meson requires that you have a source directory and a build directory +and that these two are different. In your source root must exist a +file called `meson.build`. To generate the build system run this +command: + +`meson setup ` + +Depending on how you obtained Meson the command might also be called +`meson.py` instead of plain `meson`. In the rest of this document we +are going to use the latter form. + +You can omit either of the two directories, and Meson will substitute +the current directory and autodetect what you mean. This allows you to +do things like this: + +```console +cd +meson setup builddir +``` + +To compile, cd into your build directory and type `ninja`. To run unit +tests, type `ninja test`. + +More on running Meson build system commands can be found at the +[running meson page](https://mesonbuild.com/Running-Meson.html) +or by typing `meson --help`. + +#### Contributing + +We love code contributions. See the [contribution +page](https://mesonbuild.com/Contributing.html) on the website for +details. + + +#### IRC + +The irc channel for Meson is `#mesonbuild` over at Freenode. + +You can use [FreeNode's official webchat][meson_irc] +to connect to this channel. + +[meson_irc]: https://webchat.freenode.net/?channels=%23mesonbuild + +#### Further info + +More information about the Meson build system can be found at the +[project's home page](https://mesonbuild.com). + +Meson is a registered trademark of ***Jussi Pakkanen***. diff --git a/meson/__main__.py b/meson/__main__.py new file mode 100644 index 000000000..27cd4c05a --- /dev/null +++ b/meson/__main__.py @@ -0,0 +1,20 @@ +#!/usr/bin/env python3 + +# Copyright 2016 The Meson development team + +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at + +# http://www.apache.org/licenses/LICENSE-2.0 + +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from mesonbuild import mesonmain +import sys + +sys.exit(mesonmain.main()) diff --git a/meson/azure-pipelines.yml b/meson/azure-pipelines.yml new file mode 100644 index 000000000..1ff542a02 --- /dev/null +++ b/meson/azure-pipelines.yml @@ -0,0 +1,218 @@ +name: $(BuildID) + +trigger: + branches: + include: + - 'master' + # Release branches + - '0.*' + +variables: + CI: 1 + +jobs: + +- job: vs2017 + timeoutInMinutes: 120 + pool: + vmImage: VS2017-Win2016 + + strategy: + matrix: + vc2017x86ninja: + arch: x86 + compiler: msvc2017 + backend: ninja + MESON_RSP_THRESHOLD: 0 + vc2017x64vs: + arch: x64 + compiler: msvc2017 + backend: vs2017 + clangclx64ninja: + arch: x64 + compiler: clang-cl + backend: ninja + + steps: + - task: UsePythonVersion@0 + inputs: + versionSpec: '3.5' + addToPath: true + architecture: 'x64' + - template: ci/azure-steps.yml + +- job: vs2019 + timeoutInMinutes: 120 + pool: + vmImage: windows-2019 + + strategy: + matrix: + vc2019x64ninja: + arch: x64 + compiler: msvc2019 + backend: ninja + vc2019x64vs: + arch: x64 + compiler: msvc2019 + backend: vs2019 + + steps: + - task: UsePythonVersion@0 + inputs: + versionSpec: '3.7' + addToPath: true + architecture: 'x64' + - template: ci/azure-steps.yml + +- job: cygwin + timeoutInMinutes: 120 + pool: + vmImage: VS2017-Win2016 + strategy: + matrix: + gccx64ninja: {} + variables: + CYGWIN_ROOT: $(System.Workfolder)\cygwin + CYGWIN_MIRROR: http://cygwin.mirror.constant.com + steps: + - script: | + choco install cygwin --params="/InstallDir:%CYGWIN_ROOT%" + displayName: Install Cygwin + - script: | + %CYGWIN_ROOT%\cygwinsetup.exe -qnNdO -R "%CYGWIN_ROOT%" -s "%CYGWIN_MIRROR%" -g -P ^ + gcc-fortran,^ + gcc-objc++,^ + gcc-objc,^ + git,^ + gobject-introspection,^ + gtk-doc,^ + libarchive13,^ + libboost-devel,^ + libglib2.0-devel,^ + libgtk3-devel,^ + libjsoncpp19,^ + librhash0,^ + libuv1,^ + libxml2,^ + libxml2-devel,^ + libxslt,^ + libxslt-devel,^ + ninja,^ + python2-devel,^ + python3-devel,^ + python3-libxml2,^ + python3-libxslt,^ + python36-pip,^ + vala,^ + wget,^ + cmake,^ + zlib-devel + displayName: Install Dependencies + - script: | + set PATH=%CYGWIN_ROOT%\bin;%SYSTEMROOT%\system32 + env.exe -- python3 -m pip --disable-pip-version-check install gcovr pefile jsonschema + displayName: "pip install gcovr pefile jsonschema (pytest-xdist broken, skipped: CHECK ME AGAIN)" + - script: | + set BOOST_ROOT= + set PATH=%CYGWIN_ROOT%\bin;%SYSTEMROOT%\system32 + set SKIP_STATIC_BOOST=1 + env.exe -- python3 run_tests.py --backend=ninja + # Cygwin's static boost installation is broken (some static library + # variants such as boost_thread are not present) + displayName: Run Tests + - task: CopyFiles@2 + condition: not(canceled()) + inputs: + contents: 'meson-test-run.*' + targetFolder: $(Build.ArtifactStagingDirectory) + - task: PublishBuildArtifacts@1 + inputs: + artifactName: $(System.JobName) + # publishing artifacts from PRs from a fork is currently blocked + condition: and(eq(variables['system.pullrequest.isfork'], false), not(canceled())) + - task: PublishTestResults@2 + condition: not(canceled()) + inputs: + testResultsFiles: meson-test-run.xml + testRunTitle: $(System.JobName) + +- job: msys2 + pool: + vmImage: VS2017-Win2016 + strategy: + matrix: + gccx86ninja: + MSYSTEM: MINGW32 + MSYS2_ARCH: i686 + compiler: gcc + gccx64ninja: + MSYSTEM: MINGW64 + MSYS2_ARCH: x86_64 + MESON_RSP_THRESHOLD: 0 + compiler: gcc + clangx64ninja: + MSYSTEM: MINGW64 + MSYS2_ARCH: x86_64 + compiler: clang + variables: + MSYS2_ROOT: $(System.Workfolder)\msys64 + steps: + - script: | + choco install msys2 --params="/InstallDir:%MSYS2_ROOT% /NoUpdate /NoPath" + displayName: Install MSYS2 + - script: | + set PATH=%MSYS2_ROOT%\usr\bin;%SystemRoot%\system32;%SystemRoot%;%SystemRoot%\System32\Wbem + # Remove this line when https://github.com/msys2/MSYS2-packages/pull/2022 is merged + %MSYS2_ROOT%\usr\bin\pacman --noconfirm -Sy dash + echo Updating msys2 + %MSYS2_ROOT%\usr\bin\pacman --noconfirm -Syuu || echo system update failed, ignoring + echo Killing all msys2 processes + taskkill /F /FI "MODULES eq msys-2.0.dll" + echo Updating msys2 (again) + %MSYS2_ROOT%\usr\bin\pacman --noconfirm -Syuu + displayName: Update MSYS2 + - script: | + set PATH=%MSYS2_ROOT%\usr\bin;%SystemRoot%\system32;%SystemRoot%;%SystemRoot%\System32\Wbem + if %compiler%==gcc ( set "TOOLCHAIN=mingw-w64-$(MSYS2_ARCH)-toolchain" ) else ( set "TOOLCHAIN=mingw-w64-$(MSYS2_ARCH)-clang" ) + %MSYS2_ROOT%\usr\bin\pacman --noconfirm --needed -S ^ + base-devel ^ + git ^ + mercurial ^ + mingw-w64-$(MSYS2_ARCH)-cmake ^ + mingw-w64-$(MSYS2_ARCH)-lcov ^ + mingw-w64-$(MSYS2_ARCH)-libxml2 ^ + mingw-w64-$(MSYS2_ARCH)-ninja ^ + mingw-w64-$(MSYS2_ARCH)-pkg-config ^ + mingw-w64-$(MSYS2_ARCH)-python2 ^ + mingw-w64-$(MSYS2_ARCH)-python3 ^ + mingw-w64-$(MSYS2_ARCH)-python3-lxml ^ + mingw-w64-$(MSYS2_ARCH)-python3-setuptools ^ + mingw-w64-$(MSYS2_ARCH)-python3-pip ^ + %TOOLCHAIN% + %MSYS2_ROOT%\usr\bin\bash -lc "python3 -m pip --disable-pip-version-check install gcovr jsonschema pefile" + displayName: Install Dependencies + - script: | + set BOOST_ROOT= + set PATH=%SystemRoot%\system32;%SystemRoot%;%SystemRoot%\System32\Wbem + set PATHEXT=%PATHEXT%;.py + if %compiler%==clang ( set CC=clang && set CXX=clang++ && set OBJC=clang && set OBJCXX=clang++ ) + %MSYS2_ROOT%\usr\bin\bash -lc "MSYSTEM= python3 run_tests.py --backend=ninja" + env: + CHERE_INVOKING: yes + displayName: Run Tests + - task: CopyFiles@2 + condition: not(canceled()) + inputs: + contents: 'meson-test-run.*' + targetFolder: $(Build.ArtifactStagingDirectory) + - task: PublishBuildArtifacts@1 + inputs: + artifactName: $(System.JobName) + # publishing artifacts from PRs from a fork is currently blocked + condition: and(eq(variables['system.pullrequest.isfork'], false), not(canceled())) + - task: PublishTestResults@2 + condition: not(canceled()) + inputs: + testResultsFiles: meson-test-run.xml + testRunTitle: $(System.JobName) diff --git a/meson/ci/azure-steps.yml b/meson/ci/azure-steps.yml new file mode 100644 index 000000000..233bbfa36 --- /dev/null +++ b/meson/ci/azure-steps.yml @@ -0,0 +1,23 @@ +steps: +- task: PowerShell@2 + inputs: + targetType: 'filePath' + filePath: .\ci\run.ps1 + +- task: PublishTestResults@2 + inputs: + testResultsFiles: meson-test-run.xml + testRunTitle: $(System.JobName) + publishRunAttachments: true + condition: not(canceled()) + +- task: CopyFiles@2 + inputs: + contents: 'meson-test-run.*' + targetFolder: $(Build.ArtifactStagingDirectory) + condition: not(canceled()) + +- task: PublishBuildArtifacts@1 + inputs: + artifactName: $(System.JobName) + condition: not(canceled()) diff --git a/meson/ci/ciimage/.gitignore b/meson/ci/ciimage/.gitignore new file mode 100644 index 000000000..02a031910 --- /dev/null +++ b/meson/ci/ciimage/.gitignore @@ -0,0 +1,2 @@ +/build_* +/test_* diff --git a/meson/ci/ciimage/arch/image.json b/meson/ci/ciimage/arch/image.json new file mode 100644 index 000000000..525407396 --- /dev/null +++ b/meson/ci/ciimage/arch/image.json @@ -0,0 +1,6 @@ +{ + "base_image": "archlinux:latest", + "env": { + "CI": "1" + } +} diff --git a/meson/ci/ciimage/arch/install.sh b/meson/ci/ciimage/arch/install.sh new file mode 100755 index 000000000..fb27c2619 --- /dev/null +++ b/meson/ci/ciimage/arch/install.sh @@ -0,0 +1,53 @@ +#!/bin/bash + +set -e + +# Inspired by https://github.com/greyltc/docker-archlinux-aur/blob/master/add-aur.sh + +pkgs=( + python python-setuptools python-wheel python-pip python-pytest-xdist python-gobject python-jsonschema + ninja make git sudo fakeroot autoconf automake patch + libelf gcc gcc-fortran gcc-objc vala rust bison flex cython go dlang-dmd + mono boost qt5-base gtkmm3 gtest gmock protobuf wxgtk gobject-introspection + itstool gtk3 java-environment=8 gtk-doc llvm clang sdl2 graphviz + doxygen vulkan-validation-layers openssh mercurial gtk-sharp-2 qt5-tools + libwmf valgrind cmake netcdf-fortran openmpi nasm gnustep-base gettext + python-jsonschema python-lxml + # cuda +) + +aur_pkgs=(scalapack) +pip_pkgs=(hotdoc gcovr) +cleanup_pkgs=(go) + +AUR_USER=docker +PACMAN_OPTS='--needed --noprogressbar --noconfirm' + +# Patch config files +sed -i 's/#Color/Color/g' /etc/pacman.conf +sed -i 's,#MAKEFLAGS="-j2",MAKEFLAGS="-j$(nproc)",g' /etc/makepkg.conf +sed -i "s,PKGEXT='.pkg.tar.xz',PKGEXT='.pkg.tar',g" /etc/makepkg.conf + +# Install packages +pacman -Syu $PACMAN_OPTS "${pkgs[@]}" +python -m pip install "${pip_pkgs[@]}" + +# Setup the user +useradd -m $AUR_USER +echo "${AUR_USER}:" | chpasswd -e +echo "$AUR_USER ALL = NOPASSWD: ALL" >> /etc/sudoers + +# Install yay +su $AUR_USER -c 'cd; git clone https://aur.archlinux.org/yay.git' +su $AUR_USER -c 'cd; cd yay; makepkg' +pushd /home/$AUR_USER/yay/ +pacman -U *.pkg.tar --noprogressbar --noconfirm +popd +rm -rf /home/$AUR_USER/yay + +# Install yay deps +su $AUR_USER -c "yay -S $PACMAN_OPTS ${aur_pkgs[*]}" + +# cleanup +pacman -Rs --noconfirm "${cleanup_pkgs[@]}" +su $AUR_USER -c "yes | yay -Scc" diff --git a/meson/ci/ciimage/bionic/image.json b/meson/ci/ciimage/bionic/image.json new file mode 100644 index 000000000..6a3d7232e --- /dev/null +++ b/meson/ci/ciimage/bionic/image.json @@ -0,0 +1,8 @@ +{ + "base_image": "ubuntu:bionic", + "env": { + "CI": "1", + "SKIP_SCIENTIFIC": "1", + "DC": "gdc" + } +} diff --git a/meson/ci/ciimage/bionic/install.sh b/meson/ci/ciimage/bionic/install.sh new file mode 100755 index 000000000..0bfcdfb3f --- /dev/null +++ b/meson/ci/ciimage/bionic/install.sh @@ -0,0 +1,59 @@ +#!/bin/bash + +set -e + +export DEBIAN_FRONTEND=noninteractive +export LANG='C.UTF-8' +export DC=gdc + +pkgs=( + python3-pytest-xdist + python3-pip libxml2-dev libxslt1-dev libyaml-dev libjson-glib-dev + wget unzip cmake doxygen + clang + pkg-config-arm-linux-gnueabihf + qt4-linguist-tools qt5-default qtbase5-private-dev + python-dev + libomp-dev + llvm lcov + ldc + libclang-dev + libgcrypt20-dev + libgpgme-dev + libhdf5-dev openssh-server + libboost-python-dev libboost-regex-dev + libblocksruntime-dev + libperl-dev libscalapack-mpi-dev libncurses-dev +) + +boost_pkgs=(atomic chrono date-time filesystem log regex serialization system test thread) + +sed -i '/^#\sdeb-src /s/^#//' "/etc/apt/sources.list" +apt-get -y update +apt-get -y upgrade +apt-get -y install eatmydata + +# Base stuff +eatmydata apt-get -y build-dep meson + +# Add boost packages +for i in "${boost_pkgs[@]}"; do + for j in "1.62.0" "1.65.1"; do + pkgs+=("libboost-${i}${j}") + done +done + +# packages +eatmydata apt-get -y install "${pkgs[@]}" + +eatmydata python3 -m pip install codecov gcovr jsonschema + +# Install the ninja 0.10 +wget https://github.com/ninja-build/ninja/releases/download/v1.10.0/ninja-linux.zip +unzip ninja-linux.zip -d /ci + +# cleanup +apt-get -y remove ninja-build +apt-get -y clean +apt-get -y autoclean +rm ninja-linux.zip diff --git a/meson/ci/ciimage/build.py b/meson/ci/ciimage/build.py new file mode 100755 index 000000000..e623a7e4c --- /dev/null +++ b/meson/ci/ciimage/build.py @@ -0,0 +1,193 @@ +#!/usr/bin/env python3 + +import json +import argparse +import stat +import textwrap +import shutil +import subprocess +from tempfile import TemporaryDirectory +from pathlib import Path +import typing as T + +image_namespace = 'mesonbuild' + +image_def_file = 'image.json' +install_script = 'install.sh' + +class ImageDef: + def __init__(self, image_dir: Path) -> None: + path = image_dir / image_def_file + data = json.loads(path.read_text()) + + assert isinstance(data, dict) + assert all([x in data for x in ['base_image', 'env']]) + assert isinstance(data['base_image'], str) + assert isinstance(data['env'], dict) + + self.base_image: str = data['base_image'] + self.args: T.List[str] = data.get('args', []) + self.env: T.Dict[str, str] = data['env'] + +class BuilderBase(): + def __init__(self, data_dir: Path, temp_dir: Path) -> None: + self.data_dir = data_dir + self.temp_dir = temp_dir + + self.common_sh = self.data_dir.parent / 'common.sh' + self.common_sh = self.common_sh.resolve(strict=True) + self.validate_data_dir() + + self.image_def = ImageDef(self.data_dir) + + self.docker = shutil.which('docker') + self.git = shutil.which('git') + if self.docker is None: + raise RuntimeError('Unable to find docker') + if self.git is None: + raise RuntimeError('Unable to find git') + + def validate_data_dir(self) -> None: + files = [ + self.data_dir / image_def_file, + self.data_dir / install_script, + ] + if not self.data_dir.exists(): + raise RuntimeError(f'{self.data_dir.as_posix()} does not exist') + for i in files: + if not i.exists(): + raise RuntimeError(f'{i.as_posix()} does not exist') + if not i.is_file(): + raise RuntimeError(f'{i.as_posix()} is not a regular file') + +class Builder(BuilderBase): + def gen_bashrc(self) -> None: + out_file = self.temp_dir / 'env_vars.sh' + out_data = '' + + # run_tests.py parameters + self.image_def.env['CI_ARGS'] = ' '.join(self.image_def.args) + + for key, val in self.image_def.env.items(): + out_data += f'export {key}="{val}"\n' + + # Also add /ci to PATH + out_data += 'export PATH="/ci:$PATH"\n' + + out_file.write_text(out_data) + + # make it executable + mode = out_file.stat().st_mode + out_file.chmod(mode | stat.S_IEXEC) + + def gen_dockerfile(self) -> None: + out_file = self.temp_dir / 'Dockerfile' + out_data = textwrap.dedent(f'''\ + FROM {self.image_def.base_image} + + ADD install.sh /ci/install.sh + ADD common.sh /ci/common.sh + ADD env_vars.sh /ci/env_vars.sh + RUN /ci/install.sh + ''') + + out_file.write_text(out_data) + + def do_build(self) -> None: + # copy files + for i in self.data_dir.iterdir(): + shutil.copy(str(i), str(self.temp_dir)) + shutil.copy(str(self.common_sh), str(self.temp_dir)) + + self.gen_bashrc() + self.gen_dockerfile() + + cmd_git = [self.git, 'rev-parse', '--short', 'HEAD'] + res = subprocess.run(cmd_git, cwd=self.data_dir, stdout=subprocess.PIPE) + if res.returncode != 0: + raise RuntimeError('Failed to get the current commit hash') + commit_hash = res.stdout.decode().strip() + + cmd = [ + self.docker, 'build', + '-t', f'{image_namespace}/{self.data_dir.name}:latest', + '-t', f'{image_namespace}/{self.data_dir.name}:{commit_hash}', + '--pull', + self.temp_dir.as_posix(), + ] + if subprocess.run(cmd).returncode != 0: + raise RuntimeError('Failed to build the docker image') + +class ImageTester(BuilderBase): + def __init__(self, data_dir: Path, temp_dir: Path, ci_root: Path) -> None: + super().__init__(data_dir, temp_dir) + self.meson_root = ci_root.parent.parent.resolve() + + def gen_dockerfile(self) -> None: + out_file = self.temp_dir / 'Dockerfile' + out_data = textwrap.dedent(f'''\ + FROM {image_namespace}/{self.data_dir.name} + + ADD meson /meson + ''') + + out_file.write_text(out_data) + + def copy_meson(self) -> None: + shutil.copytree( + self.meson_root, + self.temp_dir / 'meson', + ignore=shutil.ignore_patterns( + '.git', + '*_cache', + 'work area', + self.temp_dir.name, + ) + ) + + def do_test(self): + self.copy_meson() + self.gen_dockerfile() + + try: + build_cmd = [ + self.docker, 'build', + '-t', 'meson_test_image', + self.temp_dir.as_posix(), + ] + if subprocess.run(build_cmd).returncode != 0: + raise RuntimeError('Failed to build the test docker image') + + test_cmd = [ + self.docker, 'run', '--rm', '-t', 'meson_test_image', + '/bin/bash', '-c', 'source /ci/env_vars.sh; cd meson; ./run_tests.py $CI_ARGS' + ] + if subprocess.run(test_cmd).returncode != 0: + raise RuntimeError('Running tests failed') + finally: + cleanup_cmd = [self.docker, 'rmi', '-f', 'meson_test_image'] + subprocess.run(cleanup_cmd).returncode + +def main() -> None: + parser = argparse.ArgumentParser(description='Meson CI image builder') + parser.add_argument('what', type=str, help='Which image to build / test') + parser.add_argument('-t', '--type', choices=['build', 'test'], help='What to do', required=True) + + args = parser.parse_args() + + ci_root = Path(__file__).parent + ci_data = ci_root / args.what + + with TemporaryDirectory(prefix=f'{args.type}_{args.what}_', dir=ci_root) as td: + ci_build = Path(td) + print(f'Build dir: {ci_build}') + + if args.type == 'build': + builder = Builder(ci_data, ci_build) + builder.do_build() + elif args.type == 'test': + tester = ImageTester(ci_data, ci_build, ci_root) + tester.do_test() + +if __name__ == '__main__': + main() diff --git a/meson/ci/ciimage/common.sh b/meson/ci/ciimage/common.sh new file mode 100644 index 000000000..c8940df6d --- /dev/null +++ b/meson/ci/ciimage/common.sh @@ -0,0 +1,22 @@ +#!/bin/bash + +### +### Common functions for CI builder files. +### All functions can be accessed in install.sh via: +### +### $ source /ci/common.sh +### + +set -e + +dub_fetch() { + set +e + for (( i=1; i<=24; ++i )); do + dub fetch "$@" + (( $? == 0 )) && break + + echo "Dub Fetch failed. Retrying in $((i*5))s" + sleep $((i*5)) + done + set -e +} diff --git a/meson/ci/ciimage/cuda/image.json b/meson/ci/ciimage/cuda/image.json new file mode 100644 index 000000000..d395e7197 --- /dev/null +++ b/meson/ci/ciimage/cuda/image.json @@ -0,0 +1,5 @@ +{ + "base_image": "archlinux:latest", + "args": ["--only", "cuda"], + "env": {} +} diff --git a/meson/ci/ciimage/cuda/install.sh b/meson/ci/ciimage/cuda/install.sh new file mode 100755 index 000000000..011099d49 --- /dev/null +++ b/meson/ci/ciimage/cuda/install.sh @@ -0,0 +1,18 @@ +#!/bin/bash + +set -e + +pkgs=( + python python-setuptools python-wheel python-pytest-xdist python-jsonschema + ninja gcc gcc-objc git cmake + cuda zlib pkgconf +) + +PACMAN_OPTS='--needed --noprogressbar --noconfirm' + +pacman -Syu $PACMAN_OPTS "${pkgs[@]}" + +# Manually remove cache to avoid GitHub space restrictions +rm -rf /var/cache/pacman + +echo "source /etc/profile.d/cuda.sh" >> /ci/env_vars.sh diff --git a/meson/ci/ciimage/eoan/image.json b/meson/ci/ciimage/eoan/image.json new file mode 100644 index 000000000..cbf86c30d --- /dev/null +++ b/meson/ci/ciimage/eoan/image.json @@ -0,0 +1,7 @@ +{ + "base_image": "ubuntu:eoan", + "env": { + "CI": "1", + "DC": "gdc" + } +} diff --git a/meson/ci/ciimage/eoan/install.sh b/meson/ci/ciimage/eoan/install.sh new file mode 100755 index 000000000..36dec72b1 --- /dev/null +++ b/meson/ci/ciimage/eoan/install.sh @@ -0,0 +1,57 @@ +#!/bin/bash + +set -e + +source /ci/common.sh + +export DEBIAN_FRONTEND=noninteractive +export LANG='C.UTF-8' +export DC=gdc + +pkgs=( + python3-pytest-xdist + python3-pip libxml2-dev libxslt1-dev libyaml-dev libjson-glib-dev + python3-lxml + wget unzip + qt5-default clang + pkg-config-arm-linux-gnueabihf + qt4-linguist-tools + python-dev + libomp-dev + llvm lcov + dub ldc + mingw-w64 mingw-w64-tools nim + libclang-dev + libgcrypt20-dev + libgpgme-dev + libhdf5-dev + libboost-python-dev libboost-regex-dev + libblocksruntime-dev + libperl-dev + liblapack-dev libscalapack-mpi-dev +) + +sed -i '/^#\sdeb-src /s/^#//' "/etc/apt/sources.list" +apt-get -y update +apt-get -y upgrade +apt-get -y install eatmydata + +# Base stuff +eatmydata apt-get -y build-dep meson + +# packages +eatmydata apt-get -y install "${pkgs[@]}" +eatmydata apt-get -y install --no-install-recommends wine-stable # Wine is special + +eatmydata python3 -m pip install hotdoc codecov gcovr jsonschema + +# dub stuff +dub_fetch urld +dub build urld --compiler=gdc +dub_fetch dubtestproject +dub build dubtestproject:test1 --compiler=ldc2 +dub build dubtestproject:test2 --compiler=ldc2 + +# cleanup +apt-get -y clean +apt-get -y autoclean diff --git a/meson/ci/ciimage/eoan/test.sh b/meson/ci/ciimage/eoan/test.sh new file mode 100755 index 000000000..d00df6f99 --- /dev/null +++ b/meson/ci/ciimage/eoan/test.sh @@ -0,0 +1,12 @@ +#!/bin/bash + +set -e + +testFN() { + set +e + false +} + +testFN +false +exit 0 \ No newline at end of file diff --git a/meson/ci/ciimage/fedora/image.json b/meson/ci/ciimage/fedora/image.json new file mode 100644 index 000000000..ae9ff4f20 --- /dev/null +++ b/meson/ci/ciimage/fedora/image.json @@ -0,0 +1,8 @@ +{ + "base_image": "fedora:latest", + "env": { + "CI": "1", + "SKIP_SCIENTIFIC": "1", + "SKIP_STATIC_BOOST": "1" + } +} diff --git a/meson/ci/ciimage/fedora/install.sh b/meson/ci/ciimage/fedora/install.sh new file mode 100755 index 000000000..3beb11cf0 --- /dev/null +++ b/meson/ci/ciimage/fedora/install.sh @@ -0,0 +1,27 @@ +#!/bin/bash + +set -e + +pkgs=( + python python-setuptools python-wheel python-pip python-pytest-xdist pygobject3 python3-devel python2-devel + ninja-build make git autoconf automake patch python3-Cython python2-Cython python3-jsonschema + elfutils gcc gcc-c++ gcc-fortran gcc-objc gcc-objc++ vala rust bison flex ldc libasan libasan-static + mono-core boost-devel gtkmm30 gtest-devel gmock-devel protobuf-devel wxGTK3-devel gobject-introspection + boost-python3-devel boost-python2-devel + itstool gtk3-devel java-latest-openjdk-devel gtk-doc llvm-devel clang-devel SDL2-devel graphviz-devel zlib zlib-devel zlib-static + #hdf5-openmpi-devel hdf5-devel netcdf-openmpi-devel netcdf-devel netcdf-fortran-openmpi-devel netcdf-fortran-devel scalapack-openmpi-devel + doxygen vulkan-devel vulkan-validation-layers-devel openssh mercurial gtk-sharp2-devel libpcap-devel gpgme-devel + qt5-qtbase-devel qt5-qttools-devel qt5-linguist qt5-qtbase-private-devel + libwmf-devel valgrind cmake openmpi-devel nasm gnustep-base-devel gettext-devel ncurses-devel + libxml2-devel libxslt-devel libyaml-devel glib2-devel json-glib-devel python3-lxml +) + +# Sys update +dnf -y upgrade + +# Install deps +dnf -y install "${pkgs[@]}" +python3 -m pip install hotdoc gcovr gobject PyGObject + +# Cleanup +dnf -y clean all diff --git a/meson/ci/ciimage/opensuse/image.json b/meson/ci/ciimage/opensuse/image.json new file mode 100644 index 000000000..74acbe54e --- /dev/null +++ b/meson/ci/ciimage/opensuse/image.json @@ -0,0 +1,9 @@ +{ + "base_image": "opensuse/tumbleweed:latest", + "env": { + "CI": "1", + "SKIP_SCIENTIFIC": "1", + "SKIP_STATIC_BOOST": "1", + "SINGLE_DUB_COMPILER": "1" + } +} diff --git a/meson/ci/ciimage/opensuse/install.sh b/meson/ci/ciimage/opensuse/install.sh new file mode 100755 index 000000000..4c8e7705e --- /dev/null +++ b/meson/ci/ciimage/opensuse/install.sh @@ -0,0 +1,48 @@ +#!/bin/bash + +set -e + +source /ci/common.sh + +pkgs=( + python3-setuptools python3-wheel python3-pip python3-pytest-xdist python3 python3-lxml + ninja make git autoconf automake patch python3-Cython python3-jsonschema + elfutils gcc gcc-c++ gcc-fortran gcc-objc gcc-obj-c++ vala rust bison flex curl lcov + mono-core gtkmm3-devel gtest gmock protobuf-devel wxGTK3-3_2-devel gobject-introspection-devel + itstool gtk3-devel java-15-openjdk-devel gtk-doc llvm-devel clang-devel libSDL2-devel graphviz-devel zlib-devel zlib-devel-static + #hdf5-devel netcdf-devel libscalapack2-openmpi3-devel libscalapack2-gnu-openmpi3-hpc-devel openmpi3-devel + doxygen vulkan-devel vulkan-validationlayers openssh mercurial gtk-sharp3-complete gtk-sharp2-complete libpcap-devel libgpgme-devel + libqt5-qtbase-devel libqt5-qttools-devel libqt5-linguist libqt5-qtbase-private-headers-devel + libwmf-devel valgrind cmake nasm gnustep-base-devel gettext-tools gettext-runtime gettext-csharp ncurses-devel + libxml2-devel libxslt-devel libyaml-devel glib2-devel json-glib-devel + boost-devel libboost_date_time-devel libboost_filesystem-devel libboost_locale-devel libboost_system-devel + libboost_test-devel libboost_log-devel libboost_regex-devel + libboost_python3-devel libboost_regex-devel +) + +# Sys update +zypper --non-interactive patch --with-update --with-optional +zypper --non-interactive update + +# Install deps +zypper install -y "${pkgs[@]}" +python3 -m pip install hotdoc gcovr gobject PyGObject + +echo 'export PKG_CONFIG_PATH="/usr/lib64/mpi/gcc/openmpi3/lib64/pkgconfig:$PKG_CONFIG_PATH"' >> /ci/env_vars.sh + +# dmd is very special on OpenSUSE (as in the packages do not work) +# see https://bugzilla.opensuse.org/show_bug.cgi?id=1162408 +curl -fsS https://dlang.org/install.sh | bash -s dmd | tee dmd_out.txt +cat dmd_out.txt | grep source | sed 's/^[^`]*`//g' | sed 's/`.*//g' >> /ci/env_vars.sh +chmod +x /ci/env_vars.sh + +source /ci/env_vars.sh + +dub_fetch urld +dub build urld --compiler=dmd +dub_fetch dubtestproject +dub build dubtestproject:test1 --compiler=dmd +dub build dubtestproject:test2 --compiler=dmd + +# Cleanup +zypper --non-interactive clean --all diff --git a/meson/ci/run.ps1 b/meson/ci/run.ps1 new file mode 100644 index 000000000..5065b8749 --- /dev/null +++ b/meson/ci/run.ps1 @@ -0,0 +1,78 @@ +python ./skip_ci.py --base-branch-env=SYSTEM_PULLREQUEST_TARGETBRANCH --is-pull-env=SYSTEM_PULLREQUEST_PULLREQUESTID --base-branch-origin +if ($LastExitCode -ne 0) { + exit 0 +} + +# remove Chocolately, MinGW, Strawberry Perl from path, so we don't find gcc/gfortran and try to use it +# remove PostgreSQL from path so we don't pickup a broken zlib from it +$env:Path = ($env:Path.Split(';') | Where-Object { $_ -notmatch 'mingw|Strawberry|Chocolatey|PostgreSQL' }) -join ';' + +# Rust puts its shared stdlib in a secret place, but it is needed to run tests. +$env:Path += ";$HOME/.rustup/toolchains/stable-x86_64-pc-windows-msvc/bin" + +# Set the CI env var for the meson test framework +$env:CI = '1' + +# download and install prerequisites +function DownloadFile([String] $Source, [String] $Destination) { + $retries = 10 + echo "Downloading $Source" + for ($i = 1; $i -le $retries; $i++) { + try { + (New-Object net.webclient).DownloadFile($Source, $Destination) + break # succeeded + } catch [net.WebException] { + if ($i -eq $retries) { + throw # fail on last retry + } + $backoff = (10 * $i) # backoff 10s, 20s, 30s... + echo ('{0}: {1}' -f $Source, $_.Exception.Message) + echo ('Retrying in {0}s...' -f $backoff) + Start-Sleep -m ($backoff * 1000) + } + } +} + + +if ($env:backend -eq 'ninja') { $dmd = $true } else { $dmd = $false } + +DownloadFile -Source https://github.com/mesonbuild/cidata/releases/download/ci2/ci_data.zip -Destination $env:AGENT_WORKFOLDER\ci_data.zip +echo "Extracting ci_data.zip" +Expand-Archive $env:AGENT_WORKFOLDER\ci_data.zip -DestinationPath $env:AGENT_WORKFOLDER\ci_data +& "$env:AGENT_WORKFOLDER\ci_data\install.ps1" -Arch $env:arch -Compiler $env:compiler -Boost $true -DMD $dmd + + +echo "=== PATH BEGIN ===" +echo ($env:Path).Replace(';',"`n") +echo "=== PATH END ===" +echo "" + +$progs = @("python","ninja","pkg-config","cl","rc","link") +foreach ($prog in $progs) { + echo "" + echo "Locating ${prog}:" + where.exe $prog +} + +echo "" +echo "Ninja / MSBuld version:" +if ($env:backend -eq 'ninja') { + ninja --version +} else { + MSBuild /version +} + +echo "" +echo "Python version:" +python --version + +# Needed for running unit tests in parallel. +echo "" +python -m pip --disable-pip-version-check install --upgrade pefile pytest-xdist jsonschema + +echo "" +echo "=== Start running tests ===" +# Starting from VS2019 Powershell(?) will fail the test run +# if it prints anything to stderr. Python's test runner +# does that by default so we need to forward it. +cmd /c 'python 2>&1' run_tests.py --backend $env:backend diff --git a/meson/ci/travis_install.sh b/meson/ci/travis_install.sh new file mode 100755 index 000000000..d9d308a45 --- /dev/null +++ b/meson/ci/travis_install.sh @@ -0,0 +1,22 @@ +#!/bin/bash + +set -e + +msg() { echo -e "\x1b[1;32mINFO: \x1b[37m$*\x1b[0m"; } + +if [[ "$TRAVIS_OS_NAME" == "osx" ]]; then + msg "Running OSX setup" + brew update + # Run one macOS build with pkg-config available (pulled in by qt), and the + # other (unity=on) without pkg-config + brew install qt ldc llvm ninja + if [[ "$MESON_ARGS" =~ .*unity=on.* ]]; then + which pkg-config && rm -f $(which pkg-config) + fi + python3 -m pip install jsonschema +elif [[ "$TRAVIS_OS_NAME" == "linux" ]]; then + msg "Running Linux setup" + docker pull mesonbuild/eoan +fi + +msg "Setup finished" diff --git a/meson/ci/travis_script.sh b/meson/ci/travis_script.sh new file mode 100755 index 000000000..bdfd4c202 --- /dev/null +++ b/meson/ci/travis_script.sh @@ -0,0 +1,67 @@ +#!/bin/bash + +set -e + +msg() { echo -e "\x1b[1;32mINFO: \x1b[37m$*\x1b[0m"; } + +if [[ "$TRAVIS_OS_NAME" == "linux" ]]; then + # TODO enable coverage + #curl -s https://codecov.io/bash > upload.sh + #chmod +x upload.sh + + # We need to copy the current checkout inside the Docker container, + # because it has the MR id to be tested checked out. + + msg "Generating runner:" + cat < + + + + The Meson Build System + https://github.com/mesonbuild/meson + + + Install the given project via Meson + Authentication is required to install this project + preferences-system + + no + no + auth_admin_keep + + /usr/bin/python3 + /usr/bin/meson + + + diff --git a/meson/data/macros.meson b/meson/data/macros.meson new file mode 100644 index 000000000..cc4953c5f --- /dev/null +++ b/meson/data/macros.meson @@ -0,0 +1,45 @@ +%__meson %{_bindir}/meson +%__meson_wrap_mode nodownload +%__meson_auto_features enabled + +%meson \ + %set_build_flags \ + %{shrink:%{__meson} \ + --buildtype=plain \ + --prefix=%{_prefix} \ + --libdir=%{_libdir} \ + --libexecdir=%{_libexecdir} \ + --bindir=%{_bindir} \ + --sbindir=%{_sbindir} \ + --includedir=%{_includedir} \ + --datadir=%{_datadir} \ + --mandir=%{_mandir} \ + --infodir=%{_infodir} \ + --localedir=%{_datadir}/locale \ + --sysconfdir=%{_sysconfdir} \ + --localstatedir=%{_localstatedir} \ + --sharedstatedir=%{_sharedstatedir} \ + --wrap-mode=%{__meson_wrap_mode} \ + --auto-features=%{__meson_auto_features} \ + %{_vpath_srcdir} %{_vpath_builddir} \ + %{nil}} + +%meson_build \ + %{shrink:%{__meson} compile \ + -C %{_vpath_builddir} \ + -j %{_smp_build_ncpus} \ + --verbose \ + %{nil}} + +%meson_install \ + %{shrink:DESTDIR=%{buildroot} %{__meson} install \ + -C %{_vpath_builddir} \ + --no-rebuild \ + %{nil}} + +%meson_test \ + %{shrink:%{__meson} test \ + -C %{_vpath_builddir} \ + --num-processes %{_smp_build_ncpus} \ + --print-errorlogs \ + %{nil}} diff --git a/meson/data/schema.xsd b/meson/data/schema.xsd new file mode 100644 index 000000000..58c6bfde8 --- /dev/null +++ b/meson/data/schema.xsd @@ -0,0 +1,96 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/meson/data/shell-completions/bash/meson b/meson/data/shell-completions/bash/meson new file mode 100644 index 000000000..993885bbd --- /dev/null +++ b/meson/data/shell-completions/bash/meson @@ -0,0 +1,416 @@ +_meson() { + command="${COMP_WORDS[1]}" + case "$command" in + setup |\ + configure |\ + install |\ + introspect |\ + init |\ + test |\ + wrap |\ + subprojects |\ + help) + _meson-$command "${COMP_WORDS[@]:1}" + ;; + *) + _meson-setup "${COMP_WORDS[@]}" + ;; + esac +} && +complete -F _meson meson + +_meson_complete_option() { + option_string=$1 + + if [[ $# -eq 2 ]] && ! [[ "$option_string" == *=* ]]; then + option_string="$option_string=$2" + fi + + if [[ "$option_string" == *=* ]]; then + _meson_complete_option_value "$option_string" + else + _meson_complete_option_name "$option_string" + fi +} + +_meson_complete_option_name() { + option=$1 + options=($(python3 -c 'import sys, json +for option in json.load(sys.stdin): + print(option["name"]) +' <<< "$(_meson_get_options)")) + compopt -o nospace + COMPREPLY=($(compgen -W '${options[@]}' -S= -- "$option")) +} + +_meson_complete_option_value() { + cur=$1 + option_name=${cur%%=*} + option_value=${cur#*=} + + if _meson_complete_filedir "$option_name" "$option_value"; then + return + fi + +# TODO: support all the option types + options=($(python3 -c 'import sys, json +for option in json.load(sys.stdin): + if option["name"] != "'$option_name'": + continue + choices = [] + if option["type"] == "boolean": + choices.append("true") + choices.append("false") + elif option["type"] == "combo": + for choice in option["choices"]: + choices.append(choice) + for choice in choices: + if choice.startswith("'$cur'"): + print(choice) +' <<< "$(_meson_get_options)")) + COMPREPLY=("${options[@]}") +} + +_meson_get_options() { + local options + for builddir in "${COMP_WORDS[@]}"; do + if [ -d "$builddir" ]; then + break + fi + builddir=. + done + options=$(meson introspect "$builddir" --buildoptions 2>/dev/null) && + echo "$options" || + echo '[]' +} + +_meson_complete_filedir() { + _filedir_in() { + pushd "$1" &>/dev/null + local COMPREPLY=() + _filedir + echo "${COMPREPLY[@]}" + popd &>/dev/null + } + + option=$1 + cur=$2 + case $option in + prefix |\ + libdir |\ + libexecdir |\ + bindir |\ + sbindir |\ + includedir |\ + datadir |\ + mandir |\ + infodir |\ + localedir |\ + sysconfdir |\ + localstatedir |\ + sharedstatedir) + _filedir -d + ;; + cross-file) + _filedir + COMPREPLY+=($(_filedir_in "$XDG_DATA_DIRS"/meson/cross)) + COMPREPLY+=($(_filedir_in /usr/local/share/meson/cross)) + COMPREPLY+=($(_filedir_in /usr/share/meson/cross)) + COMPREPLY+=($(_filedir_in "$XDG_DATA_HOME"/meson/cross)) + COMPREPLY+=($(_filedir_in ~/.local/share/meson/cross)) + ;; + *) + return 1;; + esac + return 0 +} + +_meson-setup() { + + shortopts=( + h + D + v + ) + + longopts=( + help + prefix + libdir + libexecdir + bindir + sbindir + includedir + datadir + mandir + infodir + localedir + sysconfdir + localstatedir + sharedstatedir + backend + buildtype + strip + unity + werror + layout + default-library + warnlevel + stdsplit + errorlogs + cross-file + version + wrap-mode + ) + + local cur prev + if _get_comp_words_by_ref cur prev &>/dev/null && + [ "${prev:0:2}" = '--' ] && _meson_complete_option "${prev:2}" "$cur"; then + return + elif _get_comp_words_by_ref cur prev &>/dev/null && + [ "${prev:0:1}" = '-' ] && [ "${prev:1:2}" != '-' ] && _meson_complete_option "${prev:1}"; then + return + elif _get_comp_words_by_ref -n '=' cur prev &>/dev/null; then + if [ $prev == -D ]; then + _meson_complete_option "$cur" + return + fi + else + cur="${COMP_WORDS[COMP_CWORD]}" + fi + + if [[ "$cur" == "--"* ]]; then + COMPREPLY+=($(compgen -P '--' -W '${longopts[*]}' -- "${cur:2}")) + elif [[ "$cur" == "-"* ]]; then + COMPREPLY+=($(compgen -P '--' -W '${longopts[*]}' -- "${cur:2}")) + COMPREPLY+=($(compgen -P '-' -W '${shortopts[*]}' -- "${cur:1}")) + else + _filedir -d + if [ -z "$cur" ]; then + COMPREPLY+=($(compgen -P '--' -W '${longopts[*]}')) + COMPREPLY+=($(compgen -P '-' -W '${shortopts[*]}')) + fi + + if [ $COMP_CWORD -eq 1 ]; then + COMPREPLY+=($(compgen -W 'setup configure test introspect' -- "$cur")) + fi + fi +} + +_meson-configure() { + + shortopts=( + h + D + ) + + longopts=( + help + clearcache + ) + + local cur prev + if _get_comp_words_by_ref -n '=' cur prev &>/dev/null; then + if [ $prev == -D ]; then + _meson_complete_option "$cur" + return + fi + else + cur="${COMP_WORDS[COMP_CWORD]}" + fi + + if [[ "$cur" == "--"* ]]; then + COMPREPLY+=($(compgen -P '--' -W '${longopts[*]}' -- "${cur:2}")) + elif [[ "$cur" == "-"* ]]; then + COMPREPLY+=($(compgen -P '--' -W '${longopts[*]}' -- "${cur:2}")) + COMPREPLY+=($(compgen -P '-' -W '${shortopts[*]}' -- "${cur:1}")) + else + for dir in "${COMP_WORDS[@]}"; do + if [ -d "$dir" ]; then + break + fi + dir=. + done + if [ ! -d "$dir/meson-private" ]; then + _filedir -d + fi + + if [ -z "$cur" ]; then + COMPREPLY+=($(compgen -P '--' -W '${longopts[*]}')) + COMPREPLY+=($(compgen -P '-' -W '${shortopts[*]}')) + fi + fi +} + +_meson-test() { + shortopts=( + q + v + t + C + ) + + longopts=( + quiet + verbose + timeout-multiplier + repeat + no-rebuild + gdb + list + wrapper --wrap + no-suite + suite + no-stdsplit + print-errorlogs + benchmark + logbase + num-processes + setup + test-args + ) + + local cur prev + if _get_comp_words_by_ref -n ':' cur prev &>/dev/null; then + case $prev in + --repeat) + # number, can't be completed + return + ;; + --wrapper) + _command_offset $COMP_CWORD + return + ;; + -C) + _filedir -d + return + ;; + --suite | --no-suite) + for i in "${!COMP_WORDS[@]}"; do + opt="${COMP_WORDS[i]}" + dir="${COMP_WORDS[i+1]}" + case "$opt" in + -C) + break + ;; + esac + dir=. + done + suites=($(python3 -c 'import sys, json; +for test in json.load(sys.stdin): + for suite in test["suite"]: + print(suite) + ' <<< "$(meson introspect "$dir" --tests)")) +# TODO + COMPREPLY+=($(compgen -W "${suites[*]}" -- "$cur")) + return + ;; + --logbase) + # free string, can't be completed + return + ;; + --num-processes) + # number, can't be completed + return + ;; + -t | --timeout-multiplier) + # number, can't be completed + return + ;; + --setup) + # TODO + return + ;; + --test-args) + return + ;; + esac + else + cur="${COMP_WORDS[COMP_CWORD]}" + fi + + if [[ "$cur" == "--"* ]]; then + COMPREPLY+=($(compgen -P '--' -W '${longopts[*]}' -- "${cur:2}")) + elif [[ "$cur" == "-"* && ${#cur} -gt 1 ]]; then + COMPREPLY+=($(compgen -P '-' -W '${shortopts[*]}' -- "${cur:1}")) + else + for dir in "${COMP_WORDS[@]}"; do + if [ -d "$dir" ]; then + break + fi + dir=. + done + if [ ! -d "$dir/meson-private" ]; then + _filedir -d + fi + + for i in "${!COMP_WORDS[@]}"; do + opt="${COMP_WORDS[i]}" + dir="${COMP_WORDS[i+1]}" + case "$opt" in + -C) + break + ;; + esac + dir=. + done + tests=($(python3 -c 'import sys, json; +for test in json.load(sys.stdin): + print(test["name"]) +' <<< "$(meson introspect "$dir" --tests)")) + COMPREPLY+=($(compgen -W "${tests[*]}" -- "$cur")) + + if [ -z "$cur" ]; then + COMPREPLY+=($(compgen -P '--' -W '${longopts[*]}' -- "${cur:2}")) + COMPREPLY+=($(compgen -P '-' -W '${shortopts[*]}' -- "${cur:1}")) + fi + fi +} + +_meson-introspect() { + shortopts=( + h + ) + + longopts=( + targets + installed + buildsystem-files + buildoptions + tests + benchmarks + dependencies + projectinfo + ) + + local cur prev + if ! _get_comp_words_by_ref cur prev &>/dev/null; then + cur="${COMP_WORDS[COMP_CWORD]}" + fi + + if [[ "$cur" == "--"* ]]; then + COMPREPLY+=($(compgen -P '--' -W '${longopts[*]}' -- "${cur:2}")) + elif [[ "$cur" == "-"* ]]; then + COMPREPLY+=($(compgen -P '--' -W '${longopts[*]}' -- "${cur:2}")) + COMPREPLY+=($(compgen -P '-' -W '${shortopts[*]}' -- "${cur:1}")) + else + for dir in "${COMP_WORDS[@]}"; do + if [ -d "$dir" ]; then + break + fi + dir=. + done + if [ ! -d "$dir/meson-private" ]; then + _filedir -d + fi + + if [ -z "$cur" ]; then + COMPREPLY+=($(compgen -P '--' -W '${longopts[*]}')) + COMPREPLY+=($(compgen -P '-' -W '${shortopts[*]}')) + fi + fi +} + +_meson-wrap() { + : TODO +} diff --git a/meson/data/shell-completions/zsh/_meson b/meson/data/shell-completions/zsh/_meson new file mode 100644 index 000000000..e7fe968a0 --- /dev/null +++ b/meson/data/shell-completions/zsh/_meson @@ -0,0 +1,212 @@ +#compdef meson mesonconf=meson-configure mesontest=meson-test mesonintrospect=meson-introspect + +# vim:ts=2 sw=2 + +# Copyright (c) 2017 Arseny Maslennikov +# All rights reserved. Individual authors, whether or not +# specifically named, retain copyright in all changes; in what follows, they +# are referred to as `the Meson development team'. This is for convenience +# only and this body has no legal status. This file is distributed under +# the following licence. +# +# Permission is hereby granted, without written agreement and without +# licence or royalty fees, to use, copy, modify, and distribute this +# software and to distribute modified versions of this software for any +# purpose, provided that the above copyright notice and the following +# two paragraphs appear in all copies of this software. +# +# In no event shall the Meson development team be liable to any party for +# direct, indirect, special, incidental, or consequential damages arising out +# of the use of this software and its documentation, even if the Meson +# development team have been advised of the possibility of such damage. +# +# The Meson development team specifically disclaim any warranties, including, +# but not limited to, the implied warranties of merchantability and fitness +# for a particular purpose. The software provided hereunder is on an "as is" +# basis, and the Meson development team have no obligation to provide +# maintenance, support, updates, enhancements, or modifications. + +local curcontext="$curcontext" state line +local -i ret + +local __meson_backends="(ninja xcode ${(j. .)${:-vs{,2010,2015,2017}}})" +local __meson_build_types="(plain debug debugoptimized minsize release)" +local __meson_wrap_modes="(WrapMode.{default,nofallback,nodownload,forcefallback})" + +local -a meson_commands=( +'setup:set up a build directory' +'configure:configure a project' +'test:run tests' +'introspect:query project properties' +'wrap:manage source dependencies' +) + +(( $+functions[__meson_is_build_dir] )) || __meson_is_build_dir() { + local mpd="${1:-$PWD}/meson-private" + [[ -f "$mpd/build.dat" && -f "$mpd/coredata.dat" ]] + return $? +} + +# TODO: implement build option completion +(( $+functions[__meson_build_options] )) || __meson_build_options() {} + +# `meson introspect` currently can provide that information in JSON. +# We can: +# 1) pipe its output to python3 -m json.tool | grep "$alovelyregex" | cut <...> +# 2) teach mintro.py to use a different output format +# (or perhaps just to select the fields printed) + +(( $+functions[__meson_test_names] )) || __meson_test_names() { + local rtests + if rtests="$(_call_program meson meson test ${opt_args[-C]:+-C "$opt_args[-C]"} --list)"; + then + local -a tests=(${(@f)rtests}) + _describe -t "tests" "Meson tests" tests + else + _message -r "current working directory is not a build directory" + _message -r 'use -C $build_dir or cd $build_dir' + fi +} + +(( $+functions[_meson_commands] )) || _meson_commands() { + _describe -t commands "Meson subcommands" meson_commands +} + +(( $+functions[_meson-setup] )) || _meson-setup() { + local firstd secondd + if [[ -f "meson.build" ]]; then + # if there's no second argument on the command line + # cwd will implicitly be substituted: + # - as the source directory if it has a file with the name "meson.build"; + # - as the build directory otherwise + # more info in mesonbuild/mesonmain.py + firstd="build" + secondd="source" + else + firstd="source" + secondd="build" + fi + + _arguments \ + '*-D-[set the value of a build option]:build option:__meson_build_options' \ + '--prefix=[installation prefix]: :_directories' \ + '--libdir=[library directory]: :_directories' \ + '--libexecdir=[library executable directory]: :_directories' \ + '--bindir=[executable directory]: :_directories' \ + '--sbindir=[system executable directory]: :_directories' \ + '--includedir=[header file directory]: :_directories' \ + '--datadir=[data file directory]: :_directories' \ + '--mandir=[manual page directory]: :_directories' \ + '--infodir=[info page directory]: :_directories' \ + '--localedir=[locale data directory]: :_directories' \ + '--sysconfdir=[system configuration directory]: :_directories' \ + '--localstatedir=[local state data directory]: :_directories' \ + '--sharedstatedir=[arch-independent data directory]: :_directories' \ + '--backend=[backend to use]:Meson backend:'"$__meson_backends" \ + '--buildtype=[build type to use]:Meson build type:'"$__meson_build_types" \ + '--strip[strip targets on install]' \ + '--unity=[unity builds on/off]:whether to do unity builds:(on off subprojects)' \ + '--werror[treat warnings as errors]' \ + '--layout=[build directory layout]:build directory layout:(flat mirror)' \ + '--default-library=[default library type]:default library type:(shared static)' \ + '--warnlevel=[compiler warning level]:compiler warning level:warning level:(1 2 3)' \ + '--stdsplit=[split stdout and stderr in test logs]' \ + '--errorlogs=[prints the logs from failing tests]' \ + '--cross-file=[cross-compilation environment description]:cross file:_files' \ + '--native-file=[build machine compilation environment description]:native file:_files' \ + '--wrap-mode=[special wrap mode]:wrap mode:'"$__meson_wrap_modes" \ + ":$firstd directory:_directories" \ + "::$secondd directory:_directories" \ + # +} + +(( $+functions[_meson-configure] )) || _meson-configure() { + local curcontext="$curcontext" + # TODO: implement 'mesonconf @file' + local -a specs=( + '--clearcache[clear cached state]' + '*-D-[set the value of a build option]:build option:__meson_build_options' + '::build directory:_directories' + ) + + _arguments \ + '(: -)'{'--help','-h'}'[show a help message and quit]' \ + "${(@)specs}" +} + +(( $+functions[_meson-test] )) || _meson-test() { + local curcontext="$curcontext" + + # TODO: complete test suites + local -a specs=( + '(--quiet -q)'{'--quiet','-q'}'[produce less output to the terminal]' + '(--verbose -v)'{'--verbose','-v'}'[do not redirect stdout and stderr]' + '(--timeout-multiplier -t)'{'--timeout-multiplier','-t'}'[a multiplier for test timeouts]:Python floating-point number: ' + '-C[directory to cd into]: :_directories' + '--repeat[number of times to run the tests]:number of times to repeat: ' + '--no-rebuild[do not rebuild before running tests]' + '--gdb[run tests under gdb]' + '--list[list available tests]' + '(--wrapper --wrap)'{'--wrapper=','--wrap='}'[wrapper to run tests with]:wrapper program:_path_commands' + '(--no-suite)--suite[only run tests from this suite]:test suite: ' + '(--suite)--no-suite[do not run tests from this suite]:test suite: ' + '--no-stdsplit[do not split stderr and stdout in logs]' + '--print-errorlogs[print logs for failing tests]' + '--benchmark[run benchmarks instead of tests]' + '--logbase[base name for log file]:filename: ' + '--num-processes[how many threads to use]:number of processes: ' + '--setup[which test setup to use]:test setup: ' + '--test-args[arguments to pass to the tests]: : ' + '*:Meson tests:__meson_test_names' + ) + + _arguments \ + '(: -)'{'--help','-h'}'[show a help message and quit]' \ + "${(@)specs}" +} + +(( $+functions[_meson-introspect] )) || _meson-introspect() { + local curcontext="$curcontext" + local -a specs=( + '--targets[list top level targets]' + '--installed[list all installed files and directories]' + '--buildsystem-files[list files that belong to the build system]' + '--buildoptions[list all build options]' + '--tests[list all unit tests]' + '--benchmarks[list all benchmarks]' + '--dependencies[list external dependencies]' + '--projectinfo[show project information]' + '::build directory:_directories' + ) +_arguments \ + '(: -)'{'--help','-h'}'[show a help message and quit]' \ + "${(@)specs}" +} + +(( $+functions[_meson-wrap] )) || _meson-wrap() { + # TODO +} + +if [[ $service != meson ]]; then + _call_function ret _$service + return ret +fi + +_arguments -C -R \ + '(: -)'{'--help','-h'}'[show a help message and quit]' \ + '(: -)'{'--version','-v'}'[show version information and quit]' \ + '(-): :_meson_commands' \ + '*:: :->post-command' \ +# +ret=$? + +[[ $ret = 300 ]] && case "$state" in + post-command) + service="meson-$words[1]" + curcontext=${curcontext%:*:*}:$service: + _call_function ret _$service + ;; +esac + +return ret + diff --git a/meson/data/syntax-highlighting/emacs/meson.el b/meson/data/syntax-highlighting/emacs/meson.el new file mode 100644 index 000000000..a640bbe71 --- /dev/null +++ b/meson/data/syntax-highlighting/emacs/meson.el @@ -0,0 +1,31 @@ +;; keywords for syntax coloring +(setq meson-keywords + `( + ( ,(regexp-opt '("elif" "if" "else" "endif" "foreach" "endforeach") 'word) . font-lock-keyword-face) + ) + ) + +;; syntax table +(defvar meson-syntax-table nil "Syntax table for `meson-mode'.") +(setq meson-syntax-table + (let ((synTable (make-syntax-table))) + + ;; bash style comment: “# …” + (modify-syntax-entry ?# "< b" synTable) + (modify-syntax-entry ?\n "> b" synTable) + + synTable)) + +;; define the major mode. +(define-derived-mode meson-mode prog-mode + "meson-mode is a major mode for editing Meson build definition files." + :syntax-table meson-syntax-table + + (setq font-lock-defaults '(meson-keywords)) + (setq mode-name "meson") + (setq-local comment-start "# ") + (setq-local comment-end "")) + +(add-to-list 'auto-mode-alist '("meson.build" . meson-mode)) +(provide 'meson) +;;; meson.el ends here diff --git a/meson/data/syntax-highlighting/vim/README b/meson/data/syntax-highlighting/vim/README new file mode 100644 index 000000000..95188fcc7 --- /dev/null +++ b/meson/data/syntax-highlighting/vim/README @@ -0,0 +1,4 @@ +ftdetect sets the filetype +ftplugin sets Meson indentation rules +indent does Meson indentation +syntax does Meson syntax highlighting diff --git a/meson/data/syntax-highlighting/vim/ftdetect/meson.vim b/meson/data/syntax-highlighting/vim/ftdetect/meson.vim new file mode 100644 index 000000000..3233c5875 --- /dev/null +++ b/meson/data/syntax-highlighting/vim/ftdetect/meson.vim @@ -0,0 +1,3 @@ +au BufNewFile,BufRead meson.build set filetype=meson +au BufNewFile,BufRead meson_options.txt set filetype=meson +au BufNewFile,BufRead *.wrap set filetype=dosini diff --git a/meson/data/syntax-highlighting/vim/ftplugin/meson.vim b/meson/data/syntax-highlighting/vim/ftplugin/meson.vim new file mode 100644 index 000000000..e432ebf19 --- /dev/null +++ b/meson/data/syntax-highlighting/vim/ftplugin/meson.vim @@ -0,0 +1,19 @@ +" Vim filetype plugin file +" Language: meson +" License: VIM License +" Original Author: Laurent Pinchart +" Last Change: 2018 Nov 27 + +if exists("b:did_ftplugin") | finish | endif +let b:did_ftplugin = 1 +let s:keepcpo= &cpo +set cpo&vim + +setlocal commentstring=#\ %s +setlocal comments=:# + +setlocal shiftwidth=2 +setlocal softtabstop=2 + +let &cpo = s:keepcpo +unlet s:keepcpo diff --git a/meson/data/syntax-highlighting/vim/indent/meson.vim b/meson/data/syntax-highlighting/vim/indent/meson.vim new file mode 100644 index 000000000..f73e3b92d --- /dev/null +++ b/meson/data/syntax-highlighting/vim/indent/meson.vim @@ -0,0 +1,180 @@ +" Vim indent file +" Language: Meson +" License: VIM License +" Maintainer: Nirbheek Chauhan +" Original Authors: David Bustos +" Bram Moolenaar +" Last Change: 2015 Feb 23 + +" Only load this indent file when no other was loaded. +if exists("b:did_indent") + finish +endif +let b:did_indent = 1 + +" Some preliminary settings +setlocal nolisp " Make sure lisp indenting doesn't supersede us +setlocal autoindent " indentexpr isn't much help otherwise + +setlocal indentexpr=GetMesonIndent(v:lnum) +setlocal indentkeys+==elif,=else,=endforeach,=endif,0) + +" Only define the function once. +if exists("*GetMesonIndent") + finish +endif +let s:keepcpo= &cpo +setlocal cpo&vim + +" Come here when loading the script the first time. + +let s:maxoff = 50 " maximum number of lines to look backwards for () + +function GetMesonIndent(lnum) + echom getline(line(".")) + + " If this line is explicitly joined: If the previous line was also joined, + " line it up with that one, otherwise add two 'shiftwidth' + if getline(a:lnum - 1) =~ '\\$' + if a:lnum > 1 && getline(a:lnum - 2) =~ '\\$' + return indent(a:lnum - 1) + endif + return indent(a:lnum - 1) + (exists("g:mesonindent_continue") ? eval(g:mesonindent_continue) : (shiftwidth() * 2)) + endif + + " If the start of the line is in a string don't change the indent. + if has('syntax_items') + \ && synIDattr(synID(a:lnum, 1, 1), "name") =~ "String$" + return -1 + endif + + " Search backwards for the previous non-empty line. + let plnum = prevnonblank(v:lnum - 1) + + if plnum == 0 + " This is the first non-empty line, use zero indent. + return 0 + endif + + " If the previous line is inside parenthesis, use the indent of the starting + " line. + " Trick: use the non-existing "dummy" variable to break out of the loop when + " going too far back. + call cursor(plnum, 1) + let parlnum = searchpair('(\|{\|\[', '', ')\|}\|\]', 'nbW', + \ "line('.') < " . (plnum - s:maxoff) . " ? dummy :" + \ . " synIDattr(synID(line('.'), col('.'), 1), 'name')" + \ . " =~ '\\(Comment\\|Todo\\|String\\)$'") + if parlnum > 0 + let plindent = indent(parlnum) + let plnumstart = parlnum + else + let plindent = indent(plnum) + let plnumstart = plnum + endif + + + " When inside parenthesis: If at the first line below the parenthesis add + " a 'shiftwidth', otherwise same as previous line. + " i = (a + " + b + " + c) + call cursor(a:lnum, 1) + let p = searchpair('(\|{\|\[', '', ')\|}\|\]', 'bW', + \ "line('.') < " . (a:lnum - s:maxoff) . " ? dummy :" + \ . " synIDattr(synID(line('.'), col('.'), 1), 'name')" + \ . " =~ '\\(Comment\\|Todo\\|String\\)$'") + if p > 0 + if p == plnum + " When the start is inside parenthesis, only indent one 'shiftwidth'. + let pp = searchpair('(\|{\|\[', '', ')\|}\|\]', 'bW', + \ "line('.') < " . (a:lnum - s:maxoff) . " ? dummy :" + \ . " synIDattr(synID(line('.'), col('.'), 1), 'name')" + \ . " =~ '\\(Comment\\|Todo\\|String\\)$'") + if pp > 0 + return indent(plnum) + (exists("g:pyindent_nested_paren") ? eval(g:pyindent_nested_paren) : shiftwidth()) + endif + return indent(plnum) + (exists("g:pyindent_open_paren") ? eval(g:pyindent_open_paren) : shiftwidth()) + endif + if plnumstart == p + return indent(plnum) + endif + return plindent + endif + + + " Get the line and remove a trailing comment. + " Use syntax highlighting attributes when possible. + let pline = getline(plnum) + let pline_len = strlen(pline) + if has('syntax_items') + " If the last character in the line is a comment, do a binary search for + " the start of the comment. synID() is slow, a linear search would take + " too long on a long line. + if synIDattr(synID(plnum, pline_len, 1), "name") =~ "\\(Comment\\|Todo\\)$" + let min = 1 + let max = pline_len + while min < max + let col = (min + max) / 2 + if synIDattr(synID(plnum, col, 1), "name") =~ "\\(Comment\\|Todo\\)$" + let max = col + else + let min = col + 1 + endif + endwhile + let pline = strpart(pline, 0, min - 1) + endif + else + let col = 0 + while col < pline_len + if pline[col] == '#' + let pline = strpart(pline, 0, col) + break + endif + let col = col + 1 + endwhile + endif + + " If the previous line ended the conditional/loop + if getline(plnum) =~ '^\s*\(endif\|endforeach\)\>\s*' + " Maintain indent + return -1 + endif + + " If the previous line ended with a builtin, indent this line + if pline =~ '^\s*\(foreach\|if\|else\|elif\)\>\s*' + return plindent + shiftwidth() + endif + + " If the current line begins with a header keyword, deindent + if getline(a:lnum) =~ '^\s*\(else\|elif\|endif\|endforeach\)' + + " Unless the previous line was a one-liner + if getline(plnumstart) =~ '^\s*\(foreach\|if\)\>\s*' + return plindent + endif + + " Or the user has already dedented + if indent(a:lnum) <= plindent - shiftwidth() + return -1 + endif + + return plindent - shiftwidth() + endif + + " When after a () construct we probably want to go back to the start line. + " a = (b + " + c) + " here + if parlnum > 0 + return plindent + endif + + return -1 + +endfunction + +let &cpo = s:keepcpo +unlet s:keepcpo + +" vim:sw=2 diff --git a/meson/data/syntax-highlighting/vim/syntax/meson.vim b/meson/data/syntax-highlighting/vim/syntax/meson.vim new file mode 100644 index 000000000..11001136f --- /dev/null +++ b/meson/data/syntax-highlighting/vim/syntax/meson.vim @@ -0,0 +1,167 @@ +" Vim syntax file +" Language: Meson +" License: VIM License +" Maintainer: Nirbheek Chauhan +" Last Change: 2016 Dec 7 +" Credits: Zvezdan Petkovic +" Neil Schemenauer +" Dmitry Vasiliev +" +" This version is copied and edited from python.vim +" It's very basic, and doesn't do many things I'd like it to +" For instance, it should show errors for syntax that is valid in +" Python but not in Meson. +" +" Optional highlighting can be controlled using these variables. +" +" let meson_space_error_highlight = 1 +" + +" For version 5.x: Clear all syntax items. +" For version 6.x: Quit when a syntax file was already loaded. +if version < 600 + syntax clear +elseif exists("b:current_syntax") + finish +endif + +" We need nocompatible mode in order to continue lines with backslashes. +" Original setting will be restored. +let s:cpo_save = &cpo +set cpo&vim + +" http://mesonbuild.com/Syntax.html +syn keyword mesonConditional elif else if endif +syn keyword mesonRepeat foreach endforeach +syn keyword mesonOperator and not or in +syn keyword mesonStatement continue break + +syn match mesonComment "#.*$" contains=mesonTodo,@Spell +syn keyword mesonTodo FIXME NOTE NOTES TODO XXX contained + +" Strings can either be single quoted or triple counted across multiple lines, +" but always with a ' +syn region mesonString + \ start="\z('\)" end="\z1" skip="\\\\\|\\\z1" + \ contains=mesonEscape,@Spell +syn region mesonString + \ start="\z('''\)" end="\z1" keepend + \ contains=mesonEscape,mesonSpaceError,@Spell + +syn match mesonEscape "\\[abfnrtv'\\]" contained +syn match mesonEscape "\\\o\{1,3}" contained +syn match mesonEscape "\\x\x\{2}" contained +syn match mesonEscape "\%(\\u\x\{4}\|\\U\x\{8}\)" contained +" Meson allows case-insensitive Unicode IDs: http://www.unicode.org/charts/ +syn match mesonEscape "\\N{\a\+\%(\s\a\+\)*}" contained +syn match mesonEscape "\\$" + +" Meson only supports integer numbers +" http://mesonbuild.com/Syntax.html#numbers +syn match mesonNumber "\<\d\+\>" + +" booleans +syn keyword mesonConstant false true + +" Built-in functions +syn keyword mesonBuiltin + \ add_global_arguments + \ add_global_link_arguments + \ add_languages + \ add_project_arguments + \ add_project_link_arguments + \ add_test_setup + \ alias_target + \ assert + \ benchmark + \ both_libraries + \ build_machine + \ build_target + \ configuration_data + \ configure_file + \ custom_target + \ declare_dependency + \ dependency + \ disabler + \ environment + \ error + \ executable + \ files + \ find_library + \ find_program + \ generator + \ get_option + \ get_variable + \ gettext + \ host_machine + \ import + \ include_directories + \ install_data + \ install_headers + \ install_man + \ install_subdir + \ is_disabler + \ is_variable + \ jar + \ join_paths + \ library + \ meson + \ message + \ option + \ project + \ run_command + \ run_target + \ set_variable + \ shared_library + \ shared_module + \ static_library + \ subdir + \ subdir_done + \ subproject + \ summary + \ target_machine + \ test + \ vcs_tag + \ warning + +if exists("meson_space_error_highlight") + " trailing whitespace + syn match mesonSpaceError display excludenl "\s\+$" + " mixed tabs and spaces + syn match mesonSpaceError display " \+\t" + syn match mesonSpaceError display "\t\+ " +endif + +if version >= 508 || !exists("did_meson_syn_inits") + if version <= 508 + let did_meson_syn_inits = 1 + command -nargs=+ HiLink hi link + else + command -nargs=+ HiLink hi def link + endif + + " The default highlight links. Can be overridden later. + HiLink mesonStatement Statement + HiLink mesonConditional Conditional + HiLink mesonRepeat Repeat + HiLink mesonOperator Operator + HiLink mesonComment Comment + HiLink mesonTodo Todo + HiLink mesonString String + HiLink mesonEscape Special + HiLink mesonNumber Number + HiLink mesonBuiltin Function + HiLink mesonConstant Number + if exists("meson_space_error_highlight") + HiLink mesonSpaceError Error + endif + + delcommand HiLink +endif + +let b:current_syntax = "meson" + +let &cpo = s:cpo_save +unlet s:cpo_save + +" vim:set sw=2 sts=2 ts=8 noet: diff --git a/meson/data/test.schema.json b/meson/data/test.schema.json new file mode 100644 index 000000000..aa3cf8fec --- /dev/null +++ b/meson/data/test.schema.json @@ -0,0 +1,131 @@ +{ + "type": "object", + "additionalProperties": false, + "properties": { + "env": { + "type": "object", + "additionalProperties": { + "type": "string" + } + }, + "installed": { + "type": "array", + "items": { + "type": "object", + "properties": { + "file": { + "type": "string" + }, + "type": { + "type": "string", + "enum": [ + "file", + "dir", + "exe", + "shared_lib", + "pdb", + "implib", + "implibempty", + "expr" + ] + }, + "platform": { + "type": "string", + "enum": [ + "msvc", + "gcc", + "cygwin", + "!cygwin" + ] + }, + "version": { + "type": "string" + }, + "language": { + "type": "string" + } + }, + "required": [ + "file", + "type" + ] + } + }, + "matrix": { + "type": "object", + "additionalProperties": { + "properties": { + "options": { + "type": "array", + "items": { + "type": "object", + "properties": { + "val": { + "type": "string" + }, + "compilers": { + "type": "object", + "additionalProperties": { + "type": "string" + } + }, + "skip_on_env": { + "type": "array", + "items": { + "type": "string" + } + } + }, + "required": [ + "val" + ] + } + }, + "exclude": { + "type": "array", + "items": { + "type": "object", + "additionalProperties": { + "type": "string" + } + } + } + } + } + }, + "do_not_set_opts": { + "type": "array", + "items": { + "type": "string", + "enum": [ + "libdir", + "prefix" + ] + } + }, + "tools": { + "type": "object" + }, + "stdout": { + "type": "array", + "items": { + "type": "object", + "properties": { + "line": { + "type": "string" + }, + "match": { + "type": "string", + "enum": [ + "literal", + "re" + ] + } + }, + "required": [ + "line" + ] + } + } + } +} diff --git a/meson/docs/.editorconfig b/meson/docs/.editorconfig new file mode 100644 index 000000000..b5276f173 --- /dev/null +++ b/meson/docs/.editorconfig @@ -0,0 +1,2 @@ +[sitemap.txt] +indent_style = tab diff --git a/meson/docs/README.md b/meson/docs/README.md new file mode 100644 index 000000000..55fc3ec81 --- /dev/null +++ b/meson/docs/README.md @@ -0,0 +1,40 @@ +# Meson Documentation + +## Build dependencies + +Meson uses itself and [hotdoc](https://github.com/hotdoc/hotdoc) for generating documentation. + +Minimum required version of hotdoc is *0.8.9*. + +Instructions on how to install hotdoc are [here](https://hotdoc.github.io/installing.html). + +## Building the documentation + +From the Meson repository root dir: +``` +$ cd docs/ +$ meson built_docs +$ ninja -C built_docs/ upload +``` +Now you should be able to open the documentation locally +``` +built_docs/Meson documentation-doc/html/index.html +``` + +## Upload + +Meson uses the git-upload hotdoc plugin which basically +removes the html pages and replaces with the new content. + +You can simply run: +``` +$ ninja -C built_docs/ upload +``` + +## Contributing to the documentation + +Commits that only change documentation should have `[skip ci]` in their commit message, so CI is not run (it is quite slow). +For example: +``` +A commit message [skip ci] +``` diff --git a/meson/docs/genrelnotes.py b/meson/docs/genrelnotes.py new file mode 100755 index 000000000..70d891514 --- /dev/null +++ b/meson/docs/genrelnotes.py @@ -0,0 +1,73 @@ +#!/usr/bin/env python3 + +# Copyright 2019 The Meson development team +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +''' + Generates release notes for new releases of Meson build system +''' +import subprocess +import sys +import os +from glob import glob + +RELNOTE_TEMPLATE = '''--- +title: Release {} +short-description: Release notes for {} +... + +# New features + +''' + + +def add_to_sitemap(from_version, to_version): + ''' + Adds release note entry to sitemap.txt. + ''' + sitemapfile = '../sitemap.txt' + s_f = open(sitemapfile) + lines = s_f.readlines() + s_f.close() + with open(sitemapfile, 'w') as s_f: + for line in lines: + if 'Release-notes' in line and from_version in line: + new_line = line.replace(from_version, to_version) + s_f.write(new_line) + s_f.write(line) + +def generate(from_version, to_version): + ''' + Generate notes for Meson build next release. + ''' + ofilename = 'Release-notes-for-{}.md'.format(to_version) + with open(ofilename, 'w') as ofile: + ofile.write(RELNOTE_TEMPLATE.format(to_version, to_version)) + for snippetfile in glob('snippets/*.md'): + snippet = open(snippetfile).read() + ofile.write(snippet) + if not snippet.endswith('\n'): + ofile.write('\n') + ofile.write('\n') + subprocess.check_call(['git', 'rm', snippetfile]) + subprocess.check_call(['git', 'add', ofilename]) + add_to_sitemap(from_version, to_version) + +if __name__ == '__main__': + if len(sys.argv) != 3: + print(sys.argv[0], 'from_version to_version') + sys.exit(1) + FROM_VERSION = sys.argv[1] + TO_VERSION = sys.argv[2] + os.chdir('markdown') + generate(FROM_VERSION, TO_VERSION) diff --git a/meson/docs/markdown/ARM-performance-test.md b/meson/docs/markdown/ARM-performance-test.md new file mode 100644 index 000000000..4be6e4aab --- /dev/null +++ b/meson/docs/markdown/ARM-performance-test.md @@ -0,0 +1,81 @@ +# Arm performance test + +Performance differences in build systems become more apparent on +slower platforms. To examine this difference we compared the +performance of Meson with GNU Autotools. We took the GLib software +project and rewrote its build setup with Meson. GLib was chosen +because it is a relatively large C code base which requires lots of +low level configuration. + +The Meson version of the build system is not fully equivalent to the +original Autotools one. It does not do all the same configuration +steps and does not build all the same targets. The biggest missing +piece being internationalisation support with Gettext. However it does +configure the system enough to build all C source and run all unit +tests. + +All measurements were done on a Nexus 4 smart phone running the latest +Ubuntu touch image (updated on September 9th 2013). + +Measurements +------ + +The first thing we measured was the time it took to run the configure step. + +![GLib config time](images/glib_conf.png) + +Meson takes roughly 20 seconds whereas Autotools takes 220. This is a +difference of one order of magnitude. Autotools' time contains both +autogen and configure. Again it should be remembered that Meson does +not do all the configure steps that Autotools does. It does do about +90% of them and it takes just 10% of the time to do it. + +Then we measured the build times. Two parallel compilation processes +were used for both systems. + +![GLib build time](images/glib_build.png) + +On desktop machines Ninja based build systems are 10-20% faster than +Make based ones. On this platform the difference grows to 50%. The +difference is probably caused by Make's inefficient disk access +patterns. Ninja is better at keeping both cores running all the time +which yields impressive performance improvements. + +![GLib no-op time](images/glib_empty.png) + +Next we measured the "empty build" case. That is, how long does it +take for the build system to detect that no changes need to be +made. This is one of the most important metrics of build systems +because it places a hard limit on how fast you can iterate on your +code. Autotools takes 14 seconds to determine that no work needs to be +done. Meson (or, rather, Ninja) takes just one quarter of a second. + +![GLib link time](images/glib_link.png) + +One step which takes quite a lot of time is linking. A common case is +that you are working on a library and there are tens of small test +executables that link to it. Even if the compilation step would be +fast, relinking all of the test executables takes time. It is common +for people to manually compile only one test application with a +command such as `make sometest` rather than rebuild everything. + +Meson has an optimization for this case. Whenever a library is +rebuilt, Meson inspects the ABI it exports. If it has not changed, +Meson will skip all relinking steps as unnecessary. The difference +this makes can be clearly seen in the chart above. In that test the +source was fully built, then the file `glib/gbytes.c` was touched to +force the rebuild of the base glib shared library. As can be seen, +Autotools then relinks all test executables that link with glib. Since +Meson can detect that the ABI is the same it can skip those steps. The +end result being that Meson is almost one hundred times faster on this +very common use case. + +Conclusions +----- + +One of the main drawbacks of C and C++ compared to languages such as +Java are long compilation times. However at least some of the blame +can be found in the build tools used rather than the languages +themselves or their compilers. Choosing proper tools can bring C and +C++ compilation very close to instantaneous rebuilds. This has a +direct impact on programmer productivity. diff --git a/meson/docs/markdown/Adding-arguments.md b/meson/docs/markdown/Adding-arguments.md new file mode 100644 index 000000000..8dd848864 --- /dev/null +++ b/meson/docs/markdown/Adding-arguments.md @@ -0,0 +1,71 @@ +--- +short-description: Adding compiler arguments +... + +# Adding arguments + +Often you need to specify extra compiler arguments. Meson provides two +different ways to achieve this: global arguments and per-target +arguments. + +Global arguments +-- + +Global compiler arguments are set with the following command. As an +example you could do this. + +```meson +add_global_arguments('-DFOO=bar', language : 'c') +``` + +This makes Meson add the define to all C compilations. Usually you +would use this setting for flags for global settings. Note that for +setting the C/C++ language standard (the `-std=c99` argument in GCC), +you would probably want to use a default option of the `project()` +function. For details see the [reference manual](Reference-manual.md). + +Global arguments have certain limitations. They all have to be defined +before any build targets are specified. This ensures that the global +flags are the same for every single source file built in the entire +project with one exception. Compilation tests that are run as part of +your project configuration do not use these flags. The reason for that +is that you may need to run a test compile with and without a given +flag to determine your build setup. For this reason tests do not use +these global arguments. + +You should set only the most essential flags with this setting, you +should *not* set debug or optimization flags. Instead they should be +specified by selecting an appropriate build type. + +Project arguments +-- + +Project arguments work similar to global arguments except that they +are valid only within the current subproject. The usage is simple: + +```meson +add_project_arguments('-DMYPROJ=projname', language : 'c') +``` + +This would add the compiler flags to all C sources in the current +project. + +Per target arguments +-- + +Per target arguments are just as simple to define. + +```meson +executable('prog', 'prog.cc', cpp_args : '-DCPPTHING') +``` + +Here we create a C++ executable with an extra argument that is used +during compilation but not for linking. + +You can find the parameter name for other languages in the [reference tables](Reference-tables.md). + +Specifying extra linker arguments is done in the same way: + +```meson +executable('prog', 'prog.cc', link_args : '-Wl,--linker-option') +``` diff --git a/meson/docs/markdown/Adding-new-projects-to-wrapdb.md b/meson/docs/markdown/Adding-new-projects-to-wrapdb.md new file mode 100644 index 000000000..25fb61c64 --- /dev/null +++ b/meson/docs/markdown/Adding-new-projects-to-wrapdb.md @@ -0,0 +1,145 @@ +# Adding new projects to WrapDB + + +## How it works + +Each wrap repository has a master branch with only one initial commit and *no* wrap files. +And that is the only commit ever made on that branch. + +For every release of a project a new branch is created. The new branch is named after the +the upstream release number (e.g. `1.0.0`). This branch holds a wrap file for +this particular release. + +There are two types of wraps on WrapDB - regular wraps and wraps with Meson build +definition patches. A wrap file in a repository on WrapDB must have a name `upstream.wrap`. + +Wraps with Meson build definition patches work in much the same way as Debian: +we take the unaltered upstream source package and add a new build system to it as a patch. +These build systems are stored as Git repositories on GitHub. They only contain build definition files. +You may also think of them as an overlay to upstream source. + +Whenever a new commit is pushed into GitHub's project branch, a new wrap is generated +with an incremented version number. All the old releases remain unaltered. +New commits are always done via GitHub merge requests and must be reviewed by +someone other than the submitter. + +Note that your Git repo with wrap must not contain the subdirectory of the source +release. That gets added automatically by the service. You also must not commit +any source code from the original tarball into the wrap repository. + +## Choosing the repository name + +Wrapped subprojects are used much like external dependencies. Thus +they should have the same name as the upstream projects. + +NOTE: Repo names must fully match this regexp: `[a-z0-9._]+`. + +If the project provides a pkg-config file, then the repository name should be +the same as the pkg-config name. Usually this is the name of the +project, such as `libpng`. Sometimes it is slightly different, +however. As an example the libogg project's chosen pkg-config name is +`ogg` instead of `libogg`, which is the reason why the repository is +named plain `ogg`. + +If there is no a pkg-config file, the name the project uses/promotes should be used, +lowercase only (Catch2 -> catch2). + +If the project name is too generic or ambiguous (e.g. `benchmark`), +consider using `organization-project` naming format (e.g. `google-benchmark`). + +## How to contribute a new wrap + +If the project already uses Meson build system, then only a wrap file - `upstream.wrap` +should be provided. In other case a Meson build definition patch - a set of `meson.build` +files - should be also provided. + +### Request a new repository + +Create an issue on the [wrapdb bug tracker](https://github.com/mesonbuild/wrapdb/issues) +using *Title* and *Description* below as a template. + +*Title:* `new wrap: ` + +*Description:* +``` +upstream url: +version: +``` + +Wait until the new repository or branch is created. A link to the new repository or branch +will be posted in a comment to this issue. + +NOTE: Requesting a branch is not necessary. WrapDB maintainer can create the branch and +modify the PR accordingly if the project repository exists. + +### Add a new wrap + +First you need to fork the repository to your own page. +Then you can create the first Wrap commit that usually looks something like this. + +``` +tar xzf libfoo-1.0.0.tar.gz +git clone -b 1.0.0 git@github.com:yourusername/libfoo.git tmpdir +mv tmpdir/.git libfoo-1.0.0 +rm -rf tmpdir +cd libfoo-1.0.0 +git reset --hard +emacs upstream.wrap meson.build + +git add upstream.wrap meson.build +git commit -a -m 'Add wrap files for libfoo-1.0.0' +git push origin 1.0.0 +``` + +Now you should create a pull request on GitHub. Remember to create it against the +correct branch rather than master (`1.0.0` branch in this example). GitHub should do +this automatically. + +If the branch doesn't exist file a pull request against master. +WrapDB maintainers can fix it before merging. + +## What is done by WrapDB maintainers + +[mesonwrap tools](Wrap-maintainer-tools.md) must be used for the tasks below. + +### Adding new project to the Wrap provider service + +Each project gets its own repo. It is initialized like this: + +``` +mesonwrap new_repo --homepage=$HOMEPAGE --directory=$NEW_LOCAL_PROJECT_DIR $PROJECT_NAME +``` + +The command creates a new repository and uploads it to Github. + +`--version` flag may be used to create a branch immediately. + +### Adding a new branch to an existing project + +Create a new branch whose name matches the upstream release number. + +``` +git checkout master +git checkout -b 1.0.0 +git push origin 1.0.0 +(or from GitHub web page, remember to branch from master) +``` + +Branch names must fully match this regexp: `[a-z0-9._]+`. + +## Changes to original source + +The point of a wrap is to provide the upstream project with as few +changes as possible. Most projects should not contain anything more +than a few Meson definition files. Sometimes it may be necessary to +add a template header file or something similar. These should be held +at a minimum. + +It should especially be noted that there must **not** be any patches +to functionality. All such changes must be submitted to upstream. You +may also host your own Git repo with the changes if you wish. The Wrap +system has native support for Git subprojects. + +## Reviewing wraps + +See [Wrap review guidelines](Wrap-review-guidelines.md). diff --git a/meson/docs/markdown/Additional.md b/meson/docs/markdown/Additional.md new file mode 100644 index 000000000..2fff48842 --- /dev/null +++ b/meson/docs/markdown/Additional.md @@ -0,0 +1,8 @@ +--- +short-description: Misc documentation +... + +# Additional documentation + +This section references documents miscellaneous design, benchmarks, or +basically anything concerning Meson. diff --git a/meson/docs/markdown/Build-options.md b/meson/docs/markdown/Build-options.md new file mode 100644 index 000000000..429b9b2ca --- /dev/null +++ b/meson/docs/markdown/Build-options.md @@ -0,0 +1,226 @@ +--- +short-description: Build options to configure project properties +... + +# Build options + +Most non-trivial builds require user-settable options. As an example a +program may have two different data backends that are selectable at +build time. Meson provides for this by having a option definition +file. Its name is `meson_options.txt` and it is placed at the root of +your source tree. + +Here is a simple option file. + +```meson +option('someoption', type : 'string', value : 'optval', description : 'An option') +option('other_one', type : 'boolean', value : false) +option('combo_opt', type : 'combo', choices : ['one', 'two', 'three'], value : 'three') +option('integer_opt', type : 'integer', min : 0, max : 5, value : 3) # Since 0.45.0 +option('free_array_opt', type : 'array', value : ['one', 'two']) # Since 0.44.0 +option('array_opt', type : 'array', choices : ['one', 'two', 'three'], value : ['one', 'two']) +option('some_feature', type : 'feature', value : 'enabled') # Since 0.47.0 +option('long_desc', type : 'string', value : 'optval', + description : 'An option with a very long description' + + 'that does something in a specific context') # Since 0.55.0 +``` + +For built-in options, see [Built-in options][builtin_opts]. + +## Build option types + +All types allow a `description` value to be set describing the option, +if no description is set then the name of the option will be used instead. + +### Strings + +The string type is a free form string. If the default value is not set +then an empty string will be used as the default. + +### Booleans + +Booleans may have values of either `true` or `false`. If no default +value is supplied then `true` will be used as the default. + +### Combos + +A combo allows any one of the values in the `choices` parameter to be +selected. If no default value is set then the first value will be the +default. + +### Integers + +An integer option contains a single integer with optional upper and +lower values that are specified with the `min` and `max` keyword +arguments. + +This type is available since Meson version 0.45.0. + +### Arrays + +Arrays represent an array of strings. By default the array can contain +arbitrary strings. To limit the possible values that can used set the +`choices` parameter. Meson will then only allow the value array to +contain strings that are in the given list. The array may be +empty. The `value` parameter specifies the default value of the option +and if it is unset then the values of `choices` will be used as the +default. + +As of 0.47.0 -Dopt= and -Dopt=[] both pass an empty list, before this -Dopt= +would pass a list with an empty string. + +This type is available since version 0.44.0 + +### Features + +A `feature` option has three states: `enabled`, `disabled` or `auto`. It is intended +to be passed as value for the `required` keyword argument of most functions. +Currently supported in +[`dependency()`](Reference-manual.md#dependency), +[`find_library()`](Reference-manual.md#compiler-object), +[`find_program()`](Reference-manual.md#find_program) and +[`add_languages()`](Reference-manual.md#add_languages) functions. + +- `enabled` is the same as passing `required : true`. +- `auto` is the same as passing `required : false`. +- `disabled` do not look for the dependency and always return 'not-found'. + +When getting the value of this type of option using `get_option()`, a special +[feature option object](Reference-manual.md#feature-option-object) +is returned instead of the string representation of the option's value. +This object can be passed to `required`: + +```meson +d = dependency('foo', required : get_option('myfeature')) +if d.found() + app = executable('myapp', 'main.c', dependencies : [d]) +endif +``` + +To check the value of the feature, the object has three methods +returning a boolean and taking no argument: + +- `.enabled()` +- `.disabled()` +- `.auto()` + +This is useful for custom code depending on the feature: + +```meson +if get_option('myfeature').enabled() + # ... +endif +``` + +If the value of a `feature` option is set to `auto`, that value is overridden by +the global `auto_features` option (which defaults to `auto`). This is intended +to be used by packagers who want to have full control on which dependencies are +required and which are disabled, and not rely on build-deps being installed +(at the right version) to get a feature enabled. They could set +`auto_features=enabled` to enable all features and disable explicitly only the +few they don't want, if any. + +This type is available since version 0.47.0 + +## Using build options + +```meson +optval = get_option('opt_name') +``` + +This function also allows you to query the value of Meson's built-in +project options. For example, to get the installation prefix you would +issue the following command: + +```meson +prefix = get_option('prefix') +``` + +It should be noted that you can not set option values in your Meson +scripts. They have to be set externally with the `meson configure` +command line tool. Running `meson configure` without arguments in a +build dir shows you all options you can set. + +To change their values use the `-D` +option: + +```console +$ meson configure -Doption=newvalue +``` + +Setting the value of arrays is a bit special. If you only pass a +single string, then it is considered to have all values separated by +commas. Thus invoking the following command: + +```console +$ meson configure -Darray_opt=foo,bar +``` + +would set the value to an array of two elements, `foo` and `bar`. + +If you need to have commas in your string values, then you need to +pass the value with proper shell quoting like this: + +```console +$ meson configure "-Doption=['a,b', 'c,d']" +``` + +The inner values must always be single quotes and the outer ones +double quotes. + +To change values in subprojects prepend the name of the subproject and +a colon: + +```console +$ meson configure -Dsubproject:option=newvalue +``` + +**NOTE:** If you cannot call `meson configure` you likely have a old + version of Meson. In that case you can call `mesonconf` instead, but + that is deprecated in newer versions + +## Yielding to superproject option + +Suppose you have a master project and a subproject. In some cases it +might be useful to have an option that has the same value in both of +them. This can be achieved with the `yield` keyword. Suppose you have +an option definition like this: + +```meson +option('some_option', type : 'string', value : 'value', yield : true) +``` + +If you build this project on its own, this option behaves like +usual. However if you build this project as a subproject of another +project which also has an option called `some_option`, then calling +`get_option` returns the value of the superproject. If the value of +`yield` is `false`, `get_option` returns the value of the subproject's +option. + + +## Built-in build options + +There are a number of [built-in options][builtin_opts]. To get the current list execute `meson +configure` in the build directory. + +[builtin_opts]: https://mesonbuild.com/Builtin-options.html + +### Visual Studio + +#### Startup project + +The backend\_startup\_project option can be set to define the default project +that will be executed with the "Start debugging F5" action in visual studio. +It should be the same name as an executable target name. + +```meson +project('my_project', 'c', default_options: ['backend_startup_project=my_exe']) +executable('my_exe', ...) +``` + +### Ninja + +#### Max links + +The backend\_max\_links can be set to limit the number of processes that ninja +will use to link. diff --git a/meson/docs/markdown/Build-system-converters.md b/meson/docs/markdown/Build-system-converters.md new file mode 100644 index 000000000..1c0b4fc50 --- /dev/null +++ b/meson/docs/markdown/Build-system-converters.md @@ -0,0 +1,27 @@ +--- +short-description: Converting other build systems to Meson +... + +# Build system converters + +Moving from one build system into another includes a fair bit of +work. To make things easier, Meson provides scripts to convert other +build systems into Meson. At the time of writing, scripts for CMake +and autotools exist. It can be found in the `tools` subdirectory in +Meson's source tree. + +The scripts do not try to do a perfect conversion. This would be +extremely difficult because the data models of other build systems are +very different. The goal of the converter script is to convert as much +of the low level drudgery as possible. Using the scripts is +straightforward. We'll use the CMake one as an example but the +Autotools one works exactly the same way. + + cmake2meson.py path/to/CMake/project/root + +This command generates a skeleton Meson project definition that tries +to mirror CMake's setup as close as possible. Once this is done, you +need to go through these files manually and finalize the +conversion. To make this task as simple as possible, the converter +script will transfer all comments from the CMake definition into Meson +definition. diff --git a/meson/docs/markdown/Build-targets.md b/meson/docs/markdown/Build-targets.md new file mode 100644 index 000000000..83f959f0b --- /dev/null +++ b/meson/docs/markdown/Build-targets.md @@ -0,0 +1,100 @@ +--- +short-description: Definition of build targets +... + +# Build targets + +Meson provides four kinds of build targets: executables, libraries +(which can be set to be built as static or shared or both of them at +the build configuration time), static libraries, and shared libraries. +They are created with the commands `executable`, `library`, +`static_library` and `shared_library`, respectively. All objects created +in this way are **immutable**. That is, you can not change any aspect of +them after they have been constructed. This ensures that all information +pertaining to a given build target is specified in one well defined +place. + +Libraries and executables +-- + +As an example, here is how you would build a library. + +```meson +project('shared lib', 'c') +library('mylib', 'source.c') +``` + +It is generally preferred to use the `library` command instead of +`shared_library` and `static_library` and then configure which +libraries (static or shared or both of them) will be built at the +build configuration time using the `default_library` +[built-in option](Builtin-options.md). + +In Unix-like operating systems, shared libraries can be +versioned. Meson supports this with keyword arguments, which will be +ignored if the library is configured as static at the compile time. + +```meson +project('shared lib', 'c') +library('mylib', 'source.c', version : '1.2.3', soversion : '0') +``` + +It is common to build a library and then an executable that links +against it. This is supported as well. + +```meson +project('shared lib', 'c') +lib = library('mylib', 'source.c') +executable('program', 'prog.c', link_with : lib) +``` + +Meson sets things up so that the resulting executable can be run +directly from the build directory. There is no need to write shell +scripts or set environment variables. + +One target can have multiple language source files. + +```meson +project('multilang', 'c', 'cpp') +executable('multiexe', 'file.c', 'file2.cc') +``` + +Object files +-- + +Sometimes you can't build files from sources but need to utilize an +existing object file. A typical case is using an object file provided +by a third party. Object files can be specified just like sources. + +```meson +exe = executable('myexe', 'source.cpp', objects : 'third_party_object.o') +``` + +A different case is when you want to use object files built in one +target directly in another. A typical case is when you build a shared +library and it has an internal class that is not exported in the +ABI. This means you can't access it even if you link against the +library. Typical workarounds for this include building both a shared +and static version of the library or putting the source file in the +test executable's source list. Both of these approaches cause the +source to be built twice, which is slow. + +In Meson you can extract object files from targets and use them as-is +on other targets. This is the syntax for it. + +```meson +lib = shared_library('somelib', 'internalclass.cc', 'file.cc', ...) +eo = lib.extract_objects('internalclass.cc') +executable('classtest', 'classtest.cpp', objects : eo) +``` + +Here we take the internal class object and use it directly in the +test. The source file is only compiled once. + +Note that careless use of this feature may cause strange bugs. As an +example trying to use objects of an executable or static library in a +shared library will not work because shared library objects require +special compiler flags. Getting this right is the user's +responsibility. For this reason it is strongly recommended that you +only use this feature for generating unit test executables in the +manner described above. diff --git a/meson/docs/markdown/Builtin-options.md b/meson/docs/markdown/Builtin-options.md new file mode 100644 index 000000000..e7101d55b --- /dev/null +++ b/meson/docs/markdown/Builtin-options.md @@ -0,0 +1,217 @@ +--- +short-description: Built-in options to configure project properties +... + +# Built-in options + +Meson provides two kinds of options: [build options provided by the +build files](Build-options.md) and built-in options that are either +universal options, base options, compiler options. + +## Universal options + +A list of these options can be found by running `meson --help`. All +these can be set by passing to `meson` (aka `meson setup`) in any of +these ways: `--option=value`, `--option value`, `-Doption=value`, or +by setting them inside `default_options` of `project()` in your `meson.build`. + +For legacy reasons `--warnlevel` is the cli argument for the `warning_level` option. + +They can also be edited after setup using `meson configure`. + +Installation options are all relative to the prefix, except: + +* When the prefix is `/usr`: `sysconfdir` defaults to `/etc`, `localstatedir` defaults to `/var`, and `sharedstatedir` defaults to `/var/lib` +* When the prefix is `/usr/local`: `localstatedir` defaults to `/var/local`, and `sharedstatedir` defaults to `/var/local/lib` + +### Directories + +| Option | Default value | Description | +| ------ | ------------- | ----------- | +| prefix | see below | Installation prefix | +| bindir | bin | Executable directory | +| datadir | share | Data file directory | +| includedir | include | Header file directory | +| infodir | share/info | Info page directory | +| libdir | see below | Library directory | +| libexecdir | libexec | Library executable directory | +| localedir | share/locale | Locale data directory | +| localstatedir | var | Localstate data directory | +| mandir | share/man | Manual page directory | +| sbindir | sbin | System executable directory | +| sharedstatedir | com | Architecture-independent data directory | +| sysconfdir | etc | Sysconf data directory | + + +`prefix` defaults to `C:/` on Windows, and `/usr/local` otherwise. You should always +override this value. + +`libdir` is automatically detected based on your platform, it should be +correct when doing "native" (build machine == host machine) compilation. For +cross compiles meson will try to guess the correct libdir, but it may not be +accurate, especially on Linux where different distributions have different +defaults. Using a [cross file](Cross-compilation.md#defining-the-environment), +particularly the paths section may be necessary. + +### Core options + +Options that are labeled "per machine" in the table are set per machine. See +the [specifying options per machine](#Specifying-options-per-machine) section +for details. + +| Option | Default value | Description | Is per machine | +| ------ | ------------- | ----------- | -------------- | +| auto_features {enabled, disabled, auto} | auto | Override value of all 'auto' features | no | +| backend {ninja, vs,
vs2010, vs2015, vs2017, vs2019, xcode} | ninja | Backend to use | no | +| buildtype {plain, debug,
debugoptimized, release, minsize, custom} | debug | Build type to use | no | +| debug | true | Debug | no | +| default_library {shared, static, both} | shared | Default library type | no | +| errorlogs | true | Whether to print the logs from failing tests. | no | +| install_umask {preserve, 0000-0777} | 022 | Default umask to apply on permissions of installed files | no | +| layout {mirror,flat} | mirror | Build directory layout | no | +| optimization {0, g, 1, 2, 3, s} | 0 | Optimization level | no | +| pkg_config_path {OS separated path} | '' | Additional paths for pkg-config to search before builtin paths | yes | +| cmake_prefix_path | [] | Additional prefixes for cmake to search before builtin paths | yes | +| stdsplit | true | Split stdout and stderr in test logs | no | +| strip | false | Strip targets on install | no | +| unity {on, off, subprojects} | off | Unity build | no | +| unity_size {>=2} | 4 | Unity file block size | no | +| warning_level {0, 1, 2, 3} | 1 | Set the warning level. From 0 = none to 3 = highest | no | +| werror | false | Treat warnings as errors | no | +| wrap_mode {default, nofallback,
nodownload, forcefallback} | default | Wrap mode to use | no | +| force_fallback_for | [] | Force fallback for those dependencies | no | + + +For setting optimization levels and toggling debug, you can either set the +`buildtype` option, or you can set the `optimization` and `debug` options which +give finer control over the same. Whichever you decide to use, the other will +be deduced from it. For example, `-Dbuildtype=debugoptimized` is the same as +`-Ddebug=true -Doptimization=2` and vice-versa. This table documents the +two-way mapping: + +| buildtype | debug | optimization | +| --------- | ----- | ------------ | +| plain | false | 0 | +| debug | true | 0 | +| debugoptimized | true | 2 | +| release | false | 3 | +| minsize | true | s | + +All other combinations of `debug` and `optimization` set `buildtype` to `'custom'`. + +## Base options + +These are set in the same way as universal options, but cannot be shown in the +output of `meson --help` because they depend on both the current platform and +the compiler that will be selected. The only way to see them is to setup +a builddir and then run `meson configure` on it with no options. + +The following options are available. Note that they may not be available on all +platforms or with all compilers: + +| Option | Default value | Possible values | Description | +| ----------- | ------------- | --------------- | ----------- | +| b_asneeded | true | true, false | Use -Wl,--as-needed when linking | +| b_bitcode | false | true, false | Embed Apple bitcode, see below | +| b_colorout | always | auto, always, never | Use colored output | +| b_coverage | false | true, false | Enable coverage tracking | +| b_lundef | true | true, false | Don't allow undefined symbols when linking | +| b_lto | false | true, false | Use link time optimization | +| b_ndebug | false | true, false, if-release | Disable asserts | +| b_pch | true | true, false | Use precompiled headers | +| b_pgo | off | off, generate, use | Use profile guided optimization | +| b_sanitize | none | see below | Code sanitizer to use | +| b_staticpic | true | true, false | Build static libraries as position independent | +| b_pie | false | true, false | Build position-independent executables (since 0.49.0)| +| b_vscrt | from_buildtype| none, md, mdd, mt, mtd, from_buildtype | VS runtime library to use (since 0.48.0) | + +The value of `b_sanitize` can be one of: `none`, `address`, `thread`, +`undefined`, `memory`, `address,undefined`. + + +The default value of `b_vscrt` is `from_buildtype`. In that case, the following +table is used internally to pick the CRT compiler arguments based on the value +of the `buildtype` option: + +| buildtype | Visual Studio CRT | +| -------- | ----------------- | +| debug | `/MDd` | +| debugoptimized | `/MD` | +| release | `/MD` | +| minsize | `/MD` | +| custom | error! | + +### Notes about Apple Bitcode support + +`b_bitcode` will pass `-fembed-bitcode` while compiling and will pass +`-Wl,-bitcode_bundle` while linking. These options are incompatible with +`b_asneeded`, so that option will be silently disabled. + +[Shared modules](Reference-manual.md#shared_module) will not have bitcode +embedded because `-Wl,-bitcode_bundle` is incompatible with both `-bundle` and +`-Wl,-undefined,dynamic_lookup` which are necessary for shared modules to work. + +## Compiler options + +Same caveats as base options above. + +The following options are available. Note that both the options themselves and +the possible values they can take will depend on the target platform or +compiler being used: + +| Option | Default value | Possible values | Description | +| ------ | ------------- | --------------- | ----------- | +| c_args | | free-form comma-separated list | C compile arguments to use | +| c_link_args | | free-form comma-separated list | C link arguments to use | +| c_std | none | none, c89, c99, c11, c17, c18, gnu89, gnu99, gnu11, gnu17, gnu18 | C language standard to use | +| c_winlibs | see below | free-form comma-separated list | Standard Windows libs to link against | +| c_thread_count | 4 | integer value ≥ 0 | Number of threads to use with emcc when using threads | +| cpp_args | | free-form comma-separated list | C++ compile arguments to use | +| cpp_link_args | | free-form comma-separated list | C++ link arguments to use | +| cpp_std | none | none, c++98, c++03, c++11, c++14, c++17,
c++1z, gnu++03, gnu++11, gnu++14, gnu++17, gnu++1z,
vc++14, vc++17, vc++latest | C++ language standard to use | +| cpp_debugstl | false | true, false | C++ STL debug mode | +| cpp_eh | default | none, default, a, s, sc | C++ exception handling type | +| cpp_rtti | true | true, false | Whether to enable RTTI (runtime type identification) | +| cpp_thread_count | 4 | integer value ≥ 0 | Number of threads to use with emcc when using threads | +| cpp_winlibs | see below | free-form comma-separated list | Standard Windows libs to link against | +| fortran_std | none | [none, legacy, f95, f2003, f2008, f2018] | Fortran language standard to use | + +The default values of `c_winlibs` and `cpp_winlibs` are in compiler-specific +argument forms, but the libraries are: kernel32, user32, gdi32, winspool, +shell32, ole32, oleaut32, uuid, comdlg32, advapi32. + +All these `_*` options are specified per machine. See below in the +[specifying options per machine](#Specifying-options-per-machine) section on +how to do this in cross builds. + +When using MSVC, `cpp_eh=none` will result in no exception flags being passed, +while the `cpp_eh=[value]` will result in `/EH[value]`. +Since *0.51.0* `cpp_eh=default` will result in `/EHsc` on MSVC. When using +gcc-style compilers, nothing is passed (allowing exceptions to work), while +`cpp_eh=none` passes `-fno-exceptions`. + +Since *0.54.0* The `_thread_count` option can be used to control the +value passed to `-s PTHREAD_POOL_SIZE` when using emcc. No other c/c++ +compiler supports this option. + +## Specifying options per machine + +Since *0.51.0*, some options are specified per machine rather than globally for +all machine configurations. Prefixing the option with `build.` just affects the +build machine configuration, while unprefixed just affects the host machine +configuration, respectively. For example: + + - `build.pkg_config_path` controls the paths pkg-config will search for just + `native: true` dependencies (build machine). + + - `pkg_config_path` controls the paths pkg-config will search for just + `native: false` dependencies (host machine). + +This is useful for cross builds. In the native builds, build = host, and the +unprefixed option alone will suffice. + +Prior to *0.51.0*, these options just effected native builds when specified on +the command line, as there was no `build.` prefix. Similarly named fields in +the `[properties]` section of the cross file would effect cross compilers, but +the code paths were fairly different allowing differences in behavior to crop +out. diff --git a/meson/docs/markdown/CMake-module.md b/meson/docs/markdown/CMake-module.md new file mode 100644 index 000000000..fc6157ed7 --- /dev/null +++ b/meson/docs/markdown/CMake-module.md @@ -0,0 +1,239 @@ +# CMake module + +**Note**: the functionality of this module is governed by [Meson's + rules on mixing build systems](Mixing-build-systems.md). + +This module provides helper tools for generating cmake package files. +It also supports the usage of CMake based subprojects, similar to +the normal [meson subprojects](Subprojects.md). + + +## Usage + +To use this module, just do: **`cmake = import('cmake')`**. The +following functions will then be available as methods on the object +with the name `cmake`. You can, of course, replace the name `cmake` +with anything else. + +It is generally recommended to use the latest Meson version and +CMake >=3.17 for best compatibility. CMake subprojects will +usually also work with older CMake versions. However, this can +lead to unexpected issues in rare cases. + +## CMake subprojects + +Using CMake subprojects is similar to using the "normal" meson +subprojects. They also have to be located in the `subprojects` +directory. + +Example: + +```cmake +add_library(cm_lib SHARED ${SOURCES}) +``` + +```meson +cmake = import('cmake') + +# Configure the CMake project +sub_proj = cmake.subproject('libsimple_cmake') + +# Fetch the dependency object +cm_lib = sub_proj.dependency('cm_lib') + +executable(exe1, ['sources'], dependencies: [cm_lib]) +``` + +The `subproject` method is almost identical to the normal meson +`subproject` function. The only difference is that a CMake project +instead of a meson project is configured. + +The returned `sub_proj` supports the same options as a "normal" subproject. +Meson automatically detects CMake build targets, which can be accessed with +the methods listed [below](#subproject-object). + +It is usually enough to just use the dependency object returned by the +`dependency()` method in the build targets. This is almost identical to +using `declare_dependency()` object from a normal meson subproject. + +It is also possible to use executables defined in the CMake project as code +generators with the `target()` method: + +```cmake +add_executable(cm_exe ${EXE_SRC}) +``` + +```meson +cmake = import('cmake') + +# Subproject with the "code generator" +sub_pro = cmake.subproject('cmCodeGen') + +# Fetch the code generator exe +sub_exe = sub_pro.target('cm_exe') + +# Use the code generator +generated = custom_target( + 'cmake-generated', + input: [], + output: ['test.cpp'], + command: [sub_exe, '@OUTPUT@'] +) +``` + +It should be noted that not all projects are guaranteed to work. The +safest approach would still be to create a `meson.build` for the +subprojects in question. + +### Configuration options + +*New in meson 0.55.0* + +Meson also supports passing configuration options to CMake and overriding +certain build details extracted from the CMake subproject. + +```meson +cmake = import('cmake') +opt_var = cmake.subproject_options() + +# Call CMake with `-DSOME_OTHER_VAR=ON` +opt_var.add_cmake_defines({'SOME_OTHER_VAR': true}) + +# Globally override the C++ standard to c++11 +opt_var.set_override_option('cpp_std', 'c++11') + +# Override the previous global C++ standard +# with c++14 only for the CMake target someLib +opt_var.set_override_option('cpp_std', 'c++14', target: 'someLib') + +sub_pro = cmake.subproject('someLibProject', options: opt_var) + +# Further changes to opt_var have no effect +``` + +See [the CMake options object](#cmake-options-object) for a complete reference +of all supported functions. + +The CMake configuration options object is very similar to the +[configuration data object](Reference-manual.md#configuration-data-object) object +returned by [`configuration_data`](Reference-manual.md#configuration_data). It +is generated by the `subproject_options` function + +All configuration options have to be set *before* the subproject is configured +and must be passed to the `subproject` method via the `options` key. Altering +the configuration object won't have any effect on previous `cmake.subproject` +calls. + +In earlier meson versions CMake command-line parameters could be set with the +`cmake_options` kwarg. However, this feature is deprecated since 0.55.0 and only +kept for compatibility. It will not work together with the `options` kwarg. + +### `subproject` object + +This object is returned by the `subproject` function described above +and supports the following methods: + + - `dependency(target)` returns a dependency object for any CMake target. + - `include_directories(target)` returns a meson `include_directories()` + object for the specified target. Using this function is not necessary + if the dependency object is used. + - `target(target)` returns the raw build target. + - `target_type(target)` returns the type of the target as a string + - `target_list()` returns a list of all target *names*. + - `get_variable(name)` fetches the specified variable from inside + the subproject. Usually `dependency()` or `target()` should be + preferred to extract build targets. + - `found` returns true if the subproject is available, otherwise false + *new in meson 0.53.2* + +### `cmake options` object + +This object is returned by the `subproject_options()` function and consumed by +the `options` kwarg of the `subproject` function. The following methods are +supported: + + - `add_cmake_defines({'opt1': val1, ...})` add additional CMake commandline defines + - `set_override_option(opt, val)` set specific [build options](Build-options.md) + for targets. This will effectively add `opt=val` to the `override_options` + array of the [build target](Reference-manual.md#executable) + - `set_install(bool)` override wether targets should be installed or not + - `append_compile_args(lang, arg1, ...)` append compile flags for a specific + language to the targets + - `append_link_args(arg1, ...)` append linger args to the targets + - `clear()` reset all data in the `cmake options` object + +The methods `set_override_option`, `set_install`, `append_compile_args` and +`append_link_args` support the optional `target` kwarg. If specified, the set +options affect the specific target. The effect of the option is global for the +subproject otherwise. + +If, for instance, `opt_var.set_install(false)` is called, no target will be +installed regardless of what is set by CMake. However, it is still possible to +install specific targets (here `foo`) by setting the `target` kwarg: +`opt_var.set_install(true, target: 'foo')` + +Options that are not set won't affect the generated subproject. So, if for +instance, `set_install` was not called then the values extracted from CMake will +be used. + +## CMake configuration files + +### cmake.write_basic_package_version_file() + +This function is the equivalent of the corresponding [CMake function](https://cmake.org/cmake/help/v3.11/module/CMakePackageConfigHelpers.html#generating-a-package-version-file), +it generates a `name` package version file. + +* `name`: the name of the package. +* `version`: the version of the generated package file. +* `compatibility`: a string indicating the kind of compatibility, the accepted values are +`AnyNewerVersion`, `SameMajorVersion`, `SameMinorVersion` or `ExactVersion`. +It defaults to `AnyNewerVersion`. Depending on your cmake installation some kind of +compatibility may not be available. +* `install_dir`: optional installation directory, it defaults to `$(libdir)/cmake/$(name)` + + +Example: + +```meson +cmake = import('cmake') + +cmake.write_basic_package_version_file(name: 'myProject', version: '1.0.0') +``` + +### cmake.configure_package_config_file() + +This function is the equivalent of the corresponding [CMake function](https://cmake.org/cmake/help/v3.11/module/CMakePackageConfigHelpers.html#generating-a-package-configuration-file), +it generates a `name` package configuration file from the `input` template file. Just like the cmake function +in this file the `@PACKAGE_INIT@` statement will be replaced by the appropriate piece of cmake code. +The equivalent `PATH_VARS` argument is given through the `configuration` parameter. + +* `name`: the name of the package. +* `input`: the template file where that will be treated for variable substitutions contained in `configuration`. +* `install_dir`: optional installation directory, it defaults to `$(libdir)/cmake/$(name)`. +* `configuration`: a `configuration_data` object that will be used for variable substitution in the template file. + + +Example: + +meson.build: + +```meson +cmake = import('cmake') + +conf = configuration_data() +conf.set_quoted('VAR', 'variable value') + +cmake.configure_package_config_file( + name: 'myProject', + input: 'myProject.cmake.in', + configuration: conf +) +``` + +myProject.cmake.in: + +```text +@PACKAGE_INIT@ + +set(MYVAR VAR) +``` diff --git a/meson/docs/markdown/Comparisons.md b/meson/docs/markdown/Comparisons.md new file mode 100644 index 000000000..1deef6979 --- /dev/null +++ b/meson/docs/markdown/Comparisons.md @@ -0,0 +1,78 @@ +--- +title: Comparisons +... + +# Comparing Meson with other build systems + +A common question is *Why should I choose Meson over a different build +system X?* There is no one true answer to this as it depends on the +use case. Almost all build systems have all the functionality needed +to build medium-to-large projects so the decision is usually made on +other points. Here we list some pros and cons of various build systems +to help you do the decision yourself. + +## GNU Autotools + +### Pros + +Excellent support for legacy Unix platforms, large selection of +existing modules. + +### Cons + +Needlessly slow, complicated, hard to use correctly, unreliable, +painful to debug, incomprehensible for most people, poor support for +non-Unix platforms (especially Windows). + +## CMake + +### Pros + +Great support for multiple backends (Visual Studio, XCode, etc). + +### Cons + +The scripting language is cumbersome to work with. Some simple things +are more complicated than necessary. + +## SCons + +### Pros + +Full power of Python available for defining your build. + +### Cons + +Slow. Requires you to pass your configuration settings on every +invocation. That is, if you do `scons OPT1 OPT2` and then just +`scons`, it will reconfigure everything without settings `OPT1` and +`OPT2`. Every other build system remembers build options from the +previous invocation. + +## Bazel + +### Pros + +Proven to scale to very large projects. + +### Cons + +Implemented in Java. Poor Windows support. Heavily focused on Google's +way of doing things (which may be a good or a bad thing). Contributing +code requires [signing a CLA](https://bazel.build/contributing.html). + +## Meson + +### Pros + +The fastest build system [see +measurements](Performance-comparison.md), user friendly, designed to +be as invisible to the developer as possible, native support for +modern tools (precompiled headers, coverage, Valgrind etc). Not Turing +complete so build definition files are easy to read and understand. + +### Cons + +Relatively new so it does not have a large user base yet, and may thus +contain some unknown bugs. Visual Studio and XCode backends not as +high quality as Ninja one. diff --git a/meson/docs/markdown/Compiler-properties.md b/meson/docs/markdown/Compiler-properties.md new file mode 100644 index 000000000..5d29dd1d7 --- /dev/null +++ b/meson/docs/markdown/Compiler-properties.md @@ -0,0 +1,230 @@ +# Compiler properties + +Not all compilers and platforms are alike. Therefore Meson provides +the tools to detect properties of the system during configure time. To +get most of this information, you first need to extract the *compiler +object* from the main *meson* variable. + +```meson +compiler = meson.get_compiler('c') +``` + +Here we extract the C compiler. We could also have given the argument +`cpp` to get the C++ compiler, `objc` to get the objective C compiler +and so on. The call is valid for all languages specified in the +*project* declaration. Trying to obtain some other compiler will lead +to an unrecoverable error. + +## System information + +This is a bit complex and more thoroughly explained on the page on +[cross compilation](Cross-compilation.md). But if you just want to +know the operating system your code will run on, issue this command: + +```meson +host_machine.system() +``` + +## Compiler id + +The compiler object method `get_id` returns a +lower case string describing the "family" of the compiler. Since 0.53.0 +`get_linker_id` returns a lower case string with the linker name. Since +compilers can often choose from multiple linkers depending on operating +system, `get_linker_id` can be useful for handling or mitigating effects +of particular linkers. + +The compiler object also has a method `get_argument_syntax` which +returns a lower case string of `gcc`, `msvc`, or another undefined string +value; identifying whether the compiler arguments use the same syntax as +either `gcc` or `msvc`, or that its arguments are not like either. This should +only be used to select the syntax of the arguments, such as those to test +with `has_argument`. + +See [reference tables](Reference-tables.md#compiler-ids) for a list of supported compiler +ids and their argument type. + +## Does code compile? + +Sometimes the only way to test the system is to try to compile some +sample code and see if it works. For example, this can test that a +"C++17" compiler actually supports a particular C++17 feature, +without resorting to maintaining a feature list vs. compiler vendor, +compiler version and operating system. +Testing that a code snippet runs is a two-phase operation. First +we define some code using the multiline string operator: + +```meson +code = '''#include +void func() { printf("Compile me.\n"); } +''' +``` + +Then we can run the test. + +```meson +result = compiler.compiles(code, name : 'basic check') +``` + +The variable *result* will now contain either `true` or `false` +depending on whether the compilation succeeded or not. The keyword +argument `name` is optional. If it is specified, Meson will write the +result of the check to its log. + +## Does code compile and link? + +Sometimes it is necessary to check whether a certain code fragment not +only compiles, but also links successfully, e.g. to check if a symbol +is actually present in a library. This can be done using the +'''.links()''' method on a compiler object like this: + +```meson +code = '''#include +void func() { printf("Compile me.\n"); } +''' +``` + +Then we can run the test. + +```meson +result = compiler.links(code, args : '-lfoo', name : 'link check') +``` + +The variable *result* will now contain either `true` or `false` +depending on whether the compilation and linking succeeded or not. The +keyword argument `name` is optional. If it is specified, Meson will +write the result of the check to its log. + +## Compile and run test application + +Here is how you would compile and run a small test application. +Testing if a code snippets **runs** versus merely that it links +is particularly important for some dependencies such as MPI. + +```meson +code = '''#include +int main(int argc, char **argv) { + printf("%s\n", "stdout"); + fprintf(stderr, "%s\n", "stderr"); + return 0; +} +''' +result = compiler.run(code, name : 'basic check') +``` + +The `result` variable encapsulates the state of the test, which can be +extracted with the following methods. The `name` keyword argument +works the same as with `compiles`. + +| Method | Return value | +| ------ | ------------ | +| compiled | `True` if compilation succeeded. If `false` then all other methods return undefined values. | +| returncode | The return code of the application as an integer | +| stdout | Program's standard out as text. | +| stderr | Program's standard error as text. | + +Here is an example usage: + +```meson +if result.stdout().strip() == 'some_value' + # do something +endif +``` + +## Does a header exist? + +Header files provided by different platforms vary quite a lot. Meson +has functionality to detect whether a given header file is available +on the system. The test is done by trying to compile a simple test +program that includes the specified header. The following snippet +describes how this feature can be used. + +```meson +if compiler.has_header('sys/fstat.h') + # header exists, do something +endif +``` + +## Expression size + +Often you need to determine the size of a particular element (such as +`int`, `wchar_t` or `char*`). Using the `compiler` variable mentioned +above, the check can be done like this. + +```meson +wcharsize = compiler.sizeof('wchar_t', prefix : '#include') +``` + +This will put the size of `wchar_t` as reported by sizeof into +variable `wcharsize`. The keyword argument `prefix` is optional. If +specified its contents is put at the top of the source file. This +argument is typically used for setting `#include` directives in +configuration files. + +In older versions (<= 0.30) meson would error out if the size could +not be determined. Since version 0.31 it returns -1 if the size could +not be determined. + +## Does a function exist? + +Just having a header doesn't say anything about its +contents. Sometimes you need to explicitly check if some function +exists. This is how we would check whether the function `open_memstream` +exists in header `stdio.h` + +```meson +if compiler.has_function('open_memstream', prefix : '#include ') + # function exists, do whatever is required. +endif +``` + +Note that, on macOS programs can be compiled targeting older macOS +versions than the one that the program is compiled on. It can't be +assumed that the OS version that is compiled on matches the OS +version that the binary will run on. + +Therefore when detecting function availability with `has_function`, it +is important to specify the correct header in the prefix argument. + +In the example above, the function `open_memstream` is detected, which +was introduced in macOS 10.13. When the user builds on macOS 10.13, but +targeting macOS 10.11 (`-mmacosx-version-min=10.11`), this will correctly +report the function as missing. Without the header however, it would lack +the necessary availability information and incorrectly report the function +as available. + +## Does a structure contain a member? + +Some platforms have different standard structures. Here's how one +would check if a struct called `mystruct` from header `myheader.h` +contains a member called `some_member`. + +```meson +if compiler.has_member('struct mystruct', 'some_member', prefix : '#include') + # member exists, do whatever is required +endif +``` + +## Type alignment + +Most platforms can't access some data types at any address. For +example it is common that a `char` can be at any address but a 32 bit +integer only at locations which are divisible by four. Determining the +alignment of data types is simple. + +```meson +int_alignment = compiler.alignment('int') # Will most likely contain the value 4. +``` + +## Has argument + +This method tests if the compiler supports a given command line +argument. This is implemented by compiling a small file with the given +argument. + +```meson +has_special_flags = compiler.has_argument('-Wspecialthing') +``` + +*Note*: some compilers silently swallow command line arguments they do +not understand. Thus this test can not be made 100% reliable. diff --git a/meson/docs/markdown/Conference-presentations.md b/meson/docs/markdown/Conference-presentations.md new file mode 100644 index 000000000..abfc52f1e --- /dev/null +++ b/meson/docs/markdown/Conference-presentations.md @@ -0,0 +1,17 @@ +# Conference presentations on Meson + +- FOSDEM 2014, [Introducing the Meson build system](https://video.fosdem.org/2014/H2215_Ferrer/Sunday/Introducing_the_Meson_build_system.webm) (jpakkane) + +- LCA 2015, [Making build systems not suck](https://www.youtube.com/watch?v=KPi0AuVpxLI) (jpakkane) + +- GUADEC 2015, [Improving the way Gnome apps are built](https://www.youtube.com/watch?v=wTf0NjjNwTU) (jpakkane) + +- GStreamer conference 2015, [Done in 6.0 seconds](https://gstconf.ubicast.tv/videos/done-in-60-seconds-a-new-build-system-for-gstreamer) (jpakkane) + +- LCA 2016, [Builds, dependencies and deployment in the modern multiplatform world](https://www.youtube.com/watch?v=CTJtKtQ8R5k) (jpakkane) + +- GUADEC 2016, [Making your GNOME app compile 2.4x faster](https://media.ccc.de/v/44-making_your_gnome_app_compile_24x_faster) (nirbheek) + +- Libre Application Summit 2016, [New world, new tools](https://youtu.be/0-gx1qU2pPo) (jpakkane) + +- GStreamer conference 2016, [GStreamer Development on Windows and faster builds everywhere with Meson](https://gstconf.ubicast.tv/videos/gstreamer-development-on-windows-ans-faster-builds-everywhere-with-meson/) (tpm) diff --git a/meson/docs/markdown/Configuration.md b/meson/docs/markdown/Configuration.md new file mode 100644 index 000000000..cd1af14d1 --- /dev/null +++ b/meson/docs/markdown/Configuration.md @@ -0,0 +1,197 @@ +--- +short-description: Build-time configuration options +... + +# Configuration + +If there are multiple configuration options, passing them through +compiler flags becomes very burdensome. It also makes the +configuration settings hard to inspect. To make things easier, Meson +supports the generation of configure files. This feature is similar to +one found in other build systems such as CMake. + +Suppose we have the following Meson snippet: + +```meson +conf_data = configuration_data() +conf_data.set('version', '1.2.3') +configure_file(input : 'config.h.in', + output : 'config.h', + configuration : conf_data) +``` + +and that the contents of `config.h.in` are + +```c +#define VERSION_STR "@version@" +``` + +Meson will then create a file called `config.h` in the corresponding +build directory whose contents are the following. + +```c +#define VERSION_STR "1.2.3" +``` + +More specifically, Meson will find all strings of the type `@varname@` +and replace them with respective values set in `conf_data`. You can +use a single `configuration_data` object as many times as you like, +but it becomes immutable after being passed to the `configure_file` +function. That is, after it has been used once to generate output the +`set` function becomes unusable and trying to call it causes an error. +Copy of immutable `configuration_data` is still immutable. + +For more complex configuration file generation Meson provides a second +form. To use it, put a line like this in your configuration file. + + #mesondefine TOKEN + +The replacement that happens depends on what the value and type of TOKEN is: + +```c +#define TOKEN // If TOKEN is set to boolean true. +#undef TOKEN // If TOKEN is set to boolean false. +#define TOKEN 4 // If TOKEN is set to an integer or string value. +/* undef TOKEN */ // If TOKEN has not been set to any value. +``` + +Note that if you want to define a C string, you need to do the quoting +yourself like this: + +```meson +conf_data.set('TOKEN', '"value"') +``` + +Since this is such a common operation, Meson provides a convenience +method: + +```meson +plain_var = 'value' +conf_data.set_quoted('TOKEN', plain_var) # becomes #define TOKEN "value" +``` + +Often you have a boolean value in Meson but need to define the C/C++ +token as 0 or 1. Meson provides a convenience function for this use +case. + +```meson +conf_data.set10(token, boolean_value) +# The line above is equivalent to this: +if boolean_value + conf_data.set(token, 1) +else + conf_data.set(token, 0) +endif +``` + +## Configuring without an input file + +If the input file is not defined then Meson will generate a header +file all the entries in the configuration data object. The +replacements are the same as when generating `#mesondefine` entries: + +```meson +conf_data.set('FOO', '"string"') => #define FOO "string" +conf_data.set('FOO', 'a_token') => #define FOO a_token +conf_data.set('FOO', true) => #define FOO +conf_data.set('FOO', false) => #undef FOO +conf_data.set('FOO', 1) => #define FOO 1 +conf_data.set('FOO', 0) => #define FOO 0 +``` + +In this mode, you can also specify a comment which will be placed +before the value so that your generated files are self-documenting. + +```meson +conf_data.set('BAR', true, description : 'Set BAR if it is available') +``` + +Will produce: + +```c +/* Set BAR if it is available */ +#define BAR +``` + +## Dealing with file encodings + +The default meson file encoding to configure files is utf-8. If you need to +configure a file that is not utf-8 encoded the encoding keyword will allow +you to specify which file encoding to use. It is however strongly advised to +convert your non utf-8 file to utf-8 whenever possible. Supported file +encodings are those of python3, see [standard-encodings](https://docs.python.org/3/library/codecs.html#standard-encodings). + +## Using dictionaries + +Since *0.49.0* `configuration_data()` takes an optional dictionary as first +argument. If provided, each key/value pair is added into the +`configuration_data` as if `set()` method was called for each of them. +`configure_file()`'s `configuration` kwarg also accepts a dictionary instead of +a configuration_data object. + +Example: +```meson +cdata = configuration_data({ + 'STRING' : '"foo"', + 'INT' : 42, + 'DEFINED' : true, + 'UNDEFINED' : false, +}) + +configure_file(output : 'config1.h', + configuration : cdata, +) + +configure_file(output : 'config2.h', + configuration : { + 'STRING' : '"foo"', + 'INT' : 42, + 'DEFINED' : true, + 'UNDEFINED' : false, + } +) + +``` + +# A full example + +Generating and using a configuration file requires the following steps: + + - generate the file + - create an include directory object for the directory that holds the file + - use it in a target + +We are going to use the traditional approach of generating a header +file in the top directory. The common name is `config.h` but we're +going to use an unique name. This avoids the problem of accidentally +including the wrong header file when building a project with many +subprojects. + +At the top level we generate the file: + +```meson +conf_data = configuration_data() +# Set data +configure_file(input : 'projconfig.h.in', + output : 'projconfig.h', + configuration : conf_data) +``` + +Immediately afterwards we generate the include object. + +```meson +configuration_inc = include_directories('.') +``` + +Finally we specify this in a target that can be in any subdirectory. + +```meson +executable(..., include_directories : configuration_inc) +``` + +Now any source file in this target can include the configuration +header like this: + +```c +#include +``` diff --git a/meson/docs/markdown/Configuring-a-build-directory.md b/meson/docs/markdown/Configuring-a-build-directory.md new file mode 100644 index 000000000..330899f79 --- /dev/null +++ b/meson/docs/markdown/Configuring-a-build-directory.md @@ -0,0 +1,119 @@ +--- +short-description: Configuring a pre-generated build directory +... + +# Configuring a build directory + +Often you want to change the settings of your build after it has been +generated. For example you might want to change from a debug build +into a release build, set custom compiler flags, change the build +options provided in your `meson_options.txt` file and so on. + +The main tool for this is the `meson configure` command. + +You invoke `meson configure` by giving it the location of your build dir. If +omitted, the current working directory is used instead. Here's a +sample output for a simple project. + + Core properties + + Source dir /home/jpakkane/clangdemo/2_address + Build dir /home/jpakkane/clangdemo/2_address/buildmeson + + Core options: + Option Current Value Possible Values Description + ------ ------------- --------------- ----------- + auto_features auto [enabled, disabled, auto] Override value of all 'auto' features + backend ninja [ninja, vs, vs2010, vs2015, vs2017, vs2019, xcode] Backend to use + buildtype release [plain, debug, debugoptimized, release, minsize, custom] Build type to use + debug false [true, false] Debug + default_library shared [shared, static, both] Default library type + install_umask 0022 [preserve, 0000-0777] Default umask to apply on permissions of installed files + layout mirror [mirror, flat] Build directory layout + optimization 3 [0, g, 1, 2, 3, s] Optimization level + strip false [true, false] Strip targets on install + unity off [on, off, subprojects] Unity build + warning_level 1 [0, 1, 2, 3] Compiler warning level to use + werror false [true, false] Treat warnings as errors + + Backend options: + Option Current Value Possible Values Description + ------ ------------- --------------- ----------- + backend_max_links 0 >=0 Maximum number of linker processes to run or 0 for no limit + + Base options: + Option Current Value Possible Values Description + ------ ------------- --------------- ----------- + b_asneeded true [true, false] Use -Wl,--as-needed when linking + b_colorout always [auto, always, never] Use colored output + b_coverage false [true, false] Enable coverage tracking. + b_lto false [true, false] Use link time optimization + b_lundef true [true, false] Use -Wl,--no-undefined when linking + b_ndebug false [true, false, if-release] Disable asserts + b_pch true [true, false] Use precompiled headers + b_pgo off [off, generate, use] Use profile guided optimization + b_sanitize none [none, address, thread, undefined, memory, address,undefined] Code sanitizer to use + b_staticpic true [true, false] Build static libraries as position independent + + Compiler options: + Option Current Value Possible Values Description + ------ ------------- --------------- ----------- + c_args [] Extra arguments passed to the C compiler + c_link_args [] Extra arguments passed to the C linker + c_std c99 [none, c89, c99, c11, c17, c18, gnu89, gnu99, gnu11, gnu17, gnu18] C language standard to use + cpp_args [] Extra arguments passed to the C++ compiler + cpp_debugstl false [true, false] STL debug mode + cpp_link_args [] Extra arguments passed to the C++ linker + cpp_std c++11 [none, c++98, c++03, c++11, c++14, c++17, c++1z, c++2a, gnu++03, gnu++11, gnu++14, gnu++17, gnu++1z, gnu++2a] C++ language standard to use + fortran_std [] [none, legacy, f95, f2003, f2008, f2018] language standard to use + + Directories: + Option Current Value Description + ------ ------------- ----------- + bindir bin Executable directory + datadir share Data file directory + includedir include Header file directory + infodir share/info Info page directory + libdir lib/x86_64-linux-gnu Library directory + libexecdir libexec Library executable directory + localedir share/locale Locale data directory + localstatedir /var/local Localstate data directory + mandir share/man Manual page directory + prefix /usr/local Installation prefix + sbindir sbin System executable directory + sharedstatedir /var/local/lib Architecture-independent data directory + sysconfdir etc Sysconf data directory + + Project options: + Option Current Value Possible Values Description + ------ ------------- --------------- ----------- + array_opt [one, two] [one, two, three] array_opt + combo_opt three [one, two, three] combo_opt + free_array_opt [one, two] free_array_opt + integer_opt 3 >=0, <=5 integer_opt + other_one false [true, false] other_one + some_feature enabled [enabled, disabled, auto] some_feature + someoption optval An option + + Testing options: + Option Current Value Possible Values Description + ------ ------------- --------------- ----------- + errorlogs true [true, false] Whether to print the logs from failing tests + stdsplit true [true, false] Split stdout and stderr in test logs + +These are all the options available for the current project arranged +into related groups. The first column in every field is the name of +the option. To set an option you use the `-D` option. For example, +changing the installation prefix from `/usr/local` to `/tmp/testroot` +you would issue the following command. + + meson configure -Dprefix=/tmp/testroot + +Then you would run your build command (usually `meson compile`), which would +cause Meson to detect that the build setup has changed and do all the +work required to bring your build tree up to date. + +Since 0.50.0, it is also possible to get a list of all build options +by invoking [`meson configure`](Commands.md#configure) with the project source directory or +the path to the root `meson.build`. In this case, meson will print the +default values of all options similar to the example output from above. diff --git a/meson/docs/markdown/Contact-information.md b/meson/docs/markdown/Contact-information.md new file mode 100644 index 000000000..87a76a549 --- /dev/null +++ b/meson/docs/markdown/Contact-information.md @@ -0,0 +1,14 @@ +# Contact information + +For general discussion and questions, it is strongly recommended that +you use the [mailing +list](https://groups.google.com/forum/#!forum/mesonbuild). + +If you find bugs, please file them in the [issue +tracker](https://github.com/jpakkane/meson/issues). + +The maintainer of Meson is Jussi Pakkanen. You should usually not +contact him privately but rather use the channels listed +above. However if such a need arises, he can be reached at gmail where +his username is `jpakkane` (that is not a typo, the last letter is +indeed *e*). diff --git a/meson/docs/markdown/Continuous-Integration.md b/meson/docs/markdown/Continuous-Integration.md new file mode 100644 index 000000000..76a05a3e8 --- /dev/null +++ b/meson/docs/markdown/Continuous-Integration.md @@ -0,0 +1,270 @@ +# Continuous Integration + +Here you will find snippets to use Meson with various CI such as +Travis and AppVeyor. + +Please [file an issue](https://github.com/mesonbuild/meson/issues/new) +if these instructions don't work for you. + +## Travis-CI with Docker + +Travis with Docker gives access to newer non-LTS Ubuntu versions with +pre-installed libraries of your choice. + +This `yml` file is derived from the +[configuration used by Meson](https://github.com/mesonbuild/meson/blob/master/.travis.yml) +for running its own tests. + +```yaml +os: + - linux + - osx + +language: + - cpp + +services: + - docker + +before_install: + - if [[ "$TRAVIS_OS_NAME" == "osx" ]]; then brew update; fi + - if [[ "$TRAVIS_OS_NAME" == "osx" ]]; then brew install python3 ninja; fi + - if [[ "$TRAVIS_OS_NAME" == "osx" ]]; then pip3 install meson; fi + - if [[ "$TRAVIS_OS_NAME" == "linux" ]]; then docker pull YOUR/REPO:eoan; fi + +script: + - if [[ "$TRAVIS_OS_NAME" == "linux" ]]; then echo FROM YOUR/REPO:eoan > Dockerfile; fi + - if [[ "$TRAVIS_OS_NAME" == "linux" ]]; then echo ADD . /root >> Dockerfile; fi + - if [[ "$TRAVIS_OS_NAME" == "linux" ]]; then docker build -t withgit .; fi + - if [[ "$TRAVIS_OS_NAME" == "linux" ]]; then docker run withgit /bin/sh -c "cd /root && TRAVIS=true CC=$CC CXX=$CXX meson builddir && meson test -C builddir"; fi + - if [[ "$TRAVIS_OS_NAME" == "osx" ]]; then SDKROOT=$(xcodebuild -version -sdk macosx Path) meson builddir && meson test -C builddir; fi +``` + +## CircleCi for Linux (with Docker) + +[CircleCi](https://circleci.com/) can work for spinning all of the Linux images you wish. +Here's a sample `yml` file for use with that. + +```yaml +version: 2.1 + +executors: + # Your dependencies would go in the docker images that represent + # the Linux distributions you are supporting + meson_ubuntu_builder: + docker: + - image: your_dockerhub_username/ubuntu-sys + + meson_debian_builder: + docker: + - image: your_dockerhub_username/debian-sys + + meson_fedora_builder: + docker: + - image: your_dockerhub_username/fedora-sys + +jobs: + meson_ubuntu_build: + executor: meson_ubuntu_builder + steps: + - checkout + - run: meson setup builddir --backend ninja + - run: meson compile -C builddir + - run: meson test -C builddir + + meson_debian_build: + executor: meson_debian_builder + steps: + - checkout + - run: meson setup builddir --backend ninja + - run: meson compile -C builddir + - run: meson test -C builddir + + meson_fedora_build: + executor: meson_fedora_builder + steps: + - checkout + - run: meson setup builddir --backend ninja + - run: meson compile -C builddir + - run: meson test -C builddir + +workflows: + version: 2 + linux_workflow: + jobs: + - meson_ubuntu_build + - meson_debian_build + - meson_fedora_build +``` + +## AppVeyor for Windows + +For CI on Windows, [AppVeyor](https://www.appveyor.com/) has a wide selection of +[default configurations](https://www.appveyor.com/docs/windows-images-software/). +AppVeyor also has [MacOS](https://www.appveyor.com/docs/macos-images-software/) and +[Linux](https://www.appveyor.com/docs/linux-images-software/) CI images. +This is a sample `appveyor.yml` file for Windows with Visual Studio 2015 and 2017. + +```yaml +image: Visual Studio 2017 + +environment: + matrix: + - arch: x86 + compiler: msvc2015 + - arch: x64 + compiler: msvc2015 + - arch: x86 + compiler: msvc2017 + - arch: x64 + compiler: msvc2017 + +platform: + - x64 + +install: + # Set paths to dependencies (based on architecture) + - cmd: if %arch%==x86 (set PYTHON_ROOT=C:\python37) else (set PYTHON_ROOT=C:\python37-x64) + # Print out dependency paths + - cmd: echo Using Python at %PYTHON_ROOT% + # Add necessary paths to PATH variable + - cmd: set PATH=%cd%;%PYTHON_ROOT%;%PYTHON_ROOT%\Scripts;%PATH% + # Install meson and ninja + - cmd: pip install ninja meson + # Set up the build environment + - cmd: if %compiler%==msvc2015 ( call "C:\Program Files (x86)\Microsoft Visual Studio 14.0\VC\vcvarsall.bat" %arch% ) + - cmd: if %compiler%==msvc2017 ( call "C:\Program Files (x86)\Microsoft Visual Studio\2017\Community\VC\Auxiliary\Build\vcvarsall.bat" %arch% ) + +build_script: + - cmd: echo Building on %arch% with %compiler% + - cmd: meson --backend=ninja builddir + - cmd: meson compile -C builddir + +test_script: + - cmd: meson test -C builddir +``` + +### Qt + +For Qt 5, add the following line near the `PYTHON_ROOT` assignment: + +```yaml + - cmd: if %arch%==x86 (set QT_ROOT=C:\Qt\5.11\%compiler%) else (set QT_ROOT=C:\Qt\5.11\%compiler%_64) +``` + +And afterwards add `%QT_ROOT%\bin` to the `PATH` variable. + +You might have to adjust your build matrix as there are, for example, no msvc2017 32-bit builds. Visit the [Build Environment](https://www.appveyor.com/docs/build-environment/) page in the AppVeyor docs for more details. + +### Boost + +The following statement is sufficient for meson to find Boost: + +```yaml + - cmd: set BOOST_ROOT=C:\Libraries\boost_1_67_0 +``` + +## Travis without Docker + +Non-Docker Travis-CI builds can use Linux, MacOS or Windows. +Set the desired compiler(s) in the build **matrix**. +This example is for **Linux** (Ubuntu 18.04) and **C**. + +```yaml +dist: bionic +group: travis_latest + +os: linux +language: python + +matrix: + include: + - env: CC=gcc + - env: CC=clang + +install: + - pip install meson ninja + +script: + - meson builddir + - meson compile -C builddir + - meson test -C builddir +``` + +## GitHub Actions + +GitHub Actions are distinct from Azure Pipelines in their workflow syntax. +It can be easier to setup specific CI tasks in Actions than Pipelines, depending on the particular task. +This is an example file: .github/workflows/ci_meson.yml supposing the project is C-based, using GCC on Linux, Mac and Windows. +The optional `on:` parameters only run this CI when the C code is changed--corresponding ci_python.yml might run only on "**.py" file changes. + +```yml +name: ci_meson + +on: + push: + paths: + - "**.c" + - "**.h" + pull_request: + paths: + - "**.h" + - "**.h" + +jobs: + + linux: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v1 + - uses: actions/setup-python@v1 + with: + python-version: '3.x' + - run: pip install meson ninja + - run: meson setup build + env: + CC: gcc + - run: meson test -C build -v + - uses: actions/upload-artifact@v1 + if: failure() + with: + name: Linux_Meson_Testlog + path: build/meson-logs/testlog.txt + + macos: + runs-on: macos-latest + steps: + - uses: actions/checkout@v1 + - uses: actions/setup-python@v1 + with: + python-version: '3.x' + - run: brew install gcc + - run: pip install meson ninja + - run: meson setup build + env: + CC: gcc + - run: meson test -C build -v + - uses: actions/upload-artifact@v1 + if: failure() + with: + name: MacOS_Meson_Testlog + path: build/meson-logs/testlog.txt + + windows: + runs-on: windows-latest + steps: + - uses: actions/checkout@v1 + - uses: actions/setup-python@v1 + with: + python-version: '3.x' + - run: pip install meson ninja + - run: meson setup build + env: + CC: gcc + - run: meson test -C build -v + - uses: actions/upload-artifact@v1 + if: failure() + with: + name: Windows_Meson_Testlog + path: build/meson-logs/testlog.txt +``` diff --git a/meson/docs/markdown/Contributing.md b/meson/docs/markdown/Contributing.md new file mode 100644 index 000000000..07ddc8834 --- /dev/null +++ b/meson/docs/markdown/Contributing.md @@ -0,0 +1,513 @@ +--- +short-description: Contributing to Meson +... + +# Contributing to Meson + +A large fraction of Meson is contributed by people outside the core +team. This documentation explains some of the design rationales of +Meson as well as how to create and submit your patches for inclusion +to Meson. + +Thank you for your interest in participating to the development. + +## Submitting patches + +All changes must be submitted as [pull requests to +Github](https://github.com/mesonbuild/meson/pulls). This causes them +to be run through the CI system. All submissions must pass a full CI +test run before they are even considered for submission. + +## Keeping pull requests up to date + +It is possible that while your pull request is being reviewed, other +changes are committed to master that cause merge conflicts that must +be resolved. The basic rule for this is very simple: keep your pull +request up to date using rebase _only_. + +Do not merge head back to your branch. Any merge commits in your pull +request make it not acceptable for merging into master and you must +remove them. + +## Special procedure for new features + +Every new feature requires some extra steps, namely: + +- Must include a project test under `test cases/`, or if that's not + possible or if the test requires a special environment, it must go + into `run_unittests.py`. +- Must be registered with the [FeatureChecks framework](Release-notes-for-0.47.0.md#feature-detection-based-on-meson_version-in-project) + that will warn the user if they try to use a new feature while + targeting an older meson version. +- Needs a release note snippet inside `docs/markdown/snippets/` with + a heading and a brief paragraph explaining what the feature does + with an example. + +## Acceptance and merging + +The kind of review and acceptance any merge proposal gets depends on +the changes it contains. All pull requests must be reviewed and +accepted by someone with commit rights who is not the original +submitter. Merge requests can be roughly split into three different +categories. + +The first one consists of MRs that only change the markdown +documentation under `docs/markdown`. Anyone with access rights can +push changes to these directly to master. For major changes it is +still recommended to create a MR so other people can comment on it. + +The second group consists of merges that don't change any +functionality, fixes to the CI system and bug fixes that have added +regression tests (see below) and don't change existing +functionality. Once successfully reviewed anyone with merge rights can +merge these to master. + +The final kind of merges are those that add new functionality or +change existing functionality in a backwards incompatible way. These +require the approval of the project lead. + +In a simplified list form the split would look like the following: + +- members with commit access can do: + - documentation changes (directly to master if warranted) + - bug fixes that don't change functionality + - refactorings + - new dependency types + - new tool support (e.g. a new Doxygen-kind of tool) + - support for new compilers to existing languages +- project leader decision is needed for: + - new modules + - new functions in the Meson language + - syntax changes for Meson files + - changes breaking backwards compatibility + - support for new languages + +## Strategy for merging pull requests to trunk + +Meson's merge strategy should fulfill the following guidelines: + +- preserve as much history as possible + +- have as little junk in the repo as possible + +- everything in the "master lineage" should always pass all tests + +These goals are slightly contradictory so the correct thing to do +often requires some judgement on part of the person doing the +merge. Github provides three different merge options, The rules of +thumb for choosing between them goes like this: + +- single commit pull requests should always be rebased + +- a pull request with one commit and one "fixup" commit (such as + testing something to see if it passes CI) should be squashed + +- large branches with many commits should be merged with a merge + commit, especially if one of the commits does not pass all tests + (which happens in e.g. large and difficult refactorings) + +If in doubt, ask for guidance on IRC. + +## Tests + +All new features must come with automatic tests that thoroughly prove +that the feature is working as expected. Similarly bug fixes must come +with a unit test that demonstrates the bug, proves that it has been +fixed and prevents the feature from breaking in the future. + +Sometimes it is difficult to create a unit test for a given bug. If +this is the case, note this in your pull request. We may permit bug +fix merge requests in these cases. This is done on a case by case +basis. Sometimes it may be easier to write the test than convince the +maintainers that one is not needed. Exercise judgment and ask for help +in problematic cases. + +The tests are split into two different parts: unit tests and full +project tests. To run all tests, execute `./run_tests.py`. Unit tests +can be run with `./run_unittests.py` and project tests with +`./run_project_tests.py`. + +### Project tests + +Subsets of project tests can be selected with +`./run_project_tests.py --only` option. This can save a great deal of +time when only a certain part of Meson is being tested. +For example, a useful and easy contribution to Meson is making +sure the full set of compilers is supported. One could for example test +various Fortran compilers by setting `FC=ifort` or `FC=flang` or similar +with `./run_project_test.py --only fortran`. +Some families of tests require a particular backend to run. +For example, all the CUDA project tests run and pass on Windows via +`./run_project_tests.py --only cuda --backend ninja` + +Each project test is a standalone project that can be compiled on its +own. They are all in the `test cases` subdirectory. The simplest way to +run a single project test is to do something like `./meson.py test\ +cases/common/1\ trivial builddir`. The one exception to this is `test +cases/unit` directory discussed below. + +The test cases in the `common` subdirectory are meant to be run always +for all backends. They should only depend on C and C++, without any +external dependencies such as libraries. Tests that require those are +in the `test cases/frameworks` directory. If there is a need for an +external program in the common directory, such as a code generator, it +should be implemented as a Python script. The goal of test projects is +also to provide sample projects that end users can use as a base for +their own projects. + +All project tests follow the same pattern: they are configured, compiled, tests +are run and finally install is run. Passing means that configuring, building and +tests succeed and that installed files match those expected. + +Any tests that require more thorough analysis, such as checking that certain +compiler arguments can be found in the command line or that the generated +pkg-config files actually work should be done with a unit test. + +Additionally: + +* `crossfile.ini` and `nativefile.ini` are passed to the configure step with +`--cross-file` and `--native-file` options, respectively. + +* `mlog.cmd_ci_include()` can be called from anywhere inside meson to capture the +contents of an additional file into the CI log on failure. + +Projects needed by unit tests are in the `test cases/unit` +subdirectory. They are not run as part of `./run_project_tests.py`. + +### Configuring project tests + +The (optional) `test.json` file, in the root of a test case, is used +for configuring the test. All of the following root entries in the `test.json` +are independent of each other and can be combined as needed. + +Exanple `test.json`: + +```json +{ + "env": { + "VAR": "VAL" + }, + "installed": [ + { "type": "exe", "file": "usr/bin/testexe" }, + { "type": "pdb", "file": "usr/bin/testexe" }, + { "type": "shared_lib", "file": "usr/lib/z", "version": "1.2.3" }, + ], + "matrix": { + "options": { + "opt1": [ + { "val": "abc" }, + { "val": "qwert" }, + { "val": "bad" } + ], + "opt2": [ + { "val": null }, + { "val": "true" }, + { "val": "false" }, + ] + }, + "exclude": [ + { "opt1": "qwert", "opt2": "false" }, + { "opt1": "bad" } + ] + }, + "tools": { + "cmake": ">=3.11" + } +} +``` + +#### env + +The `env` key contains a dictionary which specifies additional +environment variables to be set during the configure step of the test. `@ROOT@` +is replaced with the absolute path of the source directory. + +#### installed + +The `installed` dict contains a list of dicts, describing which files are expected +to be installed. Each dict contains the following keys: + +- `file` +- `type` +- `platform` (optional) +- `version` (optional) +- `language` (optional) + +The `file` entry contains the relative path (from the install root) to the +actually installed file. + +The `type` entry specifies how the `file` path should be interpreted based on the +current platform. The following values are currently supported: + +| type | Description | +| ------------- | ------------------------------------------------------------------------------------------------------- | +| `file` | No postprocessing, just use the provided path | +| `dir` | To include all files inside the directory (for generated docs, etc). The path must be a valid directory | +| `exe` | For executables. On Windows the `.exe` suffix is added to the path in `file` | +| `shared_lib` | For shared libraries, always written as `name`. The appropriate suffix and prefix are added by platform | +| `pdb` | For Windows PDB files. PDB entries are ignored on non Windows platforms | +| `implib` | For Windows import libraries. These entries are ignored on non Windows platforms | +| `implibempty` | Like `implib`, but no symbols are exported in the library | +| `expr` | `file` is an expression. This type should be avoided and removed if possible | + +Except for the `file` and `expr` types, all paths should be provided *without* a suffix. + +| Argument | Applies to | Description | +| -----------|----------------------------|-------------------------------------------------------------------------------| +| `version` | `shared_lib`, `pdb` | Sets the version to look for appropriately per-platform | +| `language` | `pdb` | Determines which compiler/linker determines the existence of this file | + +The `shared_lib` and `pdb` types takes an optional additional parameter, `version`, this is us a string in `X.Y.Z` format that will be applied to the library. Each version to be tested must have a single version. The harness will apply this correctly per platform: + +`pdb` takes an optional `language` argument. This determines which compiler/linker should generate the pdb file. Because it's possible to mix compilers that do and don't generate pdb files (dmd's optlink doesn't). Currently this is only needed when mixing D and C code. + +```json +{ + "type": "shared_lib", "file": "usr/lib/lib", + "type": "shared_lib", "file": "usr/lib/lib", "version": "1", + "type": "shared_lib", "file": "usr/lib/lib", "version": "1.2.3.", +} +``` + +This will be applied appropriatly per platform. On windows this expects `lib.dll` and `lib-1.dll`. on MacOS it expects `liblib.dylib` and `liblib.1.dylib`. On other Unices it expects `liblib.so`, `liblib.so.1`, and `liblib.so.1.2.3`. + +If the `platform` key is present, the installed file entry is only considered if +the platform matches. The following values for `platform` are currently supported: + +| platform | Description | +| ---------- | -------------------------------------------------------------------- | +| `msvc` | Matches when a msvc like compiler is used (`msvc`, `clang-cl`, etc.) | +| `gcc` | Not `msvc` | +| `cygwin` | Matches when the platform is cygwin | +| `!cygwin` | Not `cygwin` | + +#### matrix + +The `matrix` section can be used to define a test matrix to run project tests +with different meson options. + +In the `options` dict, all possible options and their values are specified. Each +key in the `options` dict is a meson option. It stores a list of all potential +values in a dict format, which allows to skip specific values based on the current +environment. + +Each value must contain the `val` key for the value of the option. `null` can be +used for adding matrix entries without the current option. + +Additionally, the `skip_on_env` key can be used to specify a list of environment +variables. If at least one environment variable in `skip_on_env` is present, all +matrix entries containing this value are skipped. + +Similarly, the `compilers` key can be used to define a mapping of compilers to languages that are required for this value. + +```json +{ + "compilers": { + "c": "gcc", + "cpp": "gcc", + "d": "gdc" + } +} +``` + +Specific option combinations can be excluded with the `exclude` section. It should +be noted that `exclude` does not require exact matches. Instead, any matrix entry +containing all option value combinations in `exclude` will be excluded. Thus +an empty dict (`{}`) to will match **all** elements in the test matrix. + +The above example will produce the following matrix entries: +- `opt1=abc` +- `opt1=abc opt2=true` +- `opt1=abc opt2=false` +- `opt1=qwert` +- `opt1=qwert opt2=true` + +#### do_not_set_opts + +Currently supported values are: +- `prefix` +- `libdir` + +#### tools + +This section specifies a dict of tool requirements in a simple key-value format. +If a tool is specified, it has to be present in the environment, and the version +requirement must be fulfilled. Otherwise, the entire test is skipped (including +every element in the test matrix). + +#### stdout + +The `stdout` key contains a list of dicts, describing the expected stdout. + +Each dict contains the following keys: + +- `line` +- `match` (optional) + +Each item in the list is matched, in order, against the remaining actual stdout +lines, after any previous matches. If the actual stdout is exhausted before +every item in the list is matched, the expected output has not been seen, and +the test has failed. + +The `match` element of the dict determines how the `line` element is matched: + +| Type | Description | +| -------- | ----------------------- | +| `literal` | Literal match (default) | +| `re` | regex match | + +### Skipping integration tests + +Meson uses several continuous integration testing systems that have slightly +different interfaces for indicating a commit should be skipped. + +Continuous integration systems currently used: +- [Travis-CI](https://docs.travis-ci.com/user/customizing-the-build#skipping-a-build) + allows `[skip ci]` anywhere in the commit messages. +- [Azure Pipelines](https://docs.microsoft.com/en-us/azure/devops/pipelines/scripts/git-commands?view=vsts&tabs=yaml#how-do-i-avoid-triggering-a-ci-build-when-the-script-pushes) + allows `***NO_CI***` in the commit message. +- [Sider](https://sider.review) + runs Flake8 ([see below](#python-coding-style)) + +To promote consistent naming policy, use: + +- `[skip ci]` in the commit title if you want to disable all integration tests + +## Documentation + +The `docs` directory contains the full documentation that will be used +to generate [the Meson web site](http://mesonbuild.com). Every change +in functionality must change the documentation pages. In most cases +this means updating the reference documentation page but bigger +changes might need changes in other documentation, too. + +All new functionality needs to have a mention in the release +notes. These features should be written in standalone files in the +`docs/markdown/snippets` directory. The release manager will combine +them into one page when doing the release. + +[Integration tests should be disabled](#skipping-integration-tests) for +documentation-only commits by putting `[skip ci]` into commit title. +Reviewers should ask contributors to put `[skip ci]` into the title because +tests are run again after merge for `master`. + +## Python Coding style + +Meson follows the basic Python coding style. Additional rules are the +following: + +- indent 4 spaces, no tabs ever +- indent meson.build files with two spaces +- try to keep the code as simple as possible +- contact the mailing list before embarking on large scale projects + to avoid wasted effort + +Meson uses Flake8 for style guide enforcement. The Flake8 options for +the project are contained in .flake8. + +To run Flake8 on your local clone of Meson: + +```console +$ python3 -m pip install flake8 +$ cd meson +$ flake8 +``` + +To run it automatically before committing: + +```console +$ flake8 --install-hook=git +$ git config --bool flake8.strict true +``` + +## C/C++ coding style + +Meson has a bunch of test code in several languages. The rules for +those are simple. + +- indent 4 spaces, no tabs ever +- brace always on the same line as if/for/else/function definition + +## External dependencies + +The goal of Meson is to be as easily usable as possible. The user +experience should be "get Python3 and Ninja, run", even on +Windows. Unfortunately this means that we can't have dependencies on +projects outside of Python's standard library. This applies only to +core functionality, though. For additional helper programs etc the use +of external dependencies may be ok. If you feel that you are dealing +with this kind of case, please contact the developers first with your +use case. + +## Turing completeness + +The main design principle of Meson is that the definition language is +not Turing complete. Any change that would make Meson Turing complete +is automatically rejected. In practice this means that defining your +own functions inside `meson.build` files and generalised loops will +not be added to the language. + +## Do I need to sign a CLA in order to contribute? + +No you don't. All contributions are welcome. + +## No lingering state + +Meson operates in much the same way as functional programming +languages. It has inputs, which include `meson.build` files, values of +options, compilers and so on. These are passed to a function, which +generates output build definition. This function is pure, which means that: + +- for any given input the output is always the same +- running Meson twice in a row _always_ produce the same output in both runs + +The latter one is important, because it enforces that there is no way +for "secret state" to pass between consecutive invocations of +Meson. This is the reason why, for example, there is no `set_option` +function even though there is a `get_option` one. + +If this were not the case, we could never know if the build output is +"stable". For example suppose there were a `set_option` function and a +boolean variable `flipflop`. Then you could do this: + +```meson +set_option('flipflop', not get_option('flipflop')) +``` + +This piece of code would never converge. Every Meson run would change +the value of the option and thus the output you get out of this build +definition would be random. + +Meson does not permit this by forbidding these sorts of covert channels. + +There is one exception to this rule. Users can call into external +commands with `run_command`. If the output of that command does not +behave like a pure function, this problem arises. Meson does not try +to guard against this case, it is the responsibility of the user to +make sure the commands they run behave like pure functions. + +## Environment variables + +Environment variables are like global variables, except that they are +also hidden by default. Envvars should be avoided whenever possible, +all functionality should be exposed in better ways such as command +line switches. + +## Random design points that fit nowhere else + +- All features should follow the 90/9/1 rule. 90% of all use cases + should be easy, 9% should be possible and it is totally fine to not + support the final 1% if it would make things too complicated. + +- Any build directory will have at most two toolchains: one native and + one cross. + +- Prefer specific solutions to generic frameworks. Solve the end + user's problems rather than providing them tools to do it + themselves. + +- Never use features of the Unix shell (or Windows shell for that + matter). Doing things like forwaring output with `>` or invoking + multiple commands with `&&` are not permitted. Whenever these sorts + of requirements show up, write an internal Python script with the + desired functionality and use that instead. diff --git a/meson/docs/markdown/Creating-Linux-binaries.md b/meson/docs/markdown/Creating-Linux-binaries.md new file mode 100644 index 000000000..c3b4b642a --- /dev/null +++ b/meson/docs/markdown/Creating-Linux-binaries.md @@ -0,0 +1,134 @@ +--- +short-description: Creating universal Linux binaries +... + +# Creating Linux binaries + +Creating Linux binaries that can be downloaded and run on any distro +(like .dmg packages for OSX or .exe installers for Windows) has +traditionally been difficult. This is even more tricky if you want to +use modern compilers and features, which is especially desired in game +development. There is still no simple turn-key solution for this +problem but with a bit of setup it can be relatively straightforward. + +## Installing system and GCC + +First you need to do a fresh operating system install. You can use +spare hardware, VirtualBox, cloud or whatever you want. Note that the +distro you install must be *at least as old* as the oldest release you +wish to support. Debian stable is usually a good choice, though +immediately after its release you might want to use Debian oldstable +or the previous Ubuntu LTS. The oldest supported version of CentOS is +also a good choice. + +Once you have installed the system, you need to install +build-dependencies for GCC. In Debian-based distros this can be done +with the following commands: + +```console +$ apt-get build-dep g++ +$ apt-get install pkg-config libgmp-dev libmpfr-dev libmpc-dev +``` + +Then create a `src` subdirectory in your home directory. Copy-paste +the following into `install_gcc.sh` and execute it. + +```bash +#!/bin/sh + +wget ftp://ftp.fu-berlin.de/unix/languages/gcc/releases/gcc-4.9.2/gcc-4.9.2.tar.bz2 +tar xf gcc-4.9.2.tar.bz2 + +mkdir objdir +cd objdir +../gcc-4.9.2/configure --disable-bootstrap --prefix=${HOME}/devroot \ + --disable-multilib --enable-languages=c,c++ +make -j 4 +make install-strip +ln -s gcc ${HOME}/devroot/bin/cc +``` + +Then finally add the following lines to your `.bashrc`. + +```console +$ export LD_LIBRARY_PATH=${HOME}/devroot/lib +$ export PATH=${HOME}/devroot/bin:$PATH +$ export PKG_CONFIG_PATH=${HOME}/devroot/lib/pkgconfig +``` + +Log out and back in and now your build environment is ready to use. + +## Adding other tools + +Old distros might have too old versions of some tools. For Meson this +could include Python 3 and Ninja. If this is the case you need to +download, build and install new versions into `~/devroot` in the usual +way. + +## Adding dependencies + +You want to embed and statically link every dependency you can +(especially C++ dependencies). Meson's [Wrap package +manager](Wrap-dependency-system-manual.md) might be of use here. This +is equivalent to what you would do on Windows, OSX, Android +etc. Sometimes static linking is not possible. In these cases you need +to copy the .so files inside your package. Let's use SDL2 as an +example. First we download and install it as usual giving it our +custom install prefix (that is, `./configure +--prefix=${HOME}/devroot`). This makes Meson's dependency detector +pick it up automatically. + +## Building and installing + +Building happens in much the same way as normally. There are just two +things to note. First, you must tell GCC to link the C++ standard +library statically. If you don't then your app is guaranteed to break +as different distros have binary-incompatible C++ libraries. The +second thing is that you need to point your install prefix to some +empty staging area. Here's the meson command to do that: + +```console +$ LDFLAGS=-static-libstdc++ meson --prefix=/tmp/myapp +``` + +The aim is to put the executable in `/tmp/myapp/bin` and shared +libraries to `/tmp/myapp/lib`. The next thing you need is the +embedder. It takes your dependencies (in this case only +`libSDL2-2.0.so.0`) and copies them in the lib directory. Depending on +your use case you can either copy the files by hand or write a script +that parses the output of `ldd binary_file`. Be sure not to copy +system libraries (`libc`, `libpthread`, `libm` etc). For an example, +see the [sample +project](https://github.com/jpakkane/meson/tree/master/manual%20tests/4%20standalone%20binaries). + +Make the script run during install with this: + +```meson +meson.add_install_script('linux_bundler.sh') +``` + +## Final steps + +If you try to run the program now it will most likely fail to start or +crashes. The reason for this is that the system does not know that the +executable needs libraries from the `lib` directory. The solution for +this is a simple wrapper script. Create a script called `myapp.sh` +with the following content: + +```bash +#!/bin/bash + +cd "${0%/*}" +export LD_LIBRARY_PATH="$(pwd)/lib" +bin/myapp +``` + +Install it with this Meson snippet: + +```meson +install_data('myapp.sh', install_dir : '.') +``` + +And now you are done. Zip up your `/tmp/myapp` directory and you have +a working binary ready for deployment. To run the program, just unzip +the file and run `myapp.sh`. diff --git a/meson/docs/markdown/Creating-OSX-packages.md b/meson/docs/markdown/Creating-OSX-packages.md new file mode 100644 index 000000000..849d5fd5d --- /dev/null +++ b/meson/docs/markdown/Creating-OSX-packages.md @@ -0,0 +1,158 @@ +--- +short-description: Tools to create OS X packages +... + +# Creating OSX packages + +Meson does not have native support for building OSX packages but it +does provide all the tools you need to create one yourself. The reason +for this is that it is a very hard task to write a system that +provides for all the different ways to do that but it is very easy to +write simple scripts for each application. + +Sample code for this can be found in [the Meson manual test +suite](https://github.com/jpakkane/meson/tree/master/manual%20tests/4%20standalone%20binaries). + +## Creating an app bundle + +OSX app bundles are actually extremely simple. They are just a +directory of files in a certain format. All the details you need to +know are on [this +page](https://stackoverflow.com/questions/1596945/building-osx-app-bundle) +and it is highly recommended that you read it first. + +Let's assume that we are creating our app bundle into +`/tmp/myapp.app`. Suppose we have one executable, so we need to +install that into `Contents/MacOS`. If we define the executable like +this: + +```meson +executable('myapp', 'foo1.c', ..., install : true) +``` + +then we just need to initialize our build tree with this command: + +```console +$ meson --prefix=/tmp/myapp.app \ + --bindir=Contents/MacOS \ + builddir \ + +``` + +Now when we do `meson install` the bundle is properly staged. If you +have any resource files or data, you need to install them into +`Contents/Resources` either by custom install commands or specifying +more install paths to the Meson command. + +Next we need to install an `Info.plist` file and an icon. For those we +need the following two Meson definitions. + +```meson +install_data('myapp.icns', install_dir : 'Contents/Resources') +install_data('Info.plist', install_dir : 'Contents') +``` + +The format of `Info.plist` can be found in the link or the sample +project linked above. The simplest way to get an icon in the `icns` +format is to save your image as a tiff an then use the `tiff2icns` helper +application that comes with XCode. + +Some applications assume that the working directory of the app process +is the same where the binary executable is. If this is the case for +you, then you need to create a wrapper script that looks like this: + +```bash +#!/bin/bash + +cd "${0%/*}" +./myapp +``` + +install it with this: + +```meson +install_data('myapp.sh', install_dir : 'Contents/MacOS') +``` + +and make sure that you specify `myapp.sh` as the executable to run in +your `Info.plist`. + +If you are not using any external libraries, this is all you need to +do. You now have a full app bundle in `/tmp/myapp.app` that you can +use. Most applications use third party frameworks and libraries, +though, so you need to add them to the bundle so it will work on other +peoples' machines. + +As an example we are going to use the [SDL2](https://libsdl.org/) +framework. In order to bundle it in our app, we first specify an +installer script to run. + +```meson +meson.add_install_script('install_script.sh') +``` + +The install script does two things. First it copies the whole +framework into our bundle. + +```console +$ mkdir -p ${MESON_INSTALL_PREFIX}/Contents/Frameworks +$ cp -R /Library/Frameworks/SDL2.framework \ + ${MESON_INSTALL_PREFIX}/Contents/Frameworks +``` + +Then it needs to alter the library search path of our +executable(s). This tells OSX that the libraries your app needs are +inside your bundle. In the case of SDL2, the invocation goes like +this: + +```console +$ install_name_tool -change @rpath/SDL2.framework/Versions/A/SDL2 \ + @executable_path/../FrameWorks/SDL2.framework/Versions/A/SDL2 \ + ${MESON_INSTALL_PREFIX}/Contents/MacOS/myapp +``` + +This is the part of OSX app bundling that you must always do +manually. OSX dependencies come in many shapes and forms and +unfortunately there is no reliable automatic way to determine how each +dependency should be handled. Frameworks go to the `Frameworks` +directory while plain `.dylib` files usually go to +`Contents/Resources/lib` (but you can put them wherever you like). To +get this done you have to check what your program links against with +`otool -L /path/to/binary` and manually add the copy and fix steps to +your install script. Do not copy system libraries inside your bundle, +though. + +After this you have a fully working, self-contained OSX app bundle +ready for distribution. + +## Creating a .dmg installer + +A .dmg installer is similarly quite simple, at its core it is +basically a fancy compressed archive. A good description can be found +on [this page](https://el-tramo.be/guides/fancy-dmg/). Please read it +and create a template image file according to its instructions. + +The actual process of creating the installer is very simple: you mount +the template image, copy your app bundle in it, unmount it and convert +the image into a compressed archive. The actual commands to do this +are not particularly interesting, feel free to steal them from either +the linked page above or from the sample script in Meson's test suite. + +## Putting it all together + +There are many ways to put the .dmg installer together and different +people will do it in different ways. The linked sample code does it by +having two different scripts. This separates the different pieces +generating the installer into logical pieces. + +`install_script.sh` only deals with embedding dependencies and fixing +the library paths. + +`build_osx_installer.sh` sets up the build with the proper paths, +compiles, installs and generates the .dmg package. + +The main reasoning here is that in order to build a complete OSX +installer package from source, all you need to do is to cd into the +source tree and run `./build_osx_installer.sh`. To build packages on +other platforms you would write scripts such as +`build_windows_installer.bat` and so on. diff --git a/meson/docs/markdown/Creating-releases.md b/meson/docs/markdown/Creating-releases.md new file mode 100644 index 000000000..040fb530a --- /dev/null +++ b/meson/docs/markdown/Creating-releases.md @@ -0,0 +1,60 @@ +--- +short-description: Creating releases +... + +# Creating releases + +In addition to development, almost all projects provide periodical +source releases. These are standalone packages (usually either in +tar or zip format) of the source code. They do not contain any +revision control metadata, only the source code. Meson provides +a simple way of generating these, with the `meson dist` command. + +Meson provides a simple way of generating these. It consists of a +single command *(available since 0.52.0)*: + +```sh +meson dist +``` + +or alternatively (on older meson versions with `ninja` backend): + +```sh +ninja dist +``` + +This creates a file called `projectname-version.tar.xz` in the build +tree subdirectory `meson-dist`. This archive contains the full contents +of the latest commit in revision control including all the submodules +(recursively). All revision control metadata is removed. Meson then +takes this archive and tests that it works by doing a full +`compile` + `test` + `install` cycle. If all these pass, Meson will +then create a `SHA-256` checksum file next to the archive. + + +## Autotools dist VS Meson dist + +Meson behaviour is different from Autotools. The Autotools "dist" +target packages up the current source tree. Meson packages the latest +revision control commit. The reason for this is that it prevents developers +from doing accidental releases where the distributed archive does not match +any commit in revision control (especially the one tagged for the release). + + +## Include subprojects in your release + +The `meson dist` command has `--include-subprojects` command line option. +When enabled, the source tree of all subprojects used by the current build +will also be included in the final tarball. This is useful to distribute +self contained tarball that can be built offline (i.e. `--wrap-mode=nodownload`). + + +## Skip build and test with `--no-tests` + +The `meson dist` command has a `--no-tests` option to skip build and +tests steps of generated packages. It can be used to not waste time +for example when done in CI that already does its own testing. + +So with `--no-tests` you can tell Meson "Do not build and test generated +packages.". + diff --git a/meson/docs/markdown/Cross-compilation.md b/meson/docs/markdown/Cross-compilation.md new file mode 100644 index 000000000..d86d417ab --- /dev/null +++ b/meson/docs/markdown/Cross-compilation.md @@ -0,0 +1,330 @@ +--- +short-description: Setting up cross-compilation +... + +# Cross compilation + +Meson has full support for cross compilation. Since cross compiling is +more complicated than native building, let's first go over some +nomenclature. The three most important definitions are traditionally +called *build*, *host* and *target*. This is confusing because those +terms are used for quite many different things. To simplify the issue, +we are going to call these the *build machine*, *host machine* and +*target machine*. Their definitions are the following: + +* *build machine* is the computer that is doing the actual compiling. +* *host machine* is the machine on which the compiled binary will run. +* *target machine* is the machine on which the compiled binary's + output will run, *only meaningful* if the program produces + machine-specific output. + +The `tl/dr` summary is the following: if you are doing regular cross +compilation, you only care about `build_machine` and +`host_machine`. Just ignore `target_machine` altogether and you will +be correct 99% of the time. Only compilers and similar tools care +about the target machine. In fact, for so-called "multi-target" tools +the target machine need not be fixed at build-time like the others but +chosen at runtime, so `target_machine` *still* doesn't matter. If your +needs are more complex or you are interested in the actual details, do +read on. + +This might be easier to understand through examples. Let's start with +the regular, not cross-compiling case. In these cases all of these +three machines are the same. Simple so far. + +Let's next look at the most common cross-compilation setup. Let's +suppose you are on a 64 bit OSX machine and you are cross compiling a +binary that will run on a 32 bit ARM Linux board. In this case your +*build machine* is 64 bit OSX, your *host machine* is 32 bit ARM Linux +and your *target machine* is irrelevant (but defaults to the same value +as the *host machine*). This should be quite understandable as well. + +The usual mistake in this case is to call the OSX system the *host* and +the ARM Linux board the *target*. That's because these were their actual +names when the cross-compiler itself was compiled! Let's assume the +cross-compiler was created on OSX too. When that happened the *build* +and *host machines* were the same OSX and different from the ARM Linux +*target machine*. + +In a nutshell, the typical mistake assumes that the terms *build*, +*host* and *target* refer to some fixed positions whereas they're +actually relative to where the current compiler is running. Think of +*host* as a *child* of the current compiler and *target* as an optional +*grand-child*. Compilers don't change their terminology when they're +creating another compiler, that would at the very least make their user +interface much more complex. + +The most complicated case is when you cross-compile a cross +compiler. As an example you can, on a Linux machine, generate a cross +compiler that runs on Windows but produces binaries on MIPS Linux. In +this case *build machine* is x86 Linux, *host machine* is x86 Windows +and *target machine* is MIPS Linux. This setup is known as the +[Canadian +Cross](https://en.wikipedia.org/wiki/Cross_compiler#Canadian_Cross). As +a side note, be careful when reading cross compilation articles on +Wikipedia or the net in general. It is very common for them to get +build, host and target mixed up, even in consecutive sentences, which +can leave you puzzled until you figure it out. + +Again note that when you cross-compile something, +the 3 systems (*build*, *host*, and *target*) used when +building the cross compiler don't align with the ones used when +building something with that newly-built cross compiler. To take our +Canadian Cross scenario from above (for full generality), since its +*host machine* is x86 Windows, the *build machine* of anything we +build with it is *x86 Windows*. And since its *target machine* is MIPS +Linux, the *host machine* of anything we build with it is *MIPS +Linux*. Only the *target machine* of whatever we build with it can be +freely chosen by us, say if we want to build another cross compiler +that runs on MIPS Linux and targets Aarch64 iOS. As this example +hopefully makes clear to you, the machine names are relative and +shifted over to the left by one position. + +If you did not understand all of the details, don't worry. For most +people it takes a while to wrap their head around these +concepts. Don't panic, it might take a while to click, but you will +get the hang of it eventually. + +## Defining the environment + +Meson requires you to write a cross build definition file. It defines +various properties of the cross build environment. The cross file +consists of different sections. + +There are a number of options shared by cross and native files, +[here](Machine-files.md). It is assumed that you have read that section already, +as this documentation will only call out options specific to cross files. + +### Binaries + +```ini +[binaries] +exe_wrapper = 'wine' # A command used to run generated executables. +``` + +The `exe_wrapper` option defines a *wrapper command* that can be used to run +executables for this host. In this case we can use Wine, which runs Windows +applications on Linux. Other choices include running the application with +qemu or a hardware simulator. If you have this kind of a wrapper, these lines +are all you need to write. Meson will automatically use the given wrapper +when it needs to run host binaries. This happens e.g. when running the +project's test suite. + +### Properties + +In addition to the properites allowed in [all machine +files](Machine-files.md#properties), the cross file may contain specific +information about the cross compiler or the host machine. It looks like this: + +```ini +[properties] +sizeof_int = 4 +sizeof_wchar_t = 4 +sizeof_void* = 4 + +alignment_char = 1 +alignment_void* = 4 +alignment_double = 4 + +has_function_printf = true + +sys_root = '/some/path' +pkg_config_libdir = '/some/path/lib/pkgconfig' +``` + +In most cases you don't need the size and alignment settings, Meson +will detect all these by compiling and running some sample +programs. If your build requires some piece of data that is not listed +here, Meson will stop and write an error message describing how to fix +the issue. If you need extra compiler arguments to be used during +cross compilation you can set them with `[langname]_args = +[args]`. Just remember to specify the args as an array and not as a +single string (i.e. not as `'-DCROSS=1 -DSOMETHING=3'`). + +*Since 0.52.0* The `sys_root` property may point to the root of the host +system path (the system that will run the compiled binaries). This is used +internally by Meson to set the PKG_CONFIG_SYSROOT_DIR environment variable +for pkg-config. If this is unset the host system is assumed to share a root +with the build system. + +*Since 0.54.0* The pkg_config_libdir property may point to a list of path used +internally by Meson to set the PKG_CONFIG_LIBDIR environment variable for pkg-config. +This prevents pkg-config from searching cross dependencies in system directories. + +One important thing to note, if you did not define an `exe_wrapper` in +the previous section, is that Meson will make a best-effort guess at +whether it can run the generated binaries on the build machine. It +determines whether this is possible by looking at the `system` and +`cpu_family` of build vs host. There will however be cases where they +do match up, but the build machine is actually not compatible with the +host machine. Typically this will happen if the libc used by the build +and host machines are incompatible, or the code relies on kernel +features not available on the build machine. One concrete example is a +macOS build machine producing binaries for an iOS Simulator x86-64 +host. They're both `darwin` and the same architecture, but their +binaries are not actually compatible. In such cases you may use the +`needs_exe_wrapper` property to override the auto-detection: + +```ini +[properties] +needs_exe_wrapper = true +``` + +### Machine Entries + +The next bit is the definition of host and target machines. Every +cross build definition must have one or both of them. If it had +neither, the build would not be a cross build but a native build. You +do not need to define the build machine, as all necessary information +about it is extracted automatically. The definitions for host and +target machines look the same. Here is a sample for host machine. + +```ini +[host_machine] +system = 'windows' +cpu_family = 'x86' +cpu = 'i686' +endian = 'little' +``` + +These values define the machines sufficiently for cross compilation +purposes. The corresponding target definition would look the same but +have `target_machine` in the header. These values are available in +your Meson scripts. There are three predefined variables called, +surprisingly, `build_machine`, `host_machine` and +`target_machine`. Determining the operating system of your host +machine is simply a matter of calling `host_machine.system()`. + +There are two different values for the CPU. The first one is `cpu_family`. It +is a general type of the CPU. This should have a value from [the CPU Family +table](Reference-tables.md#cpu-families). *Note* that meson does not add +`el` to end cpu_family value for little endian systems. Big endian and little +endian mips are both just `mips`, with the `endian` field set approriately. + +The second value is `cpu` which is +a more specific subtype for the CPU. Typical values for a `x86` CPU family +might include `i386` or `i586` and for `arm` family `armv5` or `armv7hl`. +Note that CPU type strings are very system dependent. You might get a +different value if you check its value on the same machine but with different +operating systems. + +If you do not define your host machine, it is assumed to be the build +machine. Similarly if you do not specify target machine, it is assumed +to be the host machine. + + +## Starting a cross build + + +Once you have the cross file, starting a build is simple + +```console +$ meson srcdir builddir --cross-file cross_file.txt +``` + +Once configuration is done, compilation is started by invoking `meson compile` +in the usual way. + +## Introspection and system checks + +The main *meson* object provides two functions to determine cross +compilation status. + +```meson +meson.is_cross_build() # returns true when cross compiling +meson.can_run_host_binaries() # returns true if the host binaries can be run, either with a wrapper or natively +``` + +You can run system checks on both the system compiler or the cross +compiler. You just have to specify which one to use. + +```meson +build_compiler = meson.get_compiler('c', native : true) +host_compiler = meson.get_compiler('c', native : false) + +build_int_size = build_compiler.sizeof('int') +host_int_size = host_compiler.sizeof('int') +``` + +## Mixing host and build targets + +Sometimes you need to build a tool which is used to generate source +files. These are then compiled for the actual target. For this you +would want to build some targets with the system's native +compiler. This requires only one extra keyword argument. + +```meson +native_exe = executable('mygen', 'mygen.c', native : true) +``` + +You can then take `native_exe` and use it as part of a generator rule or anything else you might want. + +## Using a custom standard library + +Sometimes in cross compilation you need to build your own standard +library instead of using the one provided by the compiler. Meson has +built-in support for switching standard libraries transparently. The +invocation to use in your cross file is the following: + +```ini +[properties] +c_stdlib = ['mylibc', 'mylibc_dep'] # Subproject name, dependency name +``` + +This specifies that C standard library is provided in the Meson +subproject `mylibc` in internal dependency variable `mylibc_dep`. It +is used on every cross built C target in the entire source tree +(including subprojects) and the standard library is disabled. The +build definitions of these targets do not need any modification. + +## Changing cross file settings + +Cross file settings are only read when the build directory is set up +the first time. Any changes to them after the fact will be +ignored. This is the same as regular compiles where you can't change +the compiler once a build tree has been set up. If you need to edit +your cross file, then you need to wipe your build tree and recreate it +from scratch. + +## Custom data + +You can store arbitrary data in `properties` and access them from your +Meson files. As an example if you cross file has this: + +```ini +[properties] +somekey = 'somevalue' +``` + +then you can access that using the `meson` object like this: + +```meson +myvar = meson.get_cross_property('somekey') +# myvar now has the value 'somevalue' +``` + +## Cross file locations + +As of version 0.44.0 meson supports loading cross files from system locations +(except on Windows). This will be $XDG_DATA_DIRS/meson/cross, or if +XDG_DATA_DIRS is undefined, then /usr/local/share/meson/cross and +/usr/share/meson/cross will be tried in that order, for system wide cross +files. User local files can be put in $XDG_DATA_HOME/meson/cross, or +~/.local/share/meson/cross if that is undefined. + +The order of locations tried is as follows: + - A file relative to the local dir + - The user local location + - The system wide locations in order + +Distributions are encouraged to ship cross files either with +their cross compiler toolchain packages or as a standalone package, and put +them in one of the system paths referenced above. + +These files can be loaded automatically without adding a path to the cross +file. For example, if a ~/.local/share/meson/cross contains a file called x86-linux, +then the following command would start a cross build using that cross files: + +```sh +meson builddir/ --cross-file x86-linux +``` diff --git a/meson/docs/markdown/Cuda-module.md b/meson/docs/markdown/Cuda-module.md new file mode 100644 index 000000000..f161eacd5 --- /dev/null +++ b/meson/docs/markdown/Cuda-module.md @@ -0,0 +1,191 @@ +--- +short-description: CUDA module +authors: + - name: Olexa Bilaniuk + years: [2019] + has-copyright: false +... + +# Unstable CUDA Module +_Since: 0.50.0_ + +This module provides helper functionality related to the CUDA Toolkit and +building code using it. + + +**Note**: this module is unstable. It is only provided as a technology preview. +Its API may change in arbitrary ways between releases or it might be removed +from Meson altogether. + + +## Importing the module + +The module may be imported as follows: + +``` meson +cuda = import('unstable-cuda') +``` + +It offers several useful functions that are enumerated below. + + +## Functions + +### `nvcc_arch_flags()` +_Since: 0.50.0_ + +``` meson +cuda.nvcc_arch_flags(nvcc_or_version, ..., + detected: string_or_array) +``` + +Returns a list of `-gencode` flags that should be passed to `cuda_args:` in +order to compile a "fat binary" for the architectures/compute capabilities +enumerated in the positional argument(s). The flags shall be acceptable to +the NVCC compiler object `nvcc_or_version`, or its version string. + +A set of architectures and/or compute capabilities may be specified by: + +- The single positional argument `'All'`, `'Common'` or `'Auto'` +- As (an array of) + - Architecture names (`'Kepler'`, `'Maxwell+Tegra'`, `'Turing'`) and/or + - Compute capabilities (`'3.0'`, `'3.5'`, `'5.3'`, `'7.5'`) + +A suffix of `+PTX` requests PTX code generation for the given architecture. +A compute capability given as `A.B(X.Y)` requests PTX generation for an older +virtual architecture `X.Y` before binary generation for a newer architecture +`A.B`. + +Multiple architectures and compute capabilities may be passed in using + +- Multiple positional arguments +- Lists of strings +- Space (` `), comma (`,`) or semicolon (`;`)-separated strings + +The single-word architectural sets `'All'`, `'Common'` or `'Auto'` cannot be +mixed with architecture names or compute capabilities. Their interpretation is: + +| Name | Compute Capability | +|-------------------|--------------------| +| `'All'` | All CCs supported by given NVCC compiler. | +| `'Common'` | Relatively common CCs supported by given NVCC compiler. Generally excludes Tegra and Tesla devices. | +| `'Auto'` | The CCs provided by the `detected:` keyword, filtered for support by given NVCC compiler. | + +As a special case, when `nvcc_arch_flags()` is invoked with + +- an NVCC `compiler` object `nvcc`, +- `'Auto'` mode and +- no `detected:` keyword, + +Meson uses `nvcc`'s architecture auto-detection results. + +The supported architecture names and their corresponding compute capabilities +are: + +| Name | Compute Capability | +|-------------------|--------------------| +| `'Fermi'` | 2.0, 2.1(2.0) | +| `'Kepler'` | 3.0, 3.5 | +| `'Kepler+Tegra'` | 3.2 | +| `'Kepler+Tesla'` | 3.7 | +| `'Maxwell'` | 5.0, 5.2 | +| `'Maxwell+Tegra'` | 5.3 | +| `'Pascal'` | 6.0, 6.1 | +| `'Pascal+Tegra'` | 6.2 | +| `'Volta'` | 7.0 | +| `'Xavier'` | 7.2 | +| `'Turing'` | 7.5 | + + +Examples: + + cuda.nvcc_arch_flags('10.0', '3.0', '3.5', '5.0+PTX') + cuda.nvcc_arch_flags('10.0', ['3.0', '3.5', '5.0+PTX']) + cuda.nvcc_arch_flags('10.0', [['3.0', '3.5'], '5.0+PTX']) + cuda.nvcc_arch_flags('10.0', '3.0 3.5 5.0+PTX') + cuda.nvcc_arch_flags('10.0', '3.0,3.5,5.0+PTX') + cuda.nvcc_arch_flags('10.0', '3.0;3.5;5.0+PTX') + cuda.nvcc_arch_flags('10.0', 'Kepler 5.0+PTX') + # Returns ['-gencode', 'arch=compute_30,code=sm_30', + # '-gencode', 'arch=compute_35,code=sm_35', + # '-gencode', 'arch=compute_50,code=sm_50', + # '-gencode', 'arch=compute_50,code=compute_50'] + + cuda.nvcc_arch_flags('10.0', '3.5(3.0)') + # Returns ['-gencode', 'arch=compute_30,code=sm_35'] + + cuda.nvcc_arch_flags('8.0', 'Common') + # Returns ['-gencode', 'arch=compute_30,code=sm_30', + # '-gencode', 'arch=compute_35,code=sm_35', + # '-gencode', 'arch=compute_50,code=sm_50', + # '-gencode', 'arch=compute_52,code=sm_52', + # '-gencode', 'arch=compute_60,code=sm_60', + # '-gencode', 'arch=compute_61,code=sm_61', + # '-gencode', 'arch=compute_61,code=compute_61'] + + cuda.nvcc_arch_flags('9.2', 'Auto', detected: '6.0 6.0 6.0 6.0') + cuda.nvcc_arch_flags('9.2', 'Auto', detected: ['6.0', '6.0', '6.0', '6.0']) + # Returns ['-gencode', 'arch=compute_60,code=sm_60'] + + cuda.nvcc_arch_flags(nvcc, 'All') + # Returns ['-gencode', 'arch=compute_20,code=sm_20', + # '-gencode', 'arch=compute_20,code=sm_21', + # '-gencode', 'arch=compute_30,code=sm_30', + # '-gencode', 'arch=compute_32,code=sm_32', + # '-gencode', 'arch=compute_35,code=sm_35', + # '-gencode', 'arch=compute_37,code=sm_37', + # '-gencode', 'arch=compute_50,code=sm_50', # nvcc.version() < 7.0 + # '-gencode', 'arch=compute_52,code=sm_52', + # '-gencode', 'arch=compute_53,code=sm_53', # nvcc.version() >= 7.0 + # '-gencode', 'arch=compute_60,code=sm_60', + # '-gencode', 'arch=compute_61,code=sm_61', # nvcc.version() >= 8.0 + # '-gencode', 'arch=compute_70,code=sm_70', + # '-gencode', 'arch=compute_72,code=sm_72', # nvcc.version() >= 9.0 + # '-gencode', 'arch=compute_75,code=sm_75'] # nvcc.version() >= 10.0 + +_Note:_ This function is intended to closely replicate CMake's FindCUDA module +function `CUDA_SELECT_NVCC_ARCH_FLAGS(out_variable, [list of CUDA compute architectures])` + + + +### `nvcc_arch_readable()` +_Since: 0.50.0_ + +``` meson +cuda.nvcc_arch_readable(nvcc_or_version, ..., + detected: string_or_array) +``` + +Has precisely the same interface as [`nvcc_arch_flags()`](#nvcc_arch_flags), +but rather than returning a list of flags, it returns a "readable" list of +architectures that will be compiled for. The output of this function is solely +intended for informative message printing. + + archs = '3.0 3.5 5.0+PTX' + readable = cuda.nvcc_arch_readable(nvcc, archs) + message('Building for architectures ' + ' '.join(readable)) + +This will print + + Message: Building for architectures sm30 sm35 sm50 compute50 + +_Note:_ This function is intended to closely replicate CMake's FindCUDA module function +`CUDA_SELECT_NVCC_ARCH_FLAGS(out_variable, [list of CUDA compute architectures])` + + + +### `min_driver_version()` +_Since: 0.50.0_ + +``` meson +cuda.min_driver_version(nvcc_or_version) +``` + +Returns the minimum NVIDIA proprietary driver version required, on the host +system, by kernels compiled with the given NVCC compiler or its version string. + +The output of this function is generally intended for informative message +printing, but could be used for assertions or to conditionally enable +features known to exist within the minimum NVIDIA driver required. + + diff --git a/meson/docs/markdown/Custom-build-targets.md b/meson/docs/markdown/Custom-build-targets.md new file mode 100644 index 000000000..f0b50d8fe --- /dev/null +++ b/meson/docs/markdown/Custom-build-targets.md @@ -0,0 +1,45 @@ +--- +short-description: Build targets for custom languages or corner-cases +... + +# Custom build targets + +While Meson tries to support as many languages and tools as possible, +there is no possible way for it to cover all corner cases. For these +cases it permits you to define custom build targets. Here is how one +would use it. + +```meson +comp = find_program('custom_compiler') + +infile = 'source_code.txt' +outfile = 'output.bin' + +mytarget = custom_target('targetname', + output : outfile, + input : infile, + command : [comp, '@INPUT@', '@OUTPUT@'], + install : true, + install_dir : 'subdir') +``` + +This would generate the binary `output.bin` and install it to +`${prefix}/subdir/output.bin`. Variable substitution works just like +it does for source generation. + +See [Generating Sources](Generating-sources.md) for more information on this topic. + +## Details on compiler invocations + +Meson only permits you to specify one command to run. This is by +design as writing shell pipelines into build definition files leads to +code that is very hard to maintain. If your compilation requires +multiple steps you need to write a wrapper script that does all the +necessary work. When doing this you need to be mindful of the +following issues: + +* do not assume that the command is invoked in any specific directory +* a target called `target` file `outfile` defined in subdir `subdir` + must be written to `build_dir/subdir/foo.dat` +* if you need a subdirectory for temporary files, use + `build_dir/subdir/target.dir` diff --git a/meson/docs/markdown/D.md b/meson/docs/markdown/D.md new file mode 100644 index 000000000..39aebc8a0 --- /dev/null +++ b/meson/docs/markdown/D.md @@ -0,0 +1,119 @@ +--- +title: D +short-description: Compiling D sources +... + +# Compiling D applications + +Meson has support for compiling D programs. A minimal `meson.build` +file for D looks like this: + +```meson +project('myapp', 'd') + +executable('myapp', 'app.d') +``` + +## [Conditional compilation](https://dlang.org/spec/version.html) + +If you are using the [version()](https://dlang.org/spec/version.html#version-specification) feature for conditional compilation, +you can use it using the `d_module_versions` target property: +```meson +project('myapp', 'd') +executable('myapp', 'app.d', d_module_versions: ['Demo', 'FeatureA']) +``` + +For debugging, [debug()](https://dlang.org/spec/version.html#debug) conditions are compiled automatically in debug builds, and extra identifiers can be added with the `d_debug` argument: +```meson +project('myapp', 'd') +executable('myapp', 'app.d', d_debug: [3, 'DebugFeatureA']) +``` + +## Using embedded unittests + +If you are using embedded [unittest functions](https://dlang.org/spec/unittest.html), your source code needs +to be compiled twice, once in regular +mode, and once with unittests active. This is done by setting the +`d_unittest` target property to `true`. +Meson will only ever pass the respective compiler's `-unittest` flag, +and never have the compiler generate an empty main function. +If you need that feature in a portable way, create an empty `main()` +function for unittests yourself, since the GNU D compiler +does not have this feature. + +This is an example for using D unittests with Meson: +```meson +project('myapp_tested', 'd') + +myapp_src = ['app.d', 'alpha.d', 'beta.d'] +executable('myapp', myapp_src) + +test_exe = executable('myapp_test', myapp_src, d_unittest: true) +test('myapptest', test_exe) +``` + +# Compiling D libraries and installing them + +Building D libraries is a straightforward process, not different from +how C libraries are built in Meson. You should generate a pkg-config +file and install it, in order to make other software on the system +find the dependency once it is installed. + +This is an example on how to build a D shared library: +```meson +project('mylib', 'd', version: '1.2.0') + +project_soversion = 0 +glib_dep = dependency('glib-2.0') + +my_lib = library('mylib', + ['src/mylib/libfunctions.d'], + dependencies: [glib_dep], + install: true, + version: meson.project_version(), + soversion: project_soversion, + d_module_versions: ['FeatureA', 'featureB'] +) + +pkgc = import('pkgconfig') +pkgc.generate(name: 'mylib', + libraries: my_lib, + subdirs: 'd/mylib', + version: meson.project_version(), + description: 'A simple example D library.', + d_module_versions: ['FeatureA'] +) +install_subdir('src/mylib/', install_dir: 'include/d/mylib/') +``` + +It is important to make the D sources install in a subdirectory in the + include path, in this case `/usr/include/d/mylib/mylib`. +All D compilers include the `/usr/include/d` directory by default, and + if your library would be installed into `/usr/include/d/mylib`, there +is a high chance that, when you compile your project again on a +machine where you installed it, the compiler will prefer the old +installed include over the new version in the source tree, leading to +very confusing errors. + +This is an example of how to use the D library we just built and +installed in an application: +```meson +project('myapp', 'd') + +mylib_dep = dependency('mylib', version: '>= 1.2.0') +myapp_src = ['app.d', 'alpha.d', 'beta.d'] +executable('myapp', myapp_src, dependencies: [mylib_dep]) +``` + +Please keep in mind that the library and executable would both need to +be built with the exact same D compiler and D compiler version. The D +ABI is not stable across compilers and their versions, and mixing +compilers will lead to problems. + +# Integrating with DUB + +DUB is a fully integrated build system for D, but it is also a way to +provide dependencies. Adding dependencies from the [D package registry](https://code.dlang.org/) +is pretty straight forward. You can find how to do this in +[Dependencies](Dependencies.md#some-notes-on-dub). You can also automatically +generate a `dub.json` file as explained in [Dlang](Dlang-module.md#generate_dub_file). diff --git a/meson/docs/markdown/Dependencies.md b/meson/docs/markdown/Dependencies.md new file mode 100644 index 000000000..a8f6d8a5d --- /dev/null +++ b/meson/docs/markdown/Dependencies.md @@ -0,0 +1,612 @@ +--- +short-description: Dependencies for external libraries and frameworks +... + +# Dependencies + +Very few applications are fully self-contained, but rather they use +external libraries and frameworks to do their work. Meson makes it +very easy to find and use external dependencies. Here is how one would +use the zlib compression library. + +```meson +zdep = dependency('zlib', version : '>=1.2.8') +exe = executable('zlibprog', 'prog.c', dependencies : zdep) +``` + +First Meson is told to find the external library `zlib` and error out +if it is not found. The `version` keyword is optional and specifies a +version requirement for the dependency. Then an executable is built +using the specified dependency. Note how the user does not need to +manually handle compiler or linker flags or deal with any other +minutiae. + +If you have multiple dependencies, pass them as an array: + +```meson +executable('manydeps', 'file.c', dependencies : [dep1, dep2, dep3, dep4]) +``` + +If the dependency is optional, you can tell Meson not to error out if +the dependency is not found and then do further configuration. + +```meson +opt_dep = dependency('somedep', required : false) +if opt_dep.found() + # Do something. +else + # Do something else. +endif +``` + +You can pass the `opt_dep` variable to target construction functions +whether the actual dependency was found or not. Meson will ignore +non-found dependencies. + +Meson also allows to get variables that are defined in the +`pkg-config` file. This can be done by using the +`get_pkgconfig_variable` function. + +```meson +zdep_prefix = zdep.get_pkgconfig_variable('prefix') +``` + +These variables can also be redefined by passing the `define_variable` +parameter, which might be useful in certain situations: + +```meson +zdep_prefix = zdep.get_pkgconfig_variable('libdir', define_variable: ['prefix', '/tmp']) +``` + +The dependency detector works with all libraries that provide a +`pkg-config` file. Unfortunately several packages don't provide +pkg-config files. Meson has autodetection support for some of these, +and they are described [later in this +page](#dependencies-with-custom-lookup-functionality). + +# Arbitrary variables from dependencies that can be found multiple ways + +*Note* new in 0.51.0 +*new in 0.54.0, the `internal` keyword* + +When you need to get an arbitrary variables from a dependency that can be +found multiple ways and you don't want to constrain the type you can use +the generic `get_variable` method. This currently supports cmake, pkg-config, +and config-tool based variables. + +```meson +foo_dep = dependency('foo') +var = foo_dep.get_variable(cmake : 'CMAKE_VAR', pkgconfig : 'pkg-config-var', configtool : 'get-var', default_value : 'default') +``` + +It accepts the keywords 'cmake', 'pkgconfig', 'pkgconfig_define', +'configtool', 'internal', and 'default_value'. 'pkgconfig_define' works just +like the 'define_variable' argument to `get_pkgconfig_variable`. When this +method is invoked the keyword corresponding to the underlying type of the +dependency will be used to look for a variable. If that variable cannot be +found or if the caller does not provide an argument for the type of +dependency, one of the following will happen: If 'default_value' was provided +that value will be returned, if 'default_value' was not provided then an +error will be raised. + +# Declaring your own + +You can declare your own dependency objects that can be used +interchangeably with dependency objects obtained from the system. The +syntax is straightforward: + +```meson +my_inc = include_directories(...) +my_lib = static_library(...) +my_dep = declare_dependency(link_with : my_lib, + include_directories : my_inc) +``` + +This declares a dependency that adds the given include directories and +static library to any target you use it in. + +# Building dependencies as subprojects + +Many platforms do not provide a system package manager. On these +systems dependencies must be compiled from source. Meson's subprojects +make it simple to use system dependencies when they are available and +to build dependencies manually when they are not. + +To make this work, the dependency must have Meson build definitions +and it must declare its own dependency like this: + +```meson + foo_dep = declare_dependency(...) +``` + +Then any project that wants to use it can write out the following +declaration in their main `meson.build` file. + +```meson + foo_dep = dependency('foo', fallback : ['foo', 'foo_dep']) +``` + +What this declaration means is that first Meson tries to look up the +dependency from the system (such as by using pkg-config). If it is not +available, then it builds subproject named `foo` and from that +extracts a variable `foo_dep`. That means that the return value of +this function is either an external or an internal dependency +object. Since they can be used interchangeably, the rest of the build +definitions do not need to care which one it is. Meson will take care +of all the work behind the scenes to make this work. + +# Dependency method + +You can use the keyword `method` to let meson know what method to use +when searching for the dependency. The default value is `auto`. +Additional dependencies methods are `pkg-config`, `config-tool`, `cmake`, +`system`, `sysconfig`, `qmake`, `extraframework` and `dub`. + +```meson +cups_dep = dependency('cups', method : 'pkg-config') +``` + +The dependency method order for `auto` is: + + 1. `pkg-config` + 2. `cmake` + 3. `extraframework` (OSX only) + +## CMake + +Meson can use the CMake `find_package()` function to detect +dependencies with the builtin `Find.cmake` modules and exported +project configurations (usually in `/usr/lib/cmake`). Meson is able +to use both the old-style `_LIBRARIES` variables as well as +imported targets. + +It is possible to manually specify a list of CMake targets that should +be used with the `modules` property. However, this step is optional +since meson tries to automatically guess the correct target based on the +name of the dependency. + +Depending on the dependency it may be necessary to explicitly specify +a CMake target with the `modules` property if meson is unable to guess +it automatically. + +```meson + cmake_dep = dependency('ZLIB', method : 'cmake', modules : ['ZLIB::ZLIB']) +``` + +Support for adding additional `COMPONENTS` for the CMake `find_package` lookup +is provided with the `components` kwarg (*introduced in 0.54.0*). All specified +componets will be passed directly to `find_package(COMPONENTS)`. + +It is also possible to reuse existing `Find.cmake` files with the +`cmake_module_path` property. Using this property is equivalent to setting the +`CMAKE_MODULE_PATH` variable in CMake. The path(s) given to `cmake_module_path` +should all be relative to the project source directory. Absolute paths +should only be used if the CMake files are not stored in the project itself. + +Additional CMake parameters can be specified with the `cmake_args` property. + +## Dub + +Please understand that meson is only able to find dependencies that +exist in the local Dub repository. You need to manually fetch and +build the target dependencies. + +For `urld`. +``` +dub fetch urld +dub build urld +``` + +Other thing you need to keep in mind is that both meson and Dub need +to be using the same compiler. This can be achieved using Dub's +`-compiler` argument and/or manually setting the `DC` environment +variable when running meson. +``` +dub build urld --compiler=dmd +DC="dmd" meson builddir +``` + +# Dependencies with custom lookup functionality + +Some dependencies have specific detection logic. + +Generic dependency names are case-sensitive[1](#footnote1), +but these dependency names are matched case-insensitively. The +recommended style is to write them in all lower-case. + +In some cases, more than one detection method exists, and the `method` keyword +may be used to select a detection method to use. The `auto` method uses any +checking mechanisms in whatever order meson thinks is best. + +e.g. libwmf and CUPS provide both pkg-config and config-tool support. You can +force one or another via the `method` keyword: + +```meson +cups_dep = dependency('cups', method : 'pkg-config') +wmf_dep = dependency('libwmf', method : 'config-tool') +``` + +## Dependencies using config tools + +[CUPS](#cups), [LLVM](#llvm), [pcap](#pcap), [WxWidgets](#wxwidgets), +[libwmf](#libwmf), [GCrypt](#libgcrypt), [GPGME](#gpgme), and GnuStep either do not provide pkg-config +modules or additionally can be detected via a config tool +(cups-config, llvm-config, libgcrypt-config, etc). Meson has native support for these +tools, and they can be found like other dependencies: + +```meson +pcap_dep = dependency('pcap', version : '>=1.0') +cups_dep = dependency('cups', version : '>=1.4') +llvm_dep = dependency('llvm', version : '>=4.0') +libgcrypt_dep = dependency('libgcrypt', version: '>= 1.8') +gpgme_dep = dependency('gpgme', version: '>= 1.0') +``` + +*Since 0.55.0* Meson won't search $PATH any more for a config tool binary when +cross compiling if the config tool did not have an entry in the cross file. + +## AppleFrameworks + +Use the `modules` keyword to list frameworks required, e.g. + +```meson +dep = dependency('appleframeworks', modules : 'foundation') +``` + +These dependencies can never be found for non-OSX hosts. + +## Blocks + +Enable support for Clang's blocks extension. + +```meson +dep = dependency('blocks') +``` + +*(added 0.52.0)* + +## Boost + +Boost is not a single dependency but rather a group of different +libraries. To use Boost headers-only libraries, simply add Boost as a +dependency. + +```meson +boost_dep = dependency('boost') +exe = executable('myprog', 'file.cc', dependencies : boost_dep) +``` + +To link against boost with Meson, simply list which libraries you +would like to use. + +```meson +boost_dep = dependency('boost', modules : ['thread', 'utility']) +exe = executable('myprog', 'file.cc', dependencies : boost_dep) +``` + +You can call `dependency` multiple times with different modules and +use those to link against your targets. + +If your boost headers or libraries are in non-standard locations you +can set the BOOST_ROOT, BOOST_INCLUDEDIR, and/or BOOST_LIBRARYDIR +environment variables. + +You can set the argument `threading` to `single` to use boost +libraries that have been compiled for single-threaded use instead. + +## CUDA + +*(added 0.53.0)* + +Enables compiling and linking against the CUDA Toolkit. The `version` +and `modules` keywords may be passed to request the use of a specific +CUDA Toolkit version and/or additional CUDA libraries, correspondingly: + +```meson +dep = dependency('cuda', version : '>=10', modules : ['cublas']) +``` + +Note that explicitly adding this dependency is only necessary if you are +using CUDA Toolkit from a C/C++ file or project, or if you are utilizing +additional toolkit libraries that need to be explicitly linked to. + +## CUPS + +`method` may be `auto`, `config-tool`, `pkg-config`, `cmake` or `extraframework`. + +## Fortran Coarrays + +*(added 0.50.0)* + + Coarrays are a Fortran language intrinsic feature, enabled by +`dependency('coarray')`. + +GCC will use OpenCoarrays if present to implement coarrays, while Intel and NAG +use internal coarray support. + +## GL + +This finds the OpenGL library in a way appropriate to the platform. + +`method` may be `auto`, `pkg-config` or `system`. + +## GTest and GMock + +GTest and GMock come as sources that must be compiled as part of your +project. With Meson you don't have to care about the details, just +pass `gtest` or `gmock` to `dependency` and it will do everything for +you. If you want to use GMock, it is recommended to use GTest as well, +as getting it to work standalone is tricky. + +You can set the `main` keyword argument to `true` to use the `main()` +function provided by GTest: + +```meson +gtest_dep = dependency('gtest', main : true, required : false) +e = executable('testprog', 'test.cc', dependencies : gtest_dep) +test('gtest test', e) +``` + +## HDF5 + +*(added 0.50.0)* + +HDF5 is supported for C, C++ and Fortran. Because dependencies are +language-specific, you must specify the requested language using the +`language` keyword argument, i.e., + * `dependency('hdf5', language: 'c')` for the C HDF5 headers and libraries + * `dependency('hdf5', language: 'cpp')` for the C++ HDF5 headers and libraries + * `dependency('hdf5', language: 'fortran')` for the Fortran HDF5 headers and libraries + +Meson uses pkg-config to find HDF5. The standard low-level HDF5 function and the `HL` high-level HDF5 functions are linked for each language. + + +## libwmf + +*(added 0.44.0)* + +`method` may be `auto`, `config-tool` or `pkg-config`. + +## LLVM + +Meson has native support for LLVM going back to version LLVM version 3.5. +It supports a few additional features compared to other config-tool based +dependencies. + +As of 0.44.0 Meson supports the `static` keyword argument for +LLVM. Before this LLVM >= 3.9 would always dynamically link, while +older versions would statically link, due to a quirk in `llvm-config`. + +`method` may be `auto`, `config-tool`, or `cmake`. + +### Modules, a.k.a. Components + +Meson wraps LLVM's concept of components in it's own modules concept. +When you need specific components you add them as modules as meson +will do the right thing: + +```meson +llvm_dep = dependency('llvm', version : '>= 4.0', modules : ['amdgpu']) +``` + +As of 0.44.0 it can also take optional modules (these will affect the arguments +generated for a static link): + +```meson +llvm_dep = dependency( + 'llvm', version : '>= 4.0', modules : ['amdgpu'], optional_modules : ['inteljitevents'], +) +``` + +### Using LLVM tools +When using LLVM as library but also needing its tools, it is often beneficial to use the same version. +This can partially be achieved with the `version` argument of `find_program()`. +However, distributions tend to package different LLVM versions in rather different ways. +Therefore, it is often better to use the llvm dependency directly to retrieve the tools: + +```meson +llvm_dep = dependency('llvm', version : ['>= 8', '< 9']) +llvm_link = find_program(llvm_dep.get_variable(configtool: 'bindir') / 'llvm-link') +``` + +## MPI + +*(added 0.42.0)* + +MPI is supported for C, C++ and Fortran. Because dependencies are +language-specific, you must specify the requested language using the +`language` keyword argument, i.e., + * `dependency('mpi', language: 'c')` for the C MPI headers and libraries + * `dependency('mpi', language: 'cpp')` for the C++ MPI headers and libraries + * `dependency('mpi', language: 'fortran')` for the Fortran MPI headers and libraries + +Meson prefers pkg-config for MPI, but if your MPI implementation does +not provide them, it will search for the standard wrapper executables, +`mpic`, `mpicxx`, `mpic++`, `mpifort`, `mpif90`, `mpif77`. If these +are not in your path, they can be specified by setting the standard +environment variables `MPICC`, `MPICXX`, `MPIFC`, `MPIF90`, or +`MPIF77`, during configuration. It will also try to use the Microsoft +implementation on windows via the `system` method. + +`method` may be `auto`, `config-tool`, `pkg-config` or `system`. + +*New in 0.54.0* The `config-tool` and `system` method values. Previous +versions would always try `pkg-config`, then `config-tool`, then `system`. + +## NetCDF + +*(added 0.50.0)* + +NetCDF is supported for C, C++ and Fortran. Because NetCDF dependencies are +language-specific, you must specify the requested language using the +`language` keyword argument, i.e., + * `dependency('netcdf', language: 'c')` for the C NetCDF headers and libraries + * `dependency('netcdf', language: 'cpp')` for the C++ NetCDF headers and libraries + * `dependency('netcdf', language: 'fortran')` for the Fortran NetCDF headers and libraries + +Meson uses pkg-config to find NetCDF. + + +## OpenMP + +*(added 0.46.0)* + +This dependency selects the appropriate compiler flags and/or libraries to use +for OpenMP support. + +The `language` keyword may used. + +## pcap + +*(added 0.42.0)* + +`method` may be `auto`, `config-tool` or `pkg-config`. + +## libgcrypt + +*(added 0.49.0)* + +`method` may be `auto`, `config-tool` or `pkg-config`. + +## GPGME + +*(added 0.51.0)* + +`method` may be `auto`, `config-tool` or `pkg-config`. + +## Python3 + +Python3 is handled specially by meson: +1. Meson tries to use `pkg-config`. +2. If `pkg-config` fails meson uses a fallback: + - On Windows the fallback is the current `python3` interpreter. + - On OSX the fallback is a framework dependency from `/Library/Frameworks`. + +Note that `python3` found by this dependency might differ from the one used in +`python3` module because modules uses the current interpreter, but dependency tries +`pkg-config` first. + +`method` may be `auto`, `extraframework`, `pkg-config` or `sysconfig` + +## Qt4 & Qt5 + +Meson has native Qt support. Its usage is best demonstrated with an +example. + +```meson +qt5_mod = import('qt5') +qt5widgets = dependency('qt5', modules : 'Widgets') + +processed = qt5_mod.preprocess( + moc_headers : 'mainWindow.h', # Only headers that need moc should be put here + moc_sources : 'helperFile.cpp', # must have #include"moc_helperFile.cpp" + ui_files : 'mainWindow.ui', + qresources : 'resources.qrc', +) + +q5exe = executable('qt5test', + sources : ['main.cpp', + 'mainWindow.cpp', + processed], + dependencies: qt5widgets) +``` + +Here we have an UI file created with Qt Designer and one source and +header file each that require preprocessing with the `moc` tool. We +also define a resource file to be compiled with `rcc`. We just have to +tell Meson which files are which and it will take care of invoking all +the necessary tools in the correct order, which is done with the +`preprocess` method of the `qt5` module. Its output is simply put in +the list of sources for the target. The `modules` keyword of +`dependency` works just like it does with Boost. It tells which +subparts of Qt the program uses. + +You can set the `main` keyword argument to `true` to use the `WinMain()` +function provided by qtmain static library (this argument does nothing on platforms +other than Windows). + +Setting the optional `private_headers` keyword to true adds the private header +include path of the given module(s) to the compiler flags. (since v0.47.0) + +**Note** using private headers in your project is a bad idea, do so at your own +risk. + +`method` may be `auto`, `pkg-config` or `qmake`. + +## SDL2 + +SDL2 can be located using `pkg-confg`, the `sdl2-config` config tool, or as an +OSX framework. + +`method` may be `auto`, `config-tool`, `extraframework` or `pkg-config`. + +## Threads + +This dependency selects the appropriate compiler flags and/or libraries to use +for thread support. + +See [threads](Threads.md). + +## Valgrind + +Meson will find valgrind using `pkg-config`, but only uses the compilation flags +and avoids trying to link with it's non-PIC static libs. + +## Vulkan + +*(added 0.42.0)* + +Vulkan can be located using `pkg-config`, or the `VULKAN_SDK` environment variable. + +`method` may be `auto`, `pkg-config` or `system`. + +## WxWidgets + +Similar to [Boost](#boost), WxWidgets is not a single library but rather +a collection of modules. WxWidgets is supported via `wx-config`. +Meson substitutes `modules` to `wx-config` invocation, it generates +- `compile_args` using `wx-config --cxxflags $modules...` +- `link_args` using `wx-config --libs $modules...` + +### Example + +```meson +wx_dep = dependency( + 'wxwidgets', version : '>=3.0.0', modules : ['std', 'stc'], +) +``` + +```shell +# compile_args: +$ wx-config --cxxflags std stc + +# link_args: +$ wx-config --libs std stc +``` + +## Shaderc + +*(added 0.51.0)* + +Shaderc currently does not ship with any means of detection. Nevertheless, Meson +can try to detect it using `pkg-config`, but will default to looking for the +appropriate library manually. If the `static` keyword argument is `true`, +`shaderc_combined` is preferred. Otherwise, `shaderc_shared` is preferred. Note +that it is not possible to obtain the shaderc version using this method. + +`method` may be `auto`, `pkg-config` or `system`. + +## Zlib + +Zlib ships with pkg-config and cmake support, but on some operating systems +(windows, macOs, FreeBSD, dragonflybsd), it is provided as part of the base +operating system without pkg-config support. The new System finder can be used +on these OSes to link with the bundled version. + +`method` may be `auto`, `pkg-config`, `cmake`, or `system`. + +*New in 0.54.0* the `system` method. + +
+1: They may appear to be case-insensitive, if the + underlying file system happens to be case-insensitive. diff --git a/meson/docs/markdown/Design-rationale.md b/meson/docs/markdown/Design-rationale.md new file mode 100644 index 000000000..71211923d --- /dev/null +++ b/meson/docs/markdown/Design-rationale.md @@ -0,0 +1,258 @@ +--- +title: Design rationale +... + +This is the original design rationale for Meson. The syntax it describes does not match the released version +== + +A software developer's most important tool is the editor. If you talk +to coders about the editors they use, you are usually met with massive +enthusiasm and praise. You will hear how Emacs is the greatest thing +ever or how vi is so elegant or how Eclipse's integration features +make you so much more productive. You can sense the enthusiasm and +affection that the people feel towards these programs. + +The second most important tool, even more important than the compiler, +is the build system. + +Those are pretty much universally despised. + +The most positive statement on build systems you can usually get (and +it might require some coaxing) is something along the lines of *well, +it's a terrible system, but all other options are even worse*. It is +easy to see why this is the case. For starters, commonly used free +build systems have obtuse syntaxes. They use for the most part global +variables that are set in random locations so you can never really be +sure what a given line of code does. They do strange and unpredictable +things at every turn. + +Let's illustrate this with a simple example. Suppose we want to run a +program built with GNU Autotools under GDB. The instinctive thing to +do is to just run `gdb programname`. The problem is that this may or +may not work. In some cases the executable file is a binary whereas at +other times it is a wrapper shell script that invokes the real binary +which resides in a hidden subdirectory. GDB invocation fails if the +binary is a script but succeeds if it is not. The user has to remember +the type of each one of his executables (which is an implementation +detail of the build system) just to be able to debug them. Several +other such pain points can be found in [this blog +post](http://voices.canonical.com/jussi.pakkanen/2011/09/13/autotools/). + +Given these idiosyncrasies it is no wonder that most people don't want +to have anything to do with build systems. They'll just copy-paste +code that works (somewhat) in one place to another and hope for the +best. They actively go out of their way not to understand the system +because the mere thought of it is repulsive. Doing this also provides +a kind of inverse job security. If you don't know tool X, there's less +chance of finding yourself responsible for its use in your +organisation. Instead you get to work on more enjoyable things. + +This leads to a vicious circle. Since people avoid the tools and don't +want to deal with them, very few work on improving them. The result is +apathy and stagnation. + +Can we do better? +-- + +At its core, building C and C++ code is not a terribly difficult +task. In fact, writing a text editor is a lot more complicated and +takes more effort. Yet we have lots of very high quality editors but +only few build systems with questionable quality and usability. + +So, in the grand tradition of own-itch-scratching, I decided to run a +scientific experiment. The purpose of this experiment was to explore +what would it take to build a "good" build system. What kind of syntax +would suit this problem? What sort of problems would this application +need to solve? What sort of solutions would be the most appropriate? + +To get things started, here is a list of requirements any modern cross-platform build system needs to provide. + +### 1. Must be simple to use + +One of the great virtues of Python is the fact that it is very +readable. It is easy to see what a given block of code does. It is +concise, clear and easy to understand. The proposed build system must +be syntactically and semantically clean. Side effects, global state +and interrelations must be kept at a minimum or, if possible, +eliminated entirely. + +### 2. Must do the right thing by default + +Most builds are done by developers working on the code. Therefore the +defaults must be tailored towards that use case. As an example the +system shall build objects without optimization and with debug +information. It shall make binaries that can be run directly from the +build directory without linker tricks, shell scripts or magic +environment variables. + +### 3. Must enforce established best practices + +There really is no reason to compile source code without the +equivalent of `-Wall`. So enable it by default. A different kind of +best practice is the total separation of source and build +directories. All build artifacts must be stored in the build +directory. Writing stray files in the source directory is not +permitted under any circumstances. + +### 4. Must have native support for platforms that are in common use + +A lot of free software projects can be used on non-free platforms such +as Windows or OSX. The system must provide native support for the +tools of choice on those platforms. In practice this means native +support for Visual Studio and XCode. Having said IDEs invoke external +builder binaries does not count as native support. + +### 5. Must not add complexity due to obsolete platforms + +Work on this build system started during the Christmas holidays of 2012. +This provides a natural hard cutoff line of 2012/12/24. Any +platform, tool or library that was not in active use at that time is +explicitly not supported. These include Unixes such as IRIX, SunOS, +OSF-1, Ubuntu versions older than 12/10, GCC versions older than 4.7 +and so on. If these old versions happen to work, great. If they don't, +not a single line of code will be added to the system to work around +their bugs. + +### 6. Must be fast + +Running the configuration step on a moderate sized project must not +take more than five seconds. Running the compile command on a fully up +to date tree of 1000 source files must not take more than 0.1 seconds. + +### 7. Must provide easy to use support for modern sw development features + +An example is precompiled headers. Currently no free software build +system provides native support for them. Other examples could include +easy integration of Valgrind and unit tests, test coverage reporting +and so on. + +### 8. Must allow override of default values + +Sometimes you just have to compile files with only given compiler +flags and no others, or install files in weird places. The system must +allow the user to do this if he really wants to. + +Overview of the solution +-- + +Going over these requirements it becomes quite apparent that the only +viable approach is roughly the same as taken by CMake: having a domain +specific language to declare the build system. Out of this declaration +a configuration is generated for the backend build system. This can be +a Makefile, Visual Studio or XCode project or anything else. + +The difference between the proposed DSL and existing ones is that the +new one is declarative. It also tries to work on a higher level of +abstraction than existing systems. As an example, using external +libraries in current build systems means manually extracting and +passing around compiler flags and linker flags. In the proposed system +the user just declares that a given build target uses a given external +dependency. The build system then takes care of passing all flags and +settings to their proper locations. This means that the user can focus +on his own code rather than marshalling command line arguments from +one place to another. + +A DSL is more work than the approach taken by SCons, which is to +provide the system as a Python library. However it allows us to make +the syntax more expressive and prevent certain types of bugs by +e.g. making certain objects truly immutable. The end result is again +the same: less work for the user. + +The backend for Unix requires a bit more thought. The default choice +would be Make. However it is extremely slow. It is not uncommon on +large code bases for Make to take several minutes just to determine +that nothing needs to be done. Instead of Make we use +[Ninja](https://ninja-build.org/), which is extremely fast. The +backend code is abstracted away from the core, so other backends can +be added with relatively little effort. + +Sample code +-- + +Enough design talk, let's get to the code. Before looking at the +examples we would like to emphasize that this is not in any way the +final code. It is proof of concept code that works in the system as it +currently exists (February 2013), but may change at any time. + +Let's start simple. Here is the code to compile a single executable binary. + +```meson +project('compile one', 'c') +executable('program', 'prog.c') +``` + +This is about as simple as one can get. First you declare the project +name and the languages it uses. Then you specify the binary to build +and its sources. The build system will do all the rest. It will add +proper suffixes (e.g. '.exe' on Windows), set the default compiler +flags and so on. + +Usually programs have more than one source file. Listing them all in +the function call can become unwieldy. That is why the system supports +keyword arguments. They look like this. + +```meson +project('compile several', 'c') +sourcelist = ['main.c', 'file1.c', 'file2.c', 'file3.c'] +executable('program', sources : sourcelist) +``` + +External dependencies are simple to use. + +```meson +project('external lib', 'c') +libdep = find_dep('extlibrary', required : true) +sourcelist = ['main.c', 'file1.c', 'file2.c', 'file3.c'] +executable('program', sources : sourcelist, dep : libdep) +``` + +In other build systems you have to manually add the compile and link +flags from external dependencies to targets. In this system you just +declare that extlibrary is mandatory and that the generated program +uses that. The build system does all the plumbing for you. + +Here's a slightly more complicated definition. It should still be +understandable. + +```meson +project('build library', 'c') +foolib = shared_library('foobar', sources : 'foobar.c',\ +install : true) +exe = executable('testfoobar', 'tester.c', link : foolib) +add_test('test library', exe) +``` + +First we build a shared library named foobar. It is marked +installable, so running `meson install` installs it to the library +directory (the system knows which one so the user does not have to +care). Then we build a test executable which is linked against the +library. It will not be installed, but instead it is added to the list +of unit tests, which can be run with the command `meson test`. + +Above we mentioned precompiled headers as a feature not supported by +other build systems. Here's how you would use them. + +```meson +project('pch demo', 'cxx') +executable('myapp', 'myapp.cpp', pch : 'pch/myapp.hh') +``` + +The main reason other build systems can not provide pch support this +easily is because they don't enforce certain best practices. Due to +the way include paths work, it is impossible to provide pch support +that always works with both in-source and out-of-source +builds. Mandating separate build and source directories makes this and +many other problems a lot easier. + +Get the code +-- + +The code for this experiment can be found at [the Meson +repository](https://sourceforge.net/p/meson/code/). It should be noted +that it is not a build system. It is only a proposal for one. It does +not work reliably yet. You probably should not use it as the build +system of your project. + +All that said I hope that this experiment will eventually turn into a +full blown build system. For that I need your help. Comments and +especially patches are more than welcome. diff --git a/meson/docs/markdown/Disabler.md b/meson/docs/markdown/Disabler.md new file mode 100644 index 000000000..4aed7ad65 --- /dev/null +++ b/meson/docs/markdown/Disabler.md @@ -0,0 +1,68 @@ +--- +short-description: Disabling options +... + +# Disabling parts of the build + +*This feature is available since version 0.44.0.* + +The following is a common fragment found in many projects: + +```meson +dep = dependency('foo') + +# In some different directory + +lib = shared_library('mylib', 'mylib.c', + dependencies : dep) + +# And ín a third directory + +exe = executable('mytest', 'mytest.c', + link_with : lib) +test('mytest', exe) +``` + +This works fine but gets a bit inflexible when you want to make this +part of the build optional. Basically it reduces to adding `if/else` +statements around all target invocations. Meson provides a simpler way +of achieving the same with a disabler object. + +A disabler object is created with the `disabler` function: + +```meson +d = disabler() +``` + +The only thing you can do to a disabler object is to ask if it has +been found: + +```meson +f = d.found() # returns false +``` + +Any other statement that uses a disabler object will immediately +return a disabler. For example assuming that `d` contains a disabler +object then + +```meson +d2 = some_func(d) # value of d2 will be disabler +d3 = true or d2 # value of d3 will be true because of short-circuiting +d4 = false or d2 # value of d4 will be disabler +if d # neither branch is evaluated +``` + +Thus to disable every target that depends on the dependency given +above, you can do something like this: + +```meson +if use_foo_feature + d = dependency('foo') +else + d = disabler() +endif +``` + +This concentrates the handling of this option in one place and other +build definition files do not need to be sprinkled with `if` +statements. diff --git a/meson/docs/markdown/Dlang-module.md b/meson/docs/markdown/Dlang-module.md new file mode 100644 index 000000000..677359de6 --- /dev/null +++ b/meson/docs/markdown/Dlang-module.md @@ -0,0 +1,43 @@ +# Dlang module + +This module provides tools related to the D programming language. + +## Usage + +To use this module, just do: **`dlang = import('dlang')`**. +You can, of course, replace the name `dlang` with anything else. + +The module only exposes one function, `generate_dub_file`, used to +automatically generate Dub configuration files. + +### generate_dub_file() +This method only has two required arguments, the project name and the +source folder. You can pass other arguments with additional keywords, +they will be automatically translated to json and added to the +`dub.json` file. + +**Structure** +```meson +generate_dub_file("project name", "source/folder", key: "value" ...) +``` + +**Example** +```meson +dlang = import('dlang') +dlang.generate_dub_file(meson.project_name().to_lower(), meson.source_root(), + authors: 'Meson Team', + description: 'Test executable', + copyright: 'Copyright © 2018, Meson Team', + license: 'MIT', + sourceFiles: 'test.d', + targetType: 'executable', + dependencies: my_dep +) +``` + +You can manually edit a meson generated `dub.json` file or provide a +initial one. The module will only update the values specified in +`generate_dub_file()`. + +Although not required, you will need to have a `description` and +`license` if you want to publish the package in the [D package registry](https://code.dlang.org/). diff --git a/meson/docs/markdown/External-commands.md b/meson/docs/markdown/External-commands.md new file mode 100644 index 000000000..272182c23 --- /dev/null +++ b/meson/docs/markdown/External-commands.md @@ -0,0 +1,52 @@ +--- +short-description: Running external commands +... + +# External commands + +As a part of the software configuration, you may want to get extra +data by running external commands. The basic syntax is the following. + +```meson +r = run_command('command', 'arg1', 'arg2', 'arg3') +if r.returncode() != 0 + # it failed +endif +output = r.stdout().strip() +errortxt = r.stderr().strip() +``` + +Since 0.52.0, you can pass the command environment as a dictionary: + +```meson +run_command('command', 'arg1', 'arg2', env: {'FOO': 'bar'}) +``` + +Since 0.50.0, you can also pass the command +[`environment`](Reference-manual.md#environment-object) object: + +```meson +env = environment() +env.set('FOO', 'bar') +run_command('command', 'arg1', 'arg2', env: env) +``` + +The `run_command` function returns an object that can be queried for +return value and text written to stdout and stderr. The `strip` method +call is used to strip trailing and leading whitespace from +strings. Usually output from command line programs ends in a newline, +which is unwanted in string variables. The first argument can be +either a string or an executable you have detected earlier with +`find_program`. + +Meson will autodetect scripts with a shebang line and run them with +the executable/interpreter specified in it both on Windows and on Unixes. + +Note that you can not pass your command line as a single string. That +is, calling `run_command('do_something foo bar')` will not work. You +must either split up the string into separate arguments or pass the +split command as an array. It should also be noted that Meson will not +pass the command to the shell, so any command lines that try to use +things such as environment variables, backticks or pipelines will not +work. If you require shell semantics, write your command into a script +file and call that with `run_command`. diff --git a/meson/docs/markdown/FAQ.md b/meson/docs/markdown/FAQ.md new file mode 100644 index 000000000..7a414431d --- /dev/null +++ b/meson/docs/markdown/FAQ.md @@ -0,0 +1,502 @@ +--- +title: FAQ +... +# Meson Frequently Asked Questions + +See also [How do I do X in Meson](howtox.md). + +## Why is it called Meson? + +When the name was originally chosen, there were two main limitations: +there must not exist either a Debian package or a Sourceforge project +of the given name. This ruled out tens of potential project names. At +some point the name Gluon was considered. Gluons are elementary +particles that hold protons and neutrons together, much like a build +system's job is to take pieces of source code and a compiler and bind +them to a complete whole. + +Unfortunately this name was taken, too. Then the rest of subatomic +particles were examined and Meson was found to be available. + +## What is the correct way to use threads (such as pthreads)? + +```meson +thread_dep = dependency('threads') +``` + +This will set up everything on your behalf. People coming from +Autotools or CMake want to do this by looking for `libpthread.so` +manually. Don't do that, it has tricky corner cases especially when +cross compiling. + +## How to use Meson on a host where it is not available in system packages? + +Starting from version 0.29.0, Meson is available from the [Python +Package Index](https://pypi.python.org/pypi/meson/), so installing it +simply a matter of running this command: + +```console +$ pip3 install meson +``` + +If you don't have access to PyPI, that is not a problem either. Meson +has been designed to be easily runnable from an extracted source +tarball or even a git checkout. First you need to download Meson. Then +use this command to set up you build instead of plain `meson`. + +```console +$ /path/to/meson.py +``` + +After this you don't have to care about invoking Meson any more. It +remembers where it was originally invoked from and calls itself +appropriately. As a user the only thing you need to do is to `cd` into +your build directory and invoke `meson compile`. + +## Why can't I specify target files with a wildcard? + +Instead of specifying files explicitly, people seem to want to do this: + +```meson +executable('myprog', sources : '*.cpp') # This does NOT work! +``` + +Meson does not support this syntax and the reason for this is +simple. This can not be made both reliable and fast. By reliable we +mean that if the user adds a new source file to the subdirectory, +Meson should detect that and make it part of the build automatically. + +One of the main requirements of Meson is that it must be fast. This +means that a no-op build in a tree of 10 000 source files must take no +more than a fraction of a second. This is only possible because Meson +knows the exact list of files to check. If any target is specified as +a wildcard glob, this is no longer possible. Meson would need to +re-evaluate the glob every time and compare the list of files produced +against the previous list. This means inspecting the entire source +tree (because the glob pattern could be `src/\*/\*/\*/\*.cpp` or +something like that). This is impossible to do efficiently. + +The main backend of Meson is Ninja, which does not support wildcard +matches either, and for the same reasons. + +Because of this, all source files must be specified explicitly. + +## But I really want to use wildcards! + +If the tradeoff between reliability and convenience is acceptable to +you, then Meson gives you all the tools necessary to do wildcard +globbing. You are allowed to run arbitrary commands during +configuration. First you need to write a script that locates the files +to compile. Here's a simple shell script that writes all `.c` files in +the current directory, one per line. + + +```bash +#!/bin/sh + +for i in *.c; do + echo $i +done +``` + +Then you need to run this script in your Meson file, convert the +output into a string array and use the result in a target. + +```meson +c = run_command('grabber.sh') +sources = c.stdout().strip().split('\n') +e = executable('prog', sources) +``` + +The script can be any executable, so it can be written in shell, +Python, Lua, Perl or whatever you wish. + +As mentioned above, the tradeoff is that just adding new files to the +source directory does *not* add them to the build automatically. To +add them you need to tell Meson to reinitialize itself. The simplest +way is to touch the `meson.build` file in your source root. Then Meson +will reconfigure itself next time the build command is run. Advanced +users can even write a small background script that utilizes a +filesystem event queue, such as +[inotify](https://en.wikipedia.org/wiki/Inotify), to do this +automatically. + +## Should I use `subdir` or `subproject`? + +The answer is almost always `subdir`. Subproject exists for a very +specific use case: embedding external dependencies into your build +process. As an example, suppose we are writing a game and wish to use +SDL. Let us further suppose that SDL comes with a Meson build +definition. Let us suppose even further that we don't want to use +prebuilt binaries but want to compile SDL for ourselves. + +In this case you would use `subproject`. The way to do it would be to +grab the source code of SDL and put it inside your own source +tree. Then you would do `sdl = subproject('sdl')`, which would cause +Meson to build SDL as part of your build and would then allow you to +link against it or do whatever else you may prefer. + +For every other use you would use `subdir`. As an example, if you +wanted to build a shared library in one dir and link tests against it +in another dir, you would do something like this: + +```meson +project('simple', 'c') +subdir('src') # library is built here +subdir('tests') # test binaries would link against the library here +``` + +## Why is there not a Make backend? + +Because Make is slow. This is not an implementation issue, Make simply +can not be made fast. For further info we recommend you read [this +post](http://neugierig.org/software/chromium/notes/2011/02/ninja.html) +by Evan Martin, the author of Ninja. Makefiles also have a syntax that +is very unpleasant to write which makes them a big maintenance burden. + +The only reason why one would use Make instead of Ninja is working on +a platform that does not have a Ninja port. Even in this case it is an +order of magnitude less work to port Ninja than it is to write a Make +backend for Meson. + +Just use Ninja, you'll be happier that way. I guarantee it. + +## Why is Meson not just a Python module so I could code my build setup in Python? + +A related question to this is *Why is Meson's configuration language +not Turing-complete?* + +There are many good reasons for this, most of which are summarized on +this web page: [Against The Use Of Programming Languages in +Configuration Files](https://taint.org/2011/02/18/001527a.html). + +In addition to those reasons, not exposing Python or any other "real" +programming language makes it possible to port Meson's implementation +to a different language. This might become necessary if, for example, +Python turns out to be a performance bottleneck. This is an actual +problem that has caused complications for GNU Autotools and SCons. + +## How do I do the equivalent of Libtools export-symbol and export-regex? + +Either by using [GCC symbol +visibility](https://gcc.gnu.org/wiki/Visibility) or by writing a +[linker +script](https://ftp.gnu.org/old-gnu/Manuals/ld-2.9.1/html_mono/ld.html). This +has the added benefit that your symbol definitions are in a standalone +file instead of being buried inside your build definitions. An example +can be found +[here](https://github.com/jpakkane/meson/tree/master/test%20cases/linuxlike/3%20linker%20script). + +## My project works fine on Linux and MinGW but fails to link with MSVC due to a missing .lib file (fatal error LNK1181). Why? + +With GCC, all symbols on shared libraries are exported automatically +unless you specify otherwise. With MSVC no symbols are exported by +default. If your shared library exports no symbols, MSVC will silently +not produce an import library file leading to failures. The solution +is to add symbol visibility definitions [as specified in GCC +wiki](https://gcc.gnu.org/wiki/Visibility). + +## I added some compiler flags and now the build fails with weird errors. What is happening? + +You probably did the equivalent to this: + +```meson +executable('foobar', ... + c_args : '-some_arg -other_arg') +``` + +Meson is *explicit*. In this particular case it will **not** +automatically split your strings at whitespaces, instead it will take +it as is and work extra hard to pass it to the compiler unchanged, +including quoting it properly over shell invocations. This is +mandatory to make e.g. files with spaces in them work flawlessly. To +pass multiple command line arguments, you need to explicitly put them +in an array like this: + +```meson +executable('foobar', ... + c_args : ['-some_arg', '-other_arg']) +``` + +## Why are changes to default project options ignored? + +You probably had a project that looked something like this: + +```meson +project('foobar', 'cpp') +``` + +This defaults to `c++11` on GCC compilers. Suppose you want to use +`c++14` instead, so you change the definition to this: + +```meson +project('foobar', 'cpp', default_options : ['cpp_std=c++14']) +``` + +But when you recompile, it still uses `c++11`. The reason for this is +that default options are only looked at when you are setting up a +build directory for the very first time. After that the setting is +considered to have a value and thus the default value is ignored. To +change an existing build dir to `c++14`, either reconfigure your build +dir with `meson configure` or delete the build dir and recreate it +from scratch. + +The reason we don't automatically change the option value when the +default is changed is that it is impossible to know to do that +reliably. The actual question that we need to solve is "if the +option's value is foo and the default value is bar, should we change +the option value to bar also". There are many choices: + + - if the user has changed the value themselves from the default, then + we must not change it back + + - if the user has not changed the value, but changes the default + value, then this section's premise would seem to indicate that the + value should be changed + + - suppose the user changes the value from the default to foo, then + back to bar and then changes the default value to bar, the correct + step to take is ambiguous by itself + +In order to solve the latter question we would need to remember not +only the current and old value, but also all the times the user has +changed the value and from which value to which other value. Since +people don't remember their own actions that far back, toggling +between states based on long history would be confusing. + +Because of this we do the simple and understandable thing: default +values are only defaults and will never affect the value of an option +once set. + +## Does wrap download sources behind my back? + +It does not. In order for Meson to download anything from the net +while building, two conditions must be met. + +First of all there needs to be a `.wrap` file with a download URL in +the `subprojects` directory. If one does not exist, Meson will not +download anything. + +The second requirement is that there needs to be an explicit +subproject invocation in your `meson.build` files. Either +`subproject('foobar')` or `dependency('foobar', fallback : ['foobar', +'foo_dep'])`. If these declarations either are not in any build file +or they are not called (due to e.g. `if/else`) then nothing is +downloaded. + +If this is not sufficient for you, starting from release 0.40.0 Meson +has a option called `wrap-mode` which can be used to disable wrap +downloads altogether with `--wrap-mode=nodownload`. You can also +disable dependency fallbacks altogether with `--wrap-mode=nofallback`, +which also implies the `nodownload` option. + +If on the other hand, you want meson to always use the fallback +for dependencies, even when an external dependency exists and could +satisfy the version requirements, for example in order to make +sure your project builds when fallbacks are used, you can use +`--wrap-mode=forcefallback` since 0.46.0. + +## Why is Meson implemented in Python rather than [programming language X]? + +Because build systems are special in ways normal applications aren't. + +Perhaps the biggest limitation is that because Meson is used to build +software at the very lowest levels of the OS, it is part of the core +bootstrap for new systems. Whenever support for a new CPU architecture +is added, Meson must run on the system before software using it can be +compiled natively. This requirement adds two hard limitations. + +The first one is that Meson must have the minimal amount of +dependencies, because they must all be built during the bootstrap to +get Meson to work. + +The second is that Meson must support all CPU architectures, both +existing and future ones. As an example many new programming languages +have only an LLVM based compiler available. LLVM has limited CPU +support compared to, say, GCC, and thus bootstrapping Meson on such +platforms would first require adding new processor support to +LLVM. This is in most cases unfeasible. + +A further limitation is that we want developers on as many platforms +as possible to submit to Meson development using the default tools +provided by their operating system. In practice what this means is +that Windows developers should be able to contribute using nothing but +Visual Studio. + +At the time of writing (April 2018) there are only three languages +that could fulfill these requirements: + + - C + - C++ + - Python + +Out of these we have chosen Python because it is the best fit for our +needs. + +## I have proprietary compiler toolchain X that does not work with Meson, how can I make it work? + +Meson needs to know several details about each compiler in order to +compile code with it. These include things such as which compiler +flags to use for each option and how to detect the compiler from its +output. This information can not be input via a configuration file, +instead it requires changes to Meson's source code that need to be +submitted to Meson master repository. In theory you can run your own +forked version with custom patches, but that's not good use of your +time. Please submit the code upstream so everyone can use the +toolchain. + +The steps for adding a new compiler for an existing language are +roughly the following. For simplicity we're going to assume a C +compiler. + +- Create a new class with a proper name in + `mesonbuild/compilers/c.py`. Look at the methods that other + compilers for the same language have and duplicate what they do. + +- If the compiler can only be used for cross compilation, make sure to + flag it as such (see existing compiler classes for examples). + +- Add detection logic to `mesonbuild/environment.py`, look for a + method called `detect_c_compiler`. + +- Run the test suite and fix issues until the tests pass. + +- Submit a pull request, add the result of the test suite to your MR + (linking an existing page is fine). + +- If the compiler is freely available, consider adding it to the CI + system. + +## Why does building my project with MSVC output static libraries called `libfoo.a`? + +The naming convention for static libraries on Windows is usually +`foo.lib`. Unfortunately, import libraries are also called `foo.lib`. + +This causes filename collisions with the default library type where we +build both shared and static libraries, and also causes collisions +during installation since all libraries are installed to the same +directory by default. + +To resolve this, we decided to default to creating static libraries of +the form `libfoo.a` when building with MSVC. This has the following +advantages: + +1. Filename collisions are completely avoided. +1. The format for MSVC static libraries is `ar`, which is the same as the GNU + static library format, so using this extension is semantically correct. +1. The static library filename format is now the same on all platforms and with + all toolchains. +1. Both Clang and GNU compilers can search for `libfoo.a` when specifying + a library as `-lfoo`. This does not work for alternative naming schemes for + static libraries such as `libfoo.lib`. +1. Since `-lfoo` works out of the box, pkgconfig files will work correctly for + projects built with both MSVC, GCC, and Clang on Windows. +1. MSVC does not have arguments to search for library filenames, and [it does + not care what the extension is](https://docs.microsoft.com/en-us/cpp/build/reference/link-input-files?view=vs-2019), + so specifying `libfoo.a` instead of `foo.lib` does not change the workflow, + and is an improvement since it's less ambiguous. + +If, for some reason, you really need your project to output static libraries of +the form `foo.lib` when building with MSVC, you can set the +[`name_prefix:`](https://mesonbuild.com/Reference-manual.html#library) +kwarg to `''` and the [`name_suffix:`](https://mesonbuild.com/Reference-manual.html#library) +kwarg to `'lib'`. To get the default behaviour for each, you can either not +specify the kwarg, or pass `[]` (an empty array) to it. + +## Do I need to add my headers to the sources list like in Autotools? + +Autotools requires you to add private and public headers to the sources list so +that it knows what files to include in the tarball generated by `make dist`. +Meson's `dist` command simply gathers everything committed to your git/hg +repository and adds it to the tarball, so adding headers to the sources list is +pointless. + +Meson uses Ninja which uses compiler dependency information to automatically +figure out dependencies between C sources and headers, so it will rebuild +things correctly when a header changes. + +The only exception to this are generated headers, for which you must [declare +dependencies correctly](#how-do-i-tell-meson-that-my-sources-use-generated-headers). + +If, for whatever reason, you do add non-generated headers to the sources list +of a target, Meson will simply ignore them. + +## How do I tell Meson that my sources use generated headers? + +Let's say you use a [`custom_target()`](https://mesonbuild.com/Reference-manual.html#custom_target) +to generate the headers, and then `#include` them in your C code. Here's how +you ensure that Meson generates the headers before trying to compile any +sources in the build target: + +```meson +libfoo_gen_headers = custom_target('gen-headers', ..., output: 'foo-gen.h') +libfoo_sources = files('foo-utils.c', 'foo-lib.c') +# Add generated headers to the list of sources for the build target +libfoo = library('foo', sources: [libfoo_sources + libfoo_gen_headers]) +``` + +Now let's say you have a new target that links to `libfoo`: + +```meson +libbar_sources = files('bar-lib.c') +libbar = library('bar', sources: libbar_sources, link_with: libfoo) +``` + +This adds a **link-time** dependency between the two targets, but note that the +sources of the targets have **no compile-time** dependencies and can be built +in any order; which improves parallelism and speeds up builds. + +If the sources in `libbar` *also* use `foo-gen.h`, that's a *compile-time* +dependency, and you'll have to add `libfoo_gen_headers` to `sources:` for +`libbar` too: + +```meson +libbar_sources = files('bar-lib.c') +libbar = library('bar', sources: libbar_sources + libfoo_gen_headers, link_with: libfoo) +``` + +Alternatively, if you have multiple libraries with sources that link to +a library and also use its generated headers, this code is equivalent to above: + +```meson +# Add generated headers to the list of sources for the build target +libfoo = library('foo', sources: libfoo_sources + libfoo_gen_headers) + +# Declare a dependency that will add the generated headers to sources +libfoo_dep = declare_dependency(link_with: libfoo, sources: libfoo_gen_headers) + +... + +libbar = library('bar', sources: libbar_sources, dependencies: libfoo_dep) +``` + +**Note:** You should only add *headers* to `sources:` while declaring +a dependency. If your custom target outputs both sources and headers, you can +use the subscript notation to get only the header(s): + +```meson +libfoo_gen_sources = custom_target('gen-headers', ..., output: ['foo-gen.h', 'foo-gen.c']) +libfoo_gen_headers = libfoo_gen_sources[0] + +# Add static and generated sources to the target +libfoo = library('foo', sources: libfoo_sources + libfoo_gen_sources) + +# Declare a dependency that will add the generated *headers* to sources +libfoo_dep = declare_dependency(link_with: libfoo, sources: libfoo_gen_headers) +... +libbar = library('bar', sources: libbar_sources, dependencies: libfoo_dep) +``` + +A good example of a generator that outputs both sources and headers is +[`gnome.mkenums()`](https://mesonbuild.com/Gnome-module.html#gnomemkenums). + +## How do I disable exceptions and RTTI in my C++ project? + +With the `cpp_eh` and `cpp_rtti` options. A typical invocation would +look like this: + +``` +meson -Dcpp_eh=none -Dcpp_rtti=false +``` + +The RTTI option is only available since Meson version 0.53.0. diff --git a/meson/docs/markdown/Feature-autodetection.md b/meson/docs/markdown/Feature-autodetection.md new file mode 100644 index 000000000..4d366d939 --- /dev/null +++ b/meson/docs/markdown/Feature-autodetection.md @@ -0,0 +1,39 @@ +--- +short-description: Auto-detection of features like ccache and code coverage +... + +# Feature autodetection + +Meson is designed for high productivity. It tries to do as many things +automatically as it possibly can. + +Ccache +-- + +[Ccache](https://ccache.dev/) is a cache system designed to make +compiling faster. When you run Meson for the first time for a given +project, it checks if Ccache is installed. If it is, Meson will use it +automatically. + +If you do not wish to use Ccache for some reason, just specify your +compiler with environment variables `CC` and/or `CXX` when first +running Meson (remember that once specified the compiler can not be +changed). Meson will then use the specified compiler without Ccache. + +Coverage +-- + +When doing a code coverage build, Meson will check for existence of +the binaries `gcovr`, `lcov` and `genhtml`. If version 3.3 or higher +of the first is found, targets called *coverage-text*, *coverage-xml* +and *coverage-html* are generated. Alternatively, if the latter two +are found, only the target *coverage-html* is generated. Coverage +reports can then be produced simply by calling e.g. `meson compile +coverage-xml`. As a convenience, a high-level *coverage* target is +also generated which will produce all 3 coverage report types, if +possible. + +Note that generating any of the coverage reports described above +requires the tests (i.e. `meson test`) to finish running so the +information about the functions that are called in the tests can be +gathered for the report. diff --git a/meson/docs/markdown/Fs-module.md b/meson/docs/markdown/Fs-module.md new file mode 100644 index 000000000..e5941a9e9 --- /dev/null +++ b/meson/docs/markdown/Fs-module.md @@ -0,0 +1,196 @@ +# FS (filesystem) module + +This module provides functions to inspect the file system. It is +available starting with version 0.53.0. + +## File lookup rules + +Non-absolute paths are looked up relative to the directory where the +current `meson.build` file is. + +If specified, a leading `~` is expanded to the user home directory. +Environment variables are not available as is the rule throughout Meson. +That is, $HOME, %USERPROFILE%, $MKLROOT, etc. have no meaning to the Meson +filesystem module. If needed, pass such variables into Meson via command +line options in `meson_options.txt`, native-file or cross-file. + +Where possible, symlinks and parent directory notation are resolved to an +absolute path. + +### exists + +Takes a single string argument and returns true if an entity with that +name exists on the file system. This can be a file, directory or a +special entry such as a device node. + +### is_dir + +Takes a single string argument and returns true if a directory with +that name exists on the file system. + +### is_file + +Takes a single string argument and returns true if an file with that +name exists on the file system. + +### is_symlink + +Takes a single string argument and returns true if the path pointed to +by the string is a symbolic link. + +## File Parameters + +### is_absolute + +*since 0.54.0* + +Return a boolean indicating if the path string specified is absolute, WITHOUT expanding `~`. + +Examples: + +```meson +fs.is_absolute('~') # false + +home = fs.expanduser('~') +fs.is_absolute(home) # true + +fs.is_absolute(home / 'foo') # true, even if ~/foo doesn't exist + +fs.is_absolute('foo/bar') # false, even if ./foo/bar exists +``` + +### hash + +The `fs.hash(filename, hash_algorithm)` method returns a string containing +the hexidecimal `hash_algorithm` digest of a file. +`hash_algorithm` is a string; the available hash algorithms include: +md5, sha1, sha224, sha256, sha384, sha512. + +### size + +The `fs.size(filename)` method returns the size of the file in integer bytes. + +### is_samepath + +The `fs.is_samepath(path1, path2)` returns boolean `true` if both paths resolve to the same path. +For example, suppose path1 is a symlink and path2 is a relative path. +If path1 can be resolved to path2, then `true` is returned. +If path1 is not resolved to path2, `false` is returned. +If path1 or path2 do not exist, `false` is returned. + +Examples: + +```meson +x = 'foo.txt' +y = 'sub/../foo.txt' +z = 'bar.txt' # a symlink pointing to foo.txt +j = 'notafile.txt' # non-existant file + +fs.is_samepath(x, y) # true +fs.is_samepath(x, z) # true +fs.is_samepath(x, j) # false + +p = 'foo/bar' +q = 'foo/bar/baz/..' +r = 'buz' # a symlink pointing to foo/bar +s = 'notapath' # non-existant directory + +fs.is_samepath(p, q) # true +fs.is_samepath(p, r) # true +fs.is_samepath(p, s) # false +``` + +## Filename modification + +The files need not actually exist yet for these path string manipulation methods. + +### expanduser + +*since 0.54.0* + +A path string with a leading `~` is expanded to the user home directory + +Examples: + +```meson +fs.expanduser('~') # user home directory + +fs.expanduser('~/foo') # /foo +``` + +### as_posix + +*since 0.54.0* + +`fs.as_posix(path)` assumes a Windows path, even if on a Unix-like system. +Thus, all `'\'` or `'\\'` are turned to '/', even if you meant to escape a character. + +Examples + +```meson +fs.as_posix('\\') == '/' # true +fs.as_posix('\\\\') == '/' # true + +fs.as_posix('foo\\bar/baz') == 'foo/bar/baz' # true +``` + +### replace_suffix + +The `replace_suffix` method is a *string manipulation* convenient for filename modifications. +It allows changing the filename suffix like: + +#### swap suffix + +```meson +original = '/opt/foo.ini' +new = fs.replace_suffix(original, '.txt') # /opt/foo.txt +``` + +#### add suffix + +```meson +original = '/opt/foo' +new = fs.replace_suffix(original, '.txt') # /opt/foo.txt +``` + +#### compound suffix swap + +```meson +original = '/opt/foo.dll.a' +new = fs.replace_suffix(original, '.so') # /opt/foo.dll.so +``` + +#### delete suffix + +```meson +original = '/opt/foo.dll.a' +new = fs.replace_suffix(original, '') # /opt/foo.dll +``` + +### parent + +Returns the parent directory (i.e. dirname). + +```meson +new = fs.parent('foo/bar') # foo +new = fs.parent('foo/bar/baz.dll') # foo/bar +``` + +### name + +Returns the last component of the path (i.e. basename). + +```meson +fs.name('foo/bar/baz.dll.a') # baz.dll.a +``` + +### stem + +*since 0.54.0* + +Returns the last component of the path, dropping the last part of the suffix + +```meson +fs.stem('foo/bar/baz.dll') # baz +fs.stem('foo/bar/baz.dll.a') # baz.dll +``` diff --git a/meson/docs/markdown/Generating-sources.md b/meson/docs/markdown/Generating-sources.md new file mode 100644 index 000000000..e22112ffc --- /dev/null +++ b/meson/docs/markdown/Generating-sources.md @@ -0,0 +1,202 @@ +--- +short-description: Generation of source files before compilation +... + +# Generating sources + +Sometimes source files need to be preprocessed before they are passed +to the actual compiler. As an example you might want build an IDL +compiler and then run some files through that to generate actual +source files. In Meson this is done with +[`generator()`](Reference-manual.md#generator) or +[`custom_target()`](Reference-manual.md#custom_target). + +## Using custom_target() + +Let's say you have a build target that must be built using sources +generated by a compiler. The compiler can either be a built target: + +```meson +mycomp = executable('mycompiler', 'compiler.c') +``` + +Or an external program provided by the system, or script inside the +source tree: + +```meson +mycomp = find_program('mycompiler') +``` + +Custom targets can take zero or more input files and use them to +generate one or more output files. Using a custom target, you can run +this compiler at build time to generate the sources: + +```meson +gen_src = custom_target('gen-output', + input : ['somefile1.c', 'file2.c'], + output : ['out.c', 'out.h'], + command : [mycomp, '@INPUT@', + '--c-out', '@OUTPUT0@', + '--h-out', '@OUTPUT1@']) +``` + +The `@INPUT@` there will be transformed to `'somefile1.c' +'file2.c'`. Just like the output, you can also refer to each input +file individually by index. + +Then you just put that in your program and you're done. + +### Generating headers + +Adding a generated header to a source list will ensure that the header is +generated and that the proper include paths are created for the target: + +```meson +prog_python = import('python').find_installation('python3') + +foo_c = custom_target( + 'foo.c', + output : 'foo.c', + input : 'my_gen.py', + command : [prog_python, '@INPUT@', '--code', '@OUTPUT@'], +) + +foo_h = custom_target( + 'foo.h', + output : 'foo.h', + input : 'my_gen.py', + command : [prog_python, '@INPUT@', '--header', '@OUTPUT@'], +) + +libfoo = static_library('foo', [foo_c, foo_h]) + +executable('myexe', ['main.c', foo_h], link_with : libfoo) +``` + +Each target that depends on a generated header should add that header to it's sources, +as seen above with `libfoo` and `myexe`. This is because there is no way for +meson or the backend to know that `myexe` depends on `foo.h` just because +`libfoo` does, it could be a private header. + +### Generating multiple files at a time + +Sometimes it makes sense for a single generator to create two or more files at +a time, (perhaps a header and source file), meson has this case covered as +well. `custom_target`s can be indexed like a list to get each output file +separately. The order is the same as the order of the output argument to +`custom_target` + +```meson +prog_python = import('python').find_installation('python3') + +foo_ch = custom_target( + 'foo.[ch]', + output : ['foo.c', 'foo.h'], + input : 'my_gen.py', + command : [prog_python, '@INPUT@', '@OUTPUT@'], +) + +libfoo = static_library('foo', [foo_ch]) + +executable('myexe', ['main.c', foo_ch[1]], link_with : libfoo) +``` + +In this case `libfoo` depends on both `foo.c` and `foo.h` but `myexe` only +depends on `foo.h`, the second output. + +### Using dependencies to manage generated resources + +In some cases it might be easier to use `declare_dependency` to "bundle" the header +and library dependency, especially if there are many generated headers: + +```meson +idep_foo = declare_dependency( + sources : [foo_h, bar_h], + link_with : [libfoo], +) +``` + +See [dependencies](Dependencies.md#declaring-your-own), and +[reference](Reference-manual.md#declare_dependency) for more information. + +## Using generator() + +Generators are similar to custom targets, except that we define a +*generator*, which defines how to transform an input file into one or +more output files, and then use that on as many input files as we +want. + +Note that generators should only be used for outputs that will only be +used as inputs for a build target or a custom target. When you use the +processed output of a generator in multiple targets, the generator +will be run multiple times to create outputs for each target. Each +output will be created in a target-private directory `@BUILD_DIR@`. + +If you want to generate files for general purposes such as for +generating headers to be used by several sources, or data that will be +installed, and so on, use a +[`custom_target()`](Reference-manual.md#custom_target) instead. + + +```meson +gen = generator(mycomp, + output : '@BASENAME@.c', + arguments : ['@INPUT@', '@OUTPUT@']) +``` + +The first argument is the executable file to run. The next file +specifies a name generation rule. It specifies how to build the output +file name for a given input name. `@BASENAME@` is a placeholder for +the input file name without preceding path or suffix (if any). So if +the input file name were `some/path/filename.idl`, then the output +name would be `filename.c`. You can also use `@PLAINNAME@`, which +preserves the suffix which would result in a file called +`filename.idl.c`. The last line specifies the command line arguments +to pass to the executable. `@INPUT@` and `@OUTPUT@` are placeholders +for the input and output files, respectively, and will be +automatically filled in by Meson. If your rule produces multiple +output files and you need to pass them to the command line, append the +location to the output holder like this: `@OUTPUT0@`, `@OUTPUT1@` and +so on. + +With this rule specified we can generate source files and add them to +a target. + +```meson +gen_src = gen.process('input1.idl', 'input2.idl') +executable('program', 'main.c', gen_src) +``` + +Generators can also generate multiple output files with unknown names: + +```meson +gen2 = generator(someprog, + output : ['@BASENAME@.c', '@BASENAME@.h'], + arguments : ['--out_dir=@BUILD_DIR@', '@INPUT@']) +``` + +In this case you can not use the plain `@OUTPUT@` variable, as it +would be ambiguous. This program only needs to know the output +directory, it will generate the file names by itself. + +To make passing different additional arguments to the generator +program at each use possible, you can use the `@EXTRA_ARGS@` string in +the `arguments` list. Note that this placeholder can only be present +as a whole string, and not as a substring. The main reason is that it +represents a list of strings, which may be empty, or contain multiple +elements; and in either case, interpolating it into the middle of a +single string would be troublesome. If there are no extra arguments +passed in from a `process()` invocation, the placeholder is entirely +omitted from the actual list of arguments, so an empty string won't be +passed to the generator program because of this. If there are multiple +elements in `extra_args`, they are inserted into to the actual +argument list as separate elements. + +```meson +gen3 = generator(genprog, + output : '@BASENAME@.cc', + arguments : ['@INPUT@', '@EXTRA_ARGS@', '@OUTPUT@']) +gen3_src1 = gen3.process('input1.y') +gen3_src2 = gen3.process('input2.y', extra_args: '--foo') +gen3_src3 = gen3.process('input3.y', extra_args: ['--foo', '--bar']) +``` diff --git a/meson/docs/markdown/Getting-meson.md b/meson/docs/markdown/Getting-meson.md new file mode 100644 index 000000000..e0e572228 --- /dev/null +++ b/meson/docs/markdown/Getting-meson.md @@ -0,0 +1,84 @@ +# Getting meson + +Meson is implemented in Python 3, and requires 3.5 or newer. If your operating +system provides a package manager, you should install it with that. For +platforms that don't have a package manager, you need to download it from +[Python's home page]. See below for [platform-specific Python3 +quirks](#platformspecific-install-quirks). + +## Downloading Meson + +Meson releases can be downloaded from the [GitHub release page], and you can +run `./meson.py` from inside a release or the git repository itself without +doing anything special. + +On Windows, if you did not install Python with the installer options that make +Python scripts executable, you will have to run `python /path/to/meson.py`, +where `python` is Python 3.5 or newer. + +The newest development code can be obtained directly from [Git], and we strive +to ensure that it will always be working and usable. All commits go through +a pull-request process that runs CI and tests several platforms. + +## Installing Meson with pip + +Meson is available in the [Python Package Index] and can be installed with +`pip3 install meson` which requires root and will install it system-wide. + +Alternatively, you can use `pip3 install --user meson` which will install it +for your user and does not require any special privileges. This will install +the package in `~/.local/`, so you will have to add `~/.local/bin` to your +`PATH`. + +## Installing Meson and Ninja with the MSI installer + +We provide an MSI installer on the [GitHub release page] that can be used to +install both Meson and Ninja at once for Windows. It also contains an embedded +copy of Python, so scripts that use the [Python module](Python-module.md) and +do not have any external dependencies will continue to work as expected. + +Please note that this is a new feature, so bug reports are expected and welcome! + +## Dependencies + +In the most common case, you will need the [Ninja executable] for using the +`ninja` backend, which is the default in Meson. This backend can be used on all +platforms and with all toolchains, including GCC, Clang, Visual Studio, MinGW, +ICC, ARMCC, etc. + +You can use the version provided by your package manager if possible, otherwise +download the binary executable from the [Ninja project's release +page](https://github.com/ninja-build/ninja/releases). + +If you will only use the Visual Studio backend (`--backend=vs`) to generate +Visual Studio solutions on Windows or the XCode backend (`--backend=xcode`) to +generate XCode projects on macOS, you do not need Ninja. + +# Platform-specific install quirks + +## Windows Python3 quirks + +When installing Python 3, it is highly recommended (but not required) that you +select the installer options as follows: + +![installer step 1](images/py3-install-1.png "Enable 'Add Python 3.6 to PATH' and click 'Customize installation'") +![installer step 2](images/py3-install-2.png "Optional Features: ensure 'pip' is enabled") +![installer step 3](images/py3-install-3.png "Advanced Options: enable 'Install for all users'") + +With this, you will have `python` and `pip` in `PATH`, and you can install +Meson with pip. You will also be able to directly run `meson` in any shell on +Windows instead of having to run `py -3` with the full path to the `meson.py` +script. + +## MSYS2 Python3 quirks + +If you are using MSYS2 on Windows as your development environment, please make +sure that you **do not use** the `msys/python` package to provide Python 3. Use +either `mingw32/mingw-w64-i686-python3` or `mingw64/mingw-w64-x86_64-python3` +depending on which MinGW target you are building for. + + [GitHub release page]: https://github.com/mesonbuild/meson/releases + [Python Package Index]: https://pypi.python.org/pypi/meson/ + [Git]: https://github.com/mesonbuild/meson + [Python's home page]: https://www.python.org/downloads/ + [Ninja executable]: https://ninja-build.org/ diff --git a/meson/docs/markdown/Gnome-module.md b/meson/docs/markdown/Gnome-module.md new file mode 100644 index 000000000..3d06233d3 --- /dev/null +++ b/meson/docs/markdown/Gnome-module.md @@ -0,0 +1,358 @@ +# GNOME module + +This module provides helper tools for build operations needed when +building Gnome/GLib programs. + +**Note**: the compilation commands here might not work properly when + you change the source files. This is a bug in the respective + compilers which do not expose the required dependency + information. This has been reported upstream in [this bug]. Until + this is fixed you need to be careful when changing your source + files. + + [this bug]: https://bugzilla.gnome.org/show_bug.cgi?id=745754 + +## Usage + +To use this module, just do: **`gnome = import('gnome')`**. The +following functions will then be available as methods on the object +with the name `gnome`. You can, of course, replace the name `gnome` +with anything else. + +### gnome.compile_resources() + +This function compiles resources specified in an XML file into code +that can be embedded inside the main binary. Similar a build target it +takes two positional arguments. The first one is the name of the +resource and the second is the XML file containing the resource +definitions. If the name is `foobar`, Meson will generate a header +file called `foobar.h`, which you can then include in your sources. + +* `c_name`: passed to the resource compiler as an argument after + `--c-name` +* `dependencies`: extra targets to depend upon for building +* `export`: (*Added 0.37.0*) if true, export the symbols of the + generated sources +* `extra_args`: extra command line arguments to pass to the resource +* `gresource_bundle`: (*Added 0.37.0*) if true, output a `.gresource` + file instead of source +* `install`: (*Added 0.37.0*) if true, install the gresource file +* `install_dir`: (*Added 0.37.0*) location to install the header or + bundle depending on previous options +* `install_header`: (*Added 0.37.0*) if true, install the header file +* `source_dir`: a list of directories where the resource compiler + should look up the files + +Returns an array containing: `[c_source, header_file]` or +`[gresource_bundle]` + +Example: + +```meson +gnome = import('gnome') + +asresources = gnome.compile_resources( + 'as-resources', 'data/asresources.gresource.xml', + source_dir: 'data', + c_name: 'as' +) + +executable( + meson.project_name(), + asresources, + dependencies: my_deps, + install: true +) +``` + +### gnome.generate_gir() + +Generates GObject introspection data. + +Takes one or more positional arguments: + +Either one or more library objects you want to build gir data for, or a single +executable object. + +There are several keyword arguments. Many of these map directly to the +`g-ir-scanner` tool so see its documentation for more information. + +* `dependencies`: deps to use during introspection scanning +* `extra_args`: command line arguments to pass to gir compiler +* `export_packages`: extra packages the gir file exports +* `sources`: the list of sources to be scanned for gir data +* `nsversion`: namespace version +* `namespace`: the namespace for this gir object which determines + output files +* `identifier_prefix`: the identifier prefix for the gir object, + e.g. `Gtk` +* `includes`: list of gir names to be included, can also be a GirTarget +* `header`: *(Added 0.43.0)* name of main c header to include for the library, e.g. `glib.h` +* `include_directories`: extra include paths to look for gir files +* `install`: if true, install the generated files +* `install_dir_gir`: (*Added 0.35.0*) which directory to install the + gir file into +* `install_dir_typelib`: (*Added 0.35.0*) which directory to install + the typelib file into +* `link_with`: list of libraries to link with +* `symbol_prefix`: the symbol prefix for the gir object, e.g. `gtk`, + (*Since 0.43.0*) an ordered list of multiple prefixes is allowed +* `fatal_warnings`: *Since 0.55.0* turn scanner warnings into fatal errors. + +Returns an array of two elements which are: `[gir_target, +typelib_target]` + +### gnome.genmarshal() + +Generates a marshal file using the `glib-genmarshal` tool. The first +argument is the basename of the output files. + +* `extra_args`: (*Added 0.42.0*) additional command line arguments to + pass +* `install_header`: if true, install the generated header +* `install_dir`: directory to install header to +* `nostdinc`: if true, don't include the standard marshallers from + glib +* `internal`: if true, mark generated sources as internal to + `glib-genmarshal` (*Requires GLib 2.54*) +* `prefix`: the prefix to use for symbols +* `skip_source`: if true, skip source location comments +* `stdinc`: if true, include the standard marshallers from glib +* `sources`: the list of sources to use as inputs +* `valist_marshallers`: if true, generate va_list marshallers + +*Added 0.35.0* + +Returns an array of two elements which are: `[c_source, header_file]` + +### gnome.mkenums() + +Generates enum files for GObject using the `glib-mkenums` tool. The +first argument is the base name of the output files, unless `c_template` +and `h_template` are specified. In this case, the output files will be +the base name of the values passed as templates. + +This method is essentially a wrapper around the `glib-mkenums` tool's +command line API. It is the most featureful method for enum creation. + +Typically you either provide template files or you specify the various +template sections manually as strings. + +Most libraries and applications will be using the same standard +template with only minor tweaks, in which case the +`gnome.mkenums_simple()` convenience method can be used instead. + +Note that if you `#include` the generated header in any of the sources +for a build target, you must add the generated header to the build +target's list of sources to codify the dependency. This is true for +all generated sources, not just `mkenums`. + +* `c_template`: template to use for generating the source +* `comments`: comment passed to the command +* `h_template`: template to use for generating the header +* `identifier_prefix`: prefix to use for the identifiers +* `install_header`: if true, install the generated header +* `install_dir`: directory to install the header +* `sources`: the list of sources to make enums with +* `symbol_prefix`: prefix to use for the symbols +* `eprod`: enum text +* `fhead`: file header +* `fprod`: file text +* `ftail`: file tail +* `vhead`: value text +* `vtail`: value tail + +*Added 0.35.0* + +Returns an array of two elements which are: `[c_source, header_file]` + +### gnome.mkenums_simple() + +Generates enum `.c` and `.h` files for GObject using the +`glib-mkenums` tool with the standard template used by most +GObject-based C libraries. The first argument is the base name of the +output files. + +Note that if you `#include` the generated header in any of the sources +for a build target, you must add the generated header to the build +target's list of sources to codify the dependency. This is true for +all generated sources, not just `mkenums_simple`. + +* `body_prefix`: additional prefix at the top of the body file, + e.g. for extra includes +* `decorator`: optional decorator for the function declarations, + e.g. `GTK_AVAILABLE` or `GST_EXPORT` +* `function_prefix`: additional prefix for function names, e.g. in + case you want to add a leading underscore to functions used only + internally +* `header_prefix`: additional prefix at the top of the header file, + e.g. for extra includes (which may be needed if you specify a + decorator for the function declarations) +* `install_header`: if true, install the generated header +* `install_dir`: directory to install the header +* `identifier_prefix`: prefix to use for the identifiers +* `sources`: the list of sources to make enums with +* `symbol_prefix`: prefix to use for the symbols + +Example: + +```meson +gnome = import('gnome') + +my_headers = ['myheader1.h', 'myheader2.h'] +my_sources = ['mysource1.c', 'mysource2.c'] + +# will generate myenums.c and myenums.h based on enums in myheader1.h and myheader2.h +enums = gnome.mkenums_simple('myenums', sources : my_headers) + +mylib = library('my', my_sources, enums, + include_directories: my_incs, + dependencies: my_deps, + c_args: my_cargs, + install: true) +``` + +*Added 0.42.0* + +Returns an array of two elements which are: `[c_source, header_file]` + +### gnome.compile_schemas() + +When called, this method will compile the gschemas in the current +directory. Note that this is not for installing schemas and is only +useful when running the application locally for example during tests. + +* `build_by_default`: causes, when set to true, to have this target be + built by default, that is, when invoking plain `meson compile`, the default + value is true for all built target types +* `depend_files`: files ([`string`](Reference-manual.md#string-object), + [`files()`](Reference-manual.md#files), or + [`configure_file()`](Reference-manual.md#configure_file)) of + schema source XML files that should trigger a re-compile if changed. + +### gnome.gdbus_codegen() + +Compiles the given XML schema into gdbus source code. Takes two +positional arguments, the first one specifies the base name to use +while creating the output source and header and the second specifies +one XML file. + +* `sources`: list of XML files +* `interface_prefix`: prefix for the interface +* `namespace`: namespace of the interface +* `extra_args`: (*Added 0.47.0*) additional command line arguments to pass +* `autocleanup`: *(Added 0.47.0)* if set generates autocleanup code. Can be one of `none`, `objects` or `all` +* `object_manager`: *(Added 0.40.0)* if true generates object manager code +* `annotations`: *(Added 0.43.0)* list of lists of 3 strings for the annotation for `'ELEMENT', 'KEY', 'VALUE'` +* `docbook`: *(Added 0.43.0)* prefix to generate `'PREFIX'-NAME.xml` docbooks +* `build_by_default`: causes, when set to true, to have this target be + built by default, that is, when invoking plain `meson compile`, the default + value is true for all built target types +* `install_dir`: (*Added 0.46.0*) location to install the header or + bundle depending on previous options +* `install_header`: (*Added 0.46.0*) if true, install the header file + +Starting *0.46.0*, this function returns a list of at least two custom targets +(in order): one for the source code and one for the header. The list will +contain a third custom target for the generated docbook files if that keyword +argument is passed. + +Earlier versions return a single custom target representing all the outputs. +Generally, you should just add this list of targets to a top level target's +source list. + +Example: + +```meson +gnome = import('gnome') + +# The returned source would be passed to another target +gdbus_src = gnome.gdbus_codegen('example-interface', + sources: 'com.example.Sample.xml', + interface_prefix : 'com.example.', + namespace : 'Sample', + annotations : [ + ['com.example.Hello()', 'org.freedesktop.DBus.Deprecated', 'true'] + ], + docbook : 'example-interface-doc' +) +``` + +### gnome.generate_vapi() + +Creates a VAPI file from gir. The first argument is the name of the +library. + +* `gir_dirs`: extra directories to include for gir files +* `install`: if true, install the VAPI file +* `install_dir`: location to install the VAPI file (defaults to datadir/vala/vapi) +* `metadata_dirs`: extra directories to include for metadata files +* `packages`: VAPI packages that are depended upon +* `sources`: the gir source to generate the VAPI from +* `vapi_dirs`: extra directories to include for VAPI files + +Returns a custom dependency that can be included when building other +VAPI or Vala binaries. + +*Added 0.36.0* + +### gnome.yelp() + +Installs help documentation using Yelp. The first argument is the +project id. + +This also creates two targets for translations +`help-$project-update-po` and `help-$project-pot`. + +* `languages`: list of languages for translations +* `media`: list of media such as images +* `sources`: list of pages +* `symlink_media`: if media should be symlinked not copied (defaults to `true` since 0.42.0) + +Note that very old versions of yelp may not support symlinked media; +At least 3.10 should work. + +*Added 0.36.0* + +### gnome.gtkdoc() + +Compiles and installs gtkdoc documentation into +`prefix/share/gtk-doc/html`. Takes one positional argument: The name +of the module. + +* `content_files`: a list of content files +* `dependencies`: a list of dependencies +* `fixxref_args`: a list of arguments to pass to `gtkdoc-fixxref` +* `gobject_typesfile`: a list of type files +* `include_directories`: extra include paths to pass to `gtkdoc-scangobj` +* `ignore_headers`: a list of header files to ignore +* `html_assets`: a list of assets for the HTML pages +* `html_args` a list of arguments to pass to `gtkdoc-mkhtml` +* `install`: if true, installs the generated docs +* `install_dir`: the directory to install the generated docs relative + to the gtk-doc html dir or an absolute path (default: module name) +* `main_xml`: specifies the main XML file +* `main_sgml`: equal to `main_xml` +* `mkdb_args`: a list of arguments to pass to `gtkdoc-mkdb` +* `namespace`: specifies the name space to pass to `gtkdoc-mkdb` +* `module_version`: the version of the module, affects the installed location and the devhelp2 file location +* `scan_args`: a list of arguments to pass to `gtkdoc-scan` +* `scanobjs_args`: a list of arguments to pass to `gtkdoc-scangobj` +* `c_args`: (*Added 0.48.0*) additional compile arguments to pass +* `src_dir`: include_directories to include +* `check`: (*Since 0.52.0*) if `true` runs `gtkdoc-check` when running unit tests. + Note that this has the downside of rebuilding the doc for each build, which is + often very slow. It usually should be enabled only in CI. + +This also creates a `$module-doc` target that can be run to build documentation. +Normally the documentation is only built on install. + +*Since 0.52.0* Returns a target object that can be passed as dependency to other +targets using generated doc files (e.g. in `content_files` of another doc). + +### gnome.gtkdoc_html_dir() + +Takes as argument a module name and returns the path where that +module's HTML files will be installed. Usually used with +`install_data` to install extra files, such as images, to the output +directory. diff --git a/meson/docs/markdown/Hotdoc-module.md b/meson/docs/markdown/Hotdoc-module.md new file mode 100644 index 000000000..7d9fc555f --- /dev/null +++ b/meson/docs/markdown/Hotdoc-module.md @@ -0,0 +1,79 @@ +--- +short-description: Hotdoc module +authors: + - name: Thibault Saunier + email: tsaunier@igalia.com + years: [2018] + has-copyright: false +... + +# Hotdoc module + +This module provides helper functions for generating documentation using +[hotdoc]. + +*Added 0.48.0* + +## Usage + +To use this module, just do: **`hotdoc = import('hotdoc')`**. The +following functions will then be available as methods on the object +with the name `hotdoc`. You can, of course, replace the name `hotdoc` +with anything else. + +### hotdoc.generate_doc() + +Generates documentation using [hotdoc] and installs it into `$prefix/share/doc/html`. + +**Positional argument:** + +* `project_name`: The name of the hotdoc project + +**Keyworded arguments:** + +* `sitemap` (*[string] or [file]*) (**required**): The hotdoc sitemap file +* `index` (*[string] or [file]*) (**required**): Location of the index file +* `dependencies`(*[targets]*): Targets on which the documentation generation depends on. +* `subprojects`: A list of `HotdocTarget` that are used as subprojects for hotdoc to generate + the documentation. +* ... Any argument of `hotdoc` can be used replacing dashes (`-`) with underscores (`_`). + For a full list of available parameters, just have a look at `hotdoc help`. + +[file]: Reference-manual.md#files +[string]: Reference-manual.md#string-object +[targets]: Reference-manual.md#build-target-object + +**Returns:** + +`HotdocTarget`: A [`custom_target`](Reference-manual.md#custom-target-object) with the +following extra methods: + +* `config_path`: Path to the generated `hotdoc` configuration file. + +### hotdoc.has_extensions() + +**Positional arguments:** + +* `...`: The hotdoc extension names to look for + +**No keyworded arguments** + +**Returns:** `true` if all the extensions where found, `false` otherwise. + +### Example + +``` meson +hotdoc = import('hotdoc') + +hotdoc.generate_doc('foobar', + project_version: '0.1', + sitemap: 'sitemap.txt', + index: 'index.md', + c_sources: ['path/to/file.c'], + c_smart_index: true, + languages: ['c'], + install: true, +) +``` + +[hotdoc]: https://hotdoc.github.io/ \ No newline at end of file diff --git a/meson/docs/markdown/IDE-integration.md b/meson/docs/markdown/IDE-integration.md new file mode 100644 index 000000000..2cc4f4f98 --- /dev/null +++ b/meson/docs/markdown/IDE-integration.md @@ -0,0 +1,336 @@ +--- +short-description: Meson's API to integrate Meson support into an IDE +... + +# IDE integration + +Meson has exporters for Visual Studio and XCode, but writing a custom backend +for every IDE out there is not a scalable approach. To solve this problem, +Meson provides an API that makes it easy for any IDE or build tools to +integrate Meson builds and provide an experience comparable to a solution +native to the IDE. + +All the resources required for such a IDE integration can be found in +the `meson-info` directory in the build directory. + +The first thing to do when setting up a Meson project in an IDE is to select +the source and build directories. For this example we assume that the source +resides in an Eclipse-like directory called `workspace/project` and the build +tree is nested inside it as `workspace/project/build`. First, we initialize +Meson by running the following command in the source directory. + + meson builddir + +With this command meson will configure the project and also generate +introspection information that is stored in `intro-*.json` files in the +`meson-info` directory. The introspection dump will be automatically updated +when meson is (re)configured, or the build options change. Thus, an IDE can +watch for changes in this directory to know when something changed. + +The `meson-info` directory should contain the following files: + +| File | Description | +| ------------------------------ | ------------------------------------------------------------------- | +| `intro-benchmarks.json` | Lists all benchmarks | +| `intro-buildoptions.json` | Contains a full list of meson configuration options for the project | +| `intro-buildsystem_files.json` | Full list of all meson build files | +| `intro-dependencies.json` | Lists all dependencies used in the project | +| `intro-installed.json` | Contains mapping of files to their installed location | +| `intro-projectinfo.json` | Stores basic information about the project (name, version, etc.) | +| `intro-targets.json` | Full list of all build targets | +| `intro-tests.json` | Lists all tests with instructions how to run them | + +The content of the JSON files is further specified in the remainder of this document. + +## The `targets` section + +The most important file for an IDE is probably `intro-targets.json`. Here each +target with its sources and compiler parameters is specified. The JSON format +for one target is defined as follows: + +```json +{ + "name": "Name of the target", + "id": "The internal ID meson uses", + "type": "", + "defined_in": "/Path/to/the/targets/meson.build", + "subproject": null, + "filename": ["list", "of", "generated", "files"], + "build_by_default": true / false, + "target_sources": [], + "installed": true / false, +} +``` + +If the key `installed` is set to `true`, the key `install_filename` will also +be present. It stores the installation location for each file in `filename`. +If one file in `filename` is not installed, its corresponding install location +is set to `null`. + +The `subproject` key specifies the name of the subproject this target was +defined in, or `null` if the target was defined in the top level project. + +A target usually generates only one file. However, it is possible for custom +targets to have multiple outputs. + +### Target sources + +The `intro-targets.json` file also stores a list of all source objects of the +target in the `target_sources`. With this information, an IDE can provide code +completion for all source files. + +```json +{ + "language": "language ID", + "compiler": ["The", "compiler", "command"], + "parameters": ["list", "of", "compiler", "parameters"], + "sources": ["list", "of", "all", "source", "files", "for", "this", "language"], + "generated_sources": ["list", "of", "all", "source", "files", "that", "where", "generated", "somewhere", "else"] +} +``` + +It should be noted that the compiler parameters stored in the `parameters` +differ from the actual parameters used to compile the file. This is because +the parameters are optimized for the usage in an IDE to provide autocompletion +support, etc. It is thus not recommended to use this introspection information +for actual compilation. + +### Possible values for `type` + +The following table shows all valid types for a target. + +| value of `type` | Description | +| ---------------- | --------------------------------------------------------------------------------------------- | +| `executable` | This target will generate an executable file | +| `static library` | Target for a static library | +| `shared library` | Target for a shared library | +| `shared module` | A shared library that is meant to be used with dlopen rather than linking into something else | +| `custom` | A custom target | +| `run` | A Meson run target | +| `jar` | A Java JAR target | + +### Using `--targets` without a build directory + +It is also possible to get most targets without a build directory. This can be +done by running `meson introspect --targets /path/to/meson.build`. + +The generated output is similar to running the introspection with a build +directory or reading the `intro-targets.json`. However, there are some key +differences: + +- The paths in `filename` now are _relative_ to the future build directory +- The `install_filename` key is completely missing +- There is only one entry in `target_sources`: + - With the language set to `unknown` + - Empty lists for `compiler` and `parameters` and `generated_sources` + - The `sources` list _should_ contain all sources of the target + +There is no guarantee that the sources list in `target_sources` is correct. +There might be differences, due to internal limitations. It is also not +guaranteed that all targets will be listed in the output. It might even be +possible that targets are listed, which won't exist when meson is run normally. +This can happen if a target is defined inside an if statement. +Use this feature with care. + +## Build Options + +The list of all build options (build type, warning level, etc.) is stored in +the `intro-buildoptions.json` file. Here is the JSON format for each option. + +```json +{ + "name": "name of the option", + "description": "the description", + "type": "type ID", + "value": "value depends on type", + "section": "section ID", + "machine": "machine ID" +} +``` + +The supported types are: + + - string + - boolean + - combo + - integer + - array + +For the type `combo` the key `choices` is also present. Here all valid values +for the option are stored. + +The possible values for `section` are: + + - core + - backend + - base + - compiler + - directory + - user + - test + +The `machine` key specifies the machine configuration for the option. Possible +values are: + + - any + - host + - build + +To set the options, use the `meson configure` command. + +Since Meson 0.50.0 it is also possible to get the default buildoptions +without a build directory by providing the root `meson.build` instead of a +build directory to `meson introspect --buildoptions`. + +Running `--buildoptions` without a build directory produces the same output as +running it with a freshly configured build directory. + +However, this behavior is not guaranteed if subprojects are present. Due to +internal limitations all subprojects are processed even if they are never used +in a real meson run. Because of this options for the subprojects can differ. + +## The dependencies section + +The list of all _found_ dependencies can be acquired from +`intro-dependencies.json`. Here, the name, version, compiler and linker +arguments for a dependency are listed. + +### Scanning for dependecie with `--scan-dependencies` + +It is also possible to get most dependencies used without a build directory. +This can be done by running `meson introspect --scan-dependencies /path/to/meson.build`. + +The output format is as follows: + +```json +[ + { + "name": "The name of the dependency", + "required": true, + "version": [">=1.2.3"], + "conditional": false, + "has_fallback": false + } +] +``` + +The `required` keyword specifies whether the dependency is marked as required +in the `meson.build` (all dependencies are required by default). The +`conditional` key indicates whether the `dependency()` function was called +inside a conditional block. In a real meson run these dependencies might not be +used, thus they _may_ not be required, even if the `required` key is set. The +`has_fallback` key just indicates whether a fallback was directly set in the +`dependency()` function. The `version` key always contains a list of version +requirements from the `meson.build` and **not** the actual version of the +dependency on disc. The version list is empty if no version was specified +in the `meson.build`. + +## Tests + +Compilation and unit tests are done as usual by running the `meson compile` and +`meson test` commands. A JSON formatted result log can be found in +`workspace/project/builddir/meson-logs/testlog.json`. + +When these tests fail, the user probably wants to run the failing test in a +debugger. To make this as integrated as possible, extract the tests from the +`intro-tests.json` and `intro-benchmarks.json` files. This provides you with +all the information needed to run the test: what command to execute, command +line arguments, environment variable settings and how to process the output. + +```json +{ + "name": "name of the test", + "workdir": "the working directory (can be null)", + "timeout": "the test timeout", + "suite": ["list", "of", "test", "suites"], + "is_parallel": true / false, + "protocol": "exitcode" / "tap", + "cmd": ["command", "to", "run"], + "env": { + "VARIABLE1": "value 1", + "VARIABLE2": "value 2" + } +} +``` + +## Build system files + +It is also possible to get Meson build files used in your current project. This +can be done by running `meson introspect --buildsystem-files /path/to/builddir`. + +The output format is as follows: + +```json +[ + "/Path/to/the/targets/meson.build", + "/Path/to/the/targets/meson_options.txt", + "/Path/to/the/targets/subdir/meson.build" +] +``` + +# Programmatic interface + +Meson also provides the `meson introspect` for project introspection via the +command line. Use `meson introspect -h` to see all available options. + +This API can also work without a build directory for the `--projectinfo` command. + +# AST of a `meson.build` + +Since meson *0.55.0* it is possible to dump the AST of a `meson.build` as a JSON +object. The interface for this is `meson introspect --ast /path/to/meson.build`. + +Each node of the AST has at least the following entries: + +| Key | Description | +| ------------ | ------------------------------------------------------- | +| `node` | Type of the node (see following table) | +| `lineno` | Line number of the node in the file | +| `colno` | Column number of the node in the file | +| `end_lineno` | Marks the end of the node (may be the same as `lineno`) | +| `end_colno` | Marks the end of the node (may be the same as `colno`) | + +Possible values for `node` with additional keys: + +| Node type | Additional keys | +| -------------------- | ------------------------------------------------ | +| `BooleanNode` | `value`: bool | +| `IdNode` | `value`: str | +| `NumberNode` | `value`: int | +| `StringNode` | `value`: str | +| `ContinueNode` | | +| `BreakNode` | | +| `ArgumentNode` | `positional`: node list; `kwargs`: accept_kwargs | +| `ArrayNode` | `args`: node | +| `DictNode` | `args`: node | +| `EmptyNode` | | +| `OrNode` | `left`: node; `right`: node | +| `AndNode` | `left`: node; `right`: node | +| `ComparisonNode` | `left`: node; `right`: node; `ctype`: str | +| `ArithmeticNode` | `left`: node; `right`: node; `op`: str | +| `NotNode` | `right`: node | +| `CodeBlockNode` | `lines`: node list | +| `IndexNode` | `object`: node; `index`: node | +| `MethodNode` | `object`: node; `args`: node; `name`: str | +| `FunctionNode` | `args`: node; `name`: str | +| `AssignmentNode` | `value`: node; `var_name`: str | +| `PlusAssignmentNode` | `value`: node; `var_name`: str | +| `ForeachClauseNode` | `items`: node; `block`: node; `varnames`: list | +| `IfClauseNode` | `ifs`: node list; `else`: node | +| `IfNode` | `condition`: node; `block`: node | +| `UMinusNode` | `right`: node | +| `TernaryNode` | `condition`: node; `true`: node; `false`: node | + +We do not guarantee the stability of this format since it is heavily linked to +the internal Meson AST. However, breaking changes (removal of a node type or the +removal of a key) are unlikely and will be announced in the release notes. + + +# Existing integrations + +- [Gnome Builder](https://wiki.gnome.org/Apps/Builder) +- [KDevelop](https://www.kdevelop.org) +- [Eclipse CDT](https://www.eclipse.org/cdt/) (experimental) +- [Meson Cmake Wrapper](https://github.com/prozum/meson-cmake-wrapper) (for cmake IDEs) (currently unmaintained !!) +- [Meson-UI](https://github.com/michaelbadcrumble/meson-ui) (Meson build GUI) +- [Meson Syntax Highlighter](https://plugins.jetbrains.com/plugin/13269-meson-syntax-highlighter) plugin for JetBrains IDEs. diff --git a/meson/docs/markdown/Icestorm-module.md b/meson/docs/markdown/Icestorm-module.md new file mode 100644 index 000000000..bc2ad61b7 --- /dev/null +++ b/meson/docs/markdown/Icestorm-module.md @@ -0,0 +1,27 @@ +# Unstable IceStorm module + +This module is available since version 0.45.0. + +**Note**: this module is unstable. It is only provided as a technology +preview. Its API may change in arbitrary ways between releases or it +might be removed from Meson altogether. + +## Usage + +This module provides an experimental method to create FPGA bitstreams using +the [IceStorm](http://www.clifford.at/icestorm/) suite of tools. + +The module exposes only one method called `project` and it is used +like this: + + is.project('projname', + , + constraint_file : , + ) + +The input to this function is the set of Verilog files and a +constraint file. This produces output files called `projname.asc`, +`projname.blif` and `projname.bin`. In addition it creates two run +targets called `projname-time` for running timing analysis and +`projname-upload` that uploads the generated bitstream to an FPGA +device using the `iceprog` programming executable. diff --git a/meson/docs/markdown/In-the-press.md b/meson/docs/markdown/In-the-press.md new file mode 100644 index 000000000..aa6f2a8eb --- /dev/null +++ b/meson/docs/markdown/In-the-press.md @@ -0,0 +1,8 @@ +# In the press + +This page lists cases where Meson has been presented in the press. + +* [Linux Magazin](http://www.linux-magazin.de/Ausgaben/2014/08/), in German, August 2014, and also later in [Linux Magazine](http://www.linux-magazine.com/Issues/2014/166/Meson-Build-System) in English +* [Admin Magazine](http://www.admin-magazine.com/HPC/Articles/The-Meson-Build-System) +* [Phoronix](https://www.phoronix.com/scan.php?page=news_item&px=MTc1MDc) regarding compilation of Mesa3D +* [CppCast](http://cppcast.com/2015/12/jussi-pakkanen/) interviewed Jussi about Meson for C++ development in 12/2015 diff --git a/meson/docs/markdown/Include-directories.md b/meson/docs/markdown/Include-directories.md new file mode 100644 index 000000000..c1d4ac12d --- /dev/null +++ b/meson/docs/markdown/Include-directories.md @@ -0,0 +1,21 @@ +--- +short-description: Instructions on handling include directories +... + +# Include directories + +Most `C`/`C++` projects have headers in different directories than sources. Thus you need to specify include directories. Let's assume that we are at some subdirectory and wish to add its `include` subdirectory to some target's search path. To create a include directory object we do this: + +```meson +incdir = include_directories('include') +``` + +The `incdir` variable now holds a reference to the `include` subdir. Now we pass that as an argument to a build target: + +```meson +executable('someprog', 'someprog.c', include_directories : incdir) +``` + +Note that these two commands can be given in any subdirectories and it will still work. Meson will keep track of the locations and generate proper compiler flags to make it all work. + +Another thing to note is that `include_directories` adds both the source directory and corresponding build directory to include path, so you don't have to care. diff --git a/meson/docs/markdown/IndepthTutorial.md b/meson/docs/markdown/IndepthTutorial.md new file mode 100644 index 000000000..d2e26626f --- /dev/null +++ b/meson/docs/markdown/IndepthTutorial.md @@ -0,0 +1,90 @@ +# An in-depth tutorial + +In this tutorial we set up a project with multiple targets, unit tests and dependencies between targets. Our main product is a shared library called *foo* that is written in `C++11`. We are going to ignore the contents of the source files, as they are not really important from a build definition point of view. The library makes use of the `GLib` library so we need to detect and link it properly. We also make the resulting library installable. + +The source tree contains three subdirectories `src`, `include` and `test` that contain, respectively, the source code, headers and unit tests of our project. + +To start things up, here is the top level `meson.build` file. + +```meson +project('c++ foolib', 'cpp', + version : '1.0.0', + license : 'MIT') +add_global_arguments('-DSOME_TOKEN=value', language : 'cpp') +glib_dep = dependency('glib-2.0') + +inc = include_directories('include') + +subdir('include') +subdir('src') +subdir('test') + +pkg_mod = import('pkgconfig') +pkg_mod.generate(libraries : foolib, + version : '1.0', + name : 'libfoobar', + filebase : 'foobar', + description : 'A Library to barnicate your foos.') +``` + +The definition always starts with a call to the `project` function. In it you must specify the project's name and programming languages to use, in this case only `C++`. We also specify two additional arguments, the project's version and the license it is under. Our project is version `1.0.0` and is specified to be under the MIT license. + +Then we find GLib, which is an *external dependency*. The `dependency` function tells Meson to find the library (by default using `pkg-config`). If the library is not found, Meson will raise an error and stop processing the build definition. + +Then we add a global compiler argument `-DSOME_TOKEN=value`. This flag is used for *all* C++ source file compilations. It is not possible to unset it for some targets. The reason for this is that it is hard to keep track of what compiler flags are in use if global settings change per target. + +Since `include` directory contains the header files, we need a way to tell compilations to add that directory to the compiler command line. This is done with the `include_directories` command that takes a directory and returns an object representing this directory. It is stored in variable `inc` which makes it accessible later on. + +After this are three `subdir` commands. These instruct Meson to go to the specified subdirectory, open the `meson.build` file that's in there and execute it. The last few lines are a stanza to generate a `pkg-config` file. We'll skip that for now and come back to it at the end of this document. + +The first subdirectory we go into is `include`. In it we have a a header file for the library that we want to install. This requires one line. + +```meson +install_headers('foolib.h') +``` + +This installs the given header file to the system's header directory. This is by default `/[install prefix]/include`, but it can be changed with a command line argument. + +The Meson definition of `src` subdir is simple. + +```meson +foo_sources = ['source1.cpp', 'source2.cpp'] +foolib = shared_library('foo', + foo_sources, + include_directories : inc, + dependencies : glib_dep, + install : true) +``` + +Here we just tell Meson to build the library with the given sources. We also tell it to use the include directories we stored to variable `inc` earlier. Since this library uses GLib, we tell Meson to add all necessary compiler and linker flags with the `dependencies` keyword argument. Its value is `glib_dep` which we set at the top level `meson.build` file. The `install` argument tells Meson to install the result. As with the headers, the shared library is installed to the system's default location (usually `/[install prefix]/lib`) but is again overridable. + +The resulting library is stored in variable `foolib` just like the include directory was stored in the previous file. + +Once Meson has processed the `src` subdir it returns to the main Meson file and executes the next line that moves it into the `test` subdir. Its contents look like this. + +```meson +testexe = executable('testexe', 'footest.cpp', + include_directories : inc, + link_with : foolib) +test('foolib test', testexe) +``` + +First we build a test executable that has the same include directory as the main library and which also links against the freshly built shared library. Note that you don't need to specify `glib_dep` here just to be able to use the built library `foolib`. If the executable used GLib functionality itself, then we would of course need to add it as a keyword argument here. + +Finally we define a test with the name `foolib test`. It consists of running the binary we just built. If the executable exits with a zero return value, the test is considered passed. Nonzero return values mark the test as failed. + +At this point we can return to the pkg-config generator line. All shared libraries should provide a pkg-config file, which explains how that library is used. Meson provides this simple generator that should be sufficient for most simple projects. All you need to do is list a few basic pieces of information and Meson takes care of generating an appropriate file. More advanced users might want to create their own pkg-config files using Meson's [configuration file generator system](Configuration.md). + +With these four files we are done. To configure, build and run the test suite, we just need to execute the following commands (starting at source tree root directory). + +```console +$ meson builddir && cd builddir +$ meson compile +$ meson test +``` + +To then install the project you only need one command. + +```console +$ meson install +``` diff --git a/meson/docs/markdown/Installing.md b/meson/docs/markdown/Installing.md new file mode 100644 index 000000000..9dc2ad438 --- /dev/null +++ b/meson/docs/markdown/Installing.md @@ -0,0 +1,148 @@ +--- +short-description: Installing targets +... + +# Installing + +Invoked via the [following command](Commands.md#install) *(available since 0.47.0)*: + +```sh +meson install +``` + +or alternatively (on older meson versions with `ninja` backend): + +```sh +ninja install +``` + +By default Meson will not install anything. Build targets can be +installed by tagging them as installable in the definition. + +```meson +project('install', 'c') +shared_library('mylib', 'libfile.c', install : true) +``` + +There is usually no need to specify install paths or the like. Meson +will automatically install it to the standards-conforming location. In +this particular case the executable is installed to the `bin` +subdirectory of the install prefix. However if you wish to override +the install dir, you can do that with the `install_dir` argument. + +```meson +executable('prog', 'prog.c', install : true, install_dir : 'my/special/dir') +``` + +Other install commands are the following. + +```meson +install_headers('header.h', subdir : 'projname') # -> include/projname/header.h +install_man('foo.1') # -> share/man/man1/foo.1 +install_data('datafile.dat', install_dir : get_option('datadir') / 'progname') +# -> share/progname/datafile.dat +``` + +`install_data()` supports rename of the file *since 0.46.0*. + +```meson +# file.txt -> {datadir}/{projectname}/new-name.txt +install_data('file.txt', rename : 'new-name.txt') + +# file1.txt -> share/myapp/dir1/data.txt +# file2.txt -> share/myapp/dir2/data.txt +install_data(['file1.txt', 'file2.txt'], + rename : ['dir1/data.txt', 'dir2/data.txt'], + install_dir : 'share/myapp') +``` + +Sometimes you want to copy an entire subtree directly. For this use +case there is the `install_subdir` command, which can be used like +this. + +```meson +install_subdir('mydir', install_dir : 'include') # mydir subtree -> include/mydir +``` + +Most of the time you want to install files relative to the install +prefix. Sometimes you need to go outside of the prefix (such as writing +files to `/etc` instead of `/usr/etc`. This can be accomplished by +giving an absolute install path. + +```meson +install_data(sources : 'foo.dat', install_dir : '/etc') # -> /etc/foo.dat +``` + +## Custom install behavior + +Sometimes you need to do more than just install basic targets. Meson +makes this easy by allowing you to specify a custom script to execute +at install time. As an example, here is a script that generates an +empty file in a custom directory. + +```bash +#!/bin/sh + +mkdir "${DESTDIR}/${MESON_INSTALL_PREFIX}/mydir" +touch "${DESTDIR}/${MESON_INSTALL_PREFIX}/mydir/file.dat" +``` + +As you can see, Meson sets up some environment variables to help you +write your script (`DESTDIR` is not set by Meson, it is inherited from +the outside environment). In addition to the install prefix, Meson +also sets the variables `MESON_SOURCE_ROOT` and `MESON_BUILD_ROOT`. + +Telling Meson to run this script at install time is a one-liner. + +```meson +meson.add_install_script('myscript.sh') +``` + +The argument is the name of the script file relative to the current +subdirectory. + +## DESTDIR support + +Sometimes you need to install to a different directory than the +install prefix. This is most common when building rpm or deb +packages. This is done with the `DESTDIR` environment variable and it +is used just like with other build systems: + +```console +$ DESTDIR=/path/to/staging/area meson install +``` + +## Custom install behaviour + +Installation behaviour can be further customized using +additional arguments. + +For example, if you wish to install the current setup without +rebuilding the code (which the default install target always does) and +only installing those files that have changed, you would run this +command in the build tree: + +```console +$ meson install --no-rebuild --only-changed +``` + +## Finer control over install locations + +Sometimes it is necessary to only install a subsection of output files +or install them in different directories. This can be done by +specifying `install_dir` as an array rather than a single string. The +array must have as many items as there are outputs and each entry +specifies how the corresponding output file should be installed. For +example: + +```meson +custom_target(... + output: ['file1', 'file2', 'file3'], + install_dir: ['path1', false, 'path3'], + ... +) +``` + +In this case `file1` would be installed to `/prefix/path1/file1`, +`file2` would not be installed at all and `file3` would be installed +to `/prefix/path3/file3'. diff --git a/meson/docs/markdown/Java.md b/meson/docs/markdown/Java.md new file mode 100644 index 000000000..9b893043d --- /dev/null +++ b/meson/docs/markdown/Java.md @@ -0,0 +1,23 @@ +--- +title: Java +short-description: Compiling Java programs +... + +# Compiling Java applications + +Meson has experimental support for compiling Java programs. The basic syntax consists of only one function and would be used like this: + +```meson +project('javaprog', 'java') + +myjar = jar('mything', 'com/example/Prog.java', + main_class : 'com.example.Prog') + +test('javatest', myjar) +``` + +However note that Meson places limitations on how you lay out your code. + +* all Java files for a jar must be under the subdirectory the jar definition is in +* all Java files must be in paths specified by their package, e.g. a class called `com.example.Something` must be in a Java file situated at `com/example/Something.java`. +* Meson only deals with jar files, you cannot poke individual class files (unless you do so manually) diff --git a/meson/docs/markdown/Keyval-module.md b/meson/docs/markdown/Keyval-module.md new file mode 100644 index 000000000..643265e3b --- /dev/null +++ b/meson/docs/markdown/Keyval-module.md @@ -0,0 +1,55 @@ +--- +short-description: Unstable keyval module +authors: + - name: Mark Schulte, Paolo Bonzini + years: [2017, 2019] + has-copyright: false +... + +# keyval module + +This module parses files consisting of a series of `key=value` lines. One use +of this module is to load kconfig configurations in meson projects. + +**Note**: this does not provide kconfig frontend tooling to generate a +configuration. You still need something such as kconfig frontends (see +link below) to parse your Kconfig files, and then (after you've +chosen the configuration options), output a ".config" file. + + [kconfig-frontends]: http://ymorin.is-a-geek.org/projects/kconfig-frontends + +## Usage + +The module may be imported as follows: + +``` meson +keyval = import('unstable-keyval') +``` + +The following functions will then be available as methods on the object +with the name `keyval`. You can, of course, replace the name +`keyval` with anything else. + +### keyval.load() + +This function loads a file consisting of a series of `key=value` lines +and returns a dictionary object. + +`keyval.load()` makes no attempt at parsing the values in the file. +In particular boolean and integer values will be represented as strings, +and strings will keep any quoting that is present in the input file. It +can be useful to create a [`configuration_data()`](#configuration_data) +object from the dictionary and use methods such as `get_unquoted()`. + +Kconfig frontends usually have ".config" as the default name for the +configuration file. However, placing the configuration file in the source +directory limits the user to one configuration per source directory. +In order to allow separate configurations for each build directory, as is +the Meson standard, `meson.build` should not hardcode ".config" as the +argument to `kconfig.load()`, and should instead make the argument to +`kconfig.load()` a [project build option](Build-options.md). + +* The first (and only) argument is the path to the configuration file to + load (usually ".config"). + +**Returns**: a [dictionary object](Reference-manual.md#dictionary-object). diff --git a/meson/docs/markdown/Localisation.md b/meson/docs/markdown/Localisation.md new file mode 100644 index 000000000..ed63e137b --- /dev/null +++ b/meson/docs/markdown/Localisation.md @@ -0,0 +1,60 @@ +--- +short-description: Localization with GNU Gettext +... + +# Localisation + +Localising your application with GNU gettext takes a little effort but is quite straightforward. We'll create a `po` subdirectory at your project root directory for all the localisation info. + +## Generating .pot and .po files +In your main meson.build file include the `po` subdirectory in the build process. + + subdir('po') + +In this `po` subdirectory we need: +- `LINGUAS`: Space separated list of languages +- `POTFILES`: List of source files to scan for translatable strings. +- `meson.build`: Localization specific meson file + +### LINGUAS +File with space separated list of languages. A sample LINGUAS might look like this. + + aa ab ae af + +### POTFILES +File that lists all the source files that gettext should scan in order to find strings to translate. The syntax of the file is one line per source file and the line must contain the relative path from source root. A sample POTFILES might look like this. + + src/file1.c + src/file2.c + src/subdir/file3.c + include/mything/somefile.h + +### meson.build +Localization specific meson file. It imports and uses the `i18n` module. If not defined before it needs to define the `GETTEXT_PACKAGE` global. +```meson +i18n = import('i18n') +# define GETTEXT_PACKAGE +add_project_arguments('-DGETTEXT_PACKAGE="intltest"', language:'c') +i18n.gettext(meson.project_name(), + args: '--directory=' + meson.source_root() +) +``` +The first command imports the `i18n` module that provides gettext features. The fourth line does the actual invocation. The first argument is the gettext package name. This causes two things to happen. The first is that Meson will generate binary mo files and put them to their proper locations when doing an install. The second is that it creates a build rule to regenerate the main pot file. If you are using the Ninja backend, this is how you would invoke the rebuild. + +### generate .pot file + +Then we need to generate the main pot file. The potfile can have any name but is usually the name of the gettext package. Let's say the project is called *intltest*. In this case the corresponding pot file would be called `intltest.pot`. + +Run the following command from your build folder to generate the pot file. It is recommended to inspect it manually afterwards and fill in e.g. proper copyright and contact information. + +```console +$ meson compile intltest-pot +``` + +### generate .po files + +For each language listed in the array above we need a corresponding `.po` file. Those can be generated by running the following command from your build folder. + +```console +$ meson compile intltest-update-po +``` diff --git a/meson/docs/markdown/Machine-files.md b/meson/docs/markdown/Machine-files.md new file mode 100644 index 000000000..9011f79d7 --- /dev/null +++ b/meson/docs/markdown/Machine-files.md @@ -0,0 +1,188 @@ +# Cross and Native File reference + +Cross and native files are nearly identical, but not completely. This is the +documentation on the common values used by both, for the specific values of +one or the other see the [cross compilation](Cross-compilation.md) and [native +environments](Native-environments.md). + +## Sections + +The following sections are allowed: +- constants +- binaries +- paths +- properties + +### constants + +*Since 0.55.0* + +String and list concatenation is supported using the `+` operator, joining paths +is supported using the `/` operator. +Entries defined in the `[constants]` section can be used in any other section +(they are always parsed first), entries in any other section can be used only +within that same section and only after it has been defined. + +```ini +[constants] +toolchain = '/toolchain' +common_flags = ['--sysroot=' + toolchain / 'sysroot'] + +[properties] +c_args = common_flags + ['-DSOMETHING'] +cpp_args = c_args + ['-DSOMETHING_ELSE'] + +[binaries] +c = toolchain / 'gcc' +``` + +This can be useful with cross file composition as well. A generic cross file +could be composed with a platform specific file where constants are defined: +```ini +# aarch64.ini +[constants] +arch = 'aarch64-linux-gnu' +``` + +```ini +# cross.ini +[binaries] +c = arch + '-gcc' +cpp = arch + '-g++' +strip = arch + '-strip' +pkgconfig = arch + '-pkg-config' +... +``` + +This can be used as `meson setup --cross-file aarch64.ini --cross-file cross.ini builddir`. + +Note that file composition happens before the parsing of values. The example +below results in `b` being `'HelloWorld'`: +```ini +# file1.ini: +[constants] +a = 'Foo' +b = a + 'World' +``` + +```ini +#file2.ini: +[constants] +a = 'Hello' +``` + +The example below results in an error when file1.ini is included before file2.ini +because `b` would be defined before `a`: +```ini +# file1.ini: +[constants] +b = a + 'World' +``` + +```ini +#file2.ini: +[constants] +a = 'Hello' +``` + +### Binaries + +The binaries section contains a list of binaries. These can be used +internally by meson, or by the `find_program` function: + +Compilers and linkers are defined here using `` and `_ld`. +`_ld` is special because it is compiler specific. For compilers like +gcc and clang which are used to invoke the linker this is a value to pass to +their "choose the linker" argument (-fuse-ld= in this case). For compilers +like MSVC and Clang-Cl, this is the path to a linker for meson to invoke, +such as `link.exe` or `lld-link.exe`. Support for ls is *new in 0.53.0* + +*changed in 0.53.1* the `ld` variable was replaced by `_ld`, because it +*regressed a large number of projects. in 0.53.0 the `ld` variable was used +instead. + +Native example: + +```ini +c = '/usr/bin/clang' +c_ld = 'lld' +sed = 'C:\\program files\\gnu\\sed.exe' +llvm-config = '/usr/lib/llvm8/bin/llvm-config' +``` + +Cross example: + +```ini +c = '/usr/bin/i586-mingw32msvc-gcc' +cpp = '/usr/bin/i586-mingw32msvc-g++' +c_ld = 'gold' +cpp_ld = 'gold' +ar = '/usr/i586-mingw32msvc/bin/ar' +strip = '/usr/i586-mingw32msvc/bin/strip' +pkgconfig = '/usr/bin/i586-mingw32msvc-pkg-config' +``` + +An incomplete list of internally used programs that can be overridden here is: +- cmake +- cups-config +- gnustep-config +- gpgme-config +- libgcrypt-config +- libwmf-config +- llvm-config +- pcap-config +- pkgconfig +- sdl2-config +- wx-config (or wx-3.0-config or wx-config-gtk) + +### Paths and Directories + +As of 0.50.0 paths and directories such as libdir can be defined in the native +file in a paths section + +```ini +[paths] +libdir = 'mylibdir' +prefix = '/my prefix' +``` + +These values will only be loaded when not cross compiling. Any arguments on the +command line will override any options in the native file. For example, passing +`--libdir=otherlibdir` would result in a prefix of `/my prefix` and a libdir of +`otherlibdir`. + +### Properties + +*New in native files in 0.54.0*, always in cross files. + +In addition to special data that may be specified in cross files, this +section may contain random key value pairs accessed using the +`meson.get_external_property()` + +## Properties + +*New for native files in 0.54.0* + +The properties section can contain any variable you like, and is accessed via +`meson.get_external_property`, or `meson.get_cross_property`. + +## Loading multiple machine files + +Native files allow layering (cross files can be layered since meson 0.52.0). +More than one native file can be loaded, with values from a previous file being +overridden by the next. The intention of this is not overriding, but to allow +composing native files. This composition is done by passing the command line +argument multiple times: + +```console +meson setup builddir/ --cross-file first.ini --cross-file second.ini --cross-file thrid.ini +``` + +In this case `first.ini` will be loaded, then `second.ini`, with values from +`second.ini` replacing `first.ini`, and so on. + +For example, if there is a project using C and C++, python 3.4-3.7, and LLVM +5-7, and it needs to build with clang 5, 6, and 7, and gcc 5.x, 6.x, and 7.x; +expressing all of these configurations in monolithic configurations would +result in 81 different native files. By layering them, it can be expressed by +just 12 native files. diff --git a/meson/docs/markdown/Manual.md b/meson/docs/markdown/Manual.md new file mode 100644 index 000000000..988efa162 --- /dev/null +++ b/meson/docs/markdown/Manual.md @@ -0,0 +1,9 @@ +--- +short-description: User manual for Meson +... + +# Manual + +This is the user manual for Meson. It currently tracks the state of +Git head. If you are using an older version, some of the information +here might not work for you. diff --git a/meson/docs/markdown/Meson-sample.md b/meson/docs/markdown/Meson-sample.md new file mode 100644 index 000000000..f98e022a4 --- /dev/null +++ b/meson/docs/markdown/Meson-sample.md @@ -0,0 +1,58 @@ +--- +short-description: Simple project step by step explanation +... + +# Meson sample + +A Meson file that builds an executable looks like this. + +```meson +project('simple', 'c') +executable('myexe', 'source.c') +``` + +All Meson build definitions begin with the `project` command. It specifies the name of the project and what programming languages it uses. Here the project is called *simple* and it uses only the C programming language. All strings are single-quoted. + +On the next line we define a *build target*, in this case an executable called *myexe*. It consists of one source file. This is all the code that a user needs to write to compile an executable with Meson. + +Variables are fully supported. The above code snippet could also have been declared like this. + +```meson +project('simple', 'c') +src = 'source.c' +executable('myexe', src) +``` + +Most executables consist of more than one source file. The easiest way to deal with this is to put them in an array. + +```meson +project('simple', 'c') +src = ['source1.c', 'source2.c', 'source3.c'] +executable('myexe', src) +``` + +Meson also supports the notion of *keyword arguments*. Indeed most arguments to functions can only be passed using them. The above snippet could be rewritten like this. + +```meson +project('simple', 'c') +src = ['source1.c', 'source2.c', 'source3.c'] +executable('myexe', sources : src) +``` + +These two formats are equivalent and choosing one over the other is mostly a question of personal preference. + +The `executable` command actually returns an *executable object*, which represents the given build target. It can be passed on to other functions, like this. + +```meson +project('simple', 'c') +src = ['source1.c', 'source2.c', 'source3.c'] +exe = executable('myexe', src) +test('simple test', exe) +``` + +Here we create a unit test called *simple test*, and which uses the built executable. When the tests are run with the `meson test` command, the built executable is run. If it returns zero, the test passes. A non-zero return value indicates an error, which Meson will then report to the user. + +A note to Visual Studio users +----- + +There's a slight terminology difference between Meson and Visual Studio. A Meson *project* is the equivalent to a Visual Studio *solution*. That is, the topmost thing that encompasses all things to be built. A Visual Studio *project* on the other hand is the equivalent of a Meson top level build target, such as an executable or a shared library. diff --git a/meson/docs/markdown/MesonCI.md b/meson/docs/markdown/MesonCI.md new file mode 100644 index 000000000..73b979bf7 --- /dev/null +++ b/meson/docs/markdown/MesonCI.md @@ -0,0 +1,53 @@ +# Meson CI setup + +This document is aimed for Meson contributors and documents +the CI setup used for testing Meson itself. The Meson +project uses multiple CI platforms for covering a wide +range of target systems. + +## Travis CI + +The travis configuration file is the `.travis.yml` in the +the project root. This platform tests cross compilation and +unity builds on a [linux docker image](#docker-images) and +on OSX. + +## GitHub actions + +The configuration files for GitHub actions are located in +`.github/workflows`. Here, all [images](#docker-images) +are tested with the full `run_tests.py` run. Additionally, +some other, smaller, tests are run. + +## Docker images + +The Linux docker images are automatically built and +uploaded by GitHub actions. An image rebuild is triggerd +when any of the image definition files are changed (in +`ci/ciimage`) in the master branch. Additionally, the +images are also updated weekly. + +Each docker image has one corresponding dirctory in +`ci/ciimage` with an `image.json` and an `install.sh`. + +### Image generation + +There are no manual Dockerfiles. Instead the Dockerfile is +automatically generated by the `build.py` script. This is +done to ensure that all images have the same layout and can +all be built and tested automatically. + +The Dockerfile is generated from the `image.json` file and +basically only adds a few common files and runs the +`install.sh` script which should contain all distribution +specific setup steps. The `common.sh` can be sourced via +`source /ci/common.sh` to access some shared functionalety. + +To generate the image run `build.py -t build `. A +generated image can be tested with `build.py -t test `. + +### Common image setup + +Each docker image has a `/ci` directory with an +`env_vars.sh` script. This script has to be sourced before +running the meson test suite. diff --git a/meson/docs/markdown/Mixing-build-systems.md b/meson/docs/markdown/Mixing-build-systems.md new file mode 100644 index 000000000..98ca0edc0 --- /dev/null +++ b/meson/docs/markdown/Mixing-build-systems.md @@ -0,0 +1,55 @@ +# Meson's policy on mixing multiple build systems in one build directory + +Meson has been designed with the principle that all dependencies are +either provided by "the platform" via a mechanism such as Pkg-Config +or that they are built as Meson subprojects under the main +project. There are several projects that would like to mix build +systems, that is, build dependencies in the same build directory as +the other build system by having one build system call the other. The +build directories do not necessarily need to be inside each other, but +that is the common case. + +This page lists the Meson project's stance on mixing build +systems. The tl/dr version is that while we do provide some +functionality for this use case, it only works for simple +cases. Anything more complex can not be made reliable and trying to do +that would burden Meson developers with an effectively infinite +maintenance burden. Thus these use cases are not guaranteed to work, +and even if a project using them works today there are no guarantees +that it will work in any future version. + +## The definition of "build system mixing" + +For the purposes of this page, mixing build systems means any and all +mechanisms where one build system uses build artifacts from a +different build system's build directory in any way. + +Note that this definition does not specify what the dependencies are +and how they are built, only how they are consumed. For example +suppose you have a standalone dependency library that builds with +build system X. In this case having Meson call the build system to +build the dependency at build time would be interpreted as mixing +build systems. On the other hand a "Flatpak-like" approach of building +and installing the library with an external mechanism and consuming it +via a standard build-system agnostic method such as Pkg-Config would +not be considered build system mixing. Use of uninstalled-pkgconfig +files is considered mixing, though. + +## What does this mean for support and compatibility? + +The Meson project will not take on any maintenance burden to ensure +anything other than the simple builds setups as discussed above will +work. Nor will we make changes to support these use cases that would +worsen the user experience of users of plain Meson. This includes, but +is not limited to, the following: + +- Any changes in other build systems that cause mixed project breakage + will not be considered a bug in Meson. + +- Breakages in mixed build projects will not be considered regressions + and such problems will never be considered release blockers, + regardless of what the underlying issue is. + +- Any use case that would require major changes in Meson to work + around missing or broken functionality in the other build system is + not supported. These issues must be fixed upstream. diff --git a/meson/docs/markdown/Modules.md b/meson/docs/markdown/Modules.md new file mode 100644 index 000000000..c354169e6 --- /dev/null +++ b/meson/docs/markdown/Modules.md @@ -0,0 +1,22 @@ +--- +short-description: Meson modules for common build operations +... + +# Modules + +In addition to core language features, Meson also provides a module system aimed at providing helper methods for common build operations. Using modules is simple, first you import them: + +```meson +mymod = import('somemodule') +``` + +After this you can use the returned object to use the functionality provided: + +```meson +mymod.do_something('text argument') +``` + +Meson has a selection of modules to make common requirements easy to +use. Modules can be thought of like the standard library of a +programming language. Currently Meson provides the modules listed on +subpages. diff --git a/meson/docs/markdown/Native-environments.md b/meson/docs/markdown/Native-environments.md new file mode 100644 index 000000000..8dcce2ab5 --- /dev/null +++ b/meson/docs/markdown/Native-environments.md @@ -0,0 +1,49 @@ +--- +short-description: Setting up native compilation +... + +# Persistent native environments + +New in 0.49.0 + +Meson has [cross files for describing cross compilation environments](Cross-compilation.md), +for describing native environments it has equivalent "native files". + +Natives describe the *build machine*, and can be used to override properties of +non-cross builds, as well as properties that are marked as "native" in a cross +build. + +There are a couple of reasons you might want to use a native file to keep a +persistent environment: + +* To build with a non-default native tool chain (such as clang instead of gcc) +* To use a non-default version of another binary, such as yacc, or llvm-config + +## Changing native file settings + +All of the rules about cross files and changed settings apply to native files +as well, see [here](Cross-compilation.md#changing-cross-file-settings) + +## Defining the environment + +See the [config-files section](Machine-files.md), for options shared by cross +and native files. + +## Native file locations + +Like cross files, native files may be installed to user or system wide +locations, defined as: + - $XDG_DATA_DIRS/meson/native + (/usr/local/share/meson/native:/usr/share/meson/native if $XDG_DATA_DIRS is + undefined) + - $XDG_DATA_HOME/meson/native ($HOME/.local/share/meson/native if + $XDG_DATA_HOME is undefined) + +The order of locations tried is as follows: + - A file relative to the local dir + - The user local location + - The system wide locations in order + +These files are not intended to be shipped by distributions, unless they are +specifically for distribution packaging, they are mainly intended for +developers. diff --git a/meson/docs/markdown/Overview.md b/meson/docs/markdown/Overview.md new file mode 100644 index 000000000..7bee93789 --- /dev/null +++ b/meson/docs/markdown/Overview.md @@ -0,0 +1,58 @@ +--- +short-description: Overview of the Meson build system +... + +# Overview + +Meson is a build system that is designed to be as user-friendly as +possible without sacrificing performance. The main tool for this is a +custom language that the user uses to describe the structure of his +build. The main design goals of this language has been simplicity, +clarity and conciseness. Much inspiration was drawn from the Python +programming language, which is considered very readable, even to +people who have not programmed in Python before. + +Another main idea has been to provide first class support for modern +programming tools and best practices. These include features as varied +as unit testing, code coverage reporting, precompiled headers and the +like. All of these features should be immediately available to any +project using Meson. The user should not need to hunt for third party +macros or write shell scripts to get these features. They should just +work out of the box. + +This power should not come at the expense of limited usability. Many +software builds require unorthodox steps. A common example is that you +first need to build a custom tool and then use that tool to generate +more source code to build. This functionality needs to be supported +and be as easy to use as other parts of the system. + +Terminology +-- + +Meson follows the overall structure of other popular build systems, +such as CMake and GNU Autotools. This means that the build is divided +into two discrete steps: *configure step* and *build step*. The first +step inspects the system, checks for dependencies and does all other +steps necessary to configure the build. It then generates the actual +build system. The second step is simply executing this generated build +system. The end result is a bunch of *build targets*, which are +usually executables and shared and static libraries. + +The directory that contains the source code is called the *source +directory*. Correspondingly the directory where the output is written +is called the *build directory*. In other build systems it is common +to have these two be the same directory. This is called an *in-source +build*. The case where the build directory is separate is called an +*out-of-source build*. + +What sets Meson apart from most build systems is that it enforces a +separate build directory. All files created by the build system are +put in the build directory. It is actually impossible to do an +in-source build. For people used to building inside their source tree, +this may seem like a needless complication. However there are several +benefits to doing only out-of-source builds. These will be explained +in the next chapter. + +When the source code is built, a set of *unit tests* is usually +run. They ensure that the program is working as it should. If it does, +the build result can be *installed* after which it is ready for use. diff --git a/meson/docs/markdown/Performance-comparison.md b/meson/docs/markdown/Performance-comparison.md new file mode 100644 index 000000000..3f15e8249 --- /dev/null +++ b/meson/docs/markdown/Performance-comparison.md @@ -0,0 +1,6 @@ +# Performance comparison + +This page lists experiments comparing build performance between Meson and other build systems. + +- [Simple comparison](Simple-comparison.md) +- [ARM performance test](ARM-performance-test.md) diff --git a/meson/docs/markdown/Pkg-config-files.md b/meson/docs/markdown/Pkg-config-files.md new file mode 100644 index 000000000..3aa8897a3 --- /dev/null +++ b/meson/docs/markdown/Pkg-config-files.md @@ -0,0 +1,21 @@ +# Pkg config files + +[Pkg-config](https://en.wikipedia.org/wiki/Pkg-config) is a way for shared libraries to declare the compiler flags needed to use them. There are two different ways of generating Pkg-config files in Meson. The first way is to build them manually with the `configure_file` command. The second way is to use Meson's built in Pkg-config file generator. The difference between the two is that the latter is very simple and meant for basic use cases. The former should be used when you need to provide a more customized solution. + +In this document we describe the simple generator approach. It is used in the following way. + +```meson +pkg = import('pkgconfig') +libs = ... # the library/libraries users need to link against +h = ['.', ...] # subdirectories of ${prefix}/${includedir} to add to header path +pkg.generate(libraries : libs, + subdirs : h, + version : '1.0', + name : 'libsimple', + filebase : 'simple', + description : 'A simple demo library.') +``` + +This causes a file called `simple.pc` to be created and placed into the install directory during the install phase. + +More information on the pkg-config module and the parameters can be found on the [pkgconfig-module](Pkgconfig-module.md) page. diff --git a/meson/docs/markdown/Pkgconfig-module.md b/meson/docs/markdown/Pkgconfig-module.md new file mode 100644 index 000000000..7b68e2408 --- /dev/null +++ b/meson/docs/markdown/Pkgconfig-module.md @@ -0,0 +1,112 @@ +# Pkgconfig module + +This module is a simple generator for +[pkg-config](https://pkg-config.freedesktop.org/) files. + +## Usage + +```meson +pkg = import('pkgconfig') +bar_dep = dependency('bar') +lib = library('foo', dependencies : [bar]) +pkg.generate(lib) +``` + +### pkg.generate() + +The generated file's properties are specified with the following +keyword arguments. + +- `description` a string describing the library, used to set the `Description:` field +- `extra_cflags` a list of extra compiler flags to be added to the + `Cflags` field after the header search path +- `filebase` the base name to use for the pkg-config file; as an + example, the value of `libfoo` would produce a pkg-config file called + `libfoo.pc` +- `install_dir` the directory to install to, defaults to the value of + option `libdir` followed by `/pkgconfig` +- `libraries` a list of built libraries (usually results of + shared_library) that the user needs to link against. Arbitrary strings can + also be provided and they will be added into the `Libs` field. Since 0.45.0 + dependencies of built libraries will be automatically added, see the + [Implicit dependencies](#implicit-dependencies) section below for the exact + rules. +- `libraries_private` list of built libraries or strings to put in the + `Libs.private` field. Since 0.45.0 dependencies of built libraries will be + automatically added, see the [Implicit dependencies](#implicit-dependencies) + section below for the exact rules. +- `name` the name of this library, used to set the `Name:` field +- `subdirs` which subdirs of `include` should be added to the header + search path, for example if you install headers into + `${PREFIX}/include/foobar-1`, the correct value for this argument + would be `foobar-1` +- `requires` list of strings, pkgconfig-dependencies or libraries that + `pkgconfig.generate()` was used on to put in the `Requires` field +- `requires_private` same as `requires` but for `Requires.private` field + field +- `url` a string with a url for the library +- `variables` a list of strings with custom variables to add to the + generated file. The strings must be in the form `name=value` and may + reference other pkgconfig variables, + e.g. `datadir=${prefix}/share`. The names `prefix`, `libdir` and + `includedir` are reserved and may not be used. +- `version` a string describing the version of this library, used to set the + `Version:` field. (*since 0.46.0*) Defaults to the project version if unspecified. +- `d_module_versions` a list of module version flags used when compiling + D sources referred to by this pkg-config file +- `uninstalled_variables` used instead of the `variables` keyword argument, when + generating the uninstalled pkg-config file. Since *0.54.0* +- `dataonly` field. (*since 0.54.0*) this is used for architecture-independent + pkg-config files in projects which also have architecture-dependent outputs. +- `conflicts` (*since 0.36.0, incorrectly issued a warning prior to 0.54.0*) list of strings to be put in the `Conflicts` field. + +Since 0.46 a `StaticLibrary` or `SharedLibrary` object can optionally be passed +as first positional argument. If one is provided a default value will be +provided for all required fields of the pc file: +- `install_dir` is set to `pkgconfig` folder in the same location than the provided library. +- `description` is set to the project's name followed by the library's name. +- `name` is set to the library's name. + +Since 0.54.0 uninstalled pkg-config files are generated as well. They are +located in `/meson-uninstalled/`. It is sometimes +useful to build projects against libraries built by meson without having to +install them into a prefix. In order to do so, just set +`PKG_CONFIG_PATH=/meson-uninstalled` before building your +application. That will cause pkg-config to prefer those `-uninstalled.pc` files +and find libraries and headers from the meson builddir. This is an experimental +feature provided on a best-effort basis, it might not work in all use-cases. + +### Implicit dependencies + +The exact rules followed to find dependencies that are implicitly added into the +pkg-config file have evolved over time. Here are the rules as of Meson *0.49.0*, +previous versions might have slightly different behaviour. + +- Not found libraries or dependencies are ignored. +- Libraries and dependencies are private by default (i.e. added into + `Requires.private:` or `Libs.private:`) unless they are explicitly added in + `libraries` or `requires` keyword arguments, or is the main library (first + positional argument). +- Libraries and dependencies will be de-duplicated, if they are added in both + public and private (e.g `Requires:` and `Requires.private:`) it will be removed + from the private list. +- Shared libraries (i.e. `shared_library()` and **NOT** `library()`) add only + `-lfoo` into `Libs:` or `Libs.private:` but their dependencies are not pulled. + This is because dependencies are only needed for static link. +- Other libraries (i.e. `static_library()` or `library()`) add `-lfoo` into `Libs:` + or `Libs.private:` and recursively add their dependencies into `Libs.private:` or + `Requires.private:`. +- Dependencies provided by pkg-config are added into `Requires:` or + `Requires.private:`. If a version was specified when declaring that dependency + it will be written into the generated file too. +- The threads dependency (i.e. `dependency('threads')`) adds `-pthread` into + `Libs:` or `Libs.private:`. +- Internal dependencies (i.e. + `declare_dependency(compiler_args : '-DFOO', link_args : '-Wl,something', link_with : foo)`) + add `compiler_args` into `Cflags:` if public, `link_args` and `link_with` into + `Libs:` if public or `Libs.private:` if private. +- Other dependency types add their compiler arguments into `Cflags:` if public, + and linker arguments into `Libs:` if public or `Libs.private:` if private. +- Once a pkg-config file is generated for a library using `pkg.generate(mylib)`, + any subsequent call to `pkg.generate()` where mylib appears, will generate a + `Requires:` or `Requires.private` instead of a `Libs:` or `Libs.private:`. diff --git a/meson/docs/markdown/Playground.md b/meson/docs/markdown/Playground.md new file mode 100644 index 000000000..5699f22a5 --- /dev/null +++ b/meson/docs/markdown/Playground.md @@ -0,0 +1,31 @@ +# playground + +This page is *not* part of official documentation. It exists merely for testing new stuff for the wiki. + +## Ref manual reformat + +The current format is not very readable. We should have something more like what [glib](https://developer.gnome.org/glib/stable/glib-Hash-Tables.html) or [Python](https://docs.python.org/3/library/os.html) do. + +Here's a first proposal. + + project(, + , + version : , + subproject_dir : , + meson_version : , + license : , + default_options : , + +Longer descriptions of arguments go here. + +Take two: + +## project + + + + version : + subproject_dir : + meson_version : + license : + default_options : diff --git a/meson/docs/markdown/Porting-from-autotools.md b/meson/docs/markdown/Porting-from-autotools.md new file mode 100644 index 000000000..2170ffa61 --- /dev/null +++ b/meson/docs/markdown/Porting-from-autotools.md @@ -0,0 +1,686 @@ +# Porting from Autotools + +This page uses [AppStream-glib](https://github.com/hughsie/appstream-glib/) as an example project. AppStream-Glib contains some libraries, GObject Introspection data, tests, man pages, i18n, bash-completion with optional flags to build/not build support for some things. + +Meson comes with a helper script `ac_converter` that you can use to convert the basic autoconf checks for your project. + +## Configure.ac + +First let's look at `configure.ac` and write the same in `meson.build`. + +```autoconf +AC_PREREQ(2.63) +``` +Meson doesn't provide the same function, so just ignore this. + +### Defining variables +`configure.ac`: +```autoconf +m4_define([as_major_version], [0]) +m4_define([as_minor_version], [3]) +m4_define([as_micro_version], [6]) +m4_define([as_version], + [as_major_version.as_minor_version.as_micro_version]) +``` +`meson.build`: +```meson + +as_version = meson.project_version() # set in project() below +ver_arr = as_version.split('.') + +as_major_version = ver_arr[0] +as_minor_version = ver_arr[1] +as_micro_version = ver_arr[2] +``` + +### Initializing project and setting compilers +`configure.ac`: +```autoconf +AC_INIT([appstream-glib],[as_version]) +AC_PROG_CC +``` +`meson.build`: +```meson +project('appstream-glib', 'c', version : '0.3.6') +``` +Note that this must be the first line of your `meson.build` file. + +### AC_SUBST +`configure.ac`: +```autoconf +AC_SUBST(AS_MAJOR_VERSION) +AC_SUBST(AS_MINOR_VERSION) +AC_SUBST(AS_MICRO_VERSION) +AC_SUBST(AS_VERSION) +``` + +You don't need to do the same in Meson, because it does not have two different types of files (Makefile, configure). + +### Auto headers + +`configure.ac`: + +```autoconf +AC_CONFIG_HEADERS([config.h]) +``` + +`meson.build`: + +```meson +conf = configuration_data() +# Surround the version in quotes to make it a C string +conf.set_quoted('VERSION', as_version) +configure_file(output : 'config.h', + configuration : conf) +``` + +Meson doesn't support autoheaders, you need to manually specify what do you want to see in header file, write `configuration_data()` object and use `configure_file()`. + +You can also substitute variables of type `@SOME_VAR@` with configure data. The details are on the [configuration page](Configuration.md). + +### Finding programs + +`configure.ac`: + +```autoconf +AC_PATH_PROG(GPERF, [gperf], [no]) +if test x$GPERF != xno ; then + AC_DEFINE(HAVE_GPERF,[1], [Use gperf]) +fi +AM_CONDITIONAL(HAVE_GPERF, [test x$GPERF != xno]) +``` + +`meson.build`: + +```meson +gperf = find_program('gperf', required : false) +if gperf.found() + conf.set('HAVE_GPERF', 1) +endif +``` + +### Finding pkg-config modules + +`configure.ac`: + +```autoconf +PKG_CHECK_MODULES(SOUP, libsoup-2.4 >= 2.24) +``` + +`meson.build`: + +```meson +soup = dependency('libsoup-2.4', version : '>= 2.24') +``` + +### Arguments + +`configure.ac`: + +```autoconf +AC_ARG_ENABLE(dep11, AS_HELP_STRING([--enable-dep11],[enable DEP-11]), + enable_dep11=$enableval,enable_dep11=yes) +AM_CONDITIONAL(HAVE_DEP11, test x$enable_dep11 = xyes) +if test x$enable_dep11 = xyes; then + AC_CHECK_HEADER(yaml.h, [], [AC_MSG_ERROR([No yaml.h])]) + YAML_LIBS="-lyaml" + AC_SUBST(YAML_LIBS) + AC_DEFINE(AS_BUILD_DEP11,1,[Build DEP-11 code]) +fi +``` + +`meson.build`: + +```meson +if get_option('enable-dep11') + yaml = dependency('yaml-0.1') + conf.set('AS_BUILD_DEP11', 1) +else + yaml = dependency('yaml-0.1', required : false) +endif +``` + +`meson_options.txt`: + +```meson +option('enable-dep11', type : 'boolean', value : true, description : 'enable DEP-11') +``` + +## Makefile.am + +Next step is `Makefile.am`. In meson you don't need to have other file, you still use `meson.build`. + +### Sub directories + +`Makefile.am`: + +```make +SUBDIRS = \ + libappstream-glib +``` + +`meson.build`: + +```meson +subdir('libappstream-glib') +``` + +### *CLEANFILES, EXTRA_DIST, etc. + +`Makefile.am`: + +```make +DISTCLEANFILES = \ + appstream-glib-*.tar.xz + +MAINTAINERCLEANFILES = \ + *~ \ + ABOUT-NLS \ + aclocal.m4 \ + ChangeLog \ + compile \ + config.guess \ + config.h.* \ + config.rpath + +EXTRA_DIST = \ + COPYING \ + MAINTAINERS \ + AUTHORS \ + README.md \ + NEWS \ + autogen.sh \ + config.h +``` + +In Meson you don't need have `*CLEANFILES`, because in meson you are building in temporary directory (usually called `build`), you manually removing it. You also not need to use `EXTRA_DIST`, because you will make tarballs via `git archive` or something like this. + +### glib-compile-resources + +`Makefile.am`: +```make +as-resources.c: appstream-glib.gresource.xml \ + as-stock-icons.txt \ + as-license-ids.txt \ + as-blacklist-ids.txt \ + as-category-ids.txt \ + as-environment-ids.txt + $(AM_V_GEN) \ + glib-compile-resources \ + --sourcedir=$(srcdir) \ + --sourcedir=$(top_builddir)/data \ + --target=$@ \ + --generate-source \ + --c-name as \ + $(srcdir)/appstream-glib.gresource.xml +as-resources.h: appstream-glib.gresource.xml \ + as-stock-icons.txt \ + as-license-ids.txt \ + as-blacklist-ids.txt \ + as-category-ids.txt \ + as-environment-ids.txt + $(AM_V_GEN) \ + glib-compile-resources \ + --sourcedir=$(srcdir) \ + --sourcedir=$(top_builddir)/data \ + --target=$@ \ + --generate-header \ + --c-name as \ + $(srcdir)/appstream-glib.gresource.xml + +BUILT_SOURCES = \ + as-resources.c \ + as-resources.h +``` + +`meson.build`: + +```meson +asresources = gnome.compile_resources( + 'as-resources', 'appstream-glib.gresource.xml', + source_dir : '.', + c_name : 'as') +``` + +### Headers + +`Makefile.am`: + +```make +libappstream_glib_includedir = $(includedir)/libappstream-glib +libappstream_glib_include_HEADERS = \ + appstream-glib.h \ + as-app.h \ + as-bundle.h \ + as-enums.h \ + as-icon.h \ + as-image.h \ + as-inf.h \ + as-node.h \ + as-problem.h \ + as-provide.h \ + as-release.h \ + as-screenshot.h \ + as-store.h \ + as-tag.h \ + as-utils.h \ + as-version.h +``` + +`meson.build`: + +```meson +headers = [ + 'appstream-glib.h', + 'as-app.h', + 'as-bundle.h', + 'as-enums.h', + 'as-icon.h', + 'as-image.h', + 'as-inf.h', + 'as-node.h', + 'as-problem.h', + 'as-provide.h', + 'as-release.h', + 'as-screenshot.h', + 'as-store.h', + 'as-tag.h', + 'as-utils.h', + 'as-version.h'] +install_headers(headers, subdir : 'libappstream-glib') +``` + +### Libraries + +`Makefile.am`: +```make +lib_LTLIBRARIES = \ + libappstream-glib.la +libappstream_glib_la_SOURCES = \ + as-app.c \ + as-app-desktop.c \ + as-app-inf.c \ + as-app-private.h \ + as-app-validate.c \ + as-bundle.c \ + as-bundle-private.h \ + as-cleanup.h \ + as-enums.c \ + as-icon.c \ + as-icon-private.h \ + as-image.c \ + as-image-private.h \ + as-inf.c \ + as-inf.h \ + as-node.c \ + as-node-private.h \ + as-problem.c \ + as-problem.h \ + as-provide.c \ + as-provide-private.h \ + as-release.c \ + as-release-private.h \ + as-resources.c \ + as-resources.h \ + as-screenshot.c \ + as-screenshot-private.h \ + as-store.c \ + as-tag.c \ + as-utils.c \ + as-utils-private.h \ + as-version.h \ + as-yaml.c \ + as-yaml.h + +libappstream_glib_la_LIBADD = \ + $(GLIB_LIBS) \ + $(GDKPIXBUF_LIBS) \ + $(LIBARCHIVE_LIBS) \ + $(SOUP_LIBS) \ + $(YAML_LIBS) + +libappstream_glib_la_LDFLAGS = \ + -version-info $(LT_CURRENT):$(LT_REVISION):$(LT_AGE) \ + -export-dynamic \ + -no-undefined \ + -export-symbols-regex '^as_.*' +``` + +`meson.build`: + +```meson +sources = [ + 'as-app.c', + 'as-app-desktop.c', + 'as-app-inf.c', + 'as-app-private.h', + 'as-app-validate.c', + 'as-bundle.c', + 'as-bundle-private.h', + 'as-cleanup.h', + 'as-enums.c', + 'as-icon.c', + 'as-icon-private.h', + 'as-image.c', + 'as-image-private.h', + 'as-inf.c', + 'as-inf.h', + 'as-node.c', + 'as-node-private.h', + 'as-problem.c', + 'as-problem.h', + 'as-provide.c', + 'as-provide-private.h', + 'as-release.c', + 'as-release-private.h', + asresources, + 'as-screenshot.c', + 'as-screenshot-private.h', + 'as-store.c', + 'as-tag.c', + 'as-utils.c', + 'as-utils-private.h', + 'as-version.h', + 'as-yaml.c', + 'as-yaml.h'] + +deps = [glib, gdkpixbuf, libarchive, soup, yaml] + +mapfile = 'appstream-glib.map' +vflag = '-Wl,--version-script,@0@/@1@'.format(meson.current_source_dir(), mapfile) +asglib = shared_library( + 'appstream-glib', sources, + soversion : lt_current, + version : lt_version, + dependencies : deps, + include_directories : include_directories('@0@/..'.format(meson.current_build_dir())), + link_args : ['-Wl,--no-undefined', vflag], + link_depends : mapfile, + install : true) +``` + +`appstream-glib.map`: + +``` +{ +global: + as_*; +local: + *; +}; +``` + +### Custom targets + +`Makefile.am`: + +```make +if HAVE_GPERF +as-tag-private.h: as-tag.gperf + $(AM_V_GEN) gperf < $< > $@ + +libappstream_glib_la_SOURCES += as-tag-private.h +BUILT_SOURCES += as-tag-private.h +endif +``` + +`meson.build`: + +```meson +if gperf.found() + astagpriv = custom_target('gperf as-tag', + output : 'as-tag-private.h', + input : 'as-tag.gperf', + command : [gperf, '@INPUT@', '--output-file', '@OUTPUT@']) + sources = sources + [astagpriv] +endif +``` + +### Global CFLAGS + +`Makefile.am`: + +```make +AM_CPPFLAGS = \ + -DAS_COMPILATION \ + -DLOCALSTATEDIR=\""$(localstatedir)"\" \ + -DG_LOG_DOMAIN=\"As\" +``` + +`meson.build`: + +```meson +add_project_arguments('-DG_LOG_DOMAIN="As"', language : 'c') +add_project_arguments('-DAS_COMPILATION', language : 'c') +add_project_arguments('-DLOCALSTATEDIR="/var"', language : 'c') +``` + +### Tests + +`Makefile.am`: + +```make +check_PROGRAMS = \ + as-self-test +as_self_test_SOURCES = \ + as-self-test.c +as_self_test_LDADD = \ + $(GLIB_LIBS) \ + $(GDKPIXBUF_LIBS) \ + $(LIBARCHIVE_LIBS) \ + $(SOUP_LIBS) \ + $(YAML_LIBS) \ + $(lib_LTLIBRARIES) +as_self_test_CFLAGS = -DTESTDATADIR=\""$(top_srcdir)/data/tests"\" + +TESTS = as-self-test +``` + +`meson.build`: + +```meson +selftest = executable( + 'as-self-test', 'as-self-test.c', + include_directories : include_directories('@0@/..'.format(meson.current_build_dir())), + dependencies : deps, + c_args : '-DTESTDATADIR="@0@/../data/tests"'.format(meson.current_source_dir()), + link_with : asglib) +test('as-self-test', selftest) +``` + +### GObject Introspection + +`Makefile.am`: + +```make +introspection_sources = \ + as-app.c \ + as-app-validate.c \ + as-app.h \ + as-bundle.c \ + as-bundle.h \ + as-enums.c \ + as-enums.h \ + as-icon.c \ + as-icon.h \ + as-image.c \ + as-image.h \ + as-inf.c \ + as-inf.h \ + as-node.c \ + as-node.h \ + as-problem.c \ + as-problem.h \ + as-provide.c \ + as-provide.h \ + as-release.c \ + as-release.h \ + as-screenshot.c \ + as-screenshot.h \ + as-store.c \ + as-store.h \ + as-tag.c \ + as-tag.h \ + as-utils.c \ + as-utils.h \ + as-version.h + +AppStreamGlib-1.0.gir: libappstream-glib.la +AppStreamGlib_1_0_gir_INCLUDES = GObject-2.0 Gio-2.0 GdkPixbuf-2.0 +AppStreamGlib_1_0_gir_CFLAGS = $(AM_CPPFLAGS) +AppStreamGlib_1_0_gir_SCANNERFLAGS = --identifier-prefix=As \ + --symbol-prefix=as_ \ + --warn-all \ + --add-include-path=$(srcdir) +AppStreamGlib_1_0_gir_EXPORT_PACKAGES = appstream-glib +AppStreamGlib_1_0_gir_LIBS = libappstream-glib.la +AppStreamGlib_1_0_gir_FILES = $(introspection_sources) +INTROSPECTION_GIRS += AppStreamGlib-1.0.gir + +girdir = $(datadir)/gir-1.0 +gir_DATA = $(INTROSPECTION_GIRS) + +typelibdir = $(libdir)/girepository-1.0 +typelib_DATA = $(INTROSPECTION_GIRS:.gir=.typelib) + +CLEANFILES += $(gir_DATA) $(typelib_DATA) +``` + +`meson.build`: + +```meson +introspection_sources = [ + 'as-app.c', + 'as-app-validate.c', + 'as-app.h', + 'as-bundle.c', + 'as-bundle.h', + 'as-enums.c', + 'as-enums.h', + 'as-icon.c', + 'as-icon.h', + 'as-image.c', + 'as-image.h', + 'as-inf.c', + 'as-inf.h', + 'as-node.c', + 'as-node.h', + 'as-problem.c', + 'as-problem.h', + 'as-provide.c', + 'as-provide.h', + 'as-release.c', + 'as-release.h', + 'as-screenshot.c', + 'as-screenshot.h', + 'as-store.c', + 'as-store.h', + 'as-tag.c', + 'as-tag.h', + 'as-utils.c', + 'as-utils.h', + 'as-version.h'] + +gnome.generate_gir(asglib, + sources : introspection_sources, + nsversion : '1.0', + namespace : 'AppStreamGlib', + symbol_prefix : 'as_', + identifier_prefix : 'As', + export_packages : 'appstream-glib', + includes : ['GObject-2.0', 'Gio-2.0', 'GdkPixbuf-2.0'], + install : true +) +``` + +### GSettings + +`configure.ac`: +```sh +GLIB_GSETTINGS +``` + +`Makefile.am`: +```make +gsettings_SCHEMAS = foo.gschema.xml +@GSETTINGS_RULES@ +``` + +`meson.build`: +```meson +install_data('foo.gschema.xml', install_dir: get_option('datadir') / 'glib-2.0' / 'schemas') +meson.add_install_script('meson_post_install.py') +``` + +`meson_post_install.py`: +```python +#!/usr/bin/env python3 + +import os +import subprocess + +schemadir = os.path.join(os.environ['MESON_INSTALL_PREFIX'], 'share', 'glib-2.0', 'schemas') + +if not os.environ.get('DESTDIR'): + print('Compiling gsettings schemas...') + subprocess.call(['glib-compile-schemas', schemadir]) +``` + +### gettext + +Note this example does not include `intltool` usage. + +`configure.ac`: +```m4 +AM_GNU_GETTEXT([external]) +AM_GNU_GETTEXT_VERSION([0.19.7]) + +GETTEXT_PACKAGE=foo +AC_SUBST(GETTEXT_PACKAGE) +AC_DEFINE_UNQUOTED(GETTEXT_PACKAGE, "$GETTEXT_PACKAGE", [The prefix for our gettext translation domains.]) +``` + +`po/Makevars`: +```make +XGETTEXT_OPTIONS = --from-code=UTF-8 --keyword=_ --keyword=N_ --keyword=C_:1c,2 --keyword=NC_:1c,2 --keyword=g_dngettext:2,3 --add-comments +``` + +`Makefile.am`: +```make +%.desktop: %.desktop.in + $(AM_V_GEN)$(MSGFMT) --desktop --template $< -d $(top_srcdir)/po -o $@ + +%.appdata.xml: %.appdata.xml.in + $(AM_V_GEN)$(MSGFMT) --xml --template $< -d $(top_srcdir)/po -o $@ +``` + +`meson.build`: +```meson +i18n = import('i18n') + +gettext_package = 'foo' +add_project_arguments('-DGETTEXT_PACKAGE=' + gettext_package, language: 'c') +subdir('po') + +i18n.merge_file( + input: 'foo.desktop.in', + output: 'foo.desktop', + type: 'desktop', + po_dir: 'po', + install: true, + install_dir: get_option('datadir') / 'applications' +) + +i18n.merge_file( + input: 'foo.appdata.xml.in', + output: 'foo.appdata.xml', + po_dir: 'po', + install: true, + install_dir: get_option('datadir') / 'appdata' +) +``` + +`po/meson.build`: +```meson +i18n.gettext(gettext_package, preset: 'glib') +``` diff --git a/meson/docs/markdown/Precompiled-headers.md b/meson/docs/markdown/Precompiled-headers.md new file mode 100644 index 000000000..05b50bcc1 --- /dev/null +++ b/meson/docs/markdown/Precompiled-headers.md @@ -0,0 +1,119 @@ +--- +short-description: Using precompiled headers to reduce compilation time +... + +# Precompiled headers + +Parsing header files of system libraries is surprisingly expensive. A +typical source file has less than one thousand lines of code. In +contrast the headers of large libraries can be tens of thousands of +lines. This is especially problematic with C++, where header-only +libraries are common and they may contain extremely complex code. This +makes them slow to compile. + +Precompiled headers are a tool to mitigate this issue. Basically what +they do is parse the headers and then serialize the compiler's +internal state to disk. The downside of precompiled headers is that +they are tricky to set up. Meson has native support for precompiled +headers, but using them takes a little work. + +A precompiled header file is relatively simple. It is a header file +that contains `#include` directives for the system headers to +precompile. Here is a C++ example. + +```cpp + #include + #include + #include +``` + +In Meson, precompiled header files are always per-target. That is, the +given precompiled header is used when compiling every single file in +the target. Due to limitations of the underlying compilers, this +header file must not be in the same subdirectory as any of the source +files. It is strongly recommended that you create a subdirectory +called `pch` in the target directory and put the header files (and +nothing else) there. + +Toggling the usage of precompiled headers +-- + +If you wish to compile your project without precompiled headers, you +can change the value of the pch option by passing `-Db_pch=false` +argument to Meson at configure time or later with `meson configure`. You can +also toggle the use of pch in a configured build directory with the +GUI tool. You don't have to do any changes to the source +code. Typically this is done to test whether your project compiles +cleanly without pch (that is, checking that its #includes are in +order) and working around compiler bugs. + +Using precompiled headers with GCC and derivatives +-- + +Once you have a file to precompile, you can enable the use of pch for +a given target with a *pch* keyword argument. As an example, let's assume +you want to build a small C binary with precompiled headers. +Let's say the source files of the binary use the system headers `stdio.h` +and `string.h`. Then you create a header file `pch/myexe_pch.h` with this +content: + +```c +#include +#include +``` + +And add this to meson: + +```meson +executable('myexe', sources : sourcelist, c_pch : 'pch/myexe_pch.h') +``` + +That's all. You should note that your source files must _not_ include +the file `myexe_pch.h` and you must _not_ add the pch subdirectory to +your search path. Any modification of the original program files is +not necessary. Meson will make the compiler include the pch with +compiler options. If you want to disable pch (because of, say, +compiler bugs), it can be done entirely on the build system side with +no changes to source code. + +You can use precompiled headers on any build target. If your target +has multiple languages, you can specify multiple pch files like this. + +```meson +executable('multilang', sources : srclist, + c_pch : 'pch/c_pch.h', cpp_pch : 'pch/cpp_pch.h') +``` + +Using precompiled headers with MSVC +-- +Since Meson version 0.50.0, precompiled headers with MSVC work just like +with GCC. Meson will automatically create the matching pch implementation +file for you. + +Before version 0.50.0, in addition to the header file, Meson +also requires a corresponding source file. If your header is called +`foo_pch.h`, the corresponding source file is usually called +`foo_pch.cpp` and it resides in the same `pch` subdirectory as the +header. Its contents are this: + +```cpp +#if !defined(_MSC_VER) +#error "This file is only for use with MSVC." +#endif + +#include "foo_pch.h" +``` + +To enable pch, simply list both files in the target definition: + +```meson +executable('myexe', sources : srclist, + cpp_pch : ['pch/foo_pch.h', 'pch/foo_pch.cpp']) +``` + +This form will work with both GCC and msvc, because Meson knows that +GCC does not need a `.cpp` file and thus just ignores it. + +It should be noted that due to implementation details of the MSVC +compiler, having precompiled headers for multiple languages in the +same target is not guaranteed to work. diff --git a/meson/docs/markdown/Project-templates.md b/meson/docs/markdown/Project-templates.md new file mode 100644 index 000000000..7ded318b7 --- /dev/null +++ b/meson/docs/markdown/Project-templates.md @@ -0,0 +1,49 @@ +--- +short-description: Project templates +... + +# Project templates + +To make it easier for new developers to start working, Meson ships a +tool to generate the basic setup of different kinds of projects. This +functionality can be accessed with the `meson init` command. A typical +project setup would go like this: + +```console +$ mkdir project_name +$ cd project_name +$ meson init --language=c --name=myproject --version=0.1 +``` + +This would create the build definitions for a helloworld type +project. The result can be compiled as usual. For example it +could be done like this: + +``` +$ meson setup builddir +$ meson compile -C builddir +``` + +The generator has many different projects and settings. They can all +be listed by invoking the command `meson init --help`. + +This feature is available since Meson version 0.45.0. + +# Generate a build script for an existing project + +With `meson init` you can generate a build script for an existing +project with existing project files by running the command in the +root directory of your project. Meson currently supports this +feature for `executable`, and `jar` projects. + +# Build after generation of template + +It is possible to have Meson generate a build directory from the +`meson init` command without running `meson setup`. This is done +by passing `-b` or `--build` switch. + +```console +$ mkdir project_name +$ cd project_name +$ meson init --language=c --name=myproject --version=0.1 --build +``` \ No newline at end of file diff --git a/meson/docs/markdown/Python-3-module.md b/meson/docs/markdown/Python-3-module.md new file mode 100644 index 000000000..f32cdc98b --- /dev/null +++ b/meson/docs/markdown/Python-3-module.md @@ -0,0 +1,52 @@ +# Python 3 module + +This module provides support for dealing with Python 3. It has the +following methods. + +This module is deprecated and replaced by the [python](Python-module.md) module. + +## find_python + +This is a cross platform way of finding the Python 3 executable, which +may have a different name on different operating systems. Returns an +[external program](Reference-manual.md#external-program-object) object. + +*Added 0.38.0* + +Deprecated, replaced by [`find_installation`](Python-module.md#find_installation) +function from `python` module. + +## extension_module + +Creates a `shared_module` target that is named according to the naming +conventions of the target platform. All positional and keyword +arguments are the same as for +[shared_module](Reference-manual.md#shared_module). + +`extension_module` does not add any dependencies to the library so user may +need to add `dependencies : dependency('python3')`, see +[Python3 dependency](Dependencies.md#python3). + +*Added 0.38.0* + +Deprecated, replaced by [`extension_module`](Python-module.md#extension_module) +method from `python` module. + +## language_version + +Returns a string with the Python language version such as `3.5`. + +*Added 0.40.0* + +Deprecated, replaced by [`language_version`](Python-module.md#language_version) +method from `python` module. + +## sysconfig_path + +Returns the Python sysconfig path without prefix, such as +`lib/python3.6/site-packages`. + +*Added 0.40.0* + +Deprecated, replaced by [`get_path`](Python-module.md#get_path) +method from `python` module. diff --git a/meson/docs/markdown/Python-module.md b/meson/docs/markdown/Python-module.md new file mode 100644 index 000000000..d99b36a3a --- /dev/null +++ b/meson/docs/markdown/Python-module.md @@ -0,0 +1,244 @@ +--- +short-description: Generic python module +authors: + - name: Mathieu Duponchelle + email: mathieu@centricular.com + years: [2018] + has-copyright: false +... + +# Python module + +This module provides support for finding and building extensions against +python installations, be they python 2 or 3. + +*Added 0.46.0* + +## Functions + +### `find_installation()` + +``` meson +pymod.find_installation(name_or_path, ...) +``` + +Find a python installation matching `name_or_path`. + +That argument is optional, if not provided then the returned python +installation will be the one used to run meson. + +If provided, it can be: + +- A simple name, eg `python-2.7`, meson will look for an external program + named that way, using [find_program] + +- A path, eg `/usr/local/bin/python3.4m` + +- One of `python2` or `python3`: in either case, the module will try some + alternative names: `py -2` or `py -3` on Windows, and `python` everywhere. + In the latter case, it will check whether the version provided by the + sysconfig module matches the required major version + +Keyword arguments are the following: + +- `required`: by default, `required` is set to `true` and Meson will + abort if no python installation can be found. If `required` is set to `false`, + Meson will continue even if no python installation was found. You can + then use the `.found()` method on the returned object to check + whether it was found or not. Since *0.48.0* the value of a + [`feature`](Build-options.md#features) option can also be passed to the + `required` keyword argument. +- `disabler`: if `true` and no python installation can be found, return a + [disabler object](Reference-manual.md#disabler-object) instead of a not-found object. + *Since 0.49.0* +- `modules`: a list of module names that this python installation must have. + *Since 0.51.0* + +**Returns**: a [python installation][`python_installation` object] + +## `python_installation` object + +The `python_installation` object is an [external program], with several +added methods. + +### Methods + +#### `path()` + +```meson +str py_installation.path() +``` + +*Added 0.50.0* + +Works like the path method of other `ExternalProgram` objects. Was not +provided prior to 0.50.0 due to a bug. + +#### `extension_module()` + +``` meson +shared_module py_installation.extension_module(module_name, list_of_sources, ...) +``` + +Create a `shared_module` target that is named according to the naming +conventions of the target platform. + +All positional and keyword arguments are the same as for [shared_module], +excluding `name_suffix` and `name_prefix`, and with the addition of the following: + +- `subdir`: By default, meson will install the extension module in + the relevant top-level location for the python installation, eg + `/usr/lib/site-packages`. When subdir is passed to this method, + it will be appended to that location. This keyword argument is + mutually exclusive with `install_dir` + +`extension_module` does not add any dependencies to the library so user may +need to add `dependencies : py_installation.dependency()`, see [][`dependency()`]. + +**Returns**: a [buildtarget object] + +#### `dependency()` + +``` meson +python_dependency py_installation.dependency(...) +``` + +This method accepts no positional arguments, and the same keyword arguments as +the standard [dependency] function. It also supports the following keyword +argument: + +- `embed`: *(since 0.53.0)* If true, meson will try to find a python dependency + that can be used for embedding python into an application. + +**Returns**: a [python dependency][`python_dependency` object] + +#### `install_sources()` + +``` meson +void py_installation.install_sources(list_of_files, ...) +``` + +Install actual python sources (`.py`). + +All positional and keyword arguments are the same as for [install_data], +with the addition of the following: + +- `pure`: On some platforms, architecture independent files are expected + to be placed in a separate directory. However, if the python sources + should be installed alongside an extension module built with this + module, this keyword argument can be used to override that behaviour. + Defaults to `true` + +- `subdir`: See documentation for the argument of the same name to + [][`extension_module()`] + +#### `get_install_dir()` + +``` meson +string py_installation.get_install_dir(...) +``` + +Retrieve the directory [][`install_sources()`] will install to. + +It can be useful in cases where `install_sources` cannot be used directly, +for example when using [configure_file]. + +This function accepts no arguments, its keyword arguments are the same +as [][`install_sources()`]. + +**Returns**: A string + +#### `language_version()` + +``` meson +string py_installation.language_version() +``` + +Get the major.minor python version, eg `2.7`. + +The version is obtained through the `sysconfig` module. + +This function expects no arguments or keyword arguments. + +**Returns**: A string + +#### `get_path()` + +``` meson +string py_installation.get_path(path_name, fallback) +``` + +Get a path as defined by the `sysconfig` module. + +For example: + +``` meson +purelib = py_installation.get_path('purelib') +``` + +This function requires at least one argument, `path_name`, +which is expected to be a non-empty string. + +If `fallback` is specified, it will be returned if no path +with the given name exists. Otherwise, attempting to read +a non-existing path will cause a fatal error. + +**Returns**: A string + +#### `has_path()` + +``` meson + bool py_installation.has_path(path_name) +``` + +**Returns**: true if a path named `path_name` can be retrieved with +[][`get_path()`], false otherwise. + +#### `get_variable()` + +``` meson +string py_installation.get_variable(variable_name, fallback) +``` + +Get a variable as defined by the `sysconfig` module. + +For example: + +``` meson +py_bindir = py_installation.get_variable('BINDIR', '') +``` + +This function requires at least one argument, `variable_name`, +which is expected to be a non-empty string. + +If `fallback` is specified, it will be returned if no variable +with the given name exists. Otherwise, attempting to read +a non-existing variable will cause a fatal error. + +**Returns**: A string + +#### `has_variable()` + +``` meson + bool py_installation.has_variable(variable_name) +``` + +**Returns**: true if a variable named `variable_name` can be retrieved with +[][`get_variable()`], false otherwise. + +## `python_dependency` object + +This [dependency object] subclass will try various methods to obtain the +compiler and linker arguments, starting with pkg-config then potentially +using information obtained from python's `sysconfig` module. + +It exposes the same methods as its parent class. + +[find_program]: Reference-manual.md#find_program +[shared_module]: Reference-manual.md#shared_module +[external program]: Reference-manual.md#external-program-object +[dependency]: Reference-manual.md#dependency +[install_data]: Reference-manual.md#install_data +[configure_file]: Reference-manual.md#configure_file +[dependency object]: Reference-manual.md#dependency-object +[buildtarget object]: Reference-manual.md#build-target-object diff --git a/meson/docs/markdown/Qt4-module.md b/meson/docs/markdown/Qt4-module.md new file mode 100644 index 000000000..4be1be59b --- /dev/null +++ b/meson/docs/markdown/Qt4-module.md @@ -0,0 +1,4 @@ +# Qt4 module + +This module provides support for Qt4's `moc`, `uic` and `rcc` +tools. It is used identically to the [Qt 5 module](Qt5-module.md). diff --git a/meson/docs/markdown/Qt5-module.md b/meson/docs/markdown/Qt5-module.md new file mode 100644 index 000000000..0d9a6b6b9 --- /dev/null +++ b/meson/docs/markdown/Qt5-module.md @@ -0,0 +1,73 @@ +# Qt5 module + +The Qt5 module provides tools to automatically deal with the various +tools and steps required for Qt. The module has two methods. + +## preprocess + +This method takes the following keyword arguments: + - `moc_headers`, `moc_sources`, `ui_files`, `qresources`, which define the files that require preprocessing with `moc`, `uic` and `rcc` + - `include_directories`, the directories to add to header search path for `moc` (optional) + - `moc_extra_arguments`, any additional arguments to `moc` (optional). Available since v0.44.0. + - `uic_extra_arguments`, any additional arguments to `uic` (optional). Available since v0.49.0. + - `rcc_extra_arguments`, any additional arguments to `rcc` (optional). Available since v0.49.0. + - `dependencies`, dependency objects needed by moc. Available since v0.48.0. + +It returns an opaque object that should be passed to a main build target. + +## compile_translations (since v0.44.0) + +This method generates the necessary targets to build translation files with lrelease, it takes the following keyword arguments: + - `ts_files`, the list of input translation files produced by Qt's lupdate tool. + - `install` when true, this target is installed during the install step (optional). + - `install_dir` directory to install to (optional). + - `build_by_default` when set to true, to have this target be built by default, that is, when invoking `meson compile`; the default value is false (optional). + +## has_tools + +This method returns `true` if all tools used by this module are found, `false` +otherwise. + +It should be used to compile optional Qt code: +```meson +qt5 = import('qt5') +if qt5.has_tools(required: get_option('qt_feature')) + moc_files = qt5.preprocess(...) + ... +endif +``` + +This method takes the following keyword arguments: +- `required`: by default, `required` is set to `false`. If `required` is set to + `true` or an enabled [`feature`](Build-options.md#features) and some tools are + missing Meson will abort. +- `method`: method used to find the Qt dependency (`auto` by default). + +*Since: 0.54.0* + +## Dependencies + +See [Qt dependencies](Dependencies.md#qt4-qt5) + +The 'modules' argument is used to include Qt modules in the project. +See the Qt documentation for the [list of modules](http://doc.qt.io/qt-5/qtmodules.html). + +The 'private_headers' argument allows usage of Qt's modules private headers. +(since v0.47.0) + +## Example +A simple example would look like this: + +```meson +qt5 = import('qt5') +qt5_dep = dependency('qt5', modules: ['Core', 'Gui']) +inc = include_directories('includes') +moc_files = qt5.preprocess(moc_headers : 'myclass.h', + moc_extra_arguments: ['-DMAKES_MY_MOC_HEADER_COMPILE'], + include_directories: inc, + dependencies: qt5_dep) +translations = qt5.compile_translations(ts_files : 'myTranslation_fr.ts', build_by_default : true) +executable('myprog', 'main.cpp', 'myclass.cpp', moc_files, + include_directories: inc, + dependencies : qt5_dep) +``` diff --git a/meson/docs/markdown/Quick-guide.md b/meson/docs/markdown/Quick-guide.md new file mode 100644 index 000000000..74636e5d4 --- /dev/null +++ b/meson/docs/markdown/Quick-guide.md @@ -0,0 +1,148 @@ +--- +title: Quickstart Guide +short-description: Getting Started using Mesonbuild +... + +# Using Meson + +Meson has been designed to be as simple to use as possible. This page +outlines the initial steps needed for installation, troubleshooting, +and standard use. + +For more advanced configuration please refer to the command line help `meson --help` +or the Meson documentation located at the [Mesonbuild](https://mesonbuild.com) website. + +Table of Contents: +* [Requirements](#requirements) +* [Installation using package manager](#installation-using-package-manager) +* [Installation using Python](#installation-using-python) +* [Installation from source](#installation-from-source) +* [Troubleshooting](#troubleshooting) +* [Compiling a Meson project](#compiling-a-meson-project) +* [Using Meson as a distro packager](#using-meson-as-a-distro-packager) + +Requirements +-- + +* [Python 3](https://python.org) +* [Ninja](https://github.com/ninja-build/ninja/) + +*Ninja is only needed if you use the Ninja backend. Meson can also +generate native VS and XCode project files.* + + +Installation using package manager +-- + +Ubuntu: + +```console +$ sudo apt-get install python3 python3-pip python3-setuptools \ + python3-wheel ninja-build +``` +*Due to our frequent release cycle and development speed, distro packaged software may quickly become outdated.* + +Installation using Python +-- +Requirements: **pip3** + +The best way to receive the most up-to-date version of Mesonbuild. + +Install as a local user (recommended): +```console +$ pip3 install --user meson +``` +Install as root: +```console +$ pip3 install meson +``` + +*If you are unsure whether to install as root or a local user, install as a local user.* + + +Installation from source +-- +Requirements: **git** + +Meson can be run directly from the cloned git repository. + +```console +$ git clone https://github.com/mesonbuild/meson.git /path/to/sourcedir +``` +Troubleshooting: +-- +Common Issues: +```console +$ meson builddir +$ bash: /usr/bin/meson: No such file or directory +``` +Description: The default installation prefix for the python pip module installation is not included in your shell environment PATH. The default prefix for python pip installation modules is located under ``/usr/local``. + +**Resolution: +This issue can be resolved by altering the default shell environment PATH to include ``/usr/local/bin``. ** + +*Note: There are other ways of fixing this issue such as using symlinks or copying the binaries to a default path and these methods are not recommended or supported as they may break package management interoperability.* + + +Compiling a Meson project +-- + +The most common use case of Meson is compiling code on a code base you +are working on. The steps to take are very simple. + +```console +$ cd /path/to/source/root +$ meson builddir && cd builddir +$ meson compile +$ meson test +``` + +The only thing to note is that you need to create a separate build +directory. Meson will not allow you to build source code inside your +source tree. All build artifacts are stored in the build +directory. This allows you to have multiple build trees with different +configurations at the same time. This way generated files are not +added into revision control by accident. + +To recompile after code changes, just type `meson compile`. The build command +is always the same. You can do arbitrary changes to source code and +build system files and Meson will detect those and will do the right +thing. If you want to build optimized binaries, just use the argument +`--buildtype=debugoptimized` when running Meson. It is recommended +that you keep one build directory for unoptimized builds and one for +optimized ones. To compile any given configuration, just go into the +corresponding build directory and run `meson compile`. + +Meson will automatically add compiler flags to enable debug +information and compiler warnings (i.e. `-g` and `-Wall`). This means +the user does not have to deal with them and can instead focus on +coding. + +Using Meson as a distro packager +-- + +Distro packagers usually want total control on the build flags +used. Meson supports this use case natively. The commands needed to +build and install Meson projects are the following. + +```console +$ cd /path/to/source/root +$ meson --prefix /usr --buildtype=plain builddir -Dc_args=... -Dcpp_args=... -Dc_link_args=... -Dcpp_link_args=... +$ meson compile -C builddir +$ meson test -C builddir +$ DESTDIR=/path/to/staging/root meson install -C builddir +``` + +The command line switch `--buildtype=plain` tells Meson not to add its +own flags to the command line. This gives the packager total control +on used flags. + +This is very similar to other build systems. The only difference is +that the `DESTDIR` variable is passed as an environment variable +rather than as an argument to `meson install`. + +As distro builds happen always from scratch, you might consider +enabling [unity builds](Unity-builds.md) on your packages because they +are faster and produce better code. However there are many projects +that do not build with unity builds enabled so the decision to use +unity builds must be done by the packager on a case by case basis. diff --git a/meson/docs/markdown/RPM-module.md b/meson/docs/markdown/RPM-module.md new file mode 100644 index 000000000..cab6d968a --- /dev/null +++ b/meson/docs/markdown/RPM-module.md @@ -0,0 +1,16 @@ +# RPM module + +The RPM module can be used to create a sample rpm spec file for a +Meson project. It autodetects installed files, dependencies and so +on. Using it is very simple. At the very end of your Meson project +(that is, the end of your top level `meson.build` file) add these two +lines. + +```meson +rpm = import('rpm') +rpm.generate_spec_template() +``` + +Run Meson once on your code and the template will be written in your +build directory. Then remove the two lines above and manually edit the +template to add missing information. After this it is ready for use. diff --git a/meson/docs/markdown/Reference-manual.md b/meson/docs/markdown/Reference-manual.md new file mode 100644 index 000000000..966d408ef --- /dev/null +++ b/meson/docs/markdown/Reference-manual.md @@ -0,0 +1,2588 @@ +# Reference manual + +## Functions + +The following functions are available in build files. Click on each to +see the description and usage. The objects returned by them are [list +afterwards](#returned-objects). + +### add_global_arguments() + +``` meson + void add_global_arguments(arg1, arg2, ...) +``` + +Adds the positional arguments to the compiler command line. This +function has two keyword arguments: + +- `language`: specifies the language(s) that the arguments should be +applied to. If a list of languages is given, the arguments are added +to each of the corresponding compiler command lines. Note that there +is no way to remove an argument set in this way. If you have an +argument that is only used in a subset of targets, you have to specify +it in per-target flags. + +- `native` *(since 0.48.0)*: a boolean specifying whether the arguments should be + applied to the native or cross compilation. If `true` the arguments + will only be used for native compilations. If `false` the arguments + will only be used in cross compilations. If omitted, the flags are + added to native compilations if compiling natively and cross + compilations (only) when cross compiling. + +The arguments are used in all compiler invocations with the exception +of compile tests, because you might need to run a compile test with +and without the argument in question. For this reason only the +arguments explicitly specified are used during compile tests. + +**Note:** Usually you should use `add_project_arguments` instead, + because that works even when you project is used as a subproject. + +**Note:** You must pass always arguments individually `arg1, arg2, + ...` rather than as a string `'arg1 arg2', ...` + +### add_global_link_arguments() + +``` meson + void add_global_link_arguments(*arg1*, *arg2*, ...) +``` + +Like `add_global_arguments` but the arguments are passed to the linker. + +### add_languages() + +``` meson + bool add_languages(*langs*) +``` + +Add programming languages used by the project. Equivalent to having them in the +`project` declaration. This function is usually used to add languages that are +only used under some conditions, like this: + +```meson +project('foobar', 'c') +if compiling_for_osx + add_languages('objc') +endif +if add_languages('cpp', required : false) + executable('cpp-app', 'main.cpp') +endif +``` + +Takes the following keyword arguments: + +- `required`: defaults to `true`, which means that if any of the languages +specified is not found, Meson will halt. *(since 0.47.0)* The value of a +[`feature`](Build-options.md#features) option can also be passed. + +- `native` *(since 0.54.0)*: if set to `true`, the language will be used to compile for the build + machine, if `false`, for the host machine. + +Returns `true` if all languages specified were found and `false` otherwise. + +If `native` is omitted, the languages may be used for either build or host +machine, but are never required for the build machine. (i.e. it is equivalent +to `add_languages(*langs*, native: false, required: *required*) and +add_languages(*langs*, native: true, required: false)`. This default behaviour +may change to `native: false` in a future meson version. + +### add_project_arguments() + +``` meson + void add_project_arguments(arg1, arg2, ...) +``` + +This function behaves in the same way as `add_global_arguments` except +that the arguments are only used for the current project, they won't +be used in any other subproject. + +### add_project_link_arguments() + +``` meson + void add_project_link_arguments(*arg1*, *arg2*, ...) +``` + +Like `add_project_arguments` but the arguments are passed to the linker. + +### add_test_setup() + +``` meson + void add_test_setup(*name*, ...) +``` + +Add a custom test setup that can be used to run the tests with a +custom setup, for example under Valgrind. The keyword arguments are +the following: + +- `env`: environment variables to set, such as `['NAME1=value1', + 'NAME2=value2']`, or an [`environment()` + object](#environment-object) which allows more sophisticated + environment juggling. *(since 0.52.0)* A dictionary is also accepted. +- `exe_wrapper`: a list containing the wrapper command or script followed by the arguments to it +- `gdb`: if `true`, the tests are also run under `gdb` +- `timeout_multiplier`: a number to multiply the test timeout with +- `is_default` *(since 0.49.0)*: a bool to set whether this is the default test setup. + If `true`, the setup will be used whenever `meson test` is run + without the `--setup` option. + +To use the test setup, run `meson test --setup=*name*` inside the +build dir. + +Note that all these options are also available while running the +`meson test` script for running tests instead of `ninja test` or +`msbuild RUN_TESTS.vcxproj`, etc depending on the backend. + +### alias_target + +``` meson +runtarget alias_target(target_name, dep1, ...) +``` + +*(since 0.52.0)* + +This function creates a new top-level target. Like all top-level targets, this +integrates with the selected backend. For instance, with you can +run it as `meson compile target_name`. This is a dummy target that does not execute any +command, but ensures that all dependencies are built. Dependencies can be any +build target (e.g. return value of [executable()](#executable), custom_target(), etc) + +### assert() + +``` meson + void assert(*condition*, *message*) +``` + +Abort with an error message if `condition` evaluates to `false`. + +*(since 0.53.0)* `message` argument is optional and defaults to print the condition +statement instead. + +### benchmark() + +``` meson + void benchmark(name, executable, ...) +``` + +Creates a benchmark item that will be run when the benchmark target is +run. The behavior of this function is identical to [`test()`](#test) except for: + +* benchmark() has no `is_parallel` keyword because benchmarks are not run in parallel +* benchmark() does not automatically add the `MALLOC_PERTURB_` environment variable + +*Note:* Prior to 0.52.0 benchmark would warn that `depends` and `priority` +were unsupported, this is incorrect. + +### both_libraries() + +``` meson + buildtarget = both_libraries(library_name, list_of_sources, ...) +``` + +*(since 0.46.0)* + +Builds both a static and shared library with the given +sources. Positional and keyword arguments are otherwise the same as +for [`library`](#library). Source files will be compiled only once and +object files will be reused to build both shared and static libraries, +unless `b_staticpic` user option or `pic` argument are set to false in +which case sources will be compiled twice. + +The returned [buildtarget](#build-target-object) always represents the +shared library. In addition it supports the following extra methods: + +- `get_shared_lib()` returns the shared library build target +- `get_static_lib()` returns the static library build target + +### build_target() + +Creates a build target whose type can be set dynamically with the +`target_type` keyword argument. + +`target_type` may be set to one of: + +- `executable` +- `shared_library` +- `shared_module` +- `static_library` +- `both_libraries` +- `library` +- `jar` + +This declaration: + +```meson +executable() +``` + +is equivalent to this: + +```meson +build_target(, target_type : 'executable') +``` + +The object returned by `build_target` and all convenience wrappers for +`build_target` such as [`executable`](#executable) and +[`library`](#library) has methods that are documented in the [object +methods section](#build-target-object) below. + +### configuration_data() + +``` meson + configuration_data_object = configuration_data(...) +``` + +Creates an empty configuration object. You should add your +configuration with [its method calls](#configuration-data-object) and +finally use it in a call to `configure_file`. + +*(since 0.49.0)* Takes an optional dictionary as first argument. If +provided, each key/value pair is added into the `configuration_data` +as if `set()` method was called for each of them. + +### configure_file() + +``` meson + generated_file = configure_file(...) +``` + +This function can run in three modes depending on the keyword arguments +passed to it. + +When a [`configuration_data()`](#configuration_data) object is passed +to the `configuration:` keyword argument, it takes a template file as +the `input:` (optional) and produces the `output:` (required) by +substituting values from the configuration data as detailed in [the +configuration file documentation](Configuration.md). *(since 0.49.0)* A +dictionary can be passed instead of a +[`configuration_data()`](#configuration_data) object. + +When a list of strings is passed to the `command:` keyword argument, +it takes any source or configured file as the `input:` and assumes +that the `output:` is produced when the specified command is run. + +*(since 0.47.0)* When the `copy:` keyword argument is set to `true`, +this function will copy the file provided in `input:` to a file in the +build directory with the name `output:` in the current directory. + +These are all the supported keyword arguments: + +- `capture` *(since 0.41.0)*: when this argument is set to true, + Meson captures `stdout` of the `command` and writes it to the target + file specified as `output`. +- `command`: as explained above, if specified, Meson does not create + the file itself but rather runs the specified command, which allows + you to do fully custom file generation. *(since 0.52.0)* The command can contain + file objects and more than one file can be passed to the `input` keyword + argument, see [`custom_target()`](#custom_target) for details about string + substitutions. +- `copy` *(since 0.47.0)*: as explained above, if specified Meson only + copies the file from input to output. +- `depfile` *(since 0.52.0)*: a dependency file that the command can write listing + all the additional files this target depends on. A change + in any one of these files triggers a reconfiguration. +- `format` *(since 0.46.0)*: the format of defines. It defaults to `meson`, and so substitutes +`#mesondefine` statements and variables surrounded by `@` characters, you can also use `cmake` +to replace `#cmakedefine` statements and variables with the `${variable}` syntax. Finally you can use +`cmake@` in which case substitutions will apply on `#cmakedefine` statements and variables with +the `@variable@` syntax. +- `input`: the input file name. If it's not specified in configuration + mode, all the variables in the `configuration:` object (see above) + are written to the `output:` file. +- `install` *(since 0.50.0)*: when true, this generated file is installed during +the install step, and `install_dir` must be set and not empty. When false, this +generated file is not installed regardless of the value of `install_dir`. +When omitted it defaults to true when `install_dir` is set and not empty, +false otherwise. +- `install_dir`: the subdirectory to install the generated file to + (e.g. `share/myproject`), if omitted or given the value of empty + string, the file is not installed. +- `install_mode` *(since 0.47.0)*: specify the file mode in symbolic format + and optionally the owner/uid and group/gid for the installed files. +- `output`: the output file name. *(since 0.41.0)* may contain + `@PLAINNAME@` or `@BASENAME@` substitutions. In configuration mode, + the permissions of the input file (if it is specified) are copied to + the output file. +- `output_format` *(since 0.47.0)*: the format of the output to generate when no input + was specified. It defaults to `c`, in which case preprocessor directives + will be prefixed with `#`, you can also use `nasm`, in which case the + prefix will be `%`. +- `encoding` *(since 0.47.0)*: set the file encoding for the input and output file, + defaults to utf-8. The supported encodings are those of python3, see + [standard-encodings](https://docs.python.org/3/library/codecs.html#standard-encodings). + +### custom_target() + +``` meson + customtarget custom_target(*name*, ...) +``` + +Create a custom top level build target. The only positional argument +is the name of this target and the keyword arguments are the +following. + +- `build_by_default` *(since 0.38.0)*: causes, when set to true, to + have this target be built by default. This means it will be built when + `meson compile` is called without any arguments. The default value is `false`. + *(since 0.50.0)* If `build_by_default` is explicitly set to false, `install` + will no longer override it. If `build_by_default` is not set, `install` will + still determine its default. +- `build_always` **(deprecated)**: if `true` this target is always considered out of + date and is rebuilt every time. Equivalent to setting both + `build_always_stale` and `build_by_default` to true. +- `build_always_stale` *(since 0.47.0)*: if `true` the target is always considered out of date. + Useful for things such as build timestamps or revision control tags. + The associated command is run even if the outputs are up to date. +- `capture`: there are some compilers that can't be told to write + their output to a file but instead write it to standard output. When + this argument is set to true, Meson captures `stdout` and writes it + to the target file. Note that your command argument list may not + contain `@OUTPUT@` when capture mode is active. +- `console` *(since 0.48.0)*: keyword argument conflicts with `capture`, and is meant + for commands that are resource-intensive and take a long time to + finish. With the Ninja backend, setting this will add this target + to [Ninja's `console` pool](https://ninja-build.org/manual.html#_the_literal_console_literal_pool), + which has special properties such as not buffering stdout and + serializing all targets in this pool. +- `command`: command to run to create outputs from inputs. The command + may be strings or the return value of functions that return file-like + objects such as [`find_program()`](#find_program), + [`executable()`](#executable), [`configure_file()`](#configure_file), + [`files()`](#files), [`custom_target()`](#custom_target), etc. + Meson will automatically insert the appropriate dependencies on + targets and files listed in this keyword argument. + Note: always specify commands in array form `['commandname', + '-arg1', '-arg2']` rather than as a string `'commandname -arg1 + -arg2'` as the latter will *not* work. +- `depend_files`: files ([`string`](#string-object), + [`files()`](#files), or [`configure_file()`](#configure_file)) that + this target depends on but are not listed in the `command` keyword + argument. Useful for adding regen dependencies. +- `depends`: specifies that this target depends on the specified + target(s), even though it does not take any of them as a command + line argument. This is meant for cases where you have a tool that + e.g. does globbing internally. Usually you should just put the + generated sources as inputs and Meson will set up all dependencies + automatically. +- `depfile`: a dependency file that the command can write listing + all the additional files this target depends on, for example a C + compiler would list all the header files it included, and a change + in any one of these files triggers a recompilation +- `input`: list of source files. *(since 0.41.0)* the list is flattened. +- `install`: when true, this target is installed during the install step +- `install_dir`: directory to install to +- `install_mode` *(since 0.47.0)*: the file mode and optionally the + owner/uid and group/gid +- `output`: list of output files + +The list of strings passed to the `command` keyword argument accept +the following special string substitutions: + +- `@INPUT@`: the full path to the input passed to `input`. If more than + one input is specified, all of them will be substituted as separate + arguments only if the command uses `'@INPUT@'` as a + standalone-argument. For instance, this would not work: `command : + ['cp', './@INPUT@']`, but this would: `command : ['cp', '@INPUT@']`. +- `@OUTPUT@`: the full path to the output passed to `output`. If more + than one outputs are specified, the behavior is the same as + `@INPUT@`. +- `@INPUT0@` `@INPUT1@` `...`: the full path to the input with the specified array index in `input` +- `@OUTPUT0@` `@OUTPUT1@` `...`: the full path to the output with the specified array index in `output` +- `@OUTDIR@`: the full path to the directory where the output(s) must be written +- `@DEPFILE@`: the full path to the dependency file passed to `depfile` +- `@PLAINNAME@`: the input filename, without a path +- `@BASENAME@`: the input filename, with extension removed +- `@PRIVATE_DIR@` *(since 0.50.1)*: path to a directory where the custom target must store all its intermediate files. + +*(since 0.47.0)* The `depfile` keyword argument also accepts the `@BASENAME@` and `@PLAINNAME@` substitutions. + +The returned object also has methods that are documented in the +[object methods section](#custom-target-object) below. + +### declare_dependency() + +``` meson + dependency_object declare_dependency(...) +``` + +This function returns a [dependency object](#dependency-object) that +behaves like the return value of [`dependency`](#dependency) but is +internal to the current build. The main use case for this is in +subprojects. This allows a subproject to easily specify how it should +be used. This makes it interchangeable with the same dependency that +is provided externally by the system. This function has the following +keyword arguments: + +- `compile_args`: compile arguments to use. +- `dependencies`: other dependencies needed to use this dependency. +- `include_directories`: the directories to add to header search path, + must be include_directories objects or *(since 0.50.0)* plain strings +- `link_args`: link arguments to use. +- `link_with`: libraries to link against. +- `link_whole` *(since 0.46.0)*: libraries to link fully, same as [`executable`](#executable). +- `sources`: sources to add to targets (or generated header files + that should be built before sources including them are built) +- `version`: the version of this dependency, such as `1.2.3` +- `variables` *(since 0.54.0)*: a dictionary of arbitrary strings, this is meant to be used + in subprojects where special variables would be provided via cmake or + pkg-config. + +### dependency() + +``` meson + dependency_object dependency(*dependency_name*, ...) +``` + +Finds an external dependency (usually a library installed on your +system) with the given name with `pkg-config` and [with +CMake](Dependencies.md#cmake) if `pkg-config` fails. Additionally, +frameworks (OSX only) and [library-specific fallback detection +logic](Dependencies.md#dependencies-with-custom-lookup-functionality) +are also supported. This function supports the following keyword +arguments: + +- `default_options` *(since 0.37.0)*: an array of default option values + that override those set in the subproject's `meson_options.txt` + (like `default_options` in [`project()`](#project), they only have + effect when Meson is run for the first time, and command line + arguments override any default options in build files) +- `fallback`: specifies a subproject fallback to use in case the + dependency is not found in the system. The value is an array + `['subproj_name', 'subproj_dep']` where the first value is the name + of the subproject and the second is the variable name in that + subproject that contains a dependency object such as the return + value of [`declare_dependency`](#declare_dependency) or + [`dependency()`](#dependency), etc. Note that this means the + fallback dependency may be a not-found dependency, in which + case the value of the `required:` kwarg will be obeyed. + *(since 0.54.0)* `'subproj_dep'` argument can be omitted in the case the + subproject used `meson.override_dependency('dependency_name', subproj_dep)`. + In that case, the `fallback` keyword argument can be a single string instead + of a list of 2 strings. *Since 0.55.0* the `fallback` keyword argument can be + omitted when there is a wrap file or a directory with the same `dependency_name`, + and subproject registered the dependency using + `meson.override_dependency('dependency_name', subproj_dep)`, or when the wrap + file has `dependency_name` in its `[provide]` section. + See [Wrap documentation](Wrap-dependency-system-manual.md#provide-section) + for more details. +- `language` *(since 0.42.0)*: defines what language-specific + dependency to find if it's available for multiple languages. +- `method`: defines the way the dependency is detected, the default is + `auto` but can be overridden to be e.g. `qmake` for Qt development, + and [different dependencies support different values]( + Dependencies.md#dependencies-with-custom-lookup-functionality) + for this (though `auto` will work on all of them) +- `native`: if set to `true`, causes Meson to find the dependency on + the build machine system rather than the host system (i.e. where the + cross compiled binary will run on), usually only needed if you build + a tool to be used during compilation. +- `not_found_message` *(since 0.50.0)*: an optional string that will + be printed as a `message()` if the dependency was not found. +- `required`: when set to false, Meson will proceed with the build + even if the dependency is not found. *(since 0.47.0)* The value of a + [`feature`](Build-options.md#features) option can also be passed. +- `static`: tells the dependency provider to try to get static + libraries instead of dynamic ones (note that this is not supported + by all dependency backends) +- `version` *(since 0.37.0)*: specifies the required version, a string containing a + comparison operator followed by the version string, examples include + `>1.0.0`, `<=2.3.5` or `3.1.4` for exact matching. + You can also specify multiple restrictions by passing a list to this + keyword argument, such as: `['>=3.14.0', '<=4.1.0']`. + These requirements are never met if the version is unknown. +- `include_type` *(since 0.52.0)*: an enum flag, marking how the dependency + flags should be converted. Supported values are `'preserve'`, `'system'` and + `'non-system'`. System dependencies may be handled differently on some + platforms, for instance, using `-isystem` instead of `-I`, where possible. + If `include_type` is set to `'preserve'`, no additional conversion will be + performed. The default value is `'preserve'`. +- other +[library-specific](Dependencies.md#dependencies-with-custom-lookup-functionality) +keywords may also be accepted (e.g. `modules` specifies submodules to use for +dependencies such as Qt5 or Boost. `components` allows the user to manually +add CMake `COMPONENTS` for the `find_package` lookup) +- `disabler` *(since 0.49.0)*: if `true` and the dependency couldn't be found, + returns a [disabler object](#disabler-object) instead of a not-found dependency. + +If dependency_name is `''`, the dependency is always not found. So +with `required: false`, this always returns a dependency object for +which the `found()` method returns `false`, and which can be passed +like any other dependency to the `dependencies:` keyword argument of a +`build_target`. This can be used to implement a dependency which is +sometimes not required e.g. in some branches of a conditional, or with +a `fallback:` kwarg, can be used to declare an optional dependency +that only looks in the specified subproject, and only if that's +allowed by `--wrap-mode`. + +The returned object also has methods that are documented in the +[object methods section](#dependency-object) below. + +### disabler() + +*(since 0.44.0)* + +Returns a [disabler object](#disabler-object). + +### error() + +``` meson + void error(message) +``` + +Print the argument string and halts the build process. + +### environment() + +``` meson + environment_object environment(...) +``` + +*(since 0.35.0)* + +Returns an empty [environment variable object](#environment-object). + +*(since 0.52.0)* Takes an optional dictionary as first argument. If +provided, each key/value pair is added into the `environment_object` +as if `set()` method was called for each of them. + +### executable() + +``` meson + buildtarget executable(*exe_name*, *sources*, ...) +``` + +Creates a new executable. The first argument specifies its name and +the remaining positional arguments define the input files to use. They +can be of the following types: + +- Strings relative to the current source directory +- [`files()`](#files) objects defined in any preceding build file +- The return value of configure-time generators such as [`configure_file()`](#configure_file) +- The return value of build-time generators such as + [`custom_target()`](#custom_target) or + [`generator.process()`](#generator-object) + +These input files can be sources, objects, libraries, or any other +file. Meson will automatically categorize them based on the extension +and use them accordingly. For instance, sources (`.c`, `.cpp`, +`.vala`, `.rs`, etc) will be compiled and objects (`.o`, `.obj`) and +libraries (`.so`, `.dll`, etc) will be linked. + +With the Ninja backend, Meson will create a build-time [order-only +dependency](https://ninja-build.org/manual.html#ref_dependencies) on +all generated input files, including unknown files. This is needed +to bootstrap the generation of the real dependencies in the +[depfile](https://ninja-build.org/manual.html#ref_headers) +generated by your compiler to determine when to rebuild sources. +Ninja relies on this dependency file for all input files, generated +and non-generated. The behavior is similar for other backends. + +Executable supports the following keyword arguments. Note that just +like the positional arguments above, these keyword arguments can also +be passed to [shared and static libraries](#library). + +- `_pch`: precompiled header file to use for the given language +- `_args`: compiler flags to use for the given language; + eg: `cpp_args` for C++ +- `build_by_default` *(since 0.38.0)*: causes, when set to true, to + have this target be built by default. This means it will be built when + `meson compile` is called without any arguments. The default value is + `true` for all built target types. +- `build_rpath`: a string to add to target's rpath definition in the + build dir, but which will be removed on install +- `dependencies`: one or more objects created with + [`dependency`](#dependency) or [`find_library`](#compiler-object) + (for external deps) or [`declare_dependency`](#declare_dependency) + (for deps built by the project) +- `extra_files`: not used for the build itself but are shown as + source files in IDEs that group files by targets (such as Visual + Studio) +- `gui_app`: when set to true flags this target as a GUI application on + platforms where this makes a difference (e.g. Windows). +- `link_args`: flags to use during linking. You can use UNIX-style + flags here for all platforms. +- `link_depends`: strings, files, or custom targets the link step + depends on such as a symbol visibility map. The purpose is to + automatically trigger a re-link (but not a re-compile) of the target + when this file changes. +- `link_language` *(since 0.51.0)* *(broken until 0.55.0)*: makes the linker for this + target be for the specified language. It is generally unnecessary to set + this, as meson will detect the right linker to use in most cases. There are + only two cases where this is needed. One, your main function in an + executable is not in the language meson picked, or second you want to force + a library to use only one ABI. +- `link_whole` *(since 0.40.0)*: links all contents of the given static libraries + whether they are used by not, equivalent to the `-Wl,--whole-archive` argument flag of GCC. + *(since 0.41.0)* If passed a list that list will be flattened. + *(since 0.51.0)* This argument also accepts outputs produced by + custom targets. The user must ensure that the output is a library in + the correct format. +- `link_with`: one or more shared or static libraries (built by this + project) that this target should be linked with. *(since 0.41.0)* If passed a + list this list will be flattened. *(since 0.51.0)* The arguments can also be custom targets. + In this case Meson will assume that merely adding the output file in the linker command + line is sufficient to make linking work. If this is not sufficient, + then the build system writer must write all other steps manually. +- `export_dynamic` *(since 0.45.0)*: when set to true causes the target's symbols to be + dynamically exported, allowing modules built using the + [`shared_module`](#shared_module) function to refer to functions, + variables and other symbols defined in the executable itself. Implies + the `implib` argument. +- `implib` *(since 0.42.0)*: when set to true, an import library is generated for the + executable (the name of the import library is based on *exe_name*). + Alternatively, when set to a string, that gives the base name for + the import library. The import library is used when the returned + build target object appears in `link_with:` elsewhere. Only has any + effect on platforms where that is meaningful (e.g. Windows). Implies + the `export_dynamic` argument. +- `implicit_include_directories` *(since 0.42.0)*: a boolean telling whether Meson + adds the current source and build directories to the include path, + defaults to `true`. +- `include_directories`: one or more objects created with the + `include_directories` function, or *(since 0.50.0)* strings, which + will be transparently expanded to include directory objects +- `install`: when set to true, this executable should be installed, defaults to `false` +- `install_dir`: override install directory for this file. The value is + relative to the `prefix` specified. F.ex, if you want to install + plugins into a subdir, you'd use something like this: `install_dir : + get_option('libdir') / 'projectname-1.0'`. +- `install_mode` *(since 0.47.0)*: specify the file mode in symbolic format + and optionally the owner/uid and group/gid for the installed files. +- `install_rpath`: a string to set the target's rpath to after install + (but *not* before that). On Windows, this argument has no effect. +- `objects`: list of prebuilt object files (usually for third party + products you don't have source to) that should be linked in this + target, **never** use this for object files that you build yourself. +- `name_suffix`: the string that will be used as the extension for the + target by overriding the default. By default on Windows this is + `exe` and on other platforms it is omitted. Set this to `[]`, or omit + the keyword argument for the default behaviour. +- `override_options` *(since 0.40.0)*: takes an array of strings in the same format as + `project`'s `default_options` overriding the values of these options + for this target only. +- `gnu_symbol_visibility` *(since 0.48.0)*: specifies how symbols should be exported, see + e.g [the GCC Wiki](https://gcc.gnu.org/wiki/Visibility) for more + information. This value can either be an empty string or one of + `default`, `internal`, `hidden`, `protected` or `inlineshidden`, which + is the same as `hidden` but also includes things like C++ implicit + constructors as specified in the GCC manual. Ignored on compilers that + do not support GNU visibility arguments. +- `d_import_dirs`: list of directories to look in for string imports used + in the D programming language +- `d_unittest`: when set to true, the D modules are compiled in debug mode +- `d_module_versions`: list of module version identifiers set when compiling D sources +- `d_debug`: list of module debug identifiers set when compiling D sources +- `pie` *(since 0.49.0)*: build a position-independent executable +- `native`: is a boolean controlling whether the target is compiled for the + build or host machines. Defaults to false, building for the host machine. + +The list of `sources`, `objects`, and `dependencies` is always +flattened, which means you can freely nest and add lists while +creating the final list. + +The returned object also has methods that are documented in the +[object methods section](#build-target-object) below. + +### find_library() + +*(since 0.31.0)* **(deprecated)** Use `find_library()` method of +[the compiler object](#compiler-object) as obtained from +`meson.get_compiler(lang)`. + +### find_program() + +``` meson + program find_program(program_name1, program_name2, ...) +``` + +`program_name1` here is a string that can be an executable or script +to be searched for in `PATH`, or a script in the current source +directory. + +*(since 0.37.0)* `program_name2` and later positional arguments are used as fallback +strings to search for. This is meant to be used for cases where the +program may have many alternative names, such as `foo` and +`foo.py`. The function will check for the arguments one by one and the +first one that is found is returned. + +Keyword arguments are the following: + +- `required` By default, `required` is set to `true` and Meson will + abort if no program can be found. If `required` is set to `false`, + Meson continue even if none of the programs can be found. You can + then use the `.found()` method on the [returned object](#external-program-object) to check + whether it was found or not. *(since 0.47.0)* The value of a + [`feature`](Build-options.md#features) option can also be passed to the + `required` keyword argument. + +- `native` *(since 0.43.0)*: defines how this executable should be searched. By default + it is set to `false`, which causes Meson to first look for the + executable in the cross file (when cross building) and if it is not + defined there, then from the system. If set to `true`, the cross + file is ignored and the program is only searched from the system. + +- `disabler` *(since 0.49.0)*: if `true` and the program couldn't be found, return a + [disabler object](#disabler-object) instead of a not-found object. + + +- `version` *(since 0.52.0)*: specifies the required version, see + [`dependency()`](#dependency) for argument format. The version of the program + is determined by running `program_name --version` command. If stdout is empty + it fallbacks to stderr. If the output contains more text than simply a version + number, only the first occurrence of numbers separated by dots is kept. + If the output is more complicated than that, the version checking will have to + be done manually using [`run_command()`](#run_command). + +- `dirs` *(since 0.53.0)*: extra list of absolute paths where to look for program + names. + +Meson will also autodetect scripts with a shebang line and run them +with the executable/interpreter specified in it both on Windows +(because the command invocator will reject the command otherwise) and +Unixes (if the script file does not have the executable bit +set). Hence, you *must not* manually add the interpreter while using +this script as part of a list of commands. + +If you need to check for a program in a non-standard location, you can +just pass an absolute path to `find_program`, e.g. + +```meson +setcap = find_program('setcap', '/usr/sbin/setcap', '/sbin/setcap', required : false) +``` + +It is also possible to pass an array to `find_program` in case you +need to construct the set of paths to search on the fly: + +```meson +setcap = find_program(['setcap', '/usr/sbin/setcap', '/sbin/setcap'], required : false) +``` + +The returned object also has methods that are documented in the +[object methods section](#external-program-object) below. + +### files() + +``` meson + file_array files(list_of_filenames) +``` + +This command takes the strings given to it in arguments and returns +corresponding File objects that you can use as sources for build +targets. The difference is that file objects remember the subdirectory +they were defined in and can be used anywhere in the source tree. As +an example suppose you have source file `foo.cpp` in subdirectory +`bar1` and you would like to use it in a build target that is defined +in `bar2`. To make this happen you first create the object in `bar1` +like this: + +```meson + foofile = files('foo.cpp') +``` + +Then you can use it in `bar2` like this: + +```meson + executable('myprog', 'myprog.cpp', foofile, ...) +``` + +Meson will then do the right thing. + +### generator() + +``` meson + generator_object generator(*executable*, ...) +``` + +See also: [`custom_target`](#custom_target) + +This function creates a [generator object](#generator-object) that can +be used to run custom compilation commands. The only positional +argument is the executable to use. It can either be a self-built +executable or one returned by find_program. Keyword arguments are the +following: + +- `arguments`: a list of template strings that will be the command line + arguments passed to the executable +- `depends` *(since 0.51.0)*: is an array of build targets that must be built before this + generator can be run. This is used if you have a generator that calls + a second executable that is built in this project. +- `depfile`: is a template string pointing to a dependency file that a + generator can write listing all the additional files this target + depends on, for example a C compiler would list all the header files + it included, and a change in any one of these files triggers a + recompilation +- `output`: a template string (or list of template strings) defining + how an output file name is (or multiple output names are) generated + from a single source file name +- `capture` *(since 0.43.0)*: when this argument is set to true, Meson + captures `stdout` of the `executable` and writes it to the target file + specified as `output`. + +The returned object also has methods that are documented in the +[object methods section](#generator-object) below. + +The template strings passed to all the above keyword arguments accept +the following special substitutions: + +- `@PLAINNAME@`: the complete input file name, e.g: `foo.c` becomes `foo.c` (unchanged) +- `@BASENAME@`: the base of the input filename, e.g.: `foo.c.y` becomes `foo.c` (extension is removed) + +Each string passed to the `output` keyword argument *must* be +constructed using one or both of these two substitutions. + +In addition to the above substitutions, the `arguments` keyword +argument also accepts the following: + +- `@OUTPUT@`: the full path to the output file +- `@INPUT@`: the full path to the input file +- `@DEPFILE@`: the full path to the depfile +- `@SOURCE_DIR@`: the full path to the root of the source tree +- `@CURRENT_SOURCE_DIR@`: this is the directory where the currently processed meson.build is located in +- `@BUILD_DIR@`: the full path to the root of the build dir where the output will be placed + +NOTE: Generators should only be used for outputs that will ***only*** +be used as inputs for a [build target](#build_target) or a [custom +target](#custom_target). When you use the processed output of a +generator in multiple targets, the generator will be run multiple +times to create outputs for each target. Each output will be created +in a target-private directory `@BUILD_DIR@`. + +If you want to generate files for general purposes such as for +generating headers to be used by several sources, or data that will be +installed, and so on, use a [`custom_target`](#custom_target) instead. + +### get_option() + +``` meson + value get_option(option_name) +``` + +Obtains the value of the [project build option](Build-options.md) +specified in the positional argument. + +Note that the value returned for built-in options that end in `dir` +such as `bindir` and `libdir` is always a path relative to (and +inside) the `prefix`. + +The only exceptions are: `sysconfdir`, `localstatedir`, and +`sharedstatedir` which will return the value passed during +configuration as-is, which may be absolute, or relative to `prefix`. +[`install_dir` arguments](Installing.md) handles that as expected, but +if you need the absolute path to one of these e.g. to use in a define +etc., you should use `get_option('prefix') / get_option('localstatedir')` + +For options of type `feature` a +[feature option object](#feature-option-object) +is returned instead of a string. +See [`feature` options](Build-options.md#features) +documentation for more details. + +### get_variable() + +``` meson + value get_variable(variable_name, fallback) +``` + +This function can be used to dynamically obtain a variable. `res = +get_variable(varname, fallback)` takes the value of `varname` (which +must be a string) and stores the variable of that name into `res`. If +the variable does not exist, the variable `fallback` is stored to +`res`instead. If a fallback is not specified, then attempting to read +a non-existing variable will cause a fatal error. + +### import() + +``` meson + module_object import(module_name) +``` + +Imports the given extension module. Returns an opaque object that can +be used to call the methods of the module. Here's an example for a +hypothetical `testmod` module. + +```meson + tmod = import('testmod') + tmod.do_something() +``` + +### include_directories() + +``` meson + include_object include_directories(directory_names, ...) +``` + +Returns an opaque object which contains the directories (relative to +the current directory) given in the positional arguments. The result +can then be passed to the `include_directories:` keyword argument when +building executables or libraries. You can use the returned object in +any subdirectory you want, Meson will make the paths work +automatically. + +Note that this function call itself does not add the directories into +the search path, since there is no global search path. For something +like that, see [`add_project_arguments()`](#add_project_arguments). + +See also `implicit_include_directories` parameter of +[executable()](#executable), which adds current source and build +directories to include path. + +Each directory given is converted to two include paths: one that is +relative to the source root and one relative to the build root. + +For example, with the following source tree layout in +`/home/user/project.git`: + +`meson.build`: +```meson +project(...) + +subdir('include') +subdir('src') + +... +``` + +`include/meson.build`: +```meson +inc = include_directories('.') + +... +``` + +`src/meson.build`: +```meson +sources = [...] + +executable('some-tool', sources, + include_directories : inc, + ...) + +... +``` + +If the build tree is `/tmp/build-tree`, the following include paths +will be added to the `executable()` call: `-I/tmp/build-tree/include +-I/home/user/project.git/include`. + +This function has one keyword argument `is_system` which, if set, +flags the specified directories as system directories. This means that +they will be used with the `-isystem` compiler argument rather than +`-I` on compilers that support this flag (in practice everything +except Visual Studio). + +### install_data() + +``` meson + void install_data(list_of_files, ...) +``` + +Installs files from the source tree that are listed as positional +arguments. The following keyword arguments are supported: + +- `install_dir`: the absolute or relative path to the installation + directory. If this is a relative path, it is assumed to be relative + to the prefix. + + If omitted, the directory defaults to `{datadir}/{projectname}` *(since 0.45.0)*. + +- `install_mode`: specify the file mode in symbolic format and + optionally the owner/uid and group/gid for the installed files. For + example: + + `install_mode: 'rw-r--r--'` for just the file mode + + `install_mode: ['rw-r--r--', 'nobody', 'nobody']` for the file mode and the user/group + + `install_mode: ['rw-r-----', 0, 0]` for the file mode and uid/gid + + To leave any of these three as the default, specify `false`. + +- `rename` *(since 0.46.0)*: if specified renames each source file into corresponding + file from `rename` list. Nested paths are allowed and they are + joined with `install_dir`. Length of `rename` list must be equal to + the number of sources. + +See [Installing](Installing.md) for more examples. + +### install_headers() + +``` meson + void install_headers(list_of_headers, ...) +``` + +Installs the specified header files from the source tree into the +system header directory (usually `/{prefix}/include`) during the +install step. This directory can be overridden by specifying it with +the `install_dir` keyword argument. If you just want to install into a +subdirectory of the system header directory, then use the `subdir` +argument. As an example if this has the value `myproj` then the +headers would be installed to `/{prefix}/include/myproj`. + +For example, this will install `common.h` and `kola.h` into +`/{prefix}/include`: + +```meson +install_headers('common.h', 'proj/kola.h') +``` + +This will install `common.h` and `kola.h` into `/{prefix}/include/myproj`: + +```meson +install_headers('common.h', 'proj/kola.h', subdir : 'myproj') +``` + +This will install `common.h` and `kola.h` into `/{prefix}/cust/myproj`: + +```meson +install_headers('common.h', 'proj/kola.h', install_dir : 'cust', subdir : 'myproj') +``` + +Accepts the following keywords: + +- `install_mode` *(since 0.47.0)*: can be used to specify the file mode in symbolic + format and optionally the owner/uid and group/gid for the installed files. + An example value could be `['rwxr-sr-x', 'root', 'root']`. + +### install_man() + +``` meson + void install_man(list_of_manpages, ...) +``` + +Installs the specified man files from the source tree into system's +man directory during the install step. This directory can be +overridden by specifying it with the `install_dir` keyword +argument. + +Accepts the following keywords: + +- `install_mode` *(since 0.47.0)*: can be used to specify the file mode in symbolic + format and optionally the owner/uid and group/gid for the installed files. + An example value could be `['rwxr-sr-x', 'root', 'root']`. + +*(since 0.49.0)* [manpages are no longer compressed implicitly][install_man_49]. + +[install_man_49]: https://mesonbuild.com/Release-notes-for-0-49-0.html#manpages-are-no-longer-compressed-implicitly + +### install_subdir() + +``` meson + void install_subdir(subdir_name, install_dir : ..., exclude_files : ..., exclude_directories : ..., strip_directory : ...) +``` + +Installs the entire given subdirectory and its contents from the +source tree to the location specified by the keyword argument +`install_dir`. + +The following keyword arguments are supported: + +- `exclude_files`: a list of file names that should not be installed. + Names are interpreted as paths relative to the `subdir_name` location. +- `exclude_directories`: a list of directory names that should not be installed. + Names are interpreted as paths relative to the `subdir_name` location. +- `install_dir`: the location to place the installed subdirectory. +- `install_mode` *(since 0.47.0)*: the file mode in symbolic format and optionally + the owner/uid and group/gid for the installed files. +- `strip_directory` *(since 0.45.0)*: install directory contents. `strip_directory=false` by default. + If `strip_directory=true` only the last component of the source path is used. + +For a given directory `foo`: +```text +foo/ + bar/ + file1 + file2 +``` +`install_subdir('foo', install_dir : 'share', strip_directory : false)` creates +```text +share/ + foo/ + bar/ + file1 + file2 +``` + +`install_subdir('foo', install_dir : 'share', strip_directory : true)` creates +```text +share/ + bar/ + file1 + file2 +``` + +`install_subdir('foo/bar', install_dir : 'share', strip_directory : false)` creates +```text +share/ + bar/ + file1 +``` + +`install_subdir('foo/bar', install_dir : 'share', strip_directory : true)` creates +```text +share/ + file1 +``` + +### is_disabler() + +``` meson + bool is_disabler(var) +``` + +*(since 0.52.0)* + +Returns true if a variable is a disabler and false otherwise. + +### is_variable() + +``` meson + bool is_variable(varname) +``` + +Returns true if a variable of the given name exists and false otherwise. + +### jar() + +```meson + jar_object jar(name, list_of_sources, ...) +``` + +Build a jar from the specified Java source files. Keyword arguments +are the same as [`executable`](#executable)'s, with the addition of +`main_class` which specifies the main class to execute when running +the jar with `java -jar file.jar`. + +### join_paths() + +``` meson +string join_paths(string1, string2, ...) +``` + +*(since 0.36.0)* + +Joins the given strings into a file system path segment. For example +`join_paths('foo', 'bar')` results in `foo/bar`. If any one of the +individual segments is an absolute path, all segments before it are +dropped. That means that `join_paths('foo', '/bar')` returns `/bar`. + +**Warning** Don't use `join_paths()` for sources in [`library`](#library) and +[`executable`](#executable), you should use [`files`](#files) instead. + +*(since 0.49.0)* Using the`/` operator on strings is equivalent to calling +`join_paths`. + +```meson +# res1 and res2 will have identical values +res1 = join_paths(foo, bar) +res2 = foo / bar +``` + +### library() + +``` meson + buildtarget library(library_name, list_of_sources, ...) +``` + +Builds a library that is either static, shared or both depending on +the value of `default_library` +user [option](https://mesonbuild.com/Builtin-options.html). +You should use this instead of [`shared_library`](#shared_library), +[`static_library`](#static_library) or +[`both_libraries`](#both_libraries) most of the time. This allows you +to toggle your entire project (including subprojects) from shared to +static with only one option. This option applies to libraries being +built internal to the entire project. For external dependencies, the +default library type preferred is shared. This can be adapted on a per +library basis using the [dependency()](#dependency)) `static` keyword. + +The keyword arguments for this are the same as for +[`executable`](#executable) with the following additions: + +- `name_prefix`: the string that will be used as the prefix for the + target output filename by overriding the default (only used for + libraries). By default this is `lib` on all platforms and compilers, + except for MSVC shared libraries where it is omitted to follow + convention, and Cygwin shared libraries where it is `cyg`. +- `name_suffix`: the string that will be used as the suffix for the + target output filename by overriding the default (see also: + [executable()](#executable)). By default, for shared libraries this + is `dylib` on macOS, `dll` on Windows, and `so` everywhere else. + For static libraries, it is `a` everywhere. By convention MSVC + static libraries use the `lib` suffix, but we use `a` to avoid a + potential name clash with shared libraries which also generate + import libraries with a `lib` suffix. +- `rust_crate_type`: specifies the crate type for Rust + libraries. Defaults to `dylib` for shared libraries and `rlib` for + static libraries. + +`static_library`, `shared_library` and `both_libraries` also accept +these keyword arguments. + +Note: You can set `name_prefix` and `name_suffix` to `[]`, or omit +them for the default behaviour for each platform. + +### message() + +``` meson + void message(text) +``` + +This function prints its argument to stdout. + +*(since 0.54.0)* Can take more more than one argument that will be separated by +space. + +### warning() + +``` meson + void warning(text) +``` + +*(since 0.44.0)* + +This function prints its argument to stdout prefixed with WARNING:. + +*(since 0.54.0)* Can take more more than one argument that will be separated by +space. + +### summary() + +``` meson + void summary(key, value) + void summary(dictionary) +``` + +*(since 0.53.0)* + +This function is used to summarize build configuration at the end of the build +process. This function provides a way for projects (and subprojects) to report +this information in a clear way. + +The content is a series of key/value pairs grouped into sections. If the section +keyword argument is omitted, those key/value pairs are implicitly grouped into a section +with no title. key/value pairs can optionally be grouped into a dictionary, +but keep in mind that dictionaries does not guarantee ordering. `key` must be string, +`value` can only be integer, boolean, string, or a list of those. + +`summary()` can be called multiple times as long as the same section/key +pair doesn't appear twice. All sections will be collected and printed at +the end of the configuration in the same order as they have been called. + +Keyword arguments: +- `section`: title to group a set of key/value pairs. +- `bool_yn`: if set to true, all boolean values will be replaced by green YES + or red NO. +- `list_sep` *(since 0.54.0)*: string used to separate list values (e.g. `', '`). + +Example: +```meson +project('My Project', version : '1.0') +summary({'bindir': get_option('bindir'), + 'libdir': get_option('libdir'), + 'datadir': get_option('datadir'), + }, section: 'Directories') +summary({'Some boolean': false, + 'Another boolean': true, + 'Some string': 'Hello World', + 'A list': ['string', 1, true], + }, section: 'Configuration') +``` + +Output: +``` +My Project 1.0 + + Directories + prefix: /opt/gnome + bindir: bin + libdir: lib/x86_64-linux-gnu + datadir: share + + Configuration + Some boolean: False + Another boolean: True + Some string: Hello World + A list: string + 1 + True +``` + +### project() + +``` meson + void project(project_name, list_of_languages, ...) +``` + +The first argument to this function must be a string defining the name +of this project. It is followed by programming languages that the +project uses. Supported values for languages are `c`, `cpp` (for +`C++`), `cuda`, `d`, `objc`, `objcpp`, `fortran`, `java`, `cs` (for `C#`), +`vala` and `rust`. *(since 0.40.0)* The list of languages +is optional. + +The project name can be any string you want, it's not used for +anything except descriptive purposes. However since it is written to +e.g. the dependency manifest is usually makes sense to have it be the +same as the project tarball or pkg-config name. So for example you +would probably want to use the name _libfoobar_ instead of _The Foobar +Library_. + +Project supports the following keyword arguments. + +- `default_options`: takes an array of strings. The strings are in the + form `key=value` and have the same format as options to + `meson configure`. For example to set the default project type you would + set this: `default_options : ['buildtype=debugoptimized']`. Note + that these settings are only used when running Meson for the first + time. Global options such as `buildtype` can only be specified in + the master project, settings in subprojects are ignored. Project + specific options are used normally even in subprojects. + + +- `license`: takes a string or array of strings describing the + license(s) the code is under. Usually this would be something like + `license : 'GPL2+'`, but if the code has multiple licenses you can + specify them as an array like this: `license : ['proprietary', + 'GPL3']`. Note that the text is informal and is only written to + the dependency manifest. Meson does not do any license validation, + you are responsible for verifying that you abide by all licensing + terms. You can access the value in your Meson build files with + `meson.project_license()`. + +- `meson_version`: takes a string describing which Meson version the + project requires. Usually something like `>=0.28.0`. + +- `subproject_dir`: specifies the top level directory name that holds + Meson subprojects. This is only meant as a compatibility option + for existing code bases that house their embedded source code in a + custom directory. All new projects should not set this but instead + use the default value. It should be noted that this keyword + argument is ignored inside subprojects. There can be only one + subproject dir and it is set in the top level Meson file. + +- `version`: which is a free form string describing the version of + this project. You can access the value in your Meson build files + with `meson.project_version()`. + +### run_command() + +``` meson + runresult run_command(command, list_of_args, ...) +``` + +Runs the command specified in positional arguments. `command` can be +a string, or the output of [`find_program()`](#find_program), +[`files()`](#files) or [`configure_file()`](#configure_file), or [a +compiler object](#compiler-object). + +Returns [an opaque object](#run-result-object) containing the result +of the invocation. The command is run from an *unspecified* directory, +and Meson will set three environment variables `MESON_SOURCE_ROOT`, +`MESON_BUILD_ROOT` and `MESON_SUBDIR` that specify the source +directory, build directory and subdirectory the target was defined in, +respectively. + +This function supports the following keyword arguments: + + - `check` *(since 0.47.0)*: takes a boolean. If `true`, the exit status code of the command will + be checked, and the configuration will fail if it is non-zero. The default is + `false`. + - `env` *(since 0.50.0)*: environment variables to set, such as `['NAME1=value1', + 'NAME2=value2']`, or an [`environment()` + object](#environment-object) which allows more sophisticated + environment juggling. *(since 0.52.0)* A dictionary is also accepted. + +See also [External commands](External-commands.md). + +### run_target + +``` meson +runtarget run_target(target_name, ...) +``` + +This function creates a new top-level target that runs a specified +command with the specified arguments. Like all top-level targets, this +integrates with the selected backend. For instance, you can +run it as `meson compile target_name`. Note that a run target produces no +output as far as Meson is concerned. It is only meant for tasks such +as running a code formatter or flashing an external device's firmware +with a built file. + +The command is run from an *unspecified* directory, and Meson will set +three environment variables `MESON_SOURCE_ROOT`, `MESON_BUILD_ROOT` +and `MESON_SUBDIR` that specify the source directory, build directory +and subdirectory the target was defined in, respectively. + + - `command` is a list containing the command to run and the arguments + to pass to it. Each list item may be a string or a target. For + instance, passing the return value of [`executable()`](#executable) + as the first item will run that executable, or passing a string as + the first item will find that command in `PATH` and run it. +- `depends` is a list of targets that this target depends on but which + are not listed in the command array (because, for example, the + script does file globbing internally) + +### set_variable() + +``` meson + void set_variable(variable_name, value) +``` + +Assigns a value to the given variable name. Calling +`set_variable('foo', bar)` is equivalent to `foo = bar`. + +*(since 0.46.1)* The `value` parameter can be an array type. + +### shared_library() + +``` meson + buildtarget shared_library(library_name, list_of_sources, ...) +``` + +Builds a shared library with the given sources. Positional and keyword +arguments are the same as for [`library`](#library) with the following +extra keyword arguments. + +- `soversion`: a string specifying the soversion of this shared + library, such as `0`. On Linux and Windows this is used to set the + soversion (or equivalent) in the filename. For example, if + `soversion` is `4`, a Windows DLL will be called `foo-4.dll` and one + of the aliases of the Linux shared library would be + `libfoo.so.4`. If this is not specified, the first part of `version` + is used instead (see below). For example, if `version` is `3.6.0` and + `soversion` is not defined, it is set to `3`. +- `version`: a string specifying the version of this shared library, + such as `1.1.0`. On Linux and OS X, this is used to set the shared + library version in the filename, such as `libfoo.so.1.1.0` and + `libfoo.1.1.0.dylib`. If this is not specified, `soversion` is used + instead (see above). +- `darwin_versions` *(since 0.48.0)*: an integer, string, or a list of + versions to use for setting dylib `compatibility version` and + `current version` on macOS. If a list is specified, it must be + either zero, one, or two elements. If only one element is specified + or if it's not a list, the specified value will be used for setting + both compatibility version and current version. If unspecified, the + `soversion` will be used as per the aforementioned rules. +- `vs_module_defs`: a string, a File object, or Custom Target for a + Microsoft module definition file for controlling symbol exports, + etc., on platforms where that is possible (e.g. Windows). + +### shared_module() + +``` meson + buildtarget shared_module(module_name, list_of_sources, ...) +``` + +*(since 0.37.0)* + +Builds a shared module with the given sources. Positional and keyword +arguments are the same as for [`library`](#library). + +This is useful for building modules that will be `dlopen()`ed and +hence may contain undefined symbols that will be provided by the +library that is loading it. + +If you want the shared module to be able to refer to functions and +variables defined in the [`executable`](#executable) it is loaded by, +you will need to set the `export_dynamic` argument of the executable to +`true`. + +Supports the following extra keyword arguments: + +- `vs_module_defs` *(since 0.52.0)*: a string, a File object, or + Custom Target for a Microsoft module definition file for controlling + symbol exports, etc., on platforms where that is possible + (e.g. Windows). + +**Note:** Linking to a shared module is not supported on some +platforms, notably OSX. Consider using a +[`shared_library`](#shared_library) instead, if you need to both +`dlopen()` and link with a library. + +### static_library() + +``` meson + buildtarget static_library(library_name, list_of_sources, ...) +``` + +Builds a static library with the given sources. Positional and keyword +arguments are otherwise the same as for [`library`](#library), but it +has one argument the others don't have: + + - `pic` *(since 0.36.0)*: builds the library as positional + independent code (so it can be linked into a shared library). This + option has no effect on Windows and OS X since it doesn't make + sense on Windows and PIC cannot be disabled on OS X. + +### subdir() + +``` meson + void subdir(dir_name, ...) +``` + +Enters the specified subdirectory and executes the `meson.build` file +in it. Once that is done, it returns and execution continues on the +line following this `subdir()` command. Variables defined in that +`meson.build` file are then available for use in later parts of the +current build file and in all subsequent build files executed with +`subdir()`. + +Note that this means that each `meson.build` file in a source tree can +and must only be executed once. + +This function has one keyword argument. + + - `if_found`: takes one or several dependency objects and will only + recurse in the subdir if they all return `true` when queried with + `.found()` + +### subdir_done() + +``` meson + subdir_done() +``` + +Stops further interpretation of the meson script file from the point of +the invocation. All steps executed up to this point are valid and will +be executed by meson. This means that all targets defined before the call +of `subdir_done` will be build. + +If the current script was called by `subdir` the execution returns to the +calling directory and continues as if the script had reached the end. +If the current script is the top level script meson configures the project +as defined up to this point. + +Example: +```meson +project('example exit', 'cpp') +executable('exe1', 'exe1.cpp') +subdir_done() +executable('exe2', 'exe2.cpp') +``` + +The executable `exe1` will be build, while the executable `exe2` is not +build. + +### subproject() + +``` meson + subproject_object subproject(subproject_name, ...) +``` + +Takes the project specified in the positional argument and brings that +in the current build specification by returning a [subproject +object](#subproject-object). Subprojects must always be placed inside +the `subprojects` directory at the top source directory. So for +example a subproject called `foo` must be located in +`${MESON_SOURCE_ROOT}/subprojects/foo`. Supports the following keyword +arguments: + + - `default_options` *(since 0.37.0)*: an array of default option values + that override those set in the subproject's `meson_options.txt` + (like `default_options` in `project`, they only have effect when + Meson is run for the first time, and command line arguments override + any default options in build files). *(since 0.54.0)*: `default_library` + built-in option can also be overridden. + - `version`: works just like the same as in `dependency`. + It specifies what version the subproject should be, as an example `>=1.0.1` + - `required` *(since 0.48.0)*: By default, `required` is `true` and + Meson will abort if the subproject could not be setup. You can set + this to `false` and then use the `.found()` method on the [returned + object](#subproject-object). You may also pass the value of a + [`feature`](Build-options.md#features) option, same as + [`dependency()`](#dependency). + +Note that you can use the returned [subproject +object](#subproject-object) to access any variable in the +subproject. However, if you want to use a dependency object from +inside a subproject, an easier way is to use the `fallback:` keyword +argument to [`dependency()`](#dependency). + +[See additional documentation](Subprojects.md). + +### test() + +``` meson + void test(name, executable, ...) +``` + +Defines a test to run with the test harness. Takes two positional +arguments, the first is the name of the test and the second is the +executable to run. The executable can be an [executable build target +object](#build-target-object) returned by +[`executable()`](#executable) or an [external program +object](#external-program-object) returned by +[`find_program()`](#find_program). + +*(since 0.55.0)* When cross compiling, if an exe_wrapper is needed and defined +the environment variable `MESON_EXE_WRAPPER` will be set to the string value +of that wrapper (implementation detail: using `mesonlib.join_args`). Test +scripts may use this to run cross built binaries. If your test needs +`MESON_EXE_WRAPPER` in cross build situations it is your responsibility to +return code 77 to tell the harness to report "skip". + +By default, environment variable +[`MALLOC_PERTURB_`](http://man7.org/linux/man-pages/man3/mallopt.3.html) +is automatically set by `meson test` to a random value between 1..255. +This can help find memory leaks on configurations using glibc, +including with non-GCC compilers. However, this can have a performance impact, +and may fail a test due to external libraries whose internals are out of the +user's control. To check if this feature is causing an expected runtime crash, +disable the feature by temporarily setting environment variable +`MALLOC_PERTURB_=0`. While it's preferable to only temporarily disable this +check, if a project requires permanent disabling of this check +in meson.build do like: + +```meson +nomalloc = environment({'MALLOC_PERTURB_': '0'}) + +test(..., env: nomalloc, ...) +``` + +#### test() Keyword arguments + +- `args`: arguments to pass to the executable + +- `env`: environment variables to set, such as `['NAME1=value1', + 'NAME2=value2']`, or an [`environment()` + object](#environment-object) which allows more sophisticated + environment juggling. *(since 0.52.0)* A dictionary is also accepted. + +- `is_parallel`: when false, specifies that no other test must be + running at the same time as this test + +- `should_fail`: when true the test is considered passed if the + executable returns a non-zero return value (i.e. reports an error) + +- `suite`: `'label'` (or list of labels `['label1', 'label2']`) + attached to this test. The suite name is qualified by a (sub)project + name resulting in `(sub)project_name:label`. In the case of a list + of strings, the suite names will be `(sub)project_name:label1`, + `(sub)project_name:label2`, etc. + +- `timeout`: the amount of seconds the test is allowed to run, a test + that exceeds its time limit is always considered failed, defaults to + 30 seconds + +- `workdir`: absolute path that will be used as the working directory + for the test + +- `depends` *(since 0.46.0)*: specifies that this test depends on the specified + target(s), even though it does not take any of them as a command + line argument. This is meant for cases where test finds those + targets internally, e.g. plugins or globbing. Those targets are built + before test is executed even if they have `build_by_default : false`. + +- `protocol` *(since 0.50.0)*: specifies how the test results are parsed and can + be one of `exitcode`, `tap`, or `gtest`. For more information about test + harness protocol read [Unit Tests](Unit-tests.md). The following values are + accepted: + - `exitcode`: the executable's exit code is used by the test harness + to record the outcome of the test). + - `tap`: [Test Anything Protocol](https://www.testanything.org/). + - `gtest` *(since 0.55.0)*: for Google Tests. + +- `priority` *(since 0.52.0)*:specifies the priority of a test. Tests with a + higher priority are *started* before tests with a lower priority. + The starting order of tests with identical priorities is + implementation-defined. The default priority is 0, negative numbers are + permitted. + +Defined tests can be run in a backend-agnostic way by calling +`meson test` inside the build dir, or by using backend-specific +commands, such as `ninja test` or `msbuild RUN_TESTS.vcxproj`. + +### vcs_tag() + +``` meson + customtarget vcs_tag(...) +``` + +This command detects revision control commit information at build time +and places it in the specified output file. This file is guaranteed to +be up to date on every build. Keywords are similar to `custom_target`. + +- `command`: string list with the command to execute, see + [`custom_target`](#custom_target) for details on how this command + must be specified +- `fallback`: version number to use when no revision control + information is present, such as when building from a release tarball + (defaults to `meson.project_version()`) +- `input`: file to modify (e.g. `version.c.in`) (required) +- `output`: file to write the results to (e.g. `version.c`) (required) +- `replace_string`: string in the input file to substitute with the + commit information (defaults to `@VCS_TAG@`) + +Meson will read the contents of `input`, substitute the +`replace_string` with the detected revision number, and write the +result to `output`. This method returns a +[`custom_target`](#custom_target) object that (as usual) should be +used to signal dependencies if other targets use the file outputted +by this. + +For example, if you generate a header with this and want to use that in +a build target, you must add the return value to the sources of that +build target. Without that, Meson will not know the order in which to +build the targets. + +If you desire more specific behavior than what this command provides, +you should use `custom_target`. + +## Built-in objects + +These are built-in objects that are always available. + +### `meson` object + +The `meson` object allows you to introspect various properties of the +system. This object is always mapped in the `meson` variable. It has +the following methods. + +- `add_dist_script(script_name, arg1, arg2, ...)` *(since 0.48.0)*: causes the script + given as argument to run during `dist` operation after the + distribution source has been generated but before it is + archived. Note that this runs the script file that is in the + _staging_ directory, not the one in the source directory. If the + script file can not be found in the staging directory, it is a hard + error. This command can only invoked from the main project, calling + it from a subproject is a hard error. *(since 0.49.0)* Accepts multiple arguments + for the fscript. *(since 0.54.0)* The `MESON_SOURCE_ROOT` and `MESON_BUILD_ROOT` + environment variables are set when dist scripts are run. + *(since 0.55.0)* The output of `configure_file`, `files`, and `find_program` + as well as strings. + +- `add_install_script(script_name, arg1, arg2, ...)`: causes the script + given as an argument to be run during the install step, this script + will have the environment variables `MESON_SOURCE_ROOT`, + `MESON_BUILD_ROOT`, `MESON_INSTALL_PREFIX`, + `MESON_INSTALL_DESTDIR_PREFIX`, and `MESONINTROSPECT` set. + All positional arguments are passed as parameters. + *(since 0.55.0)* The output of `configure_file`, `files`, `find_program`, + `custom_target`, indexes of `custom_target`, `executable`, `library`, and + other built targets as well as strings. + + *(since 0.54.0)* If `meson install` is called with the `--quiet` option, the + environment variable `MESON_INSTALL_QUIET` will be set. + + Meson uses the `DESTDIR` environment variable as set by the + inherited environment to determine the (temporary) installation + location for files. Your install script must be aware of this while + manipulating and installing files. The correct way to handle this is + with the `MESON_INSTALL_DESTDIR_PREFIX` variable which is always set + and contains `DESTDIR` (if set) and `prefix` joined together. This + is useful because both are usually absolute paths and there are + platform-specific edge-cases in joining two absolute paths. + + In case it is needed, `MESON_INSTALL_PREFIX` is also always set and + has the value of the `prefix` option passed to Meson. + + `MESONINTROSPECT` contains the path to the introspect command that + corresponds to the `meson` executable that was used to configure the + build. (This might be a different path then the first executable + found in `PATH`.) It can be used to query build configuration. Note + that the value will contain many parts, f.ex., it may be `python3 + /path/to/meson.py introspect`. The user is responsible for splitting + the string to an array if needed by splitting lexically like a UNIX + shell would. If your script uses Python, `shlex.split()` is the + easiest correct way to do this. + +- `add_postconf_script(script_name, arg1, arg2, ...)`: runs the + executable given as an argument after all project files have been + generated. This script will have the environment variables + `MESON_SOURCE_ROOT` and `MESON_BUILD_ROOT` set. + *(since 0.55.0)* The output of `configure_file`, `files`, and `find_program` + as well as strings. + +- `backend()` *(since 0.37.0)*: returns a string representing the + current backend: `ninja`, `vs2010`, `vs2015`, `vs2017`, `vs2019`, + or `xcode`. + +- `build_root()`: returns a string with the absolute path to the build + root directory. Note: this function will return the build root of + the parent project if called from a subproject, which is usually + not what you want. Try using `current_build_dir()`. + +- `source_root()`: returns a string with the absolute path to the + source root directory. Note: you should use the `files()` function + to refer to files in the root source directory instead of + constructing paths manually with `meson.source_root()`. This + function will return the source root of the parent project if called + from a subproject, which is usually not what you want. Try using + `current_source_dir()`. + +- `current_build_dir()`: returns a string with the absolute path to the + current build directory. + +- `current_source_dir()`: returns a string to the current source + directory. Note: **you do not need to use this function** when + passing files from the current source directory to a function since + that is the default. Also, you can use the `files()` function to + refer to files in the current or any other source directory instead + of constructing paths manually with `meson.current_source_dir()`. + +- `get_compiler(language)`: returns [an object describing a + compiler](#compiler-object), takes one positional argument which is + the language to use. It also accepts one keyword argument, `native` + which when set to true makes Meson return the compiler for the build + machine (the "native" compiler) and when false it returns the host + compiler (the "cross" compiler). If `native` is omitted, Meson + returns the "cross" compiler if we're currently cross-compiling and + the "native" compiler if we're not. + +- `get_cross_property(propname, fallback_value)`: + *Consider `get_external_property()` instead*. Returns the given + property from a cross file, the optional fallback_value is returned + if not cross compiling or the given property is not found. + +- `get_external_property(propname, fallback_value, native: true/false)` + *(since 0.54.0)*: returns the given property from a native or cross file. + The optional fallback_value is returned if the given property is not found. + The optional `native: true` forces retrieving a variable from the + native file, even when cross-compiling. + If `native: false` or not specified, variable is retrieved from the + cross-file if cross-compiling, and from the native-file when not cross-compiling. + +- `can_run_host_binaries()` *(since 0.55.0)*: returns true if the build machine can run + binaries compiled for the host. This returns true unless you are + cross compiling, need a helper to run host binaries, and don't have one. + For example when cross compiling from Linux to Windows, one can use `wine` + as the helper. + +- `has_exe_wrapper()`: *(since 0.55.0)* **(deprecated)**. Use `can_run_host_binaries` instead. + +- `install_dependency_manifest(output_name)`: installs a manifest file + containing a list of all subprojects, their versions and license + files to the file name given as the argument. + +- `is_cross_build()`: returns `true` if the current build is a [cross + build](Cross-compilation.md) and `false` otherwise. + +- `is_subproject()`: returns `true` if the current project is being + built as a subproject of some other project and `false` otherwise. + +- `is_unity()`: returns `true` when doing a [unity + build](Unity-builds.md) (multiple sources are combined before + compilation to reduce build time) and `false` otherwise. + +- `override_find_program(progname, program)` *(since 0.46.0)*: + specifies that whenever `find_program` is used to find a program + named `progname`, Meson should not look it up on the system but + instead return `program`, which may either be the result of + `find_program`, `configure_file` or `executable`. *(since 0.55.0)* If a version + check is passed to `find_program` for a program that has been overridden with + an executable, the current project version is used. + + If `program` is an `executable`, it cannot be used during configure. + +- `override_dependency(name, dep_object)` *(since 0.54.0)*: + specifies that whenever `dependency(name, ...)` is used, Meson should not + look it up on the system but instead return `dep_object`, which may either be + the result of `dependency()` or `declare_dependency()`. It takes optional + `native` keyword arguments. Doing this in a subproject allows the parent + project to retrieve the dependency without having to know the dependency + variable name: `dependency(name, fallback : subproject_name)`. + +- `project_version()`: returns the version string specified in + `project` function call. + +- `project_license()`: returns the array of licenses specified in + `project` function call. + +- `project_name()`: returns the project name specified in the `project` + function call. + +- `version()`: return a string with the version of Meson. + +### `build_machine` object + +Provides information about the build machine — the machine that is +doing the actual compilation. See +[Cross-compilation](Cross-compilation.md). It has the following +methods: + +- `cpu_family()`: returns the CPU family name. [This + table](Reference-tables.md#cpu-families) contains all known CPU + families. These are guaranteed to continue working. + +- `cpu()`: returns a more specific CPU name, such as `i686`, `amd64`, + etc. + +- `system()`: returns the operating system name. [This + table](Reference-tables.md#operating-system-names) Lists all of + the currently known Operating System names, these are guaranteed to + continue working. + +- `endian()`: returns `big` on big-endian systems and `little` on + little-endian systems. + +Currently, these values are populated using +[`platform.system()`](https://docs.python.org/3.4/library/platform.html#platform.system) +and +[`platform.machine()`](https://docs.python.org/3.4/library/platform.html#platform.machine). If +you think the returned values for any of these are incorrect for your +system or CPU, or if your OS is not in the linked table, please file +[a bug report](https://github.com/mesonbuild/meson/issues/new) with +details and we'll look into it. + +### `host_machine` object + +Provides information about the host machine — the machine on which the +compiled binary will run. See +[Cross-compilation](Cross-compilation.md). + +It has the same methods as [`build_machine`](#build_machine-object). + +When not cross-compiling, all the methods return the same values as +`build_machine` (because the build machine is the host machine) + +Note that while cross-compiling, it simply returns the values defined +in the cross-info file. + +### `target_machine` object + +Provides information about the target machine — the machine on which +the compiled binary's output will run. Hence, this object should only +be used while cross-compiling a compiler. See +[Cross-compilation](Cross-compilation.md). + +It has the same methods as [`build_machine`](#build_machine-object). + +When all compilation is 'native', all the methods return the same +values as `build_machine` (because the build machine is the host +machine and the target machine). + +Note that while cross-compiling, it simply returns the values defined +in the cross-info file. If `target_machine` values are not defined in +the cross-info file, `host_machine` values are returned instead. + +### `string` object + +All [strings](Syntax.md#strings) have the following methods. Strings +are immutable, all operations return their results as a new string. + +- `contains(string)`: returns true if string contains the string + specified as the argument. + +- `endswith(string)`: returns true if string ends with the string + specified as the argument. + +- `format()`: formats text, see the [Syntax + manual](Syntax.md#string-formatting) for usage info. + +- `join(list_of_strings)`: the opposite of split, for example + `'.'.join(['a', 'b', 'c']` yields `'a.b.c'`. + +- `split(split_character)`: splits the string at the specified + character (or whitespace if not set) and returns the parts in an + array. + +- `startswith(string)`: returns true if string starts with the string + specified as the argument + +- `strip()`: removes whitespace at the beginning and end of the string. + *(since 0.43.0)* Optionally can take one positional string argument, + and all characters in that string will be stripped. + +- `to_int()`: returns the string converted to an integer (error if string + is not a number). + +- `to_lower()`: creates a lower case version of the string. + +- `to_upper()`: creates an upper case version of the string. + +- `underscorify()`: creates a string where every non-alphabetical + non-number character is replaced with `_`. + +- `version_compare(comparison_string)`: does semantic version + comparison, if `x = '1.2.3'` then `x.version_compare('>1.0.0')` + returns `true`. + +### `Number` object + +[Numbers](Syntax.md#numbers) support these methods: + +- `is_even()`: returns true if the number is even +- `is_odd()`: returns true if the number is odd +- `to_string()`: returns the value of the number as a string. + +### `boolean` object + +A [boolean](Syntax.md#booleans) object has two simple methods: + +- `to_int()`: returns either `1` or `0`. + +- `to_string()`: returns the string `'true'` if the boolean is true or + `'false'` otherwise. You can also pass it two strings as positional + arguments to specify what to return for true/false. For instance, + `bool.to_string('yes', 'no')` will return `yes` if the boolean is + true and `no` if it is false. + +### `array` object + +The following methods are defined for all [arrays](Syntax.md#arrays): + +- `contains(item)`: returns `true` if the array contains the object + given as argument, `false` otherwise + +- `get(index, fallback)`: returns the object at the given index, + negative indices count from the back of the array, indexing out of + bounds returns the `fallback` value *(since 0.38.0)* or, if it is + not specified, causes a fatal error + +- `length()`: the size of the array + +You can also iterate over arrays with the [`foreach` +statement](Syntax.md#foreach-statements). + +### `dictionary` object + +*(since 0.47.0)* + +The following methods are defined for all [dictionaries](Syntax.md#dictionaries): + +- `has_key(key)`: returns `true` if the dictionary contains the key + given as argument, `false` otherwise + +- `get(key, fallback)`: returns the value for the key given as first + argument if it is present in the dictionary, or the optional + fallback value given as the second argument. If a single argument + was given and the key was not found, causes a fatal error + +You can also iterate over dictionaries with the [`foreach` +statement](Syntax.md#foreach-statements). + +*(since 0.48.0)* Dictionaries can be added (e.g. `d1 = d2 + d3` and `d1 += d2`). +Values from the second dictionary overrides values from the first. + +## Returned objects + +These are objects returned by the [functions listed above](#functions). + +### `compiler` object + +This object is returned by +[`meson.get_compiler(lang)`](#meson-object). It represents a compiler +for a given language and allows you to query its properties. It has +the following methods: + +- `alignment(typename)`: returns the alignment of the type specified in + the positional argument, you can specify external dependencies to + use with `dependencies` keyword argument. + +- `cmd_array()`: returns an array containing the command arguments for + the current compiler. + +- `compiles(code)`: returns true if the code fragment given in the + positional argument compiles, you can specify external dependencies + to use with `dependencies` keyword argument, `code` can be either a + string containing source code or a `file` object pointing to the + source code. + +- `compute_int(expr, ...')`: computes the value of the given expression + (as an example `1 + 2`). When cross compiling this is evaluated with + an iterative algorithm, you can specify keyword arguments `low` + (defaults to -1024), `high` (defaults to 1024) and `guess` to + specify max and min values for the search and the value to try + first. + +- `find_library(lib_name, ...)`: tries to find the library specified in + the positional argument. The [result + object](#external-library-object) can be used just like the return + value of `dependency`. If the keyword argument `required` is false, + Meson will proceed even if the library is not found. By default the + library is searched for in the system library directory + (e.g. /usr/lib). This can be overridden with the `dirs` keyword + argument, which can be either a string or a list of strings. + *(since 0.47.0)* The value of a [`feature`](Build-options.md#features) + option can also be passed to the `required` keyword argument. + *(since 0.49.0)* If the keyword argument `disabler` is `true` and the + dependency couldn't be found, return a [disabler object](#disabler-object) + instead of a not-found dependency. *(since 0.50.0)* The `has_headers` keyword + argument can be a list of header files that must be found as well, using + `has_header()` method. All keyword arguments prefixed with `header_` will be + passed down to `has_header()` method with the prefix removed. *(since 0.51.0)* + The `static` keyword (boolean) can be set to `true` to limit the search to + static libraries and `false` for dynamic/shared. + +- `first_supported_argument(list_of_strings)`: given a list of + strings, returns the first argument that passes the `has_argument` + test or an empty array if none pass. + +- `first_supported_link_argument(list_of_strings)` *(since 0.46.0)*: + given a list of strings, returns the first argument that passes the + `has_link_argument` test or an empty array if none pass. + +- `get_define(definename)`: returns the given preprocessor symbol's + value as a string or empty string if it is not defined. + *(since 0.47.0)* This method will concatenate string literals as + the compiler would. E.g. `"a" "b"` will become `"ab"`. + +- `get_id()`: returns a string identifying the compiler. For example, + `gcc`, `msvc`, [and more](Reference-tables.md#compiler-ids). + +- `get_argument_syntax()` *(since 0.49.0)*: returns a string identifying the type + of arguments the compiler takes. Can be one of `gcc`, `msvc`, or an undefined + string value. This method is useful for identifying compilers that are not + gcc or msvc, but use the same argument syntax as one of those two compilers + such as clang or icc, especially when they use different syntax on different + operating systems. + +- `get_linker_id()` *(since 0.53.0)*: returns a string identifying the linker. + For example, `ld.bfd`, `link`, [and more](Reference-tables.md#linker-ids). + +- `get_supported_arguments(list_of_string)` *(since 0.43.0)*: returns + an array containing only the arguments supported by the compiler, + as if `has_argument` were called on them individually. + +- `get_supported_link_arguments(list_of_string)` *(since 0.46.0)*: returns + an array containing only the arguments supported by the linker, + as if `has_link_argument` were called on them individually. + +- `has_argument(argument_name)`: returns true if the compiler accepts + the specified command line argument, that is, can compile code + without erroring out or printing a warning about an unknown flag. + +- `has_link_argument(argument_name)` *(since 0.46.0)*: returns true if + the linker accepts the specified command line argument, that is, can + compile and link code without erroring out or printing a warning + about an unknown flag. Link arguments will be passed to the + compiler, so should usually have the `-Wl,` prefix. On VisualStudio + a `/link` argument will be prepended. + +- `has_function(funcname)`: returns true if the given function is + provided by the standard library or a library passed in with the + `args` keyword, you can specify external dependencies to use with + `dependencies` keyword argument. + +- `check_header` *(since 0.47.0)*: returns true if the specified header is *usable* with + the specified prefix, dependencies, and arguments. + You can specify external dependencies to use with `dependencies` + keyword argument and extra code to put above the header test with + the `prefix` keyword. In order to look for headers in a specific + directory you can use `args : '-I/extra/include/dir`, but this + should only be used in exceptional cases for includes that can't be + detected via pkg-config and passed via `dependencies`. *(since 0.50.0)* The + `required` keyword argument can be used to abort if the header cannot be + found. + +- `has_header`: returns true if the specified header *exists*, and is + faster than `check_header()` since it only does a pre-processor check. + You can specify external dependencies to use with `dependencies` + keyword argument and extra code to put above the header test with + the `prefix` keyword. In order to look for headers in a specific + directory you can use `args : '-I/extra/include/dir`, but this + should only be used in exceptional cases for includes that can't be + detected via pkg-config and passed via `dependencies`. *(since 0.50.0)* The + `required` keyword argument can be used to abort if the header cannot be + found. + +- `has_header_symbol(headername, symbolname)`: detects + whether a particular symbol (function, variable, #define, type + definition, etc) is declared in the specified header, you can + specify external dependencies to use with `dependencies` keyword + argument. *(since 0.50.0)* The `required` keyword argument can be + used to abort if the symbol cannot be found. + +- `has_member(typename, membername)`: takes two arguments, type name + and member name and returns true if the type has the specified + member, you can specify external dependencies to use with + `dependencies` keyword argument. + +- `has_members(typename, membername1, membername2, ...)`: takes at + least two arguments, type name and one or more member names, returns + true if the type has all the specified members, you can specify + external dependencies to use with `dependencies` keyword argument. + +- `has_multi_arguments(arg1, arg2, arg3, ...)` *(since 0.37.0)*: the same as + `has_argument` but takes multiple arguments and uses them all in a + single compiler invocation. + +- `has_multi_link_arguments(arg1, arg2, arg3, ...)` *(since 0.46.0)*: + the same as `has_link_argument` but takes multiple arguments and + uses them all in a single compiler invocation. + +- `has_type(typename)`: returns true if the specified token is a type, + you can specify external dependencies to use with `dependencies` + keyword argument. + +- `links(code)`: returns true if the code fragment given in the + positional argument compiles and links, you can specify external + dependencies to use with `dependencies` keyword argument, `code` can + be either a string containing source code or a `file` object + pointing to the source code. + +- `run(code)`: attempts to compile and execute the given code fragment, + returns a run result object, you can specify external dependencies + to use with `dependencies` keyword argument, `code` can be either a + string containing source code or a `file` object pointing to the + source code. + +- `symbols_have_underscore_prefix()` *(since 0.37.0)*: returns `true` + if the C symbol mangling is one underscore (`_`) prefixed to the symbol. + +- `sizeof(typename, ...)`: returns the size of the given type + (e.g. `'int'`) or -1 if the type is unknown, to add includes set + them in the `prefix` keyword argument, you can specify external + dependencies to use with `dependencies` keyword argument. + +- `version()`: returns the compiler's version number as a string. + +- `has_function_attribute(name)` *(since 0.48.0)*: returns `true` if the + compiler supports the GNU style (`__attribute__(...)`) `name`. This is + preferable to manual compile checks as it may be optimized for compilers that + do not support such attributes. + [This table](Reference-tables.md#gcc-__attribute__) lists all of the + supported attributes. + +- `get_supported_function_attributes(list_of_names)` *(since 0.48.0)*: + returns an array containing any names that are supported GCC style + attributes. Equivalent to `has_function_attribute` was called on each of them + individually. + +The following keyword arguments can be used: + +- `args`: used to pass a list of compiler arguments that are + required to find the header or symbol. For example, you might need + to pass the include path `-Isome/path/to/header` if a header is not + in the default include path. *(since 0.38.0)* you should use the + `include_directories` keyword described below. You may also want to + pass a library name `-lfoo` for `has_function` to check for a function. + Supported by all methods except `get_id`, `version`, and `find_library`. + +- `include_directories` *(since 0.38.0)*: specifies extra directories for + header searches. + +- `name`: the name to use for printing a message about the compiler + check. Supported by the methods `compiles()`, `links()`, and + `run()`. If this keyword argument is not passed to those methods, no + message will be printed about the check. + +- `no_builtin_args`: when set to true, the compiler arguments controlled + by built-in configuration options are not added. + +- `prefix`: adds #includes and other things that are + required for the symbol to be declared. System definitions should be + passed via compiler args (eg: `_GNU_SOURCE` is often required for + some symbols to be exposed on Linux, and it should be passed via + `args` keyword argument, see below). Supported by the methods + `sizeof`, `has_type`, `has_function`, `has_member`, `has_members`, + `check_header`, `has_header`, `has_header_symbol`. + +**Note:** These compiler checks do not use compiler arguments added with +`add_*_arguments()`, via `-Dlang_args` on the command-line, or through +`CFLAGS`/`LDFLAGS`, etc in the environment. Hence, you can trust that +the tests will be fully self-contained, and won't fail because of custom +flags added by other parts of the build file or by users. + +Note that if you have a single prefix with all your dependencies, you +might find it easier to append to the environment variables +`C_INCLUDE_PATH` with GCC/Clang and `INCLUDE` with MSVC to expand the +default include path, and `LIBRARY_PATH` with GCC/Clang and `LIB` with +MSVC to expand the default library search path. + +However, with GCC, these variables will be ignored when +cross-compiling. In that case you need to use a specs file. See: + + +### `build target` object + +A build target is either an [executable](#executable), +[shared library](#shared_library), [static library](#static_library), +[both shared and static library](#both_libraries) or +[shared module](#shared_module). + +- `extract_all_objects()`: is same as `extract_objects` but returns all + object files generated by this target. *(since 0.46.0)* keyword argument + `recursive` must be set to `true` to also return objects passed to + the `object` argument of this target. By default only objects built + for this target are returned to maintain backward compatibility with + previous versions. The default will eventually be changed to `true` + in a future version. + +- `extract_objects(source1, source2, ...)`: takes as its arguments + a number of source files as [`string`](#string-object) or + [`files()`](#files) and returns an opaque value representing the + object files generated for those source files. This is typically used + to take single object files and link them to unit tests or to compile + some source files with custom flags. To use the object file(s) + in another build target, use the `objects:` keyword argument. + +- `full_path()`: returns a full path pointing to the result target file. + NOTE: In most cases using the object itself will do the same job as + this and will also allow Meson to setup inter-target dependencies + correctly. Please file a bug if that doesn't work for you. + +- `private_dir_include()`: returns a opaque value that works like + `include_directories` but points to the private directory of this + target, usually only needed if an another target needs to access + some generated internal headers of this target + +- `name()` *(since 0.54.0)*: returns the target name. + + +### `configuration` data object + +This object is returned by +[`configuration_data()`](#configuration_data) and encapsulates +configuration values to be used for generating configuration files. A +more in-depth description can be found in the [the configuration wiki +page](Configuration.md) It has three methods: + +- `get(varname, default_value)`: returns the value of `varname`, if the + value has not been set returns `default_value` if it is defined + *(since 0.38.0)* and errors out if not + +- `get_unquoted(varname, default_value)` *(since 0.44.0)*: returns the value + of `varname` but without surrounding double quotes (`"`). If the value has + not been set returns `default_value` if it is defined and errors out if not. + +- `has(varname)`: returns `true` if the specified variable is set + +- `merge_from(other)` *(since 0.42.0)*: takes as argument a different + configuration data object and copies all entries from that object to + the current. + +- `set(varname, value)`, sets a variable to a given value + +- `set10(varname, boolean_value)` is the same as above but the value + is either `true` or `false` and will be written as 1 or 0, + respectively + +- `set_quoted(varname, value)` is same as `set` but quotes the value + in double quotes (`"`) + +They all take the `description` keyword that will be written in the +result file. The replacement assumes a file with C syntax. If your +generated file is source code in some other language, you probably +don't want to add a description field because it most likely will +cause a syntax error. + +### `custom target` object + +This object is returned by [`custom_target`](#custom_target) and +contains a target with the following methods: + +- `full_path()`: returns a full path pointing to the result target file + NOTE: In most cases using the object itself will do the same job as + this and will also allow Meson to setup inter-target dependencies + correctly. Please file a bug if that doesn't work for you. + *(since 0.54.0)* It can be also called on indexes objects: + `custom_targets[i].full_path()`. + +- `[index]`: returns an opaque object that references this target, and + can be used as a source in other targets. When it is used as such it + will make that target depend on this custom target, but the only + source added will be the one that corresponds to the index of the + custom target's output argument. + +- `to_list()` *(since 0.54.0)*: returns a list of opaque objects that references + this target, and can be used as a source in other targets. This can be used to + iterate outputs with `foreach` loop. + +### `dependency` object + +This object is returned by [`dependency()`](#dependency) and contains +an external dependency with the following methods: + + - `found()`: returns whether the dependency was found. + + - `name()` *(since 0.48.0)*: returns the name of the dependency that was + searched. Returns `internal` for dependencies created with + `declare_dependency()`. + + - `get_pkgconfig_variable(varname)` *(since 0.36.0)*: gets the + pkg-config variable specified, or, if invoked on a non pkg-config + dependency, error out. *(since 0.44.0)* You can also redefine a + variable by passing a list to the `define_variable` parameter + that can affect the retrieved variable: `['prefix', '/'])`. + *(since 0.45.0)* A warning is issued if the variable is not defined, + unless a `default` parameter is specified. + + - `get_configtool_variable(varname)` *(since 0.44.0)*: gets the + command line argument from the config tool (with `--` prepended), or, + if invoked on a non config-tool dependency, error out. + + - `type_name()`: returns a string describing the type of the + dependency, the most common values are `internal` for deps created + with `declare_dependency()` and `pkgconfig` for system dependencies + obtained with Pkg-config. + + - `version()`: the version number as a string, for example `1.2.8`. + `unknown` if the dependency provider doesn't support determining the + version. + + - `include_type()`: returns whether the value set by the `include_type` kwarg + + - `as_system(value)`: returns a copy of the dependency object, which has changed + the value of `include_type` to `value`. The `value` argument is optional and + defaults to `'preserve'`. + + - `partial_dependency(compile_args : false, link_args : false, links + : false, includes : false, sources : false)` *(since 0.46.0)*: returns + a new dependency object with the same name, version, found status, + type name, and methods as the object that called it. This new + object will only inherit other attributes from its parent as + controlled by keyword arguments. + + If the parent has any dependencies, those will be applied to the new + partial dependency with the same rules. So, given: + + ```meson + dep1 = declare_dependency(compile_args : '-Werror=foo', link_with : 'libfoo') + dep2 = declare_dependency(compile_args : '-Werror=bar', dependencies : dep1) + dep3 = dep2.partial_dependency(compile_args : true) + ``` + + dep3 will add `['-Werror=foo', '-Werror=bar']` to the compiler args + of any target it is added to, but libfoo will not be added to the + link_args. + + *Note*: A bug present until 0.50.1 results in the above behavior + not working correctly. + + The following arguments will add the following attributes: + + - compile_args: any arguments passed to the compiler + - link_args: any arguments passed to the linker + - links: anything passed via link_with or link_whole + - includes: any include_directories + - sources: any compiled or static sources the dependency has + + - `get_variable(cmake : str, pkgconfig : str, configtool : str, + internal: str, default_value : str, pkgconfig_define : [str, str])` + *(since 0.51.0)*: a generic variable getter method, which replaces the + get_*type*_variable methods. This allows one to get the variable + from a dependency without knowing specifically how that dependency + was found. If default_value is set and the value cannot be gotten + from the object then default_value is returned, if it is not set + then an error is raised. + + *(since 0.54.0)* added `internal` keyword. + +### `disabler` object + +A disabler object is an object that behaves in much the same way as +NaN numbers do in floating point math. That is when used in any +statement (function call, logical op, etc) they will cause the +statement evaluation to immediately short circuit to return a disabler +object. A disabler object has one method: + +- `found()`: always returns `false`. + +### `external program` object + +This object is returned by [`find_program()`](#find_program) and +contains an external (i.e. not built as part of this project) program +and has the following methods: + +- `found()`: returns whether the executable was found. + +- `path()`: *(since 0.55.0)* **(deprecated)** use `full_path()` instead. + Returns a string pointing to the script or executable + **NOTE:** You should not need to use this method. Passing the object + itself should work in all cases. For example: `run_command(obj, arg1, arg2)`. + +- `full_path()` (*since 0.55.0*): which returns a string pointing to the script or + executable **NOTE:** You should not need to use this method. Passing the object + itself should work in all cases. For example: `run_command(obj, arg1, arg2)`. + +### `environment` object + +This object is returned by [`environment()`](#environment) and stores +detailed information about how environment variables should be set +during tests. It should be passed as the `env` keyword argument to +tests and other functions. It has the following methods. + +- `append(varname, value1, value2, ...)`: appends the given values to + the old value of the environment variable, e.g. `env.append('FOO', + 'BAR', 'BAZ', separator : ';')` produces `BOB;BAR;BAZ` if `FOO` had + the value `BOB` and plain `BAR;BAZ` if the value was not defined. If + the separator is not specified explicitly, the default path + separator for the host operating system will be used, i.e. ';' for + Windows and ':' for UNIX/POSIX systems. + +- `prepend(varname, value1, value2, ...)`: same as `append` + except that it writes to the beginning of the variable. + +- `set(varname, value1, value2)`: sets the environment variable + specified in the first argument to the values in the second argument + joined by the separator, e.g. `env.set('FOO', 'BAR'),` sets envvar + `FOO` to value `BAR`. See `append()` above for how separators work. + +**Note:** All these methods overwrite the previously-defined value(s) +if called twice with the same `varname`. + +### `external library` object + +This object is returned by [`find_library()`](#find_library) and +contains an external (i.e. not built as part of this project) +library. This object has the following methods: + +- `found()`: returns whether the library was found. + +- `type_name()` *(since 0.48.0)*: returns a string describing + the type of the dependency, which will be `library` in this case. + +- `partial_dependency(compile_args : false, link_args : false, links + : false, includes : false, source : false)` *(since 0.46.0)*: returns + a new dependency object with the same name, version, found status, + type name, and methods as the object that called it. This new + object will only inherit other attributes from its parent as + controlled by keyword arguments. + +### Feature option object + +*(since 0.47.0)* + +The following methods are defined for all [`feature` options](Build-options.md#features): + +- `enabled()`: returns whether the feature was set to `'enabled'` +- `disabled()`: returns whether the feature was set to `'disabled'` +- `auto()`: returns whether the feature was set to `'auto'` + +### `generator` object + +This object is returned by [`generator()`](#generator) and contains a +generator that is used to transform files from one type to another by +an executable (e.g. `idl` files into source code and headers). + +- `process(list_of_files, ...)`: takes a list of files, causes them to + be processed and returns an object containing the result which can + then, for example, be passed into a build target definition. The + keyword argument `extra_args`, if specified, will be used to replace + an entry `@EXTRA_ARGS@` in the argument list. The keyword argument + `preserve_path_from`, if given, specifies that the output files need + to maintain their directory structure inside the target temporary + directory. The most common value for this is + `meson.current_source_dir()`. With this value when a file called + `subdir/one.input` is processed it generates a file `/subdir/one.out` as opposed to `/one.out`. + +### `subproject` object + +This object is returned by [`subproject()`](#subproject) and is an +opaque object representing it. + +- `found()` *(since 0.48.0)*: returns whether the subproject was + successfully setup + +- `get_variable(name, fallback)`: fetches the specified variable from + inside the subproject. This is useful to, for instance, get a + [declared dependency](#declare_dependency) from the + [subproject](Subprojects.md). + + If the variable does not exist, the variable `fallback` is returned. + If a fallback is not specified, then attempting to read a non-existing + variable will cause a fatal error. + +### `run result` object + +This object encapsulates the result of trying to compile and run a +sample piece of code with [`compiler.run()`](#compiler-object) or +[`run_command()`](#run_command). It has the following methods: + +- `compiled()`: if true, the compilation succeeded, if false it did not + and the other methods return unspecified data. This is only available + for `compiler.run()` results. +- `returncode()`: the return code of executing the compiled binary +- `stderr()`: the standard error produced when the command was run +- `stdout()`: the standard out produced when the command was run diff --git a/meson/docs/markdown/Reference-tables.md b/meson/docs/markdown/Reference-tables.md new file mode 100644 index 000000000..3be129fe1 --- /dev/null +++ b/meson/docs/markdown/Reference-tables.md @@ -0,0 +1,323 @@ +# Reference tables + +## Compiler ids + +These are return values of the `get_id` (Compiler family) and +`get_argument_syntax` (Argument syntax) method in a compiler object. + +| Value | Compiler family | Argument syntax | +| ----- | --------------- | --------------- | +| arm | ARM compiler | | +| armclang | ARMCLANG compiler | | +| c2000 | Texas Instruments C2000 compiler | | +| ccrx | Renesas RX Family C/C++ compiler | | +| clang | The Clang compiler | gcc | +| clang-cl | The Clang compiler (MSVC compatible driver) | msvc | +| dmd | D lang reference compiler | | +| emscripten| Emscripten WASM compiler | | +| flang | Flang Fortran compiler | | +| g95 | The G95 Fortran compiler | | +| gcc | The GNU Compiler Collection | gcc | +| intel | Intel compiler (Linux and Mac) | gcc | +| intel-cl | Intel compiler (Windows) | msvc | +| lcc | Elbrus C/C++/Fortran Compiler | | +| llvm | LLVM-based compiler (Swift, D) | | +| mono | Xamarin C# compiler | | +| msvc | Microsoft Visual Studio | msvc | +| nagfor | The NAG Fortran compiler | | +| open64 | The Open64 Fortran Compiler | | +| pathscale | The Pathscale Fortran compiler | | +| pgi | Portland PGI C/C++/Fortran compilers | | +| rustc | Rust compiler | | +| sun | Sun Fortran compiler | | +| valac | Vala compiler | | +| xc16 | Microchip XC16 C compiler | | + +## Linker ids + +These are return values of the `get_linker_id` method in a compiler object. + +| Value | Linker family | +| ----- | --------------- | +| ld.bfd | The GNU linker | +| ld.gold | The GNU gold linker | +| ld.lld | The LLVM linker, with the GNU interface | +| ld.solaris | Solaris and illumos | +| ld.wasm | emscripten's wasm-ld linker | +| ld64 | Apple ld64 | +| link | MSVC linker | +| lld-link | The LLVM linker, with the MSVC interface | +| xilink | Used with Intel-cl only, MSVC like | +| optlink | optlink (used with DMD) | +| rlink | The Renesas linker, used with CCrx only | +| xc16-ar | The Microchip linker, used with XC16 only | +| ar2000 | The Texas Instruments linker, used with C2000 only | +| armlink | The ARM linker (arm and armclang compilers) | +| pgi | Portland/Nvidia PGI | +| nvlink | Nvidia Linker used with cuda | + +For languages that don't have separate dynamic linkers such as C# and Java, the +`get_linker_id` will return the compiler name. + +## Script environment variables + +| Value | Comment | +| ----- | ------- | +| MESONINTROSPECT | Command to run to run the introspection command, may be of the form `python /path/to/meson introspect`, user is responsible for splitting the path if necessary. | +| MESON_BUILD_ROOT | Absolute path to the build dir | +| MESON_DIST_ROOT | Points to the root of the staging directory, only set when running `dist` scripts | +| MESON_SOURCE_ROOT | Absolute path to the source dir | +| MESON_SUBDIR | Current subdirectory, only set for `run_command` | + +## CPU families + +These are returned by the `cpu_family` method of `build_machine`, +`host_machine` and `target_machine`. For cross compilation they are +set in the cross file. + +| Value | Comment | +| ----- | ------- | +| aarch64 | 64 bit ARM processor | +| alpha | DEC Alpha processor | +| arc | 32 bit ARC processor | +| arm | 32 bit ARM processor | +| avr | Atmel AVR processor | +| c2000 | 32 bit C2000 processor | +| dspic | 16 bit Microchip dsPIC | +| e2k | MCST Elbrus processor | +| ia64 | Itanium processor | +| m68k | Motorola 68000 processor | +| microblaze | MicroBlaze processor | +| mips | 32 bit MIPS processor | +| mips64 | 64 bit MIPS processor | +| parisc | HP PA-RISC processor | +| pic24 | 16 bit Microchip PIC24 | +| ppc | 32 bit PPC processors | +| ppc64 | 64 bit PPC processors | +| riscv32 | 32 bit RISC-V Open ISA | +| riscv64 | 64 bit RISC-V Open ISA | +| rl78 | Renesas RL78 | +| rx | Renesas RX 32 bit MCU | +| s390 | IBM zSystem s390 | +| s390x | IBM zSystem s390x | +| sh4 | SuperH SH-4 | +| sparc | 32 bit SPARC | +| sparc64 | SPARC v9 processor | +| wasm32 | 32 bit Webassembly | +| wasm64 | 64 bit Webassembly | +| x86 | 32 bit x86 processor | +| x86_64 | 64 bit x86 processor | + +Any cpu family not listed in the above list is not guaranteed to +remain stable in future releases. + +Those porting from autotools should note that meson does not add +endianness to the name of the cpu_family. For example, autotools +will call little endian PPC64 "ppc64le", meson will not, you must +also check the `.endian()` value of the machine for this information. + +## Operating system names + +These are provided by the `.system()` method call. + +| Value | Comment | +| ----- | ------- | +| android | By convention only, subject to change | +| cygwin | The Cygwin environment for Windows | +| darwin | Either OSX or iOS | +| dragonfly | DragonFly BSD | +| emscripten | Emscripten's Javascript environment | +| freebsd | FreeBSD and its derivatives | +| gnu | GNU Hurd | +| haiku | | +| linux | | +| netbsd | | +| openbsd | | +| windows | Any version of Windows | +| sunos | illumos and Solaris | + +Any string not listed above is not guaranteed to remain stable in +future releases. + +## Language arguments parameter names + +These are the parameter names for passing language specific arguments to your build target. + +| Language | compiler name | linker name | +| ------------- | ------------- | ----------------- | +| C | c_args | c_link_args | +| C++ | cpp_args | cpp_link_args | +| C# | cs_args | cs_link_args | +| D | d_args | d_link_args | +| Fortran | fortran_args | fortran_link_args | +| Java | java_args | java_link_args | +| Objective C | objc_args | objc_link_args | +| Objective C++ | objcpp_args | objcpp_link_args | +| Rust | rust_args | rust_link_args | +| Vala | vala_args | vala_link_args | + +All these `_*` options are specified per machine. See in [specifying +options per machine](Builtin-options.md#Specifying-options-per-machine) for on +how to do this in cross builds. + +## Compiler and linker flag environment variables + +These environment variables will be used to modify the compiler and +linker flags. + +It is recommended that you **do not use these**. They are provided purely to +for backwards compatibility with other build systems. There are many caveats to +their use, especially when rebuilding the project. It is **highly** recommended +that you use [the command line arguments](#language-arguments-parameter-names) +instead. + +| Name | Comment | +| ----- | ------- | +| CFLAGS | Flags for the C compiler | +| CXXFLAGS | Flags for the C++ compiler | +| OBJCFLAGS | Flags for the Objective C compiler | +| FFLAGS | Flags for the Fortran compiler | +| DFLAGS | Flags for the D compiler | +| VALAFLAGS | Flags for the Vala compiler | +| RUSTFLAGS | Flags for the Rust compiler | +| LDFLAGS | The linker flags, used for all languages | + +N.B. these settings are specified per machine, and so the environment varibles +actually come in pairs. See the [environment variables per +machine](#Environment-variables-per-machine) section for details. + +## Function Attributes + +These are the parameters names that are supported using +`compiler.has_function_attribute()` or +`compiler.get_supported_function_attributes()` + +### GCC `__attribute__` + +These values are supported using the GCC style `__attribute__` annotations, +which are supported by GCC, Clang, and other compilers. + + +| Name | +|--------------------------| +| alias | +| aligned | +| alloc_size | +| always_inline | +| artificial | +| cold | +| const | +| constructor | +| constructor_priority | +| deprecated | +| destructor | +| error | +| externally_visible | +| fallthrough | +| flatten | +| format | +| format_arg | +| force_align_arg_pointer³ | +| gnu_inline | +| hot | +| ifunc | +| malloc | +| noclone | +| noinline | +| nonnull | +| noreturn | +| nothrow | +| optimize | +| packed | +| pure | +| returns_nonnull | +| unused | +| used | +| visibility* | +| visibility:default† | +| visibility:hidden† | +| visibility:internal† | +| visibility:protected† | +| warning | +| warn_unused_result | +| weak | +| weakreaf | + +\* *Changed in 0.52.0* the "visibility" target no longer includes +"protected", which is not present in Apple's clang. + +† *New in 0.52.0* These split visibility attributes are preferred to the plain +"visibility" as they provide narrower checks. + +³ *New in 0.55.0* + +### MSVC __declspec + +These values are supported using the MSVC style `__declspec` annotation, +which are supported by MSVC, GCC, Clang, and other compilers. + +| Name | +|----------------------| +| dllexport | +| dllimport | + + +## Dependency lookup methods + +These are the values that can be passed to `dependency` function's +`method` keyword argument. + +| Name | Comment | +| ----- | ------- | +| auto | Automatic method selection | +| pkg-config | Use Pkg-Config | +| cmake | Look up as a CMake module | +| config-tool | Use a custom dep tool such as `cups-config` | +| system | System provided (e.g. OpenGL) | +| extraframework | A macOS/iOS framework | + + +## Compiler and Linker selection variables + +N.B. these settings are specified per machine, and so the environment varibles +actually come in pairs. See the [environment variables per +machine](#Environment-variables-per-machine) section for details. + +| Language | Compiler | Linker | Note | +|---------------|----------|-----------|---------------------------------------------| +| C | CC | CC_LD | | +| C++ | CXX | CXX_LD | | +| D | DC | DC_LD | Before 0.54 D_LD* | +| Fortran | FC | FC_LD | Before 0.54 F_LD* | +| Objective-C | OBJC | OBJC_LD | | +| Objective-C++ | OBJCXX | OBJCXX_LD | Before 0.54 OBJCPP_LD* | +| Rust | RUSTC | RUSTC_LD | Before 0.54 RUST_LD* | +| Vala | VALAC | | Use CC_LD. Vala transpiles to C | +| C# | CSC | CSC | The linker is the compiler | + +*The old environment variales are still supported, but are deprecated and will +be removed in a future version of meson.* + +## Environment variables per machine + +Since *0.54.0*, Following Autotool and other legacy build systems, environment +variables that affect machine-specific settings come in pairs: for every bare +environment variable `FOO`, there is a suffixed `FOO_FOR_BUILD`, where `FOO` +just affects the host machine configuration, while `FOO_FOR_BUILD` just affects +the build machine configuration. For example: + + - `PKG_CONFIG_PATH_FOR_BUILD` controls the paths pkg-config will search for + just `native: true` dependencies (build machine). + + - `PKG_CONFIG_PATH` controls the paths pkg-config will search for just + `native: false` dependencies (host machine). + +This mirrors the `build.` prefix used for (built-in) meson options, which has +the same meaning. + +This is useful for cross builds. In the native builds, build = host, and the +unsuffixed environment variables alone will suffice. + +Prior to *0.54.0*, there was no `_FOR_BUILD`-suffixed variables, and most +environment variables only effected native machine configurations, though this +wasn't consistent (e.g. `PKG_CONFIG_PATH` still affected cross builds). diff --git a/meson/docs/markdown/Release-notes-for-0.37.0.md b/meson/docs/markdown/Release-notes-for-0.37.0.md new file mode 100644 index 000000000..7e8aa54fe --- /dev/null +++ b/meson/docs/markdown/Release-notes-for-0.37.0.md @@ -0,0 +1,128 @@ +--- +title: Release 0.37 +short-description: Release notes for 0.37 +... + +# New features + +## Mesontest + +Mesontest is a new testing tool that allows you to run your tests in many different ways. As an example you can run tests multiple times: + + mesontest --repeat=1000 a_test + +or with an arbitrary wrapper executable: + + mesontest --wrap='valgrind --tool=helgrind' a_test + +or under `gdb`, 1000 times in a row. This is handy for tests that fail spuriously, as when the crash happens you are given the full GDB command line: + + mesontest --repeat=1000 --gdb a_test + +## Mesonrewriter + +Mesonrewriter is an experimental tool to manipulate your build definitions programmatically. It is not installed by default yet but those interested can run it from the source repository. + +As an example, here is how you would add a source file to a build target: + + mesonrewriter add --target=program --filename=new_source.c + +## Shared modules + +The new `shared_module` function allows the creation of shared modules, that is, extension modules such as plugins that are meant to be used solely with `dlopen` rather than linking them to targets. + +## Gnome module + +- Detect required programs and print useful errors if missing + +### gtkdoc + +- Allow passing a list of directories to `src_dir` keyword argument +- Add `namespace` keyword argument +- Add `mode` keyword argument +- Fix `gtkdoc-scangobj` finding local libraries + +### compile_resources + +- Add `gresource_bundle` keyword argument to output `.gresource` files +- Add `export` and `install_header` keyword arguments +- Use depfile support available in GLib >= 2.52.0 + +## i18n module + +- Add `merge_file()` function for creating translated files +- Add `preset` keyword argument to included common gettext flags +- Read languages from `LINGUAS` file + +## LLVM IR compilation + +Meson has long had support for compiling assembler (GAS) files. In this release we add support for compiling LLVM IR files in a similar way when building with the Clang compiler. Just add it to the list of files when creating a `library` or `executable` target like any other source file. No special handling is required: + +```meson +executable('some-exe', 'main.c', 'asm-file.S', 'ir-file.ll') +``` + +As always, you can also mix LLVM IR files with C++, C, and Assembly (GAS) sources. + +## ViM indent and syntax files + +We now include filetype, indent, and syntax files for ViM [with the source tree](https://github.com/mesonbuild/meson/tree/master/data/syntax-highlighting/vim). Please file issues (or pull requests!) for enhancements or if you face any problems using them. + +## Push URLs in .wrap files + +[.wrap files](Using-the-WrapDB.md) for subprojects can now include a separate push URL to allow developers to push changes directly from a subproject git checkout. + +## pkg-config dependencies + +Meson now supports multiple version restrictions while searching for pkg-config dependencies. + +```meson +# Just want a lower limit +dependency('zlib', version : '>1.2.1') +# Want both a lower and an upper limit +dependency('opencv', version : ['>=2.3.0', '<=3.1.0']) +# Want to exclude one specific broken version +dependency('foolite', version : ['>=3.12.1', '!=3.13.99']) +``` + +## Overriding more binaries with environment variables + +You can now specify the binary to be used for the following tools by setting the corresponding environment variable + +| Name | Environment variable | +| ---- | -------------------- | +| pkg-config | PKG_CONFIG | +| readelf | READELF | +| nm | NM | + +## Support for `localstatedir` + +Similar to other options such as `bindir` and `datadir`, you can now specify the `localstatedir` for a project by passing `--localstatedir=dir` to `meson` or `-Dlocalstatedir=dir` to `mesonconf` after configuration. You can also access it from inside the `meson.build` file with `get_option('localstatedir')`. + +## New compiler function `symbols_have_underscore_prefix` + +Checks if the compiler prefixes an underscore to C global symbols with the default calling convention. This is useful when linking to compiled assembly code, or other code that does not have its C symbol mangling handled transparently by the compiler. + +```meson +cc = meson.get_compiler('c') +conf = configuration_data() +if cc.symbols_have_underscore_prefix() + conf.set('SYMBOLS_HAVE_UNDERSCORE', true) +endif +``` + +C symbol mangling is platform and architecture dependent, and a helper function is needed to detect it. For example, Windows 32-bit prefixes underscore, but 64-bit does not. Linux does not prefix an underscore but OS X does. + +## Vala + +GLib Resources compiled with [`gnome.compile_resources`](Gnome-module.md#compile_resources) that are added to the sources of a Vala build target will now cause the appropriate `--gresources` flag to be passed to the Vala compiler so you don't need to add that yourself to `vala_args:`. + +## Improvements to install scripts + +You can now pass arguments to install scripts added with [`meson.add_install_script()`](Reference-manual.md#meson-object). All arguments after the script name will be passed to the script. + +The `MESON_INSTALL_DESTDIR_PREFIX` environment variable is now set when install scripts are called. This contains the values of the `DESTDIR` environment variable and the `prefix` option passed to Meson joined together. This is useful because both those are usually absolute paths, and joining absolute paths in a cross-platform way is tricky. [`os.path.join` in Python](https://docs.python.org/3/library/os.path.html#os.path.join) will discard all previous path segments when it encounters an absolute path, and simply concatenating them will not work on Windows where absolute paths begin with the drive letter. + +## More install directories + +Added new options `sbindir` and `infodir` that can be used for installation. diff --git a/meson/docs/markdown/Release-notes-for-0.38.0.md b/meson/docs/markdown/Release-notes-for-0.38.0.md new file mode 100644 index 000000000..cbd8fff36 --- /dev/null +++ b/meson/docs/markdown/Release-notes-for-0.38.0.md @@ -0,0 +1,91 @@ +--- +title: Release 0.38 +short-description: Release notes for 0.38 +... + +## Uninstall target + +Meson allows you to uninstall an install step by invoking the uninstall target. This will remove all files installed as part of install. Note that this does not restore the original files. This also does not undo changes done by custom install scripts (because they can do arbitrary install operations). + +## Support for arbitrary test setups + +Sometimes you need to run unit tests with special settings. For example under Valgrind. Usually this requires extra command line options for the tool. This is supported with the new *test setup* feature. For example to set up a test run with Valgrind, you'd write this in a `meson.build` file: + +```meson +add_test_setup('valgrind', + exe_wrapper : [vg, '--error-exitcode=1', '--leak-check=full'], + timeout_multiplier : 100) +``` + +This tells Meson to run tests with Valgrind using the given options and multiplying the test timeout values by 100. To run this test setup simply issue the following command: + +```console +$ mesontest --setup=valgrind +``` + +## Intel C/C++ compiler support + +As usual, just set `CC=icc CXX=icpc` and Meson will use it as the C/C++ compiler. Currently only Linux is supported. + +## Get values from configuration data objects + +Now it is possible to query values stored in configuration data objects. + +```meson +cdata.set('key', 'value') +cdata.get('key') # returns 'value' +cdata.get('nokey', 'default') # returns 'default' +cdata.get('nokey') # halts with an error +``` + +## Python 3 module support + +Building Python 3 extension modules has always been possible, but it is now even easier: + +```meson +py3_mod = import('python3') +pylib = py3_mod.extension_module('modname', + 'modsource.c', + dependencies : py3_dep) +``` + +## Default options to subprojects + +Projects can specify overriding values for subprojects' `default_options` when invoking a subproject: + +```meson +subproject('foo', default_options : ['optname=overridevalue']) +dependency('some-dep', fallback : ['some_subproject', 'some_dep'], default_options : ['optname=overridevalue']) +``` + +The effect is the same as if the default options were written in the subproject's `project` call. + +## Set targets to be built (or not) by default + +Build targets got a new keyword `build_by_default` which tells whether the target should be built by default when running e.g. `ninja`. Custom targets are not built by default but other targets are. Any target that is tagged as installed or to be built always is also built by default, regardless of the value of this keyword. + +## Add option to mesonconf to wipe cached data. + +Meson caches the results of dependency lookups. Sometimes these may get out of sync with the system state. Mesonconf now has a `--clearcache` option to clear these values so they will be re-searched from the system upon next compile. + +## Can specify file permissions and owner when installing data + +The new `install_mode` keyword argument can be used to specify file permissions and uid/gid of files when doing the install. This allows you to, for example, install suid root scripts. + +## `has_header()` checks are now faster + +When using compilers that implement the [`__has_include()` preprocessor macro](https://clang.llvm.org/docs/LanguageExtensions.html#include-file-checking-macros), the check is now ~40% faster. + +## Array indexing now supports fallback values + +The second argument to the array [`.get()`](Reference-manual.md#array-object) function is now returned if the specified index could not be found +```meson +array = [10, 11, 12, 13] +array.get(0) # this will return `10` +array.get(4) # this will give an error about invalid index +array.get(4, 0) # this will return `0` +``` + +## Silent mode for Mesontest + +The Meson test executor got a new argument `-q` (and `--quiet`) that suppresses all output of successful tests. This makes interactive usage nicer because only errors are printed. diff --git a/meson/docs/markdown/Release-notes-for-0.39.0.md b/meson/docs/markdown/Release-notes-for-0.39.0.md new file mode 100644 index 000000000..705bb800a --- /dev/null +++ b/meson/docs/markdown/Release-notes-for-0.39.0.md @@ -0,0 +1,14 @@ +--- +title: Release 0.39 +short-description: Release notes for 0.39 +... + +The 0.39.0 release turned out to consist almost entirely of bug fixes and minor polishes. + +# New features + +## Extra arguments for tests + +The Meson test executable allows specifying extra command line arguments to pass to test executables. + + mesontest --test-args=--more-debug-info currenttest diff --git a/meson/docs/markdown/Release-notes-for-0.40.0.md b/meson/docs/markdown/Release-notes-for-0.40.0.md new file mode 100644 index 000000000..a480788a8 --- /dev/null +++ b/meson/docs/markdown/Release-notes-for-0.40.0.md @@ -0,0 +1,106 @@ +--- +title: Release 0.40 +short-description: Release notes for 0.40 +... + +# New features + +## Outputs of generators can be used in custom targets in the VS backend + +This has been possible with the Ninja backend for a long time but now the Visual Studio backend works too. + +## `compute_int` method in the compiler objects + +This method can be used to evaluate the value of an expression. As an example: + +```meson +cc = meson.get_compiler('c') +two = cc.compute_int('1 + 1') # A very slow way of adding two numbers. +``` + +## Visual Studio 2017 support + +There is now a VS2017 backend (`--backend=vs2017`) as well as a generic VS backend (`--backend=vs`) that autodetects the currently active VS version. + +## Automatic initialization of subprojects that are git submodules + +If you have a directory inside your subprojects directory (usually `subprojects/`) that is a git submodule, meson will automatically initialize it if your build files refer to it. This will be done without needing a wrap file since git contains all the information needed. + +## No download mode for wraps + +Added a new option `wrap-mode` that can be toggled to prevent Meson from downloading dependency projects. Attempting to do so will cause an error. This is useful for distro packagers and other cases where you must explicitly enforce that nothing is downloaded from the net during configuration or build. + +## Overriding options per target + +Build targets got a new keyword argument `override_options` that can be used to override system options. As an example if you have a target that you know can't be built with `-Werror` enabled you can override the value of the option like this: + +```meson +executable('foo', 'foo.c', override_options : ['werror=false']) +``` + +Note that this does not affect project options, only those options that come from Meson (language standards, unity builds etc). + +## Compiler object get define + +Compiler objects got a new method `get_define()` that returns the given preprocessor symbol as a string. + +```meson +cc = meson.get_compiler('c') +one = cc.get_define('__linux__') # returns '1' on Linux hosts +``` + +## Cygwin support + +Meson now works under Cygwin and we have added it to our CI test matrix. + +## Multiple install directories + +Custom targets can produce many output files. Previously it was only possible to install all of them in the same directory, but now you can install each output in its own directory like this: + +```meson +custom_target('two_out', + output : ['diff.h', 'diff.sh'], + command : [creator, '@OUTDIR@'], + install : true, + install_dir : ['dir1', 'dir2']) +``` + +For backwards compatibility and for conciseness, if you only specify one directory all outputs will be installed into it. + +The same feature is also available for Vala build targets. For instance, to install a shared library built by valac, the generated header, and the generated VAPI (respectively) into the default locations, you can do: + +```meson +shared_library('valalib', 'mylib.vala', + install : true, + install_dir : [true, true, true]) +``` + +To install any of the three in a custom directory, just pass it as a string instead of `true`. To not install it, pass `false`. You can also pass a single string (as before) and it will cause only the library to be installed, so this is a backwards-compatible change. + +## Can specify method of obtaining dependencies + +Some dependencies have many ways of being provided. As an example Qt can either be detected via `pkg-config` or `qmake`. Until now Meson has had a heuristic for selecting which method to choose but sometimes it does the wrong thing. This can now be overridden with the `method` keyword like this: + +```meson +qt5_dep = dependency('qt5', modules : 'core', method : 'qmake') +``` + +## Link whole contents of static libraries + +The default behavior of static libraries is to discard all symbols that are not not directly referenced. This may lead to exported symbols being lost. Most compilers support "whole archive" linking that includes all symbols and code of a given static library. This is exposed with the `link_whole` keyword. + +```meson +shared_library('foo', 'foo.c', link_whole : some_static_library) +``` + +Note that Visual Studio compilers only support this functionality on VS 2015 and newer. + +## Unity builds only for subprojects + +Up until now unity builds were either done for every target or none of them. Now unity builds can be specified to be enabled only for subprojects, which change seldom, and not for the master project, which changes a lot. This is enabled by setting the `unity` option to `subprojects`. + +## Running `mesonintrospect` from scripts + +Meson now sets the `MESONINTROSPECT` environment variable in addition to `MESON_SOURCE_ROOT` and other variables when running scripts. It is guaranteed to point to the correct `mesonintrospect` script, which is important when running meson uninstalled from git or when your `PATH`s may not be set up correctly. + +Specifically, the following meson functions will set it: `meson.add_install_script()`, `meson.add_postconf_script()`, `run_command()`, `run_target()`. diff --git a/meson/docs/markdown/Release-notes-for-0.41.0.md b/meson/docs/markdown/Release-notes-for-0.41.0.md new file mode 100644 index 000000000..7861aad7d --- /dev/null +++ b/meson/docs/markdown/Release-notes-for-0.41.0.md @@ -0,0 +1,84 @@ +--- +title: Release 0.41 +short-description: Release notes for 0.41 +... + +# New features + +## Dependency Handler for LLVM + +Native support for linking against LLVM using the `dependency` function. + +## vcs_tag keyword fallback is now optional + +The `fallback` keyword in `vcs_tag()` is now optional. If not given, its value +defaults to the return value of `meson.project_version()`. + +## Better quoting of special characters in ninja command invocations + +The ninja backend now quotes special characters that may be interpreted by +ninja itself, providing better interoperability with custom commands. This +support may not be perfect; please report any issues found with special +characters to the issue tracker. + +## Pkgconfig support for custom variables + +The Pkgconfig module object can add arbitrary variables to the generated .pc +file with the new `variables` keyword: +```meson +pkg.generate(libraries : libs, + subdirs : h, + version : '1.0', + name : 'libsimple', + filebase : 'simple', + description : 'A simple demo library.', + variables : ['datadir=${prefix}/data']) +``` + +## A target for creating tarballs + +Creating distribution tarballs is simple: + + ninja dist + +This will create a `.tar.xz` archive of the source code including +submodules without any revision control information. This command also +verifies that the resulting archive can be built, tested and +installed. This is roughly equivalent to the `distcheck` target in +other build systems. Currently this only works for projects using Git +and only with the Ninja backend. + +## Support for passing arguments to Rust compiler + +Targets for building rust now take a `rust_args` keyword. + +## Code coverage export for tests + +Code coverage can be generated for tests by passing the `--cov` argument to +the `run_tests.py` test runner. Note, since multiple processes are used, +coverage must be combined before producing a report (`coverage3 combine`). + +## Reproducible builds + +All known issues have been fixed and Meson can now build reproducible Debian +packages out of the box. + +## Extended template substitution in configure_file + +The output argument of `configure_file()` is parsed for @BASENAME@ and +@PLAINNAME@ substitutions. + +## Cross-config property for overriding whether an exe wrapper is needed + +The new `needs_exe_wrapper` property allows overriding auto-detection for +cases where `build_machine` appears to be compatible with `host_machine`, +but actually isn't. For example when: +- `build_machine` is macOS and `host_machine` is the iOS Simulator +- the `build_machine`'s libc is glibc but the `host_machine` libc is uClibc +- code relies on kernel features not available on the `build_machine` + +## Support for capturing stdout of a command in configure_file + +`configure_file()` now supports a new keyword - `capture`. When this argument +is set to true, Meson captures `stdout` of the `command` and writes it to +the target file specified as `output`. diff --git a/meson/docs/markdown/Release-notes-for-0.42.0.md b/meson/docs/markdown/Release-notes-for-0.42.0.md new file mode 100644 index 000000000..e89f1a117 --- /dev/null +++ b/meson/docs/markdown/Release-notes-for-0.42.0.md @@ -0,0 +1,147 @@ +--- +title: Release 0.42 +short-description: Release notes for 0.42 +... + +# New features + +## Distribution tarballs from Mercurial repositories + +Creating distribution tarballs can now be made out of projects based on +Mercurial. As before, this remains possible only with the Ninja backend. + +## Keyword argument verification + +Meson will now check the keyword arguments used when calling any function +and print a warning if any of the keyword arguments is not known. In the +future this will become a hard error. + +## Add support for Genie to Vala compiler + +The Vala compiler has an alternative syntax, Genie, that uses the `.gs` +file extension. Meson now recognises and uses Genie files. + +## Pkgconfig support for additional cflags + +The Pkgconfig module object can add arbitrary extra cflags to the Cflags +value in the .pc file, using the "extra_cflags" keyword: +```meson +pkg.generate(libraries : libs, + subdirs : h, + version : '1.0', + name : 'libsimple', + filebase : 'simple', + description : 'A simple demo library.', + extra_cflags : '-Dfoo' ) +``` + +## Base options accessible via get_option() + +Base options are now accessible via the get_option() function. +```meson +uses_lto = get_option('b_lto') +``` + +## Allow crate type configuration for Rust compiler + +Rust targets now take an optional `rust_crate_type` keyword, allowing +you to set the crate type of the resulting artifact. Valid crate types +are `dylib` or `cdylib` for shared libraries, and `rlib` or +`staticlib` for static libraries. For more, see +Rust's [linkage reference][rust-linkage]. + +[rust-linkage]: https://doc.rust-lang.org/reference/linkage.html + +## Simultaneous use of Address- and Undefined Behavior Sanitizers + +Both the address- and undefined behavior sanitizers can now be used +simultaneously by passing `-Db_sanitize=address,undefined` to Meson. + +## Unstable SIMD module + +A new experimental module to compile code with many different SIMD +instruction sets and selecting the best one at runtime. This module +is unstable, meaning its API is subject to change in later releases. +It might also be removed altogether. + + +## Import libraries for executables on Windows + +The new keyword `implib` to `executable()` allows generation of an import +library for the executable. + +## Added build_rpath keyword argument + +You can specify `build_rpath : '/foo/bar'` in build targets and the +given path will get added to the target's rpath in the build tree. It +is removed during the install step. + +Meson will print a warning when the user tries to add an rpath linker +flag manually, e.g. via `link_args` to a target. This is not +recommended because having multiple rpath causes them to stomp on each +other. This warning will become a hard error in some future release. + +## Vulkan dependency module + +Vulkan can now be used as native dependency. The dependency module +will detect the VULKAN_SDK environment variable or otherwise try to +receive the vulkan library and header via pkgconfig or from the +system. + +## Limiting the maximum number of linker processes + +With the Ninja backend it is now possible to limit the maximum number of +concurrent linker processes. This is usually only needed for projects +that have many large link steps that cause the system to run out of +memory if they are run in parallel. This limit can be set with the +new `backend_max_links` option. + +## Disable implicit include directories + +By default Meson adds the current source and build directories to the +header search path. On some rare occasions this is not desired. Setting +the `implicit_include_directories` keyword argument to `false` these +directories are not used. + +## Support for MPI dependency + +MPI is now supported as a dependency. Because dependencies are +language-specific, you must specify the requested language with the +`language` keyword, i.e., `dependency('mpi', language='c')` will +request the C MPI headers and libraries. See [the MPI +dependency](Dependencies.md#mpi) for more information. + +## Allow excluding files or directories from `install_subdir` + +The [`install_subdir`](Reference-manual.md#install_subdir) command +accepts the new `exclude_files` and `exclude_directories` keyword +arguments that allow specified files or directories to be excluded +from the installed subdirectory. + +## Make all Meson functionality invokable via the main executable + +Previously Meson had multiple executables such as `mesonintrospect` +and `mesontest`. They are now invokable via the main Meson executable +like this: + + meson configure # equivalent to mesonconf + meson test # equivalent to mesontest + +The old commands are still available but they are deprecated +and will be removed in some future release. + +## Pcap dependency detector + +Meson will automatically obtain dependency information for pcap +using the `pcap-config` tool. It is used like any other dependency: + +```meson +pcap_dep = dependency('pcap', version : '>=1.0') +``` + +## GNOME module mkenums_simple() addition + +Most libraries and applications use the same standard templates for +glib-mkenums. There is now a new `mkenums_simple()` convenience method +that passes those default templates to glib-mkenums and allows some tweaks +such as optional function decorators or leading underscores. diff --git a/meson/docs/markdown/Release-notes-for-0.43.0.md b/meson/docs/markdown/Release-notes-for-0.43.0.md new file mode 100644 index 000000000..0ca66aea2 --- /dev/null +++ b/meson/docs/markdown/Release-notes-for-0.43.0.md @@ -0,0 +1,123 @@ +--- +title: Release 0.43 +short-description: Release notes for 0.43 +... + +## Portability improvements to Boost Dependency + +The Boost dependency has been improved to better detect the various ways to +install boost on multiple platforms. At the same time the `modules` semantics +for the dependency has been changed. Previously it was allowed to specify +header directories as `modules` but it wasn't required. Now, modules are only +used to specify libraries that require linking. + +This is a breaking change and the fix is to remove all modules that aren't +found. + +## Generator learned capture + +Generators can now be configured to capture the standard output. See +`test cases/common/98 gen extra/meson.build` for an example. + +## Can index CustomTarget objects + +The `CustomTarget` object can now be indexed like an array. The resulting +object can be used as a source file for other Targets, this will create a +dependency on the original `CustomTarget`, but will only insert the generated +file corresponding to the index value of the `CustomTarget`'s `output` keyword. + +```meson +c = custom_target( + ... + output : ['out.h', 'out.c'], +) +lib1 = static_library( + 'lib1', + [lib1_sources, c[0]], + ... +) +exec = executable( + 'executable', + c[1], + link_with : lib1, +) +``` + +## Can override executables in the cross file + +The cross file can now be used for overriding the result of +`find_program`. As an example if you want to find the `objdump` +command and have the following definition in your cross file: + +```ini +[binaries] +... +objdump = '/usr/bin/arm-linux-gnueabihf-objdump-6' +``` + +Then issuing the command `find_program('objdump')` will return the +version specified in the cross file. If you need the build machine's +objdump, you can specify the `native` keyword like this: + +```meson +native_objdump = find_program('objdump', native : true) +``` + +## Easier handling of supported compiler arguments + +A common pattern for handling multiple desired compiler arguments, was to +test their presence and add them to an array one-by-one, e.g.: + +```meson +warning_flags_maybe = [ + '-Wsomething', + '-Wanother-thing', + '-Wno-the-other-thing', +] +warning_flags = [] +foreach flag : warning_flags_maybe + if cc.has_argument(flag) + warning_flags += flag + endif +endforeach +cc.add_project_argument(warning_flags) +``` + +A helper has been added for the foreach/has_argument pattern, so you can +now simply do: + +```meson +warning_flags = [ ... ] +flags = cc.get_supported_arguments(warning_flags) +``` + +## Better support for shared libraries in non-system paths + +Meson has support for prebuilt object files and static libraries. +This release adds feature parity to shared libraries that are either +in non-standard system paths or shipped as part of your project. On +systems that support rpath, Meson automatically adds rpath entries +to built targets using manually found external libraries. + +This means that e.g. supporting prebuilt libraries shipped with your +source or provided by subprojects or wrap definitions by writing a +build file like this: + +```meson +project('myprebuiltlibrary', 'c') + +cc = meson.get_compiler('c') +prebuilt = cc.find_library('mylib', dirs : meson.current_source_dir()) +mydep = declare_dependency(include_directories : include_directories('.'), + dependencies : prebuilt) +``` + +Then you can use the dependency object in the same way as any other. + +## wrap-svn + +The [Wrap dependency system](Wrap-dependency-system-manual.md) now +supports [Subversion](https://subversion.apache.org/) (svn). This +support is rudimentary. The repository url has to point to a specific +(sub)directory containing the `meson.build` file (typically +`trunk/`). However, providing a `revision` is supported. diff --git a/meson/docs/markdown/Release-notes-for-0.44.0.md b/meson/docs/markdown/Release-notes-for-0.44.0.md new file mode 100644 index 000000000..d7659407d --- /dev/null +++ b/meson/docs/markdown/Release-notes-for-0.44.0.md @@ -0,0 +1,153 @@ +--- +title: Release 0.44 +short-description: Release notes for 0.44 +... + +# New features + +## Added warning function + +This function prints its argument to the console prefixed by "WARNING:" in +yellow color. A simple example: + +warning('foo is deprecated, please use bar instead') + + +## Adds support for additional Qt5-Module keyword `moc_extra_arguments` + +When `moc`-ing sources, the `moc` tool does not know about any +preprocessor macros. The generated code might not match the input +files when the linking with the moc input sources happens. + +This amendment allows to specify a a list of additional arguments +passed to the `moc` tool. They are called `moc_extra_arguments`. + + +## Prefix-dependent defaults for sysconfdir, localstatedir and sharedstatedir + +These options now default in a way consistent with +[FHS](http://refspecs.linuxfoundation.org/fhs.shtml) and common usage. + +If prefix is `/usr`, default sysconfdir to `/etc`, localstatedir to `/var` and +sharedstatedir to `/var/lib`. + +If prefix is `/usr/local` (the default), default localstatedir to `/var/local` +and sharedstatedir to `/var/local/lib`. + + +## An array type for user options + +Previously to have an option that took more than one value a string +value would have to be created and split, but validating this was +difficult. A new array type has been added to the meson_options.txt +for this case. It works like a 'combo', but allows more than one +option to be passed. The values can optionally be validated against a +list of valid values. When used on the command line (with -D), values +are passed as a comma separated list. + +```meson +option('array_opt', type : 'array', choices : ['one', 'two', 'three'], value : ['one']) +``` + +These can be overwritten on the command line, + +```meson +meson _build -Darray_opt=two,three +``` + +## LLVM dependency supports both dynamic and static linking + +The LLVM dependency has been improved to consistently use dynamic linking. +Previously recent version (>= 3.9) would link dynamically while older versions +would link statically. + +Now LLVM also accepts the `static` keyword to enable statically linking to LLVM +modules instead of dynamically linking. + + +## Added `if_found` to subdir + +Added a new keyword argument to the `subdir` command. It is given a +list of dependency objects and the function will only recurse in the +subdirectory if they are all found. Typical usage goes like this. + +```meson +d1 = dependency('foo') # This is found +d2 = dependency('bar') # This is not found + +subdir('somedir', if_found : [d1, d2]) +``` + +In this case the subdirectory would not be entered since `d2` could +not be found. + +## `get_unquoted()` method for the `configuration` data object + +New convenience method that allows reusing a variable value +defined quoted. Useful in C for config.h strings for example. + + +## Added disabler object + +A disabler object is a new kind of object that has very specific +semantics. If it is used as part of any other operation such as an +argument to a function call, logical operations etc, it will cause the +operation to not be evaluated. Instead the return value of said +operation will also be the disabler object. + +For example if you have an setup like this: + +```meson +dep = dependency('foo') +lib = shared_library('mylib', 'mylib.c', + dependencies : dep) +exe = executable('mytest', 'mytest.c', + link_with : lib) +test('mytest', exe) +``` + +If you replace the dependency with a disabler object like this: + +```meson +dep = disabler() +lib = shared_library('mylib', 'mylib.c', + dependencies : dep) +exe = executable('mytest', 'mytest.c', + link_with : lib) +test('mytest', exe) +``` + +Then the shared library, executable and unit test are not +created. This is a handy mechanism to cut down on the number of `if` +statements. + + +## Config-Tool based dependencies gained a method to get arbitrary options + +A number of dependencies (CUPS, LLVM, pcap, WxWidgets, GnuStep) use a config +tool instead of pkg-config. As of this version they now have a +`get_configtool_variable` method, which is analogous to the +`get_pkgconfig_variable` for pkg config. + +```meson +dep_llvm = dependency('LLVM') +llvm_inc_dir = dep_llvm.get_configtool_variable('includedir') +``` + +## Embedded Python in Windows MSI packages + +Meson now ships an internal version of Python in the MSI installer packages. +This means that it can run Python scripts that are part of your build +transparently. That is, if you do the following: + +```meson +myprog = find_program('myscript.py') +``` + +Then Meson will run the script with its internal Python version if necessary. + +## Libwmf dependency now supports libwmf-config + +Earlier, `dependency('libwmf')` could only detect the library with pkg-config +files. Now, if pkg-config files are not found, Meson will look for +`libwmf-config` and if it's found, will use that to find the library. diff --git a/meson/docs/markdown/Release-notes-for-0.45.0.md b/meson/docs/markdown/Release-notes-for-0.45.0.md new file mode 100644 index 000000000..2b67f9b6c --- /dev/null +++ b/meson/docs/markdown/Release-notes-for-0.45.0.md @@ -0,0 +1,201 @@ +--- +title: Release 0.45 +short-description: Release notes for 0.45 +... + +# New features + +## Python minimum version is now 3.5 + +Meson will from this version on require Python version 3.5 or newer. + +## Config-Tool based dependencies can be specified in a cross file + +Tools like LLVM and pcap use a config tool for dependencies, this is a +script or binary that is run to get configuration information (cflags, +ldflags, etc) from. + +These binaries may now be specified in the `binaries` section of a +cross file. + +```ini +[binaries] +cc = ... +llvm-config = '/usr/bin/llvm-config32' +``` + +## Visual Studio C# compiler support + +In addition to the Mono C# compiler we also support Visual Studio's C# +compiler. Currently this is only supported on the Ninja backend. + +## Removed two deprecated features + +The standalone `find_library` function has been a no-op for a long +time. Starting with this version it becomes a hard error. + +There used to be a keywordless version of `run_target` which looked +like this: + +```meson +run_target('targetname', 'command', 'arg1', 'arg2') +``` + +This is now an error. The correct format for this is now: + +```meson +run_target('targetname', + command : ['command', 'arg1', 'arg2']) +``` + +## Experimental FPGA support + +This version adds support for generating, analysing and uploading FPGA +programs using the [IceStorm +toolchain](http://www.clifford.at/icestorm/). This support is +experimental and is currently limited to the `iCE 40` series of FPGA +chips. + +FPGA generation integrates with other parts of Meson seamlessly. As an +example, [here](https://github.com/jpakkane/lm32) is an example +project that compiles a simple firmware into Verilog and combines that +with an lm32 softcore processor. + +## Generator outputs can preserve directory structure + +Normally when generating files with a generator, Meson flattens the +input files so they all go in the same directory. Some code +generators, such as Protocol Buffers, require that the generated files +have the same directory layout as the input files used to generate +them. This can now be achieved like this: + +```meson +g = generator(...) # Compiles protobuf sources +generated = gen.process('com/mesonbuild/one.proto', + 'com/mesonbuild/two.proto', + preserve_path_from : meson.current_source_dir()) +``` + +This would cause the following files to be generated inside the target +private directory: + + com/mesonbuild/one.pb.h + com/mesonbuild/one.pb.cc + com/mesonbuild/two.pb.h + com/mesonbuild/two.pb.cc + +## Hexadecimal string literals + +Hexadecimal integer literals can now be used in build and option files. + +```meson +int_255 = 0xFF +``` + +## b_ndebug : if-release + +The value `if-release` can be given for the `b_ndebug` project option. + +This will make the `NDEBUG` pre-compiler macro to be defined for release +type builds as if the `b_ndebug` project option had had the value `true` +defined for it. + +## `install_data()` defaults to `{datadir}/{projectname}` + +If `install_data()` is not given an `install_dir` keyword argument, the +target directory defaults to `{datadir}/{projectname}` (e.g. +`/usr/share/myproj`). + +## install_subdir() supports strip_directory + +If strip_directory=true install_subdir() installs directory contents +instead of directory itself, stripping basename of the source directory. + +## Integer options + +There is a new integer option type with optional minimum and maximum +values. It can be specified like this in the `meson_options.txt` file: + +```meson +option('integer_option', type : 'integer', min : 0, max : 5, value : 3) +``` + +## New method meson.project_license() + +The `meson` builtin object now has a `project_license()` method that +returns a list of all licenses for the project. + +## Rust cross-compilation + +Cross-compilation is now supported for Rust targets. Like other +cross-compilers, the Rust binary must be specified in your cross +file. It should specify a `--target` (as installed by `rustup target`) +and a custom linker pointing to your C cross-compiler. For example: + +```ini +[binaries] +c = '/usr/bin/arm-linux-gnueabihf-gcc-7' +rust = [ + 'rustc', + '--target', 'arm-unknown-linux-gnueabihf', + '-C', 'linker=/usr/bin/arm-linux-gnueabihf-gcc-7', +] +``` + +## Rust compiler-private library disambiguation + +When building a Rust target with Rust library dependencies, an +`--extern` argument is now specified to avoid ambiguity between the +dependency library, and any crates of the same name in `rustc`'s +private sysroot. + +## Project templates + +Meson ships with predefined project templates. To start a new project from +scratch, simply go to an empty directory and type: + + meson init --name=myproject --type=executable --language=c + +## Improve test setup selection + +Test setups are now identified (also) by the project they belong to +and it is possible to select the used test setup from a specific +project. E.g. to use a test setup `some_setup` from project +`some_project` for all executed tests one can use + + meson test --setup some_project:some_setup + +Should one rather want test setups to be used from the same project as +where the current test itself has been defined, one can use just + + meson test --setup some_setup + +In the latter case every (sub)project must have a test setup `some_setup` +defined in it. + +## Can use custom targets as Windows resource files + +The `compile_resources()` function of the `windows` module can now be used on custom targets as well as regular files. + +## Can promote dependencies with wrap command + +The `promote` command makes it easy to copy nested dependencies to the top level. + + meson wrap promote scommon + +This will search the project tree for a subproject called `scommon` +and copy it to the top level. + +If there are many embedded subprojects with the same name, you have to +specify which one to promote manually like this: + + meson wrap promote subprojects/s1/subprojects/scommon + +## Yielding subproject option to superproject + +Normally project options are specific to the current project. However +sometimes you want to have an option whose value is the same over all +projects. This can be achieved with the new `yield` keyword for +options. When set to `true`, getting the value of this option in +`meson.build` files gets the value from the option with the same name +in the master project (if such an option exists). diff --git a/meson/docs/markdown/Release-notes-for-0.46.0.md b/meson/docs/markdown/Release-notes-for-0.46.0.md new file mode 100644 index 000000000..e3b45e92c --- /dev/null +++ b/meson/docs/markdown/Release-notes-for-0.46.0.md @@ -0,0 +1,316 @@ +--- +title: Release 0.46 +short-description: Release notes for 0.46 +... + +# New features + +## Allow early return from a script + +Added the function `subdir_done()`. Its invocation exits the current script at +the point of invocation. All previously invoked build targets and commands are +build/executed. All following ones are ignored. If the current script was +invoked via `subdir()` the parent script continues normally. + +## Log output slightly changed + +The format of some human-readable diagnostic messages has changed in +minor ways. In case you are parsing these messages, you may need to +adjust your code. + +## ARM compiler for C and CPP + +Cross-compilation is now supported for ARM targets using ARM compiler - ARMCC. +The current implementation does not support shareable libraries. +The default extension of the output is .axf. +The environment path should be set properly for the ARM compiler executables. +The '--cpu' option with the appropriate target type should be mentioned +in the cross file as shown in the snippet below. + +```ini +[properties] +c_args = ['--cpu=Cortex-M0plus'] +cpp_args = ['--cpu=Cortex-M0plus'] + +``` + +## Building both shared and static libraries + +A new function `both_libraries()` has been added to build both shared and static +libraries at the same time. Source files will be compiled only once and object +files will be reused to build both shared and static libraries, unless +`b_staticpic` user option or `pic:` keyword argument are set to false in which +case sources will be compiled twice. + +The returned `buildtarget` object always represents the shared library. + +## Compiler object can now be passed to run_command() + +This can be used to run the current compiler with the specified arguments +to obtain additional information from it. +One of the use cases is to get the location of development files for the +GCC plugins: + +```meson +cc = meson.get_compiler('c') +result = run_command(cc, '-print-file-name=plugin') +plugin_dev_path = result.stdout().strip() +``` + +## declare_dependency() now supports `link_whole:` + +`declare_dependency()` now supports the `link_whole:` keyword argument which +transparently works for build targets which use that dependency. + +## Old command names are now errors + +The old executable names `mesonintrospect`, `mesonconf`, `mesonrewriter` +and `mesontest` have been deprecated for a long time. Starting from +this version they no longer do anything but instead always error +out. All functionality is available as subcommands in the main `meson` +binary. + +## Meson and meson configure now accept the same arguments + +Previously meson required that builtin arguments (like prefix) be passed as +`--prefix` to `meson` and `-Dprefix` to `meson configure`. `meson` now accepts -D +form like `meson configure` has. `meson configure` also accepts the `--prefix` +form, like `meson` has. + +## Recursively extract objects + +The `recursive:` keyword argument has been added to `extract_all_objects()`. When set +to `true` it will also return objects passed to the `objects:` argument of this +target. By default only objects built for this target are returned to maintain +backward compatibility with previous versions. The default will eventually be +changed to `true` in a future version. + +```meson +lib1 = static_library('a', 'source.c', objects : 'prebuilt.o') +lib2 = static_library('b', objects : lib1.extract_all_objects(recursive : true)) +``` + +## Can override find_program() + +It is now possible to override the result of `find_program` to point +to a custom program you want. The overriding is global and applies to +every subproject from there on. Here is how you would use it. + +In master project + +```meson +subproject('mydep') +``` + +In the called subproject: + +```meson +prog = find_program('my_custom_script') +meson.override_find_program('mycodegen', prog) +``` + +In master project (or, in fact, any subproject): + +```meson +genprog = find_program('mycodegen') +``` + +Now `genprog` points to the custom script. If the dependency had come +from the system, then it would point to the system version. + +You can also use the return value of `configure_file()` to override +a program in the same way as above: + +```meson +prog_script = configure_file(input : 'script.sh.in', + output : 'script.sh', + configuration : cdata) +meson.override_find_program('mycodegen', prog_script) +``` + +## New functions: has_link_argument() and friends + +A new set of methods has been added to [compiler objects](Reference-manual.md#compiler-object) +to test if the linker supports given arguments. + +- `has_link_argument()` +- `has_multi_link_arguments()` +- `get_supported_link_arguments()` +- `first_supported_link_argument()` + +## "meson help" now shows command line help + +Command line parsing is now less surprising. "meson help" is now +equivalent to "meson --help" and "meson help " is +equivalent to "meson --help", instead of creating a build +directory called "help" in these cases. + +## Autogeneration of simple meson.build files + +A feature to generate a meson.build file compiling given C/C++ source +files into a single executable has been added to "meson init". By +default, it will take all recognizable source files in the current +directory. You can also specify a list of dependencies with the -d +flag and automatically invoke a build with the -b flag to check if the +code builds with those dependencies. + +For example, + +```meson +meson init -fbd sdl2,gl +``` + +will look for C or C++ files in the current directory, generate a +meson.build for them with the dependencies of sdl2 and gl and +immediately try to build it, overwriting any previous meson.build and +build directory. + +## install_data() supports `rename:` + +The `rename:` keyword argument is used to change names of the installed +files. Here's how you install and rename the following files: + +- `file1.txt` into `share/myapp/dir1/data.txt` +- `file2.txt` into `share/myapp/dir2/data.txt` + +```meson +install_data(['file1.txt', 'file2.txt'], + rename : ['dir1/data.txt', 'dir2/data.txt'], + install_dir : 'share/myapp') +``` + +## Support for lcc compiler for e2k (Elbrus) architecture + +In this version, a support for lcc compiler for Elbrus processors +based on [e2k microarchitecture](https://en.wikipedia.org/wiki/Elbrus_2000) +has been added. + +Examples of such CPUs: +* [Elbrus-8S](https://en.wikipedia.org/wiki/Elbrus-8S); +* Elbrus-4S; +* [Elbrus-2S+](https://en.wikipedia.org/wiki/Elbrus-2S%2B). + +Such compiler have a similar behavior as gcc (basic option compatibility), +but, in is not strictly compatible with gcc as of current version. + +Major differences as of version 1.21.22: +* it does not support LTO and PCH; +* it suffers from the same dependency file creation error as icc; +* it has minor differences in output, especially version output; +* it differently reacts to lchmod() detection; +* some backend messages are produced in ru_RU.KOI8-R even if LANG=C; +* its preprocessor treats some characters differently. + +So every noted difference is properly handled now in meson. + +## String escape character sequence update + +Single-quoted strings in meson have been taught the same set of escape +sequences as in Python. It is therefore now possible to use arbitrary bytes in +strings, like for example `NUL` (`\0`) and other ASCII control characters. See +the chapter about [*Strings* in *Syntax*](Syntax.md#strings) for more +details. + +Potential backwards compatibility issue: Any valid escape sequence according to +the new rules will be interpreted as an escape sequence instead of the literal +characters. Previously only the following escape sequences were supported in +single-quote strings: `\'`, `\\` and `\n`. + +Note that the behaviour of triple-quoted (multiline) strings has not changed. +They behave like raw strings and do not support any escape sequences. + +## New `forcefallback` wrap mode + +A new wrap mode was added, `--wrap-mode=forcefallback`. When this is set, +dependencies for which a fallback was provided will always use it, even +if an external dependency exists and satisfies the version requirements. + +## Relaxing of target name requirements + +In earlier versions of Meson you could only have one target of a given name for each type. +For example you could not have two executables named `foo`. This requirement is now +relaxed so that you can have multiple targets with the same name, as long as they are in +different subdirectories. + +Note that projects that have multiple targets with the same name can not be built with +the `flat` layout or any backend that writes outputs in the same directory. + +## Addition of OpenMP dependency + +An OpenMP dependency (`openmp`) has been added that encapsulates the various +flags used by compilers to enable OpenMP and checks for the existence of the +`omp.h` header. The `language` keyword may be passed to force the use of a +specific compiler for the checks. + +## Added new partial_dependency method to dependencies and libraries + +It is now possible to use only part of a dependency in a target. This allows, +for example, to only use headers with convenience libraries to avoid linking +to the same library multiple times. + +```meson +dep = dependency('xcb') + +helper = static_library( + 'helper', + ['helper1.c', 'helper2.c'], + dependencies : dep.partial_dependency(includes : true), +] + +final = shared_library( + 'final', + ['final.c'], + dependencyes : dep, +) +``` + +A partial dependency will have the same name version as the full dependency it +is derived from, as well as any values requested. + +## Improved generation of pkg-config files for static only libraries. + +Previously pkg-config files generated by the pkgconfig modules for static libraries +with dependencies could only be used in a dependencies with `static: true`. + +Now the generated file contains the needed dependencies libraries directly within +`Requires` and `Libs` for build static libraries passed via the `libraries` keyword +argument. + +Projects that install both a static and a shared version of a library should use +the result of [`both_libraries()`](Reference-manual.md#both_libraries) to the +pkg-config file generator or use [`configure_file()`](Reference-manual.md#configure_file) +for more complicated setups. + +## Improvements to pkgconfig module + +A `StaticLibrary` or `SharedLibrary` object can optionally be passed +as first positional argument of the `generate()` method. If one is provided a +default value will be provided for all required fields of the pc file: +- `install_dir` is set to `pkgconfig` folder in the same location than the provided library. +- `description` is set to the project's name followed by the library's name. +- `name` is set to the library's name. + +Generating a .pc file is now as simple as: + +```meson +pkgconfig.generate(mylib) +``` + +## pkgconfig.generate() requires parameters non-string arguments + +`pkgconfig.generate()` `requires:` and `requires_private:` keyword arguments +now accept pkgconfig-dependencies and libraries that pkgconfig-files were +generated for. + +## Generic python module + +Meson now has is a revamped and generic (python 2 and 3) version of the python3 +module. With [this new interface](Python-module.md), projects can now fully +specify the version of python they want to build against / install sources to, +and can do so against multiple major or minor versions in parallel. + +## test() now supports the `depends:` keyword argument + +Build targets and custom targets can be listed in the `depends:` keyword argument +of test function. These targets will be built before test is run even if they have +`build_by_default : false`. diff --git a/meson/docs/markdown/Release-notes-for-0.47.0.md b/meson/docs/markdown/Release-notes-for-0.47.0.md new file mode 100644 index 000000000..675942be4 --- /dev/null +++ b/meson/docs/markdown/Release-notes-for-0.47.0.md @@ -0,0 +1,301 @@ +--- +title: Release 0.47 +short-description: Release notes for 0.47 +... + +# New features + +## Allow early return from a script + +Added the function `subdir_done()`. Its invocation exits the current script at +the point of invocation. All previously invoked build targets and commands are +build/executed. All following ones are ignored. If the current script was +invoked via `subdir()` the parent script continues normally. + +## Concatenate string literals returned from `get_define()` + +After obtaining the value of a preprocessor symbol consecutive string literals +are merged into a single string literal. +For example a preprocessor symbol's value `"ab" "cd"` is returned as `"abcd"`. + +## ARM compiler(version 6) for C and CPP + +Cross-compilation is now supported for ARM targets using ARM compiler +version 6 - ARMCLANG. The required ARMCLANG compiler options for +building a shareable library are not included in the current Meson +implementation for ARMCLANG support, so it can not build shareable +libraries. This current Meson implementation for ARMCLANG support can +not build assembly files with arm syntax (we need to use armasm instead +of ARMCLANG for the `.s` files with this syntax) and only supports GNU +syntax. + +The default extension of the executable output is `.axf`. +The environment path should be set properly for the ARM compiler executables. +The `--target`, `-mcpu` options with the appropriate values should be mentioned +in the cross file as shown in the snippet below. + +```ini +[properties] +c_args = ['--target=arm-arm-none-eabi', '-mcpu=cortex-m0plus'] +cpp_args = ['--target=arm-arm-none-eabi', '-mcpu=cortex-m0plus'] +``` + +Note: +- The current changes are tested on Windows only. +- PIC support is not enabled by default for ARM, + if users want to use it, they need to add the required arguments + explicitly from cross-file(`c_args`/`cpp_args`) or some other way. + +## New base build option for LLVM (Apple) bitcode support + +When building with clang on macOS, you can now build your static and shared +binaries with embedded bitcode by enabling the `b_bitcode` [base +option](Builtin-options.md#base-options) by passing `-Db_bitcode=true` to +Meson. + +This is better than passing the options manually in the environment since Meson +will automatically disable conflicting options such as `b_asneeded`, and will +disable bitcode support on targets that don't support it such as +`shared_module()`. + +Since this requires support in the linker, it is currently only enabled when +using Apple ld. In the future it can be extended to clang on other platforms +too. + +## New compiler check: `check_header()` + +The existing compiler check `has_header()` only checks if the header exists, +either with the `__has_include` C++11 builtin, or by running the pre-processor. + +However, sometimes the header you are looking for is unusable on some platforms +or with some compilers in a way that is only detectable at compile-time. For +such cases, you should use `check_header()` which will include the header and +run a full compile. + +Note that `has_header()` is much faster than `check_header()`, so it should be +used whenever possible. + +## New action `copy:` for `configure_file()` + +In addition to the existing actions `configuration:` and `command:`, +[`configure_file()`](Reference-manual.md#configure_file) now accepts a keyword +argument `copy:` which specifies a new action to copy the file specified with +the `input:` keyword argument to a file in the build directory with the name +specified with the `output:` keyword argument. + +These three keyword arguments are, as before, mutually exclusive. You can only +do one action at a time. + +## New keyword argument `encoding:` for `configure_file()` + +Add a new keyword to [`configure_file()`](Reference-manual.md#configure_file) +that allows the developer to specify the input and output file encoding. The +default value is the same as before: UTF-8. + +In the past, Meson would not handle non-UTF-8/ASCII files correctly, and in the +worst case would try to coerce it to UTF-8 and mangle the data. UTF-8 is the +standard encoding now, but sometimes it is necessary to process files that use +a different encoding. + +For additional details see [#3135](https://github.com/mesonbuild/meson/pull/3135). + +## New keyword argument `output_format:` for `configure_file()` + +When called without an input file, `configure_file` generates a +C header file by default. A keyword argument was added to allow +specifying the output format, for example for use with nasm or yasm: + +```meson +conf = configuration_data() +conf.set('FOO', 1) + +configure_file('config.asm', + configuration: conf, + output_format: 'nasm') +``` + +## Substitutions in `custom_target(depfile:)` + +The `depfile` keyword argument to `custom_target` now accepts the `@BASENAME@` +and `@PLAINNAME@` substitutions. + +## Deprecated `build_always:` for custom targets + +Setting `build_always` to `true` for a custom target not only marks the target +to be always considered out of date, but also adds it to the set of default +targets. This option is therefore deprecated and the new option +`build_always_stale` is introduced. + +`build_always_stale` *only* marks the target to be always considered out of +date, but does not add it to the set of default targets. The old behaviour can +be achieved by combining `build_always_stale` with `build_by_default`. + +The documentation has been updated accordingly. + +## New built-in object type: dictionary + +Meson dictionaries use a syntax similar to python's dictionaries, +but have a narrower scope: they are immutable, keys can only +be string literals, and initializing a dictionary with duplicate +keys causes a fatal error. + +Example usage: + +```meson +d = {'foo': 42, 'bar': 'baz'} + +foo = d.get('foo') +foobar = d.get('foobar', 'fallback-value') + +foreach key, value : d + Do something with key and value +endforeach +``` + +## Array options treat `-Dopt=` and `-Dopt=[]` as equivalent + +Prior to this change passing -Dopt= to an array opt would be interpreted as +`['']` (an array with an empty string), now `-Dopt=` is the same as `-Dopt=[]`, an +empty list. + +## Feature detection based on `meson_version:` in `project()` + +Meson will now print a `WARNING:` message during configuration if you use +a function or a keyword argument that was added in a meson version that's newer +than the version specified inside `project()`. For example: + +```meson +project('featurenew', meson_version: '>=0.43') + +cdata = configuration_data() +cdata.set('FOO', 'bar') +message(cdata.get_unquoted('FOO')) +``` + +This will output: + +``` +The Meson build system +Version: 0.47.0.dev1 +Source dir: C:\path\to\srctree +Build dir: C:\path\to\buildtree +Build type: native build +Project name: featurenew +Project version: undefined +Build machine cpu family: x86_64 +Build machine cpu: x86_64 +WARNING: Project targetting '>=0.43' but tried to use feature introduced in '0.44.0': configuration_data.get_unquoted() +Message: bar +Build targets in project: 0 +WARNING: Project specifies a minimum meson_version '>=0.43' which conflicts with: + * 0.44.0: {'configuration_data.get_unquoted()'} +``` + +## New type of build option for features + +A new type of [option called `feature`](Build-options.md#features) can be +defined in `meson_options.txt` for the traditional `enabled / disabled / auto` +tristate. The value of this option can be passed to the `required` keyword +argument of functions `dependency()`, `find_library()`, `find_program()` and +`add_languages()`. + +A new global option `auto_features` has been added to override the value of all +`auto` features. It is intended to be used by packagers to have full control on +which feature must be enabled or disabled. + +## New options to `gnome.gdbus_codegen()` + +You can now pass additional arguments to gdbus-codegen using the `extra_args` +keyword. This is the same for the other gnome function calls. + +Meson now automatically adds autocleanup support to the generated code. This +can be modified by setting the autocleanup keyword. + +For example: + +```meson +sources += gnome.gdbus_codegen('com.mesonbuild.Test', + 'com.mesonbuild.Test.xml', + autocleanup : 'none', + extra_args : ['--pragma-once']) +``` + +## Made 'install' a top level Meson command + +You can now run `meson install` in your build directory and it will do +the install. It has several command line options you can toggle the +behaviour that is not in the default `ninja install` invocation. This +is similar to how `meson test` already works. + +For example, to install only the files that have changed, you can do: + +```console +$ meson install --only-changed +``` + +## `install_mode:` keyword argument extended to all installable targets + +It is now possible to pass an `install_mode` argument to all installable targets, +such as `executable()`, libraries, headers, man pages and custom/generated +targets. + +The `install_mode` argument can be used to specify the file mode in symbolic +format and optionally the owner/uid and group/gid for the installed files. + +## New built-in option `install_umask` with a default value 022 + +This umask is used to define the default permissions of files and directories +created in the install tree. Files will preserve their executable mode, but the +exact permissions will obey the `install_umask`. + +The `install_umask` can be overridden in the meson command-line: + +```console +$ meson --install-umask=027 builddir/ +``` + +A project can also override the default in the `project()` call: + +```meson +project('myproject', 'c', + default_options : ['install_umask=027']) +``` + +To disable the `install_umask`, set it to `preserve`, in which case permissions +are copied from the files in their origin. + +## Octal and binary string literals + +Octal and binary integer literals can now be used in build and option files. + +```meson +int_493 = 0o755 +int_1365 = 0b10101010101 +``` + +## New keyword arguments: 'check' and 'capture' for `run_command()` + +If `check:` is `true`, then the configuration will fail if the command returns a +non-zero exit status. The default value is `false` for compatibility reasons. + +`run_command()` used to always capture the output and stored it for use in +build files. However, sometimes the stdout is in a binary format which is meant +to be discarded. For that case, you can now set the `capture:` keyword argument +to `false`. + +## Windows resource files dependencies + +The `compile_resources()` function of the `windows` module now takes +the `depend_files:` and `depends:` keywords. + +When using binutils's `windres`, dependencies on files `#include`'d by the +preprocessor are now automatically tracked. + +## Polkit support for privileged installation + +When running `install`, if installation fails with a permission error and +`pkexec` is available, Meson will attempt to use it to spawn a permission +dialog for privileged installation and retry the installation. + +If `pkexec` is not available, the old behaviour is retained and you will need +to explicitly run the install step with `sudo`. diff --git a/meson/docs/markdown/Release-notes-for-0.48.0.md b/meson/docs/markdown/Release-notes-for-0.48.0.md new file mode 100644 index 000000000..2cb02684e --- /dev/null +++ b/meson/docs/markdown/Release-notes-for-0.48.0.md @@ -0,0 +1,334 @@ +--- +title: Release 0.48 +short-description: Release notes for 0.48 +... + +# New features + +## Toggles for build type, optimization and vcrt type + +Since the very beginning Meson has provided different project types to +use, such as *debug* and *minsize*. There is also a *plain* type that +adds nothing by default but instead makes it the user's responsibility +to add everything by hand. This works but is a bit tedious. + +In this release we have added new new options to manually toggle +e.g. optimization levels and debug info so those can be changed +independently of other options. For example by default the debug +buildtype has no optmization enabled at all. If you wish to use GCC's +`-Og` instead, you could set it with the following command: + +``` +meson configure -Doptimization=g +``` + +Similarly we have added a toggle option to select the version of +Visual Studio C runtime to use. By default it uses the debug runtime +DLL debug builds and release DLL for release builds but this can be +manually changed with the new base option `b_vscrt`. + +## Meson warns if two calls to `configure_file()` write to the same file + +If two calls to [`configure_file()`](Reference-manual.md#configure_file) +write to the same file Meson will print a `WARNING:` message during +configuration. For example: +```meson +project('configure_file', 'cpp') + +configure_file( + input: 'a.in', + output: 'out', + command: ['./foo.sh'] +) +configure_file( + input: 'a.in', + output: 'out', + command: ['./foo.sh'] +) +``` + +This will output: + +``` +The Meson build system +Version: 0.47.0.dev1 +Source dir: /path/to/srctree +Build dir: /path/to/buildtree +Build type: native build +Project name: configure_file +Project version: undefined +Build machine cpu family: x86_64 +Build machine cpu: x86_64 +Configuring out with command +WARNING: Output file out for configure_file overwritten. First time written in line 3 now in line 8 +Configuring out with command +Build targets in project: 0 +Found ninja-1.8.2 at /usr/bin/ninja +``` + +## New kwarg `console` for `custom_target()` + +This keyword argument conflicts with `capture`, and is meant for +commands that are resource-intensive and take a long time to +finish. With the Ninja backend, setting this will add this target to +[Ninja's `console` +pool](https://ninja-build.org/manual.html#_the_literal_console_literal_pool), +which has special properties such as not buffering stdout and +serializing all targets in this pool. + +The primary use-case for this is to be able to run external commands +that take a long time to exeute. Without setting this, the user does +not receive any feedback about what the program is doing. + +## `dependency(version:)` now applies to all dependency types + +Previously, version constraints were only enforced for dependencies found using +the pkg-config dependency provider. These constraints now apply to dependencies +found using any dependency provider. + +Some combinations of dependency, host and method do not currently support +discovery of the version. In these cases, the dependency will not be found if a +version constraint is applied, otherwise the `version()` method for the +dependency object will return `'unknown'`. + +(If discovering the version in one of these combinations is important to you, +and a method exists to determine the version in that case, please file an issue +with as much information as possible.) + +## python3 module is deprecated + +A generic module `python` has been added in Meson `0.46.0` and has a superset of +the features implemented by the previous `python3` module. + +In most cases, it is a simple matter of renaming: +```meson +py3mod = import('python3') +python = py3mod.find_python() +``` + +becomes + +```meson +pymod = import('python') +python = pymod.find_installation() +``` + +## Dictionary addition + +Dictionaries can now be added, values from the second dictionary overrides values +from the first + +```meson +d1 = {'a' : 'b'} +d3 = d1 + {'a' : 'c'} +d3 += {'d' : 'e'} +``` + +## Dist scripts + +You can now specify scripts that are run as part of the `dist` +target. An example usage would go like this: + +```meson +project('foo', 'c') + +# other stuff here + +meson.add_dist_script('dist_cleanup.py') +``` + +## Fatal warnings + +A new command line option has been added: `--fatal-meson-warnings`. When enabled, any +warning message printed by Meson will be fatal and raise an exception. It is +intended to be used by developers and CIs to easily catch deprecation warnings, +or any other potential issues. + +## Helper methods added for checking GNU style attributes: `__attribute__(...)` + +A set of new helpers have been added to the C and C++ compiler objects for +checking GNU style function attributes. These are not just simpler to use, they +may be optimized to return fast on compilers that don't support these +attributes. Currently this is true for MSVC. + +```meson +cc = meson.get_compiler('c') +if cc.has_function_attribute('aligned') + add_project_arguments('-DHAVE_ALIGNED', language : 'c') +endif +``` + +Would replace code like: + +```meson +if cc.compiles('''into foo(void) __attribute__((aligned(32)))''') + add_project_arguments('-DHAVE_ALIGNED', language : 'c') +endif +``` + +Additionally, a multi argument version has been added: + +```meson +foreach s : cc.get_supported_function_attributes(['hidden', 'alias']) + add_project_arguments('-DHAVE_@0@'.format(s.to_upper()), language : 'c') +endforeach +``` + +## `gnome.generate_gir()` now optionally accepts multiple libraries + +The GNOME module can now generate a single gir for multiple libraries, which +is something `g-ir-scanner` supported, but had not been exposed yet. + +gnome.generate_gir() will now accept multiple positional arguments, if none +of these arguments are an `Executable` instance. + +## Hotdoc module + +A new module has been written to ease generation of +[hotdoc](https://hotdoc.github.io/) based documentation. It supports +complex use cases such as hotdoc subprojects (to create documentation +portals) and makes it straight forward to leverage full capabilities +of hotdoc. + +Simple usage: + +``` meson +hotdoc = import('hotdoc') + +hotdoc.generate_doc( + 'foobar', + c_smart_index: true, + project_version: '0.1', + sitemap: 'sitemap.txt', + index: 'index.md', + c_sources: ['path/to/file.c'], + languages: ['c'], + install: true, +) +``` + +## `i18n.merge_file()` now fully supports variable substitutions defined in `custom_target()` + +Filename substitutions like @BASENAME@ and @PLAINNAME@ were previously +accepted but the name of the build target wasn't altered leading to +colliding target names when using the substitution twice. +i18n.merge_file() now behaves as custom_target() in this regard. + +## Projects args can be set separately for cross and native builds (potentially breaking change) + +It has been a longstanding bug (or let's call it a "delayed bug fix") +that if you do this: + +```meson +add_project_arguments('-DFOO', language : 'c') +``` + +Then the flag is used both in native and cross compilations. This is +very confusing and almost never what you want. To fix this a new +keyword `native` has been added to all functions that add arguments, +namely `add_global_arguments`, `add_global_link_arguments`, +`add_project_arguments` and `add_project_link_arguments` that behaves +like the following: + +```meson +# Added to native builds when compiling natively and to cross +# compilations when doing cross compiles. +add_project_arguments(...) + +# Added only to native compilations, not used in cross compilations. +add_project_arguments(..., native : true) + +# Added only to cross compilations, not used in native compilations. +add_project_arguments(..., native : false) +``` + +Also remember that cross compilation is a property of each +target. There can be target that are compiled with the native compiler +and some which are compiled with the cross compiler. + +Unfortunately this change is backwards incompatible and may cause some +projects to fail building. However this should be very rare in practice. + +## More flexible `override_find_program()`. + +It is now possible to pass an `executable` to +`override_find_program()` if the overridden program is not used during +configure. + +This is particularly useful for fallback dependencies like Protobuf +that also provide a tool like protoc. + +## `shared_library()` now supports setting dylib compatibility and current version + +Now, by default `shared_library()` sets `-compatibility_version` and +`-current_version` of a macOS dylib using the `soversion`. + +This can be overriden by using the `darwin_versions:` kwarg to +[`shared_library()`](Reference-manual.md#shared_library). As usual, you can +also pass this kwarg to `library()` or `build_target()` and it will be used in +the appropriate circumstances. + +## Version comparison + +`dependency(version:)` and other version constraints now handle versions +containing non-numeric characters better, comparing versions using the rpmvercmp +algorithm (as using the `pkg-config` autoconf macro `PKG_CHECK_MODULES` does). + +This is a breaking change for exact comparison constraints which rely on the +previous comparison behaviour of extending the compared versions with `'0'` +elements, up to the same length of `'.'`-separated elements. + +For example, a version of `'0.11.0'` would previously match a version constraint +of `'==0.11'`, but no longer does, being instead considered strictly greater. + +Instead, use a version constraint which exactly compares with the precise +version required, e.g. `'==0.11.0'`. + +## Keyword argument for GNU symbol visibility + +Build targets got a new keyword, `gnu_symbol_visibility` that controls +how symbols are exported from shared libraries. This is most commonly +used to hide implementation symbols like this: + +```meson +shared_library('mylib', ... + gnu_symbol_visibility: 'hidden') +``` + +In this case only symbols explicitly marked as visible in the source +files get exported. + +## Git wraps can now clone submodules automatically + +To enable this, the following needs to be added to the `.wrap` file: + +```ini +clone-recursive=true +``` + +## `subproject()` function now supports the `required:` kwarg + +This allows you to declare an optional subproject. You can now call `found()` +on the return value of the `subproject()` call to see if the subproject is +available before calling `get_variable()` to fetch information from it. + +## `dependency()` objects now support the `.name()` method + +You can now fetch the name of the dependency that was searched like so: + +```meson +glib_dep = dependency('glib-2.0') +... +message("dependency name is " + glib_dep.name()) +# This outputs `dependency name is glib-2.0` + +qt_dep = dependency('qt5') +... +message("dependency name is " + qt_dep.name()) +# This outputs `dependency name is qt5` + +decl_dep = declare_dependency() +... +message("dependency name is " + decl_dep.name()) +# This outputs `dependency name is internal` +``` diff --git a/meson/docs/markdown/Release-notes-for-0.49.0.md b/meson/docs/markdown/Release-notes-for-0.49.0.md new file mode 100644 index 000000000..9a5e5f53b --- /dev/null +++ b/meson/docs/markdown/Release-notes-for-0.49.0.md @@ -0,0 +1,316 @@ +--- +title: Release 0.49 +short-description: Release notes for 0.49 +... + +# New features + +## Libgcrypt dependency now supports libgcrypt-config + +Earlier, `dependency('libgcrypt')` could only detect the library with pkg-config +files. Now, if pkg-config files are not found, Meson will look for +`libgcrypt-config` and if it's found, will use that to find the library. + +## New `section` key for the buildoptions introspection + +Meson now has a new `section` key in each build option. This allows +IDEs to group these options similar to `meson configure`. + +The possible values for `section` are: + + - core + - backend + - base + - compiler + - directory + - user + - test + +## CC-RX compiler for C and CPP + +Cross-compilation is now supported for Renesas RX targets with the +CC-RX compiler. + +The environment path should be set properly for the CC-RX compiler +executables. The `-cpu` option with the appropriate value should be +mentioned in the cross-file as shown in the snippet below. + +```ini +[properties] +c_args = ['-cpu=rx600'] +cpp_args = ['-cpu=rx600'] +``` + +The default extension of the executable output is `.abs`. Other +target specific arguments to the compiler and linker will need to be +added explicitly from the +cross-file(`c_args`/`c_link_args`/`cpp_args`/`cpp_link_args`) or some +other way. Refer to the CC-RX User's manual for additional compiler +and linker options. + +## CMake `find_package` dependency backend + +Meson can now use the CMake `find_package` ecosystem to +detect dependencies. Both the old-style `_LIBRARIES` +variables as well as imported targets are supported. Meson +can automatically guess the correct CMake target in most +cases but it is also possible to manually specify a target +with the `modules` property. + +```meson +# Implicitly uses CMake as a fallback and guesses a target +dep1 = dependency('KF5TextEditor') + +# Manually specify one or more CMake targets to use +dep2 = dependency('ZLIB', method : 'cmake', modules : ['ZLIB::ZLIB']) +``` + +CMake is automatically used after `pkg-config` fails when +no `method` (or `auto`) was provided in the dependency options. + +## New compiler method `get_argument_syntax` + +The compiler object now has `get_argument_syntax` method, which returns a +string value of `gcc`, `msvc`, or an undefined value string value. This can be +used to determine if a compiler uses gcc syntax (`-Wfoo`), msvc syntax +(`/w1234`), or some other kind of arguments. + +```meson +cc = meson.get_compiler('c') + +if cc.get_argument_syntax() == 'msvc' + if cc.has_argument('/w1235') + add_project_arguments('/w1235', language : ['c']) + endif +elif cc.get_argument_syntax() == 'gcc' + if cc.has_argument('-Wfoo') + add_project_arguments('-Wfoo', language : ['c']) + endif +elif cc.get_id() == 'some other compiler' + add_project_arguments('--error-on-foo', language : ['c']) +endif +``` + +## Return `Disabler()` instead of not-found object + +Functions such as `dependency()`, `find_library()`, `find_program()`, and +`python.find_installation()` have a new keyword argument: `disabler`. When set +to `true` those functions return `Disabler()` objects instead of not-found +objects. + +## `introspect --projectinfo` can now be used without configured build directory + +This allows IDE integration to get information about the project before the user has configured a build directory. + +Before you could use `meson.py introspect --projectinfo build-directory`. +Now you also can use `meson.py introspect --projectinfo project-dir/meson.build`. + +The output is similiar to the output with a build directory but additionally also includes information from `introspect --buildsystem-files`. + +For example `meson.py introspect --projectinfo test\ cases/common/47\ subproject\ options/meson.build` +This outputs (pretty printed for readability): +``` +{ + "buildsystem_files": [ + "meson_options.txt", + "meson.build" + ], + "name": "suboptions", + "version": null, + "descriptive_name": "suboptions", + "subprojects": [ + { + "buildsystem_files": [ + "subprojects/subproject/meson_options.txt", + "subprojects/subproject/meson.build" + ], + "name": "subproject", + "version": "undefined", + "descriptive_name": "subproject" + } + ] +} +``` + +Both usages now include a new `descriptive_name` property which always +shows the name set in the project. + +## Can specify keyword arguments with a dictionary + +You can now specify keyword arguments for any function and method call +with the `kwargs` keyword argument. This is perhaps best described +with an example: + +```meson +options = {'include_directories': include_directories('inc')} + +... + +executable(... + kwargs: options) +``` + +The above code is identical to this: + +```meson +executable(... + include_directories: include_directories('inc')) +``` + +That is, Meson will expand the dictionary given to `kwargs` as if the +entries in it had been given as keyword arguments directly. + +Note that any individual argument can be specified either directly or +with the `kwarg` dict but not both. If a key is specified twice, it +is a hard error. + +## Manpages are no longer compressed implicitly + +Earlier, the `install_man` command has automatically compressed installed +manpages into `.gz` format. This collided with manpage compression hooks +already used by various distributions. Now, manpages are installed uncompressed +and distributors are expected to handle compressing them according to their own +compression preferences. + +## Native config files + +Native files (`--native-file`) are the counterpart to cross files (`--cross-file`), +and allow specifying information about the build machine, both when cross compiling +and when not. + +Currently the native files only allow specifying the names of binaries, similar +to the cross file, for example: + +```ini +[binaries] +llvm-config = "/opt/llvm-custom/bin/llvm-config" +``` + +Will override the llvm-config used for *native* binaries. Targets for the host +machine will continue to use the cross file. + +## Foreach `break` and `continue` + +`break` and `continue` keywords can be used inside foreach loops. + +```meson +items = ['a', 'continue', 'b', 'break', 'c'] +result = [] +foreach i : items + if i == 'continue' + continue + elif i == 'break' + break + endif + result += i +endforeach +# result is ['a', 'b'] +``` + +You can check if an array contains an element like this: +```meson +my_array = [1, 2] +if 1 in my_array +# This condition is true +endif +if 1 not in my_array +# This condition is false +endif +``` + +You can check if a dictionary contains a key like this: +```meson +my_dict = {'foo': 42, 'foo': 43} +if 'foo' in my_dict +# This condition is true +endif +if 42 in my_dict +# This condition is false +endif +if 'foo' not in my_dict +# This condition is false +endif +``` + +## Joining paths with / + +For clarity and conciseness, we recommend using the `/` operator to separate +path elements: + +```meson +joined = 'foo' / 'bar' +``` + +Before Meson 0.49, joining path elements was done with the legacy `join_paths` +function, but the `/` syntax above is now recommended. + +```meson +joined = join_paths('foo', 'bar') +``` + +This only works for strings. + +## Position-independent executables + +When `b_pie` option, or `executable()`'s `pie` keyword argument is set to +`true`, position-independent executables are built. All their objects are built +with `-fPIE` and the executable is linked with `-pie`. Any static library they +link must be built with `pic` set to `true` (see `b_staticpic` option). + +## Deprecation warning in pkg-config generator + +All libraries passed to the `libraries` keyword argument of the `generate()` +method used to be associated with that generated pkg-config file. That means +that any subsequent call to `generate()` where those libraries appear would add +the filebase of the `generate()` that first contained them into `Requires:` or +`Requires.private:` field instead of adding an `-l` to `Libs:` or `Libs.private:`. + +This behaviour is now deprecated. The library that should be associated with +the generated pkg-config file should be passed as first positional argument +instead of in the `libraries` keyword argument. The previous behaviour is +maintained but prints a deprecation warning and support for this will be removed +in a future Meson release. If you can not create the needed pkg-config file +without this warning, please file an issue with as much details as possible +about the situation. + +For example this sample will write `Requires: liba` into `libb.pc` but print a +deprecation warning: +```meson +liba = library(...) +pkg.generate(libraries : liba) + +libb = library(...) +pkg.generate(libraries : [liba, libb]) +``` + +It can be fixed by passing `liba` as first positional argument:: +```meson +liba = library(...) +pkg.generate(liba) + +libb = library(...) +pkg.generate(libb, libraries : [liba]) +``` + +## Subprojects download, checkout, update command-line + +New command-line tool has been added to manage subprojects: + +- `meson subprojects download` to download all subprojects that have a wrap file. +- `meson subprojects update` to update all subprojects to latest version. +- `meson subprojects checkout` to checkout or create a branch in all git subprojects. + +## New keyword argument `is_default` to `add_test_setup()` + +The keyword argument `is_default` may be used to set whether the test +setup should be used by default whenever `meson test` is run without +the `--setup` option. + +```meson +add_test_setup('default', is_default: true, env: 'G_SLICE=debug-blocks') +add_test_setup('valgrind', env: 'G_SLICE=always-malloc', ...) +test('mytest', exe) +``` + +For the example above, running `meson test` and `meson test +--setup=default` is now equivalent. diff --git a/meson/docs/markdown/Release-notes-for-0.50.0.md b/meson/docs/markdown/Release-notes-for-0.50.0.md new file mode 100644 index 000000000..a9363d8b1 --- /dev/null +++ b/meson/docs/markdown/Release-notes-for-0.50.0.md @@ -0,0 +1,342 @@ +--- +title: Release 0.50.0 +short-description: Release notes for 0.50.0 +... + +# New features + +## Added `cmake_module_path` and `cmake_args` to dependency + +The CMake dependency backend can now make use of existing `Find.cmake` +files by setting the `CMAKE_MODULE_PATH` with the new `dependency()` property +`cmake_module_path`. The paths given to `cmake_module_path` should be relative +to the project source directory. + +Furthermore the property `cmake_args` was added to give CMake additional +parameters. + +## Added PGI compiler support + +Nvidia / PGI C, C++ and Fortran +[no-cost](https://www.pgroup.com/products/community.htm) compilers are +now supported. They have been tested on Linux so far. + + + +## Fortran Coarray + +Fortran 2008 / 2018 coarray support was added via `dependency('coarray')` + +## Libdir defaults to `lib` when cross compiling + +Previously `libdir` defaulted to the value of the build machine such +as `lib/x86_64-linux-gnu`, which is almost always incorrect when cross +compiling. It now defaults to plain `lib` when cross compiling. Native +builds remain unchanged and will point to the current system's library +dir. + +## Native and Cross File Paths and Directories + +A new `[paths]` section has been added to native and cross files. This +can be used to set paths such a prefix and libdir in a persistent way. + +## Add warning_level 0 option + +Adds support for a warning level 0 which does not enable any static +analysis checks from the compiler + +## A builtin target to run clang-format + +If you have `clang-format` installed and there is a `.clang-format` +file in the root of your master project, Meson will generate a run +target called `clang-format` so you can reformat all files with one +command: + +```meson +ninja clang-format +``` + + +## Added `.path()` method to object returned by `python.find_installation()` + +`ExternalProgram` objects as well as the object returned by the +`python3` module provide this method, but the new `python` module did +not. + +## Fix ninja console log from generators with multiple output nodes + +This resolves [issue #4760](https://github.com/mesonbuild/meson/issues/4760) +where a generator with multiple output nodes printed an empty string to the console + +## `introspect --buildoptions` can now be used without configured build directory + +It is now possible to run `meson introspect --buildoptions /path/to/meson.build` +without a configured build directory. + +Running `--buildoptions` without a build directory produces the same +output as running it with a freshly configured build directory. + +However, this behavior is not guaranteed if subprojects are +present. Due to internal limitations all subprojects are processed +even if they are never used in a real meson run. Because of this +options for the subprojects can differ. + +## `include_directories` accepts a string + +The `include_directories` keyword argument now accepts plain strings +rather than an include directory object. Meson will transparently +expand it so that a declaration like this: + +```meson +executable(..., include_directories: 'foo') +``` + +Is equivalent to this: + +```meson +foo_inc = include_directories('foo') +executable(..., include_directories: foo_inc) +``` + +## Fortran submodule support + +Initial support for Fortran `submodule` was added, where the submodule is in +the same or different file than the parent `module`. +The submodule hierarchy specified in the source Fortran code `submodule` +statements are used by Meson to resolve source file dependencies. +For example: + +```fortran +submodule (ancestor:parent) child +``` + + +## Add `subproject_dir` to `--projectinfo` introspection output + +This allows applications interfacing with Meson (such as IDEs) to know about +an overridden subproject directory. + +## Find library with its headers + +The `find_library()` method can now also verify if the library's headers are +found in a single call, using the `has_header()` method internally. + +```meson +# Aborts if the 'z' library is found but not its header file +zlib = find_library('z', has_headers : 'zlib.h') +# Returns not-found if the 'z' library is found but not its header file +zlib = find_library('z', has_headers : 'zlib.h', required : false) +``` + +Any keyword argument with the `header_` prefix passed to `find_library()` will +be passed to the `has_header()` method with the prefix removed. + +```meson +libfoo = find_library('foo', + has_headers : ['foo.h', 'bar.h'], + header_prefix : '#include ', + header_include_directories : include_directories('.')) +``` + +## NetCDF + +NetCDF support for C, C++ and Fortran is added via pkg-config. + +## Added the Flang compiler + +[Flang](https://github.com/flang-compiler/flang/releases) Fortran +compiler support was added. As with other Fortran compilers, flang is +specified using `FC=flang meson ..` or similar. + +## New `not_found_message` for `dependency()` + +You can now specify a `not_found_message` that will be printed if the +specified dependency was not found. The point is to convert constructs +that look like this: + +```meson +d = dependency('something', required: false) +if not d.found() + message('Will not be able to do something.') +endif +``` + +Into this: + +```meson +d = dependency('something', + required: false, + not_found_message: 'Will not be able to do something.') +``` + +Or constructs like this: + +```meson +d = dependency('something', required: false) +if not d.found() + error('Install something by doing XYZ.') +endif +``` + +into this: + +```meson +d = dependency('something', + not_found_message: 'Install something by doing XYZ.') +``` + +Which works, because the default value of `required` is `true`. + +## Cuda support + +Compiling Cuda source code is now supported, though only with the +Ninja backend. This has been tested only on Linux for now. + +Because NVidia's Cuda compiler does not produce `.d` dependency files, +dependency tracking does not work. + +## `run_command()` accepts `env` kwarg + +You can pass [`environment`](Reference-manual.md#environment-object) +object to [`run_command`](Reference-manual.md#run-command), just +like to `test`: + +```meson +env = environment() +env.set('FOO', 'bar') +run_command('command', 'arg1', 'arg2', env: env) +``` + +## `extract_objects:` accepts `File` arguments + +The `extract_objects` function now supports File objects to tell it +what to extract. Previously, file paths could only be passed as strings. + +## Changed the JSON format of the introspection + +All paths used in the meson introspection JSON format are now absolute. This +affects the `filename` key in the targets introspection and the output of +`--buildsystem-files`. + +Furthermore, the `filename` and `install_filename` keys in the targets +introspection are now lists of strings with identical length. + +The `--target-files` option is now deprecated, since the same information +can be acquired from the `--tragets` introspection API. + +## Meson file rewriter + +This release adds the functionality to perform some basic modification +on the `meson.build` files from the command line. The currently +supported operations are: + +- For build targets: + - Add/Remove source files + - Add/Remove targets + - Modify a select set of kwargs + - Print some JSON information +- For dependencies: + - Modify a select set of kwargs +- For the project function: + - Modify a select set of kwargs + - Modify the default options list + +For more information see the rewriter documentation. + +## `introspect --scan-dependencies` can now be used to scan for dependencies used in a project + +It is now possible to run `meson introspect --scan-dependencies +/path/to/meson.build` without a configured build directory to scan for +dependencies. + +The output format is as follows: + +```json +[ + { + "name": "The name of the dependency", + "required": true, + "conditional": false, + "has_fallback": false + } +] +``` + +The `required` keyword specifies whether the dependency is marked as required +in the `meson.build` (all dependencies are required by default). The +`conditional` key indicates whether the `dependency()` function was called +inside a conditional block. In a real meson run these dependencies might not be +used, thus they _may_ not be required, even if the `required` key is set. The +`has_fallback` key just indicates whether a fallback was directly set in the +`dependency()` function. + +## `introspect --targets` can now be used without configured build directory + +It is now possible to run `meson introspect --targets /path/to/meson.build` +without a configured build directory. + +The generated output is similar to running the introspection with a build +directory. However, there are some key differences: + +- The paths in `filename` now are _relative_ to the future build directory +- The `install_filename` key is completely missing +- There is only one entry in `target_sources`: + - With the language set to `unknown` + - Empty lists for `compiler` and `parameters` and `generated_sources` + - The `sources` list _should_ contain all sources of the target + +There is no guarantee that the sources list in `target_sources` is correct. +There might be differences, due to internal limitations. It is also not +guaranteed that all targets will be listed in the output. It might even be +possible that targets are listed, which won't exist when meson is run normally. +This can happen if a target is defined inside an if statement. +Use this feature with care. + +## Added option to introspect multiple parameters at once + +Meson introspect can now print the results of multiple introspection +commands in a single call. The results are then printed as a single JSON +object. + +The format for a single command was not changed to keep backward +compatibility. + +Furthermore the option `-a,--all`, `-i,--indent` and `-f,--force-object-output` +were added to print all introspection information in one go, format the +JSON output (the default is still compact JSON) and force use the new +output format, even if only one introspection command was given. + +A complete introspection dump is also stored in the `meson-info` +directory. This dump will be (re)generated each time meson updates the +configuration of the build directory. + +Additionlly the format of `meson introspect target` was changed: + + - New: the `sources` key. It stores the source files of a target and their compiler parameters. + - New: the `defined_in` key. It stores the meson file where a target is defined + - New: the `subproject` key. It stores the name of the subproject where a target is defined. + - Added new target types (`jar`, `shared module`). + +## `meson configure` can now print the default options of an unconfigured project + +With this release, it is also possible to get a list of all build options +by invoking `meson configure` with the project source directory or +the path to the root `meson.build`. In this case, meson will print the +default values of all options. + +## HDF5 + +HDF5 support is added via pkg-config. + +## Added the `meson-info.json` introspection file + +Meson now generates a `meson-info.json` file in the `meson-info` directory +to provide introspection information about the latest meson run. This file +is updated when the build configuration is changed and the build files are +(re)generated. + +## New kwarg `install:` for `configure_file()` + +Previously when using `configure_file()`, you could install the outputted file +by setting the `install_dir:` keyword argument. Now, there is an explicit kwarg +`install:` to enable/disable it. Omitting it will maintain the old behaviour. diff --git a/meson/docs/markdown/Release-notes-for-0.51.0.md b/meson/docs/markdown/Release-notes-for-0.51.0.md new file mode 100644 index 000000000..b7e441cf4 --- /dev/null +++ b/meson/docs/markdown/Release-notes-for-0.51.0.md @@ -0,0 +1,329 @@ +--- +title: Release 0.51.0 +short-description: Release notes for 0.51.0 +... + +# New features + +## (C) Preprocessor flag handling + +Meson previously stored `CPPFLAGS` and per-language compilation flags +separately. (That latter would come from `CFLAGS`, `CXXFLAGS`, etc., along with +`_args` options whether specified no the command-line interface (`-D..`), +`meson.build` (`default_options`), or cross file (`[properties]`).) This was +mostly unobservable, except for certain preprocessor-only checks like +`check_header` would only use the preprocessor flags, leading to confusion if +some `-isystem` was in `CFLAGS` but not `CPPFLAGS`. Now, they are lumped +together, and `CPPFLAGS`, for the languages which are deemed to care to about, +is just another source of compilation flags along with the others already +listed. + +## Sanity checking compilers with user flags + +Sanity checks previously only used user-specified flags for cross compilers, but +now do in all cases. + +All compilers meson might decide to use for the build are "sanity checked" +before other tests are run. This usually involves building simple executable and +trying to run it. Previously user flags (compilation and/or linking flags) were +used for sanity checking cross compilers, but not native compilers. This is +because such flags might be essential for a cross binary to succeed, but usually +aren't for a native compiler. + +In recent releases, there has been an effort to minimize the special-casing of +cross or native builds so as to make building more predictable in less-tested +cases. Since this the user flags are necessary for cross, but not harmful for +native, it makes more sense to use them in all sanity checks than use them in no +sanity checks, so this is what we now do. + +## New `sourceset` module + +A new module, `sourceset`, was added to help building many binaries +from the same source files. Source sets associate source files and +dependencies to keys in a `configuration_data` object or a dictionary; +they then take multiple `configuration_data` objects or dictionaries, +and compute the set of source files and dependencies for each of those +configurations. + +## n_debug=if-release and buildtype=plain means no asserts + +Previously if this combination was used then assertions were enabled, +which is fairly surprising behavior. + +## `target_type` in `build_targets` accepts the value 'shared_module' + +The `target_type` keyword argument in `build_target()` now accepts the +value `'shared_module'`. + +The statement + +```meson +build_target(..., target_type: 'shared_module') +``` + +is equivalent to this: + +```meson +shared_module(...) +``` + +## New modules kwarg for python.find_installation + +This mirrors the modules argument that some kinds of dependencies (such as +qt, llvm, and cmake based dependencies) take, allowing you to check that a +particular module is available when getting a python version. + +```meson +py = import('python').find_installation('python3', modules : ['numpy']) +``` + +## Support for the Intel Compiler on Windows (ICL) + +Support has been added for ICL.EXE and ifort on windows. The support should be +on part with ICC support on Linux/MacOS. The ICL C/C++ compiler behaves like +Microsoft's CL.EXE rather than GCC/Clang like ICC does, and has a different id, +`intel-cl` to differentiate it. + +```meson +cc = meson.get_compiler('c') +if cc.get_id == 'intel-cl' + add_project_argument('/Qfoobar:yes', language : 'c') +endif +``` + +## Added basic support for the Xtensa CPU toolchain + +You can now use `xt-xcc`, `xt-xc++`, `xt-nm`, etc... on your cross compilation +file and meson won't complain about an unknown toolchain. + + +## Dependency objects now have a get_variable method + +This is a generic replacement for type specific variable getters such as +`ConfigToolDependency.get_configtool_variable` and +`PkgConfigDependency.get_pkgconfig_variable`, and is the only way to query +such variables from cmake dependencies. + +This method allows you to get variables without knowing the kind of +dependency you have. + +```meson +dep = dependency('could_be_cmake_or_pkgconfig') +# cmake returns 'YES', pkg-config returns 'ON' +if ['YES', 'ON'].contains(dep.get_variable(pkgconfig : 'var-name', cmake : 'COP_VAR_NAME', default_value : 'NO')) + error('Cannot build your project when dep is built with var-name support') +endif +``` + +## CMake prefix path overrides + +When using pkg-config as a dependency resolver we can pass +`-Dpkg_config_path=$somepath` to extend or overwrite where pkg-config will +search for dependencies. Now cmake can do the same, as long as the dependency +uses a ${Name}Config.cmake file (not a Find{$Name}.cmake file), by passing +`-Dcmake_prefix_path=list,of,paths`. It is important that point this at the +prefix that the dependency is installed into, not the cmake path. + +If you have installed something to `/tmp/dep`, which has a layout like: +``` +/tmp/dep/lib/cmake +/tmp/dep/bin +``` + +then invoke meson as `meson builddir/ -Dcmake_prefix_path=/tmp/dep` + +## Tests that should fail but did not are now errors + +You can tag a test as needing to fail like this: + +```meson +test('shoulfail', exe, should_fail: true) +``` + +If the test passes the problem is reported in the error logs but due +to a bug it was not reported in the test runner's exit code. Starting +from this release the unexpected passes are properly reported in the +test runner's exit code. This means that test runs that were passing +in earlier versions of Meson will report failures with the current +version. This is a good thing, though, since it reveals an error in +your test suite that has, until now, gone unnoticed. + +## New target keyword argument: `link_language` + +There may be situations for which the user wishes to manually specify +the linking language. For example, a C++ target may link C, Fortran, +etc. and perhaps the automatic detection in Meson does not pick the +desired compiler. The user can manually choose the linker by language +per-target like this example of a target where one wishes to link with +the Fortran compiler: + +```meson +executable(..., link_language : 'fortran') +``` + +A specific case this option fixes is where for example the main +program is Fortran that calls C and/or C++ code. The automatic +language detection of Meson prioritizes C/C++, and so an compile-time +error results like `undefined reference to main`, because the linker +is C or C++ instead of Fortran, which is fixed by this per-target +override. + +## New module to parse kconfig output files + +The new module `unstable-kconfig` adds the ability to parse and use +kconfig output files from `meson.build`. + + +## Add new `meson subprojects foreach` command + +`meson subprojects` has learned a new `foreach` command which accepts a command +with arguments and executes it in each subproject directory. + +For example this can be useful to check the status of subprojects (e.g. with +`git status` or `git diff`) before performing other actions on them. + + +## Added c17 and c18 as c_std values for recent GCC and Clang Versions + +For gcc version 8.0 and later, the values c17, c18, gnu17, and gnu18 +were added to the accepted values for built-in compiler option c_std. + +For Clang version 10.0 and later on Apple OSX (Darwin), and for +version 7.0 and later on other platforms, the values c17 and gnu17 +were added as c_std values. + +## gpgme dependency now supports gpgme-config + +Previously, we could only detect GPGME with custom invocations of +`gpgme-config` or when the GPGME version was recent enough (>=1.13.0) +to install pkg-config files. Now we added support to Meson allowing us +to use `dependency('gpgme')` and fall back on `gpgme-config` parsing. + +## Can link against custom targets + +The output of `custom_target` and `custom_target[i]` can be used in +`link_with` and `link_whole` keyword arguments. This is useful for +integrating custom code generator steps, but note that there are many +limitations: + + - Meson can not know about link dependencies of the custom target. If + the target requires further link libraries, you need to add them manually + + - The user is responsible for ensuring that the code produced by + different toolchains are compatible. + + - `custom_target` may only be used when it has a single output file. + Use `custom_target[i]` when dealing with multiple output files. + + - The output file must have the correct file name extension. + + +## Removed the deprecated `--target-files` API + +The `--target-files` introspection API is now no longer available. The same +information can be queried with the `--targets` API introduced in 0.50.0. + +## Generators have a new `depends` keyword argument + +Generators can now specify extra dependencies with the `depends` +keyword argument. It matches the behaviour of the same argument in +other functions and specifies that the given targets must be built +before the generator can be run. This is used in cases such as this +one where you need to tell a generator to indirectly invoke a +different program. + +```meson +exe = executable(...) +cg = generator(program_runner, + output: ['@BASENAME@.c'], + arguments: ['--use-tool=' + exe.full_path(), '@INPUT@', '@OUTPUT@'], + depends: exe) +``` + +## Specifying options per mer machine + +Previously, no cross builds were controllable from the command line. +Machine-specific options like the pkg-config path and compiler options only +affected native targets, that is to say all targets in native builds, and +`native: true` targets in cross builds. Now, prefix the option with `build.` to +affect build machine targets, and leave it unprefixed to affect host machine +targets. + +For those trying to ensure native and cross builds to the same platform produced +the same result, the old way was frustrating because very different invocations +were needed to affect the same targets, if it was possible at all. Now, the same +command line arguments affect the same targets everwhere --- Meson is closer to +ignoring whether the "overall" build is native or cross, and just caring about +whether individual targets are for the build or host machines. + + +## subproject.get_variable() now accepts a `fallback` argument + +Similar to `get_variable`, a fallback argument can now be passed to +`subproject.get_variable()`, it will be returned if the requested +variable name did not exist. + +``` meson +var = subproject.get_variable('does-not-exist', 'fallback-value') +``` + +## Add keyword `static` to `find_library` + +`find_library` has learned the `static` keyword. They keyword must be a boolean, +where `true` only searches for static libraries and `false` only searches for +dynamic/shared. Leaving the keyword unset will keep the old behavior of first +searching for dynamic and then falling back to static. + +## Fortran `include` statements recursively parsed + +While non-standard and generally not recommended, some legacy Fortran +programs use `include` directives to inject code inline. Since v0.51, +Meson can handle Fortran `include` directives recursively. + +DO NOT list `include` files as sources for a target, as in general +their syntax is not correct as a standalone target. In general +`include` files are meant to be injected inline as if they were copy +and pasted into the source file. + +`include` was never standard and was superceded by Fortran 90 `module`. + +The `include` file is only recognized by Meson if it has a Fortran +file suffix, such as `.f` `.F` `.f90` `.F90` or similar. This is to +avoid deeply nested scanning of large external legacy C libraries that +only interface to Fortran by `include biglib.h` or similar. + +## CMake subprojects + +Meson can now directly consume CMake based subprojects with the +CMake module. + +Using CMake subprojects is similar to using the "normal" meson +subprojects. They also have to be located in the `subprojects` +directory. + +Example: + +```cmake +add_library(cm_lib SHARED ${SOURCES}) +``` + +```meson +cmake = import('cmake') + +# Configure the CMake project +sub_proj = cmake.subproject('libsimple_cmake') + +# Fetch the dependency object +cm_lib = sub_proj.dependency('cm_lib') + +executable(exe1, ['sources'], dependencies: [cm_lib]) +``` + +It should be noted that not all projects are guaranteed to work. The +safest approach would still be to create a `meson.build` for the +subprojects in question. + +## Multipe cross files can be specified + +`--cross-file` can be passed multiple times, with the configuration files overlaying the same way as `--native-file`. + diff --git a/meson/docs/markdown/Release-notes-for-0.52.0.md b/meson/docs/markdown/Release-notes-for-0.52.0.md new file mode 100644 index 000000000..018fb3569 --- /dev/null +++ b/meson/docs/markdown/Release-notes-for-0.52.0.md @@ -0,0 +1,248 @@ +--- +title: Release 0.52.0 +short-description: Release notes for 0.52.0 +... + +# New features + +## Gettext targets are ignored if `gettext` is not installed + +Previously the `i18n` module has errored out when `gettext` tools are +not installed on the system. Starting with this version they will +become no-ops instead. This makes it easier to build projects on +minimal environments (such as when bootstrapping) that do not have +translation tools installed. + +## Support taking environment values from a dictionary + +`environment()` now accepts a dictionary as first argument. If +provided, each key/value pair is added into the `environment_object` +as if `set()` method was called for each of them. + +On the various functions that take an `env:` keyword argument, you may +now give a dictionary. + +## alias_target + +``` meson +runtarget alias_target(target_name, dep1, ...) +``` + +This function creates a new top-level target. Like all top-level targets, this +integrates with the selected backend. For instance, with Ninja you can +run it as `ninja target_name`. This is a dummy target that does not execute any +command, but ensures that all dependencies are built. Dependencies can be any +build target (e.g. return value of executable(), custom_target(), etc) + + +## Enhancements to the pkg_config_path argument + +Setting sys_root in the [properties] section of your cross file will now set +PKG_CONFIG_SYSROOT_DIR automatically for host system dependencies when +cross compiling. + +## The meson test program now accepts an additional "--gdb-path" argument to specify the GDB binary + +`meson test --gdb testname` invokes GDB with the specific test case. However, sometimes GDB is not in the path or a GDB replacement is wanted. +Therefore, a `--gdb-path` argument was added to specify which binary is executed (per default `gdb`): + +```console +$ meson test --gdb --gdb-path /my/special/location/for/gdb testname +$ meson test --gdb --gdb-path cgdb testname +``` + +## Better support for illumos and Solaris + +illumos (and hopefully Solaris) support has been dramatically improved, and one +can reasonably expect projects to compile. + +## Splitting of Compiler.get_function_attribute('visibility') + +On macOS there is no `protected` visibility, which results in the visbility +check always failing. 0.52.0 introduces two changes to improve this situation: + +1. the "visibility" check no longer includes "protected" +2. a new set of "split" checks are introduced which check for a single + attribute instead of all attributes. + +These new attributes are: +* visibility:default +* visibility:hidden +* visibility:internal +* visibility:protected + +## Clang-tidy target + +If `clang-tidy` is installed and the project's source root contains a +`.clang-tidy` (or `_clang-tidy`) file, Meson will automatically define +a `clang-tidy` target that runs Clang-Tidy on all source files. + +If you have defined your own `clang-tidy` target, Meson will not +generate its own target. + +## Add blocks dependency + +Add `dependency('blocks')` to use the Clang blocks extension. + +## Meson's builtin b_lundef is now supported on macOS + +This has always been possible, but there are some addtional restrictions on +macOS (mainly do to Apple only features). With the linker internal +re-architecture this has become possible + +## Compiler and dynamic linker representation split + +0.52.0 inclues a massive refactor of the representaitons of compilers to +tease apart the representations of compilers and dynamic linkers (ld). This +fixes a number of compiler/linker combinations. In particular this fixes +use GCC and vanilla clang on macOS. + +## Add `depth` option to `wrap-git` + +To allow shallow cloning, an option `depth` has been added to `wrap-git`. +This applies recursively to submodules when `clone-recursive` is set to `true`. + +Note that the git server may have to be configured to support shallow cloning +not only for branches but also for tags. + +## Enhancements to the source_set module + +`SourceSet` objects now provide the `all_dependencies()` method, that +complement the existing `all_sources()` method. + +## added `--only test(s)` option to run_project_tests.py + +Individual tests or a list of tests from run_project_tests.py can be selected like: +``` +python run_project_tests.py --only fortran + +python run_project_tests.py --only fortran python3 +``` + +This assists Meson development by only running the tests for the portion of Meson being worked on during local development. + +## Experimental Webassembly support via Emscripten + +Meson now supports compiling code to Webassembly using the Emscripten +compiler. As with most things regarding Webassembly, this support is +subject to change. + +## Version check in `find_program()` + +A new `version` keyword argument has been added to `find_program` to specify +the required version. See [`dependency()`](#dependency) for argument format. +The version of the program is determined by running `program_name --version` +command. If stdout is empty it fallbacks to stderr. If the output contains more +text than simply a version number, only the first occurence of numbers separated +by dots is kept. If the output is more complicated than that, the version +checking will have to be done manually using [`run_command()`](#run_command). + +## Added `vs_module_defs` to `shared_module()` + +Like `shared_library()`, `shared_module()` now accepts +`vs_module_defs` argument for controlling symbol exports, etc. + +## Improved support for static libraries + +Static libraries had numerous shortcomings in the past, especially when using +uninstalled static libraries. This release brings many internal changes in the +way they are handled, including: + +- `link_whole:` of static libraries. In the example below, lib2 used to miss + symbols from lib1 and was unusable. +```meson +lib1 = static_library(sources) +lib2 = static_library(other_sources, link_whole : lib1, install : true) +``` +- `link_with:` of a static library with an uninstalled static library. In the +example below, lib2 now implicitly promote `link_with:` to `link_whole:` because +the installed lib2 would oterhwise be unusable. +```meson +lib1 = static_library(sources, install : false) +lib2 = static_library(sources, link_with : lib1, install : true) +``` +- pkg-config generator do not include uninstalled static libraries. In the example + below, the generated `.pc` file used to be unusable because it contained + `Libs.private: -llib1` and `lib1.a` is not installed. `lib1` is now ommitted + from the `.pc` file because the `link_with:` has been promoted to + `link_whole:` (see above) and thus lib1 is not needed to use lib2. +```meson +lib1 = static_library(sources, install : false) +lib2 = both_libraries(sources, link_with : lib1, install : true) +pkg.generate(lib2) +``` + +Many projects have been using `extract_all_objects()` to work around those issues, +and hopefully those hacks could now be removed. Since this is a pretty large +change, please double check if your static libraries behave correctly, and +report any regression. + +## Enhancements to the kconfig module + +`kconfig.load()` may now accept a `configure_file()` as input file. + +## Added `include_type` kwarg to `dependency` + +The `dependency()` function now has a `include_type` kwarg. It can take the +values `'preserve'`, `'system'` and `'non-system'`. If it is set to `'system'`, +all include directories of the dependency are marked as system dependencies. + +The default value of `include_type` is `'preserve'`. + +Additionally, it is also possible to check and change the `include_type` +state of an existing dependency object with the new `include_type()` and +`as_system()` methods. + +## Enhancements to `configure_file()` + +`input:` now accepts multiple input file names for `command:`-configured file. + +`depfile:` keyword argument is now accepted. The dependency file can +list all the additional files the configure target depends on. + +## Projects args can be set separately for build and host machines (potentially breaking change) + +Simplify `native` flag behavior in `add_global_arguments`, +`add_global_link_arguments`, `add_project_arguments` and +`add_project_link_arguments`. The rules are now very simple: + + - `native: true` affects `native: true` targets + + - `native: false` affects `native: false` targets + + - No native flag is the same as `native: false` + +This further simplifies behavior to match the "build vs host" decision done in +last release with `c_args` vs `build_c_args`. The underlying motivation in both +cases is to execute the same commands whether the overall build is native or +cross. + +## Allow checking if a variable is a disabler + +Added the function `is_disabler(var)`. Returns true if a variable is a disabler +and false otherwise. + + +## gtkdoc-check support + +`gnome.gtkdoc()` now has a `check` keyword argument. If `true` runs it will run +`gtkdoc-check` when running unit tests. Note that this has the downside of +rebuilding the doc for each build, which is often very slow. It usually should +be enabled only in CI. + +## `gnome.gtkdoc()` returns target object + +`gnome.gtkdoc()` now returns a target object that can be passed as dependency to +other targets using generated doc files (e.g. in `content_files` of another doc). + +## Dist is now a top level command + +Previously creating a source archive could only be done with `ninja +dist`. Starting with this release Meson provides a top level `dist` +that can be invoked directly. It also has a command line option to +determine which kinds of archives to create: + +```meson +meson dist --formats=xztar,zip +``` + diff --git a/meson/docs/markdown/Release-notes-for-0.53.0.md b/meson/docs/markdown/Release-notes-for-0.53.0.md new file mode 100644 index 000000000..b29759f46 --- /dev/null +++ b/meson/docs/markdown/Release-notes-for-0.53.0.md @@ -0,0 +1,211 @@ +--- +title: Release 0.53.0 +short-description: Release notes for 0.53.0 +... + +# New features + +## A new module for filesystem operations + +The new `fs` module can be used to examine the contents of the current +file system. + +```meson +fs = import('fs') +assert(fs.exists('important_file'), + 'The important file is missing.') +``` + +## meson dist --include-subprojects + +`meson dist` command line now gained `--include-subprojects` command line option. +When enabled, the source tree of all subprojects used by the current build will +also be included in the final tarball. This is useful to distribute self contained +tarball that can be built offline (i.e. `--wrap-mode=nodownload`). + +## Added new Meson templates for `Dlang`, `Rust`, `Objective-C` + +Meson now ships with predefined project templates for `Dlang`, +`Fortran`, `Rust`, `Objective-C`, and by passing the associated flags `d`, +`fortran`, `rust`, `objc` to `meson init --language`. + +## Add a new summary() function + +A new function [`summary()`](Reference-manual.md#summary) has been added to +summarize build configuration at the end of the build process. + +Example: +```meson +project('My Project', version : '1.0') +summary({'bindir': get_option('bindir'), + 'libdir': get_option('libdir'), + 'datadir': get_option('datadir'), + }, section: 'Directories') +summary({'Some boolean': false, + 'Another boolean': true, + 'Some string': 'Hello World', + 'A list': ['string', 1, true], + }, section: 'Configuration') +``` + +Output: +``` +My Project 1.0 + + Directories + prefix: /opt/gnome + bindir: bin + libdir: lib/x86_64-linux-gnu + datadir: share + + Configuration + Some boolean: False + Another boolean: True + Some string: Hello World + A list: string + 1 + True +``` + +## Generic Overrider for Dynamic Linker selection + +Previous to meson 0.52.0 you set the dynamic linker using compiler specific +flags passed via language flags and hoped things worked out. In meson 0.52.0 +meson started detecting the linker and making intelligent decisions about +using it. Unfortunately this broke choosing a non-default linker. + +Now there is a generic mechanism for doing this. In 0.53.0, you can use the `LD` +environment variable. **In 0.53.1** this was changed to `_LD`, +such as `CC_LD`, `CXX_LD`, `D_LD`, etc due to regressions. The usual meson +[environment variable rules](https://mesonbuild.com/Running-Meson.html#environment-variables) +apply. Alternatively, you can add the following to a cross or native file: + +In 0.53.0: + +```ini +[binaries] +ld = 'gold' +``` + +**In 0.53.1 or newer**: + +```ini +[binaries] +c = 'gcc' +c_ld = 'gold' +``` + +```ini +[binaries] +c = 'clang' +c_ld = 'lld' +``` + +And meson will select the linker if possible. + +## `fortran_std` option + +**new in 0.53.0** +Akin to the `c_std` and `cpp_std` options, the `fortran_std` option sets Fortran compilers to warn or error on non-Fortran standard code. +Only the Gfortran and Intel Fortran compilers have support for this option. +Other Fortran compilers ignore the `fortran_std` option. + +Supported values for `fortran_std` include: + +* `legacy` for non-conforming code--this is especially important for Gfortran, which by default errors on old non-compliant Fortran code +* `f95` for Fortran 95 compliant code. +* `f2003` for Fortran 2003 compliant code. +* `f2008` for Fortran 2008 compliant code. +* `f2018` for Fortran 2018 compliant code. + +## python.dependency() embed kwarg + +Added the `embed` kwarg to the python module dependency function to select +the python library that can be used to embed python into an application. + +## Scalapack + +added in **0.53.0**: + +```meson +scalapack = dependency('scalapack') +``` + +Historically and through today, typical Scalapack setups have broken and incomplete pkg-config or +FindScalapack.cmake. Meson handles finding Scalapack on setups including: + +* Linux: Intel MKL or OpenMPI + Netlib +* MacOS: Intel MKL or OpenMPI + Netlib +* Windows: Intel MKL (OpenMPI not available on Windows) + +## Search directories for `find_program()` + +It is now possible to give a list of absolute paths where `find_program()` should +also search, using the `dirs` keyword argument. + +For example on Linux `/sbin` and `/usr/sbin` are not always in the `$PATH`: +```meson +prog = find_program('mytool', dirs : ['/usr/sbin', '/sbin']) +``` + +## Source tags targets + +When the respective tools are available, 'ctags', 'TAGS' and 'cscope' +targets will be generated by Meson, unless you have defined your own. + +## Dictionary entry using string variable as key + +Keys can now be any expression evaluating to a string value, not limited +to string literals any more. +```meson +d = {'a' + 'b' : 42} +k = 'cd' +d += {k : 43} +``` + +## Improved CMake subprojects support + +With this release even more CMake projects are supported via +[CMake subprojects](CMake-module.md#cmake-subprojects) due to these internal +improvements: + +- Use the CMake file API for CMake >=3.14 +- Handle the explicit dependencies via `add_dependency` +- Basic support for `add_custom_target` +- Improved `add_custom_command` support +- Object library support on Windows + +## compiler.get_linker_id() + +since 0.53.0, `compiler.get_linker_id()` allows retrieving a lowercase name for the linker. +Since each compiler family can typically use a variety of linkers depending on operating system, +this helps users define logic for corner cases not otherwise easily handled. + +## CUDA dependency + +Native support for compiling and linking against the CUDA Toolkit using +the `dependency` function: + +```meson +project('CUDA test', 'cpp', meson_version: '>= 0.53.0') +exe = executable('prog', 'prog.cc', dependencies: dependency('cuda')) +``` + +See [the CUDA dependency](Dependencies.md#cuda) for more information. + +## Added global option to disable C++ RTTI + +The new boolean option is called `cpp_rtti`. + +## Introspection API changes + +dependencies (--dependencies, intro-dependencies.json): +- added the `version` key + +scanning dependencies (--scan-dependencies): +- added the `version` key containing the required dependency version + +tests and benchmarks (--tests, --benchmarks, intro-tests.json, +intro-benchmarks.json): +- added the `protocol` key + diff --git a/meson/docs/markdown/Release-notes-for-0.54.0.md b/meson/docs/markdown/Release-notes-for-0.54.0.md new file mode 100644 index 000000000..2f215de47 --- /dev/null +++ b/meson/docs/markdown/Release-notes-for-0.54.0.md @@ -0,0 +1,366 @@ +--- +title: Release 0.54.0 +short-description: Release notes for 0.54.0 +... + +# New features + +## Emscripten (emcc) now supports threads + +In addition to properly setting the compile and linker arguments, a new meson +builtin has been added to control the PTHREAD_POOL_SIZE option, +`-D_thread_count`, which may be set to any integer value greater than 0. +If it set to 0 then the PTHREAD_POOL_SIZE option will not be passed. + +## Introduce dataonly for the pkgconfig module +This allows users to disable writing out the inbuilt variables to +the pkg-config file as they might actually not be required. + +One reason to have this is for architecture-independent pkg-config +files in projects which also have architecture-dependent outputs. + +``` +pkgg.generate( + name : 'libhello_nolib', + description : 'A minimalistic pkgconfig file.', + version : libver, + dataonly: true +) +``` + +## Consistently report file locations relative to cwd + +The paths for filenames in error and warning locations are now consistently +reported relative to the current working directory (when possible), or as +absolute paths (when a relative path does not exist, e.g. a Windows path +starting with a different drive letter to the current working directory). + +(The previous behaviour was to report a path relative to the source root for all +warnings and most errors, and relative to cwd for certain parser errors) + +## `dependency()` consistency + +The first time a dependency is found, using `dependency('foo', ...)`, the return +value is now cached. Any subsequent call will return the same value as long as +version requested match, otherwise not-found dependency is returned. This means +that if a system dependency is first found, it won't fallback to a subproject +in a subsequent call any more and will rather return not-found instead if the +system version does not match. Similarly, if the first call returns the subproject +fallback dependency, it will also return the subproject dependency in a subsequent +call even if no fallback is provided. + +For example, if the system has `foo` version 1.0: +```meson +# d2 is set to foo_dep and not the system dependency, even without fallback argument. +d1 = dependency('foo', version : '>=2.0', required : false, + fallback : ['foo', 'foo_dep']) +d2 = dependency('foo', version : '>=1.0', required : false) +``` +```meson +# d2 is not-found because the first call returned the system dependency, but its version is too old for 2nd call. +d1 = dependency('foo', version : '>=1.0', required : false) +d2 = dependency('foo', version : '>=2.0', required : false, + fallback : ['foo', 'foo_dep']) +``` + +## Override `dependency()` + +It is now possible to override the result of `dependency()` to point +to any dependency object you want. The overriding is global and applies to +every subproject from there on. + +For example, this subproject provides 2 libraries with version 2.0: + +```meson +project(..., version : '2.0') + +libfoo = library('foo', ...) +foo_dep = declare_dependency(link_with : libfoo) +meson.override_dependency('foo', foo_dep) + +libbar = library('bar', ...) +bar_dep = declare_dependency(link_with : libbar) +meson.override_dependency('bar', bar_dep) +``` + +Assuming the system has `foo` and `bar` 1.0 installed, and master project does this: +```meson +foo_dep = dependency('foo', version : '>=2.0', fallback : ['foo', 'foo_dep']) +bar_dep = dependency('bar') +``` + +This used to mix system 1.0 version and subproject 2.0 dependencies, but thanks +to the override `bar_dep` is now set to the subproject's version instead. + +Another case this can be useful is to force a subproject to use a specific dependency. +If the subproject does `dependency('foo')` but the main project wants to provide +its own implementation of `foo`, it can for example call +`meson.override_dependency('foo', declare_dependency(...))` before configuring the +subproject. + +## Simplified `dependency()` fallback + +In the case a subproject `foo` calls `meson.override_dependency('foo-2.0', foo_dep)`, +the parent project can omit the dependency variable name in fallback keyword +argument: `dependency('foo-2.0', fallback : 'foo')`. + +## Backend agnostic compile command + +A new `meson compile` command has been added to support backend agnostic +compilation. It accepts two arguments, `-j` and `-l`, which are used if +possible (`-l` does nothing with msbuild). A `-j` or `-l` value < 1 lets the +backend decide how many threads to use. For msbuild this means `-m`, for +ninja it means passing no arguments. + +```console +meson builddir --backend vs +meson compile -C builddir -j0 # this is the same as `msbuild builddir/my.sln -m` +``` + +```console +meson builddir +meson compile -C builddir -j3 # this is the same as `ninja -C builddir -j3` +``` + +Additionally `meson compile` provides a `--clean` switch to clean the project. + +A complete list of arguments is always documented via `meson compile --help` + +## Native (build machine) compilers not always required + +`add_languages()` gained a `native:` keyword, indicating if a native or cross +compiler is to be used. + +For the benefit of existing simple build definitions which don't contain any +`native: true` targets, without breaking backwards compatibility for build +definitions which assume that the native compiler is available after +`add_languages()`, if the `native:` keyword is absent the languages may be used +for either the build or host machine, but are never required for the build +machine. + +This changes the behaviour of the following meson fragment (when cross-compiling +but a native compiler is not available) from reporting an error at +`add_language` to reporting an error at `executable`. + +``` +add_language('c') +executable('main', 'main.c', native: true) +``` + +## Summary improvements + +A new `list_sep` keyword argument has been added to `summary()` function. +If defined and the value is a list, elements will be separated by the provided +string instead of being aligned on a new line. + +The automatic `subprojects` section now also print the number of warnings encountered +during that subproject configuration, or the error message if the configuration failed. + +## Add a system type dependency for zlib + +This allows zlib to be detected on macOS and FreeBSD without the use of +pkg-config or cmake, neither of which are part of the base install on those +OSes (but zlib is). + +A side effect of this change is that `dependency('zlib')` also works with +cmake instead of requiring `dependency('ZLIB')`. + +## Added 'name' method +Build target objects (as returned by executable(), library(), ...) now have a name() method. + +## New option `--quiet` to `meson install` + +Now you can run `meson install --quiet` and meson will not verbosely print +every file as it is being installed. As before, the full log is always +available inside the builddir in `meson-logs/install-log.txt`. + +When this option is passed, install scripts will have the environment variable +`MESON_INSTALL_QUIET` set. + +Numerous speed-ups were also made for the install step, especially on Windows +where it is now 300% to 1200% faster than before depending on your workload. + +## Property support emscripten's wasm-ld + +Before 0.54.0 we treated emscripten as both compiler and linker, which isn't +really true. It does have a linker, called wasm-ld (meson's name is ld.wasm). +This is a special version of clang's lld. This will now be detected properly. + +## Skip sanity tests when cross compiling + +For certain cross compilation environments it is not possible to +compile a sanity check application. This can now be disabled by adding +the following entry to your cross file's `properties` section: + +``` +skip_sanity_check = true +``` + +## Support for overiding the linker with ldc and gdc + +LDC (the llvm D compiler) and GDC (The Gnu D Compiler) now honor D_LD linker +variable (or d_ld in the cross file) and is able to pick differnt linkers. + +GDC supports all of the same values as GCC, LDC supports ld.bfd, ld.gold, +ld.lld, ld64, link, and lld-link. + +## Native file properties + +As of Meson 0.54.0, the `--native-file nativefile.ini` can contain: + +* binaries +* paths +* properties + +which are defined and used the same way as in cross files. +The `properties` are new for Meson 0.54.0, and are read like: + +```meson +x = meson.get_external_property('foobar', 'foo') +``` + +where `foobar` is the property name, and the optional `foo` is the fallback string value. + +For cross-compiled projects, `get_external_property()` reads the cross-file unless `native: true` is specified. + +## Changed the signal used to terminate a test process (group) + +A test process (group) is now terminated via SIGTERM instead of SIGKILL +allowing the signal to be handled. However, it is now the responsibility of +the custom signal handler (if any) to ensure that any process spawned by the +top-level test processes is correctly killed. + +## Dynamic Linker environment variables actually match docs + +The docs have always claimed that the Dynamic Linker environment variable +should be `${COMPILER_VAR}_LD`, but that's only the case for about half of +the variables. The other half are different. In 0.54.0 the variables match. +The old variables are still supported, but are deprecated and raise a +deprecation warning. + +## Per subproject `default_library` and `werror` options + +The `default_library` and `werror` built-in options can now be defined per subproject. +This is useful for example when building shared libraries in the main project, +but static link a subproject, or when the main project must build with no warnings +but some subprojects cannot. + +Most of the time this would be used either by the parent project by setting +subproject's default_options (e.g. `subproject('foo', default_options: 'default_library=static')`), +or by the user using the command line `-Dfoo:default_library=static`. + +The value is overriden in this order: +- Value from parent project +- Value from subproject's default_options if set +- Value from subproject() default_options if set +- Value from command line if set + +## Environment Variables with Cross Builds + +Previously in Meson, variables like `CC` effected both the host and build +platforms for native builds, but the just the build platform for cross builds. +Now `CC_FOR_BUILD` is used for the build platform in cross builds. + +This old behavior is inconsistent with the way Autotools works, which +undermines the purpose of distro-integration that is the only reason +environment variables are supported at all in Meson. The new behavior is not +quite the same, but doesn't conflict: meson doesn't always repond to an +environment when Autoconf would, but when it does it interprets it as Autotools +would. + +## Added 'pkg_config_libdir' property +Allows to define a list of folders used by pkg-config for a cross build +and avoid a system directories use. + +## More new sample Meson templates for (`Java`, `Cuda`, and more) + +Meson now ships with predefined project templates for `Java`, +`Cuda`, `Objective-C++`, and `C#`, we provided with associated +values for corresponding languages, avalable for both library, +and executable. + +## Ninja version requirement bumped to 1.7 + +Meson now uses the [Implicit outputs](https://ninja-build.org/manual.html#ref_outputs) +feature of Ninja for some types of targets that have multiple outputs which may +not be listed on the command-line. This feature requires Ninja 1.7+. + +Note that the latest version of [Ninja available in Ubuntu 16.04](https://packages.ubuntu.com/search?keywords=ninja-build&searchon=names&suite=xenial-backports§ion=all) +(the oldest Ubuntu LTS at the time of writing) is 1.7.1. If your distro does +not ship with a new-enough Ninja, you can download the latest release from +Ninja's GitHub page: https://github.com/ninja-build/ninja/releases + +## Added `-C` argument to `meson init` command + +The meson init assumes that it is run inside the project +root directory. If this isn't the case, you can now use +`-C` to specify the actual project source directory. + +## More than one argument to `message()` and `warning()` + +Arguments passed to `message()` and `warning()` will be printed separated by +space. + +## Added `has_tools` method to qt module + +It should be used to compile optional Qt code: +```meson +qt5 = import('qt5') +if qt5.has_tools(required: get_option('qt_feature')) + moc_files = qt5.preprocess(...) + ... +endif +``` + +## The MSI installer is only available in 64 bit version + +Microsoft ended support for Windows 7, so only 64 bit Windows OSs are +officially supported. Thus only a 64 bit MSI installer will be +provided going forward. People needing a 32 bit version can build +their own with the `msi/createmsi.py` script in Meson's source +repository. + +## Uninstalled pkg-config files + +**Note**: the functionality of this module is governed by [Meson's + rules on mixing build systems](Mixing-build-systems.md). + +The `pkgconfig` module now generates uninstalled pc files as well. For any generated +`foo.pc` file, an extra `foo-uninstalled.pc` file is placed into +`/meson-uninstalled`. They can be used to build applications against +libraries built by meson without installing them, by pointing `PKG_CONFIG_PATH` +to that directory. This is an experimental feature provided on a best-effort +basis, it might not work in all use-cases. + +## CMake find_package COMPONENTS support + +It is now possible to pass components to the CMake dependency backend via the +new `components` kwarg in the `dependency` function. + +## Added Microchip XC16 C compiler support +Make sure compiler executables are setup correctly in your path +Compiler is available from the Microchip website for free + + +## Added Texas Instruments C2000 C/C++ compiler support +Make sure compiler executables are setup correctly in your path +Compiler is available from Texas Instruments website for free + +## Unity file block size is configurable + +Traditionally the unity files that Meson autogenerates contain all +source files that belong to a single target. This is the most +efficient setting for full builds but makes incremental builds slow. +This release adds a new option `unity_size` which specifies how many +source files should be put in each unity file. + +The default value for block size is 4. This means that if you have a +target that has eight source files, Meson will generate two unity +files each of which includes four source files. The old behaviour can +be replicated by setting `unity_size` to a large value, such as 10000. + +## Verbose mode for `meson compile` + +The new option `--verbose` has been added to `meson compile` that will enable +more verbose compilation logs. Note that for VS backend it means that logs will +be less verbose by default (without `--verbose` option). diff --git a/meson/docs/markdown/Release-notes-for-0.55.0.md b/meson/docs/markdown/Release-notes-for-0.55.0.md new file mode 100644 index 000000000..cd3f79514 --- /dev/null +++ b/meson/docs/markdown/Release-notes-for-0.55.0.md @@ -0,0 +1,315 @@ +--- +title: Release 0.55.0 +short-description: Release notes for 0.55.0 +... + +# New features + +## rpath removal now more careful + +On Linux-like systems, meson adds rpath entries to allow running apps +in the build tree, and then removes those build-time-only +rpath entries when installing. Rpath entries may also come +in via LDFLAGS and via .pc files. Meson used to remove those +latter rpath entries by accident, but is now more careful. + +## Added ability to specify targets in `meson compile` + +It's now possible to specify targets in `meson compile`, which will result in building only the requested targets. + +Usage: `meson compile [TARGET [TARGET...]]` +`TARGET` has the following syntax: `[PATH/]NAME[:TYPE]`. +`NAME`: name of the target from `meson.build` (e.g. `foo` from `executable('foo', ...)`). +`PATH`: path to the target relative to the root `meson.build` file. Note: relative path for a target specified in the root `meson.build` is `./`. +`TYPE`: type of the target (e.g. `shared_library`, `executable` and etc) + +`PATH` and/or `TYPE` can be ommited if the resulting `TARGET` can be used to uniquely identify the target in `meson.build`. + +For example targets from the following code: +```meson +shared_library('foo', ...) +static_library('foo', ...) +executable('bar', ...) +``` +can be invoked with `meson compile foo:shared_library foo:static_library bar`. + +## Test protocol for gtest + +Due to the popularity of Gtest (google test) among C and C++ developers meson +now supports a special protocol for gtest. With this protocol meson injects +arguments to gtests to output JUnit, reads that JUnit, and adds the output to +the JUnit it generates. + +## meson.add_*_script methods accept new types + +All three (`add_install_script`, `add_dist_script`, and +`add_postconf_script`) now accept ExternalPrograms (as returned by +`find_program`), Files, and the output of `configure_file`. The dist and +postconf methods cannot accept other types because of when they are run. +While dist could, in theory, take other dependencies, it would require more +extensive changes, particularly to the backend. + +```meson +meson.add_install_script(find_program('foo'), files('bar')) +meson.add_dist_script(find_program('foo'), files('bar')) +meson.add_postconf_script(find_program('foo'), files('bar')) +``` + +The install script variant is also able to accept custom_targets, +custom_target indexes, and build targets (executables, libraries), and can +use built executables a the script to run + +```meson +installer = executable('installer', ...) +meson.add_install_script(installer, ...) +meson.add_install_script('foo.py', installer) +``` + +## Machine file constants + +Native and cross files now support string and list concatenation using the `+` +operator, and joining paths using the `/` operator. +Entries defined in the `[constants]` section can be used in any other section. +An entry defined in any other section can be used only within that same section and only +after it has been defined. + +```ini +[constants] +toolchain = '/toolchain' +common_flags = ['--sysroot=' + toolchain + '/sysroot'] + +[properties] +c_args = common_flags + ['-DSOMETHING'] +cpp_args = c_args + ['-DSOMETHING_ELSE'] + +[binaries] +c = toolchain + '/gcc' +``` + +## Configure CMake subprojects with meson.subproject_options + +Meson now supports passing configuration options to CMake and overriding +certain build details extracted from the CMake subproject. + +The new CMake configuration options object is very similar to the +[configuration data object](Reference-manual.md#configuration-data-object) object +returned by [`configuration_data`](Reference-manual.md#configuration_data). It +is generated by the `subproject_options` function + +All configuration options have to be set *before* the subproject is configured +and must be passed to the `subproject` method via the `options` key. Altering +the configuration object won't have any effect on previous `cmake.subproject` +calls. + +**Note:** The `cmake_options` kwarg for the `subproject` function is now +deprecated since it is replaced by the new `options` system. + +## find_program: Fixes when the program has been overridden by executable + +When a program has been overridden by an executable, the returned object of +find_program() had some issues: + +```meson +# In a subproject: +exe = executable('foo', ...) +meson.override_find_program('foo', exe) + +# In main project: +# The version check was crashing meson. +prog = find_program('foo', version : '>=1.0') + +# This was crashing meson. +message(prog.path()) + +# New method to be consistent with built objects. +message(prog.full_path()) +``` + +## Response files enabled on Linux, reined in on Windows + +Meson used to always use response files on Windows, +but never on Linux. + +It now strikes a happier balance, using them on both platforms, +but only when needed to avoid command line length limits. + +## `unstable-kconfig` module renamed to `unstable-keyval` + +The `unstable-kconfig` module is now renamed to `unstable-keyval`. +We expect this module to become stable once it has some usage experience, +specifically in the next or the following release + + +## Fatal warnings in `gnome.generate_gir()` + +`gnome.generate_gir()` now has `fatal_warnings` keyword argument to abort when +a warning is produced. This is useful for example in CI environment where it's +important to catch potential issues. + +## b_ndebug support for D language compilers + +D Language compilers will now set -release/--release/-frelease (depending on +the compiler) when the b_ndebug flag is set. + +## Meson test now produces JUnit xml from results + +Meson will now generate a JUnit compatible XML file from test results. it +will be in the meson-logs directory and is called testlog.junit.xml. + +## Config tool based dependencies no longer search PATH for cross compiling + +Before 0.55.0 config tool based dependencies (llvm-config, cups-config, etc), +would search system $PATH if they weren't defined in the cross file. This has +been a source of bugs and has been deprecated. It is now removed, config tool +binaries must be specified in the cross file now or the dependency will not +be found. + +## Rename has_exe_wrapper -> can_run_host_binaries + +The old name was confusing as it didn't really match the behavior of the +function. The old name remains as an alias (the behavior hasn't changed), but +is now deprecated. + +## String concatenation in meson_options.txt + +It is now possible to use string concatenation (with the `+` opperator) in the +meson_options.txt file. This allows splitting long option descriptions. + +```meson +option( + 'testoption', + type : 'string', + value : 'optval', + description : 'An option with a very long description' + + 'that does something in a specific context' +) +``` + +## Wrap fallback URL + +Wrap files can now define `source_fallback_url` and `patch_fallback_url` to be +used in case the main server is temporaly down. + +## Clang coverage support + +llvm-cov is now used to generate coverage information when clang is used as +the compiler. + +## Local wrap source and patch files + +It is now possible to use the `patch_filename` and `source_filename` value in a +`.wrap` file without `*_url` to specify a local source / patch file. All local +files must be located in the `subprojects/packagefiles` directory. The `*_hash` +entries are optional with this setup. + +## Local wrap patch directory + +Wrap files can now specify `patch_directory` instead of `patch_filename` in the +case overlay files are local. Every files in that directory, and subdirectories, +will be copied to the subproject directory. This can be used for example to add +`meson.build` files to a project not using Meson build system upstream. +The patch directory must be placed in `subprojects/packagefiles` directory. + +## Patch on all wrap types + +`patch_*` keys are not limited to `wrap-file` any more, they can be specified for +all wrap types. + +## link_language argument added to all targets + +Previously the `link_language` argument was only supposed to be allowed in +executables, because the linker used needs to be the linker for the language +that implements the main function. Unfortunately it didn't work in that case, +and, even worse, if it had been implemented properly it would have worked for +*all* targets. In 0.55.0 this restriction has been removed, and the bug fixed. +It now is valid for `executable` and all derivative of `library`. + +## meson dist --no-tests + +`meson dist` has a new option `--no-tests` to skip build and tests of generated +packages. It can be used to not waste time for example when done in CI that +already does its own testing. + +## Force fallback for + +A newly-added `--force-fallback-for` command line option can now be used to +force fallback for specific subprojects. + +Example: + +``` +meson build --force-fallback-for=foo,bar +``` + +## Implicit dependency fallback + +`dependency('foo')` now automatically fallback if the dependency is not found on +the system but a subproject wrap file or directory exists with the same name. + +That means that simply adding `subprojects/foo.wrap` is enough to add fallback +to any `dependency('foo')` call. It is however requires that the subproject call +`meson.override_dependency('foo', foo_dep)` to specify which dependency object +should be used for `foo`. + +## Wrap file `provide` section + +Wrap files can define the dependencies it provides in the `[provide]` section. +When `foo.wrap` provides the dependency `foo-1.0` any call do `dependency('foo-1.0')` +will automatically fallback to that subproject even if no `fallback` keyword +argument is given. See [Wrap documentation](Wrap-dependency-system-manual.md#provide_section). + +## `find_program()` fallback + +When a program cannot be found on the system but a wrap file has its name in the +`[provide]` section, that subproject will be used as fallback. + +## Test scripts are given the exe wrapper if needed + +Meson will now set the `MESON_EXE_WRAPPER` as the properly wrapped and joined +representation. For Unix-like OSes this means python's shelx.join, on Windows +an implementation that attempts to properly quote windows argument is used. +This allow wrapper scripts to run test binaries, instead of just skipping. + +for example, if the wrapper is `['emulator', '--script']`, it will be passed +as `MESON_EXE_WRAPPER="emulator --script"`. + +## Added ability to specify backend arguments in `meson compile` + +It's now possible to specify backend specific arguments in `meson compile`. + +Usage: `meson compile [--vs-args=args] [--ninja-args=args]` + +``` + --ninja-args NINJA_ARGS Arguments to pass to `ninja` (applied only on `ninja` backend). + --vs-args VS_ARGS Arguments to pass to `msbuild` (applied only on `vs` backend). +``` + +These arguments use the following syntax: + +If you only pass a single string, then it is considered to have all values separated by commas. Thus invoking the following command: + +``` +$ meson compile --ninja-args=-n,-d,explain +``` + +would add `-n`, `-d` and `explain` arguments to ninja invocation. + +If you need to have commas or spaces in your string values, then you need to pass the value with proper shell quoting like this: + +``` +$ meson compile "--ninja-args=['a,b', 'c d']" +``` + +## Introspection API changes + +dumping the AST (--ast): **new in 0.55.0** +- prints the AST of a meson.build as JSON + +## `--backend=vs` now matches `-Db_vscrt=from_buildtype` behaviour in the Ninja backend + +When `--buildtype=debugoptimized` is used with the Ninja backend, the VS CRT +option used is `/MD`, which is the [behaviour documented for all +backends](https://mesonbuild.com/Builtin-options.html#b_vscrt-from_buildtype). +However, the Visual Studio backend was pass `/MT` in that case, which is inconsistent. + +If you need to use the MultiThreaded CRT, you should explicitly pass `-Db_vscrt=mt` diff --git a/meson/docs/markdown/Release-notes.md b/meson/docs/markdown/Release-notes.md new file mode 100644 index 000000000..d7de275fc --- /dev/null +++ b/meson/docs/markdown/Release-notes.md @@ -0,0 +1 @@ +# Release notes diff --git a/meson/docs/markdown/Release-procedure.md b/meson/docs/markdown/Release-procedure.md new file mode 100644 index 000000000..caf497308 --- /dev/null +++ b/meson/docs/markdown/Release-procedure.md @@ -0,0 +1,64 @@ +# Release procedure + +**This page is WIP. The following procedure is not yet approved for use** + +# Trunk + +Meson operates under the principle that trunk should (in theory) be always +good enough for release. That is, all code merged in trunk must pass all unit +tests. Any broken code should either be fixed or reverted immediately. + +People who are willing to tolerate the occasional glitch should be able to +use Meson trunk for their day to day development if they so choose. + +# Major releases + +Major releases are currently in the form 0.X.0, where X is an increasing +number. We aim to do a major release roughly once a month, though the +schedule is not set in stone. + +Before a major release is made a stable branch will be made, and 0.X.0-rc1 +release candidate will be made. A new milestone for 0.X.0 will be made, and +all bugs effecting the RC will be assigned to this milestone. Patches fixing +bugs in the milestone will be picked to the stable branch, and normal +development will continue on the master branch. Every week after after this a +new release candidate will be made until all bugs are resolved in that +milestone. When all of the bugs are fixed the 0.X.0 release will be made. + +# Bugfix releases + +Bugfix releases contain only minor fixes to major releases and are designated +by incrementing the last digit of the version number. The criteria for a bug +fix release is one of the following: + + - release has a major regression compared to the previous release (making + existing projects unbuildable) + - the release has a serious bug causing data loss or equivalent + - other unforeseen major issue + +In these cases a bug fix release can be made. It shall contain _only_ the fix +for the issue (or issues) in question and other minor bug fixes. Only changes +that have already landed in trunk will be considered for inclusion. No new +functionality shall be added. + +# Requesting a bug fix release + +The process for requesting that a bug fix release be made goes roughly as follows: + + - file a bug about the core issue + - file a patch fixing it if possible + - contact the development team and request a bug fix release (IRC is the + preferred contact medium) + +The request should contain the following information: + + - the issue in question + - whether it has already caused problems for real projects + - an estimate of how many people and projects will be affected + +There is no need to write a long and complicated request report. Something like the following is sufficient: + +> The latest release has a regression where trying to do Foo using Bar +breaks. This breaks all projects that use both, which includes at least [list +of affected projects]. This causes problems for X amount of people and +because of this we should do a bugfix release. diff --git a/meson/docs/markdown/Reproducible-builds.md b/meson/docs/markdown/Reproducible-builds.md new file mode 100644 index 000000000..1e00feae8 --- /dev/null +++ b/meson/docs/markdown/Reproducible-builds.md @@ -0,0 +1,20 @@ +# Reproducible builds + +A reproducible build means the following (as quoted from [the +reproducible builds project site](https://reproducible-builds.org/)): + +> Reproducible builds are a set of software development practices that + create a verifiable path from human readable source code to the + binary code used by computers. + +Roughly what this means is that if two different people compile the +project from source, their outputs are bitwise identical to each +other. This allows people to verify that binaries downloadable from +the net actually come from the corresponding sources and have not, for +example, had malware added to them. + +Meson aims to support reproducible builds out of the box with zero +additional work (assuming the rest of the build environment is set up +for reproducibility). If you ever find a case where this is not +happening, it is a bug. Please file an issue with as much information +as possible and we'll get it fixed. diff --git a/meson/docs/markdown/Rewriter.md b/meson/docs/markdown/Rewriter.md new file mode 100644 index 000000000..641448007 --- /dev/null +++ b/meson/docs/markdown/Rewriter.md @@ -0,0 +1,236 @@ +--- +short-description: Automatic modification of the build system files +... + +# Meson file rewriter + +Since version 0.50.0, meson has the functionality to perform some basic +modification on the `meson.build` files from the command line. The currently +supported operations are: + +- For build targets: + - Add/Remove source files + - Add/Remove targets + - Modify a select set of kwargs + - Print some JSON information +- For dependencies: + - Modify a select set of kwargs +- For the project function: + - Modify a select set of kwargs + - Modify the default options list + +The rewriter has both, a normal command line interface and a "script mode". The +normal CLI is mostly designed for everyday use. The "script mode", on the +other hand, is meant to be used by external programs (IDEs, graphical +frontends, etc.) + +The rewriter itself is considered stable, however the user interface and the +"script mode" API might change in the future. These changes may also break +backwards comaptibility to older releases. + +We are also open to suggestions for API improvements. + +## Using the rewriter + +All rewriter functions are accessed via `meson rewrite`. The meson rewriter +assumes that it is run inside the project root directory. If this isn't the +case, use `--sourcedir` to specify the actual project source directory. + +### Adding and removing sources + +The most common operations will probably be the adding and removing of source +files to a build target. This can be easily done with: + +```bash +meson rewrite target {add/rm} [list of sources] +``` + +For instance, given the following example + +```meson +src = ['main.cpp', 'fileA.cpp'] + +exe1 = executable('testExe', src) +``` + +the source `fileB.cpp` can be added with: + +```bash +meson rewrite target testExe add fileB.cpp +``` + +After executing this command, the new `meson.build` will look like this: + +```meson +src = ['main.cpp', 'fileA.cpp', 'fileB.cpp'] + +exe1 = executable('testExe', src) +``` + +In this case, `exe1` could also have been used for the target name. This is +possible because the rewriter also searches for assignments and unique meson +IDs, which can be acquired with introspection. If there are multiple targets +with the same name, meson will do nothing and print an error message. + +For more information see the help output of the rewriter target command. + +### Setting the project version + +It is also possible to set kwargs of specific functions with the rewriter. The +general command for setting or removing kwargs is: + +```bash +meson rewrite kwargs {set/delete} ... +``` + +For instance, setting the project version can be achieved with this command: + +```bash +meson rewrite kwargs set project / version 1.0.0 +``` + +Currently, only the following function types are supported: + +- dependency +- target (any build target, the function ID is the target name/ID) +- project (the function ID must be `/` since project() can only be called once) + +For more information see the help output of the rewrite kwargs command. + +### Setting the project default options + +For setting and deleting default options, use the following command: + +```bash +meson rewrite default-options {set/delete} ... +``` + +## Limitations + +Rewriting a meson file is not guaranteed to keep the indentation of the modified +functions. Additionally, comments inside a modified statement will be removed. +Furthermore, all source files will be sorted alphabetically. + +For instance adding `e.c` to srcs in the following code + +```meson +# Important comment + +srcs = [ +'a.c', 'c.c', 'f.c', +# something important about b + 'b.c', 'd.c', 'g.c' +] + +# COMMENT +``` + +would result in the following code: + +```meson +# Important comment + +srcs = [ + 'a.c', + 'b.c', + 'c.c', + 'd.c', + 'e.c', + 'f.c', + 'g.c' +] + +# COMMENT +``` + +## Using the "script mode" + +The "script mode" should be the preferred API for third party programs, since +it offers more flexibility and higher API stability. The "scripts" are stored +in JSON format and executed with `meson rewrite command `. + +The JSON format is defined as follows: + +```json +[ + { + "type": "function to execute", + ... + }, { + "type": "other function", + ... + }, + ... +] +``` + +Each object in the main array must have a `type` entry which specifies which +function should be executed. + +Currently, the following functions are supported: + +- target +- kwargs +- default_options + +### Target modification format + +The format for the type `target` is defined as follows: + +```json +{ + "type": "target", + "target": "target ID/name/assignment variable", + "operation": "one of ['src_add', 'src_rm', 'target_rm', 'target_add', 'info']", + "sources": ["list", "of", "source", "files", "to", "add, remove"], + "subdir": "subdir where the new target should be added (only has an effect for operation 'tgt_add')", + "target_type": "function name of the new target -- same as in the CLI (only has an effect for operation 'tgt_add')" +} +``` + +The keys `sources`, `subdir` and `target_type` are optional. + +### kwargs modification format + +The format for the type `target` is defined as follows: + +```json +{ + "type": "kwargs", + "function": "one of ['dependency', 'target', 'project']", + "id": "function ID", + "operation": "one of ['set', 'delete', 'add', 'remove', 'remove_regex', 'info']", + "kwargs": { + "key1": "value1", + "key2": "value2", + ... + } +} +``` + +### Default options modification format + +The format for the type `default_options` is defined as follows: + +```json +{ + "type": "default_options", + "operation": "one of ['set', 'delete']", + "options": { + "opt1": "value1", + "opt2": "value2", + ... + } +} +``` + +For operation `delete`, the values of the `options` can be anything (including `null`) + +## Extracting information + +The rewriter also offers operation `info` for the types `target` and `kwargs`. +When this operation is used, meson will print a JSON dump to stderr, containing +all available information to the rewriter about the build target / function +kwargs in question. + +The output format is currently experimental and may change in the future. diff --git a/meson/docs/markdown/Run-targets.md b/meson/docs/markdown/Run-targets.md new file mode 100644 index 000000000..b584bf769 --- /dev/null +++ b/meson/docs/markdown/Run-targets.md @@ -0,0 +1,40 @@ +--- +short-description: Targets to run external commands +... + +# Run targets + +Sometimes you need to have a target that just runs an external command. As an example you might have a build target that reformats your source code, runs `cppcheck` or something similar. In Meson this is accomplished with a so called *run target*. + +The recommended way of doing this is writing the command(s) you want to run to a script file. Here's an example script. + +```bash +#!/bin/sh + +cd "${MESON_SOURCE_ROOT}" +inspector_command -o "${MESON_BUILD_ROOT}/inspection_result.txt" +``` + +Note the two environment variables `MESON_SOURCE_ROOT` and `MESON_BUILD_ROOT`. These are absolute paths to your project's source and build directories and they are automatically set up by Meson. In addition to these Meson also sets up the variable `MESON_SUBDIR`, which points to the subdirectory where the run command was specified. Most commands don't need to set up this. + +Note how the script starts by cd'ing into the source dir. Meson does not guarantee that the script is run in any specific directory. Whether you need to do the same depends on what your custom target wants to do. + +To make this a run target we write it to a script file called `scripts/inspect.sh` and specify it in the top level Meson file like this. + +```meson +run_target('inspector', + command : 'scripts/inspect.sh') +``` + +Run targets are not run by default. To run it run the following command. + +```console +$ meson compile inspector +``` + +All additional entries in `run_target`'s `command` array are passed unchanged to the inspector script, so you can do things like this: + +```meson +run_target('inspector', + command : ['scripts/inspect.sh', '--exclude', 'tests']) +``` diff --git a/meson/docs/markdown/Running-Meson.md b/meson/docs/markdown/Running-Meson.md new file mode 100644 index 000000000..326ecb96f --- /dev/null +++ b/meson/docs/markdown/Running-Meson.md @@ -0,0 +1,198 @@ +--- +short-description: Building a project with Meson +... + +# Running Meson + +There are two different ways of invoking Meson. First, you can run it directly +from the source tree with the command `/path/to/source/meson.py`. Meson may +also be installed in which case the command is simply `meson`. In this manual +we only use the latter format for simplicity. + +At the time of writing only a command line version of Meson is available. This +means that Meson must be invoked using the terminal. If you wish to use the +MSVC compiler, you need to run Meson under "Visual Studio command prompt". + +All available meson commands are listed on the [commands reference page](Commands.md). + +## Configuring the build directory + +Let us assume that we have a source tree that has a Meson build system. This +means that at the topmost directory has a file called `meson.build`. We run the +following commands to get the build started. + +```sh +cd /path/to/source/root +meson setup builddir +``` + +We invoke Meson with the `setup` command, giving it the location of the build +directory. Meson uses [out of source +builds](http://voices.canonical.com/jussi.pakkanen/2013/04/16/why-you-should-consider-using-separate-build-directories/). + +Hint: The syntax of meson is `meson [command] [arguments] [options]`. The +`setup` command takes a `builddir` and a `srcdir` argument. If no `srcdir` is +given Meson will deduce the `srcdir` based on `pwd` and the location of +`meson.build`. + +Meson then loads the build configuration file and writes the corresponding +build backend in the build directory. By default Meson generates a *debug +build*, which turns on basic warnings and debug information and disables +compiler optimizations. + +Additionally, the invocation can pass options to meson. The list of options is +documented [here](Builtin-options.md). + +You can specify a different type of build with the `--buildtype` command line +argument. It can have one of the following values. + +| value | meaning | +| ------ | -------- | +| `plain` | no extra build flags are used, even for compiler warnings, useful for distro packagers and other cases where you need to specify all arguments by yourself | +| `debug` | debug info is generated but the result is not optimized, this is the default | +| `debugoptimized` | debug info is generated and the code is optimized (on most compilers this means `-g -O2`) | +| `release` | full optimization, no debug info | + +The build directory is mandatory. The reason for this is that it simplifies the +build process immensely. Meson will not under any circumstances write files +inside the source directory (if it does, it is a bug and should be fixed). This +means that the user does not need to add a bunch of files to their revision +control's ignore list. It also means that you can create arbitrarily many build +directories for any given source tree. + +For example, if we wanted to test building the source code with the Clang +compiler instead of the system default, we could just type the following +commands: + +```sh +cd /path/to/source/root +CC=clang CXX=clang++ meson setup buildclang +``` + +This separation is even more powerful if your code has multiple configuration +options (such as multiple data backends). You can create a separate +subdirectory for each of them. You can also have build directories for +optimized builds, code coverage, static analysis and so on. They are all neatly +separated and use the same source tree. Changing between different +configurations is just a question of changing to the corresponding directory. + +Unless otherwise mentioned, all following command line invocations are meant to +be run in the source directory. + +By default Meson will use the Ninja backend to build your project. If you wish +to use any of the other backends, you need to pass the corresponding argument +during configuration time. As an example, here is how you would use Meson to +generate a Visual studio solution. + +```sh +meson setup --backend=vs +``` + +You can then open the generated solution with Visual Studio and compile it in +the usual way. A list of backends can be obtained with `meson setup --help`. + +## Environment variables + +Sometimes you want to add extra compiler flags, this can be done by passing +them in environment variables when calling meson. See [the reference +tables](Reference-tables.md#compiler-and-linker-flag-envrionment-variables) for +a list of all the environment variables. Be aware however these environment +variables are only used for the native compiler and will not affect the +compiler used for cross-compiling, where the flags specified in the cross file +will be used. + +Furthermore it is possible to stop meson from adding flags itself by using the +`--buildtype=plain` option, in this case you must provide the full compiler and +linker arguments needed. + +## Building from the source + +To start the build, simply type the following command. + +```sh +meson compile -C builddir +``` + +See [`meson compile` description](Commands.md#compile) for more info. + +### Building directly with ninja + +By default Meson uses the [Ninja build system](https://ninja-build.org/) to +actually build the code. To start the build, simply type the following command. + +```sh +ninja -C builddir +``` + +The main usability difference between Ninja and Make is that Ninja will +automatically detect the number of CPUs in your computer and parallelize itself +accordingly. You can override the amount of parallel processes used with the +command line argument `-j `. + +It should be noted that after the initial configure step `ninja` is the only +command you ever need to type to compile. No matter how you alter your source +tree (short of moving it to a completely new location), Meson will detect the +changes and regenerate itself accordingly. This is especially handy if you have +multiple build directories. Often one of them is used for development (the +"debug" build) and others only every now and then (such as a "static analysis" +build). Any configuration can be built just by `cd`'ing to the corresponding +directory and running Ninja. + +## Running tests + +Meson provides native support for running tests. The command to do that is +simple. + +```sh +meson test -C builddir +``` + +See [`meson test` description](Commands.md#test) for more info. + +Meson does not force the use of any particular testing framework. You are free +to use GTest, Boost Test, Check or even custom executables. + +Note: it can be also invoked directly with ninja with the following command: +```sh +ninja -C builddir test +``` + +## Installing + +Installing the built software is just as simple. + +```sh +meson install -C builddir +``` + +See [`meson install` description](Commands.md#install) for more info. + +Note that Meson will only install build targets explicitly tagged as +installable, as detailed in the [installing targets +documentation](Installing.md). + +By default Meson installs to `/usr/local`. This can be changed by passing the +command line argument `--prefix /your/prefix` to Meson during configure time. +Meson also supports the `DESTDIR` variable used in e.g. building packages. It +is used like this: + +```sh +DESTDIR=/path/to/staging meson install -C builddir +``` + +Note: it can be also invoked directly with ninja with the following command: +```sh +ninja -C builddir install +``` + +## Command line help + +Meson has a standard command line help feature. It can be accessed with the +following command. + + meson --help + +## Exit status + +Meson exits with status 0 if successful, 1 for problems with the command line +or meson.build file, and 2 for internal errors. diff --git a/meson/docs/markdown/Shipping-prebuilt-binaries-as-wraps.md b/meson/docs/markdown/Shipping-prebuilt-binaries-as-wraps.md new file mode 100644 index 000000000..bac19f929 --- /dev/null +++ b/meson/docs/markdown/Shipping-prebuilt-binaries-as-wraps.md @@ -0,0 +1,24 @@ +# Shipping prebuilt binaries as wraps + +A common dependency case, especially on Windows, is the need to provide dependencies as prebuilt binaries rather than Meson projects that you build from scratch. Common reasons include not having access to source code, not having the time and effort to rewrite a legacy system's build definitions to Meson or just the fact that compiling the dependency projects takes too long. + +Packaging a project is straightforward. As an example let's look at a case where the project consists of one static library called `bob` and some headers. To create a binary dependency project we put the static library at the top level and headers in a subdirectory called `include`. The Meson build definition would look like the following. + +```meson +project('binary dep', 'c') + +cc = meson.get_compiler('c') +bin_dep = declare_dependency( + dependencies : cc.find_library('bob', dirs : meson.current_source_dir()), + include_directories : include_directories('include')) +``` + +Now you can use this subproject as if it was a Meson project: + +```meson +project('using dep', 'c') +bob_dep = subproject('bob').get_variable('bin_dep') +executable('prog', 'prog.c', dependencies : bob_dep) +``` + +Note that often libraries compiled with different compilers (or even compiler flags) might not be compatible. If you do this, then you are responsible for verifying that your libraries are compatible, Meson will not check things for you. diff --git a/meson/docs/markdown/Simd-module.md b/meson/docs/markdown/Simd-module.md new file mode 100644 index 000000000..0fd1dda70 --- /dev/null +++ b/meson/docs/markdown/Simd-module.md @@ -0,0 +1,70 @@ +# Unstable SIMD module + +This module provides helper functionality to build code with SIMD instructions. +Available since 0.42.0. + +**Note**: this module is unstable. It is only provided as a technology preview. +Its API may change in arbitrary ways between releases or it might be removed +from Meson altogether. + +## Usage + +This module is designed for the use case where you have an algorithm with one +or more SIMD implementation and you choose which one to use at runtime. + +The module provides one method, `check`, which is used like this: + + rval = simd.check('mysimds', + mmx : 'simd_mmx.c', + sse : 'simd_sse.c', + sse2 : 'simd_sse2.c', + sse3 : 'simd_sse3.c', + ssse3 : 'simd_ssse3.c', + sse41 : 'simd_sse41.c', + sse42 : 'simd_sse42.c', + avx : 'simd_avx.c', + avx2 : 'simd_avx2.c', + neon : 'simd_neon.c', + compiler : cc) + +Here the individual files contain the accelerated versions of the functions +in question. The `compiler` keyword argument takes the compiler you are +going to use to compile them. The function returns an array with two values. +The first value is a bunch of libraries that contain the compiled code. Any +SIMD code that the compiler can't compile (for example, Neon instructions on +an x86 machine) are ignored. You should pass this value to the desired target +using `link_with`. The second value is a `configuration_data` object that +contains true for all the values that were supported. For example if the +compiler did support sse2 instructions, then the object would have `HAVE_SSE2` +set to 1. + +Generating code to detect the proper instruction set at runtime is +straightforward. First you create a header with the configuration object and +then a chooser function that looks like this: + + void (*fptr)(type_of_function_here) = NULL; + + #if HAVE_NEON + if(fptr == NULL && neon_available()) { + fptr = neon_accelerated_function; + } + #endif + #if HAVE_AVX2 + if(fptr == NULL && avx2_available()) { + fptr = avx_accelerated_function; + } + #endif + + ... + + if(fptr == NULL) { + fptr = default_function; + } + +Each source file provides two functions, the `xxx_available` function to query +whether the CPU currently in use supports the instruction set and +`xxx_accelerated_function` that is the corresponding accelerated +implementation. + +At the end of this function the function pointer points to the fastest +available implementation and can be invoked to do the computation. diff --git a/meson/docs/markdown/Simple-comparison.md b/meson/docs/markdown/Simple-comparison.md new file mode 100644 index 000000000..5ac90e449 --- /dev/null +++ b/meson/docs/markdown/Simple-comparison.md @@ -0,0 +1,47 @@ +# A simple comparison + +In this experiment we generated one thousand C files with contents that looked like this. + +```c +#include +#include"header.h" + +int func23() { return 0; } +``` + +The function number was different in each file. In addition there was a main C file that just called each function in turn. We then generated build system files for *Meson*, *CMake*, *SCons*, *Premake* and *Autotools* that compiled these files into a single executable. + +With this we measured three different things. The first is configuration time, that is, the time the build system takes to generate necessary build files. This is usually called the *configure step*. The time was measured in seconds. + +The second thing to measure was the build time. This should be limited by the compiler and in the optimal case should be the same for every build system. Four parallel processes were used in this test. + +The third thing we measured was the empty build time. This measures how much time the build system takes to check the states of all source files because if any of them could potentially cause a rebuild. + +Since CMake has two different backends, Make and Ninja, we ran the tests on both of them. All tests were run on a 2011 era Macbook Pro running Ubuntu 13/04. The tests were run multiple times and we always took the fastest time. + +Here are the results for configuration time. + +![Configuration times](images/conftime.png) + +The reason SCons got zero seconds on this test is because you cannot separate configure and build steps. They run as one unit. Autotools is the clear loser of this test as it is over an order of magnitude slower than the second slowest one. This configuration time includes both autogen and configure. All other systems take less than one second to do this setup, which is fast enough for a human being to interpret as instantaneous. + +![Build times](https://raw.githubusercontent.com/wiki/jpakkane/meson/buildtime.png) + +Build times are a bit more even. SCons is the slowest, being almost ten seconds slower than the second slowest. Some of it is work from the configure step but it still has the worst performance. Premake is the fastest Make-based build system narrowly beating out Autotools. Both Ninja-based build systems are faster than all non-Ninja ones with Meson being slightly faster. In practice the difference is minimal. The advantages of Ninja can be seen by comparing CMake's times when using Make or Ninja. It is possible to shave off 3.5 seconds (over 20%) of the total build time just by changing the backend. The project's CMake configuration files don't need any changes. + +![No-op time](https://raw.githubusercontent.com/wiki/jpakkane/meson/emptytime.png) + +Empty build times reflect the performance of regular build times. SCons is again the slowest taking over three seconds compared to Meson, which takes only 0.03 seconds, a difference of two orders of magnitude. Even Autotools, the fastest Make-based system, is almost one order of magnitude slower. Ninja holds the top spots just like in the previous test. + +Conclusions +----- + +Build system performance matters. Even with this extremely simple example we can find differences between various popular build systems. As the project size increases, these differences grow even larger. (The author has witnessed no-op build times of 30 seconds for Make versus less than one second for Ninja when compiling the Clang compiler.) Keeping incremental build times low is one of the major keys of programmer productivity as it allows developers to iterate faster and stay in the creative zone. + +Original scripts +----- + +Those who want to run these experiments themselves can download the scripts here: + +* [Generator script](https://raw.githubusercontent.com/wiki/jpakkane/meson/gen_src.py) +* [Measurement script](https://raw.githubusercontent.com/wiki/jpakkane/meson/measure.py) diff --git a/meson/docs/markdown/SourceSet-module.md b/meson/docs/markdown/SourceSet-module.md new file mode 100644 index 000000000..4b34ff727 --- /dev/null +++ b/meson/docs/markdown/SourceSet-module.md @@ -0,0 +1,207 @@ +--- +short-description: Source set module +authors: + - name: Paolo Bonzini + email: pbonzini@redhat.com + years: [2019] +... + +# Source set module + +This module provides support for building many targets against a single set +of files; the choice of which files to include in each target depends on the +contents of a dictionary or a `configuration_data` object. The module can +be loaded with: + +``` meson +ssmod = import('sourceset') +``` + +A simple example of using the module looks like this: + +``` meson +ss = ssmod.source_set() +# Include main.c unconditionally +ss.add(files('main.c')) +# Include a.c if configuration key FEATURE1 is true +ss.add(when: 'FEATURE1', if_true: files('a.c')) +# Include zlib.c if the zlib dependency was found, and link zlib +# in the executable +ss.add(when: zlib, if_true: files('zlib.c')) +# many more rules here... +ssconfig = ss.apply(config) +executable('exe', sources: ssconfig.sources(), + dependencies: ssconfig.dependencies()) +``` + +and it would be equivalent to + +``` meson +sources = files('main.c') +dependencies = [] +if config['FEATURE1'] then + sources += [files('a.c')] +endif +if zlib.found() then + sources += [files('zlib.c')] + dependencies += [zlib] +endif +# many more "if"s here... +executable('exe', sources: sources, dependencies: dependencies()) +``` + +Sourcesets can be used with a single invocation of the `apply` method, +similar to the example above, but the module is especially useful +when multiple executables are generated by applying the same rules to +many different configurations. + +*Added 0.51.0* + +## Functions + +### `source_set()` + +``` meson +ssmod.source_set() +``` + +Create and return a new source set object. + +**Returns**: a [source set][`source_set` object] + +## `source_set` object + +The `source_set` object provides methods to add files to a source set and +to query it. The source set becomes immutable after any method but `add` +is called. + +### Methods + +#### `add()` + +``` meson +source_set.add([when: varnames_and_deps], + [if_true: sources_and_deps], + [if_false: list_of_alt_sources]) +source_set.add(sources_and_deps) +``` + +Add a *rule* to a source set. A rule determines the conditions under which +some source files or dependency objects are included in a build configuration. +All source files must be present in the source tree or they can be created +in the build tree via `configure_file`, `custom_target` or `generator`. + +`varnames_and_deps` is a list of conditions for the rule, which can be +either strings or dependency objects (a dependency object is anything that +has a `found()` method). If *all* the strings evaluate to true and all +dependencies are found, the rule will evaluate to true; `apply()` +will then include the contents of the `if_true` keyword argument in its +result. Otherwise, that is if any of the strings in the positional + arguments evaluate to false or any dependency is not found, `apply()` +will instead use the contents of the `if_false` keyword argument. + +Dependencies can also appear in `sources_and_deps`. In this case, a +missing dependency will simply be ignored and will *not* disable the rule, +similar to how the `dependencies` keyword argument works in build targets. + +**Note**: It is generally better to avoid mixing source sets and disablers. +This is because disablers will cause the rule to be dropped altogether, +and the `list_of_alt_sources` would not be taken into account anymore. + +#### `add_all()` + +``` meson +source_set.add_all(when: varnames_and_deps, + if_true: [source_set1, source_set2, ...]) +source_set.add_all(source_set1, source_set2, ...) +``` + +Add one or more source sets to another. + +For each source set listed in the arguments, `apply()` will +consider their rules only if the conditions in `varnames_and_deps` are +evaluated positively. For example, the following: + +``` meson +sources_b = ssmod.source_set() +sources_b.add(when: 'HAVE_A', if_true: 'file.c') +sources = ssmod.source_set() +sources.add_all(when: 'HAVE_B', if_true: sources_b) +``` + +is equivalent to: + +``` meson +sources = ssmod.source_set() +sources.add(when: ['HAVE_A', 'HAVE_B'], if_true: 'file.c') +``` + +#### `all_sources()` + +``` meson +list source_set.all_sources(...) +``` + +Returns a list of all sources that were placed in the source set using +`add` (including nested source sets) and that do not have a not-found +dependency. If a rule has a not-found dependency, only the `if_false` +sources are included (if any). + +**Returns**: a list of file objects + +#### `all_dependencies()` *(since 0.52.0)* + +``` meson +list source_set.all_dependencies(...) +``` + +Returns a list of all dependencies that were placed in the source set +using `add` (including nested source sets) and that were found. + +**Returns**: a list of dependencies + +#### `apply()` + +``` meson +source_files source_set.apply(conf_data[, strict: false]) +``` + +Match the source set against a dictionary or a `configuration_data` object +and return a *source configuration* object. A source configuration object +allows you to retrieve the sources and dependencies for a specific configuration. + +By default, all the variables that were specified in the rules have to +be present in `conf_data`. However, in some cases the convention is +that `false` configuration symbols are absent in `conf_data`; this is +the case for example when the configuration was loaded from a Kconfig file. +In that case you can specify the `strict: false` keyword argument, which +will treat absent variables as false. + +**Returns**: a [source configuration][`source_configuration` object] + +## `source_configuration` object + +The `source_configuration` object provides methods to query the result of an +`apply` operation on a source set. + +### Methods + +#### `sources()` + +``` meson +source_config.sources() +``` + +Return the source files corresponding to the applied configuration. + +**Returns**: a list of file objects + +#### `dependencies()` + +``` meson +source_config.dependencies() +``` + +Return the dependencies corresponding to the applied configuration. + +**Returns**: a list of dependency objects diff --git a/meson/docs/markdown/Style-guide.md b/meson/docs/markdown/Style-guide.md new file mode 100644 index 000000000..960e60c55 --- /dev/null +++ b/meson/docs/markdown/Style-guide.md @@ -0,0 +1,74 @@ +--- +short-description: Style recommendations for Meson files +... + +# Style recommendations + +This page lists some recommendations on organizing and formatting your +Meson build files. + +## Tabs or spaces? + +Always spaces. + +## Naming Variable + +The most consistent naming convention is the snake case. Let say you would +like to refer to your executable so something like `my_exe` would work or +just `exe`. + +## Naming options + +There are two ways of naming project options. As an example for +booleans the first one is `foo` and the second one is +`enable-foo`. The former style is recommended, because in Meson +options have strong type, rather than being just strings. + +You should try to name options the same as is common in other +projects. This is especially important for yielding options, because +they require that both the parent and subproject options have the same +name. + +# Global arguments + +Prefer `add_project_arguments` to `add_global_arguments` because using +the latter prevents using the project as a subproject. + +# Cross compilation arguments + +Try to keep cross compilation arguments away from your build files as +much as possible. Keep them in the cross file instead. This adds +portability, since all changes needed to compile to a different +platform are isolated in one place. + +# Sorting source paths + +The source file arrays should all be sorted. This makes it easier to spot +errors and often reduces merge conflicts. Furthermore, the paths should be +sorted with a natural sorting algorithm, so that numbers are sorted in an +intuitive way (`1, 2, 3, 10, 20` instead of `1, 10, 2, 20, 3`). + +Numbers should also be sorted before characters (`a111` before `ab0`). +Furthermore, strings should be sorted case insensitive. + +Additionally, if a path contains a directory it should be sorted before +normal files. This rule also applies recursively for subdirectories. + +The following example shows correct source list definition: + +```meson +sources = files([ + 'aaa/a1.c', + 'aaa/a2.c', + 'bbb/subdir1/b1.c', + 'bbb/subdir2/b2.c', + 'bbb/subdir10/b3.c', + 'bbb/subdir20/b4.c', + 'bbb/b5.c', + 'bbb/b6.c', + 'f1.c', + 'f2.c', + 'f10.c', + 'f20.c' +]) +``` diff --git a/meson/docs/markdown/Subprojects.md b/meson/docs/markdown/Subprojects.md new file mode 100644 index 000000000..02a83e67c --- /dev/null +++ b/meson/docs/markdown/Subprojects.md @@ -0,0 +1,337 @@ +--- +short-description: Using meson projects as subprojects within other meson projects +... + +# Subprojects + +Some platforms do not provide a native packaging system. In these +cases it is common to bundle all third party libraries in your source +tree. This is usually frowned upon because it makes it hard to add +these kinds of projects into e.g. those Linux distributions that +forbid bundled libraries. + +Meson tries to solve this problem by making it extremely easy to +provide both at the same time. The way this is done is that Meson +allows you to take any other Meson project and make it a part of your +build without (in the best case) any changes to its Meson setup. It +becomes a transparent part of the project. + +It should be noted that this is only guaranteed to work for subprojects +that are built with Meson. The reason is the simple fact that there is +no possible way to do this reliably with mixed build systems. Because of +this, only meson subprojects are described here. +[CMake based subprojects](CMake-module.md#cmake-subprojects) are also +supported but not guaranteed to work. + +## A subproject example + +Usually dependencies consist of some header files plus a library to link against. +To declare this internal dependency use `declare_dependency` function. + +As an example, suppose we have a simple project that provides a shared +library. Its `meson.build` would look like this. + +```meson +project('libsimple', 'c') + +inc = include_directories('include') +libsimple = shared_library('simple', + 'simple.c', + include_directories : inc, + install : true) + +libsimple_dep = declare_dependency(include_directories : inc, + link_with : libsimple) +``` + +### Naming convention for dependency variables + +Ideally the dependency variable name should be of `_dep` form. +This way one can just use it without even looking inside build definitions of that subproject. + +In cases where there are multiple dependencies need to be declared, the default one +should be named as `_dep` (e.g. `gtest_dep`), and others can have +`___dep` form (e.g. `gtest_main_dep` - gtest with main function). + +There may be exceptions to these rules where common sense should be applied. + +### Adding variables to the dependency + +*New in 0.54.0* + +In some cases a project may define special variables via pkg-config or cmake +that a caller needs to know about. Meson provides a `dependency.get_variable` +method to hide what kind of dependency is provided, and this is available to +subprojects as well. Use the `variables` keyword to add a dict of strings: + +```meson +my_dep = declare_dependency(..., variables : {'var': 'value', 'number': '3'}) +``` + +Which another project can access via: + +```meson +var = my_dep.get_variable(internal : 'var', cmake : 'CMAKE_VAR') +``` + +The values of the dict must be strings, as pkg-config and cmake will return +variables as strings. + +### Build options in subproject + +All Meson features of the subproject, such as project options keep +working and can be set in the master project. There are a few +limitations, the most important being that global compiler arguments +must be set in the main project before calling subproject. Subprojects +must not set global arguments because there is no way to do that +reliably over multiple subprojects. To check whether you are running +as a subproject, use the `is_subproject` function. + +## Using a subproject + +All subprojects must be inside `subprojects` directory. +The `subprojects` directory must be at the top level of your project. +Subproject declaration must be in your top level `meson.build`. + +### A simple example + +Let's use `libsimple` as a subproject. + +At the top level of your project create `subprojects` directory. +Then copy `libsimple` into `subprojects` directory. + +Your project's `meson.build` should look like this. + +```meson +project('my_project', 'cpp') + +libsimple_proj = subproject('libsimple') +libsimple_dep = libsimple_proj.get_variable('libsimple_dep') + +executable('my_project', + 'my_project.cpp', + dependencies : libsimple_dep, + install : true) +``` + +Note that the subproject object is *not* used as the dependency, but +rather you need to get the declared dependency from it with +`get_variable` because a subproject may have multiple declared +dependencies. + +### Toggling between system libraries and embedded sources + +When building distro packages it is very important that you do not +embed any sources. Some distros have a rule forbidding embedded +dependencies so your project must be buildable without them or +otherwise the packager will hate you. + +Here's how you would use system libraries and fall back to embedding sources +if the dependency is not available. + +```meson +project('my_project', 'cpp') + +libsimple_dep = dependency('libsimple', required : false) + +if not libsimple_dep.found() + libsimple_proj = subproject('libsimple') + libsimple_dep = libsimple_proj.get_variable('libsimple_dep') +endif + +executable('my_project', + 'my_project.cpp', + dependencies : libsimple_dep, + install : true) +``` + +Because this is such a common operation, Meson provides a shortcut for +this use case. + +```meson +dep = dependency('foo', fallback : [subproject_name, variable_name]) +``` + +The `fallback` keyword argument takes two items, the name of the +subproject and the name of the variable that holds the dependency. If +you need to do something more complicated, such as extract several +different variables, then you need to do it yourself with the manual +method described above. + +Using this shortcut the build definition would look like this. + +```meson +project('my_project', 'cpp') + +libsimple_dep = dependency('libsimple', fallback : ['libsimple', 'libsimple_dep']) + +executable('my_project', + 'my_project.cpp', + dependencies : libsimple_dep, + install : true) +``` + +With this setup when libsimple is provided by the system, we use it. When +that is not the case we use the embedded version (the one from subprojects). + +Note that `libsimple_dep` can point to an external or an internal dependency but +you don't have to worry about their differences. Meson will take care +of the details for you. + +### Subprojects depending on other subprojects + +Subprojects can use other subprojects, but all subprojects must reside +in the top level `subprojects` directory. Recursive use of subprojects +is not allowed, though, so you can't have subproject `a` that uses +subproject `b` and have `b` also use `a`. + +## Obtaining subprojects + +Meson ships with a dependency system to automatically obtain +dependency subprojects. It is documented in the [Wrap dependency +system manual](Wrap-dependency-system-manual.md). + +## Command-line options + +The usage of subprojects can be controlled by users and distros with +the following command-line options: + +* **--wrap-mode=nodownload** + + Meson will not use the network to download any subprojects or + fetch any wrap information. Only pre-existing sources will be used. + This is useful (mostly for distros) when you want to only use the + sources provided by a software release, and want to manually handle + or provide missing dependencies. + +* **--wrap-mode=nofallback** + + Meson will not use subproject fallbacks for any dependency + declarations in the build files, and will only look for them in the + system. Note that this does not apply to unconditional subproject() + calls, and those are meant to be used for sources that cannot be + provided by the system, such as copylibs. + + This option may be overriden by `--force-fallback-for` for specific + dependencies. + +* **--wrap-mode=forcefallback** + + Meson will not look at the system for any dependencies which have + subproject fallbacks available, and will *only* use subprojects for + them. This is useful when you want to test your fallback setup, or + want to specifically build against the library sources provided by + your subprojects. + +* **--force-fallback-for=list,of,dependencies** + + Meson will not look at the system for any dependencies listed there, + provided a fallback was supplied when the dependency was declared. + + This option takes precedence over `--wrap-mode=nofallback`, and when + used in combination with `--wrap-mode=nodownload` will only work + if the dependency has already been downloaded. + + This is useful when your project has many fallback dependencies, + but you only want to build against the library sources for a few + of them. + + **Warning**: This could lead to mixing system and subproject version of the + same library in the same process. Take this case as example: + - Libraries `glib-2.0` and `gstreamer-1.0` are installed on your system. + - `gstreamer-1.0` depends on `glib-2.0`, pkg-config file `gstreamer-1.0.pc` + has `Requires: glib-2.0`. + - In your application build definition you do: + ```meson + executable('app', ..., + dependencies: [ + dependency('glib-2.0', fallback: 'glib'), + dependency('gstreamer-1.0', fallback: 'gstreamer')], + ) + ``` + - You configure with `--force-fallback-for=glib`. + This result in linking to two different versions of library `glib-2.0` + because `dependency('glib-2.0', fallback: 'glib')` will return the + subproject dependency, but `dependency('gstreamer-1.0', fallback: 'gstreamer')` + will not fallback and return the system dependency, including `glib-2.0` + library. To avoid that situation, every dependency that itself depend on + `glib-2.0` must also be forced to fallback, in this case with + `--force-fallback-for=glib,gsteamer`. + +## Download subprojects + +*Since 0.49.0* + +Meson will automatically download needed subprojects during configure, unless +**--wrap-mode=nodownload** option is passed. It is sometimes preferable to +download all subprojects in advance, so the meson configure can be performed +offline. The command-line `meson subprojects download` can be used for that, it +will download all missing subprojects, but will not update already fetched +subprojects. + +## Update subprojects + +*Since 0.49.0* + +Once a subproject has been fetched, Meson will not update it automatically. +For example if the wrap file tracks a git branch, it won't pull latest commits. + +To pull latest version of all your subprojects at once, just run the command: +`meson subprojects update`. +- If the wrap file comes from wrapdb, the latest version of the wrap file will + be pulled and used next time meson reconfigure the project. This can be + triggered using `meson --reconfigure`. Previous source tree is not deleted, to + prevent from any loss of local changes. +- If the wrap file points to a git commit or tag, a checkout of that commit is + performed. +- If the wrap file points to a git branch, and the current branch has the same + name, a `git pull` is performed. +- If the wrap file points to a git branch, and the current branch is different, + it is skipped. Unless `--rebase` option is passed in which case + `git pull --rebase` is performed. + +## Start a topic branch across all git subprojects + +*Since 0.49.0* + +The command-line `meson subprojects checkout ` will checkout a +branch, or create one with `-b` argument, in every git subprojects. This is +useful when starting local changes across multiple subprojects. It is still your +responsibility to commit and push in each repository where you made local +changes. + +To come back to the revision set in wrap file (i.e. master), just run +`meson subprojects checkout` with no branch name. + +## Execute a command on all subprojects + +*Since 0.51.0* + +The command-line `meson subprojects foreach [...]` will +execute a command in each subproject directory. For example this can be useful +to check the status of subprojects (e.g. with `git status` or `git diff`) before +performing other actions on them. + +## Why must all subprojects be inside a single directory? + +There are several reasons. + +First of all, to maintain any sort of sanity, the system must prevent going +inside other subprojects with `subdir()` or variations thereof. Having the +subprojects in well defined places makes this easy. If subprojects could be +anywhere at all, it would be a lot harder. + +Second of all it is extremely important that end users can easily see what +subprojects any project has. Because they are in one, and only one, place, +reviewing them becomes easy. + +This is also a question of convention. Since all Meson projects have the same +layout w.r.t subprojects, switching between projects becomes easier. You don't +have to spend time on a new project traipsing through the source tree looking +for subprojects. They are always in the same place. + +Finally if you can have subprojects anywhere, this increases the possibility of +having many different (possibly incompatible) versions of a dependency in your +source tree. Then changing some code (such as changing the order you traverse +directories) may cause a completely different version of the subproject to be +used by accident. diff --git a/meson/docs/markdown/Syntax.md b/meson/docs/markdown/Syntax.md new file mode 100644 index 000000000..7cb39e9a4 --- /dev/null +++ b/meson/docs/markdown/Syntax.md @@ -0,0 +1,674 @@ +--- +short-description: Syntax and structure of Meson files +... + +# Syntax + +The syntax of Meson's specification language has been kept as simple +as possible. It is *strongly typed* so no object is ever converted to +another under the covers. Variables have no visible type which makes +Meson *dynamically typed* (also known as *duck typed*). + +The main building blocks of the language are *variables*, *numbers*, +*booleans*, *strings*, *arrays*, *function calls*, *method calls*, *if +statements* and *includes*. + +Usually one Meson statement takes just one line. There is no way to +have multiple statements on one line as in e.g. *C*. Function and +method calls' argument lists can be split over multiple lines. Meson +will autodetect this case and do the right thing. + +In other cases, *(added 0.50)* you can get multi-line statements by ending the +line with a `\`. Apart from line ending whitespace has no syntactic meaning. + +## Variables + +Variables in Meson work just like in other high level programming +languages. A variable can contain a value of any type, such as an +integer or a string. Variables don't need to be predeclared, you can +just assign to them and they appear. Here's how you would assign +values to two different variables. + +```meson +var1 = 'hello' +var2 = 102 +``` + +One important difference in how variables work in Meson is that all +objects are immutable. This is different from, for example, how Python +works. + +```meson +var1 = [1, 2, 3] +var2 = var1 +var2 += [4] +# var2 is now [1, 2, 3, 4] +# var1 is still [1, 2, 3] +``` + +## Numbers + +Meson supports only integer numbers. They are declared simply by +writing them out. Basic arithmetic operations are supported. + +```meson +x = 1 + 2 +y = 3 * 4 +d = 5 % 3 # Yields 2. +``` + +Hexadecimal literals are supported since version 0.45.0: + +```meson +int_255 = 0xFF +``` + +Octal and binary literals are supported since version 0.47.0: + +```meson +int_493 = 0o755 +int_1365 = 0b10101010101 +``` + +Strings can be converted to a number like this: + +```meson +string_var = '42' +num = string_var.to_int() +``` + +Numbers can be converted to a string: + +```meson +int_var = 42 +string_var = int_var.to_string() +``` + +## Booleans + +A boolean is either `true` or `false`. + +```meson +truth = true +``` + +## Strings + +Strings in Meson are declared with single quotes. To enter a literal +single quote do it like this: + +```meson +single quote = 'contains a \' character' +``` + +The full list of escape sequences is: + +* `\\` Backslash +* `\'` Single quote +* `\a` Bell +* `\b` Backspace +* `\f` Formfeed +* `\n` Newline +* `\r` Carriage Return +* `\t` Horizontal Tab +* `\v` Vertical Tab +* `\ooo` Character with octal value ooo +* `\xhh` Character with hex value hh +* `\uxxxx` Character with 16-bit hex value xxxx +* `\Uxxxxxxxx` Character with 32-bit hex value xxxxxxxx +* `\N{name}` Character named name in Unicode database + +As in python and C, up to three octal digits are accepted in `\ooo`. + +Unrecognized escape sequences are left in the string unchanged, i.e., the +backslash is left in the string. + +### String concatenation + +Strings can be concatenated to form a new string using the `+` symbol. + +```meson +str1 = 'abc' +str2 = 'xyz' +combined = str1 + '_' + str2 # combined is now abc_xyz +``` + +### String path building + +*(Added 0.49)* + +You can concatenate any two strings using `/` as an operator to build paths. +This will always use `/` as the path separator on all platforms. + +```meson +joined = '/usr/share' / 'projectname' # => /usr/share/projectname +joined = '/usr/local' / '/etc/name' # => /etc/name + +joined = 'C:\\foo\\bar' / 'builddir' # => C:/foo/bar/builddir +joined = 'C:\\foo\\bar' / 'D:\\builddir' # => D:/builddir +``` + +Note that this is equivalent to using [`join_paths()`](Reference-manual.md#join_paths), +which was obsoleted by this operator. + +### Strings running over multiple lines + +Strings running over multiple lines can be declared with three single +quotes, like this: + +```meson +multiline_string = '''#include +int main (int argc, char ** argv) { + return FOO_SUCCESS; +}''' +``` + +These are raw strings that do not support the escape sequences listed +above. These strings can also be combined with the string formatting +functionality described below. + +### String formatting + +Strings can be built using the string formatting functionality. + +```meson +template = 'string: @0@, number: @1@, bool: @2@' +res = template.format('text', 1, true) +# res now has value 'string: text, number: 1, bool: true' +``` + +As can be seen, the formatting works by replacing placeholders of type +`@number@` with the corresponding argument. + +### String methods + +Strings also support a number of other methods that return transformed +copies. + +#### .strip() + +```meson +# Similar to the Python str.strip(). Removes leading/ending spaces and newlines +define = ' -Dsomedefine ' +stripped_define = define.strip() +# 'stripped_define' now has the value '-Dsomedefine' +``` + +#### .to_upper(), .to_lower() + +```meson +target = 'x86_FreeBSD' +upper = target.to_upper() # t now has the value 'X86_FREEBSD' +lower = target.to_lower() # t now has the value 'x86_freebsd' +``` + +#### .to_int() + +```meson +version = '1' +# Converts the string to an int and throws an error if it can't be +ver_int = version.to_int() +``` + +#### .contains(), .startswith(), .endswith() + +```meson +target = 'x86_FreeBSD' +is_fbsd = target.to_lower().contains('freebsd') +# is_fbsd now has the boolean value 'true' +is_x86 = target.startswith('x86') # boolean value 'true' +is_bsd = target.to_lower().endswith('bsd') # boolean value 'true' +``` + +#### .split(), .join() + +```meson +# Similar to the Python str.split() +components = 'a b c d '.split() +# components now has the value ['a', 'b', 'c', 'd'] +components = 'a b c d '.split(' ') +# components now has the value ['a', 'b', '', '', 'c', 'd', ''] + +# Similar to the Python str.join() +output = ' '.join(['foo', 'bar']) +# Output value is 'foo bar' +pathsep = ':' +path = pathsep.join(['/usr/bin', '/bin', '/usr/local/bin']) +# path now has the value '/usr/bin:/bin:/usr/local/bin' + +# For joining path elements, you should use path1 / path2 +# This has the advantage of being cross-platform +path = '/usr' / 'local' / 'bin' +# path now has the value '/usr/local/bin' + +# For sources files, use files(): +my_sources = files('foo.c') +... +my_sources += files('bar.c') +# This has the advantage of always calculating the correct relative path, even +# if you add files in another directory or use them in a different directory +# than they're defined in + +# Example to set an API version for use in library(), install_header(), etc +project('project', 'c', version: '0.2.3') +version_array = meson.project_version().split('.') +# version_array now has the value ['0', '2', '3'] +api_version = '.'.join([version_array[0], version_array[1]]) +# api_version now has the value '0.2' + +# We can do the same with .format() too: +api_version = '@0@.@1@'.format(version_array[0], version_array[1]) +# api_version now (again) has the value '0.2' +``` + +#### .underscorify() + +```meson +name = 'Meson Docs.txt#Reference-manual' +# Replaces all characters other than `a-zA-Z0-9` with `_` (underscore) +# Useful for substituting into #defines, filenames, etc. +underscored = name.underscorify() +# underscored now has the value 'Meson_Docs_txt_Reference_manual' +``` + +#### .version_compare() + +```meson +version = '1.2.3' +# Compare version numbers semantically +is_new = version.version_compare('>=2.0') +# is_new now has the boolean value false +# Supports the following operators: '>', '<', '>=', '<=', '!=', '==', '=' +``` + +Meson version comparison conventions include: + +```meson +'3.6'.version_compare('>=3.6.0') == false +``` + +It is best to be unambiguous and specify the full revision level to compare. + +## Arrays + +Arrays are delimited by brackets. An array can contain an arbitrary number of objects of any type. + +```meson +my_array = [1, 2, 'string', some_obj] +``` + +Accessing elements of an array can be done via array indexing: + +```meson +my_array = [1, 2, 'string', some_obj] +second_element = my_array[1] +last_element = my_array[-1] +``` + +You can add more items to an array like this: + +```meson +my_array += ['foo', 3, 4, another_obj] +``` + +When adding a single item, you do not need to enclose it in an array: + +```meson +my_array += ['something'] +# This also works +my_array += 'else' +``` + +Note appending to an array will always create a new array object and +assign it to `my_array` instead of modifying the original since all +objects in Meson are immutable. + +Since 0.49.0, you can check if an array contains an element like this: + +```meson +my_array = [1, 2] +if 1 in my_array +# This condition is true +endif +if 1 not in my_array +# This condition is false +endif +``` + +### Array methods + +The following methods are defined for all arrays: + + - `length`, the size of the array + - `contains`, returns `true` if the array contains the object given as argument, `false` otherwise + - `get`, returns the object at the given index, negative indices count from the back of the array, indexing out of bounds is a fatal error. Provided for backwards-compatibility, it is identical to array indexing. + +## Dictionaries + +Dictionaries are delimited by curly braces. A dictionary can contain an +arbitrary number of key value pairs. Keys are required to be strings, values can +be objects of any type. Prior to *0.53.0* keys were required to be literal strings. + +```meson +my_dict = {'foo': 42, 'bar': 'baz'} +``` + +Keys must be unique: + +```meson +# This will fail +my_dict = {'foo': 42, 'foo': 43} +``` + +Dictionaries are immutable and do not have a guaranteed order. + +Dictionaries are available since 0.47.0. + +Visit the [Reference Manual](Reference-manual.md#dictionary-object) to read +about the methods exposed by dictionaries. + +Since 0.49.0, you can check if a dictionary contains a key like this: + +```meson +my_dict = {'foo': 42, 'bar': 43} +if 'foo' in my_dict +# This condition is true +endif +if 42 in my_dict +# This condition is false +endif +if 'foo' not in my_dict +# This condition is false +endif +``` + +*Since 0.53.0* Keys can be any expression evaluating to a string value, not limited +to string literals any more. + +```meson +d = {'a' + 'b' : 42} +k = 'cd' +d += {k : 43} +``` + +## Function calls + +Meson provides a set of usable functions. The most common use case is +creating build objects. + +```meson +executable('progname', 'prog.c') +``` + +Most functions take only few positional arguments but several keyword +arguments, which are specified like this: + +```meson +executable('progname', + sources: 'prog.c', + c_args: '-DFOO=1') +``` + +Starting with version 0.49.0 keyword arguments can be specified +dynamically. This is done by passing dictionary representing the +keywords to set in the `kwargs` keyword. The previous example would be +specified like this: + +```meson +d = {'sources': 'prog.c', + 'c_args': '-DFOO=1'} + +executable('progname', + kwargs: d) +``` + +A single function can take keyword argumets both directly in the +function call and indirectly via the `kwargs` keyword argument. The +only limitation is that it is a hard error to pass any particular key +both as a direct and indirect argument. + +```meson +d = {'c_args': '-DFOO'} +executable('progname', 'prog.c', + c_args: '-DBAZ=1', + kwargs: d) # This is an error! +``` + +Attempting to do this causes Meson to immediately exit with an error. + +## Method calls + +Objects can have methods, which are called with the dot operator. The +exact methods it provides depends on the object. + +```meson +myobj = some_function() +myobj.do_something('now') +``` + +## If statements + +If statements work just like in other languages. + +```meson +var1 = 1 +var2 = 2 +if var1 == var2 # Evaluates to false + something_broke() +elif var3 == var2 + something_else_broke() +else + everything_ok() +endif + +opt = get_option('someoption') +if opt != 'foo' + do_something() +endif +``` + +## Logical operations + +Meson has the standard range of logical operations which can be used in +`if` statements. + +```meson +if a and b + # do something +endif +if c or d + # do something +endif +if not e + # do something +endif +if not (f or g) + # do something +endif +``` + +Logical operations work only on boolean values. + +## Foreach statements + +To do an operation on all elements of an iterable, use the `foreach` +command. + +> Note that Meson variables are immutable. Trying to assign a new value +> to the iterated object inside a foreach loop will not affect foreach's +> control flow. + +### Foreach with an array + +Here's an example of how you could define two executables +with corresponding tests using arrays and foreach. + +```meson +progs = [['prog1', ['prog1.c', 'foo.c']], + ['prog2', ['prog2.c', 'bar.c']]] + +foreach p : progs + exe = executable(p[0], p[1]) + test(p[0], exe) +endforeach +``` + +### Foreach with a dictionary + +Here's an example of you could iterate a set of components that +should be compiled in according to some configuration. This uses +a [dictionary][dictionaries], which is available since 0.47.0. + +```meson +components = { + 'foo': ['foo.c'], + 'bar': ['bar.c'], + 'baz': ['baz.c'], +} + +# compute a configuration based on system dependencies, custom logic +conf = configuration_data() +conf.set('USE_FOO', 1) + +# Determine the sources to compile +sources_to_compile = [] +foreach name, sources : components + if conf.get('USE_@0@'.format(name.to_upper()), 0) == 1 + sources_to_compile += sources + endif +endforeach +``` + +### Foreach `break` and `continue` + +Since 0.49.0 `break` and `continue` keywords can be used inside foreach loops. + +```meson +items = ['a', 'continue', 'b', 'break', 'c'] +result = [] +foreach i : items + if i == 'continue' + continue + elif i == 'break' + break + endif + result += i +endforeach +# result is ['a', 'b'] +``` + +## Comments + +A comment starts with the `#` character and extends until the end of the line. + +```meson +some_function() # This is a comment +some_other_function() +``` + +## Ternary operator + +The ternary operator works just like in other languages. + +```meson +x = condition ? true_value : false_value +``` + +The only exception is that nested ternary operators are forbidden to +improve legibility. If your branching needs are more complex than this +you need to write an `if/else` construct. + +## Includes + +Most source trees have multiple subdirectories to process. These can +be handled by Meson's `subdir` command. It changes to the given +subdirectory and executes the contents of `meson.build` in that +subdirectory. All state (variables etc) are passed to and from the +subdirectory. The effect is roughly the same as if the contents of the +subdirectory's Meson file would have been written where the include +command is. + +```meson +test_data_dir = 'data' +subdir('tests') +``` + +## User-defined functions and methods + +Meson does not currently support user-defined functions or +methods. The addition of user-defined functions would make Meson +Turing-complete which would make it harder to reason about and more +difficult to integrate with tools like IDEs. More details about this +are [in the +FAQ](FAQ.md#why-is-meson-not-just-a-python-module-so-i-could-code-my-build-setup-in-python). If +because of this limitation you find yourself copying and pasting code +a lot you may be able to use a [`foreach` loop +instead](#foreach-statements). + +## Stability Promises + +Meson is very actively developed and continuously improved. There is a +possibility that future enhancements to the Meson build system will require +changes to the syntax. Such changes might be the addition of new reserved +keywords, changing the meaning of existing keywords or additions around the +basic building blocks like statements and fundamental types. It is planned +to stabilize the syntax with the 1.0 release. + +## Grammar + +This is the full Meson grammar, as it is used to parse Meson build definition files: + +``` +additive_expression: multiplicative_expression | (additive_expression additive_operator multiplicative_expression) +additive_operator: "+" | "-" +argument_list: positional_arguments ["," keyword_arguments] | keyword_arguments +array_literal: "[" [expression_list] "]" +assignment_expression: conditional_expression | (logical_or_expression assignment_operator assignment_expression) +assignment_operator: "=" | "*=" | "/=" | "%=" | "+=" | "-=" +boolean_literal: "true" | "false" +build_definition: (NEWLINE | statement)* +condition: expression +conditional_expression: logical_or_expression | (logical_or_expression "?" expression ":" assignment_expression +decimal_literal: DECIMAL_NUMBER +DECIMAL_NUMBER: /[1-9][0-9]*/ +dictionary_literal: "{" [key_value_list] "}" +equality_expression: relational_expression | (equality_expression equality_operator relational_expression) +equality_operator: "==" | "!=" +expression: assignment_expression +expression_list: expression ("," expression)* +expression_statememt: expression +function_expression: id_expression "(" [argument_list] ")" +hex_literal: "0x" HEX_NUMBER +HEX_NUMBER: /[a-fA-F0-9]+/ +id_expression: IDENTIFIER +IDENTIFIER: /[a-zA-Z_][a-zA-Z_0-9]*/ +identifier_list: id_expression ("," id_expression)* +integer_literal: decimal_literal | octal_literal | hex_literal +iteration_statement: "foreach" identifier_list ":" id_expression NEWLINE (statement | jump_statement)* "endforeach" +jump_statement: ("break" | "continue") NEWLINE +key_value_item: expression ":" expression +key_value_list: key_value_item ("," key_value_item)* +keyword_item: id_expression ":" expression +keyword_arguments: keyword_item ("," keyword_item)* +literal: integer_literal | string_literal | boolean_literal | array_literal | dictionary_literal +logical_and_expression: equality_expression | (logical_and_expression "and" equality_expression) +logical_or_expression: logical_and_expression | (logical_or_expression "or" logical_and_expression) +method_expression: postfix_expression "." function_expression +multiplicative_expression: unary_expression | (multiplicative_expression multiplicative_operator unary_expression) +multiplicative_operator: "*" | "/" | "%" +octal_literal: "0o" OCTAL_NUMBER +OCTAL_NUMBER: /[0-7]+/ +positional_arguments: expression ("," expression)* +postfix_expression: primary_expression | subscript_expression | function_expression | method_expression +primary_expression: literal | ("(" expression ")") | id_expression +relational_expression: additive_expression | (relational_expression relational_operator additive_expression) +relational_operator: ">" | "<" | ">=" | "<=" | "in" | ("not" "in") +selection_statement: "if" condition NEWLINE (statement)* ("elif" condition NEWLINE (statement)*)* ["else" (statement)*] "endif" +statement: (expression_statement | selection_statement | iteration_statement) NEWLINE +string_literal: ("'" STRING_SIMPLE_VALUE "'") | ("'''" STRING_MULTILINE_VALUE "'''") +STRING_MULTILINE_VALUE: \.*?(''')\ +STRING_SIMPLE_VALUE: \.*?(? + +int main(int argc, char **argv) { + printf("Hello there.\n"); + return 0; +} +``` + +Then we create a Meson build description and put it in a file called +`meson.build` in the same directory. Its contents are the following. + +```meson +project('tutorial', 'c') +executable('demo', 'main.c') +``` + +That is all. We are now ready to build our application. First we need +to initialize the build by going into the source directory and issuing +the following commands. + +```console +$ meson builddir +``` + +We create a separate build directory to hold all of the compiler +output. Meson is different from some other build systems in that it +does not permit in-source builds. You must always create a separate +build directory. Common convention is to put the default build +directory in a subdirectory of your top level source directory. + +When Meson is run it prints the following output. + + The Meson build system + version: 0.13.0-research + Source dir: /home/jpakkane/mesontutorial + Build dir: /home/jpakkane/mesontutorial/builddir + Build type: native build + Project name is "tutorial". + Using native c compiler "ccache cc". (gcc 4.8.2) + Creating build target "demo" with 1 files. + +Now we are ready to build our code. + +``` +$ cd builddir +$ meson compile +``` + +Once that is done we can run the resulting binary. + +```console +$ ./demo +``` + +This produces the expected output. + + Hello there. + +Adding dependencies +----- + +Just printing text is a bit old fashioned. Let's update our program to +create a graphical window instead. We'll use the +[GTK+](https://gtk.org) widget toolkit. First we edit the main file to +use GTK+. The new version looks like this. + +```c +#include + +int main(int argc, char **argv) { + GtkWidget *win; + gtk_init(&argc, &argv); + win = gtk_window_new(GTK_WINDOW_TOPLEVEL); + gtk_window_set_title(GTK_WINDOW(win), "Hello there"); + g_signal_connect(win, "destroy", G_CALLBACK(gtk_main_quit), NULL); + gtk_widget_show(win); + gtk_main(); +} +``` + +Then we edit the Meson file, instructing it to find and use the GTK+ +libraries. + +```meson +project('tutorial', 'c') +gtkdep = dependency('gtk+-3.0') +executable('demo', 'main.c', dependencies : gtkdep) +``` + +Now we are ready to build. The thing to notice is that we do *not* +need to recreate our build directory, run any sort of magical commands +or the like. Instead we just type the exact same command as if we were +rebuilding our code without any build system changes. + +``` +$ meson compile +``` + +Once you have set up your build directory the first time, you don't +ever need to run the `meson` command again. You always just run +`meson compile`. Meson will automatically detect when you have done changes to +build definitions and will take care of everything so users don't have +to care. In this case the following output is produced. + + [1/1] Regenerating build files + The Meson build system + version: 0.13.0-research + Source dir: /home/jpakkane/mesontutorial + Build dir: /home/jpakkane/mesontutorial/builddir + Build type: native build + Project name is "tutorial". + Using native c compiler "ccache cc". (gcc 4.8.2) + Found pkg-config version 0.26. + Dependency gtk+-3.0 found: YES + Creating build target "demo" with 1 files. + [1/2] Compiling c object demo.dir/main.c.o + [2/2] Linking target demo + +Note how Meson noticed that the build definition has changed and reran +itself automatically. The program is now ready to be run: + +``` +$ ./demo +``` + +This creates the following GUI application. + +![GTK+ sample application screenshot](images/gtksample.png) diff --git a/meson/docs/markdown/Unit-tests.md b/meson/docs/markdown/Unit-tests.md new file mode 100644 index 000000000..60fcad216 --- /dev/null +++ b/meson/docs/markdown/Unit-tests.md @@ -0,0 +1,261 @@ +--- +short-description: Meson's own unit-test system +... + +# Unit tests + +Meson comes with a fully functional unit test system. To use it simply build +an executable and then use it in a test. + +```meson +e = executable('prog', 'testprog.c') +test('name of test', e) +``` + +You can add as many tests as you want. They are run with the command `meson +test`. + +Meson captures the output of all tests and writes it in the log file +`meson-logs/testlog.txt`. + +## Test parameters + +Some tests require the use of command line arguments or environment +variables. These are simple to define. + +```meson +test('command line test', exe, args : ['first', 'second']) +test('envvar test', exe2, env : ['key1=value1', 'key2=value2']) +``` + +Note how you need to specify multiple values as an array. + +### MALLOC_PERTURB_ + +By default, environment variable +[`MALLOC_PERTURB_`](http://man7.org/linux/man-pages/man3/mallopt.3.html) is +set to a random value between 1..255. This can help find memory leaks on +configurations using glibc, including with non-GCC compilers. This feature +can be disabled as discussed in [test()](Reference-manual.md#test). + +## Coverage + +If you enable coverage measurements by giving Meson the command line flag +`-Db_coverage=true`, you can generate coverage reports after running the +tests (running the tests is required to gather the list of functions that get +called). Meson will autodetect what coverage generator tools you have +installed and will generate the corresponding targets. These targets are +`coverage-xml` and `coverage-text` which are both provided by +[Gcovr](http://gcovr.com) (version 3.3 or higher) and `coverage-html`, which +requires [Lcov](https://ltp.sourceforge.io/coverage/lcov.php) and +[GenHTML](https://linux.die.net/man/1/genhtml) or [Gcovr](http://gcovr.com). +As a convenience, a high-level `coverage` target is also generated which will +produce all 3 coverage report types, if possible. + +The output of these commands is written to the log directory `meson-logs` in +your build directory. + +## Parallelism + +To reduce test times, Meson will by default run multiple unit tests in +parallel. It is common to have some tests which can not be run in parallel +because they require unique hold on some resource such as a file or a D-Bus +name. You have to specify these tests with a keyword argument. + +```meson +test('unique test', t, is_parallel : false) +``` + +Meson will then make sure that no other unit test is running at the same +time. Non-parallel tests take longer to run so it is recommended that you +write your unit tests to be parallel executable whenever possible. + +By default Meson uses as many concurrent processes as there are cores on the +test machine. You can override this with the environment variable +`MESON_TESTTHREADS` like this. + +```console +$ MESON_TESTTHREADS=5 meson test +``` + +## Priorities + +*(added in version 0.52.0)* + +Tests can be assigned a priority that determines when a test is *started*. +Tests with higher priority are started first, tests with lower priority +started later. The default priority is 0, meson makes no guarantee on the +ordering of tests with identical priority. + +```meson +test('started second', t, priority : 0) +test('started third', t, priority : -50) +test('started first', t, priority : 1000) +``` + +Note that the test priority only affects the starting order of tests and +subsequent tests are affected by how long it takes previous tests to +complete. It is thus possible that a higher-priority test is still running +when lower-priority tests with a shorter runtime have completed. + +## Skipped tests and hard errors + +Sometimes a test can only determine at runtime that it can not be run. + +For the default `exitcode` testing protocol, the GNU standard approach in +this case is to exit the program with error code 77. Meson will detect this +and report these tests as skipped rather than failed. This behavior was added +in version 0.37.0. + +For TAP-based tests, skipped tests should print a single line starting with +`1..0 # SKIP`. + +In addition, sometimes a test fails set up so that it should fail even if it +is marked as an expected failure. The GNU standard approach in this case is +to exit the program with error code 99. Again, Meson will detect this and +report these tests as `ERROR`, ignoring the setting of `should_fail`. This +behavior was added in version 0.50.0. + +## Testing tool + +The goal of the meson test tool is to provide a simple way to run tests in a +variety of different ways. The tool is designed to be run in the build +directory. + +The simplest thing to do is just to run all tests. + +```console +$ meson test +``` + +### Run subsets of tests + +For clarity, consider the meson.build containing: + +```meson + +test('A', ..., suite: 'foo') +test('B', ..., suite: ['foo', 'bar']) +test('C', ..., suite: 'bar') +test('D', ..., suite: 'baz') + +``` + +Specify test(s) by name like: + +```console +$ meson test A D +``` + +Tests belonging to a suite `suite` can be run as follows + +```console +$ meson test --suite (sub)project_name:suite +``` + +Since version *0.46*, `(sub)project_name` can be omitted if it is the +top-level project. + +Multiple suites are specified like: + +```console +$ meson test --suite foo --suite bar +``` + +NOTE: If you choose to specify both suite(s) and specific test name(s), the +test name(s) must be contained in the suite(s). This however is redundant-- +it would be more useful to specify either specific test names or suite(s). + +### Other test options + +Sometimes you need to run the tests multiple times, which is done like this: + +```console +$ meson test --repeat=10 +``` + +Invoking tests via a helper executable such as Valgrind can be done with the +`--wrap` argument + +```console +$ meson test --wrap=valgrind testname +``` + +Arguments to the wrapper binary can be given like this: + +```console +$ meson test --wrap='valgrind --tool=helgrind' testname +``` + +Meson also supports running the tests under GDB. Just doing this: + +```console +$ meson test --gdb testname +``` + +Meson will launch `gdb` all set up to run the test. Just type `run` in the +GDB command prompt to start the program. + +The second use case is a test that segfaults only rarely. In this case you +can invoke the following command: + +```console +$ meson test --gdb --repeat=10000 testname +``` + +This runs the test up to 10 000 times under GDB automatically. If the program +crashes, GDB will halt and the user can debug the application. Note that +testing timeouts are disabled in this case so `meson test` will not kill +`gdb` while the developer is still debugging it. The downside is that if the +test binary freezes, the test runner will wait forever. + +Sometimes, the GDB binary is not in the PATH variable or the user wants to +use a GDB replacement. Therefore, the invoked GDB program can be specified +*(added 0.52.0)*: + +```console +$ meson test --gdb --gdb-path /path/to/gdb testname +``` + +```console +$ meson test --print-errorlogs +``` + +Meson will report the output produced by the failing tests along with other +useful information as the environmental variables. This is useful, for +example, when you run the tests on Travis-CI, Jenkins and the like. + +For further information see the command line help of Meson by running `meson +test -h`. + +## Legacy notes + +If `meson test` does not work for you, you likely have a old version of +Meson. In that case you should call `mesontest` instead. If `mesontest` +doesn't work either you have a very old version prior to 0.37.0 and should +upgrade. + +## Test outputs + +Meson will write several different files with detailed results of running +tests. These will be written into $builddir/meson-logs/ + +### testlog.json + +This is not a proper json file, but a file containing one valid json object +per line. This is file is designed so each line is streamed out as each test +is run, so it can be read as a stream while the test harness is running + +### testlog.junit.xml + +This is a valid JUnit XML description of all tests run. It is not streamed +out, and is written only once all tests complete running. + +When tests use the `tap` protocol each test will be recorded as a testsuite +container, with each case named by the number of the result. + +When tests use the `gtest` protocol meson will inject arguments to the test +to generate it's own JUnit XML, which meson will include as part of this XML +file. + +*New in 0.55.0* diff --git a/meson/docs/markdown/Unity-builds.md b/meson/docs/markdown/Unity-builds.md new file mode 100644 index 000000000..533a42199 --- /dev/null +++ b/meson/docs/markdown/Unity-builds.md @@ -0,0 +1,19 @@ +--- +short-description: Unity builds are a technique for reducing build times +... + +# Unity builds + +Unity builds are a technique for cutting down build times. The way it works is relatively straightforward. Suppose we have source files `src1.c`, `src2.c` and `src3.c`. Normally we would run the compiler three times, once for each file. In a unity build we instead compile all these sources in a single unit. The simplest approach is to create a new source file that looks like this. + +```c +#include +#include +#include +``` + +This is then compiled rather than the individual files. The exact speedup depends on the code base, of course, but it is not uncommon to obtain more than 50% speedup in compile times. This happens even though the Unity build uses only one CPU whereas individual compiles can be run in parallel. Unity builds can also lead to faster code, because the compiler can do more aggressive optimizations (e.g. inlining). + +The downside is that incremental builds are as slow as full rebuilds (because that is what they are). Unity compiles also use more memory, which may become an issue in certain scenarios. There may also be some bugs in the source that need to be fixed before Unity compiles work. As an example, if both `src1.c` and `src2.c` contain a static function or variable of the same name, there will be a clash. + +Meson has built-in support for unity builds. To enable them, just pass `--unity on` on the command line or enable unity builds with the GUI. No code changes are necessary apart from the potential clash issue discussed above. Meson will automatically generate all the necessary inclusion files for you. diff --git a/meson/docs/markdown/Use-of-Python.md b/meson/docs/markdown/Use-of-Python.md new file mode 100644 index 000000000..18035032e --- /dev/null +++ b/meson/docs/markdown/Use-of-Python.md @@ -0,0 +1,21 @@ +# Use of Python + +Meson is implemented in Python. This has both positive and negative sides. The main thing people seem to be mindful about is the dependency on Python to build source code. This page discusses various aspects of this problem. + +# Dependency hell + +There have been many Python programs that are difficult to maintain on multiple platforms. The reasons come mostly from dependencies. The program may use dependencies that are hard to compile on certain platforms, are outdated, conflict with other dependencies, not available on a given Python version and so on. + +Meson avoids dependency problems with one simple rule: Meson is not allowed to have any dependencies outside the Python basic library. The only thing you need is Python 3 (and possibly Ninja). + +## Reimplementability + +Meson has been designed in such a way that the implementation language is never exposed in the build definitions. This makes it possible (and maybe even easy) to reimplement Meson in any other programming language. There are currently no plans to reimplement Meson, but we will make sure that Python is not exposed inside the build definitions. + +## Cross platform tooling + +There is no one technical solution or programming language that works natively on all operating systems currently in use. When Autotools was designed in the late 80s, Unix shell was available pretty much anywhere. This is no longer the case. + +It is also the case that as any project gets larger, sooner or later it requires code generation, scripting or other tooling. This seems to be inevitable. Because there is no scripting language that would be available everywhere, these tools either need to be rewritten for each platform (which is a lot of work and is prone to errors) or the project needs to take a dependency on _something_. + +Any project that uses Meson (at least the current version) can rely on the fact that Python 3 will always be available, because you can't compile the project without it. All tooling can then be done in Python 3 with the knowledge that it will run on any platform without any extra dependencies (modulo the usual porting work). This reduces maintenance effort on multiplatform projects by a fair margin. diff --git a/meson/docs/markdown/Users.md b/meson/docs/markdown/Users.md new file mode 100644 index 000000000..49d30a434 --- /dev/null +++ b/meson/docs/markdown/Users.md @@ -0,0 +1,147 @@ +--- +title: Users +... + +# List of projects using Meson + +If you have a project that uses Meson that you want to add to this +list, please [file a +pull-request](https://github.com/mesonbuild/meson/edit/master/docs/markdown/Users.md) +for it. All the software on this list is tested for regressions before +release, so it's highly recommended that projects add themselves +here. Some additional projects are listed in the [`meson` GitHub +topic](https://github.com/topics/meson). + + - [2048.cpp](https://github.com/plibither8/2048.cpp), a fully featured terminal version of the game "2048" written in C++ + - [Aravis](https://github.com/AravisProject/aravis), a glib/gobject based library for video acquisition using Genicam cameras + - [Akira](https://github.com/akiraux/Akira), a native Linux app for UI and UX design built in Vala and Gtk + - [AQEMU](https://github.com/tobimensch/aqemu), a Qt GUI for QEMU virtual machines, since version 0.9.3 + - [Arduino sample project](https://github.com/jpakkane/mesonarduino) + - [bolt](https://gitlab.freedesktop.org/bolt/bolt), userspace daemon to enable security levels for Thunderbolt™ 3 on Linux + - [Budgie Desktop](https://github.com/budgie-desktop/budgie-desktop), a desktop environment built on GNOME technologies + - [Bzip2](https://gitlab.com/federicomenaquintero/bzip2), the bzip2 compressor/decompressor + - [Cage](https://github.com/Hjdskes/cage), a Wayland kiosk + - [casync](https://github.com/systemd/casync), Content-Addressable Data Synchronization Tool + - [cinnamon-desktop](https://github.com/linuxmint/cinnamon-desktop), the cinnamon desktop library + - [Cozy](https://github.com/geigi/cozy), a modern audio book player for Linux and macOS using GTK+ 3 + - [dav1d](https://code.videolan.org/videolan/dav1d), an AV1 decoder + - [dbus-broker](https://github.com/bus1/dbus-broker), Linux D-Bus Message Broker + - [DPDK](http://dpdk.org/browse/dpdk), Data Plane Development Kit, a set of libraries and drivers for fast packet processing + - [DXVK](https://github.com/doitsujin/dxvk), a Vulkan-based Direct3D 11 implementation for Linux using Wine + - [elementary OS](https://github.com/elementary/), Linux desktop oriented distribution + - [Emeus](https://github.com/ebassi/emeus), constraint based layout manager for GTK+ + - [ESP8266 Arduino sample project](https://github.com/trilader/arduino-esp8266-meson), sample project for using the ESP8266 Arduino port with Meson + - [FeedReader](https://github.com/jangernert/FeedReader), a modern desktop application designed to complement existing web-based RSS accounts + - [Flecs](https://github.com/SanderMertens/flecs), a Fast and Lightweight ECS (Entity Component System) C library + - [Foliate](https://github.com/johnfactotum/foliate), a simple and modern GTK eBook reader, built with GJS and Epub.js + - [Fractal](https://wiki.gnome.org/Apps/Fractal/), a Matrix messaging client for GNOME + - [Frida](https://github.com/frida/frida-core), a dynamic binary instrumentation toolkit + - [fwupd](https://github.com/hughsie/fwupd), a simple daemon to allow session software to update firmware + - [GameMode](https://github.com/FeralInteractive/gamemode), a daemon/lib combo for Linux that allows games to request a set of optimisations be temporarily applied to the host OS + - [Geary](https://wiki.gnome.org/Apps/Geary), an email application built around conversations, for the GNOME 3 desktop. + - [GLib](https://gitlab.gnome.org/GNOME/glib), cross-platform C library used by GTK+ and GStreamer + - [Glorytun](https://github.com/angt/glorytun), a multipath UDP tunnel + - [GNOME Boxes](https://gitlab.gnome.org/GNOME/gnome-boxes), a GNOME hypervisor + - [GNOME Builder](https://gitlab.gnome.org/GNOME/gnome-builder), an IDE for the GNOME platform + - [GNOME MPV](https://github.com/gnome-mpv/gnome-mpv), GNOME frontend to the mpv video player + - [GNOME Recipes](https://gitlab.gnome.org/GNOME/recipes), application for cooking recipes + - [GNOME Software](https://gitlab.gnome.org/GNOME/gnome-software), an app store for GNOME + - [GNOME Twitch](https://github.com/vinszent/gnome-twitch), an app for viewing Twitch streams on GNOME desktop + - [GNOME Usage](https://gitlab.gnome.org/GNOME/gnome-usage), a GNOME application for visualizing system resources + - [GNU FriBidi](https://github.com/fribidi/fribidi), the open source implementation of the Unicode Bidirectional Algorithm + - [Graphene](https://ebassi.github.io/graphene/), a thin type library for graphics + - [Grilo](https://git.gnome.org/browse/grilo) and [Grilo plugins](https://git.gnome.org/browse/grilo-plugins), the Grilo multimedia framework + - [GStreamer](https://gitlab.freedesktop.org/gstreamer/gstreamer), multimedia framework + - [GTK+](https://gitlab.gnome.org/GNOME/gtk), the multi-platform toolkit used by GNOME + - [GtkDApp](https://gitlab.com/csoriano/GtkDApp), an application template for developing Flatpak apps with Gtk+ and D + - [GVfs](https://git.gnome.org/browse/gvfs/), a userspace virtual filesystem designed to work with the I/O abstraction of GIO + - [Hardcode-Tray](https://github.com/bil-elmoussaoui/Hardcode-Tray), fixes hardcoded tray icons in Linux + - [HelenOS](http://helenos.org), a portable microkernel-based multiserver operating system + - [HexChat](https://github.com/hexchat/hexchat), a cross-platform IRC client in C + - [IGT](https://gitlab.freedesktop.org/drm/igt-gpu-tools), Linux kernel graphics driver test suite + - [inih](https://github.com/benhoyt/inih) (INI Not Invented Here), a small and simple .INI file parser written in C + - [Irssi](https://github.com/irssi/irssi), a terminal chat client in C + - [iSH](https://github.com/tbodt/ish), Linux shell for iOS + - [Janet](https://github.com/janet-lang/janet), a functional and imperative programming language and bytecode interpreter + - [json](https://github.com/nlohmann/json), JSON for Modern C++ + - [JsonCpp](https://github.com/open-source-parsers/jsoncpp), a C++ library for interacting with JSON + - [Json-glib](https://gitlab.gnome.org/GNOME/json-glib), GLib-based JSON manipulation library + - [Kiwix libraries](https://github.com/kiwix/kiwix-lib) + - [Knot Resolver](https://gitlab.labs.nic.cz/knot/knot-resolver), Full caching DNS resolver implementation + - [Ksh](https://github.com/att/ast), a Korn Shell + - [Lc0](https://github.com/LeelaChessZero/lc0), LeelaChessZero is a UCI-compliant chess engine designed to play chess via neural network + - [Le](https://github.com/kirushyk/le), machine learning framework + - [libcamera](https://git.linuxtv.org/libcamera.git/), a library to handle complex cameras on Linux, ChromeOS and Android + - [Libdrm](https://gitlab.freedesktop.org/mesa/drm), a library for abstracting DRM kernel interfaces + - [libeconf](https://github.com/openSUSE/libeconf), Enhanced config file parsing library, which merges config files placed in several locations into one + - [Libepoxy](https://github.com/anholt/libepoxy/), a library for handling OpenGL function pointer management + - [libfuse](https://github.com/libfuse/libfuse), the reference implementation of the Linux FUSE (Filesystem in Userspace) interface + - [Libgit2-glib](https://git.gnome.org/browse/libgit2-glib), a GLib wrapper for libgit2 + - [libglvnd](https://gitlab.freedesktop.org/glvnd/libglvnd), Vendor neutral OpenGL dispatch library for Unix-like OSes + - [Libhttpseverywhere](https://git.gnome.org/browse/libhttpseverywhere), a library to enable httpseverywhere on any desktop app + - [libmodulemd](https://github.com/fedora-modularity/libmodulemd), a GObject Introspected library for managing [Fedora Project](https://getfedora.org/) module metadata. + - [Libosmscout](https://github.com/Framstag/libosmscout), a C++ library for offline map rendering, routing and location +lookup based on OpenStreetMap data + - [libratbag](https://github.com/libratbag/libratbag), provides a DBus daemon to configure input devices, mainly gaming mice. + - [libspng](https://gitlab.com/randy408/libspng), a C library for reading and writing Portable Network Graphics (PNG) +format files + - [libui](https://github.com/andlabs/libui), a simple and portable (but not inflexible) GUI library in C that uses the native GUI technologies of each platform it supports + - [Libva](https://github.com/intel/libva), an implementation for the VA (VIdeo Acceleration) API + - [Libzim](https://github.com/openzim/libzim), the reference implementation for the ZIM file format + - [Marker](https://github.com/fabiocolacio/Marker), a GTK-3 markdown editor + - [Mesa](https://gitlab.freedesktop.org/mesa/mesa/), an open source graphics driver project + - [MiracleCast](https://github.com/albfan/miraclecast), connect external monitors to your system via Wifi-Display specification aka Miracast + - [mrsh](https://github.com/emersion/mrsh), a minimal POSIX shell + - [Nautilus](https://gitlab.gnome.org/GNOME/nautilus), the GNOME file manager + - [Nemo](https://github.com/linuxmint/nemo), the file manager for the Cinnamon desktop environment + - [OcherBook](https://github.com/ccoffing/OcherBook), an open source book reader for Kobo devices + - [oomd](https://github.com/facebookincubator/oomd), a userspace Out-Of-Memory (OOM) killer for Linux systems + - [OpenH264](https://github.com/cisco/openh264), open source H.264 codec + - [OpenHMD](https://github.com/OpenHMD/OpenHMD), a free and open source API and drivers for immersive technology, such as head mounted displays with built in head tracking + - [OpenTitan](https://github.com/lowRISC/opentitan), an open source silicon Root of Trust (RoT) project. + - [Orc](https://gitlab.freedesktop.org/gstreamer/orc), the Optimized Inner Loop Runtime Compiler + - [OTS](https://github.com/khaledhosny/ots), the OpenType Sanitizer, parses and serializes OpenType files (OTF, TTF) and WOFF and WOFF2 font files, validating and sanitizing them as it goes. Used by Chromium and Firefox + - [Outlier](https://github.com/kerolasa/outlier), a small Hello World style meson example project + - [Pacman](https://git.archlinux.org/pacman.git/tree/), a package manager for Arch Linux + - [Pango](https://git.gnome.org/browse/pango/), an Internationalized text layout and rendering library + - [Parzip](https://github.com/jpakkane/parzip), a multithreaded reimplementation of Zip + - [Peek](https://github.com/phw/peek), simple animated GIF screen recorder with an easy to use interface + - [PicoLibc](https://github.com/keith-packard/picolibc), a standard C library for small embedded systems with limited RAM + - [PipeWire](https://github.com/PipeWire/pipewire), a framework for video and audio for containerized applications + - [Pithos](https://github.com/pithos/pithos), a Pandora Radio client + - [Pitivi](https://github.com/pitivi/pitivi/), a nonlinear video editor + - [Playerctl](https://github.com/acrisci/playerctl), mpris command-line controller and library for spotify, vlc, audacious, bmp, cmus, and others + - [Polari](https://gitlab.gnome.org/GNOME/polari), an IRC client + - [qboot](https://github.com/bonzini/qboot), a minimal x86 firmware for booting Linux kernels + - [radare2](https://github.com/radare/radare2), unix-like reverse engineering framework and commandline tools (not the default) + - [RxDock](https://gitlab.com/rxdock/rxdock), a protein-ligand docking software designed for high throughput virtual screening (fork of rDock) + - [scrcpy](https://github.com/Genymobile/scrcpy), a cross platform application that provides display and control of Android devices connected on USB or over TCP/IP + - [Sequeler](https://github.com/Alecaddd/sequeler), a friendly SQL client for Linux, built with Vala and Gtk + - [Siril](https://gitlab.com/free-astro/siril), an image processing software for amateur astronomy + - [SSHFS](https://github.com/libfuse/sshfs), allows you to mount a remote filesystem using SFTP + - [sway](https://github.com/swaywm/sway), i3-compatible Wayland compositor + - [Sysprof](https://git.gnome.org/browse/sysprof), a profiling tool + - [systemd](https://github.com/systemd/systemd), the init system + - [szl](https://github.com/dimkr/szl), a lightweight, embeddable scripting language + - [Taisei Project](https://taisei-project.org/), an open-source Touhou Project clone and fangame + - [Terminology](https://github.com/billiob/terminology), a terminal emulator based on the Enlightenment Foundation Libraries + - [Tilix](https://github.com/gnunn1/tilix), a tiling terminal emulator for Linux using GTK+ 3 + - [Tizonia](https://github.com/tizonia/tizonia-openmax-il), a command-line cloud music player for Linux with support for Spotify, Google Play Music, YouTube, SoundCloud, TuneIn, Plex servers and Chromecast devices + - [Vala Language Server](https://github.com/benwaffle/vala-language-server), code intelligence engine for the Vala and Genie programming languages + - [Valum](https://github.com/valum-framework/valum), a micro web framework written in Vala + - [Venom](https://github.com/naxuroqa/Venom), a modern Tox client for the GNU/Linux desktop + - [VMAF](https://github.com/Netflix/vmaf) (by Netflix), a perceptual video quality assessment based on multi-method fusion + - [Wayland](https://github.com/wayland-project/wayland) and [Weston](https://github.com/wayland-project/weston), a next generation display server + - [wlroots](https://github.com/swaywm/wlroots), a modular Wayland compositor library + - [wxFormBuilder](https://github.com/wxFormBuilder/wxFormBuilder), RAD tool for wxWidgets GUI design + - [xi-gtk](https://github.com/eyelash/xi-gtk), a GTK+ front-end for the Xi editor + - [Xorg](https://gitlab.freedesktop.org/xorg/xserver), the X.org display server (not the default yet) + - [X Test Suite](https://gitlab.freedesktop.org/xorg/test/xts), The X.org test suite + - [zathura](https://github.com/pwmt/zathura), a highly customizable and functional document viewer based on the +girara user interface library and several document libraries + - [Zrythm](https://git.zrythm.org/cgit/zrythm), a cross-platform digital audio workstation using GTK+ 3 + - [ZStandard](https://github.com/facebook/zstd/commit/4dca56ed832c6a88108a2484a8f8ff63d8d76d91), a compression algorithm developed at Facebook (not used by default) + +Note that a more up-to-date list of GNOME projects that use Meson can +be found +[here](https://wiki.gnome.org/Initiatives/GnomeGoals/MesonPorting). diff --git a/meson/docs/markdown/Using-multiple-build-directories.md b/meson/docs/markdown/Using-multiple-build-directories.md new file mode 100644 index 000000000..ab6cf3c85 --- /dev/null +++ b/meson/docs/markdown/Using-multiple-build-directories.md @@ -0,0 +1,49 @@ +# Using multiple build directories + +One of the main design goals of Meson has been to build all projects out-of-source. This means that *all* files generated during the build are placed in a separate subdirectory. This goes against common Unix tradition where you build your projects in-source. Building out of source gives two major advantages. + +First of all this makes for very simple `.gitignore` files. In classical build systems you may need to have tens of lines of definitions, most of which contain wildcards. When doing out of source builds all of this busywork goes away. A common ignore file for a Meson project only contains a few lines that are the build directory and IDE project files. + +Secondly this makes it very easy to clean your projects: just delete the build subdirectory and you are done. There is no need to guess whether you need to run `make clean`, `make distclean`, `make mrproper` or something else. When you delete a build subdirectory there is no possible way to have any lingering state from your old builds. + +The true benefit comes from somewhere else, though. + +## Multiple build directories for the same source tree + +Since a build directory is fully self contained and treats the source tree as a read-only piece of data, it follows that you can have arbitrarily many build trees for any source tree at the same time. Since all build trees can have different configuration, this is extremely powerful. Now you might be wondering why one would want to have multiple build setups at the same time. Let's examine this by setting up a hypothetical project. + +The first thing to do is to set up the default build, that is, the one we are going to use over 90% of the time. In this we use the system compiler and build with debug enabled and no optimizations so it builds as fast as possible. This is the default project type for Meson, so setting it up is simple. + + mkdir builddir + meson builddir + +Another common setup is to build with debug and optimizations to, for example, run performance tests. Setting this up is just as simple. + + mkdir buildopt + meson --buildtype=debugoptimized buildopt + +For systems where the default compiler is GCC, we would like to compile with Clang, too. So let's do that. + + mkdir buildclang + CC=clang CXX=clang++ meson buildclang + +You can add cross builds, too. As an example, let's set up a Linux -> Windows cross compilation build using MinGW. + + mkdir buildwine + meson --cross-file=mingw-cross.txt buildwine + +The cross compilation file sets up Wine so that not only can you compile your application, you can also run the unit test suite just by issuing the command `meson test`. + +To compile any of these build types, just cd into the corresponding build directory and run `meson compile` or instruct your IDE to do the same. Note that once you have set up your build directory once, you can just run Ninja and Meson will ensure that the resulting build is fully up to date according to the source. Even if you have not touched one of the directories in weeks and have done major changes to your build configuration, Meson will detect this and bring the build directory up to date (or print an error if it can't do that). This allows you to do most of your work in the default directory and use the others every now and then without having to babysit your build directories. + +## Specialized uses + +Separate build directories allows easy integration for various different kinds of tools. As an example, Clang comes with a static analyzer. It is meant to be run from scratch on a given source tree. The steps to run it with Meson are very simple. + + rm -rf buildscan + mkdir buildscan + scan-build meson buildscan + cd buildscan + scan-build ninja + +These commands are the same for every single Meson project, so they could even be put in a script turning static analysis into a single command. diff --git a/meson/docs/markdown/Using-the-WrapDB.md b/meson/docs/markdown/Using-the-WrapDB.md new file mode 100644 index 000000000..d8ea31c32 --- /dev/null +++ b/meson/docs/markdown/Using-the-WrapDB.md @@ -0,0 +1,28 @@ +# Using the WrapDB + +The Wrap database is a web service that provides Meson build definitions to projects that do not have it natively. Using it is simple. The service can be found [here](https://wrapdb.mesonbuild.com). + +The front page lists all projects that are on the service. Select the one you want and click it. The detail page lists available versions by branch and revision id. The branch names come from upstream releases and revision ids are version numbers internal to the database. Whenever the packaging is updated a new revision is released to the service a new revision with a bigger revision id is added. Usually you want to select the newest branch with the highest revision id. + +You can get the actual wrap file which tells Meson how to fetch the project by clicking on the download link on the page. As an example, the wrap file for [zlib-1.2.8, revision 4](https://wrapdb.mesonbuild.com/v1/projects/zlib/1.2.8/4/get_wrap) looks like this. You can find detailed documentation about it in [the Wrap manual](Wrap-dependency-system-manual.md). + + [wrap-file] + directory = zlib-1.2.8 + + source_url = http://zlib.net/zlib-1.2.8.tar.gz + source_filename = zlib-1.2.8.tar.gz + source_hash = 36658cb768a54c1d4dec43c3116c27ed893e88b02ecfcb44f2166f9c0b7f2a0d + + patch_url = https://wrapdb.mesonbuild.com/v1/projects/zlib/1.2.8/4/get_zip + patch_filename = zlib-1.2.8-4-wrap.zip + patch_hash = 2327a42c8f73a4289ee8c9cd4abc43b324d0decc28d6e609e927f0a50321af4a + +Add this file to your project with the name `subprojects/zlib.wrap`. Then you can use it in your `meson.build` file with this directive: + + zproj = subproject('zlib') + +When Meson encounters this it will automatically download, unpack and patch the source files. + +## Contributing build definitions + +The contents of the Wrap database are tracked in git repos of the [Mesonbuild project](https://github.com/mesonbuild). The actual process is simple and described in [submission documentation](Adding-new-projects-to-wrapdb.md). diff --git a/meson/docs/markdown/Using-with-Visual-Studio.md b/meson/docs/markdown/Using-with-Visual-Studio.md new file mode 100644 index 000000000..c66cf3c6f --- /dev/null +++ b/meson/docs/markdown/Using-with-Visual-Studio.md @@ -0,0 +1,45 @@ +--- +short-description: How to use meson in Visual Studio +... + +# Using with Visual Studio + +In order to generate Visual Studio projects, Meson needs to know the settings +of your installed version of Visual Studio. The only way to get this +information is to run Meson under the Visual Studio Command Prompt. + +You can always find the Visual Studio Command Prompt by searching from the +Start Menu. However, the name is different for each Visual Studio version. With +Visual Studio 2019, look for "x64 Native Tools Command Prompt for VS 2019". +The next steps are [the same as always](https://mesonbuild.com/Running-Meson.html#configuring-the-build-directory): + +1. `cd` into your source directory +1. `meson setup builddir`, which will create and setup the build directory +1. `meson compile -C builddir`, to compile your code. You can also use `ninja -C builddir` here if you are using the default Ninja backend. + +If you wish to generate Visual Studio project files, pass `--backend vs`. +At the time of writing the Ninja backend is more mature than the VS backend so +you might want to use it for serious work. + +# Using Clang-CL with Visual Studio + +*(new in 0.52.0)* + +You will first need to get a copy of llvm+clang for Windows, such versions +are available from a number of sources, including the llvm website. Then you +will need the [llvm toolset extension for visual +studio](https://marketplace.visualstudio.com/items?itemName=LLVMExtensions.llvm-toolchain). +You then need to either use a [native file](Native-environments.md#binaries) +or `set CC=clang-cl`, and `set CXX=clang-cl` to use those compilers, meson +will do the rest. + +This only works with visual studio 2017 and 2019. + +There is currently no support in meson for clang/c2. + +# Using Intel-CL (ICL) with Visual Studio + +*(new in 0.52.0)* + +To use ICL you need only have ICL installed and launch an ICL development +shell like you would for the ninja backend and meson will take care of it. diff --git a/meson/docs/markdown/Using-wraptool.md b/meson/docs/markdown/Using-wraptool.md new file mode 100644 index 000000000..f6023e89d --- /dev/null +++ b/meson/docs/markdown/Using-wraptool.md @@ -0,0 +1,99 @@ +# Using wraptool + +Wraptool is a subcommand of Meson that allows you to manage your +source dependencies using the WrapDB database. It gives you all things +you would expect, such as installing and updating dependencies. The +wrap tool works on all platforms, the only limitation is that the wrap +definition works on your target platform. If you find some Wraps that +don't work, please file bugs or, even better, patches. + +All code examples here assume that you are running the commands in +your top level source directory. Lines that start with the `$` mark +are commands to type. + +## Simple querying + +The simplest operation to do is to query the list of packages +available. To list them all issue the following command: + + $ meson wrap list + box2d + enet + gtest + libjpeg + liblzma + libpng + libxml2 + lua + ogg + sqlite + vorbis + zlib + +Usually you want to search for a specific package. This can be done +with the `search` command: + + $ meson wrap search jpeg + libjpeg + +To determine which versions of libjpeg are available to install, issue +the `info` command: + + $ meson wrap info libjpeg + Available versions of libjpeg: + 9a 2 + +The first number is the upstream release version, in this case +`9a`. The second number is the Wrap revision number. They don't relate +to anything in particular, but larger numbers imply newer +releases. You should always use the newest available release. + +## Installing dependencies + +Installing dependencies is just as straightforward. First just create +the `subprojects` directory at the top of your source tree and issue +the install command. + + $ meson wrap install libjpeg + Installed libjpeg branch 9a revision 2 + +Now you can issue a `subproject('libjpeg')` in your `meson.build` file +to use it. + +To check if your projects are up to date you can issue the `status` command. + + $ meson wrap status + Subproject status + libjpeg up to date. Branch 9a, revision 2. + zlib not up to date. Have 1.2.8 2, but 1.2.8 4 is available. + +In this case `zlib` has a newer release available. Updating it is +straightforward: + + $ meson wrap update zlib + Updated zlib to branch 1.2.8 revision 4 + +Wraptool can do other things besides these. Documentation for these +can be found in the command line help, which can be accessed by +`meson wrap --help`. + +## Promoting dependencies + +Meson will only search for subprojects from the top level +`subprojects` directory. If you have subprojects that themselves have +subprojects, you must transfer them to the top level. This can be done +by going to your source root and issuing a promotion command. + + meson wrap promote projname + +This will cause Meson to go through your entire project tree, find an +embedded subproject and copy it to the top level. + +If there are multiple embedded copies of a subproject, Meson will not +try to guess which one you want. Instead it will print all the +possibilities. You can then manually select which one to promote by +writing it out fully. + + meson wrap promote subprojects/s1/subprojects/projname + +This functionality was added in Meson release 0.45.0. diff --git a/meson/docs/markdown/Vala.md b/meson/docs/markdown/Vala.md new file mode 100644 index 000000000..cbb58a962 --- /dev/null +++ b/meson/docs/markdown/Vala.md @@ -0,0 +1,322 @@ +--- +title: Vala +short-description: Compiling Vala and Genie programs +... + +# Compiling Vala applications and libraries +Meson supports compiling applications and libraries written in +[Vala](https://vala-project.org/) and +[Genie](https://wiki.gnome.org/Projects/Genie) . A skeleton `meson.build` file: + +```meson +project('vala app', 'vala', 'c') + +dependencies = [ + dependency('glib-2.0'), + dependency('gobject-2.0'), +] + +sources = files('app.vala') + +executable('app_name', sources, dependencies: dependencies) +``` + +You must always specify the `glib-2.0` and `gobject-2.0` libraries as +dependencies, because all current Vala applications use them. +[GLib](https://developer.gnome.org/glib/stable/) is used for basic data types +and [GObject](https://developer.gnome.org/gobject/stable/) is used for the +runtime type system. + + +## Using libraries +Meson uses the [`dependency()`](Reference-manual.md#dependency) function to find +the relevant VAPI, C headers and linker flags when it encounters a Vala source +file in a build target. Vala needs a VAPI file and a C header or headers to use +a library. The VAPI file helps map Vala code to the library's C programming +interface. It is the +[`pkg-config`](https://www.freedesktop.org/wiki/Software/pkg-config/) tool that +makes finding these installed files all work seamlessly behind the scenes. When +a `pkg-config` file doesn't exist for the library then the +[`find_library()`](Reference-manual.md#find_library) method of the [compiler +object](Reference-manual.md#compiler-object) needs to be used. Examples are +given later. + +Note Vala uses libraries that follow the C Application Binary Interface (C ABI). +The library, however, could be written in C, Vala, Rust, Go, C++ or any other +language that can generate a binary compatible with the C ABI and so provides C +headers. + + +### The simplest case +This first example is a simple addition to the `meson.build` file because: + + * the library has a `pkg-config` file, `gtk+-3.0.pc` + * the VAPI is distributed with Vala and so installed with the Vala compiler + * the VAPI is installed in Vala's standard search path + * the VAPI, `gtk+-3.0.vapi`, has the same name as the `pkg-config` file + +Everything works seamlessly in the background and only a single extra line is +needed: + +```meson +project('vala app', 'vala', 'c') + +dependencies = [ + dependency('glib-2.0'), + dependency('gobject-2.0'), + dependency('gtk+-3.0'), +] + +sources = files('app.vala') + +executable('app_name', sources, dependencies: dependencies) +``` + +GTK+ is the graphical toolkit used by GNOME, elementary OS and other desktop +environments. The binding to the library, the VAPI file, is distributed with +Vala. + +Other libraries may have a VAPI that is distributed with the library itself. +Such libraries will have their VAPI file installed along with their other +development files. The VAPI is installed in Vala's standard search path and so +works just as seamlessly using the `dependency()` function. + + +### Targeting a version of GLib +Meson's [`dependency()`](Reference-manual.md#dependency) function allows a +version check of a library. This is often used to check a minimum version is +installed. When setting a minimum version of GLib, Meson will also pass this to +the Vala compiler using the `--target-glib` option. + +This is needed when using GTK+'s user interface definition files with Vala's +`[GtkTemplate]`, `[GtkChild]` and `[GtkCallback]` attributes. This requires +`--target-glib 2.38`, or a newer version, to be passed to Vala. With Meson this +is simply done with: + +```meson +project('vala app', 'vala', 'c') + +dependencies = [ + dependency('glib-2.0', version: '>=2.38'), + dependency('gobject-2.0'), + dependency('gtk+-3.0'), +] + +sources = files('app.vala') + +executable('app_name', sources, dependencies: dependencies) +``` + +Using `[GtkTemplate]` also requires the GTK+ user interface definition files to +be built in to the binary as GResources. For completeness, the next example +shows this: + +```meson +project('vala app', 'vala', 'c') + +dependencies = [ + dependency('glib-2.0', version: '>=2.38'), + dependency('gobject-2.0'), + dependency('gtk+-3.0'), +] + +sources = files('app.vala') + +sources += import( 'gnome' ).compile_resources( + 'project-resources', + 'src/resources/resources.gresource.xml', + source_dir: 'src/resources', +) + +executable('app_name', sources, dependencies: dependencies) +``` + + +### Adding to Vala's search path +So far we have covered the cases where the VAPI file is either distributed with +Vala or the library. A VAPI can also be included in the source files of your +project. The convention is to put it in the `vapi` directory of your project. + +This is needed when a library does not have a VAPI or your project needs to link +to another component in the project that uses the C ABI. For example if part of +the project is written in C. + +The Vala compiler's `--vapidir` option is used to add the project directory to +the VAPI search path. In Meson this is done with the `add_project_arguments()` +function: + +```meson +project('vala app', 'vala', 'c') + +vapi_dir = meson.current_source_dir() / 'vapi' + +add_project_arguments(['--vapidir', vapi_dir], language: 'vala') + +dependencies = [ + dependency('glib-2.0'), + dependency('gobject-2.0'), + dependency('foo'), # 'foo.vapi' will be resolved as './vapi/foo.vapi' +] + +sources = files('app.vala') + +executable('app_name', sources, dependencies: dependencies) +``` + +If the VAPI is for an external library then make sure that the VAPI name +corresponds to the pkg-config file name. + +The [`vala-extra-vapis` repository](https://gitlab.gnome.org/GNOME/vala-extra-vapis) +is a community maintained repository of VAPIs that are not distributed. +Developers use the repository to share early work on new bindings and +improvements to existing bindings. So the VAPIs can frequently change. It is +recommended VAPIs from this repository are copied in to your project's source +files. + +This also works well for starting to write new bindings before they are shared +with the `vala-extra-vapis` repository. + + +### Libraries without pkg-config files +A library that does not have a corresponding pkg-config file may mean +`dependency()` is unsuitable for finding the C and Vala interface files. In this +case it is necessary to use the `find_library()` method of the compiler object. + +The first example uses Vala's POSIX binding. There is no pkg-config file because +POSIX includes the standard C library on Unix systems. All that is needed is the +VAPI file, `posix.vapi`. This is included with Vala and installed in Vala's +standard search path. Meson just needs to be told to only find the library for +the Vala compiler: + +```meson +project('vala app', 'vala', 'c') + +dependencies = [ + dependency('glib-2.0'), + dependency('gobject-2.0'), + meson.get_compiler('vala').find_library('posix'), +] + +sources = files('app.vala') + +executable('app_name', sources, dependencies: dependencies) +``` + +The next example shows how to link with a C library where no additional VAPI is +needed. The standard maths functions are already bound in `glib-2.0.vapi`, but +the GNU C library requires linking to the maths library separately. In this +example Meson is told to find the library only for the C compiler: + +```meson +project('vala app', 'vala', 'c') + +dependencies = [ + dependency('glib-2.0'), + dependency('gobject-2.0'), + meson.get_compiler('c').find_library('m', required: false), +] + +sources = files('app.vala') + +executable('app_name', sources, dependencies: dependencies) +``` +The `required: false` means the build will continue when using another C library +that does not separate the maths library. See [Add math library (-lm) +portably](howtox.md#add-math-library-lm-portably). + +The final example shows how to use a library that does not have a pkg-config +file and the VAPI is in the `vapi` directory of your project source files: +```meson +project('vala app', 'vala', 'c') + +vapi_dir = meson.current_source_dir() / 'vapi' + +add_project_arguments(['--vapidir', vapi_dir], language: 'vala') + +dependencies = [ + dependency('glib-2.0'), + dependency('gobject-2.0'), + meson.get_compiler('c').find_library('foo'), + meson.get_compiler('vala').find_library('foo', dir: vapi_dir), +] + +sources = files('app.vala') + +executable('app_name', sources, dependencies: dependencies) +``` +The `find_library()` method of the C compiler object will try to find the C +header files and the library to link with. + +The `find_library()` method of the Vala compiler object needs to have the `dir` +keyword added to include the project VAPI directory. This is not added +automatically by `add_project_arguments()`. + +### Working with the Vala Preprocessor +Passing arguments to [Vala's preprocessor](https://wiki.gnome.org/Projects/Vala/Manual/Preprocessor) requires specifying the language as `c`. For example, the following statement sets the preprocessor symbol `FUSE_USE_VERSION` to the value `26`: + +```meson +add_project_arguments('-DFUSE_USE_VERSION=26', language: 'c') +``` + +## Building libraries + + +### Changing C header and VAPI names +Meson's [`library`](Reference-manual.md#library) target automatically outputs +the C header and the VAPI. They can be renamed by setting the `vala_header` and +`vala_vapi` arguments respectively: + +```meson +foo_lib = shared_library('foo', 'foo.vala', + vala_header: 'foo.h', + vala_vapi: 'foo-1.0.vapi', + dependencies: [glib_dep, gobject_dep], + install: true, + install_dir: [true, true, true]) +``` +In this example, the second and third elements of the `install_dir` array +indicate the destination with `true` to use default directories (i.e. `include` +and `share/vala/vapi`). + + +### GObject Introspection and language bindings +A 'binding' allows another programming language to use a library written in +Vala. Because Vala uses the GObject type system as its runtime type system it is +very easy to use introspection to generate a binding. A Meson build of a Vala +library can generate the GObject introspection metadata. The metadata is then +used in separate projects with [language specific +tools](https://wiki.gnome.org/Projects/Vala/LibraryWritingBindings) to generate +a binding. + +The main form of metadata is a GObject Introspection Repository (GIR) XML file. +GIRs are mostly used by languages that generate bindings at compile time. +Languages that generate bindings at runtime mostly use a typelib file, which is +generated from the GIR. + +Meson can generate a GIR as part of the build. For a Vala library the +`vala_gir` option has to be set for the `library`: + +```meson +foo_lib = shared_library('foo', 'foo.vala', + vala_gir: 'Foo-1.0.gir', + dependencies: [glib_dep, gobject_dep], + install: true, + install_dir: [true, true, true, true]) +``` + +The `true` value in `install_dir` tells Meson to use the default directory (i.e. +`share/gir-1.0` for GIRs). The fourth element in the `install_dir` array +indicates where the GIR file will be installed. + +To then generate a typelib file use a custom target with the `g-ir-compiler` +program and a dependency on the library: + +```meson +g_ir_compiler = find_program('g-ir-compiler') +custom_target('foo typelib', command: [g_ir_compiler, '--output', '@OUTPUT@', '@INPUT@'], + input: meson.current_build_dir() / 'Foo-1.0.gir', + output: 'Foo-1.0.typelib', + depends: foo_lib, + install: true, + install_dir: get_option('libdir') / 'girepository-1.0') +``` diff --git a/meson/docs/markdown/Videos.md b/meson/docs/markdown/Videos.md new file mode 100644 index 000000000..fb600382e --- /dev/null +++ b/meson/docs/markdown/Videos.md @@ -0,0 +1,45 @@ +--- +short-description: Videos about Meson +... + +# Videos + + - [Behind (and under) the scenes of the Meson build + system](https://www.youtube.com/watch?v=iLN6wL7ExHU), Linux.conf.au + 2020 + + - [Behind the Scenes of a C++ Build + System](https://www.youtube.com/watch?v=34KzT2yvQuM), CppCon 2019 + + - [Compiling Multi-Million Line C++ Code Bases Effortlessly with the + Meson Build system](https://www.youtube.com/watch?v=SCZLnopmYBM), + CppCon 2018 + + - [The Meson Build System, 4+ years of work to become an overnight + success](https://www.youtube.com/watch?v=gHdTzdXkhRY), Linux.conf.au 2018 + + - [Power through simplicity, using Python in the Meson Build + System](https://youtu.be/3jF3oVsjIEM), Piter.py, 2017 + + - [Meson and the changing Linux build + landscape](https://media.ccc.de/v/ASG2017-111-meson_and_the_changing_linux_build_landscape), + All Systems Go 2017 + + - [Meson, compiling the world with + Python](https://www.youtube.com/watch?v=sEO4DC8hm34), Europython + 2017 + + - [Builds, dependencies and deployment in a modern multiplatform + world](https://www.youtube.com/embed/CTJtKtQ8R5k), Linux.conf.au + 2016 + + - [New world, new tools](https://www.youtube.com/embed/0-gx1qU2pPo), + Libre Application Summit 2016 + + - [Making build systems not + suck](https://www.youtube.com/embed/KPi0AuVpxLI), Linux.conf.au + 2015, Auckland, New Zealand + + - [Lightning talk at FOSDEM + 2014](http://mirror.onet.pl/pub/mirrors/video.fosdem.org/2014/H2215_Ferrer/Sunday/Introducing_the_Meson_build_system.webm), + The first ever public presentation on Meson diff --git a/meson/docs/markdown/Vs-External.md b/meson/docs/markdown/Vs-External.md new file mode 100644 index 000000000..ab3d191f9 --- /dev/null +++ b/meson/docs/markdown/Vs-External.md @@ -0,0 +1,57 @@ +# Visual Studio's external build projects + +Visual Studio supports developing projects that have an external build +system. If you wish to use this integration method, here is how you +set it up. This documentation describes Visual Studio 2019. Other +versions have not been tested, but they should work roughly in the +same way. + +## Creating and compiling + +Check out your entire project in some directory. Then open Visual +Studio and select `File -> New -> Project` and from the list of +project types select `Makefile project`. Click `Next`. + +Type your project's name In the `Project name` entry box. In this +example we're going to use `testproj`. Next select the `Location` +entry and browse to the root of your projet sources. Make sure that +the checkbox `Place solution and project in the same directory` is +checked. Click `Create`. + +The next dialog page defines build commands, which you should set up +as follows: + +| entry | value | +| ----- | ----- | +|build | `meson compile -C $(Configuration)` | +|clean | `meson compile -C $(Configuration) --clean` | +|rebuild| `meson compile -C $(Configuration) --clean && meson compile -C $(Configuration)` | +|Output | `$(Configuration)\name_of_your_executable.exe| + + +Then click `Finish`. + +Visual Studio has created a subdirectory in your source root. It is +named after the project name. In this case it would be `testproj`. Now +you need to set up Meson for building both Debug and Release versions +in this directory. Open a VS dev tool terminal, go to the source root +and issue the following commands. + +``` +meson testproj\Debug +meson testproj\Release --buildtype=debugoptimized +``` + +Now you should have a working VS solution that compiles and runs both +in Debug and Release modes. + +## Adding sources to the project + +This project is not very useful on its own, because it does not list +any source files. VS does not seem to support adding entire source +trees at once, so you have to add sources to the solution manually. + +In the main view go to `Solution Explorer`, right click on the project +you just created and select `Add -> Existing Item`, browse to your +source tree and select all files you want to have in this project. Now +you can use the editor and debugger as in a normal VS project. diff --git a/meson/docs/markdown/Windows-module.md b/meson/docs/markdown/Windows-module.md new file mode 100644 index 000000000..8c01a413d --- /dev/null +++ b/meson/docs/markdown/Windows-module.md @@ -0,0 +1,30 @@ +# Windows module + +This module provides functionality used to build applications for +Windows. + +## Methods + +### compile_resources + +Compiles Windows `rc` files specified in the positional +arguments. Returns an opaque object that you put in the list of +sources for the target you want to have the resources in. This method +has the following keyword argument. + +- `args` lists extra arguments to pass to the resource compiler +- `depend_files` lists resource files that the resource script depends on + (e.g. bitmap, cursor, font, html, icon, message table, binary data or manifest + files referenced by the resource script) (*since 0.47.0*) +- `depends` lists target(s) that this target depends on, even though it does not + take them as an argument (e.g. as above, but generated) (*since 0.47.0*) +- `include_directories` lists directories to be both searched by the resource + compiler for referenced resource files, and added to the preprocessor include + search path. + +The resource compiler executable used is the first which exists from the +following list: + +1. The `windres` executable given in the `[binaries]` section of the cross-file +2. The `WINDRES` environment variable +3. The resource compiler which is part of the same toolset as the C or C++ compiler in use. diff --git a/meson/docs/markdown/Wrap-best-practices-and-tips.md b/meson/docs/markdown/Wrap-best-practices-and-tips.md new file mode 100644 index 000000000..bdff9ee0f --- /dev/null +++ b/meson/docs/markdown/Wrap-best-practices-and-tips.md @@ -0,0 +1,159 @@ +# Wrap best practices and tips + +There are several things you need to take into consideration when +writing a Meson build definition for a project. This is especially +true when the project will be used as a subproject. This page lists a +few things to consider when writing your definitions. + +## Do not put config.h in external search path + +Many projects use a `config.h` header file that they use for +configuring their project internally. These files are never installed +to the system header files so there are no inclusion collisions. This +is not the case with subprojects, your project tree may have an +arbitrary number of configuration files, so we need to ensure they +don't clash. + +The basic problem is that the users of the subproject must be able to +include subproject headers without seeing its `config.h` file. The +most correct solution is to rename the `config.h` file into something +unique, such as `foobar-config.h`. This is usually not feasible unless +you are the maintainer of the subproject in question. + +The pragmatic solution is to put the config header in a directory that +has no other header files and then hide that from everyone else. One +way is to create a top level subdirectory called `internal` and use +that to build your own sources, like this: + +```meson +subdir('internal') # create config.h in this subdir +internal_inc = include_directories('internal') +shared_library('foo', 'foo.c', include_directories : internal_inc) +``` + +Many projects keep their `config.h` in the top level directory that +has no other source files in it. In that case you don't need to move +it but can just do this instead: + +```meson +internal_inc = include_directories('.') # At top level meson.build +``` + +## Make libraries buildable both as static and shared + +Some platforms (e.g. iOS) requires linking everything in your main app +statically. In other cases you might want shared libraries. They are +also faster during development due to Meson's relinking +optimization. However building both library types on all builds is +slow and wasteful. + +Your project should use the `library` method that can be toggled +between shared and static with the `default_library` builtin option. + + +```meson +mylib = library('foo', 'foo.c') +``` + +## Declare generated headers explicitly + +Meson's Ninja backend works differently from Make and other +systems. Rather than processing things directory per directory, it +looks at the entire build definition at once and runs the individual +compile jobs in what might look to the outside as a random order. + +The reason for this is that this is much more efficient so your builds +finish faster. The downside is that you have to be careful with your +dependencies. The most common problem here is headers that are +generated at compile time with e.g. code generators. If these headers +are needed when building code that uses these libraries, the compile +job might be run before the code generation step. The fix is to make +the dependency explicit like this: + +```meson +myheader = custom_target(...) +mylibrary = shared_library(...) +mydep = declare_dependency(link_with : mylibrary, + include_directories : include_directories(...), + sources : myheader) +``` + +And then you can use the dependency in the usual way: + +```meson +executable('dep_using_exe', 'main.c', + dependencies : mydep) +``` + +Meson will ensure that the header file has been built before compiling `main.c`. + +## Avoid exposing compilable source files in declare_dependency + +The main use for the `sources` argument in `declare_dependency` is to +construct the correct dependency graph for the backends, as +demonstrated in the previous section. It is extremely important to +note that it should *not* be used to directly expose compilable +sources (`.c`, `.cpp`, etc.) of dependencies, and should rather only +be used for header/config files. The following example will illustrate +what can go wrong if you accidentally expose compilable source files. + +So you've read about unity builds and how Meson natively supports +them. You decide to expose the sources of dependencies in order to +have unity builds that include their dependencies. For your support +library you do + +```meson +my_support_sources = files(...) + +mysupportlib = shared_library( + ... + sources : my_support_sources, + ...) +mysupportlib_dep = declare_dependency( + ... + link_with : mylibrary, + sources : my_support_sources, + ...) +``` + +And for your main project you do: + +```meson +mylibrary = shared_library( + ... + dependencies : mysupportlib_dep, + ...) +myexe = executable( + ... + link_with : mylibrary, + dependencies : mysupportlib_dep, + ...) +``` + +This is extremely dangerous. When building, `mylibrary` will build and +link the support sources `my_support_sources` into the resulting +shared library. Then, for `myexe`, these same support sources will be +compiled again, will be linked into the resulting executable, in +addition to them being already present in `mylibrary`. This can +quickly run afoul of the [One Definition Rule +(ODR)](https://en.wikipedia.org/wiki/One_Definition_Rule) in C++, as +you have more than one definition of a symbol, yielding undefined +behavior. While C does not have a strict ODR rule, there is no +language in the standard which guarantees such behavior to +work. Violations of the ODR can lead to weird idiosyncratic failures +such as segfaults. In the overwhelming number of cases, exposing +library sources via the `sources` argument in `declare_dependency` is +thus incorrect. If you wish to get full cross-library performance, +consider building `mysupportlib` as a static library instead and +employing LTO. + +There are exceptions to this rule. If there are some natural +constraints on how your library is to be used, you can expose +sources. For instance, the WrapDB module for GoogleTest directly +exposes the sources of GTest and GMock. This is valid, as GTest and +GMock will only ever be used in *terminal* link targets. A terminal +target is the final target in a dependency link chain, for instance +`myexe` in the last example, whereas `mylibrary` is an intermediate +link target. For most libraries this rule is not applicable though, as +you cannot in general control how others consume your library, and as +such should not expose sources. diff --git a/meson/docs/markdown/Wrap-dependency-system-manual.md b/meson/docs/markdown/Wrap-dependency-system-manual.md new file mode 100644 index 000000000..8e6282e61 --- /dev/null +++ b/meson/docs/markdown/Wrap-dependency-system-manual.md @@ -0,0 +1,241 @@ +# Wrap dependency system manual + +One of the major problems of multiplatform development is wrangling +all your dependencies. This is awkward on many platforms, especially +on ones that do not have a built-in package manager. The latter problem +has been worked around by having third party package managers. They +are not really a solution for end user deployment, because you can't +tell them to install a package manager just to use your app. On these +platforms you must produce self-contained applications. Same applies +when destination platform is missing (up-to-date versions of) your +application's dependencies. + +The traditional approach to this has been to bundle dependencies +inside your own project. Either as prebuilt libraries and headers or +by embedding the source code inside your source tree and rewriting +your build system to build them as part of your project. + +This is both tedious and error prone because it is always done by +hand. The Wrap dependency system of Meson aims to provide an automated +way to do this. + +## How it works + +Meson has a concept of [subprojects](Subprojects.md). They are a way +of nesting one Meson project inside another. Any project that builds +with Meson can detect that it is built as a subproject and build +itself in a way that makes it easy to use (usually this means as a +static library). + +To use this kind of a project as a dependency you could just copy and +extract it inside your project's `subprojects` directory. + +However there is a simpler way. You can specify a Wrap file that tells Meson +how to download it for you. If you then use this subproject in your build, +Meson will automatically download and extract it during build. This makes +subproject embedding extremely easy. + +All wrap files must have a name of `.wrap` form and be in `subprojects` dir. + +Currently Meson has four kinds of wraps: +- wrap-file +- wrap-git +- wrap-hg +- wrap-svn + +## wrap format + +Wrap files are written in ini format, with a single header containing the type +of wrap, followed by properties describing how to obtain the sources, validate +them, and modify them if needed. An example wrap-file for the wrap named +`libfoobar` would have a filename `libfoobar.wrap` and would look like this: + +```ini +[wrap-file] +directory = libfoobar-1.0 + +source_url = https://example.com/foobar-1.0.tar.gz +source_filename = foobar-1.0.tar.gz +source_hash = 5ebeea0dfb75d090ea0e7ff84799b2a7a1550db3fe61eb5f6f61c2e971e57663 +``` + +An example wrap-git will look like this: + +```ini +[wrap-git] +url = https://github.com/libfoobar/libfoobar.git +revision = head +``` + +## Accepted configuration properties for wraps +- `directory` - name of the subproject root directory, defaults to the name of the wrap. + +Since *0.55.0* those can be used in all wrap types, they were previously reserved to `wrap-file`: + +- `patch_url` - download url to retrieve an optional overlay archive +- `patch_fallback_url` - fallback URL to be used when download from `patch_url` fails *Since: 0.55.0* +- `patch_filename` - filename of the downloaded overlay archive +- `patch_hash` - sha256 checksum of the downloaded overlay archive +- `patch_directory` - *Since 0.55.0* Overlay directory, alternative to `patch_filename` in the case + files are local instead of a downloaded archive. The directory must be placed in + `subprojects/packagefiles`. + +### Specific to wrap-file +- `source_url` - download url to retrieve the wrap-file source archive +- `source_fallback_url` - fallback URL to be used when download from `source_url` fails *Since: 0.55.0* +- `source_filename` - filename of the downloaded source archive +- `source_hash` - sha256 checksum of the downloaded source archive +- `lead_directory_missing` - for `wrap-file` create the leading + directory name. Needed when the source file does not have a leading + directory. + +Since *0.55.0* it is possible to use only the `source_filename` and +`patch_filename` value in a .wrap file (without `source_url` and `patch_url`) to +specify a local archive in the `subprojects/packagefiles` directory. The `*_hash` +entries are optional when using this method. This method should be prefered over +the old `packagecache` approach described below. + +Since *0.49.0* if `source_filename` or `patch_filename` is found in the +project's `subprojects/packagecache` directory, it will be used instead +of downloading the file, even if `--wrap-mode` option is set to +`nodownload`. The file's hash will be checked. + +### Specific to VCS-based wraps +- `url` - name of the wrap-git repository to clone. Required. +- `revision` - name of the revision to checkout. Must be either: a + valid value (such as a git tag) for the VCS's `checkout` command, or + (for git) `head` to track upstream's default branch. Required. + +### Specific to wrap-git +- `depth` - shallowly clone the repository to X number of commits. Note + that git always allow shallowly cloning branches, but in order to + clone commit ids shallowly, the server must support + `uploadpack.allowReachableSHA1InWant=true`. *(since 0.52.0)* +- `push-url` - alternative url to configure as a git push-url. Useful if + the subproject will be developed and changes pushed upstream. + *(since 0.37.0)* +- `clone-recursive` - also clone submodules of the repository + *(since 0.48.0)* + +## wrap-file with Meson build patch + +Unfortunately most software projects in the world do not build with +Meson. Because of this Meson allows you to specify a patch URL. + +For historic reasons this is called a "patch", however, it serves as an +overlay to add or replace files rather than modifying them. The file +must be an archive; it is downloaded and automatically extracted into +the subproject. The extracted files will include a meson build +definition for the given subproject. + +This approach makes it extremely simple to embed dependencies that +require build system changes. You can write the Meson build definition +for the dependency in total isolation. This is a lot better than doing +it inside your own source tree, especially if it contains hundreds of +thousands of lines of code. Once you have a working build definition, +just zip up the Meson build files (and others you have changed) and +put them somewhere where you can download them. + +Prior to *0.55.0* Meson build patches were only supported for wrap-file mode. +When using wrap-git, the repository must contain all Meson build definitions. +Since *0.55.0* Meson build patches are supported for any wrap modes, including +wrap-git. + +## `provide` section + +*Since *0.55.0* + +Wrap files can define the dependencies it provides in the `[provide]` section. + +```ini +[provide] +dependency_names = foo-1.0 +``` + +When a wrap file provides the dependency `foo-1.0`, as above, any call to +`dependency('foo-1.0')` will automatically fallback to that subproject even if +no `fallback` keyword argument is given. A wrap file named `foo.wrap` implicitly +provides the dependency name `foo` even when the `[provide]` section is missing. + +Optional dependencies, like `dependency('foo-1.0', required: get_option('foo_opt'))` +where `foo_opt` is a feature option set to `auto`, will not fallback to the +subproject defined in the wrap file, for 2 reasons: +- It allows for looking the dependency in other ways first, for example using + `cc.find_library('foo')`, and only fallback if that fails: + +```meson +# this won't use fallback defined in foo.wrap +foo_dep = dependency('foo-1.0', required: false) +if not foo_dep.found() + foo_dep = cc.find_library('foo', has_headers: 'foo.h', required: false) + if not foo_dep.found() + # This will use the fallback + foo_dep = dependency('foo-1.0') + # or + foo_dep = dependency('foo-1.0', required: false, fallback: 'foo') + endif +endif +``` + +- Sometimes not-found dependency is preferable to a fallback when the feature is + not explicitly requested by the user. In that case + `dependency('foo-1.0', required: get_option('foo_opt'))` will only fallback + when the user sets `foo_opt` to `enabled` instead of `auto`. + +If it is desired to fallback for an optional dependency, the `fallback` keyword +argument must be passed explicitly. For example +`dependency('foo-1.0', required: get_option('foo_opt'), fallback: 'foo')` will +use the fallback even when `foo_opt` is set to `auto`. + +This mechanism assumes the subproject calls `meson.override_dependency('foo-1.0', foo_dep)` +so Meson knows which dependency object should be used as fallback. Since that +method was introduced in version *0.54.0*, as a transitional aid for projects +that do not yet make use of it the variable name can be provided in the wrap file +with entries in the format `foo-1.0 = foo_dep`. + +For example when using a recent enough version of glib that uses +`meson.override_dependency()` to override `glib-2.0`, `gobject-2.0` and `gio-2.0`, +a wrap file would look like: +```ini +[wrap-git] +url=https://gitlab.gnome.org/GNOME/glib.git +revision=glib-2-62 + +[provide] +dependency_names = glib-2.0, gobject-2.0, gio-2.0 +``` + +With older version of glib dependency variable names need to be specified: +```ini +[wrap-git] +url=https://gitlab.gnome.org/GNOME/glib.git +revision=glib-2-62 + +[provide] +glib-2.0=glib_dep +gobject-2.0=gobject_dep +gio-2.0=gio_dep +``` + +Programs can also be provided by wrap files, with the `program_names` key: +```ini +[provide] +program_names = myprog, otherprog +``` + +With such wrap file, `find_program('myprog')` will automatically fallback to use +the subproject, assuming it uses `meson.override_find_program('myprog')`. + +## Using wrapped projects + +Wraps provide a convenient way of obtaining a project into your subproject directory. +Then you use it as a regular subproject (see [subprojects](Subprojects.md)). + +## Getting wraps + +Usually you don't want to write your wraps by hand. + +There is an online repository called [WrapDB](https://wrapdb.mesonbuild.com) that provides +many dependencies ready to use. You can read more about WrapDB [here](Using-the-WrapDB.md). + +There is also a Meson subcommand to get and manage wraps (see [using wraptool](Using-wraptool.md)). diff --git a/meson/docs/markdown/Wrap-maintainer-tools.md b/meson/docs/markdown/Wrap-maintainer-tools.md new file mode 100644 index 000000000..717d0d245 --- /dev/null +++ b/meson/docs/markdown/Wrap-maintainer-tools.md @@ -0,0 +1,17 @@ +# Wrap maintainer tools + +The [mesonwrap repository](https://github.com/mesonbuild/mesonwrap) provides tools +to maintain the WrapDB. Read-only features such can be used by anyone without Meson admin rights. + +## Personal access token + +Some tools require access to the Github API. +A [personal access token](https://github.com/settings/tokens) may be required +if the freebie Github API quota is exhausted. `public_repo` scope is required +for write operations. + +``` +$ cat ~/.config/mesonwrap.ini +[mesonwrap] +github_token = +``` diff --git a/meson/docs/markdown/Wrap-review-guidelines.md b/meson/docs/markdown/Wrap-review-guidelines.md new file mode 100644 index 000000000..3e41a8d62 --- /dev/null +++ b/meson/docs/markdown/Wrap-review-guidelines.md @@ -0,0 +1,36 @@ +# Wrap review guidelines + +In order to get a package in the Wrap database it must be reviewed and +accepted by someone with admin rights. Here is a list of items to +check in the review. If some item is not met it does not mean that the +package is rejected. What should be done will be determined on a +case-by-case basis. Similarly meeting all these requirements does not +guarantee that the package will get accepted. Use common sense. + +The review process is partially automated by the [mesonwrap](Wrap-maintainer-tools.md) +`review` tool. + +``` +mesonwrap review zlib --pull-request=1 [--approve] +``` + +Since not every check can be automated please pay attention to the following during the review: + +- Download link points to an authoritative upstream location. +- Version branch is created from master. +- Except for the existing code, `LICENSE.build` is mandatory. +- `project()` has a version and it matches the source version. +- `project()` has a license. +- Complex `configure_file()` inputs are documented. + If the file is a copy of a project file make sure it is clear what was changed. +- Unit tests are enabled if the project provides them. +- There are no guidelines if `install()` is a good or a bad thing in wraps. +- If the project can't be tested on the host platform consider using the `--cross-file` flag. + See [the issue](https://github.com/mesonbuild/mesonwrap/issues/125). + +Encourage wrap readability. Use your own judgement. + +## Approval + +If the code looks good use the `--approve` flag to merge it. +The tool automatically creates a release. diff --git a/meson/docs/markdown/_Sidebar.md b/meson/docs/markdown/_Sidebar.md new file mode 100644 index 000000000..2637d686d --- /dev/null +++ b/meson/docs/markdown/_Sidebar.md @@ -0,0 +1,14 @@ +## Quick References + +* [Functions](Reference-manual.md) +* [Options](Build-options.md) +* [Configuration](Configuration.md) +* [Dependencies](Dependencies.md) +* [Tests](Unit-tests.md) +* [Syntax](Syntax.md) + +### [Modules](Module-reference.md) + +* [gnome](Gnome-module.md) +* [i18n](i18n-module.md) +* [pkgconfig](Pkgconfig-module.md) diff --git a/meson/docs/markdown/fallback-wraptool.md b/meson/docs/markdown/fallback-wraptool.md new file mode 100644 index 000000000..9f4814b21 --- /dev/null +++ b/meson/docs/markdown/fallback-wraptool.md @@ -0,0 +1,43 @@ +--- +title: fallback wraptool +... + +# In case of emergency + +In case wraptool is down we have created a backup script that you can +use to download wraps directly from the GitHub repos. It is not as +slick and may have bugs but at least it will allow you to use wraps. + +## Using it + +To list all available wraps: + + ghwt.py list + +To install a wrap, go to your source root, make sure that the +`subprojects` directory exists and run this command: + + ghwt.py install [] + +This will stage the subproject ready to use. If you have multiple +subprojects you need to download them all manually. + +Specifying branch name is optional. If not specified, the list +of potential branches is sorted alphabetically and the last branch is +used. + +*Note* The tool was added in 0.32.0, for versions older than that you +need to delete the `foo.wrap` file to work around this issue. + +## How to upgrade an existing dir/fix broken state/any other problem + +Nuke the contents of `subprojects` and start again. + +## Known issues + +Some repositories show up in the list but are not installable. They +would not show up in the real WrapDB because they are works in +progress. + +GitHub web API limits the amount of queries you can do to 60/hour. If +you exceed that you need to wait for the timer to reset. diff --git a/meson/docs/markdown/howtox.md b/meson/docs/markdown/howtox.md new file mode 100644 index 000000000..c89f883cf --- /dev/null +++ b/meson/docs/markdown/howtox.md @@ -0,0 +1,286 @@ +# How do I do X in Meson? + +This page lists code snippets for common tasks. These are written +mostly using the C compiler, but the same approach should work on +almost all other compilers. + +## Set compiler + +When first running Meson, set it in an environment variable. + +```console +$ CC=mycc meson +``` + +Note that environment variables like `CC` only works in native builds. The `CC` +refers to the compiler for the host platform, that is the compiler used to +compile programs that run on the machine we will eventually install the project +on. The compiler used to build things that run on the machine we do the +building can be specified with `CC_FOR_BUILD`. You can use it in cross builds. + +Note that environment variables are never the idiomatic way to do anything with +Meson, however. It is better to use the native and cross files. And the tools +for the host platform in cross builds can only be specified with a cross file. + +There is a table of all environment variables supported [Here](Reference-tables.md#compiler-and-linker-selection-variables) + + +## Set dynamic linker + +*New in 0.53.0* + +Like the compiler, the linker is selected via the `_LD` +environment variable, or through the `_ld` entry in a native +or cross file. You must be aware of whether you're using a compiler that +invokes the linker itself (most compilers including GCC and Clang) or a +linker that is invoked directly (when using MSVC or compilers that act like +it, including Clang-Cl). With the former `c_ld` or `CC_LD` should be the value +to pass to the compiler's special argument (such as `-fuse-ld` with clang and +gcc), with the latter it should be an executable, such as `lld-link.exe`. + +*NOTE* In meson 0.53.0 the `ld` entry in the cross/native file and the `LD` +environment variable were used, this resulted in a large number of regressions +and was changed in 0.53.1 to `_ld` and `_LD`. + +```console +$ CC=clang CC_LD=lld meson +``` + +or + +```console +$ CC=clang-cl CC_LD=link meson +``` + +or in a cross or native file: + +```ini +[binaries] +c = 'clang' +c_ld = 'lld' +``` + +There is a table of all environment variables supported [Here](Reference-tables.md#compiler-and-linker-selection-variables) + + +## Set default C/C++ language version + +```meson +project('myproj', 'c', 'cpp', + default_options : ['c_std=c11', 'cpp_std=c++11']) +``` + +The language version can also be set on a per-target basis. + +```meson +executable(..., override_options : ['c_std=c11']) +``` + +## Enable threads + +Lots of people seem to do this manually with `find_library('pthread')` +or something similar. Do not do that. It is not portable. Instead do +this. + +```meson +thread_dep = dependency('threads') +executable(..., dependencies : thread_dep) +``` + +## Set extra compiler and linker flags from the outside (when e.g. building distro packages) + +The behavior is the same as with other build systems, with environment +variables during first invocation. Do not use these when you need to rebuild +the source + +```console +$ CFLAGS=-fsomething LDFLAGS=-Wl,--linker-flag meson +``` + +## Use an argument only with a specific compiler + +First check which arguments to use. + +```meson +if meson.get_compiler('c').get_id() == 'clang' + extra_args = ['-fclang-flag'] +else + extra_args = [] +endif +``` + +Then use it in a target. + +```meson +executable(..., c_args : extra_args) +``` + +If you want to use the arguments on all targets, then do this. + +```meson +if meson.get_compiler('c').get_id() == 'clang' + add_global_arguments('-fclang-flag', language : 'c') +endif +``` + +## Set a command's output to configuration + +```meson +txt = run_command('script', 'argument').stdout().strip() +cdata = configuration_data() +cdata.set('SOMETHING', txt) +configure_file(...) +``` + +## Generate a runnable script with `configure_file` + +`configure_file` preserves metadata so if your template file has +execute permissions, the generated file will have them too. + +## Producing a coverage report + +First initialize the build directory with this command. + +```console +$ meson -Db_coverage=true +``` + +Then issue the following commands. + +```console +$ meson compile +$ meson test +$ meson compile coverage-html (or coverage-xml) +``` + +The coverage report can be found in the meson-logs subdirectory. + +*New in 0.55.0* llvm-cov support for use with clang + +## Add some optimization to debug builds + +By default the debug build does not use any optimizations. This is the +desired approach most of the time. However some projects benefit from +having some minor optimizations enabled. GCC even has a specific +compiler flag `-Og` for this. To enable its use, just issue the +following command. + +```console +$ meson configure -Dc_args=-Og +``` + +This causes all subsequent builds to use this command line argument. + +## Use address sanitizer + +Clang comes with a selection of analysis tools such as the [address +sanitizer](https://clang.llvm.org/docs/AddressSanitizer.html). Meson +has native support for these with the `b_sanitize` option. + +```console +$ meson -Db_sanitize=address +``` + +After this you just compile your code and run the test suite. Address +sanitizer will abort executables which have bugs so they show up as +test failures. + +## Use Clang static analyzer + +Install scan-build and configure your project. Then do this: + +```console +$ meson compile scan-build +``` + +You can use the `SCANBUILD` environment variable to choose the +scan-build executable. + +```console +$ SCANBUILD= meson compile scan-build +``` + + +## Use profile guided optimization + +Using profile guided optimization with GCC is a two phase +operation. First we set up the project with profile measurements +enabled and compile it. + +```console +$ meson setup -Db_pgo=generate +$ meson compile -C builddir +``` + +Then we need to run the program with some representative input. This +step depends on your project. + +Once that is done we change the compiler flags to use the generated +information and rebuild. + +```console +$ meson configure -Db_pgo=use +$ meson compile +``` + +After these steps the resulting binary is fully optimized. + +## Add math library (`-lm`) portably + +Some platforms (e.g. Linux) have a standalone math library. Other +platforms (pretty much everyone else) do not. How to specify that `m` +is used only when needed? + +```meson +cc = meson.get_compiler('c') +m_dep = cc.find_library('m', required : false) +executable(..., dependencies : m_dep) +``` + +## Install an executable to `libexecdir` + +```meson +executable(..., install : true, install_dir : get_option('libexecdir')) +``` + +## Use existing `Find.cmake` files + +Meson can use the CMake `find_package()` ecosystem if CMake is installed. +To find a dependency with custom `Find.cmake`, set the `cmake_module_path` +property to the path in your project where the CMake scripts are stored. + +Example for a `FindCmakeOnlyDep.cmake` in a `cmake` subdirectory: + +```meson +cm_dep = dependency('CmakeOnlyDep', cmake_module_path : 'cmake') +``` + +The `cmake_module_path` property is only needed for custom CMake scripts. System +wide CMake scripts are found automatically. + +More information can be found [here](Dependencies.md#cmake) + +## Get a default not-found dependency? + +```meson +null_dep = dependency('', required : false) +``` + +This can be used in cases where you want a default value, but might override it +later. + +```meson +# Not needed on Windows! +my_dep = dependency('', required : false) +if host_machine.system() in ['freebsd', 'netbsd', 'openbsd', 'dragonfly'] + my_dep = dependency('some dep', required : false) +elif host_machine.system() == 'linux' + my_dep = dependency('some other dep', required : false) +endif + +executable( + 'myexe', + my_sources, + deps : [my_dep] +) +``` diff --git a/meson/docs/markdown/i18n-module.md b/meson/docs/markdown/i18n-module.md new file mode 100644 index 000000000..4948fab42 --- /dev/null +++ b/meson/docs/markdown/i18n-module.md @@ -0,0 +1,53 @@ +# I18n module + +This module provides internationalisation and localisation functionality. + +## Usage + +To use this module, just do: **`i18n = import('i18n')`**. The +following functions will then be available as methods on the object +with the name `i18n`. You can, of course, replace the name `i18n` with +anything else. + +### i18n.gettext() + +Sets up gettext localisation so that translations are built and placed +into their proper locations during install. Takes one positional +argument which is the name of the gettext module. + +* `args`: list of extra arguments to pass to `xgettext` when + generating the pot file +* `data_dirs`: (*Added 0.36.0*) list of directories to be set for + `GETTEXTDATADIRS` env var (Requires gettext 0.19.8+), used for local + its files +* `languages`: list of languages that are to be generated. As of + 0.37.0 this is optional and the + [LINGUAS](https://www.gnu.org/software/gettext/manual/html_node/po_002fLINGUAS.html) + file is read. +* `preset`: (*Added 0.37.0*) name of a preset list of arguments, + current option is `'glib'`, see + [source](https://github.com/mesonbuild/meson/blob/master/mesonbuild/modules/i18n.py) + for for their value +* `install`: (*Added 0.43.0*) if false, do not install the built translations. +* `install_dir`: (*Added 0.50.0*) override default install location, default is `localedir` + +This function also defines targets for maintainers to use: +**Note**: These output to the source directory + +* `-pot`: runs `xgettext` to regenerate the pot file +* `-update-po`: regenerates the `.po` files from current `.pot` file +* `-gmo`: builds the translations without installing + +### i18n.merge_file() + +This merges translations into a text file using `msgfmt`. See +[custom_target](Reference-manual.md#custom_target) +for normal keywords. In addition it accepts these keywords: + +* `data_dirs`: (*Added 0.41.0*) list of directories for its files (See + also `i18n.gettext()`) +* `po_dir`: directory containing translations, relative to current directory +* `type`: type of file, valid options are `'xml'` (default) and `'desktop'` +* `args`: (*Added 0.51.0*) list of extra arguments to pass to `msgfmt` + +*Added 0.37.0* diff --git a/meson/docs/markdown/images/buildtime.png b/meson/docs/markdown/images/buildtime.png new file mode 100644 index 000000000..2a44422f0 Binary files /dev/null and b/meson/docs/markdown/images/buildtime.png differ diff --git a/meson/docs/markdown/images/conftime.png b/meson/docs/markdown/images/conftime.png new file mode 100644 index 000000000..63754dbab Binary files /dev/null and b/meson/docs/markdown/images/conftime.png differ diff --git a/meson/docs/markdown/images/emptytime.png b/meson/docs/markdown/images/emptytime.png new file mode 100644 index 000000000..d80eab9f2 Binary files /dev/null and b/meson/docs/markdown/images/emptytime.png differ diff --git a/meson/docs/markdown/images/glib_build.png b/meson/docs/markdown/images/glib_build.png new file mode 100644 index 000000000..ddb994780 Binary files /dev/null and b/meson/docs/markdown/images/glib_build.png differ diff --git a/meson/docs/markdown/images/glib_conf.png b/meson/docs/markdown/images/glib_conf.png new file mode 100644 index 000000000..5de60d5a0 Binary files /dev/null and b/meson/docs/markdown/images/glib_conf.png differ diff --git a/meson/docs/markdown/images/glib_empty.png b/meson/docs/markdown/images/glib_empty.png new file mode 100644 index 000000000..5976e7f46 Binary files /dev/null and b/meson/docs/markdown/images/glib_empty.png differ diff --git a/meson/docs/markdown/images/glib_link.png b/meson/docs/markdown/images/glib_link.png new file mode 100644 index 000000000..23d90442b Binary files /dev/null and b/meson/docs/markdown/images/glib_link.png differ diff --git a/meson/docs/markdown/images/gtksample.png b/meson/docs/markdown/images/gtksample.png new file mode 100644 index 000000000..b6557c4e1 Binary files /dev/null and b/meson/docs/markdown/images/gtksample.png differ diff --git a/meson/docs/markdown/images/py3-install-1.png b/meson/docs/markdown/images/py3-install-1.png new file mode 100644 index 000000000..74f081938 Binary files /dev/null and b/meson/docs/markdown/images/py3-install-1.png differ diff --git a/meson/docs/markdown/images/py3-install-2.png b/meson/docs/markdown/images/py3-install-2.png new file mode 100644 index 000000000..9a8f1fe65 Binary files /dev/null and b/meson/docs/markdown/images/py3-install-2.png differ diff --git a/meson/docs/markdown/images/py3-install-3.png b/meson/docs/markdown/images/py3-install-3.png new file mode 100644 index 000000000..b702910ab Binary files /dev/null and b/meson/docs/markdown/images/py3-install-3.png differ diff --git a/meson/docs/markdown/index.md b/meson/docs/markdown/index.md new file mode 100644 index 000000000..ccd341fc6 --- /dev/null +++ b/meson/docs/markdown/index.md @@ -0,0 +1,59 @@ +--- +render-subpages: false +... + +# The Meson Build system + +## Overview + +Meson is an open source build system meant to be both extremely fast, +and, even more importantly, as user friendly as possible. + +The main design point of Meson is that every moment a developer spends +writing or debugging build definitions is a second wasted. So is every +second spent waiting for the build system to actually start compiling +code. + +## Features + +* multiplatform support for Linux, macOS, Windows, GCC, Clang, Visual Studio and others +* supported languages include C, C++, D, Fortran, Java, Rust +* build definitions in a very readable and user friendly non-Turing complete DSL +* cross compilation for many operating systems as well as bare metal +* optimized for extremely fast full and incremental builds without sacrificing correctness +* built-in multiplatform dependency provider that works together with distro packages +* fun! + +## A full manual + +The documentation on this site is freely available for all. However +there is also a full separate manual available for purchase [on this +web page](https://meson-manual.com/). + +## Community + +There are two main methods of connecting with other Meson +developers. The first one is the mailing list, which is hosted at +[Google Groups](https://groups.google.com/forum/#!forum/mesonbuild). + +The second way is via IRC. The channel to use is `#mesonbuild` at +[Freenode](https://freenode.net/). + +### [Projects using Meson](Users.md) + +Many projects out there are using Meson and their communities are also +a great resource for learning about what (and what not too!) do when +trying to convert to using Meson. + +[A short list of Meson users can be found here](Users.md) +but there are many more. We would love to hear about your success +stories too and how things could be improved too! + +## Development + +All development on Meson is done on the [GitHub +project](https://github.com/mesonbuild/meson). Instructions for +contributing can be found on the [contribution page](Contributing.md). + + +You do not need to sign a CLA to contribute to Meson. diff --git a/meson/docs/markdown/legal.md b/meson/docs/markdown/legal.md new file mode 100644 index 000000000..a14b7b93e --- /dev/null +++ b/meson/docs/markdown/legal.md @@ -0,0 +1,26 @@ +# Legal information + +Meson is copyrighted by all members of the Meson development team. +Meson is licensed under the [Apache 2 license]. + +Meson is a registered trademark of Jussi Pakkanen. + +## Meson logo licensing + +Meson's logo is (C) Jussi Pakkanen and used by the Meson project with +specific permission. It is not licensed under the same terms as the +rest of the project. + +If you are a third party and want to use the Meson logo, you must +first obtain written permission from Jussi Pakkanen. + +## Website licensing + +The meson website content is released under [Creative Commons +Attribution-ShareAlike 4.0 International]. + +All code samples on the website are released under [CC0 1.0 Universal]. + +[Creative Commons Attribution-ShareAlike 4.0 International]: https://creativecommons.org/licenses/by-sa/4.0/ +[CC0 1.0 Universal]: https://creativecommons.org/publicdomain/zero/1.0/ +[Apache 2 license]: https://www.apache.org/licenses/LICENSE-2.0 diff --git a/meson/docs/markdown/snippets/add_release_note_snippets_here b/meson/docs/markdown/snippets/add_release_note_snippets_here new file mode 100644 index 000000000..cdd57c647 --- /dev/null +++ b/meson/docs/markdown/snippets/add_release_note_snippets_here @@ -0,0 +1,3 @@ +DO NOT ADD ANYTHING TO THIS FILE! + +Add release note snippets to their own files, one file per snippet. diff --git a/meson/docs/markdown_dynamic/Commands.md b/meson/docs/markdown_dynamic/Commands.md new file mode 100644 index 000000000..a35b4da96 --- /dev/null +++ b/meson/docs/markdown_dynamic/Commands.md @@ -0,0 +1,296 @@ +# Command-line commands + +There are two different ways of invoking Meson. First, you can run it directly +from the source tree with the command `/path/to/source/meson.py`. Meson may +also be installed in which case the command is simply `meson`. In this manual +we only use the latter format for simplicity. + +Meson is invoked using the following syntax: +`meson [COMMAND] [COMMAND_OPTIONS]` + +This section describes all available commands and some of their Optional arguments. +The most common workflow is to run [`setup`](#setup), followed by [`compile`](#compile), and then [`install`](#install). + +For the full list of all available options for a specific command use the following syntax: +`meson COMMAND --help` + +### configure + +``` +{{ cmd_help['configure']['usage'] }} +``` + +Changes options of a configured meson project. + +``` +{{ cmd_help['configure']['arguments'] }} +``` + +Most arguments are the same as in [`setup`](#setup). + +Note: reconfiguring project will not reset options to their default values (even if they were changed in `meson.build`). + +#### Examples: + +List all available options: +``` +meson configure builddir +``` + +Change value of a single option: +``` +meson configure builddir -Doption=new_value +``` + +### compile + +*(since 0.54.0)* + +``` +{{ cmd_help['compile']['usage'] }} +``` + +Builds a default or a specified target of a configured meson project. + +``` +{{ cmd_help['compile']['arguments'] }} +``` + +`--verbose` argument is available since 0.55.0. + +#### Targets + +*(since 0.55.0)* + +`TARGET` has the following syntax `[PATH/]NAME[:TYPE]`, where: +- `NAME`: name of the target from `meson.build` (e.g. `foo` from `executable('foo', ...)`). +- `PATH`: path to the target relative to the root `meson.build` file. Note: relative path for a target specified in the root `meson.build` is `./`. +- `TYPE`: type of the target. Can be one of the following: 'executable', 'static_library', 'shared_library', 'shared_module', 'custom', 'run', 'jar'. + +`PATH` and/or `TYPE` can be ommited if the resulting `TARGET` can be used to uniquely identify the target in `meson.build`. + +#### Backend specific arguments + +*(since 0.55.0)* + +`BACKEND-args` use the following syntax: + +If you only pass a single string, then it is considered to have all values separated by commas. Thus invoking the following command: + +``` +$ meson compile --ninja-args=-n,-d,explain +``` + +would add `-n`, `-d` and `explain` arguments to ninja invocation. + +If you need to have commas or spaces in your string values, then you need to pass the value with proper shell quoting like this: + +``` +$ meson compile "--ninja-args=['a,b', 'c d']" +``` + +#### Examples: + +Build the project: +``` +meson compile -C builddir +``` + +Execute a dry run on ninja backend with additional debug info: +``` +meson compile --ninja-args=-n,-d,explain +``` + +Build three targets: two targets that have the same `foo` name, but different type, and a `bar` target: +``` +meson compile foo:shared_library foo:static_library bar +``` + +Produce a coverage html report (if available): +``` +meson compile coverage-html +``` + +### dist + +*(since 0.52.0)* + +``` +{{ cmd_help['dist']['usage'] }} +``` + +Generates a release archive from the current source tree. + +``` +{{ cmd_help['dist']['arguments'] }} +``` + +See [notes about creating releases](Creating-releases.md) for more info. + +#### Examples: + +Create a release archive: +``` +meson dist -C builddir +``` + +### init + +*(since 0.45.0)* + +``` +{{ cmd_help['init']['usage'] }} +``` + +Creates a basic set of build files based on a template. + +``` +{{ cmd_help['init']['arguments'] }} +``` + +#### Examples: + +Create a project in `sourcedir`: +``` +meson init -C sourcedir +``` + +### introspect + +``` +{{ cmd_help['introspect']['usage'] }} +``` + +Displays information about a configured meson project. + +``` +{{ cmd_help['introspect']['arguments'] }} +``` + +#### Examples: + +Display basic information about a configured project in `builddir`: +``` +meson introspect builddir +``` + +### install + +*(since 0.47.0)* + +``` +{{ cmd_help['install']['usage'] }} +``` + +Installs the project to the prefix specified in [`setup`](#setup). + +``` +{{ cmd_help['install']['arguments'] }} +``` + +See [the installation documentation](Installing.md) for more info. + +#### Examples: + +Install project to `prefix`: +``` +meson install -C builddir +``` + +Install project to `$DESTDIR/prefix`: +``` +DESTDIR=/path/to/staging/area meson install -C builddir +``` + +### rewrite + +*(since 0.50.0)* + +``` +{{ cmd_help['rewrite']['usage'] }} +``` + +Modifies the meson project. + +``` +{{ cmd_help['rewrite']['arguments'] }} +``` + +See [the meson file rewriter documentation](Rewriter.md) for more info. + +### setup + +``` +{{ cmd_help['setup']['usage'] }} +``` + +Configures a build directory for the meson project. + +This is the default meson command (invoked if there was no COMMAND supplied). + +``` +{{ cmd_help['setup']['arguments'] }} +``` + +See [meson introduction page](Running-Meson.md#configuring-the-build-directory) for more info. + +#### Examples: + +Configures `builddir` with default values: +``` +meson setup builddir +``` + +### subprojects + +*(since 0.49.0)* + +``` +{{ cmd_help['subprojects']['usage'] }} +``` + +Manages subprojects of the meson project. + +``` +{{ cmd_help['subprojects']['arguments'] }} +``` + +### test + +``` +{{ cmd_help['test']['usage'] }} +``` + +Run tests for the configure meson project. + +``` +{{ cmd_help['test']['arguments'] }} +``` + +See [the unit test documentation](Unit-tests.md) for more info. + +#### Examples: + +Run tests for the project: +``` +meson test -C builddir +``` + +Run only `specific_test_1` and `specific_test_2`: +``` +meson test -C builddir specific_test_1 specific_test_2 +``` + +### wrap + +``` +{{ cmd_help['wrap']['usage'] }} +``` + +An utility to manage WrapDB dependencies. + +``` +{{ cmd_help['wrap']['arguments'] }} +``` + +See [the WrapDB tool documentation](Using-wraptool.md) for more info. diff --git a/meson/docs/meson.build b/meson/docs/meson.build new file mode 100644 index 000000000..c07a2001e --- /dev/null +++ b/meson/docs/meson.build @@ -0,0 +1,45 @@ +project('Meson documentation', version: '1.0') + +cur_bdir = meson.current_build_dir() + +# Copy all files to build dir, since HotDoc uses relative paths +run_command( + files('../tools/copy_files.py'), + '-C', meson.current_source_dir(), + '--output-dir', cur_bdir, + 'markdown', 'theme', 'sitemap.txt', + check: true) + +# Only the script knows which files are being generated +docs_gen = custom_target( + 'gen_docs', + input: files('markdown/index.md'), + output: 'gen_docs.dummy', + command: [ + files('../tools/regenerate_docs.py'), + '--output-dir', join_paths(cur_bdir, 'markdown'), + '--dummy-output-file', '@OUTPUT@', + ], + build_by_default: true, + install: false) + +hotdoc = import('hotdoc') +documentation = hotdoc.generate_doc(meson.project_name(), + project_version: meson.project_version(), + sitemap: join_paths(cur_bdir, 'sitemap.txt'), + build_by_default: true, + depends: docs_gen, + index: join_paths(cur_bdir, 'markdown/index.md'), + install: false, + extra_assets: ['images/'], + include_paths: [join_paths(cur_bdir, 'markdown')], + default_license: 'CC-BY-SAv4.0', + html_extra_theme: join_paths(cur_bdir, 'theme', 'extra'), + git_upload_repository: 'git@github.com:jpakkane/jpakkane.github.io.git', + edit_on_github_repository: 'https://github.com/mesonbuild/meson/', + syntax_highlighting_activate: true, +) + +run_target('upload', + command: [find_program('hotdoc'), 'run', '--conf-file', documentation.config_path()] +) diff --git a/meson/docs/sitemap.txt b/meson/docs/sitemap.txt new file mode 100644 index 000000000..ac7487000 --- /dev/null +++ b/meson/docs/sitemap.txt @@ -0,0 +1,124 @@ +index.md + Getting-meson.md + Quick-guide.md + Tutorial.md + Manual.md + Overview.md + Running-Meson.md + Commands.md + Builtin-options.md + Using-with-Visual-Studio.md + Meson-sample.md + Syntax.md + Machine-files.md + Native-environments.md + Build-targets.md + Include-directories.md + Installing.md + Adding-arguments.md + Configuration.md + Compiler-properties.md + Dependencies.md + Threads.md + External-commands.md + Precompiled-headers.md + Unity-builds.md + Feature-autodetection.md + Generating-sources.md + Unit-tests.md + Cross-compilation.md + Localisation.md + Build-options.md + Subprojects.md + Disabler.md + Modules.md + CMake-module.md + Dlang-module.md + Fs-module.md + Gnome-module.md + Hotdoc-module.md + i18n-module.md + Icestorm-module.md + Pkgconfig-module.md + Python-module.md + Python-3-module.md + Qt4-module.md + Qt5-module.md + RPM-module.md + Simd-module.md + SourceSet-module.md + Windows-module.md + Cuda-module.md + Keyval-module.md + Java.md + Vala.md + D.md + IDE-integration.md + Custom-build-targets.md + Build-system-converters.md + Configuring-a-build-directory.md + Run-targets.md + Creating-releases.md + Creating-OSX-packages.md + Creating-Linux-binaries.md + Project-templates.md + Reference-manual.md + Reference-tables.md + Style-guide.md + Rewriter.md + FAQ.md + Reproducible-builds.md + howtox.md + Wrap-dependency-system-manual.md + Adding-new-projects-to-wrapdb.md + Using-the-WrapDB.md + Using-wraptool.md + Wrap-maintainer-tools.md + Wrap-best-practices-and-tips.md + Wrap-review-guidelines.md + Shipping-prebuilt-binaries-as-wraps.md + fallback-wraptool.md + Release-notes.md + Release-notes-for-0.55.0.md + Release-notes-for-0.54.0.md + Release-notes-for-0.53.0.md + Release-notes-for-0.52.0.md + Release-notes-for-0.51.0.md + Release-notes-for-0.50.0.md + Release-notes-for-0.49.0.md + Release-notes-for-0.48.0.md + Release-notes-for-0.47.0.md + Release-notes-for-0.46.0.md + Release-notes-for-0.45.0.md + Release-notes-for-0.44.0.md + Release-notes-for-0.43.0.md + Release-notes-for-0.42.0.md + Release-notes-for-0.41.0.md + Release-notes-for-0.40.0.md + Release-notes-for-0.39.0.md + Release-notes-for-0.38.0.md + Release-notes-for-0.37.0.md + Additional.md + Release-procedure.md + Performance-comparison.md + ARM-performance-test.md + Simple-comparison.md + Comparisons.md + Conference-presentations.md + Contact-information.md + Continuous-Integration.md + Design-rationale.md + IndepthTutorial.md + In-the-press.md + Mixing-build-systems.md + Pkg-config-files.md + Playground.md + Porting-from-autotools.md + Use-of-Python.md + Users.md + Using-multiple-build-directories.md + Vs-External.md + Contributing.md + MesonCI.md + legal.md + Videos.md diff --git a/meson/docs/theme/extra/images/favicon.png b/meson/docs/theme/extra/images/favicon.png new file mode 100644 index 000000000..6800fe80e Binary files /dev/null and b/meson/docs/theme/extra/images/favicon.png differ diff --git a/meson/docs/theme/extra/images/meson_logo.png b/meson/docs/theme/extra/images/meson_logo.png new file mode 100644 index 000000000..1b3915d39 Binary files /dev/null and b/meson/docs/theme/extra/images/meson_logo.png differ diff --git a/meson/docs/theme/extra/prism_components/prism-meson.js b/meson/docs/theme/extra/prism_components/prism-meson.js new file mode 100644 index 000000000..242af19b4 --- /dev/null +++ b/meson/docs/theme/extra/prism_components/prism-meson.js @@ -0,0 +1,16 @@ +Prism.languages.meson= { + 'triple-quoted-string': { + 'pattern': /'''[\s\S]*?'''/, + 'alias': 'string' + }, + 'comment': /#.*/, + 'string': /'(?:\\'|[^'])*'/, + 'number': /\b\d+(?:\.\d+)?\b/, + 'keyword': /\b(?:if|else|elif|endif|foreach|endforeach)\b/, + 'function': /(?=\.|\b)[a-zA-Z_]+\s*(?=\()/, + 'boolean': /\b(?:true|false)\b/, + 'builtin': /\b(?:meson|host_machine|target_machine|build_machine)(?=\.)/, + 'operator': /(?:[<>=*+\-/!]?=|%|\/|\*|-|\+|\b(?:or|and|not)\b)/, + 'punctuation': /[(),[\]]/ + // TODO: Handle ternary ?: +}; \ No newline at end of file diff --git a/meson/docs/theme/extra/prism_components/prism-meson.min.js b/meson/docs/theme/extra/prism_components/prism-meson.min.js new file mode 100644 index 000000000..7bf90e06f --- /dev/null +++ b/meson/docs/theme/extra/prism_components/prism-meson.min.js @@ -0,0 +1 @@ +Prism.languages.meson={"triple-quoted-string":{pattern:/'''[\s\S]*?'''/,alias:"string"},comment:/#.*/,string:/'(?:\\'|[^'])*'/,number:/\b\d+(?:\.\d+)?\b/,keyword:/\b(?:if|else|elif|endif|foreach|endforeach)\b/,"function":/(?=\.|\b)[a-zA-Z_]+\s*(?=\()/,"boolean":/\b(?:true|false)\b/,builtin:/\b(?:meson|host_machine|target_machine|build_machine)(?=\.)/,operator:/(?:[<>=*+\-\/!]?=|%|\/|\*|-|\+|\b(?:or|and|not)\b)/,punctuation:/[(),[\]]/}; \ No newline at end of file diff --git a/meson/docs/theme/extra/templates/brand-logo.html b/meson/docs/theme/extra/templates/brand-logo.html new file mode 100644 index 000000000..7a12347d9 --- /dev/null +++ b/meson/docs/theme/extra/templates/brand-logo.html @@ -0,0 +1 @@ +Home diff --git a/meson/docs/theme/extra/templates/extra_head.html b/meson/docs/theme/extra/templates/extra_head.html new file mode 100644 index 000000000..29b7477ba --- /dev/null +++ b/meson/docs/theme/extra/templates/extra_head.html @@ -0,0 +1,2 @@ + + diff --git a/meson/docs/theme/extra/templates/license.html b/meson/docs/theme/extra/templates/license.html new file mode 100644 index 000000000..551878295 --- /dev/null +++ b/meson/docs/theme/extra/templates/license.html @@ -0,0 +1,7 @@ +@require(license, logo_path) + +
+ +
+ Website licensing information are available on the Legal page. +
diff --git a/meson/docs/theme/extra/templates/navbar_center.html b/meson/docs/theme/extra/templates/navbar_center.html new file mode 100644 index 000000000..9934be792 --- /dev/null +++ b/meson/docs/theme/extra/templates/navbar_center.html @@ -0,0 +1 @@ +

The Meson Build System

diff --git a/meson/docs/theme/extra/templates/navbar_links.html b/meson/docs/theme/extra/templates/navbar_links.html new file mode 100644 index 000000000..832bd2c49 --- /dev/null +++ b/meson/docs/theme/extra/templates/navbar_links.html @@ -0,0 +1,49 @@ +@require(page) + + +\ + diff --git a/meson/ghwt.py b/meson/ghwt.py new file mode 100755 index 000000000..5a71a3885 --- /dev/null +++ b/meson/ghwt.py @@ -0,0 +1,132 @@ +#!/usr/bin/env python3 + +# Copyright 2016 The Meson development team + +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at + +# http://www.apache.org/licenses/LICENSE-2.0 + +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# ghwt - GitHub WrapTool +# +# An emergency wraptool(1) replacement downloader that downloads +# directly from GitHub in case wrapdb.mesonbuild.com is down. + +import urllib.request, json, sys, os, shutil, subprocess +import configparser, hashlib + +req_timeout = 600.0 +private_repos = {'meson', 'wrapweb', 'meson-ci'} +spdir = 'subprojects' + +def gh_get(url): + r = urllib.request.urlopen(url, timeout=req_timeout) + jd = json.loads(r.read().decode('utf-8')) + return jd + +def list_projects(): + jd = gh_get('https://api.github.com/orgs/mesonbuild/repos') + entries = [entry['name'] for entry in jd] + entries = [e for e in entries if e not in private_repos] + entries.sort() + for i in entries: + print(i) + return 0 + +def unpack(sproj, branch): + tmpdir = os.path.join(spdir, sproj + '_ghwt') + shutil.rmtree(tmpdir, ignore_errors=True) + subprocess.check_call(['git', 'clone', '-b', branch, 'https://github.com/mesonbuild/{}.git'.format(sproj), tmpdir]) + usfile = os.path.join(tmpdir, 'upstream.wrap') + assert(os.path.isfile(usfile)) + config = configparser.ConfigParser(interpolation=None) + config.read(usfile) + outdir = os.path.join(spdir, sproj) + if 'directory' in config['wrap-file']: + outdir = os.path.join(spdir, config['wrap-file']['directory']) + if os.path.isdir(outdir): + print('Subproject is already there. To update, nuke the {} dir and reinstall.'.format(outdir)) + shutil.rmtree(tmpdir) + return 1 + us_url = config['wrap-file']['source_url'] + us = urllib.request.urlopen(us_url, timeout=req_timeout).read() + h = hashlib.sha256() + h.update(us) + dig = h.hexdigest() + should = config['wrap-file']['source_hash'] + if dig != should: + print('Incorrect hash on download.') + print(' expected:', should) + print(' obtained:', dig) + return 1 + ofilename = os.path.join(spdir, config['wrap-file']['source_filename']) + with open(ofilename, 'wb') as ofile: + ofile.write(us) + if 'lead_directory_missing' in config['wrap-file']: + os.mkdir(outdir) + shutil.unpack_archive(ofilename, outdir) + else: + shutil.unpack_archive(ofilename, spdir) + assert(os.path.isdir(outdir)) + shutil.move(os.path.join(tmpdir, '.git'), outdir) + subprocess.check_call(['git', 'reset', '--hard'], cwd=outdir) + shutil.rmtree(tmpdir) + shutil.rmtree(os.path.join(outdir, '.git')) + os.unlink(ofilename) + +def install(sproj, requested_branch=None): + if not os.path.isdir(spdir): + print('Run this in your source root and make sure there is a subprojects directory in it.') + return 1 + blist = gh_get('https://api.github.com/repos/mesonbuild/{}/branches'.format(sproj)) + blist = [b['name'] for b in blist] + blist = [b for b in blist if b != 'master'] + blist.sort() + branch = blist[-1] + if requested_branch is not None: + if requested_branch in blist: + branch = requested_branch + else: + print('Could not find user-requested branch', requested_branch) + print('Available branches for', sproj, ':') + print(blist) + return 1 + print('Using branch', branch) + return unpack(sproj, branch) + +def print_help(): + print('Usage:') + print(sys.argv[0], 'list') + print(sys.argv[0], 'install', 'package_name', '[branch_name]') + +def run(args): + if not args or args[0] == '-h' or args[0] == '--help': + print_help() + return 1 + command = args[0] + args = args[1:] + if command == 'list': + list_projects() + return 0 + elif command == 'install': + if len(args) == 1: + return install(args[0]) + elif len(args) == 2: + return install(args[0], args[1]) + else: + print_help() + return 1 + else: + print('Unknown command') + return 1 + +if __name__ == '__main__': + print('This is an emergency wrap downloader. Use only when wrapdb is down.') + sys.exit(run(sys.argv[1:])) diff --git a/meson/graphics/meson_logo.svg b/meson/graphics/meson_logo.svg new file mode 100644 index 000000000..d5b47bcf4 --- /dev/null +++ b/meson/graphics/meson_logo.svg @@ -0,0 +1,340 @@ + + + + + + + + + + + + + + + + + + + + image/svg+xml + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/meson/graphics/meson_logo_big.png b/meson/graphics/meson_logo_big.png new file mode 100644 index 000000000..e2abe1b24 Binary files /dev/null and b/meson/graphics/meson_logo_big.png differ diff --git a/meson/graphics/wrap_logo.svg b/meson/graphics/wrap_logo.svg new file mode 100644 index 000000000..defc2d86f --- /dev/null +++ b/meson/graphics/wrap_logo.svg @@ -0,0 +1,70 @@ + + + + + + + + + + + + image/svg+xml + + + + + + + + + diff --git a/meson/lgtm.yml b/meson/lgtm.yml new file mode 100644 index 000000000..62d8cb420 --- /dev/null +++ b/meson/lgtm.yml @@ -0,0 +1,4 @@ +extraction: + python: + python_setup: + version: 3 \ No newline at end of file diff --git a/meson/man/meson.1 b/meson/man/meson.1 new file mode 100644 index 000000000..fbbccfaf4 --- /dev/null +++ b/meson/man/meson.1 @@ -0,0 +1,238 @@ +.TH MESON "1" "September 2020" "meson 0.55.3" "User Commands" +.SH NAME +meson - a high productivity build system +.SH DESCRIPTION + +Meson is a build system designed to optimize programmer +productivity. It aims to do this by providing simple, out-of-the-box +support for modern software development tools and practices, such as +unit tests, coverage reports, Valgrind, Ccache and the like. + +The main Meson executable provides many subcommands to access all +the functionality. + +.SH The setup command + +Using Meson is simple and follows the common two-phase +process of most build systems. First you run Meson to +configure your build: + +.B meson setup [ +.I options +.B ] [ +.I build directory +.B ] [ +.I source directory +.B ] + +Note that the build directory must be different from the source +directory. Meson does not support building inside the source directory +and attempting to do that leads to an error. + +After a successful configuration step you can build the source by +running the actual build command in the build directory. The default +backend of Meson is Ninja, which can be invoked like this. + +\fBninja [\fR \fItarget\fR \fB]\fR + +You only need to run the Meson command once: when you first configure +your build dir. After that you just run the build command. Meson will +autodetect changes in your source tree and regenerate all files +needed to build the project. + +The setup command is the default operation. If no actual command is +specified, Meson will assume you meant to do a setup. That means +that you can set up a build directory without the setup command +like this: + +.B meson [ +.I options +.B ] [ +.I build directory +.B ] [ +.I source directory +.B ] + +.SS "options:" +.TP +\fB\-\-version\fR +print version number +.TP +\fB\-\-help\fR +print command line help + +.SH The configure command + +.B meson configure +provides a way to configure a Meson project from the command line. +Its usage is simple: + +.B meson configure [ +.I build directory +.B ] [ +.I options to set +.B ] + +If build directory is omitted, the current directory is used instead. + +If no parameters are set, +.B meson configure +will print the value of all build options to the console. + +To set values, use the \-D command line argument like this. + +.B meson configure \-Dopt1=value1 \-Dopt2=value2 + +.SH The introspect command + +Meson introspect is a command designed to make it simple to integrate with +other tools, such as IDEs. The output of this command is in JSON. + +.B meson introspect [ +.I build directory +.B ] [ +.I option +.B ] + +If build directory is omitted, the current directory is used instead. + +.SS "options:" +.TP +\fB\-\-targets\fR +print all top level targets (executables, libraries, etc) +.TP +\fB\-\-target\-files\fR +print the source files of the given target +.TP +\fB\-\-buildsystem\-files\fR +print all files that make up the build system (meson.build, meson_options.txt etc) +.TP +\fB\-\-tests\fR +print all unit tests +.TP +\fB\-\-help\fR +print command line help + +.SH The test command + +.B meson test +is a helper tool for running test suites of projects using Meson. +The default way of running tests is to invoke the default build command: + +\fBninja [\fR \fItest\fR \fB]\fR + +.B meson test +provides a richer set of tools for invoking tests. + +.B meson test +automatically rebuilds the necessary targets to run tests when used with the Ninja backend. +Upon build failure, +.B meson test +will return an exit code of 125. +This return code tells +.B git bisect run +to skip the current commit. +Thus bisecting using git can be done conveniently like this. + +.B git bisect run meson test -C build_dir + +.SS "options:" +.TP +\fB\-\-repeat\fR +run tests as many times as specified +.TP +\fB\-\-gdb\fR +run tests under gdb +.TP +\fB\-\-list\fR +list all available tests +.TP +\fB\-\-wrapper\fR +invoke all tests via the given wrapper (e.g. valgrind) +.TP +\fB\-C\fR +Change into the given directory before running tests (must be root of build directory). +.TP +\fB\-\-suite\fR +run tests in this suite +.TP +\fB\-\-no\-suite\fR +do not run tests in this suite +.TP +\fB\-\-no\-stdsplit\fR +do not split stderr and stdout in test logs +.TP +\fB\-\-benchmark\fR +run benchmarks instead of tests +.TP +\fB\-\-logbase\fR +base of file name to use for writing test logs +.TP +\fB\-\-num-processes\fR +how many parallel processes to use to run tests +.TP +\fB\-\-verbose\fR +do not redirect stdout and stderr +.TP +\fB\-t\fR +a multiplier to use for test timeout values (usually something like 100 for Valgrind) +.TP +\fB\-\-setup\fR +use the specified test setup + +.SH The wrap command + +Wraptool is a helper utility to manage source dependencies +using the online wrapdb service. + +.B meson wrap < +.I command +.B > [ +.I options +.B ] + +You should run this command in the top level source directory +of your project. + +.SS "Commands:" +.TP +\fBlist\fR +list all available projects +.TP +\fBsearch\fR +search projects by name +.TP +\fBinstall\fR +install a project with the given name +.TP +\fBupdate\fR +update the specified project to latest available version +.TP +\fBinfo\fR +show available versions of the specified project +.TP +\fBstatus\fR +show installed and available versions of currently used subprojects + +.SH EXIT STATUS + +.TP +.B 0 +Successful. +.TP +.B 1 +Usage error, or an error parsing or executing meson.build. +.TP +.B 2 +Internal error. +.TP +.B 125 +.B meson test +could not rebuild the required targets. +.TP + +.SH SEE ALSO + +http://mesonbuild.com/ + +https://wrapdb.mesonbuild.com/ diff --git a/meson/manual tests/1 wrap/main.c b/meson/manual tests/1 wrap/main.c new file mode 100644 index 000000000..df6abe434 --- /dev/null +++ b/meson/manual tests/1 wrap/main.c @@ -0,0 +1,12 @@ +#include +#include + +int main(void) { + sqlite3 *db; + if(sqlite3_open(":memory:", &db) != SQLITE_OK) { + printf("Sqlite failed.\n"); + return 1; + } + sqlite3_close(db); + return 0; +} diff --git a/meson/manual tests/1 wrap/meson.build b/meson/manual tests/1 wrap/meson.build new file mode 100644 index 000000000..aee358d88 --- /dev/null +++ b/meson/manual tests/1 wrap/meson.build @@ -0,0 +1,13 @@ +project('downloader', 'c') + +cc = meson.get_compiler('c') + +s = subproject('sqlite').get_variable('sqlite_dep') +th = dependency('threads') + +libdl = cc.find_library('dl', required : false) + +e = executable('dtest', 'main.c', + dependencies : [th, libdl, s]) + +test('dltest', e) diff --git a/meson/manual tests/1 wrap/subprojects/sqlite.wrap b/meson/manual tests/1 wrap/subprojects/sqlite.wrap new file mode 100644 index 000000000..6d14949e6 --- /dev/null +++ b/meson/manual tests/1 wrap/subprojects/sqlite.wrap @@ -0,0 +1,10 @@ +[wrap-file] +directory = sqlite-amalgamation-3080802 + +source_url = http://sqlite.com/2015/sqlite-amalgamation-3080802.zip +source_filename = sqlite-amalgamation-3080802.zip +source_hash = 5ebeea0dfb75d090ea0e7ff84799b2a7a1550db3fe61eb5f6f61c2e971e57663 + +patch_url = https://wrapdb.mesonbuild.com/v1/projects/sqlite/3080802/5/get_zip +patch_filename = sqlite-3080802-5-wrap.zip +patch_hash = d66469a73fa1344562d56a1d7627d5d0ee4044a77b32d16cf4bbb85741d4c9fd diff --git a/meson/manual tests/10 svn wrap/meson.build b/meson/manual tests/10 svn wrap/meson.build new file mode 100644 index 000000000..23ef1f10d --- /dev/null +++ b/meson/manual tests/10 svn wrap/meson.build @@ -0,0 +1,10 @@ +project('Subversion outchecker', 'c') + +sp = subproject('samplesubproject') + +exe = executable('gitprog', 'prog.c', +include_directories : sp.get_variable('subproj_inc'), +link_with : sp.get_variable('subproj_lib'), +) + +test('maintest', exe) diff --git a/meson/manual tests/10 svn wrap/prog.c b/meson/manual tests/10 svn wrap/prog.c new file mode 100644 index 000000000..6e2c4d861 --- /dev/null +++ b/meson/manual tests/10 svn wrap/prog.c @@ -0,0 +1,6 @@ +#include"subproj.h" + +int main(void) { + subproj_function(); + return 0; +} diff --git a/meson/manual tests/10 svn wrap/subprojects/samplesubproject.wrap b/meson/manual tests/10 svn wrap/subprojects/samplesubproject.wrap new file mode 100644 index 000000000..c8a687e3c --- /dev/null +++ b/meson/manual tests/10 svn wrap/subprojects/samplesubproject.wrap @@ -0,0 +1,4 @@ +[wrap-svn] +directory=samplesubproject +url=https://svn.code.sf.net/p/mesonsubproject/code/trunk +revision=head diff --git a/meson/manual tests/11 wrap imposter/meson.build b/meson/manual tests/11 wrap imposter/meson.build new file mode 100644 index 000000000..d0575acf7 --- /dev/null +++ b/meson/manual tests/11 wrap imposter/meson.build @@ -0,0 +1,8 @@ +project('evil URL') +# showing that new Meson wrap.py code tries to stop imposter WrapDB URLs +# a WrapException is raised. +# +# ERROR: https://wrapdb.mesonbuild.com.invalid/v1/projects/zlib/1.2.11/4/get_zip may be a WrapDB-impersonating URL +# + +subproject('zlib') \ No newline at end of file diff --git a/meson/manual tests/11 wrap imposter/subprojects/zlib.wrap b/meson/manual tests/11 wrap imposter/subprojects/zlib.wrap new file mode 100644 index 000000000..b88f8f2ab --- /dev/null +++ b/meson/manual tests/11 wrap imposter/subprojects/zlib.wrap @@ -0,0 +1,10 @@ +[wrap-file] +directory = zlib-1.2.8 + +source_url = https://zlib.net/zlib-1.2.11.tar.gz +source_filename = zlib-1.2.11.tar.gz +source_hash = c3e5e9fdd5004dcb542feda5ee4f0ff0744628baf8ed2dd5d66f8ca1197cb1a1 + +patch_url = https://wrapdb.mesonbuild.com.invalid/v1/projects/zlib/1.2.11/4/get_zip +patch_filename = zlib-1.2.11-4-wrap.zip +patch_hash = 886b67480dbe73b406ad83a1dd6d9596f93089d90c220ccfc91944c95f1c68c4 \ No newline at end of file diff --git a/meson/manual tests/12 wrap mirror/meson.build b/meson/manual tests/12 wrap mirror/meson.build new file mode 100644 index 000000000..6645bdf26 --- /dev/null +++ b/meson/manual tests/12 wrap mirror/meson.build @@ -0,0 +1,4 @@ +project('downloader') +# this test will timeout, showing that a subdomain isn't caught as masquarading url + +subproject('zlib') diff --git a/meson/manual tests/12 wrap mirror/subprojects/zlib.wrap b/meson/manual tests/12 wrap mirror/subprojects/zlib.wrap new file mode 100644 index 000000000..de0b9ad07 --- /dev/null +++ b/meson/manual tests/12 wrap mirror/subprojects/zlib.wrap @@ -0,0 +1,10 @@ +[wrap-file] +directory = zlib-1.2.8 + +source_url = https://zlib.net/zlib-1.2.11.tar.gz +source_filename = zlib-1.2.11.tar.gz +source_hash = c3e5e9fdd5004dcb542feda5ee4f0ff0744628baf8ed2dd5d66f8ca1197cb1a1 + +patch_url = https://mirror1.wrapdb.mesonbuild.com/v1/projects/zlib/1.2.11/4/get_zip +patch_filename = zlib-1.2.11-4-wrap.zip +patch_hash = 886b67480dbe73b406ad83a1dd6d9596f93089d90c220ccfc91944c95f1c68c4 \ No newline at end of file diff --git a/meson/manual tests/2 multiwrap/meson.build b/meson/manual tests/2 multiwrap/meson.build new file mode 100644 index 000000000..a4c42f468 --- /dev/null +++ b/meson/manual tests/2 multiwrap/meson.build @@ -0,0 +1,12 @@ +project('multiwrap', 'c', + default_options : 'c_std=c99') + +# Using multiple downloaded projects for great justice. + +cc = meson.get_compiler('c') + +luadep = dependency('lua', fallback : ['lua', 'lua_dep']) +pngdep = dependency('libpng', fallback : ['libpng', 'png_dep']) + +executable('prog', 'prog.c', + dependencies : [pngdep, luadep]) diff --git a/meson/manual tests/2 multiwrap/prog.c b/meson/manual tests/2 multiwrap/prog.c new file mode 100644 index 000000000..dd0349e2d --- /dev/null +++ b/meson/manual tests/2 multiwrap/prog.c @@ -0,0 +1,66 @@ +#include +#include +#include +#include +#include +#if !defined(_MSC_VER) +#include +#endif + +static void *l_alloc (void *ud, void *ptr, size_t osize, + size_t nsize) { + (void)ud; + (void)osize; + if (nsize == 0) { + free(ptr); + return NULL; + } else { + return realloc(ptr, nsize); + } +} + +void open_image(const char *fname) { + png_image image; + + memset(&image, 0, (sizeof image)); + image.version = PNG_IMAGE_VERSION; + + if(png_image_begin_read_from_file(&image, fname) != 0) { + png_bytep buffer; + + image.format = PNG_FORMAT_RGBA; + buffer = malloc(PNG_IMAGE_SIZE(image)); + + if(png_image_finish_read(&image, NULL, buffer, 0, NULL) != 0) { + printf("Image %s read failed: %s\n", fname, image.message); + } +// png_free_image(&image); + free(buffer); + } else { + printf("Image %s open failed: %s", fname, image.message); + } +} + +int printer(lua_State *l) { + if(!lua_isstring(l, 1)) { + fprintf(stderr, "Incorrect call.\n"); + return 0; + } + open_image(lua_tostring(l, 1)); + return 0; +} + + +int main(int argc, char **argv) { + lua_State *l = lua_newstate(l_alloc, NULL); + if(!l) { + printf("Lua state allocation failed.\n"); + return 1; + } + lua_register(l, "printer", printer); + lua_getglobal(l, "printer"); + lua_pushliteral(l, "foobar.png"); + lua_call(l, 1, 0); + lua_close(l); + return 0; +} diff --git a/meson/manual tests/2 multiwrap/subprojects/libpng.wrap b/meson/manual tests/2 multiwrap/subprojects/libpng.wrap new file mode 100644 index 000000000..283775c1e --- /dev/null +++ b/meson/manual tests/2 multiwrap/subprojects/libpng.wrap @@ -0,0 +1,10 @@ +[wrap-file] +directory = libpng-1.6.34 + +source_url = ftp://ftp-osl.osuosl.org/pub/libpng/src/libpng16/libpng-1.6.34.tar.xz +source_filename = libpng-1.6.34.tar.xz +source_hash = 2f1e960d92ce3b3abd03d06dfec9637dfbd22febf107a536b44f7a47c60659f6 + +patch_url = https://wrapdb.mesonbuild.com/v1/projects/libpng/1.6.34/1/get_zip +patch_filename = libpng-1.6.34-1-wrap.zip +patch_hash = 2123806eba8180c164e33a210f2892bbeb2473b69e56aecc786574e9221e6f20 diff --git a/meson/manual tests/2 multiwrap/subprojects/lua.wrap b/meson/manual tests/2 multiwrap/subprojects/lua.wrap new file mode 100644 index 000000000..c1a179a5d --- /dev/null +++ b/meson/manual tests/2 multiwrap/subprojects/lua.wrap @@ -0,0 +1,11 @@ +[wrap-file] +directory = lua-5.3.0 + +source_url = http://www.lua.org/ftp/lua-5.3.0.tar.gz +source_filename = lua-5.3.0.tar.gz +source_hash = ae4a5eb2d660515eb191bfe3e061f2b8ffe94dce73d32cfd0de090ddcc0ddb01 + + +patch_url = https://wrapdb.mesonbuild.com/v1/projects/lua/5.3.0/5/get_zip +patch_filename = lua-5.3.0-5-wrap.zip +patch_hash = 439038309a0700adfb67d764b3fe935ed8601b31f819fc369e1438c6e79334dd diff --git a/meson/manual tests/2 multiwrap/subprojects/zlib.wrap b/meson/manual tests/2 multiwrap/subprojects/zlib.wrap new file mode 100644 index 000000000..6d5896f79 --- /dev/null +++ b/meson/manual tests/2 multiwrap/subprojects/zlib.wrap @@ -0,0 +1,10 @@ +[wrap-file] +directory = zlib-1.2.8 + +source_url = http://zlib.net/fossils/zlib-1.2.8.tar.gz +source_filename = zlib-1.2.8.tar.gz +source_hash = 36658cb768a54c1d4dec43c3116c27ed893e88b02ecfcb44f2166f9c0b7f2a0d + +patch_url = https://wrapdb.mesonbuild.com/v1/projects/zlib/1.2.8/8/get_zip +patch_filename = zlib-1.2.8-8-wrap.zip +patch_hash = 17c52a0e0c59ce926d3959005d5cd8178c6c7e2c9a4a1304279a8320c955ac60 diff --git a/meson/manual tests/3 git wrap/meson.build b/meson/manual tests/3 git wrap/meson.build new file mode 100644 index 000000000..7fd5083ee --- /dev/null +++ b/meson/manual tests/3 git wrap/meson.build @@ -0,0 +1,10 @@ +project('git outcheckker', 'c') + +sp = subproject('samplesubproject') + +exe = executable('gitprog', 'prog.c', +include_directories : sp.get_variable('subproj_inc'), +link_with : sp.get_variable('subproj_lib'), +) + +test('maintest', exe) diff --git a/meson/manual tests/3 git wrap/prog.c b/meson/manual tests/3 git wrap/prog.c new file mode 100644 index 000000000..6e2c4d861 --- /dev/null +++ b/meson/manual tests/3 git wrap/prog.c @@ -0,0 +1,6 @@ +#include"subproj.h" + +int main(void) { + subproj_function(); + return 0; +} diff --git a/meson/manual tests/3 git wrap/subprojects/samplesubproject.wrap b/meson/manual tests/3 git wrap/subprojects/samplesubproject.wrap new file mode 100644 index 000000000..f52190b8f --- /dev/null +++ b/meson/manual tests/3 git wrap/subprojects/samplesubproject.wrap @@ -0,0 +1,4 @@ +[wrap-git] +directory=samplesubproject +url=https://github.com/jpakkane/samplesubproject.git +revision=head diff --git a/meson/manual tests/4 standalone binaries/Info.plist b/meson/manual tests/4 standalone binaries/Info.plist new file mode 100644 index 000000000..0f0c90e49 --- /dev/null +++ b/meson/manual tests/4 standalone binaries/Info.plist @@ -0,0 +1,26 @@ + + + + + CFBundleGetInfoString + MyApp + CFBundleExecutable + myapp.sh + CFBundleIdentifier + com.example.me + CFBundleName + myapp + CFBundleIconFile + myapp.icns + CFBundleShortVersionString + 1.0 + CFBundleInfoDictionaryVersion + 6.0 + CFBundlePackageType + APPL + IFMajorVersion + 0 + IFMinorVersion + 1 + + diff --git a/meson/manual tests/4 standalone binaries/build_linux_package.sh b/meson/manual tests/4 standalone binaries/build_linux_package.sh new file mode 100755 index 000000000..783981ef5 --- /dev/null +++ b/meson/manual tests/4 standalone binaries/build_linux_package.sh @@ -0,0 +1,12 @@ +#!/bin/sh -eu + +curdir=`pwd` +rm -rf buildtmp +mkdir buildtmp +LDFLAGS=-static-libstdc++ ~/meson/meson.py buildtmp --buildtype=release --prefix=/tmp/myapp --libdir=lib --strip +ninja -C buildtmp install +rm -rf buildtmp +cd /tmp/ +tar czf myapp.tar.gz myapp +mv myapp.tar.gz "$curdir" +rm -rf myapp diff --git a/meson/manual tests/4 standalone binaries/build_osx_package.sh b/meson/manual tests/4 standalone binaries/build_osx_package.sh new file mode 100755 index 000000000..8a94ca534 --- /dev/null +++ b/meson/manual tests/4 standalone binaries/build_osx_package.sh @@ -0,0 +1,20 @@ +#!/bin/sh -eu + +rm -rf buildtmp +mkdir buildtmp +~/meson/meson.py buildtmp --buildtype=release --prefix=/tmp/myapp.app --bindir=Contents/MacOS +ninja -C buildtmp install +rm -rf buildtmp +mkdir -p mnttmp +rm -f working.dmg +gunzip < template.dmg.gz > working.dmg +hdiutil attach working.dmg -noautoopen -quiet -mountpoint mnttmp +rm -rf mnttmp/myapp.app +mv /tmp/myapp.app mnttmp +# NOTE: output of hdiutil changes every now and then. +# Verify that this is still working. +hdiutil detach $(hdiutil info|grep "mnttmp"|awk '{print $1}') +rm -rf mnttmp +rm -f myapp.dmg +hdiutil convert working.dmg -quiet -format UDZO -imagekey zlib-level=9 -o myapp.dmg +rm -f working.dmg diff --git a/meson/manual tests/4 standalone binaries/build_windows_package.py b/meson/manual tests/4 standalone binaries/build_windows_package.py new file mode 100755 index 000000000..0932eac09 --- /dev/null +++ b/meson/manual tests/4 standalone binaries/build_windows_package.py @@ -0,0 +1,32 @@ +#!/usr/bin/env python3 + +import os, urllib.request, shutil, subprocess +from glob import glob + +sdl_url = 'http://libsdl.org/release/SDL2-devel-2.0.3-VC.zip' +sdl_filename = 'SDL2-devel-2.0.3-VC.zip' +sdl_dir = 'SDL2-2.0.3' + +shutil.rmtree('build', ignore_errors=True) +os.mkdir('build') + +if not os.path.exists(sdl_filename): + response = urllib.request.urlopen(sdl_url, timeout=600.0) + data = response.read() + open(sdl_filename, 'wb').write(data) + +shutil.unpack_archive(sdl_filename, 'build') + +libs = glob(os.path.join('build', sdl_dir, 'lib/x86/*')) +[shutil.copy(x, 'build') for x in libs] + +# Sorry for this hack but this needs to work during development +# when Meson is not in path. +subprocess.check_call(['python3', r'..\..\meson.py', 'build', + '--backend=ninja', '--buildtype=release']) +subprocess.check_call(['ninja'], cwd='build') +shutil.copy('myapp.iss', 'build') +subprocess.check_call([r'\Program Files\Inno Setup 5\ISCC.exe', 'myapp.iss'], + cwd='build') +shutil.copy('build/setup.exe', 'myapp 1.0.exe') +shutil.rmtree('build') diff --git a/meson/manual tests/4 standalone binaries/linux_bundler.sh b/meson/manual tests/4 standalone binaries/linux_bundler.sh new file mode 100755 index 000000000..2a8e907fe --- /dev/null +++ b/meson/manual tests/4 standalone binaries/linux_bundler.sh @@ -0,0 +1,7 @@ +#!/bin/sh -eu + +libdir="${MESON_INSTALL_PREFIX}/lib" +mkdir -p $libdir +sdlfile=`ldd ${MESON_INSTALL_PREFIX}/bin/myapp | grep libSDL | cut -d ' ' -f 3` +cp $sdlfile "${libdir}" +strip "${libdir}/libSDL"* diff --git a/meson/manual tests/4 standalone binaries/meson.build b/meson/manual tests/4 standalone binaries/meson.build new file mode 100644 index 000000000..ad6645f5b --- /dev/null +++ b/meson/manual tests/4 standalone binaries/meson.build @@ -0,0 +1,38 @@ +project('myapp', 'cpp') + +sdl = dependency('sdl2', required : host_machine.system() != 'windows') + +if meson.get_compiler('cpp').get_id() != 'msvc' + add_global_arguments('-std=c++11', language : 'cpp') +endif + +if host_machine.system() == 'darwin' + install_data('myapp.sh', + install_dir : 'Contents/MacOS') + + install_data('myapp.icns', + install_dir : 'Contents/Resources') + + install_data('Info.plist', + install_dir : 'Contents') + + meson.add_install_script('osx_bundler.sh') +endif + +if host_machine.system() == 'linux' + install_data('myapp.sh', install_dir : '.') + meson.add_install_script('linux_bundler.sh') +endif + +extra_link_args = [] + +if host_machine.system() == 'windows' + str = '-I@0@/@1@'.format(meson.current_build_dir(), 'SDL2-2.0.3/include') + add_global_arguments(str, language : 'cpp') + extra_link_args = ['/SUBSYSTEM:CONSOLE', 'SDL2main.lib', 'SDL2.lib'] +endif + +prog = executable('myapp', 'myapp.cpp', +dependencies : sdl, +link_args : extra_link_args, +install : true) diff --git a/meson/manual tests/4 standalone binaries/myapp.cpp b/meson/manual tests/4 standalone binaries/myapp.cpp new file mode 100644 index 000000000..8ddff2736 --- /dev/null +++ b/meson/manual tests/4 standalone binaries/myapp.cpp @@ -0,0 +1,39 @@ +#include +#include +#include +#include + +int main(void) { + SDL_Surface *screenSurface; + SDL_Event e; + int keepGoing = 1; + std::string message; + + if(SDL_Init( SDL_INIT_VIDEO ) < 0) { + printf( "SDL could not initialize! SDL_Error: %s\n", SDL_GetError() ); + } + atexit(SDL_Quit); + + std::unique_ptr window(SDL_CreateWindow( "My application", SDL_WINDOWPOS_UNDEFINED, SDL_WINDOWPOS_UNDEFINED, 640, 480, SDL_WINDOW_SHOWN), SDL_DestroyWindow); + screenSurface = SDL_GetWindowSurface(window.get()); + + // Use iostream to make sure we have not screwed + // up libstdc++ linking. + message = "Window created."; + message += " Starting main loop."; + std::cout << message << std::endl; + + while(keepGoing) { + while(SDL_PollEvent(&e) != 0) { + if(e.type == SDL_QUIT) { + keepGoing = 0; + break; + } + } + SDL_FillRect(screenSurface, NULL, SDL_MapRGB(screenSurface->format, 0xFF, 0x00, 0x00)); + SDL_UpdateWindowSurface(window.get()); + SDL_Delay(100); + } + + return 0; +} diff --git a/meson/manual tests/4 standalone binaries/myapp.icns b/meson/manual tests/4 standalone binaries/myapp.icns new file mode 100644 index 000000000..633195454 Binary files /dev/null and b/meson/manual tests/4 standalone binaries/myapp.icns differ diff --git a/meson/manual tests/4 standalone binaries/myapp.iss b/meson/manual tests/4 standalone binaries/myapp.iss new file mode 100644 index 000000000..2bd441ded --- /dev/null +++ b/meson/manual tests/4 standalone binaries/myapp.iss @@ -0,0 +1,18 @@ +; Innosetup file for My app. + +[Setup] +AppName=My App +AppVersion=1.0 +DefaultDirName={pf}\My App +DefaultGroupName=My App +UninstallDisplayIcon={app}\myapp.exe +Compression=lzma2 +SolidCompression=yes +OutputDir=. + +[Files] +Source: "myapp.exe"; DestDir: "{app}" +Source: "SDL2.dll"; DestDir: "{app}" + +;[Icons] +;Name: "{group}\My App"; Filename: "{app}\myapp.exe" diff --git a/meson/manual tests/4 standalone binaries/myapp.sh b/meson/manual tests/4 standalone binaries/myapp.sh new file mode 100755 index 000000000..319148353 --- /dev/null +++ b/meson/manual tests/4 standalone binaries/myapp.sh @@ -0,0 +1,10 @@ +#!/bin/bash + +cd "${0%/*}" + +if [ `uname` == 'Darwin' ]; then + ./myapp +else + export LD_LIBRARY_PATH="`pwd`/lib" + bin/myapp +fi diff --git a/meson/manual tests/4 standalone binaries/osx_bundler.sh b/meson/manual tests/4 standalone binaries/osx_bundler.sh new file mode 100755 index 000000000..3bad65f76 --- /dev/null +++ b/meson/manual tests/4 standalone binaries/osx_bundler.sh @@ -0,0 +1,6 @@ +#!/bin/sh -eu + +mkdir -p ${MESON_INSTALL_PREFIX}/Contents/Frameworks +cp -R /Library/Frameworks/SDL2.framework ${MESON_INSTALL_PREFIX}/Contents/Frameworks + +install_name_tool -change @rpath/SDL2.framework/Versions/A/SDL2 @executable_path/../Frameworks/SDL2.framework/Versions/A/SDL2 ${MESON_INSTALL_PREFIX}/Contents/MacOS/myapp diff --git a/meson/manual tests/4 standalone binaries/readme.txt b/meson/manual tests/4 standalone binaries/readme.txt new file mode 100644 index 000000000..991b5c6b8 --- /dev/null +++ b/meson/manual tests/4 standalone binaries/readme.txt @@ -0,0 +1,12 @@ +This directory shows how you can build redistributable binaries. On +OSX this menans building an app bundle and a .dmg installer. On Linux +it means building an archive that bundles its dependencies. On Windows +it means building an .exe installer. + +To build each package you run the corresponding build_ARCH.sh build +script. + +On Linux you must build the package on the oldest distribution you +plan to support (Debian stable/oldstable and old CentOS are the common +choice here). + diff --git a/meson/manual tests/4 standalone binaries/template.dmg.gz b/meson/manual tests/4 standalone binaries/template.dmg.gz new file mode 100644 index 000000000..fcb6d6115 Binary files /dev/null and b/meson/manual tests/4 standalone binaries/template.dmg.gz differ diff --git a/meson/manual tests/5 rpm/lib.c b/meson/manual tests/5 rpm/lib.c new file mode 100644 index 000000000..efc230aab --- /dev/null +++ b/meson/manual tests/5 rpm/lib.c @@ -0,0 +1,6 @@ +#include"lib.h" + +char *meson_print(void) +{ + return "Hello, world!"; +} diff --git a/meson/manual tests/5 rpm/lib.h b/meson/manual tests/5 rpm/lib.h new file mode 100644 index 000000000..08fc9611c --- /dev/null +++ b/meson/manual tests/5 rpm/lib.h @@ -0,0 +1 @@ +char *meson_print(void); diff --git a/meson/manual tests/5 rpm/main.c b/meson/manual tests/5 rpm/main.c new file mode 100644 index 000000000..8b1d193ee --- /dev/null +++ b/meson/manual tests/5 rpm/main.c @@ -0,0 +1,8 @@ +#include +#include +int main(void) +{ + char *t = meson_print(); + printf("%s", t); + return 0; +} diff --git a/meson/manual tests/5 rpm/meson.build b/meson/manual tests/5 rpm/meson.build new file mode 100644 index 000000000..131da3930 --- /dev/null +++ b/meson/manual tests/5 rpm/meson.build @@ -0,0 +1,14 @@ +project('test spec', 'c') + +rpm = import('rpm') +dependency('zlib') +find_program('nonexistprog', required : false) + +lib = shared_library('mesontest_shared', ['lib.c', 'lib.h'], + version : '0.1', soversion : '0', + install : true) +executable('mesontest-bin', 'main.c', + link_with : lib, + install : true) + +rpm.generate_spec_template() diff --git a/meson/manual tests/6 hg wrap/meson.build b/meson/manual tests/6 hg wrap/meson.build new file mode 100644 index 000000000..c7ac004ca --- /dev/null +++ b/meson/manual tests/6 hg wrap/meson.build @@ -0,0 +1,10 @@ +project('Mercurial outcheckker', 'c') + +sp = subproject('samplesubproject') + +exe = executable('gitprog', 'prog.c', +include_directories : sp.get_variable('subproj_inc'), +link_with : sp.get_variable('subproj_lib'), +) + +test('maintest', exe) diff --git a/meson/manual tests/6 hg wrap/prog.c b/meson/manual tests/6 hg wrap/prog.c new file mode 100644 index 000000000..6e2c4d861 --- /dev/null +++ b/meson/manual tests/6 hg wrap/prog.c @@ -0,0 +1,6 @@ +#include"subproj.h" + +int main(void) { + subproj_function(); + return 0; +} diff --git a/meson/manual tests/6 hg wrap/subprojects/samplesubproject.wrap b/meson/manual tests/6 hg wrap/subprojects/samplesubproject.wrap new file mode 100644 index 000000000..6d3b3f2d4 --- /dev/null +++ b/meson/manual tests/6 hg wrap/subprojects/samplesubproject.wrap @@ -0,0 +1,4 @@ +[wrap-hg] +directory=samplesubproject +url=https://bitbucket.org/jpakkane/samplesubproject +revision=tip diff --git a/meson/manual tests/7 vala composite widgets/meson.build b/meson/manual tests/7 vala composite widgets/meson.build new file mode 100644 index 000000000..579ca5198 --- /dev/null +++ b/meson/manual tests/7 vala composite widgets/meson.build @@ -0,0 +1,21 @@ +project('composite', 'vala', 'c') +gnome = import('gnome') +deps = [ + dependency('glib-2.0', version : '>=2.38'), + dependency('gobject-2.0'), + dependency('gtk+-3.0'), +] +res = files('my-resources.xml') +gres = gnome.compile_resources( + 'my', res, + source_dir : '.', +) +executable( + 'demo', + sources : [ + 'mywidget.vala', + gres, + ], + dependencies : deps, + vala_args : ['--gresources', res], +) diff --git a/meson/manual tests/7 vala composite widgets/my-resources.xml b/meson/manual tests/7 vala composite widgets/my-resources.xml new file mode 100644 index 000000000..b5743c193 --- /dev/null +++ b/meson/manual tests/7 vala composite widgets/my-resources.xml @@ -0,0 +1,6 @@ + + + + mywidget.ui + + diff --git a/meson/manual tests/7 vala composite widgets/mywidget.ui b/meson/manual tests/7 vala composite widgets/mywidget.ui new file mode 100644 index 000000000..2d6286ca2 --- /dev/null +++ b/meson/manual tests/7 vala composite widgets/mywidget.ui @@ -0,0 +1,70 @@ + + + + + diff --git a/meson/manual tests/7 vala composite widgets/mywidget.vala b/meson/manual tests/7 vala composite widgets/mywidget.vala new file mode 100644 index 000000000..68eaecc27 --- /dev/null +++ b/meson/manual tests/7 vala composite widgets/mywidget.vala @@ -0,0 +1,41 @@ +using Gtk; + +[GtkTemplate (ui = "/org/foo/my/mywidget.ui")] +public class MyWidget : Box { + public string text { + get { return entry.text; } + set { entry.text = value; } + } + + [GtkChild] + private Entry entry; + + public MyWidget (string text) { + this.text = text; + } + + [GtkCallback] + private void on_button_clicked (Button button) { + print ("The button was clicked with entry text: %s\n", entry.text); + } + + [GtkCallback] + private void on_entry_changed (Editable editable) { + print ("The entry text changed: %s\n", entry.text); + + notify_property ("text"); + } +} + +void main(string[] args) { + Gtk.init (ref args); + var win = new Window(); + win.destroy.connect (Gtk.main_quit); + + var widget = new MyWidget ("The entry text!"); + + win.add (widget); + win.show_all (); + + Gtk.main (); +} diff --git a/meson/manual tests/8 timeout/meson.build b/meson/manual tests/8 timeout/meson.build new file mode 100644 index 000000000..8ba7d4b43 --- /dev/null +++ b/meson/manual tests/8 timeout/meson.build @@ -0,0 +1,8 @@ +project('timeout', 'c') + +# This creates a test that times out. It is a manual test +# because currently there is no test suite for test that are expected +# to fail during unit test phase. + +exe = executable('sleepprog', 'sleepprog.c') +test('timeout', exe, timeout : 1) diff --git a/meson/manual tests/8 timeout/sleepprog.c b/meson/manual tests/8 timeout/sleepprog.c new file mode 100644 index 000000000..8875e126a --- /dev/null +++ b/meson/manual tests/8 timeout/sleepprog.c @@ -0,0 +1,6 @@ +#include + +int main(void) { + sleep(1000); + return 0; +} diff --git a/meson/manual tests/9 nostdlib/meson.build b/meson/manual tests/9 nostdlib/meson.build new file mode 100644 index 000000000..9c5f949b2 --- /dev/null +++ b/meson/manual tests/9 nostdlib/meson.build @@ -0,0 +1,14 @@ +project('own libc', 'c') + +# Not related to this test, but could not find a better place for this test. +assert(meson.get_cross_property('nonexisting', 'defaultvalue') == 'defaultvalue', + 'Cross prop getting is broken.') + +# A simple project that uses its own libc. + +# Note that we don't need to specify anything, the flags to use +# stdlib come from the cross file. + +exe = executable('selfcontained', 'prog.c') + +test('standalone test', exe) diff --git a/meson/manual tests/9 nostdlib/prog.c b/meson/manual tests/9 nostdlib/prog.c new file mode 100644 index 000000000..b9216ee81 --- /dev/null +++ b/meson/manual tests/9 nostdlib/prog.c @@ -0,0 +1,7 @@ + +#include + +int main(void) { + const char *message = "Hello without stdlib.\n"; + return simple_print(message, simple_strlen(message)); +} diff --git a/meson/manual tests/9 nostdlib/subprojects/mylibc/libc.c b/meson/manual tests/9 nostdlib/subprojects/mylibc/libc.c new file mode 100644 index 000000000..67261cb1d --- /dev/null +++ b/meson/manual tests/9 nostdlib/subprojects/mylibc/libc.c @@ -0,0 +1,35 @@ +/* Do not use this as the basis of your own libc. + * The code is probably unoptimal or wonky, as I + * had no prior experience with this, but instead + * just fiddled with the code until it worked. + */ + +#include + +#define STDOUT 1 +#define SYS_WRITE 4 + +int simple_print(const char *msg, const long bufsize) { + int count; + long total_written = 0; + while(total_written < bufsize) { + asm( + "int $0x80\n\t" + : "=a"(count) + : "0"(SYS_WRITE), "b"(STDOUT), "c"(msg+total_written), "d"(bufsize-total_written) + :); + if(count == 0) { + return 1; + } + total_written += count; + } + return 0; +} + +int simple_strlen(const char *str) { + int len = 0; + while(str[len] != '\0') { + len++; + } + return len; +} diff --git a/meson/manual tests/9 nostdlib/subprojects/mylibc/meson.build b/meson/manual tests/9 nostdlib/subprojects/mylibc/meson.build new file mode 100644 index 000000000..aa0184e11 --- /dev/null +++ b/meson/manual tests/9 nostdlib/subprojects/mylibc/meson.build @@ -0,0 +1,11 @@ +project('own libc', 'c') + +# A very simple libc implementation + +# Do not specify -nostdlib & co. They come from cross specifications. + +libc = static_library('c', 'libc.c', 'stubstart.s') + +mylibc_dep = declare_dependency(link_with : libc, + include_directories : include_directories('.') +) diff --git a/meson/manual tests/9 nostdlib/subprojects/mylibc/stdio.h b/meson/manual tests/9 nostdlib/subprojects/mylibc/stdio.h new file mode 100644 index 000000000..c3f8f56ca --- /dev/null +++ b/meson/manual tests/9 nostdlib/subprojects/mylibc/stdio.h @@ -0,0 +1,5 @@ +#pragma once + +int simple_print(const char *msg, const long bufsize); + +int simple_strlen(const char *str); diff --git a/meson/manual tests/9 nostdlib/subprojects/mylibc/stubstart.s b/meson/manual tests/9 nostdlib/subprojects/mylibc/stubstart.s new file mode 100644 index 000000000..0a6d9729a --- /dev/null +++ b/meson/manual tests/9 nostdlib/subprojects/mylibc/stubstart.s @@ -0,0 +1,8 @@ +.globl _start + +_start: + + call main + movl %eax, %ebx + movl $1, %eax + int $0x80 diff --git a/meson/meson.py b/meson/meson.py new file mode 100755 index 000000000..dab08d3a8 --- /dev/null +++ b/meson/meson.py @@ -0,0 +1,29 @@ +#!/usr/bin/env python3 + +# Copyright 2016 The Meson development team + +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at + +# http://www.apache.org/licenses/LICENSE-2.0 + +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import sys +from pathlib import Path + +# If we're run uninstalled, add the script directory to sys.path to ensure that +# we always import the correct mesonbuild modules even if PYTHONPATH is mangled +meson_exe = Path(sys.argv[0]).resolve() +if (meson_exe.parent / 'mesonbuild').is_dir(): + sys.path.insert(0, str(meson_exe.parent)) + +from mesonbuild import mesonmain + +if __name__ == '__main__': + sys.exit(mesonmain.main()) diff --git a/meson/mesonbuild/__init__.py b/meson/mesonbuild/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/meson/mesonbuild/arglist.py b/meson/mesonbuild/arglist.py new file mode 100644 index 000000000..fd4de96f1 --- /dev/null +++ b/meson/mesonbuild/arglist.py @@ -0,0 +1,331 @@ +# Copyright 2012-2020 The Meson development team +# Copyright © 2020 Intel Corporation + +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at + +# http://www.apache.org/licenses/LICENSE-2.0 + +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from functools import lru_cache +import collections +import enum +import os +import re +import typing as T + +from . import mesonlib + +if T.TYPE_CHECKING: + from .linkers import StaticLinker + from .compilers import Compiler + +UNIXY_COMPILER_INTERNAL_LIBS = ['m', 'c', 'pthread', 'dl', 'rt'] # type: T.List[str] +# execinfo is a compiler lib on FreeBSD and NetBSD +if mesonlib.is_freebsd() or mesonlib.is_netbsd(): + UNIXY_COMPILER_INTERNAL_LIBS.append('execinfo') + + +class Dedup(enum.Enum): + + """What kind of deduplication can be done to compiler args. + + OVERRIDEN - Whether an argument can be 'overridden' by a later argument. + For example, -DFOO defines FOO and -UFOO undefines FOO. In this case, + we can safely remove the previous occurrence and add a new one. The + same is true for include paths and library paths with -I and -L. + UNIQUE - Arguments that once specified cannot be undone, such as `-c` or + `-pipe`. New instances of these can be completely skipped. + NO_DEDUP - Whether it matters where or how many times on the command-line + a particular argument is present. This can matter for symbol + resolution in static or shared libraries, so we cannot de-dup or + reorder them. + """ + + NO_DEDUP = 0 + UNIQUE = 1 + OVERRIDEN = 2 + + +class CompilerArgs(collections.abc.MutableSequence): + ''' + List-like class that manages a list of compiler arguments. Should be used + while constructing compiler arguments from various sources. Can be + operated with ordinary lists, so this does not need to be used + everywhere. + + All arguments must be inserted and stored in GCC-style (-lfoo, -Idir, etc) + and can converted to the native type of each compiler by using the + .to_native() method to which you must pass an instance of the compiler or + the compiler class. + + New arguments added to this class (either with .append(), .extend(), or +=) + are added in a way that ensures that they override previous arguments. + For example: + + >>> a = ['-Lfoo', '-lbar'] + >>> a += ['-Lpho', '-lbaz'] + >>> print(a) + ['-Lpho', '-Lfoo', '-lbar', '-lbaz'] + + Arguments will also be de-duped if they can be de-duped safely. + + Note that because of all this, this class is not commutative and does not + preserve the order of arguments if it is safe to not. For example: + >>> ['-Ifoo', '-Ibar'] + ['-Ifez', '-Ibaz', '-Werror'] + ['-Ifez', '-Ibaz', '-Ifoo', '-Ibar', '-Werror'] + >>> ['-Ifez', '-Ibaz', '-Werror'] + ['-Ifoo', '-Ibar'] + ['-Ifoo', '-Ibar', '-Ifez', '-Ibaz', '-Werror'] + + ''' + # Arg prefixes that override by prepending instead of appending + prepend_prefixes = () # type: T.Tuple[str, ...] + + # Arg prefixes and args that must be de-duped by returning 2 + dedup2_prefixes = () # type: T.Tuple[str, ...] + dedup2_suffixes = () # type: T.Tuple[str, ...] + dedup2_args = () # type: T.Tuple[str, ...] + + # Arg prefixes and args that must be de-duped by returning 1 + # + # NOTE: not thorough. A list of potential corner cases can be found in + # https://github.com/mesonbuild/meson/pull/4593#pullrequestreview-182016038 + dedup1_prefixes = () # type: T.Tuple[str, ...] + dedup1_suffixes = ('.lib', '.dll', '.so', '.dylib', '.a') # type: T.Tuple[str, ...] + # Match a .so of the form path/to/libfoo.so.0.1.0 + # Only UNIX shared libraries require this. Others have a fixed extension. + dedup1_regex = re.compile(r'([\/\\]|\A)lib.*\.so(\.[0-9]+)?(\.[0-9]+)?(\.[0-9]+)?$') + dedup1_args = () # type: T.Tuple[str, ...] + # In generate_link() we add external libs without de-dup, but we must + # *always* de-dup these because they're special arguments to the linker + # TODO: these should probably move too + always_dedup_args = tuple('-l' + lib for lib in UNIXY_COMPILER_INTERNAL_LIBS) # type : T.Tuple[str, ...] + + def __init__(self, compiler: T.Union['Compiler', 'StaticLinker'], + iterable: T.Optional[T.Iterable[str]] = None): + self.compiler = compiler + self._container = list(iterable) if iterable is not None else [] # type: T.List[str] + self.pre = collections.deque() # type: T.Deque[str] + self.post = collections.deque() # type: T.Deque[str] + + # Flush the saved pre and post list into the _container list + # + # This correctly deduplicates the entries after _can_dedup definition + # Note: This function is designed to work without delete operations, as deletions are worsening the performance a lot. + def flush_pre_post(self) -> None: + pre_flush = collections.deque() # type: T.Deque[str] + pre_flush_set = set() # type: T.Set[str] + post_flush = collections.deque() # type: T.Deque[str] + post_flush_set = set() # type: T.Set[str] + + #The two lists are here walked from the front to the back, in order to not need removals for deduplication + for a in self.pre: + dedup = self._can_dedup(a) + if a not in pre_flush_set: + pre_flush.append(a) + if dedup is Dedup.OVERRIDEN: + pre_flush_set.add(a) + for a in reversed(self.post): + dedup = self._can_dedup(a) + if a not in post_flush_set: + post_flush.appendleft(a) + if dedup is Dedup.OVERRIDEN: + post_flush_set.add(a) + + #pre and post will overwrite every element that is in the container + #only copy over args that are in _container but not in the post flush or pre flush set + + for a in self._container: + if a not in post_flush_set and a not in pre_flush_set: + pre_flush.append(a) + + self._container = list(pre_flush) + list(post_flush) + self.pre.clear() + self.post.clear() + + def __iter__(self) -> T.Iterator[str]: + self.flush_pre_post() + return iter(self._container) + + @T.overload # noqa: F811 + def __getitem__(self, index: int) -> str: # noqa: F811 + pass + + @T.overload # noqa: F811 + def __getitem__(self, index: slice) -> T.MutableSequence[str]: # noqa: F811 + pass + + def __getitem__(self, index): # noqa: F811 + self.flush_pre_post() + return self._container[index] + + @T.overload # noqa: F811 + def __setitem__(self, index: int, value: str) -> None: # noqa: F811 + pass + + @T.overload # noqa: F811 + def __setitem__(self, index: slice, value: T.Iterable[str]) -> None: # noqa: F811 + pass + + def __setitem__(self, index, value) -> None: # noqa: F811 + self.flush_pre_post() + self._container[index] = value + + def __delitem__(self, index: T.Union[int, slice]) -> None: + self.flush_pre_post() + del self._container[index] + + def __len__(self) -> int: + return len(self._container) + len(self.pre) + len(self.post) + + def insert(self, index: int, value: str) -> None: + self.flush_pre_post() + self._container.insert(index, value) + + def copy(self) -> 'CompilerArgs': + self.flush_pre_post() + return type(self)(self.compiler, self._container.copy()) + + @classmethod + @lru_cache(maxsize=None) + def _can_dedup(cls, arg: str) -> Dedup: + """Returns whether the argument can be safely de-duped. + + In addition to these, we handle library arguments specially. + With GNU ld, we surround library arguments with -Wl,--start/end-gr -> Dedupoup + to recursively search for symbols in the libraries. This is not needed + with other linkers. + """ + + # A standalone argument must never be deduplicated because it is + # defined by what comes _after_ it. Thus dedupping this: + # -D FOO -D BAR + # would yield either + # -D FOO BAR + # or + # FOO -D BAR + # both of which are invalid. + if arg in cls.dedup2_prefixes: + return Dedup.NO_DEDUP + if arg in cls.dedup2_args or \ + arg.startswith(cls.dedup2_prefixes) or \ + arg.endswith(cls.dedup2_suffixes): + return Dedup.OVERRIDEN + if arg in cls.dedup1_args or \ + arg.startswith(cls.dedup1_prefixes) or \ + arg.endswith(cls.dedup1_suffixes) or \ + re.search(cls.dedup1_regex, arg): + return Dedup.UNIQUE + return Dedup.NO_DEDUP + + @classmethod + @lru_cache(maxsize=None) + def _should_prepend(cls, arg: str) -> bool: + return arg.startswith(cls.prepend_prefixes) + + def to_native(self, copy: bool = False) -> T.List[str]: + # Check if we need to add --start/end-group for circular dependencies + # between static libraries, and for recursively searching for symbols + # needed by static libraries that are provided by object files or + # shared libraries. + self.flush_pre_post() + if copy: + new = self.copy() + else: + new = self + return self.compiler.unix_args_to_native(new._container) + + def append_direct(self, arg: str) -> None: + ''' + Append the specified argument without any reordering or de-dup except + for absolute paths to libraries, etc, which can always be de-duped + safely. + ''' + self.flush_pre_post() + if os.path.isabs(arg): + self.append(arg) + else: + self._container.append(arg) + + def extend_direct(self, iterable: T.Iterable[str]) -> None: + ''' + Extend using the elements in the specified iterable without any + reordering or de-dup except for absolute paths where the order of + include search directories is not relevant + ''' + self.flush_pre_post() + for elem in iterable: + self.append_direct(elem) + + def extend_preserving_lflags(self, iterable: T.Iterable[str]) -> None: + normal_flags = [] + lflags = [] + for i in iterable: + if i not in self.always_dedup_args and (i.startswith('-l') or i.startswith('-L')): + lflags.append(i) + else: + normal_flags.append(i) + self.extend(normal_flags) + self.extend_direct(lflags) + + def __add__(self, args: T.Iterable[str]) -> 'CompilerArgs': + self.flush_pre_post() + new = self.copy() + new += args + return new + + def __iadd__(self, args: T.Iterable[str]) -> 'CompilerArgs': + ''' + Add two CompilerArgs while taking into account overriding of arguments + and while preserving the order of arguments as much as possible + ''' + tmp_pre = collections.deque() # type: T.Deque[str] + if not isinstance(args, collections.abc.Iterable): + raise TypeError('can only concatenate Iterable[str] (not "{}") to CompilerArgs'.format(args)) + for arg in args: + # If the argument can be de-duped, do it either by removing the + # previous occurrence of it and adding a new one, or not adding the + # new occurrence. + dedup = self._can_dedup(arg) + if dedup is Dedup.UNIQUE: + # Argument already exists and adding a new instance is useless + if arg in self._container or arg in self.pre or arg in self.post: + continue + if self._should_prepend(arg): + tmp_pre.appendleft(arg) + else: + self.post.append(arg) + self.pre.extendleft(tmp_pre) + #pre and post is going to be merged later before a iter call + return self + + def __radd__(self, args: T.Iterable[str]) -> 'CompilerArgs': + self.flush_pre_post() + new = type(self)(self.compiler, args) + new += self + return new + + def __eq__(self, other: T.Any) -> T.Union[bool]: + self.flush_pre_post() + # Only allow equality checks against other CompilerArgs and lists instances + if isinstance(other, CompilerArgs): + return self.compiler == other.compiler and self._container == other._container + elif isinstance(other, list): + return self._container == other + return NotImplemented + + def append(self, arg: str) -> None: + self.__iadd__([arg]) + + def extend(self, args: T.Iterable[str]) -> None: + self.__iadd__(args) + + def __repr__(self) -> str: + self.flush_pre_post() + return 'CompilerArgs({!r}, {!r})'.format(self.compiler, self._container) diff --git a/meson/mesonbuild/ast/__init__.py b/meson/mesonbuild/ast/__init__.py new file mode 100644 index 000000000..4fb56cb86 --- /dev/null +++ b/meson/mesonbuild/ast/__init__.py @@ -0,0 +1,34 @@ +# Copyright 2019 The Meson development team + +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at + +# http://www.apache.org/licenses/LICENSE-2.0 + +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# This class contains the basic functionality needed to run any interpreter +# or an interpreter-based tool. + +__all__ = [ + 'AstConditionLevel', + 'AstInterpreter', + 'AstIDGenerator', + 'AstIndentationGenerator', + 'AstJSONPrinter', + 'AstVisitor', + 'AstPrinter', + 'IntrospectionInterpreter', + 'build_target_functions', +] + +from .interpreter import AstInterpreter +from .introspection import IntrospectionInterpreter, build_target_functions +from .visitor import AstVisitor +from .postprocess import AstConditionLevel, AstIDGenerator, AstIndentationGenerator +from .printer import AstPrinter, AstJSONPrinter diff --git a/meson/mesonbuild/ast/interpreter.py b/meson/mesonbuild/ast/interpreter.py new file mode 100644 index 000000000..fb77db00c --- /dev/null +++ b/meson/mesonbuild/ast/interpreter.py @@ -0,0 +1,395 @@ +# Copyright 2016 The Meson development team + +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at + +# http://www.apache.org/licenses/LICENSE-2.0 + +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# This class contains the basic functionality needed to run any interpreter +# or an interpreter-based tool. + +from .visitor import AstVisitor +from .. import interpreterbase, mparser, mesonlib +from .. import environment + +from ..interpreterbase import InvalidArguments, BreakRequest, ContinueRequest, TYPE_nvar, TYPE_nkwargs +from ..mparser import ( + AndNode, + ArgumentNode, + ArithmeticNode, + ArrayNode, + AssignmentNode, + BaseNode, + ComparisonNode, + ElementaryNode, + EmptyNode, + ForeachClauseNode, + IdNode, + IfClauseNode, + IndexNode, + MethodNode, + NotNode, + OrNode, + PlusAssignmentNode, + StringNode, + TernaryNode, + UMinusNode, +) + +import os, sys +import typing as T + +class DontCareObject(interpreterbase.InterpreterObject): + pass + +class MockExecutable(interpreterbase.InterpreterObject): + pass + +class MockStaticLibrary(interpreterbase.InterpreterObject): + pass + +class MockSharedLibrary(interpreterbase.InterpreterObject): + pass + +class MockCustomTarget(interpreterbase.InterpreterObject): + pass + +class MockRunTarget(interpreterbase.InterpreterObject): + pass + +ADD_SOURCE = 0 +REMOVE_SOURCE = 1 + +class AstInterpreter(interpreterbase.InterpreterBase): + def __init__(self, source_root: str, subdir: str, subproject: str, visitors: T.Optional[T.List[AstVisitor]] = None): + super().__init__(source_root, subdir, subproject) + self.visitors = visitors if visitors is not None else [] + self.visited_subdirs = {} # type: T.Dict[str, bool] + self.assignments = {} # type: T.Dict[str, BaseNode] + self.assign_vals = {} # type: T.Dict[str, T.Any] + self.reverse_assignment = {} # type: T.Dict[str, BaseNode] + self.funcs.update({'project': self.func_do_nothing, + 'test': self.func_do_nothing, + 'benchmark': self.func_do_nothing, + 'install_headers': self.func_do_nothing, + 'install_man': self.func_do_nothing, + 'install_data': self.func_do_nothing, + 'install_subdir': self.func_do_nothing, + 'configuration_data': self.func_do_nothing, + 'configure_file': self.func_do_nothing, + 'find_program': self.func_do_nothing, + 'include_directories': self.func_do_nothing, + 'add_global_arguments': self.func_do_nothing, + 'add_global_link_arguments': self.func_do_nothing, + 'add_project_arguments': self.func_do_nothing, + 'add_project_link_arguments': self.func_do_nothing, + 'message': self.func_do_nothing, + 'generator': self.func_do_nothing, + 'error': self.func_do_nothing, + 'run_command': self.func_do_nothing, + 'assert': self.func_do_nothing, + 'subproject': self.func_do_nothing, + 'dependency': self.func_do_nothing, + 'get_option': self.func_do_nothing, + 'join_paths': self.func_do_nothing, + 'environment': self.func_do_nothing, + 'import': self.func_do_nothing, + 'vcs_tag': self.func_do_nothing, + 'add_languages': self.func_do_nothing, + 'declare_dependency': self.func_do_nothing, + 'files': self.func_do_nothing, + 'executable': self.func_do_nothing, + 'static_library': self.func_do_nothing, + 'shared_library': self.func_do_nothing, + 'library': self.func_do_nothing, + 'build_target': self.func_do_nothing, + 'custom_target': self.func_do_nothing, + 'run_target': self.func_do_nothing, + 'subdir': self.func_subdir, + 'set_variable': self.func_do_nothing, + 'get_variable': self.func_do_nothing, + 'is_disabler': self.func_do_nothing, + 'is_variable': self.func_do_nothing, + 'disabler': self.func_do_nothing, + 'gettext': self.func_do_nothing, + 'jar': self.func_do_nothing, + 'warning': self.func_do_nothing, + 'shared_module': self.func_do_nothing, + 'option': self.func_do_nothing, + 'both_libraries': self.func_do_nothing, + 'add_test_setup': self.func_do_nothing, + 'find_library': self.func_do_nothing, + 'subdir_done': self.func_do_nothing, + 'alias_target': self.func_do_nothing, + 'summary': self.func_do_nothing, + }) + + def func_do_nothing(self, node: BaseNode, args: T.List[TYPE_nvar], kwargs: T.Dict[str, TYPE_nvar]) -> bool: + return True + + def load_root_meson_file(self) -> None: + super().load_root_meson_file() + for i in self.visitors: + self.ast.accept(i) + + def func_subdir(self, node: BaseNode, args: T.List[TYPE_nvar], kwargs: T.Dict[str, TYPE_nvar]) -> None: + args = self.flatten_args(args) + if len(args) != 1 or not isinstance(args[0], str): + sys.stderr.write('Unable to evaluate subdir({}) in AstInterpreter --> Skipping\n'.format(args)) + return + + prev_subdir = self.subdir + subdir = os.path.join(prev_subdir, args[0]) + absdir = os.path.join(self.source_root, subdir) + buildfilename = os.path.join(subdir, environment.build_filename) + absname = os.path.join(self.source_root, buildfilename) + symlinkless_dir = os.path.realpath(absdir) + if symlinkless_dir in self.visited_subdirs: + sys.stderr.write('Trying to enter {} which has already been visited --> Skipping\n'.format(args[0])) + return + self.visited_subdirs[symlinkless_dir] = True + + if not os.path.isfile(absname): + sys.stderr.write('Unable to find build file {} --> Skipping\n'.format(buildfilename)) + return + with open(absname, encoding='utf8') as f: + code = f.read() + assert(isinstance(code, str)) + try: + codeblock = mparser.Parser(code, absname).parse() + except mesonlib.MesonException as me: + me.file = absname + raise me + + self.subdir = subdir + for i in self.visitors: + codeblock.accept(i) + self.evaluate_codeblock(codeblock) + self.subdir = prev_subdir + + def method_call(self, node: BaseNode) -> bool: + return True + + def evaluate_arithmeticstatement(self, cur: ArithmeticNode) -> int: + self.evaluate_statement(cur.left) + self.evaluate_statement(cur.right) + return 0 + + def evaluate_uminusstatement(self, cur: UMinusNode) -> int: + self.evaluate_statement(cur.value) + return 0 + + def evaluate_ternary(self, node: TernaryNode) -> None: + assert(isinstance(node, TernaryNode)) + self.evaluate_statement(node.condition) + self.evaluate_statement(node.trueblock) + self.evaluate_statement(node.falseblock) + + def evaluate_dictstatement(self, node: mparser.DictNode) -> TYPE_nkwargs: + (arguments, kwargs) = self.reduce_arguments(node.args, resolve_key_nodes=False) + assert (not arguments) + self.argument_depth += 1 + for key, value in kwargs.items(): + if isinstance(key, BaseNode): + self.evaluate_statement(key) + self.argument_depth -= 1 + return {} + + def evaluate_plusassign(self, node: PlusAssignmentNode) -> None: + assert(isinstance(node, PlusAssignmentNode)) + # Cheat by doing a reassignment + self.assignments[node.var_name] = node.value # Save a reference to the value node + if node.value.ast_id: + self.reverse_assignment[node.value.ast_id] = node + self.assign_vals[node.var_name] = self.evaluate_statement(node.value) + + def evaluate_indexing(self, node: IndexNode) -> int: + return 0 + + def unknown_function_called(self, func_name: str) -> None: + pass + + def reduce_arguments(self, args: ArgumentNode, resolve_key_nodes: bool = True) -> T.Tuple[T.List[TYPE_nvar], TYPE_nkwargs]: + if isinstance(args, ArgumentNode): + kwargs = {} # type: T.Dict[T.Union[str, BaseNode], TYPE_nvar] + for key, val in args.kwargs.items(): + if resolve_key_nodes and isinstance(key, (StringNode, IdNode)): + assert isinstance(key.value, str) + kwargs[key.value] = val + else: + kwargs[key] = val + if args.incorrect_order(): + raise InvalidArguments('All keyword arguments must be after positional arguments.') + return self.flatten_args(args.arguments), kwargs + else: + return self.flatten_args(args), {} + + def evaluate_comparison(self, node: ComparisonNode) -> bool: + self.evaluate_statement(node.left) + self.evaluate_statement(node.right) + return False + + def evaluate_andstatement(self, cur: AndNode) -> bool: + self.evaluate_statement(cur.left) + self.evaluate_statement(cur.right) + return False + + def evaluate_orstatement(self, cur: OrNode) -> bool: + self.evaluate_statement(cur.left) + self.evaluate_statement(cur.right) + return False + + def evaluate_notstatement(self, cur: NotNode) -> bool: + self.evaluate_statement(cur.value) + return False + + def evaluate_foreach(self, node: ForeachClauseNode) -> None: + try: + self.evaluate_codeblock(node.block) + except ContinueRequest: + pass + except BreakRequest: + pass + + def evaluate_if(self, node: IfClauseNode) -> None: + for i in node.ifs: + self.evaluate_codeblock(i.block) + if not isinstance(node.elseblock, EmptyNode): + self.evaluate_codeblock(node.elseblock) + + def get_variable(self, varname: str) -> int: + return 0 + + def assignment(self, node: AssignmentNode) -> None: + assert(isinstance(node, AssignmentNode)) + self.assignments[node.var_name] = node.value # Save a reference to the value node + if node.value.ast_id: + self.reverse_assignment[node.value.ast_id] = node + self.assign_vals[node.var_name] = self.evaluate_statement(node.value) # Evaluate the value just in case + + def resolve_node(self, node: BaseNode, include_unknown_args: bool = False, id_loop_detect: T.Optional[T.List[str]] = None) -> T.Optional[T.Any]: + def quick_resolve(n: BaseNode, loop_detect: T.Optional[T.List[str]] = None) -> T.Any: + if loop_detect is None: + loop_detect = [] + if isinstance(n, IdNode): + assert isinstance(n.value, str) + if n.value in loop_detect or n.value not in self.assignments: + return [] + return quick_resolve(self.assignments[n.value], loop_detect = loop_detect + [n.value]) + elif isinstance(n, ElementaryNode): + return n.value + else: + return n + + if id_loop_detect is None: + id_loop_detect = [] + result = None + + if not isinstance(node, BaseNode): + return None + + assert node.ast_id + if node.ast_id in id_loop_detect: + return None # Loop detected + id_loop_detect += [node.ast_id] + + # Try to evealuate the value of the node + if isinstance(node, IdNode): + result = quick_resolve(node) + + elif isinstance(node, ElementaryNode): + result = node.value + + elif isinstance(node, NotNode): + result = self.resolve_node(node.value, include_unknown_args, id_loop_detect) + if isinstance(result, bool): + result = not result + + elif isinstance(node, ArrayNode): + result = [x for x in node.args.arguments] + + elif isinstance(node, ArgumentNode): + result = [x for x in node.arguments] + + elif isinstance(node, ArithmeticNode): + if node.operation != 'add': + return None # Only handle string and array concats + l = quick_resolve(node.left) + r = quick_resolve(node.right) + if isinstance(l, str) and isinstance(r, str): + result = l + r # String concatenation detected + else: + result = self.flatten_args(l, include_unknown_args, id_loop_detect) + self.flatten_args(r, include_unknown_args, id_loop_detect) + + elif isinstance(node, MethodNode): + src = quick_resolve(node.source_object) + margs = self.flatten_args(node.args.arguments, include_unknown_args, id_loop_detect) + mkwargs = {} # type: T.Dict[str, TYPE_nvar] + try: + if isinstance(src, str): + result = self.string_method_call(src, node.name, margs, mkwargs) + elif isinstance(src, bool): + result = self.bool_method_call(src, node.name, margs, mkwargs) + elif isinstance(src, int): + result = self.int_method_call(src, node.name, margs, mkwargs) + elif isinstance(src, list): + result = self.array_method_call(src, node.name, margs, mkwargs) + elif isinstance(src, dict): + result = self.dict_method_call(src, node.name, margs, mkwargs) + except mesonlib.MesonException: + return None + + # Ensure that the result is fully resolved (no more nodes) + if isinstance(result, BaseNode): + result = self.resolve_node(result, include_unknown_args, id_loop_detect) + elif isinstance(result, list): + new_res = [] # type: T.List[TYPE_nvar] + for i in result: + if isinstance(i, BaseNode): + resolved = self.resolve_node(i, include_unknown_args, id_loop_detect) + if resolved is not None: + new_res += self.flatten_args(resolved, include_unknown_args, id_loop_detect) + else: + new_res += [i] + result = new_res + + return result + + def flatten_args(self, args_raw: T.Union[TYPE_nvar, T.Sequence[TYPE_nvar]], include_unknown_args: bool = False, id_loop_detect: T.Optional[T.List[str]] = None) -> T.List[TYPE_nvar]: + # Make sure we are always dealing with lists + if isinstance(args_raw, list): + args = args_raw + else: + args = [args_raw] + + flattend_args = [] # type: T.List[TYPE_nvar] + + # Resolve the contents of args + for i in args: + if isinstance(i, BaseNode): + resolved = self.resolve_node(i, include_unknown_args, id_loop_detect) + if resolved is not None: + if not isinstance(resolved, list): + resolved = [resolved] + flattend_args += resolved + elif isinstance(i, (str, bool, int, float)) or include_unknown_args: + flattend_args += [i] + return flattend_args + + def flatten_kwargs(self, kwargs: T.Dict[str, TYPE_nvar], include_unknown_args: bool = False) -> T.Dict[str, TYPE_nvar]: + flattend_kwargs = {} + for key, val in kwargs.items(): + if isinstance(val, BaseNode): + resolved = self.resolve_node(val, include_unknown_args) + if resolved is not None: + flattend_kwargs[key] = resolved + elif isinstance(val, (str, bool, int, float)) or include_unknown_args: + flattend_kwargs[key] = val + return flattend_kwargs diff --git a/meson/mesonbuild/ast/introspection.py b/meson/mesonbuild/ast/introspection.py new file mode 100644 index 000000000..142c219fd --- /dev/null +++ b/meson/mesonbuild/ast/introspection.py @@ -0,0 +1,304 @@ +# Copyright 2018 The Meson development team + +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at + +# http://www.apache.org/licenses/LICENSE-2.0 + +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# This class contains the basic functionality needed to run any interpreter +# or an interpreter-based tool + +from .interpreter import AstInterpreter +from .visitor import AstVisitor +from .. import compilers, environment, mesonlib, optinterpreter +from .. import coredata as cdata +from ..mesonlib import MachineChoice +from ..interpreterbase import InvalidArguments, TYPE_nvar +from ..build import Executable, Jar, SharedLibrary, SharedModule, StaticLibrary +from ..mparser import BaseNode, ArithmeticNode, ArrayNode, ElementaryNode, IdNode, FunctionNode, StringNode +import typing as T +import os + +build_target_functions = ['executable', 'jar', 'library', 'shared_library', 'shared_module', 'static_library', 'both_libraries'] + +class IntrospectionHelper: + # mimic an argparse namespace + def __init__(self, cross_file: str): + self.cross_file = cross_file # type: str + self.native_file = None # type: str + self.cmd_line_options = {} # type: T.Dict[str, str] + +class IntrospectionInterpreter(AstInterpreter): + # Interpreter to detect the options without a build directory + # Most of the code is stolen from interpreter.Interpreter + def __init__(self, + source_root: str, + subdir: str, + backend: str, + visitors: T.Optional[T.List[AstVisitor]] = None, + cross_file: T.Optional[str] = None, + subproject: str = '', + subproject_dir: str = 'subprojects', + env: T.Optional[environment.Environment] = None): + visitors = visitors if visitors is not None else [] + super().__init__(source_root, subdir, subproject, visitors=visitors) + + options = IntrospectionHelper(cross_file) + self.cross_file = cross_file + if env is None: + self.environment = environment.Environment(source_root, None, options) + else: + self.environment = env + self.subproject_dir = subproject_dir + self.coredata = self.environment.get_coredata() + self.option_file = os.path.join(self.source_root, self.subdir, 'meson_options.txt') + self.backend = backend + self.default_options = {'backend': self.backend} + self.project_data = {} # type: T.Dict[str, T.Any] + self.targets = [] # type: T.List[T.Dict[str, T.Any]] + self.dependencies = [] # type: T.List[T.Dict[str, T.Any]] + self.project_node = None # type: BaseNode + + self.funcs.update({ + 'add_languages': self.func_add_languages, + 'dependency': self.func_dependency, + 'executable': self.func_executable, + 'jar': self.func_jar, + 'library': self.func_library, + 'project': self.func_project, + 'shared_library': self.func_shared_lib, + 'shared_module': self.func_shared_module, + 'static_library': self.func_static_lib, + 'both_libraries': self.func_both_lib, + }) + + def func_project(self, node: BaseNode, args: T.List[TYPE_nvar], kwargs: T.Dict[str, TYPE_nvar]) -> None: + if self.project_node: + raise InvalidArguments('Second call to project()') + self.project_node = node + if len(args) < 1: + raise InvalidArguments('Not enough arguments to project(). Needs at least the project name.') + + proj_name = args[0] + proj_vers = kwargs.get('version', 'undefined') + proj_langs = self.flatten_args(args[1:]) + if isinstance(proj_vers, ElementaryNode): + proj_vers = proj_vers.value + if not isinstance(proj_vers, str): + proj_vers = 'undefined' + self.project_data = {'descriptive_name': proj_name, 'version': proj_vers} + + if os.path.exists(self.option_file): + oi = optinterpreter.OptionInterpreter(self.subproject) + oi.process(self.option_file) + self.coredata.merge_user_options(oi.options) + + def_opts = self.flatten_args(kwargs.get('default_options', [])) + _project_default_options = mesonlib.stringlistify(def_opts) + self.project_default_options = cdata.create_options_dict(_project_default_options) + self.default_options.update(self.project_default_options) + self.coredata.set_default_options(self.default_options, self.subproject, self.environment) + + if not self.is_subproject() and 'subproject_dir' in kwargs: + spdirname = kwargs['subproject_dir'] + if isinstance(spdirname, StringNode): + assert isinstance(spdirname.value, str) + self.subproject_dir = spdirname.value + if not self.is_subproject(): + self.project_data['subprojects'] = [] + subprojects_dir = os.path.join(self.source_root, self.subproject_dir) + if os.path.isdir(subprojects_dir): + for i in os.listdir(subprojects_dir): + if os.path.isdir(os.path.join(subprojects_dir, i)): + self.do_subproject(i) + + self.coredata.init_backend_options(self.backend) + options = {k: v for k, v in self.environment.cmd_line_options.items() if k.startswith('backend_')} + + self.coredata.set_options(options) + self.func_add_languages(None, proj_langs, None) + + def do_subproject(self, dirname: str) -> None: + subproject_dir_abs = os.path.join(self.environment.get_source_dir(), self.subproject_dir) + subpr = os.path.join(subproject_dir_abs, dirname) + try: + subi = IntrospectionInterpreter(subpr, '', self.backend, cross_file=self.cross_file, subproject=dirname, subproject_dir=self.subproject_dir, env=self.environment, visitors=self.visitors) + subi.analyze() + subi.project_data['name'] = dirname + self.project_data['subprojects'] += [subi.project_data] + except (mesonlib.MesonException, RuntimeError): + return + + def func_add_languages(self, node: BaseNode, args: T.List[TYPE_nvar], kwargs: T.Dict[str, TYPE_nvar]) -> None: + args = self.flatten_args(args) + for for_machine in [MachineChoice.BUILD, MachineChoice.HOST]: + for lang in sorted(args, key=compilers.sort_clink): + if isinstance(lang, StringNode): + assert isinstance(lang.value, str) + lang = lang.value + if not isinstance(lang, str): + continue + lang = lang.lower() + if lang not in self.coredata.compilers[for_machine]: + self.environment.detect_compiler_for(lang, for_machine) + + def func_dependency(self, node: BaseNode, args: T.List[TYPE_nvar], kwargs: T.Dict[str, TYPE_nvar]) -> None: + args = self.flatten_args(args) + kwargs = self.flatten_kwargs(kwargs) + if not args: + return + name = args[0] + has_fallback = 'fallback' in kwargs + required = kwargs.get('required', True) + version = kwargs.get('version', []) + if not isinstance(version, list): + version = [version] + if isinstance(required, ElementaryNode): + required = required.value + if not isinstance(required, bool): + required = False + self.dependencies += [{ + 'name': name, + 'required': required, + 'version': version, + 'has_fallback': has_fallback, + 'conditional': node.condition_level > 0, + 'node': node + }] + + def build_target(self, node: BaseNode, args: T.List[TYPE_nvar], kwargs_raw: T.Dict[str, TYPE_nvar], targetclass) -> T.Optional[T.Dict[str, T.Any]]: + args = self.flatten_args(args) + if not args or not isinstance(args[0], str): + return None + name = args[0] + srcqueue = [node] + + # Process the sources BEFORE flattening the kwargs, to preserve the original nodes + if 'sources' in kwargs_raw: + srcqueue += mesonlib.listify(kwargs_raw['sources']) + + kwargs = self.flatten_kwargs(kwargs_raw, True) + + source_nodes = [] # type: T.List[BaseNode] + while srcqueue: + curr = srcqueue.pop(0) + arg_node = None + assert(isinstance(curr, BaseNode)) + if isinstance(curr, FunctionNode): + arg_node = curr.args + elif isinstance(curr, ArrayNode): + arg_node = curr.args + elif isinstance(curr, IdNode): + # Try to resolve the ID and append the node to the queue + assert isinstance(curr.value, str) + var_name = curr.value + if var_name in self.assignments: + tmp_node = self.assignments[var_name] + if isinstance(tmp_node, (ArrayNode, IdNode, FunctionNode)): + srcqueue += [tmp_node] + elif isinstance(curr, ArithmeticNode): + srcqueue += [curr.left, curr.right] + if arg_node is None: + continue + arg_nodes = arg_node.arguments.copy() + # Pop the first element if the function is a build target function + if isinstance(curr, FunctionNode) and curr.func_name in build_target_functions: + arg_nodes.pop(0) + elemetary_nodes = [x for x in arg_nodes if isinstance(x, (str, StringNode))] + srcqueue += [x for x in arg_nodes if isinstance(x, (FunctionNode, ArrayNode, IdNode, ArithmeticNode))] + if elemetary_nodes: + source_nodes += [curr] + + # Make sure nothing can crash when creating the build class + kwargs_reduced = {k: v for k, v in kwargs.items() if k in targetclass.known_kwargs and k in ['install', 'build_by_default', 'build_always']} + kwargs_reduced = {k: v.value if isinstance(v, ElementaryNode) else v for k, v in kwargs_reduced.items()} + kwargs_reduced = {k: v for k, v in kwargs_reduced.items() if not isinstance(v, BaseNode)} + for_machine = MachineChoice.HOST + objects = [] # type: T.List[T.Any] + empty_sources = [] # type: T.List[T.Any] + # Passing the unresolved sources list causes errors + target = targetclass(name, self.subdir, self.subproject, for_machine, empty_sources, objects, self.environment, kwargs_reduced) + + new_target = { + 'name': target.get_basename(), + 'id': target.get_id(), + 'type': target.get_typename(), + 'defined_in': os.path.normpath(os.path.join(self.source_root, self.subdir, environment.build_filename)), + 'subdir': self.subdir, + 'build_by_default': target.build_by_default, + 'installed': target.should_install(), + 'outputs': target.get_outputs(), + 'sources': source_nodes, + 'kwargs': kwargs, + 'node': node, + } + + self.targets += [new_target] + return new_target + + def build_library(self, node: BaseNode, args: T.List[TYPE_nvar], kwargs: T.Dict[str, TYPE_nvar]) -> T.Optional[T.Dict[str, T.Any]]: + default_library = self.coredata.get_builtin_option('default_library') + if default_library == 'shared': + return self.build_target(node, args, kwargs, SharedLibrary) + elif default_library == 'static': + return self.build_target(node, args, kwargs, StaticLibrary) + elif default_library == 'both': + return self.build_target(node, args, kwargs, SharedLibrary) + return None + + def func_executable(self, node: BaseNode, args: T.List[TYPE_nvar], kwargs: T.Dict[str, TYPE_nvar]) -> T.Optional[T.Dict[str, T.Any]]: + return self.build_target(node, args, kwargs, Executable) + + def func_static_lib(self, node: BaseNode, args: T.List[TYPE_nvar], kwargs: T.Dict[str, TYPE_nvar]) -> T.Optional[T.Dict[str, T.Any]]: + return self.build_target(node, args, kwargs, StaticLibrary) + + def func_shared_lib(self, node: BaseNode, args: T.List[TYPE_nvar], kwargs: T.Dict[str, TYPE_nvar]) -> T.Optional[T.Dict[str, T.Any]]: + return self.build_target(node, args, kwargs, SharedLibrary) + + def func_both_lib(self, node: BaseNode, args: T.List[TYPE_nvar], kwargs: T.Dict[str, TYPE_nvar]) -> T.Optional[T.Dict[str, T.Any]]: + return self.build_target(node, args, kwargs, SharedLibrary) + + def func_shared_module(self, node: BaseNode, args: T.List[TYPE_nvar], kwargs: T.Dict[str, TYPE_nvar]) -> T.Optional[T.Dict[str, T.Any]]: + return self.build_target(node, args, kwargs, SharedModule) + + def func_library(self, node: BaseNode, args: T.List[TYPE_nvar], kwargs: T.Dict[str, TYPE_nvar]) -> T.Optional[T.Dict[str, T.Any]]: + return self.build_library(node, args, kwargs) + + def func_jar(self, node: BaseNode, args: T.List[TYPE_nvar], kwargs: T.Dict[str, TYPE_nvar]) -> T.Optional[T.Dict[str, T.Any]]: + return self.build_target(node, args, kwargs, Jar) + + def func_build_target(self, node: BaseNode, args: T.List[TYPE_nvar], kwargs: T.Dict[str, TYPE_nvar]) -> T.Optional[T.Dict[str, T.Any]]: + if 'target_type' not in kwargs: + return None + target_type = kwargs.pop('target_type') + if isinstance(target_type, ElementaryNode): + target_type = target_type.value + if target_type == 'executable': + return self.build_target(node, args, kwargs, Executable) + elif target_type == 'shared_library': + return self.build_target(node, args, kwargs, SharedLibrary) + elif target_type == 'static_library': + return self.build_target(node, args, kwargs, StaticLibrary) + elif target_type == 'both_libraries': + return self.build_target(node, args, kwargs, SharedLibrary) + elif target_type == 'library': + return self.build_library(node, args, kwargs) + elif target_type == 'jar': + return self.build_target(node, args, kwargs, Jar) + return None + + def is_subproject(self) -> bool: + return self.subproject != '' + + def analyze(self) -> None: + self.load_root_meson_file() + self.sanity_check_ast() + self.parse_project() + self.run() diff --git a/meson/mesonbuild/ast/postprocess.py b/meson/mesonbuild/ast/postprocess.py new file mode 100644 index 000000000..35fe1d317 --- /dev/null +++ b/meson/mesonbuild/ast/postprocess.py @@ -0,0 +1,117 @@ +# Copyright 2019 The Meson development team + +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at + +# http://www.apache.org/licenses/LICENSE-2.0 + +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# This class contains the basic functionality needed to run any interpreter +# or an interpreter-based tool + +from . import AstVisitor +from .. import mparser +import typing as T + +class AstIndentationGenerator(AstVisitor): + def __init__(self): + self.level = 0 + + def visit_default_func(self, node: mparser.BaseNode) -> None: + # Store the current level in the node + node.level = self.level + + def visit_ArrayNode(self, node: mparser.ArrayNode) -> None: + self.visit_default_func(node) + self.level += 1 + node.args.accept(self) + self.level -= 1 + + def visit_DictNode(self, node: mparser.DictNode) -> None: + self.visit_default_func(node) + self.level += 1 + node.args.accept(self) + self.level -= 1 + + def visit_MethodNode(self, node: mparser.MethodNode) -> None: + self.visit_default_func(node) + node.source_object.accept(self) + self.level += 1 + node.args.accept(self) + self.level -= 1 + + def visit_FunctionNode(self, node: mparser.FunctionNode) -> None: + self.visit_default_func(node) + self.level += 1 + node.args.accept(self) + self.level -= 1 + + def visit_ForeachClauseNode(self, node: mparser.ForeachClauseNode) -> None: + self.visit_default_func(node) + self.level += 1 + node.items.accept(self) + node.block.accept(self) + self.level -= 1 + + def visit_IfClauseNode(self, node: mparser.IfClauseNode) -> None: + self.visit_default_func(node) + for i in node.ifs: + i.accept(self) + if node.elseblock: + self.level += 1 + node.elseblock.accept(self) + self.level -= 1 + + def visit_IfNode(self, node: mparser.IfNode) -> None: + self.visit_default_func(node) + self.level += 1 + node.condition.accept(self) + node.block.accept(self) + self.level -= 1 + +class AstIDGenerator(AstVisitor): + def __init__(self): + self.counter = {} # type: T.Dict[str, int] + + def visit_default_func(self, node: mparser.BaseNode) -> None: + name = type(node).__name__ + if name not in self.counter: + self.counter[name] = 0 + node.ast_id = name + '#' + str(self.counter[name]) + self.counter[name] += 1 + +class AstConditionLevel(AstVisitor): + def __init__(self): + self.condition_level = 0 + + def visit_default_func(self, node: mparser.BaseNode) -> None: + node.condition_level = self.condition_level + + def visit_ForeachClauseNode(self, node: mparser.ForeachClauseNode) -> None: + self.visit_default_func(node) + self.condition_level += 1 + node.items.accept(self) + node.block.accept(self) + self.condition_level -= 1 + + def visit_IfClauseNode(self, node: mparser.IfClauseNode) -> None: + self.visit_default_func(node) + for i in node.ifs: + i.accept(self) + if node.elseblock: + self.condition_level += 1 + node.elseblock.accept(self) + self.condition_level -= 1 + + def visit_IfNode(self, node: mparser.IfNode) -> None: + self.visit_default_func(node) + self.condition_level += 1 + node.condition.accept(self) + node.block.accept(self) + self.condition_level -= 1 diff --git a/meson/mesonbuild/ast/printer.py b/meson/mesonbuild/ast/printer.py new file mode 100644 index 000000000..a57ba20d3 --- /dev/null +++ b/meson/mesonbuild/ast/printer.py @@ -0,0 +1,359 @@ +# Copyright 2019 The Meson development team + +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at + +# http://www.apache.org/licenses/LICENSE-2.0 + +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# This class contains the basic functionality needed to run any interpreter +# or an interpreter-based tool + +from .. import mparser +from . import AstVisitor +import re +import typing as T + +arithmic_map = { + 'add': '+', + 'sub': '-', + 'mod': '%', + 'mul': '*', + 'div': '/' +} + +class AstPrinter(AstVisitor): + def __init__(self, indent: int = 2, arg_newline_cutoff: int = 5): + self.result = '' + self.indent = indent + self.arg_newline_cutoff = arg_newline_cutoff + self.ci = '' + self.is_newline = True + self.last_level = 0 + + def post_process(self) -> None: + self.result = re.sub(r'\s+\n', '\n', self.result) + + def append(self, data: str, node: mparser.BaseNode) -> None: + self.last_level = node.level + if self.is_newline: + self.result += ' ' * (node.level * self.indent) + self.result += data + self.is_newline = False + + def append_padded(self, data: str, node: mparser.BaseNode) -> None: + if self.result[-1] not in [' ', '\n']: + data = ' ' + data + self.append(data + ' ', node) + + def newline(self) -> None: + self.result += '\n' + self.is_newline = True + + def visit_BooleanNode(self, node: mparser.BooleanNode) -> None: + self.append('true' if node.value else 'false', node) + + def visit_IdNode(self, node: mparser.IdNode) -> None: + assert isinstance(node.value, str) + self.append(node.value, node) + + def visit_NumberNode(self, node: mparser.NumberNode) -> None: + self.append(str(node.value), node) + + def visit_StringNode(self, node: mparser.StringNode) -> None: + assert isinstance(node.value, str) + self.append("'" + node.value + "'", node) + + def visit_ContinueNode(self, node: mparser.ContinueNode) -> None: + self.append('continue', node) + + def visit_BreakNode(self, node: mparser.BreakNode) -> None: + self.append('break', node) + + def visit_ArrayNode(self, node: mparser.ArrayNode) -> None: + self.append('[', node) + node.args.accept(self) + self.append(']', node) + + def visit_DictNode(self, node: mparser.DictNode) -> None: + self.append('{', node) + node.args.accept(self) + self.append('}', node) + + def visit_OrNode(self, node: mparser.OrNode) -> None: + node.left.accept(self) + self.append_padded('or', node) + node.right.accept(self) + + def visit_AndNode(self, node: mparser.AndNode) -> None: + node.left.accept(self) + self.append_padded('and', node) + node.right.accept(self) + + def visit_ComparisonNode(self, node: mparser.ComparisonNode) -> None: + node.left.accept(self) + self.append_padded(node.ctype, node) + node.right.accept(self) + + def visit_ArithmeticNode(self, node: mparser.ArithmeticNode) -> None: + node.left.accept(self) + self.append_padded(arithmic_map[node.operation], node) + node.right.accept(self) + + def visit_NotNode(self, node: mparser.NotNode) -> None: + self.append_padded('not', node) + node.value.accept(self) + + def visit_CodeBlockNode(self, node: mparser.CodeBlockNode) -> None: + for i in node.lines: + i.accept(self) + self.newline() + + def visit_IndexNode(self, node: mparser.IndexNode) -> None: + node.iobject.accept(self) + self.append('[', node) + node.index.accept(self) + self.append(']', node) + + def visit_MethodNode(self, node: mparser.MethodNode) -> None: + node.source_object.accept(self) + self.append('.' + node.name + '(', node) + node.args.accept(self) + self.append(')', node) + + def visit_FunctionNode(self, node: mparser.FunctionNode) -> None: + self.append(node.func_name + '(', node) + node.args.accept(self) + self.append(')', node) + + def visit_AssignmentNode(self, node: mparser.AssignmentNode) -> None: + self.append(node.var_name + ' = ', node) + node.value.accept(self) + + def visit_PlusAssignmentNode(self, node: mparser.PlusAssignmentNode) -> None: + self.append(node.var_name + ' += ', node) + node.value.accept(self) + + def visit_ForeachClauseNode(self, node: mparser.ForeachClauseNode) -> None: + varnames = [x for x in node.varnames] + self.append_padded('foreach', node) + self.append_padded(', '.join(varnames), node) + self.append_padded(':', node) + node.items.accept(self) + self.newline() + node.block.accept(self) + self.append('endforeach', node) + + def visit_IfClauseNode(self, node: mparser.IfClauseNode) -> None: + prefix = '' + for i in node.ifs: + self.append_padded(prefix + 'if', node) + prefix = 'el' + i.accept(self) + if not isinstance(node.elseblock, mparser.EmptyNode): + self.append('else', node) + node.elseblock.accept(self) + self.append('endif', node) + + def visit_UMinusNode(self, node: mparser.UMinusNode) -> None: + self.append_padded('-', node) + node.value.accept(self) + + def visit_IfNode(self, node: mparser.IfNode) -> None: + node.condition.accept(self) + self.newline() + node.block.accept(self) + + def visit_TernaryNode(self, node: mparser.TernaryNode) -> None: + node.condition.accept(self) + self.append_padded('?', node) + node.trueblock.accept(self) + self.append_padded(':', node) + node.falseblock.accept(self) + + def visit_ArgumentNode(self, node: mparser.ArgumentNode) -> None: + break_args = (len(node.arguments) + len(node.kwargs)) > self.arg_newline_cutoff + for i in node.arguments + list(node.kwargs.values()): + if not isinstance(i, (mparser.ElementaryNode, mparser.IndexNode)): + break_args = True + if break_args: + self.newline() + for i in node.arguments: + i.accept(self) + self.append(', ', node) + if break_args: + self.newline() + for key, val in node.kwargs.items(): + key.accept(self) + self.append_padded(':', node) + val.accept(self) + self.append(', ', node) + if break_args: + self.newline() + if break_args: + self.result = re.sub(r', \n$', '\n', self.result) + else: + self.result = re.sub(r', $', '', self.result) + +class AstJSONPrinter(AstVisitor): + def __init__(self) -> None: + self.result = {} # type: T.Dict[str, T.Any] + self.current = self.result + + def _accept(self, key: str, node: mparser.BaseNode) -> None: + old = self.current + data = {} # type: T.Dict[str, T.Any] + self.current = data + node.accept(self) + self.current = old + self.current[key] = data + + def _accept_list(self, key: str, nodes: T.Sequence[mparser.BaseNode]) -> None: + old = self.current + datalist = [] # type: T.List[T.Dict[str, T.Any]] + for i in nodes: + self.current = {} + i.accept(self) + datalist += [self.current] + self.current = old + self.current[key] = datalist + + def _raw_accept(self, node: mparser.BaseNode, data: T.Dict[str, T.Any]) -> None: + old = self.current + self.current = data + node.accept(self) + self.current = old + + def setbase(self, node: mparser.BaseNode) -> None: + self.current['node'] = type(node).__name__ + self.current['lineno'] = node.lineno + self.current['colno'] = node.colno + self.current['end_lineno'] = node.end_lineno + self.current['end_colno'] = node.end_colno + + def visit_default_func(self, node: mparser.BaseNode) -> None: + self.setbase(node) + + def gen_ElementaryNode(self, node: mparser.ElementaryNode) -> None: + self.current['value'] = node.value + self.setbase(node) + + def visit_BooleanNode(self, node: mparser.BooleanNode) -> None: + self.gen_ElementaryNode(node) + + def visit_IdNode(self, node: mparser.IdNode) -> None: + self.gen_ElementaryNode(node) + + def visit_NumberNode(self, node: mparser.NumberNode) -> None: + self.gen_ElementaryNode(node) + + def visit_StringNode(self, node: mparser.StringNode) -> None: + self.gen_ElementaryNode(node) + + def visit_ArrayNode(self, node: mparser.ArrayNode) -> None: + self._accept('args', node.args) + self.setbase(node) + + def visit_DictNode(self, node: mparser.DictNode) -> None: + self._accept('args', node.args) + self.setbase(node) + + def visit_OrNode(self, node: mparser.OrNode) -> None: + self._accept('left', node.left) + self._accept('right', node.right) + self.setbase(node) + + def visit_AndNode(self, node: mparser.AndNode) -> None: + self._accept('left', node.left) + self._accept('right', node.right) + self.setbase(node) + + def visit_ComparisonNode(self, node: mparser.ComparisonNode) -> None: + self._accept('left', node.left) + self._accept('right', node.right) + self.current['ctype'] = node.ctype + self.setbase(node) + + def visit_ArithmeticNode(self, node: mparser.ArithmeticNode) -> None: + self._accept('left', node.left) + self._accept('right', node.right) + self.current['op'] = arithmic_map[node.operation] + self.setbase(node) + + def visit_NotNode(self, node: mparser.NotNode) -> None: + self._accept('right', node.value) + self.setbase(node) + + def visit_CodeBlockNode(self, node: mparser.CodeBlockNode) -> None: + self._accept_list('lines', node.lines) + self.setbase(node) + + def visit_IndexNode(self, node: mparser.IndexNode) -> None: + self._accept('object', node.iobject) + self._accept('index', node.index) + self.setbase(node) + + def visit_MethodNode(self, node: mparser.MethodNode) -> None: + self._accept('object', node.source_object) + self._accept('args', node.args) + self.current['name'] = node.name + self.setbase(node) + + def visit_FunctionNode(self, node: mparser.FunctionNode) -> None: + self._accept('args', node.args) + self.current['name'] = node.func_name + self.setbase(node) + + def visit_AssignmentNode(self, node: mparser.AssignmentNode) -> None: + self._accept('value', node.value) + self.current['var_name'] = node.var_name + self.setbase(node) + + def visit_PlusAssignmentNode(self, node: mparser.PlusAssignmentNode) -> None: + self._accept('value', node.value) + self.current['var_name'] = node.var_name + self.setbase(node) + + def visit_ForeachClauseNode(self, node: mparser.ForeachClauseNode) -> None: + self._accept('items', node.items) + self._accept('block', node.block) + self.current['varnames'] = node.varnames + self.setbase(node) + + def visit_IfClauseNode(self, node: mparser.IfClauseNode) -> None: + self._accept_list('ifs', node.ifs) + self._accept('else', node.elseblock) + self.setbase(node) + + def visit_UMinusNode(self, node: mparser.UMinusNode) -> None: + self._accept('right', node.value) + self.setbase(node) + + def visit_IfNode(self, node: mparser.IfNode) -> None: + self._accept('condition', node.condition) + self._accept('block', node.block) + self.setbase(node) + + def visit_TernaryNode(self, node: mparser.TernaryNode) -> None: + self._accept('condition', node.condition) + self._accept('true', node.trueblock) + self._accept('false', node.falseblock) + self.setbase(node) + + def visit_ArgumentNode(self, node: mparser.ArgumentNode) -> None: + self._accept_list('positional', node.arguments) + kwargs_list = [] # type: T.List[T.Dict[str, T.Dict[str, T.Any]]] + for key, val in node.kwargs.items(): + key_res = {} # type: T.Dict[str, T.Any] + val_res = {} # type: T.Dict[str, T.Any] + self._raw_accept(key, key_res) + self._raw_accept(val, val_res) + kwargs_list += [{'key': key_res, 'val': val_res}] + self.current['kwargs'] = kwargs_list + self.setbase(node) diff --git a/meson/mesonbuild/ast/visitor.py b/meson/mesonbuild/ast/visitor.py new file mode 100644 index 000000000..451020d2d --- /dev/null +++ b/meson/mesonbuild/ast/visitor.py @@ -0,0 +1,139 @@ +# Copyright 2019 The Meson development team + +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at + +# http://www.apache.org/licenses/LICENSE-2.0 + +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# This class contains the basic functionality needed to run any interpreter +# or an interpreter-based tool + +from .. import mparser + +class AstVisitor: + def __init__(self): + pass + + def visit_default_func(self, node: mparser.BaseNode) -> None: + pass + + def visit_BooleanNode(self, node: mparser.BooleanNode) -> None: + self.visit_default_func(node) + + def visit_IdNode(self, node: mparser.IdNode) -> None: + self.visit_default_func(node) + + def visit_NumberNode(self, node: mparser.NumberNode) -> None: + self.visit_default_func(node) + + def visit_StringNode(self, node: mparser.StringNode) -> None: + self.visit_default_func(node) + + def visit_ContinueNode(self, node: mparser.ContinueNode) -> None: + self.visit_default_func(node) + + def visit_BreakNode(self, node: mparser.BreakNode) -> None: + self.visit_default_func(node) + + def visit_ArrayNode(self, node: mparser.ArrayNode) -> None: + self.visit_default_func(node) + node.args.accept(self) + + def visit_DictNode(self, node: mparser.DictNode) -> None: + self.visit_default_func(node) + node.args.accept(self) + + def visit_EmptyNode(self, node: mparser.EmptyNode) -> None: + self.visit_default_func(node) + + def visit_OrNode(self, node: mparser.OrNode) -> None: + self.visit_default_func(node) + node.left.accept(self) + node.right.accept(self) + + def visit_AndNode(self, node: mparser.AndNode) -> None: + self.visit_default_func(node) + node.left.accept(self) + node.right.accept(self) + + def visit_ComparisonNode(self, node: mparser.ComparisonNode) -> None: + self.visit_default_func(node) + node.left.accept(self) + node.right.accept(self) + + def visit_ArithmeticNode(self, node: mparser.ArithmeticNode) -> None: + self.visit_default_func(node) + node.left.accept(self) + node.right.accept(self) + + def visit_NotNode(self, node: mparser.NotNode) -> None: + self.visit_default_func(node) + node.value.accept(self) + + def visit_CodeBlockNode(self, node: mparser.CodeBlockNode) -> None: + self.visit_default_func(node) + for i in node.lines: + i.accept(self) + + def visit_IndexNode(self, node: mparser.IndexNode) -> None: + self.visit_default_func(node) + node.iobject.accept(self) + node.index.accept(self) + + def visit_MethodNode(self, node: mparser.MethodNode) -> None: + self.visit_default_func(node) + node.source_object.accept(self) + node.args.accept(self) + + def visit_FunctionNode(self, node: mparser.FunctionNode) -> None: + self.visit_default_func(node) + node.args.accept(self) + + def visit_AssignmentNode(self, node: mparser.AssignmentNode) -> None: + self.visit_default_func(node) + node.value.accept(self) + + def visit_PlusAssignmentNode(self, node: mparser.PlusAssignmentNode) -> None: + self.visit_default_func(node) + node.value.accept(self) + + def visit_ForeachClauseNode(self, node: mparser.ForeachClauseNode) -> None: + self.visit_default_func(node) + node.items.accept(self) + node.block.accept(self) + + def visit_IfClauseNode(self, node: mparser.IfClauseNode) -> None: + self.visit_default_func(node) + for i in node.ifs: + i.accept(self) + node.elseblock.accept(self) + + def visit_UMinusNode(self, node: mparser.UMinusNode) -> None: + self.visit_default_func(node) + node.value.accept(self) + + def visit_IfNode(self, node: mparser.IfNode) -> None: + self.visit_default_func(node) + node.condition.accept(self) + node.block.accept(self) + + def visit_TernaryNode(self, node: mparser.TernaryNode) -> None: + self.visit_default_func(node) + node.condition.accept(self) + node.trueblock.accept(self) + node.falseblock.accept(self) + + def visit_ArgumentNode(self, node: mparser.ArgumentNode) -> None: + self.visit_default_func(node) + for i in node.arguments: + i.accept(self) + for key, val in node.kwargs.items(): + key.accept(self) + val.accept(self) diff --git a/meson/mesonbuild/backend/__init__.py b/meson/mesonbuild/backend/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/meson/mesonbuild/backend/backends.py b/meson/mesonbuild/backend/backends.py new file mode 100644 index 000000000..c84bb7534 --- /dev/null +++ b/meson/mesonbuild/backend/backends.py @@ -0,0 +1,1382 @@ +# Copyright 2012-2016 The Meson development team + +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at + +# http://www.apache.org/licenses/LICENSE-2.0 + +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from collections import OrderedDict +from functools import lru_cache +from pathlib import Path +import enum +import json +import os +import pickle +import re +import shlex +import subprocess +import textwrap +import typing as T + +from .. import build +from .. import dependencies +from .. import mesonlib +from .. import mlog +from ..mesonlib import ( + File, MachineChoice, MesonException, OrderedSet, OptionOverrideProxy, + classify_unity_sources, unholder +) + +if T.TYPE_CHECKING: + from ..interpreter import Interpreter + + +class TestProtocol(enum.Enum): + + EXITCODE = 0 + TAP = 1 + GTEST = 2 + + @classmethod + def from_str(cls, string: str) -> 'TestProtocol': + if string == 'exitcode': + return cls.EXITCODE + elif string == 'tap': + return cls.TAP + elif string == 'gtest': + return cls.GTEST + raise MesonException('unknown test format {}'.format(string)) + + def __str__(self) -> str: + if self is self.EXITCODE: + return 'exitcode' + elif self is self.GTEST: + return 'gtest' + return 'tap' + + +class CleanTrees: + ''' + Directories outputted by custom targets that have to be manually cleaned + because on Linux `ninja clean` only deletes empty directories. + ''' + def __init__(self, build_dir, trees): + self.build_dir = build_dir + self.trees = trees + +class InstallData: + def __init__(self, source_dir, build_dir, prefix, strip_bin, + install_umask, mesonintrospect): + self.source_dir = source_dir + self.build_dir = build_dir + self.prefix = prefix + self.strip_bin = strip_bin + self.install_umask = install_umask + self.targets = [] + self.headers = [] + self.man = [] + self.data = [] + self.po_package_name = '' + self.po = [] + self.install_scripts = [] + self.install_subdirs = [] + self.mesonintrospect = mesonintrospect + +class TargetInstallData: + def __init__(self, fname, outdir, aliases, strip, install_name_mappings, rpath_dirs_to_remove, install_rpath, install_mode, optional=False): + self.fname = fname + self.outdir = outdir + self.aliases = aliases + self.strip = strip + self.install_name_mappings = install_name_mappings + self.rpath_dirs_to_remove = rpath_dirs_to_remove + self.install_rpath = install_rpath + self.install_mode = install_mode + self.optional = optional + +class ExecutableSerialisation: + def __init__(self, cmd_args, env=None, exe_wrapper=None, + workdir=None, extra_paths=None, capture=None): + self.cmd_args = cmd_args + self.env = env or {} + if exe_wrapper is not None: + assert(isinstance(exe_wrapper, dependencies.ExternalProgram)) + self.exe_runner = exe_wrapper + self.workdir = workdir + self.extra_paths = extra_paths + self.capture = capture + +class TestSerialisation: + def __init__(self, name: str, project: str, suite: str, fname: T.List[str], + is_cross_built: bool, exe_wrapper: T.Optional[dependencies.ExternalProgram], + needs_exe_wrapper: bool, is_parallel: bool, cmd_args: T.List[str], + env: build.EnvironmentVariables, should_fail: bool, + timeout: T.Optional[int], workdir: T.Optional[str], + extra_paths: T.List[str], protocol: TestProtocol, priority: int, + cmd_is_built: bool): + self.name = name + self.project_name = project + self.suite = suite + self.fname = fname + self.is_cross_built = is_cross_built + if exe_wrapper is not None: + assert(isinstance(exe_wrapper, dependencies.ExternalProgram)) + self.exe_runner = exe_wrapper + self.is_parallel = is_parallel + self.cmd_args = cmd_args + self.env = env + self.should_fail = should_fail + self.timeout = timeout + self.workdir = workdir + self.extra_paths = extra_paths + self.protocol = protocol + self.priority = priority + self.needs_exe_wrapper = needs_exe_wrapper + self.cmd_is_built = cmd_is_built + + +def get_backend_from_name(backend: str, build: T.Optional[build.Build] = None, interpreter: T.Optional['Interpreter'] = None) -> T.Optional['Backend']: + if backend == 'ninja': + from . import ninjabackend + return ninjabackend.NinjaBackend(build, interpreter) + elif backend == 'vs': + from . import vs2010backend + return vs2010backend.autodetect_vs_version(build, interpreter) + elif backend == 'vs2010': + from . import vs2010backend + return vs2010backend.Vs2010Backend(build, interpreter) + elif backend == 'vs2015': + from . import vs2015backend + return vs2015backend.Vs2015Backend(build, interpreter) + elif backend == 'vs2017': + from . import vs2017backend + return vs2017backend.Vs2017Backend(build, interpreter) + elif backend == 'vs2019': + from . import vs2019backend + return vs2019backend.Vs2019Backend(build, interpreter) + elif backend == 'xcode': + from . import xcodebackend + return xcodebackend.XCodeBackend(build, interpreter) + return None + +# This class contains the basic functionality that is needed by all backends. +# Feel free to move stuff in and out of it as you see fit. +class Backend: + def __init__(self, build: T.Optional[build.Build], interpreter: T.Optional['Interpreter']): + # Make it possible to construct a dummy backend + # This is used for introspection without a build directory + if build is None: + self.environment = None + return + self.build = build + self.interpreter = interpreter + self.environment = build.environment + self.processed_targets = {} + self.build_dir = self.environment.get_build_dir() + self.source_dir = self.environment.get_source_dir() + self.build_to_src = mesonlib.relpath(self.environment.get_source_dir(), + self.environment.get_build_dir()) + + def get_target_filename(self, t, *, warn_multi_output: bool = True): + if isinstance(t, build.CustomTarget): + if warn_multi_output and len(t.get_outputs()) != 1: + mlog.warning('custom_target {!r} has more than one output! ' + 'Using the first one.'.format(t.name)) + filename = t.get_outputs()[0] + elif isinstance(t, build.CustomTargetIndex): + filename = t.get_outputs()[0] + else: + assert(isinstance(t, build.BuildTarget)) + filename = t.get_filename() + return os.path.join(self.get_target_dir(t), filename) + + def get_target_filename_abs(self, target): + return os.path.join(self.environment.get_build_dir(), self.get_target_filename(target)) + + def get_base_options_for_target(self, target): + return OptionOverrideProxy(target.option_overrides_base, + self.environment.coredata.builtins, + self.environment.coredata.base_options) + + def get_compiler_options_for_target(self, target): + comp_reg = self.environment.coredata.compiler_options[target.for_machine] + comp_override = target.option_overrides_compiler + return { + lang: OptionOverrideProxy(comp_override[lang], comp_reg[lang]) + for lang in set(comp_reg.keys()) | set(comp_override.keys()) + } + + def get_option_for_target(self, option_name, target): + if option_name in target.option_overrides_base: + override = target.option_overrides_base[option_name] + return self.environment.coredata.validate_option_value(option_name, override) + return self.environment.coredata.get_builtin_option(option_name, target.subproject) + + def get_target_filename_for_linking(self, target): + # On some platforms (msvc for instance), the file that is used for + # dynamic linking is not the same as the dynamic library itself. This + # file is called an import library, and we want to link against that. + # On all other platforms, we link to the library directly. + if isinstance(target, build.SharedLibrary): + link_lib = target.get_import_filename() or target.get_filename() + return os.path.join(self.get_target_dir(target), link_lib) + elif isinstance(target, build.StaticLibrary): + return os.path.join(self.get_target_dir(target), target.get_filename()) + elif isinstance(target, (build.CustomTarget, build.CustomTargetIndex)): + if not target.is_linkable_target(): + raise MesonException('Tried to link against custom target "{}", which is not linkable.'.format(target.name)) + return os.path.join(self.get_target_dir(target), target.get_filename()) + elif isinstance(target, build.Executable): + if target.import_filename: + return os.path.join(self.get_target_dir(target), target.get_import_filename()) + else: + return None + raise AssertionError('BUG: Tried to link to {!r} which is not linkable'.format(target)) + + @lru_cache(maxsize=None) + def get_target_dir(self, target): + if self.environment.coredata.get_builtin_option('layout') == 'mirror': + dirname = target.get_subdir() + else: + dirname = 'meson-out' + return dirname + + def get_target_dir_relative_to(self, t, o): + '''Get a target dir relative to another target's directory''' + target_dir = os.path.join(self.environment.get_build_dir(), self.get_target_dir(t)) + othert_dir = os.path.join(self.environment.get_build_dir(), self.get_target_dir(o)) + return os.path.relpath(target_dir, othert_dir) + + def get_target_source_dir(self, target): + # if target dir is empty, avoid extraneous trailing / from os.path.join() + target_dir = self.get_target_dir(target) + if target_dir: + return os.path.join(self.build_to_src, target_dir) + return self.build_to_src + + def get_target_private_dir(self, target): + return os.path.join(self.get_target_filename(target, warn_multi_output=False) + '.p') + + def get_target_private_dir_abs(self, target): + return os.path.join(self.environment.get_build_dir(), self.get_target_private_dir(target)) + + @lru_cache(maxsize=None) + def get_target_generated_dir(self, target, gensrc, src): + """ + Takes a BuildTarget, a generator source (CustomTarget or GeneratedList), + and a generated source filename. + Returns the full path of the generated source relative to the build root + """ + # CustomTarget generators output to the build dir of the CustomTarget + if isinstance(gensrc, (build.CustomTarget, build.CustomTargetIndex)): + return os.path.join(self.get_target_dir(gensrc), src) + # GeneratedList generators output to the private build directory of the + # target that the GeneratedList is used in + return os.path.join(self.get_target_private_dir(target), src) + + def get_unity_source_file(self, target, suffix, number): + # There is a potential conflict here, but it is unlikely that + # anyone both enables unity builds and has a file called foo-unity.cpp. + osrc = '{}-unity{}.{}'.format(target.name, number, suffix) + return mesonlib.File.from_built_file(self.get_target_private_dir(target), osrc) + + def generate_unity_files(self, target, unity_src): + abs_files = [] + result = [] + compsrcs = classify_unity_sources(target.compilers.values(), unity_src) + unity_size = self.get_option_for_target('unity_size', target) + + def init_language_file(suffix, unity_file_number): + unity_src = self.get_unity_source_file(target, suffix, unity_file_number) + outfileabs = unity_src.absolute_path(self.environment.get_source_dir(), + self.environment.get_build_dir()) + outfileabs_tmp = outfileabs + '.tmp' + abs_files.append(outfileabs) + outfileabs_tmp_dir = os.path.dirname(outfileabs_tmp) + if not os.path.exists(outfileabs_tmp_dir): + os.makedirs(outfileabs_tmp_dir) + result.append(unity_src) + return open(outfileabs_tmp, 'w') + + # For each language, generate unity source files and return the list + for comp, srcs in compsrcs.items(): + files_in_current = unity_size + 1 + unity_file_number = 0 + ofile = None + for src in srcs: + if files_in_current >= unity_size: + if ofile: + ofile.close() + ofile = init_language_file(comp.get_default_suffix(), unity_file_number) + unity_file_number += 1 + files_in_current = 0 + ofile.write('#include<{}>\n'.format(src)) + files_in_current += 1 + if ofile: + ofile.close() + + [mesonlib.replace_if_different(x, x + '.tmp') for x in abs_files] + return result + + def relpath(self, todir, fromdir): + return os.path.relpath(os.path.join('dummyprefixdir', todir), + os.path.join('dummyprefixdir', fromdir)) + + def flatten_object_list(self, target, proj_dir_to_build_root=''): + obj_list = self._flatten_object_list(target, target.get_objects(), proj_dir_to_build_root) + return list(dict.fromkeys(obj_list)) + + def _flatten_object_list(self, target, objects, proj_dir_to_build_root): + obj_list = [] + for obj in objects: + if isinstance(obj, str): + o = os.path.join(proj_dir_to_build_root, + self.build_to_src, target.get_subdir(), obj) + obj_list.append(o) + elif isinstance(obj, mesonlib.File): + obj_list.append(obj.rel_to_builddir(self.build_to_src)) + elif isinstance(obj, build.ExtractedObjects): + if obj.recursive: + obj_list += self._flatten_object_list(obj.target, obj.objlist, proj_dir_to_build_root) + obj_list += self.determine_ext_objs(obj, proj_dir_to_build_root) + else: + raise MesonException('Unknown data type in object list.') + return obj_list + + def as_meson_exe_cmdline(self, tname, exe, cmd_args, workdir=None, + for_machine=MachineChoice.BUILD, + extra_bdeps=None, capture=None, force_serialize=False): + ''' + Serialize an executable for running with a generator or a custom target + ''' + import hashlib + machine = self.environment.machines[for_machine] + if machine.is_windows() or machine.is_cygwin(): + extra_paths = self.determine_windows_extra_paths(exe, extra_bdeps or []) + else: + extra_paths = [] + + if isinstance(exe, dependencies.ExternalProgram): + exe_cmd = exe.get_command() + exe_for_machine = exe.for_machine + elif isinstance(exe, (build.BuildTarget, build.CustomTarget)): + exe_cmd = [self.get_target_filename_abs(exe)] + exe_for_machine = exe.for_machine + else: + exe_cmd = [exe] + exe_for_machine = MachineChoice.BUILD + + is_cross_built = not self.environment.machines.matches_build_machine(exe_for_machine) + if is_cross_built and self.environment.need_exe_wrapper(): + exe_wrapper = self.environment.get_exe_wrapper() + if not exe_wrapper.found(): + msg = 'The exe_wrapper {!r} defined in the cross file is ' \ + 'needed by target {!r}, but was not found. Please ' \ + 'check the command and/or add it to PATH.' + raise MesonException(msg.format(exe_wrapper.name, tname)) + else: + if exe_cmd[0].endswith('.jar'): + exe_cmd = ['java', '-jar'] + exe_cmd + elif exe_cmd[0].endswith('.exe') and not (mesonlib.is_windows() or mesonlib.is_cygwin()): + exe_cmd = ['mono'] + exe_cmd + exe_wrapper = None + + force_serialize = force_serialize or extra_paths or workdir or \ + exe_wrapper or any('\n' in c for c in cmd_args) + if not force_serialize: + if not capture: + return None + return (self.environment.get_build_command() + + ['--internal', 'exe', '--capture', capture, '--'] + exe_cmd + cmd_args) + + workdir = workdir or self.environment.get_build_dir() + env = {} + if isinstance(exe, (dependencies.ExternalProgram, + build.BuildTarget, build.CustomTarget)): + basename = exe.name + else: + basename = os.path.basename(exe) + + # Can't just use exe.name here; it will likely be run more than once + # Take a digest of the cmd args, env, workdir, and capture. This avoids + # collisions and also makes the name deterministic over regenerations + # which avoids a rebuild by Ninja because the cmdline stays the same. + data = bytes(str(sorted(env.items())) + str(cmd_args) + str(workdir) + str(capture), + encoding='utf-8') + digest = hashlib.sha1(data).hexdigest() + scratch_file = 'meson_exe_{0}_{1}.dat'.format(basename, digest) + exe_data = os.path.join(self.environment.get_scratch_dir(), scratch_file) + with open(exe_data, 'wb') as f: + es = ExecutableSerialisation(exe_cmd + cmd_args, env, + exe_wrapper, workdir, + extra_paths, capture) + pickle.dump(es, f) + return self.environment.get_build_command() + ['--internal', 'exe', '--unpickle', exe_data] + + def serialize_tests(self): + test_data = os.path.join(self.environment.get_scratch_dir(), 'meson_test_setup.dat') + with open(test_data, 'wb') as datafile: + self.write_test_file(datafile) + benchmark_data = os.path.join(self.environment.get_scratch_dir(), 'meson_benchmark_setup.dat') + with open(benchmark_data, 'wb') as datafile: + self.write_benchmark_file(datafile) + return test_data, benchmark_data + + def determine_linker_and_stdlib_args(self, target): + ''' + If we're building a static library, there is only one static linker. + Otherwise, we query the target for the dynamic linker. + ''' + if isinstance(target, build.StaticLibrary): + return self.build.static_linker[target.for_machine], [] + l, stdlib_args = target.get_clink_dynamic_linker_and_stdlibs() + return l, stdlib_args + + @staticmethod + def _libdir_is_system(libdir, compilers, env): + libdir = os.path.normpath(libdir) + for cc in compilers.values(): + if libdir in cc.get_library_dirs(env): + return True + return False + + def get_external_rpath_dirs(self, target): + dirs = set() + args = [] + # FIXME: is there a better way? + for lang in ['c', 'cpp']: + try: + args.extend(self.environment.coredata.get_external_link_args(target.for_machine, lang)) + except Exception: + pass + # Match rpath formats: + # -Wl,-rpath= + # -Wl,-rpath, + rpath_regex = re.compile(r'-Wl,-rpath[=,]([^,]+)') + # Match solaris style compat runpath formats: + # -Wl,-R + # -Wl,-R, + runpath_regex = re.compile(r'-Wl,-R[,]?([^,]+)') + # Match symbols formats: + # -Wl,--just-symbols= + # -Wl,--just-symbols, + symbols_regex = re.compile(r'-Wl,--just-symbols[=,]([^,]+)') + for arg in args: + rpath_match = rpath_regex.match(arg) + if rpath_match: + for dir in rpath_match.group(1).split(':'): + dirs.add(dir) + runpath_match = runpath_regex.match(arg) + if runpath_match: + for dir in runpath_match.group(1).split(':'): + # The symbols arg is an rpath if the path is a directory + if Path(dir).is_dir(): + dirs.add(dir) + symbols_match = symbols_regex.match(arg) + if symbols_match: + for dir in symbols_match.group(1).split(':'): + # Prevent usage of --just-symbols to specify rpath + if Path(dir).is_dir(): + raise MesonException('Invalid arg for --just-symbols, {} is a directory.'.format(dir)) + return dirs + + def rpaths_for_bundled_shared_libraries(self, target, exclude_system=True): + paths = [] + for dep in target.external_deps: + if not isinstance(dep, (dependencies.ExternalLibrary, dependencies.PkgConfigDependency)): + continue + la = dep.link_args + if len(la) != 1 or not os.path.isabs(la[0]): + continue + # The only link argument is an absolute path to a library file. + libpath = la[0] + libdir = os.path.dirname(libpath) + if exclude_system and self._libdir_is_system(libdir, target.compilers, self.environment): + # No point in adding system paths. + continue + # Don't remove rpaths specified in LDFLAGS. + if libdir in self.get_external_rpath_dirs(target): + continue + # Windows doesn't support rpaths, but we use this function to + # emulate rpaths by setting PATH, so also accept DLLs here + if os.path.splitext(libpath)[1] not in ['.dll', '.lib', '.so', '.dylib']: + continue + if libdir.startswith(self.environment.get_source_dir()): + rel_to_src = libdir[len(self.environment.get_source_dir()) + 1:] + assert not os.path.isabs(rel_to_src), 'rel_to_src: {} is absolute'.format(rel_to_src) + paths.append(os.path.join(self.build_to_src, rel_to_src)) + else: + paths.append(libdir) + return paths + + def determine_rpath_dirs(self, target): + if self.environment.coredata.get_builtin_option('layout') == 'mirror': + result = target.get_link_dep_subdirs() + else: + result = OrderedSet() + result.add('meson-out') + result.update(self.rpaths_for_bundled_shared_libraries(target)) + target.rpath_dirs_to_remove.update([d.encode('utf8') for d in result]) + return tuple(result) + + @staticmethod + def canonicalize_filename(fname): + for ch in ('/', '\\', ':'): + fname = fname.replace(ch, '_') + return fname + + def object_filename_from_source(self, target, source): + assert isinstance(source, mesonlib.File) + build_dir = self.environment.get_build_dir() + rel_src = source.rel_to_builddir(self.build_to_src) + + # foo.vala files compile down to foo.c and then foo.c.o, not foo.vala.o + if rel_src.endswith(('.vala', '.gs')): + # See description in generate_vala_compile for this logic. + if source.is_built: + if os.path.isabs(rel_src): + rel_src = rel_src[len(build_dir) + 1:] + rel_src = os.path.relpath(rel_src, self.get_target_private_dir(target)) + else: + rel_src = os.path.basename(rel_src) + # A meson- prefixed directory is reserved; hopefully no-one creates a file name with such a weird prefix. + source = 'meson-generated_' + rel_src[:-5] + '.c' + elif source.is_built: + if os.path.isabs(rel_src): + rel_src = rel_src[len(build_dir) + 1:] + targetdir = self.get_target_private_dir(target) + # A meson- prefixed directory is reserved; hopefully no-one creates a file name with such a weird prefix. + source = 'meson-generated_' + os.path.relpath(rel_src, targetdir) + else: + if os.path.isabs(rel_src): + # Use the absolute path directly to avoid file name conflicts + source = rel_src + else: + source = os.path.relpath(os.path.join(build_dir, rel_src), + os.path.join(self.environment.get_source_dir(), target.get_subdir())) + machine = self.environment.machines[target.for_machine] + return self.canonicalize_filename(source) + '.' + machine.get_object_suffix() + + def determine_ext_objs(self, extobj, proj_dir_to_build_root): + result = [] + + # Merge sources and generated sources + sources = list(extobj.srclist) + for gensrc in extobj.genlist: + for s in gensrc.get_outputs(): + path = self.get_target_generated_dir(extobj.target, gensrc, s) + dirpart, fnamepart = os.path.split(path) + sources.append(File(True, dirpart, fnamepart)) + + # Filter out headers and all non-source files + filtered_sources = [] + for s in sources: + if self.environment.is_source(s) and not self.environment.is_header(s): + filtered_sources.append(s) + elif self.environment.is_object(s): + result.append(s.relative_name()) + sources = filtered_sources + + # extobj could contain only objects and no sources + if not sources: + return result + + targetdir = self.get_target_private_dir(extobj.target) + + # With unity builds, sources don't map directly to objects, + # we only support extracting all the objects in this mode, + # so just return all object files. + if self.is_unity(extobj.target): + compsrcs = classify_unity_sources(extobj.target.compilers.values(), sources) + sources = [] + unity_size = self.get_option_for_target('unity_size', extobj.target) + for comp, srcs in compsrcs.items(): + for i in range(len(srcs) // unity_size + 1): + osrc = self.get_unity_source_file(extobj.target, + comp.get_default_suffix(), i) + sources.append(osrc) + + for osrc in sources: + objname = self.object_filename_from_source(extobj.target, osrc) + objpath = os.path.join(proj_dir_to_build_root, targetdir, objname) + result.append(objpath) + + return result + + def get_pch_include_args(self, compiler, target): + args = [] + pchpath = self.get_target_private_dir(target) + includeargs = compiler.get_include_args(pchpath, False) + p = target.get_pch(compiler.get_language()) + if p: + args += compiler.get_pch_use_args(pchpath, p[0]) + return includeargs + args + + def create_msvc_pch_implementation(self, target, lang, pch_header): + # We have to include the language in the file name, otherwise + # pch.c and pch.cpp will both end up as pch.obj in VS backends. + impl_name = 'meson_pch-{}.{}'.format(lang, lang) + pch_rel_to_build = os.path.join(self.get_target_private_dir(target), impl_name) + # Make sure to prepend the build dir, since the working directory is + # not defined. Otherwise, we might create the file in the wrong path. + pch_file = os.path.join(self.build_dir, pch_rel_to_build) + os.makedirs(os.path.dirname(pch_file), exist_ok=True) + + content = '#include "{}"'.format(os.path.basename(pch_header)) + pch_file_tmp = pch_file + '.tmp' + with open(pch_file_tmp, 'w') as f: + f.write(content) + mesonlib.replace_if_different(pch_file, pch_file_tmp) + return pch_rel_to_build + + @staticmethod + def escape_extra_args(compiler, args): + # all backslashes in defines are doubly-escaped + extra_args = [] + for arg in args: + if arg.startswith('-D') or arg.startswith('/D'): + arg = arg.replace('\\', '\\\\') + extra_args.append(arg) + + return extra_args + + def generate_basic_compiler_args(self, target, compiler, no_warn_args=False): + # Create an empty commands list, and start adding arguments from + # various sources in the order in which they must override each other + # starting from hard-coded defaults followed by build options and so on. + commands = compiler.compiler_args() + + copt_proxy = self.get_compiler_options_for_target(target)[compiler.language] + # First, the trivial ones that are impossible to override. + # + # Add -nostdinc/-nostdinc++ if needed; can't be overridden + commands += self.get_cross_stdlib_args(target, compiler) + # Add things like /NOLOGO or -pipe; usually can't be overridden + commands += compiler.get_always_args() + # Only add warning-flags by default if the buildtype enables it, and if + # we weren't explicitly asked to not emit warnings (for Vala, f.ex) + if no_warn_args: + commands += compiler.get_no_warn_args() + elif self.get_option_for_target('buildtype', target) != 'plain': + commands += compiler.get_warn_args(self.get_option_for_target('warning_level', target)) + # Add -Werror if werror=true is set in the build options set on the + # command-line or default_options inside project(). This only sets the + # action to be done for warnings if/when they are emitted, so it's ok + # to set it after get_no_warn_args() or get_warn_args(). + if self.get_option_for_target('werror', target): + commands += compiler.get_werror_args() + # Add compile args for c_* or cpp_* build options set on the + # command-line or default_options inside project(). + commands += compiler.get_option_compile_args(copt_proxy) + # Add buildtype args: optimization level, debugging, etc. + commands += compiler.get_buildtype_args(self.get_option_for_target('buildtype', target)) + commands += compiler.get_optimization_args(self.get_option_for_target('optimization', target)) + commands += compiler.get_debug_args(self.get_option_for_target('debug', target)) + # Add compile args added using add_project_arguments() + commands += self.build.get_project_args(compiler, target.subproject, target.for_machine) + # Add compile args added using add_global_arguments() + # These override per-project arguments + commands += self.build.get_global_args(compiler, target.for_machine) + # Compile args added from the env: CFLAGS/CXXFLAGS, etc, or the cross + # file. We want these to override all the defaults, but not the + # per-target compile args. + commands += self.environment.coredata.get_external_args(target.for_machine, compiler.get_language()) + # Always set -fPIC for shared libraries + if isinstance(target, build.SharedLibrary): + commands += compiler.get_pic_args() + # Set -fPIC for static libraries by default unless explicitly disabled + if isinstance(target, build.StaticLibrary) and target.pic: + commands += compiler.get_pic_args() + if isinstance(target, build.Executable) and target.pie: + commands += compiler.get_pie_args() + # Add compile args needed to find external dependencies. Link args are + # added while generating the link command. + # NOTE: We must preserve the order in which external deps are + # specified, so we reverse the list before iterating over it. + for dep in reversed(target.get_external_deps()): + if not dep.found(): + continue + + if compiler.language == 'vala': + if isinstance(dep, dependencies.PkgConfigDependency): + if dep.name == 'glib-2.0' and dep.version_reqs is not None: + for req in dep.version_reqs: + if req.startswith(('>=', '==')): + commands += ['--target-glib', req[2:]] + break + commands += ['--pkg', dep.name] + elif isinstance(dep, dependencies.ExternalLibrary): + commands += dep.get_link_args('vala') + else: + commands += compiler.get_dependency_compile_args(dep) + # Qt needs -fPIC for executables + # XXX: We should move to -fPIC for all executables + if isinstance(target, build.Executable): + commands += dep.get_exe_args(compiler) + # For 'automagic' deps: Boost and GTest. Also dependency('threads'). + # pkg-config puts the thread flags itself via `Cflags:` + # Fortran requires extra include directives. + if compiler.language == 'fortran': + for lt in target.link_targets: + priv_dir = self.get_target_private_dir(lt) + commands += compiler.get_include_args(priv_dir, False) + return commands + + def build_target_link_arguments(self, compiler, deps): + args = [] + for d in deps: + if not (d.is_linkable_target()): + raise RuntimeError('Tried to link with a non-library target "{}".'.format(d.get_basename())) + arg = self.get_target_filename_for_linking(d) + if not arg: + continue + if compiler.get_language() == 'd': + arg = '-Wl,' + arg + else: + arg = compiler.get_linker_lib_prefix() + arg + args.append(arg) + return args + + def get_mingw_extra_paths(self, target): + paths = OrderedSet() + # The cross bindir + root = self.environment.properties[target.for_machine].get_root() + if root: + paths.add(os.path.join(root, 'bin')) + # The toolchain bindir + sys_root = self.environment.properties[target.for_machine].get_sys_root() + if sys_root: + paths.add(os.path.join(sys_root, 'bin')) + # Get program and library dirs from all target compilers + if isinstance(target, build.BuildTarget): + for cc in target.compilers.values(): + paths.update(cc.get_program_dirs(self.environment)) + paths.update(cc.get_library_dirs(self.environment)) + return list(paths) + + def determine_windows_extra_paths(self, target: T.Union[build.BuildTarget, str], extra_bdeps): + '''On Windows there is no such thing as an rpath. + We must determine all locations of DLLs that this exe + links to and return them so they can be used in unit + tests.''' + result = set() + prospectives = set() + if isinstance(target, build.BuildTarget): + prospectives.update(target.get_transitive_link_deps()) + # External deps + for deppath in self.rpaths_for_bundled_shared_libraries(target, exclude_system=False): + result.add(os.path.normpath(os.path.join(self.environment.get_build_dir(), deppath))) + for bdep in extra_bdeps: + prospectives.add(bdep) + prospectives.update(bdep.get_transitive_link_deps()) + # Internal deps + for ld in prospectives: + if ld == '' or ld == '.': + continue + dirseg = os.path.join(self.environment.get_build_dir(), self.get_target_dir(ld)) + result.add(dirseg) + if (isinstance(target, build.BuildTarget) and + not self.environment.machines.matches_build_machine(target.for_machine)): + result.update(self.get_mingw_extra_paths(target)) + return list(result) + + def write_benchmark_file(self, datafile): + self.write_test_serialisation(self.build.get_benchmarks(), datafile) + + def write_test_file(self, datafile): + self.write_test_serialisation(self.build.get_tests(), datafile) + + def create_test_serialisation(self, tests): + arr = [] + for t in sorted(tests, key=lambda tst: -1 * tst.priority): + exe = t.get_exe() + if isinstance(exe, dependencies.ExternalProgram): + cmd = exe.get_command() + else: + cmd = [os.path.join(self.environment.get_build_dir(), self.get_target_filename(t.get_exe()))] + if isinstance(exe, (build.BuildTarget, dependencies.ExternalProgram)): + test_for_machine = exe.for_machine + else: + # E.g. an external verifier or simulator program run on a generated executable. + # Can always be run without a wrapper. + test_for_machine = MachineChoice.BUILD + + # we allow passing compiled executables to tests, which may be cross built. + # We need to consider these as well when considering whether the target is cross or not. + for a in t.cmd_args: + if isinstance(a, build.BuildTarget): + if a.for_machine is MachineChoice.HOST: + test_for_machine = MachineChoice.HOST + break + + is_cross = self.environment.is_cross_build(test_for_machine) + if is_cross and self.environment.need_exe_wrapper(): + exe_wrapper = self.environment.get_exe_wrapper() + else: + exe_wrapper = None + machine = self.environment.machines[exe.for_machine] + if machine.is_windows() or machine.is_cygwin(): + extra_bdeps = [] + if isinstance(exe, build.CustomTarget): + extra_bdeps = exe.get_transitive_build_target_deps() + extra_paths = self.determine_windows_extra_paths(exe, extra_bdeps) + else: + extra_paths = [] + + cmd_args = [] + for a in unholder(t.cmd_args): + if isinstance(a, build.BuildTarget): + extra_paths += self.determine_windows_extra_paths(a, []) + if isinstance(a, mesonlib.File): + a = os.path.join(self.environment.get_build_dir(), a.rel_to_builddir(self.build_to_src)) + cmd_args.append(a) + elif isinstance(a, str): + cmd_args.append(a) + elif isinstance(a, build.Executable): + p = self.construct_target_rel_path(a, t.workdir) + if p == a.get_filename(): + p = './' + p + cmd_args.append(p) + elif isinstance(a, build.Target): + cmd_args.append(self.construct_target_rel_path(a, t.workdir)) + else: + raise MesonException('Bad object in test command.') + ts = TestSerialisation(t.get_name(), t.project_name, t.suite, cmd, is_cross, + exe_wrapper, self.environment.need_exe_wrapper(), + t.is_parallel, cmd_args, t.env, + t.should_fail, t.timeout, t.workdir, + extra_paths, t.protocol, t.priority, + isinstance(exe, build.Executable)) + arr.append(ts) + return arr + + def write_test_serialisation(self, tests, datafile): + pickle.dump(self.create_test_serialisation(tests), datafile) + + def construct_target_rel_path(self, a, workdir): + if workdir is None: + return self.get_target_filename(a) + assert(os.path.isabs(workdir)) + abs_path = self.get_target_filename_abs(a) + return os.path.relpath(abs_path, workdir) + + def generate_depmf_install(self, d): + if self.build.dep_manifest_name is None: + return + ifilename = os.path.join(self.environment.get_build_dir(), 'depmf.json') + ofilename = os.path.join(self.environment.get_prefix(), self.build.dep_manifest_name) + mfobj = {'type': 'dependency manifest', 'version': '1.0', 'projects': self.build.dep_manifest} + with open(ifilename, 'w') as f: + f.write(json.dumps(mfobj)) + # Copy file from, to, and with mode unchanged + d.data.append([ifilename, ofilename, None]) + + def get_regen_filelist(self): + '''List of all files whose alteration means that the build + definition needs to be regenerated.''' + deps = [os.path.join(self.build_to_src, df) + for df in self.interpreter.get_build_def_files()] + if self.environment.is_cross_build(): + deps.extend(self.environment.coredata.cross_files) + deps.extend(self.environment.coredata.config_files) + deps.append('meson-private/coredata.dat') + self.check_clock_skew(deps) + return deps + + def check_clock_skew(self, file_list): + # If a file that leads to reconfiguration has a time + # stamp in the future, it will trigger an eternal reconfigure + # loop. + import time + now = time.time() + for f in file_list: + absf = os.path.join(self.environment.get_build_dir(), f) + ftime = os.path.getmtime(absf) + delta = ftime - now + # On Windows disk time stamps sometimes point + # to the future by a minuscule amount, less than + # 0.001 seconds. I don't know why. + if delta > 0.001: + raise MesonException('Clock skew detected. File {} has a time stamp {:.4f}s in the future.'.format(absf, delta)) + + def build_target_to_cmd_array(self, bt): + if isinstance(bt, build.BuildTarget): + if isinstance(bt, build.Executable) and bt.for_machine is not MachineChoice.BUILD: + if (self.environment.is_cross_build() and + self.environment.exe_wrapper is None and + self.environment.need_exe_wrapper()): + s = textwrap.dedent(''' + Cannot use target {} as a generator because it is built for the + host machine and no exe wrapper is defined or needs_exe_wrapper is + true. You might want to set `native: true` instead to build it for + the build machine.'''.format(bt.name)) + raise MesonException(s) + arr = [os.path.join(self.environment.get_build_dir(), self.get_target_filename(bt))] + else: + arr = bt.get_command() + return arr + + def replace_extra_args(self, args, genlist): + final_args = [] + for a in args: + if a == '@EXTRA_ARGS@': + final_args += genlist.get_extra_args() + else: + final_args.append(a) + return final_args + + def replace_outputs(self, args, private_dir, output_list): + newargs = [] + regex = re.compile(r'@OUTPUT(\d+)@') + for arg in args: + m = regex.search(arg) + while m is not None: + index = int(m.group(1)) + src = '@OUTPUT{}@'.format(index) + arg = arg.replace(src, os.path.join(private_dir, output_list[index])) + m = regex.search(arg) + newargs.append(arg) + return newargs + + def get_build_by_default_targets(self): + result = OrderedDict() + # Get all build and custom targets that must be built by default + for name, t in self.build.get_targets().items(): + if t.build_by_default: + result[name] = t + # Get all targets used as test executables and arguments. These must + # also be built by default. XXX: Sometime in the future these should be + # built only before running tests. + for t in self.build.get_tests(): + exe = unholder(t.exe) + if isinstance(exe, (build.CustomTarget, build.BuildTarget)): + result[exe.get_id()] = exe + for arg in unholder(t.cmd_args): + if not isinstance(arg, (build.CustomTarget, build.BuildTarget)): + continue + result[arg.get_id()] = arg + for dep in t.depends: + assert isinstance(dep, (build.CustomTarget, build.BuildTarget)) + result[dep.get_id()] = dep + return result + + @lru_cache(maxsize=None) + def get_custom_target_provided_by_generated_source(self, generated_source): + libs = [] + for f in generated_source.get_outputs(): + if self.environment.is_library(f): + libs.append(os.path.join(self.get_target_dir(generated_source), f)) + return libs + + @lru_cache(maxsize=None) + def get_custom_target_provided_libraries(self, target): + libs = [] + for t in target.get_generated_sources(): + if not isinstance(t, build.CustomTarget): + continue + l = self.get_custom_target_provided_by_generated_source(t) + libs = libs + l + return libs + + def is_unity(self, target): + optval = self.get_option_for_target('unity', target) + if optval == 'on' or (optval == 'subprojects' and target.subproject != ''): + return True + return False + + def get_custom_target_sources(self, target): + ''' + Custom target sources can be of various object types; strings, File, + BuildTarget, even other CustomTargets. + Returns the path to them relative to the build root directory. + ''' + srcs = [] + for i in unholder(target.get_sources()): + if isinstance(i, str): + fname = [os.path.join(self.build_to_src, target.subdir, i)] + elif isinstance(i, build.BuildTarget): + fname = [self.get_target_filename(i)] + elif isinstance(i, (build.CustomTarget, build.CustomTargetIndex)): + fname = [os.path.join(self.get_target_dir(i), p) for p in i.get_outputs()] + elif isinstance(i, build.GeneratedList): + fname = [os.path.join(self.get_target_private_dir(target), p) for p in i.get_outputs()] + elif isinstance(i, build.ExtractedObjects): + fname = [os.path.join(self.get_target_private_dir(i.target), p) for p in i.get_outputs(self)] + else: + fname = [i.rel_to_builddir(self.build_to_src)] + if target.absolute_paths: + fname = [os.path.join(self.environment.get_build_dir(), f) for f in fname] + srcs += fname + return srcs + + def get_custom_target_depend_files(self, target, absolute_paths=False): + deps = [] + for i in target.depend_files: + if isinstance(i, mesonlib.File): + if absolute_paths: + deps.append(i.absolute_path(self.environment.get_source_dir(), + self.environment.get_build_dir())) + else: + deps.append(i.rel_to_builddir(self.build_to_src)) + else: + if absolute_paths: + deps.append(os.path.join(self.environment.get_source_dir(), target.subdir, i)) + else: + deps.append(os.path.join(self.build_to_src, target.subdir, i)) + return deps + + def eval_custom_target_command(self, target, absolute_outputs=False): + # We want the outputs to be absolute only when using the VS backend + # XXX: Maybe allow the vs backend to use relative paths too? + source_root = self.build_to_src + build_root = '.' + outdir = self.get_target_dir(target) + if absolute_outputs: + source_root = self.environment.get_source_dir() + build_root = self.environment.get_build_dir() + outdir = os.path.join(self.environment.get_build_dir(), outdir) + outputs = [] + for i in target.get_outputs(): + outputs.append(os.path.join(outdir, i)) + inputs = self.get_custom_target_sources(target) + # Evaluate the command list + cmd = [] + for i in target.command: + if isinstance(i, build.BuildTarget): + cmd += self.build_target_to_cmd_array(i) + continue + elif isinstance(i, build.CustomTarget): + # GIR scanner will attempt to execute this binary but + # it assumes that it is in path, so always give it a full path. + tmp = i.get_outputs()[0] + i = os.path.join(self.get_target_dir(i), tmp) + elif isinstance(i, mesonlib.File): + i = i.rel_to_builddir(self.build_to_src) + if target.absolute_paths: + i = os.path.join(self.environment.get_build_dir(), i) + # FIXME: str types are blindly added ignoring 'target.absolute_paths' + # because we can't know if they refer to a file or just a string + elif not isinstance(i, str): + err_msg = 'Argument {0} is of unknown type {1}' + raise RuntimeError(err_msg.format(str(i), str(type(i)))) + else: + if '@SOURCE_ROOT@' in i: + i = i.replace('@SOURCE_ROOT@', source_root) + if '@BUILD_ROOT@' in i: + i = i.replace('@BUILD_ROOT@', build_root) + if '@DEPFILE@' in i: + if target.depfile is None: + msg = 'Custom target {!r} has @DEPFILE@ but no depfile ' \ + 'keyword argument.'.format(target.name) + raise MesonException(msg) + dfilename = os.path.join(outdir, target.depfile) + i = i.replace('@DEPFILE@', dfilename) + if '@PRIVATE_DIR@' in i: + if target.absolute_paths: + pdir = self.get_target_private_dir_abs(target) + else: + pdir = self.get_target_private_dir(target) + i = i.replace('@PRIVATE_DIR@', pdir) + if '@PRIVATE_OUTDIR_' in i: + match = re.search(r'@PRIVATE_OUTDIR_(ABS_)?([^/\s*]*)@', i) + if not match: + msg = 'Custom target {!r} has an invalid argument {!r}' \ + ''.format(target.name, i) + raise MesonException(msg) + source = match.group(0) + if match.group(1) is None and not target.absolute_paths: + lead_dir = '' + else: + lead_dir = self.environment.get_build_dir() + i = i.replace(source, os.path.join(lead_dir, outdir)) + cmd.append(i) + # Substitute the rest of the template strings + values = mesonlib.get_filenames_templates_dict(inputs, outputs) + cmd = mesonlib.substitute_values(cmd, values) + # This should not be necessary but removing it breaks + # building GStreamer on Windows. The underlying issue + # is problems with quoting backslashes on Windows + # which is the seventh circle of hell. The downside is + # that this breaks custom targets whose command lines + # have backslashes. If you try to fix this be sure to + # check that it does not break GST. + # + # The bug causes file paths such as c:\foo to get escaped + # into c:\\foo. + # + # Unfortunately we have not been able to come up with an + # isolated test case for this so unless you manage to come up + # with one, the only way is to test the building with Gst's + # setup. Note this in your MR or ping us and we will get it + # fixed. + # + # https://github.com/mesonbuild/meson/pull/737 + cmd = [i.replace('\\', '/') for i in cmd] + return inputs, outputs, cmd + + def run_postconf_scripts(self): + env = {'MESON_SOURCE_ROOT': self.environment.get_source_dir(), + 'MESON_BUILD_ROOT': self.environment.get_build_dir(), + 'MESONINTROSPECT': ' '.join([shlex.quote(x) for x in self.environment.get_build_command() + ['introspect']]), + } + child_env = os.environ.copy() + child_env.update(env) + + for s in self.build.postconf_scripts: + cmd = s['exe'] + s['args'] + subprocess.check_call(cmd, env=child_env) + + def create_install_data(self): + strip_bin = self.environment.lookup_binary_entry(MachineChoice.HOST, 'strip') + if strip_bin is None: + if self.environment.is_cross_build(): + mlog.warning('Cross file does not specify strip binary, result will not be stripped.') + else: + # TODO go through all candidates, like others + strip_bin = [self.environment.default_strip[0]] + d = InstallData(self.environment.get_source_dir(), + self.environment.get_build_dir(), + self.environment.get_prefix(), + strip_bin, + self.environment.coredata.get_builtin_option('install_umask'), + self.environment.get_build_command() + ['introspect']) + self.generate_depmf_install(d) + self.generate_target_install(d) + self.generate_header_install(d) + self.generate_man_install(d) + self.generate_data_install(d) + self.generate_custom_install_script(d) + self.generate_subdir_install(d) + return d + + def create_install_data_files(self): + install_data_file = os.path.join(self.environment.get_scratch_dir(), 'install.dat') + with open(install_data_file, 'wb') as ofile: + pickle.dump(self.create_install_data(), ofile) + + def generate_target_install(self, d): + for t in self.build.get_targets().values(): + if not t.should_install(): + continue + outdirs, custom_install_dir = t.get_install_dir(self.environment) + # Sanity-check the outputs and install_dirs + num_outdirs, num_out = len(outdirs), len(t.get_outputs()) + if num_outdirs != 1 and num_outdirs != num_out: + m = 'Target {!r} has {} outputs: {!r}, but only {} "install_dir"s were found.\n' \ + "Pass 'false' for outputs that should not be installed and 'true' for\n" \ + 'using the default installation directory for an output.' + raise MesonException(m.format(t.name, num_out, t.get_outputs(), num_outdirs)) + install_mode = t.get_custom_install_mode() + # Install the target output(s) + if isinstance(t, build.BuildTarget): + # In general, stripping static archives is tricky and full of pitfalls. + # Wholesale stripping of static archives with a command such as + # + # strip libfoo.a + # + # is broken, as GNU's strip will remove *every* symbol in a static + # archive. One solution to this nonintuitive behaviour would be + # to only strip local/debug symbols. Unfortunately, strip arguments + # are not specified by POSIX and therefore not portable. GNU's `-g` + # option (i.e. remove debug symbols) is equivalent to Apple's `-S`. + # + # TODO: Create GNUStrip/AppleStrip/etc. hierarchy for more + # fine-grained stripping of static archives. + should_strip = not isinstance(t, build.StaticLibrary) and self.get_option_for_target('strip', t) + # Install primary build output (library/executable/jar, etc) + # Done separately because of strip/aliases/rpath + if outdirs[0] is not False: + mappings = t.get_link_deps_mapping(d.prefix, self.environment) + i = TargetInstallData(self.get_target_filename(t), outdirs[0], + t.get_aliases(), should_strip, mappings, + t.rpath_dirs_to_remove, + t.install_rpath, install_mode) + d.targets.append(i) + + if isinstance(t, (build.SharedLibrary, build.SharedModule, build.Executable)): + # On toolchains/platforms that use an import library for + # linking (separate from the shared library with all the + # code), we need to install that too (dll.a/.lib). + if t.get_import_filename(): + if custom_install_dir: + # If the DLL is installed into a custom directory, + # install the import library into the same place so + # it doesn't go into a surprising place + implib_install_dir = outdirs[0] + else: + implib_install_dir = self.environment.get_import_lib_dir() + # Install the import library; may not exist for shared modules + i = TargetInstallData(self.get_target_filename_for_linking(t), + implib_install_dir, {}, False, {}, set(), '', install_mode, + optional=isinstance(t, build.SharedModule)) + d.targets.append(i) + + if not should_strip and t.get_debug_filename(): + debug_file = os.path.join(self.get_target_dir(t), t.get_debug_filename()) + i = TargetInstallData(debug_file, outdirs[0], + {}, False, {}, set(), '', + install_mode, optional=True) + d.targets.append(i) + # Install secondary outputs. Only used for Vala right now. + if num_outdirs > 1: + for output, outdir in zip(t.get_outputs()[1:], outdirs[1:]): + # User requested that we not install this output + if outdir is False: + continue + f = os.path.join(self.get_target_dir(t), output) + i = TargetInstallData(f, outdir, {}, False, {}, set(), None, install_mode) + d.targets.append(i) + elif isinstance(t, build.CustomTarget): + # If only one install_dir is specified, assume that all + # outputs will be installed into it. This is for + # backwards-compatibility and because it makes sense to + # avoid repetition since this is a common use-case. + # + # To selectively install only some outputs, pass `false` as + # the install_dir for the corresponding output by index + if num_outdirs == 1 and num_out > 1: + for output in t.get_outputs(): + f = os.path.join(self.get_target_dir(t), output) + i = TargetInstallData(f, outdirs[0], {}, False, {}, set(), None, install_mode, + optional=not t.build_by_default) + d.targets.append(i) + else: + for output, outdir in zip(t.get_outputs(), outdirs): + # User requested that we not install this output + if outdir is False: + continue + f = os.path.join(self.get_target_dir(t), output) + i = TargetInstallData(f, outdir, {}, False, {}, set(), None, install_mode, + optional=not t.build_by_default) + d.targets.append(i) + + def generate_custom_install_script(self, d): + result = [] + srcdir = self.environment.get_source_dir() + builddir = self.environment.get_build_dir() + for i in self.build.install_scripts: + exe = i['exe'] + args = i['args'] + fixed_args = [] + for a in args: + a = a.replace('@SOURCE_ROOT@', srcdir) + a = a.replace('@BUILD_ROOT@', builddir) + fixed_args.append(a) + result.append(build.RunScript(exe, fixed_args)) + d.install_scripts = result + + def generate_header_install(self, d): + incroot = self.environment.get_includedir() + headers = self.build.get_headers() + + srcdir = self.environment.get_source_dir() + builddir = self.environment.get_build_dir() + for h in headers: + outdir = h.get_custom_install_dir() + if outdir is None: + outdir = os.path.join(incroot, h.get_install_subdir()) + for f in h.get_sources(): + if not isinstance(f, File): + msg = 'Invalid header type {!r} can\'t be installed' + raise MesonException(msg.format(f)) + abspath = f.absolute_path(srcdir, builddir) + i = [abspath, outdir, h.get_custom_install_mode()] + d.headers.append(i) + + def generate_man_install(self, d): + manroot = self.environment.get_mandir() + man = self.build.get_man() + for m in man: + for f in m.get_sources(): + num = f.split('.')[-1] + subdir = m.get_custom_install_dir() + if subdir is None: + subdir = os.path.join(manroot, 'man' + num) + srcabs = f.absolute_path(self.environment.get_source_dir(), self.environment.get_build_dir()) + dstabs = os.path.join(subdir, os.path.basename(f.fname)) + i = [srcabs, dstabs, m.get_custom_install_mode()] + d.man.append(i) + + def generate_data_install(self, d): + data = self.build.get_data() + srcdir = self.environment.get_source_dir() + builddir = self.environment.get_build_dir() + for de in data: + assert(isinstance(de, build.Data)) + subdir = de.install_dir + if not subdir: + subdir = os.path.join(self.environment.get_datadir(), self.interpreter.build.project_name) + for src_file, dst_name in zip(de.sources, de.rename): + assert(isinstance(src_file, mesonlib.File)) + dst_abs = os.path.join(subdir, dst_name) + i = [src_file.absolute_path(srcdir, builddir), dst_abs, de.install_mode] + d.data.append(i) + + def generate_subdir_install(self, d): + for sd in self.build.get_install_subdirs(): + src_dir = os.path.join(self.environment.get_source_dir(), + sd.source_subdir, + sd.installable_subdir).rstrip('/') + dst_dir = os.path.join(self.environment.get_prefix(), + sd.install_dir) + if not sd.strip_directory: + dst_dir = os.path.join(dst_dir, os.path.basename(src_dir)) + d.install_subdirs.append([src_dir, dst_dir, sd.install_mode, + sd.exclude]) + + def get_introspection_data(self, target_id, target): + ''' + Returns a list of source dicts with the following format for a given target: + [ + { + "language": "", + "compiler": ["result", "of", "comp.get_exelist()"], + "parameters": ["list", "of", "compiler", "parameters], + "sources": ["list", "of", "all", "", "source", "files"], + "generated_sources": ["list", "of", "generated", "source", "files"] + } + ] + + This is a limited fallback / reference implementation. The backend should override this method. + ''' + if isinstance(target, (build.CustomTarget, build.BuildTarget)): + source_list_raw = target.sources + target.extra_files + source_list = [] + for j in source_list_raw: + if isinstance(j, mesonlib.File): + source_list += [j.absolute_path(self.source_dir, self.build_dir)] + elif isinstance(j, str): + source_list += [os.path.join(self.source_dir, j)] + source_list = list(map(lambda x: os.path.normpath(x), source_list)) + + compiler = [] + if isinstance(target, build.CustomTarget): + tmp_compiler = target.command + if not isinstance(compiler, list): + tmp_compiler = [compiler] + for j in tmp_compiler: + if isinstance(j, mesonlib.File): + compiler += [j.absolute_path(self.source_dir, self.build_dir)] + elif isinstance(j, str): + compiler += [j] + elif isinstance(j, (build.BuildTarget, build.CustomTarget)): + compiler += j.get_outputs() + else: + raise RuntimeError('Type "{}" is not supported in get_introspection_data. This is a bug'.format(type(j).__name__)) + + return [{ + 'language': 'unknown', + 'compiler': compiler, + 'parameters': [], + 'sources': source_list, + 'generated_sources': [] + }] + + return [] diff --git a/meson/mesonbuild/backend/ninjabackend.py b/meson/mesonbuild/backend/ninjabackend.py new file mode 100644 index 000000000..8afb80ed6 --- /dev/null +++ b/meson/mesonbuild/backend/ninjabackend.py @@ -0,0 +1,3098 @@ +# Copyright 2012-2017 The Meson development team + +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at + +# http://www.apache.org/licenses/LICENSE-2.0 + +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +import typing as T +import os +import re +import pickle +import shlex +import subprocess +from collections import OrderedDict +from enum import Enum, unique +import itertools +from pathlib import PurePath, Path +from functools import lru_cache + +from . import backends +from .. import modules +from .. import environment, mesonlib +from .. import build +from .. import mlog +from .. import dependencies +from .. import compilers +from ..arglist import CompilerArgs +from ..compilers import ( + Compiler, CCompiler, + DmdDCompiler, + FortranCompiler, PGICCompiler, + VisualStudioCsCompiler, + VisualStudioLikeCompiler, +) +from ..linkers import ArLinker, VisualStudioLinker +from ..mesonlib import ( + File, LibType, MachineChoice, MesonException, OrderedSet, PerMachine, + ProgressBar, quote_arg, unholder, +) +from ..mesonlib import get_compiler_for_source, has_path_sep +from .backends import CleanTrees +from ..build import InvalidArguments +from ..interpreter import Interpreter + +FORTRAN_INCLUDE_PAT = r"^\s*#?include\s*['\"](\w+\.\w+)['\"]" +FORTRAN_MODULE_PAT = r"^\s*\bmodule\b\s+(\w+)\s*(?:!+.*)*$" +FORTRAN_SUBMOD_PAT = r"^\s*\bsubmodule\b\s*\((\w+:?\w+)\)\s*(\w+)" +FORTRAN_USE_PAT = r"^\s*use,?\s*(?:non_intrinsic)?\s*(?:::)?\s*(\w+)" + +def cmd_quote(s): + # see: https://docs.microsoft.com/en-us/windows/desktop/api/shellapi/nf-shellapi-commandlinetoargvw#remarks + + # backslash escape any existing double quotes + # any existing backslashes preceding a quote are doubled + s = re.sub(r'(\\*)"', lambda m: '\\' * (len(m.group(1)) * 2 + 1) + '"', s) + # any terminal backslashes likewise need doubling + s = re.sub(r'(\\*)$', lambda m: '\\' * (len(m.group(1)) * 2), s) + # and double quote + s = '"{}"'.format(s) + + return s + +def gcc_rsp_quote(s): + # see: the function buildargv() in libiberty + # + # this differs from sh-quoting in that a backslash *always* escapes the + # following character, even inside single quotes. + + s = s.replace('\\', '\\\\') + + return shlex.quote(s) + +# How ninja executes command lines differs between Unix and Windows +# (see https://ninja-build.org/manual.html#ref_rule_command) +if mesonlib.is_windows(): + quote_func = cmd_quote + execute_wrapper = ['cmd', '/c'] # unused + rmfile_prefix = ['del', '/f', '/s', '/q', '{}', '&&'] +else: + quote_func = quote_arg + execute_wrapper = [] + rmfile_prefix = ['rm', '-f', '{}', '&&'] + +def get_rsp_threshold(): + '''Return a conservative estimate of the commandline size in bytes + above which a response file should be used. May be overridden for + debugging by setting environment variable MESON_RSP_THRESHOLD.''' + + if mesonlib.is_windows(): + # Usually 32k, but some projects might use cmd.exe, + # and that has a limit of 8k. + limit = 8192 + else: + # On Linux, ninja always passes the commandline as a single + # big string to /bin/sh, and the kernel limits the size of a + # single argument; see MAX_ARG_STRLEN + limit = 131072 + # Be conservative + limit = limit / 2 + return int(os.environ.get('MESON_RSP_THRESHOLD', limit)) + +# a conservative estimate of the command-line length limit +rsp_threshold = get_rsp_threshold() + +# ninja variables whose value should remain unquoted. The value of these ninja +# variables (or variables we use them in) is interpreted directly by ninja +# (e.g. the value of the depfile variable is a pathname that ninja will read +# from, etc.), so it must not be shell quoted. +raw_names = {'DEPFILE_UNQUOTED', 'DESC', 'pool', 'description', 'targetdep'} + +def ninja_quote(text, is_build_line=False): + if is_build_line: + qcs = ('$', ' ', ':') + else: + qcs = ('$', ' ') + for char in qcs: + text = text.replace(char, '$' + char) + if '\n' in text: + errmsg = '''Ninja does not support newlines in rules. The content was: + +{} + +Please report this error with a test case to the Meson bug tracker.'''.format(text) + raise MesonException(errmsg) + return text + +@unique +class Quoting(Enum): + both = 0 + notShell = 1 + notNinja = 2 + none = 3 + +class NinjaCommandArg: + def __init__(self, s, quoting = Quoting.both): + self.s = s + self.quoting = quoting + + def __str__(self): + return self.s + + @staticmethod + def list(l, q): + return [NinjaCommandArg(i, q) for i in l] + +class NinjaComment: + def __init__(self, comment): + self.comment = comment + + def write(self, outfile): + for l in self.comment.split('\n'): + outfile.write('# ') + outfile.write(l) + outfile.write('\n') + outfile.write('\n') + +class NinjaRule: + def __init__(self, rule, command, args, description, + rspable = False, deps = None, depfile = None, extra = None, + rspfile_quote_style = 'gcc'): + + def strToCommandArg(c): + if isinstance(c, NinjaCommandArg): + return c + + # deal with common cases here, so we don't have to explicitly + # annotate the required quoting everywhere + if c == '&&': + # shell constructs shouldn't be shell quoted + return NinjaCommandArg(c, Quoting.notShell) + if c.startswith('$'): + var = re.search(r'\$\{?(\w*)\}?', c).group(1) + if var not in raw_names: + # ninja variables shouldn't be ninja quoted, and their value + # is already shell quoted + return NinjaCommandArg(c, Quoting.none) + else: + # shell quote the use of ninja variables whose value must + # not be shell quoted (as it also used by ninja) + return NinjaCommandArg(c, Quoting.notNinja) + + return NinjaCommandArg(c) + + self.name = rule + self.command = list(map(strToCommandArg, command)) # includes args which never go into a rspfile + self.args = list(map(strToCommandArg, args)) # args which will go into a rspfile, if used + self.description = description + self.deps = deps # depstyle 'gcc' or 'msvc' + self.depfile = depfile + self.extra = extra + self.rspable = rspable # if a rspfile can be used + self.refcount = 0 + self.rsprefcount = 0 + self.rspfile_quote_style = rspfile_quote_style # rspfile quoting style is 'gcc' or 'cl' + + if self.depfile == '$DEPFILE': + self.depfile += '_UNQUOTED' + + @staticmethod + def _quoter(x, qf = quote_func): + if isinstance(x, NinjaCommandArg): + if x.quoting == Quoting.none: + return x.s + elif x.quoting == Quoting.notNinja: + return qf(x.s) + elif x.quoting == Quoting.notShell: + return ninja_quote(x.s) + # fallthrough + return ninja_quote(qf(str(x))) + + def write(self, outfile): + if self.rspfile_quote_style == 'cl': + rspfile_quote_func = cmd_quote + else: + rspfile_quote_func = gcc_rsp_quote + + def rule_iter(): + if self.refcount: + yield '' + if self.rsprefcount: + yield '_RSP' + + for rsp in rule_iter(): + outfile.write('rule {}{}\n'.format(self.name, rsp)) + if rsp == '_RSP': + outfile.write(' command = {} @$out.rsp\n'.format(' '.join([self._quoter(x) for x in self.command]))) + outfile.write(' rspfile = $out.rsp\n') + outfile.write(' rspfile_content = {}\n'.format(' '.join([self._quoter(x, rspfile_quote_func) for x in self.args]))) + else: + outfile.write(' command = {}\n'.format(' '.join([self._quoter(x) for x in (self.command + self.args)]))) + if self.deps: + outfile.write(' deps = {}\n'.format(self.deps)) + if self.depfile: + outfile.write(' depfile = {}\n'.format(self.depfile)) + outfile.write(' description = {}\n'.format(self.description)) + if self.extra: + for l in self.extra.split('\n'): + outfile.write(' ') + outfile.write(l) + outfile.write('\n') + outfile.write('\n') + + def length_estimate(self, infiles, outfiles, elems): + # determine variables + # this order of actions only approximates ninja's scoping rules, as + # documented at: https://ninja-build.org/manual.html#ref_scope + ninja_vars = {} + for e in elems: + (name, value) = e + ninja_vars[name] = value + ninja_vars['deps'] = self.deps + ninja_vars['depfile'] = self.depfile + ninja_vars['in'] = infiles + ninja_vars['out'] = outfiles + + # expand variables in command + command = ' '.join([self._quoter(x) for x in self.command + self.args]) + expanded_command = '' + for m in re.finditer(r'(\${\w*})|(\$\w*)|([^$]*)', command): + chunk = m.group() + if chunk.startswith('$'): + chunk = chunk[1:] + chunk = re.sub(r'{(.*)}', r'\1', chunk) + chunk = ninja_vars.get(chunk, []) # undefined ninja variables are empty + chunk = ' '.join(chunk) + expanded_command += chunk + + # determine command length + return len(expanded_command) + +class NinjaBuildElement: + def __init__(self, all_outputs, outfilenames, rulename, infilenames, implicit_outs=None): + self.implicit_outfilenames = implicit_outs or [] + if isinstance(outfilenames, str): + self.outfilenames = [outfilenames] + else: + self.outfilenames = outfilenames + assert(isinstance(rulename, str)) + self.rulename = rulename + if isinstance(infilenames, str): + self.infilenames = [infilenames] + else: + self.infilenames = infilenames + self.deps = OrderedSet() + self.orderdeps = OrderedSet() + self.elems = [] + self.all_outputs = all_outputs + + def add_dep(self, dep): + if isinstance(dep, list): + self.deps.update(dep) + else: + self.deps.add(dep) + + def add_orderdep(self, dep): + if isinstance(dep, list): + self.orderdeps.update(dep) + else: + self.orderdeps.add(dep) + + def add_item(self, name, elems): + # Always convert from GCC-style argument naming to the naming used by the + # current compiler. Also filter system include paths, deduplicate, etc. + if isinstance(elems, CompilerArgs): + elems = elems.to_native() + if isinstance(elems, str): + elems = [elems] + self.elems.append((name, elems)) + + if name == 'DEPFILE': + self.elems.append((name + '_UNQUOTED', elems)) + + def _should_use_rspfile(self): + # 'phony' is a rule built-in to ninja + if self.rulename == 'phony': + return False + + if not self.rule.rspable: + return False + + infilenames = ' '.join([ninja_quote(i, True) for i in self.infilenames]) + outfilenames = ' '.join([ninja_quote(i, True) for i in self.outfilenames]) + + return self.rule.length_estimate(infilenames, + outfilenames, + self.elems) >= rsp_threshold + + def count_rule_references(self): + if self.rulename != 'phony': + if self._should_use_rspfile(): + self.rule.rsprefcount += 1 + else: + self.rule.refcount += 1 + + def write(self, outfile): + self.check_outputs() + ins = ' '.join([ninja_quote(i, True) for i in self.infilenames]) + outs = ' '.join([ninja_quote(i, True) for i in self.outfilenames]) + implicit_outs = ' '.join([ninja_quote(i, True) for i in self.implicit_outfilenames]) + if implicit_outs: + implicit_outs = ' | ' + implicit_outs + use_rspfile = self._should_use_rspfile() + if use_rspfile: + rulename = self.rulename + '_RSP' + mlog.debug("Command line for building %s is long, using a response file" % self.outfilenames) + else: + rulename = self.rulename + line = 'build {}{}: {} {}'.format(outs, implicit_outs, rulename, ins) + if len(self.deps) > 0: + line += ' | ' + ' '.join([ninja_quote(x, True) for x in self.deps]) + if len(self.orderdeps) > 0: + line += ' || ' + ' '.join([ninja_quote(x, True) for x in self.orderdeps]) + line += '\n' + # This is the only way I could find to make this work on all + # platforms including Windows command shell. Slash is a dir separator + # on Windows, too, so all characters are unambiguous and, more importantly, + # do not require quoting, unless explicitly specified, which is necessary for + # the csc compiler. + line = line.replace('\\', '/') + outfile.write(line) + + if use_rspfile: + if self.rule.rspfile_quote_style == 'cl': + qf = cmd_quote + else: + qf = gcc_rsp_quote + else: + qf = quote_func + + for e in self.elems: + (name, elems) = e + should_quote = name not in raw_names + line = ' {} = '.format(name) + newelems = [] + for i in elems: + if not should_quote or i == '&&': # Hackety hack hack + quoter = ninja_quote + else: + quoter = lambda x: ninja_quote(qf(x)) + newelems.append(quoter(i)) + line += ' '.join(newelems) + line += '\n' + outfile.write(line) + outfile.write('\n') + + def check_outputs(self): + for n in self.outfilenames: + if n in self.all_outputs: + raise MesonException('Multiple producers for Ninja target "{}". Please rename your targets.'.format(n)) + self.all_outputs[n] = True + +class NinjaBackend(backends.Backend): + + def __init__(self, build: T.Optional[build.Build], interpreter: T.Optional[Interpreter]): + super().__init__(build, interpreter) + self.name = 'ninja' + self.ninja_filename = 'build.ninja' + self.fortran_deps = {} + self.all_outputs = {} + self.introspection_data = {} + self.created_llvm_ir_rule = PerMachine(False, False) + + def create_target_alias(self, to_target): + # We need to use aliases for targets that might be used as directory + # names to workaround a Ninja bug that breaks `ninja -t clean`. + # This is used for 'reserved' targets such as 'test', 'install', + # 'benchmark', etc, and also for RunTargets. + # https://github.com/mesonbuild/meson/issues/1644 + if not to_target.startswith('meson-'): + m = 'Invalid usage of create_target_alias with {!r}' + raise AssertionError(m.format(to_target)) + from_target = to_target[len('meson-'):] + elem = NinjaBuildElement(self.all_outputs, from_target, 'phony', to_target) + self.add_build(elem) + + def detect_vs_dep_prefix(self, tempfilename): + '''VS writes its dependency in a locale dependent format. + Detect the search prefix to use.''' + # TODO don't hard-code host + for compiler in self.environment.coredata.compilers.host.values(): + # Have to detect the dependency format + + # IFort on windows is MSVC like, but doesn't have /showincludes + if isinstance(compiler, FortranCompiler): + continue + if isinstance(compiler, PGICCompiler) and mesonlib.is_windows(): + # for the purpose of this function, PGI doesn't act enough like MSVC + return open(tempfilename, 'a', encoding='utf-8') + if isinstance(compiler, VisualStudioLikeCompiler): + break + else: + # None of our compilers are MSVC, we're done. + return open(tempfilename, 'a', encoding='utf-8') + filename = os.path.join(self.environment.get_scratch_dir(), + 'incdetect.c') + with open(filename, 'w') as f: + f.write('''#include +int dummy; +''') + + # The output of cl dependency information is language + # and locale dependent. Any attempt at converting it to + # Python strings leads to failure. We _must_ do this detection + # in raw byte mode and write the result in raw bytes. + pc = subprocess.Popen(compiler.get_exelist() + + ['/showIncludes', '/c', 'incdetect.c'], + cwd=self.environment.get_scratch_dir(), + stdout=subprocess.PIPE, stderr=subprocess.PIPE) + (stdout, stderr) = pc.communicate() + + # We want to match 'Note: including file: ' in the line + # 'Note: including file: d:\MyDir\include\stdio.h', however + # different locales have different messages with a different + # number of colons. Match up to the the drive name 'd:\'. + # When used in cross compilation, the path separator is a + # backslash rather than a forward slash so handle both. + matchre = re.compile(rb"^(.*\s)([a-zA-Z]:\\|\/).*stdio.h$") + + def detect_prefix(out): + for line in re.split(rb'\r?\n', out): + match = matchre.match(line) + if match: + with open(tempfilename, 'ab') as binfile: + binfile.write(b'msvc_deps_prefix = ' + match.group(1) + b'\n') + return open(tempfilename, 'a', encoding='utf-8') + return None + + # Some cl wrappers (e.g. Squish Coco) output dependency info + # to stderr rather than stdout + result = detect_prefix(stdout) or detect_prefix(stderr) + if result: + return result + + raise MesonException('Could not determine vs dep dependency prefix string.') + + def generate(self): + ninja = environment.detect_ninja_command_and_version(log=True) + if ninja is None: + raise MesonException('Could not detect Ninja v1.7 or newer') + (self.ninja_command, self.ninja_version) = ninja + outfilename = os.path.join(self.environment.get_build_dir(), self.ninja_filename) + tempfilename = outfilename + '~' + with open(tempfilename, 'w', encoding='utf-8') as outfile: + outfile.write('# This is the build file for project "{}"\n'.format(self.build.get_project())) + outfile.write('# It is autogenerated by the Meson build system.\n') + outfile.write('# Do not edit by hand.\n\n') + outfile.write('ninja_required_version = 1.7.1\n\n') + + num_pools = self.environment.coredata.backend_options['backend_max_links'].value + if num_pools > 0: + outfile.write('''pool link_pool + depth = {} + +'''.format(num_pools)) + + with self.detect_vs_dep_prefix(tempfilename) as outfile: + self.generate_rules() + + self.build_elements = [] + self.generate_phony() + self.add_build_comment(NinjaComment('Build rules for targets')) + for t in ProgressBar(self.build.get_targets().values(), desc='Generating targets'): + self.generate_target(t) + self.add_build_comment(NinjaComment('Test rules')) + self.generate_tests() + self.add_build_comment(NinjaComment('Install rules')) + self.generate_install() + self.generate_dist() + if 'b_coverage' in self.environment.coredata.base_options and \ + self.environment.coredata.base_options['b_coverage'].value: + self.add_build_comment(NinjaComment('Coverage rules')) + self.generate_coverage_rules() + self.add_build_comment(NinjaComment('Suffix')) + self.generate_utils() + self.generate_ending() + + self.write_rules(outfile) + self.write_builds(outfile) + + default = 'default all\n\n' + outfile.write(default) + # Only overwrite the old build file after the new one has been + # fully created. + os.replace(tempfilename, outfilename) + mlog.cmd_ci_include(outfilename) # For CI debugging + self.generate_compdb() + + # http://clang.llvm.org/docs/JSONCompilationDatabase.html + def generate_compdb(self): + rules = [] + # TODO: Rather than an explicit list here, rules could be marked in the + # rule store as being wanted in compdb + for for_machine in MachineChoice: + for lang in self.environment.coredata.compilers[for_machine]: + rules += [ "%s%s" % (rule, ext) for rule in [self.get_compiler_rule_name(lang, for_machine)] + for ext in ['', '_RSP']] + rules += [ "%s%s" % (rule, ext) for rule in [self.get_pch_rule_name(lang, for_machine)] + for ext in ['', '_RSP']] + compdb_options = ['-x'] if mesonlib.version_compare(self.ninja_version, '>=1.9') else [] + ninja_compdb = self.ninja_command + ['-t', 'compdb'] + compdb_options + rules + builddir = self.environment.get_build_dir() + try: + jsondb = subprocess.check_output(ninja_compdb, cwd=builddir) + with open(os.path.join(builddir, 'compile_commands.json'), 'wb') as f: + f.write(jsondb) + except Exception: + mlog.warning('Could not create compilation database.') + + # Get all generated headers. Any source file might need them so + # we need to add an order dependency to them. + def get_generated_headers(self, target): + if hasattr(target, 'cached_generated_headers'): + return target.cached_generated_headers + header_deps = [] + # XXX: Why don't we add deps to CustomTarget headers here? + for genlist in target.get_generated_sources(): + if isinstance(genlist, (build.CustomTarget, build.CustomTargetIndex)): + continue + for src in genlist.get_outputs(): + if self.environment.is_header(src): + header_deps.append(self.get_target_generated_dir(target, genlist, src)) + if 'vala' in target.compilers and not isinstance(target, build.Executable): + vala_header = File.from_built_file(self.get_target_dir(target), target.vala_header) + header_deps.append(vala_header) + # Recurse and find generated headers + for dep in itertools.chain(target.link_targets, target.link_whole_targets): + if isinstance(dep, (build.StaticLibrary, build.SharedLibrary)): + header_deps += self.get_generated_headers(dep) + target.cached_generated_headers = header_deps + return header_deps + + def get_target_generated_sources(self, target): + """ + Returns a dictionary with the keys being the path to the file + (relative to the build directory) of that type and the value + being the GeneratorList or CustomTarget that generated it. + """ + srcs = OrderedDict() + for gensrc in target.get_generated_sources(): + for s in gensrc.get_outputs(): + f = self.get_target_generated_dir(target, gensrc, s) + srcs[f] = s + return srcs + + def get_target_sources(self, target): + srcs = OrderedDict() + for s in target.get_sources(): + # BuildTarget sources are always mesonlib.File files which are + # either in the source root, or generated with configure_file and + # in the build root + if not isinstance(s, File): + raise InvalidArguments('All sources in target {!r} must be of type mesonlib.File'.format(s)) + f = s.rel_to_builddir(self.build_to_src) + srcs[f] = s + return srcs + + # Languages that can mix with C or C++ but don't support unity builds yet + # because the syntax we use for unity builds is specific to C/++/ObjC/++. + # Assembly files cannot be unitified and neither can LLVM IR files + langs_cant_unity = ('d', 'fortran') + + def get_target_source_can_unity(self, target, source): + if isinstance(source, File): + source = source.fname + if self.environment.is_llvm_ir(source) or \ + self.environment.is_assembly(source): + return False + suffix = os.path.splitext(source)[1][1:] + for lang in self.langs_cant_unity: + if lang not in target.compilers: + continue + if suffix in target.compilers[lang].file_suffixes: + return False + return True + + def create_target_source_introspection(self, target: build.Target, comp: compilers.Compiler, parameters, sources, generated_sources): + ''' + Adds the source file introspection information for a language of a target + + Internal introspection storage formart: + self.introspection_data = { + '': { + : { + 'language: 'lang', + 'compiler': ['comp', 'exe', 'list'], + 'parameters': ['UNIQUE', 'parameter', 'list'], + 'sources': [], + 'generated_sources': [], + } + } + } + ''' + tid = target.get_id() + lang = comp.get_language() + tgt = self.introspection_data[tid] + # Find an existing entry or create a new one + id_hash = (lang, tuple(parameters)) + src_block = tgt.get(id_hash, None) + if src_block is None: + # Convert parameters + if isinstance(parameters, CompilerArgs): + parameters = parameters.to_native(copy=True) + parameters = comp.compute_parameters_with_absolute_paths(parameters, self.build_dir) + # The new entry + src_block = { + 'language': lang, + 'compiler': comp.get_exelist(), + 'parameters': parameters, + 'sources': [], + 'generated_sources': [], + } + tgt[id_hash] = src_block + # Make source files absolute + sources = [x.absolute_path(self.source_dir, self.build_dir) if isinstance(x, File) else os.path.normpath(os.path.join(self.build_dir, x)) + for x in sources] + generated_sources = [x.absolute_path(self.source_dir, self.build_dir) if isinstance(x, File) else os.path.normpath(os.path.join(self.build_dir, x)) + for x in generated_sources] + # Add the source files + src_block['sources'] += sources + src_block['generated_sources'] += generated_sources + + def is_rust_target(self, target): + if len(target.sources) > 0: + first_file = target.sources[0] + if first_file.fname.endswith('.rs'): + return True + return False + + def generate_target(self, target): + if isinstance(target, build.CustomTarget): + self.generate_custom_target(target) + if isinstance(target, build.RunTarget): + self.generate_run_target(target) + name = target.get_id() + if name in self.processed_targets: + return + self.processed_targets[name] = True + # Initialize an empty introspection source list + self.introspection_data[name] = {} + # Generate rules for all dependency targets + self.process_target_dependencies(target) + # If target uses a language that cannot link to C objects, + # just generate for that language and return. + if isinstance(target, build.Jar): + self.generate_jar_target(target) + return + if self.is_rust_target(target): + self.generate_rust_target(target) + return + if 'cs' in target.compilers: + self.generate_cs_target(target) + return + if 'swift' in target.compilers: + self.generate_swift_target(target) + return + + # Now we handle the following languages: + # ObjC++, ObjC, C++, C, D, Fortran, Vala + + # target_sources: + # Pre-existing target C/C++ sources to be built; dict of full path to + # source relative to build root and the original File object. + # generated_sources: + # GeneratedList and CustomTarget sources to be built; dict of the full + # path to source relative to build root and the generating target/list + # vala_generated_sources: + # Array of sources generated by valac that have to be compiled + if 'vala' in target.compilers: + # Sources consumed by valac are filtered out. These only contain + # C/C++ sources, objects, generated libs, and unknown sources now. + target_sources, generated_sources, \ + vala_generated_sources = self.generate_vala_compile(target) + else: + target_sources = self.get_target_sources(target) + generated_sources = self.get_target_generated_sources(target) + vala_generated_sources = [] + self.scan_fortran_module_outputs(target) + # Generate rules for GeneratedLists + self.generate_generator_list_rules(target) + + # Generate rules for building the remaining source files in this target + outname = self.get_target_filename(target) + obj_list = [] + is_unity = self.is_unity(target) + header_deps = [] + unity_src = [] + unity_deps = [] # Generated sources that must be built before compiling a Unity target. + header_deps += self.get_generated_headers(target) + + if is_unity: + # Warn about incompatible sources if a unity build is enabled + langs = set(target.compilers.keys()) + langs_cant = langs.intersection(self.langs_cant_unity) + if langs_cant: + langs_are = langs = ', '.join(langs_cant).upper() + langs_are += ' are' if len(langs_cant) > 1 else ' is' + msg = '{} not supported in Unity builds yet, so {} ' \ + 'sources in the {!r} target will be compiled normally' \ + ''.format(langs_are, langs, target.name) + mlog.log(mlog.red('FIXME'), msg) + + # Get a list of all generated headers that will be needed while building + # this target's sources (generated sources and pre-existing sources). + # This will be set as dependencies of all the target's sources. At the + # same time, also deal with generated sources that need to be compiled. + generated_source_files = [] + for rel_src in generated_sources.keys(): + dirpart, fnamepart = os.path.split(rel_src) + raw_src = File(True, dirpart, fnamepart) + if self.environment.is_source(rel_src) and not self.environment.is_header(rel_src): + if is_unity and self.get_target_source_can_unity(target, rel_src): + unity_deps.append(raw_src) + abs_src = os.path.join(self.environment.get_build_dir(), rel_src) + unity_src.append(abs_src) + else: + generated_source_files.append(raw_src) + elif self.environment.is_object(rel_src): + obj_list.append(rel_src) + elif self.environment.is_library(rel_src) or modules.is_module_library(rel_src): + pass + else: + # Assume anything not specifically a source file is a header. This is because + # people generate files with weird suffixes (.inc, .fh) that they then include + # in their source files. + header_deps.append(raw_src) + # These are the generated source files that need to be built for use by + # this target. We create the Ninja build file elements for this here + # because we need `header_deps` to be fully generated in the above loop. + for src in generated_source_files: + if self.environment.is_llvm_ir(src): + o = self.generate_llvm_ir_compile(target, src) + else: + o = self.generate_single_compile(target, src, True, + order_deps=header_deps) + obj_list.append(o) + + use_pch = self.environment.coredata.base_options.get('b_pch', False) + if use_pch and target.has_pch(): + pch_objects = self.generate_pch(target, header_deps=header_deps) + else: + pch_objects = [] + + # Generate compilation targets for C sources generated from Vala + # sources. This can be extended to other $LANG->C compilers later if + # necessary. This needs to be separate for at least Vala + vala_generated_source_files = [] + for src in vala_generated_sources: + dirpart, fnamepart = os.path.split(src) + raw_src = File(True, dirpart, fnamepart) + if is_unity: + unity_src.append(os.path.join(self.environment.get_build_dir(), src)) + header_deps.append(raw_src) + else: + # Generated targets are ordered deps because the must exist + # before the sources compiling them are used. After the first + # compile we get precise dependency info from dep files. + # This should work in all cases. If it does not, then just + # move them from orderdeps to proper deps. + if self.environment.is_header(src): + header_deps.append(raw_src) + else: + # We gather all these and generate compile rules below + # after `header_deps` (above) is fully generated + vala_generated_source_files.append(raw_src) + for src in vala_generated_source_files: + # Passing 'vala' here signifies that we want the compile + # arguments to be specialized for C code generated by + # valac. For instance, no warnings should be emitted. + obj_list.append(self.generate_single_compile(target, src, 'vala', [], header_deps)) + + # Generate compile targets for all the pre-existing sources for this target + for src in target_sources.values(): + if not self.environment.is_header(src): + if self.environment.is_llvm_ir(src): + obj_list.append(self.generate_llvm_ir_compile(target, src)) + elif is_unity and self.get_target_source_can_unity(target, src): + abs_src = os.path.join(self.environment.get_build_dir(), + src.rel_to_builddir(self.build_to_src)) + unity_src.append(abs_src) + else: + obj_list.append(self.generate_single_compile(target, src, False, [], header_deps)) + obj_list += self.flatten_object_list(target) + if is_unity: + for src in self.generate_unity_files(target, unity_src): + obj_list.append(self.generate_single_compile(target, src, True, unity_deps + header_deps)) + linker, stdlib_args = self.determine_linker_and_stdlib_args(target) + elem = self.generate_link(target, outname, obj_list, linker, pch_objects, stdlib_args=stdlib_args) + self.generate_shlib_aliases(target, self.get_target_dir(target)) + self.add_build(elem) + + def process_target_dependencies(self, target): + for t in target.get_dependencies(): + if t.get_id() not in self.processed_targets: + self.generate_target(t) + + def custom_target_generator_inputs(self, target): + for s in unholder(target.sources): + if isinstance(s, build.GeneratedList): + self.generate_genlist_for_target(s, target) + + def unwrap_dep_list(self, target): + deps = [] + for i in target.get_dependencies(): + # FIXME, should not grab element at zero but rather expand all. + if isinstance(i, list): + i = i[0] + # Add a dependency on all the outputs of this target + for output in i.get_outputs(): + deps.append(os.path.join(self.get_target_dir(i), output)) + return deps + + def generate_custom_target(self, target): + self.custom_target_generator_inputs(target) + (srcs, ofilenames, cmd) = self.eval_custom_target_command(target) + deps = self.unwrap_dep_list(target) + deps += self.get_custom_target_depend_files(target) + desc = 'Generating {0} with a {1} command' + if target.build_always_stale: + deps.append('PHONY') + if target.depfile is None: + rulename = 'CUSTOM_COMMAND' + else: + rulename = 'CUSTOM_COMMAND_DEP' + elem = NinjaBuildElement(self.all_outputs, ofilenames, rulename, srcs) + elem.add_dep(deps) + for d in target.extra_depends: + # Add a dependency on all the outputs of this target + for output in d.get_outputs(): + elem.add_dep(os.path.join(self.get_target_dir(d), output)) + + meson_exe_cmd = self.as_meson_exe_cmdline(target.name, target.command[0], cmd[1:], + for_machine=target.for_machine, + extra_bdeps=target.get_transitive_build_target_deps(), + capture=ofilenames[0] if target.capture else None) + if meson_exe_cmd: + cmd = meson_exe_cmd + cmd_type = 'meson_exe.py custom' + else: + cmd_type = 'custom' + if target.depfile is not None: + depfile = target.get_dep_outname(elem.infilenames) + rel_dfile = os.path.join(self.get_target_dir(target), depfile) + abs_pdir = os.path.join(self.environment.get_build_dir(), self.get_target_dir(target)) + os.makedirs(abs_pdir, exist_ok=True) + elem.add_item('DEPFILE', rel_dfile) + if target.console: + elem.add_item('pool', 'console') + cmd = self.replace_paths(target, cmd) + elem.add_item('COMMAND', cmd) + elem.add_item('description', desc.format(target.name, cmd_type)) + self.add_build(elem) + self.processed_targets[target.get_id()] = True + + def build_run_target_name(self, target): + if target.subproject != '': + subproject_prefix = '{}@@'.format(target.subproject) + else: + subproject_prefix = '' + return '{}{}'.format(subproject_prefix, target.name) + + def generate_run_target(self, target): + cmd = self.environment.get_build_command() + ['--internal', 'commandrunner'] + deps = self.unwrap_dep_list(target) + arg_strings = [] + for i in target.args: + if isinstance(i, str): + arg_strings.append(i) + elif isinstance(i, (build.BuildTarget, build.CustomTarget)): + relfname = self.get_target_filename(i) + arg_strings.append(os.path.join(self.environment.get_build_dir(), relfname)) + deps.append(relfname) + elif isinstance(i, mesonlib.File): + relfname = i.rel_to_builddir(self.build_to_src) + arg_strings.append(os.path.join(self.environment.get_build_dir(), relfname)) + else: + raise AssertionError('Unreachable code in generate_run_target: ' + str(i)) + cmd += [self.environment.get_source_dir(), + self.environment.get_build_dir(), + target.subdir] + self.environment.get_build_command() + texe = target.command + try: + texe = texe.held_object + except AttributeError: + pass + if isinstance(texe, build.Executable): + abs_exe = os.path.join(self.environment.get_build_dir(), self.get_target_filename(texe)) + deps.append(self.get_target_filename(texe)) + if self.environment.is_cross_build(): + exe_wrap = self.environment.get_exe_wrapper() + if exe_wrap: + if not exe_wrap.found(): + msg = 'The exe_wrapper {!r} defined in the cross file is ' \ + 'needed by run target {!r}, but was not found. ' \ + 'Please check the command and/or add it to PATH.' + raise MesonException(msg.format(exe_wrap.name, target.name)) + cmd += exe_wrap.get_command() + cmd.append(abs_exe) + elif isinstance(texe, dependencies.ExternalProgram): + cmd += texe.get_command() + elif isinstance(texe, build.CustomTarget): + deps.append(self.get_target_filename(texe)) + cmd += [os.path.join(self.environment.get_build_dir(), self.get_target_filename(texe))] + elif isinstance(texe, mesonlib.File): + cmd.append(texe.absolute_path(self.environment.get_source_dir(), self.environment.get_build_dir())) + else: + cmd.append(target.command) + cmd += arg_strings + + if texe: + target_name = 'meson-{}'.format(self.build_run_target_name(target)) + elem = NinjaBuildElement(self.all_outputs, target_name, 'CUSTOM_COMMAND', []) + elem.add_item('COMMAND', cmd) + elem.add_item('description', 'Running external command {}'.format(target.name)) + elem.add_item('pool', 'console') + # Alias that runs the target defined above with the name the user specified + self.create_target_alias(target_name) + else: + target_name = self.build_run_target_name(target) + elem = NinjaBuildElement(self.all_outputs, target_name, 'phony', []) + + elem.add_dep(deps) + self.add_build(elem) + self.processed_targets[target.get_id()] = True + + def generate_coverage_command(self, elem, outputs): + targets = self.build.get_targets().values() + use_llvm_cov = False + for target in targets: + if not hasattr(target, 'compilers'): + continue + for compiler in target.compilers.values(): + if compiler.get_id() == 'clang' and not compiler.info.is_darwin(): + use_llvm_cov = True + break + elem.add_item('COMMAND', self.environment.get_build_command() + + ['--internal', 'coverage'] + + outputs + + [self.environment.get_source_dir(), + os.path.join(self.environment.get_source_dir(), + self.build.get_subproject_dir()), + self.environment.get_build_dir(), + self.environment.get_log_dir()] + + (['--use_llvm_cov'] if use_llvm_cov else [])) + + def generate_coverage_rules(self): + e = NinjaBuildElement(self.all_outputs, 'meson-coverage', 'CUSTOM_COMMAND', 'PHONY') + self.generate_coverage_command(e, []) + e.add_item('description', 'Generates coverage reports') + self.add_build(e) + # Alias that runs the target defined above + self.create_target_alias('meson-coverage') + self.generate_coverage_legacy_rules() + + def generate_coverage_legacy_rules(self): + e = NinjaBuildElement(self.all_outputs, 'meson-coverage-xml', 'CUSTOM_COMMAND', 'PHONY') + self.generate_coverage_command(e, ['--xml']) + e.add_item('description', 'Generates XML coverage report') + self.add_build(e) + # Alias that runs the target defined above + self.create_target_alias('meson-coverage-xml') + + e = NinjaBuildElement(self.all_outputs, 'meson-coverage-text', 'CUSTOM_COMMAND', 'PHONY') + self.generate_coverage_command(e, ['--text']) + e.add_item('description', 'Generates text coverage report') + self.add_build(e) + # Alias that runs the target defined above + self.create_target_alias('meson-coverage-text') + + e = NinjaBuildElement(self.all_outputs, 'meson-coverage-html', 'CUSTOM_COMMAND', 'PHONY') + self.generate_coverage_command(e, ['--html']) + e.add_item('description', 'Generates HTML coverage report') + self.add_build(e) + # Alias that runs the target defined above + self.create_target_alias('meson-coverage-html') + + def generate_install(self): + self.create_install_data_files() + elem = NinjaBuildElement(self.all_outputs, 'meson-install', 'CUSTOM_COMMAND', 'PHONY') + elem.add_dep('all') + elem.add_item('DESC', 'Installing files.') + elem.add_item('COMMAND', self.environment.get_build_command() + ['install', '--no-rebuild']) + elem.add_item('pool', 'console') + self.add_build(elem) + # Alias that runs the target defined above + self.create_target_alias('meson-install') + + def generate_tests(self): + self.serialize_tests() + cmd = self.environment.get_build_command(True) + ['test', '--no-rebuild'] + if not self.environment.coredata.get_builtin_option('stdsplit'): + cmd += ['--no-stdsplit'] + if self.environment.coredata.get_builtin_option('errorlogs'): + cmd += ['--print-errorlogs'] + elem = NinjaBuildElement(self.all_outputs, 'meson-test', 'CUSTOM_COMMAND', ['all', 'PHONY']) + elem.add_item('COMMAND', cmd) + elem.add_item('DESC', 'Running all tests.') + elem.add_item('pool', 'console') + self.add_build(elem) + # Alias that runs the above-defined meson-test target + self.create_target_alias('meson-test') + + # And then benchmarks. + cmd = self.environment.get_build_command(True) + [ + 'test', '--benchmark', '--logbase', + 'benchmarklog', '--num-processes=1', '--no-rebuild'] + elem = NinjaBuildElement(self.all_outputs, 'meson-benchmark', 'CUSTOM_COMMAND', ['all', 'PHONY']) + elem.add_item('COMMAND', cmd) + elem.add_item('DESC', 'Running benchmark suite.') + elem.add_item('pool', 'console') + self.add_build(elem) + # Alias that runs the above-defined meson-benchmark target + self.create_target_alias('meson-benchmark') + + def generate_rules(self): + self.rules = [] + self.ruledict = {} + + self.add_rule_comment(NinjaComment('Rules for compiling.')) + self.generate_compile_rules() + self.add_rule_comment(NinjaComment('Rules for linking.')) + self.generate_static_link_rules() + self.generate_dynamic_link_rules() + self.add_rule_comment(NinjaComment('Other rules')) + # Ninja errors out if you have deps = gcc but no depfile, so we must + # have two rules for custom commands. + self.add_rule(NinjaRule('CUSTOM_COMMAND', ['$COMMAND'], [], '$DESC', + extra='restat = 1')) + self.add_rule(NinjaRule('CUSTOM_COMMAND_DEP', ['$COMMAND'], [], '$DESC', + deps='gcc', depfile='$DEPFILE', + extra='restat = 1')) + + c = self.environment.get_build_command() + \ + ['--internal', + 'regenerate', + self.environment.get_source_dir(), + self.environment.get_build_dir(), + '--backend', + 'ninja'] + self.add_rule(NinjaRule('REGENERATE_BUILD', + c, [], + 'Regenerating build files.', + extra='generator = 1')) + + def add_rule_comment(self, comment): + self.rules.append(comment) + + def add_build_comment(self, comment): + self.build_elements.append(comment) + + def add_rule(self, rule): + self.rules.append(rule) + self.ruledict[rule.name] = rule + + def add_build(self, build): + self.build_elements.append(build) + + if build.rulename != 'phony': + # reference rule + build.rule = self.ruledict[build.rulename] + + def write_rules(self, outfile): + for b in self.build_elements: + if isinstance(b, NinjaBuildElement): + b.count_rule_references() + + for r in self.rules: + r.write(outfile) + + def write_builds(self, outfile): + for b in ProgressBar(self.build_elements, desc='Writing build.ninja'): + b.write(outfile) + + def generate_phony(self): + self.add_build_comment(NinjaComment('Phony build target, always out of date')) + elem = NinjaBuildElement(self.all_outputs, 'PHONY', 'phony', '') + self.add_build(elem) + + def generate_jar_target(self, target): + fname = target.get_filename() + outname_rel = os.path.join(self.get_target_dir(target), fname) + src_list = target.get_sources() + class_list = [] + compiler = target.compilers['java'] + c = 'c' + m = 'm' + e = '' + f = 'f' + main_class = target.get_main_class() + if main_class != '': + e = 'e' + + # Add possible java generated files to src list + generated_sources = self.get_target_generated_sources(target) + gen_src_list = [] + for rel_src in generated_sources.keys(): + dirpart, fnamepart = os.path.split(rel_src) + raw_src = File(True, dirpart, fnamepart) + if rel_src.endswith('.java'): + gen_src_list.append(raw_src) + + compile_args = self.determine_single_java_compile_args(target, compiler) + for src in src_list + gen_src_list: + plain_class_path = self.generate_single_java_compile(src, target, compiler, compile_args) + class_list.append(plain_class_path) + class_dep_list = [os.path.join(self.get_target_private_dir(target), i) for i in class_list] + manifest_path = os.path.join(self.get_target_private_dir(target), 'META-INF', 'MANIFEST.MF') + manifest_fullpath = os.path.join(self.environment.get_build_dir(), manifest_path) + os.makedirs(os.path.dirname(manifest_fullpath), exist_ok=True) + with open(manifest_fullpath, 'w') as manifest: + if any(target.link_targets): + manifest.write('Class-Path: ') + cp_paths = [os.path.join(self.get_target_dir(l), l.get_filename()) for l in target.link_targets] + manifest.write(' '.join(cp_paths)) + manifest.write('\n') + jar_rule = 'java_LINKER' + commands = [c + m + e + f] + commands.append(manifest_path) + if e != '': + commands.append(main_class) + commands.append(self.get_target_filename(target)) + # Java compilation can produce an arbitrary number of output + # class files for a single source file. Thus tell jar to just + # grab everything in the final package. + commands += ['-C', self.get_target_private_dir(target), '.'] + elem = NinjaBuildElement(self.all_outputs, outname_rel, jar_rule, []) + elem.add_dep(class_dep_list) + elem.add_item('ARGS', commands) + self.add_build(elem) + # Create introspection information + self.create_target_source_introspection(target, compiler, compile_args, src_list, gen_src_list) + + def generate_cs_resource_tasks(self, target): + args = [] + deps = [] + for r in target.resources: + rel_sourcefile = os.path.join(self.build_to_src, target.subdir, r) + if r.endswith('.resources'): + a = '-resource:' + rel_sourcefile + elif r.endswith('.txt') or r.endswith('.resx'): + ofilebase = os.path.splitext(os.path.basename(r))[0] + '.resources' + ofilename = os.path.join(self.get_target_private_dir(target), ofilebase) + elem = NinjaBuildElement(self.all_outputs, ofilename, "CUSTOM_COMMAND", rel_sourcefile) + elem.add_item('COMMAND', ['resgen', rel_sourcefile, ofilename]) + elem.add_item('DESC', 'Compiling resource {}'.format(rel_sourcefile)) + self.add_build(elem) + deps.append(ofilename) + a = '-resource:' + ofilename + else: + raise InvalidArguments('Unknown resource file {}.'.format(r)) + args.append(a) + return args, deps + + def generate_cs_target(self, target): + buildtype = self.get_option_for_target('buildtype', target) + fname = target.get_filename() + outname_rel = os.path.join(self.get_target_dir(target), fname) + src_list = target.get_sources() + compiler = target.compilers['cs'] + rel_srcs = [os.path.normpath(s.rel_to_builddir(self.build_to_src)) for s in src_list] + deps = [] + commands = compiler.compiler_args(target.extra_args.get('cs', [])) + commands += compiler.get_buildtype_args(buildtype) + commands += compiler.get_optimization_args(self.get_option_for_target('optimization', target)) + commands += compiler.get_debug_args(self.get_option_for_target('debug', target)) + if isinstance(target, build.Executable): + commands.append('-target:exe') + elif isinstance(target, build.SharedLibrary): + commands.append('-target:library') + else: + raise MesonException('Unknown C# target type.') + (resource_args, resource_deps) = self.generate_cs_resource_tasks(target) + commands += resource_args + deps += resource_deps + commands += compiler.get_output_args(outname_rel) + for l in target.link_targets: + lname = os.path.join(self.get_target_dir(l), l.get_filename()) + commands += compiler.get_link_args(lname) + deps.append(lname) + if '-g' in commands: + outputs = [outname_rel, outname_rel + '.mdb'] + else: + outputs = [outname_rel] + generated_sources = self.get_target_generated_sources(target) + generated_rel_srcs = [] + for rel_src in generated_sources.keys(): + if rel_src.lower().endswith('.cs'): + generated_rel_srcs.append(os.path.normpath(rel_src)) + deps.append(os.path.normpath(rel_src)) + + for dep in target.get_external_deps(): + commands.extend_direct(dep.get_link_args()) + commands += self.build.get_project_args(compiler, target.subproject, target.for_machine) + commands += self.build.get_global_args(compiler, target.for_machine) + + elem = NinjaBuildElement(self.all_outputs, outputs, self.get_compiler_rule_name('cs', target.for_machine), rel_srcs + generated_rel_srcs) + elem.add_dep(deps) + elem.add_item('ARGS', commands) + self.add_build(elem) + + self.generate_generator_list_rules(target) + self.create_target_source_introspection(target, compiler, commands, rel_srcs, generated_rel_srcs) + + def determine_single_java_compile_args(self, target, compiler): + args = [] + args += compiler.get_buildtype_args(self.get_option_for_target('buildtype', target)) + args += self.build.get_global_args(compiler, target.for_machine) + args += self.build.get_project_args(compiler, target.subproject, target.for_machine) + args += target.get_java_args() + args += compiler.get_output_args(self.get_target_private_dir(target)) + args += target.get_classpath_args() + curdir = target.get_subdir() + sourcepath = os.path.join(self.build_to_src, curdir) + os.pathsep + sourcepath += os.path.normpath(curdir) + os.pathsep + for i in target.include_dirs: + for idir in i.get_incdirs(): + sourcepath += os.path.join(self.build_to_src, i.curdir, idir) + os.pathsep + args += ['-sourcepath', sourcepath] + return args + + def generate_single_java_compile(self, src, target, compiler, args): + deps = [os.path.join(self.get_target_dir(l), l.get_filename()) for l in target.link_targets] + generated_sources = self.get_target_generated_sources(target) + for rel_src in generated_sources.keys(): + if rel_src.endswith('.java'): + deps.append(rel_src) + rel_src = src.rel_to_builddir(self.build_to_src) + plain_class_path = src.fname[:-4] + 'class' + rel_obj = os.path.join(self.get_target_private_dir(target), plain_class_path) + element = NinjaBuildElement(self.all_outputs, rel_obj, self.compiler_to_rule_name(compiler), rel_src) + element.add_dep(deps) + element.add_item('ARGS', args) + self.add_build(element) + return plain_class_path + + def generate_java_link(self): + rule = 'java_LINKER' + command = ['jar', '$ARGS'] + description = 'Creating JAR $out' + self.add_rule(NinjaRule(rule, command, [], description)) + + def determine_dep_vapis(self, target): + """ + Peek into the sources of BuildTargets we're linking with, and if any of + them was built with Vala, assume that it also generated a .vapi file of + the same name as the BuildTarget and return the path to it relative to + the build directory. + """ + result = OrderedSet() + for dep in itertools.chain(target.link_targets, target.link_whole_targets): + if not dep.is_linkable_target(): + continue + for i in dep.sources: + if hasattr(i, 'fname'): + i = i.fname + if i.endswith('vala'): + vapiname = dep.vala_vapi + fullname = os.path.join(self.get_target_dir(dep), vapiname) + result.add(fullname) + break + return list(result) + + def split_vala_sources(self, t): + """ + Splits the target's sources into .vala, .gs, .vapi, and other sources. + Handles both pre-existing and generated sources. + + Returns a tuple (vala, vapi, others) each of which is a dictionary with + the keys being the path to the file (relative to the build directory) + and the value being the object that generated or represents the file. + """ + vala = OrderedDict() + vapi = OrderedDict() + others = OrderedDict() + othersgen = OrderedDict() + # Split pre-existing sources + for s in t.get_sources(): + # BuildTarget sources are always mesonlib.File files which are + # either in the source root, or generated with configure_file and + # in the build root + if not isinstance(s, File): + msg = 'All sources in target {!r} must be of type ' \ + 'mesonlib.File, not {!r}'.format(t, s) + raise InvalidArguments(msg) + f = s.rel_to_builddir(self.build_to_src) + if s.endswith(('.vala', '.gs')): + srctype = vala + elif s.endswith('.vapi'): + srctype = vapi + else: + srctype = others + srctype[f] = s + # Split generated sources + for gensrc in t.get_generated_sources(): + for s in gensrc.get_outputs(): + f = self.get_target_generated_dir(t, gensrc, s) + if s.endswith(('.vala', '.gs')): + srctype = vala + elif s.endswith('.vapi'): + srctype = vapi + # Generated non-Vala (C/C++) sources. Won't be used for + # generating the Vala compile rule below. + else: + srctype = othersgen + # Duplicate outputs are disastrous + if f in srctype and srctype[f] is not gensrc: + msg = 'Duplicate output {0!r} from {1!r} {2!r}; ' \ + 'conflicts with {0!r} from {4!r} {3!r}' \ + ''.format(f, type(gensrc).__name__, gensrc.name, + srctype[f].name, type(srctype[f]).__name__) + raise InvalidArguments(msg) + # Store 'somefile.vala': GeneratedList (or CustomTarget) + srctype[f] = gensrc + return vala, vapi, (others, othersgen) + + def generate_vala_compile(self, target): + """Vala is compiled into C. Set up all necessary build steps here.""" + (vala_src, vapi_src, other_src) = self.split_vala_sources(target) + extra_dep_files = [] + if not vala_src: + msg = 'Vala library {!r} has no Vala or Genie source files.' + raise InvalidArguments(msg.format(target.name)) + + valac = target.compilers['vala'] + c_out_dir = self.get_target_private_dir(target) + # C files generated by valac + vala_c_src = [] + # Files generated by valac + valac_outputs = [] + # All sources that are passed to valac on the commandline + all_files = list(vapi_src.keys()) + # Passed as --basedir + srcbasedir = os.path.join(self.build_to_src, target.get_subdir()) + for (vala_file, gensrc) in vala_src.items(): + all_files.append(vala_file) + # Figure out where the Vala compiler will write the compiled C file + # + # If the Vala file is in a subdir of the build dir (in our case + # because it was generated/built by something else), and is also + # a subdir of --basedir (because the builddir is in the source + # tree, and the target subdir is the source root), the subdir + # components from the source root till the private builddir will be + # duplicated inside the private builddir. Otherwise, just the + # basename will be used. + # + # If the Vala file is outside the build directory, the paths from + # the --basedir till the subdir will be duplicated inside the + # private builddir. + if isinstance(gensrc, (build.CustomTarget, build.GeneratedList)) or gensrc.is_built: + vala_c_file = os.path.splitext(os.path.basename(vala_file))[0] + '.c' + # Check if the vala file is in a subdir of --basedir + abs_srcbasedir = os.path.join(self.environment.get_source_dir(), target.get_subdir()) + abs_vala_file = os.path.join(self.environment.get_build_dir(), vala_file) + if PurePath(os.path.commonpath((abs_srcbasedir, abs_vala_file))) == PurePath(abs_srcbasedir): + vala_c_subdir = PurePath(abs_vala_file).parent.relative_to(abs_srcbasedir) + vala_c_file = os.path.join(str(vala_c_subdir), vala_c_file) + else: + path_to_target = os.path.join(self.build_to_src, target.get_subdir()) + if vala_file.startswith(path_to_target): + vala_c_file = os.path.splitext(os.path.relpath(vala_file, path_to_target))[0] + '.c' + else: + vala_c_file = os.path.splitext(os.path.basename(vala_file))[0] + '.c' + # All this will be placed inside the c_out_dir + vala_c_file = os.path.join(c_out_dir, vala_c_file) + vala_c_src.append(vala_c_file) + valac_outputs.append(vala_c_file) + + args = self.generate_basic_compiler_args(target, valac) + args += valac.get_colorout_args(self.environment.coredata.base_options.get('b_colorout').value) + # Tell Valac to output everything in our private directory. Sadly this + # means it will also preserve the directory components of Vala sources + # found inside the build tree (generated sources). + args += ['--directory', c_out_dir] + args += ['--basedir', srcbasedir] + if target.is_linkable_target(): + # Library name + args += ['--library', target.name] + # Outputted header + hname = os.path.join(self.get_target_dir(target), target.vala_header) + args += ['--header', hname] + if self.is_unity(target): + # Without this the declarations will get duplicated in the .c + # files and cause a build failure when all of them are + # #include-d in one .c file. + # https://github.com/mesonbuild/meson/issues/1969 + args += ['--use-header'] + valac_outputs.append(hname) + # Outputted vapi file + vapiname = os.path.join(self.get_target_dir(target), target.vala_vapi) + # Force valac to write the vapi and gir files in the target build dir. + # Without this, it will write it inside c_out_dir + args += ['--vapi', os.path.join('..', target.vala_vapi)] + valac_outputs.append(vapiname) + target.outputs += [target.vala_header, target.vala_vapi] + # Install header and vapi to default locations if user requests this + if len(target.install_dir) > 1 and target.install_dir[1] is True: + target.install_dir[1] = self.environment.get_includedir() + if len(target.install_dir) > 2 and target.install_dir[2] is True: + target.install_dir[2] = os.path.join(self.environment.get_datadir(), 'vala', 'vapi') + # Generate GIR if requested + if isinstance(target.vala_gir, str): + girname = os.path.join(self.get_target_dir(target), target.vala_gir) + args += ['--gir', os.path.join('..', target.vala_gir)] + valac_outputs.append(girname) + target.outputs.append(target.vala_gir) + # Install GIR to default location if requested by user + if len(target.install_dir) > 3 and target.install_dir[3] is True: + target.install_dir[3] = os.path.join(self.environment.get_datadir(), 'gir-1.0') + # Detect gresources and add --gresources arguments for each + for gensrc in other_src[1].values(): + if isinstance(gensrc, modules.GResourceTarget): + gres_xml, = self.get_custom_target_sources(gensrc) + args += ['--gresources=' + gres_xml] + extra_args = [] + + for a in target.extra_args.get('vala', []): + if isinstance(a, File): + relname = a.rel_to_builddir(self.build_to_src) + extra_dep_files.append(relname) + extra_args.append(relname) + else: + extra_args.append(a) + dependency_vapis = self.determine_dep_vapis(target) + extra_dep_files += dependency_vapis + args += extra_args + element = NinjaBuildElement(self.all_outputs, valac_outputs, + self.compiler_to_rule_name(valac), + all_files + dependency_vapis) + element.add_item('ARGS', args) + element.add_dep(extra_dep_files) + self.add_build(element) + self.create_target_source_introspection(target, valac, args, all_files, []) + return other_src[0], other_src[1], vala_c_src + + def generate_rust_target(self, target): + rustc = target.compilers['rust'] + # Rust compiler takes only the main file as input and + # figures out what other files are needed via import + # statements and magic. + main_rust_file = None + for i in target.get_sources(): + if not rustc.can_compile(i): + raise InvalidArguments('Rust target {} contains a non-rust source file.'.format(target.get_basename())) + if main_rust_file is None: + main_rust_file = i.rel_to_builddir(self.build_to_src) + if main_rust_file is None: + raise RuntimeError('A Rust target has no Rust sources. This is weird. Also a bug. Please report') + target_name = os.path.join(target.subdir, target.get_filename()) + args = ['--crate-type'] + if isinstance(target, build.Executable): + cratetype = 'bin' + elif hasattr(target, 'rust_crate_type'): + cratetype = target.rust_crate_type + elif isinstance(target, build.SharedLibrary): + cratetype = 'dylib' + elif isinstance(target, build.StaticLibrary): + cratetype = 'rlib' + else: + raise InvalidArguments('Unknown target type for rustc.') + args.append(cratetype) + + # If we're dynamically linking, add those arguments + # + # Rust is super annoying, calling -C link-arg foo does not work, it has + # to be -C link-arg=foo + if cratetype in {'bin', 'dylib'}: + for a in rustc.linker.get_always_args(): + args += ['-C', 'link-arg={}'.format(a)] + + args += ['--crate-name', target.name] + args += rustc.get_buildtype_args(self.get_option_for_target('buildtype', target)) + args += rustc.get_debug_args(self.get_option_for_target('debug', target)) + args += rustc.get_optimization_args(self.get_option_for_target('optimization', target)) + args += self.build.get_global_args(rustc, target.for_machine) + args += self.build.get_project_args(rustc, target.subproject, target.for_machine) + depfile = os.path.join(target.subdir, target.name + '.d') + args += ['--emit', 'dep-info={}'.format(depfile), '--emit', 'link'] + args += target.get_extra_args('rust') + args += ['-o', os.path.join(target.subdir, target.get_filename())] + orderdeps = [os.path.join(t.subdir, t.get_filename()) for t in target.link_targets] + linkdirs = OrderedDict() + for d in target.link_targets: + linkdirs[d.subdir] = True + # specify `extern CRATE_NAME=OUTPUT_FILE` for each Rust + # dependency, so that collisions with libraries in rustc's + # sysroot don't cause ambiguity + args += ['--extern', '{}={}'.format(d.name, os.path.join(d.subdir, d.filename))] + for d in linkdirs.keys(): + if d == '': + d = '.' + args += ['-L', d] + has_shared_deps = False + for dep in target.get_dependencies(): + if isinstance(dep, build.SharedLibrary): + has_shared_deps = True + if isinstance(target, build.SharedLibrary) or has_shared_deps: + # add prefer-dynamic if any of the Rust libraries we link + # against are dynamic, otherwise we'll end up with + # multiple implementations of crates + args += ['-C', 'prefer-dynamic'] + + # build the usual rpath arguments as well... + + # Set runtime-paths so we can run executables without needing to set + # LD_LIBRARY_PATH, etc in the environment. Doesn't work on Windows. + if has_path_sep(target.name): + # Target names really should not have slashes in them, but + # unfortunately we did not check for that and some downstream projects + # now have them. Once slashes are forbidden, remove this bit. + target_slashname_workaround_dir = os.path.join(os.path.dirname(target.name), + self.get_target_dir(target)) + else: + target_slashname_workaround_dir = self.get_target_dir(target) + (rpath_args, target.rpath_dirs_to_remove) = \ + rustc.build_rpath_args(self.environment, + self.environment.get_build_dir(), + target_slashname_workaround_dir, + self.determine_rpath_dirs(target), + target.build_rpath, + target.install_rpath) + # ... but then add rustc's sysroot to account for rustup + # installations + for rpath_arg in rpath_args: + args += ['-C', 'link-arg=' + rpath_arg + ':' + os.path.join(rustc.get_sysroot(), 'lib')] + compiler_name = self.get_compiler_rule_name('rust', target.for_machine) + element = NinjaBuildElement(self.all_outputs, target_name, compiler_name, main_rust_file) + if len(orderdeps) > 0: + element.add_orderdep(orderdeps) + element.add_item('ARGS', args) + element.add_item('targetdep', depfile) + element.add_item('cratetype', cratetype) + self.add_build(element) + if isinstance(target, build.SharedLibrary): + self.generate_shsym(target) + self.create_target_source_introspection(target, rustc, args, [main_rust_file], []) + + @staticmethod + def get_rule_suffix(for_machine: MachineChoice) -> str: + return PerMachine('_FOR_BUILD', '')[for_machine] + + @classmethod + def get_compiler_rule_name(cls, lang: str, for_machine: MachineChoice) -> str: + return '{}_COMPILER{}'.format(lang, cls.get_rule_suffix(for_machine)) + + @classmethod + def get_pch_rule_name(cls, lang: str, for_machine: MachineChoice) -> str: + return '{}_PCH{}'.format(lang, cls.get_rule_suffix(for_machine)) + + @classmethod + def compiler_to_rule_name(cls, compiler: Compiler) -> str: + return cls.get_compiler_rule_name(compiler.get_language(), compiler.for_machine) + + @classmethod + def compiler_to_pch_rule_name(cls, compiler: Compiler) -> str: + return cls.get_pch_rule_name(compiler.get_language(), compiler.for_machine) + + def swift_module_file_name(self, target): + return os.path.join(self.get_target_private_dir(target), + self.target_swift_modulename(target) + '.swiftmodule') + + def target_swift_modulename(self, target): + return target.name + + def is_swift_target(self, target): + for s in target.sources: + if s.endswith('swift'): + return True + return False + + def determine_swift_dep_modules(self, target): + result = [] + for l in target.link_targets: + if self.is_swift_target(l): + result.append(self.swift_module_file_name(l)) + return result + + def determine_swift_dep_dirs(self, target): + result = [] + for l in target.link_targets: + result.append(self.get_target_private_dir_abs(l)) + return result + + def get_swift_link_deps(self, target): + result = [] + for l in target.link_targets: + result.append(self.get_target_filename(l)) + return result + + def split_swift_generated_sources(self, target): + all_srcs = self.get_target_generated_sources(target) + srcs = [] + others = [] + for i in all_srcs: + if i.endswith('.swift'): + srcs.append(i) + else: + others.append(i) + return srcs, others + + def generate_swift_target(self, target): + module_name = self.target_swift_modulename(target) + swiftc = target.compilers['swift'] + abssrc = [] + relsrc = [] + abs_headers = [] + header_imports = [] + for i in target.get_sources(): + if swiftc.can_compile(i): + rels = i.rel_to_builddir(self.build_to_src) + abss = os.path.normpath(os.path.join(self.environment.get_build_dir(), rels)) + relsrc.append(rels) + abssrc.append(abss) + elif self.environment.is_header(i): + relh = i.rel_to_builddir(self.build_to_src) + absh = os.path.normpath(os.path.join(self.environment.get_build_dir(), relh)) + abs_headers.append(absh) + header_imports += swiftc.get_header_import_args(absh) + else: + raise InvalidArguments('Swift target {} contains a non-swift source file.'.format(target.get_basename())) + os.makedirs(self.get_target_private_dir_abs(target), exist_ok=True) + compile_args = swiftc.get_compile_only_args() + compile_args += swiftc.get_optimization_args(self.get_option_for_target('optimization', target)) + compile_args += swiftc.get_debug_args(self.get_option_for_target('debug', target)) + compile_args += swiftc.get_module_args(module_name) + compile_args += self.build.get_project_args(swiftc, target.subproject, target.for_machine) + compile_args += self.build.get_global_args(swiftc, target.for_machine) + for i in reversed(target.get_include_dirs()): + basedir = i.get_curdir() + for d in i.get_incdirs(): + if d not in ('', '.'): + expdir = os.path.join(basedir, d) + else: + expdir = basedir + srctreedir = os.path.normpath(os.path.join(self.environment.get_build_dir(), self.build_to_src, expdir)) + sargs = swiftc.get_include_args(srctreedir) + compile_args += sargs + link_args = swiftc.get_output_args(os.path.join(self.environment.get_build_dir(), self.get_target_filename(target))) + link_args += self.build.get_project_link_args(swiftc, target.subproject, target.for_machine) + link_args += self.build.get_global_link_args(swiftc, target.for_machine) + rundir = self.get_target_private_dir(target) + out_module_name = self.swift_module_file_name(target) + in_module_files = self.determine_swift_dep_modules(target) + abs_module_dirs = self.determine_swift_dep_dirs(target) + module_includes = [] + for x in abs_module_dirs: + module_includes += swiftc.get_include_args(x) + link_deps = self.get_swift_link_deps(target) + abs_link_deps = [os.path.join(self.environment.get_build_dir(), x) for x in link_deps] + for d in target.link_targets: + reldir = self.get_target_dir(d) + if reldir == '': + reldir = '.' + link_args += ['-L', os.path.normpath(os.path.join(self.environment.get_build_dir(), reldir))] + (rel_generated, _) = self.split_swift_generated_sources(target) + abs_generated = [os.path.join(self.environment.get_build_dir(), x) for x in rel_generated] + # We need absolute paths because swiftc needs to be invoked in a subdir + # and this is the easiest way about it. + objects = [] # Relative to swift invocation dir + rel_objects = [] # Relative to build.ninja + for i in abssrc + abs_generated: + base = os.path.basename(i) + oname = os.path.splitext(base)[0] + '.o' + objects.append(oname) + rel_objects.append(os.path.join(self.get_target_private_dir(target), oname)) + + rulename = self.get_compiler_rule_name('swift', target.for_machine) + + # Swiftc does not seem to be able to emit objects and module files in one go. + elem = NinjaBuildElement(self.all_outputs, rel_objects, rulename, abssrc) + elem.add_dep(in_module_files + rel_generated) + elem.add_dep(abs_headers) + elem.add_item('ARGS', compile_args + header_imports + abs_generated + module_includes) + elem.add_item('RUNDIR', rundir) + self.add_build(elem) + elem = NinjaBuildElement(self.all_outputs, out_module_name, + self.get_compiler_rule_name('swift', target.for_machine), + abssrc) + elem.add_dep(in_module_files + rel_generated) + elem.add_item('ARGS', compile_args + abs_generated + module_includes + swiftc.get_mod_gen_args()) + elem.add_item('RUNDIR', rundir) + self.add_build(elem) + if isinstance(target, build.StaticLibrary): + elem = self.generate_link(target, self.get_target_filename(target), + rel_objects, self.build.static_linker[target.for_machine]) + self.add_build(elem) + elif isinstance(target, build.Executable): + elem = NinjaBuildElement(self.all_outputs, self.get_target_filename(target), rulename, []) + elem.add_dep(rel_objects) + elem.add_dep(link_deps) + elem.add_item('ARGS', link_args + swiftc.get_std_exe_link_args() + objects + abs_link_deps) + elem.add_item('RUNDIR', rundir) + self.add_build(elem) + else: + raise MesonException('Swift supports only executable and static library targets.') + # Introspection information + self.create_target_source_introspection(target, swiftc, compile_args + header_imports + module_includes, relsrc, rel_generated) + + def generate_static_link_rules(self): + num_pools = self.environment.coredata.backend_options['backend_max_links'].value + if 'java' in self.environment.coredata.compilers.host: + self.generate_java_link() + for for_machine in MachineChoice: + static_linker = self.build.static_linker[for_machine] + if static_linker is None: + return + rule = 'STATIC_LINKER{}'.format(self.get_rule_suffix(for_machine)) + cmdlist = [] + args = ['$in'] + # FIXME: Must normalize file names with pathlib.Path before writing + # them out to fix this properly on Windows. See: + # https://github.com/mesonbuild/meson/issues/1517 + # https://github.com/mesonbuild/meson/issues/1526 + if isinstance(static_linker, ArLinker) and not mesonlib.is_windows(): + # `ar` has no options to overwrite archives. It always appends, + # which is never what we want. Delete an existing library first if + # it exists. https://github.com/mesonbuild/meson/issues/1355 + cmdlist = execute_wrapper + [c.format('$out') for c in rmfile_prefix] + cmdlist += static_linker.get_exelist() + cmdlist += ['$LINK_ARGS'] + cmdlist += NinjaCommandArg.list(static_linker.get_output_args('$out'), Quoting.none) + description = 'Linking static target $out' + if num_pools > 0: + pool = 'pool = link_pool' + else: + pool = None + self.add_rule(NinjaRule(rule, cmdlist, args, description, + rspable=static_linker.can_linker_accept_rsp(), + rspfile_quote_style='cl' if isinstance(static_linker, VisualStudioLinker) else 'gcc', + extra=pool)) + + def generate_dynamic_link_rules(self): + num_pools = self.environment.coredata.backend_options['backend_max_links'].value + for for_machine in MachineChoice: + complist = self.environment.coredata.compilers[for_machine] + for langname, compiler in complist.items(): + if langname == 'java' \ + or langname == 'vala' \ + or langname == 'rust' \ + or langname == 'cs': + continue + rule = '{}_LINKER{}'.format(langname, self.get_rule_suffix(for_machine)) + command = compiler.get_linker_exelist() + args = ['$ARGS'] + NinjaCommandArg.list(compiler.get_linker_output_args('$out'), Quoting.none) + ['$in', '$LINK_ARGS'] + description = 'Linking target $out' + if num_pools > 0: + pool = 'pool = link_pool' + else: + pool = None + self.add_rule(NinjaRule(rule, command, args, description, + rspable=compiler.can_linker_accept_rsp(), + rspfile_quote_style='cl' if (compiler.get_argument_syntax() == 'msvc' or + isinstance(compiler, DmdDCompiler)) else 'gcc', + extra=pool)) + + args = self.environment.get_build_command() + \ + ['--internal', + 'symbolextractor', + self.environment.get_build_dir(), + '$in', + '$IMPLIB', + '$out'] + symrule = 'SHSYM' + symcmd = args + ['$CROSS'] + syndesc = 'Generating symbol file $out' + synstat = 'restat = 1' + self.add_rule(NinjaRule(symrule, symcmd, [], syndesc, extra=synstat)) + + def generate_java_compile_rule(self, compiler): + rule = self.compiler_to_rule_name(compiler) + command = compiler.get_exelist() + ['$ARGS', '$in'] + description = 'Compiling Java object $in' + self.add_rule(NinjaRule(rule, command, [], description)) + + def generate_cs_compile_rule(self, compiler): + rule = self.compiler_to_rule_name(compiler) + command = compiler.get_exelist() + args = ['$ARGS', '$in'] + description = 'Compiling C Sharp target $out' + self.add_rule(NinjaRule(rule, command, args, description, + rspable=mesonlib.is_windows(), + rspfile_quote_style='cl' if isinstance(compiler, VisualStudioCsCompiler) else 'gcc')) + + def generate_vala_compile_rules(self, compiler): + rule = self.compiler_to_rule_name(compiler) + command = compiler.get_exelist() + ['$ARGS', '$in'] + description = 'Compiling Vala source $in' + self.add_rule(NinjaRule(rule, command, [], description, extra='restat = 1')) + + def generate_rust_compile_rules(self, compiler): + rule = self.compiler_to_rule_name(compiler) + command = compiler.get_exelist() + ['$ARGS', '$in'] + description = 'Compiling Rust source $in' + depfile = '$targetdep' + depstyle = 'gcc' + self.add_rule(NinjaRule(rule, command, [], description, deps=depstyle, + depfile=depfile)) + + def generate_swift_compile_rules(self, compiler): + rule = self.compiler_to_rule_name(compiler) + full_exe = self.environment.get_build_command() + [ + '--internal', + 'dirchanger', + '$RUNDIR', + ] + invoc = full_exe + compiler.get_exelist() + command = invoc + ['$ARGS', '$in'] + description = 'Compiling Swift source $in' + self.add_rule(NinjaRule(rule, command, [], description)) + + def generate_fortran_dep_hack(self, crstr): + rule = 'FORTRAN_DEP_HACK{}'.format(crstr) + if mesonlib.is_windows(): + cmd = ['cmd', '/C'] + else: + cmd = ['true'] + self.add_rule_comment(NinjaComment('''Workaround for these issues: +https://groups.google.com/forum/#!topic/ninja-build/j-2RfBIOd_8 +https://gcc.gnu.org/bugzilla/show_bug.cgi?id=47485''')) + self.add_rule(NinjaRule(rule, cmd, [], 'Dep hack', extra='restat = 1')) + + def generate_llvm_ir_compile_rule(self, compiler): + if self.created_llvm_ir_rule[compiler.for_machine]: + return + rule = self.get_compiler_rule_name('llvm_ir', compiler.for_machine) + command = compiler.get_exelist() + args = ['$ARGS'] + NinjaCommandArg.list(compiler.get_output_args('$out'), Quoting.none) + compiler.get_compile_only_args() + ['$in'] + description = 'Compiling LLVM IR object $in' + self.add_rule(NinjaRule(rule, command, args, description, + rspable=compiler.can_linker_accept_rsp())) + self.created_llvm_ir_rule[compiler.for_machine] = True + + def generate_compile_rule_for(self, langname, compiler): + if langname == 'java': + if self.environment.machines.matches_build_machine(compiler.for_machine): + self.generate_java_compile_rule(compiler) + return + if langname == 'cs': + if self.environment.machines.matches_build_machine(compiler.for_machine): + self.generate_cs_compile_rule(compiler) + return + if langname == 'vala': + self.generate_vala_compile_rules(compiler) + return + if langname == 'rust': + self.generate_rust_compile_rules(compiler) + return + if langname == 'swift': + if self.environment.machines.matches_build_machine(compiler.for_machine): + self.generate_swift_compile_rules(compiler) + return + crstr = self.get_rule_suffix(compiler.for_machine) + if langname == 'fortran': + self.generate_fortran_dep_hack(crstr) + rule = self.get_compiler_rule_name(langname, compiler.for_machine) + depargs = NinjaCommandArg.list(compiler.get_dependency_gen_args('$out', '$DEPFILE'), Quoting.none) + command = compiler.get_exelist() + args = ['$ARGS'] + depargs + NinjaCommandArg.list(compiler.get_output_args('$out'), Quoting.none) + compiler.get_compile_only_args() + ['$in'] + description = 'Compiling {} object $out'.format(compiler.get_display_language()) + if isinstance(compiler, VisualStudioLikeCompiler): + deps = 'msvc' + depfile = None + else: + deps = 'gcc' + depfile = '$DEPFILE' + self.add_rule(NinjaRule(rule, command, args, description, + rspable=compiler.can_linker_accept_rsp(), + rspfile_quote_style='cl' if (compiler.get_argument_syntax() == 'msvc' or + isinstance(compiler, DmdDCompiler)) else 'gcc', + deps=deps, depfile=depfile)) + + def generate_pch_rule_for(self, langname, compiler): + if langname != 'c' and langname != 'cpp': + return + rule = self.compiler_to_pch_rule_name(compiler) + depargs = compiler.get_dependency_gen_args('$out', '$DEPFILE') + + if isinstance(compiler, VisualStudioLikeCompiler): + output = [] + else: + output = NinjaCommandArg.list(compiler.get_output_args('$out'), Quoting.none) + command = compiler.get_exelist() + ['$ARGS'] + depargs + output + compiler.get_compile_only_args() + ['$in'] + description = 'Precompiling header $in' + if isinstance(compiler, VisualStudioLikeCompiler): + deps = 'msvc' + depfile = None + else: + deps = 'gcc' + depfile = '$DEPFILE' + self.add_rule(NinjaRule(rule, command, [], description, deps=deps, + depfile=depfile)) + + def generate_compile_rules(self): + for for_machine in MachineChoice: + clist = self.environment.coredata.compilers[for_machine] + for langname, compiler in clist.items(): + if compiler.get_id() == 'clang': + self.generate_llvm_ir_compile_rule(compiler) + self.generate_compile_rule_for(langname, compiler) + self.generate_pch_rule_for(langname, compiler) + + def generate_generator_list_rules(self, target): + # CustomTargets have already written their rules and + # CustomTargetIndexes don't actually get generated, so write rules for + # GeneratedLists here + for genlist in target.get_generated_sources(): + if isinstance(genlist, (build.CustomTarget, build.CustomTargetIndex)): + continue + self.generate_genlist_for_target(genlist, target) + + def replace_paths(self, target, args, override_subdir=None): + if override_subdir: + source_target_dir = os.path.join(self.build_to_src, override_subdir) + else: + source_target_dir = self.get_target_source_dir(target) + relout = self.get_target_private_dir(target) + args = [x.replace("@SOURCE_DIR@", self.build_to_src).replace("@BUILD_DIR@", relout) + for x in args] + args = [x.replace("@CURRENT_SOURCE_DIR@", source_target_dir) for x in args] + args = [x.replace("@SOURCE_ROOT@", self.build_to_src).replace("@BUILD_ROOT@", '.') + for x in args] + args = [x.replace('\\', '/') for x in args] + return args + + def generate_genlist_for_target(self, genlist, target): + generator = genlist.get_generator() + subdir = genlist.subdir + exe = generator.get_exe() + exe_arr = self.build_target_to_cmd_array(exe) + infilelist = genlist.get_inputs() + outfilelist = genlist.get_outputs() + extra_dependencies = self.get_custom_target_depend_files(genlist) + for i in range(len(infilelist)): + curfile = infilelist[i] + if len(generator.outputs) == 1: + sole_output = os.path.join(self.get_target_private_dir(target), outfilelist[i]) + else: + sole_output = '{}'.format(curfile) + infilename = curfile.rel_to_builddir(self.build_to_src) + base_args = generator.get_arglist(infilename) + outfiles = genlist.get_outputs_for(curfile) + outfiles = [os.path.join(self.get_target_private_dir(target), of) for of in outfiles] + if generator.depfile is None: + rulename = 'CUSTOM_COMMAND' + args = base_args + else: + rulename = 'CUSTOM_COMMAND_DEP' + depfilename = generator.get_dep_outname(infilename) + depfile = os.path.join(self.get_target_private_dir(target), depfilename) + args = [x.replace('@DEPFILE@', depfile) for x in base_args] + args = [x.replace("@INPUT@", infilename).replace('@OUTPUT@', sole_output) + for x in args] + args = self.replace_outputs(args, self.get_target_private_dir(target), outfilelist) + # We have consumed output files, so drop them from the list of remaining outputs. + if len(generator.outputs) > 1: + outfilelist = outfilelist[len(generator.outputs):] + args = self.replace_paths(target, args, override_subdir=subdir) + cmdlist = exe_arr + self.replace_extra_args(args, genlist) + meson_exe_cmd = self.as_meson_exe_cmdline('generator ' + cmdlist[0], + cmdlist[0], cmdlist[1:], + capture=outfiles[0] if generator.capture else None) + if meson_exe_cmd: + cmdlist = meson_exe_cmd + abs_pdir = os.path.join(self.environment.get_build_dir(), self.get_target_dir(target)) + os.makedirs(abs_pdir, exist_ok=True) + + elem = NinjaBuildElement(self.all_outputs, outfiles, rulename, infilename) + elem.add_dep([self.get_target_filename(x) for x in generator.depends]) + if generator.depfile is not None: + elem.add_item('DEPFILE', depfile) + if len(extra_dependencies) > 0: + elem.add_dep(extra_dependencies) + if len(generator.outputs) == 1: + elem.add_item('DESC', 'Generating {!r}.'.format(sole_output)) + else: + # since there are multiple outputs, we log the source that caused the rebuild + elem.add_item('DESC', 'Generating source from {!r}.'.format(sole_output)) + if isinstance(exe, build.BuildTarget): + elem.add_dep(self.get_target_filename(exe)) + elem.add_item('COMMAND', cmdlist) + self.add_build(elem) + + def scan_fortran_module_outputs(self, target): + """ + Find all module and submodule made available in a Fortran code file. + """ + compiler = None + # TODO other compilers + for lang, c in self.environment.coredata.compilers.host.items(): + if lang == 'fortran': + compiler = c + break + if compiler is None: + self.fortran_deps[target.get_basename()] = {} + return + + modre = re.compile(FORTRAN_MODULE_PAT, re.IGNORECASE) + submodre = re.compile(FORTRAN_SUBMOD_PAT, re.IGNORECASE) + module_files = {} + submodule_files = {} + for s in target.get_sources(): + # FIXME, does not work for Fortran sources generated by + # custom_target() and generator() as those are run after + # the configuration (configure_file() is OK) + if not compiler.can_compile(s): + continue + filename = s.absolute_path(self.environment.get_source_dir(), + self.environment.get_build_dir()) + # Fortran keywords must be ASCII. + with open(filename, encoding='ascii', errors='ignore') as f: + for line in f: + modmatch = modre.match(line) + if modmatch is not None: + modname = modmatch.group(1).lower() + if modname in module_files: + raise InvalidArguments( + 'Namespace collision: module {} defined in ' + 'two files {} and {}.'.format(modname, module_files[modname], s)) + module_files[modname] = s + else: + submodmatch = submodre.match(line) + if submodmatch is not None: + # '_' is arbitrarily used to distinguish submod from mod. + parents = submodmatch.group(1).lower().split(':') + submodname = parents[0] + '_' + submodmatch.group(2).lower() + + if submodname in submodule_files: + raise InvalidArguments( + 'Namespace collision: submodule {} defined in ' + 'two files {} and {}.'.format(submodname, submodule_files[submodname], s)) + submodule_files[submodname] = s + + self.fortran_deps[target.get_basename()] = {**module_files, **submodule_files} + + def get_fortran_deps(self, compiler: FortranCompiler, src: Path, target) -> T.List[str]: + """ + Find all module and submodule needed by a Fortran target + """ + + dirname = Path(self.get_target_private_dir(target)) + tdeps = self.fortran_deps[target.get_basename()] + srcdir = Path(self.source_dir) + + mod_files = _scan_fortran_file_deps(src, srcdir, dirname, tdeps, compiler) + return mod_files + + def get_cross_stdlib_args(self, target, compiler): + if self.environment.machines.matches_build_machine(target.for_machine): + return [] + if not self.environment.properties.host.has_stdlib(compiler.language): + return [] + return compiler.get_no_stdinc_args() + + def get_compile_debugfile_args(self, compiler, target, objfile): + # The way MSVC uses PDB files is documented exactly nowhere so + # the following is what we have been able to decipher via + # reverse engineering. + # + # Each object file gets the path of its PDB file written + # inside it. This can be either the final PDB (for, say, + # foo.exe) or an object pdb (for foo.obj). If the former, then + # each compilation step locks the pdb file for writing, which + # is a bottleneck and object files from one target can not be + # used in a different target. The latter seems to be the + # sensible one (and what Unix does) but there is a catch. If + # you try to use precompiled headers MSVC will error out + # because both source and pch pdbs go in the same file and + # they must be the same. + # + # This means: + # + # - pch files must be compiled anew for every object file (negating + # the entire point of having them in the first place) + # - when using pch, output must go to the target pdb + # + # Since both of these are broken in some way, use the one that + # works for each target. This unfortunately means that you + # can't combine pch and object extraction in a single target. + # + # PDB files also lead to filename collisions. A target foo.exe + # has a corresponding foo.pdb. A shared library foo.dll _also_ + # has pdb file called foo.pdb. So will a static library + # foo.lib, which clobbers both foo.pdb _and_ the dll file's + # export library called foo.lib (by default, currently we name + # them libfoo.a to avoidt this issue). You can give the files + # unique names such as foo_exe.pdb but VC also generates a + # bunch of other files which take their names from the target + # basename (i.e. "foo") and stomp on each other. + # + # CMake solves this problem by doing two things. First of all + # static libraries do not generate pdb files at + # all. Presumably you don't need them and VC is smart enough + # to look up the original data when linking (speculation, not + # tested). The second solution is that you can only have + # target named "foo" as an exe, shared lib _or_ static + # lib. This makes filename collisions not happen. The downside + # is that you can't have an executable foo that uses a shared + # library libfoo.so, which is a common idiom on Unix. + # + # If you feel that the above is completely wrong and all of + # this is actually doable, please send patches. + + if target.has_pch(): + tfilename = self.get_target_filename_abs(target) + return compiler.get_compile_debugfile_args(tfilename, pch=True) + else: + return compiler.get_compile_debugfile_args(objfile, pch=False) + + def get_link_debugfile_name(self, linker, target, outname): + return linker.get_link_debugfile_name(outname) + + def get_link_debugfile_args(self, linker, target, outname): + return linker.get_link_debugfile_args(outname) + + def generate_llvm_ir_compile(self, target, src): + compiler = get_compiler_for_source(target.compilers.values(), src) + commands = compiler.compiler_args() + # Compiler args for compiling this target + commands += compilers.get_base_compile_args(self.environment.coredata.base_options, + compiler) + if isinstance(src, File): + if src.is_built: + src_filename = os.path.join(src.subdir, src.fname) + else: + src_filename = src.fname + elif os.path.isabs(src): + src_filename = os.path.basename(src) + else: + src_filename = src + obj_basename = self.canonicalize_filename(src_filename) + rel_obj = os.path.join(self.get_target_private_dir(target), obj_basename) + rel_obj += '.' + self.environment.machines[target.for_machine].get_object_suffix() + commands += self.get_compile_debugfile_args(compiler, target, rel_obj) + if isinstance(src, File) and src.is_built: + rel_src = src.fname + elif isinstance(src, File): + rel_src = src.rel_to_builddir(self.build_to_src) + else: + raise InvalidArguments('Invalid source type: {!r}'.format(src)) + # Write the Ninja build command + compiler_name = self.get_compiler_rule_name('llvm_ir', compiler.for_machine) + element = NinjaBuildElement(self.all_outputs, rel_obj, compiler_name, rel_src) + element.add_item('ARGS', commands) + self.add_build(element) + return rel_obj + + def get_source_dir_include_args(self, target, compiler): + curdir = target.get_subdir() + tmppath = os.path.normpath(os.path.join(self.build_to_src, curdir)) + return compiler.get_include_args(tmppath, False) + + def get_build_dir_include_args(self, target, compiler): + curdir = target.get_subdir() + if curdir == '': + curdir = '.' + return compiler.get_include_args(curdir, False) + + @lru_cache(maxsize=None) + def get_normpath_target(self, source) -> str: + return os.path.normpath(source) + + def get_custom_target_dir_include_args(self, target, compiler): + custom_target_include_dirs = [] + for i in target.get_generated_sources(): + # Generator output goes into the target private dir which is + # already in the include paths list. Only custom targets have their + # own target build dir. + if not isinstance(i, (build.CustomTarget, build.CustomTargetIndex)): + continue + idir = self.get_normpath_target(self.get_target_dir(i)) + if not idir: + idir = '.' + if idir not in custom_target_include_dirs: + custom_target_include_dirs.append(idir) + incs = [] + for i in custom_target_include_dirs: + incs += compiler.get_include_args(i, False) + return incs + + @lru_cache(maxsize=None) + def generate_inc_dir(self, compiler, d, basedir, is_system): + # Avoid superfluous '/.' at the end of paths when d is '.' + if d not in ('', '.'): + expdir = os.path.normpath(os.path.join(basedir, d)) + else: + expdir = basedir + srctreedir = os.path.normpath(os.path.join(self.build_to_src, expdir)) + sargs = compiler.get_include_args(srctreedir, is_system) + # There may be include dirs where a build directory has not been + # created for some source dir. For example if someone does this: + # + # inc = include_directories('foo/bar/baz') + # + # But never subdir()s into the actual dir. + if os.path.isdir(os.path.join(self.environment.get_build_dir(), expdir)): + bargs = compiler.get_include_args(expdir, is_system) + else: + bargs = [] + return (sargs, bargs) + + @lru_cache(maxsize=None) + def _generate_single_compile(self, target, compiler, is_generated=False): + base_proxy = self.get_base_options_for_target(target) + # Create an empty commands list, and start adding arguments from + # various sources in the order in which they must override each other + commands = compiler.compiler_args() + # Start with symbol visibility. + commands += compiler.gnu_symbol_visibility_args(target.gnu_symbol_visibility) + # Add compiler args for compiling this target derived from 'base' build + # options passed on the command-line, in default_options, etc. + # These have the lowest priority. + commands += compilers.get_base_compile_args(base_proxy, + compiler) + # The code generated by valac is usually crap and has tons of unused + # variables and such, so disable warnings for Vala C sources. + no_warn_args = (is_generated == 'vala') + # Add compiler args and include paths from several sources; defaults, + # build options, external dependencies, etc. + commands += self.generate_basic_compiler_args(target, compiler, no_warn_args) + # Add custom target dirs as includes automatically, but before + # target-specific include directories. + # XXX: Not sure if anyone actually uses this? It can cause problems in + # situations which increase the likelihood for a header name collision, + # such as in subprojects. + commands += self.get_custom_target_dir_include_args(target, compiler) + # Add include dirs from the `include_directories:` kwarg on the target + # and from `include_directories:` of internal deps of the target. + # + # Target include dirs should override internal deps include dirs. + # This is handled in BuildTarget.process_kwargs() + # + # Include dirs from internal deps should override include dirs from + # external deps and must maintain the order in which they are specified. + # Hence, we must reverse the list so that the order is preserved. + for i in reversed(target.get_include_dirs()): + basedir = i.get_curdir() + # We should iterate include dirs in reversed orders because + # -Ipath will add to begin of array. And without reverse + # flags will be added in reversed order. + for d in reversed(i.get_incdirs()): + # Add source subdir first so that the build subdir overrides it + (compile_obj, includeargs) = self.generate_inc_dir(compiler, d, basedir, i.is_system) + commands += compile_obj + commands += includeargs + for d in i.get_extra_build_dirs(): + commands += compiler.get_include_args(d, i.is_system) + # Add per-target compile args, f.ex, `c_args : ['-DFOO']`. We set these + # near the end since these are supposed to override everything else. + commands += self.escape_extra_args(compiler, + target.get_extra_args(compiler.get_language())) + + # D specific additional flags + if compiler.language == 'd': + commands += compiler.get_feature_args(target.d_features, self.build_to_src) + + # Add source dir and build dir. Project-specific and target-specific + # include paths must override per-target compile args, include paths + # from external dependencies, internal dependencies, and from + # per-target `include_directories:` + # + # We prefer headers in the build dir over the source dir since, for + # instance, the user might have an srcdir == builddir Autotools build + # in their source tree. Many projects that are moving to Meson have + # both Meson and Autotools in parallel as part of the transition. + if target.implicit_include_directories: + commands += self.get_source_dir_include_args(target, compiler) + if target.implicit_include_directories: + commands += self.get_build_dir_include_args(target, compiler) + # Finally add the private dir for the target to the include path. This + # must override everything else and must be the final path added. + commands += compiler.get_include_args(self.get_target_private_dir(target), False) + return commands + + def generate_single_compile(self, target, src, is_generated=False, header_deps=None, order_deps=None): + """ + Compiles C/C++, ObjC/ObjC++, Fortran, and D sources + """ + header_deps = header_deps if header_deps is not None else [] + order_deps = order_deps if order_deps is not None else [] + + if isinstance(src, str) and src.endswith('.h'): + raise AssertionError('BUG: sources should not contain headers {!r}'.format(src)) + + compiler = get_compiler_for_source(target.compilers.values(), src) + commands = self._generate_single_compile(target, compiler, is_generated) + commands = commands.compiler.compiler_args(commands) + + # Create introspection information + if is_generated is False: + self.create_target_source_introspection(target, compiler, commands, [src], []) + else: + self.create_target_source_introspection(target, compiler, commands, [], [src]) + + build_dir = self.environment.get_build_dir() + if isinstance(src, File): + rel_src = src.rel_to_builddir(self.build_to_src) + if os.path.isabs(rel_src): + # Source files may not be from the source directory if they originate in source-only libraries, + # so we can't assert that the absolute path is anywhere in particular. + if src.is_built: + assert rel_src.startswith(build_dir) + rel_src = rel_src[len(build_dir) + 1:] + elif is_generated: + raise AssertionError('BUG: broken generated source file handling for {!r}'.format(src)) + else: + raise InvalidArguments('Invalid source type: {!r}'.format(src)) + obj_basename = self.object_filename_from_source(target, src) + rel_obj = os.path.join(self.get_target_private_dir(target), obj_basename) + dep_file = compiler.depfile_for_object(rel_obj) + + # Add MSVC debug file generation compile flags: /Fd /FS + commands += self.get_compile_debugfile_args(compiler, target, rel_obj) + + # PCH handling + if self.environment.coredata.base_options.get('b_pch', False): + commands += self.get_pch_include_args(compiler, target) + pchlist = target.get_pch(compiler.language) + else: + pchlist = [] + if not pchlist: + pch_dep = [] + elif compiler.id == 'intel': + pch_dep = [] + else: + arr = [] + i = os.path.join(self.get_target_private_dir(target), compiler.get_pch_name(pchlist[0])) + arr.append(i) + pch_dep = arr + + compiler_name = self.compiler_to_rule_name(compiler) + extra_deps = [] + if compiler.get_language() == 'fortran': + # Can't read source file to scan for deps if it's generated later + # at build-time. Skip scanning for deps, and just set the module + # outdir argument instead. + # https://github.com/mesonbuild/meson/issues/1348 + if not is_generated: + abs_src = Path(build_dir) / rel_src + extra_deps += self.get_fortran_deps(compiler, abs_src, target) + # Dependency hack. Remove once multiple outputs in Ninja is fixed: + # https://groups.google.com/forum/#!topic/ninja-build/j-2RfBIOd_8 + for modname, srcfile in self.fortran_deps[target.get_basename()].items(): + modfile = os.path.join(self.get_target_private_dir(target), + compiler.module_name_to_filename(modname)) + + if srcfile == src: + crstr = self.get_rule_suffix(target.for_machine) + depelem = NinjaBuildElement(self.all_outputs, modfile, 'FORTRAN_DEP_HACK' + crstr, rel_obj) + self.add_build(depelem) + commands += compiler.get_module_outdir_args(self.get_target_private_dir(target)) + + element = NinjaBuildElement(self.all_outputs, rel_obj, compiler_name, rel_src) + self.add_header_deps(target, element, header_deps) + for d in extra_deps: + element.add_dep(d) + for d in order_deps: + if isinstance(d, File): + d = d.rel_to_builddir(self.build_to_src) + elif not self.has_dir_part(d): + d = os.path.join(self.get_target_private_dir(target), d) + element.add_orderdep(d) + element.add_dep(pch_dep) + for i in self.get_fortran_orderdeps(target, compiler): + element.add_orderdep(i) + element.add_item('DEPFILE', dep_file) + element.add_item('ARGS', commands) + self.add_build(element) + return rel_obj + + def add_header_deps(self, target, ninja_element, header_deps): + for d in header_deps: + if isinstance(d, File): + d = d.rel_to_builddir(self.build_to_src) + elif not self.has_dir_part(d): + d = os.path.join(self.get_target_private_dir(target), d) + ninja_element.add_dep(d) + + def has_dir_part(self, fname): + # FIXME FIXME: The usage of this is a terrible and unreliable hack + if isinstance(fname, File): + return fname.subdir != '' + return has_path_sep(fname) + + # Fortran is a bit weird (again). When you link against a library, just compiling a source file + # requires the mod files that are output when single files are built. To do this right we would need to + # scan all inputs and write out explicit deps for each file. That is stoo slow and too much effort so + # instead just have an ordered dependency on the library. This ensures all required mod files are created. + # The real deps are then detected via dep file generation from the compiler. This breaks on compilers that + # produce incorrect dep files but such is life. + def get_fortran_orderdeps(self, target, compiler): + if compiler.language != 'fortran': + return [] + return [os.path.join(self.get_target_dir(lt), lt.get_filename()) for lt in target.link_targets] + + def generate_msvc_pch_command(self, target, compiler, pch): + header = pch[0] + pchname = compiler.get_pch_name(header) + dst = os.path.join(self.get_target_private_dir(target), pchname) + + commands = [] + commands += self.generate_basic_compiler_args(target, compiler) + + if len(pch) == 1: + # Auto generate PCH. + source = self.create_msvc_pch_implementation(target, compiler.get_language(), pch[0]) + pch_header_dir = os.path.dirname(os.path.join(self.build_to_src, target.get_source_subdir(), header)) + commands += compiler.get_include_args(pch_header_dir, False) + else: + source = os.path.join(self.build_to_src, target.get_source_subdir(), pch[1]) + + just_name = os.path.basename(header) + (objname, pch_args) = compiler.gen_pch_args(just_name, source, dst) + commands += pch_args + commands += self._generate_single_compile(target, compiler) + commands += self.get_compile_debugfile_args(compiler, target, objname) + dep = dst + '.' + compiler.get_depfile_suffix() + return commands, dep, dst, [objname], source + + def generate_gcc_pch_command(self, target, compiler, pch): + commands = self._generate_single_compile(target, compiler) + if pch.split('.')[-1] == 'h' and compiler.language == 'cpp': + # Explicitly compile pch headers as C++. If Clang is invoked in C++ mode, it actually warns if + # this option is not set, and for gcc it also makes sense to use it. + commands += ['-x', 'c++-header'] + dst = os.path.join(self.get_target_private_dir(target), + os.path.basename(pch) + '.' + compiler.get_pch_suffix()) + dep = dst + '.' + compiler.get_depfile_suffix() + return commands, dep, dst, [] # Gcc does not create an object file during pch generation. + + def generate_pch(self, target, header_deps=None): + header_deps = header_deps if header_deps is not None else [] + pch_objects = [] + for lang in ['c', 'cpp']: + pch = target.get_pch(lang) + if not pch: + continue + if not has_path_sep(pch[0]) or not has_path_sep(pch[-1]): + msg = 'Precompiled header of {!r} must not be in the same ' \ + 'directory as source, please put it in a subdirectory.' \ + ''.format(target.get_basename()) + raise InvalidArguments(msg) + compiler = target.compilers[lang] + if isinstance(compiler, VisualStudioLikeCompiler): + (commands, dep, dst, objs, src) = self.generate_msvc_pch_command(target, compiler, pch) + extradep = os.path.join(self.build_to_src, target.get_source_subdir(), pch[0]) + elif compiler.id == 'intel': + # Intel generates on target generation + continue + else: + src = os.path.join(self.build_to_src, target.get_source_subdir(), pch[0]) + (commands, dep, dst, objs) = self.generate_gcc_pch_command(target, compiler, pch[0]) + extradep = None + pch_objects += objs + rulename = self.compiler_to_pch_rule_name(compiler) + elem = NinjaBuildElement(self.all_outputs, dst, rulename, src) + if extradep is not None: + elem.add_dep(extradep) + self.add_header_deps(target, elem, header_deps) + elem.add_item('ARGS', commands) + elem.add_item('DEPFILE', dep) + self.add_build(elem) + return pch_objects + + def get_target_shsym_filename(self, target): + # Always name the .symbols file after the primary build output because it always exists + targetdir = self.get_target_private_dir(target) + return os.path.join(targetdir, target.get_filename() + '.symbols') + + def generate_shsym(self, target): + target_file = self.get_target_filename(target) + symname = self.get_target_shsym_filename(target) + elem = NinjaBuildElement(self.all_outputs, symname, 'SHSYM', target_file) + # The library we will actually link to, which is an import library on Windows (not the DLL) + elem.add_item('IMPLIB', self.get_target_filename_for_linking(target)) + if self.environment.is_cross_build(): + elem.add_item('CROSS', '--cross-host=' + self.environment.machines[target.for_machine].system) + self.add_build(elem) + + def get_cross_stdlib_link_args(self, target, linker): + if isinstance(target, build.StaticLibrary) or \ + self.environment.machines.matches_build_machine(target.for_machine): + return [] + if not self.environment.properties.host.has_stdlib(linker.language): + return [] + return linker.get_no_stdlib_link_args() + + def get_import_filename(self, target): + return os.path.join(self.get_target_dir(target), target.import_filename) + + def get_target_type_link_args(self, target, linker): + commands = [] + if isinstance(target, build.Executable): + # Currently only used with the Swift compiler to add '-emit-executable' + commands += linker.get_std_exe_link_args() + # If export_dynamic, add the appropriate linker arguments + if target.export_dynamic: + commands += linker.gen_export_dynamic_link_args(self.environment) + # If implib, and that's significant on this platform (i.e. Windows using either GCC or Visual Studio) + if target.import_filename: + commands += linker.gen_import_library_args(self.get_import_filename(target)) + if target.pie: + commands += linker.get_pie_link_args() + elif isinstance(target, build.SharedLibrary): + if isinstance(target, build.SharedModule): + options = self.environment.coredata.base_options + commands += linker.get_std_shared_module_link_args(options) + else: + commands += linker.get_std_shared_lib_link_args() + # All shared libraries are PIC + commands += linker.get_pic_args() + # Add -Wl,-soname arguments on Linux, -install_name on OS X + commands += linker.get_soname_args( + self.environment, target.prefix, target.name, target.suffix, + target.soversion, target.darwin_versions, + isinstance(target, build.SharedModule)) + # This is only visited when building for Windows using either GCC or Visual Studio + if target.vs_module_defs and hasattr(linker, 'gen_vs_module_defs_args'): + commands += linker.gen_vs_module_defs_args(target.vs_module_defs.rel_to_builddir(self.build_to_src)) + # This is only visited when building for Windows using either GCC or Visual Studio + if target.import_filename: + commands += linker.gen_import_library_args(self.get_import_filename(target)) + elif isinstance(target, build.StaticLibrary): + commands += linker.get_std_link_args() + else: + raise RuntimeError('Unknown build target type.') + return commands + + def get_target_type_link_args_post_dependencies(self, target, linker): + commands = [] + if isinstance(target, build.Executable): + # If gui_app is significant on this platform, add the appropriate linker arguments. + # Unfortunately this can't be done in get_target_type_link_args, because some misguided + # libraries (such as SDL2) add -mwindows to their link flags. + commands += linker.get_gui_app_args(target.gui_app) + return commands + + def get_link_whole_args(self, linker, target): + target_args = self.build_target_link_arguments(linker, target.link_whole_targets) + return linker.get_link_whole_for(target_args) if len(target_args) else [] + + @lru_cache(maxsize=None) + def guess_library_absolute_path(self, linker, libname, search_dirs, patterns): + for d in search_dirs: + for p in patterns: + trial = CCompiler._get_trials_from_pattern(p, d, libname) + if not trial: + continue + trial = CCompiler._get_file_from_list(self.environment, trial) + if not trial: + continue + # Return the first result + return trial + + def guess_external_link_dependencies(self, linker, target, commands, internal): + # Ideally the linker would generate dependency information that could be used. + # But that has 2 problems: + # * currently ld can not create dependency information in a way that ninja can use: + # https://sourceware.org/bugzilla/show_bug.cgi?id=22843 + # * Meson optimizes libraries from the same build using the symbol extractor. + # Just letting ninja use ld generated dependencies would undo this optimization. + search_dirs = OrderedSet() + libs = OrderedSet() + absolute_libs = [] + + build_dir = self.environment.get_build_dir() + # the following loop sometimes consumes two items from command in one pass + it = iter(linker.native_args_to_unix(commands)) + for item in it: + if item in internal and not item.startswith('-'): + continue + + if item.startswith('-L'): + if len(item) > 2: + path = item[2:] + else: + try: + path = next(it) + except StopIteration: + mlog.warning("Generated linker command has -L argument without following path") + break + if not os.path.isabs(path): + path = os.path.join(build_dir, path) + search_dirs.add(path) + elif item.startswith('-l'): + if len(item) > 2: + lib = item[2:] + else: + try: + lib = next(it) + except StopIteration: + mlog.warning("Generated linker command has '-l' argument without following library name") + break + libs.add(lib) + elif os.path.isabs(item) and self.environment.is_library(item) and os.path.isfile(item): + absolute_libs.append(item) + + guessed_dependencies = [] + # TODO The get_library_naming requirement currently excludes link targets that use d or fortran as their main linker + if hasattr(linker, 'get_library_naming'): + search_dirs = tuple(search_dirs) + tuple(linker.get_library_dirs(self.environment)) + static_patterns = linker.get_library_naming(self.environment, LibType.STATIC, strict=True) + shared_patterns = linker.get_library_naming(self.environment, LibType.SHARED, strict=True) + for libname in libs: + # be conservative and record most likely shared and static resolution, because we don't know exactly + # which one the linker will prefer + staticlibs = self.guess_library_absolute_path(linker, libname, + search_dirs, static_patterns) + sharedlibs = self.guess_library_absolute_path(linker, libname, + search_dirs, shared_patterns) + if staticlibs: + guessed_dependencies.append(staticlibs.resolve().as_posix()) + if sharedlibs: + guessed_dependencies.append(sharedlibs.resolve().as_posix()) + + return guessed_dependencies + absolute_libs + + def generate_link(self, target, outname, obj_list, linker, extra_args=None, stdlib_args=None): + extra_args = extra_args if extra_args is not None else [] + stdlib_args = stdlib_args if stdlib_args is not None else [] + implicit_outs = [] + if isinstance(target, build.StaticLibrary): + linker_base = 'STATIC' + else: + linker_base = linker.get_language() # Fixme. + if isinstance(target, build.SharedLibrary): + self.generate_shsym(target) + crstr = self.get_rule_suffix(target.for_machine) + linker_rule = linker_base + '_LINKER' + crstr + # Create an empty commands list, and start adding link arguments from + # various sources in the order in which they must override each other + # starting from hard-coded defaults followed by build options and so on. + # + # Once all the linker options have been passed, we will start passing + # libraries and library paths from internal and external sources. + commands = linker.compiler_args() + # First, the trivial ones that are impossible to override. + # + # Add linker args for linking this target derived from 'base' build + # options passed on the command-line, in default_options, etc. + # These have the lowest priority. + if isinstance(target, build.StaticLibrary): + commands += linker.get_base_link_args(self.get_base_options_for_target(target)) + else: + commands += compilers.get_base_link_args(self.get_base_options_for_target(target), + linker, + isinstance(target, build.SharedModule)) + # Add -nostdlib if needed; can't be overridden + commands += self.get_cross_stdlib_link_args(target, linker) + # Add things like /NOLOGO; usually can't be overridden + commands += linker.get_linker_always_args() + # Add buildtype linker args: optimization level, etc. + commands += linker.get_buildtype_linker_args(self.get_option_for_target('buildtype', target)) + # Add /DEBUG and the pdb filename when using MSVC + if self.get_option_for_target('debug', target): + commands += self.get_link_debugfile_args(linker, target, outname) + debugfile = self.get_link_debugfile_name(linker, target, outname) + if debugfile is not None: + implicit_outs += [debugfile] + # Add link args specific to this BuildTarget type, such as soname args, + # PIC, import library generation, etc. + commands += self.get_target_type_link_args(target, linker) + # Archives that are copied wholesale in the result. Must be before any + # other link targets so missing symbols from whole archives are found in those. + if not isinstance(target, build.StaticLibrary): + commands += self.get_link_whole_args(linker, target) + + if not isinstance(target, build.StaticLibrary): + # Add link args added using add_project_link_arguments() + commands += self.build.get_project_link_args(linker, target.subproject, target.for_machine) + # Add link args added using add_global_link_arguments() + # These override per-project link arguments + commands += self.build.get_global_link_args(linker, target.for_machine) + # Link args added from the env: LDFLAGS. We want these to override + # all the defaults but not the per-target link args. + commands += self.environment.coredata.get_external_link_args(target.for_machine, linker.get_language()) + + # Now we will add libraries and library paths from various sources + + # Add link args to link to all internal libraries (link_with:) and + # internal dependencies needed by this target. + if linker_base == 'STATIC': + # Link arguments of static libraries are not put in the command + # line of the library. They are instead appended to the command + # line where the static library is used. + dependencies = [] + else: + dependencies = target.get_dependencies() + internal = self.build_target_link_arguments(linker, dependencies) + commands += internal + # Only non-static built targets need link args and link dependencies + if not isinstance(target, build.StaticLibrary): + # For 'automagic' deps: Boost and GTest. Also dependency('threads'). + # pkg-config puts the thread flags itself via `Cflags:` + + commands += linker.get_target_link_args(target) + # External deps must be last because target link libraries may depend on them. + for dep in target.get_external_deps(): + # Extend without reordering or de-dup to preserve `-L -l` sets + # https://github.com/mesonbuild/meson/issues/1718 + commands.extend_preserving_lflags(linker.get_dependency_link_args(dep)) + for d in target.get_dependencies(): + if isinstance(d, build.StaticLibrary): + for dep in d.get_external_deps(): + commands.extend_preserving_lflags(linker.get_dependency_link_args(dep)) + + # Add link args specific to this BuildTarget type that must not be overridden by dependencies + commands += self.get_target_type_link_args_post_dependencies(target, linker) + + # Add link args for c_* or cpp_* build options. Currently this only + # adds c_winlibs and cpp_winlibs when building for Windows. This needs + # to be after all internal and external libraries so that unresolved + # symbols from those can be found here. This is needed when the + # *_winlibs that we want to link to are static mingw64 libraries. + if hasattr(linker, 'get_language'): + # The static linker doesn't know what language it is building, so we + # don't know what option. Fortunately, it doesn't care to see the + # language-specific options either. + # + # We shouldn't check whether we are making a static library, because + # in the LTO case we do use a real compiler here. + commands += linker.get_option_link_args(self.environment.coredata.compiler_options[target.for_machine][linker.get_language()]) + + dep_targets = [] + dep_targets.extend(self.guess_external_link_dependencies(linker, target, commands, internal)) + + # Set runtime-paths so we can run executables without needing to set + # LD_LIBRARY_PATH, etc in the environment. Doesn't work on Windows. + if has_path_sep(target.name): + # Target names really should not have slashes in them, but + # unfortunately we did not check for that and some downstream projects + # now have them. Once slashes are forbidden, remove this bit. + target_slashname_workaround_dir = os.path.join( + os.path.dirname(target.name), + self.get_target_dir(target)) + else: + target_slashname_workaround_dir = self.get_target_dir(target) + (rpath_args, target.rpath_dirs_to_remove) = \ + linker.build_rpath_args(self.environment, + self.environment.get_build_dir(), + target_slashname_workaround_dir, + self.determine_rpath_dirs(target), + target.build_rpath, + target.install_rpath) + commands += rpath_args + # Add libraries generated by custom targets + custom_target_libraries = self.get_custom_target_provided_libraries(target) + commands += extra_args + commands += custom_target_libraries + commands += stdlib_args # Standard library arguments go last, because they never depend on anything. + dep_targets.extend([self.get_dependency_filename(t) for t in dependencies]) + dep_targets.extend([self.get_dependency_filename(t) + for t in target.link_depends]) + elem = NinjaBuildElement(self.all_outputs, outname, linker_rule, obj_list, implicit_outs=implicit_outs) + elem.add_dep(dep_targets + custom_target_libraries) + elem.add_item('LINK_ARGS', commands) + return elem + + def get_dependency_filename(self, t): + if isinstance(t, build.SharedLibrary): + return self.get_target_shsym_filename(t) + elif isinstance(t, mesonlib.File): + if t.is_built: + return t.relative_name() + else: + return t.absolute_path(self.environment.get_source_dir(), + self.environment.get_build_dir()) + return self.get_target_filename(t) + + def generate_shlib_aliases(self, target, outdir): + aliases = target.get_aliases() + for alias, to in aliases.items(): + aliasfile = os.path.join(self.environment.get_build_dir(), outdir, alias) + try: + os.remove(aliasfile) + except Exception: + pass + try: + os.symlink(to, aliasfile) + except NotImplementedError: + mlog.debug("Library versioning disabled because symlinks are not supported.") + except OSError: + mlog.debug("Library versioning disabled because we do not have symlink creation privileges.") + + def generate_custom_target_clean(self, trees): + e = NinjaBuildElement(self.all_outputs, 'meson-clean-ctlist', 'CUSTOM_COMMAND', 'PHONY') + d = CleanTrees(self.environment.get_build_dir(), trees) + d_file = os.path.join(self.environment.get_scratch_dir(), 'cleantrees.dat') + e.add_item('COMMAND', self.environment.get_build_command() + ['--internal', 'cleantrees', d_file]) + e.add_item('description', 'Cleaning custom target directories') + self.add_build(e) + # Alias that runs the target defined above + self.create_target_alias('meson-clean-ctlist') + # Write out the data file passed to the script + with open(d_file, 'wb') as ofile: + pickle.dump(d, ofile) + return 'clean-ctlist' + + def generate_gcov_clean(self): + gcno_elem = NinjaBuildElement(self.all_outputs, 'meson-clean-gcno', 'CUSTOM_COMMAND', 'PHONY') + gcno_elem.add_item('COMMAND', mesonlib.meson_command + ['--internal', 'delwithsuffix', '.', 'gcno']) + gcno_elem.add_item('description', 'Deleting gcno files') + self.add_build(gcno_elem) + # Alias that runs the target defined above + self.create_target_alias('meson-clean-gcno') + + gcda_elem = NinjaBuildElement(self.all_outputs, 'meson-clean-gcda', 'CUSTOM_COMMAND', 'PHONY') + gcda_elem.add_item('COMMAND', mesonlib.meson_command + ['--internal', 'delwithsuffix', '.', 'gcda']) + gcda_elem.add_item('description', 'Deleting gcda files') + self.add_build(gcda_elem) + # Alias that runs the target defined above + self.create_target_alias('meson-clean-gcda') + + def get_user_option_args(self): + cmds = [] + for (k, v) in self.environment.coredata.user_options.items(): + cmds.append('-D' + k + '=' + (v.value if isinstance(v.value, str) else str(v.value).lower())) + # The order of these arguments must be the same between runs of Meson + # to ensure reproducible output. The order we pass them shouldn't + # affect behavior in any other way. + return sorted(cmds) + + def generate_dist(self): + elem = NinjaBuildElement(self.all_outputs, 'meson-dist', 'CUSTOM_COMMAND', 'PHONY') + elem.add_item('DESC', 'Creating source packages') + elem.add_item('COMMAND', self.environment.get_build_command() + ['dist']) + elem.add_item('pool', 'console') + self.add_build(elem) + # Alias that runs the target defined above + self.create_target_alias('meson-dist') + + def generate_scanbuild(self): + if not environment.detect_scanbuild(): + return + if ('', 'scan-build') in self.build.run_target_names: + return + cmd = self.environment.get_build_command() + \ + ['--internal', 'scanbuild', self.environment.source_dir, self.environment.build_dir] + \ + self.environment.get_build_command() + self.get_user_option_args() + elem = NinjaBuildElement(self.all_outputs, 'meson-scan-build', 'CUSTOM_COMMAND', 'PHONY') + elem.add_item('COMMAND', cmd) + elem.add_item('pool', 'console') + self.add_build(elem) + # Alias that runs the target defined above + self.create_target_alias('meson-scan-build') + + def generate_clangtool(self, name): + target_name = 'clang-' + name + if not os.path.exists(os.path.join(self.environment.source_dir, '.clang-' + name)) and \ + not os.path.exists(os.path.join(self.environment.source_dir, '_clang-' + name)): + return + if target_name in self.all_outputs: + return + if ('', target_name) in self.build.run_target_names: + return + cmd = self.environment.get_build_command() + \ + ['--internal', 'clang' + name, self.environment.source_dir, self.environment.build_dir] + elem = NinjaBuildElement(self.all_outputs, 'meson-' + target_name, 'CUSTOM_COMMAND', 'PHONY') + elem.add_item('COMMAND', cmd) + elem.add_item('pool', 'console') + self.add_build(elem) + self.create_target_alias('meson-' + target_name) + + def generate_clangformat(self): + if not environment.detect_clangformat(): + return + self.generate_clangtool('format') + + def generate_clangtidy(self): + import shutil + if not shutil.which('clang-tidy'): + return + self.generate_clangtool('tidy') + + def generate_tags(self, tool, target_name): + import shutil + if not shutil.which(tool): + return + if ('', target_name) in self.build.run_target_names: + return + if target_name in self.all_outputs: + return + cmd = self.environment.get_build_command() + \ + ['--internal', 'tags', tool, self.environment.source_dir] + elem = NinjaBuildElement(self.all_outputs, 'meson-' + target_name, 'CUSTOM_COMMAND', 'PHONY') + elem.add_item('COMMAND', cmd) + elem.add_item('pool', 'console') + self.add_build(elem) + # Alias that runs the target defined above + self.create_target_alias('meson-' + target_name) + + # For things like scan-build and other helper tools we might have. + def generate_utils(self): + self.generate_scanbuild() + self.generate_clangformat() + self.generate_clangtidy() + self.generate_tags('etags', 'TAGS') + self.generate_tags('ctags', 'ctags') + self.generate_tags('cscope', 'cscope') + cmd = self.environment.get_build_command() + ['--internal', 'uninstall'] + elem = NinjaBuildElement(self.all_outputs, 'meson-uninstall', 'CUSTOM_COMMAND', 'PHONY') + elem.add_item('COMMAND', cmd) + elem.add_item('pool', 'console') + self.add_build(elem) + # Alias that runs the target defined above + self.create_target_alias('meson-uninstall') + + def generate_ending(self): + targetlist = [] + for t in self.get_build_by_default_targets().values(): + # Add the first output of each target to the 'all' target so that + # they are all built + targetlist.append(os.path.join(self.get_target_dir(t), t.get_outputs()[0])) + + elem = NinjaBuildElement(self.all_outputs, 'all', 'phony', targetlist) + self.add_build(elem) + + elem = NinjaBuildElement(self.all_outputs, 'meson-clean', 'CUSTOM_COMMAND', 'PHONY') + elem.add_item('COMMAND', self.ninja_command + ['-t', 'clean']) + elem.add_item('description', 'Cleaning') + # Alias that runs the above-defined meson-clean target + self.create_target_alias('meson-clean') + + # If we have custom targets in this project, add all their outputs to + # the list that is passed to the `cleantrees.py` script. The script + # will manually delete all custom_target outputs that are directories + # instead of files. This is needed because on platforms other than + # Windows, Ninja only deletes directories while cleaning if they are + # empty. https://github.com/mesonbuild/meson/issues/1220 + ctlist = [] + for t in self.build.get_targets().values(): + if isinstance(t, build.CustomTarget): + # Create a list of all custom target outputs + for o in t.get_outputs(): + ctlist.append(os.path.join(self.get_target_dir(t), o)) + if ctlist: + elem.add_dep(self.generate_custom_target_clean(ctlist)) + + if 'b_coverage' in self.environment.coredata.base_options and \ + self.environment.coredata.base_options['b_coverage'].value: + self.generate_gcov_clean() + elem.add_dep('clean-gcda') + elem.add_dep('clean-gcno') + self.add_build(elem) + + deps = self.get_regen_filelist() + elem = NinjaBuildElement(self.all_outputs, 'build.ninja', 'REGENERATE_BUILD', deps) + elem.add_item('pool', 'console') + self.add_build(elem) + + elem = NinjaBuildElement(self.all_outputs, 'reconfigure', 'REGENERATE_BUILD', 'PHONY') + elem.add_item('pool', 'console') + self.add_build(elem) + + elem = NinjaBuildElement(self.all_outputs, deps, 'phony', '') + self.add_build(elem) + + def get_introspection_data(self, target_id, target): + if target_id not in self.introspection_data or len(self.introspection_data[target_id]) == 0: + return super().get_introspection_data(target_id, target) + + result = [] + for i in self.introspection_data[target_id].values(): + result += [i] + return result + +def load(build_dir): + filename = os.path.join(build_dir, 'meson-private', 'install.dat') + with open(filename, 'rb') as f: + obj = pickle.load(f) + return obj + + +def _scan_fortran_file_deps(src: Path, srcdir: Path, dirname: Path, tdeps, compiler) -> T.List[str]: + """ + scan a Fortran file for dependencies. Needs to be distinct from target + to allow for recursion induced by `include` statements.er + + It makes a number of assumptions, including + + * `use`, `module`, `submodule` name is not on a continuation line + + Regex + ----- + + * `incre` works for `#include "foo.f90"` and `include "foo.f90"` + * `usere` works for legacy and Fortran 2003 `use` statements + * `submodre` is for Fortran >= 2008 `submodule` + """ + + incre = re.compile(FORTRAN_INCLUDE_PAT, re.IGNORECASE) + usere = re.compile(FORTRAN_USE_PAT, re.IGNORECASE) + submodre = re.compile(FORTRAN_SUBMOD_PAT, re.IGNORECASE) + + mod_files = [] + src = Path(src) + with src.open(encoding='ascii', errors='ignore') as f: + for line in f: + # included files + incmatch = incre.match(line) + if incmatch is not None: + incfile = srcdir / incmatch.group(1) + if incfile.suffix.lower()[1:] in compiler.file_suffixes: + mod_files.extend(_scan_fortran_file_deps(incfile, srcdir, dirname, tdeps, compiler)) + # modules + usematch = usere.match(line) + if usematch is not None: + usename = usematch.group(1).lower() + if usename == 'intrinsic': # this keeps the regex simpler + continue + if usename not in tdeps: + # The module is not provided by any source file. This + # is due to: + # a) missing file/typo/etc + # b) using a module provided by the compiler, such as + # OpenMP + # There's no easy way to tell which is which (that I + # know of) so just ignore this and go on. Ideally we + # would print a warning message to the user but this is + # a common occurrence, which would lead to lots of + # distracting noise. + continue + srcfile = srcdir / tdeps[usename].fname # type: Path + if not srcfile.is_file(): + if srcfile.name != src.name: # generated source file + pass + else: # subproject + continue + elif srcfile.samefile(src): # self-reference + continue + + mod_name = compiler.module_name_to_filename(usename) + mod_files.append(str(dirname / mod_name)) + else: # submodules + submodmatch = submodre.match(line) + if submodmatch is not None: + parents = submodmatch.group(1).lower().split(':') + assert len(parents) in (1, 2), ( + 'submodule ancestry must be specified as' + ' ancestor:parent but Meson found {}'.format(parents)) + + ancestor_child = '_'.join(parents) + if ancestor_child not in tdeps: + raise MesonException("submodule {} relies on ancestor module {} that was not found.".format(submodmatch.group(2).lower(), ancestor_child.split('_')[0])) + submodsrcfile = srcdir / tdeps[ancestor_child].fname # type: Path + if not submodsrcfile.is_file(): + if submodsrcfile.name != src.name: # generated source file + pass + else: # subproject + continue + elif submodsrcfile.samefile(src): # self-reference + continue + mod_name = compiler.module_name_to_filename(ancestor_child) + mod_files.append(str(dirname / mod_name)) + return mod_files diff --git a/meson/mesonbuild/backend/vs2010backend.py b/meson/mesonbuild/backend/vs2010backend.py new file mode 100644 index 000000000..f282d02c0 --- /dev/null +++ b/meson/mesonbuild/backend/vs2010backend.py @@ -0,0 +1,1517 @@ +# Copyright 2014-2016 The Meson development team + +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at + +# http://www.apache.org/licenses/LICENSE-2.0 + +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import copy +import os +import pickle +import xml.dom.minidom +import xml.etree.ElementTree as ET +import uuid +import typing as T +from pathlib import Path, PurePath + +from . import backends +from .. import build +from .. import dependencies +from .. import mlog +from .. import compilers +from ..interpreter import Interpreter +from ..mesonlib import ( + MesonException, File, python_command, replace_if_different +) +from ..environment import Environment, build_filename + +def autodetect_vs_version(build: T.Optional[build.Build], interpreter: T.Optional[Interpreter]): + vs_version = os.getenv('VisualStudioVersion', None) + vs_install_dir = os.getenv('VSINSTALLDIR', None) + if not vs_install_dir: + raise MesonException('Could not detect Visual Studio: Environment variable VSINSTALLDIR is not set!\n' + 'Are you running meson from the Visual Studio Developer Command Prompt?') + # VisualStudioVersion is set since Visual Studio 12.0, but sometimes + # vcvarsall.bat doesn't set it, so also use VSINSTALLDIR + if vs_version == '14.0' or 'Visual Studio 14' in vs_install_dir: + from mesonbuild.backend.vs2015backend import Vs2015Backend + return Vs2015Backend(build, interpreter) + if vs_version == '15.0' or 'Visual Studio 17' in vs_install_dir or \ + 'Visual Studio\\2017' in vs_install_dir: + from mesonbuild.backend.vs2017backend import Vs2017Backend + return Vs2017Backend(build, interpreter) + if vs_version == '16.0' or 'Visual Studio 19' in vs_install_dir or \ + 'Visual Studio\\2019' in vs_install_dir: + from mesonbuild.backend.vs2019backend import Vs2019Backend + return Vs2019Backend(build, interpreter) + if 'Visual Studio 10.0' in vs_install_dir: + return Vs2010Backend(build, interpreter) + raise MesonException('Could not detect Visual Studio using VisualStudioVersion: {!r} or VSINSTALLDIR: {!r}!\n' + 'Please specify the exact backend to use.'.format(vs_version, vs_install_dir)) + +def split_o_flags_args(args): + """ + Splits any /O args and returns them. Does not take care of flags overriding + previous ones. Skips non-O flag arguments. + + ['/Ox', '/Ob1'] returns ['/Ox', '/Ob1'] + ['/Oxj', '/MP'] returns ['/Ox', '/Oj'] + """ + o_flags = [] + for arg in args: + if not arg.startswith('/O'): + continue + flags = list(arg[2:]) + # Assume that this one can't be clumped with the others since it takes + # an argument itself + if 'b' in flags: + o_flags.append(arg) + else: + o_flags += ['/O' + f for f in flags] + return o_flags + +def generate_guid_from_path(path, path_type): + return str(uuid.uuid5(uuid.NAMESPACE_URL, 'meson-vs-' + path_type + ':' + str(path))).upper() + +class RegenInfo: + def __init__(self, source_dir, build_dir, depfiles): + self.source_dir = source_dir + self.build_dir = build_dir + self.depfiles = depfiles + +class Vs2010Backend(backends.Backend): + def __init__(self, build: T.Optional[build.Build], interpreter: T.Optional[Interpreter]): + super().__init__(build, interpreter) + self.name = 'vs2010' + self.project_file_version = '10.0.30319.1' + self.platform_toolset = None + self.vs_version = '2010' + self.windows_target_platform_version = None + self.subdirs = {} + self.handled_target_deps = {} + + def get_target_private_dir(self, target): + return os.path.join(self.get_target_dir(target), target.get_id()) + + def generate_custom_generator_commands(self, target, parent_node): + generator_output_files = [] + custom_target_include_dirs = [] + custom_target_output_files = [] + target_private_dir = self.relpath(self.get_target_private_dir(target), self.get_target_dir(target)) + down = self.target_to_build_root(target) + for genlist in target.get_generated_sources(): + if isinstance(genlist, (build.CustomTarget, build.CustomTargetIndex)): + for i in genlist.get_outputs(): + # Path to the generated source from the current vcxproj dir via the build root + ipath = os.path.join(down, self.get_target_dir(genlist), i) + custom_target_output_files.append(ipath) + idir = self.relpath(self.get_target_dir(genlist), self.get_target_dir(target)) + if idir not in custom_target_include_dirs: + custom_target_include_dirs.append(idir) + else: + generator = genlist.get_generator() + exe = generator.get_exe() + infilelist = genlist.get_inputs() + outfilelist = genlist.get_outputs() + source_dir = os.path.join(down, self.build_to_src, genlist.subdir) + exe_arr = self.build_target_to_cmd_array(exe) + idgroup = ET.SubElement(parent_node, 'ItemGroup') + for i in range(len(infilelist)): + if len(infilelist) == len(outfilelist): + sole_output = os.path.join(target_private_dir, outfilelist[i]) + else: + sole_output = '' + curfile = infilelist[i] + infilename = os.path.join(down, curfile.rel_to_builddir(self.build_to_src)) + deps = self.get_custom_target_depend_files(genlist, True) + base_args = generator.get_arglist(infilename) + outfiles_rel = genlist.get_outputs_for(curfile) + outfiles = [os.path.join(target_private_dir, of) for of in outfiles_rel] + generator_output_files += outfiles + args = [x.replace("@INPUT@", infilename).replace('@OUTPUT@', sole_output) + for x in base_args] + args = self.replace_outputs(args, target_private_dir, outfiles_rel) + args = [x.replace("@SOURCE_DIR@", self.environment.get_source_dir()) + .replace("@BUILD_DIR@", target_private_dir) + for x in args] + args = [x.replace("@CURRENT_SOURCE_DIR@", source_dir) for x in args] + args = [x.replace("@SOURCE_ROOT@", self.environment.get_source_dir()) + .replace("@BUILD_ROOT@", self.environment.get_build_dir()) + for x in args] + args = [x.replace('\\', '/') for x in args] + cmd = exe_arr + self.replace_extra_args(args, genlist) + # Always use a wrapper because MSBuild eats random characters when + # there are many arguments. + tdir_abs = os.path.join(self.environment.get_build_dir(), self.get_target_dir(target)) + cmd = self.as_meson_exe_cmdline( + 'generator ' + cmd[0], + cmd[0], + cmd[1:], + workdir=tdir_abs, + capture=outfiles[0] if generator.capture else None, + force_serialize=True + ) + deps = cmd[-1:] + deps + abs_pdir = os.path.join(self.environment.get_build_dir(), self.get_target_dir(target)) + os.makedirs(abs_pdir, exist_ok=True) + cbs = ET.SubElement(idgroup, 'CustomBuild', Include=infilename) + ET.SubElement(cbs, 'Command').text = ' '.join(self.quote_arguments(cmd)) + ET.SubElement(cbs, 'Outputs').text = ';'.join(outfiles) + ET.SubElement(cbs, 'AdditionalInputs').text = ';'.join(deps) + return generator_output_files, custom_target_output_files, custom_target_include_dirs + + def generate(self): + target_machine = self.interpreter.builtin['target_machine'].cpu_family_method(None, None) + if target_machine == '64' or target_machine == 'x86_64': + # amd64 or x86_64 + self.platform = 'x64' + elif target_machine == 'x86': + # x86 + self.platform = 'Win32' + elif target_machine == 'aarch64' or target_machine == 'arm64': + self.platform = 'arm64' + elif 'arm' in target_machine.lower(): + self.platform = 'ARM' + else: + raise MesonException('Unsupported Visual Studio platform: ' + target_machine) + self.buildtype = self.environment.coredata.get_builtin_option('buildtype') + sln_filename = os.path.join(self.environment.get_build_dir(), self.build.project_name + '.sln') + projlist = self.generate_projects() + self.gen_testproj('RUN_TESTS', os.path.join(self.environment.get_build_dir(), 'RUN_TESTS.vcxproj')) + self.gen_installproj('RUN_INSTALL', os.path.join(self.environment.get_build_dir(), 'RUN_INSTALL.vcxproj')) + self.gen_regenproj('REGEN', os.path.join(self.environment.get_build_dir(), 'REGEN.vcxproj')) + self.generate_solution(sln_filename, projlist) + self.generate_regen_info() + Vs2010Backend.touch_regen_timestamp(self.environment.get_build_dir()) + + @staticmethod + def get_regen_stampfile(build_dir): + return os.path.join(os.path.join(build_dir, Environment.private_dir), 'regen.stamp') + + @staticmethod + def touch_regen_timestamp(build_dir): + with open(Vs2010Backend.get_regen_stampfile(build_dir), 'w'): + pass + + def generate_regen_info(self): + deps = self.get_regen_filelist() + regeninfo = RegenInfo(self.environment.get_source_dir(), + self.environment.get_build_dir(), + deps) + filename = os.path.join(self.environment.get_scratch_dir(), + 'regeninfo.dump') + with open(filename, 'wb') as f: + pickle.dump(regeninfo, f) + + def get_vcvars_command(self): + has_arch_values = 'VSCMD_ARG_TGT_ARCH' in os.environ and 'VSCMD_ARG_HOST_ARCH' in os.environ + + # Use vcvarsall.bat if we found it. + if 'VCINSTALLDIR' in os.environ: + vs_version = os.environ['VisualStudioVersion'] \ + if 'VisualStudioVersion' in os.environ else None + relative_path = 'Auxiliary\\Build\\' if vs_version >= '15.0' else '' + script_path = os.environ['VCINSTALLDIR'] + relative_path + 'vcvarsall.bat' + if os.path.exists(script_path): + if has_arch_values: + target_arch = os.environ['VSCMD_ARG_TGT_ARCH'] + host_arch = os.environ['VSCMD_ARG_HOST_ARCH'] + else: + target_arch = os.environ.get('Platform', 'x86') + host_arch = target_arch + arch = host_arch + '_' + target_arch if host_arch != target_arch else target_arch + return '"%s" %s' % (script_path, arch) + + # Otherwise try the VS2017 Developer Command Prompt. + if 'VS150COMNTOOLS' in os.environ and has_arch_values: + script_path = os.environ['VS150COMNTOOLS'] + 'VsDevCmd.bat' + if os.path.exists(script_path): + return '"%s" -arch=%s -host_arch=%s' % \ + (script_path, os.environ['VSCMD_ARG_TGT_ARCH'], os.environ['VSCMD_ARG_HOST_ARCH']) + return '' + + def get_obj_target_deps(self, obj_list): + result = {} + for o in obj_list: + if isinstance(o, build.ExtractedObjects): + result[o.target.get_id()] = o.target + return result.items() + + def get_target_deps(self, t, recursive=False): + all_deps = {} + for target in t.values(): + if isinstance(target, build.CustomTarget): + for d in target.get_target_dependencies(): + all_deps[d.get_id()] = d + elif isinstance(target, build.RunTarget): + for d in [target.command] + target.args: + if isinstance(d, (build.BuildTarget, build.CustomTarget)): + all_deps[d.get_id()] = d + elif isinstance(target, build.BuildTarget): + for ldep in target.link_targets: + if isinstance(ldep, build.CustomTargetIndex): + all_deps[ldep.get_id()] = ldep.target + else: + all_deps[ldep.get_id()] = ldep + for ldep in target.link_whole_targets: + if isinstance(ldep, build.CustomTargetIndex): + all_deps[ldep.get_id()] = ldep.target + else: + all_deps[ldep.get_id()] = ldep + for obj_id, objdep in self.get_obj_target_deps(target.objects): + all_deps[obj_id] = objdep + for gendep in target.get_generated_sources(): + if isinstance(gendep, build.CustomTarget): + all_deps[gendep.get_id()] = gendep + elif isinstance(gendep, build.CustomTargetIndex): + all_deps[gendep.target.get_id()] = gendep.target + else: + gen_exe = gendep.generator.get_exe() + if isinstance(gen_exe, build.Executable): + all_deps[gen_exe.get_id()] = gen_exe + else: + raise MesonException('Unknown target type for target %s' % target) + if not t or not recursive: + return all_deps + ret = self.get_target_deps(all_deps, recursive) + ret.update(all_deps) + return ret + + def generate_solution_dirs(self, ofile, parents): + prj_templ = 'Project("{%s}") = "%s", "%s", "{%s}"\n' + iterpaths = reversed(parents) + # Skip first path + next(iterpaths) + for path in iterpaths: + if path not in self.subdirs: + basename = path.name + identifier = generate_guid_from_path(path, 'subdir') + # top-level directories have None as their parent_dir + parent_dir = path.parent + parent_identifier = self.subdirs[parent_dir][0] \ + if parent_dir != PurePath('.') else None + self.subdirs[path] = (identifier, parent_identifier) + prj_line = prj_templ % ( + self.environment.coredata.lang_guids['directory'], + basename, basename, self.subdirs[path][0]) + ofile.write(prj_line) + ofile.write('EndProject\n') + + def generate_solution(self, sln_filename, projlist): + default_projlist = self.get_build_by_default_targets() + sln_filename_tmp = sln_filename + '~' + with open(sln_filename_tmp, 'w', encoding='utf-8') as ofile: + ofile.write('Microsoft Visual Studio Solution File, Format ' + 'Version 11.00\n') + ofile.write('# Visual Studio ' + self.vs_version + '\n') + prj_templ = 'Project("{%s}") = "%s", "%s", "{%s}"\n' + for prj in projlist: + coredata = self.environment.coredata + if coredata.get_builtin_option('layout') == 'mirror': + self.generate_solution_dirs(ofile, prj[1].parents) + target = self.build.targets[prj[0]] + lang = 'default' + if hasattr(target, 'compilers') and target.compilers: + for lang_out in target.compilers.keys(): + lang = lang_out + break + prj_line = prj_templ % ( + self.environment.coredata.lang_guids[lang], + prj[0], prj[1], prj[2]) + ofile.write(prj_line) + target_dict = {target.get_id(): target} + # Get recursive deps + recursive_deps = self.get_target_deps( + target_dict, recursive=True) + ofile.write('EndProject\n') + for dep, target in recursive_deps.items(): + if prj[0] in default_projlist: + default_projlist[dep] = target + + test_line = prj_templ % (self.environment.coredata.lang_guids['default'], + 'RUN_TESTS', 'RUN_TESTS.vcxproj', + self.environment.coredata.test_guid) + ofile.write(test_line) + ofile.write('EndProject\n') + regen_line = prj_templ % (self.environment.coredata.lang_guids['default'], + 'REGEN', 'REGEN.vcxproj', + self.environment.coredata.regen_guid) + ofile.write(regen_line) + ofile.write('EndProject\n') + install_line = prj_templ % (self.environment.coredata.lang_guids['default'], + 'RUN_INSTALL', 'RUN_INSTALL.vcxproj', + self.environment.coredata.install_guid) + ofile.write(install_line) + ofile.write('EndProject\n') + ofile.write('Global\n') + ofile.write('\tGlobalSection(SolutionConfigurationPlatforms) = ' + 'preSolution\n') + ofile.write('\t\t%s|%s = %s|%s\n' % + (self.buildtype, self.platform, self.buildtype, + self.platform)) + ofile.write('\tEndGlobalSection\n') + ofile.write('\tGlobalSection(ProjectConfigurationPlatforms) = ' + 'postSolution\n') + ofile.write('\t\t{%s}.%s|%s.ActiveCfg = %s|%s\n' % + (self.environment.coredata.regen_guid, self.buildtype, + self.platform, self.buildtype, self.platform)) + ofile.write('\t\t{%s}.%s|%s.Build.0 = %s|%s\n' % + (self.environment.coredata.regen_guid, self.buildtype, + self.platform, self.buildtype, self.platform)) + # Create the solution configuration + for p in projlist: + # Add to the list of projects in this solution + ofile.write('\t\t{%s}.%s|%s.ActiveCfg = %s|%s\n' % + (p[2], self.buildtype, self.platform, + self.buildtype, self.platform)) + if p[0] in default_projlist and \ + not isinstance(self.build.targets[p[0]], build.RunTarget): + # Add to the list of projects to be built + ofile.write('\t\t{%s}.%s|%s.Build.0 = %s|%s\n' % + (p[2], self.buildtype, self.platform, + self.buildtype, self.platform)) + ofile.write('\t\t{%s}.%s|%s.ActiveCfg = %s|%s\n' % + (self.environment.coredata.test_guid, self.buildtype, + self.platform, self.buildtype, self.platform)) + ofile.write('\t\t{%s}.%s|%s.ActiveCfg = %s|%s\n' % + (self.environment.coredata.install_guid, self.buildtype, + self.platform, self.buildtype, self.platform)) + ofile.write('\tEndGlobalSection\n') + ofile.write('\tGlobalSection(SolutionProperties) = preSolution\n') + ofile.write('\t\tHideSolutionNode = FALSE\n') + ofile.write('\tEndGlobalSection\n') + if self.subdirs: + ofile.write('\tGlobalSection(NestedProjects) = ' + 'preSolution\n') + for p in projlist: + if p[1].parent != PurePath('.'): + ofile.write("\t\t{%s} = {%s}\n" % (p[2], self.subdirs[p[1].parent][0])) + for subdir in self.subdirs.values(): + if subdir[1]: + ofile.write("\t\t{%s} = {%s}\n" % (subdir[0], subdir[1])) + ofile.write('\tEndGlobalSection\n') + ofile.write('EndGlobal\n') + replace_if_different(sln_filename, sln_filename_tmp) + + def generate_projects(self): + startup_project = self.environment.coredata.backend_options['backend_startup_project'].value + projlist = [] + startup_idx = 0 + for (i, (name, target)) in enumerate(self.build.targets.items()): + if startup_project and startup_project == target.get_basename(): + startup_idx = i + outdir = Path( + self.environment.get_build_dir(), + self.get_target_dir(target) + ) + outdir.mkdir(exist_ok=True, parents=True) + fname = name + '.vcxproj' + target_dir = PurePath(self.get_target_dir(target)) + relname = target_dir / fname + projfile_path = outdir / fname + proj_uuid = self.environment.coredata.target_guids[name] + self.gen_vcxproj(target, str(projfile_path), proj_uuid) + projlist.append((name, relname, proj_uuid)) + + # Put the startup project first in the project list + if startup_idx: + projlist = [projlist[startup_idx]] + projlist[0:startup_idx] + projlist[startup_idx + 1:-1] + + return projlist + + def split_sources(self, srclist): + sources = [] + headers = [] + objects = [] + languages = [] + for i in srclist: + if self.environment.is_header(i): + headers.append(i) + elif self.environment.is_object(i): + objects.append(i) + elif self.environment.is_source(i): + sources.append(i) + lang = self.lang_from_source_file(i) + if lang not in languages: + languages.append(lang) + elif self.environment.is_library(i): + pass + else: + # Everything that is not an object or source file is considered a header. + headers.append(i) + return sources, headers, objects, languages + + def target_to_build_root(self, target): + if self.get_target_dir(target) == '': + return '' + + directories = os.path.normpath(self.get_target_dir(target)).split(os.sep) + return os.sep.join(['..'] * len(directories)) + + def quote_arguments(self, arr): + return ['"%s"' % i for i in arr] + + def add_project_reference(self, root, include, projid, link_outputs=False): + ig = ET.SubElement(root, 'ItemGroup') + pref = ET.SubElement(ig, 'ProjectReference', Include=include) + ET.SubElement(pref, 'Project').text = '{%s}' % projid + if not link_outputs: + # Do not link in generated .lib files from dependencies automatically. + # We only use the dependencies for ordering and link in the generated + # objects and .lib files manually. + ET.SubElement(pref, 'LinkLibraryDependencies').text = 'false' + + def add_target_deps(self, root, target): + target_dict = {target.get_id(): target} + for dep in self.get_target_deps(target_dict).values(): + if dep.get_id() in self.handled_target_deps[target.get_id()]: + # This dependency was already handled manually. + continue + relpath = self.get_target_dir_relative_to(dep, target) + vcxproj = os.path.join(relpath, dep.get_id() + '.vcxproj') + tid = self.environment.coredata.target_guids[dep.get_id()] + self.add_project_reference(root, vcxproj, tid) + + def create_basic_crap(self, target, guid): + project_name = target.name + root = ET.Element('Project', {'DefaultTargets': "Build", + 'ToolsVersion': '4.0', + 'xmlns': 'http://schemas.microsoft.com/developer/msbuild/2003'}) + confitems = ET.SubElement(root, 'ItemGroup', {'Label': 'ProjectConfigurations'}) + prjconf = ET.SubElement(confitems, 'ProjectConfiguration', + {'Include': self.buildtype + '|' + self.platform}) + p = ET.SubElement(prjconf, 'Configuration') + p.text = self.buildtype + pl = ET.SubElement(prjconf, 'Platform') + pl.text = self.platform + globalgroup = ET.SubElement(root, 'PropertyGroup', Label='Globals') + guidelem = ET.SubElement(globalgroup, 'ProjectGuid') + guidelem.text = '{%s}' % guid + kw = ET.SubElement(globalgroup, 'Keyword') + kw.text = self.platform + 'Proj' + p = ET.SubElement(globalgroup, 'Platform') + p.text = self.platform + pname = ET.SubElement(globalgroup, 'ProjectName') + pname.text = project_name + if self.windows_target_platform_version: + ET.SubElement(globalgroup, 'WindowsTargetPlatformVersion').text = self.windows_target_platform_version + ET.SubElement(root, 'Import', Project=r'$(VCTargetsPath)\Microsoft.Cpp.Default.props') + type_config = ET.SubElement(root, 'PropertyGroup', Label='Configuration') + ET.SubElement(type_config, 'ConfigurationType') + ET.SubElement(type_config, 'CharacterSet').text = 'MultiByte' + ET.SubElement(type_config, 'UseOfMfc').text = 'false' + if self.platform_toolset: + ET.SubElement(type_config, 'PlatformToolset').text = self.platform_toolset + ET.SubElement(root, 'Import', Project=r'$(VCTargetsPath)\Microsoft.Cpp.props') + direlem = ET.SubElement(root, 'PropertyGroup') + fver = ET.SubElement(direlem, '_ProjectFileVersion') + fver.text = self.project_file_version + outdir = ET.SubElement(direlem, 'OutDir') + outdir.text = '.\\' + intdir = ET.SubElement(direlem, 'IntDir') + intdir.text = target.get_id() + '\\' + tname = ET.SubElement(direlem, 'TargetName') + tname.text = target.name + return root + + def gen_run_target_vcxproj(self, target, ofname, guid): + root = self.create_basic_crap(target, guid) + if not target.command: + # FIXME: This is an alias target that doesn't run any command, there + # is probably a better way than running a this dummy command. + cmd_raw = python_command + ['-c', 'exit'] + else: + cmd_raw = [target.command] + target.args + cmd = python_command + \ + [os.path.join(self.environment.get_script_dir(), 'commandrunner.py'), + self.environment.get_source_dir(), + self.environment.get_build_dir(), + self.get_target_dir(target)] + self.environment.get_build_command() + for i in cmd_raw: + if isinstance(i, build.BuildTarget): + cmd.append(os.path.join(self.environment.get_build_dir(), self.get_target_filename(i))) + elif isinstance(i, dependencies.ExternalProgram): + cmd += i.get_command() + elif isinstance(i, File): + relfname = i.rel_to_builddir(self.build_to_src) + cmd.append(os.path.join(self.environment.get_build_dir(), relfname)) + elif isinstance(i, str): + # Escape embedded quotes, because we quote the entire argument below. + cmd.append(i.replace('"', '\\"')) + else: + cmd.append(i) + cmd_templ = '''"%s" ''' * len(cmd) + self.add_custom_build(root, 'run_target', cmd_templ % tuple(cmd)) + ET.SubElement(root, 'Import', Project=r'$(VCTargetsPath)\Microsoft.Cpp.targets') + self.add_regen_dependency(root) + self.add_target_deps(root, target) + self._prettyprint_vcxproj_xml(ET.ElementTree(root), ofname) + + def gen_custom_target_vcxproj(self, target, ofname, guid): + root = self.create_basic_crap(target, guid) + # We need to always use absolute paths because our invocation is always + # from the target dir, not the build root. + target.absolute_paths = True + (srcs, ofilenames, cmd) = self.eval_custom_target_command(target, True) + depend_files = self.get_custom_target_depend_files(target, True) + # Always use a wrapper because MSBuild eats random characters when + # there are many arguments. + tdir_abs = os.path.join(self.environment.get_build_dir(), self.get_target_dir(target)) + extra_bdeps = target.get_transitive_build_target_deps() + wrapper_cmd = self.as_meson_exe_cmdline(target.name, target.command[0], cmd[1:], + # All targets run from the target dir + workdir=tdir_abs, + extra_bdeps=extra_bdeps, + capture=ofilenames[0] if target.capture else None, + force_serialize=True) + if target.build_always_stale: + # Use a nonexistent file to always consider the target out-of-date. + ofilenames += [self.nonexistent_file(os.path.join(self.environment.get_scratch_dir(), + 'outofdate.file'))] + self.add_custom_build(root, 'custom_target', ' '.join(self.quote_arguments(wrapper_cmd)), + deps=wrapper_cmd[-1:] + srcs + depend_files, outputs=ofilenames) + ET.SubElement(root, 'Import', Project=r'$(VCTargetsPath)\Microsoft.Cpp.targets') + self.generate_custom_generator_commands(target, root) + self.add_regen_dependency(root) + self.add_target_deps(root, target) + self._prettyprint_vcxproj_xml(ET.ElementTree(root), ofname) + + @classmethod + def lang_from_source_file(cls, src): + ext = src.split('.')[-1] + if ext in compilers.c_suffixes: + return 'c' + if ext in compilers.cpp_suffixes: + return 'cpp' + raise MesonException('Could not guess language from source file %s.' % src) + + def add_pch(self, pch_sources, lang, inc_cl): + if lang in pch_sources: + self.use_pch(pch_sources, lang, inc_cl) + + def create_pch(self, pch_sources, lang, inc_cl): + pch = ET.SubElement(inc_cl, 'PrecompiledHeader') + pch.text = 'Create' + self.add_pch_files(pch_sources, lang, inc_cl) + + def use_pch(self, pch_sources, lang, inc_cl): + pch = ET.SubElement(inc_cl, 'PrecompiledHeader') + pch.text = 'Use' + header = self.add_pch_files(pch_sources, lang, inc_cl) + pch_include = ET.SubElement(inc_cl, 'ForcedIncludeFiles') + pch_include.text = header + ';%(ForcedIncludeFiles)' + + def add_pch_files(self, pch_sources, lang, inc_cl): + header = os.path.basename(pch_sources[lang][0]) + pch_file = ET.SubElement(inc_cl, 'PrecompiledHeaderFile') + # When USING PCHs, MSVC will not do the regular include + # directory lookup, but simply use a string match to find the + # PCH to use. That means the #include directive must match the + # pch_file.text used during PCH CREATION verbatim. + # When CREATING a PCH, MSVC will do the include directory + # lookup to find the actual PCH header to use. Thus, the PCH + # header must either be in the include_directories of the target + # or be in the same directory as the PCH implementation. + pch_file.text = header + pch_out = ET.SubElement(inc_cl, 'PrecompiledHeaderOutputFile') + pch_out.text = '$(IntDir)$(TargetName)-%s.pch' % lang + return header + + def is_argument_with_msbuild_xml_entry(self, entry): + # Remove arguments that have a top level XML entry so + # they are not used twice. + # FIXME add args as needed. + return entry[1:].startswith('M') + + def add_additional_options(self, lang, parent_node, file_args): + args = [] + for arg in file_args[lang].to_native(): + if self.is_argument_with_msbuild_xml_entry(arg): + continue + if arg == '%(AdditionalOptions)': + args.append(arg) + else: + args.append(self.escape_additional_option(arg)) + ET.SubElement(parent_node, "AdditionalOptions").text = ' '.join(args) + + def add_preprocessor_defines(self, lang, parent_node, file_defines): + defines = [] + for define in file_defines[lang]: + if define == '%(PreprocessorDefinitions)': + defines.append(define) + else: + defines.append(self.escape_preprocessor_define(define)) + ET.SubElement(parent_node, "PreprocessorDefinitions").text = ';'.join(defines) + + def add_include_dirs(self, lang, parent_node, file_inc_dirs): + dirs = file_inc_dirs[lang] + ET.SubElement(parent_node, "AdditionalIncludeDirectories").text = ';'.join(dirs) + + @staticmethod + def has_objects(objects, additional_objects, generated_objects): + # Ignore generated objects, those are automatically used by MSBuild because they are part of + # the CustomBuild Outputs. + return len(objects) + len(additional_objects) > 0 + + @staticmethod + def add_generated_objects(node, generated_objects): + # Do not add generated objects to project file. Those are automatically used by MSBuild, because + # they are part of the CustomBuild Outputs. + return + + @staticmethod + def escape_preprocessor_define(define): + # See: https://msdn.microsoft.com/en-us/library/bb383819.aspx + table = str.maketrans({'%': '%25', '$': '%24', '@': '%40', + "'": '%27', ';': '%3B', '?': '%3F', '*': '%2A', + # We need to escape backslash because it'll be un-escaped by + # Windows during process creation when it parses the arguments + # Basically, this converts `\` to `\\`. + '\\': '\\\\'}) + return define.translate(table) + + @staticmethod + def escape_additional_option(option): + # See: https://msdn.microsoft.com/en-us/library/bb383819.aspx + table = str.maketrans({'%': '%25', '$': '%24', '@': '%40', + "'": '%27', ';': '%3B', '?': '%3F', '*': '%2A', ' ': '%20'}) + option = option.translate(table) + # Since we're surrounding the option with ", if it ends in \ that will + # escape the " when the process arguments are parsed and the starting + # " will not terminate. So we escape it if that's the case. I'm not + # kidding, this is how escaping works for process args on Windows. + if option.endswith('\\'): + option += '\\' + return '"{}"'.format(option) + + @staticmethod + def split_link_args(args): + """ + Split a list of link arguments into three lists: + * library search paths + * library filenames (or paths) + * other link arguments + """ + lpaths = [] + libs = [] + other = [] + for arg in args: + if arg.startswith('/LIBPATH:'): + lpath = arg[9:] + # De-dup library search paths by removing older entries when + # a new one is found. This is necessary because unlike other + # search paths such as the include path, the library is + # searched for in the newest (right-most) search path first. + if lpath in lpaths: + lpaths.remove(lpath) + lpaths.append(lpath) + elif arg.startswith(('/', '-')): + other.append(arg) + # It's ok if we miss libraries with non-standard extensions here. + # They will go into the general link arguments. + elif arg.endswith('.lib') or arg.endswith('.a'): + # De-dup + if arg not in libs: + libs.append(arg) + else: + other.append(arg) + return lpaths, libs, other + + def _get_cl_compiler(self, target): + for lang, c in target.compilers.items(): + if lang in ('c', 'cpp'): + return c + # No source files, only objects, but we still need a compiler, so + # return a found compiler + if len(target.objects) > 0: + for lang, c in self.environment.coredata.compilers[target.for_machine].items(): + if lang in ('c', 'cpp'): + return c + raise MesonException('Could not find a C or C++ compiler. MSVC can only build C/C++ projects.') + + def _prettyprint_vcxproj_xml(self, tree, ofname): + ofname_tmp = ofname + '~' + tree.write(ofname_tmp, encoding='utf-8', xml_declaration=True) + + # ElementTree can not do prettyprinting so do it manually + doc = xml.dom.minidom.parse(ofname_tmp) + with open(ofname_tmp, 'w', encoding='utf-8') as of: + of.write(doc.toprettyxml()) + replace_if_different(ofname, ofname_tmp) + + def gen_vcxproj(self, target, ofname, guid): + mlog.debug('Generating vcxproj %s.' % target.name) + subsystem = 'Windows' + self.handled_target_deps[target.get_id()] = [] + if isinstance(target, build.Executable): + conftype = 'Application' + if not target.gui_app: + subsystem = 'Console' + elif isinstance(target, build.StaticLibrary): + conftype = 'StaticLibrary' + elif isinstance(target, build.SharedLibrary): + conftype = 'DynamicLibrary' + elif isinstance(target, build.CustomTarget): + return self.gen_custom_target_vcxproj(target, ofname, guid) + elif isinstance(target, build.RunTarget): + return self.gen_run_target_vcxproj(target, ofname, guid) + else: + raise MesonException('Unknown target type for %s' % target.get_basename()) + # Prefix to use to access the build root from the vcxproj dir + down = self.target_to_build_root(target) + # Prefix to use to access the source tree's root from the vcxproj dir + proj_to_src_root = os.path.join(down, self.build_to_src) + # Prefix to use to access the source tree's subdir from the vcxproj dir + proj_to_src_dir = os.path.join(proj_to_src_root, self.get_target_dir(target)) + (sources, headers, objects, languages) = self.split_sources(target.sources) + if self.is_unity(target): + sources = self.generate_unity_files(target, sources) + compiler = self._get_cl_compiler(target) + buildtype_args = compiler.get_buildtype_args(self.buildtype) + buildtype_link_args = compiler.get_buildtype_linker_args(self.buildtype) + vscrt_type = self.environment.coredata.base_options['b_vscrt'] + project_name = target.name + target_name = target.name + root = ET.Element('Project', {'DefaultTargets': "Build", + 'ToolsVersion': '4.0', + 'xmlns': 'http://schemas.microsoft.com/developer/msbuild/2003'}) + confitems = ET.SubElement(root, 'ItemGroup', {'Label': 'ProjectConfigurations'}) + prjconf = ET.SubElement(confitems, 'ProjectConfiguration', + {'Include': self.buildtype + '|' + self.platform}) + p = ET.SubElement(prjconf, 'Configuration') + p.text = self.buildtype + pl = ET.SubElement(prjconf, 'Platform') + pl.text = self.platform + # Globals + globalgroup = ET.SubElement(root, 'PropertyGroup', Label='Globals') + guidelem = ET.SubElement(globalgroup, 'ProjectGuid') + guidelem.text = '{%s}' % guid + kw = ET.SubElement(globalgroup, 'Keyword') + kw.text = self.platform + 'Proj' + ns = ET.SubElement(globalgroup, 'RootNamespace') + ns.text = target_name + p = ET.SubElement(globalgroup, 'Platform') + p.text = self.platform + pname = ET.SubElement(globalgroup, 'ProjectName') + pname.text = project_name + if self.windows_target_platform_version: + ET.SubElement(globalgroup, 'WindowsTargetPlatformVersion').text = self.windows_target_platform_version + ET.SubElement(root, 'Import', Project=r'$(VCTargetsPath)\Microsoft.Cpp.Default.props') + # Start configuration + type_config = ET.SubElement(root, 'PropertyGroup', Label='Configuration') + ET.SubElement(type_config, 'ConfigurationType').text = conftype + ET.SubElement(type_config, 'CharacterSet').text = 'MultiByte' + if self.platform_toolset: + ET.SubElement(type_config, 'PlatformToolset').text = self.platform_toolset + # FIXME: Meson's LTO support needs to be integrated here + ET.SubElement(type_config, 'WholeProgramOptimization').text = 'false' + # Let VS auto-set the RTC level + ET.SubElement(type_config, 'BasicRuntimeChecks').text = 'Default' + # Incremental linking increases code size + if '/INCREMENTAL:NO' in buildtype_link_args: + ET.SubElement(type_config, 'LinkIncremental').text = 'false' + + # Build information + compiles = ET.SubElement(root, 'ItemDefinitionGroup') + clconf = ET.SubElement(compiles, 'ClCompile') + # CRT type; debug or release + if vscrt_type.value == 'from_buildtype': + if self.buildtype == 'debug': + ET.SubElement(type_config, 'UseDebugLibraries').text = 'true' + ET.SubElement(clconf, 'RuntimeLibrary').text = 'MultiThreadedDebugDLL' + else: + ET.SubElement(type_config, 'UseDebugLibraries').text = 'false' + ET.SubElement(clconf, 'RuntimeLibrary').text = 'MultiThreadedDLL' + elif vscrt_type.value == 'mdd': + ET.SubElement(type_config, 'UseDebugLibraries').text = 'true' + ET.SubElement(clconf, 'RuntimeLibrary').text = 'MultiThreadedDebugDLL' + elif vscrt_type.value == 'mt': + # FIXME, wrong + ET.SubElement(type_config, 'UseDebugLibraries').text = 'false' + ET.SubElement(clconf, 'RuntimeLibrary').text = 'MultiThreaded' + elif vscrt_type.value == 'mtd': + # FIXME, wrong + ET.SubElement(type_config, 'UseDebugLibraries').text = 'true' + ET.SubElement(clconf, 'RuntimeLibrary').text = 'MultiThreadedDebug' + else: + ET.SubElement(type_config, 'UseDebugLibraries').text = 'false' + ET.SubElement(clconf, 'RuntimeLibrary').text = 'MultiThreadedDLL' + # Debug format + if '/ZI' in buildtype_args: + ET.SubElement(clconf, 'DebugInformationFormat').text = 'EditAndContinue' + elif '/Zi' in buildtype_args: + ET.SubElement(clconf, 'DebugInformationFormat').text = 'ProgramDatabase' + elif '/Z7' in buildtype_args: + ET.SubElement(clconf, 'DebugInformationFormat').text = 'OldStyle' + # Runtime checks + if '/RTC1' in buildtype_args: + ET.SubElement(clconf, 'BasicRuntimeChecks').text = 'EnableFastChecks' + elif '/RTCu' in buildtype_args: + ET.SubElement(clconf, 'BasicRuntimeChecks').text = 'UninitializedLocalUsageCheck' + elif '/RTCs' in buildtype_args: + ET.SubElement(clconf, 'BasicRuntimeChecks').text = 'StackFrameRuntimeCheck' + # Exception handling has to be set in the xml in addition to the "AdditionalOptions" because otherwise + # cl will give warning D9025: overriding '/Ehs' with cpp_eh value + if 'cpp' in target.compilers: + eh = self.environment.coredata.compiler_options[target.for_machine]['cpp']['eh'] + if eh.value == 'a': + ET.SubElement(clconf, 'ExceptionHandling').text = 'Async' + elif eh.value == 's': + ET.SubElement(clconf, 'ExceptionHandling').text = 'SyncCThrow' + elif eh.value == 'none': + ET.SubElement(clconf, 'ExceptionHandling').text = 'false' + else: # 'sc' or 'default' + ET.SubElement(clconf, 'ExceptionHandling').text = 'Sync' + # End configuration + ET.SubElement(root, 'Import', Project=r'$(VCTargetsPath)\Microsoft.Cpp.props') + generated_files, custom_target_output_files, generated_files_include_dirs = self.generate_custom_generator_commands(target, root) + (gen_src, gen_hdrs, gen_objs, gen_langs) = self.split_sources(generated_files) + (custom_src, custom_hdrs, custom_objs, custom_langs) = self.split_sources(custom_target_output_files) + gen_src += custom_src + gen_hdrs += custom_hdrs + gen_langs += custom_langs + # Project information + direlem = ET.SubElement(root, 'PropertyGroup') + fver = ET.SubElement(direlem, '_ProjectFileVersion') + fver.text = self.project_file_version + outdir = ET.SubElement(direlem, 'OutDir') + outdir.text = '.\\' + intdir = ET.SubElement(direlem, 'IntDir') + intdir.text = target.get_id() + '\\' + tfilename = os.path.splitext(target.get_filename()) + ET.SubElement(direlem, 'TargetName').text = tfilename[0] + ET.SubElement(direlem, 'TargetExt').text = tfilename[1] + + # Arguments, include dirs, defines for all files in the current target + target_args = [] + target_defines = [] + target_inc_dirs = [] + # Arguments, include dirs, defines passed to individual files in + # a target; perhaps because the args are language-specific + # + # file_args is also later split out into defines and include_dirs in + # case someone passed those in there + file_args = {l: c.compiler_args() for l, c in target.compilers.items()} + file_defines = {l: [] for l in target.compilers} + file_inc_dirs = {l: [] for l in target.compilers} + # The order in which these compile args are added must match + # generate_single_compile() and generate_basic_compiler_args() + for l, comp in target.compilers.items(): + if l in file_args: + file_args[l] += compilers.get_base_compile_args( + self.get_base_options_for_target(target), comp) + file_args[l] += comp.get_option_compile_args( + self.environment.coredata.compiler_options[target.for_machine][comp.language]) + + # Add compile args added using add_project_arguments() + for l, args in self.build.projects_args[target.for_machine].get(target.subproject, {}).items(): + if l in file_args: + file_args[l] += args + # Add compile args added using add_global_arguments() + # These override per-project arguments + for l, args in self.build.global_args[target.for_machine].items(): + if l in file_args: + file_args[l] += args + # Compile args added from the env or cross file: CFLAGS/CXXFLAGS, etc. We want these + # to override all the defaults, but not the per-target compile args. + for l in file_args.keys(): + opts = self.environment.coredata.compiler_options[target.for_machine][l] + k = 'args' + if k in opts: + file_args[l] += opts[k].value + for args in file_args.values(): + # This is where Visual Studio will insert target_args, target_defines, + # etc, which are added later from external deps (see below). + args += ['%(AdditionalOptions)', '%(PreprocessorDefinitions)', '%(AdditionalIncludeDirectories)'] + # Add custom target dirs as includes automatically, but before + # target-specific include dirs. See _generate_single_compile() in + # the ninja backend for caveats. + args += ['-I' + arg for arg in generated_files_include_dirs] + # Add include dirs from the `include_directories:` kwarg on the target + # and from `include_directories:` of internal deps of the target. + # + # Target include dirs should override internal deps include dirs. + # This is handled in BuildTarget.process_kwargs() + # + # Include dirs from internal deps should override include dirs from + # external deps and must maintain the order in which they are + # specified. Hence, we must reverse so that the order is preserved. + # + # These are per-target, but we still add them as per-file because we + # need them to be looked in first. + for d in reversed(target.get_include_dirs()): + # reversed is used to keep order of includes + for i in reversed(d.get_incdirs()): + curdir = os.path.join(d.get_curdir(), i) + args.append('-I' + self.relpath(curdir, target.subdir)) # build dir + args.append('-I' + os.path.join(proj_to_src_root, curdir)) # src dir + for i in d.get_extra_build_dirs(): + curdir = os.path.join(d.get_curdir(), i) + args.append('-I' + self.relpath(curdir, target.subdir)) # build dir + # Add per-target compile args, f.ex, `c_args : ['/DFOO']`. We set these + # near the end since these are supposed to override everything else. + for l, args in target.extra_args.items(): + if l in file_args: + file_args[l] += args + # The highest priority includes. In order of directory search: + # target private dir, target build dir, target source dir + for args in file_args.values(): + t_inc_dirs = [self.relpath(self.get_target_private_dir(target), + self.get_target_dir(target))] + if target.implicit_include_directories: + t_inc_dirs += ['.'] + if target.implicit_include_directories: + t_inc_dirs += [proj_to_src_dir] + args += ['-I' + arg for arg in t_inc_dirs] + + # Split preprocessor defines and include directories out of the list of + # all extra arguments. The rest go into %(AdditionalOptions). + for l, args in file_args.items(): + for arg in args[:]: + if arg.startswith(('-D', '/D')) or arg == '%(PreprocessorDefinitions)': + file_args[l].remove(arg) + # Don't escape the marker + if arg == '%(PreprocessorDefinitions)': + define = arg + else: + define = arg[2:] + # De-dup + if define in file_defines[l]: + file_defines[l].remove(define) + file_defines[l].append(define) + elif arg.startswith(('-I', '/I')) or arg == '%(AdditionalIncludeDirectories)': + file_args[l].remove(arg) + # Don't escape the marker + if arg == '%(AdditionalIncludeDirectories)': + inc_dir = arg + else: + inc_dir = arg[2:] + # De-dup + if inc_dir not in file_inc_dirs[l]: + file_inc_dirs[l].append(inc_dir) + + # Split compile args needed to find external dependencies + # Link args are added while generating the link command + for d in reversed(target.get_external_deps()): + # Cflags required by external deps might have UNIX-specific flags, + # so filter them out if needed + if isinstance(d, dependencies.OpenMPDependency): + ET.SubElement(clconf, 'OpenMPSupport').text = 'true' + else: + d_compile_args = compiler.unix_args_to_native(d.get_compile_args()) + for arg in d_compile_args: + if arg.startswith(('-D', '/D')): + define = arg[2:] + # De-dup + if define in target_defines: + target_defines.remove(define) + target_defines.append(define) + elif arg.startswith(('-I', '/I')): + inc_dir = arg[2:] + # De-dup + if inc_dir not in target_inc_dirs: + target_inc_dirs.append(inc_dir) + else: + target_args.append(arg) + + languages += gen_langs + if len(target_args) > 0: + target_args.append('%(AdditionalOptions)') + ET.SubElement(clconf, "AdditionalOptions").text = ' '.join(target_args) + + target_inc_dirs.append('%(AdditionalIncludeDirectories)') + ET.SubElement(clconf, 'AdditionalIncludeDirectories').text = ';'.join(target_inc_dirs) + target_defines.append('%(PreprocessorDefinitions)') + ET.SubElement(clconf, 'PreprocessorDefinitions').text = ';'.join(target_defines) + ET.SubElement(clconf, 'FunctionLevelLinking').text = 'true' + # Warning level + warning_level = self.get_option_for_target('warning_level', target) + ET.SubElement(clconf, 'WarningLevel').text = 'Level' + str(1 + int(warning_level)) + if self.get_option_for_target('werror', target): + ET.SubElement(clconf, 'TreatWarningAsError').text = 'true' + # Optimization flags + o_flags = split_o_flags_args(buildtype_args) + if '/Ox' in o_flags: + ET.SubElement(clconf, 'Optimization').text = 'Full' + elif '/O2' in o_flags: + ET.SubElement(clconf, 'Optimization').text = 'MaxSpeed' + elif '/O1' in o_flags: + ET.SubElement(clconf, 'Optimization').text = 'MinSpace' + elif '/Od' in o_flags: + ET.SubElement(clconf, 'Optimization').text = 'Disabled' + if '/Oi' in o_flags: + ET.SubElement(clconf, 'IntrinsicFunctions').text = 'true' + if '/Ob1' in o_flags: + ET.SubElement(clconf, 'InlineFunctionExpansion').text = 'OnlyExplicitInline' + elif '/Ob2' in o_flags: + ET.SubElement(clconf, 'InlineFunctionExpansion').text = 'AnySuitable' + # Size-preserving flags + if '/Os' in o_flags: + ET.SubElement(clconf, 'FavorSizeOrSpeed').text = 'Size' + else: + ET.SubElement(clconf, 'FavorSizeOrSpeed').text = 'Speed' + # Note: SuppressStartupBanner is /NOLOGO and is 'true' by default + pch_sources = {} + if self.environment.coredata.base_options.get('b_pch', False): + for lang in ['c', 'cpp']: + pch = target.get_pch(lang) + if not pch: + continue + if compiler.id == 'msvc': + if len(pch) == 1: + # Auto generate PCH. + src = os.path.join(down, self.create_msvc_pch_implementation(target, lang, pch[0])) + pch_header_dir = os.path.dirname(os.path.join(proj_to_src_dir, pch[0])) + else: + src = os.path.join(proj_to_src_dir, pch[1]) + pch_header_dir = None + pch_sources[lang] = [pch[0], src, lang, pch_header_dir] + else: + # I don't know whether its relevant but let's handle other compilers + # used with a vs backend + pch_sources[lang] = [pch[0], None, lang, None] + + resourcecompile = ET.SubElement(compiles, 'ResourceCompile') + ET.SubElement(resourcecompile, 'PreprocessorDefinitions') + + # Linker options + link = ET.SubElement(compiles, 'Link') + extra_link_args = compiler.compiler_args() + # FIXME: Can these buildtype linker args be added as tags in the + # vcxproj file (similar to buildtype compiler args) instead of in + # AdditionalOptions? + extra_link_args += compiler.get_buildtype_linker_args(self.buildtype) + # Generate Debug info + if self.buildtype.startswith('debug'): + self.generate_debug_information(link) + if not isinstance(target, build.StaticLibrary): + if isinstance(target, build.SharedModule): + options = self.environment.coredata.base_options + extra_link_args += compiler.get_std_shared_module_link_args(options) + # Add link args added using add_project_link_arguments() + extra_link_args += self.build.get_project_link_args(compiler, target.subproject, target.for_machine) + # Add link args added using add_global_link_arguments() + # These override per-project link arguments + extra_link_args += self.build.get_global_link_args(compiler, target.for_machine) + # Link args added from the env: LDFLAGS, or the cross file. We want + # these to override all the defaults but not the per-target link + # args. + extra_link_args += self.environment.coredata.get_external_link_args(target.for_machine, compiler.get_language()) + # Only non-static built targets need link args and link dependencies + extra_link_args += target.link_args + # External deps must be last because target link libraries may depend on them. + for dep in target.get_external_deps(): + # Extend without reordering or de-dup to preserve `-L -l` sets + # https://github.com/mesonbuild/meson/issues/1718 + if isinstance(dep, dependencies.OpenMPDependency): + ET.SubElement(clconf, 'OpenMPSuppport').text = 'true' + else: + extra_link_args.extend_direct(dep.get_link_args()) + for d in target.get_dependencies(): + if isinstance(d, build.StaticLibrary): + for dep in d.get_external_deps(): + if isinstance(dep, dependencies.OpenMPDependency): + ET.SubElement(clconf, 'OpenMPSuppport').text = 'true' + else: + extra_link_args.extend_direct(dep.get_link_args()) + # Add link args for c_* or cpp_* build options. Currently this only + # adds c_winlibs and cpp_winlibs when building for Windows. This needs + # to be after all internal and external libraries so that unresolved + # symbols from those can be found here. This is needed when the + # *_winlibs that we want to link to are static mingw64 libraries. + extra_link_args += compiler.get_option_link_args( + self.environment.coredata.compiler_options[compiler.for_machine][comp.language]) + (additional_libpaths, additional_links, extra_link_args) = self.split_link_args(extra_link_args.to_native()) + + # Add more libraries to be linked if needed + for t in target.get_dependencies(): + if isinstance(t, build.CustomTargetIndex): + # We don't need the actual project here, just the library name + lobj = t + else: + lobj = self.build.targets[t.get_id()] + linkname = os.path.join(down, self.get_target_filename_for_linking(lobj)) + if t in target.link_whole_targets: + # /WHOLEARCHIVE:foo must go into AdditionalOptions + extra_link_args += compiler.get_link_whole_for(linkname) + # To force Visual Studio to build this project even though it + # has no sources, we include a reference to the vcxproj file + # that builds this target. Technically we should add this only + # if the current target has no sources, but it doesn't hurt to + # have 'extra' references. + trelpath = self.get_target_dir_relative_to(t, target) + tvcxproj = os.path.join(trelpath, t.get_id() + '.vcxproj') + tid = self.environment.coredata.target_guids[t.get_id()] + self.add_project_reference(root, tvcxproj, tid, link_outputs=True) + # Mark the dependency as already handled to not have + # multiple references to the same target. + self.handled_target_deps[target.get_id()].append(t.get_id()) + else: + # Other libraries go into AdditionalDependencies + if linkname not in additional_links: + additional_links.append(linkname) + for lib in self.get_custom_target_provided_libraries(target): + additional_links.append(self.relpath(lib, self.get_target_dir(target))) + additional_objects = [] + for o in self.flatten_object_list(target, down): + assert(isinstance(o, str)) + additional_objects.append(o) + for o in custom_objs: + additional_objects.append(o) + + if len(extra_link_args) > 0: + extra_link_args.append('%(AdditionalOptions)') + ET.SubElement(link, "AdditionalOptions").text = ' '.join(extra_link_args) + if len(additional_libpaths) > 0: + additional_libpaths.insert(0, '%(AdditionalLibraryDirectories)') + ET.SubElement(link, 'AdditionalLibraryDirectories').text = ';'.join(additional_libpaths) + if len(additional_links) > 0: + additional_links.append('%(AdditionalDependencies)') + ET.SubElement(link, 'AdditionalDependencies').text = ';'.join(additional_links) + ofile = ET.SubElement(link, 'OutputFile') + ofile.text = '$(OutDir)%s' % target.get_filename() + subsys = ET.SubElement(link, 'SubSystem') + subsys.text = subsystem + if (isinstance(target, build.SharedLibrary) or isinstance(target, build.Executable)) and target.get_import_filename(): + # DLLs built with MSVC always have an import library except when + # they're data-only DLLs, but we don't support those yet. + ET.SubElement(link, 'ImportLibrary').text = target.get_import_filename() + if isinstance(target, build.SharedLibrary): + # Add module definitions file, if provided + if target.vs_module_defs: + relpath = os.path.join(down, target.vs_module_defs.rel_to_builddir(self.build_to_src)) + ET.SubElement(link, 'ModuleDefinitionFile').text = relpath + if '/ZI' in buildtype_args or '/Zi' in buildtype_args: + pdb = ET.SubElement(link, 'ProgramDataBaseFileName') + pdb.text = '$(OutDir}%s.pdb' % target_name + targetmachine = ET.SubElement(link, 'TargetMachine') + targetplatform = self.platform.lower() + if targetplatform == 'win32': + targetmachine.text = 'MachineX86' + elif targetplatform == 'x64': + targetmachine.text = 'MachineX64' + elif targetplatform == 'arm': + targetmachine.text = 'MachineARM' + elif targetplatform == 'arm64': + targetmachine.text = 'MachineARM64' + else: + raise MesonException('Unsupported Visual Studio target machine: ' + targetplatform) + # /nologo + ET.SubElement(link, 'SuppressStartupBanner').text = 'true' + # /release + if not self.environment.coredata.get_builtin_option('debug'): + ET.SubElement(link, 'SetChecksum').text = 'true' + + meson_file_group = ET.SubElement(root, 'ItemGroup') + ET.SubElement(meson_file_group, 'None', Include=os.path.join(proj_to_src_dir, build_filename)) + + # Visual Studio can't load projects that present duplicated items. Filter them out + # by keeping track of already added paths. + def path_normalize_add(path, lis): + normalized = os.path.normcase(os.path.normpath(path)) + if normalized not in lis: + lis.append(normalized) + return True + else: + return False + + previous_includes = [] + if len(headers) + len(gen_hdrs) + len(target.extra_files) + len(pch_sources) > 0: + inc_hdrs = ET.SubElement(root, 'ItemGroup') + for h in headers: + relpath = os.path.join(down, h.rel_to_builddir(self.build_to_src)) + if path_normalize_add(relpath, previous_includes): + ET.SubElement(inc_hdrs, 'CLInclude', Include=relpath) + for h in gen_hdrs: + if path_normalize_add(h, previous_includes): + ET.SubElement(inc_hdrs, 'CLInclude', Include=h) + for h in target.extra_files: + relpath = os.path.join(down, h.rel_to_builddir(self.build_to_src)) + if path_normalize_add(relpath, previous_includes): + ET.SubElement(inc_hdrs, 'CLInclude', Include=relpath) + for lang in pch_sources: + h = pch_sources[lang][0] + path = os.path.join(proj_to_src_dir, h) + if path_normalize_add(path, previous_includes): + ET.SubElement(inc_hdrs, 'CLInclude', Include=path) + + previous_sources = [] + if len(sources) + len(gen_src) + len(pch_sources) > 0: + inc_src = ET.SubElement(root, 'ItemGroup') + for s in sources: + relpath = os.path.join(down, s.rel_to_builddir(self.build_to_src)) + if path_normalize_add(relpath, previous_sources): + inc_cl = ET.SubElement(inc_src, 'CLCompile', Include=relpath) + lang = Vs2010Backend.lang_from_source_file(s) + self.add_pch(pch_sources, lang, inc_cl) + self.add_additional_options(lang, inc_cl, file_args) + self.add_preprocessor_defines(lang, inc_cl, file_defines) + self.add_include_dirs(lang, inc_cl, file_inc_dirs) + ET.SubElement(inc_cl, 'ObjectFileName').text = "$(IntDir)" + self.object_filename_from_source(target, s) + for s in gen_src: + if path_normalize_add(s, previous_sources): + inc_cl = ET.SubElement(inc_src, 'CLCompile', Include=s) + lang = Vs2010Backend.lang_from_source_file(s) + self.add_pch(pch_sources, lang, inc_cl) + self.add_additional_options(lang, inc_cl, file_args) + self.add_preprocessor_defines(lang, inc_cl, file_defines) + self.add_include_dirs(lang, inc_cl, file_inc_dirs) + for lang in pch_sources: + impl = pch_sources[lang][1] + if impl and path_normalize_add(impl, previous_sources): + inc_cl = ET.SubElement(inc_src, 'CLCompile', Include=impl) + self.create_pch(pch_sources, lang, inc_cl) + self.add_additional_options(lang, inc_cl, file_args) + self.add_preprocessor_defines(lang, inc_cl, file_defines) + pch_header_dir = pch_sources[lang][3] + if pch_header_dir: + inc_dirs = copy.deepcopy(file_inc_dirs) + inc_dirs[lang] = [pch_header_dir] + inc_dirs[lang] + else: + inc_dirs = file_inc_dirs + self.add_include_dirs(lang, inc_cl, inc_dirs) + + previous_objects = [] + if self.has_objects(objects, additional_objects, gen_objs): + inc_objs = ET.SubElement(root, 'ItemGroup') + for s in objects: + relpath = os.path.join(down, s.rel_to_builddir(self.build_to_src)) + if path_normalize_add(relpath, previous_objects): + ET.SubElement(inc_objs, 'Object', Include=relpath) + for s in additional_objects: + if path_normalize_add(s, previous_objects): + ET.SubElement(inc_objs, 'Object', Include=s) + self.add_generated_objects(inc_objs, gen_objs) + + ET.SubElement(root, 'Import', Project=r'$(VCTargetsPath)\Microsoft.Cpp.targets') + self.add_regen_dependency(root) + self.add_target_deps(root, target) + self._prettyprint_vcxproj_xml(ET.ElementTree(root), ofname) + + def gen_regenproj(self, project_name, ofname): + root = ET.Element('Project', {'DefaultTargets': 'Build', + 'ToolsVersion': '4.0', + 'xmlns': 'http://schemas.microsoft.com/developer/msbuild/2003'}) + confitems = ET.SubElement(root, 'ItemGroup', {'Label': 'ProjectConfigurations'}) + prjconf = ET.SubElement(confitems, 'ProjectConfiguration', + {'Include': self.buildtype + '|' + self.platform}) + p = ET.SubElement(prjconf, 'Configuration') + p.text = self.buildtype + pl = ET.SubElement(prjconf, 'Platform') + pl.text = self.platform + globalgroup = ET.SubElement(root, 'PropertyGroup', Label='Globals') + guidelem = ET.SubElement(globalgroup, 'ProjectGuid') + guidelem.text = '{%s}' % self.environment.coredata.regen_guid + kw = ET.SubElement(globalgroup, 'Keyword') + kw.text = self.platform + 'Proj' + p = ET.SubElement(globalgroup, 'Platform') + p.text = self.platform + pname = ET.SubElement(globalgroup, 'ProjectName') + pname.text = project_name + if self.windows_target_platform_version: + ET.SubElement(globalgroup, 'WindowsTargetPlatformVersion').text = self.windows_target_platform_version + ET.SubElement(root, 'Import', Project=r'$(VCTargetsPath)\Microsoft.Cpp.Default.props') + type_config = ET.SubElement(root, 'PropertyGroup', Label='Configuration') + ET.SubElement(type_config, 'ConfigurationType').text = "Utility" + ET.SubElement(type_config, 'CharacterSet').text = 'MultiByte' + ET.SubElement(type_config, 'UseOfMfc').text = 'false' + if self.platform_toolset: + ET.SubElement(type_config, 'PlatformToolset').text = self.platform_toolset + ET.SubElement(root, 'Import', Project=r'$(VCTargetsPath)\Microsoft.Cpp.props') + direlem = ET.SubElement(root, 'PropertyGroup') + fver = ET.SubElement(direlem, '_ProjectFileVersion') + fver.text = self.project_file_version + outdir = ET.SubElement(direlem, 'OutDir') + outdir.text = '.\\' + intdir = ET.SubElement(direlem, 'IntDir') + intdir.text = 'regen-temp\\' + tname = ET.SubElement(direlem, 'TargetName') + tname.text = project_name + + action = ET.SubElement(root, 'ItemDefinitionGroup') + midl = ET.SubElement(action, 'Midl') + ET.SubElement(midl, "AdditionalIncludeDirectories").text = '%(AdditionalIncludeDirectories)' + ET.SubElement(midl, "OutputDirectory").text = '$(IntDir)' + ET.SubElement(midl, 'HeaderFileName').text = '%(Filename).h' + ET.SubElement(midl, 'TypeLibraryName').text = '%(Filename).tlb' + ET.SubElement(midl, 'InterfaceIdentifierFilename').text = '%(Filename)_i.c' + ET.SubElement(midl, 'ProxyFileName').text = '%(Filename)_p.c' + regen_command = self.environment.get_build_command() + ['--internal', 'regencheck'] + cmd_templ = '''call %s > NUL +"%s" "%s"''' + regen_command = cmd_templ % \ + (self.get_vcvars_command(), '" "'.join(regen_command), self.environment.get_scratch_dir()) + self.add_custom_build(root, 'regen', regen_command, deps=self.get_regen_filelist(), + outputs=[Vs2010Backend.get_regen_stampfile(self.environment.get_build_dir())], + msg='Checking whether solution needs to be regenerated.') + ET.SubElement(root, 'Import', Project=r'$(VCTargetsPath)\Microsoft.Cpp.targets') + ET.SubElement(root, 'ImportGroup', Label='ExtensionTargets') + self._prettyprint_vcxproj_xml(ET.ElementTree(root), ofname) + + def gen_testproj(self, target_name, ofname): + project_name = target_name + root = ET.Element('Project', {'DefaultTargets': "Build", + 'ToolsVersion': '4.0', + 'xmlns': 'http://schemas.microsoft.com/developer/msbuild/2003'}) + confitems = ET.SubElement(root, 'ItemGroup', {'Label': 'ProjectConfigurations'}) + prjconf = ET.SubElement(confitems, 'ProjectConfiguration', + {'Include': self.buildtype + '|' + self.platform}) + p = ET.SubElement(prjconf, 'Configuration') + p.text = self.buildtype + pl = ET.SubElement(prjconf, 'Platform') + pl.text = self.platform + globalgroup = ET.SubElement(root, 'PropertyGroup', Label='Globals') + guidelem = ET.SubElement(globalgroup, 'ProjectGuid') + guidelem.text = '{%s}' % self.environment.coredata.test_guid + kw = ET.SubElement(globalgroup, 'Keyword') + kw.text = self.platform + 'Proj' + p = ET.SubElement(globalgroup, 'Platform') + p.text = self.platform + pname = ET.SubElement(globalgroup, 'ProjectName') + pname.text = project_name + if self.windows_target_platform_version: + ET.SubElement(globalgroup, 'WindowsTargetPlatformVersion').text = self.windows_target_platform_version + ET.SubElement(root, 'Import', Project=r'$(VCTargetsPath)\Microsoft.Cpp.Default.props') + type_config = ET.SubElement(root, 'PropertyGroup', Label='Configuration') + ET.SubElement(type_config, 'ConfigurationType') + ET.SubElement(type_config, 'CharacterSet').text = 'MultiByte' + ET.SubElement(type_config, 'UseOfMfc').text = 'false' + if self.platform_toolset: + ET.SubElement(type_config, 'PlatformToolset').text = self.platform_toolset + ET.SubElement(root, 'Import', Project=r'$(VCTargetsPath)\Microsoft.Cpp.props') + direlem = ET.SubElement(root, 'PropertyGroup') + fver = ET.SubElement(direlem, '_ProjectFileVersion') + fver.text = self.project_file_version + outdir = ET.SubElement(direlem, 'OutDir') + outdir.text = '.\\' + intdir = ET.SubElement(direlem, 'IntDir') + intdir.text = 'test-temp\\' + tname = ET.SubElement(direlem, 'TargetName') + tname.text = target_name + + action = ET.SubElement(root, 'ItemDefinitionGroup') + midl = ET.SubElement(action, 'Midl') + ET.SubElement(midl, "AdditionalIncludeDirectories").text = '%(AdditionalIncludeDirectories)' + ET.SubElement(midl, "OutputDirectory").text = '$(IntDir)' + ET.SubElement(midl, 'HeaderFileName').text = '%(Filename).h' + ET.SubElement(midl, 'TypeLibraryName').text = '%(Filename).tlb' + ET.SubElement(midl, 'InterfaceIdentifierFilename').text = '%(Filename)_i.c' + ET.SubElement(midl, 'ProxyFileName').text = '%(Filename)_p.c' + # FIXME: No benchmarks? + test_command = self.environment.get_build_command() + ['test', '--no-rebuild'] + if not self.environment.coredata.get_builtin_option('stdsplit'): + test_command += ['--no-stdsplit'] + if self.environment.coredata.get_builtin_option('errorlogs'): + test_command += ['--print-errorlogs'] + self.serialize_tests() + self.add_custom_build(root, 'run_tests', '"%s"' % ('" "'.join(test_command))) + ET.SubElement(root, 'Import', Project=r'$(VCTargetsPath)\Microsoft.Cpp.targets') + self.add_regen_dependency(root) + self._prettyprint_vcxproj_xml(ET.ElementTree(root), ofname) + + def gen_installproj(self, target_name, ofname): + self.create_install_data_files() + project_name = target_name + root = ET.Element('Project', {'DefaultTargets': "Build", + 'ToolsVersion': '4.0', + 'xmlns': 'http://schemas.microsoft.com/developer/msbuild/2003'}) + confitems = ET.SubElement(root, 'ItemGroup', {'Label': 'ProjectConfigurations'}) + prjconf = ET.SubElement(confitems, 'ProjectConfiguration', + {'Include': self.buildtype + '|' + self.platform}) + p = ET.SubElement(prjconf, 'Configuration') + p.text = self.buildtype + pl = ET.SubElement(prjconf, 'Platform') + pl.text = self.platform + globalgroup = ET.SubElement(root, 'PropertyGroup', Label='Globals') + guidelem = ET.SubElement(globalgroup, 'ProjectGuid') + guidelem.text = '{%s}' % self.environment.coredata.install_guid + kw = ET.SubElement(globalgroup, 'Keyword') + kw.text = self.platform + 'Proj' + p = ET.SubElement(globalgroup, 'Platform') + p.text = self.platform + pname = ET.SubElement(globalgroup, 'ProjectName') + pname.text = project_name + if self.windows_target_platform_version: + ET.SubElement(globalgroup, 'WindowsTargetPlatformVersion').text = self.windows_target_platform_version + ET.SubElement(root, 'Import', Project=r'$(VCTargetsPath)\Microsoft.Cpp.Default.props') + type_config = ET.SubElement(root, 'PropertyGroup', Label='Configuration') + ET.SubElement(type_config, 'ConfigurationType') + ET.SubElement(type_config, 'CharacterSet').text = 'MultiByte' + ET.SubElement(type_config, 'UseOfMfc').text = 'false' + if self.platform_toolset: + ET.SubElement(type_config, 'PlatformToolset').text = self.platform_toolset + ET.SubElement(root, 'Import', Project=r'$(VCTargetsPath)\Microsoft.Cpp.props') + direlem = ET.SubElement(root, 'PropertyGroup') + fver = ET.SubElement(direlem, '_ProjectFileVersion') + fver.text = self.project_file_version + outdir = ET.SubElement(direlem, 'OutDir') + outdir.text = '.\\' + intdir = ET.SubElement(direlem, 'IntDir') + intdir.text = 'install-temp\\' + tname = ET.SubElement(direlem, 'TargetName') + tname.text = target_name + + action = ET.SubElement(root, 'ItemDefinitionGroup') + midl = ET.SubElement(action, 'Midl') + ET.SubElement(midl, "AdditionalIncludeDirectories").text = '%(AdditionalIncludeDirectories)' + ET.SubElement(midl, "OutputDirectory").text = '$(IntDir)' + ET.SubElement(midl, 'HeaderFileName').text = '%(Filename).h' + ET.SubElement(midl, 'TypeLibraryName').text = '%(Filename).tlb' + ET.SubElement(midl, 'InterfaceIdentifierFilename').text = '%(Filename)_i.c' + ET.SubElement(midl, 'ProxyFileName').text = '%(Filename)_p.c' + install_command = self.environment.get_build_command() + ['install', '--no-rebuild'] + self.add_custom_build(root, 'run_install', '"%s"' % ('" "'.join(install_command))) + ET.SubElement(root, 'Import', Project=r'$(VCTargetsPath)\Microsoft.Cpp.targets') + self.add_regen_dependency(root) + self._prettyprint_vcxproj_xml(ET.ElementTree(root), ofname) + + def add_custom_build(self, node, rulename, command, deps=None, outputs=None, msg=None): + igroup = ET.SubElement(node, 'ItemGroup') + rulefile = os.path.join(self.environment.get_scratch_dir(), rulename + '.rule') + if not os.path.exists(rulefile): + with open(rulefile, 'w', encoding='utf-8') as f: + f.write("# Meson regen file.") + custombuild = ET.SubElement(igroup, 'CustomBuild', Include=rulefile) + if msg: + message = ET.SubElement(custombuild, 'Message') + message.text = msg + cmd_templ = '''setlocal +%s +if %%errorlevel%% neq 0 goto :cmEnd +:cmEnd +endlocal & call :cmErrorLevel %%errorlevel%% & goto :cmDone +:cmErrorLevel +exit /b %%1 +:cmDone +if %%errorlevel%% neq 0 goto :VCEnd''' + ET.SubElement(custombuild, 'Command').text = cmd_templ % command + if not outputs: + # Use a nonexistent file to always consider the target out-of-date. + outputs = [self.nonexistent_file(os.path.join(self.environment.get_scratch_dir(), + 'outofdate.file'))] + ET.SubElement(custombuild, 'Outputs').text = ';'.join(outputs) + if deps: + ET.SubElement(custombuild, 'AdditionalInputs').text = ';'.join(deps) + + @staticmethod + def nonexistent_file(prefix): + i = 0 + file = prefix + while os.path.exists(file): + file = '%s%d' % (prefix, i) + return file + + def generate_debug_information(self, link): + # valid values for vs2015 is 'false', 'true', 'DebugFastLink' + ET.SubElement(link, 'GenerateDebugInformation').text = 'true' + + def add_regen_dependency(self, root): + regen_vcxproj = os.path.join(self.environment.get_build_dir(), 'REGEN.vcxproj') + self.add_project_reference(root, regen_vcxproj, self.environment.coredata.regen_guid) diff --git a/meson/mesonbuild/backend/vs2015backend.py b/meson/mesonbuild/backend/vs2015backend.py new file mode 100644 index 000000000..bdc1675a2 --- /dev/null +++ b/meson/mesonbuild/backend/vs2015backend.py @@ -0,0 +1,38 @@ +# Copyright 2014-2016 The Meson development team + +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at + +# http://www.apache.org/licenses/LICENSE-2.0 + +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from .vs2010backend import Vs2010Backend +from ..mesonlib import MesonException +from ..interpreter import Interpreter +from ..build import Build +import typing as T + + +class Vs2015Backend(Vs2010Backend): + def __init__(self, build: T.Optional[Build], interpreter: T.Optional[Interpreter]): + super().__init__(build, interpreter) + self.name = 'vs2015' + self.vs_version = '2015' + if self.environment is not None: + # TODO: we assume host == build + comps = self.environment.coredata.compilers.host + if comps and all(c.id == 'intel-cl' for c in comps.values()): + c = list(comps.values())[0] + if c.version.startswith('19'): + self.platform_toolset = 'Intel C++ Compiler 19.0' + else: + # We don't have support for versions older than 2019 right now. + raise MesonException('There is currently no support for ICL before 19, patches welcome.') + if self.platform_toolset is None: + self.platform_toolset = 'v140' diff --git a/meson/mesonbuild/backend/vs2017backend.py b/meson/mesonbuild/backend/vs2017backend.py new file mode 100644 index 000000000..fa216065c --- /dev/null +++ b/meson/mesonbuild/backend/vs2017backend.py @@ -0,0 +1,52 @@ +# Copyright 2014-2016 The Meson development team + +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at + +# http://www.apache.org/licenses/LICENSE-2.0 + +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import os +import typing as T +import xml.etree.ElementTree as ET + +from .vs2010backend import Vs2010Backend +from ..mesonlib import MesonException +from ..interpreter import Interpreter +from ..build import Build + + +class Vs2017Backend(Vs2010Backend): + def __init__(self, build: T.Optional[Build], interpreter: T.Optional[Interpreter]): + super().__init__(build, interpreter) + self.name = 'vs2017' + self.vs_version = '2017' + # We assume that host == build + if self.environment is not None: + comps = self.environment.coredata.compilers.host + if comps: + if comps and all(c.id == 'clang-cl' for c in comps.values()): + self.platform_toolset = 'llvm' + elif comps and all(c.id == 'intel-cl' for c in comps.values()): + c = list(comps.values())[0] + if c.version.startswith('19'): + self.platform_toolset = 'Intel C++ Compiler 19.0' + else: + # We don't have support for versions older than 2019 right now. + raise MesonException('There is currently no support for ICL before 19, patches welcome.') + if self.platform_toolset is None: + self.platform_toolset = 'v141' + # WindowsSDKVersion should be set by command prompt. + sdk_version = os.environ.get('WindowsSDKVersion', None) + if sdk_version: + self.windows_target_platform_version = sdk_version.rstrip('\\') + + def generate_debug_information(self, link): + # valid values for vs2017 is 'false', 'true', 'DebugFastLink', 'DebugFull' + ET.SubElement(link, 'GenerateDebugInformation').text = 'DebugFull' diff --git a/meson/mesonbuild/backend/vs2019backend.py b/meson/mesonbuild/backend/vs2019backend.py new file mode 100644 index 000000000..e089cb4bf --- /dev/null +++ b/meson/mesonbuild/backend/vs2019backend.py @@ -0,0 +1,47 @@ +# Copyright 2014-2019 The Meson development team + +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at + +# http://www.apache.org/licenses/LICENSE-2.0 + +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import os +import typing as T +import xml.etree.ElementTree as ET + +from .vs2010backend import Vs2010Backend +from ..interpreter import Interpreter +from ..build import Build + + +class Vs2019Backend(Vs2010Backend): + def __init__(self, build: T.Optional[Build], interpreter: T.Optional[Interpreter]): + super().__init__(build, interpreter) + self.name = 'vs2019' + if self.environment is not None: + comps = self.environment.coredata.compilers.host + if comps and all(c.id == 'clang-cl' for c in comps.values()): + self.platform_toolset = 'llvm' + elif comps and all(c.id == 'intel-cl' for c in comps.values()): + c = list(comps.values())[0] + if c.version.startswith('19'): + self.platform_toolset = 'Intel C++ Compiler 19.0' + # We don't have support for versions older than 2019 right now. + if not self.platform_toolset: + self.platform_toolset = 'v142' + self.vs_version = '2019' + # WindowsSDKVersion should be set by command prompt. + sdk_version = os.environ.get('WindowsSDKVersion', None) + if sdk_version: + self.windows_target_platform_version = sdk_version.rstrip('\\') + + def generate_debug_information(self, link): + # valid values for vs2019 is 'false', 'true', 'DebugFastLink', 'DebugFull' + ET.SubElement(link, 'GenerateDebugInformation').text = 'DebugFull' diff --git a/meson/mesonbuild/backend/xcodebackend.py b/meson/mesonbuild/backend/xcodebackend.py new file mode 100644 index 000000000..3fe657426 --- /dev/null +++ b/meson/mesonbuild/backend/xcodebackend.py @@ -0,0 +1,915 @@ +# Copyright 2014-2016 The Meson development team + +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at + +# http://www.apache.org/licenses/LICENSE-2.0 + +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from . import backends +from .. import build +from .. import dependencies +from .. import mesonlib +from .. import mlog +import uuid, os, operator +import typing as T + +from ..mesonlib import MesonException +from ..interpreter import Interpreter + +class XCodeBackend(backends.Backend): + def __init__(self, build: T.Optional[build.Build], interpreter: T.Optional[Interpreter]): + super().__init__(build, interpreter) + self.name = 'xcode' + self.project_uid = self.environment.coredata.lang_guids['default'].replace('-', '')[:24] + self.project_conflist = self.gen_id() + self.indent = '\t' # Recent versions of Xcode uses tabs + self.indent_level = 0 + self.xcodetypemap = {'c': 'sourcecode.c.c', + 'a': 'archive.ar', + 'cc': 'sourcecode.cpp.cpp', + 'cxx': 'sourcecode.cpp.cpp', + 'cpp': 'sourcecode.cpp.cpp', + 'c++': 'sourcecode.cpp.cpp', + 'm': 'sourcecode.c.objc', + 'mm': 'sourcecode.cpp.objcpp', + 'h': 'sourcecode.c.h', + 'hpp': 'sourcecode.cpp.h', + 'hxx': 'sourcecode.cpp.h', + 'hh': 'sourcecode.cpp.hh', + 'inc': 'sourcecode.c.h', + 'dylib': 'compiled.mach-o.dylib', + 'o': 'compiled.mach-o.objfile', + 's': 'sourcecode.asm', + 'asm': 'sourcecode.asm', + } + self.maingroup_id = self.gen_id() + self.all_id = self.gen_id() + self.all_buildconf_id = self.gen_id() + self.buildtypes = ['debug'] + self.test_id = self.gen_id() + self.test_command_id = self.gen_id() + self.test_buildconf_id = self.gen_id() + + def gen_id(self): + return str(uuid.uuid4()).upper().replace('-', '')[:24] + + def get_target_dir(self, target): + dirname = os.path.join(target.get_subdir(), self.environment.coredata.get_builtin_option('buildtype')) + os.makedirs(os.path.join(self.environment.get_build_dir(), dirname), exist_ok=True) + return dirname + + def target_to_build_root(self, target): + if self.get_target_dir(target) == '': + return '' + directories = os.path.normpath(self.get_target_dir(target)).split(os.sep) + return os.sep.join(['..'] * len(directories)) + + def write_line(self, text): + self.ofile.write(self.indent * self.indent_level + text) + if not text.endswith('\n'): + self.ofile.write('\n') + + def generate(self): + test_data = self.serialize_tests()[0] + self.generate_filemap() + self.generate_buildmap() + self.generate_buildstylemap() + self.generate_build_phase_map() + self.generate_build_configuration_map() + self.generate_build_configurationlist_map() + self.generate_project_configurations_map() + self.generate_buildall_configurations_map() + self.generate_test_configurations_map() + self.generate_native_target_map() + self.generate_native_frameworks_map() + self.generate_source_phase_map() + self.generate_target_dependency_map() + self.generate_pbxdep_map() + self.generate_containerproxy_map() + self.proj_dir = os.path.join(self.environment.get_build_dir(), self.build.project_name + '.xcodeproj') + os.makedirs(self.proj_dir, exist_ok=True) + self.proj_file = os.path.join(self.proj_dir, 'project.pbxproj') + with open(self.proj_file, 'w') as self.ofile: + self.generate_prefix() + self.generate_pbx_aggregate_target() + self.generate_pbx_build_file() + self.generate_pbx_build_style() + self.generate_pbx_container_item_proxy() + self.generate_pbx_file_reference() + self.generate_pbx_frameworks_buildphase() + self.generate_pbx_group() + self.generate_pbx_native_target() + self.generate_pbx_project() + self.generate_pbx_shell_build_phase(test_data) + self.generate_pbx_sources_build_phase() + self.generate_pbx_target_dependency() + self.generate_xc_build_configuration() + self.generate_xc_configurationList() + self.generate_suffix() + + def get_xcodetype(self, fname): + xcodetype = self.xcodetypemap.get(fname.split('.')[-1].lower()) + if not xcodetype: + xcodetype = 'sourcecode.unknown' + mlog.warning('Unknown file type "%s" fallbacking to "%s". Xcode project might be malformed.' % (fname, xcodetype)) + return xcodetype + + def generate_filemap(self): + self.filemap = {} # Key is source file relative to src root. + self.target_filemap = {} + for name, t in self.build.targets.items(): + for s in t.sources: + if isinstance(s, mesonlib.File): + s = os.path.join(s.subdir, s.fname) + self.filemap[s] = self.gen_id() + for o in t.objects: + if isinstance(o, str): + o = os.path.join(t.subdir, o) + self.filemap[o] = self.gen_id() + self.target_filemap[name] = self.gen_id() + + def generate_buildmap(self): + self.buildmap = {} + for t in self.build.targets.values(): + for s in t.sources: + s = os.path.join(s.subdir, s.fname) + self.buildmap[s] = self.gen_id() + for o in t.objects: + o = os.path.join(t.subdir, o) + if isinstance(o, str): + self.buildmap[o] = self.gen_id() + + def generate_buildstylemap(self): + self.buildstylemap = {'debug': self.gen_id()} + + def generate_build_phase_map(self): + for tname, t in self.build.targets.items(): + # generate id for our own target-name + t.buildphasemap = {} + t.buildphasemap[tname] = self.gen_id() + # each target can have it's own Frameworks/Sources/..., generate id's for those + t.buildphasemap['Frameworks'] = self.gen_id() + t.buildphasemap['Resources'] = self.gen_id() + t.buildphasemap['Sources'] = self.gen_id() + + def generate_build_configuration_map(self): + self.buildconfmap = {} + for t in self.build.targets: + bconfs = {'debug': self.gen_id()} + self.buildconfmap[t] = bconfs + + def generate_project_configurations_map(self): + self.project_configurations = {'debug': self.gen_id()} + + def generate_buildall_configurations_map(self): + self.buildall_configurations = {'debug': self.gen_id()} + + def generate_test_configurations_map(self): + self.test_configurations = {'debug': self.gen_id()} + + def generate_build_configurationlist_map(self): + self.buildconflistmap = {} + for t in self.build.targets: + self.buildconflistmap[t] = self.gen_id() + + def generate_native_target_map(self): + self.native_targets = {} + for t in self.build.targets: + self.native_targets[t] = self.gen_id() + + def generate_native_frameworks_map(self): + self.native_frameworks = {} + self.native_frameworks_fileref = {} + for t in self.build.targets.values(): + for dep in t.get_external_deps(): + if isinstance(dep, dependencies.AppleFrameworks): + for f in dep.frameworks: + self.native_frameworks[f] = self.gen_id() + self.native_frameworks_fileref[f] = self.gen_id() + + def generate_target_dependency_map(self): + self.target_dependency_map = {} + for tname, t in self.build.targets.items(): + for target in t.link_targets: + self.target_dependency_map[(tname, target.get_basename())] = self.gen_id() + + def generate_pbxdep_map(self): + self.pbx_dep_map = {} + for t in self.build.targets: + self.pbx_dep_map[t] = self.gen_id() + + def generate_containerproxy_map(self): + self.containerproxy_map = {} + for t in self.build.targets: + self.containerproxy_map[t] = self.gen_id() + + def generate_source_phase_map(self): + self.source_phase = {} + for t in self.build.targets: + self.source_phase[t] = self.gen_id() + + def generate_pbx_aggregate_target(self): + target_dependencies = list(map(lambda t: self.pbx_dep_map[t], self.build.targets)) + aggregated_targets = [] + aggregated_targets.append((self.all_id, 'ALL_BUILD', self.all_buildconf_id, [], target_dependencies)) + aggregated_targets.append((self.test_id, 'RUN_TESTS', self.test_buildconf_id, [self.test_command_id], [])) + # Sort objects by ID before writing + sorted_aggregated_targets = sorted(aggregated_targets, key=operator.itemgetter(0)) + self.ofile.write('\n/* Begin PBXAggregateTarget section */\n') + for t in sorted_aggregated_targets: + name = t[1] + buildconf_id = t[2] + build_phases = t[3] + dependencies = t[4] + self.write_line('%s /* %s */ = {' % (t[0], name)) + self.indent_level += 1 + self.write_line('isa = PBXAggregateTarget;') + self.write_line('buildConfigurationList = %s /* Build configuration list for PBXAggregateTarget "%s" */;' % (buildconf_id, name)) + self.write_line('buildPhases = (') + self.indent_level += 1 + for bp in build_phases: + self.write_line('%s /* ShellScript */,' % bp) + self.indent_level -= 1 + self.write_line(');') + self.write_line('dependencies = (') + self.indent_level += 1 + for td in dependencies: + self.write_line('%s /* PBXTargetDependency */,' % td) + self.indent_level -= 1 + self.write_line(');') + self.write_line('name = %s;' % name) + self.write_line('productName = %s;' % name) + self.indent_level -= 1 + self.write_line('};') + self.ofile.write('/* End PBXAggregateTarget section */\n') + + def generate_pbx_build_file(self): + self.ofile.write('\n/* Begin PBXBuildFile section */\n') + templ = '%s /* %s */ = { isa = PBXBuildFile; fileRef = %s /* %s */; settings = { COMPILER_FLAGS = "%s"; }; };\n' + otempl = '%s /* %s */ = { isa = PBXBuildFile; fileRef = %s /* %s */;};\n' + + for t in self.build.targets.values(): + + for dep in t.get_external_deps(): + if isinstance(dep, dependencies.AppleFrameworks): + for f in dep.frameworks: + self.write_line('%s /* %s.framework in Frameworks */ = {isa = PBXBuildFile; fileRef = %s /* %s.framework */; };\n' % (self.native_frameworks[f], f, self.native_frameworks_fileref[f], f)) + + for s in t.sources: + if isinstance(s, mesonlib.File): + s = s.fname + + if isinstance(s, str): + s = os.path.join(t.subdir, s) + idval = self.buildmap[s] + fullpath = os.path.join(self.environment.get_source_dir(), s) + fileref = self.filemap[s] + fullpath2 = fullpath + compiler_args = '' + self.write_line(templ % (idval, fullpath, fileref, fullpath2, compiler_args)) + for o in t.objects: + o = os.path.join(t.subdir, o) + idval = self.buildmap[o] + fileref = self.filemap[o] + fullpath = os.path.join(self.environment.get_source_dir(), o) + fullpath2 = fullpath + self.write_line(otempl % (idval, fullpath, fileref, fullpath2)) + self.ofile.write('/* End PBXBuildFile section */\n') + + def generate_pbx_build_style(self): + # FIXME: Xcode 9 and later does not uses PBXBuildStyle and it gets removed. Maybe we can remove this part. + self.ofile.write('\n/* Begin PBXBuildStyle section */\n') + for name, idval in self.buildstylemap.items(): + self.write_line('%s /* %s */ = {\n' % (idval, name)) + self.indent_level += 1 + self.write_line('isa = PBXBuildStyle;\n') + self.write_line('buildSettings = {\n') + self.indent_level += 1 + self.write_line('COPY_PHASE_STRIP = NO;\n') + self.indent_level -= 1 + self.write_line('};\n') + self.write_line('name = "%s";\n' % name) + self.indent_level -= 1 + self.write_line('};\n') + self.ofile.write('/* End PBXBuildStyle section */\n') + + def generate_pbx_container_item_proxy(self): + self.ofile.write('\n/* Begin PBXContainerItemProxy section */\n') + for t in self.build.targets: + self.write_line('%s /* PBXContainerItemProxy */ = {' % self.containerproxy_map[t]) + self.indent_level += 1 + self.write_line('isa = PBXContainerItemProxy;') + self.write_line('containerPortal = %s /* Project object */;' % self.project_uid) + self.write_line('proxyType = 1;') + self.write_line('remoteGlobalIDString = %s;' % self.native_targets[t]) + self.write_line('remoteInfo = "%s";' % t) + self.indent_level -= 1 + self.write_line('};') + self.ofile.write('/* End PBXContainerItemProxy section */\n') + + def generate_pbx_file_reference(self): + self.ofile.write('\n/* Begin PBXFileReference section */\n') + for t in self.build.targets.values(): + for dep in t.get_external_deps(): + if isinstance(dep, dependencies.AppleFrameworks): + for f in dep.frameworks: + self.write_line('%s /* %s.framework */ = {isa = PBXFileReference; lastKnownFileType = wrapper.framework; name = %s.framework; path = System/Library/Frameworks/%s.framework; sourceTree = SDKROOT; };\n' % (self.native_frameworks_fileref[f], f, f, f)) + src_templ = '%s /* %s */ = { isa = PBXFileReference; explicitFileType = "%s"; fileEncoding = 4; name = "%s"; path = "%s"; sourceTree = SOURCE_ROOT; };\n' + for fname, idval in self.filemap.items(): + fullpath = os.path.join(self.environment.get_source_dir(), fname) + xcodetype = self.get_xcodetype(fname) + name = os.path.basename(fname) + path = fname + self.write_line(src_templ % (idval, fullpath, xcodetype, name, path)) + target_templ = '%s /* %s */ = { isa = PBXFileReference; explicitFileType = "%s"; path = %s; refType = %d; sourceTree = BUILT_PRODUCTS_DIR; };\n' + for tname, idval in self.target_filemap.items(): + t = self.build.targets[tname] + fname = t.get_filename() + reftype = 0 + if isinstance(t, build.Executable): + typestr = 'compiled.mach-o.executable' + path = fname + elif isinstance(t, build.SharedLibrary): + typestr = self.get_xcodetype('dummy.dylib') + path = fname + else: + typestr = self.get_xcodetype(fname) + path = '"%s"' % t.get_filename() + self.write_line(target_templ % (idval, tname, typestr, path, reftype)) + self.ofile.write('/* End PBXFileReference section */\n') + + def generate_pbx_frameworks_buildphase(self): + for t in self.build.targets.values(): + self.ofile.write('\n/* Begin PBXFrameworksBuildPhase section */\n') + self.write_line('%s /* %s */ = {\n' % (t.buildphasemap['Frameworks'], 'Frameworks')) + self.indent_level += 1 + self.write_line('isa = PBXFrameworksBuildPhase;\n') + self.write_line('buildActionMask = %s;\n' % (2147483647)) + self.write_line('files = (\n') + self.indent_level += 1 + for dep in t.get_external_deps(): + if isinstance(dep, dependencies.AppleFrameworks): + for f in dep.frameworks: + self.write_line('%s /* %s.framework in Frameworks */,\n' % (self.native_frameworks[f], f)) + self.indent_level -= 1 + self.write_line(');\n') + self.write_line('runOnlyForDeploymentPostprocessing = 0;\n') + self.indent_level -= 1 + self.write_line('};\n') + self.ofile.write('/* End PBXFrameworksBuildPhase section */\n') + + def generate_pbx_group(self): + groupmap = {} + target_src_map = {} + for t in self.build.targets: + groupmap[t] = self.gen_id() + target_src_map[t] = self.gen_id() + self.ofile.write('\n/* Begin PBXGroup section */\n') + sources_id = self.gen_id() + resources_id = self.gen_id() + products_id = self.gen_id() + frameworks_id = self.gen_id() + self.write_line('%s = {' % self.maingroup_id) + self.indent_level += 1 + self.write_line('isa = PBXGroup;') + self.write_line('children = (') + self.indent_level += 1 + self.write_line('%s /* Sources */,' % sources_id) + self.write_line('%s /* Resources */,' % resources_id) + self.write_line('%s /* Products */,' % products_id) + self.write_line('%s /* Frameworks */,' % frameworks_id) + self.indent_level -= 1 + self.write_line(');') + self.write_line('sourceTree = "";') + self.indent_level -= 1 + self.write_line('};') + + # Sources + self.write_line('%s /* Sources */ = {' % sources_id) + self.indent_level += 1 + self.write_line('isa = PBXGroup;') + self.write_line('children = (') + self.indent_level += 1 + for t in self.build.targets: + self.write_line('%s /* %s */,' % (groupmap[t], t)) + self.indent_level -= 1 + self.write_line(');') + self.write_line('name = Sources;') + self.write_line('sourceTree = "";') + self.indent_level -= 1 + self.write_line('};') + + self.write_line('%s /* Resources */ = {' % resources_id) + self.indent_level += 1 + self.write_line('isa = PBXGroup;') + self.write_line('children = (') + self.write_line(');') + self.write_line('name = Resources;') + self.write_line('sourceTree = "";') + self.indent_level -= 1 + self.write_line('};') + + self.write_line('%s /* Frameworks */ = {' % frameworks_id) + self.indent_level += 1 + self.write_line('isa = PBXGroup;') + self.write_line('children = (') + # write frameworks + self.indent_level += 1 + + for t in self.build.targets.values(): + for dep in t.get_external_deps(): + if isinstance(dep, dependencies.AppleFrameworks): + for f in dep.frameworks: + self.write_line('%s /* %s.framework */,\n' % (self.native_frameworks_fileref[f], f)) + + self.indent_level -= 1 + self.write_line(');') + self.write_line('name = Frameworks;') + self.write_line('sourceTree = "";') + self.indent_level -= 1 + self.write_line('};') + + # Targets + for t in self.build.targets: + self.write_line('%s /* %s */ = {' % (groupmap[t], t)) + self.indent_level += 1 + self.write_line('isa = PBXGroup;') + self.write_line('children = (') + self.indent_level += 1 + self.write_line('%s /* Source files */,' % target_src_map[t]) + self.indent_level -= 1 + self.write_line(');') + self.write_line('name = "%s";' % t) + self.write_line('sourceTree = "";') + self.indent_level -= 1 + self.write_line('};') + self.write_line('%s /* Source files */ = {' % target_src_map[t]) + self.indent_level += 1 + self.write_line('isa = PBXGroup;') + self.write_line('children = (') + self.indent_level += 1 + for s in self.build.targets[t].sources: + s = os.path.join(s.subdir, s.fname) + if isinstance(s, str): + self.write_line('%s /* %s */,' % (self.filemap[s], s)) + for o in self.build.targets[t].objects: + o = os.path.join(self.build.targets[t].subdir, o) + self.write_line('%s /* %s */,' % (self.filemap[o], o)) + self.indent_level -= 1 + self.write_line(');') + self.write_line('name = "Source files";') + self.write_line('sourceTree = "";') + self.indent_level -= 1 + self.write_line('};') + + # And finally products + self.write_line('%s /* Products */ = {' % products_id) + self.indent_level += 1 + self.write_line('isa = PBXGroup;') + self.write_line('children = (') + self.indent_level += 1 + for t in self.build.targets: + self.write_line('%s /* %s */,' % (self.target_filemap[t], t)) + self.indent_level -= 1 + self.write_line(');') + self.write_line('name = Products;') + self.write_line('sourceTree = "";') + self.indent_level -= 1 + self.write_line('};') + self.ofile.write('/* End PBXGroup section */\n') + + def generate_pbx_native_target(self): + self.ofile.write('\n/* Begin PBXNativeTarget section */\n') + for tname, idval in self.native_targets.items(): + t = self.build.targets[tname] + self.write_line('%s /* %s */ = {' % (idval, tname)) + self.indent_level += 1 + self.write_line('isa = PBXNativeTarget;') + self.write_line('buildConfigurationList = %s /* Build configuration list for PBXNativeTarget "%s" */;' + % (self.buildconflistmap[tname], tname)) + self.write_line('buildPhases = (') + self.indent_level += 1 + for bpname, bpval in t.buildphasemap.items(): + self.write_line('%s /* %s yyy */,' % (bpval, bpname)) + self.indent_level -= 1 + self.write_line(');') + self.write_line('buildRules = (') + self.write_line(');') + self.write_line('dependencies = (') + self.indent_level += 1 + for lt in self.build.targets[tname].link_targets: + # NOT DOCUMENTED, may need to make different links + # to same target have different targetdependency item. + idval = self.pbx_dep_map[lt.get_id()] + self.write_line('%s /* PBXTargetDependency */,' % idval) + self.indent_level -= 1 + self.write_line(");") + self.write_line('name = "%s";' % tname) + self.write_line('productName = "%s";' % tname) + self.write_line('productReference = %s /* %s */;' % (self.target_filemap[tname], tname)) + if isinstance(t, build.Executable): + typestr = 'com.apple.product-type.tool' + elif isinstance(t, build.StaticLibrary): + typestr = 'com.apple.product-type.library.static' + elif isinstance(t, build.SharedLibrary): + typestr = 'com.apple.product-type.library.dynamic' + else: + raise MesonException('Unknown target type for %s' % tname) + self.write_line('productType = "%s";' % typestr) + self.indent_level -= 1 + self.write_line('};') + self.ofile.write('/* End PBXNativeTarget section */\n') + + def generate_pbx_project(self): + self.ofile.write('\n/* Begin PBXProject section */\n') + self.write_line('%s /* Project object */ = {' % self.project_uid) + self.indent_level += 1 + self.write_line('isa = PBXProject;') + self.write_line('attributes = {') + self.indent_level += 1 + self.write_line('BuildIndependentTargetsInParallel = YES;') + self.indent_level -= 1 + self.write_line('};') + conftempl = 'buildConfigurationList = %s /* Build configuration list for PBXProject "%s" */;' + self.write_line(conftempl % (self.project_conflist, self.build.project_name)) + self.write_line('buildSettings = {') + self.write_line('};') + self.write_line('buildStyles = (') + self.indent_level += 1 + for name, idval in self.buildstylemap.items(): + self.write_line('%s /* %s */,' % (idval, name)) + self.indent_level -= 1 + self.write_line(');') + self.write_line('compatibilityVersion = "Xcode 3.2";') + self.write_line('hasScannedForEncodings = 0;') + self.write_line('mainGroup = %s;' % self.maingroup_id) + self.write_line('projectDirPath = "%s";' % self.build_to_src) + self.write_line('projectRoot = "";') + self.write_line('targets = (') + self.indent_level += 1 + self.write_line('%s /* ALL_BUILD */,' % self.all_id) + self.write_line('%s /* RUN_TESTS */,' % self.test_id) + for t in self.build.targets: + self.write_line('%s /* %s */,' % (self.native_targets[t], t)) + self.indent_level -= 1 + self.write_line(');') + self.indent_level -= 1 + self.write_line('};') + self.ofile.write('/* End PBXProject section */\n') + + def generate_pbx_shell_build_phase(self, test_data): + self.ofile.write('\n/* Begin PBXShellScriptBuildPhase section */\n') + self.write_line('%s /* ShellScript */ = {' % self.test_command_id) + self.indent_level += 1 + self.write_line('isa = PBXShellScriptBuildPhase;') + self.write_line('buildActionMask = 2147483647;') + self.write_line('files = (') + self.write_line(');') + self.write_line('inputPaths = (') + self.write_line(');') + self.write_line('outputPaths = (') + self.write_line(');') + self.write_line('runOnlyForDeploymentPostprocessing = 0;') + self.write_line('shellPath = /bin/sh;') + cmd = mesonlib.meson_command + ['test', test_data, '-C', self.environment.get_build_dir()] + cmdstr = ' '.join(["'%s'" % i for i in cmd]) + self.write_line('shellScript = "%s";' % cmdstr) + self.write_line('showEnvVarsInLog = 0;') + self.indent_level -= 1 + self.write_line('};') + self.ofile.write('/* End PBXShellScriptBuildPhase section */\n') + + def generate_pbx_sources_build_phase(self): + self.ofile.write('\n/* Begin PBXSourcesBuildPhase section */\n') + for name in self.source_phase.keys(): + t = self.build.targets[name] + self.write_line('%s /* Sources */ = {' % (t.buildphasemap[name])) + self.indent_level += 1 + self.write_line('isa = PBXSourcesBuildPhase;') + self.write_line('buildActionMask = 2147483647;') + self.write_line('files = (') + self.indent_level += 1 + for s in self.build.targets[name].sources: + s = os.path.join(s.subdir, s.fname) + if not self.environment.is_header(s): + self.write_line('%s /* %s */,' % (self.buildmap[s], os.path.join(self.environment.get_source_dir(), s))) + self.indent_level -= 1 + self.write_line(');') + self.write_line('runOnlyForDeploymentPostprocessing = 0;') + self.indent_level -= 1 + self.write_line('};') + self.ofile.write('/* End PBXSourcesBuildPhase section */\n') + + def generate_pbx_target_dependency(self): + targets = [] + for t in self.build.targets: + idval = self.pbx_dep_map[t] # VERIFY: is this correct? + targets.append((idval, self.native_targets[t], t, self.containerproxy_map[t])) + + # Sort object by ID + sorted_targets = sorted(targets, key=operator.itemgetter(0)) + self.ofile.write('\n/* Begin PBXTargetDependency section */\n') + for t in sorted_targets: + self.write_line('%s /* PBXTargetDependency */ = {' % t[0]) + self.indent_level += 1 + self.write_line('isa = PBXTargetDependency;') + self.write_line('target = %s /* %s */;' % (t[1], t[2])) + self.write_line('targetProxy = %s /* PBXContainerItemProxy */;' % t[3]) + self.indent_level -= 1 + self.write_line('};') + self.ofile.write('/* End PBXTargetDependency section */\n') + + def generate_xc_build_configuration(self): + self.ofile.write('\n/* Begin XCBuildConfiguration section */\n') + # First the setup for the toplevel project. + for buildtype in self.buildtypes: + self.write_line('%s /* %s */ = {' % (self.project_configurations[buildtype], buildtype)) + self.indent_level += 1 + self.write_line('isa = XCBuildConfiguration;') + self.write_line('buildSettings = {') + self.indent_level += 1 + self.write_line('ARCHS = "$(ARCHS_STANDARD_64_BIT)";') + self.write_line('ONLY_ACTIVE_ARCH = YES;') + self.write_line('SDKROOT = "macosx";') + self.write_line('SYMROOT = "%s/build";' % self.environment.get_build_dir()) + self.indent_level -= 1 + self.write_line('};') + self.write_line('name = "%s";' % buildtype) + self.indent_level -= 1 + self.write_line('};') + + # Then the all target. + for buildtype in self.buildtypes: + self.write_line('%s /* %s */ = {' % (self.buildall_configurations[buildtype], buildtype)) + self.indent_level += 1 + self.write_line('isa = XCBuildConfiguration;') + self.write_line('buildSettings = {') + self.indent_level += 1 + self.write_line('COMBINE_HIDPI_IMAGES = YES;') + self.write_line('GCC_GENERATE_DEBUGGING_SYMBOLS = NO;') + self.write_line('GCC_INLINES_ARE_PRIVATE_EXTERN = NO;') + self.write_line('GCC_OPTIMIZATION_LEVEL = 0;') + self.write_line('GCC_PREPROCESSOR_DEFINITIONS = "";') + self.write_line('GCC_SYMBOLS_PRIVATE_EXTERN = NO;') + self.write_line('INSTALL_PATH = "";') + self.write_line('OTHER_CFLAGS = " ";') + self.write_line('OTHER_LDFLAGS = " ";') + self.write_line('OTHER_REZFLAGS = "";') + self.write_line('PRODUCT_NAME = ALL_BUILD;') + self.write_line('SECTORDER_FLAGS = "";') + self.write_line('SYMROOT = "%s";' % self.environment.get_build_dir()) + self.write_line('USE_HEADERMAP = NO;') + self.write_build_setting_line('WARNING_CFLAGS', ['-Wmost', '-Wno-four-char-constants', '-Wno-unknown-pragmas']) + self.indent_level -= 1 + self.write_line('};') + self.write_line('name = "%s";' % buildtype) + self.indent_level -= 1 + self.write_line('};') + + # Then the test target. + for buildtype in self.buildtypes: + self.write_line('%s /* %s */ = {' % (self.test_configurations[buildtype], buildtype)) + self.indent_level += 1 + self.write_line('isa = XCBuildConfiguration;') + self.write_line('buildSettings = {') + self.indent_level += 1 + self.write_line('COMBINE_HIDPI_IMAGES = YES;') + self.write_line('GCC_GENERATE_DEBUGGING_SYMBOLS = NO;') + self.write_line('GCC_INLINES_ARE_PRIVATE_EXTERN = NO;') + self.write_line('GCC_OPTIMIZATION_LEVEL = 0;') + self.write_line('GCC_PREPROCESSOR_DEFINITIONS = "";') + self.write_line('GCC_SYMBOLS_PRIVATE_EXTERN = NO;') + self.write_line('INSTALL_PATH = "";') + self.write_line('OTHER_CFLAGS = " ";') + self.write_line('OTHER_LDFLAGS = " ";') + self.write_line('OTHER_REZFLAGS = "";') + self.write_line('PRODUCT_NAME = RUN_TESTS;') + self.write_line('SECTORDER_FLAGS = "";') + self.write_line('SYMROOT = "%s";' % self.environment.get_build_dir()) + self.write_line('USE_HEADERMAP = NO;') + self.write_build_setting_line('WARNING_CFLAGS', ['-Wmost', '-Wno-four-char-constants', '-Wno-unknown-pragmas']) + self.indent_level -= 1 + self.write_line('};') + self.write_line('name = "%s";' % buildtype) + self.indent_level -= 1 + self.write_line('};') + + # Now finally targets. + langnamemap = {'c': 'C', 'cpp': 'CPLUSPLUS', 'objc': 'OBJC', 'objcpp': 'OBJCPLUSPLUS'} + for target_name, target in self.build.targets.items(): + for buildtype in self.buildtypes: + dep_libs = [] + links_dylib = False + headerdirs = [] + for d in target.include_dirs: + for sd in d.incdirs: + cd = os.path.join(d.curdir, sd) + headerdirs.append(os.path.join(self.environment.get_source_dir(), cd)) + headerdirs.append(os.path.join(self.environment.get_build_dir(), cd)) + for l in target.link_targets: + abs_path = os.path.join(self.environment.get_build_dir(), + l.subdir, buildtype, l.get_filename()) + dep_libs.append("'%s'" % abs_path) + if isinstance(l, build.SharedLibrary): + links_dylib = True + if links_dylib: + dep_libs = ['-Wl,-search_paths_first', '-Wl,-headerpad_max_install_names'] + dep_libs + dylib_version = None + if isinstance(target, build.SharedLibrary): + ldargs = ['-dynamiclib', '-Wl,-headerpad_max_install_names'] + dep_libs + install_path = os.path.join(self.environment.get_build_dir(), target.subdir, buildtype) + dylib_version = target.soversion + else: + ldargs = dep_libs + install_path = '' + if dylib_version is not None: + product_name = target.get_basename() + '.' + dylib_version + else: + product_name = target.get_basename() + ldargs += target.link_args + for dep in target.get_external_deps(): + ldargs += dep.get_link_args() + ldstr = ' '.join(ldargs) + valid = self.buildconfmap[target_name][buildtype] + langargs = {} + for lang in self.environment.coredata.compilers[target.for_machine]: + if lang not in langnamemap: + continue + # Add compile args added using add_project_arguments() + pargs = self.build.projects_args[target.for_machine].get(target.subproject, {}).get(lang, []) + # Add compile args added using add_global_arguments() + # These override per-project arguments + gargs = self.build.global_args[target.for_machine].get(lang, []) + targs = target.get_extra_args(lang) + args = pargs + gargs + targs + if args: + langargs[langnamemap[lang]] = args + symroot = os.path.join(self.environment.get_build_dir(), target.subdir) + self.write_line('%s /* %s */ = {' % (valid, buildtype)) + self.indent_level += 1 + self.write_line('isa = XCBuildConfiguration;') + self.write_line('buildSettings = {') + self.indent_level += 1 + self.write_line('COMBINE_HIDPI_IMAGES = YES;') + if dylib_version is not None: + self.write_line('DYLIB_CURRENT_VERSION = "%s";' % dylib_version) + self.write_line('EXECUTABLE_PREFIX = "%s";' % target.prefix) + if target.suffix == '': + suffix = '' + else: + suffix = '.' + target.suffix + self.write_line('EXECUTABLE_SUFFIX = "%s";' % suffix) + self.write_line('GCC_GENERATE_DEBUGGING_SYMBOLS = YES;') + self.write_line('GCC_INLINES_ARE_PRIVATE_EXTERN = NO;') + self.write_line('GCC_OPTIMIZATION_LEVEL = 0;') + if target.has_pch: + # Xcode uses GCC_PREFIX_HEADER which only allows one file per target/executable. Precompiling various header files and + # applying a particular pch to each source file will require custom scripts (as a build phase) and build flags per each + # file. Since Xcode itself already discourages precompiled headers in favor of modules we don't try much harder here. + pchs = target.get_pch('c') + target.get_pch('cpp') + target.get_pch('objc') + target.get_pch('objcpp') + # Make sure to use headers (other backends require implementation files like *.c *.cpp, etc; these should not be used here) + pchs = [pch for pch in pchs if pch.endswith('.h') or pch.endswith('.hh') or pch.endswith('hpp')] + if pchs: + if len(pchs) > 1: + mlog.warning('Unsupported Xcode configuration: More than 1 precompiled header found "%s". Target "%s" might not compile correctly.' % (str(pchs), target.name)) + relative_pch_path = os.path.join(target.get_subdir(), pchs[0]) # Path relative to target so it can be used with "$(PROJECT_DIR)" + self.write_line('GCC_PRECOMPILE_PREFIX_HEADER = YES;') + self.write_line('GCC_PREFIX_HEADER = "$(PROJECT_DIR)/%s";' % relative_pch_path) + self.write_line('GCC_PREPROCESSOR_DEFINITIONS = "";') + self.write_line('GCC_SYMBOLS_PRIVATE_EXTERN = NO;') + if headerdirs: + quotedh = ','.join(['"\\"%s\\""' % i for i in headerdirs]) + self.write_line('HEADER_SEARCH_PATHS=(%s);' % quotedh) + self.write_line('INSTALL_PATH = "%s";' % install_path) + self.write_line('LIBRARY_SEARCH_PATHS = "";') + if isinstance(target, build.SharedLibrary): + self.write_line('LIBRARY_STYLE = DYNAMIC;') + for langname, args in langargs.items(): + self.write_build_setting_line('OTHER_%sFLAGS' % langname, args) + self.write_line('OTHER_LDFLAGS = "%s";' % ldstr) + self.write_line('OTHER_REZFLAGS = "";') + self.write_line('PRODUCT_NAME = %s;' % product_name) + self.write_line('SECTORDER_FLAGS = "";') + self.write_line('SYMROOT = "%s";' % symroot) + self.write_build_setting_line('SYSTEM_HEADER_SEARCH_PATHS', [self.environment.get_build_dir()]) + self.write_line('USE_HEADERMAP = NO;') + self.write_build_setting_line('WARNING_CFLAGS', ['-Wmost', '-Wno-four-char-constants', '-Wno-unknown-pragmas']) + self.indent_level -= 1 + self.write_line('};') + self.write_line('name = %s;' % buildtype) + self.indent_level -= 1 + self.write_line('};') + self.ofile.write('/* End XCBuildConfiguration section */\n') + + def generate_xc_configurationList(self): + # FIXME: sort items + self.ofile.write('\n/* Begin XCConfigurationList section */\n') + self.write_line('%s /* Build configuration list for PBXProject "%s" */ = {' % (self.project_conflist, self.build.project_name)) + self.indent_level += 1 + self.write_line('isa = XCConfigurationList;') + self.write_line('buildConfigurations = (') + self.indent_level += 1 + for buildtype in self.buildtypes: + self.write_line('%s /* %s */,' % (self.project_configurations[buildtype], buildtype)) + self.indent_level -= 1 + self.write_line(');') + self.write_line('defaultConfigurationIsVisible = 0;') + self.write_line('defaultConfigurationName = debug;') + self.indent_level -= 1 + self.write_line('};') + + # Now the all target + self.write_line('%s /* Build configuration list for PBXAggregateTarget "ALL_BUILD" */ = {' % self.all_buildconf_id) + self.indent_level += 1 + self.write_line('isa = XCConfigurationList;') + self.write_line('buildConfigurations = (') + self.indent_level += 1 + for buildtype in self.buildtypes: + self.write_line('%s /* %s */,' % (self.buildall_configurations[buildtype], buildtype)) + self.indent_level -= 1 + self.write_line(');') + self.write_line('defaultConfigurationIsVisible = 0;') + self.write_line('defaultConfigurationName = debug;') + self.indent_level -= 1 + self.write_line('};') + + # Test target + self.write_line('%s /* Build configuration list for PBXAggregateTarget "ALL_BUILD" */ = {' % self.test_buildconf_id) + self.indent_level += 1 + self.write_line('isa = XCConfigurationList;') + self.write_line('buildConfigurations = (') + self.indent_level += 1 + for buildtype in self.buildtypes: + self.write_line('%s /* %s */,' % (self.test_configurations[buildtype], buildtype)) + self.indent_level -= 1 + self.write_line(');') + self.write_line('defaultConfigurationIsVisible = 0;') + self.write_line('defaultConfigurationName = debug;') + self.indent_level -= 1 + self.write_line('};') + + for target_name in self.build.targets: + listid = self.buildconflistmap[target_name] + self.write_line('%s /* Build configuration list for PBXNativeTarget "%s" */ = {' % (listid, target_name)) + self.indent_level += 1 + self.write_line('isa = XCConfigurationList;') + self.write_line('buildConfigurations = (') + self.indent_level += 1 + typestr = 'debug' + idval = self.buildconfmap[target_name][typestr] + self.write_line('%s /* %s */,' % (idval, typestr)) + self.indent_level -= 1 + self.write_line(');') + self.write_line('defaultConfigurationIsVisible = 0;') + self.write_line('defaultConfigurationName = %s;' % typestr) + self.indent_level -= 1 + self.write_line('};') + self.ofile.write('/* End XCConfigurationList section */\n') + + def write_build_setting_line(self, flag_name, flag_values, explicit=False): + if flag_values: + if len(flag_values) == 1: + value = flag_values[0] + if (' ' in value): + # If path contains spaces surround it with double colon + self.write_line('%s = "\\"%s\\"";' % (flag_name, value)) + else: + self.write_line('%s = "%s";' % (flag_name, value)) + else: + self.write_line('%s = (' % flag_name) + self.indent_level += 1 + for value in flag_values: + if (' ' in value): + # If path contains spaces surround it with double colon + self.write_line('"\\"%s\\"",' % value) + else: + self.write_line('"%s",' % value) + self.indent_level -= 1 + self.write_line(');') + else: + if explicit: + self.write_line('%s = "";' % flag_name) + + def generate_prefix(self): + self.ofile.write('// !$*UTF8*$!\n{\n') + self.indent_level += 1 + self.write_line('archiveVersion = 1;\n') + self.write_line('classes = {\n') + self.write_line('};\n') + self.write_line('objectVersion = 46;\n') + self.write_line('objects = {\n') + self.indent_level += 1 + + def generate_suffix(self): + self.indent_level -= 1 + self.write_line('};\n') + self.write_line('rootObject = ' + self.project_uid + ' /* Project object */;') + self.indent_level -= 1 + self.write_line('}\n') diff --git a/meson/mesonbuild/build.py b/meson/mesonbuild/build.py new file mode 100644 index 000000000..a06979cea --- /dev/null +++ b/meson/mesonbuild/build.py @@ -0,0 +1,2520 @@ +# Copyright 2012-2017 The Meson development team + +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at + +# http://www.apache.org/licenses/LICENSE-2.0 + +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from collections import OrderedDict, defaultdict +from functools import lru_cache +import copy +import hashlib +import itertools, pathlib +import os +import pickle +import re +import typing as T + +from . import environment +from . import dependencies +from . import mlog +from .mesonlib import ( + File, MesonException, MachineChoice, PerMachine, OrderedSet, listify, + extract_as_list, typeslistify, stringlistify, classify_unity_sources, + get_filenames_templates_dict, substitute_values, has_path_sep, unholder +) +from .compilers import ( + Compiler, all_languages, is_object, clink_langs, sort_clink, lang_suffixes, + is_known_suffix +) +from .linkers import StaticLinker +from .interpreterbase import FeatureNew + +pch_kwargs = set(['c_pch', 'cpp_pch']) + +lang_arg_kwargs = set([ + 'c_args', + 'cpp_args', + 'cuda_args', + 'd_args', + 'd_import_dirs', + 'd_unittest', + 'd_module_versions', + 'd_debug', + 'fortran_args', + 'java_args', + 'objc_args', + 'objcpp_args', + 'rust_args', + 'vala_args', + 'cs_args', +]) + +vala_kwargs = set(['vala_header', 'vala_gir', 'vala_vapi']) +rust_kwargs = set(['rust_crate_type']) +cs_kwargs = set(['resources', 'cs_args']) + +buildtarget_kwargs = set([ + 'build_by_default', + 'build_rpath', + 'dependencies', + 'extra_files', + 'gui_app', + 'link_with', + 'link_whole', + 'link_args', + 'link_depends', + 'implicit_include_directories', + 'include_directories', + 'install', + 'install_rpath', + 'install_dir', + 'install_mode', + 'name_prefix', + 'name_suffix', + 'native', + 'objects', + 'override_options', + 'sources', + 'gnu_symbol_visibility', + 'link_language', +]) + +known_build_target_kwargs = ( + buildtarget_kwargs | + lang_arg_kwargs | + pch_kwargs | + vala_kwargs | + rust_kwargs | + cs_kwargs) + +known_exe_kwargs = known_build_target_kwargs | {'implib', 'export_dynamic', 'pie'} +known_shlib_kwargs = known_build_target_kwargs | {'version', 'soversion', 'vs_module_defs', 'darwin_versions'} +known_shmod_kwargs = known_build_target_kwargs | {'vs_module_defs'} +known_stlib_kwargs = known_build_target_kwargs | {'pic'} +known_jar_kwargs = known_exe_kwargs | {'main_class'} + +@lru_cache(maxsize=None) +def get_target_macos_dylib_install_name(ld) -> str: + name = ['@rpath/', ld.prefix, ld.name] + if ld.soversion is not None: + name.append('.' + ld.soversion) + name.append('.dylib') + return ''.join(name) + +class InvalidArguments(MesonException): + pass + +class DependencyOverride: + def __init__(self, dep, node, explicit=True): + self.dep = dep + self.node = node + self.explicit = explicit + +class Build: + """A class that holds the status of one build including + all dependencies and so on. + """ + + def __init__(self, environment: environment.Environment): + self.project_name = 'name of master project' + self.project_version = None + self.environment = environment + self.projects = {} + self.targets = OrderedDict() + self.run_target_names = set() # type: T.Set[T.Tuple[str, str]] + self.global_args = PerMachine({}, {}) # type: PerMachine[T.Dict[str, T.List[str]]] + self.projects_args = PerMachine({}, {}) # type: PerMachine[T.Dict[str, T.List[str]]] + self.global_link_args = PerMachine({}, {}) # type: PerMachine[T.Dict[str, T.List[str]]] + self.projects_link_args = PerMachine({}, {}) # type: PerMachine[T.Dict[str, T.List[str]]] + self.tests = [] + self.benchmarks = [] + self.headers = [] + self.man = [] + self.data = [] + self.static_linker = PerMachine(None, None) # type: PerMachine[StaticLinker] + self.subprojects = {} + self.subproject_dir = '' + self.install_scripts = [] + self.postconf_scripts = [] + self.dist_scripts = [] + self.install_dirs = [] + self.dep_manifest_name = None + self.dep_manifest = {} + self.stdlibs = PerMachine({}, {}) + self.test_setups = {} # type: T.Dict[str, TestSetup] + self.test_setup_default_name = None + self.find_overrides = {} + self.searched_programs = set() # The list of all programs that have been searched for. + self.dependency_overrides = PerMachine({}, {}) + + def copy(self): + other = Build(self.environment) + for k, v in self.__dict__.items(): + if isinstance(v, (list, dict, set, OrderedDict)): + other.__dict__[k] = v.copy() + else: + other.__dict__[k] = v + return other + + def merge(self, other): + for k, v in other.__dict__.items(): + self.__dict__[k] = v + + def ensure_static_linker(self, compiler): + if self.static_linker[compiler.for_machine] is None and compiler.needs_static_linker(): + self.static_linker[compiler.for_machine] = self.environment.detect_static_linker(compiler) + + def get_project(self): + return self.projects[''] + + def get_subproject_dir(self): + return self.subproject_dir + + def get_targets(self): + return self.targets + + def get_tests(self): + return self.tests + + def get_benchmarks(self): + return self.benchmarks + + def get_headers(self): + return self.headers + + def get_man(self): + return self.man + + def get_data(self): + return self.data + + def get_install_subdirs(self): + return self.install_dirs + + def get_global_args(self, compiler, for_machine): + d = self.global_args[for_machine] + return d.get(compiler.get_language(), []) + + def get_project_args(self, compiler, project, for_machine): + d = self.projects_args[for_machine] + args = d.get(project) + if not args: + return [] + return args.get(compiler.get_language(), []) + + def get_global_link_args(self, compiler, for_machine): + d = self.global_link_args[for_machine] + return d.get(compiler.get_language(), []) + + def get_project_link_args(self, compiler, project, for_machine): + d = self.projects_link_args[for_machine] + + link_args = d.get(project) + if not link_args: + return [] + + return link_args.get(compiler.get_language(), []) + +class IncludeDirs: + def __init__(self, curdir, dirs, is_system, extra_build_dirs=None): + self.curdir = curdir + self.incdirs = dirs + self.is_system = is_system + # Interpreter has validated that all given directories + # actually exist. + if extra_build_dirs is None: + self.extra_build_dirs = [] + else: + self.extra_build_dirs = extra_build_dirs + + def __repr__(self): + r = '<{} {}/{}>' + return r.format(self.__class__.__name__, self.curdir, self.incdirs) + + def get_curdir(self): + return self.curdir + + def get_incdirs(self): + return self.incdirs + + def get_extra_build_dirs(self): + return self.extra_build_dirs + +class ExtractedObjects: + ''' + Holds a list of sources for which the objects must be extracted + ''' + def __init__(self, target, srclist=None, genlist=None, objlist=None, recursive=True): + self.target = target + self.recursive = recursive + self.srclist = srclist if srclist is not None else [] + self.genlist = genlist if genlist is not None else [] + self.objlist = objlist if objlist is not None else [] + if self.target.is_unity: + self.check_unity_compatible() + + def __repr__(self): + r = '<{0} {1!r}: {2}>' + return r.format(self.__class__.__name__, self.target.name, self.srclist) + + def classify_all_sources(self, sources, generated_sources): + # Merge sources and generated sources + sources = list(sources) + for gensrc in generated_sources: + for s in gensrc.get_outputs(): + # We cannot know the path where this source will be generated, + # but all we need here is the file extension to determine the + # compiler. + sources.append(s) + + # Filter out headers and all non-source files + sources = [s for s in sources if environment.is_source(s) and not environment.is_header(s)] + + return classify_unity_sources(self.target.compilers.values(), sources) + + def check_unity_compatible(self): + # Figure out if the extracted object list is compatible with a Unity + # build. When we're doing a Unified build, we go through the sources, + # and create a single source file from each subset of the sources that + # can be compiled with a specific compiler. Then we create one object + # from each unified source file. So for each compiler we can either + # extra all its sources or none. + cmpsrcs = self.classify_all_sources(self.target.sources, self.target.generated) + extracted_cmpsrcs = self.classify_all_sources(self.srclist, self.genlist) + + for comp, srcs in extracted_cmpsrcs.items(): + if set(srcs) != set(cmpsrcs[comp]): + raise MesonException('Single object files can not be extracted ' + 'in Unity builds. You can only extract all ' + 'the object files for each compiler at once.') + + def get_outputs(self, backend): + # TODO: Consider if we need to handle genlist here + return [ + backend.object_filename_from_source(self.target, source) + for source in self.srclist + ] + +class EnvironmentVariables: + def __init__(self): + self.envvars = [] + # The set of all env vars we have operations for. Only used for self.has_name() + self.varnames = set() + + def __repr__(self): + repr_str = "<{0}: {1}>" + return repr_str.format(self.__class__.__name__, self.envvars) + + def add_var(self, method, name, args, kwargs): + self.varnames.add(name) + self.envvars.append((method, name, args, kwargs)) + + def has_name(self, name): + return name in self.varnames + + def get_value(self, values, kwargs): + separator = kwargs.get('separator', os.pathsep) + + value = '' + for var in values: + value += separator + var + return separator, value.strip(separator) + + def set(self, env, name, values, kwargs): + return self.get_value(values, kwargs)[1] + + def append(self, env, name, values, kwargs): + sep, value = self.get_value(values, kwargs) + if name in env: + return env[name] + sep + value + return value + + def prepend(self, env, name, values, kwargs): + sep, value = self.get_value(values, kwargs) + if name in env: + return value + sep + env[name] + + return value + + def get_env(self, full_env: T.Dict[str, str]) -> T.Dict[str, str]: + env = full_env.copy() + for method, name, values, kwargs in self.envvars: + env[name] = method(full_env, name, values, kwargs) + return env + +class Target: + def __init__(self, name, subdir, subproject, build_by_default, for_machine: MachineChoice): + if has_path_sep(name): + # Fix failing test 53 when this becomes an error. + mlog.warning('''Target "{}" has a path separator in its name. +This is not supported, it can cause unexpected failures and will become +a hard error in the future.'''.format(name)) + self.name = name + self.subdir = subdir + self.subproject = subproject + self.build_by_default = build_by_default + self.for_machine = for_machine + self.install = False + self.build_always_stale = False + self.option_overrides_base = {} + self.option_overrides_compiler = defaultdict(dict) + if not hasattr(self, 'typename'): + raise RuntimeError('Target type is not set for target class "{}". This is a bug'.format(type(self).__name__)) + + def __lt__(self, other: T.Any) -> T.Union[bool, type(NotImplemented)]: + if not hasattr(other, 'get_id') and not callable(other.get_id): + return NotImplemented + return self.get_id() < other.get_id() + + def __le__(self, other: T.Any) -> T.Union[bool, type(NotImplemented)]: + if not hasattr(other, 'get_id') and not callable(other.get_id): + return NotImplemented + return self.get_id() <= other.get_id() + + def __gt__(self, other: T.Any) -> T.Union[bool, type(NotImplemented)]: + if not hasattr(other, 'get_id') and not callable(other.get_id): + return NotImplemented + return self.get_id() > other.get_id() + + def __ge__(self, other: T.Any) -> T.Union[bool, type(NotImplemented)]: + if not hasattr(other, 'get_id') and not callable(other.get_id): + return NotImplemented + return self.get_id() >= other.get_id() + + def get_install_dir(self, environment): + # Find the installation directory. + default_install_dir = self.get_default_install_dir(environment) + outdirs = self.get_custom_install_dir() + if outdirs[0] is not None and outdirs[0] != default_install_dir and outdirs[0] is not True: + # Either the value is set to a non-default value, or is set to + # False (which means we want this specific output out of many + # outputs to not be installed). + custom_install_dir = True + else: + custom_install_dir = False + outdirs[0] = default_install_dir + return outdirs, custom_install_dir + + def get_basename(self): + return self.name + + def get_subdir(self): + return self.subdir + + def get_typename(self): + return self.typename + + @staticmethod + def _get_id_hash(target_id): + # We don't really need cryptographic security here. + # Small-digest hash function with unlikely collision is good enough. + h = hashlib.sha256() + h.update(target_id.encode(encoding='utf-8', errors='replace')) + # This ID should be case-insensitive and should work in Visual Studio, + # e.g. it should not start with leading '-'. + return h.hexdigest()[:7] + + @staticmethod + def construct_id_from_path(subdir, name, type_suffix): + """Construct target ID from subdir, name and type suffix. + + This helper function is made public mostly for tests.""" + # This ID must also be a valid file name on all OSs. + # It should also avoid shell metacharacters for obvious + # reasons. '@' is not used as often as '_' in source code names. + # In case of collisions consider using checksums. + # FIXME replace with assert when slash in names is prohibited + name_part = name.replace('/', '@').replace('\\', '@') + assert not has_path_sep(type_suffix) + my_id = name_part + type_suffix + if subdir: + subdir_part = Target._get_id_hash(subdir) + # preserve myid for better debuggability + return subdir_part + '@@' + my_id + return my_id + + def get_id(self): + return self.construct_id_from_path( + self.subdir, self.name, self.type_suffix()) + + def process_kwargs_base(self, kwargs): + if 'build_by_default' in kwargs: + self.build_by_default = kwargs['build_by_default'] + if not isinstance(self.build_by_default, bool): + raise InvalidArguments('build_by_default must be a boolean value.') + elif kwargs.get('install', False): + # For backward compatibility, if build_by_default is not explicitly + # set, use the value of 'install' if it's enabled. + self.build_by_default = True + + option_overrides = self.parse_overrides(kwargs) + + for k, v in option_overrides.items(): + if '_' in k: + lang, k2 = k.split('_', 1) + if lang in all_languages: + self.option_overrides_compiler[lang][k2] = v + continue + self.option_overrides_base[k] = v + + def parse_overrides(self, kwargs) -> dict: + result = {} + overrides = stringlistify(kwargs.get('override_options', [])) + for o in overrides: + if '=' not in o: + raise InvalidArguments('Overrides must be of form "key=value"') + k, v = o.split('=', 1) + k = k.strip() + v = v.strip() + result[k] = v + return result + + def is_linkable_target(self) -> bool: + return False + +class BuildTarget(Target): + known_kwargs = known_build_target_kwargs + + def __init__(self, name, subdir, subproject, for_machine: MachineChoice, sources, objects, environment, kwargs): + super().__init__(name, subdir, subproject, True, for_machine) + unity_opt = environment.coredata.get_builtin_option('unity') + self.is_unity = unity_opt == 'on' or (unity_opt == 'subprojects' and subproject != '') + self.environment = environment + self.sources = [] + self.compilers = OrderedDict() # type: OrderedDict[str, Compiler] + self.objects = [] + self.external_deps = [] + self.include_dirs = [] + self.link_language = kwargs.get('link_language') + self.link_targets = [] + self.link_whole_targets = [] + self.link_depends = [] + self.added_deps = set() + self.name_prefix_set = False + self.name_suffix_set = False + self.filename = 'no_name' + # The list of all files outputted by this target. Useful in cases such + # as Vala which generates .vapi and .h besides the compiled output. + self.outputs = [self.filename] + self.need_install = False + self.pch = {} + self.extra_args = {} + self.generated = [] + self.extra_files = [] + self.d_features = {} + self.pic = False + self.pie = False + # Track build_rpath entries so we can remove them at install time + self.rpath_dirs_to_remove = set() + # Sources can be: + # 1. Pre-existing source files in the source tree + # 2. Pre-existing sources generated by configure_file in the build tree + # 3. Sources files generated by another target or a Generator + self.process_sourcelist(sources) + # Objects can be: + # 1. Pre-existing objects provided by the user with the `objects:` kwarg + # 2. Compiled objects created by and extracted from another target + self.process_objectlist(objects) + self.process_kwargs(kwargs, environment) + self.check_unknown_kwargs(kwargs) + self.process_compilers() + if not any([self.sources, self.generated, self.objects, self.link_whole]): + raise InvalidArguments('Build target {} has no sources.'.format(name)) + self.process_compilers_late() + self.validate_sources() + self.validate_install(environment) + self.check_module_linking() + + def __repr__(self): + repr_str = "<{0} {1}: {2}>" + return repr_str.format(self.__class__.__name__, self.get_id(), self.filename) + + def __str__(self): + return "{}".format(self.name) + + def validate_install(self, environment): + if self.for_machine is MachineChoice.BUILD and self.need_install: + if environment.is_cross_build(): + raise InvalidArguments('Tried to install a target for the build machine in a cross build.') + else: + mlog.warning('Installing target build for the build machine. This will fail in a cross build.') + + def check_unknown_kwargs(self, kwargs): + # Override this method in derived classes that have more + # keywords. + self.check_unknown_kwargs_int(kwargs, self.known_kwargs) + + def check_unknown_kwargs_int(self, kwargs, known_kwargs): + unknowns = [] + for k in kwargs: + if k not in known_kwargs: + unknowns.append(k) + if len(unknowns) > 0: + mlog.warning('Unknown keyword argument(s) in target {}: {}.'.format(self.name, ', '.join(unknowns))) + + def process_objectlist(self, objects): + assert(isinstance(objects, list)) + for s in unholder(objects): + if isinstance(s, (str, File, ExtractedObjects)): + self.objects.append(s) + elif isinstance(s, (GeneratedList, CustomTarget)): + msg = 'Generated files are not allowed in the \'objects\' kwarg ' + \ + 'for target {!r}.\nIt is meant only for '.format(self.name) + \ + 'pre-built object files that are shipped with the\nsource ' + \ + 'tree. Try adding it in the list of sources.' + raise InvalidArguments(msg) + else: + msg = 'Bad object of type {!r} in target {!r}.'.format(type(s).__name__, self.name) + raise InvalidArguments(msg) + + def process_sourcelist(self, sources): + sources = listify(sources) + added_sources = {} # If the same source is defined multiple times, use it only once. + for s in unholder(sources): + if isinstance(s, File): + if s not in added_sources: + self.sources.append(s) + added_sources[s] = True + elif isinstance(s, (GeneratedList, CustomTarget, CustomTargetIndex)): + self.generated.append(s) + else: + msg = 'Bad source of type {!r} in target {!r}.'.format(type(s).__name__, self.name) + raise InvalidArguments(msg) + + @staticmethod + def can_compile_remove_sources(compiler, sources): + removed = False + for s in sources[:]: + if compiler.can_compile(s): + sources.remove(s) + removed = True + return removed + + def process_compilers_late(self): + """Processes additional compilers after kwargs have been evaluated. + + This can add extra compilers that might be required by keyword + arguments, such as link_with or dependencies. It will also try to guess + which compiler to use if one hasn't been selected already. + """ + # Populate list of compilers + compilers = self.environment.coredata.compilers[self.for_machine] + + # did user override clink_langs for this target? + link_langs = [self.link_language] if self.link_language else clink_langs + + # If this library is linked against another library we need to consider + # the languages of those libraries as well. + if self.link_targets or self.link_whole_targets: + extra = set() + for t in itertools.chain(self.link_targets, self.link_whole_targets): + if isinstance(t, CustomTarget) or isinstance(t, CustomTargetIndex): + continue # We can't know anything about these. + for name, compiler in t.compilers.items(): + if name in link_langs: + extra.add((name, compiler)) + for name, compiler in sorted(extra, key=lambda p: sort_clink(p[0])): + self.compilers[name] = compiler + + if not self.compilers: + # No source files or parent targets, target consists of only object + # files of unknown origin. Just add the first clink compiler + # that we have and hope that it can link these objects + for lang in link_langs: + if lang in compilers: + self.compilers[lang] = compilers[lang] + break + + def process_compilers(self): + ''' + Populate self.compilers, which is the list of compilers that this + target will use for compiling all its sources. + We also add compilers that were used by extracted objects to simplify + dynamic linker determination. + ''' + if not self.sources and not self.generated and not self.objects: + return + # Populate list of compilers + compilers = self.environment.coredata.compilers[self.for_machine] + # Pre-existing sources + sources = list(self.sources) + # All generated sources + for gensrc in self.generated: + for s in gensrc.get_outputs(): + # Generated objects can't be compiled, so don't use them for + # compiler detection. If our target only has generated objects, + # we will fall back to using the first c-like compiler we find, + # which is what we need. + if not is_object(s): + sources.append(s) + for d in unholder(self.external_deps): + for s in d.sources: + if isinstance(s, (str, File)): + sources.append(s) + + # Sources that were used to create our extracted objects + for o in self.objects: + if not isinstance(o, ExtractedObjects): + continue + for s in o.srclist: + # Don't add Vala sources since that will pull in the Vala + # compiler even though we will never use it since we are + # dealing with compiled C code. + if not s.endswith(lang_suffixes['vala']): + sources.append(s) + if sources: + # For each source, try to add one compiler that can compile it. + # + # If it has a suffix that belongs to a known language, we must have + # a compiler for that language. + # + # Otherwise, it's ok if no compilers can compile it, because users + # are expected to be able to add arbitrary non-source files to the + # sources list + for s in sources: + for lang, compiler in compilers.items(): + if compiler.can_compile(s): + if lang not in self.compilers: + self.compilers[lang] = compiler + break + else: + if is_known_suffix(s): + raise MesonException('No {} machine compiler for "{}"'. + format(self.for_machine.get_lower_case_name(), s)) + + # Re-sort according to clink_langs + self.compilers = OrderedDict(sorted(self.compilers.items(), + key=lambda t: sort_clink(t[0]))) + + # If all our sources are Vala, our target also needs the C compiler but + # it won't get added above. + if 'vala' in self.compilers and 'c' not in self.compilers: + self.compilers['c'] = compilers['c'] + + def validate_sources(self): + if not self.sources: + return + for lang in ('cs', 'java'): + if lang in self.compilers: + check_sources = list(self.sources) + compiler = self.compilers[lang] + if not self.can_compile_remove_sources(compiler, check_sources): + m = 'No {} sources found in target {!r}'.format(lang, self.name) + raise InvalidArguments(m) + if check_sources: + m = '{0} targets can only contain {0} files:\n'.format(lang.capitalize()) + m += '\n'.join([repr(c) for c in check_sources]) + raise InvalidArguments(m) + # CSharp and Java targets can't contain any other file types + assert(len(self.compilers) == 1) + return + + def process_link_depends(self, sources, environment): + """Process the link_depends keyword argument. + + This is designed to handle strings, Files, and the output of Custom + Targets. Notably it doesn't handle generator() returned objects, since + adding them as a link depends would inherently cause them to be + generated twice, since the output needs to be passed to the ld_args and + link_depends. + """ + sources = listify(sources) + for s in unholder(sources): + if isinstance(s, File): + self.link_depends.append(s) + elif isinstance(s, str): + self.link_depends.append( + File.from_source_file(environment.source_dir, self.subdir, s)) + elif hasattr(s, 'get_outputs'): + self.link_depends.extend( + [File.from_built_file(s.get_subdir(), p) for p in s.get_outputs()]) + else: + raise InvalidArguments( + 'Link_depends arguments must be strings, Files, ' + 'or a Custom Target, or lists thereof.') + + def get_original_kwargs(self): + return self.kwargs + + def unpack_holder(self, d): + d = listify(d) + newd = [] + for i in d: + if isinstance(i, list): + i = self.unpack_holder(i) + elif hasattr(i, 'held_object'): + i = i.held_object + for t in ['dependencies', 'link_with', 'include_directories', 'sources']: + if hasattr(i, t): + setattr(i, t, self.unpack_holder(getattr(i, t))) + newd.append(i) + return newd + + def copy_kwargs(self, kwargs): + self.kwargs = copy.copy(kwargs) + # This sucks quite badly. Arguments + # are holders but they can't be pickled + # so unpack those known. + for k, v in self.kwargs.items(): + if isinstance(v, list): + self.kwargs[k] = self.unpack_holder(v) + if hasattr(v, 'held_object'): + self.kwargs[k] = v.held_object + for t in ['dependencies', 'link_with', 'include_directories', 'sources']: + if t in self.kwargs: + self.kwargs[t] = self.unpack_holder(self.kwargs[t]) + + def extract_objects(self, srclist): + obj_src = [] + for src in srclist: + if isinstance(src, str): + src = File(False, self.subdir, src) + elif isinstance(src, File): + FeatureNew.single_use('File argument for extract_objects', '0.50.0', self.subproject) + else: + raise MesonException('Object extraction arguments must be strings or Files.') + # FIXME: It could be a generated source + if src not in self.sources: + raise MesonException('Tried to extract unknown source {}.'.format(src)) + obj_src.append(src) + return ExtractedObjects(self, obj_src) + + def extract_all_objects(self, recursive=True): + return ExtractedObjects(self, self.sources, self.generated, self.objects, + recursive) + + def get_all_link_deps(self): + return self.get_transitive_link_deps() + + @lru_cache(maxsize=None) + def get_transitive_link_deps(self): + result = [] + for i in self.link_targets: + result += i.get_all_link_deps() + return result + + def get_link_deps_mapping(self, prefix, environment): + return self.get_transitive_link_deps_mapping(prefix, environment) + + @lru_cache(maxsize=None) + def get_transitive_link_deps_mapping(self, prefix, environment): + result = {} + for i in self.link_targets: + mapping = i.get_link_deps_mapping(prefix, environment) + #we are merging two dictionaries, while keeping the earlier one dominant + result_tmp = mapping.copy() + result_tmp.update(result) + result = result_tmp + return result + + @lru_cache(maxsize=None) + def get_link_dep_subdirs(self): + result = OrderedSet() + for i in self.link_targets: + if not isinstance(i, StaticLibrary): + result.add(i.get_subdir()) + result.update(i.get_link_dep_subdirs()) + return result + + def get_default_install_dir(self, environment): + return environment.get_libdir() + + def get_custom_install_dir(self): + return self.install_dir + + def get_custom_install_mode(self): + return self.install_mode + + def process_kwargs(self, kwargs, environment): + self.process_kwargs_base(kwargs) + self.copy_kwargs(kwargs) + kwargs.get('modules', []) + self.need_install = kwargs.get('install', self.need_install) + llist = extract_as_list(kwargs, 'link_with') + for linktarget in unholder(llist): + if isinstance(linktarget, dependencies.ExternalLibrary): + raise MesonException('''An external library was used in link_with keyword argument, which +is reserved for libraries built as part of this project. External +libraries must be passed using the dependencies keyword argument +instead, because they are conceptually "external dependencies", +just like those detected with the dependency() function.''') + self.link(linktarget) + lwhole = extract_as_list(kwargs, 'link_whole') + for linktarget in lwhole: + self.link_whole(linktarget) + + c_pchlist, cpp_pchlist, clist, cpplist, cudalist, cslist, valalist, objclist, objcpplist, fortranlist, rustlist \ + = [extract_as_list(kwargs, c) for c in ['c_pch', 'cpp_pch', 'c_args', 'cpp_args', 'cuda_args', 'cs_args', 'vala_args', 'objc_args', 'objcpp_args', 'fortran_args', 'rust_args']] + + self.add_pch('c', c_pchlist) + self.add_pch('cpp', cpp_pchlist) + compiler_args = {'c': clist, 'cpp': cpplist, 'cuda': cudalist, 'cs': cslist, 'vala': valalist, 'objc': objclist, 'objcpp': objcpplist, + 'fortran': fortranlist, 'rust': rustlist + } + for key, value in compiler_args.items(): + self.add_compiler_args(key, value) + + if not isinstance(self, Executable) or 'export_dynamic' in kwargs: + self.vala_header = kwargs.get('vala_header', self.name + '.h') + self.vala_vapi = kwargs.get('vala_vapi', self.name + '.vapi') + self.vala_gir = kwargs.get('vala_gir', None) + + dlist = stringlistify(kwargs.get('d_args', [])) + self.add_compiler_args('d', dlist) + dfeatures = dict() + dfeature_unittest = kwargs.get('d_unittest', False) + if dfeature_unittest: + dfeatures['unittest'] = dfeature_unittest + dfeature_versions = kwargs.get('d_module_versions', []) + if dfeature_versions: + dfeatures['versions'] = dfeature_versions + dfeature_debug = kwargs.get('d_debug', []) + if dfeature_debug: + dfeatures['debug'] = dfeature_debug + if 'd_import_dirs' in kwargs: + dfeature_import_dirs = unholder(extract_as_list(kwargs, 'd_import_dirs')) + for d in dfeature_import_dirs: + if not isinstance(d, IncludeDirs): + raise InvalidArguments('Arguments to d_import_dirs must be include_directories.') + dfeatures['import_dirs'] = dfeature_import_dirs + if dfeatures: + self.d_features = dfeatures + + self.link_args = extract_as_list(kwargs, 'link_args') + for i in self.link_args: + if not isinstance(i, str): + raise InvalidArguments('Link_args arguments must be strings.') + for l in self.link_args: + if '-Wl,-rpath' in l or l.startswith('-rpath'): + mlog.warning('''Please do not define rpath with a linker argument, use install_rpath or build_rpath properties instead. +This will become a hard error in a future Meson release.''') + self.process_link_depends(kwargs.get('link_depends', []), environment) + # Target-specific include dirs must be added BEFORE include dirs from + # internal deps (added inside self.add_deps()) to override them. + inclist = extract_as_list(kwargs, 'include_directories') + self.add_include_dirs(inclist) + # Add dependencies (which also have include_directories) + deplist = extract_as_list(kwargs, 'dependencies') + self.add_deps(deplist) + # If an item in this list is False, the output corresponding to + # the list index of that item will not be installed + self.install_dir = typeslistify(kwargs.get('install_dir', [None]), + (str, bool)) + self.install_mode = kwargs.get('install_mode', None) + main_class = kwargs.get('main_class', '') + if not isinstance(main_class, str): + raise InvalidArguments('Main class must be a string') + self.main_class = main_class + if isinstance(self, Executable): + self.gui_app = kwargs.get('gui_app', False) + if not isinstance(self.gui_app, bool): + raise InvalidArguments('Argument gui_app must be boolean.') + elif 'gui_app' in kwargs: + raise InvalidArguments('Argument gui_app can only be used on executables.') + extra_files = extract_as_list(kwargs, 'extra_files') + for i in extra_files: + assert(isinstance(i, File)) + trial = os.path.join(environment.get_source_dir(), i.subdir, i.fname) + if not(os.path.isfile(trial)): + raise InvalidArguments('Tried to add non-existing extra file {}.'.format(i)) + self.extra_files = extra_files + self.install_rpath = kwargs.get('install_rpath', '') + if not isinstance(self.install_rpath, str): + raise InvalidArguments('Install_rpath is not a string.') + self.build_rpath = kwargs.get('build_rpath', '') + if not isinstance(self.build_rpath, str): + raise InvalidArguments('Build_rpath is not a string.') + resources = extract_as_list(kwargs, 'resources') + for r in resources: + if not isinstance(r, str): + raise InvalidArguments('Resource argument is not a string.') + trial = os.path.join(environment.get_source_dir(), self.subdir, r) + if not os.path.isfile(trial): + raise InvalidArguments('Tried to add non-existing resource {}.'.format(r)) + self.resources = resources + if 'name_prefix' in kwargs: + name_prefix = kwargs['name_prefix'] + if isinstance(name_prefix, list): + if name_prefix: + raise InvalidArguments('name_prefix array must be empty to signify default.') + else: + if not isinstance(name_prefix, str): + raise InvalidArguments('name_prefix must be a string.') + self.prefix = name_prefix + self.name_prefix_set = True + if 'name_suffix' in kwargs: + name_suffix = kwargs['name_suffix'] + if isinstance(name_suffix, list): + if name_suffix: + raise InvalidArguments('name_suffix array must be empty to signify default.') + else: + if not isinstance(name_suffix, str): + raise InvalidArguments('name_suffix must be a string.') + if name_suffix == '': + raise InvalidArguments('name_suffix should not be an empty string. ' + 'If you want meson to use the default behaviour ' + 'for each platform pass `[]` (empty array)') + self.suffix = name_suffix + self.name_suffix_set = True + if isinstance(self, StaticLibrary): + # You can't disable PIC on OS X. The compiler ignores -fno-PIC. + # PIC is always on for Windows (all code is position-independent + # since library loading is done differently) + m = self.environment.machines[self.for_machine] + if m.is_darwin() or m.is_windows(): + self.pic = True + else: + self.pic = self._extract_pic_pie(kwargs, 'pic') + if isinstance(self, Executable): + # Executables must be PIE on Android + if self.environment.machines[self.for_machine].is_android(): + self.pie = True + else: + self.pie = self._extract_pic_pie(kwargs, 'pie') + self.implicit_include_directories = kwargs.get('implicit_include_directories', True) + if not isinstance(self.implicit_include_directories, bool): + raise InvalidArguments('Implicit_include_directories must be a boolean.') + self.gnu_symbol_visibility = kwargs.get('gnu_symbol_visibility', '') + if not isinstance(self.gnu_symbol_visibility, str): + raise InvalidArguments('GNU symbol visibility must be a string.') + if self.gnu_symbol_visibility != '': + permitted = ['default', 'internal', 'hidden', 'protected', 'inlineshidden'] + if self.gnu_symbol_visibility not in permitted: + raise InvalidArguments('GNU symbol visibility arg {} not one of: {}'.format(self.symbol_visibility, ', '.join(permitted))) + + def _extract_pic_pie(self, kwargs, arg): + # Check if we have -fPIC, -fpic, -fPIE, or -fpie in cflags + all_flags = self.extra_args['c'] + self.extra_args['cpp'] + if '-f' + arg.lower() in all_flags or '-f' + arg.upper() in all_flags: + mlog.warning("Use the '{}' kwarg instead of passing '{}' manually to {!r}".format(arg, '-f' + arg, self.name)) + return True + + val = kwargs.get(arg, False) + if not isinstance(val, bool): + raise InvalidArguments('Argument {} to {!r} must be boolean'.format(arg, self.name)) + return val + + def get_filename(self): + return self.filename + + def get_outputs(self): + return self.outputs + + def get_extra_args(self, language): + return self.extra_args.get(language, []) + + def get_dependencies(self, exclude=None, for_pkgconfig=False): + transitive_deps = [] + if exclude is None: + exclude = [] + for t in itertools.chain(self.link_targets, self.link_whole_targets): + if t in transitive_deps or t in exclude: + continue + # When generating `Libs:` and `Libs.private:` lists in pkg-config + # files we don't want to include static libraries that we link_whole + # or are uninstalled (they're implicitly promoted to link_whole). + # But we still need to include their transitive dependencies, + # a static library we link_whole would itself link to a shared + # library or an installed static library. + if not for_pkgconfig or (not t.is_internal() and t not in self.link_whole_targets): + transitive_deps.append(t) + if isinstance(t, StaticLibrary): + transitive_deps += t.get_dependencies(transitive_deps + exclude, for_pkgconfig) + return transitive_deps + + def get_source_subdir(self): + return self.subdir + + def get_sources(self): + return self.sources + + def get_objects(self): + return self.objects + + def get_generated_sources(self): + return self.generated + + def should_install(self): + return self.need_install + + def has_pch(self): + return len(self.pch) > 0 + + def get_pch(self, language): + try: + return self.pch[language] + except KeyError: + return[] + + def get_include_dirs(self): + return self.include_dirs + + def add_deps(self, deps): + deps = listify(deps) + for dep in unholder(deps): + if dep in self.added_deps: + continue + if isinstance(dep, dependencies.InternalDependency): + # Those parts that are internal. + self.process_sourcelist(dep.sources) + self.add_include_dirs(dep.include_directories, dep.get_include_type()) + for l in dep.libraries: + self.link(l) + for l in dep.whole_libraries: + self.link_whole(l) + if dep.get_compile_args() or dep.get_link_args(): + # Those parts that are external. + extpart = dependencies.InternalDependency('undefined', + [], + dep.get_compile_args(), + dep.get_link_args(), + [], [], [], [], {}) + self.external_deps.append(extpart) + # Deps of deps. + self.add_deps(dep.ext_deps) + elif isinstance(dep, dependencies.Dependency): + if dep not in self.external_deps: + self.external_deps.append(dep) + self.process_sourcelist(dep.get_sources()) + self.add_deps(dep.ext_deps) + elif isinstance(dep, BuildTarget): + raise InvalidArguments('''Tried to use a build target as a dependency. +You probably should put it in link_with instead.''') + else: + # This is a bit of a hack. We do not want Build to know anything + # about the interpreter so we can't import it and use isinstance. + # This should be reliable enough. + if hasattr(dep, 'project_args_frozen') or hasattr(dep, 'global_args_frozen'): + raise InvalidArguments('Tried to use subproject object as a dependency.\n' + 'You probably wanted to use a dependency declared in it instead.\n' + 'Access it by calling get_variable() on the subproject object.') + raise InvalidArguments('Argument is of an unacceptable type {!r}.\nMust be ' + 'either an external dependency (returned by find_library() or ' + 'dependency()) or an internal dependency (returned by ' + 'declare_dependency()).'.format(type(dep).__name__)) + self.added_deps.add(dep) + + def get_external_deps(self): + return self.external_deps + + def is_internal(self): + return isinstance(self, StaticLibrary) and not self.need_install + + def link(self, target): + for t in unholder(listify(target)): + if isinstance(self, StaticLibrary) and self.need_install and t.is_internal(): + # When we're a static library and we link_with to an + # internal/convenience library, promote to link_whole. + return self.link_whole(t) + if not isinstance(t, (Target, CustomTargetIndex)): + raise InvalidArguments('{!r} is not a target.'.format(t)) + if not t.is_linkable_target(): + raise InvalidArguments("Link target '{!s}' is not linkable.".format(t)) + if isinstance(self, SharedLibrary) and isinstance(t, StaticLibrary) and not t.pic: + msg = "Can't link non-PIC static library {!r} into shared library {!r}. ".format(t.name, self.name) + msg += "Use the 'pic' option to static_library to build with PIC." + raise InvalidArguments(msg) + if self.for_machine is not t.for_machine: + msg = 'Tried to mix libraries for machines {} and {} in target {!r}'.format(self.for_machine, t.for_machine, self.name) + if self.environment.is_cross_build(): + raise InvalidArguments(msg + ' This is not possible in a cross build.') + else: + mlog.warning(msg + ' This will fail in cross build.') + self.link_targets.append(t) + + def link_whole(self, target): + for t in unholder(listify(target)): + if isinstance(t, (CustomTarget, CustomTargetIndex)): + if not t.is_linkable_target(): + raise InvalidArguments('Custom target {!r} is not linkable.'.format(t)) + if not t.get_filename().endswith('.a'): + raise InvalidArguments('Can only link_whole custom targets that are .a archives.') + if isinstance(self, StaticLibrary): + # FIXME: We could extract the .a archive to get object files + raise InvalidArguments('Cannot link_whole a custom target into a static library') + elif not isinstance(t, StaticLibrary): + raise InvalidArguments('{!r} is not a static library.'.format(t)) + elif isinstance(self, SharedLibrary) and not t.pic: + msg = "Can't link non-PIC static library {!r} into shared library {!r}. ".format(t.name, self.name) + msg += "Use the 'pic' option to static_library to build with PIC." + raise InvalidArguments(msg) + if self.for_machine is not t.for_machine: + msg = 'Tried to mix libraries for machines {1} and {2} in target {0!r}'.format(self.name, self.for_machine, t.for_machine) + if self.environment.is_cross_build(): + raise InvalidArguments(msg + ' This is not possible in a cross build.') + else: + mlog.warning(msg + ' This will fail in cross build.') + if isinstance(self, StaticLibrary): + # When we're a static library and we link_whole: to another static + # library, we need to add that target's objects to ourselves. + self.objects += t.extract_all_objects_recurse() + self.link_whole_targets.append(t) + + def extract_all_objects_recurse(self): + objs = [self.extract_all_objects()] + for t in self.link_targets: + if t.is_internal(): + objs += t.extract_all_objects_recurse() + return objs + + def add_pch(self, language, pchlist): + if not pchlist: + return + elif len(pchlist) == 1: + if not environment.is_header(pchlist[0]): + raise InvalidArguments('PCH argument {} is not a header.'.format(pchlist[0])) + elif len(pchlist) == 2: + if environment.is_header(pchlist[0]): + if not environment.is_source(pchlist[1]): + raise InvalidArguments('PCH definition must contain one header and at most one source.') + elif environment.is_source(pchlist[0]): + if not environment.is_header(pchlist[1]): + raise InvalidArguments('PCH definition must contain one header and at most one source.') + pchlist = [pchlist[1], pchlist[0]] + else: + raise InvalidArguments('PCH argument {} is of unknown type.'.format(pchlist[0])) + + if (os.path.dirname(pchlist[0]) != os.path.dirname(pchlist[1])): + raise InvalidArguments('PCH files must be stored in the same folder.') + + mlog.warning('PCH source files are deprecated, only a single header file should be used.') + elif len(pchlist) > 2: + raise InvalidArguments('PCH definition may have a maximum of 2 files.') + for f in pchlist: + if not isinstance(f, str): + raise MesonException('PCH arguments must be strings.') + if not os.path.isfile(os.path.join(self.environment.source_dir, self.subdir, f)): + raise MesonException('File {} does not exist.'.format(f)) + self.pch[language] = pchlist + + def add_include_dirs(self, args, set_is_system: T.Optional[str] = None): + ids = [] + for a in unholder(args): + if not isinstance(a, IncludeDirs): + raise InvalidArguments('Include directory to be added is not an include directory object.') + ids.append(a) + if set_is_system is None: + set_is_system = 'preserve' + if set_is_system != 'preserve': + is_system = set_is_system == 'system' + ids = [IncludeDirs(x.get_curdir(), x.get_incdirs(), is_system, x.get_extra_build_dirs()) for x in ids] + self.include_dirs += ids + + def add_compiler_args(self, language, args): + args = listify(args) + for a in args: + if not isinstance(a, (str, File)): + raise InvalidArguments('A non-string passed to compiler args.') + if language in self.extra_args: + self.extra_args[language] += args + else: + self.extra_args[language] = args + + def get_aliases(self): + return {} + + def get_langs_used_by_deps(self) -> T.List[str]: + ''' + Sometimes you want to link to a C++ library that exports C API, which + means the linker must link in the C++ stdlib, and we must use a C++ + compiler for linking. The same is also applicable for objc/objc++, etc, + so we can keep using clink_langs for the priority order. + + See: https://github.com/mesonbuild/meson/issues/1653 + ''' + langs = [] # type: T.List[str] + + # Check if any of the external libraries were written in this language + for dep in self.external_deps: + if dep.language is None: + continue + if dep.language not in langs: + langs.append(dep.language) + # Check if any of the internal libraries this target links to were + # written in this language + for link_target in itertools.chain(self.link_targets, self.link_whole_targets): + if isinstance(link_target, (CustomTarget, CustomTargetIndex)): + continue + for language in link_target.compilers: + if language not in langs: + langs.append(language) + + return langs + + def get_clink_dynamic_linker_and_stdlibs(self): + ''' + We use the order of languages in `clink_langs` to determine which + linker to use in case the target has sources compiled with multiple + compilers. All languages other than those in this list have their own + linker. + Note that Vala outputs C code, so Vala sources can use any linker + that can link compiled C. We don't actually need to add an exception + for Vala here because of that. + ''' + # Populate list of all compilers, not just those being used to compile + # sources in this target + all_compilers = self.environment.coredata.compilers[self.for_machine] + + # If the user set the link_language, just return that. + if self.link_language: + comp = all_compilers[self.link_language] + return comp, comp.language_stdlib_only_link_flags() + + # Languages used by dependencies + dep_langs = self.get_langs_used_by_deps() + # Pick a compiler based on the language priority-order + for l in clink_langs: + if l in self.compilers or l in dep_langs: + try: + linker = all_compilers[l] + except KeyError: + raise MesonException( + 'Could not get a dynamic linker for build target {!r}. ' + 'Requires a linker for language "{}", but that is not ' + 'a project language.'.format(self.name, l)) + stdlib_args = [] + added_languages = set() + for dl in itertools.chain(self.compilers, dep_langs): + if dl != linker.language: + stdlib_args += all_compilers[dl].language_stdlib_only_link_flags() + added_languages.add(dl) + # Type of var 'linker' is Compiler. + # Pretty hard to fix because the return value is passed everywhere + return linker, stdlib_args + + m = 'Could not get a dynamic linker for build target {!r}' + raise AssertionError(m.format(self.name)) + + def get_using_rustc(self): + if len(self.sources) > 0 and self.sources[0].fname.endswith('.rs'): + return True + + def get_using_msvc(self): + ''' + Check if the dynamic linker is MSVC. Used by Executable, StaticLibrary, + and SharedLibrary for deciding when to use MSVC-specific file naming + and debug filenames. + + If at least some code is built with MSVC and the final library is + linked with MSVC, we can be sure that some debug info will be + generated. We only check the dynamic linker here because the static + linker is guaranteed to be of the same type. + + Interesting cases: + 1. The Vala compiler outputs C code to be compiled by whatever + C compiler we're using, so all objects will still be created by the + MSVC compiler. + 2. If the target contains only objects, process_compilers guesses and + picks the first compiler that smells right. + ''' + compiler, _ = self.get_clink_dynamic_linker_and_stdlibs() + # Mixing many languages with MSVC is not supported yet so ignore stdlibs. + if compiler and compiler.get_linker_id() in {'link', 'lld-link', 'xilink', 'optlink'}: + return True + return False + + def check_module_linking(self): + ''' + Warn if shared modules are linked with target: (link_with) #2865 + ''' + for link_target in self.link_targets: + if isinstance(link_target, SharedModule): + if self.environment.machines[self.for_machine].is_darwin(): + raise MesonException('''target links against shared modules. +This is not permitted on OSX''') + else: + mlog.warning('''target links against shared modules. This is not +recommended as it is not supported on some platforms''') + return + +class Generator: + def __init__(self, args, kwargs): + if len(args) != 1: + raise InvalidArguments('Generator requires exactly one positional argument: the executable') + exe = unholder(args[0]) + if not isinstance(exe, (Executable, dependencies.ExternalProgram)): + raise InvalidArguments('First generator argument must be an executable.') + self.exe = exe + self.depfile = None + self.capture = False + self.depends = [] + self.process_kwargs(kwargs) + + def __repr__(self): + repr_str = "<{0}: {1}>" + return repr_str.format(self.__class__.__name__, self.exe) + + def get_exe(self): + return self.exe + + def process_kwargs(self, kwargs): + if 'arguments' not in kwargs: + raise InvalidArguments('Generator must have "arguments" keyword argument.') + args = kwargs['arguments'] + if isinstance(args, str): + args = [args] + if not isinstance(args, list): + raise InvalidArguments('"Arguments" keyword argument must be a string or a list of strings.') + for a in args: + if not isinstance(a, str): + raise InvalidArguments('A non-string object in "arguments" keyword argument.') + self.arglist = args + if 'output' not in kwargs: + raise InvalidArguments('Generator must have "output" keyword argument.') + outputs = listify(kwargs['output']) + for rule in outputs: + if not isinstance(rule, str): + raise InvalidArguments('"output" may only contain strings.') + if '@BASENAME@' not in rule and '@PLAINNAME@' not in rule: + raise InvalidArguments('Every element of "output" must contain @BASENAME@ or @PLAINNAME@.') + if has_path_sep(rule): + raise InvalidArguments('"outputs" must not contain a directory separator.') + if len(outputs) > 1: + for o in outputs: + if '@OUTPUT@' in o: + raise InvalidArguments('Tried to use @OUTPUT@ in a rule with more than one output.') + self.outputs = outputs + if 'depfile' in kwargs: + depfile = kwargs['depfile'] + if not isinstance(depfile, str): + raise InvalidArguments('Depfile must be a string.') + if os.path.basename(depfile) != depfile: + raise InvalidArguments('Depfile must be a plain filename without a subdirectory.') + self.depfile = depfile + if 'capture' in kwargs: + capture = kwargs['capture'] + if not isinstance(capture, bool): + raise InvalidArguments('Capture must be boolean.') + self.capture = capture + if 'depends' in kwargs: + depends = unholder(listify(kwargs['depends'])) + for d in depends: + if not (isinstance(d, (BuildTarget, CustomTarget))): + raise InvalidArguments('Depends entries must be build targets.') + self.depends.append(d) + + def get_base_outnames(self, inname): + plainname = os.path.basename(inname) + basename = os.path.splitext(plainname)[0] + bases = [x.replace('@BASENAME@', basename).replace('@PLAINNAME@', plainname) for x in self.outputs] + return bases + + def get_dep_outname(self, inname): + if self.depfile is None: + raise InvalidArguments('Tried to get dep name for rule that does not have dependency file defined.') + plainname = os.path.basename(inname) + basename = os.path.splitext(plainname)[0] + return self.depfile.replace('@BASENAME@', basename).replace('@PLAINNAME@', plainname) + + def get_arglist(self, inname): + plainname = os.path.basename(inname) + basename = os.path.splitext(plainname)[0] + return [x.replace('@BASENAME@', basename).replace('@PLAINNAME@', plainname) for x in self.arglist] + + def is_parent_path(self, parent, trial): + relpath = pathlib.PurePath(trial).relative_to(parent) + return relpath.parts[0] != '..' # For subdirs we can only go "down". + + def process_files(self, name, files, state, preserve_path_from=None, extra_args=None): + output = GeneratedList(self, state.subdir, preserve_path_from, extra_args=extra_args if extra_args is not None else []) + for f in files: + if isinstance(f, str): + f = File.from_source_file(state.environment.source_dir, state.subdir, f) + elif not isinstance(f, File): + raise InvalidArguments('{} arguments must be strings or files not {!r}.'.format(name, f)) + if preserve_path_from: + abs_f = f.absolute_path(state.environment.source_dir, state.environment.build_dir) + if not self.is_parent_path(preserve_path_from, abs_f): + raise InvalidArguments('When using preserve_path_from, all input files must be in a subdirectory of the given dir.') + output.add_file(f, state) + return output + + +class GeneratedList: + def __init__(self, generator, subdir, preserve_path_from=None, extra_args=None): + self.generator = unholder(generator) + self.name = self.generator.exe + self.subdir = subdir + self.infilelist = [] + self.outfilelist = [] + self.outmap = {} + self.extra_depends = [] + self.depend_files = [] + self.preserve_path_from = preserve_path_from + self.extra_args = extra_args if extra_args is not None else [] + if isinstance(self.generator.exe, dependencies.ExternalProgram): + if not self.generator.exe.found(): + raise InvalidArguments('Tried to use not-found external program as generator') + path = self.generator.exe.get_path() + if os.path.isabs(path): + # Can only add a dependency on an external program which we + # know the absolute path of + self.depend_files.append(File.from_absolute_file(path)) + + def add_preserved_path_segment(self, infile, outfiles, state): + result = [] + in_abs = infile.absolute_path(state.environment.source_dir, state.environment.build_dir) + assert(os.path.isabs(self.preserve_path_from)) + rel = os.path.relpath(in_abs, self.preserve_path_from) + path_segment = os.path.dirname(rel) + for of in outfiles: + result.append(os.path.join(path_segment, of)) + return result + + def add_file(self, newfile, state): + self.infilelist.append(newfile) + outfiles = self.generator.get_base_outnames(newfile.fname) + if self.preserve_path_from: + outfiles = self.add_preserved_path_segment(newfile, outfiles, state) + self.outfilelist += outfiles + self.outmap[newfile] = outfiles + + def get_inputs(self): + return self.infilelist + + def get_outputs(self): + return self.outfilelist + + def get_outputs_for(self, filename): + return self.outmap[filename] + + def get_generator(self): + return self.generator + + def get_extra_args(self): + return self.extra_args + +class Executable(BuildTarget): + known_kwargs = known_exe_kwargs + + def __init__(self, name, subdir, subproject, for_machine: MachineChoice, sources, objects, environment, kwargs): + self.typename = 'executable' + if 'pie' not in kwargs and 'b_pie' in environment.coredata.base_options: + kwargs['pie'] = environment.coredata.base_options['b_pie'].value + super().__init__(name, subdir, subproject, for_machine, sources, objects, environment, kwargs) + # Unless overridden, executables have no suffix or prefix. Except on + # Windows and with C#/Mono executables where the suffix is 'exe' + if not hasattr(self, 'prefix'): + self.prefix = '' + if not hasattr(self, 'suffix'): + machine = environment.machines[for_machine] + # Executable for Windows or C#/Mono + if machine.is_windows() or machine.is_cygwin() or 'cs' in self.compilers: + self.suffix = 'exe' + elif machine.system.startswith('wasm') or machine.system == 'emscripten': + self.suffix = 'js' + elif ('c' in self.compilers and self.compilers['c'].get_id().startswith('arm') or + 'cpp' in self.compilers and self.compilers['cpp'].get_id().startswith('arm')): + self.suffix = 'axf' + elif ('c' in self.compilers and self.compilers['c'].get_id().startswith('ccrx') or + 'cpp' in self.compilers and self.compilers['cpp'].get_id().startswith('ccrx')): + self.suffix = 'abs' + elif ('c' in self.compilers and self.compilers['c'].get_id().startswith('xc16')): + self.suffix = 'elf' + elif ('c' in self.compilers and self.compilers['c'].get_id().startswith('c2000') or + 'cpp' in self.compilers and self.compilers['cpp'].get_id().startswith('c2000')): + self.suffix = 'out' + else: + self.suffix = environment.machines[for_machine].get_exe_suffix() + self.filename = self.name + if self.suffix: + self.filename += '.' + self.suffix + self.outputs = [self.filename] + + # The import library this target will generate + self.import_filename = None + # The import library that Visual Studio would generate (and accept) + self.vs_import_filename = None + # The import library that GCC would generate (and prefer) + self.gcc_import_filename = None + # The debugging information file this target will generate + self.debug_filename = None + + # Check for export_dynamic + self.export_dynamic = False + if kwargs.get('export_dynamic'): + if not isinstance(kwargs['export_dynamic'], bool): + raise InvalidArguments('"export_dynamic" keyword argument must be a boolean') + self.export_dynamic = True + if kwargs.get('implib'): + self.export_dynamic = True + if self.export_dynamic and kwargs.get('implib') is False: + raise InvalidArguments('"implib" keyword argument must not be false for if "export_dynamic" is true') + + m = environment.machines[for_machine] + + # If using export_dynamic, set the import library name + if self.export_dynamic: + implib_basename = self.name + '.exe' + if not isinstance(kwargs.get('implib', False), bool): + implib_basename = kwargs['implib'] + if m.is_windows() or m.is_cygwin(): + self.vs_import_filename = '{0}.lib'.format(implib_basename) + self.gcc_import_filename = 'lib{0}.a'.format(implib_basename) + if self.get_using_msvc(): + self.import_filename = self.vs_import_filename + else: + self.import_filename = self.gcc_import_filename + + if m.is_windows() and ('cs' in self.compilers or + self.get_using_rustc() or + self.get_using_msvc()): + self.debug_filename = self.name + '.pdb' + + # Only linkwithable if using export_dynamic + self.is_linkwithable = self.export_dynamic + + def get_default_install_dir(self, environment): + return environment.get_bindir() + + def description(self): + '''Human friendly description of the executable''' + return self.name + + def type_suffix(self): + return "@exe" + + def get_import_filename(self): + """ + The name of the import library that will be outputted by the compiler + + Returns None if there is no import library required for this platform + """ + return self.import_filename + + def get_import_filenameslist(self): + if self.import_filename: + return [self.vs_import_filename, self.gcc_import_filename] + return [] + + def get_debug_filename(self): + """ + The name of debuginfo file that will be created by the compiler + + Returns None if the build won't create any debuginfo file + """ + return self.debug_filename + + def is_linkable_target(self): + return self.is_linkwithable + +class StaticLibrary(BuildTarget): + known_kwargs = known_stlib_kwargs + + def __init__(self, name, subdir, subproject, for_machine: MachineChoice, sources, objects, environment, kwargs): + self.typename = 'static library' + if 'pic' not in kwargs and 'b_staticpic' in environment.coredata.base_options: + kwargs['pic'] = environment.coredata.base_options['b_staticpic'].value + super().__init__(name, subdir, subproject, for_machine, sources, objects, environment, kwargs) + if 'cs' in self.compilers: + raise InvalidArguments('Static libraries not supported for C#.') + if 'rust' in self.compilers: + # If no crate type is specified, or it's the generic lib type, use rlib + if not hasattr(self, 'rust_crate_type') or self.rust_crate_type == 'lib': + mlog.debug('Defaulting Rust static library target crate type to rlib') + self.rust_crate_type = 'rlib' + # Don't let configuration proceed with a non-static crate type + elif self.rust_crate_type not in ['rlib', 'staticlib']: + raise InvalidArguments('Crate type "{0}" invalid for static libraries; must be "rlib" or "staticlib"'.format(self.rust_crate_type)) + # By default a static library is named libfoo.a even on Windows because + # MSVC does not have a consistent convention for what static libraries + # are called. The MSVC CRT uses libfoo.lib syntax but nothing else uses + # it and GCC only looks for static libraries called foo.lib and + # libfoo.a. However, we cannot use foo.lib because that's the same as + # the import library. Using libfoo.a is ok because people using MSVC + # always pass the library filename while linking anyway. + if not hasattr(self, 'prefix'): + self.prefix = 'lib' + if not hasattr(self, 'suffix'): + if 'rust' in self.compilers: + if not hasattr(self, 'rust_crate_type') or self.rust_crate_type == 'rlib': + # default Rust static library suffix + self.suffix = 'rlib' + elif self.rust_crate_type == 'staticlib': + self.suffix = 'a' + else: + self.suffix = 'a' + self.filename = self.prefix + self.name + '.' + self.suffix + self.outputs = [self.filename] + + def get_link_deps_mapping(self, prefix, environment): + return {} + + def get_default_install_dir(self, environment): + return environment.get_static_lib_dir() + + def type_suffix(self): + return "@sta" + + def process_kwargs(self, kwargs, environment): + super().process_kwargs(kwargs, environment) + if 'rust_crate_type' in kwargs: + rust_crate_type = kwargs['rust_crate_type'] + if isinstance(rust_crate_type, str): + self.rust_crate_type = rust_crate_type + else: + raise InvalidArguments('Invalid rust_crate_type "{0}": must be a string.'.format(rust_crate_type)) + + def is_linkable_target(self): + return True + +class SharedLibrary(BuildTarget): + known_kwargs = known_shlib_kwargs + + def __init__(self, name, subdir, subproject, for_machine: MachineChoice, sources, objects, environment, kwargs): + self.typename = 'shared library' + self.soversion = None + self.ltversion = None + # Max length 2, first element is compatibility_version, second is current_version + self.darwin_versions = [] + self.vs_module_defs = None + # The import library this target will generate + self.import_filename = None + # The import library that Visual Studio would generate (and accept) + self.vs_import_filename = None + # The import library that GCC would generate (and prefer) + self.gcc_import_filename = None + # The debugging information file this target will generate + self.debug_filename = None + super().__init__(name, subdir, subproject, for_machine, sources, objects, environment, kwargs) + if 'rust' in self.compilers: + # If no crate type is specified, or it's the generic lib type, use dylib + if not hasattr(self, 'rust_crate_type') or self.rust_crate_type == 'lib': + mlog.debug('Defaulting Rust dynamic library target crate type to "dylib"') + self.rust_crate_type = 'dylib' + # Don't let configuration proceed with a non-dynamic crate type + elif self.rust_crate_type not in ['dylib', 'cdylib']: + raise InvalidArguments('Crate type "{0}" invalid for dynamic libraries; must be "dylib" or "cdylib"'.format(self.rust_crate_type)) + if not hasattr(self, 'prefix'): + self.prefix = None + if not hasattr(self, 'suffix'): + self.suffix = None + self.basic_filename_tpl = '{0.prefix}{0.name}.{0.suffix}' + self.determine_filenames(environment) + + def get_link_deps_mapping(self, prefix, environment): + result = {} + mappings = self.get_transitive_link_deps_mapping(prefix, environment) + old = get_target_macos_dylib_install_name(self) + if old not in mappings: + fname = self.get_filename() + outdirs, _ = self.get_install_dir(self.environment) + new = os.path.join(prefix, outdirs[0], fname) + result.update({old: new}) + mappings.update(result) + return mappings + + def get_default_install_dir(self, environment): + return environment.get_shared_lib_dir() + + def determine_filenames(self, env): + """ + See https://github.com/mesonbuild/meson/pull/417 for details. + + First we determine the filename template (self.filename_tpl), then we + set the output filename (self.filename). + + The template is needed while creating aliases (self.get_aliases), + which are needed while generating .so shared libraries for Linux. + + Besides this, there's also the import library name, which is only used + on Windows since on that platform the linker uses a separate library + called the "import library" during linking instead of the shared + library (DLL). The toolchain will output an import library in one of + two formats: GCC or Visual Studio. + + When we're building with Visual Studio, the import library that will be + generated by the toolchain is self.vs_import_filename, and with + MinGW/GCC, it's self.gcc_import_filename. self.import_filename will + always contain the import library name this target will generate. + """ + prefix = '' + suffix = '' + create_debug_file = False + self.filename_tpl = self.basic_filename_tpl + # NOTE: manual prefix/suffix override is currently only tested for C/C++ + # C# and Mono + if 'cs' in self.compilers: + prefix = '' + suffix = 'dll' + self.filename_tpl = '{0.prefix}{0.name}.{0.suffix}' + create_debug_file = True + # C, C++, Swift, Vala + # Only Windows uses a separate import library for linking + # For all other targets/platforms import_filename stays None + elif env.machines[self.for_machine].is_windows(): + suffix = 'dll' + self.vs_import_filename = '{0}{1}.lib'.format(self.prefix if self.prefix is not None else '', self.name) + self.gcc_import_filename = '{0}{1}.dll.a'.format(self.prefix if self.prefix is not None else 'lib', self.name) + if self.get_using_rustc(): + # Shared library is of the form foo.dll + prefix = '' + # Import library is called foo.dll.lib + self.import_filename = '{0}.dll.lib'.format(self.name) + create_debug_file = True + elif self.get_using_msvc(): + # Shared library is of the form foo.dll + prefix = '' + # Import library is called foo.lib + self.import_filename = self.vs_import_filename + create_debug_file = True + # Assume GCC-compatible naming + else: + # Shared library is of the form libfoo.dll + prefix = 'lib' + # Import library is called libfoo.dll.a + self.import_filename = self.gcc_import_filename + # Shared library has the soversion if it is defined + if self.soversion: + self.filename_tpl = '{0.prefix}{0.name}-{0.soversion}.{0.suffix}' + else: + self.filename_tpl = '{0.prefix}{0.name}.{0.suffix}' + elif env.machines[self.for_machine].is_cygwin(): + suffix = 'dll' + self.gcc_import_filename = '{0}{1}.dll.a'.format(self.prefix if self.prefix is not None else 'lib', self.name) + # Shared library is of the form cygfoo.dll + # (ld --dll-search-prefix=cyg is the default) + prefix = 'cyg' + # Import library is called libfoo.dll.a + self.import_filename = self.gcc_import_filename + if self.soversion: + self.filename_tpl = '{0.prefix}{0.name}-{0.soversion}.{0.suffix}' + else: + self.filename_tpl = '{0.prefix}{0.name}.{0.suffix}' + elif env.machines[self.for_machine].is_darwin(): + prefix = 'lib' + suffix = 'dylib' + # On macOS, the filename can only contain the major version + if self.soversion: + # libfoo.X.dylib + self.filename_tpl = '{0.prefix}{0.name}.{0.soversion}.{0.suffix}' + else: + # libfoo.dylib + self.filename_tpl = '{0.prefix}{0.name}.{0.suffix}' + elif env.machines[self.for_machine].is_android(): + prefix = 'lib' + suffix = 'so' + # Android doesn't support shared_library versioning + self.filename_tpl = '{0.prefix}{0.name}.{0.suffix}' + else: + prefix = 'lib' + suffix = 'so' + if self.ltversion: + # libfoo.so.X[.Y[.Z]] (.Y and .Z are optional) + self.filename_tpl = '{0.prefix}{0.name}.{0.suffix}.{0.ltversion}' + elif self.soversion: + # libfoo.so.X + self.filename_tpl = '{0.prefix}{0.name}.{0.suffix}.{0.soversion}' + else: + # No versioning, libfoo.so + self.filename_tpl = '{0.prefix}{0.name}.{0.suffix}' + if self.prefix is None: + self.prefix = prefix + if self.suffix is None: + self.suffix = suffix + self.filename = self.filename_tpl.format(self) + self.outputs = [self.filename] + if create_debug_file: + self.debug_filename = os.path.splitext(self.filename)[0] + '.pdb' + + @staticmethod + def _validate_darwin_versions(darwin_versions): + try: + if isinstance(darwin_versions, int): + darwin_versions = str(darwin_versions) + if isinstance(darwin_versions, str): + darwin_versions = 2 * [darwin_versions] + if not isinstance(darwin_versions, list): + raise InvalidArguments('Shared library darwin_versions: must be a string, integer,' + 'or a list, not {!r}'.format(darwin_versions)) + if len(darwin_versions) > 2: + raise InvalidArguments('Shared library darwin_versions: list must contain 2 or fewer elements') + if len(darwin_versions) == 1: + darwin_versions = 2 * darwin_versions + for i, v in enumerate(darwin_versions[:]): + if isinstance(v, int): + v = str(v) + if not isinstance(v, str): + raise InvalidArguments('Shared library darwin_versions: list elements ' + 'must be strings or integers, not {!r}'.format(v)) + if not re.fullmatch(r'[0-9]+(\.[0-9]+){0,2}', v): + raise InvalidArguments('Shared library darwin_versions: must be X.Y.Z where ' + 'X, Y, Z are numbers, and Y and Z are optional') + parts = v.split('.') + if len(parts) in (1, 2, 3) and int(parts[0]) > 65535: + raise InvalidArguments('Shared library darwin_versions: must be X.Y.Z ' + 'where X is [0, 65535] and Y, Z are optional') + if len(parts) in (2, 3) and int(parts[1]) > 255: + raise InvalidArguments('Shared library darwin_versions: must be X.Y.Z ' + 'where Y is [0, 255] and Y, Z are optional') + if len(parts) == 3 and int(parts[2]) > 255: + raise InvalidArguments('Shared library darwin_versions: must be X.Y.Z ' + 'where Z is [0, 255] and Y, Z are optional') + darwin_versions[i] = v + except ValueError: + raise InvalidArguments('Shared library darwin_versions: value is invalid') + return darwin_versions + + def process_kwargs(self, kwargs, environment): + super().process_kwargs(kwargs, environment) + + if not self.environment.machines[self.for_machine].is_android(): + supports_versioning = True + else: + supports_versioning = False + + if supports_versioning: + # Shared library version + if 'version' in kwargs: + self.ltversion = kwargs['version'] + if not isinstance(self.ltversion, str): + raise InvalidArguments('Shared library version needs to be a string, not ' + type(self.ltversion).__name__) + if not re.fullmatch(r'[0-9]+(\.[0-9]+){0,2}', self.ltversion): + raise InvalidArguments('Invalid Shared library version "{0}". Must be of the form X.Y.Z where all three are numbers. Y and Z are optional.'.format(self.ltversion)) + # Try to extract/deduce the soversion + if 'soversion' in kwargs: + self.soversion = kwargs['soversion'] + if isinstance(self.soversion, int): + self.soversion = str(self.soversion) + if not isinstance(self.soversion, str): + raise InvalidArguments('Shared library soversion is not a string or integer.') + elif self.ltversion: + # library version is defined, get the soversion from that + # We replicate what Autotools does here and take the first + # number of the version by default. + self.soversion = self.ltversion.split('.')[0] + # macOS, iOS and tvOS dylib compatibility_version and current_version + if 'darwin_versions' in kwargs: + self.darwin_versions = self._validate_darwin_versions(kwargs['darwin_versions']) + elif self.soversion: + # If unspecified, pick the soversion + self.darwin_versions = 2 * [self.soversion] + + # Visual Studio module-definitions file + if 'vs_module_defs' in kwargs: + path = unholder(kwargs['vs_module_defs']) + if isinstance(path, str): + if os.path.isabs(path): + self.vs_module_defs = File.from_absolute_file(path) + else: + self.vs_module_defs = File.from_source_file(environment.source_dir, self.subdir, path) + self.link_depends.append(self.vs_module_defs) + elif isinstance(path, File): + # When passing a generated file. + self.vs_module_defs = path + self.link_depends.append(path) + elif hasattr(path, 'get_filename'): + # When passing output of a Custom Target + path = File.from_built_file(path.subdir, path.get_filename()) + self.vs_module_defs = path + self.link_depends.append(path) + else: + raise InvalidArguments( + 'Shared library vs_module_defs must be either a string, ' + 'a file object or a Custom Target') + if 'rust_crate_type' in kwargs: + rust_crate_type = kwargs['rust_crate_type'] + if isinstance(rust_crate_type, str): + self.rust_crate_type = rust_crate_type + else: + raise InvalidArguments('Invalid rust_crate_type "{0}": must be a string.'.format(rust_crate_type)) + + def get_import_filename(self): + """ + The name of the import library that will be outputted by the compiler + + Returns None if there is no import library required for this platform + """ + return self.import_filename + + def get_debug_filename(self): + """ + The name of debuginfo file that will be created by the compiler + + Returns None if the build won't create any debuginfo file + """ + return self.debug_filename + + def get_import_filenameslist(self): + if self.import_filename: + return [self.vs_import_filename, self.gcc_import_filename] + return [] + + def get_all_link_deps(self): + return [self] + self.get_transitive_link_deps() + + def get_aliases(self): + """ + If the versioned library name is libfoo.so.0.100.0, aliases are: + * libfoo.so.0 (soversion) -> libfoo.so.0.100.0 + * libfoo.so (unversioned; for linking) -> libfoo.so.0 + Same for dylib: + * libfoo.dylib (unversioned; for linking) -> libfoo.0.dylib + """ + aliases = {} + # Aliases are only useful with .so and .dylib libraries. Also if + # there's no self.soversion (no versioning), we don't need aliases. + if self.suffix not in ('so', 'dylib') or not self.soversion: + return {} + # With .so libraries, the minor and micro versions are also in the + # filename. If ltversion != soversion we create an soversion alias: + # libfoo.so.0 -> libfoo.so.0.100.0 + # Where libfoo.so.0.100.0 is the actual library + if self.suffix == 'so' and self.ltversion and self.ltversion != self.soversion: + alias_tpl = self.filename_tpl.replace('ltversion', 'soversion') + ltversion_filename = alias_tpl.format(self) + aliases[ltversion_filename] = self.filename + # libfoo.so.0/libfoo.0.dylib is the actual library + else: + ltversion_filename = self.filename + # Unversioned alias: + # libfoo.so -> libfoo.so.0 + # libfoo.dylib -> libfoo.0.dylib + aliases[self.basic_filename_tpl.format(self)] = ltversion_filename + return aliases + + def type_suffix(self): + return "@sha" + + def is_linkable_target(self): + return True + +# A shared library that is meant to be used with dlopen rather than linking +# into something else. +class SharedModule(SharedLibrary): + known_kwargs = known_shmod_kwargs + + def __init__(self, name, subdir, subproject, for_machine: MachineChoice, sources, objects, environment, kwargs): + if 'version' in kwargs: + raise MesonException('Shared modules must not specify the version kwarg.') + if 'soversion' in kwargs: + raise MesonException('Shared modules must not specify the soversion kwarg.') + super().__init__(name, subdir, subproject, for_machine, sources, objects, environment, kwargs) + self.typename = 'shared module' + + def get_default_install_dir(self, environment): + return environment.get_shared_module_dir() + + +class CustomTarget(Target): + known_kwargs = set([ + 'input', + 'output', + 'command', + 'capture', + 'install', + 'install_dir', + 'install_mode', + 'build_always', + 'build_always_stale', + 'depends', + 'depend_files', + 'depfile', + 'build_by_default', + 'override_options', + 'console', + ]) + + def __init__(self, name, subdir, subproject, kwargs, absolute_paths=False, backend=None): + self.typename = 'custom' + # TODO expose keyword arg to make MachineChoice.HOST configurable + super().__init__(name, subdir, subproject, False, MachineChoice.HOST) + self.dependencies = [] + self.extra_depends = [] + self.depend_files = [] # Files that this target depends on but are not on the command line. + self.depfile = None + self.process_kwargs(kwargs, backend) + self.extra_files = [] + # Whether to use absolute paths for all files on the commandline + self.absolute_paths = absolute_paths + unknowns = [] + for k in kwargs: + if k not in CustomTarget.known_kwargs: + unknowns.append(k) + if len(unknowns) > 0: + mlog.warning('Unknown keyword arguments in target {}: {}'.format(self.name, ', '.join(unknowns))) + + def get_default_install_dir(self, environment): + return None + + def __repr__(self): + repr_str = "<{0} {1}: {2}>" + return repr_str.format(self.__class__.__name__, self.get_id(), self.command) + + def get_target_dependencies(self): + deps = self.dependencies[:] + deps += self.extra_depends + for c in unholder(self.sources): + if isinstance(c, (BuildTarget, CustomTarget)): + deps.append(c) + return deps + + def get_transitive_build_target_deps(self): + ''' + Recursively fetch the build targets that this custom target depends on, + whether through `command:`, `depends:`, or `sources:` The recursion is + only performed on custom targets. + This is useful for setting PATH on Windows for finding required DLLs. + F.ex, if you have a python script that loads a C module that links to + other DLLs in your project. + ''' + bdeps = set() + deps = self.get_target_dependencies() + for d in deps: + if isinstance(d, BuildTarget): + bdeps.add(d) + elif isinstance(d, CustomTarget): + bdeps.update(d.get_transitive_build_target_deps()) + return bdeps + + def flatten_command(self, cmd): + cmd = unholder(listify(cmd)) + final_cmd = [] + for c in cmd: + if isinstance(c, str): + final_cmd.append(c) + elif isinstance(c, File): + self.depend_files.append(c) + final_cmd.append(c) + elif isinstance(c, dependencies.ExternalProgram): + if not c.found(): + raise InvalidArguments('Tried to use not-found external program in "command"') + path = c.get_path() + if os.path.isabs(path): + # Can only add a dependency on an external program which we + # know the absolute path of + self.depend_files.append(File.from_absolute_file(path)) + final_cmd += c.get_command() + elif isinstance(c, (BuildTarget, CustomTarget)): + self.dependencies.append(c) + final_cmd.append(c) + elif isinstance(c, list): + final_cmd += self.flatten_command(c) + else: + raise InvalidArguments('Argument {!r} in "command" is invalid'.format(c)) + return final_cmd + + def process_kwargs(self, kwargs, backend): + self.process_kwargs_base(kwargs) + self.sources = unholder(extract_as_list(kwargs, 'input')) + if 'output' not in kwargs: + raise InvalidArguments('Missing keyword argument "output".') + self.outputs = listify(kwargs['output']) + # This will substitute values from the input into output and return it. + inputs = get_sources_string_names(self.sources, backend) + values = get_filenames_templates_dict(inputs, []) + for i in self.outputs: + if not(isinstance(i, str)): + raise InvalidArguments('Output argument not a string.') + if i == '': + raise InvalidArguments('Output must not be empty.') + if i.strip() == '': + raise InvalidArguments('Output must not consist only of whitespace.') + if has_path_sep(i): + raise InvalidArguments('Output {!r} must not contain a path segment.'.format(i)) + if '@INPUT@' in i or '@INPUT0@' in i: + m = 'Output cannot contain @INPUT@ or @INPUT0@, did you ' \ + 'mean @PLAINNAME@ or @BASENAME@?' + raise InvalidArguments(m) + # We already check this during substitution, but the error message + # will be unclear/confusing, so check it here. + if len(inputs) != 1 and ('@PLAINNAME@' in i or '@BASENAME@' in i): + m = "Output cannot contain @PLAINNAME@ or @BASENAME@ when " \ + "there is more than one input (we can't know which to use)" + raise InvalidArguments(m) + self.outputs = substitute_values(self.outputs, values) + self.capture = kwargs.get('capture', False) + if self.capture and len(self.outputs) != 1: + raise InvalidArguments('Capturing can only output to a single file.') + self.console = kwargs.get('console', False) + if not isinstance(self.console, bool): + raise InvalidArguments('"console" kwarg only accepts booleans') + if self.capture and self.console: + raise InvalidArguments("Can't both capture output and output to console") + if 'command' not in kwargs: + raise InvalidArguments('Missing keyword argument "command".') + if 'depfile' in kwargs: + depfile = kwargs['depfile'] + if not isinstance(depfile, str): + raise InvalidArguments('Depfile must be a string.') + if os.path.basename(depfile) != depfile: + raise InvalidArguments('Depfile must be a plain filename without a subdirectory.') + self.depfile = depfile + self.command = self.flatten_command(kwargs['command']) + if self.capture: + for c in self.command: + if isinstance(c, str) and '@OUTPUT@' in c: + raise InvalidArguments('@OUTPUT@ is not allowed when capturing output.') + if 'install' in kwargs: + self.install = kwargs['install'] + if not isinstance(self.install, bool): + raise InvalidArguments('"install" must be boolean.') + if self.install: + if 'install_dir' not in kwargs: + raise InvalidArguments('"install_dir" must be specified ' + 'when installing a target') + + if isinstance(kwargs['install_dir'], list): + FeatureNew.single_use('multiple install_dir for custom_target', '0.40.0', self.subproject) + # If an item in this list is False, the output corresponding to + # the list index of that item will not be installed + self.install_dir = typeslistify(kwargs['install_dir'], (str, bool)) + self.install_mode = kwargs.get('install_mode', None) + else: + self.install = False + self.install_dir = [None] + self.install_mode = None + if 'build_always' in kwargs and 'build_always_stale' in kwargs: + raise InvalidArguments('build_always and build_always_stale are mutually exclusive. Combine build_by_default and build_always_stale.') + elif 'build_always' in kwargs: + if 'build_by_default' not in kwargs: + self.build_by_default = kwargs['build_always'] + self.build_always_stale = kwargs['build_always'] + elif 'build_always_stale' in kwargs: + self.build_always_stale = kwargs['build_always_stale'] + if not isinstance(self.build_always_stale, bool): + raise InvalidArguments('Argument build_always_stale must be a boolean.') + extra_deps, depend_files = [extract_as_list(kwargs, c, pop=False) for c in ['depends', 'depend_files']] + for ed in unholder(extra_deps): + if not isinstance(ed, (CustomTarget, BuildTarget)): + raise InvalidArguments('Can only depend on toplevel targets: custom_target or build_target (executable or a library) got: {}({})' + .format(type(ed), ed)) + self.extra_depends.append(ed) + for i in depend_files: + if isinstance(i, (File, str)): + self.depend_files.append(i) + else: + mlog.debug(i) + raise InvalidArguments('Unknown type {!r} in depend_files.'.format(type(i).__name__)) + + def get_dependencies(self): + return self.dependencies + + def should_install(self): + return self.install + + def get_custom_install_dir(self): + return self.install_dir + + def get_custom_install_mode(self): + return self.install_mode + + def get_outputs(self): + return self.outputs + + def get_filename(self): + return self.outputs[0] + + def get_sources(self): + return self.sources + + def get_generated_lists(self): + genlists = [] + for c in unholder(self.sources): + if isinstance(c, GeneratedList): + genlists.append(c) + return genlists + + def get_generated_sources(self): + return self.get_generated_lists() + + def get_dep_outname(self, infilenames): + if self.depfile is None: + raise InvalidArguments('Tried to get depfile name for custom_target that does not have depfile defined.') + if len(infilenames): + plainname = os.path.basename(infilenames[0]) + basename = os.path.splitext(plainname)[0] + return self.depfile.replace('@BASENAME@', basename).replace('@PLAINNAME@', plainname) + else: + if '@BASENAME@' in self.depfile or '@PLAINNAME@' in self.depfile: + raise InvalidArguments('Substitution in depfile for custom_target that does not have an input file.') + return self.depfile + + def is_linkable_target(self): + if len(self.outputs) != 1: + return False + suf = os.path.splitext(self.outputs[0])[-1] + if suf == '.a' or suf == '.dll' or suf == '.lib' or suf == '.so': + return True + + def get_link_deps_mapping(self, prefix, environment): + return {} + + def get_link_dep_subdirs(self): + return OrderedSet() + + def get_all_link_deps(self): + return [] + + def type_suffix(self): + return "@cus" + + def __getitem__(self, index): + return CustomTargetIndex(self, self.outputs[index]) + + def __setitem__(self, index, value): + raise NotImplementedError + + def __delitem__(self, index): + raise NotImplementedError + + def __iter__(self): + for i in self.outputs: + yield CustomTargetIndex(self, i) + +class RunTarget(Target): + def __init__(self, name, command, args, dependencies, subdir, subproject): + self.typename = 'run' + # These don't produce output artifacts + super().__init__(name, subdir, subproject, False, MachineChoice.BUILD) + self.command = command + self.args = args + self.dependencies = dependencies + + def __repr__(self): + repr_str = "<{0} {1}: {2}>" + return repr_str.format(self.__class__.__name__, self.get_id(), self.command) + + def process_kwargs(self, kwargs): + return self.process_kwargs_base(kwargs) + + def get_dependencies(self): + return self.dependencies + + def get_generated_sources(self): + return [] + + def get_sources(self): + return [] + + def should_install(self): + return False + + def get_filename(self): + return self.name + + def get_outputs(self): + if isinstance(self.name, str): + return [self.name] + elif isinstance(self.name, list): + return self.name + else: + raise RuntimeError('RunTarget: self.name is neither a list nor a string. This is a bug') + + def type_suffix(self): + return "@run" + +class AliasTarget(RunTarget): + def __init__(self, name, dependencies, subdir, subproject): + super().__init__(name, '', [], dependencies, subdir, subproject) + +class Jar(BuildTarget): + known_kwargs = known_jar_kwargs + + def __init__(self, name, subdir, subproject, for_machine: MachineChoice, sources, objects, environment, kwargs): + self.typename = 'jar' + super().__init__(name, subdir, subproject, for_machine, sources, objects, environment, kwargs) + for s in self.sources: + if not s.endswith('.java'): + raise InvalidArguments('Jar source {} is not a java file.'.format(s)) + for t in self.link_targets: + if not isinstance(t, Jar): + raise InvalidArguments('Link target {} is not a jar target.'.format(t)) + self.filename = self.name + '.jar' + self.outputs = [self.filename] + self.java_args = kwargs.get('java_args', []) + + def get_main_class(self): + return self.main_class + + def type_suffix(self): + return "@jar" + + def get_java_args(self): + return self.java_args + + def validate_install(self, environment): + # All jar targets are installable. + pass + + def is_linkable_target(self): + return True + + def get_classpath_args(self): + cp_paths = [os.path.join(l.get_subdir(), l.get_filename()) for l in self.link_targets] + cp_string = os.pathsep.join(cp_paths) + if cp_string: + return ['-cp', os.pathsep.join(cp_paths)] + return [] + +class CustomTargetIndex: + + """A special opaque object returned by indexing a CustomTarget. This object + exists in meson, but acts as a proxy in the backends, making targets depend + on the CustomTarget it's derived from, but only adding one source file to + the sources. + """ + + def __init__(self, target, output): + self.typename = 'custom' + self.target = target + self.output = output + self.for_machine = target.for_machine + + def __repr__(self): + return ''.format( + self.target, self.target.get_outputs().index(self.output)) + + def get_outputs(self): + return [self.output] + + def get_subdir(self): + return self.target.get_subdir() + + def get_filename(self): + return self.output + + def get_id(self): + return self.target.get_id() + + def get_all_link_deps(self): + return self.target.get_all_link_deps() + + def get_link_deps_mapping(self, prefix, environment): + return self.target.get_link_deps_mapping(prefix, environment) + + def get_link_dep_subdirs(self): + return self.target.get_link_dep_subdirs() + + def is_linkable_target(self): + suf = os.path.splitext(self.output)[-1] + if suf == '.a' or suf == '.dll' or suf == '.lib' or suf == '.so': + return True + +class ConfigureFile: + + def __init__(self, subdir, sourcename, targetname, configuration_data): + self.subdir = subdir + self.sourcename = sourcename + self.targetname = targetname + self.configuration_data = configuration_data + + def __repr__(self): + repr_str = "<{0}: {1} -> {2}>" + src = os.path.join(self.subdir, self.sourcename) + dst = os.path.join(self.subdir, self.targetname) + return repr_str.format(self.__class__.__name__, src, dst) + + def get_configuration_data(self): + return self.configuration_data + + def get_subdir(self): + return self.subdir + + def get_source_name(self): + return self.sourcename + + def get_target_name(self): + return self.targetname + +class ConfigurationData: + def __init__(self) -> None: + super().__init__() + self.values = {} # T.Dict[str, T.Union[str, int, bool]] + + def __repr__(self): + return repr(self.values) + + def __contains__(self, value: str) -> bool: + return value in self.values + + def get(self, name: str) -> T.Tuple[T.Union[str, int, bool], T.Optional[str]]: + return self.values[name] # (val, desc) + + def keys(self) -> T.Iterator[str]: + return self.values.keys() + +# A bit poorly named, but this represents plain data files to copy +# during install. +class Data: + def __init__(self, sources, install_dir, install_mode=None, rename=None): + self.sources = sources + self.install_dir = install_dir + self.install_mode = install_mode + self.sources = listify(self.sources) + for s in self.sources: + assert(isinstance(s, File)) + if rename is None: + self.rename = [os.path.basename(f.fname) for f in self.sources] + else: + self.rename = stringlistify(rename) + if len(self.rename) != len(self.sources): + raise MesonException('Size of rename argument is different from number of sources') + +class RunScript(dict): + def __init__(self, script, args): + super().__init__() + assert(isinstance(script, list)) + assert(isinstance(args, list)) + self['exe'] = script + self['args'] = args + +class TestSetup: + def __init__(self, exe_wrapper: T.Optional[T.List[str]], gdb: bool, + timeout_multiplier: int, env: EnvironmentVariables): + self.exe_wrapper = exe_wrapper + self.gdb = gdb + self.timeout_multiplier = timeout_multiplier + self.env = env + +def get_sources_string_names(sources, backend): + ''' + For the specified list of @sources which can be strings, Files, or targets, + get all the output basenames. + ''' + names = [] + for s in unholder(sources): + if isinstance(s, str): + names.append(s) + elif isinstance(s, (BuildTarget, CustomTarget, CustomTargetIndex, GeneratedList)): + names += s.get_outputs() + elif isinstance(s, ExtractedObjects): + names += s.get_outputs(backend) + elif isinstance(s, File): + names.append(s.fname) + else: + raise AssertionError('Unknown source type: {!r}'.format(s)) + return names + +def load(build_dir: str) -> Build: + filename = os.path.join(build_dir, 'meson-private', 'build.dat') + load_fail_msg = 'Build data file {!r} is corrupted. Try with a fresh build tree.'.format(filename) + nonexisting_fail_msg = 'No such build data file as "{!r}".'.format(filename) + try: + with open(filename, 'rb') as f: + obj = pickle.load(f) + except FileNotFoundError: + raise MesonException(nonexisting_fail_msg) + except (pickle.UnpicklingError, EOFError): + raise MesonException(load_fail_msg) + except AttributeError: + raise MesonException( + "Build data file {!r} references functions or classes that don't " + "exist. This probably means that it was generated with an old " + "version of meson. Try running from the source directory " + "meson {} --wipe".format(filename, build_dir)) + if not isinstance(obj, Build): + raise MesonException(load_fail_msg) + return obj + +def save(obj, filename): + with open(filename, 'wb') as f: + pickle.dump(obj, f) diff --git a/meson/mesonbuild/cmake/__init__.py b/meson/mesonbuild/cmake/__init__.py new file mode 100644 index 000000000..db7aefd6f --- /dev/null +++ b/meson/mesonbuild/cmake/__init__.py @@ -0,0 +1,40 @@ +# Copyright 2019 The Meson development team + +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at + +# http://www.apache.org/licenses/LICENSE-2.0 + +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# This class contains the basic functionality needed to run any interpreter +# or an interpreter-based tool. + +__all__ = [ + 'CMakeClient', + 'CMakeExecutor', + 'CMakeException', + 'CMakeFileAPI', + 'CMakeInterpreter', + 'CMakeTarget', + 'CMakeTraceLine', + 'CMakeTraceParser', + 'SingleTargetOptions', + 'TargetOptions', + 'parse_generator_expressions', + 'language_map', + 'cmake_defines_to_args', +] + +from .common import CMakeException, SingleTargetOptions, TargetOptions, cmake_defines_to_args +from .client import CMakeClient +from .executor import CMakeExecutor +from .fileapi import CMakeFileAPI +from .generator import parse_generator_expressions +from .interpreter import CMakeInterpreter, language_map +from .traceparser import CMakeTarget, CMakeTraceLine, CMakeTraceParser diff --git a/meson/mesonbuild/cmake/client.py b/meson/mesonbuild/cmake/client.py new file mode 100644 index 000000000..b88a67330 --- /dev/null +++ b/meson/mesonbuild/cmake/client.py @@ -0,0 +1,369 @@ +# Copyright 2019 The Meson development team + +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at + +# http://www.apache.org/licenses/LICENSE-2.0 + +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# This class contains the basic functionality needed to run any interpreter +# or an interpreter-based tool. + +from .common import CMakeException, CMakeConfiguration, CMakeBuildFile +from .executor import CMakeExecutor +from ..environment import Environment +from ..mesonlib import MachineChoice +from .. import mlog +from contextlib import contextmanager +from subprocess import Popen, PIPE, TimeoutExpired +import typing as T +import json +import os + +CMAKE_SERVER_BEGIN_STR = '[== "CMake Server" ==[' +CMAKE_SERVER_END_STR = ']== "CMake Server" ==]' + +CMAKE_MESSAGE_TYPES = { + 'error': ['cookie', 'errorMessage'], + 'hello': ['supportedProtocolVersions'], + 'message': ['cookie', 'message'], + 'progress': ['cookie'], + 'reply': ['cookie', 'inReplyTo'], + 'signal': ['cookie', 'name'], +} + +CMAKE_REPLY_TYPES = { + 'handshake': [], + 'configure': [], + 'compute': [], + 'cmakeInputs': ['buildFiles', 'cmakeRootDirectory', 'sourceDirectory'], + 'codemodel': ['configurations'] +} + +# Base CMake server message classes + +class MessageBase: + def __init__(self, msg_type: str, cookie: str): + self.type = msg_type + self.cookie = cookie + + def to_dict(self) -> dict: + return {'type': self.type, 'cookie': self.cookie} + + def log(self) -> None: + mlog.warning('CMake server message of type', mlog.bold(type(self).__name__), 'has no log function') + +class RequestBase(MessageBase): + cookie_counter = 0 + + def __init__(self, msg_type: str): + super().__init__(msg_type, self.gen_cookie()) + + @staticmethod + def gen_cookie(): + RequestBase.cookie_counter += 1 + return 'meson_{}'.format(RequestBase.cookie_counter) + +class ReplyBase(MessageBase): + def __init__(self, cookie: str, in_reply_to: str): + super().__init__('reply', cookie) + self.in_reply_to = in_reply_to + +class SignalBase(MessageBase): + def __init__(self, cookie: str, signal_name: str): + super().__init__('signal', cookie) + self.signal_name = signal_name + + def log(self) -> None: + mlog.log(mlog.bold('CMake signal:'), mlog.yellow(self.signal_name)) + +# Special Message classes + +class Error(MessageBase): + def __init__(self, cookie: str, message: str): + super().__init__('error', cookie) + self.message = message + + def log(self) -> None: + mlog.error(mlog.bold('CMake server error:'), mlog.red(self.message)) + +class Message(MessageBase): + def __init__(self, cookie: str, message: str): + super().__init__('message', cookie) + self.message = message + + def log(self) -> None: + #mlog.log(mlog.bold('CMake:'), self.message) + pass + +class Progress(MessageBase): + def __init__(self, cookie: str): + super().__init__('progress', cookie) + + def log(self) -> None: + pass + +class MessageHello(MessageBase): + def __init__(self, supported_protocol_versions: T.List[dict]): + super().__init__('hello', '') + self.supported_protocol_versions = supported_protocol_versions + + def supports(self, major: int, minor: T.Optional[int] = None) -> bool: + for i in self.supported_protocol_versions: + if major == i['major']: + if minor is None or minor == i['minor']: + return True + return False + +# Request classes + +class RequestHandShake(RequestBase): + def __init__(self, src_dir: str, build_dir: str, generator: str, vers_major: int, vers_minor: T.Optional[int] = None): + super().__init__('handshake') + self.src_dir = src_dir + self.build_dir = build_dir + self.generator = generator + self.vers_major = vers_major + self.vers_minor = vers_minor + + def to_dict(self) -> dict: + vers = {'major': self.vers_major} + if self.vers_minor is not None: + vers['minor'] = self.vers_minor + + # Old CMake versions (3.7) want '/' even on Windows + src_list = os.path.normpath(self.src_dir).split(os.sep) + bld_list = os.path.normpath(self.build_dir).split(os.sep) + + return { + **super().to_dict(), + 'sourceDirectory': '/'.join(src_list), + 'buildDirectory': '/'.join(bld_list), + 'generator': self.generator, + 'protocolVersion': vers + } + +class RequestConfigure(RequestBase): + def __init__(self, args: T.Optional[T.List[str]] = None): + super().__init__('configure') + self.args = args + + def to_dict(self) -> dict: + res = super().to_dict() + if self.args: + res['cacheArguments'] = self.args + return res + +class RequestCompute(RequestBase): + def __init__(self): + super().__init__('compute') + +class RequestCMakeInputs(RequestBase): + def __init__(self): + super().__init__('cmakeInputs') + +class RequestCodeModel(RequestBase): + def __init__(self): + super().__init__('codemodel') + +# Reply classes + +class ReplyHandShake(ReplyBase): + def __init__(self, cookie: str): + super().__init__(cookie, 'handshake') + +class ReplyConfigure(ReplyBase): + def __init__(self, cookie: str): + super().__init__(cookie, 'configure') + +class ReplyCompute(ReplyBase): + def __init__(self, cookie: str): + super().__init__(cookie, 'compute') + +class ReplyCMakeInputs(ReplyBase): + def __init__(self, cookie: str, cmake_root: str, src_dir: str, build_files: T.List[CMakeBuildFile]): + super().__init__(cookie, 'cmakeInputs') + self.cmake_root = cmake_root + self.src_dir = src_dir + self.build_files = build_files + + def log(self) -> None: + mlog.log('CMake root: ', mlog.bold(self.cmake_root)) + mlog.log('Source dir: ', mlog.bold(self.src_dir)) + mlog.log('Build files:', mlog.bold(str(len(self.build_files)))) + with mlog.nested(): + for i in self.build_files: + mlog.log(str(i)) + +class ReplyCodeModel(ReplyBase): + def __init__(self, data: dict): + super().__init__(data['cookie'], 'codemodel') + self.configs = [] + for i in data['configurations']: + self.configs += [CMakeConfiguration(i)] + + def log(self) -> None: + mlog.log('CMake code mode:') + for idx, i in enumerate(self.configs): + mlog.log('Configuration {}:'.format(idx)) + with mlog.nested(): + i.log() + +# Main client class + +class CMakeClient: + def __init__(self, env: Environment): + self.env = env + self.proc = None + self.type_map = { + 'error': lambda data: Error(data['cookie'], data['errorMessage']), + 'hello': lambda data: MessageHello(data['supportedProtocolVersions']), + 'message': lambda data: Message(data['cookie'], data['message']), + 'progress': lambda data: Progress(data['cookie']), + 'reply': self.resolve_type_reply, + 'signal': lambda data: SignalBase(data['cookie'], data['name']) + } + + self.reply_map = { + 'handshake': lambda data: ReplyHandShake(data['cookie']), + 'configure': lambda data: ReplyConfigure(data['cookie']), + 'compute': lambda data: ReplyCompute(data['cookie']), + 'cmakeInputs': self.resolve_reply_cmakeInputs, + 'codemodel': lambda data: ReplyCodeModel(data), + } + + def readMessageRaw(self) -> dict: + assert(self.proc is not None) + rawData = [] + begin = False + while self.proc.poll() is None: + line = self.proc.stdout.readline() + if not line: + break + line = line.decode('utf-8') + line = line.strip() + + if begin and line == CMAKE_SERVER_END_STR: + break # End of the message + elif begin: + rawData += [line] + elif line == CMAKE_SERVER_BEGIN_STR: + begin = True # Begin of the message + + if rawData: + return json.loads('\n'.join(rawData)) + raise CMakeException('Failed to read data from the CMake server') + + def readMessage(self) -> MessageBase: + raw_data = self.readMessageRaw() + if 'type' not in raw_data: + raise CMakeException('The "type" attribute is missing from the message') + msg_type = raw_data['type'] + func = self.type_map.get(msg_type, None) + if not func: + raise CMakeException('Recieved unknown message type "{}"'.format(msg_type)) + for i in CMAKE_MESSAGE_TYPES[msg_type]: + if i not in raw_data: + raise CMakeException('Key "{}" is missing from CMake server message type {}'.format(i, msg_type)) + return func(raw_data) + + def writeMessage(self, msg: MessageBase) -> None: + raw_data = '\n{}\n{}\n{}\n'.format(CMAKE_SERVER_BEGIN_STR, json.dumps(msg.to_dict(), indent=2), CMAKE_SERVER_END_STR) + self.proc.stdin.write(raw_data.encode('ascii')) + self.proc.stdin.flush() + + def query(self, request: RequestBase) -> MessageBase: + self.writeMessage(request) + while True: + reply = self.readMessage() + if reply.cookie == request.cookie and reply.type in ['reply', 'error']: + return reply + + reply.log() + + def query_checked(self, request: RequestBase, message: str) -> ReplyBase: + reply = self.query(request) + h = mlog.green('SUCCEEDED') if reply.type == 'reply' else mlog.red('FAILED') + mlog.log(message + ':', h) + if reply.type != 'reply': + reply.log() + raise CMakeException('CMake server query failed') + return reply + + def do_handshake(self, src_dir: str, build_dir: str, generator: str, vers_major: int, vers_minor: T.Optional[int] = None) -> None: + # CMake prints the hello message on startup + msg = self.readMessage() + if not isinstance(msg, MessageHello): + raise CMakeException('Recieved an unexpected message from the CMake server') + + request = RequestHandShake(src_dir, build_dir, generator, vers_major, vers_minor) + self.query_checked(request, 'CMake server handshake') + + def resolve_type_reply(self, data: dict) -> ReplyBase: + reply_type = data['inReplyTo'] + func = self.reply_map.get(reply_type, None) + if not func: + raise CMakeException('Recieved unknown reply type "{}"'.format(reply_type)) + for i in ['cookie'] + CMAKE_REPLY_TYPES[reply_type]: + if i not in data: + raise CMakeException('Key "{}" is missing from CMake server message type {}'.format(i, type)) + return func(data) + + def resolve_reply_cmakeInputs(self, data: dict) -> ReplyCMakeInputs: + files = [] + for i in data['buildFiles']: + for j in i['sources']: + files += [CMakeBuildFile(j, i['isCMake'], i['isTemporary'])] + return ReplyCMakeInputs(data['cookie'], data['cmakeRootDirectory'], data['sourceDirectory'], files) + + @contextmanager + def connect(self): + self.startup() + try: + yield + finally: + self.shutdown() + + def startup(self) -> None: + if self.proc is not None: + raise CMakeException('The CMake server was already started') + for_machine = MachineChoice.HOST # TODO make parameter + cmake_exe = CMakeExecutor(self.env, '>=3.7', for_machine) + if not cmake_exe.found(): + raise CMakeException('Unable to find CMake') + + mlog.debug('Starting CMake server with CMake', mlog.bold(' '.join(cmake_exe.get_command())), 'version', mlog.cyan(cmake_exe.version())) + self.proc = Popen(cmake_exe.get_command() + ['-E', 'server', '--experimental', '--debug'], stdin=PIPE, stdout=PIPE) + + def shutdown(self) -> None: + if self.proc is None: + return + + mlog.debug('Shutting down the CMake server') + + # Close the pipes to exit + self.proc.stdin.close() + self.proc.stdout.close() + + # Wait for CMake to finish + try: + self.proc.wait(timeout=2) + except TimeoutExpired: + # Terminate CMake if there is a timeout + # terminate() may throw a platform specific exception if the process has already + # terminated. This may be the case if there is a race condition (CMake exited after + # the timeout but before the terminate() call). Additionally, this behavior can + # also be triggered on cygwin if CMake crashes. + # See https://github.com/mesonbuild/meson/pull/4969#issuecomment-499413233 + try: + self.proc.terminate() + except Exception: + pass + + self.proc = None diff --git a/meson/mesonbuild/cmake/common.py b/meson/mesonbuild/cmake/common.py new file mode 100644 index 000000000..4510b5d5a --- /dev/null +++ b/meson/mesonbuild/cmake/common.py @@ -0,0 +1,260 @@ +# Copyright 2019 The Meson development team + +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at + +# http://www.apache.org/licenses/LICENSE-2.0 + +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# This class contains the basic functionality needed to run any interpreter +# or an interpreter-based tool. + +from ..mesonlib import MesonException +from .. import mlog +import typing as T + +class CMakeException(MesonException): + pass + +class CMakeBuildFile: + def __init__(self, file: str, is_cmake: bool, is_temp: bool): + self.file = file + self.is_cmake = is_cmake + self.is_temp = is_temp + + def __repr__(self): + return '<{}: {}; cmake={}; temp={}>'.format(self.__class__.__name__, self.file, self.is_cmake, self.is_temp) + +def _flags_to_list(raw: str) -> T.List[str]: + # Convert a raw commandline string into a list of strings + res = [] + curr = '' + escape = False + in_string = False + for i in raw: + if escape: + # If the current char is not a quote, the '\' is probably important + if i not in ['"', "'"]: + curr += '\\' + curr += i + escape = False + elif i == '\\': + escape = True + elif i in ['"', "'"]: + in_string = not in_string + elif i in [' ', '\n']: + if in_string: + curr += i + else: + res += [curr] + curr = '' + else: + curr += i + res += [curr] + res = list(filter(lambda x: len(x) > 0, res)) + return res + +def cmake_defines_to_args(raw: T.Any, permissive: bool = False) -> T.List[str]: + res = [] # type: T.List[str] + if not isinstance(raw, list): + raw = [raw] + + for i in raw: + if not isinstance(i, dict): + raise MesonException('Invalid CMake defines. Expected a dict, but got a {}'.format(type(i).__name__)) + for key, val in i.items(): + assert isinstance(key, str) + if isinstance(val, (str, int, float)): + res += ['-D{}={}'.format(key, val)] + elif isinstance(val, bool): + val_str = 'ON' if val else 'OFF' + res += ['-D{}={}'.format(key, val_str)] + else: + raise MesonException('Type "{}" of "{}" is not supported as for a CMake define value'.format(type(val).__name__, key)) + + return res + +class CMakeFileGroup: + def __init__(self, data: dict): + self.defines = data.get('defines', '') + self.flags = _flags_to_list(data.get('compileFlags', '')) + self.includes = data.get('includePath', []) + self.is_generated = data.get('isGenerated', False) + self.language = data.get('language', 'C') + self.sources = data.get('sources', []) + + # Fix the include directories + tmp = [] + for i in self.includes: + if isinstance(i, dict) and 'path' in i: + i['isSystem'] = i.get('isSystem', False) + tmp += [i] + elif isinstance(i, str): + tmp += [{'path': i, 'isSystem': False}] + self.includes = tmp + + def log(self) -> None: + mlog.log('flags =', mlog.bold(', '.join(self.flags))) + mlog.log('defines =', mlog.bold(', '.join(self.defines))) + mlog.log('includes =', mlog.bold(', '.join(self.includes))) + mlog.log('is_generated =', mlog.bold('true' if self.is_generated else 'false')) + mlog.log('language =', mlog.bold(self.language)) + mlog.log('sources:') + for i in self.sources: + with mlog.nested(): + mlog.log(i) + +class CMakeTarget: + def __init__(self, data: dict): + self.artifacts = data.get('artifacts', []) + self.src_dir = data.get('sourceDirectory', '') + self.build_dir = data.get('buildDirectory', '') + self.name = data.get('name', '') + self.full_name = data.get('fullName', '') + self.install = data.get('hasInstallRule', False) + self.install_paths = list(set(data.get('installPaths', []))) + self.link_lang = data.get('linkerLanguage', '') + self.link_libraries = _flags_to_list(data.get('linkLibraries', '')) + self.link_flags = _flags_to_list(data.get('linkFlags', '')) + self.link_lang_flags = _flags_to_list(data.get('linkLanguageFlags', '')) + # self.link_path = data.get('linkPath', '') + self.type = data.get('type', 'EXECUTABLE') + # self.is_generator_provided = data.get('isGeneratorProvided', False) + self.files = [] + + for i in data.get('fileGroups', []): + self.files += [CMakeFileGroup(i)] + + def log(self) -> None: + mlog.log('artifacts =', mlog.bold(', '.join(self.artifacts))) + mlog.log('src_dir =', mlog.bold(self.src_dir)) + mlog.log('build_dir =', mlog.bold(self.build_dir)) + mlog.log('name =', mlog.bold(self.name)) + mlog.log('full_name =', mlog.bold(self.full_name)) + mlog.log('install =', mlog.bold('true' if self.install else 'false')) + mlog.log('install_paths =', mlog.bold(', '.join(self.install_paths))) + mlog.log('link_lang =', mlog.bold(self.link_lang)) + mlog.log('link_libraries =', mlog.bold(', '.join(self.link_libraries))) + mlog.log('link_flags =', mlog.bold(', '.join(self.link_flags))) + mlog.log('link_lang_flags =', mlog.bold(', '.join(self.link_lang_flags))) + # mlog.log('link_path =', mlog.bold(self.link_path)) + mlog.log('type =', mlog.bold(self.type)) + # mlog.log('is_generator_provided =', mlog.bold('true' if self.is_generator_provided else 'false')) + for idx, i in enumerate(self.files): + mlog.log('Files {}:'.format(idx)) + with mlog.nested(): + i.log() + +class CMakeProject: + def __init__(self, data: dict): + self.src_dir = data.get('sourceDirectory', '') + self.build_dir = data.get('buildDirectory', '') + self.name = data.get('name', '') + self.targets = [] + + for i in data.get('targets', []): + self.targets += [CMakeTarget(i)] + + def log(self) -> None: + mlog.log('src_dir =', mlog.bold(self.src_dir)) + mlog.log('build_dir =', mlog.bold(self.build_dir)) + mlog.log('name =', mlog.bold(self.name)) + for idx, i in enumerate(self.targets): + mlog.log('Target {}:'.format(idx)) + with mlog.nested(): + i.log() + +class CMakeConfiguration: + def __init__(self, data: dict): + self.name = data.get('name', '') + self.projects = [] + for i in data.get('projects', []): + self.projects += [CMakeProject(i)] + + def log(self) -> None: + mlog.log('name =', mlog.bold(self.name)) + for idx, i in enumerate(self.projects): + mlog.log('Project {}:'.format(idx)) + with mlog.nested(): + i.log() + +class SingleTargetOptions: + def __init__(self) -> None: + self.opts = {} # type: T.Dict[str, str] + self.lang_args = {} # type: T.Dict[str, T.List[str]] + self.link_args = [] # type: T.List[str] + self.install = 'preserve' + + def set_opt(self, opt: str, val: str) -> None: + self.opts[opt] = val + + def append_args(self, lang: str, args: T.List[str]) -> None: + if lang not in self.lang_args: + self.lang_args[lang] = [] + self.lang_args[lang] += args + + def append_link_args(self, args: T.List[str]) -> None: + self.link_args += args + + def set_install(self, install: bool) -> None: + self.install = 'true' if install else 'false' + + def get_override_options(self, initial: T.List[str]) -> T.List[str]: + res = [] # type: T.List[str] + for i in initial: + opt = i[:i.find('=')] + if opt not in self.opts: + res += [i] + res += ['{}={}'.format(k, v) for k, v in self.opts.items()] + return res + + def get_compile_args(self, lang: str, initial: T.List[str]) -> T.List[str]: + if lang in self.lang_args: + return initial + self.lang_args[lang] + return initial + + def get_link_args(self, initial: T.List[str]) -> T.List[str]: + return initial + self.link_args + + def get_install(self, initial: bool) -> bool: + return {'preserve': initial, 'true': True, 'false': False}[self.install] + +class TargetOptions: + def __init__(self) -> None: + self.global_options = SingleTargetOptions() + self.target_options = {} # type: T.Dict[str, SingleTargetOptions] + + def __getitem__(self, tgt: str) -> SingleTargetOptions: + if tgt not in self.target_options: + self.target_options[tgt] = SingleTargetOptions() + return self.target_options[tgt] + + def get_override_options(self, tgt: str, initial: T.List[str]) -> T.List[str]: + initial = self.global_options.get_override_options(initial) + if tgt in self.target_options: + initial = self.target_options[tgt].get_override_options(initial) + return initial + + def get_compile_args(self, tgt: str, lang: str, initial: T.List[str]) -> T.List[str]: + initial = self.global_options.get_compile_args(lang, initial) + if tgt in self.target_options: + initial = self.target_options[tgt].get_compile_args(lang, initial) + return initial + + def get_link_args(self, tgt: str, initial: T.List[str]) -> T.List[str]: + initial = self.global_options.get_link_args(initial) + if tgt in self.target_options: + initial = self.target_options[tgt].get_link_args(initial) + return initial + + def get_install(self, tgt: str, initial: bool) -> bool: + initial = self.global_options.get_install(initial) + if tgt in self.target_options: + initial = self.target_options[tgt].get_install(initial) + return initial diff --git a/meson/mesonbuild/cmake/data/preload.cmake b/meson/mesonbuild/cmake/data/preload.cmake new file mode 100644 index 000000000..f8caae923 --- /dev/null +++ b/meson/mesonbuild/cmake/data/preload.cmake @@ -0,0 +1,67 @@ +if(MESON_PS_LOADED) + return() +endif() + +set(MESON_PS_LOADED ON) + +# Dummy macros that have a special meaning in the meson code +macro(meson_ps_execute_delayed_calls) +endmacro() + +macro(meson_ps_reload_vars) +endmacro() + +# Helper macro to inspect the current CMake state +macro(meson_ps_inspect_vars) + set(MESON_PS_CMAKE_CURRENT_BINARY_DIR "${CMAKE_CURRENT_BINARY_DIR}") + set(MESON_PS_CMAKE_CURRENT_SOURCE_DIR "${CMAKE_CURRENT_SOURCE_DIR}") + meson_ps_execute_delayed_calls() +endmacro() + + +# Override some system functions with custom code and forward the args +# to the original function +macro(add_custom_command) + meson_ps_inspect_vars() + _add_custom_command(${ARGV}) +endmacro() + +macro(add_custom_target) + meson_ps_inspect_vars() + _add_custom_target(${ARGV}) +endmacro() + +macro(set_property) + meson_ps_inspect_vars() + _set_property(${ARGV}) +endmacro() + +function(set_source_files_properties) + set(FILES) + set(I 0) + set(PROPERTIES OFF) + + while(I LESS ARGC) + if(NOT PROPERTIES) + if("${ARGV${I}}" STREQUAL "PROPERTIES") + set(PROPERTIES ON) + else() + list(APPEND FILES "${ARGV${I}}") + endif() + + math(EXPR I "${I} + 1") + else() + set(ID_IDX ${I}) + math(EXPR PROP_IDX "${ID_IDX} + 1") + + set(ID "${ARGV${ID_IDX}}") + set(PROP "${ARGV${PROP_IDX}}") + + set_property(SOURCE ${FILES} PROPERTY "${ID}" "${PROP}") + math(EXPR I "${I} + 2") + endif() + endwhile() +endfunction() + +set(MESON_PS_DELAYED_CALLS add_custom_command;add_custom_target;set_property) +meson_ps_reload_vars() diff --git a/meson/mesonbuild/cmake/executor.py b/meson/mesonbuild/cmake/executor.py new file mode 100644 index 000000000..2226c0275 --- /dev/null +++ b/meson/mesonbuild/cmake/executor.py @@ -0,0 +1,426 @@ +# Copyright 2019 The Meson development team + +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at + +# http://www.apache.org/licenses/LICENSE-2.0 + +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# This class contains the basic functionality needed to run any interpreter +# or an interpreter-based tool. + +import subprocess as S +from pathlib import Path +from threading import Thread +import typing as T +import re +import os +import shutil +import ctypes +import textwrap + +from .. import mlog, mesonlib +from ..mesonlib import PerMachine, Popen_safe, version_compare, MachineChoice +from ..environment import Environment +from ..envconfig import get_env_var +from ..compilers import ( + AppleClangCCompiler, AppleClangCPPCompiler, AppleClangObjCCompiler, + AppleClangObjCPPCompiler +) + +if T.TYPE_CHECKING: + from ..dependencies.base import ExternalProgram + from ..compilers import Compiler + +TYPE_result = T.Tuple[int, T.Optional[str], T.Optional[str]] + +_MESON_TO_CMAKE_MAPPING = { + 'arm': 'ARMCC', + 'armclang': 'ARMClang', + 'clang': 'Clang', + 'clang-cl': 'MSVC', + 'flang': 'Flang', + 'g95': 'G95', + 'gcc': 'GNU', + 'intel': 'Intel', + 'intel-cl': 'MSVC', + 'msvc': 'MSVC', + 'pathscale': 'PathScale', + 'pgi': 'PGI', + 'sun': 'SunPro', +} + +def meson_compiler_to_cmake_id(cobj: 'Compiler') -> str: + """Translate meson compiler's into CMAKE compiler ID's. + + Most of these can be handled by a simple table lookup, with a few + exceptions. + + Clang and Apple's Clang are both identified as "clang" by meson. To make + things more complicated gcc and vanilla clang both use Apple's ld64 on + macOS. The only way to know for sure is to do an isinstance() check. + """ + if isinstance(cobj, (AppleClangCCompiler, AppleClangCPPCompiler, + AppleClangObjCCompiler, AppleClangObjCPPCompiler)): + return 'AppleClang' + # If no mapping, try GNU and hope that the build files don't care + return _MESON_TO_CMAKE_MAPPING.get(cobj.get_id(), 'GNU') + + +class CMakeExecutor: + # The class's copy of the CMake path. Avoids having to search for it + # multiple times in the same Meson invocation. + class_cmakebin = PerMachine(None, None) + class_cmakevers = PerMachine(None, None) + class_cmake_cache = {} # type: T.Dict[T.Any, TYPE_result] + + def __init__(self, environment: Environment, version: str, for_machine: MachineChoice, silent: bool = False): + self.min_version = version + self.environment = environment + self.for_machine = for_machine + self.cmakebin, self.cmakevers = self.find_cmake_binary(self.environment, silent=silent) + self.always_capture_stderr = True + self.print_cmout = False + self.prefix_paths = [] # type: T.List[str] + self.extra_cmake_args = [] # type: T.List[str] + if self.cmakebin is False: + self.cmakebin = None + return + + if not version_compare(self.cmakevers, self.min_version): + mlog.warning( + 'The version of CMake', mlog.bold(self.cmakebin.get_path()), + 'is', mlog.bold(self.cmakevers), 'but version', mlog.bold(self.min_version), + 'is required') + self.cmakebin = None + return + + self.prefix_paths = self.environment.coredata.builtins_per_machine[self.for_machine]['cmake_prefix_path'].value + env_pref_path = get_env_var( + self.for_machine, + self.environment.is_cross_build(), + 'CMAKE_PREFIX_PATH') + if env_pref_path is not None: + if mesonlib.is_windows(): + # Cannot split on ':' on Windows because its in the drive letter + env_pref_path = env_pref_path.split(os.pathsep) + else: + # https://github.com/mesonbuild/meson/issues/7294 + env_pref_path = re.split(r':|;', env_pref_path) + env_pref_path = [x for x in env_pref_path if x] # Filter out empty strings + if not self.prefix_paths: + self.prefix_paths = [] + self.prefix_paths += env_pref_path + + if self.prefix_paths: + self.extra_cmake_args += ['-DCMAKE_PREFIX_PATH={}'.format(';'.join(self.prefix_paths))] + + def find_cmake_binary(self, environment: Environment, silent: bool = False) -> T.Tuple['ExternalProgram', str]: + from ..dependencies.base import find_external_program + + # Only search for CMake the first time and store the result in the class + # definition + if CMakeExecutor.class_cmakebin[self.for_machine] is False: + mlog.debug('CMake binary for %s is cached as not found' % self.for_machine) + elif CMakeExecutor.class_cmakebin[self.for_machine] is not None: + mlog.debug('CMake binary for %s is cached.' % self.for_machine) + else: + assert CMakeExecutor.class_cmakebin[self.for_machine] is None + + mlog.debug('CMake binary for %s is not cached' % self.for_machine) + for potential_cmakebin in find_external_program( + environment, self.for_machine, 'cmake', 'CMake', + environment.default_cmake, allow_default_for_cross=False): + version_if_ok = self.check_cmake(potential_cmakebin) + if not version_if_ok: + continue + if not silent: + mlog.log('Found CMake:', mlog.bold(potential_cmakebin.get_path()), + '(%s)' % version_if_ok) + CMakeExecutor.class_cmakebin[self.for_machine] = potential_cmakebin + CMakeExecutor.class_cmakevers[self.for_machine] = version_if_ok + break + else: + if not silent: + mlog.log('Found CMake:', mlog.red('NO')) + # Set to False instead of None to signify that we've already + # searched for it and not found it + CMakeExecutor.class_cmakebin[self.for_machine] = False + CMakeExecutor.class_cmakevers[self.for_machine] = None + + return CMakeExecutor.class_cmakebin[self.for_machine], CMakeExecutor.class_cmakevers[self.for_machine] + + def check_cmake(self, cmakebin: 'ExternalProgram') -> T.Optional[str]: + if not cmakebin.found(): + mlog.log('Did not find CMake {!r}'.format(cmakebin.name)) + return None + try: + p, out = Popen_safe(cmakebin.get_command() + ['--version'])[0:2] + if p.returncode != 0: + mlog.warning('Found CMake {!r} but couldn\'t run it' + ''.format(' '.join(cmakebin.get_command()))) + return None + except FileNotFoundError: + mlog.warning('We thought we found CMake {!r} but now it\'s not there. How odd!' + ''.format(' '.join(cmakebin.get_command()))) + return None + except PermissionError: + msg = 'Found CMake {!r} but didn\'t have permissions to run it.'.format(' '.join(cmakebin.get_command())) + if not mesonlib.is_windows(): + msg += '\n\nOn Unix-like systems this is often caused by scripts that are not executable.' + mlog.warning(msg) + return None + cmvers = re.search(r'(cmake|cmake3)\s*version\s*([\d.]+)', out).group(2) + return cmvers + + def set_exec_mode(self, print_cmout: T.Optional[bool] = None, always_capture_stderr: T.Optional[bool] = None) -> None: + if print_cmout is not None: + self.print_cmout = print_cmout + if always_capture_stderr is not None: + self.always_capture_stderr = always_capture_stderr + + def _cache_key(self, args: T.List[str], build_dir: str, env): + fenv = frozenset(env.items()) if env is not None else None + targs = tuple(args) + return (self.cmakebin, targs, build_dir, fenv) + + def _call_cmout_stderr(self, args: T.List[str], build_dir: str, env) -> TYPE_result: + cmd = self.cmakebin.get_command() + args + proc = S.Popen(cmd, stdout=S.PIPE, stderr=S.PIPE, cwd=build_dir, env=env) + + # stdout and stderr MUST be read at the same time to avoid pipe + # blocking issues. The easiest way to do this is with a separate + # thread for one of the pipes. + def print_stdout(): + while True: + line = proc.stdout.readline() + if not line: + break + mlog.log(line.decode(errors='ignore').strip('\n')) + proc.stdout.close() + + t = Thread(target=print_stdout) + t.start() + + try: + # Read stderr line by line and log non trace lines + raw_trace = '' + tline_start_reg = re.compile(r'^\s*(.*\.(cmake|txt))\(([0-9]+)\):\s*(\w+)\(.*$') + inside_multiline_trace = False + while True: + line = proc.stderr.readline() + if not line: + break + line = line.decode(errors='ignore') + if tline_start_reg.match(line): + raw_trace += line + inside_multiline_trace = not line.endswith(' )\n') + elif inside_multiline_trace: + raw_trace += line + else: + mlog.warning(line.strip('\n')) + + finally: + proc.stderr.close() + t.join() + proc.wait() + + return proc.returncode, None, raw_trace + + def _call_cmout(self, args: T.List[str], build_dir: str, env) -> TYPE_result: + cmd = self.cmakebin.get_command() + args + proc = S.Popen(cmd, stdout=S.PIPE, stderr=S.STDOUT, cwd=build_dir, env=env) + while True: + line = proc.stdout.readline() + if not line: + break + mlog.log(line.decode(errors='ignore').strip('\n')) + proc.stdout.close() + proc.wait() + return proc.returncode, None, None + + def _call_quiet(self, args: T.List[str], build_dir: str, env) -> TYPE_result: + os.makedirs(build_dir, exist_ok=True) + cmd = self.cmakebin.get_command() + args + ret = S.run(cmd, env=env, cwd=build_dir, close_fds=False, + stdout=S.PIPE, stderr=S.PIPE, universal_newlines=False) + rc = ret.returncode + out = ret.stdout.decode(errors='ignore') + err = ret.stderr.decode(errors='ignore') + call = ' '.join(cmd) + mlog.debug("Called `{}` in {} -> {}".format(call, build_dir, rc)) + return rc, out, err + + def _call_impl(self, args: T.List[str], build_dir: str, env) -> TYPE_result: + if not self.print_cmout: + return self._call_quiet(args, build_dir, env) + else: + if self.always_capture_stderr: + return self._call_cmout_stderr(args, build_dir, env) + else: + return self._call_cmout(args, build_dir, env) + + def call(self, args: T.List[str], build_dir: str, env=None, disable_cache: bool = False) -> TYPE_result: + if env is None: + env = os.environ + + args = args + self.extra_cmake_args + if disable_cache: + return self._call_impl(args, build_dir, env) + + # First check if cached, if not call the real cmake function + cache = CMakeExecutor.class_cmake_cache + key = self._cache_key(args, build_dir, env) + if key not in cache: + cache[key] = self._call_impl(args, build_dir, env) + return cache[key] + + def call_with_fake_build(self, args: T.List[str], build_dir: str, env=None) -> TYPE_result: + # First check the cache + cache = CMakeExecutor.class_cmake_cache + key = self._cache_key(args, build_dir, env) + if key in cache: + return cache[key] + + os.makedirs(build_dir, exist_ok=True) + + # Try to set the correct compiler for C and C++ + # This step is required to make try_compile work inside CMake + fallback = os.path.realpath(__file__) # A file used as a fallback wehen everything else fails + compilers = self.environment.coredata.compilers[MachineChoice.BUILD] + + def make_abs(exe: str, lang: str) -> str: + if os.path.isabs(exe): + return exe + + p = shutil.which(exe) + if p is None: + mlog.debug('Failed to find a {} compiler for CMake. This might cause CMake to fail.'.format(lang)) + p = fallback + return p + + def choose_compiler(lang: str) -> T.Tuple[str, str, str, str]: + comp_obj = None + exe_list = [] + if lang in compilers: + comp_obj = compilers[lang] + else: + try: + comp_obj = self.environment.compiler_from_language(lang, MachineChoice.BUILD) + except Exception: + pass + + if comp_obj is not None: + exe_list = comp_obj.get_exelist() + comp_id = meson_compiler_to_cmake_id(comp_obj) + comp_version = comp_obj.version.upper() + + if len(exe_list) == 1: + return make_abs(exe_list[0], lang), '', comp_id, comp_version + elif len(exe_list) == 2: + return make_abs(exe_list[1], lang), make_abs(exe_list[0], lang), comp_id, comp_version + else: + mlog.debug('Failed to find a {} compiler for CMake. This might cause CMake to fail.'.format(lang)) + return fallback, '', 'GNU', '' + + c_comp, c_launcher, c_id, c_version = choose_compiler('c') + cxx_comp, cxx_launcher, cxx_id, cxx_version = choose_compiler('cpp') + fortran_comp, fortran_launcher, _, _ = choose_compiler('fortran') + + # on Windows, choose_compiler returns path with \ as separator - replace by / before writing to CMAKE file + c_comp = c_comp.replace('\\', '/') + c_launcher = c_launcher.replace('\\', '/') + cxx_comp = cxx_comp.replace('\\', '/') + cxx_launcher = cxx_launcher.replace('\\', '/') + fortran_comp = fortran_comp.replace('\\', '/') + fortran_launcher = fortran_launcher.replace('\\', '/') + + # Reset the CMake cache + (Path(build_dir) / 'CMakeCache.txt').write_text('CMAKE_PLATFORM_INFO_INITIALIZED:INTERNAL=1\n') + + # Fake the compiler files + comp_dir = Path(build_dir) / 'CMakeFiles' / self.cmakevers + comp_dir.mkdir(parents=True, exist_ok=True) + + c_comp_file = comp_dir / 'CMakeCCompiler.cmake' + cxx_comp_file = comp_dir / 'CMakeCXXCompiler.cmake' + fortran_comp_file = comp_dir / 'CMakeFortranCompiler.cmake' + + if c_comp and not c_comp_file.is_file(): + is_gnu = '1' if c_id == 'GNU' else '' + c_comp_file.write_text(textwrap.dedent('''\ + # Fake CMake file to skip the boring and slow stuff + set(CMAKE_C_COMPILER "{}") # Should be a valid compiler for try_compile, etc. + set(CMAKE_C_COMPILER_LAUNCHER "{}") # The compiler launcher (if presentt) + set(CMAKE_COMPILER_IS_GNUCC {}) + set(CMAKE_C_COMPILER_ID "{}") + set(CMAKE_C_COMPILER_VERSION "{}") + set(CMAKE_C_COMPILER_LOADED 1) + set(CMAKE_C_COMPILER_FORCED 1) + set(CMAKE_C_COMPILER_WORKS TRUE) + set(CMAKE_C_ABI_COMPILED TRUE) + set(CMAKE_C_SOURCE_FILE_EXTENSIONS c;m) + set(CMAKE_C_IGNORE_EXTENSIONS h;H;o;O;obj;OBJ;def;DEF;rc;RC) + set(CMAKE_SIZEOF_VOID_P "{}") + '''.format(c_comp, c_launcher, is_gnu, c_id, c_version, + ctypes.sizeof(ctypes.c_voidp)))) + + if cxx_comp and not cxx_comp_file.is_file(): + is_gnu = '1' if cxx_id == 'GNU' else '' + cxx_comp_file.write_text(textwrap.dedent('''\ + # Fake CMake file to skip the boring and slow stuff + set(CMAKE_CXX_COMPILER "{}") # Should be a valid compiler for try_compile, etc. + set(CMAKE_CXX_COMPILER_LAUNCHER "{}") # The compiler launcher (if presentt) + set(CMAKE_COMPILER_IS_GNUCXX {}) + set(CMAKE_CXX_COMPILER_ID "{}") + set(CMAKE_CXX_COMPILER_VERSION "{}") + set(CMAKE_CXX_COMPILER_LOADED 1) + set(CMAKE_CXX_COMPILER_FORCED 1) + set(CMAKE_CXX_COMPILER_WORKS TRUE) + set(CMAKE_CXX_ABI_COMPILED TRUE) + set(CMAKE_CXX_IGNORE_EXTENSIONS inl;h;hpp;HPP;H;o;O;obj;OBJ;def;DEF;rc;RC) + set(CMAKE_CXX_SOURCE_FILE_EXTENSIONS C;M;c++;cc;cpp;cxx;mm;CPP) + set(CMAKE_SIZEOF_VOID_P "{}") + '''.format(cxx_comp, cxx_launcher, is_gnu, cxx_id, cxx_version, + ctypes.sizeof(ctypes.c_voidp)))) + + if fortran_comp and not fortran_comp_file.is_file(): + fortran_comp_file.write_text(textwrap.dedent('''\ + # Fake CMake file to skip the boring and slow stuff + set(CMAKE_Fortran_COMPILER "{}") # Should be a valid compiler for try_compile, etc. + set(CMAKE_Fortran_COMPILER_LAUNCHER "{}") # The compiler launcher (if presentt) + set(CMAKE_Fortran_COMPILER_ID "GNU") # Pretend we have found GCC + set(CMAKE_COMPILER_IS_GNUG77 1) + set(CMAKE_Fortran_COMPILER_LOADED 1) + set(CMAKE_Fortran_COMPILER_WORKS TRUE) + set(CMAKE_Fortran_ABI_COMPILED TRUE) + set(CMAKE_Fortran_IGNORE_EXTENSIONS h;H;o;O;obj;OBJ;def;DEF;rc;RC) + set(CMAKE_Fortran_SOURCE_FILE_EXTENSIONS f;F;fpp;FPP;f77;F77;f90;F90;for;For;FOR;f95;F95) + set(CMAKE_SIZEOF_VOID_P "{}") + '''.format(fortran_comp, fortran_launcher, ctypes.sizeof(ctypes.c_voidp)))) + + return self.call(args, build_dir, env) + + def found(self) -> bool: + return self.cmakebin is not None + + def version(self) -> str: + return self.cmakevers + + def executable_path(self) -> str: + return self.cmakebin.get_path() + + def get_command(self) -> T.List[str]: + return self.cmakebin.get_command() + + def get_cmake_prefix_paths(self) -> T.List[str]: + return self.prefix_paths + + def machine_choice(self) -> MachineChoice: + return self.for_machine diff --git a/meson/mesonbuild/cmake/fileapi.py b/meson/mesonbuild/cmake/fileapi.py new file mode 100644 index 000000000..f219f16ca --- /dev/null +++ b/meson/mesonbuild/cmake/fileapi.py @@ -0,0 +1,318 @@ +# Copyright 2019 The Meson development team + +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at + +# http://www.apache.org/licenses/LICENSE-2.0 + +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from .common import CMakeException, CMakeBuildFile, CMakeConfiguration +import typing as T +from .. import mlog +import os +import json +import re + +STRIP_KEYS = ['cmake', 'reply', 'backtrace', 'backtraceGraph', 'version'] + +class CMakeFileAPI: + def __init__(self, build_dir: str): + self.build_dir = build_dir + self.api_base_dir = os.path.join(self.build_dir, '.cmake', 'api', 'v1') + self.request_dir = os.path.join(self.api_base_dir, 'query', 'client-meson') + self.reply_dir = os.path.join(self.api_base_dir, 'reply') + self.cmake_sources = [] + self.cmake_configurations = [] + self.kind_resolver_map = { + 'codemodel': self._parse_codemodel, + 'cmakeFiles': self._parse_cmakeFiles, + } + + def get_cmake_sources(self) -> T.List[CMakeBuildFile]: + return self.cmake_sources + + def get_cmake_configurations(self) -> T.List[CMakeConfiguration]: + return self.cmake_configurations + + def setup_request(self) -> None: + os.makedirs(self.request_dir, exist_ok=True) + + query = { + 'requests': [ + {'kind': 'codemodel', 'version': {'major': 2, 'minor': 0}}, + {'kind': 'cmakeFiles', 'version': {'major': 1, 'minor': 0}}, + ] + } + + with open(os.path.join(self.request_dir, 'query.json'), 'w') as fp: + json.dump(query, fp, indent=2) + + def load_reply(self) -> None: + if not os.path.isdir(self.reply_dir): + raise CMakeException('No response from the CMake file API') + + files = os.listdir(self.reply_dir) + root = None + reg_index = re.compile(r'^index-.*\.json$') + for i in files: + if reg_index.match(i): + root = i + break + + if not root: + raise CMakeException('Failed to find the CMake file API index') + + index = self._reply_file_content(root) # Load the root index + index = self._strip_data(index) # Avoid loading duplicate files + index = self._resolve_references(index) # Load everything + index = self._strip_data(index) # Strip unused data (again for loaded files) + + # Debug output + debug_json = os.path.normpath(os.path.join(self.build_dir, '..', 'fileAPI.json')) + with open(debug_json, 'w') as fp: + json.dump(index, fp, indent=2) + mlog.cmd_ci_include(debug_json) + + # parse the JSON + for i in index['objects']: + assert(isinstance(i, dict)) + assert('kind' in i) + assert(i['kind'] in self.kind_resolver_map) + + self.kind_resolver_map[i['kind']](i) + + def _parse_codemodel(self, data: dict) -> None: + assert('configurations' in data) + assert('paths' in data) + + source_dir = data['paths']['source'] + build_dir = data['paths']['build'] + + # The file API output differs quite a bit from the server + # output. It is more flat than the server output and makes + # heavy use of references. Here these references are + # resolved and the resulting data structure is identical + # to the CMake serve output. + + def helper_parse_dir(dir_entry: dict) -> T.Tuple[str, str]: + src_dir = dir_entry.get('source', '.') + bld_dir = dir_entry.get('build', '.') + src_dir = src_dir if os.path.isabs(src_dir) else os.path.join(source_dir, src_dir) + bld_dir = bld_dir if os.path.isabs(bld_dir) else os.path.join(source_dir, bld_dir) + src_dir = os.path.normpath(src_dir) + bld_dir = os.path.normpath(bld_dir) + + return src_dir, bld_dir + + def parse_sources(comp_group: dict, tgt: dict) -> T.Tuple[T.List[str], T.List[str], T.List[int]]: + gen = [] + src = [] + idx = [] + + src_list_raw = tgt.get('sources', []) + for i in comp_group.get('sourceIndexes', []): + if i >= len(src_list_raw) or 'path' not in src_list_raw[i]: + continue + if src_list_raw[i].get('isGenerated', False): + gen += [src_list_raw[i]['path']] + else: + src += [src_list_raw[i]['path']] + idx += [i] + + return src, gen, idx + + def parse_target(tgt: dict) -> dict: + src_dir, bld_dir = helper_parse_dir(cnf.get('paths', {})) + + # Parse install paths (if present) + install_paths = [] + if 'install' in tgt: + prefix = tgt['install']['prefix']['path'] + install_paths = [os.path.join(prefix, x['path']) for x in tgt['install']['destinations']] + install_paths = list(set(install_paths)) + + # On the first look, it looks really nice that the CMake devs have + # decided to use arrays for the linker flags. However, this feeling + # soon turns into despair when you realize that there only one entry + # per type in most cases, and we still have to do manual string splitting. + link_flags = [] + link_libs = [] + for i in tgt.get('link', {}).get('commandFragments', []): + if i['role'] == 'flags': + link_flags += [i['fragment']] + elif i['role'] == 'libraries': + link_libs += [i['fragment']] + elif i['role'] == 'libraryPath': + link_flags += ['-L{}'.format(i['fragment'])] + elif i['role'] == 'frameworkPath': + link_flags += ['-F{}'.format(i['fragment'])] + for i in tgt.get('archive', {}).get('commandFragments', []): + if i['role'] == 'flags': + link_flags += [i['fragment']] + + # TODO The `dependencies` entry is new in the file API. + # maybe we can make use of that in addition to the + # implicit dependency detection + tgt_data = { + 'artifacts': [x.get('path', '') for x in tgt.get('artifacts', [])], + 'sourceDirectory': src_dir, + 'buildDirectory': bld_dir, + 'name': tgt.get('name', ''), + 'fullName': tgt.get('nameOnDisk', ''), + 'hasInstallRule': 'install' in tgt, + 'installPaths': install_paths, + 'linkerLanguage': tgt.get('link', {}).get('language', 'CXX'), + 'linkLibraries': ' '.join(link_libs), # See previous comment block why we join the array + 'linkFlags': ' '.join(link_flags), # See previous comment block why we join the array + 'type': tgt.get('type', 'EXECUTABLE'), + 'fileGroups': [], + } + + processed_src_idx = [] + for cg in tgt.get('compileGroups', []): + # Again, why an array, when there is usually only one element + # and arguments are separated with spaces... + flags = [] + for i in cg.get('compileCommandFragments', []): + flags += [i['fragment']] + + cg_data = { + 'defines': [x.get('define', '') for x in cg.get('defines', [])], + 'compileFlags': ' '.join(flags), + 'language': cg.get('language', 'C'), + 'isGenerated': None, # Set later, flag is stored per source file + 'sources': [], + 'includePath': cg.get('includes', []), + } + + normal_src, generated_src, src_idx = parse_sources(cg, tgt) + if normal_src: + cg_data = dict(cg_data) + cg_data['isGenerated'] = False + cg_data['sources'] = normal_src + tgt_data['fileGroups'] += [cg_data] + if generated_src: + cg_data = dict(cg_data) + cg_data['isGenerated'] = True + cg_data['sources'] = generated_src + tgt_data['fileGroups'] += [cg_data] + processed_src_idx += src_idx + + # Object libraries have no compile groups, only source groups. + # So we add all the source files to a dummy source group that were + # not found in the previous loop + normal_src = [] + generated_src = [] + for idx, src in enumerate(tgt.get('sources', [])): + if idx in processed_src_idx: + continue + + if src.get('isGenerated', False): + generated_src += [src['path']] + else: + normal_src += [src['path']] + + if normal_src: + tgt_data['fileGroups'] += [{ + 'isGenerated': False, + 'sources': normal_src, + }] + if generated_src: + tgt_data['fileGroups'] += [{ + 'isGenerated': True, + 'sources': generated_src, + }] + return tgt_data + + def parse_project(pro: dict) -> dict: + # Only look at the first directory specified in directoryIndexes + # TODO Figure out what the other indexes are there for + p_src_dir = source_dir + p_bld_dir = build_dir + try: + p_src_dir, p_bld_dir = helper_parse_dir(cnf['directories'][pro['directoryIndexes'][0]]) + except (IndexError, KeyError): + pass + + pro_data = { + 'name': pro.get('name', ''), + 'sourceDirectory': p_src_dir, + 'buildDirectory': p_bld_dir, + 'targets': [], + } + + for ref in pro.get('targetIndexes', []): + tgt = {} + try: + tgt = cnf['targets'][ref] + except (IndexError, KeyError): + pass + pro_data['targets'] += [parse_target(tgt)] + + return pro_data + + for cnf in data.get('configurations', []): + cnf_data = { + 'name': cnf.get('name', ''), + 'projects': [], + } + + for pro in cnf.get('projects', []): + cnf_data['projects'] += [parse_project(pro)] + + self.cmake_configurations += [CMakeConfiguration(cnf_data)] + + def _parse_cmakeFiles(self, data: dict) -> None: + assert('inputs' in data) + assert('paths' in data) + + src_dir = data['paths']['source'] + + for i in data['inputs']: + path = i['path'] + path = path if os.path.isabs(path) else os.path.join(src_dir, path) + self.cmake_sources += [CMakeBuildFile(path, i.get('isCMake', False), i.get('isGenerated', False))] + + def _strip_data(self, data: T.Any) -> T.Any: + if isinstance(data, list): + for idx, i in enumerate(data): + data[idx] = self._strip_data(i) + + elif isinstance(data, dict): + new = {} + for key, val in data.items(): + if key not in STRIP_KEYS: + new[key] = self._strip_data(val) + data = new + + return data + + def _resolve_references(self, data: T.Any) -> T.Any: + if isinstance(data, list): + for idx, i in enumerate(data): + data[idx] = self._resolve_references(i) + + elif isinstance(data, dict): + # Check for the "magic" reference entry and insert + # it into the root data dict + if 'jsonFile' in data: + data.update(self._reply_file_content(data['jsonFile'])) + + for key, val in data.items(): + data[key] = self._resolve_references(val) + + return data + + def _reply_file_content(self, filename: str) -> dict: + real_path = os.path.join(self.reply_dir, filename) + if not os.path.exists(real_path): + raise CMakeException('File "{}" does not exist'.format(real_path)) + + with open(real_path, 'r') as fp: + return json.load(fp) diff --git a/meson/mesonbuild/cmake/generator.py b/meson/mesonbuild/cmake/generator.py new file mode 100644 index 000000000..a30d2de7b --- /dev/null +++ b/meson/mesonbuild/cmake/generator.py @@ -0,0 +1,129 @@ +# Copyright 2019 The Meson development team + +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at + +# http://www.apache.org/licenses/LICENSE-2.0 + +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from .. import mesonlib + +def parse_generator_expressions(raw: str) -> str: + '''Parse CMake generator expressions + + Most generator expressions are simply ignored for + simplicety, however some are required for some common + use cases. + ''' + + out = '' # type: str + i = 0 # type: int + + def equal(arg: str) -> str: + col_pos = arg.find(',') + if col_pos < 0: + return '0' + else: + return '1' if arg[:col_pos] == arg[col_pos + 1:] else '0' + + def vers_comp(op: str, arg: str) -> str: + col_pos = arg.find(',') + if col_pos < 0: + return '0' + else: + return '1' if mesonlib.version_compare(arg[:col_pos], '{}{}'.format(op, arg[col_pos + 1:])) else '0' + + supported = { + # Boolean functions + 'BOOL': lambda x: '0' if x.upper() in ['0', 'FALSE', 'OFF', 'N', 'NO', 'IGNORE', 'NOTFOUND'] or x.endswith('-NOTFOUND') else '1', + 'AND': lambda x: '1' if all([y == '1' for y in x.split(',')]) else '0', + 'OR': lambda x: '1' if any([y == '1' for y in x.split(',')]) else '0', + 'NOT': lambda x: '0' if x == '1' else '1', + + '0': lambda x: '', + '1': lambda x: x, + + # String operations + 'STREQUAL': equal, + 'EQUAL': equal, + 'VERSION_LESS': lambda x: vers_comp('<', x), + 'VERSION_GREATER': lambda x: vers_comp('>', x), + 'VERSION_EQUAL': lambda x: vers_comp('=', x), + 'VERSION_LESS_EQUAL': lambda x: vers_comp('<=', x), + 'VERSION_GREATER_EQUAL': lambda x: vers_comp('>=', x), + + # String modification + 'LOWER_CASE': lambda x: x.lower(), + 'UPPER_CASE': lambda x: x.upper(), + + # Always assume the BUILD_INTERFACE is valid. + # INSTALL_INTERFACE is always invalid for subprojects and + # it should also never appear in CMake config files, used + # for dependencies + 'INSTALL_INTERFACE': lambda x: '', + 'BUILD_INTERFACE': lambda x: x, + + # Constants + 'ANGLE-R': lambda x: '>', + 'COMMA': lambda x: ',', + 'SEMICOLON': lambda x: ';', + } + + # Recursively evaluate generator expressions + def eval_generator_expressions() -> str: + nonlocal i + i += 2 + + func = '' # type: str + args = '' # type: str + res = '' # type: str + exp = '' # type: str + + # Determine the body of the expression + while i < len(raw): + if raw[i] == '>': + # End of the generator expression + break + elif i < len(raw) - 1 and raw[i] == '$' and raw[i + 1] == '<': + # Nested generator expression + exp += eval_generator_expressions() + else: + # Generator expression body + exp += raw[i] + + i += 1 + + # Split the expression into a function and arguments part + col_pos = exp.find(':') + if col_pos < 0: + func = exp + else: + func = exp[:col_pos] + args = exp[col_pos + 1:] + + func = func.strip() + args = args.strip() + + # Evaluate the function + if func in supported: + res = supported[func](args) + + return res + + while i < len(raw): + if i < len(raw) - 1 and raw[i] == '$' and raw[i + 1] == '<': + # Generator expression detected --> try resolving it + out += eval_generator_expressions() + else: + # Normal string, leave unchanged + out += raw[i] + + i += 1 + + return out diff --git a/meson/mesonbuild/cmake/interpreter.py b/meson/mesonbuild/cmake/interpreter.py new file mode 100644 index 000000000..2fdb3289e --- /dev/null +++ b/meson/mesonbuild/cmake/interpreter.py @@ -0,0 +1,1328 @@ +# Copyright 2019 The Meson development team + +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at + +# http://www.apache.org/licenses/LICENSE-2.0 + +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# This class contains the basic functionality needed to run any interpreter +# or an interpreter-based tool. + +from .common import CMakeException, CMakeTarget, TargetOptions +from .client import CMakeClient, RequestCMakeInputs, RequestConfigure, RequestCompute, RequestCodeModel +from .fileapi import CMakeFileAPI +from .executor import CMakeExecutor +from .traceparser import CMakeTraceParser, CMakeGeneratorTarget +from .. import mlog, mesonlib +from ..environment import Environment +from ..mesonlib import MachineChoice, OrderedSet, version_compare +from ..mesondata import mesondata +from ..compilers.compilers import lang_suffixes, header_suffixes, obj_suffixes, lib_suffixes, is_header +from enum import Enum +from functools import lru_cache +from pathlib import Path +import typing as T +import os, re + +from ..mparser import ( + Token, + BaseNode, + CodeBlockNode, + FunctionNode, + ArrayNode, + ArgumentNode, + AssignmentNode, + BooleanNode, + StringNode, + IdNode, + IndexNode, + MethodNode, + NumberNode, +) + + +if T.TYPE_CHECKING: + from ..build import Build + from ..backend.backends import Backend + +# Disable all warnings automaticall enabled with --trace and friends +# See https://cmake.org/cmake/help/latest/variable/CMAKE_POLICY_WARNING_CMPNNNN.html +disable_policy_warnings = [ + 'CMP0025', + 'CMP0047', + 'CMP0056', + 'CMP0060', + 'CMP0065', + 'CMP0066', + 'CMP0067', + 'CMP0082', + 'CMP0089', +] + +backend_generator_map = { + 'ninja': 'Ninja', + 'xcode': 'Xcode', + 'vs2010': 'Visual Studio 10 2010', + 'vs2015': 'Visual Studio 15 2017', + 'vs2017': 'Visual Studio 15 2017', + 'vs2019': 'Visual Studio 16 2019', +} + +language_map = { + 'c': 'C', + 'cpp': 'CXX', + 'cuda': 'CUDA', + 'objc': 'OBJC', + 'objcpp': 'OBJCXX', + 'cs': 'CSharp', + 'java': 'Java', + 'fortran': 'Fortran', + 'swift': 'Swift', +} + +target_type_map = { + 'STATIC_LIBRARY': 'static_library', + 'MODULE_LIBRARY': 'shared_module', + 'SHARED_LIBRARY': 'shared_library', + 'EXECUTABLE': 'executable', + 'OBJECT_LIBRARY': 'static_library', + 'INTERFACE_LIBRARY': 'header_only' +} + +skip_targets = ['UTILITY'] + +blacklist_compiler_flags = [ + '-Wall', '-Wextra', '-Weverything', '-Werror', '-Wpedantic', '-pedantic', '-w', + '/W1', '/W2', '/W3', '/W4', '/Wall', '/WX', '/w', + '/O1', '/O2', '/Ob', '/Od', '/Og', '/Oi', '/Os', '/Ot', '/Ox', '/Oy', '/Ob0', + '/RTC1', '/RTCc', '/RTCs', '/RTCu', + '/Z7', '/Zi', '/ZI', +] + +blacklist_link_flags = [ + '/machine:x64', '/machine:x86', '/machine:arm', '/machine:ebc', + '/debug', '/debug:fastlink', '/debug:full', '/debug:none', + '/incremental', +] + +blacklist_clang_cl_link_flags = ['/GR', '/EHsc', '/MDd', '/Zi', '/RTC1'] + +blacklist_link_libs = [ + 'kernel32.lib', + 'user32.lib', + 'gdi32.lib', + 'winspool.lib', + 'shell32.lib', + 'ole32.lib', + 'oleaut32.lib', + 'uuid.lib', + 'comdlg32.lib', + 'advapi32.lib' +] + +transfer_dependencies_from = ['header_only'] + +_cmake_name_regex = re.compile(r'[^_a-zA-Z0-9]') +def _sanitize_cmake_name(name: str) -> str: + name = _cmake_name_regex.sub('_', name) + return 'cm_' + name + +class OutputTargetMap: + rm_so_version = re.compile(r'(\.[0-9]+)+$') + + def __init__(self, build_dir: str): + self.tgt_map = {} + self.build_dir = build_dir + + def add(self, tgt: T.Union['ConverterTarget', 'ConverterCustomTarget']) -> None: + def assign_keys(keys: T.List[str]) -> None: + for i in [x for x in keys if x]: + self.tgt_map[i] = tgt + keys = [self._target_key(tgt.cmake_name)] + if isinstance(tgt, ConverterTarget): + keys += [tgt.full_name] + keys += [self._rel_artifact_key(x) for x in tgt.artifacts] + keys += [self._base_artifact_key(x) for x in tgt.artifacts] + if isinstance(tgt, ConverterCustomTarget): + keys += [self._rel_generated_file_key(x) for x in tgt.original_outputs] + keys += [self._base_generated_file_key(x) for x in tgt.original_outputs] + assign_keys(keys) + + def _return_first_valid_key(self, keys: T.List[str]) -> T.Optional[T.Union['ConverterTarget', 'ConverterCustomTarget']]: + for i in keys: + if i and i in self.tgt_map: + return self.tgt_map[i] + return None + + def target(self, name: str) -> T.Optional[T.Union['ConverterTarget', 'ConverterCustomTarget']]: + return self._return_first_valid_key([self._target_key(name)]) + + def executable(self, name: str) -> T.Optional['ConverterTarget']: + tgt = self.target(name) + if tgt is None or not isinstance(tgt, ConverterTarget): + return None + if tgt.meson_func() != 'executable': + return None + return tgt + + def artifact(self, name: str) -> T.Optional[T.Union['ConverterTarget', 'ConverterCustomTarget']]: + keys = [] + candidates = [name, OutputTargetMap.rm_so_version.sub('', name)] + for i in lib_suffixes: + if not name.endswith('.' + i): + continue + new_name = name[:-len(i) - 1] + new_name = OutputTargetMap.rm_so_version.sub('', new_name) + candidates += ['{}.{}'.format(new_name, i)] + for i in candidates: + keys += [self._rel_artifact_key(i), os.path.basename(i), self._base_artifact_key(i)] + return self._return_first_valid_key(keys) + + def generated(self, name: str) -> T.Optional[T.Union['ConverterTarget', 'ConverterCustomTarget']]: + return self._return_first_valid_key([self._rel_generated_file_key(name), self._base_generated_file_key(name)]) + + # Utility functions to generate local keys + def _rel_path(self, fname: str) -> T.Optional[str]: + fname = os.path.normpath(os.path.join(self.build_dir, fname)) + if os.path.commonpath([self.build_dir, fname]) != self.build_dir: + return None + return os.path.relpath(fname, self.build_dir) + + def _target_key(self, tgt_name: str) -> str: + return '__tgt_{}__'.format(tgt_name) + + def _rel_generated_file_key(self, fname: str) -> T.Optional[str]: + path = self._rel_path(fname) + return '__relgen_{}__'.format(path) if path else None + + def _base_generated_file_key(self, fname: str) -> str: + return '__gen_{}__'.format(os.path.basename(fname)) + + def _rel_artifact_key(self, fname: str) -> T.Optional[str]: + path = self._rel_path(fname) + return '__relart_{}__'.format(path) if path else None + + def _base_artifact_key(self, fname: str) -> str: + return '__art_{}__'.format(os.path.basename(fname)) + +class ConverterTarget: + def __init__(self, target: CMakeTarget, env: Environment): + self.env = env + self.artifacts = target.artifacts + self.src_dir = target.src_dir + self.build_dir = target.build_dir + self.name = target.name + self.cmake_name = target.name + self.full_name = target.full_name + self.type = target.type + self.install = target.install + self.install_dir = '' + self.link_libraries = target.link_libraries + self.link_flags = target.link_flags + target.link_lang_flags + self.depends_raw = [] + self.depends = [] + + if target.install_paths: + self.install_dir = target.install_paths[0] + + self.languages = [] + self.sources = [] + self.generated = [] + self.includes = [] + self.sys_includes = [] + self.link_with = [] + self.object_libs = [] + self.compile_opts = {} + self.public_compile_opts = [] + self.pie = False + + # Project default override options (c_std, cpp_std, etc.) + self.override_options = [] + + # Convert the target name to a valid meson target name + self.name = _sanitize_cmake_name(self.name) + + for i in target.files: + # Determine the meson language + lang_cmake_to_meson = {val.lower(): key for key, val in language_map.items()} + lang = lang_cmake_to_meson.get(i.language.lower(), 'c') + if lang not in self.languages: + self.languages += [lang] + if lang not in self.compile_opts: + self.compile_opts[lang] = [] + + # Add arguments, but avoid duplicates + args = i.flags + args += ['-D{}'.format(x) for x in i.defines] + self.compile_opts[lang] += [x for x in args if x not in self.compile_opts[lang]] + + # Handle include directories + self.includes += [x['path'] for x in i.includes if x not in self.includes and not x['isSystem']] + self.sys_includes += [x['path'] for x in i.includes if x not in self.sys_includes and x['isSystem']] + + # Add sources to the right array + if i.is_generated: + self.generated += i.sources + else: + self.sources += i.sources + + def __repr__(self) -> str: + return '<{}: {}>'.format(self.__class__.__name__, self.name) + + std_regex = re.compile(r'([-]{1,2}std=|/std:v?|[-]{1,2}std:)(.*)') + + def postprocess(self, output_target_map: OutputTargetMap, root_src_dir: str, subdir: str, install_prefix: str, trace: CMakeTraceParser) -> None: + # Detect setting the C and C++ standard and do additional compiler args manipulation + for i in ['c', 'cpp']: + if i not in self.compile_opts: + continue + + temp = [] + for j in self.compile_opts[i]: + m = ConverterTarget.std_regex.match(j) + ctgt = output_target_map.generated(j) + if m: + std = m.group(2) + supported = self._all_lang_stds(i) + if std not in supported: + mlog.warning( + 'Unknown {0}_std "{1}" -> Ignoring. Try setting the project-' + 'level {0}_std if build errors occur. Known ' + '{0}_stds are: {2}'.format(i, std, ' '.join(supported)), + once=True + ) + continue + self.override_options += ['{}_std={}'.format(i, std)] + elif j in ['-fPIC', '-fpic', '-fPIE', '-fpie']: + self.pie = True + elif isinstance(ctgt, ConverterCustomTarget): + # Sometimes projects pass generated source files as compiler + # flags. Add these as generated sources to ensure that the + # corresponding custom target is run.2 + self.generated += [j] + temp += [j] + elif j in blacklist_compiler_flags: + pass + else: + temp += [j] + + self.compile_opts[i] = temp + + # Make sure to force enable -fPIC for OBJECT libraries + if self.type.upper() == 'OBJECT_LIBRARY': + self.pie = True + + # Use the CMake trace, if required + tgt = trace.targets.get(self.cmake_name) + if tgt: + self.depends_raw = trace.targets[self.cmake_name].depends + + # TODO refactor this copy paste from CMakeDependency for future releases + reg_is_lib = re.compile(r'^(-l[a-zA-Z0-9_]+|-l?pthread)$') + to_process = [self.cmake_name] + processed = [] + while len(to_process) > 0: + curr = to_process.pop(0) + + if curr in processed or curr not in trace.targets: + continue + + tgt = trace.targets[curr] + cfgs = [] + cfg = '' + otherDeps = [] + libraries = [] + mlog.debug(tgt) + + if 'INTERFACE_INCLUDE_DIRECTORIES' in tgt.properties: + self.includes += [x for x in tgt.properties['INTERFACE_INCLUDE_DIRECTORIES'] if x] + + if 'INTERFACE_LINK_OPTIONS' in tgt.properties: + self.link_flags += [x for x in tgt.properties['INTERFACE_LINK_OPTIONS'] if x] + + if 'INTERFACE_COMPILE_DEFINITIONS' in tgt.properties: + self.public_compile_opts += ['-D' + re.sub('^-D', '', x) for x in tgt.properties['INTERFACE_COMPILE_DEFINITIONS'] if x] + + if 'INTERFACE_COMPILE_OPTIONS' in tgt.properties: + self.public_compile_opts += [x for x in tgt.properties['INTERFACE_COMPILE_OPTIONS'] if x] + + if 'IMPORTED_CONFIGURATIONS' in tgt.properties: + cfgs += [x for x in tgt.properties['IMPORTED_CONFIGURATIONS'] if x] + cfg = cfgs[0] + + if 'CONFIGURATIONS' in tgt.properties: + cfgs += [x for x in tgt.properties['CONFIGURATIONS'] if x] + cfg = cfgs[0] + + is_debug = self.env.coredata.get_builtin_option('debug'); + if is_debug: + if 'DEBUG' in cfgs: + cfg = 'DEBUG' + elif 'RELEASE' in cfgs: + cfg = 'RELEASE' + else: + if 'RELEASE' in cfgs: + cfg = 'RELEASE' + + if 'IMPORTED_IMPLIB_{}'.format(cfg) in tgt.properties: + libraries += [x for x in tgt.properties['IMPORTED_IMPLIB_{}'.format(cfg)] if x] + elif 'IMPORTED_IMPLIB' in tgt.properties: + libraries += [x for x in tgt.properties['IMPORTED_IMPLIB'] if x] + elif 'IMPORTED_LOCATION_{}'.format(cfg) in tgt.properties: + libraries += [x for x in tgt.properties['IMPORTED_LOCATION_{}'.format(cfg)] if x] + elif 'IMPORTED_LOCATION' in tgt.properties: + libraries += [x for x in tgt.properties['IMPORTED_LOCATION'] if x] + + if 'LINK_LIBRARIES' in tgt.properties: + otherDeps += [x for x in tgt.properties['LINK_LIBRARIES'] if x] + + if 'INTERFACE_LINK_LIBRARIES' in tgt.properties: + otherDeps += [x for x in tgt.properties['INTERFACE_LINK_LIBRARIES'] if x] + + if 'IMPORTED_LINK_DEPENDENT_LIBRARIES_{}'.format(cfg) in tgt.properties: + otherDeps += [x for x in tgt.properties['IMPORTED_LINK_DEPENDENT_LIBRARIES_{}'.format(cfg)] if x] + elif 'IMPORTED_LINK_DEPENDENT_LIBRARIES' in tgt.properties: + otherDeps += [x for x in tgt.properties['IMPORTED_LINK_DEPENDENT_LIBRARIES'] if x] + + for j in otherDeps: + if j in trace.targets: + to_process += [j] + elif reg_is_lib.match(j) or os.path.exists(j): + libraries += [j] + + for j in libraries: + if j not in self.link_libraries: + self.link_libraries += [j] + + processed += [curr] + elif self.type.upper() not in ['EXECUTABLE', 'OBJECT_LIBRARY']: + mlog.warning('CMake: Target', mlog.bold(self.cmake_name), 'not found in CMake trace. This can lead to build errors') + + temp = [] + for i in self.link_libraries: + # Let meson handle this arcane magic + if ',-rpath,' in i: + continue + if not os.path.isabs(i): + link_with = output_target_map.artifact(i) + if link_with: + self.link_with += [link_with] + continue + + temp += [i] + self.link_libraries = temp + + # Filter out files that are not supported by the language + supported = list(header_suffixes) + list(obj_suffixes) + for i in self.languages: + supported += list(lang_suffixes[i]) + supported = ['.{}'.format(x) for x in supported] + self.sources = [x for x in self.sources if any([x.endswith(y) for y in supported])] + self.generated = [x for x in self.generated if any([x.endswith(y) for y in supported])] + + # Make paths relative + def rel_path(x: str, is_header: bool, is_generated: bool) -> T.Optional[str]: + if not os.path.isabs(x): + x = os.path.join(self.src_dir, x) + x = os.path.normpath(x) + if not os.path.exists(x) and not any([x.endswith(y) for y in obj_suffixes]) and not is_generated: + if ( + any([os.path.commonpath([x, os.path.normpath(os.path.join(root_src_dir, y))]) == x for y in self.generated]) + and os.path.commonpath([x, self.env.get_build_dir()]) == self.env.get_build_dir() + ): + os.makedirs(x) + return os.path.relpath(x, os.path.join(self.env.get_build_dir(), subdir)) + else: + mlog.warning('CMake: path', mlog.bold(x), 'does not exist.') + mlog.warning(' --> Ignoring. This can lead to build errors.') + return None + if Path(x) in trace.explicit_headers: + return None + if ( + os.path.isabs(x) + and os.path.commonpath([x, self.env.get_source_dir()]) == self.env.get_source_dir() + and not ( + os.path.commonpath([x, root_src_dir]) == root_src_dir or + os.path.commonpath([x, self.env.get_build_dir()]) == self.env.get_build_dir() + ) + ): + mlog.warning('CMake: path', mlog.bold(x), 'is inside the root project but', mlog.bold('not'), 'inside the subproject.') + mlog.warning(' --> Ignoring. This can lead to build errors.') + return None + if os.path.isabs(x) and os.path.commonpath([x, self.env.get_build_dir()]) == self.env.get_build_dir(): + if is_header: + return os.path.relpath(x, os.path.join(self.env.get_build_dir(), subdir)) + else: + return os.path.relpath(x, root_src_dir) + if os.path.isabs(x) and os.path.commonpath([x, root_src_dir]) == root_src_dir: + return os.path.relpath(x, root_src_dir) + return x + + def custom_target(x: str): + ctgt = output_target_map.generated(x) + if ctgt: + assert(isinstance(ctgt, ConverterCustomTarget)) + ref = ctgt.get_ref(x) + assert(isinstance(ref, CustomTargetReference) and ref.valid()) + return ref + return x + + build_dir_rel = os.path.relpath(self.build_dir, os.path.join(self.env.get_build_dir(), subdir)) + self.generated = [rel_path(x, False, True) for x in self.generated] + self.includes = list(OrderedSet([rel_path(x, True, False) for x in OrderedSet(self.includes)] + [build_dir_rel])) + self.sys_includes = list(OrderedSet([rel_path(x, True, False) for x in OrderedSet(self.sys_includes)])) + self.sources = [rel_path(x, False, False) for x in self.sources] + + # Resolve custom targets + self.generated = [custom_target(x) for x in self.generated] + + # Remove delete entries + self.includes = [x for x in self.includes if x is not None] + self.sys_includes = [x for x in self.sys_includes if x is not None] + self.sources = [x for x in self.sources if x is not None] + self.generated = [x for x in self.generated if x is not None] + + # Make sure '.' is always in the include directories + if '.' not in self.includes: + self.includes += ['.'] + + # make install dir relative to the install prefix + if self.install_dir and os.path.isabs(self.install_dir): + if os.path.commonpath([self.install_dir, install_prefix]) == install_prefix: + self.install_dir = os.path.relpath(self.install_dir, install_prefix) + + # Remove blacklisted options and libs + def check_flag(flag: str) -> bool: + if flag.lower() in blacklist_link_flags or flag in blacklist_compiler_flags + blacklist_clang_cl_link_flags: + return False + if flag.startswith('/D'): + return False + return True + + self.link_libraries = [x for x in self.link_libraries if x.lower() not in blacklist_link_libs] + self.link_flags = [x for x in self.link_flags if check_flag(x)] + + # Handle explicit CMake add_dependency() calls + for i in self.depends_raw: + tgt = output_target_map.target(i) + if tgt: + self.depends.append(tgt) + + def process_object_libs(self, obj_target_list: T.List['ConverterTarget'], linker_workaround: bool): + # Try to detect the object library(s) from the generated input sources + temp = [x for x in self.generated if isinstance(x, str)] + temp = [os.path.basename(x) for x in temp] + temp = [x for x in temp if any([x.endswith('.' + y) for y in obj_suffixes])] + temp = [os.path.splitext(x)[0] for x in temp] + exts = self._all_source_suffixes() + # Temp now stores the source filenames of the object files + for i in obj_target_list: + source_files = [x for x in i.sources + i.generated if isinstance(x, str)] + source_files = [os.path.basename(x) for x in source_files] + for j in temp: + # On some platforms (specifically looking at you Windows with vs20xy backend) CMake does + # not produce object files with the format `foo.cpp.obj`, instead it skipps the language + # suffix and just produces object files like `foo.obj`. Thus we have to do our best to + # undo this step and guess the correct language suffix of the object file. This is done + # by trying all language suffixes meson knows and checking if one of them fits. + candidates = [j] # type: T.List[str] + if not any([j.endswith('.' + x) for x in exts]): + mlog.warning('Object files do not contain source file extensions, thus falling back to guessing them.', once=True) + candidates += ['{}.{}'.format(j, x) for x in exts] + if any([x in source_files for x in candidates]): + if linker_workaround: + self._append_objlib_sources(i) + else: + self.includes += i.includes + self.includes = list(OrderedSet(self.includes)) + self.object_libs += [i] + break + + # Filter out object files from the sources + self.generated = [x for x in self.generated if not isinstance(x, str) or not any([x.endswith('.' + y) for y in obj_suffixes])] + + def _append_objlib_sources(self, tgt: 'ConverterTarget') -> None: + self.includes += tgt.includes + self.sources += tgt.sources + self.generated += tgt.generated + self.sources = list(OrderedSet(self.sources)) + self.generated = list(OrderedSet(self.generated)) + self.includes = list(OrderedSet(self.includes)) + + # Inherit compiler arguments since they may be required for building + for lang, opts in tgt.compile_opts.items(): + if lang not in self.compile_opts: + self.compile_opts[lang] = [] + self.compile_opts[lang] += [x for x in opts if x not in self.compile_opts[lang]] + + @lru_cache(maxsize=None) + def _all_source_suffixes(self) -> T.List[str]: + suffixes = [] # type: T.List[str] + for exts in lang_suffixes.values(): + suffixes += [x for x in exts] + return suffixes + + @lru_cache(maxsize=None) + def _all_lang_stds(self, lang: str) -> T.List[str]: + lang_opts = self.env.coredata.compiler_options.build.get(lang, None) + if not lang_opts or 'std' not in lang_opts: + return [] + return lang_opts['std'].choices + + def process_inter_target_dependencies(self): + # Move the dependencies from all transfer_dependencies_from to the target + to_process = list(self.depends) + processed = [] + new_deps = [] + for i in to_process: + processed += [i] + if isinstance(i, ConverterTarget) and i.meson_func() in transfer_dependencies_from: + to_process += [x for x in i.depends if x not in processed] + else: + new_deps += [i] + self.depends = list(OrderedSet(new_deps)) + + def cleanup_dependencies(self): + # Clear the dependencies from targets that where moved from + if self.meson_func() in transfer_dependencies_from: + self.depends = [] + + def meson_func(self) -> str: + return target_type_map.get(self.type.upper()) + + def log(self) -> None: + mlog.log('Target', mlog.bold(self.name), '({})'.format(self.cmake_name)) + mlog.log(' -- artifacts: ', mlog.bold(str(self.artifacts))) + mlog.log(' -- full_name: ', mlog.bold(self.full_name)) + mlog.log(' -- type: ', mlog.bold(self.type)) + mlog.log(' -- install: ', mlog.bold('true' if self.install else 'false')) + mlog.log(' -- install_dir: ', mlog.bold(self.install_dir)) + mlog.log(' -- link_libraries: ', mlog.bold(str(self.link_libraries))) + mlog.log(' -- link_with: ', mlog.bold(str(self.link_with))) + mlog.log(' -- object_libs: ', mlog.bold(str(self.object_libs))) + mlog.log(' -- link_flags: ', mlog.bold(str(self.link_flags))) + mlog.log(' -- languages: ', mlog.bold(str(self.languages))) + mlog.log(' -- includes: ', mlog.bold(str(self.includes))) + mlog.log(' -- sys_includes: ', mlog.bold(str(self.sys_includes))) + mlog.log(' -- sources: ', mlog.bold(str(self.sources))) + mlog.log(' -- generated: ', mlog.bold(str(self.generated))) + mlog.log(' -- pie: ', mlog.bold('true' if self.pie else 'false')) + mlog.log(' -- override_opts: ', mlog.bold(str(self.override_options))) + mlog.log(' -- depends: ', mlog.bold(str(self.depends))) + mlog.log(' -- options:') + for key, val in self.compile_opts.items(): + mlog.log(' -', key, '=', mlog.bold(str(val))) + +class CustomTargetReference: + def __init__(self, ctgt: 'ConverterCustomTarget', index: int): + self.ctgt = ctgt # type: ConverterCustomTarget + self.index = index # type: int + + def __repr__(self) -> str: + if self.valid(): + return '<{}: {} [{}]>'.format(self.__class__.__name__, self.ctgt.name, self.ctgt.outputs[self.index]) + else: + return '<{}: INVALID REFERENCE>'.format(self.__class__.__name__) + + def valid(self) -> bool: + return self.ctgt is not None and self.index >= 0 + + def filename(self) -> str: + return self.ctgt.outputs[self.index] + +class ConverterCustomTarget: + tgt_counter = 0 # type: int + out_counter = 0 # type: int + + def __init__(self, target: CMakeGeneratorTarget): + assert(target.current_bin_dir is not None) + assert(target.current_src_dir is not None) + self.name = target.name + if not self.name: + self.name = 'custom_tgt_{}'.format(ConverterCustomTarget.tgt_counter) + ConverterCustomTarget.tgt_counter += 1 + self.cmake_name = str(self.name) + self.original_outputs = list(target.outputs) + self.outputs = [os.path.basename(x) for x in self.original_outputs] + self.conflict_map = {} + self.command = target.command + self.working_dir = target.working_dir + self.depends_raw = target.depends + self.inputs = [] + self.depends = [] + self.current_bin_dir = Path(target.current_bin_dir) + self.current_src_dir = Path(target.current_src_dir) + + # Convert the target name to a valid meson target name + self.name = _sanitize_cmake_name(self.name) + + def __repr__(self) -> str: + return '<{}: {} {}>'.format(self.__class__.__name__, self.name, self.outputs) + + def postprocess(self, output_target_map: OutputTargetMap, root_src_dir: str, subdir: str, all_outputs: T.List[str], trace: CMakeTraceParser) -> None: + # Default the working directory to ${CMAKE_CURRENT_BINARY_DIR} + if not self.working_dir: + self.working_dir = self.current_bin_dir.as_posix() + + # relative paths in the working directory are always relative + # to ${CMAKE_CURRENT_BINARY_DIR} + if not os.path.isabs(self.working_dir): + self.working_dir = (self.current_bin_dir / self.working_dir).as_posix() + + # Modify the original outputs if they are relative. Again, + # relative paths are relative to ${CMAKE_CURRENT_BINARY_DIR} + def ensure_absolute(x: Path) -> Path: + if x.is_absolute(): + return x + else: + return self.current_bin_dir / x + self.original_outputs = [ensure_absolute(Path(x)).as_posix() for x in self.original_outputs] + + # Ensure that there is no duplicate output in the project so + # that meson can handle cases where the same filename is + # generated in multiple directories + temp_outputs = [] # type: T.List[str] + for i in self.outputs: + if i in all_outputs: + old = str(i) + i = 'c{}_{}'.format(ConverterCustomTarget.out_counter, i) + ConverterCustomTarget.out_counter += 1 + self.conflict_map[old] = i + all_outputs += [i] + temp_outputs += [i] + self.outputs = temp_outputs + + # Check if the command is a build target + commands = [] + for i in self.command: + assert(isinstance(i, list)) + cmd = [] + + for j in i: + if not j: + continue + target = output_target_map.executable(j) + if target: + cmd += [target] + continue + elif j in trace.targets: + trace_tgt = trace.targets[j] + if trace_tgt.type == 'EXECUTABLE' and 'IMPORTED_LOCATION' in trace_tgt.properties: + cmd += trace_tgt.properties['IMPORTED_LOCATION'] + continue + mlog.debug('CMake: Found invalid CMake target "{}" --> ignoring \n{}'.format(j, trace_tgt)) + + # Fallthrough on error + cmd += [j] + + commands += [cmd] + self.command = commands + + # If the custom target does not declare any output, create a dummy + # one that can be used as dependency. + if not self.outputs: + self.outputs = [self.name + '.h'] + + # Check dependencies and input files + root = Path(root_src_dir) + for i in self.depends_raw: + if not i: + continue + raw = Path(i) + art = output_target_map.artifact(i) + tgt = output_target_map.target(i) + gen = output_target_map.generated(i) + + rel_to_root = None + try: + rel_to_root = raw.relative_to(root) + except ValueError: + rel_to_root = None + + # First check for existing files. Only then check for existing + # targets, etc. This reduces the chance of misdetecting input files + # as outputs from other targets. + # See https://github.com/mesonbuild/meson/issues/6632 + if not raw.is_absolute() and (self.current_src_dir / raw).exists(): + self.inputs += [(self.current_src_dir / raw).relative_to(root).as_posix()] + elif raw.is_absolute() and raw.exists() and rel_to_root is not None: + self.inputs += [rel_to_root.as_posix()] + elif art: + self.depends += [art] + elif tgt: + self.depends += [tgt] + elif gen: + self.inputs += [gen.get_ref(i)] + + def process_inter_target_dependencies(self): + # Move the dependencies from all transfer_dependencies_from to the target + to_process = list(self.depends) + processed = [] + new_deps = [] + for i in to_process: + processed += [i] + if isinstance(i, ConverterTarget) and i.meson_func() in transfer_dependencies_from: + to_process += [x for x in i.depends if x not in processed] + else: + new_deps += [i] + self.depends = list(OrderedSet(new_deps)) + + def get_ref(self, fname: str) -> T.Optional[CustomTargetReference]: + fname = os.path.basename(fname) + try: + if fname in self.conflict_map: + fname = self.conflict_map[fname] + idx = self.outputs.index(fname) + return CustomTargetReference(self, idx) + except ValueError: + return None + + def log(self) -> None: + mlog.log('Custom Target', mlog.bold(self.name), '({})'.format(self.cmake_name)) + mlog.log(' -- command: ', mlog.bold(str(self.command))) + mlog.log(' -- outputs: ', mlog.bold(str(self.outputs))) + mlog.log(' -- conflict_map: ', mlog.bold(str(self.conflict_map))) + mlog.log(' -- working_dir: ', mlog.bold(str(self.working_dir))) + mlog.log(' -- depends_raw: ', mlog.bold(str(self.depends_raw))) + mlog.log(' -- inputs: ', mlog.bold(str(self.inputs))) + mlog.log(' -- depends: ', mlog.bold(str(self.depends))) + +class CMakeAPI(Enum): + SERVER = 1 + FILE = 2 + +class CMakeInterpreter: + def __init__(self, build: 'Build', subdir: str, src_dir: str, install_prefix: str, env: Environment, backend: 'Backend'): + assert(hasattr(backend, 'name')) + self.build = build + self.subdir = subdir + self.src_dir = src_dir + self.build_dir_rel = os.path.join(subdir, '__CMake_build') + self.build_dir = os.path.join(env.get_build_dir(), self.build_dir_rel) + self.install_prefix = install_prefix + self.env = env + self.backend_name = backend.name + self.linkers = set() # type: T.Set[str] + self.cmake_api = CMakeAPI.SERVER + self.client = CMakeClient(self.env) + self.fileapi = CMakeFileAPI(self.build_dir) + + # Raw CMake results + self.bs_files = [] + self.codemodel_configs = None + self.raw_trace = None + + # Analysed data + self.project_name = '' + self.languages = [] + self.targets = [] + self.custom_targets = [] # type: T.List[ConverterCustomTarget] + self.trace = CMakeTraceParser('', '') # Will be replaced in analyse + self.output_target_map = OutputTargetMap(self.build_dir) + + # Generated meson data + self.generated_targets = {} + self.internal_name_map = {} + + def configure(self, extra_cmake_options: T.List[str]) -> None: + for_machine = MachineChoice.HOST # TODO make parameter + # Find CMake + cmake_exe = CMakeExecutor(self.env, '>=3.7', for_machine) + if not cmake_exe.found(): + raise CMakeException('Unable to find CMake') + self.trace = CMakeTraceParser(cmake_exe.version(), self.build_dir, permissive=True) + + preload_file = mesondata['cmake/data/preload.cmake'].write_to_private(self.env) + + # Prefere CMAKE_PROJECT_INCLUDE over CMAKE_TOOLCHAIN_FILE if possible, + # since CMAKE_PROJECT_INCLUDE was actually designed for code injection. + preload_var = 'CMAKE_PROJECT_INCLUDE' + if version_compare(cmake_exe.version(), '<3.15'): + preload_var = 'CMAKE_TOOLCHAIN_FILE' + + generator = backend_generator_map[self.backend_name] + cmake_args = [] + trace_args = self.trace.trace_args() + cmcmp_args = ['-DCMAKE_POLICY_WARNING_{}=OFF'.format(x) for x in disable_policy_warnings] + pload_args = ['-D{}={}'.format(preload_var, str(preload_file))] + + if version_compare(cmake_exe.version(), '>=3.14'): + self.cmake_api = CMakeAPI.FILE + self.fileapi.setup_request() + + # Map meson compiler to CMake variables + for lang, comp in self.env.coredata.compilers[for_machine].items(): + if lang not in language_map: + continue + self.linkers.add(comp.get_linker_id()) + cmake_lang = language_map[lang] + exelist = comp.get_exelist() + if len(exelist) == 1: + cmake_args += ['-DCMAKE_{}_COMPILER={}'.format(cmake_lang, exelist[0])] + elif len(exelist) == 2: + cmake_args += ['-DCMAKE_{}_COMPILER_LAUNCHER={}'.format(cmake_lang, exelist[0]), + '-DCMAKE_{}_COMPILER={}'.format(cmake_lang, exelist[1])] + if hasattr(comp, 'get_linker_exelist') and comp.get_id() == 'clang-cl': + cmake_args += ['-DCMAKE_LINKER={}'.format(comp.get_linker_exelist()[0])] + cmake_args += ['-G', generator] + cmake_args += ['-DCMAKE_INSTALL_PREFIX={}'.format(self.install_prefix)] + cmake_args += extra_cmake_options + + # Run CMake + mlog.log() + with mlog.nested(): + mlog.log('Configuring the build directory with', mlog.bold('CMake'), 'version', mlog.cyan(cmake_exe.version())) + mlog.log(mlog.bold('Running:'), ' '.join(cmake_args)) + mlog.log(mlog.bold(' - build directory: '), self.build_dir) + mlog.log(mlog.bold(' - source directory: '), self.src_dir) + mlog.log(mlog.bold(' - trace args: '), ' '.join(trace_args)) + mlog.log(mlog.bold(' - preload file: '), str(preload_file)) + mlog.log(mlog.bold(' - disabled policy warnings:'), '[{}]'.format(', '.join(disable_policy_warnings))) + mlog.log() + os.makedirs(self.build_dir, exist_ok=True) + os_env = os.environ.copy() + os_env['LC_ALL'] = 'C' + final_args = cmake_args + trace_args + cmcmp_args + pload_args + [self.src_dir] + + cmake_exe.set_exec_mode(print_cmout=True, always_capture_stderr=self.trace.requires_stderr()) + rc, _, self.raw_trace = cmake_exe.call(final_args, self.build_dir, env=os_env, disable_cache=True) + + mlog.log() + h = mlog.green('SUCCEEDED') if rc == 0 else mlog.red('FAILED') + mlog.log('CMake configuration:', h) + if rc != 0: + raise CMakeException('Failed to configure the CMake subproject') + + def initialise(self, extra_cmake_options: T.List[str]) -> None: + # Run configure the old way because doing it + # with the server doesn't work for some reason + # Additionally, the File API requires a configure anyway + self.configure(extra_cmake_options) + + # Continue with the file API If supported + if self.cmake_api is CMakeAPI.FILE: + # Parse the result + self.fileapi.load_reply() + + # Load the buildsystem file list + cmake_files = self.fileapi.get_cmake_sources() + self.bs_files = [x.file for x in cmake_files if not x.is_cmake and not x.is_temp] + self.bs_files = [os.path.relpath(x, self.env.get_source_dir()) for x in self.bs_files] + self.bs_files = list(OrderedSet(self.bs_files)) + + # Load the codemodel configurations + self.codemodel_configs = self.fileapi.get_cmake_configurations() + return + + with self.client.connect(): + generator = backend_generator_map[self.backend_name] + self.client.do_handshake(self.src_dir, self.build_dir, generator, 1) + + # Do a second configure to initialise the server + self.client.query_checked(RequestConfigure(), 'CMake server configure') + + # Generate the build system files + self.client.query_checked(RequestCompute(), 'Generating build system files') + + # Get CMake build system files + bs_reply = self.client.query_checked(RequestCMakeInputs(), 'Querying build system files') + + # Now get the CMake code model + cm_reply = self.client.query_checked(RequestCodeModel(), 'Querying the CMake code model') + + src_dir = bs_reply.src_dir + self.bs_files = [x.file for x in bs_reply.build_files if not x.is_cmake and not x.is_temp] + self.bs_files = [os.path.relpath(os.path.join(src_dir, x), self.env.get_source_dir()) for x in self.bs_files] + self.bs_files = list(OrderedSet(self.bs_files)) + self.codemodel_configs = cm_reply.configs + + def analyse(self) -> None: + if self.codemodel_configs is None: + raise CMakeException('CMakeInterpreter was not initialized') + + # Clear analyser data + self.project_name = '' + self.languages = [] + self.targets = [] + self.custom_targets = [] + + # Parse the trace + self.trace.parse(self.raw_trace) + + # Find all targets + added_target_names = [] # type: T.List[str] + for i in self.codemodel_configs: + for j in i.projects: + if not self.project_name: + self.project_name = j.name + for k in j.targets: + # Avoid duplicate targets from different configurations and known + # dummy CMake internal target types + if k.type not in skip_targets and k.name not in added_target_names: + added_target_names += [k.name] + self.targets += [ConverterTarget(k, self.env)] + + # Add interface targets from trace, if not already present. + # This step is required because interface targets were removed from + # the CMake file API output. + api_target_name_list = [x.name for x in self.targets] + for i in self.trace.targets.values(): + if i.type != 'INTERFACE' or i.name in api_target_name_list or i.imported: + continue + dummy = CMakeTarget({ + 'name': i.name, + 'type': 'INTERFACE_LIBRARY', + 'sourceDirectory': self.src_dir, + 'buildDirectory': self.build_dir, + }) + self.targets += [ConverterTarget(dummy, self.env)] + + for i in self.trace.custom_targets: + self.custom_targets += [ConverterCustomTarget(i)] + + # generate the output_target_map + for i in [*self.targets, *self.custom_targets]: + self.output_target_map.add(i) + + # First pass: Basic target cleanup + object_libs = [] + custom_target_outputs = [] # type: T.List[str] + for i in self.custom_targets: + i.postprocess(self.output_target_map, self.src_dir, self.subdir, custom_target_outputs, self.trace) + for i in self.targets: + i.postprocess(self.output_target_map, self.src_dir, self.subdir, self.install_prefix, self.trace) + if i.type == 'OBJECT_LIBRARY': + object_libs += [i] + self.languages += [x for x in i.languages if x not in self.languages] + + # Second pass: Detect object library dependencies + for i in self.targets: + i.process_object_libs(object_libs, self._object_lib_workaround()) + + # Third pass: Reassign dependencies to avoid some loops + for i in self.targets: + i.process_inter_target_dependencies() + for i in self.custom_targets: + i.process_inter_target_dependencies() + + # Fourth pass: Remove rassigned dependencies + for i in self.targets: + i.cleanup_dependencies() + + mlog.log('CMake project', mlog.bold(self.project_name), 'has', mlog.bold(str(len(self.targets) + len(self.custom_targets))), 'build targets.') + + def pretend_to_be_meson(self, options: TargetOptions) -> CodeBlockNode: + if not self.project_name: + raise CMakeException('CMakeInterpreter was not analysed') + + def token(tid: str = 'string', val='') -> Token: + return Token(tid, self.subdir, 0, 0, 0, None, val) + + def string(value: str) -> StringNode: + return StringNode(token(val=value)) + + def id_node(value: str) -> IdNode: + return IdNode(token(val=value)) + + def number(value: int) -> NumberNode: + return NumberNode(token(val=value)) + + def nodeify(value): + if isinstance(value, str): + return string(value) + elif isinstance(value, bool): + return BooleanNode(token(val=value)) + elif isinstance(value, int): + return number(value) + elif isinstance(value, list): + return array(value) + return value + + def indexed(node: BaseNode, index: int) -> IndexNode: + return IndexNode(node, nodeify(index)) + + def array(elements) -> ArrayNode: + args = ArgumentNode(token()) + if not isinstance(elements, list): + elements = [args] + args.arguments += [nodeify(x) for x in elements if x is not None] + return ArrayNode(args, 0, 0, 0, 0) + + def function(name: str, args=None, kwargs=None) -> FunctionNode: + args = [] if args is None else args + kwargs = {} if kwargs is None else kwargs + args_n = ArgumentNode(token()) + if not isinstance(args, list): + args = [args] + args_n.arguments = [nodeify(x) for x in args if x is not None] + args_n.kwargs = {id_node(k): nodeify(v) for k, v in kwargs.items() if v is not None} + func_n = FunctionNode(self.subdir, 0, 0, 0, 0, name, args_n) + return func_n + + def method(obj: BaseNode, name: str, args=None, kwargs=None) -> MethodNode: + args = [] if args is None else args + kwargs = {} if kwargs is None else kwargs + args_n = ArgumentNode(token()) + if not isinstance(args, list): + args = [args] + args_n.arguments = [nodeify(x) for x in args if x is not None] + args_n.kwargs = {id_node(k): nodeify(v) for k, v in kwargs.items() if v is not None} + return MethodNode(self.subdir, 0, 0, obj, name, args_n) + + def assign(var_name: str, value: BaseNode) -> AssignmentNode: + return AssignmentNode(self.subdir, 0, 0, var_name, value) + + # Generate the root code block and the project function call + root_cb = CodeBlockNode(token()) + root_cb.lines += [function('project', [self.project_name] + self.languages)] + + # Add the run script for custom commands + + # Add the targets + processing = [] + processed = {} + name_map = {} + + def extract_tgt(tgt: T.Union[ConverterTarget, ConverterCustomTarget, CustomTargetReference]) -> IdNode: + tgt_name = None + if isinstance(tgt, (ConverterTarget, ConverterCustomTarget)): + tgt_name = tgt.name + elif isinstance(tgt, CustomTargetReference): + tgt_name = tgt.ctgt.name + assert(tgt_name is not None and tgt_name in processed) + res_var = processed[tgt_name]['tgt'] + return id_node(res_var) if res_var else None + + def detect_cycle(tgt: T.Union[ConverterTarget, ConverterCustomTarget]) -> None: + if tgt.name in processing: + raise CMakeException('Cycle in CMake inputs/dependencies detected') + processing.append(tgt.name) + + def resolve_ctgt_ref(ref: CustomTargetReference) -> BaseNode: + tgt_var = extract_tgt(ref) + if len(ref.ctgt.outputs) == 1: + return tgt_var + else: + return indexed(tgt_var, ref.index) + + def process_target(tgt: ConverterTarget): + detect_cycle(tgt) + + # First handle inter target dependencies + link_with = [] + objec_libs = [] # type: T.List[IdNode] + sources = [] + generated = [] + generated_filenames = [] + custom_targets = [] + dependencies = [] + for i in tgt.link_with: + assert(isinstance(i, ConverterTarget)) + if i.name not in processed: + process_target(i) + link_with += [extract_tgt(i)] + for i in tgt.object_libs: + assert(isinstance(i, ConverterTarget)) + if i.name not in processed: + process_target(i) + objec_libs += [extract_tgt(i)] + for i in tgt.depends: + if not isinstance(i, ConverterCustomTarget): + continue + if i.name not in processed: + process_custom_target(i) + dependencies += [extract_tgt(i)] + + # Generate the source list and handle generated sources + for i in tgt.sources + tgt.generated: + if isinstance(i, CustomTargetReference): + if i.ctgt.name not in processed: + process_custom_target(i.ctgt) + generated += [resolve_ctgt_ref(i)] + generated_filenames += [i.filename()] + if i.ctgt not in custom_targets: + custom_targets += [i.ctgt] + else: + sources += [i] + + # Add all header files from all used custom targets. This + # ensures that all custom targets are built before any + # sources of the current target are compiled and thus all + # header files are present. This step is necessary because + # CMake always ensures that a custom target is executed + # before another target if at least one output is used. + for i in custom_targets: + for j in i.outputs: + if not is_header(j) or j in generated_filenames: + continue + + generated += [resolve_ctgt_ref(i.get_ref(j))] + generated_filenames += [j] + + # Determine the meson function to use for the build target + tgt_func = tgt.meson_func() + if not tgt_func: + raise CMakeException('Unknown target type "{}"'.format(tgt.type)) + + # Determine the variable names + inc_var = '{}_inc'.format(tgt.name) + dir_var = '{}_dir'.format(tgt.name) + sys_var = '{}_sys'.format(tgt.name) + src_var = '{}_src'.format(tgt.name) + dep_var = '{}_dep'.format(tgt.name) + tgt_var = tgt.name + + install_tgt = options.get_install(tgt.cmake_name, tgt.install) + + # Generate target kwargs + tgt_kwargs = { + 'build_by_default': install_tgt, + 'link_args': options.get_link_args(tgt.cmake_name, tgt.link_flags + tgt.link_libraries), + 'link_with': link_with, + 'include_directories': id_node(inc_var), + 'install': install_tgt, + 'override_options': options.get_override_options(tgt.cmake_name, tgt.override_options), + 'objects': [method(x, 'extract_all_objects') for x in objec_libs], + } + + # Only set if installed and only override if it is set + if install_tgt and tgt.install_dir: + tgt_kwargs['install_dir'] = tgt.install_dir + + # Handle compiler args + for key, val in tgt.compile_opts.items(): + tgt_kwargs['{}_args'.format(key)] = options.get_compile_args(tgt.cmake_name, key, val) + + # Handle -fPCI, etc + if tgt_func == 'executable': + tgt_kwargs['pie'] = tgt.pie + elif tgt_func == 'static_library': + tgt_kwargs['pic'] = tgt.pie + + # declare_dependency kwargs + dep_kwargs = { + 'link_args': tgt.link_flags + tgt.link_libraries, + 'link_with': id_node(tgt_var), + 'compile_args': tgt.public_compile_opts, + 'include_directories': id_node(inc_var), + } + + if dependencies: + generated += dependencies + + # Generate the function nodes + dir_node = assign(dir_var, function('include_directories', tgt.includes)) + sys_node = assign(sys_var, function('include_directories', tgt.sys_includes, {'is_system': True})) + inc_node = assign(inc_var, array([id_node(dir_var), id_node(sys_var)])) + node_list = [dir_node, sys_node, inc_node] + if tgt_func == 'header_only': + del dep_kwargs['link_with'] + dep_node = assign(dep_var, function('declare_dependency', kwargs=dep_kwargs)) + node_list += [dep_node] + src_var = None + tgt_var = None + else: + src_node = assign(src_var, function('files', sources)) + tgt_node = assign(tgt_var, function(tgt_func, [tgt_var, [id_node(src_var)] + generated], tgt_kwargs)) + node_list += [src_node, tgt_node] + if tgt_func in ['static_library', 'shared_library']: + dep_node = assign(dep_var, function('declare_dependency', kwargs=dep_kwargs)) + node_list += [dep_node] + elif tgt_func in ['shared_module']: + del dep_kwargs['link_with'] + dep_node = assign(dep_var, function('declare_dependency', kwargs=dep_kwargs)) + node_list += [dep_node] + else: + dep_var = None + + # Add the nodes to the ast + root_cb.lines += node_list + processed[tgt.name] = {'inc': inc_var, 'src': src_var, 'dep': dep_var, 'tgt': tgt_var, 'func': tgt_func} + name_map[tgt.cmake_name] = tgt.name + + def process_custom_target(tgt: ConverterCustomTarget) -> None: + # CMake allows to specify multiple commands in a custom target. + # To map this to meson, a helper script is used to execute all + # commands in order. This additionally allows setting the working + # directory. + + detect_cycle(tgt) + tgt_var = tgt.name # type: str + + def resolve_source(x: T.Any) -> T.Any: + if isinstance(x, ConverterTarget): + if x.name not in processed: + process_target(x) + return extract_tgt(x) + if isinstance(x, ConverterCustomTarget): + if x.name not in processed: + process_custom_target(x) + return extract_tgt(x) + elif isinstance(x, CustomTargetReference): + if x.ctgt.name not in processed: + process_custom_target(x.ctgt) + return resolve_ctgt_ref(x) + else: + return x + + # Generate the command list + command = [] + command += mesonlib.meson_command + command += ['--internal', 'cmake_run_ctgt'] + command += ['-o', '@OUTPUT@'] + if tgt.original_outputs: + command += ['-O'] + tgt.original_outputs + command += ['-d', tgt.working_dir] + + # Generate the commands. Subcommands are separated by ';;;' + for cmd in tgt.command: + command += [resolve_source(x) for x in cmd] + [';;;'] + + tgt_kwargs = { + 'input': [resolve_source(x) for x in tgt.inputs], + 'output': tgt.outputs, + 'command': command, + 'depends': [resolve_source(x) for x in tgt.depends], + } + + root_cb.lines += [assign(tgt_var, function('custom_target', [tgt.name], tgt_kwargs))] + processed[tgt.name] = {'inc': None, 'src': None, 'dep': None, 'tgt': tgt_var, 'func': 'custom_target'} + name_map[tgt.cmake_name] = tgt.name + + # Now generate the target function calls + for i in self.custom_targets: + if i.name not in processed: + process_custom_target(i) + for i in self.targets: + if i.name not in processed: + process_target(i) + + self.generated_targets = processed + self.internal_name_map = name_map + return root_cb + + def target_info(self, target: str) -> T.Optional[T.Dict[str, str]]: + # Try resolving the target name + # start by checking if there is a 100% match (excluding the name prefix) + prx_tgt = _sanitize_cmake_name(target) + if prx_tgt in self.generated_targets: + return self.generated_targets[prx_tgt] + # check if there exists a name mapping + if target in self.internal_name_map: + target = self.internal_name_map[target] + assert(target in self.generated_targets) + return self.generated_targets[target] + return None + + def target_list(self) -> T.List[str]: + return list(self.internal_name_map.keys()) + + def _object_lib_workaround(self) -> bool: + return 'link' in self.linkers and self.backend_name.startswith('vs') diff --git a/meson/mesonbuild/cmake/traceparser.py b/meson/mesonbuild/cmake/traceparser.py new file mode 100644 index 000000000..98b56f5bc --- /dev/null +++ b/meson/mesonbuild/cmake/traceparser.py @@ -0,0 +1,702 @@ +# Copyright 2019 The Meson development team + +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at + +# http://www.apache.org/licenses/LICENSE-2.0 + +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# This class contains the basic functionality needed to run any interpreter +# or an interpreter-based tool. + +from .common import CMakeException +from .generator import parse_generator_expressions +from .. import mlog +from ..mesonlib import version_compare + +import typing as T +from pathlib import Path +import re +import os +import json +import textwrap + +class CMakeTraceLine: + def __init__(self, file, line, func, args): + self.file = file + self.line = line + self.func = func.lower() + self.args = args + + def __repr__(self): + s = 'CMake TRACE: {0}:{1} {2}({3})' + return s.format(self.file, self.line, self.func, self.args) + +class CMakeTarget: + def __init__(self, name, target_type, properties=None, imported: bool = False, tline: T.Optional[CMakeTraceLine] = None): + if properties is None: + properties = {} + self.name = name + self.type = target_type + self.properties = properties + self.imported = imported + self.tline = tline + self.depends = [] + self.current_bin_dir = None + self.current_src_dir = None + + def __repr__(self): + s = 'CMake TARGET:\n -- name: {}\n -- type: {}\n -- imported: {}\n -- properties: {{\n{} }}\n -- tline: {}' + propSTR = '' + for i in self.properties: + propSTR += " '{}': {}\n".format(i, self.properties[i]) + return s.format(self.name, self.type, self.imported, propSTR, self.tline) + + def strip_properties(self) -> None: + # Strip the strings in the properties + if not self.properties: + return + for key, val in self.properties.items(): + self.properties[key] = [x.strip() for x in val] + assert all([';' not in x for x in self.properties[key]]) + +class CMakeGeneratorTarget(CMakeTarget): + def __init__(self, name): + super().__init__(name, 'CUSTOM', {}) + self.outputs = [] # type: T.List[str] + self.command = [] # type: T.List[T.List[str]] + self.working_dir = None # type: T.Optional[str] + +class CMakeTraceParser: + def __init__(self, cmake_version: str, build_dir: str, permissive: bool = True): + self.vars = {} # type: T.Dict[str, T.List[str]] + self.targets = {} # type: T.Dict[str, CMakeTarget] + + self.explicit_headers = set() # type: T.Set[Path] + + # T.List of targes that were added with add_custom_command to generate files + self.custom_targets = [] # type: T.List[CMakeGeneratorTarget] + + self.permissive = permissive # type: bool + self.cmake_version = cmake_version # type: str + self.trace_file = 'cmake_trace.txt' + self.trace_file_path = Path(build_dir) / self.trace_file + self.trace_format = 'json-v1' if version_compare(cmake_version, '>=3.17') else 'human' + + # State for delayed command execution. Delayed command execution is realised + # with a custom CMake file that overrides some functions and adds some + # introspection information to the trace. + self.delayed_commands = [] # type: T.List[str] + self.stored_commands = [] # type: T.List[CMakeTraceLine] + + # All supported functions + self.functions = { + 'set': self._cmake_set, + 'unset': self._cmake_unset, + 'add_executable': self._cmake_add_executable, + 'add_library': self._cmake_add_library, + 'add_custom_command': self._cmake_add_custom_command, + 'add_custom_target': self._cmake_add_custom_target, + 'set_property': self._cmake_set_property, + 'set_target_properties': self._cmake_set_target_properties, + 'target_compile_definitions': self._cmake_target_compile_definitions, + 'target_compile_options': self._cmake_target_compile_options, + 'target_include_directories': self._cmake_target_include_directories, + 'target_link_libraries': self._cmake_target_link_libraries, + 'target_link_options': self._cmake_target_link_options, + 'add_dependencies': self._cmake_add_dependencies, + + # Special functions defined in the preload script. + # These functions do nothing in the CMake code, but have special + # meaning here in the trace parser. + 'meson_ps_execute_delayed_calls': self._meson_ps_execute_delayed_calls, + 'meson_ps_reload_vars': self._meson_ps_reload_vars, + } + + def trace_args(self) -> T.List[str]: + arg_map = { + 'human': ['--trace', '--trace-expand'], + 'json-v1': ['--trace-expand', '--trace-format=json-v1'], + } + + base_args = ['--no-warn-unused-cli'] + if not self.requires_stderr(): + base_args += ['--trace-redirect={}'.format(self.trace_file)] + + return arg_map[self.trace_format] + base_args + + def requires_stderr(self) -> bool: + return version_compare(self.cmake_version, '<3.16') + + def parse(self, trace: T.Optional[str] = None) -> None: + # First load the trace (if required) + if not self.requires_stderr(): + if not self.trace_file_path.exists and not self.trace_file_path.is_file(): + raise CMakeException('CMake: Trace file "{}" not found'.format(str(self.trace_file_path))) + trace = self.trace_file_path.read_text(errors='ignore') + if not trace: + raise CMakeException('CMake: The CMake trace was not provided or is empty') + + # Second parse the trace + lexer1 = None + if self.trace_format == 'human': + lexer1 = self._lex_trace_human(trace) + elif self.trace_format == 'json-v1': + lexer1 = self._lex_trace_json(trace) + else: + raise CMakeException('CMake: Internal error: Invalid trace format {}. Expected [human, json-v1]'.format(self.trace_format)) + + # Primary pass -- parse everything + for l in lexer1: + # store the function if its execution should be delayed + if l.func in self.delayed_commands: + self.stored_commands += [l] + continue + + # "Execute" the CMake function if supported + fn = self.functions.get(l.func, None) + if(fn): + fn(l) + + # Postprocess + for tgt in self.targets.values(): + tgt.strip_properties() + + def get_first_cmake_var_of(self, var_list: T.List[str]) -> T.List[str]: + # Return the first found CMake variable in list var_list + for i in var_list: + if i in self.vars: + return self.vars[i] + + return [] + + def get_cmake_var(self, var: str) -> T.List[str]: + # Return the value of the CMake variable var or an empty list if var does not exist + if var in self.vars: + return self.vars[var] + + return [] + + def var_to_str(self, var: str) -> T.Optional[str]: + if var in self.vars and self.vars[var]: + return self.vars[var][0] + + return None + + def _str_to_bool(self, expr: T.Union[str, T.List[str]]) -> bool: + if not expr: + return False + if isinstance(expr, list): + expr_str = expr[0] + else: + expr_str = expr + expr_str = expr_str.upper() + return expr_str not in ['0', 'OFF', 'NO', 'FALSE', 'N', 'IGNORE'] and not expr_str.endswith('NOTFOUND') + + def var_to_bool(self, var: str) -> bool: + return self._str_to_bool(self.vars.get(var, [])) + + def _gen_exception(self, function: str, error: str, tline: CMakeTraceLine) -> None: + # Generate an exception if the parser is not in permissive mode + + if self.permissive: + mlog.debug('CMake trace warning: {}() {}\n{}'.format(function, error, tline)) + return None + raise CMakeException('CMake: {}() {}\n{}'.format(function, error, tline)) + + def _cmake_set(self, tline: CMakeTraceLine) -> None: + """Handler for the CMake set() function in all variaties. + + comes in three flavors: + set( [PARENT_SCOPE]) + set( CACHE [FORCE]) + set(ENV{} ) + + We don't support the ENV variant, and any uses of it will be ignored + silently. the other two variates are supported, with some caveats: + - we don't properly handle scoping, so calls to set() inside a + function without PARENT_SCOPE set could incorrectly shadow the + outer scope. + - We don't honor the type of CACHE arguments + """ + # DOC: https://cmake.org/cmake/help/latest/command/set.html + + # 1st remove PARENT_SCOPE and CACHE from args + args = [] + for i in tline.args: + if not i or i == 'PARENT_SCOPE': + continue + + # Discard everything after the CACHE keyword + if i == 'CACHE': + break + + args.append(i) + + if len(args) < 1: + return self._gen_exception('set', 'requires at least one argument', tline) + + # Now that we've removed extra arguments all that should be left is the + # variable identifier and the value, join the value back together to + # ensure spaces in the value are correctly handled. This assumes that + # variable names don't have spaces. Please don't do that... + identifier = args.pop(0) + value = ' '.join(args) + + if not value: + # Same as unset + if identifier in self.vars: + del self.vars[identifier] + else: + self.vars[identifier] = value.split(';') + + def _cmake_unset(self, tline: CMakeTraceLine): + # DOC: https://cmake.org/cmake/help/latest/command/unset.html + if len(tline.args) < 1: + return self._gen_exception('unset', 'requires at least one argument', tline) + + if tline.args[0] in self.vars: + del self.vars[tline.args[0]] + + def _cmake_add_executable(self, tline: CMakeTraceLine): + # DOC: https://cmake.org/cmake/help/latest/command/add_executable.html + args = list(tline.args) # Make a working copy + + # Make sure the exe is imported + is_imported = True + if 'IMPORTED' not in args: + return self._gen_exception('add_executable', 'non imported executables are not supported', tline) + + args.remove('IMPORTED') + + if len(args) < 1: + return self._gen_exception('add_executable', 'requires at least 1 argument', tline) + + self.targets[args[0]] = CMakeTarget(args[0], 'EXECUTABLE', {}, tline=tline, imported=is_imported) + + def _cmake_add_library(self, tline: CMakeTraceLine): + # DOC: https://cmake.org/cmake/help/latest/command/add_library.html + args = list(tline.args) # Make a working copy + + # Make sure the lib is imported + if 'INTERFACE' in args: + args.remove('INTERFACE') + + if len(args) < 1: + return self._gen_exception('add_library', 'interface library name not specified', tline) + + self.targets[args[0]] = CMakeTarget(args[0], 'INTERFACE', {}, tline=tline, imported='IMPORTED' in args) + elif 'IMPORTED' in args: + args.remove('IMPORTED') + + # Now, only look at the first two arguments (target_name and target_type) and ignore the rest + if len(args) < 2: + return self._gen_exception('add_library', 'requires at least 2 arguments', tline) + + self.targets[args[0]] = CMakeTarget(args[0], args[1], {}, tline=tline, imported=True) + elif 'ALIAS' in args: + args.remove('ALIAS') + + # Now, only look at the first two arguments (target_name and target_ref) and ignore the rest + if len(args) < 2: + return self._gen_exception('add_library', 'requires at least 2 arguments', tline) + + # Simulate the ALIAS with INTERFACE_LINK_LIBRARIES + self.targets[args[0]] = CMakeTarget(args[0], 'ALIAS', {'INTERFACE_LINK_LIBRARIES': [args[1]]}, tline=tline) + elif 'OBJECT' in args: + return self._gen_exception('add_library', 'OBJECT libraries are not supported', tline) + else: + self.targets[args[0]] = CMakeTarget(args[0], 'NORMAL', {}, tline=tline) + + def _cmake_add_custom_command(self, tline: CMakeTraceLine, name=None): + # DOC: https://cmake.org/cmake/help/latest/command/add_custom_command.html + args = self._flatten_args(list(tline.args)) # Commands can be passed as ';' seperated lists + + if not args: + return self._gen_exception('add_custom_command', 'requires at least 1 argument', tline) + + # Skip the second function signature + if args[0] == 'TARGET': + return self._gen_exception('add_custom_command', 'TARGET syntax is currently not supported', tline) + + magic_keys = ['OUTPUT', 'COMMAND', 'MAIN_DEPENDENCY', 'DEPENDS', 'BYPRODUCTS', + 'IMPLICIT_DEPENDS', 'WORKING_DIRECTORY', 'COMMENT', 'DEPFILE', + 'JOB_POOL', 'VERBATIM', 'APPEND', 'USES_TERMINAL', 'COMMAND_EXPAND_LISTS'] + + target = CMakeGeneratorTarget(name) + + def handle_output(key: str, target: CMakeGeneratorTarget) -> None: + target.outputs += [key] + + def handle_command(key: str, target: CMakeGeneratorTarget) -> None: + if key == 'ARGS': + return + target.command[-1] += [key] + + def handle_depends(key: str, target: CMakeGeneratorTarget) -> None: + target.depends += [key] + + def handle_working_dir(key: str, target: CMakeGeneratorTarget) -> None: + if target.working_dir is None: + target.working_dir = key + else: + target.working_dir += ' ' + target.working_dir += key + + fn = None + + for i in args: + if i in magic_keys: + if i == 'OUTPUT': + fn = handle_output + elif i == 'DEPENDS': + fn = handle_depends + elif i == 'WORKING_DIRECTORY': + fn = handle_working_dir + elif i == 'COMMAND': + fn = handle_command + target.command += [[]] + else: + fn = None + continue + + if fn is not None: + fn(i, target) + + target.current_bin_dir = self.var_to_str('MESON_PS_CMAKE_CURRENT_BINARY_DIR') + target.current_src_dir = self.var_to_str('MESON_PS_CMAKE_CURRENT_SOURCE_DIR') + target.outputs = self._guess_files(target.outputs) + target.depends = self._guess_files(target.depends) + target.command = [self._guess_files(x) for x in target.command] + + self.custom_targets += [target] + if name: + self.targets[name] = target + + def _cmake_add_custom_target(self, tline: CMakeTraceLine): + # DOC: https://cmake.org/cmake/help/latest/command/add_custom_target.html + # We only the first parameter (the target name) is interesting + if len(tline.args) < 1: + return self._gen_exception('add_custom_target', 'requires at least one argument', tline) + + # It's pretty much the same as a custom command + self._cmake_add_custom_command(tline, tline.args[0]) + + def _cmake_set_property(self, tline: CMakeTraceLine) -> None: + # DOC: https://cmake.org/cmake/help/latest/command/set_property.html + args = list(tline.args) + + scope = args.pop(0) + + append = False + targets = [] + while args: + curr = args.pop(0) + # XXX: APPEND_STRING is specifically *not* supposed to create a + # list, is treating them as aliases really okay? + if curr == 'APPEND' or curr == 'APPEND_STRING': + append = True + continue + + if curr == 'PROPERTY': + break + + targets += curr.split(';') + + if not args: + return self._gen_exception('set_property', 'faild to parse argument list', tline) + + if len(args) == 1: + # Tries to set property to nothing so nothing has to be done + return + + identifier = args.pop(0) + if self.trace_format == 'human': + value = ' '.join(args).split(';') + else: + value = [y for x in args for y in x.split(';')] + if not value: + return + + def do_target(tgt: str) -> None: + if i not in self.targets: + return self._gen_exception('set_property', 'TARGET {} not found'.format(i), tline) + + if identifier not in self.targets[i].properties: + self.targets[i].properties[identifier] = [] + + if append: + self.targets[i].properties[identifier] += value + else: + self.targets[i].properties[identifier] = value + + def do_source(src: str) -> None: + if identifier != 'HEADER_FILE_ONLY' or not self._str_to_bool(value): + return + + current_src_dir = self.var_to_str('MESON_PS_CMAKE_CURRENT_SOURCE_DIR') + if not current_src_dir: + mlog.warning(textwrap.dedent('''\ + CMake trace: set_property(SOURCE) called before the preload script was loaded. + Unable to determine CMAKE_CURRENT_SOURCE_DIR. This can lead to build errors. + ''')) + current_src_dir = '.' + + cur_p = Path(current_src_dir) + src_p = Path(src) + + if not src_p.is_absolute(): + src_p = cur_p / src_p + self.explicit_headers.add(src_p) + + if scope == 'TARGET': + for i in targets: + do_target(i) + elif scope == 'SOURCE': + files = self._guess_files(targets) + for i in files: + do_source(i) + + def _cmake_set_target_properties(self, tline: CMakeTraceLine) -> None: + # DOC: https://cmake.org/cmake/help/latest/command/set_target_properties.html + args = list(tline.args) + + targets = [] + while args: + curr = args.pop(0) + if curr == 'PROPERTIES': + break + + targets.append(curr) + + # Now we need to try to reconsitute the original quoted format of the + # arguments, as a property value could have spaces in it. Unlike + # set_property() this is not context free. There are two approaches I + # can think of, both have drawbacks: + # + # 1. Assume that the property will be capitalized ([A-Z_]), this is + # convention but cmake doesn't require it. + # 2. Maintain a copy of the list here: https://cmake.org/cmake/help/latest/manual/cmake-properties.7.html#target-properties + # + # Neither of these is awesome for obvious reasons. I'm going to try + # option 1 first and fall back to 2, as 1 requires less code and less + # synchroniztion for cmake changes. + # + # With the JSON output format, introduced in CMake 3.17, spaces are + # handled properly and we don't have to do either options + + arglist = [] # type: T.List[T.Tuple[str, T.List[str]]] + if self.trace_format == 'human': + name = args.pop(0) + values = [] + prop_regex = re.compile(r'^[A-Z_]+$') + for a in args: + if prop_regex.match(a): + if values: + arglist.append((name, ' '.join(values).split(';'))) + name = a + values = [] + else: + values.append(a) + if values: + arglist.append((name, ' '.join(values).split(';'))) + else: + arglist = [(x[0], x[1].split(';')) for x in zip(args[::2], args[1::2])] + + for name, value in arglist: + for i in targets: + if i not in self.targets: + return self._gen_exception('set_target_properties', 'TARGET {} not found'.format(i), tline) + + self.targets[i].properties[name] = value + + def _cmake_add_dependencies(self, tline: CMakeTraceLine) -> None: + # DOC: https://cmake.org/cmake/help/latest/command/add_dependencies.html + args = list(tline.args) + + if len(args) < 2: + return self._gen_exception('add_dependencies', 'takes at least 2 arguments', tline) + + target = self.targets.get(args[0]) + if not target: + return self._gen_exception('add_dependencies', 'target not found', tline) + + for i in args[1:]: + target.depends += i.split(';') + + def _cmake_target_compile_definitions(self, tline: CMakeTraceLine) -> None: + # DOC: https://cmake.org/cmake/help/latest/command/target_compile_definitions.html + self._parse_common_target_options('target_compile_definitions', 'COMPILE_DEFINITIONS', 'INTERFACE_COMPILE_DEFINITIONS', tline) + + def _cmake_target_compile_options(self, tline: CMakeTraceLine) -> None: + # DOC: https://cmake.org/cmake/help/latest/command/target_compile_options.html + self._parse_common_target_options('target_compile_options', 'COMPILE_OPTIONS', 'INTERFACE_COMPILE_OPTIONS', tline) + + def _cmake_target_include_directories(self, tline: CMakeTraceLine) -> None: + # DOC: https://cmake.org/cmake/help/latest/command/target_include_directories.html + self._parse_common_target_options('target_include_directories', 'INCLUDE_DIRECTORIES', 'INTERFACE_INCLUDE_DIRECTORIES', tline, ignore=['SYSTEM', 'BEFORE'], paths=True) + + def _cmake_target_link_options(self, tline: CMakeTraceLine) -> None: + # DOC: https://cmake.org/cmake/help/latest/command/target_link_options.html + self._parse_common_target_options('target_link_options', 'LINK_OPTIONS', 'INTERFACE_LINK_OPTIONS', tline) + + def _cmake_target_link_libraries(self, tline: CMakeTraceLine) -> None: + # DOC: https://cmake.org/cmake/help/latest/command/target_link_libraries.html + self._parse_common_target_options('target_link_options', 'LINK_LIBRARIES', 'INTERFACE_LINK_LIBRARIES', tline) + + def _parse_common_target_options(self, func: str, private_prop: str, interface_prop: str, tline: CMakeTraceLine, ignore: T.Optional[T.List[str]] = None, paths: bool = False): + if ignore is None: + ignore = ['BEFORE'] + + args = list(tline.args) + + if len(args) < 1: + return self._gen_exception(func, 'requires at least one argument', tline) + + target = args[0] + if target not in self.targets: + return self._gen_exception(func, 'TARGET {} not found'.format(target), tline) + + interface = [] + private = [] + + mode = 'PUBLIC' + for i in args[1:]: + if i in ignore: + continue + + if i in ['INTERFACE', 'LINK_INTERFACE_LIBRARIES', 'PUBLIC', 'PRIVATE', 'LINK_PUBLIC', 'LINK_PRIVATE']: + mode = i + continue + + if mode in ['INTERFACE', 'LINK_INTERFACE_LIBRARIES', 'PUBLIC', 'LINK_PUBLIC']: + interface += i.split(';') + + if mode in ['PUBLIC', 'PRIVATE', 'LINK_PRIVATE']: + private += i.split(';') + + if paths: + interface = self._guess_files(interface) + private = self._guess_files(private) + + interface = [x for x in interface if x] + private = [x for x in private if x] + + for i in [(private_prop, private), (interface_prop, interface)]: + if not i[0] in self.targets[target].properties: + self.targets[target].properties[i[0]] = [] + + self.targets[target].properties[i[0]] += i[1] + + def _meson_ps_execute_delayed_calls(self, tline: CMakeTraceLine) -> None: + for l in self.stored_commands: + fn = self.functions.get(l.func, None) + if(fn): + fn(l) + + # clear the stored commands + self.stored_commands = [] + + def _meson_ps_reload_vars(self, tline: CMakeTraceLine) -> None: + self.delayed_commands = self.get_cmake_var('MESON_PS_DELAYED_CALLS') + + def _lex_trace_human(self, trace): + # The trace format is: '(): ( )\n' + reg_tline = re.compile(r'\s*(.*\.(cmake|txt))\(([0-9]+)\):\s*(\w+)\(([\s\S]*?) ?\)\s*\n', re.MULTILINE) + reg_other = re.compile(r'[^\n]*\n') + loc = 0 + while loc < len(trace): + mo_file_line = reg_tline.match(trace, loc) + if not mo_file_line: + skip_match = reg_other.match(trace, loc) + if not skip_match: + print(trace[loc:]) + raise CMakeException('Failed to parse CMake trace') + + loc = skip_match.end() + continue + + loc = mo_file_line.end() + + file = mo_file_line.group(1) + line = mo_file_line.group(3) + func = mo_file_line.group(4) + args = mo_file_line.group(5) + args = parse_generator_expressions(args) + args = args.split(' ') + args = list(map(lambda x: x.strip(), args)) + + yield CMakeTraceLine(file, line, func, args) + + def _lex_trace_json(self, trace: str): + lines = trace.splitlines(keepends=False) + lines.pop(0) # The first line is the version + for i in lines: + data = json.loads(i) + args = data['args'] + args = [parse_generator_expressions(x) for x in args] + yield CMakeTraceLine(data['file'], data['line'], data['cmd'], args) + + def _flatten_args(self, args: T.List[str]) -> T.List[str]: + # Split lists in arguments + res = [] # type: T.List[str] + for i in args: + res += i.split(';') + return res + + def _guess_files(self, broken_list: T.List[str]) -> T.List[str]: + # Nothing has to be done for newer formats + if self.trace_format != 'human': + return broken_list + + # Try joining file paths that contain spaces + + reg_start = re.compile(r'^([A-Za-z]:)?/(.*/)*[^./]+$') + reg_end = re.compile(r'^.*\.[a-zA-Z]+$') + + fixed_list = [] # type: T.List[str] + curr_str = None # type: T.Optional[str] + path_found = False # type: bool + + for i in broken_list: + if curr_str is None: + curr_str = i + path_found = False + elif os.path.isfile(curr_str): + # Abort concatenation if curr_str is an existing file + fixed_list += [curr_str] + curr_str = i + path_found = False + elif not reg_start.match(curr_str): + # Abort concatenation if curr_str no longer matches the regex + fixed_list += [curr_str] + curr_str = i + path_found = False + elif reg_end.match(i): + # File detected + curr_str = '{} {}'.format(curr_str, i) + fixed_list += [curr_str] + curr_str = None + path_found = False + elif os.path.exists('{} {}'.format(curr_str, i)): + # Path detected + curr_str = '{} {}'.format(curr_str, i) + path_found = True + elif path_found: + # Add path to fixed_list after ensuring the whole path is in curr_str + fixed_list += [curr_str] + curr_str = i + path_found = False + else: + curr_str = '{} {}'.format(curr_str, i) + path_found = False + + if curr_str: + fixed_list += [curr_str] + return fixed_list diff --git a/meson/mesonbuild/compilers/__init__.py b/meson/mesonbuild/compilers/__init__.py new file mode 100644 index 000000000..5b765a631 --- /dev/null +++ b/meson/mesonbuild/compilers/__init__.py @@ -0,0 +1,204 @@ +# Copyright 2017 The Meson development team + +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at + +# http://www.apache.org/licenses/LICENSE-2.0 + +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# Public symbols for compilers sub-package when using 'from . import compilers' +__all__ = [ + 'Compiler', + + 'all_languages', + 'base_options', + 'clib_langs', + 'clink_langs', + 'c_suffixes', + 'cpp_suffixes', + 'get_base_compile_args', + 'get_base_link_args', + 'is_assembly', + 'is_header', + 'is_library', + 'is_llvm_ir', + 'is_object', + 'is_source', + 'is_known_suffix', + 'lang_suffixes', + 'sort_clink', + + 'AppleClangCCompiler', + 'AppleClangCPPCompiler', + 'AppleClangObjCCompiler', + 'AppleClangObjCPPCompiler', + 'ArmCCompiler', + 'ArmCPPCompiler', + 'ArmclangCCompiler', + 'ArmclangCPPCompiler', + 'CCompiler', + 'ClangCCompiler', + 'ClangCompiler', + 'ClangCPPCompiler', + 'ClangObjCCompiler', + 'ClangObjCPPCompiler', + 'ClangClCCompiler', + 'ClangClCPPCompiler', + 'CPPCompiler', + 'DCompiler', + 'DmdDCompiler', + 'FortranCompiler', + 'G95FortranCompiler', + 'GnuCCompiler', + 'ElbrusCCompiler', + 'EmscriptenCCompiler', + 'GnuCompiler', + 'GnuLikeCompiler', + 'GnuCPPCompiler', + 'ElbrusCPPCompiler', + 'EmscriptenCPPCompiler', + 'GnuDCompiler', + 'GnuFortranCompiler', + 'ElbrusFortranCompiler', + 'FlangFortranCompiler', + 'GnuObjCCompiler', + 'GnuObjCPPCompiler', + 'IntelGnuLikeCompiler', + 'IntelVisualStudioLikeCompiler', + 'IntelCCompiler', + 'IntelCPPCompiler', + 'IntelClCCompiler', + 'IntelClCPPCompiler', + 'IntelFortranCompiler', + 'IntelClFortranCompiler', + 'JavaCompiler', + 'LLVMDCompiler', + 'MonoCompiler', + 'CudaCompiler', + 'VisualStudioCsCompiler', + 'NAGFortranCompiler', + 'ObjCCompiler', + 'ObjCPPCompiler', + 'Open64FortranCompiler', + 'PathScaleFortranCompiler', + 'PGICCompiler', + 'PGICPPCompiler', + 'PGIFortranCompiler', + 'RustCompiler', + 'CcrxCCompiler', + 'CcrxCPPCompiler', + 'Xc16CCompiler', + 'C2000CCompiler', + 'C2000CPPCompiler', + 'SunFortranCompiler', + 'SwiftCompiler', + 'ValaCompiler', + 'VisualStudioLikeCompiler', + 'VisualStudioCCompiler', + 'VisualStudioCPPCompiler', +] + +# Bring symbols from each module into compilers sub-package namespace +from .compilers import ( + Compiler, + all_languages, + base_options, + clib_langs, + clink_langs, + c_suffixes, + cpp_suffixes, + get_base_compile_args, + get_base_link_args, + is_header, + is_source, + is_assembly, + is_llvm_ir, + is_object, + is_library, + is_known_suffix, + lang_suffixes, + sort_clink, +) +from .c import ( + CCompiler, + AppleClangCCompiler, + ArmCCompiler, + ArmclangCCompiler, + ClangCCompiler, + ClangClCCompiler, + GnuCCompiler, + ElbrusCCompiler, + EmscriptenCCompiler, + IntelCCompiler, + IntelClCCompiler, + PGICCompiler, + CcrxCCompiler, + Xc16CCompiler, + C2000CCompiler, + VisualStudioCCompiler, +) +from .cpp import ( + CPPCompiler, + AppleClangCPPCompiler, + ArmCPPCompiler, + ArmclangCPPCompiler, + ClangCPPCompiler, + ClangClCPPCompiler, + GnuCPPCompiler, + ElbrusCPPCompiler, + EmscriptenCPPCompiler, + IntelCPPCompiler, + IntelClCPPCompiler, + PGICPPCompiler, + CcrxCPPCompiler, + C2000CPPCompiler, + VisualStudioCPPCompiler, +) +from .cs import MonoCompiler, VisualStudioCsCompiler +from .d import ( + DCompiler, + DmdDCompiler, + GnuDCompiler, + LLVMDCompiler, +) +from .cuda import CudaCompiler +from .fortran import ( + FortranCompiler, + G95FortranCompiler, + GnuFortranCompiler, + ElbrusFortranCompiler, + FlangFortranCompiler, + IntelFortranCompiler, + IntelClFortranCompiler, + NAGFortranCompiler, + Open64FortranCompiler, + PathScaleFortranCompiler, + PGIFortranCompiler, + SunFortranCompiler, +) +from .java import JavaCompiler +from .objc import ( + ObjCCompiler, + AppleClangObjCCompiler, + ClangObjCCompiler, + GnuObjCCompiler, +) +from .objcpp import ( + ObjCPPCompiler, + AppleClangObjCPPCompiler, + ClangObjCPPCompiler, + GnuObjCPPCompiler, +) +from .rust import RustCompiler +from .swift import SwiftCompiler +from .vala import ValaCompiler +from .mixins.visualstudio import VisualStudioLikeCompiler +from .mixins.gnu import GnuCompiler, GnuLikeCompiler +from .mixins.intel import IntelGnuLikeCompiler, IntelVisualStudioLikeCompiler +from .mixins.clang import ClangCompiler diff --git a/meson/mesonbuild/compilers/c.py b/meson/mesonbuild/compilers/c.py new file mode 100644 index 000000000..aac99b426 --- /dev/null +++ b/meson/mesonbuild/compilers/c.py @@ -0,0 +1,561 @@ +# Copyright 2012-2017 The Meson development team + +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at + +# http://www.apache.org/licenses/LICENSE-2.0 + +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import os.path +import typing as T + +from .. import coredata +from ..mesonlib import MachineChoice, MesonException, mlog, version_compare +from ..linkers import LinkerEnvVarsMixin +from .c_function_attributes import C_FUNC_ATTRIBUTES +from .mixins.clike import CLikeCompiler +from .mixins.ccrx import CcrxCompiler +from .mixins.xc16 import Xc16Compiler +from .mixins.c2000 import C2000Compiler +from .mixins.arm import ArmCompiler, ArmclangCompiler +from .mixins.visualstudio import MSVCCompiler, ClangClCompiler +from .mixins.gnu import GnuCompiler +from .mixins.intel import IntelGnuLikeCompiler, IntelVisualStudioLikeCompiler +from .mixins.clang import ClangCompiler +from .mixins.elbrus import ElbrusCompiler +from .mixins.pgi import PGICompiler +from .mixins.emscripten import EmscriptenMixin +from .compilers import ( + gnu_winlibs, + msvc_winlibs, + Compiler, +) + +if T.TYPE_CHECKING: + from ..envconfig import MachineInfo + + +class CCompiler(CLikeCompiler, Compiler): + + @staticmethod + def attribute_check_func(name): + try: + return C_FUNC_ATTRIBUTES[name] + except KeyError: + raise MesonException('Unknown function attribute "{}"'.format(name)) + + language = 'c' + + def __init__(self, exelist, version, for_machine: MachineChoice, is_cross: bool, + info: 'MachineInfo', exe_wrapper: T.Optional[str] = None, **kwargs): + # If a child ObjC or CPP class has already set it, don't set it ourselves + Compiler.__init__(self, exelist, version, for_machine, info, **kwargs) + CLikeCompiler.__init__(self, is_cross, exe_wrapper) + + def get_no_stdinc_args(self): + return ['-nostdinc'] + + def sanity_check(self, work_dir, environment): + code = 'int main(void) { int class=0; return class; }\n' + return self.sanity_check_impl(work_dir, environment, 'sanitycheckc.c', code) + + def has_header_symbol(self, hname, symbol, prefix, env, *, extra_args=None, dependencies=None): + fargs = {'prefix': prefix, 'header': hname, 'symbol': symbol} + t = '''{prefix} + #include <{header}> + int main(void) {{ + /* If it's not defined as a macro, try to use as a symbol */ + #ifndef {symbol} + {symbol}; + #endif + return 0; + }}''' + return self.compiles(t.format(**fargs), env, extra_args=extra_args, + dependencies=dependencies) + + +class ClangCCompiler(ClangCompiler, CCompiler): + + _C17_VERSION = '>=6.0.0' + _C18_VERSION = '>=8.0.0' + + def __init__(self, exelist, version, for_machine: MachineChoice, + is_cross, info: 'MachineInfo', exe_wrapper=None, + defines: T.Optional[T.List[str]] = None, **kwargs): + CCompiler.__init__(self, exelist, version, for_machine, is_cross, info, exe_wrapper, **kwargs) + ClangCompiler.__init__(self, defines) + default_warn_args = ['-Wall', '-Winvalid-pch'] + self.warn_args = {'0': [], + '1': default_warn_args, + '2': default_warn_args + ['-Wextra'], + '3': default_warn_args + ['-Wextra', '-Wpedantic']} + + def get_options(self): + opts = CCompiler.get_options(self) + c_stds = ['c89', 'c99', 'c11'] + g_stds = ['gnu89', 'gnu99', 'gnu11'] + # https://releases.llvm.org/6.0.0/tools/clang/docs/ReleaseNotes.html + # https://en.wikipedia.org/wiki/Xcode#Latest_versions + if version_compare(self.version, self._C17_VERSION): + c_stds += ['c17'] + g_stds += ['gnu17'] + if version_compare(self.version, self._C18_VERSION): + c_stds += ['c18'] + g_stds += ['gnu18'] + opts.update({ + 'std': coredata.UserComboOption( + 'C language standard to use', + ['none'] + c_stds + g_stds, + 'none', + ), + }) + if self.info.is_windows() or self.info.is_cygwin(): + opts.update({ + 'winlibs': coredata.UserArrayOption( + 'Standard Win libraries to link against', + gnu_winlibs, + ), + }) + return opts + + def get_option_compile_args(self, options): + args = [] + std = options['std'] + if std.value != 'none': + args.append('-std=' + std.value) + return args + + def get_option_link_args(self, options): + if self.info.is_windows() or self.info.is_cygwin(): + return options['winlibs'].value[:] + return [] + + +class AppleClangCCompiler(ClangCCompiler): + + """Handle the differences between Apple Clang and Vanilla Clang. + + Right now this just handles the differences between the versions that new + C standards were added. + """ + + _C17_VERSION = '>=10.0.0' + _C18_VERSION = '>=11.0.0' + + +class EmscriptenCCompiler(EmscriptenMixin, LinkerEnvVarsMixin, ClangCCompiler): + def __init__(self, exelist, version, for_machine: MachineChoice, + is_cross: bool, info: 'MachineInfo', exe_wrapper=None, **kwargs): + if not is_cross: + raise MesonException('Emscripten compiler can only be used for cross compilation.') + ClangCCompiler.__init__(self, exelist=exelist, version=version, + for_machine=for_machine, is_cross=is_cross, + info=info, exe_wrapper=exe_wrapper, **kwargs) + self.id = 'emscripten' + + +class ArmclangCCompiler(ArmclangCompiler, CCompiler): + def __init__(self, exelist, version, for_machine: MachineChoice, + is_cross, info: 'MachineInfo', exe_wrapper=None, **kwargs): + CCompiler.__init__(self, exelist, version, for_machine, is_cross, + info, exe_wrapper, **kwargs) + ArmclangCompiler.__init__(self) + default_warn_args = ['-Wall', '-Winvalid-pch'] + self.warn_args = {'0': [], + '1': default_warn_args, + '2': default_warn_args + ['-Wextra'], + '3': default_warn_args + ['-Wextra', '-Wpedantic']} + + def get_options(self): + opts = CCompiler.get_options(self) + opts.update({ + 'std': coredata.UserComboOption( + 'C language standard to use', + ['none', 'c90', 'c99', 'c11', 'gnu90', 'gnu99', 'gnu11'], + 'none', + ), + }) + return opts + + def get_option_compile_args(self, options): + args = [] + std = options['std'] + if std.value != 'none': + args.append('-std=' + std.value) + return args + + def get_option_link_args(self, options): + return [] + + +class GnuCCompiler(GnuCompiler, CCompiler): + def __init__(self, exelist, version, for_machine: MachineChoice, + is_cross, info: 'MachineInfo', exe_wrapper=None, + defines=None, **kwargs): + CCompiler.__init__(self, exelist, version, for_machine, is_cross, + info, exe_wrapper, **kwargs) + GnuCompiler.__init__(self, defines) + default_warn_args = ['-Wall', '-Winvalid-pch'] + self.warn_args = {'0': [], + '1': default_warn_args, + '2': default_warn_args + ['-Wextra'], + '3': default_warn_args + ['-Wextra', '-Wpedantic']} + + def get_options(self): + opts = CCompiler.get_options(self) + c_stds = ['c89', 'c99', 'c11'] + g_stds = ['gnu89', 'gnu99', 'gnu11'] + v = '>=8.0.0' + if version_compare(self.version, v): + c_stds += ['c17', 'c18'] + g_stds += ['gnu17', 'gnu18'] + opts.update({ + 'std': coredata.UserComboOption( + 'C language standard to use', + ['none'] + c_stds + g_stds, + 'none', + ), + }) + if self.info.is_windows() or self.info.is_cygwin(): + opts.update({ + 'winlibs': coredata.UserArrayOption( + 'Standard Win libraries to link against', + gnu_winlibs, + ), + }) + return opts + + def get_option_compile_args(self, options): + args = [] + std = options['std'] + if std.value != 'none': + args.append('-std=' + std.value) + return args + + def get_option_link_args(self, options): + if self.info.is_windows() or self.info.is_cygwin(): + return options['winlibs'].value[:] + return [] + + def get_pch_use_args(self, pch_dir, header): + return ['-fpch-preprocess', '-include', os.path.basename(header)] + + +class PGICCompiler(PGICompiler, CCompiler): + def __init__(self, exelist, version, for_machine: MachineChoice, + is_cross, info: 'MachineInfo', exe_wrapper=None, **kwargs): + CCompiler.__init__(self, exelist, version, for_machine, is_cross, + info, exe_wrapper, **kwargs) + PGICompiler.__init__(self) + + +class ElbrusCCompiler(GnuCCompiler, ElbrusCompiler): + def __init__(self, exelist, version, for_machine: MachineChoice, + is_cross, info: 'MachineInfo', exe_wrapper=None, + defines=None, **kwargs): + GnuCCompiler.__init__(self, exelist, version, for_machine, is_cross, + info, exe_wrapper, defines, **kwargs) + ElbrusCompiler.__init__(self) + + # It does support some various ISO standards and c/gnu 90, 9x, 1x in addition to those which GNU CC supports. + def get_options(self): + opts = CCompiler.get_options(self) + opts.update({ + 'std': coredata.UserComboOption( + 'C language standard to use', + [ + 'none', 'c89', 'c90', 'c9x', 'c99', 'c1x', 'c11', + 'gnu89', 'gnu90', 'gnu9x', 'gnu99', 'gnu1x', 'gnu11', + 'iso9899:2011', 'iso9899:1990', 'iso9899:199409', 'iso9899:1999', + ], + 'none', + ), + }) + return opts + + # Elbrus C compiler does not have lchmod, but there is only linker warning, not compiler error. + # So we should explicitly fail at this case. + def has_function(self, funcname, prefix, env, *, extra_args=None, dependencies=None): + if funcname == 'lchmod': + return False, False + else: + return super().has_function(funcname, prefix, env, + extra_args=extra_args, + dependencies=dependencies) + + +class IntelCCompiler(IntelGnuLikeCompiler, CCompiler): + def __init__(self, exelist, version, for_machine: MachineChoice, + is_cross, info: 'MachineInfo', exe_wrapper=None, **kwargs): + CCompiler.__init__(self, exelist, version, for_machine, is_cross, + info, exe_wrapper, **kwargs) + IntelGnuLikeCompiler.__init__(self) + self.lang_header = 'c-header' + default_warn_args = ['-Wall', '-w3', '-diag-disable:remark'] + self.warn_args = {'0': [], + '1': default_warn_args, + '2': default_warn_args + ['-Wextra'], + '3': default_warn_args + ['-Wextra']} + + def get_options(self): + opts = CCompiler.get_options(self) + c_stds = ['c89', 'c99'] + g_stds = ['gnu89', 'gnu99'] + if version_compare(self.version, '>=16.0.0'): + c_stds += ['c11'] + opts.update({ + 'std': coredata.UserComboOption( + 'C language standard to use', + ['none'] + c_stds + g_stds, + 'none', + ), + }) + return opts + + def get_option_compile_args(self, options): + args = [] + std = options['std'] + if std.value != 'none': + args.append('-std=' + std.value) + return args + + +class VisualStudioLikeCCompilerMixin: + + """Shared methods that apply to MSVC-like C compilers.""" + + def get_options(self): + opts = super().get_options() + opts.update({ + 'winlibs': coredata.UserArrayOption( + 'Windows libs to link against.', + msvc_winlibs, + ), + }) + return opts + + def get_option_link_args(self, options): + return options['winlibs'].value[:] + + +class VisualStudioCCompiler(MSVCCompiler, VisualStudioLikeCCompilerMixin, CCompiler): + + def __init__(self, exelist, version, for_machine: MachineChoice, + is_cross, info: 'MachineInfo', exe_wrap, target: str, + **kwargs): + CCompiler.__init__(self, exelist, version, for_machine, is_cross, + info, exe_wrap, **kwargs) + MSVCCompiler.__init__(self, target) + + +class ClangClCCompiler(ClangClCompiler, VisualStudioLikeCCompilerMixin, CCompiler): + def __init__(self, exelist, version, for_machine: MachineChoice, + is_cross, info: 'MachineInfo', exe_wrap, target, **kwargs): + CCompiler.__init__(self, exelist, version, for_machine, is_cross, + info, exe_wrap, **kwargs) + ClangClCompiler.__init__(self, target) + + +class IntelClCCompiler(IntelVisualStudioLikeCompiler, VisualStudioLikeCCompilerMixin, CCompiler): + + """Intel "ICL" compiler abstraction.""" + + def __init__(self, exelist, version, for_machine: MachineChoice, + is_cross, info: 'MachineInfo', exe_wrap, target, **kwargs): + CCompiler.__init__(self, exelist, version, for_machine, is_cross, + info, exe_wrap, **kwargs) + IntelVisualStudioLikeCompiler.__init__(self, target) + + def get_options(self): + opts = super().get_options() + c_stds = ['none', 'c89', 'c99', 'c11'] + opts.update({ + 'std': coredata.UserComboOption( + 'C language standard to use', + c_stds, + 'none', + ), + }) + return opts + + def get_option_compile_args(self, options): + args = [] + std = options['std'] + if std.value == 'c89': + mlog.warning("ICL doesn't explicitly implement c89, setting the standard to 'none', which is close.", once=True) + elif std.value != 'none': + args.append('/Qstd:' + std.value) + return args + + +class ArmCCompiler(ArmCompiler, CCompiler): + def __init__(self, exelist, version, for_machine: MachineChoice, + is_cross, info: 'MachineInfo', exe_wrapper=None, **kwargs): + CCompiler.__init__(self, exelist, version, for_machine, is_cross, + info, exe_wrapper, **kwargs) + ArmCompiler.__init__(self) + + def get_options(self): + opts = CCompiler.get_options(self) + opts.update({ + 'std': coredata.UserComboOption( + 'C language standard to use', + ['none', 'c90', 'c99'], + 'none', + ), + }) + return opts + + def get_option_compile_args(self, options): + args = [] + std = options['std'] + if std.value != 'none': + args.append('--' + std.value) + return args + + +class CcrxCCompiler(CcrxCompiler, CCompiler): + def __init__(self, exelist, version, for_machine: MachineChoice, + is_cross, info: 'MachineInfo', exe_wrapper=None, **kwargs): + CCompiler.__init__(self, exelist, version, for_machine, is_cross, + info, exe_wrapper, **kwargs) + CcrxCompiler.__init__(self) + + # Override CCompiler.get_always_args + def get_always_args(self): + return ['-nologo'] + + def get_options(self): + opts = CCompiler.get_options(self) + opts.update({ + 'std': coredata.UserComboOption( + 'C language standard to use', + ['none', 'c89', 'c99'], + 'none', + ), + }) + return opts + + def get_no_stdinc_args(self): + return [] + + def get_option_compile_args(self, options): + args = [] + std = options['std'] + if std.value == 'c89': + args.append('-lang=c') + elif std.value == 'c99': + args.append('-lang=c99') + return args + + def get_compile_only_args(self): + return [] + + def get_no_optimization_args(self): + return ['-optimize=0'] + + def get_output_args(self, target): + return ['-output=obj=%s' % target] + + def get_werror_args(self): + return ['-change_message=error'] + + def get_include_args(self, path, is_system): + if path == '': + path = '.' + return ['-include=' + path] + + +class Xc16CCompiler(Xc16Compiler, CCompiler): + def __init__(self, exelist, version, for_machine: MachineChoice, + is_cross, info: 'MachineInfo', exe_wrapper=None, **kwargs): + CCompiler.__init__(self, exelist, version, for_machine, is_cross, + info, exe_wrapper, **kwargs) + Xc16Compiler.__init__(self) + + def get_options(self): + opts = CCompiler.get_options(self) + opts.update({'c_std': coredata.UserComboOption('C language standard to use', + ['none', 'c89', 'c99', 'gnu89', 'gnu99'], + 'none')}) + return opts + + def get_no_stdinc_args(self): + return [] + + def get_option_compile_args(self, options): + args = [] + std = options['c_std'] + if std.value != 'none': + args.append('-ansi') + args.append('-std=' + std.value) + return args + + def get_compile_only_args(self): + return [] + + def get_no_optimization_args(self): + return ['-O0'] + + def get_output_args(self, target): + return ['-o%s' % target] + + def get_werror_args(self): + return ['-change_message=error'] + + def get_include_args(self, path, is_system): + if path == '': + path = '.' + return ['-I' + path] + + +class C2000CCompiler(C2000Compiler, CCompiler): + def __init__(self, exelist, version, for_machine: MachineChoice, + is_cross, info: 'MachineInfo', exe_wrapper=None, **kwargs): + CCompiler.__init__(self, exelist, version, for_machine, is_cross, + info, exe_wrapper, **kwargs) + C2000Compiler.__init__(self) + + # Override CCompiler.get_always_args + def get_always_args(self): + return [] + + def get_options(self): + opts = CCompiler.get_options(self) + opts.update({'c_std': coredata.UserComboOption('C language standard to use', + ['none', 'c89', 'c99', 'c11'], + 'none')}) + return opts + + def get_no_stdinc_args(self): + return [] + + def get_option_compile_args(self, options): + args = [] + std = options['c_std'] + if std.value != 'none': + args.append('--' + std.value) + return args + + def get_compile_only_args(self): + return [] + + def get_no_optimization_args(self): + return ['-Ooff'] + + def get_output_args(self, target): + return ['--output_file=%s' % target] + + def get_werror_args(self): + return ['-change_message=error'] + + def get_include_args(self, path, is_system): + if path == '': + path = '.' + return ['--include_path=' + path] diff --git a/meson/mesonbuild/compilers/c_function_attributes.py b/meson/mesonbuild/compilers/c_function_attributes.py new file mode 100644 index 000000000..f31229e09 --- /dev/null +++ b/meson/mesonbuild/compilers/c_function_attributes.py @@ -0,0 +1,132 @@ +# These functions are based on the following code: +# https://git.savannah.gnu.org/gitweb/?p=autoconf-archive.git;a=blob_plain;f=m4/ax_gcc_func_attribute.m4, +# which is licensed under the following terms: +# +# Copyright (c) 2013 Gabriele Svelto +# +# Copying and distribution of this file, with or without modification, are +# permitted in any medium without royalty provided the copyright notice +# and this notice are preserved. This file is offered as-is, without any +# warranty. +# + +C_FUNC_ATTRIBUTES = { + 'alias': ''' + int foo(void) { return 0; } + int bar(void) __attribute__((alias("foo")));''', + 'aligned': + 'int foo(void) __attribute__((aligned(32)));', + 'alloc_size': + 'void *foo(int a) __attribute__((alloc_size(1)));', + 'always_inline': + 'inline __attribute__((always_inline)) int foo(void) { return 0; }', + 'artificial': + 'inline __attribute__((artificial)) int foo(void) { return 0; }', + 'cold': + 'int foo(void) __attribute__((cold));', + 'const': + 'int foo(void) __attribute__((const));', + 'constructor': + 'int foo(void) __attribute__((constructor));', + 'constructor_priority': + 'int foo( void ) __attribute__((__constructor__(65535/2)));', + 'deprecated': + 'int foo(void) __attribute__((deprecated("")));', + 'destructor': + 'int foo(void) __attribute__((destructor));', + 'dllexport': + '__declspec(dllexport) int foo(void) { return 0; }', + 'dllimport': + '__declspec(dllimport) int foo(void);', + 'error': + 'int foo(void) __attribute__((error("")));', + 'externally_visible': + 'int foo(void) __attribute__((externally_visible));', + 'fallthrough': ''' + int foo( void ) { + switch (0) { + case 1: __attribute__((fallthrough)); + case 2: break; + } + return 0; + };''', + 'flatten': + 'int foo(void) __attribute__((flatten));', + 'format': + 'int foo(const char * p, ...) __attribute__((format(printf, 1, 2)));', + 'format_arg': + 'char * foo(const char * p) __attribute__((format_arg(1)));', + 'force_align_arg_pointer': + '__attribute__((force_align_arg_pointer)) int foo(void) { return 0; }', + 'gnu_inline': + 'inline __attribute__((gnu_inline)) int foo(void) { return 0; }', + 'hot': + 'int foo(void) __attribute__((hot));', + 'ifunc': + ('int my_foo(void) { return 0; }' + 'static int (*resolve_foo(void))(void) { return my_foo; }' + 'int foo(void) __attribute__((ifunc("resolve_foo")));'), + 'leaf': + '__attribute__((leaf)) int foo(void) { return 0; }', + 'malloc': + 'int *foo(void) __attribute__((malloc));', + 'noclone': + 'int foo(void) __attribute__((noclone));', + 'noinline': + '__attribute__((noinline)) int foo(void) { return 0; }', + 'nonnull': + 'int foo(char * p) __attribute__((nonnull(1)));', + 'noreturn': + 'int foo(void) __attribute__((noreturn));', + 'nothrow': + 'int foo(void) __attribute__((nothrow));', + 'optimize': + '__attribute__((optimize(3))) int foo(void) { return 0; }', + 'packed': + 'struct __attribute__((packed)) foo { int bar; };', + 'pure': + 'int foo(void) __attribute__((pure));', + 'returns_nonnull': + 'int *foo(void) __attribute__((returns_nonnull));', + 'unused': + 'int foo(void) __attribute__((unused));', + 'used': + 'int foo(void) __attribute__((used));', + 'visibility': ''' + int foo_def(void) __attribute__((visibility("default"))); + int foo_hid(void) __attribute__((visibility("hidden"))); + int foo_int(void) __attribute__((visibility("internal")));''', + 'visibility:default': + 'int foo(void) __attribute__((visibility("default")));', + 'visibility:hidden': + 'int foo(void) __attribute__((visibility("hidden")));', + 'visibility:internal': + 'int foo(void) __attribute__((visibility("internal")));', + 'visibility:protected': + 'int foo(void) __attribute__((visibility("protected")));', + 'warning': + 'int foo(void) __attribute__((warning("")));', + 'warn_unused_result': + 'int foo(void) __attribute__((warn_unused_result));', + 'weak': + 'int foo(void) __attribute__((weak));', + 'weakref': ''' + static int foo(void) { return 0; } + static int var(void) __attribute__((weakref("foo")));''', +} + +CXX_FUNC_ATTRIBUTES = { + # Alias must be applied to the mangled name in C++ + 'alias': + ('extern "C" {' + 'int foo(void) { return 0; }' + '}' + 'int bar(void) __attribute__((alias("foo")));' + ), + 'ifunc': + ('extern "C" {' + 'int my_foo(void) { return 0; }' + 'static int (*resolve_foo(void))(void) { return my_foo; }' + '}' + 'int foo(void) __attribute__((ifunc("resolve_foo")));'), +} diff --git a/meson/mesonbuild/compilers/compilers.py b/meson/mesonbuild/compilers/compilers.py new file mode 100644 index 000000000..0de59a4da --- /dev/null +++ b/meson/mesonbuild/compilers/compilers.py @@ -0,0 +1,982 @@ +# Copyright 2012-2019 The Meson development team + +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at + +# http://www.apache.org/licenses/LICENSE-2.0 + +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import abc +import contextlib, os.path, re, tempfile +import itertools +import typing as T +from functools import lru_cache + +from .. import coredata +from .. import mlog +from .. import mesonlib +from ..linkers import LinkerEnvVarsMixin +from ..mesonlib import ( + EnvironmentException, MachineChoice, MesonException, + Popen_safe, split_args +) +from ..envconfig import ( + Properties, get_env_var +) +from ..arglist import CompilerArgs + +if T.TYPE_CHECKING: + from ..coredata import OptionDictType + from ..envconfig import MachineInfo + from ..environment import Environment + from ..linkers import DynamicLinker # noqa: F401 + + CompilerType = T.TypeVar('CompilerType', bound=Compiler) + +"""This file contains the data files of all compilers Meson knows +about. To support a new compiler, add its information below. +Also add corresponding autodetection code in environment.py.""" + +header_suffixes = ('h', 'hh', 'hpp', 'hxx', 'H', 'ipp', 'moc', 'vapi', 'di') +obj_suffixes = ('o', 'obj', 'res') +lib_suffixes = ('a', 'lib', 'dll', 'dll.a', 'dylib', 'so') +# Mapping of language to suffixes of files that should always be in that language +# This means we can't include .h headers here since they could be C, C++, ObjC, etc. +lang_suffixes = { + 'c': ('c',), + 'cpp': ('cpp', 'cc', 'cxx', 'c++', 'hh', 'hpp', 'ipp', 'hxx', 'ino'), + 'cuda': ('cu',), + # f90, f95, f03, f08 are for free-form fortran ('f90' recommended) + # f, for, ftn, fpp are for fixed-form fortran ('f' or 'for' recommended) + 'fortran': ('f90', 'f95', 'f03', 'f08', 'f', 'for', 'ftn', 'fpp'), + 'd': ('d', 'di'), + 'objc': ('m',), + 'objcpp': ('mm',), + 'rust': ('rs',), + 'vala': ('vala', 'vapi', 'gs'), + 'cs': ('cs',), + 'swift': ('swift',), + 'java': ('java',), +} +all_languages = lang_suffixes.keys() +cpp_suffixes = lang_suffixes['cpp'] + ('h',) +c_suffixes = lang_suffixes['c'] + ('h',) +# List of languages that by default consume and output libraries following the +# C ABI; these can generally be used interchangebly +clib_langs = ('objcpp', 'cpp', 'objc', 'c', 'fortran',) +# List of languages that can be linked with C code directly by the linker +# used in build.py:process_compilers() and build.py:get_dynamic_linker() +clink_langs = ('d', 'cuda') + clib_langs +clink_suffixes = () +for _l in clink_langs + ('vala',): + clink_suffixes += lang_suffixes[_l] +clink_suffixes += ('h', 'll', 's') +all_suffixes = set(itertools.chain(*lang_suffixes.values(), clink_suffixes)) + +# Languages that should use LDFLAGS arguments when linking. +languages_using_ldflags = {'objcpp', 'cpp', 'objc', 'c', 'fortran', 'd', 'cuda'} +# Languages that should use CPPFLAGS arguments when linking. +languages_using_cppflags = {'c', 'cpp', 'objc', 'objcpp'} +soregex = re.compile(r'.*\.so(\.[0-9]+)?(\.[0-9]+)?(\.[0-9]+)?$') + +# Environment variables that each lang uses. +cflags_mapping = {'c': 'CFLAGS', + 'cpp': 'CXXFLAGS', + 'cuda': 'CUFLAGS', + 'objc': 'OBJCFLAGS', + 'objcpp': 'OBJCXXFLAGS', + 'fortran': 'FFLAGS', + 'd': 'DFLAGS', + 'vala': 'VALAFLAGS', + 'rust': 'RUSTFLAGS'} + +# All these are only for C-linkable languages; see `clink_langs` above. + +def sort_clink(lang): + ''' + Sorting function to sort the list of languages according to + reversed(compilers.clink_langs) and append the unknown langs in the end. + The purpose is to prefer C over C++ for files that can be compiled by + both such as assembly, C, etc. Also applies to ObjC, ObjC++, etc. + ''' + if lang not in clink_langs: + return 1 + return -clink_langs.index(lang) + +def is_header(fname): + if hasattr(fname, 'fname'): + fname = fname.fname + suffix = fname.split('.')[-1] + return suffix in header_suffixes + +def is_source(fname): + if hasattr(fname, 'fname'): + fname = fname.fname + suffix = fname.split('.')[-1].lower() + return suffix in clink_suffixes + +def is_assembly(fname): + if hasattr(fname, 'fname'): + fname = fname.fname + return fname.split('.')[-1].lower() == 's' + +def is_llvm_ir(fname): + if hasattr(fname, 'fname'): + fname = fname.fname + return fname.split('.')[-1] == 'll' + +@lru_cache(maxsize=None) +def cached_by_name(fname): + suffix = fname.split('.')[-1] + return suffix in obj_suffixes + +def is_object(fname): + if hasattr(fname, 'fname'): + fname = fname.fname + return cached_by_name(fname) + +def is_library(fname): + if hasattr(fname, 'fname'): + fname = fname.fname + + if soregex.match(fname): + return True + + suffix = fname.split('.')[-1] + return suffix in lib_suffixes + +def is_known_suffix(fname): + if hasattr(fname, 'fname'): + fname = fname.fname + suffix = fname.split('.')[-1] + + return suffix in all_suffixes + +cuda_buildtype_args = {'plain': [], + 'debug': [], + 'debugoptimized': [], + 'release': [], + 'minsize': [], + } +java_buildtype_args = {'plain': [], + 'debug': ['-g'], + 'debugoptimized': ['-g'], + 'release': [], + 'minsize': [], + 'custom': [], + } + +rust_buildtype_args = {'plain': [], + 'debug': [], + 'debugoptimized': [], + 'release': [], + 'minsize': [], + 'custom': [], + } + +d_gdc_buildtype_args = {'plain': [], + 'debug': [], + 'debugoptimized': ['-finline-functions'], + 'release': ['-finline-functions'], + 'minsize': [], + 'custom': [], + } + +d_ldc_buildtype_args = {'plain': [], + 'debug': [], + 'debugoptimized': ['-enable-inlining', '-Hkeep-all-bodies'], + 'release': ['-enable-inlining', '-Hkeep-all-bodies'], + 'minsize': [], + 'custom': [], + } + +d_dmd_buildtype_args = {'plain': [], + 'debug': [], + 'debugoptimized': ['-inline'], + 'release': ['-inline'], + 'minsize': [], + 'custom': [], + } + +mono_buildtype_args = {'plain': [], + 'debug': [], + 'debugoptimized': ['-optimize+'], + 'release': ['-optimize+'], + 'minsize': [], + 'custom': [], + } + +swift_buildtype_args = {'plain': [], + 'debug': [], + 'debugoptimized': [], + 'release': [], + 'minsize': [], + 'custom': [], + } + +gnu_winlibs = ['-lkernel32', '-luser32', '-lgdi32', '-lwinspool', '-lshell32', + '-lole32', '-loleaut32', '-luuid', '-lcomdlg32', '-ladvapi32'] + +msvc_winlibs = ['kernel32.lib', 'user32.lib', 'gdi32.lib', + 'winspool.lib', 'shell32.lib', 'ole32.lib', 'oleaut32.lib', + 'uuid.lib', 'comdlg32.lib', 'advapi32.lib'] + +clike_optimization_args = {'0': [], + 'g': [], + '1': ['-O1'], + '2': ['-O2'], + '3': ['-O3'], + 's': ['-Os'], + } + +cuda_optimization_args = {'0': [], + 'g': ['-O0'], + '1': ['-O1'], + '2': ['-O2'], + '3': ['-O3'], + 's': ['-O3'] + } + +cuda_debug_args = {False: [], + True: ['-g']} + +clike_debug_args = {False: [], + True: ['-g']} + +base_options = {'b_pch': coredata.UserBooleanOption('Use precompiled headers', True), + 'b_lto': coredata.UserBooleanOption('Use link time optimization', False), + 'b_sanitize': coredata.UserComboOption('Code sanitizer to use', + ['none', 'address', 'thread', 'undefined', 'memory', 'address,undefined'], + 'none'), + 'b_lundef': coredata.UserBooleanOption('Use -Wl,--no-undefined when linking', True), + 'b_asneeded': coredata.UserBooleanOption('Use -Wl,--as-needed when linking', True), + 'b_pgo': coredata.UserComboOption('Use profile guided optimization', + ['off', 'generate', 'use'], + 'off'), + 'b_coverage': coredata.UserBooleanOption('Enable coverage tracking.', + False), + 'b_colorout': coredata.UserComboOption('Use colored output', + ['auto', 'always', 'never'], + 'always'), + 'b_ndebug': coredata.UserComboOption('Disable asserts', + ['true', 'false', 'if-release'], 'false'), + 'b_staticpic': coredata.UserBooleanOption('Build static libraries as position independent', + True), + 'b_pie': coredata.UserBooleanOption('Build executables as position independent', + False), + 'b_bitcode': coredata.UserBooleanOption('Generate and embed bitcode (only macOS/iOS/tvOS)', + False), + 'b_vscrt': coredata.UserComboOption('VS run-time library type to use.', + ['none', 'md', 'mdd', 'mt', 'mtd', 'from_buildtype'], + 'from_buildtype'), + } + +def option_enabled(boptions, options, option): + try: + if option not in boptions: + return False + return options[option].value + except KeyError: + return False + +def get_base_compile_args(options, compiler): + args = [] + try: + if options['b_lto'].value: + args.extend(compiler.get_lto_compile_args()) + except KeyError: + pass + try: + args += compiler.get_colorout_args(options['b_colorout'].value) + except KeyError: + pass + try: + args += compiler.sanitizer_compile_args(options['b_sanitize'].value) + except KeyError: + pass + try: + pgo_val = options['b_pgo'].value + if pgo_val == 'generate': + args.extend(compiler.get_profile_generate_args()) + elif pgo_val == 'use': + args.extend(compiler.get_profile_use_args()) + except KeyError: + pass + try: + if options['b_coverage'].value: + args += compiler.get_coverage_args() + except KeyError: + pass + try: + if (options['b_ndebug'].value == 'true' or + (options['b_ndebug'].value == 'if-release' and + options['buildtype'].value in {'release', 'plain'})): + args += compiler.get_disable_assert_args() + except KeyError: + pass + # This does not need a try...except + if option_enabled(compiler.base_options, options, 'b_bitcode'): + args.append('-fembed-bitcode') + try: + crt_val = options['b_vscrt'].value + buildtype = options['buildtype'].value + try: + args += compiler.get_crt_compile_args(crt_val, buildtype) + except AttributeError: + pass + except KeyError: + pass + return args + +def get_base_link_args(options, linker, is_shared_module): + args = [] + try: + if options['b_lto'].value: + args.extend(linker.get_lto_link_args()) + except KeyError: + pass + try: + args += linker.sanitizer_link_args(options['b_sanitize'].value) + except KeyError: + pass + try: + pgo_val = options['b_pgo'].value + if pgo_val == 'generate': + args.extend(linker.get_profile_generate_args()) + elif pgo_val == 'use': + args.extend(linker.get_profile_use_args()) + except KeyError: + pass + try: + if options['b_coverage'].value: + args += linker.get_coverage_link_args() + except KeyError: + pass + + as_needed = option_enabled(linker.base_options, options, 'b_asneeded') + bitcode = option_enabled(linker.base_options, options, 'b_bitcode') + # Shared modules cannot be built with bitcode_bundle because + # -bitcode_bundle is incompatible with -undefined and -bundle + if bitcode and not is_shared_module: + args.extend(linker.bitcode_args()) + elif as_needed: + # -Wl,-dead_strip_dylibs is incompatible with bitcode + args.extend(linker.get_asneeded_args()) + + # Apple's ld (the only one that supports bitcode) does not like -undefined + # arguments or -headerpad_max_install_names when bitcode is enabled + if not bitcode: + args.extend(linker.headerpad_args()) + if (not is_shared_module and + option_enabled(linker.base_options, options, 'b_lundef')): + args.extend(linker.no_undefined_link_args()) + else: + args.extend(linker.get_allow_undefined_link_args()) + + try: + crt_val = options['b_vscrt'].value + buildtype = options['buildtype'].value + try: + args += linker.get_crt_link_args(crt_val, buildtype) + except AttributeError: + pass + except KeyError: + pass + return args + + +class CrossNoRunException(MesonException): + pass + +class RunResult: + def __init__(self, compiled, returncode=999, stdout='UNDEFINED', stderr='UNDEFINED'): + self.compiled = compiled + self.returncode = returncode + self.stdout = stdout + self.stderr = stderr + + +class Compiler(metaclass=abc.ABCMeta): + # Libraries to ignore in find_library() since they are provided by the + # compiler or the C library. Currently only used for MSVC. + ignore_libs = () + # Libraries that are internal compiler implementations, and must not be + # manually searched. + internal_libs = () + + LINKER_PREFIX = None # type: T.Union[None, str, T.List[str]] + INVOKES_LINKER = True + + def __init__(self, exelist, version, for_machine: MachineChoice, info: 'MachineInfo', + linker: T.Optional['DynamicLinker'] = None, **kwargs): + if isinstance(exelist, str): + self.exelist = [exelist] + elif isinstance(exelist, list): + self.exelist = exelist + else: + raise TypeError('Unknown argument to Compiler') + # In case it's been overridden by a child class already + if not hasattr(self, 'file_suffixes'): + self.file_suffixes = lang_suffixes[self.language] + if not hasattr(self, 'can_compile_suffixes'): + self.can_compile_suffixes = set(self.file_suffixes) + self.default_suffix = self.file_suffixes[0] + self.version = version + if 'full_version' in kwargs: + self.full_version = kwargs['full_version'] + else: + self.full_version = None + self.for_machine = for_machine + self.base_options = [] + self.linker = linker + self.info = info + + def __repr__(self): + repr_str = "<{0}: v{1} `{2}`>" + return repr_str.format(self.__class__.__name__, self.version, + ' '.join(self.exelist)) + + @lru_cache(maxsize=None) + def can_compile(self, src) -> bool: + if hasattr(src, 'fname'): + src = src.fname + suffix = os.path.splitext(src)[1].lower() + if suffix and suffix[1:] in self.can_compile_suffixes: + return True + return False + + def get_id(self) -> str: + return self.id + + def get_linker_id(self) -> str: + # There is not guarantee that we have a dynamic linker instance, as + # some languages don't have separate linkers and compilers. In those + # cases return the compiler id + try: + return self.linker.id + except AttributeError: + return self.id + + def get_version_string(self) -> str: + details = [self.id, self.version] + if self.full_version: + details += ['"%s"' % (self.full_version)] + return '(%s)' % (' '.join(details)) + + def get_language(self) -> str: + return self.language + + @classmethod + def get_display_language(cls) -> str: + return cls.language.capitalize() + + def get_default_suffix(self) -> str: + return self.default_suffix + + def get_define(self, dname, prefix, env, extra_args, dependencies) -> T.Tuple[str, bool]: + raise EnvironmentException('%s does not support get_define ' % self.get_id()) + + def compute_int(self, expression, low, high, guess, prefix, env, extra_args, dependencies) -> int: + raise EnvironmentException('%s does not support compute_int ' % self.get_id()) + + def compute_parameters_with_absolute_paths(self, parameter_list, build_dir): + raise EnvironmentException('%s does not support compute_parameters_with_absolute_paths ' % self.get_id()) + + def has_members(self, typename, membernames, prefix, env, *, + extra_args=None, dependencies=None) -> T.Tuple[bool, bool]: + raise EnvironmentException('%s does not support has_member(s) ' % self.get_id()) + + def has_type(self, typename, prefix, env, extra_args, *, + dependencies=None) -> T.Tuple[bool, bool]: + raise EnvironmentException('%s does not support has_type ' % self.get_id()) + + def symbols_have_underscore_prefix(self, env) -> bool: + raise EnvironmentException('%s does not support symbols_have_underscore_prefix ' % self.get_id()) + + def get_exelist(self): + return self.exelist[:] + + def get_linker_exelist(self) -> T.List[str]: + return self.linker.get_exelist() + + def get_linker_output_args(self, outputname: str) -> T.List[str]: + return self.linker.get_output_args(outputname) + + def get_builtin_define(self, *args, **kwargs): + raise EnvironmentException('%s does not support get_builtin_define.' % self.id) + + def has_builtin_define(self, *args, **kwargs): + raise EnvironmentException('%s does not support has_builtin_define.' % self.id) + + def get_always_args(self): + return [] + + def can_linker_accept_rsp(self) -> bool: + """ + Determines whether the linker can accept arguments using the @rsp syntax. + """ + return self.linker.get_accepts_rsp() + + def get_linker_always_args(self) -> T.List[str]: + return self.linker.get_always_args() + + def get_linker_lib_prefix(self): + return self.linker.get_lib_prefix() + + def gen_import_library_args(self, implibname): + """ + Used only on Windows for libraries that need an import library. + This currently means C, C++, Fortran. + """ + return [] + + def get_linker_args_from_envvars(self, + for_machine: MachineChoice, + is_cross: bool) -> T.List[str]: + return self.linker.get_args_from_envvars(for_machine, is_cross) + + def get_options(self) -> T.Dict[str, coredata.UserOption]: + return {} + + def get_option_compile_args(self, options): + return [] + + def get_option_link_args(self, options: 'OptionDictType') -> T.List[str]: + return self.linker.get_option_args(options) + + def check_header(self, *args, **kwargs) -> T.Tuple[bool, bool]: + raise EnvironmentException('Language %s does not support header checks.' % self.get_display_language()) + + def has_header(self, *args, **kwargs) -> T.Tuple[bool, bool]: + raise EnvironmentException('Language %s does not support header checks.' % self.get_display_language()) + + def has_header_symbol(self, *args, **kwargs) -> T.Tuple[bool, bool]: + raise EnvironmentException('Language %s does not support header symbol checks.' % self.get_display_language()) + + def compiles(self, *args, **kwargs) -> T.Tuple[bool, bool]: + raise EnvironmentException('Language %s does not support compile checks.' % self.get_display_language()) + + def links(self, *args, **kwargs) -> T.Tuple[bool, bool]: + raise EnvironmentException('Language %s does not support link checks.' % self.get_display_language()) + + def run(self, *args, **kwargs) -> RunResult: + raise EnvironmentException('Language %s does not support run checks.' % self.get_display_language()) + + def sizeof(self, *args, **kwargs) -> int: + raise EnvironmentException('Language %s does not support sizeof checks.' % self.get_display_language()) + + def alignment(self, *args, **kwargs) -> int: + raise EnvironmentException('Language %s does not support alignment checks.' % self.get_display_language()) + + def has_function(self, *args, **kwargs) -> T.Tuple[bool, bool]: + raise EnvironmentException('Language %s does not support function checks.' % self.get_display_language()) + + @classmethod + def unix_args_to_native(cls, args): + "Always returns a copy that can be independently mutated" + return args[:] + + @classmethod + def native_args_to_unix(cls, args: T.List[str]) -> T.List[str]: + "Always returns a copy that can be independently mutated" + return args[:] + + def find_library(self, *args, **kwargs): + raise EnvironmentException('Language {} does not support library finding.'.format(self.get_display_language())) + + def get_library_dirs(self, *args, **kwargs): + return () + + def get_program_dirs(self, *args, **kwargs): + return [] + + def has_multi_arguments(self, args, env) -> T.Tuple[bool, bool]: + raise EnvironmentException( + 'Language {} does not support has_multi_arguments.'.format( + self.get_display_language())) + + def has_multi_link_arguments(self, args: T.List[str], env: 'Environment') -> T.Tuple[bool, bool]: + return self.linker.has_multi_arguments(args, env) + + def _get_compile_output(self, dirname, mode): + # In pre-processor mode, the output is sent to stdout and discarded + if mode == 'preprocess': + return None + # Extension only matters if running results; '.exe' is + # guaranteed to be executable on every platform. + if mode == 'link': + suffix = 'exe' + else: + suffix = 'obj' + return os.path.join(dirname, 'output.' + suffix) + + def get_compiler_args_for_mode(self, mode): + args = [] + args += self.get_always_args() + if mode == 'compile': + args += self.get_compile_only_args() + if mode == 'preprocess': + args += self.get_preprocess_only_args() + return args + + def compiler_args(self, args: T.Optional[T.Iterable[str]] = None) -> CompilerArgs: + """Return an appropriate CompilerArgs instance for this class.""" + return CompilerArgs(self, args) + + @contextlib.contextmanager + def compile(self, code: str, extra_args: list = None, *, mode: str = 'link', want_output: bool = False, temp_dir: str = None): + if extra_args is None: + extra_args = [] + try: + with tempfile.TemporaryDirectory(dir=temp_dir) as tmpdirname: + no_ccache = False + if isinstance(code, str): + srcname = os.path.join(tmpdirname, + 'testfile.' + self.default_suffix) + with open(srcname, 'w') as ofile: + ofile.write(code) + # ccache would result in a cache miss + no_ccache = True + elif isinstance(code, mesonlib.File): + srcname = code.fname + + # Construct the compiler command-line + commands = self.compiler_args() + commands.append(srcname) + # Preprocess mode outputs to stdout, so no output args + if mode != 'preprocess': + output = self._get_compile_output(tmpdirname, mode) + commands += self.get_output_args(output) + commands.extend(self.get_compiler_args_for_mode(mode)) + # extra_args must be last because it could contain '/link' to + # pass args to VisualStudio's linker. In that case everything + # in the command line after '/link' is given to the linker. + commands += extra_args + # Generate full command-line with the exelist + commands = self.get_exelist() + commands.to_native() + mlog.debug('Running compile:') + mlog.debug('Working directory: ', tmpdirname) + mlog.debug('Command line: ', ' '.join(commands), '\n') + mlog.debug('Code:\n', code) + os_env = os.environ.copy() + os_env['LC_ALL'] = 'C' + if no_ccache: + os_env['CCACHE_DISABLE'] = '1' + p, p.stdo, p.stde = Popen_safe(commands, cwd=tmpdirname, env=os_env) + mlog.debug('Compiler stdout:\n', p.stdo) + mlog.debug('Compiler stderr:\n', p.stde) + p.commands = commands + p.input_name = srcname + if want_output: + p.output_name = output + p.cached = False # Make sure that the cached attribute always exists + yield p + except OSError: + # On Windows antivirus programs and the like hold on to files so + # they can't be deleted. There's not much to do in this case. Also, + # catch OSError because the directory is then no longer empty. + pass + + @contextlib.contextmanager + def cached_compile(self, code, cdata: coredata.CoreData, *, extra_args=None, mode: str = 'link', temp_dir=None): + assert(isinstance(cdata, coredata.CoreData)) + + # Calculate the key + textra_args = tuple(extra_args) if extra_args is not None else None + key = (tuple(self.exelist), self.version, code, textra_args, mode) + + # Check if not cached + if key not in cdata.compiler_check_cache: + with self.compile(code, extra_args=extra_args, mode=mode, want_output=False, temp_dir=temp_dir) as p: + # Remove all attributes except the following + # This way the object can be serialized + tokeep = ['args', 'commands', 'input_name', 'output_name', + 'pid', 'returncode', 'stdo', 'stde', 'text_mode'] + todel = [x for x in vars(p).keys() if x not in tokeep] + for i in todel: + delattr(p, i) + p.cached = False + cdata.compiler_check_cache[key] = p + yield p + return + + # Return cached + p = cdata.compiler_check_cache[key] + p.cached = True + mlog.debug('Using cached compile:') + mlog.debug('Cached command line: ', ' '.join(p.commands), '\n') + mlog.debug('Code:\n', code) + mlog.debug('Cached compiler stdout:\n', p.stdo) + mlog.debug('Cached compiler stderr:\n', p.stde) + yield p + + def get_colorout_args(self, colortype): + return [] + + # Some compilers (msvc) write debug info to a separate file. + # These args specify where it should be written. + def get_compile_debugfile_args(self, rel_obj, **kwargs): + return [] + + def get_link_debugfile_name(self, targetfile: str) -> str: + return self.linker.get_debugfile_name(targetfile) + + def get_link_debugfile_args(self, targetfile: str) -> T.List[str]: + return self.linker.get_debugfile_args(targetfile) + + def get_std_shared_lib_link_args(self) -> T.List[str]: + return self.linker.get_std_shared_lib_args() + + def get_std_shared_module_link_args(self, options: 'OptionDictType') -> T.List[str]: + return self.linker.get_std_shared_module_args(options) + + def get_link_whole_for(self, args: T.List[str]) -> T.List[str]: + return self.linker.get_link_whole_for(args) + + def get_allow_undefined_link_args(self) -> T.List[str]: + return self.linker.get_allow_undefined_args() + + def no_undefined_link_args(self) -> T.List[str]: + return self.linker.no_undefined_args() + + # Compiler arguments needed to enable the given instruction set. + # May be [] meaning nothing needed or None meaning the given set + # is not supported. + def get_instruction_set_args(self, instruction_set): + return None + + def build_rpath_args(self, env: 'Environment', build_dir: str, from_dir: str, + rpath_paths: str, build_rpath: str, + install_rpath: str) -> T.Tuple[T.List[str], T.Set[bytes]]: + return self.linker.build_rpath_args( + env, build_dir, from_dir, rpath_paths, build_rpath, install_rpath) + + def thread_flags(self, env): + return [] + + def openmp_flags(self): + raise EnvironmentException('Language %s does not support OpenMP flags.' % self.get_display_language()) + + def openmp_link_flags(self): + return self.openmp_flags() + + def language_stdlib_only_link_flags(self): + return [] + + def gnu_symbol_visibility_args(self, vistype): + return [] + + def get_gui_app_args(self, value): + return [] + + def has_func_attribute(self, name, env): + raise EnvironmentException( + 'Language {} does not support function attributes.'.format(self.get_display_language())) + + def get_pic_args(self): + m = 'Language {} does not support position-independent code' + raise EnvironmentException(m.format(self.get_display_language())) + + def get_pie_args(self): + m = 'Language {} does not support position-independent executable' + raise EnvironmentException(m.format(self.get_display_language())) + + def get_pie_link_args(self) -> T.List[str]: + return self.linker.get_pie_args() + + def get_argument_syntax(self): + """Returns the argument family type. + + Compilers fall into families if they try to emulate the command line + interface of another compiler. For example, clang is in the GCC family + since it accepts most of the same arguments as GCC. ICL (ICC on + windows) is in the MSVC family since it accepts most of the same + arguments as MSVC. + """ + return 'other' + + def get_profile_generate_args(self): + raise EnvironmentException( + '%s does not support get_profile_generate_args ' % self.get_id()) + + def get_profile_use_args(self): + raise EnvironmentException( + '%s does not support get_profile_use_args ' % self.get_id()) + + def get_undefined_link_args(self) -> T.List[str]: + return self.linker.get_undefined_link_args() + + def remove_linkerlike_args(self, args): + rm_exact = ('-headerpad_max_install_names',) + rm_prefixes = ('-Wl,', '-L',) + rm_next = ('-L', '-framework',) + ret = [] + iargs = iter(args) + for arg in iargs: + # Remove this argument + if arg in rm_exact: + continue + # If the argument starts with this, but is not *exactly* this + # f.ex., '-L' should match ['-Lfoo'] but not ['-L', 'foo'] + if arg.startswith(rm_prefixes) and arg not in rm_prefixes: + continue + # Ignore this argument and the one after it + if arg in rm_next: + next(iargs) + continue + ret.append(arg) + return ret + + def get_lto_compile_args(self) -> T.List[str]: + return [] + + def get_lto_link_args(self) -> T.List[str]: + return self.linker.get_lto_args() + + def sanitizer_compile_args(self, value: str) -> T.List[str]: + return [] + + def sanitizer_link_args(self, value: str) -> T.List[str]: + return self.linker.sanitizer_args(value) + + def get_asneeded_args(self) -> T.List[str]: + return self.linker.get_asneeded_args() + + def headerpad_args(self) -> T.List[str]: + return self.linker.headerpad_args() + + def bitcode_args(self) -> T.List[str]: + return self.linker.bitcode_args() + + def get_buildtype_linker_args(self, buildtype: str) -> T.List[str]: + return self.linker.get_buildtype_args(buildtype) + + def get_soname_args(self, env: 'Environment', prefix: str, shlib_name: str, + suffix: str, soversion: str, + darwin_versions: T.Tuple[str, str], + is_shared_module: bool) -> T.List[str]: + return self.linker.get_soname_args( + env, prefix, shlib_name, suffix, soversion, + darwin_versions, is_shared_module) + + def get_target_link_args(self, target): + return target.link_args + + def get_dependency_compile_args(self, dep): + return dep.get_compile_args() + + def get_dependency_link_args(self, dep): + return dep.get_link_args() + + @classmethod + def use_linker_args(cls, linker: str) -> T.List[str]: + """Get a list of arguments to pass to the compiler to set the linker. + """ + return [] + + def get_coverage_link_args(self) -> T.List[str]: + return self.linker.get_coverage_args() + + def get_disable_assert_args(self) -> T.List[str]: + return [] + + +def get_largefile_args(compiler): + ''' + Enable transparent large-file-support for 32-bit UNIX systems + ''' + if not (compiler.get_argument_syntax() == 'msvc' or compiler.info.is_darwin()): + # Enable large-file support unconditionally on all platforms other + # than macOS and MSVC. macOS is now 64-bit-only so it doesn't + # need anything special, and MSVC doesn't have automatic LFS. + # You must use the 64-bit counterparts explicitly. + # glibc, musl, and uclibc, and all BSD libcs support this. On Android, + # support for transparent LFS is available depending on the version of + # Bionic: https://github.com/android/platform_bionic#32-bit-abi-bugs + # https://code.google.com/p/android/issues/detail?id=64613 + # + # If this breaks your code, fix it! It's been 20+ years! + return ['-D_FILE_OFFSET_BITS=64'] + # We don't enable -D_LARGEFILE64_SOURCE since that enables + # transitionary features and must be enabled by programs that use + # those features explicitly. + return [] + + +def get_args_from_envvars(lang: str, + for_machine: MachineChoice, + is_cross: bool, + use_linker_args: bool) -> T.Tuple[T.List[str], T.List[str]]: + """ + Returns a tuple of (compile_flags, link_flags) for the specified language + from the inherited environment + """ + if lang not in cflags_mapping: + return [], [] + + compile_flags = [] # type: T.List[str] + link_flags = [] # type: T.List[str] + + env_compile_flags = get_env_var(for_machine, is_cross, cflags_mapping[lang]) + if env_compile_flags is not None: + compile_flags += split_args(env_compile_flags) + + # Link flags (same for all languages) + if lang in languages_using_ldflags: + link_flags += LinkerEnvVarsMixin.get_args_from_envvars(for_machine, is_cross) + if use_linker_args: + # When the compiler is used as a wrapper around the linker (such as + # with GCC and Clang), the compile flags can be needed while linking + # too. This is also what Autotools does. However, we don't want to do + # this when the linker is stand-alone such as with MSVC C/C++, etc. + link_flags = compile_flags + link_flags + + # Pre-processor flags for certain languages + if lang in languages_using_cppflags: + env_preproc_flags = get_env_var(for_machine, is_cross, 'CPPFLAGS') + if env_preproc_flags is not None: + compile_flags += split_args(env_preproc_flags) + + return compile_flags, link_flags + + +def get_global_options(lang: str, + comp: T.Type[Compiler], + for_machine: MachineChoice, + is_cross: bool, + properties: Properties) -> T.Dict[str, coredata.UserOption]: + """Retreive options that apply to all compilers for a given language.""" + description = 'Extra arguments passed to the {}'.format(lang) + opts = { + 'args': coredata.UserArrayOption( + description + ' compiler', + [], split_args=True, user_input=True, allow_dups=True), + 'link_args': coredata.UserArrayOption( + description + ' linker', + [], split_args=True, user_input=True, allow_dups=True), + } + + # Get from env vars. + compile_args, link_args = get_args_from_envvars( + lang, + for_machine, + is_cross, + comp.INVOKES_LINKER) + + for k, o in opts.items(): + user_k = lang + '_' + k + if user_k in properties: + # Get from configuration files. + o.set_value(properties[user_k]) + elif k == 'args': + o.set_value(compile_args) + elif k == 'link_args': + o.set_value(link_args) + + return opts diff --git a/meson/mesonbuild/compilers/cpp.py b/meson/mesonbuild/compilers/cpp.py new file mode 100644 index 000000000..478a68c13 --- /dev/null +++ b/meson/mesonbuild/compilers/cpp.py @@ -0,0 +1,726 @@ +# Copyright 2012-2017 The Meson development team + +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at + +# http://www.apache.org/licenses/LICENSE-2.0 + +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import copy +import functools +import os.path +import typing as T + +from .. import coredata +from .. import mlog +from ..mesonlib import MesonException, MachineChoice, version_compare + +from ..linkers import LinkerEnvVarsMixin +from .compilers import ( + gnu_winlibs, + msvc_winlibs, + Compiler, +) +from .c_function_attributes import CXX_FUNC_ATTRIBUTES, C_FUNC_ATTRIBUTES +from .mixins.clike import CLikeCompiler +from .mixins.ccrx import CcrxCompiler +from .mixins.c2000 import C2000Compiler +from .mixins.arm import ArmCompiler, ArmclangCompiler +from .mixins.visualstudio import MSVCCompiler, ClangClCompiler +from .mixins.gnu import GnuCompiler +from .mixins.intel import IntelGnuLikeCompiler, IntelVisualStudioLikeCompiler +from .mixins.clang import ClangCompiler +from .mixins.elbrus import ElbrusCompiler +from .mixins.pgi import PGICompiler +from .mixins.emscripten import EmscriptenMixin + +if T.TYPE_CHECKING: + from ..envconfig import MachineInfo + + +def non_msvc_eh_options(eh, args): + if eh == 'none': + args.append('-fno-exceptions') + elif eh == 's' or eh == 'c': + mlog.warning('non-MSVC compilers do not support ' + eh + ' exception handling.' + + 'You may want to set eh to \'default\'.') + +class CPPCompiler(CLikeCompiler, Compiler): + + @classmethod + def attribute_check_func(cls, name): + try: + return CXX_FUNC_ATTRIBUTES.get(name, C_FUNC_ATTRIBUTES[name]) + except KeyError: + raise MesonException('Unknown function attribute "{}"'.format(name)) + + language = 'cpp' + + def __init__(self, exelist, version, for_machine: MachineChoice, is_cross: bool, + info: 'MachineInfo', exe_wrap: T.Optional[str] = None, **kwargs): + # If a child ObjCPP class has already set it, don't set it ourselves + Compiler.__init__(self, exelist, version, for_machine, info, **kwargs) + CLikeCompiler.__init__(self, is_cross, exe_wrap) + + @staticmethod + def get_display_language(): + return 'C++' + + def get_no_stdinc_args(self): + return ['-nostdinc++'] + + def sanity_check(self, work_dir, environment): + code = 'class breakCCompiler;int main(void) { return 0; }\n' + return self.sanity_check_impl(work_dir, environment, 'sanitycheckcpp.cc', code) + + def get_compiler_check_args(self): + # -fpermissive allows non-conforming code to compile which is necessary + # for many C++ checks. Particularly, the has_header_symbol check is + # too strict without this and always fails. + return super().get_compiler_check_args() + ['-fpermissive'] + + def has_header_symbol(self, hname, symbol, prefix, env, *, extra_args=None, dependencies=None): + # Check if it's a C-like symbol + found, cached = super().has_header_symbol(hname, symbol, prefix, env, + extra_args=extra_args, + dependencies=dependencies) + if found: + return True, cached + # Check if it's a class or a template + if extra_args is None: + extra_args = [] + fargs = {'prefix': prefix, 'header': hname, 'symbol': symbol} + t = '''{prefix} + #include <{header}> + using {symbol}; + int main(void) {{ return 0; }}''' + return self.compiles(t.format(**fargs), env, extra_args=extra_args, + dependencies=dependencies) + + def _test_cpp_std_arg(self, cpp_std_value): + # Test whether the compiler understands a -std=XY argument + assert(cpp_std_value.startswith('-std=')) + + # This test does not use has_multi_arguments() for two reasons: + # 1. has_multi_arguments() requires an env argument, which the compiler + # object does not have at this point. + # 2. even if it did have an env object, that might contain another more + # recent -std= argument, which might lead to a cascaded failure. + CPP_TEST = 'int i = static_cast(0);' + with self.compile(CPP_TEST, extra_args=[cpp_std_value], mode='compile') as p: + if p.returncode == 0: + mlog.debug('Compiler accepts {}:'.format(cpp_std_value), 'YES') + return True + else: + mlog.debug('Compiler accepts {}:'.format(cpp_std_value), 'NO') + return False + + @functools.lru_cache() + def _find_best_cpp_std(self, cpp_std): + # The initial version mapping approach to make falling back + # from '-std=c++14' to '-std=c++1y' was too brittle. For instance, + # Apple's Clang uses a different versioning scheme to upstream LLVM, + # making the whole detection logic awfully brittle. Instead, let's + # just see if feeding GCC or Clang our '-std=' setting works, and + # if not, try the fallback argument. + CPP_FALLBACKS = { + 'c++11': 'c++0x', + 'gnu++11': 'gnu++0x', + 'c++14': 'c++1y', + 'gnu++14': 'gnu++1y', + 'c++17': 'c++1z', + 'gnu++17': 'gnu++1z' + } + + # Currently, remapping is only supported for Clang, Elbrus and GCC + assert(self.id in frozenset(['clang', 'lcc', 'gcc', 'emscripten'])) + + if cpp_std not in CPP_FALLBACKS: + # 'c++03' and 'c++98' don't have fallback types + return '-std=' + cpp_std + + for i in (cpp_std, CPP_FALLBACKS[cpp_std]): + cpp_std_value = '-std=' + i + if self._test_cpp_std_arg(cpp_std_value): + return cpp_std_value + + raise MesonException('C++ Compiler does not support -std={}'.format(cpp_std)) + + +class ClangCPPCompiler(ClangCompiler, CPPCompiler): + def __init__(self, exelist, version, for_machine: MachineChoice, + is_cross, info: 'MachineInfo', exe_wrapper=None, + defines : T.Optional[T.List[str]] = None, **kwargs): + CPPCompiler.__init__(self, exelist, version, for_machine, is_cross, + info, exe_wrapper, **kwargs) + ClangCompiler.__init__(self, defines) + default_warn_args = ['-Wall', '-Winvalid-pch', '-Wnon-virtual-dtor'] + self.warn_args = {'0': [], + '1': default_warn_args, + '2': default_warn_args + ['-Wextra'], + '3': default_warn_args + ['-Wextra', '-Wpedantic']} + + def get_options(self): + opts = CPPCompiler.get_options(self) + opts.update({ + 'eh': coredata.UserComboOption( + 'C++ exception handling type.', + ['none', 'default', 'a', 's', 'sc'], + 'default', + ), + 'rtti': coredata.UserBooleanOption('Enable RTTI', True), + 'std': coredata.UserComboOption( + 'C++ language standard to use', + ['none', 'c++98', 'c++03', 'c++11', 'c++14', 'c++17', 'c++1z', 'c++2a', + 'gnu++11', 'gnu++14', 'gnu++17', 'gnu++1z', 'gnu++2a'], + 'none', + ), + }) + if self.info.is_windows() or self.info.is_cygwin(): + opts.update({ + 'winlibs': coredata.UserArrayOption( + 'Standard Win libraries to link against', + gnu_winlibs, + ), + }) + return opts + + def get_option_compile_args(self, options): + args = [] + std = options['std'] + if std.value != 'none': + args.append(self._find_best_cpp_std(std.value)) + + non_msvc_eh_options(options['eh'].value, args) + + if not options['rtti'].value: + args.append('-fno-rtti') + + return args + + def get_option_link_args(self, options): + if self.info.is_windows() or self.info.is_cygwin(): + return options['winlibs'].value[:] + return [] + + def language_stdlib_only_link_flags(self): + return ['-lstdc++'] + + +class AppleClangCPPCompiler(ClangCPPCompiler): + def language_stdlib_only_link_flags(self): + return ['-lc++'] + + +class EmscriptenCPPCompiler(EmscriptenMixin, LinkerEnvVarsMixin, ClangCPPCompiler): + def __init__(self, exelist, version, for_machine: MachineChoice, + is_cross: bool, info: 'MachineInfo', exe_wrapper=None, **kwargs): + if not is_cross: + raise MesonException('Emscripten compiler can only be used for cross compilation.') + ClangCPPCompiler.__init__(self, exelist=exelist, version=version, + for_machine=for_machine, is_cross=is_cross, + info=info, exe_wrapper=exe_wrapper, **kwargs) + self.id = 'emscripten' + + def get_option_compile_args(self, options): + args = [] + std = options['std'] + if std.value != 'none': + args.append(self._find_best_cpp_std(std.value)) + return args + + +class ArmclangCPPCompiler(ArmclangCompiler, CPPCompiler): + def __init__(self, exelist, version, for_machine: MachineChoice, + is_cross, info: 'MachineInfo', exe_wrapper=None, **kwargs): + CPPCompiler.__init__(self, exelist=exelist, version=version, + for_machine=for_machine, is_cross=is_cross, + info=info, exe_wrapper=exe_wrapper, **kwargs) + ArmclangCompiler.__init__(self) + default_warn_args = ['-Wall', '-Winvalid-pch', '-Wnon-virtual-dtor'] + self.warn_args = {'0': [], + '1': default_warn_args, + '2': default_warn_args + ['-Wextra'], + '3': default_warn_args + ['-Wextra', '-Wpedantic']} + + def get_options(self): + opts = CPPCompiler.get_options(self) + opts.update({ + 'eh': coredata.UserComboOption( + 'C++ exception handling type.', + ['none', 'default', 'a', 's', 'sc'], + 'default', + ), + 'std': coredata.UserComboOption( + 'C++ language standard to use', + [ + 'none', 'c++98', 'c++03', 'c++11', 'c++14', 'c++17', + 'gnu++98', 'gnu++03', 'gnu++11', 'gnu++14', 'gnu++17', + ], + 'none', + ), + }) + return opts + + def get_option_compile_args(self, options): + args = [] + std = options['std'] + if std.value != 'none': + args.append('-std=' + std.value) + + non_msvc_eh_options(options['eh'].value, args) + + return args + + def get_option_link_args(self, options): + return [] + + +class GnuCPPCompiler(GnuCompiler, CPPCompiler): + def __init__(self, exelist, version, for_machine: MachineChoice, + is_cross, info: 'MachineInfo', exe_wrap, defines, **kwargs): + CPPCompiler.__init__(self, exelist, version, for_machine, is_cross, info, exe_wrap, **kwargs) + GnuCompiler.__init__(self, defines) + default_warn_args = ['-Wall', '-Winvalid-pch', '-Wnon-virtual-dtor'] + self.warn_args = {'0': [], + '1': default_warn_args, + '2': default_warn_args + ['-Wextra'], + '3': default_warn_args + ['-Wextra', '-Wpedantic']} + + def get_options(self): + opts = CPPCompiler.get_options(self) + opts.update({ + 'eh': coredata.UserComboOption( + 'C++ exception handling type.', + ['none', 'default', 'a', 's', 'sc'], + 'default', + ), + 'rtti': coredata.UserBooleanOption('Enable RTTI', True), + 'std': coredata.UserComboOption( + 'C++ language standard to use', + ['none', 'c++98', 'c++03', 'c++11', 'c++14', 'c++17', 'c++1z', 'c++2a', + 'gnu++03', 'gnu++11', 'gnu++14', 'gnu++17', 'gnu++1z', 'gnu++2a'], + 'none', + ), + 'debugstl': coredata.UserBooleanOption( + 'STL debug mode', + False, + ) + }) + if self.info.is_windows() or self.info.is_cygwin(): + opts.update({ + 'winlibs': coredata.UserArrayOption( + 'Standard Win libraries to link against', + gnu_winlibs, + ), + }) + return opts + + def get_option_compile_args(self, options): + args = [] + std = options['std'] + if std.value != 'none': + args.append(self._find_best_cpp_std(std.value)) + + non_msvc_eh_options(options['eh'].value, args) + + if not options['rtti'].value: + args.append('-fno-rtti') + + if options['debugstl'].value: + args.append('-D_GLIBCXX_DEBUG=1') + return args + + def get_option_link_args(self, options): + if self.info.is_windows() or self.info.is_cygwin(): + return options['winlibs'].value[:] + return [] + + def get_pch_use_args(self, pch_dir, header): + return ['-fpch-preprocess', '-include', os.path.basename(header)] + + def language_stdlib_only_link_flags(self): + return ['-lstdc++'] + + +class PGICPPCompiler(PGICompiler, CPPCompiler): + def __init__(self, exelist, version, for_machine: MachineChoice, + is_cross, info: 'MachineInfo', exe_wrapper=None, **kwargs): + CPPCompiler.__init__(self, exelist, version, for_machine, is_cross, info, exe_wrapper, **kwargs) + PGICompiler.__init__(self) + + +class ElbrusCPPCompiler(GnuCPPCompiler, ElbrusCompiler): + def __init__(self, exelist, version, for_machine: MachineChoice, + is_cross, info: 'MachineInfo', exe_wrapper=None, + defines=None, **kwargs): + GnuCPPCompiler.__init__(self, exelist, version, for_machine, + is_cross, info, exe_wrapper, defines, + **kwargs) + ElbrusCompiler.__init__(self) + + # It does not support c++/gnu++ 17 and 1z, but still does support 0x, 1y, and gnu++98. + def get_options(self): + opts = CPPCompiler.get_options(self) + opts.update({ + 'eh': coredata.UserComboOption( + 'C++ exception handling type.', + ['none', 'default', 'a', 's', 'sc'], + 'default', + ), + 'std': coredata.UserComboOption( + 'C++ language standard to use', + [ + 'none', 'c++98', 'c++03', 'c++0x', 'c++11', 'c++14', 'c++1y', + 'gnu++98', 'gnu++03', 'gnu++0x', 'gnu++11', 'gnu++14', 'gnu++1y', + ], + 'none', + ), + 'debugstl': coredata.UserBooleanOption( + 'STL debug mode', + False, + ), + }) + return opts + + # Elbrus C++ compiler does not have lchmod, but there is only linker warning, not compiler error. + # So we should explicitly fail at this case. + def has_function(self, funcname, prefix, env, *, extra_args=None, dependencies=None): + if funcname == 'lchmod': + return False, False + else: + return super().has_function(funcname, prefix, env, + extra_args=extra_args, + dependencies=dependencies) + + # Elbrus C++ compiler does not support RTTI, so don't check for it. + def get_option_compile_args(self, options): + args = [] + std = options['std'] + if std.value != 'none': + args.append(self._find_best_cpp_std(std.value)) + + non_msvc_eh_options(options['eh'].value, args) + + if options['debugstl'].value: + args.append('-D_GLIBCXX_DEBUG=1') + return args + + +class IntelCPPCompiler(IntelGnuLikeCompiler, CPPCompiler): + def __init__(self, exelist, version, for_machine: MachineChoice, + is_cross, info: 'MachineInfo', exe_wrap, **kwargs): + CPPCompiler.__init__(self, exelist, version, for_machine, is_cross, + info, exe_wrap, **kwargs) + IntelGnuLikeCompiler.__init__(self) + self.lang_header = 'c++-header' + default_warn_args = ['-Wall', '-w3', '-diag-disable:remark', + '-Wpch-messages', '-Wnon-virtual-dtor'] + self.warn_args = {'0': [], + '1': default_warn_args, + '2': default_warn_args + ['-Wextra'], + '3': default_warn_args + ['-Wextra']} + + def get_options(self): + opts = CPPCompiler.get_options(self) + # Every Unix compiler under the sun seems to accept -std=c++03, + # with the exception of ICC. Instead of preventing the user from + # globally requesting C++03, we transparently remap it to C++98 + c_stds = ['c++98', 'c++03'] + g_stds = ['gnu++98', 'gnu++03'] + if version_compare(self.version, '>=15.0.0'): + c_stds += ['c++11', 'c++14'] + g_stds += ['gnu++11'] + if version_compare(self.version, '>=16.0.0'): + c_stds += ['c++17'] + if version_compare(self.version, '>=17.0.0'): + g_stds += ['gnu++14'] + opts.update({ + 'eh': coredata.UserComboOption( + 'C++ exception handling type.', + ['none', 'default', 'a', 's', 'sc'], + 'default', + ), + 'rtti': coredata.UserBooleanOption('Enable RTTI', True), + 'std': coredata.UserComboOption( + 'C++ language standard to use', + ['none'] + c_stds + g_stds, + 'none', + ), + 'debugstl': coredata.UserBooleanOption('STL debug mode', False), + }) + return opts + + def get_option_compile_args(self, options): + args = [] + std = options['std'] + if std.value != 'none': + remap_cpp03 = { + 'c++03': 'c++98', + 'gnu++03': 'gnu++98' + } + args.append('-std=' + remap_cpp03.get(std.value, std.value)) + if options['eh'].value == 'none': + args.append('-fno-exceptions') + if not options['rtti'].value: + args.append('-fno-rtti') + if options['debugstl'].value: + args.append('-D_GLIBCXX_DEBUG=1') + return args + + def get_option_link_args(self, options): + return [] + + +class VisualStudioLikeCPPCompilerMixin: + + """Mixin for C++ specific method overrides in MSVC-like compilers.""" + + VC_VERSION_MAP = { + 'none': (True, None), + 'vc++11': (True, 11), + 'vc++14': (True, 14), + 'vc++17': (True, 17), + 'vc++latest': (True, "latest"), + 'c++11': (False, 11), + 'c++14': (False, 14), + 'c++17': (False, 17), + 'c++latest': (False, "latest"), + } + + def get_option_link_args(self, options): + return options['winlibs'].value[:] + + def _get_options_impl(self, opts, cpp_stds: T.List[str]): + opts.update({ + 'eh': coredata.UserComboOption( + 'C++ exception handling type.', + ['none', 'default', 'a', 's', 'sc'], + 'default', + ), + 'rtti': coredata.UserBooleanOption('Enable RTTI', True), + 'std': coredata.UserComboOption( + 'C++ language standard to use', + cpp_stds, + 'none', + ), + 'winlibs': coredata.UserArrayOption( + 'Windows libs to link against.', + msvc_winlibs, + ), + }) + return opts + + def get_option_compile_args(self, options): + args = [] + + eh = options['eh'] + if eh.value == 'default': + args.append('/EHsc') + elif eh.value == 'none': + args.append('/EHs-c-') + else: + args.append('/EH' + eh.value) + + if not options['rtti'].value: + args.append('/GR-') + + permissive, ver = self.VC_VERSION_MAP[options['std'].value] + + if ver is not None: + args.append('/std:c++{}'.format(ver)) + + if not permissive: + args.append('/permissive-') + + return args + + def get_compiler_check_args(self): + # XXX: this is a hack because so much GnuLike stuff is in the base CPPCompiler class. + return CLikeCompiler.get_compiler_check_args(self) + + +class CPP11AsCPP14Mixin: + + """Mixin class for VisualStudio and ClangCl to replace C++11 std with C++14. + + This is a limitation of Clang and MSVC that ICL doesn't share. + """ + + def get_option_compile_args(self, options): + # Note: there is no explicit flag for supporting C++11; we attempt to do the best we can + # which means setting the C++ standard version to C++14, in compilers that support it + # (i.e., after VS2015U3) + # if one is using anything before that point, one cannot set the standard. + if options['std'].value in {'vc++11', 'c++11'}: + mlog.warning(self.id, 'does not support C++11;', + 'attempting best effort; setting the standard to C++14', once=True) + # Don't mutate anything we're going to change, we need to use + # deepcopy since we're messing with members, and we can't simply + # copy the members because the option proxy doesn't support it. + options = copy.deepcopy(options) + if options['std'].value == 'vc++11': + options['std'].value = 'vc++14' + else: + options['std'].value = 'c++14' + return super().get_option_compile_args(options) + + +class VisualStudioCPPCompiler(CPP11AsCPP14Mixin, VisualStudioLikeCPPCompilerMixin, MSVCCompiler, CPPCompiler): + def __init__(self, exelist, version, for_machine: MachineChoice, + is_cross: bool, info: 'MachineInfo', exe_wrap, target, **kwargs): + CPPCompiler.__init__(self, exelist, version, for_machine, is_cross, info, exe_wrap, **kwargs) + MSVCCompiler.__init__(self, target) + self.base_options = ['b_pch', 'b_vscrt'] # FIXME add lto, pgo and the like + self.id = 'msvc' + + def get_options(self): + cpp_stds = ['none', 'c++11', 'vc++11'] + # Visual Studio 2015 and later + if version_compare(self.version, '>=19'): + cpp_stds.extend(['c++14', 'c++latest', 'vc++latest']) + # Visual Studio 2017 and later + if version_compare(self.version, '>=19.11'): + cpp_stds.extend(['vc++14', 'c++17', 'vc++17']) + return self._get_options_impl(super().get_options(), cpp_stds) + + def get_option_compile_args(self, options): + if options['std'].value != 'none' and version_compare(self.version, '<19.00.24210'): + mlog.warning('This version of MSVC does not support cpp_std arguments') + options = copy.copy(options) + options['std'].value = 'none' + + args = super().get_option_compile_args(options) + + if version_compare(self.version, '<19.11'): + try: + i = args.index('/permissive-') + except ValueError: + return args + del args[i] + return args + +class ClangClCPPCompiler(CPP11AsCPP14Mixin, VisualStudioLikeCPPCompilerMixin, ClangClCompiler, CPPCompiler): + def __init__(self, exelist, version, for_machine: MachineChoice, + is_cross, info: 'MachineInfo', exe_wrap, target, **kwargs): + CPPCompiler.__init__(self, exelist, version, for_machine, is_cross, + info, exe_wrap, **kwargs) + ClangClCompiler.__init__(self, target) + self.id = 'clang-cl' + + def get_options(self): + cpp_stds = ['none', 'c++11', 'vc++11', 'c++14', 'vc++14', 'c++17', 'vc++17', 'c++latest'] + return self._get_options_impl(super().get_options(), cpp_stds) + + +class IntelClCPPCompiler(VisualStudioLikeCPPCompilerMixin, IntelVisualStudioLikeCompiler, CPPCompiler): + + def __init__(self, exelist, version, for_machine: MachineChoice, + is_cross, info: 'MachineInfo', exe_wrap, target, **kwargs): + CPPCompiler.__init__(self, exelist, version, for_machine, is_cross, + info, exe_wrap, **kwargs) + IntelVisualStudioLikeCompiler.__init__(self, target) + + def get_options(self): + # This has only been tested with version 19.0, + cpp_stds = ['none', 'c++11', 'vc++11', 'c++14', 'vc++14', 'c++17', 'vc++17', 'c++latest'] + return self._get_options_impl(super().get_options(), cpp_stds) + + +class ArmCPPCompiler(ArmCompiler, CPPCompiler): + def __init__(self, exelist, version, for_machine: MachineChoice, + is_cross, info: 'MachineInfo', exe_wrap=None, **kwargs): + CPPCompiler.__init__(self, exelist, version, for_machine, is_cross, + info, exe_wrap, **kwargs) + ArmCompiler.__init__(self) + + def get_options(self): + opts = CPPCompiler.get_options(self) + opts.update({ + 'std': coredata.UserComboOption( + 'C++ language standard to use', + ['none', 'c++03', 'c++11'], + 'none', + ), + }) + return opts + + def get_option_compile_args(self, options): + args = [] + std = options['std'] + if std.value == 'c++11': + args.append('--cpp11') + elif std.value == 'c++03': + args.append('--cpp') + return args + + def get_option_link_args(self, options): + return [] + + def get_compiler_check_args(self): + return [] + + +class CcrxCPPCompiler(CcrxCompiler, CPPCompiler): + def __init__(self, exelist, version, for_machine: MachineChoice, + is_cross, info: 'MachineInfo', exe_wrap=None, **kwargs): + CPPCompiler.__init__(self, exelist, version, for_machine, is_cross, + info, exe_wrap, **kwargs) + CcrxCompiler.__init__(self) + + # Override CCompiler.get_always_args + def get_always_args(self): + return ['-nologo', '-lang=cpp'] + + def get_option_compile_args(self, options): + return [] + + def get_compile_only_args(self): + return [] + + def get_output_args(self, target): + return ['-output=obj=%s' % target] + + def get_option_link_args(self, options): + return [] + + def get_compiler_check_args(self): + return [] + +class C2000CPPCompiler(C2000Compiler, CPPCompiler): + def __init__(self, exelist, version, for_machine: MachineChoice, + is_cross, info: 'MachineInfo', exe_wrap=None, **kwargs): + CPPCompiler.__init__(self, exelist, version, for_machine, is_cross, + info, exe_wrap, **kwargs) + C2000Compiler.__init__(self) + + def get_options(self): + opts = CPPCompiler.get_options(self) + opts.update({'cpp_std': coredata.UserComboOption('C++ language standard to use', + ['none', 'c++03'], + 'none')}) + return opts + + def get_always_args(self): + return ['-nologo', '-lang=cpp'] + + def get_option_compile_args(self, options): + return [] + + def get_compile_only_args(self): + return [] + + def get_output_args(self, target): + return ['-output=obj=%s' % target] + + def get_option_link_args(self, options): + return [] + + def get_compiler_check_args(self): + return [] diff --git a/meson/mesonbuild/compilers/cs.py b/meson/mesonbuild/compilers/cs.py new file mode 100644 index 000000000..843348e76 --- /dev/null +++ b/meson/mesonbuild/compilers/cs.py @@ -0,0 +1,164 @@ +# Copyright 2012-2017 The Meson development team + +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at + +# http://www.apache.org/licenses/LICENSE-2.0 + +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import os.path, subprocess +import typing as T + +from ..mesonlib import EnvironmentException + +from .compilers import Compiler, MachineChoice, mono_buildtype_args +from .mixins.islinker import BasicLinkerIsCompilerMixin + +if T.TYPE_CHECKING: + from ..envconfig import MachineInfo + +cs_optimization_args = {'0': [], + 'g': [], + '1': ['-optimize+'], + '2': ['-optimize+'], + '3': ['-optimize+'], + 's': ['-optimize+'], + } + + +class CsCompiler(BasicLinkerIsCompilerMixin, Compiler): + + language = 'cs' + + def __init__(self, exelist, version, for_machine: MachineChoice, + info: 'MachineInfo', comp_id, runner=None): + super().__init__(exelist, version, for_machine, info) + self.id = comp_id + self.is_cross = False + self.runner = runner + + @classmethod + def get_display_language(cls): + return 'C sharp' + + def get_always_args(self): + return ['/nologo'] + + def get_linker_always_args(self): + return ['/nologo'] + + def get_output_args(self, fname): + return ['-out:' + fname] + + def get_link_args(self, fname): + return ['-r:' + fname] + + def get_werror_args(self): + return ['-warnaserror'] + + def split_shlib_to_parts(self, fname): + return None, fname + + def get_dependency_gen_args(self, outtarget, outfile): + return [] + + def get_linker_exelist(self): + return self.exelist[:] + + def get_compile_only_args(self): + return [] + + def get_coverage_args(self): + return [] + + def get_std_exe_link_args(self): + return [] + + def get_include_args(self, path): + return [] + + def get_pic_args(self): + return [] + + def compute_parameters_with_absolute_paths(self, parameter_list, build_dir): + for idx, i in enumerate(parameter_list): + if i[:2] == '-L': + parameter_list[idx] = i[:2] + os.path.normpath(os.path.join(build_dir, i[2:])) + if i[:5] == '-lib:': + parameter_list[idx] = i[:5] + os.path.normpath(os.path.join(build_dir, i[5:])) + + return parameter_list + + def name_string(self): + return ' '.join(self.exelist) + + def get_pch_use_args(self, pch_dir, header): + return [] + + def get_pch_name(self, header_name): + return '' + + def sanity_check(self, work_dir, environment): + src = 'sanity.cs' + obj = 'sanity.exe' + source_name = os.path.join(work_dir, src) + with open(source_name, 'w') as ofile: + ofile.write('''public class Sanity { + static public void Main () { + } +} +''') + pc = subprocess.Popen(self.exelist + self.get_always_args() + [src], cwd=work_dir) + pc.wait() + if pc.returncode != 0: + raise EnvironmentException('Mono compiler %s can not compile programs.' % self.name_string()) + if self.runner: + cmdlist = [self.runner, obj] + else: + cmdlist = [os.path.join(work_dir, obj)] + pe = subprocess.Popen(cmdlist, cwd=work_dir) + pe.wait() + if pe.returncode != 0: + raise EnvironmentException('Executables created by Mono compiler %s are not runnable.' % self.name_string()) + + def needs_static_linker(self): + return False + + def get_buildtype_args(self, buildtype): + return mono_buildtype_args[buildtype] + + def get_debug_args(self, is_debug): + return ['-debug'] if is_debug else [] + + def get_optimization_args(self, optimization_level): + return cs_optimization_args[optimization_level] + + +class MonoCompiler(CsCompiler): + def __init__(self, exelist, version, for_machine: MachineChoice, + info: 'MachineInfo'): + super().__init__(exelist, version, for_machine, info, 'mono', + runner='mono') + + +class VisualStudioCsCompiler(CsCompiler): + def __init__(self, exelist, version, for_machine: MachineChoice, + info: 'MachineInfo'): + super().__init__(exelist, version, for_machine, info, 'csc') + + def get_buildtype_args(self, buildtype): + res = mono_buildtype_args[buildtype] + if not self.info.is_windows(): + tmp = [] + for flag in res: + if flag == '-debug': + flag = '-debug:portable' + tmp.append(flag) + res = tmp + return res diff --git a/meson/mesonbuild/compilers/cuda.py b/meson/mesonbuild/compilers/cuda.py new file mode 100644 index 000000000..934ad1218 --- /dev/null +++ b/meson/mesonbuild/compilers/cuda.py @@ -0,0 +1,313 @@ +# Copyright 2012-2017 The Meson development team + +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at + +# http://www.apache.org/licenses/LICENSE-2.0 + +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import os.path +import typing as T +from functools import partial + +from .. import coredata +from .. import mlog +from ..mesonlib import EnvironmentException, MachineChoice, Popen_safe, OptionOverrideProxy, is_windows, LibType +from .compilers import (Compiler, cuda_buildtype_args, cuda_optimization_args, + cuda_debug_args) + +if T.TYPE_CHECKING: + from ..environment import Environment # noqa: F401 + from ..envconfig import MachineInfo + + +class CudaCompiler(Compiler): + + LINKER_PREFIX = '-Xlinker=' + language = 'cuda' + + _universal_flags = {'compiler': ['-I', '-D', '-U', '-E'], 'linker': ['-l', '-L']} + + def __init__(self, exelist, version, for_machine: MachineChoice, + is_cross, exe_wrapper, host_compiler, info: 'MachineInfo', **kwargs): + super().__init__(exelist, version, for_machine, info, **kwargs) + self.is_cross = is_cross + self.exe_wrapper = exe_wrapper + self.host_compiler = host_compiler + self.base_options = host_compiler.base_options + self.id = 'nvcc' + self.warn_args = {level: self._to_host_flags(flags) for level, flags in host_compiler.warn_args.items()} + + @classmethod + def _to_host_flags(cls, flags, phase='compiler'): + return list(map(partial(cls._to_host_flag, phase=phase), flags)) + + @classmethod + def _to_host_flag(cls, flag, phase): + if not flag[0] in ['-', '/'] or flag[:2] in cls._universal_flags[phase]: + return flag + + return '-X{}={}'.format(phase, flag) + + def needs_static_linker(self): + return False + + def get_always_args(self): + return [] + + def get_no_stdinc_args(self): + return [] + + def thread_link_flags(self, environment): + return self._to_host_flags(self.host_compiler.thread_link_flags(environment)) + + def sanity_check(self, work_dir, environment): + mlog.debug('Sanity testing ' + self.get_display_language() + ' compiler:', ' '.join(self.exelist)) + mlog.debug('Is cross compiler: %s.' % str(self.is_cross)) + + sname = 'sanitycheckcuda.cu' + code = r''' + #include + #include + + __global__ void kernel (void) {} + + int main(void){ + struct cudaDeviceProp prop; + int count, i; + cudaError_t ret = cudaGetDeviceCount(&count); + if(ret != cudaSuccess){ + fprintf(stderr, "%d\n", (int)ret); + }else{ + for(i=0;i + using {symbol}; + int main(void) {{ return 0; }}''' + return self.compiles(t.format(**fargs), env, extra_args, dependencies) + + def get_options(self): + opts = super().get_options() + opts.update({'cuda_std': coredata.UserComboOption('C++ language standard to use', + ['none', 'c++03', 'c++11', 'c++14'], + 'none')}) + return opts + + def _to_host_compiler_options(self, options): + overrides = {name: opt.value for name, opt in options.copy().items()} + return OptionOverrideProxy(overrides, self.host_compiler.get_options()) + + def get_option_compile_args(self, options): + args = [] + # On Windows, the version of the C++ standard used by nvcc is dictated by + # the combination of CUDA version and MSVC version; the --std= is thus ignored + # and attempting to use it will result in a warning: https://stackoverflow.com/a/51272091/741027 + if not is_windows(): + std = options['cuda_std'] + if std.value != 'none': + args.append('--std=' + std.value) + + return args + self._to_host_flags(self.host_compiler.get_option_compile_args(self._to_host_compiler_options(options))) + + @classmethod + def _cook_link_args(cls, args: T.List[str]) -> T.List[str]: + # Prepare link args for nvcc + cooked = [] # type: T.List[str] + for arg in args: + if arg.startswith('-Wl,'): # strip GNU-style -Wl prefix + arg = arg.replace('-Wl,', '', 1) + arg = arg.replace(' ', '\\') # espace whitespace + cooked.append(arg) + return cls._to_host_flags(cooked, 'linker') + + def get_option_link_args(self, options): + return self._cook_link_args(self.host_compiler.get_option_link_args(self._to_host_compiler_options(options))) + + def name_string(self): + return ' '.join(self.exelist) + + def get_soname_args(self, *args): + return self._cook_link_args(self.host_compiler.get_soname_args(*args)) + + def get_dependency_gen_args(self, outtarget, outfile): + return [] + + def get_compile_only_args(self): + return ['-c'] + + def get_no_optimization_args(self): + return ['-O0'] + + def get_optimization_args(self, optimization_level): + # alternatively, consider simply redirecting this to the host compiler, which would + # give us more control over options like "optimize for space" (which nvcc doesn't support): + # return self._to_host_flags(self.host_compiler.get_optimization_args(optimization_level)) + return cuda_optimization_args[optimization_level] + + def get_debug_args(self, is_debug): + return cuda_debug_args[is_debug] + + def get_werror_args(self): + return ['-Werror=cross-execution-space-call,deprecated-declarations,reorder'] + + def get_warn_args(self, level): + return self.warn_args[level] + + def get_buildtype_args(self, buildtype): + # nvcc doesn't support msvc's "Edit and Continue" PDB format; "downgrade" to + # a regular PDB to avoid cl's warning to that effect (D9025 : overriding '/ZI' with '/Zi') + host_args = ['/Zi' if arg == '/ZI' else arg for arg in self.host_compiler.get_buildtype_args(buildtype)] + return cuda_buildtype_args[buildtype] + self._to_host_flags(host_args) + + def get_include_args(self, path, is_system): + if path == '': + path = '.' + return ['-I' + path] + + def get_compile_debugfile_args(self, rel_obj, **kwargs): + return self._to_host_flags(self.host_compiler.get_compile_debugfile_args(rel_obj, **kwargs)) + + def get_link_debugfile_args(self, targetfile): + return self._cook_link_args(self.host_compiler.get_link_debugfile_args(targetfile)) + + def depfile_for_object(self, objfile): + return objfile + '.' + self.get_depfile_suffix() + + def get_depfile_suffix(self): + return 'd' + + def get_buildtype_linker_args(self, buildtype): + return self._cook_link_args(self.host_compiler.get_buildtype_linker_args(buildtype)) + + def build_rpath_args(self, env: 'Environment', build_dir: str, from_dir: str, + rpath_paths: str, build_rpath: str, + install_rpath: str) -> T.Tuple[T.List[str], T.Set[bytes]]: + (rpath_args, rpath_dirs_to_remove) = self.host_compiler.build_rpath_args( + env, build_dir, from_dir, rpath_paths, build_rpath, install_rpath) + return (self._cook_link_args(rpath_args), rpath_dirs_to_remove) + + def linker_to_compiler_args(self, args): + return args + + def get_pic_args(self): + return self._to_host_flags(self.host_compiler.get_pic_args()) + + def compute_parameters_with_absolute_paths(self, parameter_list, build_dir): + return [] + + def get_output_args(self, target: str) -> T.List[str]: + return ['-o', target] + + def get_std_exe_link_args(self) -> T.List[str]: + return self._cook_link_args(self.host_compiler.get_std_exe_link_args()) + + def find_library(self, libname, env, extra_dirs, libtype: LibType = LibType.PREFER_SHARED): + return ['-l' + libname] # FIXME + + def get_crt_compile_args(self, crt_val, buildtype): + return self._to_host_flags(self.host_compiler.get_crt_compile_args(crt_val, buildtype)) + + def get_crt_link_args(self, crt_val, buildtype): + # nvcc defaults to static, release version of msvc runtime and provides no + # native option to override it; override it with /NODEFAULTLIB + host_link_arg_overrides = [] + host_crt_compile_args = self.host_compiler.get_crt_compile_args(crt_val, buildtype) + if any(arg in ['/MDd', '/MD', '/MTd'] for arg in host_crt_compile_args): + host_link_arg_overrides += ['/NODEFAULTLIB:LIBCMT.lib'] + return self._cook_link_args(host_link_arg_overrides + self.host_compiler.get_crt_link_args(crt_val, buildtype)) + + def get_target_link_args(self, target): + return self._cook_link_args(super().get_target_link_args(target)) + + def get_dependency_compile_args(self, dep): + return self._to_host_flags(super().get_dependency_compile_args(dep)) + + def get_dependency_link_args(self, dep): + return self._cook_link_args(super().get_dependency_link_args(dep)) diff --git a/meson/mesonbuild/compilers/d.py b/meson/mesonbuild/compilers/d.py new file mode 100644 index 000000000..a74dc957e --- /dev/null +++ b/meson/mesonbuild/compilers/d.py @@ -0,0 +1,822 @@ +# Copyright 2012-2017 The Meson development team + +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at + +# http://www.apache.org/licenses/LICENSE-2.0 + +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import os.path, subprocess +import typing as T + +from ..mesonlib import ( + EnvironmentException, MachineChoice, version_compare, +) + +from ..arglist import CompilerArgs +from .compilers import ( + d_dmd_buildtype_args, + d_gdc_buildtype_args, + d_ldc_buildtype_args, + clike_debug_args, + Compiler, +) +from .mixins.gnu import GnuCompiler + +if T.TYPE_CHECKING: + from ..envconfig import MachineInfo + +d_feature_args = {'gcc': {'unittest': '-funittest', + 'debug': '-fdebug', + 'version': '-fversion', + 'import_dir': '-J' + }, + 'llvm': {'unittest': '-unittest', + 'debug': '-d-debug', + 'version': '-d-version', + 'import_dir': '-J' + }, + 'dmd': {'unittest': '-unittest', + 'debug': '-debug', + 'version': '-version', + 'import_dir': '-J' + } + } + +ldc_optimization_args = {'0': [], + 'g': [], + '1': ['-O1'], + '2': ['-O2'], + '3': ['-O3'], + 's': ['-Os'], + } + +dmd_optimization_args = {'0': [], + 'g': [], + '1': ['-O'], + '2': ['-O'], + '3': ['-O'], + 's': ['-O'], + } + + +class DmdLikeCompilerMixin: + + LINKER_PREFIX = '-L=' + + def get_output_args(self, target): + return ['-of=' + target] + + def get_linker_output_args(self, target): + return ['-of=' + target] + + def get_include_args(self, path, is_system): + return ['-I=' + path] + + def compute_parameters_with_absolute_paths(self, parameter_list, build_dir): + for idx, i in enumerate(parameter_list): + if i[:3] == '-I=': + parameter_list[idx] = i[:3] + os.path.normpath(os.path.join(build_dir, i[3:])) + if i[:4] == '-L-L': + parameter_list[idx] = i[:4] + os.path.normpath(os.path.join(build_dir, i[4:])) + if i[:5] == '-L=-L': + parameter_list[idx] = i[:5] + os.path.normpath(os.path.join(build_dir, i[5:])) + if i[:6] == '-Wl,-L': + parameter_list[idx] = i[:6] + os.path.normpath(os.path.join(build_dir, i[6:])) + + return parameter_list + + def get_warn_args(self, level): + return ['-wi'] + + def get_werror_args(self): + return ['-w'] + + def get_dependency_gen_args(self, outtarget, outfile): + # DMD and LDC does not currently return Makefile-compatible dependency info. + return [] + + def get_coverage_args(self): + return ['-cov'] + + def get_coverage_link_args(self): + return [] + + def get_preprocess_only_args(self): + return ['-E'] + + def get_compile_only_args(self): + return ['-c'] + + def depfile_for_object(self, objfile): + return objfile + '.' + self.get_depfile_suffix() + + def get_depfile_suffix(self): + return 'deps' + + def get_pic_args(self): + if self.info.is_windows(): + return [] + return ['-fPIC'] + + def get_feature_args(self, kwargs, build_to_src): + res = [] + if 'unittest' in kwargs: + unittest = kwargs.pop('unittest') + unittest_arg = d_feature_args[self.id]['unittest'] + if not unittest_arg: + raise EnvironmentException('D compiler %s does not support the "unittest" feature.' % self.name_string()) + if unittest: + res.append(unittest_arg) + + if 'debug' in kwargs: + debug_level = -1 + debugs = kwargs.pop('debug') + if not isinstance(debugs, list): + debugs = [debugs] + + debug_arg = d_feature_args[self.id]['debug'] + if not debug_arg: + raise EnvironmentException('D compiler %s does not support conditional debug identifiers.' % self.name_string()) + + # Parse all debug identifiers and the largest debug level identifier + for d in debugs: + if isinstance(d, int): + if d > debug_level: + debug_level = d + elif isinstance(d, str) and d.isdigit(): + if int(d) > debug_level: + debug_level = int(d) + else: + res.append('{0}={1}'.format(debug_arg, d)) + + if debug_level >= 0: + res.append('{0}={1}'.format(debug_arg, debug_level)) + + if 'versions' in kwargs: + version_level = -1 + versions = kwargs.pop('versions') + if not isinstance(versions, list): + versions = [versions] + + version_arg = d_feature_args[self.id]['version'] + if not version_arg: + raise EnvironmentException('D compiler %s does not support conditional version identifiers.' % self.name_string()) + + # Parse all version identifiers and the largest version level identifier + for v in versions: + if isinstance(v, int): + if v > version_level: + version_level = v + elif isinstance(v, str) and v.isdigit(): + if int(v) > version_level: + version_level = int(v) + else: + res.append('{0}={1}'.format(version_arg, v)) + + if version_level >= 0: + res.append('{0}={1}'.format(version_arg, version_level)) + + if 'import_dirs' in kwargs: + import_dirs = kwargs.pop('import_dirs') + if not isinstance(import_dirs, list): + import_dirs = [import_dirs] + + import_dir_arg = d_feature_args[self.id]['import_dir'] + if not import_dir_arg: + raise EnvironmentException('D compiler %s does not support the "string import directories" feature.' % self.name_string()) + for idir_obj in import_dirs: + basedir = idir_obj.get_curdir() + for idir in idir_obj.get_incdirs(): + # Avoid superfluous '/.' at the end of paths when d is '.' + if idir not in ('', '.'): + expdir = os.path.join(basedir, idir) + else: + expdir = basedir + srctreedir = os.path.join(build_to_src, expdir) + res.append('{0}{1}'.format(import_dir_arg, srctreedir)) + + if kwargs: + raise EnvironmentException('Unknown D compiler feature(s) selected: %s' % ', '.join(kwargs.keys())) + + return res + + def get_buildtype_linker_args(self, buildtype): + if buildtype != 'plain': + return self.get_target_arch_args() + return [] + + def get_std_exe_link_args(self): + return [] + + def gen_import_library_args(self, implibname): + return self.linker.import_library_args(implibname) + + def build_rpath_args(self, env, build_dir, from_dir, rpath_paths, build_rpath, install_rpath): + if self.info.is_windows(): + return ([], set()) + + # GNU ld, solaris ld, and lld acting like GNU ld + if self.linker.id.startswith('ld'): + # The way that dmd and ldc pass rpath to gcc is different than we would + # do directly, each argument -rpath and the value to rpath, need to be + # split into two separate arguments both prefaced with the -L=. + args = [] + (rpath_args, rpath_dirs_to_remove) = super().build_rpath_args( + env, build_dir, from_dir, rpath_paths, build_rpath, install_rpath) + for r in rpath_args: + if ',' in r: + a, b = r.split(',', maxsplit=1) + args.append(a) + args.append(self.LINKER_PREFIX + b) + else: + args.append(r) + return (args, rpath_dirs_to_remove) + + return super().build_rpath_args( + env, build_dir, from_dir, rpath_paths, build_rpath, install_rpath) + + def translate_args_to_nongnu(self, args): + dcargs = [] + # Translate common arguments to flags the LDC/DMD compilers + # can understand. + # The flags might have been added by pkg-config files, + # and are therefore out of the user's control. + for arg in args: + # Translate OS specific arguments first. + osargs = [] + if self.info.is_windows(): + osargs = self.translate_arg_to_windows(arg) + elif self.info.is_darwin(): + osargs = self.translate_arg_to_osx(arg) + if osargs: + dcargs.extend(osargs) + continue + + # Translate common D arguments here. + if arg == '-pthread': + continue + if arg.startswith('-fstack-protector'): + continue + if arg.startswith('-D'): + continue + if arg.startswith('-Wl,'): + # Translate linker arguments here. + linkargs = arg[arg.index(',') + 1:].split(',') + for la in linkargs: + dcargs.append('-L=' + la.strip()) + continue + elif arg.startswith(('-link-defaultlib', '-linker', '-link-internally', '-linkonce-templates', '-lib')): + # these are special arguments to the LDC linker call, + # arguments like "-link-defaultlib-shared" do *not* + # denote a library to be linked, but change the default + # Phobos/DRuntime linking behavior, while "-linker" sets the + # default linker. + dcargs.append(arg) + continue + elif arg.startswith('-l'): + # translate library link flag + dcargs.append('-L=' + arg) + continue + elif arg.startswith('-isystem'): + # translate -isystem system include path + # this flag might sometimes be added by C library Cflags via + # pkg-config. + # NOTE: -isystem and -I are not 100% equivalent, so this is just + # a workaround for the most common cases. + if arg.startswith('-isystem='): + dcargs.append('-I=' + arg[9:]) + else: + dcargs.append('-I' + arg[8:]) + continue + elif arg.startswith('-idirafter'): + # same as -isystem, but appends the path instead + if arg.startswith('-idirafter='): + dcargs.append('-I=' + arg[11:]) + else: + dcargs.append('-I' + arg[10:]) + continue + elif arg.startswith('-L/') or arg.startswith('-L./'): + # we need to handle cases where -L is set by e.g. a pkg-config + # setting to select a linker search path. We can however not + # unconditionally prefix '-L' with '-L' because the user might + # have set this flag too to do what it is intended to for this + # compiler (pass flag through to the linker) + # Hence, we guess here whether the flag was intended to pass + # a linker search path. + + # Make sure static library files are passed properly to the linker. + if arg.endswith('.a') or arg.endswith('.lib'): + if arg.startswith('-L='): + farg = arg[3:] + else: + farg = arg[2:] + if len(farg) > 0 and not farg.startswith('-'): + dcargs.append('-L=' + farg) + continue + + dcargs.append('-L=' + arg) + continue + elif not arg.startswith('-') and arg.endswith(('.a', '.lib')): + # ensure static libraries are passed through to the linker + dcargs.append('-L=' + arg) + continue + else: + dcargs.append(arg) + + return dcargs + + @classmethod + def translate_arg_to_windows(cls, arg): + args = [] + if arg.startswith('-Wl,'): + # Translate linker arguments here. + linkargs = arg[arg.index(',') + 1:].split(',') + for la in linkargs: + if la.startswith('--out-implib='): + # Import library name + args.append('-L=/IMPLIB:' + la[13:].strip()) + elif arg.startswith('-mscrtlib='): + args.append(arg) + mscrtlib = arg[10:].lower() + if cls is LLVMDCompiler: + # Default crt libraries for LDC2 must be excluded for other + # selected crt options. + if mscrtlib != 'libcmt': + args.append('-L=/NODEFAULTLIB:libcmt') + args.append('-L=/NODEFAULTLIB:libvcruntime') + + # Fixes missing definitions for printf-functions in VS2017 + if mscrtlib.startswith('msvcrt'): + args.append('-L=/DEFAULTLIB:legacy_stdio_definitions.lib') + + return args + + @classmethod + def translate_arg_to_osx(cls, arg): + args = [] + if arg.startswith('-install_name'): + args.append('-L=' + arg) + return args + + def get_debug_args(self, is_debug): + ddebug_args = [] + if is_debug: + ddebug_args = [d_feature_args[self.id]['debug']] + + return clike_debug_args[is_debug] + ddebug_args + + def get_crt_args(self, crt_val, buildtype): + if not self.info.is_windows(): + return [] + + if crt_val in self.mscrt_args: + return self.mscrt_args[crt_val] + assert(crt_val == 'from_buildtype') + + # Match what build type flags used to do. + if buildtype == 'plain': + return [] + elif buildtype == 'debug': + return self.mscrt_args['mdd'] + elif buildtype == 'debugoptimized': + return self.mscrt_args['md'] + elif buildtype == 'release': + return self.mscrt_args['md'] + elif buildtype == 'minsize': + return self.mscrt_args['md'] + else: + assert(buildtype == 'custom') + raise EnvironmentException('Requested C runtime based on buildtype, but buildtype is "custom".') + + def get_soname_args(self, *args, **kwargs) -> T.List[str]: + # LDC and DMD actually do use a linker, but they proxy all of that with + # their own arguments + if self.linker.id.startswith('ld.'): + soargs = [] + for arg in super().get_soname_args(*args, **kwargs): + a, b = arg.split(',', maxsplit=1) + soargs.append(a) + soargs.append(self.LINKER_PREFIX + b) + return soargs + elif self.linker.id.startswith('ld64'): + soargs = [] + for arg in super().get_soname_args(*args, **kwargs): + if not arg.startswith(self.LINKER_PREFIX): + soargs.append(self.LINKER_PREFIX + arg) + else: + soargs.append(arg) + return soargs + else: + return super().get_soname_args(*args, **kwargs) + + def get_allow_undefined_link_args(self) -> T.List[str]: + args = self.linker.get_allow_undefined_args() + if self.info.is_darwin(): + # On macOS we're passing these options to the C compiler, but + # they're linker options and need -Wl, so clang/gcc knows what to + # do with them. I'm assuming, but don't know for certain, that + # ldc/dmd do some kind of mapping internally for arguments they + # understand, but pass arguments they don't understand directly. + args = [a.replace('-L=', '-Xcc=-Wl,') for a in args] + return args + +class DCompilerArgs(CompilerArgs): + prepend_prefixes = ('-I', '-L') + dedup2_prefixes = ('-I') + +class DCompiler(Compiler): + mscrt_args = { + 'none': ['-mscrtlib='], + 'md': ['-mscrtlib=msvcrt'], + 'mdd': ['-mscrtlib=msvcrtd'], + 'mt': ['-mscrtlib=libcmt'], + 'mtd': ['-mscrtlib=libcmtd'], + } + + language = 'd' + + def __init__(self, exelist, version, for_machine: MachineChoice, + info: 'MachineInfo', arch, is_cross, exe_wrapper, **kwargs): + super().__init__(exelist, version, for_machine, info, **kwargs) + self.id = 'unknown' + self.arch = arch + self.exe_wrapper = exe_wrapper + self.is_cross = is_cross + + def sanity_check(self, work_dir, environment): + source_name = os.path.join(work_dir, 'sanity.d') + output_name = os.path.join(work_dir, 'dtest') + with open(source_name, 'w') as ofile: + ofile.write('''void main() { }''') + pc = subprocess.Popen(self.exelist + self.get_output_args(output_name) + self.get_target_arch_args() + [source_name], cwd=work_dir) + pc.wait() + if pc.returncode != 0: + raise EnvironmentException('D compiler %s can not compile programs.' % self.name_string()) + if self.is_cross: + if self.exe_wrapper is None: + # Can't check if the binaries run so we have to assume they do + return + cmdlist = self.exe_wrapper.get_command() + [output_name] + else: + cmdlist = [output_name] + if subprocess.call(cmdlist) != 0: + raise EnvironmentException('Executables created by D compiler %s are not runnable.' % self.name_string()) + + def needs_static_linker(self): + return True + + def depfile_for_object(self, objfile): + return objfile + '.' + self.get_depfile_suffix() + + def get_depfile_suffix(self): + return 'deps' + + def get_pic_args(self): + if self.info.is_windows(): + return [] + return ['-fPIC'] + + def get_feature_args(self, kwargs, build_to_src): + res = [] + if 'unittest' in kwargs: + unittest = kwargs.pop('unittest') + unittest_arg = d_feature_args[self.id]['unittest'] + if not unittest_arg: + raise EnvironmentException('D compiler %s does not support the "unittest" feature.' % self.name_string()) + if unittest: + res.append(unittest_arg) + + if 'debug' in kwargs: + debug_level = -1 + debugs = kwargs.pop('debug') + if not isinstance(debugs, list): + debugs = [debugs] + + debug_arg = d_feature_args[self.id]['debug'] + if not debug_arg: + raise EnvironmentException('D compiler %s does not support conditional debug identifiers.' % self.name_string()) + + # Parse all debug identifiers and the largest debug level identifier + for d in debugs: + if isinstance(d, int): + if d > debug_level: + debug_level = d + elif isinstance(d, str) and d.isdigit(): + if int(d) > debug_level: + debug_level = int(d) + else: + res.append('{0}={1}'.format(debug_arg, d)) + + if debug_level >= 0: + res.append('{0}={1}'.format(debug_arg, debug_level)) + + if 'versions' in kwargs: + version_level = -1 + versions = kwargs.pop('versions') + if not isinstance(versions, list): + versions = [versions] + + version_arg = d_feature_args[self.id]['version'] + if not version_arg: + raise EnvironmentException('D compiler %s does not support conditional version identifiers.' % self.name_string()) + + # Parse all version identifiers and the largest version level identifier + for v in versions: + if isinstance(v, int): + if v > version_level: + version_level = v + elif isinstance(v, str) and v.isdigit(): + if int(v) > version_level: + version_level = int(v) + else: + res.append('{0}={1}'.format(version_arg, v)) + + if version_level >= 0: + res.append('{0}={1}'.format(version_arg, version_level)) + + if 'import_dirs' in kwargs: + import_dirs = kwargs.pop('import_dirs') + if not isinstance(import_dirs, list): + import_dirs = [import_dirs] + + import_dir_arg = d_feature_args[self.id]['import_dir'] + if not import_dir_arg: + raise EnvironmentException('D compiler %s does not support the "string import directories" feature.' % self.name_string()) + for idir_obj in import_dirs: + basedir = idir_obj.get_curdir() + for idir in idir_obj.get_incdirs(): + # Avoid superfluous '/.' at the end of paths when d is '.' + if idir not in ('', '.'): + expdir = os.path.join(basedir, idir) + else: + expdir = basedir + srctreedir = os.path.join(build_to_src, expdir) + res.append('{0}{1}'.format(import_dir_arg, srctreedir)) + + if kwargs: + raise EnvironmentException('Unknown D compiler feature(s) selected: %s' % ', '.join(kwargs.keys())) + + return res + + def get_buildtype_linker_args(self, buildtype): + if buildtype != 'plain': + return self.get_target_arch_args() + return [] + + def get_std_exe_link_args(self): + return [] + + def _get_compiler_check_args(self, env, extra_args, dependencies, mode='compile'): + if callable(extra_args): + extra_args = extra_args(mode) + if extra_args is None: + extra_args = [] + elif isinstance(extra_args, str): + extra_args = [extra_args] + if dependencies is None: + dependencies = [] + elif not isinstance(dependencies, list): + dependencies = [dependencies] + # Collect compiler arguments + args = self.compiler_args() + for d in dependencies: + # Add compile flags needed by dependencies + args += d.get_compile_args() + if mode == 'link': + # Add link flags needed to find dependencies + args += d.get_link_args() + + if mode == 'compile': + # Add DFLAGS from the env + args += env.coredata.get_external_args(self.for_machine, self.language) + elif mode == 'link': + # Add LDFLAGS from the env + args += env.coredata.get_external_link_args(self.for_machine, self.language) + # extra_args must override all other arguments, so we add them last + args += extra_args + return args + + def compiler_args(self, args: T.Optional[T.Iterable[str]] = None) -> DCompilerArgs: + return DCompilerArgs(self, args) + + def compiles(self, code, env, *, extra_args=None, dependencies=None, mode='compile'): + args = self._get_compiler_check_args(env, extra_args, dependencies, mode) + + with self.cached_compile(code, env.coredata, extra_args=args, mode=mode) as p: + return p.returncode == 0, p.cached + + def has_multi_arguments(self, args, env): + return self.compiles('int i;\n', env, extra_args=args) + + def get_target_arch_args(self): + # LDC2 on Windows targets to current OS architecture, but + # it should follow the target specified by the MSVC toolchain. + if self.info.is_windows(): + if self.arch == 'x86_64': + return ['-m64'] + return ['-m32'] + return [] + + def get_crt_compile_args(self, crt_val, buildtype): + return [] + + def get_crt_link_args(self, crt_val, buildtype): + return [] + + def thread_link_flags(self, env): + return self.linker.thread_flags(env) + + def name_string(self): + return ' '.join(self.exelist) + + +class GnuDCompiler(GnuCompiler, DCompiler): + + # we mostly want DCompiler, but that gives us the Compiler.LINKER_PREFIX instead + LINKER_PREFIX = GnuCompiler.LINKER_PREFIX + + def __init__(self, exelist, version, for_machine: MachineChoice, + info: 'MachineInfo', is_cross, exe_wrapper, arch, **kwargs): + DCompiler.__init__(self, exelist, version, for_machine, info, is_cross, exe_wrapper, arch, **kwargs) + GnuCompiler.__init__(self, {}) + self.id = 'gcc' + default_warn_args = ['-Wall', '-Wdeprecated'] + self.warn_args = {'0': [], + '1': default_warn_args, + '2': default_warn_args + ['-Wextra'], + '3': default_warn_args + ['-Wextra', '-Wpedantic']} + self.base_options = ['b_colorout', 'b_sanitize', 'b_staticpic', + 'b_vscrt', 'b_coverage', 'b_pgo', 'b_ndebug'] + + self._has_color_support = version_compare(self.version, '>=4.9') + # dependencies were implemented before, but broken - support was fixed in GCC 7.1+ + # (and some backported versions) + self._has_deps_support = version_compare(self.version, '>=7.1') + + def get_colorout_args(self, colortype): + if self._has_color_support: + super().get_colorout_args(colortype) + return [] + + def get_dependency_gen_args(self, outtarget, outfile): + if self._has_deps_support: + return super().get_dependency_gen_args(outtarget, outfile) + return [] + + def get_warn_args(self, level): + return self.warn_args[level] + + def get_buildtype_args(self, buildtype): + return d_gdc_buildtype_args[buildtype] + + def compute_parameters_with_absolute_paths(self, parameter_list, build_dir): + for idx, i in enumerate(parameter_list): + if i[:2] == '-I' or i[:2] == '-L': + parameter_list[idx] = i[:2] + os.path.normpath(os.path.join(build_dir, i[2:])) + + return parameter_list + + def get_allow_undefined_link_args(self) -> T.List[str]: + return self.linker.get_allow_undefined_args() + + def get_linker_always_args(self) -> T.List[str]: + args = super().get_linker_always_args() + if self.info.is_windows(): + return args + return args + ['-shared-libphobos'] + + def get_disable_assert_args(self): + return ['-frelease'] + + +class LLVMDCompiler(DmdLikeCompilerMixin, DCompiler): + + def __init__(self, exelist, version, for_machine: MachineChoice, + info: 'MachineInfo', arch, **kwargs): + DCompiler.__init__(self, exelist, version, for_machine, info, arch, False, None, **kwargs) + self.id = 'llvm' + self.base_options = ['b_coverage', 'b_colorout', 'b_vscrt', 'b_ndebug'] + + def get_colorout_args(self, colortype): + if colortype == 'always': + return ['-enable-color'] + return [] + + def get_warn_args(self, level): + if level == '2' or level == '3': + return ['-wi', '-dw'] + elif level == '1': + return ['-wi'] + else: + return [] + + def get_buildtype_args(self, buildtype): + if buildtype != 'plain': + return self.get_target_arch_args() + d_ldc_buildtype_args[buildtype] + return d_ldc_buildtype_args[buildtype] + + def get_pic_args(self): + return ['-relocation-model=pic'] + + def get_crt_link_args(self, crt_val, buildtype): + return self.get_crt_args(crt_val, buildtype) + + def unix_args_to_native(self, args): + return self.translate_args_to_nongnu(args) + + def get_optimization_args(self, optimization_level): + return ldc_optimization_args[optimization_level] + + @classmethod + def use_linker_args(cls, linker: str) -> T.List[str]: + return ['-linker={}'.format(linker)] + + def get_linker_always_args(self) -> T.List[str]: + args = super().get_linker_always_args() + if self.info.is_windows(): + return args + return args + ['-link-defaultlib-shared'] + + def get_disable_assert_args(self) -> T.List[str]: + return ['--release'] + + +class DmdDCompiler(DmdLikeCompilerMixin, DCompiler): + + def __init__(self, exelist, version, for_machine: MachineChoice, + info: 'MachineInfo', arch, **kwargs): + DCompiler.__init__(self, exelist, version, for_machine, info, arch, False, None, **kwargs) + self.id = 'dmd' + self.base_options = ['b_coverage', 'b_colorout', 'b_vscrt', 'b_ndebug'] + + def get_colorout_args(self, colortype): + if colortype == 'always': + return ['-color=on'] + return [] + + def get_buildtype_args(self, buildtype): + if buildtype != 'plain': + return self.get_target_arch_args() + d_dmd_buildtype_args[buildtype] + return d_dmd_buildtype_args[buildtype] + + def get_std_exe_link_args(self): + if self.info.is_windows(): + # DMD links against D runtime only when main symbol is found, + # so these needs to be inserted when linking static D libraries. + if self.arch == 'x86_64': + return ['phobos64.lib'] + elif self.arch == 'x86_mscoff': + return ['phobos32mscoff.lib'] + return ['phobos.lib'] + return [] + + def get_std_shared_lib_link_args(self): + libname = 'libphobos2.so' + if self.info.is_windows(): + if self.arch == 'x86_64': + libname = 'phobos64.lib' + elif self.arch == 'x86_mscoff': + libname = 'phobos32mscoff.lib' + else: + libname = 'phobos.lib' + return ['-shared', '-defaultlib=' + libname] + + def get_target_arch_args(self): + # DMD32 and DMD64 on 64-bit Windows defaults to 32-bit (OMF). + # Force the target to 64-bit in order to stay consistent + # across the different platforms. + if self.info.is_windows(): + if self.arch == 'x86_64': + return ['-m64'] + elif self.arch == 'x86_mscoff': + return ['-m32mscoff'] + return ['-m32'] + return [] + + def get_crt_compile_args(self, crt_val, buildtype): + return self.get_crt_args(crt_val, buildtype) + + def unix_args_to_native(self, args): + return self.translate_args_to_nongnu(args) + + def get_optimization_args(self, optimization_level): + return dmd_optimization_args[optimization_level] + + def can_linker_accept_rsp(self) -> bool: + return False + + def get_linker_always_args(self) -> T.List[str]: + args = super().get_linker_always_args() + if self.info.is_windows(): + return args + return args + ['-defaultlib=phobos2', '-debuglib=phobos2'] + + def get_disable_assert_args(self) -> T.List[str]: + return ['-release'] diff --git a/meson/mesonbuild/compilers/fortran.py b/meson/mesonbuild/compilers/fortran.py new file mode 100644 index 000000000..af83c0e56 --- /dev/null +++ b/meson/mesonbuild/compilers/fortran.py @@ -0,0 +1,470 @@ +# Copyright 2012-2017 The Meson development team + +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at + +# http://www.apache.org/licenses/LICENSE-2.0 + +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from pathlib import Path +import typing as T +import subprocess, os + +from .. import coredata +from .compilers import ( + clike_debug_args, + Compiler, +) +from .mixins.clike import CLikeCompiler +from .mixins.gnu import ( + GnuCompiler, gnulike_buildtype_args, gnu_optimization_args, +) +from .mixins.intel import IntelGnuLikeCompiler, IntelVisualStudioLikeCompiler +from .mixins.clang import ClangCompiler +from .mixins.elbrus import ElbrusCompiler +from .mixins.pgi import PGICompiler +from .. import mlog + +from mesonbuild.mesonlib import ( + version_compare, EnvironmentException, MesonException, MachineChoice, LibType +) + +if T.TYPE_CHECKING: + from ..envconfig import MachineInfo + + +class FortranCompiler(CLikeCompiler, Compiler): + + language = 'fortran' + + def __init__(self, exelist, version, for_machine: MachineChoice, + is_cross, info: 'MachineInfo', exe_wrapper=None, **kwargs): + Compiler.__init__(self, exelist, version, for_machine, info, **kwargs) + CLikeCompiler.__init__(self, is_cross, exe_wrapper) + self.id = 'unknown' + + def has_function(self, funcname, prefix, env, *, extra_args=None, dependencies=None): + raise MesonException('Fortran does not have "has_function" capability.\n' + 'It is better to test if a Fortran capability is working like:\n\n' + "meson.get_compiler('fortran').links('block; end block; end program')\n\n" + 'that example is to see if the compiler has Fortran 2008 Block element.') + + def sanity_check(self, work_dir: Path, environment): + """ + Check to be sure a minimal program can compile and execute + with this compiler & platform. + """ + work_dir = Path(work_dir) + source_name = work_dir / 'sanitycheckf.f90' + binary_name = work_dir / 'sanitycheckf' + if binary_name.is_file(): + binary_name.unlink() + + source_name.write_text('print *, "Fortran compilation is working."; end') + + extra_flags = [] + extra_flags += environment.coredata.get_external_args(self.for_machine, self.language) + extra_flags += environment.coredata.get_external_link_args(self.for_machine, self.language) + extra_flags += self.get_always_args() + # %% build the test executable "sanitycheckf" + # cwd=work_dir is necessary on Windows especially for Intel compilers to avoid error: cannot write on sanitycheckf.obj + # this is a defect with how Windows handles files and ifort's object file-writing behavior vis concurrent ProcessPoolExecutor. + # This simple workaround solves the issue. + # FIXME: cwd=str(work_dir) is for Python 3.5 on Windows, when 3.5 is deprcated, this can become cwd=work_dir + returncode = subprocess.run(self.exelist + extra_flags + [str(source_name), '-o', str(binary_name)], + cwd=str(work_dir)).returncode + if returncode != 0: + raise EnvironmentException('Compiler %s can not compile programs.' % self.name_string()) + if self.is_cross: + if self.exe_wrapper is None: + # Can't check if the binaries run so we have to assume they do + return + cmdlist = self.exe_wrapper + [str(binary_name)] + else: + cmdlist = [str(binary_name)] + # %% Run the test executable + try: + returncode = subprocess.run(cmdlist, stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL).returncode + if returncode != 0: + raise EnvironmentException('Executables created by Fortran compiler %s are not runnable.' % self.name_string()) + except OSError: + raise EnvironmentException('Executables created by Fortran compiler %s are not runnable.' % self.name_string()) + + def get_std_warn_args(self, level): + return FortranCompiler.std_warn_args + + def get_buildtype_args(self, buildtype): + return gnulike_buildtype_args[buildtype] + + def get_optimization_args(self, optimization_level): + return gnu_optimization_args[optimization_level] + + def get_debug_args(self, is_debug): + return clike_debug_args[is_debug] + + def get_dependency_gen_args(self, outtarget, outfile): + return [] + + def get_preprocess_only_args(self): + return ['-cpp'] + super().get_preprocess_only_args() + + def get_module_incdir_args(self): + return ('-I', ) + + def get_module_outdir_args(self, path): + return ['-module', path] + + def compute_parameters_with_absolute_paths(self, parameter_list, build_dir): + for idx, i in enumerate(parameter_list): + if i[:2] == '-I' or i[:2] == '-L': + parameter_list[idx] = i[:2] + os.path.normpath(os.path.join(build_dir, i[2:])) + + return parameter_list + + def module_name_to_filename(self, module_name: str) -> str: + if '_' in module_name: # submodule + s = module_name.lower() + if self.id in ('gcc', 'intel', 'intel-cl'): + filename = s.replace('_', '@') + '.smod' + elif self.id in ('pgi', 'flang'): + filename = s.replace('_', '-') + '.mod' + else: + filename = s + '.mod' + else: # module + filename = module_name.lower() + '.mod' + + return filename + + def find_library(self, libname, env, extra_dirs, libtype: LibType = LibType.PREFER_SHARED): + code = 'stop; end program' + return self.find_library_impl(libname, env, extra_dirs, code, libtype) + + def has_multi_arguments(self, args: T.Sequence[str], env): + for arg in args[:]: + # some compilers, e.g. GCC, don't warn for unsupported warning-disable + # flags, so when we are testing a flag like "-Wno-forgotten-towel", also + # check the equivalent enable flag too "-Wforgotten-towel" + # GCC does error for "-fno-foobar" + if arg.startswith('-Wno-'): + args.append('-W' + arg[5:]) + if arg.startswith('-Wl,'): + mlog.warning('{} looks like a linker argument, ' + 'but has_argument and other similar methods only ' + 'support checking compiler arguments. Using them ' + 'to check linker arguments are never supported, ' + 'and results are likely to be wrong regardless of ' + 'the compiler you are using. has_link_argument or ' + 'other similar method can be used instead.' + .format(arg)) + code = 'stop; end program' + return self.has_arguments(args, env, code, mode='compile') + + +class GnuFortranCompiler(GnuCompiler, FortranCompiler): + def __init__(self, exelist, version, for_machine: MachineChoice, + is_cross, info: 'MachineInfo', exe_wrapper=None, + defines=None, **kwargs): + FortranCompiler.__init__(self, exelist, version, for_machine, + is_cross, info, exe_wrapper, **kwargs) + GnuCompiler.__init__(self, defines) + default_warn_args = ['-Wall'] + self.warn_args = {'0': [], + '1': default_warn_args, + '2': default_warn_args + ['-Wextra'], + '3': default_warn_args + ['-Wextra', '-Wpedantic', '-fimplicit-none']} + + def get_options(self): + opts = FortranCompiler.get_options(self) + fortran_stds = ['legacy', 'f95', 'f2003'] + if version_compare(self.version, '>=4.4.0'): + fortran_stds += ['f2008'] + if version_compare(self.version, '>=8.0.0'): + fortran_stds += ['f2018'] + opts.update({ + 'std': coredata.UserComboOption( + 'Fortran language standard to use', + ['none'] + fortran_stds, + 'none', + ), + }) + return opts + + def get_option_compile_args(self, options) -> T.List[str]: + args = [] + std = options['std'] + if std.value != 'none': + args.append('-std=' + std.value) + return args + + def get_dependency_gen_args(self, outtarget, outfile) -> T.List[str]: + # Disabled until this is fixed: + # https://gcc.gnu.org/bugzilla/show_bug.cgi?id=62162 + # return ['-cpp', '-MD', '-MQ', outtarget] + return [] + + def get_module_outdir_args(self, path: str) -> T.List[str]: + return ['-J' + path] + + def language_stdlib_only_link_flags(self) -> T.List[str]: + return ['-lgfortran', '-lm'] + + def has_header(self, hname, prefix, env, *, extra_args=None, dependencies=None, disable_cache=False): + ''' + Derived from mixins/clike.py:has_header, but without C-style usage of + __has_include which breaks with GCC-Fortran 10: + https://github.com/mesonbuild/meson/issues/7017 + ''' + fargs = {'prefix': prefix, 'header': hname} + code = '{prefix}\n#include <{header}>' + return self.compiles(code.format(**fargs), env, extra_args=extra_args, + dependencies=dependencies, mode='preprocess', disable_cache=disable_cache) + + +class ElbrusFortranCompiler(GnuFortranCompiler, ElbrusCompiler): + def __init__(self, exelist, version, for_machine: MachineChoice, + is_cross, info: 'MachineInfo', exe_wrapper=None, + defines=None, **kwargs): + GnuFortranCompiler.__init__(self, exelist, version, for_machine, + is_cross, info, exe_wrapper, defines, + **kwargs) + ElbrusCompiler.__init__(self) + +class G95FortranCompiler(FortranCompiler): + + LINKER_PREFIX = '-Wl,' + + def __init__(self, exelist, version, for_machine: MachineChoice, + is_cross, info: 'MachineInfo', exe_wrapper=None, **kwargs): + FortranCompiler.__init__(self, exelist, version, for_machine, + is_cross, info, exe_wrapper, **kwargs) + self.id = 'g95' + default_warn_args = ['-Wall'] + self.warn_args = {'0': [], + '1': default_warn_args, + '2': default_warn_args + ['-Wextra'], + '3': default_warn_args + ['-Wextra', '-pedantic']} + + def get_module_outdir_args(self, path: str) -> T.List[str]: + return ['-fmod=' + path] + + def get_no_warn_args(self): + # FIXME: Confirm that there's no compiler option to disable all warnings + return [] + + +class SunFortranCompiler(FortranCompiler): + + LINKER_PREFIX = '-Wl,' + + def __init__(self, exelist, version, for_machine: MachineChoice, + is_cross, info: 'MachineInfo', exe_wrapper=None, + **kwargs): + FortranCompiler.__init__(self, exelist, version, for_machine, is_cross, info, exe_wrapper, **kwargs) + self.id = 'sun' + + def get_dependency_gen_args(self, outtarget, outfile) -> T.List[str]: + return ['-fpp'] + + def get_always_args(self): + return [] + + def get_warn_args(self, level): + return [] + + def get_module_incdir_args(self): + return ('-M', ) + + def get_module_outdir_args(self, path: str) -> T.List[str]: + return ['-moddir=' + path] + + def openmp_flags(self) -> T.List[str]: + return ['-xopenmp'] + + +class IntelFortranCompiler(IntelGnuLikeCompiler, FortranCompiler): + + def __init__(self, exelist, version, for_machine: MachineChoice, + is_cross, info: 'MachineInfo', exe_wrapper=None, + **kwargs): + self.file_suffixes = ('f90', 'f', 'for', 'ftn', 'fpp') + FortranCompiler.__init__(self, exelist, version, for_machine, + is_cross, info, exe_wrapper, **kwargs) + # FIXME: Add support for OS X and Windows in detect_fortran_compiler so + # we are sent the type of compiler + IntelGnuLikeCompiler.__init__(self) + self.id = 'intel' + default_warn_args = ['-warn', 'general', '-warn', 'truncated_source'] + self.warn_args = {'0': [], + '1': default_warn_args, + '2': default_warn_args + ['-warn', 'unused'], + '3': ['-warn', 'all']} + + def get_options(self): + opts = FortranCompiler.get_options(self) + fortran_stds = ['legacy', 'f95', 'f2003', 'f2008', 'f2018'] + opts.update({ + 'std': coredata.UserComboOption( + 'Fortran language standard to use', + ['none'] + fortran_stds, + 'none', + ), + }) + return opts + + def get_option_compile_args(self, options) -> T.List[str]: + args = [] + std = options['std'] + stds = {'legacy': 'none', 'f95': 'f95', 'f2003': 'f03', 'f2008': 'f08', 'f2018': 'f18'} + if std.value != 'none': + args.append('-stand=' + stds[std.value]) + return args + + def get_preprocess_only_args(self) -> T.List[str]: + return ['-cpp', '-EP'] + + def get_always_args(self): + """Ifort doesn't have -pipe.""" + val = super().get_always_args() + val.remove('-pipe') + return val + + def language_stdlib_only_link_flags(self) -> T.List[str]: + return ['-lifcore', '-limf'] + + def get_dependency_gen_args(self, outtarget: str, outfile: str) -> T.List[str]: + return ['-gen-dep=' + outtarget, '-gen-depformat=make'] + + +class IntelClFortranCompiler(IntelVisualStudioLikeCompiler, FortranCompiler): + + file_suffixes = ['f90', 'f', 'for', 'ftn', 'fpp'] + always_args = ['/nologo'] + + def __init__(self, exelist, version, for_machine: MachineChoice, + is_cross, target: str, info: 'MachineInfo', exe_wrapper=None, + **kwargs): + FortranCompiler.__init__(self, exelist, version, for_machine, + is_cross, info, exe_wrapper, **kwargs) + IntelVisualStudioLikeCompiler.__init__(self, target) + + default_warn_args = ['/warn:general', '/warn:truncated_source'] + self.warn_args = {'0': [], + '1': default_warn_args, + '2': default_warn_args + ['/warn:unused'], + '3': ['/warn:all']} + + def get_options(self): + opts = FortranCompiler.get_options(self) + fortran_stds = ['legacy', 'f95', 'f2003', 'f2008', 'f2018'] + opts.update({ + 'std': coredata.UserComboOption( + 'Fortran language standard to use', + ['none'] + fortran_stds, + 'none', + ), + }) + return opts + + def get_option_compile_args(self, options) -> T.List[str]: + args = [] + std = options['std'] + stds = {'legacy': 'none', 'f95': 'f95', 'f2003': 'f03', 'f2008': 'f08', 'f2018': 'f18'} + if std.value != 'none': + args.append('/stand:' + stds[std.value]) + return args + + def get_module_outdir_args(self, path) -> T.List[str]: + return ['/module:' + path] + + +class PathScaleFortranCompiler(FortranCompiler): + def __init__(self, exelist, version, for_machine: MachineChoice, + is_cross, info: 'MachineInfo', exe_wrapper=None, + **kwargs): + FortranCompiler.__init__(self, exelist, version, for_machine, + is_cross, info, exe_wrapper, **kwargs) + self.id = 'pathscale' + default_warn_args = ['-fullwarn'] + self.warn_args = {'0': [], + '1': default_warn_args, + '2': default_warn_args, + '3': default_warn_args} + + def openmp_flags(self) -> T.List[str]: + return ['-mp'] + + +class PGIFortranCompiler(PGICompiler, FortranCompiler): + def __init__(self, exelist, version, for_machine: MachineChoice, + is_cross, info: 'MachineInfo', exe_wrapper=None, + **kwargs): + FortranCompiler.__init__(self, exelist, version, for_machine, + is_cross, info, exe_wrapper, **kwargs) + PGICompiler.__init__(self) + + default_warn_args = ['-Minform=inform'] + self.warn_args = {'0': [], + '1': default_warn_args, + '2': default_warn_args, + '3': default_warn_args + ['-Mdclchk']} + + def language_stdlib_only_link_flags(self) -> T.List[str]: + return ['-lpgf90rtl', '-lpgf90', '-lpgf90_rpm1', '-lpgf902', + '-lpgf90rtl', '-lpgftnrtl', '-lrt'] + +class FlangFortranCompiler(ClangCompiler, FortranCompiler): + def __init__(self, exelist, version, for_machine: MachineChoice, + is_cross, info: 'MachineInfo', exe_wrapper=None, + **kwargs): + FortranCompiler.__init__(self, exelist, version, for_machine, + is_cross, info, exe_wrapper, **kwargs) + ClangCompiler.__init__(self, []) + self.id = 'flang' + default_warn_args = ['-Minform=inform'] + self.warn_args = {'0': [], + '1': default_warn_args, + '2': default_warn_args, + '3': default_warn_args} + + def language_stdlib_only_link_flags(self) -> T.List[str]: + return ['-lflang', '-lpgmath'] + +class Open64FortranCompiler(FortranCompiler): + def __init__(self, exelist, version, for_machine: MachineChoice, + is_cross, info: 'MachineInfo', exe_wrapper=None, + **kwargs): + FortranCompiler.__init__(self, exelist, version, for_machine, + is_cross, info, exe_wrapper, **kwargs) + self.id = 'open64' + default_warn_args = ['-fullwarn'] + self.warn_args = {'0': [], + '1': default_warn_args, + '2': default_warn_args, + '3': default_warn_args} + + def openmp_flags(self) -> T.List[str]: + return ['-mp'] + + +class NAGFortranCompiler(FortranCompiler): + def __init__(self, exelist, version, for_machine: MachineChoice, + is_cross, info: 'MachineInfo', exe_wrapper=None, + **kwargs): + FortranCompiler.__init__(self, exelist, version, for_machine, + is_cross, info, exe_wrapper, **kwargs) + self.id = 'nagfor' + + def get_warn_args(self, level): + return [] + + def get_module_outdir_args(self, path) -> T.List[str]: + return ['-mdir', path] + + def openmp_flags(self) -> T.List[str]: + return ['-openmp'] diff --git a/meson/mesonbuild/compilers/java.py b/meson/mesonbuild/compilers/java.py new file mode 100644 index 000000000..5aeb2508c --- /dev/null +++ b/meson/mesonbuild/compilers/java.py @@ -0,0 +1,119 @@ +# Copyright 2012-2017 The Meson development team + +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at + +# http://www.apache.org/licenses/LICENSE-2.0 + +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import os.path +import shutil +import subprocess +import typing as T + +from ..mesonlib import EnvironmentException, MachineChoice +from .compilers import Compiler, java_buildtype_args +from .mixins.islinker import BasicLinkerIsCompilerMixin + +if T.TYPE_CHECKING: + from ..envconfig import MachineInfo + +class JavaCompiler(BasicLinkerIsCompilerMixin, Compiler): + + language = 'java' + + def __init__(self, exelist, version, for_machine: MachineChoice, + info: 'MachineInfo'): + super().__init__(exelist, version, for_machine, info) + self.id = 'unknown' + self.is_cross = False + self.javarunner = 'java' + + def get_werror_args(self): + return ['-Werror'] + + def split_shlib_to_parts(self, fname): + return None, fname + + def get_dependency_gen_args(self, outtarget, outfile): + return [] + + def get_compile_only_args(self): + return [] + + def get_output_args(self, subdir): + if subdir == '': + subdir = './' + return ['-d', subdir, '-s', subdir] + + def get_coverage_args(self): + return [] + + def get_std_exe_link_args(self): + return [] + + def get_include_args(self, path): + return [] + + def get_pic_args(self): + return [] + + def name_string(self): + return ' '.join(self.exelist) + + def get_pch_use_args(self, pch_dir, header): + return [] + + def get_pch_name(self, header_name): + return '' + + def get_buildtype_args(self, buildtype): + return java_buildtype_args[buildtype] + + def compute_parameters_with_absolute_paths(self, parameter_list, build_dir): + for idx, i in enumerate(parameter_list): + if i in ['-cp', '-classpath', '-sourcepath'] and idx + 1 < len(parameter_list): + path_list = parameter_list[idx + 1].split(os.pathsep) + path_list = [os.path.normpath(os.path.join(build_dir, x)) for x in path_list] + parameter_list[idx + 1] = os.pathsep.join(path_list) + + return parameter_list + + def sanity_check(self, work_dir, environment): + src = 'SanityCheck.java' + obj = 'SanityCheck' + source_name = os.path.join(work_dir, src) + with open(source_name, 'w') as ofile: + ofile.write('''class SanityCheck { + public static void main(String[] args) { + int i; + } +} +''') + pc = subprocess.Popen(self.exelist + [src], cwd=work_dir) + pc.wait() + if pc.returncode != 0: + raise EnvironmentException('Java compiler %s can not compile programs.' % self.name_string()) + runner = shutil.which(self.javarunner) + if runner: + cmdlist = [runner, obj] + pe = subprocess.Popen(cmdlist, cwd=work_dir) + pe.wait() + if pe.returncode != 0: + raise EnvironmentException('Executables created by Java compiler %s are not runnable.' % self.name_string()) + else: + m = "Java Virtual Machine wasn't found, but it's needed by Meson. " \ + "Please install a JRE.\nIf you have specific needs where this " \ + "requirement doesn't make sense, please open a bug at " \ + "https://github.com/mesonbuild/meson/issues/new and tell us " \ + "all about it." + raise EnvironmentException(m) + + def needs_static_linker(self): + return False diff --git a/meson/mesonbuild/compilers/mixins/__init__.py b/meson/mesonbuild/compilers/mixins/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/meson/mesonbuild/compilers/mixins/arm.py b/meson/mesonbuild/compilers/mixins/arm.py new file mode 100644 index 000000000..b331d8fae --- /dev/null +++ b/meson/mesonbuild/compilers/mixins/arm.py @@ -0,0 +1,197 @@ +# Copyright 2012-2019 The Meson development team + +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at + +# http://www.apache.org/licenses/LICENSE-2.0 + +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Representations specific to the arm family of compilers.""" + +import os +import re +import typing as T + +from ... import mesonlib +from ..compilers import clike_debug_args +from .clang import clang_color_args + +if T.TYPE_CHECKING: + from ...environment import Environment + +arm_buildtype_args = { + 'plain': [], + 'debug': [], + 'debugoptimized': [], + 'release': [], + 'minsize': [], + 'custom': [], +} # type: T.Dict[str, T.List[str]] + +arm_optimization_args = { + '0': ['-O0'], + 'g': ['-g'], + '1': ['-O1'], + '2': [], # Compiler defaults to -O2 + '3': ['-O3', '-Otime'], + 's': ['-O3'], # Compiler defaults to -Ospace +} # type: T.Dict[str, T.List[str]] + +armclang_buildtype_args = { + 'plain': [], + 'debug': [], + 'debugoptimized': [], + 'release': [], + 'minsize': [], + 'custom': [], +} # type: T.Dict[str, T.List[str]] + +armclang_optimization_args = { + '0': [], # Compiler defaults to -O0 + 'g': ['-g'], + '1': ['-O1'], + '2': ['-O2'], + '3': ['-O3'], + 's': ['-Oz'] +} # type: T.Dict[str, T.List[str]] + + +class ArmCompiler: + # Functionality that is common to all ARM family compilers. + def __init__(self): + if not self.is_cross: + raise mesonlib.EnvironmentException('armcc supports only cross-compilation.') + self.id = 'arm' + default_warn_args = [] # type: T.List[str] + self.warn_args = {'0': [], + '1': default_warn_args, + '2': default_warn_args + [], + '3': default_warn_args + []} + # Assembly + self.can_compile_suffixes.add('s') + + def get_pic_args(self) -> T.List[str]: + # FIXME: Add /ropi, /rwpi, /fpic etc. qualifiers to --apcs + return [] + + def get_buildtype_args(self, buildtype: str) -> T.List[str]: + return arm_buildtype_args[buildtype] + + # Override CCompiler.get_always_args + def get_always_args(self) -> T.List[str]: + return [] + + # Override CCompiler.get_dependency_gen_args + def get_dependency_gen_args(self, outtarget: str, outfile: str) -> T.List[str]: + return ['--depend_target', outtarget, '--depend', outfile, '--depend_single_line'] + + def get_pch_use_args(self, pch_dir: str, header: str) -> T.List[str]: + # FIXME: Add required arguments + # NOTE from armcc user guide: + # "Support for Precompiled Header (PCH) files is deprecated from ARM Compiler 5.05 + # onwards on all platforms. Note that ARM Compiler on Windows 8 never supported + # PCH files." + return [] + + def get_pch_suffix(self) -> str: + # NOTE from armcc user guide: + # "Support for Precompiled Header (PCH) files is deprecated from ARM Compiler 5.05 + # onwards on all platforms. Note that ARM Compiler on Windows 8 never supported + # PCH files." + return 'pch' + + def thread_flags(self, env: 'Environment') -> T.List[str]: + return [] + + def get_coverage_args(self) -> T.List[str]: + return [] + + def get_optimization_args(self, optimization_level: str) -> T.List[str]: + return arm_optimization_args[optimization_level] + + def get_debug_args(self, is_debug: bool) -> T.List[str]: + return clike_debug_args[is_debug] + + def compute_parameters_with_absolute_paths(self, parameter_list: T.List[str], build_dir: str) -> T.List[str]: + for idx, i in enumerate(parameter_list): + if i[:2] == '-I' or i[:2] == '-L': + parameter_list[idx] = i[:2] + os.path.normpath(os.path.join(build_dir, i[2:])) + + return parameter_list + + +class ArmclangCompiler: + def __init__(self): + if not self.is_cross: + raise mesonlib.EnvironmentException('armclang supports only cross-compilation.') + # Check whether 'armlink' is available in path + self.linker_exe = 'armlink' + args = '--vsn' + try: + p, stdo, stderr = mesonlib.Popen_safe(self.linker_exe, args) + except OSError as e: + err_msg = 'Unknown linker\nRunning "{0}" gave \n"{1}"'.format(' '.join([self.linker_exe] + [args]), e) + raise mesonlib.EnvironmentException(err_msg) + # Verify the armlink version + ver_str = re.search('.*Component.*', stdo) + if ver_str: + ver_str = ver_str.group(0) + else: + raise mesonlib.EnvironmentException('armlink version string not found') + assert ver_str # makes mypy happy + # Using the regular expression from environment.search_version, + # which is used for searching compiler version + version_regex = r'(? T.List[str]: + # PIC support is not enabled by default for ARM, + # if users want to use it, they need to add the required arguments explicitly + return [] + + def get_colorout_args(self, colortype: str) -> T.List[str]: + return clang_color_args[colortype][:] + + def get_buildtype_args(self, buildtype: str) -> T.List[str]: + return armclang_buildtype_args[buildtype] + + def get_pch_suffix(self) -> str: + return 'gch' + + def get_pch_use_args(self, pch_dir: str, header: str) -> T.List[str]: + # Workaround for Clang bug http://llvm.org/bugs/show_bug.cgi?id=15136 + # This flag is internal to Clang (or at least not documented on the man page) + # so it might change semantics at any time. + return ['-include-pch', os.path.join(pch_dir, self.get_pch_name(header))] + + # Override CCompiler.get_dependency_gen_args + def get_dependency_gen_args(self, outtarget: str, outfile: str) -> T.List[str]: + return ['-MD', '-MT', outtarget, '-MF', outfile] + + def get_optimization_args(self, optimization_level: str) -> T.List[str]: + return armclang_optimization_args[optimization_level] + + def get_debug_args(self, is_debug: bool) -> T.List[str]: + return clike_debug_args[is_debug] + + def compute_parameters_with_absolute_paths(self, parameter_list: T.List[str], build_dir: str) -> T.List[str]: + for idx, i in enumerate(parameter_list): + if i[:2] == '-I' or i[:2] == '-L': + parameter_list[idx] = i[:2] + os.path.normpath(os.path.join(build_dir, i[2:])) + + return parameter_list diff --git a/meson/mesonbuild/compilers/mixins/c2000.py b/meson/mesonbuild/compilers/mixins/c2000.py new file mode 100644 index 000000000..65a2ceae4 --- /dev/null +++ b/meson/mesonbuild/compilers/mixins/c2000.py @@ -0,0 +1,120 @@ +# Copyright 2012-2019 The Meson development team + +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at + +# http://www.apache.org/licenses/LICENSE-2.0 + +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Representations specific to the Texas Instruments C2000 compiler family.""" + +import os +import typing as T + +from ...mesonlib import EnvironmentException + +if T.TYPE_CHECKING: + from ...environment import Environment + +c2000_buildtype_args = { + 'plain': [], + 'debug': [], + 'debugoptimized': [], + 'release': [], + 'minsize': [], + 'custom': [], +} # type: T.Dict[str, T.List[str]] + +c2000_optimization_args = { + '0': ['-O0'], + 'g': ['-Ooff'], + '1': ['-O1'], + '2': ['-O2'], + '3': ['-O3'], + 's': ['-04'] +} # type: T.Dict[str, T.List[str]] + +c2000_debug_args = { + False: [], + True: [] +} # type: T.Dict[bool, T.List[str]] + + +class C2000Compiler: + def __init__(self): + if not self.is_cross: + raise EnvironmentException('c2000 supports only cross-compilation.') + self.id = 'c2000' + # Assembly + self.can_compile_suffixes.add('asm') + default_warn_args = [] # type: T.List[str] + self.warn_args = {'0': [], + '1': default_warn_args, + '2': default_warn_args + [], + '3': default_warn_args + []} + + def get_pic_args(self) -> T.List[str]: + # PIC support is not enabled by default for c2000, + # if users want to use it, they need to add the required arguments explicitly + return [] + + def get_buildtype_args(self, buildtype: str) -> T.List[str]: + return c2000_buildtype_args[buildtype] + + def get_pch_suffix(self) -> str: + return 'pch' + + def get_pch_use_args(self, pch_dir: str, header: str) -> T.List[str]: + return [] + + # Override CCompiler.get_dependency_gen_args + def get_dependency_gen_args(self, outtarget: str, outfile: str) -> T.List[str]: + return [] + + def thread_flags(self, env: 'Environment') -> T.List[str]: + return [] + + def get_coverage_args(self) -> T.List[str]: + return [] + + def get_no_stdinc_args(self) -> T.List[str]: + return [] + + def get_no_stdlib_link_args(self) -> T.List[str]: + return [] + + def get_optimization_args(self, optimization_level: str) -> T.List[str]: + return c2000_optimization_args[optimization_level] + + def get_debug_args(self, is_debug: bool) -> T.List[str]: + return c2000_debug_args[is_debug] + + @classmethod + def unix_args_to_native(cls, args: T.List[str]) -> T.List[str]: + result = [] + for i in args: + if i.startswith('-D'): + i = '-define=' + i[2:] + if i.startswith('-I'): + i = '-include=' + i[2:] + if i.startswith('-Wl,-rpath='): + continue + elif i == '--print-search-dirs': + continue + elif i.startswith('-L'): + continue + result.append(i) + return result + + def compute_parameters_with_absolute_paths(self, parameter_list: T.List[str], build_dir: str) -> T.List[str]: + for idx, i in enumerate(parameter_list): + if i[:9] == '-include=': + parameter_list[idx] = i[:9] + os.path.normpath(os.path.join(build_dir, i[9:])) + + return parameter_list diff --git a/meson/mesonbuild/compilers/mixins/ccrx.py b/meson/mesonbuild/compilers/mixins/ccrx.py new file mode 100644 index 000000000..b8592158a --- /dev/null +++ b/meson/mesonbuild/compilers/mixins/ccrx.py @@ -0,0 +1,122 @@ +# Copyright 2012-2019 The Meson development team + +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at + +# http://www.apache.org/licenses/LICENSE-2.0 + +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Representations specific to the Renesas CC-RX compiler family.""" + +import os +import typing as T + +from ...mesonlib import EnvironmentException + +if T.TYPE_CHECKING: + from ...environment import Environment + +ccrx_buildtype_args = { + 'plain': [], + 'debug': [], + 'debugoptimized': [], + 'release': [], + 'minsize': [], + 'custom': [], +} # type: T.Dict[str, T.List[str]] + +ccrx_optimization_args = { + '0': ['-optimize=0'], + 'g': ['-optimize=0'], + '1': ['-optimize=1'], + '2': ['-optimize=2'], + '3': ['-optimize=max'], + 's': ['-optimize=2', '-size'] +} # type: T.Dict[str, T.List[str]] + +ccrx_debug_args = { + False: [], + True: ['-debug'] +} # type: T.Dict[bool, T.List[str]] + + +class CcrxCompiler: + def __init__(self): + if not self.is_cross: + raise EnvironmentException('ccrx supports only cross-compilation.') + self.id = 'ccrx' + # Assembly + self.can_compile_suffixes.add('src') + default_warn_args = [] # type: T.List[str] + self.warn_args = {'0': [], + '1': default_warn_args, + '2': default_warn_args + [], + '3': default_warn_args + []} + + def get_pic_args(self) -> T.List[str]: + # PIC support is not enabled by default for CCRX, + # if users want to use it, they need to add the required arguments explicitly + return [] + + def get_buildtype_args(self, buildtype: str) -> T.List[str]: + return ccrx_buildtype_args[buildtype] + + def get_pch_suffix(self) -> str: + return 'pch' + + def get_pch_use_args(self, pch_dir: str, header: str) -> T.List[str]: + return [] + + # Override CCompiler.get_dependency_gen_args + def get_dependency_gen_args(self, outtarget: str, outfile: str) -> T.List[str]: + return [] + + def thread_flags(self, env: 'Environment') -> T.List[str]: + return [] + + def get_coverage_args(self) -> T.List[str]: + return [] + + def get_no_stdinc_args(self) -> T.List[str]: + return [] + + def get_no_stdlib_link_args(self) -> T.List[str]: + return [] + + def get_optimization_args(self, optimization_level: str) -> T.List[str]: + return ccrx_optimization_args[optimization_level] + + def get_debug_args(self, is_debug: bool) -> T.List[str]: + return ccrx_debug_args[is_debug] + + @classmethod + def unix_args_to_native(cls, args: T.List[str]) -> T.List[str]: + result = [] + for i in args: + if i.startswith('-D'): + i = '-define=' + i[2:] + if i.startswith('-I'): + i = '-include=' + i[2:] + if i.startswith('-Wl,-rpath='): + continue + elif i == '--print-search-dirs': + continue + elif i.startswith('-L'): + continue + elif not i.startswith('-lib=') and i.endswith(('.a', '.lib')): + i = '-lib=' + i + result.append(i) + return result + + def compute_parameters_with_absolute_paths(self, parameter_list: T.List[str], build_dir: str) -> T.List[str]: + for idx, i in enumerate(parameter_list): + if i[:9] == '-include=': + parameter_list[idx] = i[:9] + os.path.normpath(os.path.join(build_dir, i[9:])) + + return parameter_list diff --git a/meson/mesonbuild/compilers/mixins/clang.py b/meson/mesonbuild/compilers/mixins/clang.py new file mode 100644 index 000000000..ecfbc6469 --- /dev/null +++ b/meson/mesonbuild/compilers/mixins/clang.py @@ -0,0 +1,129 @@ +# Copyright 2019 The meson development team +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Abstractions for the LLVM/Clang compiler family.""" + +import os +import shutil +import typing as T + +from ... import mesonlib +from ...linkers import AppleDynamicLinker +from .gnu import GnuLikeCompiler + +if T.TYPE_CHECKING: + from ...environment import Environment + from ...dependencies import Dependency # noqa: F401 + +clang_color_args = { + 'auto': ['-Xclang', '-fcolor-diagnostics'], + 'always': ['-Xclang', '-fcolor-diagnostics'], + 'never': ['-Xclang', '-fno-color-diagnostics'], +} # type: T.Dict[str, T.List[str]] + +clang_optimization_args = { + '0': [], + 'g': ['-Og'], + '1': ['-O1'], + '2': ['-O2'], + '3': ['-O3'], + 's': ['-Os'], +} # type: T.Dict[str, T.List[str]] + +class ClangCompiler(GnuLikeCompiler): + def __init__(self, defines: T.Optional[T.Dict[str, str]]): + super().__init__() + self.id = 'clang' + self.defines = defines or {} + self.base_options.append('b_colorout') + # TODO: this really should be part of the linker base_options, but + # linkers don't have base_options. + if isinstance(self.linker, AppleDynamicLinker): + self.base_options.append('b_bitcode') + # All Clang backends can also do LLVM IR + self.can_compile_suffixes.add('ll') + + def get_colorout_args(self, colortype: str) -> T.List[str]: + return clang_color_args[colortype][:] + + def has_builtin_define(self, define: str) -> bool: + return define in self.defines + + def get_builtin_define(self, define: str) -> T.Optional[str]: + return self.defines.get(define) + + def get_optimization_args(self, optimization_level: str) -> T.List[str]: + return clang_optimization_args[optimization_level] + + def get_pch_suffix(self) -> str: + return 'pch' + + def get_pch_use_args(self, pch_dir: str, header: str) -> T.List[str]: + # Workaround for Clang bug http://llvm.org/bugs/show_bug.cgi?id=15136 + # This flag is internal to Clang (or at least not documented on the man page) + # so it might change semantics at any time. + return ['-include-pch', os.path.join(pch_dir, self.get_pch_name(header))] + + def has_multi_arguments(self, args: T.List[str], env: 'Environment') -> T.List[str]: + myargs = ['-Werror=unknown-warning-option', '-Werror=unused-command-line-argument'] + if mesonlib.version_compare(self.version, '>=3.6.0'): + myargs.append('-Werror=ignored-optimization-argument') + return super().has_multi_arguments( + myargs + args, + env) + + def has_function(self, funcname: str, prefix: str, env: 'Environment', *, + extra_args: T.Optional[T.List[str]] = None, + dependencies: T.Optional[T.List['Dependency']] = None) -> bool: + if extra_args is None: + extra_args = [] + # Starting with XCode 8, we need to pass this to force linker + # visibility to obey OS X/iOS/tvOS minimum version targets with + # -mmacosx-version-min, -miphoneos-version-min, -mtvos-version-min etc. + # https://github.com/Homebrew/homebrew-core/issues/3727 + # TODO: this really should be communicated by the linker + if isinstance(self.linker, AppleDynamicLinker) and mesonlib.version_compare(self.version, '>=8.0'): + extra_args.append('-Wl,-no_weak_imports') + return super().has_function(funcname, prefix, env, extra_args=extra_args, + dependencies=dependencies) + + def openmp_flags(self) -> T.List[str]: + if mesonlib.version_compare(self.version, '>=3.8.0'): + return ['-fopenmp'] + elif mesonlib.version_compare(self.version, '>=3.7.0'): + return ['-fopenmp=libomp'] + else: + # Shouldn't work, but it'll be checked explicitly in the OpenMP dependency. + return [] + + @classmethod + def use_linker_args(cls, linker: str) -> T.List[str]: + # Clang additionally can use a linker specified as a path, which GCC + # (and other gcc-like compilers) cannot. This is becuse clang (being + # llvm based) is retargetable, while GCC is not. + # + if shutil.which(linker): + if not shutil.which(linker): + raise mesonlib.MesonException( + 'Cannot find linker {}.'.format(linker)) + return ['-fuse-ld={}'.format(linker)] + return super().use_linker_args(linker) + + def get_has_func_attribute_extra_args(self, name): + # Clang only warns about unknown or ignored attributes, so force an + # error. + return ['-Werror=attributes'] + + def get_coverage_link_args(self) -> T.List[str]: + return ['--coverage'] diff --git a/meson/mesonbuild/compilers/mixins/clike.py b/meson/mesonbuild/compilers/mixins/clike.py new file mode 100644 index 000000000..d5ea8d1ed --- /dev/null +++ b/meson/mesonbuild/compilers/mixins/clike.py @@ -0,0 +1,1226 @@ +# Copyright 2012-2017 The Meson development team + +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at + +# http://www.apache.org/licenses/LICENSE-2.0 + +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + + +"""Mixin classes to be shared between C and C++ compilers. + +Without this we'll end up with awful diamond inherintance problems. The goal +of this is to have mixin's, which are classes that are designed *not* to be +standalone, they only work through inheritance. +""" + +import functools +import glob +import itertools +import os +import re +import subprocess +import typing as T +from pathlib import Path + +from ... import arglist +from ... import mesonlib +from ... import mlog +from ...linkers import GnuLikeDynamicLinkerMixin, SolarisDynamicLinker +from ...mesonlib import LibType +from .. import compilers +from .visualstudio import VisualStudioLikeCompiler + +if T.TYPE_CHECKING: + from ...environment import Environment + +SOREGEX = re.compile(r'.*\.so(\.[0-9]+)?(\.[0-9]+)?(\.[0-9]+)?$') + +class CLikeCompilerArgs(arglist.CompilerArgs): + prepend_prefixes = ('-I', '-L') + dedup2_prefixes = ('-I', '-isystem', '-L', '-D', '-U') + + # NOTE: not thorough. A list of potential corner cases can be found in + # https://github.com/mesonbuild/meson/pull/4593#pullrequestreview-182016038 + dedup1_prefixes = ('-l', '-Wl,-l', '-Wl,--export-dynamic') + dedup1_suffixes = ('.lib', '.dll', '.so', '.dylib', '.a') + dedup1_args = ('-c', '-S', '-E', '-pipe', '-pthread') + + def to_native(self, copy: bool = False) -> T.List[str]: + # Check if we need to add --start/end-group for circular dependencies + # between static libraries, and for recursively searching for symbols + # needed by static libraries that are provided by object files or + # shared libraries. + self.flush_pre_post() + if copy: + new = self.copy() + else: + new = self + # This covers all ld.bfd, ld.gold, ld.gold, and xild on Linux, which + # all act like (or are) gnu ld + # TODO: this could probably be added to the DynamicLinker instead + if isinstance(self.compiler.linker, (GnuLikeDynamicLinkerMixin, SolarisDynamicLinker)): + group_start = -1 + group_end = -1 + for i, each in enumerate(new): + if not each.startswith(('-Wl,-l', '-l')) and not each.endswith('.a') and \ + not SOREGEX.match(each): + continue + group_end = i + if group_start < 0: + # First occurrence of a library + group_start = i + if group_start >= 0: + # Last occurrence of a library + new.insert(group_end + 1, '-Wl,--end-group') + new.insert(group_start, '-Wl,--start-group') + # Remove system/default include paths added with -isystem + if hasattr(self.compiler, 'get_default_include_dirs'): + default_dirs = self.compiler.get_default_include_dirs() + bad_idx_list = [] # type: T.List[int] + for i, each in enumerate(new): + # Remove the -isystem and the path if the path is a default path + if (each == '-isystem' and + i < (len(new) - 1) and + new[i + 1] in default_dirs): + bad_idx_list += [i, i + 1] + elif each.startswith('-isystem=') and each[9:] in default_dirs: + bad_idx_list += [i] + elif each.startswith('-isystem') and each[8:] in default_dirs: + bad_idx_list += [i] + for i in reversed(bad_idx_list): + new.pop(i) + return self.compiler.unix_args_to_native(new._container) + + def __repr__(self) -> str: + self.flush_pre_post() + return 'CLikeCompilerArgs({!r}, {!r})'.format(self.compiler, self._container) + + +class CLikeCompiler: + + """Shared bits for the C and CPP Compilers.""" + + # TODO: Replace this manual cache with functools.lru_cache + library_dirs_cache = {} + program_dirs_cache = {} + find_library_cache = {} + find_framework_cache = {} + internal_libs = arglist.UNIXY_COMPILER_INTERNAL_LIBS + + def __init__(self, is_cross: bool, exe_wrapper: T.Optional[str] = None): + # If a child ObjC or CPP class has already set it, don't set it ourselves + self.is_cross = is_cross + self.can_compile_suffixes.add('h') + # If the exe wrapper was not found, pretend it wasn't set so that the + # sanity check is skipped and compiler checks use fallbacks. + if not exe_wrapper or not exe_wrapper.found() or not exe_wrapper.get_command(): + self.exe_wrapper = None + else: + self.exe_wrapper = exe_wrapper.get_command() + + def compiler_args(self, args: T.Optional[T.Iterable[str]] = None) -> CLikeCompilerArgs: + return CLikeCompilerArgs(self, args) + + def needs_static_linker(self): + return True # When compiling static libraries, so yes. + + def get_always_args(self): + ''' + Args that are always-on for all C compilers other than MSVC + ''' + return ['-pipe'] + compilers.get_largefile_args(self) + + def get_no_stdinc_args(self): + return ['-nostdinc'] + + def get_no_stdlib_link_args(self): + return ['-nostdlib'] + + def get_warn_args(self, level): + return self.warn_args[level] + + def get_no_warn_args(self): + # Almost every compiler uses this for disabling warnings + return ['-w'] + + def split_shlib_to_parts(self, fname): + return None, fname + + def depfile_for_object(self, objfile): + return objfile + '.' + self.get_depfile_suffix() + + def get_depfile_suffix(self): + return 'd' + + def get_exelist(self): + return self.exelist[:] + + def get_preprocess_only_args(self): + return ['-E', '-P'] + + def get_compile_only_args(self): + return ['-c'] + + def get_no_optimization_args(self): + return ['-O0'] + + def get_compiler_check_args(self): + ''' + Get arguments useful for compiler checks such as being permissive in + the code quality and not doing any optimization. + ''' + return self.get_no_optimization_args() + + def get_output_args(self, target): + return ['-o', target] + + def get_werror_args(self): + return ['-Werror'] + + def get_std_exe_link_args(self): + # TODO: is this a linker property? + return [] + + def get_include_args(self, path, is_system): + if path == '': + path = '.' + if is_system: + return ['-isystem', path] + return ['-I' + path] + + def get_compiler_dirs(self, env: 'Environment', name: str) -> T.List[str]: + ''' + Get dirs from the compiler, either `libraries:` or `programs:` + ''' + return [] + + @functools.lru_cache() + def get_library_dirs(self, env, elf_class = None): + dirs = self.get_compiler_dirs(env, 'libraries') + if elf_class is None or elf_class == 0: + return dirs + + # if we do have an elf class for 32-bit or 64-bit, we want to check that + # the directory in question contains libraries of the appropriate class. Since + # system directories aren't mixed, we only need to check one file for each + # directory and go by that. If we can't check the file for some reason, assume + # the compiler knows what it's doing, and accept the directory anyway. + retval = [] + for d in dirs: + files = [f for f in os.listdir(d) if f.endswith('.so') and os.path.isfile(os.path.join(d, f))] + # if no files, accept directory and move on + if not files: + retval.append(d) + continue + + for f in files: + file_to_check = os.path.join(d, f) + try: + with open(file_to_check, 'rb') as fd: + header = fd.read(5) + # if file is not an ELF file, it's weird, but accept dir + # if it is elf, and the class matches, accept dir + if header[1:4] != b'ELF' or int(header[4]) == elf_class: + retval.append(d) + # at this point, it's an ELF file which doesn't match the + # appropriate elf_class, so skip this one + # stop scanning after the first sucessful read + break + except OSError: + # Skip the file if we can't read it + pass + + return tuple(retval) + + @functools.lru_cache() + def get_program_dirs(self, env): + ''' + Programs used by the compiler. Also where toolchain DLLs such as + libstdc++-6.dll are found with MinGW. + ''' + return self.get_compiler_dirs(env, 'programs') + + def get_pic_args(self) -> T.List[str]: + return ['-fPIC'] + + def name_string(self) -> str: + return ' '.join(self.exelist) + + def get_pch_use_args(self, pch_dir: str, header: str) -> T.List[str]: + return ['-include', os.path.basename(header)] + + def get_pch_name(self, header_name: str) -> str: + return os.path.basename(header_name) + '.' + self.get_pch_suffix() + + def get_linker_search_args(self, dirname: str) -> T.List[str]: + return self.linker.get_search_args(dirname) + + def get_default_include_dirs(self): + return [] + + def gen_export_dynamic_link_args(self, env: 'Environment') -> T.List[str]: + return self.linker.export_dynamic_args(env) + + def gen_import_library_args(self, implibname: str) -> T.List[str]: + return self.linker.import_library_args(implibname) + + def sanity_check_impl(self, work_dir, environment, sname, code): + mlog.debug('Sanity testing ' + self.get_display_language() + ' compiler:', ' '.join(self.exelist)) + mlog.debug('Is cross compiler: %s.' % str(self.is_cross)) + + source_name = os.path.join(work_dir, sname) + binname = sname.rsplit('.', 1)[0] + mode = 'link' + if self.is_cross: + binname += '_cross' + if self.exe_wrapper is None: + # Linking cross built apps is painful. You can't really + # tell if you should use -nostdlib or not and for example + # on OSX the compiler binary is the same but you need + # a ton of compiler flags to differentiate between + # arm and x86_64. So just compile. + mode = 'compile' + cargs, largs = self._get_basic_compiler_args(environment, mode) + extra_flags = cargs + self.linker_to_compiler_args(largs) + + # Is a valid executable output for all toolchains and platforms + binname += '.exe' + # Write binary check source + binary_name = os.path.join(work_dir, binname) + with open(source_name, 'w') as ofile: + ofile.write(code) + # Compile sanity check + # NOTE: extra_flags must be added at the end. On MSVC, it might contain a '/link' argument + # after which all further arguments will be passed directly to the linker + cmdlist = self.exelist + [source_name] + self.get_output_args(binary_name) + extra_flags + pc, stdo, stde = mesonlib.Popen_safe(cmdlist, cwd=work_dir) + mlog.debug('Sanity check compiler command line:', ' '.join(cmdlist)) + mlog.debug('Sanity check compile stdout:') + mlog.debug(stdo) + mlog.debug('-----\nSanity check compile stderr:') + mlog.debug(stde) + mlog.debug('-----') + if pc.returncode != 0: + raise mesonlib.EnvironmentException('Compiler {0} can not compile programs.'.format(self.name_string())) + # Run sanity check + if self.is_cross: + if self.exe_wrapper is None: + # Can't check if the binaries run so we have to assume they do + return + cmdlist = self.exe_wrapper + [binary_name] + else: + cmdlist = [binary_name] + mlog.debug('Running test binary command: ' + ' '.join(cmdlist)) + try: + pe = subprocess.Popen(cmdlist) + except Exception as e: + raise mesonlib.EnvironmentException('Could not invoke sanity test executable: %s.' % str(e)) + pe.wait() + if pe.returncode != 0: + raise mesonlib.EnvironmentException('Executables created by {0} compiler {1} are not runnable.'.format(self.language, self.name_string())) + + def sanity_check(self, work_dir, environment): + code = 'int main(void) { int class=0; return class; }\n' + return self.sanity_check_impl(work_dir, environment, 'sanitycheckc.c', code) + + def check_header(self, hname: str, prefix: str, env, *, extra_args=None, dependencies=None): + fargs = {'prefix': prefix, 'header': hname} + code = '''{prefix} + #include <{header}>''' + return self.compiles(code.format(**fargs), env, extra_args=extra_args, + dependencies=dependencies) + + def has_header(self, hname: str, prefix: str, env, *, extra_args=None, dependencies=None, disable_cache: bool = False): + fargs = {'prefix': prefix, 'header': hname} + code = '''{prefix} + #ifdef __has_include + #if !__has_include("{header}") + #error "Header '{header}' could not be found" + #endif + #else + #include <{header}> + #endif''' + return self.compiles(code.format(**fargs), env, extra_args=extra_args, + dependencies=dependencies, mode='preprocess', disable_cache=disable_cache) + + def has_header_symbol(self, hname: str, symbol: str, prefix: str, env, *, extra_args=None, dependencies=None): + fargs = {'prefix': prefix, 'header': hname, 'symbol': symbol} + t = '''{prefix} + #include <{header}> + int main(void) {{ + /* If it's not defined as a macro, try to use as a symbol */ + #ifndef {symbol} + {symbol}; + #endif + return 0; + }}''' + return self.compiles(t.format(**fargs), env, extra_args=extra_args, + dependencies=dependencies) + + def _get_basic_compiler_args(self, env, mode: str): + cargs, largs = [], [] + if mode == 'link': + # Sometimes we need to manually select the CRT to use with MSVC. + # One example is when trying to do a compiler check that involves + # linking with static libraries since MSVC won't select a CRT for + # us in that case and will error out asking us to pick one. + try: + crt_val = env.coredata.base_options['b_vscrt'].value + buildtype = env.coredata.builtins['buildtype'].value + cargs += self.get_crt_compile_args(crt_val, buildtype) + except (KeyError, AttributeError): + pass + + # Add CFLAGS/CXXFLAGS/OBJCFLAGS/OBJCXXFLAGS and CPPFLAGS from the env + sys_args = env.coredata.get_external_args(self.for_machine, self.language) + # Apparently it is a thing to inject linker flags both + # via CFLAGS _and_ LDFLAGS, even though the former are + # also used during linking. These flags can break + # argument checks. Thanks, Autotools. + cleaned_sys_args = self.remove_linkerlike_args(sys_args) + cargs += cleaned_sys_args + + if mode == 'link': + ld_value = env.lookup_binary_entry(self.for_machine, self.language + '_ld') + if ld_value is not None: + largs += self.use_linker_args(ld_value[0]) + + # Add LDFLAGS from the env + sys_ld_args = env.coredata.get_external_link_args(self.for_machine, self.language) + # CFLAGS and CXXFLAGS go to both linking and compiling, but we want them + # to only appear on the command line once. Remove dupes. + largs += [x for x in sys_ld_args if x not in sys_args] + + cargs += self.get_compiler_args_for_mode(mode) + return cargs, largs + + def _get_compiler_check_args(self, env, extra_args: list, dependencies, mode: str = 'compile') -> T.List[str]: + if extra_args is None: + extra_args = [] + else: + extra_args = mesonlib.listify(extra_args) + extra_args = mesonlib.listify([e(mode) if callable(e) else e for e in extra_args]) + + if dependencies is None: + dependencies = [] + elif not isinstance(dependencies, list): + dependencies = [dependencies] + # Collect compiler arguments + cargs = self.compiler_args() + largs = [] + for d in dependencies: + # Add compile flags needed by dependencies + cargs += d.get_compile_args() + if mode == 'link': + # Add link flags needed to find dependencies + largs += d.get_link_args() + + ca, la = self._get_basic_compiler_args(env, mode) + cargs += ca + largs += la + + cargs += self.get_compiler_check_args() + + # on MSVC compiler and linker flags must be separated by the "/link" argument + # at this point, the '/link' argument may already be part of extra_args, otherwise, it is added here + if self.linker_to_compiler_args([]) == ['/link'] and largs != [] and not ('/link' in extra_args): + extra_args += ['/link'] + + args = cargs + extra_args + largs + return args + + def compiles(self, code: str, env, *, + extra_args: T.Sequence[T.Union[T.Sequence[str], str]] = None, + dependencies=None, mode: str = 'compile', disable_cache: bool = False) -> T.Tuple[bool, bool]: + with self._build_wrapper(code, env, extra_args, dependencies, mode, disable_cache=disable_cache) as p: + return p.returncode == 0, p.cached + + def _build_wrapper(self, code: str, env, extra_args, dependencies=None, mode: str = 'compile', want_output: bool = False, disable_cache: bool = False, temp_dir: str = None) -> T.Tuple[bool, bool]: + args = self._get_compiler_check_args(env, extra_args, dependencies, mode) + if disable_cache or want_output: + return self.compile(code, extra_args=args, mode=mode, want_output=want_output, temp_dir=env.scratch_dir) + return self.cached_compile(code, env.coredata, extra_args=args, mode=mode, temp_dir=env.scratch_dir) + + def links(self, code, env, *, extra_args=None, dependencies=None, disable_cache=False): + return self.compiles(code, env, extra_args=extra_args, + dependencies=dependencies, mode='link', disable_cache=disable_cache) + + def run(self, code: str, env, *, extra_args=None, dependencies=None): + need_exe_wrapper = env.need_exe_wrapper(self.for_machine) + if need_exe_wrapper and self.exe_wrapper is None: + raise compilers.CrossNoRunException('Can not run test applications in this cross environment.') + with self._build_wrapper(code, env, extra_args, dependencies, mode='link', want_output=True) as p: + if p.returncode != 0: + mlog.debug('Could not compile test file %s: %d\n' % ( + p.input_name, + p.returncode)) + return compilers.RunResult(False) + if need_exe_wrapper: + cmdlist = self.exe_wrapper + [p.output_name] + else: + cmdlist = p.output_name + try: + pe, so, se = mesonlib.Popen_safe(cmdlist) + except Exception as e: + mlog.debug('Could not run: %s (error: %s)\n' % (cmdlist, e)) + return compilers.RunResult(False) + + mlog.debug('Program stdout:\n') + mlog.debug(so) + mlog.debug('Program stderr:\n') + mlog.debug(se) + return compilers.RunResult(True, pe.returncode, so, se) + + def _compile_int(self, expression, prefix, env, extra_args, dependencies): + fargs = {'prefix': prefix, 'expression': expression} + t = '''#include + {prefix} + int main(void) {{ static int a[1-2*!({expression})]; a[0]=0; return 0; }}''' + return self.compiles(t.format(**fargs), env, extra_args=extra_args, + dependencies=dependencies)[0] + + def cross_compute_int(self, expression, low, high, guess, prefix, env, extra_args, dependencies): + # Try user's guess first + if isinstance(guess, int): + if self._compile_int('%s == %d' % (expression, guess), prefix, env, extra_args, dependencies): + return guess + + # If no bounds are given, compute them in the limit of int32 + maxint = 0x7fffffff + minint = -0x80000000 + if not isinstance(low, int) or not isinstance(high, int): + if self._compile_int('%s >= 0' % (expression), prefix, env, extra_args, dependencies): + low = cur = 0 + while self._compile_int('%s > %d' % (expression, cur), prefix, env, extra_args, dependencies): + low = cur + 1 + if low > maxint: + raise mesonlib.EnvironmentException('Cross-compile check overflowed') + cur = cur * 2 + 1 + if cur > maxint: + cur = maxint + high = cur + else: + high = cur = -1 + while self._compile_int('%s < %d' % (expression, cur), prefix, env, extra_args, dependencies): + high = cur - 1 + if high < minint: + raise mesonlib.EnvironmentException('Cross-compile check overflowed') + cur = cur * 2 + if cur < minint: + cur = minint + low = cur + else: + # Sanity check limits given by user + if high < low: + raise mesonlib.EnvironmentException('high limit smaller than low limit') + condition = '%s <= %d && %s >= %d' % (expression, high, expression, low) + if not self._compile_int(condition, prefix, env, extra_args, dependencies): + raise mesonlib.EnvironmentException('Value out of given range') + + # Binary search + while low != high: + cur = low + int((high - low) / 2) + if self._compile_int('%s <= %d' % (expression, cur), prefix, env, extra_args, dependencies): + high = cur + else: + low = cur + 1 + + return low + + def compute_int(self, expression, low, high, guess, prefix, env, *, extra_args=None, dependencies=None): + if extra_args is None: + extra_args = [] + if self.is_cross: + return self.cross_compute_int(expression, low, high, guess, prefix, env, extra_args, dependencies) + fargs = {'prefix': prefix, 'expression': expression} + t = '''#include + {prefix} + int main(void) {{ + printf("%ld\\n", (long)({expression})); + return 0; + }};''' + res = self.run(t.format(**fargs), env, extra_args=extra_args, + dependencies=dependencies) + if not res.compiled: + return -1 + if res.returncode != 0: + raise mesonlib.EnvironmentException('Could not run compute_int test binary.') + return int(res.stdout) + + def cross_sizeof(self, typename, prefix, env, *, extra_args=None, dependencies=None): + if extra_args is None: + extra_args = [] + fargs = {'prefix': prefix, 'type': typename} + t = '''#include + {prefix} + int main(void) {{ + {type} something; + return 0; + }}''' + if not self.compiles(t.format(**fargs), env, extra_args=extra_args, + dependencies=dependencies)[0]: + return -1 + return self.cross_compute_int('sizeof(%s)' % typename, None, None, None, prefix, env, extra_args, dependencies) + + def sizeof(self, typename, prefix, env, *, extra_args=None, dependencies=None): + if extra_args is None: + extra_args = [] + fargs = {'prefix': prefix, 'type': typename} + if self.is_cross: + return self.cross_sizeof(typename, prefix, env, extra_args=extra_args, + dependencies=dependencies) + t = '''#include + {prefix} + int main(void) {{ + printf("%ld\\n", (long)(sizeof({type}))); + return 0; + }};''' + res = self.run(t.format(**fargs), env, extra_args=extra_args, + dependencies=dependencies) + if not res.compiled: + return -1 + if res.returncode != 0: + raise mesonlib.EnvironmentException('Could not run sizeof test binary.') + return int(res.stdout) + + def cross_alignment(self, typename, prefix, env, *, extra_args=None, dependencies=None): + if extra_args is None: + extra_args = [] + fargs = {'prefix': prefix, 'type': typename} + t = '''#include + {prefix} + int main(void) {{ + {type} something; + return 0; + }}''' + if not self.compiles(t.format(**fargs), env, extra_args=extra_args, + dependencies=dependencies)[0]: + return -1 + t = '''#include + {prefix} + struct tmp {{ + char c; + {type} target; + }};''' + return self.cross_compute_int('offsetof(struct tmp, target)', None, None, None, t.format(**fargs), env, extra_args, dependencies) + + def alignment(self, typename, prefix, env, *, extra_args=None, dependencies=None): + if extra_args is None: + extra_args = [] + if self.is_cross: + return self.cross_alignment(typename, prefix, env, extra_args=extra_args, + dependencies=dependencies) + fargs = {'prefix': prefix, 'type': typename} + t = '''#include + #include + {prefix} + struct tmp {{ + char c; + {type} target; + }}; + int main(void) {{ + printf("%d", (int)offsetof(struct tmp, target)); + return 0; + }}''' + res = self.run(t.format(**fargs), env, extra_args=extra_args, + dependencies=dependencies) + if not res.compiled: + raise mesonlib.EnvironmentException('Could not compile alignment test.') + if res.returncode != 0: + raise mesonlib.EnvironmentException('Could not run alignment test binary.') + align = int(res.stdout) + if align == 0: + raise mesonlib.EnvironmentException('Could not determine alignment of %s. Sorry. You might want to file a bug.' % typename) + return align + + def get_define(self, dname, prefix, env, extra_args, dependencies, disable_cache=False): + delim = '"MESON_GET_DEFINE_DELIMITER"' + fargs = {'prefix': prefix, 'define': dname, 'delim': delim} + code = ''' + {prefix} + #ifndef {define} + # define {define} + #endif + {delim}\n{define}''' + args = self._get_compiler_check_args(env, extra_args, dependencies, + mode='preprocess').to_native() + func = lambda: self.cached_compile(code.format(**fargs), env.coredata, extra_args=args, mode='preprocess') + if disable_cache: + func = lambda: self.compile(code.format(**fargs), extra_args=args, mode='preprocess', temp_dir=env.scratch_dir) + with func() as p: + cached = p.cached + if p.returncode != 0: + raise mesonlib.EnvironmentException('Could not get define {!r}'.format(dname)) + # Get the preprocessed value after the delimiter, + # minus the extra newline at the end and + # merge string literals. + return self.concatenate_string_literals(p.stdo.split(delim + '\n')[-1][:-1]), cached + + def get_return_value(self, fname, rtype, prefix, env, extra_args, dependencies): + if rtype == 'string': + fmt = '%s' + cast = '(char*)' + elif rtype == 'int': + fmt = '%lli' + cast = '(long long int)' + else: + raise AssertionError('BUG: Unknown return type {!r}'.format(rtype)) + fargs = {'prefix': prefix, 'f': fname, 'cast': cast, 'fmt': fmt} + code = '''{prefix} + #include + int main(void) {{ + printf ("{fmt}", {cast} {f}()); + return 0; + }}'''.format(**fargs) + res = self.run(code, env, extra_args=extra_args, dependencies=dependencies) + if not res.compiled: + m = 'Could not get return value of {}()' + raise mesonlib.EnvironmentException(m.format(fname)) + if rtype == 'string': + return res.stdout + elif rtype == 'int': + try: + return int(res.stdout.strip()) + except ValueError: + m = 'Return value of {}() is not an int' + raise mesonlib.EnvironmentException(m.format(fname)) + + @staticmethod + def _no_prototype_templ(): + """ + Try to find the function without a prototype from a header by defining + our own dummy prototype and trying to link with the C library (and + whatever else the compiler links in by default). This is very similar + to the check performed by Autoconf for AC_CHECK_FUNCS. + """ + # Define the symbol to something else since it is defined by the + # includes or defines listed by the user or by the compiler. This may + # include, for instance _GNU_SOURCE which must be defined before + # limits.h, which includes features.h + # Then, undef the symbol to get rid of it completely. + head = ''' + #define {func} meson_disable_define_of_{func} + {prefix} + #include + #undef {func} + ''' + # Override any GCC internal prototype and declare our own definition for + # the symbol. Use char because that's unlikely to be an actual return + # value for a function which ensures that we override the definition. + head += ''' + #ifdef __cplusplus + extern "C" + #endif + char {func} (void); + ''' + # The actual function call + main = ''' + int main(void) {{ + return {func} (); + }}''' + return head, main + + @staticmethod + def _have_prototype_templ(): + """ + Returns a head-er and main() call that uses the headers listed by the + user for the function prototype while checking if a function exists. + """ + # Add the 'prefix', aka defines, includes, etc that the user provides + # This may include, for instance _GNU_SOURCE which must be defined + # before limits.h, which includes features.h + head = '{prefix}\n#include \n' + # We don't know what the function takes or returns, so return it as an int. + # Just taking the address or comparing it to void is not enough because + # compilers are smart enough to optimize it away. The resulting binary + # is not run so we don't care what the return value is. + main = '''\nint main(void) {{ + void *a = (void*) &{func}; + long long b = (long long) a; + return (int) b; + }}''' + return head, main + + def has_function(self, funcname, prefix, env, *, extra_args=None, dependencies=None): + """ + First, this function looks for the symbol in the default libraries + provided by the compiler (stdlib + a few others usually). If that + fails, it checks if any of the headers specified in the prefix provide + an implementation of the function, and if that fails, it checks if it's + implemented as a compiler-builtin. + """ + if extra_args is None: + extra_args = [] + + # Short-circuit if the check is already provided by the cross-info file + varname = 'has function ' + funcname + varname = varname.replace(' ', '_') + if self.is_cross: + val = env.properties.host.get(varname, None) + if val is not None: + if isinstance(val, bool): + return val, False + raise mesonlib.EnvironmentException('Cross variable {0} is not a boolean.'.format(varname)) + + fargs = {'prefix': prefix, 'func': funcname} + + # glibc defines functions that are not available on Linux as stubs that + # fail with ENOSYS (such as e.g. lchmod). In this case we want to fail + # instead of detecting the stub as a valid symbol. + # We already included limits.h earlier to ensure that these are defined + # for stub functions. + stubs_fail = ''' + #if defined __stub_{func} || defined __stub___{func} + fail fail fail this function is not going to work + #endif + ''' + + # If we have any includes in the prefix supplied by the user, assume + # that the user wants us to use the symbol prototype defined in those + # includes. If not, then try to do the Autoconf-style check with + # a dummy prototype definition of our own. + # This is needed when the linker determines symbol availability from an + # SDK based on the prototype in the header provided by the SDK. + # Ignoring this prototype would result in the symbol always being + # marked as available. + if '#include' in prefix: + head, main = self._have_prototype_templ() + else: + head, main = self._no_prototype_templ() + templ = head + stubs_fail + main + + res, cached = self.links(templ.format(**fargs), env, extra_args=extra_args, + dependencies=dependencies) + if res: + return True, cached + + # MSVC does not have compiler __builtin_-s. + if self.get_id() in {'msvc', 'intel-cl'}: + return False, False + + # Detect function as a built-in + # + # Some functions like alloca() are defined as compiler built-ins which + # are inlined by the compiler and you can't take their address, so we + # need to look for them differently. On nice compilers like clang, we + # can just directly use the __has_builtin() macro. + fargs['no_includes'] = '#include' not in prefix + is_builtin = funcname.startswith('__builtin_') + fargs['is_builtin'] = is_builtin + fargs['__builtin_'] = '' if is_builtin else '__builtin_' + t = '''{prefix} + int main(void) {{ + + /* With some toolchains (MSYS2/mingw for example) the compiler + * provides various builtins which are not really implemented and + * fall back to the stdlib where they aren't provided and fail at + * build/link time. In case the user provides a header, including + * the header didn't lead to the function being defined, and the + * function we are checking isn't a builtin itself we assume the + * builtin is not functional and we just error out. */ + #if !{no_includes:d} && !defined({func}) && !{is_builtin:d} + #error "No definition for {__builtin_}{func} found in the prefix" + #endif + + #ifdef __has_builtin + #if !__has_builtin({__builtin_}{func}) + #error "{__builtin_}{func} not found" + #endif + #elif ! defined({func}) + {__builtin_}{func}; + #endif + return 0; + }}''' + return self.links(t.format(**fargs), env, extra_args=extra_args, + dependencies=dependencies) + + def has_members(self, typename, membernames, prefix, env, *, extra_args=None, dependencies=None): + if extra_args is None: + extra_args = [] + fargs = {'prefix': prefix, 'type': typename, 'name': 'foo'} + # Create code that accesses all members + members = '' + for member in membernames: + members += '{}.{};\n'.format(fargs['name'], member) + fargs['members'] = members + t = '''{prefix} + void bar(void) {{ + {type} {name}; + {members} + }};''' + return self.compiles(t.format(**fargs), env, extra_args=extra_args, + dependencies=dependencies) + + def has_type(self, typename, prefix, env, extra_args, dependencies=None): + fargs = {'prefix': prefix, 'type': typename} + t = '''{prefix} + void bar(void) {{ + sizeof({type}); + }};''' + return self.compiles(t.format(**fargs), env, extra_args=extra_args, + dependencies=dependencies) + + def symbols_have_underscore_prefix(self, env): + ''' + Check if the compiler prefixes an underscore to global C symbols + ''' + symbol_name = b'meson_uscore_prefix' + code = '''#ifdef __cplusplus + extern "C" { + #endif + void ''' + symbol_name.decode() + ''' (void) {} + #ifdef __cplusplus + } + #endif + ''' + args = self.get_compiler_check_args() + n = 'symbols_have_underscore_prefix' + with self._build_wrapper(code, env, extra_args=args, mode='compile', want_output=True, temp_dir=env.scratch_dir) as p: + if p.returncode != 0: + m = 'BUG: Unable to compile {!r} check: {}' + raise RuntimeError(m.format(n, p.stdo)) + if not os.path.isfile(p.output_name): + m = 'BUG: Can\'t find compiled test code for {!r} check' + raise RuntimeError(m.format(n)) + with open(p.output_name, 'rb') as o: + for line in o: + # Check if the underscore form of the symbol is somewhere + # in the output file. + if b'_' + symbol_name in line: + mlog.debug("Symbols have underscore prefix: YES") + return True + # Else, check if the non-underscored form is present + elif symbol_name in line: + mlog.debug("Symbols have underscore prefix: NO") + return False + raise RuntimeError('BUG: {!r} check failed unexpectedly'.format(n)) + + def _get_patterns(self, env, prefixes, suffixes, shared=False): + patterns = [] + for p in prefixes: + for s in suffixes: + patterns.append(p + '{}.' + s) + if shared and env.machines[self.for_machine].is_openbsd(): + # Shared libraries on OpenBSD can be named libfoo.so.X.Y: + # https://www.openbsd.org/faq/ports/specialtopics.html#SharedLibs + # + # This globbing is probably the best matching we can do since regex + # is expensive. It's wrong in many edge cases, but it will match + # correctly-named libraries and hopefully no one on OpenBSD names + # their files libfoo.so.9a.7b.1.0 + for p in prefixes: + patterns.append(p + '{}.so.[0-9]*.[0-9]*') + return patterns + + def get_library_naming(self, env, libtype: LibType, strict=False): + ''' + Get library prefixes and suffixes for the target platform ordered by + priority + ''' + stlibext = ['a'] + # We've always allowed libname to be both `foo` and `libfoo`, and now + # people depend on it. Also, some people use prebuilt `foo.so` instead + # of `libfoo.so` for unknown reasons, and may also want to create + # `foo.so` by setting name_prefix to '' + if strict and not isinstance(self, VisualStudioLikeCompiler): # lib prefix is not usually used with msvc + prefixes = ['lib'] + else: + prefixes = ['lib', ''] + # Library suffixes and prefixes + if env.machines[self.for_machine].is_darwin(): + shlibext = ['dylib', 'so'] + elif env.machines[self.for_machine].is_windows(): + # FIXME: .lib files can be import or static so we should read the + # file, figure out which one it is, and reject the wrong kind. + if isinstance(self, VisualStudioLikeCompiler): + shlibext = ['lib'] + else: + shlibext = ['dll.a', 'lib', 'dll'] + # Yep, static libraries can also be foo.lib + stlibext += ['lib'] + elif env.machines[self.for_machine].is_cygwin(): + shlibext = ['dll', 'dll.a'] + prefixes = ['cyg'] + prefixes + else: + # Linux/BSDs + shlibext = ['so'] + # Search priority + if libtype is LibType.PREFER_SHARED: + patterns = self._get_patterns(env, prefixes, shlibext, True) + patterns.extend([x for x in self._get_patterns(env, prefixes, stlibext, False) if x not in patterns]) + elif libtype is LibType.PREFER_STATIC: + patterns = self._get_patterns(env, prefixes, stlibext, False) + patterns.extend([x for x in self._get_patterns(env, prefixes, shlibext, True) if x not in patterns]) + elif libtype is LibType.SHARED: + patterns = self._get_patterns(env, prefixes, shlibext, True) + else: + assert libtype is LibType.STATIC + patterns = self._get_patterns(env, prefixes, stlibext, False) + return tuple(patterns) + + @staticmethod + def _sort_shlibs_openbsd(libs): + filtered = [] + for lib in libs: + # Validate file as a shared library of type libfoo.so.X.Y + ret = lib.rsplit('.so.', maxsplit=1) + if len(ret) != 2: + continue + try: + float(ret[1]) + except ValueError: + continue + filtered.append(lib) + float_cmp = lambda x: float(x.rsplit('.so.', maxsplit=1)[1]) + return sorted(filtered, key=float_cmp, reverse=True) + + @classmethod + def _get_trials_from_pattern(cls, pattern, directory, libname): + f = Path(directory) / pattern.format(libname) + # Globbing for OpenBSD + if '*' in pattern: + # NOTE: globbing matches directories and broken symlinks + # so we have to do an isfile test on it later + return [Path(x) for x in cls._sort_shlibs_openbsd(glob.glob(str(f)))] + return [f] + + @staticmethod + def _get_file_from_list(env, files: T.List[str]) -> Path: + ''' + We just check whether the library exists. We can't do a link check + because the library might have unresolved symbols that require other + libraries. On macOS we check if the library matches our target + architecture. + ''' + # If not building on macOS for Darwin, do a simple file check + paths = [Path(f) for f in files] + if not env.machines.host.is_darwin() or not env.machines.build.is_darwin(): + for p in paths: + if p.is_file(): + return p + # Run `lipo` and check if the library supports the arch we want + for p in paths: + if not p.is_file(): + continue + archs = mesonlib.darwin_get_object_archs(str(p)) + if archs and env.machines.host.cpu_family in archs: + return p + else: + mlog.debug('Rejected {}, supports {} but need {}' + .format(p, archs, env.machines.host.cpu_family)) + return None + + @functools.lru_cache() + def output_is_64bit(self, env): + ''' + returns true if the output produced is 64-bit, false if 32-bit + ''' + return self.sizeof('void *', '', env) == 8 + + def find_library_real(self, libname, env, extra_dirs, code, libtype: LibType): + # First try if we can just add the library as -l. + # Gcc + co seem to prefer builtin lib dirs to -L dirs. + # Only try to find std libs if no extra dirs specified. + # The built-in search procedure will always favour .so and then always + # search for .a. This is only allowed if libtype is LibType.PREFER_SHARED + if ((not extra_dirs and libtype is LibType.PREFER_SHARED) or + libname in self.internal_libs): + cargs = ['-l' + libname] + largs = self.get_allow_undefined_link_args() + extra_args = cargs + self.linker_to_compiler_args(largs) + + if self.links(code, env, extra_args=extra_args, disable_cache=True)[0]: + return cargs + # Don't do a manual search for internal libs + if libname in self.internal_libs: + return None + # Not found or we want to use a specific libtype? Try to find the + # library file itself. + patterns = self.get_library_naming(env, libtype) + # try to detect if we are 64-bit or 32-bit. If we can't + # detect, we will just skip path validity checks done in + # get_library_dirs() call + try: + if self.output_is_64bit(env): + elf_class = 2 + else: + elf_class = 1 + except (mesonlib.MesonException, KeyError): # TODO evaluate if catching KeyError is wanted here + elf_class = 0 + # Search in the specified dirs, and then in the system libraries + for d in itertools.chain(extra_dirs, self.get_library_dirs(env, elf_class)): + for p in patterns: + trial = self._get_trials_from_pattern(p, d, libname) + if not trial: + continue + trial = self._get_file_from_list(env, trial) + if not trial: + continue + return [trial.as_posix()] + return None + + def find_library_impl(self, libname, env, extra_dirs, code, libtype: LibType): + # These libraries are either built-in or invalid + if libname in self.ignore_libs: + return [] + if isinstance(extra_dirs, str): + extra_dirs = [extra_dirs] + key = (tuple(self.exelist), libname, tuple(extra_dirs), code, libtype) + if key not in self.find_library_cache: + value = self.find_library_real(libname, env, extra_dirs, code, libtype) + self.find_library_cache[key] = value + else: + value = self.find_library_cache[key] + if value is None: + return None + return value[:] + + def find_library(self, libname, env, extra_dirs, libtype: LibType = LibType.PREFER_SHARED): + code = 'int main(void) { return 0; }\n' + return self.find_library_impl(libname, env, extra_dirs, code, libtype) + + def find_framework_paths(self, env): + ''' + These are usually /Library/Frameworks and /System/Library/Frameworks, + unless you select a particular macOS SDK with the -isysroot flag. + You can also add to this by setting -F in CFLAGS. + ''' + if self.id != 'clang': + raise mesonlib.MesonException('Cannot find framework path with non-clang compiler') + # Construct the compiler command-line + commands = self.get_exelist() + ['-v', '-E', '-'] + commands += self.get_always_args() + # Add CFLAGS/CXXFLAGS/OBJCFLAGS/OBJCXXFLAGS from the env + commands += env.coredata.get_external_args(self.for_machine, self.language) + mlog.debug('Finding framework path by running: ', ' '.join(commands), '\n') + os_env = os.environ.copy() + os_env['LC_ALL'] = 'C' + _, _, stde = mesonlib.Popen_safe(commands, env=os_env, stdin=subprocess.PIPE) + paths = [] + for line in stde.split('\n'): + if '(framework directory)' not in line: + continue + # line is of the form: + # ` /path/to/framework (framework directory)` + paths.append(line[:-21].strip()) + return paths + + def find_framework_real(self, name, env, extra_dirs, allow_system): + code = 'int main(void) { return 0; }' + link_args = [] + for d in extra_dirs: + link_args += ['-F' + d] + # We can pass -Z to disable searching in the system frameworks, but + # then we must also pass -L/usr/lib to pick up libSystem.dylib + extra_args = [] if allow_system else ['-Z', '-L/usr/lib'] + link_args += ['-framework', name] + if self.links(code, env, extra_args=(extra_args + link_args), disable_cache=True)[0]: + return link_args + + def find_framework_impl(self, name, env, extra_dirs, allow_system): + if isinstance(extra_dirs, str): + extra_dirs = [extra_dirs] + key = (tuple(self.exelist), name, tuple(extra_dirs), allow_system) + if key in self.find_framework_cache: + value = self.find_framework_cache[key] + else: + value = self.find_framework_real(name, env, extra_dirs, allow_system) + self.find_framework_cache[key] = value + if value is None: + return None + return value[:] + + def find_framework(self, name, env, extra_dirs, allow_system=True): + ''' + Finds the framework with the specified name, and returns link args for + the same or returns None when the framework is not found. + ''' + if self.id != 'clang': + raise mesonlib.MesonException('Cannot find frameworks with non-clang compiler') + return self.find_framework_impl(name, env, extra_dirs, allow_system) + + def get_crt_compile_args(self, crt_val: str, buildtype: str) -> T.List[str]: + return [] + + def get_crt_link_args(self, crt_val: str, buildtype: str) -> T.List[str]: + return [] + + def thread_flags(self, env): + host_m = env.machines[self.for_machine] + if host_m.is_haiku() or host_m.is_darwin(): + return [] + return ['-pthread'] + + def thread_link_flags(self, env: 'Environment') -> T.List[str]: + return self.linker.thread_flags(env) + + def linker_to_compiler_args(self, args): + return args + + def has_arguments(self, args: T.Sequence[str], env, code: str, mode: str) -> T.Tuple[bool, bool]: + return self.compiles(code, env, extra_args=args, mode=mode) + + def has_multi_arguments(self, args, env): + for arg in args[:]: + # some compilers, e.g. GCC, don't warn for unsupported warning-disable + # flags, so when we are testing a flag like "-Wno-forgotten-towel", also + # check the equivalent enable flag too "-Wforgotten-towel" + if arg.startswith('-Wno-'): + args.append('-W' + arg[5:]) + if arg.startswith('-Wl,'): + mlog.warning('{} looks like a linker argument, ' + 'but has_argument and other similar methods only ' + 'support checking compiler arguments. Using them ' + 'to check linker arguments are never supported, ' + 'and results are likely to be wrong regardless of ' + 'the compiler you are using. has_link_argument or ' + 'other similar method can be used instead.' + .format(arg)) + code = 'extern int i;\nint i;\n' + return self.has_arguments(args, env, code, mode='compile') + + def has_multi_link_arguments(self, args, env): + # First time we check for link flags we need to first check if we have + # --fatal-warnings, otherwise some linker checks could give some + # false positive. + args = self.linker.fatal_warnings() + args + args = self.linker_to_compiler_args(args) + code = 'int main(void) { return 0; }\n' + return self.has_arguments(args, env, code, mode='link') + + @staticmethod + def concatenate_string_literals(s): + pattern = re.compile(r'(?P
.*([^\\]")|^")(?P([^\\"]|\\.)*)"\s+"(?P([^\\"]|\\.)*)(?P".*)')
+        ret = s
+        m = pattern.match(ret)
+        while m:
+            ret = ''.join(m.group('pre', 'str1', 'str2', 'post'))
+            m = pattern.match(ret)
+        return ret
+
+    def get_has_func_attribute_extra_args(self, name):
+        # Most compilers (such as GCC and Clang) only warn about unknown or
+        # ignored attributes, so force an error. Overriden in GCC and Clang
+        # mixins.
+        return ['-Werror']
+
+    def has_func_attribute(self, name, env):
+        # Just assume that if we're not on windows that dllimport and dllexport
+        # don't work
+        m = env.machines[self.for_machine]
+        if not (m.is_windows() or m.is_cygwin()):
+            if name in ['dllimport', 'dllexport']:
+                return False, False
+
+        return self.compiles(self.attribute_check_func(name), env,
+                             extra_args=self.get_has_func_attribute_extra_args(name))
+
+    def get_disable_assert_args(self) -> T.List[str]:
+        return ['-DNDEBUG']
diff --git a/meson/mesonbuild/compilers/mixins/elbrus.py b/meson/mesonbuild/compilers/mixins/elbrus.py
new file mode 100644
index 000000000..db743d806
--- /dev/null
+++ b/meson/mesonbuild/compilers/mixins/elbrus.py
@@ -0,0 +1,83 @@
+# Copyright 2019 The meson development team
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+"""Abstractions for the Elbrus family of compilers."""
+
+import os
+import typing as T
+import subprocess
+import re
+
+from .gnu import GnuLikeCompiler
+from .gnu import gnu_optimization_args
+from ...mesonlib import Popen_safe
+
+if T.TYPE_CHECKING:
+    from ...environment import Environment
+
+
+class ElbrusCompiler(GnuLikeCompiler):
+    # Elbrus compiler is nearly like GCC, but does not support
+    # PCH, LTO, sanitizers and color output as of version 1.21.x.
+    def __init__(self):
+        super().__init__()
+        self.id = 'lcc'
+        self.base_options = ['b_pgo', 'b_coverage',
+                             'b_ndebug', 'b_staticpic',
+                             'b_lundef', 'b_asneeded']
+
+    # FIXME: use _build_wrapper to call this so that linker flags from the env
+    # get applied
+    def get_library_dirs(self, env: 'Environment', elf_class: T.Optional[int] = None) -> T.List[str]:
+        os_env = os.environ.copy()
+        os_env['LC_ALL'] = 'C'
+        stdo = Popen_safe(self.exelist + ['--print-search-dirs'], env=os_env)[1]
+        for line in stdo.split('\n'):
+            if line.startswith('libraries:'):
+                # lcc does not include '=' in --print-search-dirs output. Also it could show nonexistent dirs.
+                libstr = line.split(' ', 1)[1]
+                return [os.path.realpath(p) for p in libstr.split(':') if os.path.exists(p)]
+        return []
+
+    def get_program_dirs(self, env: 'Environment') -> T.List[str]:
+        os_env = os.environ.copy()
+        os_env['LC_ALL'] = 'C'
+        stdo = Popen_safe(self.exelist + ['--print-search-dirs'], env=os_env)[1]
+        for line in stdo.split('\n'):
+            if line.startswith('programs:'):
+                # lcc does not include '=' in --print-search-dirs output.
+                libstr = line.split(' ', 1)[1]
+                return [os.path.realpath(p) for p in libstr.split(':')]
+        return []
+
+    def get_default_include_dirs(self) -> T.List[str]:
+        os_env = os.environ.copy()
+        os_env['LC_ALL'] = 'C'
+        p = subprocess.Popen(self.exelist + ['-xc', '-E', '-v', '-'], env=os_env, stdin=subprocess.DEVNULL, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
+        stderr = p.stderr.read().decode('utf-8', errors='replace')
+        includes = []
+        for line in stderr.split('\n'):
+            if line.lstrip().startswith('--sys_include'):
+                includes.append(re.sub(r'\s*\\$', '', re.sub(r'^\s*--sys_include\s*', '', line)))
+        return includes
+
+    def get_optimization_args(self, optimization_level: str) -> T.List[str]:
+        return gnu_optimization_args[optimization_level]
+
+    def get_pch_suffix(self) -> str:
+        # Actually it's not supported for now, but probably will be supported in future
+        return 'pch'
+
+    def openmp_flags(self) -> T.List[str]:
+        return ['-fopenmp']
diff --git a/meson/mesonbuild/compilers/mixins/emscripten.py b/meson/mesonbuild/compilers/mixins/emscripten.py
new file mode 100644
index 000000000..10f4b25cc
--- /dev/null
+++ b/meson/mesonbuild/compilers/mixins/emscripten.py
@@ -0,0 +1,53 @@
+# Copyright 2019 The meson development team
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+"""Provides a mixin for shared code between C and C++ Emscripten compilers."""
+
+import os.path
+import typing as T
+
+from ... import coredata
+
+if T.TYPE_CHECKING:
+    from ..environment import Environment
+
+
+class EmscriptenMixin:
+
+    def _get_compile_output(self, dirname, mode):
+        # In pre-processor mode, the output is sent to stdout and discarded
+        if mode == 'preprocess':
+            return None
+        # Unlike sane toolchains, emcc infers the kind of output from its name.
+        # This is the only reason why this method is overridden; compiler tests
+        # do not work well with the default exe/obj suffices.
+        if mode == 'link':
+            suffix = 'js'
+        else:
+            suffix = 'wasm'
+        return os.path.join(dirname, 'output.' + suffix)
+
+    def thread_flags(self, env: 'Environment') -> T.List[str]:
+        return ['-s', 'USE_PTHREADS=1']
+
+    def get_options(self):
+        opts = super().get_options()
+        opts.update({
+            '{}_thread_count'.format(self.language): coredata.UserIntegerOption(
+                'Number of threads to use in web assembly, set to 0 to disable',
+                (0, None, 4),  # Default was picked at random
+            ),
+        })
+
+        return opts
diff --git a/meson/mesonbuild/compilers/mixins/gnu.py b/meson/mesonbuild/compilers/mixins/gnu.py
new file mode 100644
index 000000000..83f70474d
--- /dev/null
+++ b/meson/mesonbuild/compilers/mixins/gnu.py
@@ -0,0 +1,376 @@
+# Copyright 2019 The meson development team
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+"""Provides mixins for GNU compilers and GNU-like compilers."""
+
+import abc
+import functools
+import os
+import pathlib
+import re
+import subprocess
+import typing as T
+
+from ... import mesonlib
+from ... import mlog
+
+if T.TYPE_CHECKING:
+    from ...coredata import UserOption  # noqa: F401
+    from ...environment import Environment
+
+# XXX: prevent circular references.
+# FIXME: this really is a posix interface not a c-like interface
+clike_debug_args = {
+    False: [],
+    True: ['-g'],
+}  # type: T.Dict[bool, T.List[str]]
+
+gnulike_buildtype_args = {
+    'plain': [],
+    'debug': [],
+    'debugoptimized': [],
+    'release': [],
+    'minsize': [],
+    'custom': [],
+}  # type: T.Dict[str, T.List[str]]
+
+gnu_optimization_args = {
+    '0': [],
+    'g': ['-Og'],
+    '1': ['-O1'],
+    '2': ['-O2'],
+    '3': ['-O3'],
+    's': ['-Os'],
+}  # type: T.Dict[str, T.List[str]]
+
+gnulike_instruction_set_args = {
+    'mmx': ['-mmmx'],
+    'sse': ['-msse'],
+    'sse2': ['-msse2'],
+    'sse3': ['-msse3'],
+    'ssse3': ['-mssse3'],
+    'sse41': ['-msse4.1'],
+    'sse42': ['-msse4.2'],
+    'avx': ['-mavx'],
+    'avx2': ['-mavx2'],
+    'neon': ['-mfpu=neon'],
+}  # type: T.Dict[str, T.List[str]]
+
+gnu_symbol_visibility_args = {
+    '': [],
+    'default': ['-fvisibility=default'],
+    'internal': ['-fvisibility=internal'],
+    'hidden': ['-fvisibility=hidden'],
+    'protected': ['-fvisibility=protected'],
+    'inlineshidden': ['-fvisibility=hidden', '-fvisibility-inlines-hidden'],
+}  # type: T.Dict[str, T.List[str]]
+
+gnu_color_args = {
+    'auto': ['-fdiagnostics-color=auto'],
+    'always': ['-fdiagnostics-color=always'],
+    'never': ['-fdiagnostics-color=never'],
+}  # type: T.Dict[str, T.List[str]]
+
+
+@functools.lru_cache(maxsize=None)
+def gnulike_default_include_dirs(compiler: T.Tuple[str], lang: str) -> T.List[str]:
+    lang_map = {
+        'c': 'c',
+        'cpp': 'c++',
+        'objc': 'objective-c',
+        'objcpp': 'objective-c++'
+    }
+    if lang not in lang_map:
+        return []
+    lang = lang_map[lang]
+    env = os.environ.copy()
+    env["LC_ALL"] = 'C'
+    cmd = list(compiler) + ['-x{}'.format(lang), '-E', '-v', '-']
+    p = subprocess.Popen(
+        cmd,
+        stdin=subprocess.DEVNULL,
+        stderr=subprocess.STDOUT,
+        stdout=subprocess.PIPE,
+        env=env
+    )
+    stdout = p.stdout.read().decode('utf-8', errors='replace')
+    parse_state = 0
+    paths = []
+    for line in stdout.split('\n'):
+        line = line.strip(' \n\r\t')
+        if parse_state == 0:
+            if line == '#include "..." search starts here:':
+                parse_state = 1
+        elif parse_state == 1:
+            if line == '#include <...> search starts here:':
+                parse_state = 2
+            else:
+                paths.append(line)
+        elif parse_state == 2:
+            if line == 'End of search list.':
+                break
+            else:
+                paths.append(line)
+    if not paths:
+        mlog.warning('No include directory found parsing "{cmd}" output'.format(cmd=" ".join(cmd)))
+    # Append a normalized copy of paths to make path lookup easier
+    paths += [os.path.normpath(x) for x in paths]
+    return paths
+
+
+class GnuLikeCompiler(metaclass=abc.ABCMeta):
+    """
+    GnuLikeCompiler is a common interface to all compilers implementing
+    the GNU-style commandline interface. This includes GCC, Clang
+    and ICC. Certain functionality between them is different and requires
+    that the actual concrete subclass define their own implementation.
+    """
+
+    LINKER_PREFIX = '-Wl,'
+
+    def __init__(self):
+        self.base_options = ['b_pch', 'b_lto', 'b_pgo', 'b_coverage',
+                             'b_ndebug', 'b_staticpic', 'b_pie']
+        if not (self.info.is_windows() or self.info.is_cygwin() or self.info.is_openbsd()):
+            self.base_options.append('b_lundef')
+        if not self.info.is_windows() or self.info.is_cygwin():
+            self.base_options.append('b_asneeded')
+        if not self.info.is_hurd():
+            self.base_options.append('b_sanitize')
+        # All GCC-like backends can do assembly
+        self.can_compile_suffixes.add('s')
+
+    def get_pic_args(self) -> T.List[str]:
+        if self.info.is_windows() or self.info.is_cygwin() or self.info.is_darwin():
+            return [] # On Window and OS X, pic is always on.
+        return ['-fPIC']
+
+    def get_pie_args(self) -> T.List[str]:
+        return ['-fPIE']
+
+    def get_buildtype_args(self, buildtype: str) -> T.List[str]:
+        return gnulike_buildtype_args[buildtype]
+
+    @abc.abstractmethod
+    def get_optimization_args(self, optimization_level: str) -> T.List[str]:
+        raise NotImplementedError("get_optimization_args not implemented")
+
+    def get_debug_args(self, is_debug: bool) -> T.List[str]:
+        return clike_debug_args[is_debug]
+
+    @abc.abstractmethod
+    def get_pch_suffix(self) -> str:
+        raise NotImplementedError("get_pch_suffix not implemented")
+
+    def split_shlib_to_parts(self, fname: str) -> T.Tuple[str, str]:
+        return os.path.dirname(fname), fname
+
+    def get_instruction_set_args(self, instruction_set: str) -> T.Optional[T.List[str]]:
+        return gnulike_instruction_set_args.get(instruction_set, None)
+
+    def get_default_include_dirs(self) -> T.List[str]:
+        return gnulike_default_include_dirs(tuple(self.exelist), self.language)
+
+    @abc.abstractmethod
+    def openmp_flags(self) -> T.List[str]:
+        raise NotImplementedError("openmp_flags not implemented")
+
+    def gnu_symbol_visibility_args(self, vistype: str) -> T.List[str]:
+        return gnu_symbol_visibility_args[vistype]
+
+    def gen_vs_module_defs_args(self, defsfile: str) -> T.List[str]:
+        if not isinstance(defsfile, str):
+            raise RuntimeError('Module definitions file should be str')
+        # On Windows targets, .def files may be specified on the linker command
+        # line like an object file.
+        if self.info.is_windows() or self.info.is_cygwin():
+            return [defsfile]
+        # For other targets, discard the .def file.
+        return []
+
+    def get_argument_syntax(self) -> str:
+        return 'gcc'
+
+    def get_profile_generate_args(self) -> T.List[str]:
+        return ['-fprofile-generate']
+
+    def get_profile_use_args(self) -> T.List[str]:
+        return ['-fprofile-use', '-fprofile-correction']
+
+    def get_gui_app_args(self, value: bool) -> T.List[str]:
+        if self.info.is_windows() or self.info.is_cygwin():
+            return ['-mwindows' if value else '-mconsole']
+        return []
+
+    def compute_parameters_with_absolute_paths(self, parameter_list: T.List[str], build_dir: str) -> T.List[str]:
+        for idx, i in enumerate(parameter_list):
+            if i[:2] == '-I' or i[:2] == '-L':
+                parameter_list[idx] = i[:2] + os.path.normpath(os.path.join(build_dir, i[2:]))
+
+        return parameter_list
+
+    @functools.lru_cache()
+    def _get_search_dirs(self, env: 'Environment') -> str:
+        extra_args = ['--print-search-dirs']
+        stdo = None
+        with self._build_wrapper('', env, extra_args=extra_args,
+                                 dependencies=None, mode='compile',
+                                 want_output=True) as p:
+            stdo = p.stdo
+        return stdo
+
+    def _split_fetch_real_dirs(self, pathstr: str) -> T.List[str]:
+        # We need to use the path separator used by the compiler for printing
+        # lists of paths ("gcc --print-search-dirs"). By default
+        # we assume it uses the platform native separator.
+        pathsep = os.pathsep
+
+        # clang uses ':' instead of ';' on Windows https://reviews.llvm.org/D61121
+        # so we need to repair things like 'C:\foo:C:\bar'
+        if pathsep == ';':
+            pathstr = re.sub(r':([^/\\])', r';\1', pathstr)
+
+        # pathlib treats empty paths as '.', so filter those out
+        paths = [p for p in pathstr.split(pathsep) if p]
+
+        result = []
+        for p in paths:
+            # GCC returns paths like this:
+            # /usr/lib/gcc/x86_64-linux-gnu/8/../../../../x86_64-linux-gnu/lib
+            # It would make sense to normalize them to get rid of the .. parts
+            # Sadly when you are on a merged /usr fs it also kills these:
+            # /lib/x86_64-linux-gnu
+            # since /lib is a symlink to /usr/lib. This would mean
+            # paths under /lib would be considered not a "system path",
+            # which is wrong and breaks things. Store everything, just to be sure.
+            pobj = pathlib.Path(p)
+            unresolved = pobj.as_posix()
+            if pobj.exists():
+                if unresolved not in result:
+                    result.append(unresolved)
+                try:
+                    resolved = pathlib.Path(p).resolve().as_posix()
+                    if resolved not in result:
+                        result.append(resolved)
+                except FileNotFoundError:
+                    pass
+        return result
+
+    def get_compiler_dirs(self, env: 'Environment', name: str) -> T.List[str]:
+        '''
+        Get dirs from the compiler, either `libraries:` or `programs:`
+        '''
+        stdo = self._get_search_dirs(env)
+        for line in stdo.split('\n'):
+            if line.startswith(name + ':'):
+                return self._split_fetch_real_dirs(line.split('=', 1)[1])
+        return []
+
+    def get_lto_compile_args(self) -> T.List[str]:
+        return ['-flto']
+
+    def sanitizer_compile_args(self, value: str) -> T.List[str]:
+        if value == 'none':
+            return []
+        args = ['-fsanitize=' + value]
+        if 'address' in value:  # for -fsanitize=address,undefined
+            args.append('-fno-omit-frame-pointer')
+        return args
+
+    def get_output_args(self, target: str) -> T.List[str]:
+        return ['-o', target]
+
+    def get_dependency_gen_args(self, outtarget, outfile):
+        return ['-MD', '-MQ', outtarget, '-MF', outfile]
+
+    def get_compile_only_args(self) -> T.List[str]:
+        return ['-c']
+
+    def get_include_args(self, path: str, is_system: bool) -> T.List[str]:
+        if not path:
+            path = '.'
+        if is_system:
+            return ['-isystem' + path]
+        return ['-I' + path]
+
+    @classmethod
+    def use_linker_args(cls, linker: str) -> T.List[str]:
+        if linker not in {'gold', 'bfd', 'lld'}:
+            raise mesonlib.MesonException(
+                'Unsupported linker, only bfd, gold, and lld are supported, '
+                'not {}.'.format(linker))
+        return ['-fuse-ld={}'.format(linker)]
+
+    def get_coverage_args(self) -> T.List[str]:
+        return ['--coverage']
+
+
+class GnuCompiler(GnuLikeCompiler):
+    """
+    GnuCompiler represents an actual GCC in its many incarnations.
+    Compilers imitating GCC (Clang/Intel) should use the GnuLikeCompiler ABC.
+    """
+
+    def __init__(self, defines: T.Dict[str, str]):
+        super().__init__()
+        self.id = 'gcc'
+        self.defines = defines or {}
+        self.base_options.append('b_colorout')
+
+    def get_colorout_args(self, colortype: str) -> T.List[str]:
+        if mesonlib.version_compare(self.version, '>=4.9.0'):
+            return gnu_color_args[colortype][:]
+        return []
+
+    def get_warn_args(self, level: str) -> T.List[str]:
+        args = super().get_warn_args(level)
+        if mesonlib.version_compare(self.version, '<4.8.0') and '-Wpedantic' in args:
+            # -Wpedantic was added in 4.8.0
+            # https://gcc.gnu.org/gcc-4.8/changes.html
+            args[args.index('-Wpedantic')] = '-pedantic'
+        return args
+
+    def has_builtin_define(self, define: str) -> bool:
+        return define in self.defines
+
+    def get_builtin_define(self, define: str) -> T.Optional[str]:
+        if define in self.defines:
+            return self.defines[define]
+        return None
+
+    def get_optimization_args(self, optimization_level: str) -> T.List[str]:
+        return gnu_optimization_args[optimization_level]
+
+    def get_pch_suffix(self) -> str:
+        return 'gch'
+
+    def openmp_flags(self) -> T.List[str]:
+        return ['-fopenmp']
+
+    def has_arguments(self, args, env, code, mode):
+        # For some compiler command line arguments, the GNU compilers will
+        # emit a warning on stderr indicating that an option is valid for a
+        # another language, but still complete with exit_success
+        with self._build_wrapper(code, env, args, None, mode) as p:
+            result = p.returncode == 0
+            if self.language in {'cpp', 'objcpp'} and 'is valid for C/ObjC' in p.stde:
+                result = False
+            if self.language in {'c', 'objc'} and 'is valid for C++/ObjC++' in p.stde:
+                result = False
+        return result, p.cached
+
+    def get_has_func_attribute_extra_args(self, name):
+        # GCC only warns about unknown or ignored attributes, so force an
+        # error.
+        return ['-Werror=attributes']
diff --git a/meson/mesonbuild/compilers/mixins/intel.py b/meson/mesonbuild/compilers/mixins/intel.py
new file mode 100644
index 000000000..fbbfdc54c
--- /dev/null
+++ b/meson/mesonbuild/compilers/mixins/intel.py
@@ -0,0 +1,188 @@
+# Copyright 2019 The meson development team
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+"""Abstractions for the Intel Compiler families.
+
+Intel provides both a posix/gcc-like compiler (ICC) for MacOS and Linux,
+with Meson mixin IntelGnuLikeCompiler.
+For Windows, the Intel msvc-like compiler (ICL) Meson mixin
+is IntelVisualStudioLikeCompiler.
+"""
+
+import os
+import typing as T
+
+from ... import mesonlib
+from .gnu import GnuLikeCompiler
+from .visualstudio import VisualStudioLikeCompiler
+
+if T.TYPE_CHECKING:
+    import subprocess  # noqa: F401
+
+# XXX: avoid circular dependencies
+# TODO: this belongs in a posix compiler class
+# NOTE: the default Intel optimization is -O2, unlike GNU which defaults to -O0.
+# this can be surprising, particularly for debug builds, so we specify the
+# default as -O0.
+# https://software.intel.com/en-us/cpp-compiler-developer-guide-and-reference-o
+# https://software.intel.com/en-us/cpp-compiler-developer-guide-and-reference-g
+# https://software.intel.com/en-us/fortran-compiler-developer-guide-and-reference-o
+# https://software.intel.com/en-us/fortran-compiler-developer-guide-and-reference-g
+# https://software.intel.com/en-us/fortran-compiler-developer-guide-and-reference-traceback
+# https://gcc.gnu.org/onlinedocs/gcc/Optimize-Options.html
+
+
+class IntelGnuLikeCompiler(GnuLikeCompiler):
+    """
+    Tested on linux for ICC 14.0.3, 15.0.6, 16.0.4, 17.0.1, 19.0
+    debugoptimized: -g -O2
+    release: -O3
+    minsize: -O2
+    """
+
+    BUILD_ARGS = {
+        'plain': [],
+        'debug': ["-g", "-traceback"],
+        'debugoptimized': ["-g", "-traceback"],
+        'release': [],
+        'minsize': [],
+        'custom': [],
+    }  # type: T.Dict[str, T.List[str]]
+
+    OPTIM_ARGS = {
+        '0': ['-O0'],
+        'g': ['-O0'],
+        '1': ['-O1'],
+        '2': ['-O2'],
+        '3': ['-O3'],
+        's': ['-Os'],
+    }
+
+    def __init__(self):
+        super().__init__()
+        # As of 19.0.0 ICC doesn't have sanitizer, color, or lto support.
+        #
+        # It does have IPO, which serves much the same purpose as LOT, but
+        # there is an unfortunate rule for using IPO (you can't control the
+        # name of the output file) which break assumptions meson makes
+        self.base_options = ['b_pch', 'b_lundef', 'b_asneeded', 'b_pgo',
+                             'b_coverage', 'b_ndebug', 'b_staticpic', 'b_pie']
+        self.id = 'intel'
+        self.lang_header = 'none'
+
+    def get_pch_suffix(self) -> str:
+        return 'pchi'
+
+    def get_pch_use_args(self, pch_dir: str, header: str) -> T.List[str]:
+        return ['-pch', '-pch_dir', os.path.join(pch_dir), '-x',
+                self.lang_header, '-include', header, '-x', 'none']
+
+    def get_pch_name(self, header_name: str) -> str:
+        return os.path.basename(header_name) + '.' + self.get_pch_suffix()
+
+    def openmp_flags(self) -> T.List[str]:
+        if mesonlib.version_compare(self.version, '>=15.0.0'):
+            return ['-qopenmp']
+        else:
+            return ['-openmp']
+
+    def compiles(self, *args, **kwargs) -> T.Tuple[bool, bool]:
+        # This covers a case that .get('foo', []) doesn't, that extra_args is
+        # defined and is None
+        extra_args = kwargs.get('extra_args') or []
+        kwargs['extra_args'] = [
+            extra_args,
+            '-diag-error', '10006',  # ignoring unknown option
+            '-diag-error', '10148',  # Option not supported
+            '-diag-error', '10155',  # ignoring argument required
+            '-diag-error', '10156',  # ignoring not argument allowed
+            '-diag-error', '10157',  # Ignoring argument of the wrong type
+            '-diag-error', '10158',  # Argument must be separate. Can be hit by trying an option like -foo-bar=foo when -foo=bar is a valid option but -foo-bar isn't
+            '-diag-error', '1292',   # unknown __attribute__
+        ]
+        return super().compiles(*args, **kwargs)
+
+    def get_profile_generate_args(self) -> T.List[str]:
+        return ['-prof-gen=threadsafe']
+
+    def get_profile_use_args(self) -> T.List[str]:
+        return ['-prof-use']
+
+    def get_buildtype_args(self, buildtype: str) -> T.List[str]:
+        return self.BUILD_ARGS[buildtype]
+
+    def get_optimization_args(self, optimization_level: str) -> T.List[str]:
+        return self.OPTIM_ARGS[optimization_level]
+
+
+class IntelVisualStudioLikeCompiler(VisualStudioLikeCompiler):
+
+    """Abstractions for ICL, the Intel compiler on Windows."""
+
+    BUILD_ARGS = {
+        'plain': [],
+        'debug': ["/Zi", "/traceback"],
+        'debugoptimized': ["/Zi", "/traceback"],
+        'release': [],
+        'minsize': [],
+        'custom': [],
+    }  # type: T.Dict[str, T.List[str]]
+
+    OPTIM_ARGS = {
+        '0': ['/O0'],
+        'g': ['/O0'],
+        '1': ['/O1'],
+        '2': ['/O2'],
+        '3': ['/O3'],
+        's': ['/Os'],
+    }
+
+    def __init__(self, target: str):
+        super().__init__(target)
+        self.id = 'intel-cl'
+
+    def compile(self, code, *, extra_args: T.Optional[T.List[str]] = None, **kwargs) -> T.Iterator['subprocess.Popen']:
+        # This covers a case that .get('foo', []) doesn't, that extra_args is
+        if kwargs.get('mode', 'compile') != 'link':
+            extra_args = extra_args.copy() if extra_args is not None else []
+            extra_args.extend([
+                '/Qdiag-error:10006',  # ignoring unknown option
+                '/Qdiag-error:10148',  # Option not supported
+                '/Qdiag-error:10155',  # ignoring argument required
+                '/Qdiag-error:10156',  # ignoring not argument allowed
+                '/Qdiag-error:10157',  # Ignoring argument of the wrong type
+                '/Qdiag-error:10158',  # Argument must be separate. Can be hit by trying an option like -foo-bar=foo when -foo=bar is a valid option but -foo-bar isn't
+            ])
+        return super().compile(code, extra_args, **kwargs)
+
+    def get_toolset_version(self) -> T.Optional[str]:
+        # Avoid circular dependencies....
+        from ...environment import search_version
+
+        # ICL provides a cl.exe that returns the version of MSVC it tries to
+        # emulate, so we'll get the version from that and pass it to the same
+        # function the real MSVC uses to calculate the toolset version.
+        _, _, err = mesonlib.Popen_safe(['cl.exe'])
+        v1, v2, *_ = search_version(err).split('.')
+        version = int(v1 + v2)
+        return self._calculate_toolset_version(version)
+
+    def openmp_flags(self) -> T.List[str]:
+        return ['/Qopenmp']
+
+    def get_buildtype_args(self, buildtype: str) -> T.List[str]:
+        return self.BUILD_ARGS[buildtype]
+
+    def get_optimization_args(self, optimization_level: str) -> T.List[str]:
+        return self.OPTIM_ARGS[optimization_level]
diff --git a/meson/mesonbuild/compilers/mixins/islinker.py b/meson/mesonbuild/compilers/mixins/islinker.py
new file mode 100644
index 000000000..a9967d618
--- /dev/null
+++ b/meson/mesonbuild/compilers/mixins/islinker.py
@@ -0,0 +1,126 @@
+# Copyright 2019 The Meson development team
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+"""Mixins for compilers that *are* linkers.
+
+While many compilers (such as gcc and clang) are used by meson to dispatch
+linker commands and other (like MSVC) are not, a few (such as DMD) actually
+are both the linker and compiler in one binary. This module provides mixin
+classes for those cases.
+"""
+
+import typing as T
+
+from ... import mesonlib
+
+if T.TYPE_CHECKING:
+    from ...coredata import OptionDictType
+    from ...environment import Environment
+
+
+class BasicLinkerIsCompilerMixin:
+
+    """Provides a baseline of methods that a linker would implement.
+
+    In every case this provides a "no" or "empty" answer. If a compiler
+    implements any of these it needs a different mixin or to override that
+    functionality itself.
+    """
+
+    def sanitizer_link_args(self, value: str) -> T.List[str]:
+        return []
+
+    def get_lto_link_args(self) -> T.List[str]:
+        return []
+
+    def can_linker_accept_rsp(self) -> bool:
+        return mesonlib.is_windows()
+
+    def get_linker_exelist(self) -> T.List[str]:
+        return self.exelist.copy()
+
+    def get_linker_output_args(self, output: str) -> T.List[str]:
+        return []
+
+    def get_linker_always_args(self) -> T.List[str]:
+        return []
+
+    def get_linker_lib_prefix(self) -> str:
+        return ''
+
+    def get_option_link_args(self, options: 'OptionDictType') -> T.List[str]:
+        return []
+
+    def has_multi_link_args(self, args: T.List[str], env: 'Environment') -> T.Tuple[bool, bool]:
+        return False, False
+
+    def get_link_debugfile_args(self, targetfile: str) -> T.List[str]:
+        return []
+
+    def get_std_shared_lib_link_args(self) -> T.List[str]:
+        return []
+
+    def get_std_shared_module_args(self, options: 'OptionDictType') -> T.List[str]:
+        return self.get_std_shared_lib_link_args()
+
+    def get_link_whole_for(self, args: T.List[str]) -> T.List[str]:
+        raise mesonlib.EnvironmentException(
+            'Linker {} does not support link_whole'.format(self.id))
+
+    def get_allow_undefined_link_args(self) -> T.List[str]:
+        raise mesonlib.EnvironmentException(
+            'Linker {} does not support allow undefined'.format(self.id))
+
+    def get_pie_link_args(self) -> T.List[str]:
+        m = 'Linker {} does not support position-independent executable'
+        raise mesonlib.EnvironmentException(m.format(self.id))
+
+    def get_undefined_link_args(self) -> T.List[str]:
+        return []
+
+    def get_coverage_link_args(self) -> T.List[str]:
+        m = "Linker {} doesn't implement coverage data generation.".format(self.id)
+        raise mesonlib.EnvironmentException(m)
+
+    def no_undefined_link_args(self) -> T.List[str]:
+        return []
+
+    def bitcode_args(self) -> T.List[str]:
+        raise mesonlib.MesonException("This linker doesn't support bitcode bundles")
+
+    def get_soname_args(self, for_machine: 'mesonlib.MachineChoice',
+                        prefix: str, shlib_name: str, suffix: str, soversion: str,
+                        darwin_versions: T.Tuple[str, str],
+                        is_shared_module: bool) -> T.List[str]:
+        raise mesonlib.MesonException("This linker doesn't support soname args")
+
+    def build_rpath_args(self, env: 'Environment', build_dir: str, from_dir: str,
+                         rpath_paths: str, build_rpath: str,
+                         install_rpath: str) -> T.Tuple[T.List[str], T.Set[bytes]]:
+        return ([], set())
+
+    def get_asneeded_args(self) -> T.List[str]:
+        return []
+
+    def get_buildtype_linker_args(self, buildtype: str) -> T.List[str]:
+        return []
+
+    def get_link_debugfile_name(self, target: str) -> str:
+        return ''
+
+    def thread_flags(self, env: 'Environment') -> T.List[str]:
+        return []
+
+    def thread_link_flags(self, env: 'Environment') -> T.List[str]:
+        return []
diff --git a/meson/mesonbuild/compilers/mixins/pgi.py b/meson/mesonbuild/compilers/mixins/pgi.py
new file mode 100644
index 000000000..77a7a28c5
--- /dev/null
+++ b/meson/mesonbuild/compilers/mixins/pgi.py
@@ -0,0 +1,99 @@
+# Copyright 2019 The meson development team
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+"""Abstractions for the PGI family of compilers."""
+
+import typing as T
+import os
+from pathlib import Path
+
+from ..compilers import clike_debug_args, clike_optimization_args
+
+pgi_buildtype_args = {
+    'plain': [],
+    'debug': [],
+    'debugoptimized': [],
+    'release': [],
+    'minsize': [],
+    'custom': [],
+}  # type: T.Dict[str, T.List[str]]
+
+
+class PGICompiler:
+    def __init__(self):
+        self.base_options = ['b_pch']
+        self.id = 'pgi'
+
+        default_warn_args = ['-Minform=inform']
+        self.warn_args = {'0': [],
+                          '1': default_warn_args,
+                          '2': default_warn_args,
+                          '3': default_warn_args}
+
+    def get_module_incdir_args(self) -> T.Tuple[str]:
+        return ('-module', )
+
+    def get_no_warn_args(self) -> T.List[str]:
+        return ['-silent']
+
+    def gen_import_library_args(self, implibname: str) -> T.List[str]:
+        return []
+
+    def get_pic_args(self) -> T.List[str]:
+        # PGI -fPIC is Linux only.
+        if self.info.is_linux():
+            return ['-fPIC']
+        return []
+
+    def openmp_flags(self) -> T.List[str]:
+        return ['-mp']
+
+    def get_buildtype_args(self, buildtype: str) -> T.List[str]:
+        return pgi_buildtype_args[buildtype]
+
+    def get_optimization_args(self, optimization_level: str) -> T.List[str]:
+        return clike_optimization_args[optimization_level]
+
+    def get_debug_args(self, is_debug: bool) -> T.List[str]:
+        return clike_debug_args[is_debug]
+
+    def compute_parameters_with_absolute_paths(self, parameter_list: T.List[str], build_dir: str) -> T.List[str]:
+        for idx, i in enumerate(parameter_list):
+            if i[:2] == '-I' or i[:2] == '-L':
+                parameter_list[idx] = i[:2] + os.path.normpath(os.path.join(build_dir, i[2:]))
+        return parameter_list
+
+    def get_dependency_gen_args(self, outtarget: str, outfile: str) -> T.List[str]:
+        return []
+
+    def get_always_args(self) -> T.List[str]:
+        return []
+
+    def get_pch_suffix(self) -> str:
+        # PGI defaults to .pch suffix for PCH on Linux and Windows with --pch option
+        return 'pch'
+
+    def get_pch_use_args(self, pch_dir: str, header: str) -> T.List[str]:
+        # PGI supports PCH for C++ only.
+        hdr = Path(pch_dir).resolve().parent / header
+        if self.language == 'cpp':
+            return ['--pch',
+                    '--pch_dir', str(hdr.parent),
+                    '-I{}'.format(hdr.parent)]
+        else:
+            return []
+
+    def thread_flags(self, env):
+        # PGI cannot accept -pthread, it's already threaded
+        return []
diff --git a/meson/mesonbuild/compilers/mixins/visualstudio.py b/meson/mesonbuild/compilers/mixins/visualstudio.py
new file mode 100644
index 000000000..93101b5b6
--- /dev/null
+++ b/meson/mesonbuild/compilers/mixins/visualstudio.py
@@ -0,0 +1,432 @@
+# Copyright 2019 The meson development team
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+"""Abstractions to simplify compilers that implement an MSVC compatible
+interface.
+"""
+
+import abc
+import os
+import typing as T
+
+from ... import mesonlib
+from ... import mlog
+
+if T.TYPE_CHECKING:
+    from ...environment import Environment
+
+vs32_instruction_set_args = {
+    'mmx': ['/arch:SSE'], # There does not seem to be a flag just for MMX
+    'sse': ['/arch:SSE'],
+    'sse2': ['/arch:SSE2'],
+    'sse3': ['/arch:AVX'], # VS leaped from SSE2 directly to AVX.
+    'sse41': ['/arch:AVX'],
+    'sse42': ['/arch:AVX'],
+    'avx': ['/arch:AVX'],
+    'avx2': ['/arch:AVX2'],
+    'neon': None,
+}  # T.Dicst[str, T.Optional[T.List[str]]]
+
+# The 64 bit compiler defaults to /arch:avx.
+vs64_instruction_set_args = {
+    'mmx': ['/arch:AVX'],
+    'sse': ['/arch:AVX'],
+    'sse2': ['/arch:AVX'],
+    'sse3': ['/arch:AVX'],
+    'ssse3': ['/arch:AVX'],
+    'sse41': ['/arch:AVX'],
+    'sse42': ['/arch:AVX'],
+    'avx': ['/arch:AVX'],
+    'avx2': ['/arch:AVX2'],
+    'neon': None,
+}  # T.Dicst[str, T.Optional[T.List[str]]]
+
+msvc_buildtype_args = {
+    'plain': [],
+    'debug': ["/ZI", "/Ob0", "/Od", "/RTC1"],
+    'debugoptimized': ["/Zi", "/Ob1"],
+    'release': ["/Ob2", "/Gw"],
+    'minsize': ["/Zi", "/Gw"],
+    'custom': [],
+}  # type: T.Dict[str, T.List[str]]
+
+# Clang-cl doesn't have /ZI, and /Zi and /Z7 do the same thing
+# quoting the docs (https://clang.llvm.org/docs/MSVCCompatibility.html):
+#
+# Clang emits relatively complete CodeView debug information if /Z7 or /Zi is
+# passed. Microsoft’s link.exe will transform the CodeView debug information
+# into a PDB
+clangcl_buildtype_args = msvc_buildtype_args.copy()
+clangcl_buildtype_args['debug'] = ['/Zi', '/Ob0', '/Od', '/RTC1']
+
+msvc_optimization_args = {
+    '0': [],
+    'g': ['/O0'],
+    '1': ['/O1'],
+    '2': ['/O2'],
+    '3': ['/O2'],
+    's': ['/O1'], # Implies /Os.
+}  # type: T.Dict[str, T.List[str]]
+
+msvc_debug_args = {
+    False: [],
+    True: []  # Fixme!
+}  # type: T.Dict[bool, T.List[str]]
+
+
+class VisualStudioLikeCompiler(metaclass=abc.ABCMeta):
+
+    """A common interface for all compilers implementing an MSVC-style
+    interface.
+
+    A number of compilers attempt to mimic MSVC, with varying levels of
+    success, such as Clang-CL and ICL (the Intel C/C++ Compiler for Windows).
+    This class implements as much common logic as possible.
+    """
+
+    std_warn_args = ['/W3']
+    std_opt_args = ['/O2']
+    # XXX: this is copied in this patch only to avoid circular dependencies
+    #ignore_libs = unixy_compiler_internal_libs
+    ignore_libs = ('m', 'c', 'pthread', 'dl', 'rt', 'execinfo')
+    internal_libs = ()
+
+    crt_args = {
+        'none': [],
+        'md': ['/MD'],
+        'mdd': ['/MDd'],
+        'mt': ['/MT'],
+        'mtd': ['/MTd'],
+    }  # type: T.Dict[str, T.List[str]]
+
+    # /showIncludes is needed for build dependency tracking in Ninja
+    # See: https://ninja-build.org/manual.html#_deps
+    always_args = ['/nologo', '/showIncludes']
+    warn_args = {
+        '0': [],
+        '1': ['/W2'],
+        '2': ['/W3'],
+        '3': ['/W4'],
+    }  # type: T.Dict[str, T.List[str]]
+
+    INVOKES_LINKER = False
+
+    def __init__(self, target: str):
+        self.base_options = ['b_pch', 'b_ndebug', 'b_vscrt'] # FIXME add lto, pgo and the like
+        self.target = target
+        self.is_64 = ('x64' in target) or ('x86_64' in target)
+        # do some canonicalization of target machine
+        if 'x86_64' in target:
+            self.machine = 'x64'
+        elif '86' in target:
+            self.machine = 'x86'
+        else:
+            self.machine = target
+        self.linker.machine = self.machine
+
+    # Override CCompiler.get_always_args
+    def get_always_args(self) -> T.List[str]:
+        return self.always_args
+
+    def get_pch_suffix(self) -> str:
+        return 'pch'
+
+    def get_pch_name(self, header: str) -> str:
+        chopped = os.path.basename(header).split('.')[:-1]
+        chopped.append(self.get_pch_suffix())
+        pchname = '.'.join(chopped)
+        return pchname
+
+    def get_pch_base_name(self, header: str) -> str:
+        # This needs to be implemented by inherting classes
+        raise NotImplementedError
+
+    def get_pch_use_args(self, pch_dir: str, header: str) -> T.List[str]:
+        base = self.get_pch_base_name(header)
+        pchname = self.get_pch_name(header)
+        return ['/FI' + base, '/Yu' + base, '/Fp' + os.path.join(pch_dir, pchname)]
+
+    def get_preprocess_only_args(self) -> T.List[str]:
+        return ['/EP']
+
+    def get_compile_only_args(self) -> T.List[str]:
+        return ['/c']
+
+    def get_no_optimization_args(self) -> T.List[str]:
+        return ['/Od']
+
+    def get_output_args(self, target: str) -> T.List[str]:
+        if target.endswith('.exe'):
+            return ['/Fe' + target]
+        return ['/Fo' + target]
+
+    def get_optimization_args(self, optimization_level: str) -> T.List[str]:
+        return msvc_optimization_args[optimization_level]
+
+    def get_debug_args(self, is_debug: bool) -> T.List[str]:
+        return msvc_debug_args[is_debug]
+
+    def get_dependency_gen_args(self, outtarget: str, outfile: str) -> T.List[str]:
+        return []
+
+    def linker_to_compiler_args(self, args: T.List[str]) -> T.List[str]:
+        return ['/link'] + args
+
+    def get_gui_app_args(self, value: bool) -> T.List[str]:
+        # the default is for the linker to guess the subsystem based on presence
+        # of main or WinMain symbols, so always be explicit
+        if value:
+            return ['/SUBSYSTEM:WINDOWS']
+        else:
+            return ['/SUBSYSTEM:CONSOLE']
+
+    def get_pic_args(self) -> T.List[str]:
+        return [] # PIC is handled by the loader on Windows
+
+    def gen_vs_module_defs_args(self, defsfile: str) -> T.List[str]:
+        if not isinstance(defsfile, str):
+            raise RuntimeError('Module definitions file should be str')
+        # With MSVC, DLLs only export symbols that are explicitly exported,
+        # so if a module defs file is specified, we use that to export symbols
+        return ['/DEF:' + defsfile]
+
+    def gen_pch_args(self, header: str, source: str, pchname: str) -> T.Tuple[str, T.List[str]]:
+        objname = os.path.splitext(pchname)[0] + '.obj'
+        return objname, ['/Yc' + header, '/Fp' + pchname, '/Fo' + objname]
+
+    def openmp_flags(self) -> T.List[str]:
+        return ['/openmp']
+
+    def openmp_link_flags(self) -> T.List[str]:
+        return []
+
+    # FIXME, no idea what these should be.
+    def thread_flags(self, env: 'Environment') -> T.List[str]:
+        return []
+
+    @classmethod
+    def unix_args_to_native(cls, args: T.List[str]) -> T.List[str]:
+        result = []
+        for i in args:
+            # -mms-bitfields is specific to MinGW-GCC
+            # -pthread is only valid for GCC
+            if i in ('-mms-bitfields', '-pthread'):
+                continue
+            if i.startswith('-LIBPATH:'):
+                i = '/LIBPATH:' + i[9:]
+            elif i.startswith('-L'):
+                i = '/LIBPATH:' + i[2:]
+            # Translate GNU-style -lfoo library name to the import library
+            elif i.startswith('-l'):
+                name = i[2:]
+                if name in cls.ignore_libs:
+                    # With MSVC, these are provided by the C runtime which is
+                    # linked in by default
+                    continue
+                else:
+                    i = name + '.lib'
+            elif i.startswith('-isystem'):
+                # just use /I for -isystem system include path s
+                if i.startswith('-isystem='):
+                    i = '/I' + i[9:]
+                else:
+                    i = '/I' + i[8:]
+            elif i.startswith('-idirafter'):
+                # same as -isystem, but appends the path instead
+                if i.startswith('-idirafter='):
+                    i = '/I' + i[11:]
+                else:
+                    i = '/I' + i[10:]
+            # -pthread in link flags is only used on Linux
+            elif i == '-pthread':
+                continue
+            result.append(i)
+        return result
+
+    @classmethod
+    def native_args_to_unix(cls, args: T.List[str]) -> T.List[str]:
+        result = []
+        for arg in args:
+            if arg.startswith(('/LIBPATH:', '-LIBPATH:')):
+                result.append('-L' + arg[9:])
+            elif arg.endswith(('.a', '.lib')) and not os.path.isabs(arg):
+                result.append('-l' + arg)
+            else:
+                result.append(arg)
+        return result
+
+    def get_werror_args(self) -> T.List[str]:
+        return ['/WX']
+
+    def get_include_args(self, path: str, is_system: bool) -> T.List[str]:
+        if path == '':
+            path = '.'
+        # msvc does not have a concept of system header dirs.
+        return ['-I' + path]
+
+    def compute_parameters_with_absolute_paths(self, parameter_list: T.List[str], build_dir: str) -> T.List[str]:
+        for idx, i in enumerate(parameter_list):
+            if i[:2] == '-I' or i[:2] == '/I':
+                parameter_list[idx] = i[:2] + os.path.normpath(os.path.join(build_dir, i[2:]))
+            elif i[:9] == '/LIBPATH:':
+                parameter_list[idx] = i[:9] + os.path.normpath(os.path.join(build_dir, i[9:]))
+
+        return parameter_list
+
+    # Visual Studio is special. It ignores some arguments it does not
+    # understand and you can't tell it to error out on those.
+    # http://stackoverflow.com/questions/15259720/how-can-i-make-the-microsoft-c-compiler-treat-unknown-flags-as-errors-rather-t
+    def has_arguments(self, args: T.List[str], env: 'Environment', code, mode: str) -> T.Tuple[bool, bool]:
+        warning_text = '4044' if mode == 'link' else '9002'
+        with self._build_wrapper(code, env, extra_args=args, mode=mode) as p:
+            if p.returncode != 0:
+                return False, p.cached
+            return not(warning_text in p.stde or warning_text in p.stdo), p.cached
+
+    def get_compile_debugfile_args(self, rel_obj: str, pch: bool = False) -> T.List[str]:
+        pdbarr = rel_obj.split('.')[:-1]
+        pdbarr += ['pdb']
+        args = ['/Fd' + '.'.join(pdbarr)]
+        return args
+
+    def get_instruction_set_args(self, instruction_set: str) -> T.Optional[T.List[str]]:
+        if self.is_64:
+            return vs64_instruction_set_args.get(instruction_set, None)
+        return vs32_instruction_set_args.get(instruction_set, None)
+
+    def _calculate_toolset_version(self, version: int) -> T.Optional[str]:
+        if version < 1310:
+            return '7.0'
+        elif version < 1400:
+            return '7.1' # (Visual Studio 2003)
+        elif version < 1500:
+            return '8.0' # (Visual Studio 2005)
+        elif version < 1600:
+            return '9.0' # (Visual Studio 2008)
+        elif version < 1700:
+            return '10.0' # (Visual Studio 2010)
+        elif version < 1800:
+            return '11.0' # (Visual Studio 2012)
+        elif version < 1900:
+            return '12.0' # (Visual Studio 2013)
+        elif version < 1910:
+            return '14.0' # (Visual Studio 2015)
+        elif version < 1920:
+            return '14.1' # (Visual Studio 2017)
+        elif version < 1930:
+            return '14.2' # (Visual Studio 2019)
+        mlog.warning('Could not find toolset for version {!r}'.format(self.version))
+        return None
+
+    def get_toolset_version(self) -> T.Optional[str]:
+        # See boost/config/compiler/visualc.cpp for up to date mapping
+        try:
+            version = int(''.join(self.version.split('.')[0:2]))
+        except ValueError:
+            return None
+        return self._calculate_toolset_version(version)
+
+    def get_default_include_dirs(self) -> T.List[str]:
+        if 'INCLUDE' not in os.environ:
+            return []
+        return os.environ['INCLUDE'].split(os.pathsep)
+
+    def get_crt_compile_args(self, crt_val: str, buildtype: str) -> T.List[str]:
+        if crt_val in self.crt_args:
+            return self.crt_args[crt_val]
+        assert(crt_val == 'from_buildtype')
+        # Match what build type flags used to do.
+        if buildtype == 'plain':
+            return []
+        elif buildtype == 'debug':
+            return self.crt_args['mdd']
+        elif buildtype == 'debugoptimized':
+            return self.crt_args['md']
+        elif buildtype == 'release':
+            return self.crt_args['md']
+        elif buildtype == 'minsize':
+            return self.crt_args['md']
+        else:
+            assert(buildtype == 'custom')
+            raise mesonlib.EnvironmentException('Requested C runtime based on buildtype, but buildtype is "custom".')
+
+    def has_func_attribute(self, name: str, env: 'Environment') -> T.Tuple[bool, bool]:
+        # MSVC doesn't have __attribute__ like Clang and GCC do, so just return
+        # false without compiling anything
+        return name in ['dllimport', 'dllexport'], False
+
+    def get_argument_syntax(self) -> str:
+        return 'msvc'
+
+
+class MSVCCompiler(VisualStudioLikeCompiler):
+
+    """Spcific to the Microsoft Compilers."""
+
+    def __init__(self, target: str):
+        super().__init__(target)
+        self.id = 'msvc'
+
+    def get_compile_debugfile_args(self, rel_obj: str, pch: bool = False) -> T.List[str]:
+        args = super().get_compile_debugfile_args(rel_obj, pch)
+        # When generating a PDB file with PCH, all compile commands write
+        # to the same PDB file. Hence, we need to serialize the PDB
+        # writes using /FS since we do parallel builds. This slows down the
+        # build obviously, which is why we only do this when PCH is on.
+        # This was added in Visual Studio 2013 (MSVC 18.0). Before that it was
+        # always on: https://msdn.microsoft.com/en-us/library/dn502518.aspx
+        if pch and mesonlib.version_compare(self.version, '>=18.0'):
+            args = ['/FS'] + args
+        return args
+
+    def get_instruction_set_args(self, instruction_set: str) -> T.Optional[T.List[str]]:
+        if self.version.split('.')[0] == '16' and instruction_set == 'avx':
+            # VS documentation says that this exists and should work, but
+            # it does not. The headers do not contain AVX intrinsics
+            # and they can not be called.
+            return None
+        return super().get_instruction_set_args(instruction_set)
+
+    def get_buildtype_args(self, buildtype: str) -> T.List[str]:
+        args = msvc_buildtype_args[buildtype]
+        if mesonlib.version_compare(self.version, '<18.0'):
+            args = [arg for arg in args if arg != '/Gw']
+        return args
+
+    def get_pch_base_name(self, header: str) -> str:
+        return os.path.basename(header)
+
+
+class ClangClCompiler(VisualStudioLikeCompiler):
+
+    """Spcific to Clang-CL."""
+
+    def __init__(self, target: str):
+        super().__init__(target)
+        self.id = 'clang-cl'
+
+    def has_arguments(self, args: T.List[str], env: 'Environment', code, mode: str) -> T.Tuple[bool, bool]:
+        if mode != 'link':
+            args = args + ['-Werror=unknown-argument']
+        return super().has_arguments(args, env, code, mode)
+
+    def get_toolset_version(self) -> T.Optional[str]:
+        # XXX: what is the right thing to do here?
+        return '14.1'
+
+    def get_pch_base_name(self, header: str) -> str:
+        return header
+
+    def get_buildtype_args(self, buildtype: str) -> T.List[str]:
+        return clangcl_buildtype_args[buildtype]
diff --git a/meson/mesonbuild/compilers/mixins/xc16.py b/meson/mesonbuild/compilers/mixins/xc16.py
new file mode 100644
index 000000000..f12ccccef
--- /dev/null
+++ b/meson/mesonbuild/compilers/mixins/xc16.py
@@ -0,0 +1,123 @@
+# Copyright 2012-2019 The Meson development team
+
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+
+#     http://www.apache.org/licenses/LICENSE-2.0
+
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+"""Representations specific to the Microchip XC16 C compiler family."""
+
+import os
+import typing as T
+
+from ...mesonlib import EnvironmentException
+
+if T.TYPE_CHECKING:
+    from ...environment import Environment
+
+xc16_buildtype_args = {
+    'plain': [],
+    'debug': [],
+    'debugoptimized': [],
+    'release': [],
+    'minsize': [],
+    'custom': [],
+}  # type: T.Dict[str, T.List[str]]
+
+xc16_optimization_args = {
+    '0': ['-O0'],
+    'g': ['-O0'],
+    '1': ['-O1'],
+    '2': ['-O2'],
+    '3': ['-O3'],
+    's': ['-Os']
+}  # type: T.Dict[str, T.List[str]]
+
+xc16_debug_args = {
+    False: [],
+    True: []
+}  # type: T.Dict[bool, T.List[str]]
+
+
+class Xc16Compiler:
+    def __init__(self):
+        if not self.is_cross:
+            raise EnvironmentException('xc16 supports only cross-compilation.')
+        self.id = 'xc16'
+        # Assembly
+        self.can_compile_suffixes.add('s')
+        default_warn_args = []  # type: T.List[str]
+        self.warn_args = {'0': [],
+                          '1': default_warn_args,
+                          '2': default_warn_args + [],
+                          '3': default_warn_args + []}
+
+    def get_always_args(self) -> T.List[str]:
+        return []
+
+    def get_pic_args(self) -> T.List[str]:
+        # PIC support is not enabled by default for xc16,
+        # if users want to use it, they need to add the required arguments explicitly
+        return []
+
+    def get_buildtype_args(self, buildtype: str) -> T.List[str]:
+        return xc16_buildtype_args[buildtype]
+
+    def get_pch_suffix(self) -> str:
+        return 'pch'
+
+    def get_pch_use_args(self, pch_dir: str, header: str) -> T.List[str]:
+        return []
+
+    # Override CCompiler.get_dependency_gen_args
+    def get_dependency_gen_args(self, outtarget: str, outfile: str) -> T.List[str]:
+        return []
+
+    def thread_flags(self, env: 'Environment') -> T.List[str]:
+        return []
+
+    def get_coverage_args(self) -> T.List[str]:
+        return []
+    
+    def get_no_stdinc_args(self) -> T.List[str]:
+        return ['-nostdinc']
+
+    def get_no_stdlib_link_args(self) -> T.List[str]:
+        return ['--nostdlib']
+
+    def get_optimization_args(self, optimization_level: str) -> T.List[str]:
+        return xc16_optimization_args[optimization_level]
+
+    def get_debug_args(self, is_debug: bool) -> T.List[str]:
+        return xc16_debug_args[is_debug]
+
+    @classmethod
+    def unix_args_to_native(cls, args: T.List[str]) -> T.List[str]:
+        result = []
+        for i in args:
+            if i.startswith('-D'):
+                i = '-D' + i[2:]
+            if i.startswith('-I'):
+                i = '-I' + i[2:]
+            if i.startswith('-Wl,-rpath='):
+                continue
+            elif i == '--print-search-dirs':
+                continue
+            elif i.startswith('-L'):
+                continue
+            result.append(i)
+        return result
+
+    def compute_parameters_with_absolute_paths(self, parameter_list: T.List[str], build_dir: str) -> T.List[str]:
+        for idx, i in enumerate(parameter_list):
+            if i[:9] == '-I':
+                parameter_list[idx] = i[:9] + os.path.normpath(os.path.join(build_dir, i[9:]))
+
+        return parameter_list
diff --git a/meson/mesonbuild/compilers/objc.py b/meson/mesonbuild/compilers/objc.py
new file mode 100644
index 000000000..254a609bf
--- /dev/null
+++ b/meson/mesonbuild/compilers/objc.py
@@ -0,0 +1,99 @@
+# Copyright 2012-2017 The Meson development team
+
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+
+#     http://www.apache.org/licenses/LICENSE-2.0
+
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import os.path, subprocess
+import typing as T
+
+from ..mesonlib import EnvironmentException, MachineChoice
+
+from .compilers import Compiler
+from .mixins.clike import CLikeCompiler
+from .mixins.gnu import GnuCompiler
+from .mixins.clang import ClangCompiler
+
+if T.TYPE_CHECKING:
+    from ..envconfig import MachineInfo
+
+
+class ObjCCompiler(CLikeCompiler, Compiler):
+
+    language = 'objc'
+
+    def __init__(self, exelist, version, for_machine: MachineChoice,
+                 is_cross: bool, info: 'MachineInfo',
+                 exe_wrap: T.Optional[str], **kwargs):
+        Compiler.__init__(self, exelist, version, for_machine, info, **kwargs)
+        CLikeCompiler.__init__(self, is_cross, exe_wrap)
+
+    @staticmethod
+    def get_display_language():
+        return 'Objective-C'
+
+    def sanity_check(self, work_dir, environment):
+        # TODO try to use sanity_check_impl instead of duplicated code
+        source_name = os.path.join(work_dir, 'sanitycheckobjc.m')
+        binary_name = os.path.join(work_dir, 'sanitycheckobjc')
+        extra_flags = []
+        extra_flags += environment.coredata.get_external_args(self.for_machine, self.language)
+        if self.is_cross:
+            extra_flags += self.get_compile_only_args()
+        else:
+            extra_flags += environment.coredata.get_external_link_args(self.for_machine, self.language)
+        with open(source_name, 'w') as ofile:
+            ofile.write('#import\n'
+                        'int main(void) { return 0; }\n')
+        pc = subprocess.Popen(self.exelist + extra_flags + [source_name, '-o', binary_name])
+        pc.wait()
+        if pc.returncode != 0:
+            raise EnvironmentException('ObjC compiler %s can not compile programs.' % self.name_string())
+        if self.is_cross:
+            # Can't check if the binaries run so we have to assume they do
+            return
+        pe = subprocess.Popen(binary_name)
+        pe.wait()
+        if pe.returncode != 0:
+            raise EnvironmentException('Executables created by ObjC compiler %s are not runnable.' % self.name_string())
+
+
+class GnuObjCCompiler(GnuCompiler, ObjCCompiler):
+    def __init__(self, exelist, version, for_machine: MachineChoice,
+                 is_cross, info: 'MachineInfo', exe_wrapper=None,
+                 defines=None, **kwargs):
+        ObjCCompiler.__init__(self, exelist, version, for_machine, is_cross,
+                              info, exe_wrapper, **kwargs)
+        GnuCompiler.__init__(self, defines)
+        default_warn_args = ['-Wall', '-Winvalid-pch']
+        self.warn_args = {'0': [],
+                          '1': default_warn_args,
+                          '2': default_warn_args + ['-Wextra'],
+                          '3': default_warn_args + ['-Wextra', '-Wpedantic']}
+
+
+class ClangObjCCompiler(ClangCompiler, ObjCCompiler):
+    def __init__(self, exelist, version, for_machine: MachineChoice,
+                 is_cross, info: 'MachineInfo', exe_wrapper=None,
+                 **kwargs):
+        ObjCCompiler.__init__(self, exelist, version, for_machine, is_cross,
+                              info, exe_wrapper, **kwargs)
+        ClangCompiler.__init__(self, [])
+        default_warn_args = ['-Wall', '-Winvalid-pch']
+        self.warn_args = {'0': [],
+                          '1': default_warn_args,
+                          '2': default_warn_args + ['-Wextra'],
+                          '3': default_warn_args + ['-Wextra', '-Wpedantic']}
+
+
+class AppleClangObjCCompiler(ClangObjCCompiler):
+
+    """Handle the differences between Apple's clang and vanilla clang."""
diff --git a/meson/mesonbuild/compilers/objcpp.py b/meson/mesonbuild/compilers/objcpp.py
new file mode 100644
index 000000000..3197abc8c
--- /dev/null
+++ b/meson/mesonbuild/compilers/objcpp.py
@@ -0,0 +1,98 @@
+# Copyright 2012-2017 The Meson development team
+
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+
+#     http://www.apache.org/licenses/LICENSE-2.0
+
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import os.path, subprocess
+import typing as T
+
+from ..mesonlib import EnvironmentException, MachineChoice
+
+from .mixins.clike import CLikeCompiler
+from .compilers import Compiler
+from .mixins.gnu import GnuCompiler
+from .mixins.clang import ClangCompiler
+
+if T.TYPE_CHECKING:
+    from ..envconfig import MachineInfo
+
+class ObjCPPCompiler(CLikeCompiler, Compiler):
+
+    language = 'objcpp'
+
+    def __init__(self, exelist, version, for_machine: MachineChoice,
+                 is_cross: bool, info: 'MachineInfo',
+                 exe_wrap: T.Optional[str], **kwargs):
+        Compiler.__init__(self, exelist, version, for_machine, info, **kwargs)
+        CLikeCompiler.__init__(self, is_cross, exe_wrap)
+
+    @staticmethod
+    def get_display_language():
+        return 'Objective-C++'
+
+    def sanity_check(self, work_dir, environment):
+        # TODO try to use sanity_check_impl instead of duplicated code
+        source_name = os.path.join(work_dir, 'sanitycheckobjcpp.mm')
+        binary_name = os.path.join(work_dir, 'sanitycheckobjcpp')
+        extra_flags = []
+        extra_flags += environment.coredata.get_external_args(self.for_machine, self.language)
+        if self.is_cross:
+            extra_flags += self.get_compile_only_args()
+        else:
+            extra_flags += environment.coredata.get_external_link_args(self.for_machine, self.language)
+        with open(source_name, 'w') as ofile:
+            ofile.write('#import\n'
+                        'class MyClass;'
+                        'int main(void) { return 0; }\n')
+        pc = subprocess.Popen(self.exelist + extra_flags + [source_name, '-o', binary_name])
+        pc.wait()
+        if pc.returncode != 0:
+            raise EnvironmentException('ObjC++ compiler %s can not compile programs.' % self.name_string())
+        if self.is_cross:
+            # Can't check if the binaries run so we have to assume they do
+            return
+        pe = subprocess.Popen(binary_name)
+        pe.wait()
+        if pe.returncode != 0:
+            raise EnvironmentException('Executables created by ObjC++ compiler %s are not runnable.' % self.name_string())
+
+
+class GnuObjCPPCompiler(GnuCompiler, ObjCPPCompiler):
+    def __init__(self, exelist, version, for_machine: MachineChoice,
+                 is_cross, info: 'MachineInfo', exe_wrapper=None,
+                 defines=None, **kwargs):
+        ObjCPPCompiler.__init__(self, exelist, version, for_machine, is_cross, info, exe_wrapper, **kwargs)
+        GnuCompiler.__init__(self, defines)
+        default_warn_args = ['-Wall', '-Winvalid-pch', '-Wnon-virtual-dtor']
+        self.warn_args = {'0': [],
+                          '1': default_warn_args,
+                          '2': default_warn_args + ['-Wextra'],
+                          '3': default_warn_args + ['-Wextra', '-Wpedantic']}
+
+
+class ClangObjCPPCompiler(ClangCompiler, ObjCPPCompiler):
+    def __init__(self, exelist, version, for_machine: MachineChoice,
+                 is_cross, info: 'MachineInfo', exe_wrapper=None,
+                 **kwargs):
+        ObjCPPCompiler.__init__(self, exelist, version, for_machine, is_cross, info, exe_wrapper, **kwargs)
+        ClangCompiler.__init__(self, [])
+        default_warn_args = ['-Wall', '-Winvalid-pch', '-Wnon-virtual-dtor']
+        self.warn_args = {'0': [],
+                          '1': default_warn_args,
+                          '2': default_warn_args + ['-Wextra'],
+                          '3': default_warn_args + ['-Wextra', '-Wpedantic']}
+
+
+
+class AppleClangObjCPPCompiler(ClangObjCPPCompiler):
+
+    """Handle the differences between Apple's clang and vanilla clang."""
diff --git a/meson/mesonbuild/compilers/rust.py b/meson/mesonbuild/compilers/rust.py
new file mode 100644
index 000000000..c2e21c4ab
--- /dev/null
+++ b/meson/mesonbuild/compilers/rust.py
@@ -0,0 +1,115 @@
+# Copyright 2012-2017 The Meson development team
+
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+
+#     http://www.apache.org/licenses/LICENSE-2.0
+
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import subprocess, os.path
+import typing as T
+
+from ..mesonlib import EnvironmentException, MachineChoice, Popen_safe
+from .compilers import Compiler, rust_buildtype_args, clike_debug_args
+
+if T.TYPE_CHECKING:
+    from ..envconfig import MachineInfo
+    from ..environment import Environment  # noqa: F401
+
+rust_optimization_args = {'0': [],
+                          'g': ['-C', 'opt-level=0'],
+                          '1': ['-C', 'opt-level=1'],
+                          '2': ['-C', 'opt-level=2'],
+                          '3': ['-C', 'opt-level=3'],
+                          's': ['-C', 'opt-level=s'],
+                          }
+
+class RustCompiler(Compiler):
+
+    # rustc doesn't invoke the compiler itself, it doesn't need a LINKER_PREFIX
+    language = 'rust'
+
+    def __init__(self, exelist, version, for_machine: MachineChoice,
+                 is_cross, info: 'MachineInfo', exe_wrapper=None, **kwargs):
+        super().__init__(exelist, version, for_machine, info, **kwargs)
+        self.exe_wrapper = exe_wrapper
+        self.id = 'rustc'
+        self.is_cross = is_cross
+
+    def needs_static_linker(self):
+        return False
+
+    def name_string(self):
+        return ' '.join(self.exelist)
+
+    def sanity_check(self, work_dir, environment):
+        source_name = os.path.join(work_dir, 'sanity.rs')
+        output_name = os.path.join(work_dir, 'rusttest')
+        with open(source_name, 'w') as ofile:
+            ofile.write('''fn main() {
+}
+''')
+        pc = subprocess.Popen(self.exelist + ['-o', output_name, source_name],
+                              stdout=subprocess.PIPE,
+                              stderr=subprocess.PIPE,
+                              cwd=work_dir)
+        stdo, stde = pc.communicate()
+        stdo = stdo.decode('utf-8', errors='replace')
+        stde = stde.decode('utf-8', errors='replace')
+        if pc.returncode != 0:
+            raise EnvironmentException('Rust compiler %s can not compile programs.\n%s\n%s' % (
+                self.name_string(),
+                stdo,
+                stde))
+        if self.is_cross:
+            if self.exe_wrapper is None:
+                # Can't check if the binaries run so we have to assume they do
+                return
+            cmdlist = self.exe_wrapper + [output_name]
+        else:
+            cmdlist = [output_name]
+        pe = subprocess.Popen(cmdlist, stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL)
+        pe.wait()
+        if pe.returncode != 0:
+            raise EnvironmentException('Executables created by Rust compiler %s are not runnable.' % self.name_string())
+
+    def get_dependency_gen_args(self, outfile):
+        return ['--dep-info', outfile]
+
+    def get_buildtype_args(self, buildtype):
+        return rust_buildtype_args[buildtype]
+
+    def get_sysroot(self):
+        cmd = self.exelist + ['--print', 'sysroot']
+        p, stdo, stde = Popen_safe(cmd)
+        return stdo.split('\n')[0]
+
+    def get_debug_args(self, is_debug):
+        return clike_debug_args[is_debug]
+
+    def get_optimization_args(self, optimization_level):
+        return rust_optimization_args[optimization_level]
+
+    def compute_parameters_with_absolute_paths(self, parameter_list, build_dir):
+        for idx, i in enumerate(parameter_list):
+            if i[:2] == '-L':
+                for j in ['dependency', 'crate', 'native', 'framework', 'all']:
+                    combined_len = len(j) + 3
+                    if i[:combined_len] == '-L{}='.format(j):
+                        parameter_list[idx] = i[:combined_len] + os.path.normpath(os.path.join(build_dir, i[combined_len:]))
+                        break
+
+        return parameter_list
+
+    def get_std_exe_link_args(self):
+        return []
+
+    # Rust does not have a use_linker_args because it dispatches to a gcc-like
+    # C compiler for dynamic linking, as such we invoke the C compiler's
+    # use_linker_args method instead.
diff --git a/meson/mesonbuild/compilers/swift.py b/meson/mesonbuild/compilers/swift.py
new file mode 100644
index 000000000..1942120c7
--- /dev/null
+++ b/meson/mesonbuild/compilers/swift.py
@@ -0,0 +1,124 @@
+# Copyright 2012-2017 The Meson development team
+
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+
+#     http://www.apache.org/licenses/LICENSE-2.0
+
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import subprocess, os.path
+import typing as T
+
+from ..mesonlib import EnvironmentException, MachineChoice
+
+from .compilers import Compiler, swift_buildtype_args, clike_debug_args
+
+if T.TYPE_CHECKING:
+    from ..envconfig import MachineInfo
+
+swift_optimization_args = {'0': [],
+                           'g': [],
+                           '1': ['-O'],
+                           '2': ['-O'],
+                           '3': ['-O'],
+                           's': ['-O'],
+                           }
+
+class SwiftCompiler(Compiler):
+
+    LINKER_PREFIX = ['-Xlinker']
+    language = 'swift'
+
+    def __init__(self, exelist, version, for_machine: MachineChoice,
+                 is_cross, info: 'MachineInfo', **kwargs):
+        super().__init__(exelist, version, for_machine, info, **kwargs)
+        self.version = version
+        self.id = 'llvm'
+        self.is_cross = is_cross
+
+    def name_string(self):
+        return ' '.join(self.exelist)
+
+    def needs_static_linker(self):
+        return True
+
+    def get_werror_args(self):
+        return ['--fatal-warnings']
+
+    def get_dependency_gen_args(self, outtarget, outfile):
+        return ['-emit-dependencies']
+
+    def depfile_for_object(self, objfile):
+        return os.path.splitext(objfile)[0] + '.' + self.get_depfile_suffix()
+
+    def get_depfile_suffix(self):
+        return 'd'
+
+    def get_output_args(self, target):
+        return ['-o', target]
+
+    def get_header_import_args(self, headername):
+        return ['-import-objc-header', headername]
+
+    def get_warn_args(self, level):
+        return []
+
+    def get_buildtype_args(self, buildtype):
+        return swift_buildtype_args[buildtype]
+
+    def get_std_exe_link_args(self):
+        return ['-emit-executable']
+
+    def get_module_args(self, modname):
+        return ['-module-name', modname]
+
+    def get_mod_gen_args(self):
+        return ['-emit-module']
+
+    def get_include_args(self, dirname):
+        return ['-I' + dirname]
+
+    def get_compile_only_args(self):
+        return ['-c']
+
+    def compute_parameters_with_absolute_paths(self, parameter_list, build_dir):
+        for idx, i in enumerate(parameter_list):
+            if i[:2] == '-I' or i[:2] == '-L':
+                parameter_list[idx] = i[:2] + os.path.normpath(os.path.join(build_dir, i[2:]))
+
+        return parameter_list
+
+    def sanity_check(self, work_dir, environment):
+        src = 'swifttest.swift'
+        source_name = os.path.join(work_dir, src)
+        output_name = os.path.join(work_dir, 'swifttest')
+        extra_flags = []
+        extra_flags += environment.coredata.get_external_args(self.for_machine, self.language)
+        if self.is_cross:
+            extra_flags += self.get_compile_only_args()
+        else:
+            extra_flags += environment.coredata.get_external_link_args(self.for_machine, self.language)
+        with open(source_name, 'w') as ofile:
+            ofile.write('''print("Swift compilation is working.")
+''')
+        pc = subprocess.Popen(self.exelist + extra_flags + ['-emit-executable', '-o', output_name, src], cwd=work_dir)
+        pc.wait()
+        if pc.returncode != 0:
+            raise EnvironmentException('Swift compiler %s can not compile programs.' % self.name_string())
+        if self.is_cross:
+            # Can't check if the binaries run so we have to assume they do
+            return
+        if subprocess.call(output_name) != 0:
+            raise EnvironmentException('Executables created by Swift compiler %s are not runnable.' % self.name_string())
+
+    def get_debug_args(self, is_debug):
+        return clike_debug_args[is_debug]
+
+    def get_optimization_args(self, optimization_level):
+        return swift_optimization_args[optimization_level]
diff --git a/meson/mesonbuild/compilers/vala.py b/meson/mesonbuild/compilers/vala.py
new file mode 100644
index 000000000..a5d49b6ac
--- /dev/null
+++ b/meson/mesonbuild/compilers/vala.py
@@ -0,0 +1,140 @@
+# Copyright 2012-2017 The Meson development team
+
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+
+#     http://www.apache.org/licenses/LICENSE-2.0
+
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import os.path
+import typing as T
+
+from .. import mlog
+from ..mesonlib import EnvironmentException, MachineChoice, version_compare
+
+from .compilers import Compiler
+
+if T.TYPE_CHECKING:
+    from ..envconfig import MachineInfo
+
+class ValaCompiler(Compiler):
+
+    language = 'vala'
+
+    def __init__(self, exelist, version, for_machine: MachineChoice,
+                 is_cross, info: 'MachineInfo'):
+        super().__init__(exelist, version, for_machine, info)
+        self.version = version
+        self.is_cross = is_cross
+        self.id = 'valac'
+        self.base_options = ['b_colorout']
+
+    def name_string(self):
+        return ' '.join(self.exelist)
+
+    def needs_static_linker(self):
+        return False # Because compiles into C.
+
+    def get_optimization_args(self, optimization_level):
+        return []
+
+    def get_debug_args(self, is_debug):
+        return ['--debug'] if is_debug else []
+
+    def get_output_args(self, target):
+        return [] # Because compiles into C.
+
+    def get_compile_only_args(self):
+        return [] # Because compiles into C.
+
+    def get_pic_args(self):
+        return []
+
+    def get_pie_args(self):
+        return []
+
+    def get_pie_link_args(self):
+        return []
+
+    def get_always_args(self):
+        return ['-C']
+
+    def get_warn_args(self, warning_level):
+        return []
+
+    def get_no_warn_args(self):
+        return ['--disable-warnings']
+
+    def get_werror_args(self):
+        return ['--fatal-warnings']
+
+    def get_colorout_args(self, colortype):
+        if version_compare(self.version, '>=0.37.1'):
+            return ['--color=' + colortype]
+        return []
+
+    def compute_parameters_with_absolute_paths(self, parameter_list, build_dir):
+        for idx, i in enumerate(parameter_list):
+            if i[:9] == '--girdir=':
+                parameter_list[idx] = i[:9] + os.path.normpath(os.path.join(build_dir, i[9:]))
+            if i[:10] == '--vapidir=':
+                parameter_list[idx] = i[:10] + os.path.normpath(os.path.join(build_dir, i[10:]))
+            if i[:13] == '--includedir=':
+                parameter_list[idx] = i[:13] + os.path.normpath(os.path.join(build_dir, i[13:]))
+            if i[:14] == '--metadatadir=':
+                parameter_list[idx] = i[:14] + os.path.normpath(os.path.join(build_dir, i[14:]))
+
+        return parameter_list
+
+    def sanity_check(self, work_dir, environment):
+        code = 'class MesonSanityCheck : Object { }'
+        extra_flags = []
+        extra_flags += environment.coredata.get_external_args(self.for_machine, self.language)
+        if self.is_cross:
+            extra_flags += self.get_compile_only_args()
+        else:
+            extra_flags += environment.coredata.get_external_link_args(self.for_machine, self.language)
+        with self.cached_compile(code, environment.coredata, extra_args=extra_flags, mode='compile') as p:
+            if p.returncode != 0:
+                msg = 'Vala compiler {!r} can not compile programs' \
+                      ''.format(self.name_string())
+                raise EnvironmentException(msg)
+
+    def get_buildtype_args(self, buildtype):
+        if buildtype == 'debug' or buildtype == 'debugoptimized' or buildtype == 'minsize':
+            return ['--debug']
+        return []
+
+    def find_library(self, libname, env, extra_dirs, *args):
+        if extra_dirs and isinstance(extra_dirs, str):
+            extra_dirs = [extra_dirs]
+        # Valac always looks in the default vapi dir, so only search there if
+        # no extra dirs are specified.
+        if not extra_dirs:
+            code = 'class MesonFindLibrary : Object { }'
+            args = []
+            args += env.coredata.get_external_args(self.for_machine, self.language)
+            vapi_args = ['--pkg', libname]
+            args += vapi_args
+            with self.cached_compile(code, env.coredata, extra_args=args, mode='compile') as p:
+                if p.returncode == 0:
+                    return vapi_args
+        # Not found? Try to find the vapi file itself.
+        for d in extra_dirs:
+            vapi = os.path.join(d, libname + '.vapi')
+            if os.path.isfile(vapi):
+                return [vapi]
+        mlog.debug('Searched {!r} and {!r} wasn\'t found'.format(extra_dirs, libname))
+        return None
+
+    def thread_flags(self, env):
+        return []
+
+    def thread_link_flags(self, env):
+        return []
diff --git a/meson/mesonbuild/coredata.py b/meson/mesonbuild/coredata.py
new file mode 100644
index 000000000..d639188b1
--- /dev/null
+++ b/meson/mesonbuild/coredata.py
@@ -0,0 +1,1231 @@
+# Copyrighs 2012-2019 The Meson development team
+
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+
+#     http://www.apache.org/licenses/LICENSE-2.0
+
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from . import mlog, mparser
+import pickle, os, uuid
+import sys
+from itertools import chain
+from pathlib import PurePath
+from collections import OrderedDict, defaultdict
+from .mesonlib import (
+    MesonException, MachineChoice, PerMachine, OrderedSet,
+    default_libdir, default_libexecdir, default_prefix, split_args
+)
+from .envconfig import get_env_var_pair
+from .wrap import WrapMode
+import ast
+import argparse
+import configparser
+import enum
+import shlex
+import typing as T
+
+if T.TYPE_CHECKING:
+    from . import dependencies
+    from .compilers import Compiler  # noqa: F401
+    from .environment import Environment
+
+    OptionDictType = T.Dict[str, 'UserOption[T.Any]']
+
+version = '0.55.3'
+backendlist = ['ninja', 'vs', 'vs2010', 'vs2015', 'vs2017', 'vs2019', 'xcode']
+
+default_yielding = False
+
+# Can't bind this near the class method it seems, sadly.
+_T = T.TypeVar('_T')
+
+class MesonVersionMismatchException(MesonException):
+    '''Build directory generated with Meson version incompatible with current version'''
+    def __init__(self, old_version, current_version):
+        super().__init__('Build directory has been generated with Meson version {}, '
+                         'which is incompatible with current version {}.'
+                         .format(old_version, current_version))
+        self.old_version = old_version
+        self.current_version = current_version
+
+
+class UserOption(T.Generic[_T]):
+    def __init__(self, description, choices, yielding):
+        super().__init__()
+        self.choices = choices
+        self.description = description
+        if yielding is None:
+            yielding = default_yielding
+        if not isinstance(yielding, bool):
+            raise MesonException('Value of "yielding" must be a boolean.')
+        self.yielding = yielding
+
+    def printable_value(self):
+        return self.value
+
+    # Check that the input is a valid value and return the
+    # "cleaned" or "native" version. For example the Boolean
+    # option could take the string "true" and return True.
+    def validate_value(self, value: T.Any) -> _T:
+        raise RuntimeError('Derived option class did not override validate_value.')
+
+    def set_value(self, newvalue):
+        self.value = self.validate_value(newvalue)
+
+class UserStringOption(UserOption[str]):
+    def __init__(self, description, value, choices=None, yielding=None):
+        super().__init__(description, choices, yielding)
+        self.set_value(value)
+
+    def validate_value(self, value):
+        if not isinstance(value, str):
+            raise MesonException('Value "%s" for string option is not a string.' % str(value))
+        return value
+
+class UserBooleanOption(UserOption[bool]):
+    def __init__(self, description, value, yielding=None):
+        super().__init__(description, [True, False], yielding)
+        self.set_value(value)
+
+    def __bool__(self) -> bool:
+        return self.value
+
+    def validate_value(self, value) -> bool:
+        if isinstance(value, bool):
+            return value
+        if value.lower() == 'true':
+            return True
+        if value.lower() == 'false':
+            return False
+        raise MesonException('Value %s is not boolean (true or false).' % value)
+
+class UserIntegerOption(UserOption[int]):
+    def __init__(self, description, value, yielding=None):
+        min_value, max_value, default_value = value
+        self.min_value = min_value
+        self.max_value = max_value
+        c = []
+        if min_value is not None:
+            c.append('>=' + str(min_value))
+        if max_value is not None:
+            c.append('<=' + str(max_value))
+        choices = ', '.join(c)
+        super().__init__(description, choices, yielding)
+        self.set_value(default_value)
+
+    def validate_value(self, value) -> int:
+        if isinstance(value, str):
+            value = self.toint(value)
+        if not isinstance(value, int):
+            raise MesonException('New value for integer option is not an integer.')
+        if self.min_value is not None and value < self.min_value:
+            raise MesonException('New value %d is less than minimum value %d.' % (value, self.min_value))
+        if self.max_value is not None and value > self.max_value:
+            raise MesonException('New value %d is more than maximum value %d.' % (value, self.max_value))
+        return value
+
+    def toint(self, valuestring) -> int:
+        try:
+            return int(valuestring)
+        except ValueError:
+            raise MesonException('Value string "%s" is not convertible to an integer.' % valuestring)
+
+class UserUmaskOption(UserIntegerOption, UserOption[T.Union[str, int]]):
+    def __init__(self, description, value, yielding=None):
+        super().__init__(description, (0, 0o777, value), yielding)
+        self.choices = ['preserve', '0000-0777']
+
+    def printable_value(self):
+        if self.value == 'preserve':
+            return self.value
+        return format(self.value, '04o')
+
+    def validate_value(self, value):
+        if value is None or value == 'preserve':
+            return 'preserve'
+        return super().validate_value(value)
+
+    def toint(self, valuestring):
+        try:
+            return int(valuestring, 8)
+        except ValueError as e:
+            raise MesonException('Invalid mode: {}'.format(e))
+
+class UserComboOption(UserOption[str]):
+    def __init__(self, description, choices: T.List[str], value, yielding=None):
+        super().__init__(description, choices, yielding)
+        if not isinstance(self.choices, list):
+            raise MesonException('Combo choices must be an array.')
+        for i in self.choices:
+            if not isinstance(i, str):
+                raise MesonException('Combo choice elements must be strings.')
+        self.set_value(value)
+
+    def validate_value(self, value):
+        if value not in self.choices:
+            optionsstring = ', '.join(['"%s"' % (item,) for item in self.choices])
+            raise MesonException('Value "{}" for combo option "{}" is not one of the choices.'
+                                 ' Possible choices are: {}.'.format(
+                                     value, self.description, optionsstring))
+        return value
+
+class UserArrayOption(UserOption[T.List[str]]):
+    def __init__(self, description, value, split_args=False, user_input=False, allow_dups=False, **kwargs):
+        super().__init__(description, kwargs.get('choices', []), yielding=kwargs.get('yielding', None))
+        self.split_args = split_args
+        self.allow_dups = allow_dups
+        self.value = self.validate_value(value, user_input=user_input)
+
+    def validate_value(self, value, user_input: bool = True) -> T.List[str]:
+        # User input is for options defined on the command line (via -D
+        # options). Users can put their input in as a comma separated
+        # string, but for defining options in meson_options.txt the format
+        # should match that of a combo
+        if not user_input and isinstance(value, str) and not value.startswith('['):
+            raise MesonException('Value does not define an array: ' + value)
+
+        if isinstance(value, str):
+            if value.startswith('['):
+                try:
+                    newvalue = ast.literal_eval(value)
+                except ValueError:
+                    raise MesonException('malformed option {}'.format(value))
+            elif value == '':
+                newvalue = []
+            else:
+                if self.split_args:
+                    newvalue = split_args(value)
+                else:
+                    newvalue = [v.strip() for v in value.split(',')]
+        elif isinstance(value, list):
+            newvalue = value
+        else:
+            raise MesonException('"{}" should be a string array, but it is not'.format(newvalue))
+
+        if not self.allow_dups and len(set(newvalue)) != len(newvalue):
+            msg = 'Duplicated values in array option is deprecated. ' \
+                  'This will become a hard error in the future.'
+            mlog.deprecation(msg)
+        for i in newvalue:
+            if not isinstance(i, str):
+                raise MesonException('String array element "{0}" is not a string.'.format(str(newvalue)))
+        if self.choices:
+            bad = [x for x in newvalue if x not in self.choices]
+            if bad:
+                raise MesonException('Options "{}" are not in allowed choices: "{}"'.format(
+                    ', '.join(bad), ', '.join(self.choices)))
+        return newvalue
+
+
+class UserFeatureOption(UserComboOption):
+    static_choices = ['enabled', 'disabled', 'auto']
+
+    def __init__(self, description, value, yielding=None):
+        super().__init__(description, self.static_choices, value, yielding)
+
+    def is_enabled(self):
+        return self.value == 'enabled'
+
+    def is_disabled(self):
+        return self.value == 'disabled'
+
+    def is_auto(self):
+        return self.value == 'auto'
+
+if T.TYPE_CHECKING:
+    CacheKeyType = T.Tuple[T.Tuple[T.Any, ...], ...]
+    SubCacheKeyType = T.Tuple[T.Any, ...]
+
+
+class DependencyCacheType(enum.Enum):
+
+    OTHER = 0
+    PKG_CONFIG = 1
+    CMAKE = 2
+
+    @classmethod
+    def from_type(cls, dep: 'dependencies.Dependency') -> 'DependencyCacheType':
+        from . import dependencies
+        # As more types gain search overrides they'll need to be added here
+        if isinstance(dep, dependencies.PkgConfigDependency):
+            return cls.PKG_CONFIG
+        if isinstance(dep, dependencies.CMakeDependency):
+            return cls.CMAKE
+        return cls.OTHER
+
+
+class DependencySubCache:
+
+    def __init__(self, type_: DependencyCacheType):
+        self.types = [type_]
+        self.__cache = {}  # type: T.Dict[SubCacheKeyType, dependencies.Dependency]
+
+    def __getitem__(self, key: 'SubCacheKeyType') -> 'dependencies.Dependency':
+        return self.__cache[key]
+
+    def __setitem__(self, key: 'SubCacheKeyType', value: 'dependencies.Dependency') -> None:
+        self.__cache[key] = value
+
+    def __contains__(self, key: 'SubCacheKeyType') -> bool:
+        return key in self.__cache
+
+    def values(self) -> T.Iterable['dependencies.Dependency']:
+        return self.__cache.values()
+
+
+class DependencyCache:
+
+    """Class that stores a cache of dependencies.
+
+    This class is meant to encapsulate the fact that we need multiple keys to
+    successfully lookup by providing a simple get/put interface.
+    """
+
+    def __init__(self, builtins_per_machine: PerMachine[T.Dict[str, UserOption[T.Any]]], for_machine: MachineChoice):
+        self.__cache = OrderedDict()  # type: T.MutableMapping[CacheKeyType, DependencySubCache]
+        self.__builtins_per_machine = builtins_per_machine
+        self.__for_machine = for_machine
+
+    def __calculate_subkey(self, type_: DependencyCacheType) -> T.Tuple[T.Any, ...]:
+        if type_ is DependencyCacheType.PKG_CONFIG:
+            return tuple(self.__builtins_per_machine[self.__for_machine]['pkg_config_path'].value)
+        elif type_ is DependencyCacheType.CMAKE:
+            return tuple(self.__builtins_per_machine[self.__for_machine]['cmake_prefix_path'].value)
+        assert type_ is DependencyCacheType.OTHER, 'Someone forgot to update subkey calculations for a new type'
+        return tuple()
+
+    def __iter__(self) -> T.Iterator['CacheKeyType']:
+        return self.keys()
+
+    def put(self, key: 'CacheKeyType', dep: 'dependencies.Dependency') -> None:
+        t = DependencyCacheType.from_type(dep)
+        if key not in self.__cache:
+            self.__cache[key] = DependencySubCache(t)
+        subkey = self.__calculate_subkey(t)
+        self.__cache[key][subkey] = dep
+
+    def get(self, key: 'CacheKeyType') -> T.Optional['dependencies.Dependency']:
+        """Get a value from the cache.
+
+        If there is no cache entry then None will be returned.
+        """
+        try:
+            val = self.__cache[key]
+        except KeyError:
+            return None
+
+        for t in val.types:
+            subkey = self.__calculate_subkey(t)
+            try:
+                return val[subkey]
+            except KeyError:
+                pass
+        return None
+
+    def values(self) -> T.Iterator['dependencies.Dependency']:
+        for c in self.__cache.values():
+            yield from c.values()
+
+    def keys(self) -> T.Iterator['CacheKeyType']:
+        return iter(self.__cache.keys())
+
+    def items(self) -> T.Iterator[T.Tuple['CacheKeyType', T.List['dependencies.Dependency']]]:
+        for k, v in self.__cache.items():
+            vs = []
+            for t in v.types:
+                subkey = self.__calculate_subkey(t)
+                if subkey in v:
+                    vs.append(v[subkey])
+            yield k, vs
+
+    def clear(self) -> None:
+        self.__cache.clear()
+
+# Can't bind this near the class method it seems, sadly.
+_V = T.TypeVar('_V')
+
+# This class contains all data that must persist over multiple
+# invocations of Meson. It is roughly the same thing as
+# cmakecache.
+
+class CoreData:
+
+    def __init__(self, options: argparse.Namespace, scratch_dir: str):
+        self.lang_guids = {
+            'default': '8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942',
+            'c': '8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942',
+            'cpp': '8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942',
+            'test': '3AC096D0-A1C2-E12C-1390-A8335801FDAB',
+            'directory': '2150E333-8FDC-42A3-9474-1A3956D46DE8',
+        }
+        self.test_guid = str(uuid.uuid4()).upper()
+        self.regen_guid = str(uuid.uuid4()).upper()
+        self.install_guid = str(uuid.uuid4()).upper()
+        self.target_guids = {}
+        self.version = version
+        self.builtins = {} # : OptionDictType
+        self.builtins_per_machine = PerMachine({}, {})
+        self.backend_options = {} # : OptionDictType
+        self.user_options = {} # : OptionDictType
+        self.compiler_options = PerMachine(
+            defaultdict(dict),
+            defaultdict(dict),
+        ) # : PerMachine[T.defaultdict[str, OptionDictType]]
+        self.base_options = {} # : OptionDictType
+        self.cross_files = self.__load_config_files(options, scratch_dir, 'cross')
+        self.compilers = PerMachine(OrderedDict(), OrderedDict())
+
+        build_cache = DependencyCache(self.builtins_per_machine, MachineChoice.BUILD)
+        host_cache = DependencyCache(self.builtins_per_machine, MachineChoice.BUILD)
+        self.deps = PerMachine(build_cache, host_cache)  # type: PerMachine[DependencyCache]
+        self.compiler_check_cache = OrderedDict()
+        # Only to print a warning if it changes between Meson invocations.
+        self.config_files = self.__load_config_files(options, scratch_dir, 'native')
+        self.builtin_options_libdir_cross_fixup()
+        self.init_builtins('')
+
+    @staticmethod
+    def __load_config_files(options: argparse.Namespace, scratch_dir: str, ftype: str) -> T.List[str]:
+        # Need to try and make the passed filenames absolute because when the
+        # files are parsed later we'll have chdir()d.
+        if ftype == 'cross':
+            filenames = options.cross_file
+        else:
+            filenames = options.native_file
+
+        if not filenames:
+            return []
+
+        found_invalid = []  # type: T.List[str]
+        missing = []        # type: T.List[str]
+        real = []           # type: T.List[str]
+        for i, f in enumerate(filenames):
+            f = os.path.expanduser(os.path.expandvars(f))
+            if os.path.exists(f):
+                if os.path.isfile(f):
+                    real.append(os.path.abspath(f))
+                    continue
+                elif os.path.isdir(f):
+                    found_invalid.append(os.path.abspath(f))
+                else:
+                    # in this case we've been passed some kind of pipe, copy
+                    # the contents of that file into the meson private (scratch)
+                    # directory so that it can be re-read when wiping/reconfiguring
+                    copy = os.path.join(scratch_dir, '{}.{}.ini'.format(uuid.uuid4(), ftype))
+                    with open(f, 'r') as rf:
+                        with open(copy, 'w') as wf:
+                            wf.write(rf.read())
+                    real.append(copy)
+
+                    # Also replace the command line argument, as the pipe
+                    # probably won't exist on reconfigure
+                    filenames[i] = copy
+                    continue
+            if sys.platform != 'win32':
+                paths = [
+                    os.environ.get('XDG_DATA_HOME', os.path.expanduser('~/.local/share')),
+                ] + os.environ.get('XDG_DATA_DIRS', '/usr/local/share:/usr/share').split(':')
+                for path in paths:
+                    path_to_try = os.path.join(path, 'meson', ftype, f)
+                    if os.path.isfile(path_to_try):
+                        real.append(path_to_try)
+                        break
+                else:
+                    missing.append(f)
+            else:
+                missing.append(f)
+
+        if missing:
+            if found_invalid:
+                mlog.log('Found invalid candidates for', ftype, 'file:', *found_invalid)
+            mlog.log('Could not find any valid candidate for', ftype, 'files:', *missing)
+            raise MesonException('Cannot find specified {} file: {}'.format(ftype, f))
+        return real
+
+    def builtin_options_libdir_cross_fixup(self):
+        # By default set libdir to "lib" when cross compiling since
+        # getting the "system default" is always wrong on multiarch
+        # platforms as it gets a value like lib/x86_64-linux-gnu.
+        if self.cross_files:
+            builtin_options['libdir'].default = 'lib'
+
+    def sanitize_prefix(self, prefix):
+        prefix = os.path.expanduser(prefix)
+        if not os.path.isabs(prefix):
+            raise MesonException('prefix value {!r} must be an absolute path'
+                                 ''.format(prefix))
+        if prefix.endswith('/') or prefix.endswith('\\'):
+            # On Windows we need to preserve the trailing slash if the
+            # string is of type 'C:\' because 'C:' is not an absolute path.
+            if len(prefix) == 3 and prefix[1] == ':':
+                pass
+            # If prefix is a single character, preserve it since it is
+            # the root directory.
+            elif len(prefix) == 1:
+                pass
+            else:
+                prefix = prefix[:-1]
+        return prefix
+
+    def sanitize_dir_option_value(self, prefix: str, option: str, value: T.Any) -> T.Any:
+        '''
+        If the option is an installation directory option and the value is an
+        absolute path, check that it resides within prefix and return the value
+        as a path relative to the prefix.
+
+        This way everyone can do f.ex, get_option('libdir') and be sure to get
+        the library directory relative to prefix.
+
+        .as_posix() keeps the posix-like file seperators Meson uses.
+        '''
+        try:
+            value = PurePath(value)
+        except TypeError:
+            return value
+        if option.endswith('dir') and value.is_absolute() and \
+           option not in builtin_dir_noprefix_options:
+            # Value must be a subdir of the prefix
+            # commonpath will always return a path in the native format, so we
+            # must use pathlib.PurePath to do the same conversion before
+            # comparing.
+            msg = ('The value of the {!r} option is \'{!s}\' which must be a '
+                   'subdir of the prefix {!r}.\nNote that if you pass a '
+                   'relative path, it is assumed to be a subdir of prefix.')
+            # os.path.commonpath doesn't understand case-insensitive filesystems,
+            # but PurePath().relative_to() does.
+            try:
+                value = value.relative_to(prefix)
+            except ValueError:
+                raise MesonException(msg.format(option, value, prefix))
+            if '..' in str(value):
+                raise MesonException(msg.format(option, value, prefix))
+        return value.as_posix()
+
+    def init_builtins(self, subproject: str):
+        # Create builtin options with default values
+        for key, opt in builtin_options.items():
+            self.add_builtin_option(self.builtins, key, opt, subproject)
+        for for_machine in iter(MachineChoice):
+            for key, opt in builtin_options_per_machine.items():
+                self.add_builtin_option(self.builtins_per_machine[for_machine], key, opt, subproject)
+
+    def add_builtin_option(self, opts_map, key, opt, subproject):
+        if subproject:
+            if opt.yielding:
+                # This option is global and not per-subproject
+                return
+            optname = subproject + ':' + key
+            value = opts_map[key].value
+        else:
+            optname = key
+            value = None
+        opts_map[optname] = opt.init_option(key, value, default_prefix())
+
+    def init_backend_options(self, backend_name):
+        if backend_name == 'ninja':
+            self.backend_options['backend_max_links'] = \
+                UserIntegerOption(
+                    'Maximum number of linker processes to run or 0 for no '
+                    'limit',
+                    (0, None, 0))
+        elif backend_name.startswith('vs'):
+            self.backend_options['backend_startup_project'] = \
+                UserStringOption(
+                    'Default project to execute in Visual Studio',
+                    '')
+
+    def get_builtin_option(self, optname, subproject=''):
+        raw_optname = optname
+        if subproject:
+            optname = subproject + ':' + optname
+        for opts in self._get_all_builtin_options():
+            v = opts.get(optname)
+            if v is None or v.yielding:
+                v = opts.get(raw_optname)
+            if v is None:
+                continue
+            if raw_optname == 'wrap_mode':
+                return WrapMode.from_string(v.value)
+            return v.value
+        raise RuntimeError('Tried to get unknown builtin option %s.' % raw_optname)
+
+    def _try_set_builtin_option(self, optname, value):
+        for opts in self._get_all_builtin_options():
+            opt = opts.get(optname)
+            if opt is None:
+                continue
+            if optname == 'prefix':
+                value = self.sanitize_prefix(value)
+            else:
+                prefix = self.builtins['prefix'].value
+                value = self.sanitize_dir_option_value(prefix, optname, value)
+            break
+        else:
+            return False
+        opt.set_value(value)
+        # Make sure that buildtype matches other settings.
+        if optname == 'buildtype':
+            self.set_others_from_buildtype(value)
+        else:
+            self.set_buildtype_from_others()
+        return True
+
+    def set_builtin_option(self, optname, value):
+        res = self._try_set_builtin_option(optname, value)
+        if not res:
+            raise RuntimeError('Tried to set unknown builtin option %s.' % optname)
+
+    def set_others_from_buildtype(self, value):
+        if value == 'plain':
+            opt = '0'
+            debug = False
+        elif value == 'debug':
+            opt = '0'
+            debug = True
+        elif value == 'debugoptimized':
+            opt = '2'
+            debug = True
+        elif value == 'release':
+            opt = '3'
+            debug = False
+        elif value == 'minsize':
+            opt = 's'
+            debug = True
+        else:
+            assert(value == 'custom')
+            return
+        self.builtins['optimization'].set_value(opt)
+        self.builtins['debug'].set_value(debug)
+
+    def set_buildtype_from_others(self):
+        opt = self.builtins['optimization'].value
+        debug = self.builtins['debug'].value
+        if opt == '0' and not debug:
+            mode = 'plain'
+        elif opt == '0' and debug:
+            mode = 'debug'
+        elif opt == '2' and debug:
+            mode = 'debugoptimized'
+        elif opt == '3' and not debug:
+            mode = 'release'
+        elif opt == 's' and debug:
+            mode = 'minsize'
+        else:
+            mode = 'custom'
+        self.builtins['buildtype'].set_value(mode)
+
+    @classmethod
+    def get_prefixed_options_per_machine(
+        cls,
+        options_per_machine # : PerMachine[T.Dict[str, _V]]]
+    ) -> T.Iterable[T.Tuple[str, _V]]:
+        return cls._flatten_pair_iterator(
+            (for_machine.get_prefix(), options_per_machine[for_machine])
+            for for_machine in iter(MachineChoice)
+        )
+
+    @classmethod
+    def flatten_lang_iterator(
+        cls,
+        outer # : T.Iterable[T.Tuple[str, T.Dict[str, _V]]]
+    ) -> T.Iterable[T.Tuple[str, _V]]:
+        return cls._flatten_pair_iterator((lang + '_', opts) for lang, opts in outer)
+
+    @staticmethod
+    def _flatten_pair_iterator(
+        outer # : T.Iterable[T.Tuple[str, T.Dict[str, _V]]]
+    ) -> T.Iterable[T.Tuple[str, _V]]:
+        for k0, v0 in outer:
+            for k1, v1 in v0.items():
+                yield (k0 + k1, v1)
+
+    def _get_all_nonbuiltin_options(self) -> T.Iterable[T.Dict[str, UserOption]]:
+        yield self.backend_options
+        yield self.user_options
+        yield dict(self.flatten_lang_iterator(self.get_prefixed_options_per_machine(self.compiler_options)))
+        yield self.base_options
+
+    def _get_all_builtin_options(self) -> T.Iterable[T.Dict[str, UserOption]]:
+        yield dict(self.get_prefixed_options_per_machine(self.builtins_per_machine))
+        yield self.builtins
+
+    def get_all_options(self) -> T.Iterable[T.Dict[str, UserOption]]:
+        yield from self._get_all_nonbuiltin_options()
+        yield from self._get_all_builtin_options()
+
+    def validate_option_value(self, option_name, override_value):
+        for opts in self.get_all_options():
+            opt = opts.get(option_name)
+            if opt is not None:
+                try:
+                    return opt.validate_value(override_value)
+                except MesonException as e:
+                    raise type(e)(('Validation failed for option %s: ' % option_name) + str(e)) \
+                        .with_traceback(sys.exc_info()[2])
+        raise MesonException('Tried to validate unknown option %s.' % option_name)
+
+    def get_external_args(self, for_machine: MachineChoice, lang):
+        return self.compiler_options[for_machine][lang]['args'].value
+
+    def get_external_link_args(self, for_machine: MachineChoice, lang):
+        return self.compiler_options[for_machine][lang]['link_args'].value
+
+    def merge_user_options(self, options):
+        for (name, value) in options.items():
+            if name not in self.user_options:
+                self.user_options[name] = value
+            else:
+                oldval = self.user_options[name]
+                if type(oldval) != type(value):
+                    self.user_options[name] = value
+
+    def is_cross_build(self, when_building_for: MachineChoice = MachineChoice.HOST) -> bool:
+        if when_building_for == MachineChoice.BUILD:
+            return False
+        return len(self.cross_files) > 0
+
+    def strip_build_option_names(self, options):
+        res = OrderedDict()
+        for k, v in options.items():
+            if k.startswith('build.'):
+                k = k.split('.', 1)[1]
+            res[k] = v
+        return res
+
+    def copy_build_options_from_regular_ones(self):
+        assert(not self.is_cross_build())
+        for k, o in self.builtins_per_machine.host.items():
+            self.builtins_per_machine.build[k].set_value(o.value)
+        for lang, host_opts in self.compiler_options.host.items():
+            build_opts = self.compiler_options.build[lang]
+            for k, o in host_opts.items():
+                if k in build_opts:
+                    build_opts[k].set_value(o.value)
+
+    def set_options(self, options, *, subproject='', warn_unknown=True):
+        if not self.is_cross_build():
+            options = self.strip_build_option_names(options)
+        # Set prefix first because it's needed to sanitize other options
+        if 'prefix' in options:
+            prefix = self.sanitize_prefix(options['prefix'])
+            self.builtins['prefix'].set_value(prefix)
+            for key in builtin_dir_noprefix_options:
+                if key not in options:
+                    self.builtins[key].set_value(builtin_options[key].prefixed_default(key, prefix))
+
+        unknown_options = []
+        for k, v in options.items():
+            if k == 'prefix':
+                continue
+            if self._try_set_builtin_option(k, v):
+                continue
+            for opts in self._get_all_nonbuiltin_options():
+                tgt = opts.get(k)
+                if tgt is None:
+                    continue
+                tgt.set_value(v)
+                break
+            else:
+                unknown_options.append(k)
+        if unknown_options and warn_unknown:
+            unknown_options = ', '.join(sorted(unknown_options))
+            sub = 'In subproject {}: '.format(subproject) if subproject else ''
+            mlog.warning('{}Unknown options: "{}"'.format(sub, unknown_options))
+            mlog.log('The value of new options can be set with:')
+            mlog.log(mlog.bold('meson setup  --reconfigure -Dnew_option=new_value ...'))
+        if not self.is_cross_build():
+            self.copy_build_options_from_regular_ones()
+
+    def set_default_options(self, default_options, subproject, env):
+        # Warn if the user is using two different ways of setting build-type
+        # options that override each other
+        if 'buildtype' in env.cmd_line_options and \
+           ('optimization' in env.cmd_line_options or 'debug' in env.cmd_line_options):
+            mlog.warning('Recommend using either -Dbuildtype or -Doptimization + -Ddebug. '
+                         'Using both is redundant since they override each other. '
+                         'See: https://mesonbuild.com/Builtin-options.html#build-type-options')
+        cmd_line_options = OrderedDict()
+        # Set project default_options as if they were passed to the cmdline.
+        # Subprojects can only define default for user options and not yielding
+        # builtin option.
+        from . import optinterpreter
+        for k, v in default_options.items():
+            if subproject:
+                if (k not in builtin_options or builtin_options[k].yielding) \
+                        and optinterpreter.is_invalid_name(k, log=False):
+                    continue
+                k = subproject + ':' + k
+            cmd_line_options[k] = v
+
+        # Override project default_options using conf files (cross or native)
+        for k, v in env.paths.host:
+            if v is not None:
+                cmd_line_options[k] = v
+
+        # Override all the above defaults using the command-line arguments
+        # actually passed to us
+        cmd_line_options.update(env.cmd_line_options)
+        env.cmd_line_options = cmd_line_options
+
+        # Create a subset of cmd_line_options, keeping only options for this
+        # subproject. Also take builtin options if it's the main project.
+        # Language and backend specific options will be set later when adding
+        # languages and setting the backend (builtin options must be set first
+        # to know which backend we'll use).
+        options = OrderedDict()
+
+        # Some options default to environment variables if they are
+        # unset, set those now. These will either be overwritten
+        # below, or they won't. These should only be set on the first run.
+        for for_machine in MachineChoice:
+            p_env_pair = get_env_var_pair(for_machine, self.is_cross_build(), 'PKG_CONFIG_PATH')
+            if p_env_pair is not None:
+                p_env_var, p_env = p_env_pair
+
+                # PKG_CONFIG_PATH may contain duplicates, which must be
+                # removed, else a duplicates-in-array-option warning arises.
+                p_list = list(OrderedSet(p_env.split(':')))
+
+                key = 'pkg_config_path'
+                if for_machine == MachineChoice.BUILD:
+                    key = 'build.' + key
+
+                if env.first_invocation:
+                    options[key] = p_list
+                elif options.get(key, []) != p_list:
+                    mlog.warning(
+                        p_env_var +
+                        ' environment variable has changed '
+                        'between configurations, meson ignores this. '
+                        'Use -Dpkg_config_path to change pkg-config search '
+                        'path instead.'
+                    )
+
+        def remove_prefix(text, prefix):
+            if text.startswith(prefix):
+                return text[len(prefix):]
+            return text
+
+        for k, v in env.cmd_line_options.items():
+            if subproject:
+                if not k.startswith(subproject + ':'):
+                    continue
+            elif k not in builtin_options.keys() \
+                    and remove_prefix(k, 'build.') not in builtin_options_per_machine.keys():
+                if ':' in k:
+                    continue
+                if optinterpreter.is_invalid_name(k, log=False):
+                    continue
+            options[k] = v
+
+        self.set_options(options, subproject=subproject)
+
+    def add_lang_args(self, lang: str, comp: T.Type['Compiler'],
+                      for_machine: MachineChoice, env: 'Environment') -> None:
+        """Add global language arguments that are needed before compiler/linker detection."""
+        from .compilers import compilers
+
+        for k, o in compilers.get_global_options(
+                lang,
+                comp,
+                for_machine,
+                env.is_cross_build(),
+                env.properties[for_machine]).items():
+            # prefixed compiler options affect just this machine
+            opt_prefix = for_machine.get_prefix()
+            user_k = opt_prefix + lang + '_' + k
+            if user_k in env.cmd_line_options:
+                o.set_value(env.cmd_line_options[user_k])
+            self.compiler_options[for_machine][lang].setdefault(k, o)
+
+    def process_new_compiler(self, lang: str, comp: T.Type['Compiler'], env: 'Environment') -> None:
+        from . import compilers
+
+        self.compilers[comp.for_machine][lang] = comp
+        enabled_opts = []
+
+        for k, o in comp.get_options().items():
+            # prefixed compiler options affect just this machine
+            opt_prefix = comp.for_machine.get_prefix()
+            user_k = opt_prefix + lang + '_' + k
+            if user_k in env.cmd_line_options:
+                o.set_value(env.cmd_line_options[user_k])
+            self.compiler_options[comp.for_machine][lang].setdefault(k, o)
+
+        enabled_opts = []
+        for optname in comp.base_options:
+            if optname in self.base_options:
+                continue
+            oobj = compilers.base_options[optname]
+            if optname in env.cmd_line_options:
+                oobj.set_value(env.cmd_line_options[optname])
+                enabled_opts.append(optname)
+            self.base_options[optname] = oobj
+        self.emit_base_options_warnings(enabled_opts)
+
+    def emit_base_options_warnings(self, enabled_opts: list):
+        if 'b_bitcode' in enabled_opts:
+            mlog.warning('Base option \'b_bitcode\' is enabled, which is incompatible with many linker options. Incompatible options such as \'b_asneeded\' have been disabled.', fatal=False)
+            mlog.warning('Please see https://mesonbuild.com/Builtin-options.html#Notes_about_Apple_Bitcode_support for more details.', fatal=False)
+
+class CmdLineFileParser(configparser.ConfigParser):
+    def __init__(self):
+        # We don't want ':' as key delimiter, otherwise it would break when
+        # storing subproject options like "subproject:option=value"
+        super().__init__(delimiters=['='], interpolation=None)
+
+class MachineFileParser():
+    def __init__(self, filenames: T.List[str]):
+        self.parser = CmdLineFileParser()
+        self.constants = {'True': True, 'False': False}
+        self.sections = {}
+
+        self.parser.read(filenames)
+
+        # Parse [constants] first so they can be used in other sections
+        if self.parser.has_section('constants'):
+            self.constants.update(self._parse_section('constants'))
+
+        for s in self.parser.sections():
+            if s == 'constants':
+                continue
+            self.sections[s] = self._parse_section(s)
+
+    def _parse_section(self, s):
+        self.scope = self.constants.copy()
+        section = {}
+        for entry, value in self.parser.items(s):
+            if ' ' in entry or '\t' in entry or "'" in entry or '"' in entry:
+                raise EnvironmentException('Malformed variable name {!r} in machine file.'.format(entry))
+            # Windows paths...
+            value = value.replace('\\', '\\\\')
+            try:
+                ast = mparser.Parser(value, 'machinefile').parse()
+                res = self._evaluate_statement(ast.lines[0])
+            except MesonException:
+                raise EnvironmentException('Malformed value in machine file variable {!r}.'.format(entry))
+            except KeyError as e:
+                raise EnvironmentException('Undefined constant {!r} in machine file variable {!r}.'.format(e.args[0], entry))
+            section[entry] = res
+            self.scope[entry] = res
+        return section
+
+    def _evaluate_statement(self, node):
+        if isinstance(node, (mparser.StringNode)):
+            return node.value
+        elif isinstance(node, mparser.BooleanNode):
+            return node.value
+        elif isinstance(node, mparser.NumberNode):
+            return node.value
+        elif isinstance(node, mparser.ArrayNode):
+            return [self._evaluate_statement(arg) for arg in node.args.arguments]
+        elif isinstance(node, mparser.IdNode):
+            return self.scope[node.value]
+        elif isinstance(node, mparser.ArithmeticNode):
+            l = self._evaluate_statement(node.left)
+            r = self._evaluate_statement(node.right)
+            if node.operation == 'add':
+                if (isinstance(l, str) and isinstance(r, str)) or \
+                   (isinstance(l, list) and isinstance(r, list)):
+                    return l + r
+            elif node.operation == 'div':
+                if isinstance(l, str) and isinstance(r, str):
+                    return os.path.join(l, r)
+        raise EnvironmentException('Unsupported node type')
+
+def parse_machine_files(filenames):
+    parser = MachineFileParser(filenames)
+    return parser.sections
+
+def get_cmd_line_file(build_dir):
+    return os.path.join(build_dir, 'meson-private', 'cmd_line.txt')
+
+def read_cmd_line_file(build_dir, options):
+    filename = get_cmd_line_file(build_dir)
+    if not os.path.isfile(filename):
+        return
+
+    config = CmdLineFileParser()
+    config.read(filename)
+
+    # Do a copy because config is not really a dict. options.cmd_line_options
+    # overrides values from the file.
+    d = dict(config['options'])
+    d.update(options.cmd_line_options)
+    options.cmd_line_options = d
+
+    properties = config['properties']
+    if not options.cross_file:
+        options.cross_file = ast.literal_eval(properties.get('cross_file', '[]'))
+    if not options.native_file:
+        # This will be a string in the form: "['first', 'second', ...]", use
+        # literal_eval to get it into the list of strings.
+        options.native_file = ast.literal_eval(properties.get('native_file', '[]'))
+
+def cmd_line_options_to_string(options):
+    return {k: str(v) for k, v in options.cmd_line_options.items()}
+
+def write_cmd_line_file(build_dir, options):
+    filename = get_cmd_line_file(build_dir)
+    config = CmdLineFileParser()
+
+    properties = OrderedDict()
+    if options.cross_file:
+        properties['cross_file'] = options.cross_file
+    if options.native_file:
+        properties['native_file'] = options.native_file
+
+    config['options'] = cmd_line_options_to_string(options)
+    config['properties'] = properties
+    with open(filename, 'w') as f:
+        config.write(f)
+
+def update_cmd_line_file(build_dir, options):
+    filename = get_cmd_line_file(build_dir)
+    config = CmdLineFileParser()
+    config.read(filename)
+    config['options'].update(cmd_line_options_to_string(options))
+    with open(filename, 'w') as f:
+        config.write(f)
+
+def get_cmd_line_options(build_dir, options):
+    copy = argparse.Namespace(**vars(options))
+    read_cmd_line_file(build_dir, copy)
+    cmdline = ['-D{}={}'.format(k, v) for k, v in copy.cmd_line_options.items()]
+    if options.cross_file:
+        cmdline += ['--cross-file {}'.format(f) for f in options.cross_file]
+    if options.native_file:
+        cmdline += ['--native-file {}'.format(f) for f in options.native_file]
+    return ' '.join([shlex.quote(x) for x in cmdline])
+
+def major_versions_differ(v1, v2):
+    return v1.split('.')[0:2] != v2.split('.')[0:2]
+
+def load(build_dir):
+    filename = os.path.join(build_dir, 'meson-private', 'coredata.dat')
+    load_fail_msg = 'Coredata file {!r} is corrupted. Try with a fresh build tree.'.format(filename)
+    try:
+        with open(filename, 'rb') as f:
+            obj = pickle.load(f)
+    except (pickle.UnpicklingError, EOFError):
+        raise MesonException(load_fail_msg)
+    except AttributeError:
+        raise MesonException(
+            "Coredata file {!r} references functions or classes that don't "
+            "exist. This probably means that it was generated with an old "
+            "version of meson.".format(filename))
+    if not isinstance(obj, CoreData):
+        raise MesonException(load_fail_msg)
+    if major_versions_differ(obj.version, version):
+        raise MesonVersionMismatchException(obj.version, version)
+    return obj
+
+def save(obj, build_dir):
+    filename = os.path.join(build_dir, 'meson-private', 'coredata.dat')
+    prev_filename = filename + '.prev'
+    tempfilename = filename + '~'
+    if major_versions_differ(obj.version, version):
+        raise MesonException('Fatal version mismatch corruption.')
+    if os.path.exists(filename):
+        import shutil
+        shutil.copyfile(filename, prev_filename)
+    with open(tempfilename, 'wb') as f:
+        pickle.dump(obj, f)
+        f.flush()
+        os.fsync(f.fileno())
+    os.replace(tempfilename, filename)
+    return filename
+
+
+def register_builtin_arguments(parser):
+    for n, b in builtin_options.items():
+        b.add_to_argparse(n, parser, '', '')
+    for n, b in builtin_options_per_machine.items():
+        b.add_to_argparse(n, parser, '', ' (just for host machine)')
+        b.add_to_argparse(n, parser, 'build.', ' (just for build machine)')
+    parser.add_argument('-D', action='append', dest='projectoptions', default=[], metavar="option",
+                        help='Set the value of an option, can be used several times to set multiple options.')
+
+def create_options_dict(options):
+    result = OrderedDict()
+    for o in options:
+        try:
+            (key, value) = o.split('=', 1)
+        except ValueError:
+            raise MesonException('Option {!r} must have a value separated by equals sign.'.format(o))
+        result[key] = value
+    return result
+
+def parse_cmd_line_options(args):
+    args.cmd_line_options = create_options_dict(args.projectoptions)
+
+    # Merge builtin options set with --option into the dict.
+    for name in chain(
+            builtin_options.keys(),
+            ('build.' + k for k in builtin_options_per_machine.keys()),
+            builtin_options_per_machine.keys(),
+    ):
+        value = getattr(args, name, None)
+        if value is not None:
+            if name in args.cmd_line_options:
+                cmdline_name = BuiltinOption.argparse_name_to_arg(name)
+                raise MesonException(
+                    'Got argument {0} as both -D{0} and {1}. Pick one.'.format(name, cmdline_name))
+            args.cmd_line_options[name] = value
+            delattr(args, name)
+
+
+_U = T.TypeVar('_U', bound=UserOption[_T])
+
+class BuiltinOption(T.Generic[_T, _U]):
+
+    """Class for a builtin option type.
+
+    There are some cases that are not fully supported yet.
+    """
+
+    def __init__(self, opt_type: T.Type[_U], description: str, default: T.Any, yielding: bool = True, *,
+                 choices: T.Any = None):
+        self.opt_type = opt_type
+        self.description = description
+        self.default = default
+        self.choices = choices
+        self.yielding = yielding
+
+    def init_option(self, name: str, value: T.Optional[T.Any], prefix: str) -> _U:
+        """Create an instance of opt_type and return it."""
+        if value is None:
+            value = self.prefixed_default(name, prefix)
+        keywords = {'yielding': self.yielding, 'value': value}
+        if self.choices:
+            keywords['choices'] = self.choices
+        return self.opt_type(self.description, **keywords)
+
+    def _argparse_action(self) -> T.Optional[str]:
+        # If the type is a boolean, the presence of the argument in --foo form
+        # is to enable it. Disabling happens by using -Dfoo=false, which is
+        # parsed under `args.projectoptions` and does not hit this codepath.
+        if isinstance(self.default, bool):
+            return 'store_true'
+        return None
+
+    def _argparse_choices(self) -> T.Any:
+        if self.opt_type is UserBooleanOption:
+            return [True, False]
+        elif self.opt_type is UserFeatureOption:
+            return UserFeatureOption.static_choices
+        return self.choices
+
+    @staticmethod
+    def argparse_name_to_arg(name: str) -> str:
+        if name == 'warning_level':
+            return '--warnlevel'
+        else:
+            return '--' + name.replace('_', '-')
+
+    def prefixed_default(self, name: str, prefix: str = '') -> T.Any:
+        if self.opt_type in [UserComboOption, UserIntegerOption]:
+            return self.default
+        try:
+            return builtin_dir_noprefix_options[name][prefix]
+        except KeyError:
+            pass
+        return self.default
+
+    def add_to_argparse(self, name: str, parser: argparse.ArgumentParser, prefix: str, help_suffix: str) -> None:
+        kwargs = OrderedDict()
+
+        c = self._argparse_choices()
+        b = self._argparse_action()
+        h = self.description
+        if not b:
+            h = '{} (default: {}).'.format(h.rstrip('.'), self.prefixed_default(name))
+        else:
+            kwargs['action'] = b
+        if c and not b:
+            kwargs['choices'] = c
+        kwargs['default'] = argparse.SUPPRESS
+        kwargs['dest'] = prefix + name
+
+        cmdline_name = self.argparse_name_to_arg(prefix + name)
+        parser.add_argument(cmdline_name, help=h + help_suffix, **kwargs)
+
+# Update `docs/markdown/Builtin-options.md` after changing the options below
+builtin_options = OrderedDict([
+    # Directories
+    ('prefix',     BuiltinOption(UserStringOption, 'Installation prefix', default_prefix())),
+    ('bindir',     BuiltinOption(UserStringOption, 'Executable directory', 'bin')),
+    ('datadir',    BuiltinOption(UserStringOption, 'Data file directory', 'share')),
+    ('includedir', BuiltinOption(UserStringOption, 'Header file directory', 'include')),
+    ('infodir',    BuiltinOption(UserStringOption, 'Info page directory', 'share/info')),
+    ('libdir',     BuiltinOption(UserStringOption, 'Library directory', default_libdir())),
+    ('libexecdir', BuiltinOption(UserStringOption, 'Library executable directory', default_libexecdir())),
+    ('localedir',  BuiltinOption(UserStringOption, 'Locale data directory', 'share/locale')),
+    ('localstatedir',   BuiltinOption(UserStringOption, 'Localstate data directory', 'var')),
+    ('mandir',          BuiltinOption(UserStringOption, 'Manual page directory', 'share/man')),
+    ('sbindir',         BuiltinOption(UserStringOption, 'System executable directory', 'sbin')),
+    ('sharedstatedir',  BuiltinOption(UserStringOption, 'Architecture-independent data directory', 'com')),
+    ('sysconfdir',      BuiltinOption(UserStringOption, 'Sysconf data directory', 'etc')),
+    # Core options
+    ('auto_features',   BuiltinOption(UserFeatureOption, "Override value of all 'auto' features", 'auto')),
+    ('backend',         BuiltinOption(UserComboOption, 'Backend to use', 'ninja', choices=backendlist)),
+    ('buildtype',       BuiltinOption(UserComboOption, 'Build type to use', 'debug',
+                                      choices=['plain', 'debug', 'debugoptimized', 'release', 'minsize', 'custom'])),
+    ('debug',           BuiltinOption(UserBooleanOption, 'Debug', True)),
+    ('default_library', BuiltinOption(UserComboOption, 'Default library type', 'shared', choices=['shared', 'static', 'both'],
+                                      yielding=False)),
+    ('errorlogs',       BuiltinOption(UserBooleanOption, "Whether to print the logs from failing tests", True)),
+    ('install_umask',   BuiltinOption(UserUmaskOption, 'Default umask to apply on permissions of installed files', '022')),
+    ('layout',          BuiltinOption(UserComboOption, 'Build directory layout', 'mirror', choices=['mirror', 'flat'])),
+    ('optimization',    BuiltinOption(UserComboOption, 'Optimization level', '0', choices=['0', 'g', '1', '2', '3', 's'])),
+    ('stdsplit',        BuiltinOption(UserBooleanOption, 'Split stdout and stderr in test logs', True)),
+    ('strip',           BuiltinOption(UserBooleanOption, 'Strip targets on install', False)),
+    ('unity',           BuiltinOption(UserComboOption, 'Unity build', 'off', choices=['on', 'off', 'subprojects'])),
+    ('unity_size',      BuiltinOption(UserIntegerOption, 'Unity block size', (2, None, 4))),
+    ('warning_level',   BuiltinOption(UserComboOption, 'Compiler warning level to use', '1', choices=['0', '1', '2', '3'])),
+    ('werror',          BuiltinOption(UserBooleanOption, 'Treat warnings as errors', False, yielding=False)),
+    ('wrap_mode',       BuiltinOption(UserComboOption, 'Wrap mode', 'default', choices=['default', 'nofallback', 'nodownload', 'forcefallback'])),
+    ('force_fallback_for', BuiltinOption(UserArrayOption, 'Force fallback for those subprojects', [])),
+])
+
+builtin_options_per_machine = OrderedDict([
+    ('pkg_config_path', BuiltinOption(UserArrayOption, 'List of additional paths for pkg-config to search', [])),
+    ('cmake_prefix_path', BuiltinOption(UserArrayOption, 'List of additional prefixes for cmake to search', [])),
+])
+
+# Special prefix-dependent defaults for installation directories that reside in
+# a path outside of the prefix in FHS and common usage.
+builtin_dir_noprefix_options = {
+    'sysconfdir':     {'/usr': '/etc'},
+    'localstatedir':  {'/usr': '/var',     '/usr/local': '/var/local'},
+    'sharedstatedir': {'/usr': '/var/lib', '/usr/local': '/var/local/lib'},
+}
+
+forbidden_target_names = {'clean': None,
+                          'clean-ctlist': None,
+                          'clean-gcno': None,
+                          'clean-gcda': None,
+                          'coverage': None,
+                          'coverage-text': None,
+                          'coverage-xml': None,
+                          'coverage-html': None,
+                          'phony': None,
+                          'PHONY': None,
+                          'all': None,
+                          'test': None,
+                          'benchmark': None,
+                          'install': None,
+                          'uninstall': None,
+                          'build.ninja': None,
+                          'scan-build': None,
+                          'reconfigure': None,
+                          'dist': None,
+                          'distcheck': None,
+                          }
diff --git a/meson/mesonbuild/dependencies/__init__.py b/meson/mesonbuild/dependencies/__init__.py
new file mode 100644
index 000000000..d75d22698
--- /dev/null
+++ b/meson/mesonbuild/dependencies/__init__.py
@@ -0,0 +1,89 @@
+# Copyright 2017 The Meson development team
+
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+
+#     http://www.apache.org/licenses/LICENSE-2.0
+
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from .boost import BoostDependency
+from .cuda import CudaDependency
+from .hdf5 import HDF5Dependency
+from .base import (  # noqa: F401
+    Dependency, DependencyException, DependencyMethods, ExternalProgram, EmptyExternalProgram, NonExistingExternalProgram,
+    ExternalDependency, NotFoundDependency, ExternalLibrary, ExtraFrameworkDependency, InternalDependency,
+    PkgConfigDependency, CMakeDependency, find_external_dependency, get_dep_identifier, packages, _packages_accept_language,
+    DependencyFactory)
+from .dev import ValgrindDependency, gmock_factory, gtest_factory, llvm_factory, zlib_factory
+from .coarrays import coarray_factory
+from .mpi import mpi_factory
+from .scalapack import scalapack_factory
+from .misc import (
+    BlocksDependency, OpenMPDependency, cups_factory, curses_factory, gpgme_factory,
+    libgcrypt_factory, libwmf_factory, netcdf_factory, pcap_factory, python3_factory,
+    shaderc_factory, threads_factory,
+)
+from .platform import AppleFrameworks
+from .ui import GnuStepDependency, Qt4Dependency, Qt5Dependency, WxDependency, gl_factory, sdl2_factory, vulkan_factory
+
+
+# This is a dict where the keys should be strings, and the values must be one
+# of:
+# - An ExternalDependency subclass
+# - A DependencyFactory object
+# - A callable with a signature of (Environment, MachineChoice, Dict[str, Any]) -> List[Callable[[], DependencyType]]
+packages.update({
+    # From dev:
+    'gtest': gtest_factory,
+    'gmock': gmock_factory,
+    'llvm': llvm_factory,
+    'valgrind': ValgrindDependency,
+    'zlib': zlib_factory,
+
+    'boost': BoostDependency,
+    'cuda': CudaDependency,
+
+    # per-file
+    'coarray': coarray_factory,
+    'hdf5': HDF5Dependency,
+    'mpi': mpi_factory,
+    'scalapack': scalapack_factory,
+
+    # From misc:
+    'blocks': BlocksDependency,
+    'curses': curses_factory,
+    'netcdf': netcdf_factory,
+    'openmp': OpenMPDependency,
+    'python3': python3_factory,
+    'threads': threads_factory,
+    'pcap': pcap_factory,
+    'cups': cups_factory,
+    'libwmf': libwmf_factory,
+    'libgcrypt': libgcrypt_factory,
+    'gpgme': gpgme_factory,
+    'shaderc': shaderc_factory,
+
+    # From platform:
+    'appleframeworks': AppleFrameworks,
+
+    # From ui:
+    'gl': gl_factory,
+    'gnustep': GnuStepDependency,
+    'qt4': Qt4Dependency,
+    'qt5': Qt5Dependency,
+    'sdl2': sdl2_factory,
+    'wxwidgets': WxDependency,
+    'vulkan': vulkan_factory,
+})
+_packages_accept_language.update({
+    'hdf5',
+    'mpi',
+    'netcdf',
+    'openmp',
+})
diff --git a/meson/mesonbuild/dependencies/base.py b/meson/mesonbuild/dependencies/base.py
new file mode 100644
index 000000000..7e2bd0f09
--- /dev/null
+++ b/meson/mesonbuild/dependencies/base.py
@@ -0,0 +1,2589 @@
+# Copyright 2013-2018 The Meson development team
+
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+
+#     http://www.apache.org/licenses/LICENSE-2.0
+
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# This file contains the detection logic for external dependencies.
+# Custom logic for several other packages are in separate files.
+import copy
+import functools
+import os
+import re
+import json
+import shlex
+import shutil
+import stat
+import sys
+import textwrap
+import platform
+import typing as T
+from enum import Enum
+from pathlib import Path, PurePath
+
+from .. import mlog
+from .. import mesonlib
+from ..compilers import clib_langs
+from ..envconfig import get_env_var
+from ..environment import BinaryTable, Environment, MachineInfo
+from ..cmake import CMakeExecutor, CMakeTraceParser, CMakeException
+from ..mesonlib import MachineChoice, MesonException, OrderedSet, PerMachine
+from ..mesonlib import Popen_safe, version_compare_many, version_compare, listify, stringlistify, extract_as_list, split_args
+from ..mesonlib import Version, LibType
+from ..mesondata import mesondata
+
+if T.TYPE_CHECKING:
+    from ..compilers.compilers import CompilerType  # noqa: F401
+    DependencyType = T.TypeVar('DependencyType', bound='Dependency')
+
+# These must be defined in this file to avoid cyclical references.
+packages = {}
+_packages_accept_language = set()
+
+
+class DependencyException(MesonException):
+    '''Exceptions raised while trying to find dependencies'''
+
+
+class DependencyMethods(Enum):
+    # Auto means to use whatever dependency checking mechanisms in whatever order meson thinks is best.
+    AUTO = 'auto'
+    PKGCONFIG = 'pkg-config'
+    QMAKE = 'qmake'
+    CMAKE = 'cmake'
+    # Just specify the standard link arguments, assuming the operating system provides the library.
+    SYSTEM = 'system'
+    # This is only supported on OSX - search the frameworks directory by name.
+    EXTRAFRAMEWORK = 'extraframework'
+    # Detect using the sysconfig module.
+    SYSCONFIG = 'sysconfig'
+    # Specify using a "program"-config style tool
+    CONFIG_TOOL = 'config-tool'
+    # For backwards compatibility
+    SDLCONFIG = 'sdlconfig'
+    CUPSCONFIG = 'cups-config'
+    PCAPCONFIG = 'pcap-config'
+    LIBWMFCONFIG = 'libwmf-config'
+    # Misc
+    DUB = 'dub'
+
+
+def find_external_program(env: Environment, for_machine: MachineChoice, name: str,
+                          display_name: str, default_names: T.List[str],
+                          allow_default_for_cross: bool = True) -> T.Generator['ExternalProgram', None, None]:
+    """Find an external program, chcking the cross file plus any default options."""
+    # Lookup in cross or machine file.
+    potential_path = env.lookup_binary_entry(for_machine, name)
+    if potential_path is not None:
+        mlog.debug('{} binary for {} specified from cross file, native file, '
+                    'or env var as {}'.format(display_name, for_machine, potential_path))
+        yield ExternalProgram.from_entry(name, potential_path)
+        # We never fallback if the user-specified option is no good, so
+        # stop returning options.
+        return
+    mlog.debug('{} binary missing from cross or native file, or env var undefined.'.format(display_name))
+    # Fallback on hard-coded defaults, if a default binary is allowed for use
+    # with cross targets, or if this is not a cross target
+    if allow_default_for_cross or not (for_machine is MachineChoice.HOST and env.is_cross_build(for_machine)):
+        for potential_path in default_names:
+            mlog.debug('Trying a default {} fallback at'.format(display_name), potential_path)
+            yield ExternalProgram(potential_path, silent=True)
+    else:
+        mlog.debug('Default target is not allowed for cross use')
+
+
+class Dependency:
+
+    @classmethod
+    def _process_include_type_kw(cls, kwargs) -> str:
+        if 'include_type' not in kwargs:
+            return 'preserve'
+        if not isinstance(kwargs['include_type'], str):
+            raise DependencyException('The include_type kwarg must be a string type')
+        if kwargs['include_type'] not in ['preserve', 'system', 'non-system']:
+            raise DependencyException("include_type may only be one of ['preserve', 'system', 'non-system']")
+        return kwargs['include_type']
+
+    def __init__(self, type_name, kwargs):
+        self.name = "null"
+        self.version = None
+        self.language = None # None means C-like
+        self.is_found = False
+        self.type_name = type_name
+        self.compile_args = []
+        self.link_args = []
+        # Raw -L and -l arguments without manual library searching
+        # If None, self.link_args will be used
+        self.raw_link_args = None
+        self.sources = []
+        self.methods = process_method_kw(self.get_methods(), kwargs)
+        self.include_type = self._process_include_type_kw(kwargs)
+        self.ext_deps = []  # type: T.List[Dependency]
+
+    def __repr__(self):
+        s = '<{0} {1}: {2}>'
+        return s.format(self.__class__.__name__, self.name, self.is_found)
+
+    def get_compile_args(self):
+        if self.include_type == 'system':
+            converted = []
+            for i in self.compile_args:
+                if i.startswith('-I') or i.startswith('/I'):
+                    converted += ['-isystem' + i[2:]]
+                else:
+                    converted += [i]
+            return converted
+        if self.include_type == 'non-system':
+            converted = []
+            for i in self.compile_args:
+                if i.startswith('-isystem'):
+                    converted += ['-I' + i[8:]]
+                else:
+                    converted += [i]
+            return converted
+        return self.compile_args
+
+    def get_link_args(self, raw: bool = False) -> T.List[str]:
+        if raw and self.raw_link_args is not None:
+            return self.raw_link_args
+        return self.link_args
+
+    def found(self):
+        return self.is_found
+
+    def get_sources(self):
+        """Source files that need to be added to the target.
+        As an example, gtest-all.cc when using GTest."""
+        return self.sources
+
+    @staticmethod
+    def get_methods():
+        return [DependencyMethods.AUTO]
+
+    def get_name(self):
+        return self.name
+
+    def get_version(self):
+        if self.version:
+            return self.version
+        else:
+            return 'unknown'
+
+    def get_include_type(self) -> str:
+        return self.include_type
+
+    def get_exe_args(self, compiler):
+        return []
+
+    def get_pkgconfig_variable(self, variable_name, kwargs):
+        raise DependencyException('{!r} is not a pkgconfig dependency'.format(self.name))
+
+    def get_configtool_variable(self, variable_name):
+        raise DependencyException('{!r} is not a config-tool dependency'.format(self.name))
+
+    def get_partial_dependency(self, *, compile_args: bool = False,
+                               link_args: bool = False, links: bool = False,
+                               includes: bool = False, sources: bool = False):
+        """Create a new dependency that contains part of the parent dependency.
+
+        The following options can be inherited:
+            links -- all link_with arguments
+            includes -- all include_directory and -I/-isystem calls
+            sources -- any source, header, or generated sources
+            compile_args -- any compile args
+            link_args -- any link args
+
+        Additionally the new dependency will have the version parameter of it's
+        parent (if any) and the requested values of any dependencies will be
+        added as well.
+        """
+        raise RuntimeError('Unreachable code in partial_dependency called')
+
+    def _add_sub_dependency(self, deplist: T.List['DependencyType']) -> bool:
+        """Add an internal depdency from a list of possible dependencies.
+
+        This method is intended to make it easier to add additional
+        dependencies to another dependency internally.
+
+        Returns true if the dependency was successfully added, false
+        otherwise.
+        """
+        for d in deplist:
+            dep = d()
+            if dep.is_found:
+                self.ext_deps.append(dep)
+                return True
+        return False
+
+    def get_variable(self, *, cmake: T.Optional[str] = None, pkgconfig: T.Optional[str] = None,
+                     configtool: T.Optional[str] = None, internal: T.Optional[str] = None,
+                     default_value: T.Optional[str] = None,
+                     pkgconfig_define: T.Optional[T.List[str]] = None) -> T.Union[str, T.List[str]]:
+        if default_value is not None:
+            return default_value
+        raise DependencyException('No default provided for dependency {!r}, which is not pkg-config, cmake, or config-tool based.'.format(self))
+
+    def generate_system_dependency(self, include_type: str) -> T.Type['Dependency']:
+        new_dep = copy.deepcopy(self)
+        new_dep.include_type = self._process_include_type_kw({'include_type': include_type})
+        return new_dep
+
+class InternalDependency(Dependency):
+    def __init__(self, version, incdirs, compile_args, link_args, libraries,
+                 whole_libraries, sources, ext_deps, variables: T.Dict[str, T.Any]):
+        super().__init__('internal', {})
+        self.version = version
+        self.is_found = True
+        self.include_directories = incdirs
+        self.compile_args = compile_args
+        self.link_args = link_args
+        self.libraries = libraries
+        self.whole_libraries = whole_libraries
+        self.sources = sources
+        self.ext_deps = ext_deps
+        self.variables = variables
+
+    def __deepcopy__(self, memo: dict) -> 'InternalDependency':
+        result = self.__class__.__new__(self.__class__)
+        memo[id(self)] = result
+        for k, v in self.__dict__.items():
+            if k in ['libraries', 'whole_libraries']:
+                setattr(result, k, copy.copy(v))
+            else:
+                setattr(result, k, copy.deepcopy(v, memo))
+        return result
+
+    def get_pkgconfig_variable(self, variable_name, kwargs):
+        raise DependencyException('Method "get_pkgconfig_variable()" is '
+                                  'invalid for an internal dependency')
+
+    def get_configtool_variable(self, variable_name):
+        raise DependencyException('Method "get_configtool_variable()" is '
+                                  'invalid for an internal dependency')
+
+    def get_partial_dependency(self, *, compile_args: bool = False,
+                               link_args: bool = False, links: bool = False,
+                               includes: bool = False, sources: bool = False):
+        final_compile_args = self.compile_args.copy() if compile_args else []
+        final_link_args = self.link_args.copy() if link_args else []
+        final_libraries = self.libraries.copy() if links else []
+        final_whole_libraries = self.whole_libraries.copy() if links else []
+        final_sources = self.sources.copy() if sources else []
+        final_includes = self.include_directories.copy() if includes else []
+        final_deps = [d.get_partial_dependency(
+            compile_args=compile_args, link_args=link_args, links=links,
+            includes=includes, sources=sources) for d in self.ext_deps]
+        return InternalDependency(
+            self.version, final_includes, final_compile_args,
+            final_link_args, final_libraries, final_whole_libraries,
+            final_sources, final_deps, self.variables)
+
+    def get_variable(self, *, cmake: T.Optional[str] = None, pkgconfig: T.Optional[str] = None,
+                     configtool: T.Optional[str] = None, internal: T.Optional[str] = None,
+                     default_value: T.Optional[str] = None,
+                     pkgconfig_define: T.Optional[T.List[str]] = None) -> T.Union[str, T.List[str]]:
+        val = self.variables.get(internal, default_value)
+        if val is not None:
+            return val
+        raise DependencyException('Could not get an internal variable and no default provided for {!r}'.format(self))
+
+
+class HasNativeKwarg:
+    def __init__(self, kwargs):
+        self.for_machine = self.get_for_machine_from_kwargs(kwargs)
+
+    def get_for_machine_from_kwargs(self, kwargs):
+        return MachineChoice.BUILD if kwargs.get('native', False) else MachineChoice.HOST
+
+class ExternalDependency(Dependency, HasNativeKwarg):
+    def __init__(self, type_name, environment, kwargs, language: T.Optional[str] = None):
+        Dependency.__init__(self, type_name, kwargs)
+        self.env = environment
+        self.name = type_name # default
+        self.is_found = False
+        self.language = language
+        self.version_reqs = kwargs.get('version', None)
+        if isinstance(self.version_reqs, str):
+            self.version_reqs = [self.version_reqs]
+        self.required = kwargs.get('required', True)
+        self.silent = kwargs.get('silent', False)
+        self.static = kwargs.get('static', False)
+        if not isinstance(self.static, bool):
+            raise DependencyException('Static keyword must be boolean')
+        # Is this dependency to be run on the build platform?
+        HasNativeKwarg.__init__(self, kwargs)
+        self.clib_compiler = detect_compiler(self.name, environment, self.for_machine, self.language)
+
+    def get_compiler(self):
+        return self.clib_compiler
+
+    def get_partial_dependency(self, *, compile_args: bool = False,
+                               link_args: bool = False, links: bool = False,
+                               includes: bool = False, sources: bool = False):
+        new = copy.copy(self)
+        if not compile_args:
+            new.compile_args = []
+        if not link_args:
+            new.link_args = []
+        if not sources:
+            new.sources = []
+        if not includes:
+            new.include_directories = []
+        if not sources:
+            new.sources = []
+
+        return new
+
+    def log_details(self):
+        return ''
+
+    def log_info(self):
+        return ''
+
+    def log_tried(self):
+        return ''
+
+    # Check if dependency version meets the requirements
+    def _check_version(self):
+        if not self.is_found:
+            return
+
+        if self.version_reqs:
+            # an unknown version can never satisfy any requirement
+            if not self.version:
+                found_msg = ['Dependency', mlog.bold(self.name), 'found:']
+                found_msg += [mlog.red('NO'), 'unknown version, but need:',
+                              self.version_reqs]
+                mlog.log(*found_msg)
+
+                if self.required:
+                    m = 'Unknown version of dependency {!r}, but need {!r}.'
+                    raise DependencyException(m.format(self.name, self.version_reqs))
+
+            else:
+                (self.is_found, not_found, found) = \
+                    version_compare_many(self.version, self.version_reqs)
+                if not self.is_found:
+                    found_msg = ['Dependency', mlog.bold(self.name), 'found:']
+                    found_msg += [mlog.red('NO'),
+                                  'found', mlog.normal_cyan(self.version), 'but need:',
+                                  mlog.bold(', '.join(["'{}'".format(e) for e in not_found]))]
+                    if found:
+                        found_msg += ['; matched:',
+                                      ', '.join(["'{}'".format(e) for e in found])]
+                    mlog.log(*found_msg)
+
+                    if self.required:
+                        m = 'Invalid version of dependency, need {!r} {!r} found {!r}.'
+                        raise DependencyException(m.format(self.name, not_found, self.version))
+                    return
+
+
+class NotFoundDependency(Dependency):
+    def __init__(self, environment):
+        super().__init__('not-found', {})
+        self.env = environment
+        self.name = 'not-found'
+        self.is_found = False
+
+    def get_partial_dependency(self, *, compile_args: bool = False,
+                               link_args: bool = False, links: bool = False,
+                               includes: bool = False, sources: bool = False):
+        return copy.copy(self)
+
+
+class ConfigToolDependency(ExternalDependency):
+
+    """Class representing dependencies found using a config tool.
+
+    Takes the following extra keys in kwargs that it uses internally:
+    :tools List[str]: A list of tool names to use
+    :version_arg str: The argument to pass to the tool to get it's version
+    :returncode_value int: The value of the correct returncode
+        Because some tools are stupid and don't return 0
+    """
+
+    tools = None
+    tool_name = None
+    version_arg = '--version'
+    __strip_version = re.compile(r'^[0-9][0-9.]+')
+
+    def __init__(self, name, environment, kwargs, language: T.Optional[str] = None):
+        super().__init__('config-tool', environment, kwargs, language=language)
+        self.name = name
+        # You may want to overwrite the class version in some cases
+        self.tools = listify(kwargs.get('tools', self.tools))
+        if not self.tool_name:
+            self.tool_name = self.tools[0]
+        if 'version_arg' in kwargs:
+            self.version_arg = kwargs['version_arg']
+
+        req_version = kwargs.get('version', None)
+        tool, version = self.find_config(req_version, kwargs.get('returncode_value', 0))
+        self.config = tool
+        self.is_found = self.report_config(version, req_version)
+        if not self.is_found:
+            self.config = None
+            return
+        self.version = version
+
+    def _sanitize_version(self, version):
+        """Remove any non-numeric, non-point version suffixes."""
+        m = self.__strip_version.match(version)
+        if m:
+            # Ensure that there isn't a trailing '.', such as an input like
+            # `1.2.3.git-1234`
+            return m.group(0).rstrip('.')
+        return version
+
+    def find_config(self, versions: T.Optional[T.List[str]] = None, returncode: int = 0) \
+            -> T.Tuple[T.Optional[str], T.Optional[str]]:
+        """Helper method that searches for config tool binaries in PATH and
+        returns the one that best matches the given version requirements.
+        """
+        if not isinstance(versions, list) and versions is not None:
+            versions = listify(versions)
+        best_match = (None, None)  # type: T.Tuple[T.Optional[str], T.Optional[str]]
+        for potential_bin in find_external_program(
+                self.env, self.for_machine, self.tool_name,
+                self.tool_name, self.tools, allow_default_for_cross=False):
+            if not potential_bin.found():
+                continue
+            tool = potential_bin.get_command()
+            try:
+                p, out = Popen_safe(tool + [self.version_arg])[:2]
+            except (FileNotFoundError, PermissionError):
+                continue
+            if p.returncode != returncode:
+                continue
+
+            out = self._sanitize_version(out.strip())
+            # Some tools, like pcap-config don't supply a version, but also
+            # don't fail with --version, in that case just assume that there is
+            # only one version and return it.
+            if not out:
+                return (tool, None)
+            if versions:
+                is_found = version_compare_many(out, versions)[0]
+                # This allows returning a found version without a config tool,
+                # which is useful to inform the user that you found version x,
+                # but y was required.
+                if not is_found:
+                    tool = None
+            if best_match[1]:
+                if version_compare(out, '> {}'.format(best_match[1])):
+                    best_match = (tool, out)
+            else:
+                best_match = (tool, out)
+
+        return best_match
+
+    def report_config(self, version, req_version):
+        """Helper method to print messages about the tool."""
+
+        found_msg = [mlog.bold(self.tool_name), 'found:']
+
+        if self.config is None:
+            found_msg.append(mlog.red('NO'))
+            if version is not None and req_version is not None:
+                found_msg.append('found {!r} but need {!r}'.format(version, req_version))
+            elif req_version:
+                found_msg.append('need {!r}'.format(req_version))
+        else:
+            found_msg += [mlog.green('YES'), '({})'.format(' '.join(self.config)), version]
+
+        mlog.log(*found_msg)
+
+        return self.config is not None
+
+    def get_config_value(self, args, stage):
+        p, out, err = Popen_safe(self.config + args)
+        if p.returncode != 0:
+            if self.required:
+                raise DependencyException(
+                    'Could not generate {} for {}.\n{}'.format(
+                        stage, self.name, err))
+            return []
+        return split_args(out)
+
+    @staticmethod
+    def get_methods():
+        return [DependencyMethods.AUTO, DependencyMethods.CONFIG_TOOL]
+
+    def get_configtool_variable(self, variable_name):
+        p, out, _ = Popen_safe(self.config + ['--{}'.format(variable_name)])
+        if p.returncode != 0:
+            if self.required:
+                raise DependencyException(
+                    'Could not get variable "{}" for dependency {}'.format(
+                        variable_name, self.name))
+        variable = out.strip()
+        mlog.debug('Got config-tool variable {} : {}'.format(variable_name, variable))
+        return variable
+
+    def log_tried(self):
+        return self.type_name
+
+    def get_variable(self, *, cmake: T.Optional[str] = None, pkgconfig: T.Optional[str] = None,
+                     configtool: T.Optional[str] = None, internal: T.Optional[str] = None,
+                     default_value: T.Optional[str] = None,
+                     pkgconfig_define: T.Optional[T.List[str]] = None) -> T.Union[str, T.List[str]]:
+        if configtool:
+            # In the not required case '' (empty string) will be returned if the
+            # variable is not found. Since '' is a valid value to return we
+            # set required to True here to force and error, and use the
+            # finally clause to ensure it's restored.
+            restore = self.required
+            self.required = True
+            try:
+                return self.get_configtool_variable(configtool)
+            except DependencyException:
+                pass
+            finally:
+                self.required = restore
+        if default_value is not None:
+            return default_value
+        raise DependencyException('Could not get config-tool variable and no default provided for {!r}'.format(self))
+
+
+class PkgConfigDependency(ExternalDependency):
+    # The class's copy of the pkg-config path. Avoids having to search for it
+    # multiple times in the same Meson invocation.
+    class_pkgbin = PerMachine(None, None)
+    # We cache all pkg-config subprocess invocations to avoid redundant calls
+    pkgbin_cache = {}
+
+    def __init__(self, name, environment, kwargs, language: T.Optional[str] = None):
+        super().__init__('pkgconfig', environment, kwargs, language=language)
+        self.name = name
+        self.is_libtool = False
+        # Store a copy of the pkg-config path on the object itself so it is
+        # stored in the pickled coredata and recovered.
+        self.pkgbin = None
+
+        # Only search for pkg-config for each machine the first time and store
+        # the result in the class definition
+        if PkgConfigDependency.class_pkgbin[self.for_machine] is False:
+            mlog.debug('Pkg-config binary for %s is cached as not found.' % self.for_machine)
+        elif PkgConfigDependency.class_pkgbin[self.for_machine] is not None:
+            mlog.debug('Pkg-config binary for %s is cached.' % self.for_machine)
+        else:
+            assert PkgConfigDependency.class_pkgbin[self.for_machine] is None
+            mlog.debug('Pkg-config binary for %s is not cached.' % self.for_machine)
+            for potential_pkgbin in find_external_program(
+                    self.env, self.for_machine, 'pkgconfig', 'Pkg-config',
+                    environment.default_pkgconfig, allow_default_for_cross=False):
+                version_if_ok = self.check_pkgconfig(potential_pkgbin)
+                if not version_if_ok:
+                    continue
+                if not self.silent:
+                    mlog.log('Found pkg-config:', mlog.bold(potential_pkgbin.get_path()),
+                             '(%s)' % version_if_ok)
+                PkgConfigDependency.class_pkgbin[self.for_machine] = potential_pkgbin
+                break
+            else:
+                if not self.silent:
+                    mlog.log('Found Pkg-config:', mlog.red('NO'))
+                # Set to False instead of None to signify that we've already
+                # searched for it and not found it
+                PkgConfigDependency.class_pkgbin[self.for_machine] = False
+
+        self.pkgbin = PkgConfigDependency.class_pkgbin[self.for_machine]
+        if self.pkgbin is False:
+            self.pkgbin = None
+            msg = 'Pkg-config binary for machine %s not found. Giving up.' % self.for_machine
+            if self.required:
+                raise DependencyException(msg)
+            else:
+                mlog.debug(msg)
+                return
+
+        mlog.debug('Determining dependency {!r} with pkg-config executable '
+                   '{!r}'.format(name, self.pkgbin.get_path()))
+        ret, self.version, _ = self._call_pkgbin(['--modversion', name])
+        if ret != 0:
+            return
+
+        self.is_found = True
+
+        try:
+            # Fetch cargs to be used while using this dependency
+            self._set_cargs()
+            # Fetch the libraries and library paths needed for using this
+            self._set_libs()
+        except DependencyException as e:
+            mlog.debug("pkg-config error with '%s': %s" % (name, e))
+            if self.required:
+                raise
+            else:
+                self.compile_args = []
+                self.link_args = []
+                self.is_found = False
+                self.reason = e
+
+    def __repr__(self):
+        s = '<{0} {1}: {2} {3}>'
+        return s.format(self.__class__.__name__, self.name, self.is_found,
+                        self.version_reqs)
+
+    def _call_pkgbin_real(self, args, env):
+        cmd = self.pkgbin.get_command() + args
+        p, out, err = Popen_safe(cmd, env=env)
+        rc, out, err = p.returncode, out.strip(), err.strip()
+        call = ' '.join(cmd)
+        mlog.debug("Called `{}` -> {}\n{}".format(call, rc, out))
+        return rc, out, err
+
+    def _call_pkgbin(self, args, env=None):
+        # Always copy the environment since we're going to modify it
+        # with pkg-config variables
+        if env is None:
+            env = os.environ.copy()
+        else:
+            env = env.copy()
+
+        extra_paths = self.env.coredata.builtins_per_machine[self.for_machine]['pkg_config_path'].value
+        sysroot = self.env.properties[self.for_machine].get_sys_root()
+        if sysroot:
+            env['PKG_CONFIG_SYSROOT_DIR'] = sysroot
+        new_pkg_config_path = ':'.join([p for p in extra_paths])
+        mlog.debug('PKG_CONFIG_PATH: ' + new_pkg_config_path)
+        env['PKG_CONFIG_PATH'] = new_pkg_config_path
+
+        pkg_config_libdir_prop = self.env.properties[self.for_machine].get_pkg_config_libdir()
+        if pkg_config_libdir_prop:
+            new_pkg_config_libdir = ':'.join([p for p in pkg_config_libdir_prop])
+            env['PKG_CONFIG_LIBDIR'] = new_pkg_config_libdir
+            mlog.debug('PKG_CONFIG_LIBDIR: ' + new_pkg_config_libdir)
+
+        fenv = frozenset(env.items())
+        targs = tuple(args)
+        cache = PkgConfigDependency.pkgbin_cache
+        if (self.pkgbin, targs, fenv) not in cache:
+            cache[(self.pkgbin, targs, fenv)] = self._call_pkgbin_real(args, env)
+        return cache[(self.pkgbin, targs, fenv)]
+
+    def _convert_mingw_paths(self, args: T.List[str]) -> T.List[str]:
+        '''
+        Both MSVC and native Python on Windows cannot handle MinGW-esque /c/foo
+        paths so convert them to C:/foo. We cannot resolve other paths starting
+        with / like /home/foo so leave them as-is so that the user gets an
+        error/warning from the compiler/linker.
+        '''
+        if not mesonlib.is_windows():
+            return args
+        converted = []
+        for arg in args:
+            pargs = []
+            # Library search path
+            if arg.startswith('-L/'):
+                pargs = PurePath(arg[2:]).parts
+                tmpl = '-L{}:/{}'
+            elif arg.startswith('-I/'):
+                pargs = PurePath(arg[2:]).parts
+                tmpl = '-I{}:/{}'
+            # Full path to library or .la file
+            elif arg.startswith('/'):
+                pargs = PurePath(arg).parts
+                tmpl = '{}:/{}'
+            elif arg.startswith(('-L', '-I')) or (len(arg) > 2 and arg[1] == ':'):
+                # clean out improper '\\ ' as comes from some Windows pkg-config files
+                arg = arg.replace('\\ ', ' ')
+            if len(pargs) > 1 and len(pargs[1]) == 1:
+                arg = tmpl.format(pargs[1], '/'.join(pargs[2:]))
+            converted.append(arg)
+        return converted
+
+    def _split_args(self, cmd):
+        # pkg-config paths follow Unix conventions, even on Windows; split the
+        # output using shlex.split rather than mesonlib.split_args
+        return shlex.split(cmd)
+
+    def _set_cargs(self):
+        env = None
+        if self.language == 'fortran':
+            # gfortran doesn't appear to look in system paths for INCLUDE files,
+            # so don't allow pkg-config to suppress -I flags for system paths
+            env = os.environ.copy()
+            env['PKG_CONFIG_ALLOW_SYSTEM_CFLAGS'] = '1'
+        ret, out, err = self._call_pkgbin(['--cflags', self.name], env=env)
+        if ret != 0:
+            raise DependencyException('Could not generate cargs for %s:\n%s\n' %
+                                      (self.name, err))
+        self.compile_args = self._convert_mingw_paths(self._split_args(out))
+
+    def _search_libs(self, out, out_raw):
+        '''
+        @out: PKG_CONFIG_ALLOW_SYSTEM_LIBS=1 pkg-config --libs
+        @out_raw: pkg-config --libs
+
+        We always look for the file ourselves instead of depending on the
+        compiler to find it with -lfoo or foo.lib (if possible) because:
+        1. We want to be able to select static or shared
+        2. We need the full path of the library to calculate RPATH values
+        3. De-dup of libraries is easier when we have absolute paths
+
+        Libraries that are provided by the toolchain or are not found by
+        find_library() will be added with -L -l pairs.
+        '''
+        # Library paths should be safe to de-dup
+        #
+        # First, figure out what library paths to use. Originally, we were
+        # doing this as part of the loop, but due to differences in the order
+        # of -L values between pkg-config and pkgconf, we need to do that as
+        # a separate step. See:
+        # https://github.com/mesonbuild/meson/issues/3951
+        # https://github.com/mesonbuild/meson/issues/4023
+        #
+        # Separate system and prefix paths, and ensure that prefix paths are
+        # always searched first.
+        prefix_libpaths = OrderedSet()
+        # We also store this raw_link_args on the object later
+        raw_link_args = self._convert_mingw_paths(self._split_args(out_raw))
+        for arg in raw_link_args:
+            if arg.startswith('-L') and not arg.startswith(('-L-l', '-L-L')):
+                path = arg[2:]
+                if not os.path.isabs(path):
+                    # Resolve the path as a compiler in the build directory would
+                    path = os.path.join(self.env.get_build_dir(), path)
+                prefix_libpaths.add(path)
+        # Library paths are not always ordered in a meaningful way
+        #
+        # Instead of relying on pkg-config or pkgconf to provide -L flags in a
+        # specific order, we reorder library paths ourselves, according to th
+        # order specified in PKG_CONFIG_PATH. See:
+        # https://github.com/mesonbuild/meson/issues/4271
+        #
+        # Only prefix_libpaths are reordered here because there should not be
+        # too many system_libpaths to cause library version issues.
+        pkg_config_path = get_env_var(
+            self.for_machine,
+            self.env.is_cross_build(),
+            'PKG_CONFIG_PATH')
+        if pkg_config_path:
+            pkg_config_path = pkg_config_path.split(os.pathsep)
+        else:
+            pkg_config_path = []
+        pkg_config_path = self._convert_mingw_paths(pkg_config_path)
+        prefix_libpaths = sort_libpaths(prefix_libpaths, pkg_config_path)
+        system_libpaths = OrderedSet()
+        full_args = self._convert_mingw_paths(self._split_args(out))
+        for arg in full_args:
+            if arg.startswith(('-L-l', '-L-L')):
+                # These are D language arguments, not library paths
+                continue
+            if arg.startswith('-L') and arg[2:] not in prefix_libpaths:
+                system_libpaths.add(arg[2:])
+        # Use this re-ordered path list for library resolution
+        libpaths = list(prefix_libpaths) + list(system_libpaths)
+        # Track -lfoo libraries to avoid duplicate work
+        libs_found = OrderedSet()
+        # Track not-found libraries to know whether to add library paths
+        libs_notfound = []
+        libtype = LibType.STATIC if self.static else LibType.PREFER_SHARED
+        # Generate link arguments for this library
+        link_args = []
+        for lib in full_args:
+            if lib.startswith(('-L-l', '-L-L')):
+                # These are D language arguments, add them as-is
+                pass
+            elif lib.startswith('-L'):
+                # We already handled library paths above
+                continue
+            elif lib.startswith('-l'):
+                # Don't resolve the same -lfoo argument again
+                if lib in libs_found:
+                    continue
+                if self.clib_compiler:
+                    args = self.clib_compiler.find_library(lib[2:], self.env,
+                                                           libpaths, libtype)
+                # If the project only uses a non-clib language such as D, Rust,
+                # C#, Python, etc, all we can do is limp along by adding the
+                # arguments as-is and then adding the libpaths at the end.
+                else:
+                    args = None
+                if args is not None:
+                    libs_found.add(lib)
+                    # Replace -l arg with full path to library if available
+                    # else, library is either to be ignored, or is provided by
+                    # the compiler, can't be resolved, and should be used as-is
+                    if args:
+                        if not args[0].startswith('-l'):
+                            lib = args[0]
+                    else:
+                        continue
+                else:
+                    # Library wasn't found, maybe we're looking in the wrong
+                    # places or the library will be provided with LDFLAGS or
+                    # LIBRARY_PATH from the environment (on macOS), and many
+                    # other edge cases that we can't account for.
+                    #
+                    # Add all -L paths and use it as -lfoo
+                    if lib in libs_notfound:
+                        continue
+                    if self.static:
+                        mlog.warning('Static library {!r} not found for dependency {!r}, may '
+                                     'not be statically linked'.format(lib[2:], self.name))
+                    libs_notfound.append(lib)
+            elif lib.endswith(".la"):
+                shared_libname = self.extract_libtool_shlib(lib)
+                shared_lib = os.path.join(os.path.dirname(lib), shared_libname)
+                if not os.path.exists(shared_lib):
+                    shared_lib = os.path.join(os.path.dirname(lib), ".libs", shared_libname)
+
+                if not os.path.exists(shared_lib):
+                    raise DependencyException('Got a libtools specific "%s" dependencies'
+                                              'but we could not compute the actual shared'
+                                              'library path' % lib)
+                self.is_libtool = True
+                lib = shared_lib
+                if lib in link_args:
+                    continue
+            link_args.append(lib)
+        # Add all -Lbar args if we have -lfoo args in link_args
+        if libs_notfound:
+            # Order of -L flags doesn't matter with ld, but it might with other
+            # linkers such as MSVC, so prepend them.
+            link_args = ['-L' + lp for lp in prefix_libpaths] + link_args
+        return link_args, raw_link_args
+
+    def _set_libs(self):
+        env = None
+        libcmd = [self.name, '--libs']
+        if self.static:
+            libcmd.append('--static')
+        # Force pkg-config to output -L fields even if they are system
+        # paths so we can do manual searching with cc.find_library() later.
+        env = os.environ.copy()
+        env['PKG_CONFIG_ALLOW_SYSTEM_LIBS'] = '1'
+        ret, out, err = self._call_pkgbin(libcmd, env=env)
+        if ret != 0:
+            raise DependencyException('Could not generate libs for %s:\n%s\n' %
+                                      (self.name, err))
+        # Also get the 'raw' output without -Lfoo system paths for adding -L
+        # args with -lfoo when a library can't be found, and also in
+        # gnome.generate_gir + gnome.gtkdoc which need -L -l arguments.
+        ret, out_raw, err_raw = self._call_pkgbin(libcmd)
+        if ret != 0:
+            raise DependencyException('Could not generate libs for %s:\n\n%s' %
+                                      (self.name, out_raw))
+        self.link_args, self.raw_link_args = self._search_libs(out, out_raw)
+
+    def get_pkgconfig_variable(self, variable_name, kwargs):
+        options = ['--variable=' + variable_name, self.name]
+
+        if 'define_variable' in kwargs:
+            definition = kwargs.get('define_variable', [])
+            if not isinstance(definition, list):
+                raise DependencyException('define_variable takes a list')
+
+            if len(definition) != 2 or not all(isinstance(i, str) for i in definition):
+                raise DependencyException('define_variable must be made up of 2 strings for VARIABLENAME and VARIABLEVALUE')
+
+            options = ['--define-variable=' + '='.join(definition)] + options
+
+        ret, out, err = self._call_pkgbin(options)
+        variable = ''
+        if ret != 0:
+            if self.required:
+                raise DependencyException('dependency %s not found:\n%s\n' %
+                                          (self.name, err))
+        else:
+            variable = out.strip()
+
+            # pkg-config doesn't distinguish between empty and non-existent variables
+            # use the variable list to check for variable existence
+            if not variable:
+                ret, out, _ = self._call_pkgbin(['--print-variables', self.name])
+                if not re.search(r'^' + variable_name + r'$', out, re.MULTILINE):
+                    if 'default' in kwargs:
+                        variable = kwargs['default']
+                    else:
+                        mlog.warning("pkgconfig variable '%s' not defined for dependency %s." % (variable_name, self.name))
+
+        mlog.debug('Got pkgconfig variable %s : %s' % (variable_name, variable))
+        return variable
+
+    @staticmethod
+    def get_methods():
+        return [DependencyMethods.PKGCONFIG]
+
+    def check_pkgconfig(self, pkgbin):
+        if not pkgbin.found():
+            mlog.log('Did not find pkg-config by name {!r}'.format(pkgbin.name))
+            return None
+        try:
+            p, out = Popen_safe(pkgbin.get_command() + ['--version'])[0:2]
+            if p.returncode != 0:
+                mlog.warning('Found pkg-config {!r} but it failed when run'
+                             ''.format(' '.join(pkgbin.get_command())))
+                return None
+        except FileNotFoundError:
+            mlog.warning('We thought we found pkg-config {!r} but now it\'s not there. How odd!'
+                         ''.format(' '.join(pkgbin.get_command())))
+            return None
+        except PermissionError:
+            msg = 'Found pkg-config {!r} but didn\'t have permissions to run it.'.format(' '.join(pkgbin.get_command()))
+            if not mesonlib.is_windows():
+                msg += '\n\nOn Unix-like systems this is often caused by scripts that are not executable.'
+            mlog.warning(msg)
+            return None
+        return out.strip()
+
+    def extract_field(self, la_file, fieldname):
+        with open(la_file) as f:
+            for line in f:
+                arr = line.strip().split('=')
+                if arr[0] == fieldname:
+                    return arr[1][1:-1]
+        return None
+
+    def extract_dlname_field(self, la_file):
+        return self.extract_field(la_file, 'dlname')
+
+    def extract_libdir_field(self, la_file):
+        return self.extract_field(la_file, 'libdir')
+
+    def extract_libtool_shlib(self, la_file):
+        '''
+        Returns the path to the shared library
+        corresponding to this .la file
+        '''
+        dlname = self.extract_dlname_field(la_file)
+        if dlname is None:
+            return None
+
+        # Darwin uses absolute paths where possible; since the libtool files never
+        # contain absolute paths, use the libdir field
+        if mesonlib.is_osx():
+            dlbasename = os.path.basename(dlname)
+            libdir = self.extract_libdir_field(la_file)
+            if libdir is None:
+                return dlbasename
+            return os.path.join(libdir, dlbasename)
+        # From the comments in extract_libtool(), older libtools had
+        # a path rather than the raw dlname
+        return os.path.basename(dlname)
+
+    def log_tried(self):
+        return self.type_name
+
+    def get_variable(self, *, cmake: T.Optional[str] = None, pkgconfig: T.Optional[str] = None,
+                     configtool: T.Optional[str] = None, internal: T.Optional[str] = None,
+                     default_value: T.Optional[str] = None,
+                     pkgconfig_define: T.Optional[T.List[str]] = None) -> T.Union[str, T.List[str]]:
+        if pkgconfig:
+            kwargs = {}
+            if default_value is not None:
+                kwargs['default'] = default_value
+            if pkgconfig_define is not None:
+                kwargs['define_variable'] = pkgconfig_define
+            try:
+                return self.get_pkgconfig_variable(pkgconfig, kwargs)
+            except DependencyException:
+                pass
+        if default_value is not None:
+            return default_value
+        raise DependencyException('Could not get pkg-config variable and no default provided for {!r}'.format(self))
+
+class CMakeDependency(ExternalDependency):
+    # The class's copy of the CMake path. Avoids having to search for it
+    # multiple times in the same Meson invocation.
+    class_cmakeinfo = PerMachine(None, None)
+    # Version string for the minimum CMake version
+    class_cmake_version = '>=3.4'
+    # CMake generators to try (empty for no generator)
+    class_cmake_generators = ['', 'Ninja', 'Unix Makefiles', 'Visual Studio 10 2010']
+    class_working_generator = None
+
+    def _gen_exception(self, msg):
+        return DependencyException('Dependency {} not found: {}'.format(self.name, msg))
+
+    def _main_cmake_file(self) -> str:
+        return 'CMakeLists.txt'
+
+    def _extra_cmake_opts(self) -> T.List[str]:
+        return []
+
+    def _map_module_list(self, modules: T.List[T.Tuple[str, bool]], components: T.List[T.Tuple[str, bool]]) -> T.List[T.Tuple[str, bool]]:
+        # Map the input module list to something else
+        # This function will only be executed AFTER the initial CMake
+        # interpreter pass has completed. Thus variables defined in the
+        # CMakeLists.txt can be accessed here.
+        #
+        # Both the modules and components inputs contain the original lists.
+        return modules
+
+    def _map_component_list(self, modules: T.List[T.Tuple[str, bool]], components: T.List[T.Tuple[str, bool]]) -> T.List[T.Tuple[str, bool]]:
+        # Map the input components list to something else. This
+        # function will be executed BEFORE the initial CMake interpreter
+        # pass. Thus variables from the CMakeLists.txt can NOT be accessed.
+        #
+        # Both the modules and components inputs contain the original lists.
+        return components
+
+    def _original_module_name(self, module: str) -> str:
+        # Reverse the module mapping done by _map_module_list for
+        # one module
+        return module
+
+    def __init__(self, name: str, environment: Environment, kwargs, language: T.Optional[str] = None):
+        # Gather a list of all languages to support
+        self.language_list = []  # type: T.List[str]
+        if language is None:
+            compilers = None
+            if kwargs.get('native', False):
+                compilers = environment.coredata.compilers.build
+            else:
+                compilers = environment.coredata.compilers.host
+
+            candidates = ['c', 'cpp', 'fortran', 'objc', 'objcxx']
+            self.language_list += [x for x in candidates if x in compilers]
+        else:
+            self.language_list += [language]
+
+        # Add additional languages if required
+        if 'fortran' in self.language_list:
+            self.language_list += ['c']
+
+        # Ensure that the list is unique
+        self.language_list = list(set(self.language_list))
+
+        super().__init__('cmake', environment, kwargs, language=language)
+        self.name = name
+        self.is_libtool = False
+        # Store a copy of the CMake path on the object itself so it is
+        # stored in the pickled coredata and recovered.
+        self.cmakebin = None
+        self.cmakeinfo = None
+
+        # Where all CMake "build dirs" are located
+        self.cmake_root_dir = environment.scratch_dir
+
+        # T.List of successfully found modules
+        self.found_modules = []
+
+        # Initialize with None before the first return to avoid
+        # AttributeError exceptions in derived classes
+        self.traceparser = None  # type: CMakeTraceParser
+
+        self.cmakebin = CMakeExecutor(environment, CMakeDependency.class_cmake_version, self.for_machine, silent=self.silent)
+        if not self.cmakebin.found():
+            self.cmakebin = None
+            msg = 'No CMake binary for machine %s not found. Giving up.' % self.for_machine
+            if self.required:
+                raise DependencyException(msg)
+            mlog.debug(msg)
+            return
+
+        # Setup the trace parser
+        self.traceparser = CMakeTraceParser(self.cmakebin.version(), self._get_build_dir())
+
+        cm_args = stringlistify(extract_as_list(kwargs, 'cmake_args'))
+        if CMakeDependency.class_cmakeinfo[self.for_machine] is None:
+            CMakeDependency.class_cmakeinfo[self.for_machine] = self._get_cmake_info(cm_args)
+        self.cmakeinfo = CMakeDependency.class_cmakeinfo[self.for_machine]
+        if self.cmakeinfo is None:
+            raise self._gen_exception('Unable to obtain CMake system information')
+
+        components = [(x, True) for x in stringlistify(extract_as_list(kwargs, 'components'))]
+        modules = [(x, True) for x in stringlistify(extract_as_list(kwargs, 'modules'))]
+        modules += [(x, False) for x in stringlistify(extract_as_list(kwargs, 'optional_modules'))]
+        cm_path = stringlistify(extract_as_list(kwargs, 'cmake_module_path'))
+        cm_path = [x if os.path.isabs(x) else os.path.join(environment.get_source_dir(), x) for x in cm_path]
+        if cm_path:
+            cm_args.append('-DCMAKE_MODULE_PATH=' + ';'.join(cm_path))
+        if not self._preliminary_find_check(name, cm_path, self.cmakebin.get_cmake_prefix_paths(), environment.machines[self.for_machine]):
+            mlog.debug('Preliminary CMake check failed. Aborting.')
+            return
+        self._detect_dep(name, modules, components, cm_args)
+
+    def __repr__(self):
+        s = '<{0} {1}: {2} {3}>'
+        return s.format(self.__class__.__name__, self.name, self.is_found,
+                        self.version_reqs)
+
+    def _get_cmake_info(self, cm_args):
+        mlog.debug("Extracting basic cmake information")
+        res = {}
+
+        # Try different CMake generators since specifying no generator may fail
+        # in cygwin for some reason
+        gen_list = []
+        # First try the last working generator
+        if CMakeDependency.class_working_generator is not None:
+            gen_list += [CMakeDependency.class_working_generator]
+        gen_list += CMakeDependency.class_cmake_generators
+
+        temp_parser = CMakeTraceParser(self.cmakebin.version(), self._get_build_dir())
+
+        for i in gen_list:
+            mlog.debug('Try CMake generator: {}'.format(i if len(i) > 0 else 'auto'))
+
+            # Prepare options
+            cmake_opts = temp_parser.trace_args() + ['.']
+            cmake_opts += cm_args
+            if len(i) > 0:
+                cmake_opts = ['-G', i] + cmake_opts
+
+            # Run CMake
+            ret1, out1, err1 = self._call_cmake(cmake_opts, 'CMakePathInfo.txt')
+
+            # Current generator was successful
+            if ret1 == 0:
+                CMakeDependency.class_working_generator = i
+                break
+
+            mlog.debug('CMake failed to gather system information for generator {} with error code {}'.format(i, ret1))
+            mlog.debug('OUT:\n{}\n\n\nERR:\n{}\n\n'.format(out1, err1))
+
+        # Check if any generator succeeded
+        if ret1 != 0:
+            return None
+
+        try:
+            temp_parser.parse(err1)
+        except MesonException:
+            return None
+
+        def process_paths(l: T.List[str]) -> T.Set[str]:
+            l = [x.split(':') for x in l]
+            l = [x for sublist in l for x in sublist]
+            return set(l)
+
+        # Extract the variables and sanity check them
+        root_paths = process_paths(temp_parser.get_cmake_var('MESON_FIND_ROOT_PATH'))
+        root_paths.update(process_paths(temp_parser.get_cmake_var('MESON_CMAKE_SYSROOT')))
+        root_paths = sorted(root_paths)
+        root_paths = list(filter(lambda x: os.path.isdir(x), root_paths))
+        module_paths = process_paths(temp_parser.get_cmake_var('MESON_PATHS_LIST'))
+        rooted_paths = []
+        for j in [Path(x) for x in root_paths]:
+            for i in [Path(x) for x in module_paths]:
+                rooted_paths.append(str(j / i.relative_to(i.anchor)))
+        module_paths = sorted(module_paths.union(rooted_paths))
+        module_paths = list(filter(lambda x: os.path.isdir(x), module_paths))
+        archs = temp_parser.get_cmake_var('MESON_ARCH_LIST')
+
+        common_paths = ['lib', 'lib32', 'lib64', 'libx32', 'share']
+        for i in archs:
+            common_paths += [os.path.join('lib', i)]
+
+        res = {
+            'module_paths': module_paths,
+            'cmake_root': temp_parser.get_cmake_var('MESON_CMAKE_ROOT')[0],
+            'archs': archs,
+            'common_paths': common_paths
+        }
+
+        mlog.debug('  -- Module search paths:    {}'.format(res['module_paths']))
+        mlog.debug('  -- CMake root:             {}'.format(res['cmake_root']))
+        mlog.debug('  -- CMake architectures:    {}'.format(res['archs']))
+        mlog.debug('  -- CMake lib search paths: {}'.format(res['common_paths']))
+
+        return res
+
+    @staticmethod
+    @functools.lru_cache(maxsize=None)
+    def _cached_listdir(path: str) -> T.Tuple[T.Tuple[str, str]]:
+        try:
+            return tuple((x, str(x).lower()) for x in os.listdir(path))
+        except OSError:
+            return ()
+
+    @staticmethod
+    @functools.lru_cache(maxsize=None)
+    def _cached_isdir(path: str) -> bool:
+        try:
+            return os.path.isdir(path)
+        except OSError:
+            return False
+
+    def _preliminary_find_check(self, name: str, module_path: T.List[str], prefix_path: T.List[str], machine: MachineInfo) -> bool:
+        lname = str(name).lower()
+
+        # Checks , /cmake, /CMake
+        def find_module(path: str) -> bool:
+            for i in [path, os.path.join(path, 'cmake'), os.path.join(path, 'CMake')]:
+                if not self._cached_isdir(i):
+                    continue
+
+                # Check the directory case insensitve
+                content = self._cached_listdir(i)
+                candidates = ['Find{}.cmake', '{}Config.cmake', '{}-config.cmake']
+                candidates = [x.format(name).lower() for x in candidates]
+                if any([x[1] in candidates for x in content]):
+                    return True
+            return False
+
+        # Search in /(lib/|lib*|share) for cmake files
+        def search_lib_dirs(path: str) -> bool:
+            for i in [os.path.join(path, x) for x in self.cmakeinfo['common_paths']]:
+                if not self._cached_isdir(i):
+                    continue
+
+                # Check /(lib/|lib*|share)/cmake/*/
+                cm_dir = os.path.join(i, 'cmake')
+                if self._cached_isdir(cm_dir):
+                    content = self._cached_listdir(cm_dir)
+                    content = list(filter(lambda x: x[1].startswith(lname), content))
+                    for k in content:
+                        if find_module(os.path.join(cm_dir, k[0])):
+                            return True
+
+                # /(lib/|lib*|share)/*/
+                # /(lib/|lib*|share)/*/(cmake|CMake)/
+                content = self._cached_listdir(i)
+                content = list(filter(lambda x: x[1].startswith(lname), content))
+                for k in content:
+                    if find_module(os.path.join(i, k[0])):
+                        return True
+
+            return False
+
+        # Check the user provided and system module paths
+        for i in module_path + [os.path.join(self.cmakeinfo['cmake_root'], 'Modules')]:
+            if find_module(i):
+                return True
+
+        # Check the user provided prefix paths
+        for i in prefix_path:
+            if search_lib_dirs(i):
+                return True
+
+        # Check the system paths
+        for i in self.cmakeinfo['module_paths']:
+            if find_module(i):
+                return True
+
+            if search_lib_dirs(i):
+                return True
+
+            content = self._cached_listdir(i)
+            content = list(filter(lambda x: x[1].startswith(lname), content))
+            for k in content:
+                if search_lib_dirs(os.path.join(i, k[0])):
+                    return True
+
+            # Mac framework support
+            if machine.is_darwin():
+                for j in ['{}.framework', '{}.app']:
+                    j = j.format(lname)
+                    if j in content:
+                        if find_module(os.path.join(i, j[0], 'Resources')) or find_module(os.path.join(i, j[0], 'Version')):
+                            return True
+
+        # Check the environment path
+        env_path = os.environ.get('{}_DIR'.format(name))
+        if env_path and find_module(env_path):
+            return True
+
+        return False
+
+    def _detect_dep(self, name: str, modules: T.List[T.Tuple[str, bool]], components: T.List[T.Tuple[str, bool]], args: T.List[str]):
+        # Detect a dependency with CMake using the '--find-package' mode
+        # and the trace output (stderr)
+        #
+        # When the trace output is enabled CMake prints all functions with
+        # parameters to stderr as they are executed. Since CMake 3.4.0
+        # variables ("${VAR}") are also replaced in the trace output.
+        mlog.debug('\nDetermining dependency {!r} with CMake executable '
+                   '{!r}'.format(name, self.cmakebin.executable_path()))
+
+        # Try different CMake generators since specifying no generator may fail
+        # in cygwin for some reason
+        gen_list = []
+        # First try the last working generator
+        if CMakeDependency.class_working_generator is not None:
+            gen_list += [CMakeDependency.class_working_generator]
+        gen_list += CMakeDependency.class_cmake_generators
+
+        # Map the components
+        comp_mapped = self._map_component_list(modules, components)
+
+        for i in gen_list:
+            mlog.debug('Try CMake generator: {}'.format(i if len(i) > 0 else 'auto'))
+
+            # Prepare options
+            cmake_opts = []
+            cmake_opts += ['-DNAME={}'.format(name)]
+            cmake_opts += ['-DARCHS={}'.format(';'.join(self.cmakeinfo['archs']))]
+            cmake_opts += ['-DCOMPS={}'.format(';'.join([x[0] for x in comp_mapped]))]
+            cmake_opts += args
+            cmake_opts += self.traceparser.trace_args()
+            cmake_opts += self._extra_cmake_opts()
+            cmake_opts += ['.']
+            if len(i) > 0:
+                cmake_opts = ['-G', i] + cmake_opts
+
+            # Run CMake
+            ret1, out1, err1 = self._call_cmake(cmake_opts, self._main_cmake_file())
+
+            # Current generator was successful
+            if ret1 == 0:
+                CMakeDependency.class_working_generator = i
+                break
+
+            mlog.debug('CMake failed for generator {} and package {} with error code {}'.format(i, name, ret1))
+            mlog.debug('OUT:\n{}\n\n\nERR:\n{}\n\n'.format(out1, err1))
+
+        # Check if any generator succeeded
+        if ret1 != 0:
+            return
+
+        try:
+            self.traceparser.parse(err1)
+        except CMakeException as e:
+            e = self._gen_exception(str(e))
+            if self.required:
+                raise
+            else:
+                self.compile_args = []
+                self.link_args = []
+                self.is_found = False
+                self.reason = e
+                return
+
+        # Whether the package is found or not is always stored in PACKAGE_FOUND
+        self.is_found = self.traceparser.var_to_bool('PACKAGE_FOUND')
+        if not self.is_found:
+            return
+
+        # Try to detect the version
+        vers_raw = self.traceparser.get_cmake_var('PACKAGE_VERSION')
+
+        if len(vers_raw) > 0:
+            self.version = vers_raw[0]
+            self.version.strip('"\' ')
+
+        # Post-process module list. Used in derived classes to modify the
+        # module list (append prepend a string, etc.).
+        modules = self._map_module_list(modules, components)
+        autodetected_module_list = False
+
+        # Try guessing a CMake target if none is provided
+        if len(modules) == 0:
+            for i in self.traceparser.targets:
+                tg = i.lower()
+                lname = name.lower()
+                if '{}::{}'.format(lname, lname) == tg or lname == tg.replace('::', ''):
+                    mlog.debug('Guessed CMake target \'{}\''.format(i))
+                    modules = [(i, True)]
+                    autodetected_module_list = True
+                    break
+
+        # Failed to guess a target --> try the old-style method
+        if len(modules) == 0:
+            incDirs = [x for x in self.traceparser.get_cmake_var('PACKAGE_INCLUDE_DIRS') if x]
+            defs = [x for x in self.traceparser.get_cmake_var('PACKAGE_DEFINITIONS') if x]
+            libs = [x for x in self.traceparser.get_cmake_var('PACKAGE_LIBRARIES') if x]
+
+            # Try to use old style variables if no module is specified
+            if len(libs) > 0:
+                self.compile_args = list(map(lambda x: '-I{}'.format(x), incDirs)) + defs
+                self.link_args = libs
+                mlog.debug('using old-style CMake variables for dependency {}'.format(name))
+                mlog.debug('Include Dirs:         {}'.format(incDirs))
+                mlog.debug('Compiler Definitions: {}'.format(defs))
+                mlog.debug('Libraries:            {}'.format(libs))
+                return
+
+            # Even the old-style approach failed. Nothing else we can do here
+            self.is_found = False
+            raise self._gen_exception('CMake: failed to guess a CMake target for {}.\n'
+                                      'Try to explicitly specify one or more targets with the "modules" property.\n'
+                                      'Valid targets are:\n{}'.format(name, list(self.traceparser.targets.keys())))
+
+        # Set dependencies with CMake targets
+        # recognise arguments we should pass directly to the linker
+        reg_is_lib = re.compile(r'^(-l[a-zA-Z0-9_]+|-pthread|-delayload:[a-zA-Z0-9_\.]+|[a-zA-Z0-9_]+\.lib)$')
+        reg_is_maybe_bare_lib = re.compile(r'^[a-zA-Z0-9_]+$')
+        processed_targets = []
+        incDirs = []
+        compileDefinitions = []
+        compileOptions = []
+        libraries = []
+        for i, required in modules:
+            if i not in self.traceparser.targets:
+                if not required:
+                    mlog.warning('CMake: T.Optional module', mlog.bold(self._original_module_name(i)), 'for', mlog.bold(name), 'was not found')
+                    continue
+                raise self._gen_exception('CMake: invalid module {} for {}.\n'
+                                          'Try to explicitly specify one or more targets with the "modules" property.\n'
+                                          'Valid targets are:\n{}'.format(self._original_module_name(i), name, list(self.traceparser.targets.keys())))
+
+            targets = [i]
+            if not autodetected_module_list:
+                self.found_modules += [i]
+
+            while len(targets) > 0:
+                curr = targets.pop(0)
+
+                # Skip already processed targets
+                if curr in processed_targets:
+                    continue
+
+                tgt = self.traceparser.targets[curr]
+                cfgs = []
+                cfg = ''
+                otherDeps = []
+                mlog.debug(tgt)
+
+                if 'INTERFACE_INCLUDE_DIRECTORIES' in tgt.properties:
+                    incDirs += [x for x in tgt.properties['INTERFACE_INCLUDE_DIRECTORIES'] if x]
+
+                if 'INTERFACE_COMPILE_DEFINITIONS' in tgt.properties:
+                    compileDefinitions += ['-D' + re.sub('^-D', '', x) for x in tgt.properties['INTERFACE_COMPILE_DEFINITIONS'] if x]
+
+                if 'INTERFACE_COMPILE_OPTIONS' in tgt.properties:
+                    compileOptions += [x for x in tgt.properties['INTERFACE_COMPILE_OPTIONS'] if x]
+
+                if 'IMPORTED_CONFIGURATIONS' in tgt.properties:
+                    cfgs = [x for x in tgt.properties['IMPORTED_CONFIGURATIONS'] if x]
+                    cfg = cfgs[0]
+
+                if 'b_vscrt' in self.env.coredata.base_options:
+                    is_debug = self.env.coredata.get_builtin_option('buildtype') == 'debug'
+                    if self.env.coredata.base_options['b_vscrt'].value in ('mdd', 'mtd'):
+                        is_debug = True
+                else:
+                    is_debug = self.env.coredata.get_builtin_option('debug')
+                if is_debug:
+                    if 'DEBUG' in cfgs:
+                        cfg = 'DEBUG'
+                    elif 'RELEASE' in cfgs:
+                        cfg = 'RELEASE'
+                else:
+                    if 'RELEASE' in cfgs:
+                        cfg = 'RELEASE'
+
+                if 'IMPORTED_IMPLIB_{}'.format(cfg) in tgt.properties:
+                    libraries += [x for x in tgt.properties['IMPORTED_IMPLIB_{}'.format(cfg)] if x]
+                elif 'IMPORTED_IMPLIB' in tgt.properties:
+                    libraries += [x for x in tgt.properties['IMPORTED_IMPLIB'] if x]
+                elif 'IMPORTED_LOCATION_{}'.format(cfg) in tgt.properties:
+                    libraries += [x for x in tgt.properties['IMPORTED_LOCATION_{}'.format(cfg)] if x]
+                elif 'IMPORTED_LOCATION' in tgt.properties:
+                    libraries += [x for x in tgt.properties['IMPORTED_LOCATION'] if x]
+
+                if 'INTERFACE_LINK_LIBRARIES' in tgt.properties:
+                    otherDeps += [x for x in tgt.properties['INTERFACE_LINK_LIBRARIES'] if x]
+
+                if 'IMPORTED_LINK_DEPENDENT_LIBRARIES_{}'.format(cfg) in tgt.properties:
+                    otherDeps += [x for x in tgt.properties['IMPORTED_LINK_DEPENDENT_LIBRARIES_{}'.format(cfg)] if x]
+                elif 'IMPORTED_LINK_DEPENDENT_LIBRARIES' in tgt.properties:
+                    otherDeps += [x for x in tgt.properties['IMPORTED_LINK_DEPENDENT_LIBRARIES'] if x]
+
+                for j in otherDeps:
+                    if j in self.traceparser.targets:
+                        targets += [j]
+                    elif reg_is_lib.match(j):
+                        libraries += [j]
+                    elif os.path.isabs(j) and os.path.exists(j):
+                        libraries += [j]
+                    elif mesonlib.is_windows() and reg_is_maybe_bare_lib.match(j):
+                        # On Windows, CMake library dependencies can be passed as bare library names,
+                        # e.g. 'version' should translate into 'version.lib'. CMake brute-forces a
+                        # combination of prefix/suffix combinations to find the right library, however
+                        # as we do not have a compiler environment available to us, we cannot do the
+                        # same, but must assume any bare argument passed which is not also a CMake
+                        # target must be a system library we should try to link against
+                        libraries += ["{}.lib".format(j)]
+                    else:
+                        mlog.warning('CMake: Dependency', mlog.bold(j), 'for', mlog.bold(name), 'target', mlog.bold(self._original_module_name(curr)), 'was not found')
+
+                processed_targets += [curr]
+
+        # Make sure all elements in the lists are unique and sorted
+        incDirs = sorted(set(incDirs))
+        compileDefinitions = sorted(set(compileDefinitions))
+        compileOptions = sorted(set(compileOptions))
+        libraries = sorted(set(libraries))
+
+        mlog.debug('Include Dirs:         {}'.format(incDirs))
+        mlog.debug('Compiler Definitions: {}'.format(compileDefinitions))
+        mlog.debug('Compiler Options:     {}'.format(compileOptions))
+        mlog.debug('Libraries:            {}'.format(libraries))
+
+        self.compile_args = compileOptions + compileDefinitions + ['-I{}'.format(x) for x in incDirs]
+        self.link_args = libraries
+
+    def _get_build_dir(self) -> str:
+        build_dir = Path(self.cmake_root_dir) / 'cmake_{}'.format(self.name)
+        build_dir.mkdir(parents=True, exist_ok=True)
+        return str(build_dir)
+
+    def _setup_cmake_dir(self, cmake_file: str) -> str:
+        # Setup the CMake build environment and return the "build" directory
+        build_dir = self._get_build_dir()
+
+        # Insert language parameters into the CMakeLists.txt and write new CMakeLists.txt
+        cmake_txt = mesondata['dependencies/data/' + cmake_file].data
+
+        # In general, some Fortran CMake find_package() also require C language enabled,
+        # even if nothing from C is directly used. An easy Fortran example that fails
+        # without C language is
+        #   find_package(Threads)
+        # To make this general to
+        # any other language that might need this, we use a list for all
+        # languages and expand in the cmake Project(... LANGUAGES ...) statement.
+        from ..cmake import language_map
+        cmake_language = [language_map[x] for x in self.language_list if x in language_map]
+        if not cmake_language:
+            cmake_language += ['NONE']
+
+        cmake_txt = """
+cmake_minimum_required(VERSION ${{CMAKE_VERSION}})
+project(MesonTemp LANGUAGES {})
+""".format(' '.join(cmake_language)) + cmake_txt
+
+        cm_file = Path(build_dir) / 'CMakeLists.txt'
+        cm_file.write_text(cmake_txt)
+        mlog.cmd_ci_include(cm_file.absolute().as_posix())
+
+        return build_dir
+
+    def _call_cmake(self, args, cmake_file: str, env=None):
+        build_dir = self._setup_cmake_dir(cmake_file)
+        return self.cmakebin.call_with_fake_build(args, build_dir, env=env)
+
+    @staticmethod
+    def get_methods():
+        return [DependencyMethods.CMAKE]
+
+    def log_tried(self):
+        return self.type_name
+
+    def log_details(self) -> str:
+        modules = [self._original_module_name(x) for x in self.found_modules]
+        modules = sorted(set(modules))
+        if modules:
+            return 'modules: ' + ', '.join(modules)
+        return ''
+
+    def get_variable(self, *, cmake: T.Optional[str] = None, pkgconfig: T.Optional[str] = None,
+                     configtool: T.Optional[str] = None, internal: T.Optional[str] = None,
+                     default_value: T.Optional[str] = None,
+                     pkgconfig_define: T.Optional[T.List[str]] = None) -> T.Union[str, T.List[str]]:
+        if cmake and self.traceparser is not None:
+            try:
+                v = self.traceparser.vars[cmake]
+            except KeyError:
+                pass
+            else:
+                if len(v) == 1:
+                    return v[0]
+                elif v:
+                    return v
+        if default_value is not None:
+            return default_value
+        raise DependencyException('Could not get cmake variable and no default provided for {!r}'.format(self))
+
+class DubDependency(ExternalDependency):
+    class_dubbin = None
+
+    def __init__(self, name, environment, kwargs):
+        super().__init__('dub', environment, kwargs, language='d')
+        self.name = name
+        self.compiler = super().get_compiler()
+        self.module_path = None
+
+        if 'required' in kwargs:
+            self.required = kwargs.get('required')
+
+        if DubDependency.class_dubbin is None:
+            self.dubbin = self._check_dub()
+            DubDependency.class_dubbin = self.dubbin
+        else:
+            self.dubbin = DubDependency.class_dubbin
+
+        if not self.dubbin:
+            if self.required:
+                raise DependencyException('DUB not found.')
+            self.is_found = False
+            return
+
+        mlog.debug('Determining dependency {!r} with DUB executable '
+                   '{!r}'.format(name, self.dubbin.get_path()))
+
+        # we need to know the target architecture
+        arch = self.compiler.arch
+
+        # Ask dub for the package
+        ret, res = self._call_dubbin(['describe', name, '--arch=' + arch])
+
+        if ret != 0:
+            self.is_found = False
+            return
+
+        comp = self.compiler.get_id().replace('llvm', 'ldc').replace('gcc', 'gdc')
+        packages = []
+        description = json.loads(res)
+        for package in description['packages']:
+            packages.append(package['name'])
+            if package['name'] == name:
+                self.is_found = True
+
+                not_lib = True
+                if 'targetType' in package:
+                    if package['targetType'] in ['library', 'sourceLibrary', 'staticLibrary', 'dynamicLibrary']:
+                        not_lib = False
+
+                if not_lib:
+                    mlog.error(mlog.bold(name), "found but it isn't a library")
+                    self.is_found = False
+                    return
+
+                self.module_path = self._find_right_lib_path(package['path'], comp, description, True, package['targetFileName'])
+                if not os.path.exists(self.module_path):
+                    # check if the dependency was built for other archs
+                    archs = [['x86_64'], ['x86'], ['x86', 'x86_mscoff']]
+                    for a in archs:
+                        description_a = copy.deepcopy(description)
+                        description_a['architecture'] = a
+                        arch_module_path = self._find_right_lib_path(package['path'], comp, description_a, True, package['targetFileName'])
+                        if arch_module_path:
+                            mlog.error(mlog.bold(name), "found but it wasn't compiled for", mlog.bold(arch))
+                            self.is_found = False
+                            return
+
+                    mlog.error(mlog.bold(name), "found but it wasn't compiled with", mlog.bold(comp))
+                    self.is_found = False
+                    return
+
+                self.version = package['version']
+                self.pkg = package
+
+        if self.pkg['targetFileName'].endswith('.a'):
+            self.static = True
+
+        self.compile_args = []
+        for flag in self.pkg['dflags']:
+            self.link_args.append(flag)
+        for path in self.pkg['importPaths']:
+            self.compile_args.append('-I' + os.path.join(self.pkg['path'], path))
+
+        self.link_args = self.raw_link_args = []
+        for flag in self.pkg['lflags']:
+            self.link_args.append(flag)
+
+        self.link_args.append(os.path.join(self.module_path, self.pkg['targetFileName']))
+
+        # Handle dependencies
+        libs = []
+
+        def add_lib_args(field_name, target):
+            if field_name in target['buildSettings']:
+                for lib in target['buildSettings'][field_name]:
+                    if lib not in libs:
+                        libs.append(lib)
+                        if os.name != 'nt':
+                            pkgdep = PkgConfigDependency(lib, environment, {'required': 'true', 'silent': 'true'})
+                            for arg in pkgdep.get_compile_args():
+                                self.compile_args.append(arg)
+                            for arg in pkgdep.get_link_args():
+                                self.link_args.append(arg)
+                            for arg in pkgdep.get_link_args(raw=True):
+                                self.raw_link_args.append(arg)
+
+        for target in description['targets']:
+            if target['rootPackage'] in packages:
+                add_lib_args('libs', target)
+                add_lib_args('libs-{}'.format(platform.machine()), target)
+                for file in target['buildSettings']['linkerFiles']:
+                    lib_path = self._find_right_lib_path(file, comp, description)
+                    if lib_path:
+                        self.link_args.append(lib_path)
+                    else:
+                        self.is_found = False
+
+    def get_compiler(self):
+        return self.compiler
+
+    def _find_right_lib_path(self, default_path, comp, description, folder_only=False, file_name=''):
+        module_path = lib_file_name = ''
+        if folder_only:
+            module_path = default_path
+            lib_file_name = file_name
+        else:
+            module_path = os.path.dirname(default_path)
+            lib_file_name = os.path.basename(default_path)
+        module_build_path = os.path.join(module_path, '.dub', 'build')
+
+        # If default_path is a path to lib file and
+        # directory of lib don't have subdir '.dub/build'
+        if not os.path.isdir(module_build_path) and os.path.isfile(default_path):
+            if folder_only:
+                return module_path
+            else:
+                return default_path
+
+        # Get D version implemented in the compiler
+        # gdc doesn't support this
+        ret, res = self._call_dubbin(['--version'])
+
+        if ret != 0:
+            mlog.error('Failed to run {!r}', mlog.bold(comp))
+            return
+
+        d_ver = re.search('v[0-9].[0-9][0-9][0-9].[0-9]', res) # Ex.: v2.081.2
+        if d_ver is not None:
+            d_ver = d_ver.group().rsplit('.', 1)[0].replace('v', '').replace('.', '') # Fix structure. Ex.: 2081
+        else:
+            d_ver = '' # gdc
+
+        if not os.path.isdir(module_build_path):
+            return ''
+
+        # Ex.: library-debug-linux.posix-x86_64-ldc_2081-EF934983A3319F8F8FF2F0E107A363BA
+        build_name = '-{}-{}-{}-{}_{}'.format(description['buildType'], '.'.join(description['platform']), '.'.join(description['architecture']), comp, d_ver)
+        for entry in os.listdir(module_build_path):
+            if build_name in entry:
+                for file in os.listdir(os.path.join(module_build_path, entry)):
+                    if file == lib_file_name:
+                        if folder_only:
+                            return os.path.join(module_build_path, entry)
+                        else:
+                            return os.path.join(module_build_path, entry, lib_file_name)
+
+        return ''
+
+    def _call_dubbin(self, args, env=None):
+        p, out = Popen_safe(self.dubbin.get_command() + args, env=env)[0:2]
+        return p.returncode, out.strip()
+
+    def _call_copmbin(self, args, env=None):
+        p, out = Popen_safe(self.compiler.get_exelist() + args, env=env)[0:2]
+        return p.returncode, out.strip()
+
+    def _check_dub(self):
+        dubbin = ExternalProgram('dub', silent=True)
+        if dubbin.found():
+            try:
+                p, out = Popen_safe(dubbin.get_command() + ['--version'])[0:2]
+                if p.returncode != 0:
+                    mlog.warning('Found dub {!r} but couldn\'t run it'
+                                 ''.format(' '.join(dubbin.get_command())))
+                    # Set to False instead of None to signify that we've already
+                    # searched for it and not found it
+                    dubbin = False
+            except (FileNotFoundError, PermissionError):
+                dubbin = False
+        else:
+            dubbin = False
+        if dubbin:
+            mlog.log('Found DUB:', mlog.bold(dubbin.get_path()),
+                     '(%s)' % out.strip())
+        else:
+            mlog.log('Found DUB:', mlog.red('NO'))
+        return dubbin
+
+    @staticmethod
+    def get_methods():
+        return [DependencyMethods.DUB]
+
+class ExternalProgram:
+    windows_exts = ('exe', 'msc', 'com', 'bat', 'cmd')
+    # An 'ExternalProgram' always runs on the build machine
+    for_machine = MachineChoice.BUILD
+
+    def __init__(self, name: str, command: T.Optional[T.List[str]] = None,
+                 silent: bool = False, search_dir: T.Optional[str] = None,
+                 extra_search_dirs: T.Optional[T.List[str]] = None):
+        self.name = name
+        if command is not None:
+            self.command = listify(command)
+            if mesonlib.is_windows():
+                cmd = self.command[0]
+                args = self.command[1:]
+                # Check whether the specified cmd is a path to a script, in
+                # which case we need to insert the interpreter. If not, try to
+                # use it as-is.
+                ret = self._shebang_to_cmd(cmd)
+                if ret:
+                    self.command = ret + args
+                else:
+                    self.command = [cmd] + args
+        else:
+            all_search_dirs = [search_dir]
+            if extra_search_dirs:
+                all_search_dirs += extra_search_dirs
+            for d in all_search_dirs:
+                self.command = self._search(name, d)
+                if self.found():
+                    break
+
+        # Set path to be the last item that is actually a file (in order to
+        # skip options in something like ['python', '-u', 'file.py']. If we
+        # can't find any components, default to the last component of the path.
+        self.path = self.command[-1]
+        for i in range(len(self.command) - 1, -1, -1):
+            arg = self.command[i]
+            if arg is not None and os.path.isfile(arg):
+                self.path = arg
+                break
+
+        if not silent:
+            # ignore the warning because derived classes never call this __init__
+            # method, and thus only the found() method of this class is ever executed
+            if self.found():  # lgtm [py/init-calls-subclass]
+                mlog.log('Program', mlog.bold(name), 'found:', mlog.green('YES'),
+                         '(%s)' % ' '.join(self.command))
+            else:
+                mlog.log('Program', mlog.bold(name), 'found:', mlog.red('NO'))
+
+    def __repr__(self) -> str:
+        r = '<{} {!r} -> {!r}>'
+        return r.format(self.__class__.__name__, self.name, self.command)
+
+    def description(self) -> str:
+        '''Human friendly description of the command'''
+        return ' '.join(self.command)
+
+    @classmethod
+    def from_bin_list(cls, env: Environment, for_machine: MachineChoice, name):
+        # There is a static `for_machine` for this class because the binary
+        # aways runs on the build platform. (It's host platform is our build
+        # platform.) But some external programs have a target platform, so this
+        # is what we are specifying here.
+        command = env.lookup_binary_entry(for_machine, name)
+        if command is None:
+            return NonExistingExternalProgram()
+        return cls.from_entry(name, command)
+
+    @staticmethod
+    @functools.lru_cache(maxsize=None)
+    def _windows_sanitize_path(path: str) -> str:
+        # Ensure that we use USERPROFILE even when inside MSYS, MSYS2, Cygwin, etc.
+        if 'USERPROFILE' not in os.environ:
+            return path
+        # The WindowsApps directory is a bit of a problem. It contains
+        # some zero-sized .exe files which have "reparse points", that
+        # might either launch an installed application, or might open
+        # a page in the Windows Store to download the application.
+        #
+        # To handle the case where the python interpreter we're
+        # running on came from the Windows Store, if we see the
+        # WindowsApps path in the search path, replace it with
+        # dirname(sys.executable).
+        appstore_dir = Path(os.environ['USERPROFILE']) / 'AppData' / 'Local' / 'Microsoft' / 'WindowsApps'
+        paths = []
+        for each in path.split(os.pathsep):
+            if Path(each) != appstore_dir:
+                paths.append(each)
+            elif 'WindowsApps' in sys.executable:
+                paths.append(os.path.dirname(sys.executable))
+        return os.pathsep.join(paths)
+
+    @staticmethod
+    def from_entry(name, command):
+        if isinstance(command, list):
+            if len(command) == 1:
+                command = command[0]
+        # We cannot do any searching if the command is a list, and we don't
+        # need to search if the path is an absolute path.
+        if isinstance(command, list) or os.path.isabs(command):
+            return ExternalProgram(name, command=command, silent=True)
+        assert isinstance(command, str)
+        # Search for the command using the specified string!
+        return ExternalProgram(command, silent=True)
+
+    @staticmethod
+    def _shebang_to_cmd(script: str) -> T.Optional[list]:
+        """
+        Check if the file has a shebang and manually parse it to figure out
+        the interpreter to use. This is useful if the script is not executable
+        or if we're on Windows (which does not understand shebangs).
+        """
+        try:
+            with open(script) as f:
+                first_line = f.readline().strip()
+            if first_line.startswith('#!'):
+                # In a shebang, everything before the first space is assumed to
+                # be the command to run and everything after the first space is
+                # the single argument to pass to that command. So we must split
+                # exactly once.
+                commands = first_line[2:].split('#')[0].strip().split(maxsplit=1)
+                if mesonlib.is_windows():
+                    # Windows does not have UNIX paths so remove them,
+                    # but don't remove Windows paths
+                    if commands[0].startswith('/'):
+                        commands[0] = commands[0].split('/')[-1]
+                    if len(commands) > 0 and commands[0] == 'env':
+                        commands = commands[1:]
+                    # Windows does not ship python3.exe, but we know the path to it
+                    if len(commands) > 0 and commands[0] == 'python3':
+                        commands = mesonlib.python_command + commands[1:]
+                elif mesonlib.is_haiku():
+                    # Haiku does not have /usr, but a lot of scripts assume that
+                    # /usr/bin/env always exists. Detect that case and run the
+                    # script with the interpreter after it.
+                    if commands[0] == '/usr/bin/env':
+                        commands = commands[1:]
+                    # We know what python3 is, we're running on it
+                    if len(commands) > 0 and commands[0] == 'python3':
+                        commands = mesonlib.python_command + commands[1:]
+                else:
+                    # Replace python3 with the actual python3 that we are using
+                    if commands[0] == '/usr/bin/env' and commands[1] == 'python3':
+                        commands = mesonlib.python_command + commands[2:]
+                    elif commands[0].split('/')[-1] == 'python3':
+                        commands = mesonlib.python_command + commands[1:]
+                return commands + [script]
+        except Exception as e:
+            mlog.debug(e)
+        mlog.debug('Unusable script {!r}'.format(script))
+        return None
+
+    def _is_executable(self, path):
+        suffix = os.path.splitext(path)[-1].lower()[1:]
+        execmask = stat.S_IXUSR | stat.S_IXGRP | stat.S_IXOTH
+        if mesonlib.is_windows():
+            if suffix in self.windows_exts:
+                return True
+        elif os.stat(path).st_mode & execmask:
+            return not os.path.isdir(path)
+        return False
+
+    def _search_dir(self, name: str, search_dir: T.Optional[str]) -> T.Optional[list]:
+        if search_dir is None:
+            return None
+        trial = os.path.join(search_dir, name)
+        if os.path.exists(trial):
+            if self._is_executable(trial):
+                return [trial]
+            # Now getting desperate. Maybe it is a script file that is
+            # a) not chmodded executable, or
+            # b) we are on windows so they can't be directly executed.
+            return self._shebang_to_cmd(trial)
+        else:
+            if mesonlib.is_windows():
+                for ext in self.windows_exts:
+                    trial_ext = '{}.{}'.format(trial, ext)
+                    if os.path.exists(trial_ext):
+                        return [trial_ext]
+        return None
+
+    def _search_windows_special_cases(self, name: str, command: str) -> list:
+        '''
+        Lots of weird Windows quirks:
+        1. PATH search for @name returns files with extensions from PATHEXT,
+           but only self.windows_exts are executable without an interpreter.
+        2. @name might be an absolute path to an executable, but without the
+           extension. This works inside MinGW so people use it a lot.
+        3. The script is specified without an extension, in which case we have
+           to manually search in PATH.
+        4. More special-casing for the shebang inside the script.
+        '''
+        if command:
+            # On Windows, even if the PATH search returned a full path, we can't be
+            # sure that it can be run directly if it's not a native executable.
+            # For instance, interpreted scripts sometimes need to be run explicitly
+            # with an interpreter if the file association is not done properly.
+            name_ext = os.path.splitext(command)[1]
+            if name_ext[1:].lower() in self.windows_exts:
+                # Good, it can be directly executed
+                return [command]
+            # Try to extract the interpreter from the shebang
+            commands = self._shebang_to_cmd(command)
+            if commands:
+                return commands
+            return [None]
+        # Maybe the name is an absolute path to a native Windows
+        # executable, but without the extension. This is technically wrong,
+        # but many people do it because it works in the MinGW shell.
+        if os.path.isabs(name):
+            for ext in self.windows_exts:
+                command = '{}.{}'.format(name, ext)
+                if os.path.exists(command):
+                    return [command]
+        # On Windows, interpreted scripts must have an extension otherwise they
+        # cannot be found by a standard PATH search. So we do a custom search
+        # where we manually search for a script with a shebang in PATH.
+        search_dirs = self._windows_sanitize_path(os.environ.get('PATH', '')).split(';')
+        for search_dir in search_dirs:
+            commands = self._search_dir(name, search_dir)
+            if commands:
+                return commands
+        return [None]
+
+    def _search(self, name: str, search_dir: T.Optional[str]) -> list:
+        '''
+        Search in the specified dir for the specified executable by name
+        and if not found search in PATH
+        '''
+        commands = self._search_dir(name, search_dir)
+        if commands:
+            return commands
+        # Do a standard search in PATH
+        path = os.environ.get('PATH', None)
+        if mesonlib.is_windows() and path:
+            path = self._windows_sanitize_path(path)
+        command = shutil.which(name, path=path)
+        if mesonlib.is_windows():
+            return self._search_windows_special_cases(name, command)
+        # On UNIX-like platforms, shutil.which() is enough to find
+        # all executables whether in PATH or with an absolute path
+        return [command]
+
+    def found(self) -> bool:
+        return self.command[0] is not None
+
+    def get_command(self):
+        return self.command[:]
+
+    def get_path(self):
+        return self.path
+
+    def get_name(self):
+        return self.name
+
+
+class NonExistingExternalProgram(ExternalProgram):  # lgtm [py/missing-call-to-init]
+    "A program that will never exist"
+
+    def __init__(self, name='nonexistingprogram'):
+        self.name = name
+        self.command = [None]
+        self.path = None
+
+    def __repr__(self):
+        r = '<{} {!r} -> {!r}>'
+        return r.format(self.__class__.__name__, self.name, self.command)
+
+    def found(self):
+        return False
+
+
+class EmptyExternalProgram(ExternalProgram):  # lgtm [py/missing-call-to-init]
+    '''
+    A program object that returns an empty list of commands. Used for cases
+    such as a cross file exe_wrapper to represent that it's not required.
+    '''
+
+    def __init__(self):
+        self.name = None
+        self.command = []
+        self.path = None
+
+    def __repr__(self):
+        r = '<{} {!r} -> {!r}>'
+        return r.format(self.__class__.__name__, self.name, self.command)
+
+    def found(self):
+        return True
+
+
+class ExternalLibrary(ExternalDependency):
+    def __init__(self, name, link_args, environment, language, silent=False):
+        super().__init__('library', environment, {}, language=language)
+        self.name = name
+        self.language = language
+        self.is_found = False
+        if link_args:
+            self.is_found = True
+            self.link_args = link_args
+        if not silent:
+            if self.is_found:
+                mlog.log('Library', mlog.bold(name), 'found:', mlog.green('YES'))
+            else:
+                mlog.log('Library', mlog.bold(name), 'found:', mlog.red('NO'))
+
+    def get_link_args(self, language=None, **kwargs):
+        '''
+        External libraries detected using a compiler must only be used with
+        compatible code. For instance, Vala libraries (.vapi files) cannot be
+        used with C code, and not all Rust library types can be linked with
+        C-like code. Note that C++ libraries *can* be linked with C code with
+        a C++ linker (and vice-versa).
+        '''
+        # Using a vala library in a non-vala target, or a non-vala library in a vala target
+        # XXX: This should be extended to other non-C linkers such as Rust
+        if (self.language == 'vala' and language != 'vala') or \
+           (language == 'vala' and self.language != 'vala'):
+            return []
+        return super().get_link_args(**kwargs)
+
+    def get_partial_dependency(self, *, compile_args: bool = False,
+                               link_args: bool = False, links: bool = False,
+                               includes: bool = False, sources: bool = False):
+        # External library only has link_args, so ignore the rest of the
+        # interface.
+        new = copy.copy(self)
+        if not link_args:
+            new.link_args = []
+        return new
+
+
+class ExtraFrameworkDependency(ExternalDependency):
+    system_framework_paths = None
+
+    def __init__(self, name, env, kwargs, language: T.Optional[str] = None):
+        paths = kwargs.get('paths', [])
+        super().__init__('extraframeworks', env, kwargs, language=language)
+        self.name = name
+        # Full path to framework directory
+        self.framework_path = None
+        if not self.clib_compiler:
+            raise DependencyException('No C-like compilers are available')
+        if self.system_framework_paths is None:
+            try:
+                self.system_framework_paths = self.clib_compiler.find_framework_paths(self.env)
+            except MesonException as e:
+                if 'non-clang' in str(e):
+                    # Apple frameworks can only be found (and used) with the
+                    # system compiler. It is not available so bail immediately.
+                    self.is_found = False
+                    return
+                raise
+        self.detect(name, paths)
+
+    def detect(self, name, paths):
+        if not paths:
+            paths = self.system_framework_paths
+        for p in paths:
+            mlog.debug('Looking for framework {} in {}'.format(name, p))
+            # We need to know the exact framework path because it's used by the
+            # Qt5 dependency class, and for setting the include path. We also
+            # want to avoid searching in an invalid framework path which wastes
+            # time and can cause a false positive.
+            framework_path = self._get_framework_path(p, name)
+            if framework_path is None:
+                continue
+            # We want to prefer the specified paths (in order) over the system
+            # paths since these are "extra" frameworks.
+            # For example, Python2's framework is in /System/Library/Frameworks and
+            # Python3's framework is in /Library/Frameworks, but both are called
+            # Python.framework. We need to know for sure that the framework was
+            # found in the path we expect.
+            allow_system = p in self.system_framework_paths
+            args = self.clib_compiler.find_framework(name, self.env, [p], allow_system)
+            if args is None:
+                continue
+            self.link_args = args
+            self.framework_path = framework_path.as_posix()
+            self.compile_args = ['-F' + self.framework_path]
+            # We need to also add -I includes to the framework because all
+            # cross-platform projects such as OpenGL, Python, Qt, GStreamer,
+            # etc do not use "framework includes":
+            # https://developer.apple.com/library/archive/documentation/MacOSX/Conceptual/BPFrameworks/Tasks/IncludingFrameworks.html
+            incdir = self._get_framework_include_path(framework_path)
+            if incdir:
+                self.compile_args += ['-I' + incdir]
+            self.is_found = True
+            return
+
+    def _get_framework_path(self, path, name):
+        p = Path(path)
+        lname = name.lower()
+        for d in p.glob('*.framework/'):
+            if lname == d.name.rsplit('.', 1)[0].lower():
+                return d
+        return None
+
+    def _get_framework_latest_version(self, path):
+        versions = []
+        for each in path.glob('Versions/*'):
+            # macOS filesystems are usually case-insensitive
+            if each.name.lower() == 'current':
+                continue
+            versions.append(Version(each.name))
+        if len(versions) == 0:
+            # most system frameworks do not have a 'Versions' directory
+            return 'Headers'
+        return 'Versions/{}/Headers'.format(sorted(versions)[-1]._s)
+
+    def _get_framework_include_path(self, path):
+        # According to the spec, 'Headers' must always be a symlink to the
+        # Headers directory inside the currently-selected version of the
+        # framework, but sometimes frameworks are broken. Look in 'Versions'
+        # for the currently-selected version or pick the latest one.
+        trials = ('Headers', 'Versions/Current/Headers',
+                  self._get_framework_latest_version(path))
+        for each in trials:
+            trial = path / each
+            if trial.is_dir():
+                return trial.as_posix()
+        return None
+
+    @staticmethod
+    def get_methods():
+        return [DependencyMethods.EXTRAFRAMEWORK]
+
+    def log_info(self):
+        return self.framework_path
+
+    def log_tried(self):
+        return 'framework'
+
+
+class DependencyFactory:
+
+    """Factory to get dependencies from multiple sources.
+
+    This class provides an initializer that takes a set of names and classes
+    for various kinds of dependencies. When the initialized object is called
+    it returns a list of callables return Dependency objects to try in order.
+
+    :name: The name of the dependency. This will be passed as the name
+        parameter of the each dependency unless it is overridden on a per
+        type basis.
+    :methods: An ordered list of DependencyMethods. This is the order
+        dependencies will be returned in unless they are removed by the
+        _process_method function
+    :*_name: This will overwrite the name passed to the coresponding class.
+        For example, if the name is 'zlib', but cmake calls the dependency
+        'Z', then using `cmake_name='Z'` will pass the name as 'Z' to cmake.
+    :*_class: A *type* or callable that creates a class, and has the
+        signature of an ExternalDependency
+    :system_class: If you pass DependencyMethods.SYSTEM in methods, you must
+        set this argument.
+    """
+
+    def __init__(self, name: str, methods: T.List[DependencyMethods], *,
+                 extra_kwargs: T.Optional[T.Dict[str, T.Any]] = None,
+                 pkgconfig_name: T.Optional[str] = None,
+                 pkgconfig_class: 'T.Type[PkgConfigDependency]' = PkgConfigDependency,
+                 cmake_name: T.Optional[str] = None,
+                 cmake_class: 'T.Type[CMakeDependency]' = CMakeDependency,
+                 configtool_class: 'T.Optional[T.Type[ConfigToolDependency]]' = None,
+                 framework_name: T.Optional[str] = None,
+                 framework_class: 'T.Type[ExtraFrameworkDependency]' = ExtraFrameworkDependency,
+                 system_class: 'T.Type[ExternalDependency]' = ExternalDependency):
+
+        if DependencyMethods.CONFIG_TOOL in methods and not configtool_class:
+            raise DependencyException('A configtool must have a custom class')
+
+        self.extra_kwargs = extra_kwargs or {}
+        self.methods = methods
+        self.classes = {
+            # Just attach the correct name right now, either the generic name
+            # or the method specific name.
+            DependencyMethods.EXTRAFRAMEWORK: functools.partial(framework_class, framework_name or name),
+            DependencyMethods.PKGCONFIG: functools.partial(pkgconfig_class, pkgconfig_name or name),
+            DependencyMethods.CMAKE: functools.partial(cmake_class, cmake_name or name),
+            DependencyMethods.SYSTEM: functools.partial(system_class, name),
+            DependencyMethods.CONFIG_TOOL: None,
+        }
+        if configtool_class is not None:
+            self.classes[DependencyMethods.CONFIG_TOOL] = functools.partial(configtool_class, name)
+
+    @staticmethod
+    def _process_method(method: DependencyMethods, env: Environment, for_machine: MachineChoice) -> bool:
+        """Report whether a method is valid or not.
+
+        If the method is valid, return true, otherwise return false. This is
+        used in a list comprehension to filter methods that are not possible.
+
+        By default this only remove EXTRAFRAMEWORK dependencies for non-mac platforms.
+        """
+        # Extra frameworks are only valid for macOS and other apple products
+        if (method is DependencyMethods.EXTRAFRAMEWORK and
+                not env.machines[for_machine].is_darwin()):
+            return False
+        return True
+
+    def __call__(self, env: Environment, for_machine: MachineChoice,
+                 kwargs: T.Dict[str, T.Any]) -> T.List['DependencyType']:
+        """Return a list of Dependencies with the arguments already attached."""
+        methods = process_method_kw(self.methods, kwargs)
+        nwargs = self.extra_kwargs.copy()
+        nwargs.update(kwargs)
+
+        return [functools.partial(self.classes[m], env, nwargs) for m in methods
+                if self._process_method(m, env, for_machine)]
+
+
+def get_dep_identifier(name, kwargs) -> T.Tuple:
+    identifier = (name, )
+    for key, value in kwargs.items():
+        # 'version' is irrelevant for caching; the caller must check version matches
+        # 'native' is handled above with `for_machine`
+        # 'required' is irrelevant for caching; the caller handles it separately
+        # 'fallback' subprojects cannot be cached -- they must be initialized
+        # 'default_options' is only used in fallback case
+        if key in ('version', 'native', 'required', 'fallback', 'default_options'):
+            continue
+        # All keyword arguments are strings, ints, or lists (or lists of lists)
+        if isinstance(value, list):
+            value = frozenset(listify(value))
+        identifier += (key, value)
+    return identifier
+
+display_name_map = {
+    'boost': 'Boost',
+    'cuda': 'CUDA',
+    'dub': 'DUB',
+    'gmock': 'GMock',
+    'gtest': 'GTest',
+    'hdf5': 'HDF5',
+    'llvm': 'LLVM',
+    'mpi': 'MPI',
+    'netcdf': 'NetCDF',
+    'openmp': 'OpenMP',
+    'wxwidgets': 'WxWidgets',
+}
+
+def find_external_dependency(name, env, kwargs):
+    assert(name)
+    required = kwargs.get('required', True)
+    if not isinstance(required, bool):
+        raise DependencyException('Keyword "required" must be a boolean.')
+    if not isinstance(kwargs.get('method', ''), str):
+        raise DependencyException('Keyword "method" must be a string.')
+    lname = name.lower()
+    if lname not in _packages_accept_language and 'language' in kwargs:
+        raise DependencyException('%s dependency does not accept "language" keyword argument' % (name, ))
+    if not isinstance(kwargs.get('version', ''), (str, list)):
+        raise DependencyException('Keyword "Version" must be string or list.')
+
+    # display the dependency name with correct casing
+    display_name = display_name_map.get(lname, lname)
+
+    for_machine = MachineChoice.BUILD if kwargs.get('native', False) else MachineChoice.HOST
+
+    type_text = PerMachine('Build-time', 'Run-time')[for_machine] + ' dependency'
+
+    # build a list of dependency methods to try
+    candidates = _build_external_dependency_list(name, env, for_machine, kwargs)
+
+    pkg_exc = []
+    pkgdep = []
+    details = ''
+
+    for c in candidates:
+        # try this dependency method
+        try:
+            d = c()
+            d._check_version()
+            pkgdep.append(d)
+        except DependencyException as e:
+            pkg_exc.append(e)
+            mlog.debug(str(e))
+        else:
+            pkg_exc.append(None)
+            details = d.log_details()
+            if details:
+                details = '(' + details + ') '
+            if 'language' in kwargs:
+                details += 'for ' + d.language + ' '
+
+            # if the dependency was found
+            if d.found():
+
+                info = []
+                if d.version:
+                    info.append(mlog.normal_cyan(d.version))
+
+                log_info = d.log_info()
+                if log_info:
+                    info.append('(' + log_info + ')')
+
+                mlog.log(type_text, mlog.bold(display_name), details + 'found:', mlog.green('YES'), *info)
+
+                return d
+
+    # otherwise, the dependency could not be found
+    tried_methods = [d.log_tried() for d in pkgdep if d.log_tried()]
+    if tried_methods:
+        tried = '{}'.format(mlog.format_list(tried_methods))
+    else:
+        tried = ''
+
+    mlog.log(type_text, mlog.bold(display_name), details + 'found:', mlog.red('NO'),
+             '(tried {})'.format(tried) if tried else '')
+
+    if required:
+        # if an exception occurred with the first detection method, re-raise it
+        # (on the grounds that it came from the preferred dependency detection
+        # method)
+        if pkg_exc and pkg_exc[0]:
+            raise pkg_exc[0]
+
+        # we have a list of failed ExternalDependency objects, so we can report
+        # the methods we tried to find the dependency
+        raise DependencyException('Dependency "%s" not found' % (name) +
+                                  (', tried %s' % (tried) if tried else ''))
+
+    return NotFoundDependency(env)
+
+
+def _build_external_dependency_list(name: str, env: Environment, for_machine: MachineChoice,
+                                    kwargs: T.Dict[str, T.Any]) -> T.List['DependencyType']:
+    # First check if the method is valid
+    if 'method' in kwargs and kwargs['method'] not in [e.value for e in DependencyMethods]:
+        raise DependencyException('method {!r} is invalid'.format(kwargs['method']))
+
+    # Is there a specific dependency detector for this dependency?
+    lname = name.lower()
+    if lname in packages:
+        # Create the list of dependency object constructors using a factory
+        # class method, if one exists, otherwise the list just consists of the
+        # constructor
+        if isinstance(packages[lname], type) and issubclass(packages[lname], Dependency):
+            dep = [functools.partial(packages[lname], env, kwargs)]
+        else:
+            dep = packages[lname](env, for_machine, kwargs)
+        return dep
+
+    candidates = []
+
+    # If it's explicitly requested, use the dub detection method (only)
+    if 'dub' == kwargs.get('method', ''):
+        candidates.append(functools.partial(DubDependency, name, env, kwargs))
+        return candidates
+
+    # If it's explicitly requested, use the pkgconfig detection method (only)
+    if 'pkg-config' == kwargs.get('method', ''):
+        candidates.append(functools.partial(PkgConfigDependency, name, env, kwargs))
+        return candidates
+
+    # If it's explicitly requested, use the CMake detection method (only)
+    if 'cmake' == kwargs.get('method', ''):
+        candidates.append(functools.partial(CMakeDependency, name, env, kwargs))
+        return candidates
+
+    # If it's explicitly requested, use the Extraframework detection method (only)
+    if 'extraframework' == kwargs.get('method', ''):
+        # On OSX, also try framework dependency detector
+        if mesonlib.is_osx():
+            candidates.append(functools.partial(ExtraFrameworkDependency, name, env, kwargs))
+        return candidates
+
+    # Otherwise, just use the pkgconfig and cmake dependency detector
+    if 'auto' == kwargs.get('method', 'auto'):
+        candidates.append(functools.partial(PkgConfigDependency, name, env, kwargs))
+
+        # On OSX, also try framework dependency detector
+        if mesonlib.is_osx():
+            candidates.append(functools.partial(ExtraFrameworkDependency, name, env, kwargs))
+
+        # Only use CMake as a last resort, since it might not work 100% (see #6113)
+        candidates.append(functools.partial(CMakeDependency, name, env, kwargs))
+
+    return candidates
+
+
+def sort_libpaths(libpaths: T.List[str], refpaths: T.List[str]) -> T.List[str]:
+    """Sort  according to 
+
+    It is intended to be used to sort -L flags returned by pkg-config.
+    Pkg-config returns flags in random order which cannot be relied on.
+    """
+    if len(refpaths) == 0:
+        return list(libpaths)
+
+    def key_func(libpath):
+        common_lengths = []
+        for refpath in refpaths:
+            try:
+                common_path = os.path.commonpath([libpath, refpath])
+            except ValueError:
+                common_path = ''
+            common_lengths.append(len(common_path))
+        max_length = max(common_lengths)
+        max_index = common_lengths.index(max_length)
+        reversed_max_length = len(refpaths[max_index]) - max_length
+        return (max_index, reversed_max_length)
+    return sorted(libpaths, key=key_func)
+
+
+def strip_system_libdirs(environment, for_machine: MachineChoice, link_args):
+    """Remove -L arguments.
+
+    leaving these in will break builds where a user has a version of a library
+    in the system path, and a different version not in the system path if they
+    want to link against the non-system path version.
+    """
+    exclude = {'-L{}'.format(p) for p in environment.get_compiler_system_dirs(for_machine)}
+    return [l for l in link_args if l not in exclude]
+
+
+def process_method_kw(possible: T.List[DependencyMethods], kwargs) -> T.List[DependencyMethods]:
+    method = kwargs.get('method', 'auto')
+    if isinstance(method, DependencyMethods):
+        return method
+    if method not in [e.value for e in DependencyMethods]:
+        raise DependencyException('method {!r} is invalid'.format(method))
+    method = DependencyMethods(method)
+
+    # This sets per-tool config methods which are deprecated to to the new
+    # generic CONFIG_TOOL value.
+    if method in [DependencyMethods.SDLCONFIG, DependencyMethods.CUPSCONFIG,
+                  DependencyMethods.PCAPCONFIG, DependencyMethods.LIBWMFCONFIG]:
+        mlog.warning(textwrap.dedent("""\
+            Configuration method {} has been deprecated in favor of
+            'config-tool'. This will be removed in a future version of
+            meson.""".format(method)))
+        method = DependencyMethods.CONFIG_TOOL
+
+    # Set the detection method. If the method is set to auto, use any available method.
+    # If method is set to a specific string, allow only that detection method.
+    if method == DependencyMethods.AUTO:
+        methods = possible
+    elif method in possible:
+        methods = [method]
+    else:
+        raise DependencyException(
+            'Unsupported detection method: {}, allowed methods are {}'.format(
+                method.value,
+                mlog.format_list([x.value for x in [DependencyMethods.AUTO] + possible])))
+
+    return methods
+
+
+if T.TYPE_CHECKING:
+    FactoryType = T.Callable[[Environment, MachineChoice, T.Dict[str, T.Any]],
+                             T.List['DependencyType']]
+    FullFactoryType = T.Callable[[Environment, MachineChoice, T.Dict[str, T.Any], T.Set[DependencyMethods]],
+                                 T.List['DependencyType']]
+
+
+def factory_methods(methods: T.Set[DependencyMethods]) -> 'FactoryType':
+    """Decorator for handling methods for dependency factory functions.
+
+    This helps to make factory functions self documenting
+    >>> @factory_methods([DependencyMethods.PKGCONFIG, DependencyMethods.CMAKE])
+    >>> def factory(env: Environment, for_machine: MachineChoice, kwargs: T.Dict[str, T.Any], methods: T.Set[DependencyMethods]) -> T.List[DependencyType]:
+    >>>     pass
+    """
+
+    def inner(func: 'FullFactoryType') -> 'FactoryType':
+
+        @functools.wraps(func)
+        def wrapped(env: Environment, for_machine: MachineChoice, kwargs: T.Dict[str, T.Any]) -> T.List['DependencyType']:
+            return func(env, for_machine, kwargs, process_method_kw(methods, kwargs))
+
+        return wrapped
+
+    return inner
+
+
+def detect_compiler(name: str, env: Environment, for_machine: MachineChoice,
+                    language: T.Optional[str]) -> T.Optional['CompilerType']:
+    """Given a language and environment find the compiler used."""
+    compilers = env.coredata.compilers[for_machine]
+
+    # Set the compiler for this dependency if a language is specified,
+    # else try to pick something that looks usable.
+    if language:
+        if language not in compilers:
+            m = name.capitalize() + ' requires a {0} compiler, but ' \
+                '{0} is not in the list of project languages'
+            raise DependencyException(m.format(language.capitalize()))
+        return compilers[language]
+    else:
+        for lang in clib_langs:
+            try:
+                return compilers[lang]
+            except KeyError:
+                continue
+    return None
diff --git a/meson/mesonbuild/dependencies/boost.py b/meson/mesonbuild/dependencies/boost.py
new file mode 100644
index 000000000..907c0c275
--- /dev/null
+++ b/meson/mesonbuild/dependencies/boost.py
@@ -0,0 +1,1045 @@
+# Copyright 2013-2020 The Meson development team
+
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+
+#     http://www.apache.org/licenses/LICENSE-2.0
+
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import os
+import re
+import functools
+import typing as T
+from pathlib import Path
+
+from .. import mlog
+from .. import mesonlib
+from ..environment import Environment
+
+from .base import DependencyException, ExternalDependency, PkgConfigDependency
+from .misc import threads_factory
+
+# On windows 3 directory layouts are supported:
+# * The default layout (versioned) installed:
+#   - $BOOST_ROOT/include/boost-x_x/boost/*.hpp
+#   - $BOOST_ROOT/lib/*.lib
+# * The non-default layout (system) installed:
+#   - $BOOST_ROOT/include/boost/*.hpp
+#   - $BOOST_ROOT/lib/*.lib
+# * The pre-built binaries from sf.net:
+#   - $BOOST_ROOT/boost/*.hpp
+#   - $BOOST_ROOT/lib-/*.lib where arch=32/64 and compiler=msvc-14.1
+#
+# Note that we should also try to support:
+# mingw-w64 / Windows : libboost_-mt.a            (location = /mingw64/lib/)
+#                       libboost_-mt.dll.a
+#
+# The `modules` argument accept library names. This is because every module that
+# has libraries to link against also has multiple options regarding how to
+# link. See for example:
+# * http://www.boost.org/doc/libs/1_65_1/libs/test/doc/html/boost_test/usage_variants.html
+# * http://www.boost.org/doc/libs/1_65_1/doc/html/stacktrace/configuration_and_build.html
+# * http://www.boost.org/doc/libs/1_65_1/libs/math/doc/html/math_toolkit/main_tr1.html
+
+# **On Unix**, official packaged versions of boost libraries follow the following schemes:
+#
+# Linux / Debian:   libboost_.so -> libboost_.so.1.66.0
+# Linux / Red Hat:  libboost_.so -> libboost_.so.1.66.0
+# Linux / OpenSuse: libboost_.so -> libboost_.so.1.66.0
+# Win   / Cygwin:   libboost_.dll.a                                 (location = /usr/lib)
+#                   libboost_.a
+#                   cygboost__1_64.dll                              (location = /usr/bin)
+# Win   / VS:       boost_-vc-mt[-gd]--1_67.dll          (location = C:/local/boost_1_67_0)
+# Mac   / homebrew: libboost_.dylib + libboost_-mt.dylib    (location = /usr/local/lib)
+# Mac   / macports: libboost_.dylib + libboost_-mt.dylib    (location = /opt/local/lib)
+#
+# Its not clear that any other abi tags (e.g. -gd) are used in official packages.
+#
+# On Linux systems, boost libs have multithreading support enabled, but without the -mt tag.
+#
+# Boost documentation recommends using complex abi tags like "-lboost_regex-gcc34-mt-d-1_36".
+# (See http://www.boost.org/doc/libs/1_66_0/more/getting_started/unix-variants.html#library-naming)
+# However, its not clear that any Unix distribution follows this scheme.
+# Furthermore, the boost documentation for unix above uses examples from windows like
+#   "libboost_regex-vc71-mt-d-x86-1_34.lib", so apparently the abi tags may be more aimed at windows.
+#
+# We follow the following strategy for finding modules:
+# A) Detect potential boost root directories (uses also BOOST_ROOT env var)
+# B) Foreach candidate
+#   1. Look for the boost headers (boost/version.pp)
+#   2. Find all boost libraries
+#     2.1 Add all libraries in lib*
+#     2.2 Filter out non boost libraries
+#     2.3 Filter the renaining libraries based on the meson requirements (static/shared, etc.)
+#     2.4 Ensure that all libraries have the same boost tag (and are thus compatible)
+#   3. Select the libraries matching the requested modules
+
+@functools.total_ordering
+class BoostIncludeDir():
+    def __init__(self, path: Path, version_int: int):
+        self.path = path
+        self.version_int = version_int
+        major = int(self.version_int / 100000)
+        minor = int((self.version_int / 100) % 1000)
+        patch = int(self.version_int % 100)
+        self.version = '{}.{}.{}'.format(major, minor, patch)
+        self.version_lib = '{}_{}'.format(major, minor)
+
+    def __repr__(self) -> str:
+        return ''.format(self.version, self.path)
+
+    def __lt__(self, other: T.Any) -> bool:
+        if isinstance(other, BoostIncludeDir):
+            return (self.version_int, self.path) < (other.version_int, other.path)
+        return NotImplemented
+
+@functools.total_ordering
+class BoostLibraryFile():
+    # Python libraries are special because of the included
+    # minor version in the module name.
+    boost_python_libs = ['boost_python', 'boost_numpy']
+    reg_python_mod_split = re.compile(r'(boost_[a-zA-Z]+)([0-9]*)')
+
+    reg_abi_tag = re.compile(r'^s?g?y?d?p?n?$')
+    reg_ver_tag = re.compile(r'^[0-9_]+$')
+
+    def __init__(self, path: Path):
+        self.path = path
+        self.name = self.path.name
+
+        # Initialize default properties
+        self.static = False
+        self.toolset = ''
+        self.arch = ''
+        self.version_lib = ''
+        self.mt = True
+
+        self.runtime_static = False
+        self.runtime_debug = False
+        self.python_debug = False
+        self.debug = False
+        self.stlport = False
+        self.deprecated_iostreams = False
+
+        # Post process the library name
+        name_parts = self.name.split('.')
+        self.basename = name_parts[0]
+        self.suffixes = name_parts[1:]
+        self.vers_raw = [x for x in self.suffixes if x.isdigit()]
+        self.suffixes = [x for x in self.suffixes if not x.isdigit()]
+        self.nvsuffix = '.'.join(self.suffixes)  # Used for detecting the library type
+        self.nametags = self.basename.split('-')
+        self.mod_name = self.nametags[0]
+        if self.mod_name.startswith('lib'):
+            self.mod_name = self.mod_name[3:]
+
+        # Set library version if possible
+        if len(self.vers_raw) >= 2:
+            self.version_lib = '{}_{}'.format(self.vers_raw[0], self.vers_raw[1])
+
+        # Detecting library type
+        if self.nvsuffix in ['so', 'dll', 'dll.a', 'dll.lib', 'dylib']:
+            self.static = False
+        elif self.nvsuffix in ['a', 'lib']:
+            self.static = True
+        else:
+            raise DependencyException('Unable to process library extension "{}" ({})'.format(self.nvsuffix, self.path))
+
+        # boost_.lib is the dll import library
+        if self.basename.startswith('boost_') and self.nvsuffix == 'lib':
+            self.static = False
+
+        # Process tags
+        tags = self.nametags[1:]
+        # Filter out the python version tag and fix modname
+        if self.is_python_lib():
+            tags = self.fix_python_name(tags)
+        if not tags:
+            return
+
+        # Without any tags mt is assumed, however, an absents of mt in the name
+        # with tags present indicates that the lib was build without mt support
+        self.mt = False
+        for i in tags:
+            if i == 'mt':
+                self.mt = True
+            elif len(i) == 3 and i[1:] in ['32', '64']:
+                self.arch = i
+            elif BoostLibraryFile.reg_abi_tag.match(i):
+                self.runtime_static = 's' in i
+                self.runtime_debug = 'g' in i
+                self.python_debug = 'y' in i
+                self.debug = 'd' in i
+                self.stlport = 'p' in i
+                self.deprecated_iostreams = 'n' in i
+            elif BoostLibraryFile.reg_ver_tag.match(i):
+                self.version_lib = i
+            else:
+                self.toolset = i
+
+    def __repr__(self) -> str:
+        return ''.format(self.abitag, self.mod_name, self.path)
+
+    def __lt__(self, other: T.Any) -> bool:
+        if isinstance(other, BoostLibraryFile):
+            return (
+                self.mod_name, self.static, self.version_lib, self.arch,
+                not self.mt, not self.runtime_static,
+                not self.debug, self.runtime_debug, self.python_debug,
+                self.stlport, self.deprecated_iostreams,
+                self.name,
+            ) < (
+                other.mod_name, other.static, other.version_lib, other.arch,
+                not other.mt, not other.runtime_static,
+                not other.debug, other.runtime_debug, other.python_debug,
+                other.stlport, other.deprecated_iostreams,
+                other.name,
+            )
+        return NotImplemented
+
+    def __eq__(self, other: T.Any) -> bool:
+        if isinstance(other, BoostLibraryFile):
+            return self.name == other.name
+        return NotImplemented
+
+    def __hash__(self) -> int:
+        return hash(self.name)
+
+    @property
+    def abitag(self) -> str:
+        abitag = ''
+        abitag += 'S' if self.static else '-'
+        abitag += 'M' if self.mt else '-'
+        abitag += ' '
+        abitag += 's' if self.runtime_static else '-'
+        abitag += 'g' if self.runtime_debug else '-'
+        abitag += 'y' if self.python_debug else '-'
+        abitag += 'd' if self.debug else '-'
+        abitag += 'p' if self.stlport else '-'
+        abitag += 'n' if self.deprecated_iostreams else '-'
+        abitag += ' ' + (self.arch or '???')
+        abitag += ' ' + (self.toolset or '?')
+        abitag += ' ' + (self.version_lib or 'x_xx')
+        return abitag
+
+    def is_boost(self) -> bool:
+        return any([self.name.startswith(x) for x in ['libboost_', 'boost_']])
+
+    def is_python_lib(self) -> bool:
+        return any([self.mod_name.startswith(x) for x in BoostLibraryFile.boost_python_libs])
+
+    def fix_python_name(self, tags: T.List[str]) -> T.List[str]:
+        # Handle the boost_python naming madeness.
+        # See https://github.com/mesonbuild/meson/issues/4788 for some distro
+        # specific naming variantions.
+        other_tags = []  # type: T.List[str]
+
+        # Split the current modname into the base name and the version
+        m_cur = BoostLibraryFile.reg_python_mod_split.match(self.mod_name)
+        cur_name = m_cur.group(1)
+        cur_vers = m_cur.group(2)
+
+        # Update the current version string if the new version string is longer
+        def update_vers(new_vers: str) -> None:
+            nonlocal cur_vers
+            new_vers = new_vers.replace('_', '')
+            new_vers = new_vers.replace('.', '')
+            if not new_vers.isdigit():
+                return
+            if len(new_vers) > len(cur_vers):
+                cur_vers = new_vers
+
+        for i in tags:
+            if i.startswith('py'):
+                update_vers(i[2:])
+            elif i.isdigit():
+                update_vers(i)
+            elif len(i) >= 3 and i[0].isdigit and i[2].isdigit() and i[1] == '.':
+                update_vers(i)
+            else:
+                other_tags += [i]
+
+        self.mod_name = cur_name + cur_vers
+        return other_tags
+
+    def mod_name_matches(self, mod_name: str) -> bool:
+        if self.mod_name == mod_name:
+            return True
+        if not self.is_python_lib():
+            return False
+
+        m_cur = BoostLibraryFile.reg_python_mod_split.match(self.mod_name)
+        m_arg = BoostLibraryFile.reg_python_mod_split.match(mod_name)
+
+        if not m_cur or not m_arg:
+            return False
+
+        if m_cur.group(1) != m_arg.group(1):
+            return False
+
+        cur_vers = m_cur.group(2)
+        arg_vers = m_arg.group(2)
+
+        # Always assume python 2 if nothing is specified
+        if not arg_vers:
+            arg_vers = '2'
+
+        return cur_vers.startswith(arg_vers)
+
+    def version_matches(self, version_lib: str) -> bool:
+        # If no version tag is present, assume that it fits
+        if not self.version_lib or not version_lib:
+            return True
+        return self.version_lib == version_lib
+
+    def arch_matches(self, arch: str) -> bool:
+        # If no version tag is present, assume that it fits
+        if not self.arch or not arch:
+            return True
+        return self.arch == arch
+
+    def vscrt_matches(self, vscrt: str) -> bool:
+        # If no vscrt tag present, assume that it fits  ['/MD', '/MDd', '/MT', '/MTd']
+        if not vscrt:
+            return True
+        if vscrt in ['/MD', '-MD']:
+            return not self.runtime_static and not self.runtime_debug
+        elif vscrt in ['/MDd', '-MDd']:
+            return not self.runtime_static and self.runtime_debug
+        elif vscrt in ['/MT', '-MT']:
+            return (self.runtime_static or not self.static) and not self.runtime_debug
+        elif vscrt in ['/MTd', '-MTd']:
+            return (self.runtime_static or not self.static) and self.runtime_debug
+
+        mlog.warning('Boost: unknow vscrt tag {}. This may cause the compilation to fail. Please consider reporting this as a bug.'.format(vscrt), once=True)
+        return True
+
+    def get_compiler_args(self) -> T.List[str]:
+        args = []  # type: T.List[str]
+        if self.mod_name in boost_libraries:
+            libdef = boost_libraries[self.mod_name]  # type: BoostLibrary
+            if self.static:
+                args += libdef.static
+            else:
+                args += libdef.shared
+            if self.mt:
+                args += libdef.multi
+            else:
+                args += libdef.single
+        return args
+
+    def get_link_args(self) -> T.List[str]:
+        return [self.path.as_posix()]
+
+class BoostDependency(ExternalDependency):
+    def __init__(self, environment: Environment, kwargs):
+        super().__init__('boost', environment, kwargs, language='cpp')
+        self.debug = environment.coredata.get_builtin_option('buildtype').startswith('debug')
+        self.multithreading = kwargs.get('threading', 'multi') == 'multi'
+
+        self.boost_root = None
+        self.explicit_static = 'static' in kwargs
+
+        # Extract and validate modules
+        self.modules = mesonlib.extract_as_list(kwargs, 'modules')  # type: T.List[str]
+        for i in self.modules:
+            if not isinstance(i, str):
+                raise DependencyException('Boost module argument is not a string.')
+            if i.startswith('boost_'):
+                raise DependencyException('Boost modules must be passed without the boost_ prefix')
+
+        self.modules_found = []    # type: T.List[str]
+        self.modules_missing = []  # type: T.List[str]
+
+        # Do we need threads?
+        if 'thread' in self.modules:
+            if not self._add_sub_dependency(threads_factory(environment, self.for_machine, {})):
+                self.is_found = False
+                return
+
+        # Try figuring out the architecture tag
+        self.arch = environment.machines[self.for_machine].cpu_family
+        self.arch = boost_arch_map.get(self.arch, None)
+
+        # Prefere BOOST_INCLUDEDIR and BOOST_LIBRARYDIR if preset
+        boost_manual_env = [x in os.environ for x in ['BOOST_INCLUDEDIR', 'BOOST_LIBRARYDIR']]
+        if all(boost_manual_env):
+            inc_dir = Path(os.environ['BOOST_INCLUDEDIR'])
+            lib_dir = Path(os.environ['BOOST_LIBRARYDIR'])
+            mlog.debug('Trying to find boost with:')
+            mlog.debug('  - BOOST_INCLUDEDIR = {}'.format(inc_dir))
+            mlog.debug('  - BOOST_LIBRARYDIR = {}'.format(lib_dir))
+
+            boost_inc_dir = None
+            for j in [inc_dir / 'version.hpp', inc_dir / 'boost' / 'version.hpp']:
+                if j.is_file():
+                    boost_inc_dir = self._include_dir_from_version_header(j)
+                    break
+            if not boost_inc_dir:
+                self.is_found = False
+                return
+
+            self.is_found = self.run_check([boost_inc_dir], [lib_dir])
+            return
+        elif any(boost_manual_env):
+            mlog.warning('Both BOOST_INCLUDEDIR *and* BOOST_LIBRARYDIR have to be set (one is not enough). Ignoring.')
+
+        # A) Detect potential boost root directories (uses also BOOST_ROOT env var)
+        roots = self.detect_roots()
+        roots = list(mesonlib.OrderedSet(roots))
+
+        # B) Foreach candidate
+        for j in roots:
+            #   1. Look for the boost headers (boost/version.pp)
+            mlog.debug('Checking potential boost root {}'.format(j.as_posix()))
+            inc_dirs = self.detect_inc_dirs(j)
+            inc_dirs = sorted(inc_dirs, reverse=True)  # Prefer the newer versions
+
+            # Early abort when boost is not found
+            if not inc_dirs:
+                continue
+
+            lib_dirs = self.detect_lib_dirs(j)
+            self.is_found = self.run_check(inc_dirs, lib_dirs)
+            if self.is_found:
+                self.boost_root = j
+                break
+
+    def run_check(self, inc_dirs: T.List[BoostIncludeDir], lib_dirs: T.List[Path]) -> bool:
+        mlog.debug('  - potential library dirs: {}'.format([x.as_posix() for x in lib_dirs]))
+        mlog.debug('  - potential include dirs: {}'.format([x.path.as_posix() for x in inc_dirs]))
+
+        #   2. Find all boost libraries
+        libs = []  # type: T.List[BoostLibraryFile]
+        for i in lib_dirs:
+            libs = self.detect_libraries(i)
+            if libs:
+                mlog.debug('  - found boost library dir: {}'.format(i))
+                # mlog.debug('  - raw library list:')
+                # for j in libs:
+                #     mlog.debug('    - {}'.format(j))
+                break
+        libs = sorted(set(libs))
+
+        modules = ['boost_' + x for x in self.modules]
+        for inc in inc_dirs:
+            mlog.debug('  - found boost {} include dir: {}'.format(inc.version, inc.path))
+            f_libs = self.filter_libraries(libs, inc.version_lib)
+
+            mlog.debug('  - filtered library list:')
+            for j in f_libs:
+                mlog.debug('    - {}'.format(j))
+
+            #   3. Select the libraries matching the requested modules
+            not_found = []  # type: T.List[str]
+            selected_modules = []  # type: T.List[BoostLibraryFile]
+            for mod in modules:
+                found = False
+                for l in f_libs:
+                    if l.mod_name_matches(mod):
+                        selected_modules += [l]
+                        found = True
+                        break
+                if not found:
+                    not_found += [mod]
+
+            # log the result
+            mlog.debug('  - found:')
+            comp_args = []  # type: T.List[str]
+            link_args = []  # type: T.List[str]
+            for j in selected_modules:
+                c_args = j.get_compiler_args()
+                l_args = j.get_link_args()
+                mlog.debug('    - {:<24} link={} comp={}'.format(j.mod_name, str(l_args), str(c_args)))
+                comp_args += c_args
+                link_args += l_args
+
+            comp_args = list(set(comp_args))
+            link_args = list(set(link_args))
+
+            self.modules_found = [x.mod_name for x in selected_modules]
+            self.modules_found = [x[6:] for x in self.modules_found]
+            self.modules_found = sorted(set(self.modules_found))
+            self.modules_missing = not_found
+            self.modules_missing = [x[6:] for x in self.modules_missing]
+            self.modules_missing = sorted(set(self.modules_missing))
+
+            # if we found all modules we are done
+            if not not_found:
+                self.version = inc.version
+                self.compile_args = ['-I' + inc.path.as_posix()]
+                self.compile_args += comp_args
+                self.compile_args += self._extra_compile_args()
+                self.compile_args = list(mesonlib.OrderedSet(self.compile_args))
+                self.link_args = link_args
+                mlog.debug('  - final compile args: {}'.format(self.compile_args))
+                mlog.debug('  - final link args:    {}'.format(self.link_args))
+                return True
+
+            # in case we missed something log it and try again
+            mlog.debug('  - NOT found:')
+            for mod in not_found:
+                mlog.debug('    - {}'.format(mod))
+
+        return False
+
+    def detect_inc_dirs(self, root: Path) -> T.List[BoostIncludeDir]:
+        candidates = []  # type: T.List[Path]
+        inc_root = root / 'include'
+
+        candidates += [root / 'boost']
+        candidates += [inc_root / 'boost']
+        if inc_root.is_dir():
+            for i in inc_root.iterdir():
+                if not i.is_dir() or not i.name.startswith('boost-'):
+                    continue
+                candidates += [i / 'boost']
+        candidates = [x for x in candidates if x.is_dir()]
+        candidates = [x / 'version.hpp' for x in candidates]
+        candidates = [x for x in candidates if x.exists()]
+        return [self._include_dir_from_version_header(x) for x in candidates]
+
+    def detect_lib_dirs(self, root: Path) -> T.List[Path]:
+        # First check the system include paths. Only consider those within the
+        # given root path
+        system_dirs_t = self.clib_compiler.get_library_dirs(self.env)
+        system_dirs = [Path(x) for x in system_dirs_t]
+        system_dirs = [x.resolve() for x in system_dirs if x.exists()]
+        system_dirs = [x for x in system_dirs if mesonlib.path_is_in_root(x, root)]
+        system_dirs = list(mesonlib.OrderedSet(system_dirs))
+
+        if system_dirs:
+            return system_dirs
+
+        # No system include paths were found --> fall back to manually looking
+        # for library dirs in root
+        dirs = []     # type: T.List[Path]
+        subdirs = []  # type: T.List[Path]
+        for i in root.iterdir():
+            if i.is_dir() and i.name.startswith('lib'):
+                dirs += [i]
+
+        # Some distros put libraries not directly inside /usr/lib but in /usr/lib/x86_64-linux-gnu
+        for i in dirs:
+            for j in i.iterdir():
+                if j.is_dir() and j.name.endswith('-linux-gnu'):
+                    subdirs += [j]
+
+        # Filter out paths that don't match the target arch to avoid finding
+        # the wrong libraries. See https://github.com/mesonbuild/meson/issues/7110
+        if not self.arch:
+            return dirs + subdirs
+
+        arch_list_32 = ['32', 'i386']
+        arch_list_64 = ['64']
+
+        raw_list = dirs + subdirs
+        no_arch = [x for x in raw_list if not any([y in x.name for y in arch_list_32 + arch_list_64])]
+
+        matching_arch = []  # type: T.List[Path]
+        if '32' in self.arch:
+            matching_arch = [x for x in raw_list if any([y in x.name for y in arch_list_32])]
+        elif '64' in self.arch:
+            matching_arch = [x for x in raw_list if any([y in x.name for y in arch_list_64])]
+
+        return sorted(matching_arch) + sorted(no_arch)
+
+    def filter_libraries(self, libs: T.List[BoostLibraryFile], lib_vers: str) -> T.List[BoostLibraryFile]:
+        # MSVC is very picky with the library tags
+        vscrt = ''
+        try:
+            crt_val = self.env.coredata.base_options['b_vscrt'].value
+            buildtype = self.env.coredata.builtins['buildtype'].value
+            vscrt = self.clib_compiler.get_crt_compile_args(crt_val, buildtype)[0]
+        except (KeyError, IndexError, AttributeError):
+            pass
+
+        libs = [x for x in libs if x.static == self.static or not self.explicit_static]
+        libs = [x for x in libs if x.mt == self.multithreading]
+        libs = [x for x in libs if x.version_matches(lib_vers)]
+        libs = [x for x in libs if x.arch_matches(self.arch)]
+        libs = [x for x in libs if x.vscrt_matches(vscrt)]
+        libs = [x for x in libs if x.nvsuffix != 'dll']  # Only link to import libraries
+
+        # Only filter by debug when we are building in release mode. Debug
+        # libraries are automatically prefered through sorting otherwise.
+        if not self.debug:
+            libs = [x for x in libs if not x.debug]
+
+        # Take the abitag from the first library and filter by it. This
+        # ensures that we have a set of libraries that are always compatible.
+        if not libs:
+            return []
+        abitag = libs[0].abitag
+        libs = [x for x in libs if x.abitag == abitag]
+
+        return libs
+
+    def detect_libraries(self, libdir: Path) -> T.List[BoostLibraryFile]:
+        libs = []  # type: T.List[BoostLibraryFile]
+        for i in libdir.iterdir():
+            if not i.is_file() or i.is_symlink():
+                continue
+            if not any([i.name.startswith(x) for x in ['libboost_', 'boost_']]):
+                continue
+
+            libs += [BoostLibraryFile(i)]
+        return [x for x in libs if x.is_boost()]  # Filter out no boost libraries
+
+    def detect_roots(self) -> T.List[Path]:
+        roots = []  # type: T.List[Path]
+
+        # Add roots from the environment
+        for i in ['BOOST_ROOT', 'BOOSTROOT']:
+            if i in os.environ:
+                raw_paths = os.environ[i].split(os.pathsep)
+                paths = [Path(x) for x in raw_paths]
+                if paths and any([not x.is_absolute() for x in paths]):
+                    raise DependencyException('Paths in {} must be absolute'.format(i))
+                roots += paths
+                return roots  # Do not add system paths if BOOST_ROOT is present
+
+        # Try getting the BOOST_ROOT from a boost.pc if it exists. This primarely
+        # allows BoostDependency to find boost from Conan. See #5438
+        try:
+            boost_pc = PkgConfigDependency('boost', self.env, {'required': False})
+            if boost_pc.found():
+                boost_root = boost_pc.get_pkgconfig_variable('prefix', {'default': None})
+                if boost_root:
+                    roots += [Path(boost_root)]
+        except DependencyException:
+            pass
+
+        # Add roots from system paths
+        inc_paths = [Path(x) for x in self.clib_compiler.get_default_include_dirs()]
+        inc_paths = [x.parent for x in inc_paths if x.exists()]
+        inc_paths = [x.resolve() for x in inc_paths]
+        roots += inc_paths
+
+        # Add system paths
+        if self.env.machines[self.for_machine].is_windows():
+            # Where boost built from source actually installs it
+            c_root = Path('C:/Boost')
+            if c_root.is_dir():
+                roots += [c_root]
+
+            # Where boost documentation says it should be
+            prog_files = Path('C:/Program Files/boost')
+            # Where boost prebuilt binaries are
+            local_boost = Path('C:/local')
+
+            candidates = []  # type: T.List[Path]
+            if prog_files.is_dir():
+                candidates += [*prog_files.iterdir()]
+            if local_boost.is_dir():
+                candidates += [*local_boost.iterdir()]
+
+            roots += [x for x in candidates if x.name.lower().startswith('boost') and x.is_dir()]
+        else:
+            tmp = []  # type: T.List[Path]
+
+            # Homebrew
+            brew_boost = Path('/usr/local/Cellar/boost')
+            if brew_boost.is_dir():
+                tmp += [x for x in brew_boost.iterdir()]
+
+            # Add some default system paths
+            tmp += [Path('/opt/local')]
+            tmp += [Path('/usr/local/opt/boost')]
+            tmp += [Path('/usr/local')]
+            tmp += [Path('/usr')]
+
+            # Cleanup paths
+            tmp = [x for x in tmp if x.is_dir()]
+            tmp = [x.resolve() for x in tmp]
+            roots += tmp
+
+        return roots
+
+    def log_details(self) -> str:
+        res = ''
+        if self.modules_found:
+            res += 'found: ' + ', '.join(self.modules_found)
+        if self.modules_missing:
+            if res:
+                res += ' | '
+            res += 'missing: ' + ', '.join(self.modules_missing)
+        return res
+
+    def log_info(self) -> str:
+        if self.boost_root:
+            return self.boost_root.as_posix()
+        return ''
+
+    def _include_dir_from_version_header(self, hfile: Path) -> BoostIncludeDir:
+        # Extract the version with a regex. Using clib_compiler.get_define would
+        # also work, however, this is slower (since it the compiler has to be
+        # invoked) and overkill since the layout of the header is always the same.
+        assert hfile.exists()
+        raw = hfile.read_text()
+        m = re.search(r'#define\s+BOOST_VERSION\s+([0-9]+)', raw)
+        if not m:
+            mlog.debug('Failed to extract version information from {}'.format(hfile))
+            return BoostIncludeDir(hfile.parents[1], 0)
+        return BoostIncludeDir(hfile.parents[1], int(m.group(1)))
+
+    def _extra_compile_args(self) -> T.List[str]:
+        # BOOST_ALL_DYN_LINK should not be required with the known defines below
+        return ['-DBOOST_ALL_NO_LIB']  # Disable automatic linking
+
+
+# See https://www.boost.org/doc/libs/1_72_0/more/getting_started/unix-variants.html#library-naming
+# See https://mesonbuild.com/Reference-tables.html#cpu-families
+boost_arch_map = {
+    'aarch64': 'a64',
+    'arc': 'a32',
+    'arm': 'a32',
+    'ia64': 'i64',
+    'mips': 'm32',
+    'mips64': 'm64',
+    'ppc': 'p32',
+    'ppc64': 'p64',
+    'sparc': 's32',
+    'sparc64': 's64',
+    'x86': 'x32',
+    'x86_64': 'x64',
+}
+
+
+####      ---- BEGIN GENERATED ----      ####
+#                                           #
+# Generated with tools/boost_names.py:
+#  - boost version:   1.73.0
+#  - modules found:   159
+#  - libraries found: 43
+#
+
+class BoostLibrary():
+    def __init__(self, name: str, shared: T.List[str], static: T.List[str], single: T.List[str], multi: T.List[str]):
+        self.name = name
+        self.shared = shared
+        self.static = static
+        self.single = single
+        self.multi = multi
+
+class BoostModule():
+    def __init__(self, name: str, key: str, desc: str, libs: T.List[str]):
+        self.name = name
+        self.key = key
+        self.desc = desc
+        self.libs = libs
+
+
+# dict of all know libraries with additional compile options
+boost_libraries = {
+    'boost_atomic': BoostLibrary(
+        name='boost_atomic',
+        shared=['-DBOOST_ATOMIC_DYN_LINK=1'],
+        static=['-DBOOST_ATOMIC_STATIC_LINK=1'],
+        single=[],
+        multi=[],
+    ),
+    'boost_chrono': BoostLibrary(
+        name='boost_chrono',
+        shared=['-DBOOST_CHRONO_DYN_LINK=1'],
+        static=['-DBOOST_CHRONO_STATIC_LINK=1'],
+        single=['-DBOOST_CHRONO_THREAD_DISABLED'],
+        multi=[],
+    ),
+    'boost_container': BoostLibrary(
+        name='boost_container',
+        shared=['-DBOOST_CONTAINER_DYN_LINK=1'],
+        static=['-DBOOST_CONTAINER_STATIC_LINK=1'],
+        single=[],
+        multi=[],
+    ),
+    'boost_context': BoostLibrary(
+        name='boost_context',
+        shared=['-DBOOST_CONTEXT_DYN_LINK=1'],
+        static=[],
+        single=[],
+        multi=[],
+    ),
+    'boost_contract': BoostLibrary(
+        name='boost_contract',
+        shared=['-DBOOST_CONTRACT_DYN_LINK'],
+        static=['-DBOOST_CONTRACT_STATIC_LINK'],
+        single=['-DBOOST_CONTRACT_DISABLE_THREADS'],
+        multi=[],
+    ),
+    'boost_coroutine': BoostLibrary(
+        name='boost_coroutine',
+        shared=['-DBOOST_COROUTINES_DYN_LINK=1'],
+        static=[],
+        single=[],
+        multi=[],
+    ),
+    'boost_date_time': BoostLibrary(
+        name='boost_date_time',
+        shared=['-DBOOST_DATE_TIME_DYN_LINK=1'],
+        static=[],
+        single=[],
+        multi=[],
+    ),
+    'boost_exception': BoostLibrary(
+        name='boost_exception',
+        shared=[],
+        static=[],
+        single=[],
+        multi=[],
+    ),
+    'boost_fiber': BoostLibrary(
+        name='boost_fiber',
+        shared=['-DBOOST_FIBERS_DYN_LINK=1'],
+        static=[],
+        single=[],
+        multi=[],
+    ),
+    'boost_fiber_numa': BoostLibrary(
+        name='boost_fiber_numa',
+        shared=['-DBOOST_FIBERS_DYN_LINK=1'],
+        static=[],
+        single=[],
+        multi=[],
+    ),
+    'boost_filesystem': BoostLibrary(
+        name='boost_filesystem',
+        shared=['-DBOOST_FILESYSTEM_DYN_LINK=1'],
+        static=['-DBOOST_FILESYSTEM_STATIC_LINK=1'],
+        single=[],
+        multi=[],
+    ),
+    'boost_graph': BoostLibrary(
+        name='boost_graph',
+        shared=[],
+        static=[],
+        single=[],
+        multi=[],
+    ),
+    'boost_iostreams': BoostLibrary(
+        name='boost_iostreams',
+        shared=['-DBOOST_IOSTREAMS_DYN_LINK=1'],
+        static=[],
+        single=[],
+        multi=[],
+    ),
+    'boost_locale': BoostLibrary(
+        name='boost_locale',
+        shared=[],
+        static=[],
+        single=[],
+        multi=[],
+    ),
+    'boost_log': BoostLibrary(
+        name='boost_log',
+        shared=['-DBOOST_LOG_DYN_LINK=1'],
+        static=[],
+        single=['-DBOOST_LOG_NO_THREADS'],
+        multi=[],
+    ),
+    'boost_log_setup': BoostLibrary(
+        name='boost_log_setup',
+        shared=['-DBOOST_LOG_SETUP_DYN_LINK=1'],
+        static=[],
+        single=['-DBOOST_LOG_NO_THREADS'],
+        multi=[],
+    ),
+    'boost_math_c99': BoostLibrary(
+        name='boost_math_c99',
+        shared=[],
+        static=[],
+        single=[],
+        multi=[],
+    ),
+    'boost_math_c99f': BoostLibrary(
+        name='boost_math_c99f',
+        shared=[],
+        static=[],
+        single=[],
+        multi=[],
+    ),
+    'boost_math_c99l': BoostLibrary(
+        name='boost_math_c99l',
+        shared=[],
+        static=[],
+        single=[],
+        multi=[],
+    ),
+    'boost_math_tr1': BoostLibrary(
+        name='boost_math_tr1',
+        shared=[],
+        static=[],
+        single=[],
+        multi=[],
+    ),
+    'boost_math_tr1f': BoostLibrary(
+        name='boost_math_tr1f',
+        shared=[],
+        static=[],
+        single=[],
+        multi=[],
+    ),
+    'boost_math_tr1l': BoostLibrary(
+        name='boost_math_tr1l',
+        shared=[],
+        static=[],
+        single=[],
+        multi=[],
+    ),
+    'boost_mpi': BoostLibrary(
+        name='boost_mpi',
+        shared=[],
+        static=[],
+        single=[],
+        multi=[],
+    ),
+    'boost_nowide': BoostLibrary(
+        name='boost_nowide',
+        shared=['-DBOOST_NOWIDE_DYN_LINK=1'],
+        static=[],
+        single=[],
+        multi=[],
+    ),
+    'boost_prg_exec_monitor': BoostLibrary(
+        name='boost_prg_exec_monitor',
+        shared=['-DBOOST_TEST_DYN_LINK=1'],
+        static=[],
+        single=[],
+        multi=[],
+    ),
+    'boost_program_options': BoostLibrary(
+        name='boost_program_options',
+        shared=[],
+        static=[],
+        single=[],
+        multi=[],
+    ),
+    'boost_random': BoostLibrary(
+        name='boost_random',
+        shared=['-DBOOST_RANDOM_DYN_LINK'],
+        static=[],
+        single=[],
+        multi=[],
+    ),
+    'boost_regex': BoostLibrary(
+        name='boost_regex',
+        shared=[],
+        static=[],
+        single=[],
+        multi=[],
+    ),
+    'boost_serialization': BoostLibrary(
+        name='boost_serialization',
+        shared=[],
+        static=[],
+        single=[],
+        multi=[],
+    ),
+    'boost_stacktrace_addr2line': BoostLibrary(
+        name='boost_stacktrace_addr2line',
+        shared=[],
+        static=[],
+        single=[],
+        multi=[],
+    ),
+    'boost_stacktrace_backtrace': BoostLibrary(
+        name='boost_stacktrace_backtrace',
+        shared=[],
+        static=[],
+        single=[],
+        multi=[],
+    ),
+    'boost_stacktrace_basic': BoostLibrary(
+        name='boost_stacktrace_basic',
+        shared=[],
+        static=[],
+        single=[],
+        multi=[],
+    ),
+    'boost_stacktrace_noop': BoostLibrary(
+        name='boost_stacktrace_noop',
+        shared=[],
+        static=[],
+        single=[],
+        multi=[],
+    ),
+    'boost_stacktrace_windbg': BoostLibrary(
+        name='boost_stacktrace_windbg',
+        shared=[],
+        static=[],
+        single=[],
+        multi=[],
+    ),
+    'boost_stacktrace_windbg_cached': BoostLibrary(
+        name='boost_stacktrace_windbg_cached',
+        shared=[],
+        static=[],
+        single=[],
+        multi=[],
+    ),
+    'boost_system': BoostLibrary(
+        name='boost_system',
+        shared=['-DBOOST_SYSTEM_DYN_LINK=1'],
+        static=['-DBOOST_SYSTEM_STATIC_LINK=1'],
+        single=[],
+        multi=[],
+    ),
+    'boost_test_exec_monitor': BoostLibrary(
+        name='boost_test_exec_monitor',
+        shared=['-DBOOST_TEST_DYN_LINK=1'],
+        static=[],
+        single=[],
+        multi=[],
+    ),
+    'boost_thread': BoostLibrary(
+        name='boost_thread',
+        shared=['-DBOOST_THREAD_BUILD_DLL=1', '-DBOOST_THREAD_USE_DLL=1'],
+        static=['-DBOOST_THREAD_BUILD_LIB=1', '-DBOOST_THREAD_USE_LIB=1'],
+        single=[],
+        multi=[],
+    ),
+    'boost_timer': BoostLibrary(
+        name='boost_timer',
+        shared=['-DBOOST_TIMER_DYN_LINK=1'],
+        static=['-DBOOST_TIMER_STATIC_LINK=1'],
+        single=[],
+        multi=[],
+    ),
+    'boost_type_erasure': BoostLibrary(
+        name='boost_type_erasure',
+        shared=['-DBOOST_TYPE_ERASURE_DYN_LINK'],
+        static=[],
+        single=[],
+        multi=[],
+    ),
+    'boost_unit_test_framework': BoostLibrary(
+        name='boost_unit_test_framework',
+        shared=['-DBOOST_TEST_DYN_LINK=1'],
+        static=[],
+        single=[],
+        multi=[],
+    ),
+    'boost_wave': BoostLibrary(
+        name='boost_wave',
+        shared=[],
+        static=[],
+        single=[],
+        multi=[],
+    ),
+    'boost_wserialization': BoostLibrary(
+        name='boost_wserialization',
+        shared=[],
+        static=[],
+        single=[],
+        multi=[],
+    ),
+}
+
+#                                           #
+####       ---- END GENERATED ----       ####
diff --git a/meson/mesonbuild/dependencies/coarrays.py b/meson/mesonbuild/dependencies/coarrays.py
new file mode 100644
index 000000000..84c34128f
--- /dev/null
+++ b/meson/mesonbuild/dependencies/coarrays.py
@@ -0,0 +1,85 @@
+# Copyright 2013-2019 The Meson development team
+
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+
+#     http://www.apache.org/licenses/LICENSE-2.0
+
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import functools
+import typing as T
+
+from .base import CMakeDependency, DependencyMethods, ExternalDependency, PkgConfigDependency, detect_compiler, factory_methods
+
+if T.TYPE_CHECKING:
+    from . base import DependencyType
+    from ..environment import Environment, MachineChoice
+
+
+@factory_methods({DependencyMethods.PKGCONFIG, DependencyMethods.CMAKE, DependencyMethods.SYSTEM})
+def coarray_factory(env: 'Environment', for_machine: 'MachineChoice',
+                    kwargs: T.Dict[str, T.Any], methods: T.List[DependencyMethods]) -> T.List['DependencyType']:
+    fcid = detect_compiler('coarray', env, for_machine, 'fortran').get_id()
+    candidates = []  # type: T.List[DependencyType]
+
+    if fcid == 'gcc':
+        # OpenCoarrays is the most commonly used method for Fortran Coarray with GCC
+        if DependencyMethods.PKGCONFIG in methods:
+            for pkg in ['caf-openmpi', 'caf']:
+                candidates.append(functools.partial(
+                    PkgConfigDependency, pkg, env, kwargs, language='fortran'))
+
+        if DependencyMethods.CMAKE in methods:
+            if 'modules' not in kwargs:
+                kwargs['modules'] = 'OpenCoarrays::caf_mpi'
+            candidates.append(functools.partial(
+                CMakeDependency, 'OpenCoarrays', env, kwargs, language='fortran'))
+
+    if DependencyMethods.SYSTEM in methods:
+        candidates.append(functools.partial(CoarrayDependency, env, kwargs))
+
+    return candidates
+
+
+class CoarrayDependency(ExternalDependency):
+    """
+    Coarrays are a Fortran 2008 feature.
+
+    Coarrays are sometimes implemented via external library (GCC+OpenCoarrays),
+    while other compilers just build in support (Cray, IBM, Intel, NAG).
+    Coarrays may be thought of as a high-level language abstraction of
+    low-level MPI calls.
+    """
+    def __init__(self, environment, kwargs: dict):
+        super().__init__('coarray', environment, kwargs, language='fortran')
+        kwargs['required'] = False
+        kwargs['silent'] = True
+
+        cid = self.get_compiler().get_id()
+        if cid == 'gcc':
+            # Fallback to single image
+            self.compile_args = ['-fcoarray=single']
+            self.version = 'single image (fallback)'
+            self.is_found = True
+        elif cid == 'intel':
+            # Coarrays are built into Intel compilers, no external library needed
+            self.is_found = True
+            self.link_args = ['-coarray=shared']
+            self.compile_args = self.link_args
+        elif cid == 'intel-cl':
+            # Coarrays are built into Intel compilers, no external library needed
+            self.is_found = True
+            self.compile_args = ['/Qcoarray:shared']
+        elif cid == 'nagfor':
+            # NAG doesn't require any special arguments for Coarray
+            self.is_found = True
+
+    @staticmethod
+    def get_methods():
+        return [DependencyMethods.AUTO, DependencyMethods.CMAKE, DependencyMethods.PKGCONFIG]
diff --git a/meson/mesonbuild/dependencies/cuda.py b/meson/mesonbuild/dependencies/cuda.py
new file mode 100644
index 000000000..c962cae60
--- /dev/null
+++ b/meson/mesonbuild/dependencies/cuda.py
@@ -0,0 +1,256 @@
+# Copyright 2013-2019 The Meson development team
+
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+
+#     http://www.apache.org/licenses/LICENSE-2.0
+
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import glob
+import re
+import os
+
+from .. import mlog
+from .. import mesonlib
+from ..environment import detect_cpu_family
+
+from .base import (DependencyException, ExternalDependency)
+
+
+class CudaDependency(ExternalDependency):
+
+    supported_languages = ['cuda', 'cpp', 'c'] # see also _default_language
+
+    def __init__(self, environment, kwargs):
+        compilers = environment.coredata.compilers[self.get_for_machine_from_kwargs(kwargs)]
+        language = self._detect_language(compilers)
+        if language not in self.supported_languages:
+            raise DependencyException('Language \'{}\' is not supported by the CUDA Toolkit. Supported languages are {}.'.format(language, self.supported_languages))
+
+        super().__init__('cuda', environment, kwargs, language=language)
+        self.requested_modules = self.get_requested(kwargs)
+        if 'cudart' not in self.requested_modules:
+            self.requested_modules = ['cudart'] + self.requested_modules
+
+        (self.cuda_path, self.version, self.is_found) = self._detect_cuda_path_and_version()
+        if not self.is_found:
+            return
+
+        if not os.path.isabs(self.cuda_path):
+            raise DependencyException('CUDA Toolkit path must be absolute, got \'{}\'.'.format(self.cuda_path))
+
+        # nvcc already knows where to find the CUDA Toolkit, but if we're compiling
+        # a mixed C/C++/CUDA project, we still need to make the include dir searchable
+        if self.language != 'cuda' or len(compilers) > 1:
+            self.incdir = os.path.join(self.cuda_path, 'include')
+            self.compile_args += ['-I{}'.format(self.incdir)]
+
+        if self.language != 'cuda':
+            arch_libdir = self._detect_arch_libdir()
+            self.libdir = os.path.join(self.cuda_path, arch_libdir)
+            mlog.debug('CUDA library directory is', mlog.bold(self.libdir))
+        else:
+            self.libdir = None
+
+        self.is_found = self._find_requested_libraries()
+
+    @classmethod
+    def _detect_language(cls, compilers):
+        for lang in cls.supported_languages:
+            if lang in compilers:
+                return lang
+        return list(compilers.keys())[0]
+
+    def _detect_cuda_path_and_version(self):
+        self.env_var = self._default_path_env_var()
+        mlog.debug('Default path env var:', mlog.bold(self.env_var))
+
+        version_reqs = self.version_reqs
+        if self.language == 'cuda':
+            nvcc_version = self._strip_patch_version(self.get_compiler().version)
+            mlog.debug('nvcc version:', mlog.bold(nvcc_version))
+            if version_reqs:
+                # make sure nvcc version satisfies specified version requirements
+                (found_some, not_found, found) = mesonlib.version_compare_many(nvcc_version, version_reqs)
+                if not_found:
+                    msg = 'The current nvcc version {} does not satisfy the specified CUDA Toolkit version requirements {}.'.format(nvcc_version, version_reqs)
+                    return self._report_dependency_error(msg, (None, None, False))
+
+            # use nvcc version to find a matching CUDA Toolkit
+            version_reqs = ['={}'.format(nvcc_version)]
+        else:
+            nvcc_version = None
+
+        paths = [(path, self._cuda_toolkit_version(path), default) for (path, default) in self._cuda_paths()]
+        if version_reqs:
+            return self._find_matching_toolkit(paths, version_reqs, nvcc_version)
+
+        defaults = [(path, version) for (path, version, default) in paths if default]
+        if defaults:
+            return (defaults[0][0], defaults[0][1], True)
+
+        platform_msg = 'set the CUDA_PATH environment variable' if self._is_windows() \
+            else 'set the CUDA_PATH environment variable/create the \'/usr/local/cuda\' symbolic link'
+        msg = 'Please specify the desired CUDA Toolkit version (e.g. dependency(\'cuda\', version : \'>=10.1\')) or {} to point to the location of your desired version.'.format(platform_msg)
+        return self._report_dependency_error(msg, (None, None, False))
+
+    def _find_matching_toolkit(self, paths, version_reqs, nvcc_version):
+        # keep the default paths order intact, sort the rest in the descending order
+        # according to the toolkit version
+        defaults, rest = mesonlib.partition(lambda t: not t[2], paths)
+        defaults = list(defaults)
+        paths = defaults + sorted(rest, key=lambda t: mesonlib.Version(t[1]), reverse=True)
+        mlog.debug('Search paths: {}'.format(paths))
+
+        if nvcc_version and defaults:
+            default_src = "the {} environment variable".format(self.env_var) if self.env_var else "the \'/usr/local/cuda\' symbolic link"
+            nvcc_warning = 'The default CUDA Toolkit as designated by {} ({}) doesn\'t match the current nvcc version {} and will be ignored.'.format(default_src, os.path.realpath(defaults[0][0]), nvcc_version)
+        else:
+            nvcc_warning = None
+
+        for (path, version, default) in paths:
+            (found_some, not_found, found) = mesonlib.version_compare_many(version, version_reqs)
+            if not not_found:
+                if not default and nvcc_warning:
+                    mlog.warning(nvcc_warning)
+                return (path, version, True)
+
+        if nvcc_warning:
+            mlog.warning(nvcc_warning)
+        return (None, None, False)
+
+    def _default_path_env_var(self):
+        env_vars = ['CUDA_PATH'] if self._is_windows() else ['CUDA_PATH', 'CUDA_HOME', 'CUDA_ROOT']
+        env_vars = [var for var in env_vars if var in os.environ]
+        user_defaults = set([os.environ[var] for var in env_vars])
+        if len(user_defaults) > 1:
+            mlog.warning('Environment variables {} point to conflicting toolkit locations ({}). Toolkit selection might produce unexpected results.'.format(', '.join(env_vars), ', '.join(user_defaults)))
+        return env_vars[0] if env_vars else None
+
+    def _cuda_paths(self):
+        return ([(os.environ[self.env_var], True)] if self.env_var else []) \
+            + (self._cuda_paths_win() if self._is_windows() else self._cuda_paths_nix())
+
+    def _cuda_paths_win(self):
+        env_vars = os.environ.keys()
+        return [(os.environ[var], False) for var in env_vars if var.startswith('CUDA_PATH_')]
+
+    def _cuda_paths_nix(self):
+        # include /usr/local/cuda default only if no env_var was found
+        pattern = '/usr/local/cuda-*' if self.env_var else '/usr/local/cuda*'
+        return [(path, os.path.basename(path) == 'cuda') for path in glob.iglob(pattern)]
+
+    toolkit_version_regex = re.compile(r'^CUDA Version\s+(.*)$')
+    path_version_win_regex = re.compile(r'^v(.*)$')
+    path_version_nix_regex = re.compile(r'^cuda-(.*)$')
+
+    def _cuda_toolkit_version(self, path):
+        version = self._read_toolkit_version_txt(path)
+        if version:
+            return version
+
+        mlog.debug('Falling back to extracting version from path')
+        path_version_regex = self.path_version_win_regex if self._is_windows() else self.path_version_nix_regex
+        try:
+            m = path_version_regex.match(os.path.basename(path))
+            if m:
+                return m.group(1)
+            else:
+                mlog.warning('Could not detect CUDA Toolkit version for {}'.format(path))
+        except Exception as e:
+            mlog.warning('Could not detect CUDA Toolkit version for {}: {}'.format(path, str(e)))
+
+        return '0.0'
+
+    def _read_toolkit_version_txt(self, path):
+        # Read 'version.txt' at the root of the CUDA Toolkit directory to determine the tookit version
+        version_file_path = os.path.join(path, 'version.txt')
+        try:
+            with open(version_file_path) as version_file:
+                version_str = version_file.readline() # e.g. 'CUDA Version 10.1.168'
+                m = self.toolkit_version_regex.match(version_str)
+                if m:
+                    return self._strip_patch_version(m.group(1))
+        except Exception as e:
+            mlog.debug('Could not read CUDA Toolkit\'s version file {}: {}'.format(version_file_path, str(e)))
+
+        return None
+
+    @classmethod
+    def _strip_patch_version(cls, version):
+        return '.'.join(version.split('.')[:2])
+
+    def _detect_arch_libdir(self):
+        arch = detect_cpu_family(self.env.coredata.compilers.host)
+        machine = self.env.machines[self.for_machine]
+        msg = '{} architecture is not supported in {} version of the CUDA Toolkit.'
+        if machine.is_windows():
+            libdirs = {'x86': 'Win32', 'x86_64': 'x64'}
+            if arch not in libdirs:
+                raise DependencyException(msg.format(arch, 'Windows'))
+            return os.path.join('lib', libdirs[arch])
+        elif machine.is_linux():
+            libdirs = {'x86_64': 'lib64', 'ppc64': 'lib', 'aarch64': 'lib64'}
+            if arch not in libdirs:
+                raise DependencyException(msg.format(arch, 'Linux'))
+            return libdirs[arch]
+        elif machine.is_osx():
+            libdirs = {'x86_64': 'lib64'}
+            if arch not in libdirs:
+                raise DependencyException(msg.format(arch, 'macOS'))
+            return libdirs[arch]
+        else:
+            raise DependencyException('CUDA Toolkit: unsupported platform.')
+
+    def _find_requested_libraries(self):
+        self.lib_modules = {}
+        all_found = True
+
+        for module in self.requested_modules:
+            args = self.clib_compiler.find_library(module, self.env, [self.libdir] if self.libdir else [])
+            if args is None:
+                self._report_dependency_error('Couldn\'t find requested CUDA module \'{}\''.format(module))
+                all_found = False
+            else:
+                mlog.debug('Link args for CUDA module \'{}\' are {}'.format(module, args))
+                self.lib_modules[module] = args
+
+        return all_found
+
+    def _is_windows(self):
+        return self.env.machines[self.for_machine].is_windows()
+
+    def _report_dependency_error(self, msg, ret_val=None):
+        if self.required:
+            raise DependencyException(msg)
+
+        mlog.debug(msg)
+        return ret_val
+
+    def log_details(self):
+        module_str = ', '.join(self.requested_modules)
+        return 'modules: ' + module_str
+
+    def log_info(self):
+        return self.cuda_path if self.cuda_path else ''
+
+    def get_requested(self, kwargs):
+        candidates = mesonlib.extract_as_list(kwargs, 'modules')
+        for c in candidates:
+            if not isinstance(c, str):
+                raise DependencyException('CUDA module argument is not a string.')
+        return candidates
+
+    def get_link_args(self, **kwargs):
+        args = []
+        if self.libdir:
+            args += self.clib_compiler.get_linker_search_args(self.libdir)
+        for lib in self.requested_modules:
+            args += self.lib_modules[lib]
+        return args
diff --git a/meson/mesonbuild/dependencies/data/CMakeLists.txt b/meson/mesonbuild/dependencies/data/CMakeLists.txt
new file mode 100644
index 000000000..26c067d56
--- /dev/null
+++ b/meson/mesonbuild/dependencies/data/CMakeLists.txt
@@ -0,0 +1,94 @@
+# fail noisily if attempt to use this file without setting:
+# cmake_minimum_required(VERSION ${CMAKE_VERSION})
+# project(... LANGUAGES ...)
+
+cmake_policy(SET CMP0000 NEW)
+
+set(PACKAGE_FOUND FALSE)
+set(_packageName "${NAME}")
+string(TOUPPER "${_packageName}" PACKAGE_NAME)
+
+while(TRUE)
+  find_package("${NAME}" QUIET COMPONENTS ${COMPS})
+
+  # ARCHS has to be set via the CMD interface
+  if(${_packageName}_FOUND OR ${PACKAGE_NAME}_FOUND OR "${ARCHS}" STREQUAL "")
+    break()
+  endif()
+
+  list(GET       ARCHS 0 CMAKE_LIBRARY_ARCHITECTURE)
+  list(REMOVE_AT ARCHS 0)
+endwhile()
+
+if(${_packageName}_FOUND  OR  ${PACKAGE_NAME}_FOUND)
+  set(PACKAGE_FOUND TRUE)
+
+  # Check the following variables:
+  # FOO_VERSION
+  # Foo_VERSION
+  # FOO_VERSION_STRING
+  # Foo_VERSION_STRING
+  if(NOT DEFINED PACKAGE_VERSION)
+    if(DEFINED ${_packageName}_VERSION)
+      set(PACKAGE_VERSION "${${_packageName}_VERSION}")
+    elseif(DEFINED ${PACKAGE_NAME}_VERSION)
+      set(PACKAGE_VERSION "${${PACKAGE_NAME}_VERSION}")
+    elseif(DEFINED ${_packageName}_VERSION_STRING)
+      set(PACKAGE_VERSION "${${_packageName}_VERSION_STRING}")
+    elseif(DEFINED ${PACKAGE_NAME}_VERSION_STRING)
+      set(PACKAGE_VERSION "${${PACKAGE_NAME}_VERSION_STRING}")
+    endif()
+  endif()
+
+  # Check the following variables:
+  # FOO_LIBRARIES
+  # Foo_LIBRARIES
+  # FOO_LIBS
+  # Foo_LIBS
+  set(libs)
+  if(DEFINED ${_packageName}_LIBRARIES)
+    set(libs ${_packageName}_LIBRARIES)
+  elseif(DEFINED ${PACKAGE_NAME}_LIBRARIES)
+    set(libs ${PACKAGE_NAME}_LIBRARIES)
+  elseif(DEFINED ${_packageName}_LIBS)
+    set(libs ${_packageName}_LIBS)
+  elseif(DEFINED ${PACKAGE_NAME}_LIBS)
+    set(libs ${PACKAGE_NAME}_LIBS)
+  endif()
+
+  # Check the following variables:
+  # FOO_INCLUDE_DIRS
+  # Foo_INCLUDE_DIRS
+  # FOO_INCLUDES
+  # Foo_INCLUDES
+  # FOO_INCLUDE_DIR
+  # Foo_INCLUDE_DIR
+  set(includes)
+  if(DEFINED ${_packageName}_INCLUDE_DIRS)
+    set(includes ${_packageName}_INCLUDE_DIRS)
+  elseif(DEFINED ${PACKAGE_NAME}_INCLUDE_DIRS)
+    set(includes ${PACKAGE_NAME}_INCLUDE_DIRS)
+  elseif(DEFINED ${_packageName}_INCLUDES)
+    set(includes ${_packageName}_INCLUDES)
+  elseif(DEFINED ${PACKAGE_NAME}_INCLUDES)
+    set(includes ${PACKAGE_NAME}_INCLUDES)
+  elseif(DEFINED ${_packageName}_INCLUDE_DIR)
+    set(includes ${_packageName}_INCLUDE_DIR)
+  elseif(DEFINED ${PACKAGE_NAME}_INCLUDE_DIR)
+    set(includes ${PACKAGE_NAME}_INCLUDE_DIR)
+  endif()
+
+  # Check the following variables:
+  # FOO_DEFINITIONS
+  # Foo_DEFINITIONS
+  set(definitions)
+  if(DEFINED ${_packageName}_DEFINITIONS)
+    set(definitions ${_packageName}_DEFINITIONS)
+  elseif(DEFINED ${PACKAGE_NAME}_DEFINITIONS)
+    set(definitions ${PACKAGE_NAME}_DEFINITIONS)
+  endif()
+
+  set(PACKAGE_INCLUDE_DIRS "${${includes}}")
+  set(PACKAGE_DEFINITIONS  "${${definitions}}")
+  set(PACKAGE_LIBRARIES    "${${libs}}")
+endif()
diff --git a/meson/mesonbuild/dependencies/data/CMakeListsLLVM.txt b/meson/mesonbuild/dependencies/data/CMakeListsLLVM.txt
new file mode 100644
index 000000000..9d3e41234
--- /dev/null
+++ b/meson/mesonbuild/dependencies/data/CMakeListsLLVM.txt
@@ -0,0 +1,95 @@
+cmake_minimum_required(VERSION ${CMAKE_MAJOR_VERSION}.${CMAKE_MINOR_VERSION}.${CMAKE_PATCH_VERSION} )
+
+set(PACKAGE_FOUND FALSE)
+
+while(TRUE)
+  find_package(LLVM REQUIRED CONFIG QUIET)
+
+  # ARCHS has to be set via the CMD interface
+  if(LLVM_FOUND OR "${ARCHS}" STREQUAL "")
+    break()
+  endif()
+
+  list(GET       ARCHS 0 CMAKE_LIBRARY_ARCHITECTURE)
+  list(REMOVE_AT ARCHS 0)
+endwhile()
+
+if(LLVM_FOUND)
+  set(PACKAGE_FOUND TRUE)
+
+  foreach(mod IN LISTS LLVM_MESON_MODULES)
+    # Reset variables
+    set(out_mods)
+    set(real_mods)
+
+    # Generate a lower and upper case version
+    string(TOLOWER "${mod}" mod_L)
+    string(TOUPPER "${mod}" mod_U)
+
+    # Get the mapped components
+    llvm_map_components_to_libnames(out_mods ${mod} ${mod_L} ${mod_U})
+    list(SORT              out_mods)
+    list(REMOVE_DUPLICATES out_mods)
+
+    # Make sure that the modules exist
+    foreach(i IN LISTS out_mods)
+      if(TARGET ${i})
+        list(APPEND real_mods ${i})
+      endif()
+    endforeach()
+
+    # Set the output variables
+    set(MESON_LLVM_TARGETS_${mod} ${real_mods})
+    foreach(i IN LISTS real_mods)
+      set(MESON_TARGET_TO_LLVM_${i} ${mod})
+    endforeach()
+  endforeach()
+
+  # Check the following variables:
+  # LLVM_PACKAGE_VERSION
+  # LLVM_VERSION
+  # LLVM_VERSION_STRING
+  if(NOT DEFINED PACKAGE_VERSION)
+    if(DEFINED LLVM_PACKAGE_VERSION)
+      set(PACKAGE_VERSION "${LLVM_PACKAGE_VERSION}")
+    elseif(DEFINED LLVM_VERSION)
+      set(PACKAGE_VERSION "${LLVM_VERSION}")
+    elseif(DEFINED LLVM_VERSION_STRING)
+      set(PACKAGE_VERSION "${LLVM_VERSION_STRING}")
+    endif()
+  endif()
+
+  # Check the following variables:
+  # LLVM_LIBRARIES
+  # LLVM_LIBS
+  set(libs)
+  if(DEFINED LLVM_LIBRARIES)
+    set(libs LLVM_LIBRARIES)
+  elseif(DEFINED LLVM_LIBS)
+    set(libs LLVM_LIBS)
+  endif()
+
+  # Check the following variables:
+  # LLVM_INCLUDE_DIRS
+  # LLVM_INCLUDES
+  # LLVM_INCLUDE_DIR
+  set(includes)
+  if(DEFINED LLVM_INCLUDE_DIRS)
+    set(includes LLVM_INCLUDE_DIRS)
+  elseif(DEFINED LLVM_INCLUDES)
+    set(includes LLVM_INCLUDES)
+  elseif(DEFINED LLVM_INCLUDE_DIR)
+    set(includes LLVM_INCLUDE_DIR)
+  endif()
+
+  # Check the following variables:
+  # LLVM_DEFINITIONS
+  set(definitions)
+  if(DEFINED LLVM_DEFINITIONS)
+    set(definitions LLVM_DEFINITIONS)
+  endif()
+
+  set(PACKAGE_INCLUDE_DIRS "${${includes}}")
+  set(PACKAGE_DEFINITIONS  "${${definitions}}")
+  set(PACKAGE_LIBRARIES    "${${libs}}")
+endif()
diff --git a/meson/mesonbuild/dependencies/data/CMakePathInfo.txt b/meson/mesonbuild/dependencies/data/CMakePathInfo.txt
new file mode 100644
index 000000000..662ec5836
--- /dev/null
+++ b/meson/mesonbuild/dependencies/data/CMakePathInfo.txt
@@ -0,0 +1,31 @@
+cmake_minimum_required(VERSION ${CMAKE_MAJOR_VERSION}.${CMAKE_MINOR_VERSION}.${CMAKE_PATCH_VERSION})
+
+set(TMP_PATHS_LIST)
+list(APPEND TMP_PATHS_LIST ${CMAKE_PREFIX_PATH})
+list(APPEND TMP_PATHS_LIST ${CMAKE_FRAMEWORK_PATH})
+list(APPEND TMP_PATHS_LIST ${CMAKE_APPBUNDLE_PATH})
+list(APPEND TMP_PATHS_LIST $ENV{CMAKE_PREFIX_PATH})
+list(APPEND TMP_PATHS_LIST $ENV{CMAKE_FRAMEWORK_PATH})
+list(APPEND TMP_PATHS_LIST $ENV{CMAKE_APPBUNDLE_PATH})
+list(APPEND TMP_PATHS_LIST ${CMAKE_SYSTEM_PREFIX_PATH})
+list(APPEND TMP_PATHS_LIST ${CMAKE_SYSTEM_FRAMEWORK_PATH})
+list(APPEND TMP_PATHS_LIST ${CMAKE_SYSTEM_APPBUNDLE_PATH})
+
+set(LIB_ARCH_LIST)
+if(CMAKE_LIBRARY_ARCHITECTURE_REGEX)
+  file(GLOB implicit_dirs RELATIVE /lib /lib/*-linux-gnu* )
+  foreach(dir ${implicit_dirs})
+    if("${dir}" MATCHES "${CMAKE_LIBRARY_ARCHITECTURE_REGEX}")
+      list(APPEND LIB_ARCH_LIST "${dir}")
+    endif()
+  endforeach()
+endif()
+
+# "Export" these variables:
+set(MESON_ARCH_LIST ${LIB_ARCH_LIST})
+set(MESON_PATHS_LIST ${TMP_PATHS_LIST})
+set(MESON_CMAKE_ROOT ${CMAKE_ROOT})
+set(MESON_CMAKE_SYSROOT ${CMAKE_SYSROOT})
+set(MESON_FIND_ROOT_PATH ${CMAKE_FIND_ROOT_PATH})
+
+message(STATUS ${TMP_PATHS_LIST})
diff --git a/meson/mesonbuild/dependencies/dev.py b/meson/mesonbuild/dependencies/dev.py
new file mode 100644
index 000000000..67d7e65fa
--- /dev/null
+++ b/meson/mesonbuild/dependencies/dev.py
@@ -0,0 +1,528 @@
+# Copyright 2013-2019 The Meson development team
+
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+
+#     http://www.apache.org/licenses/LICENSE-2.0
+
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# This file contains the detection logic for external dependencies useful for
+# development purposes, such as testing, debugging, etc..
+
+import glob
+import os
+import re
+import typing as T
+
+from .. import mesonlib, mlog
+from ..mesonlib import version_compare, stringlistify, extract_as_list, MachineChoice
+from ..environment import get_llvm_tool_names
+from .base import (
+    DependencyException, DependencyMethods, ExternalDependency, PkgConfigDependency,
+    strip_system_libdirs, ConfigToolDependency, CMakeDependency, DependencyFactory,
+)
+from .misc import threads_factory
+from ..compilers.c import AppleClangCCompiler
+from ..compilers.cpp import AppleClangCPPCompiler
+
+if T.TYPE_CHECKING:
+    from .. environment import Environment
+
+
+def get_shared_library_suffix(environment, for_machine: MachineChoice):
+    """This is only guaranteed to work for languages that compile to machine
+    code, not for languages like C# that use a bytecode and always end in .dll
+    """
+    m = environment.machines[for_machine]
+    if m.is_windows():
+        return '.dll'
+    elif m.is_darwin():
+        return '.dylib'
+    return '.so'
+
+
+class GTestDependencySystem(ExternalDependency):
+    def __init__(self, name: str, environment, kwargs):
+        super().__init__(name, environment, kwargs, language='cpp')
+        self.main = kwargs.get('main', False)
+        self.src_dirs = ['/usr/src/gtest/src', '/usr/src/googletest/googletest/src']
+        if not self._add_sub_dependency(threads_factory(environment, self.for_machine, {})):
+            self.is_found = False
+            return
+        self.detect()
+
+    def detect(self):
+        gtest_detect = self.clib_compiler.find_library("gtest", self.env, [])
+        gtest_main_detect = self.clib_compiler.find_library("gtest_main", self.env, [])
+        if gtest_detect and (not self.main or gtest_main_detect):
+            self.is_found = True
+            self.compile_args = []
+            self.link_args = gtest_detect
+            if self.main:
+                self.link_args += gtest_main_detect
+            self.sources = []
+            self.prebuilt = True
+        elif self.detect_srcdir():
+            self.is_found = True
+            self.compile_args = ['-I' + d for d in self.src_include_dirs]
+            self.link_args = []
+            if self.main:
+                self.sources = [self.all_src, self.main_src]
+            else:
+                self.sources = [self.all_src]
+            self.prebuilt = False
+        else:
+            self.is_found = False
+
+    def detect_srcdir(self):
+        for s in self.src_dirs:
+            if os.path.exists(s):
+                self.src_dir = s
+                self.all_src = mesonlib.File.from_absolute_file(
+                    os.path.join(self.src_dir, 'gtest-all.cc'))
+                self.main_src = mesonlib.File.from_absolute_file(
+                    os.path.join(self.src_dir, 'gtest_main.cc'))
+                self.src_include_dirs = [os.path.normpath(os.path.join(self.src_dir, '..')),
+                                         os.path.normpath(os.path.join(self.src_dir, '../include')),
+                                         ]
+                return True
+        return False
+
+    def log_info(self):
+        if self.prebuilt:
+            return 'prebuilt'
+        else:
+            return 'building self'
+
+    def log_tried(self):
+        return 'system'
+
+    @staticmethod
+    def get_methods():
+        return [DependencyMethods.PKGCONFIG, DependencyMethods.SYSTEM]
+
+
+class GTestDependencyPC(PkgConfigDependency):
+
+    def __init__(self, name: str, environment: 'Environment', kwargs: T.Dict[str, T.Any]):
+        assert name == 'gtest'
+        if kwargs.get('main'):
+            name = 'gtest_main'
+        super().__init__(name, environment, kwargs)
+
+
+class GMockDependencySystem(ExternalDependency):
+    def __init__(self, name: str, environment, kwargs):
+        super().__init__(name, environment, kwargs, language='cpp')
+        self.main = kwargs.get('main', False)
+        if not self._add_sub_dependency(threads_factory(environment, self.for_machine, {})):
+            self.is_found = False
+            return
+
+        # If we are getting main() from GMock, we definitely
+        # want to avoid linking in main() from GTest
+        gtest_kwargs = kwargs.copy()
+        if self.main:
+            gtest_kwargs['main'] = False
+
+        # GMock without GTest is pretty much useless
+        # this also mimics the structure given in WrapDB,
+        # where GMock always pulls in GTest
+        found = self._add_sub_dependency(gtest_factory(environment, self.for_machine, gtest_kwargs))
+        if not found:
+            self.is_found = False
+            return
+
+        # GMock may be a library or just source.
+        # Work with both.
+        gmock_detect = self.clib_compiler.find_library("gmock", self.env, [])
+        gmock_main_detect = self.clib_compiler.find_library("gmock_main", self.env, [])
+        if gmock_detect and (not self.main or gmock_main_detect):
+            self.is_found = True
+            self.link_args += gmock_detect
+            if self.main:
+                self.link_args += gmock_main_detect
+            self.prebuilt = True
+            return
+
+        for d in ['/usr/src/googletest/googlemock/src', '/usr/src/gmock/src', '/usr/src/gmock']:
+            if os.path.exists(d):
+                self.is_found = True
+                # Yes, we need both because there are multiple
+                # versions of gmock that do different things.
+                d2 = os.path.normpath(os.path.join(d, '..'))
+                self.compile_args += ['-I' + d, '-I' + d2, '-I' + os.path.join(d2, 'include')]
+                all_src = mesonlib.File.from_absolute_file(os.path.join(d, 'gmock-all.cc'))
+                main_src = mesonlib.File.from_absolute_file(os.path.join(d, 'gmock_main.cc'))
+                if self.main:
+                    self.sources += [all_src, main_src]
+                else:
+                    self.sources += [all_src]
+                self.prebuilt = False
+                return
+
+        self.is_found = False
+
+    def log_info(self):
+        if self.prebuilt:
+            return 'prebuilt'
+        else:
+            return 'building self'
+
+    def log_tried(self):
+        return 'system'
+
+    @staticmethod
+    def get_methods():
+        return [DependencyMethods.PKGCONFIG, DependencyMethods.SYSTEM]
+
+
+class GMockDependencyPC(PkgConfigDependency):
+
+    def __init__(self, name: str, environment: 'Environment', kwargs: T.Dict[str, T.Any]):
+        assert name == 'gmock'
+        if kwargs.get('main'):
+            name = 'gmock_main'
+        super().__init__(name, environment, kwargs)
+
+
+class LLVMDependencyConfigTool(ConfigToolDependency):
+    """
+    LLVM uses a special tool, llvm-config, which has arguments for getting
+    c args, cxx args, and ldargs as well as version.
+    """
+    tool_name = 'llvm-config'
+    __cpp_blacklist = {'-DNDEBUG'}
+
+    def __init__(self, name: str, environment, kwargs):
+        self.tools = get_llvm_tool_names('llvm-config')
+
+        # Fedora starting with Fedora 30 adds a suffix of the number
+        # of bits in the isa that llvm targets, for example, on x86_64
+        # and aarch64 the name will be llvm-config-64, on x86 and arm
+        # it will be llvm-config-32.
+        if environment.machines[self.get_for_machine_from_kwargs(kwargs)].is_64_bit:
+            self.tools.append('llvm-config-64')
+        else:
+            self.tools.append('llvm-config-32')
+
+        # It's necessary for LLVM <= 3.8 to use the C++ linker. For 3.9 and 4.0
+        # the C linker works fine if only using the C API.
+        super().__init__(name, environment, kwargs, language='cpp')
+        self.provided_modules = []
+        self.required_modules = set()
+        self.module_details = []
+        if not self.is_found:
+            return
+
+        self.provided_modules = self.get_config_value(['--components'], 'modules')
+        modules = stringlistify(extract_as_list(kwargs, 'modules'))
+        self.check_components(modules)
+        opt_modules = stringlistify(extract_as_list(kwargs, 'optional_modules'))
+        self.check_components(opt_modules, required=False)
+
+        cargs = set(self.get_config_value(['--cppflags'], 'compile_args'))
+        self.compile_args = list(cargs.difference(self.__cpp_blacklist))
+
+        if version_compare(self.version, '>= 3.9'):
+            self._set_new_link_args(environment)
+        else:
+            self._set_old_link_args()
+        self.link_args = strip_system_libdirs(environment, self.for_machine, self.link_args)
+        self.link_args = self.__fix_bogus_link_args(self.link_args)
+        if not self._add_sub_dependency(threads_factory(environment, self.for_machine, {})):
+            self.is_found = False
+            return
+
+    def __fix_bogus_link_args(self, args):
+        """This function attempts to fix bogus link arguments that llvm-config
+        generates.
+
+        Currently it works around the following:
+            - FreeBSD: when statically linking -l/usr/lib/libexecinfo.so will
+              be generated, strip the -l in cases like this.
+            - Windows: We may get -LIBPATH:... which is later interpreted as
+              "-L IBPATH:...", if we're using an msvc like compilers convert
+              that to "/LIBPATH", otherwise to "-L ..."
+        """
+        cpp = self.env.coredata.compilers[self.for_machine]['cpp']
+
+        new_args = []
+        for arg in args:
+            if arg.startswith('-l') and arg.endswith('.so'):
+                new_args.append(arg.lstrip('-l'))
+            elif arg.startswith('-LIBPATH:'):
+                new_args.extend(cpp.get_linker_search_args(arg.lstrip('-LIBPATH:')))
+            else:
+                new_args.append(arg)
+        return new_args
+
+    def __check_libfiles(self, shared):
+        """Use llvm-config's --libfiles to check if libraries exist."""
+        mode = '--link-shared' if shared else '--link-static'
+
+        # Set self.required to true to force an exception in get_config_value
+        # if the returncode != 0
+        restore = self.required
+        self.required = True
+
+        try:
+            # It doesn't matter what the stage is, the caller needs to catch
+            # the exception anyway.
+            self.link_args = self.get_config_value(['--libfiles', mode], '')
+        finally:
+            self.required = restore
+
+    def _set_new_link_args(self, environment):
+        """How to set linker args for LLVM versions >= 3.9"""
+        mode = self.get_config_value(['--shared-mode'], 'link_args')[0]
+        if not self.static and mode == 'static':
+            # If llvm is configured with LLVM_BUILD_LLVM_DYLIB but not with
+            # LLVM_LINK_LLVM_DYLIB and not LLVM_BUILD_SHARED_LIBS (which
+            # upstream doesn't recommend using), then llvm-config will lie to
+            # you about how to do shared-linking. It wants to link to a a bunch
+            # of individual shared libs (which don't exist because llvm wasn't
+            # built with LLVM_BUILD_SHARED_LIBS.
+            #
+            # Therefore, we'll try to get the libfiles, if the return code is 0
+            # or we get an empty list, then we'll try to build a working
+            # configuration by hand.
+            try:
+                self.__check_libfiles(True)
+            except DependencyException:
+                lib_ext = get_shared_library_suffix(environment, self.for_machine)
+                libdir = self.get_config_value(['--libdir'], 'link_args')[0]
+                # Sort for reproducibility
+                matches = sorted(glob.iglob(os.path.join(libdir, 'libLLVM*{}'.format(lib_ext))))
+                if not matches:
+                    if self.required:
+                        raise
+                    self.is_found = False
+                    return
+
+                self.link_args = self.get_config_value(['--ldflags'], 'link_args')
+                libname = os.path.basename(matches[0]).rstrip(lib_ext).lstrip('lib')
+                self.link_args.append('-l{}'.format(libname))
+                return
+        elif self.static and mode == 'shared':
+            # If, however LLVM_BUILD_SHARED_LIBS is true # (*cough* gentoo *cough*)
+            # then this is correct. Building with LLVM_BUILD_SHARED_LIBS has a side
+            # effect, it stops the generation of static archives. Therefore we need
+            # to check for that and error out on static if this is the case
+            try:
+                self.__check_libfiles(False)
+            except DependencyException:
+                if self.required:
+                    raise
+                self.is_found = False
+                return
+
+        link_args = ['--link-static', '--system-libs'] if self.static else ['--link-shared']
+        self.link_args = self.get_config_value(
+            ['--libs', '--ldflags'] + link_args + list(self.required_modules),
+            'link_args')
+
+    def _set_old_link_args(self):
+        """Setting linker args for older versions of llvm.
+
+        Old versions of LLVM bring an extra level of insanity with them.
+        llvm-config will provide the correct arguments for static linking, but
+        not for shared-linnking, we have to figure those out ourselves, because
+        of course we do.
+        """
+        if self.static:
+            self.link_args = self.get_config_value(
+                ['--libs', '--ldflags', '--system-libs'] + list(self.required_modules),
+                'link_args')
+        else:
+            # llvm-config will provide arguments for static linking, so we get
+            # to figure out for ourselves what to link with. We'll do that by
+            # checking in the directory provided by --libdir for a library
+            # called libLLVM-.(so|dylib|dll)
+            libdir = self.get_config_value(['--libdir'], 'link_args')[0]
+
+            expected_name = 'libLLVM-{}'.format(self.version)
+            re_name = re.compile(r'{}.(so|dll|dylib)$'.format(expected_name))
+
+            for file_ in os.listdir(libdir):
+                if re_name.match(file_):
+                    self.link_args = ['-L{}'.format(libdir),
+                                      '-l{}'.format(os.path.splitext(file_.lstrip('lib'))[0])]
+                    break
+            else:
+                raise DependencyException(
+                    'Could not find a dynamically linkable library for LLVM.')
+
+    def check_components(self, modules, required=True):
+        """Check for llvm components (modules in meson terms).
+
+        The required option is whether the module is required, not whether LLVM
+        is required.
+        """
+        for mod in sorted(set(modules)):
+            status = ''
+
+            if mod not in self.provided_modules:
+                if required:
+                    self.is_found = False
+                    if self.required:
+                        raise DependencyException(
+                            'Could not find required LLVM Component: {}'.format(mod))
+                    status = '(missing)'
+                else:
+                    status = '(missing but optional)'
+            else:
+                self.required_modules.add(mod)
+
+            self.module_details.append(mod + status)
+
+    def log_details(self):
+        if self.module_details:
+            return 'modules: ' + ', '.join(self.module_details)
+        return ''
+
+class LLVMDependencyCMake(CMakeDependency):
+    def __init__(self, name: str, env, kwargs):
+        self.llvm_modules = stringlistify(extract_as_list(kwargs, 'modules'))
+        self.llvm_opt_modules = stringlistify(extract_as_list(kwargs, 'optional_modules'))
+        super().__init__(name, env, kwargs, language='cpp')
+
+        # Cmake will always create a statically linked binary, so don't use
+        # cmake if dynamic is required
+        if not self.static:
+            self.is_found = False
+            mlog.warning('Ignoring LLVM CMake dependency because dynamic was requested')
+            return
+
+        if self.traceparser is None:
+            return
+
+        # Extract extra include directories and definitions
+        inc_dirs = self.traceparser.get_cmake_var('PACKAGE_INCLUDE_DIRS')
+        defs = self.traceparser.get_cmake_var('PACKAGE_DEFINITIONS')
+        # LLVM explicitly uses space-separated variables rather than semicolon lists
+        if len(defs) == 1:
+            defs = defs[0].split(' ')
+        temp = ['-I' + x for x in inc_dirs] + defs
+        self.compile_args += [x for x in temp if x not in self.compile_args]
+        if not self._add_sub_dependency(threads_factory(env, self.for_machine, {})):
+            self.is_found = False
+            return
+
+    def _main_cmake_file(self) -> str:
+        # Use a custom CMakeLists.txt for LLVM
+        return 'CMakeListsLLVM.txt'
+
+    def _extra_cmake_opts(self) -> T.List[str]:
+        return ['-DLLVM_MESON_MODULES={}'.format(';'.join(self.llvm_modules + self.llvm_opt_modules))]
+
+    def _map_module_list(self, modules: T.List[T.Tuple[str, bool]], components: T.List[T.Tuple[str, bool]]) -> T.List[T.Tuple[str, bool]]:
+        res = []
+        for mod, required in modules:
+            cm_targets = self.traceparser.get_cmake_var('MESON_LLVM_TARGETS_{}'.format(mod))
+            if not cm_targets:
+                if required:
+                    raise self._gen_exception('LLVM module {} was not found'.format(mod))
+                else:
+                    mlog.warning('Optional LLVM module', mlog.bold(mod), 'was not found')
+                    continue
+            for i in cm_targets:
+                res += [(i, required)]
+        return res
+
+    def _original_module_name(self, module: str) -> str:
+        orig_name = self.traceparser.get_cmake_var('MESON_TARGET_TO_LLVM_{}'.format(module))
+        if orig_name:
+            return orig_name[0]
+        return module
+
+
+class ValgrindDependency(PkgConfigDependency):
+    '''
+    Consumers of Valgrind usually only need the compile args and do not want to
+    link to its (static) libraries.
+    '''
+    def __init__(self, env, kwargs):
+        super().__init__('valgrind', env, kwargs)
+
+    def get_link_args(self, **kwargs):
+        return []
+
+
+class ZlibSystemDependency(ExternalDependency):
+
+    def __init__(self, name: str, environment: 'Environment', kwargs: T.Dict[str, T.Any]):
+        super().__init__(name, environment, kwargs)
+
+        m = self.env.machines[self.for_machine]
+
+        # I'm not sure this is entirely correct. What if we're cross compiling
+        # from something to macOS?
+        if ((m.is_darwin() and isinstance(self.clib_compiler, (AppleClangCCompiler, AppleClangCPPCompiler))) or
+                m.is_freebsd() or m.is_dragonflybsd()):
+            self.is_found = True
+            self.link_args = ['-lz']
+
+            # No need to set includes,
+            # on macos xcode/clang will do that for us.
+            # on freebsd zlib.h is in /usr/include
+        elif m.is_windows():
+            if self.clib_compiler.get_argument_syntax() == 'msvc':
+                libs = ['zlib1' 'zlib']
+            else:
+                libs = ['z']
+            for lib in libs:
+                l = self.clib_compiler.find_library(lib, environment, [])
+                h = self.clib_compiler.has_header('zlib.h', '', environment, dependencies=[self])
+                if l and h:
+                    self.is_found = True
+                    self.link_args = l
+                    break
+            else:
+                return
+        else:
+            mlog.debug('Unsupported OS {}'.format(m.system))
+            return
+
+        v, _ = self.clib_compiler.get_define('ZLIB_VERSION', '#include ', self.env, [], [self])
+        self.version = v.strip('"')
+
+
+    @staticmethod
+    def get_methods():
+        return [DependencyMethods.SYSTEM]
+
+
+llvm_factory = DependencyFactory(
+    'LLVM',
+    [DependencyMethods.CMAKE, DependencyMethods.CONFIG_TOOL],
+    cmake_class=LLVMDependencyCMake,
+    configtool_class=LLVMDependencyConfigTool,
+)
+
+gtest_factory = DependencyFactory(
+    'gtest',
+    [DependencyMethods.PKGCONFIG, DependencyMethods.SYSTEM],
+    pkgconfig_class=GTestDependencyPC,
+    system_class=GTestDependencySystem,
+)
+
+gmock_factory = DependencyFactory(
+    'gmock',
+    [DependencyMethods.PKGCONFIG, DependencyMethods.SYSTEM],
+    pkgconfig_class=GMockDependencyPC,
+    system_class=GMockDependencySystem,
+)
+
+zlib_factory = DependencyFactory(
+    'zlib',
+    [DependencyMethods.PKGCONFIG, DependencyMethods.CMAKE, DependencyMethods.SYSTEM],
+    cmake_name='ZLIB',
+    system_class=ZlibSystemDependency,
+)
diff --git a/meson/mesonbuild/dependencies/hdf5.py b/meson/mesonbuild/dependencies/hdf5.py
new file mode 100644
index 000000000..fadd10980
--- /dev/null
+++ b/meson/mesonbuild/dependencies/hdf5.py
@@ -0,0 +1,130 @@
+# Copyright 2013-2019 The Meson development team
+
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+
+#     http://www.apache.org/licenses/LICENSE-2.0
+
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# This file contains the detection logic for miscellaneous external dependencies.
+
+import subprocess
+import shutil
+from pathlib import Path
+
+from .. import mlog
+from ..mesonlib import split_args, listify
+from .base import (DependencyException, DependencyMethods, ExternalDependency, ExternalProgram,
+                   PkgConfigDependency)
+
+class HDF5Dependency(ExternalDependency):
+
+    def __init__(self, environment, kwargs):
+        language = kwargs.get('language', 'c')
+        super().__init__('hdf5', environment, kwargs, language=language)
+        kwargs['required'] = False
+        kwargs['silent'] = True
+        self.is_found = False
+        methods = listify(self.methods)
+
+        if language not in ('c', 'cpp', 'fortran'):
+            raise DependencyException('Language {} is not supported with HDF5.'.format(language))
+
+        if set([DependencyMethods.AUTO, DependencyMethods.PKGCONFIG]).intersection(methods):
+            pkgconfig_files = ['hdf5', 'hdf5-serial']
+            PCEXE = shutil.which('pkg-config')
+            if PCEXE:
+                # some distros put hdf5-1.2.3.pc with version number in .pc filename.
+                ret = subprocess.run([PCEXE, '--list-all'], stdout=subprocess.PIPE, stderr=subprocess.DEVNULL,
+                                     universal_newlines=True)
+                if ret.returncode == 0:
+                    for pkg in ret.stdout.split('\n'):
+                        if pkg.startswith(('hdf5')):
+                            pkgconfig_files.append(pkg.split(' ', 1)[0])
+                    pkgconfig_files = list(set(pkgconfig_files))  # dedupe
+
+            for pkg in pkgconfig_files:
+                pkgdep = PkgConfigDependency(pkg, environment, kwargs, language=self.language)
+                if not pkgdep.found():
+                    continue
+
+                self.compile_args = pkgdep.get_compile_args()
+                # some broken pkgconfig don't actually list the full path to the needed includes
+                newinc = []
+                for arg in self.compile_args:
+                    if arg.startswith('-I'):
+                        stem = 'static' if kwargs.get('static', False) else 'shared'
+                        if (Path(arg[2:]) / stem).is_dir():
+                            newinc.append('-I' + str(Path(arg[2:]) / stem))
+                self.compile_args += newinc
+
+                # derive needed libraries by language
+                pd_link_args = pkgdep.get_link_args()
+                link_args = []
+                for larg in pd_link_args:
+                    lpath = Path(larg)
+                    # some pkg-config hdf5.pc (e.g. Ubuntu) don't include the commonly-used HL HDF5 libraries,
+                    # so let's add them if they exist
+                    # additionally, some pkgconfig HDF5 HL files are malformed so let's be sure to find HL anyway
+                    if lpath.is_file():
+                        hl = []
+                        if language == 'cpp':
+                            hl += ['_hl_cpp', '_cpp']
+                        elif language == 'fortran':
+                            hl += ['_hl_fortran', 'hl_fortran', '_fortran']
+                        hl += ['_hl']  # C HL library, always needed
+
+                        suffix = '.' + lpath.name.split('.', 1)[1]  # in case of .dll.a
+                        for h in hl:
+                            hlfn = lpath.parent / (lpath.name.split('.', 1)[0] + h + suffix)
+                            if hlfn.is_file():
+                                link_args.append(str(hlfn))
+                        # HDF5 C libs are required by other HDF5 languages
+                        link_args.append(larg)
+                    else:
+                        link_args.append(larg)
+
+                self.link_args = link_args
+                self.version = pkgdep.get_version()
+                self.is_found = True
+                self.pcdep = pkgdep
+                return
+
+        if DependencyMethods.AUTO in methods:
+            wrappers = {'c': 'h5cc', 'cpp': 'h5c++', 'fortran': 'h5fc'}
+            comp_args = []
+            link_args = []
+            # have to always do C as well as desired language
+            for lang in set([language, 'c']):
+                prog = ExternalProgram(wrappers[lang], silent=True)
+                if not prog.found():
+                    return
+                cmd = prog.get_command() + ['-show']
+                p = subprocess.run(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE, universal_newlines=True, timeout=15)
+                if p.returncode != 0:
+                    mlog.debug('Command', mlog.bold(cmd), 'failed to run:')
+                    mlog.debug(mlog.bold('Standard output\n'), p.stdout)
+                    mlog.debug(mlog.bold('Standard error\n'), p.stderr)
+                    return
+                args = split_args(p.stdout)
+                for arg in args[1:]:
+                    if arg.startswith(('-I', '-f', '-D')) or arg == '-pthread':
+                        comp_args.append(arg)
+                    elif arg.startswith(('-L', '-l', '-Wl')):
+                        link_args.append(arg)
+                    elif Path(arg).is_file():
+                        link_args.append(arg)
+            self.compile_args = comp_args
+            self.link_args = link_args
+            self.is_found = True
+            return
+
+    @staticmethod
+    def get_methods():
+        return [DependencyMethods.AUTO, DependencyMethods.PKGCONFIG]
diff --git a/meson/mesonbuild/dependencies/misc.py b/meson/mesonbuild/dependencies/misc.py
new file mode 100644
index 000000000..47694aff5
--- /dev/null
+++ b/meson/mesonbuild/dependencies/misc.py
@@ -0,0 +1,501 @@
+# Copyright 2013-2019 The Meson development team
+
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+
+#     http://www.apache.org/licenses/LICENSE-2.0
+
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# This file contains the detection logic for miscellaneous external dependencies.
+
+from pathlib import Path
+import functools
+import re
+import sysconfig
+import typing as T
+
+from .. import mlog
+from .. import mesonlib
+from ..environment import detect_cpu_family
+
+from .base import (
+    DependencyException, DependencyMethods, ExternalDependency,
+    PkgConfigDependency, CMakeDependency, ConfigToolDependency,
+    factory_methods, DependencyFactory,
+)
+
+if T.TYPE_CHECKING:
+    from ..environment import Environment, MachineChoice
+    from .base import DependencyType  # noqa: F401
+
+
+@factory_methods({DependencyMethods.PKGCONFIG, DependencyMethods.CMAKE})
+def netcdf_factory(env: 'Environment', for_machine: 'MachineChoice',
+                   kwargs: T.Dict[str, T.Any], methods: T.List[DependencyMethods]) -> T.List['DependencyType']:
+    language = kwargs.get('language', 'c')
+    if language not in ('c', 'cpp', 'fortran'):
+        raise DependencyException('Language {} is not supported with NetCDF.'.format(language))
+
+    candidates = []  # type: T.List['DependencyType']
+
+    if DependencyMethods.PKGCONFIG in methods:
+        if language == 'fortran':
+            pkg = 'netcdf-fortran'
+        else:
+            pkg = 'netcdf'
+
+        candidates.append(functools.partial(PkgConfigDependency, pkg, env, kwargs, language=language))
+
+    if DependencyMethods.CMAKE in methods:
+        candidates.append(functools.partial(CMakeDependency, 'NetCDF', env, kwargs, language=language))
+
+    return candidates
+
+
+class OpenMPDependency(ExternalDependency):
+    # Map date of specification release (which is the macro value) to a version.
+    VERSIONS = {
+        '201811': '5.0',
+        '201611': '5.0-revision1',  # This is supported by ICC 19.x
+        '201511': '4.5',
+        '201307': '4.0',
+        '201107': '3.1',
+        '200805': '3.0',
+        '200505': '2.5',
+        '200203': '2.0',
+        '199810': '1.0',
+    }
+
+    def __init__(self, environment, kwargs):
+        language = kwargs.get('language')
+        super().__init__('openmp', environment, kwargs, language=language)
+        self.is_found = False
+        if self.clib_compiler.get_id() == 'pgi':
+            # through at least PGI 19.4, there is no macro defined for OpenMP, but OpenMP 3.1 is supported.
+            self.version = '3.1'
+            self.is_found = True
+            self.compile_args = self.link_args = self.clib_compiler.openmp_flags()
+            return
+        try:
+            openmp_date = self.clib_compiler.get_define(
+                '_OPENMP', '', self.env, self.clib_compiler.openmp_flags(), [self], disable_cache=True)[0]
+        except mesonlib.EnvironmentException as e:
+            mlog.debug('OpenMP support not available in the compiler')
+            mlog.debug(e)
+            openmp_date = None
+
+        if openmp_date:
+            self.version = self.VERSIONS[openmp_date]
+            # Flang has omp_lib.h
+            header_names = ('omp.h', 'omp_lib.h')
+            for name in header_names:
+                if self.clib_compiler.has_header(name, '', self.env, dependencies=[self], disable_cache=True)[0]:
+                    self.is_found = True
+                    self.compile_args = self.clib_compiler.openmp_flags()
+                    self.link_args = self.clib_compiler.openmp_link_flags()
+                    break
+            if not self.is_found:
+                mlog.log(mlog.yellow('WARNING:'), 'OpenMP found but omp.h missing.')
+
+
+class ThreadDependency(ExternalDependency):
+    def __init__(self, name: str, environment, kwargs):
+        super().__init__(name, environment, kwargs)
+        self.is_found = True
+        # Happens if you are using a language with threads
+        # concept without C, such as plain Cuda.
+        if self.clib_compiler is None:
+            self.compile_args = []
+            self.link_args = []
+        else:
+            self.compile_args = self.clib_compiler.thread_flags(environment)
+            self.link_args = self.clib_compiler.thread_link_flags(environment)
+
+    @staticmethod
+    def get_methods():
+        return [DependencyMethods.AUTO, DependencyMethods.CMAKE]
+
+
+class BlocksDependency(ExternalDependency):
+    def __init__(self, environment, kwargs):
+        super().__init__('blocks', environment, kwargs)
+        self.name = 'blocks'
+        self.is_found = False
+
+        if self.env.machines[self.for_machine].is_darwin():
+            self.compile_args = []
+            self.link_args = []
+        else:
+            self.compile_args = ['-fblocks']
+            self.link_args = ['-lBlocksRuntime']
+
+            if not self.clib_compiler.has_header('Block.h', '', environment, disable_cache=True) or \
+               not self.clib_compiler.find_library('BlocksRuntime', environment, []):
+                mlog.log(mlog.red('ERROR:'), 'BlocksRuntime not found.')
+                return
+
+        source = '''
+            int main(int argc, char **argv)
+            {
+                int (^callback)(void) = ^ int (void) { return 0; };
+                return callback();
+            }'''
+
+        with self.clib_compiler.compile(source, extra_args=self.compile_args + self.link_args) as p:
+            if p.returncode != 0:
+                mlog.log(mlog.red('ERROR:'), 'Compiler does not support blocks extension.')
+                return
+
+            self.is_found = True
+
+
+class Python3DependencySystem(ExternalDependency):
+    def __init__(self, name, environment, kwargs):
+        super().__init__(name, environment, kwargs)
+
+        if not environment.machines.matches_build_machine(self.for_machine):
+            return
+        if not environment.machines[self.for_machine].is_windows():
+            return
+
+        self.name = 'python3'
+        self.static = kwargs.get('static', False)
+        # We can only be sure that it is Python 3 at this point
+        self.version = '3'
+        self._find_libpy3_windows(environment)
+
+    @staticmethod
+    def get_windows_python_arch():
+        pyplat = sysconfig.get_platform()
+        if pyplat == 'mingw':
+            pycc = sysconfig.get_config_var('CC')
+            if pycc.startswith('x86_64'):
+                return '64'
+            elif pycc.startswith(('i686', 'i386')):
+                return '32'
+            else:
+                mlog.log('MinGW Python built with unknown CC {!r}, please file'
+                         'a bug'.format(pycc))
+                return None
+        elif pyplat == 'win32':
+            return '32'
+        elif pyplat in ('win64', 'win-amd64'):
+            return '64'
+        mlog.log('Unknown Windows Python platform {!r}'.format(pyplat))
+        return None
+
+    def get_windows_link_args(self):
+        pyplat = sysconfig.get_platform()
+        if pyplat.startswith('win'):
+            vernum = sysconfig.get_config_var('py_version_nodot')
+            if self.static:
+                libpath = Path('libs') / 'libpython{}.a'.format(vernum)
+            else:
+                comp = self.get_compiler()
+                if comp.id == "gcc":
+                    libpath = 'python{}.dll'.format(vernum)
+                else:
+                    libpath = Path('libs') / 'python{}.lib'.format(vernum)
+            lib = Path(sysconfig.get_config_var('base')) / libpath
+        elif pyplat == 'mingw':
+            if self.static:
+                libname = sysconfig.get_config_var('LIBRARY')
+            else:
+                libname = sysconfig.get_config_var('LDLIBRARY')
+            lib = Path(sysconfig.get_config_var('LIBDIR')) / libname
+        if not lib.exists():
+            mlog.log('Could not find Python3 library {!r}'.format(str(lib)))
+            return None
+        return [str(lib)]
+
+    def _find_libpy3_windows(self, env):
+        '''
+        Find python3 libraries on Windows and also verify that the arch matches
+        what we are building for.
+        '''
+        pyarch = self.get_windows_python_arch()
+        if pyarch is None:
+            self.is_found = False
+            return
+        arch = detect_cpu_family(env.coredata.compilers.host)
+        if arch == 'x86':
+            arch = '32'
+        elif arch == 'x86_64':
+            arch = '64'
+        else:
+            # We can't cross-compile Python 3 dependencies on Windows yet
+            mlog.log('Unknown architecture {!r} for'.format(arch),
+                     mlog.bold(self.name))
+            self.is_found = False
+            return
+        # Pyarch ends in '32' or '64'
+        if arch != pyarch:
+            mlog.log('Need', mlog.bold(self.name), 'for {}-bit, but '
+                     'found {}-bit'.format(arch, pyarch))
+            self.is_found = False
+            return
+        # This can fail if the library is not found
+        largs = self.get_windows_link_args()
+        if largs is None:
+            self.is_found = False
+            return
+        self.link_args = largs
+        # Compile args
+        inc = sysconfig.get_path('include')
+        platinc = sysconfig.get_path('platinclude')
+        self.compile_args = ['-I' + inc]
+        if inc != platinc:
+            self.compile_args.append('-I' + platinc)
+        self.version = sysconfig.get_config_var('py_version')
+        self.is_found = True
+
+    @staticmethod
+    def get_methods():
+        if mesonlib.is_windows():
+            return [DependencyMethods.PKGCONFIG, DependencyMethods.SYSCONFIG]
+        elif mesonlib.is_osx():
+            return [DependencyMethods.PKGCONFIG, DependencyMethods.EXTRAFRAMEWORK]
+        else:
+            return [DependencyMethods.PKGCONFIG]
+
+    def log_tried(self):
+        return 'sysconfig'
+
+class PcapDependencyConfigTool(ConfigToolDependency):
+
+    tools = ['pcap-config']
+    tool_name = 'pcap-config'
+
+    def __init__(self, name: str, environment: 'Environment', kwargs: T.Dict[str, T.Any]):
+        super().__init__(name, environment, kwargs)
+        if not self.is_found:
+            return
+        self.compile_args = self.get_config_value(['--cflags'], 'compile_args')
+        self.link_args = self.get_config_value(['--libs'], 'link_args')
+        self.version = self.get_pcap_lib_version()
+
+    @staticmethod
+    def get_methods():
+        return [DependencyMethods.PKGCONFIG, DependencyMethods.CONFIG_TOOL]
+
+    def get_pcap_lib_version(self):
+        # Since we seem to need to run a program to discover the pcap version,
+        # we can't do that when cross-compiling
+        # FIXME: this should be handled if we have an exe_wrapper
+        if not self.env.machines.matches_build_machine(self.for_machine):
+            return None
+
+        v = self.clib_compiler.get_return_value('pcap_lib_version', 'string',
+                                                '#include ', self.env, [], [self])
+        v = re.sub(r'libpcap version ', '', v)
+        v = re.sub(r' -- Apple version.*$', '', v)
+        return v
+
+
+class CupsDependencyConfigTool(ConfigToolDependency):
+
+    tools = ['cups-config']
+    tool_name = 'cups-config'
+
+    def __init__(self, name: str, environment: 'Environment', kwargs: T.Dict[str, T.Any]):
+        super().__init__(name, environment, kwargs)
+        if not self.is_found:
+            return
+        self.compile_args = self.get_config_value(['--cflags'], 'compile_args')
+        self.link_args = self.get_config_value(['--ldflags', '--libs'], 'link_args')
+
+    @staticmethod
+    def get_methods():
+        if mesonlib.is_osx():
+            return [DependencyMethods.PKGCONFIG, DependencyMethods.CONFIG_TOOL, DependencyMethods.EXTRAFRAMEWORK, DependencyMethods.CMAKE]
+        else:
+            return [DependencyMethods.PKGCONFIG, DependencyMethods.CONFIG_TOOL, DependencyMethods.CMAKE]
+
+
+class LibWmfDependencyConfigTool(ConfigToolDependency):
+
+    tools = ['libwmf-config']
+    tool_name = 'libwmf-config'
+
+    def __init__(self, name: str, environment: 'Environment', kwargs: T.Dict[str, T.Any]):
+        super().__init__(name, environment, kwargs)
+        if not self.is_found:
+            return
+        self.compile_args = self.get_config_value(['--cflags'], 'compile_args')
+        self.link_args = self.get_config_value(['--libs'], 'link_args')
+
+    @staticmethod
+    def get_methods():
+        return [DependencyMethods.PKGCONFIG, DependencyMethods.CONFIG_TOOL]
+
+
+class LibGCryptDependencyConfigTool(ConfigToolDependency):
+
+    tools = ['libgcrypt-config']
+    tool_name = 'libgcrypt-config'
+
+    def __init__(self, name: str, environment: 'Environment', kwargs: T.Dict[str, T.Any]):
+        super().__init__(name, environment, kwargs)
+        if not self.is_found:
+            return
+        self.compile_args = self.get_config_value(['--cflags'], 'compile_args')
+        self.link_args = self.get_config_value(['--libs'], 'link_args')
+        self.version = self.get_config_value(['--version'], 'version')[0]
+
+    @staticmethod
+    def get_methods():
+        return [DependencyMethods.PKGCONFIG, DependencyMethods.CONFIG_TOOL]
+
+
+class GpgmeDependencyConfigTool(ConfigToolDependency):
+
+    tools = ['gpgme-config']
+    tool_name = 'gpg-config'
+
+    def __init__(self, name: str, environment: 'Environment', kwargs: T.Dict[str, T.Any]):
+        super().__init__(name, environment, kwargs)
+        if not self.is_found:
+            return
+        self.compile_args = self.get_config_value(['--cflags'], 'compile_args')
+        self.link_args = self.get_config_value(['--libs'], 'link_args')
+        self.version = self.get_config_value(['--version'], 'version')[0]
+
+    @staticmethod
+    def get_methods():
+        return [DependencyMethods.PKGCONFIG, DependencyMethods.CONFIG_TOOL]
+
+
+class ShadercDependency(ExternalDependency):
+
+    def __init__(self, environment, kwargs):
+        super().__init__('shaderc', environment, kwargs)
+
+        static_lib = 'shaderc_combined'
+        shared_lib = 'shaderc_shared'
+
+        libs = [shared_lib, static_lib]
+        if self.static:
+            libs.reverse()
+
+        cc = self.get_compiler()
+
+        for lib in libs:
+            self.link_args = cc.find_library(lib, environment, [])
+            if self.link_args is not None:
+                self.is_found = True
+
+                if self.static and lib != static_lib:
+                    mlog.warning('Static library {!r} not found for dependency {!r}, may '
+                                 'not be statically linked'.format(static_lib, self.name))
+
+                break
+
+    def log_tried(self):
+        return 'system'
+
+    @staticmethod
+    def get_methods():
+        return [DependencyMethods.SYSTEM, DependencyMethods.PKGCONFIG]
+
+
+@factory_methods({DependencyMethods.PKGCONFIG})
+def curses_factory(env: 'Environment', for_machine: 'MachineChoice',
+                   kwargs: T.Dict[str, T.Any], methods: T.List[DependencyMethods]) -> T.List['DependencyType']:
+    candidates = []  # type: T.List['DependencyType']
+
+    if DependencyMethods.PKGCONFIG in methods:
+        pkgconfig_files = ['ncurses', 'ncursesw']
+        for pkg in pkgconfig_files:
+            candidates.append(functools.partial(PkgConfigDependency, pkg, env, kwargs))
+
+    return candidates
+
+
+@factory_methods({DependencyMethods.PKGCONFIG, DependencyMethods.SYSTEM})
+def shaderc_factory(env: 'Environment', for_machine: 'MachineChoice',
+                    kwargs: T.Dict[str, T.Any], methods: T.List[DependencyMethods]) -> T.List['DependencyType']:
+    """Custom DependencyFactory for ShaderC.
+
+    ShaderC's odd you get three different libraries from the same build
+    thing are just easier to represent as a separate function than
+    twisting DependencyFactory even more.
+    """
+    candidates = []  # type: T.List['DependencyType']
+
+    if DependencyMethods.PKGCONFIG in methods:
+        # ShaderC packages their shared and static libs together
+        # and provides different pkg-config files for each one. We
+        # smooth over this difference by handling the static
+        # keyword before handing off to the pkg-config handler.
+        shared_libs = ['shaderc']
+        static_libs = ['shaderc_combined', 'shaderc_static']
+
+        if kwargs.get('static', False):
+            c = [functools.partial(PkgConfigDependency, name, env, kwargs)
+                 for name in static_libs + shared_libs]
+        else:
+            c = [functools.partial(PkgConfigDependency, name, env, kwargs)
+                 for name in shared_libs + static_libs]
+        candidates.extend(c)
+
+    if DependencyMethods.SYSTEM in methods:
+        candidates.append(functools.partial(ShadercDependency, env, kwargs))
+
+    return candidates
+
+
+cups_factory = DependencyFactory(
+    'cups',
+    [DependencyMethods.PKGCONFIG, DependencyMethods.CONFIG_TOOL, DependencyMethods.EXTRAFRAMEWORK, DependencyMethods.CMAKE],
+    configtool_class=CupsDependencyConfigTool,
+    cmake_name='Cups',
+)
+
+gpgme_factory = DependencyFactory(
+    'gpgme',
+    [DependencyMethods.PKGCONFIG, DependencyMethods.CONFIG_TOOL],
+    configtool_class=GpgmeDependencyConfigTool,
+)
+
+libgcrypt_factory = DependencyFactory(
+    'libgcrypt',
+    [DependencyMethods.PKGCONFIG, DependencyMethods.CONFIG_TOOL],
+    configtool_class=LibGCryptDependencyConfigTool,
+)
+
+libwmf_factory = DependencyFactory(
+    'libwmf',
+    [DependencyMethods.PKGCONFIG, DependencyMethods.CONFIG_TOOL],
+    configtool_class=LibWmfDependencyConfigTool,
+)
+
+pcap_factory = DependencyFactory(
+    'pcap',
+    [DependencyMethods.PKGCONFIG, DependencyMethods.CONFIG_TOOL],
+    configtool_class=PcapDependencyConfigTool,
+    pkgconfig_name='libpcap',
+)
+
+python3_factory = DependencyFactory(
+    'python3',
+    [DependencyMethods.PKGCONFIG, DependencyMethods.SYSTEM, DependencyMethods.EXTRAFRAMEWORK],
+    system_class=Python3DependencySystem,
+    # There is no version number in the macOS version number
+    framework_name='Python',
+    # There is a python in /System/Library/Frameworks, but thats python 2.x,
+    # Python 3 will always be in /Library
+    extra_kwargs={'paths': ['/Library/Frameworks']},
+)
+
+threads_factory = DependencyFactory(
+    'threads',
+    [DependencyMethods.SYSTEM, DependencyMethods.CMAKE],
+    cmake_name='Threads',
+    system_class=ThreadDependency,
+)
diff --git a/meson/mesonbuild/dependencies/mpi.py b/meson/mesonbuild/dependencies/mpi.py
new file mode 100644
index 000000000..a20fb9cab
--- /dev/null
+++ b/meson/mesonbuild/dependencies/mpi.py
@@ -0,0 +1,231 @@
+# Copyright 2013-2019 The Meson development team
+
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+
+#     http://www.apache.org/licenses/LICENSE-2.0
+
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import functools
+import typing as T
+import os
+import re
+
+from .base import (DependencyMethods, PkgConfigDependency, factory_methods,
+                   ConfigToolDependency, detect_compiler, ExternalDependency)
+from ..environment import detect_cpu_family
+
+if T.TYPE_CHECKING:
+    from .base import DependencyType
+    from ..compilers import Compiler
+    from ..environment import Environment, MachineChoice
+
+
+@factory_methods({DependencyMethods.PKGCONFIG, DependencyMethods.CONFIG_TOOL, DependencyMethods.SYSTEM})
+def mpi_factory(env: 'Environment', for_machine: 'MachineChoice',
+                kwargs: T.Dict[str, T.Any], methods: T.List[DependencyMethods]) -> T.List['DependencyType']:
+    language = kwargs.get('language', 'c')
+    if language not in {'c', 'cpp', 'fortran'}:
+        # OpenMPI doesn't work without any other languages
+        return []
+
+    candidates = []
+    compiler = detect_compiler('mpi', env, for_machine, language)
+    compiler_is_intel = compiler.get_id() in {'intel', 'intel-cl'}
+
+    # Only OpenMPI has pkg-config, and it doesn't work with the intel compilers
+    if DependencyMethods.PKGCONFIG in methods and not compiler_is_intel:
+        pkg_name = None
+        if language == 'c':
+            pkg_name = 'ompi-c'
+        elif language == 'cpp':
+            pkg_name = 'ompi-cxx'
+        elif language == 'fortran':
+            pkg_name = 'ompi-fort'
+        candidates.append(functools.partial(
+            PkgConfigDependency, pkg_name, env, kwargs, language=language))
+
+    if DependencyMethods.CONFIG_TOOL in methods:
+        nwargs = kwargs.copy()
+
+        if compiler_is_intel:
+            if env.machines[for_machine].is_windows():
+                nwargs['version_arg'] = '-v'
+                nwargs['returncode_value'] = 3
+
+            if language == 'c':
+                tool_names = [os.environ.get('I_MPI_CC'), 'mpiicc']
+            elif language == 'cpp':
+                tool_names = [os.environ.get('I_MPI_CXX'), 'mpiicpc']
+            elif language == 'fortran':
+                tool_names = [os.environ.get('I_MPI_F90'), 'mpiifort']
+
+            cls = IntelMPIConfigToolDependency  # type: T.Type[ConfigToolDependency]
+        else: # OpenMPI, which doesn't work with intel
+            #
+            # We try the environment variables for the tools first, but then
+            # fall back to the hardcoded names
+            if language == 'c':
+                tool_names = [os.environ.get('MPICC'), 'mpicc']
+            elif language == 'cpp':
+                tool_names = [os.environ.get('MPICXX'), 'mpic++', 'mpicxx', 'mpiCC']
+            elif language == 'fortran':
+                tool_names = [os.environ.get(e) for e in ['MPIFC', 'MPIF90', 'MPIF77']]
+                tool_names.extend(['mpifort', 'mpif90', 'mpif77'])
+
+            cls = OpenMPIConfigToolDependency
+
+        tool_names = [t for t in tool_names if t]  # remove empty environment variables
+        assert tool_names
+
+        nwargs['tools'] = tool_names
+        candidates.append(functools.partial(
+            cls, tool_names[0], env, nwargs, language=language))
+
+    if DependencyMethods.SYSTEM in methods:
+        candidates.append(functools.partial(
+            MSMPIDependency, 'msmpi', env, kwargs, language=language))
+
+    return candidates
+
+
+class _MPIConfigToolDependency(ConfigToolDependency):
+
+    def _filter_compile_args(self, args: T.Sequence[str]) -> T.List[str]:
+        """
+        MPI wrappers return a bunch of garbage args.
+        Drop -O2 and everything that is not needed.
+        """
+        result = []
+        multi_args = ('-I', )
+        if self.language == 'fortran':
+            fc = self.env.coredata.compilers[self.for_machine]['fortran']
+            multi_args += fc.get_module_incdir_args()
+
+        include_next = False
+        for f in args:
+            if f.startswith(('-D', '-f') + multi_args) or f == '-pthread' \
+                    or (f.startswith('-W') and f != '-Wall' and not f.startswith('-Werror')):
+                result.append(f)
+                if f in multi_args:
+                    # Path is a separate argument.
+                    include_next = True
+            elif include_next:
+                include_next = False
+                result.append(f)
+        return result
+
+    def _filter_link_args(self, args: T.Sequence[str]) -> T.List[str]:
+        """
+        MPI wrappers return a bunch of garbage args.
+        Drop -O2 and everything that is not needed.
+        """
+        result = []
+        include_next = False
+        for f in args:
+            if self._is_link_arg(f):
+                result.append(f)
+                if f in ('-L', '-Xlinker'):
+                    include_next = True
+            elif include_next:
+                include_next = False
+                result.append(f)
+        return result
+
+    def _is_link_arg(self, f: str) -> bool:
+        if self.clib_compiler.id == 'intel-cl':
+            return f == '/link' or f.startswith('/LIBPATH') or f.endswith('.lib')   # always .lib whether static or dynamic
+        else:
+            return (f.startswith(('-L', '-l', '-Xlinker')) or
+                    f == '-pthread' or
+                    (f.startswith('-W') and f != '-Wall' and not f.startswith('-Werror')))
+
+
+class IntelMPIConfigToolDependency(_MPIConfigToolDependency):
+
+    """Wrapper around Intel's mpiicc and friends."""
+
+    version_arg = '-v'  # --version is not the same as -v
+
+    def __init__(self, name: str, env: 'Environment', kwargs: T.Dict[str, T.Any],
+                 language: T.Optional[str] = None):
+        super().__init__(name, env, kwargs, language=language)
+        if not self.is_found:
+            return
+
+        args = self.get_config_value(['-show'], 'link and compile args')
+        self.compile_args = self._filter_compile_args(args)
+        self.link_args = self._filter_link_args(args)
+
+    def _sanitize_version(self, out: str) -> str:
+        v = re.search(r'(\d{4}) Update (\d)', out)
+        if v:
+            return '{}.{}'.format(v.group(1), v.group(2))
+        return out
+
+
+class OpenMPIConfigToolDependency(_MPIConfigToolDependency):
+
+    """Wrapper around OpenMPI mpicc and friends."""
+
+    version_arg = '--showme:version'
+
+    def __init__(self, name: str, env: 'Environment', kwargs: T.Dict[str, T.Any],
+                 language: T.Optional[str] = None):
+        super().__init__(name, env, kwargs, language=language)
+        if not self.is_found:
+            return
+
+        c_args = self.get_config_value(['--showme:compile'], 'compile_args')
+        self.compile_args = self._filter_compile_args(c_args)
+
+        l_args = self.get_config_value(['--showme:link'], 'link_args')
+        self.link_args = self._filter_link_args(l_args)
+
+    def _sanitize_version(self, out: str) -> str:
+        v = re.search(r'\d+.\d+.\d+', out)
+        if v:
+            return v.group(0)
+        return out
+
+
+class MSMPIDependency(ExternalDependency):
+
+    """The Microsoft MPI."""
+
+    def __init__(self, name: str, env: 'Environment', kwargs: T.Dict[str, T.Any],
+                 language: T.Optional[str] = None):
+        super().__init__(name, env, kwargs, language=language)
+        # MSMPI only supports the C API
+        if language not in {'c', 'fortran', None}:
+            self.is_found = False
+            return
+        # MSMPI is only for windows, obviously
+        if not self.env.machines[self.for_machine].is_windows():
+            return
+
+        incdir = os.environ.get('MSMPI_INC')
+        arch = detect_cpu_family(self.env.coredata.compilers.host)
+        libdir = None
+        if arch == 'x86':
+            libdir = os.environ.get('MSMPI_LIB32')
+            post = 'x86'
+        elif arch == 'x86_64':
+            libdir = os.environ.get('MSMPI_LIB64')
+            post = 'x64'
+
+        if libdir is None or incdir is None:
+            self.is_found = False
+            return
+
+        self.is_found = True
+        self.link_args = ['-l' + os.path.join(libdir, 'msmpi')]
+        self.compile_args = ['-I' + incdir, '-I' + os.path.join(incdir, post)]
+        if self.language == 'fortran':
+            self.link_args.append('-l' + os.path.join(libdir, 'msmpifec'))
diff --git a/meson/mesonbuild/dependencies/platform.py b/meson/mesonbuild/dependencies/platform.py
new file mode 100644
index 000000000..6a32e36a1
--- /dev/null
+++ b/meson/mesonbuild/dependencies/platform.py
@@ -0,0 +1,54 @@
+# Copyright 2013-2017 The Meson development team
+
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+
+#     http://www.apache.org/licenses/LICENSE-2.0
+
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# This file contains the detection logic for external dependencies that are
+# platform-specific (generally speaking).
+
+from .base import ExternalDependency, DependencyException
+from ..mesonlib import MesonException
+
+class AppleFrameworks(ExternalDependency):
+    def __init__(self, env, kwargs):
+        super().__init__('appleframeworks', env, kwargs)
+        modules = kwargs.get('modules', [])
+        if isinstance(modules, str):
+            modules = [modules]
+        if not modules:
+            raise DependencyException("AppleFrameworks dependency requires at least one module.")
+        self.frameworks = modules
+        if not self.clib_compiler:
+            raise DependencyException('No C-like compilers are available, cannot find the framework')
+        self.is_found = True
+        for f in self.frameworks:
+            try:
+                args = self.clib_compiler.find_framework(f, env, [])
+            except MesonException as e:
+                if 'non-clang' in str(e):
+                    self.is_found = False
+                    self.link_args = []
+                    self.compile_args = []
+                    return
+                raise
+
+            if args is not None:
+                # No compile args are needed for system frameworks
+                self.link_args += args
+            else:
+                self.is_found = False
+
+    def log_info(self):
+        return ', '.join(self.frameworks)
+
+    def log_tried(self):
+        return 'framework'
diff --git a/meson/mesonbuild/dependencies/scalapack.py b/meson/mesonbuild/dependencies/scalapack.py
new file mode 100644
index 000000000..8774746fc
--- /dev/null
+++ b/meson/mesonbuild/dependencies/scalapack.py
@@ -0,0 +1,149 @@
+# Copyright 2013-2020 The Meson development team
+
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+
+#     http://www.apache.org/licenses/LICENSE-2.0
+
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from pathlib import Path
+import functools
+import os
+import typing as T
+
+from .base import CMakeDependency, DependencyMethods, PkgConfigDependency
+from .base import factory_methods, DependencyException
+
+if T.TYPE_CHECKING:
+    from ..environment import Environment, MachineChoice
+    from .base import DependencyType
+
+
+@factory_methods({DependencyMethods.PKGCONFIG, DependencyMethods.CMAKE})
+def scalapack_factory(env: 'Environment', for_machine: 'MachineChoice',
+                      kwargs: T.Dict[str, T.Any],
+                      methods: T.List[DependencyMethods]) -> T.List['DependencyType']:
+    candidates = []
+
+    if DependencyMethods.PKGCONFIG in methods:
+        mkl = 'mkl-static-lp64-iomp' if kwargs.get('static', False) else 'mkl-dynamic-lp64-iomp'
+        candidates.append(functools.partial(
+            MKLPkgConfigDependency, mkl, env, kwargs))
+
+        for pkg in ['scalapack-openmpi', 'scalapack']:
+            candidates.append(functools.partial(
+                PkgConfigDependency, pkg, env, kwargs))
+
+    if DependencyMethods.CMAKE in methods:
+        candidates.append(functools.partial(
+            CMakeDependency, 'Scalapack', env, kwargs))
+
+    return candidates
+
+
+class MKLPkgConfigDependency(PkgConfigDependency):
+
+    """PkgConfigDependency for Intel MKL.
+
+    MKL's pkg-config is pretty much borked in every way. We need to apply a
+    bunch of fixups to make it work correctly.
+    """
+
+    def __init__(self, name: str, env: 'Environment', kwargs: T.Dict[str, T.Any],
+                 language: T.Optional[str] = None):
+        _m = os.environ.get('MKLROOT')
+        self.__mklroot = Path(_m).resolve() if _m else None
+
+        # We need to call down into the normal super() method even if we don't
+        # find mklroot, otherwise we won't have all of the instance variables
+        # initialized that meson expects.
+        super().__init__(name, env, kwargs, language=language)
+
+        # Doesn't work with gcc on windows, but does on Linux
+        if (not self.__mklroot or (env.machines[self.for_machine].is_windows()
+                                   and self.clib_compiler.id == 'gcc')):
+            self.is_found = False
+
+        # This can happen either because we're using GCC, we couldn't find the
+        # mklroot, or the pkg-config couldn't find it.
+        if not self.is_found:
+            return
+
+        assert self.version != '', 'This should not happen if we didn\'t return above'
+
+        if self.version == 'unknown':
+            # At least by 2020 the version is in the pkg-config, just not with
+            # the correct name
+            v = self.get_variable(pkgconfig='Version', default_value='')
+
+            if not v and self.__mklroot:
+                try:
+                    v = (
+                        self.__mklroot.as_posix()
+                        .split('compilers_and_libraries_')[1]
+                        .split('/', 1)[0]
+                    )
+                except IndexError:
+                    pass
+
+            if v:
+                self.version = v
+
+    def _set_libs(self):
+        super()._set_libs()
+
+        if self.env.machines[self.for_machine].is_windows():
+            suffix = '.lib'
+        elif self.static:
+            suffix = '.a'
+        else:
+            suffix = ''
+        libdir = self.__mklroot / 'lib/intel64'
+
+        if self.clib_compiler.id == 'gcc':
+            for i, a in enumerate(self.link_args):
+                # only replace in filename, not in directory names
+                parts = list(os.path.split(a))
+                if 'mkl_intel_lp64' in parts[-1]:
+                    parts[-1] = parts[-1].replace('intel', 'gf')
+                    self.link_args[i] = '/' + os.path.join(*parts)
+        # MKL pkg-config omits scalapack
+        # be sure "-L" and "-Wl" are first if present
+        i = 0
+        for j, a in enumerate(self.link_args):
+            if a.startswith(('-L', '-Wl')):
+                i = j + 1
+            elif j > 3:
+                break
+        if self.env.machines[self.for_machine].is_windows() or self.static:
+            self.link_args.insert(
+                i, str(libdir / ('mkl_scalapack_lp64' + suffix))
+            )
+            self.link_args.insert(
+                i + 1, str(libdir / ('mkl_blacs_intelmpi_lp64' + suffix))
+            )
+        else:
+            self.link_args.insert(i, '-lmkl_scalapack_lp64')
+            self.link_args.insert(i + 1, '-lmkl_blacs_intelmpi_lp64')
+
+    def _set_cargs(self):
+        env = None
+        if self.language == 'fortran':
+            # gfortran doesn't appear to look in system paths for INCLUDE files,
+            # so don't allow pkg-config to suppress -I flags for system paths
+            env = os.environ.copy()
+            env['PKG_CONFIG_ALLOW_SYSTEM_CFLAGS'] = '1'
+        ret, out, err = self._call_pkgbin([
+            '--cflags', self.name,
+            '--define-variable=prefix=' + self.__mklroot.as_posix()],
+            env=env)
+        if ret != 0:
+            raise DependencyException('Could not generate cargs for %s:\n%s\n' %
+                                      (self.name, err))
+        self.compile_args = self._convert_mingw_paths(self._split_args(out))
diff --git a/meson/mesonbuild/dependencies/ui.py b/meson/mesonbuild/dependencies/ui.py
new file mode 100644
index 000000000..95dfe2b93
--- /dev/null
+++ b/meson/mesonbuild/dependencies/ui.py
@@ -0,0 +1,684 @@
+# Copyright 2013-2017 The Meson development team
+
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+
+#     http://www.apache.org/licenses/LICENSE-2.0
+
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# This file contains the detection logic for external dependencies that
+# are UI-related.
+import os
+import re
+import subprocess
+import typing as T
+from collections import OrderedDict
+
+from .. import mlog
+from .. import mesonlib
+from ..mesonlib import (
+    MesonException, Popen_safe, extract_as_list, version_compare_many
+)
+from ..environment import detect_cpu_family
+
+from .base import DependencyException, DependencyMethods
+from .base import ExternalDependency, NonExistingExternalProgram
+from .base import ExtraFrameworkDependency, PkgConfigDependency
+from .base import ConfigToolDependency, DependencyFactory
+from .base import find_external_program
+
+if T.TYPE_CHECKING:
+    from ..environment import Environment
+    from .base import ExternalProgram
+
+
+class GLDependencySystem(ExternalDependency):
+    def __init__(self, name: str, environment, kwargs):
+        super().__init__(name, environment, kwargs)
+
+        if self.env.machines[self.for_machine].is_darwin():
+            self.is_found = True
+            # FIXME: Use AppleFrameworks dependency
+            self.link_args = ['-framework', 'OpenGL']
+            # FIXME: Detect version using self.clib_compiler
+            return
+        if self.env.machines[self.for_machine].is_windows():
+            self.is_found = True
+            # FIXME: Use self.clib_compiler.find_library()
+            self.link_args = ['-lopengl32']
+            # FIXME: Detect version using self.clib_compiler
+            return
+
+    @staticmethod
+    def get_methods():
+        if mesonlib.is_osx() or mesonlib.is_windows():
+            return [DependencyMethods.PKGCONFIG, DependencyMethods.SYSTEM]
+        else:
+            return [DependencyMethods.PKGCONFIG]
+
+    def log_tried(self):
+        return 'system'
+
+class GnuStepDependency(ConfigToolDependency):
+
+    tools = ['gnustep-config']
+    tool_name = 'gnustep-config'
+
+    def __init__(self, environment, kwargs):
+        super().__init__('gnustep', environment, kwargs, language='objc')
+        if not self.is_found:
+            return
+        self.modules = kwargs.get('modules', [])
+        self.compile_args = self.filter_args(
+            self.get_config_value(['--objc-flags'], 'compile_args'))
+        self.link_args = self.weird_filter(self.get_config_value(
+            ['--gui-libs' if 'gui' in self.modules else '--base-libs'],
+            'link_args'))
+
+    def find_config(self, versions=None, returncode: int = 0):
+        tool = [self.tools[0]]
+        try:
+            p, out = Popen_safe(tool + ['--help'])[:2]
+        except (FileNotFoundError, PermissionError):
+            return (None, None)
+        if p.returncode != returncode:
+            return (None, None)
+        self.config = tool
+        found_version = self.detect_version()
+        if versions and not version_compare_many(found_version, versions)[0]:
+            return (None, found_version)
+
+        return (tool, found_version)
+
+    def weird_filter(self, elems):
+        """When building packages, the output of the enclosing Make is
+        sometimes mixed among the subprocess output. I have no idea why. As a
+        hack filter out everything that is not a flag.
+        """
+        return [e for e in elems if e.startswith('-')]
+
+    def filter_args(self, args):
+        """gnustep-config returns a bunch of garbage args such as -O2 and so
+        on. Drop everything that is not needed.
+        """
+        result = []
+        for f in args:
+            if f.startswith('-D') \
+                    or f.startswith('-f') \
+                    or f.startswith('-I') \
+                    or f == '-pthread' \
+                    or (f.startswith('-W') and not f == '-Wall'):
+                result.append(f)
+        return result
+
+    def detect_version(self):
+        gmake = self.get_config_value(['--variable=GNUMAKE'], 'variable')[0]
+        makefile_dir = self.get_config_value(['--variable=GNUSTEP_MAKEFILES'], 'variable')[0]
+        # This Makefile has the GNUStep version set
+        base_make = os.path.join(makefile_dir, 'Additional', 'base.make')
+        # Print the Makefile variable passed as the argument. For instance, if
+        # you run the make target `print-SOME_VARIABLE`, this will print the
+        # value of the variable `SOME_VARIABLE`.
+        printver = "print-%:\n\t@echo '$($*)'"
+        env = os.environ.copy()
+        # See base.make to understand why this is set
+        env['FOUNDATION_LIB'] = 'gnu'
+        p, o, e = Popen_safe([gmake, '-f', '-', '-f', base_make,
+                              'print-GNUSTEP_BASE_VERSION'],
+                             env=env, write=printver, stdin=subprocess.PIPE)
+        version = o.strip()
+        if not version:
+            mlog.debug("Couldn't detect GNUStep version, falling back to '1'")
+            # Fallback to setting some 1.x version
+            version = '1'
+        return version
+
+
+def _qt_get_private_includes(mod_inc_dir, module, mod_version):
+    # usually Qt5 puts private headers in /QT_INSTALL_HEADERS/module/VERSION/module/private
+    # except for at least QtWebkit and Enginio where the module version doesn't match Qt version
+    # as an example with Qt 5.10.1 on linux you would get:
+    # /usr/include/qt5/QtCore/5.10.1/QtCore/private/
+    # /usr/include/qt5/QtWidgets/5.10.1/QtWidgets/private/
+    # /usr/include/qt5/QtWebKit/5.212.0/QtWebKit/private/
+
+    # on Qt4 when available private folder is directly in module folder
+    # like /usr/include/QtCore/private/
+    if int(mod_version.split('.')[0]) < 5:
+        return tuple()
+
+    private_dir = os.path.join(mod_inc_dir, mod_version)
+    # fallback, let's try to find a directory with the latest version
+    if not os.path.exists(private_dir):
+        dirs = [filename for filename in os.listdir(mod_inc_dir)
+                if os.path.isdir(os.path.join(mod_inc_dir, filename))]
+        dirs.sort(reverse=True)
+
+        for dirname in dirs:
+            if len(dirname.split('.')) == 3:
+                private_dir = dirname
+                break
+    return (private_dir,
+            os.path.join(private_dir, 'Qt' + module))
+
+class QtExtraFrameworkDependency(ExtraFrameworkDependency):
+    def __init__(self, name, env, kwargs, language: T.Optional[str] = None):
+        super().__init__(name, env, kwargs, language=language)
+        self.mod_name = name[2:]
+
+    def get_compile_args(self, with_private_headers=False, qt_version="0"):
+        if self.found():
+            mod_inc_dir = os.path.join(self.framework_path, 'Headers')
+            args = ['-I' + mod_inc_dir]
+            if with_private_headers:
+                args += ['-I' + dirname for dirname in _qt_get_private_includes(mod_inc_dir, self.mod_name, qt_version)]
+            return args
+        return []
+
+class QtBaseDependency(ExternalDependency):
+    def __init__(self, name, env, kwargs):
+        super().__init__(name, env, kwargs, language='cpp')
+        self.qtname = name.capitalize()
+        self.qtver = name[-1]
+        if self.qtver == "4":
+            self.qtpkgname = 'Qt'
+        else:
+            self.qtpkgname = self.qtname
+        self.root = '/usr'
+        self.bindir = None
+        self.private_headers = kwargs.get('private_headers', False)
+        mods = extract_as_list(kwargs, 'modules')
+        self.requested_modules = mods
+        if not mods:
+            raise DependencyException('No ' + self.qtname + '  modules specified.')
+        self.from_text = 'pkg-config'
+
+        self.qtmain = kwargs.get('main', False)
+        if not isinstance(self.qtmain, bool):
+            raise DependencyException('"main" argument must be a boolean')
+
+        # Keep track of the detection methods used, for logging purposes.
+        methods = []
+        # Prefer pkg-config, then fallback to `qmake -query`
+        if DependencyMethods.PKGCONFIG in self.methods:
+            mlog.debug('Trying to find qt with pkg-config')
+            self._pkgconfig_detect(mods, kwargs)
+            methods.append('pkgconfig')
+        if not self.is_found and DependencyMethods.QMAKE in self.methods:
+            mlog.debug('Trying to find qt with qmake')
+            self.from_text = self._qmake_detect(mods, kwargs)
+            methods.append('qmake-' + self.name)
+            methods.append('qmake')
+        if not self.is_found:
+            # Reset compile args and link args
+            self.compile_args = []
+            self.link_args = []
+            self.from_text = mlog.format_list(methods)
+            self.version = None
+
+    def compilers_detect(self, interp_obj):
+        "Detect Qt (4 or 5) moc, uic, rcc in the specified bindir or in PATH"
+        # It is important that this list does not change order as the order of
+        # the returned ExternalPrograms will change as well
+        bins = ['moc', 'uic', 'rcc', 'lrelease']
+        found = {b: NonExistingExternalProgram(name='{}-{}'.format(b, self.name))
+                 for b in bins}
+        wanted = '== {}'.format(self.version)
+
+        def gen_bins():
+            for b in bins:
+                if self.bindir:
+                    yield os.path.join(self.bindir, b), b, False
+                # prefer the -qt of the tool to the plain one, as we
+                # don't know what the unsuffixed one points to without calling it.
+                yield '{}-{}'.format(b, self.name), b, False
+                yield b, b, self.required if b != 'lrelease' else False
+
+        for b, name, required in gen_bins():
+            if found[name].found():
+                continue
+
+            if name == 'lrelease':
+                arg = ['-version']
+            elif mesonlib.version_compare(self.version, '>= 5'):
+                arg = ['--version']
+            else:
+                arg = ['-v']
+
+            # Ensure that the version of qt and each tool are the same
+            def get_version(p):
+                _, out, err = mesonlib.Popen_safe(p.get_command() + arg)
+                if b.startswith('lrelease') or not self.version.startswith('4'):
+                    care = out
+                else:
+                    care = err
+                return care.split(' ')[-1].replace(')', '')
+
+            p = interp_obj.find_program_impl([b], required=required,
+                                             version_func=get_version,
+                                             wanted=wanted).held_object
+            if p.found():
+                found[name] = p
+
+        return tuple([found[b] for b in bins])
+
+    def _pkgconfig_detect(self, mods, kwargs):
+        # We set the value of required to False so that we can try the
+        # qmake-based fallback if pkg-config fails.
+        kwargs['required'] = False
+        modules = OrderedDict()
+        for module in mods:
+            modules[module] = PkgConfigDependency(self.qtpkgname + module, self.env,
+                                                  kwargs, language=self.language)
+        for m_name, m in modules.items():
+            if not m.found():
+                self.is_found = False
+                return
+            self.compile_args += m.get_compile_args()
+            if self.private_headers:
+                qt_inc_dir = m.get_pkgconfig_variable('includedir', dict())
+                mod_private_dir = os.path.join(qt_inc_dir, 'Qt' + m_name)
+                if not os.path.isdir(mod_private_dir):
+                    # At least some versions of homebrew don't seem to set this
+                    # up correctly. /usr/local/opt/qt/include/Qt + m_name is a
+                    # symlink to /usr/local/opt/qt/include, but the pkg-config
+                    # file points to /usr/local/Cellar/qt/x.y.z/Headers/, and
+                    # the Qt + m_name there is not a symlink, it's a file
+                    mod_private_dir = qt_inc_dir
+                mod_private_inc = _qt_get_private_includes(mod_private_dir, m_name, m.version)
+                for directory in mod_private_inc:
+                    self.compile_args.append('-I' + directory)
+            self.link_args += m.get_link_args()
+
+        if 'Core' in modules:
+            core = modules['Core']
+        else:
+            corekwargs = {'required': 'false', 'silent': 'true'}
+            core = PkgConfigDependency(self.qtpkgname + 'Core', self.env, corekwargs,
+                                       language=self.language)
+            modules['Core'] = core
+
+        if self.env.machines[self.for_machine].is_windows() and self.qtmain:
+            # Check if we link with debug binaries
+            debug_lib_name = self.qtpkgname + 'Core' + self._get_modules_lib_suffix(True)
+            is_debug = False
+            for arg in core.get_link_args():
+                if arg == '-l%s' % debug_lib_name or arg.endswith('%s.lib' % debug_lib_name) or arg.endswith('%s.a' % debug_lib_name):
+                    is_debug = True
+                    break
+            libdir = core.get_pkgconfig_variable('libdir', {})
+            if not self._link_with_qtmain(is_debug, libdir):
+                self.is_found = False
+                return
+
+        self.is_found = True
+        self.version = m.version
+        self.pcdep = list(modules.values())
+        # Try to detect moc, uic, rcc
+        # Used by self.compilers_detect()
+        self.bindir = self.get_pkgconfig_host_bins(core)
+        if not self.bindir:
+            # If exec_prefix is not defined, the pkg-config file is broken
+            prefix = core.get_pkgconfig_variable('exec_prefix', {})
+            if prefix:
+                self.bindir = os.path.join(prefix, 'bin')
+
+    def search_qmake(self) -> T.Generator['ExternalProgram', None, None]:
+        for qmake in ('qmake-' + self.name, 'qmake'):
+            yield from find_external_program(self.env, self.for_machine, qmake, 'QMake', [qmake])
+
+    def _qmake_detect(self, mods, kwargs):
+        for qmake in self.search_qmake():
+            if not qmake.found():
+                continue
+            # Check that the qmake is for qt5
+            pc, stdo = Popen_safe(qmake.get_command() + ['-v'])[0:2]
+            if pc.returncode != 0:
+                continue
+            if not 'Qt version ' + self.qtver in stdo:
+                mlog.log('QMake is not for ' + self.qtname)
+                continue
+            # Found qmake for Qt5!
+            self.qmake = qmake
+            break
+        else:
+            # Didn't find qmake :(
+            self.is_found = False
+            return
+        self.version = re.search(self.qtver + r'(\.\d+)+', stdo).group(0)
+        # Query library path, header path, and binary path
+        mlog.log("Found qmake:", mlog.bold(self.qmake.get_path()), '(%s)' % self.version)
+        stdo = Popen_safe(self.qmake.get_command() + ['-query'])[1]
+        qvars = {}
+        for line in stdo.split('\n'):
+            line = line.strip()
+            if line == '':
+                continue
+            (k, v) = tuple(line.split(':', 1))
+            qvars[k] = v
+        # Qt on macOS uses a framework, but Qt for iOS/tvOS does not
+        xspec = qvars.get('QMAKE_XSPEC', '')
+        if self.env.machines.host.is_darwin() and not any(s in xspec for s in ['ios', 'tvos']):
+            mlog.debug("Building for macOS, looking for framework")
+            self._framework_detect(qvars, mods, kwargs)
+            # Sometimes Qt is built not as a framework (for instance, when using conan pkg manager)
+            # skip and fall back to normal procedure then
+            if self.is_found:
+                return self.qmake.name
+            else:
+                mlog.debug("Building for macOS, couldn't find framework, falling back to library search")
+        incdir = qvars['QT_INSTALL_HEADERS']
+        self.compile_args.append('-I' + incdir)
+        libdir = qvars['QT_INSTALL_LIBS']
+        # Used by self.compilers_detect()
+        self.bindir = self.get_qmake_host_bins(qvars)
+        self.is_found = True
+
+        # Use the buildtype by default, but look at the b_vscrt option if the
+        # compiler supports it.
+        is_debug = self.env.coredata.get_builtin_option('buildtype') == 'debug'
+        if 'b_vscrt' in self.env.coredata.base_options:
+            if self.env.coredata.base_options['b_vscrt'].value in ('mdd', 'mtd'):
+                is_debug = True
+        modules_lib_suffix = self._get_modules_lib_suffix(is_debug)
+
+        for module in mods:
+            mincdir = os.path.join(incdir, 'Qt' + module)
+            self.compile_args.append('-I' + mincdir)
+
+            if module == 'QuickTest':
+                define_base = 'QMLTEST'
+            elif module == 'Test':
+                define_base = 'TESTLIB'
+            else:
+                define_base = module.upper()
+            self.compile_args.append('-DQT_%s_LIB' % define_base)
+
+            if self.private_headers:
+                priv_inc = self.get_private_includes(mincdir, module)
+                for directory in priv_inc:
+                    self.compile_args.append('-I' + directory)
+            libfile = self.clib_compiler.find_library(self.qtpkgname + module + modules_lib_suffix,
+                                                      self.env,
+                                                      libdir)
+            if libfile:
+                libfile = libfile[0]
+            else:
+                mlog.log("Could not find:", module,
+                         self.qtpkgname + module + modules_lib_suffix,
+                         'in', libdir)
+                self.is_found = False
+                break
+            self.link_args.append(libfile)
+
+        if self.env.machines[self.for_machine].is_windows() and self.qtmain:
+            if not self._link_with_qtmain(is_debug, libdir):
+                self.is_found = False
+
+        return self.qmake.name
+
+    def _get_modules_lib_suffix(self, is_debug):
+        suffix = ''
+        if self.env.machines[self.for_machine].is_windows():
+            if is_debug:
+                suffix += 'd'
+            if self.qtver == '4':
+                suffix += '4'
+        if self.env.machines[self.for_machine].is_darwin():
+            if is_debug:
+                suffix += '_debug'
+        if mesonlib.version_compare(self.version, '>= 5.14.0'):
+            if self.env.machines[self.for_machine].is_android():
+                cpu_family = self.env.machines[self.for_machine].cpu_family
+                if cpu_family == 'x86':
+                    suffix += '_x86'
+                elif cpu_family == 'x86_64':
+                    suffix += '_x86_64'
+                elif cpu_family == 'arm':
+                    suffix += '_armeabi-v7a'
+                elif cpu_family == 'aarch64':
+                    suffix += '_arm64-v8a'
+                else:
+                    mlog.warning('Android target arch {!r} for Qt5 is unknown, '
+                                 'module detection may not work'.format(cpu_family))
+        return suffix
+
+    def _link_with_qtmain(self, is_debug, libdir):
+        base_name = 'qtmaind' if is_debug else 'qtmain'
+        qtmain = self.clib_compiler.find_library(base_name, self.env, libdir)
+        if qtmain:
+            self.link_args.append(qtmain[0])
+            return True
+        return False
+
+    def _framework_detect(self, qvars, modules, kwargs):
+        libdir = qvars['QT_INSTALL_LIBS']
+
+        # ExtraFrameworkDependency doesn't support any methods
+        fw_kwargs = kwargs.copy()
+        fw_kwargs.pop('method', None)
+        fw_kwargs['paths'] = [libdir]
+
+        for m in modules:
+            fname = 'Qt' + m
+            mlog.debug('Looking for qt framework ' + fname)
+            fwdep = QtExtraFrameworkDependency(fname, self.env, fw_kwargs, language=self.language)
+            if fwdep.found():
+                self.compile_args.append('-F' + libdir)
+                self.compile_args += fwdep.get_compile_args(with_private_headers=self.private_headers,
+                                                            qt_version=self.version)
+                self.link_args += fwdep.get_link_args()
+            else:
+                break
+        else:
+            self.is_found = True
+            # Used by self.compilers_detect()
+            self.bindir = self.get_qmake_host_bins(qvars)
+
+    def get_qmake_host_bins(self, qvars):
+        # Prefer QT_HOST_BINS (qt5, correct for cross and native compiling)
+        # but fall back to QT_INSTALL_BINS (qt4)
+        if 'QT_HOST_BINS' in qvars:
+            return qvars['QT_HOST_BINS']
+        else:
+            return qvars['QT_INSTALL_BINS']
+
+    @staticmethod
+    def get_methods():
+        return [DependencyMethods.PKGCONFIG, DependencyMethods.QMAKE]
+
+    def get_exe_args(self, compiler):
+        # Originally this was -fPIE but nowadays the default
+        # for upstream and distros seems to be -reduce-relocations
+        # which requires -fPIC. This may cause a performance
+        # penalty when using self-built Qt or on platforms
+        # where -fPIC is not required. If this is an issue
+        # for you, patches are welcome.
+        return compiler.get_pic_args()
+
+    def get_private_includes(self, mod_inc_dir, module):
+        return tuple()
+
+    def log_details(self):
+        module_str = ', '.join(self.requested_modules)
+        return 'modules: ' + module_str
+
+    def log_info(self):
+        return '{}'.format(self.from_text)
+
+    def log_tried(self):
+        return self.from_text
+
+
+class Qt4Dependency(QtBaseDependency):
+    def __init__(self, env, kwargs):
+        QtBaseDependency.__init__(self, 'qt4', env, kwargs)
+
+    def get_pkgconfig_host_bins(self, core):
+        # Only return one bins dir, because the tools are generally all in one
+        # directory for Qt4, in Qt5, they must all be in one directory. Return
+        # the first one found among the bin variables, in case one tool is not
+        # configured to be built.
+        applications = ['moc', 'uic', 'rcc', 'lupdate', 'lrelease']
+        for application in applications:
+            try:
+                return os.path.dirname(core.get_pkgconfig_variable('%s_location' % application, {}))
+            except MesonException:
+                pass
+
+
+class Qt5Dependency(QtBaseDependency):
+    def __init__(self, env, kwargs):
+        QtBaseDependency.__init__(self, 'qt5', env, kwargs)
+
+    def get_pkgconfig_host_bins(self, core):
+        return core.get_pkgconfig_variable('host_bins', {})
+
+    def get_private_includes(self, mod_inc_dir, module):
+        return _qt_get_private_includes(mod_inc_dir, module, self.version)
+
+
+class SDL2DependencyConfigTool(ConfigToolDependency):
+
+    tools = ['sdl2-config']
+    tool_name = 'sdl2-config'
+
+    def __init__(self, name: str, environment: 'Environment', kwargs: T.Dict[str, T.Any]):
+        super().__init__(name, environment, kwargs)
+        if not self.is_found:
+            return
+        self.compile_args = self.get_config_value(['--cflags'], 'compile_args')
+        self.link_args = self.get_config_value(['--libs'], 'link_args')
+
+    @staticmethod
+    def get_methods():
+        if mesonlib.is_osx():
+            return [DependencyMethods.PKGCONFIG, DependencyMethods.CONFIG_TOOL, DependencyMethods.EXTRAFRAMEWORK]
+        else:
+            return [DependencyMethods.PKGCONFIG, DependencyMethods.CONFIG_TOOL]
+
+
+class WxDependency(ConfigToolDependency):
+
+    tools = ['wx-config-3.0', 'wx-config', 'wx-config-gtk3']
+    tool_name = 'wx-config'
+
+    def __init__(self, environment: 'Environment', kwargs: T.Dict[str, T.Any]):
+        super().__init__('WxWidgets', environment, kwargs, language='cpp')
+        if not self.is_found:
+            return
+        self.requested_modules = self.get_requested(kwargs)
+
+        extra_args = []
+        if self.static:
+            extra_args.append('--static=yes')
+
+            # Check to make sure static is going to work
+            err = Popen_safe(self.config + extra_args)[2]
+            if 'No config found to match' in err:
+                mlog.debug('WxWidgets is missing static libraries.')
+                self.is_found = False
+                return
+
+        # wx-config seems to have a cflags as well but since it requires C++,
+        # this should be good, at least for now.
+        self.compile_args = self.get_config_value(['--cxxflags'] + extra_args + self.requested_modules, 'compile_args')
+        self.link_args = self.get_config_value(['--libs'] + extra_args + self.requested_modules, 'link_args')
+
+    @staticmethod
+    def get_requested(kwargs: T.Dict[str, T.Any]) -> T.List[str]:
+        if 'modules' not in kwargs:
+            return []
+        candidates = extract_as_list(kwargs, 'modules')
+        for c in candidates:
+            if not isinstance(c, str):
+                raise DependencyException('wxwidgets module argument is not a string')
+        return candidates
+
+
+class VulkanDependencySystem(ExternalDependency):
+
+    def __init__(self, name: str, environment, kwargs, language: T.Optional[str] = None):
+        super().__init__(name, environment, kwargs, language=language)
+
+        try:
+            self.vulkan_sdk = os.environ['VULKAN_SDK']
+            if not os.path.isabs(self.vulkan_sdk):
+                raise DependencyException('VULKAN_SDK must be an absolute path.')
+        except KeyError:
+            self.vulkan_sdk = None
+
+        if self.vulkan_sdk:
+            # TODO: this config might not work on some platforms, fix bugs as reported
+            # we should at least detect other 64-bit platforms (e.g. armv8)
+            lib_name = 'vulkan'
+            lib_dir = 'lib'
+            inc_dir = 'include'
+            if mesonlib.is_windows():
+                lib_name = 'vulkan-1'
+                lib_dir = 'Lib32'
+                inc_dir = 'Include'
+                if detect_cpu_family(self.env.coredata.compilers.host) == 'x86_64':
+                    lib_dir = 'Lib'
+
+            # make sure header and lib are valid
+            inc_path = os.path.join(self.vulkan_sdk, inc_dir)
+            header = os.path.join(inc_path, 'vulkan', 'vulkan.h')
+            lib_path = os.path.join(self.vulkan_sdk, lib_dir)
+            find_lib = self.clib_compiler.find_library(lib_name, environment, lib_path)
+
+            if not find_lib:
+                raise DependencyException('VULKAN_SDK point to invalid directory (no lib)')
+
+            if not os.path.isfile(header):
+                raise DependencyException('VULKAN_SDK point to invalid directory (no include)')
+
+            self.type_name = 'vulkan_sdk'
+            self.is_found = True
+            self.compile_args.append('-I' + inc_path)
+            self.link_args.append('-L' + lib_path)
+            self.link_args.append('-l' + lib_name)
+
+            # TODO: find a way to retrieve the version from the sdk?
+            # Usually it is a part of the path to it (but does not have to be)
+            return
+        else:
+            # simply try to guess it, usually works on linux
+            libs = self.clib_compiler.find_library('vulkan', environment, [])
+            if libs is not None and self.clib_compiler.has_header('vulkan/vulkan.h', '', environment, disable_cache=True)[0]:
+                self.type_name = 'system'
+                self.is_found = True
+                for lib in libs:
+                    self.link_args.append(lib)
+                return
+
+    @staticmethod
+    def get_methods():
+        return [DependencyMethods.SYSTEM]
+
+    def log_tried(self):
+        return 'system'
+
+gl_factory = DependencyFactory(
+    'gl',
+    [DependencyMethods.PKGCONFIG, DependencyMethods.SYSTEM],
+    system_class=GLDependencySystem,
+)
+
+sdl2_factory = DependencyFactory(
+    'sdl2',
+    [DependencyMethods.PKGCONFIG, DependencyMethods.CONFIG_TOOL, DependencyMethods.EXTRAFRAMEWORK],
+    configtool_class=SDL2DependencyConfigTool,
+)
+
+vulkan_factory = DependencyFactory(
+    'vulkan',
+    [DependencyMethods.PKGCONFIG, DependencyMethods.SYSTEM],
+    system_class=VulkanDependencySystem,
+)
diff --git a/meson/mesonbuild/depfile.py b/meson/mesonbuild/depfile.py
new file mode 100644
index 000000000..7a896cd27
--- /dev/null
+++ b/meson/mesonbuild/depfile.py
@@ -0,0 +1,85 @@
+# Copyright 2019 Red Hat, Inc.
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+
+#     http://www.apache.org/licenses/LICENSE-2.0
+
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import collections
+
+def parse(lines):
+    rules = []
+    targets = []
+    deps = []
+    in_deps = False
+    out = ''
+    for line in lines:
+        if not line.endswith('\n'):
+            line += '\n'
+        escape = None
+        for c in line:
+            if escape:
+                if escape == '$' and c != '$':
+                    out += '$'
+                if escape == '\\' and c == '\n':
+                    continue
+                out += c
+                escape = None
+                continue
+            if c == '\\' or c == '$':
+                escape = c
+                continue
+            elif c in (' ', '\n'):
+                if out != '':
+                    if in_deps:
+                        deps.append(out)
+                    else:
+                        targets.append(out)
+                out = ''
+                if c == '\n':
+                    rules.append((targets, deps))
+                    targets = []
+                    deps = []
+                    in_deps = False
+                continue
+            elif c == ':':
+                targets.append(out)
+                out = ''
+                in_deps = True
+                continue
+            out += c
+    return rules
+
+Target = collections.namedtuple('Target', ['deps'])
+
+class DepFile:
+    def __init__(self, lines):
+        rules = parse(lines)
+        depfile = {}
+        for (targets, deps) in rules:
+            for target in targets:
+                t = depfile.setdefault(target, Target(deps=set()))
+                for dep in deps:
+                    t.deps.add(dep)
+        self.depfile = depfile
+
+    def get_all_dependencies(self, target, visited=None):
+        deps = set()
+        if not visited:
+            visited = set()
+        if target in visited:
+            return set()
+        visited.add(target)
+        target = self.depfile.get(target)
+        if not target:
+            return set()
+        deps.update(target.deps)
+        for dep in target.deps:
+            deps.update(self.get_all_dependencies(dep, visited))
+        return deps
diff --git a/meson/mesonbuild/envconfig.py b/meson/mesonbuild/envconfig.py
new file mode 100644
index 000000000..219b62ec8
--- /dev/null
+++ b/meson/mesonbuild/envconfig.py
@@ -0,0 +1,449 @@
+# Copyright 2012-2016 The Meson development team
+
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+
+#     http://www.apache.org/licenses/LICENSE-2.0
+
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import os, subprocess
+import typing as T
+
+from . import mesonlib
+from .mesonlib import EnvironmentException, MachineChoice, PerMachine, split_args
+from . import mlog
+
+_T = T.TypeVar('_T')
+
+
+# These classes contains all the data pulled from configuration files (native
+# and cross file currently), and also assists with the reading environment
+# variables.
+#
+# At this time there isn't an ironclad difference between this an other sources
+# of state like `coredata`. But one rough guide is much what is in `coredata` is
+# the *output* of the configuration process: the final decisions after tests.
+# This, on the other hand has *inputs*. The config files are parsed, but
+# otherwise minimally transformed. When more complex fallbacks (environment
+# detection) exist, they are defined elsewhere as functions that construct
+# instances of these classes.
+
+
+known_cpu_families = (
+    'aarch64',
+    'alpha',
+    'arc',
+    'arm',
+    'avr',
+    'c2000',
+    'dspic',
+    'e2k',
+    'ia64',
+    'm68k',
+    'microblaze',
+    'mips',
+    'mips64',
+    'parisc',
+    'pic24',
+    'ppc',
+    'ppc64',
+    'riscv32',
+    'riscv64',
+    'rl78',
+    'rx',
+    's390',
+    's390x',
+    'sh4',
+    'sparc',
+    'sparc64',
+    'wasm32',
+    'wasm64',
+    'x86',
+    'x86_64',
+)
+
+# It would feel more natural to call this "64_BIT_CPU_FAMILES", but
+# python identifiers cannot start with numbers
+CPU_FAMILES_64_BIT = [
+    'aarch64',
+    'alpha',
+    'ia64',
+    'mips64',
+    'ppc64',
+    'riscv64',
+    's390x',
+    'sparc64',
+    'wasm64',
+    'x86_64',
+]
+
+def get_env_var_pair(for_machine: MachineChoice,
+                     is_cross: bool,
+                     var_name: str) -> T.Tuple[T.Optional[str], T.Optional[str]]:
+    """
+    Returns the exact env var and the value.
+    """
+    candidates = PerMachine(
+        # The prefixed build version takes priority, but if we are native
+        # compiling we fall back on the unprefixed host version. This
+        # allows native builds to never need to worry about the 'BUILD_*'
+        # ones.
+        ([var_name + '_FOR_BUILD'] if is_cross else [var_name]),
+        # Always just the unprefixed host verions
+        [var_name]
+    )[for_machine]
+    for var in candidates:
+        value = os.environ.get(var)
+        if value is not None:
+            break
+    else:
+        formatted = ', '.join(['{!r}'.format(var) for var in candidates])
+        mlog.debug('None of {} are defined in the environment, not changing global flags.'.format(formatted))
+        return None
+    mlog.log('Using {!r} from environment with value: {!r}'.format(var, value))
+    return var, value
+
+def get_env_var(for_machine: MachineChoice,
+                is_cross: bool,
+                var_name: str) -> T.Tuple[T.Optional[str], T.Optional[str]]:
+    ret = get_env_var_pair(for_machine, is_cross, var_name)
+    if ret is None:
+        return None
+    else:
+        var, value = ret
+        return value
+
+class Properties:
+    def __init__(
+            self,
+            properties: T.Optional[T.Dict[str, T.Union[str, T.List[str]]]] = None,
+    ):
+        self.properties = properties or {}  # type: T.Dict[str, T.Union[str, T.List[str]]]
+
+    def has_stdlib(self, language: str) -> bool:
+        return language + '_stdlib' in self.properties
+
+    # Some of get_stdlib, get_root, get_sys_root are wider than is actually
+    # true, but without heterogenious dict annotations it's not practical to
+    # narrow them
+    def get_stdlib(self, language: str) -> T.Union[str, T.List[str]]:
+        return self.properties[language + '_stdlib']
+
+    def get_root(self) -> T.Optional[T.Union[str, T.List[str]]]:
+        return self.properties.get('root', None)
+
+    def get_sys_root(self) -> T.Optional[T.Union[str, T.List[str]]]:
+        return self.properties.get('sys_root', None)
+
+    def get_pkg_config_libdir(self) -> T.Optional[T.List[str]]:
+        p = self.properties.get('pkg_config_libdir', None)
+        if p is None:
+            return p
+        return mesonlib.listify(p)
+
+    def __eq__(self, other: T.Any) -> 'T.Union[bool, NotImplemented]':
+        if isinstance(other, type(self)):
+            return self.properties == other.properties
+        return NotImplemented
+
+    # TODO consider removing so Properties is less freeform
+    def __getitem__(self, key: str) -> T.Any:
+        return self.properties[key]
+
+    # TODO consider removing so Properties is less freeform
+    def __contains__(self, item: T.Any) -> bool:
+        return item in self.properties
+
+    # TODO consider removing, for same reasons as above
+    def get(self, key: str, default: T.Any = None) -> T.Any:
+        return self.properties.get(key, default)
+
+class MachineInfo:
+    def __init__(self, system: str, cpu_family: str, cpu: str, endian: str):
+        self.system = system
+        self.cpu_family = cpu_family
+        self.cpu = cpu
+        self.endian = endian
+        self.is_64_bit = cpu_family in CPU_FAMILES_64_BIT  # type: bool
+
+    def __eq__(self, other: T.Any) -> 'T.Union[bool, NotImplemented]':
+        if self.__class__ is not other.__class__:
+            return NotImplemented
+        return \
+            self.system == other.system and \
+            self.cpu_family == other.cpu_family and \
+            self.cpu == other.cpu and \
+            self.endian == other.endian
+
+    def __ne__(self, other: T.Any) -> 'T.Union[bool, NotImplemented]':
+        if self.__class__ is not other.__class__:
+            return NotImplemented
+        return not self.__eq__(other)
+
+    def __repr__(self) -> str:
+        return ''.format(self.system, self.cpu_family, self.cpu)
+
+    @classmethod
+    def from_literal(cls, literal: T.Dict[str, str]) -> 'MachineInfo':
+        minimum_literal = {'cpu', 'cpu_family', 'endian', 'system'}
+        if set(literal) < minimum_literal:
+            raise EnvironmentException(
+                'Machine info is currently {}\n'.format(literal) +
+                'but is missing {}.'.format(minimum_literal - set(literal)))
+
+        cpu_family = literal['cpu_family']
+        if cpu_family not in known_cpu_families:
+            mlog.warning('Unknown CPU family {}, please report this at https://github.com/mesonbuild/meson/issues/new'.format(cpu_family))
+
+        endian = literal['endian']
+        if endian not in ('little', 'big'):
+            mlog.warning('Unknown endian {}'.format(endian))
+
+        return cls(literal['system'], cpu_family, literal['cpu'], endian)
+
+    def is_windows(self) -> bool:
+        """
+        Machine is windows?
+        """
+        return self.system == 'windows' or 'mingw' in self.system
+
+    def is_cygwin(self) -> bool:
+        """
+        Machine is cygwin?
+        """
+        return self.system.startswith('cygwin')
+
+    def is_linux(self) -> bool:
+        """
+        Machine is linux?
+        """
+        return self.system == 'linux'
+
+    def is_darwin(self) -> bool:
+        """
+        Machine is Darwin (iOS/tvOS/OS X)?
+        """
+        return self.system in {'darwin', 'ios', 'tvos'}
+
+    def is_android(self) -> bool:
+        """
+        Machine is Android?
+        """
+        return self.system == 'android'
+
+    def is_haiku(self) -> bool:
+        """
+        Machine is Haiku?
+        """
+        return self.system == 'haiku'
+
+    def is_netbsd(self) -> bool:
+        """
+        Machine is NetBSD?
+        """
+        return self.system == 'netbsd'
+
+    def is_openbsd(self) -> bool:
+        """
+        Machine is OpenBSD?
+        """
+        return self.system == 'openbsd'
+
+    def is_dragonflybsd(self) -> bool:
+        """Machine is DragonflyBSD?"""
+        return self.system == 'dragonfly'
+
+    def is_freebsd(self) -> bool:
+        """Machine is FreeBSD?"""
+        return self.system == 'freebsd'
+
+    def is_sunos(self) -> bool:
+        """Machine is illumos or Solaris?"""
+        return self.system == 'sunos'
+
+    def is_hurd(self) -> bool:
+        """
+        Machine is GNU/Hurd?
+        """
+        return self.system == 'gnu'
+
+    def is_irix(self) -> bool:
+        """Machine is IRIX?"""
+        return self.system.startswith('irix')
+
+    # Various prefixes and suffixes for import libraries, shared libraries,
+    # static libraries, and executables.
+    # Versioning is added to these names in the backends as-needed.
+    def get_exe_suffix(self) -> str:
+        if self.is_windows() or self.is_cygwin():
+            return 'exe'
+        else:
+            return ''
+
+    def get_object_suffix(self) -> str:
+        if self.is_windows():
+            return 'obj'
+        else:
+            return 'o'
+
+    def libdir_layout_is_win(self) -> bool:
+        return self.is_windows() or self.is_cygwin()
+
+class BinaryTable:
+    def __init__(
+            self,
+            binaries: T.Optional[T.Dict[str, T.Union[str, T.List[str]]]] = None,
+    ):
+        self.binaries = binaries or {}  # type: T.Dict[str, T.Union[str, T.List[str]]]
+        for name, command in self.binaries.items():
+            if not isinstance(command, (list, str)):
+                # TODO generalize message
+                raise mesonlib.MesonException(
+                    'Invalid type {!r} for binary {!r} in cross file'
+                    ''.format(command, name))
+
+    # Map from language identifiers to environment variables.
+    evarMap = {
+        # Compilers
+        'c': 'CC',
+        'cpp': 'CXX',
+        'cs': 'CSC',
+        'd': 'DC',
+        'fortran': 'FC',
+        'objc': 'OBJC',
+        'objcpp': 'OBJCXX',
+        'rust': 'RUSTC',
+        'vala': 'VALAC',
+
+        # Linkers
+        'c_ld': 'CC_LD',
+        'cpp_ld': 'CXX_LD',
+        'd_ld': 'DC_LD',
+        'fortran_ld': 'FC_LD',
+        'objc_ld': 'OBJC_LD',
+        'objcpp_ld': 'OBJCXX_LD',
+        'rust_ld': 'RUSTC_LD',
+
+        # Binutils
+        'strip': 'STRIP',
+        'ar': 'AR',
+        'windres': 'WINDRES',
+
+        # Other tools
+        'cmake': 'CMAKE',
+        'qmake': 'QMAKE',
+        'pkgconfig': 'PKG_CONFIG',
+    }  # type: T.Dict[str, str]
+
+    # Deprecated environment variables mapped from the new variable to the old one
+    # Deprecated in 0.54.0
+    DEPRECATION_MAP = {
+        'DC_LD': 'D_LD',
+        'FC_LD': 'F_LD',
+        'RUSTC_LD': 'RUST_LD',
+        'OBJCXX_LD': 'OBJCPP_LD',
+    }  # type: T.Dict[str, str]
+
+    @staticmethod
+    def detect_ccache() -> T.List[str]:
+        try:
+            subprocess.check_call(['ccache', '--version'], stdout=subprocess.PIPE, stderr=subprocess.PIPE)
+        except (OSError, subprocess.CalledProcessError):
+            return []
+        return ['ccache']
+
+    @classmethod
+    def parse_entry(cls, entry: T.Union[str, T.List[str]]) -> T.Tuple[T.List[str], T.List[str]]:
+        compiler = mesonlib.stringlistify(entry)
+        # Ensure ccache exists and remove it if it doesn't
+        if compiler[0] == 'ccache':
+            compiler = compiler[1:]
+            ccache = cls.detect_ccache()
+        else:
+            ccache = []
+        # Return value has to be a list of compiler 'choices'
+        return compiler, ccache
+
+    def lookup_entry(self,
+                     for_machine: MachineChoice,
+                     is_cross: bool,
+                     name: str) -> T.Optional[T.List[str]]:
+        """Lookup binary in cross/native file and fallback to environment.
+
+        Returns command with args as list if found, Returns `None` if nothing is
+        found.
+        """
+        # Try explicit map, don't fall back on env var
+        # Try explict map, then env vars
+        for _ in [()]: # a trick to get `break`
+            raw_command = self.binaries.get(name)
+            if raw_command is not None:
+                command = mesonlib.stringlistify(raw_command)
+                break # found
+            evar = self.evarMap.get(name)
+            if evar is not None:
+                raw_command = get_env_var(for_machine, is_cross, evar)
+                if raw_command is None:
+                    deprecated = self.DEPRECATION_MAP.get(evar)
+                    if deprecated is not None:
+                        raw_command = get_env_var(for_machine, is_cross, deprecated)
+                        if raw_command is not None:
+                            mlog.deprecation(
+                                'The', deprecated, 'environment variable is deprecated in favor of',
+                                evar, once=True)
+                if raw_command is not None:
+                    command = split_args(raw_command)
+                    break # found
+            command = None
+
+
+        # Do not return empty or blank string entries
+        if command is not None and (len(command) == 0 or len(command[0].strip()) == 0):
+            command = None
+        return command
+
+class Directories:
+
+    """Data class that holds information about directories for native and cross
+    builds.
+    """
+
+    def __init__(self, bindir: T.Optional[str] = None, datadir: T.Optional[str] = None,
+                 includedir: T.Optional[str] = None, infodir: T.Optional[str] = None,
+                 libdir: T.Optional[str] = None, libexecdir: T.Optional[str] = None,
+                 localedir: T.Optional[str] = None, localstatedir: T.Optional[str] = None,
+                 mandir: T.Optional[str] = None, prefix: T.Optional[str] = None,
+                 sbindir: T.Optional[str] = None, sharedstatedir: T.Optional[str] = None,
+                 sysconfdir: T.Optional[str] = None):
+        self.bindir = bindir
+        self.datadir = datadir
+        self.includedir = includedir
+        self.infodir = infodir
+        self.libdir = libdir
+        self.libexecdir = libexecdir
+        self.localedir = localedir
+        self.localstatedir = localstatedir
+        self.mandir = mandir
+        self.prefix = prefix
+        self.sbindir = sbindir
+        self.sharedstatedir = sharedstatedir
+        self.sysconfdir = sysconfdir
+
+    def __contains__(self, key: str) -> bool:
+        return hasattr(self, key)
+
+    def __getitem__(self, key: str) -> T.Optional[str]:
+        # Mypy can't figure out what to do with getattr here, so we'll case for it
+        return T.cast(T.Optional[str], getattr(self, key))
+
+    def __setitem__(self, key: str, value: T.Optional[str]) -> None:
+        setattr(self, key, value)
+
+    def __iter__(self) -> T.Iterator[T.Tuple[str, str]]:
+        return iter(self.__dict__.items())
diff --git a/meson/mesonbuild/environment.py b/meson/mesonbuild/environment.py
new file mode 100644
index 000000000..c825bd876
--- /dev/null
+++ b/meson/mesonbuild/environment.py
@@ -0,0 +1,1869 @@
+# Copyright 2012-2016 The Meson development team
+
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+
+#     http://www.apache.org/licenses/LICENSE-2.0
+
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import os, platform, re, sys, shutil, subprocess
+import tempfile
+import shlex
+import typing as T
+
+from . import coredata
+from .linkers import ArLinker, ArmarLinker, VisualStudioLinker, DLinker, CcrxLinker, Xc16Linker, C2000Linker, IntelVisualStudioLinker
+from . import mesonlib
+from .mesonlib import (
+    MesonException, EnvironmentException, MachineChoice, Popen_safe,
+    PerMachineDefaultable, PerThreeMachineDefaultable, split_args, quote_arg
+)
+from . import mlog
+
+from .envconfig import (
+    BinaryTable, Directories, MachineInfo,
+    Properties, known_cpu_families,
+)
+from . import compilers
+from .compilers import (
+    Compiler,
+    is_assembly,
+    is_header,
+    is_library,
+    is_llvm_ir,
+    is_object,
+    is_source,
+)
+from .linkers import (
+    AppleDynamicLinker,
+    ArmClangDynamicLinker,
+    ArmDynamicLinker,
+    CcrxDynamicLinker,
+    Xc16DynamicLinker,
+    C2000DynamicLinker,
+    ClangClDynamicLinker,
+    DynamicLinker,
+    GnuBFDDynamicLinker,
+    GnuGoldDynamicLinker,
+    LLVMDynamicLinker,
+    MSVCDynamicLinker,
+    OptlinkDynamicLinker,
+    PGIDynamicLinker,
+    PGIStaticLinker,
+    SolarisDynamicLinker,
+    XilinkDynamicLinker,
+    CudaLinker,
+    VisualStudioLikeLinkerMixin,
+    WASMDynamicLinker,
+)
+from functools import lru_cache
+from .compilers import (
+    ArmCCompiler,
+    ArmCPPCompiler,
+    ArmclangCCompiler,
+    ArmclangCPPCompiler,
+    AppleClangCCompiler,
+    AppleClangCPPCompiler,
+    AppleClangObjCCompiler,
+    AppleClangObjCPPCompiler,
+    ClangCCompiler,
+    ClangCPPCompiler,
+    ClangObjCCompiler,
+    ClangObjCPPCompiler,
+    ClangClCCompiler,
+    ClangClCPPCompiler,
+    FlangFortranCompiler,
+    G95FortranCompiler,
+    GnuCCompiler,
+    GnuCPPCompiler,
+    GnuFortranCompiler,
+    GnuObjCCompiler,
+    GnuObjCPPCompiler,
+    ElbrusCCompiler,
+    ElbrusCPPCompiler,
+    ElbrusFortranCompiler,
+    EmscriptenCCompiler,
+    EmscriptenCPPCompiler,
+    IntelCCompiler,
+    IntelClCCompiler,
+    IntelCPPCompiler,
+    IntelClCPPCompiler,
+    IntelFortranCompiler,
+    IntelClFortranCompiler,
+    JavaCompiler,
+    MonoCompiler,
+    CudaCompiler,
+    VisualStudioCsCompiler,
+    NAGFortranCompiler,
+    Open64FortranCompiler,
+    PathScaleFortranCompiler,
+    PGICCompiler,
+    PGICPPCompiler,
+    PGIFortranCompiler,
+    RustCompiler,
+    CcrxCCompiler,
+    CcrxCPPCompiler,
+    Xc16CCompiler,
+    C2000CCompiler,
+    C2000CPPCompiler,
+    SunFortranCompiler,
+    ValaCompiler,
+    VisualStudioCCompiler,
+    VisualStudioCPPCompiler,
+)
+
+build_filename = 'meson.build'
+
+CompilersDict = T.Dict[str, Compiler]
+
+def detect_gcovr(min_version='3.3', new_rootdir_version='4.2', log=False):
+    gcovr_exe = 'gcovr'
+    try:
+        p, found = Popen_safe([gcovr_exe, '--version'])[0:2]
+    except (FileNotFoundError, PermissionError):
+        # Doesn't exist in PATH or isn't executable
+        return None, None
+    found = search_version(found)
+    if p.returncode == 0 and mesonlib.version_compare(found, '>=' + min_version):
+        if log:
+            mlog.log('Found gcovr-{} at {}'.format(found, quote_arg(shutil.which(gcovr_exe))))
+        return gcovr_exe, mesonlib.version_compare(found, '>=' + new_rootdir_version)
+    return None, None
+
+def detect_llvm_cov():
+    tools = get_llvm_tool_names('llvm-cov')
+    for tool in tools:
+        if mesonlib.exe_exists([tool, '--version']):
+            return tool
+    return None
+
+def find_coverage_tools():
+    gcovr_exe, gcovr_new_rootdir = detect_gcovr()
+
+    llvm_cov_exe = detect_llvm_cov()
+
+    lcov_exe = 'lcov'
+    genhtml_exe = 'genhtml'
+
+    if not mesonlib.exe_exists([lcov_exe, '--version']):
+        lcov_exe = None
+    if not mesonlib.exe_exists([genhtml_exe, '--version']):
+        genhtml_exe = None
+
+    return gcovr_exe, gcovr_new_rootdir, lcov_exe, genhtml_exe, llvm_cov_exe
+
+def detect_ninja(version: str = '1.7', log: bool = False) -> T.List[str]:
+    r = detect_ninja_command_and_version(version, log)
+    return r[0] if r else None
+
+def detect_ninja_command_and_version(version: str = '1.7', log: bool = False) -> (T.List[str], str):
+    from .dependencies.base import ExternalProgram
+    env_ninja = os.environ.get('NINJA', None)
+    for n in [env_ninja] if env_ninja else ['ninja', 'ninja-build', 'samu']:
+        prog = ExternalProgram(n, silent=True)
+        if not prog.found():
+            continue
+        try:
+            p, found = Popen_safe(prog.command + ['--version'])[0:2]
+        except (FileNotFoundError, PermissionError):
+            # Doesn't exist in PATH or isn't executable
+            continue
+        found = found.strip()
+        # Perhaps we should add a way for the caller to know the failure mode
+        # (not found or too old)
+        if p.returncode == 0 and mesonlib.version_compare(found, '>=' + version):
+            if log:
+                name = os.path.basename(n)
+                if name.endswith('-' + found):
+                    name = name[0:-1 - len(found)]
+                if name == 'ninja-build':
+                    name = 'ninja'
+                if name == 'samu':
+                    name = 'samurai'
+                mlog.log('Found {}-{} at {}'.format(name, found,
+                         ' '.join([quote_arg(x) for x in prog.command])))
+            return (prog.command, found)
+
+def get_llvm_tool_names(tool: str) -> T.List[str]:
+    # Ordered list of possible suffixes of LLVM executables to try. Start with
+    # base, then try newest back to oldest (3.5 is arbitrary), and finally the
+    # devel version. Please note that the development snapshot in Debian does
+    # not have a distinct name. Do not move it to the beginning of the list
+    # unless it becomes a stable release.
+    suffixes = [
+        '', # base (no suffix)
+        '-9',   '90',
+        '-8',   '80',
+        '-7',   '70',
+        '-6.0', '60',
+        '-5.0', '50',
+        '-4.0', '40',
+        '-3.9', '39',
+        '-3.8', '38',
+        '-3.7', '37',
+        '-3.6', '36',
+        '-3.5', '35',
+        '-10',    # Debian development snapshot
+        '-devel', # FreeBSD development snapshot
+    ]
+    names = []
+    for suffix in suffixes:
+        names.append(tool + suffix)
+    return names
+
+def detect_scanbuild() -> T.List[str]:
+    """ Look for scan-build binary on build platform
+
+    First, if a SCANBUILD env variable has been provided, give it precedence
+    on all platforms.
+
+    For most platforms, scan-build is found is the PATH contains a binary
+    named "scan-build". However, some distribution's package manager (FreeBSD)
+    don't. For those, loop through a list of candidates to see if one is
+    available.
+
+    Return: a single-element list of the found scan-build binary ready to be
+        passed to Popen()
+    """
+    exelist = []
+    if 'SCANBUILD' in os.environ:
+        exelist = split_args(os.environ['SCANBUILD'])
+
+    else:
+        tools = get_llvm_tool_names('scan-build')
+        for tool in tools:
+            if shutil.which(tool) is not None:
+                exelist = [shutil.which(tool)]
+                break
+
+    if exelist:
+        tool = exelist[0]
+        if os.path.isfile(tool) and os.access(tool, os.X_OK):
+            return [tool]
+    return []
+
+def detect_clangformat() -> T.List[str]:
+    """ Look for clang-format binary on build platform
+
+    Do the same thing as detect_scanbuild to find clang-format except it
+    currently does not check the environment variable.
+
+    Return: a single-element list of the found clang-format binary ready to be
+        passed to Popen()
+    """
+    tools = get_llvm_tool_names('clang-format')
+    for tool in tools:
+        path = shutil.which(tool)
+        if path is not None:
+            return [path]
+    return []
+
+def detect_native_windows_arch():
+    """
+    The architecture of Windows itself: x86, amd64 or arm64
+    """
+    # These env variables are always available. See:
+    # https://msdn.microsoft.com/en-us/library/aa384274(VS.85).aspx
+    # https://blogs.msdn.microsoft.com/david.wang/2006/03/27/howto-detect-process-bitness/
+    arch = os.environ.get('PROCESSOR_ARCHITEW6432', '').lower()
+    if not arch:
+        try:
+            # If this doesn't exist, something is messing with the environment
+            arch = os.environ['PROCESSOR_ARCHITECTURE'].lower()
+        except KeyError:
+            raise EnvironmentException('Unable to detect native OS architecture')
+    return arch
+
+def detect_windows_arch(compilers: CompilersDict) -> str:
+    """
+    Detecting the 'native' architecture of Windows is not a trivial task. We
+    cannot trust that the architecture that Python is built for is the 'native'
+    one because you can run 32-bit apps on 64-bit Windows using WOW64 and
+    people sometimes install 32-bit Python on 64-bit Windows.
+
+    We also can't rely on the architecture of the OS itself, since it's
+    perfectly normal to compile and run 32-bit applications on Windows as if
+    they were native applications. It's a terrible experience to require the
+    user to supply a cross-info file to compile 32-bit applications on 64-bit
+    Windows. Thankfully, the only way to compile things with Visual Studio on
+    Windows is by entering the 'msvc toolchain' environment, which can be
+    easily detected.
+
+    In the end, the sanest method is as follows:
+    1. Check environment variables that are set by Windows and WOW64 to find out
+       if this is x86 (possibly in WOW64), if so use that as our 'native'
+       architecture.
+    2. If the compiler toolchain target architecture is x86, use that as our
+      'native' architecture.
+    3. Otherwise, use the actual Windows architecture
+
+    """
+    os_arch = detect_native_windows_arch()
+    if os_arch == 'x86':
+        return os_arch
+    # If we're on 64-bit Windows, 32-bit apps can be compiled without
+    # cross-compilation. So if we're doing that, just set the native arch as
+    # 32-bit and pretend like we're running under WOW64. Else, return the
+    # actual Windows architecture that we deduced above.
+    for compiler in compilers.values():
+        if compiler.id == 'msvc' and (compiler.target == 'x86' or compiler.target == '80x86'):
+            return 'x86'
+        if compiler.id == 'clang-cl' and compiler.target == 'x86':
+            return 'x86'
+        if compiler.id == 'gcc' and compiler.has_builtin_define('__i386__'):
+            return 'x86'
+    return os_arch
+
+def any_compiler_has_define(compilers: CompilersDict, define):
+    for c in compilers.values():
+        try:
+            if c.has_builtin_define(define):
+                return True
+        except mesonlib.MesonException:
+            # Ignore compilers that do not support has_builtin_define.
+            pass
+    return False
+
+def detect_cpu_family(compilers: CompilersDict) -> str:
+    """
+    Python is inconsistent in its platform module.
+    It returns different values for the same cpu.
+    For x86 it might return 'x86', 'i686' or somesuch.
+    Do some canonicalization.
+    """
+    if mesonlib.is_windows():
+        trial = detect_windows_arch(compilers)
+    elif mesonlib.is_freebsd() or mesonlib.is_netbsd() or mesonlib.is_openbsd() or mesonlib.is_qnx():
+        trial = platform.processor().lower()
+    else:
+        trial = platform.machine().lower()
+    if trial.startswith('i') and trial.endswith('86'):
+        trial = 'x86'
+    elif trial == 'bepc':
+        trial = 'x86'
+    elif trial == 'arm64':
+        trial = 'aarch64'
+    elif trial.startswith('arm') or trial.startswith('earm'):
+        trial = 'arm'
+    elif trial.startswith(('powerpc64', 'ppc64')):
+        trial = 'ppc64'
+    elif trial.startswith(('powerpc', 'ppc')) or trial in {'macppc', 'power machintosh'}:
+        trial = 'ppc'
+    elif trial in ('amd64', 'x64', 'i86pc'):
+        trial = 'x86_64'
+    elif trial in {'sun4u', 'sun4v'}:
+        trial = 'sparc64'
+    elif trial in {'mipsel', 'mips64el'}:
+        trial = trial.rstrip('el')
+    elif trial in {'ip30', 'ip35'}:
+        trial = 'mips64'
+
+    # On Linux (and maybe others) there can be any mixture of 32/64 bit code in
+    # the kernel, Python, system, 32-bit chroot on 64-bit host, etc. The only
+    # reliable way to know is to check the compiler defines.
+    if trial == 'x86_64':
+        if any_compiler_has_define(compilers, '__i386__'):
+            trial = 'x86'
+    elif trial == 'aarch64':
+        if any_compiler_has_define(compilers, '__arm__'):
+            trial = 'arm'
+    # Add more quirks here as bugs are reported. Keep in sync with detect_cpu()
+    # below.
+    elif trial == 'parisc64':
+        # ATM there is no 64 bit userland for PA-RISC. Thus always
+        # report it as 32 bit for simplicity.
+        trial = 'parisc'
+
+    if trial not in known_cpu_families:
+        mlog.warning('Unknown CPU family {!r}, please report this at '
+                     'https://github.com/mesonbuild/meson/issues/new with the '
+                     'output of `uname -a` and `cat /proc/cpuinfo`'.format(trial))
+
+    return trial
+
+def detect_cpu(compilers: CompilersDict):
+    if mesonlib.is_windows():
+        trial = detect_windows_arch(compilers)
+    elif mesonlib.is_freebsd() or mesonlib.is_netbsd() or mesonlib.is_openbsd():
+        trial = platform.processor().lower()
+    else:
+        trial = platform.machine().lower()
+
+    if trial in ('amd64', 'x64', 'i86pc'):
+        trial = 'x86_64'
+    if trial == 'x86_64':
+        # Same check as above for cpu_family
+        if any_compiler_has_define(compilers, '__i386__'):
+            trial = 'i686' # All 64 bit cpus have at least this level of x86 support.
+    elif trial == 'aarch64':
+        # Same check as above for cpu_family
+        if any_compiler_has_define(compilers, '__arm__'):
+            trial = 'arm'
+    elif trial.startswith('earm'):
+        trial = 'arm'
+    elif trial == 'e2k':
+        # Make more precise CPU detection for Elbrus platform.
+        trial = platform.processor().lower()
+    elif trial.startswith('mips'):
+        trial = trial.rstrip('el')
+
+    # Add more quirks here as bugs are reported. Keep in sync with
+    # detect_cpu_family() above.
+    return trial
+
+def detect_system():
+    system = platform.system().lower()
+    if system.startswith('cygwin'):
+        return 'cygwin'
+    return system
+
+def detect_msys2_arch():
+    if 'MSYSTEM_CARCH' in os.environ:
+        return os.environ['MSYSTEM_CARCH']
+    return None
+
+def detect_machine_info(compilers: T.Optional[CompilersDict] = None) -> MachineInfo:
+    """Detect the machine we're running on
+
+    If compilers are not provided, we cannot know as much. None out those
+    fields to avoid accidentally depending on partial knowledge. The
+    underlying ''detect_*'' method can be called to explicitly use the
+    partial information.
+    """
+    return MachineInfo(
+        detect_system(),
+        detect_cpu_family(compilers) if compilers is not None else None,
+        detect_cpu(compilers) if compilers is not None else None,
+        sys.byteorder)
+
+# TODO make this compare two `MachineInfo`s purely. How important is the
+# `detect_cpu_family({})` distinction? It is the one impediment to that.
+def machine_info_can_run(machine_info: MachineInfo):
+    """Whether we can run binaries for this machine on the current machine.
+
+    Can almost always run 32-bit binaries on 64-bit natively if the host
+    and build systems are the same. We don't pass any compilers to
+    detect_cpu_family() here because we always want to know the OS
+    architecture, not what the compiler environment tells us.
+    """
+    if machine_info.system != detect_system():
+        return False
+    true_build_cpu_family = detect_cpu_family({})
+    return \
+        (machine_info.cpu_family == true_build_cpu_family) or \
+        ((true_build_cpu_family == 'x86_64') and (machine_info.cpu_family == 'x86'))
+
+def search_version(text: str) -> str:
+    # Usually of the type 4.1.4 but compiler output may contain
+    # stuff like this:
+    # (Sourcery CodeBench Lite 2014.05-29) 4.8.3 20140320 (prerelease)
+    # Limiting major version number to two digits seems to work
+    # thus far. When we get to GCC 100, this will break, but
+    # if we are still relevant when that happens, it can be
+    # considered an achievement in itself.
+    #
+    # This regex is reaching magic levels. If it ever needs
+    # to be updated, do not complexify but convert to something
+    # saner instead.
+    # We'll demystify it a bit with a verbose definition.
+    version_regex = re.compile(r"""
+    (? bool:
+        return self.coredata.is_cross_build(when_building_for)
+
+    def dump_coredata(self):
+        return coredata.save(self.coredata, self.get_build_dir())
+
+    def get_script_dir(self):
+        import mesonbuild.scripts
+        return os.path.dirname(mesonbuild.scripts.__file__)
+
+    def get_log_dir(self):
+        return self.log_dir
+
+    def get_coredata(self):
+        return self.coredata
+
+    def get_build_command(self, unbuffered=False):
+        cmd = mesonlib.meson_command[:]
+        if unbuffered and 'python' in os.path.basename(cmd[0]):
+            cmd.insert(1, '-u')
+        return cmd
+
+    def is_header(self, fname):
+        return is_header(fname)
+
+    def is_source(self, fname):
+        return is_source(fname)
+
+    def is_assembly(self, fname):
+        return is_assembly(fname)
+
+    def is_llvm_ir(self, fname):
+        return is_llvm_ir(fname)
+
+    def is_object(self, fname):
+        return is_object(fname)
+
+    @lru_cache(maxsize=None)
+    def is_library(self, fname):
+        return is_library(fname)
+
+    def lookup_binary_entry(self, for_machine: MachineChoice, name: str):
+        return self.binaries[for_machine].lookup_entry(
+            for_machine,
+            self.is_cross_build(),
+            name)
+
+    @staticmethod
+    def get_gnu_compiler_defines(compiler):
+        """
+        Detect GNU compiler platform type (Apple, MinGW, Unix)
+        """
+        # Arguments to output compiler pre-processor defines to stdout
+        # gcc, g++, and gfortran all support these arguments
+        args = compiler + ['-E', '-dM', '-']
+        p, output, error = Popen_safe(args, write='', stdin=subprocess.PIPE)
+        if p.returncode != 0:
+            raise EnvironmentException('Unable to detect GNU compiler type:\n' + output + error)
+        # Parse several lines of the type:
+        # `#define ___SOME_DEF some_value`
+        # and extract `___SOME_DEF`
+        defines = {}
+        for line in output.split('\n'):
+            if not line:
+                continue
+            d, *rest = line.split(' ', 2)
+            if d != '#define':
+                continue
+            if len(rest) == 1:
+                defines[rest] = True
+            if len(rest) == 2:
+                defines[rest[0]] = rest[1]
+        return defines
+
+    @staticmethod
+    def get_gnu_version_from_defines(defines):
+        dot = '.'
+        major = defines.get('__GNUC__', '0')
+        minor = defines.get('__GNUC_MINOR__', '0')
+        patch = defines.get('__GNUC_PATCHLEVEL__', '0')
+        return dot.join((major, minor, patch))
+
+    @staticmethod
+    def get_lcc_version_from_defines(defines):
+        dot = '.'
+        generation_and_major = defines.get('__LCC__', '100')
+        generation = generation_and_major[:1]
+        major = generation_and_major[1:]
+        minor = defines.get('__LCC_MINOR__', '0')
+        return dot.join((generation, major, minor))
+
+    @staticmethod
+    def get_clang_compiler_defines(compiler):
+        """
+        Get the list of Clang pre-processor defines
+        """
+        args = compiler + ['-E', '-dM', '-']
+        p, output, error = Popen_safe(args, write='', stdin=subprocess.PIPE)
+        if p.returncode != 0:
+            raise EnvironmentException('Unable to get clang pre-processor defines:\n' + output + error)
+        defines = {}
+        for line in output.split('\n'):
+            if not line:
+                continue
+            d, *rest = line.split(' ', 2)
+            if d != '#define':
+                continue
+            if len(rest) == 1:
+                defines[rest] = True
+            if len(rest) == 2:
+                defines[rest[0]] = rest[1]
+        return defines
+
+    def _get_compilers(self, lang, for_machine):
+        '''
+        The list of compilers is detected in the exact same way for
+        C, C++, ObjC, ObjC++, Fortran, CS so consolidate it here.
+        '''
+        value = self.lookup_binary_entry(for_machine, lang)
+        if value is not None:
+            compilers, ccache = BinaryTable.parse_entry(value)
+            # Return value has to be a list of compiler 'choices'
+            compilers = [compilers]
+        else:
+            if not self.machines.matches_build_machine(for_machine):
+                raise EnvironmentException('{!r} compiler binary not defined in cross or native file'.format(lang))
+            compilers = getattr(self, 'default_' + lang)
+            ccache = BinaryTable.detect_ccache()
+
+        if self.machines.matches_build_machine(for_machine):
+            exe_wrap = None
+        else:
+            exe_wrap = self.get_exe_wrapper()
+
+        return compilers, ccache, exe_wrap
+
+    def _handle_exceptions(self, exceptions, binaries, bintype='compiler'):
+        errmsg = 'Unknown {}(s): {}'.format(bintype, binaries)
+        if exceptions:
+            errmsg += '\nThe follow exceptions were encountered:'
+            for (c, e) in exceptions.items():
+                errmsg += '\nRunning "{0}" gave "{1}"'.format(c, e)
+        raise EnvironmentException(errmsg)
+
+    def _guess_win_linker(self, compiler: T.List[str], comp_class: Compiler,
+                          for_machine: MachineChoice, *,
+                          use_linker_prefix: bool = True, invoked_directly: bool = True,
+                          extra_args: T.Optional[T.List[str]] = None) -> 'DynamicLinker':
+        self.coredata.add_lang_args(comp_class.language, comp_class, for_machine, self)
+
+        # Explicitly pass logo here so that we can get the version of link.exe
+        if not use_linker_prefix or comp_class.LINKER_PREFIX is None:
+            check_args = ['/logo', '--version']
+        elif isinstance(comp_class.LINKER_PREFIX, str):
+            check_args = [comp_class.LINKER_PREFIX + '/logo', comp_class.LINKER_PREFIX + '--version']
+        elif isinstance(comp_class.LINKER_PREFIX, list):
+            check_args = comp_class.LINKER_PREFIX + ['/logo'] + comp_class.LINKER_PREFIX + ['--version']
+
+        check_args += self.coredata.compiler_options[for_machine][comp_class.language]['args'].value
+
+        override = []  # type: T.List[str]
+        value = self.lookup_binary_entry(for_machine, comp_class.language + '_ld')
+        if value is not None:
+            override = comp_class.use_linker_args(value[0])
+            check_args += override
+
+        if extra_args is not None:
+            check_args.extend(extra_args)
+
+        p, o, _ = Popen_safe(compiler + check_args)
+        if o.startswith('LLD'):
+            if '(compatible with GNU linkers)' in o:
+                return LLVMDynamicLinker(
+                    compiler, for_machine, comp_class.LINKER_PREFIX,
+                    override, version=search_version(o))
+
+        if value is not None and invoked_directly:
+            compiler = value
+            # We've already hanedled the non-direct case above
+
+        p, o, e = Popen_safe(compiler + check_args)
+        if o.startswith('LLD'):
+            return ClangClDynamicLinker(
+                for_machine, [],
+                prefix=comp_class.LINKER_PREFIX if use_linker_prefix else [],
+                exelist=compiler, version=search_version(o), direct=invoked_directly)
+        elif 'OPTLINK' in o:
+            # Opltink's stdout *may* beging with a \r character.
+            return OptlinkDynamicLinker(compiler, for_machine, version=search_version(o))
+        elif o.startswith('Microsoft') or e.startswith('Microsoft'):
+            out = o or e
+            match = re.search(r'.*(X86|X64|ARM|ARM64).*', out)
+            if match:
+                target = str(match.group(1))
+            else:
+                target = 'x86'
+
+            return MSVCDynamicLinker(
+                for_machine, [], machine=target, exelist=compiler,
+                prefix=comp_class.LINKER_PREFIX if use_linker_prefix else [],
+                version=search_version(out), direct=invoked_directly)
+        elif 'GNU coreutils' in o:
+            raise EnvironmentException(
+                "Found GNU link.exe instead of MSVC link.exe. This link.exe "
+                "is not a linker. You may need to reorder entries to your "
+                "%PATH% variable to resolve this.")
+        raise EnvironmentException('Unable to determine dynamic linker')
+
+    def _guess_nix_linker(self, compiler: T.List[str], comp_class: T.Type[Compiler],
+                          for_machine: MachineChoice, *,
+                          extra_args: T.Optional[T.List[str]] = None) -> 'DynamicLinker':
+        """Helper for guessing what linker to use on Unix-Like OSes.
+
+        :compiler: Invocation to use to get linker
+        :comp_class: The Compiler Type (uninstantiated)
+        :for_machine: which machine this linker targets
+        :extra_args: Any additional arguments required (such as a source file)
+        """
+        self.coredata.add_lang_args(comp_class.language, comp_class, for_machine, self)
+        extra_args = T.cast(T.List[str], extra_args or [])
+        extra_args += self.coredata.compiler_options[for_machine][comp_class.language]['args'].value
+
+        if isinstance(comp_class.LINKER_PREFIX, str):
+            check_args = [comp_class.LINKER_PREFIX + '--version'] + extra_args
+        else:
+            check_args = comp_class.LINKER_PREFIX + ['--version'] + extra_args
+
+        override = []  # type: T.List[str]
+        value = self.lookup_binary_entry(for_machine, comp_class.language + '_ld')
+        if value is not None:
+            override = comp_class.use_linker_args(value[0])
+            check_args += override
+
+        _, o, e = Popen_safe(compiler + check_args)
+        v = search_version(o)
+        if o.startswith('LLD'):
+            linker = LLVMDynamicLinker(
+                compiler, for_machine, comp_class.LINKER_PREFIX, override, version=v)  # type: DynamicLinker
+        elif e.startswith('lld-link: '):
+            # The LLD MinGW frontend didn't respond to --version before version 9.0.0,
+            # and produced an error message about failing to link (when no object
+            # files were specified), instead of printing the version number.
+            # Let's try to extract the linker invocation command to grab the version.
+
+            _, o, e = Popen_safe(compiler + check_args + ['-v'])
+
+            try:
+                linker_cmd = re.match(r'.*\n(.*?)\nlld-link: ', e, re.DOTALL).group(1)
+                linker_cmd = shlex.split(linker_cmd)[0]
+            except (AttributeError, IndexError, ValueError):
+                pass
+            else:
+                _, o, e = Popen_safe([linker_cmd, '--version'])
+                v = search_version(o)
+
+            linker = LLVMDynamicLinker(compiler, for_machine, comp_class.LINKER_PREFIX, override, version=v)
+        # first is for apple clang, second is for real gcc, the third is icc
+        elif e.endswith('(use -v to see invocation)\n') or 'macosx_version' in e or 'ld: unknown option:' in e:
+            if isinstance(comp_class.LINKER_PREFIX, str):
+                _, _, e = Popen_safe(compiler + [comp_class.LINKER_PREFIX + '-v'] + extra_args)
+            else:
+                _, _, e = Popen_safe(compiler + comp_class.LINKER_PREFIX + ['-v'] + extra_args)
+            for line in e.split('\n'):
+                if 'PROJECT:ld' in line:
+                    v = line.split('-')[1]
+                    break
+            else:
+                v = 'unknown version'
+            linker = AppleDynamicLinker(compiler, for_machine, comp_class.LINKER_PREFIX, override, version=v)
+        elif 'GNU' in o:
+            if 'gold' in o:
+                cls = GnuGoldDynamicLinker
+            else:
+                cls = GnuBFDDynamicLinker
+            linker = cls(compiler, for_machine, comp_class.LINKER_PREFIX, override, version=v)
+        elif 'Solaris' in e or 'Solaris' in o:
+            linker = SolarisDynamicLinker(
+                compiler, for_machine, comp_class.LINKER_PREFIX, override,
+                version=search_version(e))
+        else:
+            raise EnvironmentException('Unable to determine dynamic linker')
+        return linker
+
+    def _detect_c_or_cpp_compiler(self, lang: str, for_machine: MachineChoice) -> Compiler:
+        popen_exceptions = {}
+        compilers, ccache, exe_wrap = self._get_compilers(lang, for_machine)
+        is_cross = self.is_cross_build(for_machine)
+        info = self.machines[for_machine]
+
+        for compiler in compilers:
+            if isinstance(compiler, str):
+                compiler = [compiler]
+            compiler_name = os.path.basename(compiler[0])
+
+            if not set(['cl', 'cl.exe', 'clang-cl', 'clang-cl.exe']).isdisjoint(compiler):
+                # Watcom C provides it's own cl.exe clone that mimics an older
+                # version of Microsoft's compiler. Since Watcom's cl.exe is
+                # just a wrapper, we skip using it if we detect its presence
+                # so as not to confuse Meson when configuring for MSVC.
+                #
+                # Additionally the help text of Watcom's cl.exe is paged, and
+                # the binary will not exit without human intervention. In
+                # practice, Meson will block waiting for Watcom's cl.exe to
+                # exit, which requires user input and thus will never exit.
+                if 'WATCOM' in os.environ:
+                    def sanitize(p):
+                        return os.path.normcase(os.path.abspath(p))
+
+                    watcom_cls = [sanitize(os.path.join(os.environ['WATCOM'], 'BINNT', 'cl')),
+                                  sanitize(os.path.join(os.environ['WATCOM'], 'BINNT', 'cl.exe'))]
+                    found_cl = sanitize(shutil.which('cl'))
+                    if found_cl in watcom_cls:
+                        continue
+                arg = '/?'
+            elif 'armcc' in compiler_name:
+                arg = '--vsn'
+            elif 'ccrx' in compiler_name:
+                arg = '-v'
+            elif 'xc16' in compiler_name:
+                arg = '--version'
+            elif 'cl2000' in compiler_name:
+                arg = '-version'
+            elif compiler_name in {'icl', 'icl.exe'}:
+                # if you pass anything to icl you get stuck in a pager
+                arg = ''
+            else:
+                arg = '--version'
+
+            try:
+                p, out, err = Popen_safe(compiler + [arg])
+            except OSError as e:
+                popen_exceptions[' '.join(compiler + [arg])] = e
+                continue
+
+            if 'ccrx' in compiler_name:
+                out = err
+
+            full_version = out.split('\n', 1)[0]
+            version = search_version(out)
+
+            guess_gcc_or_lcc = False
+            if 'Free Software Foundation' in out or 'xt-' in out:
+                guess_gcc_or_lcc = 'gcc'
+            if 'e2k' in out and 'lcc' in out:
+                guess_gcc_or_lcc = 'lcc'
+            if 'Microchip Technology' in out:
+                # this output has "Free Software Foundation" in its version
+                guess_gcc_or_lcc = False
+
+            if guess_gcc_or_lcc:
+                defines = self.get_gnu_compiler_defines(compiler)
+                if not defines:
+                    popen_exceptions[' '.join(compiler)] = 'no pre-processor defines'
+                    continue
+
+                if guess_gcc_or_lcc == 'lcc':
+                    version = self.get_lcc_version_from_defines(defines)
+                    cls = ElbrusCCompiler if lang == 'c' else ElbrusCPPCompiler
+                else:
+                    version = self.get_gnu_version_from_defines(defines)
+                    cls = GnuCCompiler if lang == 'c' else GnuCPPCompiler
+
+                linker = self._guess_nix_linker(compiler, cls, for_machine)
+
+                return cls(
+                    ccache + compiler, version, for_machine, is_cross,
+                    info, exe_wrap, defines, full_version=full_version,
+                    linker=linker)
+
+            if 'Emscripten' in out:
+                cls = EmscriptenCCompiler if lang == 'c' else EmscriptenCPPCompiler
+                self.coredata.add_lang_args(cls.language, cls, for_machine, self)
+
+                # emcc requires a file input in order to pass arguments to the
+                # linker. It'll exit with an error code, but still print the
+                # linker version. Old emcc versions ignore -Wl,--version completely,
+                # however. We'll report "unknown version" in that case.
+                with tempfile.NamedTemporaryFile(suffix='.c') as f:
+                    cmd = compiler + [cls.LINKER_PREFIX + "--version", f.name]
+                    _, o, _ = Popen_safe(cmd)
+
+                linker = WASMDynamicLinker(
+                    compiler, for_machine, cls.LINKER_PREFIX,
+                    [], version=search_version(o))
+                return cls(
+                    ccache + compiler, version, for_machine, is_cross, info,
+                    exe_wrap, linker=linker, full_version=full_version)
+
+            if 'armclang' in out:
+                # The compiler version is not present in the first line of output,
+                # instead it is present in second line, startswith 'Component:'.
+                # So, searching for the 'Component' in out although we know it is
+                # present in second line, as we are not sure about the
+                # output format in future versions
+                arm_ver_str = re.search('.*Component.*', out)
+                if arm_ver_str is None:
+                    popen_exceptions[' '.join(compiler)] = 'version string not found'
+                    continue
+                arm_ver_str = arm_ver_str.group(0)
+                # Override previous values
+                version = search_version(arm_ver_str)
+                full_version = arm_ver_str
+                cls = ArmclangCCompiler if lang == 'c' else ArmclangCPPCompiler
+                linker = ArmClangDynamicLinker(for_machine, version=version)
+                self.coredata.add_lang_args(cls.language, cls, for_machine, self)
+                return cls(
+                    ccache + compiler, version, for_machine, is_cross, info,
+                    exe_wrap, full_version=full_version, linker=linker)
+            if 'CL.EXE COMPATIBILITY' in out:
+                # if this is clang-cl masquerading as cl, detect it as cl, not
+                # clang
+                arg = '--version'
+                try:
+                    p, out, err = Popen_safe(compiler + [arg])
+                except OSError as e:
+                    popen_exceptions[' '.join(compiler + [arg])] = e
+                version = search_version(out)
+                match = re.search('^Target: (.*?)-', out, re.MULTILINE)
+                if match:
+                    target = match.group(1)
+                else:
+                    target = 'unknown target'
+                cls = ClangClCCompiler if lang == 'c' else ClangClCPPCompiler
+                linker = self._guess_win_linker(['lld-link'], cls, for_machine)
+                return cls(
+                    compiler, version, for_machine, is_cross, info, exe_wrap,
+                    target, linker=linker)
+            if 'clang' in out:
+                linker = None
+
+                defines = self.get_clang_compiler_defines(compiler)
+
+                # Even if the for_machine is darwin, we could be using vanilla
+                # clang.
+                if 'Apple' in out:
+                    cls = AppleClangCCompiler if lang == 'c' else AppleClangCPPCompiler
+                else:
+                    cls = ClangCCompiler if lang == 'c' else ClangCPPCompiler
+
+                if 'windows' in out or self.machines[for_machine].is_windows():
+                    # If we're in a MINGW context this actually will use a gnu
+                    # style ld, but for clang on "real" windows we'll use
+                    # either link.exe or lld-link.exe
+                    try:
+                        linker = self._guess_win_linker(compiler, cls, for_machine)
+                    except MesonException:
+                        pass
+                if linker is None:
+                    linker = self._guess_nix_linker(compiler, cls, for_machine)
+
+                return cls(
+                    ccache + compiler, version, for_machine, is_cross, info,
+                    exe_wrap, defines, full_version=full_version, linker=linker)
+
+            if 'Intel(R) C++ Intel(R)' in err:
+                version = search_version(err)
+                target = 'x86' if 'IA-32' in err else 'x86_64'
+                cls = IntelClCCompiler if lang == 'c' else IntelClCPPCompiler
+                self.coredata.add_lang_args(cls.language, cls, for_machine, self)
+                linker = XilinkDynamicLinker(for_machine, [], version=version)
+                return cls(
+                    compiler, version, for_machine, is_cross, info=info,
+                    exe_wrap=exe_wrap, target=target, linker=linker)
+            if 'Microsoft' in out or 'Microsoft' in err:
+                # Latest versions of Visual Studio print version
+                # number to stderr but earlier ones print version
+                # on stdout.  Why? Lord only knows.
+                # Check both outputs to figure out version.
+                for lookat in [err, out]:
+                    version = search_version(lookat)
+                    if version != 'unknown version':
+                        break
+                else:
+                    m = 'Failed to detect MSVC compiler version: stderr was\n{!r}'
+                    raise EnvironmentException(m.format(err))
+                cl_signature = lookat.split('\n')[0]
+                match = re.search('.*(x86|x64|ARM|ARM64)( |$)', cl_signature)
+                if match:
+                    target = match.group(1)
+                else:
+                    m = 'Failed to detect MSVC compiler target architecture: \'cl /?\' output is\n{}'
+                    raise EnvironmentException(m.format(cl_signature))
+                cls = VisualStudioCCompiler if lang == 'c' else VisualStudioCPPCompiler
+                linker = self._guess_win_linker(['link'], cls, for_machine)
+                return cls(
+                    compiler, version, for_machine, is_cross, info, exe_wrap,
+                    target, linker=linker)
+            if 'PGI Compilers' in out:
+                cls = PGICCompiler if lang == 'c' else PGICPPCompiler
+                self.coredata.add_lang_args(cls.language, cls, for_machine, self)
+                linker = PGIDynamicLinker(compiler, for_machine, cls.LINKER_PREFIX, [], version=version)
+                return cls(
+                    ccache + compiler, version, for_machine, is_cross,
+                    info, exe_wrap, linker=linker)
+            if '(ICC)' in out:
+                cls = IntelCCompiler if lang == 'c' else IntelCPPCompiler
+                l = self._guess_nix_linker(compiler, cls, for_machine)
+                return cls(
+                    ccache + compiler, version, for_machine, is_cross, info,
+                    exe_wrap, full_version=full_version, linker=l)
+            if 'ARM' in out:
+                cls = ArmCCompiler if lang == 'c' else ArmCPPCompiler
+                self.coredata.add_lang_args(cls.language, cls, for_machine, self)
+                linker = ArmDynamicLinker(for_machine, version=version)
+                return cls(
+                    ccache + compiler, version, for_machine, is_cross,
+                    info, exe_wrap, full_version=full_version, linker=linker)
+            if 'RX Family' in out:
+                cls = CcrxCCompiler if lang == 'c' else CcrxCPPCompiler
+                self.coredata.add_lang_args(cls.language, cls, for_machine, self)
+                linker = CcrxDynamicLinker(for_machine, version=version)
+                return cls(
+                    ccache + compiler, version, for_machine, is_cross, info,
+                    exe_wrap, full_version=full_version, linker=linker)
+
+            if 'Microchip Technology' in out:
+                cls = Xc16CCompiler if lang == 'c' else Xc16CCompiler
+                self.coredata.add_lang_args(cls.language, cls, for_machine, self)
+                linker = Xc16DynamicLinker(for_machine, version=version)
+                return cls(
+                    ccache + compiler, version, for_machine, is_cross, info,
+                    exe_wrap, full_version=full_version, linker=linker)
+
+            if 'TMS320C2000 C/C++' in out:
+                cls = C2000CCompiler if lang == 'c' else C2000CPPCompiler
+                self.coredata.add_lang_args(cls.language, cls, for_machine, self)
+                linker = C2000DynamicLinker(for_machine, version=version)
+                return cls(
+                    ccache + compiler, version, for_machine, is_cross, info,
+                    exe_wrap, full_version=full_version, linker=linker)
+
+        self._handle_exceptions(popen_exceptions, compilers)
+
+    def detect_c_compiler(self, for_machine):
+        return self._detect_c_or_cpp_compiler('c', for_machine)
+
+    def detect_cpp_compiler(self, for_machine):
+        return self._detect_c_or_cpp_compiler('cpp', for_machine)
+
+    def detect_cuda_compiler(self, for_machine):
+        popen_exceptions = {}
+        is_cross = self.is_cross_build(for_machine)
+        compilers, ccache, exe_wrap = self._get_compilers('cuda', for_machine)
+        info = self.machines[for_machine]
+        for compiler in compilers:
+            if isinstance(compiler, str):
+                compiler = [compiler]
+            arg = '--version'
+            try:
+                p, out, err = Popen_safe(compiler + [arg])
+            except OSError as e:
+                popen_exceptions[' '.join(compiler + [arg])] = e
+                continue
+            # Example nvcc printout:
+            #
+            #     nvcc: NVIDIA (R) Cuda compiler driver
+            #     Copyright (c) 2005-2018 NVIDIA Corporation
+            #     Built on Sat_Aug_25_21:08:01_CDT_2018
+            #     Cuda compilation tools, release 10.0, V10.0.130
+            #
+            # search_version() first finds the "10.0" after "release",
+            # rather than the more precise "10.0.130" after "V".
+            # The patch version number is occasionally important; For
+            # instance, on Linux,
+            #    - CUDA Toolkit 8.0.44 requires NVIDIA Driver 367.48
+            #    - CUDA Toolkit 8.0.61 requires NVIDIA Driver 375.26
+            # Luckily, the "V" also makes it very simple to extract
+            # the full version:
+            version = out.strip().split('V')[-1]
+            cpp_compiler = self.detect_cpp_compiler(for_machine)
+            cls = CudaCompiler
+            self.coredata.add_lang_args(cls.language, cls, for_machine, self)
+            linker = CudaLinker(compiler, for_machine, CudaCompiler.LINKER_PREFIX, [], version=CudaLinker.parse_version())
+            return cls(ccache + compiler, version, for_machine, is_cross, exe_wrap, host_compiler=cpp_compiler, info=info, linker=linker)
+        raise EnvironmentException('Could not find suitable CUDA compiler: "' + ' '.join(compilers) + '"')
+
+    def detect_fortran_compiler(self, for_machine: MachineChoice):
+        popen_exceptions = {}
+        compilers, ccache, exe_wrap = self._get_compilers('fortran', for_machine)
+        is_cross = self.is_cross_build(for_machine)
+        info = self.machines[for_machine]
+        for compiler in compilers:
+            if isinstance(compiler, str):
+                compiler = [compiler]
+            for arg in ['--version', '-V']:
+                try:
+                    p, out, err = Popen_safe(compiler + [arg])
+                except OSError as e:
+                    popen_exceptions[' '.join(compiler + [arg])] = e
+                    continue
+
+                version = search_version(out)
+                full_version = out.split('\n', 1)[0]
+
+                guess_gcc_or_lcc = False
+                if 'GNU Fortran' in out:
+                    guess_gcc_or_lcc = 'gcc'
+                if 'e2k' in out and 'lcc' in out:
+                    guess_gcc_or_lcc = 'lcc'
+
+                if guess_gcc_or_lcc:
+                    defines = self.get_gnu_compiler_defines(compiler)
+                    if not defines:
+                        popen_exceptions[' '.join(compiler)] = 'no pre-processor defines'
+                        continue
+                    if guess_gcc_or_lcc == 'lcc':
+                        version = self.get_lcc_version_from_defines(defines)
+                        cls = ElbrusFortranCompiler
+                    else:
+                        version = self.get_gnu_version_from_defines(defines)
+                        cls = GnuFortranCompiler
+                    linker = self._guess_nix_linker(
+                        compiler, cls, for_machine)
+                    return cls(
+                        compiler, version, for_machine, is_cross, info,
+                        exe_wrap, defines, full_version=full_version,
+                        linker=linker)
+
+                if 'G95' in out:
+                    linker = self._guess_nix_linker(
+                        compiler, cls, for_machine)
+                    return G95FortranCompiler(
+                        compiler, version, for_machine, is_cross, info,
+                        exe_wrap, full_version=full_version, linker=linker)
+
+                if 'Sun Fortran' in err:
+                    version = search_version(err)
+                    linker = self._guess_nix_linker(
+                        compiler, cls, for_machine)
+                    return SunFortranCompiler(
+                        compiler, version, for_machine, is_cross, info,
+                        exe_wrap, full_version=full_version, linker=linker)
+
+                if 'Intel(R) Visual Fortran' in err:
+                    version = search_version(err)
+                    target = 'x86' if 'IA-32' in err else 'x86_64'
+                    cls = IntelClFortranCompiler
+                    self.coredata.add_lang_args(cls.language, cls, for_machine, self)
+                    linker = XilinkDynamicLinker(for_machine, [], version=version)
+                    return cls(
+                        compiler, version, for_machine, is_cross, target,
+                        info, exe_wrap, linker=linker)
+
+                if 'ifort (IFORT)' in out:
+                    linker = self._guess_nix_linker(compiler, IntelFortranCompiler, for_machine)
+                    return IntelFortranCompiler(
+                        compiler, version, for_machine, is_cross, info,
+                        exe_wrap, full_version=full_version, linker=linker)
+
+                if 'PathScale EKOPath(tm)' in err:
+                    return PathScaleFortranCompiler(
+                        compiler, version, for_machine, is_cross, info,
+                        exe_wrap, full_version=full_version)
+
+                if 'PGI Compilers' in out:
+                    cls = PGIFortranCompiler
+                    self.coredata.add_lang_args(cls.language, cls, for_machine, self)
+                    linker = PGIDynamicLinker(compiler, for_machine,
+                                              cls.LINKER_PREFIX, [], version=version)
+                    return cls(
+                        compiler, version, for_machine, is_cross, info, exe_wrap,
+                        full_version=full_version, linker=linker)
+
+                if 'flang' in out or 'clang' in out:
+                    linker = self._guess_nix_linker(
+                        compiler, FlangFortranCompiler, for_machine)
+                    return FlangFortranCompiler(
+                        compiler, version, for_machine, is_cross, info,
+                        exe_wrap, full_version=full_version, linker=linker)
+
+                if 'Open64 Compiler Suite' in err:
+                    linker = self._guess_nix_linker(
+                        compiler, Open64FortranCompiler, for_machine)
+                    return Open64FortranCompiler(
+                        compiler, version, for_machine, is_cross, info,
+                        exe_wrap, full_version=full_version, linker=linker)
+
+                if 'NAG Fortran' in err:
+                    linker = self._guess_nix_linker(
+                        compiler, NAGFortranCompiler, for_machine)
+                    return NAGFortranCompiler(
+                        compiler, version, for_machine, is_cross, info,
+                        exe_wrap, full_version=full_version, linker=linker)
+
+        self._handle_exceptions(popen_exceptions, compilers)
+
+    def get_scratch_dir(self):
+        return self.scratch_dir
+
+    def detect_objc_compiler(self, for_machine: MachineInfo) -> 'Compiler':
+        return self._detect_objc_or_objcpp_compiler(for_machine, True)
+
+    def detect_objcpp_compiler(self, for_machine: MachineInfo) -> 'Compiler':
+        return self._detect_objc_or_objcpp_compiler(for_machine, False)
+
+    def _detect_objc_or_objcpp_compiler(self, for_machine: MachineChoice, objc: bool) -> 'Compiler':
+        popen_exceptions = {}
+        compilers, ccache, exe_wrap = self._get_compilers('objc' if objc else 'objcpp', for_machine)
+        is_cross = self.is_cross_build(for_machine)
+        info = self.machines[for_machine]
+
+        for compiler in compilers:
+            if isinstance(compiler, str):
+                compiler = [compiler]
+            arg = ['--version']
+            try:
+                p, out, err = Popen_safe(compiler + arg)
+            except OSError as e:
+                popen_exceptions[' '.join(compiler + arg)] = e
+                continue
+            version = search_version(out)
+            if 'Free Software Foundation' in out:
+                defines = self.get_gnu_compiler_defines(compiler)
+                if not defines:
+                    popen_exceptions[' '.join(compiler)] = 'no pre-processor defines'
+                    continue
+                version = self.get_gnu_version_from_defines(defines)
+                comp = GnuObjCCompiler if objc else GnuObjCPPCompiler
+                linker = self._guess_nix_linker(compiler, comp, for_machine)
+                return comp(
+                    ccache + compiler, version, for_machine, is_cross, info,
+                    exe_wrap, defines, linker=linker)
+            if 'clang' in out:
+                linker = None
+                if 'Apple' in out:
+                    comp = AppleClangObjCCompiler if objc else AppleClangObjCPPCompiler
+                else:
+                    comp = ClangObjCCompiler if objc else ClangObjCPPCompiler
+                if 'windows' in out or self.machines[for_machine].is_windows():
+                    # If we're in a MINGW context this actually will use a gnu style ld
+                    try:
+                        linker = self._guess_win_linker(compiler, comp, for_machine)
+                    except MesonException:
+                        pass
+
+                if not linker:
+                    linker = self._guess_nix_linker(
+                        compiler, comp, for_machine)
+                return comp(
+                    ccache + compiler, version, for_machine,
+                    is_cross, info, exe_wrap, linker=linker)
+        self._handle_exceptions(popen_exceptions, compilers)
+
+    def detect_java_compiler(self, for_machine):
+        exelist = self.lookup_binary_entry(for_machine, 'java')
+        info = self.machines[for_machine]
+        if exelist is None:
+            # TODO support fallback
+            exelist = [self.default_java[0]]
+
+        try:
+            p, out, err = Popen_safe(exelist + ['-version'])
+        except OSError:
+            raise EnvironmentException('Could not execute Java compiler "{}"'.format(' '.join(exelist)))
+        if 'javac' in out or 'javac' in err:
+            version = search_version(err if 'javac' in err else out)
+            if not version or version == 'unknown version':
+                parts = (err if 'javac' in err else out).split()
+                if len(parts) > 1:
+                    version = parts[1]
+            comp_class = JavaCompiler
+            self.coredata.add_lang_args(comp_class.language, comp_class, for_machine, self)
+            return comp_class(exelist, version, for_machine, info)
+        raise EnvironmentException('Unknown compiler "' + ' '.join(exelist) + '"')
+
+    def detect_cs_compiler(self, for_machine):
+        compilers, ccache, exe_wrap = self._get_compilers('cs', for_machine)
+        popen_exceptions = {}
+        info = self.machines[for_machine]
+        for comp in compilers:
+            if not isinstance(comp, list):
+                comp = [comp]
+            try:
+                p, out, err = Popen_safe(comp + ['--version'])
+            except OSError as e:
+                popen_exceptions[' '.join(comp + ['--version'])] = e
+                continue
+
+            version = search_version(out)
+            if 'Mono' in out:
+                cls = MonoCompiler
+            elif "Visual C#" in out:
+                cls = VisualStudioCsCompiler
+            else:
+                continue
+            self.coredata.add_lang_args(cls.language, cls, for_machine, self)
+            return cls(comp, version, for_machine, info)
+
+        self._handle_exceptions(popen_exceptions, compilers)
+
+    def detect_vala_compiler(self, for_machine):
+        exelist = self.lookup_binary_entry(for_machine, 'vala')
+        is_cross = self.is_cross_build(for_machine)
+        info = self.machines[for_machine]
+        if exelist is None:
+            # TODO support fallback
+            exelist = [self.default_vala[0]]
+
+        try:
+            p, out = Popen_safe(exelist + ['--version'])[0:2]
+        except OSError:
+            raise EnvironmentException('Could not execute Vala compiler "{}"'.format(' '.join(exelist)))
+        version = search_version(out)
+        if 'Vala' in out:
+            comp_class = ValaCompiler
+            self.coredata.add_lang_args(comp_class.language, comp_class, for_machine, self)
+            return comp_class(exelist, version, for_machine, info, is_cross)
+        raise EnvironmentException('Unknown compiler "' + ' '.join(exelist) + '"')
+
+    def detect_rust_compiler(self, for_machine):
+        popen_exceptions = {}
+        compilers, ccache, exe_wrap = self._get_compilers('rust', for_machine)
+        is_cross = self.is_cross_build(for_machine)
+        info = self.machines[for_machine]
+
+        cc = self.detect_c_compiler(for_machine)
+        is_link_exe = isinstance(cc.linker, VisualStudioLikeLinkerMixin)
+        override = self.lookup_binary_entry(for_machine, 'rust_ld')
+
+        for compiler in compilers:
+            if isinstance(compiler, str):
+                compiler = [compiler]
+            arg = ['--version']
+            try:
+                p, out = Popen_safe(compiler + arg)[0:2]
+            except OSError as e:
+                popen_exceptions[' '.join(compiler + arg)] = e
+                continue
+
+            version = search_version(out)
+
+            if 'rustc' in out:
+                # On Linux and mac rustc will invoke gcc (clang for mac
+                # presumably) and it can do this windows, for dynamic linking.
+                # this means the easiest way to C compiler for dynamic linking.
+                # figure out what linker to use is to just get the value of the
+                # C compiler and use that as the basis of the rust linker.
+                # However, there are two things we need to change, if CC is not
+                # the default use that, and second add the necessary arguments
+                # to rust to use -fuse-ld
+
+                if override is None:
+                    extra_args = {}
+                    always_args = []
+                    if is_link_exe:
+                        compiler.extend(['-C', 'linker={}'.format(cc.linker.exelist[0])])
+                        extra_args['direct'] = True
+                        extra_args['machine'] = cc.linker.machine
+                    elif not ((info.is_darwin() and isinstance(cc, AppleClangCCompiler)) or
+                              isinstance(cc, GnuCCompiler)):
+                        c = cc.exelist[1] if cc.exelist[0].endswith('ccache') else cc.exelist[0]
+                        compiler.extend(['-C', 'linker={}'.format(c)])
+
+                    # This trickery with type() gets us the class of the linker
+                    # so we can initialize a new copy for the Rust Compiler
+                    if is_link_exe:
+                        linker = type(cc.linker)(for_machine, always_args, exelist=cc.linker.exelist,
+                                                 version=cc.linker.version, **extra_args)
+                    else:
+                        linker = type(cc.linker)(compiler, for_machine, cc.LINKER_PREFIX,
+                                                 always_args=always_args, version=cc.linker.version,
+                                                 **extra_args)
+                elif 'link' in override[0]:
+                    linker = self._guess_win_linker(
+                        override, RustCompiler, for_machine, use_linker_prefix=False)
+                    linker.direct = True
+                else:
+                    # We're creating a new type of "C" compiler, that has rust
+                    # as it's language. This is gross, but I can't figure out
+                    # another way to handle this, because rustc is actually
+                    # invoking the c compiler as it's linker.
+                    b = type('b', (type(cc), ), {})
+                    b.language = RustCompiler.language
+                    linker = self._guess_nix_linker(cc.exelist, b, for_machine)
+
+                    # Of course, we're not going to use any of that, we just
+                    # need it to get the proper arguments to pass to rustc
+                    c = cc.exelist[1] if cc.exelist[0].endswith('ccache') else cc.exelist[0]
+                    compiler.extend(['-C', 'linker={}'.format(c)])
+                    compiler.extend(['-C', 'link-args={}'.format(' '.join(cc.use_linker_args(override[0])))])
+
+                self.coredata.add_lang_args(RustCompiler.language, RustCompiler, for_machine, self)
+                return RustCompiler(
+                    compiler, version, for_machine, is_cross, info, exe_wrap,
+                    linker=linker)
+
+        self._handle_exceptions(popen_exceptions, compilers)
+
+    def detect_d_compiler(self, for_machine: MachineChoice):
+        info = self.machines[for_machine]
+
+        # Detect the target architecture, required for proper architecture handling on Windows.
+        # MSVC compiler is required for correct platform detection.
+        c_compiler = {'c': self.detect_c_compiler(for_machine)}
+        is_msvc = isinstance(c_compiler['c'], VisualStudioCCompiler)
+        if not is_msvc:
+            c_compiler = {}
+
+        arch = detect_cpu_family(c_compiler)
+        if is_msvc and arch == 'x86':
+            arch = 'x86_mscoff'
+
+        popen_exceptions = {}
+        is_cross = self.is_cross_build(for_machine)
+        results, ccache, exe_wrap = self._get_compilers('d', for_machine)
+        for exelist in results:
+            # Search for a D compiler.
+            # We prefer LDC over GDC unless overridden with the DC
+            # environment variable because LDC has a much more
+            # up to date language version at time (2016).
+            if not isinstance(exelist, list):
+                exelist = [exelist]
+            if os.path.basename(exelist[-1]).startswith(('ldmd', 'gdmd')):
+                raise EnvironmentException(
+                    'Meson does not support {} as it is only a DMD frontend for another compiler.'
+                    'Please provide a valid value for DC or unset it so that Meson can resolve the compiler by itself.'.format(exelist[-1]))
+            try:
+                p, out = Popen_safe(exelist + ['--version'])[0:2]
+            except OSError as e:
+                popen_exceptions[' '.join(exelist + ['--version'])] = e
+                continue
+            version = search_version(out)
+            full_version = out.split('\n', 1)[0]
+
+            if 'LLVM D compiler' in out:
+                # LDC seems to require a file
+                # We cannot use NamedTemproraryFile on windows, its documented
+                # to not work for our uses. So, just use mkstemp and only have
+                # one path for simplicity.
+                o, f = tempfile.mkstemp('.d')
+                os.close(o)
+
+                try:
+                    if info.is_windows() or info.is_cygwin():
+                        objfile = os.path.basename(f)[:-1] + 'obj'
+                        linker = self._guess_win_linker(
+                            exelist,
+                            compilers.LLVMDCompiler, for_machine,
+                            use_linker_prefix=True, invoked_directly=False,
+                            extra_args=[f])
+                    else:
+                        # LDC writes an object file to the current working directory.
+                        # Clean it up.
+                        objfile = os.path.basename(f)[:-1] + 'o'
+                        linker = self._guess_nix_linker(
+                            exelist, compilers.LLVMDCompiler, for_machine,
+                            extra_args=[f])
+                finally:
+                    mesonlib.windows_proof_rm(f)
+                    mesonlib.windows_proof_rm(objfile)
+
+                return compilers.LLVMDCompiler(
+                    exelist, version, for_machine, info, arch,
+                    full_version=full_version, linker=linker)
+            elif 'gdc' in out:
+                linker = self._guess_nix_linker(exelist, compilers.GnuDCompiler, for_machine)
+                return compilers.GnuDCompiler(
+                    exelist, version, for_machine, info, arch, is_cross, exe_wrap,
+                    full_version=full_version, linker=linker)
+            elif 'The D Language Foundation' in out or 'Digital Mars' in out:
+                # DMD seems to require a file
+                # We cannot use NamedTemproraryFile on windows, its documented
+                # to not work for our uses. So, just use mkstemp and only have
+                # one path for simplicity.
+                o, f = tempfile.mkstemp('.d')
+                os.close(o)
+
+                # DMD as different detection logic for x86 and x86_64
+                arch_arg = '-m64' if arch == 'x86_64' else '-m32'
+
+                try:
+                    if info.is_windows() or info.is_cygwin():
+                        objfile = os.path.basename(f)[:-1] + 'obj'
+                        linker = self._guess_win_linker(
+                            exelist, compilers.DmdDCompiler, for_machine,
+                            invoked_directly=False, extra_args=[f, arch_arg])
+                    else:
+                        objfile = os.path.basename(f)[:-1] + 'o'
+                        linker = self._guess_nix_linker(
+                            exelist, compilers.DmdDCompiler, for_machine,
+                            extra_args=[f, arch_arg])
+                finally:
+                    mesonlib.windows_proof_rm(f)
+                    mesonlib.windows_proof_rm(objfile)
+
+                return compilers.DmdDCompiler(
+                    exelist, version, for_machine, info, arch,
+                    full_version=full_version, linker=linker)
+            raise EnvironmentException('Unknown compiler "' + ' '.join(exelist) + '"')
+
+        self._handle_exceptions(popen_exceptions, compilers)
+
+    def detect_swift_compiler(self, for_machine):
+        exelist = self.lookup_binary_entry(for_machine, 'swift')
+        is_cross = self.is_cross_build(for_machine)
+        info = self.machines[for_machine]
+        if exelist is None:
+            # TODO support fallback
+            exelist = [self.default_swift[0]]
+
+        try:
+            p, _, err = Popen_safe(exelist + ['-v'])
+        except OSError:
+            raise EnvironmentException('Could not execute Swift compiler "{}"'.format(' '.join(exelist)))
+        version = search_version(err)
+        if 'Swift' in err:
+            # As for 5.0.1 swiftc *requires* a file to check the linker:
+            with tempfile.NamedTemporaryFile(suffix='.swift') as f:
+                linker = self._guess_nix_linker(
+                    exelist, compilers.SwiftCompiler, for_machine,
+                    extra_args=[f.name])
+            return compilers.SwiftCompiler(
+                exelist, version, for_machine, info, is_cross, linker=linker)
+
+        raise EnvironmentException('Unknown compiler "' + ' '.join(exelist) + '"')
+
+    def compiler_from_language(self, lang: str, for_machine: MachineChoice):
+        if lang == 'c':
+            comp = self.detect_c_compiler(for_machine)
+        elif lang == 'cpp':
+            comp = self.detect_cpp_compiler(for_machine)
+        elif lang == 'objc':
+            comp = self.detect_objc_compiler(for_machine)
+        elif lang == 'cuda':
+            comp = self.detect_cuda_compiler(for_machine)
+        elif lang == 'objcpp':
+            comp = self.detect_objcpp_compiler(for_machine)
+        elif lang == 'java':
+            comp = self.detect_java_compiler(for_machine)
+        elif lang == 'cs':
+            comp = self.detect_cs_compiler(for_machine)
+        elif lang == 'vala':
+            comp = self.detect_vala_compiler(for_machine)
+        elif lang == 'd':
+            comp = self.detect_d_compiler(for_machine)
+        elif lang == 'rust':
+            comp = self.detect_rust_compiler(for_machine)
+        elif lang == 'fortran':
+            comp = self.detect_fortran_compiler(for_machine)
+        elif lang == 'swift':
+            comp = self.detect_swift_compiler(for_machine)
+        else:
+            comp = None
+        return comp
+
+    def detect_compiler_for(self, lang: str, for_machine: MachineChoice):
+        comp = self.compiler_from_language(lang, for_machine)
+        if comp is not None:
+            assert comp.for_machine == for_machine
+            self.coredata.process_new_compiler(lang, comp, self)
+        return comp
+
+    def detect_static_linker(self, compiler):
+        linker = self.lookup_binary_entry(compiler.for_machine, 'ar')
+        if linker is not None:
+            linkers = [linker]
+        else:
+            defaults = [[l] for l in self.default_static_linker]
+            if isinstance(compiler, compilers.CudaCompiler):
+                linkers = [self.cuda_static_linker] + defaults
+            elif isinstance(compiler, compilers.VisualStudioLikeCompiler):
+                linkers = [self.vs_static_linker, self.clang_cl_static_linker]
+            elif isinstance(compiler, compilers.GnuCompiler):
+                # Use gcc-ar if available; needed for LTO
+                linkers = [self.gcc_static_linker] + defaults
+            elif isinstance(compiler, compilers.ClangCompiler):
+                # Use llvm-ar if available; needed for LTO
+                linkers = [self.clang_static_linker] + defaults
+            elif isinstance(compiler, compilers.DCompiler):
+                # Prefer static linkers over linkers used by D compilers
+                if mesonlib.is_windows():
+                    linkers = [self.vs_static_linker, self.clang_cl_static_linker, compiler.get_linker_exelist()]
+                else:
+                    linkers = defaults
+            elif isinstance(compiler, IntelClCCompiler):
+                # Intel has it's own linker that acts like microsoft's lib
+                linkers = ['xilib']
+            elif isinstance(compiler, (PGICCompiler, PGIFortranCompiler)) and mesonlib.is_windows():
+                linkers = [['ar']]  # For PGI on Windows, "ar" is just a wrapper calling link/lib.
+            else:
+                linkers = defaults
+        popen_exceptions = {}
+        for linker in linkers:
+            if not {'lib', 'lib.exe', 'llvm-lib', 'llvm-lib.exe', 'xilib', 'xilib.exe'}.isdisjoint(linker):
+                arg = '/?'
+            elif not {'ar2000', 'ar2000.exe'}.isdisjoint(linker):
+                arg = '?'
+            else:
+                arg = '--version'
+            try:
+                p, out, err = Popen_safe(linker + [arg])
+            except OSError as e:
+                popen_exceptions[' '.join(linker + [arg])] = e
+                continue
+            if "xilib: executing 'lib'" in err:
+                return IntelVisualStudioLinker(linker, getattr(compiler, 'machine', None))
+            if '/OUT:' in out.upper() or '/OUT:' in err.upper():
+                return VisualStudioLinker(linker, getattr(compiler, 'machine', None))
+            if 'ar-Error-Unknown switch: --version' in err:
+                return PGIStaticLinker(linker)
+            if p.returncode == 0 and ('armar' in linker or 'armar.exe' in linker):
+                return ArmarLinker(linker)
+            if 'DMD32 D Compiler' in out or 'DMD64 D Compiler' in out:
+                return DLinker(linker, compiler.arch)
+            if 'LDC - the LLVM D compiler' in out:
+                return DLinker(linker, compiler.arch)
+            if 'GDC' in out and ' based on D ' in out:
+                return DLinker(linker, compiler.arch)
+            if err.startswith('Renesas') and ('rlink' in linker or 'rlink.exe' in linker):
+                return CcrxLinker(linker)
+            if out.startswith('GNU ar') and ('xc16-ar' in linker or 'xc16-ar.exe' in linker):
+                return Xc16Linker(linker)
+            if out.startswith('TMS320C2000') and ('ar2000' in linker or 'ar2000.exe' in linker):
+                return C2000Linker(linker)
+            if p.returncode == 0:
+                return ArLinker(linker)
+            if p.returncode == 1 and err.startswith('usage'): # OSX
+                return ArLinker(linker)
+            if p.returncode == 1 and err.startswith('Usage'): # AIX
+                return ArLinker(linker)
+            if p.returncode == 1 and err.startswith('ar: bad option: --'): # Solaris
+                return ArLinker(linker)
+        self._handle_exceptions(popen_exceptions, linkers, 'linker')
+        raise EnvironmentException('Unknown static linker "{}"'.format(' '.join(linkers)))
+
+    def get_source_dir(self):
+        return self.source_dir
+
+    def get_build_dir(self):
+        return self.build_dir
+
+    def get_import_lib_dir(self) -> str:
+        "Install dir for the import library (library used for linking)"
+        return self.get_libdir()
+
+    def get_shared_module_dir(self) -> str:
+        "Install dir for shared modules that are loaded at runtime"
+        return self.get_libdir()
+
+    def get_shared_lib_dir(self) -> str:
+        "Install dir for the shared library"
+        m = self.machines.host
+        # Windows has no RPATH or similar, so DLLs must be next to EXEs.
+        if m.is_windows() or m.is_cygwin():
+            return self.get_bindir()
+        return self.get_libdir()
+
+    def get_static_lib_dir(self) -> str:
+        "Install dir for the static library"
+        return self.get_libdir()
+
+    def get_prefix(self) -> str:
+        return self.coredata.get_builtin_option('prefix')
+
+    def get_libdir(self) -> str:
+        return self.coredata.get_builtin_option('libdir')
+
+    def get_libexecdir(self) -> str:
+        return self.coredata.get_builtin_option('libexecdir')
+
+    def get_bindir(self) -> str:
+        return self.coredata.get_builtin_option('bindir')
+
+    def get_includedir(self) -> str:
+        return self.coredata.get_builtin_option('includedir')
+
+    def get_mandir(self) -> str:
+        return self.coredata.get_builtin_option('mandir')
+
+    def get_datadir(self) -> str:
+        return self.coredata.get_builtin_option('datadir')
+
+    def get_compiler_system_dirs(self, for_machine: MachineChoice):
+        for comp in self.coredata.compilers[for_machine].values():
+            if isinstance(comp, compilers.ClangCompiler):
+                index = 1
+                break
+            elif isinstance(comp, compilers.GnuCompiler):
+                index = 2
+                break
+        else:
+            # This option is only supported by gcc and clang. If we don't get a
+            # GCC or Clang compiler return and empty list.
+            return []
+
+        p, out, _ = Popen_safe(comp.get_exelist() + ['-print-search-dirs'])
+        if p.returncode != 0:
+            raise mesonlib.MesonException('Could not calculate system search dirs')
+        out = out.split('\n')[index].lstrip('libraries: =').split(':')
+        return [os.path.normpath(p) for p in out]
+
+    def need_exe_wrapper(self, for_machine: MachineChoice = MachineChoice.HOST):
+        value = self.properties[for_machine].get('needs_exe_wrapper', None)
+        if value is not None:
+            return value
+        return not machine_info_can_run(self.machines[for_machine])
+
+    def get_exe_wrapper(self):
+        if not self.need_exe_wrapper():
+            from .dependencies import EmptyExternalProgram
+            return EmptyExternalProgram()
+        return self.exe_wrapper
diff --git a/meson/mesonbuild/interpreter.py b/meson/mesonbuild/interpreter.py
new file mode 100644
index 000000000..b2b43f395
--- /dev/null
+++ b/meson/mesonbuild/interpreter.py
@@ -0,0 +1,4869 @@
+# Copyright 2012-2019 The Meson development team
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+
+#     http://www.apache.org/licenses/LICENSE-2.0
+
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from . import mparser
+from . import environment
+from . import coredata
+from . import dependencies
+from . import mlog
+from . import build
+from . import optinterpreter
+from . import compilers
+from .wrap import wrap, WrapMode
+from . import mesonlib
+from .mesonlib import FileMode, MachineChoice, Popen_safe, listify, extract_as_list, has_path_sep, unholder
+from .dependencies import ExternalProgram
+from .dependencies import InternalDependency, Dependency, NotFoundDependency, DependencyException
+from .depfile import DepFile
+from .interpreterbase import InterpreterBase
+from .interpreterbase import check_stringlist, flatten, noPosargs, noKwargs, stringArgs, permittedKwargs, noArgsFlattening
+from .interpreterbase import InterpreterException, InvalidArguments, InvalidCode, SubdirDoneRequest
+from .interpreterbase import InterpreterObject, MutableInterpreterObject, Disabler, disablerIfNotFound
+from .interpreterbase import FeatureNew, FeatureDeprecated, FeatureNewKwargs, FeatureDeprecatedKwargs
+from .interpreterbase import ObjectHolder
+from .modules import ModuleReturnValue
+from .cmake import CMakeInterpreter
+from .backend.backends import TestProtocol
+
+from pathlib import Path, PurePath
+import os
+import shutil
+import uuid
+import re
+import shlex
+import stat
+import subprocess
+import collections
+import functools
+import typing as T
+
+import importlib
+
+permitted_method_kwargs = {
+    'partial_dependency': {'compile_args', 'link_args', 'links', 'includes',
+                           'sources'},
+}
+
+def stringifyUserArguments(args):
+    if isinstance(args, list):
+        return '[%s]' % ', '.join([stringifyUserArguments(x) for x in args])
+    elif isinstance(args, dict):
+        return '{%s}' % ', '.join(['%s : %s' % (stringifyUserArguments(k), stringifyUserArguments(v)) for k, v in args.items()])
+    elif isinstance(args, int):
+        return str(args)
+    elif isinstance(args, str):
+        return "'%s'" % args
+    raise InvalidArguments('Function accepts only strings, integers, lists and lists thereof.')
+
+
+class OverrideProgram(dependencies.ExternalProgram):
+    pass
+
+
+class FeatureOptionHolder(InterpreterObject, ObjectHolder):
+    def __init__(self, env, name, option):
+        InterpreterObject.__init__(self)
+        ObjectHolder.__init__(self, option)
+        if option.is_auto():
+            self.held_object = env.coredata.builtins['auto_features']
+        self.name = name
+        self.methods.update({'enabled': self.enabled_method,
+                             'disabled': self.disabled_method,
+                             'auto': self.auto_method,
+                             })
+
+    @noPosargs
+    @permittedKwargs({})
+    def enabled_method(self, args, kwargs):
+        return self.held_object.is_enabled()
+
+    @noPosargs
+    @permittedKwargs({})
+    def disabled_method(self, args, kwargs):
+        return self.held_object.is_disabled()
+
+    @noPosargs
+    @permittedKwargs({})
+    def auto_method(self, args, kwargs):
+        return self.held_object.is_auto()
+
+def extract_required_kwarg(kwargs, subproject, feature_check=None, default=True):
+    val = kwargs.get('required', default)
+    disabled = False
+    required = False
+    feature = None
+    if isinstance(val, FeatureOptionHolder):
+        if not feature_check:
+            feature_check = FeatureNew('User option "feature"', '0.47.0')
+        feature_check.use(subproject)
+        option = val.held_object
+        feature = val.name
+        if option.is_disabled():
+            disabled = True
+        elif option.is_enabled():
+            required = True
+    elif isinstance(val, bool):
+        required = val
+    else:
+        raise InterpreterException('required keyword argument must be boolean or a feature option')
+
+    # Keep boolean value in kwargs to simplify other places where this kwarg is
+    # checked.
+    kwargs['required'] = required
+
+    return disabled, required, feature
+
+def extract_search_dirs(kwargs):
+    search_dirs = mesonlib.stringlistify(kwargs.get('dirs', []))
+    search_dirs = [Path(d).expanduser() for d in search_dirs]
+    for d in search_dirs:
+        if mesonlib.is_windows() and d.root.startswith('\\'):
+            # a Unix-path starting with `/` that is not absolute on Windows.
+            # discard without failing for end-user ease of cross-platform directory arrays
+            continue
+        if not d.is_absolute():
+            raise InvalidCode('Search directory {} is not an absolute path.'.format(d))
+    return list(map(str, search_dirs))
+
+class TryRunResultHolder(InterpreterObject):
+    def __init__(self, res):
+        super().__init__()
+        self.res = res
+        self.methods.update({'returncode': self.returncode_method,
+                             'compiled': self.compiled_method,
+                             'stdout': self.stdout_method,
+                             'stderr': self.stderr_method,
+                             })
+
+    @noPosargs
+    @permittedKwargs({})
+    def returncode_method(self, args, kwargs):
+        return self.res.returncode
+
+    @noPosargs
+    @permittedKwargs({})
+    def compiled_method(self, args, kwargs):
+        return self.res.compiled
+
+    @noPosargs
+    @permittedKwargs({})
+    def stdout_method(self, args, kwargs):
+        return self.res.stdout
+
+    @noPosargs
+    @permittedKwargs({})
+    def stderr_method(self, args, kwargs):
+        return self.res.stderr
+
+class RunProcess(InterpreterObject):
+
+    def __init__(self, cmd, args, env, source_dir, build_dir, subdir, mesonintrospect, in_builddir=False, check=False, capture=True):
+        super().__init__()
+        if not isinstance(cmd, ExternalProgram):
+            raise AssertionError('BUG: RunProcess must be passed an ExternalProgram')
+        self.capture = capture
+        pc, self.stdout, self.stderr = self.run_command(cmd, args, env, source_dir, build_dir, subdir, mesonintrospect, in_builddir, check)
+        self.returncode = pc.returncode
+        self.methods.update({'returncode': self.returncode_method,
+                             'stdout': self.stdout_method,
+                             'stderr': self.stderr_method,
+                             })
+
+    def run_command(self, cmd, args, env, source_dir, build_dir, subdir, mesonintrospect, in_builddir, check=False):
+        command_array = cmd.get_command() + args
+        menv = {'MESON_SOURCE_ROOT': source_dir,
+                'MESON_BUILD_ROOT': build_dir,
+                'MESON_SUBDIR': subdir,
+                'MESONINTROSPECT': ' '.join([shlex.quote(x) for x in mesonintrospect]),
+                }
+        if in_builddir:
+            cwd = os.path.join(build_dir, subdir)
+        else:
+            cwd = os.path.join(source_dir, subdir)
+        child_env = os.environ.copy()
+        child_env.update(menv)
+        child_env = env.get_env(child_env)
+        stdout = subprocess.PIPE if self.capture else subprocess.DEVNULL
+        mlog.debug('Running command:', ' '.join(command_array))
+        try:
+            p, o, e = Popen_safe(command_array, stdout=stdout, env=child_env, cwd=cwd)
+            if self.capture:
+                mlog.debug('--- stdout ---')
+                mlog.debug(o)
+            else:
+                o = ''
+                mlog.debug('--- stdout disabled ---')
+            mlog.debug('--- stderr ---')
+            mlog.debug(e)
+            mlog.debug('')
+
+            if check and p.returncode != 0:
+                raise InterpreterException('Command "{}" failed with status {}.'.format(' '.join(command_array), p.returncode))
+
+            return p, o, e
+        except FileNotFoundError:
+            raise InterpreterException('Could not execute command "%s".' % ' '.join(command_array))
+
+    @noPosargs
+    @permittedKwargs({})
+    def returncode_method(self, args, kwargs):
+        return self.returncode
+
+    @noPosargs
+    @permittedKwargs({})
+    def stdout_method(self, args, kwargs):
+        return self.stdout
+
+    @noPosargs
+    @permittedKwargs({})
+    def stderr_method(self, args, kwargs):
+        return self.stderr
+
+class ConfigureFileHolder(InterpreterObject, ObjectHolder):
+
+    def __init__(self, subdir, sourcename, targetname, configuration_data):
+        InterpreterObject.__init__(self)
+        obj = build.ConfigureFile(subdir, sourcename, targetname, configuration_data)
+        ObjectHolder.__init__(self, obj)
+
+
+class EnvironmentVariablesHolder(MutableInterpreterObject, ObjectHolder):
+    def __init__(self, initial_values=None):
+        MutableInterpreterObject.__init__(self)
+        ObjectHolder.__init__(self, build.EnvironmentVariables())
+        self.methods.update({'set': self.set_method,
+                             'append': self.append_method,
+                             'prepend': self.prepend_method,
+                             })
+        if isinstance(initial_values, dict):
+            for k, v in initial_values.items():
+                self.set_method([k, v], {})
+        elif isinstance(initial_values, list):
+            for e in initial_values:
+                if '=' not in e:
+                    raise InterpreterException('Env var definition must be of type key=val.')
+                (k, val) = e.split('=', 1)
+                k = k.strip()
+                val = val.strip()
+                if ' ' in k:
+                    raise InterpreterException('Env var key must not have spaces in it.')
+                self.set_method([k, val], {})
+        elif initial_values:
+            raise AssertionError('Unsupported EnvironmentVariablesHolder initial_values')
+
+    def __repr__(self):
+        repr_str = "<{0}: {1}>"
+        return repr_str.format(self.__class__.__name__, self.held_object.envvars)
+
+    def add_var(self, method, args, kwargs):
+        if not isinstance(kwargs.get("separator", ""), str):
+            raise InterpreterException("EnvironmentVariablesHolder methods 'separator'"
+                                       " argument needs to be a string.")
+        if len(args) < 2:
+            raise InterpreterException("EnvironmentVariablesHolder methods require at least"
+                                       "2 arguments, first is the name of the variable and"
+                                       " following one are values")
+        # Warn when someone tries to use append() or prepend() on an env var
+        # which already has an operation set on it. People seem to think that
+        # multiple append/prepend operations stack, but they don't.
+        if method != self.held_object.set and self.held_object.has_name(args[0]):
+            mlog.warning('Overriding previous value of environment variable {!r} with a new one'
+                         .format(args[0]), location=self.current_node)
+        self.held_object.add_var(method, args[0], args[1:], kwargs)
+
+    @stringArgs
+    @permittedKwargs({'separator'})
+    def set_method(self, args, kwargs):
+        self.add_var(self.held_object.set, args, kwargs)
+
+    @stringArgs
+    @permittedKwargs({'separator'})
+    def append_method(self, args, kwargs):
+        self.add_var(self.held_object.append, args, kwargs)
+
+    @stringArgs
+    @permittedKwargs({'separator'})
+    def prepend_method(self, args, kwargs):
+        self.add_var(self.held_object.prepend, args, kwargs)
+
+
+class ConfigurationDataHolder(MutableInterpreterObject, ObjectHolder):
+    def __init__(self, pv, initial_values=None):
+        MutableInterpreterObject.__init__(self)
+        self.used = False # These objects become immutable after use in configure_file.
+        ObjectHolder.__init__(self, build.ConfigurationData(), pv)
+        self.methods.update({'set': self.set_method,
+                             'set10': self.set10_method,
+                             'set_quoted': self.set_quoted_method,
+                             'has': self.has_method,
+                             'get': self.get_method,
+                             'get_unquoted': self.get_unquoted_method,
+                             'merge_from': self.merge_from_method,
+                             })
+        if isinstance(initial_values, dict):
+            for k, v in initial_values.items():
+                self.set_method([k, v], {})
+        elif initial_values:
+            raise AssertionError('Unsupported ConfigurationDataHolder initial_values')
+
+    def is_used(self):
+        return self.used
+
+    def mark_used(self):
+        self.used = True
+
+    def validate_args(self, args, kwargs):
+        if len(args) == 1 and isinstance(args[0], list) and len(args[0]) == 2:
+            mlog.deprecation('Passing a list as the single argument to '
+                             'configuration_data.set is deprecated. This will '
+                             'become a hard error in the future.',
+                             location=self.current_node)
+            args = args[0]
+
+        if len(args) != 2:
+            raise InterpreterException("Configuration set requires 2 arguments.")
+        if self.used:
+            raise InterpreterException("Can not set values on configuration object that has been used.")
+        name, val = args
+        if not isinstance(val, (int, str)):
+            msg = 'Setting a configuration data value to {!r} is invalid, ' \
+                  'and will fail at configure_file(). If you are using it ' \
+                  'just to store some values, please use a dict instead.'
+            mlog.deprecation(msg.format(val), location=self.current_node)
+        desc = kwargs.get('description', None)
+        if not isinstance(name, str):
+            raise InterpreterException("First argument to set must be a string.")
+        if desc is not None and not isinstance(desc, str):
+            raise InterpreterException('Description must be a string.')
+
+        return name, val, desc
+
+    @noArgsFlattening
+    def set_method(self, args, kwargs):
+        (name, val, desc) = self.validate_args(args, kwargs)
+        self.held_object.values[name] = (val, desc)
+
+    def set_quoted_method(self, args, kwargs):
+        (name, val, desc) = self.validate_args(args, kwargs)
+        if not isinstance(val, str):
+            raise InterpreterException("Second argument to set_quoted must be a string.")
+        escaped_val = '\\"'.join(val.split('"'))
+        self.held_object.values[name] = ('"' + escaped_val + '"', desc)
+
+    def set10_method(self, args, kwargs):
+        (name, val, desc) = self.validate_args(args, kwargs)
+        if val:
+            self.held_object.values[name] = (1, desc)
+        else:
+            self.held_object.values[name] = (0, desc)
+
+    def has_method(self, args, kwargs):
+        return args[0] in self.held_object.values
+
+    @FeatureNew('configuration_data.get()', '0.38.0')
+    @noArgsFlattening
+    def get_method(self, args, kwargs):
+        if len(args) < 1 or len(args) > 2:
+            raise InterpreterException('Get method takes one or two arguments.')
+        name = args[0]
+        if name in self.held_object:
+            return self.held_object.get(name)[0]
+        if len(args) > 1:
+            return args[1]
+        raise InterpreterException('Entry %s not in configuration data.' % name)
+
+    @FeatureNew('configuration_data.get_unquoted()', '0.44.0')
+    def get_unquoted_method(self, args, kwargs):
+        if len(args) < 1 or len(args) > 2:
+            raise InterpreterException('Get method takes one or two arguments.')
+        name = args[0]
+        if name in self.held_object:
+            val = self.held_object.get(name)[0]
+        elif len(args) > 1:
+            val = args[1]
+        else:
+            raise InterpreterException('Entry %s not in configuration data.' % name)
+        if val[0] == '"' and val[-1] == '"':
+            return val[1:-1]
+        return val
+
+    def get(self, name):
+        return self.held_object.values[name] # (val, desc)
+
+    def keys(self):
+        return self.held_object.values.keys()
+
+    def merge_from_method(self, args, kwargs):
+        if len(args) != 1:
+            raise InterpreterException('Merge_from takes one positional argument.')
+        from_object = args[0]
+        if not isinstance(from_object, ConfigurationDataHolder):
+            raise InterpreterException('Merge_from argument must be a configuration data object.')
+        from_object = from_object.held_object
+        for k, v in from_object.values.items():
+            self.held_object.values[k] = v
+
+# Interpreter objects can not be pickled so we must have
+# these wrappers.
+
+class DependencyHolder(InterpreterObject, ObjectHolder):
+    def __init__(self, dep, pv):
+        InterpreterObject.__init__(self)
+        ObjectHolder.__init__(self, dep, pv)
+        self.methods.update({'found': self.found_method,
+                             'type_name': self.type_name_method,
+                             'version': self.version_method,
+                             'name': self.name_method,
+                             'get_pkgconfig_variable': self.pkgconfig_method,
+                             'get_configtool_variable': self.configtool_method,
+                             'get_variable': self.variable_method,
+                             'partial_dependency': self.partial_dependency_method,
+                             'include_type': self.include_type_method,
+                             'as_system': self.as_system_method,
+                             })
+
+    def found(self):
+        return self.found_method([], {})
+
+    @noPosargs
+    @permittedKwargs({})
+    def type_name_method(self, args, kwargs):
+        return self.held_object.type_name
+
+    @noPosargs
+    @permittedKwargs({})
+    def found_method(self, args, kwargs):
+        if self.held_object.type_name == 'internal':
+            return True
+        return self.held_object.found()
+
+    @noPosargs
+    @permittedKwargs({})
+    def version_method(self, args, kwargs):
+        return self.held_object.get_version()
+
+    @noPosargs
+    @permittedKwargs({})
+    def name_method(self, args, kwargs):
+        return self.held_object.get_name()
+
+    @permittedKwargs({'define_variable', 'default'})
+    def pkgconfig_method(self, args, kwargs):
+        args = listify(args)
+        if len(args) != 1:
+            raise InterpreterException('get_pkgconfig_variable takes exactly one argument.')
+        varname = args[0]
+        if not isinstance(varname, str):
+            raise InterpreterException('Variable name must be a string.')
+        return self.held_object.get_pkgconfig_variable(varname, kwargs)
+
+    @FeatureNew('dep.get_configtool_variable', '0.44.0')
+    @permittedKwargs({})
+    def configtool_method(self, args, kwargs):
+        args = listify(args)
+        if len(args) != 1:
+            raise InterpreterException('get_configtool_variable takes exactly one argument.')
+        varname = args[0]
+        if not isinstance(varname, str):
+            raise InterpreterException('Variable name must be a string.')
+        return self.held_object.get_configtool_variable(varname)
+
+    @FeatureNew('dep.partial_dependency', '0.46.0')
+    @noPosargs
+    @permittedKwargs(permitted_method_kwargs['partial_dependency'])
+    def partial_dependency_method(self, args, kwargs):
+        pdep = self.held_object.get_partial_dependency(**kwargs)
+        return DependencyHolder(pdep, self.subproject)
+
+    @FeatureNew('dep.get_variable', '0.51.0')
+    @noPosargs
+    @permittedKwargs({'cmake', 'pkgconfig', 'configtool', 'internal', 'default_value', 'pkgconfig_define'})
+    @FeatureNewKwargs('dep.get_variable', '0.54.0', ['internal'])
+    def variable_method(self, args, kwargs):
+        return self.held_object.get_variable(**kwargs)
+
+    @FeatureNew('dep.include_type', '0.52.0')
+    @noPosargs
+    @permittedKwargs({})
+    def include_type_method(self, args, kwargs):
+        return self.held_object.get_include_type()
+
+    @FeatureNew('dep.as_system', '0.52.0')
+    @permittedKwargs({})
+    def as_system_method(self, args, kwargs):
+        args = listify(args)
+        new_is_system = 'system'
+        if len(args) > 1:
+            raise InterpreterException('as_system takes only one optional value')
+        if len(args) == 1:
+            new_is_system = args[0]
+        new_dep = self.held_object.generate_system_dependency(new_is_system)
+        return DependencyHolder(new_dep, self.subproject)
+
+class ExternalProgramHolder(InterpreterObject, ObjectHolder):
+    def __init__(self, ep, subproject, backend=None):
+        InterpreterObject.__init__(self)
+        ObjectHolder.__init__(self, ep)
+        self.subproject = subproject
+        self.backend = backend
+        self.methods.update({'found': self.found_method,
+                             'path': self.path_method,
+                             'full_path': self.full_path_method})
+        self.cached_version = None
+
+    @noPosargs
+    @permittedKwargs({})
+    def found_method(self, args, kwargs):
+        return self.found()
+
+    @noPosargs
+    @permittedKwargs({})
+    @FeatureDeprecated('ExternalProgram.path', '0.55.0',
+                       'use ExternalProgram.full_path() instead')
+    def path_method(self, args, kwargs):
+        return self._full_path()
+
+    @noPosargs
+    @permittedKwargs({})
+    @FeatureNew('ExternalProgram.full_path', '0.55.0')
+    def full_path_method(self, args, kwargs):
+        return self._full_path()
+
+    def _full_path(self):
+        exe = self.held_object
+        if isinstance(exe, build.Executable):
+            return self.backend.get_target_filename_abs(exe)
+        return exe.get_path()
+
+    def found(self):
+        return isinstance(self.held_object, build.Executable) or self.held_object.found()
+
+    def get_command(self):
+        return self.held_object.get_command()
+
+    def get_name(self):
+        exe = self.held_object
+        if isinstance(exe, build.Executable):
+            return exe.name
+        return exe.get_name()
+
+    def get_version(self, interpreter):
+        if isinstance(self.held_object, build.Executable):
+            return self.held_object.project_version
+        if not self.cached_version:
+            raw_cmd = self.get_command() + ['--version']
+            cmd = [self, '--version']
+            res = interpreter.run_command_impl(interpreter.current_node, cmd, {}, True)
+            if res.returncode != 0:
+                m = 'Running {!r} failed'
+                raise InterpreterException(m.format(raw_cmd))
+            output = res.stdout.strip()
+            if not output:
+                output = res.stderr.strip()
+            match = re.search(r'([0-9][0-9\.]+)', output)
+            if not match:
+                m = 'Could not find a version number in output of {!r}'
+                raise InterpreterException(m.format(raw_cmd))
+            self.cached_version = match.group(1)
+        return self.cached_version
+
+class ExternalLibraryHolder(InterpreterObject, ObjectHolder):
+    def __init__(self, el, pv):
+        InterpreterObject.__init__(self)
+        ObjectHolder.__init__(self, el, pv)
+        self.methods.update({'found': self.found_method,
+                             'type_name': self.type_name_method,
+                             'partial_dependency': self.partial_dependency_method,
+                             })
+
+    def found(self):
+        return self.held_object.found()
+
+    @noPosargs
+    @permittedKwargs({})
+    def type_name_method(self, args, kwargs):
+        return self.held_object.type_name
+
+    @noPosargs
+    @permittedKwargs({})
+    def found_method(self, args, kwargs):
+        return self.found()
+
+    def get_name(self):
+        return self.held_object.name
+
+    def get_compile_args(self):
+        return self.held_object.get_compile_args()
+
+    def get_link_args(self):
+        return self.held_object.get_link_args()
+
+    def get_exe_args(self):
+        return self.held_object.get_exe_args()
+
+    @FeatureNew('dep.partial_dependency', '0.46.0')
+    @noPosargs
+    @permittedKwargs(permitted_method_kwargs['partial_dependency'])
+    def partial_dependency_method(self, args, kwargs):
+        pdep = self.held_object.get_partial_dependency(**kwargs)
+        return DependencyHolder(pdep, self.subproject)
+
+class GeneratorHolder(InterpreterObject, ObjectHolder):
+    @FeatureNewKwargs('generator', '0.43.0', ['capture'])
+    def __init__(self, interp, args, kwargs):
+        self.interpreter = interp
+        InterpreterObject.__init__(self)
+        ObjectHolder.__init__(self, build.Generator(args, kwargs), interp.subproject)
+        self.methods.update({'process': self.process_method})
+
+    @FeatureNewKwargs('generator.process', '0.45.0', ['preserve_path_from'])
+    @permittedKwargs({'extra_args', 'preserve_path_from'})
+    def process_method(self, args, kwargs):
+        extras = mesonlib.stringlistify(kwargs.get('extra_args', []))
+        if 'preserve_path_from' in kwargs:
+            preserve_path_from = kwargs['preserve_path_from']
+            if not isinstance(preserve_path_from, str):
+                raise InvalidArguments('Preserve_path_from must be a string.')
+            preserve_path_from = os.path.normpath(preserve_path_from)
+            if not os.path.isabs(preserve_path_from):
+                # This is a bit of a hack. Fix properly before merging.
+                raise InvalidArguments('Preserve_path_from must be an absolute path for now. Sorry.')
+        else:
+            preserve_path_from = None
+        gl = self.held_object.process_files('Generator', args, self.interpreter,
+                                            preserve_path_from, extra_args=extras)
+        return GeneratedListHolder(gl)
+
+
+class GeneratedListHolder(InterpreterObject, ObjectHolder):
+    def __init__(self, arg1, extra_args=None):
+        InterpreterObject.__init__(self)
+        if isinstance(arg1, GeneratorHolder):
+            ObjectHolder.__init__(self, build.GeneratedList(arg1.held_object, extra_args if extra_args is not None else []))
+        else:
+            ObjectHolder.__init__(self, arg1)
+
+    def __repr__(self):
+        r = '<{}: {!r}>'
+        return r.format(self.__class__.__name__, self.held_object.get_outputs())
+
+    def add_file(self, a):
+        self.held_object.add_file(a)
+
+# A machine that's statically known from the cross file
+class MachineHolder(InterpreterObject, ObjectHolder):
+    def __init__(self, machine_info):
+        InterpreterObject.__init__(self)
+        ObjectHolder.__init__(self, machine_info)
+        self.methods.update({'system': self.system_method,
+                             'cpu': self.cpu_method,
+                             'cpu_family': self.cpu_family_method,
+                             'endian': self.endian_method,
+                             })
+
+    @noPosargs
+    @permittedKwargs({})
+    def cpu_family_method(self, args, kwargs):
+        return self.held_object.cpu_family
+
+    @noPosargs
+    @permittedKwargs({})
+    def cpu_method(self, args, kwargs):
+        return self.held_object.cpu
+
+    @noPosargs
+    @permittedKwargs({})
+    def system_method(self, args, kwargs):
+        return self.held_object.system
+
+    @noPosargs
+    @permittedKwargs({})
+    def endian_method(self, args, kwargs):
+        return self.held_object.endian
+
+class IncludeDirsHolder(InterpreterObject, ObjectHolder):
+    def __init__(self, idobj):
+        InterpreterObject.__init__(self)
+        ObjectHolder.__init__(self, idobj)
+
+class Headers(InterpreterObject):
+
+    def __init__(self, sources, kwargs):
+        InterpreterObject.__init__(self)
+        self.sources = sources
+        self.install_subdir = kwargs.get('subdir', '')
+        if os.path.isabs(self.install_subdir):
+            mlog.deprecation('Subdir keyword must not be an absolute path. This will be a hard error in the next release.')
+        self.custom_install_dir = kwargs.get('install_dir', None)
+        self.custom_install_mode = kwargs.get('install_mode', None)
+        if self.custom_install_dir is not None:
+            if not isinstance(self.custom_install_dir, str):
+                raise InterpreterException('Custom_install_dir must be a string.')
+
+    def set_install_subdir(self, subdir):
+        self.install_subdir = subdir
+
+    def get_install_subdir(self):
+        return self.install_subdir
+
+    def get_sources(self):
+        return self.sources
+
+    def get_custom_install_dir(self):
+        return self.custom_install_dir
+
+    def get_custom_install_mode(self):
+        return self.custom_install_mode
+
+class DataHolder(InterpreterObject, ObjectHolder):
+    def __init__(self, data):
+        InterpreterObject.__init__(self)
+        ObjectHolder.__init__(self, data)
+
+    def get_source_subdir(self):
+        return self.held_object.source_subdir
+
+    def get_sources(self):
+        return self.held_object.sources
+
+    def get_install_dir(self):
+        return self.held_object.install_dir
+
+class InstallDir(InterpreterObject):
+    def __init__(self, src_subdir, inst_subdir, install_dir, install_mode, exclude, strip_directory):
+        InterpreterObject.__init__(self)
+        self.source_subdir = src_subdir
+        self.installable_subdir = inst_subdir
+        self.install_dir = install_dir
+        self.install_mode = install_mode
+        self.exclude = exclude
+        self.strip_directory = strip_directory
+
+class Man(InterpreterObject):
+
+    def __init__(self, sources, kwargs):
+        InterpreterObject.__init__(self)
+        self.sources = sources
+        self.validate_sources()
+        self.custom_install_dir = kwargs.get('install_dir', None)
+        self.custom_install_mode = kwargs.get('install_mode', None)
+        if self.custom_install_dir is not None and not isinstance(self.custom_install_dir, str):
+            raise InterpreterException('Custom_install_dir must be a string.')
+
+    def validate_sources(self):
+        for s in self.sources:
+            try:
+                num = int(s.split('.')[-1])
+            except (IndexError, ValueError):
+                num = 0
+            if num < 1 or num > 8:
+                raise InvalidArguments('Man file must have a file extension of a number between 1 and 8')
+
+    def get_custom_install_dir(self):
+        return self.custom_install_dir
+
+    def get_custom_install_mode(self):
+        return self.custom_install_mode
+
+    def get_sources(self):
+        return self.sources
+
+class GeneratedObjectsHolder(InterpreterObject, ObjectHolder):
+    def __init__(self, held_object):
+        InterpreterObject.__init__(self)
+        ObjectHolder.__init__(self, held_object)
+
+class TargetHolder(InterpreterObject, ObjectHolder):
+    def __init__(self, target, interp):
+        InterpreterObject.__init__(self)
+        ObjectHolder.__init__(self, target, interp.subproject)
+        self.interpreter = interp
+
+class BuildTargetHolder(TargetHolder):
+    def __init__(self, target, interp):
+        super().__init__(target, interp)
+        self.methods.update({'extract_objects': self.extract_objects_method,
+                             'extract_all_objects': self.extract_all_objects_method,
+                             'name': self.name_method,
+                             'get_id': self.get_id_method,
+                             'outdir': self.outdir_method,
+                             'full_path': self.full_path_method,
+                             'private_dir_include': self.private_dir_include_method,
+                             })
+
+    def __repr__(self):
+        r = '<{} {}: {}>'
+        h = self.held_object
+        return r.format(self.__class__.__name__, h.get_id(), h.filename)
+
+    def is_cross(self):
+        return not self.held_object.environment.machines.matches_build_machine(self.held_object.for_machine)
+
+    @noPosargs
+    @permittedKwargs({})
+    def private_dir_include_method(self, args, kwargs):
+        return IncludeDirsHolder(build.IncludeDirs('', [], False,
+                                                   [self.interpreter.backend.get_target_private_dir(self.held_object)]))
+
+    @noPosargs
+    @permittedKwargs({})
+    def full_path_method(self, args, kwargs):
+        return self.interpreter.backend.get_target_filename_abs(self.held_object)
+
+    @noPosargs
+    @permittedKwargs({})
+    def outdir_method(self, args, kwargs):
+        return self.interpreter.backend.get_target_dir(self.held_object)
+
+    @permittedKwargs({})
+    def extract_objects_method(self, args, kwargs):
+        gobjs = self.held_object.extract_objects(args)
+        return GeneratedObjectsHolder(gobjs)
+
+    @FeatureNewKwargs('extract_all_objects', '0.46.0', ['recursive'])
+    @noPosargs
+    @permittedKwargs({'recursive'})
+    def extract_all_objects_method(self, args, kwargs):
+        recursive = kwargs.get('recursive', False)
+        gobjs = self.held_object.extract_all_objects(recursive)
+        if gobjs.objlist and 'recursive' not in kwargs:
+            mlog.warning('extract_all_objects called without setting recursive '
+                         'keyword argument. Meson currently defaults to '
+                         'non-recursive to maintain backward compatibility but '
+                         'the default will be changed in the future.',
+                         location=self.current_node)
+        return GeneratedObjectsHolder(gobjs)
+
+    @noPosargs
+    @permittedKwargs({})
+    def get_id_method(self, args, kwargs):
+        return self.held_object.get_id()
+
+    @FeatureNew('name', '0.54.0')
+    @noPosargs
+    @permittedKwargs({})
+    def name_method(self, args, kwargs):
+        return self.held_object.name
+
+class ExecutableHolder(BuildTargetHolder):
+    def __init__(self, target, interp):
+        super().__init__(target, interp)
+
+class StaticLibraryHolder(BuildTargetHolder):
+    def __init__(self, target, interp):
+        super().__init__(target, interp)
+
+class SharedLibraryHolder(BuildTargetHolder):
+    def __init__(self, target, interp):
+        super().__init__(target, interp)
+        # Set to True only when called from self.func_shared_lib().
+        target.shared_library_only = False
+
+class BothLibrariesHolder(BuildTargetHolder):
+    def __init__(self, shared_holder, static_holder, interp):
+        # FIXME: This build target always represents the shared library, but
+        # that should be configurable.
+        super().__init__(shared_holder.held_object, interp)
+        self.shared_holder = shared_holder
+        self.static_holder = static_holder
+        self.methods.update({'get_shared_lib': self.get_shared_lib_method,
+                             'get_static_lib': self.get_static_lib_method,
+                             })
+
+    def __repr__(self):
+        r = '<{} {}: {}, {}: {}>'
+        h1 = self.shared_holder.held_object
+        h2 = self.static_holder.held_object
+        return r.format(self.__class__.__name__, h1.get_id(), h1.filename, h2.get_id(), h2.filename)
+
+    @noPosargs
+    @permittedKwargs({})
+    def get_shared_lib_method(self, args, kwargs):
+        return self.shared_holder
+
+    @noPosargs
+    @permittedKwargs({})
+    def get_static_lib_method(self, args, kwargs):
+        return self.static_holder
+
+class SharedModuleHolder(BuildTargetHolder):
+    def __init__(self, target, interp):
+        super().__init__(target, interp)
+
+class JarHolder(BuildTargetHolder):
+    def __init__(self, target, interp):
+        super().__init__(target, interp)
+
+class CustomTargetIndexHolder(TargetHolder):
+    def __init__(self, target, interp):
+        super().__init__(target, interp)
+        self.methods.update({'full_path': self.full_path_method,
+                             })
+
+    @FeatureNew('custom_target[i].full_path', '0.54.0')
+    @noPosargs
+    @permittedKwargs({})
+    def full_path_method(self, args, kwargs):
+        return self.interpreter.backend.get_target_filename_abs(self.held_object)
+
+class CustomTargetHolder(TargetHolder):
+    def __init__(self, target, interp):
+        super().__init__(target, interp)
+        self.methods.update({'full_path': self.full_path_method,
+                             'to_list': self.to_list_method,
+                             })
+
+    def __repr__(self):
+        r = '<{} {}: {}>'
+        h = self.held_object
+        return r.format(self.__class__.__name__, h.get_id(), h.command)
+
+    @noPosargs
+    @permittedKwargs({})
+    def full_path_method(self, args, kwargs):
+        return self.interpreter.backend.get_target_filename_abs(self.held_object)
+
+    @FeatureNew('custom_target.to_list', '0.54.0')
+    @noPosargs
+    @permittedKwargs({})
+    def to_list_method(self, args, kwargs):
+        result = []
+        for i in self.held_object:
+            result.append(CustomTargetIndexHolder(i, self.interpreter))
+        return result
+
+    def __getitem__(self, index):
+        return CustomTargetIndexHolder(self.held_object[index], self.interpreter)
+
+    def __setitem__(self, index, value):  # lgtm[py/unexpected-raise-in-special-method]
+        raise InterpreterException('Cannot set a member of a CustomTarget')
+
+    def __delitem__(self, index):  # lgtm[py/unexpected-raise-in-special-method]
+        raise InterpreterException('Cannot delete a member of a CustomTarget')
+
+    def outdir_include(self):
+        return IncludeDirsHolder(build.IncludeDirs('', [], False,
+                                                   [os.path.join('@BUILD_ROOT@', self.interpreter.backend.get_target_dir(self.held_object))]))
+
+class RunTargetHolder(TargetHolder):
+    def __init__(self, target, interp):
+        super().__init__(target, interp)
+
+    def __repr__(self):
+        r = '<{} {}: {}>'
+        h = self.held_object
+        return r.format(self.__class__.__name__, h.get_id(), h.command)
+
+class Test(InterpreterObject):
+    def __init__(self, name: str, project: str, suite: T.List[str], exe: build.Executable,
+                 depends: T.List[T.Union[build.CustomTarget, build.BuildTarget]],
+                 is_parallel: bool, cmd_args: T.List[str], env: build.EnvironmentVariables,
+                 should_fail: bool, timeout: int, workdir: T.Optional[str], protocol: str,
+                 priority: int):
+        InterpreterObject.__init__(self)
+        self.name = name
+        self.suite = suite
+        self.project_name = project
+        self.exe = exe
+        self.depends = depends
+        self.is_parallel = is_parallel
+        self.cmd_args = cmd_args
+        self.env = env
+        self.should_fail = should_fail
+        self.timeout = timeout
+        self.workdir = workdir
+        self.protocol = TestProtocol.from_str(protocol)
+        self.priority = priority
+
+    def get_exe(self):
+        return self.exe
+
+    def get_name(self):
+        return self.name
+
+class SubprojectHolder(InterpreterObject, ObjectHolder):
+
+    def __init__(self, subinterpreter, subproject_dir, name, warnings=0, disabled_feature=None,
+                 exception=None):
+        InterpreterObject.__init__(self)
+        ObjectHolder.__init__(self, subinterpreter)
+        self.name = name
+        self.warnings = warnings
+        self.disabled_feature = disabled_feature
+        self.exception = exception
+        self.subproject_dir = subproject_dir
+        self.methods.update({'get_variable': self.get_variable_method,
+                             'found': self.found_method,
+                             })
+
+    @noPosargs
+    @permittedKwargs({})
+    def found_method(self, args, kwargs):
+        return self.found()
+
+    def found(self):
+        return self.held_object is not None
+
+    @permittedKwargs({})
+    @noArgsFlattening
+    def get_variable_method(self, args, kwargs):
+        if len(args) < 1 or len(args) > 2:
+            raise InterpreterException('Get_variable takes one or two arguments.')
+        if not self.found():
+            raise InterpreterException('Subproject "%s/%s" disabled can\'t get_variable on it.' % (
+                self.subproject_dir, self.name))
+        varname = args[0]
+        if not isinstance(varname, str):
+            raise InterpreterException('Get_variable first argument must be a string.')
+        try:
+            return self.held_object.variables[varname]
+        except KeyError:
+            pass
+
+        if len(args) == 2:
+            return args[1]
+
+        raise InvalidArguments('Requested variable "{0}" not found.'.format(varname))
+
+header_permitted_kwargs = set([
+    'required',
+    'prefix',
+    'no_builtin_args',
+    'include_directories',
+    'args',
+    'dependencies',
+])
+
+find_library_permitted_kwargs = set([
+    'has_headers',
+    'required',
+    'dirs',
+    'static',
+])
+
+find_library_permitted_kwargs |= set(['header_' + k for k in header_permitted_kwargs])
+
+class CompilerHolder(InterpreterObject):
+    def __init__(self, compiler, env, subproject):
+        InterpreterObject.__init__(self)
+        self.compiler = compiler
+        self.environment = env
+        self.subproject = subproject
+        self.methods.update({'compiles': self.compiles_method,
+                             'links': self.links_method,
+                             'get_id': self.get_id_method,
+                             'get_linker_id': self.get_linker_id_method,
+                             'compute_int': self.compute_int_method,
+                             'sizeof': self.sizeof_method,
+                             'get_define': self.get_define_method,
+                             'check_header': self.check_header_method,
+                             'has_header': self.has_header_method,
+                             'has_header_symbol': self.has_header_symbol_method,
+                             'run': self.run_method,
+                             'has_function': self.has_function_method,
+                             'has_member': self.has_member_method,
+                             'has_members': self.has_members_method,
+                             'has_type': self.has_type_method,
+                             'alignment': self.alignment_method,
+                             'version': self.version_method,
+                             'cmd_array': self.cmd_array_method,
+                             'find_library': self.find_library_method,
+                             'has_argument': self.has_argument_method,
+                             'has_function_attribute': self.has_func_attribute_method,
+                             'get_supported_function_attributes': self.get_supported_function_attributes_method,
+                             'has_multi_arguments': self.has_multi_arguments_method,
+                             'get_supported_arguments': self.get_supported_arguments_method,
+                             'first_supported_argument': self.first_supported_argument_method,
+                             'has_link_argument': self.has_link_argument_method,
+                             'has_multi_link_arguments': self.has_multi_link_arguments_method,
+                             'get_supported_link_arguments': self.get_supported_link_arguments_method,
+                             'first_supported_link_argument': self.first_supported_link_argument_method,
+                             'unittest_args': self.unittest_args_method,
+                             'symbols_have_underscore_prefix': self.symbols_have_underscore_prefix_method,
+                             'get_argument_syntax': self.get_argument_syntax_method,
+                             })
+
+    def _dep_msg(self, deps, endl):
+        msg_single = 'with dependency {}'
+        msg_many = 'with dependencies {}'
+        if not deps:
+            return endl
+        if endl is None:
+            endl = ''
+        tpl = msg_many if len(deps) > 1 else msg_single
+        names = []
+        for d in deps:
+            if isinstance(d, dependencies.ExternalLibrary):
+                name = '-l' + d.name
+            else:
+                name = d.name
+            names.append(name)
+        return tpl.format(', '.join(names)) + endl
+
+    @noPosargs
+    @permittedKwargs({})
+    def version_method(self, args, kwargs):
+        return self.compiler.version
+
+    @noPosargs
+    @permittedKwargs({})
+    def cmd_array_method(self, args, kwargs):
+        return self.compiler.exelist
+
+    def determine_args(self, kwargs, mode='link'):
+        nobuiltins = kwargs.get('no_builtin_args', False)
+        if not isinstance(nobuiltins, bool):
+            raise InterpreterException('Type of no_builtin_args not a boolean.')
+        args = []
+        incdirs = extract_as_list(kwargs, 'include_directories')
+        for i in incdirs:
+            if not isinstance(i, IncludeDirsHolder):
+                raise InterpreterException('Include directories argument must be an include_directories object.')
+            for idir in i.held_object.get_incdirs():
+                idir = os.path.join(self.environment.get_source_dir(),
+                                    i.held_object.get_curdir(), idir)
+                args += self.compiler.get_include_args(idir, False)
+        if not nobuiltins:
+            for_machine = Interpreter.machine_from_native_kwarg(kwargs)
+            opts = self.environment.coredata.compiler_options[for_machine][self.compiler.language]
+            args += self.compiler.get_option_compile_args(opts)
+            if mode == 'link':
+                args += self.compiler.get_option_link_args(opts)
+        args += mesonlib.stringlistify(kwargs.get('args', []))
+        return args
+
+    def determine_dependencies(self, kwargs, endl=':'):
+        deps = kwargs.get('dependencies', None)
+        if deps is not None:
+            deps = listify(deps)
+            final_deps = []
+            for d in deps:
+                try:
+                    d = d.held_object
+                except Exception:
+                    pass
+                if isinstance(d, InternalDependency) or not isinstance(d, Dependency):
+                    raise InterpreterException('Dependencies must be external dependencies')
+                final_deps.append(d)
+            deps = final_deps
+        return deps, self._dep_msg(deps, endl)
+
+    @permittedKwargs({
+        'prefix',
+        'args',
+        'dependencies',
+    })
+    def alignment_method(self, args, kwargs):
+        if len(args) != 1:
+            raise InterpreterException('Alignment method takes exactly one positional argument.')
+        check_stringlist(args)
+        typename = args[0]
+        prefix = kwargs.get('prefix', '')
+        if not isinstance(prefix, str):
+            raise InterpreterException('Prefix argument of alignment must be a string.')
+        extra_args = mesonlib.stringlistify(kwargs.get('args', []))
+        deps, msg = self.determine_dependencies(kwargs)
+        result = self.compiler.alignment(typename, prefix, self.environment,
+                                         extra_args=extra_args,
+                                         dependencies=deps)
+        mlog.log('Checking for alignment of', mlog.bold(typename, True), msg, result)
+        return result
+
+    @permittedKwargs({
+        'name',
+        'no_builtin_args',
+        'include_directories',
+        'args',
+        'dependencies',
+    })
+    def run_method(self, args, kwargs):
+        if len(args) != 1:
+            raise InterpreterException('Run method takes exactly one positional argument.')
+        code = args[0]
+        if isinstance(code, mesonlib.File):
+            code = mesonlib.File.from_absolute_file(
+                code.rel_to_builddir(self.environment.source_dir))
+        elif not isinstance(code, str):
+            raise InvalidArguments('Argument must be string or file.')
+        testname = kwargs.get('name', '')
+        if not isinstance(testname, str):
+            raise InterpreterException('Testname argument must be a string.')
+        extra_args = functools.partial(self.determine_args, kwargs)
+        deps, msg = self.determine_dependencies(kwargs, endl=None)
+        result = self.compiler.run(code, self.environment, extra_args=extra_args,
+                                   dependencies=deps)
+        if len(testname) > 0:
+            if not result.compiled:
+                h = mlog.red('DID NOT COMPILE')
+            elif result.returncode == 0:
+                h = mlog.green('YES')
+            else:
+                h = mlog.red('NO (%d)' % result.returncode)
+            mlog.log('Checking if', mlog.bold(testname, True), msg, 'runs:', h)
+        return TryRunResultHolder(result)
+
+    @noPosargs
+    @permittedKwargs({})
+    def get_id_method(self, args, kwargs):
+        return self.compiler.get_id()
+
+    @noPosargs
+    @permittedKwargs({})
+    @FeatureNew('compiler.get_linker_id', '0.53.0')
+    def get_linker_id_method(self, args, kwargs):
+        return self.compiler.get_linker_id()
+
+    @noPosargs
+    @permittedKwargs({})
+    def symbols_have_underscore_prefix_method(self, args, kwargs):
+        '''
+        Check if the compiler prefixes _ (underscore) to global C symbols
+        See: https://en.wikipedia.org/wiki/Name_mangling#C
+        '''
+        return self.compiler.symbols_have_underscore_prefix(self.environment)
+
+    @noPosargs
+    @permittedKwargs({})
+    def unittest_args_method(self, args, kwargs):
+        '''
+        This function is deprecated and should not be used.
+        It can be removed in a future version of Meson.
+        '''
+        if not hasattr(self.compiler, 'get_feature_args'):
+            raise InterpreterException('This {} compiler has no feature arguments.'.format(self.compiler.get_display_language()))
+        build_to_src = os.path.relpath(self.environment.get_source_dir(), self.environment.get_build_dir())
+        return self.compiler.get_feature_args({'unittest': 'true'}, build_to_src)
+
+    @permittedKwargs({
+        'prefix',
+        'no_builtin_args',
+        'include_directories',
+        'args',
+        'dependencies',
+    })
+    def has_member_method(self, args, kwargs):
+        if len(args) != 2:
+            raise InterpreterException('Has_member takes exactly two arguments.')
+        check_stringlist(args)
+        typename, membername = args
+        prefix = kwargs.get('prefix', '')
+        if not isinstance(prefix, str):
+            raise InterpreterException('Prefix argument of has_member must be a string.')
+        extra_args = functools.partial(self.determine_args, kwargs)
+        deps, msg = self.determine_dependencies(kwargs)
+        had, cached = self.compiler.has_members(typename, [membername], prefix,
+                                                self.environment,
+                                                extra_args=extra_args,
+                                                dependencies=deps)
+        cached = mlog.blue('(cached)') if cached else ''
+        if had:
+            hadtxt = mlog.green('YES')
+        else:
+            hadtxt = mlog.red('NO')
+        mlog.log('Checking whether type', mlog.bold(typename, True),
+                 'has member', mlog.bold(membername, True), msg, hadtxt, cached)
+        return had
+
+    @permittedKwargs({
+        'prefix',
+        'no_builtin_args',
+        'include_directories',
+        'args',
+        'dependencies',
+    })
+    def has_members_method(self, args, kwargs):
+        if len(args) < 2:
+            raise InterpreterException('Has_members needs at least two arguments.')
+        check_stringlist(args)
+        typename, *membernames = args
+        prefix = kwargs.get('prefix', '')
+        if not isinstance(prefix, str):
+            raise InterpreterException('Prefix argument of has_members must be a string.')
+        extra_args = functools.partial(self.determine_args, kwargs)
+        deps, msg = self.determine_dependencies(kwargs)
+        had, cached = self.compiler.has_members(typename, membernames, prefix,
+                                                self.environment,
+                                                extra_args=extra_args,
+                                                dependencies=deps)
+        cached = mlog.blue('(cached)') if cached else ''
+        if had:
+            hadtxt = mlog.green('YES')
+        else:
+            hadtxt = mlog.red('NO')
+        members = mlog.bold(', '.join(['"{}"'.format(m) for m in membernames]))
+        mlog.log('Checking whether type', mlog.bold(typename, True),
+                 'has members', members, msg, hadtxt, cached)
+        return had
+
+    @permittedKwargs({
+        'prefix',
+        'no_builtin_args',
+        'include_directories',
+        'args',
+        'dependencies',
+    })
+    def has_function_method(self, args, kwargs):
+        if len(args) != 1:
+            raise InterpreterException('Has_function takes exactly one argument.')
+        check_stringlist(args)
+        funcname = args[0]
+        prefix = kwargs.get('prefix', '')
+        if not isinstance(prefix, str):
+            raise InterpreterException('Prefix argument of has_function must be a string.')
+        extra_args = self.determine_args(kwargs)
+        deps, msg = self.determine_dependencies(kwargs)
+        had, cached = self.compiler.has_function(funcname, prefix, self.environment,
+                                                 extra_args=extra_args,
+                                                 dependencies=deps)
+        cached = mlog.blue('(cached)') if cached else ''
+        if had:
+            hadtxt = mlog.green('YES')
+        else:
+            hadtxt = mlog.red('NO')
+        mlog.log('Checking for function', mlog.bold(funcname, True), msg, hadtxt, cached)
+        return had
+
+    @permittedKwargs({
+        'prefix',
+        'no_builtin_args',
+        'include_directories',
+        'args',
+        'dependencies',
+    })
+    def has_type_method(self, args, kwargs):
+        if len(args) != 1:
+            raise InterpreterException('Has_type takes exactly one argument.')
+        check_stringlist(args)
+        typename = args[0]
+        prefix = kwargs.get('prefix', '')
+        if not isinstance(prefix, str):
+            raise InterpreterException('Prefix argument of has_type must be a string.')
+        extra_args = functools.partial(self.determine_args, kwargs)
+        deps, msg = self.determine_dependencies(kwargs)
+        had, cached = self.compiler.has_type(typename, prefix, self.environment,
+                                             extra_args=extra_args, dependencies=deps)
+        cached = mlog.blue('(cached)') if cached else ''
+        if had:
+            hadtxt = mlog.green('YES')
+        else:
+            hadtxt = mlog.red('NO')
+        mlog.log('Checking for type', mlog.bold(typename, True), msg, hadtxt, cached)
+        return had
+
+    @FeatureNew('compiler.compute_int', '0.40.0')
+    @permittedKwargs({
+        'prefix',
+        'low',
+        'high',
+        'guess',
+        'no_builtin_args',
+        'include_directories',
+        'args',
+        'dependencies',
+    })
+    def compute_int_method(self, args, kwargs):
+        if len(args) != 1:
+            raise InterpreterException('Compute_int takes exactly one argument.')
+        check_stringlist(args)
+        expression = args[0]
+        prefix = kwargs.get('prefix', '')
+        low = kwargs.get('low', None)
+        high = kwargs.get('high', None)
+        guess = kwargs.get('guess', None)
+        if not isinstance(prefix, str):
+            raise InterpreterException('Prefix argument of compute_int must be a string.')
+        if low is not None and not isinstance(low, int):
+            raise InterpreterException('Low argument of compute_int must be an int.')
+        if high is not None and not isinstance(high, int):
+            raise InterpreterException('High argument of compute_int must be an int.')
+        if guess is not None and not isinstance(guess, int):
+            raise InterpreterException('Guess argument of compute_int must be an int.')
+        extra_args = functools.partial(self.determine_args, kwargs)
+        deps, msg = self.determine_dependencies(kwargs)
+        res = self.compiler.compute_int(expression, low, high, guess, prefix,
+                                        self.environment, extra_args=extra_args,
+                                        dependencies=deps)
+        mlog.log('Computing int of', mlog.bold(expression, True), msg, res)
+        return res
+
+    @permittedKwargs({
+        'prefix',
+        'no_builtin_args',
+        'include_directories',
+        'args',
+        'dependencies',
+    })
+    def sizeof_method(self, args, kwargs):
+        if len(args) != 1:
+            raise InterpreterException('Sizeof takes exactly one argument.')
+        check_stringlist(args)
+        element = args[0]
+        prefix = kwargs.get('prefix', '')
+        if not isinstance(prefix, str):
+            raise InterpreterException('Prefix argument of sizeof must be a string.')
+        extra_args = functools.partial(self.determine_args, kwargs)
+        deps, msg = self.determine_dependencies(kwargs)
+        esize = self.compiler.sizeof(element, prefix, self.environment,
+                                     extra_args=extra_args, dependencies=deps)
+        mlog.log('Checking for size of', mlog.bold(element, True), msg, esize)
+        return esize
+
+    @FeatureNew('compiler.get_define', '0.40.0')
+    @permittedKwargs({
+        'prefix',
+        'no_builtin_args',
+        'include_directories',
+        'args',
+        'dependencies',
+    })
+    def get_define_method(self, args, kwargs):
+        if len(args) != 1:
+            raise InterpreterException('get_define() takes exactly one argument.')
+        check_stringlist(args)
+        element = args[0]
+        prefix = kwargs.get('prefix', '')
+        if not isinstance(prefix, str):
+            raise InterpreterException('Prefix argument of get_define() must be a string.')
+        extra_args = functools.partial(self.determine_args, kwargs)
+        deps, msg = self.determine_dependencies(kwargs)
+        value, cached = self.compiler.get_define(element, prefix, self.environment,
+                                                 extra_args=extra_args,
+                                                 dependencies=deps)
+        cached = mlog.blue('(cached)') if cached else ''
+        mlog.log('Fetching value of define', mlog.bold(element, True), msg, value, cached)
+        return value
+
+    @permittedKwargs({
+        'name',
+        'no_builtin_args',
+        'include_directories',
+        'args',
+        'dependencies',
+    })
+    def compiles_method(self, args, kwargs):
+        if len(args) != 1:
+            raise InterpreterException('compiles method takes exactly one argument.')
+        code = args[0]
+        if isinstance(code, mesonlib.File):
+            code = mesonlib.File.from_absolute_file(
+                code.rel_to_builddir(self.environment.source_dir))
+        elif not isinstance(code, str):
+            raise InvalidArguments('Argument must be string or file.')
+        testname = kwargs.get('name', '')
+        if not isinstance(testname, str):
+            raise InterpreterException('Testname argument must be a string.')
+        extra_args = functools.partial(self.determine_args, kwargs)
+        deps, msg = self.determine_dependencies(kwargs, endl=None)
+        result, cached = self.compiler.compiles(code, self.environment,
+                                                extra_args=extra_args,
+                                                dependencies=deps)
+        if len(testname) > 0:
+            if result:
+                h = mlog.green('YES')
+            else:
+                h = mlog.red('NO')
+            cached = mlog.blue('(cached)') if cached else ''
+            mlog.log('Checking if', mlog.bold(testname, True), msg, 'compiles:', h, cached)
+        return result
+
+    @permittedKwargs({
+        'name',
+        'no_builtin_args',
+        'include_directories',
+        'args',
+        'dependencies',
+    })
+    def links_method(self, args, kwargs):
+        if len(args) != 1:
+            raise InterpreterException('links method takes exactly one argument.')
+        code = args[0]
+        if isinstance(code, mesonlib.File):
+            code = mesonlib.File.from_absolute_file(
+                code.rel_to_builddir(self.environment.source_dir))
+        elif not isinstance(code, str):
+            raise InvalidArguments('Argument must be string or file.')
+        testname = kwargs.get('name', '')
+        if not isinstance(testname, str):
+            raise InterpreterException('Testname argument must be a string.')
+        extra_args = functools.partial(self.determine_args, kwargs)
+        deps, msg = self.determine_dependencies(kwargs, endl=None)
+        result, cached = self.compiler.links(code, self.environment,
+                                             extra_args=extra_args,
+                                             dependencies=deps)
+        cached = mlog.blue('(cached)') if cached else ''
+        if len(testname) > 0:
+            if result:
+                h = mlog.green('YES')
+            else:
+                h = mlog.red('NO')
+            mlog.log('Checking if', mlog.bold(testname, True), msg, 'links:', h, cached)
+        return result
+
+    @FeatureNew('compiler.check_header', '0.47.0')
+    @FeatureNewKwargs('compiler.check_header', '0.50.0', ['required'])
+    @permittedKwargs(header_permitted_kwargs)
+    def check_header_method(self, args, kwargs):
+        if len(args) != 1:
+            raise InterpreterException('check_header method takes exactly one argument.')
+        check_stringlist(args)
+        hname = args[0]
+        prefix = kwargs.get('prefix', '')
+        if not isinstance(prefix, str):
+            raise InterpreterException('Prefix argument of has_header must be a string.')
+        disabled, required, feature = extract_required_kwarg(kwargs, self.subproject, default=False)
+        if disabled:
+            mlog.log('Check usable header', mlog.bold(hname, True), 'skipped: feature', mlog.bold(feature), 'disabled')
+            return False
+        extra_args = functools.partial(self.determine_args, kwargs)
+        deps, msg = self.determine_dependencies(kwargs)
+        haz, cached = self.compiler.check_header(hname, prefix, self.environment,
+                                                 extra_args=extra_args,
+                                                 dependencies=deps)
+        cached = mlog.blue('(cached)') if cached else ''
+        if required and not haz:
+            raise InterpreterException('{} header {!r} not usable'.format(self.compiler.get_display_language(), hname))
+        elif haz:
+            h = mlog.green('YES')
+        else:
+            h = mlog.red('NO')
+        mlog.log('Check usable header', mlog.bold(hname, True), msg, h, cached)
+        return haz
+
+    @FeatureNewKwargs('compiler.has_header', '0.50.0', ['required'])
+    @permittedKwargs(header_permitted_kwargs)
+    def has_header_method(self, args, kwargs):
+        if len(args) != 1:
+            raise InterpreterException('has_header method takes exactly one argument.')
+        check_stringlist(args)
+        hname = args[0]
+        prefix = kwargs.get('prefix', '')
+        if not isinstance(prefix, str):
+            raise InterpreterException('Prefix argument of has_header must be a string.')
+        disabled, required, feature = extract_required_kwarg(kwargs, self.subproject, default=False)
+        if disabled:
+            mlog.log('Has header', mlog.bold(hname, True), 'skipped: feature', mlog.bold(feature), 'disabled')
+            return False
+        extra_args = functools.partial(self.determine_args, kwargs)
+        deps, msg = self.determine_dependencies(kwargs)
+        haz, cached = self.compiler.has_header(hname, prefix, self.environment,
+                                               extra_args=extra_args, dependencies=deps)
+        cached = mlog.blue('(cached)') if cached else ''
+        if required and not haz:
+            raise InterpreterException('{} header {!r} not found'.format(self.compiler.get_display_language(), hname))
+        elif haz:
+            h = mlog.green('YES')
+        else:
+            h = mlog.red('NO')
+        mlog.log('Has header', mlog.bold(hname, True), msg, h, cached)
+        return haz
+
+    @FeatureNewKwargs('compiler.has_header_symbol', '0.50.0', ['required'])
+    @permittedKwargs(header_permitted_kwargs)
+    def has_header_symbol_method(self, args, kwargs):
+        if len(args) != 2:
+            raise InterpreterException('has_header_symbol method takes exactly two arguments.')
+        check_stringlist(args)
+        hname, symbol = args
+        prefix = kwargs.get('prefix', '')
+        if not isinstance(prefix, str):
+            raise InterpreterException('Prefix argument of has_header_symbol must be a string.')
+        disabled, required, feature = extract_required_kwarg(kwargs, self.subproject, default=False)
+        if disabled:
+            mlog.log('Header <{0}> has symbol'.format(hname), mlog.bold(symbol, True), 'skipped: feature', mlog.bold(feature), 'disabled')
+            return False
+        extra_args = functools.partial(self.determine_args, kwargs)
+        deps, msg = self.determine_dependencies(kwargs)
+        haz, cached = self.compiler.has_header_symbol(hname, symbol, prefix, self.environment,
+                                                      extra_args=extra_args,
+                                                      dependencies=deps)
+        if required and not haz:
+            raise InterpreterException('{} symbol {} not found in header {}'.format(self.compiler.get_display_language(), symbol, hname))
+        elif haz:
+            h = mlog.green('YES')
+        else:
+            h = mlog.red('NO')
+        cached = mlog.blue('(cached)') if cached else ''
+        mlog.log('Header <{0}> has symbol'.format(hname), mlog.bold(symbol, True), msg, h, cached)
+        return haz
+
+    def notfound_library(self, libname):
+        lib = dependencies.ExternalLibrary(libname, None,
+                                           self.environment,
+                                           self.compiler.language,
+                                           silent=True)
+        return ExternalLibraryHolder(lib, self.subproject)
+
+    @FeatureNewKwargs('compiler.find_library', '0.51.0', ['static'])
+    @FeatureNewKwargs('compiler.find_library', '0.50.0', ['has_headers'])
+    @FeatureNewKwargs('compiler.find_library', '0.49.0', ['disabler'])
+    @disablerIfNotFound
+    @permittedKwargs(find_library_permitted_kwargs)
+    def find_library_method(self, args, kwargs):
+        # TODO add dependencies support?
+        if len(args) != 1:
+            raise InterpreterException('find_library method takes one argument.')
+        libname = args[0]
+        if not isinstance(libname, str):
+            raise InterpreterException('Library name not a string.')
+
+        disabled, required, feature = extract_required_kwarg(kwargs, self.subproject)
+        if disabled:
+            mlog.log('Library', mlog.bold(libname), 'skipped: feature', mlog.bold(feature), 'disabled')
+            return self.notfound_library(libname)
+
+        has_header_kwargs = {k[7:]: v for k, v in kwargs.items() if k.startswith('header_')}
+        has_header_kwargs['required'] = required
+        headers = mesonlib.stringlistify(kwargs.get('has_headers', []))
+        for h in headers:
+            if not self.has_header_method([h], has_header_kwargs):
+                return self.notfound_library(libname)
+
+        search_dirs = extract_search_dirs(kwargs)
+
+        libtype = mesonlib.LibType.PREFER_SHARED
+        if 'static' in kwargs:
+            if not isinstance(kwargs['static'], bool):
+                raise InterpreterException('static must be a boolean')
+            libtype = mesonlib.LibType.STATIC if kwargs['static'] else mesonlib.LibType.SHARED
+        linkargs = self.compiler.find_library(libname, self.environment, search_dirs, libtype)
+        if required and not linkargs:
+            if libtype == mesonlib.LibType.PREFER_SHARED:
+                libtype = 'shared or static'
+            else:
+                libtype = libtype.name.lower()
+            raise InterpreterException('{} {} library {!r} not found'
+                                       .format(self.compiler.get_display_language(),
+                                               libtype, libname))
+        lib = dependencies.ExternalLibrary(libname, linkargs, self.environment,
+                                           self.compiler.language)
+        return ExternalLibraryHolder(lib, self.subproject)
+
+    @permittedKwargs({})
+    def has_argument_method(self, args: T.Sequence[str], kwargs) -> bool:
+        args = mesonlib.stringlistify(args)
+        if len(args) != 1:
+            raise InterpreterException('has_argument takes exactly one argument.')
+        return self.has_multi_arguments_method(args, kwargs)
+
+    @permittedKwargs({})
+    def has_multi_arguments_method(self, args: T.Sequence[str], kwargs: dict):
+        args = mesonlib.stringlistify(args)
+        result, cached = self.compiler.has_multi_arguments(args, self.environment)
+        if result:
+            h = mlog.green('YES')
+        else:
+            h = mlog.red('NO')
+        cached = mlog.blue('(cached)') if cached else ''
+        mlog.log(
+            'Compiler for {} supports arguments {}:'.format(
+                self.compiler.get_display_language(), ' '.join(args)),
+            h, cached)
+        return result
+
+    @FeatureNew('compiler.get_supported_arguments', '0.43.0')
+    @permittedKwargs({})
+    def get_supported_arguments_method(self, args, kwargs):
+        args = mesonlib.stringlistify(args)
+        supported_args = []
+        for arg in args:
+            if self.has_argument_method(arg, kwargs):
+                supported_args.append(arg)
+        return supported_args
+
+    @permittedKwargs({})
+    def first_supported_argument_method(self, args: T.Sequence[str], kwargs: dict) -> T.List[str]:
+        for arg in mesonlib.stringlistify(args):
+            if self.has_argument_method(arg, kwargs):
+                mlog.log('First supported argument:', mlog.bold(arg))
+                return [arg]
+        mlog.log('First supported argument:', mlog.red('None'))
+        return []
+
+    @FeatureNew('compiler.has_link_argument', '0.46.0')
+    @permittedKwargs({})
+    def has_link_argument_method(self, args, kwargs):
+        args = mesonlib.stringlistify(args)
+        if len(args) != 1:
+            raise InterpreterException('has_link_argument takes exactly one argument.')
+        return self.has_multi_link_arguments_method(args, kwargs)
+
+    @FeatureNew('compiler.has_multi_link_argument', '0.46.0')
+    @permittedKwargs({})
+    def has_multi_link_arguments_method(self, args, kwargs):
+        args = mesonlib.stringlistify(args)
+        result, cached = self.compiler.has_multi_link_arguments(args, self.environment)
+        cached = mlog.blue('(cached)') if cached else ''
+        if result:
+            h = mlog.green('YES')
+        else:
+            h = mlog.red('NO')
+        mlog.log(
+            'Compiler for {} supports link arguments {}:'.format(
+                self.compiler.get_display_language(), ' '.join(args)),
+            h, cached)
+        return result
+
+    @FeatureNew('compiler.get_supported_link_arguments_method', '0.46.0')
+    @permittedKwargs({})
+    def get_supported_link_arguments_method(self, args, kwargs):
+        args = mesonlib.stringlistify(args)
+        supported_args = []
+        for arg in args:
+            if self.has_link_argument_method(arg, kwargs):
+                supported_args.append(arg)
+        return supported_args
+
+    @FeatureNew('compiler.first_supported_link_argument_method', '0.46.0')
+    @permittedKwargs({})
+    def first_supported_link_argument_method(self, args, kwargs):
+        for i in mesonlib.stringlistify(args):
+            if self.has_link_argument_method(i, kwargs):
+                mlog.log('First supported link argument:', mlog.bold(i))
+                return [i]
+        mlog.log('First supported link argument:', mlog.red('None'))
+        return []
+
+    @FeatureNew('compiler.has_function_attribute', '0.48.0')
+    @permittedKwargs({})
+    def has_func_attribute_method(self, args, kwargs):
+        args = mesonlib.stringlistify(args)
+        if len(args) != 1:
+            raise InterpreterException('has_func_attribute takes exactly one argument.')
+        result, cached = self.compiler.has_func_attribute(args[0], self.environment)
+        cached = mlog.blue('(cached)') if cached else ''
+        h = mlog.green('YES') if result else mlog.red('NO')
+        mlog.log('Compiler for {} supports function attribute {}:'.format(self.compiler.get_display_language(), args[0]), h, cached)
+        return result
+
+    @FeatureNew('compiler.get_supported_function_attributes', '0.48.0')
+    @permittedKwargs({})
+    def get_supported_function_attributes_method(self, args, kwargs):
+        args = mesonlib.stringlistify(args)
+        return [a for a in args if self.has_func_attribute_method(a, kwargs)]
+
+    @FeatureNew('compiler.get_argument_syntax_method', '0.49.0')
+    @noPosargs
+    @noKwargs
+    def get_argument_syntax_method(self, args, kwargs):
+        return self.compiler.get_argument_syntax()
+
+
+ModuleState = collections.namedtuple('ModuleState', [
+    'source_root', 'build_to_src', 'subproject', 'subdir', 'current_lineno', 'environment',
+    'project_name', 'project_version', 'backend', 'targets',
+    'data', 'headers', 'man', 'global_args', 'project_args', 'build_machine',
+    'host_machine', 'target_machine', 'current_node'])
+
+class ModuleHolder(InterpreterObject, ObjectHolder):
+    def __init__(self, modname, module, interpreter):
+        InterpreterObject.__init__(self)
+        ObjectHolder.__init__(self, module)
+        self.modname = modname
+        self.interpreter = interpreter
+
+    def method_call(self, method_name, args, kwargs):
+        try:
+            fn = getattr(self.held_object, method_name)
+        except AttributeError:
+            raise InvalidArguments('Module %s does not have method %s.' % (self.modname, method_name))
+        if method_name.startswith('_'):
+            raise InvalidArguments('Function {!r} in module {!r} is private.'.format(method_name, self.modname))
+        if not getattr(fn, 'no-args-flattening', False):
+            args = flatten(args)
+        # This is not 100% reliable but we can't use hash()
+        # because the Build object contains dicts and lists.
+        num_targets = len(self.interpreter.build.targets)
+        state = ModuleState(
+            source_root = self.interpreter.environment.get_source_dir(),
+            build_to_src=mesonlib.relpath(self.interpreter.environment.get_source_dir(),
+                                          self.interpreter.environment.get_build_dir()),
+            subproject=self.interpreter.subproject,
+            subdir=self.interpreter.subdir,
+            current_lineno=self.interpreter.current_lineno,
+            environment=self.interpreter.environment,
+            project_name=self.interpreter.build.project_name,
+            project_version=self.interpreter.build.dep_manifest[self.interpreter.active_projectname],
+            # The backend object is under-used right now, but we will need it:
+            # https://github.com/mesonbuild/meson/issues/1419
+            backend=self.interpreter.backend,
+            targets=self.interpreter.build.targets,
+            data=self.interpreter.build.data,
+            headers=self.interpreter.build.get_headers(),
+            man=self.interpreter.build.get_man(),
+            #global_args_for_build = self.interpreter.build.global_args.build,
+            global_args = self.interpreter.build.global_args.host,
+            #project_args_for_build = self.interpreter.build.projects_args.build.get(self.interpreter.subproject, {}),
+            project_args = self.interpreter.build.projects_args.host.get(self.interpreter.subproject, {}),
+            build_machine=self.interpreter.builtin['build_machine'].held_object,
+            host_machine=self.interpreter.builtin['host_machine'].held_object,
+            target_machine=self.interpreter.builtin['target_machine'].held_object,
+            current_node=self.current_node
+        )
+        # Many modules do for example self.interpreter.find_program_impl(),
+        # so we have to ensure they use the current interpreter and not the one
+        # that first imported that module, otherwise it will use outdated
+        # overrides.
+        self.held_object.interpreter = self.interpreter
+        if self.held_object.is_snippet(method_name):
+            value = fn(self.interpreter, state, args, kwargs)
+            return self.interpreter.holderify(value)
+        else:
+            value = fn(state, args, kwargs)
+            if num_targets != len(self.interpreter.build.targets):
+                raise InterpreterException('Extension module altered internal state illegally.')
+            return self.interpreter.module_method_callback(value)
+
+
+class Summary:
+    def __init__(self, project_name, project_version):
+        self.project_name = project_name
+        self.project_version = project_version
+        self.sections = collections.defaultdict(dict)
+        self.max_key_len = 0
+
+    def add_section(self, section, values, kwargs):
+        bool_yn = kwargs.get('bool_yn', False)
+        if not isinstance(bool_yn, bool):
+            raise InterpreterException('bool_yn keyword argument must be boolean')
+        list_sep = kwargs.get('list_sep')
+        if list_sep is not None and not isinstance(list_sep, str):
+            raise InterpreterException('list_sep keyword argument must be string')
+        for k, v in values.items():
+            if k in self.sections[section]:
+                raise InterpreterException('Summary section {!r} already have key {!r}'.format(section, k))
+            formatted_values = []
+            for i in listify(v):
+                if not isinstance(i, (str, int)):
+                    m = 'Summary value in section {!r}, key {!r}, must be string, integer or boolean'
+                    raise InterpreterException(m.format(section, k))
+                if bool_yn and isinstance(i, bool):
+                    formatted_values.append(mlog.green('YES') if i else mlog.red('NO'))
+                else:
+                    formatted_values.append(i)
+            self.sections[section][k] = (formatted_values, list_sep)
+            self.max_key_len = max(self.max_key_len, len(k))
+
+    def dump(self):
+        mlog.log(self.project_name, mlog.normal_cyan(self.project_version))
+        for section, values in self.sections.items():
+            mlog.log('')  # newline
+            if section:
+                mlog.log(' ', mlog.bold(section))
+            for k, v in values.items():
+                v, list_sep = v
+                indent = self.max_key_len - len(k) + 3
+                end = ' ' if v else ''
+                mlog.log(' ' * indent, k + ':', end=end)
+                if list_sep is None:
+                    indent = self.max_key_len + 6
+                    list_sep = '\n' + ' ' * indent
+                mlog.log(*v, sep=list_sep)
+        mlog.log('')  # newline
+
+
+class MesonMain(InterpreterObject):
+    def __init__(self, build, interpreter):
+        InterpreterObject.__init__(self)
+        self.build = build
+        self.interpreter = interpreter
+        self._found_source_scripts = {}
+        self.methods.update({'get_compiler': self.get_compiler_method,
+                             'is_cross_build': self.is_cross_build_method,
+                             'has_exe_wrapper': self.has_exe_wrapper_method,
+                             'can_run_host_binaries': self.can_run_host_binaries_method,
+                             'is_unity': self.is_unity_method,
+                             'is_subproject': self.is_subproject_method,
+                             'current_source_dir': self.current_source_dir_method,
+                             'current_build_dir': self.current_build_dir_method,
+                             'source_root': self.source_root_method,
+                             'build_root': self.build_root_method,
+                             'add_install_script': self.add_install_script_method,
+                             'add_postconf_script': self.add_postconf_script_method,
+                             'add_dist_script': self.add_dist_script_method,
+                             'install_dependency_manifest': self.install_dependency_manifest_method,
+                             'override_dependency': self.override_dependency_method,
+                             'override_find_program': self.override_find_program_method,
+                             'project_version': self.project_version_method,
+                             'project_license': self.project_license_method,
+                             'version': self.version_method,
+                             'project_name': self.project_name_method,
+                             'get_cross_property': self.get_cross_property_method,
+                             'get_external_property': self.get_external_property_method,
+                             'backend': self.backend_method,
+                             })
+
+    def _find_source_script(self, prog: T.Union[str, ExecutableHolder], args):
+        if isinstance(prog, ExecutableHolder):
+            prog_path = self.interpreter.backend.get_target_filename(prog.held_object)
+            return build.RunScript([prog_path], args)
+        elif isinstance(prog, ExternalProgramHolder):
+            return build.RunScript(prog.get_command(), args)
+
+        # Prefer scripts in the current source directory
+        search_dir = os.path.join(self.interpreter.environment.source_dir,
+                                  self.interpreter.subdir)
+        key = (prog, search_dir)
+        if key in self._found_source_scripts:
+            found = self._found_source_scripts[key]
+        else:
+            found = dependencies.ExternalProgram(prog, search_dir=search_dir)
+            if found.found():
+                self._found_source_scripts[key] = found
+            else:
+                m = 'Script or command {!r} not found or not executable'
+                raise InterpreterException(m.format(prog))
+        return build.RunScript(found.get_command(), args)
+
+    def _process_script_args(
+            self, name: str, args: T.List[T.Union[
+                str, mesonlib.File, CustomTargetHolder,
+                CustomTargetIndexHolder, ConfigureFileHolder,
+                ExternalProgramHolder, ExecutableHolder,
+            ]], allow_built: bool = False) -> T.List[str]:
+        script_args = []  # T.List[str]
+        new = False
+        for a in args:
+            a = unholder(a)
+            if isinstance(a, str):
+                script_args.append(a)
+            elif isinstance(a, mesonlib.File):
+                new = True
+                script_args.append(a.rel_to_builddir(self.interpreter.environment.source_dir))
+            elif isinstance(a, (build.BuildTarget, build.CustomTarget, build.CustomTargetIndex)):
+                if not allow_built:
+                    raise InterpreterException('Arguments to {} cannot be built'.format(name))
+                new = True
+                script_args.extend([os.path.join(a.get_subdir(), o) for o in a.get_outputs()])
+
+                # This feels really hacky, but I'm not sure how else to fix
+                # this without completely rewriting install script handling.
+                # This is complicated by the fact that the install target
+                # depends on all.
+                if isinstance(a, build.CustomTargetIndex):
+                    a.target.build_by_default = True
+                else:
+                    a.build_by_default = True
+            elif isinstance(a, build.ConfigureFile):
+                new = True
+                script_args.append(os.path.join(a.subdir, a.targetname))
+            elif isinstance(a, dependencies.ExternalProgram):
+                script_args.extend(a.command)
+                new = True
+            else:
+                raise InterpreterException(
+                    'Arguments to {} must be strings, Files, CustomTargets, '
+                    'Indexes of CustomTargets, or ConfigureFiles'.format(name))
+        if new:
+            FeatureNew.single_use(
+                'Calling "{}" with File, CustomTaget, Index of CustomTarget, '
+                'ConfigureFile, Executable, or ExternalProgram'.format(name),
+                '0.55.0', self.interpreter.subproject)
+        return script_args
+
+    @permittedKwargs(set())
+    def add_install_script_method(self, args: 'T.Tuple[T.Union[str, ExecutableHolder], T.Union[str, mesonlib.File, CustomTargetHolder, CustomTargetIndexHolder, ConfigureFileHolder], ...]', kwargs):
+        if len(args) < 1:
+            raise InterpreterException('add_install_script takes one or more arguments')
+        script_args = self._process_script_args('add_install_script', args[1:], allow_built=True)
+        script = self._find_source_script(args[0], script_args)
+        self.build.install_scripts.append(script)
+
+    @permittedKwargs(set())
+    def add_postconf_script_method(self, args, kwargs):
+        if len(args) < 1:
+            raise InterpreterException('add_postconf_script takes one or more arguments')
+        script_args = self._process_script_args('add_postconf_script', args[1:], allow_built=True)
+        script = self._find_source_script(args[0], script_args)
+        self.build.postconf_scripts.append(script)
+
+    @permittedKwargs(set())
+    def add_dist_script_method(self, args, kwargs):
+        if len(args) < 1:
+            raise InterpreterException('add_dist_script takes one or more arguments')
+        if len(args) > 1:
+            FeatureNew.single_use('Calling "add_dist_script" with multiple arguments',
+                                  '0.49.0', self.interpreter.subproject)
+        if self.interpreter.subproject != '':
+            raise InterpreterException('add_dist_script may not be used in a subproject.')
+        script_args = self._process_script_args('add_dist_script', args[1:], allow_built=True)
+        script = self._find_source_script(args[0], script_args)
+        self.build.dist_scripts.append(script)
+
+    @noPosargs
+    @permittedKwargs({})
+    def current_source_dir_method(self, args, kwargs):
+        src = self.interpreter.environment.source_dir
+        sub = self.interpreter.subdir
+        if sub == '':
+            return src
+        return os.path.join(src, sub)
+
+    @noPosargs
+    @permittedKwargs({})
+    def current_build_dir_method(self, args, kwargs):
+        src = self.interpreter.environment.build_dir
+        sub = self.interpreter.subdir
+        if sub == '':
+            return src
+        return os.path.join(src, sub)
+
+    @noPosargs
+    @permittedKwargs({})
+    def backend_method(self, args, kwargs):
+        return self.interpreter.backend.name
+
+    @noPosargs
+    @permittedKwargs({})
+    def source_root_method(self, args, kwargs):
+        return self.interpreter.environment.source_dir
+
+    @noPosargs
+    @permittedKwargs({})
+    def build_root_method(self, args, kwargs):
+        return self.interpreter.environment.build_dir
+
+    @noPosargs
+    @permittedKwargs({})
+    @FeatureDeprecated('meson.has_exe_wrapper', '0.55.0', 'use meson.can_run_host_binaries instead.')
+    def has_exe_wrapper_method(self, args: T.Tuple[object, ...], kwargs: T.Dict[str, object]) -> bool:
+        return self.can_run_host_binaries_impl(args, kwargs)
+
+    @noPosargs
+    @permittedKwargs({})
+    @FeatureNew('meson.can_run_host_binaries', '0.55.0')
+    def can_run_host_binaries_method(self, args: T.Tuple[object, ...], kwargs: T.Dict[str, object]) -> bool:
+        return self.can_run_host_binaries_impl(args, kwargs)
+
+    def can_run_host_binaries_impl(self, args, kwargs):
+        if (self.is_cross_build_method(None, None) and
+                self.build.environment.need_exe_wrapper()):
+            if self.build.environment.exe_wrapper is None:
+                return False
+        # We return True when exe_wrap is defined, when it's not needed, and
+        # when we're compiling natively. The last two are semantically confusing.
+        # Need to revisit this.
+        return True
+
+    @noPosargs
+    @permittedKwargs({})
+    def is_cross_build_method(self, args, kwargs):
+        return self.build.environment.is_cross_build()
+
+    @permittedKwargs({'native'})
+    def get_compiler_method(self, args, kwargs):
+        if len(args) != 1:
+            raise InterpreterException('get_compiler_method must have one and only one argument.')
+        cname = args[0]
+        for_machine = Interpreter.machine_from_native_kwarg(kwargs)
+        clist = self.interpreter.coredata.compilers[for_machine]
+        if cname in clist:
+            return CompilerHolder(clist[cname], self.build.environment, self.interpreter.subproject)
+        raise InterpreterException('Tried to access compiler for unspecified language "%s".' % cname)
+
+    @noPosargs
+    @permittedKwargs({})
+    def is_unity_method(self, args, kwargs):
+        optval = self.interpreter.environment.coredata.get_builtin_option('unity')
+        if optval == 'on' or (optval == 'subprojects' and self.interpreter.is_subproject()):
+            return True
+        return False
+
+    @noPosargs
+    @permittedKwargs({})
+    def is_subproject_method(self, args, kwargs):
+        return self.interpreter.is_subproject()
+
+    @permittedKwargs({})
+    def install_dependency_manifest_method(self, args, kwargs):
+        if len(args) != 1:
+            raise InterpreterException('Must specify manifest install file name')
+        if not isinstance(args[0], str):
+            raise InterpreterException('Argument must be a string.')
+        self.build.dep_manifest_name = args[0]
+
+    @FeatureNew('meson.override_find_program', '0.46.0')
+    @permittedKwargs({})
+    def override_find_program_method(self, args, kwargs):
+        if len(args) != 2:
+            raise InterpreterException('Override needs two arguments')
+        name, exe = args
+        if not isinstance(name, str):
+            raise InterpreterException('First argument must be a string')
+        if hasattr(exe, 'held_object'):
+            exe = exe.held_object
+        if isinstance(exe, mesonlib.File):
+            abspath = exe.absolute_path(self.interpreter.environment.source_dir,
+                                        self.interpreter.environment.build_dir)
+            if not os.path.exists(abspath):
+                raise InterpreterException('Tried to override %s with a file that does not exist.' % name)
+            exe = OverrideProgram(abspath)
+        if not isinstance(exe, (dependencies.ExternalProgram, build.Executable)):
+            raise InterpreterException('Second argument must be an external program or executable.')
+        self.interpreter.add_find_program_override(name, exe)
+
+    @FeatureNew('meson.override_dependency', '0.54.0')
+    @permittedKwargs({'native'})
+    def override_dependency_method(self, args, kwargs):
+        if len(args) != 2:
+            raise InterpreterException('Override needs two arguments')
+        name = args[0]
+        dep = args[1]
+        if not isinstance(name, str) or not name:
+            raise InterpreterException('First argument must be a string and cannot be empty')
+        if hasattr(dep, 'held_object'):
+            dep = dep.held_object
+        if not isinstance(dep, dependencies.Dependency):
+            raise InterpreterException('Second argument must be a dependency object')
+        identifier = dependencies.get_dep_identifier(name, kwargs)
+        for_machine = self.interpreter.machine_from_native_kwarg(kwargs)
+        override = self.build.dependency_overrides[for_machine].get(identifier)
+        if override:
+            m = 'Tried to override dependency {!r} which has already been resolved or overridden at {}'
+            location = mlog.get_error_location_string(override.node.filename, override.node.lineno)
+            raise InterpreterException(m.format(name, location))
+        self.build.dependency_overrides[for_machine][identifier] = \
+            build.DependencyOverride(dep, self.interpreter.current_node)
+
+    @noPosargs
+    @permittedKwargs({})
+    def project_version_method(self, args, kwargs):
+        return self.build.dep_manifest[self.interpreter.active_projectname]['version']
+
+    @FeatureNew('meson.project_license()', '0.45.0')
+    @noPosargs
+    @permittedKwargs({})
+    def project_license_method(self, args, kwargs):
+        return self.build.dep_manifest[self.interpreter.active_projectname]['license']
+
+    @noPosargs
+    @permittedKwargs({})
+    def version_method(self, args, kwargs):
+        return coredata.version
+
+    @noPosargs
+    @permittedKwargs({})
+    def project_name_method(self, args, kwargs):
+        return self.interpreter.active_projectname
+
+    @noArgsFlattening
+    @permittedKwargs({})
+    def get_cross_property_method(self, args, kwargs) -> str:
+        if len(args) < 1 or len(args) > 2:
+            raise InterpreterException('Must have one or two arguments.')
+        propname = args[0]
+        if not isinstance(propname, str):
+            raise InterpreterException('Property name must be string.')
+        try:
+            props = self.interpreter.environment.properties.host
+            return props[propname]
+        except Exception:
+            if len(args) == 2:
+                return args[1]
+            raise InterpreterException('Unknown cross property: %s.' % propname)
+
+    @noArgsFlattening
+    @permittedKwargs({'native'})
+    @FeatureNew('meson.get_external_property', '0.54.0')
+    def get_external_property_method(self, args: T.Sequence[str], kwargs: dict) -> str:
+        if len(args) < 1 or len(args) > 2:
+            raise InterpreterException('Must have one or two positional arguments.')
+        propname = args[0]
+        if not isinstance(propname, str):
+            raise InterpreterException('Property name must be string.')
+
+        def _get_native() -> str:
+            try:
+                props = self.interpreter.environment.properties.build
+                return props[propname]
+            except Exception:
+                if len(args) == 2:
+                    return args[1]
+                raise InterpreterException('Unknown native property: %s.' % propname)
+        if 'native' in kwargs:
+            if kwargs['native']:
+                return _get_native()
+            else:
+                return self.get_cross_property_method(args, {})
+        else:  # native: not specified
+            if self.build.environment.is_cross_build():
+                return self.get_cross_property_method(args, kwargs)
+            else:
+                return _get_native()
+
+known_library_kwargs = (
+    build.known_shlib_kwargs |
+    build.known_stlib_kwargs
+)
+
+known_build_target_kwargs = (
+    known_library_kwargs |
+    build.known_exe_kwargs |
+    build.known_jar_kwargs |
+    {'target_type'}
+)
+
+_base_test_args = {'args', 'depends', 'env', 'should_fail', 'timeout', 'workdir', 'suite', 'priority', 'protocol'}
+
+permitted_kwargs = {'add_global_arguments': {'language', 'native'},
+                    'add_global_link_arguments': {'language', 'native'},
+                    'add_languages': {'required', 'native'},
+                    'add_project_link_arguments': {'language', 'native'},
+                    'add_project_arguments': {'language', 'native'},
+                    'add_test_setup': {'exe_wrapper', 'gdb', 'timeout_multiplier', 'env', 'is_default'},
+                    'benchmark': _base_test_args,
+                    'build_target': known_build_target_kwargs,
+                    'configure_file': {'input',
+                                       'output',
+                                       'configuration',
+                                       'command',
+                                       'copy',
+                                       'depfile',
+                                       'install_dir',
+                                       'install_mode',
+                                       'capture',
+                                       'install',
+                                       'format',
+                                       'output_format',
+                                       'encoding'},
+                    'custom_target': {'input',
+                                      'output',
+                                      'command',
+                                      'install',
+                                      'install_dir',
+                                      'install_mode',
+                                      'build_always',
+                                      'capture',
+                                      'depends',
+                                      'depend_files',
+                                      'depfile',
+                                      'build_by_default',
+                                      'build_always_stale',
+                                      'console'},
+                    'dependency': {'default_options',
+                                   'embed',
+                                   'fallback',
+                                   'language',
+                                   'main',
+                                   'method',
+                                   'modules',
+                                   'components',
+                                   'cmake_module_path',
+                                   'optional_modules',
+                                   'native',
+                                   'not_found_message',
+                                   'required',
+                                   'static',
+                                   'version',
+                                   'private_headers',
+                                   'cmake_args',
+                                   'include_type',
+                                   },
+                    'declare_dependency': {'include_directories',
+                                           'link_with',
+                                           'sources',
+                                           'dependencies',
+                                           'compile_args',
+                                           'link_args',
+                                           'link_whole',
+                                           'version',
+                                           'variables',
+                                           },
+                    'executable': build.known_exe_kwargs,
+                    'find_program': {'required', 'native', 'version', 'dirs'},
+                    'generator': {'arguments',
+                                  'output',
+                                  'depends',
+                                  'depfile',
+                                  'capture',
+                                  'preserve_path_from'},
+                    'include_directories': {'is_system'},
+                    'install_data': {'install_dir', 'install_mode', 'rename', 'sources'},
+                    'install_headers': {'install_dir', 'install_mode', 'subdir'},
+                    'install_man': {'install_dir', 'install_mode'},
+                    'install_subdir': {'exclude_files', 'exclude_directories', 'install_dir', 'install_mode', 'strip_directory'},
+                    'jar': build.known_jar_kwargs,
+                    'project': {'version', 'meson_version', 'default_options', 'license', 'subproject_dir'},
+                    'run_command': {'check', 'capture', 'env'},
+                    'run_target': {'command', 'depends'},
+                    'shared_library': build.known_shlib_kwargs,
+                    'shared_module': build.known_shmod_kwargs,
+                    'static_library': build.known_stlib_kwargs,
+                    'both_libraries': known_library_kwargs,
+                    'library': known_library_kwargs,
+                    'subdir': {'if_found'},
+                    'subproject': {'version', 'default_options', 'required'},
+                    'test': set.union(_base_test_args, {'is_parallel'}),
+                    'vcs_tag': {'input', 'output', 'fallback', 'command', 'replace_string'},
+                    }
+
+
+class Interpreter(InterpreterBase):
+
+    def __init__(self, build, backend=None, subproject='', subdir='', subproject_dir='subprojects',
+                 modules = None, default_project_options=None, mock=False, ast=None):
+        super().__init__(build.environment.get_source_dir(), subdir, subproject)
+        self.an_unpicklable_object = mesonlib.an_unpicklable_object
+        self.build = build
+        self.environment = build.environment
+        self.coredata = self.environment.get_coredata()
+        self.backend = backend
+        self.summary = {}
+        if modules is None:
+            self.modules = {}
+        else:
+            self.modules = modules
+        # Subproject directory is usually the name of the subproject, but can
+        # be different for dependencies provided by wrap files.
+        self.subproject_directory_name = subdir.split(os.path.sep)[-1]
+        self.subproject_dir = subproject_dir
+        self.option_file = os.path.join(self.source_root, self.subdir, 'meson_options.txt')
+        if not mock and ast is None:
+            self.load_root_meson_file()
+            self.sanity_check_ast()
+        elif ast is not None:
+            self.ast = ast
+            self.sanity_check_ast()
+        self.builtin.update({'meson': MesonMain(build, self)})
+        self.generators = []
+        self.visited_subdirs = {}
+        self.project_args_frozen = False
+        self.global_args_frozen = False  # implies self.project_args_frozen
+        self.subprojects = {}
+        self.subproject_stack = []
+        self.configure_file_outputs = {}
+        # Passed from the outside, only used in subprojects.
+        if default_project_options:
+            self.default_project_options = default_project_options.copy()
+        else:
+            self.default_project_options = {}
+        self.project_default_options = {}
+        self.build_func_dict()
+        # build_def_files needs to be defined before parse_project is called
+        self.build_def_files = [os.path.join(self.subdir, environment.build_filename)]
+        if not mock:
+            self.parse_project()
+        self._redetect_machines()
+
+    def _redetect_machines(self):
+        # Re-initialize machine descriptions. We can do a better job now because we
+        # have the compilers needed to gain more knowledge, so wipe out old
+        # inference and start over.
+        machines = self.build.environment.machines.miss_defaulting()
+        machines.build = environment.detect_machine_info(self.coredata.compilers.build)
+        self.build.environment.machines = machines.default_missing()
+        assert self.build.environment.machines.build.cpu is not None
+        assert self.build.environment.machines.host.cpu is not None
+        assert self.build.environment.machines.target.cpu is not None
+
+        self.builtin['build_machine'] = \
+            MachineHolder(self.build.environment.machines.build)
+        self.builtin['host_machine'] = \
+            MachineHolder(self.build.environment.machines.host)
+        self.builtin['target_machine'] = \
+            MachineHolder(self.build.environment.machines.target)
+
+    def get_non_matching_default_options(self):
+        env = self.environment
+        for def_opt_name, def_opt_value in self.project_default_options.items():
+            for opts in env.coredata.get_all_options():
+                cur_opt_value = opts.get(def_opt_name)
+                if cur_opt_value is not None:
+                    def_opt_value = env.coredata.validate_option_value(def_opt_name, def_opt_value)
+                    if def_opt_value != cur_opt_value.value:
+                        yield (def_opt_name, def_opt_value, cur_opt_value)
+
+    def build_func_dict(self):
+        self.funcs.update({'add_global_arguments': self.func_add_global_arguments,
+                           'add_project_arguments': self.func_add_project_arguments,
+                           'add_global_link_arguments': self.func_add_global_link_arguments,
+                           'add_project_link_arguments': self.func_add_project_link_arguments,
+                           'add_test_setup': self.func_add_test_setup,
+                           'add_languages': self.func_add_languages,
+                           'alias_target': self.func_alias_target,
+                           'assert': self.func_assert,
+                           'benchmark': self.func_benchmark,
+                           'build_target': self.func_build_target,
+                           'configuration_data': self.func_configuration_data,
+                           'configure_file': self.func_configure_file,
+                           'custom_target': self.func_custom_target,
+                           'declare_dependency': self.func_declare_dependency,
+                           'dependency': self.func_dependency,
+                           'disabler': self.func_disabler,
+                           'environment': self.func_environment,
+                           'error': self.func_error,
+                           'executable': self.func_executable,
+                           'generator': self.func_generator,
+                           'gettext': self.func_gettext,
+                           'get_option': self.func_get_option,
+                           'get_variable': self.func_get_variable,
+                           'files': self.func_files,
+                           'find_library': self.func_find_library,
+                           'find_program': self.func_find_program,
+                           'include_directories': self.func_include_directories,
+                           'import': self.func_import,
+                           'install_data': self.func_install_data,
+                           'install_headers': self.func_install_headers,
+                           'install_man': self.func_install_man,
+                           'install_subdir': self.func_install_subdir,
+                           'is_disabler': self.func_is_disabler,
+                           'is_variable': self.func_is_variable,
+                           'jar': self.func_jar,
+                           'join_paths': self.func_join_paths,
+                           'library': self.func_library,
+                           'message': self.func_message,
+                           'warning': self.func_warning,
+                           'option': self.func_option,
+                           'project': self.func_project,
+                           'run_target': self.func_run_target,
+                           'run_command': self.func_run_command,
+                           'set_variable': self.func_set_variable,
+                           'subdir': self.func_subdir,
+                           'subdir_done': self.func_subdir_done,
+                           'subproject': self.func_subproject,
+                           'summary': self.func_summary,
+                           'shared_library': self.func_shared_lib,
+                           'shared_module': self.func_shared_module,
+                           'static_library': self.func_static_lib,
+                           'both_libraries': self.func_both_lib,
+                           'test': self.func_test,
+                           'vcs_tag': self.func_vcs_tag
+                           })
+        if 'MESON_UNIT_TEST' in os.environ:
+            self.funcs.update({'exception': self.func_exception})
+
+    def holderify(self, item):
+        if isinstance(item, list):
+            return [self.holderify(x) for x in item]
+        if isinstance(item, dict):
+            return {k: self.holderify(v) for k, v in item.items()}
+
+        if isinstance(item, build.CustomTarget):
+            return CustomTargetHolder(item, self)
+        elif isinstance(item, (int, str, bool, Disabler, InterpreterObject)) or item is None:
+            return item
+        elif isinstance(item, build.Executable):
+            return ExecutableHolder(item, self)
+        elif isinstance(item, build.GeneratedList):
+            return GeneratedListHolder(item)
+        elif isinstance(item, build.RunTarget):
+            raise RuntimeError('This is not a pipe.')
+        elif isinstance(item, build.RunScript):
+            raise RuntimeError('Do not do this.')
+        elif isinstance(item, build.Data):
+            return DataHolder(item)
+        elif isinstance(item, dependencies.Dependency):
+            return DependencyHolder(item, self.subproject)
+        elif isinstance(item, dependencies.ExternalProgram):
+            return ExternalProgramHolder(item, self.subproject)
+        elif hasattr(item, 'held_object'):
+            return item
+        else:
+            raise InterpreterException('Module returned a value of unknown type.')
+
+    def process_new_values(self, invalues):
+        invalues = listify(invalues)
+        for v in invalues:
+            if isinstance(v, (RunTargetHolder, CustomTargetHolder, BuildTargetHolder)):
+                v = v.held_object
+
+            if isinstance(v, (build.BuildTarget, build.CustomTarget, build.RunTarget)):
+                self.add_target(v.name, v)
+            elif isinstance(v, list):
+                self.module_method_callback(v)
+            elif isinstance(v, build.GeneratedList):
+                pass
+            elif isinstance(v, build.RunScript):
+                self.build.install_scripts.append(v)
+            elif isinstance(v, build.Data):
+                self.build.data.append(v)
+            elif isinstance(v, dependencies.ExternalProgram):
+                return ExternalProgramHolder(v, self.subproject)
+            elif isinstance(v, dependencies.InternalDependency):
+                # FIXME: This is special cased and not ideal:
+                # The first source is our new VapiTarget, the rest are deps
+                self.process_new_values(v.sources[0])
+            elif hasattr(v, 'held_object'):
+                pass
+            elif isinstance(v, (int, str, bool, Disabler)):
+                pass
+            else:
+                raise InterpreterException('Module returned a value of unknown type.')
+
+    def module_method_callback(self, return_object):
+        if not isinstance(return_object, ModuleReturnValue):
+            raise InterpreterException('Bug in module, it returned an invalid object')
+        invalues = return_object.new_objects
+        self.process_new_values(invalues)
+        return self.holderify(return_object.return_value)
+
+    def get_build_def_files(self):
+        return self.build_def_files
+
+    def add_build_def_file(self, f):
+        # Use relative path for files within source directory, and absolute path
+        # for system files. Skip files within build directory. Also skip not regular
+        # files (e.g. /dev/stdout) Normalize the path to avoid duplicates, this
+        # is especially important to convert '/' to '\' on Windows.
+        if isinstance(f, mesonlib.File):
+            if f.is_built:
+                return
+            f = os.path.normpath(f.relative_name())
+        elif os.path.isfile(f) and not f.startswith('/dev'):
+            srcdir = Path(self.environment.get_source_dir())
+            builddir = Path(self.environment.get_build_dir())
+            try:
+                f = Path(f).resolve()
+            except OSError:
+                f = Path(f)
+                s = f.stat()
+                if (hasattr(s, 'st_file_attributes') and
+                        s.st_file_attributes & stat.FILE_ATTRIBUTE_REPARSE_POINT != 0 and
+                        s.st_reparse_tag == stat.IO_REPARSE_TAG_APPEXECLINK):
+                    # This is a Windows Store link which we can't
+                    # resolve, so just do our best otherwise.
+                    f = f.parent.resolve() / f.name
+                else:
+                    raise
+            if builddir in f.parents:
+                return
+            if srcdir in f.parents:
+                f = f.relative_to(srcdir)
+            f = str(f)
+        else:
+            return
+        if f not in self.build_def_files:
+            self.build_def_files.append(f)
+
+    def get_variables(self):
+        return self.variables
+
+    def check_stdlibs(self):
+        for for_machine in MachineChoice:
+            props = self.build.environment.properties[for_machine]
+            for l in self.coredata.compilers[for_machine].keys():
+                try:
+                    di = mesonlib.stringlistify(props.get_stdlib(l))
+                    if len(di) != 2:
+                        raise InterpreterException('Stdlib definition for %s should have exactly two elements.'
+                                                   % l)
+                    projname, depname = di
+                    subproj = self.do_subproject(projname, 'meson', {})
+                    self.build.stdlibs.host[l] = subproj.get_variable_method([depname], {})
+                except KeyError:
+                    pass
+                except InvalidArguments:
+                    pass
+
+    @stringArgs
+    @noKwargs
+    def func_import(self, node, args, kwargs):
+        if len(args) != 1:
+            raise InvalidCode('Import takes one argument.')
+        modname = args[0]
+        if modname.startswith('unstable-'):
+            plainname = modname.split('-', 1)[1]
+            mlog.warning('Module %s has no backwards or forwards compatibility and might not exist in future releases.' % modname, location=node)
+            modname = 'unstable_' + plainname
+        if modname not in self.modules:
+            try:
+                module = importlib.import_module('mesonbuild.modules.' + modname)
+            except ImportError:
+                raise InvalidArguments('Module "%s" does not exist' % (modname, ))
+            self.modules[modname] = module.initialize(self)
+        return ModuleHolder(modname, self.modules[modname], self)
+
+    @stringArgs
+    @noKwargs
+    def func_files(self, node, args, kwargs):
+        return [mesonlib.File.from_source_file(self.environment.source_dir, self.subdir, fname) for fname in args]
+
+    @FeatureNewKwargs('declare_dependency', '0.46.0', ['link_whole'])
+    @FeatureNewKwargs('declare_dependency', '0.54.0', ['variables'])
+    @permittedKwargs(permitted_kwargs['declare_dependency'])
+    @noPosargs
+    def func_declare_dependency(self, node, args, kwargs):
+        version = kwargs.get('version', self.project_version)
+        if not isinstance(version, str):
+            raise InterpreterException('Version must be a string.')
+        incs = self.extract_incdirs(kwargs)
+        libs = unholder(extract_as_list(kwargs, 'link_with'))
+        libs_whole = unholder(extract_as_list(kwargs, 'link_whole'))
+        sources = extract_as_list(kwargs, 'sources')
+        sources = unholder(listify(self.source_strings_to_files(sources)))
+        deps = unholder(extract_as_list(kwargs, 'dependencies'))
+        compile_args = mesonlib.stringlistify(kwargs.get('compile_args', []))
+        link_args = mesonlib.stringlistify(kwargs.get('link_args', []))
+        variables = kwargs.get('variables', {})
+        if not isinstance(variables, dict):
+            raise InterpreterException('variables must be a dict.')
+        if not all(isinstance(v, str) for v in variables.values()):
+            # Because that is how they will come from pkg-config and cmake
+            raise InterpreterException('variables values be strings.')
+        final_deps = []
+        for d in deps:
+            try:
+                d = d.held_object
+            except Exception:
+                pass
+            if not isinstance(d, (dependencies.Dependency, dependencies.ExternalLibrary, dependencies.InternalDependency)):
+                raise InterpreterException('Dependencies must be external deps')
+            final_deps.append(d)
+        for l in libs:
+            if isinstance(l, dependencies.Dependency):
+                raise InterpreterException('''Entries in "link_with" may only be self-built targets,
+external dependencies (including libraries) must go to "dependencies".''')
+        dep = dependencies.InternalDependency(version, incs, compile_args,
+                                              link_args, libs, libs_whole, sources, final_deps,
+                                              variables)
+        return DependencyHolder(dep, self.subproject)
+
+    @noKwargs
+    def func_assert(self, node, args, kwargs):
+        if len(args) == 1:
+            FeatureNew.single_use('assert function without message argument', '0.53.0', self.subproject)
+            value = args[0]
+            message = None
+        elif len(args) == 2:
+            value, message = args
+            if not isinstance(message, str):
+                raise InterpreterException('Assert message not a string.')
+        else:
+            raise InterpreterException('Assert takes between one and two arguments')
+        if not isinstance(value, bool):
+            raise InterpreterException('Assert value not bool.')
+        if not value:
+            if message is None:
+                from .ast import AstPrinter
+                printer = AstPrinter()
+                node.args.arguments[0].accept(printer)
+                message = printer.result
+            raise InterpreterException('Assert failed: ' + message)
+
+    def validate_arguments(self, args, argcount, arg_types):
+        if argcount is not None:
+            if argcount != len(args):
+                raise InvalidArguments('Expected %d arguments, got %d.' %
+                                       (argcount, len(args)))
+        for actual, wanted in zip(args, arg_types):
+            if wanted is not None:
+                if not isinstance(actual, wanted):
+                    raise InvalidArguments('Incorrect argument type.')
+
+    @FeatureNewKwargs('run_command', '0.50.0', ['env'])
+    @FeatureNewKwargs('run_command', '0.47.0', ['check', 'capture'])
+    @permittedKwargs(permitted_kwargs['run_command'])
+    def func_run_command(self, node, args, kwargs):
+        return self.run_command_impl(node, args, kwargs)
+
+    def run_command_impl(self, node, args, kwargs, in_builddir=False):
+        if len(args) < 1:
+            raise InterpreterException('Not enough arguments')
+        cmd, *cargs = args
+        capture = kwargs.get('capture', True)
+        srcdir = self.environment.get_source_dir()
+        builddir = self.environment.get_build_dir()
+
+        check = kwargs.get('check', False)
+        if not isinstance(check, bool):
+            raise InterpreterException('Check must be boolean.')
+
+        env = self.unpack_env_kwarg(kwargs)
+
+        m = 'must be a string, or the output of find_program(), files() '\
+            'or configure_file(), or a compiler object; not {!r}'
+        expanded_args = []
+        if isinstance(cmd, ExternalProgramHolder):
+            cmd = cmd.held_object
+            if isinstance(cmd, build.Executable):
+                progname = node.args.arguments[0].value
+                msg = 'Program {!r} was overridden with the compiled executable {!r}'\
+                      ' and therefore cannot be used during configuration'
+                raise InterpreterException(msg.format(progname, cmd.description()))
+            if not cmd.found():
+                raise InterpreterException('command {!r} not found or not executable'.format(cmd.get_name()))
+        elif isinstance(cmd, CompilerHolder):
+            exelist = cmd.compiler.get_exelist()
+            cmd = exelist[0]
+            prog = ExternalProgram(cmd, silent=True)
+            if not prog.found():
+                raise InterpreterException('Program {!r} not found '
+                                           'or not executable'.format(cmd))
+            cmd = prog
+            expanded_args = exelist[1:]
+        else:
+            if isinstance(cmd, mesonlib.File):
+                cmd = cmd.absolute_path(srcdir, builddir)
+            elif not isinstance(cmd, str):
+                raise InterpreterException('First argument ' + m.format(cmd))
+            # Prefer scripts in the current source directory
+            search_dir = os.path.join(srcdir, self.subdir)
+            prog = ExternalProgram(cmd, silent=True, search_dir=search_dir)
+            if not prog.found():
+                raise InterpreterException('Program or command {!r} not found '
+                                           'or not executable'.format(cmd))
+            cmd = prog
+        for a in listify(cargs):
+            if isinstance(a, str):
+                expanded_args.append(a)
+            elif isinstance(a, mesonlib.File):
+                expanded_args.append(a.absolute_path(srcdir, builddir))
+            elif isinstance(a, ExternalProgramHolder):
+                expanded_args.append(a.held_object.get_path())
+            else:
+                raise InterpreterException('Arguments ' + m.format(a))
+        # If any file that was used as an argument to the command
+        # changes, we must re-run the configuration step.
+        self.add_build_def_file(cmd.get_path())
+        for a in expanded_args:
+            if not os.path.isabs(a):
+                a = os.path.join(builddir if in_builddir else srcdir, self.subdir, a)
+            self.add_build_def_file(a)
+        return RunProcess(cmd, expanded_args, env, srcdir, builddir, self.subdir,
+                          self.environment.get_build_command() + ['introspect'],
+                          in_builddir=in_builddir, check=check, capture=capture)
+
+    @stringArgs
+    def func_gettext(self, nodes, args, kwargs):
+        raise InterpreterException('Gettext() function has been moved to module i18n. Import it and use i18n.gettext() instead')
+
+    def func_option(self, nodes, args, kwargs):
+        raise InterpreterException('Tried to call option() in build description file. All options must be in the option file.')
+
+    @FeatureNewKwargs('subproject', '0.38.0', ['default_options'])
+    @permittedKwargs(permitted_kwargs['subproject'])
+    @stringArgs
+    def func_subproject(self, nodes, args, kwargs):
+        if len(args) != 1:
+            raise InterpreterException('Subproject takes exactly one argument')
+        dirname = args[0]
+        return self.do_subproject(dirname, 'meson', kwargs)
+
+    def disabled_subproject(self, dirname, disabled_feature=None, exception=None):
+        sub = SubprojectHolder(None, self.subproject_dir, dirname,
+                               disabled_feature=disabled_feature, exception=exception)
+        self.subprojects[dirname] = sub
+        return sub
+
+    def get_subproject(self, dirname):
+        sub = self.subprojects.get(dirname)
+        if sub and sub.found():
+            return sub
+        return None
+
+    def do_subproject(self, dirname: str, method: str, kwargs):
+        disabled, required, feature = extract_required_kwarg(kwargs, self.subproject)
+        if disabled:
+            mlog.log('Subproject', mlog.bold(dirname), ':', 'skipped: feature', mlog.bold(feature), 'disabled')
+            return self.disabled_subproject(dirname, disabled_feature=feature)
+
+        default_options = mesonlib.stringlistify(kwargs.get('default_options', []))
+        default_options = coredata.create_options_dict(default_options)
+        if dirname == '':
+            raise InterpreterException('Subproject dir name must not be empty.')
+        if dirname[0] == '.':
+            raise InterpreterException('Subproject dir name must not start with a period.')
+        if '..' in dirname:
+            raise InterpreterException('Subproject name must not contain a ".." path segment.')
+        if os.path.isabs(dirname):
+            raise InterpreterException('Subproject name must not be an absolute path.')
+        if has_path_sep(dirname):
+            mlog.warning('Subproject name has a path separator. This may cause unexpected behaviour.',
+                         location=self.current_node)
+        if dirname in self.subproject_stack:
+            fullstack = self.subproject_stack + [dirname]
+            incpath = ' => '.join(fullstack)
+            raise InvalidCode('Recursive include of subprojects: %s.' % incpath)
+        if dirname in self.subprojects:
+            subproject = self.subprojects[dirname]
+            if required and not subproject.found():
+                raise InterpreterException('Subproject "%s/%s" required but not found.' % (
+                                           self.subproject_dir, dirname))
+            return subproject
+
+        r = self.environment.wrap_resolver
+        try:
+            resolved = r.resolve(dirname, method, self.subproject)
+        except wrap.WrapException as e:
+            subprojdir = os.path.join(self.subproject_dir, r.directory)
+            if isinstance(e, wrap.WrapNotFoundException):
+                # if the reason subproject execution failed was because
+                # the directory doesn't exist, try to give some helpful
+                # advice if it's a nested subproject that needs
+                # promotion...
+                self.print_nested_info(dirname)
+            if not required:
+                mlog.log(e)
+                mlog.log('Subproject ', mlog.bold(subprojdir), 'is buildable:', mlog.red('NO'), '(disabling)')
+                return self.disabled_subproject(dirname, exception=e)
+            raise e
+
+        subdir = os.path.join(self.subproject_dir, resolved)
+        subdir_abs = os.path.join(self.environment.get_source_dir(), subdir)
+        os.makedirs(os.path.join(self.build.environment.get_build_dir(), subdir), exist_ok=True)
+        self.global_args_frozen = True
+
+        mlog.log()
+        with mlog.nested():
+            mlog.log('Executing subproject', mlog.bold(dirname), 'method', mlog.bold(method), '\n')
+        try:
+            if method == 'meson':
+                return self._do_subproject_meson(dirname, subdir, default_options, kwargs)
+            elif method == 'cmake':
+                return self._do_subproject_cmake(dirname, subdir, subdir_abs, default_options, kwargs)
+            else:
+                raise InterpreterException('The method {} is invalid for the subproject {}'.format(method, dirname))
+        # Invalid code is always an error
+        except InvalidCode:
+            raise
+        except Exception as e:
+            if not required:
+                with mlog.nested():
+                    # Suppress the 'ERROR:' prefix because this exception is not
+                    # fatal and VS CI treat any logs with "ERROR:" as fatal.
+                    mlog.exception(e, prefix=mlog.yellow('Exception:'))
+                mlog.log('\nSubproject', mlog.bold(dirname), 'is buildable:', mlog.red('NO'), '(disabling)')
+                return self.disabled_subproject(dirname, exception=e)
+            raise e
+
+    def _do_subproject_meson(self, dirname, subdir, default_options, kwargs, ast=None, build_def_files=None):
+        with mlog.nested():
+            new_build = self.build.copy()
+            subi = Interpreter(new_build, self.backend, dirname, subdir, self.subproject_dir,
+                               self.modules, default_options, ast=ast)
+            subi.subprojects = self.subprojects
+
+            subi.subproject_stack = self.subproject_stack + [dirname]
+            current_active = self.active_projectname
+            current_warnings_counter = mlog.log_warnings_counter
+            mlog.log_warnings_counter = 0
+            subi.run()
+            subi_warnings = mlog.log_warnings_counter
+            mlog.log_warnings_counter = current_warnings_counter
+
+            mlog.log('Subproject', mlog.bold(dirname), 'finished.')
+
+        mlog.log()
+
+        if 'version' in kwargs:
+            pv = subi.project_version
+            wanted = kwargs['version']
+            if pv == 'undefined' or not mesonlib.version_compare_many(pv, wanted)[0]:
+                raise InterpreterException('Subproject %s version is %s but %s required.' % (dirname, pv, wanted))
+        self.active_projectname = current_active
+        self.subprojects.update(subi.subprojects)
+        self.subprojects[dirname] = SubprojectHolder(subi, self.subproject_dir, dirname,
+                                                     warnings=subi_warnings)
+        # Duplicates are possible when subproject uses files from project root
+        if build_def_files:
+            self.build_def_files = list(set(self.build_def_files + build_def_files))
+        else:
+            self.build_def_files = list(set(self.build_def_files + subi.build_def_files))
+        self.build.merge(subi.build)
+        self.build.subprojects[dirname] = subi.project_version
+        self.summary.update(subi.summary)
+        return self.subprojects[dirname]
+
+    def _do_subproject_cmake(self, dirname, subdir, subdir_abs, default_options, kwargs):
+        with mlog.nested():
+            new_build = self.build.copy()
+            prefix = self.coredata.builtins['prefix'].value
+
+            from .modules.cmake import CMakeSubprojectOptions
+            options = kwargs.get('options', CMakeSubprojectOptions())
+            if not isinstance(options, CMakeSubprojectOptions):
+                raise InterpreterException('"options" kwarg must be CMakeSubprojectOptions'
+                                           ' object (created by cmake.subproject_options())')
+
+            cmake_options = mesonlib.stringlistify(kwargs.get('cmake_options', []))
+            cmake_options += options.cmake_options
+            cm_int = CMakeInterpreter(new_build, subdir, subdir_abs, prefix, new_build.environment, self.backend)
+            cm_int.initialise(cmake_options)
+            cm_int.analyse()
+
+            # Generate a meson ast and execute it with the normal do_subproject_meson
+            ast = cm_int.pretend_to_be_meson(options.target_options)
+
+            mlog.log()
+            with mlog.nested():
+                mlog.log('Processing generated meson AST')
+
+                # Debug print the generated meson file
+                from .ast import AstIndentationGenerator, AstPrinter
+                printer = AstPrinter()
+                ast.accept(AstIndentationGenerator())
+                ast.accept(printer)
+                printer.post_process()
+                meson_filename = os.path.join(self.build.environment.get_build_dir(), subdir, 'meson.build')
+                with open(meson_filename, "w") as f:
+                    f.write(printer.result)
+
+                mlog.log('Build file:', meson_filename)
+                mlog.cmd_ci_include(meson_filename)
+                mlog.log()
+
+            result = self._do_subproject_meson(dirname, subdir, default_options, kwargs, ast, cm_int.bs_files)
+            result.cm_interpreter = cm_int
+
+        mlog.log()
+        return result
+
+    def get_option_internal(self, optname):
+        raw_optname = optname
+        if self.is_subproject():
+            optname = self.subproject + ':' + optname
+
+        for opts in [
+                self.coredata.base_options, compilers.base_options, self.coredata.builtins,
+                dict(self.coredata.get_prefixed_options_per_machine(self.coredata.builtins_per_machine)),
+                dict(self.coredata.flatten_lang_iterator(
+                    self.coredata.get_prefixed_options_per_machine(self.coredata.compiler_options))),
+        ]:
+            v = opts.get(optname)
+            if v is None or v.yielding:
+                v = opts.get(raw_optname)
+            if v is not None:
+                return v
+
+        try:
+            opt = self.coredata.user_options[optname]
+            if opt.yielding and ':' in optname and raw_optname in self.coredata.user_options:
+                popt = self.coredata.user_options[raw_optname]
+                if type(opt) is type(popt):
+                    opt = popt
+                else:
+                    # Get class name, then option type as a string
+                    opt_type = opt.__class__.__name__[4:][:-6].lower()
+                    popt_type = popt.__class__.__name__[4:][:-6].lower()
+                    # This is not a hard error to avoid dependency hell, the workaround
+                    # when this happens is to simply set the subproject's option directly.
+                    mlog.warning('Option {0!r} of type {1!r} in subproject {2!r} cannot yield '
+                                 'to parent option of type {3!r}, ignoring parent value. '
+                                 'Use -D{2}:{0}=value to set the value for this option manually'
+                                 '.'.format(raw_optname, opt_type, self.subproject, popt_type),
+                                 location=self.current_node)
+            return opt
+        except KeyError:
+            pass
+
+        raise InterpreterException('Tried to access unknown option "%s".' % optname)
+
+    @stringArgs
+    @noKwargs
+    def func_get_option(self, nodes, args, kwargs):
+        if len(args) != 1:
+            raise InterpreterException('Argument required for get_option.')
+        optname = args[0]
+        if ':' in optname:
+            raise InterpreterException('Having a colon in option name is forbidden, '
+                                       'projects are not allowed to directly access '
+                                       'options of other subprojects.')
+        opt = self.get_option_internal(optname)
+        if isinstance(opt, coredata.UserFeatureOption):
+            return FeatureOptionHolder(self.environment, optname, opt)
+        elif isinstance(opt, coredata.UserOption):
+            return opt.value
+        return opt
+
+    @noKwargs
+    def func_configuration_data(self, node, args, kwargs):
+        if len(args) > 1:
+            raise InterpreterException('configuration_data takes only one optional positional arguments')
+        elif len(args) == 1:
+            FeatureNew.single_use('configuration_data dictionary', '0.49.0', self.subproject)
+            initial_values = args[0]
+            if not isinstance(initial_values, dict):
+                raise InterpreterException('configuration_data first argument must be a dictionary')
+        else:
+            initial_values = {}
+        return ConfigurationDataHolder(self.subproject, initial_values)
+
+    def set_backend(self):
+        # The backend is already set when parsing subprojects
+        if self.backend is not None:
+            return
+        backend = self.coredata.get_builtin_option('backend')
+        from .backend import backends
+        self.backend = backends.get_backend_from_name(backend, self.build, self)
+
+        if self.backend is None:
+            raise InterpreterException('Unknown backend "%s".' % backend)
+        if backend != self.backend.name:
+            if self.backend.name.startswith('vs'):
+                mlog.log('Auto detected Visual Studio backend:', mlog.bold(self.backend.name))
+            self.coredata.set_builtin_option('backend', self.backend.name)
+
+        # Only init backend options on first invocation otherwise it would
+        # override values previously set from command line.
+        if self.environment.first_invocation:
+            self.coredata.init_backend_options(backend)
+
+        options = {k: v for k, v in self.environment.cmd_line_options.items() if k.startswith('backend_')}
+        self.coredata.set_options(options)
+
+    @stringArgs
+    @permittedKwargs(permitted_kwargs['project'])
+    def func_project(self, node, args, kwargs):
+        if len(args) < 1:
+            raise InvalidArguments('Not enough arguments to project(). Needs at least the project name.')
+        proj_name, *proj_langs = args
+        if ':' in proj_name:
+            raise InvalidArguments("Project name {!r} must not contain ':'".format(proj_name))
+
+        # This needs to be evaluated as early as possible, as meson uses this
+        # for things like deprecation testing.
+        if 'meson_version' in kwargs:
+            cv = coredata.version
+            pv = kwargs['meson_version']
+            if not mesonlib.version_compare(cv, pv):
+                raise InterpreterException('Meson version is %s but project requires %s' % (cv, pv))
+            mesonlib.project_meson_versions[self.subproject] = kwargs['meson_version']
+
+        if os.path.exists(self.option_file):
+            oi = optinterpreter.OptionInterpreter(self.subproject)
+            oi.process(self.option_file)
+            self.coredata.merge_user_options(oi.options)
+            self.add_build_def_file(self.option_file)
+
+        # Do not set default_options on reconfigure otherwise it would override
+        # values previously set from command line. That means that changing
+        # default_options in a project will trigger a reconfigure but won't
+        # have any effect.
+        self.project_default_options = mesonlib.stringlistify(kwargs.get('default_options', []))
+        self.project_default_options = coredata.create_options_dict(self.project_default_options)
+        if self.environment.first_invocation:
+            default_options = self.project_default_options
+            default_options.update(self.default_project_options)
+            self.coredata.init_builtins(self.subproject)
+        else:
+            default_options = {}
+        self.coredata.set_default_options(default_options, self.subproject, self.environment)
+
+        if not self.is_subproject():
+            self.build.project_name = proj_name
+        self.active_projectname = proj_name
+        self.project_version = kwargs.get('version', 'undefined')
+        if self.build.project_version is None:
+            self.build.project_version = self.project_version
+        proj_license = mesonlib.stringlistify(kwargs.get('license', 'unknown'))
+        self.build.dep_manifest[proj_name] = {'version': self.project_version,
+                                              'license': proj_license}
+        if self.subproject in self.build.projects:
+            raise InvalidCode('Second call to project().')
+        if not self.is_subproject() and 'subproject_dir' in kwargs:
+            spdirname = kwargs['subproject_dir']
+            if not isinstance(spdirname, str):
+                raise InterpreterException('Subproject_dir must be a string')
+            if os.path.isabs(spdirname):
+                raise InterpreterException('Subproject_dir must not be an absolute path.')
+            if spdirname.startswith('.'):
+                raise InterpreterException('Subproject_dir must not begin with a period.')
+            if '..' in spdirname:
+                raise InterpreterException('Subproject_dir must not contain a ".." segment.')
+            self.subproject_dir = spdirname
+
+        self.build.subproject_dir = self.subproject_dir
+        if not self.is_subproject():
+            wrap_mode = self.coredata.get_builtin_option('wrap_mode')
+            subproject_dir_abs = os.path.join(self.environment.get_source_dir(), self.subproject_dir)
+            self.environment.wrap_resolver = wrap.Resolver(subproject_dir_abs, wrap_mode)
+
+        self.build.projects[self.subproject] = proj_name
+        mlog.log('Project name:', mlog.bold(proj_name))
+        mlog.log('Project version:', mlog.bold(self.project_version))
+        self.add_languages(proj_langs, True, MachineChoice.BUILD)
+        self.add_languages(proj_langs, True, MachineChoice.HOST)
+        self.set_backend()
+        if not self.is_subproject():
+            self.check_stdlibs()
+
+    @FeatureNewKwargs('add_languages', '0.54.0', ['native'])
+    @permittedKwargs(permitted_kwargs['add_languages'])
+    @stringArgs
+    def func_add_languages(self, node, args, kwargs):
+        disabled, required, feature = extract_required_kwarg(kwargs, self.subproject)
+        if disabled:
+            for lang in sorted(args, key=compilers.sort_clink):
+                mlog.log('Compiler for language', mlog.bold(lang), 'skipped: feature', mlog.bold(feature), 'disabled')
+            return False
+        if 'native' in kwargs:
+            return self.add_languages(args, required, self.machine_from_native_kwarg(kwargs))
+        else:
+            # absent 'native' means 'both' for backwards compatibility
+            tv = FeatureNew.get_target_version(self.subproject)
+            if FeatureNew.check_version(tv, '0.54.0'):
+                mlog.warning('add_languages is missing native:, assuming languages are wanted for both host and build.',
+                             location=self.current_node)
+
+            success = self.add_languages(args, False, MachineChoice.BUILD)
+            success &= self.add_languages(args, required, MachineChoice.HOST)
+            return success
+
+    def get_message_string_arg(self, arg):
+        if isinstance(arg, list):
+            argstr = stringifyUserArguments(arg)
+        elif isinstance(arg, dict):
+            argstr = stringifyUserArguments(arg)
+        elif isinstance(arg, str):
+            argstr = arg
+        elif isinstance(arg, int):
+            argstr = str(arg)
+        else:
+            raise InvalidArguments('Function accepts only strings, integers, lists and lists thereof.')
+
+        return argstr
+
+    @noArgsFlattening
+    @noKwargs
+    def func_message(self, node, args, kwargs):
+        if len(args) > 1:
+            FeatureNew.single_use('message with more than one argument', '0.54.0', self.subproject)
+        args_str = [self.get_message_string_arg(i) for i in args]
+        self.message_impl(args_str)
+
+    def message_impl(self, args):
+        mlog.log(mlog.bold('Message:'), *args)
+
+    @noArgsFlattening
+    @FeatureNewKwargs('summary', '0.54.0', ['list_sep'])
+    @permittedKwargs({'section', 'bool_yn', 'list_sep'})
+    @FeatureNew('summary', '0.53.0')
+    def func_summary(self, node, args, kwargs):
+        if len(args) == 1:
+            if not isinstance(args[0], dict):
+                raise InterpreterException('Summary first argument must be dictionary.')
+            values = args[0]
+        elif len(args) == 2:
+            if not isinstance(args[0], str):
+                raise InterpreterException('Summary first argument must be string.')
+            values = {args[0]: args[1]}
+        else:
+            raise InterpreterException('Summary accepts at most 2 arguments.')
+        section = kwargs.get('section', '')
+        if not isinstance(section, str):
+            raise InterpreterException('Summary\'s section keyword argument must be string.')
+        self.summary_impl(section, values, kwargs)
+
+    def summary_impl(self, section, values, kwargs):
+        if self.subproject not in self.summary:
+            self.summary[self.subproject] = Summary(self.active_projectname, self.project_version)
+        self.summary[self.subproject].add_section(section, values, kwargs)
+
+    def _print_summary(self):
+        # Add automatic 'Supbrojects' section in main project.
+        all_subprojects = collections.OrderedDict()
+        for name, subp in sorted(self.subprojects.items()):
+            value = subp.found()
+            if subp.disabled_feature:
+                value = [value, 'Feature {!r} disabled'.format(subp.disabled_feature)]
+            elif subp.exception:
+                value = [value, str(subp.exception)]
+            elif subp.warnings > 0:
+                value = [value, '{} warnings'.format(subp.warnings)]
+            all_subprojects[name] = value
+        if all_subprojects:
+            self.summary_impl('Subprojects', all_subprojects,
+                              {'bool_yn': True,
+                               'list_sep': ' ',
+                              })
+        # Print all summaries, main project last.
+        mlog.log('')  # newline
+        main_summary = self.summary.pop('', None)
+        for _, summary in sorted(self.summary.items()):
+            summary.dump()
+        if main_summary:
+            main_summary.dump()
+
+    @noArgsFlattening
+    @FeatureNew('warning', '0.44.0')
+    @noKwargs
+    def func_warning(self, node, args, kwargs):
+        if len(args) > 1:
+            FeatureNew.single_use('warning with more than one argument', '0.54.0', self.subproject)
+        args_str = [self.get_message_string_arg(i) for i in args]
+        mlog.warning(*args_str, location=node)
+
+    @noKwargs
+    def func_error(self, node, args, kwargs):
+        self.validate_arguments(args, 1, [str])
+        raise InterpreterException('Problem encountered: ' + args[0])
+
+    @noKwargs
+    def func_exception(self, node, args, kwargs):
+        self.validate_arguments(args, 0, [])
+        raise Exception()
+
+    def add_languages(self, args: T.Sequence[str], required: bool, for_machine: MachineChoice) -> bool:
+        success = self.add_languages_for(args, required, for_machine)
+        if not self.coredata.is_cross_build():
+            self.coredata.copy_build_options_from_regular_ones()
+        self._redetect_machines()
+        return success
+
+    def should_skip_sanity_check(self, for_machine: MachineChoice) -> bool:
+        should = self.environment.properties.host.get('skip_sanity_check', False)
+        if not isinstance(should, bool):
+            raise InterpreterException('Option skip_sanity_check must be a boolean.')
+        if for_machine != MachineChoice.HOST and not should:
+            return False
+        if not self.environment.is_cross_build() and not should:
+            return False
+        return should
+
+    def add_languages_for(self, args, required, for_machine: MachineChoice):
+        args = [a.lower() for a in args]
+        langs = set(self.coredata.compilers[for_machine].keys())
+        langs.update(args)
+        if 'vala' in langs:
+            if 'c' not in langs:
+                raise InterpreterException('Compiling Vala requires C. Add C to your project languages and rerun Meson.')
+
+        success = True
+        for lang in sorted(args, key=compilers.sort_clink):
+            clist = self.coredata.compilers[for_machine]
+            machine_name = for_machine.get_lower_case_name()
+            if lang in clist:
+                comp = clist[lang]
+            else:
+                try:
+                    comp = self.environment.detect_compiler_for(lang, for_machine)
+                    if comp is None:
+                        raise InvalidArguments('Tried to use unknown language "%s".' % lang)
+                    if self.should_skip_sanity_check(for_machine):
+                        mlog.log_once('Cross compiler sanity tests disabled via the cross file.')
+                    else:
+                        comp.sanity_check(self.environment.get_scratch_dir(), self.environment)
+                except Exception:
+                    if not required:
+                        mlog.log('Compiler for language',
+                                 mlog.bold(lang), 'for the', machine_name,
+                                 'machine not found.')
+                        success = False
+                        continue
+                    else:
+                        raise
+
+            if for_machine == MachineChoice.HOST or self.environment.is_cross_build():
+                logger_fun = mlog.log
+            else:
+                logger_fun = mlog.debug
+            logger_fun(comp.get_display_language(), 'compiler for the', machine_name, 'machine:',
+                       mlog.bold(' '.join(comp.get_exelist())), comp.get_version_string())
+            if comp.linker is not None:
+                logger_fun(comp.get_display_language(), 'linker for the', machine_name, 'machine:',
+                           mlog.bold(' '.join(comp.linker.get_exelist())), comp.linker.id, comp.linker.version)
+            self.build.ensure_static_linker(comp)
+
+        return success
+
+    def program_from_file_for(self, for_machine, prognames):
+        for p in unholder(prognames):
+            if isinstance(p, mesonlib.File):
+                continue # Always points to a local (i.e. self generated) file.
+            if not isinstance(p, str):
+                raise InterpreterException('Executable name must be a string')
+            prog = ExternalProgram.from_bin_list(self.environment, for_machine, p)
+            if prog.found():
+                return ExternalProgramHolder(prog, self.subproject)
+        return None
+
+    def program_from_system(self, args, search_dirs, silent=False):
+        # Search for scripts relative to current subdir.
+        # Do not cache found programs because find_program('foobar')
+        # might give different results when run from different source dirs.
+        source_dir = os.path.join(self.environment.get_source_dir(), self.subdir)
+        for exename in args:
+            if isinstance(exename, mesonlib.File):
+                if exename.is_built:
+                    search_dir = os.path.join(self.environment.get_build_dir(),
+                                              exename.subdir)
+                else:
+                    search_dir = os.path.join(self.environment.get_source_dir(),
+                                              exename.subdir)
+                exename = exename.fname
+                extra_search_dirs = []
+            elif isinstance(exename, str):
+                search_dir = source_dir
+                extra_search_dirs = search_dirs
+            else:
+                raise InvalidArguments('find_program only accepts strings and '
+                                       'files, not {!r}'.format(exename))
+            extprog = dependencies.ExternalProgram(exename, search_dir=search_dir,
+                                                   extra_search_dirs=extra_search_dirs,
+                                                   silent=silent)
+            progobj = ExternalProgramHolder(extprog, self.subproject)
+            if progobj.found():
+                return progobj
+
+    def program_from_overrides(self, command_names, extra_info):
+        for name in command_names:
+            if not isinstance(name, str):
+                continue
+            if name in self.build.find_overrides:
+                exe = self.build.find_overrides[name]
+                extra_info.append(mlog.blue('(overriden)'))
+                return ExternalProgramHolder(exe, self.subproject, self.backend)
+        return None
+
+    def store_name_lookups(self, command_names):
+        for name in command_names:
+            if isinstance(name, str):
+                self.build.searched_programs.add(name)
+
+    def add_find_program_override(self, name, exe):
+        if name in self.build.searched_programs:
+            raise InterpreterException('Tried to override finding of executable "%s" which has already been found.'
+                                       % name)
+        if name in self.build.find_overrides:
+            raise InterpreterException('Tried to override executable "%s" which has already been overridden.'
+                                       % name)
+        self.build.find_overrides[name] = exe
+
+    def notfound_program(self, args):
+        return ExternalProgramHolder(dependencies.NonExistingExternalProgram(' '.join(args)), self.subproject)
+
+    # TODO update modules to always pass `for_machine`. It is bad-form to assume
+    # the host machine.
+    def find_program_impl(self, args, for_machine: MachineChoice = MachineChoice.HOST,
+                          required=True, silent=True, wanted='', search_dirs=None,
+                          version_func=None):
+        args = mesonlib.listify(args)
+
+        extra_info = []
+        progobj = self.program_lookup(args, for_machine, required, search_dirs, extra_info)
+        if progobj is None:
+            progobj = self.notfound_program(args)
+
+        if not progobj.found():
+            mlog.log('Program', mlog.bold(progobj.get_name()), 'found:', mlog.red('NO'))
+            if required:
+                m = 'Program {!r} not found'
+                raise InterpreterException(m.format(progobj.get_name()))
+            return progobj
+
+        if wanted:
+            if version_func:
+                version = version_func(progobj)
+            else:
+                version = progobj.get_version(self)
+            is_found, not_found, found = mesonlib.version_compare_many(version, wanted)
+            if not is_found:
+                mlog.log('Program', mlog.bold(progobj.get_name()), 'found:', mlog.red('NO'),
+                         'found', mlog.normal_cyan(version), 'but need:',
+                         mlog.bold(', '.join(["'{}'".format(e) for e in not_found])))
+                if required:
+                    m = 'Invalid version of program, need {!r} {!r} found {!r}.'
+                    raise InterpreterException(m.format(progobj.get_name(), not_found, version))
+                return self.notfound_program(args)
+            extra_info.insert(0, mlog.normal_cyan(version))
+
+        # Only store successful lookups
+        self.store_name_lookups(args)
+        mlog.log('Program', mlog.bold(progobj.get_name()), 'found:', mlog.green('YES'), *extra_info)
+        return progobj
+
+    def program_lookup(self, args, for_machine, required, search_dirs, extra_info):
+        progobj = self.program_from_overrides(args, extra_info)
+        if progobj:
+            return progobj
+
+        fallback = None
+        wrap_mode = self.coredata.get_builtin_option('wrap_mode')
+        if wrap_mode != WrapMode.nofallback and self.environment.wrap_resolver:
+            fallback = self.environment.wrap_resolver.find_program_provider(args)
+        if fallback and wrap_mode == WrapMode.forcefallback:
+            return self.find_program_fallback(fallback, args, required, extra_info)
+
+        progobj = self.program_from_file_for(for_machine, args)
+        if progobj is None:
+            progobj = self.program_from_system(args, search_dirs, silent=True)
+        if progobj is None and args[0].endswith('python3'):
+            prog = dependencies.ExternalProgram('python3', mesonlib.python_command, silent=True)
+            progobj = ExternalProgramHolder(prog, self.subproject) if prog.found() else None
+        if progobj is None and fallback and required:
+            progobj = self.find_program_fallback(fallback, args, required, extra_info)
+
+        return progobj
+
+    def find_program_fallback(self, fallback, args, required, extra_info):
+        mlog.log('Fallback to subproject', mlog.bold(fallback), 'which provides program',
+                 mlog.bold(' '.join(args)))
+        sp_kwargs = { 'required': required }
+        self.do_subproject(fallback, 'meson', sp_kwargs)
+        return self.program_from_overrides(args, extra_info)
+
+    @FeatureNewKwargs('find_program', '0.53.0', ['dirs'])
+    @FeatureNewKwargs('find_program', '0.52.0', ['version'])
+    @FeatureNewKwargs('find_program', '0.49.0', ['disabler'])
+    @disablerIfNotFound
+    @permittedKwargs(permitted_kwargs['find_program'])
+    def func_find_program(self, node, args, kwargs):
+        if not args:
+            raise InterpreterException('No program name specified.')
+
+        disabled, required, feature = extract_required_kwarg(kwargs, self.subproject)
+        if disabled:
+            mlog.log('Program', mlog.bold(' '.join(args)), 'skipped: feature', mlog.bold(feature), 'disabled')
+            return self.notfound_program(args)
+
+        search_dirs = extract_search_dirs(kwargs)
+        wanted = mesonlib.stringlistify(kwargs.get('version', []))
+        for_machine = self.machine_from_native_kwarg(kwargs)
+        return self.find_program_impl(args, for_machine, required=required,
+                                      silent=False, wanted=wanted,
+                                      search_dirs=search_dirs)
+
+    def func_find_library(self, node, args, kwargs):
+        raise InvalidCode('find_library() is removed, use meson.get_compiler(\'name\').find_library() instead.\n'
+                          'Look here for documentation: http://mesonbuild.com/Reference-manual.html#compiler-object\n'
+                          'Look here for example: http://mesonbuild.com/howtox.html#add-math-library-lm-portably\n'
+                          )
+
+    def _find_cached_dep(self, name, display_name, kwargs):
+        # Check if we want this as a build-time / build machine or runt-time /
+        # host machine dep.
+        for_machine = self.machine_from_native_kwarg(kwargs)
+        identifier = dependencies.get_dep_identifier(name, kwargs)
+        wanted_vers = mesonlib.stringlistify(kwargs.get('version', []))
+
+        override = self.build.dependency_overrides[for_machine].get(identifier)
+        if override:
+            info = [mlog.blue('(overridden)' if override.explicit else '(cached)')]
+            cached_dep = override.dep
+            # We don't implicitly override not-found dependencies, but user could
+            # have explicitly called meson.override_dependency() with a not-found
+            # dep.
+            if not cached_dep.found():
+                mlog.log('Dependency', mlog.bold(display_name),
+                         'found:', mlog.red('NO'), *info)
+                return identifier, cached_dep
+            found_vers = cached_dep.get_version()
+            if not self.check_version(wanted_vers, found_vers):
+                mlog.log('Dependency', mlog.bold(name),
+                         'found:', mlog.red('NO'),
+                         'found', mlog.normal_cyan(found_vers), 'but need:',
+                         mlog.bold(', '.join(["'{}'".format(e) for e in wanted_vers])),
+                         *info)
+                return identifier, NotFoundDependency(self.environment)
+        else:
+            info = [mlog.blue('(cached)')]
+            cached_dep = self.coredata.deps[for_machine].get(identifier)
+            if cached_dep:
+                found_vers = cached_dep.get_version()
+                if not self.check_version(wanted_vers, found_vers):
+                    return identifier, None
+
+        if cached_dep:
+            if found_vers:
+                info = [mlog.normal_cyan(found_vers), *info]
+            mlog.log('Dependency', mlog.bold(display_name),
+                     'found:', mlog.green('YES'), *info)
+            return identifier, cached_dep
+
+        return identifier, None
+
+    @staticmethod
+    def check_version(wanted, found):
+        if not wanted:
+            return True
+        if found == 'undefined' or not mesonlib.version_compare_many(found, wanted)[0]:
+            return False
+        return True
+
+    def notfound_dependency(self):
+        return DependencyHolder(NotFoundDependency(self.environment), self.subproject)
+
+    def verify_fallback_consistency(self, dirname, varname, cached_dep):
+        subi = self.get_subproject(dirname)
+        if not cached_dep or not varname or not subi or not cached_dep.found():
+            return
+        dep = subi.get_variable_method([varname], {})
+        if dep.held_object != cached_dep:
+            m = 'Inconsistency: Subproject has overridden the dependency with another variable than {!r}'
+            raise DependencyException(m.format(varname))
+
+    def get_subproject_dep(self, name, display_name, dirname, varname, kwargs):
+        required = kwargs.get('required', True)
+        wanted = mesonlib.stringlistify(kwargs.get('version', []))
+        subproj_path = os.path.join(self.subproject_dir, dirname)
+        dep = self.notfound_dependency()
+        try:
+            subproject = self.subprojects[dirname]
+            _, cached_dep = self._find_cached_dep(name, display_name, kwargs)
+            if varname is None:
+                # Assuming the subproject overridden the dependency we want
+                if cached_dep:
+                    if required and not cached_dep.found():
+                        m = 'Dependency {!r} is not satisfied'
+                        raise DependencyException(m.format(display_name))
+                    return DependencyHolder(cached_dep, self.subproject)
+                else:
+                    if required:
+                        m = 'Subproject {} did not override dependency {}'
+                        raise DependencyException(m.format(subproj_path, display_name))
+                    mlog.log('Dependency', mlog.bold(display_name), 'from subproject',
+                             mlog.bold(subproj_path), 'found:', mlog.red('NO'))
+                    return self.notfound_dependency()
+            if subproject.found():
+                self.verify_fallback_consistency(dirname, varname, cached_dep)
+                dep = self.subprojects[dirname].get_variable_method([varname], {})
+        except InvalidArguments:
+            pass
+
+        if not isinstance(dep, DependencyHolder):
+            raise InvalidCode('Fetched variable {!r} in the subproject {!r} is '
+                              'not a dependency object.'.format(varname, dirname))
+
+        if not dep.found():
+            if required:
+                raise DependencyException('Could not find dependency {} in subproject {}'
+                                          ''.format(varname, dirname))
+            # If the dependency is not required, don't raise an exception
+            mlog.log('Dependency', mlog.bold(display_name), 'from subproject',
+                     mlog.bold(subproj_path), 'found:', mlog.red('NO'))
+            return dep
+
+        found = dep.held_object.get_version()
+        if not self.check_version(wanted, found):
+            if required:
+                raise DependencyException('Version {} of subproject dependency {} already '
+                                          'cached, requested incompatible version {} for '
+                                          'dep {}'.format(found, dirname, wanted, display_name))
+
+            mlog.log('Dependency', mlog.bold(display_name), 'from subproject',
+                     mlog.bold(subproj_path), 'found:', mlog.red('NO'),
+                     'found', mlog.normal_cyan(found), 'but need:',
+                     mlog.bold(', '.join(["'{}'".format(e) for e in wanted])))
+            return self.notfound_dependency()
+
+        found = mlog.normal_cyan(found) if found else None
+        mlog.log('Dependency', mlog.bold(display_name), 'from subproject',
+                 mlog.bold(subproj_path), 'found:', mlog.green('YES'), found)
+        return dep
+
+    def _handle_featurenew_dependencies(self, name):
+        'Do a feature check on dependencies used by this subproject'
+        if name == 'mpi':
+            FeatureNew.single_use('MPI Dependency', '0.42.0', self.subproject)
+        elif name == 'pcap':
+            FeatureNew.single_use('Pcap Dependency', '0.42.0', self.subproject)
+        elif name == 'vulkan':
+            FeatureNew.single_use('Vulkan Dependency', '0.42.0', self.subproject)
+        elif name == 'libwmf':
+            FeatureNew.single_use('LibWMF Dependency', '0.44.0', self.subproject)
+        elif name == 'openmp':
+            FeatureNew.single_use('OpenMP Dependency', '0.46.0', self.subproject)
+
+    @FeatureNewKwargs('dependency', '0.54.0', ['components'])
+    @FeatureNewKwargs('dependency', '0.52.0', ['include_type'])
+    @FeatureNewKwargs('dependency', '0.50.0', ['not_found_message', 'cmake_module_path', 'cmake_args'])
+    @FeatureNewKwargs('dependency', '0.49.0', ['disabler'])
+    @FeatureNewKwargs('dependency', '0.40.0', ['method'])
+    @FeatureNewKwargs('dependency', '0.38.0', ['default_options'])
+    @disablerIfNotFound
+    @permittedKwargs(permitted_kwargs['dependency'])
+    def func_dependency(self, node, args, kwargs):
+        self.validate_arguments(args, 1, [str])
+        name = args[0]
+        display_name = name if name else '(anonymous)'
+        mods = extract_as_list(kwargs, 'modules')
+        if mods:
+            display_name += ' (modules: {})'.format(', '.join(str(i) for i in mods))
+        not_found_message = kwargs.get('not_found_message', '')
+        if not isinstance(not_found_message, str):
+            raise InvalidArguments('The not_found_message must be a string.')
+        try:
+            d = self.dependency_impl(name, display_name, kwargs)
+        except Exception:
+            if not_found_message:
+                self.message_impl([not_found_message])
+            raise
+        if not d.found() and not_found_message:
+            self.message_impl([not_found_message])
+            self.message_impl([not_found_message])
+        # Override this dependency to have consistent results in subsequent
+        # dependency lookups.
+        if name and d.found():
+            for_machine = self.machine_from_native_kwarg(kwargs)
+            identifier = dependencies.get_dep_identifier(name, kwargs)
+            if identifier not in self.build.dependency_overrides[for_machine]:
+                self.build.dependency_overrides[for_machine][identifier] = \
+                    build.DependencyOverride(d.held_object, node, explicit=False)
+        return d
+
+    def dependency_impl(self, name, display_name, kwargs):
+        disabled, required, feature = extract_required_kwarg(kwargs, self.subproject)
+        if disabled:
+            mlog.log('Dependency', mlog.bold(display_name), 'skipped: feature', mlog.bold(feature), 'disabled')
+            return self.notfound_dependency()
+
+        has_fallback = 'fallback' in kwargs
+        if not has_fallback and name:
+            # Add an implicit fallback if we have a wrap file or a directory with the same name,
+            # but only if this dependency is required. It is common to first check for a pkg-config,
+            # then fallback to use find_library() and only afterward check again the dependency
+            # with a fallback. If the fallback has already been configured then we have to use it
+            # even if the dependency is not required.
+            provider = self.environment.wrap_resolver.find_dep_provider(name)
+            dirname = mesonlib.listify(provider)[0]
+            if provider and (required or self.get_subproject(dirname)):
+                kwargs['fallback'] = provider
+                has_fallback = True
+
+        if 'default_options' in kwargs and not has_fallback:
+            mlog.warning('The "default_options" keyworg argument does nothing without a "fallback" keyword argument.',
+                         location=self.current_node)
+
+        # writing just "dependency('')" is an error, because it can only fail
+        if name == '' and required and not has_fallback:
+            raise InvalidArguments('Dependency is both required and not-found')
+
+        if '<' in name or '>' in name or '=' in name:
+            raise InvalidArguments('Characters <, > and = are forbidden in dependency names. To specify'
+                                   'version\n requirements use the \'version\' keyword argument instead.')
+
+        identifier, cached_dep = self._find_cached_dep(name, display_name, kwargs)
+        if cached_dep:
+            if has_fallback:
+                dirname, varname = self.get_subproject_infos(kwargs)
+                self.verify_fallback_consistency(dirname, varname, cached_dep)
+            if required and not cached_dep.found():
+                m = 'Dependency {!r} was already checked and was not found'
+                raise DependencyException(m.format(display_name))
+            return DependencyHolder(cached_dep, self.subproject)
+
+        # If the dependency has already been configured, possibly by
+        # a higher level project, try to use it first.
+        if has_fallback:
+            dirname, varname = self.get_subproject_infos(kwargs)
+            if self.get_subproject(dirname):
+                return self.get_subproject_dep(name, display_name, dirname, varname, kwargs)
+
+        wrap_mode = self.coredata.get_builtin_option('wrap_mode')
+        force_fallback_for = self.coredata.get_builtin_option('force_fallback_for')
+        forcefallback = has_fallback and (wrap_mode == WrapMode.forcefallback or \
+                                          name in force_fallback_for or \
+                                          dirname in force_fallback_for)
+        if name != '' and not forcefallback:
+            self._handle_featurenew_dependencies(name)
+            kwargs['required'] = required and not has_fallback
+            dep = dependencies.find_external_dependency(name, self.environment, kwargs)
+            kwargs['required'] = required
+            # Only store found-deps in the cache
+            # Never add fallback deps to self.coredata.deps since we
+            # cannot cache them. They must always be evaluated else
+            # we won't actually read all the build files.
+            if dep.found():
+                for_machine = self.machine_from_native_kwarg(kwargs)
+                self.coredata.deps[for_machine].put(identifier, dep)
+                return DependencyHolder(dep, self.subproject)
+
+        if has_fallback:
+            return self.dependency_fallback(name, display_name, kwargs)
+
+        return self.notfound_dependency()
+
+    @FeatureNew('disabler', '0.44.0')
+    @noKwargs
+    @noPosargs
+    def func_disabler(self, node, args, kwargs):
+        return Disabler()
+
+    def print_nested_info(self, dependency_name):
+        message = ['Dependency', mlog.bold(dependency_name), 'not found but it is available in a sub-subproject.\n' +
+                   'To use it in the current project, promote it by going in the project source\n'
+                   'root and issuing']
+        sprojs = mesonlib.detect_subprojects('subprojects', self.source_root)
+        if dependency_name not in sprojs:
+            return
+        found = sprojs[dependency_name]
+        if len(found) > 1:
+            message.append('one of the following commands:')
+        else:
+            message.append('the following command:')
+        command_templ = '\nmeson wrap promote {}'
+        for l in found:
+            message.append(mlog.bold(command_templ.format(l[len(self.source_root) + 1:])))
+        mlog.warning(*message, location=self.current_node)
+
+    def get_subproject_infos(self, kwargs):
+        fbinfo = mesonlib.stringlistify(kwargs['fallback'])
+        if len(fbinfo) == 1:
+            FeatureNew.single_use('Fallback without variable name', '0.53.0', self.subproject)
+            return fbinfo[0], None
+        elif len(fbinfo) != 2:
+            raise InterpreterException('Fallback info must have one or two items.')
+        return fbinfo
+
+    def dependency_fallback(self, name, display_name, kwargs):
+        dirname, varname = self.get_subproject_infos(kwargs)
+        required = kwargs.get('required', True)
+
+        # Explicitly listed fallback preferences for specific subprojects
+        # take precedence over wrap-mode
+        force_fallback_for = self.coredata.get_builtin_option('force_fallback_for')
+        if name in force_fallback_for or dirname in force_fallback_for:
+            mlog.log('Looking for a fallback subproject for the dependency',
+                     mlog.bold(display_name), 'because:\nUse of fallback was forced for that specific subproject')
+        elif self.coredata.get_builtin_option('wrap_mode') == WrapMode.nofallback:
+            mlog.log('Not looking for a fallback subproject for the dependency',
+                     mlog.bold(display_name), 'because:\nUse of fallback '
+                     'dependencies is disabled.')
+            if required:
+                m = 'Dependency {!r} not found and fallback is disabled'
+                raise DependencyException(m.format(display_name))
+            return self.notfound_dependency()
+        elif self.coredata.get_builtin_option('wrap_mode') == WrapMode.forcefallback:
+            mlog.log('Looking for a fallback subproject for the dependency',
+                     mlog.bold(display_name), 'because:\nUse of fallback dependencies is forced.')
+        else:
+            mlog.log('Looking for a fallback subproject for the dependency',
+                     mlog.bold(display_name))
+        sp_kwargs = {
+            'default_options': kwargs.get('default_options', []),
+            'required': required,
+        }
+        self.do_subproject(dirname, 'meson', sp_kwargs)
+        return self.get_subproject_dep(name, display_name, dirname, varname, kwargs)
+
+    @FeatureNewKwargs('executable', '0.42.0', ['implib'])
+    @permittedKwargs(permitted_kwargs['executable'])
+    def func_executable(self, node, args, kwargs):
+        return self.build_target(node, args, kwargs, ExecutableHolder)
+
+    @permittedKwargs(permitted_kwargs['static_library'])
+    def func_static_lib(self, node, args, kwargs):
+        return self.build_target(node, args, kwargs, StaticLibraryHolder)
+
+    @permittedKwargs(permitted_kwargs['shared_library'])
+    def func_shared_lib(self, node, args, kwargs):
+        holder = self.build_target(node, args, kwargs, SharedLibraryHolder)
+        holder.held_object.shared_library_only = True
+        return holder
+
+    @permittedKwargs(permitted_kwargs['both_libraries'])
+    def func_both_lib(self, node, args, kwargs):
+        return self.build_both_libraries(node, args, kwargs)
+
+    @FeatureNew('shared_module', '0.37.0')
+    @permittedKwargs(permitted_kwargs['shared_module'])
+    def func_shared_module(self, node, args, kwargs):
+        return self.build_target(node, args, kwargs, SharedModuleHolder)
+
+    @permittedKwargs(permitted_kwargs['library'])
+    def func_library(self, node, args, kwargs):
+        return self.build_library(node, args, kwargs)
+
+    @permittedKwargs(permitted_kwargs['jar'])
+    def func_jar(self, node, args, kwargs):
+        return self.build_target(node, args, kwargs, JarHolder)
+
+    @FeatureNewKwargs('build_target', '0.40.0', ['link_whole', 'override_options'])
+    @permittedKwargs(permitted_kwargs['build_target'])
+    def func_build_target(self, node, args, kwargs):
+        if 'target_type' not in kwargs:
+            raise InterpreterException('Missing target_type keyword argument')
+        target_type = kwargs.pop('target_type')
+        if target_type == 'executable':
+            return self.build_target(node, args, kwargs, ExecutableHolder)
+        elif target_type == 'shared_library':
+            return self.build_target(node, args, kwargs, SharedLibraryHolder)
+        elif target_type == 'shared_module':
+            FeatureNew('build_target(target_type: \'shared_module\')',
+                       '0.51.0').use(self.subproject)
+            return self.build_target(node, args, kwargs, SharedModuleHolder)
+        elif target_type == 'static_library':
+            return self.build_target(node, args, kwargs, StaticLibraryHolder)
+        elif target_type == 'both_libraries':
+            return self.build_both_libraries(node, args, kwargs)
+        elif target_type == 'library':
+            return self.build_library(node, args, kwargs)
+        elif target_type == 'jar':
+            return self.build_target(node, args, kwargs, JarHolder)
+        else:
+            raise InterpreterException('Unknown target_type.')
+
+    @permittedKwargs(permitted_kwargs['vcs_tag'])
+    @FeatureDeprecatedKwargs('custom_target', '0.47.0', ['build_always'],
+                             'combine build_by_default and build_always_stale instead.')
+    def func_vcs_tag(self, node, args, kwargs):
+        if 'input' not in kwargs or 'output' not in kwargs:
+            raise InterpreterException('Keyword arguments input and output must exist')
+        if 'fallback' not in kwargs:
+            FeatureNew.single_use('Optional fallback in vcs_tag', '0.41.0', self.subproject)
+        fallback = kwargs.pop('fallback', self.project_version)
+        if not isinstance(fallback, str):
+            raise InterpreterException('Keyword argument fallback must be a string.')
+        replace_string = kwargs.pop('replace_string', '@VCS_TAG@')
+        regex_selector = '(.*)' # default regex selector for custom command: use complete output
+        vcs_cmd = kwargs.get('command', None)
+        if vcs_cmd and not isinstance(vcs_cmd, list):
+            vcs_cmd = [vcs_cmd]
+        source_dir = os.path.normpath(os.path.join(self.environment.get_source_dir(), self.subdir))
+        if vcs_cmd:
+            # Is the command an executable in path or maybe a script in the source tree?
+            vcs_cmd[0] = shutil.which(vcs_cmd[0]) or os.path.join(source_dir, vcs_cmd[0])
+        else:
+            vcs = mesonlib.detect_vcs(source_dir)
+            if vcs:
+                mlog.log('Found %s repository at %s' % (vcs['name'], vcs['wc_dir']))
+                vcs_cmd = vcs['get_rev'].split()
+                regex_selector = vcs['rev_regex']
+            else:
+                vcs_cmd = [' '] # executing this cmd will fail in vcstagger.py and force to use the fallback string
+        # vcstagger.py parameters: infile, outfile, fallback, source_dir, replace_string, regex_selector, command...
+        kwargs['command'] = self.environment.get_build_command() + \
+            ['--internal',
+             'vcstagger',
+             '@INPUT0@',
+             '@OUTPUT0@',
+             fallback,
+             source_dir,
+             replace_string,
+             regex_selector] + vcs_cmd
+        kwargs.setdefault('build_by_default', True)
+        kwargs.setdefault('build_always_stale', True)
+        return self._func_custom_target_impl(node, [kwargs['output']], kwargs)
+
+    @FeatureNew('subdir_done', '0.46.0')
+    @stringArgs
+    def func_subdir_done(self, node, args, kwargs):
+        if len(kwargs) > 0:
+            raise InterpreterException('exit does not take named arguments')
+        if len(args) > 0:
+            raise InterpreterException('exit does not take any arguments')
+        raise SubdirDoneRequest()
+
+    @stringArgs
+    @FeatureNewKwargs('custom_target', '0.48.0', ['console'])
+    @FeatureNewKwargs('custom_target', '0.47.0', ['install_mode', 'build_always_stale'])
+    @FeatureNewKwargs('custom_target', '0.40.0', ['build_by_default'])
+    @permittedKwargs(permitted_kwargs['custom_target'])
+    def func_custom_target(self, node, args, kwargs):
+        if len(args) != 1:
+            raise InterpreterException('custom_target: Only one positional argument is allowed, and it must be a string name')
+        if 'depfile' in kwargs and ('@BASENAME@' in kwargs['depfile'] or '@PLAINNAME@' in kwargs['depfile']):
+            FeatureNew.single_use('substitutions in custom_target depfile', '0.47.0', self.subproject)
+        return self._func_custom_target_impl(node, args, kwargs)
+
+    def _func_custom_target_impl(self, node, args, kwargs):
+        'Implementation-only, without FeatureNew checks, for internal use'
+        name = args[0]
+        kwargs['install_mode'] = self._get_kwarg_install_mode(kwargs)
+        if 'input' in kwargs:
+            try:
+                kwargs['input'] = self.source_strings_to_files(extract_as_list(kwargs, 'input'))
+            except mesonlib.MesonException:
+                mlog.warning('''Custom target input \'%s\' can\'t be converted to File object(s).
+This will become a hard error in the future.''' % kwargs['input'], location=self.current_node)
+        tg = CustomTargetHolder(build.CustomTarget(name, self.subdir, self.subproject, kwargs, backend=self.backend), self)
+        self.add_target(name, tg.held_object)
+        return tg
+
+    @permittedKwargs(permitted_kwargs['run_target'])
+    def func_run_target(self, node, args, kwargs):
+        if len(args) > 1:
+            raise InvalidCode('Run_target takes only one positional argument: the target name.')
+        elif len(args) == 1:
+            if 'command' not in kwargs:
+                raise InterpreterException('Missing "command" keyword argument')
+            all_args = extract_as_list(kwargs, 'command')
+            deps = unholder(extract_as_list(kwargs, 'depends'))
+        else:
+            raise InterpreterException('Run_target needs at least one positional argument.')
+
+        cleaned_args = []
+        for i in unholder(listify(all_args)):
+            if not isinstance(i, (str, build.BuildTarget, build.CustomTarget, dependencies.ExternalProgram, mesonlib.File)):
+                mlog.debug('Wrong type:', str(i))
+                raise InterpreterException('Invalid argument to run_target.')
+            if isinstance(i, dependencies.ExternalProgram) and not i.found():
+                raise InterpreterException('Tried to use non-existing executable {!r}'.format(i.name))
+            cleaned_args.append(i)
+        name = args[0]
+        if not isinstance(name, str):
+            raise InterpreterException('First argument must be a string.')
+        cleaned_deps = []
+        for d in deps:
+            if not isinstance(d, (build.BuildTarget, build.CustomTarget)):
+                raise InterpreterException('Depends items must be build targets.')
+            cleaned_deps.append(d)
+        command, *cmd_args = cleaned_args
+        tg = RunTargetHolder(build.RunTarget(name, command, cmd_args, cleaned_deps, self.subdir, self.subproject), self)
+        self.add_target(name, tg.held_object)
+        full_name = (self.subproject, name)
+        assert(full_name not in self.build.run_target_names)
+        self.build.run_target_names.add(full_name)
+        return tg
+
+    @FeatureNew('alias_target', '0.52.0')
+    @noKwargs
+    def func_alias_target(self, node, args, kwargs):
+        if len(args) < 2:
+            raise InvalidCode('alias_target takes at least 2 arguments.')
+        name = args[0]
+        if not isinstance(name, str):
+            raise InterpreterException('First argument must be a string.')
+        deps = unholder(listify(args[1:]))
+        for d in deps:
+            if not isinstance(d, (build.BuildTarget, build.CustomTarget)):
+                raise InterpreterException('Depends items must be build targets.')
+        tg = RunTargetHolder(build.AliasTarget(name, deps, self.subdir, self.subproject), self)
+        self.add_target(name, tg.held_object)
+        return tg
+
+    @permittedKwargs(permitted_kwargs['generator'])
+    def func_generator(self, node, args, kwargs):
+        gen = GeneratorHolder(self, args, kwargs)
+        self.generators.append(gen)
+        return gen
+
+    @FeatureNewKwargs('benchmark', '0.46.0', ['depends'])
+    @FeatureNewKwargs('benchmark', '0.52.0', ['priority'])
+    @permittedKwargs(permitted_kwargs['benchmark'])
+    def func_benchmark(self, node, args, kwargs):
+        # is_parallel isn't valid here, so make sure it isn't passed
+        if 'is_parallel' in kwargs:
+            del kwargs['is_parallel']
+        self.add_test(node, args, kwargs, False)
+
+    @FeatureNewKwargs('test', '0.46.0', ['depends'])
+    @FeatureNewKwargs('test', '0.52.0', ['priority'])
+    @permittedKwargs(permitted_kwargs['test'])
+    def func_test(self, node, args, kwargs):
+        if kwargs.get('protocol') == 'gtest':
+            FeatureNew.single_use('"gtest" protocol for tests', '0.55.0', self.subproject)
+        self.add_test(node, args, kwargs, True)
+
+    def unpack_env_kwarg(self, kwargs) -> build.EnvironmentVariables:
+        envlist = kwargs.get('env', EnvironmentVariablesHolder())
+        if isinstance(envlist, EnvironmentVariablesHolder):
+            env = envlist.held_object
+        elif isinstance(envlist, dict):
+            FeatureNew.single_use('environment dictionary', '0.52.0', self.subproject)
+            env = EnvironmentVariablesHolder(envlist)
+            env = env.held_object
+        else:
+            envlist = listify(envlist)
+            # Convert from array to environment object
+            env = EnvironmentVariablesHolder(envlist)
+            env = env.held_object
+        return env
+
+    def add_test(self, node, args, kwargs, is_base_test):
+        if len(args) != 2:
+            raise InterpreterException('test expects 2 arguments, {} given'.format(len(args)))
+        if not isinstance(args[0], str):
+            raise InterpreterException('First argument of test must be a string.')
+        exe = args[1]
+        if not isinstance(exe, (ExecutableHolder, JarHolder, ExternalProgramHolder)):
+            if isinstance(exe, mesonlib.File):
+                exe = self.func_find_program(node, args[1], {})
+            else:
+                raise InterpreterException('Second argument must be executable.')
+        par = kwargs.get('is_parallel', True)
+        if not isinstance(par, bool):
+            raise InterpreterException('Keyword argument is_parallel must be a boolean.')
+        cmd_args = unholder(extract_as_list(kwargs, 'args'))
+        for i in cmd_args:
+            if not isinstance(i, (str, mesonlib.File, build.Target)):
+                raise InterpreterException('Command line arguments must be strings, files or targets.')
+        env = self.unpack_env_kwarg(kwargs)
+        should_fail = kwargs.get('should_fail', False)
+        if not isinstance(should_fail, bool):
+            raise InterpreterException('Keyword argument should_fail must be a boolean.')
+        timeout = kwargs.get('timeout', 30)
+        if 'workdir' in kwargs:
+            workdir = kwargs['workdir']
+            if not isinstance(workdir, str):
+                raise InterpreterException('Workdir keyword argument must be a string.')
+            if not os.path.isabs(workdir):
+                raise InterpreterException('Workdir keyword argument must be an absolute path.')
+        else:
+            workdir = None
+        if not isinstance(timeout, int):
+            raise InterpreterException('Timeout must be an integer.')
+        protocol = kwargs.get('protocol', 'exitcode')
+        if protocol not in {'exitcode', 'tap', 'gtest'}:
+            raise InterpreterException('Protocol must be "exitcode", "tap", or "gtest".')
+        suite = []
+        prj = self.subproject if self.is_subproject() else self.build.project_name
+        for s in mesonlib.stringlistify(kwargs.get('suite', '')):
+            if len(s) > 0:
+                s = ':' + s
+            suite.append(prj.replace(' ', '_').replace(':', '_') + s)
+        depends = unholder(extract_as_list(kwargs, 'depends'))
+        for dep in depends:
+            if not isinstance(dep, (build.CustomTarget, build.BuildTarget)):
+                raise InterpreterException('Depends items must be build targets.')
+        priority = kwargs.get('priority', 0)
+        if not isinstance(priority, int):
+            raise InterpreterException('Keyword argument priority must be an integer.')
+        t = Test(args[0], prj, suite, exe.held_object, depends, par, cmd_args,
+                 env, should_fail, timeout, workdir, protocol, priority)
+        if is_base_test:
+            self.build.tests.append(t)
+            mlog.debug('Adding test', mlog.bold(args[0], True))
+        else:
+            self.build.benchmarks.append(t)
+            mlog.debug('Adding benchmark', mlog.bold(args[0], True))
+
+    @FeatureNewKwargs('install_headers', '0.47.0', ['install_mode'])
+    @permittedKwargs(permitted_kwargs['install_headers'])
+    def func_install_headers(self, node, args, kwargs):
+        source_files = self.source_strings_to_files(args)
+        kwargs['install_mode'] = self._get_kwarg_install_mode(kwargs)
+        h = Headers(source_files, kwargs)
+        self.build.headers.append(h)
+        return h
+
+    @FeatureNewKwargs('install_man', '0.47.0', ['install_mode'])
+    @permittedKwargs(permitted_kwargs['install_man'])
+    def func_install_man(self, node, args, kwargs):
+        fargs = self.source_strings_to_files(args)
+        kwargs['install_mode'] = self._get_kwarg_install_mode(kwargs)
+        m = Man(fargs, kwargs)
+        self.build.man.append(m)
+        return m
+
+    @FeatureNewKwargs('subdir', '0.44.0', ['if_found'])
+    @permittedKwargs(permitted_kwargs['subdir'])
+    def func_subdir(self, node, args, kwargs):
+        self.validate_arguments(args, 1, [str])
+        mesonlib.check_direntry_issues(args)
+        if '..' in args[0]:
+            raise InvalidArguments('Subdir contains ..')
+        if self.subdir == '' and args[0] == self.subproject_dir:
+            raise InvalidArguments('Must not go into subprojects dir with subdir(), use subproject() instead.')
+        if self.subdir == '' and args[0].startswith('meson-'):
+            raise InvalidArguments('The "meson-" prefix is reserved and cannot be used for top-level subdir().')
+        for i in mesonlib.extract_as_list(kwargs, 'if_found'):
+            if not hasattr(i, 'found_method'):
+                raise InterpreterException('Object used in if_found does not have a found method.')
+            if not i.found_method([], {}):
+                return
+        prev_subdir = self.subdir
+        subdir = os.path.join(prev_subdir, args[0])
+        if os.path.isabs(subdir):
+            raise InvalidArguments('Subdir argument must be a relative path.')
+        absdir = os.path.join(self.environment.get_source_dir(), subdir)
+        symlinkless_dir = os.path.realpath(absdir)
+        if symlinkless_dir in self.visited_subdirs:
+            raise InvalidArguments('Tried to enter directory "%s", which has already been visited.'
+                                   % subdir)
+        self.visited_subdirs[symlinkless_dir] = True
+        self.subdir = subdir
+        os.makedirs(os.path.join(self.environment.build_dir, subdir), exist_ok=True)
+        buildfilename = os.path.join(self.subdir, environment.build_filename)
+        self.build_def_files.append(buildfilename)
+        absname = os.path.join(self.environment.get_source_dir(), buildfilename)
+        if not os.path.isfile(absname):
+            self.subdir = prev_subdir
+            raise InterpreterException("Non-existent build file '{!s}'".format(buildfilename))
+        with open(absname, encoding='utf8') as f:
+            code = f.read()
+        assert(isinstance(code, str))
+        try:
+            codeblock = mparser.Parser(code, absname).parse()
+        except mesonlib.MesonException as me:
+            me.file = absname
+            raise me
+        try:
+            self.evaluate_codeblock(codeblock)
+        except SubdirDoneRequest:
+            pass
+        self.subdir = prev_subdir
+
+    def _get_kwarg_install_mode(self, kwargs):
+        if kwargs.get('install_mode', None) is None:
+            return None
+        install_mode = []
+        mode = mesonlib.typeslistify(kwargs.get('install_mode', []), (str, int))
+        for m in mode:
+            # We skip any arguments that are set to `false`
+            if m is False:
+                m = None
+            install_mode.append(m)
+        if len(install_mode) > 3:
+            raise InvalidArguments('Keyword argument install_mode takes at '
+                                   'most 3 arguments.')
+        if len(install_mode) > 0 and install_mode[0] is not None and \
+           not isinstance(install_mode[0], str):
+            raise InvalidArguments('Keyword argument install_mode requires the '
+                                   'permissions arg to be a string or false')
+        return FileMode(*install_mode)
+
+    @FeatureNewKwargs('install_data', '0.46.0', ['rename'])
+    @FeatureNewKwargs('install_data', '0.38.0', ['install_mode'])
+    @permittedKwargs(permitted_kwargs['install_data'])
+    def func_install_data(self, node, args, kwargs):
+        kwsource = mesonlib.stringlistify(kwargs.get('sources', []))
+        raw_sources = args + kwsource
+        sources = []
+        source_strings = []
+        for s in raw_sources:
+            if isinstance(s, mesonlib.File):
+                sources.append(s)
+            elif isinstance(s, str):
+                source_strings.append(s)
+            else:
+                raise InvalidArguments('Argument must be string or file.')
+        sources += self.source_strings_to_files(source_strings)
+        install_dir = kwargs.get('install_dir', None)
+        if not isinstance(install_dir, (str, type(None))):
+            raise InvalidArguments('Keyword argument install_dir not a string.')
+        install_mode = self._get_kwarg_install_mode(kwargs)
+        rename = kwargs.get('rename', None)
+        data = DataHolder(build.Data(sources, install_dir, install_mode, rename))
+        self.build.data.append(data.held_object)
+        return data
+
+    @FeatureNewKwargs('install_subdir', '0.42.0', ['exclude_files', 'exclude_directories'])
+    @FeatureNewKwargs('install_subdir', '0.38.0', ['install_mode'])
+    @permittedKwargs(permitted_kwargs['install_subdir'])
+    @stringArgs
+    def func_install_subdir(self, node, args, kwargs):
+        if len(args) != 1:
+            raise InvalidArguments('Install_subdir requires exactly one argument.')
+        subdir = args[0]
+        if 'install_dir' not in kwargs:
+            raise InvalidArguments('Missing keyword argument install_dir')
+        install_dir = kwargs['install_dir']
+        if not isinstance(install_dir, str):
+            raise InvalidArguments('Keyword argument install_dir not a string.')
+        if 'strip_directory' in kwargs:
+            if not isinstance(kwargs['strip_directory'], bool):
+                raise InterpreterException('"strip_directory" keyword must be a boolean.')
+            strip_directory = kwargs['strip_directory']
+        else:
+            strip_directory = False
+        if 'exclude_files' in kwargs:
+            exclude = extract_as_list(kwargs, 'exclude_files')
+            for f in exclude:
+                if not isinstance(f, str):
+                    raise InvalidArguments('Exclude argument not a string.')
+                elif os.path.isabs(f):
+                    raise InvalidArguments('Exclude argument cannot be absolute.')
+            exclude_files = set(exclude)
+        else:
+            exclude_files = set()
+        if 'exclude_directories' in kwargs:
+            exclude = extract_as_list(kwargs, 'exclude_directories')
+            for d in exclude:
+                if not isinstance(d, str):
+                    raise InvalidArguments('Exclude argument not a string.')
+                elif os.path.isabs(d):
+                    raise InvalidArguments('Exclude argument cannot be absolute.')
+            exclude_directories = set(exclude)
+        else:
+            exclude_directories = set()
+        exclude = (exclude_files, exclude_directories)
+        install_mode = self._get_kwarg_install_mode(kwargs)
+        idir = InstallDir(self.subdir, subdir, install_dir, install_mode, exclude, strip_directory)
+        self.build.install_dirs.append(idir)
+        return idir
+
+    @FeatureNewKwargs('configure_file', '0.47.0', ['copy', 'output_format', 'install_mode', 'encoding'])
+    @FeatureNewKwargs('configure_file', '0.46.0', ['format'])
+    @FeatureNewKwargs('configure_file', '0.41.0', ['capture'])
+    @FeatureNewKwargs('configure_file', '0.50.0', ['install'])
+    @FeatureNewKwargs('configure_file', '0.52.0', ['depfile'])
+    @permittedKwargs(permitted_kwargs['configure_file'])
+    def func_configure_file(self, node, args, kwargs):
+        if len(args) > 0:
+            raise InterpreterException("configure_file takes only keyword arguments.")
+        if 'output' not in kwargs:
+            raise InterpreterException('Required keyword argument "output" not defined.')
+        actions = set(['configuration', 'command', 'copy']).intersection(kwargs.keys())
+        if len(actions) == 0:
+            raise InterpreterException('Must specify an action with one of these '
+                                       'keyword arguments: \'configuration\', '
+                                       '\'command\', or \'copy\'.')
+        elif len(actions) == 2:
+            raise InterpreterException('Must not specify both {!r} and {!r} '
+                                       'keyword arguments since they are '
+                                       'mutually exclusive.'.format(*actions))
+        elif len(actions) == 3:
+            raise InterpreterException('Must specify one of {!r}, {!r}, and '
+                                       '{!r} keyword arguments since they are '
+                                       'mutually exclusive.'.format(*actions))
+        if 'capture' in kwargs:
+            if not isinstance(kwargs['capture'], bool):
+                raise InterpreterException('"capture" keyword must be a boolean.')
+            if 'command' not in kwargs:
+                raise InterpreterException('"capture" keyword requires "command" keyword.')
+
+        if 'format' in kwargs:
+            fmt = kwargs['format']
+            if not isinstance(fmt, str):
+                raise InterpreterException('"format" keyword must be a string.')
+        else:
+            fmt = 'meson'
+
+        if fmt not in ('meson', 'cmake', 'cmake@'):
+            raise InterpreterException('"format" possible values are "meson", "cmake" or "cmake@".')
+
+        if 'output_format' in kwargs:
+            output_format = kwargs['output_format']
+            if not isinstance(output_format, str):
+                raise InterpreterException('"output_format" keyword must be a string.')
+        else:
+            output_format = 'c'
+
+        if output_format not in ('c', 'nasm'):
+            raise InterpreterException('"format" possible values are "c" or "nasm".')
+
+        if 'depfile' in kwargs:
+            depfile = kwargs['depfile']
+            if not isinstance(depfile, str):
+                raise InterpreterException('depfile file name must be a string')
+        else:
+            depfile = None
+
+        # Validate input
+        inputs = self.source_strings_to_files(extract_as_list(kwargs, 'input'))
+        inputs_abs = []
+        for f in inputs:
+            if isinstance(f, mesonlib.File):
+                inputs_abs.append(f.absolute_path(self.environment.source_dir,
+                                                  self.environment.build_dir))
+                self.add_build_def_file(f)
+            else:
+                raise InterpreterException('Inputs can only be strings or file objects')
+        # Validate output
+        output = kwargs['output']
+        if not isinstance(output, str):
+            raise InterpreterException('Output file name must be a string')
+        if inputs_abs:
+            values = mesonlib.get_filenames_templates_dict(inputs_abs, None)
+            outputs = mesonlib.substitute_values([output], values)
+            output = outputs[0]
+            if depfile:
+                depfile = mesonlib.substitute_values([depfile], values)[0]
+        ofile_rpath = os.path.join(self.subdir, output)
+        if ofile_rpath in self.configure_file_outputs:
+            mesonbuildfile = os.path.join(self.subdir, 'meson.build')
+            current_call = "{}:{}".format(mesonbuildfile, self.current_lineno)
+            first_call = "{}:{}".format(mesonbuildfile, self.configure_file_outputs[ofile_rpath])
+            mlog.warning('Output file', mlog.bold(ofile_rpath, True), 'for configure_file() at', current_call, 'overwrites configure_file() output at', first_call)
+        else:
+            self.configure_file_outputs[ofile_rpath] = self.current_lineno
+        if os.path.dirname(output) != '':
+            raise InterpreterException('Output file name must not contain a subdirectory.')
+        (ofile_path, ofile_fname) = os.path.split(os.path.join(self.subdir, output))
+        ofile_abs = os.path.join(self.environment.build_dir, ofile_path, ofile_fname)
+        # Perform the appropriate action
+        if 'configuration' in kwargs:
+            conf = kwargs['configuration']
+            if isinstance(conf, dict):
+                FeatureNew.single_use('configure_file.configuration dictionary', '0.49.0', self.subproject)
+                conf = ConfigurationDataHolder(self.subproject, conf)
+            elif not isinstance(conf, ConfigurationDataHolder):
+                raise InterpreterException('Argument "configuration" is not of type configuration_data')
+            mlog.log('Configuring', mlog.bold(output), 'using configuration')
+            if len(inputs) > 1:
+                raise InterpreterException('At most one input file can given in configuration mode')
+            if inputs:
+                os.makedirs(os.path.join(self.environment.build_dir, self.subdir), exist_ok=True)
+                file_encoding = kwargs.setdefault('encoding', 'utf-8')
+                missing_variables, confdata_useless = \
+                    mesonlib.do_conf_file(inputs_abs[0], ofile_abs, conf.held_object,
+                                          fmt, file_encoding)
+                if missing_variables:
+                    var_list = ", ".join(map(repr, sorted(missing_variables)))
+                    mlog.warning(
+                        "The variable(s) %s in the input file '%s' are not "
+                        "present in the given configuration data." % (
+                            var_list, inputs[0]), location=node)
+                if confdata_useless:
+                    ifbase = os.path.basename(inputs_abs[0])
+                    mlog.warning('Got an empty configuration_data() object and found no '
+                                 'substitutions in the input file {!r}. If you want to '
+                                 'copy a file to the build dir, use the \'copy:\' keyword '
+                                 'argument added in 0.47.0'.format(ifbase), location=node)
+            else:
+                mesonlib.dump_conf_header(ofile_abs, conf.held_object, output_format)
+            conf.mark_used()
+        elif 'command' in kwargs:
+            if len(inputs) > 1:
+                FeatureNew.single_use('multiple inputs in configure_file()', '0.52.0', self.subproject)
+            # We use absolute paths for input and output here because the cwd
+            # that the command is run from is 'unspecified', so it could change.
+            # Currently it's builddir/subdir for in_builddir else srcdir/subdir.
+            values = mesonlib.get_filenames_templates_dict(inputs_abs, [ofile_abs])
+            if depfile:
+                depfile = os.path.join(self.environment.get_scratch_dir(), depfile)
+                values['@DEPFILE@'] = depfile
+            # Substitute @INPUT@, @OUTPUT@, etc here.
+            cmd = mesonlib.substitute_values(kwargs['command'], values)
+            mlog.log('Configuring', mlog.bold(output), 'with command')
+            res = self.run_command_impl(node, cmd,  {}, True)
+            if res.returncode != 0:
+                raise InterpreterException('Running configure command failed.\n%s\n%s' %
+                                           (res.stdout, res.stderr))
+            if 'capture' in kwargs and kwargs['capture']:
+                dst_tmp = ofile_abs + '~'
+                file_encoding = kwargs.setdefault('encoding', 'utf-8')
+                with open(dst_tmp, 'w', encoding=file_encoding) as f:
+                    f.writelines(res.stdout)
+                if inputs_abs:
+                    shutil.copymode(inputs_abs[0], dst_tmp)
+                mesonlib.replace_if_different(ofile_abs, dst_tmp)
+            if depfile:
+                mlog.log('Reading depfile:', mlog.bold(depfile))
+                with open(depfile, 'r') as f:
+                    df = DepFile(f.readlines())
+                    deps = df.get_all_dependencies(ofile_fname)
+                    for dep in deps:
+                        self.add_build_def_file(dep)
+
+        elif 'copy' in kwargs:
+            if len(inputs_abs) != 1:
+                raise InterpreterException('Exactly one input file must be given in copy mode')
+            os.makedirs(os.path.join(self.environment.build_dir, self.subdir), exist_ok=True)
+            shutil.copyfile(inputs_abs[0], ofile_abs)
+            shutil.copystat(inputs_abs[0], ofile_abs)
+        else:
+            # Not reachable
+            raise AssertionError
+        # Install file if requested, we check for the empty string
+        # for backwards compatibility. That was the behaviour before
+        # 0.45.0 so preserve it.
+        idir = kwargs.get('install_dir', '')
+        if idir is False:
+            idir = ''
+            mlog.deprecation('Please use the new `install:` kwarg instead of passing '
+                             '`false` to `install_dir:`', location=node)
+        if not isinstance(idir, str):
+            if isinstance(idir, list) and len(idir) == 0:
+                mlog.deprecation('install_dir: kwarg must be a string and not an empty array. '
+                                 'Please use the install: kwarg to enable or disable installation. '
+                                 'This will be a hard error in the next release.')
+            else:
+                raise InterpreterException('"install_dir" must be a string')
+        install = kwargs.get('install', idir != '')
+        if not isinstance(install, bool):
+            raise InterpreterException('"install" must be a boolean')
+        if install:
+            if not idir:
+                raise InterpreterException('"install_dir" must be specified '
+                                           'when "install" in a configure_file '
+                                           'is true')
+            cfile = mesonlib.File.from_built_file(ofile_path, ofile_fname)
+            install_mode = self._get_kwarg_install_mode(kwargs)
+            self.build.data.append(build.Data([cfile], idir, install_mode))
+        return mesonlib.File.from_built_file(self.subdir, output)
+
+    def extract_incdirs(self, kwargs):
+        prospectives = unholder(extract_as_list(kwargs, 'include_directories'))
+        result = []
+        for p in prospectives:
+            if isinstance(p, build.IncludeDirs):
+                result.append(p)
+            elif isinstance(p, str):
+                result.append(self.build_incdir_object([p]).held_object)
+            else:
+                raise InterpreterException('Include directory objects can only be created from strings or include directories.')
+        return result
+
+    @permittedKwargs(permitted_kwargs['include_directories'])
+    @stringArgs
+    def func_include_directories(self, node, args, kwargs):
+        return self.build_incdir_object(args, kwargs.get('is_system', False))
+
+    def build_incdir_object(self, incdir_strings, is_system=False):
+        if not isinstance(is_system, bool):
+            raise InvalidArguments('Is_system must be boolean.')
+        src_root = self.environment.get_source_dir()
+        build_root = self.environment.get_build_dir()
+        absbase_src = os.path.join(src_root, self.subdir)
+        absbase_build = os.path.join(build_root, self.subdir)
+
+        for a in incdir_strings:
+            if a.startswith(src_root):
+                raise InvalidArguments('Tried to form an absolute path to a source dir. '
+                                       'You should not do that but use relative paths instead.'
+                                       '''
+
+To get include path to any directory relative to the current dir do
+
+incdir = include_directories(dirname)
+
+After this incdir will contain both the current source dir as well as the
+corresponding build dir. It can then be used in any subdirectory and
+Meson will take care of all the busywork to make paths work.
+
+Dirname can even be '.' to mark the current directory. Though you should
+remember that the current source and build directories are always
+put in the include directories by default so you only need to do
+include_directories('.') if you intend to use the result in a
+different subdirectory.
+''')
+            absdir_src = os.path.join(absbase_src, a)
+            absdir_build = os.path.join(absbase_build, a)
+            if not os.path.isdir(absdir_src) and not os.path.isdir(absdir_build):
+                raise InvalidArguments('Include dir %s does not exist.' % a)
+        i = IncludeDirsHolder(build.IncludeDirs(self.subdir, incdir_strings, is_system))
+        return i
+
+    @permittedKwargs(permitted_kwargs['add_test_setup'])
+    @stringArgs
+    def func_add_test_setup(self, node, args, kwargs):
+        if len(args) != 1:
+            raise InterpreterException('Add_test_setup needs one argument for the setup name.')
+        setup_name = args[0]
+        if re.fullmatch('([_a-zA-Z][_0-9a-zA-Z]*:)?[_a-zA-Z][_0-9a-zA-Z]*', setup_name) is None:
+            raise InterpreterException('Setup name may only contain alphanumeric characters.')
+        if ":" not in setup_name:
+            setup_name = (self.subproject if self.subproject else self.build.project_name) + ":" + setup_name
+        try:
+            inp = unholder(extract_as_list(kwargs, 'exe_wrapper'))
+            exe_wrapper = []
+            for i in inp:
+                if isinstance(i, str):
+                    exe_wrapper.append(i)
+                elif isinstance(i, dependencies.ExternalProgram):
+                    if not i.found():
+                        raise InterpreterException('Tried to use non-found executable.')
+                    exe_wrapper += i.get_command()
+                else:
+                    raise InterpreterException('Exe wrapper can only contain strings or external binaries.')
+        except KeyError:
+            exe_wrapper = None
+        gdb = kwargs.get('gdb', False)
+        if not isinstance(gdb, bool):
+            raise InterpreterException('Gdb option must be a boolean')
+        timeout_multiplier = kwargs.get('timeout_multiplier', 1)
+        if not isinstance(timeout_multiplier, int):
+            raise InterpreterException('Timeout multiplier must be a number.')
+        is_default = kwargs.get('is_default', False)
+        if not isinstance(is_default, bool):
+            raise InterpreterException('is_default option must be a boolean')
+        if is_default:
+            if self.build.test_setup_default_name is not None:
+                raise InterpreterException('\'%s\' is already set as default. '
+                                           'is_default can be set to true only once' % self.build.test_setup_default_name)
+            self.build.test_setup_default_name = setup_name
+        env = self.unpack_env_kwarg(kwargs)
+        self.build.test_setups[setup_name] = build.TestSetup(exe_wrapper, gdb, timeout_multiplier, env)
+
+    @permittedKwargs(permitted_kwargs['add_global_arguments'])
+    @stringArgs
+    def func_add_global_arguments(self, node, args, kwargs):
+        for_machine = self.machine_from_native_kwarg(kwargs)
+        self.add_global_arguments(node, self.build.global_args[for_machine], args, kwargs)
+
+    @permittedKwargs(permitted_kwargs['add_global_link_arguments'])
+    @stringArgs
+    def func_add_global_link_arguments(self, node, args, kwargs):
+        for_machine = self.machine_from_native_kwarg(kwargs)
+        self.add_global_arguments(node, self.build.global_link_args[for_machine], args, kwargs)
+
+    @permittedKwargs(permitted_kwargs['add_project_arguments'])
+    @stringArgs
+    def func_add_project_arguments(self, node, args, kwargs):
+        for_machine = self.machine_from_native_kwarg(kwargs)
+        self.add_project_arguments(node, self.build.projects_args[for_machine], args, kwargs)
+
+    @permittedKwargs(permitted_kwargs['add_project_link_arguments'])
+    @stringArgs
+    def func_add_project_link_arguments(self, node, args, kwargs):
+        for_machine = self.machine_from_native_kwarg(kwargs)
+        self.add_project_arguments(node, self.build.projects_link_args[for_machine], args, kwargs)
+
+    def warn_about_builtin_args(self, args):
+        warnargs = ('/W1', '/W2', '/W3', '/W4', '/Wall', '-Wall', '-Wextra', '-Wpedantic')
+        optargs = ('-O0', '-O2', '-O3', '-Os', '/O1', '/O2', '/Os')
+        for arg in args:
+            if arg in warnargs:
+                mlog.warning('Consider using the built-in warning_level option instead of using "{}".'.format(arg),
+                             location=self.current_node)
+            elif arg in optargs:
+                mlog.warning('Consider using the built-in optimization level instead of using "{}".'.format(arg),
+                             location=self.current_node)
+            elif arg == '-g':
+                mlog.warning('Consider using the built-in debug option instead of using "{}".'.format(arg),
+                             location=self.current_node)
+            elif arg == '-pipe':
+                mlog.warning("You don't need to add -pipe, Meson will use it automatically when it is available.",
+                             location=self.current_node)
+            elif arg.startswith('-fsanitize'):
+                mlog.warning('Consider using the built-in option for sanitizers instead of using "{}".'.format(arg),
+                             location=self.current_node)
+            elif arg.startswith('-std=') or arg.startswith('/std:'):
+                mlog.warning('Consider using the built-in option for language standard version instead of using "{}".'.format(arg),
+                             location=self.current_node)
+
+    def add_global_arguments(self, node, argsdict, args, kwargs):
+        if self.is_subproject():
+            msg = 'Function \'{}\' cannot be used in subprojects because ' \
+                  'there is no way to make that reliable.\nPlease only call ' \
+                  'this if is_subproject() returns false. Alternatively, ' \
+                  'define a variable that\ncontains your language-specific ' \
+                  'arguments and add it to the appropriate *_args kwarg ' \
+                  'in each target.'.format(node.func_name)
+            raise InvalidCode(msg)
+        frozen = self.project_args_frozen or self.global_args_frozen
+        self.add_arguments(node, argsdict, frozen, args, kwargs)
+
+    def add_project_arguments(self, node, argsdict, args, kwargs):
+        if self.subproject not in argsdict:
+            argsdict[self.subproject] = {}
+        self.add_arguments(node, argsdict[self.subproject],
+                           self.project_args_frozen, args, kwargs)
+
+    def add_arguments(self, node, argsdict, args_frozen, args, kwargs):
+        if args_frozen:
+            msg = 'Tried to use \'{}\' after a build target has been declared.\n' \
+                  'This is not permitted. Please declare all ' \
+                  'arguments before your targets.'.format(node.func_name)
+            raise InvalidCode(msg)
+
+        if 'language' not in kwargs:
+            raise InvalidCode('Missing language definition in {}'.format(node.func_name))
+
+        self.warn_about_builtin_args(args)
+
+        for lang in mesonlib.stringlistify(kwargs['language']):
+            lang = lang.lower()
+            argsdict[lang] = argsdict.get(lang, []) + args
+
+    @noKwargs
+    @noArgsFlattening
+    def func_environment(self, node, args, kwargs):
+        if len(args) > 1:
+            raise InterpreterException('environment takes only one optional positional arguments')
+        elif len(args) == 1:
+            FeatureNew.single_use('environment positional arguments', '0.52.0', self.subproject)
+            initial_values = args[0]
+            if not isinstance(initial_values, dict) and not isinstance(initial_values, list):
+                raise InterpreterException('environment first argument must be a dictionary or a list')
+        else:
+            initial_values = {}
+        return EnvironmentVariablesHolder(initial_values)
+
+    @stringArgs
+    @noKwargs
+    def func_join_paths(self, node, args, kwargs):
+        return self.join_path_strings(args)
+
+    def run(self):
+        super().run()
+        mlog.log('Build targets in project:', mlog.bold(str(len(self.build.targets))))
+        FeatureNew.report(self.subproject)
+        FeatureDeprecated.report(self.subproject)
+        if not self.is_subproject():
+            self.print_extra_warnings()
+        if self.subproject == '':
+            self._print_summary()
+
+    def print_extra_warnings(self):
+        # TODO cross compilation
+        for c in self.coredata.compilers.host.values():
+            if c.get_id() == 'clang':
+                self.check_clang_asan_lundef()
+                break
+
+    def check_clang_asan_lundef(self):
+        if 'b_lundef' not in self.coredata.base_options:
+            return
+        if 'b_sanitize' not in self.coredata.base_options:
+            return
+        if (self.coredata.base_options['b_lundef'].value and
+                self.coredata.base_options['b_sanitize'].value != 'none'):
+            mlog.warning('''Trying to use {} sanitizer on Clang with b_lundef.
+This will probably not work.
+Try setting b_lundef to false instead.'''.format(self.coredata.base_options['b_sanitize'].value),
+                         location=self.current_node)
+
+    def evaluate_subproject_info(self, path_from_source_root, subproject_dirname):
+        depth = 0
+        subproj_name = ''
+        segs = PurePath(path_from_source_root).parts
+        segs_spd = PurePath(subproject_dirname).parts
+        while segs and segs[0] == segs_spd[0]:
+            if len(segs_spd) == 1:
+                subproj_name = segs[1]
+                segs = segs[2:]
+                depth += 1
+            else:
+                segs_spd = segs_spd[1:]
+                segs = segs[1:]
+        return (depth, subproj_name)
+
+    # Check that the indicated file is within the same subproject
+    # as we currently are. This is to stop people doing
+    # nasty things like:
+    #
+    # f = files('../../master_src/file.c')
+    #
+    # Note that this is validated only when the file
+    # object is generated. The result can be used in a different
+    # subproject than it is defined in (due to e.g. a
+    # declare_dependency).
+    def validate_within_subproject(self, subdir, fname):
+        norm = os.path.normpath(os.path.join(subdir, fname))
+        if os.path.isabs(norm):
+            if not norm.startswith(self.environment.source_dir):
+                # Grabbing files outside the source tree is ok.
+                # This is for vendor stuff like:
+                #
+                # /opt/vendorsdk/src/file_with_license_restrictions.c
+                return
+            norm = os.path.relpath(norm, self.environment.source_dir)
+            assert(not os.path.isabs(norm))
+        (num_sps, sproj_name) = self.evaluate_subproject_info(norm, self.subproject_dir)
+        plain_filename = os.path.basename(norm)
+        if num_sps == 0:
+            if not self.is_subproject():
+                return
+            raise InterpreterException('Sandbox violation: Tried to grab file %s from a different subproject.' % plain_filename)
+        if num_sps > 1:
+            raise InterpreterException('Sandbox violation: Tried to grab file %s from a nested subproject.' % plain_filename)
+        if sproj_name != self.subproject_directory_name:
+            raise InterpreterException('Sandbox violation: Tried to grab file %s from a different subproject.' % plain_filename)
+
+    def source_strings_to_files(self, sources):
+        results = []
+        mesonlib.check_direntry_issues(sources)
+        if not isinstance(sources, list):
+            sources = [sources]
+        for s in sources:
+            if isinstance(s, (mesonlib.File, GeneratedListHolder,
+                              TargetHolder, CustomTargetIndexHolder,
+                              GeneratedObjectsHolder)):
+                pass
+            elif isinstance(s, str):
+                self.validate_within_subproject(self.subdir, s)
+                s = mesonlib.File.from_source_file(self.environment.source_dir, self.subdir, s)
+            else:
+                raise InterpreterException('Source item is {!r} instead of '
+                                           'string or File-type object'.format(s))
+            results.append(s)
+        return results
+
+    def add_target(self, name, tobj):
+        if name == '':
+            raise InterpreterException('Target name must not be empty.')
+        if name.strip() == '':
+            raise InterpreterException('Target name must not consist only of whitespace.')
+        if name.startswith('meson-'):
+            raise InvalidArguments("Target names starting with 'meson-' are reserved "
+                                   "for Meson's internal use. Please rename.")
+        if name in coredata.forbidden_target_names:
+            raise InvalidArguments("Target name '%s' is reserved for Meson's "
+                                   "internal use. Please rename." % name)
+        # To permit an executable and a shared library to have the
+        # same name, such as "foo.exe" and "libfoo.a".
+        idname = tobj.get_id()
+        if idname in self.build.targets:
+            raise InvalidCode('Tried to create target "%s", but a target of that name already exists.' % name)
+        self.build.targets[idname] = tobj
+        if idname not in self.coredata.target_guids:
+            self.coredata.target_guids[idname] = str(uuid.uuid4()).upper()
+
+    @FeatureNew('both_libraries', '0.46.0')
+    def build_both_libraries(self, node, args, kwargs):
+        shared_holder = self.build_target(node, args, kwargs, SharedLibraryHolder)
+
+        # Check if user forces non-PIC static library.
+        pic = True
+        if 'pic' in kwargs:
+            pic = kwargs['pic']
+        elif 'b_staticpic' in self.environment.coredata.base_options:
+            pic = self.environment.coredata.base_options['b_staticpic'].value
+
+        if pic:
+            # Exclude sources from args and kwargs to avoid building them twice
+            static_args = [args[0]]
+            static_kwargs = kwargs.copy()
+            static_kwargs['sources'] = []
+            static_kwargs['objects'] = shared_holder.held_object.extract_all_objects()
+        else:
+            static_args = args
+            static_kwargs = kwargs
+
+        static_holder = self.build_target(node, static_args, static_kwargs, StaticLibraryHolder)
+
+        return BothLibrariesHolder(shared_holder, static_holder, self)
+
+    def build_library(self, node, args, kwargs):
+        default_library = self.coredata.get_builtin_option('default_library', self.subproject)
+        if default_library == 'shared':
+            return self.build_target(node, args, kwargs, SharedLibraryHolder)
+        elif default_library == 'static':
+            return self.build_target(node, args, kwargs, StaticLibraryHolder)
+        elif default_library == 'both':
+            return self.build_both_libraries(node, args, kwargs)
+        else:
+            raise InterpreterException('Unknown default_library value: %s.', default_library)
+
+    def build_target(self, node, args, kwargs, targetholder):
+        @FeatureNewKwargs('build target', '0.42.0', ['rust_crate_type', 'build_rpath', 'implicit_include_directories'])
+        @FeatureNewKwargs('build target', '0.41.0', ['rust_args'])
+        @FeatureNewKwargs('build target', '0.40.0', ['build_by_default'])
+        @FeatureNewKwargs('build target', '0.48.0', ['gnu_symbol_visibility'])
+        def build_target_decorator_caller(self, node, args, kwargs):
+            return True
+
+        build_target_decorator_caller(self, node, args, kwargs)
+
+        if not args:
+            raise InterpreterException('Target does not have a name.')
+        name, *sources = args
+        for_machine = self.machine_from_native_kwarg(kwargs)
+        if 'sources' in kwargs:
+            sources += listify(kwargs['sources'])
+        sources = self.source_strings_to_files(sources)
+        objs = extract_as_list(kwargs, 'objects')
+        kwargs['dependencies'] = extract_as_list(kwargs, 'dependencies')
+        kwargs['install_mode'] = self._get_kwarg_install_mode(kwargs)
+        if 'extra_files' in kwargs:
+            ef = extract_as_list(kwargs, 'extra_files')
+            kwargs['extra_files'] = self.source_strings_to_files(ef)
+        self.check_sources_exist(os.path.join(self.source_root, self.subdir), sources)
+        if targetholder == ExecutableHolder:
+            targetclass = build.Executable
+        elif targetholder == SharedLibraryHolder:
+            targetclass = build.SharedLibrary
+        elif targetholder == SharedModuleHolder:
+            targetclass = build.SharedModule
+        elif targetholder == StaticLibraryHolder:
+            targetclass = build.StaticLibrary
+        elif targetholder == JarHolder:
+            targetclass = build.Jar
+        else:
+            mlog.debug('Unknown target type:', str(targetholder))
+            raise RuntimeError('Unreachable code')
+        self.kwarg_strings_to_includedirs(kwargs)
+
+        # Filter out kwargs from other target types. For example 'soversion'
+        # passed to library() when default_library == 'static'.
+        kwargs = {k: v for k, v in kwargs.items() if k in targetclass.known_kwargs}
+
+        kwargs['include_directories'] = self.extract_incdirs(kwargs)
+        target = targetclass(name, self.subdir, self.subproject, for_machine, sources, objs, self.environment, kwargs)
+        target.project_version = self.project_version
+
+        if not self.environment.machines.matches_build_machine(for_machine):
+            self.add_cross_stdlib_info(target)
+        l = targetholder(target, self)
+        self.add_target(name, l.held_object)
+        self.project_args_frozen = True
+        return l
+
+    def kwarg_strings_to_includedirs(self, kwargs):
+        if 'd_import_dirs' in kwargs:
+            items = mesonlib.extract_as_list(kwargs, 'd_import_dirs')
+            cleaned_items = []
+            for i in items:
+                if isinstance(i, str):
+                    # BW compatibility. This was permitted so we must support it
+                    # for a few releases so people can transition to "correct"
+                    # path declarations.
+                    if os.path.normpath(i).startswith(self.environment.get_source_dir()):
+                        mlog.warning('''Building a path to the source dir is not supported. Use a relative path instead.
+This will become a hard error in the future.''', location=self.current_node)
+                        i = os.path.relpath(i, os.path.join(self.environment.get_source_dir(), self.subdir))
+                        i = self.build_incdir_object([i])
+                cleaned_items.append(i)
+            kwargs['d_import_dirs'] = cleaned_items
+
+    def get_used_languages(self, target):
+        result = {}
+        for i in target.sources:
+            # TODO other platforms
+            for lang, c in self.coredata.compilers.host.items():
+                if c.can_compile(i):
+                    result[lang] = True
+                    break
+        return result
+
+    def add_cross_stdlib_info(self, target):
+        if target.for_machine != MachineChoice.HOST:
+            return
+        for l in self.get_used_languages(target):
+            props = self.environment.properties.host
+            if props.has_stdlib(l) \
+                    and self.subproject != props.get_stdlib(l)[0]:
+                target.add_deps(self.build.stdlibs.host[l])
+
+    def check_sources_exist(self, subdir, sources):
+        for s in sources:
+            if not isinstance(s, str):
+                continue # This means a generated source and they always exist.
+            fname = os.path.join(subdir, s)
+            if not os.path.isfile(fname):
+                raise InterpreterException('Tried to add non-existing source file %s.' % s)
+
+    # Only permit object extraction from the same subproject
+    def validate_extraction(self, buildtarget: InterpreterObject) -> None:
+        if not self.subdir.startswith(self.subproject_dir):
+            if buildtarget.subdir.startswith(self.subproject_dir):
+                raise InterpreterException('Tried to extract objects from a subproject target.')
+        else:
+            if not buildtarget.subdir.startswith(self.subproject_dir):
+                raise InterpreterException('Tried to extract objects from the main project from a subproject.')
+            if self.subdir.split('/')[1] != buildtarget.subdir.split('/')[1]:
+                raise InterpreterException('Tried to extract objects from a different subproject.')
+
+    def is_subproject(self):
+        return self.subproject != ''
+
+    @noKwargs
+    @noArgsFlattening
+    def func_set_variable(self, node, args, kwargs):
+        if len(args) != 2:
+            raise InvalidCode('Set_variable takes two arguments.')
+        varname, value = args
+        self.set_variable(varname, value)
+
+    @noKwargs
+    @noArgsFlattening
+    def func_get_variable(self, node, args, kwargs):
+        if len(args) < 1 or len(args) > 2:
+            raise InvalidCode('Get_variable takes one or two arguments.')
+        varname = args[0]
+        if isinstance(varname, Disabler):
+            return varname
+        if not isinstance(varname, str):
+            raise InterpreterException('First argument must be a string.')
+        try:
+            return self.variables[varname]
+        except KeyError:
+            pass
+        if len(args) == 2:
+            return args[1]
+        raise InterpreterException('Tried to get unknown variable "%s".' % varname)
+
+    @stringArgs
+    @noKwargs
+    def func_is_variable(self, node, args, kwargs):
+        if len(args) != 1:
+            raise InvalidCode('Is_variable takes two arguments.')
+        varname = args[0]
+        return varname in self.variables
+
+    @staticmethod
+    def machine_from_native_kwarg(kwargs: T.Dict[str, T.Any]) -> MachineChoice:
+        native = kwargs.get('native', False)
+        if not isinstance(native, bool):
+            raise InvalidArguments('Argument to "native" must be a boolean.')
+        return MachineChoice.BUILD if native else MachineChoice.HOST
+
+    @FeatureNew('is_disabler', '0.52.0')
+    @noKwargs
+    def func_is_disabler(self, node, args, kwargs):
+        if len(args) != 1:
+            raise InvalidCode('Is_disabler takes one argument.')
+        varname = args[0]
+        return isinstance(varname, Disabler)
diff --git a/meson/mesonbuild/interpreterbase.py b/meson/mesonbuild/interpreterbase.py
new file mode 100644
index 000000000..39531c111
--- /dev/null
+++ b/meson/mesonbuild/interpreterbase.py
@@ -0,0 +1,1211 @@
+# Copyright 2016-2017 The Meson development team
+
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+
+#     http://www.apache.org/licenses/LICENSE-2.0
+
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# This class contains the basic functionality needed to run any interpreter
+# or an interpreter-based tool.
+
+from . import mparser, mesonlib, mlog
+from . import environment, dependencies
+
+import abc
+import os, copy, re
+import collections.abc
+from functools import wraps
+import typing as T
+
+class InterpreterObject:
+    def __init__(self):
+        self.methods = {}  # type: T.Dict[str, T.Callable]
+        # Current node set during a method call. This can be used as location
+        # when printing a warning message during a method call.
+        self.current_node = None  # type: mparser.BaseNode
+
+    def method_call(self, method_name: str, args: T.List[T.Union[mparser.BaseNode, str, int, float, bool, list, dict, 'InterpreterObject', 'ObjectHolder']], kwargs: T.Dict[str, T.Union[mparser.BaseNode, str, int, float, bool, list, dict, 'InterpreterObject', 'ObjectHolder']]):
+        if method_name in self.methods:
+            method = self.methods[method_name]
+            if not getattr(method, 'no-args-flattening', False):
+                args = flatten(args)
+            return method(args, kwargs)
+        raise InvalidCode('Unknown method "%s" in object.' % method_name)
+
+TV_InterpreterObject = T.TypeVar('TV_InterpreterObject')
+
+class ObjectHolder(T.Generic[TV_InterpreterObject]):
+    def __init__(self, obj: InterpreterObject, subproject: T.Optional[str] = None):
+        self.held_object = obj        # type: InterpreterObject
+        self.subproject = subproject  # type: str
+
+    def __repr__(self):
+        return ''.format(self.held_object)
+
+TYPE_elementary = T.Union[str, int, float, bool]
+TYPE_var = T.Union[TYPE_elementary, list, dict, InterpreterObject, ObjectHolder]
+TYPE_nvar = T.Union[TYPE_var, mparser.BaseNode]
+TYPE_nkwargs = T.Dict[T.Union[mparser.BaseNode, str], TYPE_nvar]
+
+# Decorators for method calls.
+
+def check_stringlist(a: T.Any, msg: str = 'Arguments must be strings.') -> None:
+    if not isinstance(a, list):
+        mlog.debug('Not a list:', str(a))
+        raise InvalidArguments('Argument not a list.')
+    if not all(isinstance(s, str) for s in a):
+        mlog.debug('Element not a string:', str(a))
+        raise InvalidArguments(msg)
+
+def _get_callee_args(wrapped_args, want_subproject: bool = False):
+    s = wrapped_args[0]
+    n = len(wrapped_args)
+    # Raise an error if the codepaths are not there
+    subproject = None
+    if want_subproject and n == 2:
+        if hasattr(s, 'subproject'):
+            # Interpreter base types have 2 args: self, node
+            node = wrapped_args[1]
+            # args and kwargs are inside the node
+            args = None
+            kwargs = None
+            subproject = s.subproject
+        elif hasattr(wrapped_args[1], 'subproject'):
+            # Module objects have 2 args: self, interpreter
+            node = wrapped_args[1].current_node
+            # args and kwargs are inside the node
+            args = None
+            kwargs = None
+            subproject = wrapped_args[1].subproject
+        else:
+            raise AssertionError('Unknown args: {!r}'.format(wrapped_args))
+    elif n == 3:
+        # Methods on objects (*Holder, MesonMain, etc) have 3 args: self, args, kwargs
+        node = s.current_node
+        args = wrapped_args[1]
+        kwargs = wrapped_args[2]
+        if want_subproject:
+            if hasattr(s, 'subproject'):
+                subproject = s.subproject
+            elif hasattr(s, 'interpreter'):
+                subproject = s.interpreter.subproject
+    elif n == 4:
+        # Meson functions have 4 args: self, node, args, kwargs
+        # Module functions have 4 args: self, state, args, kwargs
+        if isinstance(s, InterpreterBase):
+            node = wrapped_args[1]
+        else:
+            node = wrapped_args[1].current_node
+        args = wrapped_args[2]
+        kwargs = wrapped_args[3]
+        if want_subproject:
+            if isinstance(s, InterpreterBase):
+                subproject = s.subproject
+            else:
+                subproject = wrapped_args[1].subproject
+    elif n == 5:
+        # Module snippets have 5 args: self, interpreter, state, args, kwargs
+        node = wrapped_args[2].current_node
+        args = wrapped_args[3]
+        kwargs = wrapped_args[4]
+        if want_subproject:
+            subproject = wrapped_args[2].subproject
+    else:
+        raise AssertionError('Unknown args: {!r}'.format(wrapped_args))
+    # Sometimes interpreter methods are called internally with None instead of
+    # empty list/dict
+    args = args if args is not None else []
+    kwargs = kwargs if kwargs is not None else {}
+    return s, node, args, kwargs, subproject
+
+def flatten(args: T.Union[TYPE_nvar, T.List[TYPE_nvar]]) -> T.List[TYPE_nvar]:
+    if isinstance(args, mparser.StringNode):
+        assert isinstance(args.value, str)
+        return [args.value]
+    if not isinstance(args, collections.abc.Sequence):
+        return [args]
+    result = []  # type: T.List[TYPE_nvar]
+    for a in args:
+        if isinstance(a, list):
+            rest = flatten(a)
+            result = result + rest
+        elif isinstance(a, mparser.StringNode):
+            result.append(a.value)
+        else:
+            result.append(a)
+    return result
+
+def noPosargs(f):
+    @wraps(f)
+    def wrapped(*wrapped_args, **wrapped_kwargs):
+        args = _get_callee_args(wrapped_args)[2]
+        if args:
+            raise InvalidArguments('Function does not take positional arguments.')
+        return f(*wrapped_args, **wrapped_kwargs)
+    return wrapped
+
+def builtinMethodNoKwargs(f):
+    @wraps(f)
+    def wrapped(*wrapped_args, **wrapped_kwargs):
+        node = wrapped_args[0].current_node
+        method_name = wrapped_args[2]
+        kwargs = wrapped_args[4]
+        if kwargs:
+            mlog.warning('Method {!r} does not take keyword arguments.'.format(method_name),
+                         'This will become a hard error in the future',
+                         location=node)
+        return f(*wrapped_args, **wrapped_kwargs)
+    return wrapped
+
+def noKwargs(f):
+    @wraps(f)
+    def wrapped(*wrapped_args, **wrapped_kwargs):
+        kwargs = _get_callee_args(wrapped_args)[3]
+        if kwargs:
+            raise InvalidArguments('Function does not take keyword arguments.')
+        return f(*wrapped_args, **wrapped_kwargs)
+    return wrapped
+
+def stringArgs(f):
+    @wraps(f)
+    def wrapped(*wrapped_args, **wrapped_kwargs):
+        args = _get_callee_args(wrapped_args)[2]
+        assert(isinstance(args, list))
+        check_stringlist(args)
+        return f(*wrapped_args, **wrapped_kwargs)
+    return wrapped
+
+def noArgsFlattening(f):
+    setattr(f, 'no-args-flattening', True)  # noqa: B010
+    return f
+
+def disablerIfNotFound(f):
+    @wraps(f)
+    def wrapped(*wrapped_args, **wrapped_kwargs):
+        kwargs = _get_callee_args(wrapped_args)[3]
+        disabler = kwargs.pop('disabler', False)
+        ret = f(*wrapped_args, **wrapped_kwargs)
+        if disabler and not ret.held_object.found():
+            return Disabler()
+        return ret
+    return wrapped
+
+class permittedKwargs:
+
+    def __init__(self, permitted: T.Set[str]):
+        self.permitted = permitted  # type: T.Set[str]
+
+    def __call__(self, f):
+        @wraps(f)
+        def wrapped(*wrapped_args, **wrapped_kwargs):
+            s, node, args, kwargs, _ = _get_callee_args(wrapped_args)
+            for k in kwargs:
+                if k not in self.permitted:
+                    mlog.warning('''Passed invalid keyword argument "{}".'''.format(k), location=node)
+                    mlog.warning('This will become a hard error in the future.')
+            return f(*wrapped_args, **wrapped_kwargs)
+        return wrapped
+
+class FeatureCheckBase(metaclass=abc.ABCMeta):
+    "Base class for feature version checks"
+
+    # In python 3.6 we can just forward declare this, but in 3.5 we can't
+    # This will be overwritten by the subclasses by necessity
+    feature_registry = {}  # type: T.ClassVar[T.Dict[str, T.Dict[str, T.Set[str]]]]
+
+    def __init__(self, feature_name: str, version: str, extra_message: T.Optional[str] = None):
+        self.feature_name = feature_name  # type: str
+        self.feature_version = version    # type: str
+        self.extra_message = extra_message or ''  # type: str
+
+    @staticmethod
+    def get_target_version(subproject: str) -> str:
+        # Don't do any checks if project() has not been parsed yet
+        if subproject not in mesonlib.project_meson_versions:
+            return ''
+        return mesonlib.project_meson_versions[subproject]
+
+    @staticmethod
+    @abc.abstractmethod
+    def check_version(target_version: str, feature_Version: str) -> bool:
+        pass
+
+    def use(self, subproject: str) -> None:
+        tv = self.get_target_version(subproject)
+        # No target version
+        if tv == '':
+            return
+        # Target version is new enough
+        if self.check_version(tv, self.feature_version):
+            return
+        # Feature is too new for target version, register it
+        if subproject not in self.feature_registry:
+            self.feature_registry[subproject] = {self.feature_version: set()}
+        register = self.feature_registry[subproject]
+        if self.feature_version not in register:
+            register[self.feature_version] = set()
+        if self.feature_name in register[self.feature_version]:
+            # Don't warn about the same feature multiple times
+            # FIXME: This is needed to prevent duplicate warnings, but also
+            # means we won't warn about a feature used in multiple places.
+            return
+        register[self.feature_version].add(self.feature_name)
+        self.log_usage_warning(tv)
+
+    @classmethod
+    def report(cls, subproject: str) -> None:
+        if subproject not in cls.feature_registry:
+            return
+        warning_str = cls.get_warning_str_prefix(cls.get_target_version(subproject))
+        fv = cls.feature_registry[subproject]
+        for version in sorted(fv.keys()):
+            warning_str += '\n * {}: {}'.format(version, fv[version])
+        mlog.warning(warning_str)
+
+    def log_usage_warning(self, tv: str) -> None:
+        raise InterpreterException('log_usage_warning not implemented')
+
+    @staticmethod
+    def get_warning_str_prefix(tv: str) -> str:
+        raise InterpreterException('get_warning_str_prefix not implemented')
+
+    def __call__(self, f):
+        @wraps(f)
+        def wrapped(*wrapped_args, **wrapped_kwargs):
+            subproject = _get_callee_args(wrapped_args, want_subproject=True)[4]
+            if subproject is None:
+                raise AssertionError('{!r}'.format(wrapped_args))
+            self.use(subproject)
+            return f(*wrapped_args, **wrapped_kwargs)
+        return wrapped
+
+    @classmethod
+    def single_use(cls, feature_name: str, version: str, subproject: str,
+                   extra_message: T.Optional[str] = None) -> None:
+        """Oneline version that instantiates and calls use()."""
+        cls(feature_name, version, extra_message).use(subproject)
+
+
+class FeatureNew(FeatureCheckBase):
+    """Checks for new features"""
+
+    # Class variable, shared across all instances
+    #
+    # Format: {subproject: {feature_version: set(feature_names)}}
+    feature_registry = {}  # type: T.ClassVar[T.Dict[str, T.Dict[str, T.Set[str]]]]
+
+    @staticmethod
+    def check_version(target_version: str, feature_version: str) -> bool:
+        return mesonlib.version_compare_condition_with_min(target_version, feature_version)
+
+    @staticmethod
+    def get_warning_str_prefix(tv: str) -> str:
+        return 'Project specifies a minimum meson_version \'{}\' but uses features which were added in newer versions:'.format(tv)
+
+    def log_usage_warning(self, tv: str) -> None:
+        args = [
+            'Project targeting', "'{}'".format(tv),
+            'but tried to use feature introduced in',
+            "'{}':".format(self.feature_version),
+            '{}.'.format(self.feature_name),
+        ]
+        if self.extra_message:
+            args.append(self.extra_message)
+        mlog.warning(*args)
+
+class FeatureDeprecated(FeatureCheckBase):
+    """Checks for deprecated features"""
+
+    # Class variable, shared across all instances
+    #
+    # Format: {subproject: {feature_version: set(feature_names)}}
+    feature_registry = {}  # type: T.ClassVar[T.Dict[str, T.Dict[str, T.Set[str]]]]
+
+    @staticmethod
+    def check_version(target_version: str, feature_version: str) -> bool:
+        # For deprecation checks we need to return the inverse of FeatureNew checks
+        return not mesonlib.version_compare_condition_with_min(target_version, feature_version)
+
+    @staticmethod
+    def get_warning_str_prefix(tv: str) -> str:
+        return 'Deprecated features used:'
+
+    def log_usage_warning(self, tv: str) -> None:
+        args = [
+            'Project targeting', "'{}'".format(tv),
+            'but tried to use feature deprecated since',
+            "'{}':".format(self.feature_version),
+            '{}.'.format(self.feature_name),
+        ]
+        if self.extra_message:
+            args.append(self.extra_message)
+        mlog.warning(*args)
+
+
+class FeatureCheckKwargsBase(metaclass=abc.ABCMeta):
+
+    @property
+    @abc.abstractmethod
+    def feature_check_class(self) -> T.Type[FeatureCheckBase]:
+        pass
+
+    def __init__(self, feature_name: str, feature_version: str,
+                 kwargs: T.List[str], extra_message: T.Optional[str] = None):
+        self.feature_name = feature_name
+        self.feature_version = feature_version
+        self.kwargs = kwargs
+        self.extra_message = extra_message
+
+    def __call__(self, f):
+        @wraps(f)
+        def wrapped(*wrapped_args, **wrapped_kwargs):
+            kwargs, subproject = _get_callee_args(wrapped_args, want_subproject=True)[3:5]
+            if subproject is None:
+                raise AssertionError('{!r}'.format(wrapped_args))
+            for arg in self.kwargs:
+                if arg not in kwargs:
+                    continue
+                name = arg + ' arg in ' + self.feature_name
+                self.feature_check_class.single_use(
+                        name, self.feature_version, subproject, self.extra_message)
+            return f(*wrapped_args, **wrapped_kwargs)
+        return wrapped
+
+class FeatureNewKwargs(FeatureCheckKwargsBase):
+    feature_check_class = FeatureNew
+
+class FeatureDeprecatedKwargs(FeatureCheckKwargsBase):
+    feature_check_class = FeatureDeprecated
+
+
+class InterpreterException(mesonlib.MesonException):
+    pass
+
+class InvalidCode(InterpreterException):
+    pass
+
+class InvalidArguments(InterpreterException):
+    pass
+
+class SubdirDoneRequest(BaseException):
+    pass
+
+class ContinueRequest(BaseException):
+    pass
+
+class BreakRequest(BaseException):
+    pass
+
+class MutableInterpreterObject(InterpreterObject):
+    def __init__(self):
+        super().__init__()
+
+class Disabler(InterpreterObject):
+    def __init__(self):
+        super().__init__()
+        self.methods.update({'found': self.found_method})
+
+    def found_method(self, args, kwargs):
+        return False
+
+def is_disabler(i) -> bool:
+    return isinstance(i, Disabler)
+
+def is_arg_disabled(arg) -> bool:
+    if is_disabler(arg):
+        return True
+    if isinstance(arg, list):
+        for i in arg:
+            if is_arg_disabled(i):
+                return True
+    return False
+
+def is_disabled(args, kwargs) -> bool:
+    for i in args:
+        if is_arg_disabled(i):
+            return True
+    for i in kwargs.values():
+        if is_arg_disabled(i):
+            return True
+    return False
+
+class InterpreterBase:
+    elementary_types = (int, float, str, bool, list)
+
+    def __init__(self, source_root: str, subdir: str, subproject: str):
+        self.source_root = source_root
+        self.funcs = {}    # type: T.Dict[str, T.Callable[[mparser.BaseNode, T.List[TYPE_nvar], T.Dict[str, TYPE_nvar]], TYPE_var]]
+        self.builtin = {}  # type: T.Dict[str, InterpreterObject]
+        self.subdir = subdir
+        self.subproject = subproject
+        self.variables = {}  # type: T.Dict[str, TYPE_var]
+        self.argument_depth = 0
+        self.current_lineno = -1
+        # Current node set during a function call. This can be used as location
+        # when printing a warning message during a method call.
+        self.current_node = None  # type: mparser.BaseNode
+
+    def load_root_meson_file(self) -> None:
+        mesonfile = os.path.join(self.source_root, self.subdir, environment.build_filename)
+        if not os.path.isfile(mesonfile):
+            raise InvalidArguments('Missing Meson file in %s' % mesonfile)
+        with open(mesonfile, encoding='utf8') as mf:
+            code = mf.read()
+        if code.isspace():
+            raise InvalidCode('Builder file is empty.')
+        assert(isinstance(code, str))
+        try:
+            self.ast = mparser.Parser(code, mesonfile).parse()
+        except mesonlib.MesonException as me:
+            me.file = mesonfile
+            raise me
+
+    def join_path_strings(self, args: T.Sequence[str]) -> str:
+        return os.path.join(*args).replace('\\', '/')
+
+    def parse_project(self) -> None:
+        """
+        Parses project() and initializes languages, compilers etc. Do this
+        early because we need this before we parse the rest of the AST.
+        """
+        self.evaluate_codeblock(self.ast, end=1)
+
+    def sanity_check_ast(self) -> None:
+        if not isinstance(self.ast, mparser.CodeBlockNode):
+            raise InvalidCode('AST is of invalid type. Possibly a bug in the parser.')
+        if not self.ast.lines:
+            raise InvalidCode('No statements in code.')
+        first = self.ast.lines[0]
+        if not isinstance(first, mparser.FunctionNode) or first.func_name != 'project':
+            raise InvalidCode('First statement must be a call to project')
+
+    def run(self) -> None:
+        # Evaluate everything after the first line, which is project() because
+        # we already parsed that in self.parse_project()
+        try:
+            self.evaluate_codeblock(self.ast, start=1)
+        except SubdirDoneRequest:
+            pass
+
+    def evaluate_codeblock(self, node: mparser.CodeBlockNode, start: int = 0, end: T.Optional[int] = None) -> None:
+        if node is None:
+            return
+        if not isinstance(node, mparser.CodeBlockNode):
+            e = InvalidCode('Tried to execute a non-codeblock. Possibly a bug in the parser.')
+            e.lineno = node.lineno
+            e.colno = node.colno
+            raise e
+        statements = node.lines[start:end]
+        i = 0
+        while i < len(statements):
+            cur = statements[i]
+            try:
+                self.current_lineno = cur.lineno
+                self.evaluate_statement(cur)
+            except Exception as e:
+                if getattr(e, 'lineno', None) is None:
+                    # We are doing the equivalent to setattr here and mypy does not like it
+                    e.lineno = cur.lineno                                                             # type: ignore
+                    e.colno = cur.colno                                                               # type: ignore
+                    e.file = os.path.join(self.source_root, self.subdir, environment.build_filename)  # type: ignore
+                raise e
+            i += 1 # In THE FUTURE jump over blocks and stuff.
+
+    def evaluate_statement(self, cur: mparser.BaseNode) -> T.Optional[TYPE_var]:
+        self.current_node = cur
+        if isinstance(cur, mparser.FunctionNode):
+            return self.function_call(cur)
+        elif isinstance(cur, mparser.AssignmentNode):
+            self.assignment(cur)
+        elif isinstance(cur, mparser.MethodNode):
+            return self.method_call(cur)
+        elif isinstance(cur, mparser.StringNode):
+            return cur.value
+        elif isinstance(cur, mparser.BooleanNode):
+            return cur.value
+        elif isinstance(cur, mparser.IfClauseNode):
+            return self.evaluate_if(cur)
+        elif isinstance(cur, mparser.IdNode):
+            return self.get_variable(cur.value)
+        elif isinstance(cur, mparser.ComparisonNode):
+            return self.evaluate_comparison(cur)
+        elif isinstance(cur, mparser.ArrayNode):
+            return self.evaluate_arraystatement(cur)
+        elif isinstance(cur, mparser.DictNode):
+            return self.evaluate_dictstatement(cur)
+        elif isinstance(cur, mparser.NumberNode):
+            return cur.value
+        elif isinstance(cur, mparser.AndNode):
+            return self.evaluate_andstatement(cur)
+        elif isinstance(cur, mparser.OrNode):
+            return self.evaluate_orstatement(cur)
+        elif isinstance(cur, mparser.NotNode):
+            return self.evaluate_notstatement(cur)
+        elif isinstance(cur, mparser.UMinusNode):
+            return self.evaluate_uminusstatement(cur)
+        elif isinstance(cur, mparser.ArithmeticNode):
+            return self.evaluate_arithmeticstatement(cur)
+        elif isinstance(cur, mparser.ForeachClauseNode):
+            self.evaluate_foreach(cur)
+        elif isinstance(cur, mparser.PlusAssignmentNode):
+            self.evaluate_plusassign(cur)
+        elif isinstance(cur, mparser.IndexNode):
+            return self.evaluate_indexing(cur)
+        elif isinstance(cur, mparser.TernaryNode):
+            return self.evaluate_ternary(cur)
+        elif isinstance(cur, mparser.ContinueNode):
+            raise ContinueRequest()
+        elif isinstance(cur, mparser.BreakNode):
+            raise BreakRequest()
+        elif isinstance(cur, self.elementary_types):
+            return cur
+        else:
+            raise InvalidCode("Unknown statement.")
+        return None
+
+    def evaluate_arraystatement(self, cur: mparser.ArrayNode) -> list:
+        (arguments, kwargs) = self.reduce_arguments(cur.args)
+        if len(kwargs) > 0:
+            raise InvalidCode('Keyword arguments are invalid in array construction.')
+        return arguments
+
+    @FeatureNew('dict', '0.47.0')
+    def evaluate_dictstatement(self, cur: mparser.DictNode) -> T.Dict[str, T.Any]:
+        (arguments, kwargs) = self.reduce_arguments(cur.args, resolve_key_nodes=False)
+        assert (not arguments)
+        result = {}  # type: T.Dict[str, T.Any]
+        self.argument_depth += 1
+        for key, value in kwargs.items():
+            if not isinstance(key, mparser.StringNode):
+                FeatureNew.single_use('Dictionary entry using non literal key', '0.53.0', self.subproject)
+            assert isinstance(key, mparser.BaseNode)  # All keys must be nodes due to resolve_key_nodes=False
+            str_key = self.evaluate_statement(key)
+            if not isinstance(str_key, str):
+                raise InvalidArguments('Key must be a string')
+            if str_key in result:
+                raise InvalidArguments('Duplicate dictionary key: {}'.format(str_key))
+            result[str_key] = value
+        self.argument_depth -= 1
+        return result
+
+    def evaluate_notstatement(self, cur: mparser.NotNode) -> T.Union[bool, Disabler]:
+        v = self.evaluate_statement(cur.value)
+        if isinstance(v, Disabler):
+            return v
+        if not isinstance(v, bool):
+            raise InterpreterException('Argument to "not" is not a boolean.')
+        return not v
+
+    def evaluate_if(self, node: mparser.IfClauseNode) -> T.Optional[Disabler]:
+        assert(isinstance(node, mparser.IfClauseNode))
+        for i in node.ifs:
+            result = self.evaluate_statement(i.condition)
+            if isinstance(result, Disabler):
+                return result
+            if not(isinstance(result, bool)):
+                raise InvalidCode('If clause {!r} does not evaluate to true or false.'.format(result))
+            if result:
+                self.evaluate_codeblock(i.block)
+                return None
+        if not isinstance(node.elseblock, mparser.EmptyNode):
+            self.evaluate_codeblock(node.elseblock)
+        return None
+
+    def validate_comparison_types(self, val1: T.Any, val2: T.Any) -> bool:
+        if type(val1) != type(val2):
+            return False
+        return True
+
+    def evaluate_in(self, val1: T.Any, val2: T.Any) -> bool:
+        if not isinstance(val1, (str, int, float, ObjectHolder)):
+            raise InvalidArguments('lvalue of "in" operator must be a string, integer, float, or object')
+        if not isinstance(val2, (list, dict)):
+            raise InvalidArguments('rvalue of "in" operator must be an array or a dict')
+        return val1 in val2
+
+    def evaluate_comparison(self, node: mparser.ComparisonNode) -> T.Union[bool, Disabler]:
+        val1 = self.evaluate_statement(node.left)
+        if isinstance(val1, Disabler):
+            return val1
+        val2 = self.evaluate_statement(node.right)
+        if isinstance(val2, Disabler):
+            return val2
+        if node.ctype == 'in':
+            return self.evaluate_in(val1, val2)
+        elif node.ctype == 'notin':
+            return not self.evaluate_in(val1, val2)
+        valid = self.validate_comparison_types(val1, val2)
+        # Ordering comparisons of different types isn't allowed since PR #1810
+        # (0.41.0).  Since PR #2884 we also warn about equality comparisons of
+        # different types, which will one day become an error.
+        if not valid and (node.ctype == '==' or node.ctype == '!='):
+            mlog.warning('''Trying to compare values of different types ({}, {}) using {}.
+The result of this is undefined and will become a hard error in a future Meson release.'''
+                         .format(type(val1).__name__, type(val2).__name__, node.ctype), location=node)
+        if node.ctype == '==':
+            return val1 == val2
+        elif node.ctype == '!=':
+            return val1 != val2
+        elif not valid:
+            raise InterpreterException(
+                'Values of different types ({}, {}) cannot be compared using {}.'.format(type(val1).__name__,
+                                                                                         type(val2).__name__,
+                                                                                         node.ctype))
+        elif not isinstance(val1, self.elementary_types):
+            raise InterpreterException('{} can only be compared for equality.'.format(getattr(node.left, 'value', '')))
+        elif not isinstance(val2, self.elementary_types):
+            raise InterpreterException('{} can only be compared for equality.'.format(getattr(node.right, 'value', '')))
+        # Use type: ignore because mypy will complain that we are comparing two Unions,
+        # but we actually guarantee earlier that both types are the same
+        elif node.ctype == '<':
+            return val1 < val2   # type: ignore
+        elif node.ctype == '<=':
+            return val1 <= val2  # type: ignore
+        elif node.ctype == '>':
+            return val1 > val2   # type: ignore
+        elif node.ctype == '>=':
+            return val1 >= val2  # type: ignore
+        else:
+            raise InvalidCode('You broke my compare eval.')
+
+    def evaluate_andstatement(self, cur: mparser.AndNode) -> T.Union[bool, Disabler]:
+        l = self.evaluate_statement(cur.left)
+        if isinstance(l, Disabler):
+            return l
+        if not isinstance(l, bool):
+            raise InterpreterException('First argument to "and" is not a boolean.')
+        if not l:
+            return False
+        r = self.evaluate_statement(cur.right)
+        if isinstance(r, Disabler):
+            return r
+        if not isinstance(r, bool):
+            raise InterpreterException('Second argument to "and" is not a boolean.')
+        return r
+
+    def evaluate_orstatement(self, cur: mparser.OrNode) -> T.Union[bool, Disabler]:
+        l = self.evaluate_statement(cur.left)
+        if isinstance(l, Disabler):
+            return l
+        if not isinstance(l, bool):
+            raise InterpreterException('First argument to "or" is not a boolean.')
+        if l:
+            return True
+        r = self.evaluate_statement(cur.right)
+        if isinstance(r, Disabler):
+            return r
+        if not isinstance(r, bool):
+            raise InterpreterException('Second argument to "or" is not a boolean.')
+        return r
+
+    def evaluate_uminusstatement(self, cur) -> T.Union[int, Disabler]:
+        v = self.evaluate_statement(cur.value)
+        if isinstance(v, Disabler):
+            return v
+        if not isinstance(v, int):
+            raise InterpreterException('Argument to negation is not an integer.')
+        return -v
+
+    @FeatureNew('/ with string arguments', '0.49.0')
+    def evaluate_path_join(self, l: str, r: str) -> str:
+        if not isinstance(l, str):
+            raise InvalidCode('The division operator can only append to a string.')
+        if not isinstance(r, str):
+            raise InvalidCode('The division operator can only append a string.')
+        return self.join_path_strings((l, r))
+
+    def evaluate_division(self, l: T.Any, r: T.Any) -> T.Union[int, str]:
+        if isinstance(l, str) or isinstance(r, str):
+            return self.evaluate_path_join(l, r)
+        if isinstance(l, int) and isinstance(r, int):
+            if r == 0:
+                raise InvalidCode('Division by zero.')
+            return l // r
+        raise InvalidCode('Division works only with strings or integers.')
+
+    def evaluate_arithmeticstatement(self, cur: mparser.ArithmeticNode) -> T.Union[int, str, dict, list, Disabler]:
+        l = self.evaluate_statement(cur.left)
+        if isinstance(l, Disabler):
+            return l
+        r = self.evaluate_statement(cur.right)
+        if isinstance(r, Disabler):
+            return r
+
+        if cur.operation == 'add':
+            if isinstance(l, dict) and isinstance(r, dict):
+                return {**l, **r}
+            try:
+                # MyPy error due to handling two Unions (we are catching all exceptions anyway)
+                return l + r  # type: ignore
+            except Exception as e:
+                raise InvalidCode('Invalid use of addition: ' + str(e))
+        elif cur.operation == 'sub':
+            if not isinstance(l, int) or not isinstance(r, int):
+                raise InvalidCode('Subtraction works only with integers.')
+            return l - r
+        elif cur.operation == 'mul':
+            if not isinstance(l, int) or not isinstance(r, int):
+                raise InvalidCode('Multiplication works only with integers.')
+            return l * r
+        elif cur.operation == 'div':
+            return self.evaluate_division(l, r)
+        elif cur.operation == 'mod':
+            if not isinstance(l, int) or not isinstance(r, int):
+                raise InvalidCode('Modulo works only with integers.')
+            return l % r
+        else:
+            raise InvalidCode('You broke me.')
+
+    def evaluate_ternary(self, node: mparser.TernaryNode) -> TYPE_var:
+        assert(isinstance(node, mparser.TernaryNode))
+        result = self.evaluate_statement(node.condition)
+        if isinstance(result, Disabler):
+            return result
+        if not isinstance(result, bool):
+            raise InterpreterException('Ternary condition is not boolean.')
+        if result:
+            return self.evaluate_statement(node.trueblock)
+        else:
+            return self.evaluate_statement(node.falseblock)
+
+    def evaluate_foreach(self, node: mparser.ForeachClauseNode) -> None:
+        assert(isinstance(node, mparser.ForeachClauseNode))
+        items = self.evaluate_statement(node.items)
+
+        if isinstance(items, list):
+            if len(node.varnames) != 1:
+                raise InvalidArguments('Foreach on array does not unpack')
+            varname = node.varnames[0]
+            for item in items:
+                self.set_variable(varname, item)
+                try:
+                    self.evaluate_codeblock(node.block)
+                except ContinueRequest:
+                    continue
+                except BreakRequest:
+                    break
+        elif isinstance(items, dict):
+            if len(node.varnames) != 2:
+                raise InvalidArguments('Foreach on dict unpacks key and value')
+            for key, value in items.items():
+                self.set_variable(node.varnames[0], key)
+                self.set_variable(node.varnames[1], value)
+                try:
+                    self.evaluate_codeblock(node.block)
+                except ContinueRequest:
+                    continue
+                except BreakRequest:
+                    break
+        else:
+            raise InvalidArguments('Items of foreach loop must be an array or a dict')
+
+    def evaluate_plusassign(self, node: mparser.PlusAssignmentNode) -> None:
+        assert(isinstance(node, mparser.PlusAssignmentNode))
+        varname = node.var_name
+        addition = self.evaluate_statement(node.value)
+
+        # Remember that all variables are immutable. We must always create a
+        # full new variable and then assign it.
+        old_variable = self.get_variable(varname)
+        new_value = None  # type: T.Union[str, int, float, bool, dict, list]
+        if isinstance(old_variable, str):
+            if not isinstance(addition, str):
+                raise InvalidArguments('The += operator requires a string on the right hand side if the variable on the left is a string')
+            new_value = old_variable + addition
+        elif isinstance(old_variable, int):
+            if not isinstance(addition, int):
+                raise InvalidArguments('The += operator requires an int on the right hand side if the variable on the left is an int')
+            new_value = old_variable + addition
+        elif isinstance(old_variable, list):
+            if isinstance(addition, list):
+                new_value = old_variable + addition
+            else:
+                new_value = old_variable + [addition]
+        elif isinstance(old_variable, dict):
+            if not isinstance(addition, dict):
+                raise InvalidArguments('The += operator requires a dict on the right hand side if the variable on the left is a dict')
+            new_value = {**old_variable, **addition}
+        # Add other data types here.
+        else:
+            raise InvalidArguments('The += operator currently only works with arrays, dicts, strings or ints')
+        self.set_variable(varname, new_value)
+
+    def evaluate_indexing(self, node: mparser.IndexNode) -> TYPE_var:
+        assert(isinstance(node, mparser.IndexNode))
+        iobject = self.evaluate_statement(node.iobject)
+        if isinstance(iobject, Disabler):
+            return iobject
+        if not hasattr(iobject, '__getitem__'):
+            raise InterpreterException(
+                'Tried to index an object that doesn\'t support indexing.')
+        index = self.evaluate_statement(node.index)
+
+        if isinstance(iobject, dict):
+            if not isinstance(index, str):
+                raise InterpreterException('Key is not a string')
+            try:
+                return iobject[index]
+            except KeyError:
+                raise InterpreterException('Key %s is not in dict' % index)
+        else:
+            if not isinstance(index, int):
+                raise InterpreterException('Index value is not an integer.')
+            try:
+                # Ignore the MyPy error, since we don't know all indexable types here
+                # and we handle non indexable types with an exception
+                # TODO maybe find a better solution
+                return iobject[index]  # type: ignore
+            except IndexError:
+                # We are already checking for the existance of __getitem__, so this should be save
+                raise InterpreterException('Index %d out of bounds of array of size %d.' % (index, len(iobject)))  # type: ignore
+
+    def function_call(self, node: mparser.FunctionNode) -> T.Optional[TYPE_var]:
+        func_name = node.func_name
+        (posargs, kwargs) = self.reduce_arguments(node.args)
+        if is_disabled(posargs, kwargs) and func_name not in {'get_variable', 'set_variable', 'is_disabler'}:
+            return Disabler()
+        if func_name in self.funcs:
+            func = self.funcs[func_name]
+            func_args = posargs  # type: T.Any
+            if not getattr(func, 'no-args-flattening', False):
+                func_args = flatten(posargs)
+            return func(node, func_args, self.kwargs_string_keys(kwargs))
+        else:
+            self.unknown_function_called(func_name)
+            return None
+
+    def method_call(self, node: mparser.MethodNode) -> TYPE_var:
+        invokable = node.source_object
+        if isinstance(invokable, mparser.IdNode):
+            object_name = invokable.value
+            obj = self.get_variable(object_name)
+        else:
+            obj = self.evaluate_statement(invokable)
+        method_name = node.name
+        (args, kwargs) = self.reduce_arguments(node.args)
+        if is_disabled(args, kwargs):
+            return Disabler()
+        if isinstance(obj, str):
+            return self.string_method_call(obj, method_name, args, kwargs)
+        if isinstance(obj, bool):
+            return self.bool_method_call(obj, method_name, args, kwargs)
+        if isinstance(obj, int):
+            return self.int_method_call(obj, method_name, args, kwargs)
+        if isinstance(obj, list):
+            return self.array_method_call(obj, method_name, args, kwargs)
+        if isinstance(obj, dict):
+            return self.dict_method_call(obj, method_name, args, kwargs)
+        if isinstance(obj, mesonlib.File):
+            raise InvalidArguments('File object "%s" is not callable.' % obj)
+        if not isinstance(obj, InterpreterObject):
+            raise InvalidArguments('Variable "%s" is not callable.' % object_name)
+        # Special case. This is the only thing you can do with a disabler
+        # object. Every other use immediately returns the disabler object.
+        if isinstance(obj, Disabler):
+            if method_name == 'found':
+                return False
+            else:
+                return Disabler()
+        if method_name == 'extract_objects':
+            if not isinstance(obj, ObjectHolder):
+                raise InvalidArguments('Invalid operation "extract_objects" on variable "{}"'.format(object_name))
+            self.validate_extraction(obj.held_object)
+        obj.current_node = node
+        return obj.method_call(method_name, args, self.kwargs_string_keys(kwargs))
+
+    @builtinMethodNoKwargs
+    def bool_method_call(self, obj: bool, method_name: str, posargs: T.List[TYPE_nvar], kwargs: T.Dict[str, T.Any]) -> T.Union[str, int]:
+        if method_name == 'to_string':
+            if not posargs:
+                if obj:
+                    return 'true'
+                else:
+                    return 'false'
+            elif len(posargs) == 2 and isinstance(posargs[0], str) and isinstance(posargs[1], str):
+                if obj:
+                    return posargs[0]
+                else:
+                    return posargs[1]
+            else:
+                raise InterpreterException('bool.to_string() must have either no arguments or exactly two string arguments that signify what values to return for true and false.')
+        elif method_name == 'to_int':
+            if obj:
+                return 1
+            else:
+                return 0
+        else:
+            raise InterpreterException('Unknown method "%s" for a boolean.' % method_name)
+
+    @builtinMethodNoKwargs
+    def int_method_call(self, obj: int, method_name: str, posargs: T.List[TYPE_nvar], kwargs: T.Dict[str, T.Any]) -> T.Union[str, bool]:
+        if method_name == 'is_even':
+            if not posargs:
+                return obj % 2 == 0
+            else:
+                raise InterpreterException('int.is_even() must have no arguments.')
+        elif method_name == 'is_odd':
+            if not posargs:
+                return obj % 2 != 0
+            else:
+                raise InterpreterException('int.is_odd() must have no arguments.')
+        elif method_name == 'to_string':
+            if not posargs:
+                return str(obj)
+            else:
+                raise InterpreterException('int.to_string() must have no arguments.')
+        else:
+            raise InterpreterException('Unknown method "%s" for an integer.' % method_name)
+
+    @staticmethod
+    def _get_one_string_posarg(posargs: T.List[TYPE_nvar], method_name: str) -> str:
+        if len(posargs) > 1:
+            m = '{}() must have zero or one arguments'
+            raise InterpreterException(m.format(method_name))
+        elif len(posargs) == 1:
+            s = posargs[0]
+            if not isinstance(s, str):
+                m = '{}() argument must be a string'
+                raise InterpreterException(m.format(method_name))
+            return s
+        return None
+
+    @builtinMethodNoKwargs
+    def string_method_call(self, obj: str, method_name: str, posargs: T.List[TYPE_nvar], kwargs: T.Dict[str, T.Any]) -> T.Union[str, int, bool, T.List[str]]:
+        if method_name == 'strip':
+            s1 = self._get_one_string_posarg(posargs, 'strip')
+            if s1 is not None:
+                return obj.strip(s1)
+            return obj.strip()
+        elif method_name == 'format':
+            return self.format_string(obj, posargs)
+        elif method_name == 'to_upper':
+            return obj.upper()
+        elif method_name == 'to_lower':
+            return obj.lower()
+        elif method_name == 'underscorify':
+            return re.sub(r'[^a-zA-Z0-9]', '_', obj)
+        elif method_name == 'split':
+            s2 = self._get_one_string_posarg(posargs, 'split')
+            if s2 is not None:
+                return obj.split(s2)
+            return obj.split()
+        elif method_name == 'startswith' or method_name == 'contains' or method_name == 'endswith':
+            s3 = posargs[0]
+            if not isinstance(s3, str):
+                raise InterpreterException('Argument must be a string.')
+            if method_name == 'startswith':
+                return obj.startswith(s3)
+            elif method_name == 'contains':
+                return obj.find(s3) >= 0
+            return obj.endswith(s3)
+        elif method_name == 'to_int':
+            try:
+                return int(obj)
+            except Exception:
+                raise InterpreterException('String {!r} cannot be converted to int'.format(obj))
+        elif method_name == 'join':
+            if len(posargs) != 1:
+                raise InterpreterException('Join() takes exactly one argument.')
+            strlist = posargs[0]
+            check_stringlist(strlist)
+            assert isinstance(strlist, list)  # Required for mypy
+            return obj.join(strlist)
+        elif method_name == 'version_compare':
+            if len(posargs) != 1:
+                raise InterpreterException('Version_compare() takes exactly one argument.')
+            cmpr = posargs[0]
+            if not isinstance(cmpr, str):
+                raise InterpreterException('Version_compare() argument must be a string.')
+            return mesonlib.version_compare(obj, cmpr)
+        raise InterpreterException('Unknown method "%s" for a string.' % method_name)
+
+    def format_string(self, templ: str, args: T.List[TYPE_nvar]) -> str:
+        arg_strings = []
+        for arg in args:
+            if isinstance(arg, mparser.BaseNode):
+                arg = self.evaluate_statement(arg)
+            if isinstance(arg, bool): # Python boolean is upper case.
+                arg = str(arg).lower()
+            arg_strings.append(str(arg))
+
+        def arg_replace(match):
+            idx = int(match.group(1))
+            if idx >= len(arg_strings):
+                raise InterpreterException('Format placeholder @{}@ out of range.'.format(idx))
+            return arg_strings[idx]
+
+        return re.sub(r'@(\d+)@', arg_replace, templ)
+
+    def unknown_function_called(self, func_name: str) -> None:
+        raise InvalidCode('Unknown function "%s".' % func_name)
+
+    @builtinMethodNoKwargs
+    def array_method_call(self, obj: list, method_name: str, posargs: T.List[TYPE_nvar], kwargs: T.Dict[str, T.Any]) -> TYPE_var:
+        if method_name == 'contains':
+            def check_contains(el: list) -> bool:
+                if len(posargs) != 1:
+                    raise InterpreterException('Contains method takes exactly one argument.')
+                item = posargs[0]
+                for element in el:
+                    if isinstance(element, list):
+                        found = check_contains(element)
+                        if found:
+                            return True
+                    if element == item:
+                        return True
+                return False
+            return check_contains(obj)
+        elif method_name == 'length':
+            return len(obj)
+        elif method_name == 'get':
+            index = posargs[0]
+            fallback = None
+            if len(posargs) == 2:
+                fallback = posargs[1]
+            elif len(posargs) > 2:
+                m = 'Array method \'get()\' only takes two arguments: the ' \
+                    'index and an optional fallback value if the index is ' \
+                    'out of range.'
+                raise InvalidArguments(m)
+            if not isinstance(index, int):
+                raise InvalidArguments('Array index must be a number.')
+            if index < -len(obj) or index >= len(obj):
+                if fallback is None:
+                    m = 'Array index {!r} is out of bounds for array of size {!r}.'
+                    raise InvalidArguments(m.format(index, len(obj)))
+                if isinstance(fallback, mparser.BaseNode):
+                    return self.evaluate_statement(fallback)
+                return fallback
+            return obj[index]
+        m = 'Arrays do not have a method called {!r}.'
+        raise InterpreterException(m.format(method_name))
+
+    @builtinMethodNoKwargs
+    def dict_method_call(self, obj: dict, method_name: str, posargs: T.List[TYPE_nvar], kwargs: T.Dict[str, T.Any]) -> TYPE_var:
+        if method_name in ('has_key', 'get'):
+            if method_name == 'has_key':
+                if len(posargs) != 1:
+                    raise InterpreterException('has_key() takes exactly one argument.')
+            else:
+                if len(posargs) not in (1, 2):
+                    raise InterpreterException('get() takes one or two arguments.')
+
+            key = posargs[0]
+            if not isinstance(key, (str)):
+                raise InvalidArguments('Dictionary key must be a string.')
+
+            has_key = key in obj
+
+            if method_name == 'has_key':
+                return has_key
+
+            if has_key:
+                return obj[key]
+
+            if len(posargs) == 2:
+                fallback = posargs[1]
+                if isinstance(fallback, mparser.BaseNode):
+                    return self.evaluate_statement(fallback)
+                return fallback
+
+            raise InterpreterException('Key {!r} is not in the dictionary.'.format(key))
+
+        if method_name == 'keys':
+            if len(posargs) != 0:
+                raise InterpreterException('keys() takes no arguments.')
+            return list(obj.keys())
+
+        raise InterpreterException('Dictionaries do not have a method called "%s".' % method_name)
+
+    def reduce_arguments(self, args: mparser.ArgumentNode, resolve_key_nodes: bool = True) -> T.Tuple[T.List[TYPE_nvar], TYPE_nkwargs]:
+        assert(isinstance(args, mparser.ArgumentNode))
+        if args.incorrect_order():
+            raise InvalidArguments('All keyword arguments must be after positional arguments.')
+        self.argument_depth += 1
+        reduced_pos = [self.evaluate_statement(arg) for arg in args.arguments]  # type: T.List[TYPE_nvar]
+        reduced_kw = {}  # type: TYPE_nkwargs
+        for key, val in args.kwargs.items():
+            reduced_key = key  # type: T.Union[str, mparser.BaseNode]
+            reduced_val = val  # type: TYPE_nvar
+            if resolve_key_nodes and isinstance(key, mparser.IdNode):
+                assert isinstance(key.value, str)
+                reduced_key = key.value
+            if isinstance(reduced_val, mparser.BaseNode):
+                reduced_val = self.evaluate_statement(reduced_val)
+            reduced_kw[reduced_key] = reduced_val
+        self.argument_depth -= 1
+        final_kw = self.expand_default_kwargs(reduced_kw)
+        return reduced_pos, final_kw
+
+    def expand_default_kwargs(self, kwargs: TYPE_nkwargs) -> TYPE_nkwargs:
+        if 'kwargs' not in kwargs:
+            return kwargs
+        to_expand = kwargs.pop('kwargs')
+        if not isinstance(to_expand, dict):
+            raise InterpreterException('Value of "kwargs" must be dictionary.')
+        if 'kwargs' in to_expand:
+            raise InterpreterException('Kwargs argument must not contain a "kwargs" entry. Points for thinking meta, though. :P')
+        for k, v in to_expand.items():
+            if k in kwargs:
+                raise InterpreterException('Entry "{}" defined both as a keyword argument and in a "kwarg" entry.'.format(k))
+            kwargs[k] = v
+        return kwargs
+
+    def kwargs_string_keys(self, kwargs: TYPE_nkwargs) -> T.Dict[str, TYPE_nvar]:
+        kw = {}  # type: T.Dict[str, TYPE_nvar]
+        for key, val in kwargs.items():
+            if not isinstance(key, str):
+                raise InterpreterException('Key of kwargs is not a string')
+            kw[key] = val
+        return kw
+
+    def assignment(self, node: mparser.AssignmentNode) -> None:
+        assert(isinstance(node, mparser.AssignmentNode))
+        if self.argument_depth != 0:
+            raise InvalidArguments('''Tried to assign values inside an argument list.
+To specify a keyword argument, use : instead of =.''')
+        var_name = node.var_name
+        if not isinstance(var_name, str):
+            raise InvalidArguments('Tried to assign value to a non-variable.')
+        value = self.evaluate_statement(node.value)
+        if not self.is_assignable(value):
+            raise InvalidCode('Tried to assign an invalid value to variable.')
+        # For mutable objects we need to make a copy on assignment
+        if isinstance(value, MutableInterpreterObject):
+            value = copy.deepcopy(value)
+        self.set_variable(var_name, value)
+        return None
+
+    def set_variable(self, varname: str, variable: TYPE_var) -> None:
+        if variable is None:
+            raise InvalidCode('Can not assign None to variable.')
+        if not isinstance(varname, str):
+            raise InvalidCode('First argument to set_variable must be a string.')
+        if not self.is_assignable(variable):
+            raise InvalidCode('Assigned value not of assignable type.')
+        if re.match('[_a-zA-Z][_0-9a-zA-Z]*$', varname) is None:
+            raise InvalidCode('Invalid variable name: ' + varname)
+        if varname in self.builtin:
+            raise InvalidCode('Tried to overwrite internal variable "%s"' % varname)
+        self.variables[varname] = variable
+
+    def get_variable(self, varname) -> TYPE_var:
+        if varname in self.builtin:
+            return self.builtin[varname]
+        if varname in self.variables:
+            return self.variables[varname]
+        raise InvalidCode('Unknown variable "%s".' % varname)
+
+    def is_assignable(self, value: T.Any) -> bool:
+        return isinstance(value, (InterpreterObject, dependencies.Dependency,
+                                  str, int, list, dict, mesonlib.File))
+
+    def validate_extraction(self, buildtarget: InterpreterObject) -> None:
+        raise InterpreterException('validate_extraction is not implemented in this context (please file a bug)')
diff --git a/meson/mesonbuild/linkers.py b/meson/mesonbuild/linkers.py
new file mode 100644
index 000000000..fe1441ecb
--- /dev/null
+++ b/meson/mesonbuild/linkers.py
@@ -0,0 +1,1200 @@
+# Copyright 2012-2017 The Meson development team
+
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+
+#     http://www.apache.org/licenses/LICENSE-2.0
+
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import abc
+import os
+import typing as T
+
+from . import mesonlib
+from .arglist import CompilerArgs
+from .envconfig import get_env_var
+
+if T.TYPE_CHECKING:
+    from .coredata import OptionDictType
+    from .environment import Environment
+
+
+class StaticLinker:
+
+    def __init__(self, exelist: T.List[str]):
+        self.exelist = exelist
+
+    def compiler_args(self, args: T.Optional[T.Iterable[str]] = None) -> CompilerArgs:
+        return CompilerArgs(self, args)
+
+    def can_linker_accept_rsp(self) -> bool:
+        """
+        Determines whether the linker can accept arguments using the @rsp syntax.
+        """
+        return mesonlib.is_windows()
+
+    def get_base_link_args(self, options: 'OptionDictType') -> T.List[str]:
+        """Like compilers.get_base_link_args, but for the static linker."""
+        return []
+
+    def get_exelist(self) -> T.List[str]:
+        return self.exelist.copy()
+
+    def get_std_link_args(self) -> T.List[str]:
+        return []
+
+    def get_buildtype_linker_args(self, buildtype: str) -> T.List[str]:
+        return []
+
+    def get_output_args(self, target: str) -> T.List[str]:
+        return[]
+
+    def get_coverage_link_args(self) -> T.List[str]:
+        return []
+
+    def build_rpath_args(self, env: 'Environment', build_dir: str, from_dir: str,
+                         rpath_paths: str, build_rpath: str,
+                         install_rpath: str) -> T.Tuple[T.List[str], T.Set[bytes]]:
+        return ([], set())
+
+    def thread_link_flags(self, env: 'Environment') -> T.List[str]:
+        return []
+
+    def openmp_flags(self) -> T.List[str]:
+        return []
+
+    def get_option_link_args(self, options: 'OptionDictType') -> T.List[str]:
+        return []
+
+    @classmethod
+    def unix_args_to_native(cls, args: T.List[str]) -> T.List[str]:
+        return args[:]
+
+    @classmethod
+    def native_args_to_unix(cls, args: T.List[str]) -> T.List[str]:
+        return args[:]
+
+    def get_link_debugfile_name(self, targetfile: str) -> str:
+        return None
+
+    def get_link_debugfile_args(self, targetfile: str) -> T.List[str]:
+        # Static libraries do not have PDB files
+        return []
+
+    def get_always_args(self) -> T.List[str]:
+        return []
+
+    def get_linker_always_args(self) -> T.List[str]:
+        return []
+
+
+class VisualStudioLikeLinker:
+    always_args = ['/NOLOGO']
+
+    def __init__(self, machine: str):
+        self.machine = machine
+
+    def get_always_args(self) -> T.List[str]:
+        return self.always_args.copy()
+
+    def get_linker_always_args(self) -> T.List[str]:
+        return self.always_args.copy()
+
+    def get_output_args(self, target: str) -> T.List[str]:
+        args = []  # type: T.List[str]
+        if self.machine:
+            args += ['/MACHINE:' + self.machine]
+        args += ['/OUT:' + target]
+        return args
+
+    @classmethod
+    def unix_args_to_native(cls, args: T.List[str]) -> T.List[str]:
+        from .compilers import VisualStudioCCompiler
+        return VisualStudioCCompiler.unix_args_to_native(args)
+
+    @classmethod
+    def native_args_to_unix(cls, args: T.List[str]) -> T.List[str]:
+        from .compilers import VisualStudioCCompiler
+        return VisualStudioCCompiler.native_args_to_unix(args)
+
+
+class VisualStudioLinker(VisualStudioLikeLinker, StaticLinker):
+
+    """Microsoft's lib static linker."""
+
+    def __init__(self, exelist: T.List[str], machine: str):
+        StaticLinker.__init__(self, exelist)
+        VisualStudioLikeLinker.__init__(self, machine)
+
+
+class IntelVisualStudioLinker(VisualStudioLikeLinker, StaticLinker):
+
+    """Intel's xilib static linker."""
+
+    def __init__(self, exelist: T.List[str], machine: str):
+        StaticLinker.__init__(self, exelist)
+        VisualStudioLikeLinker.__init__(self, machine)
+
+
+class ArLinker(StaticLinker):
+
+    def __init__(self, exelist: T.List[str]):
+        super().__init__(exelist)
+        self.id = 'ar'
+        pc, stdo = mesonlib.Popen_safe(self.exelist + ['-h'])[0:2]
+        # Enable deterministic builds if they are available.
+        if '[D]' in stdo:
+            self.std_args = ['csrD']
+        else:
+            self.std_args = ['csr']
+        self.can_rsp = '@<' in stdo
+
+    def can_linker_accept_rsp(self) -> bool:
+        return self.can_rsp
+
+    def get_std_link_args(self) -> T.List[str]:
+        return self.std_args
+
+    def get_output_args(self, target: str) -> T.List[str]:
+        return [target]
+
+
+class ArmarLinker(ArLinker):  # lgtm [py/missing-call-to-init]
+
+    def __init__(self, exelist: T.List[str]):
+        StaticLinker.__init__(self, exelist)
+        self.id = 'armar'
+        self.std_args = ['-csr']
+
+    def can_linker_accept_rsp(self) -> bool:
+        # armar can't accept arguments using the @rsp syntax
+        return False
+
+
+class DLinker(StaticLinker):
+    def __init__(self, exelist: T.List[str], arch: str):
+        super().__init__(exelist)
+        self.id = exelist[0]
+        self.arch = arch
+
+    def get_std_link_args(self) -> T.List[str]:
+        return ['-lib']
+
+    def get_output_args(self, target: str) -> T.List[str]:
+        return ['-of=' + target]
+
+    def get_linker_always_args(self) -> T.List[str]:
+        if mesonlib.is_windows():
+            if self.arch == 'x86_64':
+                return ['-m64']
+            elif self.arch == 'x86_mscoff' and self.id == 'dmd':
+                return ['-m32mscoff']
+            return ['-m32']
+        return []
+
+
+class CcrxLinker(StaticLinker):
+
+    def __init__(self, exelist: T.List[str]):
+        super().__init__(exelist)
+        self.id = 'rlink'
+
+    def can_linker_accept_rsp(self) -> bool:
+        return False
+
+    def get_output_args(self, target: str) -> T.List[str]:
+        return ['-output={}'.format(target)]
+
+    def get_linker_always_args(self) -> T.List[str]:
+        return ['-nologo', '-form=library']
+
+
+class Xc16Linker(StaticLinker):
+
+    def __init__(self, exelist: T.List[str]):
+        super().__init__(exelist)
+        self.id = 'xc16-ar'
+
+    def can_linker_accept_rsp(self) -> bool:
+        return False
+
+    def get_output_args(self, target: str) -> T.List[str]:
+        return ['{}'.format(target)]
+
+    def get_linker_always_args(self) -> T.List[str]:
+        return ['rcs']
+
+
+class C2000Linker(StaticLinker):
+
+    def __init__(self, exelist: T.List[str]):
+        super().__init__(exelist)
+        self.id = 'ar2000'
+
+    def can_linker_accept_rsp(self) -> bool:
+        return False
+
+    def get_output_args(self, target: str) -> T.List[str]:
+        return ['{}'.format(target)]
+
+    def get_linker_always_args(self) -> T.List[str]:
+        return ['-r']
+
+
+def prepare_rpaths(raw_rpaths: str, build_dir: str, from_dir: str) -> T.List[str]:
+    # The rpaths we write must be relative if they point to the build dir,
+    # because otherwise they have different length depending on the build
+    # directory. This breaks reproducible builds.
+    internal_format_rpaths = [evaluate_rpath(p, build_dir, from_dir) for p in raw_rpaths]
+    ordered_rpaths = order_rpaths(internal_format_rpaths)
+    return ordered_rpaths
+
+
+def order_rpaths(rpath_list: T.List[str]) -> T.List[str]:
+    # We want rpaths that point inside our build dir to always override
+    # those pointing to other places in the file system. This is so built
+    # binaries prefer our libraries to the ones that may lie somewhere
+    # in the file system, such as /lib/x86_64-linux-gnu.
+    #
+    # The correct thing to do here would be C++'s std::stable_partition.
+    # Python standard library does not have it, so replicate it with
+    # sort, which is guaranteed to be stable.
+    return sorted(rpath_list, key=os.path.isabs)
+
+
+def evaluate_rpath(p: str, build_dir: str, from_dir: str) -> str:
+    if p == from_dir:
+        return '' # relpath errors out in this case
+    elif os.path.isabs(p):
+        return p # These can be outside of build dir.
+    else:
+        return os.path.relpath(os.path.join(build_dir, p), os.path.join(build_dir, from_dir))
+
+
+class LinkerEnvVarsMixin(metaclass=abc.ABCMeta):
+
+    """Mixin reading LDFLAGS from the environment."""
+
+    @staticmethod
+    def get_args_from_envvars(for_machine: mesonlib.MachineChoice,
+                              is_cross: bool) -> T.List[str]:
+        raw_value = get_env_var(for_machine, is_cross, 'LDFLAGS')
+        if raw_value is not None:
+            return mesonlib.split_args(raw_value)
+        else:
+            return []
+
+class DynamicLinker(LinkerEnvVarsMixin, metaclass=abc.ABCMeta):
+
+    """Base class for dynamic linkers."""
+
+    _BUILDTYPE_ARGS = {
+        'plain': [],
+        'debug': [],
+        'debugoptimized': [],
+        'release': [],
+        'minsize': [],
+        'custom': [],
+    }  # type: T.Dict[str, T.List[str]]
+
+    def _apply_prefix(self, arg: T.Union[str, T.List[str]]) -> T.List[str]:
+        args = [arg] if isinstance(arg, str) else arg
+        if self.prefix_arg is None:
+            return args
+        elif isinstance(self.prefix_arg, str):
+            return [self.prefix_arg + arg for arg in args]
+        ret = []
+        for arg in args:
+            ret += self.prefix_arg + [arg]
+        return ret
+
+    def __init__(self, id_: str, exelist: T.List[str],
+                 for_machine: mesonlib.MachineChoice, prefix_arg: T.Union[str, T.List[str]],
+                 always_args: T.List[str], *, version: str = 'unknown version'):
+        self.exelist = exelist
+        self.for_machine = for_machine
+        self.version = version
+        self.id = id_
+        self.prefix_arg = prefix_arg
+        self.always_args = always_args
+
+    def __repr__(self) -> str:
+        return '<{}: v{} `{}`>'.format(type(self).__name__, self.version, ' '.join(self.exelist))
+
+    def get_id(self) -> str:
+        return self.id
+
+    def get_version_string(self) -> str:
+        return '({} {})'.format(self.id, self.version)
+
+    def get_exelist(self) -> T.List[str]:
+        return self.exelist.copy()
+
+    def get_accepts_rsp(self) -> bool:
+        # rsp files are only used when building on Windows because we want to
+        # avoid issues with quoting and max argument length
+        return mesonlib.is_windows()
+
+    def get_always_args(self) -> T.List[str]:
+        return self.always_args.copy()
+
+    def get_lib_prefix(self) -> str:
+        return ''
+
+    # XXX: is use_ldflags a compiler or a linker attribute?
+
+    def get_option_args(self, options: 'OptionDictType') -> T.List[str]:
+        return []
+
+    def has_multi_arguments(self, args: T.List[str], env: 'Environment') -> T.Tuple[bool, bool]:
+        m = 'Language {} does not support has_multi_link_arguments.'
+        raise mesonlib.EnvironmentException(m.format(self.id))
+
+    def get_debugfile_name(self, targetfile: str) -> str:
+        '''Name of debug file written out (see below)'''
+        return None
+
+    def get_debugfile_args(self, targetfile: str) -> T.List[str]:
+        """Some compilers (MSVC) write debug into a separate file.
+
+        This method takes the target object path and returns a list of
+        commands to append to the linker invocation to control where that
+        file is written.
+        """
+        return []
+
+    def get_std_shared_lib_args(self) -> T.List[str]:
+        return []
+
+    def get_std_shared_module_args(self, options: 'OptionDictType') -> T.List[str]:
+        return self.get_std_shared_lib_args()
+
+    def get_pie_args(self) -> T.List[str]:
+        # TODO: this really needs to take a boolean and return the args to
+        # disable pie, otherwise it only acts to enable pie if pie *isn't* the
+        # default.
+        m = 'Linker {} does not support position-independent executable'
+        raise mesonlib.EnvironmentException(m.format(self.id))
+
+    def get_lto_args(self) -> T.List[str]:
+        return []
+
+    def sanitizer_args(self, value: str) -> T.List[str]:
+        return []
+
+    def get_buildtype_args(self, buildtype: str) -> T.List[str]:
+        # We can override these in children by just overriding the
+        # _BUILDTYPE_ARGS value.
+        return self._BUILDTYPE_ARGS[buildtype]
+
+    def get_asneeded_args(self) -> T.List[str]:
+        return []
+
+    def get_link_whole_for(self, args: T.List[str]) -> T.List[str]:
+        raise mesonlib.EnvironmentException(
+            'Linker {} does not support link_whole'.format(self.id))
+
+    def get_allow_undefined_args(self) -> T.List[str]:
+        raise mesonlib.EnvironmentException(
+            'Linker {} does not support allow undefined'.format(self.id))
+
+    @abc.abstractmethod
+    def get_output_args(self, outname: str) -> T.List[str]:
+        pass
+
+    def get_coverage_args(self) -> T.List[str]:
+        m = "Linker {} doesn't implement coverage data generation.".format(self.id)
+        raise mesonlib.EnvironmentException(m)
+
+    @abc.abstractmethod
+    def get_search_args(self, dirname: str) -> T.List[str]:
+        pass
+
+    def export_dynamic_args(self, env: 'Environment') -> T.List[str]:
+        return []
+
+    def import_library_args(self, implibname: str) -> T.List[str]:
+        """The name of the outputted import library.
+
+        This implementation is used only on Windows by compilers that use GNU ld
+        """
+        return []
+
+    def thread_flags(self, env: 'Environment') -> T.List[str]:
+        return []
+
+    def no_undefined_args(self) -> T.List[str]:
+        """Arguments to error if there are any undefined symbols at link time.
+
+        This is the inverse of get_allow_undefined_args().
+
+        TODO: A future cleanup might merge this and
+              get_allow_undefined_args() into a single method taking a
+              boolean
+        """
+        return []
+
+    def fatal_warnings(self) -> T.List[str]:
+        """Arguments to make all warnings errors."""
+        return []
+
+    def headerpad_args(self) -> T.List[str]:
+        # Only used by the Apple linker
+        return []
+
+    def bitcode_args(self) -> T.List[str]:
+        raise mesonlib.MesonException('This linker does not support bitcode bundles')
+
+    def build_rpath_args(self, env: 'Environment', build_dir: str, from_dir: str,
+                         rpath_paths: str, build_rpath: str,
+                         install_rpath: str) -> T.Tuple[T.List[str], T.Set[bytes]]:
+        return ([], set())
+
+    def get_soname_args(self, env: 'Environment', prefix: str, shlib_name: str,
+                        suffix: str, soversion: str, darwin_versions: T.Tuple[str, str],
+                        is_shared_module: bool) -> T.List[str]:
+        return []
+
+
+class PosixDynamicLinkerMixin:
+
+    """Mixin class for POSIX-ish linkers.
+
+    This is obviously a pretty small subset of the linker interface, but
+    enough dynamic linkers that meson supports are POSIX-like but not
+    GNU-like that it makes sense to split this out.
+    """
+
+    def get_output_args(self, outname: str) -> T.List[str]:
+        return ['-o', outname]
+
+    def get_std_shared_lib_args(self) -> T.List[str]:
+        return ['-shared']
+
+    def get_search_args(self, dirname: str) -> T.List[str]:
+        return ['-L' + dirname]
+
+
+class GnuLikeDynamicLinkerMixin:
+
+    """Mixin class for dynamic linkers that provides gnu-like interface.
+
+    This acts as a base for the GNU linkers (bfd and gold), LLVM's lld, and
+    other linkers like GNU-ld.
+    """
+
+    _BUILDTYPE_ARGS = {
+        'plain': [],
+        'debug': [],
+        'debugoptimized': [],
+        'release': ['-O1'],
+        'minsize': [],
+        'custom': [],
+    }  # type: T.Dict[str, T.List[str]]
+
+    def get_buildtype_args(self, buildtype: str) -> T.List[str]:
+        # We can override these in children by just overriding the
+        # _BUILDTYPE_ARGS value.
+        return mesonlib.listify([self._apply_prefix(a) for a in self._BUILDTYPE_ARGS[buildtype]])
+
+    def get_pie_args(self) -> T.List[str]:
+        return ['-pie']
+
+    def get_asneeded_args(self) -> T.List[str]:
+        return self._apply_prefix('--as-needed')
+
+    def get_link_whole_for(self, args: T.List[str]) -> T.List[str]:
+        if not args:
+            return args
+        return self._apply_prefix('--whole-archive') + args + self._apply_prefix('--no-whole-archive')
+
+    def get_allow_undefined_args(self) -> T.List[str]:
+        return self._apply_prefix('--allow-shlib-undefined')
+
+    def get_lto_args(self) -> T.List[str]:
+        return ['-flto']
+
+    def sanitizer_args(self, value: str) -> T.List[str]:
+        if value == 'none':
+            return []
+        return ['-fsanitize=' + value]
+
+    def get_coverage_args(self) -> T.List[str]:
+        return ['--coverage']
+
+    def export_dynamic_args(self, env: 'Environment') -> T.List[str]:
+        m = env.machines[self.for_machine]
+        if m.is_windows() or m.is_cygwin():
+            return self._apply_prefix('--export-all-symbols')
+        return self._apply_prefix('-export-dynamic')
+
+    def import_library_args(self, implibname: str) -> T.List[str]:
+        return self._apply_prefix('--out-implib=' + implibname)
+
+    def thread_flags(self, env: 'Environment') -> T.List[str]:
+        if env.machines[self.for_machine].is_haiku():
+            return []
+        return ['-pthread']
+
+    def no_undefined_args(self) -> T.List[str]:
+        return self._apply_prefix('--no-undefined')
+
+    def fatal_warnings(self) -> T.List[str]:
+        return self._apply_prefix('--fatal-warnings')
+
+    def get_soname_args(self, env: 'Environment', prefix: str, shlib_name: str,
+                        suffix: str, soversion: str, darwin_versions: T.Tuple[str, str],
+                        is_shared_module: bool) -> T.List[str]:
+        m = env.machines[self.for_machine]
+        if m.is_windows() or m.is_cygwin():
+            # For PE/COFF the soname argument has no effect
+            return []
+        sostr = '' if soversion is None else '.' + soversion
+        return self._apply_prefix('-soname,{}{}.{}{}'.format(prefix, shlib_name, suffix, sostr))
+
+    def build_rpath_args(self, env: 'Environment', build_dir: str, from_dir: str,
+                         rpath_paths: str, build_rpath: str,
+                         install_rpath: str) -> T.Tuple[T.List[str], T.Set[bytes]]:
+        m = env.machines[self.for_machine]
+        if m.is_windows() or m.is_cygwin():
+            return ([], set())
+        if not rpath_paths and not install_rpath and not build_rpath:
+            return ([], set())
+        args = []
+        origin_placeholder = '$ORIGIN'
+        processed_rpaths = prepare_rpaths(rpath_paths, build_dir, from_dir)
+        # Need to deduplicate rpaths, as macOS's install_name_tool
+        # is *very* allergic to duplicate -delete_rpath arguments
+        # when calling depfixer on installation.
+        all_paths = mesonlib.OrderedSet([os.path.join(origin_placeholder, p) for p in processed_rpaths])
+        rpath_dirs_to_remove = set()
+        for p in all_paths:
+            rpath_dirs_to_remove.add(p.encode('utf8'))
+        # Build_rpath is used as-is (it is usually absolute).
+        if build_rpath != '':
+            all_paths.add(build_rpath)
+            for p in build_rpath.split(':'):
+                rpath_dirs_to_remove.add(p.encode('utf8'))
+
+        # TODO: should this actually be "for (dragonfly|open)bsd"?
+        if mesonlib.is_dragonflybsd() or mesonlib.is_openbsd():
+            # This argument instructs the compiler to record the value of
+            # ORIGIN in the .dynamic section of the elf. On Linux this is done
+            # by default, but is not on dragonfly/openbsd for some reason. Without this
+            # $ORIGIN in the runtime path will be undefined and any binaries
+            # linked against local libraries will fail to resolve them.
+            args.extend(self._apply_prefix('-z,origin'))
+
+        # In order to avoid relinking for RPATH removal, the binary needs to contain just
+        # enough space in the ELF header to hold the final installation RPATH.
+        paths = ':'.join(all_paths)
+        if len(paths) < len(install_rpath):
+            padding = 'X' * (len(install_rpath) - len(paths))
+            if not paths:
+                paths = padding
+            else:
+                paths = paths + ':' + padding
+        args.extend(self._apply_prefix('-rpath,' + paths))
+
+        # TODO: should this actually be "for solaris/sunos"?
+        if mesonlib.is_sunos():
+            return (args, rpath_dirs_to_remove)
+
+        # Rpaths to use while linking must be absolute. These are not
+        # written to the binary. Needed only with GNU ld:
+        # https://sourceware.org/bugzilla/show_bug.cgi?id=16936
+        # Not needed on Windows or other platforms that don't use RPATH
+        # https://github.com/mesonbuild/meson/issues/1897
+        #
+        # In addition, this linker option tends to be quite long and some
+        # compilers have trouble dealing with it. That's why we will include
+        # one option per folder, like this:
+        #
+        #   -Wl,-rpath-link,/path/to/folder1 -Wl,-rpath,/path/to/folder2 ...
+        #
+        # ...instead of just one single looooong option, like this:
+        #
+        #   -Wl,-rpath-link,/path/to/folder1:/path/to/folder2:...
+        for p in rpath_paths:
+            args.extend(self._apply_prefix('-rpath-link,' + os.path.join(build_dir, p)))
+
+        return (args, rpath_dirs_to_remove)
+
+
+class AppleDynamicLinker(PosixDynamicLinkerMixin, DynamicLinker):
+
+    """Apple's ld implementation."""
+
+    def __init__(self, *args, **kwargs):
+        super().__init__('ld64', *args, **kwargs)
+
+    def get_asneeded_args(self) -> T.List[str]:
+        return self._apply_prefix('-dead_strip_dylibs')
+
+    def get_allow_undefined_args(self) -> T.List[str]:
+        return self._apply_prefix('-undefined,dynamic_lookup')
+
+    def get_std_shared_module_args(self, options: 'OptionDictType') -> T.List[str]:
+        return ['-bundle'] + self._apply_prefix('-undefined,dynamic_lookup')
+
+    def get_pie_args(self) -> T.List[str]:
+        return ['-pie']
+
+    def get_link_whole_for(self, args: T.List[str]) -> T.List[str]:
+        result = []  # type: T.List[str]
+        for a in args:
+            result.extend(self._apply_prefix('-force_load'))
+            result.append(a)
+        return result
+
+    def get_coverage_args(self) -> T.List[str]:
+        return ['--coverage']
+
+    def sanitizer_args(self, value: str) -> T.List[str]:
+        if value == 'none':
+            return []
+        return ['-fsanitize=' + value]
+
+    def no_undefined_args(self) -> T.List[str]:
+        return self._apply_prefix('-undefined,error')
+
+    def headerpad_args(self) -> T.List[str]:
+        return self._apply_prefix('-headerpad_max_install_names')
+
+    def bitcode_args(self) -> T.List[str]:
+        return self._apply_prefix('-bitcode_bundle')
+
+    def fatal_warnings(self) -> T.List[str]:
+        return self._apply_prefix('-fatal_warnings')
+
+    def get_soname_args(self, env: 'Environment', prefix: str, shlib_name: str,
+                        suffix: str, soversion: str, darwin_versions: T.Tuple[str, str],
+                        is_shared_module: bool) -> T.List[str]:
+        if is_shared_module:
+            return []
+        install_name = ['@rpath/', prefix, shlib_name]
+        if soversion is not None:
+            install_name.append('.' + soversion)
+        install_name.append('.dylib')
+        args = ['-install_name', ''.join(install_name)]
+        if darwin_versions:
+            args.extend(['-compatibility_version', darwin_versions[0],
+                         '-current_version', darwin_versions[1]])
+        return args
+
+    def build_rpath_args(self, env: 'Environment', build_dir: str, from_dir: str,
+                         rpath_paths: str, build_rpath: str,
+                         install_rpath: str) -> T.Tuple[T.List[str], T.Set[bytes]]:
+        if not rpath_paths and not install_rpath and not build_rpath:
+            return ([], set())
+        args = []
+        # @loader_path is the equivalent of $ORIGIN on macOS
+        # https://stackoverflow.com/q/26280738
+        origin_placeholder = '@loader_path'
+        processed_rpaths = prepare_rpaths(rpath_paths, build_dir, from_dir)
+        all_paths = mesonlib.OrderedSet([os.path.join(origin_placeholder, p) for p in processed_rpaths])
+        if build_rpath != '':
+            all_paths.add(build_rpath)
+        for rp in all_paths:
+            args.extend(self._apply_prefix('-rpath,' + rp))
+
+        return (args, set())
+
+
+class GnuDynamicLinker(GnuLikeDynamicLinkerMixin, PosixDynamicLinkerMixin, DynamicLinker):
+
+    """Representation of GNU ld.bfd and ld.gold."""
+
+    def get_accepts_rsp(self) -> bool:
+        return True;
+
+
+class GnuGoldDynamicLinker(GnuDynamicLinker):
+
+    def __init__(self, *args, **kwargs):
+        super().__init__('ld.gold', *args, **kwargs)
+
+
+class GnuBFDDynamicLinker(GnuDynamicLinker):
+
+    def __init__(self, *args, **kwargs):
+        super().__init__('ld.bfd', *args, **kwargs)
+
+
+class LLVMDynamicLinker(GnuLikeDynamicLinkerMixin, PosixDynamicLinkerMixin, DynamicLinker):
+
+    """Representation of LLVM's ld.lld linker.
+
+    This is only the gnu-like linker, not the apple like or link.exe like
+    linkers.
+    """
+
+    def __init__(self, *args, **kwargs):
+        super().__init__('ld.lld', *args, **kwargs)
+
+        # Some targets don't seem to support this argument (windows, wasm, ...)
+        _, _, e = mesonlib.Popen_safe(self.exelist + self._apply_prefix('--allow-shlib-undefined'))
+        self.has_allow_shlib_undefined = not ('unknown argument: --allow-shlib-undefined' in e)
+
+    def get_allow_undefined_args(self) -> T.List[str]:
+        if self.has_allow_shlib_undefined:
+            return self._apply_prefix('--allow-shlib-undefined')
+        return []
+
+
+class WASMDynamicLinker(GnuLikeDynamicLinkerMixin, PosixDynamicLinkerMixin, DynamicLinker):
+
+    """Emscripten's wasm-ld."""
+
+    def __init__(self, *args, **kwargs):
+        super().__init__('ld.wasm', *args, **kwargs)
+
+    def thread_link_flags(self, env: 'Environment') -> T.List[str]:
+        args = ['-s', 'USE_PTHREADS=1']
+        count = env.coredata.compiler_options[self.for_machine]['{}_thread_count'.format(self.language)].value  # type: int
+        if count:
+            args.extend(['-s', 'PTHREAD_POOL_SIZE={}'.format(count)])
+        return args
+
+    def get_allow_undefined_args(self) -> T.List[str]:
+        return ['-s', 'ERROR_ON_UNDEFINED_SYMBOLS=0']
+
+    def no_undefined_args(self) -> T.List[str]:
+        return ['-s', 'ERROR_ON_UNDEFINED_SYMBOLS=1']
+
+    def get_soname_args(self, env: 'Environment', prefix: str, shlib_name: str,
+                        suffix: str, soversion: str, darwin_versions: T.Tuple[str, str],
+                        is_shared_module: bool) -> T.List[str]:
+        raise mesonlib.MesonException('{} does not support shared libraries.'.format(self.id))
+
+    def get_asneeded_args(self) -> T.List[str]:
+        return []
+
+    def build_rpath_args(self, env: 'Environment', build_dir: str, from_dir: str,
+                         rpath_paths: str, build_rpath: str,
+                         install_rpath: str) -> T.Tuple[T.List[str], T.Set[bytes]]:
+        return ([], set())
+
+
+class CcrxDynamicLinker(DynamicLinker):
+
+    """Linker for Renesis CCrx compiler."""
+
+    def __init__(self, for_machine: mesonlib.MachineChoice,
+                 *, version: str = 'unknown version'):
+        super().__init__('rlink', ['rlink.exe'], for_machine, '', [],
+                         version=version)
+
+    def get_accepts_rsp(self) -> bool:
+        return False
+
+    def get_lib_prefix(self) -> str:
+        return '-lib='
+
+    def get_std_shared_lib_args(self) -> T.List[str]:
+        return []
+
+    def get_output_args(self, outputname: str) -> T.List[str]:
+        return ['-output={}'.format(outputname)]
+
+    def get_search_args(self, dirname: str) -> 'T.NoReturn':
+        raise EnvironmentError('rlink.exe does not have a search dir argument')
+
+    def get_allow_undefined_args(self) -> T.List[str]:
+        return []
+
+    def get_soname_args(self, env: 'Environment', prefix: str, shlib_name: str,
+                        suffix: str, soversion: str, darwin_versions: T.Tuple[str, str],
+                        is_shared_module: bool) -> T.List[str]:
+        return []
+
+
+class Xc16DynamicLinker(DynamicLinker):
+
+    """Linker for Microchip XC16 compiler."""
+
+    def __init__(self, for_machine: mesonlib.MachineChoice,
+                 *, version: str = 'unknown version'):
+        super().__init__('xc16-gcc', ['xc16-gcc.exe'], for_machine, '', [],
+                         version=version)
+
+    def get_link_whole_for(self, args: T.List[str]) -> T.List[str]:
+        if not args:
+            return args
+        return self._apply_prefix('--start-group') + args + self._apply_prefix('--end-group')
+
+    def get_accepts_rsp(self) -> bool:
+        return False
+
+    def get_lib_prefix(self) -> str:
+        return ''
+
+    def get_std_shared_lib_args(self) -> T.List[str]:
+        return []
+
+    def get_output_args(self, outputname: str) -> T.List[str]:
+        return ['-o{}'.format(outputname)]
+
+    def get_search_args(self, dirname: str) -> 'T.NoReturn':
+        raise EnvironmentError('xc16-gcc.exe does not have a search dir argument')
+
+    def get_allow_undefined_args(self) -> T.List[str]:
+        return []
+
+    def get_soname_args(self, env: 'Environment', prefix: str, shlib_name: str,
+                        suffix: str, soversion: str, darwin_versions: T.Tuple[str, str],
+                        is_shared_module: bool) -> T.List[str]:
+        return []
+
+    def build_rpath_args(self, env: 'Environment', build_dir: str, from_dir: str,
+                         rpath_paths: str, build_rpath: str,
+                         install_rpath: str) -> T.Tuple[T.List[str], T.Set[bytes]]:
+        return ([], set())
+
+
+class C2000DynamicLinker(DynamicLinker):
+
+    """Linker for Texas Instruments C2000 compiler."""
+
+    def __init__(self, for_machine: mesonlib.MachineChoice,
+                 *, version: str = 'unknown version'):
+        super().__init__('cl2000', ['cl2000.exe'], for_machine, '', [],
+                         version=version)
+
+    def get_link_whole_for(self, args: T.List[str]) -> T.List[str]:
+        if not args:
+            return args
+        return self._apply_prefix('--start-group') + args + self._apply_prefix('--end-group')
+
+    def get_accepts_rsp(self) -> bool:
+        return False
+
+    def get_lib_prefix(self) -> str:
+        return '-l='
+
+    def get_std_shared_lib_args(self) -> T.List[str]:
+        return []
+
+    def get_output_args(self, outputname: str) -> T.List[str]:
+        return ['-z', '--output_file={}'.format(outputname)]
+
+    def get_search_args(self, dirname: str) -> 'T.NoReturn':
+        raise EnvironmentError('cl2000.exe does not have a search dir argument')
+
+    def get_allow_undefined_args(self) -> T.List[str]:
+        return []
+
+    def get_always_args(self) -> T.List[str]:
+        return []
+
+
+class ArmDynamicLinker(PosixDynamicLinkerMixin, DynamicLinker):
+
+    """Linker for the ARM compiler."""
+
+    def __init__(self, for_machine: mesonlib.MachineChoice,
+                 *, version: str = 'unknown version'):
+        super().__init__('armlink', ['armlink'], for_machine, '', [],
+                         version=version)
+
+    def get_accepts_rsp(self) -> bool:
+        return False
+
+    def get_std_shared_lib_args(self) -> 'T.NoReturn':
+        raise mesonlib.MesonException('The Arm Linkers do not support shared libraries')
+
+    def get_allow_undefined_args(self) -> T.List[str]:
+        return []
+
+
+class ArmClangDynamicLinker(ArmDynamicLinker):
+
+    """Linker used with ARM's clang fork.
+
+    The interface is similar enough to the old ARM ld that it inherits and
+    extends a few things as needed.
+    """
+
+    def export_dynamic_args(self, env: 'Environment') -> T.List[str]:
+        return ['--export_dynamic']
+
+    def import_library_args(self, implibname: str) -> T.List[str]:
+        return ['--symdefs=' + implibname]
+
+
+class PGIDynamicLinker(PosixDynamicLinkerMixin, DynamicLinker):
+
+    """PGI linker."""
+
+    def __init__(self, *args, **kwargs):
+        super().__init__('pgi', *args, **kwargs)
+
+    def get_allow_undefined_args(self) -> T.List[str]:
+        return []
+
+    def get_soname_args(self, env: 'Environment', prefix: str, shlib_name: str,
+                        suffix: str, soversion: str, darwin_versions: T.Tuple[str, str],
+                        is_shared_module: bool) -> T.List[str]:
+        return []
+
+    def get_std_shared_lib_args(self) -> T.List[str]:
+        # PGI -shared is Linux only.
+        if mesonlib.is_windows():
+            return ['-Bdynamic', '-Mmakedll']
+        elif mesonlib.is_linux():
+            return ['-shared']
+        return []
+
+    def build_rpath_args(self, env: 'Environment', build_dir: str, from_dir: str,
+                         rpath_paths: str, build_rpath: str,
+                         install_rpath: str) -> T.Tuple[T.List[str], T.Set[bytes]]:
+        if not env.machines[self.for_machine].is_windows():
+            return (['-R' + os.path.join(build_dir, p) for p in rpath_paths], set())
+        return ([], set())
+
+
+class PGIStaticLinker(StaticLinker):
+    def __init__(self, exelist: T.List[str]):
+        super().__init__(exelist)
+        self.id = 'ar'
+        self.std_args = ['-r']
+
+    def get_std_link_args(self) -> T.List[str]:
+        return self.std_args
+
+    def get_output_args(self, target: str) -> T.List[str]:
+        return [target]
+
+
+class VisualStudioLikeLinkerMixin:
+
+    _BUILDTYPE_ARGS = {
+        'plain': [],
+        'debug': [],
+        'debugoptimized': [],
+        # The otherwise implicit REF and ICF linker optimisations are disabled by
+        # /DEBUG. REF implies ICF.
+        'release': ['/OPT:REF'],
+        'minsize': ['/INCREMENTAL:NO', '/OPT:REF'],
+        'custom': [],
+    }  # type: T.Dict[str, T.List[str]]
+
+    def __init__(self, *args, direct: bool = True, machine: str = 'x86', **kwargs):
+        super().__init__(*args, **kwargs)
+        self.machine = machine
+
+    def get_buildtype_args(self, buildtype: str) -> T.List[str]:
+        return mesonlib.listify([self._apply_prefix(a) for a in self._BUILDTYPE_ARGS[buildtype]])
+
+    def invoked_by_compiler(self) -> bool:
+        return not self.direct
+
+    def get_output_args(self, outputname: str) -> T.List[str]:
+        return self._apply_prefix(['/MACHINE:' + self.machine, '/OUT:' + outputname])
+
+    def get_always_args(self) -> T.List[str]:
+        return self._apply_prefix('/nologo') + super().get_always_args()
+
+    def get_search_args(self, dirname: str) -> T.List[str]:
+        return self._apply_prefix('/LIBPATH:' + dirname)
+
+    def get_std_shared_lib_args(self) -> T.List[str]:
+        return self._apply_prefix('/DLL')
+
+    def get_debugfile_name(self, targetfile: str) -> str:
+        basename = targetfile.rsplit('.', maxsplit=1)[0]
+        return basename + '.pdb'
+
+    def get_debugfile_args(self, targetfile: str) -> T.List[str]:
+        return self._apply_prefix(['/DEBUG', '/PDB:' + self.get_debugfile_name(targetfile)])
+
+    def get_link_whole_for(self, args: T.List[str]) -> T.List[str]:
+        # Only since VS2015
+        args = mesonlib.listify(args)
+        l = []  # T.List[str]
+        for a in args:
+            l.extend(self._apply_prefix('/WHOLEARCHIVE:' + a))
+        return l
+
+    def get_allow_undefined_args(self) -> T.List[str]:
+        return []
+
+    def get_soname_args(self, env: 'Environment', prefix: str, shlib_name: str,
+                        suffix: str, soversion: str, darwin_versions: T.Tuple[str, str],
+                        is_shared_module: bool) -> T.List[str]:
+        return []
+
+    def import_library_args(self, implibname: str) -> T.List[str]:
+        """The command to generate the import library."""
+        return self._apply_prefix(['/IMPLIB:' + implibname])
+
+
+class MSVCDynamicLinker(VisualStudioLikeLinkerMixin, DynamicLinker):
+
+    """Microsoft's Link.exe."""
+
+    def __init__(self, for_machine: mesonlib.MachineChoice, always_args: T.List[str], *,
+                 exelist: T.Optional[T.List[str]] = None,
+                 prefix: T.Union[str, T.List[str]] = '',
+                 machine: str = 'x86', version: str = 'unknown version',
+                 direct: bool = True):
+        super().__init__('link', exelist or ['link.exe'], for_machine,
+                         prefix, always_args, machine=machine, version=version, direct=direct)
+
+    def get_always_args(self) -> T.List[str]:
+        return self._apply_prefix(['/nologo', '/release']) + super().get_always_args()
+
+
+class ClangClDynamicLinker(VisualStudioLikeLinkerMixin, DynamicLinker):
+
+    """Clang's lld-link.exe."""
+
+    def __init__(self, for_machine: mesonlib.MachineChoice, always_args: T.List[str], *,
+                 exelist: T.Optional[T.List[str]] = None,
+                 prefix: T.Union[str, T.List[str]] = '',
+                 machine: str = 'x86', version: str = 'unknown version',
+                 direct: bool = True):
+        super().__init__('lld-link', exelist or ['lld-link.exe'], for_machine,
+                         prefix, always_args, machine=machine, version=version, direct=direct)
+
+
+class XilinkDynamicLinker(VisualStudioLikeLinkerMixin, DynamicLinker):
+
+    """Intel's Xilink.exe."""
+
+    def __init__(self, for_machine: mesonlib.MachineChoice, always_args: T.List[str],
+                 *, version: str = 'unknown version'):
+        super().__init__('xilink', ['xilink.exe'], for_machine, '', always_args, version=version)
+
+
+class SolarisDynamicLinker(PosixDynamicLinkerMixin, DynamicLinker):
+
+    """Sys-V derived linker used on Solaris and OpenSolaris."""
+
+    def __init__(self, *args, **kwargs):
+        super().__init__('ld.solaris', *args, **kwargs)
+
+    def get_link_whole_for(self, args: T.List[str]) -> T.List[str]:
+        if not args:
+            return args
+        return self._apply_prefix('--whole-archive') + args + self._apply_prefix('--no-whole-archive')
+
+    def get_pie_args(self) -> T.List[str]:
+        # Available in Solaris 11.2 and later
+        return ['-z', 'type=pie']
+
+    def get_asneeded_args(self) -> T.List[str]:
+        return self._apply_prefix(['-z', 'ignore'])
+
+    def no_undefined_args(self) -> T.List[str]:
+        return ['-z', 'defs']
+
+    def get_allow_undefined_args(self) -> T.List[str]:
+        return ['-z', 'nodefs']
+
+    def fatal_warnings(self) -> T.List[str]:
+        return ['-z', 'fatal-warnings']
+
+    def build_rpath_args(self, env: 'Environment', build_dir: str, from_dir: str,
+                         rpath_paths: str, build_rpath: str,
+                         install_rpath: str) -> T.Tuple[T.List[str], T.Set[bytes]]:
+        if not rpath_paths and not install_rpath and not build_rpath:
+            return ([], set())
+        processed_rpaths = prepare_rpaths(rpath_paths, build_dir, from_dir)
+        all_paths = mesonlib.OrderedSet([os.path.join('$ORIGIN', p) for p in processed_rpaths])
+        if build_rpath != '':
+            all_paths.add(build_rpath)
+
+        # In order to avoid relinking for RPATH removal, the binary needs to contain just
+        # enough space in the ELF header to hold the final installation RPATH.
+        paths = ':'.join(all_paths)
+        if len(paths) < len(install_rpath):
+            padding = 'X' * (len(install_rpath) - len(paths))
+            if not paths:
+                paths = padding
+            else:
+                paths = paths + ':' + padding
+        return (self._apply_prefix('-rpath,{}'.format(paths)), set())
+
+    def get_soname_args(self, env: 'Environment', prefix: str, shlib_name: str,
+                        suffix: str, soversion: str, darwin_versions: T.Tuple[str, str],
+                        is_shared_module: bool) -> T.List[str]:
+        sostr = '' if soversion is None else '.' + soversion
+        return self._apply_prefix('-soname,{}{}.{}{}'.format(prefix, shlib_name, suffix, sostr))
+
+
+class OptlinkDynamicLinker(VisualStudioLikeLinkerMixin, DynamicLinker):
+
+    """Digital Mars dynamic linker for windows."""
+
+    def __init__(self, exelist: T.List[str], for_machine: mesonlib.MachineChoice,
+                 *, version: str = 'unknown version'):
+        # Use optlink instead of link so we don't interfer with other link.exe
+        # implementations.
+        super().__init__('optlink', exelist, for_machine, '', [], version=version)
+
+    def get_allow_undefined_args(self) -> T.List[str]:
+        return []
+
+    def get_debugfile_args(self, targetfile: str) -> T.List[str]:
+        # Optlink does not generate pdb files.
+        return []
+
+    def get_always_args(self) -> T.List[str]:
+        return []
+
+
+class CudaLinker(PosixDynamicLinkerMixin, DynamicLinker):
+    """Cuda linker (nvlink)"""
+
+    def __init__(self, *args, **kwargs):
+        super().__init__('nvlink', *args, **kwargs)
+
+    @staticmethod
+    def parse_version():
+        version_cmd = ['nvlink', '--version']
+        try:
+            _, out, _ = mesonlib.Popen_safe(version_cmd)
+        except OSError:
+            return 'unknown version'
+        # Output example:
+        # nvlink: NVIDIA (R) Cuda linker
+        # Copyright (c) 2005-2018 NVIDIA Corporation
+        # Built on Sun_Sep_30_21:09:22_CDT_2018
+        # Cuda compilation tools, release 10.0, V10.0.166
+        # we need the most verbose version output. Luckily starting with V
+        return out.strip().split('V')[-1]
+
+    def get_accepts_rsp(self) -> bool:
+        # nvcc does not support response files
+        return False
+
+    def get_lib_prefix(self) -> str:
+        if not mesonlib.is_windows():
+            return ''
+        # nvcc doesn't recognize Meson's default .a extension for static libraries on
+        # Windows and passes it to cl as an object file, resulting in 'warning D9024 :
+        # unrecognized source file type 'xxx.a', object file assumed'.
+        #
+        # nvcc's --library= option doesn't help: it takes the library name without the
+        # extension and assumes that the extension on Windows is .lib; prefixing the
+        # library with -Xlinker= seems to work.
+        from .compilers import CudaCompiler
+        return CudaCompiler.LINKER_PREFIX
+
+    def fatal_warnings(self) -> T.List[str]:
+        return ['--warning-as-error']
+
+    def get_allow_undefined_args(self) -> T.List[str]:
+        return []
+
+    def get_soname_args(self, env: 'Environment', prefix: str, shlib_name: str,
+                        suffix: str, soversion: str, darwin_versions: T.Tuple[str, str],
+                        is_shared_module: bool) -> T.List[str]:
+        return []
diff --git a/meson/mesonbuild/mcompile.py b/meson/mesonbuild/mcompile.py
new file mode 100644
index 000000000..8e2a38fbf
--- /dev/null
+++ b/meson/mesonbuild/mcompile.py
@@ -0,0 +1,304 @@
+# Copyright 2020 The Meson development team
+
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+
+#     http://www.apache.org/licenses/LICENSE-2.0
+
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+"""Entrypoint script for backend agnostic compile."""
+
+import json
+import re
+import sys
+import typing as T
+from collections import defaultdict
+from pathlib import Path
+
+from . import mlog
+from . import mesonlib
+from . import coredata
+from .mesonlib import MesonException
+from mesonbuild.environment import detect_ninja
+from mesonbuild.coredata import UserArrayOption
+
+if T.TYPE_CHECKING:
+    import argparse
+
+def array_arg(value: str) -> T.List[str]:
+    return UserArrayOption(None, value, allow_dups=True, user_input=True).value
+
+def validate_builddir(builddir: Path) -> None:
+    if not (builddir / 'meson-private' / 'coredata.dat' ).is_file():
+        raise MesonException('Current directory is not a meson build directory: `{}`.\n'
+                             'Please specify a valid build dir or change the working directory to it.\n'
+                             'It is also possible that the build directory was generated with an old\n'
+                             'meson version. Please regenerate it in this case.'.format(builddir))
+
+def get_backend_from_coredata(builddir: Path) -> str:
+    """
+    Gets `backend` option value from coredata
+    """
+    return coredata.load(str(builddir)).get_builtin_option('backend')
+
+def parse_introspect_data(builddir: Path) -> T.Dict[str, T.List[dict]]:
+    """
+    Converts a List of name-to-dict to a dict of name-to-dicts (since names are not unique)
+    """
+    path_to_intro = builddir / 'meson-info' / 'intro-targets.json'
+    if not path_to_intro.exists():
+        raise MesonException('`{}` is missing! Directory is not configured yet?'.format(path_to_intro.name))
+    with path_to_intro.open() as f:
+        schema = json.load(f)
+
+    parsed_data = defaultdict(list) # type: T.Dict[str, T.List[dict]]
+    for target in schema:
+        parsed_data[target['name']] += [target]
+    return parsed_data
+
+class ParsedTargetName:
+    full_name = ''
+    name = ''
+    type = ''
+    path = ''
+
+    def __init__(self, target: str):
+        self.full_name = target
+        split = target.rsplit(':', 1)
+        if len(split) > 1:
+            self.type = split[1]
+            if not self._is_valid_type(self.type):
+                raise MesonException('Can\'t invoke target `{}`: unknown target type: `{}`'.format(target, self.type))
+
+        split = split[0].rsplit('/', 1)
+        if len(split) > 1:
+            self.path = split[0]
+            self.name = split[1]
+        else:
+            self.name = split[0]
+
+    @staticmethod
+    def _is_valid_type(type: str) -> bool:
+        # Ammend docs in Commands.md when editing this list
+        allowed_types = {
+            'executable',
+            'static_library',
+            'shared_library',
+            'shared_module',
+            'custom',
+            'run',
+            'jar',
+        }
+        return type in allowed_types
+
+def get_target_from_intro_data(target: ParsedTargetName, builddir: Path, introspect_data: dict) -> dict:
+    if target.name not in introspect_data:
+        raise MesonException('Can\'t invoke target `{}`: target not found'.format(target.full_name))
+
+    intro_targets = introspect_data[target.name]
+    found_targets = []
+
+    resolved_bdir = builddir.resolve()
+
+    if not target.type and not target.path:
+        found_targets = intro_targets
+    else:
+        for intro_target in intro_targets:
+            if (intro_target['subproject'] or
+                    (target.type and target.type != intro_target['type'].replace(' ', '_')) or
+                    (target.path
+                         and intro_target['filename'] != 'no_name'
+                         and Path(target.path) != Path(intro_target['filename'][0]).relative_to(resolved_bdir).parent)):
+                continue
+            found_targets += [intro_target]
+
+    if not found_targets:
+        raise MesonException('Can\'t invoke target `{}`: target not found'.format(target.full_name))
+    elif len(found_targets) > 1:
+        raise MesonException('Can\'t invoke target `{}`: ambigious name. Add target type and/or path: `PATH/NAME:TYPE`'.format(target.full_name))
+
+    return found_targets[0]
+
+def generate_target_names_ninja(target: ParsedTargetName, builddir: Path, introspect_data: dict) -> T.List[str]:
+    intro_target = get_target_from_intro_data(target, builddir, introspect_data)
+
+    if intro_target['type'] == 'run':
+        return [target.name]
+    else:
+        return [str(Path(out_file).relative_to(builddir.resolve())) for out_file in intro_target['filename']]
+
+def get_parsed_args_ninja(options: 'argparse.Namespace', builddir: Path) -> T.List[str]:
+    runner = detect_ninja()
+    if runner is None:
+        raise MesonException('Cannot find ninja.')
+    mlog.log('Found runner:', runner)
+
+    cmd = runner + ['-C', builddir.as_posix()]
+
+    if options.targets:
+        intro_data = parse_introspect_data(builddir)
+        for t in options.targets:
+            cmd.extend(generate_target_names_ninja(ParsedTargetName(t), builddir, intro_data))
+    if options.clean:
+        cmd.append('clean')
+
+    # If the value is set to < 1 then don't set anything, which let's
+    # ninja/samu decide what to do.
+    if options.jobs > 0:
+        cmd.extend(['-j', str(options.jobs)])
+    if options.load_average > 0:
+        cmd.extend(['-l', str(options.load_average)])
+
+    if options.verbose:
+        cmd.append('-v')
+
+    cmd += options.ninja_args
+
+    return cmd
+
+def generate_target_name_vs(target: ParsedTargetName, builddir: Path, introspect_data: dict) -> str:
+    intro_target = get_target_from_intro_data(target, builddir, introspect_data)
+
+    assert intro_target['type'] != 'run', 'Should not reach here: `run` targets must be handle above'
+
+    # Normalize project name
+    # Source: https://docs.microsoft.com/en-us/visualstudio/msbuild/how-to-build-specific-targets-in-solutions-by-using-msbuild-exe
+    target_name = re.sub('[\%\$\@\;\.\(\)\']', '_', intro_target['id'])
+    rel_path = Path(intro_target['filename'][0]).relative_to(builddir.resolve()).parent
+    if rel_path != '.':
+        target_name = str(rel_path / target_name)
+    return target_name
+
+def get_parsed_args_vs(options: 'argparse.Namespace', builddir: Path) -> T.List[str]:
+    slns = list(builddir.glob('*.sln'))
+    assert len(slns) == 1, 'More than one solution in a project?'
+    sln = slns[0]
+
+    cmd = ['msbuild']
+
+    if options.targets:
+        intro_data = parse_introspect_data(builddir)
+        has_run_target = any(map(
+            lambda t:
+                get_target_from_intro_data(ParsedTargetName(t), builddir, intro_data)['type'] == 'run',
+            options.targets
+        ))
+
+        if has_run_target:
+            # `run` target can't be used the same way as other targets on `vs` backend.
+            # They are defined as disabled projects, which can't be invoked as `.sln`
+            # target and have to be invoked directly as project instead.
+            # Issue: https://github.com/microsoft/msbuild/issues/4772
+
+            if len(options.targets) > 1:
+                raise MesonException('Only one target may be specified when `run` target type is used on this backend.')
+            intro_target = get_target_from_intro_data(ParsedTargetName(options.targets[0]), builddir, intro_data)
+            proj_dir = Path(intro_target['filename'][0]).parent
+            proj = proj_dir/'{}.vcxproj'.format(intro_target['id'])
+            cmd += [str(proj.resolve())]
+        else:
+            cmd += [str(sln.resolve())]
+            cmd.extend(['-target:{}'.format(generate_target_name_vs(ParsedTargetName(t), builddir, intro_data)) for t in options.targets])
+    else:
+        cmd += [str(sln.resolve())]
+
+    if options.clean:
+        cmd.extend(['-target:Clean'])
+
+    # In msbuild `-maxCpuCount` with no number means "detect cpus", the default is `-maxCpuCount:1`
+    if options.jobs > 0:
+        cmd.append('-maxCpuCount:{}'.format(options.jobs))
+    else:
+        cmd.append('-maxCpuCount')
+
+    if options.load_average:
+        mlog.warning('Msbuild does not have a load-average switch, ignoring.')
+
+    if not options.verbose:
+        cmd.append('-verbosity:minimal')
+
+    cmd += options.vs_args
+
+    return cmd
+
+def add_arguments(parser: 'argparse.ArgumentParser') -> None:
+    """Add compile specific arguments."""
+    parser.add_argument(
+        'targets',
+        metavar='TARGET',
+        nargs='*',
+        default=None,
+        help='Targets to build. Target has the following format: [PATH_TO_TARGET/]TARGET_NAME[:TARGET_TYPE].')
+    parser.add_argument(
+        '--clean',
+        action='store_true',
+        help='Clean the build directory.'
+    )
+    parser.add_argument(
+        '-C',
+        action='store',
+        dest='builddir',
+        type=Path,
+        default='.',
+        help='The directory containing build files to be built.'
+    )
+    parser.add_argument(
+        '-j', '--jobs',
+        action='store',
+        default=0,
+        type=int,
+        help='The number of worker jobs to run (if supported). If the value is less than 1 the build program will guess.'
+    )
+    parser.add_argument(
+        '-l', '--load-average',
+        action='store',
+        default=0,
+        type=int,
+        help='The system load average to try to maintain (if supported).'
+    )
+    parser.add_argument(
+        '-v', '--verbose',
+        action='store_true',
+        help='Show more verbose output.'
+    )
+    parser.add_argument(
+        '--ninja-args',
+        type=array_arg,
+        default=[],
+        help='Arguments to pass to `ninja` (applied only on `ninja` backend).'
+    )
+    parser.add_argument(
+        '--vs-args',
+        type=array_arg,
+        default=[],
+        help='Arguments to pass to `msbuild` (applied only on `vs` backend).'
+    )
+
+def run(options: 'argparse.Namespace') -> int:
+    bdir = options.builddir  # type: Path
+    validate_builddir(bdir.resolve())
+
+    cmd = []  # type: T.List[str]
+
+    if options.targets and options.clean:
+        raise MesonException('`TARGET` and `--clean` can\'t be used simultaneously')
+
+    backend = get_backend_from_coredata(bdir)
+    if backend == 'ninja':
+        cmd = get_parsed_args_ninja(options, bdir)
+    elif backend.startswith('vs'):
+        cmd = get_parsed_args_vs(options, bdir)
+    else:
+        # TODO: xcode?
+        raise MesonException(
+            'Backend `{}` is not yet supported by `compile`. Use generated project files directly instead.'.format(backend))
+
+    p, *_ = mesonlib.Popen_safe(cmd, stdout=sys.stdout.buffer, stderr=sys.stderr.buffer)
+
+    return p.returncode
diff --git a/meson/mesonbuild/mconf.py b/meson/mesonbuild/mconf.py
new file mode 100644
index 000000000..2e03cabaf
--- /dev/null
+++ b/meson/mesonbuild/mconf.py
@@ -0,0 +1,286 @@
+# Copyright 2014-2016 The Meson development team
+
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+
+#     http://www.apache.org/licenses/LICENSE-2.0
+
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import os
+from . import coredata, environment, mesonlib, build, mintro, mlog
+from .ast import AstIDGenerator
+
+def add_arguments(parser):
+    coredata.register_builtin_arguments(parser)
+    parser.add_argument('builddir', nargs='?', default='.')
+    parser.add_argument('--clearcache', action='store_true', default=False,
+                        help='Clear cached state (e.g. found dependencies)')
+
+
+def make_lower_case(val):
+    if isinstance(val, bool):
+        return str(val).lower()
+    elif isinstance(val, list):
+        return [make_lower_case(i) for i in val]
+    else:
+        return str(val)
+
+
+class ConfException(mesonlib.MesonException):
+    pass
+
+
+class Conf:
+    def __init__(self, build_dir):
+        self.build_dir = os.path.abspath(os.path.realpath(build_dir))
+        if 'meson.build' in [os.path.basename(self.build_dir), self.build_dir]:
+            self.build_dir = os.path.dirname(self.build_dir)
+        self.build = None
+        self.max_choices_line_length = 60
+        self.name_col = []
+        self.value_col = []
+        self.choices_col = []
+        self.descr_col = []
+        self.has_choices = False
+        self.all_subprojects = set()
+        self.yielding_options = set()
+
+        if os.path.isdir(os.path.join(self.build_dir, 'meson-private')):
+            self.build = build.load(self.build_dir)
+            self.source_dir = self.build.environment.get_source_dir()
+            self.coredata = coredata.load(self.build_dir)
+            self.default_values_only = False
+        elif os.path.isfile(os.path.join(self.build_dir, environment.build_filename)):
+            # Make sure that log entries in other parts of meson don't interfere with the JSON output
+            mlog.disable()
+            self.source_dir = os.path.abspath(os.path.realpath(self.build_dir))
+            intr = mintro.IntrospectionInterpreter(self.source_dir, '', 'ninja', visitors = [AstIDGenerator()])
+            intr.analyze()
+            # Re-enable logging just in case
+            mlog.enable()
+            self.coredata = intr.coredata
+            self.default_values_only = True
+        else:
+            raise ConfException('Directory {} is neither a Meson build directory nor a project source directory.'.format(build_dir))
+
+    def clear_cache(self):
+        self.coredata.deps.host.clear()
+        self.coredata.deps.build.clear()
+
+    def set_options(self, options):
+        self.coredata.set_options(options)
+
+    def save(self):
+        # Do nothing when using introspection
+        if self.default_values_only:
+            return
+        # Only called if something has changed so overwrite unconditionally.
+        coredata.save(self.coredata, self.build_dir)
+        # We don't write the build file because any changes to it
+        # are erased when Meson is executed the next time, i.e. when
+        # Ninja is run.
+
+    def print_aligned(self):
+        col_widths = (max([len(i) for i in self.name_col], default=0),
+                      max([len(i) for i in self.value_col], default=0),
+                      max([len(i) for i in self.choices_col], default=0))
+
+        for line in zip(self.name_col, self.value_col, self.choices_col, self.descr_col):
+            if self.has_choices:
+                print('{0:{width[0]}} {1:{width[1]}} {2:{width[2]}} {3}'.format(*line, width=col_widths))
+            else:
+                print('{0:{width[0]}} {1:{width[1]}} {3}'.format(*line, width=col_widths))
+
+    def split_options_per_subproject(self, options):
+        result = {}
+        for k, o in options.items():
+            subproject = ''
+            if ':' in k:
+                subproject, optname = k.split(':')
+                if o.yielding and optname in options:
+                    self.yielding_options.add(k)
+                self.all_subprojects.add(subproject)
+            result.setdefault(subproject, {})[k] = o
+        return result
+
+    def _add_line(self, name, value, choices, descr):
+        self.name_col.append(' ' * self.print_margin + name)
+        self.value_col.append(value)
+        self.choices_col.append(choices)
+        self.descr_col.append(descr)
+
+    def add_option(self, name, descr, value, choices):
+        if isinstance(value, list):
+            value = '[{0}]'.format(', '.join(make_lower_case(value)))
+        else:
+            value = make_lower_case(value)
+
+        if choices:
+            self.has_choices = True
+            if isinstance(choices, list):
+                choices_list = make_lower_case(choices)
+                current = '['
+                while choices_list:
+                    i = choices_list.pop(0)
+                    if len(current) + len(i) >= self.max_choices_line_length:
+                        self._add_line(name, value, current + ',', descr)
+                        name = ''
+                        value = ''
+                        descr = ''
+                        current = ' '
+                    if len(current) > 1:
+                        current += ', '
+                    current += i
+                choices = current + ']'
+            else:
+                choices = make_lower_case(choices)
+        else:
+            choices = ''
+
+        self._add_line(name, value, choices, descr)
+
+    def add_title(self, title):
+        titles = {'descr': 'Description', 'value': 'Current Value', 'choices': 'Possible Values'}
+        if self.default_values_only:
+            titles['value'] = 'Default Value'
+        self._add_line('', '', '', '')
+        self._add_line(title, titles['value'], titles['choices'], titles['descr'])
+        self._add_line('-' * len(title), '-' * len(titles['value']), '-' * len(titles['choices']), '-' * len(titles['descr']))
+
+    def add_section(self, section):
+        self.print_margin = 0
+        self._add_line('', '', '', '')
+        self._add_line(section + ':', '', '', '')
+        self.print_margin = 2
+
+    def print_options(self, title, options):
+        if not options:
+            return
+        if title:
+            self.add_title(title)
+        for k, o in sorted(options.items()):
+            printable_value = o.printable_value()
+            if k in self.yielding_options:
+                printable_value = ''
+            self.add_option(k, o.description, printable_value, o.choices)
+
+    def print_conf(self):
+        def print_default_values_warning():
+            mlog.warning('The source directory instead of the build directory was specified.')
+            mlog.warning('Only the default values for the project are printed, and all command line parameters are ignored.')
+
+        if self.default_values_only:
+            print_default_values_warning()
+            print('')
+
+        print('Core properties:')
+        print('  Source dir', self.source_dir)
+        if not self.default_values_only:
+            print('  Build dir ', self.build_dir)
+
+        dir_option_names = ['bindir',
+                            'datadir',
+                            'includedir',
+                            'infodir',
+                            'libdir',
+                            'libexecdir',
+                            'localedir',
+                            'localstatedir',
+                            'mandir',
+                            'prefix',
+                            'sbindir',
+                            'sharedstatedir',
+                            'sysconfdir']
+        test_option_names = ['errorlogs',
+                             'stdsplit']
+        core_option_names = [k for k in self.coredata.builtins if k not in dir_option_names + test_option_names]
+
+        dir_options = {k: o for k, o in self.coredata.builtins.items() if k in dir_option_names}
+        test_options = {k: o for k, o in self.coredata.builtins.items() if k in test_option_names}
+        core_options = {k: o for k, o in self.coredata.builtins.items() if k in core_option_names}
+
+        def insert_build_prefix(k):
+            idx = k.find(':')
+            if idx < 0:
+                return 'build.' + k
+            return k[:idx + 1] + 'build.' + k[idx + 1:]
+
+        core_options = self.split_options_per_subproject(core_options)
+        host_compiler_options = self.split_options_per_subproject(
+            dict(self.coredata.flatten_lang_iterator(
+                self.coredata.compiler_options.host.items())))
+        build_compiler_options = self.split_options_per_subproject(
+            dict(self.coredata.flatten_lang_iterator(
+                (insert_build_prefix(k), o)
+                for k, o in self.coredata.compiler_options.build.items())))
+        project_options = self.split_options_per_subproject(self.coredata.user_options)
+        show_build_options = self.default_values_only or self.build.environment.is_cross_build()
+
+        self.add_section('Main project options')
+        self.print_options('Core options', core_options[''])
+        self.print_options('', self.coredata.builtins_per_machine.host)
+        if show_build_options:
+            self.print_options('', {insert_build_prefix(k): o for k, o in self.coredata.builtins_per_machine.build.items()})
+        self.print_options('Backend options', self.coredata.backend_options)
+        self.print_options('Base options', self.coredata.base_options)
+        self.print_options('Compiler options', host_compiler_options.get('', {}))
+        if show_build_options:
+            self.print_options('', build_compiler_options.get('', {}))
+        self.print_options('Directories', dir_options)
+        self.print_options('Testing options', test_options)
+        self.print_options('Project options', project_options.get('', {}))
+        for subproject in sorted(self.all_subprojects):
+            if subproject == '':
+                continue
+            self.add_section('Subproject ' + subproject)
+            if subproject in core_options:
+                self.print_options('Core options', core_options[subproject])
+            if subproject in host_compiler_options:
+                self.print_options('Compiler options', host_compiler_options[subproject])
+            if subproject in build_compiler_options and show_build_options:
+                self.print_options('', build_compiler_options[subproject])
+            if subproject in project_options:
+                self.print_options('Project options', project_options[subproject])
+        self.print_aligned()
+
+        # Print the warning twice so that the user shouldn't be able to miss it
+        if self.default_values_only:
+            print('')
+            print_default_values_warning()
+
+def run(options):
+    coredata.parse_cmd_line_options(options)
+    builddir = os.path.abspath(os.path.realpath(options.builddir))
+    c = None
+    try:
+        c = Conf(builddir)
+        if c.default_values_only:
+            c.print_conf()
+            return 0
+
+        save = False
+        if len(options.cmd_line_options) > 0:
+            c.set_options(options.cmd_line_options)
+            coredata.update_cmd_line_file(builddir, options)
+            save = True
+        elif options.clearcache:
+            c.clear_cache()
+            save = True
+        else:
+            c.print_conf()
+        if save:
+            c.save()
+            mintro.update_build_options(c.coredata, c.build.environment.info_dir)
+            mintro.write_meson_info_file(c.build, [])
+    except ConfException as e:
+        print('Meson configurator encountered an error:')
+        if c is not None and c.build is not None:
+            mintro.write_meson_info_file(c.build, [e])
+        raise e
+    return 0
diff --git a/meson/mesonbuild/mdist.py b/meson/mesonbuild/mdist.py
new file mode 100644
index 000000000..fbda240cf
--- /dev/null
+++ b/meson/mesonbuild/mdist.py
@@ -0,0 +1,285 @@
+# Copyright 2017 The Meson development team
+
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+
+#     http://www.apache.org/licenses/LICENSE-2.0
+
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+
+import gzip
+import os
+import sys
+import shutil
+import subprocess
+import hashlib
+import json
+from glob import glob
+from pathlib import Path
+from mesonbuild.environment import detect_ninja
+from mesonbuild.mesonlib import windows_proof_rmtree, MesonException
+from mesonbuild.wrap import wrap
+from mesonbuild import mlog, build
+
+archive_choices = ['gztar', 'xztar', 'zip']
+archive_extension = {'gztar': '.tar.gz',
+                     'xztar': '.tar.xz',
+                     'zip': '.zip'}
+
+def add_arguments(parser):
+    parser.add_argument('-C', default='.', dest='wd',
+                        help='directory to cd into before running')
+    parser.add_argument('--formats', default='xztar',
+                        help='Comma separated list of archive types to create.')
+    parser.add_argument('--include-subprojects', action='store_true',
+                        help='Include source code of subprojects that have been used for the build.')
+    parser.add_argument('--no-tests', action='store_true',
+                        help='Do not build and test generated packages.')
+
+
+def create_hash(fname):
+    hashname = fname + '.sha256sum'
+    m = hashlib.sha256()
+    m.update(open(fname, 'rb').read())
+    with open(hashname, 'w') as f:
+        f.write('{} {}\n'.format(m.hexdigest(), os.path.basename(fname)))
+    print(os.path.relpath(fname), m.hexdigest())
+
+
+def del_gitfiles(dirname):
+    for f in glob(os.path.join(dirname, '.git*')):
+        if os.path.isdir(f) and not os.path.islink(f):
+            windows_proof_rmtree(f)
+        else:
+            os.unlink(f)
+
+def process_submodules(dirname):
+    module_file = os.path.join(dirname, '.gitmodules')
+    if not os.path.exists(module_file):
+        return
+    subprocess.check_call(['git', 'submodule', 'update', '--init', '--recursive'], cwd=dirname)
+    for line in open(module_file):
+        line = line.strip()
+        if '=' not in line:
+            continue
+        k, v = line.split('=', 1)
+        k = k.strip()
+        v = v.strip()
+        if k != 'path':
+            continue
+        del_gitfiles(os.path.join(dirname, v))
+
+
+def run_dist_scripts(src_root, bld_root, dist_root, dist_scripts):
+    assert(os.path.isabs(dist_root))
+    env = os.environ.copy()
+    env['MESON_DIST_ROOT'] = dist_root
+    env['MESON_SOURCE_ROOT'] = src_root
+    env['MESON_BUILD_ROOT'] = bld_root
+    for d in dist_scripts:
+        script = d['exe']
+        args = d['args']
+        name = ' '.join(script + args)
+        print('Running custom dist script {!r}'.format(name))
+        try:
+            rc = subprocess.call(script + args, env=env)
+            if rc != 0:
+                sys.exit('Dist script errored out')
+        except OSError:
+            print('Failed to run dist script {!r}'.format(name))
+            sys.exit(1)
+
+def is_git(src_root):
+    _git = os.path.join(src_root, '.git')
+    return os.path.isdir(_git) or os.path.isfile(_git)
+
+def git_have_dirty_index(src_root):
+    '''Check whether there are uncommitted changes in git'''
+    ret = subprocess.call(['git', '-C', src_root, 'diff-index', '--quiet', 'HEAD'])
+    return ret == 1
+
+def git_clone(src_root, distdir):
+    if git_have_dirty_index(src_root):
+        mlog.warning('Repository has uncommitted changes that will not be included in the dist tarball')
+    if os.path.exists(distdir):
+        shutil.rmtree(distdir)
+    os.makedirs(distdir)
+    subprocess.check_call(['git', 'clone', '--shared', src_root, distdir])
+    process_submodules(distdir)
+    del_gitfiles(distdir)
+
+def create_dist_git(dist_name, archives, src_root, bld_root, dist_sub, dist_scripts, subprojects):
+    distdir = os.path.join(dist_sub, dist_name)
+    git_clone(src_root, distdir)
+    for path in subprojects:
+        sub_src_root = os.path.join(src_root, path)
+        sub_distdir = os.path.join(distdir, path)
+        if os.path.exists(sub_distdir):
+            continue
+        if is_git(sub_src_root):
+            git_clone(sub_src_root, sub_distdir)
+        else:
+            shutil.copytree(sub_src_root, sub_distdir)
+    run_dist_scripts(src_root, bld_root, distdir, dist_scripts)
+    output_names = []
+    for a in archives:
+        compressed_name = distdir + archive_extension[a]
+        shutil.make_archive(distdir, a, root_dir=dist_sub, base_dir=dist_name)
+        output_names.append(compressed_name)
+    shutil.rmtree(distdir)
+    return output_names
+
+def is_hg(src_root):
+    return os.path.isdir(os.path.join(src_root, '.hg'))
+
+def hg_have_dirty_index(src_root):
+    '''Check whether there are uncommitted changes in hg'''
+    out = subprocess.check_output(['hg', '-R', src_root, 'summary'])
+    return b'commit: (clean)' not in out
+
+def create_dist_hg(dist_name, archives, src_root, bld_root, dist_sub, dist_scripts):
+    if hg_have_dirty_index(src_root):
+        mlog.warning('Repository has uncommitted changes that will not be included in the dist tarball')
+    if dist_scripts:
+        mlog.warning('dist scripts are not supported in Mercurial projects')
+
+    os.makedirs(dist_sub, exist_ok=True)
+    tarname = os.path.join(dist_sub, dist_name + '.tar')
+    xzname = tarname + '.xz'
+    gzname = tarname + '.gz'
+    zipname = os.path.join(dist_sub, dist_name + '.zip')
+    # Note that -X interprets relative paths using the current working
+    # directory, not the repository root, so this must be an absolute path:
+    # https://bz.mercurial-scm.org/show_bug.cgi?id=6267
+    #
+    # .hg[a-z]* is used instead of .hg* to keep .hg_archival.txt, which may
+    # be useful to link the tarball to the Mercurial revision for either
+    # manual inspection or in case any code interprets it for a --version or
+    # similar.
+    subprocess.check_call(['hg', 'archive', '-R', src_root, '-S', '-t', 'tar',
+                           '-X', src_root + '/.hg[a-z]*', tarname])
+    output_names = []
+    if 'xztar' in archives:
+        import lzma
+        with lzma.open(xzname, 'wb') as xf, open(tarname, 'rb') as tf:
+            shutil.copyfileobj(tf, xf)
+        output_names.append(xzname)
+    if 'gztar' in archives:
+        with gzip.open(gzname, 'wb') as zf, open(tarname, 'rb') as tf:
+            shutil.copyfileobj(tf, zf)
+        output_names.append(gzname)
+    os.unlink(tarname)
+    if 'zip' in archives:
+        subprocess.check_call(['hg', 'archive', '-R', src_root, '-S', '-t', 'zip', zipname])
+        output_names.append(zipname)
+    return output_names
+
+def run_dist_steps(meson_command, unpacked_src_dir, builddir, installdir, ninja_args):
+    if subprocess.call(meson_command + ['--backend=ninja', unpacked_src_dir, builddir]) != 0:
+        print('Running Meson on distribution package failed')
+        return 1
+    if subprocess.call(ninja_args, cwd=builddir) != 0:
+        print('Compiling the distribution package failed')
+        return 1
+    if subprocess.call(ninja_args + ['test'], cwd=builddir) != 0:
+        print('Running unit tests on the distribution package failed')
+        return 1
+    myenv = os.environ.copy()
+    myenv['DESTDIR'] = installdir
+    if subprocess.call(ninja_args + ['install'], cwd=builddir, env=myenv) != 0:
+        print('Installing the distribution package failed')
+        return 1
+    return 0
+
+def check_dist(packagename, meson_command, extra_meson_args, bld_root, privdir):
+    print('Testing distribution package {}'.format(packagename))
+    unpackdir = os.path.join(privdir, 'dist-unpack')
+    builddir = os.path.join(privdir, 'dist-build')
+    installdir = os.path.join(privdir, 'dist-install')
+    for p in (unpackdir, builddir, installdir):
+        if os.path.exists(p):
+            windows_proof_rmtree(p)
+        os.mkdir(p)
+    ninja_args = detect_ninja()
+    shutil.unpack_archive(packagename, unpackdir)
+    unpacked_files = glob(os.path.join(unpackdir, '*'))
+    assert(len(unpacked_files) == 1)
+    unpacked_src_dir = unpacked_files[0]
+    with open(os.path.join(bld_root, 'meson-info', 'intro-buildoptions.json')) as boptions:
+        meson_command += ['-D{name}={value}'.format(**o) for o in json.load(boptions)
+                          if o['name'] not in ['backend', 'install_umask', 'buildtype']]
+    meson_command += extra_meson_args
+
+    ret = run_dist_steps(meson_command, unpacked_src_dir, builddir, installdir, ninja_args)
+    if ret > 0:
+        print('Dist check build directory was {}'.format(builddir))
+    else:
+        windows_proof_rmtree(unpackdir)
+        windows_proof_rmtree(builddir)
+        windows_proof_rmtree(installdir)
+        print('Distribution package {} tested'.format(packagename))
+    return ret
+
+def determine_archives_to_generate(options):
+    result = []
+    for i in options.formats.split(','):
+        if i not in archive_choices:
+            sys.exit('Value "{}" not one of permitted values {}.'.format(i, archive_choices))
+        result.append(i)
+    if len(i) == 0:
+        sys.exit('No archive types specified.')
+    return result
+
+def run(options):
+    options.wd = os.path.abspath(options.wd)
+    buildfile = Path(options.wd) / 'meson-private' / 'build.dat'
+    if not buildfile.is_file():
+        raise MesonException('Directory {!r} does not seem to be a Meson build directory.'.format(options.wd))
+    b = build.load(options.wd)
+    # This import must be load delayed, otherwise it will get the default
+    # value of None.
+    from mesonbuild.mesonlib import meson_command
+    src_root = b.environment.source_dir
+    bld_root = b.environment.build_dir
+    priv_dir = os.path.join(bld_root, 'meson-private')
+    dist_sub = os.path.join(bld_root, 'meson-dist')
+
+    dist_name = b.project_name + '-' + b.project_version
+
+    archives = determine_archives_to_generate(options)
+
+    subprojects = []
+    extra_meson_args = []
+    if options.include_subprojects:
+        subproject_dir = os.path.join(src_root, b.subproject_dir)
+        for sub in b.subprojects:
+            directory = wrap.get_directory(subproject_dir, sub)
+            subprojects.append(os.path.join(b.subproject_dir, directory))
+        extra_meson_args.append('-Dwrap_mode=nodownload')
+
+    if is_git(src_root):
+        names = create_dist_git(dist_name, archives, src_root, bld_root, dist_sub, b.dist_scripts, subprojects)
+    elif is_hg(src_root):
+        if subprojects:
+            print('--include-subprojects option currently not supported with Mercurial')
+            return 1
+        names = create_dist_hg(dist_name, archives, src_root, bld_root, dist_sub, b.dist_scripts)
+    else:
+        print('Dist currently only works with Git or Mercurial repos')
+        return 1
+    if names is None:
+        return 1
+    rc = 0
+    if not options.no_tests:
+        # Check only one.
+        rc = check_dist(names[0], meson_command, extra_meson_args, bld_root, priv_dir)
+    if rc == 0:
+        for name in names:
+            create_hash(name)
+    return rc
diff --git a/meson/mesonbuild/mesondata.py b/meson/mesonbuild/mesondata.py
new file mode 100644
index 000000000..1f223c251
--- /dev/null
+++ b/meson/mesonbuild/mesondata.py
@@ -0,0 +1,374 @@
+# Copyright 2020 The Meson development team
+
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+
+#     http://www.apache.org/licenses/LICENSE-2.0
+
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+
+####
+####  WARNING: This is an automatically generated file! Do not edit!
+####           Generated by tools/gen_data.py
+####
+
+
+from pathlib import Path
+import typing as T
+
+if T.TYPE_CHECKING:
+    from .environment import Environment
+
+######################
+# BEGIN Data section #
+######################
+
+file_0_data_CMakeListsLLVM_txt = '''\
+cmake_minimum_required(VERSION ${CMAKE_MAJOR_VERSION}.${CMAKE_MINOR_VERSION}.${CMAKE_PATCH_VERSION} )
+
+set(PACKAGE_FOUND FALSE)
+
+while(TRUE)
+  find_package(LLVM REQUIRED CONFIG QUIET)
+
+  # ARCHS has to be set via the CMD interface
+  if(LLVM_FOUND OR "${ARCHS}" STREQUAL "")
+    break()
+  endif()
+
+  list(GET       ARCHS 0 CMAKE_LIBRARY_ARCHITECTURE)
+  list(REMOVE_AT ARCHS 0)
+endwhile()
+
+if(LLVM_FOUND)
+  set(PACKAGE_FOUND TRUE)
+
+  foreach(mod IN LISTS LLVM_MESON_MODULES)
+    # Reset variables
+    set(out_mods)
+    set(real_mods)
+
+    # Generate a lower and upper case version
+    string(TOLOWER "${mod}" mod_L)
+    string(TOUPPER "${mod}" mod_U)
+
+    # Get the mapped components
+    llvm_map_components_to_libnames(out_mods ${mod} ${mod_L} ${mod_U})
+    list(SORT              out_mods)
+    list(REMOVE_DUPLICATES out_mods)
+
+    # Make sure that the modules exist
+    foreach(i IN LISTS out_mods)
+      if(TARGET ${i})
+        list(APPEND real_mods ${i})
+      endif()
+    endforeach()
+
+    # Set the output variables
+    set(MESON_LLVM_TARGETS_${mod} ${real_mods})
+    foreach(i IN LISTS real_mods)
+      set(MESON_TARGET_TO_LLVM_${i} ${mod})
+    endforeach()
+  endforeach()
+
+  # Check the following variables:
+  # LLVM_PACKAGE_VERSION
+  # LLVM_VERSION
+  # LLVM_VERSION_STRING
+  if(NOT DEFINED PACKAGE_VERSION)
+    if(DEFINED LLVM_PACKAGE_VERSION)
+      set(PACKAGE_VERSION "${LLVM_PACKAGE_VERSION}")
+    elseif(DEFINED LLVM_VERSION)
+      set(PACKAGE_VERSION "${LLVM_VERSION}")
+    elseif(DEFINED LLVM_VERSION_STRING)
+      set(PACKAGE_VERSION "${LLVM_VERSION_STRING}")
+    endif()
+  endif()
+
+  # Check the following variables:
+  # LLVM_LIBRARIES
+  # LLVM_LIBS
+  set(libs)
+  if(DEFINED LLVM_LIBRARIES)
+    set(libs LLVM_LIBRARIES)
+  elseif(DEFINED LLVM_LIBS)
+    set(libs LLVM_LIBS)
+  endif()
+
+  # Check the following variables:
+  # LLVM_INCLUDE_DIRS
+  # LLVM_INCLUDES
+  # LLVM_INCLUDE_DIR
+  set(includes)
+  if(DEFINED LLVM_INCLUDE_DIRS)
+    set(includes LLVM_INCLUDE_DIRS)
+  elseif(DEFINED LLVM_INCLUDES)
+    set(includes LLVM_INCLUDES)
+  elseif(DEFINED LLVM_INCLUDE_DIR)
+    set(includes LLVM_INCLUDE_DIR)
+  endif()
+
+  # Check the following variables:
+  # LLVM_DEFINITIONS
+  set(definitions)
+  if(DEFINED LLVM_DEFINITIONS)
+    set(definitions LLVM_DEFINITIONS)
+  endif()
+
+  set(PACKAGE_INCLUDE_DIRS "${${includes}}")
+  set(PACKAGE_DEFINITIONS  "${${definitions}}")
+  set(PACKAGE_LIBRARIES    "${${libs}}")
+endif()
+'''
+
+file_1_data_CMakePathInfo_txt = '''\
+cmake_minimum_required(VERSION ${CMAKE_MAJOR_VERSION}.${CMAKE_MINOR_VERSION}.${CMAKE_PATCH_VERSION})
+
+set(TMP_PATHS_LIST)
+list(APPEND TMP_PATHS_LIST ${CMAKE_PREFIX_PATH})
+list(APPEND TMP_PATHS_LIST ${CMAKE_FRAMEWORK_PATH})
+list(APPEND TMP_PATHS_LIST ${CMAKE_APPBUNDLE_PATH})
+list(APPEND TMP_PATHS_LIST $ENV{CMAKE_PREFIX_PATH})
+list(APPEND TMP_PATHS_LIST $ENV{CMAKE_FRAMEWORK_PATH})
+list(APPEND TMP_PATHS_LIST $ENV{CMAKE_APPBUNDLE_PATH})
+list(APPEND TMP_PATHS_LIST ${CMAKE_SYSTEM_PREFIX_PATH})
+list(APPEND TMP_PATHS_LIST ${CMAKE_SYSTEM_FRAMEWORK_PATH})
+list(APPEND TMP_PATHS_LIST ${CMAKE_SYSTEM_APPBUNDLE_PATH})
+
+set(LIB_ARCH_LIST)
+if(CMAKE_LIBRARY_ARCHITECTURE_REGEX)
+  file(GLOB implicit_dirs RELATIVE /lib /lib/*-linux-gnu* )
+  foreach(dir ${implicit_dirs})
+    if("${dir}" MATCHES "${CMAKE_LIBRARY_ARCHITECTURE_REGEX}")
+      list(APPEND LIB_ARCH_LIST "${dir}")
+    endif()
+  endforeach()
+endif()
+
+# "Export" these variables:
+set(MESON_ARCH_LIST ${LIB_ARCH_LIST})
+set(MESON_PATHS_LIST ${TMP_PATHS_LIST})
+set(MESON_CMAKE_ROOT ${CMAKE_ROOT})
+set(MESON_CMAKE_SYSROOT ${CMAKE_SYSROOT})
+set(MESON_FIND_ROOT_PATH ${CMAKE_FIND_ROOT_PATH})
+
+message(STATUS ${TMP_PATHS_LIST})
+'''
+
+file_2_data_CMakeLists_txt = '''\
+# fail noisily if attempt to use this file without setting:
+# cmake_minimum_required(VERSION ${CMAKE_VERSION})
+# project(... LANGUAGES ...)
+
+cmake_policy(SET CMP0000 NEW)
+
+set(PACKAGE_FOUND FALSE)
+set(_packageName "${NAME}")
+string(TOUPPER "${_packageName}" PACKAGE_NAME)
+
+while(TRUE)
+  find_package("${NAME}" QUIET COMPONENTS ${COMPS})
+
+  # ARCHS has to be set via the CMD interface
+  if(${_packageName}_FOUND OR ${PACKAGE_NAME}_FOUND OR "${ARCHS}" STREQUAL "")
+    break()
+  endif()
+
+  list(GET       ARCHS 0 CMAKE_LIBRARY_ARCHITECTURE)
+  list(REMOVE_AT ARCHS 0)
+endwhile()
+
+if(${_packageName}_FOUND  OR  ${PACKAGE_NAME}_FOUND)
+  set(PACKAGE_FOUND TRUE)
+
+  # Check the following variables:
+  # FOO_VERSION
+  # Foo_VERSION
+  # FOO_VERSION_STRING
+  # Foo_VERSION_STRING
+  if(NOT DEFINED PACKAGE_VERSION)
+    if(DEFINED ${_packageName}_VERSION)
+      set(PACKAGE_VERSION "${${_packageName}_VERSION}")
+    elseif(DEFINED ${PACKAGE_NAME}_VERSION)
+      set(PACKAGE_VERSION "${${PACKAGE_NAME}_VERSION}")
+    elseif(DEFINED ${_packageName}_VERSION_STRING)
+      set(PACKAGE_VERSION "${${_packageName}_VERSION_STRING}")
+    elseif(DEFINED ${PACKAGE_NAME}_VERSION_STRING)
+      set(PACKAGE_VERSION "${${PACKAGE_NAME}_VERSION_STRING}")
+    endif()
+  endif()
+
+  # Check the following variables:
+  # FOO_LIBRARIES
+  # Foo_LIBRARIES
+  # FOO_LIBS
+  # Foo_LIBS
+  set(libs)
+  if(DEFINED ${_packageName}_LIBRARIES)
+    set(libs ${_packageName}_LIBRARIES)
+  elseif(DEFINED ${PACKAGE_NAME}_LIBRARIES)
+    set(libs ${PACKAGE_NAME}_LIBRARIES)
+  elseif(DEFINED ${_packageName}_LIBS)
+    set(libs ${_packageName}_LIBS)
+  elseif(DEFINED ${PACKAGE_NAME}_LIBS)
+    set(libs ${PACKAGE_NAME}_LIBS)
+  endif()
+
+  # Check the following variables:
+  # FOO_INCLUDE_DIRS
+  # Foo_INCLUDE_DIRS
+  # FOO_INCLUDES
+  # Foo_INCLUDES
+  # FOO_INCLUDE_DIR
+  # Foo_INCLUDE_DIR
+  set(includes)
+  if(DEFINED ${_packageName}_INCLUDE_DIRS)
+    set(includes ${_packageName}_INCLUDE_DIRS)
+  elseif(DEFINED ${PACKAGE_NAME}_INCLUDE_DIRS)
+    set(includes ${PACKAGE_NAME}_INCLUDE_DIRS)
+  elseif(DEFINED ${_packageName}_INCLUDES)
+    set(includes ${_packageName}_INCLUDES)
+  elseif(DEFINED ${PACKAGE_NAME}_INCLUDES)
+    set(includes ${PACKAGE_NAME}_INCLUDES)
+  elseif(DEFINED ${_packageName}_INCLUDE_DIR)
+    set(includes ${_packageName}_INCLUDE_DIR)
+  elseif(DEFINED ${PACKAGE_NAME}_INCLUDE_DIR)
+    set(includes ${PACKAGE_NAME}_INCLUDE_DIR)
+  endif()
+
+  # Check the following variables:
+  # FOO_DEFINITIONS
+  # Foo_DEFINITIONS
+  set(definitions)
+  if(DEFINED ${_packageName}_DEFINITIONS)
+    set(definitions ${_packageName}_DEFINITIONS)
+  elseif(DEFINED ${PACKAGE_NAME}_DEFINITIONS)
+    set(definitions ${PACKAGE_NAME}_DEFINITIONS)
+  endif()
+
+  set(PACKAGE_INCLUDE_DIRS "${${includes}}")
+  set(PACKAGE_DEFINITIONS  "${${definitions}}")
+  set(PACKAGE_LIBRARIES    "${${libs}}")
+endif()
+'''
+
+file_3_data_preload_cmake = '''\
+if(MESON_PS_LOADED)
+  return()
+endif()
+
+set(MESON_PS_LOADED ON)
+
+# Dummy macros that have a special meaning in the meson code
+macro(meson_ps_execute_delayed_calls)
+endmacro()
+
+macro(meson_ps_reload_vars)
+endmacro()
+
+# Helper macro to inspect the current CMake state
+macro(meson_ps_inspect_vars)
+  set(MESON_PS_CMAKE_CURRENT_BINARY_DIR "${CMAKE_CURRENT_BINARY_DIR}")
+  set(MESON_PS_CMAKE_CURRENT_SOURCE_DIR "${CMAKE_CURRENT_SOURCE_DIR}")
+  meson_ps_execute_delayed_calls()
+endmacro()
+
+
+# Override some system functions with custom code and forward the args
+# to the original function
+macro(add_custom_command)
+  meson_ps_inspect_vars()
+  _add_custom_command(${ARGV})
+endmacro()
+
+macro(add_custom_target)
+  meson_ps_inspect_vars()
+  _add_custom_target(${ARGV})
+endmacro()
+
+macro(set_property)
+  meson_ps_inspect_vars()
+  _set_property(${ARGV})
+endmacro()
+
+function(set_source_files_properties)
+  set(FILES)
+  set(I 0)
+  set(PROPERTIES OFF)
+
+  while(I LESS ARGC)
+    if(NOT PROPERTIES)
+      if("${ARGV${I}}" STREQUAL "PROPERTIES")
+        set(PROPERTIES ON)
+      else()
+        list(APPEND FILES "${ARGV${I}}")
+      endif()
+
+      math(EXPR I "${I} + 1")
+    else()
+      set(ID_IDX ${I})
+      math(EXPR PROP_IDX "${ID_IDX} + 1")
+
+      set(ID   "${ARGV${ID_IDX}}")
+      set(PROP "${ARGV${PROP_IDX}}")
+
+      set_property(SOURCE ${FILES} PROPERTY "${ID}" "${PROP}")
+      math(EXPR I "${I} + 2")
+    endif()
+  endwhile()
+endfunction()
+
+set(MESON_PS_DELAYED_CALLS add_custom_command;add_custom_target;set_property)
+meson_ps_reload_vars()
+'''
+
+
+####################
+# END Data section #
+####################
+
+class DataFile:
+    def __init__(self, path: Path, sha256sum: str, data: str) -> None:
+        self.path = path
+        self.sha256sum = sha256sum
+        self.data = data
+
+    def write_once(self, path: Path) -> None:
+        if not path.exists():
+            path.write_text(self.data)
+
+    def write_to_private(self, env: 'Environment') -> Path:
+        out_file = Path(env.scratch_dir) / 'data' / self.path.name
+        out_file.parent.mkdir(exist_ok=True)
+        self.write_once(out_file)
+        return out_file
+
+
+mesondata = {
+    'dependencies/data/CMakeListsLLVM.txt': DataFile(
+        Path('dependencies/data/CMakeListsLLVM.txt'),
+        '412cec3315597041a978d018cdaca282dcd47693793540da88ae2f80d0cbd7cd',
+        file_0_data_CMakeListsLLVM_txt,
+    ),
+    'dependencies/data/CMakePathInfo.txt': DataFile(
+        Path('dependencies/data/CMakePathInfo.txt'),
+        '90da8b443982d9c87139b7dc84228eb58cab4315764949637208f25e2bda7db2',
+        file_1_data_CMakePathInfo_txt,
+    ),
+    'dependencies/data/CMakeLists.txt': DataFile(
+        Path('dependencies/data/CMakeLists.txt'),
+        '71a2d58381f912bbfb1c8709884d34d721f682edf2fca001e1f582f0bffd0da7',
+        file_2_data_CMakeLists_txt,
+    ),
+    'cmake/data/preload.cmake': DataFile(
+        Path('cmake/data/preload.cmake'),
+        '064d047b18a5c919ad016b838bed50c5d40aebe9e53da0e70eff9d52a2c1ca1f',
+        file_3_data_preload_cmake,
+    ),
+}
diff --git a/meson/mesonbuild/mesonlib.py b/meson/mesonbuild/mesonlib.py
new file mode 100644
index 000000000..77163d7e9
--- /dev/null
+++ b/meson/mesonbuild/mesonlib.py
@@ -0,0 +1,1712 @@
+# Copyright 2012-2020 The Meson development team
+
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+
+#     http://www.apache.org/licenses/LICENSE-2.0
+
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+"""A library of random helper functionality."""
+from pathlib import Path
+import sys
+import stat
+import time
+import platform, subprocess, operator, os, shlex, shutil, re
+import collections
+from enum import Enum
+from functools import lru_cache, wraps
+from itertools import tee, filterfalse
+import typing as T
+import uuid
+import textwrap
+
+from mesonbuild import mlog
+
+if T.TYPE_CHECKING:
+    from .build import ConfigurationData
+    from .coredata import OptionDictType, UserOption
+    from .compilers.compilers import CompilerType
+    from .interpreterbase import ObjectHolder
+
+_T = T.TypeVar('_T')
+_U = T.TypeVar('_U')
+
+have_fcntl = False
+have_msvcrt = False
+# TODO: this is such a hack, this really should be either in coredata or in the
+# interpreter
+# {subproject: project_meson_version}
+project_meson_versions = collections.defaultdict(str)  # type: T.DefaultDict[str, str]
+
+try:
+    import fcntl
+    have_fcntl = True
+except Exception:
+    pass
+
+try:
+    import msvcrt
+    have_msvcrt = True
+except Exception:
+    pass
+
+from glob import glob
+
+if os.path.basename(sys.executable) == 'meson.exe':
+    # In Windows and using the MSI installed executable.
+    python_command = [sys.executable, 'runpython']
+else:
+    python_command = [sys.executable]
+meson_command = None
+
+GIT = shutil.which('git')
+def git(cmd: T.List[str], workingdir: str, **kwargs: T.Any) -> subprocess.CompletedProcess:
+    pc = subprocess.run([GIT, '-C', workingdir] + cmd,
+                        # Redirect stdin to DEVNULL otherwise git messes up the
+                        # console and ANSI colors stop working on Windows.
+                        stdin=subprocess.DEVNULL, **kwargs)
+    # Sometimes git calls git recursively, such as `git submodule update
+    # --recursive` which will be without the above workaround, so set the
+    # console mode again just in case.
+    mlog.setup_console()
+    return pc
+
+
+def set_meson_command(mainfile: str) -> None:
+    global python_command
+    global meson_command
+    # On UNIX-like systems `meson` is a Python script
+    # On Windows `meson` and `meson.exe` are wrapper exes
+    if not mainfile.endswith('.py'):
+        meson_command = [mainfile]
+    elif os.path.isabs(mainfile) and mainfile.endswith('mesonmain.py'):
+        # Can't actually run meson with an absolute path to mesonmain.py, it must be run as -m mesonbuild.mesonmain
+        meson_command = python_command + ['-m', 'mesonbuild.mesonmain']
+    else:
+        # Either run uninstalled, or full path to meson-script.py
+        meson_command = python_command + [mainfile]
+    # We print this value for unit tests.
+    if 'MESON_COMMAND_TESTS' in os.environ:
+        mlog.log('meson_command is {!r}'.format(meson_command))
+
+
+def is_ascii_string(astring: T.Union[str, bytes]) -> bool:
+    try:
+        if isinstance(astring, str):
+            astring.encode('ascii')
+        elif isinstance(astring, bytes):
+            astring.decode('ascii')
+    except UnicodeDecodeError:
+        return False
+    return True
+
+
+def check_direntry_issues(direntry_array: T.Union[T.List[T.Union[str, bytes]], str, bytes]) -> None:
+    import locale
+    # Warn if the locale is not UTF-8. This can cause various unfixable issues
+    # such as os.stat not being able to decode filenames with unicode in them.
+    # There is no way to reset both the preferred encoding and the filesystem
+    # encoding, so we can just warn about it.
+    e = locale.getpreferredencoding()
+    if e.upper() != 'UTF-8' and not is_windows():
+        if not isinstance(direntry_array, list):
+            direntry_array = [direntry_array]
+        for de in direntry_array:
+            if is_ascii_string(de):
+                continue
+            mlog.warning(textwrap.dedent('''
+                You are using {!r} which is not a Unicode-compatible
+                locale but you are trying to access a file system entry called {!r} which is
+                not pure ASCII. This may cause problems.
+                '''.format(e, de)), file=sys.stderr)
+
+
+# Put this in objects that should not get dumped to pickle files
+# by accident.
+import threading
+an_unpicklable_object = threading.Lock()
+
+
+class MesonException(Exception):
+    '''Exceptions thrown by Meson'''
+
+    file = None    # type: T.Optional[str]
+    lineno = None  # type: T.Optional[int]
+    colno = None   # type: T.Optional[int]
+
+
+class EnvironmentException(MesonException):
+    '''Exceptions thrown while processing and creating the build environment'''
+
+
+class FileMode:
+    # The first triad is for owner permissions, the second for group permissions,
+    # and the third for others (everyone else).
+    # For the 1st character:
+    #  'r' means can read
+    #  '-' means not allowed
+    # For the 2nd character:
+    #  'w' means can write
+    #  '-' means not allowed
+    # For the 3rd character:
+    #  'x' means can execute
+    #  's' means can execute and setuid/setgid is set (owner/group triads only)
+    #  'S' means cannot execute and setuid/setgid is set (owner/group triads only)
+    #  't' means can execute and sticky bit is set ("others" triads only)
+    #  'T' means cannot execute and sticky bit is set ("others" triads only)
+    #  '-' means none of these are allowed
+    #
+    # The meanings of 'rwx' perms is not obvious for directories; see:
+    # https://www.hackinglinuxexposed.com/articles/20030424.html
+    #
+    # For information on this notation such as setuid/setgid/sticky bits, see:
+    # https://en.wikipedia.org/wiki/File_system_permissions#Symbolic_notation
+    symbolic_perms_regex = re.compile('[r-][w-][xsS-]' # Owner perms
+                                      '[r-][w-][xsS-]' # Group perms
+                                      '[r-][w-][xtT-]') # Others perms
+
+    def __init__(self, perms: T.Optional[str] = None, owner: T.Optional[str] = None,
+                 group: T.Optional[str] = None):
+        self.perms_s = perms
+        self.perms = self.perms_s_to_bits(perms)
+        self.owner = owner
+        self.group = group
+
+    def __repr__(self) -> str:
+        ret = ' int:
+        '''
+        Does the opposite of stat.filemode(), converts strings of the form
+        'rwxr-xr-x' to st_mode enums which can be passed to os.chmod()
+        '''
+        if perms_s is None:
+            # No perms specified, we will not touch the permissions
+            return -1
+        eg = 'rwxr-xr-x'
+        if not isinstance(perms_s, str):
+            msg = 'Install perms must be a string. For example, {!r}'
+            raise MesonException(msg.format(eg))
+        if len(perms_s) != 9 or not cls.symbolic_perms_regex.match(perms_s):
+            msg = 'File perms {!r} must be exactly 9 chars. For example, {!r}'
+            raise MesonException(msg.format(perms_s, eg))
+        perms = 0
+        # Owner perms
+        if perms_s[0] == 'r':
+            perms |= stat.S_IRUSR
+        if perms_s[1] == 'w':
+            perms |= stat.S_IWUSR
+        if perms_s[2] == 'x':
+            perms |= stat.S_IXUSR
+        elif perms_s[2] == 'S':
+            perms |= stat.S_ISUID
+        elif perms_s[2] == 's':
+            perms |= stat.S_IXUSR
+            perms |= stat.S_ISUID
+        # Group perms
+        if perms_s[3] == 'r':
+            perms |= stat.S_IRGRP
+        if perms_s[4] == 'w':
+            perms |= stat.S_IWGRP
+        if perms_s[5] == 'x':
+            perms |= stat.S_IXGRP
+        elif perms_s[5] == 'S':
+            perms |= stat.S_ISGID
+        elif perms_s[5] == 's':
+            perms |= stat.S_IXGRP
+            perms |= stat.S_ISGID
+        # Others perms
+        if perms_s[6] == 'r':
+            perms |= stat.S_IROTH
+        if perms_s[7] == 'w':
+            perms |= stat.S_IWOTH
+        if perms_s[8] == 'x':
+            perms |= stat.S_IXOTH
+        elif perms_s[8] == 'T':
+            perms |= stat.S_ISVTX
+        elif perms_s[8] == 't':
+            perms |= stat.S_IXOTH
+            perms |= stat.S_ISVTX
+        return perms
+
+class File:
+    def __init__(self, is_built: bool, subdir: str, fname: str):
+        self.is_built = is_built
+        self.subdir = subdir
+        self.fname = fname
+
+    def __str__(self) -> str:
+        return self.relative_name()
+
+    def __repr__(self) -> str:
+        ret = ' 'File':
+        if not os.path.isfile(os.path.join(source_root, subdir, fname)):
+            raise MesonException('File %s does not exist.' % fname)
+        return File(False, subdir, fname)
+
+    @staticmethod
+    def from_built_file(subdir: str, fname: str) -> 'File':
+        return File(True, subdir, fname)
+
+    @staticmethod
+    def from_absolute_file(fname: str) -> 'File':
+        return File(False, '', fname)
+
+    @lru_cache(maxsize=None)
+    def rel_to_builddir(self, build_to_src: str) -> str:
+        if self.is_built:
+            return self.relative_name()
+        else:
+            return os.path.join(build_to_src, self.subdir, self.fname)
+
+    @lru_cache(maxsize=None)
+    def absolute_path(self, srcdir: str, builddir: str) -> str:
+        absdir = srcdir
+        if self.is_built:
+            absdir = builddir
+        return os.path.join(absdir, self.relative_name())
+
+    def endswith(self, ending: str) -> bool:
+        return self.fname.endswith(ending)
+
+    def split(self, s: str) -> T.List[str]:
+        return self.fname.split(s)
+
+    def __eq__(self, other) -> bool:
+        if not isinstance(other, File):
+            return NotImplemented
+        return (self.fname, self.subdir, self.is_built) == (other.fname, other.subdir, other.is_built)
+
+    def __hash__(self) -> int:
+        return hash((self.fname, self.subdir, self.is_built))
+
+    @lru_cache(maxsize=None)
+    def relative_name(self) -> str:
+        return os.path.join(self.subdir, self.fname)
+
+
+def get_compiler_for_source(compilers: T.Iterable['CompilerType'], src: str) -> 'CompilerType':
+    """Given a set of compilers and a source, find the compiler for that source type."""
+    for comp in compilers:
+        if comp.can_compile(src):
+            return comp
+    raise MesonException('No specified compiler can handle file {!s}'.format(src))
+
+
+def classify_unity_sources(compilers: T.Iterable['CompilerType'], sources: T.Iterable[str]) -> T.Dict['CompilerType', T.List[str]]:
+    compsrclist = {}  # type: T.Dict[CompilerType, T.List[str]]
+    for src in sources:
+        comp = get_compiler_for_source(compilers, src)
+        if comp not in compsrclist:
+            compsrclist[comp] = [src]
+        else:
+            compsrclist[comp].append(src)
+    return compsrclist
+
+
+class OrderedEnum(Enum):
+    """
+    An Enum which additionally offers homogeneous ordered comparison.
+    """
+    def __ge__(self, other):
+        if self.__class__ is other.__class__:
+            return self.value >= other.value
+        return NotImplemented
+
+    def __gt__(self, other):
+        if self.__class__ is other.__class__:
+            return self.value > other.value
+        return NotImplemented
+
+    def __le__(self, other):
+        if self.__class__ is other.__class__:
+            return self.value <= other.value
+        return NotImplemented
+
+    def __lt__(self, other):
+        if self.__class__ is other.__class__:
+            return self.value < other.value
+        return NotImplemented
+
+
+class MachineChoice(OrderedEnum):
+
+    """Enum class representing one of the two abstract machine names used in
+    most places: the build, and host, machines.
+    """
+
+    BUILD = 0
+    HOST = 1
+
+    def get_lower_case_name(self) -> str:
+        return PerMachine('build', 'host')[self]
+
+    def get_prefix(self) -> str:
+        return PerMachine('build.', '')[self]
+
+
+class PerMachine(T.Generic[_T]):
+    def __init__(self, build: _T, host: _T):
+        self.build = build
+        self.host = host
+
+    def __getitem__(self, machine: MachineChoice) -> _T:
+        return {
+            MachineChoice.BUILD:  self.build,
+            MachineChoice.HOST:   self.host,
+        }[machine]
+
+    def __setitem__(self, machine: MachineChoice, val: _T) -> None:
+        setattr(self, machine.get_lower_case_name(), val)
+
+    def miss_defaulting(self) -> "PerMachineDefaultable[T.Optional[_T]]":
+        """Unset definition duplicated from their previous to None
+
+        This is the inverse of ''default_missing''. By removing defaulted
+        machines, we can elaborate the original and then redefault them and thus
+        avoid repeating the elaboration explicitly.
+        """
+        unfreeze = PerMachineDefaultable() # type: PerMachineDefaultable[T.Optional[_T]]
+        unfreeze.build = self.build
+        unfreeze.host = self.host
+        if unfreeze.host == unfreeze.build:
+            unfreeze.host = None
+        return unfreeze
+
+
+class PerThreeMachine(PerMachine[_T]):
+    """Like `PerMachine` but includes `target` too.
+
+    It turns out just one thing do we need track the target machine. There's no
+    need to computer the `target` field so we don't bother overriding the
+    `__getitem__`/`__setitem__` methods.
+    """
+    def __init__(self, build: _T, host: _T, target: _T):
+        super().__init__(build, host)
+        self.target = target
+
+    def miss_defaulting(self) -> "PerThreeMachineDefaultable[T.Optional[_T]]":
+        """Unset definition duplicated from their previous to None
+
+        This is the inverse of ''default_missing''. By removing defaulted
+        machines, we can elaborate the original and then redefault them and thus
+        avoid repeating the elaboration explicitly.
+        """
+        unfreeze = PerThreeMachineDefaultable() # type: PerThreeMachineDefaultable[T.Optional[_T]]
+        unfreeze.build = self.build
+        unfreeze.host = self.host
+        unfreeze.target = self.target
+        if unfreeze.target == unfreeze.host:
+            unfreeze.target = None
+        if unfreeze.host == unfreeze.build:
+            unfreeze.host = None
+        return unfreeze
+
+    def matches_build_machine(self, machine: MachineChoice) -> bool:
+        return self.build == self[machine]
+
+
+class PerMachineDefaultable(PerMachine[T.Optional[_T]]):
+    """Extends `PerMachine` with the ability to default from `None`s.
+    """
+    def __init__(self):
+        super().__init__(None, None)
+
+    def default_missing(self) -> "PerMachine[T.Optional[_T]]":
+        """Default host to build
+
+        This allows just specifying nothing in the native case, and just host in the
+        cross non-compiler case.
+        """
+        freeze = PerMachine(self.build, self.host)
+        if freeze.host is None:
+            freeze.host = freeze.build
+        return freeze
+
+
+class PerThreeMachineDefaultable(PerMachineDefaultable, PerThreeMachine[T.Optional[_T]]):
+    """Extends `PerThreeMachine` with the ability to default from `None`s.
+    """
+    def __init__(self):
+        PerThreeMachine.__init__(self, None, None, None)
+
+    def default_missing(self) -> "PerThreeMachine[T.Optional[_T]]":
+        """Default host to build and target to host.
+
+        This allows just specifying nothing in the native case, just host in the
+        cross non-compiler case, and just target in the native-built
+        cross-compiler case.
+        """
+        freeze = PerThreeMachine(self.build, self.host, self.target)
+        if freeze.host is None:
+            freeze.host = freeze.build
+        if freeze.target is None:
+            freeze.target = freeze.host
+        return freeze
+
+
+def is_sunos() -> bool:
+    return platform.system().lower() == 'sunos'
+
+
+def is_osx() -> bool:
+    return platform.system().lower() == 'darwin'
+
+
+def is_linux() -> bool:
+    return platform.system().lower() == 'linux'
+
+
+def is_android() -> bool:
+    return platform.system().lower() == 'android'
+
+
+def is_haiku() -> bool:
+    return platform.system().lower() == 'haiku'
+
+
+def is_openbsd() -> bool:
+    return platform.system().lower() == 'openbsd'
+
+
+def is_windows() -> bool:
+    platname = platform.system().lower()
+    return platname == 'windows' or 'mingw' in platname
+
+
+def is_cygwin() -> bool:
+    return platform.system().lower().startswith('cygwin')
+
+
+def is_debianlike() -> bool:
+    return os.path.isfile('/etc/debian_version')
+
+
+def is_dragonflybsd() -> bool:
+    return platform.system().lower() == 'dragonfly'
+
+
+def is_netbsd() -> bool:
+    return platform.system().lower() == 'netbsd'
+
+
+def is_freebsd() -> bool:
+    return platform.system().lower() == 'freebsd'
+
+def is_irix() -> bool:
+    return platform.system().startswith('irix')
+
+def is_hurd() -> bool:
+    return platform.system().lower() == 'gnu'
+
+def is_qnx() -> bool:
+    return platform.system().lower() == 'qnx'
+
+def exe_exists(arglist: T.List[str]) -> bool:
+    try:
+        if subprocess.run(arglist, timeout=10).returncode == 0:
+            return True
+    except (FileNotFoundError, subprocess.TimeoutExpired):
+        pass
+    return False
+
+
+@lru_cache(maxsize=None)
+def darwin_get_object_archs(objpath: str) -> T.List[str]:
+    '''
+    For a specific object (executable, static library, dylib, etc), run `lipo`
+    to fetch the list of archs supported by it. Supports both thin objects and
+    'fat' objects.
+    '''
+    _, stdo, stderr = Popen_safe(['lipo', '-info', objpath])
+    if not stdo:
+        mlog.debug('lipo {}: {}'.format(objpath, stderr))
+        return None
+    stdo = stdo.rsplit(': ', 1)[1]
+    # Convert from lipo-style archs to meson-style CPUs
+    stdo = stdo.replace('i386', 'x86')
+    stdo = stdo.replace('arm64', 'aarch64')
+    # Add generic name for armv7 and armv7s
+    if 'armv7' in stdo:
+        stdo += ' arm'
+    return stdo.split()
+
+
+def detect_vcs(source_dir: T.Union[str, Path]) -> T.Optional[T.Dict[str, str]]:
+    vcs_systems = [
+        dict(name = 'git',        cmd = 'git', repo_dir = '.git', get_rev = 'git describe --dirty=+', rev_regex = '(.*)', dep = '.git/logs/HEAD'),
+        dict(name = 'mercurial',  cmd = 'hg',  repo_dir = '.hg',  get_rev = 'hg id -i',               rev_regex = '(.*)', dep = '.hg/dirstate'),
+        dict(name = 'subversion', cmd = 'svn', repo_dir = '.svn', get_rev = 'svn info',               rev_regex = 'Revision: (.*)', dep = '.svn/wc.db'),
+        dict(name = 'bazaar',     cmd = 'bzr', repo_dir = '.bzr', get_rev = 'bzr revno',              rev_regex = '(.*)', dep = '.bzr'),
+    ]
+    if isinstance(source_dir, str):
+        source_dir = Path(source_dir)
+
+    parent_paths_and_self = collections.deque(source_dir.parents)
+    # Prepend the source directory to the front so we can check it;
+    # source_dir.parents doesn't include source_dir
+    parent_paths_and_self.appendleft(source_dir)
+    for curdir in parent_paths_and_self:
+        for vcs in vcs_systems:
+            if Path.is_dir(curdir.joinpath(vcs['repo_dir'])) and shutil.which(vcs['cmd']):
+                vcs['wc_dir'] = str(curdir)
+                return vcs
+    return None
+
+# a helper class which implements the same version ordering as RPM
+class Version:
+    def __init__(self, s: str):
+        self._s = s
+
+        # split into numeric, alphabetic and non-alphanumeric sequences
+        sequences1 = re.finditer(r'(\d+|[a-zA-Z]+|[^a-zA-Z\d]+)', s)
+
+        # non-alphanumeric separators are discarded
+        sequences2 = [m for m in sequences1 if not re.match(r'[^a-zA-Z\d]+', m.group(1))]
+
+        # numeric sequences are converted from strings to ints
+        sequences3 = [int(m.group(1)) if m.group(1).isdigit() else m.group(1) for m in sequences2]
+
+        self._v = sequences3
+
+    def __str__(self):
+        return '%s (V=%s)' % (self._s, str(self._v))
+
+    def __repr__(self):
+        return ''.format(self._s)
+
+    def __lt__(self, other):
+        if isinstance(other, Version):
+            return self.__cmp(other, operator.lt)
+        return NotImplemented
+
+    def __gt__(self, other):
+        if isinstance(other, Version):
+            return self.__cmp(other, operator.gt)
+        return NotImplemented
+
+    def __le__(self, other):
+        if isinstance(other, Version):
+            return self.__cmp(other, operator.le)
+        return NotImplemented
+
+    def __ge__(self, other):
+        if isinstance(other, Version):
+            return self.__cmp(other, operator.ge)
+        return NotImplemented
+
+    def __eq__(self, other):
+        if isinstance(other, Version):
+            return self._v == other._v
+        return NotImplemented
+
+    def __ne__(self, other):
+        if isinstance(other, Version):
+            return self._v != other._v
+        return NotImplemented
+
+    def __cmp(self, other: 'Version', comparator: T.Callable[[T.Any, T.Any], bool]) -> bool:
+        # compare each sequence in order
+        for ours, theirs in zip(self._v, other._v):
+            # sort a non-digit sequence before a digit sequence
+            ours_is_int = isinstance(ours, int)
+            theirs_is_int = isinstance(theirs, int)
+            if ours_is_int != theirs_is_int:
+                return comparator(ours_is_int, theirs_is_int)
+
+            if ours != theirs:
+                return comparator(ours, theirs)
+
+        # if equal length, all components have matched, so equal
+        # otherwise, the version with a suffix remaining is greater
+        return comparator(len(self._v), len(other._v))
+
+
+def _version_extract_cmpop(vstr2: str) -> T.Tuple[T.Callable[[T.Any, T.Any], bool], str]:
+    if vstr2.startswith('>='):
+        cmpop = operator.ge
+        vstr2 = vstr2[2:]
+    elif vstr2.startswith('<='):
+        cmpop = operator.le
+        vstr2 = vstr2[2:]
+    elif vstr2.startswith('!='):
+        cmpop = operator.ne
+        vstr2 = vstr2[2:]
+    elif vstr2.startswith('=='):
+        cmpop = operator.eq
+        vstr2 = vstr2[2:]
+    elif vstr2.startswith('='):
+        cmpop = operator.eq
+        vstr2 = vstr2[1:]
+    elif vstr2.startswith('>'):
+        cmpop = operator.gt
+        vstr2 = vstr2[1:]
+    elif vstr2.startswith('<'):
+        cmpop = operator.lt
+        vstr2 = vstr2[1:]
+    else:
+        cmpop = operator.eq
+
+    return (cmpop, vstr2)
+
+
+def version_compare(vstr1: str, vstr2: str) -> bool:
+    (cmpop, vstr2) = _version_extract_cmpop(vstr2)
+    return cmpop(Version(vstr1), Version(vstr2))
+
+
+def version_compare_many(vstr1: str, conditions: T.Union[str, T.Iterable[str]]) -> T.Tuple[bool, T.List[str], T.List[str]]:
+    if isinstance(conditions, str):
+        conditions = [conditions]
+    found = []
+    not_found = []
+    for req in conditions:
+        if not version_compare(vstr1, req):
+            not_found.append(req)
+        else:
+            found.append(req)
+    return not_found == [], not_found, found
+
+
+# determine if the minimum version satisfying the condition |condition| exceeds
+# the minimum version for a feature |minimum|
+def version_compare_condition_with_min(condition: str, minimum: str) -> bool:
+    if condition.startswith('>='):
+        cmpop = operator.le
+        condition = condition[2:]
+    elif condition.startswith('<='):
+        return False
+    elif condition.startswith('!='):
+        return False
+    elif condition.startswith('=='):
+        cmpop = operator.le
+        condition = condition[2:]
+    elif condition.startswith('='):
+        cmpop = operator.le
+        condition = condition[1:]
+    elif condition.startswith('>'):
+        cmpop = operator.lt
+        condition = condition[1:]
+    elif condition.startswith('<'):
+        return False
+    else:
+        cmpop = operator.le
+
+    # Declaring a project(meson_version: '>=0.46') and then using features in
+    # 0.46.0 is valid, because (knowing the meson versioning scheme) '0.46.0' is
+    # the lowest version which satisfies the constraint '>=0.46'.
+    #
+    # But this will fail here, because the minimum version required by the
+    # version constraint ('0.46') is strictly less (in our version comparison)
+    # than the minimum version needed for the feature ('0.46.0').
+    #
+    # Map versions in the constraint of the form '0.46' to '0.46.0', to embed
+    # this knowledge of the meson versioning scheme.
+    condition = condition.strip()
+    if re.match(r'^\d+.\d+$', condition):
+        condition += '.0'
+
+    return cmpop(Version(minimum), Version(condition))
+
+
+def default_libdir() -> str:
+    if is_debianlike():
+        try:
+            pc = subprocess.Popen(['dpkg-architecture', '-qDEB_HOST_MULTIARCH'],
+                                  stdout=subprocess.PIPE,
+                                  stderr=subprocess.DEVNULL)
+            (stdo, _) = pc.communicate()
+            if pc.returncode == 0:
+                archpath = stdo.decode().strip()
+                return 'lib/' + archpath
+        except Exception:
+            pass
+    if is_freebsd() or is_irix():
+        return 'lib'
+    if os.path.isdir('/usr/lib64') and not os.path.islink('/usr/lib64'):
+        return 'lib64'
+    return 'lib'
+
+
+def default_libexecdir() -> str:
+    # There is no way to auto-detect this, so it must be set at build time
+    return 'libexec'
+
+
+def default_prefix() -> str:
+    return 'c:/' if is_windows() else '/usr/local'
+
+
+def get_library_dirs() -> T.List[str]:
+    if is_windows():
+        return ['C:/mingw/lib'] # TODO: get programmatically
+    if is_osx():
+        return ['/usr/lib'] # TODO: get programmatically
+    # The following is probably Debian/Ubuntu specific.
+    # /usr/local/lib is first because it contains stuff
+    # installed by the sysadmin and is probably more up-to-date
+    # than /usr/lib. If you feel that this search order is
+    # problematic, please raise the issue on the mailing list.
+    unixdirs = ['/usr/local/lib', '/usr/lib', '/lib']
+
+    if is_freebsd():
+        return unixdirs
+    # FIXME: this needs to be further genericized for aarch64 etc.
+    machine = platform.machine()
+    if machine in ('i386', 'i486', 'i586', 'i686'):
+        plat = 'i386'
+    elif machine.startswith('arm'):
+        plat = 'arm'
+    else:
+        plat = ''
+
+    # Solaris puts 32-bit libraries in the main /lib & /usr/lib directories
+    # and 64-bit libraries in platform specific subdirectories.
+    if is_sunos():
+        if machine == 'i86pc':
+            plat = 'amd64'
+        elif machine.startswith('sun4'):
+            plat = 'sparcv9'
+
+    usr_platdir = Path('/usr/lib/') / plat
+    if usr_platdir.is_dir():
+        unixdirs += [str(x) for x in (usr_platdir).iterdir() if x.is_dir()]
+    if os.path.exists('/usr/lib64'):
+        unixdirs.append('/usr/lib64')
+
+    lib_platdir = Path('/lib/') / plat
+    if lib_platdir.is_dir():
+        unixdirs += [str(x) for x in (lib_platdir).iterdir() if x.is_dir()]
+    if os.path.exists('/lib64'):
+        unixdirs.append('/lib64')
+
+    return unixdirs
+
+
+def has_path_sep(name: str, sep: str = '/\\') -> bool:
+    'Checks if any of the specified @sep path separators are in @name'
+    for each in sep:
+        if each in name:
+            return True
+    return False
+
+
+if is_windows():
+    # shlex.split is not suitable for splitting command line on Window (https://bugs.python.org/issue1724822);
+    # shlex.quote is similarly problematic. Below are "proper" implementations of these functions according to
+    # https://docs.microsoft.com/en-us/cpp/c-language/parsing-c-command-line-arguments and
+    # https://blogs.msdn.microsoft.com/twistylittlepassagesallalike/2011/04/23/everyone-quotes-command-line-arguments-the-wrong-way/
+
+    _whitespace = ' \t\n\r'
+    _find_unsafe_char = re.compile(r'[{}"]'.format(_whitespace)).search
+
+    def quote_arg(arg: str) -> str:
+        if arg and not _find_unsafe_char(arg):
+            return arg
+
+        result = '"'
+        num_backslashes = 0
+        for c in arg:
+            if c == '\\':
+                num_backslashes += 1
+            else:
+                if c == '"':
+                    # Escape all backslashes and the following double quotation mark
+                    num_backslashes = num_backslashes * 2 + 1
+
+                result += num_backslashes * '\\' + c
+                num_backslashes = 0
+
+        # Escape all backslashes, but let the terminating double quotation
+        # mark we add below be interpreted as a metacharacter
+        result += (num_backslashes * 2) * '\\' + '"'
+        return result
+
+    def split_args(cmd: str) -> T.List[str]:
+        result = []
+        arg = ''
+        num_backslashes = 0
+        num_quotes = 0
+        in_quotes = False
+        for c in cmd:
+            if c == '\\':
+                num_backslashes += 1
+            else:
+                if c == '"' and not (num_backslashes % 2):
+                    # unescaped quote, eat it
+                    arg += (num_backslashes // 2) * '\\'
+                    num_quotes += 1
+                    in_quotes = not in_quotes
+                elif c in _whitespace and not in_quotes:
+                    if arg or num_quotes:
+                        # reached the end of the argument
+                        result.append(arg)
+                        arg = ''
+                        num_quotes = 0
+                else:
+                    if c == '"':
+                        # escaped quote
+                        num_backslashes = (num_backslashes - 1) // 2
+
+                    arg += num_backslashes * '\\' + c
+
+                num_backslashes = 0
+
+        if arg or num_quotes:
+            result.append(arg)
+
+        return result
+else:
+    def quote_arg(arg: str) -> str:
+        return shlex.quote(arg)
+
+    def split_args(cmd: str) -> T.List[str]:
+        return shlex.split(cmd)
+
+
+def join_args(args: T.Iterable[str]) -> str:
+    return ' '.join([quote_arg(x) for x in args])
+
+
+def do_replacement(regex: T.Pattern[str], line: str, variable_format: str,
+                   confdata: 'ConfigurationData') -> T.Tuple[str, T.Set[str]]:
+    missing_variables = set()  # type: T.Set[str]
+    if variable_format == 'cmake':
+        start_tag = '${'
+        backslash_tag = '\\${'
+    else:
+        assert variable_format in ['meson', 'cmake@']
+        start_tag = '@'
+        backslash_tag = '\\@'
+
+    def variable_replace(match: T.Match[str]) -> str:
+        # Pairs of escape characters before '@' or '\@'
+        if match.group(0).endswith('\\'):
+            num_escapes = match.end(0) - match.start(0)
+            return '\\' * (num_escapes // 2)
+        # Single escape character and '@'
+        elif match.group(0) == backslash_tag:
+            return start_tag
+        # Template variable to be replaced
+        else:
+            varname = match.group(1)
+            if varname in confdata:
+                (var, desc) = confdata.get(varname)
+                if isinstance(var, str):
+                    pass
+                elif isinstance(var, int):
+                    var = str(var)
+                else:
+                    msg = 'Tried to replace variable {!r} value with ' \
+                          'something other than a string or int: {!r}'
+                    raise MesonException(msg.format(varname, var))
+            else:
+                missing_variables.add(varname)
+                var = ''
+            return var
+    return re.sub(regex, variable_replace, line), missing_variables
+
+def do_define(regex: T.Pattern[str], line: str, confdata: 'ConfigurationData', variable_format: str) -> str:
+    def get_cmake_define(line: str, confdata: 'ConfigurationData') -> str:
+        arr = line.split()
+        define_value=[]
+        for token in arr[2:]:
+            try:
+                (v, desc) = confdata.get(token)
+                define_value += [v]
+            except KeyError:
+                define_value += [token]
+        return ' '.join(define_value)
+
+    arr = line.split()
+    if variable_format == 'meson' and len(arr) != 2:
+      raise MesonException('#mesondefine does not contain exactly two tokens: %s' % line.strip())
+
+    varname = arr[1]
+    try:
+        (v, desc) = confdata.get(varname)
+    except KeyError:
+        return '/* #undef %s */\n' % varname
+    if isinstance(v, bool):
+        if v:
+            return '#define %s\n' % varname
+        else:
+            return '#undef %s\n' % varname
+    elif isinstance(v, int):
+        return '#define %s %d\n' % (varname, v)
+    elif isinstance(v, str):
+        if variable_format == 'meson':
+            result = v
+        else:
+            result = get_cmake_define(line, confdata)
+        result = '#define %s %s\n' % (varname, result)
+        (result, missing_variable) = do_replacement(regex, result, variable_format, confdata)
+        return result
+    else:
+        raise MesonException('#mesondefine argument "%s" is of unknown type.' % varname)
+
+def do_conf_str (data: list, confdata: 'ConfigurationData', variable_format: str,
+                 encoding: str = 'utf-8') -> T.Tuple[T.List[str],T.Set[str], bool]:
+    def line_is_valid(line : str, variable_format: str):
+      if variable_format == 'meson':
+          if '#cmakedefine' in line:
+              return False
+      else: #cmake format
+         if '#mesondefine' in line:
+            return False
+      return True
+
+    # Only allow (a-z, A-Z, 0-9, _, -) as valid characters for a define
+    # Also allow escaping '@' with '\@'
+    if variable_format in ['meson', 'cmake@']:
+        regex = re.compile(r'(?:\\\\)+(?=\\?@)|\\@|@([-a-zA-Z0-9_]+)@')
+    elif variable_format == 'cmake':
+        regex = re.compile(r'(?:\\\\)+(?=\\?\$)|\\\${|\${([-a-zA-Z0-9_]+)}')
+    else:
+        raise MesonException('Format "{}" not handled'.format(variable_format))
+
+    search_token = '#mesondefine'
+    if variable_format != 'meson':
+        search_token = '#cmakedefine'
+
+    result = []
+    missing_variables = set()
+    # Detect when the configuration data is empty and no tokens were found
+    # during substitution so we can warn the user to use the `copy:` kwarg.
+    confdata_useless = not confdata.keys()
+    for line in data:
+        if line.startswith(search_token):
+            confdata_useless = False
+            line = do_define(regex, line, confdata, variable_format)
+        else:
+            if not line_is_valid(line,variable_format):
+                raise MesonException('Format "{}" mismatched'.format(variable_format))
+            line, missing = do_replacement(regex, line, variable_format, confdata)
+            missing_variables.update(missing)
+            if missing:
+                confdata_useless = False
+        result.append(line)
+
+    return result, missing_variables, confdata_useless
+
+def do_conf_file(src: str, dst: str, confdata: 'ConfigurationData', variable_format: str,
+                 encoding: str = 'utf-8') -> T.Tuple[T.Set[str], bool]:
+    try:
+        with open(src, encoding=encoding, newline='') as f:
+            data = f.readlines()
+    except Exception as e:
+        raise MesonException('Could not read input file %s: %s' % (src, str(e)))
+
+    (result, missing_variables, confdata_useless) = do_conf_str(data, confdata, variable_format, encoding)
+    dst_tmp = dst + '~'
+    try:
+        with open(dst_tmp, 'w', encoding=encoding, newline='') as f:
+            f.writelines(result)
+    except Exception as e:
+        raise MesonException('Could not write output file %s: %s' % (dst, str(e)))
+    shutil.copymode(src, dst_tmp)
+    replace_if_different(dst, dst_tmp)
+    return missing_variables, confdata_useless
+
+CONF_C_PRELUDE = '''/*
+ * Autogenerated by the Meson build system.
+ * Do not edit, your changes will be lost.
+ */
+
+#pragma once
+
+'''
+
+CONF_NASM_PRELUDE = '''; Autogenerated by the Meson build system.
+; Do not edit, your changes will be lost.
+
+'''
+
+def dump_conf_header(ofilename: str, cdata: 'ConfigurationData', output_format: str) -> None:
+    if output_format == 'c':
+        prelude = CONF_C_PRELUDE
+        prefix = '#'
+    elif output_format == 'nasm':
+        prelude = CONF_NASM_PRELUDE
+        prefix = '%'
+
+    ofilename_tmp = ofilename + '~'
+    with open(ofilename_tmp, 'w', encoding='utf-8') as ofile:
+        ofile.write(prelude)
+        for k in sorted(cdata.keys()):
+            (v, desc) = cdata.get(k)
+            if desc:
+                if output_format == 'c':
+                    ofile.write('/* %s */\n' % desc)
+                elif output_format == 'nasm':
+                    for line in desc.split('\n'):
+                        ofile.write('; %s\n' % line)
+            if isinstance(v, bool):
+                if v:
+                    ofile.write('%sdefine %s\n\n' % (prefix, k))
+                else:
+                    ofile.write('%sundef %s\n\n' % (prefix, k))
+            elif isinstance(v, (int, str)):
+                ofile.write('%sdefine %s %s\n\n' % (prefix, k, v))
+            else:
+                raise MesonException('Unknown data type in configuration file entry: ' + k)
+    replace_if_different(ofilename, ofilename_tmp)
+
+
+def replace_if_different(dst: str, dst_tmp: str) -> None:
+    # If contents are identical, don't touch the file to prevent
+    # unnecessary rebuilds.
+    different = True
+    try:
+        with open(dst, 'rb') as f1, open(dst_tmp, 'rb') as f2:
+            if f1.read() == f2.read():
+                different = False
+    except FileNotFoundError:
+        pass
+    if different:
+        os.replace(dst_tmp, dst)
+    else:
+        os.unlink(dst_tmp)
+
+
+@T.overload
+def unholder(item: 'ObjectHolder[_T]') -> _T: ...
+
+@T.overload
+def unholder(item: T.List['ObjectHolder[_T]']) -> T.List[_T]: ...
+
+@T.overload
+def unholder(item: T.List[_T]) -> T.List[_T]: ...
+
+@T.overload
+def unholder(item: T.List[T.Union[_T, 'ObjectHolder[_T]']]) -> T.List[_T]: ...
+
+def unholder(item):
+    """Get the held item of an object holder or list of object holders."""
+    if isinstance(item, list):
+        return [i.held_object if hasattr(i, 'held_object') else i for i in item]
+    if hasattr(item, 'held_object'):
+        return item.held_object
+    return item
+
+
+def listify(item: T.Any, flatten: bool = True) -> T.List[T.Any]:
+    '''
+    Returns a list with all args embedded in a list if they are not a list.
+    This function preserves order.
+    @flatten: Convert lists of lists to a flat list
+    '''
+    if not isinstance(item, list):
+        return [item]
+    result = []  # type: T.List[T.Any]
+    for i in item:
+        if flatten and isinstance(i, list):
+            result += listify(i, flatten=True)
+        else:
+            result.append(i)
+    return result
+
+
+def extract_as_list(dict_object: T.Dict[_T, _U], key: _T, pop: bool = False) -> T.List[_U]:
+    '''
+    Extracts all values from given dict_object and listifies them.
+    '''
+    fetch = dict_object.get
+    if pop:
+        fetch = dict_object.pop
+    # If there's only one key, we don't return a list with one element
+    return listify(fetch(key, []), flatten=True)
+
+
+def typeslistify(item: 'T.Union[_T, T.Sequence[_T]]',
+                 types: 'T.Union[T.Type[_T], T.Tuple[T.Type[_T]]]') -> T.List[_T]:
+    '''
+    Ensure that type(@item) is one of @types or a
+    list of items all of which are of type @types
+    '''
+    if isinstance(item, types):
+        item = T.cast(T.List[_T], [item])
+    if not isinstance(item, list):
+        raise MesonException('Item must be a list or one of {!r}'.format(types))
+    for i in item:
+        if i is not None and not isinstance(i, types):
+            raise MesonException('List item must be one of {!r}'.format(types))
+    return item
+
+
+def stringlistify(item: T.Union[T.Any, T.Sequence[T.Any]]) -> T.List[str]:
+    return typeslistify(item, str)
+
+
+def expand_arguments(args: T.Iterable[str]) -> T.Optional[T.List[str]]:
+    expended_args = []  # type: T.List[str]
+    for arg in args:
+        if not arg.startswith('@'):
+            expended_args.append(arg)
+            continue
+
+        args_file = arg[1:]
+        try:
+            with open(args_file) as f:
+                extended_args = f.read().split()
+            expended_args += extended_args
+        except Exception as e:
+            mlog.error('Expanding command line arguments:',  args_file, 'not found')
+            mlog.exception(e)
+            return None
+    return expended_args
+
+
+def partition(pred: T.Callable[[_T], object], iterable: T.Iterator[_T]) -> T.Tuple[T.Iterator[_T], T.Iterator[_T]]:
+    """Use a predicate to partition entries into false entries and true
+    entries.
+
+    >>> x, y = partition(is_odd, range(10))
+    >>> (list(x), list(y))
+    ([0, 2, 4, 6, 8], [1, 3, 5, 7, 9])
+    """
+    t1, t2 = tee(iterable)
+    return filterfalse(pred, t1), filter(pred, t2)
+
+
+def Popen_safe(args: T.List[str], write: T.Optional[str] = None,
+               stdout: T.Union[T.BinaryIO, int] = subprocess.PIPE,
+               stderr: T.Union[T.BinaryIO, int] = subprocess.PIPE,
+               **kwargs: T.Any) -> T.Tuple[subprocess.Popen, str, str]:
+    import locale
+    encoding = locale.getpreferredencoding()
+    # Redirect stdin to DEVNULL otherwise the command run by us here might mess
+    # up the console and ANSI colors will stop working on Windows.
+    if 'stdin' not in kwargs:
+        kwargs['stdin'] = subprocess.DEVNULL
+    if sys.version_info < (3, 6) or not sys.stdout.encoding or encoding.upper() != 'UTF-8':
+        p, o, e = Popen_safe_legacy(args, write=write, stdout=stdout, stderr=stderr, **kwargs)
+    else:
+        p = subprocess.Popen(args, universal_newlines=True, close_fds=False,
+                             stdout=stdout, stderr=stderr, **kwargs)
+        o, e = p.communicate(write)
+    # Sometimes the command that we run will call another command which will be
+    # without the above stdin workaround, so set the console mode again just in
+    # case.
+    mlog.setup_console()
+    return p, o, e
+
+
+def Popen_safe_legacy(args: T.List[str], write: T.Optional[str] = None,
+                      stdout: T.Union[T.BinaryIO, int] = subprocess.PIPE,
+                      stderr: T.Union[T.BinaryIO, int] = subprocess.PIPE,
+                      **kwargs: T.Any) -> T.Tuple[subprocess.Popen, str, str]:
+    p = subprocess.Popen(args, universal_newlines=False, close_fds=False,
+                         stdout=stdout, stderr=stderr, **kwargs)
+    input_ = None  # type: T.Optional[bytes]
+    if write is not None:
+        input_ = write.encode('utf-8')
+    o, e = p.communicate(input_)
+    if o is not None:
+        if sys.stdout.encoding:
+            o = o.decode(encoding=sys.stdout.encoding, errors='replace').replace('\r\n', '\n')
+        else:
+            o = o.decode(errors='replace').replace('\r\n', '\n')
+    if e is not None:
+        if sys.stderr.encoding:
+            e = e.decode(encoding=sys.stderr.encoding, errors='replace').replace('\r\n', '\n')
+        else:
+            e = e.decode(errors='replace').replace('\r\n', '\n')
+    return p, o, e
+
+
+def iter_regexin_iter(regexiter: T.Iterable[str], initer: T.Iterable[str]) -> T.Optional[str]:
+    '''
+    Takes each regular expression in @regexiter and tries to search for it in
+    every item in @initer. If there is a match, returns that match.
+    Else returns False.
+    '''
+    for regex in regexiter:
+        for ii in initer:
+            if not isinstance(ii, str):
+                continue
+            match = re.search(regex, ii)
+            if match:
+                return match.group()
+    return None
+
+
+def _substitute_values_check_errors(command: T.List[str], values: T.Dict[str, str]) -> None:
+    # Error checking
+    inregex = ['@INPUT([0-9]+)?@', '@PLAINNAME@', '@BASENAME@']  # type: T.List[str]
+    outregex = ['@OUTPUT([0-9]+)?@', '@OUTDIR@']                 # type: T.List[str]
+    if '@INPUT@' not in values:
+        # Error out if any input-derived templates are present in the command
+        match = iter_regexin_iter(inregex, command)
+        if match:
+            m = 'Command cannot have {!r}, since no input files were specified'
+            raise MesonException(m.format(match))
+    else:
+        if len(values['@INPUT@']) > 1:
+            # Error out if @PLAINNAME@ or @BASENAME@ is present in the command
+            match = iter_regexin_iter(inregex[1:], command)
+            if match:
+                raise MesonException('Command cannot have {!r} when there is '
+                                     'more than one input file'.format(match))
+        # Error out if an invalid @INPUTnn@ template was specified
+        for each in command:
+            if not isinstance(each, str):
+                continue
+            match2 = re.search(inregex[0], each)
+            if match2 and match2.group() not in values:
+                m = 'Command cannot have {!r} since there are only {!r} inputs'
+                raise MesonException(m.format(match2.group(), len(values['@INPUT@'])))
+    if '@OUTPUT@' not in values:
+        # Error out if any output-derived templates are present in the command
+        match = iter_regexin_iter(outregex, command)
+        if match:
+            m = 'Command cannot have {!r} since there are no outputs'
+            raise MesonException(m.format(match))
+    else:
+        # Error out if an invalid @OUTPUTnn@ template was specified
+        for each in command:
+            if not isinstance(each, str):
+                continue
+            match2 = re.search(outregex[0], each)
+            if match2 and match2.group() not in values:
+                m = 'Command cannot have {!r} since there are only {!r} outputs'
+                raise MesonException(m.format(match2.group(), len(values['@OUTPUT@'])))
+
+
+def substitute_values(command: T.List[str], values: T.Dict[str, str]) -> T.List[str]:
+    '''
+    Substitute the template strings in the @values dict into the list of
+    strings @command and return a new list. For a full list of the templates,
+    see get_filenames_templates_dict()
+
+    If multiple inputs/outputs are given in the @values dictionary, we
+    substitute @INPUT@ and @OUTPUT@ only if they are the entire string, not
+    just a part of it, and in that case we substitute *all* of them.
+    '''
+    # Error checking
+    _substitute_values_check_errors(command, values)
+    # Substitution
+    outcmd = []  # type: T.List[str]
+    rx_keys = [re.escape(key) for key in values if key not in ('@INPUT@', '@OUTPUT@')]
+    value_rx = re.compile('|'.join(rx_keys)) if rx_keys else None
+    for vv in command:
+        if not isinstance(vv, str):
+            outcmd.append(vv)
+        elif '@INPUT@' in vv:
+            inputs = values['@INPUT@']
+            if vv == '@INPUT@':
+                outcmd += inputs
+            elif len(inputs) == 1:
+                outcmd.append(vv.replace('@INPUT@', inputs[0]))
+            else:
+                raise MesonException("Command has '@INPUT@' as part of a "
+                                     "string and more than one input file")
+        elif '@OUTPUT@' in vv:
+            outputs = values['@OUTPUT@']
+            if vv == '@OUTPUT@':
+                outcmd += outputs
+            elif len(outputs) == 1:
+                outcmd.append(vv.replace('@OUTPUT@', outputs[0]))
+            else:
+                raise MesonException("Command has '@OUTPUT@' as part of a "
+                                     "string and more than one output file")
+        # Append values that are exactly a template string.
+        # This is faster than a string replace.
+        elif vv in values:
+            outcmd.append(values[vv])
+        # Substitute everything else with replacement
+        elif value_rx:
+            outcmd.append(value_rx.sub(lambda m: values[m.group(0)], vv))
+        else:
+            outcmd.append(vv)
+    return outcmd
+
+
+def get_filenames_templates_dict(inputs: T.List[str], outputs: T.List[str]) -> T.Dict[str, T.Union[str, T.List[str]]]:
+    '''
+    Create a dictionary with template strings as keys and values as values for
+    the following templates:
+
+    @INPUT@  - the full path to one or more input files, from @inputs
+    @OUTPUT@ - the full path to one or more output files, from @outputs
+    @OUTDIR@ - the full path to the directory containing the output files
+
+    If there is only one input file, the following keys are also created:
+
+    @PLAINNAME@ - the filename of the input file
+    @BASENAME@ - the filename of the input file with the extension removed
+
+    If there is more than one input file, the following keys are also created:
+
+    @INPUT0@, @INPUT1@, ... one for each input file
+
+    If there is more than one output file, the following keys are also created:
+
+    @OUTPUT0@, @OUTPUT1@, ... one for each output file
+    '''
+    values = {}  # type: T.Dict[str, T.Union[str, T.List[str]]]
+    # Gather values derived from the input
+    if inputs:
+        # We want to substitute all the inputs.
+        values['@INPUT@'] = inputs
+        for (ii, vv) in enumerate(inputs):
+            # Write out @INPUT0@, @INPUT1@, ...
+            values['@INPUT{}@'.format(ii)] = vv
+        if len(inputs) == 1:
+            # Just one value, substitute @PLAINNAME@ and @BASENAME@
+            values['@PLAINNAME@'] = plain = os.path.basename(inputs[0])
+            values['@BASENAME@'] = os.path.splitext(plain)[0]
+    if outputs:
+        # Gather values derived from the outputs, similar to above.
+        values['@OUTPUT@'] = outputs
+        for (ii, vv) in enumerate(outputs):
+            values['@OUTPUT{}@'.format(ii)] = vv
+        # Outdir should be the same for all outputs
+        values['@OUTDIR@'] = os.path.dirname(outputs[0])
+        # Many external programs fail on empty arguments.
+        if values['@OUTDIR@'] == '':
+            values['@OUTDIR@'] = '.'
+    return values
+
+
+def _make_tree_writable(topdir: str) -> None:
+    # Ensure all files and directories under topdir are writable
+    # (and readable) by owner.
+    for d, _, files in os.walk(topdir):
+        os.chmod(d, os.stat(d).st_mode | stat.S_IWRITE | stat.S_IREAD)
+        for fname in files:
+            fpath = os.path.join(d, fname)
+            if os.path.isfile(fpath):
+                os.chmod(fpath, os.stat(fpath).st_mode | stat.S_IWRITE | stat.S_IREAD)
+
+
+def windows_proof_rmtree(f: str) -> None:
+    # On Windows if anyone is holding a file open you can't
+    # delete it. As an example an anti virus scanner might
+    # be scanning files you are trying to delete. The only
+    # way to fix this is to try again and again.
+    delays = [0.1, 0.1, 0.2, 0.2, 0.2, 0.5, 0.5, 1, 1, 1, 1, 2]
+    # Start by making the tree wriable.
+    _make_tree_writable(f)
+    for d in delays:
+        try:
+            shutil.rmtree(f)
+            return
+        except FileNotFoundError:
+            return
+        except OSError:
+            time.sleep(d)
+    # Try one last time and throw if it fails.
+    shutil.rmtree(f)
+
+
+def windows_proof_rm(fpath: str) -> None:
+    """Like windows_proof_rmtree, but for a single file."""
+    if os.path.isfile(fpath):
+        os.chmod(fpath, os.stat(fpath).st_mode | stat.S_IWRITE | stat.S_IREAD)
+    delays = [0.1, 0.1, 0.2, 0.2, 0.2, 0.5, 0.5, 1, 1, 1, 1, 2]
+    for d in delays:
+        try:
+            os.unlink(fpath)
+            return
+        except FileNotFoundError:
+            return
+        except OSError:
+            time.sleep(d)
+    os.unlink(fpath)
+
+
+def detect_subprojects(spdir_name: str, current_dir: str = '',
+                       result: T.Optional[T.Dict[str, T.List[str]]] = None) -> T.Optional[T.Dict[str, T.List[str]]]:
+    if result is None:
+        result = {}
+    spdir = os.path.join(current_dir, spdir_name)
+    if not os.path.exists(spdir):
+        return result
+    for trial in glob(os.path.join(spdir, '*')):
+        basename = os.path.basename(trial)
+        if trial == 'packagecache':
+            continue
+        append_this = True
+        if os.path.isdir(trial):
+            detect_subprojects(spdir_name, trial, result)
+        elif trial.endswith('.wrap') and os.path.isfile(trial):
+            basename = os.path.splitext(basename)[0]
+        else:
+            append_this = False
+        if append_this:
+            if basename in result:
+                result[basename].append(trial)
+            else:
+                result[basename] = [trial]
+    return result
+
+
+def substring_is_in_list(substr: str, strlist: T.List[str]) -> bool:
+    for s in strlist:
+        if substr in s:
+            return True
+    return False
+
+
+class OrderedSet(T.MutableSet[_T]):
+    """A set that preserves the order in which items are added, by first
+    insertion.
+    """
+    def __init__(self, iterable: T.Optional[T.Iterable[_T]] = None):
+        # typing.OrderedDict is new in 3.7.2, so we can't use that, but we can
+        # use MutableMapping, which is fine in this case.
+        self.__container = collections.OrderedDict()  # type: T.MutableMapping[_T, None]
+        if iterable:
+            self.update(iterable)
+
+    def __contains__(self, value: object) -> bool:
+        return value in self.__container
+
+    def __iter__(self) -> T.Iterator[_T]:
+        return iter(self.__container.keys())
+
+    def __len__(self) -> int:
+        return len(self.__container)
+
+    def __repr__(self) -> str:
+        # Don't print 'OrderedSet("")' for an empty set.
+        if self.__container:
+            return 'OrderedSet("{}")'.format(
+                '", "'.join(repr(e) for e in self.__container.keys()))
+        return 'OrderedSet()'
+
+    def __reversed__(self) -> T.Iterator[_T]:
+        # Mypy is complaining that sets cant be reversed, which is true for
+        # unordered sets, but this is an ordered, set so reverse() makes sense.
+        return reversed(self.__container.keys())  # type: ignore
+
+    def add(self, value: _T) -> None:
+        self.__container[value] = None
+
+    def discard(self, value: _T) -> None:
+        if value in self.__container:
+            del self.__container[value]
+
+    def update(self, iterable: T.Iterable[_T]) -> None:
+        for item in iterable:
+            self.__container[item] = None
+
+    def difference(self, set_: T.Union[T.Set[_T], 'OrderedSet[_T]']) -> 'OrderedSet[_T]':
+        return type(self)(e for e in self if e not in set_)
+
+class BuildDirLock:
+
+    def __init__(self, builddir: str):
+        self.lockfilename = os.path.join(builddir, 'meson-private/meson.lock')
+
+    def __enter__(self):
+        self.lockfile = open(self.lockfilename, 'w')
+        try:
+            if have_fcntl:
+                fcntl.flock(self.lockfile, fcntl.LOCK_EX | fcntl.LOCK_NB)
+            elif have_msvcrt:
+                msvcrt.locking(self.lockfile.fileno(), msvcrt.LK_NBLCK, 1)
+        except (BlockingIOError, PermissionError):
+            self.lockfile.close()
+            raise MesonException('Some other Meson process is already using this build directory. Exiting.')
+
+    def __exit__(self, *args):
+        if have_fcntl:
+            fcntl.flock(self.lockfile, fcntl.LOCK_UN)
+        elif have_msvcrt:
+            msvcrt.locking(self.lockfile.fileno(), msvcrt.LK_UNLCK, 1)
+        self.lockfile.close()
+
+def relpath(path: str, start: str) -> str:
+    # On Windows a relative path can't be evaluated for paths on two different
+    # drives (i.e. c:\foo and f:\bar).  The only thing left to do is to use the
+    # original absolute path.
+    try:
+        return os.path.relpath(path, start)
+    except (TypeError, ValueError):
+        return path
+
+def path_is_in_root(path: Path, root: Path, resolve: bool = False) -> bool:
+    # Check wheter a path is within the root directory root
+    try:
+        if resolve:
+            path.resolve().relative_to(root.resolve())
+        else:
+            path.relative_to(root)
+    except ValueError:
+        return False
+    return True
+
+class LibType(Enum):
+
+    """Enumeration for library types."""
+
+    SHARED = 0
+    STATIC = 1
+    PREFER_SHARED = 2
+    PREFER_STATIC = 3
+
+
+class ProgressBarFallback:  # lgtm [py/iter-returns-non-self]
+    '''
+    Fallback progress bar implementation when tqdm is not found
+
+    Since this class is not an actual iterator, but only provides a minimal
+    fallback, it is safe to ignore the 'Iterator does not return self from
+    __iter__ method' warning.
+    '''
+    def __init__(self, iterable: T.Optional[T.Iterable[str]] = None, total: T.Optional[int] = None,
+                 bar_type: T.Optional[str] = None, desc: T.Optional[str] = None):
+        if iterable is not None:
+            self.iterable = iter(iterable)
+            return
+        self.total = total
+        self.done = 0
+        self.printed_dots = 0
+        if self.total and bar_type == 'download':
+            print('Download size:', self.total)
+        if desc:
+            print('{}: '.format(desc), end='')
+
+    # Pretend to be an iterator when called as one and don't print any
+    # progress
+    def __iter__(self) -> T.Iterator[str]:
+        return self.iterable
+
+    def __next__(self) -> str:
+        return next(self.iterable)
+
+    def print_dot(self) -> None:
+        print('.', end='')
+        sys.stdout.flush()
+        self.printed_dots += 1
+
+    def update(self, progress: int) -> None:
+        self.done += progress
+        if not self.total:
+            # Just print one dot per call if we don't have a total length
+            self.print_dot()
+            return
+        ratio = int(self.done / self.total * 10)
+        while self.printed_dots < ratio:
+            self.print_dot()
+
+    def close(self) -> None:
+        print('')
+
+try:
+    from tqdm import tqdm
+except ImportError:
+    # ideally we would use a typing.Protocol here, but it's part of typing_extensions until 3.8
+    ProgressBar = ProgressBarFallback  # type: T.Union[T.Type[ProgressBarFallback], T.Type[ProgressBarTqdm]]
+else:
+    class ProgressBarTqdm(tqdm):
+        def __init__(self, *args, bar_type: T.Optional[str] = None, **kwargs):
+            if bar_type == 'download':
+                kwargs.update({'unit': 'bytes', 'leave': True})
+            else:
+                kwargs.update({'leave': False})
+            kwargs['ncols'] = 100
+            super().__init__(*args, **kwargs)
+
+    ProgressBar = ProgressBarTqdm
+
+
+def get_wine_shortpath(winecmd: T.List[str], wine_paths: T.Sequence[str]) -> str:
+    """Get A short version of @wine_paths to avoid reaching WINEPATH number
+    of char limit.
+    """
+
+    wine_paths = list(OrderedSet(wine_paths))
+
+    getShortPathScript = '%s.bat' % str(uuid.uuid4()).lower()[:5]
+    with open(getShortPathScript, mode='w') as f:
+        f.write("@ECHO OFF\nfor %%x in (%*) do (\n echo|set /p=;%~sx\n)\n")
+        f.flush()
+    try:
+        with open(os.devnull, 'w') as stderr:
+            wine_path = subprocess.check_output(
+                winecmd +
+                ['cmd', '/C', getShortPathScript] + wine_paths,
+                stderr=stderr).decode('utf-8')
+    except subprocess.CalledProcessError as e:
+        print("Could not get short paths: %s" % e)
+        wine_path = ';'.join(wine_paths)
+    finally:
+        os.remove(getShortPathScript)
+    if len(wine_path) > 2048:
+        raise MesonException(
+            'WINEPATH size {} > 2048'
+            ' this will cause random failure.'.format(
+                len(wine_path)))
+
+    return wine_path.strip(';')
+
+
+def run_once(func: T.Callable[..., _T]) -> T.Callable[..., _T]:
+    ret = []  # type: T.List[_T]
+
+    @wraps(func)
+    def wrapper(*args: T.Any, **kwargs: T.Any) -> _T:
+        if ret:
+            return ret[0]
+
+        val = func(*args, **kwargs)
+        ret.append(val)
+        return val
+
+    return wrapper
+
+
+class OptionProxy(T.Generic[_T]):
+    def __init__(self, value: _T):
+        self.value = value
+
+
+class OptionOverrideProxy:
+
+    '''Mimic an option list but transparently override selected option
+    values.
+    '''
+
+    # TODO: the typing here could be made more explicit using a TypeDict from
+    # python 3.8 or typing_extensions
+
+    def __init__(self, overrides: T.Dict[str, T.Any], *options: 'OptionDictType'):
+        self.overrides = overrides
+        self.options = options
+
+    def __getitem__(self, option_name: str) -> T.Any:
+        for opts in self.options:
+            if option_name in opts:
+                return self._get_override(option_name, opts[option_name])
+        raise KeyError('Option not found', option_name)
+
+    def _get_override(self, option_name: str, base_opt: 'UserOption[T.Any]') -> T.Union[OptionProxy[T.Any], 'UserOption[T.Any]']:
+        if option_name in self.overrides:
+            return OptionProxy(base_opt.validate_value(self.overrides[option_name]))
+        return base_opt
+
+    def copy(self) -> T.Dict[str, T.Any]:
+        result = {}  # type: T.Dict[str, T.Any]
+        for opts in self.options:
+            for option_name in opts:
+                result[option_name] = self._get_override(option_name, opts[option_name])
+        return result
diff --git a/meson/mesonbuild/mesonmain.py b/meson/mesonbuild/mesonmain.py
new file mode 100644
index 000000000..e07c0df41
--- /dev/null
+++ b/meson/mesonbuild/mesonmain.py
@@ -0,0 +1,243 @@
+# Copyright 2012-2016 The Meson development team
+
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+
+#     http://www.apache.org/licenses/LICENSE-2.0
+
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import sys
+import os.path
+import importlib
+import traceback
+import argparse
+import codecs
+import shutil
+
+from . import mesonlib
+from . import mlog
+from . import mconf, mdist, minit, minstall, mintro, msetup, mtest, rewriter, msubprojects, munstable_coredata, mcompile
+from .mesonlib import MesonException
+from .environment import detect_msys2_arch
+from .wrap import wraptool
+
+
+# Note: when adding arguments, please also add them to the completion
+# scripts in $MESONSRC/data/shell-completions/
+class CommandLineParser:
+    def __init__(self):
+        self.term_width = shutil.get_terminal_size().columns
+        self.formatter = lambda prog: argparse.HelpFormatter(prog, max_help_position=int(self.term_width / 2), width=self.term_width)
+
+        self.commands = {}
+        self.hidden_commands = []
+        self.parser = argparse.ArgumentParser(prog='meson', formatter_class=self.formatter)
+        self.subparsers = self.parser.add_subparsers(title='Commands', dest='command',
+                                                     description='If no command is specified it defaults to setup command.')
+        self.add_command('setup', msetup.add_arguments, msetup.run,
+                         help_msg='Configure the project')
+        self.add_command('configure', mconf.add_arguments, mconf.run,
+                         help_msg='Change project options',)
+        self.add_command('dist', mdist.add_arguments, mdist.run,
+                         help_msg='Generate release archive',)
+        self.add_command('install', minstall.add_arguments, minstall.run,
+                         help_msg='Install the project')
+        self.add_command('introspect', mintro.add_arguments, mintro.run,
+                         help_msg='Introspect project')
+        self.add_command('init', minit.add_arguments, minit.run,
+                         help_msg='Create a new project')
+        self.add_command('test', mtest.add_arguments, mtest.run,
+                         help_msg='Run tests')
+        self.add_command('wrap', wraptool.add_arguments, wraptool.run,
+                         help_msg='Wrap tools')
+        self.add_command('subprojects', msubprojects.add_arguments, msubprojects.run,
+                         help_msg='Manage subprojects')
+        self.add_command('help', self.add_help_arguments, self.run_help_command,
+                         help_msg='Print help of a subcommand')
+        self.add_command('rewrite', lambda parser: rewriter.add_arguments(parser, self.formatter), rewriter.run,
+                         help_msg='Modify the project definition')
+        self.add_command('compile', mcompile.add_arguments, mcompile.run,
+                         help_msg='Build the project')
+
+        # Hidden commands
+        self.add_command('runpython', self.add_runpython_arguments, self.run_runpython_command,
+                         help_msg=argparse.SUPPRESS)
+        self.add_command('unstable-coredata', munstable_coredata.add_arguments, munstable_coredata.run,
+                         help_msg=argparse.SUPPRESS)
+
+    def add_command(self, name, add_arguments_func, run_func, help_msg, aliases=None):
+        aliases = aliases or []
+        # FIXME: Cannot have hidden subparser:
+        # https://bugs.python.org/issue22848
+        if help_msg == argparse.SUPPRESS:
+            p = argparse.ArgumentParser(prog='meson ' + name, formatter_class=self.formatter)
+            self.hidden_commands.append(name)
+        else:
+            p = self.subparsers.add_parser(name, help=help_msg, aliases=aliases, formatter_class=self.formatter)
+        add_arguments_func(p)
+        p.set_defaults(run_func=run_func)
+        for i in [name] + aliases:
+            self.commands[i] = p
+
+    def add_runpython_arguments(self, parser):
+        parser.add_argument('-c', action='store_true', dest='eval_arg', default=False)
+        parser.add_argument('script_file')
+        parser.add_argument('script_args', nargs=argparse.REMAINDER)
+
+    def run_runpython_command(self, options):
+        import runpy
+        if options.eval_arg:
+            exec(options.script_file)
+        else:
+            sys.argv[1:] = options.script_args
+            sys.path.insert(0, os.path.dirname(options.script_file))
+            runpy.run_path(options.script_file, run_name='__main__')
+        return 0
+
+    def add_help_arguments(self, parser):
+        parser.add_argument('command', nargs='?')
+
+    def run_help_command(self, options):
+        if options.command:
+            self.commands[options.command].print_help()
+        else:
+            self.parser.print_help()
+        return 0
+
+    def run(self, args):
+        print_py35_notice = False
+        # If first arg is not a known command, assume user wants to run the setup
+        # command.
+        known_commands = list(self.commands.keys()) + ['-h', '--help']
+        if not args or args[0] not in known_commands:
+            args = ['setup'] + args
+
+        # Hidden commands have their own parser instead of using the global one
+        if args[0] in self.hidden_commands:
+            command = args[0]
+            parser = self.commands[command]
+            args = args[1:]
+        else:
+            parser = self.parser
+            command = None
+
+        args = mesonlib.expand_arguments(args)
+        options = parser.parse_args(args)
+
+        if command is None:
+            command = options.command
+
+        if command in ('setup', 'compile', 'test', 'install') and sys.version_info < (3, 6):
+            print_py35_notice = True
+
+        try:
+            return options.run_func(options)
+        except MesonException as e:
+            mlog.exception(e)
+            logfile = mlog.shutdown()
+            if logfile is not None:
+                mlog.log("\nA full log can be found at", mlog.bold(logfile))
+            if os.environ.get('MESON_FORCE_BACKTRACE'):
+                raise
+            return 1
+        except Exception:
+            if os.environ.get('MESON_FORCE_BACKTRACE'):
+                raise
+            traceback.print_exc()
+            return 2
+        finally:
+            if print_py35_notice:
+                mlog.notice('You are using Python 3.5 which is EOL. Starting with v0.57, '
+                            'Meson will require Python 3.6 or newer', fatal=False)
+            mlog.shutdown()
+
+def run_script_command(script_name, script_args):
+    # Map script name to module name for those that doesn't match
+    script_map = {'exe': 'meson_exe',
+                  'install': 'meson_install',
+                  'delsuffix': 'delwithsuffix',
+                  'gtkdoc': 'gtkdochelper',
+                  'hotdoc': 'hotdochelper',
+                  'regencheck': 'regen_checker'}
+    module_name = script_map.get(script_name, script_name)
+
+    try:
+        module = importlib.import_module('mesonbuild.scripts.' + module_name)
+    except ModuleNotFoundError as e:
+        mlog.exception(e)
+        return 1
+
+    try:
+        return module.run(script_args)
+    except MesonException as e:
+        mlog.error('Error in {} helper script:'.format(script_name))
+        mlog.exception(e)
+        return 1
+
+def ensure_stdout_accepts_unicode():
+    if sys.stdout.encoding and not sys.stdout.encoding.upper().startswith('UTF-'):
+        if sys.version_info >= (3, 7):
+            sys.stdout.reconfigure(errors='surrogateescape')
+        else:
+            sys.stdout = codecs.getwriter('utf-8')(sys.stdout.detach(),
+                                                   errors='surrogateescape')
+            sys.stdout.encoding = 'UTF-8'
+            if not hasattr(sys.stdout, 'buffer'):
+                sys.stdout.buffer = sys.stdout.raw if hasattr(sys.stdout, 'raw') else sys.stdout
+
+def run(original_args, mainfile):
+    if sys.version_info < (3, 5):
+        print('Meson works correctly only with python 3.5+.')
+        print('You have python {}.'.format(sys.version))
+        print('Please update your environment')
+        return 1
+
+    # Meson gets confused if stdout can't output Unicode, if the
+    # locale isn't Unicode, just force stdout to accept it. This tries
+    # to emulate enough of PEP 540 to work elsewhere.
+    ensure_stdout_accepts_unicode()
+
+    # https://github.com/mesonbuild/meson/issues/3653
+    if sys.platform.lower() == 'msys':
+        mlog.error('This python3 seems to be msys/python on MSYS2 Windows, which is known to have path semantics incompatible with Meson')
+        msys2_arch = detect_msys2_arch()
+        if msys2_arch:
+            mlog.error('Please install and use mingw-w64-i686-python3 and/or mingw-w64-x86_64-python3 with Pacman')
+        else:
+            mlog.error('Please download and use Python as detailed at: https://mesonbuild.com/Getting-meson.html')
+        return 2
+
+    # Set the meson command that will be used to run scripts and so on
+    mesonlib.set_meson_command(mainfile)
+
+    args = original_args[:]
+
+    # Special handling of internal commands called from backends, they don't
+    # need to go through argparse.
+    if len(args) >= 2 and args[0] == '--internal':
+        if args[1] == 'regenerate':
+            # Rewrite "meson --internal regenerate" command line to
+            # "meson --reconfigure"
+            args = ['--reconfigure'] + args[2:]
+        else:
+            return run_script_command(args[1], args[2:])
+
+    return CommandLineParser().run(args)
+
+def main():
+    # Always resolve the command path so Ninja can find it for regen, tests, etc.
+    if 'meson.exe' in sys.executable:
+        assert(os.path.isabs(sys.executable))
+        launcher = sys.executable
+    else:
+        launcher = os.path.realpath(sys.argv[0])
+    return run(sys.argv[1:], launcher)
+
+if __name__ == '__main__':
+    sys.exit(main())
diff --git a/meson/mesonbuild/minit.py b/meson/mesonbuild/minit.py
new file mode 100644
index 000000000..06e6dd4f3
--- /dev/null
+++ b/meson/mesonbuild/minit.py
@@ -0,0 +1,181 @@
+# Copyright 2017 The Meson development team
+
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+
+#     http://www.apache.org/licenses/LICENSE-2.0
+
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+"""Code that creates simple startup projects."""
+
+from pathlib import Path
+from enum import Enum
+import subprocess
+import shutil
+import sys
+import os
+import re
+from glob import glob
+from mesonbuild import mesonlib
+from mesonbuild.environment import detect_ninja
+from mesonbuild.templates.samplefactory import sameple_generator
+
+'''
+we currently have one meson template at this time.
+'''
+from mesonbuild.templates.mesontemplates import create_meson_build
+
+FORTRAN_SUFFIXES = {'.f', '.for', '.F', '.f90', '.F90'}
+LANG_SUFFIXES = {'.c', '.cc', '.cpp', '.cs', '.cu', '.d', '.m', '.mm', '.rs', '.java'} | FORTRAN_SUFFIXES
+LANG_SUPPORTED = {'c', 'cpp', 'cs', 'cuda', 'd', 'fortran', 'java', 'rust', 'objc', 'objcpp'}
+
+DEFAULT_PROJECT = 'executable'
+DEFAULT_VERSION = '0.1'
+class DEFAULT_TYPES(Enum):
+    EXE = 'executable'
+    LIB = 'library'
+
+INFO_MESSAGE = '''Sample project created. To build it run the
+following commands:
+
+meson setup builddir
+meson compile -C builddir
+'''
+
+
+def create_sample(options) -> None:
+    '''
+    Based on what arguments are passed we check for a match in language
+    then check for project type and create new Meson samples project.
+    '''
+    sample_gen = sameple_generator(options)
+    if options.type == DEFAULT_TYPES['EXE'].value:
+        sample_gen.create_executable()
+    elif options.type == DEFAULT_TYPES['LIB'].value:
+        sample_gen.create_library()
+    else:
+        raise RuntimeError('Unreachable code')
+    print(INFO_MESSAGE)
+
+def autodetect_options(options, sample: bool = False) -> None:
+    '''
+    Here we autodetect options for args not passed in so don't have to
+    think about it.
+    '''
+    if not options.name:
+        options.name = Path().resolve().stem
+        if not re.match('[a-zA-Z_][a-zA-Z0-9]*', options.name) and sample:
+            raise SystemExit('Name of current directory "{}" is not usable as a sample project name.\n'
+                             'Specify a project name with --name.'.format(options.name))
+        print('Using "{}" (name of current directory) as project name.'
+              .format(options.name))
+    if not options.executable:
+        options.executable = options.name
+        print('Using "{}" (project name) as name of executable to build.'
+              .format(options.executable))
+    if sample:
+        # The rest of the autodetection is not applicable to generating sample projects.
+        return
+    if not options.srcfiles:
+        srcfiles = []
+        for f in (f for f in Path().iterdir() if f.is_file()):
+            if f.suffix in LANG_SUFFIXES:
+                srcfiles.append(f)
+        if not srcfiles:
+            raise SystemExit('No recognizable source files found.\n'
+                             'Run meson init in an empty directory to create a sample project.')
+        options.srcfiles = srcfiles
+        print("Detected source files: " + ' '.join(map(str, srcfiles)))
+    options.srcfiles = [Path(f) for f in options.srcfiles]
+    if not options.language:
+        for f in options.srcfiles:
+            if f.suffix == '.c':
+                options.language = 'c'
+                break
+            if f.suffix in ('.cc', '.cpp'):
+                options.language = 'cpp'
+                break
+            if f.suffix in '.cs':
+                options.language = 'cs'
+                break
+            if f.suffix == '.cu':
+                options.language = 'cuda'
+                break
+            if f.suffix == '.d':
+                options.language = 'd'
+                break
+            if f.suffix in FORTRAN_SUFFIXES:
+                options.language = 'fortran'
+                break
+            if f.suffix == '.rs':
+                options.language = 'rust'
+                break
+            if f.suffix == '.m':
+                options.language = 'objc'
+                break
+            if f.suffix == '.mm':
+                options.language = 'objcpp'
+                break
+            if f.suffix == '.java':
+                options.language = 'java'
+                break
+        if not options.language:
+            raise SystemExit("Can't autodetect language, please specify it with -l.")
+        print("Detected language: " + options.language)
+
+def add_arguments(parser):
+    '''
+    Here we add args for that the user can passed when making a new
+    Meson project.
+    '''
+    parser.add_argument("srcfiles", metavar="sourcefile", nargs="*", help="source files. default: all recognized files in current directory")
+    parser.add_argument('-C', default='.', dest='wd', help='directory to cd into before running')
+    parser.add_argument("-n", "--name", help="project name. default: name of current directory")
+    parser.add_argument("-e", "--executable", help="executable name. default: project name")
+    parser.add_argument("-d", "--deps", help="dependencies, comma-separated")
+    parser.add_argument("-l", "--language", choices=sorted(LANG_SUPPORTED), help="project language. default: autodetected based on source files")
+    parser.add_argument("-b", "--build", action='store_true', help="build after generation")
+    parser.add_argument("--builddir", default='build', help="directory for build")
+    parser.add_argument("-f", "--force", action="store_true", help="force overwrite of existing files and directories.")
+    parser.add_argument('--type', default=DEFAULT_PROJECT, choices=('executable', 'library'), help="project type. default: {} based project".format(DEFAULT_PROJECT))
+    parser.add_argument('--version', default=DEFAULT_VERSION, help="project version. default: {}".format(DEFAULT_VERSION))
+
+def run(options) -> int:
+    '''
+    Here we generate the new Meson sample project.
+    '''
+    if not Path(options.wd).exists():
+        sys.exit('Project source root directory not found. Run this command in source directory root.')
+    os.chdir(options.wd)
+
+    if not glob('*'):
+        autodetect_options(options, sample=True)
+        if not options.language:
+            print('Defaulting to generating a C language project.')
+            options.language = 'c'
+        create_sample(options)
+    else:
+        autodetect_options(options)
+        if Path('meson.build').is_file() and not options.force:
+            raise SystemExit('meson.build already exists. Use --force to overwrite.')
+        create_meson_build(options)
+    if options.build:
+        if Path(options.builddir).is_dir() and options.force:
+            print('Build directory already exists, deleting it.')
+            shutil.rmtree(options.builddir)
+        print('Building...')
+        cmd = mesonlib.meson_command + [options.builddir]
+        ret = subprocess.run(cmd)
+        if ret.returncode:
+            raise SystemExit
+        cmd = detect_ninja() + ['-C', options.builddir]
+        ret = subprocess.run(cmd)
+        if ret.returncode:
+            raise SystemExit
+    return 0
diff --git a/meson/mesonbuild/minstall.py b/meson/mesonbuild/minstall.py
new file mode 100644
index 000000000..b95ff3645
--- /dev/null
+++ b/meson/mesonbuild/minstall.py
@@ -0,0 +1,545 @@
+# Copyright 2013-2014 The Meson development team
+
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+
+#     http://www.apache.org/licenses/LICENSE-2.0
+
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import sys, pickle, os, shutil, subprocess, errno
+import argparse
+import shlex
+from glob import glob
+from .scripts import depfixer
+from .scripts import destdir_join
+from .mesonlib import is_windows, Popen_safe
+from .mtest import rebuild_all
+try:
+    from __main__ import __file__ as main_file
+except ImportError:
+    # Happens when running as meson.exe which is native Windows.
+    # This is only used for pkexec which is not, so this is fine.
+    main_file = None
+
+symlink_warning = '''Warning: trying to copy a symlink that points to a file. This will copy the file,
+but this will be changed in a future version of Meson to copy the symlink as is. Please update your
+build definitions so that it will not break when the change happens.'''
+
+selinux_updates = []
+
+def add_arguments(parser):
+    parser.add_argument('-C', default='.', dest='wd',
+                        help='directory to cd into before running')
+    parser.add_argument('--profile-self', action='store_true', dest='profile',
+                        help=argparse.SUPPRESS)
+    parser.add_argument('--no-rebuild', default=False, action='store_true',
+                        help='Do not rebuild before installing.')
+    parser.add_argument('--only-changed', default=False, action='store_true',
+                        help='Only overwrite files that are older than the copied file.')
+    parser.add_argument('--quiet', default=False, action='store_true',
+                        help='Do not print every file that was installed.')
+
+class DirMaker:
+    def __init__(self, lf):
+        self.lf = lf
+        self.dirs = []
+
+    def makedirs(self, path, exist_ok=False):
+        dirname = os.path.normpath(path)
+        dirs = []
+        while dirname != os.path.dirname(dirname):
+            if not os.path.exists(dirname):
+                dirs.append(dirname)
+            dirname = os.path.dirname(dirname)
+        os.makedirs(path, exist_ok=exist_ok)
+
+        # store the directories in creation order, with the parent directory
+        # before the child directories. Future calls of makedir() will not
+        # create the parent directories, so the last element in the list is
+        # the last one to be created. That is the first one to be removed on
+        # __exit__
+        dirs.reverse()
+        self.dirs += dirs
+
+    def __enter__(self):
+        return self
+
+    def __exit__(self, exception_type, value, traceback):
+        self.dirs.reverse()
+        for d in self.dirs:
+            append_to_log(self.lf, d)
+
+def is_executable(path, follow_symlinks=False):
+    '''Checks whether any of the "x" bits are set in the source file mode.'''
+    return bool(os.stat(path, follow_symlinks=follow_symlinks).st_mode & 0o111)
+
+def append_to_log(lf, line):
+    lf.write(line)
+    if not line.endswith('\n'):
+        lf.write('\n')
+    lf.flush()
+
+def set_chown(path, user=None, group=None, dir_fd=None, follow_symlinks=True):
+    # shutil.chown will call os.chown without passing all the parameters
+    # and particularly follow_symlinks, thus we replace it temporary
+    # with a lambda with all the parameters so that follow_symlinks will
+    # be actually passed properly.
+    # Not nice, but better than actually rewriting shutil.chown until
+    # this python bug is fixed: https://bugs.python.org/issue18108
+    real_os_chown = os.chown
+    try:
+        os.chown = lambda p, u, g: real_os_chown(p, u, g,
+                                                 dir_fd=dir_fd,
+                                                 follow_symlinks=follow_symlinks)
+        shutil.chown(path, user, group)
+    except Exception:
+        raise
+    finally:
+        os.chown = real_os_chown
+
+def set_chmod(path, mode, dir_fd=None, follow_symlinks=True):
+    try:
+        os.chmod(path, mode, dir_fd=dir_fd, follow_symlinks=follow_symlinks)
+    except (NotImplementedError, OSError, SystemError):
+        if not os.path.islink(path):
+            os.chmod(path, mode, dir_fd=dir_fd)
+
+def sanitize_permissions(path, umask):
+    if umask == 'preserve':
+        return
+    new_perms = 0o777 if is_executable(path, follow_symlinks=False) else 0o666
+    new_perms &= ~umask
+    try:
+        set_chmod(path, new_perms, follow_symlinks=False)
+    except PermissionError as e:
+        msg = '{!r}: Unable to set permissions {!r}: {}, ignoring...'
+        print(msg.format(path, new_perms, e.strerror))
+
+def set_mode(path, mode, default_umask):
+    if mode is None or (mode.perms_s or mode.owner or mode.group) is None:
+        # Just sanitize permissions with the default umask
+        sanitize_permissions(path, default_umask)
+        return
+    # No chown() on Windows, and must set one of owner/group
+    if not is_windows() and (mode.owner or mode.group) is not None:
+        try:
+            set_chown(path, mode.owner, mode.group, follow_symlinks=False)
+        except PermissionError as e:
+            msg = '{!r}: Unable to set owner {!r} and group {!r}: {}, ignoring...'
+            print(msg.format(path, mode.owner, mode.group, e.strerror))
+        except LookupError:
+            msg = '{!r}: Non-existent owner {!r} or group {!r}: ignoring...'
+            print(msg.format(path, mode.owner, mode.group))
+        except OSError as e:
+            if e.errno == errno.EINVAL:
+                msg = '{!r}: Non-existent numeric owner {!r} or group {!r}: ignoring...'
+                print(msg.format(path, mode.owner, mode.group))
+            else:
+                raise
+    # Must set permissions *after* setting owner/group otherwise the
+    # setuid/setgid bits will get wiped by chmod
+    # NOTE: On Windows you can set read/write perms; the rest are ignored
+    if mode.perms_s is not None:
+        try:
+            set_chmod(path, mode.perms, follow_symlinks=False)
+        except PermissionError as e:
+            msg = '{!r}: Unable to set permissions {!r}: {}, ignoring...'
+            print(msg.format(path, mode.perms_s, e.strerror))
+    else:
+        sanitize_permissions(path, default_umask)
+
+def restore_selinux_contexts():
+    '''
+    Restores the SELinux context for files in @selinux_updates
+
+    If $DESTDIR is set, do not warn if the call fails.
+    '''
+    try:
+        subprocess.check_call(['selinuxenabled'])
+    except (FileNotFoundError, NotADirectoryError, PermissionError, subprocess.CalledProcessError):
+        # If we don't have selinux or selinuxenabled returned 1, failure
+        # is ignored quietly.
+        return
+
+    if not shutil.which('restorecon'):
+        # If we don't have restorecon, failure is ignored quietly.
+        return
+
+    if not selinux_updates:
+        # If the list of files is empty, do not try to call restorecon.
+        return
+
+    with subprocess.Popen(['restorecon', '-F', '-f-', '-0'],
+                          stdin=subprocess.PIPE, stdout=subprocess.PIPE, stderr=subprocess.PIPE) as proc:
+        out, err = proc.communicate(input=b'\0'.join(os.fsencode(f) for f in selinux_updates) + b'\0')
+        if proc.returncode != 0 and not os.environ.get('DESTDIR'):
+            print('Failed to restore SELinux context of installed files...',
+                  'Standard output:', out.decode(),
+                  'Standard error:', err.decode(), sep='\n')
+
+
+def get_destdir_path(d, path):
+    if os.path.isabs(path):
+        output = destdir_join(d.destdir, path)
+    else:
+        output = os.path.join(d.fullprefix, path)
+    return output
+
+
+def check_for_stampfile(fname):
+    '''Some languages e.g. Rust have output files
+    whose names are not known at configure time.
+    Check if this is the case and return the real
+    file instead.'''
+    if fname.endswith('.so') or fname.endswith('.dll'):
+        if os.stat(fname).st_size == 0:
+            (base, suffix) = os.path.splitext(fname)
+            files = glob(base + '-*' + suffix)
+            if len(files) > 1:
+                print("Stale dynamic library files in build dir. Can't install.")
+                sys.exit(1)
+            if len(files) == 1:
+                return files[0]
+    elif fname.endswith('.a') or fname.endswith('.lib'):
+        if os.stat(fname).st_size == 0:
+            (base, suffix) = os.path.splitext(fname)
+            files = glob(base + '-*' + '.rlib')
+            if len(files) > 1:
+                print("Stale static library files in build dir. Can't install.")
+                sys.exit(1)
+            if len(files) == 1:
+                return files[0]
+    return fname
+
+class Installer:
+
+    def __init__(self, options, lf):
+        self.did_install_something = False
+        self.options = options
+        self.lf = lf
+        self.preserved_file_count = 0
+
+    def log(self, msg):
+        if not self.options.quiet:
+            print(msg)
+
+    def should_preserve_existing_file(self, from_file, to_file):
+        if not self.options.only_changed:
+            return False
+        # Always replace danging symlinks
+        if os.path.islink(from_file) and not os.path.isfile(from_file):
+            return False
+        from_time = os.stat(from_file).st_mtime
+        to_time = os.stat(to_file).st_mtime
+        return from_time <= to_time
+
+    def do_copyfile(self, from_file, to_file, makedirs=None):
+        outdir = os.path.split(to_file)[0]
+        if not os.path.isfile(from_file) and not os.path.islink(from_file):
+            raise RuntimeError('Tried to install something that isn\'t a file:'
+                               '{!r}'.format(from_file))
+        # copyfile fails if the target file already exists, so remove it to
+        # allow overwriting a previous install. If the target is not a file, we
+        # want to give a readable error.
+        if os.path.exists(to_file):
+            if not os.path.isfile(to_file):
+                raise RuntimeError('Destination {!r} already exists and is not '
+                                   'a file'.format(to_file))
+            if self.should_preserve_existing_file(from_file, to_file):
+                append_to_log(self.lf, '# Preserving old file {}\n'.format(to_file))
+                self.preserved_file_count += 1
+                return False
+            os.remove(to_file)
+        elif makedirs:
+            # Unpack tuple
+            dirmaker, outdir = makedirs
+            # Create dirs if needed
+            dirmaker.makedirs(outdir, exist_ok=True)
+        self.log('Installing {} to {}'.format(from_file, outdir))
+        if os.path.islink(from_file):
+            if not os.path.exists(from_file):
+                # Dangling symlink. Replicate as is.
+                shutil.copy(from_file, outdir, follow_symlinks=False)
+            else:
+                # Remove this entire branch when changing the behaviour to duplicate
+                # symlinks rather than copying what they point to.
+                print(symlink_warning)
+                shutil.copyfile(from_file, to_file)
+                shutil.copystat(from_file, to_file)
+        else:
+            shutil.copyfile(from_file, to_file)
+            shutil.copystat(from_file, to_file)
+        selinux_updates.append(to_file)
+        append_to_log(self.lf, to_file)
+        return True
+
+    def do_copydir(self, data, src_dir, dst_dir, exclude, install_mode):
+        '''
+        Copies the contents of directory @src_dir into @dst_dir.
+
+        For directory
+            /foo/
+              bar/
+                excluded
+                foobar
+              file
+        do_copydir(..., '/foo', '/dst/dir', {'bar/excluded'}) creates
+            /dst/
+              dir/
+                bar/
+                  foobar
+                file
+
+        Args:
+            src_dir: str, absolute path to the source directory
+            dst_dir: str, absolute path to the destination directory
+            exclude: (set(str), set(str)), tuple of (exclude_files, exclude_dirs),
+                     each element of the set is a path relative to src_dir.
+        '''
+        if not os.path.isabs(src_dir):
+            raise ValueError('src_dir must be absolute, got {}'.format(src_dir))
+        if not os.path.isabs(dst_dir):
+            raise ValueError('dst_dir must be absolute, got {}'.format(dst_dir))
+        if exclude is not None:
+            exclude_files, exclude_dirs = exclude
+        else:
+            exclude_files = exclude_dirs = set()
+        for root, dirs, files in os.walk(src_dir):
+            assert os.path.isabs(root)
+            for d in dirs[:]:
+                abs_src = os.path.join(root, d)
+                filepart = os.path.relpath(abs_src, start=src_dir)
+                abs_dst = os.path.join(dst_dir, filepart)
+                # Remove these so they aren't visited by os.walk at all.
+                if filepart in exclude_dirs:
+                    dirs.remove(d)
+                    continue
+                if os.path.isdir(abs_dst):
+                    continue
+                if os.path.exists(abs_dst):
+                    print('Tried to copy directory {} but a file of that name already exists.'.format(abs_dst))
+                    sys.exit(1)
+                data.dirmaker.makedirs(abs_dst)
+                shutil.copystat(abs_src, abs_dst)
+                sanitize_permissions(abs_dst, data.install_umask)
+            for f in files:
+                abs_src = os.path.join(root, f)
+                filepart = os.path.relpath(abs_src, start=src_dir)
+                if filepart in exclude_files:
+                    continue
+                abs_dst = os.path.join(dst_dir, filepart)
+                if os.path.isdir(abs_dst):
+                    print('Tried to copy file {} but a directory of that name already exists.'.format(abs_dst))
+                    sys.exit(1)
+                parent_dir = os.path.dirname(abs_dst)
+                if not os.path.isdir(parent_dir):
+                    os.mkdir(parent_dir)
+                    shutil.copystat(os.path.dirname(abs_src), parent_dir)
+                # FIXME: what about symlinks?
+                self.do_copyfile(abs_src, abs_dst)
+                set_mode(abs_dst, install_mode, data.install_umask)
+
+    def do_install(self, datafilename):
+        with open(datafilename, 'rb') as ifile:
+            d = pickle.load(ifile)
+        d.destdir = os.environ.get('DESTDIR', '')
+        d.fullprefix = destdir_join(d.destdir, d.prefix)
+
+        if d.install_umask != 'preserve':
+            os.umask(d.install_umask)
+
+        self.did_install_something = False
+        try:
+            d.dirmaker = DirMaker(self.lf)
+            with d.dirmaker:
+                self.install_subdirs(d) # Must be first, because it needs to delete the old subtree.
+                self.install_targets(d)
+                self.install_headers(d)
+                self.install_man(d)
+                self.install_data(d)
+                restore_selinux_contexts()
+                self.run_install_script(d)
+                if not self.did_install_something:
+                    self.log('Nothing to install.')
+                if not self.options.quiet and self.preserved_file_count > 0:
+                    self.log('Preserved {} unchanged files, see {} for the full list'
+                             .format(self.preserved_file_count, os.path.normpath(self.lf.name)))
+        except PermissionError:
+            if shutil.which('pkexec') is not None and 'PKEXEC_UID' not in os.environ:
+                print('Installation failed due to insufficient permissions.')
+                print('Attempting to use polkit to gain elevated privileges...')
+                os.execlp('pkexec', 'pkexec', sys.executable, main_file, *sys.argv[1:],
+                          '-C', os.getcwd())
+            else:
+                raise
+
+    def install_subdirs(self, d):
+        for (src_dir, dst_dir, mode, exclude) in d.install_subdirs:
+            self.did_install_something = True
+            full_dst_dir = get_destdir_path(d, dst_dir)
+            self.log('Installing subdir {} to {}'.format(src_dir, full_dst_dir))
+            d.dirmaker.makedirs(full_dst_dir, exist_ok=True)
+            self.do_copydir(d, src_dir, full_dst_dir, exclude, mode)
+
+    def install_data(self, d):
+        for i in d.data:
+            fullfilename = i[0]
+            outfilename = get_destdir_path(d, i[1])
+            mode = i[2]
+            outdir = os.path.dirname(outfilename)
+            if self.do_copyfile(fullfilename, outfilename, makedirs=(d.dirmaker, outdir)):
+                self.did_install_something = True
+            set_mode(outfilename, mode, d.install_umask)
+
+    def install_man(self, d):
+        for m in d.man:
+            full_source_filename = m[0]
+            outfilename = get_destdir_path(d, m[1])
+            outdir = os.path.dirname(outfilename)
+            install_mode = m[2]
+            if self.do_copyfile(full_source_filename, outfilename, makedirs=(d.dirmaker, outdir)):
+                self.did_install_something = True
+            set_mode(outfilename, install_mode, d.install_umask)
+
+    def install_headers(self, d):
+        for t in d.headers:
+            fullfilename = t[0]
+            fname = os.path.basename(fullfilename)
+            outdir = get_destdir_path(d, t[1])
+            outfilename = os.path.join(outdir, fname)
+            install_mode = t[2]
+            if self.do_copyfile(fullfilename, outfilename, makedirs=(d.dirmaker, outdir)):
+                self.did_install_something = True
+            set_mode(outfilename, install_mode, d.install_umask)
+
+    def run_install_script(self, d):
+        env = {'MESON_SOURCE_ROOT': d.source_dir,
+               'MESON_BUILD_ROOT': d.build_dir,
+               'MESON_INSTALL_PREFIX': d.prefix,
+               'MESON_INSTALL_DESTDIR_PREFIX': d.fullprefix,
+               'MESONINTROSPECT': ' '.join([shlex.quote(x) for x in d.mesonintrospect]),
+               }
+        if self.options.quiet:
+            env['MESON_INSTALL_QUIET'] = '1'
+
+        child_env = os.environ.copy()
+        child_env.update(env)
+
+        for i in d.install_scripts:
+            self.did_install_something = True  # Custom script must report itself if it does nothing.
+            script = i['exe']
+            args = i['args']
+            name = ' '.join(script + args)
+            self.log('Running custom install script {!r}'.format(name))
+            try:
+                rc = subprocess.call(script + args, env=child_env)
+            except OSError:
+                print('FAILED: install script \'{}\' could not be run, stopped'.format(name))
+                # POSIX shells return 127 when a command could not be found
+                sys.exit(127)
+            if rc != 0:
+                print('FAILED: install script \'{}\' exit code {}, stopped'.format(name, rc))
+                sys.exit(rc)
+
+    def install_targets(self, d):
+        for t in d.targets:
+            if not os.path.exists(t.fname):
+                # For example, import libraries of shared modules are optional
+                if t.optional:
+                    self.log('File {!r} not found, skipping'.format(t.fname))
+                    continue
+                else:
+                    raise RuntimeError('File {!r} could not be found'.format(t.fname))
+            file_copied = False # not set when a directory is copied
+            fname = check_for_stampfile(t.fname)
+            outdir = get_destdir_path(d, t.outdir)
+            outname = os.path.join(outdir, os.path.basename(fname))
+            final_path = os.path.join(d.prefix, t.outdir, os.path.basename(fname))
+            aliases = t.aliases
+            should_strip = t.strip
+            install_rpath = t.install_rpath
+            install_name_mappings = t.install_name_mappings
+            install_mode = t.install_mode
+            if not os.path.exists(fname):
+                raise RuntimeError('File {!r} could not be found'.format(fname))
+            elif os.path.isfile(fname):
+                file_copied = self.do_copyfile(fname, outname, makedirs=(d.dirmaker, outdir))
+                set_mode(outname, install_mode, d.install_umask)
+                if should_strip and d.strip_bin is not None:
+                    if fname.endswith('.jar'):
+                        self.log('Not stripping jar target:', os.path.basename(fname))
+                        continue
+                    self.log('Stripping target {!r} using {}.'.format(fname, d.strip_bin[0]))
+                    ps, stdo, stde = Popen_safe(d.strip_bin + [outname])
+                    if ps.returncode != 0:
+                        print('Could not strip file.\n')
+                        print('Stdout:\n{}\n'.format(stdo))
+                        print('Stderr:\n{}\n'.format(stde))
+                        sys.exit(1)
+                if fname.endswith('.js'):
+                    # Emscripten outputs js files and optionally a wasm file.
+                    # If one was generated, install it as well.
+                    wasm_source = os.path.splitext(fname)[0] + '.wasm'
+                    if os.path.exists(wasm_source):
+                        wasm_output = os.path.splitext(outname)[0] + '.wasm'
+                        file_copied = self.do_copyfile(wasm_source, wasm_output)
+            elif os.path.isdir(fname):
+                fname = os.path.join(d.build_dir, fname.rstrip('/'))
+                outname = os.path.join(outdir, os.path.basename(fname))
+                d.dirmaker.makedirs(outdir, exist_ok=True)
+                self.do_copydir(d, fname, outname, None, install_mode)
+            else:
+                raise RuntimeError('Unknown file type for {!r}'.format(fname))
+            printed_symlink_error = False
+            for alias, to in aliases.items():
+                try:
+                    symlinkfilename = os.path.join(outdir, alias)
+                    try:
+                        os.remove(symlinkfilename)
+                    except FileNotFoundError:
+                        pass
+                    os.symlink(to, symlinkfilename)
+                    append_to_log(self.lf, symlinkfilename)
+                except (NotImplementedError, OSError):
+                    if not printed_symlink_error:
+                        print("Symlink creation does not work on this platform. "
+                              "Skipping all symlinking.")
+                        printed_symlink_error = True
+            if file_copied:
+                self.did_install_something = True
+                try:
+                    depfixer.fix_rpath(outname, t.rpath_dirs_to_remove, install_rpath, final_path,
+                                       install_name_mappings, verbose=False)
+                except SystemExit as e:
+                    if isinstance(e.code, int) and e.code == 0:
+                        pass
+                    else:
+                        raise
+
+def run(opts):
+    datafilename = 'meson-private/install.dat'
+    private_dir = os.path.dirname(datafilename)
+    log_dir = os.path.join(private_dir, '../meson-logs')
+    if not os.path.exists(os.path.join(opts.wd, datafilename)):
+        sys.exit('Install data not found. Run this command in build directory root.')
+    if not opts.no_rebuild:
+        if not rebuild_all(opts.wd):
+            sys.exit(-1)
+    os.chdir(opts.wd)
+    with open(os.path.join(log_dir, 'install-log.txt'), 'w') as lf:
+        installer = Installer(opts, lf)
+        append_to_log(lf, '# List of files installed by Meson')
+        append_to_log(lf, '# Does not contain files installed by custom scripts.')
+        if opts.profile:
+            import cProfile as profile
+            fname = os.path.join(private_dir, 'profile-installer.log')
+            profile.runctx('installer.do_install(datafilename)', globals(), locals(), filename=fname)
+        else:
+            installer.do_install(datafilename)
+    return 0
diff --git a/meson/mesonbuild/mintro.py b/meson/mesonbuild/mintro.py
new file mode 100644
index 000000000..cccedaa59
--- /dev/null
+++ b/meson/mesonbuild/mintro.py
@@ -0,0 +1,531 @@
+# Copyright 2014-2016 The Meson development team
+
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+
+#     http://www.apache.org/licenses/LICENSE-2.0
+
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+"""This is a helper script for IDE developers. It allows you to
+extract information such as list of targets, files, compiler flags,
+tests and so on. All output is in JSON for simple parsing.
+
+Currently only works for the Ninja backend. Others use generated
+project files and don't need this info."""
+
+import collections
+import json
+from . import build, coredata as cdata
+from . import mesonlib
+from .ast import IntrospectionInterpreter, build_target_functions, AstConditionLevel, AstIDGenerator, AstIndentationGenerator, AstJSONPrinter
+from . import mlog
+from .backend import backends
+from .mparser import BaseNode, FunctionNode, ArrayNode, ArgumentNode, StringNode
+from .interpreter import Interpreter
+from pathlib import PurePath
+import typing as T
+import os
+
+def get_meson_info_file(info_dir: str) -> str:
+    return os.path.join(info_dir, 'meson-info.json')
+
+def get_meson_introspection_version() -> str:
+    return '1.0.0'
+
+def get_meson_introspection_required_version() -> T.List[str]:
+    return ['>=1.0', '<2.0']
+
+class IntroCommand:
+    def __init__(self,
+                 desc: str,
+                 func: T.Optional[T.Callable[[], T.Union[dict, list]]] = None,
+                 no_bd: T.Optional[T.Callable[[IntrospectionInterpreter], T.Union[dict, list]]] = None) -> None:
+        self.desc = desc + '.'
+        self.func = func
+        self.no_bd = no_bd
+
+def get_meson_introspection_types(coredata: T.Optional[cdata.CoreData] = None,
+                                  builddata: T.Optional[build.Build] = None,
+                                  backend: T.Optional[backends.Backend] = None,
+                                  sourcedir: T.Optional[str] = None) -> 'T.Mapping[str, IntroCommand]':
+    if backend and builddata:
+        benchmarkdata = backend.create_test_serialisation(builddata.get_benchmarks())
+        testdata = backend.create_test_serialisation(builddata.get_tests())
+        installdata = backend.create_install_data()
+        interpreter = backend.interpreter
+    else:
+        benchmarkdata = testdata = installdata = None
+
+    # Enforce key order for argparse
+    return collections.OrderedDict([
+        ('ast', IntroCommand('Dump the AST of the meson file', no_bd=dump_ast)),
+        ('benchmarks', IntroCommand('List all benchmarks', func=lambda: list_benchmarks(benchmarkdata))),
+        ('buildoptions', IntroCommand('List all build options', func=lambda: list_buildoptions(coredata), no_bd=list_buildoptions_from_source)),
+        ('buildsystem_files', IntroCommand('List files that make up the build system', func=lambda: list_buildsystem_files(builddata, interpreter))),
+        ('dependencies', IntroCommand('List external dependencies', func=lambda: list_deps(coredata), no_bd=list_deps_from_source)),
+        ('scan_dependencies', IntroCommand('Scan for dependencies used in the meson.build file', no_bd=list_deps_from_source)),
+        ('installed', IntroCommand('List all installed files and directories', func=lambda: list_installed(installdata))),
+        ('projectinfo', IntroCommand('Information about projects', func=lambda: list_projinfo(builddata), no_bd=list_projinfo_from_source)),
+        ('targets', IntroCommand('List top level targets', func=lambda: list_targets(builddata, installdata, backend), no_bd=list_targets_from_source)),
+        ('tests', IntroCommand('List all unit tests', func=lambda: list_tests(testdata))),
+    ])
+
+def add_arguments(parser):
+    intro_types = get_meson_introspection_types()
+    for key, val in intro_types.items():
+        flag = '--' + key.replace('_', '-')
+        parser.add_argument(flag, action='store_true', dest=key, default=False, help=val.desc)
+
+    parser.add_argument('--backend', choices=sorted(cdata.backendlist), dest='backend', default='ninja',
+                        help='The backend to use for the --buildoptions introspection.')
+    parser.add_argument('-a', '--all', action='store_true', dest='all', default=False,
+                        help='Print all available information.')
+    parser.add_argument('-i', '--indent', action='store_true', dest='indent', default=False,
+                        help='Enable pretty printed JSON.')
+    parser.add_argument('-f', '--force-object-output', action='store_true', dest='force_dict', default=False,
+                        help='Always use the new JSON format for multiple entries (even for 0 and 1 introspection commands)')
+    parser.add_argument('builddir', nargs='?', default='.', help='The build directory')
+
+def dump_ast(intr: IntrospectionInterpreter) -> T.Dict[str, T.Any]:
+    printer = AstJSONPrinter()
+    intr.ast.accept(printer)
+    return printer.result
+
+def list_installed(installdata):
+    res = {}
+    if installdata is not None:
+        for t in installdata.targets:
+            res[os.path.join(installdata.build_dir, t.fname)] = \
+                os.path.join(installdata.prefix, t.outdir, os.path.basename(t.fname))
+            for alias in t.aliases.keys():
+                res[os.path.join(installdata.build_dir, alias)] = \
+                    os.path.join(installdata.prefix, t.outdir, os.path.basename(alias))
+        for path, installpath, _ in installdata.data:
+            res[path] = os.path.join(installdata.prefix, installpath)
+        for path, installdir, _ in installdata.headers:
+            res[path] = os.path.join(installdata.prefix, installdir, os.path.basename(path))
+        for path, installpath, _ in installdata.man:
+            res[path] = os.path.join(installdata.prefix, installpath)
+        for path, installpath, _, _ in installdata.install_subdirs:
+            res[path] = os.path.join(installdata.prefix, installpath)
+    return res
+
+def list_targets_from_source(intr: IntrospectionInterpreter) -> T.List[T.Dict[str, T.Union[bool, str, T.List[T.Union[str, T.Dict[str, T.Union[str, T.List[str], bool]]]]]]]:
+    tlist = []  # type: T.List[T.Dict[str, T.Union[bool, str, T.List[T.Union[str, T.Dict[str, T.Union[str, T.List[str], bool]]]]]]]
+    for i in intr.targets:
+        sources = []  # type: T.List[str]
+        for n in i['sources']:
+            args = []  # type: T.List[BaseNode]
+            if isinstance(n, FunctionNode):
+                args = list(n.args.arguments)
+                if n.func_name in build_target_functions:
+                    args.pop(0)
+            elif isinstance(n, ArrayNode):
+                args = n.args.arguments
+            elif isinstance(n, ArgumentNode):
+                args = n.arguments
+            for j in args:
+                if isinstance(j, StringNode):
+                    assert isinstance(j.value, str)
+                    sources += [j.value]
+                elif isinstance(j, str):
+                    sources += [j]
+
+        tlist += [{
+            'name': i['name'],
+            'id': i['id'],
+            'type': i['type'],
+            'defined_in': i['defined_in'],
+            'filename': [os.path.join(i['subdir'], x) for x in i['outputs']],
+            'build_by_default': i['build_by_default'],
+            'target_sources': [{
+                'language': 'unknown',
+                'compiler': [],
+                'parameters': [],
+                'sources': [os.path.normpath(os.path.join(os.path.abspath(intr.source_root), i['subdir'], x)) for x in sources],
+                'generated_sources': []
+            }],
+            'subproject': None, # Subprojects are not supported
+            'installed': i['installed']
+        }]
+
+    return tlist
+
+def list_targets(builddata: build.Build, installdata, backend: backends.Backend) -> T.List[T.Dict[str, T.Union[bool, str, T.List[T.Union[str, T.Dict[str, T.Union[str, T.List[str], bool]]]]]]]:
+    tlist = []  # type: T.List[T.Dict[str, T.Union[bool, str, T.List[T.Union[str, T.Dict[str, T.Union[str, T.List[str], bool]]]]]]]
+    build_dir = builddata.environment.get_build_dir()
+    src_dir = builddata.environment.get_source_dir()
+
+    # Fast lookup table for installation files
+    install_lookuptable = {}
+    for i in installdata.targets:
+        out = [os.path.join(installdata.prefix, i.outdir, os.path.basename(i.fname))]
+        out += [os.path.join(installdata.prefix, i.outdir, os.path.basename(x)) for x in i.aliases]
+        install_lookuptable[os.path.basename(i.fname)] = [str(PurePath(x)) for x in out]
+
+    for (idname, target) in builddata.get_targets().items():
+        if not isinstance(target, build.Target):
+            raise RuntimeError('The target object in `builddata.get_targets()` is not of type `build.Target`. Please file a bug with this error message.')
+
+        t = {
+            'name': target.get_basename(),
+            'id': idname,
+            'type': target.get_typename(),
+            'defined_in': os.path.normpath(os.path.join(src_dir, target.subdir, 'meson.build')),
+            'filename': [os.path.join(build_dir, target.subdir, x) for x in target.get_outputs()],
+            'build_by_default': target.build_by_default,
+            'target_sources': backend.get_introspection_data(idname, target),
+            'subproject': target.subproject or None
+        }
+
+        if installdata and target.should_install():
+            t['installed'] = True
+            t['install_filename'] = [install_lookuptable.get(x, [None]) for x in target.get_outputs()]
+            t['install_filename'] = [x for sublist in t['install_filename'] for x in sublist]  # flatten the list
+        else:
+            t['installed'] = False
+        tlist.append(t)
+    return tlist
+
+def list_buildoptions_from_source(intr: IntrospectionInterpreter) -> T.List[T.Dict[str, T.Union[str, bool, int, T.List[str]]]]:
+    subprojects = [i['name'] for i in intr.project_data['subprojects']]
+    return list_buildoptions(intr.coredata, subprojects)
+
+def list_buildoptions(coredata: cdata.CoreData, subprojects: T.Optional[T.List[str]] = None) -> T.List[T.Dict[str, T.Union[str, bool, int, T.List[str]]]]:
+    optlist = []  # type: T.List[T.Dict[str, T.Union[str, bool, int, T.List[str]]]]
+
+    dir_option_names = ['bindir',
+                        'datadir',
+                        'includedir',
+                        'infodir',
+                        'libdir',
+                        'libexecdir',
+                        'localedir',
+                        'localstatedir',
+                        'mandir',
+                        'prefix',
+                        'sbindir',
+                        'sharedstatedir',
+                        'sysconfdir']
+    test_option_names = ['errorlogs',
+                         'stdsplit']
+    core_option_names = [k for k in coredata.builtins if k not in dir_option_names + test_option_names]
+
+    dir_options = {k: o for k, o in coredata.builtins.items() if k in dir_option_names}
+    test_options = {k: o for k, o in coredata.builtins.items() if k in test_option_names}
+    core_options = {k: o for k, o in coredata.builtins.items() if k in core_option_names}
+
+    if subprojects:
+        # Add per subproject built-in options
+        sub_core_options = {}
+        for sub in subprojects:
+            for k, o in core_options.items():
+                if o.yielding:
+                    continue
+                sub_core_options[sub + ':' + k] = o
+        core_options.update(sub_core_options)
+
+    def add_keys(options: T.Dict[str, cdata.UserOption], section: str, machine: str = 'any') -> None:
+        for key in sorted(options.keys()):
+            opt = options[key]
+            optdict = {'name': key, 'value': opt.value, 'section': section, 'machine': machine}
+            if isinstance(opt, cdata.UserStringOption):
+                typestr = 'string'
+            elif isinstance(opt, cdata.UserBooleanOption):
+                typestr = 'boolean'
+            elif isinstance(opt, cdata.UserComboOption):
+                optdict['choices'] = opt.choices
+                typestr = 'combo'
+            elif isinstance(opt, cdata.UserIntegerOption):
+                typestr = 'integer'
+            elif isinstance(opt, cdata.UserArrayOption):
+                typestr = 'array'
+            else:
+                raise RuntimeError("Unknown option type")
+            optdict['type'] = typestr
+            optdict['description'] = opt.description
+            optlist.append(optdict)
+
+    add_keys(core_options, 'core')
+    add_keys(coredata.builtins_per_machine.host, 'core', machine='host')
+    add_keys(
+        {'build.' + k: o for k, o in coredata.builtins_per_machine.build.items()},
+        'core',
+        machine='build',
+    )
+    add_keys(coredata.backend_options, 'backend')
+    add_keys(coredata.base_options, 'base')
+    add_keys(
+        dict(coredata.flatten_lang_iterator(coredata.compiler_options.host.items())),
+        'compiler',
+        machine='host',
+    )
+    add_keys(
+        {
+            'build.' + k: o for k, o in
+            coredata.flatten_lang_iterator(coredata.compiler_options.build.items())
+        },
+        'compiler',
+        machine='build',
+    )
+    add_keys(dir_options, 'directory')
+    add_keys(coredata.user_options, 'user')
+    add_keys(test_options, 'test')
+    return optlist
+
+def find_buildsystem_files_list(src_dir) -> T.List[str]:
+    # I feel dirty about this. But only slightly.
+    filelist = []  # type: T.List[str]
+    for root, _, files in os.walk(src_dir):
+        for f in files:
+            if f == 'meson.build' or f == 'meson_options.txt':
+                filelist.append(os.path.relpath(os.path.join(root, f), src_dir))
+    return filelist
+
+def list_buildsystem_files(builddata: build.Build, interpreter: Interpreter) -> T.List[str]:
+    src_dir = builddata.environment.get_source_dir()
+    filelist = interpreter.get_build_def_files()
+    filelist = [PurePath(src_dir, x).as_posix() for x in filelist]
+    return filelist
+
+def list_deps_from_source(intr: IntrospectionInterpreter) -> T.List[T.Dict[str, T.Union[str, bool]]]:
+    result = []  # type: T.List[T.Dict[str, T.Union[str, bool]]]
+    for i in intr.dependencies:
+        keys = [
+            'name',
+            'required',
+            'version',
+            'has_fallback',
+            'conditional',
+        ]
+        result += [{k: v for k, v in i.items() if k in keys}]
+    return result
+
+def list_deps(coredata: cdata.CoreData) -> T.List[T.Dict[str, T.Union[str, T.List[str]]]]:
+    result = []  # type: T.List[T.Dict[str, T.Union[str, T.List[str]]]]
+    for d in coredata.deps.host.values():
+        if d.found():
+            result += [{'name': d.name,
+                        'version': d.get_version(),
+                        'compile_args': d.get_compile_args(),
+                        'link_args': d.get_link_args()}]
+    return result
+
+def get_test_list(testdata) -> T.List[T.Dict[str, T.Union[str, int, T.List[str], T.Dict[str, str]]]]:
+    result = []  # type: T.List[T.Dict[str, T.Union[str, int, T.List[str], T.Dict[str, str]]]]
+    for t in testdata:
+        to = {}
+        if isinstance(t.fname, str):
+            fname = [t.fname]
+        else:
+            fname = t.fname
+        to['cmd'] = fname + t.cmd_args
+        if isinstance(t.env, build.EnvironmentVariables):
+            to['env'] = t.env.get_env({})
+        else:
+            to['env'] = t.env
+        to['name'] = t.name
+        to['workdir'] = t.workdir
+        to['timeout'] = t.timeout
+        to['suite'] = t.suite
+        to['is_parallel'] = t.is_parallel
+        to['priority'] = t.priority
+        to['protocol'] = str(t.protocol)
+        result.append(to)
+    return result
+
+def list_tests(testdata) -> T.List[T.Dict[str, T.Union[str, int, T.List[str], T.Dict[str, str]]]]:
+    return get_test_list(testdata)
+
+def list_benchmarks(benchdata) -> T.List[T.Dict[str, T.Union[str, int, T.List[str], T.Dict[str, str]]]]:
+    return get_test_list(benchdata)
+
+def list_projinfo(builddata: build.Build) -> T.Dict[str, T.Union[str, T.List[T.Dict[str, str]]]]:
+    result = {'version': builddata.project_version,
+              'descriptive_name': builddata.project_name,
+              'subproject_dir': builddata.subproject_dir}
+    subprojects = []
+    for k, v in builddata.subprojects.items():
+        c = {'name': k,
+             'version': v,
+             'descriptive_name': builddata.projects.get(k)}
+        subprojects.append(c)
+    result['subprojects'] = subprojects
+    return result
+
+def list_projinfo_from_source(intr: IntrospectionInterpreter) -> T.Dict[str, T.Union[str, T.List[T.Dict[str, str]]]]:
+    sourcedir = intr.source_root
+    files = find_buildsystem_files_list(sourcedir)
+    files = [os.path.normpath(x) for x in files]
+
+    for i in intr.project_data['subprojects']:
+        basedir = os.path.join(intr.subproject_dir, i['name'])
+        i['buildsystem_files'] = [x for x in files if x.startswith(basedir)]
+        files = [x for x in files if not x.startswith(basedir)]
+
+    intr.project_data['buildsystem_files'] = files
+    intr.project_data['subproject_dir'] = intr.subproject_dir
+    return intr.project_data
+
+def print_results(options, results: T.Sequence[T.Tuple[str, T.Union[dict, T.List[T.Any]]]], indent: int) -> int:
+    if not results and not options.force_dict:
+        print('No command specified')
+        return 1
+    elif len(results) == 1 and not options.force_dict:
+        # Make to keep the existing output format for a single option
+        print(json.dumps(results[0][1], indent=indent))
+    else:
+        out = {}
+        for i in results:
+            out[i[0]] = i[1]
+        print(json.dumps(out, indent=indent))
+    return 0
+
+def run(options) -> int:
+    datadir = 'meson-private'
+    infodir = 'meson-info'
+    if options.builddir is not None:
+        datadir = os.path.join(options.builddir, datadir)
+        infodir = os.path.join(options.builddir, infodir)
+    indent = 4 if options.indent else None
+    results = []  # type: T.List[T.Tuple[str, T.Union[dict, T.List[T.Any]]]]
+    sourcedir = '.' if options.builddir == 'meson.build' else options.builddir[:-11]
+    intro_types = get_meson_introspection_types(sourcedir=sourcedir)
+
+    if 'meson.build' in [os.path.basename(options.builddir), options.builddir]:
+        # Make sure that log entries in other parts of meson don't interfere with the JSON output
+        mlog.disable()
+        backend = backends.get_backend_from_name(options.backend)
+        intr = IntrospectionInterpreter(sourcedir, '', backend.name, visitors = [AstIDGenerator(), AstIndentationGenerator(), AstConditionLevel()])
+        intr.analyze()
+        # Re-enable logging just in case
+        mlog.enable()
+        for key, val in intro_types.items():
+            if (not options.all and not getattr(options, key, False)) or not val.no_bd:
+                continue
+            results += [(key, val.no_bd(intr))]
+        return print_results(options, results, indent)
+
+    infofile = get_meson_info_file(infodir)
+    if not os.path.isdir(datadir) or not os.path.isdir(infodir) or not os.path.isfile(infofile):
+        print('Current directory is not a meson build directory.\n'
+              'Please specify a valid build dir or change the working directory to it.\n'
+              'It is also possible that the build directory was generated with an old\n'
+              'meson version. Please regenerate it in this case.')
+        return 1
+
+    with open(infofile, 'r') as fp:
+        raw = json.load(fp)
+        intro_vers = raw.get('introspection', {}).get('version', {}).get('full', '0.0.0')
+
+    vers_to_check = get_meson_introspection_required_version()
+    for i in vers_to_check:
+        if not mesonlib.version_compare(intro_vers, i):
+            print('Introspection version {} is not supported. '
+                  'The required version is: {}'
+                  .format(intro_vers, ' and '.join(vers_to_check)))
+            return 1
+
+    # Extract introspection information from JSON
+    for i in intro_types.keys():
+        if not intro_types[i].func:
+            continue
+        if not options.all and not getattr(options, i, False):
+            continue
+        curr = os.path.join(infodir, 'intro-{}.json'.format(i))
+        if not os.path.isfile(curr):
+            print('Introspection file {} does not exist.'.format(curr))
+            return 1
+        with open(curr, 'r') as fp:
+            results += [(i, json.load(fp))]
+
+    return print_results(options, results, indent)
+
+updated_introspection_files = []  # type: T.List[str]
+
+def write_intro_info(intro_info: T.Sequence[T.Tuple[str, T.Union[dict, T.List[T.Any]]]], info_dir: str) -> None:
+    global updated_introspection_files
+    for i in intro_info:
+        out_file = os.path.join(info_dir, 'intro-{}.json'.format(i[0]))
+        tmp_file = os.path.join(info_dir, 'tmp_dump.json')
+        with open(tmp_file, 'w') as fp:
+            json.dump(i[1], fp)
+            fp.flush() # Not sure if this is needed
+        os.replace(tmp_file, out_file)
+        updated_introspection_files += [i[0]]
+
+def generate_introspection_file(builddata: build.Build, backend: backends.Backend) -> None:
+    coredata = builddata.environment.get_coredata()
+    intro_types = get_meson_introspection_types(coredata=coredata, builddata=builddata, backend=backend)
+    intro_info = []  # type: T.List[T.Tuple[str, T.Union[dict, T.List[T.Any]]]]
+
+    for key, val in intro_types.items():
+        if not val.func:
+            continue
+        intro_info += [(key, val.func())]
+
+    write_intro_info(intro_info, builddata.environment.info_dir)
+
+def update_build_options(coredata: cdata.CoreData, info_dir) -> None:
+    intro_info = [
+        ('buildoptions', list_buildoptions(coredata))
+    ]
+
+    write_intro_info(intro_info, info_dir)
+
+def split_version_string(version: str) -> T.Dict[str, T.Union[str, int]]:
+    vers_list = version.split('.')
+    return {
+        'full': version,
+        'major': int(vers_list[0] if len(vers_list) > 0 else 0),
+        'minor': int(vers_list[1] if len(vers_list) > 1 else 0),
+        'patch': int(vers_list[2] if len(vers_list) > 2 else 0)
+    }
+
+def write_meson_info_file(builddata: build.Build, errors: list, build_files_updated: bool = False) -> None:
+    global updated_introspection_files
+    info_dir = builddata.environment.info_dir
+    info_file = get_meson_info_file(info_dir)
+    intro_types = get_meson_introspection_types()
+    intro_info = {}
+
+    for i in intro_types.keys():
+        if not intro_types[i].func:
+            continue
+        intro_info[i] = {
+            'file': 'intro-{}.json'.format(i),
+            'updated': i in updated_introspection_files
+        }
+
+    info_data = {
+        'meson_version': split_version_string(cdata.version),
+        'directories': {
+            'source': builddata.environment.get_source_dir(),
+            'build': builddata.environment.get_build_dir(),
+            'info': info_dir,
+        },
+        'introspection': {
+            'version': split_version_string(get_meson_introspection_version()),
+            'information': intro_info,
+        },
+        'build_files_updated': build_files_updated,
+    }
+
+    if errors:
+        info_data['error'] = True
+        info_data['error_list'] = [x if isinstance(x, str) else str(x) for x in errors]
+    else:
+        info_data['error'] = False
+
+    # Write the data to disc
+    tmp_file = os.path.join(info_dir, 'tmp_dump.json')
+    with open(tmp_file, 'w') as fp:
+        json.dump(info_data, fp)
+        fp.flush()
+    os.replace(tmp_file, info_file)
diff --git a/meson/mesonbuild/mlog.py b/meson/mesonbuild/mlog.py
new file mode 100644
index 000000000..9c19427cc
--- /dev/null
+++ b/meson/mesonbuild/mlog.py
@@ -0,0 +1,353 @@
+# Copyright 2013-2014 The Meson development team
+
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+
+#     http://www.apache.org/licenses/LICENSE-2.0
+
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import os
+import io
+import sys
+import time
+import platform
+import typing as T
+from contextlib import contextmanager
+from pathlib import Path
+
+"""This is (mostly) a standalone module used to write logging
+information about Meson runs. Some output goes to screen,
+some to logging dir and some goes to both."""
+
+def _windows_ansi() -> bool:
+    # windll only exists on windows, so mypy will get mad
+    from ctypes import windll, byref  # type: ignore
+    from ctypes.wintypes import DWORD
+
+    kernel = windll.kernel32
+    stdout = kernel.GetStdHandle(-11)
+    mode = DWORD()
+    if not kernel.GetConsoleMode(stdout, byref(mode)):
+        return False
+    # ENABLE_VIRTUAL_TERMINAL_PROCESSING == 0x4
+    # If the call to enable VT processing fails (returns 0), we fallback to
+    # original behavior
+    return bool(kernel.SetConsoleMode(stdout, mode.value | 0x4) or os.environ.get('ANSICON'))
+
+def colorize_console() -> bool:
+    _colorize_console = getattr(sys.stdout, 'colorize_console', None)  # type: bool
+    if _colorize_console is not None:
+        return _colorize_console
+
+    try:
+        if platform.system().lower() == 'windows':
+            _colorize_console = os.isatty(sys.stdout.fileno()) and _windows_ansi()
+        else:
+            _colorize_console = os.isatty(sys.stdout.fileno()) and os.environ.get('TERM', 'dumb') != 'dumb'
+    except Exception:
+        _colorize_console = False
+
+    sys.stdout.colorize_console = _colorize_console  # type: ignore[attr-defined]
+    return _colorize_console
+
+def setup_console():
+    # on Windows, a subprocess might call SetConsoleMode() on the console
+    # connected to stdout and turn off ANSI escape processing. Call this after
+    # running a subprocess to ensure we turn it on again.
+    if platform.system().lower() == 'windows':
+        try:
+            delattr(sys.stdout, 'colorize_console')
+        except AttributeError:
+            pass
+
+log_dir = None               # type: T.Optional[str]
+log_file = None              # type: T.Optional[T.TextIO]
+log_fname = 'meson-log.txt'  # type: str
+log_depth = 0                # type: int
+log_timestamp_start = None   # type: T.Optional[float]
+log_fatal_warnings = False   # type: bool
+log_disable_stdout = False   # type: bool
+log_errors_only = False      # type: bool
+_in_ci = 'CI' in os.environ  # type: bool
+_logged_once = set()         # type: T.Set[T.Tuple[str, ...]]
+log_warnings_counter = 0     # type: int
+
+def disable() -> None:
+    global log_disable_stdout
+    log_disable_stdout = True
+
+def enable() -> None:
+    global log_disable_stdout
+    log_disable_stdout = False
+
+def set_quiet() -> None:
+    global log_errors_only
+    log_errors_only = True
+
+def set_verbose() -> None:
+    global log_errors_only
+    log_errors_only = False
+
+def initialize(logdir: str, fatal_warnings: bool = False) -> None:
+    global log_dir, log_file, log_fatal_warnings
+    log_dir = logdir
+    log_file = open(os.path.join(logdir, log_fname), 'w', encoding='utf8')
+    log_fatal_warnings = fatal_warnings
+
+def set_timestamp_start(start: float) -> None:
+    global log_timestamp_start
+    log_timestamp_start = start
+
+def shutdown() -> T.Optional[str]:
+    global log_file
+    if log_file is not None:
+        path = log_file.name
+        exception_around_goer = log_file
+        log_file = None
+        exception_around_goer.close()
+        return path
+    return None
+
+class AnsiDecorator:
+    plain_code = "\033[0m"
+
+    def __init__(self, text: str, code: str, quoted: bool = False):
+        self.text = text
+        self.code = code
+        self.quoted = quoted
+
+    def get_text(self, with_codes: bool) -> str:
+        text = self.text
+        if with_codes:
+            text = self.code + self.text + AnsiDecorator.plain_code
+        if self.quoted:
+            text = '"{}"'.format(text)
+        return text
+
+def bold(text: str, quoted: bool = False) -> AnsiDecorator:
+    return AnsiDecorator(text, "\033[1m", quoted=quoted)
+
+def red(text: str) -> AnsiDecorator:
+    return AnsiDecorator(text, "\033[1;31m")
+
+def green(text: str) -> AnsiDecorator:
+    return AnsiDecorator(text, "\033[1;32m")
+
+def yellow(text: str) -> AnsiDecorator:
+    return AnsiDecorator(text, "\033[1;33m")
+
+def blue(text: str) -> AnsiDecorator:
+    return AnsiDecorator(text, "\033[1;34m")
+
+def cyan(text: str) -> AnsiDecorator:
+    return AnsiDecorator(text, "\033[1;36m")
+
+def normal_red(text: str) -> AnsiDecorator:
+    return AnsiDecorator(text, "\033[31m")
+
+def normal_green(text: str) -> AnsiDecorator:
+    return AnsiDecorator(text, "\033[32m")
+
+def normal_yellow(text: str) -> AnsiDecorator:
+    return AnsiDecorator(text, "\033[33m")
+
+def normal_blue(text: str) -> AnsiDecorator:
+    return AnsiDecorator(text, "\033[34m")
+
+def normal_cyan(text: str) -> AnsiDecorator:
+    return AnsiDecorator(text, "\033[36m")
+
+# This really should be AnsiDecorator or anything that implements
+# __str__(), but that requires protocols from typing_extensions
+def process_markup(args: T.Sequence[T.Union[AnsiDecorator, str]], keep: bool) -> T.List[str]:
+    arr = []  # type: T.List[str]
+    if log_timestamp_start is not None:
+        arr = ['[{:.3f}]'.format(time.monotonic() - log_timestamp_start)]
+    for arg in args:
+        if arg is None:
+            continue
+        if isinstance(arg, str):
+            arr.append(arg)
+        elif isinstance(arg, AnsiDecorator):
+            arr.append(arg.get_text(keep))
+        else:
+            arr.append(str(arg))
+    return arr
+
+def force_print(*args: str, **kwargs: T.Any) -> None:
+    if log_disable_stdout:
+        return
+    iostr = io.StringIO()
+    kwargs['file'] = iostr
+    print(*args, **kwargs)
+
+    raw = iostr.getvalue()
+    if log_depth > 0:
+        prepend = '|' * log_depth
+        raw = prepend + raw.replace('\n', '\n' + prepend, raw.count('\n') - 1)
+
+    # _Something_ is going to get printed.
+    try:
+        print(raw, end='')
+    except UnicodeEncodeError:
+        cleaned = raw.encode('ascii', 'replace').decode('ascii')
+        print(cleaned, end='')
+
+# We really want a heterogeneous dict for this, but that's in typing_extensions
+def debug(*args: T.Union[str, AnsiDecorator], **kwargs: T.Any) -> None:
+    arr = process_markup(args, False)
+    if log_file is not None:
+        print(*arr, file=log_file, **kwargs)
+        log_file.flush()
+
+def _debug_log_cmd(cmd: str, args: T.List[str]) -> None:
+    if not _in_ci:
+        return
+    args = ['"{}"'.format(x) for x in args]  # Quote all args, just in case
+    debug('!meson_ci!/{} {}'.format(cmd, ' '.join(args)))
+
+def cmd_ci_include(file: str) -> None:
+    _debug_log_cmd('ci_include', [file])
+
+def log(*args: T.Union[str, AnsiDecorator], is_error: bool = False,
+        **kwargs: T.Any) -> None:
+    arr = process_markup(args, False)
+    if log_file is not None:
+        print(*arr, file=log_file, **kwargs)
+        log_file.flush()
+    if colorize_console():
+        arr = process_markup(args, True)
+    if not log_errors_only or is_error:
+        force_print(*arr, **kwargs)
+
+def log_once(*args: T.Union[str, AnsiDecorator], is_error: bool = False,
+             **kwargs: T.Any) -> None:
+    """Log variant that only prints a given message one time per meson invocation.
+
+    This considers ansi decorated values by the values they wrap without
+    regard for the AnsiDecorator itself.
+    """
+    t = tuple(a.text if isinstance(a, AnsiDecorator) else a for a in args)
+    if t in _logged_once:
+        return
+    _logged_once.add(t)
+    log(*args, is_error=is_error, **kwargs)
+
+# This isn't strictly correct. What we really want here is something like:
+# class StringProtocol(typing_extensions.Protocol):
+#
+#      def __str__(self) -> str: ...
+#
+# This would more accurately embody what this function can handle, but we
+# don't have that yet, so instead we'll do some casting to work around it
+def get_error_location_string(fname: str, lineno: str) -> str:
+    return '{}:{}:'.format(fname, lineno)
+
+def _log_error(severity: str, *rargs: T.Union[str, AnsiDecorator],
+               once: bool = False, fatal: bool = True, **kwargs: T.Any) -> None:
+    from .mesonlib import MesonException, relpath
+
+    # The typing requirements here are non-obvious. Lists are invariant,
+    # therefore T.List[A] and T.List[T.Union[A, B]] are not able to be joined
+    if severity == 'notice':
+        label = [bold('NOTICE:')]  # type: T.List[T.Union[str, AnsiDecorator]]
+    elif severity == 'warning':
+        label = [yellow('WARNING:')]
+    elif severity == 'error':
+        label = [red('ERROR:')]
+    elif severity == 'deprecation':
+        label = [red('DEPRECATION:')]
+    else:
+        raise MesonException('Invalid severity ' + severity)
+    # rargs is a tuple, not a list
+    args = label + list(rargs)
+
+    location = kwargs.pop('location', None)
+    if location is not None:
+        location_file = relpath(location.filename, os.getcwd())
+        location_str = get_error_location_string(location_file, location.lineno)
+        # Unions are frankly awful, and we have to T.cast here to get mypy
+        # to understand that the list concatenation is safe
+        location_list = T.cast(T.List[T.Union[str, AnsiDecorator]], [location_str])
+        args = location_list + args
+
+    if once:
+        log_once(*args, **kwargs)
+    else:
+        log(*args, **kwargs)
+
+    global log_warnings_counter
+    log_warnings_counter += 1
+
+    if log_fatal_warnings and fatal:
+        raise MesonException("Fatal warnings enabled, aborting")
+
+def error(*args: T.Union[str, AnsiDecorator], **kwargs: T.Any) -> None:
+    return _log_error('error', *args, **kwargs, is_error=True)
+
+def warning(*args: T.Union[str, AnsiDecorator], **kwargs: T.Any) -> None:
+    return _log_error('warning', *args, **kwargs, is_error=True)
+
+def deprecation(*args: T.Union[str, AnsiDecorator], **kwargs: T.Any) -> None:
+    return _log_error('deprecation', *args, **kwargs, is_error=True)
+
+def notice(*args: T.Union[str, AnsiDecorator], **kwargs: T.Any) -> None:
+    return _log_error('notice', *args, **kwargs, is_error=False)
+
+def get_relative_path(target: Path, current: Path) -> Path:
+    """Get the path to target from current"""
+    # Go up "current" until we find a common ancestor to target
+    acc = ['.']
+    for part in [current, *current.parents]:
+        try:
+            path = target.relative_to(part)
+            return Path(*acc, path)
+        except ValueError:
+            pass
+        acc += ['..']
+
+    # we failed, should not get here
+    return target
+
+def exception(e: Exception, prefix: T.Optional[AnsiDecorator] = None) -> None:
+    if prefix is None:
+        prefix = red('ERROR:')
+    log()
+    args = []  # type: T.List[T.Union[AnsiDecorator, str]]
+    if all(getattr(e, a, None) is not None for a in ['file', 'lineno', 'colno']):
+        # Mypy doesn't follow hasattr, and it's pretty easy to visually inspect
+        # that this is correct, so we'll just ignore it.
+        path = get_relative_path(Path(e.file), Path(os.getcwd()))  # type: ignore
+        args.append('{}:{}:{}:'.format(path, e.lineno, e.colno))  # type: ignore
+    if prefix:
+        args.append(prefix)
+    args.append(str(e))
+    log(*args)
+
+# Format a list for logging purposes as a string. It separates
+# all but the last item with commas, and the last with 'and'.
+def format_list(input_list: T.List[str]) -> str:
+    l = len(input_list)
+    if l > 2:
+        return ' and '.join([', '.join(input_list[:-1]), input_list[-1]])
+    elif l == 2:
+        return ' and '.join(input_list)
+    elif l == 1:
+        return input_list[0]
+    else:
+        return ''
+
+@contextmanager
+def nested() -> T.Generator[None, None, None]:
+    global log_depth
+    log_depth += 1
+    try:
+        yield
+    finally:
+        log_depth -= 1
diff --git a/meson/mesonbuild/modules/__init__.py b/meson/mesonbuild/modules/__init__.py
new file mode 100644
index 000000000..47be039dc
--- /dev/null
+++ b/meson/mesonbuild/modules/__init__.py
@@ -0,0 +1,95 @@
+# Copyright 2019 The Meson development team
+
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+
+#     http://www.apache.org/licenses/LICENSE-2.0
+
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# This file contains the detection logic for external dependencies that
+# are UI-related.
+
+import os
+
+from .. import build
+from ..mesonlib import unholder
+
+
+class ExtensionModule:
+    def __init__(self, interpreter):
+        self.interpreter = interpreter
+        self.snippets = set() # List of methods that operate only on the interpreter.
+
+    def is_snippet(self, funcname):
+        return funcname in self.snippets
+
+
+def get_include_args(include_dirs, prefix='-I'):
+    '''
+    Expand include arguments to refer to the source and build dirs
+    by using @SOURCE_ROOT@ and @BUILD_ROOT@ for later substitution
+    '''
+    if not include_dirs:
+        return []
+
+    dirs_str = []
+    for dirs in unholder(include_dirs):
+        if isinstance(dirs, str):
+            dirs_str += ['%s%s' % (prefix, dirs)]
+            continue
+
+        # Should be build.IncludeDirs object.
+        basedir = dirs.get_curdir()
+        for d in dirs.get_incdirs():
+            expdir = os.path.join(basedir, d)
+            srctreedir = os.path.join('@SOURCE_ROOT@', expdir)
+            buildtreedir = os.path.join('@BUILD_ROOT@', expdir)
+            dirs_str += ['%s%s' % (prefix, buildtreedir),
+                         '%s%s' % (prefix, srctreedir)]
+        for d in dirs.get_extra_build_dirs():
+            dirs_str += ['%s%s' % (prefix, d)]
+
+    return dirs_str
+
+def is_module_library(fname):
+    '''
+    Check if the file is a library-like file generated by a module-specific
+    target, such as GirTarget or TypelibTarget
+    '''
+    if hasattr(fname, 'fname'):
+        fname = fname.fname
+    suffix = fname.split('.')[-1]
+    return suffix in ('gir', 'typelib')
+
+
+class ModuleReturnValue:
+    def __init__(self, return_value, new_objects):
+        self.return_value = return_value
+        assert(isinstance(new_objects, list))
+        self.new_objects = new_objects
+
+class GResourceTarget(build.CustomTarget):
+    def __init__(self, name, subdir, subproject, kwargs):
+        super().__init__(name, subdir, subproject, kwargs)
+
+class GResourceHeaderTarget(build.CustomTarget):
+    def __init__(self, name, subdir, subproject, kwargs):
+        super().__init__(name, subdir, subproject, kwargs)
+
+class GirTarget(build.CustomTarget):
+    def __init__(self, name, subdir, subproject, kwargs):
+        super().__init__(name, subdir, subproject, kwargs)
+
+class TypelibTarget(build.CustomTarget):
+    def __init__(self, name, subdir, subproject, kwargs):
+        super().__init__(name, subdir, subproject, kwargs)
+
+class VapiTarget(build.CustomTarget):
+    def __init__(self, name, subdir, subproject, kwargs):
+        super().__init__(name, subdir, subproject, kwargs)
diff --git a/meson/mesonbuild/modules/cmake.py b/meson/mesonbuild/modules/cmake.py
new file mode 100644
index 000000000..238375302
--- /dev/null
+++ b/meson/mesonbuild/modules/cmake.py
@@ -0,0 +1,395 @@
+# Copyright 2018 The Meson development team
+
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+
+#     http://www.apache.org/licenses/LICENSE-2.0
+
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+import re
+import os, os.path, pathlib
+import shutil
+import typing as T
+
+from . import ExtensionModule, ModuleReturnValue
+
+from .. import build, dependencies, mesonlib, mlog
+from ..cmake import SingleTargetOptions, TargetOptions, cmake_defines_to_args
+from ..interpreter import ConfigurationDataHolder, InterpreterException, SubprojectHolder
+from ..interpreterbase import (
+    InterpreterObject,
+    ObjectHolder,
+
+    FeatureNew,
+    FeatureNewKwargs,
+    FeatureDeprecatedKwargs,
+
+    stringArgs,
+    permittedKwargs,
+    noPosargs,
+    noKwargs,
+
+    InvalidArguments,
+)
+
+
+COMPATIBILITIES = ['AnyNewerVersion', 'SameMajorVersion', 'SameMinorVersion', 'ExactVersion']
+
+# Taken from https://github.com/Kitware/CMake/blob/master/Modules/CMakePackageConfigHelpers.cmake
+PACKAGE_INIT_BASE = '''
+####### Expanded from \\@PACKAGE_INIT\\@ by configure_package_config_file() #######
+####### Any changes to this file will be overwritten by the next CMake run ####
+####### The input file was @inputFileName@ ########
+
+get_filename_component(PACKAGE_PREFIX_DIR "${CMAKE_CURRENT_LIST_DIR}/@PACKAGE_RELATIVE_PATH@" ABSOLUTE)
+'''
+PACKAGE_INIT_EXT = '''
+# Use original install prefix when loaded through a "/usr move"
+# cross-prefix symbolic link such as /lib -> /usr/lib.
+get_filename_component(_realCurr "${CMAKE_CURRENT_LIST_DIR}" REALPATH)
+get_filename_component(_realOrig "@absInstallDir@" REALPATH)
+if(_realCurr STREQUAL _realOrig)
+  set(PACKAGE_PREFIX_DIR "@installPrefix@")
+endif()
+unset(_realOrig)
+unset(_realCurr)
+'''
+PACKAGE_INIT_SET_AND_CHECK = '''
+macro(set_and_check _var _file)
+  set(${_var} "${_file}")
+  if(NOT EXISTS "${_file}")
+    message(FATAL_ERROR "File or directory ${_file} referenced by variable ${_var} does not exist !")
+  endif()
+endmacro()
+
+####################################################################################
+'''
+
+class CMakeSubprojectHolder(InterpreterObject, ObjectHolder):
+    def __init__(self, subp, pv):
+        assert(isinstance(subp, SubprojectHolder))
+        assert(hasattr(subp, 'cm_interpreter'))
+        InterpreterObject.__init__(self)
+        ObjectHolder.__init__(self, subp, pv)
+        self.methods.update({'get_variable': self.get_variable,
+                             'dependency': self.dependency,
+                             'include_directories': self.include_directories,
+                             'target': self.target,
+                             'target_type': self.target_type,
+                             'target_list': self.target_list,
+                             'found': self.found_method,
+                             })
+
+    def _args_to_info(self, args):
+        if len(args) != 1:
+            raise InterpreterException('Exactly one argument is required.')
+
+        tgt = args[0]
+        res = self.held_object.cm_interpreter.target_info(tgt)
+        if res is None:
+            raise InterpreterException('The CMake target {} does not exist\n'.format(tgt) +
+                                       '  Use the following command in your meson.build to list all available targets:\n\n' +
+                                       '    message(\'CMaket targets:\\n - \' + \'\\n - \'.join(.target_list()))')
+
+        # Make sure that all keys are present (if not this is a bug)
+        assert(all([x in res for x in ['inc', 'src', 'dep', 'tgt', 'func']]))
+        return res
+
+    @noKwargs
+    @stringArgs
+    def get_variable(self, args, kwargs):
+        return self.held_object.get_variable_method(args, kwargs)
+
+    @noKwargs
+    @stringArgs
+    def dependency(self, args, kwargs):
+        info = self._args_to_info(args)
+        return self.get_variable([info['dep']], kwargs)
+
+    @noKwargs
+    @stringArgs
+    def include_directories(self, args, kwargs):
+        info = self._args_to_info(args)
+        return self.get_variable([info['inc']], kwargs)
+
+    @noKwargs
+    @stringArgs
+    def target(self, args, kwargs):
+        info = self._args_to_info(args)
+        return self.get_variable([info['tgt']], kwargs)
+
+    @noKwargs
+    @stringArgs
+    def target_type(self, args, kwargs):
+        info = self._args_to_info(args)
+        return info['func']
+
+    @noPosargs
+    @noKwargs
+    def target_list(self, args, kwargs):
+        return self.held_object.cm_interpreter.target_list()
+
+    @noPosargs
+    @noKwargs
+    @FeatureNew('CMakeSubproject.found()', '0.53.2')
+    def found_method(self, args, kwargs):
+        return self.held_object is not None
+
+
+class CMakeSubprojectOptions(InterpreterObject):
+    def __init__(self) -> None:
+        super().__init__()
+        self.cmake_options = []  # type: T.List[str]
+        self.target_options = TargetOptions()
+
+        self.methods.update(
+            {
+                'add_cmake_defines': self.add_cmake_defines,
+                'set_override_option': self.set_override_option,
+                'set_install': self.set_install,
+                'append_compile_args': self.append_compile_args,
+                'append_link_args': self.append_link_args,
+                'clear': self.clear,
+            }
+        )
+
+    def _get_opts(self, kwargs: dict) -> SingleTargetOptions:
+        if 'target' in kwargs:
+            return self.target_options[kwargs['target']]
+        return self.target_options.global_options
+
+    @noKwargs
+    def add_cmake_defines(self, args, kwargs) -> None:
+        self.cmake_options += cmake_defines_to_args(args)
+
+    @stringArgs
+    @permittedKwargs({'target'})
+    def set_override_option(self, args, kwargs) -> None:
+        if len(args) != 2:
+            raise InvalidArguments('set_override_option takes exactly 2 positional arguments')
+        self._get_opts(kwargs).set_opt(args[0], args[1])
+
+    @permittedKwargs({'target'})
+    def set_install(self, args, kwargs) -> None:
+        if len(args) != 1 or not isinstance(args[0], bool):
+            raise InvalidArguments('set_install takes exactly 1 boolean argument')
+        self._get_opts(kwargs).set_install(args[0])
+
+    @stringArgs
+    @permittedKwargs({'target'})
+    def append_compile_args(self, args, kwargs) -> None:
+        if len(args) < 2:
+            raise InvalidArguments('append_compile_args takes at least 2 positional arguments')
+        self._get_opts(kwargs).append_args(args[0], args[1:])
+
+    @stringArgs
+    @permittedKwargs({'target'})
+    def append_link_args(self, args, kwargs) -> None:
+        if not args:
+            raise InvalidArguments('append_link_args takes at least 1 positional argument')
+        self._get_opts(kwargs).append_link_args(args)
+
+    @noPosargs
+    @noKwargs
+    def clear(self, args, kwargs) -> None:
+        self.cmake_options.clear()
+        self.target_options = TargetOptions()
+
+
+class CmakeModule(ExtensionModule):
+    cmake_detected = False
+    cmake_root = None
+
+    def __init__(self, interpreter):
+        super().__init__(interpreter)
+        self.snippets.add('configure_package_config_file')
+        self.snippets.add('subproject')
+
+    def detect_voidp_size(self, env):
+        compilers = env.coredata.compilers.host
+        compiler = compilers.get('c', None)
+        if not compiler:
+            compiler = compilers.get('cpp', None)
+
+        if not compiler:
+            raise mesonlib.MesonException('Requires a C or C++ compiler to compute sizeof(void *).')
+
+        return compiler.sizeof('void *', '', env)
+
+    def detect_cmake(self):
+        if self.cmake_detected:
+            return True
+
+        cmakebin = dependencies.ExternalProgram('cmake', silent=False)
+        p, stdout, stderr = mesonlib.Popen_safe(cmakebin.get_command() + ['--system-information', '-G', 'Ninja'])[0:3]
+        if p.returncode != 0:
+            mlog.log('error retrieving cmake information: returnCode={0} stdout={1} stderr={2}'.format(p.returncode, stdout, stderr))
+            return False
+
+        match = re.search('\nCMAKE_ROOT \\"([^"]+)"\n', stdout.strip())
+        if not match:
+            mlog.log('unable to determine cmake root')
+            return False
+
+        cmakePath = pathlib.PurePath(match.group(1))
+        self.cmake_root = os.path.join(*cmakePath.parts)
+        self.cmake_detected = True
+        return True
+
+    @permittedKwargs({'version', 'name', 'compatibility', 'install_dir'})
+    def write_basic_package_version_file(self, state, _args, kwargs):
+        version = kwargs.get('version', None)
+        if not isinstance(version, str):
+            raise mesonlib.MesonException('Version must be specified.')
+
+        name = kwargs.get('name', None)
+        if not isinstance(name, str):
+            raise mesonlib.MesonException('Name not specified.')
+
+        compatibility = kwargs.get('compatibility', 'AnyNewerVersion')
+        if not isinstance(compatibility, str):
+            raise mesonlib.MesonException('compatibility is not string.')
+        if compatibility not in COMPATIBILITIES:
+            raise mesonlib.MesonException('compatibility must be either AnyNewerVersion, SameMajorVersion or ExactVersion.')
+
+        if not self.detect_cmake():
+            raise mesonlib.MesonException('Unable to find cmake')
+
+        pkgroot = kwargs.get('install_dir', None)
+        if pkgroot is None:
+            pkgroot = os.path.join(state.environment.coredata.get_builtin_option('libdir'), 'cmake', name)
+        if not isinstance(pkgroot, str):
+            raise mesonlib.MesonException('Install_dir must be a string.')
+
+        template_file = os.path.join(self.cmake_root, 'Modules', 'BasicConfigVersion-{}.cmake.in'.format(compatibility))
+        if not os.path.exists(template_file):
+            raise mesonlib.MesonException('your cmake installation doesn\'t support the {} compatibility'.format(compatibility))
+
+        version_file = os.path.join(state.environment.scratch_dir, '{}ConfigVersion.cmake'.format(name))
+
+        conf = {
+            'CVF_VERSION': (version, ''),
+            'CMAKE_SIZEOF_VOID_P': (str(self.detect_voidp_size(state.environment)), '')
+        }
+        mesonlib.do_conf_file(template_file, version_file, conf, 'meson')
+
+        res = build.Data(mesonlib.File(True, state.environment.get_scratch_dir(), version_file), pkgroot)
+        return ModuleReturnValue(res, [res])
+
+    def create_package_file(self, infile, outfile, PACKAGE_RELATIVE_PATH, extra, confdata):
+        package_init = PACKAGE_INIT_BASE.replace('@PACKAGE_RELATIVE_PATH@', PACKAGE_RELATIVE_PATH)
+        package_init = package_init.replace('@inputFileName@', infile)
+        package_init += extra
+        package_init += PACKAGE_INIT_SET_AND_CHECK
+
+        try:
+            with open(infile, "r") as fin:
+                data = fin.readlines()
+        except Exception as e:
+            raise mesonlib.MesonException('Could not read input file %s: %s' % (infile, str(e)))
+
+        result = []
+        regex = re.compile(r'(?:\\\\)+(?=\\?@)|\\@|@([-a-zA-Z0-9_]+)@')
+        for line in data:
+            line = line.replace('@PACKAGE_INIT@', package_init)
+            line, _missing = mesonlib.do_replacement(regex, line, 'meson', confdata)
+
+            result.append(line)
+
+        outfile_tmp = outfile + "~"
+        with open(outfile_tmp, "w", encoding='utf-8') as fout:
+            fout.writelines(result)
+
+        shutil.copymode(infile, outfile_tmp)
+        mesonlib.replace_if_different(outfile, outfile_tmp)
+
+    @permittedKwargs({'input', 'name', 'install_dir', 'configuration'})
+    def configure_package_config_file(self, interpreter, state, args, kwargs):
+        if args:
+            raise mesonlib.MesonException('configure_package_config_file takes only keyword arguments.')
+
+        if 'input' not in kwargs:
+            raise mesonlib.MesonException('configure_package_config_file requires "input" keyword.')
+        inputfile = kwargs['input']
+        if isinstance(inputfile, list):
+            if len(inputfile) != 1:
+                m = "Keyword argument 'input' requires exactly one file"
+                raise mesonlib.MesonException(m)
+            inputfile = inputfile[0]
+        if not isinstance(inputfile, (str, mesonlib.File)):
+            raise mesonlib.MesonException("input must be a string or a file")
+        if isinstance(inputfile, str):
+            inputfile = mesonlib.File.from_source_file(state.environment.source_dir, state.subdir, inputfile)
+
+        ifile_abs = inputfile.absolute_path(state.environment.source_dir, state.environment.build_dir)
+
+        if 'name' not in kwargs:
+            raise mesonlib.MesonException('"name" not specified.')
+        name = kwargs['name']
+
+        (ofile_path, ofile_fname) = os.path.split(os.path.join(state.subdir, '{}Config.cmake'.format(name)))
+        ofile_abs = os.path.join(state.environment.build_dir, ofile_path, ofile_fname)
+
+        install_dir = kwargs.get('install_dir', os.path.join(state.environment.coredata.get_builtin_option('libdir'), 'cmake', name))
+        if not isinstance(install_dir, str):
+            raise mesonlib.MesonException('"install_dir" must be a string.')
+
+        if 'configuration' not in kwargs:
+            raise mesonlib.MesonException('"configuration" not specified.')
+        conf = kwargs['configuration']
+        if not isinstance(conf, ConfigurationDataHolder):
+            raise mesonlib.MesonException('Argument "configuration" is not of type configuration_data')
+
+        prefix = state.environment.coredata.get_builtin_option('prefix')
+        abs_install_dir = install_dir
+        if not os.path.isabs(abs_install_dir):
+            abs_install_dir = os.path.join(prefix, install_dir)
+
+        PACKAGE_RELATIVE_PATH = os.path.relpath(prefix, abs_install_dir)
+        extra = ''
+        if re.match('^(/usr)?/lib(64)?/.+', abs_install_dir):
+            extra = PACKAGE_INIT_EXT.replace('@absInstallDir@', abs_install_dir)
+            extra = extra.replace('@installPrefix@', prefix)
+
+        self.create_package_file(ifile_abs, ofile_abs, PACKAGE_RELATIVE_PATH, extra, conf.held_object)
+        conf.mark_used()
+
+        conffile = os.path.normpath(inputfile.relative_name())
+        if conffile not in interpreter.build_def_files:
+            interpreter.build_def_files.append(conffile)
+
+        res = build.Data(mesonlib.File(True, ofile_path, ofile_fname), install_dir)
+        interpreter.build.data.append(res)
+
+        return res
+
+    @FeatureNew('subproject', '0.51.0')
+    @FeatureNewKwargs('subproject', '0.55.0', ['options'])
+    @FeatureDeprecatedKwargs('subproject', '0.55.0', ['cmake_options'])
+    @permittedKwargs({'cmake_options', 'required', 'options'})
+    @stringArgs
+    def subproject(self, interpreter, state, args, kwargs):
+        if len(args) != 1:
+            raise InterpreterException('Subproject takes exactly one argument')
+        if 'cmake_options' in kwargs and 'options' in kwargs:
+            raise InterpreterException('"options" cannot be used together with "cmake_options"')
+        dirname = args[0]
+        subp = interpreter.do_subproject(dirname, 'cmake', kwargs)
+        if not subp.held_object:
+            return subp
+        return CMakeSubprojectHolder(subp, dirname)
+
+    @FeatureNew('subproject_options', '0.55.0')
+    @noKwargs
+    @noPosargs
+    def subproject_options(self, state, args, kwargs) -> ModuleReturnValue:
+        opts = CMakeSubprojectOptions()
+        return ModuleReturnValue(opts, [])
+
+def initialize(*args, **kwargs):
+    return CmakeModule(*args, **kwargs)
diff --git a/meson/mesonbuild/modules/dlang.py b/meson/mesonbuild/modules/dlang.py
new file mode 100644
index 000000000..d4f62e459
--- /dev/null
+++ b/meson/mesonbuild/modules/dlang.py
@@ -0,0 +1,141 @@
+# Copyright 2018 The Meson development team
+
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+
+#     http://www.apache.org/licenses/LICENSE-2.0
+
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# This file contains the detection logic for external dependencies that
+# are UI-related.
+
+import json
+import os
+
+from . import ExtensionModule
+
+from .. import mlog
+
+from ..mesonlib import (
+    Popen_safe, MesonException
+)
+
+from ..dependencies.base import (
+    ExternalProgram, DubDependency
+)
+
+from ..interpreter import DependencyHolder
+
+class DlangModule(ExtensionModule):
+    class_dubbin = None
+    init_dub = False
+
+    def __init__(self, interpreter):
+        super().__init__(interpreter)
+        self.snippets.add('generate_dub_file')
+
+    def _init_dub(self):
+        if DlangModule.class_dubbin is None:
+            self.dubbin = DubDependency.class_dubbin
+            DlangModule.class_dubbin = self.dubbin
+        else:
+            self.dubbin = DlangModule.class_dubbin
+
+        if DlangModule.class_dubbin is None:
+            self.dubbin = self.check_dub()
+            DlangModule.class_dubbin = self.dubbin
+        else:
+            self.dubbin = DlangModule.class_dubbin
+
+        if not self.dubbin:
+            if not self.dubbin:
+                raise MesonException('DUB not found.')
+
+    def generate_dub_file(self, interpreter, state, args, kwargs):
+        if not DlangModule.init_dub:
+            self._init_dub()
+
+        if len(args) < 2:
+            raise MesonException('Missing arguments')
+
+        config = {
+            'name': args[0]
+        }
+
+        config_path = os.path.join(args[1], 'dub.json')
+        if os.path.exists(config_path):
+            with open(config_path, 'r', encoding='utf8') as ofile:
+                try:
+                    config = json.load(ofile)
+                except ValueError:
+                    mlog.warning('Failed to load the data in dub.json')
+
+        warn_publishing = ['description', 'license']
+        for arg in warn_publishing:
+            if arg not in kwargs and \
+               arg not in config:
+                mlog.warning('Without', mlog.bold(arg), 'the DUB package can\'t be published')
+
+        for key, value in kwargs.items():
+            if key == 'dependencies':
+                config[key] = {}
+                if isinstance(value, list):
+                    for dep in value:
+                        if isinstance(dep, DependencyHolder):
+                            name = dep.method_call('name', [], [])
+                            ret, res = self._call_dubbin(['describe', name])
+                            if ret == 0:
+                                version = dep.method_call('version', [], [])
+                                if version is None:
+                                    config[key][name] = ''
+                                else:
+                                    config[key][name] = version
+                elif isinstance(value, DependencyHolder):
+                    name = value.method_call('name', [], [])
+                    ret, res = self._call_dubbin(['describe', name])
+                    if ret == 0:
+                        version = value.method_call('version', [], [])
+                        if version is None:
+                            config[key][name] = ''
+                        else:
+                            config[key][name] = version
+            else:
+                config[key] = value
+
+        with open(config_path, 'w', encoding='utf8') as ofile:
+            ofile.write(json.dumps(config, indent=4, ensure_ascii=False))
+
+    def _call_dubbin(self, args, env=None):
+        p, out = Popen_safe(self.dubbin.get_command() + args, env=env)[0:2]
+        return p.returncode, out.strip()
+
+    def check_dub(self):
+        dubbin = ExternalProgram('dub', silent=True)
+        if dubbin.found():
+            try:
+                p, out = Popen_safe(dubbin.get_command() + ['--version'])[0:2]
+                if p.returncode != 0:
+                    mlog.warning('Found dub {!r} but couldn\'t run it'
+                                 ''.format(' '.join(dubbin.get_command())))
+                    # Set to False instead of None to signify that we've already
+                    # searched for it and not found it
+                    dubbin = False
+            except (FileNotFoundError, PermissionError):
+                dubbin = False
+        else:
+            dubbin = False
+        if dubbin:
+            mlog.log('Found DUB:', mlog.bold(dubbin.get_path()),
+                     '(%s)' % out.strip())
+        else:
+            mlog.log('Found DUB:', mlog.red('NO'))
+        return dubbin
+
+def initialize(*args, **kwargs):
+    return DlangModule(*args, **kwargs)
diff --git a/meson/mesonbuild/modules/fs.py b/meson/mesonbuild/modules/fs.py
new file mode 100644
index 000000000..d48311114
--- /dev/null
+++ b/meson/mesonbuild/modules/fs.py
@@ -0,0 +1,197 @@
+# Copyright 2019 The Meson development team
+
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+
+#     http://www.apache.org/licenses/LICENSE-2.0
+
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import typing as T
+import hashlib
+from pathlib import Path, PurePath, PureWindowsPath
+
+from .. import mlog
+from . import ExtensionModule
+from . import ModuleReturnValue
+from ..mesonlib import MesonException
+from ..interpreterbase import FeatureNew
+
+from ..interpreterbase import stringArgs, noKwargs
+if T.TYPE_CHECKING:
+    from ..interpreter import ModuleState
+
+class FSModule(ExtensionModule):
+
+    def __init__(self, interpreter):
+        super().__init__(interpreter)
+        self.snippets.add('generate_dub_file')
+
+    def _absolute_dir(self, state: 'ModuleState', arg: str) -> Path:
+        """
+        make an absolute path from a relative path, WITHOUT resolving symlinks
+        """
+        return Path(state.source_root) / state.subdir / Path(arg).expanduser()
+
+    def _resolve_dir(self, state: 'ModuleState', arg: str) -> Path:
+        """
+        resolves symlinks and makes absolute a directory relative to calling meson.build,
+        if not already absolute
+        """
+        path = self._absolute_dir(state, arg)
+        try:
+            # accomodate unresolvable paths e.g. symlink loops
+            path = path.resolve()
+        except Exception:
+            # return the best we could do
+            pass
+        return path
+
+    def _check(self, check: str, state: 'ModuleState', args: T.Sequence[str]) -> ModuleReturnValue:
+        if len(args) != 1:
+            raise MesonException('fs.{} takes exactly one argument.'.format(check))
+        test_file = self._resolve_dir(state, args[0])
+        val = getattr(test_file, check)()
+        if isinstance(val, Path):
+            val = str(val)
+        return ModuleReturnValue(val, [])
+
+    @stringArgs
+    @noKwargs
+    @FeatureNew('fs.expanduser', '0.54.0')
+    def expanduser(self, state: 'ModuleState', args: T.Sequence[str], kwargs: dict) -> ModuleReturnValue:
+        if len(args) != 1:
+            raise MesonException('fs.expanduser takes exactly one argument.')
+        return ModuleReturnValue(str(Path(args[0]).expanduser()), [])
+
+    @stringArgs
+    @noKwargs
+    @FeatureNew('fs.is_absolute', '0.54.0')
+    def is_absolute(self, state: 'ModuleState', args: T.Sequence[str], kwargs: dict) -> ModuleReturnValue:
+        if len(args) != 1:
+            raise MesonException('fs.is_absolute takes exactly one argument.')
+        return ModuleReturnValue(PurePath(args[0]).is_absolute(), [])
+
+    @stringArgs
+    @noKwargs
+    @FeatureNew('fs.as_posix', '0.54.0')
+    def as_posix(self, state: 'ModuleState', args: T.Sequence[str], kwargs: dict) -> ModuleReturnValue:
+        """
+        this function assumes you are passing a Windows path, even if on a Unix-like system
+        and so ALL '\' are turned to '/', even if you meant to escape a character
+        """
+        if len(args) != 1:
+            raise MesonException('fs.as_posix takes exactly one argument.')
+        return ModuleReturnValue(PureWindowsPath(args[0]).as_posix(), [])
+
+    @stringArgs
+    @noKwargs
+    def exists(self, state: 'ModuleState', args: T.Sequence[str], kwargs: dict) -> ModuleReturnValue:
+        return self._check('exists', state, args)
+
+    @stringArgs
+    @noKwargs
+    def is_symlink(self, state: 'ModuleState', args: T.Sequence[str], kwargs: dict) -> ModuleReturnValue:
+        if len(args) != 1:
+            raise MesonException('fs.is_symlink takes exactly one argument.')
+        return ModuleReturnValue(self._absolute_dir(state, args[0]).is_symlink(), [])
+
+    @stringArgs
+    @noKwargs
+    def is_file(self, state: 'ModuleState', args: T.Sequence[str], kwargs: dict) -> ModuleReturnValue:
+        return self._check('is_file', state, args)
+
+    @stringArgs
+    @noKwargs
+    def is_dir(self, state: 'ModuleState', args: T.Sequence[str], kwargs: dict) -> ModuleReturnValue:
+        return self._check('is_dir', state, args)
+
+    @stringArgs
+    @noKwargs
+    def hash(self, state: 'ModuleState', args: T.Sequence[str], kwargs: dict) -> ModuleReturnValue:
+        if len(args) != 2:
+            raise MesonException('fs.hash takes exactly two arguments.')
+        file = self._resolve_dir(state, args[0])
+        if not file.is_file():
+            raise MesonException('{} is not a file and therefore cannot be hashed'.format(file))
+        try:
+            h = hashlib.new(args[1])
+        except ValueError:
+            raise MesonException('hash algorithm {} is not available'.format(args[1]))
+        mlog.debug('computing {} sum of {} size {} bytes'.format(args[1], file, file.stat().st_size))
+        h.update(file.read_bytes())
+        return ModuleReturnValue(h.hexdigest(), [])
+
+    @stringArgs
+    @noKwargs
+    def size(self, state: 'ModuleState', args: T.Sequence[str], kwargs: dict) -> ModuleReturnValue:
+        if len(args) != 1:
+            raise MesonException('fs.size takes exactly one argument.')
+        file = self._resolve_dir(state, args[0])
+        if not file.is_file():
+            raise MesonException('{} is not a file and therefore cannot be sized'.format(file))
+        try:
+            return ModuleReturnValue(file.stat().st_size, [])
+        except ValueError:
+            raise MesonException('{} size could not be determined'.format(args[0]))
+
+    @stringArgs
+    @noKwargs
+    def is_samepath(self, state: 'ModuleState', args: T.Sequence[str], kwargs: dict) -> ModuleReturnValue:
+        if len(args) != 2:
+            raise MesonException('fs.is_samepath takes exactly two arguments.')
+        file1 = self._resolve_dir(state, args[0])
+        file2 = self._resolve_dir(state, args[1])
+        if not file1.exists():
+            return ModuleReturnValue(False, [])
+        if not file2.exists():
+            return ModuleReturnValue(False, [])
+        try:
+            return ModuleReturnValue(file1.samefile(file2), [])
+        except OSError:
+            return ModuleReturnValue(False, [])
+
+    @stringArgs
+    @noKwargs
+    def replace_suffix(self, state: 'ModuleState', args: T.Sequence[str], kwargs: dict) -> ModuleReturnValue:
+        if len(args) != 2:
+            raise MesonException('fs.replace_suffix takes exactly two arguments.')
+        original = PurePath(args[0])
+        new = original.with_suffix(args[1])
+        return ModuleReturnValue(str(new), [])
+
+    @stringArgs
+    @noKwargs
+    def parent(self, state: 'ModuleState', args: T.Sequence[str], kwargs: dict) -> ModuleReturnValue:
+        if len(args) != 1:
+            raise MesonException('fs.parent takes exactly one argument.')
+        original = PurePath(args[0])
+        new = original.parent
+        return ModuleReturnValue(str(new), [])
+
+    @stringArgs
+    @noKwargs
+    def name(self, state: 'ModuleState', args: T.Sequence[str], kwargs: dict) -> ModuleReturnValue:
+        if len(args) != 1:
+            raise MesonException('fs.name takes exactly one argument.')
+        original = PurePath(args[0])
+        new = original.name
+        return ModuleReturnValue(str(new), [])
+
+    @stringArgs
+    @noKwargs
+    @FeatureNew('fs.stem', '0.54.0')
+    def stem(self, state: 'ModuleState', args: T.Sequence[str], kwargs: dict) -> ModuleReturnValue:
+        if len(args) != 1:
+            raise MesonException('fs.stem takes exactly one argument.')
+        original = PurePath(args[0])
+        new = original.stem
+        return ModuleReturnValue(str(new), [])
+
+def initialize(*args, **kwargs) -> FSModule:
+    return FSModule(*args, **kwargs)
diff --git a/meson/mesonbuild/modules/gnome.py b/meson/mesonbuild/modules/gnome.py
new file mode 100644
index 000000000..1faa128bd
--- /dev/null
+++ b/meson/mesonbuild/modules/gnome.py
@@ -0,0 +1,1699 @@
+# Copyright 2015-2016 The Meson development team
+
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+
+#     http://www.apache.org/licenses/LICENSE-2.0
+
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+'''This module provides helper functions for Gnome/GLib related
+functionality such as gobject-introspection, gresources and gtk-doc'''
+
+import os
+import copy
+import subprocess
+import functools
+
+from .. import build
+from .. import mlog
+from .. import mesonlib
+from .. import interpreter
+from . import GResourceTarget, GResourceHeaderTarget, GirTarget, TypelibTarget, VapiTarget
+from . import get_include_args
+from . import ExtensionModule
+from . import ModuleReturnValue
+from ..mesonlib import (
+    MachineChoice, MesonException, OrderedSet, Popen_safe, extract_as_list,
+    join_args, unholder,
+)
+from ..dependencies import Dependency, PkgConfigDependency, InternalDependency, ExternalProgram
+from ..interpreterbase import noKwargs, permittedKwargs, FeatureNew, FeatureNewKwargs, FeatureDeprecatedKwargs
+
+# gresource compilation is broken due to the way
+# the resource compiler and Ninja clash about it
+#
+# https://github.com/ninja-build/ninja/issues/1184
+# https://bugzilla.gnome.org/show_bug.cgi?id=774368
+gresource_dep_needed_version = '>= 2.51.1'
+
+native_glib_version = None
+
+class GnomeModule(ExtensionModule):
+    gir_dep = None
+
+    @staticmethod
+    def _get_native_glib_version(state):
+        global native_glib_version
+        if native_glib_version is None:
+            glib_dep = PkgConfigDependency('glib-2.0', state.environment,
+                                           {'native': True, 'required': False})
+            if glib_dep.found():
+                native_glib_version = glib_dep.get_version()
+            else:
+                mlog.warning('Could not detect glib version, assuming 2.54. '
+                             'You may get build errors if your glib is older.')
+                native_glib_version = '2.54'
+        return native_glib_version
+
+    @mesonlib.run_once
+    def __print_gresources_warning(self, state):
+        if not mesonlib.version_compare(self._get_native_glib_version(state),
+                                        gresource_dep_needed_version):
+            mlog.warning('GLib compiled dependencies do not work reliably with \n'
+                         'the current version of GLib. See the following upstream issue:',
+                         mlog.bold('https://bugzilla.gnome.org/show_bug.cgi?id=774368'))
+
+    @staticmethod
+    def _print_gdbus_warning():
+        mlog.warning('Code generated with gdbus_codegen() requires the root directory be added to\n'
+                     '  include_directories of targets with GLib < 2.51.3:',
+                     mlog.bold('https://github.com/mesonbuild/meson/issues/1387'),
+                     once=True)
+
+    @FeatureNewKwargs('gnome.compile_resources', '0.37.0', ['gresource_bundle', 'export', 'install_header'])
+    @permittedKwargs({'source_dir', 'c_name', 'dependencies', 'export', 'gresource_bundle', 'install_header',
+                      'install', 'install_dir', 'extra_args', 'build_by_default'})
+    def compile_resources(self, state, args, kwargs):
+        self.__print_gresources_warning(state)
+        glib_version = self._get_native_glib_version(state)
+
+        cmd = ['glib-compile-resources', '@INPUT@']
+
+        source_dirs, dependencies = [mesonlib.extract_as_list(kwargs, c, pop=True) for c in  ['source_dir', 'dependencies']]
+
+        if len(args) < 2:
+            raise MesonException('Not enough arguments; the name of the resource '
+                                 'and the path to the XML file are required')
+
+        # Validate dependencies
+        for (ii, dep) in enumerate(dependencies):
+            if hasattr(dep, 'held_object'):
+                dependencies[ii] = dep = dep.held_object
+            if not isinstance(dep, (mesonlib.File, build.CustomTarget, build.CustomTargetIndex)):
+                m = 'Unexpected dependency type {!r} for gnome.compile_resources() ' \
+                    '"dependencies" argument.\nPlease pass the return value of ' \
+                    'custom_target() or configure_file()'
+                raise MesonException(m.format(dep))
+            if isinstance(dep, (build.CustomTarget, build.CustomTargetIndex)):
+                if not mesonlib.version_compare(glib_version, gresource_dep_needed_version):
+                    m = 'The "dependencies" argument of gnome.compile_resources() can not\n' \
+                        'be used with the current version of glib-compile-resources due to\n' \
+                        ''
+                    raise MesonException(m)
+
+        ifile = args[1]
+        if isinstance(ifile, mesonlib.File):
+            # glib-compile-resources will be run inside the source dir,
+            # so we need either 'src_to_build' or the absolute path.
+            # Absolute path is the easiest choice.
+            if ifile.is_built:
+                ifile = os.path.join(state.environment.get_build_dir(), ifile.subdir, ifile.fname)
+            else:
+                ifile = os.path.join(ifile.subdir, ifile.fname)
+        elif isinstance(ifile, str):
+            ifile = os.path.join(state.subdir, ifile)
+        elif isinstance(ifile, (interpreter.CustomTargetHolder,
+                                interpreter.CustomTargetIndexHolder,
+                                interpreter.GeneratedObjectsHolder)):
+            m = 'Resource xml files generated at build-time cannot be used ' \
+                'with gnome.compile_resources() because we need to scan ' \
+                'the xml for dependencies. Use configure_file() instead ' \
+                'to generate it at configure-time.'
+            raise MesonException(m)
+        else:
+            raise MesonException('Invalid file argument: {!r}'.format(ifile))
+
+        depend_files, depends, subdirs = self._get_gresource_dependencies(
+            state, ifile, source_dirs, dependencies)
+
+        # Make source dirs relative to build dir now
+        source_dirs = [os.path.join(state.build_to_src, state.subdir, d) for d in source_dirs]
+        # Ensure build directories of generated deps are included
+        source_dirs += subdirs
+        # Always include current directory, but after paths set by user
+        source_dirs.append(os.path.join(state.build_to_src, state.subdir))
+
+        for source_dir in OrderedSet(source_dirs):
+            cmd += ['--sourcedir', source_dir]
+
+        if 'c_name' in kwargs:
+            cmd += ['--c-name', kwargs.pop('c_name')]
+        export = kwargs.pop('export', False)
+        if not export:
+            cmd += ['--internal']
+
+        cmd += ['--generate', '--target', '@OUTPUT@']
+
+        cmd += mesonlib.stringlistify(kwargs.pop('extra_args', []))
+
+        gresource = kwargs.pop('gresource_bundle', False)
+        if gresource:
+            output = args[0] + '.gresource'
+            name = args[0] + '_gresource'
+        else:
+            output = args[0] + '.c'
+            name = args[0] + '_c'
+
+        if kwargs.get('install', False) and not gresource:
+            raise MesonException('The install kwarg only applies to gresource bundles, see install_header')
+
+        install_header = kwargs.pop('install_header', False)
+        if install_header and gresource:
+            raise MesonException('The install_header kwarg does not apply to gresource bundles')
+        if install_header and not export:
+            raise MesonException('GResource header is installed yet export is not enabled')
+
+        kwargs['input'] = args[1]
+        kwargs['output'] = output
+        kwargs['depends'] = depends
+        if not mesonlib.version_compare(glib_version, gresource_dep_needed_version):
+            # This will eventually go out of sync if dependencies are added
+            kwargs['depend_files'] = depend_files
+            kwargs['command'] = cmd
+        else:
+            depfile = kwargs['output'] + '.d'
+            kwargs['depfile'] = depfile
+            kwargs['command'] = copy.copy(cmd) + ['--dependency-file', '@DEPFILE@']
+        target_c = GResourceTarget(name, state.subdir, state.subproject, kwargs)
+
+        if gresource: # Only one target for .gresource files
+            return ModuleReturnValue(target_c, [target_c])
+
+        h_kwargs = {
+            'command': cmd,
+            'input': args[1],
+            'output': args[0] + '.h',
+            # The header doesn't actually care about the files yet it errors if missing
+            'depends': depends
+        }
+        if 'build_by_default' in kwargs:
+            h_kwargs['build_by_default'] = kwargs['build_by_default']
+        if install_header:
+            h_kwargs['install'] = install_header
+            h_kwargs['install_dir'] = kwargs.get('install_dir',
+                                                 state.environment.coredata.get_builtin_option('includedir'))
+        target_h = GResourceHeaderTarget(args[0] + '_h', state.subdir, state.subproject, h_kwargs)
+        rv = [target_c, target_h]
+        return ModuleReturnValue(rv, rv)
+
+    def _get_gresource_dependencies(self, state, input_file, source_dirs, dependencies):
+
+        cmd = ['glib-compile-resources',
+               input_file,
+               '--generate-dependencies']
+
+        # Prefer generated files over source files
+        cmd += ['--sourcedir', state.subdir] # Current build dir
+        for source_dir in source_dirs:
+            cmd += ['--sourcedir', os.path.join(state.subdir, source_dir)]
+
+        try:
+            pc, stdout, stderr = Popen_safe(cmd, cwd=state.environment.get_source_dir())
+        except (FileNotFoundError, PermissionError):
+            raise MesonException('Could not execute glib-compile-resources.')
+        if pc.returncode != 0:
+            m = 'glib-compile-resources failed to get dependencies for {}:\n{}'
+            mlog.warning(m.format(cmd[1], stderr))
+            raise subprocess.CalledProcessError(pc.returncode, cmd)
+
+        dep_files = stdout.split('\n')[:-1]
+
+        depends = []
+        subdirs = []
+        for resfile in dep_files[:]:
+            resbasename = os.path.basename(resfile)
+            for dep in unholder(dependencies):
+                if isinstance(dep, mesonlib.File):
+                    if dep.fname != resbasename:
+                        continue
+                    dep_files.remove(resfile)
+                    dep_files.append(dep)
+                    subdirs.append(dep.subdir)
+                    break
+                elif isinstance(dep, (build.CustomTarget, build.CustomTargetIndex)):
+                    fname = None
+                    outputs = {(o, os.path.basename(o)) for o in dep.get_outputs()}
+                    for o, baseo in outputs:
+                        if baseo == resbasename:
+                            fname = o
+                            break
+                    if fname is not None:
+                        dep_files.remove(resfile)
+                        depends.append(dep)
+                        subdirs.append(dep.get_subdir())
+                        break
+            else:
+                # In generate-dependencies mode, glib-compile-resources doesn't raise
+                # an error for missing resources but instead prints whatever filename
+                # was listed in the input file.  That's good because it means we can
+                # handle resource files that get generated as part of the build, as
+                # follows.
+                #
+                # If there are multiple generated resource files with the same basename
+                # then this code will get confused.
+                try:
+                    f = mesonlib.File.from_source_file(state.environment.get_source_dir(),
+                                                       ".", resfile)
+                except MesonException:
+                    raise MesonException(
+                        'Resource "%s" listed in "%s" was not found. If this is a '
+                        'generated file, pass the target that generates it to '
+                        'gnome.compile_resources() using the "dependencies" '
+                        'keyword argument.' % (resfile, input_file))
+                dep_files.remove(resfile)
+                dep_files.append(f)
+        return dep_files, depends, subdirs
+
+    def _get_link_args(self, state, lib, depends, include_rpath=False,
+                       use_gir_args=False):
+        link_command = []
+        # Construct link args
+        if isinstance(lib, build.SharedLibrary):
+            libdir = os.path.join(state.environment.get_build_dir(), state.backend.get_target_dir(lib))
+            link_command.append('-L' + libdir)
+            if include_rpath:
+                link_command.append('-Wl,-rpath,' + libdir)
+            depends.append(lib)
+            # Needed for the following binutils bug:
+            # https://github.com/mesonbuild/meson/issues/1911
+            # However, g-ir-scanner does not understand -Wl,-rpath
+            # so we need to use -L instead
+            for d in state.backend.determine_rpath_dirs(lib):
+                d = os.path.join(state.environment.get_build_dir(), d)
+                link_command.append('-L' + d)
+                if include_rpath:
+                    link_command.append('-Wl,-rpath,' + d)
+        if use_gir_args and self._gir_has_option('--extra-library'):
+            link_command.append('--extra-library=' + lib.name)
+        else:
+            link_command.append('-l' + lib.name)
+        return link_command
+
+    def _get_dependencies_flags(self, deps, state, depends, include_rpath=False,
+                                use_gir_args=False, separate_nodedup=False):
+        cflags = OrderedSet()
+        internal_ldflags = OrderedSet()
+        external_ldflags = OrderedSet()
+        # External linker flags that can't be de-duped reliably because they
+        # require two args in order, such as -framework AVFoundation
+        external_ldflags_nodedup = []
+        gi_includes = OrderedSet()
+        deps = mesonlib.unholder(mesonlib.listify(deps))
+
+        for dep in deps:
+            if isinstance(dep, Dependency):
+                girdir = dep.get_variable(pkgconfig='girdir', internal='girdir', default_value='')
+                if girdir:
+                    gi_includes.update([girdir])
+            if isinstance(dep, InternalDependency):
+                cflags.update(dep.get_compile_args())
+                cflags.update(get_include_args(dep.include_directories))
+                for lib in unholder(dep.libraries):
+                    if isinstance(lib, build.SharedLibrary):
+                        internal_ldflags.update(self._get_link_args(state, lib, depends, include_rpath))
+                        libdepflags = self._get_dependencies_flags(lib.get_external_deps(), state, depends, include_rpath,
+                                                                   use_gir_args, True)
+                        cflags.update(libdepflags[0])
+                        internal_ldflags.update(libdepflags[1])
+                        external_ldflags.update(libdepflags[2])
+                        external_ldflags_nodedup += libdepflags[3]
+                        gi_includes.update(libdepflags[4])
+                extdepflags = self._get_dependencies_flags(dep.ext_deps, state, depends, include_rpath,
+                                                           use_gir_args, True)
+                cflags.update(extdepflags[0])
+                internal_ldflags.update(extdepflags[1])
+                external_ldflags.update(extdepflags[2])
+                external_ldflags_nodedup += extdepflags[3]
+                gi_includes.update(extdepflags[4])
+                for source in unholder(dep.sources):
+                    if isinstance(source, GirTarget):
+                        gi_includes.update([os.path.join(state.environment.get_build_dir(),
+                                            source.get_subdir())])
+            # This should be any dependency other than an internal one.
+            elif isinstance(dep, Dependency):
+                cflags.update(dep.get_compile_args())
+                ldflags = iter(dep.get_link_args(raw=True))
+                for lib in ldflags:
+                    if (os.path.isabs(lib) and
+                            # For PkgConfigDependency only:
+                            getattr(dep, 'is_libtool', False)):
+                        lib_dir = os.path.dirname(lib)
+                        external_ldflags.update(["-L%s" % lib_dir])
+                        if include_rpath:
+                            external_ldflags.update(['-Wl,-rpath {}'.format(lib_dir)])
+                        libname = os.path.basename(lib)
+                        if libname.startswith("lib"):
+                            libname = libname[3:]
+                        libname = libname.split(".so")[0]
+                        lib = "-l%s" % libname
+                    # FIXME: Hack to avoid passing some compiler options in
+                    if lib.startswith("-W"):
+                        continue
+                    # If it's a framework arg, slurp the framework name too
+                    # to preserve the order of arguments
+                    if lib == '-framework':
+                        external_ldflags_nodedup += [lib, next(ldflags)]
+                    else:
+                        external_ldflags.update([lib])
+            elif isinstance(dep, (build.StaticLibrary, build.SharedLibrary)):
+                cflags.update(get_include_args(dep.get_include_dirs()))
+                depends.append(dep)
+            else:
+                mlog.log('dependency {!r} not handled to build gir files'.format(dep))
+                continue
+
+        if use_gir_args and self._gir_has_option('--extra-library'):
+            def fix_ldflags(ldflags):
+                fixed_ldflags = OrderedSet()
+                for ldflag in ldflags:
+                    if ldflag.startswith("-l"):
+                        ldflag = ldflag.replace('-l', '--extra-library=', 1)
+                    fixed_ldflags.add(ldflag)
+                return fixed_ldflags
+            internal_ldflags = fix_ldflags(internal_ldflags)
+            external_ldflags = fix_ldflags(external_ldflags)
+        if not separate_nodedup:
+            external_ldflags.update(external_ldflags_nodedup)
+            return cflags, internal_ldflags, external_ldflags, gi_includes
+        else:
+            return cflags, internal_ldflags, external_ldflags, external_ldflags_nodedup, gi_includes
+
+    def _unwrap_gir_target(self, girtarget, state):
+        while hasattr(girtarget, 'held_object'):
+            girtarget = girtarget.held_object
+
+        if not isinstance(girtarget, (build.Executable, build.SharedLibrary,
+                                      build.StaticLibrary)):
+            raise MesonException('Gir target must be an executable or library')
+
+        STATIC_BUILD_REQUIRED_VERSION = ">=1.58.1"
+        if isinstance(girtarget, (build.StaticLibrary)) and \
+           not mesonlib.version_compare(
+               self._get_gir_dep(state)[0].get_version(),
+               STATIC_BUILD_REQUIRED_VERSION):
+            raise MesonException('Static libraries can only be introspected with GObject-Introspection ' + STATIC_BUILD_REQUIRED_VERSION)
+
+        return girtarget
+
+    def _get_gir_dep(self, state):
+        if not self.gir_dep:
+            kwargs = {'native': True, 'required': True}
+            holder = self.interpreter.func_dependency(state.current_node, ['gobject-introspection-1.0'], kwargs)
+            self.gir_dep = holder.held_object
+            giscanner = state.environment.lookup_binary_entry(MachineChoice.HOST, 'g-ir-scanner')
+            if giscanner is not None:
+                self.giscanner = ExternalProgram.from_entry('g-ir-scanner', giscanner)
+            elif self.gir_dep.type_name == 'pkgconfig':
+                self.giscanner = ExternalProgram('g_ir_scanner', self.gir_dep.get_pkgconfig_variable('g_ir_scanner', {}))
+            else:
+                self.giscanner = self.interpreter.find_program_impl('g-ir-scanner')
+            gicompiler = state.environment.lookup_binary_entry(MachineChoice.HOST, 'g-ir-compiler')
+            if gicompiler is not None:
+                self.gicompiler = ExternalProgram.from_entry('g-ir-compiler', gicompiler)
+            elif self.gir_dep.type_name == 'pkgconfig':
+                self.gicompiler = ExternalProgram('g_ir_compiler', self.gir_dep.get_pkgconfig_variable('g_ir_compiler', {}))
+            else:
+                self.gicompiler = self.interpreter.find_program_impl('g-ir-compiler')
+        return self.gir_dep, self.giscanner, self.gicompiler
+
+    @functools.lru_cache(maxsize=None)
+    def _gir_has_option(self, option):
+        exe = self.giscanner
+        if hasattr(exe, 'held_object'):
+            exe = exe.held_object
+        if isinstance(exe, interpreter.OverrideProgram):
+            # Handle overridden g-ir-scanner
+            assert option in ['--extra-library', '--sources-top-dirs']
+            return True
+        p, o, e = Popen_safe(exe.get_command() + ['--help'], stderr=subprocess.STDOUT)
+        return p.returncode == 0 and option in o
+
+    def _scan_header(self, kwargs):
+        ret = []
+        header = kwargs.pop('header', None)
+        if header:
+            if not isinstance(header, str):
+                raise MesonException('header must be a string')
+            ret = ['--c-include=' + header]
+        return ret
+
+    def _scan_extra_args(self, kwargs):
+        return mesonlib.stringlistify(kwargs.pop('extra_args', []))
+
+    def _scan_link_withs(self, state, depends, kwargs):
+        ret = []
+        if 'link_with' in kwargs:
+            link_with = mesonlib.extract_as_list(kwargs, 'link_with', pop = True)
+
+            for link in link_with:
+                ret += self._get_link_args(state, link.held_object, depends,
+                                           use_gir_args=True)
+        return ret
+
+    # May mutate depends and gir_inc_dirs
+    def _scan_include(self, state, depends, gir_inc_dirs, kwargs):
+        ret = []
+
+        if 'includes' in kwargs:
+            includes = mesonlib.extract_as_list(kwargs, 'includes', pop = True)
+            for inc in unholder(includes):
+                if isinstance(inc, str):
+                    ret += ['--include=%s' % (inc, )]
+                elif isinstance(inc, GirTarget):
+                    gir_inc_dirs += [
+                        os.path.join(state.environment.get_build_dir(),
+                                     inc.get_subdir()),
+                    ]
+                    ret += [
+                        "--include-uninstalled=%s" % (os.path.join(inc.get_subdir(), inc.get_basename()), )
+                    ]
+                    depends += [inc]
+                else:
+                    raise MesonException(
+                        'Gir includes must be str, GirTarget, or list of them')
+
+        return ret
+
+    def _scan_symbol_prefix(self, kwargs):
+        ret = []
+
+        if 'symbol_prefix' in kwargs:
+            sym_prefixes = mesonlib.stringlistify(kwargs.pop('symbol_prefix', []))
+            ret += ['--symbol-prefix=%s' % sym_prefix for sym_prefix in sym_prefixes]
+
+        return ret
+
+    def _scan_identifier_prefix(self, kwargs):
+        ret = []
+
+        if 'identifier_prefix' in kwargs:
+            identifier_prefix = kwargs.pop('identifier_prefix')
+            if not isinstance(identifier_prefix, str):
+                raise MesonException('Gir identifier prefix must be str')
+            ret += ['--identifier-prefix=%s' % identifier_prefix]
+
+        return ret
+
+    def _scan_export_packages(self, kwargs):
+        ret = []
+
+        if 'export_packages' in kwargs:
+            pkgs = kwargs.pop('export_packages')
+            if isinstance(pkgs, str):
+                ret += ['--pkg-export=%s' % pkgs]
+            elif isinstance(pkgs, list):
+                ret += ['--pkg-export=%s' % pkg for pkg in pkgs]
+            else:
+                raise MesonException('Gir export packages must be str or list')
+
+        return ret
+
+    def _scan_inc_dirs(self, kwargs):
+        ret = mesonlib.extract_as_list(kwargs, 'include_directories', pop = True)
+        for incd in ret:
+            if not isinstance(incd.held_object, (str, build.IncludeDirs)):
+                raise MesonException(
+                    'Gir include dirs should be include_directories().')
+        return ret
+
+    def _scan_langs(self, state, langs):
+        ret = []
+
+        for lang in langs:
+            link_args = state.environment.coredata.get_external_link_args(MachineChoice.HOST, lang)
+            for link_arg in link_args:
+                if link_arg.startswith('-L'):
+                    ret.append(link_arg)
+
+        return ret
+
+    def _scan_gir_targets(self, state, girtargets):
+        ret = []
+
+        for girtarget in girtargets:
+            if isinstance(girtarget, build.Executable):
+                ret += ['--program', girtarget]
+            else:
+                # Because of https://gitlab.gnome.org/GNOME/gobject-introspection/merge_requests/72
+                # we can't use the full path until this is merged.
+                if isinstance(girtarget, build.SharedLibrary):
+                    libname = girtarget.get_basename()
+                else:
+                    libname = os.path.join("@PRIVATE_OUTDIR_ABS_%s@" % girtarget.get_id(), girtarget.get_filename())
+                ret += ['--library', libname]
+                # need to put our output directory first as we need to use the
+                # generated libraries instead of any possibly installed system/prefix
+                # ones.
+                ret += ["-L@PRIVATE_OUTDIR_ABS_%s@" % girtarget.get_id()]
+                # Needed for the following binutils bug:
+                # https://github.com/mesonbuild/meson/issues/1911
+                # However, g-ir-scanner does not understand -Wl,-rpath
+                # so we need to use -L instead
+                for d in state.backend.determine_rpath_dirs(girtarget):
+                    d = os.path.join(state.environment.get_build_dir(), d)
+                    ret.append('-L' + d)
+
+        return ret
+
+    def _get_girtargets_langs_compilers(self, girtargets):
+        ret = []
+        for girtarget in girtargets:
+            for lang, compiler in girtarget.compilers.items():
+                # XXX: Can you use g-i with any other language?
+                if lang in ('c', 'cpp', 'objc', 'objcpp', 'd'):
+                    ret.append((lang, compiler))
+                    break
+
+        return ret
+
+    def _get_gir_targets_deps(self, girtargets):
+        ret = []
+        for girtarget in girtargets:
+            ret += girtarget.get_all_link_deps()
+            ret += girtarget.get_external_deps()
+        return ret
+
+    def _get_gir_targets_inc_dirs(self, girtargets):
+        ret = []
+        for girtarget in girtargets:
+            ret += girtarget.get_include_dirs()
+        return ret
+
+    def _get_langs_compilers_flags(self, state, langs_compilers):
+        cflags = []
+        internal_ldflags = []
+        external_ldflags = []
+
+        for lang, compiler in langs_compilers:
+            if state.global_args.get(lang):
+                cflags += state.global_args[lang]
+            if state.project_args.get(lang):
+                cflags += state.project_args[lang]
+            if 'b_sanitize' in compiler.base_options:
+                sanitize = state.environment.coredata.base_options['b_sanitize'].value
+                cflags += compiler.sanitizer_compile_args(sanitize)
+                sanitize = sanitize.split(',')
+                # These must be first in ldflags
+                if 'address' in sanitize:
+                    internal_ldflags += ['-lasan']
+                if 'thread' in sanitize:
+                    internal_ldflags += ['-ltsan']
+                if 'undefined' in sanitize:
+                    internal_ldflags += ['-lubsan']
+                # FIXME: Linking directly to lib*san is not recommended but g-ir-scanner
+                # does not understand -f LDFLAGS. https://bugzilla.gnome.org/show_bug.cgi?id=783892
+                # ldflags += compiler.sanitizer_link_args(sanitize)
+
+        return cflags, internal_ldflags, external_ldflags
+
+    def _make_gir_filelist(self, state, srcdir, ns, nsversion, girtargets, libsources):
+        gir_filelist_dir = state.backend.get_target_private_dir_abs(girtargets[0])
+        if not os.path.isdir(gir_filelist_dir):
+            os.mkdir(gir_filelist_dir)
+        gir_filelist_filename = os.path.join(gir_filelist_dir, '%s_%s_gir_filelist' % (ns, nsversion))
+
+        with open(gir_filelist_filename, 'w', encoding='utf-8') as gir_filelist:
+            for s in unholder(libsources):
+                if isinstance(s, (build.CustomTarget, build.CustomTargetIndex)):
+                    for custom_output in s.get_outputs():
+                        gir_filelist.write(os.path.join(state.environment.get_build_dir(),
+                                                        state.backend.get_target_dir(s),
+                                                        custom_output) + '\n')
+                elif isinstance(s, mesonlib.File):
+                    gir_filelist.write(s.rel_to_builddir(state.build_to_src) + '\n')
+                elif isinstance(s, build.GeneratedList):
+                    for gen_src in s.get_outputs():
+                        gir_filelist.write(os.path.join(srcdir, gen_src) + '\n')
+                else:
+                    gir_filelist.write(os.path.join(srcdir, s) + '\n')
+
+        return gir_filelist_filename
+
+    def _make_gir_target(self, state, girfile, scan_command, depends, kwargs):
+        scankwargs = {'output': girfile,
+                      'command': scan_command,
+                      'depends': depends}
+
+        if 'install' in kwargs:
+            scankwargs['install'] = kwargs['install']
+            scankwargs['install_dir'] = kwargs.get('install_dir_gir',
+                                                   os.path.join(state.environment.get_datadir(), 'gir-1.0'))
+
+        if 'build_by_default' in kwargs:
+            scankwargs['build_by_default'] = kwargs['build_by_default']
+
+        return GirTarget(girfile, state.subdir, state.subproject, scankwargs)
+
+    def _make_typelib_target(self, state, typelib_output, typelib_cmd, kwargs):
+        typelib_kwargs = {
+            'output': typelib_output,
+            'command': typelib_cmd,
+        }
+
+        if 'install' in kwargs:
+            typelib_kwargs['install'] = kwargs['install']
+            typelib_kwargs['install_dir'] = kwargs.get('install_dir_typelib',
+                                                       os.path.join(state.environment.get_libdir(), 'girepository-1.0'))
+
+        if 'build_by_default' in kwargs:
+            typelib_kwargs['build_by_default'] = kwargs['build_by_default']
+
+        return TypelibTarget(typelib_output, state.subdir, state.subproject, typelib_kwargs)
+
+    # May mutate depends
+    def _gather_typelib_includes_and_update_depends(self, state, deps, depends):
+        # Need to recursively add deps on GirTarget sources from our
+        # dependencies and also find the include directories needed for the
+        # typelib generation custom target below.
+        typelib_includes = []
+        for dep in unholder(deps):
+            # Add a dependency on each GirTarget listed in dependencies and add
+            # the directory where it will be generated to the typelib includes
+            if isinstance(dep, InternalDependency):
+                for source in unholder(dep.sources):
+                    if isinstance(source, GirTarget) and source not in depends:
+                        depends.append(source)
+                        subdir = os.path.join(state.environment.get_build_dir(),
+                                              source.get_subdir())
+                        if subdir not in typelib_includes:
+                            typelib_includes.append(subdir)
+            # Do the same, but for dependencies of dependencies. These are
+            # stored in the list of generated sources for each link dep (from
+            # girtarget.get_all_link_deps() above).
+            # FIXME: Store this in the original form from declare_dependency()
+            # so it can be used here directly.
+            elif isinstance(dep, build.SharedLibrary):
+                for source in dep.generated:
+                    if isinstance(source, GirTarget):
+                        subdir = os.path.join(state.environment.get_build_dir(),
+                                              source.get_subdir())
+                        if subdir not in typelib_includes:
+                            typelib_includes.append(subdir)
+            if isinstance(dep, Dependency):
+                girdir = dep.get_variable(pkgconfig='girdir', internal='girdir', default_value='')
+                if girdir and girdir not in typelib_includes:
+                    typelib_includes.append(girdir)
+        return typelib_includes
+
+    def _get_external_args_for_langs(self, state, langs):
+        ret = []
+        for lang in langs:
+            ret += state.environment.coredata.get_external_args(MachineChoice.HOST, lang)
+        return ret
+
+    @staticmethod
+    def _get_scanner_cflags(cflags):
+        'g-ir-scanner only accepts -I/-D/-U; must ignore all other flags'
+        for f in cflags:
+            if f.startswith(('-D', '-U', '-I')):
+                yield f
+
+    @staticmethod
+    def _get_scanner_ldflags(ldflags):
+        'g-ir-scanner only accepts -L/-l; must ignore -F and other linker flags'
+        for f in ldflags:
+            if f.startswith(('-L', '-l', '--extra-library')):
+                yield f
+
+    @FeatureNewKwargs('generate_gir', '0.55.0', ['fatal_warnings'])
+    @FeatureNewKwargs('generate_gir', '0.40.0', ['build_by_default'])
+    @permittedKwargs({'sources', 'nsversion', 'namespace', 'symbol_prefix', 'identifier_prefix',
+                      'export_packages', 'includes', 'dependencies', 'link_with', 'include_directories',
+                      'install', 'install_dir_gir', 'install_dir_typelib', 'extra_args',
+                      'packages', 'header', 'build_by_default', 'fatal_warnings'})
+    def generate_gir(self, state, args, kwargs):
+        if not args:
+            raise MesonException('generate_gir takes at least one argument')
+        if kwargs.get('install_dir'):
+            raise MesonException('install_dir is not supported with generate_gir(), see "install_dir_gir" and "install_dir_typelib"')
+
+        girtargets = [self._unwrap_gir_target(arg, state) for arg in args]
+
+        if len(girtargets) > 1 and any([isinstance(el, build.Executable) for el in girtargets]):
+            raise MesonException('generate_gir only accepts a single argument when one of the arguments is an executable')
+
+        gir_dep, giscanner, gicompiler = self._get_gir_dep(state)
+
+        ns = kwargs.get('namespace')
+        if not ns:
+            raise MesonException('Missing "namespace" keyword argument')
+        nsversion = kwargs.get('nsversion')
+        if not nsversion:
+            raise MesonException('Missing "nsversion" keyword argument')
+        libsources = mesonlib.extract_as_list(kwargs, 'sources', pop=True)
+        girfile = '%s-%s.gir' % (ns, nsversion)
+        srcdir = os.path.join(state.environment.get_source_dir(), state.subdir)
+        builddir = os.path.join(state.environment.get_build_dir(), state.subdir)
+        depends = gir_dep.sources + girtargets
+        gir_inc_dirs = []
+        langs_compilers = self._get_girtargets_langs_compilers(girtargets)
+        cflags, internal_ldflags, external_ldflags = self._get_langs_compilers_flags(state, langs_compilers)
+        deps = self._get_gir_targets_deps(girtargets)
+        deps += mesonlib.unholder(extract_as_list(kwargs, 'dependencies', pop=True))
+        deps += [gir_dep]
+        typelib_includes = self._gather_typelib_includes_and_update_depends(state, deps, depends)
+        # ldflags will be misinterpreted by gir scanner (showing
+        # spurious dependencies) but building GStreamer fails if they
+        # are not used here.
+        dep_cflags, dep_internal_ldflags, dep_external_ldflags, gi_includes = \
+            self._get_dependencies_flags(deps, state, depends, use_gir_args=True)
+        cflags += list(self._get_scanner_cflags(dep_cflags))
+        cflags += list(self._get_scanner_cflags(self._get_external_args_for_langs(state, [lc[0] for lc in langs_compilers])))
+        internal_ldflags += list(self._get_scanner_ldflags(dep_internal_ldflags))
+        external_ldflags += list(self._get_scanner_ldflags(dep_external_ldflags))
+        girtargets_inc_dirs = self._get_gir_targets_inc_dirs(girtargets)
+        inc_dirs = self._scan_inc_dirs(kwargs)
+
+        scan_command = [giscanner]
+        scan_command += ['--no-libtool']
+        scan_command += ['--namespace=' + ns, '--nsversion=' + nsversion]
+        scan_command += ['--warn-all']
+        scan_command += ['--output', '@OUTPUT@']
+        scan_command += self._scan_header(kwargs)
+        scan_command += self._scan_extra_args(kwargs)
+        scan_command += ['-I' + srcdir, '-I' + builddir]
+        scan_command += get_include_args(girtargets_inc_dirs)
+        scan_command += ['--filelist=' + self._make_gir_filelist(state, srcdir, ns, nsversion, girtargets, libsources)]
+        scan_command += self._scan_link_withs(state, depends, kwargs)
+        scan_command += self._scan_include(state, depends, gir_inc_dirs, kwargs)
+        scan_command += self._scan_symbol_prefix(kwargs)
+        scan_command += self._scan_identifier_prefix(kwargs)
+        scan_command += self._scan_export_packages(kwargs)
+        scan_command += ['--cflags-begin']
+        scan_command += cflags
+        scan_command += ['--cflags-end']
+        scan_command += get_include_args(inc_dirs)
+        scan_command += get_include_args(list(gi_includes) + gir_inc_dirs + inc_dirs, prefix='--add-include-path=')
+        scan_command += list(internal_ldflags)
+        scan_command += self._scan_gir_targets(state, girtargets)
+        scan_command += self._scan_langs(state, [lc[0] for lc in langs_compilers])
+        scan_command += list(external_ldflags)
+
+        if self._gir_has_option('--sources-top-dirs'):
+            scan_command += ['--sources-top-dirs', os.path.join(state.environment.get_source_dir(), self.interpreter.subproject_dir, state.subproject)]
+            scan_command += ['--sources-top-dirs', os.path.join(state.environment.get_build_dir(), self.interpreter.subproject_dir, state.subproject)]
+
+        if '--warn-error' in scan_command:
+            mlog.deprecation('Passing --warn-error is deprecated in favor of "fatal_warnings" keyword argument since v0.55')
+        fatal_warnings = kwargs.get('fatal_warnings', False)
+        if not isinstance(fatal_warnings, bool):
+            raise MesonException('fatal_warnings keyword argument must be a boolean')
+        if fatal_warnings:
+            scan_command.append('--warn-error')
+
+        scan_target = self._make_gir_target(state, girfile, scan_command, depends, kwargs)
+
+        typelib_output = '%s-%s.typelib' % (ns, nsversion)
+        typelib_cmd = [gicompiler, scan_target, '--output', '@OUTPUT@']
+        typelib_cmd += get_include_args(gir_inc_dirs, prefix='--includedir=')
+
+        for incdir in typelib_includes:
+            typelib_cmd += ["--includedir=" + incdir]
+
+        typelib_target = self._make_typelib_target(state, typelib_output, typelib_cmd, kwargs)
+
+        rv = [scan_target, typelib_target]
+
+        return ModuleReturnValue(rv, rv)
+
+    @FeatureNewKwargs('build target', '0.40.0', ['build_by_default'])
+    @permittedKwargs({'build_by_default', 'depend_files'})
+    def compile_schemas(self, state, args, kwargs):
+        if args:
+            raise MesonException('Compile_schemas does not take positional arguments.')
+        srcdir = os.path.join(state.build_to_src, state.subdir)
+        outdir = state.subdir
+
+        cmd = [self.interpreter.find_program_impl('glib-compile-schemas')]
+        cmd += ['--targetdir', outdir, srcdir]
+        kwargs['command'] = cmd
+        kwargs['input'] = []
+        kwargs['output'] = 'gschemas.compiled'
+        if state.subdir == '':
+            targetname = 'gsettings-compile'
+        else:
+            targetname = 'gsettings-compile-' + state.subdir.replace('/', '_')
+        target_g = build.CustomTarget(targetname, state.subdir, state.subproject, kwargs)
+        return ModuleReturnValue(target_g, [target_g])
+
+    @permittedKwargs({'sources', 'media', 'symlink_media', 'languages'})
+    @FeatureDeprecatedKwargs('gnome.yelp', '0.43.0', ['languages'],
+                             'Use a LINGUAS file in the source directory instead')
+    def yelp(self, state, args, kwargs):
+        if len(args) < 1:
+            raise MesonException('Yelp requires a project id')
+
+        project_id = args[0]
+        sources = mesonlib.stringlistify(kwargs.pop('sources', []))
+        if not sources:
+            if len(args) > 1:
+                sources = mesonlib.stringlistify(args[1:])
+            if not sources:
+                raise MesonException('Yelp requires a list of sources')
+        source_str = '@@'.join(sources)
+
+        langs = mesonlib.stringlistify(kwargs.pop('languages', []))
+        media = mesonlib.stringlistify(kwargs.pop('media', []))
+        symlinks = kwargs.pop('symlink_media', True)
+
+        if not isinstance(symlinks, bool):
+            raise MesonException('symlink_media must be a boolean')
+
+        if kwargs:
+            raise MesonException('Unknown arguments passed: {}'.format(', '.join(kwargs.keys())))
+
+        script = state.environment.get_build_command()
+        args = ['--internal',
+                'yelphelper',
+                'install',
+                '--subdir=' + state.subdir,
+                '--id=' + project_id,
+                '--installdir=' + os.path.join(state.environment.get_datadir(), 'help'),
+                '--sources=' + source_str]
+        if symlinks:
+            args.append('--symlinks=true')
+        if media:
+            args.append('--media=' + '@@'.join(media))
+        if langs:
+            args.append('--langs=' + '@@'.join(langs))
+        inscript = build.RunScript(script, args)
+
+        potargs = state.environment.get_build_command() + [
+            '--internal', 'yelphelper', 'pot',
+            '--subdir=' + state.subdir,
+            '--id=' + project_id,
+            '--sources=' + source_str,
+        ]
+        pottarget = build.RunTarget('help-' + project_id + '-pot', potargs[0],
+                                    potargs[1:], [], state.subdir, state.subproject)
+
+        poargs = state.environment.get_build_command() + [
+            '--internal', 'yelphelper', 'update-po',
+            '--subdir=' + state.subdir,
+            '--id=' + project_id,
+            '--sources=' + source_str,
+            '--langs=' + '@@'.join(langs),
+        ]
+        potarget = build.RunTarget('help-' + project_id + '-update-po', poargs[0],
+                                   poargs[1:], [], state.subdir, state.subproject)
+
+        rv = [inscript, pottarget, potarget]
+        return ModuleReturnValue(None, rv)
+
+    @FeatureNewKwargs('gnome.gtkdoc', '0.52.0', ['check'])
+    @FeatureNewKwargs('gnome.gtkdoc', '0.48.0', ['c_args'])
+    @FeatureNewKwargs('gnome.gtkdoc', '0.48.0', ['module_version'])
+    @FeatureNewKwargs('gnome.gtkdoc', '0.37.0', ['namespace', 'mode'])
+    @permittedKwargs({'main_xml', 'main_sgml', 'src_dir', 'dependencies', 'install',
+                      'install_dir', 'scan_args', 'scanobjs_args', 'gobject_typesfile',
+                      'fixxref_args', 'html_args', 'html_assets', 'content_files',
+                      'mkdb_args', 'ignore_headers', 'include_directories',
+                      'namespace', 'mode', 'expand_content_files', 'module_version',
+                      'c_args', 'check'})
+    def gtkdoc(self, state, args, kwargs):
+        if len(args) != 1:
+            raise MesonException('Gtkdoc must have one positional argument.')
+        modulename = args[0]
+        if not isinstance(modulename, str):
+            raise MesonException('Gtkdoc arg must be string.')
+        if 'src_dir' not in kwargs:
+            raise MesonException('Keyword argument src_dir missing.')
+        main_file = kwargs.get('main_sgml', '')
+        if not isinstance(main_file, str):
+            raise MesonException('Main sgml keyword argument must be a string.')
+        main_xml = kwargs.get('main_xml', '')
+        if not isinstance(main_xml, str):
+            raise MesonException('Main xml keyword argument must be a string.')
+        moduleversion = kwargs.get('module_version', '')
+        if not isinstance(moduleversion, str):
+            raise MesonException('Module version keyword argument must be a string.')
+        if main_xml != '':
+            if main_file != '':
+                raise MesonException('You can only specify main_xml or main_sgml, not both.')
+            main_file = main_xml
+        targetname = modulename + ('-' + moduleversion if moduleversion else '') + '-doc'
+        command = state.environment.get_build_command()
+
+        namespace = kwargs.get('namespace', '')
+        mode = kwargs.get('mode', 'auto')
+        VALID_MODES = ('xml', 'sgml', 'none', 'auto')
+        if mode not in VALID_MODES:
+            raise MesonException('gtkdoc: Mode {} is not a valid mode: {}'.format(mode, VALID_MODES))
+
+        src_dirs = mesonlib.extract_as_list(kwargs, 'src_dir')
+        header_dirs = []
+        for src_dir in src_dirs:
+            if hasattr(src_dir, 'held_object'):
+                src_dir = src_dir.held_object
+                if not isinstance(src_dir, build.IncludeDirs):
+                    raise MesonException('Invalid keyword argument for src_dir.')
+                for inc_dir in src_dir.get_incdirs():
+                    header_dirs.append(os.path.join(state.environment.get_source_dir(),
+                                                    src_dir.get_curdir(), inc_dir))
+                    header_dirs.append(os.path.join(state.environment.get_build_dir(),
+                                                    src_dir.get_curdir(), inc_dir))
+            else:
+                header_dirs.append(src_dir)
+
+        args = ['--internal', 'gtkdoc',
+                '--sourcedir=' + state.environment.get_source_dir(),
+                '--builddir=' + state.environment.get_build_dir(),
+                '--subdir=' + state.subdir,
+                '--headerdirs=' + '@@'.join(header_dirs),
+                '--mainfile=' + main_file,
+                '--modulename=' + modulename,
+                '--moduleversion=' + moduleversion,
+                '--mode=' + mode]
+        for tool in ['scan', 'scangobj', 'mkdb', 'mkhtml', 'fixxref']:
+            program_name = 'gtkdoc-' + tool
+            program = self.interpreter.find_program_impl(program_name)
+            path = program.held_object.get_path()
+            args.append('--{}={}'.format(program_name, path))
+        if namespace:
+            args.append('--namespace=' + namespace)
+        args += self._unpack_args('--htmlargs=', 'html_args', kwargs)
+        args += self._unpack_args('--scanargs=', 'scan_args', kwargs)
+        args += self._unpack_args('--scanobjsargs=', 'scanobjs_args', kwargs)
+        args += self._unpack_args('--gobjects-types-file=', 'gobject_typesfile', kwargs, state)
+        args += self._unpack_args('--fixxrefargs=', 'fixxref_args', kwargs)
+        args += self._unpack_args('--mkdbargs=', 'mkdb_args', kwargs)
+        args += self._unpack_args('--html-assets=', 'html_assets', kwargs, state)
+
+        depends = []
+        content_files = []
+        for s in unholder(mesonlib.extract_as_list(kwargs, 'content_files')):
+            if isinstance(s, (build.CustomTarget, build.CustomTargetIndex)):
+                depends.append(s)
+                for o in s.get_outputs():
+                    content_files.append(os.path.join(state.environment.get_build_dir(),
+                                                      state.backend.get_target_dir(s),
+                                                      o))
+            elif isinstance(s, mesonlib.File):
+                content_files.append(s.absolute_path(state.environment.get_source_dir(),
+                                                     state.environment.get_build_dir()))
+            elif isinstance(s, build.GeneratedList):
+                depends.append(s)
+                for gen_src in s.get_outputs():
+                    content_files.append(os.path.join(state.environment.get_source_dir(),
+                                                      state.subdir,
+                                                      gen_src))
+            elif isinstance(s, str):
+                content_files.append(os.path.join(state.environment.get_source_dir(),
+                                                  state.subdir,
+                                                  s))
+            else:
+                raise MesonException(
+                    'Invalid object type: {!r}'.format(s.__class__.__name__))
+        args += ['--content-files=' + '@@'.join(content_files)]
+
+        args += self._unpack_args('--expand-content-files=', 'expand_content_files', kwargs, state)
+        args += self._unpack_args('--ignore-headers=', 'ignore_headers', kwargs)
+        args += self._unpack_args('--installdir=', 'install_dir', kwargs)
+        args += self._get_build_args(kwargs, state, depends)
+        custom_kwargs = {'output': modulename + '-decl.txt',
+                         'command': command + args,
+                         'depends': depends,
+                         'build_always_stale': True,
+                         }
+        custom_target = build.CustomTarget(targetname, state.subdir, state.subproject, custom_kwargs)
+        alias_target = build.AliasTarget(targetname, [custom_target], state.subdir, state.subproject)
+        if kwargs.get('check', False):
+            check_cmd = self.interpreter.find_program_impl('gtkdoc-check')
+            check_env = ['DOC_MODULE=' + modulename,
+                         'DOC_MAIN_SGML_FILE=' + main_file]
+            check_args = [targetname + '-check', check_cmd]
+            check_kwargs = {'env': check_env,
+                            'workdir': os.path.join(state.environment.get_build_dir(), state.subdir),
+                            'depends': custom_target}
+            self.interpreter.add_test(state.current_node, check_args, check_kwargs, True)
+        res = [custom_target, alias_target]
+        if kwargs.get('install', True):
+            res.append(build.RunScript(command, args))
+        return ModuleReturnValue(custom_target, res)
+
+    def _get_build_args(self, kwargs, state, depends):
+        args = []
+        deps = mesonlib.unholder(extract_as_list(kwargs, 'dependencies'))
+        cflags = []
+        cflags.extend(mesonlib.stringlistify(kwargs.pop('c_args', [])))
+        deps_cflags, internal_ldflags, external_ldflags, gi_includes = \
+            self._get_dependencies_flags(deps, state, depends, include_rpath=True)
+        inc_dirs = mesonlib.extract_as_list(kwargs, 'include_directories')
+        for incd in inc_dirs:
+            if not isinstance(incd.held_object, (str, build.IncludeDirs)):
+                raise MesonException(
+                    'Gir include dirs should be include_directories().')
+
+        cflags.extend(deps_cflags)
+        cflags.extend(get_include_args(inc_dirs))
+        ldflags = []
+        ldflags.extend(internal_ldflags)
+        ldflags.extend(external_ldflags)
+
+        cflags.extend(state.environment.coredata.get_external_args(MachineChoice.HOST, 'c'))
+        ldflags.extend(state.environment.coredata.get_external_link_args(MachineChoice.HOST, 'c'))
+        compiler = state.environment.coredata.compilers[MachineChoice.HOST]['c']
+
+        compiler_flags = self._get_langs_compilers_flags(state, [('c', compiler)])
+        cflags.extend(compiler_flags[0])
+        ldflags.extend(compiler_flags[1])
+        ldflags.extend(compiler_flags[2])
+        if compiler:
+            args += ['--cc=%s' % join_args(compiler.get_exelist())]
+            args += ['--ld=%s' % join_args(compiler.get_linker_exelist())]
+        if cflags:
+            args += ['--cflags=%s' % join_args(cflags)]
+        if ldflags:
+            args += ['--ldflags=%s' % join_args(ldflags)]
+
+        return args
+
+    @noKwargs
+    def gtkdoc_html_dir(self, state, args, kwargs):
+        if len(args) != 1:
+            raise MesonException('Must have exactly one argument.')
+        modulename = args[0]
+        if not isinstance(modulename, str):
+            raise MesonException('Argument must be a string')
+        return ModuleReturnValue(os.path.join('share/gtk-doc/html', modulename), [])
+
+    @staticmethod
+    def _unpack_args(arg, kwarg_name, kwargs, expend_file_state=None):
+        if kwarg_name not in kwargs:
+            return []
+
+        new_args = mesonlib.extract_as_list(kwargs, kwarg_name)
+        args = []
+        for i in new_args:
+            if expend_file_state and isinstance(i, mesonlib.File):
+                i = i.absolute_path(expend_file_state.environment.get_source_dir(), expend_file_state.environment.get_build_dir())
+            elif expend_file_state and isinstance(i, str):
+                i = os.path.join(expend_file_state.environment.get_source_dir(), expend_file_state.subdir, i)
+            elif not isinstance(i, str):
+                raise MesonException(kwarg_name + ' values must be strings.')
+            args.append(i)
+
+        if args:
+            return [arg + '@@'.join(args)]
+
+        return []
+
+    def _get_autocleanup_args(self, kwargs, glib_version):
+        if not mesonlib.version_compare(glib_version, '>= 2.49.1'):
+            # Warn if requested, silently disable if not
+            if 'autocleanup' in kwargs:
+                mlog.warning('Glib version ({}) is too old to support the \'autocleanup\' '
+                             'kwarg, need 2.49.1 or newer'.format(glib_version))
+            return []
+        autocleanup = kwargs.pop('autocleanup', 'all')
+        values = ('none', 'objects', 'all')
+        if autocleanup not in values:
+            raise MesonException('gdbus_codegen does not support {!r} as an autocleanup value, '
+                                 'must be one of: {!r}'.format(autocleanup, ', '.join(values)))
+        return ['--c-generate-autocleanup', autocleanup]
+
+    @FeatureNewKwargs('build target', '0.46.0', ['install_header', 'install_dir', 'sources'])
+    @FeatureNewKwargs('build target', '0.40.0', ['build_by_default'])
+    @FeatureNewKwargs('build target', '0.47.0', ['extra_args', 'autocleanup'])
+    @permittedKwargs({'interface_prefix', 'namespace', 'extra_args', 'autocleanup', 'object_manager', 'build_by_default',
+                      'annotations', 'docbook', 'install_header', 'install_dir', 'sources'})
+    def gdbus_codegen(self, state, args, kwargs):
+        if len(args) not in (1, 2):
+            raise MesonException('gdbus_codegen takes at most two arguments, name and xml file.')
+        namebase = args[0]
+        xml_files = args[1:]
+        cmd = [self.interpreter.find_program_impl('gdbus-codegen')]
+        extra_args = mesonlib.stringlistify(kwargs.pop('extra_args', []))
+        cmd += extra_args
+        # Autocleanup supported?
+        glib_version = self._get_native_glib_version(state)
+        cmd += self._get_autocleanup_args(kwargs, glib_version)
+        if 'interface_prefix' in kwargs:
+            cmd += ['--interface-prefix', kwargs.pop('interface_prefix')]
+        if 'namespace' in kwargs:
+            cmd += ['--c-namespace', kwargs.pop('namespace')]
+        if kwargs.get('object_manager', False):
+            cmd += ['--c-generate-object-manager']
+        if 'sources' in kwargs:
+            xml_files += mesonlib.listify(kwargs.pop('sources'))
+        build_by_default = kwargs.get('build_by_default', False)
+
+        # Annotations are a bit ugly in that they are a list of lists of strings...
+        annotations = kwargs.pop('annotations', [])
+        if not isinstance(annotations, list):
+            raise MesonException('annotations takes a list')
+        if annotations and isinstance(annotations, list) and not isinstance(annotations[0], list):
+            annotations = [annotations]
+
+        for annotation in annotations:
+            if len(annotation) != 3 or not all(isinstance(i, str) for i in annotation):
+                raise MesonException('Annotations must be made up of 3 strings for ELEMENT, KEY, and VALUE')
+            cmd += ['--annotate'] + annotation
+
+        targets = []
+        install_header = kwargs.get('install_header', False)
+        install_dir = kwargs.get('install_dir', state.environment.coredata.get_builtin_option('includedir'))
+
+        output = namebase + '.c'
+        # Added in https://gitlab.gnome.org/GNOME/glib/commit/e4d68c7b3e8b01ab1a4231bf6da21d045cb5a816 (2.55.2)
+        # Fixed in https://gitlab.gnome.org/GNOME/glib/commit/cd1f82d8fc741a2203582c12cc21b4dacf7e1872 (2.56.2)
+        if mesonlib.version_compare(glib_version, '>= 2.56.2'):
+            custom_kwargs = {'input': xml_files,
+                             'output': output,
+                             'command': cmd + ['--body', '--output', '@OUTPUT@', '@INPUT@'],
+                             'build_by_default': build_by_default
+                             }
+        else:
+            if 'docbook' in kwargs:
+                docbook = kwargs['docbook']
+                if not isinstance(docbook, str):
+                    raise MesonException('docbook value must be a string.')
+
+                cmd += ['--generate-docbook', docbook]
+
+            # https://git.gnome.org/browse/glib/commit/?id=ee09bb704fe9ccb24d92dd86696a0e6bb8f0dc1a
+            if mesonlib.version_compare(glib_version, '>= 2.51.3'):
+                cmd += ['--output-directory', '@OUTDIR@', '--generate-c-code', namebase, '@INPUT@']
+            else:
+                self._print_gdbus_warning()
+                cmd += ['--generate-c-code', '@OUTDIR@/' + namebase, '@INPUT@']
+
+            custom_kwargs = {'input': xml_files,
+                             'output': output,
+                             'command': cmd,
+                             'build_by_default': build_by_default
+                             }
+
+        cfile_custom_target = build.CustomTarget(output, state.subdir, state.subproject, custom_kwargs)
+        targets.append(cfile_custom_target)
+
+        output = namebase + '.h'
+        if mesonlib.version_compare(glib_version, '>= 2.56.2'):
+            custom_kwargs = {'input': xml_files,
+                             'output': output,
+                             'command': cmd + ['--header', '--output', '@OUTPUT@', '@INPUT@'],
+                             'build_by_default': build_by_default,
+                             'install': install_header,
+                             'install_dir': install_dir
+                             }
+        else:
+            custom_kwargs = {'input': xml_files,
+                             'output': output,
+                             'command': cmd,
+                             'build_by_default': build_by_default,
+                             'install': install_header,
+                             'install_dir': install_dir,
+                             'depends': cfile_custom_target
+                             }
+
+        hfile_custom_target = build.CustomTarget(output, state.subdir, state.subproject, custom_kwargs)
+        targets.append(hfile_custom_target)
+
+        if 'docbook' in kwargs:
+            docbook = kwargs['docbook']
+            if not isinstance(docbook, str):
+                raise MesonException('docbook value must be a string.')
+
+            docbook_cmd = cmd + ['--output-directory', '@OUTDIR@', '--generate-docbook', docbook, '@INPUT@']
+
+            # The docbook output is always ${docbook}-${name_of_xml_file}
+            output = namebase + '-docbook'
+            outputs = []
+            for f in xml_files:
+                outputs.append('{}-{}'.format(docbook, os.path.basename(str(f))))
+
+            if mesonlib.version_compare(glib_version, '>= 2.56.2'):
+                custom_kwargs = {'input': xml_files,
+                                 'output': outputs,
+                                 'command': docbook_cmd,
+                                 'build_by_default': build_by_default
+                                 }
+            else:
+                custom_kwargs = {'input': xml_files,
+                                 'output': outputs,
+                                 'command': cmd,
+                                 'build_by_default': build_by_default,
+                                 'depends': cfile_custom_target
+                                 }
+
+            docbook_custom_target = build.CustomTarget(output, state.subdir, state.subproject, custom_kwargs)
+            targets.append(docbook_custom_target)
+
+        return ModuleReturnValue(targets, targets)
+
+    @permittedKwargs({'sources', 'c_template', 'h_template', 'install_header', 'install_dir',
+                      'comments', 'identifier_prefix', 'symbol_prefix', 'eprod', 'vprod',
+                      'fhead', 'fprod', 'ftail', 'vhead', 'vtail', 'depends'})
+    def mkenums(self, state, args, kwargs):
+        if len(args) != 1:
+            raise MesonException('Mkenums requires one positional argument.')
+        basename = args[0]
+
+        if 'sources' not in kwargs:
+            raise MesonException('Missing keyword argument "sources".')
+        sources = kwargs.pop('sources')
+        if isinstance(sources, str):
+            sources = [sources]
+        elif not isinstance(sources, list):
+            raise MesonException(
+                'Sources keyword argument must be a string or array.')
+
+        cmd = []
+        known_kwargs = ['comments', 'eprod', 'fhead', 'fprod', 'ftail',
+                        'identifier_prefix', 'symbol_prefix', 'template',
+                        'vhead', 'vprod', 'vtail']
+        known_custom_target_kwargs = ['install_dir', 'build_always',
+                                      'depends', 'depend_files']
+        c_template = h_template = None
+        install_header = False
+        for arg, value in kwargs.items():
+            if arg == 'sources':
+                raise AssertionError("sources should've already been handled")
+            elif arg == 'c_template':
+                c_template = value
+                if isinstance(c_template, mesonlib.File):
+                    c_template = c_template.absolute_path(state.environment.source_dir, state.environment.build_dir)
+                if 'template' in kwargs:
+                    raise MesonException('Mkenums does not accept both '
+                                         'c_template and template keyword '
+                                         'arguments at the same time.')
+            elif arg == 'h_template':
+                h_template = value
+                if isinstance(h_template, mesonlib.File):
+                    h_template = h_template.absolute_path(state.environment.source_dir, state.environment.build_dir)
+                if 'template' in kwargs:
+                    raise MesonException('Mkenums does not accept both '
+                                         'h_template and template keyword '
+                                         'arguments at the same time.')
+            elif arg == 'install_header':
+                install_header = value
+            elif arg in known_kwargs:
+                cmd += ['--' + arg.replace('_', '-'), value]
+            elif arg not in known_custom_target_kwargs:
+                raise MesonException(
+                    'Mkenums does not take a %s keyword argument.' % (arg, ))
+        cmd = [self.interpreter.find_program_impl(['glib-mkenums', 'mkenums'])] + cmd
+        custom_kwargs = {}
+        for arg in known_custom_target_kwargs:
+            if arg in kwargs:
+                custom_kwargs[arg] = kwargs[arg]
+
+        targets = []
+
+        if h_template is not None:
+            h_output = os.path.basename(os.path.splitext(h_template)[0])
+            # We always set template as the first element in the source array
+            # so --template consumes it.
+            h_cmd = cmd + ['--template', '@INPUT@']
+            h_sources = [h_template] + sources
+            custom_kwargs['install'] = install_header
+            if 'install_dir' not in custom_kwargs:
+                custom_kwargs['install_dir'] = \
+                    state.environment.coredata.get_builtin_option('includedir')
+            h_target = self._make_mkenum_custom_target(state, h_sources,
+                                                       h_output, h_cmd,
+                                                       custom_kwargs)
+            targets.append(h_target)
+
+        if c_template is not None:
+            c_output = os.path.basename(os.path.splitext(c_template)[0])
+            # We always set template as the first element in the source array
+            # so --template consumes it.
+            c_cmd = cmd + ['--template', '@INPUT@']
+            c_sources = [c_template] + sources
+            # Never install the C file. Complain on bug tracker if you need it.
+            custom_kwargs['install'] = False
+            if h_template is not None:
+                if 'depends' in custom_kwargs:
+                    custom_kwargs['depends'] += [h_target]
+                else:
+                    custom_kwargs['depends'] = h_target
+            c_target = self._make_mkenum_custom_target(state, c_sources,
+                                                       c_output, c_cmd,
+                                                       custom_kwargs)
+            targets.insert(0, c_target)
+
+        if c_template is None and h_template is None:
+            generic_cmd = cmd + ['@INPUT@']
+            custom_kwargs['install'] = install_header
+            if 'install_dir' not in custom_kwargs:
+                custom_kwargs['install_dir'] = \
+                    state.environment.coredata.get_builtin_option('includedir')
+            target = self._make_mkenum_custom_target(state, sources, basename,
+                                                     generic_cmd, custom_kwargs)
+            return ModuleReturnValue(target, [target])
+        elif len(targets) == 1:
+            return ModuleReturnValue(targets[0], [targets[0]])
+        else:
+            return ModuleReturnValue(targets, targets)
+
+    @FeatureNew('gnome.mkenums_simple', '0.42.0')
+    def mkenums_simple(self, state, args, kwargs):
+        hdr_filename = args[0] + '.h'
+        body_filename = args[0] + '.c'
+
+        # not really needed, just for sanity checking
+        forbidden_kwargs = ['c_template', 'h_template', 'eprod', 'fhead',
+                            'fprod', 'ftail', 'vhead', 'vtail', 'comments']
+        for arg in forbidden_kwargs:
+            if arg in kwargs:
+                raise MesonException('mkenums_simple() does not take a %s keyword argument' % (arg, ))
+
+        # kwargs to pass as-is from mkenums_simple() to mkenums()
+        shared_kwargs = ['sources', 'install_header', 'install_dir',
+                         'identifier_prefix', 'symbol_prefix']
+        mkenums_kwargs = {}
+        for arg in shared_kwargs:
+            if arg in kwargs:
+                mkenums_kwargs[arg] = kwargs[arg]
+
+        # .c file generation
+        c_file_kwargs = copy.deepcopy(mkenums_kwargs)
+        if 'sources' not in kwargs:
+            raise MesonException('Missing keyword argument "sources".')
+        sources = kwargs['sources']
+        if isinstance(sources, str):
+            sources = [sources]
+        elif not isinstance(sources, list):
+            raise MesonException(
+                'Sources keyword argument must be a string or array.')
+
+        # The `install_header` argument will be used by mkenums() when
+        # not using template files, so we need to forcibly unset it
+        # when generating the C source file, otherwise we will end up
+        # installing it
+        c_file_kwargs['install_header'] = False
+
+        header_prefix = kwargs.get('header_prefix', '')
+        decl_decorator = kwargs.get('decorator', '')
+        func_prefix = kwargs.get('function_prefix', '')
+        body_prefix = kwargs.get('body_prefix', '')
+
+        # Maybe we should write our own template files into the build dir
+        # instead, but that seems like much more work, nice as it would be.
+        fhead = ''
+        if body_prefix != '':
+            fhead += '%s\n' % body_prefix
+        fhead += '#include "%s"\n' % hdr_filename
+        for hdr in sources:
+            fhead += '#include "%s"\n' % os.path.basename(str(hdr))
+        fhead += '''
+#define C_ENUM(v) ((gint) v)
+#define C_FLAGS(v) ((guint) v)
+'''
+        c_file_kwargs['fhead'] = fhead
+
+        c_file_kwargs['fprod'] = '''
+/* enumerations from "@basename@" */
+'''
+
+        c_file_kwargs['vhead'] = '''
+GType
+%s@enum_name@_get_type (void)
+{
+  static volatile gsize gtype_id = 0;
+  static const G@Type@Value values[] = {''' % func_prefix
+
+        c_file_kwargs['vprod'] = '    { C_@TYPE@(@VALUENAME@), "@VALUENAME@", "@valuenick@" },'
+
+        c_file_kwargs['vtail'] = '''    { 0, NULL, NULL }
+  };
+  if (g_once_init_enter (>ype_id)) {
+    GType new_type = g_@type@_register_static (g_intern_static_string ("@EnumName@"), values);
+    g_once_init_leave (>ype_id, new_type);
+  }
+  return (GType) gtype_id;
+}'''
+
+        rv = self.mkenums(state, [body_filename], c_file_kwargs)
+        c_file = rv.return_value
+
+        # .h file generation
+        h_file_kwargs = copy.deepcopy(mkenums_kwargs)
+
+        h_file_kwargs['fhead'] = '''#pragma once
+
+#include 
+{}
+
+G_BEGIN_DECLS
+'''.format(header_prefix)
+
+        h_file_kwargs['fprod'] = '''
+/* enumerations from "@basename@" */
+'''
+
+        h_file_kwargs['vhead'] = '''
+{}
+GType {}@enum_name@_get_type (void);
+#define @ENUMPREFIX@_TYPE_@ENUMSHORT@ ({}@enum_name@_get_type())'''.format(decl_decorator, func_prefix, func_prefix)
+
+        h_file_kwargs['ftail'] = '''
+G_END_DECLS'''
+
+        rv = self.mkenums(state, [hdr_filename], h_file_kwargs)
+        h_file = rv.return_value
+
+        return ModuleReturnValue([c_file, h_file], [c_file, h_file])
+
+    @staticmethod
+    def _make_mkenum_custom_target(state, sources, output, cmd, kwargs):
+        custom_kwargs = {
+            'input': sources,
+            'output': output,
+            'capture': True,
+            'command': cmd
+        }
+        custom_kwargs.update(kwargs)
+        return build.CustomTarget(output, state.subdir, state.subproject, custom_kwargs,
+                                  # https://github.com/mesonbuild/meson/issues/973
+                                  absolute_paths=True)
+
+    @permittedKwargs({'sources', 'prefix', 'install_header', 'install_dir', 'stdinc',
+                      'nostdinc', 'internal', 'skip_source', 'valist_marshallers',
+                      'extra_args'})
+    def genmarshal(self, state, args, kwargs):
+        if len(args) != 1:
+            raise MesonException(
+                'Genmarshal requires one positional argument.')
+        output = args[0]
+
+        if 'sources' not in kwargs:
+            raise MesonException('Missing keyword argument "sources".')
+        sources = kwargs.pop('sources')
+        if isinstance(sources, str):
+            sources = [sources]
+        elif not isinstance(sources, list):
+            raise MesonException(
+                'Sources keyword argument must be a string or array.')
+
+        new_genmarshal = mesonlib.version_compare(self._get_native_glib_version(state), '>= 2.53.3')
+
+        cmd = [self.interpreter.find_program_impl('glib-genmarshal')]
+        known_kwargs = ['internal', 'nostdinc', 'skip_source', 'stdinc',
+                        'valist_marshallers', 'extra_args']
+        known_custom_target_kwargs = ['build_always', 'depends',
+                                      'depend_files', 'install_dir',
+                                      'install_header']
+        for arg, value in kwargs.items():
+            if arg == 'prefix':
+                cmd += ['--prefix', value]
+            elif arg == 'extra_args':
+                if new_genmarshal:
+                    cmd += mesonlib.stringlistify(value)
+                else:
+                    mlog.warning('The current version of GLib does not support extra arguments \n'
+                                 'for glib-genmarshal. You need at least GLib 2.53.3. See ',
+                                 mlog.bold('https://github.com/mesonbuild/meson/pull/2049'))
+            elif arg in known_kwargs and value:
+                cmd += ['--' + arg.replace('_', '-')]
+            elif arg not in known_custom_target_kwargs:
+                raise MesonException(
+                    'Genmarshal does not take a %s keyword argument.' % (
+                        arg, ))
+
+        install_header = kwargs.pop('install_header', False)
+        install_dir = kwargs.pop('install_dir', None)
+
+        custom_kwargs = {
+            'input': sources,
+        }
+
+        # https://github.com/GNOME/glib/commit/0fbc98097fac4d3e647684f344e508abae109fdf
+        if mesonlib.version_compare(self._get_native_glib_version(state), '>= 2.51.0'):
+            cmd += ['--output', '@OUTPUT@']
+        else:
+            custom_kwargs['capture'] = True
+
+        for arg in known_custom_target_kwargs:
+            if arg in kwargs:
+                custom_kwargs[arg] = kwargs[arg]
+
+        header_file = output + '.h'
+        custom_kwargs['command'] = cmd + ['--body', '@INPUT@']
+        if mesonlib.version_compare(self._get_native_glib_version(state), '>= 2.53.4'):
+            # Silence any warnings about missing prototypes
+            custom_kwargs['command'] += ['--include-header', header_file]
+        custom_kwargs['output'] = output + '.c'
+        body = build.CustomTarget(output + '_c', state.subdir, state.subproject, custom_kwargs)
+
+        custom_kwargs['install'] = install_header
+        if install_dir is not None:
+            custom_kwargs['install_dir'] = install_dir
+        if new_genmarshal:
+            cmd += ['--pragma-once']
+        custom_kwargs['command'] = cmd + ['--header', '@INPUT@']
+        custom_kwargs['output'] = header_file
+        header = build.CustomTarget(output + '_h', state.subdir, state.subproject, custom_kwargs)
+
+        rv = [body, header]
+        return ModuleReturnValue(rv, rv)
+
+    @staticmethod
+    def _vapi_args_to_command(prefix, variable, kwargs, accept_vapi=False):
+        arg_list = mesonlib.extract_as_list(kwargs, variable)
+        ret = []
+        for arg in arg_list:
+            if not isinstance(arg, str):
+                types = 'strings' + ' or InternalDependencys' if accept_vapi else ''
+                raise MesonException('All {} must be {}'.format(variable, types))
+            ret.append(prefix + arg)
+        return ret
+
+    def _extract_vapi_packages(self, state, kwargs):
+        '''
+        Packages are special because we need to:
+        - Get a list of packages for the .deps file
+        - Get a list of depends for any VapiTargets
+        - Get package name from VapiTargets
+        - Add include dirs for any VapiTargets
+        '''
+        arg_list = kwargs.get('packages')
+        if not arg_list:
+            return [], [], [], []
+        arg_list = mesonlib.listify(arg_list)
+        vapi_depends = []
+        vapi_packages = []
+        vapi_includes = []
+        ret = []
+        remaining_args = []
+        for arg in unholder(arg_list):
+            if isinstance(arg, InternalDependency):
+                targets = [t for t in arg.sources if isinstance(t, VapiTarget)]
+                for target in targets:
+                    srcdir = os.path.join(state.environment.get_source_dir(),
+                                          target.get_subdir())
+                    outdir = os.path.join(state.environment.get_build_dir(),
+                                          target.get_subdir())
+                    outfile = target.get_outputs()[0][:-5] # Strip .vapi
+                    ret.append('--vapidir=' + outdir)
+                    ret.append('--girdir=' + outdir)
+                    ret.append('--pkg=' + outfile)
+                    vapi_depends.append(target)
+                    vapi_packages.append(outfile)
+                    vapi_includes.append(srcdir)
+            else:
+                vapi_packages.append(arg)
+                remaining_args.append(arg)
+
+        kwargs['packages'] = remaining_args
+        vapi_args = ret + self._vapi_args_to_command('--pkg=', 'packages', kwargs, accept_vapi=True)
+        return vapi_args, vapi_depends, vapi_packages, vapi_includes
+
+    def _generate_deps(self, state, library, packages, install_dir):
+        outdir = state.environment.scratch_dir
+        fname = os.path.join(outdir, library + '.deps')
+        with open(fname, 'w') as ofile:
+            for package in packages:
+                ofile.write(package + '\n')
+        return build.Data(mesonlib.File(True, outdir, fname), install_dir)
+
+    def _get_vapi_link_with(self, target):
+        link_with = []
+        for dep in target.get_target_dependencies():
+            if isinstance(dep, build.SharedLibrary):
+                link_with.append(dep)
+            elif isinstance(dep, GirTarget):
+                link_with += self._get_vapi_link_with(dep)
+        return link_with
+
+    @permittedKwargs({'sources', 'packages', 'metadata_dirs', 'gir_dirs',
+                      'vapi_dirs', 'install', 'install_dir'})
+    def generate_vapi(self, state, args, kwargs):
+        if len(args) != 1:
+            raise MesonException('The library name is required')
+
+        if not isinstance(args[0], str):
+            raise MesonException('The first argument must be the name of the library')
+        created_values = []
+
+        library = args[0]
+        build_dir = os.path.join(state.environment.get_build_dir(), state.subdir)
+        source_dir = os.path.join(state.environment.get_source_dir(), state.subdir)
+        pkg_cmd, vapi_depends, vapi_packages, vapi_includes = self._extract_vapi_packages(state, kwargs)
+        if 'VAPIGEN' in os.environ:
+            cmd = [self.interpreter.find_program_impl(os.environ['VAPIGEN'])]
+        else:
+            cmd = [self.interpreter.find_program_impl('vapigen')]
+        cmd += ['--quiet', '--library=' + library, '--directory=' + build_dir]
+        cmd += self._vapi_args_to_command('--vapidir=', 'vapi_dirs', kwargs)
+        cmd += self._vapi_args_to_command('--metadatadir=', 'metadata_dirs', kwargs)
+        cmd += self._vapi_args_to_command('--girdir=', 'gir_dirs', kwargs)
+        cmd += pkg_cmd
+        cmd += ['--metadatadir=' + source_dir]
+
+        if 'sources' not in kwargs:
+            raise MesonException('sources are required to generate the vapi file')
+
+        inputs = mesonlib.extract_as_list(kwargs, 'sources')
+
+        link_with = []
+        for i in inputs:
+            if isinstance(i, str):
+                cmd.append(os.path.join(source_dir, i))
+            elif hasattr(i, 'held_object') and isinstance(i.held_object, GirTarget):
+                link_with += self._get_vapi_link_with(i.held_object)
+                subdir = os.path.join(state.environment.get_build_dir(),
+                                      i.held_object.get_subdir())
+                gir_file = os.path.join(subdir, i.held_object.get_outputs()[0])
+                cmd.append(gir_file)
+            else:
+                raise MesonException('Input must be a str or GirTarget')
+
+        vapi_output = library + '.vapi'
+        custom_kwargs = {
+            'command': cmd,
+            'input': inputs,
+            'output': vapi_output,
+            'depends': vapi_depends,
+        }
+        install_dir = kwargs.get('install_dir',
+                                 os.path.join(state.environment.coredata.get_builtin_option('datadir'),
+                                              'vala', 'vapi'))
+        if kwargs.get('install'):
+            custom_kwargs['install'] = kwargs['install']
+            custom_kwargs['install_dir'] = install_dir
+
+            # We shouldn't need this locally but we install it
+            deps_target = self._generate_deps(state, library, vapi_packages, install_dir)
+            created_values.append(deps_target)
+        vapi_target = VapiTarget(vapi_output, state.subdir, state.subproject, custom_kwargs)
+
+        # So to try our best to get this to just work we need:
+        # - link with with the correct library
+        # - include the vapi and dependent vapi files in sources
+        # - add relevant directories to include dirs
+        incs = [build.IncludeDirs(state.subdir, ['.'] + vapi_includes, False)]
+        sources = [vapi_target] + vapi_depends
+        rv = InternalDependency(None, incs, [], [], link_with, [], sources, [], {})
+        created_values.append(rv)
+        return ModuleReturnValue(rv, created_values)
+
+def initialize(*args, **kwargs):
+    return GnomeModule(*args, **kwargs)
diff --git a/meson/mesonbuild/modules/hotdoc.py b/meson/mesonbuild/modules/hotdoc.py
new file mode 100644
index 000000000..5c04e2740
--- /dev/null
+++ b/meson/mesonbuild/modules/hotdoc.py
@@ -0,0 +1,435 @@
+# Copyright 2018 The Meson development team
+
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+
+#     http://www.apache.org/licenses/LICENSE-2.0
+
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+'''This module provides helper functions for generating documentation using hotdoc'''
+
+import os
+from collections import OrderedDict
+
+from mesonbuild import mesonlib
+from mesonbuild import mlog, build
+from mesonbuild.coredata import MesonException
+from . import ModuleReturnValue
+from . import ExtensionModule
+from . import get_include_args
+from ..dependencies import Dependency, InternalDependency, ExternalProgram
+from ..interpreterbase import FeatureNew, InvalidArguments, noPosargs, noKwargs
+from ..interpreter import CustomTargetHolder
+
+
+def ensure_list(value):
+    if not isinstance(value, list):
+        return [value]
+    return value
+
+
+MIN_HOTDOC_VERSION = '0.8.100'
+
+
+class HotdocTargetBuilder:
+    def __init__(self, name, state, hotdoc, interpreter, kwargs):
+        self.hotdoc = hotdoc
+        self.build_by_default = kwargs.pop('build_by_default', False)
+        self.kwargs = kwargs
+        self.name = name
+        self.state = state
+        self.interpreter = interpreter
+        self.include_paths = OrderedDict()
+
+        self.builddir = state.environment.get_build_dir()
+        self.sourcedir = state.environment.get_source_dir()
+        self.subdir = state.subdir
+        self.build_command = state.environment.get_build_command()
+
+        self.cmd = ['conf', '--project-name', name, "--disable-incremental-build",
+                    '--output', os.path.join(self.builddir, self.subdir, self.name + '-doc')]
+
+        self._extra_extension_paths = set()
+        self.extra_assets = set()
+        self._dependencies = []
+        self._subprojects = []
+
+    def process_known_arg(self, option, types, argname=None,
+                          value_processor=None, mandatory=False,
+                          force_list=False):
+        if not argname:
+            argname = option.strip("-").replace("-", "_")
+
+        value, _ = self.get_value(
+            types, argname, None, value_processor, mandatory, force_list)
+
+        self.set_arg_value(option, value)
+
+    def set_arg_value(self, option, value):
+        if value is None:
+            return
+
+        if isinstance(value, bool):
+            if value:
+                self.cmd.append(option)
+        elif isinstance(value, list):
+            # Do not do anything on empty lists
+            if value:
+                # https://bugs.python.org/issue9334 (from 2010 :( )
+                # The syntax with nargs=+ is inherently ambiguous
+                # A workaround for this case is to simply prefix with a space
+                # every value starting with a dash
+                escaped_value = []
+                for e in value:
+                    if isinstance(e, str) and e.startswith('-'):
+                        escaped_value += [' %s' % e]
+                    else:
+                        escaped_value += [e]
+                if option:
+                    self.cmd.extend([option] + escaped_value)
+                else:
+                    self.cmd.extend(escaped_value)
+        else:
+            # argparse gets confused if value(s) start with a dash.
+            # When an option expects a single value, the unambiguous way
+            # to specify it is with =
+            if isinstance(value, str):
+                self.cmd.extend(['%s=%s' % (option, value)])
+            else:
+                self.cmd.extend([option, value])
+
+    def check_extra_arg_type(self, arg, value):
+        value = getattr(value, 'held_object', value)
+        if isinstance(value, list):
+            for v in value:
+                self.check_extra_arg_type(arg, v)
+            return
+
+        valid_types = (str, bool, mesonlib.File, build.IncludeDirs, build.CustomTarget, build.BuildTarget)
+        if not isinstance(value, valid_types):
+            raise InvalidArguments('Argument "%s=%s" should be of type: %s.' % (
+                arg, value, [t.__name__ for t in valid_types]))
+
+    def process_extra_args(self):
+        for arg, value in self.kwargs.items():
+            option = "--" + arg.replace("_", "-")
+            self.check_extra_arg_type(arg, value)
+            self.set_arg_value(option, value)
+
+    def get_value(self, types, argname, default=None, value_processor=None,
+                  mandatory=False, force_list=False):
+        if not isinstance(types, list):
+            types = [types]
+        try:
+            uvalue = value = self.kwargs.pop(argname)
+            if value_processor:
+                value = value_processor(value)
+
+            for t in types:
+                if isinstance(value, t):
+                    if force_list and not isinstance(value, list):
+                        return [value], uvalue
+                    return value, uvalue
+            raise MesonException("%s field value %s is not valid,"
+                                 " valid types are %s" % (argname, value,
+                                                          types))
+        except KeyError:
+            if mandatory:
+                raise MesonException("%s mandatory field not found" % argname)
+
+            if default is not None:
+                return default, default
+
+        return None, None
+
+    def setup_extension_paths(self, paths):
+        if not isinstance(paths, list):
+            paths = [paths]
+
+        for path in paths:
+            self.add_extension_paths([path])
+
+        return []
+
+    def add_extension_paths(self, paths):
+        for path in paths:
+            if path in self._extra_extension_paths:
+                continue
+
+            self._extra_extension_paths.add(path)
+            self.cmd.extend(["--extra-extension-path", path])
+
+    def process_extra_extension_paths(self):
+        self.get_value([list, str], 'extra_extensions_paths',
+                       default="", value_processor=self.setup_extension_paths)
+
+    def replace_dirs_in_string(self, string):
+        return string.replace("@SOURCE_ROOT@", self.sourcedir).replace("@BUILD_ROOT@", self.builddir)
+
+    def process_gi_c_source_roots(self):
+        if self.hotdoc.run_hotdoc(['--has-extension=gi-extension']) != 0:
+            return
+
+        value, _ = self.get_value([list, str], 'gi_c_source_roots', default=[], force_list=True)
+        value.extend([
+            os.path.join(self.state.environment.get_source_dir(),
+                         self.interpreter.subproject_dir, self.state.subproject),
+            os.path.join(self.state.environment.get_build_dir(), self.interpreter.subproject_dir, self.state.subproject)
+        ])
+
+        self.cmd += ['--gi-c-source-roots'] + value
+
+    def process_dependencies(self, deps):
+        cflags = set()
+        for dep in mesonlib.listify(ensure_list(deps)):
+            dep = getattr(dep, "held_object", dep)
+            if isinstance(dep, InternalDependency):
+                inc_args = get_include_args(dep.include_directories)
+                cflags.update([self.replace_dirs_in_string(x)
+                               for x in inc_args])
+                cflags.update(self.process_dependencies(dep.libraries))
+                cflags.update(self.process_dependencies(dep.sources))
+                cflags.update(self.process_dependencies(dep.ext_deps))
+            elif isinstance(dep, Dependency):
+                cflags.update(dep.get_compile_args())
+            elif isinstance(dep, (build.StaticLibrary, build.SharedLibrary)):
+                self._dependencies.append(dep)
+                for incd in dep.get_include_dirs():
+                    cflags.update(incd.get_incdirs())
+            elif isinstance(dep, HotdocTarget):
+                # Recurse in hotdoc target dependencies
+                self.process_dependencies(dep.get_target_dependencies())
+                self._subprojects.extend(dep.subprojects)
+                self.process_dependencies(dep.subprojects)
+                self.add_include_path(os.path.join(self.builddir, dep.hotdoc_conf.subdir))
+                self.cmd += ['--extra-assets=' + p for p in dep.extra_assets]
+                self.add_extension_paths(dep.extra_extension_paths)
+            elif isinstance(dep, build.CustomTarget) or isinstance(dep, build.BuildTarget):
+                self._dependencies.append(dep)
+
+        return [f.strip('-I') for f in cflags]
+
+    def process_extra_assets(self):
+        self._extra_assets, _ = self.get_value("--extra-assets", (str, list), default=[],
+                                               force_list=True)
+        for assets_path in self._extra_assets:
+            self.cmd.extend(["--extra-assets", assets_path])
+
+    def process_subprojects(self):
+        _, value = self.get_value([
+            list, HotdocTarget], argname="subprojects",
+            force_list=True, value_processor=self.process_dependencies)
+
+        if value is not None:
+            self._subprojects.extend(value)
+
+    def flatten_config_command(self):
+        cmd = []
+        for arg in mesonlib.listify(self.cmd, flatten=True):
+            arg = getattr(arg, 'held_object', arg)
+            if isinstance(arg, mesonlib.File):
+                arg = arg.absolute_path(self.state.environment.get_source_dir(),
+                                        self.state.environment.get_build_dir())
+            elif isinstance(arg, build.IncludeDirs):
+                for inc_dir in arg.get_incdirs():
+                    cmd.append(os.path.join(self.sourcedir, arg.get_curdir(), inc_dir))
+                    cmd.append(os.path.join(self.builddir, arg.get_curdir(), inc_dir))
+
+                continue
+            elif isinstance(arg, build.CustomTarget) or isinstance(arg, build.BuildTarget):
+                self._dependencies.append(arg)
+                arg = self.interpreter.backend.get_target_filename_abs(arg)
+
+            cmd.append(arg)
+
+        return cmd
+
+    def generate_hotdoc_config(self):
+        cwd = os.path.abspath(os.curdir)
+        ncwd = os.path.join(self.sourcedir, self.subdir)
+        mlog.log('Generating Hotdoc configuration for: ', mlog.bold(self.name))
+        os.chdir(ncwd)
+        self.hotdoc.run_hotdoc(self.flatten_config_command())
+        os.chdir(cwd)
+
+    def ensure_file(self, value):
+        if isinstance(value, list):
+            res = []
+            for val in value:
+                res.append(self.ensure_file(val))
+            return res
+
+        if not isinstance(value, mesonlib.File):
+            return mesonlib.File.from_source_file(self.sourcedir, self.subdir, value)
+
+        return value
+
+    def ensure_dir(self, value):
+        if os.path.isabs(value):
+            _dir = value
+        else:
+            _dir = os.path.join(self.sourcedir, self.subdir, value)
+
+        if not os.path.isdir(_dir):
+            raise InvalidArguments('"%s" is not a directory.' % _dir)
+
+        return os.path.relpath(_dir, os.path.join(self.builddir, self.subdir))
+
+    def check_forbiden_args(self):
+        for arg in ['conf_file']:
+            if arg in self.kwargs:
+                raise InvalidArguments('Argument "%s" is forbidden.' % arg)
+
+    def add_include_path(self, path):
+        self.include_paths[path] = path
+
+    def make_targets(self):
+        self.check_forbiden_args()
+        file_types = (str, mesonlib.File)
+        self.process_known_arg("--index", file_types, mandatory=True, value_processor=self.ensure_file)
+        self.process_known_arg("--project-version", str, mandatory=True)
+        self.process_known_arg("--sitemap", file_types, mandatory=True, value_processor=self.ensure_file)
+        self.process_known_arg("--html-extra-theme", str, value_processor=self.ensure_dir)
+        self.process_known_arg(None, list, "include_paths", force_list=True,
+                               value_processor=lambda x: [self.add_include_path(self.ensure_dir(v)) for v in ensure_list(x)])
+        self.process_known_arg('--c-include-directories',
+                               [Dependency, build.StaticLibrary, build.SharedLibrary, list], argname="dependencies",
+                               force_list=True, value_processor=self.process_dependencies)
+        self.process_gi_c_source_roots()
+        self.process_extra_assets()
+        self.process_extra_extension_paths()
+        self.process_subprojects()
+
+        install, install = self.get_value(bool, "install", mandatory=False)
+        self.process_extra_args()
+
+        fullname = self.name + '-doc'
+        hotdoc_config_name = fullname + '.json'
+        hotdoc_config_path = os.path.join(
+            self.builddir, self.subdir, hotdoc_config_name)
+        with open(hotdoc_config_path, 'w') as f:
+            f.write('{}')
+
+        self.cmd += ['--conf-file', hotdoc_config_path]
+        self.add_include_path(os.path.join(self.builddir, self.subdir))
+        self.add_include_path(os.path.join(self.sourcedir, self.subdir))
+
+        depfile = os.path.join(self.builddir, self.subdir, self.name + '.deps')
+        self.cmd += ['--deps-file-dest', depfile]
+
+        for path in self.include_paths.keys():
+            self.cmd.extend(['--include-path', path])
+
+        if self.state.environment.coredata.get_builtin_option('werror', self.state.subproject):
+            self.cmd.append('--fatal-warning')
+        self.generate_hotdoc_config()
+
+        target_cmd = self.build_command + ["--internal", "hotdoc"] + \
+            self.hotdoc.get_command() + ['run', '--conf-file', hotdoc_config_name] + \
+            ['--builddir', os.path.join(self.builddir, self.subdir)]
+
+        target = HotdocTarget(fullname,
+                              subdir=self.subdir,
+                              subproject=self.state.subproject,
+                              hotdoc_conf=mesonlib.File.from_built_file(
+                                  self.subdir, hotdoc_config_name),
+                              extra_extension_paths=self._extra_extension_paths,
+                              extra_assets=self._extra_assets,
+                              subprojects=self._subprojects,
+                              command=target_cmd,
+                              depends=self._dependencies,
+                              output=fullname,
+                              depfile=os.path.basename(depfile),
+                              build_by_default=self.build_by_default)
+
+        install_script = None
+        if install is True:
+            install_script = HotdocRunScript(self.build_command, [
+                "--internal", "hotdoc",
+                "--install", os.path.join(fullname, 'html'),
+                '--name', self.name,
+                '--builddir', os.path.join(self.builddir, self.subdir)] +
+                self.hotdoc.get_command() +
+                ['run', '--conf-file', hotdoc_config_name])
+
+        return (target, install_script)
+
+
+class HotdocTargetHolder(CustomTargetHolder):
+    def __init__(self, target, interp):
+        super().__init__(target, interp)
+        self.methods.update({'config_path': self.config_path_method})
+
+    @noPosargs
+    @noKwargs
+    def config_path_method(self, *args, **kwargs):
+        conf = self.held_object.hotdoc_conf.absolute_path(self.interpreter.environment.source_dir,
+                                                          self.interpreter.environment.build_dir)
+        return self.interpreter.holderify(conf)
+
+
+class HotdocTarget(build.CustomTarget):
+    def __init__(self, name, subdir, subproject, hotdoc_conf, extra_extension_paths, extra_assets,
+                 subprojects, **kwargs):
+        super().__init__(name, subdir, subproject, kwargs, absolute_paths=True)
+        self.hotdoc_conf = hotdoc_conf
+        self.extra_extension_paths = extra_extension_paths
+        self.extra_assets = extra_assets
+        self.subprojects = subprojects
+
+    def __getstate__(self):
+        # Make sure we do not try to pickle subprojects
+        res = self.__dict__.copy()
+        res['subprojects'] = []
+
+        return res
+
+
+class HotdocRunScript(build.RunScript):
+    def __init__(self, script, args):
+        super().__init__(script, args)
+
+
+class HotDocModule(ExtensionModule):
+    @FeatureNew('Hotdoc Module', '0.48.0')
+    def __init__(self, interpreter):
+        super().__init__(interpreter)
+        self.hotdoc = ExternalProgram('hotdoc')
+        if not self.hotdoc.found():
+            raise MesonException('hotdoc executable not found')
+
+        try:
+            from hotdoc.run_hotdoc import run  # noqa: F401
+            self.hotdoc.run_hotdoc = run
+        except Exception as e:
+            raise MesonException('hotdoc %s required but not found. (%s)' % (
+                MIN_HOTDOC_VERSION, e))
+
+    @noKwargs
+    def has_extensions(self, state, args, kwargs):
+        res = self.hotdoc.run_hotdoc(['--has-extension=%s' % extension for extension in args]) == 0
+        return ModuleReturnValue(res, [res])
+
+    def generate_doc(self, state, args, kwargs):
+        if len(args) != 1:
+            raise MesonException('One positional argument is'
+                                 ' required for the project name.')
+
+        project_name = args[0]
+        builder = HotdocTargetBuilder(project_name, state, self.hotdoc, self.interpreter, kwargs)
+        target, install_script = builder.make_targets()
+        targets = [HotdocTargetHolder(target, self.interpreter)]
+        if install_script:
+            targets.append(install_script)
+
+        return ModuleReturnValue(targets[0], targets)
+
+
+def initialize(interpreter):
+    return HotDocModule(interpreter)
diff --git a/meson/mesonbuild/modules/i18n.py b/meson/mesonbuild/modules/i18n.py
new file mode 100644
index 000000000..2652e7d2a
--- /dev/null
+++ b/meson/mesonbuild/modules/i18n.py
@@ -0,0 +1,189 @@
+# Copyright 2016 The Meson development team
+
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+
+#     http://www.apache.org/licenses/LICENSE-2.0
+
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import shutil
+
+from os import path
+from .. import coredata, mesonlib, build, mlog
+from ..mesonlib import MesonException
+from . import ModuleReturnValue
+from . import ExtensionModule
+from ..interpreterbase import permittedKwargs, FeatureNew, FeatureNewKwargs
+
+PRESET_ARGS = {
+    'glib': [
+        '--from-code=UTF-8',
+        '--add-comments',
+
+        # https://developer.gnome.org/glib/stable/glib-I18N.html
+        '--keyword=_',
+        '--keyword=N_',
+        '--keyword=C_:1c,2',
+        '--keyword=NC_:1c,2',
+        '--keyword=g_dcgettext:2',
+        '--keyword=g_dngettext:2,3',
+        '--keyword=g_dpgettext2:2c,3',
+
+        '--flag=N_:1:pass-c-format',
+        '--flag=C_:2:pass-c-format',
+        '--flag=NC_:2:pass-c-format',
+        '--flag=g_dngettext:2:pass-c-format',
+        '--flag=g_strdup_printf:1:c-format',
+        '--flag=g_string_printf:2:c-format',
+        '--flag=g_string_append_printf:2:c-format',
+        '--flag=g_error_new:3:c-format',
+        '--flag=g_set_error:4:c-format',
+        '--flag=g_markup_printf_escaped:1:c-format',
+        '--flag=g_log:3:c-format',
+        '--flag=g_print:1:c-format',
+        '--flag=g_printerr:1:c-format',
+        '--flag=g_printf:1:c-format',
+        '--flag=g_fprintf:2:c-format',
+        '--flag=g_sprintf:2:c-format',
+        '--flag=g_snprintf:3:c-format',
+    ]
+}
+
+
+class I18nModule(ExtensionModule):
+
+    @staticmethod
+    def nogettext_warning():
+        mlog.warning('Gettext not found, all translation targets will be ignored.', once=True)
+        return ModuleReturnValue(None, [])
+
+    @staticmethod
+    def _get_data_dirs(state, dirs):
+        """Returns source directories of relative paths"""
+        src_dir = path.join(state.environment.get_source_dir(), state.subdir)
+        return [path.join(src_dir, d) for d in dirs]
+
+    @FeatureNew('i18n.merge_file', '0.37.0')
+    @FeatureNewKwargs('i18n.merge_file', '0.51.0', ['args'])
+    @permittedKwargs(build.CustomTarget.known_kwargs | {'data_dirs', 'po_dir', 'type', 'args'})
+    def merge_file(self, state, args, kwargs):
+        if not shutil.which('xgettext'):
+            return self.nogettext_warning()
+        podir = kwargs.pop('po_dir', None)
+        if not podir:
+            raise MesonException('i18n: po_dir is a required kwarg')
+        podir = path.join(state.build_to_src, state.subdir, podir)
+
+        file_type = kwargs.pop('type', 'xml')
+        VALID_TYPES = ('xml', 'desktop')
+        if file_type not in VALID_TYPES:
+            raise MesonException('i18n: "{}" is not a valid type {}'.format(file_type, VALID_TYPES))
+
+        datadirs = self._get_data_dirs(state, mesonlib.stringlistify(kwargs.pop('data_dirs', [])))
+        datadirs = '--datadirs=' + ':'.join(datadirs) if datadirs else None
+
+        command = state.environment.get_build_command() + [
+            '--internal', 'msgfmthelper',
+            '@INPUT@', '@OUTPUT@', file_type, podir
+        ]
+        if datadirs:
+            command.append(datadirs)
+
+        if 'args' in kwargs:
+            command.append('--')
+            command.append(mesonlib.stringlistify(kwargs.pop('args', [])))
+
+        kwargs['command'] = command
+
+        inputfile = kwargs['input']
+        if hasattr(inputfile, 'held_object'):
+            ct = build.CustomTarget(kwargs['output'] + '_merge', state.subdir, state.subproject, kwargs)
+        else:
+            if isinstance(inputfile, list):
+                # We only use this input file to create a name of the custom target.
+                # Thus we can ignore the other entries.
+                inputfile = inputfile[0]
+            if isinstance(inputfile, str):
+                inputfile = mesonlib.File.from_source_file(state.environment.source_dir,
+                                                           state.subdir, inputfile)
+            output = kwargs['output']
+            ifile_abs = inputfile.absolute_path(state.environment.source_dir,
+                                                state.environment.build_dir)
+            values = mesonlib.get_filenames_templates_dict([ifile_abs], None)
+            outputs = mesonlib.substitute_values([output], values)
+            output = outputs[0]
+            ct = build.CustomTarget(output + '_' + state.subdir.replace('/', '@').replace('\\', '@') + '_merge', state.subdir, state.subproject, kwargs)
+        return ModuleReturnValue(ct, [ct])
+
+    @FeatureNewKwargs('i18n.gettext', '0.37.0', ['preset'])
+    @FeatureNewKwargs('i18n.gettext', '0.50.0', ['install_dir'])
+    @permittedKwargs({'po_dir', 'data_dirs', 'type', 'languages', 'args', 'preset', 'install', 'install_dir'})
+    def gettext(self, state, args, kwargs):
+        if len(args) != 1:
+            raise coredata.MesonException('Gettext requires one positional argument (package name).')
+        if not shutil.which('xgettext'):
+            return self.nogettext_warning()
+        packagename = args[0]
+        languages = mesonlib.stringlistify(kwargs.get('languages', []))
+        datadirs = self._get_data_dirs(state, mesonlib.stringlistify(kwargs.get('data_dirs', [])))
+        extra_args = mesonlib.stringlistify(kwargs.get('args', []))
+
+        preset = kwargs.pop('preset', None)
+        if preset:
+            preset_args = PRESET_ARGS.get(preset)
+            if not preset_args:
+                raise coredata.MesonException('i18n: Preset "{}" is not one of the valid options: {}'.format(
+                                              preset, list(PRESET_ARGS.keys())))
+            extra_args = set(preset_args + extra_args)
+
+        pkg_arg = '--pkgname=' + packagename
+        lang_arg = '--langs=' + '@@'.join(languages) if languages else None
+        datadirs = '--datadirs=' + ':'.join(datadirs) if datadirs else None
+        extra_args = '--extra-args=' + '@@'.join(extra_args) if extra_args else None
+
+        potargs = state.environment.get_build_command() + ['--internal', 'gettext', 'pot', pkg_arg]
+        if datadirs:
+            potargs.append(datadirs)
+        if extra_args:
+            potargs.append(extra_args)
+        pottarget = build.RunTarget(packagename + '-pot', potargs[0], potargs[1:], [], state.subdir, state.subproject)
+
+        gmoargs = state.environment.get_build_command() + ['--internal', 'gettext', 'gen_gmo']
+        if lang_arg:
+            gmoargs.append(lang_arg)
+        gmotarget = build.RunTarget(packagename + '-gmo', gmoargs[0], gmoargs[1:], [], state.subdir, state.subproject)
+
+        updatepoargs = state.environment.get_build_command() + ['--internal', 'gettext', 'update_po', pkg_arg]
+        if lang_arg:
+            updatepoargs.append(lang_arg)
+        if datadirs:
+            updatepoargs.append(datadirs)
+        if extra_args:
+            updatepoargs.append(extra_args)
+        updatepotarget = build.RunTarget(packagename + '-update-po', updatepoargs[0], updatepoargs[1:], [], state.subdir, state.subproject)
+
+        targets = [pottarget, gmotarget, updatepotarget]
+
+        install = kwargs.get('install', True)
+        if install:
+            install_dir = kwargs.get('install_dir', state.environment.coredata.get_builtin_option('localedir'))
+            script = state.environment.get_build_command()
+            args = ['--internal', 'gettext', 'install',
+                    '--subdir=' + state.subdir,
+                    '--localedir=' + install_dir,
+                    pkg_arg]
+            if lang_arg:
+                args.append(lang_arg)
+            iscript = build.RunScript(script, args)
+            targets.append(iscript)
+
+        return ModuleReturnValue(None, targets)
+
+def initialize(*args, **kwargs):
+    return I18nModule(*args, **kwargs)
diff --git a/meson/mesonbuild/modules/modtest.py b/meson/mesonbuild/modules/modtest.py
new file mode 100644
index 000000000..533989f68
--- /dev/null
+++ b/meson/mesonbuild/modules/modtest.py
@@ -0,0 +1,28 @@
+# Copyright 2015 The Meson development team
+
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+
+#     http://www.apache.org/licenses/LICENSE-2.0
+
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from . import ModuleReturnValue
+from . import ExtensionModule
+from ..interpreterbase import noKwargs
+
+class TestModule(ExtensionModule):
+
+    @noKwargs
+    def print_hello(self, state, args, kwargs):
+        print('Hello from a Meson module')
+        rv = ModuleReturnValue(None, [])
+        return rv
+
+def initialize(*args, **kwargs):
+    return TestModule(*args, **kwargs)
diff --git a/meson/mesonbuild/modules/pkgconfig.py b/meson/mesonbuild/modules/pkgconfig.py
new file mode 100644
index 000000000..b7a12ff74
--- /dev/null
+++ b/meson/mesonbuild/modules/pkgconfig.py
@@ -0,0 +1,527 @@
+# Copyright 2015 The Meson development team
+
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+
+#     http://www.apache.org/licenses/LICENSE-2.0
+
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import os, types
+from pathlib import PurePath
+
+from .. import build
+from .. import dependencies
+from ..dependencies.misc import ThreadDependency
+from .. import mesonlib
+from .. import mlog
+from . import ModuleReturnValue
+from . import ExtensionModule
+from ..interpreterbase import permittedKwargs, FeatureNew, FeatureNewKwargs
+
+already_warned_objs = set()
+
+class DependenciesHelper:
+    def __init__(self, state, name):
+        self.state = state
+        self.name = name
+        self.pub_libs = []
+        self.pub_reqs = []
+        self.priv_libs = []
+        self.priv_reqs = []
+        self.cflags = []
+        self.version_reqs = {}
+
+    def add_pub_libs(self, libs):
+        libs, reqs, cflags = self._process_libs(libs, True)
+        self.pub_libs = libs + self.pub_libs # prepend to preserve dependencies
+        self.pub_reqs += reqs
+        self.cflags += cflags
+
+    def add_priv_libs(self, libs):
+        libs, reqs, _ = self._process_libs(libs, False)
+        self.priv_libs = libs + self.priv_libs
+        self.priv_reqs += reqs
+
+    def add_pub_reqs(self, reqs):
+        self.pub_reqs += self._process_reqs(reqs)
+
+    def add_priv_reqs(self, reqs):
+        self.priv_reqs += self._process_reqs(reqs)
+
+    def _check_generated_pc_deprecation(self, obj):
+        if not hasattr(obj, 'generated_pc_warn'):
+            return
+        name = obj.generated_pc_warn[0]
+        if (name, obj.name) in already_warned_objs:
+            return
+        mlog.deprecation('Library', mlog.bold(obj.name), 'was passed to the '
+                         '"libraries" keyword argument of a previous call '
+                         'to generate() method instead of first positional '
+                         'argument.', 'Adding', mlog.bold(obj.generated_pc),
+                         'to "Requires" field, but this is a deprecated '
+                         'behaviour that will change in a future version '
+                         'of Meson. Please report the issue if this '
+                         'warning cannot be avoided in your case.',
+                         location=obj.generated_pc_warn[1])
+        already_warned_objs.add((name, obj.name))
+
+    def _process_reqs(self, reqs):
+        '''Returns string names of requirements'''
+        processed_reqs = []
+        for obj in mesonlib.unholder(mesonlib.listify(reqs)):
+            if not isinstance(obj, str):
+                FeatureNew.single_use('pkgconfig.generate requirement from non-string object', '0.46.0', self.state.subproject)
+            if hasattr(obj, 'generated_pc'):
+                self._check_generated_pc_deprecation(obj)
+                processed_reqs.append(obj.generated_pc)
+            elif hasattr(obj, 'pcdep'):
+                pcdeps = mesonlib.listify(obj.pcdep)
+                for d in pcdeps:
+                    processed_reqs.append(d.name)
+                    self.add_version_reqs(d.name, obj.version_reqs)
+            elif isinstance(obj, dependencies.PkgConfigDependency):
+                if obj.found():
+                    processed_reqs.append(obj.name)
+                    self.add_version_reqs(obj.name, obj.version_reqs)
+            elif isinstance(obj, str):
+                name, version_req = self.split_version_req(obj)
+                processed_reqs.append(name)
+                self.add_version_reqs(name, version_req)
+            elif isinstance(obj, dependencies.Dependency) and not obj.found():
+                pass
+            elif isinstance(obj, ThreadDependency):
+                pass
+            else:
+                raise mesonlib.MesonException('requires argument not a string, '
+                                              'library with pkgconfig-generated file '
+                                              'or pkgconfig-dependency object, '
+                                              'got {!r}'.format(obj))
+        return processed_reqs
+
+    def add_cflags(self, cflags):
+        self.cflags += mesonlib.stringlistify(cflags)
+
+    def _process_libs(self, libs, public):
+        libs = mesonlib.unholder(mesonlib.listify(libs))
+        processed_libs = []
+        processed_reqs = []
+        processed_cflags = []
+        for obj in libs:
+            shared_library_only = getattr(obj, 'shared_library_only', False)
+            if hasattr(obj, 'pcdep'):
+                pcdeps = mesonlib.listify(obj.pcdep)
+                for d in pcdeps:
+                    processed_reqs.append(d.name)
+                    self.add_version_reqs(d.name, obj.version_reqs)
+            elif hasattr(obj, 'generated_pc'):
+                self._check_generated_pc_deprecation(obj)
+                processed_reqs.append(obj.generated_pc)
+            elif isinstance(obj, dependencies.PkgConfigDependency):
+                if obj.found():
+                    processed_reqs.append(obj.name)
+                    self.add_version_reqs(obj.name, obj.version_reqs)
+            elif isinstance(obj, dependencies.InternalDependency):
+                if obj.found():
+                    processed_libs += obj.get_link_args()
+                    processed_cflags += obj.get_compile_args()
+                    if public:
+                        self.add_pub_libs(obj.libraries)
+                    else:
+                        self.add_priv_libs(obj.libraries)
+            elif isinstance(obj, dependencies.Dependency):
+                if obj.found():
+                    processed_libs += obj.get_link_args()
+                    processed_cflags += obj.get_compile_args()
+            elif isinstance(obj, build.SharedLibrary) and shared_library_only:
+                # Do not pull dependencies for shared libraries because they are
+                # only required for static linking. Adding private requires has
+                # the side effect of exposing their cflags, which is the
+                # intended behaviour of pkg-config but force Debian to add more
+                # than needed build deps.
+                # See https://bugs.freedesktop.org/show_bug.cgi?id=105572
+                processed_libs.append(obj)
+            elif isinstance(obj, (build.SharedLibrary, build.StaticLibrary)):
+                processed_libs.append(obj)
+                if isinstance(obj, build.StaticLibrary) and public:
+                    self.add_pub_libs(obj.get_dependencies(for_pkgconfig=True))
+                    self.add_pub_libs(obj.get_external_deps())
+                else:
+                    self.add_priv_libs(obj.get_dependencies(for_pkgconfig=True))
+                    self.add_priv_libs(obj.get_external_deps())
+            elif isinstance(obj, str):
+                processed_libs.append(obj)
+            else:
+                raise mesonlib.MesonException('library argument not a string, library or dependency object.')
+
+        return processed_libs, processed_reqs, processed_cflags
+
+    def add_version_reqs(self, name, version_reqs):
+        if version_reqs:
+            if name not in self.version_reqs:
+                self.version_reqs[name] = set()
+            # Note that pkg-config is picky about whitespace.
+            # 'foo > 1.2' is ok but 'foo>1.2' is not.
+            # foo, bar' is ok, but 'foo,bar' is not.
+            new_vreqs = [s for s in mesonlib.stringlistify(version_reqs)]
+            self.version_reqs[name].update(new_vreqs)
+
+    def split_version_req(self, s):
+        for op in ['>=', '<=', '!=', '==', '=', '>', '<']:
+            pos = s.find(op)
+            if pos > 0:
+                return s[0:pos].strip(), s[pos:].strip()
+        return s, None
+
+    def format_vreq(self, vreq):
+        # vreq are '>=1.0' and pkgconfig wants '>= 1.0'
+        for op in ['>=', '<=', '!=', '==', '=', '>', '<']:
+            if vreq.startswith(op):
+                return op + ' ' + vreq[len(op):]
+        return vreq
+
+    def format_reqs(self, reqs):
+        result = []
+        for name in reqs:
+            vreqs = self.version_reqs.get(name, None)
+            if vreqs:
+                result += [name + ' ' + self.format_vreq(vreq) for vreq in vreqs]
+            else:
+                result += [name]
+        return ', '.join(result)
+
+    def remove_dups(self):
+        def _fn(xs, libs=False):
+            # Remove duplicates whilst preserving original order
+            result = []
+            for x in xs:
+                # Don't de-dup unknown strings to avoid messing up arguments like:
+                # ['-framework', 'CoreAudio', '-framework', 'CoreMedia']
+                known_flags = ['-pthread']
+                cannot_dedup = libs and isinstance(x, str) and \
+                    not x.startswith(('-l', '-L')) and \
+                    x not in known_flags
+                if x not in result or cannot_dedup:
+                    result.append(x)
+            return result
+        self.pub_libs = _fn(self.pub_libs, True)
+        self.pub_reqs = _fn(self.pub_reqs)
+        self.priv_libs = _fn(self.priv_libs, True)
+        self.priv_reqs = _fn(self.priv_reqs)
+        self.cflags = _fn(self.cflags)
+
+        # Remove from private libs/reqs if they are in public already
+        self.priv_libs = [i for i in self.priv_libs if i not in self.pub_libs]
+        self.priv_reqs = [i for i in self.priv_reqs if i not in self.pub_reqs]
+
+class PkgConfigModule(ExtensionModule):
+
+    def _get_lname(self, l, msg, pcfile):
+        # Nothing special
+        if not l.name_prefix_set:
+            return l.name
+        # Sometimes people want the library to start with 'lib' everywhere,
+        # which is achieved by setting name_prefix to '' and the target name to
+        # 'libfoo'. In that case, try to get the pkg-config '-lfoo' arg correct.
+        if l.prefix == '' and l.name.startswith('lib'):
+            return l.name[3:]
+        # If the library is imported via an import library which is always
+        # named after the target name, '-lfoo' is correct.
+        if isinstance(l, build.SharedLibrary) and l.import_filename:
+            return l.name
+        # In other cases, we can't guarantee that the compiler will be able to
+        # find the library via '-lfoo', so tell the user that.
+        mlog.warning(msg.format(l.name, 'name_prefix', l.name, pcfile))
+        return l.name
+
+    def _escape(self, value):
+        '''
+        We cannot use quote_arg because it quotes with ' and " which does not
+        work with pkg-config and pkgconf at all.
+        '''
+        # We should always write out paths with / because pkg-config requires
+        # spaces to be quoted with \ and that messes up on Windows:
+        # https://bugs.freedesktop.org/show_bug.cgi?id=103203
+        if isinstance(value, PurePath):
+            value = value.as_posix()
+        return value.replace(' ', r'\ ')
+
+    def _make_relative(self, prefix, subdir):
+        if isinstance(prefix, PurePath):
+            prefix = prefix.as_posix()
+        if isinstance(subdir, PurePath):
+            subdir = subdir.as_posix()
+        try:
+            if os.path.commonpath([prefix, subdir]) == prefix:
+                skip = len(prefix) + 1
+                subdir = subdir[skip:]
+        except ValueError:
+            pass
+        return subdir
+
+    def generate_pkgconfig_file(self, state, deps, subdirs, name, description,
+                                url, version, pcfile, conflicts, variables,
+                                uninstalled=False, dataonly=False):
+        deps.remove_dups()
+        coredata = state.environment.get_coredata()
+        if uninstalled:
+            outdir = os.path.join(state.environment.build_dir, 'meson-uninstalled')
+            if not os.path.exists(outdir):
+                os.mkdir(outdir)
+            prefix = PurePath(state.environment.get_build_dir())
+            srcdir = PurePath(state.environment.get_source_dir())
+        else:
+            outdir = state.environment.scratch_dir
+            prefix = PurePath(coredata.get_builtin_option('prefix'))
+            # These always return paths relative to prefix
+            libdir = PurePath(coredata.get_builtin_option('libdir'))
+            incdir = PurePath(coredata.get_builtin_option('includedir'))
+        fname = os.path.join(outdir, pcfile)
+        with open(fname, 'w', encoding='utf-8') as ofile:
+            if not dataonly:
+                ofile.write('prefix={}\n'.format(self._escape(prefix)))
+                if uninstalled:
+                    ofile.write('srcdir={}\n'.format(self._escape(srcdir)))
+                else:
+                    ofile.write('libdir={}\n'.format(self._escape('${prefix}' / libdir)))
+                    ofile.write('includedir={}\n'.format(self._escape('${prefix}' / incdir)))
+            if variables:
+                ofile.write('\n')
+            for k, v in variables:
+                ofile.write('{}={}\n'.format(k, self._escape(v)))
+            ofile.write('\n')
+            ofile.write('Name: %s\n' % name)
+            if len(description) > 0:
+                ofile.write('Description: %s\n' % description)
+            if len(url) > 0:
+                ofile.write('URL: %s\n' % url)
+            ofile.write('Version: %s\n' % version)
+            reqs_str = deps.format_reqs(deps.pub_reqs)
+            if len(reqs_str) > 0:
+                ofile.write('Requires: {}\n'.format(reqs_str))
+            reqs_str = deps.format_reqs(deps.priv_reqs)
+            if len(reqs_str) > 0:
+                ofile.write('Requires.private: {}\n'.format(reqs_str))
+            if len(conflicts) > 0:
+                ofile.write('Conflicts: {}\n'.format(' '.join(conflicts)))
+
+            def generate_libs_flags(libs):
+                msg = 'Library target {0!r} has {1!r} set. Compilers ' \
+                      'may not find it from its \'-l{2}\' linker flag in the ' \
+                      '{3!r} pkg-config file.'
+                Lflags = []
+                for l in libs:
+                    if isinstance(l, str):
+                        yield l
+                    else:
+                        if uninstalled:
+                            install_dir = os.path.dirname(state.backend.get_target_filename_abs(l))
+                        else:
+                            install_dir = l.get_custom_install_dir()[0]
+                        if install_dir is False:
+                            continue
+                        if 'cs' in l.compilers:
+                            if isinstance(install_dir, str):
+                                Lflag = '-r${prefix}/%s/%s' % (self._escape(self._make_relative(prefix, install_dir)), l.filename)
+                            else:  # install_dir is True
+                                Lflag = '-r${libdir}/%s' % l.filename
+                        else:
+                            if isinstance(install_dir, str):
+                                Lflag = '-L${prefix}/%s' % self._escape(self._make_relative(prefix, install_dir))
+                            else:  # install_dir is True
+                                Lflag = '-L${libdir}'
+                        if Lflag not in Lflags:
+                            Lflags.append(Lflag)
+                            yield Lflag
+                        lname = self._get_lname(l, msg, pcfile)
+                        # If using a custom suffix, the compiler may not be able to
+                        # find the library
+                        if l.name_suffix_set:
+                            mlog.warning(msg.format(l.name, 'name_suffix', lname, pcfile))
+                        if 'cs' not in l.compilers:
+                            yield '-l%s' % lname
+
+            def get_uninstalled_include_dirs(libs):
+                result = []
+                for l in libs:
+                    if isinstance(l, str):
+                        continue
+                    if l.get_subdir() not in result:
+                        result.append(l.get_subdir())
+                    for i in l.get_include_dirs():
+                        curdir = i.get_curdir()
+                        for d in i.get_incdirs():
+                            path = os.path.join(curdir, d)
+                            if path not in result:
+                                result.append(path)
+                return result
+
+            def generate_uninstalled_cflags(libs):
+                for d in get_uninstalled_include_dirs(libs):
+                    for basedir in ['${prefix}', '${srcdir}']:
+                        path = os.path.join(basedir, d)
+                        yield '-I%s' % self._escape(path)
+
+            if len(deps.pub_libs) > 0:
+                ofile.write('Libs: {}\n'.format(' '.join(generate_libs_flags(deps.pub_libs))))
+            if len(deps.priv_libs) > 0:
+                ofile.write('Libs.private: {}\n'.format(' '.join(generate_libs_flags(deps.priv_libs))))
+
+            cflags = []
+            if uninstalled:
+                cflags += generate_uninstalled_cflags(deps.pub_libs + deps.priv_libs)
+            else:
+                for d in subdirs:
+                    if d == '.':
+                        cflags.append('-I${includedir}')
+                    else:
+                        cflags.append(self._escape(PurePath('-I${includedir}') / d))
+            cflags += [self._escape(f) for f in deps.cflags]
+            if cflags and not dataonly:
+                ofile.write('Cflags: {}\n'.format(' '.join(cflags)))
+
+    @FeatureNewKwargs('pkgconfig.generate', '0.54.0', ['uninstalled_variables'])
+    @FeatureNewKwargs('pkgconfig.generate', '0.42.0', ['extra_cflags'])
+    @FeatureNewKwargs('pkgconfig.generate', '0.41.0', ['variables'])
+    @FeatureNewKwargs('pkgconfig.generate', '0.54.0', ['dataonly'])
+    @permittedKwargs({'libraries', 'version', 'name', 'description', 'filebase',
+                      'subdirs', 'requires', 'requires_private', 'libraries_private',
+                      'install_dir', 'extra_cflags', 'variables', 'url', 'd_module_versions',
+                      'dataonly', 'conflicts'})
+    def generate(self, state, args, kwargs):
+        default_version = state.project_version['version']
+        default_install_dir = None
+        default_description = None
+        default_name = None
+        mainlib = None
+        default_subdirs = ['.']
+        if not args and 'version' not in kwargs:
+            FeatureNew.single_use('pkgconfig.generate implicit version keyword', '0.46.0', state.subproject)
+        elif len(args) == 1:
+            FeatureNew.single_use('pkgconfig.generate optional positional argument', '0.46.0', state.subproject)
+            mainlib = getattr(args[0], 'held_object', args[0])
+            if not isinstance(mainlib, (build.StaticLibrary, build.SharedLibrary)):
+                raise mesonlib.MesonException('Pkgconfig_gen first positional argument must be a library object')
+            default_name = mainlib.name
+            default_description = state.project_name + ': ' + mainlib.name
+            install_dir = mainlib.get_custom_install_dir()[0]
+            if isinstance(install_dir, str):
+                default_install_dir = os.path.join(install_dir, 'pkgconfig')
+        elif len(args) > 1:
+            raise mesonlib.MesonException('Too many positional arguments passed to Pkgconfig_gen.')
+
+        dataonly = kwargs.get('dataonly', False)
+        if dataonly:
+            default_subdirs = []
+            blocked_vars = ['libraries', 'libraries_private', 'require_private', 'extra_cflags', 'subdirs']
+            if len(set(kwargs) & set(blocked_vars)) > 0:
+                raise mesonlib.MesonException('Cannot combine dataonly with any of {}'.format(blocked_vars))
+
+        subdirs = mesonlib.stringlistify(kwargs.get('subdirs', default_subdirs))
+        version = kwargs.get('version', default_version)
+        if not isinstance(version, str):
+            raise mesonlib.MesonException('Version must be specified.')
+        name = kwargs.get('name', default_name)
+        if not isinstance(name, str):
+            raise mesonlib.MesonException('Name not specified.')
+        filebase = kwargs.get('filebase', name)
+        if not isinstance(filebase, str):
+            raise mesonlib.MesonException('Filebase must be a string.')
+        description = kwargs.get('description', default_description)
+        if not isinstance(description, str):
+            raise mesonlib.MesonException('Description is not a string.')
+        url = kwargs.get('url', '')
+        if not isinstance(url, str):
+            raise mesonlib.MesonException('URL is not a string.')
+        conflicts = mesonlib.stringlistify(kwargs.get('conflicts', []))
+
+        # Prepend the main library to public libraries list. This is required
+        # so dep.add_pub_libs() can handle dependency ordering correctly and put
+        # extra libraries after the main library.
+        libraries = mesonlib.extract_as_list(kwargs, 'libraries')
+        if mainlib:
+            libraries = [mainlib] + libraries
+
+        deps = DependenciesHelper(state, filebase)
+        deps.add_pub_libs(libraries)
+        deps.add_priv_libs(kwargs.get('libraries_private', []))
+        deps.add_pub_reqs(kwargs.get('requires', []))
+        deps.add_priv_reqs(kwargs.get('requires_private', []))
+        deps.add_cflags(kwargs.get('extra_cflags', []))
+
+        dversions = kwargs.get('d_module_versions', None)
+        if dversions:
+            compiler = state.environment.coredata.compilers.host.get('d')
+            if compiler:
+                deps.add_cflags(compiler.get_feature_args({'versions': dversions}, None))
+
+        def parse_variable_list(stringlist):
+            reserved = ['prefix', 'libdir', 'includedir']
+            variables = []
+            for var in stringlist:
+                # foo=bar=baz is ('foo', 'bar=baz')
+                l = var.split('=', 1)
+                if len(l) < 2:
+                    raise mesonlib.MesonException('Invalid variable "{}". Variables must be in \'name=value\' format'.format(var))
+
+                name, value = l[0].strip(), l[1].strip()
+                if not name or not value:
+                    raise mesonlib.MesonException('Invalid variable "{}". Variables must be in \'name=value\' format'.format(var))
+
+                # Variable names must not contain whitespaces
+                if any(c.isspace() for c in name):
+                    raise mesonlib.MesonException('Invalid whitespace in assignment "{}"'.format(var))
+
+                if name in reserved:
+                    raise mesonlib.MesonException('Variable "{}" is reserved'.format(name))
+
+                variables.append((name, value))
+
+            return variables
+
+        variables = parse_variable_list(mesonlib.stringlistify(kwargs.get('variables', [])))
+
+        pcfile = filebase + '.pc'
+        pkgroot = kwargs.get('install_dir', default_install_dir)
+        if pkgroot is None:
+            if mesonlib.is_freebsd():
+                pkgroot = os.path.join(state.environment.coredata.get_builtin_option('prefix'), 'libdata', 'pkgconfig')
+            else:
+                pkgroot = os.path.join(state.environment.coredata.get_builtin_option('libdir'), 'pkgconfig')
+        if not isinstance(pkgroot, str):
+            raise mesonlib.MesonException('Install_dir must be a string.')
+        self.generate_pkgconfig_file(state, deps, subdirs, name, description, url,
+                                     version, pcfile, conflicts, variables,
+                                     False, dataonly)
+        res = build.Data(mesonlib.File(True, state.environment.get_scratch_dir(), pcfile), pkgroot)
+        variables = parse_variable_list(mesonlib.stringlistify(kwargs.get('uninstalled_variables', [])))
+        pcfile = filebase + '-uninstalled.pc'
+        self.generate_pkgconfig_file(state, deps, subdirs, name, description, url,
+                                     version, pcfile, conflicts, variables,
+                                     uninstalled=True, dataonly=dataonly)
+        # Associate the main library with this generated pc file. If the library
+        # is used in any subsequent call to the generated, it will generate a
+        # 'Requires:' or 'Requires.private:'.
+        # Backward compatibility: We used to set 'generated_pc' on all public
+        # libraries instead of just the main one. Keep doing that but warn if
+        # anyone is relying on that deprecated behaviour.
+        if mainlib:
+            if not hasattr(mainlib, 'generated_pc'):
+                mainlib.generated_pc = filebase
+            else:
+                mlog.warning('Already generated a pkg-config file for', mlog.bold(mainlib.name))
+        else:
+            for lib in deps.pub_libs:
+                if not isinstance(lib, str) and not hasattr(lib, 'generated_pc'):
+                    lib.generated_pc = filebase
+                    location = state.current_node
+                    lib.generated_pc_warn = [name, location]
+        return ModuleReturnValue(res, [res])
+
+def initialize(*args, **kwargs):
+    return PkgConfigModule(*args, **kwargs)
diff --git a/meson/mesonbuild/modules/python.py b/meson/mesonbuild/modules/python.py
new file mode 100644
index 000000000..ceabd766b
--- /dev/null
+++ b/meson/mesonbuild/modules/python.py
@@ -0,0 +1,594 @@
+# Copyright 2018 The Meson development team
+
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+
+#     http://www.apache.org/licenses/LICENSE-2.0
+
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import os
+import json
+import shutil
+import typing as T
+
+from pathlib import Path
+from .. import mesonlib
+from ..mesonlib import MachineChoice, MesonException
+from . import ExtensionModule
+from mesonbuild.modules import ModuleReturnValue
+from ..interpreterbase import (
+    noPosargs, noKwargs, permittedKwargs,
+    InvalidArguments,
+    FeatureNew, FeatureNewKwargs, disablerIfNotFound
+)
+from ..interpreter import ExternalProgramHolder, extract_required_kwarg, permitted_kwargs
+from ..build import known_shmod_kwargs
+from .. import mlog
+from ..environment import detect_cpu_family
+from ..dependencies.base import (
+    DependencyMethods, ExternalDependency,
+    ExternalProgram, PkgConfigDependency,
+    NonExistingExternalProgram
+)
+
+mod_kwargs = set(['subdir'])
+mod_kwargs.update(known_shmod_kwargs)
+mod_kwargs -= set(['name_prefix', 'name_suffix'])
+
+class PythonDependency(ExternalDependency):
+
+    def __init__(self, python_holder, environment, kwargs):
+        super().__init__('python', environment, kwargs)
+        self.name = 'python'
+        self.static = kwargs.get('static', False)
+        self.embed = kwargs.get('embed', False)
+        self.version = python_holder.version
+        self.platform = python_holder.platform
+        self.pkgdep = None
+        self.variables = python_holder.variables
+        self.paths = python_holder.paths
+        self.link_libpython = python_holder.link_libpython
+        if mesonlib.version_compare(self.version, '>= 3.0'):
+            self.major_version = 3
+        else:
+            self.major_version = 2
+
+        # We first try to find the necessary python variables using pkgconfig
+        if DependencyMethods.PKGCONFIG in self.methods and not python_holder.is_pypy:
+            pkg_version = self.variables.get('LDVERSION') or self.version
+            pkg_libdir = self.variables.get('LIBPC')
+            pkg_embed = '-embed' if self.embed and mesonlib.version_compare(self.version, '>=3.8') else ''
+            pkg_name = 'python-{}{}'.format(pkg_version, pkg_embed)
+
+            # If python-X.Y.pc exists in LIBPC, we will try to use it
+            if pkg_libdir is not None and Path(os.path.join(pkg_libdir, '{}.pc'.format(pkg_name))).is_file():
+                old_pkg_libdir = os.environ.get('PKG_CONFIG_LIBDIR')
+                old_pkg_path = os.environ.get('PKG_CONFIG_PATH')
+
+                os.environ.pop('PKG_CONFIG_PATH', None)
+
+                if pkg_libdir:
+                    os.environ['PKG_CONFIG_LIBDIR'] = pkg_libdir
+
+                try:
+                    self.pkgdep = PkgConfigDependency(pkg_name, environment, kwargs)
+                    mlog.debug('Found "{}" via pkgconfig lookup in LIBPC ({})'.format(pkg_name, pkg_libdir))
+                    py_lookup_method = 'pkgconfig'
+                except MesonException as e:
+                    mlog.debug('"{}" could not be found in LIBPC ({})'.format(pkg_name, pkg_libdir))
+                    mlog.debug(e)
+
+                if old_pkg_path is not None:
+                    os.environ['PKG_CONFIG_PATH'] = old_pkg_path
+
+                if old_pkg_libdir is not None:
+                    os.environ['PKG_CONFIG_LIBDIR'] = old_pkg_libdir
+                else:
+                    os.environ.pop('PKG_CONFIG_LIBDIR', None)
+            else:
+                mlog.debug('"{}" could not be found in LIBPC ({}), this is likely due to a relocated python installation'.format(pkg_name, pkg_libdir))
+
+            # If lookup via LIBPC failed, try to use fallback PKG_CONFIG_LIBDIR/PKG_CONFIG_PATH mechanisms
+            if self.pkgdep is None or not self.pkgdep.found():
+                try:
+                    self.pkgdep = PkgConfigDependency(pkg_name, environment, kwargs)
+                    mlog.debug('Found "{}" via fallback pkgconfig lookup in PKG_CONFIG_LIBDIR/PKG_CONFIG_PATH'.format(pkg_name))
+                    py_lookup_method = 'pkgconfig-fallback'
+                except MesonException as e:
+                    mlog.debug('"{}" could not be found via fallback pkgconfig lookup in PKG_CONFIG_LIBDIR/PKG_CONFIG_PATH'.format(pkg_name))
+                    mlog.debug(e)
+
+        if self.pkgdep and self.pkgdep.found():
+            self.compile_args = self.pkgdep.get_compile_args()
+            self.link_args = self.pkgdep.get_link_args()
+            self.is_found = True
+            self.pcdep = self.pkgdep
+        else:
+            self.pkgdep = None
+
+            # Finally, try to find python via SYSCONFIG as a final measure
+            if DependencyMethods.SYSCONFIG in self.methods:
+                if mesonlib.is_windows():
+                    self._find_libpy_windows(environment)
+                else:
+                    self._find_libpy(python_holder, environment)
+                if self.is_found:
+                    mlog.debug('Found "python-{}" via SYSCONFIG module'.format(self.version))
+                    py_lookup_method = 'sysconfig'
+
+        if self.is_found:
+            mlog.log('Dependency', mlog.bold(self.name), 'found:', mlog.green('YES ({})'.format(py_lookup_method)))
+        else:
+            mlog.log('Dependency', mlog.bold(self.name), 'found:', mlog.red('NO'))
+
+    def _find_libpy(self, python_holder, environment):
+        if python_holder.is_pypy:
+            if self.major_version == 3:
+                libname = 'pypy3-c'
+            else:
+                libname = 'pypy-c'
+            libdir = os.path.join(self.variables.get('base'), 'bin')
+            libdirs = [libdir]
+        else:
+            libname = 'python{}'.format(self.version)
+            if 'DEBUG_EXT' in self.variables:
+                libname += self.variables['DEBUG_EXT']
+            if 'ABIFLAGS' in self.variables:
+                libname += self.variables['ABIFLAGS']
+            libdirs = []
+
+        largs = self.clib_compiler.find_library(libname, environment, libdirs)
+        if largs is not None:
+            self.link_args = largs
+
+        self.is_found = largs is not None or self.link_libpython
+
+        inc_paths = mesonlib.OrderedSet([
+            self.variables.get('INCLUDEPY'),
+            self.paths.get('include'),
+            self.paths.get('platinclude')])
+
+        self.compile_args += ['-I' + path for path in inc_paths if path]
+
+    def get_windows_python_arch(self):
+        if self.platform == 'mingw':
+            pycc = self.variables.get('CC')
+            if pycc.startswith('x86_64'):
+                return '64'
+            elif pycc.startswith(('i686', 'i386')):
+                return '32'
+            else:
+                mlog.log('MinGW Python built with unknown CC {!r}, please file'
+                         'a bug'.format(pycc))
+                return None
+        elif self.platform == 'win32':
+            return '32'
+        elif self.platform in ('win64', 'win-amd64'):
+            return '64'
+        mlog.log('Unknown Windows Python platform {!r}'.format(self.platform))
+        return None
+
+    def get_windows_link_args(self):
+        if self.platform.startswith('win'):
+            vernum = self.variables.get('py_version_nodot')
+            if self.static:
+                libpath = Path('libs') / 'libpython{}.a'.format(vernum)
+            else:
+                comp = self.get_compiler()
+                if comp.id == "gcc":
+                    libpath = 'python{}.dll'.format(vernum)
+                else:
+                    libpath = Path('libs') / 'python{}.lib'.format(vernum)
+            lib = Path(self.variables.get('base')) / libpath
+        elif self.platform == 'mingw':
+            if self.static:
+                libname = self.variables.get('LIBRARY')
+            else:
+                libname = self.variables.get('LDLIBRARY')
+            lib = Path(self.variables.get('LIBDIR')) / libname
+        if not lib.exists():
+            mlog.log('Could not find Python3 library {!r}'.format(str(lib)))
+            return None
+        return [str(lib)]
+
+    def _find_libpy_windows(self, env):
+        '''
+        Find python3 libraries on Windows and also verify that the arch matches
+        what we are building for.
+        '''
+        pyarch = self.get_windows_python_arch()
+        if pyarch is None:
+            self.is_found = False
+            return
+        arch = detect_cpu_family(env.coredata.compilers.host)
+        if arch == 'x86':
+            arch = '32'
+        elif arch == 'x86_64':
+            arch = '64'
+        else:
+            # We can't cross-compile Python 3 dependencies on Windows yet
+            mlog.log('Unknown architecture {!r} for'.format(arch),
+                     mlog.bold(self.name))
+            self.is_found = False
+            return
+        # Pyarch ends in '32' or '64'
+        if arch != pyarch:
+            mlog.log('Need', mlog.bold(self.name), 'for {}-bit, but '
+                     'found {}-bit'.format(arch, pyarch))
+            self.is_found = False
+            return
+        # This can fail if the library is not found
+        largs = self.get_windows_link_args()
+        if largs is None:
+            self.is_found = False
+            return
+        self.link_args = largs
+        # Compile args
+        inc_paths = mesonlib.OrderedSet([
+            self.variables.get('INCLUDEPY'),
+            self.paths.get('include'),
+            self.paths.get('platinclude')])
+
+        self.compile_args += ['-I' + path for path in inc_paths if path]
+
+        # https://sourceforge.net/p/mingw-w64/mailman/message/30504611/
+        if pyarch == '64' and self.major_version == 2:
+            self.compile_args += ['-DMS_WIN64']
+
+        self.is_found = True
+
+    @staticmethod
+    def get_methods():
+        if mesonlib.is_windows():
+            return [DependencyMethods.PKGCONFIG, DependencyMethods.SYSCONFIG]
+        elif mesonlib.is_osx():
+            return [DependencyMethods.PKGCONFIG, DependencyMethods.EXTRAFRAMEWORK]
+        else:
+            return [DependencyMethods.PKGCONFIG, DependencyMethods.SYSCONFIG]
+
+    def get_pkgconfig_variable(self, variable_name, kwargs):
+        if self.pkgdep:
+            return self.pkgdep.get_pkgconfig_variable(variable_name, kwargs)
+        else:
+            return super().get_pkgconfig_variable(variable_name, kwargs)
+
+
+INTROSPECT_COMMAND = '''import sysconfig
+import json
+import sys
+
+install_paths = sysconfig.get_paths(scheme='posix_prefix', vars={'base': '', 'platbase': '', 'installed_base': ''})
+
+def links_against_libpython():
+    from distutils.core import Distribution, Extension
+    cmd = Distribution().get_command_obj('build_ext')
+    cmd.ensure_finalized()
+    return bool(cmd.get_libraries(Extension('dummy', [])))
+
+print (json.dumps ({
+  'variables': sysconfig.get_config_vars(),
+  'paths': sysconfig.get_paths(),
+  'install_paths': install_paths,
+  'version': sysconfig.get_python_version(),
+  'platform': sysconfig.get_platform(),
+  'is_pypy': '__pypy__' in sys.builtin_module_names,
+  'link_libpython': links_against_libpython(),
+}))
+'''
+
+
+class PythonInstallation(ExternalProgramHolder):
+    def __init__(self, interpreter, python, info):
+        ExternalProgramHolder.__init__(self, python, interpreter.subproject)
+        self.interpreter = interpreter
+        self.subproject = self.interpreter.subproject
+        prefix = self.interpreter.environment.coredata.get_builtin_option('prefix')
+        self.variables = info['variables']
+        self.paths = info['paths']
+        install_paths = info['install_paths']
+        self.platlib_install_path = os.path.join(prefix, install_paths['platlib'][1:])
+        self.purelib_install_path = os.path.join(prefix, install_paths['purelib'][1:])
+        self.version = info['version']
+        self.platform = info['platform']
+        self.is_pypy = info['is_pypy']
+        self.link_libpython = info['link_libpython']
+        self.methods.update({
+            'extension_module': self.extension_module_method,
+            'dependency': self.dependency_method,
+            'install_sources': self.install_sources_method,
+            'get_install_dir': self.get_install_dir_method,
+            'language_version': self.language_version_method,
+            'found': self.found_method,
+            'has_path': self.has_path_method,
+            'get_path': self.get_path_method,
+            'has_variable': self.has_variable_method,
+            'get_variable': self.get_variable_method,
+            'path': self.path_method,
+        })
+
+    @permittedKwargs(mod_kwargs)
+    def extension_module_method(self, args, kwargs):
+        if 'subdir' in kwargs and 'install_dir' in kwargs:
+            raise InvalidArguments('"subdir" and "install_dir" are mutually exclusive')
+
+        if 'subdir' in kwargs:
+            subdir = kwargs.pop('subdir', '')
+            if not isinstance(subdir, str):
+                raise InvalidArguments('"subdir" argument must be a string.')
+
+            kwargs['install_dir'] = os.path.join(self.platlib_install_path, subdir)
+
+        # On macOS and some Linux distros (Debian) distutils doesn't link
+        # extensions against libpython. We call into distutils and mirror its
+        # behavior. See https://github.com/mesonbuild/meson/issues/4117
+        if not self.link_libpython:
+            new_deps = []
+            for holder in mesonlib.extract_as_list(kwargs, 'dependencies'):
+                dep = holder.held_object
+                if isinstance(dep, PythonDependency):
+                    holder = self.interpreter.holderify(dep.get_partial_dependency(compile_args=True))
+                new_deps.append(holder)
+            kwargs['dependencies'] = new_deps
+
+        suffix = self.variables.get('EXT_SUFFIX') or self.variables.get('SO') or self.variables.get('.so')
+
+        # msys2's python3 has "-cpython-36m.dll", we have to be clever
+        split = suffix.rsplit('.', 1)
+        suffix = split.pop(-1)
+        args[0] += ''.join(s for s in split)
+
+        kwargs['name_prefix'] = ''
+        kwargs['name_suffix'] = suffix
+
+        return self.interpreter.func_shared_module(None, args, kwargs)
+
+    @permittedKwargs(permitted_kwargs['dependency'])
+    @FeatureNewKwargs('python_installation.dependency', '0.53.0', ['embed'])
+    def dependency_method(self, args, kwargs):
+        if args:
+            mlog.warning('python_installation.dependency() does not take any '
+                         'positional arguments. It always returns a Python '
+                         'dependency. This will become an error in the future.',
+                         location=self.interpreter.current_node)
+        dep = PythonDependency(self, self.interpreter.environment, kwargs)
+        return self.interpreter.holderify(dep)
+
+    @permittedKwargs(['pure', 'subdir'])
+    def install_sources_method(self, args, kwargs):
+        pure = kwargs.pop('pure', True)
+        if not isinstance(pure, bool):
+            raise InvalidArguments('"pure" argument must be a boolean.')
+
+        subdir = kwargs.pop('subdir', '')
+        if not isinstance(subdir, str):
+            raise InvalidArguments('"subdir" argument must be a string.')
+
+        if pure:
+            kwargs['install_dir'] = os.path.join(self.purelib_install_path, subdir)
+        else:
+            kwargs['install_dir'] = os.path.join(self.platlib_install_path, subdir)
+
+        return self.interpreter.holderify(self.interpreter.func_install_data(None, args, kwargs))
+
+    @noPosargs
+    @permittedKwargs(['pure', 'subdir'])
+    def get_install_dir_method(self, args, kwargs):
+        pure = kwargs.pop('pure', True)
+        if not isinstance(pure, bool):
+            raise InvalidArguments('"pure" argument must be a boolean.')
+
+        subdir = kwargs.pop('subdir', '')
+        if not isinstance(subdir, str):
+            raise InvalidArguments('"subdir" argument must be a string.')
+
+        if pure:
+            res = os.path.join(self.purelib_install_path, subdir)
+        else:
+            res = os.path.join(self.platlib_install_path, subdir)
+
+        return self.interpreter.module_method_callback(ModuleReturnValue(res, []))
+
+    @noPosargs
+    @noKwargs
+    def language_version_method(self, args, kwargs):
+        return self.interpreter.module_method_callback(ModuleReturnValue(self.version, []))
+
+    @noKwargs
+    def has_path_method(self, args, kwargs):
+        if len(args) != 1:
+            raise InvalidArguments('has_path takes exactly one positional argument.')
+        path_name = args[0]
+        if not isinstance(path_name, str):
+            raise InvalidArguments('has_path argument must be a string.')
+
+        return self.interpreter.module_method_callback(ModuleReturnValue(path_name in self.paths, []))
+
+    @noKwargs
+    def get_path_method(self, args, kwargs):
+        if len(args) not in (1, 2):
+            raise InvalidArguments('get_path must have one or two arguments.')
+        path_name = args[0]
+        if not isinstance(path_name, str):
+            raise InvalidArguments('get_path argument must be a string.')
+
+        try:
+            path = self.paths[path_name]
+        except KeyError:
+            if len(args) == 2:
+                path = args[1]
+            else:
+                raise InvalidArguments('{} is not a valid path name'.format(path_name))
+
+        return self.interpreter.module_method_callback(ModuleReturnValue(path, []))
+
+    @noKwargs
+    def has_variable_method(self, args, kwargs):
+        if len(args) != 1:
+            raise InvalidArguments('has_variable takes exactly one positional argument.')
+        var_name = args[0]
+        if not isinstance(var_name, str):
+            raise InvalidArguments('has_variable argument must be a string.')
+
+        return self.interpreter.module_method_callback(ModuleReturnValue(var_name in self.variables, []))
+
+    @noKwargs
+    def get_variable_method(self, args, kwargs):
+        if len(args) not in (1, 2):
+            raise InvalidArguments('get_variable must have one or two arguments.')
+        var_name = args[0]
+        if not isinstance(var_name, str):
+            raise InvalidArguments('get_variable argument must be a string.')
+
+        try:
+            var = self.variables[var_name]
+        except KeyError:
+            if len(args) == 2:
+                var = args[1]
+            else:
+                raise InvalidArguments('{} is not a valid variable name'.format(var_name))
+
+        return self.interpreter.module_method_callback(ModuleReturnValue(var, []))
+
+    @noPosargs
+    @noKwargs
+    @FeatureNew('Python module path method', '0.50.0')
+    def path_method(self, args, kwargs):
+        return super().path_method(args, kwargs)
+
+
+class PythonModule(ExtensionModule):
+
+    @FeatureNew('Python Module', '0.46.0')
+    def __init__(self, *args, **kwargs):
+        super().__init__(*args, **kwargs)
+        self.snippets.add('find_installation')
+
+    # https://www.python.org/dev/peps/pep-0397/
+    def _get_win_pythonpath(self, name_or_path):
+        if name_or_path not in ['python2', 'python3']:
+            return None
+        if not shutil.which('py'):
+            # program not installed, return without an exception
+            return None
+        ver = {'python2': '-2', 'python3': '-3'}[name_or_path]
+        cmd = ['py', ver, '-c', "import sysconfig; print(sysconfig.get_config_var('BINDIR'))"]
+        _, stdout, _ = mesonlib.Popen_safe(cmd)
+        directory = stdout.strip()
+        if os.path.exists(directory):
+            return os.path.join(directory, 'python')
+        else:
+            return None
+
+    def _check_version(self, name_or_path, version):
+        if name_or_path == 'python2':
+            return mesonlib.version_compare(version, '< 3.0')
+        elif name_or_path == 'python3':
+            return mesonlib.version_compare(version, '>= 3.0')
+        return True
+
+    @FeatureNewKwargs('python.find_installation', '0.49.0', ['disabler'])
+    @FeatureNewKwargs('python.find_installation', '0.51.0', ['modules'])
+    @disablerIfNotFound
+    @permittedKwargs({'required', 'modules'})
+    def find_installation(self, interpreter, state, args, kwargs):
+        feature_check = FeatureNew('Passing "feature" option to find_installation', '0.48.0')
+        disabled, required, feature = extract_required_kwarg(kwargs, state.subproject, feature_check)
+        want_modules = mesonlib.extract_as_list(kwargs, 'modules')  # type: T.List[str]
+        found_modules = []    # type: T.List[str]
+        missing_modules = []  # type: T.List[str]
+
+        if len(args) > 1:
+            raise InvalidArguments('find_installation takes zero or one positional argument.')
+
+        name_or_path = state.environment.lookup_binary_entry(MachineChoice.HOST, 'python')
+        if name_or_path is None and args:
+            name_or_path = args[0]
+            if not isinstance(name_or_path, str):
+                raise InvalidArguments('find_installation argument must be a string.')
+
+        if disabled:
+            mlog.log('Program', name_or_path or 'python', 'found:', mlog.red('NO'), '(disabled by:', mlog.bold(feature), ')')
+            return ExternalProgramHolder(NonExistingExternalProgram(), state.subproject)
+
+        if not name_or_path:
+            python = ExternalProgram('python3', mesonlib.python_command, silent=True)
+        else:
+            python = ExternalProgram.from_entry('python3', name_or_path)
+
+            if not python.found() and mesonlib.is_windows():
+                pythonpath = self._get_win_pythonpath(name_or_path)
+                if pythonpath is not None:
+                    name_or_path = pythonpath
+                    python = ExternalProgram(name_or_path, silent=True)
+
+            # Last ditch effort, python2 or python3 can be named python
+            # on various platforms, let's not give up just yet, if an executable
+            # named python is available and has a compatible version, let's use
+            # it
+            if not python.found() and name_or_path in ['python2', 'python3']:
+                python = ExternalProgram('python', silent=True)
+
+        if python.found() and want_modules:
+            for mod in want_modules:
+                p, out, err = mesonlib.Popen_safe(
+                    python.command +
+                    ['-c', 'import {0}'.format(mod)])
+                if p.returncode != 0:
+                    missing_modules.append(mod)
+                else:
+                    found_modules.append(mod)
+
+        msg = ['Program', python.name]
+        if want_modules:
+            msg.append('({})'.format(', '.join(want_modules)))
+        msg.append('found:')
+        if python.found() and not missing_modules:
+            msg.extend([mlog.green('YES'), '({})'.format(' '.join(python.command))])
+        else:
+            msg.append(mlog.red('NO'))
+        if found_modules:
+            msg.append('modules:')
+            msg.append(', '.join(found_modules))
+
+        mlog.log(*msg)
+
+        if not python.found():
+            if required:
+                raise mesonlib.MesonException('{} not found'.format(name_or_path or 'python'))
+            res = ExternalProgramHolder(NonExistingExternalProgram(), state.subproject)
+        elif missing_modules:
+            if required:
+                raise mesonlib.MesonException('{} is missing modules: {}'.format(name_or_path or 'python', ', '.join(missing_modules)))
+            res = ExternalProgramHolder(NonExistingExternalProgram(), state.subproject)
+        else:
+            # Sanity check, we expect to have something that at least quacks in tune
+            try:
+                cmd = python.get_command() + ['-c', INTROSPECT_COMMAND]
+                p, stdout, stderr = mesonlib.Popen_safe(cmd)
+                info = json.loads(stdout)
+            except json.JSONDecodeError:
+                info = None
+                mlog.debug('Could not introspect Python (%s): exit code %d' % (str(p.args), p.returncode))
+                mlog.debug('Program stdout:\n')
+                mlog.debug(stdout)
+                mlog.debug('Program stderr:\n')
+                mlog.debug(stderr)
+
+            if isinstance(info, dict) and 'version' in info and self._check_version(name_or_path, info['version']):
+                res = PythonInstallation(interpreter, python, info)
+            else:
+                res = ExternalProgramHolder(NonExistingExternalProgram(), state.subproject)
+                if required:
+                    raise mesonlib.MesonException('{} is not a valid python or it is missing setuptools'.format(python))
+
+        return res
+
+
+def initialize(*args, **kwargs):
+    return PythonModule(*args, **kwargs)
diff --git a/meson/mesonbuild/modules/python3.py b/meson/mesonbuild/modules/python3.py
new file mode 100644
index 000000000..97bd5ecc4
--- /dev/null
+++ b/meson/mesonbuild/modules/python3.py
@@ -0,0 +1,77 @@
+# Copyright 2016-2017 The Meson development team
+
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+
+#     http://www.apache.org/licenses/LICENSE-2.0
+
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import sysconfig
+from .. import mesonlib, dependencies
+
+from . import ExtensionModule
+from mesonbuild.modules import ModuleReturnValue
+from ..interpreterbase import noKwargs, permittedKwargs, FeatureDeprecated
+from ..build import known_shmod_kwargs
+
+
+class Python3Module(ExtensionModule):
+    @FeatureDeprecated('python3 module', '0.48.0')
+    def __init__(self, *args, **kwargs):
+        super().__init__(*args, **kwargs)
+        self.snippets.add('extension_module')
+
+    @permittedKwargs(known_shmod_kwargs)
+    def extension_module(self, interpreter, state, args, kwargs):
+        if 'name_prefix' in kwargs:
+            raise mesonlib.MesonException('Name_prefix is set automatically, specifying it is forbidden.')
+        if 'name_suffix' in kwargs:
+            raise mesonlib.MesonException('Name_suffix is set automatically, specifying it is forbidden.')
+        host_system = state.host_machine.system
+        if host_system == 'darwin':
+            # Default suffix is 'dylib' but Python does not use it for extensions.
+            suffix = 'so'
+        elif host_system == 'windows':
+            # On Windows the extension is pyd for some unexplainable reason.
+            suffix = 'pyd'
+        else:
+            suffix = []
+        kwargs['name_prefix'] = ''
+        kwargs['name_suffix'] = suffix
+        return interpreter.func_shared_module(None, args, kwargs)
+
+    @noKwargs
+    def find_python(self, state, args, kwargs):
+        command = state.environment.lookup_binary_entry(mesonlib.MachineChoice.HOST, 'python3')
+        if command is not None:
+            py3 = dependencies.ExternalProgram.from_entry('python3', command)
+        else:
+            py3 = dependencies.ExternalProgram('python3', mesonlib.python_command, silent=True)
+        return ModuleReturnValue(py3, [py3])
+
+    @noKwargs
+    def language_version(self, state, args, kwargs):
+        return ModuleReturnValue(sysconfig.get_python_version(), [])
+
+    @noKwargs
+    def sysconfig_path(self, state, args, kwargs):
+        if len(args) != 1:
+            raise mesonlib.MesonException('sysconfig_path() requires passing the name of path to get.')
+        path_name = args[0]
+        valid_names = sysconfig.get_path_names()
+        if path_name not in valid_names:
+            raise mesonlib.MesonException('{} is not a valid path name {}.'.format(path_name, valid_names))
+
+        # Get a relative path without a prefix, e.g. lib/python3.6/site-packages
+        path = sysconfig.get_path(path_name, vars={'base': '', 'platbase': '', 'installed_base': ''})[1:]
+        return ModuleReturnValue(path, [])
+
+
+def initialize(*args, **kwargs):
+    return Python3Module(*args, **kwargs)
diff --git a/meson/mesonbuild/modules/qt.py b/meson/mesonbuild/modules/qt.py
new file mode 100644
index 000000000..c810df655
--- /dev/null
+++ b/meson/mesonbuild/modules/qt.py
@@ -0,0 +1,227 @@
+# Copyright 2015 The Meson development team
+
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+
+#     http://www.apache.org/licenses/LICENSE-2.0
+
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import os
+from .. import mlog
+from .. import build
+from ..mesonlib import MesonException, extract_as_list, File, unholder, version_compare
+from ..dependencies import Dependency, Qt4Dependency, Qt5Dependency, NonExistingExternalProgram
+import xml.etree.ElementTree as ET
+from . import ModuleReturnValue, get_include_args, ExtensionModule
+from ..interpreterbase import noPosargs, permittedKwargs, FeatureNew, FeatureNewKwargs
+from ..interpreter import extract_required_kwarg
+
+_QT_DEPS_LUT = {
+    4: Qt4Dependency,
+    5: Qt5Dependency
+}
+
+
+class QtBaseModule(ExtensionModule):
+    tools_detected = False
+    rcc_supports_depfiles = False
+
+    def __init__(self, interpreter, qt_version=5):
+        ExtensionModule.__init__(self, interpreter)
+        self.snippets.add('has_tools')
+        self.qt_version = qt_version
+
+    def _detect_tools(self, env, method, required=True):
+        if self.tools_detected:
+            return
+        self.tools_detected = True
+        mlog.log('Detecting Qt{version} tools'.format(version=self.qt_version))
+        kwargs = {'required': required, 'modules': 'Core', 'method': method}
+        qt = _QT_DEPS_LUT[self.qt_version](env, kwargs)
+        if qt.found():
+            # Get all tools and then make sure that they are the right version
+            self.moc, self.uic, self.rcc, self.lrelease = qt.compilers_detect(self.interpreter)
+            if version_compare(qt.version, '>=5.14.0'):
+                self.rcc_supports_depfiles = True
+            else:
+                mlog.warning('rcc dependencies will not work properly until you move to Qt >= 5.14:',
+                    mlog.bold('https://bugreports.qt.io/browse/QTBUG-45460'), fatal=False)
+        else:
+            suffix = '-qt{}'.format(self.qt_version)
+            self.moc = NonExistingExternalProgram(name='moc' + suffix)
+            self.uic = NonExistingExternalProgram(name='uic' + suffix)
+            self.rcc = NonExistingExternalProgram(name='rcc' + suffix)
+            self.lrelease = NonExistingExternalProgram(name='lrelease' + suffix)
+
+    def parse_qrc(self, state, rcc_file):
+        if type(rcc_file) is str:
+            abspath = os.path.join(state.environment.source_dir, state.subdir, rcc_file)
+            rcc_dirname = os.path.dirname(abspath)
+        elif type(rcc_file) is File:
+            abspath = rcc_file.absolute_path(state.environment.source_dir, state.environment.build_dir)
+            rcc_dirname = os.path.dirname(abspath)
+
+        try:
+            tree = ET.parse(abspath)
+            root = tree.getroot()
+            result = []
+            for child in root[0]:
+                if child.tag != 'file':
+                    mlog.warning("malformed rcc file: ", os.path.join(state.subdir, rcc_file))
+                    break
+                else:
+                    resource_path = child.text
+                    # We need to guess if the pointed resource is:
+                    #   a) in build directory -> implies a generated file
+                    #   b) in source directory
+                    #   c) somewhere else external dependency file to bundle
+                    #
+                    # Also from qrc documentation: relative path are always from qrc file
+                    # So relative path must always be computed from qrc file !
+                    if os.path.isabs(resource_path):
+                        # a)
+                        if resource_path.startswith(os.path.abspath(state.environment.build_dir)):
+                            resource_relpath = os.path.relpath(resource_path, state.environment.build_dir)
+                            result.append(File(is_built=True, subdir='', fname=resource_relpath))
+                        # either b) or c)
+                        else:
+                            result.append(File(is_built=False, subdir=state.subdir, fname=resource_path))
+                    else:
+                        path_from_rcc = os.path.normpath(os.path.join(rcc_dirname, resource_path))
+                        # a)
+                        if path_from_rcc.startswith(state.environment.build_dir):
+                            result.append(File(is_built=True, subdir=state.subdir, fname=resource_path))
+                        # b)
+                        else:
+                            result.append(File(is_built=False, subdir=state.subdir, fname=path_from_rcc))
+            return result
+        except Exception:
+            return []
+
+    @noPosargs
+    @permittedKwargs({'method', 'required'})
+    @FeatureNew('qt.has_tools', '0.54.0')
+    def has_tools(self, interpreter, state, args, kwargs):
+        method = kwargs.get('method', 'auto')
+        disabled, required, feature = extract_required_kwarg(kwargs, state.subproject, default=False)
+        if disabled:
+            mlog.log('qt.has_tools skipped: feature', mlog.bold(feature), 'disabled')
+            return False
+        self._detect_tools(state.environment, method, required=False)
+        for tool in (self.moc, self.uic, self.rcc, self.lrelease):
+            if not tool.found():
+                if required:
+                    raise MesonException('Qt tools not found')
+                return False
+        return True
+
+    @FeatureNewKwargs('qt.preprocess', '0.49.0', ['uic_extra_arguments'])
+    @FeatureNewKwargs('qt.preprocess', '0.44.0', ['moc_extra_arguments'])
+    @FeatureNewKwargs('qt.preprocess', '0.49.0', ['rcc_extra_arguments'])
+    @permittedKwargs({'moc_headers', 'moc_sources', 'uic_extra_arguments', 'moc_extra_arguments', 'rcc_extra_arguments', 'include_directories', 'dependencies', 'ui_files', 'qresources', 'method'})
+    def preprocess(self, state, args, kwargs):
+        rcc_files, ui_files, moc_headers, moc_sources, uic_extra_arguments, moc_extra_arguments, rcc_extra_arguments, sources, include_directories, dependencies \
+            = [extract_as_list(kwargs, c, pop=True) for c in ['qresources', 'ui_files', 'moc_headers', 'moc_sources', 'uic_extra_arguments', 'moc_extra_arguments', 'rcc_extra_arguments', 'sources', 'include_directories', 'dependencies']]
+        sources += args[1:]
+        method = kwargs.get('method', 'auto')
+        self._detect_tools(state.environment, method)
+        err_msg = "{0} sources specified and couldn't find {1}, " \
+                  "please check your qt{2} installation"
+        if (moc_headers or moc_sources) and not self.moc.found():
+            raise MesonException(err_msg.format('MOC', 'moc-qt{}'.format(self.qt_version), self.qt_version))
+        if rcc_files:
+            if not self.rcc.found():
+                raise MesonException(err_msg.format('RCC', 'rcc-qt{}'.format(self.qt_version), self.qt_version))
+            # custom output name set? -> one output file, multiple otherwise
+            if args:
+                qrc_deps = []
+                for i in rcc_files:
+                    qrc_deps += self.parse_qrc(state, i)
+                name = args[0]
+                rcc_kwargs = {'input': rcc_files,
+                              'output': name + '.cpp',
+                              'command': [self.rcc, '-name', name, '-o', '@OUTPUT@', rcc_extra_arguments, '@INPUT@'],
+                              'depend_files': qrc_deps}
+                res_target = build.CustomTarget(name, state.subdir, state.subproject, rcc_kwargs)
+                sources.append(res_target)
+            else:
+                for rcc_file in rcc_files:
+                    qrc_deps = self.parse_qrc(state, rcc_file)
+                    if type(rcc_file) is str:
+                        basename = os.path.basename(rcc_file)
+                    elif type(rcc_file) is File:
+                        basename = os.path.basename(rcc_file.fname)
+                    name = 'qt' + str(self.qt_version) + '-' + basename.replace('.', '_')
+                    rcc_kwargs = {'input': rcc_file,
+                                  'output': name + '.cpp',
+                                  'command': [self.rcc, '-name', '@BASENAME@', '-o', '@OUTPUT@', rcc_extra_arguments, '@INPUT@'],
+                                  'depend_files': qrc_deps}
+                    if self.rcc_supports_depfiles:
+                        rcc_kwargs['depfile'] = name + '.d'
+                        rcc_kwargs['command'] += ['--depfile', '@DEPFILE@']
+                    res_target = build.CustomTarget(name, state.subdir, state.subproject, rcc_kwargs)
+                    sources.append(res_target)
+        if ui_files:
+            if not self.uic.found():
+                raise MesonException(err_msg.format('UIC', 'uic-qt{}'.format(self.qt_version), self.qt_version))
+            arguments = uic_extra_arguments + ['-o', '@OUTPUT@', '@INPUT@']
+            ui_kwargs = {'output': 'ui_@BASENAME@.h',
+                         'arguments': arguments}
+            ui_gen = build.Generator([self.uic], ui_kwargs)
+            ui_output = ui_gen.process_files('Qt{} ui'.format(self.qt_version), ui_files, state)
+            sources.append(ui_output)
+        inc = get_include_args(include_dirs=include_directories)
+        compile_args = []
+        for dep in unholder(dependencies):
+            if isinstance(dep, Dependency):
+                for arg in dep.get_compile_args():
+                    if arg.startswith('-I') or arg.startswith('-D'):
+                        compile_args.append(arg)
+            else:
+                raise MesonException('Argument is of an unacceptable type {!r}.\nMust be '
+                                     'either an external dependency (returned by find_library() or '
+                                     'dependency()) or an internal dependency (returned by '
+                                     'declare_dependency()).'.format(type(dep).__name__))
+        if moc_headers:
+            arguments = moc_extra_arguments + inc + compile_args + ['@INPUT@', '-o', '@OUTPUT@']
+            moc_kwargs = {'output': 'moc_@BASENAME@.cpp',
+                          'arguments': arguments}
+            moc_gen = build.Generator([self.moc], moc_kwargs)
+            moc_output = moc_gen.process_files('Qt{} moc header'.format(self.qt_version), moc_headers, state)
+            sources.append(moc_output)
+        if moc_sources:
+            arguments = moc_extra_arguments + inc + compile_args + ['@INPUT@', '-o', '@OUTPUT@']
+            moc_kwargs = {'output': '@BASENAME@.moc',
+                          'arguments': arguments}
+            moc_gen = build.Generator([self.moc], moc_kwargs)
+            moc_output = moc_gen.process_files('Qt{} moc source'.format(self.qt_version), moc_sources, state)
+            sources.append(moc_output)
+        return ModuleReturnValue(sources, sources)
+
+    @FeatureNew('qt.compile_translations', '0.44.0')
+    @permittedKwargs({'ts_files', 'install', 'install_dir', 'build_by_default', 'method'})
+    def compile_translations(self, state, args, kwargs):
+        ts_files, install_dir = [extract_as_list(kwargs, c, pop=True) for c in  ['ts_files', 'install_dir']]
+        self._detect_tools(state.environment, kwargs.get('method', 'auto'))
+        translations = []
+        for ts in ts_files:
+            if not self.lrelease.found():
+                raise MesonException('qt.compile_translations: ' +
+                                     self.lrelease.name + ' not found')
+            cmd = [self.lrelease, '@INPUT@', '-qm', '@OUTPUT@']
+            lrelease_kwargs = {'output': '@BASENAME@.qm',
+                               'input': ts,
+                               'install': kwargs.get('install', False),
+                               'build_by_default': kwargs.get('build_by_default', False),
+                               'command': cmd}
+            if install_dir is not None:
+                lrelease_kwargs['install_dir'] = install_dir
+            lrelease_target = build.CustomTarget('qt{}-compile-{}'.format(self.qt_version, ts), state.subdir, state.subproject, lrelease_kwargs)
+            translations.append(lrelease_target)
+        return ModuleReturnValue(translations, translations)
diff --git a/meson/mesonbuild/modules/qt4.py b/meson/mesonbuild/modules/qt4.py
new file mode 100644
index 000000000..e85a1506f
--- /dev/null
+++ b/meson/mesonbuild/modules/qt4.py
@@ -0,0 +1,25 @@
+# Copyright 2015 The Meson development team
+
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+
+#     http://www.apache.org/licenses/LICENSE-2.0
+
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from .qt import QtBaseModule
+
+
+class Qt4Module(QtBaseModule):
+
+    def __init__(self, interpreter):
+        QtBaseModule.__init__(self, interpreter, qt_version=4)
+
+
+def initialize(*args, **kwargs):
+    return Qt4Module(*args, **kwargs)
diff --git a/meson/mesonbuild/modules/qt5.py b/meson/mesonbuild/modules/qt5.py
new file mode 100644
index 000000000..873c2dbeb
--- /dev/null
+++ b/meson/mesonbuild/modules/qt5.py
@@ -0,0 +1,25 @@
+# Copyright 2015 The Meson development team
+
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+
+#     http://www.apache.org/licenses/LICENSE-2.0
+
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from .qt import QtBaseModule
+
+
+class Qt5Module(QtBaseModule):
+
+    def __init__(self, interpreter):
+        QtBaseModule.__init__(self, interpreter, qt_version=5)
+
+
+def initialize(*args, **kwargs):
+    return Qt5Module(*args, **kwargs)
diff --git a/meson/mesonbuild/modules/rpm.py b/meson/mesonbuild/modules/rpm.py
new file mode 100644
index 000000000..073e33889
--- /dev/null
+++ b/meson/mesonbuild/modules/rpm.py
@@ -0,0 +1,181 @@
+# Copyright 2015 The Meson development team
+
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+
+#     http://www.apache.org/licenses/LICENSE-2.0
+
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+'''This module provides helper functions for RPM related
+functionality such as generating template RPM spec file.'''
+
+from .. import build
+from .. import compilers
+import datetime
+from .. import mlog
+from . import GirTarget, TypelibTarget
+from . import ModuleReturnValue
+from . import ExtensionModule
+from ..interpreterbase import noKwargs
+
+import os
+
+class RPMModule(ExtensionModule):
+
+    @noKwargs
+    def generate_spec_template(self, coredata, args, kwargs):
+        self.coredata = coredata
+        required_compilers = self.__get_required_compilers()
+        proj = coredata.project_name.replace(' ', '_').replace('\t', '_')
+        so_installed = False
+        devel_subpkg = False
+        files = set()
+        files_devel = set()
+        to_delete = set()
+        for target in coredata.targets.values():
+            if isinstance(target, build.Executable) and target.need_install:
+                files.add('%%{_bindir}/%s' % target.get_filename())
+            elif isinstance(target, build.SharedLibrary) and target.need_install:
+                files.add('%%{_libdir}/%s' % target.get_filename())
+                for alias in target.get_aliases():
+                    if alias.endswith('.so'):
+                        files_devel.add('%%{_libdir}/%s' % alias)
+                    else:
+                        files.add('%%{_libdir}/%s' % alias)
+                so_installed = True
+            elif isinstance(target, build.StaticLibrary) and target.need_install:
+                to_delete.add('%%{buildroot}%%{_libdir}/%s' % target.get_filename())
+                mlog.warning('removing', mlog.bold(target.get_filename()),
+                             'from package because packaging static libs not recommended')
+            elif isinstance(target, GirTarget) and target.should_install():
+                files_devel.add('%%{_datadir}/gir-1.0/%s' % target.get_filename()[0])
+            elif isinstance(target, TypelibTarget) and target.should_install():
+                files.add('%%{_libdir}/girepository-1.0/%s' % target.get_filename()[0])
+        for header in coredata.headers:
+            if header.get_install_subdir():
+                files_devel.add('%%{_includedir}/%s/' % header.get_install_subdir())
+            else:
+                for hdr_src in header.get_sources():
+                    files_devel.add('%%{_includedir}/%s' % hdr_src)
+        for man in coredata.man:
+            for man_file in man.get_sources():
+                files.add('%%{_mandir}/man%u/%s.*' % (int(man_file.split('.')[-1]), man_file))
+        if files_devel:
+            devel_subpkg = True
+
+        filename = os.path.join(coredata.environment.get_build_dir(),
+                                '%s.spec' % proj)
+        with open(filename, 'w+') as fn:
+            fn.write('Name: %s\n' % proj)
+            fn.write('Version: # FIXME\n')
+            fn.write('Release: 1%{?dist}\n')
+            fn.write('Summary: # FIXME\n')
+            fn.write('License: # FIXME\n')
+            fn.write('\n')
+            fn.write('Source0: %{name}-%{version}.tar.xz # FIXME\n')
+            fn.write('\n')
+            fn.write('BuildRequires: meson\n')
+            for compiler in required_compilers:
+                fn.write('BuildRequires: %s\n' % compiler)
+            for dep in coredata.environment.coredata.deps.host:
+                fn.write('BuildRequires: pkgconfig(%s)\n' % dep[0])
+#   ext_libs and ext_progs have been removed from coredata so the following code
+#   no longer works. It is kept as a reminder of the idea should anyone wish
+#   to re-implement it.
+#
+#            for lib in state.environment.coredata.ext_libs.values():
+#                name = lib.get_name()
+#                fn.write('BuildRequires: {} # FIXME\n'.format(name))
+#                mlog.warning('replace', mlog.bold(name), 'with the real package.',
+#                             'You can use following command to find package which '
+#                             'contains this lib:',
+#                             mlog.bold("dnf provides '*/lib{}.so'".format(name)))
+#            for prog in state.environment.coredata.ext_progs.values():
+#                if not prog.found():
+#                    fn.write('BuildRequires: %%{_bindir}/%s # FIXME\n' %
+#                             prog.get_name())
+#                else:
+#                    fn.write('BuildRequires: {}\n'.format(prog.get_path()))
+            fn.write('\n')
+            fn.write('%description\n')
+            fn.write('\n')
+            if devel_subpkg:
+                fn.write('%package devel\n')
+                fn.write('Summary: Development files for %{name}\n')
+                fn.write('Requires: %{name}%{?_isa} = %{?epoch:%{epoch}:}{version}-%{release}\n')
+                fn.write('\n')
+                fn.write('%description devel\n')
+                fn.write('Development files for %{name}.\n')
+                fn.write('\n')
+            fn.write('%prep\n')
+            fn.write('%autosetup\n')
+            fn.write('\n')
+            fn.write('%build\n')
+            fn.write('%meson\n')
+            fn.write('%meson_build\n')
+            fn.write('\n')
+            fn.write('%install\n')
+            fn.write('%meson_install\n')
+            if to_delete:
+                fn.write('rm -vf %s\n' % ' '.join(to_delete))
+            fn.write('\n')
+            fn.write('%check\n')
+            fn.write('%meson_test\n')
+            fn.write('\n')
+            fn.write('%files\n')
+            for f in files:
+                fn.write('%s\n' % f)
+            fn.write('\n')
+            if devel_subpkg:
+                fn.write('%files devel\n')
+                for f in files_devel:
+                    fn.write('%s\n' % f)
+                fn.write('\n')
+            if so_installed:
+                fn.write('%post -p /sbin/ldconfig\n')
+                fn.write('%postun -p /sbin/ldconfig\n')
+            fn.write('\n')
+            fn.write('%changelog\n')
+            fn.write('* %s meson  - \n' %
+                     datetime.date.today().strftime('%a %b %d %Y'))
+            fn.write('- \n')
+            fn.write('\n')
+        mlog.log('RPM spec template written to %s.spec.\n' % proj)
+        return ModuleReturnValue(None, [])
+
+    def __get_required_compilers(self):
+        required_compilers = set()
+        for compiler in self.coredata.environment.coredata.compilers.host.values():
+            # Elbrus has one 'lcc' package for every compiler
+            if isinstance(compiler, compilers.GnuCCompiler):
+                required_compilers.add('gcc')
+            elif isinstance(compiler, compilers.GnuCPPCompiler):
+                required_compilers.add('gcc-c++')
+            elif isinstance(compiler, compilers.ElbrusCCompiler):
+                required_compilers.add('lcc')
+            elif isinstance(compiler, compilers.ElbrusCPPCompiler):
+                required_compilers.add('lcc')
+            elif isinstance(compiler, compilers.ElbrusFortranCompiler):
+                required_compilers.add('lcc')
+            elif isinstance(compiler, compilers.ValaCompiler):
+                required_compilers.add('vala')
+            elif isinstance(compiler, compilers.GnuFortranCompiler):
+                required_compilers.add('gcc-gfortran')
+            elif isinstance(compiler, compilers.GnuObjCCompiler):
+                required_compilers.add('gcc-objc')
+            elif compiler == compilers.GnuObjCPPCompiler:
+                required_compilers.add('gcc-objc++')
+            else:
+                mlog.log('RPM spec file not created, generation not allowed for:',
+                         mlog.bold(compiler.get_id()))
+        return required_compilers
+
+
+def initialize(*args, **kwargs):
+    return RPMModule(*args, **kwargs)
diff --git a/meson/mesonbuild/modules/sourceset.py b/meson/mesonbuild/modules/sourceset.py
new file mode 100644
index 000000000..e23e12ec4
--- /dev/null
+++ b/meson/mesonbuild/modules/sourceset.py
@@ -0,0 +1,200 @@
+# Copyright 2019 The Meson development team
+
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+
+#     http://www.apache.org/licenses/LICENSE-2.0
+
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from collections import namedtuple
+from .. import mesonlib
+from ..mesonlib import listify
+from . import ExtensionModule
+from ..interpreterbase import (
+    noPosargs, noKwargs, permittedKwargs,
+    InterpreterObject, MutableInterpreterObject, ObjectHolder,
+    InterpreterException, InvalidArguments, InvalidCode, FeatureNew,
+)
+from ..interpreter import (
+    GeneratedListHolder, CustomTargetHolder,
+    CustomTargetIndexHolder
+)
+
+SourceSetRule = namedtuple('SourceSetRule', 'keys sources if_false sourcesets dependencies extra_deps')
+SourceFiles = namedtuple('SourceFiles', 'sources dependencies')
+
+class SourceSetHolder(MutableInterpreterObject, ObjectHolder):
+    def __init__(self, interpreter):
+        MutableInterpreterObject.__init__(self)
+        ObjectHolder.__init__(self, list())
+        self.subproject = interpreter.subproject
+        self.environment = interpreter.environment
+        self.subdir = interpreter.subdir
+        self.frozen = False
+        self.methods.update({
+            'add': self.add_method,
+            'add_all': self.add_all_method,
+            'all_sources': self.all_sources_method,
+            'all_dependencies': self.all_dependencies_method,
+            'apply': self.apply_method,
+        })
+
+    def check_source_files(self, arg, allow_deps):
+        sources = []
+        deps = []
+        for x in arg:
+            if isinstance(x, (str, mesonlib.File,
+                              GeneratedListHolder, CustomTargetHolder,
+                              CustomTargetIndexHolder)):
+                sources.append(x)
+            elif hasattr(x, 'found'):
+                if not allow_deps:
+                    msg = 'Dependencies are not allowed in the if_false argument.'
+                    raise InvalidArguments(msg)
+                deps.append(x)
+            else:
+                msg = 'Sources must be strings or file-like objects.'
+                raise InvalidArguments(msg)
+        mesonlib.check_direntry_issues(sources)
+        return sources, deps
+
+    def check_conditions(self, arg):
+        keys = []
+        deps = []
+        for x in listify(arg):
+            if isinstance(x, str):
+                keys.append(x)
+            elif hasattr(x, 'found'):
+                deps.append(x)
+            else:
+                raise InvalidArguments('Conditions must be strings or dependency object')
+        return keys, deps
+
+    @permittedKwargs(['when', 'if_false', 'if_true'])
+    def add_method(self, args, kwargs):
+        if self.frozen:
+            raise InvalidCode('Tried to use \'add\' after querying the source set')
+        when = listify(kwargs.get('when', []))
+        if_true = listify(kwargs.get('if_true', []))
+        if_false = listify(kwargs.get('if_false', []))
+        if not when and not if_true and not if_false:
+            if_true = args
+        elif args:
+            raise InterpreterException('add called with both positional and keyword arguments')
+        keys, dependencies = self.check_conditions(when)
+        sources, extra_deps = self.check_source_files(if_true, True)
+        if_false, _ = self.check_source_files(if_false, False)
+        self.held_object.append(SourceSetRule(keys, sources, if_false, [], dependencies, extra_deps))
+
+    @permittedKwargs(['when', 'if_true'])
+    def add_all_method(self, args, kwargs):
+        if self.frozen:
+            raise InvalidCode('Tried to use \'add_all\' after querying the source set')
+        when = listify(kwargs.get('when', []))
+        if_true = listify(kwargs.get('if_true', []))
+        if not when and not if_true:
+            if_true = args
+        elif args:
+            raise InterpreterException('add_all called with both positional and keyword arguments')
+        keys, dependencies = self.check_conditions(when)
+        for s in if_true:
+            if not isinstance(s, SourceSetHolder):
+                raise InvalidCode('Arguments to \'add_all\' after the first must be source sets')
+            s.frozen = True
+        self.held_object.append(SourceSetRule(keys, [], [], if_true, dependencies, []))
+
+    def collect(self, enabled_fn, all_sources, into=None):
+        if not into:
+            into = SourceFiles(set(), set())
+        for entry in self.held_object:
+            if all(x.found() for x in entry.dependencies) and \
+               all(enabled_fn(key) for key in entry.keys):
+                into.sources.update(entry.sources)
+                into.dependencies.update(entry.dependencies)
+                into.dependencies.update(entry.extra_deps)
+                for ss in entry.sourcesets:
+                    ss.collect(enabled_fn, all_sources, into)
+                if not all_sources:
+                    continue
+            into.sources.update(entry.if_false)
+        return into
+
+    @noKwargs
+    @noPosargs
+    def all_sources_method(self, args, kwargs):
+        self.frozen = True
+        files = self.collect(lambda x: True, True)
+        return list(files.sources)
+
+    @noKwargs
+    @noPosargs
+    @FeatureNew('source_set.all_dependencies() method', '0.52.0')
+    def all_dependencies_method(self, args, kwargs):
+        self.frozen = True
+        files = self.collect(lambda x: True, True)
+        return list(files.dependencies)
+
+    @permittedKwargs(['strict'])
+    def apply_method(self, args, kwargs):
+        if len(args) != 1:
+            raise InterpreterException('Apply takes exactly one argument')
+        config_data = args[0]
+        self.frozen = True
+        strict = kwargs.get('strict', True)
+        if isinstance(config_data, dict):
+            def _get_from_config_data(key):
+                if strict and key not in config_data:
+                    raise InterpreterException('Entry {} not in configuration dictionary.'.format(key))
+                return config_data.get(key, False)
+        else:
+            config_cache = dict()
+
+            def _get_from_config_data(key):
+                nonlocal config_cache
+                if key not in config_cache:
+                    args = [key] if strict else [key, False]
+                    config_cache[key] = config_data.get_method(args, {})
+                return config_cache[key]
+
+        files = self.collect(_get_from_config_data, False)
+        res = SourceFilesHolder(files)
+        return res
+
+class SourceFilesHolder(InterpreterObject, ObjectHolder):
+    def __init__(self, files):
+        InterpreterObject.__init__(self)
+        ObjectHolder.__init__(self, files)
+        self.methods.update({
+            'sources': self.sources_method,
+            'dependencies': self.dependencies_method,
+        })
+
+    @noPosargs
+    @noKwargs
+    def sources_method(self, args, kwargs):
+        return list(self.held_object.sources)
+
+    @noPosargs
+    @noKwargs
+    def dependencies_method(self, args, kwargs):
+        return list(self.held_object.dependencies)
+
+class SourceSetModule(ExtensionModule):
+    @FeatureNew('SourceSet module', '0.51.0')
+    def __init__(self, *args, **kwargs):
+        super().__init__(*args, **kwargs)
+        self.snippets.add('source_set')
+
+    @noKwargs
+    @noPosargs
+    def source_set(self, interpreter, state, args, kwargs):
+        return SourceSetHolder(interpreter)
+
+def initialize(*args, **kwargs):
+    return SourceSetModule(*args, **kwargs)
diff --git a/meson/mesonbuild/modules/unstable_cuda.py b/meson/mesonbuild/modules/unstable_cuda.py
new file mode 100644
index 000000000..0d693c3a0
--- /dev/null
+++ b/meson/mesonbuild/modules/unstable_cuda.py
@@ -0,0 +1,271 @@
+# Copyright 2017 The Meson development team
+
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+
+#     http://www.apache.org/licenses/LICENSE-2.0
+
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import re
+
+from ..mesonlib import version_compare
+from ..interpreter import CompilerHolder
+from ..compilers import CudaCompiler
+
+from . import ExtensionModule, ModuleReturnValue
+
+from ..interpreterbase import (
+    flatten, permittedKwargs, noKwargs,
+    InvalidArguments, FeatureNew
+)
+
+class CudaModule(ExtensionModule):
+
+    @FeatureNew('CUDA module', '0.50.0')
+    def __init__(self, *args, **kwargs):
+        super().__init__(*args, **kwargs)
+
+    @noKwargs
+    def min_driver_version(self, state, args, kwargs):
+        argerror = InvalidArguments('min_driver_version must have exactly one positional argument: ' +
+                                    'an NVCC compiler object, or its version string.')
+
+        if len(args) != 1:
+            raise argerror
+        else:
+            cuda_version = self._version_from_compiler(args[0])
+            if cuda_version == 'unknown':
+                raise argerror
+
+        driver_version_table = [
+            {'cuda_version': '>=10.2.89',  'windows': '441.22', 'linux': '440.33'},
+            {'cuda_version': '>=10.1.105', 'windows': '418.96', 'linux': '418.39'},
+            {'cuda_version': '>=10.0.130', 'windows': '411.31', 'linux': '410.48'},
+            {'cuda_version': '>=9.2.148',  'windows': '398.26', 'linux': '396.37'},
+            {'cuda_version': '>=9.2.88',   'windows': '397.44', 'linux': '396.26'},
+            {'cuda_version': '>=9.1.85',   'windows': '391.29', 'linux': '390.46'},
+            {'cuda_version': '>=9.0.76',   'windows': '385.54', 'linux': '384.81'},
+            {'cuda_version': '>=8.0.61',   'windows': '376.51', 'linux': '375.26'},
+            {'cuda_version': '>=8.0.44',   'windows': '369.30', 'linux': '367.48'},
+            {'cuda_version': '>=7.5.16',   'windows': '353.66', 'linux': '352.31'},
+            {'cuda_version': '>=7.0.28',   'windows': '347.62', 'linux': '346.46'},
+        ]
+
+        driver_version = 'unknown'
+        for d in driver_version_table:
+            if version_compare(cuda_version, d['cuda_version']):
+                driver_version = d.get(state.host_machine.system, d['linux'])
+                break
+
+        return ModuleReturnValue(driver_version, [driver_version])
+
+    @permittedKwargs(['detected'])
+    def nvcc_arch_flags(self, state, args, kwargs):
+        nvcc_arch_args = self._validate_nvcc_arch_args(state, args, kwargs)
+        ret = self._nvcc_arch_flags(*nvcc_arch_args)[0]
+        return ModuleReturnValue(ret, [ret])
+
+    @permittedKwargs(['detected'])
+    def nvcc_arch_readable(self, state, args, kwargs):
+        nvcc_arch_args = self._validate_nvcc_arch_args(state, args, kwargs)
+        ret = self._nvcc_arch_flags(*nvcc_arch_args)[1]
+        return ModuleReturnValue(ret, [ret])
+
+    @staticmethod
+    def _break_arch_string(s):
+        s = re.sub('[ \t\r\n,;]+', ';', s)
+        s = s.strip(';').split(';')
+        return s
+
+    @staticmethod
+    def _detected_cc_from_compiler(c):
+        if isinstance(c, CompilerHolder):
+            c = c.compiler
+        if isinstance(c, CudaCompiler):
+            return c.detected_cc
+        return ''
+
+    @staticmethod
+    def _version_from_compiler(c):
+        if isinstance(c, CompilerHolder):
+            c = c.compiler
+        if isinstance(c, CudaCompiler):
+            return c.version
+        if isinstance(c, str):
+            return c
+        return 'unknown'
+
+    def _validate_nvcc_arch_args(self, state, args, kwargs):
+        argerror = InvalidArguments('The first argument must be an NVCC compiler object, or its version string!')
+
+        if len(args) < 1:
+            raise argerror
+        else:
+            compiler = args[0]
+            cuda_version = self._version_from_compiler(compiler)
+            if cuda_version == 'unknown':
+                raise argerror
+
+        arch_list = [] if len(args) <= 1 else flatten(args[1:])
+        arch_list = [self._break_arch_string(a) for a in arch_list]
+        arch_list = flatten(arch_list)
+        if len(arch_list) > 1 and not set(arch_list).isdisjoint({'All', 'Common', 'Auto'}):
+            raise InvalidArguments('''The special architectures 'All', 'Common' and 'Auto' must appear alone, as a positional argument!''')
+        arch_list = arch_list[0] if len(arch_list) == 1 else arch_list
+
+        detected = kwargs.get('detected', self._detected_cc_from_compiler(compiler))
+        detected = flatten([detected])
+        detected = [self._break_arch_string(a) for a in detected]
+        detected = flatten(detected)
+        if not set(detected).isdisjoint({'All', 'Common', 'Auto'}):
+            raise InvalidArguments('''The special architectures 'All', 'Common' and 'Auto' must appear alone, as a positional argument!''')
+
+        return cuda_version, arch_list, detected
+
+    def _nvcc_arch_flags(self, cuda_version, cuda_arch_list='Auto', detected=''):
+        """
+        Using the CUDA Toolkit version (the NVCC version) and the target
+        architectures, compute the NVCC architecture flags.
+        """
+
+        cuda_known_gpu_architectures  = ['Fermi', 'Kepler', 'Maxwell']  # noqa: E221
+        cuda_common_gpu_architectures = ['3.0', '3.5', '5.0']           # noqa: E221
+        cuda_limit_gpu_architecture   = None                            # noqa: E221
+        cuda_all_gpu_architectures    = ['3.0', '3.2', '3.5', '5.0']    # noqa: E221
+
+        if version_compare(cuda_version, '<7.0'):
+            cuda_limit_gpu_architecture = '5.2'
+
+        if version_compare(cuda_version, '>=7.0'):
+            cuda_known_gpu_architectures  += ['Kepler+Tegra', 'Kepler+Tesla', 'Maxwell+Tegra']  # noqa: E221
+            cuda_common_gpu_architectures += ['5.2']                                            # noqa: E221
+
+            if version_compare(cuda_version, '<8.0'):
+                cuda_common_gpu_architectures += ['5.2+PTX']  # noqa: E221
+                cuda_limit_gpu_architecture    = '6.0'        # noqa: E221
+
+        if version_compare(cuda_version, '>=8.0'):
+            cuda_known_gpu_architectures  += ['Pascal', 'Pascal+Tegra']  # noqa: E221
+            cuda_common_gpu_architectures += ['6.0', '6.1']              # noqa: E221
+            cuda_all_gpu_architectures    += ['6.0', '6.1', '6.2']       # noqa: E221
+
+            if version_compare(cuda_version, '<9.0'):
+                cuda_common_gpu_architectures += ['6.1+PTX']  # noqa: E221
+                cuda_limit_gpu_architecture    = '7.0'        # noqa: E221
+
+        if version_compare(cuda_version, '>=9.0'):
+            cuda_known_gpu_architectures  += ['Volta', 'Xavier']                   # noqa: E221
+            cuda_common_gpu_architectures += ['7.0', '7.0+PTX']                    # noqa: E221
+            cuda_all_gpu_architectures    += ['7.0', '7.0+PTX', '7.2', '7.2+PTX']  # noqa: E221
+
+            if version_compare(cuda_version, '<10.0'):
+                cuda_limit_gpu_architecture = '7.5'
+
+        if version_compare(cuda_version, '>=10.0'):
+            cuda_known_gpu_architectures  += ['Turing']          # noqa: E221
+            cuda_common_gpu_architectures += ['7.5', '7.5+PTX']  # noqa: E221
+            cuda_all_gpu_architectures    += ['7.5', '7.5+PTX']  # noqa: E221
+
+            if version_compare(cuda_version, '<11.0'):
+                cuda_limit_gpu_architecture = '8.0'
+
+        if not cuda_arch_list:
+            cuda_arch_list = 'Auto'
+
+        if   cuda_arch_list == 'All':     # noqa: E271
+            cuda_arch_list = cuda_known_gpu_architectures
+        elif cuda_arch_list == 'Common':  # noqa: E271
+            cuda_arch_list = cuda_common_gpu_architectures
+        elif cuda_arch_list == 'Auto':    # noqa: E271
+            if detected:
+                if isinstance(detected, list):
+                    cuda_arch_list = detected
+                else:
+                    cuda_arch_list = self._break_arch_string(detected)
+
+                if cuda_limit_gpu_architecture:
+                    filtered_cuda_arch_list = []
+                    for arch in cuda_arch_list:
+                        if arch:
+                            if version_compare(arch, '>=' + cuda_limit_gpu_architecture):
+                                arch = cuda_common_gpu_architectures[-1]
+                            if arch not in filtered_cuda_arch_list:
+                                filtered_cuda_arch_list.append(arch)
+                    cuda_arch_list = filtered_cuda_arch_list
+            else:
+                cuda_arch_list = cuda_common_gpu_architectures
+        elif isinstance(cuda_arch_list, str):
+            cuda_arch_list = self._break_arch_string(cuda_arch_list)
+
+        cuda_arch_list = sorted([x for x in set(cuda_arch_list) if x])
+
+        cuda_arch_bin = []
+        cuda_arch_ptx = []
+        for arch_name in cuda_arch_list:
+            arch_bin = []
+            arch_ptx = []
+            add_ptx = arch_name.endswith('+PTX')
+            if add_ptx:
+                arch_name = arch_name[:-len('+PTX')]
+
+            if re.fullmatch('[0-9]+\\.[0-9](\\([0-9]+\\.[0-9]\\))?', arch_name):
+                arch_bin, arch_ptx = [arch_name], [arch_name]
+            else:
+                arch_bin, arch_ptx = {
+                    'Fermi':         (['2.0', '2.1(2.0)'], []),
+                    'Kepler+Tegra':  (['3.2'],             []),
+                    'Kepler+Tesla':  (['3.7'],             []),
+                    'Kepler':        (['3.0', '3.5'],      ['3.5']),
+                    'Maxwell+Tegra': (['5.3'],             []),
+                    'Maxwell':       (['5.0', '5.2'],      ['5.2']),
+                    'Pascal':        (['6.0', '6.1'],      ['6.1']),
+                    'Pascal+Tegra':  (['6.2'],             []),
+                    'Volta':         (['7.0'],             ['7.0']),
+                    'Xavier':        (['7.2'],             []),
+                    'Turing':        (['7.5'],             ['7.5']),
+                }.get(arch_name, (None, None))
+
+            if arch_bin is None:
+                raise InvalidArguments('Unknown CUDA Architecture Name {}!'
+                                       .format(arch_name))
+
+            cuda_arch_bin += arch_bin
+
+            if add_ptx:
+                if not arch_ptx:
+                    arch_ptx = arch_bin
+                cuda_arch_ptx += arch_ptx
+
+        cuda_arch_bin = re.sub('\\.', '', ' '.join(cuda_arch_bin))
+        cuda_arch_ptx = re.sub('\\.', '', ' '.join(cuda_arch_ptx))
+        cuda_arch_bin = re.findall('[0-9()]+', cuda_arch_bin)
+        cuda_arch_ptx = re.findall('[0-9]+',   cuda_arch_ptx)
+        cuda_arch_bin = sorted(list(set(cuda_arch_bin)))
+        cuda_arch_ptx = sorted(list(set(cuda_arch_ptx)))
+
+        nvcc_flags = []
+        nvcc_archs_readable = []
+
+        for arch in cuda_arch_bin:
+            m = re.match('([0-9]+)\\(([0-9]+)\\)', arch)
+            if m:
+                nvcc_flags += ['-gencode', 'arch=compute_' + m[2] + ',code=sm_' + m[1]]
+                nvcc_archs_readable += ['sm_' + m[1]]
+            else:
+                nvcc_flags += ['-gencode', 'arch=compute_' + arch + ',code=sm_' + arch]
+                nvcc_archs_readable += ['sm_' + arch]
+
+        for arch in cuda_arch_ptx:
+            nvcc_flags += ['-gencode', 'arch=compute_' + arch + ',code=compute_' + arch]
+            nvcc_archs_readable += ['compute_' + arch]
+
+        return nvcc_flags, nvcc_archs_readable
+
+def initialize(*args, **kwargs):
+    return CudaModule(*args, **kwargs)
diff --git a/meson/mesonbuild/modules/unstable_icestorm.py b/meson/mesonbuild/modules/unstable_icestorm.py
new file mode 100644
index 000000000..268c39480
--- /dev/null
+++ b/meson/mesonbuild/modules/unstable_icestorm.py
@@ -0,0 +1,87 @@
+# Copyright 2017 The Meson development team
+
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+
+#     http://www.apache.org/licenses/LICENSE-2.0
+
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from .. import mesonlib
+from ..interpreterbase import flatten
+from ..interpreterbase import FeatureNew
+
+from . import ExtensionModule
+
+class IceStormModule(ExtensionModule):
+
+    @FeatureNew('FPGA/Icestorm Module', '0.45.0')
+    def __init__(self, interpreter):
+        super().__init__(interpreter)
+        self.snippets.add('project')
+        self.yosys_bin = None
+
+    def detect_binaries(self, interpreter):
+        self.yosys_bin = interpreter.find_program_impl(['yosys'])
+        self.arachne_bin = interpreter.find_program_impl(['arachne-pnr'])
+        self.icepack_bin = interpreter.find_program_impl(['icepack'])
+        self.iceprog_bin = interpreter.find_program_impl(['iceprog'])
+        self.icetime_bin = interpreter.find_program_impl(['icetime'])
+
+    def project(self, interpreter, state, args, kwargs):
+        if not self.yosys_bin:
+            self.detect_binaries(interpreter)
+        if not args:
+            raise mesonlib.MesonException('Project requires at least one argument, which is the project name.')
+        proj_name = args[0]
+        arg_sources = args[1:]
+        if not isinstance(proj_name, str):
+            raise mesonlib.MesonException('Argument must be a string.')
+        kwarg_sources = kwargs.get('sources', [])
+        if not isinstance(kwarg_sources, list):
+            kwarg_sources = [kwarg_sources]
+        all_sources = interpreter.source_strings_to_files(flatten(arg_sources + kwarg_sources))
+        if 'constraint_file' not in kwargs:
+            raise mesonlib.MesonException('Constraint file not specified.')
+
+        constraint_file = interpreter.source_strings_to_files(kwargs['constraint_file'])
+        if len(constraint_file) != 1:
+            raise mesonlib.MesonException('Constraint file must contain one and only one entry.')
+        blif_name = proj_name + '_blif'
+        blif_fname = proj_name + '.blif'
+        asc_name = proj_name + '_asc'
+        asc_fname = proj_name + '.asc'
+        bin_name = proj_name + '_bin'
+        bin_fname = proj_name + '.bin'
+        time_name = proj_name + '-time'
+        upload_name = proj_name + '-upload'
+
+        blif_target = interpreter.func_custom_target(None, [blif_name], {
+            'input': all_sources,
+            'output': blif_fname,
+            'command': [self.yosys_bin, '-q', '-p', 'synth_ice40 -blif @OUTPUT@', '@INPUT@']})
+
+        asc_target = interpreter.func_custom_target(None, [asc_name], {
+            'input': blif_target,
+            'output': asc_fname,
+            'command': [self.arachne_bin, '-q', '-d', '1k', '-p', constraint_file, '@INPUT@', '-o', '@OUTPUT@']})
+
+        bin_target = interpreter.func_custom_target(None, [bin_name], {
+            'input': asc_target,
+            'output': bin_fname,
+            'command': [self.icepack_bin, '@INPUT@', '@OUTPUT@'],
+            'build_by_default': True})
+
+        interpreter.func_run_target(None, [upload_name], {
+            'command': [self.iceprog_bin, bin_target]})
+
+        interpreter.func_run_target(None, [time_name], {
+            'command': [self.icetime_bin, bin_target]})
+
+def initialize(*args, **kwargs):
+    return IceStormModule(*args, **kwargs)
diff --git a/meson/mesonbuild/modules/unstable_keyval.py b/meson/mesonbuild/modules/unstable_keyval.py
new file mode 100644
index 000000000..3da299297
--- /dev/null
+++ b/meson/mesonbuild/modules/unstable_keyval.py
@@ -0,0 +1,71 @@
+# Copyright 2017, 2019 The Meson development team
+
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+
+#     http://www.apache.org/licenses/LICENSE-2.0
+
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from . import ExtensionModule
+
+from .. import mesonlib
+from ..mesonlib import typeslistify
+from ..interpreterbase import FeatureNew, noKwargs
+from ..interpreter import InvalidCode
+
+import os
+
+class KeyvalModule(ExtensionModule):
+
+    @FeatureNew('Keyval Module', '0.55.0')
+    def __init__(self, *args, **kwargs):
+        super().__init__(*args, **kwargs)
+        self.snippets.add('load')
+
+    def _load_file(self, path_to_config):
+        result = dict()
+        try:
+            with open(path_to_config) as f:
+                for line in f:
+                    if '#' in line:
+                        comment_idx = line.index('#')
+                        line = line[:comment_idx]
+                    line = line.strip()
+                    try:
+                        name, val = line.split('=', 1)
+                    except ValueError:
+                        continue
+                    result[name.strip()] = val.strip()
+        except IOError as e:
+            raise mesonlib.MesonException('Failed to load {}: {}'.format(path_to_config, e))
+
+        return result
+
+    @noKwargs
+    def load(self, interpreter, state, args, kwargs):
+        sources = typeslistify(args, (str, mesonlib.File))
+        if len(sources) != 1:
+            raise InvalidCode('load takes only one file input.')
+
+        s = sources[0]
+        is_built = False
+        if isinstance(s, mesonlib.File):
+            is_built = is_built or s.is_built
+            s = s.absolute_path(interpreter.environment.source_dir, interpreter.environment.build_dir)
+        else:
+            s = os.path.join(interpreter.environment.source_dir, s)
+
+        if s not in interpreter.build_def_files and not is_built:
+            interpreter.build_def_files.append(s)
+
+        return self._load_file(s)
+
+
+def initialize(*args, **kwargs):
+    return KeyvalModule(*args, **kwargs)
diff --git a/meson/mesonbuild/modules/unstable_simd.py b/meson/mesonbuild/modules/unstable_simd.py
new file mode 100644
index 000000000..4c066fb91
--- /dev/null
+++ b/meson/mesonbuild/modules/unstable_simd.py
@@ -0,0 +1,86 @@
+# Copyright 2017 The Meson development team
+
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+
+#     http://www.apache.org/licenses/LICENSE-2.0
+
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from .. import mesonlib, compilers, mlog
+
+from . import ExtensionModule
+
+from ..interpreterbase import FeatureNew
+
+class SimdModule(ExtensionModule):
+
+    @FeatureNew('SIMD module', '0.42.0')
+    def __init__(self, interpreter):
+        super().__init__(interpreter)
+        self.snippets.add('check')
+        # FIXME add Altivec and AVX512.
+        self.isets = ('mmx',
+                      'sse',
+                      'sse2',
+                      'sse3',
+                      'ssse3',
+                      'sse41',
+                      'sse42',
+                      'avx',
+                      'avx2',
+                      'neon',
+                      )
+
+    def check(self, interpreter, state, args, kwargs):
+        result = []
+        if len(args) != 1:
+            raise mesonlib.MesonException('Check requires one argument, a name prefix for checks.')
+        prefix = args[0]
+        if not isinstance(prefix, str):
+            raise mesonlib.MesonException('Argument must be a string.')
+        if 'compiler' not in kwargs:
+            raise mesonlib.MesonException('Must specify compiler keyword')
+        if 'sources' in kwargs:
+            raise mesonlib.MesonException('SIMD module does not support the "sources" keyword')
+        basic_kwargs = {}
+        for key, value in kwargs.items():
+            if key not in self.isets and key != 'compiler':
+                basic_kwargs[key] = value
+        compiler = kwargs['compiler'].compiler
+        if not isinstance(compiler, compilers.compilers.Compiler):
+            raise mesonlib.MesonException('Compiler argument must be a compiler object.')
+        cdata = interpreter.func_configuration_data(None, [], {})
+        conf = cdata.held_object
+        for iset in self.isets:
+            if iset not in kwargs:
+                continue
+            iset_fname = kwargs[iset] # Might also be an array or Files. static_library will validate.
+            args = compiler.get_instruction_set_args(iset)
+            if args is None:
+                mlog.log('Compiler supports %s:' % iset, mlog.red('NO'))
+                continue
+            if args:
+                if not compiler.has_multi_arguments(args, state.environment)[0]:
+                    mlog.log('Compiler supports %s:' % iset, mlog.red('NO'))
+                    continue
+            mlog.log('Compiler supports %s:' % iset, mlog.green('YES'))
+            conf.values['HAVE_' + iset.upper()] = ('1', 'Compiler supports %s.' % iset)
+            libname = prefix + '_' + iset
+            lib_kwargs = {'sources': iset_fname,
+                          }
+            lib_kwargs.update(basic_kwargs)
+            langarg_key = compiler.get_language() + '_args'
+            old_lang_args = mesonlib.extract_as_list(lib_kwargs, langarg_key)
+            all_lang_args = old_lang_args + args
+            lib_kwargs[langarg_key] = all_lang_args
+            result.append(interpreter.func_static_lib(None, [libname], lib_kwargs))
+        return [result, cdata]
+
+def initialize(*args, **kwargs):
+    return SimdModule(*args, **kwargs)
diff --git a/meson/mesonbuild/modules/windows.py b/meson/mesonbuild/modules/windows.py
new file mode 100644
index 000000000..605070542
--- /dev/null
+++ b/meson/mesonbuild/modules/windows.py
@@ -0,0 +1,159 @@
+# Copyright 2015 The Meson development team
+
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+
+#     http://www.apache.org/licenses/LICENSE-2.0
+
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import enum
+import os
+import re
+
+from .. import mlog
+from .. import mesonlib, build
+from ..mesonlib import MachineChoice, MesonException, extract_as_list, unholder
+from . import get_include_args
+from . import ModuleReturnValue
+from . import ExtensionModule
+from ..interpreter import CustomTargetHolder
+from ..interpreterbase import permittedKwargs, FeatureNewKwargs, flatten
+from ..dependencies import ExternalProgram
+
+class ResourceCompilerType(enum.Enum):
+    windres = 1
+    rc = 2
+
+class WindowsModule(ExtensionModule):
+
+    def detect_compiler(self, compilers):
+        for l in ('c', 'cpp'):
+            if l in compilers:
+                return compilers[l]
+        raise MesonException('Resource compilation requires a C or C++ compiler.')
+
+    def _find_resource_compiler(self, state):
+        # FIXME: Does not handle `native: true` executables, see
+        # See https://github.com/mesonbuild/meson/issues/1531
+        # Take a parameter instead of the hardcoded definition below
+        for_machine = MachineChoice.HOST
+
+        if hasattr(self, '_rescomp'):
+            return self._rescomp
+
+        # Will try cross / native file and then env var
+        rescomp = ExternalProgram.from_bin_list(state.environment, for_machine, 'windres')
+
+        if not rescomp or not rescomp.found():
+            comp = self.detect_compiler(state.environment.coredata.compilers[for_machine])
+            if comp.id in {'msvc', 'clang-cl', 'intel-cl'}:
+                rescomp = ExternalProgram('rc', silent=True)
+            else:
+                rescomp = ExternalProgram('windres', silent=True)
+
+        if not rescomp.found():
+            raise MesonException('Could not find Windows resource compiler')
+
+        for (arg, match, rc_type) in [
+                ('/?', '^.*Microsoft.*Resource Compiler.*$', ResourceCompilerType.rc),
+                ('--version', '^.*GNU windres.*$', ResourceCompilerType.windres),
+        ]:
+            p, o, e = mesonlib.Popen_safe(rescomp.get_command() + [arg])
+            m = re.search(match, o, re.MULTILINE)
+            if m:
+                mlog.log('Windows resource compiler: %s' % m.group())
+                self._rescomp = (rescomp, rc_type)
+                break
+        else:
+            raise MesonException('Could not determine type of Windows resource compiler')
+
+        return self._rescomp
+
+    @FeatureNewKwargs('windows.compile_resources', '0.47.0', ['depend_files', 'depends'])
+    @permittedKwargs({'args', 'include_directories', 'depend_files', 'depends'})
+    def compile_resources(self, state, args, kwargs):
+        extra_args = mesonlib.stringlistify(flatten(kwargs.get('args', [])))
+        wrc_depend_files = extract_as_list(kwargs, 'depend_files', pop = True)
+        wrc_depends = extract_as_list(kwargs, 'depends', pop = True)
+        for d in wrc_depends:
+            if isinstance(d, CustomTargetHolder):
+                extra_args += get_include_args([d.outdir_include()])
+        inc_dirs = extract_as_list(kwargs, 'include_directories', pop = True)
+        for incd in inc_dirs:
+            if not isinstance(incd.held_object, (str, build.IncludeDirs)):
+                raise MesonException('Resource include dirs should be include_directories().')
+        extra_args += get_include_args(inc_dirs)
+
+        rescomp, rescomp_type = self._find_resource_compiler(state)
+        if rescomp_type == ResourceCompilerType.rc:
+            # RC is used to generate .res files, a special binary resource
+            # format, which can be passed directly to LINK (apparently LINK uses
+            # CVTRES internally to convert this to a COFF object)
+            suffix = 'res'
+            res_args = extra_args + ['/nologo', '/fo@OUTPUT@', '@INPUT@']
+        else:
+            # ld only supports object files, so windres is used to generate a
+            # COFF object
+            suffix = 'o'
+            res_args = extra_args + ['@INPUT@', '@OUTPUT@']
+
+            m = 'Argument {!r} has a space which may not work with windres due to ' \
+                'a MinGW bug: https://sourceware.org/bugzilla/show_bug.cgi?id=4933'
+            for arg in extra_args:
+                if ' ' in arg:
+                    mlog.warning(m.format(arg), fatal=False)
+
+        res_targets = []
+
+        def add_target(src):
+            if isinstance(src, list):
+                for subsrc in src:
+                    add_target(subsrc)
+                return
+            src = unholder(src)
+
+            if isinstance(src, str):
+                name_format = 'file {!r}'
+                name = os.path.join(state.subdir, src)
+            elif isinstance(src, mesonlib.File):
+                name_format = 'file {!r}'
+                name = src.relative_name()
+            elif isinstance(src, build.CustomTarget):
+                if len(src.get_outputs()) > 1:
+                    raise MesonException('windows.compile_resources does not accept custom targets with more than 1 output.')
+
+                name_format = 'target {!r}'
+                name = src.get_id()
+            else:
+                raise MesonException('Unexpected source type {!r}. windows.compile_resources accepts only strings, files, custom targets, and lists thereof.'.format(src))
+
+            # Path separators are not allowed in target names
+            name = name.replace('/', '_').replace('\\', '_')
+
+            res_kwargs = {
+                'output': name + '_@BASENAME@.' + suffix,
+                'input': [src],
+                'command': [rescomp] + res_args,
+                'depend_files': wrc_depend_files,
+                'depends': wrc_depends,
+            }
+
+            # instruct binutils windres to generate a preprocessor depfile
+            if rescomp_type == ResourceCompilerType.windres:
+                res_kwargs['depfile'] = res_kwargs['output'] + '.d'
+                res_kwargs['command'] += ['--preprocessor-arg=-MD', '--preprocessor-arg=-MQ@OUTPUT@', '--preprocessor-arg=-MF@DEPFILE@']
+
+            res_targets.append(build.CustomTarget('Windows resource for ' + name_format.format(name), state.subdir, state.subproject, res_kwargs))
+
+        add_target(args)
+
+        return ModuleReturnValue(res_targets, [res_targets])
+
+def initialize(*args, **kwargs):
+    return WindowsModule(*args, **kwargs)
diff --git a/meson/mesonbuild/mparser.py b/meson/mesonbuild/mparser.py
new file mode 100644
index 000000000..b9e381e98
--- /dev/null
+++ b/meson/mesonbuild/mparser.py
@@ -0,0 +1,792 @@
+# Copyright 2014-2017 The Meson development team
+
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+
+#     http://www.apache.org/licenses/LICENSE-2.0
+
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import re
+import codecs
+import textwrap
+import types
+import typing as T
+from .mesonlib import MesonException
+from . import mlog
+
+if T.TYPE_CHECKING:
+    from .ast import AstVisitor
+
+# This is the regex for the supported escape sequences of a regular string
+# literal, like 'abc\x00'
+ESCAPE_SEQUENCE_SINGLE_RE = re.compile(r'''
+    ( \\U[A-Fa-f0-9]{8}   # 8-digit hex escapes
+    | \\u[A-Fa-f0-9]{4}   # 4-digit hex escapes
+    | \\x[A-Fa-f0-9]{2}   # 2-digit hex escapes
+    | \\[0-7]{1,3}        # Octal escapes
+    | \\N\{[^}]+\}        # Unicode characters by name
+    | \\[\\'abfnrtv]      # Single-character escapes
+    )''', re.UNICODE | re.VERBOSE)
+
+class MesonUnicodeDecodeError(MesonException):
+    def __init__(self, match):
+        super().__init__("%s" % match)
+        self.match = match
+
+def decode_match(match):
+    try:
+        return codecs.decode(match.group(0), 'unicode_escape')
+    except UnicodeDecodeError:
+        raise MesonUnicodeDecodeError(match.group(0))
+
+class ParseException(MesonException):
+    def __init__(self, text, line, lineno, colno):
+        # Format as error message, followed by the line with the error, followed by a caret to show the error column.
+        super().__init__("%s\n%s\n%s" % (text, line, '%s^' % (' ' * colno)))
+        self.lineno = lineno
+        self.colno = colno
+
+class BlockParseException(MesonException):
+    def __init__(self, text, line, lineno, colno, start_line, start_lineno, start_colno):
+        # This can be formatted in two ways - one if the block start and end are on the same line, and a different way if they are on different lines.
+
+        if lineno == start_lineno:
+            # If block start and end are on the same line, it is formatted as:
+            # Error message
+            # Followed by the line with the error
+            # Followed by a caret to show the block start
+            # Followed by underscores
+            # Followed by a caret to show the block end.
+            super().__init__("%s\n%s\n%s" % (text, line, '%s^%s^' % (' ' * start_colno, '_' * (colno - start_colno - 1))))
+        else:
+            # If block start and end are on different lines, it is formatted as:
+            # Error message
+            # Followed by the line with the error
+            # Followed by a caret to show the error column.
+            # Followed by a message saying where the block started.
+            # Followed by the line of the block start.
+            # Followed by a caret for the block start.
+            super().__init__("%s\n%s\n%s\nFor a block that started at %d,%d\n%s\n%s" % (text, line, '%s^' % (' ' * colno), start_lineno, start_colno, start_line, "%s^" % (' ' * start_colno)))
+        self.lineno = lineno
+        self.colno = colno
+
+TV_TokenTypes = T.TypeVar('TV_TokenTypes', int, str, bool)
+
+class Token(T.Generic[TV_TokenTypes]):
+    def __init__(self, tid: str, filename: str, line_start: int, lineno: int, colno: int, bytespan: T.Tuple[int, int], value: TV_TokenTypes):
+        self.tid = tid                # type: str
+        self.filename = filename      # type: str
+        self.line_start = line_start  # type: int
+        self.lineno = lineno          # type: int
+        self.colno = colno            # type: int
+        self.bytespan = bytespan      # type: T.Tuple[int, int]
+        self.value = value            # type: TV_TokenTypes
+
+    def __eq__(self, other) -> bool:
+        if isinstance(other, str):
+            return self.tid == other
+        return self.tid == other.tid
+
+class Lexer:
+    def __init__(self, code: str):
+        self.code = code
+        self.keywords = {'true', 'false', 'if', 'else', 'elif',
+                         'endif', 'and', 'or', 'not', 'foreach', 'endforeach',
+                         'in', 'continue', 'break'}
+        self.future_keywords = {'return'}
+        self.token_specification = [
+            # Need to be sorted longest to shortest.
+            ('ignore', re.compile(r'[ \t]')),
+            ('id', re.compile('[_a-zA-Z][_0-9a-zA-Z]*')),
+            ('number', re.compile(r'0[bB][01]+|0[oO][0-7]+|0[xX][0-9a-fA-F]+|0|[1-9]\d*')),
+            ('eol_cont', re.compile(r'\\\n')),
+            ('eol', re.compile(r'\n')),
+            ('multiline_string', re.compile(r"'''(.|\n)*?'''", re.M)),
+            ('comment', re.compile(r'#.*')),
+            ('lparen', re.compile(r'\(')),
+            ('rparen', re.compile(r'\)')),
+            ('lbracket', re.compile(r'\[')),
+            ('rbracket', re.compile(r'\]')),
+            ('lcurl', re.compile(r'\{')),
+            ('rcurl', re.compile(r'\}')),
+            ('dblquote', re.compile(r'"')),
+            ('string', re.compile(r"'([^'\\]|(\\.))*'")),
+            ('comma', re.compile(r',')),
+            ('plusassign', re.compile(r'\+=')),
+            ('dot', re.compile(r'\.')),
+            ('plus', re.compile(r'\+')),
+            ('dash', re.compile(r'-')),
+            ('star', re.compile(r'\*')),
+            ('percent', re.compile(r'%')),
+            ('fslash', re.compile(r'/')),
+            ('colon', re.compile(r':')),
+            ('equal', re.compile(r'==')),
+            ('nequal', re.compile(r'!=')),
+            ('assign', re.compile(r'=')),
+            ('le', re.compile(r'<=')),
+            ('lt', re.compile(r'<')),
+            ('ge', re.compile(r'>=')),
+            ('gt', re.compile(r'>')),
+            ('questionmark', re.compile(r'\?')),
+        ]
+
+    def getline(self, line_start: int) -> str:
+        return self.code[line_start:self.code.find('\n', line_start)]
+
+    def lex(self, filename: str) -> T.Generator[Token, None, None]:
+        line_start = 0
+        lineno = 1
+        loc = 0
+        par_count = 0
+        bracket_count = 0
+        curl_count = 0
+        col = 0
+        while loc < len(self.code):
+            matched = False
+            value = None  # type: T.Union[str, bool, int]
+            for (tid, reg) in self.token_specification:
+                mo = reg.match(self.code, loc)
+                if mo:
+                    curline = lineno
+                    curline_start = line_start
+                    col = mo.start() - line_start
+                    matched = True
+                    span_start = loc
+                    loc = mo.end()
+                    span_end = loc
+                    bytespan = (span_start, span_end)
+                    match_text = mo.group()
+                    if tid == 'ignore' or tid == 'comment':
+                        break
+                    elif tid == 'lparen':
+                        par_count += 1
+                    elif tid == 'rparen':
+                        par_count -= 1
+                    elif tid == 'lbracket':
+                        bracket_count += 1
+                    elif tid == 'rbracket':
+                        bracket_count -= 1
+                    elif tid == 'lcurl':
+                        curl_count += 1
+                    elif tid == 'rcurl':
+                        curl_count -= 1
+                    elif tid == 'dblquote':
+                        raise ParseException('Double quotes are not supported. Use single quotes.', self.getline(line_start), lineno, col)
+                    elif tid == 'string':
+                        # Handle here and not on the regexp to give a better error message.
+                        if match_text.find("\n") != -1:
+                            mlog.warning(textwrap.dedent("""\
+                                    Newline character in a string detected, use ''' (three single quotes) for multiline strings instead.
+                                    This will become a hard error in a future Meson release.\
+                                """),
+                                self.getline(line_start),
+                                str(lineno),
+                                str(col)
+                            )
+                        value = match_text[1:-1]
+                        try:
+                            value = ESCAPE_SEQUENCE_SINGLE_RE.sub(decode_match, value)
+                        except MesonUnicodeDecodeError as err:
+                            raise MesonException("Failed to parse escape sequence: '{}' in string:\n  {}".format(err.match, match_text))
+                    elif tid == 'multiline_string':
+                        tid = 'string'
+                        value = match_text[3:-3]
+                        lines = match_text.split('\n')
+                        if len(lines) > 1:
+                            lineno += len(lines) - 1
+                            line_start = mo.end() - len(lines[-1])
+                    elif tid == 'number':
+                        value = int(match_text, base=0)
+                    elif tid == 'eol_cont':
+                        lineno += 1
+                        line_start = loc
+                        break
+                    elif tid == 'eol':
+                        lineno += 1
+                        line_start = loc
+                        if par_count > 0 or bracket_count > 0 or curl_count > 0:
+                            break
+                    elif tid == 'id':
+                        if match_text in self.keywords:
+                            tid = match_text
+                        else:
+                            if match_text in self.future_keywords:
+                                mlog.warning("Identifier '{}' will become a reserved keyword in a future release. Please rename it.".format(match_text),
+                                             location=types.SimpleNamespace(filename=filename, lineno=lineno))
+                            value = match_text
+                    yield Token(tid, filename, curline_start, curline, col, bytespan, value)
+                    break
+            if not matched:
+                raise ParseException('lexer', self.getline(line_start), lineno, col)
+
+class BaseNode:
+    def __init__(self, lineno: int, colno: int, filename: str, end_lineno: T.Optional[int] = None, end_colno: T.Optional[int] = None):
+        self.lineno = lineno      # type: int
+        self.colno = colno        # type: int
+        self.filename = filename  # type: str
+        self.end_lineno = end_lineno if end_lineno is not None else self.lineno
+        self.end_colno = end_colno if end_colno is not None else self.colno
+
+        # Attributes for the visitors
+        self.level = 0            # type: int
+        self.ast_id = ''          # type: str
+        self.condition_level = 0  # type: int
+
+    def accept(self, visitor: 'AstVisitor') -> None:
+        fname = 'visit_{}'.format(type(self).__name__)
+        if hasattr(visitor, fname):
+            func = getattr(visitor, fname)
+            if callable(func):
+                func(self)
+
+class ElementaryNode(T.Generic[TV_TokenTypes], BaseNode):
+    def __init__(self, token: Token[TV_TokenTypes]):
+        super().__init__(token.lineno, token.colno, token.filename)
+        self.value = token.value        # type: TV_TokenTypes
+        self.bytespan = token.bytespan  # type: T.Tuple[int, int]
+
+class BooleanNode(ElementaryNode[bool]):
+    def __init__(self, token: Token[bool]):
+        super().__init__(token)
+        assert isinstance(self.value, bool)
+
+class IdNode(ElementaryNode[str]):
+    def __init__(self, token: Token[str]):
+        super().__init__(token)
+        assert isinstance(self.value, str)
+
+    def __str__(self):
+        return "Id node: '%s' (%d, %d)." % (self.value, self.lineno, self.colno)
+
+class NumberNode(ElementaryNode[int]):
+    def __init__(self, token: Token[int]):
+        super().__init__(token)
+        assert isinstance(self.value, int)
+
+class StringNode(ElementaryNode[str]):
+    def __init__(self, token: Token[str]):
+        super().__init__(token)
+        assert isinstance(self.value, str)
+
+    def __str__(self):
+        return "String node: '%s' (%d, %d)." % (self.value, self.lineno, self.colno)
+
+class ContinueNode(ElementaryNode):
+    pass
+
+class BreakNode(ElementaryNode):
+    pass
+
+class ArgumentNode(BaseNode):
+    def __init__(self, token: Token[TV_TokenTypes]):
+        super().__init__(token.lineno, token.colno, token.filename)
+        self.arguments = []  # type: T.List[BaseNode]
+        self.commas = []     # type: T.List[Token[TV_TokenTypes]]
+        self.kwargs = {}     # type: T.Dict[BaseNode, BaseNode]
+        self.order_error = False
+
+    def prepend(self, statement: BaseNode) -> None:
+        if self.num_kwargs() > 0:
+            self.order_error = True
+        if not isinstance(statement, EmptyNode):
+            self.arguments = [statement] + self.arguments
+
+    def append(self, statement: BaseNode) -> None:
+        if self.num_kwargs() > 0:
+            self.order_error = True
+        if not isinstance(statement, EmptyNode):
+            self.arguments += [statement]
+
+    def set_kwarg(self, name: IdNode, value: BaseNode) -> None:
+        if name.value in [x.value for x in self.kwargs.keys() if isinstance(x, IdNode)]:
+            mlog.warning('Keyword argument "{}" defined multiple times.'.format(name.value), location=self)
+            mlog.warning('This will be an error in future Meson releases.')
+        self.kwargs[name] = value
+
+    def set_kwarg_no_check(self, name: BaseNode, value: BaseNode) -> None:
+        self.kwargs[name] = value
+
+    def num_args(self) -> int:
+        return len(self.arguments)
+
+    def num_kwargs(self) -> int:
+        return len(self.kwargs)
+
+    def incorrect_order(self) -> bool:
+        return self.order_error
+
+    def __len__(self) -> int:
+        return self.num_args() # Fixme
+
+class ArrayNode(BaseNode):
+    def __init__(self, args: ArgumentNode, lineno: int, colno: int, end_lineno: int, end_colno: int):
+        super().__init__(lineno, colno, args.filename, end_lineno=end_lineno, end_colno=end_colno)
+        self.args = args              # type: ArgumentNode
+
+class DictNode(BaseNode):
+    def __init__(self, args: ArgumentNode, lineno: int, colno: int, end_lineno: int, end_colno: int):
+        super().__init__(lineno, colno, args.filename, end_lineno=end_lineno, end_colno=end_colno)
+        self.args = args
+
+class EmptyNode(BaseNode):
+    def __init__(self, lineno: int, colno: int, filename: str):
+        super().__init__(lineno, colno, filename)
+        self.value = None
+
+class OrNode(BaseNode):
+    def __init__(self, left: BaseNode, right: BaseNode):
+        super().__init__(left.lineno, left.colno, left.filename)
+        self.left = left    # type: BaseNode
+        self.right = right  # type: BaseNode
+
+class AndNode(BaseNode):
+    def __init__(self, left: BaseNode, right: BaseNode):
+        super().__init__(left.lineno, left.colno, left.filename)
+        self.left = left    # type: BaseNode
+        self.right = right  # type: BaseNode
+
+class ComparisonNode(BaseNode):
+    def __init__(self, ctype: str, left: BaseNode, right: BaseNode):
+        super().__init__(left.lineno, left.colno, left.filename)
+        self.left = left    # type: BaseNode
+        self.right = right  # type: BaseNode
+        self.ctype = ctype  # type: str
+
+class ArithmeticNode(BaseNode):
+    def __init__(self, operation: str, left: BaseNode, right: BaseNode):
+        super().__init__(left.lineno, left.colno, left.filename)
+        self.left = left            # type: BaseNode
+        self.right = right          # type: BaseNode
+        self.operation = operation  # type: str
+
+class NotNode(BaseNode):
+    def __init__(self, token: Token[TV_TokenTypes], value: BaseNode):
+        super().__init__(token.lineno, token.colno, token.filename)
+        self.value = value  # type: BaseNode
+
+class CodeBlockNode(BaseNode):
+    def __init__(self, token: Token[TV_TokenTypes]):
+        super().__init__(token.lineno, token.colno, token.filename)
+        self.lines = []  # type: T.List[BaseNode]
+
+class IndexNode(BaseNode):
+    def __init__(self, iobject: BaseNode, index: BaseNode):
+        super().__init__(iobject.lineno, iobject.colno, iobject.filename)
+        self.iobject = iobject  # type: BaseNode
+        self.index = index      # type: BaseNode
+
+class MethodNode(BaseNode):
+    def __init__(self, filename: str, lineno: int, colno: int, source_object: BaseNode, name: str, args: ArgumentNode):
+        super().__init__(lineno, colno, filename)
+        self.source_object = source_object  # type: BaseNode
+        self.name = name                    # type: str
+        assert(isinstance(self.name, str))
+        self.args = args                    # type: ArgumentNode
+
+class FunctionNode(BaseNode):
+    def __init__(self, filename: str, lineno: int, colno: int, end_lineno: int, end_colno: int, func_name: str, args: ArgumentNode):
+        super().__init__(lineno, colno, filename, end_lineno=end_lineno, end_colno=end_colno)
+        self.func_name = func_name  # type: str
+        assert(isinstance(func_name, str))
+        self.args = args  # type: ArgumentNode
+
+class AssignmentNode(BaseNode):
+    def __init__(self, filename: str, lineno: int, colno: int, var_name: str, value: BaseNode):
+        super().__init__(lineno, colno, filename)
+        self.var_name = var_name  # type: str
+        assert(isinstance(var_name, str))
+        self.value = value  # type: BaseNode
+
+class PlusAssignmentNode(BaseNode):
+    def __init__(self, filename: str, lineno: int, colno: int, var_name: str, value: BaseNode):
+        super().__init__(lineno, colno, filename)
+        self.var_name = var_name  # type: str
+        assert(isinstance(var_name, str))
+        self.value = value  # type: BaseNode
+
+class ForeachClauseNode(BaseNode):
+    def __init__(self, token: Token, varnames: T.List[str], items: BaseNode, block: CodeBlockNode):
+        super().__init__(token.lineno, token.colno, token.filename)
+        self.varnames = varnames  # type: T.List[str]
+        self.items = items        # type: BaseNode
+        self.block = block        # type: CodeBlockNode
+
+class IfNode(BaseNode):
+    def __init__(self, linenode: BaseNode, condition: BaseNode, block: CodeBlockNode):
+        super().__init__(linenode.lineno, linenode.colno, linenode.filename)
+        self.condition = condition  # type: BaseNode
+        self.block = block          # type: CodeBlockNode
+
+class IfClauseNode(BaseNode):
+    def __init__(self, linenode: BaseNode):
+        super().__init__(linenode.lineno, linenode.colno, linenode.filename)
+        self.ifs = []          # type: T.List[IfNode]
+        self.elseblock = None  # type: T.Union[EmptyNode, CodeBlockNode]
+
+class UMinusNode(BaseNode):
+    def __init__(self, current_location: Token, value: BaseNode):
+        super().__init__(current_location.lineno, current_location.colno, current_location.filename)
+        self.value = value  # type: BaseNode
+
+class TernaryNode(BaseNode):
+    def __init__(self, condition: BaseNode, trueblock: BaseNode, falseblock: BaseNode):
+        super().__init__(condition.lineno, condition.colno, condition.filename)
+        self.condition = condition    # type: BaseNode
+        self.trueblock = trueblock    # type: BaseNode
+        self.falseblock = falseblock  # type: BaseNode
+
+comparison_map = {'equal': '==',
+                  'nequal': '!=',
+                  'lt': '<',
+                  'le': '<=',
+                  'gt': '>',
+                  'ge': '>=',
+                  'in': 'in',
+                  'notin': 'not in',
+                  }
+
+# Recursive descent parser for Meson's definition language.
+# Very basic apart from the fact that we have many precedence
+# levels so there are not enough words to describe them all.
+# Enter numbering:
+#
+# 1 assignment
+# 2 or
+# 3 and
+# 4 comparison
+# 5 arithmetic
+# 6 negation
+# 7 funcall, method call
+# 8 parentheses
+# 9 plain token
+
+class Parser:
+    def __init__(self, code: str, filename: str):
+        self.lexer = Lexer(code)
+        self.stream = self.lexer.lex(filename)
+        self.current = Token('eof', '', 0, 0, 0, (0, 0), None)  # type: Token
+        self.getsym()
+        self.in_ternary = False
+
+    def getsym(self) -> None:
+        try:
+            self.current = next(self.stream)
+        except StopIteration:
+            self.current = Token('eof', '', self.current.line_start, self.current.lineno, self.current.colno + self.current.bytespan[1] - self.current.bytespan[0], (0, 0), None)
+
+    def getline(self) -> str:
+        return self.lexer.getline(self.current.line_start)
+
+    def accept(self, s: str) -> bool:
+        if self.current.tid == s:
+            self.getsym()
+            return True
+        return False
+
+    def accept_any(self, tids: T.Sequence[str]) -> str:
+        tid = self.current.tid
+        if tid in tids:
+            self.getsym()
+            return tid
+        return ''
+
+    def expect(self, s: str) -> bool:
+        if self.accept(s):
+            return True
+        raise ParseException('Expecting %s got %s.' % (s, self.current.tid), self.getline(), self.current.lineno, self.current.colno)
+
+    def block_expect(self, s: str, block_start: Token) -> bool:
+        if self.accept(s):
+            return True
+        raise BlockParseException('Expecting %s got %s.' % (s, self.current.tid), self.getline(), self.current.lineno, self.current.colno, self.lexer.getline(block_start.line_start), block_start.lineno, block_start.colno)
+
+    def parse(self) -> CodeBlockNode:
+        block = self.codeblock()
+        self.expect('eof')
+        return block
+
+    def statement(self) -> BaseNode:
+        return self.e1()
+
+    def e1(self) -> BaseNode:
+        left = self.e2()
+        if self.accept('plusassign'):
+            value = self.e1()
+            if not isinstance(left, IdNode):
+                raise ParseException('Plusassignment target must be an id.', self.getline(), left.lineno, left.colno)
+            assert isinstance(left.value, str)
+            return PlusAssignmentNode(left.filename, left.lineno, left.colno, left.value, value)
+        elif self.accept('assign'):
+            value = self.e1()
+            if not isinstance(left, IdNode):
+                raise ParseException('Assignment target must be an id.',
+                                     self.getline(), left.lineno, left.colno)
+            assert isinstance(left.value, str)
+            return AssignmentNode(left.filename, left.lineno, left.colno, left.value, value)
+        elif self.accept('questionmark'):
+            if self.in_ternary:
+                raise ParseException('Nested ternary operators are not allowed.',
+                                     self.getline(), left.lineno, left.colno)
+            self.in_ternary = True
+            trueblock = self.e1()
+            self.expect('colon')
+            falseblock = self.e1()
+            self.in_ternary = False
+            return TernaryNode(left, trueblock, falseblock)
+        return left
+
+    def e2(self) -> BaseNode:
+        left = self.e3()
+        while self.accept('or'):
+            if isinstance(left, EmptyNode):
+                raise ParseException('Invalid or clause.',
+                                     self.getline(), left.lineno, left.colno)
+            left = OrNode(left, self.e3())
+        return left
+
+    def e3(self) -> BaseNode:
+        left = self.e4()
+        while self.accept('and'):
+            if isinstance(left, EmptyNode):
+                raise ParseException('Invalid and clause.',
+                                     self.getline(), left.lineno, left.colno)
+            left = AndNode(left, self.e4())
+        return left
+
+    def e4(self) -> BaseNode:
+        left = self.e5()
+        for nodename, operator_type in comparison_map.items():
+            if self.accept(nodename):
+                return ComparisonNode(operator_type, left, self.e5())
+        if self.accept('not') and self.accept('in'):
+            return ComparisonNode('notin', left, self.e5())
+        return left
+
+    def e5(self) -> BaseNode:
+        return self.e5addsub()
+
+    def e5addsub(self) -> BaseNode:
+        op_map = {
+            'plus': 'add',
+            'dash': 'sub',
+        }
+        left = self.e5muldiv()
+        while True:
+            op = self.accept_any(tuple(op_map.keys()))
+            if op:
+                left = ArithmeticNode(op_map[op], left, self.e5muldiv())
+            else:
+                break
+        return left
+
+    def e5muldiv(self) -> BaseNode:
+        op_map = {
+            'percent': 'mod',
+            'star': 'mul',
+            'fslash': 'div',
+        }
+        left = self.e6()
+        while True:
+            op = self.accept_any(tuple(op_map.keys()))
+            if op:
+                left = ArithmeticNode(op_map[op], left, self.e6())
+            else:
+                break
+        return left
+
+    def e6(self) -> BaseNode:
+        if self.accept('not'):
+            return NotNode(self.current, self.e7())
+        if self.accept('dash'):
+            return UMinusNode(self.current, self.e7())
+        return self.e7()
+
+    def e7(self) -> BaseNode:
+        left = self.e8()
+        block_start = self.current
+        if self.accept('lparen'):
+            args = self.args()
+            self.block_expect('rparen', block_start)
+            if not isinstance(left, IdNode):
+                raise ParseException('Function call must be applied to plain id',
+                                     self.getline(), left.lineno, left.colno)
+            assert isinstance(left.value, str)
+            left = FunctionNode(left.filename, left.lineno, left.colno, self.current.lineno, self.current.colno, left.value, args)
+        go_again = True
+        while go_again:
+            go_again = False
+            if self.accept('dot'):
+                go_again = True
+                left = self.method_call(left)
+            if self.accept('lbracket'):
+                go_again = True
+                left = self.index_call(left)
+        return left
+
+    def e8(self) -> BaseNode:
+        block_start = self.current
+        if self.accept('lparen'):
+            e = self.statement()
+            self.block_expect('rparen', block_start)
+            return e
+        elif self.accept('lbracket'):
+            args = self.args()
+            self.block_expect('rbracket', block_start)
+            return ArrayNode(args, block_start.lineno, block_start.colno, self.current.lineno, self.current.colno)
+        elif self.accept('lcurl'):
+            key_values = self.key_values()
+            self.block_expect('rcurl', block_start)
+            return DictNode(key_values, block_start.lineno, block_start.colno, self.current.lineno, self.current.colno)
+        else:
+            return self.e9()
+
+    def e9(self) -> BaseNode:
+        t = self.current
+        if self.accept('true'):
+            t.value = True
+            return BooleanNode(t)
+        if self.accept('false'):
+            t.value = False
+            return BooleanNode(t)
+        if self.accept('id'):
+            return IdNode(t)
+        if self.accept('number'):
+            return NumberNode(t)
+        if self.accept('string'):
+            return StringNode(t)
+        return EmptyNode(self.current.lineno, self.current.colno, self.current.filename)
+
+    def key_values(self) -> ArgumentNode:
+        s = self.statement()  # type: BaseNode
+        a = ArgumentNode(self.current)
+
+        while not isinstance(s, EmptyNode):
+            if self.accept('colon'):
+                a.set_kwarg_no_check(s, self.statement())
+                potential = self.current
+                if not self.accept('comma'):
+                    return a
+                a.commas.append(potential)
+            else:
+                raise ParseException('Only key:value pairs are valid in dict construction.',
+                                     self.getline(), s.lineno, s.colno)
+            s = self.statement()
+        return a
+
+    def args(self) -> ArgumentNode:
+        s = self.statement()  # type: BaseNode
+        a = ArgumentNode(self.current)
+
+        while not isinstance(s, EmptyNode):
+            potential = self.current
+            if self.accept('comma'):
+                a.commas.append(potential)
+                a.append(s)
+            elif self.accept('colon'):
+                if not isinstance(s, IdNode):
+                    raise ParseException('Dictionary key must be a plain identifier.',
+                                         self.getline(), s.lineno, s.colno)
+                a.set_kwarg(s, self.statement())
+                potential = self.current
+                if not self.accept('comma'):
+                    return a
+                a.commas.append(potential)
+            else:
+                a.append(s)
+                return a
+            s = self.statement()
+        return a
+
+    def method_call(self, source_object) -> MethodNode:
+        methodname = self.e9()
+        if not(isinstance(methodname, IdNode)):
+            raise ParseException('Method name must be plain id',
+                                 self.getline(), self.current.lineno, self.current.colno)
+        assert isinstance(methodname.value, str)
+        self.expect('lparen')
+        args = self.args()
+        self.expect('rparen')
+        method = MethodNode(methodname.filename, methodname.lineno, methodname.colno, source_object, methodname.value, args)
+        if self.accept('dot'):
+            return self.method_call(method)
+        return method
+
+    def index_call(self, source_object) -> IndexNode:
+        index_statement = self.statement()
+        self.expect('rbracket')
+        return IndexNode(source_object, index_statement)
+
+    def foreachblock(self) -> ForeachClauseNode:
+        t = self.current
+        self.expect('id')
+        assert isinstance(t.value, str)
+        varname = t
+        varnames = [t.value]  # type: T.List[str]
+
+        if self.accept('comma'):
+            t = self.current
+            self.expect('id')
+            assert isinstance(t.value, str)
+            varnames.append(t.value)
+
+        self.expect('colon')
+        items = self.statement()
+        block = self.codeblock()
+        return ForeachClauseNode(varname, varnames, items, block)
+
+    def ifblock(self) -> IfClauseNode:
+        condition = self.statement()
+        clause = IfClauseNode(condition)
+        self.expect('eol')
+        block = self.codeblock()
+        clause.ifs.append(IfNode(clause, condition, block))
+        self.elseifblock(clause)
+        clause.elseblock = self.elseblock()
+        return clause
+
+    def elseifblock(self, clause) -> None:
+        while self.accept('elif'):
+            s = self.statement()
+            self.expect('eol')
+            b = self.codeblock()
+            clause.ifs.append(IfNode(s, s, b))
+
+    def elseblock(self) -> T.Union[CodeBlockNode, EmptyNode]:
+        if self.accept('else'):
+            self.expect('eol')
+            return self.codeblock()
+        return EmptyNode(self.current.lineno, self.current.colno, self.current.filename)
+
+    def line(self) -> BaseNode:
+        block_start = self.current
+        if self.current == 'eol':
+            return EmptyNode(self.current.lineno, self.current.colno, self.current.filename)
+        if self.accept('if'):
+            ifblock = self.ifblock()
+            self.block_expect('endif', block_start)
+            return ifblock
+        if self.accept('foreach'):
+            forblock = self.foreachblock()
+            self.block_expect('endforeach', block_start)
+            return forblock
+        if self.accept('continue'):
+            return ContinueNode(self.current)
+        if self.accept('break'):
+            return BreakNode(self.current)
+        return self.statement()
+
+    def codeblock(self) -> CodeBlockNode:
+        block = CodeBlockNode(self.current)
+        cond = True
+        while cond:
+            curline = self.line()
+            if not isinstance(curline, EmptyNode):
+                block.lines.append(curline)
+            cond = self.accept('eol')
+        return block
diff --git a/meson/mesonbuild/msetup.py b/meson/mesonbuild/msetup.py
new file mode 100644
index 000000000..252151194
--- /dev/null
+++ b/meson/mesonbuild/msetup.py
@@ -0,0 +1,246 @@
+# Copyright 2016-2018 The Meson development team
+
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+
+#     http://www.apache.org/licenses/LICENSE-2.0
+
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import typing as T
+import time
+import sys, stat
+import datetime
+import os.path
+import platform
+import cProfile as profile
+import argparse
+import tempfile
+import shutil
+import glob
+
+from . import environment, interpreter, mesonlib
+from . import build
+from . import mlog, coredata
+from . import mintro
+from .mconf import make_lower_case
+from .mesonlib import MesonException
+
+def add_arguments(parser):
+    coredata.register_builtin_arguments(parser)
+    parser.add_argument('--native-file',
+                        default=[],
+                        action='append',
+                        help='File containing overrides for native compilation environment.')
+    parser.add_argument('--cross-file',
+                        default=[],
+                        action='append',
+                        help='File describing cross compilation environment.')
+    parser.add_argument('-v', '--version', action='version',
+                        version=coredata.version)
+    parser.add_argument('--profile-self', action='store_true', dest='profile',
+                        help=argparse.SUPPRESS)
+    parser.add_argument('--fatal-meson-warnings', action='store_true', dest='fatal_warnings',
+                        help='Make all Meson warnings fatal')
+    parser.add_argument('--reconfigure', action='store_true',
+                        help='Set options and reconfigure the project. Useful when new ' +
+                             'options have been added to the project and the default value ' +
+                             'is not working.')
+    parser.add_argument('--wipe', action='store_true',
+                        help='Wipe build directory and reconfigure using previous command line options. ' +
+                             'Useful when build directory got corrupted, or when rebuilding with a ' +
+                             'newer version of meson.')
+    parser.add_argument('builddir', nargs='?', default=None)
+    parser.add_argument('sourcedir', nargs='?', default=None)
+
+class MesonApp:
+    def __init__(self, options):
+        (self.source_dir, self.build_dir) = self.validate_dirs(options.builddir,
+                                                               options.sourcedir,
+                                                               options.reconfigure,
+                                                               options.wipe)
+        if options.wipe:
+            # Make a copy of the cmd line file to make sure we can always
+            # restore that file if anything bad happens. For example if
+            # configuration fails we need to be able to wipe again.
+            restore = []
+            with tempfile.TemporaryDirectory() as d:
+                for filename in [coredata.get_cmd_line_file(self.build_dir)] + glob.glob(os.path.join(self.build_dir, environment.Environment.private_dir, '*.ini')):
+                    try:
+                        restore.append((shutil.copy(filename, d), filename))
+                    except FileNotFoundError:
+                        raise MesonException(
+                            'Cannot find cmd_line.txt. This is probably because this '
+                            'build directory was configured with a meson version < 0.49.0.')
+
+                coredata.read_cmd_line_file(self.build_dir, options)
+
+                try:
+                    # Don't delete the whole tree, just all of the files and
+                    # folders in the tree. Otherwise calling wipe form the builddir
+                    # will cause a crash
+                    for l in os.listdir(self.build_dir):
+                        l = os.path.join(self.build_dir, l)
+                        if os.path.isdir(l) and not os.path.islink(l):
+                            mesonlib.windows_proof_rmtree(l)
+                        else:
+                            mesonlib.windows_proof_rm(l)
+                finally:
+                    for b, f in restore:
+                        os.makedirs(os.path.dirname(f), exist_ok=True)
+                        shutil.move(b, f)
+
+        self.options = options
+
+    def has_build_file(self, dirname: str) -> bool:
+        fname = os.path.join(dirname, environment.build_filename)
+        return os.path.exists(fname)
+
+    def validate_core_dirs(self, dir1: str, dir2: str) -> T.Tuple[str, str]:
+        if dir1 is None:
+            if dir2 is None:
+                if not os.path.exists('meson.build') and os.path.exists('../meson.build'):
+                    dir2 = '..'
+                else:
+                    raise MesonException('Must specify at least one directory name.')
+            dir1 = os.getcwd()
+        if dir2 is None:
+            dir2 = os.getcwd()
+        ndir1 = os.path.abspath(os.path.realpath(dir1))
+        ndir2 = os.path.abspath(os.path.realpath(dir2))
+        if not os.path.exists(ndir1):
+            os.makedirs(ndir1)
+        if not os.path.exists(ndir2):
+            os.makedirs(ndir2)
+        if not stat.S_ISDIR(os.stat(ndir1).st_mode):
+            raise MesonException('{} is not a directory'.format(dir1))
+        if not stat.S_ISDIR(os.stat(ndir2).st_mode):
+            raise MesonException('{} is not a directory'.format(dir2))
+        if os.path.samefile(dir1, dir2):
+            raise MesonException('Source and build directories must not be the same. Create a pristine build directory.')
+        if self.has_build_file(ndir1):
+            if self.has_build_file(ndir2):
+                raise MesonException('Both directories contain a build file {}.'.format(environment.build_filename))
+            return ndir1, ndir2
+        if self.has_build_file(ndir2):
+            return ndir2, ndir1
+        raise MesonException('Neither directory contains a build file {}.'.format(environment.build_filename))
+
+    def validate_dirs(self, dir1: str, dir2: str, reconfigure: bool, wipe: bool) -> T.Tuple[str, str]:
+        (src_dir, build_dir) = self.validate_core_dirs(dir1, dir2)
+        priv_dir = os.path.join(build_dir, 'meson-private/coredata.dat')
+        if os.path.exists(priv_dir):
+            if not reconfigure and not wipe:
+                print('Directory already configured.\n'
+                      '\nJust run your build command (e.g. ninja) and Meson will regenerate as necessary.\n'
+                      'If ninja fails, run "ninja reconfigure" or "meson --reconfigure"\n'
+                      'to force Meson to regenerate.\n'
+                      '\nIf build failures persist, run "meson setup --wipe" to rebuild from scratch\n'
+                      'using the same options as passed when configuring the build.'
+                      '\nTo change option values, run "meson configure" instead.')
+                raise SystemExit
+        else:
+            has_cmd_line_file = os.path.exists(coredata.get_cmd_line_file(build_dir))
+            if (wipe and not has_cmd_line_file) or (not wipe and reconfigure):
+                raise SystemExit('Directory does not contain a valid build tree:\n{}'.format(build_dir))
+        return src_dir, build_dir
+
+    def generate(self):
+        env = environment.Environment(self.source_dir, self.build_dir, self.options)
+        mlog.initialize(env.get_log_dir(), self.options.fatal_warnings)
+        if self.options.profile:
+            mlog.set_timestamp_start(time.monotonic())
+        with mesonlib.BuildDirLock(self.build_dir):
+            self._generate(env)
+
+    def _generate(self, env):
+        mlog.debug('Build started at', datetime.datetime.now().isoformat())
+        mlog.debug('Main binary:', sys.executable)
+        mlog.debug('Build Options:', coredata.get_cmd_line_options(self.build_dir, self.options))
+        mlog.debug('Python system:', platform.system())
+        mlog.log(mlog.bold('The Meson build system'))
+        mlog.log('Version:', coredata.version)
+        mlog.log('Source dir:', mlog.bold(self.source_dir))
+        mlog.log('Build dir:', mlog.bold(self.build_dir))
+        if env.is_cross_build():
+            mlog.log('Build type:', mlog.bold('cross build'))
+        else:
+            mlog.log('Build type:', mlog.bold('native build'))
+        b = build.Build(env)
+
+        intr = interpreter.Interpreter(b)
+        if env.is_cross_build():
+            logger_fun = mlog.log
+        else:
+            logger_fun = mlog.debug
+        logger_fun('Build machine cpu family:', mlog.bold(intr.builtin['build_machine'].cpu_family_method([], {})))
+        logger_fun('Build machine cpu:', mlog.bold(intr.builtin['build_machine'].cpu_method([], {})))
+        mlog.log('Host machine cpu family:', mlog.bold(intr.builtin['host_machine'].cpu_family_method([], {})))
+        mlog.log('Host machine cpu:', mlog.bold(intr.builtin['host_machine'].cpu_method([], {})))
+        logger_fun('Target machine cpu family:', mlog.bold(intr.builtin['target_machine'].cpu_family_method([], {})))
+        logger_fun('Target machine cpu:', mlog.bold(intr.builtin['target_machine'].cpu_method([], {})))
+        try:
+            if self.options.profile:
+                fname = os.path.join(self.build_dir, 'meson-private', 'profile-interpreter.log')
+                profile.runctx('intr.run()', globals(), locals(), filename=fname)
+            else:
+                intr.run()
+        except Exception as e:
+            mintro.write_meson_info_file(b, [e])
+            raise
+        # Print all default option values that don't match the current value
+        for def_opt_name, def_opt_value, cur_opt_value in intr.get_non_matching_default_options():
+            mlog.log('Option', mlog.bold(def_opt_name), 'is:',
+                     mlog.bold('{}'.format(make_lower_case(cur_opt_value.printable_value()))),
+                     '[default: {}]'.format(make_lower_case(def_opt_value)))
+        try:
+            dumpfile = os.path.join(env.get_scratch_dir(), 'build.dat')
+            # We would like to write coredata as late as possible since we use the existence of
+            # this file to check if we generated the build file successfully. Since coredata
+            # includes settings, the build files must depend on it and appear newer. However, due
+            # to various kernel caches, we cannot guarantee that any time in Python is exactly in
+            # sync with the time that gets applied to any files. Thus, we dump this file as late as
+            # possible, but before build files, and if any error occurs, delete it.
+            cdf = env.dump_coredata()
+            if self.options.profile:
+                fname = 'profile-{}-backend.log'.format(intr.backend.name)
+                fname = os.path.join(self.build_dir, 'meson-private', fname)
+                profile.runctx('intr.backend.generate()', globals(), locals(), filename=fname)
+            else:
+                intr.backend.generate()
+            build.save(b, dumpfile)
+            if env.first_invocation:
+                coredata.write_cmd_line_file(self.build_dir, self.options)
+            else:
+                coredata.update_cmd_line_file(self.build_dir, self.options)
+
+            # Generate an IDE introspection file with the same syntax as the already existing API
+            if self.options.profile:
+                fname = os.path.join(self.build_dir, 'meson-private', 'profile-introspector.log')
+                profile.runctx('mintro.generate_introspection_file(b, intr.backend)', globals(), locals(), filename=fname)
+            else:
+                mintro.generate_introspection_file(b, intr.backend)
+            mintro.write_meson_info_file(b, [], True)
+
+            # Post-conf scripts must be run after writing coredata or else introspection fails.
+            intr.backend.run_postconf_scripts()
+        except Exception as e:
+            mintro.write_meson_info_file(b, [e])
+            if 'cdf' in locals():
+                old_cdf = cdf + '.prev'
+                if os.path.exists(old_cdf):
+                    os.replace(old_cdf, cdf)
+                else:
+                    os.unlink(cdf)
+            raise
+
+def run(options) -> int:
+    coredata.parse_cmd_line_options(options)
+    app = MesonApp(options)
+    app.generate()
+    return 0
diff --git a/meson/mesonbuild/msubprojects.py b/meson/mesonbuild/msubprojects.py
new file mode 100755
index 000000000..9a8b0cf89
--- /dev/null
+++ b/meson/mesonbuild/msubprojects.py
@@ -0,0 +1,255 @@
+import os, subprocess
+import argparse
+
+from . import mlog
+from .mesonlib import git, Popen_safe
+from .wrap.wrap import API_ROOT, PackageDefinition, Resolver, WrapException
+from .wrap import wraptool
+
+def update_wrapdb_file(wrap, repo_dir, options):
+    patch_url = wrap.get('patch_url')
+    branch, revision = wraptool.parse_patch_url(patch_url)
+    new_branch, new_revision = wraptool.get_latest_version(wrap.name)
+    if new_branch == branch and new_revision == revision:
+        mlog.log('  -> Up to date.')
+        return
+    wraptool.update_wrap_file(wrap.filename, wrap.name, new_branch, new_revision)
+    msg = ['  -> New wrap file downloaded.']
+    # Meson reconfigure won't use the new wrap file as long as the source
+    # directory exists. We don't delete it ourself to avoid data loss in case
+    # user has changes in their copy.
+    if os.path.isdir(repo_dir):
+        msg += ['To use it, delete', mlog.bold(repo_dir), 'and run', mlog.bold('meson --reconfigure')]
+    mlog.log(*msg)
+
+def update_file(wrap, repo_dir, options):
+    patch_url = wrap.values.get('patch_url', '')
+    if patch_url.startswith(API_ROOT):
+        update_wrapdb_file(wrap, repo_dir, options)
+    elif not os.path.isdir(repo_dir):
+        # The subproject is not needed, or it is a tarball extracted in
+        # 'libfoo-1.0' directory and the version has been bumped and the new
+        # directory is 'libfoo-2.0'. In that case forcing a meson
+        # reconfigure will download and use the new tarball.
+        mlog.log('  -> Subproject has not been checked out. Run', mlog.bold('meson --reconfigure'), 'to fetch it if needed.')
+    else:
+        # The subproject has not changed, or the new source and/or patch
+        # tarballs should be extracted in the same directory than previous
+        # version.
+        mlog.log('  -> Subproject has not changed, or the new source/patch needs to be extracted on the same location.\n' +
+                 '     In that case, delete', mlog.bold(repo_dir), 'and run', mlog.bold('meson --reconfigure'))
+
+def git_output(cmd, workingdir):
+    return git(cmd, workingdir, check=True, universal_newlines=True,
+               stdout=subprocess.PIPE, stderr=subprocess.STDOUT).stdout
+
+def git_show(repo_dir):
+    commit_message = git_output(['show', '--quiet', '--pretty=format:%h%n%d%n%s%n[%an]'], repo_dir)
+    parts = [s.strip() for s in commit_message.split('\n')]
+    mlog.log('  ->', mlog.yellow(parts[0]), mlog.red(parts[1]), parts[2], mlog.blue(parts[3]))
+
+def update_git(wrap, repo_dir, options):
+    if not os.path.isdir(repo_dir):
+        mlog.log('  -> Not used.')
+        return
+    revision = wrap.get('revision')
+    ret = git_output(['rev-parse', '--abbrev-ref', 'HEAD'], repo_dir).strip()
+    if ret == 'HEAD':
+        try:
+            # We are currently in detached mode, just checkout the new revision
+            git_output(['fetch'], repo_dir)
+            git_output(['checkout', revision], repo_dir)
+        except subprocess.CalledProcessError as e:
+            out = e.output.strip()
+            mlog.log('  -> Could not checkout revision', mlog.cyan(revision))
+            mlog.log(mlog.red(out))
+            mlog.log(mlog.red(str(e)))
+            return
+    elif ret == revision:
+        try:
+            # We are in the same branch, pull latest commits
+            git_output(['-c', 'rebase.autoStash=true', 'pull', '--rebase'], repo_dir)
+        except subprocess.CalledProcessError as e:
+            out = e.output.strip()
+            mlog.log('  -> Could not rebase', mlog.bold(repo_dir), 'please fix and try again.')
+            mlog.log(mlog.red(out))
+            mlog.log(mlog.red(str(e)))
+            return
+    else:
+        # We are in another branch, probably user created their own branch and
+        # we should rebase it on top of wrap's branch.
+        if options.rebase:
+            try:
+                git_output(['fetch'], repo_dir)
+                git_output(['-c', 'rebase.autoStash=true', 'rebase', revision], repo_dir)
+            except subprocess.CalledProcessError as e:
+                out = e.output.strip()
+                mlog.log('  -> Could not rebase', mlog.bold(repo_dir), 'please fix and try again.')
+                mlog.log(mlog.red(out))
+                mlog.log(mlog.red(str(e)))
+                return
+        else:
+            mlog.log('  -> Target revision is', mlog.bold(revision), 'but currently in branch is', mlog.bold(ret), '\n' +
+                     '     To rebase your branch on top of', mlog.bold(revision), 'use', mlog.bold('--rebase'), 'option.')
+            return
+
+    git_output(['submodule', 'update', '--checkout', '--recursive'], repo_dir)
+    git_show(repo_dir)
+
+def update_hg(wrap, repo_dir, options):
+    if not os.path.isdir(repo_dir):
+        mlog.log('  -> Not used.')
+        return
+    revno = wrap.get('revision')
+    if revno.lower() == 'tip':
+        # Failure to do pull is not a fatal error,
+        # because otherwise you can't develop without
+        # a working net connection.
+        subprocess.call(['hg', 'pull'], cwd=repo_dir)
+    else:
+        if subprocess.call(['hg', 'checkout', revno], cwd=repo_dir) != 0:
+            subprocess.check_call(['hg', 'pull'], cwd=repo_dir)
+            subprocess.check_call(['hg', 'checkout', revno], cwd=repo_dir)
+
+def update_svn(wrap, repo_dir, options):
+    if not os.path.isdir(repo_dir):
+        mlog.log('  -> Not used.')
+        return
+    revno = wrap.get('revision')
+    p, out, _ = Popen_safe(['svn', 'info', '--show-item', 'revision', repo_dir])
+    current_revno = out
+    if current_revno == revno:
+        return
+    if revno.lower() == 'head':
+        # Failure to do pull is not a fatal error,
+        # because otherwise you can't develop without
+        # a working net connection.
+        subprocess.call(['svn', 'update'], cwd=repo_dir)
+    else:
+        subprocess.check_call(['svn', 'update', '-r', revno], cwd=repo_dir)
+
+def update(wrap, repo_dir, options):
+    mlog.log('Updating {}...'.format(wrap.name))
+    if wrap.type == 'file':
+        update_file(wrap, repo_dir, options)
+    elif wrap.type == 'git':
+        update_git(wrap, repo_dir, options)
+    elif wrap.type == 'hg':
+        update_hg(wrap, repo_dir, options)
+    elif wrap.type == 'svn':
+        update_svn(wrap, repo_dir, options)
+    else:
+        mlog.log('  -> Cannot update', wrap.type, 'subproject')
+
+def checkout(wrap, repo_dir, options):
+    if wrap.type != 'git' or not os.path.isdir(repo_dir):
+        return
+    branch_name = options.branch_name if options.branch_name else wrap.get('revision')
+    cmd = ['checkout', branch_name, '--']
+    if options.b:
+        cmd.insert(1, '-b')
+    mlog.log('Checkout {} in {}...'.format(branch_name, wrap.name))
+    try:
+        git_output(cmd, repo_dir)
+        git_show(repo_dir)
+    except subprocess.CalledProcessError as e:
+        out = e.output.strip()
+        mlog.log('  -> ', mlog.red(out))
+
+def download(wrap, repo_dir, options):
+    mlog.log('Download {}...'.format(wrap.name))
+    if os.path.isdir(repo_dir):
+        mlog.log('  -> Already downloaded')
+        return
+    try:
+        r = Resolver(os.path.dirname(repo_dir))
+        r.resolve(wrap.name, 'meson')
+        mlog.log('  -> done')
+    except WrapException as e:
+        mlog.log('  ->', mlog.red(str(e)))
+
+def foreach(wrap, repo_dir, options):
+    mlog.log('Executing command in {}'.format(repo_dir))
+    if not os.path.isdir(repo_dir):
+        mlog.log('  -> Not downloaded yet')
+        return
+    try:
+        out = subprocess.check_output([options.command] + options.args,
+                                      stderr=subprocess.STDOUT,
+                                      cwd=repo_dir).decode()
+        mlog.log(out, end='')
+    except subprocess.CalledProcessError as e:
+        err_message = "Command '{}' returned non-zero exit status {}.".format(" ".join(e.cmd), e.returncode)
+        out = e.output.decode()
+        mlog.log('  -> ', mlog.red(err_message))
+        mlog.log(out, end='')
+    except Exception as e:
+        mlog.log('  -> ', mlog.red(str(e)))
+
+def add_common_arguments(p):
+    p.add_argument('--sourcedir', default='.',
+                   help='Path to source directory')
+    p.add_argument('subprojects', nargs='*',
+                   help='List of subprojects (default: all)')
+
+def add_arguments(parser):
+    subparsers = parser.add_subparsers(title='Commands', dest='command')
+    subparsers.required = True
+
+    p = subparsers.add_parser('update', help='Update all subprojects from wrap files')
+    p.add_argument('--rebase', default=False, action='store_true',
+                   help='Rebase your branch on top of wrap\'s revision (git only)')
+    add_common_arguments(p)
+    p.set_defaults(subprojects_func=update)
+
+    p = subparsers.add_parser('checkout', help='Checkout a branch (git only)')
+    p.add_argument('-b', default=False, action='store_true',
+                   help='Create a new branch')
+    p.add_argument('branch_name', nargs='?',
+                   help='Name of the branch to checkout or create (default: revision set in wrap file)')
+    add_common_arguments(p)
+    p.set_defaults(subprojects_func=checkout)
+
+    p = subparsers.add_parser('download', help='Ensure subprojects are fetched, even if not in use. ' +
+                                               'Already downloaded subprojects are not modified. ' +
+                                               'This can be used to pre-fetch all subprojects and avoid downloads during configure.')
+    add_common_arguments(p)
+    p.set_defaults(subprojects_func=download)
+
+    p = subparsers.add_parser('foreach', help='Execute a command in each subproject directory.')
+    p.add_argument('command', metavar='command ...',
+                   help='Command to execute in each subproject directory')
+    p.add_argument('args', nargs=argparse.REMAINDER,
+                   help=argparse.SUPPRESS)
+    p.add_argument('--sourcedir', default='.',
+                   help='Path to source directory')
+    p.set_defaults(subprojects_func=foreach)
+
+def run(options):
+    src_dir = os.path.relpath(os.path.realpath(options.sourcedir))
+    if not os.path.isfile(os.path.join(src_dir, 'meson.build')):
+        mlog.error('Directory', mlog.bold(src_dir), 'does not seem to be a Meson source directory.')
+        return 1
+    subprojects_dir = os.path.join(src_dir, 'subprojects')
+    if not os.path.isdir(subprojects_dir):
+        mlog.log('Directory', mlog.bold(src_dir), 'does not seem to have subprojects.')
+        return 0
+    files = []
+    if hasattr(options, 'subprojects'):
+        for name in options.subprojects:
+            f = os.path.join(subprojects_dir, name + '.wrap')
+            if not os.path.isfile(f):
+                mlog.error('Subproject', mlog.bold(name), 'not found.')
+                return 1
+            else:
+                files.append(f)
+    if not files:
+        for f in os.listdir(subprojects_dir):
+            if f.endswith('.wrap'):
+                files.append(os.path.join(subprojects_dir, f))
+    for f in files:
+        wrap = PackageDefinition(f)
+        directory = wrap.values.get('directory', wrap.name)
+        repo_dir = os.path.join(subprojects_dir, directory)
+        options.subprojects_func(wrap, repo_dir, options)
+    return 0
diff --git a/meson/mesonbuild/mtest.py b/meson/mesonbuild/mtest.py
new file mode 100644
index 000000000..c28bdd230
--- /dev/null
+++ b/meson/mesonbuild/mtest.py
@@ -0,0 +1,1234 @@
+# Copyright 2016-2017 The Meson development team
+
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+
+#     http://www.apache.org/licenses/LICENSE-2.0
+
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# A tool to run tests in many different ways.
+
+from pathlib import Path
+from collections import namedtuple
+from copy import deepcopy
+import argparse
+import concurrent.futures as conc
+import datetime
+import enum
+import io
+import json
+import multiprocessing
+import os
+import pickle
+import platform
+import random
+import re
+import signal
+import subprocess
+import sys
+import tempfile
+import textwrap
+import time
+import typing as T
+import xml.etree.ElementTree as et
+
+from . import build
+from . import environment
+from . import mlog
+from .dependencies import ExternalProgram
+from .mesonlib import MesonException, get_wine_shortpath, split_args, join_args
+from .backend.backends import TestProtocol
+
+if T.TYPE_CHECKING:
+    from .backend.backends import TestSerialisation
+
+# GNU autotools interprets a return code of 77 from tests it executes to
+# mean that the test should be skipped.
+GNU_SKIP_RETURNCODE = 77
+
+# GNU autotools interprets a return code of 99 from tests it executes to
+# mean that the test failed even before testing what it is supposed to test.
+GNU_ERROR_RETURNCODE = 99
+
+def is_windows() -> bool:
+    platname = platform.system().lower()
+    return platname == 'windows' or 'mingw' in platname
+
+def is_cygwin() -> bool:
+    platname = platform.system().lower()
+    return 'cygwin' in platname
+
+def determine_worker_count() -> int:
+    varname = 'MESON_TESTTHREADS'
+    if varname in os.environ:
+        try:
+            num_workers = int(os.environ[varname])
+        except ValueError:
+            print('Invalid value in {}, using 1 thread.'.format(varname))
+            num_workers = 1
+    else:
+        try:
+            # Fails in some weird environments such as Debian
+            # reproducible build.
+            num_workers = multiprocessing.cpu_count()
+        except Exception:
+            num_workers = 1
+    return num_workers
+
+def add_arguments(parser: argparse.ArgumentParser) -> None:
+    parser.add_argument('--repeat', default=1, dest='repeat', type=int,
+                        help='Number of times to run the tests.')
+    parser.add_argument('--no-rebuild', default=False, action='store_true',
+                        help='Do not rebuild before running tests.')
+    parser.add_argument('--gdb', default=False, dest='gdb', action='store_true',
+                        help='Run test under gdb.')
+    parser.add_argument('--gdb-path', default='gdb', dest='gdb_path',
+                        help='Path to the gdb binary (default: gdb).')
+    parser.add_argument('--list', default=False, dest='list', action='store_true',
+                        help='List available tests.')
+    parser.add_argument('--wrapper', default=None, dest='wrapper', type=split_args,
+                        help='wrapper to run tests with (e.g. Valgrind)')
+    parser.add_argument('-C', default='.', dest='wd',
+                        # https://github.com/python/typeshed/issues/3107
+                        # https://github.com/python/mypy/issues/7177
+                        type=os.path.abspath,  # type: ignore
+                        help='directory to cd into before running')
+    parser.add_argument('--suite', default=[], dest='include_suites', action='append', metavar='SUITE',
+                        help='Only run tests belonging to the given suite.')
+    parser.add_argument('--no-suite', default=[], dest='exclude_suites', action='append', metavar='SUITE',
+                        help='Do not run tests belonging to the given suite.')
+    parser.add_argument('--no-stdsplit', default=True, dest='split', action='store_false',
+                        help='Do not split stderr and stdout in test logs.')
+    parser.add_argument('--print-errorlogs', default=False, action='store_true',
+                        help="Whether to print failing tests' logs.")
+    parser.add_argument('--benchmark', default=False, action='store_true',
+                        help="Run benchmarks instead of tests.")
+    parser.add_argument('--logbase', default='testlog',
+                        help="Base name for log file.")
+    parser.add_argument('--num-processes', default=determine_worker_count(), type=int,
+                        help='How many parallel processes to use.')
+    parser.add_argument('-v', '--verbose', default=False, action='store_true',
+                        help='Do not redirect stdout and stderr')
+    parser.add_argument('-q', '--quiet', default=False, action='store_true',
+                        help='Produce less output to the terminal.')
+    parser.add_argument('-t', '--timeout-multiplier', type=float, default=None,
+                        help='Define a multiplier for test timeout, for example '
+                        ' when running tests in particular conditions they might take'
+                        ' more time to execute.')
+    parser.add_argument('--setup', default=None, dest='setup',
+                        help='Which test setup to use.')
+    parser.add_argument('--test-args', default=[], type=split_args,
+                        help='Arguments to pass to the specified test(s) or all tests')
+    parser.add_argument('args', nargs='*',
+                        help='Optional list of tests to run')
+
+
+def returncode_to_status(retcode: int) -> str:
+    # Note: We can't use `os.WIFSIGNALED(result.returncode)` and the related
+    # functions here because the status returned by subprocess is munged. It
+    # returns a negative value if the process was killed by a signal rather than
+    # the raw status returned by `wait()`. Also, If a shell sits between Meson
+    # the the actual unit test that shell is likely to convert a termination due
+    # to a signal into an exit status of 128 plus the signal number.
+    if retcode < 0:
+        signum = -retcode
+        try:
+            signame = signal.Signals(signum).name
+        except ValueError:
+            signame = 'SIGinvalid'
+        return '(killed by signal {} {})'.format(signum, signame)
+
+    if retcode <= 128:
+        return '(exit status {})'.format(retcode)
+
+    signum = retcode - 128
+    try:
+        signame = signal.Signals(signum).name
+    except ValueError:
+        signame = 'SIGinvalid'
+    return '(exit status {} or signal {} {})'.format(retcode, signum, signame)
+
+def env_tuple_to_str(env: T.Iterable[T.Tuple[str, str]]) -> str:
+    return ''.join(["{}='{}' ".format(k, v) for k, v in env])
+
+
+class TestException(MesonException):
+    pass
+
+
+@enum.unique
+class TestResult(enum.Enum):
+
+    OK = 'OK'
+    TIMEOUT = 'TIMEOUT'
+    SKIP = 'SKIP'
+    FAIL = 'FAIL'
+    EXPECTEDFAIL = 'EXPECTEDFAIL'
+    UNEXPECTEDPASS = 'UNEXPECTEDPASS'
+    ERROR = 'ERROR'
+
+    @staticmethod
+    def maxlen() -> int:
+        return 14 # len(UNEXPECTEDPASS)
+
+
+class TAPParser:
+    Plan = namedtuple('Plan', ['count', 'late', 'skipped', 'explanation'])
+    Bailout = namedtuple('Bailout', ['message'])
+    Test = namedtuple('Test', ['number', 'name', 'result', 'explanation'])
+    Error = namedtuple('Error', ['message'])
+    Version = namedtuple('Version', ['version'])
+
+    _MAIN = 1
+    _AFTER_TEST = 2
+    _YAML = 3
+
+    _RE_BAILOUT = re.compile(r'Bail out!\s*(.*)')
+    _RE_DIRECTIVE = re.compile(r'(?:\s*\#\s*([Ss][Kk][Ii][Pp]\S*|[Tt][Oo][Dd][Oo])\b\s*(.*))?')
+    _RE_PLAN = re.compile(r'1\.\.([0-9]+)' + _RE_DIRECTIVE.pattern)
+    _RE_TEST = re.compile(r'((?:not )?ok)\s*(?:([0-9]+)\s*)?([^#]*)' + _RE_DIRECTIVE.pattern)
+    _RE_VERSION = re.compile(r'TAP version ([0-9]+)')
+    _RE_YAML_START = re.compile(r'(\s+)---.*')
+    _RE_YAML_END = re.compile(r'\s+\.\.\.\s*')
+
+    def __init__(self, io: T.Iterator[str]):
+        self.io = io
+
+    def parse_test(self, ok: bool, num: int, name: str, directive: T.Optional[str], explanation: T.Optional[str]) -> \
+            T.Generator[T.Union['TAPParser.Test', 'TAPParser.Error'], None, None]:
+        name = name.strip()
+        explanation = explanation.strip() if explanation else None
+        if directive is not None:
+            directive = directive.upper()
+            if directive == 'SKIP':
+                if ok:
+                    yield self.Test(num, name, TestResult.SKIP, explanation)
+                    return
+            elif directive == 'TODO':
+                yield self.Test(num, name, TestResult.UNEXPECTEDPASS if ok else TestResult.EXPECTEDFAIL, explanation)
+                return
+            else:
+                yield self.Error('invalid directive "{}"'.format(directive,))
+
+        yield self.Test(num, name, TestResult.OK if ok else TestResult.FAIL, explanation)
+
+    def parse(self) -> T.Generator[T.Union['TAPParser.Test', 'TAPParser.Error', 'TAPParser.Version', 'TAPParser.Plan', 'TAPParser.Bailout'], None, None]:
+        found_late_test = False
+        bailed_out = False
+        plan = None
+        lineno = 0
+        num_tests = 0
+        yaml_lineno = None
+        yaml_indent = ''
+        state = self._MAIN
+        version = 12
+        while True:
+            lineno += 1
+            try:
+                line = next(self.io).rstrip()
+            except StopIteration:
+                break
+
+            # YAML blocks are only accepted after a test
+            if state == self._AFTER_TEST:
+                if version >= 13:
+                    m = self._RE_YAML_START.match(line)
+                    if m:
+                        state = self._YAML
+                        yaml_lineno = lineno
+                        yaml_indent = m.group(1)
+                        continue
+                state = self._MAIN
+
+            elif state == self._YAML:
+                if self._RE_YAML_END.match(line):
+                    state = self._MAIN
+                    continue
+                if line.startswith(yaml_indent):
+                    continue
+                yield self.Error('YAML block not terminated (started on line {})'.format(yaml_lineno))
+                state = self._MAIN
+
+            assert state == self._MAIN
+            if line.startswith('#'):
+                continue
+
+            m = self._RE_TEST.match(line)
+            if m:
+                if plan and plan.late and not found_late_test:
+                    yield self.Error('unexpected test after late plan')
+                    found_late_test = True
+                num_tests += 1
+                num = num_tests if m.group(2) is None else int(m.group(2))
+                if num != num_tests:
+                    yield self.Error('out of order test numbers')
+                yield from self.parse_test(m.group(1) == 'ok', num,
+                                           m.group(3), m.group(4), m.group(5))
+                state = self._AFTER_TEST
+                continue
+
+            m = self._RE_PLAN.match(line)
+            if m:
+                if plan:
+                    yield self.Error('more than one plan found')
+                else:
+                    count = int(m.group(1))
+                    skipped = (count == 0)
+                    if m.group(2):
+                        if m.group(2).upper().startswith('SKIP'):
+                            if count > 0:
+                                yield self.Error('invalid SKIP directive for plan')
+                            skipped = True
+                        else:
+                            yield self.Error('invalid directive for plan')
+                    plan = self.Plan(count=count, late=(num_tests > 0),
+                                     skipped=skipped, explanation=m.group(3))
+                    yield plan
+                continue
+
+            m = self._RE_BAILOUT.match(line)
+            if m:
+                yield self.Bailout(m.group(1))
+                bailed_out = True
+                continue
+
+            m = self._RE_VERSION.match(line)
+            if m:
+                # The TAP version is only accepted as the first line
+                if lineno != 1:
+                    yield self.Error('version number must be on the first line')
+                    continue
+                version = int(m.group(1))
+                if version < 13:
+                    yield self.Error('version number should be at least 13')
+                else:
+                    yield self.Version(version=version)
+                continue
+
+            if not line:
+                continue
+
+            yield self.Error('unexpected input at line {}'.format((lineno,)))
+
+        if state == self._YAML:
+            yield self.Error('YAML block not terminated (started on line {})'.format(yaml_lineno))
+
+        if not bailed_out and plan and num_tests != plan.count:
+            if num_tests < plan.count:
+                yield self.Error('Too few tests run (expected {}, got {})'.format(plan.count, num_tests))
+            else:
+                yield self.Error('Too many tests run (expected {}, got {})'.format(plan.count, num_tests))
+
+
+
+class JunitBuilder:
+
+    """Builder for Junit test results.
+
+    Junit is impossible to stream out, it requires attributes counting the
+    total number of tests, failures, skips, and errors in the root element
+    and in each test suite. As such, we use a builder class to track each
+    test case, and calculate all metadata before writing it out.
+
+    For tests with multiple results (like from a TAP test), we record the
+    test as a suite with the project_name.test_name. This allows us to track
+    each result separately. For tests with only one result (such as exit-code
+    tests) we record each one into a suite with the name project_name. The use
+    of the project_name allows us to sort subproject tests separately from
+    the root project.
+    """
+
+    def __init__(self, filename: str) -> None:
+        self.filename = filename
+        self.root = et.Element(
+            'testsuites', tests='0', errors='0', failures='0')
+        self.suites = {}  # type: T.Dict[str, et.Element]
+
+    def log(self, name: str, test: 'TestRun') -> None:
+        """Log a single test case."""
+        if test.junit is not None:
+            for suite in test.junit.findall('.//testsuite'):
+                # Assume that we don't need to merge anything here...
+                suite.attrib['name'] = '{}.{}.{}'.format(test.project, name, suite.attrib['name'])
+
+                # GTest can inject invalid attributes
+                for case in suite.findall('.//testcase[@result]'):
+                    del case.attrib['result']
+                for case in suite.findall('.//testcase[@timestamp]'):
+                    del case.attrib['timestamp']
+                self.root.append(suite)
+            return
+
+        # In this case we have a test binary with multiple results.
+        # We want to record this so that each result is recorded
+        # separately
+        if test.results:
+            suitename = '{}.{}'.format(test.project, name)
+            assert suitename not in self.suites, 'duplicate suite'
+
+            suite = self.suites[suitename] = et.Element(
+                'testsuite',
+                name=suitename,
+                tests=str(len(test.results)),
+                errors=str(sum(1 for r in test.results if r is TestResult.ERROR)),
+                failures=str(sum(1 for r in test.results if r in
+                                 {TestResult.FAIL, TestResult.UNEXPECTEDPASS, TestResult.TIMEOUT})),
+                skipped=str(sum(1 for r in test.results if r is TestResult.SKIP)),
+            )
+
+            for i, result in enumerate(test.results):
+                # Both name and classname are required. Set them both to the
+                # number of the test in a TAP test, as TAP doesn't give names.
+                testcase = et.SubElement(suite, 'testcase', name=str(i), classname=str(i))
+                if result is TestResult.SKIP:
+                    et.SubElement(testcase, 'skipped')
+                elif result is TestResult.ERROR:
+                    et.SubElement(testcase, 'error')
+                elif result is TestResult.FAIL:
+                    et.SubElement(testcase, 'failure')
+                elif result is TestResult.UNEXPECTEDPASS:
+                    fail = et.SubElement(testcase, 'failure')
+                    fail.text = 'Test unexpected passed.'
+                elif result is TestResult.TIMEOUT:
+                    fail = et.SubElement(testcase, 'failure')
+                    fail.text = 'Test did not finish before configured timeout.'
+            if test.stdo:
+                out = et.SubElement(suite, 'system-out')
+                out.text = test.stdo.rstrip()
+            if test.stde:
+                err = et.SubElement(suite, 'system-err')
+                err.text = test.stde.rstrip()
+        else:
+            if test.project not in self.suites:
+                suite = self.suites[test.project] = et.Element(
+                    'testsuite', name=test.project, tests='1', errors='0',
+                    failures='0', skipped='0')
+            else:
+                suite = self.suites[test.project]
+                suite.attrib['tests'] = str(int(suite.attrib['tests']) + 1)
+
+            testcase = et.SubElement(suite, 'testcase', name=name, classname=name)
+            if test.res is TestResult.SKIP:
+                et.SubElement(testcase, 'skipped')
+                suite.attrib['skipped'] = str(int(suite.attrib['skipped']) + 1)
+            elif test.res is TestResult.ERROR:
+                et.SubElement(testcase, 'error')
+                suite.attrib['errors'] = str(int(suite.attrib['errors']) + 1)
+            elif test.res is TestResult.FAIL:
+                et.SubElement(testcase, 'failure')
+                suite.attrib['failures'] = str(int(suite.attrib['failures']) + 1)
+            if test.stdo:
+                out = et.SubElement(testcase, 'system-out')
+                out.text = test.stdo.rstrip()
+            if test.stde:
+                err = et.SubElement(testcase, 'system-err')
+                err.text = test.stde.rstrip()
+
+    def write(self) -> None:
+        """Calculate total test counts and write out the xml result."""
+        for suite in self.suites.values():
+            self.root.append(suite)
+            # Skipped is really not allowed in the "testsuits" element
+            for attr in ['tests', 'errors', 'failures']:
+                self.root.attrib[attr] = str(int(self.root.attrib[attr]) + int(suite.attrib[attr]))
+
+        tree = et.ElementTree(self.root)
+        with open(self.filename, 'wb') as f:
+            tree.write(f, encoding='utf-8', xml_declaration=True)
+
+
+class TestRun:
+
+    @classmethod
+    def make_gtest(cls, test: 'TestSerialisation', test_env: T.Dict[str, str],
+                   returncode: int, starttime: float, duration: float,
+                   stdo: T.Optional[str], stde: T.Optional[str],
+                   cmd: T.Optional[T.List[str]]) -> 'TestRun':
+        filename = '{}.xml'.format(test.name)
+        if test.workdir:
+            filename = os.path.join(test.workdir, filename)
+        tree = et.parse(filename)
+
+        return cls.make_exitcode(
+            test, test_env, returncode, starttime, duration, stdo, stde, cmd,
+            junit=tree)
+
+    @classmethod
+    def make_exitcode(cls, test: 'TestSerialisation', test_env: T.Dict[str, str],
+                      returncode: int, starttime: float, duration: float,
+                      stdo: T.Optional[str], stde: T.Optional[str],
+                      cmd: T.Optional[T.List[str]], **kwargs) -> 'TestRun':
+        if returncode == GNU_SKIP_RETURNCODE:
+            res = TestResult.SKIP
+        elif returncode == GNU_ERROR_RETURNCODE:
+            res = TestResult.ERROR
+        elif test.should_fail:
+            res = TestResult.EXPECTEDFAIL if bool(returncode) else TestResult.UNEXPECTEDPASS
+        else:
+            res = TestResult.FAIL if bool(returncode) else TestResult.OK
+        return cls(test, test_env, res, [], returncode, starttime, duration, stdo, stde, cmd, **kwargs)
+
+    @classmethod
+    def make_tap(cls, test: 'TestSerialisation', test_env: T.Dict[str, str],
+                 returncode: int, starttime: float, duration: float,
+                 stdo: str, stde: str,
+                 cmd: T.Optional[T.List[str]]) -> 'TestRun':
+        res = None    # type: T.Optional[TestResult]
+        results = []  # type: T.List[TestResult]
+        failed = False
+
+        for i in TAPParser(io.StringIO(stdo)).parse():
+            if isinstance(i, TAPParser.Bailout):
+                results.append(TestResult.ERROR)
+                failed = True
+            elif isinstance(i, TAPParser.Test):
+                results.append(i.result)
+                if i.result not in {TestResult.OK, TestResult.EXPECTEDFAIL, TestResult.SKIP}:
+                    failed = True
+            elif isinstance(i, TAPParser.Error):
+                results.append(TestResult.ERROR)
+                stde += '\nTAP parsing error: ' + i.message
+                failed = True
+
+        if returncode != 0:
+            res = TestResult.ERROR
+            stde += '\n(test program exited with status code {})'.format(returncode,)
+
+        if res is None:
+            # Now determine the overall result of the test based on the outcome of the subcases
+            if all(t is TestResult.SKIP for t in results):
+                # This includes the case where num_tests is zero
+                res = TestResult.SKIP
+            elif test.should_fail:
+                res = TestResult.EXPECTEDFAIL if failed else TestResult.UNEXPECTEDPASS
+            else:
+                res = TestResult.FAIL if failed else TestResult.OK
+
+        return cls(test, test_env, res, results, returncode, starttime, duration, stdo, stde, cmd)
+
+    def __init__(self, test: 'TestSerialisation', test_env: T.Dict[str, str],
+                 res: TestResult, results: T.List[TestResult], returncode:
+                 int, starttime: float, duration: float,
+                 stdo: T.Optional[str], stde: T.Optional[str],
+                 cmd: T.Optional[T.List[str]], *, junit: T.Optional[et.ElementTree] = None):
+        assert isinstance(res, TestResult)
+        self.res = res
+        self.results = results  # May be an empty list
+        self.returncode = returncode
+        self.starttime = starttime
+        self.duration = duration
+        self.stdo = stdo
+        self.stde = stde
+        self.cmd = cmd
+        self.env = test_env
+        self.should_fail = test.should_fail
+        self.project = test.project_name
+        self.junit = junit
+
+    def get_log(self) -> str:
+        res = '--- command ---\n'
+        if self.cmd is None:
+            res += 'NONE\n'
+        else:
+            test_only_env = set(self.env.items()) - set(os.environ.items())
+            starttime_str = time.strftime("%H:%M:%S", time.gmtime(self.starttime))
+            res += '{} {}{}\n'.format(
+                starttime_str, env_tuple_to_str(test_only_env), ' '.join(self.cmd)
+            )
+        if self.stdo:
+            res += '--- stdout ---\n'
+            res += self.stdo
+        if self.stde:
+            if res[-1:] != '\n':
+                res += '\n'
+            res += '--- stderr ---\n'
+            res += self.stde
+        if res[-1:] != '\n':
+            res += '\n'
+        res += '-------\n\n'
+        return res
+
+def decode(stream: T.Union[None, bytes]) -> str:
+    if stream is None:
+        return ''
+    try:
+        return stream.decode('utf-8')
+    except UnicodeDecodeError:
+        return stream.decode('iso-8859-1', errors='ignore')
+
+def write_json_log(jsonlogfile: T.TextIO, test_name: str, result: TestRun) -> None:
+    jresult = {'name': test_name,
+               'stdout': result.stdo,
+               'result': result.res.value,
+               'starttime': result.starttime,
+               'duration': result.duration,
+               'returncode': result.returncode,
+               'env': result.env,
+               'command': result.cmd}  # type: T.Dict[str, T.Any]
+    if result.stde:
+        jresult['stderr'] = result.stde
+    jsonlogfile.write(json.dumps(jresult) + '\n')
+
+def run_with_mono(fname: str) -> bool:
+    return fname.endswith('.exe') and not (is_windows() or is_cygwin())
+
+def load_benchmarks(build_dir: str) -> T.List['TestSerialisation']:
+    datafile = Path(build_dir) / 'meson-private' / 'meson_benchmark_setup.dat'
+    if not datafile.is_file():
+        raise TestException('Directory {!r} does not seem to be a Meson build directory.'.format(build_dir))
+    with datafile.open('rb') as f:
+        obj = T.cast(T.List['TestSerialisation'], pickle.load(f))
+    return obj
+
+def load_tests(build_dir: str) -> T.List['TestSerialisation']:
+    datafile = Path(build_dir) / 'meson-private' / 'meson_test_setup.dat'
+    if not datafile.is_file():
+        raise TestException('Directory {!r} does not seem to be a Meson build directory.'.format(build_dir))
+    with datafile.open('rb') as f:
+        obj = T.cast(T.List['TestSerialisation'], pickle.load(f))
+    return obj
+
+
+class SingleTestRunner:
+
+    def __init__(self, test: 'TestSerialisation', test_env: T.Dict[str, str],
+                 env: T.Dict[str, str], options: argparse.Namespace):
+        self.test = test
+        self.test_env = test_env
+        self.env = env
+        self.options = options
+
+    def _get_cmd(self) -> T.Optional[T.List[str]]:
+        if self.test.fname[0].endswith('.jar'):
+            return ['java', '-jar'] + self.test.fname
+        elif not self.test.is_cross_built and run_with_mono(self.test.fname[0]):
+            return ['mono'] + self.test.fname
+        elif self.test.cmd_is_built and self.test.needs_exe_wrapper:
+            if self.test.exe_runner is None:
+                # Can not run test on cross compiled executable
+                # because there is no execute wrapper.
+                return None
+            elif self.test.cmd_is_built:
+                # If the command is not built (ie, its a python script),
+                # then we don't check for the exe-wrapper
+                if not self.test.exe_runner.found():
+                    msg = ('The exe_wrapper defined in the cross file {!r} was not '
+                           'found. Please check the command and/or add it to PATH.')
+                    raise TestException(msg.format(self.test.exe_runner.name))
+                return self.test.exe_runner.get_command() + self.test.fname
+        return self.test.fname
+
+    def run(self) -> TestRun:
+        cmd = self._get_cmd()
+        if cmd is None:
+            skip_stdout = 'Not run because can not execute cross compiled binaries.'
+            return TestRun(self.test, self.test_env, TestResult.SKIP, [], GNU_SKIP_RETURNCODE, time.time(), 0.0, skip_stdout, None, None)
+        else:
+            wrap = TestHarness.get_wrapper(self.options)
+            if self.options.gdb:
+                self.test.timeout = None
+            return self._run_cmd(wrap + cmd + self.test.cmd_args + self.options.test_args)
+
+    def _run_cmd(self, cmd: T.List[str]) -> TestRun:
+        starttime = time.time()
+
+        if self.test.extra_paths:
+            self.env['PATH'] = os.pathsep.join(self.test.extra_paths + ['']) + self.env['PATH']
+            winecmd = []
+            for c in cmd:
+                winecmd.append(c)
+                if os.path.basename(c).startswith('wine'):
+                    self.env['WINEPATH'] = get_wine_shortpath(
+                        winecmd,
+                        ['Z:' + p for p in self.test.extra_paths] + self.env.get('WINEPATH', '').split(';')
+                    )
+                    break
+
+        # If MALLOC_PERTURB_ is not set, or if it is set to an empty value,
+        # (i.e., the test or the environment don't explicitly set it), set
+        # it ourselves. We do this unconditionally for regular tests
+        # because it is extremely useful to have.
+        # Setting MALLOC_PERTURB_="0" will completely disable this feature.
+        if ('MALLOC_PERTURB_' not in self.env or not self.env['MALLOC_PERTURB_']) and not self.options.benchmark:
+            self.env['MALLOC_PERTURB_'] = str(random.randint(1, 255))
+
+        stdout = None
+        stderr = None
+        if not self.options.verbose:
+            stdout = tempfile.TemporaryFile("wb+")
+            stderr = tempfile.TemporaryFile("wb+") if self.options.split else stdout
+        if self.test.protocol is TestProtocol.TAP and stderr is stdout:
+            stdout = tempfile.TemporaryFile("wb+")
+
+        # Let gdb handle ^C instead of us
+        if self.options.gdb:
+            previous_sigint_handler = signal.getsignal(signal.SIGINT)
+            # Make the meson executable ignore SIGINT while gdb is running.
+            signal.signal(signal.SIGINT, signal.SIG_IGN)
+
+        def preexec_fn() -> None:
+            if self.options.gdb:
+                # Restore the SIGINT handler for the child process to
+                # ensure it can handle it.
+                signal.signal(signal.SIGINT, signal.SIG_DFL)
+            else:
+                # We don't want setsid() in gdb because gdb needs the
+                # terminal in order to handle ^C and not show tcsetpgrp()
+                # errors avoid not being able to use the terminal.
+                os.setsid()  # type: ignore
+
+        extra_cmd = []  # type: T.List[str]
+        if self.test.protocol is TestProtocol.GTEST:
+            gtestname = '{}.xml'.format(self.test.name)
+            if self.test.workdir:
+                gtestname = '{}:{}'.format(self.test.workdir, self.test.name)
+            extra_cmd.append('--gtest_output=xml:{}'.format(gtestname))
+
+        p = subprocess.Popen(cmd + extra_cmd,
+                             stdout=stdout,
+                             stderr=stderr,
+                             env=self.env,
+                             cwd=self.test.workdir,
+                             preexec_fn=preexec_fn if not is_windows() else None)
+        timed_out = False
+        kill_test = False
+        if self.test.timeout is None:
+            timeout = None
+        elif self.options.timeout_multiplier is not None:
+            timeout = self.test.timeout * self.options.timeout_multiplier
+        else:
+            timeout = self.test.timeout
+        try:
+            p.communicate(timeout=timeout)
+        except subprocess.TimeoutExpired:
+            if self.options.verbose:
+                print('{} time out (After {} seconds)'.format(self.test.name, timeout))
+            timed_out = True
+        except KeyboardInterrupt:
+            mlog.warning('CTRL-C detected while running {}'.format(self.test.name))
+            kill_test = True
+        finally:
+            if self.options.gdb:
+                # Let us accept ^C again
+                signal.signal(signal.SIGINT, previous_sigint_handler)
+
+        additional_error = None
+
+        if kill_test or timed_out:
+            # Python does not provide multiplatform support for
+            # killing a process and all its children so we need
+            # to roll our own.
+            if is_windows():
+                subprocess.run(['taskkill', '/F', '/T', '/PID', str(p.pid)])
+            else:
+
+                def _send_signal_to_process_group(pgid : int, signum : int):
+                    """ sends a signal to a process group """
+                    try:
+                        os.killpg(pgid, signum) # type: ignore
+                    except ProcessLookupError:
+                        # Sometimes (e.g. with Wine) this happens.
+                        # There's nothing we can do (maybe the process
+                        # already died) so carry on.
+                        pass
+
+                # Send a termination signal to the process group that setsid()
+                # created - giving it a chance to perform any cleanup.
+                _send_signal_to_process_group(p.pid, signal.SIGTERM)
+
+                # Make sure the termination signal actually kills the process
+                # group, otherwise retry with a SIGKILL.
+                try:
+                    p.communicate(timeout=0.5)
+                except subprocess.TimeoutExpired:
+                    _send_signal_to_process_group(p.pid, signal.SIGKILL)
+            try:
+                p.communicate(timeout=1)
+            except subprocess.TimeoutExpired:
+                # An earlier kill attempt has not worked for whatever reason.
+                # Try to kill it one last time with a direct call.
+                # If the process has spawned children, they will remain around.
+                p.kill()
+                try:
+                    p.communicate(timeout=1)
+                except subprocess.TimeoutExpired:
+                    additional_error = 'Test process could not be killed.'
+            except ValueError:
+                additional_error = 'Could not read output. Maybe the process has redirected its stdout/stderr?'
+        endtime = time.time()
+        duration = endtime - starttime
+        if additional_error is None:
+            if stdout is None:
+                stdo = ''
+            else:
+                stdout.seek(0)
+                stdo = decode(stdout.read())
+            if stderr is None or stderr is stdout:
+                stde = ''
+            else:
+                stderr.seek(0)
+                stde = decode(stderr.read())
+        else:
+            stdo = ""
+            stde = additional_error
+        if timed_out:
+            return TestRun(self.test, self.test_env, TestResult.TIMEOUT, [], p.returncode, starttime, duration, stdo, stde, cmd)
+        else:
+            if self.test.protocol is TestProtocol.EXITCODE:
+                return TestRun.make_exitcode(self.test, self.test_env, p.returncode, starttime, duration, stdo, stde, cmd)
+            elif self.test.protocol is TestProtocol.GTEST:
+                return TestRun.make_gtest(self.test, self.test_env, p.returncode, starttime, duration, stdo, stde, cmd)
+            else:
+                if self.options.verbose:
+                    print(stdo, end='')
+                return TestRun.make_tap(self.test, self.test_env, p.returncode, starttime, duration, stdo, stde, cmd)
+
+
+class TestHarness:
+    def __init__(self, options: argparse.Namespace):
+        self.options = options
+        self.collected_logs = []  # type: T.List[str]
+        self.fail_count = 0
+        self.expectedfail_count = 0
+        self.unexpectedpass_count = 0
+        self.success_count = 0
+        self.skip_count = 0
+        self.timeout_count = 0
+        self.is_run = False
+        self.tests = None
+        self.results = []         # type: T.List[TestRun]
+        self.logfilename = None   # type: T.Optional[str]
+        self.logfile = None       # type: T.Optional[T.TextIO]
+        self.jsonlogfile = None   # type: T.Optional[T.TextIO]
+        self.junit = None         # type: T.Optional[JunitBuilder]
+        if self.options.benchmark:
+            self.tests = load_benchmarks(options.wd)
+        else:
+            self.tests = load_tests(options.wd)
+        ss = set()
+        for t in self.tests:
+            for s in t.suite:
+                ss.add(s)
+        self.suites = list(ss)
+
+    def __del__(self) -> None:
+        self.close_logfiles()
+
+    def __enter__(self):
+        return self
+
+    def __exit__(self, exc_type, exc_value, traceback) -> None:
+        self.close_logfiles()
+
+    def close_logfiles(self) -> None:
+        for f in ['logfile', 'jsonlogfile']:
+            lfile =  getattr(self, f)
+            if lfile:
+                lfile.close()
+                setattr(self, f, None)
+
+    def merge_suite_options(self, options: argparse.Namespace, test: 'TestSerialisation') -> T.Dict[str, str]:
+        if ':' in options.setup:
+            if options.setup not in self.build_data.test_setups:
+                sys.exit("Unknown test setup '{}'.".format(options.setup))
+            current = self.build_data.test_setups[options.setup]
+        else:
+            full_name = test.project_name + ":" + options.setup
+            if full_name not in self.build_data.test_setups:
+                sys.exit("Test setup '{}' not found from project '{}'.".format(options.setup, test.project_name))
+            current = self.build_data.test_setups[full_name]
+        if not options.gdb:
+            options.gdb = current.gdb
+        if options.gdb:
+            options.verbose = True
+        if options.timeout_multiplier is None:
+            options.timeout_multiplier = current.timeout_multiplier
+    #    if options.env is None:
+    #        options.env = current.env # FIXME, should probably merge options here.
+        if options.wrapper is not None and current.exe_wrapper is not None:
+            sys.exit('Conflict: both test setup and command line specify an exe wrapper.')
+        if options.wrapper is None:
+            options.wrapper = current.exe_wrapper
+        return current.env.get_env(os.environ.copy())
+
+    def get_test_runner(self, test: 'TestSerialisation') -> SingleTestRunner:
+        options = deepcopy(self.options)
+        if not options.setup:
+            options.setup = self.build_data.test_setup_default_name
+        if options.setup:
+            env = self.merge_suite_options(options, test)
+        else:
+            env = os.environ.copy()
+        test_env = test.env.get_env(env)
+        env.update(test_env)
+        if (test.is_cross_built and test.needs_exe_wrapper and
+                test.exe_runner and test.exe_runner.found()):
+            env['MESON_EXE_WRAPPER'] = join_args(test.exe_runner.get_command())
+        return SingleTestRunner(test, test_env, env, options)
+
+    def process_test_result(self, result: TestRun) -> None:
+        if result.res is TestResult.TIMEOUT:
+            self.timeout_count += 1
+        elif result.res is TestResult.SKIP:
+            self.skip_count += 1
+        elif result.res is TestResult.OK:
+            self.success_count += 1
+        elif result.res is TestResult.FAIL or result.res is TestResult.ERROR:
+            self.fail_count += 1
+        elif result.res is TestResult.EXPECTEDFAIL:
+            self.expectedfail_count += 1
+        elif result.res is TestResult.UNEXPECTEDPASS:
+            self.unexpectedpass_count += 1
+        else:
+            sys.exit('Unknown test result encountered: {}'.format(result.res))
+
+    def print_stats(self, test_count: int, name_max_len: int,
+                    tests: T.List['TestSerialisation'],
+                    name: str, result: TestRun, i: int) -> None:
+        ok_statuses = (TestResult.OK, TestResult.EXPECTEDFAIL)
+        bad_statuses = (TestResult.FAIL, TestResult.TIMEOUT,
+                        TestResult.UNEXPECTEDPASS, TestResult.ERROR)
+        result_str = '{num:{numlen}}/{testcount} {name:{name_max_len}} {res:{reslen}} {dur:.2f}s'.format(
+            numlen=len(str(test_count)),
+            num=i,
+            testcount=test_count,
+            name_max_len=name_max_len,
+            name=name,
+            reslen=TestResult.maxlen(),
+            res=result.res.value,
+            dur=result.duration)
+        if result.res is TestResult.FAIL:
+            result_str += ' ' + returncode_to_status(result.returncode)
+        if not self.options.quiet or result.res not in ok_statuses:
+            if result.res not in ok_statuses and mlog.colorize_console():
+                if result.res in bad_statuses:
+                    decorator = mlog.red
+                elif result.res is TestResult.SKIP:
+                    decorator = mlog.yellow
+                else:
+                    sys.exit('Unreachable code was ... well ... reached.')
+                print(decorator(result_str).get_text(True))
+            else:
+                print(result_str)
+        result_str += "\n\n" + result.get_log()
+        if result.res in bad_statuses:
+            if self.options.print_errorlogs:
+                self.collected_logs.append(result_str)
+        if self.logfile:
+            self.logfile.write(result_str)
+        if self.jsonlogfile:
+            write_json_log(self.jsonlogfile, name, result)
+        if self.junit:
+            self.junit.log(name, result)
+
+    def print_summary(self) -> None:
+        msg = textwrap.dedent('''
+            Ok:                 {:<4}
+            Expected Fail:      {:<4}
+            Fail:               {:<4}
+            Unexpected Pass:    {:<4}
+            Skipped:            {:<4}
+            Timeout:            {:<4}
+            ''').format(self.success_count, self.expectedfail_count, self.fail_count,
+           self.unexpectedpass_count, self.skip_count, self.timeout_count)
+        print(msg)
+        if self.logfile:
+            self.logfile.write(msg)
+        if self.junit:
+            self.junit.write()
+
+    def print_collected_logs(self) -> None:
+        if self.collected_logs:
+            if len(self.collected_logs) > 10:
+                print('\nThe output from 10 first failed tests:\n')
+            else:
+                print('\nThe output from the failed tests:\n')
+            for log in self.collected_logs[:10]:
+                lines = log.splitlines()
+                if len(lines) > 104:
+                    print('\n'.join(lines[0:4]))
+                    print('--- Listing only the last 100 lines from a long log. ---')
+                    lines = lines[-100:]
+                for line in lines:
+                    try:
+                        print(line)
+                    except UnicodeEncodeError:
+                        line = line.encode('ascii', errors='replace').decode()
+                        print(line)
+
+    def total_failure_count(self) -> int:
+        return self.fail_count + self.unexpectedpass_count + self.timeout_count
+
+    def doit(self) -> int:
+        if self.is_run:
+            raise RuntimeError('Test harness object can only be used once.')
+        self.is_run = True
+        tests = self.get_tests()
+        if not tests:
+            return 0
+        self.run_tests(tests)
+        return self.total_failure_count()
+
+    @staticmethod
+    def split_suite_string(suite: str) -> T.Tuple[str, str]:
+        if ':' in suite:
+            # mypy can't figure out that str.split(n, 1) will return a list of
+            # length 2, so we have to help it.
+            return T.cast(T.Tuple[str, str], tuple(suite.split(':', 1)))
+        else:
+            return suite, ""
+
+    @staticmethod
+    def test_in_suites(test: 'TestSerialisation', suites: T.List[str]) -> bool:
+        for suite in suites:
+            (prj_match, st_match) = TestHarness.split_suite_string(suite)
+            for prjst in test.suite:
+                (prj, st) = TestHarness.split_suite_string(prjst)
+
+                # the SUITE can be passed as
+                #     suite_name
+                # or
+                #     project_name:suite_name
+                # so we need to select only the test belonging to project_name
+
+                # this if handle the first case (i.e., SUITE == suite_name)
+
+                # in this way we can run tests belonging to different
+                # (sub)projects which share the same suite_name
+                if not st_match and st == prj_match:
+                    return True
+
+                # these two conditions are needed to handle the second option
+                # i.e., SUITE == project_name:suite_name
+
+                # in this way we select the only the tests of
+                # project_name with suite_name
+                if prj_match and prj != prj_match:
+                    continue
+                if st_match and st != st_match:
+                    continue
+                return True
+        return False
+
+    def test_suitable(self, test: 'TestSerialisation') -> bool:
+        return ((not self.options.include_suites or
+                TestHarness.test_in_suites(test, self.options.include_suites)) and not
+                TestHarness.test_in_suites(test, self.options.exclude_suites))
+
+    def get_tests(self) -> T.List['TestSerialisation']:
+        if not self.tests:
+            print('No tests defined.')
+            return []
+
+        if self.options.include_suites or self.options.exclude_suites:
+            tests = []
+            for tst in self.tests:
+                if self.test_suitable(tst):
+                    tests.append(tst)
+        else:
+            tests = self.tests
+
+        # allow specifying test names like "meson test foo1 foo2", where test('foo1', ...)
+        if self.options.args:
+            tests = [t for t in tests if t.name in self.options.args]
+
+        if not tests:
+            print('No suitable tests defined.')
+            return []
+
+        return tests
+
+    def open_log_files(self) -> None:
+        if not self.options.logbase or self.options.verbose:
+            return
+
+        namebase = None
+        logfile_base = os.path.join(self.options.wd, 'meson-logs', self.options.logbase)
+
+        if self.options.wrapper:
+            namebase = os.path.basename(self.get_wrapper(self.options)[0])
+        elif self.options.setup:
+            namebase = self.options.setup.replace(":", "_")
+
+        if namebase:
+            logfile_base += '-' + namebase.replace(' ', '_')
+
+        self.junit = JunitBuilder(logfile_base + '.junit.xml')
+
+        self.logfilename = logfile_base + '.txt'
+        self.jsonlogfilename = logfile_base + '.json'
+
+        self.jsonlogfile = open(self.jsonlogfilename, 'w', encoding='utf-8', errors='replace')
+        self.logfile = open(self.logfilename, 'w', encoding='utf-8', errors='surrogateescape')
+
+        self.logfile.write('Log of Meson test suite run on {}\n\n'.format(datetime.datetime.now().isoformat()))
+        inherit_env = env_tuple_to_str(os.environ.items())
+        self.logfile.write('Inherited environment: {}\n\n'.format(inherit_env))
+
+    @staticmethod
+    def get_wrapper(options: argparse.Namespace) -> T.List[str]:
+        wrap = []  # type: T.List[str]
+        if options.gdb:
+            wrap = [options.gdb_path, '--quiet', '--nh']
+            if options.repeat > 1:
+                wrap += ['-ex', 'run', '-ex', 'quit']
+            # Signal the end of arguments to gdb
+            wrap += ['--args']
+        if options.wrapper:
+            wrap += options.wrapper
+        return wrap
+
+    def get_pretty_suite(self, test: 'TestSerialisation') -> str:
+        if len(self.suites) > 1 and test.suite:
+            rv = TestHarness.split_suite_string(test.suite[0])[0]
+            s = "+".join(TestHarness.split_suite_string(s)[1] for s in test.suite)
+            if s:
+                rv += ":"
+            return rv + s + " / " + test.name
+        else:
+            return test.name
+
+    def run_tests(self, tests: T.List['TestSerialisation']) -> None:
+        executor = None
+        futures = []  # type: T.List[T.Tuple[conc.Future[TestRun], int, int, T.List[TestSerialisation], str, int]]
+        test_count = len(tests)
+        name_max_len = max([len(self.get_pretty_suite(test)) for test in tests])
+        self.open_log_files()
+        startdir = os.getcwd()
+        if self.options.wd:
+            os.chdir(self.options.wd)
+        self.build_data = build.load(os.getcwd())
+
+        try:
+            for _ in range(self.options.repeat):
+                for i, test in enumerate(tests, 1):
+                    visible_name = self.get_pretty_suite(test)
+                    single_test = self.get_test_runner(test)
+
+                    if not test.is_parallel or self.options.num_processes == 1 or single_test.options.gdb:
+                        self.drain_futures(futures)
+                        futures = []
+                        res = single_test.run()
+                        self.process_test_result(res)
+                        self.print_stats(test_count, name_max_len, tests, visible_name, res, i)
+                    else:
+                        if not executor:
+                            executor = conc.ThreadPoolExecutor(max_workers=self.options.num_processes)
+                        f = executor.submit(single_test.run)
+                        futures.append((f, test_count, name_max_len, tests, visible_name, i))
+                    if self.options.repeat > 1 and self.fail_count:
+                        break
+                if self.options.repeat > 1 and self.fail_count:
+                    break
+
+            self.drain_futures(futures)
+            self.print_summary()
+            self.print_collected_logs()
+
+            if self.logfilename:
+                print('Full log written to {}'.format(self.logfilename))
+        finally:
+            os.chdir(startdir)
+
+    def drain_futures(self, futures: T.List[T.Tuple['conc.Future[TestRun]', int, int, T.List['TestSerialisation'], str, int]]) -> None:
+        for x in futures:
+            (result, test_count, name_max_len, tests, name, i) = x
+            if self.options.repeat > 1 and self.fail_count:
+                result.cancel()
+            if self.options.verbose:
+                result.result()
+            self.process_test_result(result.result())
+            self.print_stats(test_count, name_max_len, tests, name, result.result(), i)
+
+    def run_special(self) -> int:
+        '''Tests run by the user, usually something like "under gdb 1000 times".'''
+        if self.is_run:
+            raise RuntimeError('Can not use run_special after a full run.')
+        tests = self.get_tests()
+        if not tests:
+            return 0
+        self.run_tests(tests)
+        return self.total_failure_count()
+
+
+def list_tests(th: TestHarness) -> bool:
+    tests = th.get_tests()
+    for t in tests:
+        print(th.get_pretty_suite(t))
+    return not tests
+
+def rebuild_all(wd: str) -> bool:
+    if not (Path(wd) / 'build.ninja').is_file():
+        print('Only ninja backend is supported to rebuild tests before running them.')
+        return True
+
+    ninja = environment.detect_ninja()
+    if not ninja:
+        print("Can't find ninja, can't rebuild test.")
+        return False
+
+    ret = subprocess.run(ninja + ['-C', wd]).returncode
+    if ret != 0:
+        print('Could not rebuild {}'.format(wd))
+        return False
+
+    return True
+
+def run(options: argparse.Namespace) -> int:
+    if options.benchmark:
+        options.num_processes = 1
+
+    if options.verbose and options.quiet:
+        print('Can not be both quiet and verbose at the same time.')
+        return 1
+
+    check_bin = None
+    if options.gdb:
+        options.verbose = True
+        if options.wrapper:
+            print('Must not specify both a wrapper and gdb at the same time.')
+            return 1
+        check_bin = 'gdb'
+
+    if options.wrapper:
+        check_bin = options.wrapper[0]
+
+    if check_bin is not None:
+        exe = ExternalProgram(check_bin, silent=True)
+        if not exe.found():
+            print('Could not find requested program: {!r}'.format(check_bin))
+            return 1
+
+    if not options.list and not options.no_rebuild:
+        if not rebuild_all(options.wd):
+            # We return 125 here in case the build failed.
+            # The reason is that exit code 125 tells `git bisect run` that the current commit should be skipped.
+            # Thus users can directly use `meson test` to bisect without needing to handle the does-not-build case separately in a wrapper script.
+            return 125
+
+    with TestHarness(options) as th:
+        try:
+            if options.list:
+                return list_tests(th)
+            if not options.args:
+                return th.doit()
+            return th.run_special()
+        except TestException as e:
+            print('Meson test encountered an error:\n')
+            if os.environ.get('MESON_FORCE_BACKTRACE'):
+                raise e
+            else:
+                print(e)
+            return 1
+
+def run_with_args(args: T.List[str]) -> int:
+    parser = argparse.ArgumentParser(prog='meson test')
+    add_arguments(parser)
+    options = parser.parse_args(args)
+    return run(options)
diff --git a/meson/mesonbuild/munstable_coredata.py b/meson/mesonbuild/munstable_coredata.py
new file mode 100644
index 000000000..5463f1625
--- /dev/null
+++ b/meson/mesonbuild/munstable_coredata.py
@@ -0,0 +1,114 @@
+# Copyright 2019 The Meson development team
+
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+
+#     http://www.apache.org/licenses/LICENSE-2.0
+
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+
+from . import coredata as cdata
+from .mesonlib import MachineChoice
+
+import os.path
+import pprint
+import textwrap
+
+def add_arguments(parser):
+    parser.add_argument('--all', action='store_true', dest='all', default=False,
+                        help='Show data not used by current backend.')
+
+    parser.add_argument('builddir', nargs='?', default='.', help='The build directory')
+
+
+def dump_compilers(compilers):
+    for lang, compiler in compilers.items():
+        print('  ' + lang + ':')
+        print('      Id: ' + compiler.id)
+        print('      Command: ' + ' '.join(compiler.exelist))
+        if compiler.full_version:
+            print('      Full version: ' + compiler.full_version)
+        if compiler.version:
+            print('      Detected version: ' + compiler.version)
+
+
+def dump_guids(d):
+    for name, value in d.items():
+        print('  ' + name + ': ' + value)
+
+
+def run(options):
+    datadir = 'meson-private'
+    if options.builddir is not None:
+        datadir = os.path.join(options.builddir, datadir)
+    if not os.path.isdir(datadir):
+        print('Current directory is not a build dir. Please specify it or '
+              'change the working directory to it.')
+        return 1
+
+    all_backends = options.all
+
+    print('This is a dump of the internal unstable cache of meson. This is for debugging only.')
+    print('Do NOT parse, this will change from version to version in incompatible ways')
+    print('')
+
+    coredata = cdata.load(options.builddir)
+    backend = coredata.get_builtin_option('backend')
+    for k, v in sorted(coredata.__dict__.items()):
+        if k in ('backend_options', 'base_options', 'builtins', 'compiler_options', 'user_options'):
+            # use `meson configure` to view these
+            pass
+        elif k in ['install_guid', 'test_guid', 'regen_guid']:
+            if all_backends or backend.startswith('vs'):
+                print(k + ': ' + v)
+        elif k == 'target_guids':
+            if all_backends or backend.startswith('vs'):
+                print(k + ':')
+                dump_guids(v)
+        elif k in ['lang_guids']:
+            if all_backends or backend.startswith('vs') or backend == 'xcode':
+                print(k + ':')
+                dump_guids(v)
+        elif k == 'meson_command':
+            if all_backends or backend.startswith('vs'):
+                print('Meson command used in build file regeneration: ' + ' '.join(v))
+        elif k == 'pkgconf_envvar':
+            print('Last seen PKGCONFIG environment variable value: ' + v)
+        elif k == 'version':
+            print('Meson version: ' + v)
+        elif k == 'cross_files':
+            if v:
+                print('Cross File: ' + ' '.join(v))
+        elif k == 'config_files':
+            if v:
+                print('Native File: ' + ' '.join(v))
+        elif k == 'compilers':
+            for for_machine in MachineChoice:
+                print('Cached {} machine compilers:'.format(
+                    for_machine.get_lower_case_name()))
+                dump_compilers(v[for_machine])
+        elif k == 'deps':
+            def print_dep(dep_key, dep):
+                print('  ' + dep_key[0] + ": ")
+                print('      compile args: ' + repr(dep.get_compile_args()))
+                print('      link args: ' + repr(dep.get_link_args()))
+                if dep.get_sources():
+                    print('      sources: ' + repr(dep.get_sources()))
+                print('      version: ' + repr(dep.get_version()))
+
+            for for_machine in iter(MachineChoice):
+                items_list = list(sorted(v[for_machine].items()))
+                if items_list:
+                    print('Cached dependencies for {} machine' % for_machine.get_lower_case_name())
+                    for dep_key, deps in items_list:
+                        for dep in deps:
+                            print_dep(dep_key, dep)
+        else:
+            print(k + ':')
+            print(textwrap.indent(pprint.pformat(v), '  '))
diff --git a/meson/mesonbuild/optinterpreter.py b/meson/mesonbuild/optinterpreter.py
new file mode 100644
index 000000000..d47a3d247
--- /dev/null
+++ b/meson/mesonbuild/optinterpreter.py
@@ -0,0 +1,235 @@
+# Copyright 2013-2014 The Meson development team
+
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+
+#     http://www.apache.org/licenses/LICENSE-2.0
+
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import re
+import functools
+import typing as T
+
+from . import compilers
+from . import coredata
+from . import mesonlib
+from . import mparser
+from .interpreterbase import FeatureNew
+
+forbidden_option_names = set(coredata.builtin_options.keys())
+forbidden_prefixes = [lang + '_' for lang in compilers.all_languages] + ['b_', 'backend_']
+reserved_prefixes = ['cross_']
+
+def is_invalid_name(name: str, *, log: bool = True) -> bool:
+    if name in forbidden_option_names:
+        return True
+    pref = name.split('_')[0] + '_'
+    if pref in forbidden_prefixes:
+        return True
+    if pref in reserved_prefixes:
+        if log:
+            from . import mlog
+            mlog.deprecation('Option uses prefix "%s", which is reserved for Meson. This will become an error in the future.' % pref)
+    return False
+
+class OptionException(mesonlib.MesonException):
+    pass
+
+
+def permitted_kwargs(permitted):
+    """Function that validates kwargs for options."""
+    def _wraps(func):
+        @functools.wraps(func)
+        def _inner(name, description, kwargs):
+            bad = [a for a in kwargs.keys() if a not in permitted]
+            if bad:
+                raise OptionException('Invalid kwargs for option "{}": "{}"'.format(
+                    name, ' '.join(bad)))
+            return func(description, kwargs)
+        return _inner
+    return _wraps
+
+
+optname_regex = re.compile('[^a-zA-Z0-9_-]')
+
+@permitted_kwargs({'value', 'yield'})
+def StringParser(description, kwargs):
+    return coredata.UserStringOption(description,
+                                     kwargs.get('value', ''),
+                                     kwargs.get('choices', []),
+                                     kwargs.get('yield', coredata.default_yielding))
+
+@permitted_kwargs({'value', 'yield'})
+def BooleanParser(description, kwargs):
+    return coredata.UserBooleanOption(description,
+                                      kwargs.get('value', True),
+                                      kwargs.get('yield', coredata.default_yielding))
+
+@permitted_kwargs({'value', 'yield', 'choices'})
+def ComboParser(description, kwargs):
+    if 'choices' not in kwargs:
+        raise OptionException('Combo option missing "choices" keyword.')
+    choices = kwargs['choices']
+    if not isinstance(choices, list):
+        raise OptionException('Combo choices must be an array.')
+    for i in choices:
+        if not isinstance(i, str):
+            raise OptionException('Combo choice elements must be strings.')
+    return coredata.UserComboOption(description,
+                                    choices,
+                                    kwargs.get('value', choices[0]),
+                                    kwargs.get('yield', coredata.default_yielding),)
+
+
+@permitted_kwargs({'value', 'min', 'max', 'yield'})
+def IntegerParser(description, kwargs):
+    if 'value' not in kwargs:
+        raise OptionException('Integer option must contain value argument.')
+    inttuple = (kwargs.get('min', None), kwargs.get('max', None), kwargs['value'])
+    return coredata.UserIntegerOption(description,
+                                      inttuple,
+                                      kwargs.get('yield', coredata.default_yielding))
+
+# FIXME: Cannot use FeatureNew while parsing options because we parse it before
+# reading options in project(). See func_project() in interpreter.py
+#@FeatureNew('array type option()', '0.44.0')
+@permitted_kwargs({'value', 'yield', 'choices'})
+def string_array_parser(description, kwargs):
+    if 'choices' in kwargs:
+        choices = kwargs['choices']
+        if not isinstance(choices, list):
+            raise OptionException('Array choices must be an array.')
+        for i in choices:
+            if not isinstance(i, str):
+                raise OptionException('Array choice elements must be strings.')
+            value = kwargs.get('value', choices)
+    else:
+        choices = None
+        value = kwargs.get('value', [])
+    if not isinstance(value, list):
+        raise OptionException('Array choices must be passed as an array.')
+    return coredata.UserArrayOption(description,
+                                    value,
+                                    choices=choices,
+                                    yielding=kwargs.get('yield', coredata.default_yielding))
+
+@permitted_kwargs({'value', 'yield'})
+def FeatureParser(description, kwargs):
+    return coredata.UserFeatureOption(description,
+                                      kwargs.get('value', 'auto'),
+                                      yielding=kwargs.get('yield', coredata.default_yielding))
+
+option_types = {'string': StringParser,
+                'boolean': BooleanParser,
+                'combo': ComboParser,
+                'integer': IntegerParser,
+                'array': string_array_parser,
+                'feature': FeatureParser,
+                } # type: T.Dict[str, T.Callable[[str, T.Dict], coredata.UserOption]]
+
+class OptionInterpreter:
+    def __init__(self, subproject):
+        self.options = {}
+        self.subproject = subproject
+
+    def process(self, option_file):
+        try:
+            with open(option_file, 'r', encoding='utf8') as f:
+                ast = mparser.Parser(f.read(), option_file).parse()
+        except mesonlib.MesonException as me:
+            me.file = option_file
+            raise me
+        if not isinstance(ast, mparser.CodeBlockNode):
+            e = OptionException('Option file is malformed.')
+            e.lineno = ast.lineno()
+            e.file = option_file
+            raise e
+        for cur in ast.lines:
+            try:
+                self.evaluate_statement(cur)
+            except Exception as e:
+                e.lineno = cur.lineno
+                e.colno = cur.colno
+                e.file = option_file
+                raise e
+
+    def reduce_single(self, arg):
+        if isinstance(arg, str):
+            return arg
+        elif isinstance(arg, (mparser.StringNode, mparser.BooleanNode,
+                              mparser.NumberNode)):
+            return arg.value
+        elif isinstance(arg, mparser.ArrayNode):
+            return [self.reduce_single(curarg) for curarg in arg.args.arguments]
+        elif isinstance(arg, mparser.UMinusNode):
+            res = self.reduce_single(arg.value)
+            if not isinstance(res, (int, float)):
+                raise OptionException('Token after "-" is not a number')
+            FeatureNew.single_use('negative numbers in meson_options.txt', '0.54.1', self.subproject)
+            return -res
+        elif isinstance(arg, mparser.NotNode):
+            res = self.reduce_single(arg.value)
+            if not isinstance(res, bool):
+                raise OptionException('Token after "not" is not a a boolean')
+            FeatureNew.single_use('negation ("not") in meson_options.txt', '0.54.1', self.subproject)
+            return not res
+        elif isinstance(arg, mparser.ArithmeticNode):
+            l = self.reduce_single(arg.left)
+            r = self.reduce_single(arg.right)
+            if not (arg.operation == 'add' and isinstance(l, str) and isinstance(r, str)):
+                raise OptionException('Only string concatenation with the "+" operator is allowed')
+            FeatureNew.single_use('string concatenation in meson_options.txt', '0.55.0', self.subproject)
+            return l + r
+        else:
+            raise OptionException('Arguments may only be string, int, bool, or array of those.')
+
+    def reduce_arguments(self, args):
+        assert(isinstance(args, mparser.ArgumentNode))
+        if args.incorrect_order():
+            raise OptionException('All keyword arguments must be after positional arguments.')
+        reduced_pos = [self.reduce_single(arg) for arg in args.arguments]
+        reduced_kw = {}
+        for key in args.kwargs.keys():
+            if not isinstance(key, mparser.IdNode):
+                raise OptionException('Keyword argument name is not a string.')
+            a = args.kwargs[key]
+            reduced_kw[key.value] = self.reduce_single(a)
+        return reduced_pos, reduced_kw
+
+    def evaluate_statement(self, node):
+        if not isinstance(node, mparser.FunctionNode):
+            raise OptionException('Option file may only contain option definitions')
+        func_name = node.func_name
+        if func_name != 'option':
+            raise OptionException('Only calls to option() are allowed in option files.')
+        (posargs, kwargs) = self.reduce_arguments(node.args)
+
+        if 'yield' in kwargs:
+           FeatureNew.single_use('option yield', '0.45.0', self.subproject)
+
+        if 'type' not in kwargs:
+            raise OptionException('Option call missing mandatory "type" keyword argument')
+        opt_type = kwargs.pop('type')
+        if opt_type not in option_types:
+            raise OptionException('Unknown type %s.' % opt_type)
+        if len(posargs) != 1:
+            raise OptionException('Option() must have one (and only one) positional argument')
+        opt_name = posargs[0]
+        if not isinstance(opt_name, str):
+            raise OptionException('Positional argument must be a string.')
+        if optname_regex.search(opt_name) is not None:
+            raise OptionException('Option names can only contain letters, numbers or dashes.')
+        if is_invalid_name(opt_name):
+            raise OptionException('Option name %s is reserved.' % opt_name)
+        if self.subproject != '':
+            opt_name = self.subproject + ':' + opt_name
+        opt = option_types[opt_type](opt_name, kwargs.pop('description', ''), kwargs)
+        if opt.description == '':
+            opt.description = opt_name
+        self.options[opt_name] = opt
diff --git a/meson/mesonbuild/rewriter.py b/meson/mesonbuild/rewriter.py
new file mode 100644
index 000000000..7730ab637
--- /dev/null
+++ b/meson/mesonbuild/rewriter.py
@@ -0,0 +1,969 @@
+#!/usr/bin/env python3
+# Copyright 2016 The Meson development team
+
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+
+#     http://www.apache.org/licenses/LICENSE-2.0
+
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# This class contains the basic functionality needed to run any interpreter
+# or an interpreter-based tool.
+
+# This tool is used to manipulate an existing Meson build definition.
+#
+# - add a file to a target
+# - remove files from a target
+# - move targets
+# - reindent?
+
+from .ast import IntrospectionInterpreter, build_target_functions, AstConditionLevel, AstIDGenerator, AstIndentationGenerator, AstPrinter
+from mesonbuild.mesonlib import MesonException
+from . import mlog, environment
+from functools import wraps
+from .mparser import Token, ArrayNode, ArgumentNode, AssignmentNode, BaseNode, BooleanNode, ElementaryNode, IdNode, FunctionNode, StringNode
+import json, os, re, sys
+import typing as T
+
+class RewriterException(MesonException):
+    pass
+
+def add_arguments(parser, formatter=None):
+    parser.add_argument('-s', '--sourcedir', type=str, default='.', metavar='SRCDIR', help='Path to source directory.')
+    parser.add_argument('-V', '--verbose', action='store_true', default=False, help='Enable verbose output')
+    parser.add_argument('-S', '--skip-errors', dest='skip', action='store_true', default=False, help='Skip errors instead of aborting')
+    subparsers = parser.add_subparsers(dest='type', title='Rewriter commands', description='Rewrite command to execute')
+
+    # Target
+    tgt_parser = subparsers.add_parser('target', help='Modify a target', formatter_class=formatter)
+    tgt_parser.add_argument('-s', '--subdir', default='', dest='subdir', help='Subdirectory of the new target (only for the "add_target" action)')
+    tgt_parser.add_argument('--type', dest='tgt_type', choices=rewriter_keys['target']['target_type'][2], default='executable',
+                            help='Type of the target to add (only for the "add_target" action)')
+    tgt_parser.add_argument('target', help='Name or ID of the target')
+    tgt_parser.add_argument('operation', choices=['add', 'rm', 'add_target', 'rm_target', 'info'],
+                            help='Action to execute')
+    tgt_parser.add_argument('sources', nargs='*', help='Sources to add/remove')
+
+    # KWARGS
+    kw_parser = subparsers.add_parser('kwargs', help='Modify keyword arguments', formatter_class=formatter)
+    kw_parser.add_argument('operation', choices=rewriter_keys['kwargs']['operation'][2],
+                           help='Action to execute')
+    kw_parser.add_argument('function', choices=list(rewriter_func_kwargs.keys()),
+                           help='Function type to modify')
+    kw_parser.add_argument('id', help='ID of the function to modify (can be anything for "project")')
+    kw_parser.add_argument('kwargs', nargs='*', help='Pairs of keyword and value')
+
+    # Default options
+    def_parser = subparsers.add_parser('default-options', help='Modify the project default options', formatter_class=formatter)
+    def_parser.add_argument('operation', choices=rewriter_keys['default_options']['operation'][2],
+                            help='Action to execute')
+    def_parser.add_argument('options', nargs='*', help='Key, value pairs of configuration option')
+
+    # JSON file/command
+    cmd_parser = subparsers.add_parser('command', help='Execute a JSON array of commands', formatter_class=formatter)
+    cmd_parser.add_argument('json', help='JSON string or file to execute')
+
+class RequiredKeys:
+    def __init__(self, keys):
+        self.keys = keys
+
+    def __call__(self, f):
+        @wraps(f)
+        def wrapped(*wrapped_args, **wrapped_kwargs):
+            assert(len(wrapped_args) >= 2)
+            cmd = wrapped_args[1]
+            for key, val in self.keys.items():
+                typ = val[0] # The type of the value
+                default = val[1] # The default value -- None is required
+                choices = val[2] # Valid choices -- None is for everything
+                if key not in cmd:
+                    if default is not None:
+                        cmd[key] = default
+                    else:
+                        raise RewriterException('Key "{}" is missing in object for {}'
+                                                .format(key, f.__name__))
+                if not isinstance(cmd[key], typ):
+                    raise RewriterException('Invalid type of "{}". Required is {} but provided was {}'
+                                            .format(key, typ.__name__, type(cmd[key]).__name__))
+                if choices is not None:
+                    assert(isinstance(choices, list))
+                    if cmd[key] not in choices:
+                        raise RewriterException('Invalid value of "{}": Possible values are {} but provided was "{}"'
+                                                .format(key, choices, cmd[key]))
+            return f(*wrapped_args, **wrapped_kwargs)
+
+        return wrapped
+
+class MTypeBase:
+    def __init__(self, node: T.Optional[BaseNode] = None):
+        if node is None:
+            self.node = self._new_node()  # lgtm [py/init-calls-subclass] (node creation does not depend on base class state)
+        else:
+            self.node = node
+        self.node_type = None
+        for i in self.supported_nodes():  # lgtm [py/init-calls-subclass] (listing nodes does not depend on base class state)
+            if isinstance(self.node, i):
+                self.node_type = i
+
+    def _new_node(self):
+        # Overwrite in derived class
+        raise RewriterException('Internal error: _new_node of MTypeBase was called')
+
+    def can_modify(self):
+        return self.node_type is not None
+
+    def get_node(self):
+        return self.node
+
+    def supported_nodes(self):
+        # Overwrite in derived class
+        return []
+
+    def set_value(self, value):
+        # Overwrite in derived class
+        mlog.warning('Cannot set the value of type', mlog.bold(type(self).__name__), '--> skipping')
+
+    def add_value(self, value):
+        # Overwrite in derived class
+        mlog.warning('Cannot add a value of type', mlog.bold(type(self).__name__), '--> skipping')
+
+    def remove_value(self, value):
+        # Overwrite in derived class
+        mlog.warning('Cannot remove a value of type', mlog.bold(type(self).__name__), '--> skipping')
+
+    def remove_regex(self, value):
+        # Overwrite in derived class
+        mlog.warning('Cannot remove a regex in type', mlog.bold(type(self).__name__), '--> skipping')
+
+class MTypeStr(MTypeBase):
+    def __init__(self, node: T.Optional[BaseNode] = None):
+        super().__init__(node)
+
+    def _new_node(self):
+        return StringNode(Token('', '', 0, 0, 0, None, ''))
+
+    def supported_nodes(self):
+        return [StringNode]
+
+    def set_value(self, value):
+        self.node.value = str(value)
+
+class MTypeBool(MTypeBase):
+    def __init__(self, node: T.Optional[BaseNode] = None):
+        super().__init__(node)
+
+    def _new_node(self):
+        return BooleanNode(Token('', '', 0, 0, 0, None, False))
+
+    def supported_nodes(self):
+        return [BooleanNode]
+
+    def set_value(self, value):
+        self.node.value = bool(value)
+
+class MTypeID(MTypeBase):
+    def __init__(self, node: T.Optional[BaseNode] = None):
+        super().__init__(node)
+
+    def _new_node(self):
+        return IdNode(Token('', '', 0, 0, 0, None, ''))
+
+    def supported_nodes(self):
+        return [IdNode]
+
+    def set_value(self, value):
+        self.node.value = str(value)
+
+class MTypeList(MTypeBase):
+    def __init__(self, node: T.Optional[BaseNode] = None):
+        super().__init__(node)
+
+    def _new_node(self):
+        return ArrayNode(ArgumentNode(Token('', '', 0, 0, 0, None, '')), 0, 0, 0, 0)
+
+    def _new_element_node(self, value):
+        # Overwrite in derived class
+        raise RewriterException('Internal error: _new_element_node of MTypeList was called')
+
+    def _ensure_array_node(self):
+        if not isinstance(self.node, ArrayNode):
+            tmp = self.node
+            self.node = self._new_node()
+            self.node.args.arguments += [tmp]
+
+    def _check_is_equal(self, node, value) -> bool:
+        # Overwrite in derived class
+        return False
+
+    def _check_regex_matches(self, node, regex: str) -> bool:
+        # Overwrite in derived class
+        return False
+
+    def get_node(self):
+        if isinstance(self.node, ArrayNode):
+            if len(self.node.args.arguments) == 1:
+                return self.node.args.arguments[0]
+        return self.node
+
+    def supported_element_nodes(self):
+        # Overwrite in derived class
+        return []
+
+    def supported_nodes(self):
+        return [ArrayNode] + self.supported_element_nodes()
+
+    def set_value(self, value):
+        if not isinstance(value, list):
+            value = [value]
+        self._ensure_array_node()
+        self.node.args.arguments = [] # Remove all current nodes
+        for i in value:
+            self.node.args.arguments += [self._new_element_node(i)]
+
+    def add_value(self, value):
+        if not isinstance(value, list):
+            value = [value]
+        self._ensure_array_node()
+        for i in value:
+            self.node.args.arguments += [self._new_element_node(i)]
+
+    def _remove_helper(self, value, equal_func):
+        def check_remove_node(node):
+            for j in value:
+                if equal_func(i, j):
+                    return True
+            return False
+
+        if not isinstance(value, list):
+            value = [value]
+        self._ensure_array_node()
+        removed_list = []
+        for i in self.node.args.arguments:
+            if not check_remove_node(i):
+                removed_list += [i]
+        self.node.args.arguments = removed_list
+
+    def remove_value(self, value):
+        self._remove_helper(value, self._check_is_equal)
+
+    def remove_regex(self, regex: str):
+        self._remove_helper(regex, self._check_regex_matches)
+
+class MTypeStrList(MTypeList):
+    def __init__(self, node: T.Optional[BaseNode] = None):
+        super().__init__(node)
+
+    def _new_element_node(self, value):
+        return StringNode(Token('', '', 0, 0, 0, None, str(value)))
+
+    def _check_is_equal(self, node, value) -> bool:
+        if isinstance(node, StringNode):
+            return node.value == value
+        return False
+
+    def _check_regex_matches(self, node, regex: str) -> bool:
+        if isinstance(node, StringNode):
+            return re.match(regex, node.value) is not None
+        return False
+
+    def supported_element_nodes(self):
+        return [StringNode]
+
+class MTypeIDList(MTypeList):
+    def __init__(self, node: T.Optional[BaseNode] = None):
+        super().__init__(node)
+
+    def _new_element_node(self, value):
+        return IdNode(Token('', '', 0, 0, 0, None, str(value)))
+
+    def _check_is_equal(self, node, value) -> bool:
+        if isinstance(node, IdNode):
+            return node.value == value
+        return False
+
+    def _check_regex_matches(self, node, regex: str) -> bool:
+        if isinstance(node, StringNode):
+            return re.match(regex, node.value) is not None
+        return False
+
+    def supported_element_nodes(self):
+        return [IdNode]
+
+rewriter_keys = {
+    'default_options': {
+        'operation': (str, None, ['set', 'delete']),
+        'options': (dict, {}, None)
+    },
+    'kwargs': {
+        'function': (str, None, None),
+        'id': (str, None, None),
+        'operation': (str, None, ['set', 'delete', 'add', 'remove', 'remove_regex', 'info']),
+        'kwargs': (dict, {}, None)
+    },
+    'target': {
+        'target': (str, None, None),
+        'operation': (str, None, ['src_add', 'src_rm', 'target_rm', 'target_add', 'info']),
+        'sources': (list, [], None),
+        'subdir': (str, '', None),
+        'target_type': (str, 'executable', ['both_libraries', 'executable', 'jar', 'library', 'shared_library', 'shared_module', 'static_library']),
+    }
+}
+
+rewriter_func_kwargs = {
+    'dependency': {
+        'language': MTypeStr,
+        'method': MTypeStr,
+        'native': MTypeBool,
+        'not_found_message': MTypeStr,
+        'required': MTypeBool,
+        'static': MTypeBool,
+        'version': MTypeStrList,
+        'modules': MTypeStrList
+    },
+    'target': {
+        'build_by_default': MTypeBool,
+        'build_rpath': MTypeStr,
+        'dependencies': MTypeIDList,
+        'gui_app': MTypeBool,
+        'link_with': MTypeIDList,
+        'export_dynamic': MTypeBool,
+        'implib': MTypeBool,
+        'install': MTypeBool,
+        'install_dir': MTypeStr,
+        'install_rpath': MTypeStr,
+        'pie': MTypeBool
+    },
+    'project': {
+        'default_options': MTypeStrList,
+        'meson_version': MTypeStr,
+        'license': MTypeStrList,
+        'subproject_dir': MTypeStr,
+        'version': MTypeStr
+    }
+}
+
+class Rewriter:
+    def __init__(self, sourcedir: str, generator: str = 'ninja', skip_errors: bool = False):
+        self.sourcedir = sourcedir
+        self.interpreter = IntrospectionInterpreter(sourcedir, '', generator, visitors = [AstIDGenerator(), AstIndentationGenerator(), AstConditionLevel()])
+        self.skip_errors = skip_errors
+        self.modefied_nodes = []
+        self.to_remove_nodes = []
+        self.to_add_nodes = []
+        self.functions = {
+            'default_options': self.process_default_options,
+            'kwargs': self.process_kwargs,
+            'target': self.process_target,
+        }
+        self.info_dump = None
+
+    def analyze_meson(self):
+        mlog.log('Analyzing meson file:', mlog.bold(os.path.join(self.sourcedir, environment.build_filename)))
+        self.interpreter.analyze()
+        mlog.log('  -- Project:', mlog.bold(self.interpreter.project_data['descriptive_name']))
+        mlog.log('  -- Version:', mlog.cyan(self.interpreter.project_data['version']))
+
+    def add_info(self, cmd_type: str, cmd_id: str, data: dict):
+        if self.info_dump is None:
+            self.info_dump = {}
+        if cmd_type not in self.info_dump:
+            self.info_dump[cmd_type] = {}
+        self.info_dump[cmd_type][cmd_id] = data
+
+    def print_info(self):
+        if self.info_dump is None:
+            return
+        sys.stderr.write(json.dumps(self.info_dump, indent=2))
+
+    def on_error(self):
+        if self.skip_errors:
+            return mlog.cyan('-->'), mlog.yellow('skipping')
+        return mlog.cyan('-->'), mlog.red('aborting')
+
+    def handle_error(self):
+        if self.skip_errors:
+            return None
+        raise MesonException('Rewriting the meson.build failed')
+
+    def find_target(self, target: str):
+        def check_list(name: str) -> T.List[BaseNode]:
+            result = []
+            for i in self.interpreter.targets:
+                if name == i['name'] or name == i['id']:
+                    result += [i]
+            return result
+
+        targets = check_list(target)
+        if targets:
+            if len(targets) == 1:
+                return targets[0]
+            else:
+                mlog.error('There are multiple targets matching', mlog.bold(target))
+                for i in targets:
+                    mlog.error('  -- Target name', mlog.bold(i['name']), 'with ID', mlog.bold(i['id']))
+                mlog.error('Please try again with the unique ID of the target', *self.on_error())
+                self.handle_error()
+                return None
+
+        # Check the assignments
+        tgt = None
+        if target in self.interpreter.assignments:
+            node = self.interpreter.assignments[target]
+            if isinstance(node, FunctionNode):
+                if node.func_name in ['executable', 'jar', 'library', 'shared_library', 'shared_module', 'static_library', 'both_libraries']:
+                    tgt = self.interpreter.assign_vals[target]
+
+        return tgt
+
+    def find_dependency(self, dependency: str):
+        def check_list(name: str):
+            for i in self.interpreter.dependencies:
+                if name == i['name']:
+                    return i
+            return None
+
+        dep = check_list(dependency)
+        if dep is not None:
+            return dep
+
+        # Check the assignments
+        if dependency in self.interpreter.assignments:
+            node = self.interpreter.assignments[dependency]
+            if isinstance(node, FunctionNode):
+                if node.func_name in ['dependency']:
+                    name = self.interpreter.flatten_args(node.args)[0]
+                    dep = check_list(name)
+
+        return dep
+
+    @RequiredKeys(rewriter_keys['default_options'])
+    def process_default_options(self, cmd):
+        # First, remove the old values
+        kwargs_cmd = {
+            'function': 'project',
+            'id': "/",
+            'operation': 'remove_regex',
+            'kwargs': {
+                'default_options': ['{}=.*'.format(x) for x in cmd['options'].keys()]
+            }
+        }
+        self.process_kwargs(kwargs_cmd)
+
+        # Then add the new values
+        if cmd['operation'] != 'set':
+            return
+
+        kwargs_cmd['operation'] = 'add'
+        kwargs_cmd['kwargs']['default_options'] = []
+
+        cdata = self.interpreter.coredata
+        options = {
+            **cdata.builtins,
+            **cdata.builtins_per_machine.host,
+            **{'build.' + k: o for k, o in cdata.builtins_per_machine.build.items()},
+            **cdata.backend_options,
+            **cdata.base_options,
+            **(dict(cdata.flatten_lang_iterator(cdata.compiler_options.host.items()))),
+            **{'build.' + k: o for k, o in cdata.flatten_lang_iterator(cdata.compiler_options.build.items())},
+            **cdata.user_options,
+        }
+
+        for key, val in sorted(cmd['options'].items()):
+            if key not in options:
+                mlog.error('Unknown options', mlog.bold(key), *self.on_error())
+                self.handle_error()
+                continue
+
+            try:
+                val = options[key].validate_value(val)
+            except MesonException as e:
+                mlog.error('Unable to set', mlog.bold(key), mlog.red(str(e)), *self.on_error())
+                self.handle_error()
+                continue
+
+            kwargs_cmd['kwargs']['default_options'] += ['{}={}'.format(key, val)]
+
+        self.process_kwargs(kwargs_cmd)
+
+    @RequiredKeys(rewriter_keys['kwargs'])
+    def process_kwargs(self, cmd):
+        mlog.log('Processing function type', mlog.bold(cmd['function']), 'with id', mlog.cyan("'" + cmd['id'] + "'"))
+        if cmd['function'] not in rewriter_func_kwargs:
+            mlog.error('Unknown function type', cmd['function'], *self.on_error())
+            return self.handle_error()
+        kwargs_def = rewriter_func_kwargs[cmd['function']]
+
+        # Find the function node to modify
+        node = None
+        arg_node = None
+        if cmd['function'] == 'project':
+            if cmd['id'] != '/':
+                mlog.error('The ID for the function type project must be "/"', *self.on_error())
+                return self.handle_error()
+            node = self.interpreter.project_node
+            arg_node = node.args
+        elif cmd['function'] == 'target':
+            tmp = self.find_target(cmd['id'])
+            if tmp:
+                node = tmp['node']
+                arg_node = node.args
+        elif cmd['function'] == 'dependency':
+            tmp = self.find_dependency(cmd['id'])
+            if tmp:
+                node = tmp['node']
+                arg_node = node.args
+        if not node:
+            mlog.error('Unable to find the function node')
+        assert(isinstance(node, FunctionNode))
+        assert(isinstance(arg_node, ArgumentNode))
+        # Transform the key nodes to plain strings
+        arg_node.kwargs = {k.value: v for k, v in arg_node.kwargs.items()}
+
+        # Print kwargs info
+        if cmd['operation'] == 'info':
+            info_data = {}
+            for key, val in sorted(arg_node.kwargs.items()):
+                info_data[key] = None
+                if isinstance(val, ElementaryNode):
+                    info_data[key] = val.value
+                elif isinstance(val, ArrayNode):
+                    data_list = []
+                    for i in val.args.arguments:
+                        element = None
+                        if isinstance(i, ElementaryNode):
+                            element = i.value
+                        data_list += [element]
+                    info_data[key] = data_list
+
+            self.add_info('kwargs', '{}#{}'.format(cmd['function'], cmd['id']), info_data)
+            return # Nothing else to do
+
+        # Modify the kwargs
+        num_changed = 0
+        for key, val in sorted(cmd['kwargs'].items()):
+            if key not in kwargs_def:
+                mlog.error('Cannot modify unknown kwarg', mlog.bold(key), *self.on_error())
+                self.handle_error()
+                continue
+
+            # Remove the key from the kwargs
+            if cmd['operation'] == 'delete':
+                if key in arg_node.kwargs:
+                    mlog.log('  -- Deleting', mlog.bold(key), 'from the kwargs')
+                    del arg_node.kwargs[key]
+                    num_changed += 1
+                else:
+                    mlog.log('  -- Key', mlog.bold(key), 'is already deleted')
+                continue
+
+            if key not in arg_node.kwargs:
+                arg_node.kwargs[key] = None
+            modifyer = kwargs_def[key](arg_node.kwargs[key])
+            if not modifyer.can_modify():
+                mlog.log('  -- Skipping', mlog.bold(key), 'because it is to complex to modify')
+
+            # Apply the operation
+            val_str = str(val)
+            if cmd['operation'] == 'set':
+                mlog.log('  -- Setting', mlog.bold(key), 'to', mlog.yellow(val_str))
+                modifyer.set_value(val)
+            elif cmd['operation'] == 'add':
+                mlog.log('  -- Adding', mlog.yellow(val_str), 'to', mlog.bold(key))
+                modifyer.add_value(val)
+            elif cmd['operation'] == 'remove':
+                mlog.log('  -- Removing', mlog.yellow(val_str), 'from', mlog.bold(key))
+                modifyer.remove_value(val)
+            elif cmd['operation'] == 'remove_regex':
+                mlog.log('  -- Removing all values matching', mlog.yellow(val_str), 'from', mlog.bold(key))
+                modifyer.remove_regex(val)
+
+            # Write back the result
+            arg_node.kwargs[key] = modifyer.get_node()
+            num_changed += 1
+
+        # Convert the keys back to IdNode's
+        arg_node.kwargs = {IdNode(Token('', '', 0, 0, 0, None, k)): v for k, v in arg_node.kwargs.items()}
+        if num_changed > 0 and node not in self.modefied_nodes:
+            self.modefied_nodes += [node]
+
+    def find_assignment_node(self, node: BaseNode) -> AssignmentNode:
+        if node.ast_id and node.ast_id in self.interpreter.reverse_assignment:
+            return self.interpreter.reverse_assignment[node.ast_id]
+        return None
+
+    @RequiredKeys(rewriter_keys['target'])
+    def process_target(self, cmd):
+        mlog.log('Processing target', mlog.bold(cmd['target']), 'operation', mlog.cyan(cmd['operation']))
+        target = self.find_target(cmd['target'])
+        if target is None and cmd['operation'] != 'target_add':
+            mlog.error('Unknown target', mlog.bold(cmd['target']), *self.on_error())
+            return self.handle_error()
+
+        # Make source paths relative to the current subdir
+        def rel_source(src: str) -> str:
+            subdir = os.path.abspath(os.path.join(self.sourcedir, target['subdir']))
+            if os.path.isabs(src):
+                return os.path.relpath(src, subdir)
+            elif not os.path.exists(src):
+                return src # Trust the user when the source doesn't exist
+            # Make sure that the path is relative to the subdir
+            return os.path.relpath(os.path.abspath(src), subdir)
+
+        if target is not None:
+            cmd['sources'] = [rel_source(x) for x in cmd['sources']]
+
+        # Utility function to get a list of the sources from a node
+        def arg_list_from_node(n):
+            args = []
+            if isinstance(n, FunctionNode):
+                args = list(n.args.arguments)
+                if n.func_name in build_target_functions:
+                    args.pop(0)
+            elif isinstance(n, ArrayNode):
+                args = n.args.arguments
+            elif isinstance(n, ArgumentNode):
+                args = n.arguments
+            return args
+
+        to_sort_nodes = []
+
+        if cmd['operation'] == 'src_add':
+            node = None
+            if target['sources']:
+                node = target['sources'][0]
+            else:
+                node = target['node']
+            assert(node is not None)
+
+            # Generate the current source list
+            src_list = []
+            for i in target['sources']:
+                for j in arg_list_from_node(i):
+                    if isinstance(j, StringNode):
+                        src_list += [j.value]
+
+            # Generate the new String nodes
+            to_append = []
+            for i in sorted(set(cmd['sources'])):
+                if i in src_list:
+                    mlog.log('  -- Source', mlog.green(i), 'is already defined for the target --> skipping')
+                    continue
+                mlog.log('  -- Adding source', mlog.green(i), 'at',
+                         mlog.yellow('{}:{}'.format(node.filename, node.lineno)))
+                token = Token('string', node.filename, 0, 0, 0, None, i)
+                to_append += [StringNode(token)]
+
+            # Append to the AST at the right place
+            arg_node = None
+            if isinstance(node, (FunctionNode, ArrayNode)):
+                arg_node = node.args
+            elif isinstance(node, ArgumentNode):
+                arg_node = node
+            assert(arg_node is not None)
+            arg_node.arguments += to_append
+
+            # Mark the node as modified
+            if arg_node not in to_sort_nodes and not isinstance(node, FunctionNode):
+                to_sort_nodes += [arg_node]
+            if node not in self.modefied_nodes:
+                self.modefied_nodes += [node]
+
+        elif cmd['operation'] == 'src_rm':
+            # Helper to find the exact string node and its parent
+            def find_node(src):
+                for i in target['sources']:
+                    for j in arg_list_from_node(i):
+                        if isinstance(j, StringNode):
+                            if j.value == src:
+                                return i, j
+                return None, None
+
+            for i in cmd['sources']:
+                # Try to find the node with the source string
+                root, string_node = find_node(i)
+                if root is None:
+                    mlog.warning('  -- Unable to find source', mlog.green(i), 'in the target')
+                    continue
+
+                # Remove the found string node from the argument list
+                arg_node = None
+                if isinstance(root, (FunctionNode, ArrayNode)):
+                    arg_node = root.args
+                elif isinstance(root, ArgumentNode):
+                    arg_node = root
+                assert(arg_node is not None)
+                mlog.log('  -- Removing source', mlog.green(i), 'from',
+                         mlog.yellow('{}:{}'.format(string_node.filename, string_node.lineno)))
+                arg_node.arguments.remove(string_node)
+
+                # Mark the node as modified
+                if arg_node not in to_sort_nodes and not isinstance(root, FunctionNode):
+                    to_sort_nodes += [arg_node]
+                if root not in self.modefied_nodes:
+                    self.modefied_nodes += [root]
+
+        elif cmd['operation'] == 'target_add':
+            if target is not None:
+                mlog.error('Can not add target', mlog.bold(cmd['target']), 'because it already exists', *self.on_error())
+                return self.handle_error()
+
+            id_base = re.sub(r'[- ]', '_', cmd['target'])
+            target_id = id_base + '_exe' if cmd['target_type'] == 'executable' else '_lib'
+            source_id = id_base + '_sources'
+            filename = os.path.join(cmd['subdir'], environment.build_filename)
+
+            # Build src list
+            src_arg_node = ArgumentNode(Token('string', filename, 0, 0, 0, None, ''))
+            src_arr_node = ArrayNode(src_arg_node, 0, 0, 0, 0)
+            src_far_node = ArgumentNode(Token('string', filename, 0, 0, 0, None, ''))
+            src_fun_node = FunctionNode(filename, 0, 0, 0, 0, 'files', src_far_node)
+            src_ass_node = AssignmentNode(filename, 0, 0, source_id, src_fun_node)
+            src_arg_node.arguments = [StringNode(Token('string', filename, 0, 0, 0, None, x)) for x in cmd['sources']]
+            src_far_node.arguments = [src_arr_node]
+
+            # Build target
+            tgt_arg_node = ArgumentNode(Token('string', filename, 0, 0, 0, None, ''))
+            tgt_fun_node = FunctionNode(filename, 0, 0, 0, 0, cmd['target_type'], tgt_arg_node)
+            tgt_ass_node = AssignmentNode(filename, 0, 0, target_id, tgt_fun_node)
+            tgt_arg_node.arguments = [
+                StringNode(Token('string', filename, 0, 0, 0, None, cmd['target'])),
+                IdNode(Token('string', filename, 0, 0, 0, None, source_id))
+            ]
+
+            src_ass_node.accept(AstIndentationGenerator())
+            tgt_ass_node.accept(AstIndentationGenerator())
+            self.to_add_nodes += [src_ass_node, tgt_ass_node]
+
+        elif cmd['operation'] == 'target_rm':
+            to_remove = self.find_assignment_node(target['node'])
+            if to_remove is None:
+                to_remove = target['node']
+            self.to_remove_nodes += [to_remove]
+            mlog.log('  -- Removing target', mlog.green(cmd['target']), 'at',
+                     mlog.yellow('{}:{}'.format(to_remove.filename, to_remove.lineno)))
+
+        elif cmd['operation'] == 'info':
+            # T.List all sources in the target
+            src_list = []
+            for i in target['sources']:
+                for j in arg_list_from_node(i):
+                    if isinstance(j, StringNode):
+                        src_list += [j.value]
+            test_data = {
+                'name': target['name'],
+                'sources': src_list
+            }
+            self.add_info('target', target['id'], test_data)
+
+        # Sort files
+        for i in to_sort_nodes:
+            convert = lambda text: int(text) if text.isdigit() else text.lower()
+            alphanum_key = lambda key: [convert(c) for c in re.split('([0-9]+)', key)]
+            path_sorter = lambda key: ([(key.count('/') <= idx, alphanum_key(x)) for idx, x in enumerate(key.split('/'))])
+
+            unknown = [x for x in i.arguments if not isinstance(x, StringNode)]
+            sources = [x for x in i.arguments if isinstance(x, StringNode)]
+            sources = sorted(sources, key=lambda x: path_sorter(x.value))
+            i.arguments = unknown + sources
+
+    def process(self, cmd):
+        if 'type' not in cmd:
+            raise RewriterException('Command has no key "type"')
+        if cmd['type'] not in self.functions:
+            raise RewriterException('Unknown command "{}". Supported commands are: {}'
+                                    .format(cmd['type'], list(self.functions.keys())))
+        self.functions[cmd['type']](cmd)
+
+    def apply_changes(self):
+        assert(all(hasattr(x, 'lineno') and hasattr(x, 'colno') and hasattr(x, 'filename') for x in self.modefied_nodes))
+        assert(all(hasattr(x, 'lineno') and hasattr(x, 'colno') and hasattr(x, 'filename') for x in self.to_remove_nodes))
+        assert(all(isinstance(x, (ArrayNode, FunctionNode)) for x in self.modefied_nodes))
+        assert(all(isinstance(x, (ArrayNode, AssignmentNode, FunctionNode)) for x in self.to_remove_nodes))
+        # Sort based on line and column in reversed order
+        work_nodes = [{'node': x, 'action': 'modify'} for x in self.modefied_nodes]
+        work_nodes += [{'node': x, 'action': 'rm'} for x in self.to_remove_nodes]
+        work_nodes = list(sorted(work_nodes, key=lambda x: (x['node'].lineno, x['node'].colno), reverse=True))
+        work_nodes += [{'node': x, 'action': 'add'} for x in self.to_add_nodes]
+
+        # Generating the new replacement string
+        str_list = []
+        for i in work_nodes:
+            new_data = ''
+            if i['action'] == 'modify' or i['action'] == 'add':
+                printer = AstPrinter()
+                i['node'].accept(printer)
+                printer.post_process()
+                new_data = printer.result.strip()
+            data = {
+                'file': i['node'].filename,
+                'str': new_data,
+                'node': i['node'],
+                'action': i['action']
+            }
+            str_list += [data]
+
+        # Load build files
+        files = {}
+        for i in str_list:
+            if i['file'] in files:
+                continue
+            fpath = os.path.realpath(os.path.join(self.sourcedir, i['file']))
+            fdata = ''
+            # Create an empty file if it does not exist
+            if not os.path.exists(fpath):
+                with open(fpath, 'w'):
+                    pass
+            with open(fpath, 'r') as fp:
+                fdata = fp.read()
+
+            # Generate line offsets numbers
+            m_lines = fdata.splitlines(True)
+            offset = 0
+            line_offsets = []
+            for j in m_lines:
+                line_offsets += [offset]
+                offset += len(j)
+
+            files[i['file']] = {
+                'path': fpath,
+                'raw': fdata,
+                'offsets': line_offsets
+            }
+
+        # Replace in source code
+        def remove_node(i):
+            offsets = files[i['file']]['offsets']
+            raw = files[i['file']]['raw']
+            node = i['node']
+            line = node.lineno - 1
+            col = node.colno
+            start = offsets[line] + col
+            end = start
+            if isinstance(node, (ArrayNode, FunctionNode)):
+                end = offsets[node.end_lineno - 1] + node.end_colno
+
+            # Only removal is supported for assignments
+            elif isinstance(node, AssignmentNode) and i['action'] == 'rm':
+                if isinstance(node.value, (ArrayNode, FunctionNode)):
+                    remove_node({'file': i['file'], 'str': '', 'node': node.value, 'action': 'rm'})
+                    raw = files[i['file']]['raw']
+                while raw[end] != '=':
+                    end += 1
+                end += 1 # Handle the '='
+                while raw[end] in [' ', '\n', '\t']:
+                    end += 1
+
+            files[i['file']]['raw'] = raw[:start] + i['str'] + raw[end:]
+
+        for i in str_list:
+            if i['action'] in ['modify', 'rm']:
+                remove_node(i)
+            elif i['action'] in ['add']:
+                files[i['file']]['raw'] += i['str'] + '\n'
+
+        # Write the files back
+        for key, val in files.items():
+            mlog.log('Rewriting', mlog.yellow(key))
+            with open(val['path'], 'w') as fp:
+                fp.write(val['raw'])
+
+target_operation_map = {
+    'add': 'src_add',
+    'rm': 'src_rm',
+    'add_target': 'target_add',
+    'rm_target': 'target_rm',
+    'info': 'info',
+}
+
+def list_to_dict(in_list: T.List[str]) -> T.Dict[str, str]:
+    result = {}
+    it = iter(in_list)
+    try:
+        for i in it:
+            # calling next(it) is not a mistake, we're taking the next element from
+            # the iterator, avoiding the need to preprocess it into a sequence of
+            # key value pairs.
+            result[i] = next(it)
+    except StopIteration:
+        raise TypeError('in_list parameter of list_to_dict must have an even length.')
+    return result
+
+def generate_target(options) -> T.List[dict]:
+    return [{
+        'type': 'target',
+        'target': options.target,
+        'operation': target_operation_map[options.operation],
+        'sources': options.sources,
+        'subdir': options.subdir,
+        'target_type': options.tgt_type,
+    }]
+
+def generate_kwargs(options) -> T.List[dict]:
+    return [{
+        'type': 'kwargs',
+        'function': options.function,
+        'id': options.id,
+        'operation': options.operation,
+        'kwargs': list_to_dict(options.kwargs),
+    }]
+
+def generate_def_opts(options) -> T.List[dict]:
+    return [{
+        'type': 'default_options',
+        'operation': options.operation,
+        'options': list_to_dict(options.options),
+    }]
+
+def genreate_cmd(options) -> T.List[dict]:
+    if os.path.exists(options.json):
+        with open(options.json, 'r') as fp:
+            return json.load(fp)
+    else:
+        return json.loads(options.json)
+
+# Map options.type to the actual type name
+cli_type_map = {
+    'target': generate_target,
+    'tgt': generate_target,
+    'kwargs': generate_kwargs,
+    'default-options': generate_def_opts,
+    'def': generate_def_opts,
+    'command': genreate_cmd,
+    'cmd': genreate_cmd,
+}
+
+def run(options):
+    if not options.verbose:
+        mlog.set_quiet()
+
+    try:
+        rewriter = Rewriter(options.sourcedir, skip_errors=options.skip)
+        rewriter.analyze_meson()
+
+        if options.type is None:
+            mlog.error('No command specified')
+            return 1
+
+        commands = cli_type_map[options.type](options)
+
+        if not isinstance(commands, list):
+            raise TypeError('Command is not a list')
+
+        for i in commands:
+            if not isinstance(i, object):
+                raise TypeError('Command is not an object')
+            rewriter.process(i)
+
+        rewriter.apply_changes()
+        rewriter.print_info()
+        return 0
+    except Exception as e:
+        raise e
+    finally:
+        mlog.set_verbose()
diff --git a/meson/mesonbuild/scripts/__init__.py b/meson/mesonbuild/scripts/__init__.py
new file mode 100644
index 000000000..af6bedc17
--- /dev/null
+++ b/meson/mesonbuild/scripts/__init__.py
@@ -0,0 +1,20 @@
+# Copyright 2016 The Meson development team
+
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+
+#     http://www.apache.org/licenses/LICENSE-2.0
+
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+def destdir_join(d1, d2):
+    # c:\destdir + c:\prefix must produce c:\destdir\prefix
+    if len(d1) > 1 and d1[1] == ':' \
+            and len(d2) > 1 and d2[1] == ':':
+        return d1 + d2[2:]
+    return d1 + d2
diff --git a/meson/mesonbuild/scripts/clangformat.py b/meson/mesonbuild/scripts/clangformat.py
new file mode 100644
index 000000000..4b441de13
--- /dev/null
+++ b/meson/mesonbuild/scripts/clangformat.py
@@ -0,0 +1,45 @@
+# Copyright 2018 The Meson development team
+
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+
+#     http://www.apache.org/licenses/LICENSE-2.0
+
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import pathlib
+import subprocess
+from concurrent.futures import ThreadPoolExecutor
+
+from ..environment import detect_clangformat
+from ..compilers import lang_suffixes
+
+def clangformat(exelist, srcdir_name, builddir_name):
+    srcdir = pathlib.Path(srcdir_name)
+    suffixes = set(lang_suffixes['c']).union(set(lang_suffixes['cpp']))
+    suffixes.add('h')
+    futures = []
+    with ThreadPoolExecutor() as e:
+        for f in (x for suff in suffixes for x in srcdir.glob('**/*.' + suff)):
+            strf = str(f)
+            if strf.startswith(builddir_name):
+                continue
+            futures.append(e.submit(subprocess.check_call, exelist + ['-style=file', '-i', strf]))
+        [x.result() for x in futures]
+    return 0
+
+def run(args):
+    srcdir_name = args[0]
+    builddir_name = args[1]
+
+    exelist = detect_clangformat()
+    if not exelist:
+        print('Could not execute clang-format "%s"' % ' '.join(exelist))
+        return 1
+
+    return clangformat(exelist, srcdir_name, builddir_name)
diff --git a/meson/mesonbuild/scripts/clangtidy.py b/meson/mesonbuild/scripts/clangtidy.py
new file mode 100644
index 000000000..0452086ee
--- /dev/null
+++ b/meson/mesonbuild/scripts/clangtidy.py
@@ -0,0 +1,52 @@
+# Copyright 2019 The Meson development team
+
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+
+#     http://www.apache.org/licenses/LICENSE-2.0
+
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import pathlib
+import subprocess
+import shutil
+from concurrent.futures import ThreadPoolExecutor
+
+from ..compilers import lang_suffixes
+
+def manual_clangformat(srcdir_name, builddir_name):
+    srcdir = pathlib.Path(srcdir_name)
+    suffixes = set(lang_suffixes['c']).union(set(lang_suffixes['cpp']))
+    suffixes.add('h')
+    futures = []
+    returncode = 0
+    with ThreadPoolExecutor() as e:
+        for f in (x for suff in suffixes for x in srcdir.glob('**/*.' + suff)):
+            strf = str(f)
+            if strf.startswith(builddir_name):
+                continue
+            futures.append(e.submit(subprocess.run, ['clang-tidy', '-p', builddir_name, strf]))
+        [max(returncode, x.result().returncode) for x in futures]
+    return returncode
+
+def clangformat(srcdir_name, builddir_name):
+    run_clang_tidy = None
+    for rct in ('run-clang-tidy', 'run-clang-tidy.py'):
+        if shutil.which(rct):
+            run_clang_tidy = rct
+            break
+    if run_clang_tidy:
+        return subprocess.run([run_clang_tidy, '-p', builddir_name]).returncode
+    else:
+        print('Could not find run-clang-tidy, running checks manually.')
+        manual_clangformat(srcdir_name, builddir_name)
+
+def run(args):
+    srcdir_name = args[0]
+    builddir_name = args[1]
+    return clangformat(srcdir_name, builddir_name)
diff --git a/meson/mesonbuild/scripts/cleantrees.py b/meson/mesonbuild/scripts/cleantrees.py
new file mode 100644
index 000000000..0af8dd001
--- /dev/null
+++ b/meson/mesonbuild/scripts/cleantrees.py
@@ -0,0 +1,43 @@
+# Copyright 2016 The Meson development team
+
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+
+#     http://www.apache.org/licenses/LICENSE-2.0
+
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import os
+import sys
+import shutil
+import pickle
+
+def rmtrees(build_dir, trees):
+    for t in trees:
+        # Never delete trees outside of the builddir
+        if os.path.isabs(t):
+            print('Cannot delete dir with absolute path {!r}'.format(t))
+            continue
+        bt = os.path.join(build_dir, t)
+        # Skip if it doesn't exist, or if it is not a directory
+        if os.path.isdir(bt):
+            shutil.rmtree(bt, ignore_errors=True)
+
+def run(args):
+    if len(args) != 1:
+        print('Cleaner script for Meson. Do not run on your own please.')
+        print('cleantrees.py ')
+        return 1
+    with open(args[0], 'rb') as f:
+        data = pickle.load(f)
+    rmtrees(data.build_dir, data.trees)
+    # Never fail cleaning
+    return 0
+
+if __name__ == '__main__':
+    run(sys.argv[1:])
diff --git a/meson/mesonbuild/scripts/cmake_run_ctgt.py b/meson/mesonbuild/scripts/cmake_run_ctgt.py
new file mode 100755
index 000000000..5c0b31f6f
--- /dev/null
+++ b/meson/mesonbuild/scripts/cmake_run_ctgt.py
@@ -0,0 +1,100 @@
+#!/usr/bin/env python3
+
+import argparse
+import subprocess
+import shutil
+import os
+import sys
+from pathlib import Path
+
+def run(argsv):
+    commands = [[]]
+    SEPARATOR = ';;;'
+
+    # Generate CMD parameters
+    parser = argparse.ArgumentParser(description='Wrapper for add_custom_command')
+    parser.add_argument('-d', '--directory', type=str, metavar='D', required=True, help='Working directory to cwd to')
+    parser.add_argument('-o', '--outputs', nargs='+', metavar='O', required=True, help='Expected output files')
+    parser.add_argument('-O', '--original-outputs', nargs='*', metavar='O', default=[], help='Output files expected by CMake')
+    parser.add_argument('commands', nargs=argparse.REMAINDER, help='A "{}" seperated list of commands'.format(SEPARATOR))
+
+    # Parse
+    args = parser.parse_args(argsv)
+
+    dummy_target = None
+    if len(args.outputs) == 1 and len(args.original_outputs) == 0:
+        dummy_target = args.outputs[0]
+    elif len(args.outputs) != len(args.original_outputs):
+        print('Length of output list and original output list differ')
+        sys.exit(1)
+
+    for i in args.commands:
+        if i == SEPARATOR:
+            commands += [[]]
+            continue
+
+        i = i.replace('"', '')  # Remove lefover quotes
+        commands[-1] += [i]
+
+    # Execute
+    for i in commands:
+        # Skip empty lists
+        if not i:
+            continue
+
+        cmd = []
+        stdout = None
+        stderr = None
+        capture_file = ''
+
+        for j in i:
+            if j in ['>', '>>']:
+                stdout = subprocess.PIPE
+                continue
+            elif j in ['&>', '&>>']:
+                stdout = subprocess.PIPE
+                stderr = subprocess.STDOUT
+                continue
+
+            if stdout is not None or stderr is not None:
+                capture_file += j
+            else:
+                cmd += [j]
+
+        try:
+            os.makedirs(args.directory, exist_ok=True)
+
+            res = subprocess.run(cmd, stdout=stdout, stderr=stderr, cwd=args.directory, check=True)
+            if capture_file:
+                out_file = Path(args.directory) / capture_file
+                out_file.write_bytes(res.stdout)
+        except subprocess.CalledProcessError:
+            sys.exit(1)
+
+    if dummy_target:
+        with open(dummy_target, 'a'):
+            os.utime(dummy_target, None)
+        sys.exit(0)
+
+    # Copy outputs
+    zipped_outputs = zip(args.outputs, args.original_outputs)
+    for expected, generated in zipped_outputs:
+        do_copy = False
+        if not os.path.exists(expected):
+            if not os.path.exists(generated):
+                print('Unable to find generated file. This can cause the build to fail:')
+                print(generated)
+                do_copy = False
+            else:
+                do_copy = True
+        elif os.path.exists(generated):
+            if os.path.getmtime(generated) > os.path.getmtime(expected):
+                do_copy = True
+
+        if do_copy:
+            if os.path.exists(expected):
+                os.remove(expected)
+            shutil.copyfile(generated, expected)
+
+if __name__ == '__main__':
+    sys.run(sys.argv[1:])
diff --git a/meson/mesonbuild/scripts/commandrunner.py b/meson/mesonbuild/scripts/commandrunner.py
new file mode 100644
index 000000000..22da4178d
--- /dev/null
+++ b/meson/mesonbuild/scripts/commandrunner.py
@@ -0,0 +1,83 @@
+# Copyright 2014 The Meson development team
+
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+
+#     http://www.apache.org/licenses/LICENSE-2.0
+
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+"""This program is a wrapper to run external commands. It determines
+what to run, sets up the environment and executes the command."""
+
+import sys, os, subprocess, shutil, shlex
+import re
+
+def run_command(source_dir, build_dir, subdir, meson_command, command, arguments):
+    env = {'MESON_SOURCE_ROOT': source_dir,
+           'MESON_BUILD_ROOT': build_dir,
+           'MESON_SUBDIR': subdir,
+           'MESONINTROSPECT': ' '.join([shlex.quote(x) for x in meson_command + ['introspect']]),
+           }
+    cwd = os.path.join(source_dir, subdir)
+    child_env = os.environ.copy()
+    child_env.update(env)
+
+    # Is the command an executable in path?
+    exe = shutil.which(command)
+    if exe is not None:
+        command_array = [exe] + arguments
+    else:# No? Maybe it is a script in the source tree.
+        fullpath = os.path.join(source_dir, subdir, command)
+        command_array = [fullpath] + arguments
+    try:
+        return subprocess.Popen(command_array, env=child_env, cwd=cwd)
+    except FileNotFoundError:
+        print('Could not execute command "%s". File not found.' % command)
+        sys.exit(1)
+    except PermissionError:
+        print('Could not execute command "%s". File not executable.' % command)
+        sys.exit(1)
+    except OSError as err:
+        print('Could not execute command "{}": {}'.format(command, err))
+        sys.exit(1)
+    except subprocess.SubprocessError as err:
+        print('Could not execute command "{}": {}'.format(command, err))
+        sys.exit(1)
+
+def is_python_command(cmdname):
+    end_py_regex = r'python(3|3\.\d+)?(\.exe)?$'
+    return re.search(end_py_regex, cmdname) is not None
+
+def run(args):
+    if len(args) < 4:
+        print('commandrunner.py     [arguments]')
+        return 1
+    src_dir = args[0]
+    build_dir = args[1]
+    subdir = args[2]
+    meson_command = args[3]
+    if is_python_command(meson_command):
+        meson_command = [meson_command, args[4]]
+        command = args[5]
+        arguments = args[6:]
+    else:
+        meson_command = [meson_command]
+        command = args[4]
+        arguments = args[5:]
+    pc = run_command(src_dir, build_dir, subdir, meson_command, command, arguments)
+    while True:
+        try:
+            pc.wait()
+            break
+        except KeyboardInterrupt:
+            pass
+    return pc.returncode
+
+if __name__ == '__main__':
+    sys.exit(run(sys.argv[1:]))
diff --git a/meson/mesonbuild/scripts/coverage.py b/meson/mesonbuild/scripts/coverage.py
new file mode 100644
index 000000000..72319724d
--- /dev/null
+++ b/meson/mesonbuild/scripts/coverage.py
@@ -0,0 +1,172 @@
+# Copyright 2017 The Meson development team
+
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+
+#     http://www.apache.org/licenses/LICENSE-2.0
+
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from mesonbuild import environment, mesonlib
+
+import argparse, sys, os, subprocess, pathlib, stat
+
+def coverage(outputs, source_root, subproject_root, build_root, log_dir, use_llvm_cov):
+    outfiles = []
+    exitcode = 0
+
+    (gcovr_exe, gcovr_new_rootdir, lcov_exe, genhtml_exe, llvm_cov_exe) = environment.find_coverage_tools()
+
+    # gcovr >= 4.2 requires a different syntax for out of source builds
+    if gcovr_new_rootdir:
+        gcovr_base_cmd = [gcovr_exe, '-r', source_root, build_root]
+    else:
+        gcovr_base_cmd = [gcovr_exe, '-r', build_root]
+
+    if use_llvm_cov:
+        gcov_exe_args = ['--gcov-executable', llvm_cov_exe + ' gcov']
+    else:
+        gcov_exe_args = []
+
+    if not outputs or 'xml' in outputs:
+        if gcovr_exe:
+            subprocess.check_call(gcovr_base_cmd +
+                                  ['-x',
+                                   '-e', subproject_root,
+                                   '-o', os.path.join(log_dir, 'coverage.xml')
+                                   ] + gcov_exe_args)
+            outfiles.append(('Xml', pathlib.Path(log_dir, 'coverage.xml')))
+        elif outputs:
+            print('gcovr >= 3.3 needed to generate Xml coverage report')
+            exitcode = 1
+
+    if not outputs or 'text' in outputs:
+        if gcovr_exe:
+            subprocess.check_call(gcovr_base_cmd +
+                                  ['-e', subproject_root,
+                                   '-o', os.path.join(log_dir, 'coverage.txt')
+                                   ] + gcov_exe_args)
+            outfiles.append(('Text', pathlib.Path(log_dir, 'coverage.txt')))
+        elif outputs:
+            print('gcovr >= 3.3 needed to generate text coverage report')
+            exitcode = 1
+
+    if not outputs or 'html' in outputs:
+        if lcov_exe and genhtml_exe:
+            htmloutdir = os.path.join(log_dir, 'coveragereport')
+            covinfo = os.path.join(log_dir, 'coverage.info')
+            initial_tracefile = covinfo + '.initial'
+            run_tracefile = covinfo + '.run'
+            raw_tracefile = covinfo + '.raw'
+            if use_llvm_cov:
+                # Create a shim to allow using llvm-cov as a gcov tool.
+                if mesonlib.is_windows():
+                    llvm_cov_shim_path = os.path.join(log_dir, 'llvm-cov.bat')
+                    with open(llvm_cov_shim_path, 'w') as llvm_cov_bat:
+                        llvm_cov_bat.write('@"{}" gcov %*'.format(llvm_cov_exe))
+                else:
+                    llvm_cov_shim_path = os.path.join(log_dir, 'llvm-cov.sh')
+                    with open(llvm_cov_shim_path, 'w') as llvm_cov_sh:
+                        llvm_cov_sh.write('#!/usr/bin/env sh\nexec "{}" gcov $@'.format(llvm_cov_exe))
+                    os.chmod(llvm_cov_shim_path, os.stat(llvm_cov_shim_path).st_mode | stat.S_IEXEC)
+                gcov_tool_args = ['--gcov-tool', llvm_cov_shim_path]
+            else:
+                gcov_tool_args = []
+            subprocess.check_call([lcov_exe,
+                                   '--directory', build_root,
+                                   '--capture',
+                                   '--initial',
+                                   '--output-file',
+                                   initial_tracefile] +
+                                  gcov_tool_args)
+            subprocess.check_call([lcov_exe,
+                                   '--directory', build_root,
+                                   '--capture',
+                                   '--output-file', run_tracefile,
+                                   '--no-checksum',
+                                   '--rc', 'lcov_branch_coverage=1'] +
+                                  gcov_tool_args)
+            # Join initial and test results.
+            subprocess.check_call([lcov_exe,
+                                   '-a', initial_tracefile,
+                                   '-a', run_tracefile,
+                                   '--rc', 'lcov_branch_coverage=1',
+                                   '-o', raw_tracefile])
+            # Remove all directories outside the source_root from the covinfo
+            subprocess.check_call([lcov_exe,
+                                   '--extract', raw_tracefile,
+                                   os.path.join(source_root, '*'),
+                                   '--rc', 'lcov_branch_coverage=1',
+                                   '--output-file', covinfo])
+            # Remove all directories inside subproject dir
+            subprocess.check_call([lcov_exe,
+                                   '--remove', covinfo,
+                                   os.path.join(subproject_root, '*'),
+                                   '--rc', 'lcov_branch_coverage=1',
+                                   '--output-file', covinfo])
+            subprocess.check_call([genhtml_exe,
+                                   '--prefix', build_root,
+                                   '--prefix', source_root,
+                                   '--output-directory', htmloutdir,
+                                   '--title', 'Code coverage',
+                                   '--legend',
+                                   '--show-details',
+                                   '--branch-coverage',
+                                   covinfo])
+            outfiles.append(('Html', pathlib.Path(htmloutdir, 'index.html')))
+        elif gcovr_exe:
+            htmloutdir = os.path.join(log_dir, 'coveragereport')
+            if not os.path.isdir(htmloutdir):
+                os.mkdir(htmloutdir)
+            subprocess.check_call(gcovr_base_cmd +
+                                  ['--html',
+                                   '--html-details',
+                                   '--print-summary',
+                                   '-e', subproject_root,
+                                   '-o', os.path.join(htmloutdir, 'index.html'),
+                                   ])
+            outfiles.append(('Html', pathlib.Path(htmloutdir, 'index.html')))
+        elif outputs:
+            print('lcov/genhtml or gcovr >= 3.3 needed to generate Html coverage report')
+            exitcode = 1
+
+    if not outputs and not outfiles:
+        print('Need gcovr or lcov/genhtml to generate any coverage reports')
+        exitcode = 1
+
+    if outfiles:
+        print('')
+        for (filetype, path) in outfiles:
+            print(filetype + ' coverage report can be found at', path.as_uri())
+
+    return exitcode
+
+def run(args):
+    if not os.path.isfile('build.ninja'):
+        print('Coverage currently only works with the Ninja backend.')
+        return 1
+    parser = argparse.ArgumentParser(description='Generate coverage reports')
+    parser.add_argument('--text', dest='outputs', action='append_const',
+                        const='text', help='generate Text report')
+    parser.add_argument('--xml', dest='outputs', action='append_const',
+                        const='xml', help='generate Xml report')
+    parser.add_argument('--html', dest='outputs', action='append_const',
+                        const='html', help='generate Html report')
+    parser.add_argument('--use_llvm_cov', action='store_true',
+                        help='use llvm-cov')
+    parser.add_argument('source_root')
+    parser.add_argument('subproject_root')
+    parser.add_argument('build_root')
+    parser.add_argument('log_dir')
+    options = parser.parse_args(args)
+    return coverage(options.outputs, options.source_root,
+                    options.subproject_root, options.build_root,
+                    options.log_dir, options.use_llvm_cov)
+
+if __name__ == '__main__':
+    sys.exit(run(sys.argv[1:]))
diff --git a/meson/mesonbuild/scripts/delwithsuffix.py b/meson/mesonbuild/scripts/delwithsuffix.py
new file mode 100644
index 000000000..0d410ae16
--- /dev/null
+++ b/meson/mesonbuild/scripts/delwithsuffix.py
@@ -0,0 +1,35 @@
+# Copyright 2013 The Meson development team
+
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+
+#     http://www.apache.org/licenses/LICENSE-2.0
+
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import os, sys
+
+def run(args):
+    if len(args) != 2:
+        print('delwithsuffix.py  ')
+        sys.exit(1)
+
+    topdir = args[0]
+    suffix = args[1]
+    if suffix[0] != '.':
+        suffix = '.' + suffix
+
+    for (root, _, files) in os.walk(topdir):
+        for f in files:
+            if f.endswith(suffix):
+                fullname = os.path.join(root, f)
+                os.unlink(fullname)
+    return 0
+
+if __name__ == '__main__':
+    run(sys.argv[1:])
diff --git a/meson/mesonbuild/scripts/depfixer.py b/meson/mesonbuild/scripts/depfixer.py
new file mode 100644
index 000000000..f92769303
--- /dev/null
+++ b/meson/mesonbuild/scripts/depfixer.py
@@ -0,0 +1,478 @@
+# Copyright 2013-2016 The Meson development team
+
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+
+#     http://www.apache.org/licenses/LICENSE-2.0
+
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+
+import sys, struct
+import shutil, subprocess
+
+from ..mesonlib import OrderedSet
+
+SHT_STRTAB = 3
+DT_NEEDED = 1
+DT_RPATH = 15
+DT_RUNPATH = 29
+DT_STRTAB = 5
+DT_SONAME = 14
+DT_MIPS_RLD_MAP_REL = 1879048245
+
+# Global cache for tools
+INSTALL_NAME_TOOL = False
+
+class DataSizes:
+    def __init__(self, ptrsize, is_le):
+        if is_le:
+            p = '<'
+        else:
+            p = '>'
+        self.Half = p + 'h'
+        self.HalfSize = 2
+        self.Word = p + 'I'
+        self.WordSize = 4
+        self.Sword = p + 'i'
+        self.SwordSize = 4
+        if ptrsize == 64:
+            self.Addr = p + 'Q'
+            self.AddrSize = 8
+            self.Off = p + 'Q'
+            self.OffSize = 8
+            self.XWord = p + 'Q'
+            self.XWordSize = 8
+            self.Sxword = p + 'q'
+            self.SxwordSize = 8
+        else:
+            self.Addr = p + 'I'
+            self.AddrSize = 4
+            self.Off = p + 'I'
+            self.OffSize = 4
+
+class DynamicEntry(DataSizes):
+    def __init__(self, ifile, ptrsize, is_le):
+        super().__init__(ptrsize, is_le)
+        self.ptrsize = ptrsize
+        if ptrsize == 64:
+            self.d_tag = struct.unpack(self.Sxword, ifile.read(self.SxwordSize))[0]
+            self.val = struct.unpack(self.XWord, ifile.read(self.XWordSize))[0]
+        else:
+            self.d_tag = struct.unpack(self.Sword, ifile.read(self.SwordSize))[0]
+            self.val = struct.unpack(self.Word, ifile.read(self.WordSize))[0]
+
+    def write(self, ofile):
+        if self.ptrsize == 64:
+            ofile.write(struct.pack(self.Sxword, self.d_tag))
+            ofile.write(struct.pack(self.XWord, self.val))
+        else:
+            ofile.write(struct.pack(self.Sword, self.d_tag))
+            ofile.write(struct.pack(self.Word, self.val))
+
+class SectionHeader(DataSizes):
+    def __init__(self, ifile, ptrsize, is_le):
+        super().__init__(ptrsize, is_le)
+        if ptrsize == 64:
+            is_64 = True
+        else:
+            is_64 = False
+# Elf64_Word
+        self.sh_name = struct.unpack(self.Word, ifile.read(self.WordSize))[0]
+# Elf64_Word
+        self.sh_type = struct.unpack(self.Word, ifile.read(self.WordSize))[0]
+# Elf64_Xword
+        if is_64:
+            self.sh_flags = struct.unpack(self.XWord, ifile.read(self.XWordSize))[0]
+        else:
+            self.sh_flags = struct.unpack(self.Word, ifile.read(self.WordSize))[0]
+# Elf64_Addr
+        self.sh_addr = struct.unpack(self.Addr, ifile.read(self.AddrSize))[0]
+# Elf64_Off
+        self.sh_offset = struct.unpack(self.Off, ifile.read(self.OffSize))[0]
+# Elf64_Xword
+        if is_64:
+            self.sh_size = struct.unpack(self.XWord, ifile.read(self.XWordSize))[0]
+        else:
+            self.sh_size = struct.unpack(self.Word, ifile.read(self.WordSize))[0]
+# Elf64_Word
+        self.sh_link = struct.unpack(self.Word, ifile.read(self.WordSize))[0]
+# Elf64_Word
+        self.sh_info = struct.unpack(self.Word, ifile.read(self.WordSize))[0]
+# Elf64_Xword
+        if is_64:
+            self.sh_addralign = struct.unpack(self.XWord, ifile.read(self.XWordSize))[0]
+        else:
+            self.sh_addralign = struct.unpack(self.Word, ifile.read(self.WordSize))[0]
+# Elf64_Xword
+        if is_64:
+            self.sh_entsize = struct.unpack(self.XWord, ifile.read(self.XWordSize))[0]
+        else:
+            self.sh_entsize = struct.unpack(self.Word, ifile.read(self.WordSize))[0]
+
+class Elf(DataSizes):
+    def __init__(self, bfile, verbose=True):
+        self.bfile = bfile
+        self.verbose = verbose
+        self.bf = open(bfile, 'r+b')
+        try:
+            (self.ptrsize, self.is_le) = self.detect_elf_type()
+            super().__init__(self.ptrsize, self.is_le)
+            self.parse_header()
+            self.parse_sections()
+            self.parse_dynamic()
+        except (struct.error, RuntimeError):
+            self.bf.close()
+            raise
+
+    def __enter__(self):
+        return self
+
+    def __del__(self):
+        if self.bf:
+            self.bf.close()
+
+    def __exit__(self, exc_type, exc_value, traceback):
+        self.bf.close()
+        self.bf = None
+
+    def detect_elf_type(self):
+        data = self.bf.read(6)
+        if data[1:4] != b'ELF':
+            # This script gets called to non-elf targets too
+            # so just ignore them.
+            if self.verbose:
+                print('File "%s" is not an ELF file.' % self.bfile)
+            sys.exit(0)
+        if data[4] == 1:
+            ptrsize = 32
+        elif data[4] == 2:
+            ptrsize = 64
+        else:
+            sys.exit('File "%s" has unknown ELF class.' % self.bfile)
+        if data[5] == 1:
+            is_le = True
+        elif data[5] == 2:
+            is_le = False
+        else:
+            sys.exit('File "%s" has unknown ELF endianness.' % self.bfile)
+        return ptrsize, is_le
+
+    def parse_header(self):
+        self.bf.seek(0)
+        self.e_ident = struct.unpack('16s', self.bf.read(16))[0]
+        self.e_type = struct.unpack(self.Half, self.bf.read(self.HalfSize))[0]
+        self.e_machine = struct.unpack(self.Half, self.bf.read(self.HalfSize))[0]
+        self.e_version = struct.unpack(self.Word, self.bf.read(self.WordSize))[0]
+        self.e_entry = struct.unpack(self.Addr, self.bf.read(self.AddrSize))[0]
+        self.e_phoff = struct.unpack(self.Off, self.bf.read(self.OffSize))[0]
+        self.e_shoff = struct.unpack(self.Off, self.bf.read(self.OffSize))[0]
+        self.e_flags = struct.unpack(self.Word, self.bf.read(self.WordSize))[0]
+        self.e_ehsize = struct.unpack(self.Half, self.bf.read(self.HalfSize))[0]
+        self.e_phentsize = struct.unpack(self.Half, self.bf.read(self.HalfSize))[0]
+        self.e_phnum = struct.unpack(self.Half, self.bf.read(self.HalfSize))[0]
+        self.e_shentsize = struct.unpack(self.Half, self.bf.read(self.HalfSize))[0]
+        self.e_shnum = struct.unpack(self.Half, self.bf.read(self.HalfSize))[0]
+        self.e_shstrndx = struct.unpack(self.Half, self.bf.read(self.HalfSize))[0]
+
+    def parse_sections(self):
+        self.bf.seek(self.e_shoff)
+        self.sections = []
+        for _ in range(self.e_shnum):
+            self.sections.append(SectionHeader(self.bf, self.ptrsize, self.is_le))
+
+    def read_str(self):
+        arr = []
+        x = self.bf.read(1)
+        while x != b'\0':
+            arr.append(x)
+            x = self.bf.read(1)
+            if x == b'':
+                raise RuntimeError('Tried to read past the end of the file')
+        return b''.join(arr)
+
+    def find_section(self, target_name):
+        section_names = self.sections[self.e_shstrndx]
+        for i in self.sections:
+            self.bf.seek(section_names.sh_offset + i.sh_name)
+            name = self.read_str()
+            if name == target_name:
+                return i
+
+    def parse_dynamic(self):
+        sec = self.find_section(b'.dynamic')
+        self.dynamic = []
+        if sec is None:
+            return
+        self.bf.seek(sec.sh_offset)
+        while True:
+            e = DynamicEntry(self.bf, self.ptrsize, self.is_le)
+            self.dynamic.append(e)
+            if e.d_tag == 0:
+                break
+
+    def print_section_names(self):
+        section_names = self.sections[self.e_shstrndx]
+        for i in self.sections:
+            self.bf.seek(section_names.sh_offset + i.sh_name)
+            name = self.read_str()
+            print(name.decode())
+
+    def print_soname(self):
+        soname = None
+        strtab = None
+        for i in self.dynamic:
+            if i.d_tag == DT_SONAME:
+                soname = i
+            if i.d_tag == DT_STRTAB:
+                strtab = i
+        if soname is None or strtab is None:
+            print("This file does not have a soname")
+            return
+        self.bf.seek(strtab.val + soname.val)
+        print(self.read_str())
+
+    def get_entry_offset(self, entrynum):
+        sec = self.find_section(b'.dynstr')
+        for i in self.dynamic:
+            if i.d_tag == entrynum:
+                return sec.sh_offset + i.val
+        return None
+
+    def print_rpath(self):
+        offset = self.get_entry_offset(DT_RPATH)
+        if offset is None:
+            print("This file does not have an rpath.")
+        else:
+            self.bf.seek(offset)
+            print(self.read_str())
+
+    def print_runpath(self):
+        offset = self.get_entry_offset(DT_RUNPATH)
+        if offset is None:
+            print("This file does not have a runpath.")
+        else:
+            self.bf.seek(offset)
+            print(self.read_str())
+
+    def print_deps(self):
+        sec = self.find_section(b'.dynstr')
+        deps = []
+        for i in self.dynamic:
+            if i.d_tag == DT_NEEDED:
+                deps.append(i)
+        for i in deps:
+            offset = sec.sh_offset + i.val
+            self.bf.seek(offset)
+            name = self.read_str()
+            print(name)
+
+    def fix_deps(self, prefix):
+        sec = self.find_section(b'.dynstr')
+        deps = []
+        for i in self.dynamic:
+            if i.d_tag == DT_NEEDED:
+                deps.append(i)
+        for i in deps:
+            offset = sec.sh_offset + i.val
+            self.bf.seek(offset)
+            name = self.read_str()
+            if name.startswith(prefix):
+                basename = name.split(b'/')[-1]
+                padding = b'\0' * (len(name) - len(basename))
+                newname = basename + padding
+                assert(len(newname) == len(name))
+                self.bf.seek(offset)
+                self.bf.write(newname)
+
+    def fix_rpath(self, rpath_dirs_to_remove, new_rpath):
+        # The path to search for can be either rpath or runpath.
+        # Fix both of them to be sure.
+        self.fix_rpathtype_entry(rpath_dirs_to_remove, new_rpath, DT_RPATH)
+        self.fix_rpathtype_entry(rpath_dirs_to_remove, new_rpath, DT_RUNPATH)
+
+    def fix_rpathtype_entry(self, rpath_dirs_to_remove, new_rpath, entrynum):
+        if isinstance(new_rpath, str):
+            new_rpath = new_rpath.encode('utf8')
+        rp_off = self.get_entry_offset(entrynum)
+        if rp_off is None:
+            if self.verbose:
+                print('File does not have rpath. It should be a fully static executable.')
+            return
+        self.bf.seek(rp_off)
+
+        old_rpath = self.read_str()
+        # Some rpath entries may come from multiple sources.
+        # Only add each one once.
+        new_rpaths = OrderedSet()
+        if new_rpath:
+            new_rpaths.add(new_rpath)
+        if old_rpath:
+            # Filter out build-only rpath entries
+            # added by get_link_dep_subdirs() or
+            # specified by user with build_rpath.
+            for rpath_dir in old_rpath.split(b':'):
+                if not (rpath_dir in rpath_dirs_to_remove or
+                        rpath_dir == (b'X' * len(rpath_dir))):
+                    if rpath_dir:
+                        new_rpaths.add(rpath_dir)
+
+        # Prepend user-specified new entries while preserving the ones that came from pkgconfig etc.
+        new_rpath = b':'.join(new_rpaths)
+
+        if len(old_rpath) < len(new_rpath):
+            msg = "New rpath must not be longer than the old one.\n Old: {}\n New: {}".format(old_rpath, new_rpath)
+            sys.exit(msg)
+        # The linker does read-only string deduplication. If there is a
+        # string that shares a suffix with the rpath, they might get
+        # dedupped. This means changing the rpath string might break something
+        # completely unrelated. This has already happened once with X.org.
+        # Thus we want to keep this change as small as possible to minimize
+        # the chance of obliterating other strings. It might still happen
+        # but our behavior is identical to what chrpath does and it has
+        # been in use for ages so based on that this should be rare.
+        if not new_rpath:
+            self.remove_rpath_entry(entrynum)
+        else:
+            self.bf.seek(rp_off)
+            self.bf.write(new_rpath)
+            self.bf.write(b'\0')
+
+    def remove_rpath_entry(self, entrynum):
+        sec = self.find_section(b'.dynamic')
+        if sec is None:
+            return None
+        for (i, entry) in enumerate(self.dynamic):
+            if entry.d_tag == entrynum:
+                rpentry = self.dynamic[i]
+                rpentry.d_tag = 0
+                self.dynamic = self.dynamic[:i] + self.dynamic[i + 1:] + [rpentry]
+                break
+        # DT_MIPS_RLD_MAP_REL is relative to the offset of the tag. Adjust it consequently.
+        for entry in self.dynamic[i:]:
+            if entry.d_tag == DT_MIPS_RLD_MAP_REL:
+                entry.val += 2 * (self.ptrsize // 8)
+                break
+        self.bf.seek(sec.sh_offset)
+        for entry in self.dynamic:
+            entry.write(self.bf)
+        return None
+
+def fix_elf(fname, rpath_dirs_to_remove, new_rpath, verbose=True):
+    with Elf(fname, verbose) as e:
+        if new_rpath is None:
+            e.print_rpath()
+            e.print_runpath()
+        else:
+            e.fix_rpath(rpath_dirs_to_remove, new_rpath)
+
+def get_darwin_rpaths_to_remove(fname):
+    out = subprocess.check_output(['otool', '-l', fname],
+                                  universal_newlines=True,
+                                  stderr=subprocess.DEVNULL)
+    result = []
+    current_cmd = 'FOOBAR'
+    for line in out.split('\n'):
+        line = line.strip()
+        if ' ' not in line:
+            continue
+        key, value = line.strip().split(' ', 1)
+        if key == 'cmd':
+            current_cmd = value
+        if key == 'path' and current_cmd == 'LC_RPATH':
+            rp = value.split('(', 1)[0].strip()
+            result.append(rp)
+    return result
+
+def fix_darwin(fname, new_rpath, final_path, install_name_mappings):
+    try:
+        rpaths = get_darwin_rpaths_to_remove(fname)
+    except subprocess.CalledProcessError:
+        # Otool failed, which happens when invoked on a
+        # non-executable target. Just return.
+        return
+    try:
+        args = []
+        if rpaths:
+            # TODO: fix this properly, not totally clear how
+            #
+            # removing rpaths from binaries on macOS has tons of
+            # weird edge cases. For instance, if the user provided
+            # a '-Wl,-rpath' argument in LDFLAGS that happens to
+            # coincide with an rpath generated from a dependency,
+            # this would cause installation failures, as meson would
+            # generate install_name_tool calls with two identical
+            # '-delete_rpath' arguments, which install_name_tool
+            # fails on. Because meson itself ensures that it never
+            # adds duplicate rpaths, duplicate rpaths necessarily
+            # come from user variables. The idea of using OrderedSet
+            # is to remove *at most one* duplicate RPATH entry. This
+            # is not optimal, as it only respects the user's choice
+            # partially: if they provided a non-duplicate '-Wl,-rpath'
+            # argument, it gets removed, if they provided a duplicate
+            # one, it remains in the final binary. A potentially optimal
+            # solution would split all user '-Wl,-rpath' arguments from
+            # LDFLAGS, and later add them back with '-add_rpath'.
+            for rp in OrderedSet(rpaths):
+                args += ['-delete_rpath', rp]
+            subprocess.check_call(['install_name_tool', fname] + args,
+                                  stdout=subprocess.DEVNULL,
+                                  stderr=subprocess.DEVNULL)
+        args = []
+        if new_rpath:
+            args += ['-add_rpath', new_rpath]
+        # Rewrite -install_name @rpath/libfoo.dylib to /path/to/libfoo.dylib
+        if fname.endswith('dylib'):
+            args += ['-id', final_path]
+        if install_name_mappings:
+            for old, new in install_name_mappings.items():
+                args += ['-change', old, new]
+        if args:
+            subprocess.check_call(['install_name_tool', fname] + args,
+                                  stdout=subprocess.DEVNULL,
+                                  stderr=subprocess.DEVNULL)
+    except Exception as err:
+        raise SystemExit(err)
+
+def fix_jar(fname):
+    subprocess.check_call(['jar', 'xfv', fname, 'META-INF/MANIFEST.MF'])
+    with open('META-INF/MANIFEST.MF', 'r+') as f:
+        lines = f.readlines()
+        f.seek(0)
+        for line in lines:
+            if not line.startswith('Class-Path:'):
+                f.write(line)
+        f.truncate()
+    subprocess.check_call(['jar', 'ufm', fname, 'META-INF/MANIFEST.MF'])
+
+def fix_rpath(fname, rpath_dirs_to_remove, new_rpath, final_path, install_name_mappings, verbose=True):
+    global INSTALL_NAME_TOOL
+    # Static libraries, import libraries, debug information, headers, etc
+    # never have rpaths
+    # DLLs and EXE currently do not need runtime path fixing
+    if fname.endswith(('.a', '.lib', '.pdb', '.h', '.hpp', '.dll', '.exe')):
+        return
+    try:
+        if fname.endswith('.jar'):
+            fix_jar(fname)
+            return
+        fix_elf(fname, rpath_dirs_to_remove, new_rpath, verbose)
+        return
+    except SystemExit as e:
+        if isinstance(e.code, int) and e.code == 0:
+            pass
+        else:
+            raise
+    # We don't look for this on import because it will do a useless PATH lookup
+    # on non-mac platforms. That can be expensive on some Windows machines
+    # (upto 30ms), which is significant with --only-changed. For details, see:
+    # https://github.com/mesonbuild/meson/pull/6612#discussion_r378581401
+    if INSTALL_NAME_TOOL is False:
+        INSTALL_NAME_TOOL = shutil.which('install_name_tool')
+    if INSTALL_NAME_TOOL:
+        fix_darwin(fname, new_rpath, final_path, install_name_mappings)
diff --git a/meson/mesonbuild/scripts/dirchanger.py b/meson/mesonbuild/scripts/dirchanger.py
new file mode 100644
index 000000000..3d7f4e24e
--- /dev/null
+++ b/meson/mesonbuild/scripts/dirchanger.py
@@ -0,0 +1,28 @@
+# Copyright 2015-2016 The Meson development team
+
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+
+#     http://www.apache.org/licenses/LICENSE-2.0
+
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+'''CD into dir given as first argument and execute
+the command given in the rest of the arguments.'''
+
+import os, subprocess, sys
+
+def run(args):
+    dirname = args[0]
+    command = args[1:]
+
+    os.chdir(dirname)
+    return subprocess.call(command)
+
+if __name__ == '__main__':
+    sys.exit(run(sys.argv[1:]))
diff --git a/meson/mesonbuild/scripts/gettext.py b/meson/mesonbuild/scripts/gettext.py
new file mode 100644
index 000000000..f5c04214c
--- /dev/null
+++ b/meson/mesonbuild/scripts/gettext.py
@@ -0,0 +1,123 @@
+# Copyright 2016 The Meson development team
+
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+
+#     http://www.apache.org/licenses/LICENSE-2.0
+
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import os
+import shutil
+import argparse
+import subprocess
+from . import destdir_join
+
+parser = argparse.ArgumentParser()
+parser.add_argument('command')
+parser.add_argument('--pkgname', default='')
+parser.add_argument('--datadirs', default='')
+parser.add_argument('--langs', default='')
+parser.add_argument('--localedir', default='')
+parser.add_argument('--subdir', default='')
+parser.add_argument('--extra-args', default='')
+
+def read_linguas(src_sub):
+    # Syntax of this file is documented here:
+    # https://www.gnu.org/software/gettext/manual/html_node/po_002fLINGUAS.html
+    linguas = os.path.join(src_sub, 'LINGUAS')
+    try:
+        langs = []
+        with open(linguas) as f:
+            for line in f:
+                line = line.strip()
+                if line and not line.startswith('#'):
+                    langs += line.split()
+        return langs
+    except (FileNotFoundError, PermissionError):
+        print('Could not find file LINGUAS in {}'.format(src_sub))
+        return []
+
+def run_potgen(src_sub, pkgname, datadirs, args):
+    listfile = os.path.join(src_sub, 'POTFILES.in')
+    if not os.path.exists(listfile):
+        listfile = os.path.join(src_sub, 'POTFILES')
+        if not os.path.exists(listfile):
+            print('Could not find file POTFILES in %s' % src_sub)
+            return 1
+
+    child_env = os.environ.copy()
+    if datadirs:
+        child_env['GETTEXTDATADIRS'] = datadirs
+
+    ofile = os.path.join(src_sub, pkgname + '.pot')
+    return subprocess.call(['xgettext', '--package-name=' + pkgname, '-p', src_sub, '-f', listfile,
+                            '-D', os.environ['MESON_SOURCE_ROOT'], '-k_', '-o', ofile] + args,
+                           env=child_env)
+
+def gen_gmo(src_sub, bld_sub, langs):
+    for l in langs:
+        subprocess.check_call(['msgfmt', os.path.join(src_sub, l + '.po'),
+                               '-o', os.path.join(bld_sub, l + '.gmo')])
+    return 0
+
+def update_po(src_sub, pkgname, langs):
+    potfile = os.path.join(src_sub, pkgname + '.pot')
+    for l in langs:
+        pofile = os.path.join(src_sub, l + '.po')
+        if os.path.exists(pofile):
+            subprocess.check_call(['msgmerge', '-q', '-o', pofile, pofile, potfile])
+        else:
+            subprocess.check_call(['msginit', '--input', potfile, '--output-file', pofile, '--locale', l, '--no-translator'])
+    return 0
+
+def do_install(src_sub, bld_sub, dest, pkgname, langs):
+    for l in langs:
+        srcfile = os.path.join(bld_sub, l + '.gmo')
+        outfile = os.path.join(dest, l, 'LC_MESSAGES',
+                               pkgname + '.mo')
+        tempfile = outfile + '.tmp'
+        os.makedirs(os.path.dirname(outfile), exist_ok=True)
+        shutil.copyfile(srcfile, tempfile)
+        shutil.copystat(srcfile, tempfile)
+        os.replace(tempfile, outfile)
+        print('Installing %s to %s' % (srcfile, outfile))
+    return 0
+
+def run(args):
+    options = parser.parse_args(args)
+    subcmd = options.command
+    langs = options.langs.split('@@') if options.langs else None
+    extra_args = options.extra_args.split('@@') if options.extra_args else []
+    subdir = os.environ.get('MESON_SUBDIR', '')
+    if options.subdir:
+        subdir = options.subdir
+    src_sub = os.path.join(os.environ['MESON_SOURCE_ROOT'], subdir)
+    bld_sub = os.path.join(os.environ['MESON_BUILD_ROOT'], subdir)
+
+    if not langs:
+        langs = read_linguas(src_sub)
+
+    if subcmd == 'pot':
+        return run_potgen(src_sub, options.pkgname, options.datadirs, extra_args)
+    elif subcmd == 'gen_gmo':
+        return gen_gmo(src_sub, bld_sub, langs)
+    elif subcmd == 'update_po':
+        if run_potgen(src_sub, options.pkgname, options.datadirs, extra_args) != 0:
+            return 1
+        return update_po(src_sub, options.pkgname, langs)
+    elif subcmd == 'install':
+        destdir = os.environ.get('DESTDIR', '')
+        dest = destdir_join(destdir, os.path.join(os.environ['MESON_INSTALL_PREFIX'],
+                                                  options.localedir))
+        if gen_gmo(src_sub, bld_sub, langs) != 0:
+            return 1
+        do_install(src_sub, bld_sub, dest, options.pkgname, langs)
+    else:
+        print('Unknown subcommand.')
+        return 1
diff --git a/meson/mesonbuild/scripts/gtkdochelper.py b/meson/mesonbuild/scripts/gtkdochelper.py
new file mode 100644
index 000000000..812604af5
--- /dev/null
+++ b/meson/mesonbuild/scripts/gtkdochelper.py
@@ -0,0 +1,294 @@
+# Copyright 2015-2016 The Meson development team
+
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+
+#     http://www.apache.org/licenses/LICENSE-2.0
+
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import sys, os
+import subprocess
+import shutil
+import argparse
+from ..mesonlib import MesonException, Popen_safe, is_windows, is_cygwin, split_args
+from . import destdir_join
+
+parser = argparse.ArgumentParser()
+
+parser.add_argument('--sourcedir', dest='sourcedir')
+parser.add_argument('--builddir', dest='builddir')
+parser.add_argument('--subdir', dest='subdir')
+parser.add_argument('--headerdirs', dest='headerdirs')
+parser.add_argument('--mainfile', dest='mainfile')
+parser.add_argument('--modulename', dest='modulename')
+parser.add_argument('--moduleversion', dest='moduleversion')
+parser.add_argument('--htmlargs', dest='htmlargs', default='')
+parser.add_argument('--scanargs', dest='scanargs', default='')
+parser.add_argument('--scanobjsargs', dest='scanobjsargs', default='')
+parser.add_argument('--gobjects-types-file', dest='gobject_typesfile', default='')
+parser.add_argument('--fixxrefargs', dest='fixxrefargs', default='')
+parser.add_argument('--mkdbargs', dest='mkdbargs', default='')
+parser.add_argument('--ld', dest='ld', default='')
+parser.add_argument('--cc', dest='cc', default='')
+parser.add_argument('--ldflags', dest='ldflags', default='')
+parser.add_argument('--cflags', dest='cflags', default='')
+parser.add_argument('--content-files', dest='content_files', default='')
+parser.add_argument('--expand-content-files', dest='expand_content_files', default='')
+parser.add_argument('--html-assets', dest='html_assets', default='')
+parser.add_argument('--ignore-headers', dest='ignore_headers', default='')
+parser.add_argument('--namespace', dest='namespace', default='')
+parser.add_argument('--mode', dest='mode', default='')
+parser.add_argument('--installdir', dest='install_dir')
+parser.add_argument('--run', dest='run', default='')
+for tool in ['scan', 'scangobj', 'mkdb', 'mkhtml', 'fixxref']:
+    program_name = 'gtkdoc-' + tool
+    parser.add_argument('--' + program_name, dest=program_name.replace('-', '_'))
+
+def gtkdoc_run_check(cmd, cwd, library_paths=None):
+    if library_paths is None:
+        library_paths = []
+
+    env = dict(os.environ)
+    if is_windows() or is_cygwin():
+        if 'PATH' in env:
+            library_paths.extend(env['PATH'].split(os.pathsep))
+        env['PATH'] = os.pathsep.join(library_paths)
+    else:
+        if 'LD_LIBRARY_PATH' in env:
+            library_paths.extend(env['LD_LIBRARY_PATH'].split(os.pathsep))
+        env['LD_LIBRARY_PATH'] = os.pathsep.join(library_paths)
+
+    if is_windows():
+        cmd.insert(0, sys.executable)
+
+    # Put stderr into stdout since we want to print it out anyway.
+    # This preserves the order of messages.
+    p, out = Popen_safe(cmd, cwd=cwd, env=env, stderr=subprocess.STDOUT)[0:2]
+    if p.returncode != 0:
+        err_msg = ["{!r} failed with status {:d}".format(cmd, p.returncode)]
+        if out:
+            err_msg.append(out)
+        raise MesonException('\n'.join(err_msg))
+    elif out:
+        # Unfortunately Windows cmd.exe consoles may be using a codepage
+        # that might choke print() with a UnicodeEncodeError, so let's
+        # ignore such errors for now, as a compromise as we are outputting
+        # console output here...
+        try:
+            print(out)
+        except UnicodeEncodeError:
+            pass
+
+def build_gtkdoc(source_root, build_root, doc_subdir, src_subdirs,
+                 main_file, module, module_version,
+                 html_args, scan_args, fixxref_args, mkdb_args,
+                 gobject_typesfile, scanobjs_args, run, ld, cc, ldflags, cflags,
+                 html_assets, content_files, ignore_headers, namespace,
+                 expand_content_files, mode, options):
+    print("Building documentation for %s" % module)
+
+    src_dir_args = []
+    for src_dir in src_subdirs:
+        if not os.path.isabs(src_dir):
+            dirs = [os.path.join(source_root, src_dir),
+                    os.path.join(build_root, src_dir)]
+        else:
+            dirs = [src_dir]
+        src_dir_args += ['--source-dir=' + d for d in dirs]
+
+    doc_src = os.path.join(source_root, doc_subdir)
+    abs_out = os.path.join(build_root, doc_subdir)
+    htmldir = os.path.join(abs_out, 'html')
+
+    content_files += [main_file]
+    sections = os.path.join(doc_src, module + "-sections.txt")
+    if os.path.exists(sections):
+        content_files.append(sections)
+
+    overrides = os.path.join(doc_src, module + "-overrides.txt")
+    if os.path.exists(overrides):
+        content_files.append(overrides)
+
+    # Copy files to build directory
+    for f in content_files:
+        # FIXME: Use mesonlib.File objects so we don't need to do this
+        if not os.path.isabs(f):
+            f = os.path.join(doc_src, f)
+        elif os.path.commonpath([f, build_root]) == build_root:
+            continue
+        shutil.copyfile(f, os.path.join(abs_out, os.path.basename(f)))
+
+    shutil.rmtree(htmldir, ignore_errors=True)
+    try:
+        os.mkdir(htmldir)
+    except Exception:
+        pass
+
+    for f in html_assets:
+        f_abs = os.path.join(doc_src, f)
+        shutil.copyfile(f_abs, os.path.join(htmldir, os.path.basename(f_abs)))
+
+    scan_cmd = [options.gtkdoc_scan, '--module=' + module] + src_dir_args
+    if ignore_headers:
+        scan_cmd.append('--ignore-headers=' + ' '.join(ignore_headers))
+    # Add user-specified arguments
+    scan_cmd += scan_args
+    gtkdoc_run_check(scan_cmd, abs_out)
+
+    # Use the generated types file when available, otherwise gobject_typesfile
+    # would often be a path to source dir instead of build dir.
+    if '--rebuild-types' in scan_args:
+        gobject_typesfile = os.path.join(abs_out, module + '.types')
+
+    if gobject_typesfile:
+        scanobjs_cmd = [options.gtkdoc_scangobj] + scanobjs_args
+        scanobjs_cmd += ['--types=' + gobject_typesfile,
+                         '--module=' + module,
+                         '--run=' + run,
+                         '--cflags=' + cflags,
+                         '--ldflags=' + ldflags,
+                         '--cc=' + cc,
+                         '--ld=' + ld,
+                         '--output-dir=' + abs_out]
+
+        library_paths = []
+        for ldflag in split_args(ldflags):
+            if ldflag.startswith('-Wl,-rpath,'):
+                library_paths.append(ldflag[11:])
+
+        gtkdoc_run_check(scanobjs_cmd, build_root, library_paths)
+
+    # Make docbook files
+    if mode == 'auto':
+        # Guessing is probably a poor idea but these keeps compat
+        # with previous behavior
+        if main_file.endswith('sgml'):
+            modeflag = '--sgml-mode'
+        else:
+            modeflag = '--xml-mode'
+    elif mode == 'xml':
+        modeflag = '--xml-mode'
+    elif mode == 'sgml':
+        modeflag = '--sgml-mode'
+    else: # none
+        modeflag = None
+
+    mkdb_cmd = [options.gtkdoc_mkdb,
+                '--module=' + module,
+                '--output-format=xml',
+                '--expand-content-files=' + ' '.join(expand_content_files),
+                ] + src_dir_args
+    if namespace:
+        mkdb_cmd.append('--name-space=' + namespace)
+    if modeflag:
+        mkdb_cmd.append(modeflag)
+    if main_file:
+        # Yes, this is the flag even if the file is in xml.
+        mkdb_cmd.append('--main-sgml-file=' + main_file)
+    # Add user-specified arguments
+    mkdb_cmd += mkdb_args
+    gtkdoc_run_check(mkdb_cmd, abs_out)
+
+    # Make HTML documentation
+    mkhtml_cmd = [options.gtkdoc_mkhtml,
+                  '--path=' + ':'.join((doc_src, abs_out)),
+                  module,
+                  ] + html_args
+    if main_file:
+        mkhtml_cmd.append('../' + main_file)
+    else:
+        mkhtml_cmd.append('%s-docs.xml' % module)
+    # html gen must be run in the HTML dir
+    gtkdoc_run_check(mkhtml_cmd, htmldir)
+
+    # Fix cross-references in HTML files
+    fixref_cmd = [options.gtkdoc_fixxref,
+                  '--module=' + module,
+                  '--module-dir=html'] + fixxref_args
+    gtkdoc_run_check(fixref_cmd, abs_out)
+
+    if module_version:
+        shutil.move(os.path.join(htmldir, '{}.devhelp2'.format(module)),
+                    os.path.join(htmldir, '{}-{}.devhelp2'.format(module, module_version)))
+
+def install_gtkdoc(build_root, doc_subdir, install_prefix, datadir, module):
+    source = os.path.join(build_root, doc_subdir, 'html')
+    final_destination = os.path.join(install_prefix, datadir, module)
+    shutil.rmtree(final_destination, ignore_errors=True)
+    shutil.copytree(source, final_destination)
+
+def run(args):
+    options = parser.parse_args(args)
+    if options.htmlargs:
+        htmlargs = options.htmlargs.split('@@')
+    else:
+        htmlargs = []
+    if options.scanargs:
+        scanargs = options.scanargs.split('@@')
+    else:
+        scanargs = []
+    if options.scanobjsargs:
+        scanobjsargs = options.scanobjsargs.split('@@')
+    else:
+        scanobjsargs = []
+    if options.fixxrefargs:
+        fixxrefargs = options.fixxrefargs.split('@@')
+    else:
+        fixxrefargs = []
+    if options.mkdbargs:
+        mkdbargs = options.mkdbargs.split('@@')
+    else:
+        mkdbargs = []
+    build_gtkdoc(
+        options.sourcedir,
+        options.builddir,
+        options.subdir,
+        options.headerdirs.split('@@'),
+        options.mainfile,
+        options.modulename,
+        options.moduleversion,
+        htmlargs,
+        scanargs,
+        fixxrefargs,
+        mkdbargs,
+        options.gobject_typesfile,
+        scanobjsargs,
+        options.run,
+        options.ld,
+        options.cc,
+        options.ldflags,
+        options.cflags,
+        options.html_assets.split('@@') if options.html_assets else [],
+        options.content_files.split('@@') if options.content_files else [],
+        options.ignore_headers.split('@@') if options.ignore_headers else [],
+        options.namespace,
+        options.expand_content_files.split('@@') if options.expand_content_files else [],
+        options.mode,
+        options)
+
+    if 'MESON_INSTALL_PREFIX' in os.environ:
+        destdir = os.environ.get('DESTDIR', '')
+        install_prefix = destdir_join(destdir, os.environ['MESON_INSTALL_PREFIX'])
+        if options.install_dir:
+            install_dir = options.install_dir
+        else:
+            install_dir = options.modulename
+            if options.moduleversion:
+                install_dir += '-' + options.moduleversion
+        if os.path.isabs(install_dir):
+            install_dir = destdir_join(destdir, install_dir)
+        install_gtkdoc(options.builddir,
+                       options.subdir,
+                       install_prefix,
+                       'share/gtk-doc/html',
+                       install_dir)
+    return 0
+
+if __name__ == '__main__':
+    sys.exit(run(sys.argv[1:]))
diff --git a/meson/mesonbuild/scripts/hotdochelper.py b/meson/mesonbuild/scripts/hotdochelper.py
new file mode 100644
index 000000000..826745d09
--- /dev/null
+++ b/meson/mesonbuild/scripts/hotdochelper.py
@@ -0,0 +1,36 @@
+import os
+import shutil
+import subprocess
+
+from . import destdir_join
+
+import argparse
+
+parser = argparse.ArgumentParser()
+parser.add_argument('--install')
+parser.add_argument('--extra-extension-path', action="append", default=[])
+parser.add_argument('--name')
+parser.add_argument('--builddir')
+parser.add_argument('--project-version')
+
+
+def run(argv):
+    options, args = parser.parse_known_args(argv)
+    subenv = os.environ.copy()
+
+    for ext_path in options.extra_extension_path:
+        subenv['PYTHONPATH'] = subenv.get('PYTHONPATH', '') + ':' + ext_path
+
+    res = subprocess.call(args, cwd=options.builddir, env=subenv)
+    if res != 0:
+        exit(res)
+
+    if options.install:
+        source_dir = os.path.join(options.builddir, options.install)
+        destdir = os.environ.get('DESTDIR', '')
+        installdir = destdir_join(destdir,
+                                  os.path.join(os.environ['MESON_INSTALL_PREFIX'],
+                                               'share/doc/', options.name, "html"))
+
+        shutil.rmtree(installdir, ignore_errors=True)
+        shutil.copytree(source_dir, installdir)
diff --git a/meson/mesonbuild/scripts/meson_exe.py b/meson/mesonbuild/scripts/meson_exe.py
new file mode 100644
index 000000000..a5acb228d
--- /dev/null
+++ b/meson/mesonbuild/scripts/meson_exe.py
@@ -0,0 +1,107 @@
+# Copyright 2013-2016 The Meson development team
+
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+
+#     http://www.apache.org/licenses/LICENSE-2.0
+
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import os
+import sys
+import argparse
+import pickle
+import platform
+import subprocess
+
+from .. import mesonlib
+from ..backend.backends import ExecutableSerialisation
+
+options = None
+
+def buildparser():
+    parser = argparse.ArgumentParser(description='Custom executable wrapper for Meson. Do not run on your own, mmm\'kay?')
+    parser.add_argument('--unpickle')
+    parser.add_argument('--capture')
+    return parser
+
+def is_windows():
+    platname = platform.system().lower()
+    return platname == 'windows' or 'mingw' in platname
+
+def is_cygwin():
+    platname = platform.system().lower()
+    return 'cygwin' in platname
+
+def run_exe(exe):
+    if exe.exe_runner:
+        if not exe.exe_runner.found():
+            raise AssertionError('BUG: Can\'t run cross-compiled exe {!r} with not-found '
+                                 'wrapper {!r}'.format(exe.cmd_args[0], exe.exe_runner.get_path()))
+        cmd_args = exe.exe_runner.get_command() + exe.cmd_args
+    else:
+        cmd_args = exe.cmd_args
+    child_env = os.environ.copy()
+    child_env.update(exe.env)
+    if exe.extra_paths:
+        child_env['PATH'] = (os.pathsep.join(exe.extra_paths + ['']) +
+                             child_env['PATH'])
+        if exe.exe_runner and mesonlib.substring_is_in_list('wine', exe.exe_runner.get_command()):
+            child_env['WINEPATH'] = mesonlib.get_wine_shortpath(
+                exe.exe_runner.get_command(),
+                ['Z:' + p for p in exe.extra_paths] + child_env.get('WINEPATH', '').split(';')
+            )
+
+    p = subprocess.Popen(cmd_args, env=child_env, cwd=exe.workdir,
+                         close_fds=False,
+                         stdout=subprocess.PIPE,
+                         stderr=subprocess.PIPE)
+    stdout, stderr = p.communicate()
+
+    if p.returncode == 0xc0000135:
+        # STATUS_DLL_NOT_FOUND on Windows indicating a common problem that is otherwise hard to diagnose
+        raise FileNotFoundError('Missing DLLs on calling {!r}'.format(exe.name))
+
+    if exe.capture and p.returncode == 0:
+        skip_write = False
+        try:
+            with open(exe.capture, 'rb') as cur:
+                skip_write = cur.read() == stdout
+        except IOError:
+            pass
+        if not skip_write:
+            with open(exe.capture, 'wb') as output:
+                output.write(stdout)
+    else:
+        sys.stdout.buffer.write(stdout)
+    if stderr:
+        sys.stderr.buffer.write(stderr)
+    return p.returncode
+
+def run(args):
+    global options
+    parser = buildparser()
+    options, cmd_args = parser.parse_known_args(args)
+    # argparse supports double dash to separate options and positional arguments,
+    # but the user has to remove it manually.
+    if cmd_args and cmd_args[0] == '--':
+        cmd_args = cmd_args[1:]
+    if not options.unpickle and not cmd_args:
+        parser.error('either --unpickle or executable and arguments are required')
+    if options.unpickle:
+        if cmd_args or options.capture:
+            parser.error('no other arguments can be used with --unpickle')
+        with open(options.unpickle, 'rb') as f:
+            exe = pickle.load(f)
+    else:
+        exe = ExecutableSerialisation(cmd_args, capture=options.capture)
+
+    return run_exe(exe)
+
+if __name__ == '__main__':
+    sys.exit(run(sys.argv[1:]))
diff --git a/meson/mesonbuild/scripts/msgfmthelper.py b/meson/mesonbuild/scripts/msgfmthelper.py
new file mode 100644
index 000000000..737f1bca6
--- /dev/null
+++ b/meson/mesonbuild/scripts/msgfmthelper.py
@@ -0,0 +1,36 @@
+# Copyright 2016 The Meson development team
+
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+
+#     http://www.apache.org/licenses/LICENSE-2.0
+
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import argparse
+import subprocess
+import os
+
+parser = argparse.ArgumentParser()
+parser.add_argument('input')
+parser.add_argument('output')
+parser.add_argument('type')
+parser.add_argument('podir')
+parser.add_argument('--datadirs', default='')
+parser.add_argument('args', default=[], metavar='extra msgfmt argument', nargs='*')
+
+
+def run(args):
+    options = parser.parse_args(args)
+    env = None
+    if options.datadirs:
+        env = os.environ.copy()
+        env.update({'GETTEXTDATADIRS': options.datadirs})
+    return subprocess.call(['msgfmt', '--' + options.type, '-d', options.podir,
+                            '--template', options.input,  '-o', options.output] + options.args,
+                           env=env)
diff --git a/meson/mesonbuild/scripts/regen_checker.py b/meson/mesonbuild/scripts/regen_checker.py
new file mode 100644
index 000000000..80d9242b4
--- /dev/null
+++ b/meson/mesonbuild/scripts/regen_checker.py
@@ -0,0 +1,57 @@
+# Copyright 2015-2016 The Meson development team
+
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+
+#     http://www.apache.org/licenses/LICENSE-2.0
+
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import sys, os
+import pickle, subprocess
+
+# This could also be used for XCode.
+
+def need_regen(regeninfo, regen_timestamp):
+    for i in regeninfo.depfiles:
+        curfile = os.path.join(regeninfo.build_dir, i)
+        curtime = os.stat(curfile).st_mtime
+        if curtime > regen_timestamp:
+            return True
+    # The timestamp file gets automatically deleted by MSBuild during a 'Clean' build.
+    # We must make sure to recreate it, even if we do not regenerate the solution.
+    # Otherwise, Visual Studio will always consider the REGEN project out of date.
+    print("Everything is up-to-date, regeneration of build files is not needed.")
+    from ..backend.vs2010backend import Vs2010Backend
+    Vs2010Backend.touch_regen_timestamp(regeninfo.build_dir)
+    return False
+
+def regen(regeninfo, meson_command, backend):
+    cmd = meson_command + ['--internal',
+                           'regenerate',
+                           regeninfo.build_dir,
+                           regeninfo.source_dir,
+                           '--backend=' + backend]
+    subprocess.check_call(cmd)
+
+def run(args):
+    private_dir = args[0]
+    dumpfile = os.path.join(private_dir, 'regeninfo.dump')
+    coredata = os.path.join(private_dir, 'coredata.dat')
+    with open(dumpfile, 'rb') as f:
+        regeninfo = pickle.load(f)
+    with open(coredata, 'rb') as f:
+        coredata = pickle.load(f)
+    backend = coredata.get_builtin_option('backend')
+    regen_timestamp = os.stat(dumpfile).st_mtime
+    if need_regen(regeninfo, regen_timestamp):
+        regen(regeninfo, coredata.meson_command, backend)
+    sys.exit(0)
+
+if __name__ == '__main__':
+    run(sys.argv[1:])
diff --git a/meson/mesonbuild/scripts/scanbuild.py b/meson/mesonbuild/scripts/scanbuild.py
new file mode 100644
index 000000000..0190067e2
--- /dev/null
+++ b/meson/mesonbuild/scripts/scanbuild.py
@@ -0,0 +1,45 @@
+# Copyright 2016 The Meson development team
+
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+
+#     http://www.apache.org/licenses/LICENSE-2.0
+
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import os
+import subprocess
+import shutil
+import tempfile
+from ..environment import detect_ninja, detect_scanbuild
+
+
+def scanbuild(exelist, srcdir, blddir, privdir, logdir, args):
+    with tempfile.TemporaryDirectory(dir=privdir) as scandir:
+        meson_cmd = exelist + args
+        build_cmd = exelist + ['-o', logdir] + detect_ninja() + ['-C', scandir]
+        rc = subprocess.call(meson_cmd + [srcdir, scandir])
+        if rc != 0:
+            return rc
+        return subprocess.call(build_cmd)
+
+
+def run(args):
+    srcdir = args[0]
+    blddir = args[1]
+    meson_cmd = args[2:]
+    privdir = os.path.join(blddir, 'meson-private')
+    logdir = os.path.join(blddir, 'meson-logs/scanbuild')
+    shutil.rmtree(logdir, ignore_errors=True)
+
+    exelist = detect_scanbuild()
+    if not exelist:
+        print('Could not execute scan-build "%s"' % ' '.join(exelist))
+        return 1
+
+    return scanbuild(exelist, srcdir, blddir, privdir, logdir, meson_cmd)
diff --git a/meson/mesonbuild/scripts/symbolextractor.py b/meson/mesonbuild/scripts/symbolextractor.py
new file mode 100644
index 000000000..d36acf6e2
--- /dev/null
+++ b/meson/mesonbuild/scripts/symbolextractor.py
@@ -0,0 +1,311 @@
+# Copyright 2013-2016 The Meson development team
+
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+
+#     http://www.apache.org/licenses/LICENSE-2.0
+
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# This script extracts the symbols of a given shared library
+# into a file. If the symbols have not changed, the file is not
+# touched. This information is used to skip link steps if the
+# ABI has not changed.
+
+# This file is basically a reimplementation of
+# http://cgit.freedesktop.org/libreoffice/core/commit/?id=3213cd54b76bc80a6f0516aac75a48ff3b2ad67c
+
+import typing as T
+import os, sys
+from .. import mesonlib
+from .. import mlog
+from ..mesonlib import Popen_safe
+import argparse
+
+parser = argparse.ArgumentParser()
+
+parser.add_argument('--cross-host', default=None, dest='cross_host',
+                    help='cross compilation host platform')
+parser.add_argument('args', nargs='+')
+
+TOOL_WARNING_FILE = None
+RELINKING_WARNING = 'Relinking will always happen on source changes.'
+
+def dummy_syms(outfilename: str):
+    """Just touch it so relinking happens always."""
+    with open(outfilename, 'w'):
+        pass
+
+def write_if_changed(text: str, outfilename: str):
+    try:
+        with open(outfilename, 'r') as f:
+            oldtext = f.read()
+        if text == oldtext:
+            return
+    except FileNotFoundError:
+        pass
+    with open(outfilename, 'w') as f:
+        f.write(text)
+
+def print_tool_warning(tool: list, msg: str, stderr: str = None):
+    global TOOL_WARNING_FILE
+    if os.path.exists(TOOL_WARNING_FILE):
+        return
+    if len(tool) == 1:
+        tool = tool[0]
+    m = '{!r} {}. {}'.format(tool, msg, RELINKING_WARNING)
+    if stderr:
+        m += '\n' + stderr
+    mlog.warning(m)
+    # Write it out so we don't warn again
+    with open(TOOL_WARNING_FILE, 'w'):
+        pass
+
+def get_tool(name: str) -> T.List[str]:
+    evar = name.upper()
+    if evar in os.environ:
+        import shlex
+        return shlex.split(os.environ[evar])
+    return [name]
+
+def call_tool(name: str, args: T.List[str], **kwargs) -> str:
+    tool = get_tool(name)
+    try:
+        p, output, e = Popen_safe(tool + args, **kwargs)
+    except FileNotFoundError:
+        print_tool_warning(tool, 'not found')
+        return None
+    except PermissionError:
+        print_tool_warning(tool, 'not usable')
+        return None
+    if p.returncode != 0:
+        print_tool_warning(tool, 'does not work', e)
+        return None
+    return output
+
+def call_tool_nowarn(tool: T.List[str], **kwargs) -> T.Tuple[str, str]:
+    try:
+        p, output, e = Popen_safe(tool, **kwargs)
+    except FileNotFoundError:
+        return None, '{!r} not found\n'.format(tool[0])
+    except PermissionError:
+        return None, '{!r} not usable\n'.format(tool[0])
+    if p.returncode != 0:
+        return None, e
+    return output, None
+
+def gnu_syms(libfilename: str, outfilename: str):
+    # Get the name of the library
+    output = call_tool('readelf', ['-d', libfilename])
+    if not output:
+        dummy_syms(outfilename)
+        return
+    result = [x for x in output.split('\n') if 'SONAME' in x]
+    assert(len(result) <= 1)
+    # Get a list of all symbols exported
+    output = call_tool('nm', ['--dynamic', '--extern-only', '--defined-only',
+                              '--format=posix', libfilename])
+    if not output:
+        dummy_syms(outfilename)
+        return
+    for line in output.split('\n'):
+        if not line:
+            continue
+        line_split = line.split()
+        entry = line_split[0:2]
+        # Store the size of symbols pointing to data objects so we relink
+        # when those change, which is needed because of copy relocations
+        # https://github.com/mesonbuild/meson/pull/7132#issuecomment-628353702
+        if line_split[1].upper() in ('B', 'G', 'D') and len(line_split) >= 4:
+            entry += [line_split[3]]
+        result += [' '.join(entry)]
+    write_if_changed('\n'.join(result) + '\n', outfilename)
+
+def solaris_syms(libfilename: str, outfilename: str):
+    # gnu_syms() works with GNU nm & readelf, not Solaris nm & elfdump
+    origpath = os.environ['PATH']
+    os.environ['PATH'] = '/usr/gnu/bin:' + origpath
+    gnu_syms(libfilename, outfilename)
+    os.environ['PATH'] = origpath
+
+def osx_syms(libfilename: str, outfilename: str):
+    # Get the name of the library
+    output = call_tool('otool', ['-l', libfilename])
+    if not output:
+        dummy_syms(outfilename)
+        return
+    arr = output.split('\n')
+    for (i, val) in enumerate(arr):
+        if 'LC_ID_DYLIB' in val:
+            match = i
+            break
+    result = [arr[match + 2], arr[match + 5]] # Libreoffice stores all 5 lines but the others seem irrelevant.
+    # Get a list of all symbols exported
+    output = call_tool('nm', ['--extern-only', '--defined-only',
+                              '--format=posix', libfilename])
+    if not output:
+        dummy_syms(outfilename)
+        return
+    result += [' '.join(x.split()[0:2]) for x in output.split('\n')]
+    write_if_changed('\n'.join(result) + '\n', outfilename)
+
+def openbsd_syms(libfilename: str, outfilename: str):
+    # Get the name of the library
+    output = call_tool('readelf', ['-d', libfilename])
+    if not output:
+        dummy_syms(outfilename)
+        return
+    result = [x for x in output.split('\n') if 'SONAME' in x]
+    assert(len(result) <= 1)
+    # Get a list of all symbols exported
+    output = call_tool('nm', ['-D', '-P', '-g', libfilename])
+    if not output:
+        dummy_syms(outfilename)
+        return
+    # U = undefined (cope with the lack of --defined-only option)
+    result += [' '.join(x.split()[0:2]) for x in output.split('\n') if x and not x.endswith('U ')]
+    write_if_changed('\n'.join(result) + '\n', outfilename)
+
+def cygwin_syms(impfilename: str, outfilename: str):
+    # Get the name of the library
+    output = call_tool('dlltool', ['-I', impfilename])
+    if not output:
+        dummy_syms(outfilename)
+        return
+    result = [output]
+    # Get the list of all symbols exported
+    output = call_tool('nm', ['--extern-only', '--defined-only',
+                              '--format=posix', impfilename])
+    if not output:
+        dummy_syms(outfilename)
+        return
+    for line in output.split('\n'):
+        if ' T ' not in line:
+            continue
+        result.append(line.split(maxsplit=1)[0])
+    write_if_changed('\n'.join(result) + '\n', outfilename)
+
+def _get_implib_dllname(impfilename: str) -> T.Tuple[T.List[str], str]:
+    all_stderr = ''
+    # First try lib.exe, which is provided by MSVC. Then llvm-lib.exe, by LLVM
+    # for clang-cl.
+    #
+    # We cannot call get_tool on `lib` because it will look at the `LIB` env
+    # var which is the list of library paths MSVC will search for import
+    # libraries while linking.
+    for lib in (['lib'], get_tool('llvm-lib')):
+        output, e = call_tool_nowarn(lib + ['-list', impfilename])
+        if output:
+            # The output is a list of DLLs that each symbol exported by the import
+            # library is available in. We only build import libraries that point to
+            # a single DLL, so we can pick any of these. Pick the last one for
+            # simplicity. Also skip the last line, which is empty.
+            return output.split('\n')[-2:-1], None
+        all_stderr += e
+    # Next, try dlltool.exe which is provided by MinGW
+    output, e = call_tool_nowarn(get_tool('dlltool') + ['-I', impfilename])
+    if output:
+        return [output], None
+    all_stderr += e
+    return ([], all_stderr)
+
+def _get_implib_exports(impfilename: str) -> T.Tuple[T.List[str], str]:
+    all_stderr = ''
+    # Force dumpbin.exe to use en-US so we can parse its output
+    env = os.environ.copy()
+    env['VSLANG'] = '1033'
+    output, e = call_tool_nowarn(get_tool('dumpbin') + ['-exports', impfilename], env=env)
+    if output:
+        lines = output.split('\n')
+        start = lines.index('File Type: LIBRARY')
+        end = lines.index('  Summary')
+        return lines[start:end], None
+    all_stderr += e
+    # Next, try llvm-nm.exe provided by LLVM, then nm.exe provided by MinGW
+    for nm in ('llvm-nm', 'nm'):
+        output, e = call_tool_nowarn(get_tool(nm) + ['--extern-only', '--defined-only',
+                                                     '--format=posix', impfilename])
+        if output:
+            result = []
+            for line in output.split('\n'):
+                if ' T ' not in line or line.startswith('.text'):
+                    continue
+                result.append(line.split(maxsplit=1)[0])
+            return result, None
+        all_stderr += e
+    return ([], all_stderr)
+
+def windows_syms(impfilename: str, outfilename: str):
+    # Get the name of the library
+    result, e = _get_implib_dllname(impfilename)
+    if not result:
+        print_tool_warning('lib, llvm-lib, dlltool', 'do not work or were not found', e)
+        dummy_syms(outfilename)
+        return
+    # Get a list of all symbols exported
+    symbols, e = _get_implib_exports(impfilename)
+    if not symbols:
+        print_tool_warning('dumpbin, llvm-nm, nm', 'do not work or were not found', e)
+        dummy_syms(outfilename)
+        return
+    result += symbols
+    write_if_changed('\n'.join(result) + '\n', outfilename)
+
+def gen_symbols(libfilename: str, impfilename: str, outfilename: str, cross_host: str):
+    if cross_host is not None:
+        # In case of cross builds just always relink. In theory we could
+        # determine the correct toolset, but we would need to use the correct
+        # `nm`, `readelf`, etc, from the cross info which requires refactoring.
+        dummy_syms(outfilename)
+    elif mesonlib.is_linux() or mesonlib.is_hurd():
+        gnu_syms(libfilename, outfilename)
+    elif mesonlib.is_osx():
+        osx_syms(libfilename, outfilename)
+    elif mesonlib.is_openbsd():
+        openbsd_syms(libfilename, outfilename)
+    elif mesonlib.is_windows():
+        if os.path.isfile(impfilename):
+            windows_syms(impfilename, outfilename)
+        else:
+            # No import library. Not sure how the DLL is being used, so just
+            # rebuild everything that links to it every time.
+            dummy_syms(outfilename)
+    elif mesonlib.is_cygwin():
+        if os.path.isfile(impfilename):
+            cygwin_syms(impfilename, outfilename)
+        else:
+            # No import library. Not sure how the DLL is being used, so just
+            # rebuild everything that links to it every time.
+            dummy_syms(outfilename)
+    elif mesonlib.is_sunos():
+        solaris_syms(libfilename, outfilename)
+    else:
+        if not os.path.exists(TOOL_WARNING_FILE):
+            mlog.warning('Symbol extracting has not been implemented for this '
+                         'platform. ' + RELINKING_WARNING)
+            # Write it out so we don't warn again
+            with open(TOOL_WARNING_FILE, 'w'):
+                pass
+        dummy_syms(outfilename)
+
+def run(args):
+    global TOOL_WARNING_FILE
+    options = parser.parse_args(args)
+    if len(options.args) != 4:
+        print('symbolextractor.py   ')
+        sys.exit(1)
+    privdir = os.path.join(options.args[0], 'meson-private')
+    TOOL_WARNING_FILE = os.path.join(privdir, 'symbolextractor_tool_warning_printed')
+    libfile = options.args[1]
+    impfile = options.args[2] # Only used on Windows
+    outfile = options.args[3]
+    gen_symbols(libfile, impfile, outfile, options.cross_host)
+    return 0
+
+if __name__ == '__main__':
+    sys.exit(run(sys.argv[1:]))
diff --git a/meson/mesonbuild/scripts/tags.py b/meson/mesonbuild/scripts/tags.py
new file mode 100644
index 000000000..431bb5fd3
--- /dev/null
+++ b/meson/mesonbuild/scripts/tags.py
@@ -0,0 +1,51 @@
+# Copyright 2019 The Meson development team
+
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+
+#     http://www.apache.org/licenses/LICENSE-2.0
+
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import os
+import subprocess
+from pathlib import Path
+
+
+def ls_as_bytestream():
+    if os.path.exists('.git'):
+        return subprocess.run(['git', 'ls-tree', '-r', '--name-only', 'HEAD'],
+                              stdout=subprocess.PIPE).stdout
+
+    files = [str(p) for p in Path('.').glob('**/*')
+             if not p.is_dir() and
+             not next((x for x in p.parts if x.startswith('.')), None)]
+    return '\n'.join(files).encode()
+
+
+def cscope():
+    ls = b'\n'.join([b'"%s"' % f for f in ls_as_bytestream().split()])
+    return subprocess.run(['cscope', '-v', '-b', '-i-'], input=ls).returncode
+
+
+def ctags():
+    ls = ls_as_bytestream()
+    return subprocess.run(['ctags', '-L-'], input=ls).returncode
+
+
+def etags():
+    ls = ls_as_bytestream()
+    return subprocess.run(['etags', '-'], input=ls).returncode
+
+
+def run(args):
+    tool_name = args[0]
+    srcdir_name = args[1]
+    os.chdir(srcdir_name)
+    assert tool_name in ['cscope', 'ctags', 'etags']
+    return globals()[tool_name]()
diff --git a/meson/mesonbuild/scripts/uninstall.py b/meson/mesonbuild/scripts/uninstall.py
new file mode 100644
index 000000000..bdc036b85
--- /dev/null
+++ b/meson/mesonbuild/scripts/uninstall.py
@@ -0,0 +1,49 @@
+# Copyright 2016 The Meson development team
+
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+
+#     http://www.apache.org/licenses/LICENSE-2.0
+
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import os
+
+logfile = 'meson-logs/install-log.txt'
+
+def do_uninstall(log):
+    failures = 0
+    successes = 0
+    for line in open(log):
+        if line.startswith('#'):
+            continue
+        fname = line.strip()
+        try:
+            if os.path.isdir(fname) and not os.path.islink(fname):
+                os.rmdir(fname)
+            else:
+                os.unlink(fname)
+            print('Deleted:', fname)
+            successes += 1
+        except Exception as e:
+            print('Could not delete %s: %s.' % (fname, e))
+            failures += 1
+    print('\nUninstall finished.\n')
+    print('Deleted:', successes)
+    print('Failed:', failures)
+    print('\nRemember that files created by custom scripts have not been removed.')
+
+def run(args):
+    if args:
+        print('Weird error.')
+        return 1
+    if not os.path.exists(logfile):
+        print('Log file does not exist, no installation has been done.')
+        return 0
+    do_uninstall(logfile)
+    return 0
diff --git a/meson/mesonbuild/scripts/vcstagger.py b/meson/mesonbuild/scripts/vcstagger.py
new file mode 100644
index 000000000..16dd4d1e5
--- /dev/null
+++ b/meson/mesonbuild/scripts/vcstagger.py
@@ -0,0 +1,44 @@
+# Copyright 2015-2016 The Meson development team
+
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+
+#     http://www.apache.org/licenses/LICENSE-2.0
+
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import sys, os, subprocess, re
+
+
+def config_vcs_tag(infile, outfile, fallback, source_dir, replace_string, regex_selector, cmd):
+    try:
+        output = subprocess.check_output(cmd, cwd=source_dir)
+        new_string = re.search(regex_selector, output.decode()).group(1).strip()
+    except Exception:
+        new_string = fallback
+
+    with open(infile, encoding='utf8') as f:
+        new_data = f.read().replace(replace_string, new_string)
+    if os.path.exists(outfile):
+        with open(outfile, encoding='utf8') as f:
+            needs_update = (f.read() != new_data)
+    else:
+        needs_update = True
+    if needs_update:
+        with open(outfile, 'w', encoding='utf8') as f:
+            f.write(new_data)
+
+
+def run(args):
+    infile, outfile, fallback, source_dir, replace_string, regex_selector = args[0:6]
+    command = args[6:]
+    config_vcs_tag(infile, outfile, fallback, source_dir, replace_string, regex_selector, command)
+    return 0
+
+if __name__ == '__main__':
+    sys.exit(run(sys.argv[1:]))
diff --git a/meson/mesonbuild/scripts/yelphelper.py b/meson/mesonbuild/scripts/yelphelper.py
new file mode 100644
index 000000000..95c8c9c70
--- /dev/null
+++ b/meson/mesonbuild/scripts/yelphelper.py
@@ -0,0 +1,132 @@
+# Copyright 2016 The Meson development team
+
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+
+#     http://www.apache.org/licenses/LICENSE-2.0
+
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import os
+import subprocess
+import shutil
+import argparse
+from .. import mlog
+from ..mesonlib import has_path_sep
+from . import destdir_join
+from .gettext import read_linguas
+
+parser = argparse.ArgumentParser()
+parser.add_argument('command')
+parser.add_argument('--id', dest='project_id')
+parser.add_argument('--subdir', dest='subdir')
+parser.add_argument('--installdir', dest='install_dir')
+parser.add_argument('--sources', dest='sources')
+parser.add_argument('--media', dest='media', default='')
+parser.add_argument('--langs', dest='langs', default='')
+parser.add_argument('--symlinks', type=bool, dest='symlinks', default=False)
+
+def build_pot(srcdir, project_id, sources):
+    # Must be relative paths
+    sources = [os.path.join('C', source) for source in sources]
+    outfile = os.path.join(srcdir, project_id + '.pot')
+    subprocess.call(['itstool', '-o', outfile] + sources)
+
+def update_po(srcdir, project_id, langs):
+    potfile = os.path.join(srcdir, project_id + '.pot')
+    for lang in langs:
+        pofile = os.path.join(srcdir, lang, lang + '.po')
+        subprocess.call(['msgmerge', '-q', '-o', pofile, pofile, potfile])
+
+def build_translations(srcdir, blddir, langs):
+    for lang in langs:
+        outdir = os.path.join(blddir, lang)
+        os.makedirs(outdir, exist_ok=True)
+        subprocess.call([
+            'msgfmt', os.path.join(srcdir, lang, lang + '.po'),
+            '-o', os.path.join(outdir, lang + '.gmo')
+        ])
+
+def merge_translations(blddir, sources, langs):
+    for lang in langs:
+        subprocess.call([
+            'itstool', '-m', os.path.join(blddir, lang, lang + '.gmo'),
+            '-o', os.path.join(blddir, lang)
+        ] + sources)
+
+def install_help(srcdir, blddir, sources, media, langs, install_dir, destdir, project_id, symlinks):
+    c_install_dir = os.path.join(install_dir, 'C', project_id)
+    for lang in langs + ['C']:
+        indir = destdir_join(destdir, os.path.join(install_dir, lang, project_id))
+        os.makedirs(indir, exist_ok=True)
+        for source in sources:
+            infile = os.path.join(srcdir if lang == 'C' else blddir, lang, source)
+            outfile = os.path.join(indir, source)
+            mlog.log('Installing %s to %s' % (infile, outfile))
+            shutil.copyfile(infile, outfile)
+            shutil.copystat(infile, outfile)
+        for m in media:
+            infile = os.path.join(srcdir, lang, m)
+            outfile = os.path.join(indir, m)
+            c_infile = os.path.join(srcdir, 'C', m)
+            if not os.path.exists(infile):
+                if not os.path.exists(c_infile):
+                    mlog.warning('Media file "%s" did not exist in C directory' % m)
+                    continue
+                elif symlinks:
+                    srcfile = os.path.join(c_install_dir, m)
+                    mlog.log('Symlinking %s to %s.' % (outfile, srcfile))
+                    if has_path_sep(m):
+                        os.makedirs(os.path.dirname(outfile), exist_ok=True)
+                    try:
+                        try:
+                            os.symlink(srcfile, outfile)
+                        except FileExistsError:
+                            os.remove(outfile)
+                            os.symlink(srcfile, outfile)
+                        continue
+                    except (NotImplementedError, OSError):
+                        mlog.warning('Symlinking not supported, falling back to copying')
+                        infile = c_infile
+                else:
+                    # Lang doesn't have media file so copy it over 'C' one
+                    infile = c_infile
+            mlog.log('Installing %s to %s' % (infile, outfile))
+            if has_path_sep(m):
+                os.makedirs(os.path.dirname(outfile), exist_ok=True)
+            shutil.copyfile(infile, outfile)
+            shutil.copystat(infile, outfile)
+
+def run(args):
+    options = parser.parse_args(args)
+    langs = options.langs.split('@@') if options.langs else []
+    media = options.media.split('@@') if options.media else []
+    sources = options.sources.split('@@')
+    destdir = os.environ.get('DESTDIR', '')
+    src_subdir = os.path.join(os.environ['MESON_SOURCE_ROOT'], options.subdir)
+    build_subdir = os.path.join(os.environ['MESON_BUILD_ROOT'], options.subdir)
+    abs_sources = [os.path.join(src_subdir, 'C', source) for source in sources]
+
+    if not langs:
+        langs = read_linguas(src_subdir)
+
+    if options.command == 'pot':
+        build_pot(src_subdir, options.project_id, sources)
+    elif options.command == 'update-po':
+        build_pot(src_subdir, options.project_id, sources)
+        update_po(src_subdir, options.project_id, langs)
+    elif options.command == 'build':
+        if langs:
+            build_translations(src_subdir, build_subdir, langs)
+    elif options.command == 'install':
+        install_dir = os.path.join(os.environ['MESON_INSTALL_PREFIX'], options.install_dir)
+        if langs:
+            build_translations(src_subdir, build_subdir, langs)
+            merge_translations(build_subdir, abs_sources, langs)
+        install_help(src_subdir, build_subdir, sources, media, langs, install_dir,
+                     destdir, options.project_id, options.symlinks)
diff --git a/meson/mesonbuild/templates/__init__.py b/meson/mesonbuild/templates/__init__.py
new file mode 100644
index 000000000..e69de29bb
diff --git a/meson/mesonbuild/templates/cpptemplates.py b/meson/mesonbuild/templates/cpptemplates.py
new file mode 100644
index 000000000..834d95520
--- /dev/null
+++ b/meson/mesonbuild/templates/cpptemplates.py
@@ -0,0 +1,184 @@
+# Copyright 2019 The Meson development team
+
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+
+#     http://www.apache.org/licenses/LICENSE-2.0
+
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+from mesonbuild.templates.sampleimpl import SampleImpl
+import re
+
+
+hello_cpp_template = '''#include 
+
+#define PROJECT_NAME "{project_name}"
+
+int main(int argc, char **argv) {{
+    if(argc != 1) {{
+        std::cout << argv[0] <<  "takes no arguments.\\n";
+        return 1;
+    }}
+    std::cout << "This is project " << PROJECT_NAME << ".\\n";
+    return 0;
+}}
+'''
+
+hello_cpp_meson_template = '''project('{project_name}', 'cpp',
+  version : '{version}',
+  default_options : ['warning_level=3',
+                     'cpp_std=c++14'])
+
+exe = executable('{exe_name}', '{source_name}',
+  install : true)
+
+test('basic', exe)
+'''
+
+lib_hpp_template = '''#pragma once
+#if defined _WIN32 || defined __CYGWIN__
+  #ifdef BUILDING_{utoken}
+    #define {utoken}_PUBLIC __declspec(dllexport)
+  #else
+    #define {utoken}_PUBLIC __declspec(dllimport)
+  #endif
+#else
+  #ifdef BUILDING_{utoken}
+      #define {utoken}_PUBLIC __attribute__ ((visibility ("default")))
+  #else
+      #define {utoken}_PUBLIC
+  #endif
+#endif
+
+namespace {namespace} {{
+
+class {utoken}_PUBLIC {class_name} {{
+
+public:
+  {class_name}();
+  int get_number() const;
+
+private:
+
+  int number;
+
+}};
+
+}}
+
+'''
+
+lib_cpp_template = '''#include <{header_file}>
+
+namespace {namespace} {{
+
+{class_name}::{class_name}() {{
+    number = 6;
+}}
+
+int {class_name}::get_number() const {{
+  return number;
+}}
+
+}}
+'''
+
+lib_cpp_test_template = '''#include <{header_file}>
+#include 
+
+int main(int argc, char **argv) {{
+    if(argc != 1) {{
+        std::cout << argv[0] << " takes no arguments.\\n";
+        return 1;
+    }}
+    {namespace}::{class_name} c;
+    return c.get_number() != 6;
+}}
+'''
+
+lib_cpp_meson_template = '''project('{project_name}', 'cpp',
+  version : '{version}',
+  default_options : ['warning_level=3', 'cpp_std=c++14'])
+
+# These arguments are only used to build the shared library
+# not the executables that use the library.
+lib_args = ['-DBUILDING_{utoken}']
+
+shlib = shared_library('{lib_name}', '{source_file}',
+  install : true,
+  cpp_args : lib_args,
+  gnu_symbol_visibility : 'hidden',
+)
+
+test_exe = executable('{test_exe_name}', '{test_source_file}',
+  link_with : shlib)
+test('{test_name}', test_exe)
+
+# Make this library usable as a Meson subproject.
+{ltoken}_dep = declare_dependency(
+  include_directories: include_directories('.'),
+  link_with : shlib)
+
+# Make this library usable from the system's
+# package manager.
+install_headers('{header_file}', subdir : '{header_dir}')
+
+pkg_mod = import('pkgconfig')
+pkg_mod.generate(
+  name : '{project_name}',
+  filebase : '{ltoken}',
+  description : 'Meson sample project.',
+  subdirs : '{header_dir}',
+  libraries : shlib,
+  version : '{version}',
+)
+'''
+
+
+class CppProject(SampleImpl):
+    def __init__(self, options):
+        super().__init__()
+        self.name = options.name
+        self.version = options.version
+
+    def create_executable(self):
+        lowercase_token = re.sub(r'[^a-z0-9]', '_', self.name.lower())
+        source_name = lowercase_token + '.cpp'
+        open(source_name, 'w').write(hello_cpp_template.format(project_name=self.name))
+        open('meson.build', 'w').write(hello_cpp_meson_template.format(project_name=self.name,
+                                                                       exe_name=lowercase_token,
+                                                                       source_name=source_name,
+                                                                       version=self.version))
+
+    def create_library(self):
+        lowercase_token = re.sub(r'[^a-z0-9]', '_', self.name.lower())
+        uppercase_token = lowercase_token.upper()
+        class_name = uppercase_token[0] + lowercase_token[1:]
+        test_exe_name = lowercase_token + '_test'
+        namespace = lowercase_token
+        lib_hpp_name = lowercase_token + '.hpp'
+        lib_cpp_name = lowercase_token + '.cpp'
+        test_cpp_name = lowercase_token + '_test.cpp'
+        kwargs = {'utoken': uppercase_token,
+                  'ltoken': lowercase_token,
+                  'header_dir': lowercase_token,
+                  'class_name': class_name,
+                  'namespace': namespace,
+                  'header_file': lib_hpp_name,
+                  'source_file': lib_cpp_name,
+                  'test_source_file': test_cpp_name,
+                  'test_exe_name': test_exe_name,
+                  'project_name': self.name,
+                  'lib_name': lowercase_token,
+                  'test_name': lowercase_token,
+                  'version': self.version,
+                  }
+        open(lib_hpp_name, 'w').write(lib_hpp_template.format(**kwargs))
+        open(lib_cpp_name, 'w').write(lib_cpp_template.format(**kwargs))
+        open(test_cpp_name, 'w').write(lib_cpp_test_template.format(**kwargs))
+        open('meson.build', 'w').write(lib_cpp_meson_template.format(**kwargs))
diff --git a/meson/mesonbuild/templates/cstemplates.py b/meson/mesonbuild/templates/cstemplates.py
new file mode 100644
index 000000000..b9b376e27
--- /dev/null
+++ b/meson/mesonbuild/templates/cstemplates.py
@@ -0,0 +1,132 @@
+# Copyright 2019 The Meson development team
+
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+
+#     http://www.apache.org/licenses/LICENSE-2.0
+
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+from mesonbuild.templates.sampleimpl import SampleImpl
+import re
+
+
+hello_cs_template = '''using System;
+
+public class {class_name} {{
+    const String PROJECT_NAME = "{project_name}";
+
+    static int Main(String[] args) {{
+      if (args.Length > 0) {{
+          System.Console.WriteLine(String.Format("{project_name} takes no arguments.."));
+          return 1;
+      }}
+      Console.WriteLine(String.Format("This is project {{0}}.", PROJECT_NAME));
+      return 0;
+    }}
+}}
+
+'''
+
+hello_cs_meson_template = '''project('{project_name}', 'cs',
+  version : '{version}',
+  default_options : ['warning_level=3'])
+
+exe = executable('{exe_name}', '{source_name}',
+  install : true)
+
+test('basic', exe)
+'''
+
+lib_cs_template = '''
+public class {class_name} {{
+    private const int number = 6;
+
+    public int get_number() {{
+      return number;
+    }}
+}}
+
+'''
+
+lib_cs_test_template = '''using System;
+
+public class {class_test} {{
+    static int Main(String[] args) {{
+      if (args.Length > 0) {{
+          System.Console.WriteLine("{project_name} takes no arguments..");
+          return 1;
+      }}
+      {class_name} c = new {class_name}();
+      Boolean result = true;
+      return result.CompareTo(c.get_number() != 6);
+    }}
+}}
+
+'''
+
+lib_cs_meson_template = '''project('{project_name}', 'cs',
+  version : '{version}',
+  default_options : ['warning_level=3'])
+
+stlib = shared_library('{lib_name}', '{source_file}',
+  install : true,
+)
+
+test_exe = executable('{test_exe_name}', '{test_source_file}',
+  link_with : stlib)
+test('{test_name}', test_exe)
+
+# Make this library usable as a Meson subproject.
+{ltoken}_dep = declare_dependency(
+  include_directories: include_directories('.'),
+  link_with : stlib)
+
+'''
+
+
+class CSharpProject(SampleImpl):
+    def __init__(self, options):
+        super().__init__()
+        self.name = options.name
+        self.version = options.version
+
+    def create_executable(self):
+        lowercase_token = re.sub(r'[^a-z0-9]', '_', self.name.lower())
+        uppercase_token = lowercase_token.upper()
+        class_name = uppercase_token[0] + lowercase_token[1:]
+        source_name = uppercase_token[0] + lowercase_token[1:] + '.cs'
+        open(source_name, 'w').write(hello_cs_template.format(project_name=self.name,
+                                                              class_name=class_name))
+        open('meson.build', 'w').write(hello_cs_meson_template.format(project_name=self.name,
+                                                                      exe_name=self.name,
+                                                                      source_name=source_name,
+                                                                      version=self.version))
+
+    def create_library(self):
+        lowercase_token = re.sub(r'[^a-z0-9]', '_', self.name.lower())
+        uppercase_token = lowercase_token.upper()
+        class_name = uppercase_token[0] + lowercase_token[1:]
+        class_test = uppercase_token[0] + lowercase_token[1:] + '_test'
+        project_test = lowercase_token + '_test'
+        lib_cs_name = uppercase_token[0] + lowercase_token[1:] + '.cs'
+        test_cs_name = uppercase_token[0] + lowercase_token[1:] + '_test.cs'
+        kwargs = {'utoken': uppercase_token,
+                  'ltoken': lowercase_token,
+                  'class_test': class_test,
+                  'class_name': class_name,
+                  'source_file': lib_cs_name,
+                  'test_source_file': test_cs_name,
+                  'test_exe_name': project_test,
+                  'project_name': self.name,
+                  'lib_name': lowercase_token,
+                  'test_name': lowercase_token,
+                  'version': self.version,
+                  }
+        open(lib_cs_name, 'w').write(lib_cs_template.format(**kwargs))
+        open(test_cs_name, 'w').write(lib_cs_test_template.format(**kwargs))
+        open('meson.build', 'w').write(lib_cs_meson_template.format(**kwargs))
diff --git a/meson/mesonbuild/templates/ctemplates.py b/meson/mesonbuild/templates/ctemplates.py
new file mode 100644
index 000000000..30acef373
--- /dev/null
+++ b/meson/mesonbuild/templates/ctemplates.py
@@ -0,0 +1,165 @@
+# Copyright 2019 The Meson development team
+
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+
+#     http://www.apache.org/licenses/LICENSE-2.0
+
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+from mesonbuild.templates.sampleimpl import SampleImpl
+import re
+
+
+lib_h_template = '''#pragma once
+#if defined _WIN32 || defined __CYGWIN__
+  #ifdef BUILDING_{utoken}
+    #define {utoken}_PUBLIC __declspec(dllexport)
+  #else
+    #define {utoken}_PUBLIC __declspec(dllimport)
+  #endif
+#else
+  #ifdef BUILDING_{utoken}
+      #define {utoken}_PUBLIC __attribute__ ((visibility ("default")))
+  #else
+      #define {utoken}_PUBLIC
+  #endif
+#endif
+
+int {utoken}_PUBLIC {function_name}();
+
+'''
+
+lib_c_template = '''#include <{header_file}>
+
+/* This function will not be exported and is not
+ * directly callable by users of this library.
+ */
+int internal_function() {{
+    return 0;
+}}
+
+int {function_name}() {{
+    return internal_function();
+}}
+'''
+
+lib_c_test_template = '''#include <{header_file}>
+#include 
+
+int main(int argc, char **argv) {{
+    if(argc != 1) {{
+        printf("%s takes no arguments.\\n", argv[0]);
+        return 1;
+    }}
+    return {function_name}();
+}}
+'''
+
+lib_c_meson_template = '''project('{project_name}', 'c',
+  version : '{version}',
+  default_options : ['warning_level=3'])
+
+# These arguments are only used to build the shared library
+# not the executables that use the library.
+lib_args = ['-DBUILDING_{utoken}']
+
+shlib = shared_library('{lib_name}', '{source_file}',
+  install : true,
+  c_args : lib_args,
+  gnu_symbol_visibility : 'hidden',
+)
+
+test_exe = executable('{test_exe_name}', '{test_source_file}',
+  link_with : shlib)
+test('{test_name}', test_exe)
+
+# Make this library usable as a Meson subproject.
+{ltoken}_dep = declare_dependency(
+  include_directories: include_directories('.'),
+  link_with : shlib)
+
+# Make this library usable from the system's
+# package manager.
+install_headers('{header_file}', subdir : '{header_dir}')
+
+pkg_mod = import('pkgconfig')
+pkg_mod.generate(
+  name : '{project_name}',
+  filebase : '{ltoken}',
+  description : 'Meson sample project.',
+  subdirs : '{header_dir}',
+  libraries : shlib,
+  version : '{version}',
+)
+'''
+
+hello_c_template = '''#include 
+
+#define PROJECT_NAME "{project_name}"
+
+int main(int argc, char **argv) {{
+    if(argc != 1) {{
+        printf("%s takes no arguments.\\n", argv[0]);
+        return 1;
+    }}
+    printf("This is project %s.\\n", PROJECT_NAME);
+    return 0;
+}}
+'''
+
+hello_c_meson_template = '''project('{project_name}', 'c',
+  version : '{version}',
+  default_options : ['warning_level=3'])
+
+exe = executable('{exe_name}', '{source_name}',
+  install : true)
+
+test('basic', exe)
+'''
+
+
+class CProject(SampleImpl):
+    def __init__(self, options):
+        super().__init__()
+        self.name = options.name
+        self.version = options.version
+
+    def create_executable(self):
+        lowercase_token = re.sub(r'[^a-z0-9]', '_', self.name.lower())
+        source_name = lowercase_token + '.c'
+        open(source_name, 'w').write(hello_c_template.format(project_name=self.name))
+        open('meson.build', 'w').write(hello_c_meson_template.format(project_name=self.name,
+                                                                     exe_name=lowercase_token,
+                                                                     source_name=source_name,
+                                                                     version=self.version))
+
+    def create_library(self):
+        lowercase_token = re.sub(r'[^a-z0-9]', '_', self.name.lower())
+        uppercase_token = lowercase_token.upper()
+        function_name = lowercase_token[0:3] + '_func'
+        test_exe_name = lowercase_token + '_test'
+        lib_h_name = lowercase_token + '.h'
+        lib_c_name = lowercase_token + '.c'
+        test_c_name = lowercase_token + '_test.c'
+        kwargs = {'utoken': uppercase_token,
+                  'ltoken': lowercase_token,
+                  'header_dir': lowercase_token,
+                  'function_name': function_name,
+                  'header_file': lib_h_name,
+                  'source_file': lib_c_name,
+                  'test_source_file': test_c_name,
+                  'test_exe_name': test_exe_name,
+                  'project_name': self.name,
+                  'lib_name': lowercase_token,
+                  'test_name': lowercase_token,
+                  'version': self.version,
+                  }
+        open(lib_h_name, 'w').write(lib_h_template.format(**kwargs))
+        open(lib_c_name, 'w').write(lib_c_template.format(**kwargs))
+        open(test_c_name, 'w').write(lib_c_test_template.format(**kwargs))
+        open('meson.build', 'w').write(lib_c_meson_template.format(**kwargs))
diff --git a/meson/mesonbuild/templates/cudatemplates.py b/meson/mesonbuild/templates/cudatemplates.py
new file mode 100644
index 000000000..2321011c8
--- /dev/null
+++ b/meson/mesonbuild/templates/cudatemplates.py
@@ -0,0 +1,184 @@
+# Copyright 2019 The Meson development team
+
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+
+#     http://www.apache.org/licenses/LICENSE-2.0
+
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+from mesonbuild.templates.sampleimpl import SampleImpl
+import re
+
+
+hello_cuda_template = '''#include 
+
+#define PROJECT_NAME "{project_name}"
+
+int main(int argc, char **argv) {{
+    if(argc != 1) {{
+        std::cout << argv[0] <<  "takes no arguments.\\n";
+        return 1;
+    }}
+    std::cout << "This is project " << PROJECT_NAME << ".\\n";
+    return 0;
+}}
+'''
+
+hello_cuda_meson_template = '''project('{project_name}', ['cuda', 'cpp'],
+  version : '{version}',
+  default_options : ['warning_level=3',
+                     'cpp_std=c++14'])
+
+exe = executable('{exe_name}', '{source_name}',
+  install : true)
+
+test('basic', exe)
+'''
+
+lib_h_template = '''#pragma once
+#if defined _WIN32 || defined __CYGWIN__
+  #ifdef BUILDING_{utoken}
+    #define {utoken}_PUBLIC __declspec(dllexport)
+  #else
+    #define {utoken}_PUBLIC __declspec(dllimport)
+  #endif
+#else
+  #ifdef BUILDING_{utoken}
+      #define {utoken}_PUBLIC __attribute__ ((visibility ("default")))
+  #else
+      #define {utoken}_PUBLIC
+  #endif
+#endif
+
+namespace {namespace} {{
+
+class {utoken}_PUBLIC {class_name} {{
+
+public:
+  {class_name}();
+  int get_number() const;
+
+private:
+
+  int number;
+
+}};
+
+}}
+
+'''
+
+lib_cuda_template = '''#include <{header_file}>
+
+namespace {namespace} {{
+
+{class_name}::{class_name}() {{
+    number = 6;
+}}
+
+int {class_name}::get_number() const {{
+  return number;
+}}
+
+}}
+'''
+
+lib_cuda_test_template = '''#include <{header_file}>
+#include 
+
+int main(int argc, char **argv) {{
+    if(argc != 1) {{
+        std::cout << argv[0] << " takes no arguments.\\n";
+        return 1;
+    }}
+    {namespace}::{class_name} c;
+    return c.get_number() != 6;
+}}
+'''
+
+lib_cuda_meson_template = '''project('{project_name}', ['cuda', 'cpp'],
+  version : '{version}',
+  default_options : ['warning_level=3'])
+
+# These arguments are only used to build the shared library
+# not the executables that use the library.
+lib_args = ['-DBUILDING_{utoken}']
+
+shlib = shared_library('{lib_name}', '{source_file}',
+  install : true,
+  cpp_args : lib_args,
+  gnu_symbol_visibility : 'hidden',
+)
+
+test_exe = executable('{test_exe_name}', '{test_source_file}',
+  link_with : shlib)
+test('{test_name}', test_exe)
+
+# Make this library usable as a Meson subproject.
+{ltoken}_dep = declare_dependency(
+  include_directories: include_directories('.'),
+  link_with : shlib)
+
+# Make this library usable from the system's
+# package manager.
+install_headers('{header_file}', subdir : '{header_dir}')
+
+pkg_mod = import('pkgconfig')
+pkg_mod.generate(
+  name : '{project_name}',
+  filebase : '{ltoken}',
+  description : 'Meson sample project.',
+  subdirs : '{header_dir}',
+  libraries : shlib,
+  version : '{version}',
+)
+'''
+
+
+class CudaProject(SampleImpl):
+    def __init__(self, options):
+        super().__init__()
+        self.name = options.name
+        self.version = options.version
+
+    def create_executable(self):
+        lowercase_token = re.sub(r'[^a-z0-9]', '_', self.name.lower())
+        source_name = lowercase_token + '.cu'
+        open(source_name, 'w').write(hello_cuda_template.format(project_name=self.name))
+        open('meson.build', 'w').write(hello_cuda_meson_template.format(project_name=self.name,
+                                                                        exe_name=lowercase_token,
+                                                                        source_name=source_name,
+                                                                        version=self.version))
+
+    def create_library(self):
+        lowercase_token = re.sub(r'[^a-z0-9]', '_', self.name.lower())
+        uppercase_token = lowercase_token.upper()
+        class_name = uppercase_token[0] + lowercase_token[1:]
+        test_exe_name = lowercase_token + '_test'
+        namespace = lowercase_token
+        lib_h_name = lowercase_token + '.h'
+        lib_cuda_name = lowercase_token + '.cu'
+        test_cuda_name = lowercase_token + '_test.cu'
+        kwargs = {'utoken': uppercase_token,
+                  'ltoken': lowercase_token,
+                  'header_dir': lowercase_token,
+                  'class_name': class_name,
+                  'namespace': namespace,
+                  'header_file': lib_h_name,
+                  'source_file': lib_cuda_name,
+                  'test_source_file': test_cuda_name,
+                  'test_exe_name': test_exe_name,
+                  'project_name': self.name,
+                  'lib_name': lowercase_token,
+                  'test_name': lowercase_token,
+                  'version': self.version,
+                  }
+        open(lib_h_name, 'w').write(lib_h_template.format(**kwargs))
+        open(lib_cuda_name, 'w').write(lib_cuda_template.format(**kwargs))
+        open(test_cuda_name, 'w').write(lib_cuda_test_template.format(**kwargs))
+        open('meson.build', 'w').write(lib_cuda_meson_template.format(**kwargs))
diff --git a/meson/mesonbuild/templates/dlangtemplates.py b/meson/mesonbuild/templates/dlangtemplates.py
new file mode 100644
index 000000000..84e6ab0c2
--- /dev/null
+++ b/meson/mesonbuild/templates/dlangtemplates.py
@@ -0,0 +1,142 @@
+# Copyright 2019 The Meson development team
+
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+
+#     http://www.apache.org/licenses/LICENSE-2.0
+
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+from mesonbuild.templates.sampleimpl import SampleImpl
+import re
+
+
+hello_d_template = '''module main;
+import std.stdio;
+
+enum PROJECT_NAME = "{project_name}";
+
+int main(string[] args) {{
+    if (args.length != 1){{
+        writefln("%s takes no arguments.\\n", args[0]);
+        return 1;
+    }}
+    writefln("This is project %s.\\n", PROJECT_NAME);
+    return 0;
+}}
+'''
+
+hello_d_meson_template = '''project('{project_name}', 'd',
+    version : '{version}',
+    default_options: ['warning_level=3'])
+
+exe = executable('{exe_name}', '{source_name}',
+  install : true)
+
+test('basic', exe)
+'''
+
+lib_d_template = '''module {module_file};
+
+/* This function will not be exported and is not
+ * directly callable by users of this library.
+ */
+int internal_function() {{
+    return 0;
+}}
+
+int {function_name}() {{
+    return internal_function();
+}}
+'''
+
+lib_d_test_template = '''module {module_file}_test;
+import std.stdio;
+import {module_file};
+
+
+int main(string[] args) {{
+    if (args.length != 1){{
+        writefln("%s takes no arguments.\\n", args[0]);
+        return 1;
+    }}
+    return {function_name}();
+}}
+'''
+
+lib_d_meson_template = '''project('{project_name}', 'd',
+  version : '{version}',
+  default_options : ['warning_level=3'])
+
+stlib = static_library('{lib_name}', '{source_file}',
+  install : true,
+  gnu_symbol_visibility : 'hidden',
+)
+
+test_exe = executable('{test_exe_name}', '{test_source_file}',
+  link_with : stlib)
+test('{test_name}', test_exe)
+
+# Make this library usable as a Meson subproject.
+{ltoken}_dep = declare_dependency(
+  include_directories: include_directories('.'),
+  link_with : stlib)
+
+# Make this library usable from the Dlang
+# build system.
+dlang_mod = import('dlang')
+if find_program('dub', required: false).found()
+  dlang_mod.generate_dub_file(meson.project_name().to_lower(), meson.source_root(),
+    name : meson.project_name(),
+    license: meson.project_license(),
+    sourceFiles : '{source_file}',
+    description : 'Meson sample project.',
+    version : '{version}',
+  )
+endif
+'''
+
+
+class DlangProject(SampleImpl):
+    def __init__(self, options):
+        super().__init__()
+        self.name = options.name
+        self.version = options.version
+
+    def create_executable(self):
+        lowercase_token = re.sub(r'[^a-z0-9]', '_', self.name.lower())
+        source_name = lowercase_token + '.d'
+        open(source_name, 'w').write(hello_d_template.format(project_name=self.name))
+        open('meson.build', 'w').write(hello_d_meson_template.format(project_name=self.name,
+                                                                     exe_name=lowercase_token,
+                                                                     source_name=source_name,
+                                                                     version=self.version))
+
+    def create_library(self):
+        lowercase_token = re.sub(r'[^a-z0-9]', '_', self.name.lower())
+        uppercase_token = lowercase_token.upper()
+        function_name = lowercase_token[0:3] + '_func'
+        test_exe_name = lowercase_token + '_test'
+        lib_m_name = lowercase_token
+        lib_d_name = lowercase_token + '.d'
+        test_d_name = lowercase_token + '_test.d'
+        kwargs = {'utoken': uppercase_token,
+                  'ltoken': lowercase_token,
+                  'header_dir': lowercase_token,
+                  'function_name': function_name,
+                  'module_file': lib_m_name,
+                  'source_file': lib_d_name,
+                  'test_source_file': test_d_name,
+                  'test_exe_name': test_exe_name,
+                  'project_name': self.name,
+                  'lib_name': lowercase_token,
+                  'test_name': lowercase_token,
+                  'version': self.version,
+                  }
+        open(lib_d_name, 'w').write(lib_d_template.format(**kwargs))
+        open(test_d_name, 'w').write(lib_d_test_template.format(**kwargs))
+        open('meson.build', 'w').write(lib_d_meson_template.format(**kwargs))
diff --git a/meson/mesonbuild/templates/fortrantemplates.py b/meson/mesonbuild/templates/fortrantemplates.py
new file mode 100644
index 000000000..a4f307971
--- /dev/null
+++ b/meson/mesonbuild/templates/fortrantemplates.py
@@ -0,0 +1,139 @@
+# Copyright 2019 The Meson development team
+
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+
+#     http://www.apache.org/licenses/LICENSE-2.0
+
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+from mesonbuild.templates.sampleimpl import SampleImpl
+import re
+
+lib_fortran_template = '''
+! This procedure will not be exported and is not
+! directly callable by users of this library.
+
+module modfoo
+
+implicit none
+private
+public :: {function_name}
+
+contains
+
+integer function internal_function()
+    internal_function = 0
+end function internal_function
+
+integer function {function_name}()
+    {function_name} = internal_function()
+end function {function_name}
+
+end module modfoo
+'''
+
+lib_fortran_test_template = '''
+use modfoo
+
+print *,{function_name}()
+
+end program
+'''
+
+lib_fortran_meson_template = '''project('{project_name}', 'fortran',
+  version : '{version}',
+  default_options : ['warning_level=3'])
+
+# These arguments are only used to build the shared library
+# not the executables that use the library.
+lib_args = ['-DBUILDING_{utoken}']
+
+shlib = shared_library('{lib_name}', '{source_file}',
+  install : true,
+  fortran_args : lib_args,
+  gnu_symbol_visibility : 'hidden',
+)
+
+test_exe = executable('{test_exe_name}', '{test_source_file}',
+  link_with : shlib)
+test('{test_name}', test_exe)
+
+# Make this library usable as a Meson subproject.
+{ltoken}_dep = declare_dependency(
+  include_directories: include_directories('.'),
+  link_with : shlib)
+
+pkg_mod = import('pkgconfig')
+pkg_mod.generate(
+  name : '{project_name}',
+  filebase : '{ltoken}',
+  description : 'Meson sample project.',
+  subdirs : '{header_dir}',
+  libraries : shlib,
+  version : '{version}',
+)
+'''
+
+hello_fortran_template = '''
+implicit none
+
+character(len=*), parameter :: PROJECT_NAME = "{project_name}"
+
+print *,"This is project ", PROJECT_NAME
+
+end program
+'''
+
+hello_fortran_meson_template = '''project('{project_name}', 'fortran',
+  version : '{version}',
+  default_options : ['warning_level=3'])
+
+exe = executable('{exe_name}', '{source_name}',
+  install : true)
+
+test('basic', exe)
+'''
+
+
+class FortranProject(SampleImpl):
+    def __init__(self, options):
+        super().__init__()
+        self.name = options.name
+        self.version = options.version
+
+    def create_executable(self):
+        lowercase_token = re.sub(r'[^a-z0-9]', '_', self.name.lower())
+        source_name = lowercase_token + '.f90'
+        open(source_name, 'w').write(hello_fortran_template.format(project_name=self.name))
+        open('meson.build', 'w').write(hello_fortran_meson_template.format(project_name=self.name,
+                                                                           exe_name=lowercase_token,
+                                                                           source_name=source_name,
+                                                                           version=self.version))
+
+    def create_library(self):
+        lowercase_token = re.sub(r'[^a-z0-9]', '_', self.name.lower())
+        uppercase_token = lowercase_token.upper()
+        function_name = lowercase_token[0:3] + '_func'
+        test_exe_name = lowercase_token + '_test'
+        lib_fortran_name = lowercase_token + '.f90'
+        test_fortran_name = lowercase_token + '_test.f90'
+        kwargs = {'utoken': uppercase_token,
+                  'ltoken': lowercase_token,
+                  'header_dir': lowercase_token,
+                  'function_name': function_name,
+                  'source_file': lib_fortran_name,
+                  'test_source_file': test_fortran_name,
+                  'test_exe_name': test_exe_name,
+                  'project_name': self.name,
+                  'lib_name': lowercase_token,
+                  'test_name': lowercase_token,
+                  'version': self.version,
+                  }
+        open(lib_fortran_name, 'w').write(lib_fortran_template.format(**kwargs))
+        open(test_fortran_name, 'w').write(lib_fortran_test_template.format(**kwargs))
+        open('meson.build', 'w').write(lib_fortran_meson_template.format(**kwargs))
diff --git a/meson/mesonbuild/templates/javatemplates.py b/meson/mesonbuild/templates/javatemplates.py
new file mode 100644
index 000000000..211faede4
--- /dev/null
+++ b/meson/mesonbuild/templates/javatemplates.py
@@ -0,0 +1,134 @@
+# Copyright 2019 The Meson development team
+
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+
+#     http://www.apache.org/licenses/LICENSE-2.0
+
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+from mesonbuild.templates.sampleimpl import SampleImpl
+import re
+
+
+hello_java_template = '''
+
+public class {class_name} {{
+    final static String PROJECT_NAME = "{project_name}";
+
+    public static void main (String args[]) {{
+        if(args.length != 0) {{
+            System.out.println(args + " takes no arguments.");
+            System.exit(0);
+        }}
+        System.out.println("This is project " + PROJECT_NAME + ".");
+        System.exit(0);
+    }}
+}}
+
+'''
+
+hello_java_meson_template = '''project('{project_name}', 'java',
+  version : '{version}',
+  default_options : ['warning_level=3'])
+
+exe = jar('{exe_name}', '{source_name}',
+  main_class : '{exe_name}',
+  install : true)
+
+test('basic', exe)
+'''
+
+lib_java_template = '''
+
+public class {class_name} {{
+    final static int number = 6;
+
+    public final int get_number() {{
+      return number;
+    }}
+}}
+
+'''
+
+lib_java_test_template = '''
+
+public class {class_test} {{
+    public static void main (String args[]) {{
+        if(args.length != 0) {{
+            System.out.println(args + " takes no arguments.");
+            System.exit(1);
+        }}
+
+        {class_name} c = new {class_name}();
+        Boolean result = true;
+        System.exit(result.compareTo(c.get_number() != 6));
+    }}
+}}
+
+'''
+
+lib_java_meson_template = '''project('{project_name}', 'java',
+  version : '{version}',
+  default_options : ['warning_level=3'])
+
+jarlib = jar('{class_name}', '{source_file}',
+  main_class : '{class_name}',
+  install : true,
+)
+
+test_jar = jar('{class_test}', '{test_source_file}',
+  main_class : '{class_test}',
+  link_with : jarlib)
+test('{test_name}', test_jar)
+
+# Make this library usable as a Meson subproject.
+{ltoken}_dep = declare_dependency(
+  include_directories: include_directories('.'),
+  link_with : jarlib)
+'''
+
+
+class JavaProject(SampleImpl):
+    def __init__(self, options):
+        super().__init__()
+        self.name = options.name
+        self.version = options.version
+
+    def create_executable(self):
+        lowercase_token = re.sub(r'[^a-z0-9]', '_', self.name.lower())
+        uppercase_token = lowercase_token.upper()
+        class_name = uppercase_token[0] + lowercase_token[1:]
+        source_name = uppercase_token[0] + lowercase_token[1:] + '.java'
+        open(source_name, 'w').write(hello_java_template.format(project_name=self.name,
+                                                                class_name=class_name))
+        open('meson.build', 'w').write(hello_java_meson_template.format(project_name=self.name,
+                                                                        exe_name=class_name,
+                                                                        source_name=source_name,
+                                                                        version=self.version))
+
+    def create_library(self):
+        lowercase_token = re.sub(r'[^a-z0-9]', '_', self.name.lower())
+        uppercase_token = lowercase_token.upper()
+        class_name = uppercase_token[0] + lowercase_token[1:]
+        class_test = uppercase_token[0] + lowercase_token[1:] + '_test'
+        lib_java_name = uppercase_token[0] + lowercase_token[1:] + '.java'
+        test_java_name = uppercase_token[0] + lowercase_token[1:] + '_test.java'
+        kwargs = {'utoken': uppercase_token,
+                  'ltoken': lowercase_token,
+                  'class_test': class_test,
+                  'class_name': class_name,
+                  'source_file': lib_java_name,
+                  'test_source_file': test_java_name,
+                  'project_name': self.name,
+                  'lib_name': lowercase_token,
+                  'test_name': lowercase_token,
+                  'version': self.version,
+                  }
+        open(lib_java_name, 'w').write(lib_java_template.format(**kwargs))
+        open(test_java_name, 'w').write(lib_java_test_template.format(**kwargs))
+        open('meson.build', 'w').write(lib_java_meson_template.format(**kwargs))
diff --git a/meson/mesonbuild/templates/mesontemplates.py b/meson/mesonbuild/templates/mesontemplates.py
new file mode 100644
index 000000000..6b341a219
--- /dev/null
+++ b/meson/mesonbuild/templates/mesontemplates.py
@@ -0,0 +1,73 @@
+# Copyright 2019 The Meson development team
+
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+
+#     http://www.apache.org/licenses/LICENSE-2.0
+
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+meson_executable_template = '''project('{project_name}', '{language}',
+  version : '{version}',
+  default_options : [{default_options}])
+
+executable('{executable}',
+           {sourcespec},{depspec}
+           install : true)
+'''
+
+
+meson_jar_template = '''project('{project_name}', '{language}',
+  version : '{version}',
+  default_options : [{default_options}])
+
+jar('{executable}',
+    {sourcespec},{depspec}
+    main_class: '{main_class}',
+    install : true)
+'''
+
+
+def create_meson_build(options):
+    if options.type != 'executable':
+        raise SystemExit('\nGenerating a meson.build file from existing sources is\n'
+                         'supported only for project type "executable".\n'
+                         'Run meson init in an empty directory to create a sample project.')
+    default_options = ['warning_level=3']
+    if options.language == 'cpp':
+        # This shows how to set this very common option.
+        default_options += ['cpp_std=c++14']
+    # If we get a meson.build autoformatter one day, this code could
+    # be simplified quite a bit.
+    formatted_default_options = ', '.join("'{}'".format(x) for x in default_options)
+    sourcespec = ',\n           '.join("'{}'".format(x) for x in options.srcfiles)
+    depspec = ''
+    if options.deps:
+        depspec = '\n           dependencies : [\n              '
+        depspec += ',\n              '.join("dependency('{}')".format(x)
+                                            for x in options.deps.split(','))
+        depspec += '],'
+    if options.language != 'java':
+        content = meson_executable_template.format(project_name=options.name,
+                                                   language=options.language,
+                                                   version=options.version,
+                                                   executable=options.executable,
+                                                   sourcespec=sourcespec,
+                                                   depspec=depspec,
+                                                   default_options=formatted_default_options)
+    else:
+        content = meson_jar_template.format(project_name=options.name,
+                                            language=options.language,
+                                            version=options.version,
+                                            executable=options.executable,
+                                            main_class=options.name,
+                                            sourcespec=sourcespec,
+                                            depspec=depspec,
+                                            default_options=formatted_default_options)
+    open('meson.build', 'w').write(content)
+    print('Generated meson.build file:\n\n' + content)
diff --git a/meson/mesonbuild/templates/objcpptemplates.py b/meson/mesonbuild/templates/objcpptemplates.py
new file mode 100644
index 000000000..84299a869
--- /dev/null
+++ b/meson/mesonbuild/templates/objcpptemplates.py
@@ -0,0 +1,166 @@
+# Copyright 2019 The Meson development team
+
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+
+#     http://www.apache.org/licenses/LICENSE-2.0
+
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+from mesonbuild.templates.sampleimpl import SampleImpl
+import re
+
+
+lib_h_template = '''#pragma once
+#if defined _WIN32 || defined __CYGWIN__
+  #ifdef BUILDING_{utoken}
+    #define {utoken}_PUBLIC __declspec(dllexport)
+  #else
+    #define {utoken}_PUBLIC __declspec(dllimport)
+  #endif
+#else
+  #ifdef BUILDING_{utoken}
+      #define {utoken}_PUBLIC __attribute__ ((visibility ("default")))
+  #else
+      #define {utoken}_PUBLIC
+  #endif
+#endif
+
+int {utoken}_PUBLIC {function_name}();
+
+'''
+
+lib_objcpp_template = '''#import <{header_file}>
+
+/* This function will not be exported and is not
+ * directly callable by users of this library.
+ */
+int internal_function() {{
+    return 0;
+}}
+
+int {function_name}() {{
+    return internal_function();
+}}
+'''
+
+lib_objcpp_test_template = '''#import <{header_file}>
+#import 
+
+int main(int argc, char **argv) {{
+    if(argc != 1) {{
+        std::cout << argv[0] << " takes no arguments." << std::endl;
+        return 1;
+    }}
+    return {function_name}();
+}}
+'''
+
+lib_objcpp_meson_template = '''project('{project_name}', 'objcpp',
+  version : '{version}',
+  default_options : ['warning_level=3'])
+
+# These arguments are only used to build the shared library
+# not the executables that use the library.
+lib_args = ['-DBUILDING_{utoken}']
+
+shlib = shared_library('{lib_name}', '{source_file}',
+  install : true,
+  objcpp_args : lib_args,
+  gnu_symbol_visibility : 'hidden',
+)
+
+test_exe = executable('{test_exe_name}', '{test_source_file}',
+  link_with : shlib)
+test('{test_name}', test_exe)
+
+# Make this library usable as a Meson subproject.
+{ltoken}_dep = declare_dependency(
+  include_directories: include_directories('.'),
+  link_with : shlib)
+
+# Make this library usable from the system's
+# package manager.
+install_headers('{header_file}', subdir : '{header_dir}')
+
+pkg_mod = import('pkgconfig')
+pkg_mod.generate(
+  name : '{project_name}',
+  filebase : '{ltoken}',
+  description : 'Meson sample project.',
+  subdirs : '{header_dir}',
+  libraries : shlib,
+  version : '{version}',
+)
+'''
+
+hello_objcpp_template = '''#import 
+
+#define PROJECT_NAME "{project_name}"
+
+int main(int argc, char **argv) {{
+    if(argc != 1) {{
+        std::cout << argv[0] << " takes no arguments." << std::endl;
+        return 1;
+    }}
+    std::cout << "This is project " << PROJECT_NAME << "." << std::endl;
+    return 0;
+}}
+'''
+
+hello_objcpp_meson_template = '''project('{project_name}', 'objcpp',
+  version : '{version}',
+  default_options : ['warning_level=3'])
+
+exe = executable('{exe_name}', '{source_name}',
+  install : true)
+
+test('basic', exe)
+'''
+
+
+class ObjCppProject(SampleImpl):
+    def __init__(self, options):
+        super().__init__()
+        self.name = options.name
+        self.version = options.version
+
+    def create_executable(self):
+        lowercase_token = re.sub(r'[^a-z0-9]', '_', self.name.lower())
+        source_name = lowercase_token + '.mm'
+        open(source_name, 'w').write(hello_objcpp_template.format(project_name=self.name))
+        open('meson.build', 'w').write(hello_objcpp_meson_template.format(project_name=self.name,
+                                                                          exe_name=lowercase_token,
+                                                                          source_name=source_name,
+                                                                          version=self.version))
+
+    def create_library(self):
+        lowercase_token = re.sub(r'[^a-z0-9]', '_', self.name.lower())
+        uppercase_token = lowercase_token.upper()
+        function_name = lowercase_token[0:3] + '_func'
+        test_exe_name = lowercase_token + '_test'
+        lib_h_name = lowercase_token + '.h'
+        lib_objcpp_name = lowercase_token + '.mm'
+        test_objcpp_name = lowercase_token + '_test.mm'
+        kwargs = {'utoken': uppercase_token,
+                  'ltoken': lowercase_token,
+                  'header_dir': lowercase_token,
+                  'function_name': function_name,
+                  'header_file': lib_h_name,
+                  'source_file': lib_objcpp_name,
+                  'test_source_file': test_objcpp_name,
+                  'test_exe_name': test_exe_name,
+                  'project_name': self.name,
+                  'lib_name': lowercase_token,
+                  'test_name': lowercase_token,
+                  'version': self.version,
+                  }
+        open(lib_h_name, 'w').write(lib_h_template.format(**kwargs))
+        open(lib_objcpp_name, 'w').write(lib_objcpp_template.format(**kwargs))
+        open(test_objcpp_name, 'w').write(lib_objcpp_test_template.format(**kwargs))
+        open('meson.build', 'w').write(lib_objcpp_meson_template.format(**kwargs))
+
diff --git a/meson/mesonbuild/templates/objctemplates.py b/meson/mesonbuild/templates/objctemplates.py
new file mode 100644
index 000000000..9ec4a5e11
--- /dev/null
+++ b/meson/mesonbuild/templates/objctemplates.py
@@ -0,0 +1,165 @@
+# Copyright 2019 The Meson development team
+
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+
+#     http://www.apache.org/licenses/LICENSE-2.0
+
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+from mesonbuild.templates.sampleimpl import SampleImpl
+import re
+
+
+lib_h_template = '''#pragma once
+#if defined _WIN32 || defined __CYGWIN__
+  #ifdef BUILDING_{utoken}
+    #define {utoken}_PUBLIC __declspec(dllexport)
+  #else
+    #define {utoken}_PUBLIC __declspec(dllimport)
+  #endif
+#else
+  #ifdef BUILDING_{utoken}
+      #define {utoken}_PUBLIC __attribute__ ((visibility ("default")))
+  #else
+      #define {utoken}_PUBLIC
+  #endif
+#endif
+
+int {utoken}_PUBLIC {function_name}();
+
+'''
+
+lib_objc_template = '''#import <{header_file}>
+
+/* This function will not be exported and is not
+ * directly callable by users of this library.
+ */
+int internal_function() {{
+    return 0;
+}}
+
+int {function_name}() {{
+    return internal_function();
+}}
+'''
+
+lib_objc_test_template = '''#import <{header_file}>
+#import 
+
+int main(int argc, char **argv) {{
+    if(argc != 1) {{
+        printf("%s takes no arguments.\\n", argv[0]);
+        return 1;
+    }}
+    return {function_name}();
+}}
+'''
+
+lib_objc_meson_template = '''project('{project_name}', 'objc',
+  version : '{version}',
+  default_options : ['warning_level=3'])
+
+# These arguments are only used to build the shared library
+# not the executables that use the library.
+lib_args = ['-DBUILDING_{utoken}']
+
+shlib = shared_library('{lib_name}', '{source_file}',
+  install : true,
+  objc_args : lib_args,
+  gnu_symbol_visibility : 'hidden',
+)
+
+test_exe = executable('{test_exe_name}', '{test_source_file}',
+  link_with : shlib)
+test('{test_name}', test_exe)
+
+# Make this library usable as a Meson subproject.
+{ltoken}_dep = declare_dependency(
+  include_directories: include_directories('.'),
+  link_with : shlib)
+
+# Make this library usable from the system's
+# package manager.
+install_headers('{header_file}', subdir : '{header_dir}')
+
+pkg_mod = import('pkgconfig')
+pkg_mod.generate(
+  name : '{project_name}',
+  filebase : '{ltoken}',
+  description : 'Meson sample project.',
+  subdirs : '{header_dir}',
+  libraries : shlib,
+  version : '{version}',
+)
+'''
+
+hello_objc_template = '''#import 
+
+#define PROJECT_NAME "{project_name}"
+
+int main(int argc, char **argv) {{
+    if(argc != 1) {{
+        printf("%s takes no arguments.\\n", argv[0]);
+        return 1;
+    }}
+    printf("This is project %s.\\n", PROJECT_NAME);
+    return 0;
+}}
+'''
+
+hello_objc_meson_template = '''project('{project_name}', 'objc',
+  version : '{version}',
+  default_options : ['warning_level=3'])
+
+exe = executable('{exe_name}', '{source_name}',
+  install : true)
+
+test('basic', exe)
+'''
+
+
+class ObjCProject(SampleImpl):
+    def __init__(self, options):
+        super().__init__()
+        self.name = options.name
+        self.version = options.version
+
+    def create_executable(self):
+        lowercase_token = re.sub(r'[^a-z0-9]', '_', self.name.lower())
+        source_name = lowercase_token + '.m'
+        open(source_name, 'w').write(hello_objc_template.format(project_name=self.name))
+        open('meson.build', 'w').write(hello_objc_meson_template.format(project_name=self.name,
+                                                                        exe_name=lowercase_token,
+                                                                        source_name=source_name,
+                                                                        version=self.version))
+
+    def create_library(self):
+        lowercase_token = re.sub(r'[^a-z0-9]', '_', self.name.lower())
+        uppercase_token = lowercase_token.upper()
+        function_name = lowercase_token[0:3] + '_func'
+        test_exe_name = lowercase_token + '_test'
+        lib_h_name = lowercase_token + '.h'
+        lib_objc_name = lowercase_token + '.m'
+        test_objc_name = lowercase_token + '_test.m'
+        kwargs = {'utoken': uppercase_token,
+                  'ltoken': lowercase_token,
+                  'header_dir': lowercase_token,
+                  'function_name': function_name,
+                  'header_file': lib_h_name,
+                  'source_file': lib_objc_name,
+                  'test_source_file': test_objc_name,
+                  'test_exe_name': test_exe_name,
+                  'project_name': self.name,
+                  'lib_name': lowercase_token,
+                  'test_name': lowercase_token,
+                  'version': self.version,
+                  }
+        open(lib_h_name, 'w').write(lib_h_template.format(**kwargs))
+        open(lib_objc_name, 'w').write(lib_objc_template.format(**kwargs))
+        open(test_objc_name, 'w').write(lib_objc_test_template.format(**kwargs))
+        open('meson.build', 'w').write(lib_objc_meson_template.format(**kwargs))
diff --git a/meson/mesonbuild/templates/rusttemplates.py b/meson/mesonbuild/templates/rusttemplates.py
new file mode 100644
index 000000000..c4ff53dec
--- /dev/null
+++ b/meson/mesonbuild/templates/rusttemplates.py
@@ -0,0 +1,112 @@
+# Copyright 2019 The Meson development team
+
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+
+#     http://www.apache.org/licenses/LICENSE-2.0
+
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+from mesonbuild.templates.sampleimpl import SampleImpl
+import re
+
+
+lib_rust_template = '''#![crate_name = "{crate_file}"]
+
+/* This function will not be exported and is not
+ * directly callable by users of this library.
+ */
+fn internal_function() -> i32 {{
+    return 0;
+}}
+
+pub fn {function_name}() -> i32 {{
+    return internal_function();
+}}
+'''
+
+lib_rust_test_template = '''extern crate {crate_file};
+
+fn main() {{
+    println!("printing: {{}}", {crate_file}::{function_name}());
+}}
+'''
+
+
+lib_rust_meson_template = '''project('{project_name}', 'rust',
+  version : '{version}',
+  default_options : ['warning_level=3'])
+
+shlib = static_library('{lib_name}', '{source_file}', install : true)
+
+test_exe = executable('{test_exe_name}', '{test_source_file}',
+  link_with : shlib)
+test('{test_name}', test_exe)
+
+# Make this library usable as a Meson subproject.
+{ltoken}_dep = declare_dependency(
+  include_directories: include_directories('.'),
+  link_with : shlib)
+'''
+
+hello_rust_template = '''
+fn main() {{
+    let project_name = "{project_name}";
+    println!("This is project {{}}.\\n", project_name);
+}}
+'''
+
+hello_rust_meson_template = '''project('{project_name}', 'rust',
+  version : '{version}',
+  default_options : ['warning_level=3'])
+
+exe = executable('{exe_name}', '{source_name}',
+  install : true)
+
+test('basic', exe)
+'''
+
+
+class RustProject(SampleImpl):
+    def __init__(self, options):
+        super().__init__()
+        self.name = options.name
+        self.version = options.version
+
+    def create_executable(self):
+        lowercase_token = re.sub(r'[^a-z0-9]', '_', self.name.lower())
+        source_name = lowercase_token + '.rs'
+        open(source_name, 'w').write(hello_rust_template.format(project_name=self.name))
+        open('meson.build', 'w').write(hello_rust_meson_template.format(project_name=self.name,
+                                                                        exe_name=lowercase_token,
+                                                                        source_name=source_name,
+                                                                        version=self.version))
+
+    def create_library(self):
+        lowercase_token = re.sub(r'[^a-z0-9]', '_', self.name.lower())
+        uppercase_token = lowercase_token.upper()
+        function_name = lowercase_token[0:3] + '_func'
+        test_exe_name = lowercase_token + '_test'
+        lib_crate_name = lowercase_token
+        lib_rs_name = lowercase_token + '.rs'
+        test_rs_name = lowercase_token + '_test.rs'
+        kwargs = {'utoken': uppercase_token,
+                  'ltoken': lowercase_token,
+                  'header_dir': lowercase_token,
+                  'function_name': function_name,
+                  'crate_file': lib_crate_name,
+                  'source_file': lib_rs_name,
+                  'test_source_file': test_rs_name,
+                  'test_exe_name': test_exe_name,
+                  'project_name': self.name,
+                  'lib_name': lowercase_token,
+                  'test_name': lowercase_token,
+                  'version': self.version,
+                  }
+        open(lib_rs_name, 'w').write(lib_rust_template.format(**kwargs))
+        open(test_rs_name, 'w').write(lib_rust_test_template.format(**kwargs))
+        open('meson.build', 'w').write(lib_rust_meson_template.format(**kwargs))
diff --git a/meson/mesonbuild/templates/samplefactory.py b/meson/mesonbuild/templates/samplefactory.py
new file mode 100644
index 000000000..1da2bc12d
--- /dev/null
+++ b/meson/mesonbuild/templates/samplefactory.py
@@ -0,0 +1,38 @@
+# Copyright 2019 The Meson development team
+
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+
+#     http://www.apache.org/licenses/LICENSE-2.0
+
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+from mesonbuild.templates.fortrantemplates import FortranProject
+from mesonbuild.templates.objcpptemplates import ObjCppProject
+from mesonbuild.templates.dlangtemplates import DlangProject
+from mesonbuild.templates.rusttemplates import RustProject
+from mesonbuild.templates.javatemplates import JavaProject
+from mesonbuild.templates.cudatemplates import CudaProject
+from mesonbuild.templates.objctemplates import ObjCProject
+from mesonbuild.templates.cpptemplates import CppProject
+from mesonbuild.templates.cstemplates import CSharpProject
+from mesonbuild.templates.ctemplates import CProject
+
+
+def sameple_generator(options):
+    return {
+        'c': CProject,
+        'cpp': CppProject,
+        'cs': CSharpProject,
+        'cuda': CudaProject,
+        'objc': ObjCProject,
+        'objcpp': ObjCppProject,
+        'java': JavaProject,
+        'd': DlangProject,
+        'rust': RustProject,
+        'fortran': FortranProject
+    }[options.language](options)
diff --git a/meson/mesonbuild/templates/sampleimpl.py b/meson/mesonbuild/templates/sampleimpl.py
new file mode 100644
index 000000000..20ed8ab30
--- /dev/null
+++ b/meson/mesonbuild/templates/sampleimpl.py
@@ -0,0 +1,21 @@
+# Copyright 2019 The Meson development team
+
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+
+#     http://www.apache.org/licenses/LICENSE-2.0
+
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+
+class SampleImpl:
+    def create_executable(self):
+        raise NotImplementedError('Sample implementation for "executable" not implemented!')
+
+    def create_library(self):
+        raise NotImplementedError('Sample implementation for "library" not implemented!')
diff --git a/meson/mesonbuild/wrap/__init__.py b/meson/mesonbuild/wrap/__init__.py
new file mode 100644
index 000000000..182a94de5
--- /dev/null
+++ b/meson/mesonbuild/wrap/__init__.py
@@ -0,0 +1,57 @@
+from enum import Enum
+
+# Used for the --wrap-mode command-line argument
+#
+# Special wrap modes:
+#   nofallback: Don't download wraps for dependency() fallbacks
+#   nodownload: Don't download wraps for all subproject() calls
+#
+# subprojects are used for two purposes:
+# 1. To download and build dependencies by using .wrap
+#    files if they are not provided by the system. This is
+#    usually expressed via dependency(..., fallback: ...).
+# 2. To download and build 'copylibs' which are meant to be
+#    used by copying into your project. This is always done
+#    with an explicit subproject() call.
+#
+# --wrap-mode=nofallback will never do (1)
+# --wrap-mode=nodownload will do neither (1) nor (2)
+#
+# If you are building from a release tarball, you should be
+# able to safely use 'nodownload' since upstream is
+# expected to ship all required sources with the tarball.
+#
+# If you are building from a git repository, you will want
+# to use 'nofallback' so that any 'copylib' wraps will be
+# download as subprojects.
+#
+# --wrap-mode=forcefallback will ignore external dependencies,
+# even if they match the version requirements, and automatically
+# use the fallback if one was provided. This is useful for example
+# to make sure a project builds when using the fallbacks.
+#
+# Note that these options do not affect subprojects that
+# are git submodules since those are only usable in git
+# repositories, and you almost always want to download them.
+
+# This did _not_ work when inside the WrapMode class.
+# I don't know why. If you can fix this, patches welcome.
+string_to_value = {'default': 1,
+                   'nofallback': 2,
+                   'nodownload': 3,
+                   'forcefallback': 4,
+                   }
+
+class WrapMode(Enum):
+    default = 1
+    nofallback = 2
+    nodownload = 3
+    forcefallback = 4
+
+    def __str__(self) -> str:
+        return self.name
+
+    @staticmethod
+    def from_string(mode_name: str):
+        g = string_to_value[mode_name]
+        return WrapMode(g)
diff --git a/meson/mesonbuild/wrap/wrap.py b/meson/mesonbuild/wrap/wrap.py
new file mode 100644
index 000000000..aba220e1b
--- /dev/null
+++ b/meson/mesonbuild/wrap/wrap.py
@@ -0,0 +1,549 @@
+# Copyright 2015 The Meson development team
+
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+
+#     http://www.apache.org/licenses/LICENSE-2.0
+
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from .. import mlog
+import contextlib
+import urllib.request
+import urllib.error
+import urllib.parse
+import os
+import hashlib
+import shutil
+import tempfile
+import stat
+import subprocess
+import sys
+import configparser
+import typing as T
+
+from pathlib import Path
+from . import WrapMode
+from .. import coredata
+from ..mesonlib import git, GIT, ProgressBar, MesonException
+
+if T.TYPE_CHECKING:
+    import http.client
+
+try:
+    # Importing is just done to check if SSL exists, so all warnings
+    # regarding 'imported but unused' can be safely ignored
+    import ssl  # noqa
+    has_ssl = True
+    API_ROOT = 'https://wrapdb.mesonbuild.com/v1/'
+except ImportError:
+    has_ssl = False
+    API_ROOT = 'http://wrapdb.mesonbuild.com/v1/'
+
+REQ_TIMEOUT = 600.0
+SSL_WARNING_PRINTED = False
+WHITELIST_SUBDOMAIN = 'wrapdb.mesonbuild.com'
+
+def quiet_git(cmd: T.List[str], workingdir: str) -> T.Tuple[bool, str]:
+    if not GIT:
+        return False, 'Git program not found.'
+    pc = git(cmd, workingdir, universal_newlines=True,
+             stdout=subprocess.PIPE, stderr=subprocess.PIPE)
+    if pc.returncode != 0:
+        return False, pc.stderr
+    return True, pc.stdout
+
+def verbose_git(cmd: T.List[str], workingdir: str, check: bool = False) -> bool:
+    if not GIT:
+        return False
+    try:
+        return git(cmd, workingdir, check=check).returncode == 0
+    except subprocess.CalledProcessError:
+        raise WrapException('Git command failed')
+
+def whitelist_wrapdb(urlstr: str) -> urllib.parse.ParseResult:
+    """ raises WrapException if not whitelisted subdomain """
+    url = urllib.parse.urlparse(urlstr)
+    if not url.hostname:
+        raise WrapException('{} is not a valid URL'.format(urlstr))
+    if not url.hostname.endswith(WHITELIST_SUBDOMAIN):
+        raise WrapException('{} is not a whitelisted WrapDB URL'.format(urlstr))
+    if has_ssl and not url.scheme == 'https':
+        raise WrapException('WrapDB did not have expected SSL https url, instead got {}'.format(urlstr))
+    return url
+
+def open_wrapdburl(urlstring: str) -> 'http.client.HTTPResponse':
+    global SSL_WARNING_PRINTED
+
+    url = whitelist_wrapdb(urlstring)
+    if has_ssl:
+        try:
+            return urllib.request.urlopen(urllib.parse.urlunparse(url), timeout=REQ_TIMEOUT)
+        except urllib.error.URLError as excp:
+            raise WrapException('WrapDB connection failed to {} with error {}'.format(urlstring, excp))
+
+    # following code is only for those without Python SSL
+    nossl_url = url._replace(scheme='http')
+    if not SSL_WARNING_PRINTED:
+        mlog.warning('SSL module not available in {}: WrapDB traffic not authenticated.'.format(sys.executable))
+        SSL_WARNING_PRINTED = True
+    try:
+        return urllib.request.urlopen(urllib.parse.urlunparse(nossl_url), timeout=REQ_TIMEOUT)
+    except urllib.error.URLError as excp:
+        raise WrapException('WrapDB connection failed to {} with error {}'.format(urlstring, excp))
+
+
+class WrapException(MesonException):
+    pass
+
+class WrapNotFoundException(WrapException):
+    pass
+
+class PackageDefinition:
+    def __init__(self, fname: str):
+        self.filename = fname
+        self.type = None
+        self.values = {} # type: T.Dict[str, str]
+        self.provided_deps = {} # type: T.Dict[str, T.Optional[str]]
+        self.provided_programs = [] # type: T.List[str]
+        self.basename = os.path.basename(fname)
+        self.name = self.basename
+        if self.name.endswith('.wrap'):
+            self.name = self.name[:-5]
+        self.provided_deps[self.name] = None
+        if fname.endswith('.wrap'):
+            self.parse_wrap(fname)
+        self.directory = self.values.get('directory', self.name)
+        if os.path.dirname(self.directory):
+            raise WrapException('Directory key must be a name and not a path')
+
+    def parse_wrap(self, fname: str):
+        try:
+            self.config = configparser.ConfigParser(interpolation=None)
+            self.config.read(fname)
+        except configparser.Error:
+            raise WrapException('Failed to parse {}'.format(self.basename))
+        self.parse_wrap_section()
+        self.parse_provide_section()
+
+    def parse_wrap_section(self):
+        if len(self.config.sections()) < 1:
+            raise WrapException('Missing sections in {}'.format(self.basename))
+        self.wrap_section = self.config.sections()[0]
+        if not self.wrap_section.startswith('wrap-'):
+            m = '{!r} is not a valid first section in {}'
+            raise WrapException(m.format(self.wrap_section, self.basename))
+        self.type = self.wrap_section[5:]
+        self.values = dict(self.config[self.wrap_section])
+
+    def parse_provide_section(self):
+        if self.config.has_section('provide'):
+            for k, v in self.config['provide'].items():
+                if k == 'dependency_names':
+                    # A comma separated list of dependency names that does not
+                    # need a variable name
+                    names = {n.strip(): None for n in v.split(',')}
+                    self.provided_deps.update(names)
+                    continue
+                if k == 'program_names':
+                    # A comma separated list of program names
+                    names = [n.strip() for n in v.split(',')]
+                    self.provided_programs += names
+                    continue
+                if not v:
+                    m = ('Empty dependency variable name for {!r} in {}. '
+                         'If the subproject uses meson.override_dependency() '
+                         'it can be added in the "dependency_names" special key.')
+                    raise WrapException(m.format(k, self.basename))
+                self.provided_deps[k] = v
+
+    def get(self, key: str) -> str:
+        try:
+            return self.values[key]
+        except KeyError:
+            m = 'Missing key {!r} in {}'
+            raise WrapException(m.format(key, self.basename))
+
+def get_directory(subdir_root: str, packagename: str) -> str:
+    fname = os.path.join(subdir_root, packagename + '.wrap')
+    if os.path.isfile(fname):
+        wrap = PackageDefinition(fname)
+        return wrap.directory
+    return packagename
+
+class Resolver:
+    def __init__(self, subdir_root: str, wrap_mode=WrapMode.default):
+        self.wrap_mode = wrap_mode
+        self.subdir_root = subdir_root
+        self.cachedir = os.path.join(self.subdir_root, 'packagecache')
+        self.filesdir = os.path.join(self.subdir_root, 'packagefiles')
+        self.wraps = {} # type: T.Dict[str, PackageDefinition]
+        self.provided_deps = {} # type: T.Dict[str, PackageDefinition]
+        self.provided_programs = {} # type: T.Dict[str, PackageDefinition]
+        self.load_wraps()
+
+    def load_wraps(self):
+        if not os.path.isdir(self.subdir_root):
+            return
+        root, dirs, files = next(os.walk(self.subdir_root))
+        for i in files:
+            if not i.endswith('.wrap'):
+                continue
+            fname = os.path.join(self.subdir_root, i)
+            wrap = PackageDefinition(fname)
+            self.wraps[wrap.name] = wrap
+            if wrap.directory in dirs:
+                dirs.remove(wrap.directory)
+        # Add dummy package definition for directories not associated with a wrap file.
+        for i in dirs:
+            if i in ['packagecache', 'packagefiles']:
+                continue
+            fname = os.path.join(self.subdir_root, i)
+            wrap = PackageDefinition(fname)
+            self.wraps[wrap.name] = wrap
+
+        for wrap in self.wraps.values():
+            for k in wrap.provided_deps.keys():
+                if k in self.provided_deps:
+                    prev_wrap = self.provided_deps[k]
+                    m = 'Multiple wrap files provide {!r} dependency: {} and {}'
+                    raise WrapException(m.format(k, wrap.basename, prev_wrap.basename))
+                self.provided_deps[k] = wrap
+            for k in wrap.provided_programs:
+                if k in self.provided_programs:
+                    prev_wrap = self.provided_programs[k]
+                    m = 'Multiple wrap files provide {!r} program: {} and {}'
+                    raise WrapException(m.format(k, wrap.basename, prev_wrap.basename))
+                self.provided_programs[k] = wrap
+
+    def find_dep_provider(self, packagename: str):
+        # Return value is in the same format as fallback kwarg:
+        # ['subproject_name', 'variable_name'], or 'subproject_name'.
+        wrap = self.provided_deps.get(packagename)
+        if wrap:
+            dep_var = wrap.provided_deps.get(packagename)
+            if dep_var:
+                return [wrap.name, dep_var]
+            return wrap.name
+        return None
+
+    def find_program_provider(self, names: T.List[str]):
+        for name in names:
+            wrap = self.provided_programs.get(name)
+            if wrap:
+                return wrap.name
+        return None
+
+    def resolve(self, packagename: str, method: str, current_subproject: str = '') -> str:
+        self.current_subproject = current_subproject
+        self.packagename = packagename
+        self.directory = packagename
+        self.wrap = self.wraps.get(packagename)
+        if not self.wrap:
+            m = 'Subproject directory not found and {}.wrap file not found'
+            raise WrapNotFoundException(m.format(self.packagename))
+        self.directory = self.wrap.directory
+        self.dirname = os.path.join(self.subdir_root, self.directory)
+
+        meson_file = os.path.join(self.dirname, 'meson.build')
+        cmake_file = os.path.join(self.dirname, 'CMakeLists.txt')
+
+        if method not in ['meson', 'cmake']:
+            raise WrapException('Only the methods "meson" and "cmake" are supported')
+
+        # The directory is there and has meson.build? Great, use it.
+        if method == 'meson' and os.path.exists(meson_file):
+            return self.directory
+        if method == 'cmake' and os.path.exists(cmake_file):
+            return self.directory
+
+        # Check if the subproject is a git submodule
+        self.resolve_git_submodule()
+
+        if os.path.exists(self.dirname):
+            if not os.path.isdir(self.dirname):
+                raise WrapException('Path already exists but is not a directory')
+        else:
+            if self.wrap.type == 'file':
+                self.get_file()
+            else:
+                self.check_can_download()
+                if self.wrap.type == 'git':
+                    self.get_git()
+                elif self.wrap.type == "hg":
+                    self.get_hg()
+                elif self.wrap.type == "svn":
+                    self.get_svn()
+                else:
+                    raise WrapException('Unknown wrap type {!r}'.format(self.wrap.type))
+            self.apply_patch()
+
+        # A meson.build or CMakeLists.txt file is required in the directory
+        if method == 'meson' and not os.path.exists(meson_file):
+            raise WrapException('Subproject exists but has no meson.build file')
+        if method == 'cmake' and not os.path.exists(cmake_file):
+            raise WrapException('Subproject exists but has no CMakeLists.txt file')
+
+        return self.directory
+
+    def check_can_download(self) -> None:
+        # Don't download subproject data based on wrap file if requested.
+        # Git submodules are ok (see above)!
+        if self.wrap_mode is WrapMode.nodownload:
+            m = 'Automatic wrap-based subproject downloading is disabled'
+            raise WrapException(m)
+
+    def resolve_git_submodule(self) -> bool:
+        if not GIT:
+            raise WrapException('Git program not found.')
+        # Are we in a git repository?
+        ret, out = quiet_git(['rev-parse'], self.subdir_root)
+        if not ret:
+            return False
+        # Is `dirname` a submodule?
+        ret, out = quiet_git(['submodule', 'status', self.dirname], self.subdir_root)
+        if not ret:
+            return False
+        # Submodule has not been added, add it
+        if out.startswith('+'):
+            mlog.warning('git submodule might be out of date')
+            return True
+        elif out.startswith('U'):
+            raise WrapException('git submodule has merge conflicts')
+        # Submodule exists, but is deinitialized or wasn't initialized
+        elif out.startswith('-'):
+            if verbose_git(['submodule', 'update', '--init', self.dirname], self.subdir_root):
+                return True
+            raise WrapException('git submodule failed to init')
+        # Submodule looks fine, but maybe it wasn't populated properly. Do a checkout.
+        elif out.startswith(' '):
+            verbose_git(['checkout', '.'], self.dirname)
+            # Even if checkout failed, try building it anyway and let the user
+            # handle any problems manually.
+            return True
+        elif out == '':
+            # It is not a submodule, just a folder that exists in the main repository.
+            return False
+        m = 'Unknown git submodule output: {!r}'
+        raise WrapException(m.format(out))
+
+    def get_file(self) -> None:
+        path = self.get_file_internal('source')
+        extract_dir = self.subdir_root
+        # Some upstreams ship packages that do not have a leading directory.
+        # Create one for them.
+        if 'lead_directory_missing' in self.wrap.values:
+            os.mkdir(self.dirname)
+            extract_dir = self.dirname
+        shutil.unpack_archive(path, extract_dir)
+
+    def get_git(self) -> None:
+        if not GIT:
+            raise WrapException('Git program not found.')
+        revno = self.wrap.get('revision')
+        is_shallow = False
+        depth_option = []    # type: T.List[str]
+        if self.wrap.values.get('depth', '') != '':
+            is_shallow = True
+            depth_option = ['--depth', self.wrap.values.get('depth')]
+        # for some reason git only allows commit ids to be shallowly fetched by fetch not with clone
+        if is_shallow and self.is_git_full_commit_id(revno):
+            # git doesn't support directly cloning shallowly for commits,
+            # so we follow https://stackoverflow.com/a/43136160
+            verbose_git(['init', self.directory], self.subdir_root, check=True)
+            verbose_git(['remote', 'add', 'origin', self.wrap.get('url')], self.dirname, check=True)
+            revno = self.wrap.get('revision')
+            verbose_git(['fetch', *depth_option, 'origin', revno], self.dirname, check=True)
+            verbose_git(['checkout', revno], self.dirname, check=True)
+            if self.wrap.values.get('clone-recursive', '').lower() == 'true':
+                verbose_git(['submodule', 'update', '--init', '--checkout',
+                             '--recursive', *depth_option], self.dirname, check=True)
+            push_url = self.wrap.values.get('push-url')
+            if push_url:
+                verbose_git(['remote', 'set-url', '--push', 'origin', push_url], self.dirname, check=True)
+        else:
+            if not is_shallow:
+                verbose_git(['clone', self.wrap.get('url'), self.directory], self.subdir_root, check=True)
+                if revno.lower() != 'head':
+                    if not verbose_git(['checkout', revno], self.dirname):
+                        verbose_git(['fetch', self.wrap.get('url'), revno], self.dirname, check=True)
+                        verbose_git(['checkout', revno], self.dirname, check=True)
+            else:
+                verbose_git(['clone', *depth_option, '--branch', revno, self.wrap.get('url'),
+                             self.directory], self.subdir_root, check=True)
+            if self.wrap.values.get('clone-recursive', '').lower() == 'true':
+                verbose_git(['submodule', 'update', '--init', '--checkout', '--recursive', *depth_option],
+                            self.dirname, check=True)
+            push_url = self.wrap.values.get('push-url')
+            if push_url:
+                verbose_git(['remote', 'set-url', '--push', 'origin', push_url], self.dirname, check=True)
+
+    def is_git_full_commit_id(self, revno: str) -> bool:
+        result = False
+        if len(revno) in (40, 64): # 40 for sha1, 64 for upcoming sha256
+            result = all((ch in '0123456789AaBbCcDdEeFf' for ch in revno))
+        return result
+
+    def get_hg(self) -> None:
+        revno = self.wrap.get('revision')
+        hg = shutil.which('hg')
+        if not hg:
+            raise WrapException('Mercurial program not found.')
+        subprocess.check_call([hg, 'clone', self.wrap.get('url'),
+                               self.directory], cwd=self.subdir_root)
+        if revno.lower() != 'tip':
+            subprocess.check_call([hg, 'checkout', revno],
+                                  cwd=self.dirname)
+
+    def get_svn(self) -> None:
+        revno = self.wrap.get('revision')
+        svn = shutil.which('svn')
+        if not svn:
+            raise WrapException('SVN program not found.')
+        subprocess.check_call([svn, 'checkout', '-r', revno, self.wrap.get('url'),
+                               self.directory], cwd=self.subdir_root)
+
+    def get_data(self, urlstring: str) -> T.Tuple[str, str]:
+        blocksize = 10 * 1024
+        h = hashlib.sha256()
+        tmpfile = tempfile.NamedTemporaryFile(mode='wb', dir=self.cachedir, delete=False)
+        url = urllib.parse.urlparse(urlstring)
+        if url.hostname and url.hostname.endswith(WHITELIST_SUBDOMAIN):
+            resp = open_wrapdburl(urlstring)
+        elif WHITELIST_SUBDOMAIN in urlstring:
+            raise WrapException('{} may be a WrapDB-impersonating URL'.format(urlstring))
+        else:
+            try:
+                req = urllib.request.Request(urlstring, headers={'User-Agent': 'mesonbuild/{}'.format(coredata.version)})
+                resp = urllib.request.urlopen(req, timeout=REQ_TIMEOUT)
+            except urllib.error.URLError as e:
+                mlog.log(str(e))
+                raise WrapException('could not get {} is the internet available?'.format(urlstring))
+        with contextlib.closing(resp) as resp:
+            try:
+                dlsize = int(resp.info()['Content-Length'])
+            except TypeError:
+                dlsize = None
+            if dlsize is None:
+                print('Downloading file of unknown size.')
+                while True:
+                    block = resp.read(blocksize)
+                    if block == b'':
+                        break
+                    h.update(block)
+                    tmpfile.write(block)
+                hashvalue = h.hexdigest()
+                return hashvalue, tmpfile.name
+            sys.stdout.flush()
+            progress_bar = ProgressBar(bar_type='download', total=dlsize,
+                                       desc='Downloading')
+            while True:
+                block = resp.read(blocksize)
+                if block == b'':
+                    break
+                h.update(block)
+                tmpfile.write(block)
+                progress_bar.update(len(block))
+            progress_bar.close()
+            hashvalue = h.hexdigest()
+        return hashvalue, tmpfile.name
+
+    def check_hash(self, what: str, path: str, hash_required: bool = True) -> None:
+        if what + '_hash' not in self.wrap.values and not hash_required:
+            return
+        expected = self.wrap.get(what + '_hash')
+        h = hashlib.sha256()
+        with open(path, 'rb') as f:
+            h.update(f.read())
+        dhash = h.hexdigest()
+        if dhash != expected:
+            raise WrapException('Incorrect hash for {}:\n {} expected\n {} actual.'.format(what, expected, dhash))
+
+    def download(self, what: str, ofname: str, fallback=False) -> None:
+        self.check_can_download()
+        srcurl = self.wrap.get(what + ('_fallback_url' if fallback else '_url'))
+        mlog.log('Downloading', mlog.bold(self.packagename), what, 'from', mlog.bold(srcurl))
+        try:
+            dhash, tmpfile = self.get_data(srcurl)
+            expected = self.wrap.get(what + '_hash')
+            if dhash != expected:
+                os.remove(tmpfile)
+                raise WrapException('Incorrect hash for {}:\n {} expected\n {} actual.'.format(what, expected, dhash))
+        except WrapException:
+            if not fallback:
+                if what + '_fallback_url' in self.wrap.values:
+                    return self.download(what, ofname, fallback=True)
+                mlog.log('A fallback URL could be specified using',
+                         mlog.bold(what + '_fallback_url'), 'key in the wrap file')
+            raise
+        os.rename(tmpfile, ofname)
+
+    def get_file_internal(self, what: str) -> str:
+        filename = self.wrap.get(what + '_filename')
+        if what + '_url' in self.wrap.values:
+            cache_path = os.path.join(self.cachedir, filename)
+
+            if os.path.exists(cache_path):
+                self.check_hash(what, cache_path)
+                mlog.log('Using', mlog.bold(self.packagename), what, 'from cache.')
+                return cache_path
+
+            if not os.path.isdir(self.cachedir):
+                os.mkdir(self.cachedir)
+            self.download(what, cache_path)
+            return cache_path
+        else:
+            from ..interpreterbase import FeatureNew
+            FeatureNew('Local wrap patch files without {}_url'.format(what), '0.55.0').use(self.current_subproject)
+            path = Path(self.filesdir) / filename
+
+            if not path.exists():
+                raise WrapException('File "{}" does not exist'.format(path))
+            self.check_hash(what, path.as_posix(), hash_required=False)
+
+            return path.as_posix()
+
+    def apply_patch(self) -> None:
+        if 'patch_filename' in self.wrap.values and 'patch_directory' in self.wrap.values:
+            m = 'Wrap file {!r} must not have both "patch_filename" and "patch_directory"'
+            raise WrapException(m.format(self.wrap.basename))
+        if 'patch_filename' in self.wrap.values:
+            path = self.get_file_internal('patch')
+            try:
+                shutil.unpack_archive(path, self.subdir_root)
+            except Exception:
+                with tempfile.TemporaryDirectory() as workdir:
+                    shutil.unpack_archive(path, workdir)
+                    self.copy_tree(workdir, self.subdir_root)
+        elif 'patch_directory' in self.wrap.values:
+            from ..interpreterbase import FeatureNew
+            FeatureNew('patch_directory', '0.55.0').use(self.current_subproject)
+            patch_dir = self.wrap.values['patch_directory']
+            src_dir = os.path.join(self.filesdir, patch_dir)
+            if not os.path.isdir(src_dir):
+                raise WrapException('patch directory does not exists: {}'.format(patch_dir))
+            self.copy_tree(src_dir, self.dirname)
+
+    def copy_tree(self, root_src_dir: str, root_dst_dir: str) -> None:
+        """
+        Copy directory tree. Overwrites also read only files.
+        """
+        for src_dir, _, files in os.walk(root_src_dir):
+            dst_dir = src_dir.replace(root_src_dir, root_dst_dir, 1)
+            if not os.path.exists(dst_dir):
+                os.makedirs(dst_dir)
+            for file_ in files:
+                src_file = os.path.join(src_dir, file_)
+                dst_file = os.path.join(dst_dir, file_)
+                if os.path.exists(dst_file):
+                    try:
+                        os.remove(dst_file)
+                    except PermissionError:
+                        os.chmod(dst_file, stat.S_IWUSR)
+                        os.remove(dst_file)
+                shutil.copy2(src_file, dst_dir)
diff --git a/meson/mesonbuild/wrap/wraptool.py b/meson/mesonbuild/wrap/wraptool.py
new file mode 100644
index 000000000..e01e15985
--- /dev/null
+++ b/meson/mesonbuild/wrap/wraptool.py
@@ -0,0 +1,210 @@
+# Copyright 2015-2016 The Meson development team
+
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+
+#     http://www.apache.org/licenses/LICENSE-2.0
+
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import json
+import sys, os
+import configparser
+import shutil
+
+from glob import glob
+
+from .wrap import API_ROOT, open_wrapdburl
+
+from .. import mesonlib
+
+def add_arguments(parser):
+    subparsers = parser.add_subparsers(title='Commands', dest='command')
+    subparsers.required = True
+
+    p = subparsers.add_parser('list', help='show all available projects')
+    p.set_defaults(wrap_func=list_projects)
+
+    p = subparsers.add_parser('search', help='search the db by name')
+    p.add_argument('name')
+    p.set_defaults(wrap_func=search)
+
+    p = subparsers.add_parser('install', help='install the specified project')
+    p.add_argument('name')
+    p.set_defaults(wrap_func=install)
+
+    p = subparsers.add_parser('update', help='update the project to its newest available release')
+    p.add_argument('name')
+    p.set_defaults(wrap_func=update)
+
+    p = subparsers.add_parser('info', help='show available versions of a project')
+    p.add_argument('name')
+    p.set_defaults(wrap_func=info)
+
+    p = subparsers.add_parser('status', help='show installed and available versions of your projects')
+    p.set_defaults(wrap_func=status)
+
+    p = subparsers.add_parser('promote', help='bring a subsubproject up to the master project')
+    p.add_argument('project_path')
+    p.set_defaults(wrap_func=promote)
+
+def get_result(urlstring):
+    u = open_wrapdburl(urlstring)
+    data = u.read().decode('utf-8')
+    jd = json.loads(data)
+    if jd['output'] != 'ok':
+        print('Got bad output from server.', file=sys.stderr)
+        raise SystemExit(data)
+    return jd
+
+def get_projectlist():
+    jd = get_result(API_ROOT + 'projects')
+    projects = jd['projects']
+    return projects
+
+def list_projects(options):
+    projects = get_projectlist()
+    for p in projects:
+        print(p)
+
+def search(options):
+    name = options.name
+    jd = get_result(API_ROOT + 'query/byname/' + name)
+    for p in jd['projects']:
+        print(p)
+
+def get_latest_version(name: str) -> tuple:
+    jd = get_result(API_ROOT + 'query/get_latest/' + name)
+    branch = jd['branch']
+    revision = jd['revision']
+    return branch, revision
+
+def install(options):
+    name = options.name
+    if not os.path.isdir('subprojects'):
+        raise SystemExit('Subprojects dir not found. Run this script in your source root directory.')
+    if os.path.isdir(os.path.join('subprojects', name)):
+        raise SystemExit('Subproject directory for this project already exists.')
+    wrapfile = os.path.join('subprojects', name + '.wrap')
+    if os.path.exists(wrapfile):
+        raise SystemExit('Wrap file already exists.')
+    (branch, revision) = get_latest_version(name)
+    u = open_wrapdburl(API_ROOT + 'projects/{}/{}/{}/get_wrap'.format(name, branch, revision))
+    data = u.read()
+    with open(wrapfile, 'wb') as f:
+        f.write(data)
+    print('Installed', name, 'branch', branch, 'revision', revision)
+
+def parse_patch_url(patch_url):
+    arr = patch_url.split('/')
+    return arr[-3], int(arr[-2])
+
+def get_current_version(wrapfile):
+    cp = configparser.ConfigParser(interpolation=None)
+    cp.read(wrapfile)
+    cp = cp['wrap-file']
+    patch_url = cp['patch_url']
+    branch, revision = parse_patch_url(patch_url)
+    return branch, revision, cp['directory'], cp['source_filename'], cp['patch_filename']
+
+def update_wrap_file(wrapfile, name, new_branch, new_revision):
+    u = open_wrapdburl(API_ROOT + 'projects/{}/{}/{}/get_wrap'.format(name, new_branch, new_revision))
+    data = u.read()
+    with open(wrapfile, 'wb') as f:
+        f.write(data)
+
+def update(options):
+    name = options.name
+    if not os.path.isdir('subprojects'):
+        raise SystemExit('Subprojects dir not found. Run this command in your source root directory.')
+    wrapfile = os.path.join('subprojects', name + '.wrap')
+    if not os.path.exists(wrapfile):
+        raise SystemExit('Project ' + name + ' is not in use.')
+    (branch, revision, subdir, src_file, patch_file) = get_current_version(wrapfile)
+    (new_branch, new_revision) = get_latest_version(name)
+    if new_branch == branch and new_revision == revision:
+        print('Project ' + name + ' is already up to date.')
+        raise SystemExit
+    update_wrap_file(wrapfile, name, new_branch, new_revision)
+    shutil.rmtree(os.path.join('subprojects', subdir), ignore_errors=True)
+    try:
+        os.unlink(os.path.join('subprojects/packagecache', src_file))
+    except FileNotFoundError:
+        pass
+    try:
+        os.unlink(os.path.join('subprojects/packagecache', patch_file))
+    except FileNotFoundError:
+        pass
+    print('Updated', name, 'to branch', new_branch, 'revision', new_revision)
+
+def info(options):
+    name = options.name
+    jd = get_result(API_ROOT + 'projects/' + name)
+    versions = jd['versions']
+    if not versions:
+        raise SystemExit('No available versions of' + name)
+    print('Available versions of {}:'.format(name))
+    for v in versions:
+        print(' ', v['branch'], v['revision'])
+
+def do_promotion(from_path, spdir_name):
+    if os.path.isfile(from_path):
+        assert(from_path.endswith('.wrap'))
+        shutil.copy(from_path, spdir_name)
+    elif os.path.isdir(from_path):
+        sproj_name = os.path.basename(from_path)
+        outputdir = os.path.join(spdir_name, sproj_name)
+        if os.path.exists(outputdir):
+            raise SystemExit('Output dir {} already exists. Will not overwrite.'.format(outputdir))
+        shutil.copytree(from_path, outputdir, ignore=shutil.ignore_patterns('subprojects'))
+
+def promote(options):
+    argument = options.project_path
+    spdir_name = 'subprojects'
+    sprojs = mesonlib.detect_subprojects(spdir_name)
+
+    # check if the argument is a full path to a subproject directory or wrap file
+    system_native_path_argument = argument.replace('/', os.sep)
+    for matches in sprojs.values():
+        if system_native_path_argument in matches:
+            do_promotion(system_native_path_argument, spdir_name)
+            return
+
+    # otherwise the argument is just a subproject basename which must be unambiguous
+    if argument not in sprojs:
+        raise SystemExit('Subproject {} not found in directory tree.'.format(argument))
+    matches = sprojs[argument]
+    if len(matches) > 1:
+        print('There is more than one version of {} in tree. Please specify which one to promote:\n'.format(argument), file=sys.stderr)
+        for s in matches:
+            print(s, file=sys.stderr)
+        raise SystemExit(1)
+    do_promotion(matches[0], spdir_name)
+
+def status(options):
+    print('Subproject status')
+    for w in glob('subprojects/*.wrap'):
+        name = os.path.basename(w)[:-5]
+        try:
+            (latest_branch, latest_revision) = get_latest_version(name)
+        except Exception:
+            print('', name, 'not available in wrapdb.', file=sys.stderr)
+            continue
+        try:
+            (current_branch, current_revision, _, _, _) = get_current_version(w)
+        except Exception:
+            print('Wrap file not from wrapdb.', file=sys.stderr)
+            continue
+        if current_branch == latest_branch and current_revision == latest_revision:
+            print('', name, 'up to date. Branch {}, revision {}.'.format(current_branch, current_revision))
+        else:
+            print('', name, 'not up to date. Have {} {}, but {} {} is available.'.format(current_branch, current_revision, latest_branch, latest_revision))
+
+def run(options):
+    options.wrap_func(options)
+    return 0
diff --git a/meson/msi/License.rtf b/meson/msi/License.rtf
new file mode 100644
index 000000000..b3945baa7
--- /dev/null
+++ b/meson/msi/License.rtf
@@ -0,0 +1,73 @@
+{\rtf1\ansi\ansicpg1252\deff0{\fonttbl{\f0\fswiss\fprq2\fcharset0 Arial;}}
+{\colortbl ;\red0\green0\blue255;}
+{\*\generator Msftedit 5.41.21.2510;}\viewkind4\uc1\pard\qc\lang1033\b\f0\fs18 Apache License\par
+Version 2.0, January 2004\par
+{\field{\*\fldinst{HYPERLINK "http://www.apache.org/licenses/"}}{\fldrslt{\ul\cf1 http://www.apache.org/licenses/}}}\f0\fs18\par
+\b0\par
+\pard TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION\par
+\par
+\pard\fi-180\li180 1. Definitions.\par
+\par
+\pard\li180 "License" shall mean the terms and conditions for use, reproduction, and distribution as defined by Sections 1 through 9 of this document.\par
+\par
+"Licensor" shall mean the copyright owner or entity authorized by the copyright owner that is granting the License.\par
+\par
+"Legal Entity" shall mean the union of the acting entity and all other entities that control, are controlled by, or are under common control with that entity. For the purposes of this definition, "control" means (i) the power, direct or indirect, to cause the direction or management of such entity, whether by contract or otherwise, or (ii) ownership of fifty percent (50%) or more of the outstanding shares, or (iii) beneficial ownership of such entity.\par
+\par
+"You" (or "Your") shall mean an individual or Legal Entity exercising permissions granted by this License.\par
+\par
+"Source" form shall mean the preferred form for making modifications, including but not limited to software source code, documentation source, and configuration files.\par
+\par
+"Object" form shall mean any form resulting from mechanical transformation or translation of a Source form, including but not limited to compiled object code, generated documentation, and conversions to other media types.\par
+\par
+"Work" shall mean the work of authorship, whether in Source or Object form, made available under the License, as indicated by a copyright notice that is included in or attached to the work (an example is provided in the Appendix below).\par
+\par
+"Derivative Works" shall mean any work, whether in Source or Object form, that is based on (or derived from) the Work and for which the editorial revisions, annotations, elaborations, or other modifications represent, as a whole, an original work of authorship. For the purposes of this License, Derivative Works shall not include works that remain separable from, or merely link (or bind by name) to the interfaces of, the Work and Derivative Works thereof.\par
+\par
+"Contribution" shall mean any work of authorship, including the original version of the Work and any modifications or additions to that Work or Derivative Works thereof, that is intentionally submitted to Licensor for inclusion in the Work by the copyright owner or by an individual or Legal Entity authorized to submit on behalf of the copyright owner. For the purposes of this definition, "submitted" means any form of electronic, verbal, or written communication sent to the Licensor or its representatives, including but not limited to communication on electronic mailing lists, source code control systems, and issue tracking systems that are managed by, or on behalf of, the Licensor for the purpose of discussing and improving the Work, but excluding communication that is conspicuously marked or otherwise designated in writing by the copyright owner as "Not a Contribution."\par
+\par
+"Contributor" shall mean Licensor and any individual or Legal Entity on behalf of whom a Contribution has been received by Licensor and subsequently incorporated within the Work.\par
+\pard\par
+\pard\fi-180\li180 2. Grant of Copyright License. Subject to the terms and conditions of this License, each Contributor hereby grants to You a perpetual, worldwide, non-exclusive, no-charge, royalty-free, irrevocable copyright license to reproduce, prepare Derivative Works of, publicly display, publicly perform, sublicense, and distribute the Work and such Derivative Works in Source or Object form.\par
+\par
+3. Grant of Patent License. Subject to the terms and conditions of this License, each Contributor hereby grants to You a perpetual, worldwide, non-exclusive, no-charge, royalty-free, irrevocable (except as stated in this section) patent license to make, have made, use, offer to sell, sell, import, and otherwise transfer the Work, where such license applies only to those patent claims licensable by such Contributor that are necessarily infringed by their Contribution(s) alone or by combination of their Contribution(s) with the Work to which such Contribution(s) was submitted. If You institute patent litigation against any entity (including a cross-claim or counterclaim in a lawsuit) alleging that the Work or a Contribution incorporated within the Work constitutes direct or contributory patent infringement, then any patent licenses granted to You under this License for that Work shall terminate as of the date such litigation is filed.\par
+\par
+4. Redistribution. You may reproduce and distribute copies of the Work or Derivative Works thereof in any medium, with or without modifications, and in Source or Object form, provided that You meet the following conditions:\par
+\pard\par
+\pard\fi-270\li450 (a) You must give any other recipients of the Work or Derivative Works a copy of this License; and\par
+\par
+(b) You must cause any modified files to carry prominent notices stating that You changed the files; and\par
+\par
+(c) You must retain, in the Source form of any Derivative Works that You distribute, all copyright, patent, trademark, and attribution notices from the Source form of the Work, excluding those notices that do not pertain to any part of the Derivative Works; and\par
+\par
+(d) If the Work includes a "NOTICE" text file as part of its distribution, then any Derivative Works that You distribute must include a readable copy of the attribution notices contained within such NOTICE file, excluding those notices that do not pertain to any part of the Derivative Works, in at least one of the following places: within a NOTICE text file distributed as part of the Derivative Works; within the Source form or documentation, if provided along with the Derivative Works; or, within a display generated by the Derivative Works, if and wherever such third-party notices normally appear. The contents of the NOTICE file are for informational purposes only and do not modify the License. You may add Your own attribution notices within Derivative Works that You distribute, alongside or as an addendum to the NOTICE text from the Work, provided that such additional attribution notices cannot be construed as modifying the License.\par
+\pard\par
+\pard\li180 You may add Your own copyright statement to Your modifications and may provide additional or different license terms and conditions for use, reproduction, or distribution of Your modifications, or  for any such Derivative Works as a whole, provided Your use, reproduction, and distribution of the Work otherwise complies with the conditions stated in this License.\par
+\pard\par
+\pard\fi-180\li180 5. Submission of Contributions. Unless You explicitly state otherwise,  any Contribution intentionally submitted for inclusion in the Work by You to the Licensor shall be under the terms and conditions of this License, without any additional terms or conditions. Notwithstanding the above, nothing herein shall supersede or modify the terms of any separate license agreement you may have executed with Licensor regarding such Contributions.\par
+\par
+6. Trademarks. This License does not grant permission to use the trade names, trademarks, service marks, or product names of the Licensor, except as required for reasonable and customary use in describing the origin of the Work and reproducing the content of the NOTICE file.\par
+\par
+7. Disclaimer of Warranty. Unless required by applicable law or agreed to in writing, Licensor provides the Work (and each Contributor provides its Contributions) on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied, including, without limitation, any warranties or conditions of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A PARTICULAR PURPOSE. You are solely responsible for determining the appropriateness of using or redistributing the Work and assume any risks associated with Your exercise of permissions under this License.\par
+\par
+8. Limitation of Liability. In no event and under no legal theory, whether in tort (including negligence), contract, or otherwise, unless required by applicable law (such as deliberate and grossly negligent acts) or agreed to in writing, shall any Contributor be liable to You for damages, including any direct, indirect, special, incidental, or consequential damages of any character arising as a result of this License or out of the use or inability to use the Work (including but not limited to damages for loss of goodwill, work stoppage, computer failure or malfunction, or any and all other commercial damages or losses), even if such Contributor has been advised of the possibility of such damages.\par
+\par
+9. Accepting Warranty or Additional Liability. While redistributing the Work or Derivative Works thereof, You may choose to offer, and charge a fee for, acceptance of support, warranty, indemnity, or other liability obligations and/or rights consistent with this License. However, in accepting such obligations, You may act only on Your own behalf and on Your sole responsibility, not on behalf of any other Contributor, and only if You agree to indemnify, defend, and hold each Contributor harmless for any liability incurred by, or claims asserted against, such Contributor by reason of your accepting any such warranty or additional liability.\par
+\pard\par
+END OF TERMS AND CONDITIONS\par
+\par
+APPENDIX: How to apply the Apache License to your work.\par
+\par
+To apply the Apache License to your work, attach the following boilerplate notice, with the fields enclosed by brackets "[]" replaced with your own identifying information. (Don't include the brackets!)  The text should be enclosed in the appropriate comment syntax for the file format. We also recommend that a file or class name and description of purpose be included on the same "printed page" as the copyright notice for easier identification within third-party archives.\par
+\par
+\pard\li180 Copyright [yyyy] [name of copyright owner]\par
+\par
+Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License.  You may obtain a copy of the License at\par
+\par
+\pard\li360{\field{\*\fldinst{HYPERLINK "http://www.apache.org/licenses/LICENSE-2.0"}}{\fldrslt{\ul\cf1 http://www.apache.org/licenses/LICENSE-2.0}}}\f0\fs18\par
+\pard\li180\par
+Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License.\par
+\pard\par
+\par
+}
+
\ No newline at end of file
diff --git a/meson/msi/createmsi.py b/meson/msi/createmsi.py
new file mode 100644
index 000000000..f80d1dccb
--- /dev/null
+++ b/meson/msi/createmsi.py
@@ -0,0 +1,354 @@
+#!/usr/bin/env python3
+
+# Copyright 2017 The Meson development team
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+'''
+    This script is for generating MSI packages
+    for Windows users.
+'''
+import subprocess
+import shutil
+import uuid
+import sys
+import os
+from glob import glob
+import platform
+import xml.etree.ElementTree as ET
+
+sys.path.append(os.getcwd())
+from mesonbuild import coredata
+
+def gen_guid():
+    '''
+       Generate guid
+    '''
+    return str(uuid.uuid4()).upper()
+
+class Node:
+    '''
+       Node to hold path and directory values
+    '''
+
+    def __init__(self, dirs, files):
+        self.check_dirs(dirs)
+        self.check_files(files)
+        self.dirs = dirs
+        self.files = files
+
+    @staticmethod
+    def check_dirs(dirs):
+        '''
+           Check to see if directory is instance of list
+        '''
+        assert isinstance(dirs, list)
+
+    @staticmethod
+    def check_files(files):
+        '''
+           Check to see if files is instance of list
+        '''
+        assert isinstance(files, list)
+
+
+class PackageGenerator:
+    '''
+       Package generator for MSI pacakges
+    '''
+
+    def __init__(self):
+        self.product_name = 'Meson Build System'
+        self.manufacturer = 'The Meson Development Team'
+        self.version = coredata.version.replace('dev', '')
+        self.root = None
+        self.guid = '*'
+        self.update_guid = '141527EE-E28A-4D14-97A4-92E6075D28B2'
+        self.main_xml = 'meson.wxs'
+        self.main_o = 'meson.wixobj'
+        self.bytesize = 32 if '32' in platform.architecture()[0] else 64
+        self.final_output = 'meson-{}-{}.msi'.format(self.version, self.bytesize)
+        self.staging_dirs = ['dist', 'dist2']
+        if self.bytesize == 64:
+            self.progfile_dir = 'ProgramFiles64Folder'
+            redist_glob = 'C:\\Program Files (x86)\\Microsoft Visual Studio\\2019\\Community\\VC\\Redist\\MSVC\\*\\MergeModules\\Microsoft_VC142_CRT_x64.msm'
+        else:
+            self.progfile_dir = 'ProgramFilesFolder'
+            redist_glob = 'C:\\Program Files*\\Microsoft Visual Studio\\2019\\Community\\VC\\Redist\\MSVC\\*\\MergeModules\\Microsoft_VC142_CRT_x86.msm'
+        trials = glob(redist_glob)
+        if len(trials) != 1:
+            sys.exit('Could not find unique MSM setup:' + '\n'.join(trials))
+        self.redist_path = trials[0]
+        self.component_num = 0
+        self.feature_properties = {
+            self.staging_dirs[0]: {
+                'Id': 'MainProgram',
+                'Title': 'Meson',
+                'Description': 'Meson executables',
+                'Level': '1',
+                'Absent': 'disallow',
+            },
+            self.staging_dirs[1]: {
+                'Id': 'NinjaProgram',
+                'Title': 'Ninja',
+                'Description': 'Ninja build tool',
+                'Level': '1',
+            }
+        }
+        self.feature_components = {}
+        for s_d in self.staging_dirs:
+            self.feature_components[s_d] = []
+
+    @staticmethod
+    def get_all_modules_from_dir(dirname):
+        '''
+           Get all modules required for Meson build MSI package
+           from directories.
+        '''
+        modname = os.path.basename(dirname)
+        modules = [os.path.splitext(os.path.split(x)[1])[0] for x in glob(os.path.join(dirname, '*'))]
+        modules = ['mesonbuild.' + modname + '.' + x for x in modules if not x.startswith('_')]
+        return modules
+
+    @staticmethod
+    def get_more_modules():
+        '''
+           Getter for missing Modules.
+           Python packagers want to be minimal and only copy the things
+           that they can see that being used. They are blind to many things.
+        '''
+        return ['distutils.archive_util',
+                'distutils.cmd',
+                'distutils.config',
+                'distutils.core',
+                'distutils.debug',
+                'distutils.dep_util',
+                'distutils.dir_util',
+                'distutils.dist',
+                'distutils.errors',
+                'distutils.extension',
+                'distutils.fancy_getopt',
+                'distutils.file_util',
+                'distutils.spawn',
+                'distutils.util',
+                'distutils.version',
+                'distutils.command.build_ext',
+                'distutils.command.build',
+                ]
+
+    def build_dist(self):
+        '''
+           Build dist file from PyInstaller info
+        '''
+        for sdir in self.staging_dirs:
+            if os.path.exists(sdir):
+                shutil.rmtree(sdir)
+        main_stage, ninja_stage = self.staging_dirs
+        modules = self.get_all_modules_from_dir('mesonbuild/modules')
+        modules += self.get_all_modules_from_dir('mesonbuild/scripts')
+        modules += self.get_more_modules()
+
+        pyinstaller = shutil.which('pyinstaller')
+        if not pyinstaller:
+            print("ERROR: This script requires pyinstaller.")
+            sys.exit(1)
+
+        pyinstaller_tmpdir = 'pyinst-tmp'
+        if os.path.exists(pyinstaller_tmpdir):
+            shutil.rmtree(pyinstaller_tmpdir)
+        pyinst_cmd = [pyinstaller,
+                      '--clean',
+                      '--distpath',
+                      pyinstaller_tmpdir]
+        for m in modules:
+            pyinst_cmd += ['--hidden-import', m]
+        pyinst_cmd += ['meson.py']
+        subprocess.check_call(pyinst_cmd)
+        shutil.move(pyinstaller_tmpdir + '/meson', main_stage)
+        if not os.path.exists(os.path.join(main_stage, 'meson.exe')):
+            sys.exit('Meson exe missing from staging dir.')
+        os.mkdir(ninja_stage)
+        shutil.copy(shutil.which('ninja'), ninja_stage)
+        if not os.path.exists(os.path.join(ninja_stage, 'ninja.exe')):
+            sys.exit('Ninja exe missing from staging dir.')
+
+    def generate_files(self):
+        '''
+           Generate package files for MSI installer package
+        '''
+        self.root = ET.Element('Wix', {'xmlns': 'http://schemas.microsoft.com/wix/2006/wi'})
+        product = ET.SubElement(self.root, 'Product', {
+            'Name': self.product_name,
+            'Manufacturer': 'The Meson Development Team',
+            'Id': self.guid,
+            'UpgradeCode': self.update_guid,
+            'Language': '1033',
+            'Codepage':  '1252',
+            'Version': self.version,
+        })
+
+        package = ET.SubElement(product, 'Package', {
+            'Id': '*',
+            'Keywords': 'Installer',
+            'Description': 'Meson {} installer'.format(self.version),
+            'Comments': 'Meson is a high performance build system',
+            'Manufacturer': 'The Meson Development Team',
+            'InstallerVersion': '500',
+            'Languages': '1033',
+            'Compressed': 'yes',
+            'SummaryCodepage': '1252',
+        })
+
+        ET.SubElement(product, 'MajorUpgrade',
+                      {'DowngradeErrorMessage': 'A newer version of Meson is already installed.'})
+
+        if self.bytesize == 64:
+            package.set('Platform', 'x64')
+        ET.SubElement(product, 'Media', {
+            'Id': '1',
+            'Cabinet': 'meson.cab',
+            'EmbedCab': 'yes',
+        })
+        targetdir = ET.SubElement(product, 'Directory', {
+            'Id': 'TARGETDIR',
+            'Name': 'SourceDir',
+        })
+        progfiledir = ET.SubElement(targetdir, 'Directory', {
+            'Id': self.progfile_dir,
+        })
+        installdir = ET.SubElement(progfiledir, 'Directory', {
+            'Id': 'INSTALLDIR',
+            'Name': 'Meson',
+        })
+        ET.SubElement(installdir, 'Merge', {
+            'Id': 'VCRedist',
+            'SourceFile': self.redist_path,
+            'DiskId': '1',
+            'Language': '0',
+        })
+
+        ET.SubElement(product, 'Property', {
+            'Id': 'WIXUI_INSTALLDIR',
+            'Value': 'INSTALLDIR',
+        })
+        ET.SubElement(product, 'UIRef', {
+            'Id': 'WixUI_FeatureTree',
+        })
+        for s_d in self.staging_dirs:
+            assert os.path.isdir(s_d)
+        top_feature = ET.SubElement(product, 'Feature', {
+            'Id': 'Complete',
+            'Title': 'Meson ' + self.version,
+            'Description': 'The complete package',
+            'Display': 'expand',
+            'Level': '1',
+            'ConfigurableDirectory': 'INSTALLDIR',
+        })
+        for s_d in self.staging_dirs:
+            nodes = {}
+            for root, dirs, files in os.walk(s_d):
+                cur_node = Node(dirs, files)
+                nodes[root] = cur_node
+            self.create_xml(nodes, s_d, installdir, s_d)
+            self.build_features(top_feature, s_d)
+        vcredist_feature = ET.SubElement(top_feature, 'Feature', {
+            'Id': 'VCRedist',
+            'Title': 'Visual C++ runtime',
+            'AllowAdvertise': 'no',
+            'Display': 'hidden',
+            'Level': '1',
+        })
+        ET.SubElement(vcredist_feature, 'MergeRef', {'Id': 'VCRedist'})
+        ET.ElementTree(self.root).write(self.main_xml, encoding='utf-8', xml_declaration=True)
+        # ElementTree can not do prettyprinting so do it manually
+        import xml.dom.minidom
+        doc = xml.dom.minidom.parse(self.main_xml)
+        with open(self.main_xml, 'w') as open_file:
+            open_file.write(doc.toprettyxml())
+
+    def build_features(self, top_feature, staging_dir):
+        '''
+           Generate build features
+        '''
+        feature = ET.SubElement(top_feature, 'Feature', self.feature_properties[staging_dir])
+        for component_id in self.feature_components[staging_dir]:
+            ET.SubElement(feature, 'ComponentRef', {
+                'Id': component_id,
+            })
+
+    def create_xml(self, nodes, current_dir, parent_xml_node, staging_dir):
+        '''
+           Create XML file
+        '''
+        cur_node = nodes[current_dir]
+        if cur_node.files:
+            component_id = 'ApplicationFiles{}'.format(self.component_num)
+            comp_xml_node = ET.SubElement(parent_xml_node, 'Component', {
+                'Id': component_id,
+                'Guid': gen_guid(),
+            })
+            self.feature_components[staging_dir].append(component_id)
+            if self.bytesize == 64:
+                comp_xml_node.set('Win64', 'yes')
+            if self.component_num == 0:
+                ET.SubElement(comp_xml_node, 'Environment', {
+                    'Id': 'Environment',
+                    'Name': 'PATH',
+                    'Part': 'last',
+                    'System': 'yes',
+                    'Action': 'set',
+                    'Value': '[INSTALLDIR]',
+                })
+            self.component_num += 1
+            for f_node in cur_node.files:
+                file_id = os.path.join(current_dir, f_node).replace('\\', '_').replace('#', '_').replace('-', '_')
+                ET.SubElement(comp_xml_node, 'File', {
+                    'Id': file_id,
+                    'Name': f_node,
+                    'Source': os.path.join(current_dir, f_node),
+                })
+
+        for dirname in cur_node.dirs:
+            dir_id = os.path.join(current_dir, dirname).replace('\\', '_').replace('/', '_')
+            dir_node = ET.SubElement(parent_xml_node, 'Directory', {
+                'Id': dir_id,
+                'Name': dirname,
+            })
+            self.create_xml(nodes, os.path.join(current_dir, dirname), dir_node, staging_dir)
+
+    def build_package(self):
+        '''
+           Generate the Meson build MSI package.
+        '''
+        wixdir = 'c:\\Program Files\\Wix Toolset v3.11\\bin'
+        if not os.path.isdir(wixdir):
+            wixdir = 'c:\\Program Files (x86)\\Wix Toolset v3.11\\bin'
+        if not os.path.isdir(wixdir):
+            print("ERROR: This script requires WIX")
+            sys.exit(1)
+        subprocess.check_call([os.path.join(wixdir, 'candle'), self.main_xml])
+        subprocess.check_call([os.path.join(wixdir, 'light'),
+                               '-ext', 'WixUIExtension',
+                               '-cultures:en-us',
+                               '-dWixUILicenseRtf=msi\\License.rtf',
+                               '-out', self.final_output,
+                               self.main_o])
+
+if __name__ == '__main__':
+    if not os.path.exists('meson.py'):
+        sys.exit(print('Run me in the top level source dir.'))
+    subprocess.check_call(['pip', 'install', '--upgrade', 'pyinstaller'])
+
+    p = PackageGenerator()
+    p.build_dist()
+    p.generate_files()
+    p.build_package()
diff --git a/meson/pyproject.toml b/meson/pyproject.toml
new file mode 100644
index 000000000..d1e6ae6e5
--- /dev/null
+++ b/meson/pyproject.toml
@@ -0,0 +1,2 @@
+[build-system]
+requires = ["setuptools", "wheel"]
diff --git a/meson/run_cross_test.py b/meson/run_cross_test.py
new file mode 100755
index 000000000..1e67876ab
--- /dev/null
+++ b/meson/run_cross_test.py
@@ -0,0 +1,42 @@
+#!/usr/bin/env python3
+
+# Copyright 2013-2016 The Meson development team
+
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+
+#     http://www.apache.org/licenses/LICENSE-2.0
+
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+'''Runs the basic test suite through a cross compiler.
+
+This is now just a wrapper around run_project_tests.py with specific arguments
+'''
+
+import argparse
+import subprocess
+from mesonbuild import mesonlib
+from mesonbuild.coredata import version as meson_version
+
+
+def runtests(cross_file, failfast):
+    tests = ['--only', 'common']
+    cmd = mesonlib.python_command + ['run_project_tests.py', '--backend', 'ninja'] + (['--failfast'] if failfast else []) + tests + ['--cross-file', cross_file]
+    return subprocess.call(cmd)
+
+def main():
+    parser = argparse.ArgumentParser()
+    parser.add_argument('--failfast', action='store_true')
+    parser.add_argument('cross_file')
+    options = parser.parse_args()
+    return runtests(options.cross_file, options.failfast)
+
+if __name__ == '__main__':
+    print('Meson build system', meson_version, 'Cross Tests')
+    raise SystemExit(main())
diff --git a/meson/run_meson_command_tests.py b/meson/run_meson_command_tests.py
new file mode 100755
index 000000000..7bc6185f1
--- /dev/null
+++ b/meson/run_meson_command_tests.py
@@ -0,0 +1,204 @@
+#!/usr/bin/env python3
+
+# Copyright 2018 The Meson development team
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import os
+import tempfile
+import unittest
+import subprocess
+import zipapp
+from pathlib import Path
+
+from mesonbuild.mesonlib import windows_proof_rmtree, python_command, is_windows
+from mesonbuild.coredata import version as meson_version
+
+
+def get_pypath():
+    import sysconfig
+    pypath = sysconfig.get_path('purelib', vars={'base': ''})
+    # Ensure that / is the path separator and not \, then strip /
+    return Path(pypath).as_posix().strip('/')
+
+def get_pybindir():
+    import sysconfig
+    # 'Scripts' on Windows and 'bin' on other platforms including MSYS
+    return sysconfig.get_path('scripts', vars={'base': ''}).strip('\\/')
+
+class CommandTests(unittest.TestCase):
+    '''
+    Test that running meson in various ways works as expected by checking the
+    value of mesonlib.meson_command that was set during configuration.
+    '''
+
+    def setUp(self):
+        super().setUp()
+        self.orig_env = os.environ.copy()
+        self.orig_dir = os.getcwd()
+        os.environ['MESON_COMMAND_TESTS'] = '1'
+        self.tmpdir = Path(tempfile.mkdtemp()).resolve()
+        self.src_root = Path(__file__).resolve().parent
+        self.testdir = str(self.src_root / 'test cases/common/1 trivial')
+        self.meson_args = ['--backend=ninja']
+
+    def tearDown(self):
+        try:
+            windows_proof_rmtree(str(self.tmpdir))
+        except FileNotFoundError:
+            pass
+        os.environ.clear()
+        os.environ.update(self.orig_env)
+        os.chdir(str(self.orig_dir))
+        super().tearDown()
+
+    def _run(self, command, workdir=None):
+        '''
+        Run a command while printing the stdout, and also return a copy of it
+        '''
+        # If this call hangs CI will just abort. It is very hard to distinguish
+        # between CI issue and test bug in that case. Set timeout and fail loud
+        # instead.
+        p = subprocess.run(command, stdout=subprocess.PIPE,
+                           env=os.environ.copy(), universal_newlines=True,
+                           cwd=workdir, timeout=60 * 5)
+        print(p.stdout)
+        if p.returncode != 0:
+            raise subprocess.CalledProcessError(p.returncode, command)
+        return p.stdout
+
+    def assertMesonCommandIs(self, line, cmd):
+        self.assertTrue(line.startswith('meson_command '), msg=line)
+        self.assertEqual(line, 'meson_command is {!r}'.format(cmd))
+
+    def test_meson_uninstalled(self):
+        # This is what the meson command must be for all these cases
+        resolved_meson_command = python_command + [str(self.src_root / 'meson.py')]
+        # Absolute path to meson.py
+        os.chdir('/')
+        builddir = str(self.tmpdir / 'build1')
+        meson_py = str(self.src_root / 'meson.py')
+        meson_setup = [meson_py, 'setup']
+        meson_command = python_command + meson_setup + self.meson_args
+        stdo = self._run(meson_command + [self.testdir, builddir])
+        self.assertMesonCommandIs(stdo.split('\n')[0], resolved_meson_command)
+        # ./meson.py
+        os.chdir(str(self.src_root))
+        builddir = str(self.tmpdir / 'build2')
+        meson_py = './meson.py'
+        meson_setup = [meson_py, 'setup']
+        meson_command = python_command + meson_setup + self.meson_args
+        stdo = self._run(meson_command + [self.testdir, builddir])
+        self.assertMesonCommandIs(stdo.split('\n')[0], resolved_meson_command)
+        # Symlink to meson.py
+        if is_windows():
+            # Symlinks require admin perms
+            return
+        os.chdir(str(self.src_root))
+        builddir = str(self.tmpdir / 'build3')
+        # Create a symlink to meson.py in bindir, and add it to PATH
+        bindir = (self.tmpdir / 'bin')
+        bindir.mkdir()
+        (bindir / 'meson').symlink_to(self.src_root / 'meson.py')
+        os.environ['PATH'] = str(bindir) + os.pathsep + os.environ['PATH']
+        # See if it works!
+        meson_py = 'meson'
+        meson_setup = [meson_py, 'setup']
+        meson_command = meson_setup + self.meson_args
+        stdo = self._run(meson_command + [self.testdir, builddir])
+        self.assertMesonCommandIs(stdo.split('\n')[0], resolved_meson_command)
+
+    def test_meson_installed(self):
+        # Install meson
+        prefix = self.tmpdir / 'prefix'
+        pylibdir = prefix / get_pypath()
+        bindir = prefix / get_pybindir()
+        pylibdir.mkdir(parents=True)
+        # XXX: join with empty name so it always ends with os.sep otherwise
+        # distutils complains that prefix isn't contained in PYTHONPATH
+        os.environ['PYTHONPATH'] = os.path.join(str(pylibdir), '')
+        os.environ['PATH'] = str(bindir) + os.pathsep + os.environ['PATH']
+        self._run(python_command + ['setup.py', 'install', '--prefix', str(prefix)])
+        # Fix importlib-metadata by appending all dirs in pylibdir
+        PYTHONPATHS = [pylibdir] + [x for x in pylibdir.iterdir()]
+        PYTHONPATHS = [os.path.join(str(x), '') for x in PYTHONPATHS]
+        os.environ['PYTHONPATH'] = os.pathsep.join(PYTHONPATHS)
+        # Check that all the files were installed correctly
+        self.assertTrue(bindir.is_dir())
+        self.assertTrue(pylibdir.is_dir())
+        from setup import packages
+        # Extract list of expected python module files
+        expect = set()
+        for pkg in packages:
+            expect.update([p.as_posix() for p in Path(pkg.replace('.', '/')).glob('*.py')])
+        # Check what was installed, only count files that are inside 'mesonbuild'
+        have = set()
+        for p in Path(pylibdir).glob('**/*.py'):
+            s = p.as_posix()
+            if 'mesonbuild' not in s:
+                continue
+            if '/data/' in s:
+                continue
+            have.add(s[s.rfind('mesonbuild'):])
+        self.assertEqual(have, expect)
+        # Run `meson`
+        os.chdir('/')
+        resolved_meson_command = [str(bindir / 'meson')]
+        builddir = str(self.tmpdir / 'build1')
+        meson_setup = ['meson', 'setup']
+        meson_command = meson_setup + self.meson_args
+        stdo = self._run(meson_command + [self.testdir, builddir])
+        self.assertMesonCommandIs(stdo.split('\n')[0], resolved_meson_command)
+        # Run `/path/to/meson`
+        builddir = str(self.tmpdir / 'build2')
+        meson_setup = [str(bindir / 'meson'), 'setup']
+        meson_command = meson_setup + self.meson_args
+        stdo = self._run(meson_command + [self.testdir, builddir])
+        self.assertMesonCommandIs(stdo.split('\n')[0], resolved_meson_command)
+        # Run `python3 -m mesonbuild.mesonmain`
+        resolved_meson_command = python_command + ['-m', 'mesonbuild.mesonmain']
+        builddir = str(self.tmpdir / 'build3')
+        meson_setup = ['-m', 'mesonbuild.mesonmain', 'setup']
+        meson_command = python_command + meson_setup + self.meson_args
+        stdo = self._run(meson_command + [self.testdir, builddir])
+        self.assertMesonCommandIs(stdo.split('\n')[0], resolved_meson_command)
+        if is_windows():
+            # Next part requires a shell
+            return
+        # `meson` is a wrapper to `meson.real`
+        resolved_meson_command = [str(bindir / 'meson.real')]
+        builddir = str(self.tmpdir / 'build4')
+        (bindir / 'meson').rename(bindir / 'meson.real')
+        wrapper = (bindir / 'meson')
+        wrapper.open('w').write('#!/bin/sh\n\nmeson.real "$@"')
+        wrapper.chmod(0o755)
+        meson_setup = [str(wrapper), 'setup']
+        meson_command = meson_setup + self.meson_args
+        stdo = self._run(meson_command + [self.testdir, builddir])
+        self.assertMesonCommandIs(stdo.split('\n')[0], resolved_meson_command)
+
+    def test_meson_exe_windows(self):
+        raise unittest.SkipTest('NOT IMPLEMENTED')
+
+    def test_meson_zipapp(self):
+        if is_windows():
+            raise unittest.SkipTest('NOT IMPLEMENTED')
+        source = Path(__file__).resolve().parent.as_posix()
+        target = self.tmpdir / 'meson.pyz'
+        zipapp.create_archive(source=source, target=target, interpreter=python_command[0], main=None)
+        self._run([target.as_posix(), '--help'])
+
+
+if __name__ == '__main__':
+    print('Meson build system', meson_version, 'Command Tests')
+    raise SystemExit(unittest.main(buffer=True))
diff --git a/meson/run_project_tests.py b/meson/run_project_tests.py
new file mode 100755
index 000000000..348c5ca72
--- /dev/null
+++ b/meson/run_project_tests.py
@@ -0,0 +1,1300 @@
+#!/usr/bin/env python3
+
+# Copyright 2012-2019 The Meson development team
+
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+
+#     http://www.apache.org/licenses/LICENSE-2.0
+
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from concurrent.futures import ProcessPoolExecutor, CancelledError
+from enum import Enum
+from io import StringIO
+from pathlib import Path, PurePath
+import argparse
+import functools
+import itertools
+import json
+import multiprocessing
+import os
+import re
+import shlex
+import shutil
+import signal
+import subprocess
+import sys
+import tempfile
+import time
+import typing as T
+import xml.etree.ElementTree as ET
+
+from mesonbuild import build
+from mesonbuild import environment
+from mesonbuild import compilers
+from mesonbuild import mesonlib
+from mesonbuild import mlog
+from mesonbuild import mtest
+from mesonbuild.mesonlib import MachineChoice, Popen_safe
+from mesonbuild.coredata import backendlist, version as meson_version
+
+from run_tests import get_fake_options, run_configure, get_meson_script
+from run_tests import get_backend_commands, get_backend_args_for_dir, Backend
+from run_tests import ensure_backend_detects_changes
+from run_tests import guess_backend
+
+ALL_TESTS = ['cmake', 'common', 'warning-meson', 'failing-meson', 'failing-build', 'failing-test',
+             'keyval', 'platform-osx', 'platform-windows', 'platform-linux',
+             'java', 'C#', 'vala',  'rust', 'd', 'objective c', 'objective c++',
+             'fortran', 'swift', 'cuda', 'python3', 'python', 'fpga', 'frameworks', 'nasm', 'wasm'
+             ]
+
+
+class BuildStep(Enum):
+    configure = 1
+    build = 2
+    test = 3
+    install = 4
+    clean = 5
+    validate = 6
+
+
+class TestResult(BaseException):
+    def __init__(self, cicmds):
+        self.msg = ''  # empty msg indicates test success
+        self.stdo = ''
+        self.stde = ''
+        self.mlog = ''
+        self.cicmds = cicmds
+        self.conftime = 0
+        self.buildtime = 0
+        self.testtime = 0
+
+    def add_step(self, step, stdo, stde, mlog='', time=0):
+        self.step = step
+        self.stdo += stdo
+        self.stde += stde
+        self.mlog += mlog
+        if step == BuildStep.configure:
+            self.conftime = time
+        elif step == BuildStep.build:
+            self.buildtime = time
+        elif step == BuildStep.test:
+            self.testtime = time
+
+    def fail(self, msg):
+        self.msg = msg
+
+class InstalledFile:
+    def __init__(self, raw: T.Dict[str, str]):
+        self.path = raw['file']
+        self.typ = raw['type']
+        self.platform = raw.get('platform', None)
+        self.language = raw.get('language', 'c')  # type: str
+
+        version = raw.get('version', '')  # type: str
+        if version:
+            self.version = version.split('.')  # type: T.List[str]
+        else:
+            # split on '' will return [''], we want an empty list though
+            self.version = []
+
+    def get_path(self, compiler: str, env: environment.Environment) -> T.Optional[Path]:
+        p = Path(self.path)
+        canonical_compiler = compiler
+        if ((compiler in ['clang-cl', 'intel-cl']) or
+                (env.machines.host.is_windows() and compiler in {'pgi', 'dmd', 'ldc'})):
+            canonical_compiler = 'msvc'
+
+        has_pdb = False
+        if self.language in {'c', 'cpp'}:
+            has_pdb = canonical_compiler == 'msvc'
+        elif self.language == 'd':
+            # dmd's optlink does not genearte pdb iles
+            has_pdb = env.coredata.compilers.host['d'].linker.id in {'link', 'lld-link'}
+
+        # Abort if the platform does not match
+        matches = {
+            'msvc': canonical_compiler == 'msvc',
+            'gcc': canonical_compiler != 'msvc',
+            'cygwin': env.machines.host.is_cygwin(),
+            '!cygwin': not env.machines.host.is_cygwin(),
+        }.get(self.platform or '', True)
+        if not matches:
+            return None
+
+        # Handle the different types
+        if self.typ in ['file', 'dir']:
+            return p
+        elif self.typ == 'shared_lib':
+            if env.machines.host.is_windows() or env.machines.host.is_cygwin():
+                # Windows only has foo.dll and foo-X.dll
+                if len(self.version) > 1:
+                    return None
+                if self.version:
+                    p = p.with_name('{}-{}'.format(p.name, self.version[0]))
+                return p.with_suffix('.dll')
+
+            p = p.with_name('lib{}'.format(p.name))
+            if env.machines.host.is_darwin():
+                # MacOS only has libfoo.dylib and libfoo.X.dylib
+                if len(self.version) > 1:
+                    return None
+
+                # pathlib.Path.with_suffix replaces, not appends
+                suffix = '.dylib'
+                if self.version:
+                    suffix = '.{}{}'.format(self.version[0], suffix)
+            else:
+                # pathlib.Path.with_suffix replaces, not appends
+                suffix = '.so'
+                if self.version:
+                    suffix = '{}.{}'.format(suffix, '.'.join(self.version))
+            return p.with_suffix(suffix)
+        elif self.typ == 'exe':
+            if env.machines.host.is_windows() or env.machines.host.is_cygwin():
+                return p.with_suffix('.exe')
+        elif self.typ == 'pdb':
+            if self.version:
+                p = p.with_name('{}-{}'.format(p.name, self.version[0]))
+            return p.with_suffix('.pdb') if has_pdb else None
+        elif self.typ == 'implib' or self.typ == 'implibempty':
+            if env.machines.host.is_windows() and canonical_compiler == 'msvc':
+                # only MSVC doesn't generate empty implibs
+                if self.typ == 'implibempty' and compiler == 'msvc':
+                    return None
+                return p.parent / (re.sub(r'^lib', '', p.name) + '.lib')
+            elif env.machines.host.is_windows() or env.machines.host.is_cygwin():
+                return p.with_suffix('.dll.a')
+            else:
+                return None
+        elif self.typ == 'expr':
+            return Path(platform_fix_name(p.as_posix(), canonical_compiler, env))
+        else:
+            raise RuntimeError('Invalid installed file type {}'.format(self.typ))
+
+        return p
+
+    def get_paths(self, compiler: str, env: environment.Environment, installdir: Path) -> T.List[Path]:
+        p = self.get_path(compiler, env)
+        if not p:
+            return []
+        if self.typ == 'dir':
+            abs_p = installdir / p
+            if not abs_p.exists():
+                raise RuntimeError('{} does not exist'.format(p))
+            if not abs_p.is_dir():
+                raise RuntimeError('{} is not a directory'.format(p))
+            return [x.relative_to(installdir) for x in abs_p.rglob('*') if x.is_file() or x.is_symlink()]
+        else:
+            return [p]
+
+@functools.total_ordering
+class TestDef:
+    def __init__(self, path: Path, name: T.Optional[str], args: T.List[str], skip: bool = False):
+        self.path = path
+        self.name = name
+        self.args = args
+        self.skip = skip
+        self.env = os.environ.copy()
+        self.installed_files = []  # type: T.List[InstalledFile]
+        self.do_not_set_opts = []  # type: T.List[str]
+        self.stdout = [] # type: T.List[T.Dict[str, str]]
+
+    def __repr__(self) -> str:
+        return '<{}: {:<48} [{}: {}] -- {}>'.format(type(self).__name__, str(self.path), self.name, self.args, self.skip)
+
+    def display_name(self) -> str:
+        if self.name:
+            return '{}   ({})'.format(self.path.as_posix(), self.name)
+        return self.path.as_posix()
+
+    def __lt__(self, other: T.Any) -> bool:
+        if isinstance(other, TestDef):
+            # None is not sortable, so replace it with an empty string
+            s_id = int(self.path.name.split(' ')[0])
+            o_id = int(other.path.name.split(' ')[0])
+            return (s_id, self.path, self.name or '') < (o_id, other.path, other.name or '')
+        return NotImplemented
+
+class AutoDeletedDir:
+    def __init__(self, d):
+        self.dir = d
+
+    def __enter__(self):
+        os.makedirs(self.dir, exist_ok=True)
+        return self.dir
+
+    def __exit__(self, _type, value, traceback):
+        # We don't use tempfile.TemporaryDirectory, but wrap the
+        # deletion in the AutoDeletedDir class because
+        # it fails on Windows due antivirus programs
+        # holding files open.
+        mesonlib.windows_proof_rmtree(self.dir)
+
+failing_logs = []
+print_debug = 'MESON_PRINT_TEST_OUTPUT' in os.environ
+under_ci = 'CI' in os.environ
+under_xenial_ci = under_ci and ('XENIAL' in os.environ)
+skip_scientific = under_ci and ('SKIP_SCIENTIFIC' in os.environ)
+do_debug = under_ci or print_debug
+no_meson_log_msg = 'No meson-log.txt found.'
+
+system_compiler = None
+compiler_id_map = {}  # type: T.Dict[str, str]
+tool_vers_map = {}    # type: T.Dict[str, str]
+
+class StopException(Exception):
+    def __init__(self):
+        super().__init__('Stopped by user')
+
+stop = False
+def stop_handler(signal, frame):
+    global stop
+    stop = True
+signal.signal(signal.SIGINT, stop_handler)
+signal.signal(signal.SIGTERM, stop_handler)
+
+def setup_commands(optbackend):
+    global do_debug, backend, backend_flags
+    global compile_commands, clean_commands, test_commands, install_commands, uninstall_commands
+    backend, backend_flags = guess_backend(optbackend, shutil.which('msbuild'))
+    compile_commands, clean_commands, test_commands, install_commands, \
+        uninstall_commands = get_backend_commands(backend, do_debug)
+
+# TODO try to eliminate or at least reduce this function
+def platform_fix_name(fname: str, canonical_compiler: str, env: environment.Environment) -> str:
+    if '?lib' in fname:
+        if env.machines.host.is_windows() and canonical_compiler == 'msvc':
+            fname = re.sub(r'lib/\?lib(.*)\.', r'bin/\1.', fname)
+            fname = re.sub(r'/\?lib/', r'/bin/', fname)
+        elif env.machines.host.is_windows():
+            fname = re.sub(r'lib/\?lib(.*)\.', r'bin/lib\1.', fname)
+            fname = re.sub(r'\?lib(.*)\.dll$', r'lib\1.dll', fname)
+            fname = re.sub(r'/\?lib/', r'/bin/', fname)
+        elif env.machines.host.is_cygwin():
+            fname = re.sub(r'lib/\?lib(.*)\.so$', r'bin/cyg\1.dll', fname)
+            fname = re.sub(r'lib/\?lib(.*)\.', r'bin/cyg\1.', fname)
+            fname = re.sub(r'\?lib(.*)\.dll$', r'cyg\1.dll', fname)
+            fname = re.sub(r'/\?lib/', r'/bin/', fname)
+        else:
+            fname = re.sub(r'\?lib', 'lib', fname)
+
+    if fname.endswith('?so'):
+        if env.machines.host.is_windows() and canonical_compiler == 'msvc':
+            fname = re.sub(r'lib/([^/]*)\?so$', r'bin/\1.dll', fname)
+            fname = re.sub(r'/(?:lib|)([^/]*?)\?so$', r'/\1.dll', fname)
+            return fname
+        elif env.machines.host.is_windows():
+            fname = re.sub(r'lib/([^/]*)\?so$', r'bin/\1.dll', fname)
+            fname = re.sub(r'/([^/]*?)\?so$', r'/\1.dll', fname)
+            return fname
+        elif env.machines.host.is_cygwin():
+            fname = re.sub(r'lib/([^/]*)\?so$', r'bin/\1.dll', fname)
+            fname = re.sub(r'/lib([^/]*?)\?so$', r'/cyg\1.dll', fname)
+            fname = re.sub(r'/([^/]*?)\?so$', r'/\1.dll', fname)
+            return fname
+        elif env.machines.host.is_darwin():
+            return fname[:-3] + '.dylib'
+        else:
+            return fname[:-3] + '.so'
+
+    return fname
+
+def validate_install(test: TestDef, installdir: Path, compiler: str, env: environment.Environment) -> str:
+    ret_msg = ''
+    expected_raw = []  # type: T.List[Path]
+    for i in test.installed_files:
+        try:
+            expected_raw += i.get_paths(compiler, env, installdir)
+        except RuntimeError as err:
+            ret_msg += 'Expected path error: {}\n'.format(err)
+    expected = {x: False for x in expected_raw}
+    found = [x.relative_to(installdir) for x in installdir.rglob('*') if x.is_file() or x.is_symlink()]
+    # Mark all found files as found and detect unexpected files
+    for fname in found:
+        if fname not in expected:
+            ret_msg += 'Extra file {} found.\n'.format(fname)
+            continue
+        expected[fname] = True
+    # Check if expected files were found
+    for p, f in expected.items():
+        if not f:
+            ret_msg += 'Expected file {} missing.\n'.format(p)
+    # List dir content on error
+    if ret_msg != '':
+        ret_msg += '\nInstall dir contents:\n'
+        for i in found:
+            ret_msg += '  - {}\n'.format(i)
+    return ret_msg
+
+def log_text_file(logfile, testdir, stdo, stde):
+    global stop, executor, futures
+    logfile.write('%s\nstdout\n\n---\n' % testdir.as_posix())
+    logfile.write(stdo)
+    logfile.write('\n\n---\n\nstderr\n\n---\n')
+    logfile.write(stde)
+    logfile.write('\n\n---\n\n')
+    if print_debug:
+        try:
+            print(stdo)
+        except UnicodeError:
+            sanitized_out = stdo.encode('ascii', errors='replace').decode()
+            print(sanitized_out)
+        try:
+            print(stde, file=sys.stderr)
+        except UnicodeError:
+            sanitized_err = stde.encode('ascii', errors='replace').decode()
+            print(sanitized_err, file=sys.stderr)
+    if stop:
+        print("Aborting..")
+        for f in futures:
+            f[2].cancel()
+        executor.shutdown()
+        raise StopException()
+
+
+def bold(text):
+    return mlog.bold(text).get_text(mlog.colorize_console())
+
+
+def green(text):
+    return mlog.green(text).get_text(mlog.colorize_console())
+
+
+def red(text):
+    return mlog.red(text).get_text(mlog.colorize_console())
+
+
+def yellow(text):
+    return mlog.yellow(text).get_text(mlog.colorize_console())
+
+
+def _run_ci_include(args: T.List[str]) -> str:
+    if not args:
+        return 'At least one parameter required'
+    try:
+        file_path = Path(args[0])
+        data = file_path.open(errors='ignore', encoding='utf-8').read()
+        return 'Included file {}:\n{}\n'.format(args[0], data)
+    except Exception:
+        return 'Failed to open {} ({})'.format(args[0])
+
+ci_commands = {
+    'ci_include': _run_ci_include
+}
+
+def run_ci_commands(raw_log: str) -> T.List[str]:
+    res = []
+    for l in raw_log.splitlines():
+        if not l.startswith('!meson_ci!/'):
+            continue
+        cmd = shlex.split(l[11:])
+        if not cmd or cmd[0] not in ci_commands:
+            continue
+        res += ['CI COMMAND {}:\n{}\n'.format(cmd[0], ci_commands[cmd[0]](cmd[1:]))]
+    return res
+
+def _compare_output(expected: T.List[T.Dict[str, str]], output: str, desc: str) -> str:
+    if expected:
+        i = iter(expected)
+
+        def next_expected(i):
+            # Get the next expected line
+            item = next(i)
+            how = item.get('match', 'literal')
+            expected = item.get('line')
+
+            # Simple heuristic to automatically convert path separators for
+            # Windows:
+            #
+            # Any '/' appearing before 'WARNING' or 'ERROR' (i.e. a path in a
+            # filename part of a location) is replaced with '\' (in a re: '\\'
+            # which matches a literal '\')
+            #
+            # (There should probably be a way to turn this off for more complex
+            # cases which don't fit this)
+            if mesonlib.is_windows():
+                if how != "re":
+                    sub = r'\\'
+                else:
+                    sub = r'\\\\'
+                expected = re.sub(r'/(?=.*(WARNING|ERROR))', sub, expected)
+
+            return how, expected
+
+        try:
+            how, expected = next_expected(i)
+            for actual in output.splitlines():
+                if how == "re":
+                    match = bool(re.match(expected, actual))
+                else:
+                    match = (expected == actual)
+                if match:
+                    how, expected = next_expected(i)
+
+            # reached the end of output without finding expected
+            return 'expected "{}" not found in {}'.format(expected, desc)
+        except StopIteration:
+            # matched all expected lines
+            pass
+
+    return ''
+
+def validate_output(test: TestDef, stdo: str, stde: str) -> str:
+    return _compare_output(test.stdout, stdo, 'stdout')
+
+# There are some class variables and such that cahce
+# information. Clear all of these. The better solution
+# would be to change the code so that no state is persisted
+# but that would be a lot of work given that Meson was originally
+# coded to run as a batch process.
+def clear_internal_caches():
+    import mesonbuild.interpreterbase
+    from mesonbuild.dependencies import CMakeDependency
+    from mesonbuild.mesonlib import PerMachine
+    mesonbuild.interpreterbase.FeatureNew.feature_registry = {}
+    CMakeDependency.class_cmakeinfo = PerMachine(None, None)
+
+def run_test_inprocess(testdir):
+    old_stdout = sys.stdout
+    sys.stdout = mystdout = StringIO()
+    old_stderr = sys.stderr
+    sys.stderr = mystderr = StringIO()
+    old_cwd = os.getcwd()
+    os.chdir(testdir)
+    test_log_fname = Path('meson-logs', 'testlog.txt')
+    try:
+        returncode_test = mtest.run_with_args(['--no-rebuild'])
+        if test_log_fname.exists():
+            test_log = test_log_fname.open(errors='ignore').read()
+        else:
+            test_log = ''
+        returncode_benchmark = mtest.run_with_args(['--no-rebuild', '--benchmark', '--logbase', 'benchmarklog'])
+    finally:
+        sys.stdout = old_stdout
+        sys.stderr = old_stderr
+        os.chdir(old_cwd)
+    return max(returncode_test, returncode_benchmark), mystdout.getvalue(), mystderr.getvalue(), test_log
+
+# Build directory name must be the same so Ccache works over
+# consecutive invocations.
+def create_deterministic_builddir(test: TestDef, use_tmpdir: bool) -> str:
+    import hashlib
+    src_dir = test.path.as_posix()
+    if test.name:
+        src_dir += test.name
+    rel_dirname = 'b ' + hashlib.sha256(src_dir.encode(errors='ignore')).hexdigest()[0:10]
+    abs_pathname = os.path.join(tempfile.gettempdir() if use_tmpdir else os.getcwd(), rel_dirname)
+    os.mkdir(abs_pathname)
+    return abs_pathname
+
+def run_test(test: TestDef, extra_args, compiler, backend, flags, commands, should_fail, use_tmp: bool):
+    if test.skip:
+        return None
+    with AutoDeletedDir(create_deterministic_builddir(test, use_tmp)) as build_dir:
+        with AutoDeletedDir(tempfile.mkdtemp(prefix='i ', dir=None if use_tmp else os.getcwd())) as install_dir:
+            try:
+                return _run_test(test, build_dir, install_dir, extra_args, compiler, backend, flags, commands, should_fail)
+            except TestResult as r:
+                return r
+            finally:
+                mlog.shutdown() # Close the log file because otherwise Windows wets itself.
+
+def _run_test(test: TestDef, test_build_dir: str, install_dir: str, extra_args, compiler, backend, flags, commands, should_fail):
+    compile_commands, clean_commands, install_commands, uninstall_commands = commands
+    gen_start = time.time()
+    # Configure in-process
+    gen_args = []  # type: T.List[str]
+    if 'prefix' not in test.do_not_set_opts:
+        gen_args += ['--prefix', 'x:/usr'] if mesonlib.is_windows() else ['--prefix', '/usr']
+    if 'libdir' not in test.do_not_set_opts:
+        gen_args += ['--libdir', 'lib']
+    gen_args += [test.path.as_posix(), test_build_dir] + flags + extra_args
+    nativefile = test.path / 'nativefile.ini'
+    crossfile = test.path / 'crossfile.ini'
+    if nativefile.exists():
+        gen_args.extend(['--native-file', nativefile.as_posix()])
+    if crossfile.exists():
+        gen_args.extend(['--cross-file', crossfile.as_posix()])
+    (returncode, stdo, stde) = run_configure(gen_args, env=test.env)
+    try:
+        logfile = Path(test_build_dir, 'meson-logs', 'meson-log.txt')
+        mesonlog = logfile.open(errors='ignore', encoding='utf-8').read()
+    except Exception:
+        mesonlog = no_meson_log_msg
+    cicmds = run_ci_commands(mesonlog)
+    testresult = TestResult(cicmds)
+    testresult.add_step(BuildStep.configure, stdo, stde, mesonlog, time.time() - gen_start)
+    output_msg = validate_output(test, stdo, stde)
+    testresult.mlog += output_msg
+    if output_msg:
+        testresult.fail('Unexpected output while configuring.')
+        return testresult
+    if should_fail == 'meson':
+        if returncode == 1:
+            return testresult
+        elif returncode != 0:
+            testresult.fail('Test exited with unexpected status {}.'.format(returncode))
+            return testresult
+        else:
+            testresult.fail('Test that should have failed succeeded.')
+            return testresult
+    if returncode != 0:
+        testresult.fail('Generating the build system failed.')
+        return testresult
+    builddata = build.load(test_build_dir)
+    dir_args = get_backend_args_for_dir(backend, test_build_dir)
+
+    # Build with subprocess
+    def build_step():
+        build_start = time.time()
+        pc, o, e = Popen_safe(compile_commands + dir_args, cwd=test_build_dir)
+        testresult.add_step(BuildStep.build, o, e, '', time.time() - build_start)
+        if should_fail == 'build':
+            if pc.returncode != 0:
+                raise testresult
+            testresult.fail('Test that should have failed to build succeeded.')
+            raise testresult
+        if pc.returncode != 0:
+            testresult.fail('Compiling source code failed.')
+            raise testresult
+
+    # Touch the meson.build file to force a regenerate
+    def force_regenerate():
+        ensure_backend_detects_changes(backend)
+        os.utime(str(test.path / 'meson.build'))
+
+    # just test building
+    build_step()
+
+    # test that regeneration works for build step
+    force_regenerate()
+    build_step()  # TBD: assert nothing gets built after the regenerate?
+
+    # test that regeneration works for test step
+    force_regenerate()
+
+    # Test in-process
+    clear_internal_caches()
+    test_start = time.time()
+    (returncode, tstdo, tstde, test_log) = run_test_inprocess(test_build_dir)
+    testresult.add_step(BuildStep.test, tstdo, tstde, test_log, time.time() - test_start)
+    if should_fail == 'test':
+        if returncode != 0:
+            return testresult
+        testresult.fail('Test that should have failed to run unit tests succeeded.')
+        return testresult
+    if returncode != 0:
+        testresult.fail('Running unit tests failed.')
+        return testresult
+
+    # Do installation, if the backend supports it
+    if install_commands:
+        env = os.environ.copy()
+        env['DESTDIR'] = install_dir
+        # Install with subprocess
+        pi, o, e = Popen_safe(install_commands, cwd=test_build_dir, env=env)
+        testresult.add_step(BuildStep.install, o, e)
+        if pi.returncode != 0:
+            testresult.fail('Running install failed.')
+            return testresult
+
+    # Clean with subprocess
+    env = os.environ.copy()
+    pi, o, e = Popen_safe(clean_commands + dir_args, cwd=test_build_dir, env=env)
+    testresult.add_step(BuildStep.clean, o, e)
+    if pi.returncode != 0:
+        testresult.fail('Running clean failed.')
+        return testresult
+
+    # Validate installed files
+    testresult.add_step(BuildStep.install, '', '')
+    if not install_commands:
+        return testresult
+    install_msg = validate_install(test, Path(install_dir), compiler, builddata.environment)
+    if install_msg:
+        testresult.fail('\n' + install_msg)
+        return testresult
+
+    return testresult
+
+def gather_tests(testdir: Path, stdout_mandatory: bool) -> T.List[TestDef]:
+    tests = [t.name for t in testdir.iterdir() if t.is_dir()]
+    tests = [t for t in tests if not t.startswith('.')]  # Filter non-tests files (dot files, etc)
+    test_defs = [TestDef(testdir / t, None, []) for t in tests]
+    all_tests = []  # type: T.List[TestDef]
+    for t in test_defs:
+        test_def = {}
+        test_def_file = t.path / 'test.json'
+        if test_def_file.is_file():
+            test_def = json.loads(test_def_file.read_text())
+
+        # Handle additional environment variables
+        env = {}  # type: T.Dict[str, str]
+        if 'env' in test_def:
+            assert isinstance(test_def['env'], dict)
+            env = test_def['env']
+            for key, val in env.items():
+                val = val.replace('@ROOT@', t.path.resolve().as_posix())
+                env[key] = val
+
+        # Handle installed files
+        installed = []  # type: T.List[InstalledFile]
+        if 'installed' in test_def:
+            installed = [InstalledFile(x) for x in test_def['installed']]
+
+        # Handle expected output
+        stdout = test_def.get('stdout', [])
+        if stdout_mandatory and not stdout:
+            raise RuntimeError("{} must contain a non-empty stdout key".format(test_def_file))
+
+        # Handle the do_not_set_opts list
+        do_not_set_opts = test_def.get('do_not_set_opts', [])  # type: T.List[str]
+
+        # Skip tests if the tool requirements are not met
+        if 'tools' in test_def:
+            assert isinstance(test_def['tools'], dict)
+            for tool, vers_req in test_def['tools'].items():
+                if tool not in tool_vers_map:
+                    t.skip = True
+                elif not mesonlib.version_compare(tool_vers_map[tool], vers_req):
+                    t.skip = True
+
+        # Skip the matrix code and just update the existing test
+        if 'matrix' not in test_def:
+            t.env.update(env)
+            t.installed_files = installed
+            t.do_not_set_opts = do_not_set_opts
+            t.stdout = stdout
+            all_tests += [t]
+            continue
+
+        # 'matrix; entry is present, so build multiple tests from matrix definition
+        opt_list = []  # type: T.List[T.List[T.Tuple[str, bool]]]
+        matrix = test_def['matrix']
+        assert "options" in matrix
+        for key, val in matrix["options"].items():
+            assert isinstance(val, list)
+            tmp_opts = []  # type: T.List[T.Tuple[str, bool]]
+            for i in val:
+                assert isinstance(i, dict)
+                assert "val" in i
+                skip = False
+
+                # Skip the matrix entry if environment variable is present
+                if 'skip_on_env' in i:
+                    for skip_env_var in i['skip_on_env']:
+                        if skip_env_var in os.environ:
+                            skip = True
+
+                # Only run the test if all compiler ID's match
+                if 'compilers' in i:
+                    for lang, id_list in i['compilers'].items():
+                        if lang not in compiler_id_map or compiler_id_map[lang] not in id_list:
+                            skip = True
+                            break
+
+                # Add an empty matrix entry
+                if i['val'] is None:
+                    tmp_opts += [(None, skip)]
+                    continue
+
+                tmp_opts += [('{}={}'.format(key, i['val']), skip)]
+
+            if opt_list:
+                new_opt_list = []  # type: T.List[T.List[T.Tuple[str, bool]]]
+                for i in opt_list:
+                    for j in tmp_opts:
+                        new_opt_list += [[*i, j]]
+                opt_list = new_opt_list
+            else:
+                opt_list = [[x] for x in tmp_opts]
+
+        # Exclude specific configurations
+        if 'exclude' in matrix:
+            assert isinstance(matrix['exclude'], list)
+            new_opt_list = []  # type: T.List[T.List[T.Tuple[str, bool]]]
+            for i in opt_list:
+                exclude = False
+                opt_names = [x[0] for x in i]
+                for j in matrix['exclude']:
+                    ex_list = ['{}={}'.format(k, v) for k, v in j.items()]
+                    if all([x in opt_names for x in ex_list]):
+                        exclude = True
+                        break
+
+                if not exclude:
+                    new_opt_list += [i]
+
+            opt_list = new_opt_list
+
+        for i in opt_list:
+            name = ' '.join([x[0] for x in i if x[0] is not None])
+            opts = ['-D' + x[0] for x in i if x[0] is not None]
+            skip = any([x[1] for x in i])
+            test = TestDef(t.path, name, opts, skip or t.skip)
+            test.env.update(env)
+            test.installed_files = installed
+            test.do_not_set_opts = do_not_set_opts
+            test.stdout = stdout
+            all_tests += [test]
+
+    return sorted(all_tests)
+
+def have_d_compiler():
+    if shutil.which("ldc2"):
+        return True
+    elif shutil.which("ldc"):
+        return True
+    elif shutil.which("gdc"):
+        return True
+    elif shutil.which("dmd"):
+        # The Windows installer sometimes produces a DMD install
+        # that exists but segfaults every time the compiler is run.
+        # Don't know why. Don't know how to fix. Skip in this case.
+        cp = subprocess.run(['dmd', '--version'],
+                            stdout=subprocess.PIPE,
+                            stderr=subprocess.PIPE)
+        if cp.stdout == b'':
+            return False
+        return True
+    return False
+
+def have_objc_compiler(use_tmp: bool) -> bool:
+    with AutoDeletedDir(tempfile.mkdtemp(prefix='b ', dir=None if use_tmp else '.')) as build_dir:
+        env = environment.Environment(None, build_dir, get_fake_options('/'))
+        try:
+            objc_comp = env.detect_objc_compiler(MachineChoice.HOST)
+        except mesonlib.MesonException:
+            return False
+        if not objc_comp:
+            return False
+        env.coredata.process_new_compiler('objc', objc_comp, env)
+        try:
+            objc_comp.sanity_check(env.get_scratch_dir(), env)
+        except mesonlib.MesonException:
+            return False
+    return True
+
+def have_objcpp_compiler(use_tmp: bool) -> bool:
+    with AutoDeletedDir(tempfile.mkdtemp(prefix='b ', dir=None if use_tmp else '.')) as build_dir:
+        env = environment.Environment(None, build_dir, get_fake_options('/'))
+        try:
+            objcpp_comp = env.detect_objcpp_compiler(MachineChoice.HOST)
+        except mesonlib.MesonException:
+            return False
+        if not objcpp_comp:
+            return False
+        env.coredata.process_new_compiler('objcpp', objcpp_comp, env)
+        try:
+            objcpp_comp.sanity_check(env.get_scratch_dir(), env)
+        except mesonlib.MesonException:
+            return False
+    return True
+
+def have_java():
+    if shutil.which('javac') and shutil.which('java'):
+        return True
+    return False
+
+def skippable(suite, test):
+    # Everything is optional when not running on CI, or on Ubuntu 16.04 CI
+    if not under_ci or under_xenial_ci:
+        return True
+
+    if not suite.endswith('frameworks'):
+        return True
+
+    # this test assumptions aren't valid for Windows paths
+    if test.endswith('38 libdir must be inside prefix'):
+        return True
+
+    # gtk-doc test may be skipped, pending upstream fixes for spaces in
+    # filenames landing in the distro used for CI
+    if test.endswith('10 gtk-doc'):
+        return True
+
+    # NetCDF is not in the CI Docker image
+    if test.endswith('netcdf'):
+        return True
+
+    # MSVC doesn't link with GFortran
+    if test.endswith('14 fortran links c'):
+        return True
+
+    # Blocks are not supported on all compilers
+    if test.endswith('29 blocks'):
+        return True
+
+    # Scientific libraries are skippable on certain systems
+    # See the discussion here: https://github.com/mesonbuild/meson/pull/6562
+    if any([x in test for x in ['17 mpi', '25 hdf5', '30 scalapack']]) and skip_scientific:
+        return True
+
+    # These create OS specific tests, and need to be skippable
+    if any([x in test for x in ['16 sdl', '17 mpi']]):
+        return True
+
+    # No frameworks test should be skipped on linux CI, as we expect all
+    # prerequisites to be installed
+    if mesonlib.is_linux():
+        return False
+
+    # Boost test should only be skipped for windows CI build matrix entries
+    # which don't define BOOST_ROOT
+    if test.endswith('1 boost'):
+        if mesonlib.is_windows():
+            return 'BOOST_ROOT' not in os.environ
+        return False
+
+    # Qt is provided on macOS by Homebrew
+    if test.endswith('4 qt') and mesonlib.is_osx():
+        return False
+
+    # Other framework tests are allowed to be skipped on other platforms
+    return True
+
+def skip_csharp(backend) -> bool:
+    if backend is not Backend.ninja:
+        return True
+    if not shutil.which('resgen'):
+        return True
+    if shutil.which('mcs'):
+        return False
+    if shutil.which('csc'):
+        # Only support VS2017 for now. Earlier versions fail
+        # under CI in mysterious ways.
+        try:
+            stdo = subprocess.check_output(['csc', '/version'])
+        except subprocess.CalledProcessError:
+            return True
+        # Having incrementing version numbers would be too easy.
+        # Microsoft reset the versioning back to 1.0 (from 4.x)
+        # when they got the Roslyn based compiler. Thus there
+        # is NO WAY to reliably do version number comparisons.
+        # Only support the version that ships with VS2017.
+        return not stdo.startswith(b'2.')
+    return True
+
+# In Azure some setups have a broken rustc that will error out
+# on all compilation attempts.
+
+def has_broken_rustc() -> bool:
+    dirname = 'brokenrusttest'
+    if os.path.exists(dirname):
+        mesonlib.windows_proof_rmtree(dirname)
+    os.mkdir(dirname)
+    open(dirname + '/sanity.rs', 'w').write('''fn main() {
+}
+''')
+    pc = subprocess.run(['rustc', '-o', 'sanity.exe', 'sanity.rs'],
+                        cwd=dirname,
+                        stdout = subprocess.DEVNULL,
+                        stderr = subprocess.DEVNULL)
+    mesonlib.windows_proof_rmtree(dirname)
+    return pc.returncode != 0
+
+def should_skip_rust(backend: Backend) -> bool:
+    if not shutil.which('rustc'):
+        return True
+    if backend is not Backend.ninja:
+        return True
+    if mesonlib.is_windows() and has_broken_rustc():
+        return True
+    return False
+
+def detect_tests_to_run(only: T.List[str], use_tmp: bool) -> T.List[T.Tuple[str, T.List[TestDef], bool]]:
+    """
+    Parameters
+    ----------
+    only: list of str, optional
+        specify names of tests to run
+
+    Returns
+    -------
+    gathered_tests: list of tuple of str, list of TestDef, bool
+        tests to run
+    """
+
+    skip_fortran = not(shutil.which('gfortran') or
+                       shutil.which('flang') or
+                       shutil.which('pgfortran') or
+                       shutil.which('ifort'))
+
+    class TestCategory:
+        def __init__(self, category: str, subdir: str, skip: bool = False, stdout_mandatory: bool = False):
+            self.category = category                  # category name
+            self.subdir = subdir                      # subdirectory
+            self.skip = skip                          # skip condition
+            self.stdout_mandatory = stdout_mandatory  # expected stdout is mandatory for tests in this categroy
+
+    all_tests = [
+        TestCategory('cmake', 'cmake', not shutil.which('cmake') or (os.environ.get('compiler') == 'msvc2015' and under_ci)),
+        TestCategory('common', 'common'),
+        TestCategory('warning-meson', 'warning', stdout_mandatory=True),
+        TestCategory('failing-meson', 'failing', stdout_mandatory=True),
+        TestCategory('failing-build', 'failing build'),
+        TestCategory('failing-test',  'failing test'),
+        TestCategory('keyval', 'keyval'),
+        TestCategory('platform-osx', 'osx', not mesonlib.is_osx()),
+        TestCategory('platform-windows', 'windows', not mesonlib.is_windows() and not mesonlib.is_cygwin()),
+        TestCategory('platform-linux', 'linuxlike', mesonlib.is_osx() or mesonlib.is_windows()),
+        TestCategory('java', 'java', backend is not Backend.ninja or mesonlib.is_osx() or not have_java()),
+        TestCategory('C#', 'csharp', skip_csharp(backend)),
+        TestCategory('vala', 'vala', backend is not Backend.ninja or not shutil.which(os.environ.get('VALAC', 'valac'))),
+        TestCategory('rust', 'rust', should_skip_rust(backend)),
+        TestCategory('d', 'd', backend is not Backend.ninja or not have_d_compiler()),
+        TestCategory('objective c', 'objc', backend not in (Backend.ninja, Backend.xcode) or not have_objc_compiler(options.use_tmpdir)),
+        TestCategory('objective c++', 'objcpp', backend not in (Backend.ninja, Backend.xcode) or not have_objcpp_compiler(options.use_tmpdir)),
+        TestCategory('fortran', 'fortran', skip_fortran or backend != Backend.ninja),
+        TestCategory('swift', 'swift', backend not in (Backend.ninja, Backend.xcode) or not shutil.which('swiftc')),
+        # CUDA tests on Windows: use Ninja backend:  python run_project_tests.py --only cuda --backend ninja
+        TestCategory('cuda', 'cuda', backend not in (Backend.ninja, Backend.xcode) or not shutil.which('nvcc')),
+        TestCategory('python3', 'python3', backend is not Backend.ninja),
+        TestCategory('python', 'python', backend is not Backend.ninja),
+        TestCategory('fpga', 'fpga', shutil.which('yosys') is None),
+        TestCategory('frameworks', 'frameworks'),
+        TestCategory('nasm', 'nasm'),
+        TestCategory('wasm', 'wasm', shutil.which('emcc') is None or backend is not Backend.ninja),
+    ]
+
+    categories = [t.category for t in all_tests]
+    assert categories == ALL_TESTS, 'argparse("--only", choices=ALL_TESTS) need to be updated to match all_tests categories'
+
+    if only:
+        all_tests = [t for t in all_tests if t.category in only]
+
+    gathered_tests = [(t.category, gather_tests(Path('test cases', t.subdir), t.stdout_mandatory), t.skip) for t in all_tests]
+    return gathered_tests
+
+def run_tests(all_tests: T.List[T.Tuple[str, T.List[TestDef], bool]],
+              log_name_base: str, failfast: bool,
+              extra_args: T.List[str], use_tmp: bool) -> T.Tuple[int, int, int]:
+    global logfile
+    txtname = log_name_base + '.txt'
+    with open(txtname, 'w', encoding='utf-8', errors='ignore') as lf:
+        logfile = lf
+        return _run_tests(all_tests, log_name_base, failfast, extra_args, use_tmp)
+
+def _run_tests(all_tests: T.List[T.Tuple[str, T.List[TestDef], bool]],
+               log_name_base: str, failfast: bool,
+               extra_args: T.List[str], use_tmp: bool) -> T.Tuple[int, int, int]:
+    global stop, executor, futures, system_compiler
+    xmlname = log_name_base + '.xml'
+    junit_root = ET.Element('testsuites')
+    conf_time = 0
+    build_time = 0
+    test_time = 0
+    passing_tests = 0
+    failing_tests = 0
+    skipped_tests = 0
+    commands = (compile_commands, clean_commands, install_commands, uninstall_commands)
+
+    try:
+        # This fails in some CI environments for unknown reasons.
+        num_workers = multiprocessing.cpu_count()
+    except Exception as e:
+        print('Could not determine number of CPUs due to the following reason:' + str(e))
+        print('Defaulting to using only one process')
+        num_workers = 1
+    # Due to Ninja deficiency, almost 50% of build time
+    # is spent waiting. Do something useful instead.
+    #
+    # Remove this once the following issue has been resolved:
+    # https://github.com/mesonbuild/meson/pull/2082
+    if not mesonlib.is_windows():  # twice as fast on Windows by *not* multiplying by 2.
+        num_workers *= 2
+    executor = ProcessPoolExecutor(max_workers=num_workers)
+
+    for name, test_cases, skipped in all_tests:
+        current_suite = ET.SubElement(junit_root, 'testsuite', {'name': name, 'tests': str(len(test_cases))})
+        print()
+        if skipped:
+            print(bold('Not running %s tests.' % name))
+        else:
+            print(bold('Running %s tests.' % name))
+        print()
+        futures = []
+        for t in test_cases:
+            # Jenkins screws us over by automatically sorting test cases by name
+            # and getting it wrong by not doing logical number sorting.
+            (testnum, testbase) = t.path.name.split(' ', 1)
+            testname = '%.3d %s' % (int(testnum), testbase)
+            if t.name:
+                testname += ' ({})'.format(t.name)
+            should_fail = False
+            suite_args = []
+            if name.startswith('failing'):
+                should_fail = name.split('failing-')[1]
+            if name.startswith('warning'):
+                suite_args = ['--fatal-meson-warnings']
+                should_fail = name.split('warning-')[1]
+
+            t.skip = skipped or t.skip
+            result = executor.submit(run_test, t, extra_args + suite_args + t.args,
+                                     system_compiler, backend, backend_flags, commands, should_fail, use_tmp)
+            futures.append((testname, t, result))
+        for (testname, t, result) in futures:
+            sys.stdout.flush()
+            try:
+                result = result.result()
+            except CancelledError:
+                continue
+            if (result is None) or (('MESON_SKIP_TEST' in result.stdo) and (skippable(name, t.path.as_posix()))):
+                print(yellow('Skipping:'), t.display_name())
+                current_test = ET.SubElement(current_suite, 'testcase', {'name': testname,
+                                                                         'classname': name})
+                ET.SubElement(current_test, 'skipped', {})
+                skipped_tests += 1
+            else:
+                without_install = "" if len(install_commands) > 0 else " (without install)"
+                if result.msg != '':
+                    print(red('Failed test{} during {}: {!r}'.format(without_install, result.step.name, t.display_name())))
+                    print('Reason:', result.msg)
+                    failing_tests += 1
+                    if result.step == BuildStep.configure and result.mlog != no_meson_log_msg:
+                        # For configure failures, instead of printing stdout,
+                        # print the meson log if available since it's a superset
+                        # of stdout and often has very useful information.
+                        failing_logs.append(result.mlog)
+                    elif under_ci:
+                        # Always print the complete meson log when running in
+                        # a CI. This helps debugging issues that only occur in
+                        # a hard to reproduce environment
+                        failing_logs.append(result.mlog)
+                        failing_logs.append(result.stdo)
+                    else:
+                        failing_logs.append(result.stdo)
+                    for cmd_res in result.cicmds:
+                        failing_logs.append(cmd_res)
+                    failing_logs.append(result.stde)
+                    if failfast:
+                        print("Cancelling the rest of the tests")
+                        for (_, _, res) in futures:
+                            res.cancel()
+                else:
+                    print('Succeeded test%s: %s' % (without_install, t.display_name()))
+                    passing_tests += 1
+                conf_time += result.conftime
+                build_time += result.buildtime
+                test_time += result.testtime
+                total_time = conf_time + build_time + test_time
+                log_text_file(logfile, t.path, result.stdo, result.stde)
+                current_test = ET.SubElement(current_suite, 'testcase', {'name': testname,
+                                                                         'classname': name,
+                                                                         'time': '%.3f' % total_time})
+                if result.msg != '':
+                    ET.SubElement(current_test, 'failure', {'message': result.msg})
+                stdoel = ET.SubElement(current_test, 'system-out')
+                stdoel.text = result.stdo
+                stdeel = ET.SubElement(current_test, 'system-err')
+                stdeel.text = result.stde
+
+            if failfast and failing_tests > 0:
+                break
+
+    print("\nTotal configuration time: %.2fs" % conf_time)
+    print("Total build time: %.2fs" % build_time)
+    print("Total test time: %.2fs" % test_time)
+    ET.ElementTree(element=junit_root).write(xmlname, xml_declaration=True, encoding='UTF-8')
+    return passing_tests, failing_tests, skipped_tests
+
+def check_file(file: Path):
+    lines = file.read_bytes().split(b'\n')
+    tabdetector = re.compile(br' *\t')
+    for i, line in enumerate(lines):
+        if re.match(tabdetector, line):
+            raise SystemExit("File {} contains a tab indent on line {:d}. Only spaces are permitted.".format(file, i + 1))
+        if line.endswith(b'\r'):
+            raise SystemExit("File {} contains DOS line ending on line {:d}. Only unix-style line endings are permitted.".format(file, i + 1))
+
+def check_format():
+    check_suffixes = {'.c',
+                      '.cpp',
+                      '.cxx',
+                      '.cc',
+                      '.rs',
+                      '.f90',
+                      '.vala',
+                      '.d',
+                      '.s',
+                      '.m',
+                      '.mm',
+                      '.asm',
+                      '.java',
+                      '.txt',
+                      '.py',
+                      '.swift',
+                      '.build',
+                      '.md',
+                      }
+    skip_dirs = {
+        '.dub',                         # external deps are here
+        '.pytest_cache',
+        'meson-logs', 'meson-private',
+        '.eggs', '_cache',              # e.g. .mypy_cache
+        'venv',                         # virtualenvs have DOS line endings
+    }
+    for (root, _, filenames) in os.walk('.'):
+        if any([x in root for x in skip_dirs]):
+            continue
+        for fname in filenames:
+            file = Path(fname)
+            if file.suffix.lower() in check_suffixes:
+                if file.name in ('sitemap.txt', 'meson-test-run.txt'):
+                    continue
+                check_file(root / file)
+
+def check_meson_commands_work(options):
+    global backend, compile_commands, test_commands, install_commands
+    testdir = PurePath('test cases', 'common', '1 trivial').as_posix()
+    meson_commands = mesonlib.python_command + [get_meson_script()]
+    with AutoDeletedDir(tempfile.mkdtemp(prefix='b ', dir=None if options.use_tmpdir else '.')) as build_dir:
+        print('Checking that configuring works...')
+        gen_cmd = meson_commands + [testdir, build_dir] + backend_flags + options.extra_args
+        pc, o, e = Popen_safe(gen_cmd)
+        if pc.returncode != 0:
+            raise RuntimeError('Failed to configure {!r}:\n{}\n{}'.format(testdir, e, o))
+        print('Checking that building works...')
+        dir_args = get_backend_args_for_dir(backend, build_dir)
+        pc, o, e = Popen_safe(compile_commands + dir_args, cwd=build_dir)
+        if pc.returncode != 0:
+            raise RuntimeError('Failed to build {!r}:\n{}\n{}'.format(testdir, e, o))
+        print('Checking that testing works...')
+        pc, o, e = Popen_safe(test_commands, cwd=build_dir)
+        if pc.returncode != 0:
+            raise RuntimeError('Failed to test {!r}:\n{}\n{}'.format(testdir, e, o))
+        if install_commands:
+            print('Checking that installing works...')
+            pc, o, e = Popen_safe(install_commands, cwd=build_dir)
+            if pc.returncode != 0:
+                raise RuntimeError('Failed to install {!r}:\n{}\n{}'.format(testdir, e, o))
+
+
+def detect_system_compiler(options):
+    global system_compiler, compiler_id_map
+
+    with AutoDeletedDir(tempfile.mkdtemp(prefix='b ', dir=None if options.use_tmpdir else '.')) as build_dir:
+        fake_opts = get_fake_options('/')
+        if options.cross_file:
+            fake_opts.cross_file = [options.cross_file]
+        env = environment.Environment(None, build_dir, fake_opts)
+        print()
+        for lang in sorted(compilers.all_languages):
+            try:
+                comp = env.compiler_from_language(lang, MachineChoice.HOST)
+                details = '{:<10} {} {}'.format('[' + comp.get_id() + ']', ' '.join(comp.get_exelist()), comp.get_version_string())
+                compiler_id_map[lang] = comp.get_id()
+            except mesonlib.MesonException:
+                comp = None
+                details = '[not found]'
+            print('%-7s: %s' % (lang, details))
+
+            # note C compiler for later use by platform_fix_name()
+            if lang == 'c':
+                if comp:
+                    system_compiler = comp.get_id()
+                else:
+                    raise RuntimeError("Could not find C compiler.")
+        print()
+
+def print_tool_versions():
+    tools = [
+        {
+            'tool': 'ninja',
+            'args': ['--version'],
+            'regex': re.compile(r'^([0-9]+(\.[0-9]+)*(-[a-z0-9]+)?)$'),
+            'match_group': 1,
+        },
+        {
+            'tool': 'cmake',
+            'args': ['--version'],
+            'regex': re.compile(r'^cmake version ([0-9]+(\.[0-9]+)*(-[a-z0-9]+)?)$'),
+            'match_group': 1,
+        },
+        {
+            'tool': 'hotdoc',
+            'args': ['--version'],
+            'regex': re.compile(r'^([0-9]+(\.[0-9]+)*(-[a-z0-9]+)?)$'),
+            'match_group': 1,
+        },
+    ]
+
+    def get_version(t: dict) -> str:
+        exe = shutil.which(t['tool'])
+        if not exe:
+            return 'not found'
+
+        args = [t['tool']] + t['args']
+        pc, o, e = Popen_safe(args)
+        if pc.returncode != 0:
+            return '{} (invalid {} executable)'.format(exe, t['tool'])
+        for i in o.split('\n'):
+            i = i.strip('\n\r\t ')
+            m = t['regex'].match(i)
+            if m is not None:
+                tool_vers_map[t['tool']] = m.group(t['match_group'])
+                return '{} ({})'.format(exe, m.group(t['match_group']))
+
+        return '{} (unknown)'.format(exe)
+
+    max_width = max([len(x['tool']) for x in tools] + [7])
+    for tool in tools:
+        print('{0:<{2}}: {1}'.format(tool['tool'], get_version(tool), max_width))
+    print()
+
+if __name__ == '__main__':
+    parser = argparse.ArgumentParser(description="Run the test suite of Meson.")
+    parser.add_argument('extra_args', nargs='*',
+                        help='arguments that are passed directly to Meson (remember to have -- before these).')
+    parser.add_argument('--backend', dest='backend', choices=backendlist)
+    parser.add_argument('--failfast', action='store_true',
+                        help='Stop running if test case fails')
+    parser.add_argument('--no-unittests', action='store_true',
+                        help='Not used, only here to simplify run_tests.py')
+    parser.add_argument('--only', help='name of test(s) to run', nargs='+', choices=ALL_TESTS)
+    parser.add_argument('--cross-file', action='store', help='File describing cross compilation environment.')
+    parser.add_argument('--use-tmpdir', action='store_true', help='Use tmp directory for temporary files.')
+    options = parser.parse_args()
+    if options.cross_file:
+        options.extra_args += ['--cross-file', options.cross_file]
+
+    print('Meson build system', meson_version, 'Project Tests')
+    print('Using python', sys.version.split('\n')[0])
+    setup_commands(options.backend)
+    detect_system_compiler(options)
+    print_tool_versions()
+    script_dir = os.path.split(__file__)[0]
+    if script_dir != '':
+        os.chdir(script_dir)
+    check_format()
+    check_meson_commands_work(options)
+    try:
+        all_tests = detect_tests_to_run(options.only, options.use_tmpdir)
+        (passing_tests, failing_tests, skipped_tests) = run_tests(all_tests, 'meson-test-run', options.failfast, options.extra_args, options.use_tmpdir)
+    except StopException:
+        pass
+    print('\nTotal passed tests:', green(str(passing_tests)))
+    print('Total failed tests:', red(str(failing_tests)))
+    print('Total skipped tests:', yellow(str(skipped_tests)))
+    if failing_tests > 0:
+        print('\nMesonlogs of failing tests\n')
+        for l in failing_logs:
+            try:
+                print(l, '\n')
+            except UnicodeError:
+                print(l.encode('ascii', errors='replace').decode(), '\n')
+    for name, dirs, _ in all_tests:
+        dir_names = list(set(x.path.name for x in dirs))
+        for k, g in itertools.groupby(dir_names, key=lambda x: x.split()[0]):
+            tests = list(g)
+            if len(tests) != 1:
+                print('WARNING: The %s suite contains duplicate "%s" tests: "%s"' % (name, k, '", "'.join(tests)))
+    raise SystemExit(failing_tests)
diff --git a/meson/run_tests.py b/meson/run_tests.py
new file mode 100755
index 000000000..08ad78a61
--- /dev/null
+++ b/meson/run_tests.py
@@ -0,0 +1,405 @@
+#!/usr/bin/env python3
+
+# Copyright 2012-2017 The Meson development team
+
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+
+#     http://www.apache.org/licenses/LICENSE-2.0
+
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import os
+import sys
+import time
+import shutil
+import subprocess
+import tempfile
+import platform
+import argparse
+from io import StringIO
+from enum import Enum
+from glob import glob
+from pathlib import Path
+from mesonbuild import compilers
+from mesonbuild import dependencies
+from mesonbuild import mesonlib
+from mesonbuild import mesonmain
+from mesonbuild import mtest
+from mesonbuild import mlog
+from mesonbuild.environment import Environment, detect_ninja
+from mesonbuild.coredata import backendlist, version as meson_version
+
+NINJA_1_9_OR_NEWER = False
+NINJA_CMD = None
+# If we're on CI, just assume we have ninja in PATH and it's new enough because
+# we provide that. This avoids having to detect ninja for every subprocess unit
+# test that we run.
+if 'CI' in os.environ:
+    NINJA_1_9_OR_NEWER = True
+    NINJA_CMD = ['ninja']
+else:
+    # Look for 1.9 to see if https://github.com/ninja-build/ninja/issues/1219
+    # is fixed
+    NINJA_CMD = detect_ninja('1.9')
+    if NINJA_CMD is not None:
+        NINJA_1_9_OR_NEWER = True
+    else:
+        mlog.warning('Found ninja <1.9, tests will run slower', once=True)
+        NINJA_CMD = detect_ninja()
+if NINJA_CMD is None:
+    raise RuntimeError('Could not find Ninja v1.7 or newer')
+
+def guess_backend(backend, msbuild_exe: str):
+    # Auto-detect backend if unspecified
+    backend_flags = []
+    if backend is None:
+        if msbuild_exe is not None and (mesonlib.is_windows() and not _using_intelcl()):
+            backend = 'vs' # Meson will auto-detect VS version to use
+        else:
+            backend = 'ninja'
+    # Set backend arguments for Meson
+    if backend.startswith('vs'):
+        backend_flags = ['--backend=' + backend]
+        backend = Backend.vs
+    elif backend == 'xcode':
+        backend_flags = ['--backend=xcode']
+        backend = Backend.xcode
+    elif backend == 'ninja':
+        backend_flags = ['--backend=ninja']
+        backend = Backend.ninja
+    else:
+        raise RuntimeError('Unknown backend: {!r}'.format(backend))
+    return (backend, backend_flags)
+
+
+def _using_intelcl() -> bool:
+    """
+    detect if intending to using Intel-Cl compilers (Intel compilers on Windows)
+    Sufficient evidence of intent is that user is working in the Intel compiler
+    shell environment, otherwise this function returns False
+    """
+    if not mesonlib.is_windows():
+        return False
+    # handle where user tried to "blank" MKLROOT and left space(s)
+    if not os.environ.get('MKLROOT', '').strip():
+        return False
+    if (os.environ.get('CC') == 'icl' or
+            os.environ.get('CXX') == 'icl' or
+            os.environ.get('FC') == 'ifort'):
+        return True
+    # Intel-Cl users might not have the CC,CXX,FC envvars set,
+    # but because they're in Intel shell, the exe's below are on PATH
+    if shutil.which('icl') or shutil.which('ifort'):
+        return True
+    mlog.warning('It appears you might be intending to use Intel compiler on Windows '
+                 'since non-empty environment variable MKLROOT is set to {} '
+                 'However, Meson cannot find the Intel WIndows compiler executables (icl,ifort).'
+                 'Please try using the Intel shell.'.format(os.environ.get('MKLROOT')))
+    return False
+
+
+# Fake classes and objects for mocking
+class FakeBuild:
+    def __init__(self, env):
+        self.environment = env
+
+class FakeCompilerOptions:
+    def __init__(self):
+        self.value = []
+
+def get_fake_options(prefix=''):
+    opts = argparse.Namespace()
+    opts.native_file = []
+    opts.cross_file = None
+    opts.wrap_mode = None
+    opts.prefix = prefix
+    opts.cmd_line_options = {}
+    return opts
+
+def get_fake_env(sdir='', bdir=None, prefix='', opts=None):
+    if opts is None:
+        opts = get_fake_options(prefix)
+    env = Environment(sdir, bdir, opts)
+    env.coredata.compiler_options.host['c']['args'] = FakeCompilerOptions()
+    env.machines.host.cpu_family = 'x86_64' # Used on macOS inside find_library
+    return env
+
+
+Backend = Enum('Backend', 'ninja vs xcode')
+
+if 'MESON_EXE' in os.environ:
+    meson_exe = mesonlib.split_args(os.environ['MESON_EXE'])
+else:
+    meson_exe = None
+
+if mesonlib.is_windows() or mesonlib.is_cygwin():
+    exe_suffix = '.exe'
+else:
+    exe_suffix = ''
+
+def get_meson_script():
+    '''
+    Guess the meson that corresponds to the `mesonbuild` that has been imported
+    so we can run configure and other commands in-process, since mesonmain.run
+    needs to know the meson_command to use.
+
+    Also used by run_unittests.py to determine what meson to run when not
+    running in-process (which is the default).
+    '''
+    # Is there a meson.py next to the mesonbuild currently in use?
+    mesonbuild_dir = Path(mesonmain.__file__).resolve().parent.parent
+    meson_script = mesonbuild_dir / 'meson.py'
+    if meson_script.is_file():
+        return str(meson_script)
+    # Then if mesonbuild is in PYTHONPATH, meson must be in PATH
+    mlog.warning('Could not find meson.py next to the mesonbuild module. '
+                 'Trying system meson...')
+    meson_cmd = shutil.which('meson')
+    if meson_cmd:
+        return meson_cmd
+    raise RuntimeError('Could not find {!r} or a meson in PATH'.format(meson_script))
+
+def get_backend_args_for_dir(backend, builddir):
+    '''
+    Visual Studio backend needs to be given the solution to build
+    '''
+    if backend is Backend.vs:
+        sln_name = glob(os.path.join(builddir, '*.sln'))[0]
+        return [os.path.split(sln_name)[-1]]
+    return []
+
+def find_vcxproj_with_target(builddir, target):
+    import re, fnmatch
+    t, ext = os.path.splitext(target)
+    if ext:
+        p = r'{}\s*\{}'.format(t, ext)
+    else:
+        p = r'{}'.format(t)
+    for _, _, files in os.walk(builddir):
+        for f in fnmatch.filter(files, '*.vcxproj'):
+            f = os.path.join(builddir, f)
+            with open(f, 'r', encoding='utf-8') as o:
+                if re.search(p, o.read(), flags=re.MULTILINE):
+                    return f
+    raise RuntimeError('No vcxproj matching {!r} in {!r}'.format(p, builddir))
+
+def get_builddir_target_args(backend, builddir, target):
+    dir_args = []
+    if not target:
+        dir_args = get_backend_args_for_dir(backend, builddir)
+    if target is None:
+        return dir_args
+    if backend is Backend.vs:
+        vcxproj = find_vcxproj_with_target(builddir, target)
+        target_args = [vcxproj]
+    elif backend is Backend.xcode:
+        target_args = ['-target', target]
+    elif backend is Backend.ninja:
+        target_args = [target]
+    else:
+        raise AssertionError('Unknown backend: {!r}'.format(backend))
+    return target_args + dir_args
+
+def get_backend_commands(backend, debug=False):
+    install_cmd = []
+    uninstall_cmd = []
+    if backend is Backend.vs:
+        cmd = ['msbuild']
+        clean_cmd = cmd + ['/target:Clean']
+        test_cmd = cmd + ['RUN_TESTS.vcxproj']
+    elif backend is Backend.xcode:
+        cmd = ['xcodebuild']
+        # In Xcode9 new build system's clean command fails when using a custom build directory.
+        # Maybe use it when CI uses Xcode10 we can remove '-UseNewBuildSystem=FALSE'
+        clean_cmd = cmd + ['-alltargets', 'clean', '-UseNewBuildSystem=FALSE']
+        test_cmd = cmd + ['-target', 'RUN_TESTS']
+    elif backend is Backend.ninja:
+        global NINJA_CMD
+        cmd = NINJA_CMD + ['-w', 'dupbuild=err', '-d', 'explain']
+        if debug:
+            cmd += ['-v']
+        clean_cmd = cmd + ['clean']
+        test_cmd = cmd + ['test', 'benchmark']
+        install_cmd = cmd + ['install']
+        uninstall_cmd = cmd + ['uninstall']
+    else:
+        raise AssertionError('Unknown backend: {!r}'.format(backend))
+    return cmd, clean_cmd, test_cmd, install_cmd, uninstall_cmd
+
+def ensure_backend_detects_changes(backend):
+    global NINJA_1_9_OR_NEWER
+    if backend is not Backend.ninja:
+        return
+    need_workaround = False
+    # We're not running on HFS+ which only stores dates in seconds:
+    # https://developer.apple.com/legacy/library/technotes/tn/tn1150.html#HFSPlusDates
+    # XXX: Upgrade Travis image to Apple FS when that becomes available
+    # TODO: Detect HFS+ vs APFS
+    if mesonlib.is_osx():
+        mlog.warning('Running on HFS+, enabling timestamp resolution workaround', once=True)
+        need_workaround = True
+    # We're using ninja >= 1.9 which has QuLogic's patch for sub-1s resolution
+    # timestamps
+    if not NINJA_1_9_OR_NEWER:
+        mlog.warning('Don\'t have ninja >= 1.9, enabling timestamp resolution workaround', once=True)
+        need_workaround = True
+    # Increase the difference between build.ninja's timestamp and the timestamp
+    # of whatever you changed: https://github.com/ninja-build/ninja/issues/371
+    if need_workaround:
+        time.sleep(1)
+
+def run_mtest_inprocess(commandlist):
+    old_stdout = sys.stdout
+    sys.stdout = mystdout = StringIO()
+    old_stderr = sys.stderr
+    sys.stderr = mystderr = StringIO()
+    try:
+        returncode = mtest.run_with_args(commandlist)
+    finally:
+        sys.stdout = old_stdout
+        sys.stderr = old_stderr
+    return returncode, mystdout.getvalue(), mystderr.getvalue()
+
+def clear_meson_configure_class_caches():
+    compilers.CCompiler.library_dirs_cache = {}
+    compilers.CCompiler.program_dirs_cache = {}
+    compilers.CCompiler.find_library_cache = {}
+    compilers.CCompiler.find_framework_cache = {}
+    dependencies.PkgConfigDependency.pkgbin_cache = {}
+    dependencies.PkgConfigDependency.class_pkgbin = mesonlib.PerMachine(None, None)
+
+def run_configure_inprocess(commandlist, env=None):
+    old_stdout = sys.stdout
+    sys.stdout = mystdout = StringIO()
+    old_stderr = sys.stderr
+    sys.stderr = mystderr = StringIO()
+    old_environ = os.environ.copy()
+    if env is not None:
+        os.environ.update(env)
+    try:
+        returncode = mesonmain.run(commandlist, get_meson_script())
+    finally:
+        sys.stdout = old_stdout
+        sys.stderr = old_stderr
+        clear_meson_configure_class_caches()
+        os.environ.clear()
+        os.environ.update(old_environ)
+    return returncode, mystdout.getvalue(), mystderr.getvalue()
+
+def run_configure_external(full_command, env=None):
+    pc, o, e = mesonlib.Popen_safe(full_command, env=env)
+    return pc.returncode, o, e
+
+def run_configure(commandlist, env=None):
+    global meson_exe
+    if meson_exe:
+        return run_configure_external(meson_exe + commandlist, env=env)
+    return run_configure_inprocess(commandlist, env=env)
+
+def print_system_info():
+    print(mlog.bold('System information.').get_text(mlog.colorize_console()))
+    print('Architecture:', platform.architecture())
+    print('Machine:', platform.machine())
+    print('Platform:', platform.system())
+    print('Processor:', platform.processor())
+    print('System:', platform.system())
+    print('')
+    print(flush=True)
+
+def main():
+    print_system_info()
+    parser = argparse.ArgumentParser()
+    parser.add_argument('--cov', action='store_true')
+    parser.add_argument('--backend', default=None, dest='backend',
+                        choices=backendlist)
+    parser.add_argument('--cross', default=[], dest='cross', action='append')
+    parser.add_argument('--failfast', action='store_true')
+    parser.add_argument('--no-unittests', action='store_true', default=False)
+    (options, _) = parser.parse_known_args()
+    # Enable coverage early...
+    enable_coverage = options.cov
+    if enable_coverage:
+        os.makedirs('.coverage', exist_ok=True)
+        sys.argv.remove('--cov')
+        import coverage
+        coverage.process_startup()
+    returncode = 0
+    cross = options.cross
+    backend, _ = guess_backend(options.backend, shutil.which('msbuild'))
+    no_unittests = options.no_unittests
+    # Running on a developer machine? Be nice!
+    if not mesonlib.is_windows() and not mesonlib.is_haiku() and 'CI' not in os.environ:
+        os.nice(20)
+    # Appveyor sets the `platform` environment variable which completely messes
+    # up building with the vs2010 and vs2015 backends.
+    #
+    # Specifically, MSBuild reads the `platform` environment variable to set
+    # the configured value for the platform (Win32/x64/arm), which breaks x86
+    # builds.
+    #
+    # Appveyor setting this also breaks our 'native build arch' detection for
+    # Windows in environment.py:detect_windows_arch() by overwriting the value
+    # of `platform` set by vcvarsall.bat.
+    #
+    # While building for x86, `platform` should be unset.
+    if 'APPVEYOR' in os.environ and os.environ['arch'] == 'x86':
+        os.environ.pop('platform')
+    # Run tests
+    # Can't pass arguments to unit tests, so set the backend to use in the environment
+    env = os.environ.copy()
+    env['MESON_UNIT_TEST_BACKEND'] = backend.name
+    with tempfile.TemporaryDirectory() as temp_dir:
+        # Enable coverage on all subsequent processes.
+        if enable_coverage:
+            Path(temp_dir, 'usercustomize.py').open('w').write(
+                'import coverage\n'
+                'coverage.process_startup()\n')
+            env['COVERAGE_PROCESS_START'] = '.coveragerc'
+            if 'PYTHONPATH' in env:
+                env['PYTHONPATH'] = os.pathsep.join([temp_dir, env.get('PYTHONPATH')])
+            else:
+                env['PYTHONPATH'] = temp_dir
+        if not cross:
+            cmd = mesonlib.python_command + ['run_meson_command_tests.py', '-v']
+            if options.failfast:
+                cmd += ['--failfast']
+            returncode += subprocess.call(cmd, env=env)
+            if options.failfast and returncode != 0:
+                return returncode
+            if no_unittests:
+                print('Skipping all unit tests.')
+                print(flush=True)
+                returncode = 0
+            else:
+                print(mlog.bold('Running unittests.').get_text(mlog.colorize_console()))
+                print(flush=True)
+                cmd = mesonlib.python_command + ['run_unittests.py', '-v']
+                if options.failfast:
+                    cmd += ['--failfast']
+                returncode += subprocess.call(cmd, env=env)
+                if options.failfast and returncode != 0:
+                    return returncode
+            cmd = mesonlib.python_command + ['run_project_tests.py'] + sys.argv[1:]
+            returncode += subprocess.call(cmd, env=env)
+        else:
+            cross_test_args = mesonlib.python_command + ['run_cross_test.py']
+            for cf in options.cross:
+                print(mlog.bold('Running {} cross tests.'.format(cf)).get_text(mlog.colorize_console()))
+                print(flush=True)
+                cmd = cross_test_args + ['cross/' + cf]
+                if options.failfast:
+                    cmd += ['--failfast']
+                returncode += subprocess.call(cmd, env=env)
+                if options.failfast and returncode != 0:
+                    return returncode
+    return returncode
+
+if __name__ == '__main__':
+    print('Meson build system', meson_version, 'Project and Unit Tests')
+    raise SystemExit(main())
diff --git a/meson/run_unittests.py b/meson/run_unittests.py
new file mode 100755
index 000000000..9f36669bb
--- /dev/null
+++ b/meson/run_unittests.py
@@ -0,0 +1,8640 @@
+#!/usr/bin/env python3
+# Copyright 2016-2017 The Meson development team
+
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+
+#     http://www.apache.org/licenses/LICENSE-2.0
+
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import typing as T
+import stat
+import subprocess
+import re
+import json
+import tempfile
+import textwrap
+import os
+import shutil
+import sys
+import unittest
+import platform
+import pickle
+import functools
+import io
+import operator
+import threading
+import urllib.error
+import urllib.request
+import zipfile
+import hashlib
+from itertools import chain
+from unittest import mock
+from configparser import ConfigParser
+from contextlib import contextmanager
+from glob import glob
+from pathlib import (PurePath, Path)
+from distutils.dir_util import copy_tree
+import typing as T
+
+import mesonbuild.mlog
+import mesonbuild.depfile
+import mesonbuild.dependencies.base
+import mesonbuild.compilers
+import mesonbuild.envconfig
+import mesonbuild.environment
+import mesonbuild.mesonlib
+import mesonbuild.coredata
+import mesonbuild.modules.gnome
+from mesonbuild.interpreter import Interpreter, ObjectHolder
+from mesonbuild.ast import AstInterpreter
+from mesonbuild.mesonlib import (
+    BuildDirLock, LibType, MachineChoice, PerMachine, Version, is_windows,
+    is_osx, is_cygwin, is_dragonflybsd, is_openbsd, is_haiku, is_sunos,
+    windows_proof_rmtree, python_command, version_compare, split_args,
+    quote_arg, relpath, is_linux
+)
+from mesonbuild.environment import detect_ninja
+from mesonbuild.mesonlib import MesonException, EnvironmentException
+from mesonbuild.dependencies import PkgConfigDependency, ExternalProgram
+import mesonbuild.dependencies.base
+from mesonbuild.build import Target, ConfigurationData
+import mesonbuild.modules.pkgconfig
+
+from mesonbuild.mtest import TAPParser, TestResult
+
+from run_tests import (
+    Backend, FakeBuild, FakeCompilerOptions,
+    ensure_backend_detects_changes, exe_suffix, get_backend_commands,
+    get_builddir_target_args, get_fake_env, get_fake_options, get_meson_script,
+    run_configure_inprocess, run_mtest_inprocess
+)
+
+
+URLOPEN_TIMEOUT = 5
+
+@contextmanager
+def chdir(path: str):
+    curdir = os.getcwd()
+    os.chdir(path)
+    yield
+    os.chdir(curdir)
+
+
+def get_dynamic_section_entry(fname, entry):
+    if is_cygwin() or is_osx():
+        raise unittest.SkipTest('Test only applicable to ELF platforms')
+
+    try:
+        raw_out = subprocess.check_output(['readelf', '-d', fname],
+                                          universal_newlines=True)
+    except FileNotFoundError:
+        # FIXME: Try using depfixer.py:Elf() as a fallback
+        raise unittest.SkipTest('readelf not found')
+    pattern = re.compile(entry + r': \[(.*?)\]')
+    for line in raw_out.split('\n'):
+        m = pattern.search(line)
+        if m is not None:
+            return m.group(1)
+    return None # The file did not contain the specified entry.
+
+def get_soname(fname):
+    return get_dynamic_section_entry(fname, 'soname')
+
+def get_rpath(fname):
+    return get_dynamic_section_entry(fname, r'(?:rpath|runpath)')
+
+def is_tarball():
+    if not os.path.isdir('docs'):
+        return True
+    return False
+
+def is_ci():
+    if 'CI' in os.environ:
+        return True
+    return False
+
+def is_pull():
+    # Travis
+    if os.environ.get('TRAVIS_PULL_REQUEST', 'false') != 'false':
+        return True
+    # Azure
+    if 'SYSTEM_PULLREQUEST_ISFORK' in os.environ:
+        return True
+    return False
+
+def _git_init(project_dir):
+    subprocess.check_call(['git', 'init'], cwd=project_dir, stdout=subprocess.DEVNULL)
+    subprocess.check_call(['git', 'config',
+                           'user.name', 'Author Person'], cwd=project_dir)
+    subprocess.check_call(['git', 'config',
+                           'user.email', 'teh_coderz@example.com'], cwd=project_dir)
+    subprocess.check_call('git add *', cwd=project_dir, shell=True,
+                          stdout=subprocess.DEVNULL)
+    subprocess.check_call(['git', 'commit', '-a', '-m', 'I am a project'], cwd=project_dir,
+                          stdout=subprocess.DEVNULL)
+
+@functools.lru_cache()
+def is_real_gnu_compiler(path):
+    '''
+    Check if the gcc we have is a real gcc and not a macOS wrapper around clang
+    '''
+    if not path:
+        return False
+    out = subprocess.check_output([path, '--version'], universal_newlines=True, stderr=subprocess.STDOUT)
+    return 'Free Software Foundation' in out
+
+def skipIfNoExecutable(exename):
+    '''
+    Skip this test if the given executable is not found.
+    '''
+    def wrapper(func):
+        @functools.wraps(func)
+        def wrapped(*args, **kwargs):
+            if shutil.which(exename) is None:
+                raise unittest.SkipTest(exename + ' not found')
+            return func(*args, **kwargs)
+        return wrapped
+    return wrapper
+
+def skipIfNoPkgconfig(f):
+    '''
+    Skip this test if no pkg-config is found, unless we're on CI.
+    This allows users to run our test suite without having
+    pkg-config installed on, f.ex., macOS, while ensuring that our CI does not
+    silently skip the test because of misconfiguration.
+
+    Note: Yes, we provide pkg-config even while running Windows CI
+    '''
+    @functools.wraps(f)
+    def wrapped(*args, **kwargs):
+        if not is_ci() and shutil.which('pkg-config') is None:
+            raise unittest.SkipTest('pkg-config not found')
+        return f(*args, **kwargs)
+    return wrapped
+
+def skipIfNoPkgconfigDep(depname):
+    '''
+    Skip this test if the given pkg-config dep is not found, unless we're on CI.
+    '''
+    def wrapper(func):
+        @functools.wraps(func)
+        def wrapped(*args, **kwargs):
+            if not is_ci() and shutil.which('pkg-config') is None:
+                raise unittest.SkipTest('pkg-config not found')
+            if not is_ci() and subprocess.call(['pkg-config', '--exists', depname]) != 0:
+                raise unittest.SkipTest('pkg-config dependency {} not found.'.format(depname))
+            return func(*args, **kwargs)
+        return wrapped
+    return wrapper
+
+def skip_if_no_cmake(f):
+    '''
+    Skip this test if no cmake is found, unless we're on CI.
+    This allows users to run our test suite without having
+    cmake installed on, f.ex., macOS, while ensuring that our CI does not
+    silently skip the test because of misconfiguration.
+    '''
+    @functools.wraps(f)
+    def wrapped(*args, **kwargs):
+        if not is_ci() and shutil.which('cmake') is None:
+            raise unittest.SkipTest('cmake not found')
+        return f(*args, **kwargs)
+    return wrapped
+
+def skip_if_not_language(lang):
+    def wrapper(func):
+        @functools.wraps(func)
+        def wrapped(*args, **kwargs):
+            try:
+                env = get_fake_env()
+                f = getattr(env, 'detect_{}_compiler'.format(lang))
+                f(MachineChoice.HOST)
+            except EnvironmentException:
+                raise unittest.SkipTest('No {} compiler found.'.format(lang))
+            return func(*args, **kwargs)
+        return wrapped
+    return wrapper
+
+def skip_if_env_set(key):
+    '''
+    Skip a test if a particular env is set, except when running under CI
+    '''
+    def wrapper(func):
+        @functools.wraps(func)
+        def wrapped(*args, **kwargs):
+            old = None
+            if key in os.environ:
+                if not is_ci():
+                    raise unittest.SkipTest('Env var {!r} set, skipping'.format(key))
+                old = os.environ.pop(key)
+            try:
+                return func(*args, **kwargs)
+            finally:
+                if old is not None:
+                    os.environ[key] = old
+        return wrapped
+    return wrapper
+
+def skip_if_not_base_option(feature):
+    """Skip tests if The compiler does not support a given base option.
+
+    for example, ICC doesn't currently support b_sanitize.
+    """
+    def actual(f):
+        @functools.wraps(f)
+        def wrapped(*args, **kwargs):
+            env = get_fake_env()
+            cc = env.detect_c_compiler(MachineChoice.HOST)
+            if feature not in cc.base_options:
+                raise unittest.SkipTest(
+                    '{} not available with {}'.format(feature, cc.id))
+            return f(*args, **kwargs)
+        return wrapped
+    return actual
+
+
+@contextmanager
+def temp_filename():
+    '''A context manager which provides a filename to an empty temporary file.
+
+    On exit the file will be deleted.
+    '''
+
+    fd, filename = tempfile.mkstemp()
+    os.close(fd)
+    try:
+        yield filename
+    finally:
+        try:
+            os.remove(filename)
+        except OSError:
+            pass
+
+@contextmanager
+def no_pkgconfig():
+    '''
+    A context manager that overrides shutil.which and ExternalProgram to force
+    them to return None for pkg-config to simulate it not existing.
+    '''
+    old_which = shutil.which
+    old_search = ExternalProgram._search
+
+    def new_search(self, name, search_dir):
+        if name == 'pkg-config':
+            return [None]
+        return old_search(self, name, search_dir)
+
+    def new_which(cmd, *kwargs):
+        if cmd == 'pkg-config':
+            return None
+        return old_which(cmd, *kwargs)
+
+    shutil.which = new_which
+    ExternalProgram._search = new_search
+    try:
+        yield
+    finally:
+        shutil.which = old_which
+        ExternalProgram._search = old_search
+
+
+class InternalTests(unittest.TestCase):
+
+    def test_version_number(self):
+        searchfunc = mesonbuild.environment.search_version
+        self.assertEqual(searchfunc('foobar 1.2.3'), '1.2.3')
+        self.assertEqual(searchfunc('1.2.3'), '1.2.3')
+        self.assertEqual(searchfunc('foobar 2016.10.28 1.2.3'), '1.2.3')
+        self.assertEqual(searchfunc('2016.10.28 1.2.3'), '1.2.3')
+        self.assertEqual(searchfunc('foobar 2016.10.128'), '2016.10.128')
+        self.assertEqual(searchfunc('2016.10.128'), '2016.10.128')
+        self.assertEqual(searchfunc('2016.10'), '2016.10')
+        self.assertEqual(searchfunc('2016.10 1.2.3'), '1.2.3')
+        self.assertEqual(searchfunc('oops v1.2.3'), '1.2.3')
+        self.assertEqual(searchfunc('2016.oops 1.2.3'), '1.2.3')
+        self.assertEqual(searchfunc('2016.x'), 'unknown version')
+
+
+    def test_mode_symbolic_to_bits(self):
+        modefunc = mesonbuild.mesonlib.FileMode.perms_s_to_bits
+        self.assertEqual(modefunc('---------'), 0)
+        self.assertEqual(modefunc('r--------'), stat.S_IRUSR)
+        self.assertEqual(modefunc('---r-----'), stat.S_IRGRP)
+        self.assertEqual(modefunc('------r--'), stat.S_IROTH)
+        self.assertEqual(modefunc('-w-------'), stat.S_IWUSR)
+        self.assertEqual(modefunc('----w----'), stat.S_IWGRP)
+        self.assertEqual(modefunc('-------w-'), stat.S_IWOTH)
+        self.assertEqual(modefunc('--x------'), stat.S_IXUSR)
+        self.assertEqual(modefunc('-----x---'), stat.S_IXGRP)
+        self.assertEqual(modefunc('--------x'), stat.S_IXOTH)
+        self.assertEqual(modefunc('--S------'), stat.S_ISUID)
+        self.assertEqual(modefunc('-----S---'), stat.S_ISGID)
+        self.assertEqual(modefunc('--------T'), stat.S_ISVTX)
+        self.assertEqual(modefunc('--s------'), stat.S_ISUID | stat.S_IXUSR)
+        self.assertEqual(modefunc('-----s---'), stat.S_ISGID | stat.S_IXGRP)
+        self.assertEqual(modefunc('--------t'), stat.S_ISVTX | stat.S_IXOTH)
+        self.assertEqual(modefunc('rwx------'), stat.S_IRWXU)
+        self.assertEqual(modefunc('---rwx---'), stat.S_IRWXG)
+        self.assertEqual(modefunc('------rwx'), stat.S_IRWXO)
+        # We could keep listing combinations exhaustively but that seems
+        # tedious and pointless. Just test a few more.
+        self.assertEqual(modefunc('rwxr-xr-x'),
+                         stat.S_IRWXU |
+                         stat.S_IRGRP | stat.S_IXGRP |
+                         stat.S_IROTH | stat.S_IXOTH)
+        self.assertEqual(modefunc('rw-r--r--'),
+                         stat.S_IRUSR | stat.S_IWUSR |
+                         stat.S_IRGRP |
+                         stat.S_IROTH)
+        self.assertEqual(modefunc('rwsr-x---'),
+                         stat.S_IRWXU | stat.S_ISUID |
+                         stat.S_IRGRP | stat.S_IXGRP)
+
+    def test_compiler_args_class_none_flush(self):
+        cc = mesonbuild.compilers.CCompiler([], 'fake', False, MachineChoice.HOST, mock.Mock())
+        a = cc.compiler_args(['-I.'])
+        #first we are checking if the tree construction deduplicates the correct -I argument
+        a += ['-I..']
+        a += ['-I./tests/']
+        a += ['-I./tests2/']
+        #think this here as assertion, we cannot apply it, otherwise the CompilerArgs would already flush the changes:
+        # assertEqual(a, ['-I.', '-I./tests2/', '-I./tests/', '-I..', '-I.'])
+        a += ['-I.']
+        a += ['-I.', '-I./tests/']
+        self.assertEqual(a, ['-I.', '-I./tests/', '-I./tests2/', '-I..'])
+
+        #then we are checking that when CompilerArgs already have a build container list, that the deduplication is taking the correct one
+        a += ['-I.', '-I./tests2/']
+        self.assertEqual(a, ['-I.', '-I./tests2/', '-I./tests/', '-I..'])
+
+    def test_compiler_args_class_d(self):
+        d = mesonbuild.compilers.DCompiler([], 'fake', MachineChoice.HOST, 'info', 'arch', False, None)
+        # check include order is kept when deduplicating
+        a = d.compiler_args(['-Ifirst', '-Isecond', '-Ithird'])
+        a += ['-Ifirst']
+        self.assertEqual(a, ['-Ifirst', '-Isecond', '-Ithird'])
+
+    def test_compiler_args_class(self):
+        cc = mesonbuild.compilers.CCompiler([], 'fake', False, MachineChoice.HOST, mock.Mock())
+        # Test that empty initialization works
+        a = cc.compiler_args()
+        self.assertEqual(a, [])
+        # Test that list initialization works
+        a = cc.compiler_args(['-I.', '-I..'])
+        self.assertEqual(a, ['-I.', '-I..'])
+        # Test that there is no de-dup on initialization
+        self.assertEqual(cc.compiler_args(['-I.', '-I.']), ['-I.', '-I.'])
+
+        ## Test that appending works
+        a.append('-I..')
+        self.assertEqual(a, ['-I..', '-I.'])
+        a.append('-O3')
+        self.assertEqual(a, ['-I..', '-I.', '-O3'])
+
+        ## Test that in-place addition works
+        a += ['-O2', '-O2']
+        self.assertEqual(a, ['-I..', '-I.', '-O3', '-O2', '-O2'])
+        # Test that removal works
+        a.remove('-O2')
+        self.assertEqual(a, ['-I..', '-I.', '-O3', '-O2'])
+        # Test that de-dup happens on addition
+        a += ['-Ifoo', '-Ifoo']
+        self.assertEqual(a, ['-Ifoo', '-I..', '-I.', '-O3', '-O2'])
+
+        # .extend() is just +=, so we don't test it
+
+        ## Test that addition works
+        # Test that adding a list with just one old arg works and yields the same array
+        a = a + ['-Ifoo']
+        self.assertEqual(a, ['-Ifoo', '-I..', '-I.', '-O3', '-O2'])
+        # Test that adding a list with one arg new and one old works
+        a = a + ['-Ifoo', '-Ibaz']
+        self.assertEqual(a, ['-Ifoo', '-Ibaz', '-I..', '-I.', '-O3', '-O2'])
+        # Test that adding args that must be prepended and appended works
+        a = a + ['-Ibar', '-Wall']
+        self.assertEqual(a, ['-Ibar', '-Ifoo', '-Ibaz', '-I..', '-I.', '-O3', '-O2', '-Wall'])
+
+        ## Test that reflected addition works
+        # Test that adding to a list with just one old arg works and yields the same array
+        a = ['-Ifoo'] + a
+        self.assertEqual(a, ['-Ibar', '-Ifoo', '-Ibaz', '-I..', '-I.', '-O3', '-O2', '-Wall'])
+        # Test that adding to a list with just one new arg that is not pre-pended works
+        a = ['-Werror'] + a
+        self.assertEqual(a, ['-Ibar', '-Ifoo', '-Ibaz', '-I..', '-I.', '-Werror', '-O3', '-O2', '-Wall'])
+        # Test that adding to a list with two new args preserves the order
+        a = ['-Ldir', '-Lbah'] + a
+        self.assertEqual(a, ['-Ibar', '-Ifoo', '-Ibaz', '-I..', '-I.', '-Ldir', '-Lbah', '-Werror', '-O3', '-O2', '-Wall'])
+        # Test that adding to a list with old args does nothing
+        a = ['-Ibar', '-Ibaz', '-Ifoo'] + a
+        self.assertEqual(a, ['-Ibar', '-Ifoo', '-Ibaz', '-I..', '-I.', '-Ldir', '-Lbah', '-Werror', '-O3', '-O2', '-Wall'])
+
+        ## Test that adding libraries works
+        l = cc.compiler_args(['-Lfoodir', '-lfoo'])
+        self.assertEqual(l, ['-Lfoodir', '-lfoo'])
+        # Adding a library and a libpath appends both correctly
+        l += ['-Lbardir', '-lbar']
+        self.assertEqual(l, ['-Lbardir', '-Lfoodir', '-lfoo', '-lbar'])
+        # Adding the same library again does nothing
+        l += ['-lbar']
+        self.assertEqual(l, ['-Lbardir', '-Lfoodir', '-lfoo', '-lbar'])
+
+        ## Test that 'direct' append and extend works
+        l = cc.compiler_args(['-Lfoodir', '-lfoo'])
+        self.assertEqual(l, ['-Lfoodir', '-lfoo'])
+        # Direct-adding a library and a libpath appends both correctly
+        l.extend_direct(['-Lbardir', '-lbar'])
+        self.assertEqual(l, ['-Lfoodir', '-lfoo', '-Lbardir', '-lbar'])
+        # Direct-adding the same library again still adds it
+        l.append_direct('-lbar')
+        self.assertEqual(l, ['-Lfoodir', '-lfoo', '-Lbardir', '-lbar', '-lbar'])
+        # Direct-adding with absolute path deduplicates
+        l.append_direct('/libbaz.a')
+        self.assertEqual(l, ['-Lfoodir', '-lfoo', '-Lbardir', '-lbar', '-lbar', '/libbaz.a'])
+        # Adding libbaz again does nothing
+        l.append_direct('/libbaz.a')
+        self.assertEqual(l, ['-Lfoodir', '-lfoo', '-Lbardir', '-lbar', '-lbar', '/libbaz.a'])
+
+    def test_compiler_args_class_gnuld(self):
+        ## Test --start/end-group
+        linker = mesonbuild.linkers.GnuDynamicLinker([], MachineChoice.HOST, 'fake', '-Wl,', [])
+        gcc = mesonbuild.compilers.GnuCCompiler([], 'fake', False, MachineChoice.HOST, mock.Mock(), linker=linker)
+        ## Ensure that the fake compiler is never called by overriding the relevant function
+        gcc.get_default_include_dirs = lambda: ['/usr/include', '/usr/share/include', '/usr/local/include']
+        ## Test that 'direct' append and extend works
+        l = gcc.compiler_args(['-Lfoodir', '-lfoo'])
+        self.assertEqual(l.to_native(copy=True), ['-Lfoodir', '-Wl,--start-group', '-lfoo', '-Wl,--end-group'])
+        # Direct-adding a library and a libpath appends both correctly
+        l.extend_direct(['-Lbardir', '-lbar'])
+        self.assertEqual(l.to_native(copy=True), ['-Lfoodir', '-Wl,--start-group', '-lfoo', '-Lbardir', '-lbar', '-Wl,--end-group'])
+        # Direct-adding the same library again still adds it
+        l.append_direct('-lbar')
+        self.assertEqual(l.to_native(copy=True), ['-Lfoodir', '-Wl,--start-group', '-lfoo', '-Lbardir', '-lbar', '-lbar', '-Wl,--end-group'])
+        # Direct-adding with absolute path deduplicates
+        l.append_direct('/libbaz.a')
+        self.assertEqual(l.to_native(copy=True), ['-Lfoodir', '-Wl,--start-group', '-lfoo', '-Lbardir', '-lbar', '-lbar', '/libbaz.a', '-Wl,--end-group'])
+        # Adding libbaz again does nothing
+        l.append_direct('/libbaz.a')
+        self.assertEqual(l.to_native(copy=True), ['-Lfoodir', '-Wl,--start-group', '-lfoo', '-Lbardir', '-lbar', '-lbar', '/libbaz.a', '-Wl,--end-group'])
+        # Adding a non-library argument doesn't include it in the group
+        l += ['-Lfoo', '-Wl,--export-dynamic']
+        self.assertEqual(l.to_native(copy=True), ['-Lfoo', '-Lfoodir', '-Wl,--start-group', '-lfoo', '-Lbardir', '-lbar', '-lbar', '/libbaz.a', '-Wl,--end-group', '-Wl,--export-dynamic'])
+        # -Wl,-lfoo is detected as a library and gets added to the group
+        l.append('-Wl,-ldl')
+        self.assertEqual(l.to_native(copy=True), ['-Lfoo', '-Lfoodir', '-Wl,--start-group', '-lfoo', '-Lbardir', '-lbar', '-lbar', '/libbaz.a', '-Wl,--export-dynamic', '-Wl,-ldl', '-Wl,--end-group'])
+
+    def test_compiler_args_remove_system(self):
+        ## Test --start/end-group
+        linker = mesonbuild.linkers.GnuDynamicLinker([], MachineChoice.HOST, 'fake', '-Wl,', [])
+        gcc = mesonbuild.compilers.GnuCCompiler([], 'fake', False, MachineChoice.HOST, mock.Mock(), linker=linker)
+        ## Ensure that the fake compiler is never called by overriding the relevant function
+        gcc.get_default_include_dirs = lambda: ['/usr/include', '/usr/share/include', '/usr/local/include']
+        ## Test that 'direct' append and extend works
+        l = gcc.compiler_args(['-Lfoodir', '-lfoo'])
+        self.assertEqual(l.to_native(copy=True), ['-Lfoodir', '-Wl,--start-group', '-lfoo', '-Wl,--end-group'])
+        ## Test that to_native removes all system includes
+        l += ['-isystem/usr/include', '-isystem=/usr/share/include', '-DSOMETHING_IMPORTANT=1', '-isystem', '/usr/local/include']
+        self.assertEqual(l.to_native(copy=True), ['-Lfoodir', '-Wl,--start-group', '-lfoo', '-Wl,--end-group', '-DSOMETHING_IMPORTANT=1'])
+
+    def test_string_templates_substitution(self):
+        dictfunc = mesonbuild.mesonlib.get_filenames_templates_dict
+        substfunc = mesonbuild.mesonlib.substitute_values
+        ME = mesonbuild.mesonlib.MesonException
+
+        # Identity
+        self.assertEqual(dictfunc([], []), {})
+
+        # One input, no outputs
+        inputs = ['bar/foo.c.in']
+        outputs = []
+        ret = dictfunc(inputs, outputs)
+        d = {'@INPUT@': inputs, '@INPUT0@': inputs[0],
+             '@PLAINNAME@': 'foo.c.in', '@BASENAME@': 'foo.c'}
+        # Check dictionary
+        self.assertEqual(ret, d)
+        # Check substitutions
+        cmd = ['some', 'ordinary', 'strings']
+        self.assertEqual(substfunc(cmd, d), cmd)
+        cmd = ['@INPUT@.out', 'ordinary', 'strings']
+        self.assertEqual(substfunc(cmd, d), [inputs[0] + '.out'] + cmd[1:])
+        cmd = ['@INPUT0@.out', '@PLAINNAME@.ok', 'strings']
+        self.assertEqual(substfunc(cmd, d),
+                         [inputs[0] + '.out'] + [d['@PLAINNAME@'] + '.ok'] + cmd[2:])
+        cmd = ['@INPUT@', '@BASENAME@.hah', 'strings']
+        self.assertEqual(substfunc(cmd, d),
+                         inputs + [d['@BASENAME@'] + '.hah'] + cmd[2:])
+        cmd = ['@OUTPUT@']
+        self.assertRaises(ME, substfunc, cmd, d)
+
+        # One input, one output
+        inputs = ['bar/foo.c.in']
+        outputs = ['out.c']
+        ret = dictfunc(inputs, outputs)
+        d = {'@INPUT@': inputs, '@INPUT0@': inputs[0],
+             '@PLAINNAME@': 'foo.c.in', '@BASENAME@': 'foo.c',
+             '@OUTPUT@': outputs, '@OUTPUT0@': outputs[0], '@OUTDIR@': '.'}
+        # Check dictionary
+        self.assertEqual(ret, d)
+        # Check substitutions
+        cmd = ['some', 'ordinary', 'strings']
+        self.assertEqual(substfunc(cmd, d), cmd)
+        cmd = ['@INPUT@.out', '@OUTPUT@', 'strings']
+        self.assertEqual(substfunc(cmd, d),
+                         [inputs[0] + '.out'] + outputs + cmd[2:])
+        cmd = ['@INPUT0@.out', '@PLAINNAME@.ok', '@OUTPUT0@']
+        self.assertEqual(substfunc(cmd, d),
+                         [inputs[0] + '.out', d['@PLAINNAME@'] + '.ok'] + outputs)
+        cmd = ['@INPUT@', '@BASENAME@.hah', 'strings']
+        self.assertEqual(substfunc(cmd, d),
+                         inputs + [d['@BASENAME@'] + '.hah'] + cmd[2:])
+
+        # One input, one output with a subdir
+        outputs = ['dir/out.c']
+        ret = dictfunc(inputs, outputs)
+        d = {'@INPUT@': inputs, '@INPUT0@': inputs[0],
+             '@PLAINNAME@': 'foo.c.in', '@BASENAME@': 'foo.c',
+             '@OUTPUT@': outputs, '@OUTPUT0@': outputs[0], '@OUTDIR@': 'dir'}
+        # Check dictionary
+        self.assertEqual(ret, d)
+
+        # Two inputs, no outputs
+        inputs = ['bar/foo.c.in', 'baz/foo.c.in']
+        outputs = []
+        ret = dictfunc(inputs, outputs)
+        d = {'@INPUT@': inputs, '@INPUT0@': inputs[0], '@INPUT1@': inputs[1]}
+        # Check dictionary
+        self.assertEqual(ret, d)
+        # Check substitutions
+        cmd = ['some', 'ordinary', 'strings']
+        self.assertEqual(substfunc(cmd, d), cmd)
+        cmd = ['@INPUT@', 'ordinary', 'strings']
+        self.assertEqual(substfunc(cmd, d), inputs + cmd[1:])
+        cmd = ['@INPUT0@.out', 'ordinary', 'strings']
+        self.assertEqual(substfunc(cmd, d), [inputs[0] + '.out'] + cmd[1:])
+        cmd = ['@INPUT0@.out', '@INPUT1@.ok', 'strings']
+        self.assertEqual(substfunc(cmd, d), [inputs[0] + '.out', inputs[1] + '.ok'] + cmd[2:])
+        cmd = ['@INPUT0@', '@INPUT1@', 'strings']
+        self.assertEqual(substfunc(cmd, d), inputs + cmd[2:])
+        # Many inputs, can't use @INPUT@ like this
+        cmd = ['@INPUT@.out', 'ordinary', 'strings']
+        self.assertRaises(ME, substfunc, cmd, d)
+        # Not enough inputs
+        cmd = ['@INPUT2@.out', 'ordinary', 'strings']
+        self.assertRaises(ME, substfunc, cmd, d)
+        # Too many inputs
+        cmd = ['@PLAINNAME@']
+        self.assertRaises(ME, substfunc, cmd, d)
+        cmd = ['@BASENAME@']
+        self.assertRaises(ME, substfunc, cmd, d)
+        # No outputs
+        cmd = ['@OUTPUT@']
+        self.assertRaises(ME, substfunc, cmd, d)
+        cmd = ['@OUTPUT0@']
+        self.assertRaises(ME, substfunc, cmd, d)
+        cmd = ['@OUTDIR@']
+        self.assertRaises(ME, substfunc, cmd, d)
+
+        # Two inputs, one output
+        outputs = ['dir/out.c']
+        ret = dictfunc(inputs, outputs)
+        d = {'@INPUT@': inputs, '@INPUT0@': inputs[0], '@INPUT1@': inputs[1],
+             '@OUTPUT@': outputs, '@OUTPUT0@': outputs[0], '@OUTDIR@': 'dir'}
+        # Check dictionary
+        self.assertEqual(ret, d)
+        # Check substitutions
+        cmd = ['some', 'ordinary', 'strings']
+        self.assertEqual(substfunc(cmd, d), cmd)
+        cmd = ['@OUTPUT@', 'ordinary', 'strings']
+        self.assertEqual(substfunc(cmd, d), outputs + cmd[1:])
+        cmd = ['@OUTPUT@.out', 'ordinary', 'strings']
+        self.assertEqual(substfunc(cmd, d), [outputs[0] + '.out'] + cmd[1:])
+        cmd = ['@OUTPUT0@.out', '@INPUT1@.ok', 'strings']
+        self.assertEqual(substfunc(cmd, d), [outputs[0] + '.out', inputs[1] + '.ok'] + cmd[2:])
+        # Many inputs, can't use @INPUT@ like this
+        cmd = ['@INPUT@.out', 'ordinary', 'strings']
+        self.assertRaises(ME, substfunc, cmd, d)
+        # Not enough inputs
+        cmd = ['@INPUT2@.out', 'ordinary', 'strings']
+        self.assertRaises(ME, substfunc, cmd, d)
+        # Not enough outputs
+        cmd = ['@OUTPUT2@.out', 'ordinary', 'strings']
+        self.assertRaises(ME, substfunc, cmd, d)
+
+        # Two inputs, two outputs
+        outputs = ['dir/out.c', 'dir/out2.c']
+        ret = dictfunc(inputs, outputs)
+        d = {'@INPUT@': inputs, '@INPUT0@': inputs[0], '@INPUT1@': inputs[1],
+             '@OUTPUT@': outputs, '@OUTPUT0@': outputs[0], '@OUTPUT1@': outputs[1],
+             '@OUTDIR@': 'dir'}
+        # Check dictionary
+        self.assertEqual(ret, d)
+        # Check substitutions
+        cmd = ['some', 'ordinary', 'strings']
+        self.assertEqual(substfunc(cmd, d), cmd)
+        cmd = ['@OUTPUT@', 'ordinary', 'strings']
+        self.assertEqual(substfunc(cmd, d), outputs + cmd[1:])
+        cmd = ['@OUTPUT0@', '@OUTPUT1@', 'strings']
+        self.assertEqual(substfunc(cmd, d), outputs + cmd[2:])
+        cmd = ['@OUTPUT0@.out', '@INPUT1@.ok', '@OUTDIR@']
+        self.assertEqual(substfunc(cmd, d), [outputs[0] + '.out', inputs[1] + '.ok', 'dir'])
+        # Many inputs, can't use @INPUT@ like this
+        cmd = ['@INPUT@.out', 'ordinary', 'strings']
+        self.assertRaises(ME, substfunc, cmd, d)
+        # Not enough inputs
+        cmd = ['@INPUT2@.out', 'ordinary', 'strings']
+        self.assertRaises(ME, substfunc, cmd, d)
+        # Not enough outputs
+        cmd = ['@OUTPUT2@.out', 'ordinary', 'strings']
+        self.assertRaises(ME, substfunc, cmd, d)
+        # Many outputs, can't use @OUTPUT@ like this
+        cmd = ['@OUTPUT@.out', 'ordinary', 'strings']
+        self.assertRaises(ME, substfunc, cmd, d)
+
+    def test_needs_exe_wrapper_override(self):
+        config = ConfigParser()
+        config['binaries'] = {
+            'c': '\'/usr/bin/gcc\'',
+        }
+        config['host_machine'] = {
+            'system': '\'linux\'',
+            'cpu_family': '\'arm\'',
+            'cpu': '\'armv7\'',
+            'endian': '\'little\'',
+        }
+        # Can not be used as context manager because we need to
+        # open it a second time and this is not possible on
+        # Windows.
+        configfile = tempfile.NamedTemporaryFile(mode='w+', delete=False)
+        configfilename = configfile.name
+        config.write(configfile)
+        configfile.flush()
+        configfile.close()
+        opts = get_fake_options()
+        opts.cross_file = (configfilename,)
+        env = get_fake_env(opts=opts)
+        detected_value = env.need_exe_wrapper()
+        os.unlink(configfilename)
+
+        desired_value = not detected_value
+        config['properties'] = {
+            'needs_exe_wrapper': 'true' if desired_value else 'false'
+        }
+
+        configfile = tempfile.NamedTemporaryFile(mode='w+', delete=False)
+        configfilename = configfile.name
+        config.write(configfile)
+        configfile.close()
+        opts = get_fake_options()
+        opts.cross_file = (configfilename,)
+        env = get_fake_env(opts=opts)
+        forced_value = env.need_exe_wrapper()
+        os.unlink(configfilename)
+
+        self.assertEqual(forced_value, desired_value)
+
+    def test_listify(self):
+        listify = mesonbuild.mesonlib.listify
+        # Test sanity
+        self.assertEqual([1], listify(1))
+        self.assertEqual([], listify([]))
+        self.assertEqual([1], listify([1]))
+        # Test flattening
+        self.assertEqual([1, 2, 3], listify([1, [2, 3]]))
+        self.assertEqual([1, 2, 3], listify([1, [2, [3]]]))
+        self.assertEqual([1, [2, [3]]], listify([1, [2, [3]]], flatten=False))
+        # Test flattening and unholdering
+        holder1 = ObjectHolder(1)
+        self.assertEqual([holder1], listify(holder1))
+        self.assertEqual([holder1], listify([holder1]))
+        self.assertEqual([holder1, 2], listify([holder1, 2]))
+        self.assertEqual([holder1, 2, 3], listify([holder1, 2, [3]]))
+
+    def test_unholder(self):
+        unholder = mesonbuild.mesonlib.unholder
+
+        holder1 = ObjectHolder(1)
+        holder3 = ObjectHolder(3)
+        holders = [holder1, holder3]
+
+        self.assertEqual(1, unholder(holder1))
+        self.assertEqual([1], unholder([holder1]))
+        self.assertEqual([1, 3], unholder(holders))
+
+    def test_extract_as_list(self):
+        extract = mesonbuild.mesonlib.extract_as_list
+        # Test sanity
+        kwargs = {'sources': [1, 2, 3]}
+        self.assertEqual([1, 2, 3], extract(kwargs, 'sources'))
+        self.assertEqual(kwargs, {'sources': [1, 2, 3]})
+        self.assertEqual([1, 2, 3], extract(kwargs, 'sources', pop=True))
+        self.assertEqual(kwargs, {})
+
+        # Test unholding
+        holder3 = ObjectHolder(3)
+        kwargs = {'sources': [1, 2, holder3]}
+        self.assertEqual(kwargs, {'sources': [1, 2, holder3]})
+
+        # flatten nested lists
+        kwargs = {'sources': [1, [2, [3]]]}
+        self.assertEqual([1, 2, 3], extract(kwargs, 'sources'))
+
+    def test_pkgconfig_module(self):
+        dummystate = mock.Mock()
+        dummystate.subproject = 'dummy'
+        _mock = mock.Mock(spec=mesonbuild.dependencies.ExternalDependency)
+        _mock.pcdep = mock.Mock()
+        _mock.pcdep.name = "some_name"
+        _mock.version_reqs = []
+        _mock = mock.Mock(held_object=_mock)
+
+        # pkgconfig dependency as lib
+        deps = mesonbuild.modules.pkgconfig.DependenciesHelper(dummystate, "thislib")
+        deps.add_pub_libs([_mock])
+        self.assertEqual(deps.format_reqs(deps.pub_reqs), "some_name")
+
+        # pkgconfig dependency as requires
+        deps = mesonbuild.modules.pkgconfig.DependenciesHelper(dummystate, "thislib")
+        deps.add_pub_reqs([_mock])
+        self.assertEqual(deps.format_reqs(deps.pub_reqs), "some_name")
+
+    def _test_all_naming(self, cc, env, patterns, platform):
+        shr = patterns[platform]['shared']
+        stc = patterns[platform]['static']
+        shrstc = shr + tuple([x for x in stc if x not in shr])
+        stcshr = stc + tuple([x for x in shr if x not in stc])
+        p = cc.get_library_naming(env, LibType.SHARED)
+        self.assertEqual(p, shr)
+        p = cc.get_library_naming(env, LibType.STATIC)
+        self.assertEqual(p, stc)
+        p = cc.get_library_naming(env, LibType.PREFER_STATIC)
+        self.assertEqual(p, stcshr)
+        p = cc.get_library_naming(env, LibType.PREFER_SHARED)
+        self.assertEqual(p, shrstc)
+        # Test find library by mocking up openbsd
+        if platform != 'openbsd':
+            return
+        with tempfile.TemporaryDirectory() as tmpdir:
+            with open(os.path.join(tmpdir, 'libfoo.so.6.0'), 'w') as f:
+                f.write('')
+            with open(os.path.join(tmpdir, 'libfoo.so.5.0'), 'w') as f:
+                f.write('')
+            with open(os.path.join(tmpdir, 'libfoo.so.54.0'), 'w') as f:
+                f.write('')
+            with open(os.path.join(tmpdir, 'libfoo.so.66a.0b'), 'w') as f:
+                f.write('')
+            with open(os.path.join(tmpdir, 'libfoo.so.70.0.so.1'), 'w') as f:
+                f.write('')
+            found = cc.find_library_real('foo', env, [tmpdir], '', LibType.PREFER_SHARED)
+            self.assertEqual(os.path.basename(found[0]), 'libfoo.so.54.0')
+
+    def test_find_library_patterns(self):
+        '''
+        Unit test for the library search patterns used by find_library()
+        '''
+        unix_static = ('lib{}.a', '{}.a')
+        msvc_static = ('lib{}.a', 'lib{}.lib', '{}.a', '{}.lib')
+        # This is the priority list of pattern matching for library searching
+        patterns = {'openbsd': {'shared': ('lib{}.so', '{}.so', 'lib{}.so.[0-9]*.[0-9]*', '{}.so.[0-9]*.[0-9]*'),
+                                'static': unix_static},
+                    'linux': {'shared': ('lib{}.so', '{}.so'),
+                              'static': unix_static},
+                    'darwin': {'shared': ('lib{}.dylib', 'lib{}.so', '{}.dylib', '{}.so'),
+                               'static': unix_static},
+                    'cygwin': {'shared': ('cyg{}.dll', 'cyg{}.dll.a', 'lib{}.dll',
+                                          'lib{}.dll.a', '{}.dll', '{}.dll.a'),
+                               'static': ('cyg{}.a',) + unix_static},
+                    'windows-msvc': {'shared': ('lib{}.lib', '{}.lib'),
+                                     'static': msvc_static},
+                    'windows-mingw': {'shared': ('lib{}.dll.a', 'lib{}.lib', 'lib{}.dll',
+                                                 '{}.dll.a', '{}.lib', '{}.dll'),
+                                      'static': msvc_static}}
+        env = get_fake_env()
+        cc = env.detect_c_compiler(MachineChoice.HOST)
+        if is_osx():
+            self._test_all_naming(cc, env, patterns, 'darwin')
+        elif is_cygwin():
+            self._test_all_naming(cc, env, patterns, 'cygwin')
+        elif is_windows():
+            if cc.get_argument_syntax() == 'msvc':
+                self._test_all_naming(cc, env, patterns, 'windows-msvc')
+            else:
+                self._test_all_naming(cc, env, patterns, 'windows-mingw')
+        elif is_openbsd():
+            self._test_all_naming(cc, env, patterns, 'openbsd')
+        else:
+            self._test_all_naming(cc, env, patterns, 'linux')
+            env.machines.host.system = 'openbsd'
+            self._test_all_naming(cc, env, patterns, 'openbsd')
+            env.machines.host.system = 'darwin'
+            self._test_all_naming(cc, env, patterns, 'darwin')
+            env.machines.host.system = 'cygwin'
+            self._test_all_naming(cc, env, patterns, 'cygwin')
+            env.machines.host.system = 'windows'
+            self._test_all_naming(cc, env, patterns, 'windows-mingw')
+
+    @skipIfNoPkgconfig
+    def test_pkgconfig_parse_libs(self):
+        '''
+        Unit test for parsing of pkg-config output to search for libraries
+
+        https://github.com/mesonbuild/meson/issues/3951
+        '''
+        def create_static_lib(name):
+            if not is_osx():
+                name.open('w').close()
+                return
+            src = name.with_suffix('.c')
+            out = name.with_suffix('.o')
+            with src.open('w') as f:
+                f.write('int meson_foobar (void) { return 0; }')
+            subprocess.check_call(['clang', '-c', str(src), '-o', str(out)])
+            subprocess.check_call(['ar', 'csr', str(name), str(out)])
+
+        with tempfile.TemporaryDirectory() as tmpdir:
+            pkgbin = ExternalProgram('pkg-config', command=['pkg-config'], silent=True)
+            env = get_fake_env()
+            compiler = env.detect_c_compiler(MachineChoice.HOST)
+            env.coredata.compilers.host = {'c': compiler}
+            env.coredata.compiler_options.host['c']['link_args'] = FakeCompilerOptions()
+            p1 = Path(tmpdir) / '1'
+            p2 = Path(tmpdir) / '2'
+            p1.mkdir()
+            p2.mkdir()
+            # libfoo.a is in one prefix
+            create_static_lib(p1 / 'libfoo.a')
+            # libbar.a is in both prefixes
+            create_static_lib(p1 / 'libbar.a')
+            create_static_lib(p2 / 'libbar.a')
+            # Ensure that we never statically link to these
+            create_static_lib(p1 / 'libpthread.a')
+            create_static_lib(p1 / 'libm.a')
+            create_static_lib(p1 / 'libc.a')
+            create_static_lib(p1 / 'libdl.a')
+            create_static_lib(p1 / 'librt.a')
+
+            def fake_call_pkgbin(self, args, env=None):
+                if '--libs' not in args:
+                    return 0, '', ''
+                if args[0] == 'foo':
+                    return 0, '-L{} -lfoo -L{} -lbar'.format(p2.as_posix(), p1.as_posix()), ''
+                if args[0] == 'bar':
+                    return 0, '-L{} -lbar'.format(p2.as_posix()), ''
+                if args[0] == 'internal':
+                    return 0, '-L{} -lpthread -lm -lc -lrt -ldl'.format(p1.as_posix()), ''
+
+            old_call = PkgConfigDependency._call_pkgbin
+            old_check = PkgConfigDependency.check_pkgconfig
+            PkgConfigDependency._call_pkgbin = fake_call_pkgbin
+            PkgConfigDependency.check_pkgconfig = lambda x, _: pkgbin
+            # Test begins
+            try:
+                kwargs = {'required': True, 'silent': True}
+                foo_dep = PkgConfigDependency('foo', env, kwargs)
+                self.assertEqual(foo_dep.get_link_args(),
+                                 [(p1 / 'libfoo.a').as_posix(), (p2 / 'libbar.a').as_posix()])
+                bar_dep = PkgConfigDependency('bar', env, kwargs)
+                self.assertEqual(bar_dep.get_link_args(), [(p2 / 'libbar.a').as_posix()])
+                internal_dep = PkgConfigDependency('internal', env, kwargs)
+                if compiler.get_argument_syntax() == 'msvc':
+                    self.assertEqual(internal_dep.get_link_args(), [])
+                else:
+                    link_args = internal_dep.get_link_args()
+                    for link_arg in link_args:
+                        for lib in ('pthread', 'm', 'c', 'dl', 'rt'):
+                            self.assertNotIn('lib{}.a'.format(lib), link_arg, msg=link_args)
+            finally:
+                # Test ends
+                PkgConfigDependency._call_pkgbin = old_call
+                PkgConfigDependency.check_pkgconfig = old_check
+                # Reset dependency class to ensure that in-process configure doesn't mess up
+                PkgConfigDependency.pkgbin_cache = {}
+                PkgConfigDependency.class_pkgbin = PerMachine(None, None)
+
+    def test_version_compare(self):
+        comparefunc = mesonbuild.mesonlib.version_compare_many
+        for (a, b, result) in [
+                ('0.99.beta19', '>= 0.99.beta14', True),
+        ]:
+            self.assertEqual(comparefunc(a, b)[0], result)
+
+        for (a, b, op) in [
+                # examples from https://fedoraproject.org/wiki/Archive:Tools/RPM/VersionComparison
+                ("1.0010", "1.9", operator.gt),
+                ("1.05", "1.5", operator.eq),
+                ("1.0", "1", operator.gt),
+                ("2.50", "2.5", operator.gt),
+                ("fc4", "fc.4", operator.eq),
+                ("FC5", "fc4", operator.lt),
+                ("2a", "2.0", operator.lt),
+                ("1.0", "1.fc4", operator.gt),
+                ("3.0.0_fc", "3.0.0.fc", operator.eq),
+                # from RPM tests
+                ("1.0", "1.0", operator.eq),
+                ("1.0", "2.0", operator.lt),
+                ("2.0", "1.0", operator.gt),
+                ("2.0.1", "2.0.1", operator.eq),
+                ("2.0", "2.0.1", operator.lt),
+                ("2.0.1", "2.0", operator.gt),
+                ("2.0.1a", "2.0.1a", operator.eq),
+                ("2.0.1a", "2.0.1", operator.gt),
+                ("2.0.1", "2.0.1a", operator.lt),
+                ("5.5p1", "5.5p1", operator.eq),
+                ("5.5p1", "5.5p2", operator.lt),
+                ("5.5p2", "5.5p1", operator.gt),
+                ("5.5p10", "5.5p10", operator.eq),
+                ("5.5p1", "5.5p10", operator.lt),
+                ("5.5p10", "5.5p1", operator.gt),
+                ("10xyz", "10.1xyz", operator.lt),
+                ("10.1xyz", "10xyz", operator.gt),
+                ("xyz10", "xyz10", operator.eq),
+                ("xyz10", "xyz10.1", operator.lt),
+                ("xyz10.1", "xyz10", operator.gt),
+                ("xyz.4", "xyz.4", operator.eq),
+                ("xyz.4", "8", operator.lt),
+                ("8", "xyz.4", operator.gt),
+                ("xyz.4", "2", operator.lt),
+                ("2", "xyz.4", operator.gt),
+                ("5.5p2", "5.6p1", operator.lt),
+                ("5.6p1", "5.5p2", operator.gt),
+                ("5.6p1", "6.5p1", operator.lt),
+                ("6.5p1", "5.6p1", operator.gt),
+                ("6.0.rc1", "6.0", operator.gt),
+                ("6.0", "6.0.rc1", operator.lt),
+                ("10b2", "10a1", operator.gt),
+                ("10a2", "10b2", operator.lt),
+                ("1.0aa", "1.0aa", operator.eq),
+                ("1.0a", "1.0aa", operator.lt),
+                ("1.0aa", "1.0a", operator.gt),
+                ("10.0001", "10.0001", operator.eq),
+                ("10.0001", "10.1", operator.eq),
+                ("10.1", "10.0001", operator.eq),
+                ("10.0001", "10.0039", operator.lt),
+                ("10.0039", "10.0001", operator.gt),
+                ("4.999.9", "5.0", operator.lt),
+                ("5.0", "4.999.9", operator.gt),
+                ("20101121", "20101121", operator.eq),
+                ("20101121", "20101122", operator.lt),
+                ("20101122", "20101121", operator.gt),
+                ("2_0", "2_0", operator.eq),
+                ("2.0", "2_0", operator.eq),
+                ("2_0", "2.0", operator.eq),
+                ("a", "a", operator.eq),
+                ("a+", "a+", operator.eq),
+                ("a+", "a_", operator.eq),
+                ("a_", "a+", operator.eq),
+                ("+a", "+a", operator.eq),
+                ("+a", "_a", operator.eq),
+                ("_a", "+a", operator.eq),
+                ("+_", "+_", operator.eq),
+                ("_+", "+_", operator.eq),
+                ("_+", "_+", operator.eq),
+                ("+", "_", operator.eq),
+                ("_", "+", operator.eq),
+                # other tests
+                ('0.99.beta19', '0.99.beta14', operator.gt),
+                ("1.0.0", "2.0.0", operator.lt),
+                (".0.0", "2.0.0", operator.lt),
+                ("alpha", "beta", operator.lt),
+                ("1.0", "1.0.0", operator.lt),
+                ("2.456", "2.1000", operator.lt),
+                ("2.1000", "3.111", operator.lt),
+                ("2.001", "2.1", operator.eq),
+                ("2.34", "2.34", operator.eq),
+                ("6.1.2", "6.3.8", operator.lt),
+                ("1.7.3.0", "2.0.0", operator.lt),
+                ("2.24.51", "2.25", operator.lt),
+                ("2.1.5+20120813+gitdcbe778", "2.1.5", operator.gt),
+                ("3.4.1", "3.4b1", operator.gt),
+                ("041206", "200090325", operator.lt),
+                ("0.6.2+git20130413", "0.6.2", operator.gt),
+                ("2.6.0+bzr6602", "2.6.0", operator.gt),
+                ("2.6.0", "2.6b2", operator.gt),
+                ("2.6.0+bzr6602", "2.6b2x", operator.gt),
+                ("0.6.7+20150214+git3a710f9", "0.6.7", operator.gt),
+                ("15.8b", "15.8.0.1", operator.lt),
+                ("1.2rc1", "1.2.0", operator.lt),
+        ]:
+            ver_a = Version(a)
+            ver_b = Version(b)
+            if op is operator.eq:
+                for o, name in [(op, 'eq'), (operator.ge, 'ge'), (operator.le, 'le')]:
+                    self.assertTrue(o(ver_a, ver_b), '{} {} {}'.format(ver_a, name, ver_b))
+            if op is operator.lt:
+                for o, name in [(op, 'lt'), (operator.le, 'le'), (operator.ne, 'ne')]:
+                    self.assertTrue(o(ver_a, ver_b), '{} {} {}'.format(ver_a, name, ver_b))
+                for o, name in [(operator.gt, 'gt'), (operator.ge, 'ge'), (operator.eq, 'eq')]:
+                    self.assertFalse(o(ver_a, ver_b), '{} {} {}'.format(ver_a, name, ver_b))
+            if op is operator.gt:
+                for o, name in [(op, 'gt'), (operator.ge, 'ge'), (operator.ne, 'ne')]:
+                    self.assertTrue(o(ver_a, ver_b), '{} {} {}'.format(ver_a, name, ver_b))
+                for o, name in [(operator.lt, 'lt'), (operator.le, 'le'), (operator.eq, 'eq')]:
+                    self.assertFalse(o(ver_a, ver_b), '{} {} {}'.format(ver_a, name, ver_b))
+
+    def test_msvc_toolset_version(self):
+        '''
+        Ensure that the toolset version returns the correct value for this MSVC
+        '''
+        env = get_fake_env()
+        cc = env.detect_c_compiler(MachineChoice.HOST)
+        if cc.get_argument_syntax() != 'msvc':
+            raise unittest.SkipTest('Test only applies to MSVC-like compilers')
+        toolset_ver = cc.get_toolset_version()
+        self.assertIsNotNone(toolset_ver)
+        # Visual Studio 2015 and older versions do not define VCToolsVersion
+        # TODO: ICL doesn't set this in the VSC2015 profile either
+        if cc.id == 'msvc' and int(''.join(cc.version.split('.')[0:2])) < 1910:
+            return
+        if 'VCToolsVersion' in os.environ:
+            vctools_ver = os.environ['VCToolsVersion']
+        else:
+            self.assertIn('VCINSTALLDIR', os.environ)
+            # See https://devblogs.microsoft.com/cppblog/finding-the-visual-c-compiler-tools-in-visual-studio-2017/
+            vctools_ver = (Path(os.environ['VCINSTALLDIR']) / 'Auxiliary' / 'Build' / 'Microsoft.VCToolsVersion.default.txt').read_text()
+        self.assertTrue(vctools_ver.startswith(toolset_ver),
+                        msg='{!r} does not start with {!r}'.format(vctools_ver, toolset_ver))
+
+    def test_split_args(self):
+        split_args = mesonbuild.mesonlib.split_args
+        join_args = mesonbuild.mesonlib.join_args
+        if is_windows():
+            test_data = [
+                # examples from https://docs.microsoft.com/en-us/cpp/c-language/parsing-c-command-line-arguments
+                (r'"a b c" d e', ['a b c', 'd', 'e'], True),
+                (r'"ab\"c" "\\" d', ['ab"c', '\\', 'd'], False),
+                (r'a\\\b d"e f"g h', [r'a\\\b', 'de fg', 'h'], False),
+                (r'a\\\"b c d', [r'a\"b', 'c', 'd'], False),
+                (r'a\\\\"b c" d e', [r'a\\b c', 'd', 'e'], False),
+                # other basics
+                (r'""', [''], True),
+                (r'a b c d "" e', ['a', 'b', 'c', 'd', '', 'e'], True),
+                (r"'a b c' d e", ["'a", 'b', "c'", 'd', 'e'], True),
+                (r"'a&b&c' d e", ["'a&b&c'", 'd', 'e'], True),
+                (r"a & b & c d e", ['a', '&', 'b', '&', 'c', 'd', 'e'], True),
+                (r"'a & b & c d e'", ["'a", '&', 'b', '&', 'c', 'd', "e'"], True),
+                ('a  b\nc\rd \n\re', ['a', 'b', 'c', 'd', 'e'], False),
+                # more illustrative tests
+                (r'cl test.cpp /O1 /Fe:test.exe', ['cl', 'test.cpp', '/O1', '/Fe:test.exe'], True),
+                (r'cl "test.cpp /O1 /Fe:test.exe"', ['cl', 'test.cpp /O1 /Fe:test.exe'], True),
+                (r'cl /DNAME=\"Bob\" test.cpp', ['cl', '/DNAME="Bob"', 'test.cpp'], False),
+                (r'cl "/DNAME=\"Bob\"" test.cpp', ['cl', '/DNAME="Bob"', 'test.cpp'], True),
+                (r'cl /DNAME=\"Bob, Alice\" test.cpp', ['cl', '/DNAME="Bob,', 'Alice"', 'test.cpp'], False),
+                (r'cl "/DNAME=\"Bob, Alice\"" test.cpp', ['cl', '/DNAME="Bob, Alice"', 'test.cpp'], True),
+                (r'cl C:\path\with\backslashes.cpp', ['cl', r'C:\path\with\backslashes.cpp'], True),
+                (r'cl C:\\path\\with\\double\\backslashes.cpp', ['cl', r'C:\\path\\with\\double\\backslashes.cpp'], True),
+                (r'cl "C:\\path\\with\\double\\backslashes.cpp"', ['cl', r'C:\\path\\with\\double\\backslashes.cpp'], False),
+                (r'cl C:\path with spaces\test.cpp', ['cl', r'C:\path', 'with', r'spaces\test.cpp'], False),
+                (r'cl "C:\path with spaces\test.cpp"', ['cl', r'C:\path with spaces\test.cpp'], True),
+                (r'cl /DPATH="C:\path\with\backslashes test.cpp', ['cl', r'/DPATH=C:\path\with\backslashes test.cpp'], False),
+                (r'cl /DPATH=\"C:\\ends\\with\\backslashes\\\" test.cpp', ['cl', r'/DPATH="C:\\ends\\with\\backslashes\"', 'test.cpp'], False),
+                (r'cl /DPATH="C:\\ends\\with\\backslashes\\" test.cpp', ['cl', '/DPATH=C:\\\\ends\\\\with\\\\backslashes\\', 'test.cpp'], False),
+                (r'cl "/DNAME=\"C:\\ends\\with\\backslashes\\\"" test.cpp', ['cl', r'/DNAME="C:\\ends\\with\\backslashes\"', 'test.cpp'], True),
+                (r'cl "/DNAME=\"C:\\ends\\with\\backslashes\\\\"" test.cpp', ['cl', r'/DNAME="C:\\ends\\with\\backslashes\\ test.cpp'], False),
+                (r'cl "/DNAME=\"C:\\ends\\with\\backslashes\\\\\"" test.cpp', ['cl', r'/DNAME="C:\\ends\\with\\backslashes\\"', 'test.cpp'], True),
+            ]
+        else:
+            test_data = [
+                (r"'a b c' d e", ['a b c', 'd', 'e'], True),
+                (r"a/b/c d e", ['a/b/c', 'd', 'e'], True),
+                (r"a\b\c d e", [r'abc', 'd', 'e'], False),
+                (r"a\\b\\c d e", [r'a\b\c', 'd', 'e'], False),
+                (r'"a b c" d e', ['a b c', 'd', 'e'], False),
+                (r'"a\\b\\c\\" d e', ['a\\b\\c\\', 'd', 'e'], False),
+                (r"'a\b\c\' d e", ['a\\b\\c\\', 'd', 'e'], True),
+                (r"'a&b&c' d e", ['a&b&c', 'd', 'e'], True),
+                (r"a & b & c d e", ['a', '&', 'b', '&', 'c', 'd', 'e'], False),
+                (r"'a & b & c d e'", ['a & b & c d e'], True),
+                (r"abd'e f'g h", [r'abde fg', 'h'], False),
+                ('a  b\nc\rd \n\re', ['a', 'b', 'c', 'd', 'e'], False),
+
+                ('g++ -DNAME="Bob" test.cpp', ['g++', '-DNAME=Bob', 'test.cpp'], False),
+                ("g++ '-DNAME=\"Bob\"' test.cpp", ['g++', '-DNAME="Bob"', 'test.cpp'], True),
+                ('g++ -DNAME="Bob, Alice" test.cpp', ['g++', '-DNAME=Bob, Alice', 'test.cpp'], False),
+                ("g++ '-DNAME=\"Bob, Alice\"' test.cpp", ['g++', '-DNAME="Bob, Alice"', 'test.cpp'], True),
+            ]
+
+        for (cmd, expected, roundtrip) in test_data:
+            self.assertEqual(split_args(cmd), expected)
+            if roundtrip:
+                self.assertEqual(join_args(expected), cmd)
+
+    def test_quote_arg(self):
+        split_args = mesonbuild.mesonlib.split_args
+        quote_arg = mesonbuild.mesonlib.quote_arg
+        if is_windows():
+            test_data = [
+                ('', '""'),
+                ('arg1', 'arg1'),
+                ('/option1', '/option1'),
+                ('/Ovalue', '/Ovalue'),
+                ('/OBob&Alice', '/OBob&Alice'),
+                ('/Ovalue with spaces', r'"/Ovalue with spaces"'),
+                (r'/O"value with spaces"', r'"/O\"value with spaces\""'),
+                (r'/OC:\path with spaces\test.exe', r'"/OC:\path with spaces\test.exe"'),
+                ('/LIBPATH:C:\\path with spaces\\ends\\with\\backslashes\\', r'"/LIBPATH:C:\path with spaces\ends\with\backslashes\\"'),
+                ('/LIBPATH:"C:\\path with spaces\\ends\\with\\backslashes\\\\"', r'"/LIBPATH:\"C:\path with spaces\ends\with\backslashes\\\\\""'),
+                (r'/DMSG="Alice said: \"Let\'s go\""', r'"/DMSG=\"Alice said: \\\"Let\'s go\\\"\""'),
+            ]
+        else:
+            test_data = [
+                ('arg1', 'arg1'),
+                ('--option1', '--option1'),
+                ('-O=value', '-O=value'),
+                ('-O=Bob&Alice', "'-O=Bob&Alice'"),
+                ('-O=value with spaces', "'-O=value with spaces'"),
+                ('-O="value with spaces"', '\'-O=\"value with spaces\"\''),
+                ('-O=/path with spaces/test', '\'-O=/path with spaces/test\''),
+                ('-DMSG="Alice said: \\"Let\'s go\\""', "'-DMSG=\"Alice said: \\\"Let'\"'\"'s go\\\"\"'"),
+            ]
+
+        for (arg, expected) in test_data:
+            self.assertEqual(quote_arg(arg), expected)
+            self.assertEqual(split_args(expected)[0], arg)
+
+    def test_depfile(self):
+        for (f, target, expdeps) in [
+                # empty, unknown target
+                ([''], 'unknown', set()),
+                # simple target & deps
+                (['meson/foo.o  : foo.c   foo.h'], 'meson/foo.o', set({'foo.c', 'foo.h'})),
+                (['meson/foo.o: foo.c foo.h'], 'foo.c', set()),
+                # get all deps
+                (['meson/foo.o: foo.c foo.h',
+                  'foo.c: gen.py'], 'meson/foo.o', set({'foo.c', 'foo.h', 'gen.py'})),
+                (['meson/foo.o: foo.c foo.h',
+                  'foo.c: gen.py'], 'foo.c', set({'gen.py'})),
+                # linue continuation, multiple targets
+                (['foo.o \\', 'foo.h: bar'], 'foo.h', set({'bar'})),
+                (['foo.o \\', 'foo.h: bar'], 'foo.o', set({'bar'})),
+                # \\ handling
+                (['foo: Program\\ F\\iles\\\\X'], 'foo', set({'Program Files\\X'})),
+                # $ handling
+                (['f$o.o: c/b'], 'f$o.o', set({'c/b'})),
+                (['f$$o.o: c/b'], 'f$o.o', set({'c/b'})),
+                # cycles
+                (['a: b', 'b: a'], 'a', set({'a', 'b'})),
+                (['a: b', 'b: a'], 'b', set({'a', 'b'})),
+        ]:
+            d = mesonbuild.depfile.DepFile(f)
+            deps = d.get_all_dependencies(target)
+            self.assertEqual(deps, expdeps)
+
+    def test_log_once(self):
+        f = io.StringIO()
+        with mock.patch('mesonbuild.mlog.log_file', f), \
+                mock.patch('mesonbuild.mlog._logged_once', set()):
+            mesonbuild.mlog.log_once('foo')
+            mesonbuild.mlog.log_once('foo')
+            actual = f.getvalue().strip()
+            self.assertEqual(actual, 'foo', actual)
+
+    def test_log_once_ansi(self):
+        f = io.StringIO()
+        with mock.patch('mesonbuild.mlog.log_file', f), \
+                mock.patch('mesonbuild.mlog._logged_once', set()):
+            mesonbuild.mlog.log_once(mesonbuild.mlog.bold('foo'))
+            mesonbuild.mlog.log_once(mesonbuild.mlog.bold('foo'))
+            actual = f.getvalue().strip()
+            self.assertEqual(actual.count('foo'), 1, actual)
+
+            mesonbuild.mlog.log_once('foo')
+            actual = f.getvalue().strip()
+            self.assertEqual(actual.count('foo'), 1, actual)
+
+            f.truncate()
+
+            mesonbuild.mlog.warning('bar', once=True)
+            mesonbuild.mlog.warning('bar', once=True)
+            actual = f.getvalue().strip()
+            self.assertEqual(actual.count('bar'), 1, actual)
+
+    def test_sort_libpaths(self):
+        sort_libpaths = mesonbuild.dependencies.base.sort_libpaths
+        self.assertEqual(sort_libpaths(
+            ['/home/mesonuser/.local/lib', '/usr/local/lib', '/usr/lib'],
+            ['/home/mesonuser/.local/lib/pkgconfig', '/usr/local/lib/pkgconfig']),
+            ['/home/mesonuser/.local/lib', '/usr/local/lib', '/usr/lib'])
+        self.assertEqual(sort_libpaths(
+            ['/usr/local/lib', '/home/mesonuser/.local/lib', '/usr/lib'],
+            ['/home/mesonuser/.local/lib/pkgconfig', '/usr/local/lib/pkgconfig']),
+            ['/home/mesonuser/.local/lib', '/usr/local/lib', '/usr/lib'])
+        self.assertEqual(sort_libpaths(
+            ['/usr/lib', '/usr/local/lib', '/home/mesonuser/.local/lib'],
+            ['/home/mesonuser/.local/lib/pkgconfig', '/usr/local/lib/pkgconfig']),
+            ['/home/mesonuser/.local/lib', '/usr/local/lib', '/usr/lib'])
+        self.assertEqual(sort_libpaths(
+            ['/usr/lib', '/usr/local/lib', '/home/mesonuser/.local/lib'],
+            ['/home/mesonuser/.local/lib/pkgconfig', '/usr/local/libdata/pkgconfig']),
+            ['/home/mesonuser/.local/lib', '/usr/local/lib', '/usr/lib'])
+
+    def test_dependency_factory_order(self):
+        b = mesonbuild.dependencies.base
+        with tempfile.TemporaryDirectory() as tmpdir:
+            with chdir(tmpdir):
+                env = get_fake_env()
+
+                f = b.DependencyFactory(
+                    'test_dep',
+                    methods=[b.DependencyMethods.PKGCONFIG, b.DependencyMethods.CMAKE]
+                )
+                actual = [m() for m in f(env, MachineChoice.HOST, {'required': False})]
+                self.assertListEqual([m.type_name for m in actual], ['pkgconfig', 'cmake'])
+
+                f = b.DependencyFactory(
+                    'test_dep',
+                    methods=[b.DependencyMethods.CMAKE, b.DependencyMethods.PKGCONFIG]
+                )
+                actual = [m() for m in f(env, MachineChoice.HOST, {'required': False})]
+                self.assertListEqual([m.type_name for m in actual], ['cmake', 'pkgconfig'])
+
+    def test_validate_json(self) -> None:
+        """Validate the json schema for the test cases."""
+        try:
+            from jsonschema import validate, ValidationError
+        except ImportError:
+            if is_ci():
+                raise
+            raise unittest.SkipTest('Python jsonschema module not found.')
+
+        with Path('data/test.schema.json').open() as f:
+            schema = json.load(f)
+
+        errors = []  # type: T.Tuple[str, Exception]
+        for p in Path('test cases').glob('**/test.json'):
+            with p.open() as f:
+                try:
+                    validate(json.load(f), schema=schema)
+                except ValidationError as e:
+                    errors.append((p.resolve(), e))
+
+        for f, e in errors:
+            print('Failed to validate: "{}"'.format(f))
+            print(str(e))
+
+        self.assertFalse(errors)
+
+@unittest.skipIf(is_tarball(), 'Skipping because this is a tarball release')
+class DataTests(unittest.TestCase):
+
+    def test_snippets(self):
+        hashcounter = re.compile('^ *(#)+')
+        snippet_dir = Path('docs/markdown/snippets')
+        self.assertTrue(snippet_dir.is_dir())
+        for f in snippet_dir.glob('*'):
+            self.assertTrue(f.is_file())
+            if f.parts[-1].endswith('~'):
+                continue
+            if f.suffix == '.md':
+                in_code_block = False
+                with f.open() as snippet:
+                    for line in snippet:
+                        if line.startswith('    '):
+                            continue
+                        if line.startswith('```'):
+                            in_code_block = not in_code_block
+                        if in_code_block:
+                            continue
+                        m = re.match(hashcounter, line)
+                        if m:
+                            self.assertEqual(len(m.group(0)), 2, 'All headings in snippets must have two hash symbols: ' + f.name)
+                self.assertFalse(in_code_block, 'Unclosed code block.')
+            else:
+                if f.name != 'add_release_note_snippets_here':
+                    self.assertTrue(False, 'A file without .md suffix in snippets dir: ' + f.name)
+
+    def test_compiler_options_documented(self):
+        '''
+        Test that C and C++ compiler options and base options are documented in
+        Builtin-Options.md. Only tests the default compiler for the current
+        platform on the CI.
+        '''
+        md = None
+        with open('docs/markdown/Builtin-options.md', encoding='utf-8') as f:
+            md = f.read()
+        self.assertIsNotNone(md)
+        env = get_fake_env()
+        # FIXME: Support other compilers
+        cc = env.detect_c_compiler(MachineChoice.HOST)
+        cpp = env.detect_cpp_compiler(MachineChoice.HOST)
+        for comp in (cc, cpp):
+            for opt in comp.get_options().keys():
+                self.assertIn(opt, md)
+            for opt in comp.base_options:
+                self.assertIn(opt, md)
+        self.assertNotIn('b_unknown', md)
+
+    @staticmethod
+    def _get_section_content(name, sections, md):
+        for section in sections:
+            if section and section.group(1) == name:
+                try:
+                    next_section = next(sections)
+                    end = next_section.start()
+                except StopIteration:
+                    end = len(md)
+                # Extract the content for this section
+                return md[section.end():end]
+        raise RuntimeError('Could not find "{}" heading'.format(name))
+
+    def test_builtin_options_documented(self):
+        '''
+        Test that universal options and base options are documented in
+        Builtin-Options.md.
+        '''
+        from itertools import tee
+        md = None
+        with open('docs/markdown/Builtin-options.md', encoding='utf-8') as f:
+            md = f.read()
+        self.assertIsNotNone(md)
+
+        found_entries = set()
+        sections = re.finditer(r"^## (.+)$", md, re.MULTILINE)
+        # Extract the content for this section
+        content = self._get_section_content("Universal options", sections, md)
+        subsections = tee(re.finditer(r"^### (.+)$", content, re.MULTILINE))
+        subcontent1 = self._get_section_content("Directories", subsections[0], content)
+        subcontent2 = self._get_section_content("Core options", subsections[1], content)
+        for subcontent in (subcontent1, subcontent2):
+            # Find the option names
+            options = set()
+            # Match either a table row or a table heading separator: | ------ |
+            rows = re.finditer(r"^\|(?: (\w+) .* | *-+ *)\|", subcontent, re.MULTILINE)
+            # Skip the header of the first table
+            next(rows)
+            # Skip the heading separator of the first table
+            next(rows)
+            for m in rows:
+                value = m.group(1)
+                # End when the `buildtype` table starts
+                if value is None:
+                    break
+                options.add(value)
+            self.assertEqual(len(found_entries & options), 0)
+            found_entries |= options
+
+        self.assertEqual(found_entries, set([
+            *mesonbuild.coredata.builtin_options.keys(),
+            *mesonbuild.coredata.builtin_options_per_machine.keys()
+        ]))
+
+        # Check that `buildtype` table inside `Core options` matches how
+        # setting of builtin options behaves
+        #
+        # Find all tables inside this subsection
+        tables = re.finditer(r"^\| (\w+) .* \|\n\| *[-|\s]+ *\|$", subcontent2, re.MULTILINE)
+        # Get the table we want using the header of the first column
+        table = self._get_section_content('buildtype', tables, subcontent2)
+        # Get table row data
+        rows = re.finditer(r"^\|(?: (\w+)\s+\| (\w+)\s+\| (\w+) .* | *-+ *)\|", table, re.MULTILINE)
+        env = get_fake_env()
+        for m in rows:
+            buildtype, debug, opt = m.groups()
+            if debug == 'true':
+                debug = True
+            elif debug == 'false':
+                debug = False
+            else:
+                raise RuntimeError('Invalid debug value {!r} in row:\n{}'.format(debug, m.group()))
+            env.coredata.set_builtin_option('buildtype', buildtype)
+            self.assertEqual(env.coredata.builtins['buildtype'].value, buildtype)
+            self.assertEqual(env.coredata.builtins['optimization'].value, opt)
+            self.assertEqual(env.coredata.builtins['debug'].value, debug)
+
+    def test_cpu_families_documented(self):
+        with open("docs/markdown/Reference-tables.md", encoding='utf-8') as f:
+            md = f.read()
+        self.assertIsNotNone(md)
+
+        sections = re.finditer(r"^## (.+)$", md, re.MULTILINE)
+        content = self._get_section_content("CPU families", sections, md)
+        # Find the list entries
+        arches = [m.group(1) for m in re.finditer(r"^\| (\w+) +\|", content, re.MULTILINE)]
+        # Drop the header
+        arches = set(arches[1:])
+        self.assertEqual(arches, set(mesonbuild.environment.known_cpu_families))
+
+    def test_markdown_files_in_sitemap(self):
+        '''
+        Test that each markdown files in docs/markdown is referenced in sitemap.txt
+        '''
+        with open("docs/sitemap.txt", encoding='utf-8') as f:
+            md = f.read()
+        self.assertIsNotNone(md)
+        toc = list(m.group(1) for m in re.finditer(r"^\s*(\w.*)$", md, re.MULTILINE))
+        markdownfiles = [f.name for f in Path("docs/markdown").iterdir() if f.is_file() and f.suffix == '.md']
+        exceptions = ['_Sidebar.md']
+        for f in markdownfiles:
+            if f not in exceptions:
+                self.assertIn(f, toc)
+
+    def test_vim_syntax_highlighting(self):
+        '''
+        Ensure that vim syntax highlighting files were updated for new
+        functions in the global namespace in build files.
+        '''
+        env = get_fake_env()
+        interp = Interpreter(FakeBuild(env), mock=True)
+        with open('data/syntax-highlighting/vim/syntax/meson.vim') as f:
+            res = re.search(r'syn keyword mesonBuiltin(\s+\\\s\w+)+', f.read(), re.MULTILINE)
+            defined = set([a.strip() for a in res.group().split('\\')][1:])
+            self.assertEqual(defined, set(chain(interp.funcs.keys(), interp.builtin.keys())))
+
+    @unittest.skipIf(is_pull(), 'Skipping because this is a pull request')
+    def test_json_grammar_syntax_highlighting(self):
+        '''
+        Ensure that syntax highlighting JSON grammar written by TingPing was
+        updated for new functions in the global namespace in build files.
+        https://github.com/TingPing/language-meson/
+        '''
+        env = get_fake_env()
+        interp = Interpreter(FakeBuild(env), mock=True)
+        url = 'https://raw.githubusercontent.com/TingPing/language-meson/master/grammars/meson.json'
+        try:
+            # Use a timeout to avoid blocking forever in case the network is
+            # slow or unavailable in a weird way
+            r = urllib.request.urlopen(url, timeout=URLOPEN_TIMEOUT)
+        except urllib.error.URLError as e:
+            # Skip test when network is not available, such as during packaging
+            # by a distro or Flatpak
+            if not isinstance(e, urllib.error.HTTPError):
+                raise unittest.SkipTest('Network unavailable')
+            # Don't fail the test if github is down, but do fail if 4xx
+            if e.code >= 500:
+                raise unittest.SkipTest('Server error ' + str(e.code))
+            raise e
+        # On Python 3.5, we must decode bytes to string. Newer versions don't require that.
+        grammar = json.loads(r.read().decode('utf-8', 'surrogatepass'))
+        for each in grammar['patterns']:
+            if 'name' in each and each['name'] == 'support.function.builtin.meson':
+                # The string is of the form: (?x)\\b(func1|func2|...\n)\\b\\s*(?=\\() and
+                # we convert that to [func1, func2, ...] without using regex to parse regex
+                funcs = set(each['match'].split('\\b(')[1].split('\n')[0].split('|'))
+            if 'name' in each and each['name'] == 'support.variable.meson':
+                # \\b(builtin1|builtin2...)\\b
+                builtin = set(each['match'].split('\\b(')[1].split(')\\b')[0].split('|'))
+        self.assertEqual(builtin, set(interp.builtin.keys()))
+        self.assertEqual(funcs, set(interp.funcs.keys()))
+
+    def test_all_functions_defined_in_ast_interpreter(self):
+        '''
+        Ensure that the all functions defined in the Interpreter are also defined
+        in the AstInterpreter (and vice versa).
+        '''
+        env = get_fake_env()
+        interp = Interpreter(FakeBuild(env), mock=True)
+        astint = AstInterpreter('.', '', '')
+        self.assertEqual(set(interp.funcs.keys()), set(astint.funcs.keys()))
+
+    def test_mesondata_is_up_to_date(self):
+        from mesonbuild.mesondata import mesondata
+        err_msg = textwrap.dedent('''
+
+            ###########################################################
+            ###        mesonbuild.mesondata is not up-to-date       ###
+            ###  Please regenerate it by running tools/gen_data.py  ###
+            ###########################################################
+
+        ''')
+
+        root_dir = Path(__file__).resolve().parent
+        mesonbuild_dir = root_dir / 'mesonbuild'
+
+        data_dirs = mesonbuild_dir.glob('**/data')
+        data_files = []  # type: T.List[T.Tuple(str, str)]
+
+        for i in data_dirs:
+            for p in i.iterdir():
+                data_files += [(p.relative_to(mesonbuild_dir).as_posix(), hashlib.sha256(p.read_bytes()).hexdigest())]
+
+        from pprint import pprint
+        current_files = set(mesondata.keys())
+        scanned_files = set([x[0] for x in data_files])
+
+        self.assertSetEqual(current_files, scanned_files, err_msg + 'Data files were added or removed\n')
+        errors = []
+        for i in data_files:
+            if mesondata[i[0]].sha256sum != i[1]:
+                errors += [i[0]]
+
+        self.assertListEqual(errors, [], err_msg + 'Files were changed')
+
+class BasePlatformTests(unittest.TestCase):
+    prefix = '/usr'
+    libdir = 'lib'
+
+    def setUp(self):
+        super().setUp()
+        self.maxDiff = None
+        src_root = os.path.dirname(__file__)
+        src_root = os.path.join(os.getcwd(), src_root)
+        self.src_root = src_root
+        # Get the backend
+        # FIXME: Extract this from argv?
+        self.backend = getattr(Backend, os.environ.get('MESON_UNIT_TEST_BACKEND', 'ninja'))
+        self.meson_args = ['--backend=' + self.backend.name]
+        self.meson_native_file = None
+        self.meson_cross_file = None
+        self.meson_command = python_command + [get_meson_script()]
+        self.setup_command = self.meson_command + self.meson_args
+        self.mconf_command = self.meson_command + ['configure']
+        self.mintro_command = self.meson_command + ['introspect']
+        self.wrap_command = self.meson_command + ['wrap']
+        self.rewrite_command = self.meson_command + ['rewrite']
+        # Backend-specific build commands
+        self.build_command, self.clean_command, self.test_command, self.install_command, \
+            self.uninstall_command = get_backend_commands(self.backend)
+        # Test directories
+        self.common_test_dir = os.path.join(src_root, 'test cases/common')
+        self.vala_test_dir = os.path.join(src_root, 'test cases/vala')
+        self.framework_test_dir = os.path.join(src_root, 'test cases/frameworks')
+        self.unit_test_dir = os.path.join(src_root, 'test cases/unit')
+        self.rewrite_test_dir = os.path.join(src_root, 'test cases/rewrite')
+        self.linuxlike_test_dir = os.path.join(src_root, 'test cases/linuxlike')
+        # Misc stuff
+        self.orig_env = os.environ.copy()
+        if self.backend is Backend.ninja:
+            self.no_rebuild_stdout = ['ninja: no work to do.', 'samu: nothing to do']
+        else:
+            # VS doesn't have a stable output when no changes are done
+            # XCode backend is untested with unit tests, help welcome!
+            self.no_rebuild_stdout = ['UNKNOWN BACKEND {!r}'.format(self.backend.name)]
+
+        self.builddirs = []
+        self.new_builddir()
+
+    def change_builddir(self, newdir):
+        self.builddir = newdir
+        self.privatedir = os.path.join(self.builddir, 'meson-private')
+        self.logdir = os.path.join(self.builddir, 'meson-logs')
+        self.installdir = os.path.join(self.builddir, 'install')
+        self.distdir = os.path.join(self.builddir, 'meson-dist')
+        self.mtest_command = self.meson_command + ['test', '-C', self.builddir]
+        self.builddirs.append(self.builddir)
+
+    def new_builddir(self):
+        if not is_cygwin():
+            # Keep builddirs inside the source tree so that virus scanners
+            # don't complain
+            newdir = tempfile.mkdtemp(dir=os.getcwd())
+        else:
+            # But not on Cygwin because that breaks the umask tests. See:
+            # https://github.com/mesonbuild/meson/pull/5546#issuecomment-509666523
+            newdir = tempfile.mkdtemp()
+        # In case the directory is inside a symlinked directory, find the real
+        # path otherwise we might not find the srcdir from inside the builddir.
+        newdir = os.path.realpath(newdir)
+        self.change_builddir(newdir)
+
+    def _print_meson_log(self):
+        log = os.path.join(self.logdir, 'meson-log.txt')
+        if not os.path.isfile(log):
+            print("{!r} doesn't exist".format(log))
+            return
+        with open(log, 'r', encoding='utf-8') as f:
+            print(f.read())
+
+    def tearDown(self):
+        for path in self.builddirs:
+            try:
+                windows_proof_rmtree(path)
+            except FileNotFoundError:
+                pass
+        os.environ.clear()
+        os.environ.update(self.orig_env)
+        super().tearDown()
+
+    def _run(self, command, *, workdir=None, override_envvars=None):
+        '''
+        Run a command while printing the stdout and stderr to stdout,
+        and also return a copy of it
+        '''
+        # If this call hangs CI will just abort. It is very hard to distinguish
+        # between CI issue and test bug in that case. Set timeout and fail loud
+        # instead.
+        if override_envvars is None:
+            env = None
+        else:
+            env = os.environ.copy()
+            env.update(override_envvars)
+
+        p = subprocess.run(command, stdout=subprocess.PIPE,
+                           stderr=subprocess.STDOUT, env=env,
+                           universal_newlines=True, cwd=workdir, timeout=60 * 5)
+        print(p.stdout)
+        if p.returncode != 0:
+            if 'MESON_SKIP_TEST' in p.stdout:
+                raise unittest.SkipTest('Project requested skipping.')
+            raise subprocess.CalledProcessError(p.returncode, command, output=p.stdout)
+        return p.stdout
+
+    def init(self, srcdir, *,
+             extra_args=None,
+             default_args=True,
+             inprocess=False,
+             override_envvars=None,
+             workdir=None):
+        self.assertPathExists(srcdir)
+        if extra_args is None:
+            extra_args = []
+        if not isinstance(extra_args, list):
+            extra_args = [extra_args]
+        args = [srcdir, self.builddir]
+        if default_args:
+            args += ['--prefix', self.prefix]
+            if self.libdir:
+                args += ['--libdir', self.libdir]
+            if self.meson_native_file:
+                args += ['--native-file', self.meson_native_file]
+            if self.meson_cross_file:
+                args += ['--cross-file', self.meson_cross_file]
+        self.privatedir = os.path.join(self.builddir, 'meson-private')
+        if inprocess:
+            try:
+                if override_envvars is not None:
+                    old_envvars = os.environ.copy()
+                    os.environ.update(override_envvars)
+                (returncode, out, err) = run_configure_inprocess(self.meson_args + args + extra_args)
+                if override_envvars is not None:
+                    os.environ.clear()
+                    os.environ.update(old_envvars)
+                if 'MESON_SKIP_TEST' in out:
+                    raise unittest.SkipTest('Project requested skipping.')
+                if returncode != 0:
+                    self._print_meson_log()
+                    print('Stdout:\n')
+                    print(out)
+                    print('Stderr:\n')
+                    print(err)
+                    raise RuntimeError('Configure failed')
+            except Exception:
+                self._print_meson_log()
+                raise
+            finally:
+                # Close log file to satisfy Windows file locking
+                mesonbuild.mlog.shutdown()
+                mesonbuild.mlog.log_dir = None
+                mesonbuild.mlog.log_file = None
+        else:
+            try:
+                out = self._run(self.setup_command + args + extra_args, override_envvars=override_envvars, workdir=workdir)
+            except unittest.SkipTest:
+                raise unittest.SkipTest('Project requested skipping: ' + srcdir)
+            except Exception:
+                self._print_meson_log()
+                raise
+        return out
+
+    def build(self, target=None, *, extra_args=None, override_envvars=None):
+        if extra_args is None:
+            extra_args = []
+        # Add arguments for building the target (if specified),
+        # and using the build dir (if required, with VS)
+        args = get_builddir_target_args(self.backend, self.builddir, target)
+        return self._run(self.build_command + args + extra_args, workdir=self.builddir, override_envvars=override_envvars)
+
+    def clean(self, *, override_envvars=None):
+        dir_args = get_builddir_target_args(self.backend, self.builddir, None)
+        self._run(self.clean_command + dir_args, workdir=self.builddir, override_envvars=override_envvars)
+
+    def run_tests(self, *, inprocess=False, override_envvars=None):
+        if not inprocess:
+            self._run(self.test_command, workdir=self.builddir, override_envvars=override_envvars)
+        else:
+            if override_envvars is not None:
+                old_envvars = os.environ.copy()
+                os.environ.update(override_envvars)
+            try:
+                run_mtest_inprocess(['-C', self.builddir])
+            finally:
+                if override_envvars is not None:
+                    os.environ.clear()
+                    os.environ.update(old_envvars)
+
+    def install(self, *, use_destdir=True, override_envvars=None):
+        if self.backend is not Backend.ninja:
+            raise unittest.SkipTest('{!r} backend can\'t install files'.format(self.backend.name))
+        if use_destdir:
+            destdir = {'DESTDIR': self.installdir}
+            if override_envvars is None:
+                override_envvars = destdir
+            else:
+                override_envvars.update(destdir)
+        self._run(self.install_command, workdir=self.builddir, override_envvars=override_envvars)
+
+    def uninstall(self, *, override_envvars=None):
+        self._run(self.uninstall_command, workdir=self.builddir, override_envvars=override_envvars)
+
+    def run_target(self, target, *, override_envvars=None):
+        '''
+        Run a Ninja target while printing the stdout and stderr to stdout,
+        and also return a copy of it
+        '''
+        return self.build(target=target, override_envvars=override_envvars)
+
+    def setconf(self, arg, will_build=True):
+        if not isinstance(arg, list):
+            arg = [arg]
+        if will_build:
+            ensure_backend_detects_changes(self.backend)
+        self._run(self.mconf_command + arg + [self.builddir])
+
+    def wipe(self):
+        windows_proof_rmtree(self.builddir)
+
+    def utime(self, f):
+        ensure_backend_detects_changes(self.backend)
+        os.utime(f)
+
+    def get_compdb(self):
+        if self.backend is not Backend.ninja:
+            raise unittest.SkipTest('Compiler db not available with {} backend'.format(self.backend.name))
+        try:
+            with open(os.path.join(self.builddir, 'compile_commands.json')) as ifile:
+                contents = json.load(ifile)
+        except FileNotFoundError:
+            raise unittest.SkipTest('Compiler db not found')
+        # If Ninja is using .rsp files, generate them, read their contents, and
+        # replace it as the command for all compile commands in the parsed json.
+        if len(contents) > 0 and contents[0]['command'].endswith('.rsp'):
+            # Pretend to build so that the rsp files are generated
+            self.build(extra_args=['-d', 'keeprsp', '-n'])
+            for each in contents:
+                # Extract the actual command from the rsp file
+                compiler, rsp = each['command'].split(' @')
+                rsp = os.path.join(self.builddir, rsp)
+                # Replace the command with its contents
+                with open(rsp, 'r', encoding='utf-8') as f:
+                    each['command'] = compiler + ' ' + f.read()
+        return contents
+
+    def get_meson_log(self):
+        with open(os.path.join(self.builddir, 'meson-logs', 'meson-log.txt')) as f:
+            return f.readlines()
+
+    def get_meson_log_compiler_checks(self):
+        '''
+        Fetch a list command-lines run by meson for compiler checks.
+        Each command-line is returned as a list of arguments.
+        '''
+        log = self.get_meson_log()
+        prefix = 'Command line:'
+        cmds = [l[len(prefix):].split() for l in log if l.startswith(prefix)]
+        return cmds
+
+    def get_meson_log_sanitychecks(self):
+        '''
+        Same as above, but for the sanity checks that were run
+        '''
+        log = self.get_meson_log()
+        prefix = 'Sanity check compiler command line:'
+        cmds = [l[len(prefix):].split() for l in log if l.startswith(prefix)]
+        return cmds
+
+    def introspect(self, args):
+        if isinstance(args, str):
+            args = [args]
+        out = subprocess.check_output(self.mintro_command + args + [self.builddir],
+                                      universal_newlines=True)
+        return json.loads(out)
+
+    def introspect_directory(self, directory, args):
+        if isinstance(args, str):
+            args = [args]
+        out = subprocess.check_output(self.mintro_command + args + [directory],
+                                      universal_newlines=True)
+        try:
+            obj = json.loads(out)
+        except Exception as e:
+            print(out)
+            raise e
+        return obj
+
+    def assertPathEqual(self, path1, path2):
+        '''
+        Handles a lot of platform-specific quirks related to paths such as
+        separator, case-sensitivity, etc.
+        '''
+        self.assertEqual(PurePath(path1), PurePath(path2))
+
+    def assertPathListEqual(self, pathlist1, pathlist2):
+        self.assertEqual(len(pathlist1), len(pathlist2))
+        worklist = list(zip(pathlist1, pathlist2))
+        for i in worklist:
+            if i[0] is None:
+                self.assertEqual(i[0], i[1])
+            else:
+                self.assertPathEqual(i[0], i[1])
+
+    def assertPathBasenameEqual(self, path, basename):
+        msg = '{!r} does not end with {!r}'.format(path, basename)
+        # We cannot use os.path.basename because it returns '' when the path
+        # ends with '/' for some silly reason. This is not how the UNIX utility
+        # `basename` works.
+        path_basename = PurePath(path).parts[-1]
+        self.assertEqual(PurePath(path_basename), PurePath(basename), msg)
+
+    def assertReconfiguredBuildIsNoop(self):
+        'Assert that we reconfigured and then there was nothing to do'
+        ret = self.build()
+        self.assertIn('The Meson build system', ret)
+        if self.backend is Backend.ninja:
+            for line in ret.split('\n'):
+                if line in self.no_rebuild_stdout:
+                    break
+            else:
+                raise AssertionError('build was reconfigured, but was not no-op')
+        elif self.backend is Backend.vs:
+            # Ensure that some target said that no rebuild was done
+            # XXX: Note CustomBuild did indeed rebuild, because of the regen checker!
+            self.assertIn('ClCompile:\n  All outputs are up-to-date.', ret)
+            self.assertIn('Link:\n  All outputs are up-to-date.', ret)
+            # Ensure that no targets were built
+            self.assertNotRegex(ret, re.compile('ClCompile:\n [^\n]*cl', flags=re.IGNORECASE))
+            self.assertNotRegex(ret, re.compile('Link:\n [^\n]*link', flags=re.IGNORECASE))
+        elif self.backend is Backend.xcode:
+            raise unittest.SkipTest('Please help us fix this test on the xcode backend')
+        else:
+            raise RuntimeError('Invalid backend: {!r}'.format(self.backend.name))
+
+    def assertBuildIsNoop(self):
+        ret = self.build()
+        if self.backend is Backend.ninja:
+            self.assertIn(ret.split('\n')[-2], self.no_rebuild_stdout)
+        elif self.backend is Backend.vs:
+            # Ensure that some target of each type said that no rebuild was done
+            # We always have at least one CustomBuild target for the regen checker
+            self.assertIn('CustomBuild:\n  All outputs are up-to-date.', ret)
+            self.assertIn('ClCompile:\n  All outputs are up-to-date.', ret)
+            self.assertIn('Link:\n  All outputs are up-to-date.', ret)
+            # Ensure that no targets were built
+            self.assertNotRegex(ret, re.compile('CustomBuild:\n [^\n]*cl', flags=re.IGNORECASE))
+            self.assertNotRegex(ret, re.compile('ClCompile:\n [^\n]*cl', flags=re.IGNORECASE))
+            self.assertNotRegex(ret, re.compile('Link:\n [^\n]*link', flags=re.IGNORECASE))
+        elif self.backend is Backend.xcode:
+            raise unittest.SkipTest('Please help us fix this test on the xcode backend')
+        else:
+            raise RuntimeError('Invalid backend: {!r}'.format(self.backend.name))
+
+    def assertRebuiltTarget(self, target):
+        ret = self.build()
+        if self.backend is Backend.ninja:
+            self.assertIn('Linking target {}'.format(target), ret)
+        elif self.backend is Backend.vs:
+            # Ensure that this target was rebuilt
+            linkre = re.compile('Link:\n [^\n]*link[^\n]*' + target, flags=re.IGNORECASE)
+            self.assertRegex(ret, linkre)
+        elif self.backend is Backend.xcode:
+            raise unittest.SkipTest('Please help us fix this test on the xcode backend')
+        else:
+            raise RuntimeError('Invalid backend: {!r}'.format(self.backend.name))
+
+    @staticmethod
+    def get_target_from_filename(filename):
+        base = os.path.splitext(filename)[0]
+        if base.startswith(('lib', 'cyg')):
+            return base[3:]
+        return base
+
+    def assertBuildRelinkedOnlyTarget(self, target):
+        ret = self.build()
+        if self.backend is Backend.ninja:
+            linked_targets = []
+            for line in ret.split('\n'):
+                if 'Linking target' in line:
+                    fname = line.rsplit('target ')[-1]
+                    linked_targets.append(self.get_target_from_filename(fname))
+            self.assertEqual(linked_targets, [target])
+        elif self.backend is Backend.vs:
+            # Ensure that this target was rebuilt
+            linkre = re.compile(r'Link:\n  [^\n]*link.exe[^\n]*/OUT:".\\([^"]*)"', flags=re.IGNORECASE)
+            matches = linkre.findall(ret)
+            self.assertEqual(len(matches), 1, msg=matches)
+            self.assertEqual(self.get_target_from_filename(matches[0]), target)
+        elif self.backend is Backend.xcode:
+            raise unittest.SkipTest('Please help us fix this test on the xcode backend')
+        else:
+            raise RuntimeError('Invalid backend: {!r}'.format(self.backend.name))
+
+    def assertPathExists(self, path):
+        m = 'Path {!r} should exist'.format(path)
+        self.assertTrue(os.path.exists(path), msg=m)
+
+    def assertPathDoesNotExist(self, path):
+        m = 'Path {!r} should not exist'.format(path)
+        self.assertFalse(os.path.exists(path), msg=m)
+
+
+class AllPlatformTests(BasePlatformTests):
+    '''
+    Tests that should run on all platforms
+    '''
+
+    def test_default_options_prefix(self):
+        '''
+        Tests that setting a prefix in default_options in project() works.
+        Can't be an ordinary test because we pass --prefix to meson there.
+        https://github.com/mesonbuild/meson/issues/1349
+        '''
+        testdir = os.path.join(self.common_test_dir, '90 default options')
+        self.init(testdir, default_args=False)
+        opts = self.introspect('--buildoptions')
+        for opt in opts:
+            if opt['name'] == 'prefix':
+                prefix = opt['value']
+        self.assertEqual(prefix, '/absoluteprefix')
+
+    def test_do_conf_file_preserve_newlines(self):
+
+        def conf_file(in_data, confdata):
+            with temp_filename() as fin:
+                with open(fin, 'wb') as fobj:
+                    fobj.write(in_data.encode('utf-8'))
+                with temp_filename() as fout:
+                    mesonbuild.mesonlib.do_conf_file(fin, fout, confdata, 'meson')
+                    with open(fout, 'rb') as fobj:
+                        return fobj.read().decode('utf-8')
+
+        confdata = {'VAR': ('foo', 'bar')}
+        self.assertEqual(conf_file('@VAR@\n@VAR@\n', confdata), 'foo\nfoo\n')
+        self.assertEqual(conf_file('@VAR@\r\n@VAR@\r\n', confdata), 'foo\r\nfoo\r\n')
+
+    def test_do_conf_file_by_format(self):
+        def conf_str(in_data, confdata, vformat):
+            (result, missing_variables, confdata_useless) = mesonbuild.mesonlib.do_conf_str(in_data, confdata, variable_format = vformat)
+            return '\n'.join(result)
+
+        def check_formats(confdata, result):
+            self.assertEqual(conf_str(['#mesondefine VAR'], confdata, 'meson'), result)
+            self.assertEqual(conf_str(['#cmakedefine VAR ${VAR}'], confdata, 'cmake'), result)
+            self.assertEqual(conf_str(['#cmakedefine VAR @VAR@'], confdata, 'cmake@'), result)
+
+        confdata = ConfigurationData()
+        # Key error as they do not exists
+        check_formats(confdata, '/* #undef VAR */\n')
+
+        # Check boolean
+        confdata.values = {'VAR': (False, 'description')}
+        check_formats(confdata, '#undef VAR\n')
+        confdata.values = {'VAR': (True, 'description')}
+        check_formats(confdata, '#define VAR\n')
+
+        # Check string
+        confdata.values = {'VAR': ('value', 'description')}
+        check_formats(confdata, '#define VAR value\n')
+
+        # Check integer
+        confdata.values = {'VAR': (10, 'description')}
+        check_formats(confdata, '#define VAR 10\n')
+
+        # Check multiple string with cmake formats
+        confdata.values = {'VAR': ('value', 'description')}
+        self.assertEqual(conf_str(['#cmakedefine VAR xxx @VAR@ yyy @VAR@'], confdata, 'cmake@'), '#define VAR xxx value yyy value\n')
+        self.assertEqual(conf_str(['#define VAR xxx @VAR@ yyy @VAR@'], confdata, 'cmake@'), '#define VAR xxx value yyy value')
+        self.assertEqual(conf_str(['#cmakedefine VAR xxx ${VAR} yyy ${VAR}'], confdata, 'cmake'), '#define VAR xxx value yyy value\n')
+        self.assertEqual(conf_str(['#define VAR xxx ${VAR} yyy ${VAR}'], confdata, 'cmake'), '#define VAR xxx value yyy value')
+
+        # Handles meson format exceptions
+        #   Unknown format
+        self.assertRaises(mesonbuild.mesonlib.MesonException, conf_str, ['#mesondefine VAR xxx'], confdata, 'unknown_format')
+        #   More than 2 params in mesondefine
+        self.assertRaises(mesonbuild.mesonlib.MesonException, conf_str, ['#mesondefine VAR xxx'], confdata, 'meson')
+        #   Mismatched line with format
+        self.assertRaises(mesonbuild.mesonlib.MesonException, conf_str, ['#cmakedefine VAR'], confdata, 'meson')
+        self.assertRaises(mesonbuild.mesonlib.MesonException, conf_str, ['#mesondefine VAR'], confdata, 'cmake')
+        self.assertRaises(mesonbuild.mesonlib.MesonException, conf_str, ['#mesondefine VAR'], confdata, 'cmake@')
+        #   Dict value in confdata
+        confdata.values = {'VAR': (['value'], 'description')}
+        self.assertRaises(mesonbuild.mesonlib.MesonException, conf_str, ['#mesondefine VAR'], confdata, 'meson')
+
+    def test_absolute_prefix_libdir(self):
+        '''
+        Tests that setting absolute paths for --prefix and --libdir work. Can't
+        be an ordinary test because these are set via the command-line.
+        https://github.com/mesonbuild/meson/issues/1341
+        https://github.com/mesonbuild/meson/issues/1345
+        '''
+        testdir = os.path.join(self.common_test_dir, '90 default options')
+        # on Windows, /someabs is *not* an absolute path
+        prefix = 'x:/someabs' if is_windows() else '/someabs'
+        libdir = 'libdir'
+        extra_args = ['--prefix=' + prefix,
+                      # This can just be a relative path, but we want to test
+                      # that passing this as an absolute path also works
+                      '--libdir=' + prefix + '/' + libdir]
+        self.init(testdir, extra_args=extra_args, default_args=False)
+        opts = self.introspect('--buildoptions')
+        for opt in opts:
+            if opt['name'] == 'prefix':
+                self.assertEqual(prefix, opt['value'])
+            elif opt['name'] == 'libdir':
+                self.assertEqual(libdir, opt['value'])
+
+    def test_libdir_must_be_inside_prefix(self):
+        '''
+        Tests that libdir is forced to be inside prefix no matter how it is set.
+        Must be a unit test for obvious reasons.
+        '''
+        testdir = os.path.join(self.common_test_dir, '1 trivial')
+        # libdir being inside prefix is ok
+        if is_windows():
+            args = ['--prefix', 'x:/opt', '--libdir', 'x:/opt/lib32']
+        else:
+            args = ['--prefix', '/opt', '--libdir', '/opt/lib32']
+        self.init(testdir, extra_args=args)
+        self.wipe()
+        # libdir not being inside prefix is not ok
+        if is_windows():
+            args = ['--prefix', 'x:/usr', '--libdir', 'x:/opt/lib32']
+        else:
+            args = ['--prefix', '/usr', '--libdir', '/opt/lib32']
+        self.assertRaises(subprocess.CalledProcessError, self.init, testdir, extra_args=args)
+        self.wipe()
+        # libdir must be inside prefix even when set via mesonconf
+        self.init(testdir)
+        if is_windows():
+            self.assertRaises(subprocess.CalledProcessError, self.setconf, '-Dlibdir=x:/opt', False)
+        else:
+            self.assertRaises(subprocess.CalledProcessError, self.setconf, '-Dlibdir=/opt', False)
+
+    def test_prefix_dependent_defaults(self):
+        '''
+        Tests that configured directory paths are set to prefix dependent
+        defaults.
+        '''
+        testdir = os.path.join(self.common_test_dir, '1 trivial')
+        expected = {
+            '/opt': {'prefix': '/opt',
+                     'bindir': 'bin', 'datadir': 'share', 'includedir': 'include',
+                     'infodir': 'share/info',
+                     'libexecdir': 'libexec', 'localedir': 'share/locale',
+                     'localstatedir': 'var', 'mandir': 'share/man',
+                     'sbindir': 'sbin', 'sharedstatedir': 'com',
+                     'sysconfdir': 'etc'},
+            '/usr': {'prefix': '/usr',
+                     'bindir': 'bin', 'datadir': 'share', 'includedir': 'include',
+                     'infodir': 'share/info',
+                     'libexecdir': 'libexec', 'localedir': 'share/locale',
+                     'localstatedir': '/var', 'mandir': 'share/man',
+                     'sbindir': 'sbin', 'sharedstatedir': '/var/lib',
+                     'sysconfdir': '/etc'},
+            '/usr/local': {'prefix': '/usr/local',
+                           'bindir': 'bin', 'datadir': 'share',
+                           'includedir': 'include', 'infodir': 'share/info',
+                           'libexecdir': 'libexec',
+                           'localedir': 'share/locale',
+                           'localstatedir': '/var/local', 'mandir': 'share/man',
+                           'sbindir': 'sbin', 'sharedstatedir': '/var/local/lib',
+                           'sysconfdir': 'etc'},
+            # N.B. We don't check 'libdir' as it's platform dependent, see
+            # default_libdir():
+        }
+
+        if mesonbuild.mesonlib.default_prefix() == '/usr/local':
+            expected[None] = expected['/usr/local']
+
+        for prefix in expected:
+            args = []
+            if prefix:
+                args += ['--prefix', prefix]
+            self.init(testdir, extra_args=args, default_args=False)
+            opts = self.introspect('--buildoptions')
+            for opt in opts:
+                name = opt['name']
+                value = opt['value']
+                if name in expected[prefix]:
+                    self.assertEqual(value, expected[prefix][name])
+            self.wipe()
+
+    def test_default_options_prefix_dependent_defaults(self):
+        '''
+        Tests that setting a prefix in default_options in project() sets prefix
+        dependent defaults for other options, and that those defaults can
+        be overridden in default_options or by the command line.
+        '''
+        testdir = os.path.join(self.common_test_dir, '168 default options prefix dependent defaults')
+        expected = {
+            '':
+            {'prefix':         '/usr',
+             'sysconfdir':     '/etc',
+             'localstatedir':  '/var',
+             'sharedstatedir': '/sharedstate'},
+            '--prefix=/usr':
+            {'prefix':         '/usr',
+             'sysconfdir':     '/etc',
+             'localstatedir':  '/var',
+             'sharedstatedir': '/sharedstate'},
+            '--sharedstatedir=/var/state':
+            {'prefix':         '/usr',
+             'sysconfdir':     '/etc',
+             'localstatedir':  '/var',
+             'sharedstatedir': '/var/state'},
+            '--sharedstatedir=/var/state --prefix=/usr --sysconfdir=sysconf':
+            {'prefix':         '/usr',
+             'sysconfdir':     'sysconf',
+             'localstatedir':  '/var',
+             'sharedstatedir': '/var/state'},
+        }
+        for args in expected:
+            self.init(testdir, extra_args=args.split(), default_args=False)
+            opts = self.introspect('--buildoptions')
+            for opt in opts:
+                name = opt['name']
+                value = opt['value']
+                if name in expected[args]:
+                    self.assertEqual(value, expected[args][name])
+            self.wipe()
+
+    def test_clike_get_library_dirs(self):
+        env = get_fake_env()
+        cc = env.detect_c_compiler(MachineChoice.HOST)
+        for d in cc.get_library_dirs(env):
+            self.assertTrue(os.path.exists(d))
+            self.assertTrue(os.path.isdir(d))
+            self.assertTrue(os.path.isabs(d))
+
+    def test_static_library_overwrite(self):
+        '''
+        Tests that static libraries are never appended to, always overwritten.
+        Has to be a unit test because this involves building a project,
+        reconfiguring, and building it again so that `ar` is run twice on the
+        same static library.
+        https://github.com/mesonbuild/meson/issues/1355
+        '''
+        testdir = os.path.join(self.common_test_dir, '3 static')
+        env = get_fake_env(testdir, self.builddir, self.prefix)
+        cc = env.detect_c_compiler(MachineChoice.HOST)
+        static_linker = env.detect_static_linker(cc)
+        if is_windows():
+            raise unittest.SkipTest('https://github.com/mesonbuild/meson/issues/1526')
+        if not isinstance(static_linker, mesonbuild.linkers.ArLinker):
+            raise unittest.SkipTest('static linker is not `ar`')
+        # Configure
+        self.init(testdir)
+        # Get name of static library
+        targets = self.introspect('--targets')
+        self.assertEqual(len(targets), 1)
+        libname = targets[0]['filename'][0]
+        # Build and get contents of static library
+        self.build()
+        before = self._run(['ar', 't', os.path.join(self.builddir, libname)]).split()
+        # Filter out non-object-file contents
+        before = [f for f in before if f.endswith(('.o', '.obj'))]
+        # Static library should contain only one object
+        self.assertEqual(len(before), 1, msg=before)
+        # Change the source to be built into the static library
+        self.setconf('-Dsource=libfile2.c')
+        self.build()
+        after = self._run(['ar', 't', os.path.join(self.builddir, libname)]).split()
+        # Filter out non-object-file contents
+        after = [f for f in after if f.endswith(('.o', '.obj'))]
+        # Static library should contain only one object
+        self.assertEqual(len(after), 1, msg=after)
+        # and the object must have changed
+        self.assertNotEqual(before, after)
+
+    def test_static_compile_order(self):
+        '''
+        Test that the order of files in a compiler command-line while compiling
+        and linking statically is deterministic. This can't be an ordinary test
+        case because we need to inspect the compiler database.
+        https://github.com/mesonbuild/meson/pull/951
+        '''
+        testdir = os.path.join(self.common_test_dir, '5 linkstatic')
+        self.init(testdir)
+        compdb = self.get_compdb()
+        # Rules will get written out in this order
+        self.assertTrue(compdb[0]['file'].endswith("libfile.c"))
+        self.assertTrue(compdb[1]['file'].endswith("libfile2.c"))
+        self.assertTrue(compdb[2]['file'].endswith("libfile3.c"))
+        self.assertTrue(compdb[3]['file'].endswith("libfile4.c"))
+        # FIXME: We don't have access to the linker command
+
+    def test_run_target_files_path(self):
+        '''
+        Test that run_targets are run from the correct directory
+        https://github.com/mesonbuild/meson/issues/957
+        '''
+        testdir = os.path.join(self.common_test_dir, '54 run target')
+        self.init(testdir)
+        self.run_target('check_exists')
+
+    def test_install_introspection(self):
+        '''
+        Tests that the Meson introspection API exposes install filenames correctly
+        https://github.com/mesonbuild/meson/issues/829
+        '''
+        if self.backend is not Backend.ninja:
+            raise unittest.SkipTest('{!r} backend can\'t install files'.format(self.backend.name))
+        testdir = os.path.join(self.common_test_dir, '8 install')
+        self.init(testdir)
+        intro = self.introspect('--targets')
+        if intro[0]['type'] == 'executable':
+            intro = intro[::-1]
+        self.assertPathListEqual(intro[0]['install_filename'], ['/usr/lib/libstat.a'])
+        self.assertPathListEqual(intro[1]['install_filename'], ['/usr/bin/prog' + exe_suffix])
+
+    def test_install_subdir_introspection(self):
+        '''
+        Test that the Meson introspection API also contains subdir install information
+        https://github.com/mesonbuild/meson/issues/5556
+        '''
+        testdir = os.path.join(self.common_test_dir, '62 install subdir')
+        self.init(testdir)
+        intro = self.introspect('--installed')
+        expected = {
+            'sub2': 'share/sub2',
+            'subdir/sub1': 'share/sub1',
+            'subdir/sub_elided': 'share',
+            'sub1': 'share/sub1',
+            'sub/sub1': 'share/sub1',
+            'sub_elided': 'share',
+            'nested_elided/sub': 'share',
+        }
+
+        self.assertEqual(len(intro), len(expected))
+
+        # Convert expected to PurePath
+        expected_converted = {PurePath(os.path.join(testdir, key)): PurePath(os.path.join(self.prefix, val)) for key, val in expected.items()}
+        intro_converted = {PurePath(key): PurePath(val) for key, val in intro.items()}
+
+        for src, dst in expected_converted.items():
+            self.assertIn(src, intro_converted)
+            self.assertEqual(dst, intro_converted[src])
+
+    def test_install_introspection_multiple_outputs(self):
+        '''
+        Tests that the Meson introspection API exposes multiple install filenames correctly without crashing
+        https://github.com/mesonbuild/meson/pull/4555
+
+        Reverted to the first file only because of https://github.com/mesonbuild/meson/pull/4547#discussion_r244173438
+        TODO Change the format to a list officially in a followup PR
+        '''
+        if self.backend is not Backend.ninja:
+            raise unittest.SkipTest('{!r} backend can\'t install files'.format(self.backend.name))
+        testdir = os.path.join(self.common_test_dir, '144 custom target multiple outputs')
+        self.init(testdir)
+        intro = self.introspect('--targets')
+        if intro[0]['type'] == 'executable':
+            intro = intro[::-1]
+        self.assertPathListEqual(intro[0]['install_filename'], ['/usr/include/diff.h', '/usr/bin/diff.sh'])
+        self.assertPathListEqual(intro[1]['install_filename'], ['/opt/same.h', '/opt/same.sh'])
+        self.assertPathListEqual(intro[2]['install_filename'], ['/usr/include/first.h', None])
+        self.assertPathListEqual(intro[3]['install_filename'], [None, '/usr/bin/second.sh'])
+
+    def test_install_log_content(self):
+        '''
+        Tests that the install-log.txt is consistent with the installed files and directories.
+        Specifically checks that the log file only contains one entry per file/directory.
+        https://github.com/mesonbuild/meson/issues/4499
+        '''
+        testdir = os.path.join(self.common_test_dir, '62 install subdir')
+        self.init(testdir)
+        self.install()
+        installpath = Path(self.installdir)
+        # Find installed files and directories
+        expected = {installpath: 0}
+        for name in installpath.rglob('*'):
+            expected[name] = 0
+        # Find logged files and directories
+        with Path(self.builddir, 'meson-logs', 'install-log.txt').open() as f:
+            logged = list(map(lambda l: Path(l.strip()),
+                              filter(lambda l: not l.startswith('#'),
+                                     f.readlines())))
+        for name in logged:
+            self.assertTrue(name in expected, 'Log contains extra entry {}'.format(name))
+            expected[name] += 1
+
+        for name, count in expected.items():
+            self.assertGreater(count, 0, 'Log is missing entry for {}'.format(name))
+            self.assertLess(count, 2, 'Log has multiple entries for {}'.format(name))
+
+    def test_uninstall(self):
+        exename = os.path.join(self.installdir, 'usr/bin/prog' + exe_suffix)
+        testdir = os.path.join(self.common_test_dir, '8 install')
+        self.init(testdir)
+        self.assertPathDoesNotExist(exename)
+        self.install()
+        self.assertPathExists(exename)
+        self.uninstall()
+        self.assertPathDoesNotExist(exename)
+
+    def test_forcefallback(self):
+        testdir = os.path.join(self.unit_test_dir, '31 forcefallback')
+        self.init(testdir, extra_args=['--wrap-mode=forcefallback'])
+        self.build()
+        self.run_tests()
+
+    def test_force_fallback_for(self):
+        testdir = os.path.join(self.unit_test_dir, '31 forcefallback')
+        self.init(testdir, extra_args=['--force-fallback-for=zlib,foo'])
+        self.build()
+        self.run_tests()
+
+    def test_env_ops_dont_stack(self):
+        '''
+        Test that env ops prepend/append do not stack, and that this usage issues a warning
+        '''
+        testdir = os.path.join(self.unit_test_dir, '63 test env does not stack')
+        out = self.init(testdir)
+        self.assertRegex(out, r'WARNING: Overriding.*TEST_VAR_APPEND')
+        self.assertRegex(out, r'WARNING: Overriding.*TEST_VAR_PREPEND')
+        self.assertNotRegex(out, r'WARNING: Overriding.*TEST_VAR_SET')
+        self.run_tests()
+
+    def test_testsetups(self):
+        if not shutil.which('valgrind'):
+            raise unittest.SkipTest('Valgrind not installed.')
+        testdir = os.path.join(self.unit_test_dir, '2 testsetups')
+        self.init(testdir)
+        self.build()
+        # Run tests without setup
+        self.run_tests()
+        with open(os.path.join(self.logdir, 'testlog.txt')) as f:
+            basic_log = f.read()
+        # Run buggy test with setup that has env that will make it fail
+        self.assertRaises(subprocess.CalledProcessError,
+                          self._run, self.mtest_command + ['--setup=valgrind'])
+        with open(os.path.join(self.logdir, 'testlog-valgrind.txt')) as f:
+            vg_log = f.read()
+        self.assertFalse('TEST_ENV is set' in basic_log)
+        self.assertFalse('Memcheck' in basic_log)
+        self.assertTrue('TEST_ENV is set' in vg_log)
+        self.assertTrue('Memcheck' in vg_log)
+        # Run buggy test with setup without env that will pass
+        self._run(self.mtest_command + ['--setup=wrapper'])
+        # Setup with no properties works
+        self._run(self.mtest_command + ['--setup=empty'])
+        # Setup with only env works
+        self._run(self.mtest_command + ['--setup=onlyenv'])
+        self._run(self.mtest_command + ['--setup=onlyenv2'])
+        self._run(self.mtest_command + ['--setup=onlyenv3'])
+        # Setup with only a timeout works
+        self._run(self.mtest_command + ['--setup=timeout'])
+
+    def test_testsetup_selection(self):
+        testdir = os.path.join(self.unit_test_dir, '14 testsetup selection')
+        self.init(testdir)
+        self.build()
+
+        # Run tests without setup
+        self.run_tests()
+
+        self.assertRaises(subprocess.CalledProcessError, self._run, self.mtest_command + ['--setup=missingfromfoo'])
+        self._run(self.mtest_command + ['--setup=missingfromfoo', '--no-suite=foo:'])
+
+        self._run(self.mtest_command + ['--setup=worksforall'])
+        self._run(self.mtest_command + ['--setup=main:worksforall'])
+
+        self.assertRaises(subprocess.CalledProcessError, self._run,
+                          self.mtest_command + ['--setup=onlyinbar'])
+        self.assertRaises(subprocess.CalledProcessError, self._run,
+                          self.mtest_command + ['--setup=onlyinbar', '--no-suite=main:'])
+        self._run(self.mtest_command + ['--setup=onlyinbar', '--no-suite=main:', '--no-suite=foo:'])
+        self._run(self.mtest_command + ['--setup=bar:onlyinbar'])
+        self.assertRaises(subprocess.CalledProcessError, self._run,
+                          self.mtest_command + ['--setup=foo:onlyinbar'])
+        self.assertRaises(subprocess.CalledProcessError, self._run,
+                          self.mtest_command + ['--setup=main:onlyinbar'])
+
+    def test_testsetup_default(self):
+        testdir = os.path.join(self.unit_test_dir, '49 testsetup default')
+        self.init(testdir)
+        self.build()
+
+        # Run tests without --setup will cause the default setup to be used
+        self.run_tests()
+        with open(os.path.join(self.logdir, 'testlog.txt')) as f:
+            default_log = f.read()
+
+        # Run tests with explicitly using the same setup that is set as default
+        self._run(self.mtest_command + ['--setup=mydefault'])
+        with open(os.path.join(self.logdir, 'testlog-mydefault.txt')) as f:
+            mydefault_log = f.read()
+
+        # Run tests with another setup
+        self._run(self.mtest_command + ['--setup=other'])
+        with open(os.path.join(self.logdir, 'testlog-other.txt')) as f:
+            other_log = f.read()
+
+        self.assertTrue('ENV_A is 1' in default_log)
+        self.assertTrue('ENV_B is 2' in default_log)
+        self.assertTrue('ENV_C is 2' in default_log)
+
+        self.assertTrue('ENV_A is 1' in mydefault_log)
+        self.assertTrue('ENV_B is 2' in mydefault_log)
+        self.assertTrue('ENV_C is 2' in mydefault_log)
+
+        self.assertTrue('ENV_A is 1' in other_log)
+        self.assertTrue('ENV_B is 3' in other_log)
+        self.assertTrue('ENV_C is 2' in other_log)
+
+    def assertFailedTestCount(self, failure_count, command):
+        try:
+            self._run(command)
+            self.assertEqual(0, failure_count, 'Expected %d tests to fail.' % failure_count)
+        except subprocess.CalledProcessError as e:
+            self.assertEqual(e.returncode, failure_count)
+
+    def test_suite_selection(self):
+        testdir = os.path.join(self.unit_test_dir, '4 suite selection')
+        self.init(testdir)
+        self.build()
+
+        self.assertFailedTestCount(4, self.mtest_command)
+
+        self.assertFailedTestCount(0, self.mtest_command + ['--suite', ':success'])
+        self.assertFailedTestCount(3, self.mtest_command + ['--suite', ':fail'])
+        self.assertFailedTestCount(4, self.mtest_command + ['--no-suite', ':success'])
+        self.assertFailedTestCount(1, self.mtest_command + ['--no-suite', ':fail'])
+
+        self.assertFailedTestCount(1, self.mtest_command + ['--suite', 'mainprj'])
+        self.assertFailedTestCount(0, self.mtest_command + ['--suite', 'subprjsucc'])
+        self.assertFailedTestCount(1, self.mtest_command + ['--suite', 'subprjfail'])
+        self.assertFailedTestCount(1, self.mtest_command + ['--suite', 'subprjmix'])
+        self.assertFailedTestCount(3, self.mtest_command + ['--no-suite', 'mainprj'])
+        self.assertFailedTestCount(4, self.mtest_command + ['--no-suite', 'subprjsucc'])
+        self.assertFailedTestCount(3, self.mtest_command + ['--no-suite', 'subprjfail'])
+        self.assertFailedTestCount(3, self.mtest_command + ['--no-suite', 'subprjmix'])
+
+        self.assertFailedTestCount(1, self.mtest_command + ['--suite', 'mainprj:fail'])
+        self.assertFailedTestCount(0, self.mtest_command + ['--suite', 'mainprj:success'])
+        self.assertFailedTestCount(3, self.mtest_command + ['--no-suite', 'mainprj:fail'])
+        self.assertFailedTestCount(4, self.mtest_command + ['--no-suite', 'mainprj:success'])
+
+        self.assertFailedTestCount(1, self.mtest_command + ['--suite', 'subprjfail:fail'])
+        self.assertFailedTestCount(0, self.mtest_command + ['--suite', 'subprjfail:success'])
+        self.assertFailedTestCount(3, self.mtest_command + ['--no-suite', 'subprjfail:fail'])
+        self.assertFailedTestCount(4, self.mtest_command + ['--no-suite', 'subprjfail:success'])
+
+        self.assertFailedTestCount(0, self.mtest_command + ['--suite', 'subprjsucc:fail'])
+        self.assertFailedTestCount(0, self.mtest_command + ['--suite', 'subprjsucc:success'])
+        self.assertFailedTestCount(4, self.mtest_command + ['--no-suite', 'subprjsucc:fail'])
+        self.assertFailedTestCount(4, self.mtest_command + ['--no-suite', 'subprjsucc:success'])
+
+        self.assertFailedTestCount(1, self.mtest_command + ['--suite', 'subprjmix:fail'])
+        self.assertFailedTestCount(0, self.mtest_command + ['--suite', 'subprjmix:success'])
+        self.assertFailedTestCount(3, self.mtest_command + ['--no-suite', 'subprjmix:fail'])
+        self.assertFailedTestCount(4, self.mtest_command + ['--no-suite', 'subprjmix:success'])
+
+        self.assertFailedTestCount(2, self.mtest_command + ['--suite', 'subprjfail', '--suite', 'subprjmix:fail'])
+        self.assertFailedTestCount(3, self.mtest_command + ['--suite', 'subprjfail', '--suite', 'subprjmix', '--suite', 'mainprj'])
+        self.assertFailedTestCount(2, self.mtest_command + ['--suite', 'subprjfail', '--suite', 'subprjmix', '--suite', 'mainprj', '--no-suite', 'subprjmix:fail'])
+        self.assertFailedTestCount(1, self.mtest_command + ['--suite', 'subprjfail', '--suite', 'subprjmix', '--suite', 'mainprj', '--no-suite', 'subprjmix:fail', 'mainprj-failing_test'])
+
+        self.assertFailedTestCount(2, self.mtest_command + ['--no-suite', 'subprjfail:fail', '--no-suite', 'subprjmix:fail'])
+
+    def test_build_by_default(self):
+        testdir = os.path.join(self.common_test_dir, '133 build by default')
+        self.init(testdir)
+        self.build()
+        genfile1 = os.path.join(self.builddir, 'generated1.dat')
+        genfile2 = os.path.join(self.builddir, 'generated2.dat')
+        exe1 = os.path.join(self.builddir, 'fooprog' + exe_suffix)
+        exe2 = os.path.join(self.builddir, 'barprog' + exe_suffix)
+        self.assertPathExists(genfile1)
+        self.assertPathExists(genfile2)
+        self.assertPathDoesNotExist(exe1)
+        self.assertPathDoesNotExist(exe2)
+        self.build(target=('fooprog' + exe_suffix))
+        self.assertPathExists(exe1)
+        self.build(target=('barprog' + exe_suffix))
+        self.assertPathExists(exe2)
+
+    def test_internal_include_order(self):
+        if mesonbuild.environment.detect_msys2_arch() and ('MESON_RSP_THRESHOLD' in os.environ):
+            raise unittest.SkipTest('Test does not yet support gcc rsp files on msys2')
+
+        testdir = os.path.join(self.common_test_dir, '134 include order')
+        self.init(testdir)
+        execmd = fxecmd = None
+        for cmd in self.get_compdb():
+            if 'someexe' in cmd['command']:
+                execmd = cmd['command']
+                continue
+            if 'somefxe' in cmd['command']:
+                fxecmd = cmd['command']
+                continue
+        if not execmd or not fxecmd:
+            raise Exception('Could not find someexe and somfxe commands')
+        # Check include order for 'someexe'
+        incs = [a for a in split_args(execmd) if a.startswith("-I")]
+        self.assertEqual(len(incs), 9)
+        # Need to run the build so the private dir is created.
+        self.build()
+        pdirs = glob(os.path.join(self.builddir, 'sub4/someexe*.p'))
+        self.assertEqual(len(pdirs), 1)
+        privdir = pdirs[0][len(self.builddir)+1:]
+        self.assertPathEqual(incs[0], "-I" + privdir)
+        # target build subdir
+        self.assertPathEqual(incs[1], "-Isub4")
+        # target source subdir
+        self.assertPathBasenameEqual(incs[2], 'sub4')
+        # include paths added via per-target c_args: ['-I'...]
+        self.assertPathBasenameEqual(incs[3], 'sub3')
+        # target include_directories: build dir
+        self.assertPathEqual(incs[4], "-Isub2")
+        # target include_directories: source dir
+        self.assertPathBasenameEqual(incs[5], 'sub2')
+        # target internal dependency include_directories: build dir
+        self.assertPathEqual(incs[6], "-Isub1")
+        # target internal dependency include_directories: source dir
+        self.assertPathBasenameEqual(incs[7], 'sub1')
+        # custom target include dir
+        self.assertPathEqual(incs[8], '-Ictsub')
+        # Check include order for 'somefxe'
+        incs = [a for a in split_args(fxecmd) if a.startswith('-I')]
+        self.assertEqual(len(incs), 9)
+        # target private dir
+        pdirs = glob(os.path.join(self.builddir, 'somefxe*.p'))
+        self.assertEqual(len(pdirs), 1)
+        privdir = pdirs[0][len(self.builddir)+1:]
+        self.assertPathEqual(incs[0], '-I' + privdir)
+        # target build dir
+        self.assertPathEqual(incs[1], '-I.')
+        # target source dir
+        self.assertPathBasenameEqual(incs[2], os.path.basename(testdir))
+        # target internal dependency correct include_directories: build dir
+        self.assertPathEqual(incs[3], "-Isub4")
+        # target internal dependency correct include_directories: source dir
+        self.assertPathBasenameEqual(incs[4], 'sub4')
+        # target internal dependency dep include_directories: build dir
+        self.assertPathEqual(incs[5], "-Isub1")
+        # target internal dependency dep include_directories: source dir
+        self.assertPathBasenameEqual(incs[6], 'sub1')
+        # target internal dependency wrong include_directories: build dir
+        self.assertPathEqual(incs[7], "-Isub2")
+        # target internal dependency wrong include_directories: source dir
+        self.assertPathBasenameEqual(incs[8], 'sub2')
+
+    def test_compiler_detection(self):
+        '''
+        Test that automatic compiler detection and setting from the environment
+        both work just fine. This is needed because while running project tests
+        and other unit tests, we always read CC/CXX/etc from the environment.
+        '''
+        gnu = mesonbuild.compilers.GnuCompiler
+        clang = mesonbuild.compilers.ClangCompiler
+        intel = mesonbuild.compilers.IntelGnuLikeCompiler
+        msvc = (mesonbuild.compilers.VisualStudioCCompiler, mesonbuild.compilers.VisualStudioCPPCompiler)
+        clangcl = (mesonbuild.compilers.ClangClCCompiler, mesonbuild.compilers.ClangClCPPCompiler)
+        ar = mesonbuild.linkers.ArLinker
+        lib = mesonbuild.linkers.VisualStudioLinker
+        langs = [('c', 'CC'), ('cpp', 'CXX')]
+        if not is_windows() and platform.machine().lower() != 'e2k':
+            langs += [('objc', 'OBJC'), ('objcpp', 'OBJCXX')]
+        testdir = os.path.join(self.unit_test_dir, '5 compiler detection')
+        env = get_fake_env(testdir, self.builddir, self.prefix)
+        for lang, evar in langs:
+            # Detect with evar and do sanity checks on that
+            if evar in os.environ:
+                ecc = getattr(env, 'detect_{}_compiler'.format(lang))(MachineChoice.HOST)
+                self.assertTrue(ecc.version)
+                elinker = env.detect_static_linker(ecc)
+                # Pop it so we don't use it for the next detection
+                evalue = os.environ.pop(evar)
+                # Very rough/strict heuristics. Would never work for actual
+                # compiler detection, but should be ok for the tests.
+                ebase = os.path.basename(evalue)
+                if ebase.startswith('g') or ebase.endswith(('-gcc', '-g++')):
+                    self.assertIsInstance(ecc, gnu)
+                    self.assertIsInstance(elinker, ar)
+                elif 'clang-cl' in ebase:
+                    self.assertIsInstance(ecc, clangcl)
+                    self.assertIsInstance(elinker, lib)
+                elif 'clang' in ebase:
+                    self.assertIsInstance(ecc, clang)
+                    self.assertIsInstance(elinker, ar)
+                elif ebase.startswith('ic'):
+                    self.assertIsInstance(ecc, intel)
+                    self.assertIsInstance(elinker, ar)
+                elif ebase.startswith('cl'):
+                    self.assertIsInstance(ecc, msvc)
+                    self.assertIsInstance(elinker, lib)
+                else:
+                    raise AssertionError('Unknown compiler {!r}'.format(evalue))
+                # Check that we actually used the evalue correctly as the compiler
+                self.assertEqual(ecc.get_exelist(), split_args(evalue))
+            # Do auto-detection of compiler based on platform, PATH, etc.
+            cc = getattr(env, 'detect_{}_compiler'.format(lang))(MachineChoice.HOST)
+            self.assertTrue(cc.version)
+            linker = env.detect_static_linker(cc)
+            # Check compiler type
+            if isinstance(cc, gnu):
+                self.assertIsInstance(linker, ar)
+                if is_osx():
+                    self.assertIsInstance(cc.linker, mesonbuild.linkers.AppleDynamicLinker)
+                elif is_sunos():
+                    self.assertIsInstance(cc.linker, (mesonbuild.linkers.SolarisDynamicLinker, mesonbuild.linkers.GnuLikeDynamicLinkerMixin))
+                else:
+                    self.assertIsInstance(cc.linker, mesonbuild.linkers.GnuLikeDynamicLinkerMixin)
+            if isinstance(cc, clangcl):
+                self.assertIsInstance(linker, lib)
+                self.assertIsInstance(cc.linker, mesonbuild.linkers.ClangClDynamicLinker)
+            if isinstance(cc, clang):
+                self.assertIsInstance(linker, ar)
+                if is_osx():
+                    self.assertIsInstance(cc.linker, mesonbuild.linkers.AppleDynamicLinker)
+                elif is_windows():
+                    # This is clang, not clang-cl
+                    self.assertIsInstance(cc.linker, mesonbuild.linkers.MSVCDynamicLinker)
+                else:
+                    self.assertIsInstance(cc.linker, mesonbuild.linkers.GnuLikeDynamicLinkerMixin)
+            if isinstance(cc, intel):
+                self.assertIsInstance(linker, ar)
+                if is_osx():
+                    self.assertIsInstance(cc.linker, mesonbuild.linkers.AppleDynamicLinker)
+                elif is_windows():
+                    self.assertIsInstance(cc.linker, mesonbuild.linkers.XilinkDynamicLinker)
+                else:
+                    self.assertIsInstance(cc.linker, mesonbuild.linkers.GnuDynamicLinker)
+            if isinstance(cc, msvc):
+                self.assertTrue(is_windows())
+                self.assertIsInstance(linker, lib)
+                self.assertEqual(cc.id, 'msvc')
+                self.assertTrue(hasattr(cc, 'is_64'))
+                self.assertIsInstance(cc.linker, mesonbuild.linkers.MSVCDynamicLinker)
+                # If we're on Windows CI, we know what the compiler will be
+                if 'arch' in os.environ:
+                    if os.environ['arch'] == 'x64':
+                        self.assertTrue(cc.is_64)
+                    else:
+                        self.assertFalse(cc.is_64)
+            # Set evar ourselves to a wrapper script that just calls the same
+            # exelist + some argument. This is meant to test that setting
+            # something like `ccache gcc -pipe` or `distcc ccache gcc` works.
+            wrapper = os.path.join(testdir, 'compiler wrapper.py')
+            wrappercc = python_command + [wrapper] + cc.get_exelist() + ['-DSOME_ARG']
+            wrappercc_s = ''
+            for w in wrappercc:
+                wrappercc_s += quote_arg(w) + ' '
+            os.environ[evar] = wrappercc_s
+            wcc = getattr(env, 'detect_{}_compiler'.format(lang))(MachineChoice.HOST)
+            # Check static linker too
+            wrapperlinker = python_command + [wrapper] + linker.get_exelist() + linker.get_always_args()
+            wrapperlinker_s = ''
+            for w in wrapperlinker:
+                wrapperlinker_s += quote_arg(w) + ' '
+            os.environ['AR'] = wrapperlinker_s
+            wlinker = env.detect_static_linker(wcc)
+            # Pop it so we don't use it for the next detection
+            evalue = os.environ.pop('AR')
+            # Must be the same type since it's a wrapper around the same exelist
+            self.assertIs(type(cc), type(wcc))
+            self.assertIs(type(linker), type(wlinker))
+            # Ensure that the exelist is correct
+            self.assertEqual(wcc.get_exelist(), wrappercc)
+            self.assertEqual(wlinker.get_exelist(), wrapperlinker)
+            # Ensure that the version detection worked correctly
+            self.assertEqual(cc.version, wcc.version)
+            if hasattr(cc, 'is_64'):
+                self.assertEqual(cc.is_64, wcc.is_64)
+
+    def test_always_prefer_c_compiler_for_asm(self):
+        testdir = os.path.join(self.common_test_dir, '137 c cpp and asm')
+        # Skip if building with MSVC
+        env = get_fake_env(testdir, self.builddir, self.prefix)
+        if env.detect_c_compiler(MachineChoice.HOST).get_id() == 'msvc':
+            raise unittest.SkipTest('MSVC can\'t compile assembly')
+        self.init(testdir)
+        commands = {'c-asm': {}, 'cpp-asm': {}, 'cpp-c-asm': {}, 'c-cpp-asm': {}}
+        for cmd in self.get_compdb():
+            # Get compiler
+            split = split_args(cmd['command'])
+            if split[0] == 'ccache':
+                compiler = split[1]
+            else:
+                compiler = split[0]
+            # Classify commands
+            if 'Ic-asm' in cmd['command']:
+                if cmd['file'].endswith('.S'):
+                    commands['c-asm']['asm'] = compiler
+                elif cmd['file'].endswith('.c'):
+                    commands['c-asm']['c'] = compiler
+                else:
+                    raise AssertionError('{!r} found in cpp-asm?'.format(cmd['command']))
+            elif 'Icpp-asm' in cmd['command']:
+                if cmd['file'].endswith('.S'):
+                    commands['cpp-asm']['asm'] = compiler
+                elif cmd['file'].endswith('.cpp'):
+                    commands['cpp-asm']['cpp'] = compiler
+                else:
+                    raise AssertionError('{!r} found in cpp-asm?'.format(cmd['command']))
+            elif 'Ic-cpp-asm' in cmd['command']:
+                if cmd['file'].endswith('.S'):
+                    commands['c-cpp-asm']['asm'] = compiler
+                elif cmd['file'].endswith('.c'):
+                    commands['c-cpp-asm']['c'] = compiler
+                elif cmd['file'].endswith('.cpp'):
+                    commands['c-cpp-asm']['cpp'] = compiler
+                else:
+                    raise AssertionError('{!r} found in c-cpp-asm?'.format(cmd['command']))
+            elif 'Icpp-c-asm' in cmd['command']:
+                if cmd['file'].endswith('.S'):
+                    commands['cpp-c-asm']['asm'] = compiler
+                elif cmd['file'].endswith('.c'):
+                    commands['cpp-c-asm']['c'] = compiler
+                elif cmd['file'].endswith('.cpp'):
+                    commands['cpp-c-asm']['cpp'] = compiler
+                else:
+                    raise AssertionError('{!r} found in cpp-c-asm?'.format(cmd['command']))
+            else:
+                raise AssertionError('Unknown command {!r} found'.format(cmd['command']))
+        # Check that .S files are always built with the C compiler
+        self.assertEqual(commands['c-asm']['asm'], commands['c-asm']['c'])
+        self.assertEqual(commands['c-asm']['asm'], commands['cpp-asm']['asm'])
+        self.assertEqual(commands['cpp-asm']['asm'], commands['c-cpp-asm']['c'])
+        self.assertEqual(commands['c-cpp-asm']['asm'], commands['c-cpp-asm']['c'])
+        self.assertEqual(commands['cpp-c-asm']['asm'], commands['cpp-c-asm']['c'])
+        self.assertNotEqual(commands['cpp-asm']['asm'], commands['cpp-asm']['cpp'])
+        self.assertNotEqual(commands['c-cpp-asm']['c'], commands['c-cpp-asm']['cpp'])
+        self.assertNotEqual(commands['cpp-c-asm']['c'], commands['cpp-c-asm']['cpp'])
+        # Check that the c-asm target is always linked with the C linker
+        build_ninja = os.path.join(self.builddir, 'build.ninja')
+        with open(build_ninja, 'r', encoding='utf-8') as f:
+            contents = f.read()
+            m = re.search('build c-asm.*: c_LINKER', contents)
+        self.assertIsNotNone(m, msg=contents)
+
+    def test_preprocessor_checks_CPPFLAGS(self):
+        '''
+        Test that preprocessor compiler checks read CPPFLAGS and also CFLAGS but
+        not LDFLAGS.
+        '''
+        testdir = os.path.join(self.common_test_dir, '136 get define')
+        define = 'MESON_TEST_DEFINE_VALUE'
+        # NOTE: this list can't have \n, ' or "
+        # \n is never substituted by the GNU pre-processor via a -D define
+        # ' and " confuse split_args() even when they are escaped
+        # % and # confuse the MSVC preprocessor
+        # !, ^, *, and < confuse lcc preprocessor
+        value = 'spaces and fun@$&()-=_+{}[]:;>?,./~`'
+        for env_var in ['CPPFLAGS', 'CFLAGS']:
+            env = {}
+            env[env_var] = '-D{}="{}"'.format(define, value)
+            env['LDFLAGS'] = '-DMESON_FAIL_VALUE=cflags-read'.format(define)
+            self.init(testdir, extra_args=['-D{}={}'.format(define, value)], override_envvars=env)
+
+    def test_custom_target_exe_data_deterministic(self):
+        testdir = os.path.join(self.common_test_dir, '113 custom target capture')
+        self.init(testdir)
+        meson_exe_dat1 = glob(os.path.join(self.privatedir, 'meson_exe*.dat'))
+        self.wipe()
+        self.init(testdir)
+        meson_exe_dat2 = glob(os.path.join(self.privatedir, 'meson_exe*.dat'))
+        self.assertListEqual(meson_exe_dat1, meson_exe_dat2)
+
+    def test_noop_changes_cause_no_rebuilds(self):
+        '''
+        Test that no-op changes to the build files such as mtime do not cause
+        a rebuild of anything.
+        '''
+        testdir = os.path.join(self.common_test_dir, '6 linkshared')
+        self.init(testdir)
+        self.build()
+        # Immediately rebuilding should not do anything
+        self.assertBuildIsNoop()
+        # Changing mtime of meson.build should not rebuild anything
+        self.utime(os.path.join(testdir, 'meson.build'))
+        self.assertReconfiguredBuildIsNoop()
+        # Changing mtime of libefile.c should rebuild the library, but not relink the executable
+        self.utime(os.path.join(testdir, 'libfile.c'))
+        self.assertBuildRelinkedOnlyTarget('mylib')
+
+    def test_source_changes_cause_rebuild(self):
+        '''
+        Test that changes to sources and headers cause rebuilds, but not
+        changes to unused files (as determined by the dependency file) in the
+        input files list.
+        '''
+        testdir = os.path.join(self.common_test_dir, '20 header in file list')
+        self.init(testdir)
+        self.build()
+        # Immediately rebuilding should not do anything
+        self.assertBuildIsNoop()
+        # Changing mtime of header.h should rebuild everything
+        self.utime(os.path.join(testdir, 'header.h'))
+        self.assertBuildRelinkedOnlyTarget('prog')
+
+    def test_custom_target_changes_cause_rebuild(self):
+        '''
+        Test that in a custom target, changes to the input files, the
+        ExternalProgram, and any File objects on the command-line cause
+        a rebuild.
+        '''
+        testdir = os.path.join(self.common_test_dir, '60 custom header generator')
+        self.init(testdir)
+        self.build()
+        # Immediately rebuilding should not do anything
+        self.assertBuildIsNoop()
+        # Changing mtime of these should rebuild everything
+        for f in ('input.def', 'makeheader.py', 'somefile.txt'):
+            self.utime(os.path.join(testdir, f))
+            self.assertBuildRelinkedOnlyTarget('prog')
+
+    def test_source_generator_program_cause_rebuild(self):
+        '''
+        Test that changes to generator programs in the source tree cause
+        a rebuild.
+        '''
+        testdir = os.path.join(self.common_test_dir, '94 gen extra')
+        self.init(testdir)
+        self.build()
+        # Immediately rebuilding should not do anything
+        self.assertBuildIsNoop()
+        # Changing mtime of generator should rebuild the executable
+        self.utime(os.path.join(testdir, 'srcgen.py'))
+        self.assertRebuiltTarget('basic')
+
+    def test_static_library_lto(self):
+        '''
+        Test that static libraries can be built with LTO and linked to
+        executables. On Linux, this requires the use of gcc-ar.
+        https://github.com/mesonbuild/meson/issues/1646
+        '''
+        testdir = os.path.join(self.common_test_dir, '5 linkstatic')
+
+        env = get_fake_env(testdir, self.builddir, self.prefix)
+        if env.detect_c_compiler(MachineChoice.HOST).get_id() == 'clang' and is_windows():
+            raise unittest.SkipTest('LTO not (yet) supported by windows clang')
+
+        self.init(testdir, extra_args='-Db_lto=true')
+        self.build()
+        self.run_tests()
+
+    def test_dist_git(self):
+        if not shutil.which('git'):
+            raise unittest.SkipTest('Git not found')
+        if self.backend is not Backend.ninja:
+            raise unittest.SkipTest('Dist is only supported with Ninja')
+
+        try:
+            self.dist_impl(_git_init)
+        except PermissionError:
+            # When run under Windows CI, something (virus scanner?)
+            # holds on to the git files so cleaning up the dir
+            # fails sometimes.
+            pass
+
+    def has_working_hg(self):
+        if not shutil.which('hg'):
+            return False
+        try:
+            # This check should not be necessary, but
+            # CI under macOS passes the above test even
+            # though Mercurial is not installed.
+            if subprocess.call(['hg', '--version'],
+                               stdout=subprocess.DEVNULL,
+                               stderr=subprocess.DEVNULL) != 0:
+                return False
+            return True
+        except FileNotFoundError:
+            return False
+
+
+    def test_dist_hg(self):
+        if not self.has_working_hg():
+            raise unittest.SkipTest('Mercurial not found or broken.')
+        if self.backend is not Backend.ninja:
+            raise unittest.SkipTest('Dist is only supported with Ninja')
+
+        def hg_init(project_dir):
+            subprocess.check_call(['hg', 'init'], cwd=project_dir)
+            with open(os.path.join(project_dir, '.hg', 'hgrc'), 'w') as f:
+                print('[ui]', file=f)
+                print('username=Author Person ', file=f)
+            subprocess.check_call(['hg', 'add', 'meson.build', 'distexe.c'], cwd=project_dir)
+            subprocess.check_call(['hg', 'commit', '-m', 'I am a project'], cwd=project_dir)
+
+        try:
+            self.dist_impl(hg_init, include_subprojects=False)
+        except PermissionError:
+            # When run under Windows CI, something (virus scanner?)
+            # holds on to the hg files so cleaning up the dir
+            # fails sometimes.
+            pass
+
+    def test_dist_git_script(self):
+        if not shutil.which('git'):
+            raise unittest.SkipTest('Git not found')
+        if self.backend is not Backend.ninja:
+            raise unittest.SkipTest('Dist is only supported with Ninja')
+
+        try:
+            with tempfile.TemporaryDirectory() as tmpdir:
+                project_dir = os.path.join(tmpdir, 'a')
+                shutil.copytree(os.path.join(self.unit_test_dir, '35 dist script'),
+                                project_dir)
+                _git_init(project_dir)
+                self.init(project_dir)
+                self.build('dist')
+        except PermissionError:
+            # When run under Windows CI, something (virus scanner?)
+            # holds on to the git files so cleaning up the dir
+            # fails sometimes.
+            pass
+
+    def create_dummy_subproject(self, project_dir, name):
+        path = os.path.join(project_dir, 'subprojects', name)
+        os.makedirs(path)
+        with open(os.path.join(path, 'meson.build'), 'w') as ofile:
+            ofile.write("project('{}')".format(name))
+        return path
+
+    def dist_impl(self, vcs_init, include_subprojects=True):
+        # Create this on the fly because having rogue .git directories inside
+        # the source tree leads to all kinds of trouble.
+        with tempfile.TemporaryDirectory() as project_dir:
+            with open(os.path.join(project_dir, 'meson.build'), 'w') as ofile:
+                ofile.write('''project('disttest', 'c', version : '1.4.3')
+e = executable('distexe', 'distexe.c')
+test('dist test', e)
+subproject('vcssub', required : false)
+subproject('tarballsub', required : false)
+''')
+            with open(os.path.join(project_dir, 'distexe.c'), 'w') as ofile:
+                ofile.write('''#include
+
+int main(int argc, char **argv) {
+    printf("I am a distribution test.\\n");
+    return 0;
+}
+''')
+            xz_distfile = os.path.join(self.distdir, 'disttest-1.4.3.tar.xz')
+            xz_checksumfile = xz_distfile + '.sha256sum'
+            zip_distfile = os.path.join(self.distdir, 'disttest-1.4.3.zip')
+            zip_checksumfile = zip_distfile + '.sha256sum'
+            vcs_init(project_dir)
+            if include_subprojects:
+                vcs_init(self.create_dummy_subproject(project_dir, 'vcssub'))
+                self.create_dummy_subproject(project_dir, 'tarballsub')
+                self.create_dummy_subproject(project_dir, 'unusedsub')
+            self.init(project_dir)
+            self.build('dist')
+            self.assertPathExists(xz_distfile)
+            self.assertPathExists(xz_checksumfile)
+            self.assertPathDoesNotExist(zip_distfile)
+            self.assertPathDoesNotExist(zip_checksumfile)
+            self._run(self.meson_command + ['dist', '--formats', 'zip'],
+                      workdir=self.builddir)
+            self.assertPathExists(zip_distfile)
+            self.assertPathExists(zip_checksumfile)
+
+            if include_subprojects:
+                z = zipfile.ZipFile(zip_distfile)
+                self.assertEqual(sorted(['disttest-1.4.3/',
+                                         'disttest-1.4.3/meson.build',
+                                         'disttest-1.4.3/distexe.c']),
+                                 sorted(z.namelist()))
+
+                self._run(self.meson_command + ['dist', '--formats', 'zip', '--include-subprojects'],
+                          workdir=self.builddir)
+                z = zipfile.ZipFile(zip_distfile)
+                self.assertEqual(sorted(['disttest-1.4.3/',
+                                         'disttest-1.4.3/subprojects/',
+                                         'disttest-1.4.3/meson.build',
+                                         'disttest-1.4.3/distexe.c',
+                                         'disttest-1.4.3/subprojects/tarballsub/',
+                                         'disttest-1.4.3/subprojects/vcssub/',
+                                         'disttest-1.4.3/subprojects/tarballsub/meson.build',
+                                         'disttest-1.4.3/subprojects/vcssub/meson.build']),
+                                 sorted(z.namelist()))
+
+    def test_rpath_uses_ORIGIN(self):
+        '''
+        Test that built targets use $ORIGIN in rpath, which ensures that they
+        are relocatable and ensures that builds are reproducible since the
+        build directory won't get embedded into the built binaries.
+        '''
+        if is_windows() or is_cygwin():
+            raise unittest.SkipTest('Windows PE/COFF binaries do not use RPATH')
+        testdir = os.path.join(self.common_test_dir, '42 library chain')
+        self.init(testdir)
+        self.build()
+        for each in ('prog', 'subdir/liblib1.so', ):
+            rpath = get_rpath(os.path.join(self.builddir, each))
+            self.assertTrue(rpath, 'Rpath could not be determined for {}.'.format(each))
+            if is_dragonflybsd():
+                # DragonflyBSD will prepend /usr/lib/gccVERSION to the rpath,
+                # so ignore that.
+                self.assertTrue(rpath.startswith('/usr/lib/gcc'))
+                rpaths = rpath.split(':')[1:]
+            else:
+                rpaths = rpath.split(':')
+            for path in rpaths:
+                self.assertTrue(path.startswith('$ORIGIN'), msg=(each, path))
+        # These two don't link to anything else, so they do not need an rpath entry.
+        for each in ('subdir/subdir2/liblib2.so', 'subdir/subdir3/liblib3.so'):
+            rpath = get_rpath(os.path.join(self.builddir, each))
+            if is_dragonflybsd():
+                # The rpath should be equal to /usr/lib/gccVERSION
+                self.assertTrue(rpath.startswith('/usr/lib/gcc'))
+                self.assertEqual(len(rpath.split(':')), 1)
+            else:
+                self.assertTrue(rpath is None)
+
+    def test_dash_d_dedup(self):
+        testdir = os.path.join(self.unit_test_dir, '9 d dedup')
+        self.init(testdir)
+        cmd = self.get_compdb()[0]['command']
+        self.assertTrue('-D FOO -D BAR' in cmd or
+                        '"-D" "FOO" "-D" "BAR"' in cmd or
+                        '/D FOO /D BAR' in cmd or
+                        '"/D" "FOO" "/D" "BAR"' in cmd)
+
+    def test_all_forbidden_targets_tested(self):
+        '''
+        Test that all forbidden targets are tested in the '154 reserved targets'
+        test. Needs to be a unit test because it accesses Meson internals.
+        '''
+        testdir = os.path.join(self.common_test_dir, '154 reserved targets')
+        targets = mesonbuild.coredata.forbidden_target_names
+        # We don't actually define a target with this name
+        targets.pop('build.ninja')
+        # Remove this to avoid multiple entries with the same name
+        # but different case.
+        targets.pop('PHONY')
+        for i in targets:
+            self.assertPathExists(os.path.join(testdir, i))
+
+    def detect_prebuild_env(self):
+        env = get_fake_env()
+        cc = env.detect_c_compiler(MachineChoice.HOST)
+        stlinker = env.detect_static_linker(cc)
+        if mesonbuild.mesonlib.is_windows():
+            object_suffix = 'obj'
+            shared_suffix = 'dll'
+        elif mesonbuild.mesonlib.is_cygwin():
+            object_suffix = 'o'
+            shared_suffix = 'dll'
+        elif mesonbuild.mesonlib.is_osx():
+            object_suffix = 'o'
+            shared_suffix = 'dylib'
+        else:
+            object_suffix = 'o'
+            shared_suffix = 'so'
+        return (cc, stlinker, object_suffix, shared_suffix)
+
+    def pbcompile(self, compiler, source, objectfile, extra_args=None):
+        cmd = compiler.get_exelist()
+        extra_args = extra_args or []
+        if compiler.get_argument_syntax() == 'msvc':
+            cmd += ['/nologo', '/Fo' + objectfile, '/c', source] + extra_args
+        else:
+            cmd += ['-c', source, '-o', objectfile] + extra_args
+        subprocess.check_call(cmd, stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL)
+
+    def test_prebuilt_object(self):
+        (compiler, _, object_suffix, _) = self.detect_prebuild_env()
+        tdir = os.path.join(self.unit_test_dir, '15 prebuilt object')
+        source = os.path.join(tdir, 'source.c')
+        objectfile = os.path.join(tdir, 'prebuilt.' + object_suffix)
+        self.pbcompile(compiler, source, objectfile)
+        try:
+            self.init(tdir)
+            self.build()
+            self.run_tests()
+        finally:
+            os.unlink(objectfile)
+
+    def build_static_lib(self, compiler, linker, source, objectfile, outfile, extra_args=None):
+        if extra_args is None:
+            extra_args = []
+        if compiler.get_argument_syntax() == 'msvc':
+            link_cmd = ['lib', '/NOLOGO', '/OUT:' + outfile, objectfile]
+        else:
+            link_cmd = ['ar', 'csr', outfile, objectfile]
+        link_cmd = linker.get_exelist()
+        link_cmd += linker.get_always_args()
+        link_cmd += linker.get_std_link_args()
+        link_cmd += linker.get_output_args(outfile)
+        link_cmd += [objectfile]
+        self.pbcompile(compiler, source, objectfile, extra_args=extra_args)
+        try:
+            subprocess.check_call(link_cmd)
+        finally:
+            os.unlink(objectfile)
+
+    def test_prebuilt_static_lib(self):
+        (cc, stlinker, object_suffix, _) = self.detect_prebuild_env()
+        tdir = os.path.join(self.unit_test_dir, '16 prebuilt static')
+        source = os.path.join(tdir, 'libdir/best.c')
+        objectfile = os.path.join(tdir, 'libdir/best.' + object_suffix)
+        stlibfile = os.path.join(tdir, 'libdir/libbest.a')
+        self.build_static_lib(cc, stlinker, source, objectfile, stlibfile)
+        # Run the test
+        try:
+            self.init(tdir)
+            self.build()
+            self.run_tests()
+        finally:
+            os.unlink(stlibfile)
+
+    def build_shared_lib(self, compiler, source, objectfile, outfile, impfile, extra_args=None):
+        if extra_args is None:
+            extra_args = []
+        if compiler.get_argument_syntax() == 'msvc':
+            link_cmd = compiler.get_linker_exelist() + [
+                '/NOLOGO', '/DLL', '/DEBUG', '/IMPLIB:' + impfile,
+                '/OUT:' + outfile, objectfile]
+        else:
+            if not (compiler.info.is_windows() or compiler.info.is_cygwin() or compiler.info.is_darwin()):
+                extra_args += ['-fPIC']
+            link_cmd = compiler.get_exelist() + ['-shared', '-o', outfile, objectfile]
+            if not mesonbuild.mesonlib.is_osx():
+                link_cmd += ['-Wl,-soname=' + os.path.basename(outfile)]
+        self.pbcompile(compiler, source, objectfile, extra_args=extra_args)
+        try:
+            subprocess.check_call(link_cmd)
+        finally:
+            os.unlink(objectfile)
+
+    def test_prebuilt_shared_lib(self):
+        (cc, _, object_suffix, shared_suffix) = self.detect_prebuild_env()
+        tdir = os.path.join(self.unit_test_dir, '17 prebuilt shared')
+        source = os.path.join(tdir, 'alexandria.c')
+        objectfile = os.path.join(tdir, 'alexandria.' + object_suffix)
+        impfile = os.path.join(tdir, 'alexandria.lib')
+        if cc.get_argument_syntax() == 'msvc':
+            shlibfile = os.path.join(tdir, 'alexandria.' + shared_suffix)
+        elif is_cygwin():
+            shlibfile = os.path.join(tdir, 'cygalexandria.' + shared_suffix)
+        else:
+            shlibfile = os.path.join(tdir, 'libalexandria.' + shared_suffix)
+        self.build_shared_lib(cc, source, objectfile, shlibfile, impfile)
+        # Run the test
+        try:
+            self.init(tdir)
+            self.build()
+            self.run_tests()
+        finally:
+            os.unlink(shlibfile)
+            if mesonbuild.mesonlib.is_windows():
+                # Clean up all the garbage MSVC writes in the
+                # source tree.
+                for fname in glob(os.path.join(tdir, 'alexandria.*')):
+                    if os.path.splitext(fname)[1] not in ['.c', '.h']:
+                        os.unlink(fname)
+
+    @skipIfNoPkgconfig
+    def test_pkgconfig_static(self):
+        '''
+        Test that the we prefer static libraries when `static: true` is
+        passed to dependency() with pkg-config. Can't be an ordinary test
+        because we need to build libs and try to find them from meson.build
+
+        Also test that it's not a hard error to have unsatisfiable library deps
+        since system libraries -lm will never be found statically.
+        https://github.com/mesonbuild/meson/issues/2785
+        '''
+        (cc, stlinker, objext, shext) = self.detect_prebuild_env()
+        testdir = os.path.join(self.unit_test_dir, '18 pkgconfig static')
+        source = os.path.join(testdir, 'foo.c')
+        objectfile = os.path.join(testdir, 'foo.' + objext)
+        stlibfile = os.path.join(testdir, 'libfoo.a')
+        impfile = os.path.join(testdir, 'foo.lib')
+        if cc.get_argument_syntax() == 'msvc':
+            shlibfile = os.path.join(testdir, 'foo.' + shext)
+        elif is_cygwin():
+            shlibfile = os.path.join(testdir, 'cygfoo.' + shext)
+        else:
+            shlibfile = os.path.join(testdir, 'libfoo.' + shext)
+        # Build libs
+        self.build_static_lib(cc, stlinker, source, objectfile, stlibfile, extra_args=['-DFOO_STATIC'])
+        self.build_shared_lib(cc, source, objectfile, shlibfile, impfile)
+        # Run test
+        try:
+            self.init(testdir, override_envvars={'PKG_CONFIG_LIBDIR': self.builddir})
+            self.build()
+            self.run_tests()
+        finally:
+            os.unlink(stlibfile)
+            os.unlink(shlibfile)
+            if mesonbuild.mesonlib.is_windows():
+                # Clean up all the garbage MSVC writes in the
+                # source tree.
+                for fname in glob(os.path.join(testdir, 'foo.*')):
+                    if os.path.splitext(fname)[1] not in ['.c', '.h', '.in']:
+                        os.unlink(fname)
+
+    @skipIfNoPkgconfig
+    def test_pkgconfig_gen_escaping(self):
+        testdir = os.path.join(self.common_test_dir, '47 pkgconfig-gen')
+        prefix = '/usr/with spaces'
+        libdir = 'lib'
+        self.init(testdir, extra_args=['--prefix=' + prefix,
+                                       '--libdir=' + libdir])
+        # Find foo dependency
+        os.environ['PKG_CONFIG_LIBDIR'] = self.privatedir
+        env = get_fake_env(testdir, self.builddir, self.prefix)
+        kwargs = {'required': True, 'silent': True}
+        foo_dep = PkgConfigDependency('libfoo', env, kwargs)
+        # Ensure link_args are properly quoted
+        libdir = PurePath(prefix) / PurePath(libdir)
+        link_args = ['-L' + libdir.as_posix(), '-lfoo']
+        self.assertEqual(foo_dep.get_link_args(), link_args)
+        # Ensure include args are properly quoted
+        incdir = PurePath(prefix) / PurePath('include')
+        cargs = ['-I' + incdir.as_posix(), '-DLIBFOO']
+        # pkg-config and pkgconf does not respect the same order
+        self.assertEqual(sorted(foo_dep.get_compile_args()), sorted(cargs))
+
+    def test_array_option_change(self):
+        def get_opt():
+            opts = self.introspect('--buildoptions')
+            for x in opts:
+                if x.get('name') == 'list':
+                    return x
+            raise Exception(opts)
+
+        expected = {
+            'name': 'list',
+            'description': 'list',
+            'section': 'user',
+            'type': 'array',
+            'value': ['foo', 'bar'],
+            'machine': 'any',
+        }
+        tdir = os.path.join(self.unit_test_dir, '19 array option')
+        self.init(tdir)
+        original = get_opt()
+        self.assertDictEqual(original, expected)
+
+        expected['value'] = ['oink', 'boink']
+        self.setconf('-Dlist=oink,boink')
+        changed = get_opt()
+        self.assertEqual(changed, expected)
+
+    def test_array_option_bad_change(self):
+        def get_opt():
+            opts = self.introspect('--buildoptions')
+            for x in opts:
+                if x.get('name') == 'list':
+                    return x
+            raise Exception(opts)
+
+        expected = {
+            'name': 'list',
+            'description': 'list',
+            'section': 'user',
+            'type': 'array',
+            'value': ['foo', 'bar'],
+            'machine': 'any',
+        }
+        tdir = os.path.join(self.unit_test_dir, '19 array option')
+        self.init(tdir)
+        original = get_opt()
+        self.assertDictEqual(original, expected)
+        with self.assertRaises(subprocess.CalledProcessError):
+            self.setconf('-Dlist=bad')
+        changed = get_opt()
+        self.assertDictEqual(changed, expected)
+
+    def test_array_option_empty_equivalents(self):
+        """Array options treat -Dopt=[] and -Dopt= as equivalent."""
+        def get_opt():
+            opts = self.introspect('--buildoptions')
+            for x in opts:
+                if x.get('name') == 'list':
+                    return x
+            raise Exception(opts)
+
+        expected = {
+            'name': 'list',
+            'description': 'list',
+            'section': 'user',
+            'type': 'array',
+            'value': [],
+            'machine': 'any',
+        }
+        tdir = os.path.join(self.unit_test_dir, '19 array option')
+        self.init(tdir, extra_args='-Dlist=')
+        original = get_opt()
+        self.assertDictEqual(original, expected)
+
+    def opt_has(self, name, value):
+        res = self.introspect('--buildoptions')
+        found = False
+        for i in res:
+            if i['name'] == name:
+                self.assertEqual(i['value'], value)
+                found = True
+                break
+        self.assertTrue(found, "Array option not found in introspect data.")
+
+    def test_free_stringarray_setting(self):
+        testdir = os.path.join(self.common_test_dir, '43 options')
+        self.init(testdir)
+        self.opt_has('free_array_opt', [])
+        self.setconf('-Dfree_array_opt=foo,bar', will_build=False)
+        self.opt_has('free_array_opt', ['foo', 'bar'])
+        self.setconf("-Dfree_array_opt=['a,b', 'c,d']", will_build=False)
+        self.opt_has('free_array_opt', ['a,b', 'c,d'])
+
+    def test_subproject_promotion(self):
+        testdir = os.path.join(self.unit_test_dir, '12 promote')
+        workdir = os.path.join(self.builddir, 'work')
+        shutil.copytree(testdir, workdir)
+        spdir = os.path.join(workdir, 'subprojects')
+        s3dir = os.path.join(spdir, 's3')
+        scommondir = os.path.join(spdir, 'scommon')
+        self.assertFalse(os.path.isdir(s3dir))
+        subprocess.check_call(self.wrap_command + ['promote', 's3'], cwd=workdir)
+        self.assertTrue(os.path.isdir(s3dir))
+        self.assertFalse(os.path.isdir(scommondir))
+        self.assertNotEqual(subprocess.call(self.wrap_command + ['promote', 'scommon'],
+                                            cwd=workdir,
+                                            stdout=subprocess.DEVNULL), 0)
+        self.assertNotEqual(subprocess.call(self.wrap_command + ['promote', 'invalid/path/to/scommon'],
+                                            cwd=workdir,
+                                            stderr=subprocess.DEVNULL), 0)
+        self.assertFalse(os.path.isdir(scommondir))
+        subprocess.check_call(self.wrap_command + ['promote', 'subprojects/s2/subprojects/scommon'], cwd=workdir)
+        self.assertTrue(os.path.isdir(scommondir))
+        promoted_wrap = os.path.join(spdir, 'athing.wrap')
+        self.assertFalse(os.path.isfile(promoted_wrap))
+        subprocess.check_call(self.wrap_command + ['promote', 'athing'], cwd=workdir)
+        self.assertTrue(os.path.isfile(promoted_wrap))
+        self.init(workdir)
+        self.build()
+
+    def test_subproject_promotion_wrap(self):
+        testdir = os.path.join(self.unit_test_dir, '44 promote wrap')
+        workdir = os.path.join(self.builddir, 'work')
+        shutil.copytree(testdir, workdir)
+        spdir = os.path.join(workdir, 'subprojects')
+
+        ambiguous_wrap = os.path.join(spdir, 'ambiguous.wrap')
+        self.assertNotEqual(subprocess.call(self.wrap_command + ['promote', 'ambiguous'],
+                                            cwd=workdir,
+                                            stdout=subprocess.DEVNULL), 0)
+        self.assertFalse(os.path.isfile(ambiguous_wrap))
+        subprocess.check_call(self.wrap_command + ['promote', 'subprojects/s2/subprojects/ambiguous.wrap'], cwd=workdir)
+        self.assertTrue(os.path.isfile(ambiguous_wrap))
+
+    def test_warning_location(self):
+        tdir = os.path.join(self.unit_test_dir, '22 warning location')
+        out = self.init(tdir)
+        for expected in [
+            r'meson.build:4: WARNING: Keyword argument "link_with" defined multiple times.',
+            r'sub' + os.path.sep + r'meson.build:3: WARNING: Keyword argument "link_with" defined multiple times.',
+            r'meson.build:6: WARNING: a warning of some sort',
+            r'sub' + os.path.sep + r'meson.build:4: WARNING: subdir warning',
+            r'meson.build:7: WARNING: Module unstable-simd has no backwards or forwards compatibility and might not exist in future releases.',
+            r"meson.build:11: WARNING: The variable(s) 'MISSING' in the input file 'conf.in' are not present in the given configuration data.",
+            r'meson.build:1: WARNING: Passed invalid keyword argument "invalid".',
+        ]:
+            self.assertRegex(out, re.escape(expected))
+
+        for wd in [
+            self.src_root,
+            self.builddir,
+            os.getcwd(),
+        ]:
+            self.new_builddir()
+            out = self.init(tdir, workdir=wd)
+            expected = os.path.join(relpath(tdir, self.src_root), 'meson.build')
+            relwd = relpath(self.src_root, wd)
+            if relwd != '.':
+                expected = os.path.join(relwd, expected)
+                expected = '\n' + expected + ':'
+            self.assertIn(expected, out)
+
+    def test_error_location_path(self):
+        '''Test locations in meson errors contain correct paths'''
+        # this list contains errors from all the different steps in the
+        # lexer/parser/interpreter we have tests for.
+        for (t, f) in [
+            ('10 out of bounds', 'meson.build'),
+            ('18 wrong plusassign', 'meson.build'),
+            ('61 bad option argument', 'meson_options.txt'),
+            ('102 subdir parse error', os.path.join('subdir', 'meson.build')),
+            ('103 invalid option file', 'meson_options.txt'),
+        ]:
+            tdir = os.path.join(self.src_root, 'test cases', 'failing', t)
+
+            for wd in [
+                self.src_root,
+                self.builddir,
+                os.getcwd(),
+            ]:
+                try:
+                    self.init(tdir, workdir=wd)
+                except subprocess.CalledProcessError as e:
+                    expected = os.path.join('test cases', 'failing', t, f)
+                    relwd = relpath(self.src_root, wd)
+                    if relwd != '.':
+                        expected = os.path.join(relwd, expected)
+                    expected = '\n' + expected + ':'
+                    self.assertIn(expected, e.output)
+                else:
+                    self.fail('configure unexpectedly succeeded')
+
+    def test_permitted_method_kwargs(self):
+        tdir = os.path.join(self.unit_test_dir, '25 non-permitted kwargs')
+        out = self.init(tdir)
+        for expected in [
+            r'WARNING: Passed invalid keyword argument "prefixxx".',
+            r'WARNING: Passed invalid keyword argument "argsxx".',
+            r'WARNING: Passed invalid keyword argument "invalidxx".',
+        ]:
+            self.assertRegex(out, re.escape(expected))
+
+    def test_templates(self):
+        ninja = detect_ninja()
+        if ninja is None:
+            raise unittest.SkipTest('This test currently requires ninja. Fix this once "meson build" works.')
+        langs = ['c']
+        env = get_fake_env()
+        try:
+            env.detect_cpp_compiler(MachineChoice.HOST)
+            langs.append('cpp')
+        except EnvironmentException:
+            pass
+        try:
+            env.detect_cs_compiler(MachineChoice.HOST)
+            langs.append('cs')
+        except EnvironmentException:
+            pass
+        try:
+            env.detect_d_compiler(MachineChoice.HOST)
+            langs.append('d')
+        except EnvironmentException:
+            pass
+        try:
+            env.detect_java_compiler(MachineChoice.HOST)
+            langs.append('java')
+        except EnvironmentException:
+            pass
+        try:
+            env.detect_cuda_compiler(MachineChoice.HOST)
+            langs.append('cuda')
+        except EnvironmentException:
+            pass
+        try:
+            env.detect_fortran_compiler(MachineChoice.HOST)
+            langs.append('fortran')
+        except EnvironmentException:
+            pass
+        try:
+            env.detect_objc_compiler(MachineChoice.HOST)
+            langs.append('objc')
+        except EnvironmentException:
+            pass
+        try:
+            env.detect_objcpp_compiler(MachineChoice.HOST)
+            langs.append('objcpp')
+        except EnvironmentException:
+            pass
+        # FIXME: omitting rust as Windows AppVeyor CI finds Rust but doesn't link correctly
+        if not is_windows():
+            try:
+                env.detect_rust_compiler(MachineChoice.HOST)
+                langs.append('rust')
+            except EnvironmentException:
+                pass
+
+        for lang in langs:
+            for target_type in ('executable', 'library'):
+                # test empty directory
+                with tempfile.TemporaryDirectory() as tmpdir:
+                    self._run(self.meson_command + ['init', '--language', lang, '--type', target_type],
+                              workdir=tmpdir)
+                    self._run(self.setup_command + ['--backend=ninja', 'builddir'],
+                              workdir=tmpdir)
+                    self._run(ninja,
+                              workdir=os.path.join(tmpdir, 'builddir'))
+            # test directory with existing code file
+            if lang in ('c', 'cpp', 'd'):
+                with tempfile.TemporaryDirectory() as tmpdir:
+                    with open(os.path.join(tmpdir, 'foo.' + lang), 'w') as f:
+                        f.write('int main(void) {}')
+                    self._run(self.meson_command + ['init', '-b'], workdir=tmpdir)
+            elif lang in ('java'):
+                with tempfile.TemporaryDirectory() as tmpdir:
+                    with open(os.path.join(tmpdir, 'Foo.' + lang), 'w') as f:
+                        f.write('public class Foo { public static void main() {} }')
+                    self._run(self.meson_command + ['init', '-b'], workdir=tmpdir)
+
+    def test_compiler_run_command(self):
+        '''
+        The test checks that the compiler object can be passed to
+        run_command().
+        '''
+        testdir = os.path.join(self.unit_test_dir, '24 compiler run_command')
+        self.init(testdir)
+
+    def test_identical_target_name_in_subproject_flat_layout(self):
+        '''
+        Test that identical targets in different subprojects do not collide
+        if layout is flat.
+        '''
+        testdir = os.path.join(self.common_test_dir, '177 identical target name in subproject flat layout')
+        self.init(testdir, extra_args=['--layout=flat'])
+        self.build()
+
+    def test_identical_target_name_in_subdir_flat_layout(self):
+        '''
+        Test that identical targets in different subdirs do not collide
+        if layout is flat.
+        '''
+        testdir = os.path.join(self.common_test_dir, '186 same target name flat layout')
+        self.init(testdir, extra_args=['--layout=flat'])
+        self.build()
+
+    def test_flock(self):
+        exception_raised = False
+        with tempfile.TemporaryDirectory() as tdir:
+            os.mkdir(os.path.join(tdir, 'meson-private'))
+            with BuildDirLock(tdir):
+                try:
+                    with BuildDirLock(tdir):
+                        pass
+                except MesonException:
+                    exception_raised = True
+        self.assertTrue(exception_raised, 'Double locking did not raise exception.')
+
+    @unittest.skipIf(is_osx(), 'Test not applicable to OSX')
+    def test_check_module_linking(self):
+        """
+        Test that link_with: a shared module issues a warning
+        https://github.com/mesonbuild/meson/issues/2865
+        (That an error is raised on OSX is exercised by test failing/78)
+        """
+        tdir = os.path.join(self.unit_test_dir, '30 shared_mod linking')
+        out = self.init(tdir)
+        msg = ('''WARNING: target links against shared modules. This is not
+recommended as it is not supported on some platforms''')
+        self.assertIn(msg, out)
+
+    def test_ndebug_if_release_disabled(self):
+        testdir = os.path.join(self.unit_test_dir, '28 ndebug if-release')
+        self.init(testdir, extra_args=['--buildtype=release', '-Db_ndebug=if-release'])
+        self.build()
+        exe = os.path.join(self.builddir, 'main')
+        self.assertEqual(b'NDEBUG=1', subprocess.check_output(exe).strip())
+
+    def test_ndebug_if_release_enabled(self):
+        testdir = os.path.join(self.unit_test_dir, '28 ndebug if-release')
+        self.init(testdir, extra_args=['--buildtype=debugoptimized', '-Db_ndebug=if-release'])
+        self.build()
+        exe = os.path.join(self.builddir, 'main')
+        self.assertEqual(b'NDEBUG=0', subprocess.check_output(exe).strip())
+
+    def test_guessed_linker_dependencies(self):
+        '''
+        Test that meson adds dependencies for libraries based on the final
+        linker command line.
+        '''
+        testdirbase = os.path.join(self.unit_test_dir, '29 guessed linker dependencies')
+        testdirlib = os.path.join(testdirbase, 'lib')
+
+        extra_args = None
+        libdir_flags = ['-L']
+        env = get_fake_env(testdirlib, self.builddir, self.prefix)
+        if env.detect_c_compiler(MachineChoice.HOST).get_id() in {'msvc', 'clang-cl', 'intel-cl'}:
+            # msvc-like compiler, also test it with msvc-specific flags
+            libdir_flags += ['/LIBPATH:', '-LIBPATH:']
+        else:
+            # static libraries are not linkable with -l with msvc because meson installs them
+            # as .a files which unix_args_to_native will not know as it expects libraries to use
+            # .lib as extension. For a DLL the import library is installed as .lib. Thus for msvc
+            # this tests needs to use shared libraries to test the path resolving logic in the
+            # dependency generation code path.
+            extra_args = ['--default-library', 'static']
+
+        initial_builddir = self.builddir
+        initial_installdir = self.installdir
+
+        for libdir_flag in libdir_flags:
+            # build library
+            self.new_builddir()
+            self.init(testdirlib, extra_args=extra_args)
+            self.build()
+            self.install()
+            libbuilddir = self.builddir
+            installdir = self.installdir
+            libdir = os.path.join(self.installdir, self.prefix.lstrip('/').lstrip('\\'), 'lib')
+
+            # build user of library
+            self.new_builddir()
+            # replace is needed because meson mangles platform paths passed via LDFLAGS
+            self.init(os.path.join(testdirbase, 'exe'),
+                      override_envvars={"LDFLAGS": '{}{}'.format(libdir_flag, libdir.replace('\\', '/'))})
+            self.build()
+            self.assertBuildIsNoop()
+
+            # rebuild library
+            exebuilddir = self.builddir
+            self.installdir = installdir
+            self.builddir = libbuilddir
+            # Microsoft's compiler is quite smart about touching import libs on changes,
+            # so ensure that there is actually a change in symbols.
+            self.setconf('-Dmore_exports=true')
+            self.build()
+            self.install()
+            # no ensure_backend_detects_changes needed because self.setconf did that already
+
+            # assert user of library will be rebuild
+            self.builddir = exebuilddir
+            self.assertRebuiltTarget('app')
+
+            # restore dirs for the next test case
+            self.installdir = initial_builddir
+            self.builddir = initial_installdir
+
+    def test_conflicting_d_dash_option(self):
+        testdir = os.path.join(self.unit_test_dir, '37 mixed command line args')
+        with self.assertRaises(subprocess.CalledProcessError) as e:
+            self.init(testdir, extra_args=['-Dbindir=foo', '--bindir=bar'])
+            # Just to ensure that we caught the correct error
+            self.assertIn('passed as both', e.stderr)
+
+    def _test_same_option_twice(self, arg, args):
+        testdir = os.path.join(self.unit_test_dir, '37 mixed command line args')
+        self.init(testdir, extra_args=args)
+        opts = self.introspect('--buildoptions')
+        for item in opts:
+            if item['name'] == arg:
+                self.assertEqual(item['value'], 'bar')
+                return
+        raise Exception('Missing {} value?'.format(arg))
+
+    def test_same_dash_option_twice(self):
+        self._test_same_option_twice('bindir', ['--bindir=foo', '--bindir=bar'])
+
+    def test_same_d_option_twice(self):
+        self._test_same_option_twice('bindir', ['-Dbindir=foo', '-Dbindir=bar'])
+
+    def test_same_project_d_option_twice(self):
+        self._test_same_option_twice('one', ['-Done=foo', '-Done=bar'])
+
+    def _test_same_option_twice_configure(self, arg, args):
+        testdir = os.path.join(self.unit_test_dir, '37 mixed command line args')
+        self.init(testdir)
+        self.setconf(args)
+        opts = self.introspect('--buildoptions')
+        for item in opts:
+            if item['name'] == arg:
+                self.assertEqual(item['value'], 'bar')
+                return
+        raise Exception('Missing {} value?'.format(arg))
+
+    def test_same_dash_option_twice_configure(self):
+        self._test_same_option_twice_configure(
+            'bindir', ['--bindir=foo', '--bindir=bar'])
+
+    def test_same_d_option_twice_configure(self):
+        self._test_same_option_twice_configure(
+            'bindir', ['-Dbindir=foo', '-Dbindir=bar'])
+
+    def test_same_project_d_option_twice_configure(self):
+        self._test_same_option_twice_configure(
+            'one', ['-Done=foo', '-Done=bar'])
+
+    def test_command_line(self):
+        testdir = os.path.join(self.unit_test_dir, '34 command line')
+
+        # Verify default values when passing no args that affect the
+        # configuration, and as a bonus, test that --profile-self works.
+        self.init(testdir, extra_args=['--profile-self'])
+        obj = mesonbuild.coredata.load(self.builddir)
+        self.assertEqual(obj.builtins['default_library'].value, 'static')
+        self.assertEqual(obj.builtins['warning_level'].value, '1')
+        self.assertEqual(obj.user_options['set_sub_opt'].value, True)
+        self.assertEqual(obj.user_options['subp:subp_opt'].value, 'default3')
+        self.wipe()
+
+        # warning_level is special, it's --warnlevel instead of --warning-level
+        # for historical reasons
+        self.init(testdir, extra_args=['--warnlevel=2'])
+        obj = mesonbuild.coredata.load(self.builddir)
+        self.assertEqual(obj.builtins['warning_level'].value, '2')
+        self.setconf('--warnlevel=3')
+        obj = mesonbuild.coredata.load(self.builddir)
+        self.assertEqual(obj.builtins['warning_level'].value, '3')
+        self.wipe()
+
+        # But when using -D syntax, it should be 'warning_level'
+        self.init(testdir, extra_args=['-Dwarning_level=2'])
+        obj = mesonbuild.coredata.load(self.builddir)
+        self.assertEqual(obj.builtins['warning_level'].value, '2')
+        self.setconf('-Dwarning_level=3')
+        obj = mesonbuild.coredata.load(self.builddir)
+        self.assertEqual(obj.builtins['warning_level'].value, '3')
+        self.wipe()
+
+        # Mixing --option and -Doption is forbidden
+        with self.assertRaises(subprocess.CalledProcessError) as cm:
+            self.init(testdir, extra_args=['--warnlevel=1', '-Dwarning_level=3'])
+        self.assertNotEqual(0, cm.exception.returncode)
+        self.assertIn('as both', cm.exception.output)
+        self.init(testdir)
+        with self.assertRaises(subprocess.CalledProcessError) as cm:
+            self.setconf(['--warnlevel=1', '-Dwarning_level=3'])
+        self.assertNotEqual(0, cm.exception.returncode)
+        self.assertIn('as both', cm.exception.output)
+        self.wipe()
+
+        # --default-library should override default value from project()
+        self.init(testdir, extra_args=['--default-library=both'])
+        obj = mesonbuild.coredata.load(self.builddir)
+        self.assertEqual(obj.builtins['default_library'].value, 'both')
+        self.setconf('--default-library=shared')
+        obj = mesonbuild.coredata.load(self.builddir)
+        self.assertEqual(obj.builtins['default_library'].value, 'shared')
+        if self.backend is Backend.ninja:
+            # reconfigure target works only with ninja backend
+            self.build('reconfigure')
+            obj = mesonbuild.coredata.load(self.builddir)
+            self.assertEqual(obj.builtins['default_library'].value, 'shared')
+        self.wipe()
+
+        # Should warn on unknown options
+        out = self.init(testdir, extra_args=['-Dbad=1', '-Dfoo=2', '-Dwrong_link_args=foo'])
+        self.assertIn('Unknown options: "bad, foo, wrong_link_args"', out)
+        self.wipe()
+
+        # Should fail on malformed option
+        with self.assertRaises(subprocess.CalledProcessError) as cm:
+            self.init(testdir, extra_args=['-Dfoo'])
+        self.assertNotEqual(0, cm.exception.returncode)
+        self.assertIn('Option \'foo\' must have a value separated by equals sign.', cm.exception.output)
+        self.init(testdir)
+        with self.assertRaises(subprocess.CalledProcessError) as cm:
+            self.setconf('-Dfoo')
+        self.assertNotEqual(0, cm.exception.returncode)
+        self.assertIn('Option \'foo\' must have a value separated by equals sign.', cm.exception.output)
+        self.wipe()
+
+        # It is not an error to set wrong option for unknown subprojects or
+        # language because we don't have control on which one will be selected.
+        self.init(testdir, extra_args=['-Dc_wrong=1', '-Dwrong:bad=1', '-Db_wrong=1'])
+        self.wipe()
+
+        # Test we can set subproject option
+        self.init(testdir, extra_args=['-Dsubp:subp_opt=foo'])
+        obj = mesonbuild.coredata.load(self.builddir)
+        self.assertEqual(obj.user_options['subp:subp_opt'].value, 'foo')
+        self.wipe()
+
+        # c_args value should be parsed with split_args
+        self.init(testdir, extra_args=['-Dc_args=-Dfoo -Dbar "-Dthird=one two"'])
+        obj = mesonbuild.coredata.load(self.builddir)
+        self.assertEqual(obj.compiler_options.host['c']['args'].value, ['-Dfoo', '-Dbar', '-Dthird=one two'])
+
+        self.setconf('-Dc_args="foo bar" one two')
+        obj = mesonbuild.coredata.load(self.builddir)
+        self.assertEqual(obj.compiler_options.host['c']['args'].value, ['foo bar', 'one', 'two'])
+        self.wipe()
+
+        self.init(testdir, extra_args=['-Dset_percent_opt=myoption%'])
+        obj = mesonbuild.coredata.load(self.builddir)
+        self.assertEqual(obj.user_options['set_percent_opt'].value, 'myoption%')
+        self.wipe()
+
+        # Setting a 2nd time the same option should override the first value
+        try:
+            self.init(testdir, extra_args=['--bindir=foo', '--bindir=bar',
+                                           '-Dbuildtype=plain', '-Dbuildtype=release',
+                                           '-Db_sanitize=address', '-Db_sanitize=thread',
+                                           '-Dc_args=-Dfoo', '-Dc_args=-Dbar'])
+            obj = mesonbuild.coredata.load(self.builddir)
+            self.assertEqual(obj.builtins['bindir'].value, 'bar')
+            self.assertEqual(obj.builtins['buildtype'].value, 'release')
+            self.assertEqual(obj.base_options['b_sanitize'].value, 'thread')
+            self.assertEqual(obj.compiler_options.host['c']['args'].value, ['-Dbar'])
+            self.setconf(['--bindir=bar', '--bindir=foo',
+                          '-Dbuildtype=release', '-Dbuildtype=plain',
+                          '-Db_sanitize=thread', '-Db_sanitize=address',
+                          '-Dc_args=-Dbar', '-Dc_args=-Dfoo'])
+            obj = mesonbuild.coredata.load(self.builddir)
+            self.assertEqual(obj.builtins['bindir'].value, 'foo')
+            self.assertEqual(obj.builtins['buildtype'].value, 'plain')
+            self.assertEqual(obj.base_options['b_sanitize'].value, 'address')
+            self.assertEqual(obj.compiler_options.host['c']['args'].value, ['-Dfoo'])
+            self.wipe()
+        except KeyError:
+            # Ignore KeyError, it happens on CI for compilers that does not
+            # support b_sanitize. We have to test with a base option because
+            # they used to fail this test with Meson 0.46 an earlier versions.
+            pass
+
+    def test_warning_level_0(self):
+        testdir = os.path.join(self.common_test_dir, '214 warning level 0')
+
+        # Verify default values when passing no args
+        self.init(testdir)
+        obj = mesonbuild.coredata.load(self.builddir)
+        self.assertEqual(obj.builtins['warning_level'].value, '0')
+        self.wipe()
+
+        # verify we can override w/ --warnlevel
+        self.init(testdir, extra_args=['--warnlevel=1'])
+        obj = mesonbuild.coredata.load(self.builddir)
+        self.assertEqual(obj.builtins['warning_level'].value, '1')
+        self.setconf('--warnlevel=0')
+        obj = mesonbuild.coredata.load(self.builddir)
+        self.assertEqual(obj.builtins['warning_level'].value, '0')
+        self.wipe()
+
+        # verify we can override w/ -Dwarning_level
+        self.init(testdir, extra_args=['-Dwarning_level=1'])
+        obj = mesonbuild.coredata.load(self.builddir)
+        self.assertEqual(obj.builtins['warning_level'].value, '1')
+        self.setconf('-Dwarning_level=0')
+        obj = mesonbuild.coredata.load(self.builddir)
+        self.assertEqual(obj.builtins['warning_level'].value, '0')
+        self.wipe()
+
+    def test_feature_check_usage_subprojects(self):
+        testdir = os.path.join(self.unit_test_dir, '41 featurenew subprojects')
+        out = self.init(testdir)
+        # Parent project warns correctly
+        self.assertRegex(out, "WARNING: Project targeting '>=0.45'.*'0.47.0': dict")
+        # Subprojects warn correctly
+        self.assertRegex(out, r"\|WARNING: Project targeting '>=0.40'.*'0.44.0': disabler")
+        self.assertRegex(out, r"\|WARNING: Project targeting '!=0.40'.*'0.44.0': disabler")
+        # Subproject has a new-enough meson_version, no warning
+        self.assertNotRegex(out, "WARNING: Project targeting.*Python")
+        # Ensure a summary is printed in the subproject and the outer project
+        self.assertRegex(out, r"\|WARNING: Project specifies a minimum meson_version '>=0.40'")
+        self.assertRegex(out, r"\| \* 0.44.0: {'disabler'}")
+        self.assertRegex(out, "WARNING: Project specifies a minimum meson_version '>=0.45'")
+        self.assertRegex(out, " * 0.47.0: {'dict'}")
+
+    def test_configure_file_warnings(self):
+        testdir = os.path.join(self.common_test_dir, "14 configure file")
+        out = self.init(testdir)
+        self.assertRegex(out, "WARNING:.*'empty'.*config.h.in.*not present.*")
+        self.assertRegex(out, "WARNING:.*'FOO_BAR'.*nosubst-nocopy2.txt.in.*not present.*")
+        self.assertRegex(out, "WARNING:.*'empty'.*config.h.in.*not present.*")
+        self.assertRegex(out, "WARNING:.*empty configuration_data.*test.py.in")
+        # Warnings for configuration files that are overwritten.
+        self.assertRegex(out, "WARNING:.*\"double_output.txt\".*overwrites")
+        self.assertRegex(out, "WARNING:.*\"subdir.double_output2.txt\".*overwrites")
+        self.assertNotRegex(out, "WARNING:.*no_write_conflict.txt.*overwrites")
+        self.assertNotRegex(out, "WARNING:.*@BASENAME@.*overwrites")
+        self.assertRegex(out, "WARNING:.*\"sameafterbasename\".*overwrites")
+        # No warnings about empty configuration data objects passed to files with substitutions
+        self.assertNotRegex(out, "WARNING:.*empty configuration_data.*nosubst-nocopy1.txt.in")
+        self.assertNotRegex(out, "WARNING:.*empty configuration_data.*nosubst-nocopy2.txt.in")
+        with open(os.path.join(self.builddir, 'nosubst-nocopy1.txt'), 'rb') as f:
+            self.assertEqual(f.read().strip(), b'/* #undef FOO_BAR */')
+        with open(os.path.join(self.builddir, 'nosubst-nocopy2.txt'), 'rb') as f:
+            self.assertEqual(f.read().strip(), b'')
+        self.assertRegex(out, r"DEPRECATION:.*\['array'\] is invalid.*dict")
+
+    def test_dirs(self):
+        with tempfile.TemporaryDirectory() as containing:
+            with tempfile.TemporaryDirectory(dir=containing) as srcdir:
+                mfile = os.path.join(srcdir, 'meson.build')
+                of = open(mfile, 'w')
+                of.write("project('foobar', 'c')\n")
+                of.close()
+                pc = subprocess.run(self.setup_command,
+                                    cwd=srcdir,
+                                    stdout=subprocess.PIPE,
+                                    stderr=subprocess.DEVNULL)
+                self.assertIn(b'Must specify at least one directory name', pc.stdout)
+                with tempfile.TemporaryDirectory(dir=srcdir) as builddir:
+                    subprocess.run(self.setup_command,
+                                   check=True,
+                                   cwd=builddir,
+                                   stdout=subprocess.DEVNULL,
+                                   stderr=subprocess.DEVNULL)
+
+    def get_opts_as_dict(self):
+        result = {}
+        for i in self.introspect('--buildoptions'):
+            result[i['name']] = i['value']
+        return result
+
+    def test_buildtype_setting(self):
+        testdir = os.path.join(self.common_test_dir, '1 trivial')
+        self.init(testdir)
+        opts = self.get_opts_as_dict()
+        self.assertEqual(opts['buildtype'], 'debug')
+        self.assertEqual(opts['debug'], True)
+        self.setconf('-Ddebug=false')
+        opts = self.get_opts_as_dict()
+        self.assertEqual(opts['debug'], False)
+        self.assertEqual(opts['buildtype'], 'plain')
+        self.assertEqual(opts['optimization'], '0')
+
+        # Setting optimizations to 3 should cause buildtype
+        # to go to release mode.
+        self.setconf('-Doptimization=3')
+        opts = self.get_opts_as_dict()
+        self.assertEqual(opts['buildtype'], 'release')
+        self.assertEqual(opts['debug'], False)
+        self.assertEqual(opts['optimization'], '3')
+
+        # Going to debug build type should reset debugging
+        # and optimization
+        self.setconf('-Dbuildtype=debug')
+        opts = self.get_opts_as_dict()
+        self.assertEqual(opts['buildtype'], 'debug')
+        self.assertEqual(opts['debug'], True)
+        self.assertEqual(opts['optimization'], '0')
+
+        # Command-line parsing of buildtype settings should be the same as
+        # setting with `meson configure`.
+        #
+        # Setting buildtype should set optimization/debug
+        self.new_builddir()
+        self.init(testdir, extra_args=['-Dbuildtype=debugoptimized'])
+        opts = self.get_opts_as_dict()
+        self.assertEqual(opts['debug'], True)
+        self.assertEqual(opts['optimization'], '2')
+        self.assertEqual(opts['buildtype'], 'debugoptimized')
+        # Setting optimization/debug should set buildtype
+        self.new_builddir()
+        self.init(testdir, extra_args=['-Doptimization=2', '-Ddebug=true'])
+        opts = self.get_opts_as_dict()
+        self.assertEqual(opts['debug'], True)
+        self.assertEqual(opts['optimization'], '2')
+        self.assertEqual(opts['buildtype'], 'debugoptimized')
+        # Setting both buildtype and debug on the command-line should work, and
+        # should warn not to do that. Also test that --debug is parsed as -Ddebug=true
+        self.new_builddir()
+        out = self.init(testdir, extra_args=['-Dbuildtype=debugoptimized', '--debug'])
+        self.assertRegex(out, 'Recommend using either.*buildtype.*debug.*redundant')
+        opts = self.get_opts_as_dict()
+        self.assertEqual(opts['debug'], True)
+        self.assertEqual(opts['optimization'], '2')
+        self.assertEqual(opts['buildtype'], 'debugoptimized')
+
+    @skipIfNoPkgconfig
+    @unittest.skipIf(is_windows(), 'Help needed with fixing this test on windows')
+    def test_native_dep_pkgconfig(self):
+        testdir = os.path.join(self.unit_test_dir,
+                               '46 native dep pkgconfig var')
+        with tempfile.NamedTemporaryFile(mode='w', delete=False) as crossfile:
+            crossfile.write(textwrap.dedent(
+                '''[binaries]
+                pkgconfig = '{0}'
+
+                [properties]
+
+                [host_machine]
+                system = 'linux'
+                cpu_family = 'arm'
+                cpu = 'armv7'
+                endian = 'little'
+                '''.format(os.path.join(testdir, 'cross_pkgconfig.py'))))
+            crossfile.flush()
+            self.meson_cross_file = crossfile.name
+
+        env = {'PKG_CONFIG_LIBDIR':  os.path.join(testdir,
+                                                  'native_pkgconfig')}
+        self.init(testdir, extra_args=['-Dstart_native=false'], override_envvars=env)
+        self.wipe()
+        self.init(testdir, extra_args=['-Dstart_native=true'], override_envvars=env)
+
+    @skipIfNoPkgconfig
+    @unittest.skipIf(is_windows(), 'Help needed with fixing this test on windows')
+    def test_pkg_config_libdir(self):
+        testdir = os.path.join(self.unit_test_dir,
+                               '46 native dep pkgconfig var')
+        with tempfile.NamedTemporaryFile(mode='w', delete=False) as crossfile:
+            crossfile.write(textwrap.dedent(
+                '''[binaries]
+                pkgconfig = 'pkg-config'
+
+                [properties]
+                pkg_config_libdir = ['{0}']
+
+                [host_machine]
+                system = 'linux'
+                cpu_family = 'arm'
+                cpu = 'armv7'
+                endian = 'little'
+                '''.format(os.path.join(testdir, 'cross_pkgconfig'))))
+            crossfile.flush()
+            self.meson_cross_file = crossfile.name
+
+        env = {'PKG_CONFIG_LIBDIR':  os.path.join(testdir,
+                                                  'native_pkgconfig')}
+        self.init(testdir, extra_args=['-Dstart_native=false'], override_envvars=env)
+        self.wipe()
+        self.init(testdir, extra_args=['-Dstart_native=true'], override_envvars=env)
+
+    def __reconfigure(self, change_minor=False):
+        # Set an older version to force a reconfigure from scratch
+        filename = os.path.join(self.privatedir, 'coredata.dat')
+        with open(filename, 'rb') as f:
+            obj = pickle.load(f)
+        if change_minor:
+            v = mesonbuild.coredata.version.split('.')
+            obj.version = '.'.join(v[0:2] + [str(int(v[2]) + 1)])
+        else:
+            obj.version = '0.47.0'
+        with open(filename, 'wb') as f:
+            pickle.dump(obj, f)
+
+    def test_reconfigure(self):
+        testdir = os.path.join(self.unit_test_dir, '48 reconfigure')
+        self.init(testdir, extra_args=['-Dopt1=val1'])
+        self.setconf('-Dopt2=val2')
+
+        self.__reconfigure()
+
+        out = self.init(testdir, extra_args=['--reconfigure', '-Dopt3=val3'])
+        self.assertRegex(out, 'Regenerating configuration from scratch')
+        self.assertRegex(out, 'opt1 val1')
+        self.assertRegex(out, 'opt2 val2')
+        self.assertRegex(out, 'opt3 val3')
+        self.assertRegex(out, 'opt4 default4')
+        self.build()
+        self.run_tests()
+
+        # Create a file in builddir and verify wipe command removes it
+        filename = os.path.join(self.builddir, 'something')
+        open(filename, 'w').close()
+        self.assertTrue(os.path.exists(filename))
+        out = self.init(testdir, extra_args=['--wipe', '-Dopt4=val4'])
+        self.assertFalse(os.path.exists(filename))
+        self.assertRegex(out, 'opt1 val1')
+        self.assertRegex(out, 'opt2 val2')
+        self.assertRegex(out, 'opt3 val3')
+        self.assertRegex(out, 'opt4 val4')
+        self.build()
+        self.run_tests()
+
+    def test_wipe_from_builddir(self):
+        testdir = os.path.join(self.common_test_dir, '161 custom target subdir depend files')
+        self.init(testdir)
+        self.__reconfigure()
+
+        with Path(self.builddir):
+            self.init(testdir, extra_args=['--wipe'])
+
+    def test_minor_version_does_not_reconfigure_wipe(self):
+        testdir = os.path.join(self.unit_test_dir, '48 reconfigure')
+        self.init(testdir, extra_args=['-Dopt1=val1'])
+        self.setconf('-Dopt2=val2')
+
+        self.__reconfigure(change_minor=True)
+
+        out = self.init(testdir, extra_args=['--reconfigure', '-Dopt3=val3'])
+        self.assertNotRegex(out, 'Regenerating configuration from scratch')
+        self.assertRegex(out, 'opt1 val1')
+        self.assertRegex(out, 'opt2 val2')
+        self.assertRegex(out, 'opt3 val3')
+        self.assertRegex(out, 'opt4 default4')
+        self.build()
+        self.run_tests()
+
+    def test_target_construct_id_from_path(self):
+        # This id is stable but not guessable.
+        # The test is supposed to prevent unintentional
+        # changes of target ID generation.
+        target_id = Target.construct_id_from_path('some/obscure/subdir',
+                                                  'target-id', '@suffix')
+        self.assertEqual('5e002d3@@target-id@suffix', target_id)
+        target_id = Target.construct_id_from_path('subproject/foo/subdir/bar',
+                                                  'target2-id', '@other')
+        self.assertEqual('81d46d1@@target2-id@other', target_id)
+
+    def test_introspect_projectinfo_without_configured_build(self):
+        testfile = os.path.join(self.common_test_dir, '35 run program', 'meson.build')
+        res = self.introspect_directory(testfile, '--projectinfo')
+        self.assertEqual(set(res['buildsystem_files']), set(['meson.build']))
+        self.assertEqual(res['version'], 'undefined')
+        self.assertEqual(res['descriptive_name'], 'run command')
+        self.assertEqual(res['subprojects'], [])
+
+        testfile = os.path.join(self.common_test_dir, '43 options', 'meson.build')
+        res = self.introspect_directory(testfile, '--projectinfo')
+        self.assertEqual(set(res['buildsystem_files']), set(['meson_options.txt', 'meson.build']))
+        self.assertEqual(res['version'], 'undefined')
+        self.assertEqual(res['descriptive_name'], 'options')
+        self.assertEqual(res['subprojects'], [])
+
+        testfile = os.path.join(self.common_test_dir, '46 subproject options', 'meson.build')
+        res = self.introspect_directory(testfile, '--projectinfo')
+        self.assertEqual(set(res['buildsystem_files']), set(['meson_options.txt', 'meson.build']))
+        self.assertEqual(res['version'], 'undefined')
+        self.assertEqual(res['descriptive_name'], 'suboptions')
+        self.assertEqual(len(res['subprojects']), 1)
+        subproject_files = set(f.replace('\\', '/') for f in res['subprojects'][0]['buildsystem_files'])
+        self.assertEqual(subproject_files, set(['subprojects/subproject/meson_options.txt', 'subprojects/subproject/meson.build']))
+        self.assertEqual(res['subprojects'][0]['name'], 'subproject')
+        self.assertEqual(res['subprojects'][0]['version'], 'undefined')
+        self.assertEqual(res['subprojects'][0]['descriptive_name'], 'subproject')
+
+    def test_introspect_projectinfo_subprojects(self):
+        testdir = os.path.join(self.common_test_dir, '102 subproject subdir')
+        self.init(testdir)
+        res = self.introspect('--projectinfo')
+        expected = {
+            'descriptive_name': 'proj',
+            'version': 'undefined',
+            'subproject_dir': 'subprojects',
+            'subprojects': [
+                {
+                    'descriptive_name': 'sub',
+                    'name': 'sub',
+                    'version': '1.0'
+                },
+                {
+                    'descriptive_name': 'sub_implicit',
+                    'name': 'sub_implicit',
+                    'version': '1.0',
+                },
+                {
+                    'descriptive_name': 'sub-novar',
+                    'name': 'sub_novar',
+                    'version': '1.0',
+                },
+            ]
+        }
+        res['subprojects'] = sorted(res['subprojects'], key=lambda i: i['name'])
+        self.assertDictEqual(expected, res)
+
+    def test_introspection_target_subproject(self):
+        testdir = os.path.join(self.common_test_dir, '45 subproject')
+        self.init(testdir)
+        res = self.introspect('--targets')
+
+        expected = {
+            'sublib': 'sublib',
+            'simpletest': 'sublib',
+            'user': None
+        }
+
+        for entry in res:
+            name = entry['name']
+            self.assertEqual(entry['subproject'], expected[name])
+
+    def test_introspect_projectinfo_subproject_dir(self):
+        testdir = os.path.join(self.common_test_dir, '78 custom subproject dir')
+        self.init(testdir)
+        res = self.introspect('--projectinfo')
+
+        self.assertEqual(res['subproject_dir'], 'custom_subproject_dir')
+
+    def test_introspect_projectinfo_subproject_dir_from_source(self):
+        testfile = os.path.join(self.common_test_dir, '78 custom subproject dir', 'meson.build')
+        res = self.introspect_directory(testfile, '--projectinfo')
+
+        self.assertEqual(res['subproject_dir'], 'custom_subproject_dir')
+
+    @skipIfNoExecutable('clang-format')
+    def test_clang_format(self):
+        if self.backend is not Backend.ninja:
+            raise unittest.SkipTest('Clang-format is for now only supported on Ninja, not {}'.format(self.backend.name))
+        testdir = os.path.join(self.unit_test_dir, '54 clang-format')
+        testfile = os.path.join(testdir, 'prog.c')
+        badfile = os.path.join(testdir, 'prog_orig_c')
+        goodfile = os.path.join(testdir, 'prog_expected_c')
+        testheader = os.path.join(testdir, 'header.h')
+        badheader = os.path.join(testdir, 'header_orig_h')
+        goodheader = os.path.join(testdir, 'header_expected_h')
+        try:
+            shutil.copyfile(badfile, testfile)
+            shutil.copyfile(badheader, testheader)
+            self.init(testdir)
+            self.assertNotEqual(Path(testfile).read_text(),
+                                Path(goodfile).read_text())
+            self.assertNotEqual(Path(testheader).read_text(),
+                                Path(goodheader).read_text())
+            self.run_target('clang-format')
+            self.assertEqual(Path(testheader).read_text(),
+                             Path(goodheader).read_text())
+        finally:
+            if os.path.exists(testfile):
+                os.unlink(testfile)
+            if os.path.exists(testheader):
+                os.unlink(testheader)
+
+    @skipIfNoExecutable('clang-tidy')
+    def test_clang_tidy(self):
+        if self.backend is not Backend.ninja:
+            raise unittest.SkipTest('Clang-tidy is for now only supported on Ninja, not {}'.format(self.backend.name))
+        if shutil.which('c++') is None:
+            raise unittest.SkipTest('Clang-tidy breaks when ccache is used and "c++" not in path.')
+        if is_osx():
+            raise unittest.SkipTest('Apple ships a broken clang-tidy that chokes on -pipe.')
+        testdir = os.path.join(self.unit_test_dir, '70 clang-tidy')
+        self.init(testdir, override_envvars={'CXX': 'c++'})
+        out = self.run_target('clang-tidy')
+        self.assertIn('cttest.cpp:4:20', out)
+
+    def test_identity_cross(self):
+        testdir = os.path.join(self.unit_test_dir, '71 cross')
+        # Do a build to generate a cross file where the host is this target
+        self.init(testdir, extra_args=['-Dgenerate=true'])
+        self.meson_cross_file = os.path.join(self.builddir, "crossfile")
+        self.assertTrue(os.path.exists(self.meson_cross_file))
+        # Now verify that this is detected as cross
+        self.new_builddir()
+        self.init(testdir)
+
+    def test_introspect_buildoptions_without_configured_build(self):
+        testdir = os.path.join(self.unit_test_dir, '59 introspect buildoptions')
+        testfile = os.path.join(testdir, 'meson.build')
+        res_nb = self.introspect_directory(testfile, ['--buildoptions'] + self.meson_args)
+        self.init(testdir, default_args=False)
+        res_wb = self.introspect('--buildoptions')
+        self.maxDiff = None
+        self.assertListEqual(res_nb, res_wb)
+
+    def test_meson_configure_from_source_does_not_crash(self):
+        testdir = os.path.join(self.unit_test_dir, '59 introspect buildoptions')
+        self._run(self.mconf_command + [testdir])
+
+    def test_introspect_json_dump(self):
+        testdir = os.path.join(self.unit_test_dir, '57 introspection')
+        self.init(testdir)
+        infodir = os.path.join(self.builddir, 'meson-info')
+        self.assertPathExists(infodir)
+
+        def assertKeyTypes(key_type_list, obj):
+            for i in key_type_list:
+                self.assertIn(i[0], obj)
+                self.assertIsInstance(obj[i[0]], i[1])
+
+        root_keylist = [
+            ('benchmarks', list),
+            ('buildoptions', list),
+            ('buildsystem_files', list),
+            ('dependencies', list),
+            ('installed', dict),
+            ('projectinfo', dict),
+            ('targets', list),
+            ('tests', list),
+        ]
+
+        test_keylist = [
+            ('cmd', list),
+            ('env', dict),
+            ('name', str),
+            ('timeout', int),
+            ('suite', list),
+            ('is_parallel', bool),
+            ('protocol', str),
+        ]
+
+        buildoptions_keylist = [
+            ('name', str),
+            ('section', str),
+            ('type', str),
+            ('description', str),
+            ('machine', str),
+        ]
+
+        buildoptions_typelist = [
+            ('combo', str, [('choices', list)]),
+            ('string', str, []),
+            ('boolean', bool, []),
+            ('integer', int, []),
+            ('array', list, []),
+        ]
+
+        buildoptions_sections = ['core', 'backend', 'base', 'compiler', 'directory', 'user', 'test']
+        buildoptions_machines = ['any', 'build', 'host']
+
+        dependencies_typelist = [
+            ('name', str),
+            ('version', str),
+            ('compile_args', list),
+            ('link_args', list),
+        ]
+
+        targets_typelist = [
+            ('name', str),
+            ('id', str),
+            ('type', str),
+            ('defined_in', str),
+            ('filename', list),
+            ('build_by_default', bool),
+            ('target_sources', list),
+            ('installed', bool),
+        ]
+
+        targets_sources_typelist = [
+            ('language', str),
+            ('compiler', list),
+            ('parameters', list),
+            ('sources', list),
+            ('generated_sources', list),
+        ]
+
+        # First load all files
+        res = {}
+        for i in root_keylist:
+            curr = os.path.join(infodir, 'intro-{}.json'.format(i[0]))
+            self.assertPathExists(curr)
+            with open(curr, 'r') as fp:
+                res[i[0]] = json.load(fp)
+
+        assertKeyTypes(root_keylist, res)
+
+        # Check Tests and benchmarks
+        tests_to_find = ['test case 1', 'test case 2', 'benchmark 1']
+        for i in res['benchmarks'] + res['tests']:
+            assertKeyTypes(test_keylist, i)
+            if i['name'] in tests_to_find:
+                tests_to_find.remove(i['name'])
+        self.assertListEqual(tests_to_find, [])
+
+        # Check buildoptions
+        buildopts_to_find = {'cpp_std': 'c++11'}
+        for i in res['buildoptions']:
+            assertKeyTypes(buildoptions_keylist, i)
+            valid_type = False
+            for j in buildoptions_typelist:
+                if i['type'] == j[0]:
+                    self.assertIsInstance(i['value'], j[1])
+                    assertKeyTypes(j[2], i)
+                    valid_type = True
+                    break
+
+            self.assertIn(i['section'], buildoptions_sections)
+            self.assertIn(i['machine'], buildoptions_machines)
+            self.assertTrue(valid_type)
+            if i['name'] in buildopts_to_find:
+                self.assertEqual(i['value'], buildopts_to_find[i['name']])
+                buildopts_to_find.pop(i['name'], None)
+        self.assertDictEqual(buildopts_to_find, {})
+
+        # Check buildsystem_files
+        bs_files = ['meson.build', 'meson_options.txt', 'sharedlib/meson.build', 'staticlib/meson.build']
+        bs_files = [os.path.join(testdir, x) for x in bs_files]
+        self.assertPathListEqual(list(sorted(res['buildsystem_files'])), list(sorted(bs_files)))
+
+        # Check dependencies
+        dependencies_to_find = ['threads']
+        for i in res['dependencies']:
+            assertKeyTypes(dependencies_typelist, i)
+            if i['name'] in dependencies_to_find:
+                dependencies_to_find.remove(i['name'])
+        self.assertListEqual(dependencies_to_find, [])
+
+        # Check projectinfo
+        self.assertDictEqual(res['projectinfo'], {'version': '1.2.3', 'descriptive_name': 'introspection', 'subproject_dir': 'subprojects', 'subprojects': []})
+
+        # Check targets
+        targets_to_find = {
+            'sharedTestLib': ('shared library', True, False, 'sharedlib/meson.build'),
+            'staticTestLib': ('static library', True, False, 'staticlib/meson.build'),
+            'test1': ('executable', True, True, 'meson.build'),
+            'test2': ('executable', True, False, 'meson.build'),
+            'test3': ('executable', True, False, 'meson.build'),
+        }
+        for i in res['targets']:
+            assertKeyTypes(targets_typelist, i)
+            if i['name'] in targets_to_find:
+                tgt = targets_to_find[i['name']]
+                self.assertEqual(i['type'], tgt[0])
+                self.assertEqual(i['build_by_default'], tgt[1])
+                self.assertEqual(i['installed'], tgt[2])
+                self.assertPathEqual(i['defined_in'], os.path.join(testdir, tgt[3]))
+                targets_to_find.pop(i['name'], None)
+            for j in i['target_sources']:
+                assertKeyTypes(targets_sources_typelist, j)
+        self.assertDictEqual(targets_to_find, {})
+
+    def test_introspect_file_dump_equals_all(self):
+        testdir = os.path.join(self.unit_test_dir, '57 introspection')
+        self.init(testdir)
+        res_all = self.introspect('--all')
+        res_file = {}
+
+        root_keylist = [
+            'benchmarks',
+            'buildoptions',
+            'buildsystem_files',
+            'dependencies',
+            'installed',
+            'projectinfo',
+            'targets',
+            'tests',
+        ]
+
+        infodir = os.path.join(self.builddir, 'meson-info')
+        self.assertPathExists(infodir)
+        for i in root_keylist:
+            curr = os.path.join(infodir, 'intro-{}.json'.format(i))
+            self.assertPathExists(curr)
+            with open(curr, 'r') as fp:
+                res_file[i] = json.load(fp)
+
+        self.assertEqual(res_all, res_file)
+
+    def test_introspect_meson_info(self):
+        testdir = os.path.join(self.unit_test_dir, '57 introspection')
+        introfile = os.path.join(self.builddir, 'meson-info', 'meson-info.json')
+        self.init(testdir)
+        self.assertPathExists(introfile)
+        with open(introfile, 'r') as fp:
+            res1 = json.load(fp)
+
+        for i in ['meson_version', 'directories', 'introspection', 'build_files_updated', 'error']:
+            self.assertIn(i, res1)
+
+        self.assertEqual(res1['error'], False)
+        self.assertEqual(res1['build_files_updated'], True)
+
+    def test_introspect_config_update(self):
+        testdir = os.path.join(self.unit_test_dir, '57 introspection')
+        introfile = os.path.join(self.builddir, 'meson-info', 'intro-buildoptions.json')
+        self.init(testdir)
+        self.assertPathExists(introfile)
+        with open(introfile, 'r') as fp:
+            res1 = json.load(fp)
+
+        self.setconf('-Dcpp_std=c++14')
+        self.setconf('-Dbuildtype=release')
+
+        for idx, i in enumerate(res1):
+            if i['name'] == 'cpp_std':
+                res1[idx]['value'] = 'c++14'
+            if i['name'] == 'build.cpp_std':
+                res1[idx]['value'] = 'c++14'
+            if i['name'] == 'buildtype':
+                res1[idx]['value'] = 'release'
+            if i['name'] == 'optimization':
+                res1[idx]['value'] = '3'
+            if i['name'] == 'debug':
+                res1[idx]['value'] = False
+
+        with open(introfile, 'r') as fp:
+            res2 = json.load(fp)
+
+        self.assertListEqual(res1, res2)
+
+    def test_introspect_targets_from_source(self):
+        testdir = os.path.join(self.unit_test_dir, '57 introspection')
+        testfile = os.path.join(testdir, 'meson.build')
+        introfile = os.path.join(self.builddir, 'meson-info', 'intro-targets.json')
+        self.init(testdir)
+        self.assertPathExists(introfile)
+        with open(introfile, 'r') as fp:
+            res_wb = json.load(fp)
+
+        res_nb = self.introspect_directory(testfile, ['--targets'] + self.meson_args)
+
+        # Account for differences in output
+        for i in res_wb:
+            i['filename'] = [os.path.relpath(x, self.builddir) for x in i['filename']]
+            if 'install_filename' in i:
+                del i['install_filename']
+
+            sources = []
+            for j in i['target_sources']:
+                sources += j['sources']
+            i['target_sources'] = [{
+                'language': 'unknown',
+                'compiler': [],
+                'parameters': [],
+                'sources': sources,
+                'generated_sources': []
+            }]
+
+        self.maxDiff = None
+        self.assertListEqual(res_nb, res_wb)
+
+    def test_introspect_ast_source(self):
+        testdir = os.path.join(self.unit_test_dir, '57 introspection')
+        testfile = os.path.join(testdir, 'meson.build')
+        res_nb = self.introspect_directory(testfile, ['--ast'] + self.meson_args)
+
+        node_counter = {}
+
+        def accept_node(json_node):
+            self.assertIsInstance(json_node, dict)
+            for i in ['lineno', 'colno', 'end_lineno', 'end_colno']:
+                self.assertIn(i, json_node)
+                self.assertIsInstance(json_node[i], int)
+            self.assertIn('node', json_node)
+            n = json_node['node']
+            self.assertIsInstance(n, str)
+            self.assertIn(n, nodes)
+            if n not in node_counter:
+                node_counter[n] = 0
+            node_counter[n] = node_counter[n] + 1
+            for nodeDesc in nodes[n]:
+                key = nodeDesc[0]
+                func = nodeDesc[1]
+                self.assertIn(key, json_node)
+                if func is None:
+                    tp = nodeDesc[2]
+                    self.assertIsInstance(json_node[key], tp)
+                    continue
+                func(json_node[key])
+
+        def accept_node_list(node_list):
+            self.assertIsInstance(node_list, list)
+            for i in node_list:
+                accept_node(i)
+
+        def accept_kwargs(kwargs):
+            self.assertIsInstance(kwargs, list)
+            for i in kwargs:
+                self.assertIn('key', i)
+                self.assertIn('val', i)
+                accept_node(i['key'])
+                accept_node(i['val'])
+
+        nodes = {
+            'BooleanNode': [('value', None, bool)],
+            'IdNode': [('value', None, str)],
+            'NumberNode': [('value', None, int)],
+            'StringNode': [('value', None, str)],
+            'ContinueNode': [],
+            'BreakNode': [],
+            'ArgumentNode': [('positional', accept_node_list), ('kwargs', accept_kwargs)],
+            'ArrayNode': [('args', accept_node)],
+            'DictNode': [('args', accept_node)],
+            'EmptyNode': [],
+            'OrNode': [('left', accept_node), ('right', accept_node)],
+            'AndNode': [('left', accept_node), ('right', accept_node)],
+            'ComparisonNode': [('left', accept_node), ('right', accept_node), ('ctype', None, str)],
+            'ArithmeticNode': [('left', accept_node), ('right', accept_node), ('op', None, str)],
+            'NotNode': [('right', accept_node)],
+            'CodeBlockNode': [('lines', accept_node_list)],
+            'IndexNode': [('object', accept_node), ('index', accept_node)],
+            'MethodNode': [('object', accept_node), ('args', accept_node), ('name', None, str)],
+            'FunctionNode': [('args', accept_node), ('name', None, str)],
+            'AssignmentNode': [('value', accept_node), ('var_name', None, str)],
+            'PlusAssignmentNode': [('value', accept_node), ('var_name', None, str)],
+            'ForeachClauseNode': [('items', accept_node), ('block', accept_node), ('varnames', None, list)],
+            'IfClauseNode': [('ifs', accept_node_list), ('else', accept_node)],
+            'IfNode': [('condition', accept_node), ('block', accept_node)],
+            'UMinusNode': [('right', accept_node)],
+            'TernaryNode': [('condition', accept_node), ('true', accept_node), ('false', accept_node)],
+        }
+
+        accept_node(res_nb)
+
+        for n, c in [('ContinueNode', 2), ('BreakNode', 1), ('NotNode', 3)]:
+            self.assertIn(n, node_counter)
+            self.assertEqual(node_counter[n], c)
+
+    def test_introspect_dependencies_from_source(self):
+        testdir = os.path.join(self.unit_test_dir, '57 introspection')
+        testfile = os.path.join(testdir, 'meson.build')
+        res_nb = self.introspect_directory(testfile, ['--scan-dependencies'] + self.meson_args)
+        expected = [
+            {
+                'name': 'threads',
+                'required': True,
+                'version': [],
+                'has_fallback': False,
+                'conditional': False
+            },
+            {
+                'name': 'zlib',
+                'required': False,
+                'version': [],
+                'has_fallback': False,
+                'conditional': False
+            },
+            {
+                'name': 'bugDep1',
+                'required': True,
+                'version': [],
+                'has_fallback': False,
+                'conditional': False
+            },
+            {
+                'name': 'somethingthatdoesnotexist',
+                'required': True,
+                'version': ['>=1.2.3'],
+                'has_fallback': False,
+                'conditional': True
+            },
+            {
+                'name': 'look_i_have_a_fallback',
+                'required': True,
+                'version': ['>=1.0.0', '<=99.9.9'],
+                'has_fallback': True,
+                'conditional': True
+            }
+        ]
+        self.maxDiff = None
+        self.assertListEqual(res_nb, expected)
+
+    def test_unstable_coredata(self):
+        testdir = os.path.join(self.common_test_dir, '1 trivial')
+        self.init(testdir)
+        # just test that the command does not fail (e.g. because it throws an exception)
+        self._run([*self.meson_command, 'unstable-coredata', self.builddir])
+
+    @skip_if_no_cmake
+    def test_cmake_prefix_path(self):
+        testdir = os.path.join(self.unit_test_dir, '64 cmake_prefix_path')
+        self.init(testdir, extra_args=['-Dcmake_prefix_path=' + os.path.join(testdir, 'prefix')])
+
+    @skip_if_no_cmake
+    def test_cmake_parser(self):
+        testdir = os.path.join(self.unit_test_dir, '65 cmake parser')
+        self.init(testdir, extra_args=['-Dcmake_prefix_path=' + os.path.join(testdir, 'prefix')])
+
+    def test_alias_target(self):
+        if self.backend is Backend.vs:
+            # FIXME: This unit test is broken with vs backend, needs investigation
+            raise unittest.SkipTest('Skipping alias_target test with {} backend'.format(self.backend.name))
+        testdir = os.path.join(self.unit_test_dir, '66 alias target')
+        self.init(testdir)
+        self.build()
+        self.assertPathDoesNotExist(os.path.join(self.builddir, 'prog' + exe_suffix))
+        self.assertPathDoesNotExist(os.path.join(self.builddir, 'hello.txt'))
+        self.run_target('build-all')
+        self.assertPathExists(os.path.join(self.builddir, 'prog' + exe_suffix))
+        self.assertPathExists(os.path.join(self.builddir, 'hello.txt'))
+
+    def test_configure(self):
+        testdir = os.path.join(self.common_test_dir, '2 cpp')
+        self.init(testdir)
+        self._run(self.mconf_command + [self.builddir])
+
+    def test_summary(self):
+        testdir = os.path.join(self.unit_test_dir, '73 summary')
+        out = self.init(testdir)
+        expected = textwrap.dedent(r'''
+            Some Subproject 2.0
+
+                 string: bar
+                integer: 1
+                boolean: True
+
+            My Project 1.0
+
+              Configuration
+                   Some boolean: False
+                Another boolean: True
+                    Some string: Hello World
+                         A list: string
+                                 1
+                                 True
+                     empty list:
+                       A number: 1
+                            yes: YES
+                             no: NO
+                      coma list: a, b, c
+
+              Subprojects
+                            sub: YES
+                           sub2: NO Problem encountered: This subproject failed
+            ''')
+        expected_lines = expected.split('\n')[1:]
+        out_start = out.find(expected_lines[0])
+        out_lines = out[out_start:].split('\n')[:len(expected_lines)]
+        if sys.version_info < (3, 7, 0):
+            # Dictionary order is not stable in Python <3.7, so sort the lines
+            # while comparing
+            self.assertEqual(sorted(expected_lines), sorted(out_lines))
+        else:
+            self.assertEqual(expected_lines, out_lines)
+
+    def test_meson_compile(self):
+        """Test the meson compile command."""
+
+        def get_exe_name(basename: str) -> str:
+            if is_windows():
+                return '{}.exe'.format(basename)
+            else:
+                return basename
+
+        def get_shared_lib_name(basename: str) -> str:
+            if mesonbuild.environment.detect_msys2_arch():
+                return 'lib{}.dll'.format(basename)
+            elif is_windows():
+                return '{}.dll'.format(basename)
+            elif is_cygwin():
+                return 'cyg{}.dll'.format(basename)
+            elif is_osx():
+                return 'lib{}.dylib'.format(basename)
+            else:
+                return 'lib{}.so'.format(basename)
+
+        def get_static_lib_name(basename: str) -> str:
+            return 'lib{}.a'.format(basename)
+
+        # Base case (no targets or additional arguments)
+
+        testdir = os.path.join(self.common_test_dir, '1 trivial')
+        self.init(testdir)
+
+        self._run([*self.meson_command, 'compile', '-C', self.builddir])
+        self.assertPathExists(os.path.join(self.builddir, get_exe_name('trivialprog')))
+
+        # `--clean`
+
+        self._run([*self.meson_command, 'compile', '-C', self.builddir, '--clean'])
+        self.assertPathDoesNotExist(os.path.join(self.builddir, get_exe_name('trivialprog')))
+
+        # Target specified in a project with unique names
+
+        testdir = os.path.join(self.common_test_dir, '6 linkshared')
+        self.init(testdir, extra_args=['--wipe'])
+        # Multiple targets and target type specified
+        self._run([*self.meson_command, 'compile', '-C', self.builddir, 'mylib', 'mycpplib:shared_library'])
+        # Check that we have a shared lib, but not an executable, i.e. check that target actually worked
+        self.assertPathExists(os.path.join(self.builddir, get_shared_lib_name('mylib')))
+        self.assertPathDoesNotExist(os.path.join(self.builddir, get_exe_name('prog')))
+        self.assertPathExists(os.path.join(self.builddir, get_shared_lib_name('mycpplib')))
+        self.assertPathDoesNotExist(os.path.join(self.builddir, get_exe_name('cppprog')))
+
+        # Target specified in a project with non unique names
+
+        testdir = os.path.join(self.common_test_dir, '190 same target name')
+        self.init(testdir, extra_args=['--wipe'])
+        self._run([*self.meson_command, 'compile', '-C', self.builddir, './foo'])
+        self.assertPathExists(os.path.join(self.builddir, get_static_lib_name('foo')))
+        self._run([*self.meson_command, 'compile', '-C', self.builddir, 'sub/foo'])
+        self.assertPathExists(os.path.join(self.builddir, 'sub', get_static_lib_name('foo')))
+
+        # run_target
+
+        testdir = os.path.join(self.common_test_dir, '54 run target')
+        self.init(testdir, extra_args=['--wipe'])
+        out = self._run([*self.meson_command, 'compile', '-C', self.builddir, 'py3hi'])
+        self.assertIn('I am Python3.', out)
+
+        # `--$BACKEND-args`
+
+        testdir = os.path.join(self.common_test_dir, '1 trivial')
+        if self.backend is Backend.ninja:
+            self.init(testdir, extra_args=['--wipe'])
+            # Dry run - should not create a program
+            self._run([*self.meson_command, 'compile', '-C', self.builddir, '--ninja-args=-n'])
+            self.assertPathDoesNotExist(os.path.join(self.builddir, get_exe_name('trivialprog')))
+        elif self.backend is Backend.vs:
+            self.init(testdir, extra_args=['--wipe'])
+            self._run([*self.meson_command, 'compile', '-C', self.builddir])
+            # Explicitly clean the target through msbuild interface
+            self._run([*self.meson_command, 'compile', '-C', self.builddir, '--vs-args=-t:{}:Clean'.format(re.sub(r'[\%\$\@\;\.\(\)\']', '_', get_exe_name('trivialprog')))])
+            self.assertPathDoesNotExist(os.path.join(self.builddir, get_exe_name('trivialprog')))
+
+    def test_spurious_reconfigure_built_dep_file(self):
+        testdir = os.path.join(self.unit_test_dir, '75 dep files')
+
+        # Regression test: Spurious reconfigure was happening when build
+        # directory is inside source directory.
+        # See https://gitlab.freedesktop.org/gstreamer/gst-build/-/issues/85.
+        srcdir = os.path.join(self.builddir, 'srctree')
+        shutil.copytree(testdir, srcdir)
+        builddir = os.path.join(srcdir, '_build')
+        self.change_builddir(builddir)
+
+        self.init(srcdir)
+        self.build()
+
+        # During first configure the file did not exist so no dependency should
+        # have been set. A rebuild should not trigger a reconfigure.
+        self.clean()
+        out = self.build()
+        self.assertNotIn('Project configured', out)
+
+        self.init(srcdir, extra_args=['--reconfigure'])
+
+        # During the reconfigure the file did exist, but is inside build
+        # directory, so no dependency should have been set. A rebuild should not
+        # trigger a reconfigure.
+        self.clean()
+        out = self.build()
+        self.assertNotIn('Project configured', out)
+
+    def _test_junit(self, case: str) -> None:
+        try:
+            import lxml.etree as et
+        except ImportError:
+            raise unittest.SkipTest('lxml required, but not found.')
+
+        schema = et.XMLSchema(et.parse(str(Path(__file__).parent / 'data' / 'schema.xsd')))
+
+        self.init(case)
+        self.run_tests()
+
+        junit = et.parse(str(Path(self.builddir) / 'meson-logs' / 'testlog.junit.xml'))
+        try:
+            schema.assertValid(junit)
+        except et.DocumentInvalid as e:
+            self.fail(e.error_log)
+
+    def test_junit_valid_tap(self):
+        self._test_junit(os.path.join(self.common_test_dir, '213 tap tests'))
+
+    def test_junit_valid_exitcode(self):
+        self._test_junit(os.path.join(self.common_test_dir, '44 test args'))
+
+    def test_junit_valid_gtest(self):
+        self._test_junit(os.path.join(self.framework_test_dir, '2 gtest'))
+
+    def test_link_language_linker(self):
+        # TODO: there should be some way to query how we're linking things
+        # without resorting to reading the ninja.build file
+        if self.backend is not Backend.ninja:
+            raise unittest.SkipTest('This test reads the ninja file')
+
+        testdir = os.path.join(self.common_test_dir, '232 link language')
+        self.init(testdir)
+
+        build_ninja = os.path.join(self.builddir, 'build.ninja')
+        with open(build_ninja, 'r', encoding='utf-8') as f:
+            contents = f.read()
+
+        self.assertRegex(contents, r'build main(\.exe)?.*: c_LINKER')
+        self.assertRegex(contents, r'build (lib|cyg)?mylib.*: c_LINKER')
+
+    def test_commands_documented(self):
+        '''
+        Test that all listed meson commands are documented in Commands.md.
+        '''
+
+        # The docs directory is not in release tarballs.
+        if not os.path.isdir('docs'):
+            raise unittest.SkipTest('Doc directory does not exist.')
+        doc_path = 'docs/markdown_dynamic/Commands.md'
+
+        md = None
+        with open(doc_path, encoding='utf-8') as f:
+            md = f.read()
+        self.assertIsNotNone(md)
+
+        ## Get command sections
+
+        section_pattern = re.compile(r'^### (.+)$', re.MULTILINE)
+        md_command_section_matches = [i for i in section_pattern.finditer(md)]
+        md_command_sections = dict()
+        for i, s in enumerate(md_command_section_matches):
+            section_end = len(md) if i == len(md_command_section_matches) - 1 else md_command_section_matches[i + 1].start()
+            md_command_sections[s.group(1)] = (s.start(), section_end)
+
+        ## Validate commands
+
+        md_commands = set(k for k,v in md_command_sections.items())
+
+        help_output = self._run(self.meson_command + ['--help'])
+        help_commands = set(c.strip() for c in re.findall(r'usage:(?:.+)?{((?:[a-z]+,*)+?)}', help_output, re.MULTILINE|re.DOTALL)[0].split(','))
+
+        self.assertEqual(md_commands | {'help'}, help_commands, 'Doc file: `{}`'.format(doc_path))
+
+        ## Validate that each section has proper placeholders
+
+        def get_data_pattern(command):
+            return re.compile(
+                r'^```[\r\n]'
+                r'{{ cmd_help\[\'' + command + r'\'\]\[\'usage\'\] }}[\r\n]'
+                r'^```[\r\n]'
+                r'.*?'
+                r'^```[\r\n]'
+                r'{{ cmd_help\[\'' + command + r'\'\]\[\'arguments\'\] }}[\r\n]'
+                r'^```',
+                flags = re.MULTILINE|re.DOTALL)
+
+        for command in md_commands:
+            m = get_data_pattern(command).search(md, pos=md_command_sections[command][0], endpos=md_command_sections[command][1])
+            self.assertIsNotNone(m, 'Command `{}` is missing placeholders for dynamic data. Doc file: `{}`'.format(command, doc_path))
+
+    def _check_coverage_files(self, types=('text', 'xml', 'html')):
+        covdir = Path(self.builddir) / 'meson-logs'
+        files = []
+        if 'text' in types:
+            files.append('coverage.txt')
+        if 'xml' in types:
+            files.append('coverage.xml')
+        if 'html' in types:
+            files.append('coveragereport/index.html')
+        for f in files:
+            self.assertTrue((covdir / f).is_file(), msg='{} is not a file'.format(f))
+
+    def test_coverage(self):
+        if mesonbuild.environment.detect_msys2_arch():
+            raise unittest.SkipTest('Skipped due to problems with coverage on MSYS2')
+        gcovr_exe, gcovr_new_rootdir = mesonbuild.environment.detect_gcovr()
+        if not gcovr_exe:
+            raise unittest.SkipTest('gcovr not found, or too old')
+        testdir = os.path.join(self.common_test_dir, '1 trivial')
+        env = get_fake_env(testdir, self.builddir, self.prefix)
+        cc = env.detect_c_compiler(MachineChoice.HOST)
+        if cc.get_id() == 'clang':
+            if not mesonbuild.environment.detect_llvm_cov():
+                raise unittest.SkipTest('llvm-cov not found')
+        if cc.get_id() == 'msvc':
+            raise unittest.SkipTest('Test only applies to non-MSVC compilers')
+        self.init(testdir, extra_args=['-Db_coverage=true'])
+        self.build()
+        self.run_tests()
+        self.run_target('coverage')
+        self._check_coverage_files()
+
+    def test_coverage_complex(self):
+        if mesonbuild.environment.detect_msys2_arch():
+            raise unittest.SkipTest('Skipped due to problems with coverage on MSYS2')
+        gcovr_exe, gcovr_new_rootdir = mesonbuild.environment.detect_gcovr()
+        if not gcovr_exe:
+            raise unittest.SkipTest('gcovr not found, or too old')
+        testdir = os.path.join(self.common_test_dir, '109 generatorcustom')
+        env = get_fake_env(testdir, self.builddir, self.prefix)
+        cc = env.detect_c_compiler(MachineChoice.HOST)
+        if cc.get_id() == 'clang':
+            if not mesonbuild.environment.detect_llvm_cov():
+                raise unittest.SkipTest('llvm-cov not found')
+        if cc.get_id() == 'msvc':
+            raise unittest.SkipTest('Test only applies to non-MSVC compilers')
+        self.init(testdir, extra_args=['-Db_coverage=true'])
+        self.build()
+        self.run_tests()
+        self.run_target('coverage')
+        self._check_coverage_files()
+
+    def test_coverage_html(self):
+        if mesonbuild.environment.detect_msys2_arch():
+            raise unittest.SkipTest('Skipped due to problems with coverage on MSYS2')
+        gcovr_exe, gcovr_new_rootdir = mesonbuild.environment.detect_gcovr()
+        if not gcovr_exe:
+            raise unittest.SkipTest('gcovr not found, or too old')
+        testdir = os.path.join(self.common_test_dir, '1 trivial')
+        env = get_fake_env(testdir, self.builddir, self.prefix)
+        cc = env.detect_c_compiler(MachineChoice.HOST)
+        if cc.get_id() == 'clang':
+            if not mesonbuild.environment.detect_llvm_cov():
+                raise unittest.SkipTest('llvm-cov not found')
+        if cc.get_id() == 'msvc':
+            raise unittest.SkipTest('Test only applies to non-MSVC compilers')
+        self.init(testdir, extra_args=['-Db_coverage=true'])
+        self.build()
+        self.run_tests()
+        self.run_target('coverage-html')
+        self._check_coverage_files(['html'])
+
+    def test_coverage_text(self):
+        if mesonbuild.environment.detect_msys2_arch():
+            raise unittest.SkipTest('Skipped due to problems with coverage on MSYS2')
+        gcovr_exe, gcovr_new_rootdir = mesonbuild.environment.detect_gcovr()
+        if not gcovr_exe:
+            raise unittest.SkipTest('gcovr not found, or too old')
+        testdir = os.path.join(self.common_test_dir, '1 trivial')
+        env = get_fake_env(testdir, self.builddir, self.prefix)
+        cc = env.detect_c_compiler(MachineChoice.HOST)
+        if cc.get_id() == 'clang':
+            if not mesonbuild.environment.detect_llvm_cov():
+                raise unittest.SkipTest('llvm-cov not found')
+        if cc.get_id() == 'msvc':
+            raise unittest.SkipTest('Test only applies to non-MSVC compilers')
+        self.init(testdir, extra_args=['-Db_coverage=true'])
+        self.build()
+        self.run_tests()
+        self.run_target('coverage-text')
+        self._check_coverage_files(['text'])
+
+    def test_coverage_xml(self):
+        if mesonbuild.environment.detect_msys2_arch():
+            raise unittest.SkipTest('Skipped due to problems with coverage on MSYS2')
+        gcovr_exe, gcovr_new_rootdir = mesonbuild.environment.detect_gcovr()
+        if not gcovr_exe:
+            raise unittest.SkipTest('gcovr not found, or too old')
+        testdir = os.path.join(self.common_test_dir, '1 trivial')
+        env = get_fake_env(testdir, self.builddir, self.prefix)
+        cc = env.detect_c_compiler(MachineChoice.HOST)
+        if cc.get_id() == 'clang':
+            if not mesonbuild.environment.detect_llvm_cov():
+                raise unittest.SkipTest('llvm-cov not found')
+        if cc.get_id() == 'msvc':
+            raise unittest.SkipTest('Test only applies to non-MSVC compilers')
+        self.init(testdir, extra_args=['-Db_coverage=true'])
+        self.build()
+        self.run_tests()
+        self.run_target('coverage-xml')
+        self._check_coverage_files(['xml'])
+
+    def test_cross_file_constants(self):
+        with temp_filename() as crossfile1, temp_filename() as crossfile2:
+            with open(crossfile1, 'w') as f:
+                f.write(textwrap.dedent(
+                    '''
+                    [constants]
+                    compiler = 'gcc'
+                    '''))
+            with open(crossfile2, 'w') as f:
+                f.write(textwrap.dedent(
+                    '''
+                    [constants]
+                    toolchain = '/toolchain/'
+                    common_flags = ['--sysroot=' + toolchain / 'sysroot']
+
+                    [properties]
+                    c_args = common_flags + ['-DSOMETHING']
+                    cpp_args = c_args + ['-DSOMETHING_ELSE']
+
+                    [binaries]
+                    c = toolchain / compiler
+                    '''))
+
+            values = mesonbuild.coredata.parse_machine_files([crossfile1, crossfile2])
+            self.assertEqual(values['binaries']['c'], '/toolchain/gcc')
+            self.assertEqual(values['properties']['c_args'],
+                             ['--sysroot=/toolchain/sysroot', '-DSOMETHING'])
+            self.assertEqual(values['properties']['cpp_args'],
+                             ['--sysroot=/toolchain/sysroot', '-DSOMETHING', '-DSOMETHING_ELSE'])
+
+    @unittest.skipIf(is_windows(), 'Directory cleanup fails for some reason')
+    def test_wrap_git(self):
+        with tempfile.TemporaryDirectory() as tmpdir:
+            srcdir = os.path.join(tmpdir, 'src')
+            shutil.copytree(os.path.join(self.unit_test_dir, '78 wrap-git'), srcdir)
+            upstream = os.path.join(srcdir, 'subprojects', 'wrap_git_upstream')
+            upstream_uri = Path(upstream).as_uri()
+            _git_init(upstream)
+            with open(os.path.join(srcdir, 'subprojects', 'wrap_git.wrap'), 'w') as f:
+                f.write(textwrap.dedent('''
+                  [wrap-git]
+                  url = {}
+                  patch_directory = wrap_git_builddef
+                  revision = master
+                '''.format(upstream_uri)))
+            self.init(srcdir)
+            self.build()
+            self.run_tests()
+
+    def test_multi_output_custom_target_no_warning(self):
+        testdir = os.path.join(self.common_test_dir, '235 custom_target source')
+
+        out = self.init(testdir)
+        self.assertNotRegex(out, 'WARNING:.*Using the first one.')
+        self.build()
+        self.run_tests()
+
+class FailureTests(BasePlatformTests):
+    '''
+    Tests that test failure conditions. Build files here should be dynamically
+    generated and static tests should go into `test cases/failing*`.
+    This is useful because there can be many ways in which a particular
+    function can fail, and creating failing tests for all of them is tedious
+    and slows down testing.
+    '''
+    dnf = "[Dd]ependency.*not found(:.*)?"
+    nopkg = '[Pp]kg-config.*not found'
+
+    def setUp(self):
+        super().setUp()
+        self.srcdir = os.path.realpath(tempfile.mkdtemp())
+        self.mbuild = os.path.join(self.srcdir, 'meson.build')
+        self.moptions = os.path.join(self.srcdir, 'meson_options.txt')
+
+    def tearDown(self):
+        super().tearDown()
+        windows_proof_rmtree(self.srcdir)
+
+    def assertMesonRaises(self, contents, match, *,
+                          extra_args=None,
+                          langs=None,
+                          meson_version=None,
+                          options=None,
+                          override_envvars=None):
+        '''
+        Assert that running meson configure on the specified @contents raises
+        a error message matching regex @match.
+        '''
+        if langs is None:
+            langs = []
+        with open(self.mbuild, 'w') as f:
+            f.write("project('failure test', 'c', 'cpp'")
+            if meson_version:
+                f.write(", meson_version: '{}'".format(meson_version))
+            f.write(")\n")
+            for lang in langs:
+                f.write("add_languages('{}', required : false)\n".format(lang))
+            f.write(contents)
+        if options is not None:
+            with open(self.moptions, 'w') as f:
+                f.write(options)
+        o = {'MESON_FORCE_BACKTRACE': '1'}
+        if override_envvars is None:
+            override_envvars = o
+        else:
+            override_envvars.update(o)
+        # Force tracebacks so we can detect them properly
+        with self.assertRaisesRegex(MesonException, match, msg=contents):
+            # Must run in-process or we'll get a generic CalledProcessError
+            self.init(self.srcdir, extra_args=extra_args,
+                      inprocess=True,
+                      override_envvars = override_envvars)
+
+    def obtainMesonOutput(self, contents, match, extra_args, langs, meson_version=None):
+        if langs is None:
+            langs = []
+        with open(self.mbuild, 'w') as f:
+            f.write("project('output test', 'c', 'cpp'")
+            if meson_version:
+                f.write(", meson_version: '{}'".format(meson_version))
+            f.write(")\n")
+            for lang in langs:
+                f.write("add_languages('{}', required : false)\n".format(lang))
+            f.write(contents)
+        # Run in-process for speed and consistency with assertMesonRaises
+        return self.init(self.srcdir, extra_args=extra_args, inprocess=True)
+
+    def assertMesonOutputs(self, contents, match, extra_args=None, langs=None, meson_version=None):
+        '''
+        Assert that running meson configure on the specified @contents outputs
+        something that matches regex @match.
+        '''
+        out = self.obtainMesonOutput(contents, match, extra_args, langs, meson_version)
+        self.assertRegex(out, match)
+
+    def assertMesonDoesNotOutput(self, contents, match, extra_args=None, langs=None, meson_version=None):
+        '''
+        Assert that running meson configure on the specified @contents does not output
+        something that matches regex @match.
+        '''
+        out = self.obtainMesonOutput(contents, match, extra_args, langs, meson_version)
+        self.assertNotRegex(out, match)
+
+    @skipIfNoPkgconfig
+    def test_dependency(self):
+        if subprocess.call(['pkg-config', '--exists', 'zlib']) != 0:
+            raise unittest.SkipTest('zlib not found with pkg-config')
+        a = (("dependency('zlib', method : 'fail')", "'fail' is invalid"),
+             ("dependency('zlib', static : '1')", "[Ss]tatic.*boolean"),
+             ("dependency('zlib', version : 1)", "Item must be a list or one of "),
+             ("dependency('zlib', required : 1)", "[Rr]equired.*boolean"),
+             ("dependency('zlib', method : 1)", "[Mm]ethod.*string"),
+             ("dependency('zlibfail')", self.dnf),)
+        for contents, match in a:
+            self.assertMesonRaises(contents, match)
+
+    def test_apple_frameworks_dependency(self):
+        if not is_osx():
+            raise unittest.SkipTest('only run on macOS')
+        self.assertMesonRaises("dependency('appleframeworks')",
+                               "requires at least one module")
+
+    def test_extraframework_dependency_method(self):
+        code = "dependency('python', method : 'extraframework')"
+        if not is_osx():
+            self.assertMesonRaises(code, self.dnf)
+        else:
+            # Python2 framework is always available on macOS
+            self.assertMesonOutputs(code, '[Dd]ependency.*python.*found.*YES')
+
+    def test_sdl2_notfound_dependency(self):
+        # Want to test failure, so skip if available
+        if shutil.which('sdl2-config'):
+            raise unittest.SkipTest('sdl2-config found')
+        self.assertMesonRaises("dependency('sdl2', method : 'sdlconfig')", self.dnf)
+        if shutil.which('pkg-config'):
+            self.assertMesonRaises("dependency('sdl2', method : 'pkg-config')", self.dnf)
+        with no_pkgconfig():
+            # Look for pkg-config, cache it, then
+            # Use cached pkg-config without erroring out, then
+            # Use cached pkg-config to error out
+            code = "dependency('foobarrr', method : 'pkg-config', required : false)\n" \
+                "dependency('foobarrr2', method : 'pkg-config', required : false)\n" \
+                "dependency('sdl2', method : 'pkg-config')"
+            self.assertMesonRaises(code, self.nopkg)
+
+    def test_gnustep_notfound_dependency(self):
+        # Want to test failure, so skip if available
+        if shutil.which('gnustep-config'):
+            raise unittest.SkipTest('gnustep-config found')
+        self.assertMesonRaises("dependency('gnustep')",
+                               "(requires a Objc compiler|{})".format(self.dnf),
+                               langs = ['objc'])
+
+    def test_wx_notfound_dependency(self):
+        # Want to test failure, so skip if available
+        if shutil.which('wx-config-3.0') or shutil.which('wx-config') or shutil.which('wx-config-gtk3'):
+            raise unittest.SkipTest('wx-config, wx-config-3.0 or wx-config-gtk3 found')
+        self.assertMesonRaises("dependency('wxwidgets')", self.dnf)
+        self.assertMesonOutputs("dependency('wxwidgets', required : false)",
+                                "Run-time dependency .*WxWidgets.* found: .*NO.*")
+
+    def test_wx_dependency(self):
+        if not shutil.which('wx-config-3.0') and not shutil.which('wx-config') and not shutil.which('wx-config-gtk3'):
+            raise unittest.SkipTest('Neither wx-config, wx-config-3.0 nor wx-config-gtk3 found')
+        self.assertMesonRaises("dependency('wxwidgets', modules : 1)",
+                               "module argument is not a string")
+
+    def test_llvm_dependency(self):
+        self.assertMesonRaises("dependency('llvm', modules : 'fail')",
+                               "(required.*fail|{})".format(self.dnf))
+
+    def test_boost_notfound_dependency(self):
+        # Can be run even if Boost is found or not
+        self.assertMesonRaises("dependency('boost', modules : 1)",
+                               "module.*not a string")
+        self.assertMesonRaises("dependency('boost', modules : 'fail')",
+                               "(fail.*not found|{})".format(self.dnf))
+
+    def test_boost_BOOST_ROOT_dependency(self):
+        # Test BOOST_ROOT; can be run even if Boost is found or not
+        self.assertMesonRaises("dependency('boost')",
+                               "(BOOST_ROOT.*absolute|{})".format(self.dnf),
+                               override_envvars = {'BOOST_ROOT': 'relative/path'})
+
+    def test_dependency_invalid_method(self):
+        code = '''zlib_dep = dependency('zlib', required : false)
+        zlib_dep.get_configtool_variable('foo')
+        '''
+        self.assertMesonRaises(code, ".* is not a config-tool dependency")
+        code = '''zlib_dep = dependency('zlib', required : false)
+        dep = declare_dependency(dependencies : zlib_dep)
+        dep.get_pkgconfig_variable('foo')
+        '''
+        self.assertMesonRaises(code, "Method.*pkgconfig.*is invalid.*internal")
+        code = '''zlib_dep = dependency('zlib', required : false)
+        dep = declare_dependency(dependencies : zlib_dep)
+        dep.get_configtool_variable('foo')
+        '''
+        self.assertMesonRaises(code, "Method.*configtool.*is invalid.*internal")
+
+    def test_objc_cpp_detection(self):
+        '''
+        Test that when we can't detect objc or objcpp, we fail gracefully.
+        '''
+        env = get_fake_env()
+        try:
+            env.detect_objc_compiler(MachineChoice.HOST)
+            env.detect_objcpp_compiler(MachineChoice.HOST)
+        except EnvironmentException:
+            code = "add_languages('objc')\nadd_languages('objcpp')"
+            self.assertMesonRaises(code, "Unknown compiler")
+            return
+        raise unittest.SkipTest("objc and objcpp found, can't test detection failure")
+
+    def test_subproject_variables(self):
+        '''
+        Test that:
+        1. The correct message is outputted when a not-required dep is not
+           found and the fallback subproject is also not found.
+        2. A not-required fallback dependency is not found because the
+           subproject failed to parse.
+        3. A not-found not-required dep with a fallback subproject outputs the
+           correct message when the fallback subproject is found but the
+           variable inside it is not.
+        4. A fallback dependency is found from the subproject parsed in (3)
+        5. The correct message is outputted when the .wrap file is missing for
+           a sub-subproject.
+        '''
+        tdir = os.path.join(self.unit_test_dir, '20 subproj dep variables')
+        out = self.init(tdir, inprocess=True)
+        self.assertRegex(out, r"Subproject directory not found and .*nosubproj.wrap.* file not found")
+        self.assertRegex(out, r'Function does not take positional arguments.')
+        self.assertRegex(out, r'WARNING:.* Dependency .*subsubproject.* not found but it is available in a sub-subproject.')
+        self.assertRegex(out, r'Subproject directory not found and .*subsubproject.wrap.* file not found')
+        self.assertRegex(out, r'Dependency .*zlibproxy.* from subproject .*subprojects.*somesubproj.* found: .*YES.*')
+
+    def test_exception_exit_status(self):
+        '''
+        Test exit status on python exception
+        '''
+        tdir = os.path.join(self.unit_test_dir, '21 exit status')
+        with self.assertRaises(subprocess.CalledProcessError) as cm:
+            self.init(tdir, inprocess=False, override_envvars = {'MESON_UNIT_TEST': '1'})
+        self.assertEqual(cm.exception.returncode, 2)
+        self.wipe()
+
+    def test_dict_requires_key_value_pairs(self):
+        self.assertMesonRaises("dict = {3, 'foo': 'bar'}",
+                               'Only key:value pairs are valid in dict construction.')
+        self.assertMesonRaises("{'foo': 'bar', 3}",
+                               'Only key:value pairs are valid in dict construction.')
+
+    def test_dict_forbids_duplicate_keys(self):
+        self.assertMesonRaises("dict = {'a': 41, 'a': 42}",
+                               'Duplicate dictionary key: a.*')
+
+    def test_dict_forbids_integer_key(self):
+        self.assertMesonRaises("dict = {3: 'foo'}",
+                               'Key must be a string.*')
+
+    def test_using_too_recent_feature(self):
+        # Here we use a dict, which was introduced in 0.47.0
+        self.assertMesonOutputs("dict = {}",
+                                ".*WARNING.*Project targeting.*but.*",
+                                meson_version='>= 0.46.0')
+
+    def test_using_recent_feature(self):
+        # Same as above, except the meson version is now appropriate
+        self.assertMesonDoesNotOutput("dict = {}",
+                                      ".*WARNING.*Project targeting.*but.*",
+                                      meson_version='>= 0.47')
+
+    def test_using_too_recent_feature_dependency(self):
+        self.assertMesonOutputs("dependency('pcap', required: false)",
+                                ".*WARNING.*Project targeting.*but.*",
+                                meson_version='>= 0.41.0')
+
+    def test_vcs_tag_featurenew_build_always_stale(self):
+        'https://github.com/mesonbuild/meson/issues/3904'
+        vcs_tag = '''version_data = configuration_data()
+        version_data.set('PROJVER', '@VCS_TAG@')
+        vf = configure_file(output : 'version.h.in', configuration: version_data)
+        f = vcs_tag(input : vf, output : 'version.h')
+        '''
+        msg = '.*WARNING:.*feature.*build_always_stale.*custom_target.*'
+        self.assertMesonDoesNotOutput(vcs_tag, msg, meson_version='>=0.43')
+
+    def test_missing_subproject_not_required_and_required(self):
+        self.assertMesonRaises("sub1 = subproject('not-found-subproject', required: false)\n" +
+                               "sub2 = subproject('not-found-subproject', required: true)",
+                               """.*Subproject "subprojects/not-found-subproject" required but not found.*""")
+
+    def test_get_variable_on_not_found_project(self):
+        self.assertMesonRaises("sub1 = subproject('not-found-subproject', required: false)\n" +
+                               "sub1.get_variable('naaa')",
+                               """Subproject "subprojects/not-found-subproject" disabled can't get_variable on it.""")
+
+    def test_version_checked_before_parsing_options(self):
+        '''
+        https://github.com/mesonbuild/meson/issues/5281
+        '''
+        options = "option('some-option', type: 'foo', value: '')"
+        match = 'Meson version is.*but project requires >=2000'
+        self.assertMesonRaises("", match, meson_version='>=2000', options=options)
+
+    def test_assert_default_message(self):
+        self.assertMesonRaises("k1 = 'a'\n" +
+                               "assert({\n" +
+                               "  k1: 1,\n" +
+                               "}['a'] == 2)\n",
+                               r"Assert failed: {k1 : 1}\['a'\] == 2")
+
+    def test_wrap_nofallback(self):
+        self.assertMesonRaises("dependency('notfound', fallback : ['foo', 'foo_dep'])",
+                               r"Dependency \'notfound\' not found and fallback is disabled",
+                               extra_args=['--wrap-mode=nofallback'])
+
+    def test_message(self):
+        self.assertMesonOutputs("message('Array:', ['a', 'b'])",
+                                r"Message:.* Array: \['a', 'b'\]")
+
+    def test_warning(self):
+        self.assertMesonOutputs("warning('Array:', ['a', 'b'])",
+                                r"WARNING:.* Array: \['a', 'b'\]")
+
+    def test_override_dependency_twice(self):
+        self.assertMesonRaises("meson.override_dependency('foo', declare_dependency())\n" +
+                               "meson.override_dependency('foo', declare_dependency())",
+                               """Tried to override dependency 'foo' which has already been resolved or overridden""")
+
+    @unittest.skipIf(is_windows(), 'zlib is not available on Windows')
+    def test_override_resolved_dependency(self):
+        self.assertMesonRaises("dependency('zlib')\n" +
+                               "meson.override_dependency('zlib', declare_dependency())",
+                               """Tried to override dependency 'zlib' which has already been resolved or overridden""")
+
+@unittest.skipUnless(is_windows() or is_cygwin(), "requires Windows (or Windows via Cygwin)")
+class WindowsTests(BasePlatformTests):
+    '''
+    Tests that should run on Cygwin, MinGW, and MSVC
+    '''
+
+    def setUp(self):
+        super().setUp()
+        self.platform_test_dir = os.path.join(self.src_root, 'test cases/windows')
+
+    @unittest.skipIf(is_cygwin(), 'Test only applicable to Windows')
+    def test_find_program(self):
+        '''
+        Test that Windows-specific edge-cases in find_program are functioning
+        correctly. Cannot be an ordinary test because it involves manipulating
+        PATH to point to a directory with Python scripts.
+        '''
+        testdir = os.path.join(self.platform_test_dir, '8 find program')
+        # Find `cmd` and `cmd.exe`
+        prog1 = ExternalProgram('cmd')
+        self.assertTrue(prog1.found(), msg='cmd not found')
+        prog2 = ExternalProgram('cmd.exe')
+        self.assertTrue(prog2.found(), msg='cmd.exe not found')
+        self.assertPathEqual(prog1.get_path(), prog2.get_path())
+        # Find cmd.exe with args without searching
+        prog = ExternalProgram('cmd', command=['cmd', '/C'])
+        self.assertTrue(prog.found(), msg='cmd not found with args')
+        self.assertPathEqual(prog.get_command()[0], 'cmd')
+        # Find cmd with an absolute path that's missing the extension
+        cmd_path = prog2.get_path()[:-4]
+        prog = ExternalProgram(cmd_path)
+        self.assertTrue(prog.found(), msg='{!r} not found'.format(cmd_path))
+        # Finding a script with no extension inside a directory works
+        prog = ExternalProgram(os.path.join(testdir, 'test-script'))
+        self.assertTrue(prog.found(), msg='test-script not found')
+        # Finding a script with an extension inside a directory works
+        prog = ExternalProgram(os.path.join(testdir, 'test-script-ext.py'))
+        self.assertTrue(prog.found(), msg='test-script-ext.py not found')
+        # Finding a script in PATH
+        os.environ['PATH'] += os.pathsep + testdir
+        # If `.PY` is in PATHEXT, scripts can be found as programs
+        if '.PY' in [ext.upper() for ext in os.environ['PATHEXT'].split(';')]:
+            # Finding a script in PATH w/o extension works and adds the interpreter
+            prog = ExternalProgram('test-script-ext')
+            self.assertTrue(prog.found(), msg='test-script-ext not found in PATH')
+            self.assertPathEqual(prog.get_command()[0], python_command[0])
+            self.assertPathBasenameEqual(prog.get_path(), 'test-script-ext.py')
+        # Finding a script in PATH with extension works and adds the interpreter
+        prog = ExternalProgram('test-script-ext.py')
+        self.assertTrue(prog.found(), msg='test-script-ext.py not found in PATH')
+        self.assertPathEqual(prog.get_command()[0], python_command[0])
+        self.assertPathBasenameEqual(prog.get_path(), 'test-script-ext.py')
+        # Using a script with an extension directly via command= works and adds the interpreter
+        prog = ExternalProgram('test-script-ext.py', command=[os.path.join(testdir, 'test-script-ext.py'), '--help'])
+        self.assertTrue(prog.found(), msg='test-script-ext.py with full path not picked up via command=')
+        self.assertPathEqual(prog.get_command()[0], python_command[0])
+        self.assertPathEqual(prog.get_command()[2], '--help')
+        self.assertPathBasenameEqual(prog.get_path(), 'test-script-ext.py')
+        # Using a script without an extension directly via command= works and adds the interpreter
+        prog = ExternalProgram('test-script', command=[os.path.join(testdir, 'test-script'), '--help'])
+        self.assertTrue(prog.found(), msg='test-script with full path not picked up via command=')
+        self.assertPathEqual(prog.get_command()[0], python_command[0])
+        self.assertPathEqual(prog.get_command()[2], '--help')
+        self.assertPathBasenameEqual(prog.get_path(), 'test-script')
+        # Ensure that WindowsApps gets removed from PATH
+        path = os.environ['PATH']
+        if 'WindowsApps' not in path:
+            username = os.environ['USERNAME']
+            appstore_dir = r'C:\Users\{}\AppData\Local\Microsoft\WindowsApps'.format(username)
+            path = os.pathsep + appstore_dir
+        path = ExternalProgram._windows_sanitize_path(path)
+        self.assertNotIn('WindowsApps', path)
+
+    def test_ignore_libs(self):
+        '''
+        Test that find_library on libs that are to be ignored returns an empty
+        array of arguments. Must be a unit test because we cannot inspect
+        ExternalLibraryHolder from build files.
+        '''
+        testdir = os.path.join(self.platform_test_dir, '1 basic')
+        env = get_fake_env(testdir, self.builddir, self.prefix)
+        cc = env.detect_c_compiler(MachineChoice.HOST)
+        if cc.get_argument_syntax() != 'msvc':
+            raise unittest.SkipTest('Not using MSVC')
+        # To force people to update this test, and also test
+        self.assertEqual(set(cc.ignore_libs), {'c', 'm', 'pthread', 'dl', 'rt', 'execinfo'})
+        for l in cc.ignore_libs:
+            self.assertEqual(cc.find_library(l, env, []), [])
+
+    def test_rc_depends_files(self):
+        testdir = os.path.join(self.platform_test_dir, '5 resources')
+
+        # resource compiler depfile generation is not yet implemented for msvc
+        env = get_fake_env(testdir, self.builddir, self.prefix)
+        depfile_works = env.detect_c_compiler(MachineChoice.HOST).get_id() not in {'msvc', 'clang-cl', 'intel-cl'}
+
+        self.init(testdir)
+        self.build()
+        # Immediately rebuilding should not do anything
+        self.assertBuildIsNoop()
+        # Test compile_resources(depend_file:)
+        # Changing mtime of sample.ico should rebuild prog
+        self.utime(os.path.join(testdir, 'res', 'sample.ico'))
+        self.assertRebuiltTarget('prog')
+        # Test depfile generation by compile_resources
+        # Changing mtime of resource.h should rebuild myres.rc and then prog
+        if depfile_works:
+            self.utime(os.path.join(testdir, 'inc', 'resource', 'resource.h'))
+            self.assertRebuiltTarget('prog')
+        self.wipe()
+
+        if depfile_works:
+            testdir = os.path.join(self.platform_test_dir, '12 resources with custom targets')
+            self.init(testdir)
+            self.build()
+            # Immediately rebuilding should not do anything
+            self.assertBuildIsNoop()
+            # Changing mtime of resource.h should rebuild myres_1.rc and then prog_1
+            self.utime(os.path.join(testdir, 'res', 'resource.h'))
+            self.assertRebuiltTarget('prog_1')
+
+    def test_msvc_cpp17(self):
+        testdir = os.path.join(self.unit_test_dir, '45 vscpp17')
+
+        env = get_fake_env(testdir, self.builddir, self.prefix)
+        cc = env.detect_c_compiler(MachineChoice.HOST)
+        if cc.get_argument_syntax() != 'msvc':
+            raise unittest.SkipTest('Test only applies to MSVC-like compilers')
+
+        try:
+            self.init(testdir)
+        except subprocess.CalledProcessError:
+            # According to Python docs, output is only stored when
+            # using check_output. We don't use it, so we can't check
+            # that the output is correct (i.e. that it failed due
+            # to the right reason).
+            return
+        self.build()
+
+    def test_install_pdb_introspection(self):
+        testdir = os.path.join(self.platform_test_dir, '1 basic')
+
+        env = get_fake_env(testdir, self.builddir, self.prefix)
+        cc = env.detect_c_compiler(MachineChoice.HOST)
+        if cc.get_argument_syntax() != 'msvc':
+            raise unittest.SkipTest('Test only applies to MSVC-like compilers')
+
+        self.init(testdir)
+        installed = self.introspect('--installed')
+        files = [os.path.basename(path) for path in installed.values()]
+
+        self.assertTrue('prog.pdb' in files)
+
+    def _check_ld(self, name: str, lang: str, expected: str) -> None:
+        if not shutil.which(name):
+            raise unittest.SkipTest('Could not find {}.'.format(name))
+        envvars = [mesonbuild.envconfig.BinaryTable.evarMap['{}_ld'.format(lang)]]
+
+        # Also test a deprecated variable if there is one.
+        if envvars[0] in mesonbuild.envconfig.BinaryTable.DEPRECATION_MAP:
+            envvars.append(
+                mesonbuild.envconfig.BinaryTable.DEPRECATION_MAP[envvars[0]])
+
+        for envvar in envvars:
+            with mock.patch.dict(os.environ, {envvar: name}):
+                env = get_fake_env()
+                try:
+                    comp = getattr(env, 'detect_{}_compiler'.format(lang))(MachineChoice.HOST)
+                except EnvironmentException:
+                    raise unittest.SkipTest('Could not find a compiler for {}'.format(lang))
+                self.assertEqual(comp.linker.id, expected)
+
+    def test_link_environment_variable_lld_link(self):
+        env = get_fake_env()
+        comp = getattr(env, 'detect_c_compiler')(MachineChoice.HOST)
+        if isinstance(comp, mesonbuild.compilers.GnuLikeCompiler):
+            raise unittest.SkipTest('GCC cannot be used with link compatible linkers.')
+        self._check_ld('lld-link', 'c', 'lld-link')
+
+    def test_link_environment_variable_link(self):
+        env = get_fake_env()
+        comp = getattr(env, 'detect_c_compiler')(MachineChoice.HOST)
+        if isinstance(comp, mesonbuild.compilers.GnuLikeCompiler):
+            raise unittest.SkipTest('GCC cannot be used with link compatible linkers.')
+        self._check_ld('link', 'c', 'link')
+
+    def test_link_environment_variable_optlink(self):
+        env = get_fake_env()
+        comp = getattr(env, 'detect_c_compiler')(MachineChoice.HOST)
+        if isinstance(comp, mesonbuild.compilers.GnuLikeCompiler):
+            raise unittest.SkipTest('GCC cannot be used with link compatible linkers.')
+        self._check_ld('optlink', 'c', 'optlink')
+
+    @skip_if_not_language('rust')
+    def test_link_environment_variable_rust(self):
+        self._check_ld('link', 'rust', 'link')
+
+    @skip_if_not_language('d')
+    def test_link_environment_variable_d(self):
+        env = get_fake_env()
+        comp = getattr(env, 'detect_d_compiler')(MachineChoice.HOST)
+        if comp.id == 'dmd':
+            raise unittest.SkipTest('meson cannot reliably make DMD use a different linker.')
+        self._check_ld('lld-link', 'd', 'lld-link')
+
+    def test_pefile_checksum(self):
+        try:
+            import pefile
+        except ImportError:
+            if is_ci():
+                raise
+            raise unittest.SkipTest('pefile module not found')
+        testdir = os.path.join(self.common_test_dir, '6 linkshared')
+        self.init(testdir, extra_args=['--buildtype=release'])
+        self.build()
+        # Test that binaries have a non-zero checksum
+        env = get_fake_env()
+        cc = env.detect_c_compiler(MachineChoice.HOST)
+        cc_id = cc.get_id()
+        ld_id = cc.get_linker_id()
+        dll = glob(os.path.join(self.builddir, '*mycpplib.dll'))[0]
+        exe = os.path.join(self.builddir, 'cppprog.exe')
+        for f in (dll, exe):
+            pe = pefile.PE(f)
+            msg = 'PE file: {!r}, compiler: {!r}, linker: {!r}'.format(f, cc_id, ld_id)
+            if cc_id == 'clang-cl':
+                # Latest clang-cl tested (7.0) does not write checksums out
+                self.assertFalse(pe.verify_checksum(), msg=msg)
+            else:
+                # Verify that a valid checksum was written by all other compilers
+                self.assertTrue(pe.verify_checksum(), msg=msg)
+
+    def test_qt5dependency_vscrt(self):
+        '''
+        Test that qt5 dependencies use the debug module suffix when b_vscrt is
+        set to 'mdd'
+        '''
+        # Verify that the `b_vscrt` option is available
+        env = get_fake_env()
+        cc = env.detect_c_compiler(MachineChoice.HOST)
+        if 'b_vscrt' not in cc.base_options:
+            raise unittest.SkipTest('Compiler does not support setting the VS CRT')
+        # Verify that qmake is for Qt5
+        if not shutil.which('qmake-qt5'):
+            if not shutil.which('qmake') and not is_ci():
+                raise unittest.SkipTest('QMake not found')
+            output = subprocess.getoutput('qmake --version')
+            if 'Qt version 5' not in output and not is_ci():
+                raise unittest.SkipTest('Qmake found, but it is not for Qt 5.')
+        # Setup with /MDd
+        testdir = os.path.join(self.framework_test_dir, '4 qt')
+        self.init(testdir, extra_args=['-Db_vscrt=mdd'])
+        # Verify that we're linking to the debug versions of Qt DLLs
+        build_ninja = os.path.join(self.builddir, 'build.ninja')
+        with open(build_ninja, 'r', encoding='utf-8') as f:
+            contents = f.read()
+            m = re.search('build qt5core.exe: cpp_LINKER.*Qt5Cored.lib', contents)
+        self.assertIsNotNone(m, msg=contents)
+
+    def test_compiler_checks_vscrt(self):
+        '''
+        Test that the correct VS CRT is used when running compiler checks
+        '''
+        # Verify that the `b_vscrt` option is available
+        env = get_fake_env()
+        cc = env.detect_c_compiler(MachineChoice.HOST)
+        if 'b_vscrt' not in cc.base_options:
+            raise unittest.SkipTest('Compiler does not support setting the VS CRT')
+
+        def sanitycheck_vscrt(vscrt):
+            checks = self.get_meson_log_sanitychecks()
+            self.assertTrue(len(checks) > 0)
+            for check in checks:
+                self.assertIn(vscrt, check)
+
+        testdir = os.path.join(self.common_test_dir, '1 trivial')
+        self.init(testdir)
+        sanitycheck_vscrt('/MDd')
+
+        self.new_builddir()
+        self.init(testdir, extra_args=['-Dbuildtype=debugoptimized'])
+        sanitycheck_vscrt('/MD')
+
+        self.new_builddir()
+        self.init(testdir, extra_args=['-Dbuildtype=release'])
+        sanitycheck_vscrt('/MD')
+
+        self.new_builddir()
+        self.init(testdir, extra_args=['-Db_vscrt=md'])
+        sanitycheck_vscrt('/MD')
+
+        self.new_builddir()
+        self.init(testdir, extra_args=['-Db_vscrt=mdd'])
+        sanitycheck_vscrt('/MDd')
+
+        self.new_builddir()
+        self.init(testdir, extra_args=['-Db_vscrt=mt'])
+        sanitycheck_vscrt('/MT')
+
+        self.new_builddir()
+        self.init(testdir, extra_args=['-Db_vscrt=mtd'])
+        sanitycheck_vscrt('/MTd')
+
+
+@unittest.skipUnless(is_osx(), "requires Darwin")
+class DarwinTests(BasePlatformTests):
+    '''
+    Tests that should run on macOS
+    '''
+
+    def setUp(self):
+        super().setUp()
+        self.platform_test_dir = os.path.join(self.src_root, 'test cases/osx')
+
+    def test_apple_bitcode(self):
+        '''
+        Test that -fembed-bitcode is correctly added while compiling and
+        -bitcode_bundle is added while linking when b_bitcode is true and not
+        when it is false.  This can't be an ordinary test case because we need
+        to inspect the compiler database.
+        '''
+        testdir = os.path.join(self.platform_test_dir, '7 bitcode')
+        env = get_fake_env(testdir, self.builddir, self.prefix)
+        cc = env.detect_c_compiler(MachineChoice.HOST)
+        if cc.id != 'clang':
+            raise unittest.SkipTest('Not using Clang on OSX')
+        # Try with bitcode enabled
+        out = self.init(testdir, extra_args='-Db_bitcode=true')
+        # Warning was printed
+        self.assertRegex(out, 'WARNING:.*b_bitcode')
+        # Compiler options were added
+        for compdb in self.get_compdb():
+            if 'module' in compdb['file']:
+                self.assertNotIn('-fembed-bitcode', compdb['command'])
+            else:
+                self.assertIn('-fembed-bitcode', compdb['command'])
+        build_ninja = os.path.join(self.builddir, 'build.ninja')
+        # Linker options were added
+        with open(build_ninja, 'r', encoding='utf-8') as f:
+            contents = f.read()
+            m = re.search('LINK_ARGS =.*-bitcode_bundle', contents)
+        self.assertIsNotNone(m, msg=contents)
+        # Try with bitcode disabled
+        self.setconf('-Db_bitcode=false')
+        # Regenerate build
+        self.build()
+        for compdb in self.get_compdb():
+            self.assertNotIn('-fembed-bitcode', compdb['command'])
+        build_ninja = os.path.join(self.builddir, 'build.ninja')
+        with open(build_ninja, 'r', encoding='utf-8') as f:
+            contents = f.read()
+            m = re.search('LINK_ARGS =.*-bitcode_bundle', contents)
+        self.assertIsNone(m, msg=contents)
+
+    def test_apple_bitcode_modules(self):
+        '''
+        Same as above, just for shared_module()
+        '''
+        testdir = os.path.join(self.common_test_dir, '152 shared module resolving symbol in executable')
+        # Ensure that it builds even with bitcode enabled
+        self.init(testdir, extra_args='-Db_bitcode=true')
+        self.build()
+        self.run_tests()
+
+    def _get_darwin_versions(self, fname):
+        fname = os.path.join(self.builddir, fname)
+        out = subprocess.check_output(['otool', '-L', fname], universal_newlines=True)
+        m = re.match(r'.*version (.*), current version (.*)\)', out.split('\n')[1])
+        self.assertIsNotNone(m, msg=out)
+        return m.groups()
+
+    @skipIfNoPkgconfig
+    def test_library_versioning(self):
+        '''
+        Ensure that compatibility_version and current_version are set correctly
+        '''
+        testdir = os.path.join(self.platform_test_dir, '2 library versions')
+        self.init(testdir)
+        self.build()
+        targets = {}
+        for t in self.introspect('--targets'):
+            targets[t['name']] = t['filename'][0] if isinstance(t['filename'], list) else t['filename']
+        self.assertEqual(self._get_darwin_versions(targets['some']), ('7.0.0', '7.0.0'))
+        self.assertEqual(self._get_darwin_versions(targets['noversion']), ('0.0.0', '0.0.0'))
+        self.assertEqual(self._get_darwin_versions(targets['onlyversion']), ('1.0.0', '1.0.0'))
+        self.assertEqual(self._get_darwin_versions(targets['onlysoversion']), ('5.0.0', '5.0.0'))
+        self.assertEqual(self._get_darwin_versions(targets['intver']), ('2.0.0', '2.0.0'))
+        self.assertEqual(self._get_darwin_versions(targets['stringver']), ('2.3.0', '2.3.0'))
+        self.assertEqual(self._get_darwin_versions(targets['stringlistver']), ('2.4.0', '2.4.0'))
+        self.assertEqual(self._get_darwin_versions(targets['intstringver']), ('1111.0.0', '2.5.0'))
+        self.assertEqual(self._get_darwin_versions(targets['stringlistvers']), ('2.6.0', '2.6.1'))
+
+    def test_duplicate_rpath(self):
+        testdir = os.path.join(self.unit_test_dir, '10 build_rpath')
+        # We purposely pass a duplicate rpath to Meson, in order
+        # to ascertain that Meson does not call install_name_tool
+        # with duplicate -delete_rpath arguments, which would
+        # lead to erroring out on installation
+        env = {"LDFLAGS": "-Wl,-rpath,/foo/bar"}
+        self.init(testdir, override_envvars=env)
+        self.build()
+        self.install()
+
+    def test_removing_unused_linker_args(self):
+        testdir = os.path.join(self.common_test_dir, '108 has arg')
+        env = {'CFLAGS': '-L/tmp -L /var/tmp -headerpad_max_install_names -Wl,-export_dynamic -framework Foundation'}
+        self.init(testdir, override_envvars=env)
+
+
+@unittest.skipUnless(not is_windows(), "requires something Unix-like")
+class LinuxlikeTests(BasePlatformTests):
+    '''
+    Tests that should run on Linux, macOS, and *BSD
+    '''
+
+    def test_basic_soname(self):
+        '''
+        Test that the soname is set correctly for shared libraries. This can't
+        be an ordinary test case because we need to run `readelf` and actually
+        check the soname.
+        https://github.com/mesonbuild/meson/issues/785
+        '''
+        testdir = os.path.join(self.common_test_dir, '4 shared')
+        self.init(testdir)
+        self.build()
+        lib1 = os.path.join(self.builddir, 'libmylib.so')
+        soname = get_soname(lib1)
+        self.assertEqual(soname, 'libmylib.so')
+
+    def test_custom_soname(self):
+        '''
+        Test that the soname is set correctly for shared libraries when
+        a custom prefix and/or suffix is used. This can't be an ordinary test
+        case because we need to run `readelf` and actually check the soname.
+        https://github.com/mesonbuild/meson/issues/785
+        '''
+        testdir = os.path.join(self.common_test_dir, '25 library versions')
+        self.init(testdir)
+        self.build()
+        lib1 = os.path.join(self.builddir, 'prefixsomelib.suffix')
+        soname = get_soname(lib1)
+        self.assertEqual(soname, 'prefixsomelib.suffix')
+
+    def test_pic(self):
+        '''
+        Test that -fPIC is correctly added to static libraries when b_staticpic
+        is true and not when it is false. This can't be an ordinary test case
+        because we need to inspect the compiler database.
+        '''
+        if is_windows() or is_cygwin() or is_osx():
+            raise unittest.SkipTest('PIC not relevant')
+
+        testdir = os.path.join(self.common_test_dir, '3 static')
+        self.init(testdir)
+        compdb = self.get_compdb()
+        self.assertIn('-fPIC', compdb[0]['command'])
+        self.setconf('-Db_staticpic=false')
+        # Regenerate build
+        self.build()
+        compdb = self.get_compdb()
+        self.assertNotIn('-fPIC', compdb[0]['command'])
+
+    def test_pkgconfig_gen(self):
+        '''
+        Test that generated pkg-config files can be found and have the correct
+        version and link args. This can't be an ordinary test case because we
+        need to run pkg-config outside of a Meson build file.
+        https://github.com/mesonbuild/meson/issues/889
+        '''
+        testdir = os.path.join(self.common_test_dir, '47 pkgconfig-gen')
+        self.init(testdir)
+        env = get_fake_env(testdir, self.builddir, self.prefix)
+        kwargs = {'required': True, 'silent': True}
+        os.environ['PKG_CONFIG_LIBDIR'] = self.privatedir
+        foo_dep = PkgConfigDependency('libfoo', env, kwargs)
+        self.assertTrue(foo_dep.found())
+        self.assertEqual(foo_dep.get_version(), '1.0')
+        self.assertIn('-lfoo', foo_dep.get_link_args())
+        self.assertEqual(foo_dep.get_pkgconfig_variable('foo', {}), 'bar')
+        self.assertPathEqual(foo_dep.get_pkgconfig_variable('datadir', {}), '/usr/data')
+
+        libhello_nolib = PkgConfigDependency('libhello_nolib', env, kwargs)
+        self.assertTrue(libhello_nolib.found())
+        self.assertEqual(libhello_nolib.get_link_args(), [])
+        self.assertEqual(libhello_nolib.get_compile_args(), [])
+
+    def test_pkgconfig_gen_deps(self):
+        '''
+        Test that generated pkg-config files correctly handle dependencies
+        '''
+        testdir = os.path.join(self.common_test_dir, '47 pkgconfig-gen')
+        self.init(testdir)
+        privatedir1 = self.privatedir
+
+        self.new_builddir()
+        testdir = os.path.join(self.common_test_dir, '47 pkgconfig-gen', 'dependencies')
+        self.init(testdir, override_envvars={'PKG_CONFIG_LIBDIR': privatedir1})
+        privatedir2 = self.privatedir
+
+        os.environ
+        env = {
+            'PKG_CONFIG_LIBDIR': os.pathsep.join([privatedir1, privatedir2]),
+            'PKG_CONFIG_SYSTEM_LIBRARY_PATH': '/usr/lib',
+        }
+        self._run(['pkg-config', 'dependency-test', '--validate'], override_envvars=env)
+
+        # pkg-config strips some duplicated flags so we have to parse the
+        # generated file ourself.
+        expected = {
+            'Requires': 'libexposed',
+            'Requires.private': 'libfoo >= 1.0',
+            'Libs': '-L${libdir} -llibmain -pthread -lcustom',
+            'Libs.private': '-lcustom2 -L${libdir} -llibinternal',
+            'Cflags': '-I${includedir} -pthread -DCUSTOM',
+        }
+        if is_osx() or is_haiku():
+            expected['Cflags'] = expected['Cflags'].replace('-pthread ', '')
+        with open(os.path.join(privatedir2, 'dependency-test.pc')) as f:
+            matched_lines = 0
+            for line in f:
+                parts = line.split(':', 1)
+                if parts[0] in expected:
+                    key = parts[0]
+                    val = parts[1].strip()
+                    expected_val = expected[key]
+                    self.assertEqual(expected_val, val)
+                    matched_lines += 1
+            self.assertEqual(len(expected), matched_lines)
+
+        cmd = ['pkg-config', 'requires-test']
+        out = self._run(cmd + ['--print-requires'], override_envvars=env).strip().split('\n')
+        if not is_openbsd():
+            self.assertEqual(sorted(out), sorted(['libexposed', 'libfoo >= 1.0', 'libhello']))
+        else:
+            self.assertEqual(sorted(out), sorted(['libexposed', 'libfoo>=1.0', 'libhello']))
+
+        cmd = ['pkg-config', 'requires-private-test']
+        out = self._run(cmd + ['--print-requires-private'], override_envvars=env).strip().split('\n')
+        if not is_openbsd():
+            self.assertEqual(sorted(out), sorted(['libexposed', 'libfoo >= 1.0', 'libhello']))
+        else:
+            self.assertEqual(sorted(out), sorted(['libexposed', 'libfoo>=1.0', 'libhello']))
+
+        cmd = ['pkg-config', 'pub-lib-order']
+        out = self._run(cmd + ['--libs'], override_envvars=env).strip().split()
+        self.assertEqual(out, ['-llibmain2', '-llibinternal'])
+
+    def test_pkgconfig_uninstalled(self):
+        testdir = os.path.join(self.common_test_dir, '47 pkgconfig-gen')
+        self.init(testdir)
+        self.build()
+
+        os.environ['PKG_CONFIG_LIBDIR'] = os.path.join(self.builddir, 'meson-uninstalled')
+        if is_cygwin():
+            os.environ['PATH'] += os.pathsep + self.builddir
+
+        self.new_builddir()
+        testdir = os.path.join(self.common_test_dir, '47 pkgconfig-gen', 'dependencies')
+        self.init(testdir)
+        self.build()
+        self.run_tests()
+
+    def test_pkg_unfound(self):
+        testdir = os.path.join(self.unit_test_dir, '23 unfound pkgconfig')
+        self.init(testdir)
+        with open(os.path.join(self.privatedir, 'somename.pc')) as f:
+            pcfile = f.read()
+        self.assertFalse('blub_blob_blib' in pcfile)
+
+    def test_vala_c_warnings(self):
+        '''
+        Test that no warnings are emitted for C code generated by Vala. This
+        can't be an ordinary test case because we need to inspect the compiler
+        database.
+        https://github.com/mesonbuild/meson/issues/864
+        '''
+        if not shutil.which('valac'):
+            raise unittest.SkipTest('valac not installed.')
+        testdir = os.path.join(self.vala_test_dir, '5 target glib')
+        self.init(testdir)
+        compdb = self.get_compdb()
+        vala_command = None
+        c_command = None
+        for each in compdb:
+            if each['file'].endswith('GLib.Thread.c'):
+                vala_command = each['command']
+            elif each['file'].endswith('GLib.Thread.vala'):
+                continue
+            elif each['file'].endswith('retcode.c'):
+                c_command = each['command']
+            else:
+                m = 'Unknown file {!r} in vala_c_warnings test'.format(each['file'])
+                raise AssertionError(m)
+        self.assertIsNotNone(vala_command)
+        self.assertIsNotNone(c_command)
+        # -w suppresses all warnings, should be there in Vala but not in C
+        self.assertIn(" -w ", vala_command)
+        self.assertNotIn(" -w ", c_command)
+        # -Wall enables all warnings, should be there in C but not in Vala
+        self.assertNotIn(" -Wall ", vala_command)
+        self.assertIn(" -Wall ", c_command)
+        # -Werror converts warnings to errors, should always be there since it's
+        # injected by an unrelated piece of code and the project has werror=true
+        self.assertIn(" -Werror ", vala_command)
+        self.assertIn(" -Werror ", c_command)
+
+    @skipIfNoPkgconfig
+    def test_qtdependency_pkgconfig_detection(self):
+        '''
+        Test that qt4 and qt5 detection with pkgconfig works.
+        '''
+        # Verify Qt4 or Qt5 can be found with pkg-config
+        qt4 = subprocess.call(['pkg-config', '--exists', 'QtCore'])
+        qt5 = subprocess.call(['pkg-config', '--exists', 'Qt5Core'])
+        testdir = os.path.join(self.framework_test_dir, '4 qt')
+        self.init(testdir, extra_args=['-Dmethod=pkg-config'])
+        # Confirm that the dependency was found with pkg-config
+        mesonlog = self.get_meson_log()
+        if qt4 == 0:
+            self.assertRegex('\n'.join(mesonlog),
+                             r'Run-time dependency qt4 \(modules: Core\) found: YES 4.* \(pkg-config\)\n')
+        if qt5 == 0:
+            self.assertRegex('\n'.join(mesonlog),
+                             r'Run-time dependency qt5 \(modules: Core\) found: YES 5.* \(pkg-config\)\n')
+
+    @skip_if_not_base_option('b_sanitize')
+    def test_generate_gir_with_address_sanitizer(self):
+        if is_cygwin():
+            raise unittest.SkipTest('asan not available on Cygwin')
+        if is_openbsd():
+            raise unittest.SkipTest('-fsanitize=address is not supported on OpenBSD')
+
+        testdir = os.path.join(self.framework_test_dir, '7 gnome')
+        self.init(testdir, extra_args=['-Db_sanitize=address', '-Db_lundef=false'])
+        self.build()
+
+    def test_qt5dependency_qmake_detection(self):
+        '''
+        Test that qt5 detection with qmake works. This can't be an ordinary
+        test case because it involves setting the environment.
+        '''
+        # Verify that qmake is for Qt5
+        if not shutil.which('qmake-qt5'):
+            if not shutil.which('qmake'):
+                raise unittest.SkipTest('QMake not found')
+            output = subprocess.getoutput('qmake --version')
+            if 'Qt version 5' not in output:
+                raise unittest.SkipTest('Qmake found, but it is not for Qt 5.')
+        # Disable pkg-config codepath and force searching with qmake/qmake-qt5
+        testdir = os.path.join(self.framework_test_dir, '4 qt')
+        self.init(testdir, extra_args=['-Dmethod=qmake'])
+        # Confirm that the dependency was found with qmake
+        mesonlog = self.get_meson_log()
+        self.assertRegex('\n'.join(mesonlog),
+                         r'Run-time dependency qt5 \(modules: Core\) found: YES .* \((qmake|qmake-qt5)\)\n')
+
+    def glob_sofiles_without_privdir(self, g):
+        files = glob(g)
+        return [f for f in files if not f.endswith('.p')]
+
+    def _test_soname_impl(self, libpath, install):
+        if is_cygwin() or is_osx():
+            raise unittest.SkipTest('Test only applicable to ELF and linuxlike sonames')
+
+        testdir = os.path.join(self.unit_test_dir, '1 soname')
+        self.init(testdir)
+        self.build()
+        if install:
+            self.install()
+
+        # File without aliases set.
+        nover = os.path.join(libpath, 'libnover.so')
+        self.assertPathExists(nover)
+        self.assertFalse(os.path.islink(nover))
+        self.assertEqual(get_soname(nover), 'libnover.so')
+        self.assertEqual(len(self.glob_sofiles_without_privdir(nover[:-3] + '*')), 1)
+
+        # File with version set
+        verset = os.path.join(libpath, 'libverset.so')
+        self.assertPathExists(verset + '.4.5.6')
+        self.assertEqual(os.readlink(verset), 'libverset.so.4')
+        self.assertEqual(get_soname(verset), 'libverset.so.4')
+        self.assertEqual(len(self.glob_sofiles_without_privdir(verset[:-3] + '*')), 3)
+
+        # File with soversion set
+        soverset = os.path.join(libpath, 'libsoverset.so')
+        self.assertPathExists(soverset + '.1.2.3')
+        self.assertEqual(os.readlink(soverset), 'libsoverset.so.1.2.3')
+        self.assertEqual(get_soname(soverset), 'libsoverset.so.1.2.3')
+        self.assertEqual(len(self.glob_sofiles_without_privdir(soverset[:-3] + '*')), 2)
+
+        # File with version and soversion set to same values
+        settosame = os.path.join(libpath, 'libsettosame.so')
+        self.assertPathExists(settosame + '.7.8.9')
+        self.assertEqual(os.readlink(settosame), 'libsettosame.so.7.8.9')
+        self.assertEqual(get_soname(settosame), 'libsettosame.so.7.8.9')
+        self.assertEqual(len(self.glob_sofiles_without_privdir(settosame[:-3] + '*')), 2)
+
+        # File with version and soversion set to different values
+        bothset = os.path.join(libpath, 'libbothset.so')
+        self.assertPathExists(bothset + '.1.2.3')
+        self.assertEqual(os.readlink(bothset), 'libbothset.so.1.2.3')
+        self.assertEqual(os.readlink(bothset + '.1.2.3'), 'libbothset.so.4.5.6')
+        self.assertEqual(get_soname(bothset), 'libbothset.so.1.2.3')
+        self.assertEqual(len(self.glob_sofiles_without_privdir(bothset[:-3] + '*')), 3)
+
+    def test_soname(self):
+        self._test_soname_impl(self.builddir, False)
+
+    def test_installed_soname(self):
+        libdir = self.installdir + os.path.join(self.prefix, self.libdir)
+        self._test_soname_impl(libdir, True)
+
+    def test_compiler_check_flags_order(self):
+        '''
+        Test that compiler check flags override all other flags. This can't be
+        an ordinary test case because it needs the environment to be set.
+        '''
+        testdir = os.path.join(self.common_test_dir, '39 has function')
+        env = get_fake_env(testdir, self.builddir, self.prefix)
+        cpp = env.detect_cpp_compiler(MachineChoice.HOST)
+        Oflag = '-O3'
+        OflagCPP = Oflag
+        if cpp.get_id() in ('clang', 'gcc'):
+            # prevent developers from adding "int main(int argc, char **argv)"
+            # to small Meson checks unless these parameters are actually used
+            OflagCPP += ' -Werror=unused-parameter'
+        env = {'CFLAGS': Oflag,
+               'CXXFLAGS': OflagCPP}
+        self.init(testdir, override_envvars=env)
+        cmds = self.get_meson_log_compiler_checks()
+        for cmd in cmds:
+            if cmd[0] == 'ccache':
+                cmd = cmd[1:]
+            # Verify that -I flags from the `args` kwarg are first
+            # This is set in the '39 has function' test case
+            self.assertEqual(cmd[1], '-I/tmp')
+            # Verify that -O3 set via the environment is overridden by -O0
+            Oargs = [arg for arg in cmd if arg.startswith('-O')]
+            self.assertEqual(Oargs, [Oflag, '-O0'])
+
+    def _test_stds_impl(self, testdir, compiler, p: str):
+        has_cpp17 = (compiler.get_id() not in {'clang', 'gcc'} or
+                     compiler.get_id() == 'clang' and _clang_at_least(compiler, '>=5.0.0', '>=9.1') or
+                     compiler.get_id() == 'gcc' and version_compare(compiler.version, '>=5.0.0'))
+        has_cpp2a_c17 = (compiler.get_id() not in {'clang', 'gcc'} or
+                         compiler.get_id() == 'clang' and _clang_at_least(compiler, '>=6.0.0', '>=10.0') or
+                         compiler.get_id() == 'gcc' and version_compare(compiler.version, '>=8.0.0'))
+        has_c18 = (compiler.get_id() not in {'clang', 'gcc'} or
+                   compiler.get_id() == 'clang' and _clang_at_least(compiler, '>=8.0.0', '>=11.0') or
+                   compiler.get_id() == 'gcc' and version_compare(compiler.version, '>=8.0.0'))
+        # Check that all the listed -std=xxx options for this compiler work just fine when used
+        # https://en.wikipedia.org/wiki/Xcode#Latest_versions
+        # https://www.gnu.org/software/gcc/projects/cxx-status.html
+        for v in compiler.get_options()['std'].choices:
+            lang_std = p + '_std'
+            # we do it like this to handle gnu++17,c++17 and gnu17,c17 cleanly
+            # thus, C++ first
+            if '++17' in v and not has_cpp17:
+                continue
+            elif '++2a' in v and not has_cpp2a_c17:  # https://en.cppreference.com/w/cpp/compiler_support
+                continue
+            # now C
+            elif '17' in v and not has_cpp2a_c17:
+                continue
+            elif '18' in v and not has_c18:
+                continue
+            std_opt = '{}={}'.format(lang_std, v)
+            self.init(testdir, extra_args=['-D' + std_opt])
+            cmd = self.get_compdb()[0]['command']
+            # c++03 and gnu++03 are not understood by ICC, don't try to look for them
+            skiplist = frozenset([
+                ('intel', 'c++03'),
+                ('intel', 'gnu++03')])
+            if v != 'none' and not (compiler.get_id(), v) in skiplist:
+                cmd_std = " -std={} ".format(v)
+                self.assertIn(cmd_std, cmd)
+            try:
+                self.build()
+            except Exception:
+                print('{} was {!r}'.format(lang_std, v))
+                raise
+            self.wipe()
+        # Check that an invalid std option in CFLAGS/CPPFLAGS fails
+        # Needed because by default ICC ignores invalid options
+        cmd_std = '-std=FAIL'
+        if p == 'c':
+            env_flag_name = 'CFLAGS'
+        elif p == 'cpp':
+            env_flag_name = 'CXXFLAGS'
+        else:
+            raise NotImplementedError('Language {} not defined.'.format(p))
+        env = {}
+        env[env_flag_name] = cmd_std
+        with self.assertRaises((subprocess.CalledProcessError, mesonbuild.mesonlib.EnvironmentException),
+                               msg='C compiler should have failed with -std=FAIL'):
+            self.init(testdir, override_envvars = env)
+            # ICC won't fail in the above because additional flags are needed to
+            # make unknown -std=... options errors.
+            self.build()
+
+    def test_compiler_c_stds(self):
+        '''
+        Test that C stds specified for this compiler can all be used. Can't be
+        an ordinary test because it requires passing options to meson.
+        '''
+        testdir = os.path.join(self.common_test_dir, '1 trivial')
+        env = get_fake_env(testdir, self.builddir, self.prefix)
+        cc = env.detect_c_compiler(MachineChoice.HOST)
+        self._test_stds_impl(testdir, cc, 'c')
+
+    def test_compiler_cpp_stds(self):
+        '''
+        Test that C++ stds specified for this compiler can all be used. Can't
+        be an ordinary test because it requires passing options to meson.
+        '''
+        testdir = os.path.join(self.common_test_dir, '2 cpp')
+        env = get_fake_env(testdir, self.builddir, self.prefix)
+        cpp = env.detect_cpp_compiler(MachineChoice.HOST)
+        self._test_stds_impl(testdir, cpp, 'cpp')
+
+    def test_unity_subproj(self):
+        testdir = os.path.join(self.common_test_dir, '45 subproject')
+        self.init(testdir, extra_args='--unity=subprojects')
+        pdirs = glob(os.path.join(self.builddir, 'subprojects/sublib/simpletest*.p'))
+        self.assertEqual(len(pdirs), 1)
+        self.assertPathExists(os.path.join(pdirs[0], 'simpletest-unity0.c'))
+        sdirs = glob(os.path.join(self.builddir, 'subprojects/sublib/*sublib*.p'))
+        self.assertEqual(len(sdirs), 1)
+        self.assertPathExists(os.path.join(sdirs[0], 'sublib-unity0.c'))
+        self.assertPathDoesNotExist(os.path.join(self.builddir, 'user@exe/user-unity.c'))
+        self.build()
+
+    def test_installed_modes(self):
+        '''
+        Test that files installed by these tests have the correct permissions.
+        Can't be an ordinary test because our installed_files.txt is very basic.
+        '''
+        # Test file modes
+        testdir = os.path.join(self.common_test_dir, '12 data')
+        self.init(testdir)
+        self.install()
+
+        f = os.path.join(self.installdir, 'etc', 'etcfile.dat')
+        found_mode = stat.filemode(os.stat(f).st_mode)
+        want_mode = 'rw------T'
+        self.assertEqual(want_mode, found_mode[1:])
+
+        f = os.path.join(self.installdir, 'usr', 'bin', 'runscript.sh')
+        statf = os.stat(f)
+        found_mode = stat.filemode(statf.st_mode)
+        want_mode = 'rwxr-sr-x'
+        self.assertEqual(want_mode, found_mode[1:])
+        if os.getuid() == 0:
+            # The chown failed nonfatally if we're not root
+            self.assertEqual(0, statf.st_uid)
+            self.assertEqual(0, statf.st_gid)
+
+        f = os.path.join(self.installdir, 'usr', 'share', 'progname',
+                         'fileobject_datafile.dat')
+        orig = os.path.join(testdir, 'fileobject_datafile.dat')
+        statf = os.stat(f)
+        statorig = os.stat(orig)
+        found_mode = stat.filemode(statf.st_mode)
+        orig_mode = stat.filemode(statorig.st_mode)
+        self.assertEqual(orig_mode[1:], found_mode[1:])
+        self.assertEqual(os.getuid(), statf.st_uid)
+        if os.getuid() == 0:
+            # The chown failed nonfatally if we're not root
+            self.assertEqual(0, statf.st_gid)
+
+        self.wipe()
+        # Test directory modes
+        testdir = os.path.join(self.common_test_dir, '62 install subdir')
+        self.init(testdir)
+        self.install()
+
+        f = os.path.join(self.installdir, 'usr', 'share', 'sub1', 'second.dat')
+        statf = os.stat(f)
+        found_mode = stat.filemode(statf.st_mode)
+        want_mode = 'rwxr-x--t'
+        self.assertEqual(want_mode, found_mode[1:])
+        if os.getuid() == 0:
+            # The chown failed nonfatally if we're not root
+            self.assertEqual(0, statf.st_uid)
+
+    def test_installed_modes_extended(self):
+        '''
+        Test that files are installed with correct permissions using install_mode.
+        '''
+        testdir = os.path.join(self.common_test_dir, '195 install_mode')
+        self.init(testdir)
+        self.build()
+        self.install()
+
+        for fsobj, want_mode in [
+                ('bin', 'drwxr-x---'),
+                ('bin/runscript.sh', '-rwxr-sr-x'),
+                ('bin/trivialprog', '-rwxr-sr-x'),
+                ('include', 'drwxr-x---'),
+                ('include/config.h', '-rw-rwSr--'),
+                ('include/rootdir.h', '-r--r--r-T'),
+                ('lib', 'drwxr-x---'),
+                ('lib/libstat.a', '-rw---Sr--'),
+                ('share', 'drwxr-x---'),
+                ('share/man', 'drwxr-x---'),
+                ('share/man/man1', 'drwxr-x---'),
+                ('share/man/man1/foo.1', '-r--r--r-T'),
+                ('share/sub1', 'drwxr-x---'),
+                ('share/sub1/second.dat', '-rwxr-x--t'),
+                ('subdir', 'drwxr-x---'),
+                ('subdir/data.dat', '-rw-rwSr--'),
+        ]:
+            f = os.path.join(self.installdir, 'usr', *fsobj.split('/'))
+            found_mode = stat.filemode(os.stat(f).st_mode)
+            self.assertEqual(want_mode, found_mode,
+                             msg=('Expected file %s to have mode %s but found %s instead.' %
+                                  (fsobj, want_mode, found_mode)))
+        # Ensure that introspect --installed works on all types of files
+        # FIXME: also verify the files list
+        self.introspect('--installed')
+
+    def test_install_umask(self):
+        '''
+        Test that files are installed with correct permissions using default
+        install umask of 022, regardless of the umask at time the worktree
+        was checked out or the build was executed.
+        '''
+        # Copy source tree to a temporary directory and change permissions
+        # there to simulate a checkout with umask 002.
+        orig_testdir = os.path.join(self.unit_test_dir, '26 install umask')
+        # Create a new testdir under tmpdir.
+        tmpdir = os.path.realpath(tempfile.mkdtemp())
+        self.addCleanup(windows_proof_rmtree, tmpdir)
+        testdir = os.path.join(tmpdir, '26 install umask')
+        # Copy the tree using shutil.copyfile, which will use the current umask
+        # instead of preserving permissions of the old tree.
+        save_umask = os.umask(0o002)
+        self.addCleanup(os.umask, save_umask)
+        shutil.copytree(orig_testdir, testdir, copy_function=shutil.copyfile)
+        # Preserve the executable status of subdir/sayhello though.
+        os.chmod(os.path.join(testdir, 'subdir', 'sayhello'), 0o775)
+        self.init(testdir)
+        # Run the build under a 027 umask now.
+        os.umask(0o027)
+        self.build()
+        # And keep umask 027 for the install step too.
+        self.install()
+
+        for executable in [
+                'bin/prog',
+                'share/subdir/sayhello',
+        ]:
+            f = os.path.join(self.installdir, 'usr', *executable.split('/'))
+            found_mode = stat.filemode(os.stat(f).st_mode)
+            want_mode = '-rwxr-xr-x'
+            self.assertEqual(want_mode, found_mode,
+                             msg=('Expected file %s to have mode %s but found %s instead.' %
+                                  (executable, want_mode, found_mode)))
+
+        for directory in [
+                'usr',
+                'usr/bin',
+                'usr/include',
+                'usr/share',
+                'usr/share/man',
+                'usr/share/man/man1',
+                'usr/share/subdir',
+        ]:
+            f = os.path.join(self.installdir, *directory.split('/'))
+            found_mode = stat.filemode(os.stat(f).st_mode)
+            want_mode = 'drwxr-xr-x'
+            self.assertEqual(want_mode, found_mode,
+                             msg=('Expected directory %s to have mode %s but found %s instead.' %
+                                  (directory, want_mode, found_mode)))
+
+        for datafile in [
+                'include/sample.h',
+                'share/datafile.cat',
+                'share/file.dat',
+                'share/man/man1/prog.1',
+                'share/subdir/datafile.dog',
+        ]:
+            f = os.path.join(self.installdir, 'usr', *datafile.split('/'))
+            found_mode = stat.filemode(os.stat(f).st_mode)
+            want_mode = '-rw-r--r--'
+            self.assertEqual(want_mode, found_mode,
+                             msg=('Expected file %s to have mode %s but found %s instead.' %
+                                  (datafile, want_mode, found_mode)))
+
+    def test_cpp_std_override(self):
+        testdir = os.path.join(self.unit_test_dir, '6 std override')
+        self.init(testdir)
+        compdb = self.get_compdb()
+        # Don't try to use -std=c++03 as a check for the
+        # presence of a compiler flag, as ICC does not
+        # support it.
+        for i in compdb:
+            if 'prog98' in i['file']:
+                c98_comp = i['command']
+            if 'prog11' in i['file']:
+                c11_comp = i['command']
+            if 'progp' in i['file']:
+                plain_comp = i['command']
+        self.assertNotEqual(len(plain_comp), 0)
+        self.assertIn('-std=c++98', c98_comp)
+        self.assertNotIn('-std=c++11', c98_comp)
+        self.assertIn('-std=c++11', c11_comp)
+        self.assertNotIn('-std=c++98', c11_comp)
+        self.assertNotIn('-std=c++98', plain_comp)
+        self.assertNotIn('-std=c++11', plain_comp)
+        # Now werror
+        self.assertIn('-Werror', plain_comp)
+        self.assertNotIn('-Werror', c98_comp)
+
+    def test_run_installed(self):
+        if is_cygwin() or is_osx():
+            raise unittest.SkipTest('LD_LIBRARY_PATH and RPATH not applicable')
+
+        testdir = os.path.join(self.unit_test_dir, '7 run installed')
+        self.init(testdir)
+        self.build()
+        self.install()
+        installed_exe = os.path.join(self.installdir, 'usr/bin/prog')
+        installed_libdir = os.path.join(self.installdir, 'usr/foo')
+        installed_lib = os.path.join(installed_libdir, 'libfoo.so')
+        self.assertTrue(os.path.isfile(installed_exe))
+        self.assertTrue(os.path.isdir(installed_libdir))
+        self.assertTrue(os.path.isfile(installed_lib))
+        # Must fail when run without LD_LIBRARY_PATH to ensure that
+        # rpath has been properly stripped rather than pointing to the builddir.
+        self.assertNotEqual(subprocess.call(installed_exe, stderr=subprocess.DEVNULL), 0)
+        # When LD_LIBRARY_PATH is set it should start working.
+        # For some reason setting LD_LIBRARY_PATH in os.environ fails
+        # when all tests are run (but works when only this test is run),
+        # but doing this explicitly works.
+        env = os.environ.copy()
+        env['LD_LIBRARY_PATH'] = ':'.join([installed_libdir, env.get('LD_LIBRARY_PATH', '')])
+        self.assertEqual(subprocess.call(installed_exe, env=env), 0)
+        # Ensure that introspect --installed works
+        installed = self.introspect('--installed')
+        for v in installed.values():
+            self.assertTrue('prog' in v or 'foo' in v)
+
+    @skipIfNoPkgconfig
+    def test_order_of_l_arguments(self):
+        testdir = os.path.join(self.unit_test_dir, '8 -L -l order')
+        self.init(testdir, override_envvars={'PKG_CONFIG_PATH': testdir})
+        # NOTE: .pc file has -Lfoo -lfoo -Lbar -lbar but pkg-config reorders
+        # the flags before returning them to -Lfoo -Lbar -lfoo -lbar
+        # but pkgconf seems to not do that. Sigh. Support both.
+        expected_order = [('-L/me/first', '-lfoo1'),
+                          ('-L/me/second', '-lfoo2'),
+                          ('-L/me/first', '-L/me/second'),
+                          ('-lfoo1', '-lfoo2'),
+                          ('-L/me/second', '-L/me/third'),
+                          ('-L/me/third', '-L/me/fourth',),
+                          ('-L/me/third', '-lfoo3'),
+                          ('-L/me/fourth', '-lfoo4'),
+                          ('-lfoo3', '-lfoo4'),
+                          ]
+        with open(os.path.join(self.builddir, 'build.ninja')) as ifile:
+            for line in ifile:
+                if expected_order[0][0] in line:
+                    for first, second in expected_order:
+                        self.assertLess(line.index(first), line.index(second))
+                    return
+        raise RuntimeError('Linker entries not found in the Ninja file.')
+
+    def test_introspect_dependencies(self):
+        '''
+        Tests that mesonintrospect --dependencies returns expected output.
+        '''
+        testdir = os.path.join(self.framework_test_dir, '7 gnome')
+        self.init(testdir)
+        glib_found = False
+        gobject_found = False
+        deps = self.introspect('--dependencies')
+        self.assertIsInstance(deps, list)
+        for dep in deps:
+            self.assertIsInstance(dep, dict)
+            self.assertIn('name', dep)
+            self.assertIn('compile_args', dep)
+            self.assertIn('link_args', dep)
+            if dep['name'] == 'glib-2.0':
+                glib_found = True
+            elif dep['name'] == 'gobject-2.0':
+                gobject_found = True
+        self.assertTrue(glib_found)
+        self.assertTrue(gobject_found)
+        if subprocess.call(['pkg-config', '--exists', 'glib-2.0 >= 2.56.2']) != 0:
+            raise unittest.SkipTest('glib >= 2.56.2 needed for the rest')
+        targets = self.introspect('--targets')
+        docbook_target = None
+        for t in targets:
+            if t['name'] == 'generated-gdbus-docbook':
+                docbook_target = t
+                break
+        self.assertIsInstance(docbook_target, dict)
+        self.assertEqual(os.path.basename(t['filename'][0]), 'generated-gdbus-doc-' + os.path.basename(t['target_sources'][0]['sources'][0]))
+
+    def test_introspect_installed(self):
+        testdir = os.path.join(self.linuxlike_test_dir, '7 library versions')
+        self.init(testdir)
+
+        install = self.introspect('--installed')
+        install = {os.path.basename(k): v for k, v in install.items()}
+        print(install)
+        if is_osx():
+            the_truth = {
+                'libmodule.dylib': '/usr/lib/libmodule.dylib',
+                'libnoversion.dylib': '/usr/lib/libnoversion.dylib',
+                'libonlysoversion.5.dylib': '/usr/lib/libonlysoversion.5.dylib',
+                'libonlysoversion.dylib': '/usr/lib/libonlysoversion.dylib',
+                'libonlyversion.1.dylib': '/usr/lib/libonlyversion.1.dylib',
+                'libonlyversion.dylib': '/usr/lib/libonlyversion.dylib',
+                'libsome.0.dylib': '/usr/lib/libsome.0.dylib',
+                'libsome.dylib': '/usr/lib/libsome.dylib',
+            }
+            the_truth_2 = {'/usr/lib/libsome.dylib',
+                           '/usr/lib/libsome.0.dylib',
+            }
+        else:
+            the_truth = {
+                'libmodule.so': '/usr/lib/libmodule.so',
+                'libnoversion.so': '/usr/lib/libnoversion.so',
+                'libonlysoversion.so': '/usr/lib/libonlysoversion.so',
+                'libonlysoversion.so.5': '/usr/lib/libonlysoversion.so.5',
+                'libonlyversion.so': '/usr/lib/libonlyversion.so',
+                'libonlyversion.so.1': '/usr/lib/libonlyversion.so.1',
+                'libonlyversion.so.1.4.5': '/usr/lib/libonlyversion.so.1.4.5',
+                'libsome.so': '/usr/lib/libsome.so',
+                'libsome.so.0': '/usr/lib/libsome.so.0',
+                'libsome.so.1.2.3': '/usr/lib/libsome.so.1.2.3',
+            }
+            the_truth_2 = {'/usr/lib/libsome.so',
+                           '/usr/lib/libsome.so.0',
+                           '/usr/lib/libsome.so.1.2.3'}
+        self.assertDictEqual(install, the_truth)
+
+        targets = self.introspect('--targets')
+        for t in targets:
+            if t['name'] != 'some':
+                continue
+            self.assertSetEqual(the_truth_2, set(t['install_filename']))
+
+    def test_build_rpath(self):
+        if is_cygwin():
+            raise unittest.SkipTest('Windows PE/COFF binaries do not use RPATH')
+        testdir = os.path.join(self.unit_test_dir, '10 build_rpath')
+        self.init(testdir)
+        self.build()
+        # C program RPATH
+        build_rpath = get_rpath(os.path.join(self.builddir, 'prog'))
+        self.assertEqual(build_rpath, '$ORIGIN/sub:/foo/bar')
+        self.install()
+        install_rpath = get_rpath(os.path.join(self.installdir, 'usr/bin/prog'))
+        self.assertEqual(install_rpath, '/baz')
+        # C++ program RPATH
+        build_rpath = get_rpath(os.path.join(self.builddir, 'progcxx'))
+        self.assertEqual(build_rpath, '$ORIGIN/sub:/foo/bar')
+        self.install()
+        install_rpath = get_rpath(os.path.join(self.installdir, 'usr/bin/progcxx'))
+        self.assertEqual(install_rpath, 'baz')
+
+    def test_global_rpath(self):
+        if is_cygwin():
+            raise unittest.SkipTest('Windows PE/COFF binaries do not use RPATH')
+        if is_osx():
+            raise unittest.SkipTest('Global RPATHs via LDFLAGS not yet supported on MacOS (does anybody need it?)')
+
+        testdir = os.path.join(self.unit_test_dir, '77 global-rpath')
+        oldinstalldir = self.installdir
+
+        # Build and install an external library without DESTDIR.
+        # The external library generates a .pc file without an rpath.
+        yonder_dir = os.path.join(testdir, 'yonder')
+        yonder_prefix = os.path.join(oldinstalldir, 'yonder')
+        yonder_libdir = os.path.join(yonder_prefix, self.libdir)
+        self.prefix = yonder_prefix
+        self.installdir = yonder_prefix
+        self.init(yonder_dir)
+        self.build()
+        self.install(use_destdir=False)
+
+        # Since rpath has multiple valid formats we need to
+        # test that they are all properly used.
+        rpath_formats = [
+            ('-Wl,-rpath=', False),
+            ('-Wl,-rpath,', False),
+            ('-Wl,--just-symbols=', True),
+            ('-Wl,--just-symbols,', True),
+            ('-Wl,-R', False),
+            ('-Wl,-R,', False)
+        ]
+        for rpath_format, exception in rpath_formats:
+            # Build an app that uses that installed library.
+            # Supply the rpath to the installed library via LDFLAGS
+            # (as systems like buildroot and guix are wont to do)
+            # and verify install preserves that rpath.
+            self.new_builddir()
+            env = {'LDFLAGS': rpath_format + yonder_libdir,
+                   'PKG_CONFIG_PATH': os.path.join(yonder_libdir, 'pkgconfig')}
+            if exception:
+                with self.assertRaises(subprocess.CalledProcessError):
+                    self.init(testdir, override_envvars=env)
+                break
+            self.init(testdir, override_envvars=env)
+            self.build()
+            self.install(use_destdir=False)
+            got_rpath = get_rpath(os.path.join(yonder_prefix, 'bin/rpathified'))
+            self.assertEqual(got_rpath, yonder_libdir, rpath_format)
+
+    @skip_if_not_base_option('b_sanitize')
+    def test_pch_with_address_sanitizer(self):
+        if is_cygwin():
+            raise unittest.SkipTest('asan not available on Cygwin')
+        if is_openbsd():
+            raise unittest.SkipTest('-fsanitize=address is not supported on OpenBSD')
+
+        testdir = os.path.join(self.common_test_dir, '13 pch')
+        self.init(testdir, extra_args=['-Db_sanitize=address', '-Db_lundef=false'])
+        self.build()
+        compdb = self.get_compdb()
+        for i in compdb:
+            self.assertIn("-fsanitize=address", i["command"])
+
+    def test_cross_find_program(self):
+        testdir = os.path.join(self.unit_test_dir, '11 cross prog')
+        crossfile = tempfile.NamedTemporaryFile(mode='w')
+        print(os.path.join(testdir, 'some_cross_tool.py'))
+        crossfile.write(textwrap.dedent('''\
+            [binaries]
+            c = '/usr/bin/{1}'
+            ar = '/usr/bin/ar'
+            strip = '/usr/bin/ar'
+            sometool.py = ['{0}']
+            someothertool.py = '{0}'
+
+            [properties]
+
+            [host_machine]
+            system = 'linux'
+            cpu_family = 'arm'
+            cpu = 'armv7' # Not sure if correct.
+            endian = 'little'
+            ''').format(os.path.join(testdir, 'some_cross_tool.py'),
+                        'gcc' if is_sunos() else 'cc'))
+        crossfile.flush()
+        self.meson_cross_file = crossfile.name
+        self.init(testdir)
+
+    def test_reconfigure(self):
+        testdir = os.path.join(self.unit_test_dir, '13 reconfigure')
+        self.init(testdir, extra_args=['-Db_coverage=true'], default_args=False)
+        self.build('reconfigure')
+
+    def test_vala_generated_source_buildir_inside_source_tree(self):
+        '''
+        Test that valac outputs generated C files in the expected location when
+        the builddir is a subdir of the source tree.
+        '''
+        if not shutil.which('valac'):
+            raise unittest.SkipTest('valac not installed.')
+
+        testdir = os.path.join(self.vala_test_dir, '8 generated sources')
+        newdir = os.path.join(self.builddir, 'srctree')
+        shutil.copytree(testdir, newdir)
+        testdir = newdir
+        # New builddir
+        builddir = os.path.join(testdir, 'subdir/_build')
+        os.makedirs(builddir, exist_ok=True)
+        self.change_builddir(builddir)
+        self.init(testdir)
+        self.build()
+
+    def test_old_gnome_module_codepaths(self):
+        '''
+        A lot of code in the GNOME module is conditional on the version of the
+        glib tools that are installed, and breakages in the old code can slip
+        by once the CI has a newer glib version. So we force the GNOME module
+        to pretend that it's running on an ancient glib so the fallback code is
+        also tested.
+        '''
+        testdir = os.path.join(self.framework_test_dir, '7 gnome')
+        mesonbuild.modules.gnome.native_glib_version = '2.20'
+        env = {'MESON_UNIT_TEST_PRETEND_GLIB_OLD': "1"}
+        try:
+            self.init(testdir,
+                      inprocess=True,
+                      override_envvars=env)
+            self.build(override_envvars=env)
+        finally:
+            mesonbuild.modules.gnome.native_glib_version = None
+
+    @skipIfNoPkgconfig
+    def test_pkgconfig_usage(self):
+        testdir1 = os.path.join(self.unit_test_dir, '27 pkgconfig usage/dependency')
+        testdir2 = os.path.join(self.unit_test_dir, '27 pkgconfig usage/dependee')
+        if subprocess.call(['pkg-config', '--cflags', 'glib-2.0'],
+                           stdout=subprocess.DEVNULL,
+                           stderr=subprocess.DEVNULL) != 0:
+            raise unittest.SkipTest('Glib 2.0 dependency not available.')
+        with tempfile.TemporaryDirectory() as tempdirname:
+            self.init(testdir1, extra_args=['--prefix=' + tempdirname, '--libdir=lib'], default_args=False)
+            self.install(use_destdir=False)
+            shutil.rmtree(self.builddir)
+            os.mkdir(self.builddir)
+            pkg_dir = os.path.join(tempdirname, 'lib/pkgconfig')
+            self.assertTrue(os.path.exists(os.path.join(pkg_dir, 'libpkgdep.pc')))
+            lib_dir = os.path.join(tempdirname, 'lib')
+            myenv = os.environ.copy()
+            myenv['PKG_CONFIG_PATH'] = pkg_dir
+            # Private internal libraries must not leak out.
+            pkg_out = subprocess.check_output(['pkg-config', '--static', '--libs', 'libpkgdep'], env=myenv)
+            self.assertFalse(b'libpkgdep-int' in pkg_out, 'Internal library leaked out.')
+            # Dependencies must not leak to cflags when building only a shared library.
+            pkg_out = subprocess.check_output(['pkg-config', '--cflags', 'libpkgdep'], env=myenv)
+            self.assertFalse(b'glib' in pkg_out, 'Internal dependency leaked to headers.')
+            # Test that the result is usable.
+            self.init(testdir2, override_envvars=myenv)
+            self.build(override_envvars=myenv)
+            myenv = os.environ.copy()
+            myenv['LD_LIBRARY_PATH'] = ':'.join([lib_dir, myenv.get('LD_LIBRARY_PATH', '')])
+            if is_cygwin():
+                bin_dir = os.path.join(tempdirname, 'bin')
+                myenv['PATH'] = bin_dir + os.pathsep + myenv['PATH']
+            self.assertTrue(os.path.isdir(lib_dir))
+            test_exe = os.path.join(self.builddir, 'pkguser')
+            self.assertTrue(os.path.isfile(test_exe))
+            subprocess.check_call(test_exe, env=myenv)
+
+    @skipIfNoPkgconfig
+    def test_pkgconfig_relative_paths(self):
+        testdir = os.path.join(self.unit_test_dir, '62 pkgconfig relative paths')
+        pkg_dir = os.path.join(testdir, 'pkgconfig')
+        self.assertTrue(os.path.exists(os.path.join(pkg_dir, 'librelativepath.pc')))
+
+        env = get_fake_env(testdir, self.builddir, self.prefix)
+        env.coredata.set_options({'pkg_config_path': pkg_dir}, subproject='')
+        kwargs = {'required': True, 'silent': True}
+        relative_path_dep = PkgConfigDependency('librelativepath', env, kwargs)
+        self.assertTrue(relative_path_dep.found())
+
+        # Ensure link_args are properly quoted
+        libpath = Path(self.builddir) / '../relativepath/lib'
+        link_args = ['-L' + libpath.as_posix(), '-lrelativepath']
+        self.assertEqual(relative_path_dep.get_link_args(), link_args)
+
+    @skipIfNoPkgconfig
+    def test_pkgconfig_internal_libraries(self):
+        '''
+        '''
+        with tempfile.TemporaryDirectory() as tempdirname:
+            # build library
+            testdirbase = os.path.join(self.unit_test_dir, '32 pkgconfig use libraries')
+            testdirlib = os.path.join(testdirbase, 'lib')
+            self.init(testdirlib, extra_args=['--prefix=' + tempdirname,
+                                              '--libdir=lib',
+                                              '--default-library=static'], default_args=False)
+            self.build()
+            self.install(use_destdir=False)
+
+            # build user of library
+            pkg_dir = os.path.join(tempdirname, 'lib/pkgconfig')
+            self.new_builddir()
+            self.init(os.path.join(testdirbase, 'app'),
+                      override_envvars={'PKG_CONFIG_PATH': pkg_dir})
+            self.build()
+
+    @skipIfNoPkgconfig
+    def test_static_archive_stripping(self):
+        '''
+        Check that Meson produces valid static archives with --strip enabled
+        '''
+        with tempfile.TemporaryDirectory() as tempdirname:
+            testdirbase = os.path.join(self.unit_test_dir, '67 static archive stripping')
+
+            # build lib
+            self.new_builddir()
+            testdirlib = os.path.join(testdirbase, 'lib')
+            testlibprefix = os.path.join(tempdirname, 'libprefix')
+            self.init(testdirlib, extra_args=['--prefix=' + testlibprefix,
+                                              '--libdir=lib',
+                                              '--default-library=static',
+                                              '--buildtype=debug',
+                                              '--strip'], default_args=False)
+            self.build()
+            self.install(use_destdir=False)
+
+            # build executable (uses lib, fails if static archive has been stripped incorrectly)
+            pkg_dir = os.path.join(testlibprefix, 'lib/pkgconfig')
+            self.new_builddir()
+            self.init(os.path.join(testdirbase, 'app'),
+                      override_envvars={'PKG_CONFIG_PATH': pkg_dir})
+            self.build()
+
+    @skipIfNoPkgconfig
+    def test_pkgconfig_formatting(self):
+        testdir = os.path.join(self.unit_test_dir, '38 pkgconfig format')
+        self.init(testdir)
+        myenv = os.environ.copy()
+        myenv['PKG_CONFIG_PATH'] = self.privatedir
+        stdo = subprocess.check_output(['pkg-config', '--libs-only-l', 'libsomething'], env=myenv)
+        deps = [b'-lgobject-2.0', b'-lgio-2.0', b'-lglib-2.0', b'-lsomething']
+        if is_windows() or is_cygwin() or is_osx() or is_openbsd():
+            # On Windows, libintl is a separate library
+            deps.append(b'-lintl')
+        self.assertEqual(set(deps), set(stdo.split()))
+
+    @skipIfNoPkgconfig
+    @skip_if_not_language('cs')
+    def test_pkgconfig_csharp_library(self):
+        testdir = os.path.join(self.unit_test_dir, '50 pkgconfig csharp library')
+        self.init(testdir)
+        myenv = os.environ.copy()
+        myenv['PKG_CONFIG_PATH'] = self.privatedir
+        stdo = subprocess.check_output(['pkg-config', '--libs', 'libsomething'], env=myenv)
+
+        self.assertEqual("-r/usr/lib/libsomething.dll", str(stdo.decode('ascii')).strip())
+
+    @skipIfNoPkgconfig
+    def test_pkgconfig_link_order(self):
+        '''
+        Test that libraries are listed before their dependencies.
+        '''
+        testdir = os.path.join(self.unit_test_dir, '53 pkgconfig static link order')
+        self.init(testdir)
+        myenv = os.environ.copy()
+        myenv['PKG_CONFIG_PATH'] = self.privatedir
+        stdo = subprocess.check_output(['pkg-config', '--libs', 'libsomething'], env=myenv)
+        deps = stdo.split()
+        self.assertTrue(deps.index(b'-lsomething') < deps.index(b'-ldependency'))
+
+    def test_deterministic_dep_order(self):
+        '''
+        Test that the dependencies are always listed in a deterministic order.
+        '''
+        testdir = os.path.join(self.unit_test_dir, '43 dep order')
+        self.init(testdir)
+        with open(os.path.join(self.builddir, 'build.ninja')) as bfile:
+            for line in bfile:
+                if 'build myexe:' in line or 'build myexe.exe:' in line:
+                    self.assertIn('liblib1.a liblib2.a', line)
+                    return
+        raise RuntimeError('Could not find the build rule')
+
+    def test_deterministic_rpath_order(self):
+        '''
+        Test that the rpaths are always listed in a deterministic order.
+        '''
+        if is_cygwin():
+            raise unittest.SkipTest('rpath are not used on Cygwin')
+        testdir = os.path.join(self.unit_test_dir, '42 rpath order')
+        self.init(testdir)
+        if is_osx():
+            rpathre = re.compile(r'-rpath,.*/subprojects/sub1.*-rpath,.*/subprojects/sub2')
+        else:
+            rpathre = re.compile(r'-rpath,\$\$ORIGIN/subprojects/sub1:\$\$ORIGIN/subprojects/sub2')
+        with open(os.path.join(self.builddir, 'build.ninja')) as bfile:
+            for line in bfile:
+                if '-rpath' in line:
+                    self.assertRegex(line, rpathre)
+                    return
+        raise RuntimeError('Could not find the rpath')
+
+    def test_override_with_exe_dep(self):
+        '''
+        Test that we produce the correct dependencies when a program is overridden with an executable.
+        '''
+        testdir = os.path.join(self.common_test_dir, '201 override with exe')
+        self.init(testdir)
+        with open(os.path.join(self.builddir, 'build.ninja')) as bfile:
+            for line in bfile:
+                if 'main1.c:' in line or 'main2.c:' in line:
+                    self.assertIn('| subprojects/sub/foobar', line)
+
+    @skipIfNoPkgconfig
+    def test_usage_external_library(self):
+        '''
+        Test that uninstalled usage of an external library (from the system or
+        PkgConfigDependency) works. On macOS, this workflow works out of the
+        box. On Linux, BSDs, Windows, etc, you need to set extra arguments such
+        as LD_LIBRARY_PATH, etc, so this test is skipped.
+
+        The system library is found with cc.find_library() and pkg-config deps.
+        '''
+        oldprefix = self.prefix
+        # Install external library so we can find it
+        testdir = os.path.join(self.unit_test_dir, '40 external, internal library rpath', 'external library')
+        # install into installdir without using DESTDIR
+        installdir = self.installdir
+        self.prefix = installdir
+        self.init(testdir)
+        self.prefix = oldprefix
+        self.build()
+        self.install(use_destdir=False)
+        ## New builddir for the consumer
+        self.new_builddir()
+        env = {'LIBRARY_PATH': os.path.join(installdir, self.libdir),
+               'PKG_CONFIG_PATH': os.path.join(installdir, self.libdir, 'pkgconfig')}
+        testdir = os.path.join(self.unit_test_dir, '40 external, internal library rpath', 'built library')
+        # install into installdir without using DESTDIR
+        self.prefix = self.installdir
+        self.init(testdir, override_envvars=env)
+        self.prefix = oldprefix
+        self.build(override_envvars=env)
+        # test uninstalled
+        self.run_tests(override_envvars=env)
+        if not (is_osx() or is_linux()):
+            return
+        # test running after installation
+        self.install(use_destdir=False)
+        prog = os.path.join(self.installdir, 'bin', 'prog')
+        self._run([prog])
+        if not is_osx():
+            # Rest of the workflow only works on macOS
+            return
+        out = self._run(['otool', '-L', prog])
+        self.assertNotIn('@rpath', out)
+        ## New builddir for testing that DESTDIR is not added to install_name
+        self.new_builddir()
+        # install into installdir with DESTDIR
+        self.init(testdir, override_envvars=env)
+        self.build(override_envvars=env)
+        # test running after installation
+        self.install(override_envvars=env)
+        prog = self.installdir + os.path.join(self.prefix, 'bin', 'prog')
+        lib = self.installdir + os.path.join(self.prefix, 'lib', 'libbar_built.dylib')
+        for f in prog, lib:
+            out = self._run(['otool', '-L', f])
+            # Ensure that the otool output does not contain self.installdir
+            self.assertNotRegex(out, self.installdir + '.*dylib ')
+
+    @skipIfNoPkgconfig
+    def test_usage_pkgconfig_prefixes(self):
+        '''
+        Build and install two external libraries, to different prefixes,
+        then build and install a client program that finds them via pkgconfig,
+        and verify the installed client program runs.
+        '''
+        oldinstalldir = self.installdir
+
+        # Build and install both external libraries without DESTDIR
+        val1dir = os.path.join(self.unit_test_dir, '76 pkgconfig prefixes', 'val1')
+        val1prefix = os.path.join(oldinstalldir, 'val1')
+        self.prefix = val1prefix
+        self.installdir = val1prefix
+        self.init(val1dir)
+        self.build()
+        self.install(use_destdir=False)
+        self.new_builddir()
+
+        env1 = {}
+        env1['PKG_CONFIG_PATH'] = os.path.join(val1prefix, self.libdir, 'pkgconfig')
+        val2dir = os.path.join(self.unit_test_dir, '76 pkgconfig prefixes', 'val2')
+        val2prefix = os.path.join(oldinstalldir, 'val2')
+        self.prefix = val2prefix
+        self.installdir = val2prefix
+        self.init(val2dir, override_envvars=env1)
+        self.build()
+        self.install(use_destdir=False)
+        self.new_builddir()
+
+        # Build, install, and run the client program
+        env2 = {}
+        env2['PKG_CONFIG_PATH'] = os.path.join(val2prefix, self.libdir, 'pkgconfig')
+        testdir = os.path.join(self.unit_test_dir, '76 pkgconfig prefixes', 'client')
+        testprefix = os.path.join(oldinstalldir, 'client')
+        self.prefix = testprefix
+        self.installdir = testprefix
+        self.init(testdir, override_envvars=env2)
+        self.build()
+        self.install(use_destdir=False)
+        prog = os.path.join(self.installdir, 'bin', 'client')
+        env3 = {}
+        if is_cygwin():
+            env3['PATH'] = os.path.join(val1prefix, 'bin') + \
+                os.pathsep + \
+                os.path.join(val2prefix, 'bin') + \
+                os.pathsep + os.environ['PATH']
+        out = self._run([prog], override_envvars=env3).strip()
+        # Expected output is val1 + val2 = 3
+        self.assertEqual(out, '3')
+
+    def install_subdir_invalid_symlinks(self, testdir, subdir_path):
+        '''
+        Test that installation of broken symlinks works fine.
+        https://github.com/mesonbuild/meson/issues/3914
+        '''
+        testdir = os.path.join(self.common_test_dir, testdir)
+        subdir = os.path.join(testdir, subdir_path)
+        with chdir(subdir):
+            # Can't distribute broken symlinks in the source tree because it breaks
+            # the creation of zipapps. Create it dynamically and run the test by
+            # hand.
+            src = '../../nonexistent.txt'
+            os.symlink(src, 'invalid-symlink.txt')
+            try:
+                self.init(testdir)
+                self.build()
+                self.install()
+                install_path = subdir_path.split(os.path.sep)[-1]
+                link = os.path.join(self.installdir, 'usr', 'share', install_path, 'invalid-symlink.txt')
+                self.assertTrue(os.path.islink(link), msg=link)
+                self.assertEqual(src, os.readlink(link))
+                self.assertFalse(os.path.isfile(link), msg=link)
+            finally:
+                os.remove(os.path.join(subdir, 'invalid-symlink.txt'))
+
+    def test_install_subdir_symlinks(self):
+        self.install_subdir_invalid_symlinks('62 install subdir', os.path.join('sub', 'sub1'))
+
+    def test_install_subdir_symlinks_with_default_umask(self):
+        self.install_subdir_invalid_symlinks('195 install_mode', 'sub2')
+
+    def test_install_subdir_symlinks_with_default_umask_and_mode(self):
+        self.install_subdir_invalid_symlinks('195 install_mode', 'sub1')
+
+    @skipIfNoPkgconfigDep('gmodule-2.0')
+    def test_ldflag_dedup(self):
+        testdir = os.path.join(self.unit_test_dir, '52 ldflagdedup')
+        if is_cygwin() or is_osx():
+            raise unittest.SkipTest('Not applicable on Cygwin or OSX.')
+        env = get_fake_env()
+        cc = env.detect_c_compiler(MachineChoice.HOST)
+        linker = cc.linker
+        if not linker.export_dynamic_args(env):
+            raise unittest.SkipTest('Not applicable for linkers without --export-dynamic')
+        self.init(testdir)
+        build_ninja = os.path.join(self.builddir, 'build.ninja')
+        max_count = 0
+        search_term = '-Wl,--export-dynamic'
+        with open(build_ninja, 'r', encoding='utf-8') as f:
+            for line in f:
+                max_count = max(max_count, line.count(search_term))
+        self.assertEqual(max_count, 1, 'Export dynamic incorrectly deduplicated.')
+
+    def test_compiler_libs_static_dedup(self):
+        testdir = os.path.join(self.unit_test_dir, '56 dedup compiler libs')
+        self.init(testdir)
+        build_ninja = os.path.join(self.builddir, 'build.ninja')
+        with open(build_ninja, 'r', encoding='utf-8') as f:
+            lines = f.readlines()
+        for lib in ('-ldl', '-lm', '-lc', '-lrt'):
+            for line in lines:
+                if lib not in line:
+                    continue
+                # Assert that
+                self.assertEqual(len(line.split(lib)), 2, msg=(lib, line))
+
+    @skipIfNoPkgconfig
+    def test_noncross_options(self):
+        # C_std defined in project options must be in effect also when native compiling.
+        testdir = os.path.join(self.unit_test_dir, '51 noncross options')
+        self.init(testdir, extra_args=['-Dpkg_config_path=' + testdir])
+        compdb = self.get_compdb()
+        self.assertEqual(len(compdb), 2)
+        self.assertRegex(compdb[0]['command'], '-std=c99')
+        self.assertRegex(compdb[1]['command'], '-std=c99')
+        self.build()
+
+    def test_identity_cross(self):
+        testdir = os.path.join(self.unit_test_dir, '61 identity cross')
+
+        nativefile = tempfile.NamedTemporaryFile(mode='w')
+        nativefile.write('''[binaries]
+c = ['{0}']
+'''.format(os.path.join(testdir, 'build_wrapper.py')))
+        nativefile.flush()
+        self.meson_native_file = nativefile.name
+
+        crossfile = tempfile.NamedTemporaryFile(mode='w')
+        crossfile.write('''[binaries]
+c = ['{0}']
+'''.format(os.path.join(testdir, 'host_wrapper.py')))
+        crossfile.flush()
+        self.meson_cross_file = crossfile.name
+
+        # TODO should someday be explicit about build platform only here
+        self.init(testdir)
+
+    def test_identity_cross_env(self):
+        testdir = os.path.join(self.unit_test_dir, '61 identity cross')
+        env = {
+            'CC_FOR_BUILD': '"' + os.path.join(testdir, 'build_wrapper.py') + '"',
+        }
+        crossfile = tempfile.NamedTemporaryFile(mode='w')
+        crossfile.write('''[binaries]
+c = ['{0}']
+'''.format(os.path.join(testdir, 'host_wrapper.py')))
+        crossfile.flush()
+        self.meson_cross_file = crossfile.name
+        # TODO should someday be explicit about build platform only here
+        self.init(testdir, override_envvars=env)
+
+    @skipIfNoPkgconfig
+    def test_static_link(self):
+        if is_cygwin():
+            raise unittest.SkipTest("Cygwin doesn't support LD_LIBRARY_PATH.")
+
+        # Build some libraries and install them
+        testdir = os.path.join(self.unit_test_dir, '68 static link/lib')
+        libdir = os.path.join(self.installdir, self.libdir)
+        oldprefix = self.prefix
+        self.prefix = self.installdir
+        self.init(testdir)
+        self.install(use_destdir=False)
+
+        # Test that installed libraries works
+        self.new_builddir()
+        self.prefix = oldprefix
+        meson_args = ['-Dc_link_args=-L{}'.format(libdir),
+                      '--fatal-meson-warnings']
+        testdir = os.path.join(self.unit_test_dir, '68 static link')
+        env = {'PKG_CONFIG_LIBDIR': os.path.join(libdir, 'pkgconfig')}
+        self.init(testdir, extra_args=meson_args, override_envvars=env)
+        self.build()
+        self.run_tests()
+
+    def _check_ld(self, check: str, name: str, lang: str, expected: str) -> None:
+        if is_sunos():
+            raise unittest.SkipTest('Solaris currently cannot override the linker.')
+        if not shutil.which(check):
+            raise unittest.SkipTest('Could not find {}.'.format(check))
+        envvars = [mesonbuild.envconfig.BinaryTable.evarMap['{}_ld'.format(lang)]]
+
+        # Also test a deprecated variable if there is one.
+        if envvars[0] in mesonbuild.envconfig.BinaryTable.DEPRECATION_MAP:
+            envvars.append(
+                mesonbuild.envconfig.BinaryTable.DEPRECATION_MAP[envvars[0]])
+
+        for envvar in envvars:
+            with mock.patch.dict(os.environ, {envvar: name}):
+                env = get_fake_env()
+                comp = getattr(env, 'detect_{}_compiler'.format(lang))(MachineChoice.HOST)
+                if lang != 'rust' and comp.use_linker_args('bfd') == []:
+                    raise unittest.SkipTest(
+                        'Compiler {} does not support using alternative linkers'.format(comp.id))
+                self.assertEqual(comp.linker.id, expected)
+
+    def test_ld_environment_variable_bfd(self):
+        self._check_ld('ld.bfd', 'bfd', 'c', 'ld.bfd')
+
+    def test_ld_environment_variable_gold(self):
+        self._check_ld('ld.gold', 'gold', 'c', 'ld.gold')
+
+    def test_ld_environment_variable_lld(self):
+        self._check_ld('ld.lld', 'lld', 'c', 'ld.lld')
+
+    @skip_if_not_language('rust')
+    def test_ld_environment_variable_rust(self):
+        self._check_ld('ld.gold', 'gold', 'rust', 'ld.gold')
+
+    def test_ld_environment_variable_cpp(self):
+        self._check_ld('ld.gold', 'gold', 'cpp', 'ld.gold')
+
+    @skip_if_not_language('objc')
+    def test_ld_environment_variable_objc(self):
+        self._check_ld('ld.gold', 'gold', 'objc', 'ld.gold')
+
+    @skip_if_not_language('objcpp')
+    def test_ld_environment_variable_objcpp(self):
+        self._check_ld('ld.gold', 'gold', 'objcpp', 'ld.gold')
+
+    @skip_if_not_language('fortran')
+    def test_ld_environment_variable_fortran(self):
+        self._check_ld('ld.gold', 'gold', 'fortran', 'ld.gold')
+
+    @skip_if_not_language('d')
+    def test_ld_environment_variable_d(self):
+        # At least for me, ldc defaults to gold, and gdc defaults to bfd, so
+        # let's pick lld, which isn't the default for either (currently)
+        self._check_ld('ld.lld', 'lld', 'd', 'ld.lld')
+
+    def compute_sha256(self, filename):
+        with open(filename, 'rb') as f:
+            return hashlib.sha256(f.read()).hexdigest()
+
+    def test_wrap_with_file_url(self):
+        testdir = os.path.join(self.unit_test_dir, '74 wrap file url')
+        source_filename = os.path.join(testdir, 'subprojects', 'foo.tar.xz')
+        patch_filename = os.path.join(testdir, 'subprojects', 'foo-patch.tar.xz')
+        wrap_filename = os.path.join(testdir, 'subprojects', 'foo.wrap')
+        source_hash = self.compute_sha256(source_filename)
+        patch_hash = self.compute_sha256(patch_filename)
+        wrap = textwrap.dedent("""\
+            [wrap-file]
+            directory = foo
+
+            source_url = http://server.invalid/foo
+            source_fallback_url = file://{}
+            source_filename = foo.tar.xz
+            source_hash = {}
+
+            patch_url = http://server.invalid/foo
+            patch_fallback_url = file://{}
+            patch_filename = foo-patch.tar.xz
+            patch_hash = {}
+            """.format(source_filename, source_hash, patch_filename, patch_hash))
+        with open(wrap_filename, 'w') as f:
+            f.write(wrap)
+        self.init(testdir)
+        self.build()
+        self.run_tests()
+
+        windows_proof_rmtree(os.path.join(testdir, 'subprojects', 'packagecache'))
+        windows_proof_rmtree(os.path.join(testdir, 'subprojects', 'foo'))
+        os.unlink(wrap_filename)
+
+    def test_no_rpath_for_static(self):
+        testdir = os.path.join(self.common_test_dir, '5 linkstatic')
+        self.init(testdir)
+        self.build()
+        build_rpath = get_rpath(os.path.join(self.builddir, 'prog'))
+        self.assertIsNone(build_rpath)
+
+    def test_lookup_system_after_broken_fallback(self):
+        # Just to generate libfoo.pc so we can test system dependency lookup.
+        testdir = os.path.join(self.common_test_dir, '47 pkgconfig-gen')
+        self.init(testdir)
+        privatedir = self.privatedir
+
+        # Write test project where the first dependency() returns not-found
+        # because 'broken' subproject does not exit, but that should not prevent
+        # the 2nd dependency() to lookup on system.
+        self.new_builddir()
+        with tempfile.TemporaryDirectory() as d:
+            with open(os.path.join(d, 'meson.build'), 'w') as f:
+                f.write(textwrap.dedent('''\
+                    project('test')
+                    dependency('notfound', fallback: 'broken', required: false)
+                    dependency('libfoo', fallback: 'broken', required: true)
+                    '''))
+            self.init(d, override_envvars={'PKG_CONFIG_LIBDIR': privatedir})
+
+class BaseLinuxCrossTests(BasePlatformTests):
+    # Don't pass --libdir when cross-compiling. We have tests that
+    # check whether meson auto-detects it correctly.
+    libdir = None
+
+
+def should_run_cross_arm_tests():
+    return shutil.which('arm-linux-gnueabihf-gcc') and not platform.machine().lower().startswith('arm')
+
+@unittest.skipUnless(not is_windows() and should_run_cross_arm_tests(), "requires ability to cross compile to ARM")
+class LinuxCrossArmTests(BaseLinuxCrossTests):
+    '''
+    Tests that cross-compilation to Linux/ARM works
+    '''
+
+    def setUp(self):
+        super().setUp()
+        src_root = os.path.dirname(__file__)
+        self.meson_cross_file = os.path.join(src_root, 'cross', 'ubuntu-armhf.txt')
+
+    def test_cflags_cross_environment_pollution(self):
+        '''
+        Test that the CFLAGS environment variable does not pollute the cross
+        environment. This can't be an ordinary test case because we need to
+        inspect the compiler database.
+        '''
+        testdir = os.path.join(self.common_test_dir, '3 static')
+        self.init(testdir, override_envvars={'CFLAGS': '-DBUILD_ENVIRONMENT_ONLY'})
+        compdb = self.get_compdb()
+        self.assertNotIn('-DBUILD_ENVIRONMENT_ONLY', compdb[0]['command'])
+
+    def test_cross_file_overrides_always_args(self):
+        '''
+        Test that $lang_args in cross files always override get_always_args().
+        Needed for overriding the default -D_FILE_OFFSET_BITS=64 on some
+        architectures such as some Android versions and Raspbian.
+        https://github.com/mesonbuild/meson/issues/3049
+        https://github.com/mesonbuild/meson/issues/3089
+        '''
+        testdir = os.path.join(self.unit_test_dir, '33 cross file overrides always args')
+        self.meson_cross_file = os.path.join(testdir, 'ubuntu-armhf-overrides.txt')
+        self.init(testdir)
+        compdb = self.get_compdb()
+        self.assertRegex(compdb[0]['command'], '-D_FILE_OFFSET_BITS=64.*-U_FILE_OFFSET_BITS')
+        self.build()
+
+    def test_cross_libdir(self):
+        # When cross compiling "libdir" should default to "lib"
+        # rather than "lib/x86_64-linux-gnu" or something like that.
+        testdir = os.path.join(self.common_test_dir, '1 trivial')
+        self.init(testdir)
+        for i in self.introspect('--buildoptions'):
+            if i['name'] == 'libdir':
+                self.assertEqual(i['value'], 'lib')
+                return
+        self.assertTrue(False, 'Option libdir not in introspect data.')
+
+    def test_cross_libdir_subproject(self):
+        # Guard against a regression where calling "subproject"
+        # would reset the value of libdir to its default value.
+        testdir = os.path.join(self.unit_test_dir, '76 subdir libdir')
+        self.init(testdir, extra_args=['--libdir=fuf'])
+        for i in self.introspect('--buildoptions'):
+            if i['name'] == 'libdir':
+                self.assertEqual(i['value'], 'fuf')
+                return
+        self.assertTrue(False, 'Libdir specified on command line gets reset.')
+
+    def test_std_remains(self):
+        # C_std defined in project options must be in effect also when cross compiling.
+        testdir = os.path.join(self.unit_test_dir, '51 noncross options')
+        self.init(testdir)
+        compdb = self.get_compdb()
+        self.assertRegex(compdb[0]['command'], '-std=c99')
+        self.build()
+
+    @skipIfNoPkgconfig
+    def test_pkg_config_option(self):
+        if not shutil.which('arm-linux-gnueabihf-pkg-config'):
+            raise unittest.SkipTest('Cross-pkgconfig not found.')
+        testdir = os.path.join(self.unit_test_dir, '58 pkg_config_path option')
+        self.init(testdir, extra_args=[
+            '-Dbuild.pkg_config_path=' + os.path.join(testdir, 'build_extra_path'),
+            '-Dpkg_config_path=' + os.path.join(testdir, 'host_extra_path'),
+        ])
+
+
+def should_run_cross_mingw_tests():
+    return shutil.which('x86_64-w64-mingw32-gcc') and not (is_windows() or is_cygwin())
+
+@unittest.skipUnless(not is_windows() and should_run_cross_mingw_tests(), "requires ability to cross compile with MinGW")
+class LinuxCrossMingwTests(BaseLinuxCrossTests):
+    '''
+    Tests that cross-compilation to Windows/MinGW works
+    '''
+
+    def setUp(self):
+        super().setUp()
+        src_root = os.path.dirname(__file__)
+        self.meson_cross_file = os.path.join(src_root, 'cross', 'linux-mingw-w64-64bit.txt')
+
+    def test_exe_wrapper_behaviour(self):
+        '''
+        Test that an exe wrapper that isn't found doesn't cause compiler sanity
+        checks and compiler checks to fail, but causes configure to fail if it
+        requires running a cross-built executable (custom_target or run_target)
+        and causes the tests to be skipped if they are run.
+        '''
+        testdir = os.path.join(self.unit_test_dir, '36 exe_wrapper behaviour')
+        # Configures, builds, and tests fine by default
+        self.init(testdir)
+        self.build()
+        self.run_tests()
+        self.wipe()
+        os.mkdir(self.builddir)
+        # Change cross file to use a non-existing exe_wrapper and it should fail
+        self.meson_cross_file = os.path.join(testdir, 'broken-cross.txt')
+        # Force tracebacks so we can detect them properly
+        env = {'MESON_FORCE_BACKTRACE': '1'}
+        with self.assertRaisesRegex(MesonException, 'exe_wrapper.*target.*use-exe-wrapper'):
+            # Must run in-process or we'll get a generic CalledProcessError
+            self.init(testdir, extra_args='-Drun-target=false',
+                      inprocess=True,
+                      override_envvars=env)
+        with self.assertRaisesRegex(MesonException, 'exe_wrapper.*run target.*run-prog'):
+            # Must run in-process or we'll get a generic CalledProcessError
+            self.init(testdir, extra_args='-Dcustom-target=false',
+                      inprocess=True,
+                      override_envvars=env)
+        self.init(testdir, extra_args=['-Dcustom-target=false', '-Drun-target=false'],
+                  override_envvars=env)
+        self.build()
+        with self.assertRaisesRegex(MesonException, 'exe_wrapper.*PATH'):
+            # Must run in-process or we'll get a generic CalledProcessError
+            self.run_tests(inprocess=True, override_envvars=env)
+
+    @skipIfNoPkgconfig
+    def test_cross_pkg_config_option(self):
+        testdir = os.path.join(self.unit_test_dir, '58 pkg_config_path option')
+        self.init(testdir, extra_args=[
+            '-Dbuild.pkg_config_path=' + os.path.join(testdir, 'build_extra_path'),
+            '-Dpkg_config_path=' + os.path.join(testdir, 'host_extra_path'),
+        ])
+
+
+class PythonTests(BasePlatformTests):
+    '''
+    Tests that verify compilation of python extension modules
+    '''
+
+    def test_versions(self):
+        if self.backend is not Backend.ninja:
+            raise unittest.SkipTest('Skipping python tests with {} backend'.format(self.backend.name))
+
+        testdir = os.path.join(self.src_root, 'test cases', 'unit', '39 python extmodule')
+
+        # No python version specified, this will use meson's python
+        self.init(testdir)
+        self.build()
+        self.run_tests()
+        self.wipe()
+
+        # When specifying a known name, (python2 / python3) the module
+        # will also try 'python' as a fallback and use it if the major
+        # version matches
+        try:
+            self.init(testdir, extra_args=['-Dpython=python2'])
+            self.build()
+            self.run_tests()
+        except unittest.SkipTest:
+            # python2 is not necessarily installed on the test machine,
+            # if it is not, or the python headers can't be found, the test
+            # will raise MESON_SKIP_TEST, we could check beforehand what version
+            # of python is available, but it's a bit of a chicken and egg situation,
+            # as that is the job of the module, so we just ask for forgiveness rather
+            # than permission.
+            pass
+
+        self.wipe()
+
+        for py in ('pypy', 'pypy3'):
+            try:
+                self.init(testdir, extra_args=['-Dpython=%s' % py])
+            except unittest.SkipTest:
+                # Same as above, pypy2 and pypy3 are not expected to be present
+                # on the test system, the test project only raises in these cases
+                continue
+
+            # We have a pypy, this is expected to work
+            self.build()
+            self.run_tests()
+            self.wipe()
+
+        # The test is configured to error out with MESON_SKIP_TEST
+        # in case it could not find python
+        with self.assertRaises(unittest.SkipTest):
+            self.init(testdir, extra_args=['-Dpython=not-python'])
+        self.wipe()
+
+        # While dir is an external command on both Windows and Linux,
+        # it certainly isn't python
+        with self.assertRaises(unittest.SkipTest):
+            self.init(testdir, extra_args=['-Dpython=dir'])
+        self.wipe()
+
+
+class RewriterTests(BasePlatformTests):
+    def setUp(self):
+        super().setUp()
+        self.maxDiff = None
+
+    def prime(self, dirname):
+        copy_tree(os.path.join(self.rewrite_test_dir, dirname), self.builddir)
+
+    def rewrite_raw(self, directory, args):
+        if isinstance(args, str):
+            args = [args]
+        command = self.rewrite_command + ['--verbose', '--skip', '--sourcedir', directory] + args
+        p = subprocess.run(command, stdout=subprocess.PIPE, stderr=subprocess.PIPE,
+                           universal_newlines=True, timeout=60)
+        print('STDOUT:')
+        print(p.stdout)
+        print('STDERR:')
+        print(p.stderr)
+        if p.returncode != 0:
+            if 'MESON_SKIP_TEST' in p.stdout:
+                raise unittest.SkipTest('Project requested skipping.')
+            raise subprocess.CalledProcessError(p.returncode, command, output=p.stdout)
+        if not p.stderr:
+            return {}
+        return json.loads(p.stderr)
+
+    def rewrite(self, directory, args):
+        if isinstance(args, str):
+            args = [args]
+        return self.rewrite_raw(directory, ['command'] + args)
+
+    def test_target_source_list(self):
+        self.prime('1 basic')
+        out = self.rewrite(self.builddir, os.path.join(self.builddir, 'info.json'))
+        expected = {
+            'target': {
+                'trivialprog0@exe': {'name': 'trivialprog0', 'sources': ['main.cpp', 'fileA.cpp', 'fileB.cpp', 'fileC.cpp']},
+                'trivialprog1@exe': {'name': 'trivialprog1', 'sources': ['main.cpp', 'fileA.cpp']},
+                'trivialprog2@exe': {'name': 'trivialprog2', 'sources': ['fileB.cpp', 'fileC.cpp']},
+                'trivialprog3@exe': {'name': 'trivialprog3', 'sources': ['main.cpp', 'fileA.cpp']},
+                'trivialprog4@exe': {'name': 'trivialprog4', 'sources': ['main.cpp', 'fileA.cpp']},
+                'trivialprog5@exe': {'name': 'trivialprog5', 'sources': ['main.cpp', 'fileB.cpp', 'fileC.cpp']},
+                'trivialprog6@exe': {'name': 'trivialprog6', 'sources': ['main.cpp', 'fileA.cpp']},
+                'trivialprog7@exe': {'name': 'trivialprog7', 'sources': ['fileB.cpp', 'fileC.cpp', 'main.cpp', 'fileA.cpp']},
+                'trivialprog8@exe': {'name': 'trivialprog8', 'sources': ['main.cpp', 'fileA.cpp']},
+                'trivialprog9@exe': {'name': 'trivialprog9', 'sources': ['main.cpp', 'fileA.cpp']},
+            }
+        }
+        self.assertDictEqual(out, expected)
+
+    def test_target_add_sources(self):
+        self.prime('1 basic')
+        out = self.rewrite(self.builddir, os.path.join(self.builddir, 'addSrc.json'))
+        expected = {
+            'target': {
+                'trivialprog0@exe': {'name': 'trivialprog0', 'sources': ['a1.cpp', 'a2.cpp', 'a6.cpp', 'fileA.cpp', 'main.cpp', 'a7.cpp', 'fileB.cpp', 'fileC.cpp']},
+                'trivialprog1@exe': {'name': 'trivialprog1', 'sources': ['a1.cpp', 'a2.cpp', 'a6.cpp', 'fileA.cpp', 'main.cpp']},
+                'trivialprog2@exe': {'name': 'trivialprog2', 'sources': ['a7.cpp', 'fileB.cpp', 'fileC.cpp']},
+                'trivialprog3@exe': {'name': 'trivialprog3', 'sources': ['a5.cpp', 'fileA.cpp', 'main.cpp']},
+                'trivialprog4@exe': {'name': 'trivialprog4', 'sources': ['a5.cpp', 'main.cpp', 'fileA.cpp']},
+                'trivialprog5@exe': {'name': 'trivialprog5', 'sources': ['a3.cpp', 'main.cpp', 'a7.cpp', 'fileB.cpp', 'fileC.cpp']},
+                'trivialprog6@exe': {'name': 'trivialprog6', 'sources': ['main.cpp', 'fileA.cpp', 'a4.cpp']},
+                'trivialprog7@exe': {'name': 'trivialprog7', 'sources': ['fileB.cpp', 'fileC.cpp', 'a1.cpp', 'a2.cpp', 'a6.cpp', 'fileA.cpp', 'main.cpp']},
+                'trivialprog8@exe': {'name': 'trivialprog8', 'sources': ['a1.cpp', 'a2.cpp', 'a6.cpp', 'fileA.cpp', 'main.cpp']},
+                'trivialprog9@exe': {'name': 'trivialprog9', 'sources': ['a1.cpp', 'a2.cpp', 'a6.cpp', 'fileA.cpp', 'main.cpp']},
+            }
+        }
+        self.assertDictEqual(out, expected)
+
+        # Check the written file
+        out = self.rewrite(self.builddir, os.path.join(self.builddir, 'info.json'))
+        self.assertDictEqual(out, expected)
+
+    def test_target_add_sources_abs(self):
+        self.prime('1 basic')
+        abs_src = [os.path.join(self.builddir, x) for x in ['a1.cpp', 'a2.cpp', 'a6.cpp']]
+        add = json.dumps([{"type": "target", "target": "trivialprog1", "operation": "src_add", "sources": abs_src}])
+        inf = json.dumps([{"type": "target", "target": "trivialprog1", "operation": "info"}])
+        self.rewrite(self.builddir, add)
+        out = self.rewrite(self.builddir, inf)
+        expected = {'target': {'trivialprog1@exe': {'name': 'trivialprog1', 'sources': ['a1.cpp', 'a2.cpp', 'a6.cpp', 'fileA.cpp', 'main.cpp']}}}
+        self.assertDictEqual(out, expected)
+
+    def test_target_remove_sources(self):
+        self.prime('1 basic')
+        out = self.rewrite(self.builddir, os.path.join(self.builddir, 'rmSrc.json'))
+        expected = {
+            'target': {
+                'trivialprog0@exe': {'name': 'trivialprog0', 'sources': ['main.cpp', 'fileC.cpp']},
+                'trivialprog1@exe': {'name': 'trivialprog1', 'sources': ['main.cpp']},
+                'trivialprog2@exe': {'name': 'trivialprog2', 'sources': ['fileC.cpp']},
+                'trivialprog3@exe': {'name': 'trivialprog3', 'sources': ['main.cpp']},
+                'trivialprog4@exe': {'name': 'trivialprog4', 'sources': ['main.cpp']},
+                'trivialprog5@exe': {'name': 'trivialprog5', 'sources': ['main.cpp', 'fileC.cpp']},
+                'trivialprog6@exe': {'name': 'trivialprog6', 'sources': ['main.cpp']},
+                'trivialprog7@exe': {'name': 'trivialprog7', 'sources': ['fileC.cpp', 'main.cpp']},
+                'trivialprog8@exe': {'name': 'trivialprog8', 'sources': ['main.cpp']},
+                'trivialprog9@exe': {'name': 'trivialprog9', 'sources': ['main.cpp']},
+            }
+        }
+        self.assertDictEqual(out, expected)
+
+        # Check the written file
+        out = self.rewrite(self.builddir, os.path.join(self.builddir, 'info.json'))
+        self.assertDictEqual(out, expected)
+
+    def test_target_subdir(self):
+        self.prime('2 subdirs')
+        out = self.rewrite(self.builddir, os.path.join(self.builddir, 'addSrc.json'))
+        expected = {'name': 'something', 'sources': ['first.c', 'second.c', 'third.c']}
+        self.assertDictEqual(list(out['target'].values())[0], expected)
+
+        # Check the written file
+        out = self.rewrite(self.builddir, os.path.join(self.builddir, 'info.json'))
+        self.assertDictEqual(list(out['target'].values())[0], expected)
+
+    def test_target_remove(self):
+        self.prime('1 basic')
+        self.rewrite(self.builddir, os.path.join(self.builddir, 'rmTgt.json'))
+        out = self.rewrite(self.builddir, os.path.join(self.builddir, 'info.json'))
+
+        expected = {
+            'target': {
+                'trivialprog2@exe': {'name': 'trivialprog2', 'sources': ['fileB.cpp', 'fileC.cpp']},
+                'trivialprog3@exe': {'name': 'trivialprog3', 'sources': ['main.cpp', 'fileA.cpp']},
+                'trivialprog4@exe': {'name': 'trivialprog4', 'sources': ['main.cpp', 'fileA.cpp']},
+                'trivialprog5@exe': {'name': 'trivialprog5', 'sources': ['main.cpp', 'fileB.cpp', 'fileC.cpp']},
+                'trivialprog6@exe': {'name': 'trivialprog6', 'sources': ['main.cpp', 'fileA.cpp']},
+                'trivialprog7@exe': {'name': 'trivialprog7', 'sources': ['fileB.cpp', 'fileC.cpp', 'main.cpp', 'fileA.cpp']},
+                'trivialprog8@exe': {'name': 'trivialprog8', 'sources': ['main.cpp', 'fileA.cpp']},
+            }
+        }
+        self.assertDictEqual(out, expected)
+
+    def test_tatrget_add(self):
+        self.prime('1 basic')
+        self.rewrite(self.builddir, os.path.join(self.builddir, 'addTgt.json'))
+        out = self.rewrite(self.builddir, os.path.join(self.builddir, 'info.json'))
+
+        expected = {
+            'target': {
+                'trivialprog0@exe': {'name': 'trivialprog0', 'sources': ['main.cpp', 'fileA.cpp', 'fileB.cpp', 'fileC.cpp']},
+                'trivialprog1@exe': {'name': 'trivialprog1', 'sources': ['main.cpp', 'fileA.cpp']},
+                'trivialprog2@exe': {'name': 'trivialprog2', 'sources': ['fileB.cpp', 'fileC.cpp']},
+                'trivialprog3@exe': {'name': 'trivialprog3', 'sources': ['main.cpp', 'fileA.cpp']},
+                'trivialprog4@exe': {'name': 'trivialprog4', 'sources': ['main.cpp', 'fileA.cpp']},
+                'trivialprog5@exe': {'name': 'trivialprog5', 'sources': ['main.cpp', 'fileB.cpp', 'fileC.cpp']},
+                'trivialprog6@exe': {'name': 'trivialprog6', 'sources': ['main.cpp', 'fileA.cpp']},
+                'trivialprog7@exe': {'name': 'trivialprog7', 'sources': ['fileB.cpp', 'fileC.cpp', 'main.cpp', 'fileA.cpp']},
+                'trivialprog8@exe': {'name': 'trivialprog8', 'sources': ['main.cpp', 'fileA.cpp']},
+                'trivialprog9@exe': {'name': 'trivialprog9', 'sources': ['main.cpp', 'fileA.cpp']},
+                'trivialprog10@sha': {'name': 'trivialprog10', 'sources': ['new1.cpp', 'new2.cpp']},
+            }
+        }
+        self.assertDictEqual(out, expected)
+
+    def test_target_remove_subdir(self):
+        self.prime('2 subdirs')
+        self.rewrite(self.builddir, os.path.join(self.builddir, 'rmTgt.json'))
+        out = self.rewrite(self.builddir, os.path.join(self.builddir, 'info.json'))
+        self.assertDictEqual(out, {})
+
+    def test_target_add_subdir(self):
+        self.prime('2 subdirs')
+        self.rewrite(self.builddir, os.path.join(self.builddir, 'addTgt.json'))
+        out = self.rewrite(self.builddir, os.path.join(self.builddir, 'info.json'))
+        expected = {'name': 'something', 'sources': ['first.c', 'second.c']}
+        self.assertDictEqual(out['target']['94b671c@@something@exe'], expected)
+
+    def test_target_source_sorting(self):
+        self.prime('5 sorting')
+        add_json = json.dumps([{'type': 'target', 'target': 'exe1', 'operation': 'src_add', 'sources': ['a666.c']}])
+        inf_json = json.dumps([{'type': 'target', 'target': 'exe1', 'operation': 'info'}])
+        out = self.rewrite(self.builddir, add_json)
+        out = self.rewrite(self.builddir, inf_json)
+        expected = {
+            'target': {
+                'exe1@exe': {
+                    'name': 'exe1',
+                    'sources': [
+                        'aaa/a/a1.c',
+                        'aaa/b/b1.c',
+                        'aaa/b/b2.c',
+                        'aaa/f1.c',
+                        'aaa/f2.c',
+                        'aaa/f3.c',
+                        'bbb/a/b1.c',
+                        'bbb/b/b2.c',
+                        'bbb/c1/b5.c',
+                        'bbb/c2/b7.c',
+                        'bbb/c10/b6.c',
+                        'bbb/a4.c',
+                        'bbb/b3.c',
+                        'bbb/b4.c',
+                        'bbb/b5.c',
+                        'a1.c',
+                        'a2.c',
+                        'a3.c',
+                        'a10.c',
+                        'a20.c',
+                        'a30.c',
+                        'a100.c',
+                        'a101.c',
+                        'a110.c',
+                        'a210.c',
+                        'a666.c',
+                        'b1.c',
+                        'c2.c'
+                    ]
+                }
+            }
+        }
+        self.assertDictEqual(out, expected)
+
+    def test_target_same_name_skip(self):
+        self.prime('4 same name targets')
+        out = self.rewrite(self.builddir, os.path.join(self.builddir, 'addSrc.json'))
+        out = self.rewrite(self.builddir, os.path.join(self.builddir, 'info.json'))
+        expected = {'name': 'myExe', 'sources': ['main.cpp']}
+        self.assertEqual(len(out['target']), 2)
+        for val in out['target'].values():
+            self.assertDictEqual(expected, val)
+
+    def test_kwargs_info(self):
+        self.prime('3 kwargs')
+        out = self.rewrite(self.builddir, os.path.join(self.builddir, 'info.json'))
+        expected = {
+            'kwargs': {
+                'project#/': {'version': '0.0.1'},
+                'target#tgt1': {'build_by_default': True},
+                'dependency#dep1': {'required': False}
+            }
+        }
+        self.assertDictEqual(out, expected)
+
+    def test_kwargs_set(self):
+        self.prime('3 kwargs')
+        self.rewrite(self.builddir, os.path.join(self.builddir, 'set.json'))
+        out = self.rewrite(self.builddir, os.path.join(self.builddir, 'info.json'))
+        expected = {
+            'kwargs': {
+                'project#/': {'version': '0.0.2', 'meson_version': '0.50.0', 'license': ['GPL', 'MIT']},
+                'target#tgt1': {'build_by_default': False, 'build_rpath': '/usr/local', 'dependencies': 'dep1'},
+                'dependency#dep1': {'required': True, 'method': 'cmake'}
+            }
+        }
+        self.assertDictEqual(out, expected)
+
+    def test_kwargs_add(self):
+        self.prime('3 kwargs')
+        self.rewrite(self.builddir, os.path.join(self.builddir, 'add.json'))
+        out = self.rewrite(self.builddir, os.path.join(self.builddir, 'info.json'))
+        expected = {
+            'kwargs': {
+                'project#/': {'version': '0.0.1', 'license': ['GPL', 'MIT', 'BSD']},
+                'target#tgt1': {'build_by_default': True},
+                'dependency#dep1': {'required': False}
+            }
+        }
+        self.assertDictEqual(out, expected)
+
+    def test_kwargs_remove(self):
+        self.prime('3 kwargs')
+        self.rewrite(self.builddir, os.path.join(self.builddir, 'remove.json'))
+        out = self.rewrite(self.builddir, os.path.join(self.builddir, 'info.json'))
+        expected = {
+            'kwargs': {
+                'project#/': {'version': '0.0.1', 'license': 'GPL'},
+                'target#tgt1': {'build_by_default': True},
+                'dependency#dep1': {'required': False}
+            }
+        }
+        self.assertDictEqual(out, expected)
+
+    def test_kwargs_remove_regex(self):
+        self.prime('3 kwargs')
+        self.rewrite(self.builddir, os.path.join(self.builddir, 'remove_regex.json'))
+        out = self.rewrite(self.builddir, os.path.join(self.builddir, 'info.json'))
+        expected = {
+            'kwargs': {
+                'project#/': {'version': '0.0.1', 'default_options': ['buildtype=release', 'debug=true']},
+                'target#tgt1': {'build_by_default': True},
+                'dependency#dep1': {'required': False}
+            }
+        }
+        self.assertDictEqual(out, expected)
+
+    def test_kwargs_delete(self):
+        self.prime('3 kwargs')
+        self.rewrite(self.builddir, os.path.join(self.builddir, 'delete.json'))
+        out = self.rewrite(self.builddir, os.path.join(self.builddir, 'info.json'))
+        expected = {
+            'kwargs': {
+                'project#/': {},
+                'target#tgt1': {},
+                'dependency#dep1': {'required': False}
+            }
+        }
+        self.assertDictEqual(out, expected)
+
+    def test_default_options_set(self):
+        self.prime('3 kwargs')
+        self.rewrite(self.builddir, os.path.join(self.builddir, 'defopts_set.json'))
+        out = self.rewrite(self.builddir, os.path.join(self.builddir, 'info.json'))
+        expected = {
+            'kwargs': {
+                'project#/': {'version': '0.0.1', 'default_options': ['buildtype=release', 'debug=True', 'cpp_std=c++11']},
+                'target#tgt1': {'build_by_default': True},
+                'dependency#dep1': {'required': False}
+            }
+        }
+        self.assertDictEqual(out, expected)
+
+    def test_default_options_delete(self):
+        self.prime('3 kwargs')
+        self.rewrite(self.builddir, os.path.join(self.builddir, 'defopts_delete.json'))
+        out = self.rewrite(self.builddir, os.path.join(self.builddir, 'info.json'))
+        expected = {
+            'kwargs': {
+                'project#/': {'version': '0.0.1', 'default_options': ['cpp_std=c++14', 'debug=true']},
+                'target#tgt1': {'build_by_default': True},
+                'dependency#dep1': {'required': False}
+            }
+        }
+        self.assertDictEqual(out, expected)
+
+class NativeFileTests(BasePlatformTests):
+
+    def setUp(self):
+        super().setUp()
+        self.testcase = os.path.join(self.unit_test_dir, '47 native file binary')
+        self.current_config = 0
+        self.current_wrapper = 0
+
+    def helper_create_native_file(self, values):
+        """Create a config file as a temporary file.
+
+        values should be a nested dictionary structure of {section: {key:
+        value}}
+        """
+        filename = os.path.join(self.builddir, 'generated{}.config'.format(self.current_config))
+        self.current_config += 1
+        with open(filename, 'wt') as f:
+            for section, entries in values.items():
+                f.write('[{}]\n'.format(section))
+                for k, v in entries.items():
+                    f.write("{}='{}'\n".format(k, v))
+        return filename
+
+    def helper_create_binary_wrapper(self, binary, dir_=None, extra_args=None, **kwargs):
+        """Creates a wrapper around a binary that overrides specific values."""
+        filename = os.path.join(dir_ or self.builddir, 'binary_wrapper{}.py'.format(self.current_wrapper))
+        extra_args = extra_args or {}
+        self.current_wrapper += 1
+        if is_haiku():
+            chbang = '#!/bin/env python3'
+        else:
+            chbang = '#!/usr/bin/env python3'
+
+        with open(filename, 'wt') as f:
+            f.write(textwrap.dedent('''\
+                {}
+                import argparse
+                import subprocess
+                import sys
+
+                def main():
+                    parser = argparse.ArgumentParser()
+                '''.format(chbang)))
+            for name in chain(extra_args, kwargs):
+                f.write('    parser.add_argument("-{0}", "--{0}", action="store_true")\n'.format(name))
+            f.write('    args, extra_args = parser.parse_known_args()\n')
+            for name, value in chain(extra_args.items(), kwargs.items()):
+                f.write('    if args.{}:\n'.format(name))
+                f.write('        print("{}", file=sys.{})\n'.format(value, kwargs.get('outfile', 'stdout')))
+                f.write('        sys.exit(0)\n')
+            f.write(textwrap.dedent('''
+                    ret = subprocess.run(
+                        ["{}"] + extra_args,
+                        stdout=subprocess.PIPE,
+                        stderr=subprocess.PIPE)
+                    print(ret.stdout.decode('utf-8'))
+                    print(ret.stderr.decode('utf-8'), file=sys.stderr)
+                    sys.exit(ret.returncode)
+
+                if __name__ == '__main__':
+                    main()
+                '''.format(binary)))
+
+        if not is_windows():
+            os.chmod(filename, 0o755)
+            return filename
+
+        # On windows we need yet another level of indirection, as cmd cannot
+        # invoke python files itself, so instead we generate a .bat file, which
+        # invokes our python wrapper
+        batfile = os.path.join(self.builddir, 'binary_wrapper{}.bat'.format(self.current_wrapper))
+        with open(batfile, 'wt') as f:
+            f.write(r'@{} {} %*'.format(sys.executable, filename))
+        return batfile
+
+    def helper_for_compiler(self, lang, cb, for_machine = MachineChoice.HOST):
+        """Helper for generating tests for overriding compilers for langaugages
+        with more than one implementation, such as C, C++, ObjC, ObjC++, and D.
+        """
+        env = get_fake_env()
+        getter = getattr(env, 'detect_{}_compiler'.format(lang))
+        getter = functools.partial(getter, for_machine)
+        cc = getter()
+        binary, newid = cb(cc)
+        env.binaries[for_machine].binaries[lang] = binary
+        compiler = getter()
+        self.assertEqual(compiler.id, newid)
+
+    def test_multiple_native_files_override(self):
+        wrapper = self.helper_create_binary_wrapper('bash', version='foo')
+        config = self.helper_create_native_file({'binaries': {'bash': wrapper}})
+        wrapper = self.helper_create_binary_wrapper('bash', version='12345')
+        config2 = self.helper_create_native_file({'binaries': {'bash': wrapper}})
+        self.init(self.testcase, extra_args=[
+            '--native-file', config, '--native-file', config2,
+            '-Dcase=find_program'])
+
+    # This test hangs on cygwin.
+    @unittest.skipIf(os.name != 'posix' or is_cygwin(), 'Uses fifos, which are not available on non Unix OSes.')
+    def test_native_file_is_pipe(self):
+        fifo = os.path.join(self.builddir, 'native.file')
+        os.mkfifo(fifo)
+        with tempfile.TemporaryDirectory() as d:
+            wrapper = self.helper_create_binary_wrapper('bash', d, version='12345')
+
+            def filler():
+                with open(fifo, 'w') as f:
+                    f.write('[binaries]\n')
+                    f.write("bash = '{}'\n".format(wrapper))
+
+            thread = threading.Thread(target=filler)
+            thread.start()
+
+            self.init(self.testcase, extra_args=['--native-file', fifo, '-Dcase=find_program'])
+
+            thread.join()
+            os.unlink(fifo)
+
+            self.init(self.testcase, extra_args=['--wipe'])
+
+    def test_multiple_native_files(self):
+        wrapper = self.helper_create_binary_wrapper('bash', version='12345')
+        config = self.helper_create_native_file({'binaries': {'bash': wrapper}})
+        wrapper = self.helper_create_binary_wrapper('python')
+        config2 = self.helper_create_native_file({'binaries': {'python': wrapper}})
+        self.init(self.testcase, extra_args=[
+            '--native-file', config, '--native-file', config2,
+            '-Dcase=find_program'])
+
+    def _simple_test(self, case, binary, entry=None):
+        wrapper = self.helper_create_binary_wrapper(binary, version='12345')
+        config = self.helper_create_native_file({'binaries': {entry or binary: wrapper}})
+        self.init(self.testcase, extra_args=['--native-file', config, '-Dcase={}'.format(case)])
+
+    def test_find_program(self):
+        self._simple_test('find_program', 'bash')
+
+    def test_config_tool_dep(self):
+        # Do the skip at this level to avoid screwing up the cache
+        if mesonbuild.environment.detect_msys2_arch():
+            raise unittest.SkipTest('Skipped due to problems with LLVM on MSYS2')
+        if not shutil.which('llvm-config'):
+            raise unittest.SkipTest('No llvm-installed, cannot test')
+        self._simple_test('config_dep', 'llvm-config')
+
+    def test_python3_module(self):
+        self._simple_test('python3', 'python3')
+
+    def test_python_module(self):
+        if is_windows():
+            # Bat adds extra crap to stdout, so the version check logic in the
+            # python module breaks. This is fine on other OSes because they
+            # don't need the extra indirection.
+            raise unittest.SkipTest('bat indirection breaks internal sanity checks.')
+        elif is_osx():
+            binary = 'python'
+        else:
+            binary = 'python2'
+
+            # We not have python2, check for it
+            for v in ['2', '2.7', '-2.7']:
+                rc = subprocess.call(['pkg-config', '--cflags', 'python{}'.format(v)],
+                                     stdout=subprocess.DEVNULL,
+                                     stderr=subprocess.DEVNULL)
+                if rc == 0:
+                    break
+            else:
+                raise unittest.SkipTest('Not running Python 2 tests because dev packages not installed.')
+        self._simple_test('python', binary, entry='python')
+
+    @unittest.skipIf(is_windows(), 'Setting up multiple compilers on windows is hard')
+    @skip_if_env_set('CC')
+    def test_c_compiler(self):
+        def cb(comp):
+            if comp.id == 'gcc':
+                if not shutil.which('clang'):
+                    raise unittest.SkipTest('Only one compiler found, cannot test.')
+                return 'clang', 'clang'
+            if not is_real_gnu_compiler(shutil.which('gcc')):
+                raise unittest.SkipTest('Only one compiler found, cannot test.')
+            return 'gcc', 'gcc'
+        self.helper_for_compiler('c', cb)
+
+    @unittest.skipIf(is_windows(), 'Setting up multiple compilers on windows is hard')
+    @skip_if_env_set('CXX')
+    def test_cpp_compiler(self):
+        def cb(comp):
+            if comp.id == 'gcc':
+                if not shutil.which('clang++'):
+                    raise unittest.SkipTest('Only one compiler found, cannot test.')
+                return 'clang++', 'clang'
+            if not is_real_gnu_compiler(shutil.which('g++')):
+                raise unittest.SkipTest('Only one compiler found, cannot test.')
+            return 'g++', 'gcc'
+        self.helper_for_compiler('cpp', cb)
+
+    @skip_if_not_language('objc')
+    @skip_if_env_set('OBJC')
+    def test_objc_compiler(self):
+        def cb(comp):
+            if comp.id == 'gcc':
+                if not shutil.which('clang'):
+                    raise unittest.SkipTest('Only one compiler found, cannot test.')
+                return 'clang', 'clang'
+            if not is_real_gnu_compiler(shutil.which('gcc')):
+                raise unittest.SkipTest('Only one compiler found, cannot test.')
+            return 'gcc', 'gcc'
+        self.helper_for_compiler('objc', cb)
+
+    @skip_if_not_language('objcpp')
+    @skip_if_env_set('OBJCXX')
+    def test_objcpp_compiler(self):
+        def cb(comp):
+            if comp.id == 'gcc':
+                if not shutil.which('clang++'):
+                    raise unittest.SkipTest('Only one compiler found, cannot test.')
+                return 'clang++', 'clang'
+            if not is_real_gnu_compiler(shutil.which('g++')):
+                raise unittest.SkipTest('Only one compiler found, cannot test.')
+            return 'g++', 'gcc'
+        self.helper_for_compiler('objcpp', cb)
+
+    @skip_if_not_language('d')
+    @skip_if_env_set('DC')
+    def test_d_compiler(self):
+        def cb(comp):
+            if comp.id == 'dmd':
+                if shutil.which('ldc'):
+                    return 'ldc', 'ldc'
+                elif shutil.which('gdc'):
+                    return 'gdc', 'gdc'
+                else:
+                    raise unittest.SkipTest('No alternative dlang compiler found.')
+            if shutil.which('dmd'):
+                return 'dmd', 'dmd'
+            raise unittest.SkipTest('No alternative dlang compiler found.')
+        self.helper_for_compiler('d', cb)
+
+    @skip_if_not_language('cs')
+    @skip_if_env_set('CSC')
+    def test_cs_compiler(self):
+        def cb(comp):
+            if comp.id == 'csc':
+                if not shutil.which('mcs'):
+                    raise unittest.SkipTest('No alternate C# implementation.')
+                return 'mcs', 'mcs'
+            if not shutil.which('csc'):
+                raise unittest.SkipTest('No alternate C# implementation.')
+            return 'csc', 'csc'
+        self.helper_for_compiler('cs', cb)
+
+    @skip_if_not_language('fortran')
+    @skip_if_env_set('FC')
+    def test_fortran_compiler(self):
+        def cb(comp):
+            if comp.id == 'lcc':
+                if shutil.which('lfortran'):
+                    return 'lfortran', 'lcc'
+                raise unittest.SkipTest('No alternate Fortran implementation.')
+            elif comp.id == 'gcc':
+                if shutil.which('ifort'):
+                    # There is an ICC for windows (windows build, linux host),
+                    # but we don't support that ATM so lets not worry about it.
+                    if is_windows():
+                        return 'ifort', 'intel-cl'
+                    return 'ifort', 'intel'
+                elif shutil.which('flang'):
+                    return 'flang', 'flang'
+                elif shutil.which('pgfortran'):
+                    return 'pgfortran', 'pgi'
+                # XXX: there are several other fortran compilers meson
+                # supports, but I don't have any of them to test with
+                raise unittest.SkipTest('No alternate Fortran implementation.')
+            if not shutil.which('gfortran'):
+                raise unittest.SkipTest('No alternate Fortran implementation.')
+            return 'gfortran', 'gcc'
+        self.helper_for_compiler('fortran', cb)
+
+    def _single_implementation_compiler(self, lang, binary, version_str, version):
+        """Helper for languages with a single (supported) implementation.
+
+        Builds a wrapper around the compiler to override the version.
+        """
+        wrapper = self.helper_create_binary_wrapper(binary, version=version_str)
+        env = get_fake_env()
+        getter = getattr(env, 'detect_{}_compiler'.format(lang))
+        getter = functools.partial(getter, MachineChoice.HOST)
+        env.binaries.host.binaries[lang] = wrapper
+        compiler = getter()
+        self.assertEqual(compiler.version, version)
+
+    @skip_if_not_language('vala')
+    @skip_if_env_set('VALAC')
+    def test_vala_compiler(self):
+        self._single_implementation_compiler(
+            'vala', 'valac', 'Vala 1.2345', '1.2345')
+
+    @skip_if_not_language('rust')
+    @skip_if_env_set('RUSTC')
+    def test_rust_compiler(self):
+        self._single_implementation_compiler(
+            'rust', 'rustc', 'rustc 1.2345', '1.2345')
+
+    @skip_if_not_language('java')
+    def test_java_compiler(self):
+        self._single_implementation_compiler(
+            'java', 'javac', 'javac 9.99.77', '9.99.77')
+
+    @skip_if_not_language('swift')
+    def test_swift_compiler(self):
+        wrapper = self.helper_create_binary_wrapper(
+            'swiftc', version='Swift 1.2345', outfile='stderr',
+            extra_args={'Xlinker': 'macosx_version. PROJECT:ld - 1.2.3'})
+        env = get_fake_env()
+        env.binaries.host.binaries['swift'] = wrapper
+        compiler = env.detect_swift_compiler(MachineChoice.HOST)
+        self.assertEqual(compiler.version, '1.2345')
+
+    def test_native_file_dirs(self):
+        testcase = os.path.join(self.unit_test_dir, '60 native file override')
+        self.init(testcase, default_args=False,
+                  extra_args=['--native-file', os.path.join(testcase, 'nativefile')])
+
+    def test_native_file_dirs_overriden(self):
+        testcase = os.path.join(self.unit_test_dir, '60 native file override')
+        self.init(testcase, default_args=False,
+                  extra_args=['--native-file', os.path.join(testcase, 'nativefile'),
+                              '-Ddef_libdir=liblib', '-Dlibdir=liblib'])
+
+    def test_compile_sys_path(self):
+        """Compiling with a native file stored in a system path works.
+
+        There was a bug which caused the paths to be stored incorrectly and
+        would result in ninja invoking meson in an infinite loop. This tests
+        for that by actually invoking ninja.
+        """
+        testcase = os.path.join(self.common_test_dir, '1 trivial')
+
+        # It really doesn't matter what's in the native file, just that it exists
+        config = self.helper_create_native_file({'binaries': {'bash': 'false'}})
+
+        self.init(testcase, extra_args=['--native-file', config])
+        self.build()
+
+
+class CrossFileTests(BasePlatformTests):
+
+    """Tests for cross file functionality not directly related to
+    cross compiling.
+
+    This is mainly aimed to testing overrides from cross files.
+    """
+
+    def _cross_file_generator(self, *, needs_exe_wrapper: bool = False,
+                              exe_wrapper: T.Optional[T.List[str]] = None) -> str:
+        if is_windows():
+            raise unittest.SkipTest('Cannot run this test on non-mingw/non-cygwin windows')
+        if is_sunos():
+            cc = 'gcc'
+        else:
+            cc = 'cc'
+
+        return textwrap.dedent("""\
+            [binaries]
+            c = '/usr/bin/{}'
+            ar = '/usr/bin/ar'
+            strip = '/usr/bin/ar'
+            {}
+
+            [properties]
+            needs_exe_wrapper = {}
+
+            [host_machine]
+            system = 'linux'
+            cpu_family = 'x86'
+            cpu = 'i686'
+            endian = 'little'
+            """.format(cc,
+                       'exe_wrapper = {}'.format(str(exe_wrapper)) if exe_wrapper is not None else '',
+                       needs_exe_wrapper))
+
+    def _stub_exe_wrapper(self) -> str:
+        return textwrap.dedent('''\
+            #!/usr/bin/env python3
+            import subprocess
+            import sys
+
+            sys.exit(subprocess.run(sys.argv[1:]).returncode)
+            ''')
+
+    def test_needs_exe_wrapper_true(self):
+        testdir = os.path.join(self.unit_test_dir, '72 cross test passed')
+        with tempfile.TemporaryDirectory() as d:
+            p = Path(d) / 'crossfile'
+            with p.open('wt') as f:
+                f.write(self._cross_file_generator(needs_exe_wrapper=True))
+            self.init(testdir, extra_args=['--cross-file=' + str(p)])
+            out = self.run_target('test')
+            self.assertRegex(out, r'Skipped:\s*1\s*\n')
+
+    def test_needs_exe_wrapper_false(self):
+        testdir = os.path.join(self.unit_test_dir, '72 cross test passed')
+        with tempfile.TemporaryDirectory() as d:
+            p = Path(d) / 'crossfile'
+            with p.open('wt') as f:
+                f.write(self._cross_file_generator(needs_exe_wrapper=False))
+            self.init(testdir, extra_args=['--cross-file=' + str(p)])
+            out = self.run_target('test')
+            self.assertNotRegex(out, r'Skipped:\s*1\n')
+
+    def test_needs_exe_wrapper_true_wrapper(self):
+        testdir = os.path.join(self.unit_test_dir, '72 cross test passed')
+        with tempfile.TemporaryDirectory() as d:
+            s = Path(d) / 'wrapper.py'
+            with s.open('wt') as f:
+                f.write(self._stub_exe_wrapper())
+            s.chmod(0o774)
+            p = Path(d) / 'crossfile'
+            with p.open('wt') as f:
+                f.write(self._cross_file_generator(
+                    needs_exe_wrapper=True,
+                    exe_wrapper=[str(s)]))
+
+            self.init(testdir, extra_args=['--cross-file=' + str(p), '-Dexpect=true'])
+            out = self.run_target('test')
+            self.assertRegex(out, r'Ok:\s*3\s*\n')
+
+    def test_cross_exe_passed_no_wrapper(self):
+        testdir = os.path.join(self.unit_test_dir, '72 cross test passed')
+        with tempfile.TemporaryDirectory() as d:
+            p = Path(d) / 'crossfile'
+            with p.open('wt') as f:
+                f.write(self._cross_file_generator(needs_exe_wrapper=True))
+
+            self.init(testdir, extra_args=['--cross-file=' + str(p)])
+            self.build()
+            out = self.run_target('test')
+            self.assertRegex(out, r'Skipped:\s*1\s*\n')
+
+    # The test uses mocking and thus requires that the current process is the
+    # one to run the Meson steps. If we are using an external test executable
+    # (most commonly in Debian autopkgtests) then the mocking won't work.
+    @unittest.skipIf('MESON_EXE' in os.environ, 'MESON_EXE is defined, can not use mocking.')
+    def test_cross_file_system_paths(self):
+        if is_windows():
+            raise unittest.SkipTest('system crossfile paths not defined for Windows (yet)')
+
+        testdir = os.path.join(self.common_test_dir, '1 trivial')
+        cross_content = self._cross_file_generator()
+        with tempfile.TemporaryDirectory() as d:
+            dir_ = os.path.join(d, 'meson', 'cross')
+            os.makedirs(dir_)
+            with tempfile.NamedTemporaryFile('w', dir=dir_, delete=False) as f:
+                f.write(cross_content)
+            name = os.path.basename(f.name)
+
+            with mock.patch.dict(os.environ, {'XDG_DATA_HOME': d}):
+                self.init(testdir, extra_args=['--cross-file=' + name], inprocess=True)
+                self.wipe()
+
+            with mock.patch.dict(os.environ, {'XDG_DATA_DIRS': d}):
+                os.environ.pop('XDG_DATA_HOME', None)
+                self.init(testdir, extra_args=['--cross-file=' + name], inprocess=True)
+                self.wipe()
+
+        with tempfile.TemporaryDirectory() as d:
+            dir_ = os.path.join(d, '.local', 'share', 'meson', 'cross')
+            os.makedirs(dir_)
+            with tempfile.NamedTemporaryFile('w', dir=dir_, delete=False) as f:
+                f.write(cross_content)
+            name = os.path.basename(f.name)
+
+            # If XDG_DATA_HOME is set in the environment running the
+            # tests this test will fail, os mock the environment, pop
+            # it, then test
+            with mock.patch.dict(os.environ):
+                os.environ.pop('XDG_DATA_HOME', None)
+                with mock.patch('mesonbuild.coredata.os.path.expanduser', lambda x: x.replace('~', d)):
+                    self.init(testdir, extra_args=['--cross-file=' + name], inprocess=True)
+                    self.wipe()
+
+    def test_cross_file_dirs(self):
+        testcase = os.path.join(self.unit_test_dir, '60 native file override')
+        self.init(testcase, default_args=False,
+                  extra_args=['--native-file', os.path.join(testcase, 'nativefile'),
+                              '--cross-file', os.path.join(testcase, 'crossfile'),
+                              '-Ddef_bindir=binbar',
+                              '-Ddef_datadir=databar',
+                              '-Ddef_includedir=includebar',
+                              '-Ddef_infodir=infobar',
+                              '-Ddef_libdir=libbar',
+                              '-Ddef_libexecdir=libexecbar',
+                              '-Ddef_localedir=localebar',
+                              '-Ddef_localstatedir=localstatebar',
+                              '-Ddef_mandir=manbar',
+                              '-Ddef_sbindir=sbinbar',
+                              '-Ddef_sharedstatedir=sharedstatebar',
+                              '-Ddef_sysconfdir=sysconfbar'])
+
+    def test_cross_file_dirs_overriden(self):
+        testcase = os.path.join(self.unit_test_dir, '60 native file override')
+        self.init(testcase, default_args=False,
+                  extra_args=['--native-file', os.path.join(testcase, 'nativefile'),
+                              '--cross-file', os.path.join(testcase, 'crossfile'),
+                              '-Ddef_libdir=liblib', '-Dlibdir=liblib',
+                              '-Ddef_bindir=binbar',
+                              '-Ddef_datadir=databar',
+                              '-Ddef_includedir=includebar',
+                              '-Ddef_infodir=infobar',
+                              '-Ddef_libexecdir=libexecbar',
+                              '-Ddef_localedir=localebar',
+                              '-Ddef_localstatedir=localstatebar',
+                              '-Ddef_mandir=manbar',
+                              '-Ddef_sbindir=sbinbar',
+                              '-Ddef_sharedstatedir=sharedstatebar',
+                              '-Ddef_sysconfdir=sysconfbar'])
+
+    def test_cross_file_dirs_chain(self):
+        # crossfile2 overrides crossfile overrides nativefile
+        testcase = os.path.join(self.unit_test_dir, '60 native file override')
+        self.init(testcase, default_args=False,
+                  extra_args=['--native-file', os.path.join(testcase, 'nativefile'),
+                              '--cross-file', os.path.join(testcase, 'crossfile'),
+                              '--cross-file', os.path.join(testcase, 'crossfile2'),
+                              '-Ddef_bindir=binbar2',
+                              '-Ddef_datadir=databar',
+                              '-Ddef_includedir=includebar',
+                              '-Ddef_infodir=infobar',
+                              '-Ddef_libdir=libbar',
+                              '-Ddef_libexecdir=libexecbar',
+                              '-Ddef_localedir=localebar',
+                              '-Ddef_localstatedir=localstatebar',
+                              '-Ddef_mandir=manbar',
+                              '-Ddef_sbindir=sbinbar',
+                              '-Ddef_sharedstatedir=sharedstatebar',
+                              '-Ddef_sysconfdir=sysconfbar'])
+
+class TAPParserTests(unittest.TestCase):
+    def assert_test(self, events, **kwargs):
+        if 'explanation' not in kwargs:
+            kwargs['explanation'] = None
+        self.assertEqual(next(events), TAPParser.Test(**kwargs))
+
+    def assert_plan(self, events, **kwargs):
+        if 'skipped' not in kwargs:
+            kwargs['skipped'] = False
+        if 'explanation' not in kwargs:
+            kwargs['explanation'] = None
+        self.assertEqual(next(events), TAPParser.Plan(**kwargs))
+
+    def assert_version(self, events, **kwargs):
+        self.assertEqual(next(events), TAPParser.Version(**kwargs))
+
+    def assert_error(self, events):
+        self.assertEqual(type(next(events)), TAPParser.Error)
+
+    def assert_bailout(self, events, **kwargs):
+        self.assertEqual(next(events), TAPParser.Bailout(**kwargs))
+
+    def assert_last(self, events):
+        with self.assertRaises(StopIteration):
+            next(events)
+
+    def parse_tap(self, s):
+        parser = TAPParser(io.StringIO(s))
+        return iter(parser.parse())
+
+    def parse_tap_v13(self, s):
+        events = self.parse_tap('TAP version 13\n' + s)
+        self.assert_version(events, version=13)
+        return events
+
+    def test_empty(self):
+        events = self.parse_tap('')
+        self.assert_last(events)
+
+    def test_empty_plan(self):
+        events = self.parse_tap('1..0')
+        self.assert_plan(events, count=0, late=False, skipped=True)
+        self.assert_last(events)
+
+    def test_plan_directive(self):
+        events = self.parse_tap('1..0 # skipped for some reason')
+        self.assert_plan(events, count=0, late=False, skipped=True,
+                         explanation='for some reason')
+        self.assert_last(events)
+
+        events = self.parse_tap('1..1 # skipped for some reason\nok 1')
+        self.assert_error(events)
+        self.assert_plan(events, count=1, late=False, skipped=True,
+                         explanation='for some reason')
+        self.assert_test(events, number=1, name='', result=TestResult.OK)
+        self.assert_last(events)
+
+        events = self.parse_tap('1..1 # todo not supported here\nok 1')
+        self.assert_error(events)
+        self.assert_plan(events, count=1, late=False, skipped=False,
+                         explanation='not supported here')
+        self.assert_test(events, number=1, name='', result=TestResult.OK)
+        self.assert_last(events)
+
+    def test_one_test_ok(self):
+        events = self.parse_tap('ok')
+        self.assert_test(events, number=1, name='', result=TestResult.OK)
+        self.assert_last(events)
+
+    def test_one_test_with_number(self):
+        events = self.parse_tap('ok 1')
+        self.assert_test(events, number=1, name='', result=TestResult.OK)
+        self.assert_last(events)
+
+    def test_one_test_with_name(self):
+        events = self.parse_tap('ok 1 abc')
+        self.assert_test(events, number=1, name='abc', result=TestResult.OK)
+        self.assert_last(events)
+
+    def test_one_test_not_ok(self):
+        events = self.parse_tap('not ok')
+        self.assert_test(events, number=1, name='', result=TestResult.FAIL)
+        self.assert_last(events)
+
+    def test_one_test_todo(self):
+        events = self.parse_tap('not ok 1 abc # TODO')
+        self.assert_test(events, number=1, name='abc', result=TestResult.EXPECTEDFAIL)
+        self.assert_last(events)
+
+        events = self.parse_tap('ok 1 abc # TODO')
+        self.assert_test(events, number=1, name='abc', result=TestResult.UNEXPECTEDPASS)
+        self.assert_last(events)
+
+    def test_one_test_skip(self):
+        events = self.parse_tap('ok 1 abc # SKIP')
+        self.assert_test(events, number=1, name='abc', result=TestResult.SKIP)
+        self.assert_last(events)
+
+    def test_one_test_skip_failure(self):
+        events = self.parse_tap('not ok 1 abc # SKIP')
+        self.assert_test(events, number=1, name='abc', result=TestResult.FAIL)
+        self.assert_last(events)
+
+    def test_many_early_plan(self):
+        events = self.parse_tap('1..4\nok 1\nnot ok 2\nok 3\nnot ok 4')
+        self.assert_plan(events, count=4, late=False)
+        self.assert_test(events, number=1, name='', result=TestResult.OK)
+        self.assert_test(events, number=2, name='', result=TestResult.FAIL)
+        self.assert_test(events, number=3, name='', result=TestResult.OK)
+        self.assert_test(events, number=4, name='', result=TestResult.FAIL)
+        self.assert_last(events)
+
+    def test_many_late_plan(self):
+        events = self.parse_tap('ok 1\nnot ok 2\nok 3\nnot ok 4\n1..4')
+        self.assert_test(events, number=1, name='', result=TestResult.OK)
+        self.assert_test(events, number=2, name='', result=TestResult.FAIL)
+        self.assert_test(events, number=3, name='', result=TestResult.OK)
+        self.assert_test(events, number=4, name='', result=TestResult.FAIL)
+        self.assert_plan(events, count=4, late=True)
+        self.assert_last(events)
+
+    def test_directive_case(self):
+        events = self.parse_tap('ok 1 abc # skip')
+        self.assert_test(events, number=1, name='abc', result=TestResult.SKIP)
+        self.assert_last(events)
+
+        events = self.parse_tap('ok 1 abc # ToDo')
+        self.assert_test(events, number=1, name='abc', result=TestResult.UNEXPECTEDPASS)
+        self.assert_last(events)
+
+    def test_directive_explanation(self):
+        events = self.parse_tap('ok 1 abc # skip why')
+        self.assert_test(events, number=1, name='abc', result=TestResult.SKIP,
+                         explanation='why')
+        self.assert_last(events)
+
+        events = self.parse_tap('ok 1 abc # ToDo Because')
+        self.assert_test(events, number=1, name='abc', result=TestResult.UNEXPECTEDPASS,
+                         explanation='Because')
+        self.assert_last(events)
+
+    def test_one_test_early_plan(self):
+        events = self.parse_tap('1..1\nok')
+        self.assert_plan(events, count=1, late=False)
+        self.assert_test(events, number=1, name='', result=TestResult.OK)
+        self.assert_last(events)
+
+    def test_one_test_late_plan(self):
+        events = self.parse_tap('ok\n1..1')
+        self.assert_test(events, number=1, name='', result=TestResult.OK)
+        self.assert_plan(events, count=1, late=True)
+        self.assert_last(events)
+
+    def test_out_of_order(self):
+        events = self.parse_tap('ok 2')
+        self.assert_error(events)
+        self.assert_test(events, number=2, name='', result=TestResult.OK)
+        self.assert_last(events)
+
+    def test_middle_plan(self):
+        events = self.parse_tap('ok 1\n1..2\nok 2')
+        self.assert_test(events, number=1, name='', result=TestResult.OK)
+        self.assert_plan(events, count=2, late=True)
+        self.assert_error(events)
+        self.assert_test(events, number=2, name='', result=TestResult.OK)
+        self.assert_last(events)
+
+    def test_too_many_plans(self):
+        events = self.parse_tap('1..1\n1..2\nok 1')
+        self.assert_plan(events, count=1, late=False)
+        self.assert_error(events)
+        self.assert_test(events, number=1, name='', result=TestResult.OK)
+        self.assert_last(events)
+
+    def test_too_many(self):
+        events = self.parse_tap('ok 1\nnot ok 2\n1..1')
+        self.assert_test(events, number=1, name='', result=TestResult.OK)
+        self.assert_test(events, number=2, name='', result=TestResult.FAIL)
+        self.assert_plan(events, count=1, late=True)
+        self.assert_error(events)
+        self.assert_last(events)
+
+        events = self.parse_tap('1..1\nok 1\nnot ok 2')
+        self.assert_plan(events, count=1, late=False)
+        self.assert_test(events, number=1, name='', result=TestResult.OK)
+        self.assert_test(events, number=2, name='', result=TestResult.FAIL)
+        self.assert_error(events)
+        self.assert_last(events)
+
+    def test_too_few(self):
+        events = self.parse_tap('ok 1\nnot ok 2\n1..3')
+        self.assert_test(events, number=1, name='', result=TestResult.OK)
+        self.assert_test(events, number=2, name='', result=TestResult.FAIL)
+        self.assert_plan(events, count=3, late=True)
+        self.assert_error(events)
+        self.assert_last(events)
+
+        events = self.parse_tap('1..3\nok 1\nnot ok 2')
+        self.assert_plan(events, count=3, late=False)
+        self.assert_test(events, number=1, name='', result=TestResult.OK)
+        self.assert_test(events, number=2, name='', result=TestResult.FAIL)
+        self.assert_error(events)
+        self.assert_last(events)
+
+    def test_too_few_bailout(self):
+        events = self.parse_tap('1..3\nok 1\nnot ok 2\nBail out! no third test')
+        self.assert_plan(events, count=3, late=False)
+        self.assert_test(events, number=1, name='', result=TestResult.OK)
+        self.assert_test(events, number=2, name='', result=TestResult.FAIL)
+        self.assert_bailout(events, message='no third test')
+        self.assert_last(events)
+
+    def test_diagnostics(self):
+        events = self.parse_tap('1..1\n# ignored\nok 1')
+        self.assert_plan(events, count=1, late=False)
+        self.assert_test(events, number=1, name='', result=TestResult.OK)
+        self.assert_last(events)
+
+        events = self.parse_tap('# ignored\n1..1\nok 1\n# ignored too')
+        self.assert_plan(events, count=1, late=False)
+        self.assert_test(events, number=1, name='', result=TestResult.OK)
+        self.assert_last(events)
+
+        events = self.parse_tap('# ignored\nok 1\n1..1\n# ignored too')
+        self.assert_test(events, number=1, name='', result=TestResult.OK)
+        self.assert_plan(events, count=1, late=True)
+        self.assert_last(events)
+
+    def test_empty_line(self):
+        events = self.parse_tap('1..1\n\nok 1')
+        self.assert_plan(events, count=1, late=False)
+        self.assert_test(events, number=1, name='', result=TestResult.OK)
+        self.assert_last(events)
+
+    def test_unexpected(self):
+        events = self.parse_tap('1..1\ninvalid\nok 1')
+        self.assert_plan(events, count=1, late=False)
+        self.assert_error(events)
+        self.assert_test(events, number=1, name='', result=TestResult.OK)
+        self.assert_last(events)
+
+    def test_version(self):
+        events = self.parse_tap('TAP version 13\n')
+        self.assert_version(events, version=13)
+        self.assert_last(events)
+
+        events = self.parse_tap('TAP version 12\n')
+        self.assert_error(events)
+        self.assert_last(events)
+
+        events = self.parse_tap('1..0\nTAP version 13\n')
+        self.assert_plan(events, count=0, late=False, skipped=True)
+        self.assert_error(events)
+        self.assert_last(events)
+
+    def test_yaml(self):
+        events = self.parse_tap_v13('ok\n ---\n foo: abc\n  bar: def\n ...\nok 2')
+        self.assert_test(events, number=1, name='', result=TestResult.OK)
+        self.assert_test(events, number=2, name='', result=TestResult.OK)
+        self.assert_last(events)
+
+        events = self.parse_tap_v13('ok\n ---\n foo: abc\n  bar: def')
+        self.assert_test(events, number=1, name='', result=TestResult.OK)
+        self.assert_error(events)
+        self.assert_last(events)
+
+        events = self.parse_tap_v13('ok 1\n ---\n foo: abc\n  bar: def\nnot ok 2')
+        self.assert_test(events, number=1, name='', result=TestResult.OK)
+        self.assert_error(events)
+        self.assert_test(events, number=2, name='', result=TestResult.FAIL)
+        self.assert_last(events)
+
+
+def _clang_at_least(compiler, minver: str, apple_minver: str) -> bool:
+    """
+    check that Clang compiler is at least a specified version, whether AppleClang or regular Clang
+
+    Parameters
+    ----------
+    compiler:
+        Meson compiler object
+    minver: str
+        Clang minimum version
+    apple_minver: str
+        AppleCLang minimum version
+
+    Returns
+    -------
+    at_least: bool
+        Clang is at least the specified version
+    """
+    if isinstance(compiler, (mesonbuild.compilers.AppleClangCCompiler,
+                             mesonbuild.compilers.AppleClangCPPCompiler)):
+        return version_compare(compiler.version, apple_minver)
+    return version_compare(compiler.version, minver)
+
+
+def unset_envs():
+    # For unit tests we must fully control all command lines
+    # so that there are no unexpected changes coming from the
+    # environment, for example when doing a package build.
+    varnames = ['CPPFLAGS', 'LDFLAGS'] + list(mesonbuild.compilers.compilers.cflags_mapping.values())
+    for v in varnames:
+        if v in os.environ:
+            del os.environ[v]
+
+def convert_args(argv):
+    # If we got passed a list of tests, pass it on
+    pytest_args = ['-v'] if '-v' in argv else []
+    test_list = []
+    for arg in argv:
+        if arg.startswith('-'):
+            if arg in ('-f', '--failfast'):
+                arg = '--exitfirst'
+            pytest_args.append(arg)
+            continue
+        # ClassName.test_name => 'ClassName and test_name'
+        if '.' in arg:
+            arg = ' and '.join(arg.split('.'))
+        test_list.append(arg)
+    if test_list:
+        pytest_args += ['-k', ' or '.join(test_list)]
+    return pytest_args
+
+def main():
+    unset_envs()
+    try:
+        import pytest # noqa: F401
+        # Need pytest-xdist for `-n` arg
+        import xdist # noqa: F401
+        pytest_args = ['-n', 'auto', './run_unittests.py']
+        pytest_args += convert_args(sys.argv[1:])
+        return subprocess.run(python_command + ['-m', 'pytest'] + pytest_args).returncode
+    except ImportError:
+        print('pytest-xdist not found, using unittest instead')
+    # All attempts at locating pytest failed, fall back to plain unittest.
+    cases = ['InternalTests', 'DataTests', 'AllPlatformTests', 'FailureTests',
+             'PythonTests', 'NativeFileTests', 'RewriterTests', 'CrossFileTests',
+             'TAPParserTests',
+
+             'LinuxlikeTests', 'LinuxCrossArmTests', 'LinuxCrossMingwTests',
+             'WindowsTests', 'DarwinTests']
+
+    return unittest.main(defaultTest=cases, buffer=True)
+
+if __name__ == '__main__':
+    print('Meson build system', mesonbuild.coredata.version, 'Unit Tests')
+    raise SystemExit(main())
diff --git a/meson/setup.cfg b/meson/setup.cfg
new file mode 100644
index 000000000..f3ed75ef9
--- /dev/null
+++ b/meson/setup.cfg
@@ -0,0 +1,41 @@
+[metadata]
+description = A high performance build system
+author = Jussi Pakkanen
+author_email = jpakkane@gmail.com
+url = https://mesonbuild.com
+keywords =
+  meson
+  mesonbuild
+  build system
+  cmake
+license = Apache License, Version 2.0
+license_file = COPYING
+classifiers =
+  Development Status :: 5 - Production/Stable
+  Environment :: Console
+  Intended Audience :: Developers
+  License :: OSI Approved :: Apache Software License
+  Natural Language :: English
+  Operating System :: MacOS :: MacOS X
+  Operating System :: Microsoft :: Windows
+  Operating System :: POSIX :: BSD
+  Operating System :: POSIX :: Linux
+  Programming Language :: Python :: 3 :: Only
+  Programming Language :: Python :: 3.5
+  Programming Language :: Python :: 3.6
+  Programming Language :: Python :: 3.7
+  Programming Language :: Python :: 3.8
+  Topic :: Software Development :: Build Tools
+long_description = Meson is a cross-platform build system designed to be both as fast and as user friendly as possible. It supports many languages and compilers, including GCC, Clang, PGI, Intel, and Visual Studio. Its build definitions are written in a simple non-Turing complete DSL.
+
+[options]
+python_requires = >= 3.5.2
+setup_requires =
+  setuptools
+
+[options.extras_require]
+progress =
+  tqdm
+
+[tool:pytest]
+python_classes =
diff --git a/meson/setup.py b/meson/setup.py
new file mode 100644
index 000000000..145f19c52
--- /dev/null
+++ b/meson/setup.py
@@ -0,0 +1,51 @@
+#!/usr/bin/env python3
+
+# Copyright 2016 The Meson development team
+
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+
+#     http://www.apache.org/licenses/LICENSE-2.0
+
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import sys
+
+if sys.version_info < (3, 5, 2):
+    raise SystemExit('ERROR: Tried to install Meson with an unsupported Python version: \n{}'
+                     '\nMeson requires Python 3.5.2 or greater'.format(sys.version))
+
+from mesonbuild.coredata import version
+from setuptools import setup
+
+# On windows, will create Scripts/meson.exe and Scripts/meson-script.py
+# Other platforms will create bin/meson
+entries = {'console_scripts': ['meson=mesonbuild.mesonmain:main']}
+packages = ['mesonbuild',
+            'mesonbuild.ast',
+            'mesonbuild.backend',
+            'mesonbuild.cmake',
+            'mesonbuild.compilers',
+            'mesonbuild.compilers.mixins',
+            'mesonbuild.dependencies',
+            'mesonbuild.modules',
+            'mesonbuild.scripts',
+            'mesonbuild.templates',
+            'mesonbuild.wrap']
+data_files = []
+if sys.platform != 'win32':
+    # Only useful on UNIX-like systems
+    data_files = [('share/man/man1', ['man/meson.1']),
+                  ('share/polkit-1/actions', ['data/com.mesonbuild.install.policy'])]
+
+if __name__ == '__main__':
+    setup(name='meson',
+          version=version,
+          packages=packages,
+          entry_points=entries,
+          data_files=data_files,)
diff --git a/meson/sider.yml b/meson/sider.yml
new file mode 100644
index 000000000..5c5619692
--- /dev/null
+++ b/meson/sider.yml
@@ -0,0 +1,7 @@
+linter:
+  flake8:
+    version: 3
+    plugins:
+      - flake8-blind-except
+      - flake8-builtins
+      - flake8-bugbear
diff --git a/meson/skip_ci.py b/meson/skip_ci.py
new file mode 100755
index 000000000..5e0a7aa72
--- /dev/null
+++ b/meson/skip_ci.py
@@ -0,0 +1,78 @@
+#!/usr/bin/env python3
+# Copyright 2018 The Meson development team
+
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+
+#     http://www.apache.org/licenses/LICENSE-2.0
+
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from __future__ import print_function
+
+import argparse
+import os
+import subprocess
+import sys
+import traceback
+
+
+def check_pr(is_pr_env):
+    if is_pr_env not in os.environ:
+        print('This is not pull request: {} is not set'.format(is_pr_env))
+        sys.exit()
+    elif os.environ[is_pr_env] == 'false':
+        print('This is not pull request: {} is false'.format(is_pr_env))
+        sys.exit()
+
+
+def get_base_branch(base_env):
+    if base_env not in os.environ:
+        print('Unable to determine base branch: {} is not set'.format(base_env))
+        sys.exit()
+    return os.environ[base_env]
+
+
+def get_git_files(base):
+    diff = subprocess.check_output(['git', 'diff', '--name-only', base + '...HEAD'])
+    return diff.strip().split(b'\n')
+
+
+def is_documentation(filename):
+    return filename.startswith(b'docs/')
+
+
+def main():
+    try:
+        parser = argparse.ArgumentParser(description='CI Skipper')
+        parser.add_argument('--base-branch-env', required=True,
+                            help='Branch push is targeted to')
+        parser.add_argument('--is-pull-env', required=True,
+                            help='Variable set if it is a PR')
+        parser.add_argument('--base-branch-origin', action='store_true',
+                            help='Base branch reference is only in origin remote')
+        args = parser.parse_args()
+        check_pr(args.is_pull_env)
+        base = get_base_branch(args.base_branch_env)
+        if args.base_branch_origin:
+            base = 'origin/' + base
+        if all(is_documentation(f) for f in get_git_files(base)):
+            print("Don't run CI for documentation-only changes, add '[skip ci]' to commit title.")
+            print('See http://mesonbuild.com/Contributing.html#skipping-integration-tests')
+            sys.exit(1)
+    except Exception:
+        # If this script fails we want build to proceed.
+        # Failure likely means some corner case we did not consider or bug.
+        # Either case this should not prevent CI from running if it is needed,
+        # and we tolerate it if it is run where it is not required.
+        traceback.print_exc()
+        print('There is a BUG in skip_ci.py, exiting.')
+        sys.exit()
+
+if __name__ == '__main__':
+    main()
diff --git a/meson/tools/ac_converter.py b/meson/tools/ac_converter.py
new file mode 100755
index 000000000..075eae629
--- /dev/null
+++ b/meson/tools/ac_converter.py
@@ -0,0 +1,453 @@
+#!/usr/bin/env python3
+
+# Copyright 2015 The Meson development team
+
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+
+#     http://www.apache.org/licenses/LICENSE-2.0
+
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+help_message = """Usage: {} 
+
+This script reads config.h.meson, looks for header
+checks and writes the corresponding meson declaration.
+
+Copy config.h.in to config.h.meson, replace #undef
+with #mesondefine and run this. We can't do this automatically
+because some configure scripts have #undef statements
+that are unrelated to configure checks.
+"""
+
+import sys
+
+
+# Add stuff here as it is encountered.
+function_data = \
+    {'HAVE_FEENABLEEXCEPT': ('feenableexcept', 'fenv.h'),
+     'HAVE_FECLEAREXCEPT': ('feclearexcept', 'fenv.h'),
+     'HAVE_FEDISABLEEXCEPT': ('fedisableexcept', 'fenv.h'),
+     'HAVE_MMAP': ('mmap', 'sys/mman.h'),
+     'HAVE_GETPAGESIZE': ('getpagesize', 'unistd.h'),
+     'HAVE_GETISAX': ('getisax', 'sys/auxv.h'),
+     'HAVE_GETTIMEOFDAY': ('gettimeofday', 'sys/time.h'),
+     'HAVE_MPROTECT': ('mprotect', 'sys/mman.h'),
+     'HAVE_POSIX_MEMALIGN': ('posix_memalign', 'stdlib.h'),
+     'HAVE_SIGACTION': ('sigaction', 'signal.h'),
+     'HAVE_ALARM': ('alarm', 'unistd.h'),
+     'HAVE_CTIME_R': ('ctime_r', 'time.h'),
+     'HAVE_DRAND48': ('drand48', 'stdlib.h'),
+     'HAVE_FLOCKFILE': ('flockfile', 'stdio.h'),
+     'HAVE_FORK': ('fork', 'unistd.h'),
+     'HAVE_FUNLOCKFILE': ('funlockfile', 'stdio.h'),
+     'HAVE_GETLINE': ('getline', 'stdio.h'),
+     'HAVE_LINK': ('link', 'unistd.h'),
+     'HAVE_RAISE': ('raise', 'signal.h'),
+     'HAVE_STRNDUP': ('strndup', 'string.h'),
+     'HAVE_SCHED_GETAFFINITY': ('sched_getaffinity', 'sched.h'),
+     'HAVE_WAITPID': ('waitpid', 'sys/wait.h'),
+     'HAVE_XRENDERCREATECONICALGRADIENT': ('XRenderCreateConicalGradient', 'xcb/render.h'),
+     'HAVE_XRENDERCREATELINEARGRADIENT': ('XRenderCreateLinearGradient', 'xcb/render.h'),
+     'HAVE_XRENDERCREATERADIALGRADIENT': ('XRenderCreateRadialGradient', 'xcb/render.h'),
+     'HAVE_XRENDERCREATESOLIDFILL': ('XRenderCreateSolidFill', 'xcb/render.h'),
+     'HAVE_DCGETTEXT': ('dcgettext', 'libintl.h'),
+     'HAVE_ENDMNTENT': ('endmntent', 'mntent.h'),
+     'HAVE_ENDSERVENT': ('endservent', 'netdb.h'),
+     'HAVE_EVENTFD': ('eventfd', 'sys/eventfd.h'),
+     'HAVE_FALLOCATE': ('fallocate', 'fcntl.h'),
+     'HAVE_FCHMOD': ('fchmod', 'sys/stat.h'),
+     'HAVE_FCHOWN': ('fchown', 'unistd.h'),
+     'HAVE_FDWALK': ('fdwalk', 'stdlib.h'),
+     'HAVE_FSYNC': ('fsync', 'unistd.h'),
+     'HAVE_GETC_UNLOCKED': ('getc_unlocked', 'stdio.h'),
+     'HAVE_GETFSSTAT': ('getfsstat', 'sys/mount.h'),
+     'HAVE_GETMNTENT_R': ('getmntent_r', 'mntent.h'),
+     'HAVE_GETPROTOBYNAME_R': ('getprotobyname_r', 'netdb.h'),
+     'HAVE_GETRESUID': ('getresuid', 'unistd.h'),
+     'HAVE_GETVFSSTAT': ('getvfsstat', 'sys/statvfs.h'),
+     'HAVE_GMTIME_R': ('gmtime_r', 'time.h'),
+     'HAVE_HASMNTOPT': ('hasmntopt', 'mntent.h'),
+     'HAVE_IF_INDEXTONAME': ('if_indextoname', 'net/if.h'),
+     'HAVE_IF_NAMETOINDEX': ('if_nametoindex', 'net/if.h'),
+     'HAVE_INOTIFY_INIT1': ('inotify_init1', 'sys/inotify.h'),
+     'HAVE_ISSETUGID': ('issetugid', 'unistd.h'),
+     'HAVE_KEVENT': ('kevent', 'sys/event.h'),
+     'HAVE_KQUEUE': ('kqueue', 'sys/event.h'),
+     'HAVE_LCHMOD': ('lchmod', 'sys/stat.h'),
+     'HAVE_LCHOWN': ('lchown', 'unistd.h'),
+     'HAVE_LSTAT': ('lstat', 'sys/stat.h'),
+     'HAVE_MEMCPY': ('memcpy', 'string.h'),
+     'HAVE_MEMALIGN': ('memalign', 'stdlib.h'),
+     'HAVE_MEMMEM': ('memmem', 'string.h'),
+     'HAVE_NEWLOCALE': ('newlocale', 'locale.h'),
+     'HAVE_PIPE2': ('pipe2', 'fcntl.h'),
+     'HAVE_POLL': ('poll', 'poll.h'),
+     'HAVE_PRLIMIT': ('prlimit', 'sys/resource.h'),
+     'HAVE_PTHREAD_ATTR_SETSTACKSIZE': ('pthread_attr_setstacksize', 'pthread.h'),
+     'HAVE_PTHREAD_CONDATTR_SETCLOCK': ('pthread_condattr_setclock', 'pthread.h'),
+     'HAVE_PTHREAD_COND_TIMEDWAIT_RELATIVE_NP': ('pthread_cond_timedwait_relative_np', 'pthread.h'),
+     'HAVE_READLINK': ('readlink', 'unistd.h'),
+     'HAVE_RES_INIT': ('res_init', 'resolv.h'),
+     'HAVE_SENDMMSG': ('sendmmsg', 'sys/socket.h'),
+     'HAVE_SOCKET': ('socket', 'sys/socket.h'),
+     'HAVE_GETENV': ('getenv', 'stdlib.h'),
+     'HAVE_SETENV': ('setenv', 'stdlib.h'),
+     'HAVE_PUTENV': ('putenv', 'stdlib.h'),
+     'HAVE_UNSETENV': ('unsetenv', 'stdlib.h'),
+     'HAVE_SETMNTENT': ('setmntent', 'mntent.h'),
+     'HAVE_SNPRINTF': ('snprintf', 'stdio.h'),
+     'HAVE_SPLICE': ('splice', 'fcntl.h'),
+     'HAVE_STATFS': ('statfs', 'mount.h'),
+     'HAVE_STATVFS': ('statvfs', 'sys/statvfs.h'),
+     'HAVE_STPCOPY': ('stpcopy', 'string.h'),
+     'HAVE_STRCASECMP': ('strcasecmp', 'strings.h'),
+     'HAVE_STRLCPY': ('strlcpy', 'string.h'),
+     'HAVE_STRNCASECMP': ('strncasecmp', 'strings.h'),
+     'HAVE_STRSIGNAL': ('strsignal', 'signal.h'),
+     'HAVE_STRTOD_L': ('strtod_l', 'stdlib.h'),
+     'HAVE_STRTOLL_L': ('strtoll_l', 'stdlib.h'),
+     'HAVE_STRTOULL_L': ('strtoull_l', 'stdlib.h'),
+     'HAVE_SYMLINK': ('symlink', 'unistd.h'),
+     'HAVE_SYSCTLBYNAME': ('sysctlbyname', 'sys/sysctl.h'),
+     'HAVE_TIMEGM': ('timegm', 'time.h'),
+     'HAVE_USELOCALE': ('uselocale', 'xlocale.h'),
+     'HAVE_UTIMES': ('utimes', 'sys/time.h'),
+     'HAVE_VALLOC': ('valloc', 'stdlib.h'),
+     'HAVE_VASPRINTF': ('vasprintf', 'stdio.h'),
+     'HAVE_VSNPRINTF': ('vsnprintf', 'stdio.h'),
+     'HAVE_BCOPY': ('bcopy', 'strings.h'),
+     'HAVE_STRERROR': ('strerror', 'string.h'),
+     'HAVE_MEMMOVE': ('memmove', 'string.h'),
+     'HAVE_STRTOIMAX': ('strtoimax', 'inttypes.h'),
+     'HAVE_STRTOLL': ('strtoll', 'stdlib.h'),
+     'HAVE_STRTOQ': ('strtoq', 'stdlib.h'),
+     'HAVE_ACCEPT4': ('accept4', 'sys/socket.h'),
+     'HAVE_CHMOD': ('chmod', 'sys/stat.h'),
+     'HAVE_CHOWN': ('chown', 'unistd.h'),
+     'HAVE_FSTAT': ('fstat', 'sys/stat.h'),
+     'HAVE_GETADDRINFO': ('getaddrinfo', 'netdb.h'),
+     'HAVE_GETGRGID_R': ('getgrgid_r', 'grp.h'),
+     'HAVE_GETGRNAM_R': ('getgrnam_r', 'grp.h'),
+     'HAVE_GETGROUPS': ('getgroups', 'grp.h'),
+     'HAVE_GETOPT_LONG': ('getopt_long', 'getopt.h'),
+     'HAVE_GETPWNAM_R': ('getpwnam', 'pwd.h'),
+     'HAVE_GETPWUID_R': ('getpwuid_r', 'pwd.h'),
+     'HAVE_GETUID': ('getuid', 'unistd.h'),
+     'HAVE_LRINTF': ('lrintf', 'math.h'),
+     'HAVE_DECL_ISNAN': ('isnan', 'math.h'),
+     'HAVE_DECL_ISINF': ('isinf', 'math.h'),
+     'HAVE_ROUND': ('round', 'math.h'),
+     'HAVE_NEARBYINT': ('nearbyint', 'math.h'),
+     'HAVE_RINT': ('rint', 'math.h'),
+     'HAVE_MKFIFO': ('mkfifo', 'sys/stat.h'),
+     'HAVE_MLOCK': ('mlock', 'sys/mman.h'),
+     'HAVE_NANOSLEEP': ('nanosleep', 'time.h'),
+     'HAVE_PIPE': ('pipe', 'unistd.h'),
+     'HAVE_PPOLL': ('ppoll', 'poll.h'),
+     'HAVE_REGEXEC': ('regexec', 'regex.h'),
+     'HAVE_SETEGID': ('setegid', 'unistd.h'),
+     'HAVE_SETEUID': ('seteuid', 'unistd.h'),
+     'HAVE_SETPGID': ('setpgid', 'unistd.h'),
+     'HAVE_SETREGID': ('setregid', 'unistd.h'),
+     'HAVE_SETRESGID': ('setresgid', 'unistd.h'),
+     'HAVE_SETRESUID': ('setresuid', 'unistd.h'),
+     'HAVE_SHM_OPEN': ('shm_open', 'fcntl.h'),
+     'HAVE_SLEEP': ('sleep', 'unistd.h'),
+     'HAVE_STRERROR_R': ('strerror_r', 'string.h'),
+     'HAVE_STRTOF': ('strtof', 'stdlib.h'),
+     'HAVE_SYSCONF': ('sysconf', 'unistd.h'),
+     'HAVE_USLEEP': ('usleep', 'unistd.h'),
+     'HAVE_VFORK': ('vfork', 'unistd.h'),
+     'HAVE_MALLOC': ('malloc', 'stdlib.h'),
+     'HAVE_CALLOC': ('calloc', 'stdlib.h'),
+     'HAVE_REALLOC': ('realloc', 'stdlib.h'),
+     'HAVE_FREE': ('free', 'stdlib.h'),
+     'HAVE_ALLOCA': ('alloca', 'alloca.h'),
+     'HAVE_QSORT': ('qsort', 'stdlib.h'),
+     'HAVE_ABS': ('abs', 'stdlib.h'),
+     'HAVE_MEMSET': ('memset', 'string.h'),
+     'HAVE_MEMCMP': ('memcmp', 'string.h'),
+     'HAVE_STRLEN': ('strlen', 'string.h'),
+     'HAVE_STRLCAT': ('strlcat', 'string.h'),
+     'HAVE_STRDUP': ('strdup', 'string.h'),
+     'HAVE__STRREV': ('_strrev', 'string.h'),
+     'HAVE__STRUPR': ('_strupr', 'string.h'),
+     'HAVE__STRLWR': ('_strlwr', 'string.h'),
+     'HAVE_INDEX': ('index', 'strings.h'),
+     'HAVE_RINDEX': ('rindex', 'strings.h'),
+     'HAVE_STRCHR': ('strchr', 'string.h'),
+     'HAVE_STRRCHR': ('strrchr', 'string.h'),
+     'HAVE_STRSTR': ('strstr', 'string.h'),
+     'HAVE_STRTOL': ('strtol', 'stdlib.h'),
+     'HAVE_STRTOUL': ('strtoul', 'stdlib.h'),
+     'HAVE_STRTOULL': ('strtoull', 'stdlib.h'),
+     'HAVE_STRTOD': ('strtod', 'stdlib.h'),
+     'HAVE_ATOI': ('atoi', 'stdlib.h'),
+     'HAVE_ATOF': ('atof', 'stdlib.h'),
+     'HAVE_STRCMP': ('strcmp', 'string.h'),
+     'HAVE_STRNCMP': ('strncmp', 'string.h'),
+     'HAVE_VSSCANF': ('vsscanf', 'stdio.h'),
+     'HAVE_CHROOT': ('chroot', 'unistd.h'),
+     'HAVE_CLOCK': ('clock', 'time.h'),
+     'HAVE_CLOCK_GETRES': ('clock_getres', 'time.h'),
+     'HAVE_CLOCK_GETTIME': ('clock_gettime', 'time.h'),
+     'HAVE_CLOCK_SETTIME': ('clock_settime', 'time.h'),
+     'HAVE_CONFSTR': ('confstr', 'time.h'),
+     'HAVE_CTERMID': ('ctermid', 'stdio.h'),
+     'HAVE_DIRFD': ('dirfd', 'dirent.h'),
+     'HAVE_DLOPEN': ('dlopen', 'dlfcn.h'),
+     'HAVE_DUP2': ('dup2', 'unistd.h'),
+     'HAVE_DUP3': ('dup3', 'unistd.h'),
+     'HAVE_EPOLL_CREATE1': ('epoll_create1', 'sys/epoll.h'),
+     'HAVE_ERF': ('erf', 'math.h'),
+     'HAVE_ERFC': ('erfc', 'math.h'),
+     'HAVE_EXECV': ('execv', 'unistd.h'),
+     'HAVE_FACCESSAT': ('faccessat', 'unistd.h'),
+     'HAVE_FCHDIR': ('fchdir', 'unistd.h'),
+     'HAVE_FCHMODAT': ('fchmodat', 'sys/stat.h'),
+     'HAVE_FDATASYNC': ('fdatasync', 'unistd.h'),
+     'HAVE_FDOPENDIR': ('fdopendir', 'dirent.h'),
+     'HAVE_FEXECVE': ('fexecve', 'unistd.h'),
+     'HAVE_FLOCK': ('flock', 'sys/file.h'),
+     'HAVE_FORKPTY': ('forkpty', 'pty.h'),
+     'HAVE_FPATHCONF': ('fpathconf', 'unistd.h'),
+     'HAVE_FSTATAT': ('fstatat', 'unistd.h'),
+     'HAVE_FSTATVFS': ('fstatvfs', 'sys/statvfs.h'),
+     'HAVE_FTELLO': ('ftello', 'stdio.h'),
+     'HAVE_FTIME': ('ftime', 'sys/timeb.h'),
+     'HAVE_FTRUNCATE': ('ftruncate', 'unistd.h'),
+     'HAVE_FUTIMENS': ('futimens', 'sys/stat.h'),
+     'HAVE_FUTIMES': ('futimes', 'sys/time.h'),
+     'HAVE_GAI_STRERROR': ('gai_strerror', 'netdb.h'),
+     'HAVE_GETGROUPLIST': ('getgrouplist', 'grp.h'),
+     'HAVE_GETHOSTBYNAME': ('gethostbyname', 'netdb.h'),
+     'HAVE_GETHOSTBYNAME_R': ('gethostbyname_r', 'netdb.h'),
+     'HAVE_GETITIMER': ('getitimer', 'sys/time.h'),
+     'HAVE_GETLOADAVG': ('getloadavg', 'stdlib.h'),
+     'HAVE_GETLOGIN': ('getlogin', 'unistd.h'),
+     'HAVE_GETNAMEINFO': ('getnameinfo', 'netdb.h'),
+     'HAVE_GETPEERNAME': ('getpeername', 'sys/socket.h'),
+     'HAVE_GETPGID': ('getpgid', 'unistd.h'),
+     'HAVE_GETPGRP': ('getpgrp', 'unistd.h'),
+     'HAVE_GETPID': ('getpid', 'unistd.h'),
+     'HAVE_GETPRIORITY': ('getpriority', 'sys/resource.h'),
+     'HAVE_GETPWENT': ('getpwent', 'pwd.h'),
+     'HAVE_GETRANDOM': ('getrandom', 'linux/random.h'),
+     'HAVE_GETRESGID': ('getresgid', 'unistd.h'),
+     'HAVE_GETSID': ('getsid', 'unistd.h'),
+     'HAVE_GETSPENT': ('getspent', 'shadow.h'),
+     'HAVE_GETSPNAM': ('getspnam', 'shadow.h'),
+     'HAVE_GETWD': ('getwd', 'unistd.h'),
+     'HAVE_HSTRERROR': ('hstrerror', 'netdb.h'),
+     'HAVE_HTOLE64': ('htole64', 'endian.h'),
+     'HAVE_IF_NAMEINDEX': ('if_nameindex', 'net/if.h'),
+     'HAVE_INET_ATON': ('inet_aton', 'arpa/inet.h'),
+     'HAVE_INET_PTON': ('inet_pton', 'arpa/inet.h'),
+     'HAVE_INITGROUPS': ('initgroups', 'grp.h'),
+     'HAVE_KILL': ('kill', 'signal.h'),
+     'HAVE_KILLPG': ('killpg', 'signal.h'),
+     'HAVE_LINKAT': ('linkat', 'unistd.h'),
+     'HAVE_LOCKF': ('lockf', 'unistd.h'),
+     'HAVE_LUTIMES': ('lutimes', 'sys/time.h'),
+     'HAVE_MAKEDEV': ('makedev', 'sys/sysmacros.h'),
+     'HAVE_MBRTOWC': ('mbrtowc', 'wchar.h'),
+     'HAVE_MEMRCHR': ('memrchr', 'string.h'),
+     'HAVE_MKDIRAT': ('mkdirat', 'sys/stat.h'),
+     'HAVE_MKFIFOAT': ('mkfifoat', 'sys/stat.h'),
+     'HAVE_MKNOD': ('mknod', 'unistd.h'),
+     'HAVE_MKNODAT': ('mknodat', 'unistd.h'),
+     'HAVE_MKTIME': ('mktime', 'unistd.h'),
+     'HAVE_MKREMAP': ('mkremap', 'sys/mman.h'),
+     'HAVE_NICE': ('nice', 'unistd.h'),
+     'HAVE_OPENAT': ('openat', 'fcntl.h'),
+     'HAVE_OPENPTY': ('openpty', 'pty.h'),
+     'HAVE_PATHCONF': ('pathconf', 'unistd.h'),
+     'HAVE_PAUSE': ('pause', 'unistd.h'),
+     'HAVE_PREAD': ('pread', 'unistd.h'),
+     'HAVE_PTHREAD_KILL': ('pthread_kill', 'signal.h'),
+     'HAVE_PTHREAD_SIGMASK': ('pthread_sigmask', 'signal.h'),
+     'HAVE_PWRITE': ('pwrite', 'unistd.h'),
+     'HAVE_READLINKAT': ('readlinkat', 'unistd.h'),
+     'HAVE_READV': ('readv', 'sys/uio.h'),
+     'HAVE_RENAMEAT': ('renamat', 'stdio.h'),
+     'HAVE_SCHED_GET_PRIORITY_MAX': ('sched_get_priority_max', 'sched.h'),
+     'HAVE_SCHED_RR_GET_INTERVAL': ('sched_rr_get_interval', 'sched.h'),
+     'HAVE_SCHED_SETAFFINITY': ('sched_setaffinity', 'sched.h'),
+     'HAVE_SCHED_SETPARAM': ('sched_setparam', 'sched.h'),
+     'HAVE_SCHED_SETSCHEDULER': ('sched_setscheduler', 'sched.h'),
+     'HAVE_SELECT': ('select', 'sys/select.h'),
+     'HAVE_SEM_GETVALUE': ('sem_getvalue', 'semaphore.h'),
+     'HAVE_SEM_OPEN': ('sem_open', 'semaphore.h'),
+     'HAVE_SEM_TIMEDWAIT': ('sem_timedwait', 'semaphore.h'),
+     'HAVE_SEM_UNLINK': ('sem_unlink', 'semaphore.h'),
+     'HAVE_SENDFILE': ('sendfile', 'sys/sendfile.h'),
+     'HAVE_SETGID': ('setgid', 'unistd.h'),
+     'HAVE_SETGROUPS': ('setgroups', 'grp.h'),
+     'HAVE_SETHOSTNAME': ('sethostname', 'unistd.h'),
+     'HAVE_SETITIMER': ('setitimer', 'sys/time.h'),
+     'HAVE_SETLOCALE': ('setlocale', 'locale.h'),
+     'HAVE_SETPGRP': ('setpgrp', 'unistd.h'),
+     'HAVE_SETPRIORITY': ('setpriority', 'sys/resource.h'),
+     'HAVE_SETREUID': ('setreuid', 'unistd.h'),
+     'HAVE_SETSID': ('setsid', 'unistd.h'),
+     'HAVE_SETUID': ('setuid', 'unistd.h'),
+     'HAVE_SETVBUF': ('setvbuf', 'unistd.h'),
+     'HAVE_SIGALTSTACK': ('sigaltstack', 'signal.h'),
+     'HAVE_SIGINTERRUPT': ('siginterrupt', 'signal.h'),
+     'HAVE_SIGPENDING': ('sigpending', 'signal.h'),
+     'HAVE_SIGRELSE': ('sigrelse', 'signal.h'),
+     'HAVE_SIGTIMEDWAIT': ('sigtimedwait', 'signal.h'),
+     'HAVE_SIGWAIT': ('sigwait', 'signal.h'),
+     'HAVE_SIGWAITINFO': ('sigwaitinfo', 'signal.h'),
+     'HAVE_SOCKETPAIR': ('socketpair', 'sys/socket.h'),
+     'HAVE_STRFTIME': ('strftime', 'time.h'),
+     'HAVE_SYMLINKAT': ('symlinkat', 'unistd.h'),
+     'HAVE_SYNC': ('sync', 'unistd.h'),
+     'HAVE_TCGETPGRP': ('tcgetpgrp', 'unistd.h'),
+     'HAVE_TCSETPGRP': ('tcsetpgrp', 'unistd.h'),
+     'HAVE_TEMPNAM': ('tempnam', 'stdio.h'),
+     'HAVE_TIMES': ('times', 'sys/times.h'),
+     'HAVE_TEMPFILE': ('tempfile', 'stdio.h'),
+     'HAVE_TMPNAM': ('tmpnam', 'stdio.h'),
+     'HAVE_TMPNAM_R': ('tmpnam_r', 'stdio.h'),
+     'HAVE_TRUNCATE': ('truncate', 'unistd.h'),
+     'HAVE_TZNAME': ('tzname', 'time.h'),
+     'HAVE_UNAME': ('uname', 'sys/utsname.h'),
+     'HAVE_UNLINKAT': ('unlinkat', 'unistd.h'),
+     'HAVE_UTIMENSAT': ('utimensat', 'sys/stat.h'),
+     'HAVE_WAIT3': ('wait3', 'sys/wait.h'),
+     'HAVE_WAIT4': ('wait4', 'sys/wait.h'),
+     'HAVE_WAITID': ('waitid', 'sys/wait.h'),
+     'HAVE_WRITEV': ('writev', 'sys/uio.h'),
+     'HAVE_WMEMCMP': ('wmemcmp', 'wchar.h'),
+     'HAVE_ATAN': ('atan', 'math.h'),
+     'HAVE_ATAN2': ('atan2', 'math.h'),
+     'HAVE_ACOS': ('acos', 'math.h'),
+     'HAVE_ACOSH': ('acosh', 'math.h'),
+     'HAVE_ASIN': ('asin', 'math.h'),
+     'HAVE_ASINH': ('asinh', 'math.h'),
+     'HAVE_ATANH': ('atanh', 'math.h'),
+     'HAVE_CEIL': ('ceil', 'math.h'),
+     'HAVE_COPYSIGN': ('copysign', 'math.h'),
+     'HAVE_COS': ('cos', 'math.h'),
+     'HAVE_COSH': ('cosh', 'math.h'),
+     'HAVE_COSF': ('cosf', 'math.h'),
+     'HAVE_EXPM1': ('expm1', 'math.h'),
+     'HAVE_FABS': ('fabs', 'math.h'),
+     'HAVE_FINITE': ('finite', 'math.h'),
+     'HAVE_FLOOR': ('floor', 'math.h'),
+     'HAVE_GAMMA': ('gamma', 'math.h'),
+     'HAVE_HYPOT': ('hypot', 'math.h'),
+     'HAVE_ISINF': ('isinf', 'math.h'),
+     'HAVE_LOG': ('log', 'math.h'),
+     'HAVE_LOG1P': ('log1p', 'math.h'),
+     'HAVE_LOG2': ('log2', 'math.h'),
+     'HAVE_LGAMMA': ('lgamma', 'math.h'),
+     'HAVE_POW': ('pow', 'math.h'),
+     'HAVE_SCALBN': ('scalbn', 'math.h'),
+     'HAVE_SIN': ('sin', 'math.h'),
+     'HAVE_SINF': ('sinf', 'math.h'),
+     'HAVE_SINH': ('sinh', 'math.h'),
+     'HAVE_SQRT': ('sqrt', 'math.h'),
+     'HAVE_TGAMMA': ('tgamma', 'math.h'),
+     'HAVE_FSEEKO': ('fseeko', 'stdio.h'),
+     'HAVE_FSEEKO64': ('fseeko64', 'stdio.h'),
+     'HAVE_SETJMP': ('setjmp', 'setjmp.h'),
+     'HAVE_PTHREAD_SETNAME_NP': ('pthread_setname_np', 'pthread.h'),
+     'HAVE_PTHREAD_SET_NAME_NP': ('pthread_set_name_np', 'pthread.h'),
+     }
+
+headers = []
+functions = []
+sizes = []
+
+if len(sys.argv) != 2:
+    print(help_message.format(sys.argv[0]))
+    sys.exit(0)
+
+with open(sys.argv[1]) as f:
+    for line in f:
+        line = line.strip()
+        arr = line.split()
+
+        # Check for headers.
+        if line.startswith('#mesondefine') and line.endswith('_H'):
+            token = line.split()[1]
+            tarr = token.split('_')[1:-1]
+            tarr = [x.lower() for x in tarr]
+            hname = '/'.join(tarr) + '.h'
+            headers.append((token, hname))
+
+        # Check for functions.
+        try:
+            token = arr[1]
+            if token in function_data:
+                fdata = function_data[token]
+                functions.append([token, fdata[0], fdata[1]])
+            elif token.startswith('HAVE_') and not token.endswith('_H'):
+                functions.append([token])
+        except Exception:
+            pass
+
+        # Check for sizeof tests.
+        if len(arr) != 2:
+            continue
+        elem = arr[1]
+        if elem.startswith('SIZEOF_'):
+            typename = elem.split('_', 1)[1] \
+                .replace('_P', '*') \
+                .replace('_', ' ') \
+                .lower() \
+                .replace('size t', 'size_t')
+            sizes.append((elem, typename))
+
+print('''cc = meson.get_compiler('c')
+cdata = configuration_data()''')
+
+# Convert header checks.
+
+print('check_headers = [')
+for token, hname in headers:
+    print("  ['{}', '{}'],".format(token, hname))
+print(']\n')
+
+print('''foreach h : check_headers
+  if cc.has_header(h.get(1))
+    cdata.set(h.get(0), 1)
+  endif
+endforeach
+''')
+
+# Convert function checks.
+
+print('check_functions = [')
+for tok in functions:
+    if len(tok) == 3:
+        tokstr, fdata0, fdata1 = tok
+        print("  ['{}', '{}', '#include<{}>'],".format(tokstr, fdata0, fdata1))
+    else:
+        print('# check token', tok)
+print(']\n')
+
+print('''foreach f : check_functions
+  if cc.has_function(f.get(1), prefix : f.get(2))
+    cdata.set(f.get(0), 1)
+  endif
+endforeach
+''')
+
+# Convert sizeof checks.
+
+for elem, typename in sizes:
+    print("cdata.set('{}', cc.sizeof('{}'))".format(elem, typename))
+
+print('''
+configure_file(input : 'config.h.meson',
+  output : 'config.h',
+  configuration : cdata)''')
diff --git a/meson/tools/boost_names.py b/meson/tools/boost_names.py
new file mode 100755
index 000000000..b66c6cc1e
--- /dev/null
+++ b/meson/tools/boost_names.py
@@ -0,0 +1,300 @@
+#!/usr/bin/env python3
+
+# Copyright 2017 Niklas Claesson
+
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+
+#     http://www.apache.org/licenses/LICENSE-2.0
+
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+"""This is two implementations for how to get module names from the boost
+sources.  One relies on json metadata files in the sources, the other relies on
+the folder names.
+
+Run the tool in the boost directory and append the stdout to the misc.py:
+
+boost/$ path/to/meson/tools/boost_names.py >> path/to/meson/dependencies/misc.py
+"""
+
+import sys
+import json
+import re
+import textwrap
+import functools
+import typing as T
+from pathlib import Path
+
+lib_dir = Path('libs')
+jamroot = Path('Jamroot')
+
+not_modules = ['config', 'disjoint_sets', 'headers']
+
+export_modules = False
+
+
+@functools.total_ordering
+class BoostLibrary():
+    def __init__(self, name: str, shared: T.List[str], static: T.List[str], single: T.List[str], multi: T.List[str]):
+        self.name = name
+        self.shared = sorted(set(shared))
+        self.static = sorted(set(static))
+        self.single = sorted(set(single))
+        self.multi = sorted(set(multi))
+
+    def __lt__(self, other: T.Any) -> T.Union[bool, 'NotImplemented']:
+        if isinstance(other, BoostLibrary):
+            return self.name < other.name
+        return NotImplemented
+
+    def __eq__(self, other: T.Any) -> T.Union[bool, 'NotImplemented']:
+        if isinstance(other, BoostLibrary):
+            return self.name == other.name
+        elif isinstance(other, str):
+            return self.name == other
+        return NotImplemented
+
+    def __hash__(self) -> int:
+        return hash(self.name)
+
+@functools.total_ordering
+class BoostModule():
+    def __init__(self, name: str, key: str, desc: str, libs: T.List[BoostLibrary]):
+        self.name = name
+        self.key = key
+        self.desc = desc
+        self.libs = libs
+
+    def __lt__(self, other: T.Any) -> T.Union[bool, 'NotImplemented']:
+        if isinstance(other, BoostModule):
+            return self.key < other.key
+        return NotImplemented
+
+
+def get_boost_version() -> T.Optional[str]:
+    raw = jamroot.read_text()
+    m = re.search(r'BOOST_VERSION\s*:\s*([0-9\.]+)\s*;', raw)
+    if m:
+        return m.group(1)
+    return None
+
+
+def get_libraries(jamfile: Path) -> T.List[BoostLibrary]:
+    # Extract libraries from the boost Jamfiles. This includes:
+    #  - library name
+    #  - compiler flags
+
+    libs: T.List[BoostLibrary] = []
+    raw = jamfile.read_text()
+    raw = re.sub(r'#.*\n', '\n', raw)  # Remove comments
+    raw = re.sub(r'\s+', ' ', raw)     # Force single space
+    raw = re.sub(r'}', ';', raw)       # Cheat code blocks by converting } to ;
+
+    cmds = raw.split(';')              # Commands always terminate with a ; (I hope)
+    cmds = [x.strip() for x in cmds]   # Some cleanup
+
+    project_usage_requirements: T.List[str] = []
+
+    # "Parse" the relevant sections
+    for i in cmds:
+        parts = i.split(' ')
+        parts = [x for x in parts if x not in ['']]
+        if not parts:
+            continue
+
+        # Parse project
+        if parts[0] in ['project']:
+            attributes: T.Dict[str, T.List[str]] = {}
+            curr: T.Optional[str] = None
+
+            for j in parts:
+                if j == ':':
+                    curr = None
+                elif curr is None:
+                    curr = j
+                else:
+                    if curr not in attributes:
+                        attributes[curr] = []
+                    attributes[curr] += [j]
+
+            if 'usage-requirements' in attributes:
+                project_usage_requirements = attributes['usage-requirements']
+
+        # Parse libraries
+        elif parts[0] in ['lib', 'boost-lib']:
+            assert len(parts) >= 2
+
+            # Get and check the library name
+            lname = parts[1]
+            if parts[0] == 'boost-lib':
+                lname = f'boost_{lname}'
+            if not lname.startswith('boost_'):
+                continue
+
+            # Count `:` to only select the 'usage-requirements'
+            # See https://boostorg.github.io/build/manual/master/index.html#bbv2.main-target-rule-syntax
+            colon_counter = 0
+            usage_requirements: T.List[str] = []
+            for j in parts:
+                if j == ':':
+                    colon_counter += 1
+                elif colon_counter >= 4:
+                    usage_requirements += [j]
+
+            # Get shared / static defines
+            shared: T.List[str] = []
+            static: T.List[str] = []
+            single: T.List[str] = []
+            multi: T.List[str] = []
+            for j in usage_requirements + project_usage_requirements:
+                m1 = re.match(r'shared:(.*)', j)
+                m2 = re.match(r'static:(.*)', j)
+                m3 = re.match(r'single:(.*)', j)
+                m4 = re.match(r'multi:(.*)', j)
+
+                if m1:
+                    shared += [f'-D{m1.group(1)}']
+                if m2:
+                    static += [f'-D{m2.group(1)}']
+                if m3:
+                    single +=[f'-D{m3.group(1)}']
+                if m4:
+                    multi += [f'-D{m4.group(1)}']
+
+            libs += [BoostLibrary(lname, shared, static, single, multi)]
+
+    return libs
+
+
+def process_lib_dir(ldir: Path) -> T.List[BoostModule]:
+    meta_file = ldir / 'meta' / 'libraries.json'
+    bjam_file = ldir / 'build' / 'Jamfile.v2'
+    if not meta_file.exists():
+        print(f'WARNING: Meta file {meta_file} does not exist')
+        return []
+
+    # Extract libs
+    libs: T.List[BoostLibrary] = []
+    if bjam_file.exists():
+        libs = get_libraries(bjam_file)
+
+    # Extract metadata
+    data = json.loads(meta_file.read_text())
+    if not isinstance(data, list):
+        data = [data]
+
+    modules: T.List[BoostModule] = []
+    for i in data:
+        modules += [BoostModule(i['name'], i['key'], i['description'], libs)]
+
+    return modules
+
+
+def get_modules() -> T.List[BoostModule]:
+    modules: T.List[BoostModule] = []
+    for i in lib_dir.iterdir():
+        if not i.is_dir() or i.name in not_modules:
+            continue
+
+        # numeric has sub libs
+        subdirs = i / 'sublibs'
+        metadir = i / 'meta'
+        if subdirs.exists() and not metadir.exists():
+            for j in i.iterdir():
+                if not j.is_dir():
+                    continue
+                modules += process_lib_dir(j)
+        else:
+            modules += process_lib_dir(i)
+
+    return modules
+
+
+def main() -> int:
+    if not lib_dir.is_dir() or not jamroot.exists():
+        print("ERROR: script must be run in boost source directory")
+        return 1
+
+    vers = get_boost_version()
+    modules = get_modules()
+    modules = sorted(modules)
+    libraries = [x for y in modules for x in y.libs]
+    libraries = sorted(set(libraries))
+
+    print(textwrap.dedent(f'''\
+        ####      ---- BEGIN GENERATED ----      ####
+        #                                           #
+        # Generated with tools/boost_names.py:
+        #  - boost version:   {vers}
+        #  - modules found:   {len(modules)}
+        #  - libraries found: {len(libraries)}
+        #
+
+        class BoostLibrary():
+            def __init__(self, name: str, shared: T.List[str], static: T.List[str], single: T.List[str], multi: T.List[str]):
+                self.name = name
+                self.shared = shared
+                self.static = static
+                self.single = single
+                self.multi = multi
+
+        class BoostModule():
+            def __init__(self, name: str, key: str, desc: str, libs: T.List[str]):
+                self.name = name
+                self.key = key
+                self.desc = desc
+                self.libs = libs
+
+
+        # dict of all know libraries with additional compile options
+        boost_libraries = {{\
+    '''))
+
+    for i in libraries:
+        print(textwrap.indent(textwrap.dedent(f"""\
+            '{i.name}': BoostLibrary(
+                name='{i.name}',
+                shared={i.shared},
+                static={i.static},
+                single={i.single},
+                multi={i.multi},
+            ),\
+        """), '    '))
+
+    if export_modules:
+        print(textwrap.dedent(f'''\
+            }}
+
+
+            # dict of all modules with metadata
+            boost_modules = {{\
+        '''))
+
+        for mod in modules:
+            desc_excaped = re.sub(r"'", "\\'", mod.desc)
+            print(textwrap.indent(textwrap.dedent(f"""\
+                '{mod.key}': BoostModule(
+                    name='{mod.name}',
+                    key='{mod.key}',
+                    desc='{desc_excaped}',
+                    libs={[x.name for x in mod.libs]},
+                ),\
+            """), '    '))
+
+    print(textwrap.dedent(f'''\
+        }}
+
+        #                                           #
+        ####       ---- END GENERATED ----       ####\
+    '''))
+
+    return 0
+
+if __name__ == '__main__':
+    sys.exit(main())
diff --git a/meson/tools/build_website.py b/meson/tools/build_website.py
new file mode 100755
index 000000000..5486b6937
--- /dev/null
+++ b/meson/tools/build_website.py
@@ -0,0 +1,51 @@
+#!/usr/bin/env python3
+
+import os, sys, subprocess, shutil
+
+assert(os.getcwd() == '/home/jpakkane')
+
+from glob import glob
+
+def purge(fname):
+    if not os.path.exists(fname):
+        return
+    if os.path.isdir(fname):
+        shutil.rmtree(fname)
+    os.unlink(fname)
+
+def update():
+    webdir = 'mesonweb'
+    repodir = 'mesonwebbuild'
+    docdir = os.path.join(repodir, 'docs')
+    builddir = os.path.join(docdir, 'builddir')
+    htmldir = os.path.join(builddir, 'Meson documentation-doc/html')
+#    subprocess.check_call(['git', 'pull'], cwd=webdir)
+    subprocess.check_call(['git', 'fetch', '-a'], cwd=repodir)
+    subprocess.check_call(['git', 'reset', '--hard', 'origin/master'],
+                          cwd=repodir)
+    if os.path.isdir(htmldir):
+        shutil.rmtree(htmldir)
+    if os.path.isdir(builddir):
+        shutil.rmtree(builddir)
+    env = os.environ.copy()
+    env['PATH'] = env['PATH'] + ':/home/jpakkane/.local/bin'
+    subprocess.check_call(['../meson.py', '.', 'builddir'], cwd=docdir, env=env)
+    subprocess.check_call(['ninja'], cwd=builddir)
+    old_files = glob(os.path.join(webdir, '*'))
+    for f in old_files:
+        base = f[len(webdir)+1:]
+        if base == 'CNAME' or base == 'favicon.png':
+            continue
+        subprocess.check_call(['git', 'rm', '-rf', base], cwd=webdir)
+    assert(os.path.isdir(webdir))
+    new_entries = glob(os.path.join(htmldir, '*'))
+    for e in new_entries:
+        shutil.move(e, webdir)
+    subprocess.check_call('git add *', shell=True, cwd=webdir)
+    subprocess.check_call(['git', 'commit', '-a', '-m', 'Bleep. Bloop. I am a bot.'],
+                          cwd=webdir)
+    subprocess.check_call(['git', 'push'], cwd=webdir)
+    shutil.rmtree(builddir)
+
+if __name__ == '__main__':
+    update()
diff --git a/meson/tools/cmake2meson.py b/meson/tools/cmake2meson.py
new file mode 100755
index 000000000..05acd8f68
--- /dev/null
+++ b/meson/tools/cmake2meson.py
@@ -0,0 +1,329 @@
+#!/usr/bin/env python3
+
+# Copyright 2014 Jussi Pakkanen
+
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+
+#     http://www.apache.org/licenses/LICENSE-2.0
+
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import typing as T
+from pathlib import Path
+import sys
+import re
+import argparse
+
+
+class Token:
+    def __init__(self, tid: str, value: str):
+        self.tid = tid
+        self.value = value
+        self.lineno = 0
+        self.colno = 0
+
+class Statement:
+    def __init__(self, name: str, args: list):
+        self.name = name.lower()
+        self.args = args
+
+class Lexer:
+    def __init__(self):
+        self.token_specification = [
+            # Need to be sorted longest to shortest.
+            ('ignore', re.compile(r'[ \t]')),
+            ('string', re.compile(r'"([^\\]|(\\.))*?"', re.M)),
+            ('varexp', re.compile(r'\${[-_0-9a-z/A-Z.]+}')),
+            ('id', re.compile('''[,-><${}=+_0-9a-z/A-Z|@.*]+''')),
+            ('eol', re.compile(r'\n')),
+            ('comment', re.compile(r'#.*')),
+            ('lparen', re.compile(r'\(')),
+            ('rparen', re.compile(r'\)')),
+        ]
+
+    def lex(self, code: str) -> T.Iterator[Token]:
+        lineno = 1
+        line_start = 0
+        loc = 0
+        col = 0
+        while loc < len(code):
+            matched = False
+            for (tid, reg) in self.token_specification:
+                mo = reg.match(code, loc)
+                if mo:
+                    col = mo.start() - line_start
+                    matched = True
+                    loc = mo.end()
+                    match_text = mo.group()
+                    if tid == 'ignore':
+                        continue
+                    if tid == 'comment':
+                        yield(Token('comment', match_text))
+                    elif tid == 'lparen':
+                        yield(Token('lparen', '('))
+                    elif tid == 'rparen':
+                        yield(Token('rparen', ')'))
+                    elif tid == 'string':
+                        yield(Token('string', match_text[1:-1]))
+                    elif tid == 'id':
+                        yield(Token('id', match_text))
+                    elif tid == 'eol':
+                        # yield('eol')
+                        lineno += 1
+                        col = 1
+                        line_start = mo.end()
+                    elif tid == 'varexp':
+                        yield(Token('varexp', match_text[2:-1]))
+                    else:
+                        raise ValueError('lex: unknown element {}'.format(tid))
+                    break
+            if not matched:
+                raise ValueError('Lexer got confused line %d column %d' % (lineno, col))
+
+class Parser:
+    def __init__(self, code: str):
+        self.stream = Lexer().lex(code)
+        self.getsym()
+
+    def getsym(self):
+        try:
+            self.current = next(self.stream)
+        except StopIteration:
+            self.current = Token('eof', '')
+
+    def accept(self, s: str) -> bool:
+        if self.current.tid == s:
+            self.getsym()
+            return True
+        return False
+
+    def expect(self, s: str) -> bool:
+        if self.accept(s):
+            return True
+        raise ValueError('Expecting %s got %s.' % (s, self.current.tid), self.current.lineno, self.current.colno)
+
+    def statement(self) -> Statement:
+        cur = self.current
+        if self.accept('comment'):
+            return Statement('_', [cur.value])
+        self.accept('id')
+        self.expect('lparen')
+        args = self.arguments()
+        self.expect('rparen')
+        return Statement(cur.value, args)
+
+    def arguments(self) -> list:
+        args = []
+        if self.accept('lparen'):
+            args.append(self.arguments())
+            self.expect('rparen')
+        arg = self.current
+        if self.accept('comment'):
+            rest = self.arguments()
+            args += rest
+        elif self.accept('string') \
+                or self.accept('varexp') \
+                or self.accept('id'):
+            args.append(arg)
+            rest = self.arguments()
+            args += rest
+        return args
+
+    def parse(self) -> T.Iterator[Statement]:
+        while not self.accept('eof'):
+            yield(self.statement())
+
+def token_or_group(arg):
+    if isinstance(arg, Token):
+        return ' ' + arg.value
+    elif isinstance(arg, list):
+        line = ' ('
+        for a in arg:
+            line += ' ' + token_or_group(a)
+        line += ' )'
+        return line
+
+class Converter:
+    ignored_funcs = {'cmake_minimum_required': True,
+                     'enable_testing': True,
+                     'include': True}
+
+    def __init__(self, cmake_root: str):
+        self.cmake_root = Path(cmake_root).expanduser()
+        self.indent_unit = '  '
+        self.indent_level = 0
+        self.options = []  # type: T.List[tuple]
+
+    def convert_args(self, args: T.List[Token], as_array: bool = True) -> str:
+        res = []
+        if as_array:
+            start = '['
+            end = ']'
+        else:
+            start = ''
+            end = ''
+        for i in args:
+            if i.tid == 'id':
+                res.append("'%s'" % i.value)
+            elif i.tid == 'varexp':
+                res.append('%s' % i.value.lower())
+            elif i.tid == 'string':
+                res.append("'%s'" % i.value)
+            else:
+                raise ValueError('Unknown arg type {}'.format(i.tid))
+        if len(res) > 1:
+            return start + ', '.join(res) + end
+        if len(res) == 1:
+            return res[0]
+        return ''
+
+    def write_entry(self, outfile: T.TextIO, t: Statement):
+        if t.name in Converter.ignored_funcs:
+            return
+        preincrement = 0
+        postincrement = 0
+        if t.name == '_':
+            line = t.args[0]
+        elif t.name == 'add_subdirectory':
+            line = "subdir('" + t.args[0].value + "')"
+        elif t.name == 'pkg_search_module' or t.name == 'pkg_search_modules':
+            varname = t.args[0].value.lower()
+            mods = ["dependency('%s')" % i.value for i in t.args[1:]]
+            if len(mods) == 1:
+                line = '%s = %s' % (varname, mods[0])
+            else:
+                line = '%s = [%s]' % (varname, ', '.join(["'%s'" % i for i in mods]))
+        elif t.name == 'find_package':
+            line = "%s_dep = dependency('%s')" % (t.args[0].value, t.args[0].value)
+        elif t.name == 'find_library':
+            line = "%s = find_library('%s')" % (t.args[0].value.lower(), t.args[0].value)
+        elif t.name == 'add_executable':
+            line = '%s_exe = executable(%s)' % (t.args[0].value, self.convert_args(t.args, False))
+        elif t.name == 'add_library':
+            if t.args[1].value == 'SHARED':
+                libcmd = 'shared_library'
+                args = [t.args[0]] + t.args[2:]
+            elif t.args[1].value == 'STATIC':
+                libcmd = 'static_library'
+                args = [t.args[0]] + t.args[2:]
+            else:
+                libcmd = 'library'
+                args = t.args
+            line = '%s_lib = %s(%s)' % (t.args[0].value, libcmd, self.convert_args(args, False))
+        elif t.name == 'add_test':
+            line = 'test(%s)' % self.convert_args(t.args, False)
+        elif t.name == 'option':
+            optname = t.args[0].value
+            description = t.args[1].value
+            if len(t.args) > 2:
+                default = t.args[2].value
+            else:
+                default = None
+            self.options.append((optname, description, default))
+            return
+        elif t.name == 'project':
+            pname = t.args[0].value
+            args = [pname]
+            for l in t.args[1:]:
+                l = l.value.lower()
+                if l == 'cxx':
+                    l = 'cpp'
+                args.append(l)
+            args = ["'%s'" % i for i in args]
+            line = 'project(' + ', '.join(args) + ", default_options : ['default_library=static'])"
+        elif t.name == 'set':
+            varname = t.args[0].value.lower()
+            line = '%s = %s\n' % (varname, self.convert_args(t.args[1:]))
+        elif t.name == 'if':
+            postincrement = 1
+            try:
+                line = 'if %s' % self.convert_args(t.args, False)
+            except AttributeError:  # complex if statements
+                line = t.name
+                for arg in t.args:
+                    line += token_or_group(arg)
+        elif t.name == 'elseif':
+            preincrement = -1
+            postincrement = 1
+            try:
+                line = 'elif %s' % self.convert_args(t.args, False)
+            except AttributeError:  # complex if statements
+                line = t.name
+                for arg in t.args:
+                    line += token_or_group(arg)
+        elif t.name == 'else':
+            preincrement = -1
+            postincrement = 1
+            line = 'else'
+        elif t.name == 'endif':
+            preincrement = -1
+            line = 'endif'
+        else:
+            line = '''# %s(%s)''' % (t.name, self.convert_args(t.args))
+        self.indent_level += preincrement
+        indent = self.indent_level * self.indent_unit
+        outfile.write(indent)
+        outfile.write(line)
+        if not(line.endswith('\n')):
+            outfile.write('\n')
+        self.indent_level += postincrement
+
+    def convert(self, subdir: Path = None):
+        if not subdir:
+            subdir = self.cmake_root
+        cfile = Path(subdir).expanduser() / 'CMakeLists.txt'
+        try:
+            with cfile.open() as f:
+                cmakecode = f.read()
+        except FileNotFoundError:
+            print('\nWarning: No CMakeLists.txt in', subdir, '\n', file=sys.stderr)
+            return
+        p = Parser(cmakecode)
+        with (subdir / 'meson.build').open('w') as outfile:
+            for t in p.parse():
+                if t.name == 'add_subdirectory':
+                    # print('\nRecursing to subdir',
+                    #       self.cmake_root / t.args[0].value,
+                    #       '\n')
+                    self.convert(subdir / t.args[0].value)
+                    # print('\nReturning to', self.cmake_root, '\n')
+                self.write_entry(outfile, t)
+        if subdir == self.cmake_root and len(self.options) > 0:
+            self.write_options()
+
+    def write_options(self):
+        filename = self.cmake_root / 'meson_options.txt'
+        with filename.open('w') as optfile:
+            for o in self.options:
+                (optname, description, default) = o
+                if default is None:
+                    typestr = ''
+                    defaultstr = ''
+                else:
+                    if default == 'OFF':
+                        typestr = ' type : \'boolean\','
+                        default = 'false'
+                    elif default == 'ON':
+                        default = 'true'
+                        typestr = ' type : \'boolean\','
+                    else:
+                        typestr = ' type : \'string\','
+                    defaultstr = ' value : %s,' % default
+                line = "option(%r,%s%s description : '%s')\n" % (optname,
+                                                                 typestr,
+                                                                 defaultstr,
+                                                                 description)
+                optfile.write(line)
+
+if __name__ == '__main__':
+    p = argparse.ArgumentParser(description='Convert CMakeLists.txt to meson.build and meson_options.txt')
+    p.add_argument('cmake_root', help='CMake project root (where top-level CMakeLists.txt is)')
+    P = p.parse_args()
+
+    Converter(P.cmake_root).convert()
diff --git a/meson/tools/copy_files.py b/meson/tools/copy_files.py
new file mode 100644
index 000000000..39eaa0a66
--- /dev/null
+++ b/meson/tools/copy_files.py
@@ -0,0 +1,55 @@
+#!/usr/bin/env python3
+
+
+# Copyright 2018 The Meson development team
+
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+
+#     http://www.apache.org/licenses/LICENSE-2.0
+
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+'''
+Copy files
+'''
+
+import argparse
+import shutil
+import typing as T
+from pathlib import Path
+
+PathLike = T.Union[Path,str]
+
+def copy_files(files: T.List[str], input_dir: PathLike, output_dir: PathLike) -> None:
+    if not input_dir:
+        raise ValueError(f'Input directory value is not set')
+    if not output_dir:
+        raise ValueError(f'Output directory value is not set')
+    
+    input_dir = Path(input_dir).resolve()
+    output_dir = Path(output_dir).resolve()
+    output_dir.mkdir(parents=True, exist_ok=True)
+    
+    for f in files:
+        if (input_dir/f).is_dir():
+            shutil.copytree(input_dir/f, output_dir/f)
+        else:
+            shutil.copy2(input_dir/f, output_dir/f)
+
+if __name__ == '__main__':
+    parser = argparse.ArgumentParser(description='Copy files')
+    parser.add_argument('files', metavar='FILE', nargs='*')
+    parser.add_argument('-C', dest='input_dir', required=True)
+    parser.add_argument('--output-dir', required=True)
+    
+    args = parser.parse_args()
+
+    copy_files(files=args.files,
+               input_dir=args.input_dir,
+               output_dir=args.output_dir)
diff --git a/meson/tools/dircondenser.py b/meson/tools/dircondenser.py
new file mode 100755
index 000000000..0e28becca
--- /dev/null
+++ b/meson/tools/dircondenser.py
@@ -0,0 +1,89 @@
+#!/usr/bin/env python3
+
+
+# Copyright 2018 The Meson development team
+
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+
+#     http://www.apache.org/licenses/LICENSE-2.0
+
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+'''Renames test case directories using Git from this:
+
+1 something
+3 other
+3 foo
+3 bar
+
+to this:
+
+1 something
+2 other
+3 foo
+4 bar
+
+This directory must be run from source root as it touches run_unittests.py.
+'''
+
+import typing as T
+import os
+import sys
+import subprocess
+
+from glob import glob
+
+def get_entries() -> T.List[T.Tuple[int, str]]:
+    entries = []
+    for e in glob('*'):
+        if not os.path.isdir(e):
+            raise SystemExit('Current directory must not contain any files.')
+        (number, rest) = e.split(' ', 1)
+        try:
+            numstr = int(number)
+        except ValueError:
+            raise SystemExit('Dir name {} does not start with a number.'.format(e))
+        entries.append((numstr, rest))
+    entries.sort()
+    return entries
+
+def replace_source(sourcefile: str, replacements: T.List[T.Tuple[str, str]]):
+    with open(sourcefile, 'r') as f:
+        contents = f.read()
+    for old_name, new_name in replacements:
+        contents = contents.replace(old_name, new_name)
+    with open(sourcefile, 'w') as f:
+        f.write(contents)
+
+def condense(dirname: str):
+    curdir = os.getcwd()
+    os.chdir(dirname)
+    entries = get_entries()
+    replacements = []
+    for _i, e in enumerate(entries):
+        i = _i + 1
+        if e[0] != i:
+            old_name = str(e[0]) + ' ' + e[1]
+            new_name = str(i) + ' ' + e[1]
+            #print('git mv "%s" "%s"' % (old_name, new_name))
+            subprocess.check_call(['git', 'mv', old_name, new_name])
+            replacements.append((old_name, new_name))
+            # update any appearances of old_name in expected stdout in test.json
+            json = os.path.join(new_name, 'test.json')
+            if os.path.isfile(json):
+                replace_source(json, [(old_name, new_name)])
+    os.chdir(curdir)
+    replace_source('run_unittests.py', replacements)
+    replace_source('run_project_tests.py', replacements)
+
+if __name__ == '__main__':
+    if len(sys.argv) != 1:
+        raise SystemExit('This script takes no arguments.')
+    for d in glob('test cases/*'):
+        condense(d)
diff --git a/meson/tools/gen_data.py b/meson/tools/gen_data.py
new file mode 100755
index 000000000..2cc05a44e
--- /dev/null
+++ b/meson/tools/gen_data.py
@@ -0,0 +1,139 @@
+#!/usr/bin/env python3
+
+# Copyright 2020 Daniel Mensinger
+
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+
+#     http://www.apache.org/licenses/LICENSE-2.0
+
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import sys
+import hashlib
+import textwrap
+import re
+from pathlib import Path
+from datetime import datetime
+import typing as T
+
+class DataFile:
+    file_counter = 0
+
+    def __init__(self, path: Path, root: Path):
+        self.path = path
+        self.id = self.path.relative_to(root)
+        self.data_str = f'file_{DataFile.file_counter}_data_' + re.sub('[^a-zA-Z0-9]', '_', self.path.name)
+        DataFile.file_counter += 1
+
+        b = self.path.read_bytes()
+        self.data = b.decode()
+        self.sha256sum = hashlib.sha256(b).hexdigest()
+
+    def __repr__(self) -> str:
+        return f'<{type(self).__name__}: [{self.sha256sum}] {self.id}>'
+
+def main() -> int:
+    root_dir = Path(__file__).resolve().parents[1]
+    mesonbuild_dir = root_dir / 'mesonbuild'
+    out_file = mesonbuild_dir / 'mesondata.py'
+
+    data_dirs = mesonbuild_dir.glob('**/data')
+
+    data_files: T.List[DataFile] = []
+
+    for d in data_dirs:
+        for p in d.iterdir():
+            data_files += [DataFile(p, mesonbuild_dir)]
+
+    print(f'Found {len(data_files)} data files')
+
+    # Generate the data script
+    data = ''
+
+    data += textwrap.dedent(f'''\
+        # Copyright {datetime.today().year} The Meson development team
+
+        # Licensed under the Apache License, Version 2.0 (the "License");
+        # you may not use this file except in compliance with the License.
+        # You may obtain a copy of the License at
+
+        #     http://www.apache.org/licenses/LICENSE-2.0
+
+        # Unless required by applicable law or agreed to in writing, software
+        # distributed under the License is distributed on an "AS IS" BASIS,
+        # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+        # See the License for the specific language governing permissions and
+        # limitations under the License.
+
+
+        ####
+        ####  WARNING: This is an automatically generated file! Do not edit!
+        ####           Generated by {Path(__file__).resolve().relative_to(root_dir)}
+        ####
+
+
+        from pathlib import Path
+        import typing as T
+
+        if T.TYPE_CHECKING:
+            from .environment import Environment
+
+        ######################
+        # BEGIN Data section #
+        ######################
+
+    ''')
+
+    for i in data_files:
+        data += f"{i.data_str} = '''\\\n{i.data}'''\n\n"
+
+    data += textwrap.dedent(f'''
+        ####################
+        # END Data section #
+        ####################
+
+        class DataFile:
+            def __init__(self, path: Path, sha256sum: str, data: str) -> None:
+                self.path = path
+                self.sha256sum = sha256sum
+                self.data = data
+
+            def write_once(self, path: Path) -> None:
+                if not path.exists():
+                    path.write_text(self.data)
+
+            def write_to_private(self, env: 'Environment') -> Path:
+                out_file = Path(env.scratch_dir) / 'data' / self.path.name
+                out_file.parent.mkdir(exist_ok=True)
+                self.write_once(out_file)
+                return out_file
+
+
+        mesondata = {{
+    ''')
+
+    for i in data_files:
+        data += textwrap.indent(textwrap.dedent(f"""\
+            '{i.id}': DataFile(
+                Path('{i.id}'),
+                '{i.sha256sum}',
+                {i.data_str},
+            ),
+        """), '    ')
+
+    data += textwrap.dedent('''\
+        }
+    ''')
+
+    print(f'Updating {out_file}')
+    out_file.write_text(data)
+    return 0
+
+if __name__ == '__main__':
+    sys.exit(main())
diff --git a/meson/tools/regenerate_docs.py b/meson/tools/regenerate_docs.py
new file mode 100755
index 000000000..d44357036
--- /dev/null
+++ b/meson/tools/regenerate_docs.py
@@ -0,0 +1,150 @@
+#!/usr/bin/env python3
+
+
+# Copyright 2018 The Meson development team
+
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+
+#     http://www.apache.org/licenses/LICENSE-2.0
+
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+'''
+Regenerate markdown docs by using `meson.py` from the root dir
+'''
+
+import argparse
+import jinja2
+import os
+import re
+import subprocess
+import sys
+import textwrap
+import typing as T
+from pathlib import Path
+
+PathLike = T.Union[Path,str]
+
+def _get_meson_output(root_dir: Path, args: T.List):
+    env = os.environ.copy()
+    env['COLUMNS'] = '80'
+    return subprocess.run([str(sys.executable), str(root_dir/'meson.py')] + args, check=True, capture_output=True, text=True, env=env).stdout.strip()
+
+def get_commands_data(root_dir: Path):
+    usage_start_pattern = re.compile(r'^usage: ', re.MULTILINE)
+    positional_start_pattern = re.compile(r'^positional arguments:[\t ]*[\r\n]+', re.MULTILINE)
+    options_start_pattern = re.compile(r'^optional arguments:[\t ]*[\r\n]+', re.MULTILINE)
+    commands_start_pattern = re.compile(r'^[A-Za-z ]*[Cc]ommands:[\t ]*[\r\n]+', re.MULTILINE)
+
+    def get_next_start(iterators, end):
+        return next((i.start() for i in iterators if i), end)
+
+    def normalize_text(text):
+        # clean up formatting
+        out = text
+        out = re.sub(r'\r\n', r'\r', out, flags=re.MULTILINE) # replace newlines with a linux EOL
+        out = re.sub(r'^ +$', '', out, flags=re.MULTILINE) # remove trailing whitespace
+        out = re.sub(r'(?:^\n+|\n+$)', '', out) # remove trailing empty lines
+        return out
+
+    def parse_cmd(cmd):
+        cmd_len = len(cmd)
+        usage = usage_start_pattern.search(cmd)
+        positionals = positional_start_pattern.search(cmd)
+        options = options_start_pattern.search(cmd)
+        commands = commands_start_pattern.search(cmd)
+
+        arguments_start = get_next_start([positionals, options, commands], None)
+        assert arguments_start
+
+        # replace `usage:` with `$` and dedent
+        dedent_size = (usage.end() - usage.start()) - len('$ ')
+        usage_text = textwrap.dedent(f'{dedent_size * " "}$ {normalize_text(cmd[usage.end():arguments_start])}')
+
+        return {
+            'usage': usage_text,
+            'arguments': normalize_text(cmd[arguments_start:cmd_len]),
+        }
+
+    def clean_dir_arguments(text):
+        # Remove platform specific defaults
+        args = [
+            'prefix',
+            'bindir',
+            'datadir',
+            'includedir',
+            'infodir',
+            'libdir',
+            'libexecdir',
+            'localedir',
+            'localstatedir',
+            'mandir',
+            'sbindir',
+            'sharedstatedir',
+            'sysconfdir'
+        ]
+        out = text
+        for a in args:
+            out = re.sub(r'(--' + a + r' .+?)\s+\(default:.+?\)(\.)?', r'\1\2', out, flags=re.MULTILINE|re.DOTALL)
+        return out
+
+    output = _get_meson_output(root_dir, ['--help'])
+    commands = set(c.strip() for c in re.findall(r'usage:(?:.+)?{((?:[a-z]+,*)+?)}', output, re.MULTILINE|re.DOTALL)[0].split(','))
+    commands.remove('help')
+
+    cmd_data = dict()
+
+    for cmd in commands:
+        cmd_output = _get_meson_output(root_dir, [cmd, '--help'])
+        cmd_data[cmd] = parse_cmd(cmd_output)
+        if cmd in ['setup', 'configure']:
+            cmd_data[cmd]['arguments'] = clean_dir_arguments(cmd_data[cmd]['arguments'])
+
+    return cmd_data
+
+def regenerate_commands(root_dir: Path, output_dir: Path) -> None:
+    with open(root_dir/'docs'/'markdown_dynamic'/'Commands.md') as f:
+        template = f.read()
+
+    cmd_data = get_commands_data(root_dir)
+
+    t = jinja2.Template(template, undefined=jinja2.StrictUndefined, keep_trailing_newline=True)
+    content = t.render(cmd_help=cmd_data)
+
+    output_file = output_dir/'Commands.md'
+    with open(output_file, 'w') as f:
+        f.write(content)
+
+    print(f'`{output_file}` was regenerated')
+
+def regenerate_docs(output_dir: PathLike,
+                    dummy_output_file: T.Optional[PathLike]) -> None:
+    if not output_dir:
+        raise ValueError(f'Output directory value is not set')
+        
+    output_dir = Path(output_dir).resolve()
+    output_dir.mkdir(parents=True, exist_ok=True)
+
+    root_dir = Path(__file__).resolve().parent.parent
+
+    regenerate_commands(root_dir, output_dir)
+
+    if dummy_output_file:
+        with open(output_dir/dummy_output_file, 'w') as f:
+            f.write('dummy file for custom_target output')
+
+if __name__ == '__main__':
+    parser = argparse.ArgumentParser(description='Generate meson docs')
+    parser.add_argument('--output-dir', required=True)
+    parser.add_argument('--dummy-output-file', type=str)
+    
+    args = parser.parse_args()
+
+    regenerate_docs(output_dir=args.output_dir,
+                    dummy_output_file=args.dummy_output_file)
diff --git a/meson_options.txt b/meson_options.txt
new file mode 100644
index 000000000..f6f64785f
--- /dev/null
+++ b/meson_options.txt
@@ -0,0 +1,78 @@
+option('qemu_suffix', type : 'string', value: 'qemu',
+       description: 'Suffix for QEMU data/modules/config directories (can be empty)')
+option('docdir', type : 'string', value : 'doc',
+       description: 'Base directory for documentation installation (can be empty)')
+option('qemu_firmwarepath', type : 'string', value : '',
+       description: 'search PATH for firmware files')
+option('sphinx_build', type : 'string', value : '',
+       description: 'Use specified sphinx-build [$sphinx_build] for building document (default to be empty)')
+
+option('docs', type : 'feature', value : 'auto',
+       description: 'Documentations build support')
+option('gettext', type : 'boolean', value : true,
+       description: 'Localization of the GTK+ user interface')
+option('install_blobs', type : 'boolean', value : true,
+       description: 'install provided firmware blobs')
+option('sparse', type : 'feature', value : 'auto',
+       description: 'sparse checker')
+
+option('malloc_trim', type : 'feature', value : 'auto',
+       description: 'enable libc malloc_trim() for memory optimization')
+option('malloc', type : 'combo', choices : ['system', 'tcmalloc', 'jemalloc'],
+       value: 'system', description: 'choose memory allocator to use')
+
+option('kvm', type: 'feature', value: 'auto',
+       description: 'KVM acceleration support')
+option('hax', type: 'feature', value: 'auto',
+       description: 'HAX acceleration support')
+option('whpx', type: 'feature', value: 'auto',
+       description: 'WHPX acceleration support')
+option('hvf', type: 'feature', value: 'auto',
+       description: 'HVF acceleration support')
+option('xen', type: 'feature', value: 'auto',
+       description: 'Xen backend support')
+option('xen_pci_passthrough', type: 'feature', value: 'auto',
+       description: 'Xen PCI passthrough support')
+option('tcg', type: 'feature', value: 'auto',
+       description: 'TCG support')
+
+option('cocoa', type : 'feature', value : 'auto',
+       description: 'Cocoa user interface (macOS only)')
+option('mpath', type : 'feature', value : 'auto',
+       description: 'Multipath persistent reservation passthrough')
+option('iconv', type : 'feature', value : 'auto',
+       description: 'Font glyph conversion support')
+option('curses', type : 'feature', value : 'auto',
+       description: 'curses UI')
+option('libudev', type : 'feature', value : 'auto',
+       description: 'Use libudev to enumerate host devices')
+option('sdl', type : 'feature', value : 'auto',
+       description: 'SDL user interface')
+option('sdl_image', type : 'feature', value : 'auto',
+       description: 'SDL Image support for icons')
+option('u2f', type : 'feature', value : 'auto',
+       description: 'U2F emulation support')
+option('vnc', type : 'feature', value : 'enabled',
+       description: 'VNC server')
+option('vnc_jpeg', type : 'feature', value : 'auto',
+       description: 'JPEG lossy compression for VNC server')
+option('vnc_png', type : 'feature', value : 'auto',
+       description: 'PNG compression for VNC server')
+option('vnc_sasl', type : 'feature', value : 'auto',
+       description: 'SASL authentication for VNC server')
+option('xkbcommon', type : 'feature', value : 'auto',
+       description: 'xkbcommon support')
+option('virtiofsd', type: 'feature', value: 'auto',
+       description: 'build virtiofs daemon (virtiofsd)')
+option('vhost_user_blk_server', type: 'feature', value: 'auto',
+       description: 'build vhost-user-blk server')
+
+option('capstone', type: 'combo', value: 'auto',
+       choices: ['disabled', 'enabled', 'auto', 'system', 'internal'],
+       description: 'Whether and how to find the capstone library')
+option('slirp', type: 'combo', value: 'auto',
+       choices: ['disabled', 'enabled', 'auto', 'system', 'internal'],
+       description: 'Whether and how to find the slirp library')
+option('fdt', type: 'combo', value: 'auto',
+       choices: ['disabled', 'enabled', 'auto', 'system', 'internal'],
+       description: 'Whether and how to find the libfdt library')
diff --git a/monitor/hmp-cmds.c b/monitor/hmp-cmds.c
new file mode 100644
index 000000000..65d8ff484
--- /dev/null
+++ b/monitor/hmp-cmds.c
@@ -0,0 +1,2273 @@
+/*
+ * Human Monitor Interface commands
+ *
+ * Copyright IBM, Corp. 2011
+ *
+ * Authors:
+ *  Anthony Liguori   
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2.  See
+ * the COPYING file in the top-level directory.
+ *
+ * Contributions after 2012-01-13 are licensed under the terms of the
+ * GNU GPL, version 2 or (at your option) any later version.
+ */
+
+#include "qemu/osdep.h"
+#include "monitor/hmp.h"
+#include "net/net.h"
+#include "net/eth.h"
+#include "chardev/char.h"
+#include "sysemu/block-backend.h"
+#include "sysemu/runstate.h"
+#include "qemu/config-file.h"
+#include "qemu/option.h"
+#include "qemu/timer.h"
+#include "qemu/sockets.h"
+#include "qemu/help_option.h"
+#include "monitor/monitor-internal.h"
+#include "qapi/error.h"
+#include "qapi/clone-visitor.h"
+#include "qapi/opts-visitor.h"
+#include "qapi/qapi-builtin-visit.h"
+#include "qapi/qapi-commands-block.h"
+#include "qapi/qapi-commands-char.h"
+#include "qapi/qapi-commands-control.h"
+#include "qapi/qapi-commands-machine.h"
+#include "qapi/qapi-commands-migration.h"
+#include "qapi/qapi-commands-misc.h"
+#include "qapi/qapi-commands-net.h"
+#include "qapi/qapi-commands-pci.h"
+#include "qapi/qapi-commands-rocker.h"
+#include "qapi/qapi-commands-run-state.h"
+#include "qapi/qapi-commands-tpm.h"
+#include "qapi/qapi-commands-ui.h"
+#include "qapi/qapi-visit-net.h"
+#include "qapi/qapi-visit-migration.h"
+#include "qapi/qmp/qdict.h"
+#include "qapi/qmp/qerror.h"
+#include "qapi/string-input-visitor.h"
+#include "qapi/string-output-visitor.h"
+#include "qom/object_interfaces.h"
+#include "ui/console.h"
+#include "qemu/cutils.h"
+#include "qemu/error-report.h"
+#include "exec/ramlist.h"
+#include "hw/intc/intc.h"
+#include "hw/rdma/rdma.h"
+#include "migration/snapshot.h"
+#include "migration/misc.h"
+
+#ifdef CONFIG_SPICE
+#include 
+#endif
+
+void hmp_handle_error(Monitor *mon, Error *err)
+{
+    if (err) {
+        error_reportf_err(err, "Error: ");
+    }
+}
+
+/*
+ * Produce a strList from a comma separated list.
+ * A NULL or empty input string return NULL.
+ */
+static strList *strList_from_comma_list(const char *in)
+{
+    strList *res = NULL;
+    strList **hook = &res;
+
+    while (in && in[0]) {
+        char *comma = strchr(in, ',');
+        *hook = g_new0(strList, 1);
+
+        if (comma) {
+            (*hook)->value = g_strndup(in, comma - in);
+            in = comma + 1; /* skip the , */
+        } else {
+            (*hook)->value = g_strdup(in);
+            in = NULL;
+        }
+        hook = &(*hook)->next;
+    }
+
+    return res;
+}
+
+void hmp_info_name(Monitor *mon, const QDict *qdict)
+{
+    NameInfo *info;
+
+    info = qmp_query_name(NULL);
+    if (info->has_name) {
+        monitor_printf(mon, "%s\n", info->name);
+    }
+    qapi_free_NameInfo(info);
+}
+
+void hmp_info_version(Monitor *mon, const QDict *qdict)
+{
+    VersionInfo *info;
+
+    info = qmp_query_version(NULL);
+
+    monitor_printf(mon, "%" PRId64 ".%" PRId64 ".%" PRId64 "%s\n",
+                   info->qemu->major, info->qemu->minor, info->qemu->micro,
+                   info->package);
+
+    qapi_free_VersionInfo(info);
+}
+
+void hmp_info_kvm(Monitor *mon, const QDict *qdict)
+{
+    KvmInfo *info;
+
+    info = qmp_query_kvm(NULL);
+    monitor_printf(mon, "kvm support: ");
+    if (info->present) {
+        monitor_printf(mon, "%s\n", info->enabled ? "enabled" : "disabled");
+    } else {
+        monitor_printf(mon, "not compiled\n");
+    }
+
+    qapi_free_KvmInfo(info);
+}
+
+void hmp_info_status(Monitor *mon, const QDict *qdict)
+{
+    StatusInfo *info;
+
+    info = qmp_query_status(NULL);
+
+    monitor_printf(mon, "VM status: %s%s",
+                   info->running ? "running" : "paused",
+                   info->singlestep ? " (single step mode)" : "");
+
+    if (!info->running && info->status != RUN_STATE_PAUSED) {
+        monitor_printf(mon, " (%s)", RunState_str(info->status));
+    }
+
+    monitor_printf(mon, "\n");
+
+    qapi_free_StatusInfo(info);
+}
+
+void hmp_info_uuid(Monitor *mon, const QDict *qdict)
+{
+    UuidInfo *info;
+
+    info = qmp_query_uuid(NULL);
+    monitor_printf(mon, "%s\n", info->UUID);
+    qapi_free_UuidInfo(info);
+}
+
+void hmp_info_chardev(Monitor *mon, const QDict *qdict)
+{
+    ChardevInfoList *char_info, *info;
+
+    char_info = qmp_query_chardev(NULL);
+    for (info = char_info; info; info = info->next) {
+        monitor_printf(mon, "%s: filename=%s\n", info->value->label,
+                                                 info->value->filename);
+    }
+
+    qapi_free_ChardevInfoList(char_info);
+}
+
+void hmp_info_mice(Monitor *mon, const QDict *qdict)
+{
+    MouseInfoList *mice_list, *mouse;
+
+    mice_list = qmp_query_mice(NULL);
+    if (!mice_list) {
+        monitor_printf(mon, "No mouse devices connected\n");
+        return;
+    }
+
+    for (mouse = mice_list; mouse; mouse = mouse->next) {
+        monitor_printf(mon, "%c Mouse #%" PRId64 ": %s%s\n",
+                       mouse->value->current ? '*' : ' ',
+                       mouse->value->index, mouse->value->name,
+                       mouse->value->absolute ? " (absolute)" : "");
+    }
+
+    qapi_free_MouseInfoList(mice_list);
+}
+
+static char *SocketAddress_to_str(SocketAddress *addr)
+{
+    switch (addr->type) {
+    case SOCKET_ADDRESS_TYPE_INET:
+        return g_strdup_printf("tcp:%s:%s",
+                               addr->u.inet.host,
+                               addr->u.inet.port);
+    case SOCKET_ADDRESS_TYPE_UNIX:
+        return g_strdup_printf("unix:%s",
+                               addr->u.q_unix.path);
+    case SOCKET_ADDRESS_TYPE_FD:
+        return g_strdup_printf("fd:%s", addr->u.fd.str);
+    case SOCKET_ADDRESS_TYPE_VSOCK:
+        return g_strdup_printf("tcp:%s:%s",
+                               addr->u.vsock.cid,
+                               addr->u.vsock.port);
+    default:
+        return g_strdup("unknown address type");
+    }
+}
+
+void hmp_info_migrate(Monitor *mon, const QDict *qdict)
+{
+    MigrationInfo *info;
+
+    info = qmp_query_migrate(NULL);
+
+    migration_global_dump(mon);
+
+    if (info->has_status) {
+        monitor_printf(mon, "Migration status: %s",
+                       MigrationStatus_str(info->status));
+        if (info->status == MIGRATION_STATUS_FAILED &&
+            info->has_error_desc) {
+            monitor_printf(mon, " (%s)\n", info->error_desc);
+        } else {
+            monitor_printf(mon, "\n");
+        }
+
+        monitor_printf(mon, "total time: %" PRIu64 " ms\n",
+                       info->total_time);
+        if (info->has_expected_downtime) {
+            monitor_printf(mon, "expected downtime: %" PRIu64 " ms\n",
+                           info->expected_downtime);
+        }
+        if (info->has_downtime) {
+            monitor_printf(mon, "downtime: %" PRIu64 " ms\n",
+                           info->downtime);
+        }
+        if (info->has_setup_time) {
+            monitor_printf(mon, "setup: %" PRIu64 " ms\n",
+                           info->setup_time);
+        }
+    }
+
+    if (info->has_ram) {
+        monitor_printf(mon, "transferred ram: %" PRIu64 " kbytes\n",
+                       info->ram->transferred >> 10);
+        monitor_printf(mon, "throughput: %0.2f mbps\n",
+                       info->ram->mbps);
+        monitor_printf(mon, "remaining ram: %" PRIu64 " kbytes\n",
+                       info->ram->remaining >> 10);
+        monitor_printf(mon, "total ram: %" PRIu64 " kbytes\n",
+                       info->ram->total >> 10);
+        monitor_printf(mon, "duplicate: %" PRIu64 " pages\n",
+                       info->ram->duplicate);
+        monitor_printf(mon, "skipped: %" PRIu64 " pages\n",
+                       info->ram->skipped);
+        monitor_printf(mon, "normal: %" PRIu64 " pages\n",
+                       info->ram->normal);
+        monitor_printf(mon, "normal bytes: %" PRIu64 " kbytes\n",
+                       info->ram->normal_bytes >> 10);
+        monitor_printf(mon, "dirty sync count: %" PRIu64 "\n",
+                       info->ram->dirty_sync_count);
+        monitor_printf(mon, "page size: %" PRIu64 " kbytes\n",
+                       info->ram->page_size >> 10);
+        monitor_printf(mon, "multifd bytes: %" PRIu64 " kbytes\n",
+                       info->ram->multifd_bytes >> 10);
+        monitor_printf(mon, "pages-per-second: %" PRIu64 "\n",
+                       info->ram->pages_per_second);
+
+        if (info->ram->dirty_pages_rate) {
+            monitor_printf(mon, "dirty pages rate: %" PRIu64 " pages\n",
+                           info->ram->dirty_pages_rate);
+        }
+        if (info->ram->postcopy_requests) {
+            monitor_printf(mon, "postcopy request count: %" PRIu64 "\n",
+                           info->ram->postcopy_requests);
+        }
+    }
+
+    if (info->has_disk) {
+        monitor_printf(mon, "transferred disk: %" PRIu64 " kbytes\n",
+                       info->disk->transferred >> 10);
+        monitor_printf(mon, "remaining disk: %" PRIu64 " kbytes\n",
+                       info->disk->remaining >> 10);
+        monitor_printf(mon, "total disk: %" PRIu64 " kbytes\n",
+                       info->disk->total >> 10);
+    }
+
+    if (info->has_xbzrle_cache) {
+        monitor_printf(mon, "cache size: %" PRIu64 " bytes\n",
+                       info->xbzrle_cache->cache_size);
+        monitor_printf(mon, "xbzrle transferred: %" PRIu64 " kbytes\n",
+                       info->xbzrle_cache->bytes >> 10);
+        monitor_printf(mon, "xbzrle pages: %" PRIu64 " pages\n",
+                       info->xbzrle_cache->pages);
+        monitor_printf(mon, "xbzrle cache miss: %" PRIu64 " pages\n",
+                       info->xbzrle_cache->cache_miss);
+        monitor_printf(mon, "xbzrle cache miss rate: %0.2f\n",
+                       info->xbzrle_cache->cache_miss_rate);
+        monitor_printf(mon, "xbzrle encoding rate: %0.2f\n",
+                       info->xbzrle_cache->encoding_rate);
+        monitor_printf(mon, "xbzrle overflow: %" PRIu64 "\n",
+                       info->xbzrle_cache->overflow);
+    }
+
+    if (info->has_compression) {
+        monitor_printf(mon, "compression pages: %" PRIu64 " pages\n",
+                       info->compression->pages);
+        monitor_printf(mon, "compression busy: %" PRIu64 "\n",
+                       info->compression->busy);
+        monitor_printf(mon, "compression busy rate: %0.2f\n",
+                       info->compression->busy_rate);
+        monitor_printf(mon, "compressed size: %" PRIu64 " kbytes\n",
+                       info->compression->compressed_size >> 10);
+        monitor_printf(mon, "compression rate: %0.2f\n",
+                       info->compression->compression_rate);
+    }
+
+    if (info->has_cpu_throttle_percentage) {
+        monitor_printf(mon, "cpu throttle percentage: %" PRIu64 "\n",
+                       info->cpu_throttle_percentage);
+    }
+
+    if (info->has_postcopy_blocktime) {
+        monitor_printf(mon, "postcopy blocktime: %u\n",
+                       info->postcopy_blocktime);
+    }
+
+    if (info->has_postcopy_vcpu_blocktime) {
+        Visitor *v;
+        char *str;
+        v = string_output_visitor_new(false, &str);
+        visit_type_uint32List(v, NULL, &info->postcopy_vcpu_blocktime,
+                              &error_abort);
+        visit_complete(v, &str);
+        monitor_printf(mon, "postcopy vcpu blocktime: %s\n", str);
+        g_free(str);
+        visit_free(v);
+    }
+    if (info->has_socket_address) {
+        SocketAddressList *addr;
+
+        monitor_printf(mon, "socket address: [\n");
+
+        for (addr = info->socket_address; addr; addr = addr->next) {
+            char *s = SocketAddress_to_str(addr->value);
+            monitor_printf(mon, "\t%s\n", s);
+            g_free(s);
+        }
+        monitor_printf(mon, "]\n");
+    }
+
+    if (info->has_vfio) {
+        monitor_printf(mon, "vfio device transferred: %" PRIu64 " kbytes\n",
+                       info->vfio->transferred >> 10);
+    }
+
+    qapi_free_MigrationInfo(info);
+}
+
+void hmp_info_migrate_capabilities(Monitor *mon, const QDict *qdict)
+{
+    MigrationCapabilityStatusList *caps, *cap;
+
+    caps = qmp_query_migrate_capabilities(NULL);
+
+    if (caps) {
+        for (cap = caps; cap; cap = cap->next) {
+            monitor_printf(mon, "%s: %s\n",
+                           MigrationCapability_str(cap->value->capability),
+                           cap->value->state ? "on" : "off");
+        }
+    }
+
+    qapi_free_MigrationCapabilityStatusList(caps);
+}
+
+void hmp_info_migrate_parameters(Monitor *mon, const QDict *qdict)
+{
+    MigrationParameters *params;
+
+    params = qmp_query_migrate_parameters(NULL);
+
+    if (params) {
+        monitor_printf(mon, "%s: %" PRIu64 " ms\n",
+            MigrationParameter_str(MIGRATION_PARAMETER_ANNOUNCE_INITIAL),
+            params->announce_initial);
+        monitor_printf(mon, "%s: %" PRIu64 " ms\n",
+            MigrationParameter_str(MIGRATION_PARAMETER_ANNOUNCE_MAX),
+            params->announce_max);
+        monitor_printf(mon, "%s: %" PRIu64 "\n",
+            MigrationParameter_str(MIGRATION_PARAMETER_ANNOUNCE_ROUNDS),
+            params->announce_rounds);
+        monitor_printf(mon, "%s: %" PRIu64 " ms\n",
+            MigrationParameter_str(MIGRATION_PARAMETER_ANNOUNCE_STEP),
+            params->announce_step);
+        assert(params->has_compress_level);
+        monitor_printf(mon, "%s: %u\n",
+            MigrationParameter_str(MIGRATION_PARAMETER_COMPRESS_LEVEL),
+            params->compress_level);
+        assert(params->has_compress_threads);
+        monitor_printf(mon, "%s: %u\n",
+            MigrationParameter_str(MIGRATION_PARAMETER_COMPRESS_THREADS),
+            params->compress_threads);
+        assert(params->has_compress_wait_thread);
+        monitor_printf(mon, "%s: %s\n",
+            MigrationParameter_str(MIGRATION_PARAMETER_COMPRESS_WAIT_THREAD),
+            params->compress_wait_thread ? "on" : "off");
+        assert(params->has_decompress_threads);
+        monitor_printf(mon, "%s: %u\n",
+            MigrationParameter_str(MIGRATION_PARAMETER_DECOMPRESS_THREADS),
+            params->decompress_threads);
+        assert(params->has_throttle_trigger_threshold);
+        monitor_printf(mon, "%s: %u\n",
+            MigrationParameter_str(MIGRATION_PARAMETER_THROTTLE_TRIGGER_THRESHOLD),
+            params->throttle_trigger_threshold);
+        assert(params->has_cpu_throttle_initial);
+        monitor_printf(mon, "%s: %u\n",
+            MigrationParameter_str(MIGRATION_PARAMETER_CPU_THROTTLE_INITIAL),
+            params->cpu_throttle_initial);
+        assert(params->has_cpu_throttle_increment);
+        monitor_printf(mon, "%s: %u\n",
+            MigrationParameter_str(MIGRATION_PARAMETER_CPU_THROTTLE_INCREMENT),
+            params->cpu_throttle_increment);
+        assert(params->has_cpu_throttle_tailslow);
+        monitor_printf(mon, "%s: %s\n",
+            MigrationParameter_str(MIGRATION_PARAMETER_CPU_THROTTLE_TAILSLOW),
+            params->cpu_throttle_tailslow ? "on" : "off");
+        assert(params->has_max_cpu_throttle);
+        monitor_printf(mon, "%s: %u\n",
+            MigrationParameter_str(MIGRATION_PARAMETER_MAX_CPU_THROTTLE),
+            params->max_cpu_throttle);
+        assert(params->has_tls_creds);
+        monitor_printf(mon, "%s: '%s'\n",
+            MigrationParameter_str(MIGRATION_PARAMETER_TLS_CREDS),
+            params->tls_creds);
+        assert(params->has_tls_hostname);
+        monitor_printf(mon, "%s: '%s'\n",
+            MigrationParameter_str(MIGRATION_PARAMETER_TLS_HOSTNAME),
+            params->tls_hostname);
+        assert(params->has_max_bandwidth);
+        monitor_printf(mon, "%s: %" PRIu64 " bytes/second\n",
+            MigrationParameter_str(MIGRATION_PARAMETER_MAX_BANDWIDTH),
+            params->max_bandwidth);
+        assert(params->has_downtime_limit);
+        monitor_printf(mon, "%s: %" PRIu64 " ms\n",
+            MigrationParameter_str(MIGRATION_PARAMETER_DOWNTIME_LIMIT),
+            params->downtime_limit);
+        assert(params->has_x_checkpoint_delay);
+        monitor_printf(mon, "%s: %u ms\n",
+            MigrationParameter_str(MIGRATION_PARAMETER_X_CHECKPOINT_DELAY),
+            params->x_checkpoint_delay);
+        assert(params->has_block_incremental);
+        monitor_printf(mon, "%s: %s\n",
+            MigrationParameter_str(MIGRATION_PARAMETER_BLOCK_INCREMENTAL),
+            params->block_incremental ? "on" : "off");
+        monitor_printf(mon, "%s: %u\n",
+            MigrationParameter_str(MIGRATION_PARAMETER_MULTIFD_CHANNELS),
+            params->multifd_channels);
+        monitor_printf(mon, "%s: %s\n",
+            MigrationParameter_str(MIGRATION_PARAMETER_MULTIFD_COMPRESSION),
+            MultiFDCompression_str(params->multifd_compression));
+        monitor_printf(mon, "%s: %" PRIu64 " bytes\n",
+            MigrationParameter_str(MIGRATION_PARAMETER_XBZRLE_CACHE_SIZE),
+            params->xbzrle_cache_size);
+        monitor_printf(mon, "%s: %" PRIu64 "\n",
+            MigrationParameter_str(MIGRATION_PARAMETER_MAX_POSTCOPY_BANDWIDTH),
+            params->max_postcopy_bandwidth);
+        monitor_printf(mon, "%s: '%s'\n",
+            MigrationParameter_str(MIGRATION_PARAMETER_TLS_AUTHZ),
+            params->tls_authz);
+
+        if (params->has_block_bitmap_mapping) {
+            const BitmapMigrationNodeAliasList *bmnal;
+
+            monitor_printf(mon, "%s:\n",
+                           MigrationParameter_str(
+                               MIGRATION_PARAMETER_BLOCK_BITMAP_MAPPING));
+
+            for (bmnal = params->block_bitmap_mapping;
+                 bmnal;
+                 bmnal = bmnal->next)
+            {
+                const BitmapMigrationNodeAlias *bmna = bmnal->value;
+                const BitmapMigrationBitmapAliasList *bmbal;
+
+                monitor_printf(mon, "  '%s' -> '%s'\n",
+                               bmna->node_name, bmna->alias);
+
+                for (bmbal = bmna->bitmaps; bmbal; bmbal = bmbal->next) {
+                    const BitmapMigrationBitmapAlias *bmba = bmbal->value;
+
+                    monitor_printf(mon, "    '%s' -> '%s'\n",
+                                   bmba->name, bmba->alias);
+                }
+            }
+        }
+    }
+
+    qapi_free_MigrationParameters(params);
+}
+
+void hmp_info_migrate_cache_size(Monitor *mon, const QDict *qdict)
+{
+    monitor_printf(mon, "xbzrel cache size: %" PRId64 " kbytes\n",
+                   qmp_query_migrate_cache_size(NULL) >> 10);
+}
+
+
+#ifdef CONFIG_VNC
+/* Helper for hmp_info_vnc_clients, _servers */
+static void hmp_info_VncBasicInfo(Monitor *mon, VncBasicInfo *info,
+                                  const char *name)
+{
+    monitor_printf(mon, "  %s: %s:%s (%s%s)\n",
+                   name,
+                   info->host,
+                   info->service,
+                   NetworkAddressFamily_str(info->family),
+                   info->websocket ? " (Websocket)" : "");
+}
+
+/* Helper displaying and auth and crypt info */
+static void hmp_info_vnc_authcrypt(Monitor *mon, const char *indent,
+                                   VncPrimaryAuth auth,
+                                   VncVencryptSubAuth *vencrypt)
+{
+    monitor_printf(mon, "%sAuth: %s (Sub: %s)\n", indent,
+                   VncPrimaryAuth_str(auth),
+                   vencrypt ? VncVencryptSubAuth_str(*vencrypt) : "none");
+}
+
+static void hmp_info_vnc_clients(Monitor *mon, VncClientInfoList *client)
+{
+    while (client) {
+        VncClientInfo *cinfo = client->value;
+
+        hmp_info_VncBasicInfo(mon, qapi_VncClientInfo_base(cinfo), "Client");
+        monitor_printf(mon, "    x509_dname: %s\n",
+                       cinfo->has_x509_dname ?
+                       cinfo->x509_dname : "none");
+        monitor_printf(mon, "    sasl_username: %s\n",
+                       cinfo->has_sasl_username ?
+                       cinfo->sasl_username : "none");
+
+        client = client->next;
+    }
+}
+
+static void hmp_info_vnc_servers(Monitor *mon, VncServerInfo2List *server)
+{
+    while (server) {
+        VncServerInfo2 *sinfo = server->value;
+        hmp_info_VncBasicInfo(mon, qapi_VncServerInfo2_base(sinfo), "Server");
+        hmp_info_vnc_authcrypt(mon, "    ", sinfo->auth,
+                               sinfo->has_vencrypt ? &sinfo->vencrypt : NULL);
+        server = server->next;
+    }
+}
+
+void hmp_info_vnc(Monitor *mon, const QDict *qdict)
+{
+    VncInfo2List *info2l, *info2l_head;
+    Error *err = NULL;
+
+    info2l = qmp_query_vnc_servers(&err);
+    info2l_head = info2l;
+    if (err) {
+        hmp_handle_error(mon, err);
+        return;
+    }
+    if (!info2l) {
+        monitor_printf(mon, "None\n");
+        return;
+    }
+
+    while (info2l) {
+        VncInfo2 *info = info2l->value;
+        monitor_printf(mon, "%s:\n", info->id);
+        hmp_info_vnc_servers(mon, info->server);
+        hmp_info_vnc_clients(mon, info->clients);
+        if (!info->server) {
+            /* The server entry displays its auth, we only
+             * need to display in the case of 'reverse' connections
+             * where there's no server.
+             */
+            hmp_info_vnc_authcrypt(mon, "  ", info->auth,
+                               info->has_vencrypt ? &info->vencrypt : NULL);
+        }
+        if (info->has_display) {
+            monitor_printf(mon, "  Display: %s\n", info->display);
+        }
+        info2l = info2l->next;
+    }
+
+    qapi_free_VncInfo2List(info2l_head);
+
+}
+#endif
+
+#ifdef CONFIG_SPICE
+void hmp_info_spice(Monitor *mon, const QDict *qdict)
+{
+    SpiceChannelList *chan;
+    SpiceInfo *info;
+    const char *channel_name;
+    const char * const channel_names[] = {
+        [SPICE_CHANNEL_MAIN] = "main",
+        [SPICE_CHANNEL_DISPLAY] = "display",
+        [SPICE_CHANNEL_INPUTS] = "inputs",
+        [SPICE_CHANNEL_CURSOR] = "cursor",
+        [SPICE_CHANNEL_PLAYBACK] = "playback",
+        [SPICE_CHANNEL_RECORD] = "record",
+        [SPICE_CHANNEL_TUNNEL] = "tunnel",
+        [SPICE_CHANNEL_SMARTCARD] = "smartcard",
+        [SPICE_CHANNEL_USBREDIR] = "usbredir",
+        [SPICE_CHANNEL_PORT] = "port",
+#if 0
+        /* minimum spice-protocol is 0.12.3, webdav was added in 0.12.7,
+         * no easy way to #ifdef (SPICE_CHANNEL_* is a enum).  Disable
+         * as quick fix for build failures with older versions. */
+        [SPICE_CHANNEL_WEBDAV] = "webdav",
+#endif
+    };
+
+    info = qmp_query_spice(NULL);
+
+    if (!info->enabled) {
+        monitor_printf(mon, "Server: disabled\n");
+        goto out;
+    }
+
+    monitor_printf(mon, "Server:\n");
+    if (info->has_port) {
+        monitor_printf(mon, "     address: %s:%" PRId64 "\n",
+                       info->host, info->port);
+    }
+    if (info->has_tls_port) {
+        monitor_printf(mon, "     address: %s:%" PRId64 " [tls]\n",
+                       info->host, info->tls_port);
+    }
+    monitor_printf(mon, "    migrated: %s\n",
+                   info->migrated ? "true" : "false");
+    monitor_printf(mon, "        auth: %s\n", info->auth);
+    monitor_printf(mon, "    compiled: %s\n", info->compiled_version);
+    monitor_printf(mon, "  mouse-mode: %s\n",
+                   SpiceQueryMouseMode_str(info->mouse_mode));
+
+    if (!info->has_channels || info->channels == NULL) {
+        monitor_printf(mon, "Channels: none\n");
+    } else {
+        for (chan = info->channels; chan; chan = chan->next) {
+            monitor_printf(mon, "Channel:\n");
+            monitor_printf(mon, "     address: %s:%s%s\n",
+                           chan->value->host, chan->value->port,
+                           chan->value->tls ? " [tls]" : "");
+            monitor_printf(mon, "     session: %" PRId64 "\n",
+                           chan->value->connection_id);
+            monitor_printf(mon, "     channel: %" PRId64 ":%" PRId64 "\n",
+                           chan->value->channel_type, chan->value->channel_id);
+
+            channel_name = "unknown";
+            if (chan->value->channel_type > 0 &&
+                chan->value->channel_type < ARRAY_SIZE(channel_names) &&
+                channel_names[chan->value->channel_type]) {
+                channel_name = channel_names[chan->value->channel_type];
+            }
+
+            monitor_printf(mon, "     channel name: %s\n", channel_name);
+        }
+    }
+
+out:
+    qapi_free_SpiceInfo(info);
+}
+#endif
+
+void hmp_info_balloon(Monitor *mon, const QDict *qdict)
+{
+    BalloonInfo *info;
+    Error *err = NULL;
+
+    info = qmp_query_balloon(&err);
+    if (err) {
+        hmp_handle_error(mon, err);
+        return;
+    }
+
+    monitor_printf(mon, "balloon: actual=%" PRId64 "\n", info->actual >> 20);
+
+    qapi_free_BalloonInfo(info);
+}
+
+static void hmp_info_pci_device(Monitor *mon, const PciDeviceInfo *dev)
+{
+    PciMemoryRegionList *region;
+
+    monitor_printf(mon, "  Bus %2" PRId64 ", ", dev->bus);
+    monitor_printf(mon, "device %3" PRId64 ", function %" PRId64 ":\n",
+                   dev->slot, dev->function);
+    monitor_printf(mon, "    ");
+
+    if (dev->class_info->has_desc) {
+        monitor_printf(mon, "%s", dev->class_info->desc);
+    } else {
+        monitor_printf(mon, "Class %04" PRId64, dev->class_info->q_class);
+    }
+
+    monitor_printf(mon, ": PCI device %04" PRIx64 ":%04" PRIx64 "\n",
+                   dev->id->vendor, dev->id->device);
+    if (dev->id->has_subsystem_vendor && dev->id->has_subsystem) {
+        monitor_printf(mon, "      PCI subsystem %04" PRIx64 ":%04" PRIx64 "\n",
+                       dev->id->subsystem_vendor, dev->id->subsystem);
+    }
+
+    if (dev->has_irq) {
+        monitor_printf(mon, "      IRQ %" PRId64 ", pin %c\n",
+                       dev->irq, (char)('A' + dev->irq_pin - 1));
+    }
+
+    if (dev->has_pci_bridge) {
+        monitor_printf(mon, "      BUS %" PRId64 ".\n",
+                       dev->pci_bridge->bus->number);
+        monitor_printf(mon, "      secondary bus %" PRId64 ".\n",
+                       dev->pci_bridge->bus->secondary);
+        monitor_printf(mon, "      subordinate bus %" PRId64 ".\n",
+                       dev->pci_bridge->bus->subordinate);
+
+        monitor_printf(mon, "      IO range [0x%04"PRIx64", 0x%04"PRIx64"]\n",
+                       dev->pci_bridge->bus->io_range->base,
+                       dev->pci_bridge->bus->io_range->limit);
+
+        monitor_printf(mon,
+                       "      memory range [0x%08"PRIx64", 0x%08"PRIx64"]\n",
+                       dev->pci_bridge->bus->memory_range->base,
+                       dev->pci_bridge->bus->memory_range->limit);
+
+        monitor_printf(mon, "      prefetchable memory range "
+                       "[0x%08"PRIx64", 0x%08"PRIx64"]\n",
+                       dev->pci_bridge->bus->prefetchable_range->base,
+                       dev->pci_bridge->bus->prefetchable_range->limit);
+    }
+
+    for (region = dev->regions; region; region = region->next) {
+        uint64_t addr, size;
+
+        addr = region->value->address;
+        size = region->value->size;
+
+        monitor_printf(mon, "      BAR%" PRId64 ": ", region->value->bar);
+
+        if (!strcmp(region->value->type, "io")) {
+            monitor_printf(mon, "I/O at 0x%04" PRIx64
+                                " [0x%04" PRIx64 "].\n",
+                           addr, addr + size - 1);
+        } else {
+            monitor_printf(mon, "%d bit%s memory at 0x%08" PRIx64
+                               " [0x%08" PRIx64 "].\n",
+                           region->value->mem_type_64 ? 64 : 32,
+                           region->value->prefetch ? " prefetchable" : "",
+                           addr, addr + size - 1);
+        }
+    }
+
+    monitor_printf(mon, "      id \"%s\"\n", dev->qdev_id);
+
+    if (dev->has_pci_bridge) {
+        if (dev->pci_bridge->has_devices) {
+            PciDeviceInfoList *cdev;
+            for (cdev = dev->pci_bridge->devices; cdev; cdev = cdev->next) {
+                hmp_info_pci_device(mon, cdev->value);
+            }
+        }
+    }
+}
+
+static int hmp_info_irq_foreach(Object *obj, void *opaque)
+{
+    InterruptStatsProvider *intc;
+    InterruptStatsProviderClass *k;
+    Monitor *mon = opaque;
+
+    if (object_dynamic_cast(obj, TYPE_INTERRUPT_STATS_PROVIDER)) {
+        intc = INTERRUPT_STATS_PROVIDER(obj);
+        k = INTERRUPT_STATS_PROVIDER_GET_CLASS(obj);
+        uint64_t *irq_counts;
+        unsigned int nb_irqs, i;
+        if (k->get_statistics &&
+            k->get_statistics(intc, &irq_counts, &nb_irqs)) {
+            if (nb_irqs > 0) {
+                monitor_printf(mon, "IRQ statistics for %s:\n",
+                               object_get_typename(obj));
+                for (i = 0; i < nb_irqs; i++) {
+                    if (irq_counts[i] > 0) {
+                        monitor_printf(mon, "%2d: %" PRId64 "\n", i,
+                                       irq_counts[i]);
+                    }
+                }
+            }
+        } else {
+            monitor_printf(mon, "IRQ statistics not available for %s.\n",
+                           object_get_typename(obj));
+        }
+    }
+
+    return 0;
+}
+
+void hmp_info_irq(Monitor *mon, const QDict *qdict)
+{
+    object_child_foreach_recursive(object_get_root(),
+                                   hmp_info_irq_foreach, mon);
+}
+
+static int hmp_info_pic_foreach(Object *obj, void *opaque)
+{
+    InterruptStatsProvider *intc;
+    InterruptStatsProviderClass *k;
+    Monitor *mon = opaque;
+
+    if (object_dynamic_cast(obj, TYPE_INTERRUPT_STATS_PROVIDER)) {
+        intc = INTERRUPT_STATS_PROVIDER(obj);
+        k = INTERRUPT_STATS_PROVIDER_GET_CLASS(obj);
+        if (k->print_info) {
+            k->print_info(intc, mon);
+        } else {
+            monitor_printf(mon, "Interrupt controller information not available for %s.\n",
+                           object_get_typename(obj));
+        }
+    }
+
+    return 0;
+}
+
+void hmp_info_pic(Monitor *mon, const QDict *qdict)
+{
+    object_child_foreach_recursive(object_get_root(),
+                                   hmp_info_pic_foreach, mon);
+}
+
+static int hmp_info_rdma_foreach(Object *obj, void *opaque)
+{
+    RdmaProvider *rdma;
+    RdmaProviderClass *k;
+    Monitor *mon = opaque;
+
+    if (object_dynamic_cast(obj, INTERFACE_RDMA_PROVIDER)) {
+        rdma = RDMA_PROVIDER(obj);
+        k = RDMA_PROVIDER_GET_CLASS(obj);
+        if (k->print_statistics) {
+            k->print_statistics(mon, rdma);
+        } else {
+            monitor_printf(mon, "RDMA statistics not available for %s.\n",
+                           object_get_typename(obj));
+        }
+    }
+
+    return 0;
+}
+
+void hmp_info_rdma(Monitor *mon, const QDict *qdict)
+{
+    object_child_foreach_recursive(object_get_root(),
+                                   hmp_info_rdma_foreach, mon);
+}
+
+void hmp_info_pci(Monitor *mon, const QDict *qdict)
+{
+    PciInfoList *info_list, *info;
+    Error *err = NULL;
+
+    info_list = qmp_query_pci(&err);
+    if (err) {
+        monitor_printf(mon, "PCI devices not supported\n");
+        error_free(err);
+        return;
+    }
+
+    for (info = info_list; info; info = info->next) {
+        PciDeviceInfoList *dev;
+
+        for (dev = info->value->devices; dev; dev = dev->next) {
+            hmp_info_pci_device(mon, dev->value);
+        }
+    }
+
+    qapi_free_PciInfoList(info_list);
+}
+
+void hmp_info_tpm(Monitor *mon, const QDict *qdict)
+{
+    TPMInfoList *info_list, *info;
+    Error *err = NULL;
+    unsigned int c = 0;
+    TPMPassthroughOptions *tpo;
+    TPMEmulatorOptions *teo;
+
+    info_list = qmp_query_tpm(&err);
+    if (err) {
+        monitor_printf(mon, "TPM device not supported\n");
+        error_free(err);
+        return;
+    }
+
+    if (info_list) {
+        monitor_printf(mon, "TPM device:\n");
+    }
+
+    for (info = info_list; info; info = info->next) {
+        TPMInfo *ti = info->value;
+        monitor_printf(mon, " tpm%d: model=%s\n",
+                       c, TpmModel_str(ti->model));
+
+        monitor_printf(mon, "  \\ %s: type=%s",
+                       ti->id, TpmTypeOptionsKind_str(ti->options->type));
+
+        switch (ti->options->type) {
+        case TPM_TYPE_OPTIONS_KIND_PASSTHROUGH:
+            tpo = ti->options->u.passthrough.data;
+            monitor_printf(mon, "%s%s%s%s",
+                           tpo->has_path ? ",path=" : "",
+                           tpo->has_path ? tpo->path : "",
+                           tpo->has_cancel_path ? ",cancel-path=" : "",
+                           tpo->has_cancel_path ? tpo->cancel_path : "");
+            break;
+        case TPM_TYPE_OPTIONS_KIND_EMULATOR:
+            teo = ti->options->u.emulator.data;
+            monitor_printf(mon, ",chardev=%s", teo->chardev);
+            break;
+        case TPM_TYPE_OPTIONS_KIND__MAX:
+            break;
+        }
+        monitor_printf(mon, "\n");
+        c++;
+    }
+    qapi_free_TPMInfoList(info_list);
+}
+
+void hmp_quit(Monitor *mon, const QDict *qdict)
+{
+    monitor_suspend(mon);
+    qmp_quit(NULL);
+}
+
+void hmp_stop(Monitor *mon, const QDict *qdict)
+{
+    qmp_stop(NULL);
+}
+
+void hmp_sync_profile(Monitor *mon, const QDict *qdict)
+{
+    const char *op = qdict_get_try_str(qdict, "op");
+
+    if (op == NULL) {
+        bool on = qsp_is_enabled();
+
+        monitor_printf(mon, "sync-profile is %s\n", on ? "on" : "off");
+        return;
+    }
+    if (!strcmp(op, "on")) {
+        qsp_enable();
+    } else if (!strcmp(op, "off")) {
+        qsp_disable();
+    } else if (!strcmp(op, "reset")) {
+        qsp_reset();
+    } else {
+        Error *err = NULL;
+
+        error_setg(&err, QERR_INVALID_PARAMETER, op);
+        hmp_handle_error(mon, err);
+    }
+}
+
+void hmp_system_reset(Monitor *mon, const QDict *qdict)
+{
+    qmp_system_reset(NULL);
+}
+
+void hmp_system_powerdown(Monitor *mon, const QDict *qdict)
+{
+    qmp_system_powerdown(NULL);
+}
+
+void hmp_exit_preconfig(Monitor *mon, const QDict *qdict)
+{
+    Error *err = NULL;
+
+    qmp_x_exit_preconfig(&err);
+    hmp_handle_error(mon, err);
+}
+
+void hmp_cpu(Monitor *mon, const QDict *qdict)
+{
+    int64_t cpu_index;
+
+    /* XXX: drop the monitor_set_cpu() usage when all HMP commands that
+            use it are converted to the QAPI */
+    cpu_index = qdict_get_int(qdict, "index");
+    if (monitor_set_cpu(mon, cpu_index) < 0) {
+        monitor_printf(mon, "invalid CPU index\n");
+    }
+}
+
+void hmp_memsave(Monitor *mon, const QDict *qdict)
+{
+    uint32_t size = qdict_get_int(qdict, "size");
+    const char *filename = qdict_get_str(qdict, "filename");
+    uint64_t addr = qdict_get_int(qdict, "val");
+    Error *err = NULL;
+    int cpu_index = monitor_get_cpu_index(mon);
+
+    if (cpu_index < 0) {
+        monitor_printf(mon, "No CPU available\n");
+        return;
+    }
+
+    qmp_memsave(addr, size, filename, true, cpu_index, &err);
+    hmp_handle_error(mon, err);
+}
+
+void hmp_pmemsave(Monitor *mon, const QDict *qdict)
+{
+    uint32_t size = qdict_get_int(qdict, "size");
+    const char *filename = qdict_get_str(qdict, "filename");
+    uint64_t addr = qdict_get_int(qdict, "val");
+    Error *err = NULL;
+
+    qmp_pmemsave(addr, size, filename, &err);
+    hmp_handle_error(mon, err);
+}
+
+void hmp_ringbuf_write(Monitor *mon, const QDict *qdict)
+{
+    const char *chardev = qdict_get_str(qdict, "device");
+    const char *data = qdict_get_str(qdict, "data");
+    Error *err = NULL;
+
+    qmp_ringbuf_write(chardev, data, false, 0, &err);
+
+    hmp_handle_error(mon, err);
+}
+
+void hmp_ringbuf_read(Monitor *mon, const QDict *qdict)
+{
+    uint32_t size = qdict_get_int(qdict, "size");
+    const char *chardev = qdict_get_str(qdict, "device");
+    char *data;
+    Error *err = NULL;
+    int i;
+
+    data = qmp_ringbuf_read(chardev, size, false, 0, &err);
+    if (err) {
+        hmp_handle_error(mon, err);
+        return;
+    }
+
+    for (i = 0; data[i]; i++) {
+        unsigned char ch = data[i];
+
+        if (ch == '\\') {
+            monitor_printf(mon, "\\\\");
+        } else if ((ch < 0x20 && ch != '\n' && ch != '\t') || ch == 0x7F) {
+            monitor_printf(mon, "\\u%04X", ch);
+        } else {
+            monitor_printf(mon, "%c", ch);
+        }
+
+    }
+    monitor_printf(mon, "\n");
+    g_free(data);
+}
+
+void hmp_cont(Monitor *mon, const QDict *qdict)
+{
+    Error *err = NULL;
+
+    qmp_cont(&err);
+    hmp_handle_error(mon, err);
+}
+
+void hmp_system_wakeup(Monitor *mon, const QDict *qdict)
+{
+    Error *err = NULL;
+
+    qmp_system_wakeup(&err);
+    hmp_handle_error(mon, err);
+}
+
+void hmp_nmi(Monitor *mon, const QDict *qdict)
+{
+    Error *err = NULL;
+
+    qmp_inject_nmi(&err);
+    hmp_handle_error(mon, err);
+}
+
+void hmp_set_link(Monitor *mon, const QDict *qdict)
+{
+    const char *name = qdict_get_str(qdict, "name");
+    bool up = qdict_get_bool(qdict, "up");
+    Error *err = NULL;
+
+    qmp_set_link(name, up, &err);
+    hmp_handle_error(mon, err);
+}
+
+void hmp_balloon(Monitor *mon, const QDict *qdict)
+{
+    int64_t value = qdict_get_int(qdict, "value");
+    Error *err = NULL;
+
+    qmp_balloon(value, &err);
+    hmp_handle_error(mon, err);
+}
+
+void hmp_loadvm(Monitor *mon, const QDict *qdict)
+{
+    int saved_vm_running  = runstate_is_running();
+    const char *name = qdict_get_str(qdict, "name");
+    Error *err = NULL;
+
+    vm_stop(RUN_STATE_RESTORE_VM);
+
+    if (load_snapshot(name, &err) == 0 && saved_vm_running) {
+        vm_start();
+    }
+    hmp_handle_error(mon, err);
+}
+
+void hmp_savevm(Monitor *mon, const QDict *qdict)
+{
+    Error *err = NULL;
+
+    save_snapshot(qdict_get_try_str(qdict, "name"), &err);
+    hmp_handle_error(mon, err);
+}
+
+void hmp_delvm(Monitor *mon, const QDict *qdict)
+{
+    BlockDriverState *bs;
+    Error *err = NULL;
+    const char *name = qdict_get_str(qdict, "name");
+
+    if (bdrv_all_delete_snapshot(name, &bs, &err) < 0) {
+        error_prepend(&err,
+                      "deleting snapshot on device '%s': ",
+                      bdrv_get_device_name(bs));
+    }
+    hmp_handle_error(mon, err);
+}
+
+void hmp_announce_self(Monitor *mon, const QDict *qdict)
+{
+    const char *interfaces_str = qdict_get_try_str(qdict, "interfaces");
+    const char *id = qdict_get_try_str(qdict, "id");
+    AnnounceParameters *params = QAPI_CLONE(AnnounceParameters,
+                                            migrate_announce_params());
+
+    qapi_free_strList(params->interfaces);
+    params->interfaces = strList_from_comma_list(interfaces_str);
+    params->has_interfaces = params->interfaces != NULL;
+    params->id = g_strdup(id);
+    params->has_id = !!params->id;
+    qmp_announce_self(params, NULL);
+    qapi_free_AnnounceParameters(params);
+}
+
+void hmp_migrate_cancel(Monitor *mon, const QDict *qdict)
+{
+    qmp_migrate_cancel(NULL);
+}
+
+void hmp_migrate_continue(Monitor *mon, const QDict *qdict)
+{
+    Error *err = NULL;
+    const char *state = qdict_get_str(qdict, "state");
+    int val = qapi_enum_parse(&MigrationStatus_lookup, state, -1, &err);
+
+    if (val >= 0) {
+        qmp_migrate_continue(val, &err);
+    }
+
+    hmp_handle_error(mon, err);
+}
+
+void hmp_migrate_incoming(Monitor *mon, const QDict *qdict)
+{
+    Error *err = NULL;
+    const char *uri = qdict_get_str(qdict, "uri");
+
+    qmp_migrate_incoming(uri, &err);
+
+    hmp_handle_error(mon, err);
+}
+
+void hmp_migrate_recover(Monitor *mon, const QDict *qdict)
+{
+    Error *err = NULL;
+    const char *uri = qdict_get_str(qdict, "uri");
+
+    qmp_migrate_recover(uri, &err);
+
+    hmp_handle_error(mon, err);
+}
+
+void hmp_migrate_pause(Monitor *mon, const QDict *qdict)
+{
+    Error *err = NULL;
+
+    qmp_migrate_pause(&err);
+
+    hmp_handle_error(mon, err);
+}
+
+/* Kept for backwards compatibility */
+void hmp_migrate_set_downtime(Monitor *mon, const QDict *qdict)
+{
+    Error *err = NULL;
+
+    double value = qdict_get_double(qdict, "value");
+    qmp_migrate_set_downtime(value, &err);
+    hmp_handle_error(mon, err);
+}
+
+void hmp_migrate_set_cache_size(Monitor *mon, const QDict *qdict)
+{
+    int64_t value = qdict_get_int(qdict, "value");
+    Error *err = NULL;
+
+    qmp_migrate_set_cache_size(value, &err);
+    hmp_handle_error(mon, err);
+}
+
+/* Kept for backwards compatibility */
+void hmp_migrate_set_speed(Monitor *mon, const QDict *qdict)
+{
+    Error *err = NULL;
+
+    int64_t value = qdict_get_int(qdict, "value");
+    qmp_migrate_set_speed(value, &err);
+    hmp_handle_error(mon, err);
+}
+
+void hmp_migrate_set_capability(Monitor *mon, const QDict *qdict)
+{
+    const char *cap = qdict_get_str(qdict, "capability");
+    bool state = qdict_get_bool(qdict, "state");
+    Error *err = NULL;
+    MigrationCapabilityStatusList *caps = g_malloc0(sizeof(*caps));
+    int val;
+
+    val = qapi_enum_parse(&MigrationCapability_lookup, cap, -1, &err);
+    if (val < 0) {
+        goto end;
+    }
+
+    caps->value = g_malloc0(sizeof(*caps->value));
+    caps->value->capability = val;
+    caps->value->state = state;
+    caps->next = NULL;
+    qmp_migrate_set_capabilities(caps, &err);
+
+end:
+    qapi_free_MigrationCapabilityStatusList(caps);
+    hmp_handle_error(mon, err);
+}
+
+void hmp_migrate_set_parameter(Monitor *mon, const QDict *qdict)
+{
+    const char *param = qdict_get_str(qdict, "parameter");
+    const char *valuestr = qdict_get_str(qdict, "value");
+    Visitor *v = string_input_visitor_new(valuestr);
+    MigrateSetParameters *p = g_new0(MigrateSetParameters, 1);
+    uint64_t valuebw = 0;
+    uint64_t cache_size;
+    Error *err = NULL;
+    int val, ret;
+
+    val = qapi_enum_parse(&MigrationParameter_lookup, param, -1, &err);
+    if (val < 0) {
+        goto cleanup;
+    }
+
+    switch (val) {
+    case MIGRATION_PARAMETER_COMPRESS_LEVEL:
+        p->has_compress_level = true;
+        visit_type_int(v, param, &p->compress_level, &err);
+        break;
+    case MIGRATION_PARAMETER_COMPRESS_THREADS:
+        p->has_compress_threads = true;
+        visit_type_int(v, param, &p->compress_threads, &err);
+        break;
+    case MIGRATION_PARAMETER_COMPRESS_WAIT_THREAD:
+        p->has_compress_wait_thread = true;
+        visit_type_bool(v, param, &p->compress_wait_thread, &err);
+        break;
+    case MIGRATION_PARAMETER_DECOMPRESS_THREADS:
+        p->has_decompress_threads = true;
+        visit_type_int(v, param, &p->decompress_threads, &err);
+        break;
+    case MIGRATION_PARAMETER_THROTTLE_TRIGGER_THRESHOLD:
+        p->has_throttle_trigger_threshold = true;
+        visit_type_int(v, param, &p->throttle_trigger_threshold, &err);
+        break;
+    case MIGRATION_PARAMETER_CPU_THROTTLE_INITIAL:
+        p->has_cpu_throttle_initial = true;
+        visit_type_int(v, param, &p->cpu_throttle_initial, &err);
+        break;
+    case MIGRATION_PARAMETER_CPU_THROTTLE_INCREMENT:
+        p->has_cpu_throttle_increment = true;
+        visit_type_int(v, param, &p->cpu_throttle_increment, &err);
+        break;
+    case MIGRATION_PARAMETER_CPU_THROTTLE_TAILSLOW:
+        p->has_cpu_throttle_tailslow = true;
+        visit_type_bool(v, param, &p->cpu_throttle_tailslow, &err);
+        break;
+    case MIGRATION_PARAMETER_MAX_CPU_THROTTLE:
+        p->has_max_cpu_throttle = true;
+        visit_type_int(v, param, &p->max_cpu_throttle, &err);
+        break;
+    case MIGRATION_PARAMETER_TLS_CREDS:
+        p->has_tls_creds = true;
+        p->tls_creds = g_new0(StrOrNull, 1);
+        p->tls_creds->type = QTYPE_QSTRING;
+        visit_type_str(v, param, &p->tls_creds->u.s, &err);
+        break;
+    case MIGRATION_PARAMETER_TLS_HOSTNAME:
+        p->has_tls_hostname = true;
+        p->tls_hostname = g_new0(StrOrNull, 1);
+        p->tls_hostname->type = QTYPE_QSTRING;
+        visit_type_str(v, param, &p->tls_hostname->u.s, &err);
+        break;
+    case MIGRATION_PARAMETER_TLS_AUTHZ:
+        p->has_tls_authz = true;
+        p->tls_authz = g_new0(StrOrNull, 1);
+        p->tls_authz->type = QTYPE_QSTRING;
+        visit_type_str(v, param, &p->tls_authz->u.s, &err);
+        break;
+    case MIGRATION_PARAMETER_MAX_BANDWIDTH:
+        p->has_max_bandwidth = true;
+        /*
+         * Can't use visit_type_size() here, because it
+         * defaults to Bytes rather than Mebibytes.
+         */
+        ret = qemu_strtosz_MiB(valuestr, NULL, &valuebw);
+        if (ret < 0 || valuebw > INT64_MAX
+            || (size_t)valuebw != valuebw) {
+            error_setg(&err, "Invalid size %s", valuestr);
+            break;
+        }
+        p->max_bandwidth = valuebw;
+        break;
+    case MIGRATION_PARAMETER_DOWNTIME_LIMIT:
+        p->has_downtime_limit = true;
+        visit_type_int(v, param, &p->downtime_limit, &err);
+        break;
+    case MIGRATION_PARAMETER_X_CHECKPOINT_DELAY:
+        p->has_x_checkpoint_delay = true;
+        visit_type_int(v, param, &p->x_checkpoint_delay, &err);
+        break;
+    case MIGRATION_PARAMETER_BLOCK_INCREMENTAL:
+        p->has_block_incremental = true;
+        visit_type_bool(v, param, &p->block_incremental, &err);
+        break;
+    case MIGRATION_PARAMETER_MULTIFD_CHANNELS:
+        p->has_multifd_channels = true;
+        visit_type_int(v, param, &p->multifd_channels, &err);
+        break;
+    case MIGRATION_PARAMETER_MULTIFD_COMPRESSION:
+        p->has_multifd_compression = true;
+        visit_type_MultiFDCompression(v, param, &p->multifd_compression,
+                                      &err);
+        break;
+    case MIGRATION_PARAMETER_MULTIFD_ZLIB_LEVEL:
+        p->has_multifd_zlib_level = true;
+        visit_type_int(v, param, &p->multifd_zlib_level, &err);
+        break;
+    case MIGRATION_PARAMETER_MULTIFD_ZSTD_LEVEL:
+        p->has_multifd_zstd_level = true;
+        visit_type_int(v, param, &p->multifd_zstd_level, &err);
+        break;
+    case MIGRATION_PARAMETER_XBZRLE_CACHE_SIZE:
+        p->has_xbzrle_cache_size = true;
+        if (!visit_type_size(v, param, &cache_size, &err)) {
+            break;
+        }
+        if (cache_size > INT64_MAX || (size_t)cache_size != cache_size) {
+            error_setg(&err, "Invalid size %s", valuestr);
+            break;
+        }
+        p->xbzrle_cache_size = cache_size;
+        break;
+    case MIGRATION_PARAMETER_MAX_POSTCOPY_BANDWIDTH:
+        p->has_max_postcopy_bandwidth = true;
+        visit_type_size(v, param, &p->max_postcopy_bandwidth, &err);
+        break;
+    case MIGRATION_PARAMETER_ANNOUNCE_INITIAL:
+        p->has_announce_initial = true;
+        visit_type_size(v, param, &p->announce_initial, &err);
+        break;
+    case MIGRATION_PARAMETER_ANNOUNCE_MAX:
+        p->has_announce_max = true;
+        visit_type_size(v, param, &p->announce_max, &err);
+        break;
+    case MIGRATION_PARAMETER_ANNOUNCE_ROUNDS:
+        p->has_announce_rounds = true;
+        visit_type_size(v, param, &p->announce_rounds, &err);
+        break;
+    case MIGRATION_PARAMETER_ANNOUNCE_STEP:
+        p->has_announce_step = true;
+        visit_type_size(v, param, &p->announce_step, &err);
+        break;
+    case MIGRATION_PARAMETER_BLOCK_BITMAP_MAPPING:
+        error_setg(&err, "The block-bitmap-mapping parameter can only be set "
+                   "through QMP");
+        break;
+    default:
+        assert(0);
+    }
+
+    if (err) {
+        goto cleanup;
+    }
+
+    qmp_migrate_set_parameters(p, &err);
+
+ cleanup:
+    qapi_free_MigrateSetParameters(p);
+    visit_free(v);
+    hmp_handle_error(mon, err);
+}
+
+void hmp_client_migrate_info(Monitor *mon, const QDict *qdict)
+{
+    Error *err = NULL;
+    const char *protocol = qdict_get_str(qdict, "protocol");
+    const char *hostname = qdict_get_str(qdict, "hostname");
+    bool has_port        = qdict_haskey(qdict, "port");
+    int port             = qdict_get_try_int(qdict, "port", -1);
+    bool has_tls_port    = qdict_haskey(qdict, "tls-port");
+    int tls_port         = qdict_get_try_int(qdict, "tls-port", -1);
+    const char *cert_subject = qdict_get_try_str(qdict, "cert-subject");
+
+    qmp_client_migrate_info(protocol, hostname,
+                            has_port, port, has_tls_port, tls_port,
+                            !!cert_subject, cert_subject, &err);
+    hmp_handle_error(mon, err);
+}
+
+void hmp_migrate_start_postcopy(Monitor *mon, const QDict *qdict)
+{
+    Error *err = NULL;
+    qmp_migrate_start_postcopy(&err);
+    hmp_handle_error(mon, err);
+}
+
+void hmp_x_colo_lost_heartbeat(Monitor *mon, const QDict *qdict)
+{
+    Error *err = NULL;
+
+    qmp_x_colo_lost_heartbeat(&err);
+    hmp_handle_error(mon, err);
+}
+
+void hmp_set_password(Monitor *mon, const QDict *qdict)
+{
+    const char *protocol  = qdict_get_str(qdict, "protocol");
+    const char *password  = qdict_get_str(qdict, "password");
+    const char *connected = qdict_get_try_str(qdict, "connected");
+    Error *err = NULL;
+
+    qmp_set_password(protocol, password, !!connected, connected, &err);
+    hmp_handle_error(mon, err);
+}
+
+void hmp_expire_password(Monitor *mon, const QDict *qdict)
+{
+    const char *protocol  = qdict_get_str(qdict, "protocol");
+    const char *whenstr = qdict_get_str(qdict, "time");
+    Error *err = NULL;
+
+    qmp_expire_password(protocol, whenstr, &err);
+    hmp_handle_error(mon, err);
+}
+
+
+#ifdef CONFIG_VNC
+static void hmp_change_read_arg(void *opaque, const char *password,
+                                void *readline_opaque)
+{
+    qmp_change_vnc_password(password, NULL);
+    monitor_read_command(opaque, 1);
+}
+#endif
+
+void hmp_change(Monitor *mon, const QDict *qdict)
+{
+    const char *device = qdict_get_str(qdict, "device");
+    const char *target = qdict_get_str(qdict, "target");
+    const char *arg = qdict_get_try_str(qdict, "arg");
+    const char *read_only = qdict_get_try_str(qdict, "read-only-mode");
+    BlockdevChangeReadOnlyMode read_only_mode = 0;
+    Error *err = NULL;
+
+#ifdef CONFIG_VNC
+    if (strcmp(device, "vnc") == 0) {
+        if (read_only) {
+            monitor_printf(mon,
+                           "Parameter 'read-only-mode' is invalid for VNC\n");
+            return;
+        }
+        if (strcmp(target, "passwd") == 0 ||
+            strcmp(target, "password") == 0) {
+            if (!arg) {
+                MonitorHMP *hmp_mon = container_of(mon, MonitorHMP, common);
+                monitor_read_password(hmp_mon, hmp_change_read_arg, NULL);
+                return;
+            }
+        }
+        qmp_change("vnc", target, !!arg, arg, &err);
+    } else
+#endif
+    {
+        if (read_only) {
+            read_only_mode =
+                qapi_enum_parse(&BlockdevChangeReadOnlyMode_lookup,
+                                read_only,
+                                BLOCKDEV_CHANGE_READ_ONLY_MODE_RETAIN, &err);
+            if (err) {
+                goto end;
+            }
+        }
+
+        qmp_blockdev_change_medium(true, device, false, NULL, target,
+                                   !!arg, arg, !!read_only, read_only_mode,
+                                   &err);
+    }
+
+end:
+    hmp_handle_error(mon, err);
+}
+
+typedef struct HMPMigrationStatus
+{
+    QEMUTimer *timer;
+    Monitor *mon;
+    bool is_block_migration;
+} HMPMigrationStatus;
+
+static void hmp_migrate_status_cb(void *opaque)
+{
+    HMPMigrationStatus *status = opaque;
+    MigrationInfo *info;
+
+    info = qmp_query_migrate(NULL);
+    if (!info->has_status || info->status == MIGRATION_STATUS_ACTIVE ||
+        info->status == MIGRATION_STATUS_SETUP) {
+        if (info->has_disk) {
+            int progress;
+
+            if (info->disk->remaining) {
+                progress = info->disk->transferred * 100 / info->disk->total;
+            } else {
+                progress = 100;
+            }
+
+            monitor_printf(status->mon, "Completed %d %%\r", progress);
+            monitor_flush(status->mon);
+        }
+
+        timer_mod(status->timer, qemu_clock_get_ms(QEMU_CLOCK_REALTIME) + 1000);
+    } else {
+        if (status->is_block_migration) {
+            monitor_printf(status->mon, "\n");
+        }
+        if (info->has_error_desc) {
+            error_report("%s", info->error_desc);
+        }
+        monitor_resume(status->mon);
+        timer_del(status->timer);
+        timer_free(status->timer);
+        g_free(status);
+    }
+
+    qapi_free_MigrationInfo(info);
+}
+
+void hmp_migrate(Monitor *mon, const QDict *qdict)
+{
+    bool detach = qdict_get_try_bool(qdict, "detach", false);
+    bool blk = qdict_get_try_bool(qdict, "blk", false);
+    bool inc = qdict_get_try_bool(qdict, "inc", false);
+    bool resume = qdict_get_try_bool(qdict, "resume", false);
+    const char *uri = qdict_get_str(qdict, "uri");
+    Error *err = NULL;
+
+    qmp_migrate(uri, !!blk, blk, !!inc, inc,
+                false, false, true, resume, &err);
+    if (err) {
+        hmp_handle_error(mon, err);
+        return;
+    }
+
+    if (!detach) {
+        HMPMigrationStatus *status;
+
+        if (monitor_suspend(mon) < 0) {
+            monitor_printf(mon, "terminal does not allow synchronous "
+                           "migration, continuing detached\n");
+            return;
+        }
+
+        status = g_malloc0(sizeof(*status));
+        status->mon = mon;
+        status->is_block_migration = blk || inc;
+        status->timer = timer_new_ms(QEMU_CLOCK_REALTIME, hmp_migrate_status_cb,
+                                          status);
+        timer_mod(status->timer, qemu_clock_get_ms(QEMU_CLOCK_REALTIME));
+    }
+}
+
+void hmp_netdev_add(Monitor *mon, const QDict *qdict)
+{
+    Error *err = NULL;
+    QemuOpts *opts;
+    const char *type = qdict_get_try_str(qdict, "type");
+
+    if (type && is_help_option(type)) {
+        show_netdevs();
+        return;
+    }
+    opts = qemu_opts_from_qdict(qemu_find_opts("netdev"), qdict, &err);
+    if (err) {
+        goto out;
+    }
+
+    netdev_add(opts, &err);
+    if (err) {
+        qemu_opts_del(opts);
+    }
+
+out:
+    hmp_handle_error(mon, err);
+}
+
+void hmp_netdev_del(Monitor *mon, const QDict *qdict)
+{
+    const char *id = qdict_get_str(qdict, "id");
+    Error *err = NULL;
+
+    qmp_netdev_del(id, &err);
+    hmp_handle_error(mon, err);
+}
+
+void hmp_object_add(Monitor *mon, const QDict *qdict)
+{
+    Error *err = NULL;
+    QemuOpts *opts;
+    Object *obj = NULL;
+
+    opts = qemu_opts_from_qdict(qemu_find_opts("object"), qdict, &err);
+    if (err) {
+        goto end;
+    }
+
+    obj = user_creatable_add_opts(opts, &err);
+    qemu_opts_del(opts);
+
+end:
+    hmp_handle_error(mon, err);
+
+    if (obj) {
+        object_unref(obj);
+    }
+}
+
+void hmp_getfd(Monitor *mon, const QDict *qdict)
+{
+    const char *fdname = qdict_get_str(qdict, "fdname");
+    Error *err = NULL;
+
+    qmp_getfd(fdname, &err);
+    hmp_handle_error(mon, err);
+}
+
+void hmp_closefd(Monitor *mon, const QDict *qdict)
+{
+    const char *fdname = qdict_get_str(qdict, "fdname");
+    Error *err = NULL;
+
+    qmp_closefd(fdname, &err);
+    hmp_handle_error(mon, err);
+}
+
+void hmp_sendkey(Monitor *mon, const QDict *qdict)
+{
+    const char *keys = qdict_get_str(qdict, "keys");
+    KeyValueList *keylist, *head = NULL, *tmp = NULL;
+    int has_hold_time = qdict_haskey(qdict, "hold-time");
+    int hold_time = qdict_get_try_int(qdict, "hold-time", -1);
+    Error *err = NULL;
+    const char *separator;
+    int keyname_len;
+
+    while (1) {
+        separator = qemu_strchrnul(keys, '-');
+        keyname_len = separator - keys;
+
+        /* Be compatible with old interface, convert user inputted "<" */
+        if (keys[0] == '<' && keyname_len == 1) {
+            keys = "less";
+            keyname_len = 4;
+        }
+
+        keylist = g_malloc0(sizeof(*keylist));
+        keylist->value = g_malloc0(sizeof(*keylist->value));
+
+        if (!head) {
+            head = keylist;
+        }
+        if (tmp) {
+            tmp->next = keylist;
+        }
+        tmp = keylist;
+
+        if (strstart(keys, "0x", NULL)) {
+            char *endp;
+            int value = strtoul(keys, &endp, 0);
+            assert(endp <= keys + keyname_len);
+            if (endp != keys + keyname_len) {
+                goto err_out;
+            }
+            keylist->value->type = KEY_VALUE_KIND_NUMBER;
+            keylist->value->u.number.data = value;
+        } else {
+            int idx = index_from_key(keys, keyname_len);
+            if (idx == Q_KEY_CODE__MAX) {
+                goto err_out;
+            }
+            keylist->value->type = KEY_VALUE_KIND_QCODE;
+            keylist->value->u.qcode.data = idx;
+        }
+
+        if (!*separator) {
+            break;
+        }
+        keys = separator + 1;
+    }
+
+    qmp_send_key(head, has_hold_time, hold_time, &err);
+    hmp_handle_error(mon, err);
+
+out:
+    qapi_free_KeyValueList(head);
+    return;
+
+err_out:
+    monitor_printf(mon, "invalid parameter: %.*s\n", keyname_len, keys);
+    goto out;
+}
+
+void coroutine_fn
+hmp_screendump(Monitor *mon, const QDict *qdict)
+{
+    const char *filename = qdict_get_str(qdict, "filename");
+    const char *id = qdict_get_try_str(qdict, "device");
+    int64_t head = qdict_get_try_int(qdict, "head", 0);
+    Error *err = NULL;
+
+    qmp_screendump(filename, id != NULL, id, id != NULL, head, &err);
+    hmp_handle_error(mon, err);
+}
+
+void hmp_chardev_add(Monitor *mon, const QDict *qdict)
+{
+    const char *args = qdict_get_str(qdict, "args");
+    Error *err = NULL;
+    QemuOpts *opts;
+
+    opts = qemu_opts_parse_noisily(qemu_find_opts("chardev"), args, true);
+    if (opts == NULL) {
+        error_setg(&err, "Parsing chardev args failed");
+    } else {
+        qemu_chr_new_from_opts(opts, NULL, &err);
+        qemu_opts_del(opts);
+    }
+    hmp_handle_error(mon, err);
+}
+
+void hmp_chardev_change(Monitor *mon, const QDict *qdict)
+{
+    const char *args = qdict_get_str(qdict, "args");
+    const char *id;
+    Error *err = NULL;
+    ChardevBackend *backend = NULL;
+    ChardevReturn *ret = NULL;
+    QemuOpts *opts = qemu_opts_parse_noisily(qemu_find_opts("chardev"), args,
+                                             true);
+    if (!opts) {
+        error_setg(&err, "Parsing chardev args failed");
+        goto end;
+    }
+
+    id = qdict_get_str(qdict, "id");
+    if (qemu_opts_id(opts)) {
+        error_setg(&err, "Unexpected 'id' parameter");
+        goto end;
+    }
+
+    backend = qemu_chr_parse_opts(opts, &err);
+    if (!backend) {
+        goto end;
+    }
+
+    ret = qmp_chardev_change(id, backend, &err);
+
+end:
+    qapi_free_ChardevReturn(ret);
+    qapi_free_ChardevBackend(backend);
+    qemu_opts_del(opts);
+    hmp_handle_error(mon, err);
+}
+
+void hmp_chardev_remove(Monitor *mon, const QDict *qdict)
+{
+    Error *local_err = NULL;
+
+    qmp_chardev_remove(qdict_get_str(qdict, "id"), &local_err);
+    hmp_handle_error(mon, local_err);
+}
+
+void hmp_chardev_send_break(Monitor *mon, const QDict *qdict)
+{
+    Error *local_err = NULL;
+
+    qmp_chardev_send_break(qdict_get_str(qdict, "id"), &local_err);
+    hmp_handle_error(mon, local_err);
+}
+
+void hmp_object_del(Monitor *mon, const QDict *qdict)
+{
+    const char *id = qdict_get_str(qdict, "id");
+    Error *err = NULL;
+
+    user_creatable_del(id, &err);
+    hmp_handle_error(mon, err);
+}
+
+void hmp_info_memory_devices(Monitor *mon, const QDict *qdict)
+{
+    Error *err = NULL;
+    MemoryDeviceInfoList *info_list = qmp_query_memory_devices(&err);
+    MemoryDeviceInfoList *info;
+    VirtioPMEMDeviceInfo *vpi;
+    VirtioMEMDeviceInfo *vmi;
+    MemoryDeviceInfo *value;
+    PCDIMMDeviceInfo *di;
+
+    for (info = info_list; info; info = info->next) {
+        value = info->value;
+
+        if (value) {
+            switch (value->type) {
+            case MEMORY_DEVICE_INFO_KIND_DIMM:
+            case MEMORY_DEVICE_INFO_KIND_NVDIMM:
+                di = value->type == MEMORY_DEVICE_INFO_KIND_DIMM ?
+                     value->u.dimm.data : value->u.nvdimm.data;
+                monitor_printf(mon, "Memory device [%s]: \"%s\"\n",
+                               MemoryDeviceInfoKind_str(value->type),
+                               di->id ? di->id : "");
+                monitor_printf(mon, "  addr: 0x%" PRIx64 "\n", di->addr);
+                monitor_printf(mon, "  slot: %" PRId64 "\n", di->slot);
+                monitor_printf(mon, "  node: %" PRId64 "\n", di->node);
+                monitor_printf(mon, "  size: %" PRIu64 "\n", di->size);
+                monitor_printf(mon, "  memdev: %s\n", di->memdev);
+                monitor_printf(mon, "  hotplugged: %s\n",
+                               di->hotplugged ? "true" : "false");
+                monitor_printf(mon, "  hotpluggable: %s\n",
+                               di->hotpluggable ? "true" : "false");
+                break;
+            case MEMORY_DEVICE_INFO_KIND_VIRTIO_PMEM:
+                vpi = value->u.virtio_pmem.data;
+                monitor_printf(mon, "Memory device [%s]: \"%s\"\n",
+                               MemoryDeviceInfoKind_str(value->type),
+                               vpi->id ? vpi->id : "");
+                monitor_printf(mon, "  memaddr: 0x%" PRIx64 "\n", vpi->memaddr);
+                monitor_printf(mon, "  size: %" PRIu64 "\n", vpi->size);
+                monitor_printf(mon, "  memdev: %s\n", vpi->memdev);
+                break;
+            case MEMORY_DEVICE_INFO_KIND_VIRTIO_MEM:
+                vmi = value->u.virtio_mem.data;
+                monitor_printf(mon, "Memory device [%s]: \"%s\"\n",
+                               MemoryDeviceInfoKind_str(value->type),
+                               vmi->id ? vmi->id : "");
+                monitor_printf(mon, "  memaddr: 0x%" PRIx64 "\n", vmi->memaddr);
+                monitor_printf(mon, "  node: %" PRId64 "\n", vmi->node);
+                monitor_printf(mon, "  requested-size: %" PRIu64 "\n",
+                               vmi->requested_size);
+                monitor_printf(mon, "  size: %" PRIu64 "\n", vmi->size);
+                monitor_printf(mon, "  max-size: %" PRIu64 "\n", vmi->max_size);
+                monitor_printf(mon, "  block-size: %" PRIu64 "\n",
+                               vmi->block_size);
+                monitor_printf(mon, "  memdev: %s\n", vmi->memdev);
+                break;
+            default:
+                g_assert_not_reached();
+            }
+        }
+    }
+
+    qapi_free_MemoryDeviceInfoList(info_list);
+    hmp_handle_error(mon, err);
+}
+
+void hmp_info_iothreads(Monitor *mon, const QDict *qdict)
+{
+    IOThreadInfoList *info_list = qmp_query_iothreads(NULL);
+    IOThreadInfoList *info;
+    IOThreadInfo *value;
+
+    for (info = info_list; info; info = info->next) {
+        value = info->value;
+        monitor_printf(mon, "%s:\n", value->id);
+        monitor_printf(mon, "  thread_id=%" PRId64 "\n", value->thread_id);
+        monitor_printf(mon, "  poll-max-ns=%" PRId64 "\n", value->poll_max_ns);
+        monitor_printf(mon, "  poll-grow=%" PRId64 "\n", value->poll_grow);
+        monitor_printf(mon, "  poll-shrink=%" PRId64 "\n", value->poll_shrink);
+    }
+
+    qapi_free_IOThreadInfoList(info_list);
+}
+
+void hmp_rocker(Monitor *mon, const QDict *qdict)
+{
+    const char *name = qdict_get_str(qdict, "name");
+    RockerSwitch *rocker;
+    Error *err = NULL;
+
+    rocker = qmp_query_rocker(name, &err);
+    if (err != NULL) {
+        hmp_handle_error(mon, err);
+        return;
+    }
+
+    monitor_printf(mon, "name: %s\n", rocker->name);
+    monitor_printf(mon, "id: 0x%" PRIx64 "\n", rocker->id);
+    monitor_printf(mon, "ports: %d\n", rocker->ports);
+
+    qapi_free_RockerSwitch(rocker);
+}
+
+void hmp_rocker_ports(Monitor *mon, const QDict *qdict)
+{
+    RockerPortList *list, *port;
+    const char *name = qdict_get_str(qdict, "name");
+    Error *err = NULL;
+
+    list = qmp_query_rocker_ports(name, &err);
+    if (err != NULL) {
+        hmp_handle_error(mon, err);
+        return;
+    }
+
+    monitor_printf(mon, "            ena/    speed/ auto\n");
+    monitor_printf(mon, "      port  link    duplex neg?\n");
+
+    for (port = list; port; port = port->next) {
+        monitor_printf(mon, "%10s  %-4s   %-3s  %2s  %-3s\n",
+                       port->value->name,
+                       port->value->enabled ? port->value->link_up ?
+                       "up" : "down" : "!ena",
+                       port->value->speed == 10000 ? "10G" : "??",
+                       port->value->duplex ? "FD" : "HD",
+                       port->value->autoneg ? "Yes" : "No");
+    }
+
+    qapi_free_RockerPortList(list);
+}
+
+void hmp_rocker_of_dpa_flows(Monitor *mon, const QDict *qdict)
+{
+    RockerOfDpaFlowList *list, *info;
+    const char *name = qdict_get_str(qdict, "name");
+    uint32_t tbl_id = qdict_get_try_int(qdict, "tbl_id", -1);
+    Error *err = NULL;
+
+    list = qmp_query_rocker_of_dpa_flows(name, tbl_id != -1, tbl_id, &err);
+    if (err != NULL) {
+        hmp_handle_error(mon, err);
+        return;
+    }
+
+    monitor_printf(mon, "prio tbl hits key(mask) --> actions\n");
+
+    for (info = list; info; info = info->next) {
+        RockerOfDpaFlow *flow = info->value;
+        RockerOfDpaFlowKey *key = flow->key;
+        RockerOfDpaFlowMask *mask = flow->mask;
+        RockerOfDpaFlowAction *action = flow->action;
+
+        if (flow->hits) {
+            monitor_printf(mon, "%-4d %-3d %-4" PRIu64,
+                           key->priority, key->tbl_id, flow->hits);
+        } else {
+            monitor_printf(mon, "%-4d %-3d     ",
+                           key->priority, key->tbl_id);
+        }
+
+        if (key->has_in_pport) {
+            monitor_printf(mon, " pport %d", key->in_pport);
+            if (mask->has_in_pport) {
+                monitor_printf(mon, "(0x%x)", mask->in_pport);
+            }
+        }
+
+        if (key->has_vlan_id) {
+            monitor_printf(mon, " vlan %d",
+                           key->vlan_id & VLAN_VID_MASK);
+            if (mask->has_vlan_id) {
+                monitor_printf(mon, "(0x%x)", mask->vlan_id);
+            }
+        }
+
+        if (key->has_tunnel_id) {
+            monitor_printf(mon, " tunnel %d", key->tunnel_id);
+            if (mask->has_tunnel_id) {
+                monitor_printf(mon, "(0x%x)", mask->tunnel_id);
+            }
+        }
+
+        if (key->has_eth_type) {
+            switch (key->eth_type) {
+            case 0x0806:
+                monitor_printf(mon, " ARP");
+                break;
+            case 0x0800:
+                monitor_printf(mon, " IP");
+                break;
+            case 0x86dd:
+                monitor_printf(mon, " IPv6");
+                break;
+            case 0x8809:
+                monitor_printf(mon, " LACP");
+                break;
+            case 0x88cc:
+                monitor_printf(mon, " LLDP");
+                break;
+            default:
+                monitor_printf(mon, " eth type 0x%04x", key->eth_type);
+                break;
+            }
+        }
+
+        if (key->has_eth_src) {
+            if ((strcmp(key->eth_src, "01:00:00:00:00:00") == 0) &&
+                (mask->has_eth_src) &&
+                (strcmp(mask->eth_src, "01:00:00:00:00:00") == 0)) {
+                monitor_printf(mon, " src ");
+            } else if ((strcmp(key->eth_src, "00:00:00:00:00:00") == 0) &&
+                (mask->has_eth_src) &&
+                (strcmp(mask->eth_src, "01:00:00:00:00:00") == 0)) {
+                monitor_printf(mon, " src ");
+            } else {
+                monitor_printf(mon, " src %s", key->eth_src);
+                if (mask->has_eth_src) {
+                    monitor_printf(mon, "(%s)", mask->eth_src);
+                }
+            }
+        }
+
+        if (key->has_eth_dst) {
+            if ((strcmp(key->eth_dst, "01:00:00:00:00:00") == 0) &&
+                (mask->has_eth_dst) &&
+                (strcmp(mask->eth_dst, "01:00:00:00:00:00") == 0)) {
+                monitor_printf(mon, " dst ");
+            } else if ((strcmp(key->eth_dst, "00:00:00:00:00:00") == 0) &&
+                (mask->has_eth_dst) &&
+                (strcmp(mask->eth_dst, "01:00:00:00:00:00") == 0)) {
+                monitor_printf(mon, " dst ");
+            } else {
+                monitor_printf(mon, " dst %s", key->eth_dst);
+                if (mask->has_eth_dst) {
+                    monitor_printf(mon, "(%s)", mask->eth_dst);
+                }
+            }
+        }
+
+        if (key->has_ip_proto) {
+            monitor_printf(mon, " proto %d", key->ip_proto);
+            if (mask->has_ip_proto) {
+                monitor_printf(mon, "(0x%x)", mask->ip_proto);
+            }
+        }
+
+        if (key->has_ip_tos) {
+            monitor_printf(mon, " TOS %d", key->ip_tos);
+            if (mask->has_ip_tos) {
+                monitor_printf(mon, "(0x%x)", mask->ip_tos);
+            }
+        }
+
+        if (key->has_ip_dst) {
+            monitor_printf(mon, " dst %s", key->ip_dst);
+        }
+
+        if (action->has_goto_tbl || action->has_group_id ||
+            action->has_new_vlan_id) {
+            monitor_printf(mon, " -->");
+        }
+
+        if (action->has_new_vlan_id) {
+            monitor_printf(mon, " apply new vlan %d",
+                           ntohs(action->new_vlan_id));
+        }
+
+        if (action->has_group_id) {
+            monitor_printf(mon, " write group 0x%08x", action->group_id);
+        }
+
+        if (action->has_goto_tbl) {
+            monitor_printf(mon, " goto tbl %d", action->goto_tbl);
+        }
+
+        monitor_printf(mon, "\n");
+    }
+
+    qapi_free_RockerOfDpaFlowList(list);
+}
+
+void hmp_rocker_of_dpa_groups(Monitor *mon, const QDict *qdict)
+{
+    RockerOfDpaGroupList *list, *g;
+    const char *name = qdict_get_str(qdict, "name");
+    uint8_t type = qdict_get_try_int(qdict, "type", 9);
+    Error *err = NULL;
+
+    list = qmp_query_rocker_of_dpa_groups(name, type != 9, type, &err);
+    if (err != NULL) {
+        hmp_handle_error(mon, err);
+        return;
+    }
+
+    monitor_printf(mon, "id (decode) --> buckets\n");
+
+    for (g = list; g; g = g->next) {
+        RockerOfDpaGroup *group = g->value;
+        bool set = false;
+
+        monitor_printf(mon, "0x%08x", group->id);
+
+        monitor_printf(mon, " (type %s", group->type == 0 ? "L2 interface" :
+                                         group->type == 1 ? "L2 rewrite" :
+                                         group->type == 2 ? "L3 unicast" :
+                                         group->type == 3 ? "L2 multicast" :
+                                         group->type == 4 ? "L2 flood" :
+                                         group->type == 5 ? "L3 interface" :
+                                         group->type == 6 ? "L3 multicast" :
+                                         group->type == 7 ? "L3 ECMP" :
+                                         group->type == 8 ? "L2 overlay" :
+                                         "unknown");
+
+        if (group->has_vlan_id) {
+            monitor_printf(mon, " vlan %d", group->vlan_id);
+        }
+
+        if (group->has_pport) {
+            monitor_printf(mon, " pport %d", group->pport);
+        }
+
+        if (group->has_index) {
+            monitor_printf(mon, " index %d", group->index);
+        }
+
+        monitor_printf(mon, ") -->");
+
+        if (group->has_set_vlan_id && group->set_vlan_id) {
+            set = true;
+            monitor_printf(mon, " set vlan %d",
+                           group->set_vlan_id & VLAN_VID_MASK);
+        }
+
+        if (group->has_set_eth_src) {
+            if (!set) {
+                set = true;
+                monitor_printf(mon, " set");
+            }
+            monitor_printf(mon, " src %s", group->set_eth_src);
+        }
+
+        if (group->has_set_eth_dst) {
+            if (!set) {
+                monitor_printf(mon, " set");
+            }
+            monitor_printf(mon, " dst %s", group->set_eth_dst);
+        }
+
+        if (group->has_ttl_check && group->ttl_check) {
+            monitor_printf(mon, " check TTL");
+        }
+
+        if (group->has_group_id && group->group_id) {
+            monitor_printf(mon, " group id 0x%08x", group->group_id);
+        }
+
+        if (group->has_pop_vlan && group->pop_vlan) {
+            monitor_printf(mon, " pop vlan");
+        }
+
+        if (group->has_out_pport) {
+            monitor_printf(mon, " out pport %d", group->out_pport);
+        }
+
+        if (group->has_group_ids) {
+            struct uint32List *id;
+
+            monitor_printf(mon, " groups [");
+            for (id = group->group_ids; id; id = id->next) {
+                monitor_printf(mon, "0x%08x", id->value);
+                if (id->next) {
+                    monitor_printf(mon, ",");
+                }
+            }
+            monitor_printf(mon, "]");
+        }
+
+        monitor_printf(mon, "\n");
+    }
+
+    qapi_free_RockerOfDpaGroupList(list);
+}
+
+void hmp_info_ramblock(Monitor *mon, const QDict *qdict)
+{
+    ram_block_dump(mon);
+}
+
+void hmp_info_vm_generation_id(Monitor *mon, const QDict *qdict)
+{
+    Error *err = NULL;
+    GuidInfo *info = qmp_query_vm_generation_id(&err);
+    if (info) {
+        monitor_printf(mon, "%s\n", info->guid);
+    }
+    hmp_handle_error(mon, err);
+    qapi_free_GuidInfo(info);
+}
+
+void hmp_info_memory_size_summary(Monitor *mon, const QDict *qdict)
+{
+    Error *err = NULL;
+    MemoryInfo *info = qmp_query_memory_size_summary(&err);
+    if (info) {
+        monitor_printf(mon, "base memory: %" PRIu64 "\n",
+                       info->base_memory);
+
+        if (info->has_plugged_memory) {
+            monitor_printf(mon, "plugged memory: %" PRIu64 "\n",
+                           info->plugged_memory);
+        }
+
+        qapi_free_MemoryInfo(info);
+    }
+    hmp_handle_error(mon, err);
+}
diff --git a/monitor/hmp.c b/monitor/hmp.c
new file mode 100644
index 000000000..120423399
--- /dev/null
+++ b/monitor/hmp.c
@@ -0,0 +1,1451 @@
+/*
+ * QEMU monitor
+ *
+ * Copyright (c) 2003-2004 Fabrice Bellard
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+#include "qemu/osdep.h"
+#include 
+#include "monitor-internal.h"
+#include "qapi/error.h"
+#include "qapi/qmp/qdict.h"
+#include "qapi/qmp/qnum.h"
+#include "qemu/config-file.h"
+#include "qemu/ctype.h"
+#include "qemu/cutils.h"
+#include "qemu/log.h"
+#include "qemu/option.h"
+#include "qemu/units.h"
+#include "sysemu/block-backend.h"
+#include "sysemu/runstate.h"
+#include "trace.h"
+
+static void monitor_command_cb(void *opaque, const char *cmdline,
+                               void *readline_opaque)
+{
+    MonitorHMP *mon = opaque;
+
+    monitor_suspend(&mon->common);
+    handle_hmp_command(mon, cmdline);
+    monitor_resume(&mon->common);
+}
+
+void monitor_read_command(MonitorHMP *mon, int show_prompt)
+{
+    if (!mon->rs) {
+        return;
+    }
+
+    readline_start(mon->rs, "(qemu) ", 0, monitor_command_cb, NULL);
+    if (show_prompt) {
+        readline_show_prompt(mon->rs);
+    }
+}
+
+int monitor_read_password(MonitorHMP *mon, ReadLineFunc *readline_func,
+                          void *opaque)
+{
+    if (mon->rs) {
+        readline_start(mon->rs, "Password: ", 1, readline_func, opaque);
+        /* prompt is printed on return from the command handler */
+        return 0;
+    } else {
+        monitor_printf(&mon->common,
+                       "terminal does not support password prompting\n");
+        return -ENOTTY;
+    }
+}
+
+static int get_str(char *buf, int buf_size, const char **pp)
+{
+    const char *p;
+    char *q;
+    int c;
+
+    q = buf;
+    p = *pp;
+    while (qemu_isspace(*p)) {
+        p++;
+    }
+    if (*p == '\0') {
+    fail:
+        *q = '\0';
+        *pp = p;
+        return -1;
+    }
+    if (*p == '\"') {
+        p++;
+        while (*p != '\0' && *p != '\"') {
+            if (*p == '\\') {
+                p++;
+                c = *p++;
+                switch (c) {
+                case 'n':
+                    c = '\n';
+                    break;
+                case 'r':
+                    c = '\r';
+                    break;
+                case '\\':
+                case '\'':
+                case '\"':
+                    break;
+                default:
+                    printf("unsupported escape code: '\\%c'\n", c);
+                    goto fail;
+                }
+                if ((q - buf) < buf_size - 1) {
+                    *q++ = c;
+                }
+            } else {
+                if ((q - buf) < buf_size - 1) {
+                    *q++ = *p;
+                }
+                p++;
+            }
+        }
+        if (*p != '\"') {
+            printf("unterminated string\n");
+            goto fail;
+        }
+        p++;
+    } else {
+        while (*p != '\0' && !qemu_isspace(*p)) {
+            if ((q - buf) < buf_size - 1) {
+                *q++ = *p;
+            }
+            p++;
+        }
+    }
+    *q = '\0';
+    *pp = p;
+    return 0;
+}
+
+#define MAX_ARGS 16
+
+static void free_cmdline_args(char **args, int nb_args)
+{
+    int i;
+
+    assert(nb_args <= MAX_ARGS);
+
+    for (i = 0; i < nb_args; i++) {
+        g_free(args[i]);
+    }
+
+}
+
+/*
+ * Parse the command line to get valid args.
+ * @cmdline: command line to be parsed.
+ * @pnb_args: location to store the number of args, must NOT be NULL.
+ * @args: location to store the args, which should be freed by caller, must
+ *        NOT be NULL.
+ *
+ * Returns 0 on success, negative on failure.
+ *
+ * NOTE: this parser is an approximate form of the real command parser. Number
+ *       of args have a limit of MAX_ARGS. If cmdline contains more, it will
+ *       return with failure.
+ */
+static int parse_cmdline(const char *cmdline,
+                         int *pnb_args, char **args)
+{
+    const char *p;
+    int nb_args, ret;
+    char buf[1024];
+
+    p = cmdline;
+    nb_args = 0;
+    for (;;) {
+        while (qemu_isspace(*p)) {
+            p++;
+        }
+        if (*p == '\0') {
+            break;
+        }
+        if (nb_args >= MAX_ARGS) {
+            goto fail;
+        }
+        ret = get_str(buf, sizeof(buf), &p);
+        if (ret < 0) {
+            goto fail;
+        }
+        args[nb_args] = g_strdup(buf);
+        nb_args++;
+    }
+    *pnb_args = nb_args;
+    return 0;
+
+ fail:
+    free_cmdline_args(args, nb_args);
+    return -1;
+}
+
+/*
+ * Can command @cmd be executed in preconfig state?
+ */
+static bool cmd_can_preconfig(const HMPCommand *cmd)
+{
+    if (!cmd->flags) {
+        return false;
+    }
+
+    return strchr(cmd->flags, 'p');
+}
+
+static void help_cmd_dump_one(Monitor *mon,
+                              const HMPCommand *cmd,
+                              char **prefix_args,
+                              int prefix_args_nb)
+{
+    int i;
+
+    if (runstate_check(RUN_STATE_PRECONFIG) && !cmd_can_preconfig(cmd)) {
+        return;
+    }
+
+    for (i = 0; i < prefix_args_nb; i++) {
+        monitor_printf(mon, "%s ", prefix_args[i]);
+    }
+    monitor_printf(mon, "%s %s -- %s\n", cmd->name, cmd->params, cmd->help);
+}
+
+/* @args[@arg_index] is the valid command need to find in @cmds */
+static void help_cmd_dump(Monitor *mon, const HMPCommand *cmds,
+                          char **args, int nb_args, int arg_index)
+{
+    const HMPCommand *cmd;
+    size_t i;
+
+    /* No valid arg need to compare with, dump all in *cmds */
+    if (arg_index >= nb_args) {
+        for (cmd = cmds; cmd->name != NULL; cmd++) {
+            help_cmd_dump_one(mon, cmd, args, arg_index);
+        }
+        return;
+    }
+
+    /* Find one entry to dump */
+    for (cmd = cmds; cmd->name != NULL; cmd++) {
+        if (hmp_compare_cmd(args[arg_index], cmd->name) &&
+            ((!runstate_check(RUN_STATE_PRECONFIG) ||
+                cmd_can_preconfig(cmd)))) {
+            if (cmd->sub_table) {
+                /* continue with next arg */
+                help_cmd_dump(mon, cmd->sub_table,
+                              args, nb_args, arg_index + 1);
+            } else {
+                help_cmd_dump_one(mon, cmd, args, arg_index);
+            }
+            return;
+        }
+    }
+
+    /* Command not found */
+    monitor_printf(mon, "unknown command: '");
+    for (i = 0; i <= arg_index; i++) {
+        monitor_printf(mon, "%s%s", args[i], i == arg_index ? "'\n" : " ");
+    }
+}
+
+void help_cmd(Monitor *mon, const char *name)
+{
+    char *args[MAX_ARGS];
+    int nb_args = 0;
+
+    /* 1. parse user input */
+    if (name) {
+        /* special case for log, directly dump and return */
+        if (!strcmp(name, "log")) {
+            const QEMULogItem *item;
+            monitor_printf(mon, "Log items (comma separated):\n");
+            monitor_printf(mon, "%-10s %s\n", "none", "remove all logs");
+            for (item = qemu_log_items; item->mask != 0; item++) {
+                monitor_printf(mon, "%-10s %s\n", item->name, item->help);
+            }
+            return;
+        }
+
+        if (parse_cmdline(name, &nb_args, args) < 0) {
+            return;
+        }
+    }
+
+    /* 2. dump the contents according to parsed args */
+    help_cmd_dump(mon, hmp_cmds, args, nb_args, 0);
+
+    free_cmdline_args(args, nb_args);
+}
+
+/*******************************************************************/
+
+static const char *pch;
+static sigjmp_buf expr_env;
+
+static void GCC_FMT_ATTR(2, 3) QEMU_NORETURN
+expr_error(Monitor *mon, const char *fmt, ...)
+{
+    va_list ap;
+    va_start(ap, fmt);
+    monitor_vprintf(mon, fmt, ap);
+    monitor_printf(mon, "\n");
+    va_end(ap);
+    siglongjmp(expr_env, 1);
+}
+
+static void next(void)
+{
+    if (*pch != '\0') {
+        pch++;
+        while (qemu_isspace(*pch)) {
+            pch++;
+        }
+    }
+}
+
+static int64_t expr_sum(Monitor *mon);
+
+static int64_t expr_unary(Monitor *mon)
+{
+    int64_t n;
+    char *p;
+    int ret;
+
+    switch (*pch) {
+    case '+':
+        next();
+        n = expr_unary(mon);
+        break;
+    case '-':
+        next();
+        n = -expr_unary(mon);
+        break;
+    case '~':
+        next();
+        n = ~expr_unary(mon);
+        break;
+    case '(':
+        next();
+        n = expr_sum(mon);
+        if (*pch != ')') {
+            expr_error(mon, "')' expected");
+        }
+        next();
+        break;
+    case '\'':
+        pch++;
+        if (*pch == '\0') {
+            expr_error(mon, "character constant expected");
+        }
+        n = *pch;
+        pch++;
+        if (*pch != '\'') {
+            expr_error(mon, "missing terminating \' character");
+        }
+        next();
+        break;
+    case '$':
+        {
+            char buf[128], *q;
+            int64_t reg = 0;
+
+            pch++;
+            q = buf;
+            while ((*pch >= 'a' && *pch <= 'z') ||
+                   (*pch >= 'A' && *pch <= 'Z') ||
+                   (*pch >= '0' && *pch <= '9') ||
+                   *pch == '_' || *pch == '.') {
+                if ((q - buf) < sizeof(buf) - 1) {
+                    *q++ = *pch;
+                }
+                pch++;
+            }
+            while (qemu_isspace(*pch)) {
+                pch++;
+            }
+            *q = 0;
+            ret = get_monitor_def(mon, ®, buf);
+            if (ret < 0) {
+                expr_error(mon, "unknown register");
+            }
+            n = reg;
+        }
+        break;
+    case '\0':
+        expr_error(mon, "unexpected end of expression");
+        n = 0;
+        break;
+    default:
+        errno = 0;
+        n = strtoull(pch, &p, 0);
+        if (errno == ERANGE) {
+            expr_error(mon, "number too large");
+        }
+        if (pch == p) {
+            expr_error(mon, "invalid char '%c' in expression", *p);
+        }
+        pch = p;
+        while (qemu_isspace(*pch)) {
+            pch++;
+        }
+        break;
+    }
+    return n;
+}
+
+static int64_t expr_prod(Monitor *mon)
+{
+    int64_t val, val2;
+    int op;
+
+    val = expr_unary(mon);
+    for (;;) {
+        op = *pch;
+        if (op != '*' && op != '/' && op != '%') {
+            break;
+        }
+        next();
+        val2 = expr_unary(mon);
+        switch (op) {
+        default:
+        case '*':
+            val *= val2;
+            break;
+        case '/':
+        case '%':
+            if (val2 == 0) {
+                expr_error(mon, "division by zero");
+            }
+            if (op == '/') {
+                val /= val2;
+            } else {
+                val %= val2;
+            }
+            break;
+        }
+    }
+    return val;
+}
+
+static int64_t expr_logic(Monitor *mon)
+{
+    int64_t val, val2;
+    int op;
+
+    val = expr_prod(mon);
+    for (;;) {
+        op = *pch;
+        if (op != '&' && op != '|' && op != '^') {
+            break;
+        }
+        next();
+        val2 = expr_prod(mon);
+        switch (op) {
+        default:
+        case '&':
+            val &= val2;
+            break;
+        case '|':
+            val |= val2;
+            break;
+        case '^':
+            val ^= val2;
+            break;
+        }
+    }
+    return val;
+}
+
+static int64_t expr_sum(Monitor *mon)
+{
+    int64_t val, val2;
+    int op;
+
+    val = expr_logic(mon);
+    for (;;) {
+        op = *pch;
+        if (op != '+' && op != '-') {
+            break;
+        }
+        next();
+        val2 = expr_logic(mon);
+        if (op == '+') {
+            val += val2;
+        } else {
+            val -= val2;
+        }
+    }
+    return val;
+}
+
+static int get_expr(Monitor *mon, int64_t *pval, const char **pp)
+{
+    pch = *pp;
+    if (sigsetjmp(expr_env, 0)) {
+        *pp = pch;
+        return -1;
+    }
+    while (qemu_isspace(*pch)) {
+        pch++;
+    }
+    *pval = expr_sum(mon);
+    *pp = pch;
+    return 0;
+}
+
+static int get_double(Monitor *mon, double *pval, const char **pp)
+{
+    const char *p = *pp;
+    char *tailp;
+    double d;
+
+    d = strtod(p, &tailp);
+    if (tailp == p) {
+        monitor_printf(mon, "Number expected\n");
+        return -1;
+    }
+    if (d != d || d - d != 0) {
+        /* NaN or infinity */
+        monitor_printf(mon, "Bad number\n");
+        return -1;
+    }
+    *pval = d;
+    *pp = tailp;
+    return 0;
+}
+
+/*
+ * Store the command-name in cmdname, and return a pointer to
+ * the remaining of the command string.
+ */
+static const char *get_command_name(const char *cmdline,
+                                    char *cmdname, size_t nlen)
+{
+    size_t len;
+    const char *p, *pstart;
+
+    p = cmdline;
+    while (qemu_isspace(*p)) {
+        p++;
+    }
+    if (*p == '\0') {
+        return NULL;
+    }
+    pstart = p;
+    while (*p != '\0' && *p != '/' && !qemu_isspace(*p)) {
+        p++;
+    }
+    len = p - pstart;
+    if (len > nlen - 1) {
+        len = nlen - 1;
+    }
+    memcpy(cmdname, pstart, len);
+    cmdname[len] = '\0';
+    return p;
+}
+
+/**
+ * Read key of 'type' into 'key' and return the current
+ * 'type' pointer.
+ */
+static char *key_get_info(const char *type, char **key)
+{
+    size_t len;
+    char *p, *str;
+
+    if (*type == ',') {
+        type++;
+    }
+
+    p = strchr(type, ':');
+    if (!p) {
+        *key = NULL;
+        return NULL;
+    }
+    len = p - type;
+
+    str = g_malloc(len + 1);
+    memcpy(str, type, len);
+    str[len] = '\0';
+
+    *key = str;
+    return ++p;
+}
+
+static int default_fmt_format = 'x';
+static int default_fmt_size = 4;
+
+static int is_valid_option(const char *c, const char *typestr)
+{
+    char option[3];
+
+    option[0] = '-';
+    option[1] = *c;
+    option[2] = '\0';
+
+    typestr = strstr(typestr, option);
+    return (typestr != NULL);
+}
+
+static const HMPCommand *search_dispatch_table(const HMPCommand *disp_table,
+                                               const char *cmdname)
+{
+    const HMPCommand *cmd;
+
+    for (cmd = disp_table; cmd->name != NULL; cmd++) {
+        if (hmp_compare_cmd(cmdname, cmd->name)) {
+            return cmd;
+        }
+    }
+
+    return NULL;
+}
+
+/*
+ * Parse command name from @cmdp according to command table @table.
+ * If blank, return NULL.
+ * Else, if no valid command can be found, report to @mon, and return
+ * NULL.
+ * Else, change @cmdp to point right behind the name, and return its
+ * command table entry.
+ * Do not assume the return value points into @table!  It doesn't when
+ * the command is found in a sub-command table.
+ */
+static const HMPCommand *monitor_parse_command(MonitorHMP *hmp_mon,
+                                               const char *cmdp_start,
+                                               const char **cmdp,
+                                               HMPCommand *table)
+{
+    Monitor *mon = &hmp_mon->common;
+    const char *p;
+    const HMPCommand *cmd;
+    char cmdname[256];
+
+    /* extract the command name */
+    p = get_command_name(*cmdp, cmdname, sizeof(cmdname));
+    if (!p) {
+        return NULL;
+    }
+
+    cmd = search_dispatch_table(table, cmdname);
+    if (!cmd) {
+        monitor_printf(mon, "unknown command: '%.*s'\n",
+                       (int)(p - cmdp_start), cmdp_start);
+        return NULL;
+    }
+    if (runstate_check(RUN_STATE_PRECONFIG) && !cmd_can_preconfig(cmd)) {
+        monitor_printf(mon, "Command '%.*s' not available with -preconfig "
+                            "until after exit_preconfig.\n",
+                       (int)(p - cmdp_start), cmdp_start);
+        return NULL;
+    }
+
+    /* filter out following useless space */
+    while (qemu_isspace(*p)) {
+        p++;
+    }
+
+    *cmdp = p;
+    /* search sub command */
+    if (cmd->sub_table != NULL && *p != '\0') {
+        return monitor_parse_command(hmp_mon, cmdp_start, cmdp, cmd->sub_table);
+    }
+
+    return cmd;
+}
+
+/*
+ * Parse arguments for @cmd.
+ * If it can't be parsed, report to @mon, and return NULL.
+ * Else, insert command arguments into a QDict, and return it.
+ * Note: On success, caller has to free the QDict structure.
+ */
+static QDict *monitor_parse_arguments(Monitor *mon,
+                                      const char **endp,
+                                      const HMPCommand *cmd)
+{
+    const char *typestr;
+    char *key;
+    int c;
+    const char *p = *endp;
+    char buf[1024];
+    QDict *qdict = qdict_new();
+
+    /* parse the parameters */
+    typestr = cmd->args_type;
+    for (;;) {
+        typestr = key_get_info(typestr, &key);
+        if (!typestr) {
+            break;
+        }
+        c = *typestr;
+        typestr++;
+        switch (c) {
+        case 'F':
+        case 'B':
+        case 's':
+            {
+                int ret;
+
+                while (qemu_isspace(*p)) {
+                    p++;
+                }
+                if (*typestr == '?') {
+                    typestr++;
+                    if (*p == '\0') {
+                        /* no optional string: NULL argument */
+                        break;
+                    }
+                }
+                ret = get_str(buf, sizeof(buf), &p);
+                if (ret < 0) {
+                    switch (c) {
+                    case 'F':
+                        monitor_printf(mon, "%s: filename expected\n",
+                                       cmd->name);
+                        break;
+                    case 'B':
+                        monitor_printf(mon, "%s: block device name expected\n",
+                                       cmd->name);
+                        break;
+                    default:
+                        monitor_printf(mon, "%s: string expected\n", cmd->name);
+                        break;
+                    }
+                    goto fail;
+                }
+                qdict_put_str(qdict, key, buf);
+            }
+            break;
+        case 'O':
+            {
+                QemuOptsList *opts_list;
+                QemuOpts *opts;
+
+                opts_list = qemu_find_opts(key);
+                if (!opts_list || opts_list->desc->name) {
+                    goto bad_type;
+                }
+                while (qemu_isspace(*p)) {
+                    p++;
+                }
+                if (!*p) {
+                    break;
+                }
+                if (get_str(buf, sizeof(buf), &p) < 0) {
+                    goto fail;
+                }
+                opts = qemu_opts_parse_noisily(opts_list, buf, true);
+                if (!opts) {
+                    goto fail;
+                }
+                qemu_opts_to_qdict(opts, qdict);
+                qemu_opts_del(opts);
+            }
+            break;
+        case '/':
+            {
+                int count, format, size;
+
+                while (qemu_isspace(*p)) {
+                    p++;
+                }
+                if (*p == '/') {
+                    /* format found */
+                    p++;
+                    count = 1;
+                    if (qemu_isdigit(*p)) {
+                        count = 0;
+                        while (qemu_isdigit(*p)) {
+                            count = count * 10 + (*p - '0');
+                            p++;
+                        }
+                    }
+                    size = -1;
+                    format = -1;
+                    for (;;) {
+                        switch (*p) {
+                        case 'o':
+                        case 'd':
+                        case 'u':
+                        case 'x':
+                        case 'i':
+                        case 'c':
+                            format = *p++;
+                            break;
+                        case 'b':
+                            size = 1;
+                            p++;
+                            break;
+                        case 'h':
+                            size = 2;
+                            p++;
+                            break;
+                        case 'w':
+                            size = 4;
+                            p++;
+                            break;
+                        case 'g':
+                        case 'L':
+                            size = 8;
+                            p++;
+                            break;
+                        default:
+                            goto next;
+                        }
+                    }
+                next:
+                    if (*p != '\0' && !qemu_isspace(*p)) {
+                        monitor_printf(mon, "invalid char in format: '%c'\n",
+                                       *p);
+                        goto fail;
+                    }
+                    if (format < 0) {
+                        format = default_fmt_format;
+                    }
+                    if (format != 'i') {
+                        /* for 'i', not specifying a size gives -1 as size */
+                        if (size < 0) {
+                            size = default_fmt_size;
+                        }
+                        default_fmt_size = size;
+                    }
+                    default_fmt_format = format;
+                } else {
+                    count = 1;
+                    format = default_fmt_format;
+                    if (format != 'i') {
+                        size = default_fmt_size;
+                    } else {
+                        size = -1;
+                    }
+                }
+                qdict_put_int(qdict, "count", count);
+                qdict_put_int(qdict, "format", format);
+                qdict_put_int(qdict, "size", size);
+            }
+            break;
+        case 'i':
+        case 'l':
+        case 'M':
+            {
+                int64_t val;
+
+                while (qemu_isspace(*p)) {
+                    p++;
+                }
+                if (*typestr == '?' || *typestr == '.') {
+                    if (*typestr == '?') {
+                        if (*p == '\0') {
+                            typestr++;
+                            break;
+                        }
+                    } else {
+                        if (*p == '.') {
+                            p++;
+                            while (qemu_isspace(*p)) {
+                                p++;
+                            }
+                        } else {
+                            typestr++;
+                            break;
+                        }
+                    }
+                    typestr++;
+                }
+                if (get_expr(mon, &val, &p)) {
+                    goto fail;
+                }
+                /* Check if 'i' is greater than 32-bit */
+                if ((c == 'i') && ((val >> 32) & 0xffffffff)) {
+                    monitor_printf(mon, "\'%s\' has failed: ", cmd->name);
+                    monitor_printf(mon, "integer is for 32-bit values\n");
+                    goto fail;
+                } else if (c == 'M') {
+                    if (val < 0) {
+                        monitor_printf(mon, "enter a positive value\n");
+                        goto fail;
+                    }
+                    val *= MiB;
+                }
+                qdict_put_int(qdict, key, val);
+            }
+            break;
+        case 'o':
+            {
+                int ret;
+                uint64_t val;
+                const char *end;
+
+                while (qemu_isspace(*p)) {
+                    p++;
+                }
+                if (*typestr == '?') {
+                    typestr++;
+                    if (*p == '\0') {
+                        break;
+                    }
+                }
+                ret = qemu_strtosz_MiB(p, &end, &val);
+                if (ret < 0 || val > INT64_MAX) {
+                    monitor_printf(mon, "invalid size\n");
+                    goto fail;
+                }
+                qdict_put_int(qdict, key, val);
+                p = end;
+            }
+            break;
+        case 'T':
+            {
+                double val;
+
+                while (qemu_isspace(*p)) {
+                    p++;
+                }
+                if (*typestr == '?') {
+                    typestr++;
+                    if (*p == '\0') {
+                        break;
+                    }
+                }
+                if (get_double(mon, &val, &p) < 0) {
+                    goto fail;
+                }
+                if (p[0] && p[1] == 's') {
+                    switch (*p) {
+                    case 'm':
+                        val /= 1e3; p += 2; break;
+                    case 'u':
+                        val /= 1e6; p += 2; break;
+                    case 'n':
+                        val /= 1e9; p += 2; break;
+                    }
+                }
+                if (*p && !qemu_isspace(*p)) {
+                    monitor_printf(mon, "Unknown unit suffix\n");
+                    goto fail;
+                }
+                qdict_put(qdict, key, qnum_from_double(val));
+            }
+            break;
+        case 'b':
+            {
+                const char *beg;
+                bool val;
+
+                while (qemu_isspace(*p)) {
+                    p++;
+                }
+                beg = p;
+                while (qemu_isgraph(*p)) {
+                    p++;
+                }
+                if (p - beg == 2 && !memcmp(beg, "on", p - beg)) {
+                    val = true;
+                } else if (p - beg == 3 && !memcmp(beg, "off", p - beg)) {
+                    val = false;
+                } else {
+                    monitor_printf(mon, "Expected 'on' or 'off'\n");
+                    goto fail;
+                }
+                qdict_put_bool(qdict, key, val);
+            }
+            break;
+        case '-':
+            {
+                const char *tmp = p;
+                int skip_key = 0;
+                /* option */
+
+                c = *typestr++;
+                if (c == '\0') {
+                    goto bad_type;
+                }
+                while (qemu_isspace(*p)) {
+                    p++;
+                }
+                if (*p == '-') {
+                    p++;
+                    if (c != *p) {
+                        if (!is_valid_option(p, typestr)) {
+                            monitor_printf(mon, "%s: unsupported option -%c\n",
+                                           cmd->name, *p);
+                            goto fail;
+                        } else {
+                            skip_key = 1;
+                        }
+                    }
+                    if (skip_key) {
+                        p = tmp;
+                    } else {
+                        /* has option */
+                        p++;
+                        qdict_put_bool(qdict, key, true);
+                    }
+                }
+            }
+            break;
+        case 'S':
+            {
+                /* package all remaining string */
+                int len;
+
+                while (qemu_isspace(*p)) {
+                    p++;
+                }
+                if (*typestr == '?') {
+                    typestr++;
+                    if (*p == '\0') {
+                        /* no remaining string: NULL argument */
+                        break;
+                    }
+                }
+                len = strlen(p);
+                if (len <= 0) {
+                    monitor_printf(mon, "%s: string expected\n",
+                                   cmd->name);
+                    goto fail;
+                }
+                qdict_put_str(qdict, key, p);
+                p += len;
+            }
+            break;
+        default:
+        bad_type:
+            monitor_printf(mon, "%s: unknown type '%c'\n", cmd->name, c);
+            goto fail;
+        }
+        g_free(key);
+        key = NULL;
+    }
+    /* check that all arguments were parsed */
+    while (qemu_isspace(*p)) {
+        p++;
+    }
+    if (*p != '\0') {
+        monitor_printf(mon, "%s: extraneous characters at the end of line\n",
+                       cmd->name);
+        goto fail;
+    }
+
+    return qdict;
+
+fail:
+    qobject_unref(qdict);
+    g_free(key);
+    return NULL;
+}
+
+typedef struct HandleHmpCommandCo {
+    Monitor *mon;
+    const HMPCommand *cmd;
+    QDict *qdict;
+    bool done;
+} HandleHmpCommandCo;
+
+static void handle_hmp_command_co(void *opaque)
+{
+    HandleHmpCommandCo *data = opaque;
+    data->cmd->cmd(data->mon, data->qdict);
+    monitor_set_cur(qemu_coroutine_self(), NULL);
+    data->done = true;
+}
+
+void handle_hmp_command(MonitorHMP *mon, const char *cmdline)
+{
+    QDict *qdict;
+    const HMPCommand *cmd;
+    const char *cmd_start = cmdline;
+
+    trace_handle_hmp_command(mon, cmdline);
+
+    cmd = monitor_parse_command(mon, cmdline, &cmdline, hmp_cmds);
+    if (!cmd) {
+        return;
+    }
+
+    qdict = monitor_parse_arguments(&mon->common, &cmdline, cmd);
+    if (!qdict) {
+        while (cmdline > cmd_start && qemu_isspace(cmdline[-1])) {
+            cmdline--;
+        }
+        monitor_printf(&mon->common, "Try \"help %.*s\" for more information\n",
+                       (int)(cmdline - cmd_start), cmd_start);
+        return;
+    }
+
+    if (!cmd->coroutine) {
+        /* old_mon is non-NULL when called from qmp_human_monitor_command() */
+        Monitor *old_mon = monitor_set_cur(qemu_coroutine_self(), &mon->common);
+        cmd->cmd(&mon->common, qdict);
+        monitor_set_cur(qemu_coroutine_self(), old_mon);
+    } else {
+        HandleHmpCommandCo data = {
+            .mon = &mon->common,
+            .cmd = cmd,
+            .qdict = qdict,
+            .done = false,
+        };
+        Coroutine *co = qemu_coroutine_create(handle_hmp_command_co, &data);
+        monitor_set_cur(co, &mon->common);
+        aio_co_enter(qemu_get_aio_context(), co);
+        AIO_WAIT_WHILE(qemu_get_aio_context(), !data.done);
+    }
+
+    qobject_unref(qdict);
+}
+
+static void cmd_completion(MonitorHMP *mon, const char *name, const char *list)
+{
+    const char *p, *pstart;
+    char cmd[128];
+    int len;
+
+    p = list;
+    for (;;) {
+        pstart = p;
+        p = qemu_strchrnul(p, '|');
+        len = p - pstart;
+        if (len > sizeof(cmd) - 2) {
+            len = sizeof(cmd) - 2;
+        }
+        memcpy(cmd, pstart, len);
+        cmd[len] = '\0';
+        if (name[0] == '\0' || !strncmp(name, cmd, strlen(name))) {
+            readline_add_completion(mon->rs, cmd);
+        }
+        if (*p == '\0') {
+            break;
+        }
+        p++;
+    }
+}
+
+static void file_completion(MonitorHMP *mon, const char *input)
+{
+    DIR *ffs;
+    struct dirent *d;
+    char path[1024];
+    char file[1024], file_prefix[1024];
+    int input_path_len;
+    const char *p;
+
+    p = strrchr(input, '/');
+    if (!p) {
+        input_path_len = 0;
+        pstrcpy(file_prefix, sizeof(file_prefix), input);
+        pstrcpy(path, sizeof(path), ".");
+    } else {
+        input_path_len = p - input + 1;
+        memcpy(path, input, input_path_len);
+        if (input_path_len > sizeof(path) - 1) {
+            input_path_len = sizeof(path) - 1;
+        }
+        path[input_path_len] = '\0';
+        pstrcpy(file_prefix, sizeof(file_prefix), p + 1);
+    }
+
+    ffs = opendir(path);
+    if (!ffs) {
+        return;
+    }
+    for (;;) {
+        struct stat sb;
+        d = readdir(ffs);
+        if (!d) {
+            break;
+        }
+
+        if (strcmp(d->d_name, ".") == 0 || strcmp(d->d_name, "..") == 0) {
+            continue;
+        }
+
+        if (strstart(d->d_name, file_prefix, NULL)) {
+            memcpy(file, input, input_path_len);
+            if (input_path_len < sizeof(file)) {
+                pstrcpy(file + input_path_len, sizeof(file) - input_path_len,
+                        d->d_name);
+            }
+            /*
+             * stat the file to find out if it's a directory.
+             * In that case add a slash to speed up typing long paths
+             */
+            if (stat(file, &sb) == 0 && S_ISDIR(sb.st_mode)) {
+                pstrcat(file, sizeof(file), "/");
+            }
+            readline_add_completion(mon->rs, file);
+        }
+    }
+    closedir(ffs);
+}
+
+static const char *next_arg_type(const char *typestr)
+{
+    const char *p = strchr(typestr, ':');
+    return (p != NULL ? ++p : typestr);
+}
+
+static void monitor_find_completion_by_table(MonitorHMP *mon,
+                                             const HMPCommand *cmd_table,
+                                             char **args,
+                                             int nb_args)
+{
+    const char *cmdname;
+    int i;
+    const char *ptype, *old_ptype, *str, *name;
+    const HMPCommand *cmd;
+    BlockBackend *blk = NULL;
+
+    if (nb_args <= 1) {
+        /* command completion */
+        if (nb_args == 0) {
+            cmdname = "";
+        } else {
+            cmdname = args[0];
+        }
+        readline_set_completion_index(mon->rs, strlen(cmdname));
+        for (cmd = cmd_table; cmd->name != NULL; cmd++) {
+            if (!runstate_check(RUN_STATE_PRECONFIG) ||
+                 cmd_can_preconfig(cmd)) {
+                cmd_completion(mon, cmdname, cmd->name);
+            }
+        }
+    } else {
+        /* find the command */
+        for (cmd = cmd_table; cmd->name != NULL; cmd++) {
+            if (hmp_compare_cmd(args[0], cmd->name) &&
+                (!runstate_check(RUN_STATE_PRECONFIG) ||
+                 cmd_can_preconfig(cmd))) {
+                break;
+            }
+        }
+        if (!cmd->name) {
+            return;
+        }
+
+        if (cmd->sub_table) {
+            /* do the job again */
+            monitor_find_completion_by_table(mon, cmd->sub_table,
+                                             &args[1], nb_args - 1);
+            return;
+        }
+        if (cmd->command_completion) {
+            cmd->command_completion(mon->rs, nb_args, args[nb_args - 1]);
+            return;
+        }
+
+        ptype = next_arg_type(cmd->args_type);
+        for (i = 0; i < nb_args - 2; i++) {
+            if (*ptype != '\0') {
+                ptype = next_arg_type(ptype);
+                while (*ptype == '?') {
+                    ptype = next_arg_type(ptype);
+                }
+            }
+        }
+        str = args[nb_args - 1];
+        old_ptype = NULL;
+        while (*ptype == '-' && old_ptype != ptype) {
+            old_ptype = ptype;
+            ptype = next_arg_type(ptype);
+        }
+        switch (*ptype) {
+        case 'F':
+            /* file completion */
+            readline_set_completion_index(mon->rs, strlen(str));
+            file_completion(mon, str);
+            break;
+        case 'B':
+            /* block device name completion */
+            readline_set_completion_index(mon->rs, strlen(str));
+            while ((blk = blk_next(blk)) != NULL) {
+                name = blk_name(blk);
+                if (str[0] == '\0' ||
+                    !strncmp(name, str, strlen(str))) {
+                    readline_add_completion(mon->rs, name);
+                }
+            }
+            break;
+        case 's':
+        case 'S':
+            if (!strcmp(cmd->name, "help|?")) {
+                monitor_find_completion_by_table(mon, cmd_table,
+                                                 &args[1], nb_args - 1);
+            }
+            break;
+        default:
+            break;
+        }
+    }
+}
+
+static void monitor_find_completion(void *opaque,
+                                    const char *cmdline)
+{
+    MonitorHMP *mon = opaque;
+    char *args[MAX_ARGS];
+    int nb_args, len;
+
+    /* 1. parse the cmdline */
+    if (parse_cmdline(cmdline, &nb_args, args) < 0) {
+        return;
+    }
+
+    /*
+     * if the line ends with a space, it means we want to complete the
+     * next arg
+     */
+    len = strlen(cmdline);
+    if (len > 0 && qemu_isspace(cmdline[len - 1])) {
+        if (nb_args >= MAX_ARGS) {
+            goto cleanup;
+        }
+        args[nb_args++] = g_strdup("");
+    }
+
+    /* 2. auto complete according to args */
+    monitor_find_completion_by_table(mon, hmp_cmds, args, nb_args);
+
+cleanup:
+    free_cmdline_args(args, nb_args);
+}
+
+static void monitor_read(void *opaque, const uint8_t *buf, int size)
+{
+    MonitorHMP *mon = container_of(opaque, MonitorHMP, common);
+    int i;
+
+    if (mon->rs) {
+        for (i = 0; i < size; i++) {
+            readline_handle_byte(mon->rs, buf[i]);
+        }
+    } else {
+        if (size == 0 || buf[size - 1] != 0) {
+            monitor_printf(&mon->common, "corrupted command\n");
+        } else {
+            handle_hmp_command(mon, (char *)buf);
+        }
+    }
+}
+
+static void monitor_event(void *opaque, QEMUChrEvent event)
+{
+    Monitor *mon = opaque;
+    MonitorHMP *hmp_mon = container_of(mon, MonitorHMP, common);
+
+    switch (event) {
+    case CHR_EVENT_MUX_IN:
+        qemu_mutex_lock(&mon->mon_lock);
+        mon->mux_out = 0;
+        qemu_mutex_unlock(&mon->mon_lock);
+        if (mon->reset_seen) {
+            readline_restart(hmp_mon->rs);
+            monitor_resume(mon);
+            monitor_flush(mon);
+        } else {
+            qatomic_mb_set(&mon->suspend_cnt, 0);
+        }
+        break;
+
+    case CHR_EVENT_MUX_OUT:
+        if (mon->reset_seen) {
+            if (qatomic_mb_read(&mon->suspend_cnt) == 0) {
+                monitor_printf(mon, "\n");
+            }
+            monitor_flush(mon);
+            monitor_suspend(mon);
+        } else {
+            qatomic_inc(&mon->suspend_cnt);
+        }
+        qemu_mutex_lock(&mon->mon_lock);
+        mon->mux_out = 1;
+        qemu_mutex_unlock(&mon->mon_lock);
+        break;
+
+    case CHR_EVENT_OPENED:
+        monitor_printf(mon, "QEMU %s monitor - type 'help' for more "
+                       "information\n", QEMU_VERSION);
+        if (!mon->mux_out) {
+            readline_restart(hmp_mon->rs);
+            readline_show_prompt(hmp_mon->rs);
+        }
+        mon->reset_seen = 1;
+        mon_refcount++;
+        break;
+
+    case CHR_EVENT_CLOSED:
+        mon_refcount--;
+        monitor_fdsets_cleanup();
+        break;
+
+    case CHR_EVENT_BREAK:
+        /* Ignored */
+        break;
+    }
+}
+
+
+/*
+ * These functions just adapt the readline interface in a typesafe way.  We
+ * could cast function pointers but that discards compiler checks.
+ */
+static void GCC_FMT_ATTR(2, 3) monitor_readline_printf(void *opaque,
+                                                       const char *fmt, ...)
+{
+    MonitorHMP *mon = opaque;
+    va_list ap;
+    va_start(ap, fmt);
+    monitor_vprintf(&mon->common, fmt, ap);
+    va_end(ap);
+}
+
+static void monitor_readline_flush(void *opaque)
+{
+    MonitorHMP *mon = opaque;
+    monitor_flush(&mon->common);
+}
+
+void monitor_init_hmp(Chardev *chr, bool use_readline, Error **errp)
+{
+    MonitorHMP *mon = g_new0(MonitorHMP, 1);
+
+    if (!qemu_chr_fe_init(&mon->common.chr, chr, errp)) {
+        g_free(mon);
+        return;
+    }
+
+    monitor_data_init(&mon->common, false, false, false);
+
+    mon->use_readline = use_readline;
+    if (mon->use_readline) {
+        mon->rs = readline_init(monitor_readline_printf,
+                                monitor_readline_flush,
+                                mon,
+                                monitor_find_completion);
+        monitor_read_command(mon, 0);
+    }
+
+    qemu_chr_fe_set_handlers(&mon->common.chr, monitor_can_read, monitor_read,
+                             monitor_event, NULL, &mon->common, NULL, true);
+    monitor_list_append(&mon->common);
+}
diff --git a/monitor/meson.build b/monitor/meson.build
new file mode 100644
index 000000000..6d00985ac
--- /dev/null
+++ b/monitor/meson.build
@@ -0,0 +1,9 @@
+qmp_ss.add(files('monitor.c', 'qmp.c', 'qmp-cmds-control.c'))
+
+softmmu_ss.add(files(
+  'hmp-cmds.c',
+  'hmp.c',
+))
+softmmu_ss.add([spice_headers, files('qmp-cmds.c')])
+
+specific_ss.add(when: 'CONFIG_SOFTMMU', if_true: [files('misc.c'), spice])
diff --git a/monitor/misc.c b/monitor/misc.c
new file mode 100644
index 000000000..398211a03
--- /dev/null
+++ b/monitor/misc.c
@@ -0,0 +1,2185 @@
+/*
+ * QEMU monitor
+ *
+ * Copyright (c) 2003-2004 Fabrice Bellard
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+#include "qemu/osdep.h"
+#include "monitor-internal.h"
+#include "cpu.h"
+#include "monitor/qdev.h"
+#include "hw/usb.h"
+#include "hw/pci/pci.h"
+#include "sysemu/watchdog.h"
+#include "hw/loader.h"
+#include "exec/gdbstub.h"
+#include "net/net.h"
+#include "net/slirp.h"
+#include "ui/qemu-spice.h"
+#include "qemu/config-file.h"
+#include "qemu/ctype.h"
+#include "ui/console.h"
+#include "ui/input.h"
+#include "audio/audio.h"
+#include "disas/disas.h"
+#include "sysemu/balloon.h"
+#include "qemu/timer.h"
+#include "sysemu/hw_accel.h"
+#include "sysemu/runstate.h"
+#include "authz/list.h"
+#include "qapi/util.h"
+#include "sysemu/blockdev.h"
+#include "sysemu/sysemu.h"
+#include "sysemu/tcg.h"
+#include "sysemu/tpm.h"
+#include "qapi/qmp/qdict.h"
+#include "qapi/qmp/qerror.h"
+#include "qapi/qmp/qstring.h"
+#include "qom/object_interfaces.h"
+#include "trace/control.h"
+#include "monitor/hmp-target.h"
+#include "monitor/hmp.h"
+#ifdef CONFIG_TRACE_SIMPLE
+#include "trace/simple.h"
+#endif
+#include "exec/memory.h"
+#include "exec/exec-all.h"
+#include "qemu/option.h"
+#include "qemu/thread.h"
+#include "block/qapi.h"
+#include "block/block-hmp-cmds.h"
+#include "qapi/qapi-commands-char.h"
+#include "qapi/qapi-commands-control.h"
+#include "qapi/qapi-commands-migration.h"
+#include "qapi/qapi-commands-misc.h"
+#include "qapi/qapi-commands-qom.h"
+#include "qapi/qapi-commands-trace.h"
+#include "qapi/qapi-init-commands.h"
+#include "qapi/error.h"
+#include "qapi/qmp-event.h"
+#include "sysemu/cpus.h"
+#include "qemu/cutils.h"
+#include "tcg/tcg.h"
+
+#if defined(TARGET_S390X)
+#include "hw/s390x/storage-keys.h"
+#include "hw/s390x/storage-attributes.h"
+#endif
+
+/* file descriptors passed via SCM_RIGHTS */
+typedef struct mon_fd_t mon_fd_t;
+struct mon_fd_t {
+    char *name;
+    int fd;
+    QLIST_ENTRY(mon_fd_t) next;
+};
+
+/* file descriptor associated with a file descriptor set */
+typedef struct MonFdsetFd MonFdsetFd;
+struct MonFdsetFd {
+    int fd;
+    bool removed;
+    char *opaque;
+    QLIST_ENTRY(MonFdsetFd) next;
+};
+
+/* file descriptor set containing fds passed via SCM_RIGHTS */
+typedef struct MonFdset MonFdset;
+struct MonFdset {
+    int64_t id;
+    QLIST_HEAD(, MonFdsetFd) fds;
+    QLIST_HEAD(, MonFdsetFd) dup_fds;
+    QLIST_ENTRY(MonFdset) next;
+};
+
+/* Protects mon_fdsets */
+static QemuMutex mon_fdsets_lock;
+static QLIST_HEAD(, MonFdset) mon_fdsets;
+
+static HMPCommand hmp_info_cmds[];
+
+char *qmp_human_monitor_command(const char *command_line, bool has_cpu_index,
+                                int64_t cpu_index, Error **errp)
+{
+    char *output = NULL;
+    MonitorHMP hmp = {};
+
+    monitor_data_init(&hmp.common, false, true, false);
+
+    if (has_cpu_index) {
+        int ret = monitor_set_cpu(&hmp.common, cpu_index);
+        if (ret < 0) {
+            error_setg(errp, QERR_INVALID_PARAMETER_VALUE, "cpu-index",
+                       "a CPU number");
+            goto out;
+        }
+    }
+
+    handle_hmp_command(&hmp, command_line);
+
+    WITH_QEMU_LOCK_GUARD(&hmp.common.mon_lock) {
+        if (qstring_get_length(hmp.common.outbuf) > 0) {
+            output = g_strdup(qstring_get_str(hmp.common.outbuf));
+        } else {
+            output = g_strdup("");
+        }
+    }
+
+out:
+    monitor_data_destroy(&hmp.common);
+    return output;
+}
+
+/**
+ * Is @name in the '|' separated list of names @list?
+ */
+int hmp_compare_cmd(const char *name, const char *list)
+{
+    const char *p, *pstart;
+    int len;
+    len = strlen(name);
+    p = list;
+    for (;;) {
+        pstart = p;
+        p = qemu_strchrnul(p, '|');
+        if ((p - pstart) == len && !memcmp(pstart, name, len)) {
+            return 1;
+        }
+        if (*p == '\0') {
+            break;
+        }
+        p++;
+    }
+    return 0;
+}
+
+static void do_help_cmd(Monitor *mon, const QDict *qdict)
+{
+    help_cmd(mon, qdict_get_try_str(qdict, "name"));
+}
+
+static void hmp_trace_event(Monitor *mon, const QDict *qdict)
+{
+    const char *tp_name = qdict_get_str(qdict, "name");
+    bool new_state = qdict_get_bool(qdict, "option");
+    bool has_vcpu = qdict_haskey(qdict, "vcpu");
+    int vcpu = qdict_get_try_int(qdict, "vcpu", 0);
+    Error *local_err = NULL;
+
+    if (vcpu < 0) {
+        monitor_printf(mon, "argument vcpu must be positive");
+        return;
+    }
+
+    qmp_trace_event_set_state(tp_name, new_state, true, true, has_vcpu, vcpu, &local_err);
+    if (local_err) {
+        error_report_err(local_err);
+    }
+}
+
+#ifdef CONFIG_TRACE_SIMPLE
+static void hmp_trace_file(Monitor *mon, const QDict *qdict)
+{
+    const char *op = qdict_get_try_str(qdict, "op");
+    const char *arg = qdict_get_try_str(qdict, "arg");
+
+    if (!op) {
+        st_print_trace_file_status();
+    } else if (!strcmp(op, "on")) {
+        st_set_trace_file_enabled(true);
+    } else if (!strcmp(op, "off")) {
+        st_set_trace_file_enabled(false);
+    } else if (!strcmp(op, "flush")) {
+        st_flush_trace_buffer();
+    } else if (!strcmp(op, "set")) {
+        if (arg) {
+            st_set_trace_file(arg);
+        }
+    } else {
+        monitor_printf(mon, "unexpected argument \"%s\"\n", op);
+        help_cmd(mon, "trace-file");
+    }
+}
+#endif
+
+static void hmp_info_help(Monitor *mon, const QDict *qdict)
+{
+    help_cmd(mon, "info");
+}
+
+static void monitor_init_qmp_commands(void)
+{
+    /*
+     * Two command lists:
+     * - qmp_commands contains all QMP commands
+     * - qmp_cap_negotiation_commands contains just
+     *   "qmp_capabilities", to enforce capability negotiation
+     */
+
+    qmp_init_marshal(&qmp_commands);
+
+    qmp_register_command(&qmp_commands, "query-qmp-schema",
+                         qmp_query_qmp_schema, QCO_ALLOW_PRECONFIG);
+    qmp_register_command(&qmp_commands, "device_add", qmp_device_add,
+                         QCO_NO_OPTIONS);
+    qmp_register_command(&qmp_commands, "object-add", qmp_object_add,
+                         QCO_NO_OPTIONS);
+
+    QTAILQ_INIT(&qmp_cap_negotiation_commands);
+    qmp_register_command(&qmp_cap_negotiation_commands, "qmp_capabilities",
+                         qmp_marshal_qmp_capabilities, QCO_ALLOW_PRECONFIG);
+}
+
+/* Set the current CPU defined by the user. Callers must hold BQL. */
+int monitor_set_cpu(Monitor *mon, int cpu_index)
+{
+    CPUState *cpu;
+
+    cpu = qemu_get_cpu(cpu_index);
+    if (cpu == NULL) {
+        return -1;
+    }
+    g_free(mon->mon_cpu_path);
+    mon->mon_cpu_path = object_get_canonical_path(OBJECT(cpu));
+    return 0;
+}
+
+/* Callers must hold BQL. */
+static CPUState *mon_get_cpu_sync(Monitor *mon, bool synchronize)
+{
+    CPUState *cpu = NULL;
+
+    if (mon->mon_cpu_path) {
+        cpu = (CPUState *) object_resolve_path_type(mon->mon_cpu_path,
+                                                    TYPE_CPU, NULL);
+        if (!cpu) {
+            g_free(mon->mon_cpu_path);
+            mon->mon_cpu_path = NULL;
+        }
+    }
+    if (!mon->mon_cpu_path) {
+        if (!first_cpu) {
+            return NULL;
+        }
+        monitor_set_cpu(mon, first_cpu->cpu_index);
+        cpu = first_cpu;
+    }
+    assert(cpu != NULL);
+    if (synchronize) {
+        cpu_synchronize_state(cpu);
+    }
+    return cpu;
+}
+
+CPUState *mon_get_cpu(Monitor *mon)
+{
+    return mon_get_cpu_sync(mon, true);
+}
+
+CPUArchState *mon_get_cpu_env(Monitor *mon)
+{
+    CPUState *cs = mon_get_cpu(mon);
+
+    return cs ? cs->env_ptr : NULL;
+}
+
+int monitor_get_cpu_index(Monitor *mon)
+{
+    CPUState *cs = mon_get_cpu_sync(mon, false);
+
+    return cs ? cs->cpu_index : UNASSIGNED_CPU_INDEX;
+}
+
+static void hmp_info_registers(Monitor *mon, const QDict *qdict)
+{
+    bool all_cpus = qdict_get_try_bool(qdict, "cpustate_all", false);
+    CPUState *cs;
+
+    if (all_cpus) {
+        CPU_FOREACH(cs) {
+            monitor_printf(mon, "\nCPU#%d\n", cs->cpu_index);
+            cpu_dump_state(cs, NULL, CPU_DUMP_FPU);
+        }
+    } else {
+        cs = mon_get_cpu(mon);
+
+        if (!cs) {
+            monitor_printf(mon, "No CPU available\n");
+            return;
+        }
+
+        cpu_dump_state(cs, NULL, CPU_DUMP_FPU);
+    }
+}
+
+#ifdef CONFIG_TCG
+static void hmp_info_jit(Monitor *mon, const QDict *qdict)
+{
+    if (!tcg_enabled()) {
+        error_report("JIT information is only available with accel=tcg");
+        return;
+    }
+
+    dump_exec_info();
+    dump_drift_info();
+}
+
+static void hmp_info_opcount(Monitor *mon, const QDict *qdict)
+{
+    dump_opcount_info();
+}
+#endif
+
+static void hmp_info_sync_profile(Monitor *mon, const QDict *qdict)
+{
+    int64_t max = qdict_get_try_int(qdict, "max", 10);
+    bool mean = qdict_get_try_bool(qdict, "mean", false);
+    bool coalesce = !qdict_get_try_bool(qdict, "no_coalesce", false);
+    enum QSPSortBy sort_by;
+
+    sort_by = mean ? QSP_SORT_BY_AVG_WAIT_TIME : QSP_SORT_BY_TOTAL_WAIT_TIME;
+    qsp_report(max, sort_by, coalesce);
+}
+
+static void hmp_info_history(Monitor *mon, const QDict *qdict)
+{
+    MonitorHMP *hmp_mon = container_of(mon, MonitorHMP, common);
+    int i;
+    const char *str;
+
+    if (!hmp_mon->rs) {
+        return;
+    }
+    i = 0;
+    for(;;) {
+        str = readline_get_history(hmp_mon->rs, i);
+        if (!str) {
+            break;
+        }
+        monitor_printf(mon, "%d: '%s'\n", i, str);
+        i++;
+    }
+}
+
+static void hmp_info_cpustats(Monitor *mon, const QDict *qdict)
+{
+    CPUState *cs = mon_get_cpu(mon);
+
+    if (!cs) {
+        monitor_printf(mon, "No CPU available\n");
+        return;
+    }
+    cpu_dump_statistics(cs, 0);
+}
+
+static void hmp_info_trace_events(Monitor *mon, const QDict *qdict)
+{
+    const char *name = qdict_get_try_str(qdict, "name");
+    bool has_vcpu = qdict_haskey(qdict, "vcpu");
+    int vcpu = qdict_get_try_int(qdict, "vcpu", 0);
+    TraceEventInfoList *events;
+    TraceEventInfoList *elem;
+    Error *local_err = NULL;
+
+    if (name == NULL) {
+        name = "*";
+    }
+    if (vcpu < 0) {
+        monitor_printf(mon, "argument vcpu must be positive");
+        return;
+    }
+
+    events = qmp_trace_event_get_state(name, has_vcpu, vcpu, &local_err);
+    if (local_err) {
+        error_report_err(local_err);
+        return;
+    }
+
+    for (elem = events; elem != NULL; elem = elem->next) {
+        monitor_printf(mon, "%s : state %u\n",
+                       elem->value->name,
+                       elem->value->state == TRACE_EVENT_STATE_ENABLED ? 1 : 0);
+    }
+    qapi_free_TraceEventInfoList(events);
+}
+
+void qmp_client_migrate_info(const char *protocol, const char *hostname,
+                             bool has_port, int64_t port,
+                             bool has_tls_port, int64_t tls_port,
+                             bool has_cert_subject, const char *cert_subject,
+                             Error **errp)
+{
+    if (strcmp(protocol, "spice") == 0) {
+        if (!qemu_using_spice(errp)) {
+            return;
+        }
+
+        if (!has_port && !has_tls_port) {
+            error_setg(errp, QERR_MISSING_PARAMETER, "port/tls-port");
+            return;
+        }
+
+        if (qemu_spice.migrate_info(hostname,
+                                    has_port ? port : -1,
+                                    has_tls_port ? tls_port : -1,
+                                    cert_subject)) {
+            error_setg(errp, QERR_UNDEFINED_ERROR);
+            return;
+        }
+        return;
+    }
+
+    error_setg(errp, QERR_INVALID_PARAMETER_VALUE, "protocol", "spice");
+}
+
+static void hmp_logfile(Monitor *mon, const QDict *qdict)
+{
+    Error *err = NULL;
+
+    qemu_set_log_filename(qdict_get_str(qdict, "filename"), &err);
+    if (err) {
+        error_report_err(err);
+    }
+}
+
+static void hmp_log(Monitor *mon, const QDict *qdict)
+{
+    int mask;
+    const char *items = qdict_get_str(qdict, "items");
+
+    if (!strcmp(items, "none")) {
+        mask = 0;
+    } else {
+        mask = qemu_str_to_log_mask(items);
+        if (!mask) {
+            help_cmd(mon, "log");
+            return;
+        }
+    }
+    qemu_set_log(mask);
+}
+
+static void hmp_singlestep(Monitor *mon, const QDict *qdict)
+{
+    const char *option = qdict_get_try_str(qdict, "option");
+    if (!option || !strcmp(option, "on")) {
+        singlestep = 1;
+    } else if (!strcmp(option, "off")) {
+        singlestep = 0;
+    } else {
+        monitor_printf(mon, "unexpected option %s\n", option);
+    }
+}
+
+static void hmp_gdbserver(Monitor *mon, const QDict *qdict)
+{
+    const char *device = qdict_get_try_str(qdict, "device");
+    if (!device)
+        device = "tcp::" DEFAULT_GDBSTUB_PORT;
+    if (gdbserver_start(device) < 0) {
+        monitor_printf(mon, "Could not open gdbserver on device '%s'\n",
+                       device);
+    } else if (strcmp(device, "none") == 0) {
+        monitor_printf(mon, "Disabled gdbserver\n");
+    } else {
+        monitor_printf(mon, "Waiting for gdb connection on device '%s'\n",
+                       device);
+    }
+}
+
+static void hmp_watchdog_action(Monitor *mon, const QDict *qdict)
+{
+    const char *action = qdict_get_str(qdict, "action");
+    if (select_watchdog_action(action) == -1) {
+        monitor_printf(mon, "Unknown watchdog action '%s'\n", action);
+    }
+}
+
+static void monitor_printc(Monitor *mon, int c)
+{
+    monitor_printf(mon, "'");
+    switch(c) {
+    case '\'':
+        monitor_printf(mon, "\\'");
+        break;
+    case '\\':
+        monitor_printf(mon, "\\\\");
+        break;
+    case '\n':
+        monitor_printf(mon, "\\n");
+        break;
+    case '\r':
+        monitor_printf(mon, "\\r");
+        break;
+    default:
+        if (c >= 32 && c <= 126) {
+            monitor_printf(mon, "%c", c);
+        } else {
+            monitor_printf(mon, "\\x%02x", c);
+        }
+        break;
+    }
+    monitor_printf(mon, "'");
+}
+
+static void memory_dump(Monitor *mon, int count, int format, int wsize,
+                        hwaddr addr, int is_physical)
+{
+    int l, line_size, i, max_digits, len;
+    uint8_t buf[16];
+    uint64_t v;
+    CPUState *cs = mon_get_cpu(mon);
+
+    if (!cs && (format == 'i' || !is_physical)) {
+        monitor_printf(mon, "Can not dump without CPU\n");
+        return;
+    }
+
+    if (format == 'i') {
+        monitor_disas(mon, cs, addr, count, is_physical);
+        return;
+    }
+
+    len = wsize * count;
+    if (wsize == 1)
+        line_size = 8;
+    else
+        line_size = 16;
+    max_digits = 0;
+
+    switch(format) {
+    case 'o':
+        max_digits = DIV_ROUND_UP(wsize * 8, 3);
+        break;
+    default:
+    case 'x':
+        max_digits = (wsize * 8) / 4;
+        break;
+    case 'u':
+    case 'd':
+        max_digits = DIV_ROUND_UP(wsize * 8 * 10, 33);
+        break;
+    case 'c':
+        wsize = 1;
+        break;
+    }
+
+    while (len > 0) {
+        if (is_physical)
+            monitor_printf(mon, TARGET_FMT_plx ":", addr);
+        else
+            monitor_printf(mon, TARGET_FMT_lx ":", (target_ulong)addr);
+        l = len;
+        if (l > line_size)
+            l = line_size;
+        if (is_physical) {
+            AddressSpace *as = cs ? cs->as : &address_space_memory;
+            MemTxResult r = address_space_read(as, addr,
+                                               MEMTXATTRS_UNSPECIFIED, buf, l);
+            if (r != MEMTX_OK) {
+                monitor_printf(mon, " Cannot access memory\n");
+                break;
+            }
+        } else {
+            if (cpu_memory_rw_debug(cs, addr, buf, l, 0) < 0) {
+                monitor_printf(mon, " Cannot access memory\n");
+                break;
+            }
+        }
+        i = 0;
+        while (i < l) {
+            switch(wsize) {
+            default:
+            case 1:
+                v = ldub_p(buf + i);
+                break;
+            case 2:
+                v = lduw_p(buf + i);
+                break;
+            case 4:
+                v = (uint32_t)ldl_p(buf + i);
+                break;
+            case 8:
+                v = ldq_p(buf + i);
+                break;
+            }
+            monitor_printf(mon, " ");
+            switch(format) {
+            case 'o':
+                monitor_printf(mon, "%#*" PRIo64, max_digits, v);
+                break;
+            case 'x':
+                monitor_printf(mon, "0x%0*" PRIx64, max_digits, v);
+                break;
+            case 'u':
+                monitor_printf(mon, "%*" PRIu64, max_digits, v);
+                break;
+            case 'd':
+                monitor_printf(mon, "%*" PRId64, max_digits, v);
+                break;
+            case 'c':
+                monitor_printc(mon, v);
+                break;
+            }
+            i += wsize;
+        }
+        monitor_printf(mon, "\n");
+        addr += l;
+        len -= l;
+    }
+}
+
+static void hmp_memory_dump(Monitor *mon, const QDict *qdict)
+{
+    int count = qdict_get_int(qdict, "count");
+    int format = qdict_get_int(qdict, "format");
+    int size = qdict_get_int(qdict, "size");
+    target_long addr = qdict_get_int(qdict, "addr");
+
+    memory_dump(mon, count, format, size, addr, 0);
+}
+
+static void hmp_physical_memory_dump(Monitor *mon, const QDict *qdict)
+{
+    int count = qdict_get_int(qdict, "count");
+    int format = qdict_get_int(qdict, "format");
+    int size = qdict_get_int(qdict, "size");
+    hwaddr addr = qdict_get_int(qdict, "addr");
+
+    memory_dump(mon, count, format, size, addr, 1);
+}
+
+static void *gpa2hva(MemoryRegion **p_mr, hwaddr addr, Error **errp)
+{
+    MemoryRegionSection mrs = memory_region_find(get_system_memory(),
+                                                 addr, 1);
+
+    if (!mrs.mr) {
+        error_setg(errp, "No memory is mapped at address 0x%" HWADDR_PRIx, addr);
+        return NULL;
+    }
+
+    if (!memory_region_is_ram(mrs.mr) && !memory_region_is_romd(mrs.mr)) {
+        error_setg(errp, "Memory at address 0x%" HWADDR_PRIx "is not RAM", addr);
+        memory_region_unref(mrs.mr);
+        return NULL;
+    }
+
+    *p_mr = mrs.mr;
+    return qemu_map_ram_ptr(mrs.mr->ram_block, mrs.offset_within_region);
+}
+
+static void hmp_gpa2hva(Monitor *mon, const QDict *qdict)
+{
+    hwaddr addr = qdict_get_int(qdict, "addr");
+    Error *local_err = NULL;
+    MemoryRegion *mr = NULL;
+    void *ptr;
+
+    ptr = gpa2hva(&mr, addr, &local_err);
+    if (local_err) {
+        error_report_err(local_err);
+        return;
+    }
+
+    monitor_printf(mon, "Host virtual address for 0x%" HWADDR_PRIx
+                   " (%s) is %p\n",
+                   addr, mr->name, ptr);
+
+    memory_region_unref(mr);
+}
+
+static void hmp_gva2gpa(Monitor *mon, const QDict *qdict)
+{
+    target_ulong addr = qdict_get_int(qdict, "addr");
+    MemTxAttrs attrs;
+    CPUState *cs = mon_get_cpu(mon);
+    hwaddr gpa;
+
+    if (!cs) {
+        monitor_printf(mon, "No cpu\n");
+        return;
+    }
+
+    gpa  = cpu_get_phys_page_attrs_debug(cs, addr & TARGET_PAGE_MASK, &attrs);
+    if (gpa == -1) {
+        monitor_printf(mon, "Unmapped\n");
+    } else {
+        monitor_printf(mon, "gpa: %#" HWADDR_PRIx "\n",
+                       gpa + (addr & ~TARGET_PAGE_MASK));
+    }
+}
+
+#ifdef CONFIG_LINUX
+static uint64_t vtop(void *ptr, Error **errp)
+{
+    uint64_t pinfo;
+    uint64_t ret = -1;
+    uintptr_t addr = (uintptr_t) ptr;
+    uintptr_t pagesize = qemu_real_host_page_size;
+    off_t offset = addr / pagesize * sizeof(pinfo);
+    int fd;
+
+    fd = open("/proc/self/pagemap", O_RDONLY);
+    if (fd == -1) {
+        error_setg_errno(errp, errno, "Cannot open /proc/self/pagemap");
+        return -1;
+    }
+
+    /* Force copy-on-write if necessary.  */
+    qatomic_add((uint8_t *)ptr, 0);
+
+    if (pread(fd, &pinfo, sizeof(pinfo), offset) != sizeof(pinfo)) {
+        error_setg_errno(errp, errno, "Cannot read pagemap");
+        goto out;
+    }
+    if ((pinfo & (1ull << 63)) == 0) {
+        error_setg(errp, "Page not present");
+        goto out;
+    }
+    ret = ((pinfo & 0x007fffffffffffffull) * pagesize) | (addr & (pagesize - 1));
+
+out:
+    close(fd);
+    return ret;
+}
+
+static void hmp_gpa2hpa(Monitor *mon, const QDict *qdict)
+{
+    hwaddr addr = qdict_get_int(qdict, "addr");
+    Error *local_err = NULL;
+    MemoryRegion *mr = NULL;
+    void *ptr;
+    uint64_t physaddr;
+
+    ptr = gpa2hva(&mr, addr, &local_err);
+    if (local_err) {
+        error_report_err(local_err);
+        return;
+    }
+
+    physaddr = vtop(ptr, &local_err);
+    if (local_err) {
+        error_report_err(local_err);
+    } else {
+        monitor_printf(mon, "Host physical address for 0x%" HWADDR_PRIx
+                       " (%s) is 0x%" PRIx64 "\n",
+                       addr, mr->name, (uint64_t) physaddr);
+    }
+
+    memory_region_unref(mr);
+}
+#endif
+
+static void do_print(Monitor *mon, const QDict *qdict)
+{
+    int format = qdict_get_int(qdict, "format");
+    hwaddr val = qdict_get_int(qdict, "val");
+
+    switch(format) {
+    case 'o':
+        monitor_printf(mon, "%#" HWADDR_PRIo, val);
+        break;
+    case 'x':
+        monitor_printf(mon, "%#" HWADDR_PRIx, val);
+        break;
+    case 'u':
+        monitor_printf(mon, "%" HWADDR_PRIu, val);
+        break;
+    default:
+    case 'd':
+        monitor_printf(mon, "%" HWADDR_PRId, val);
+        break;
+    case 'c':
+        monitor_printc(mon, val);
+        break;
+    }
+    monitor_printf(mon, "\n");
+}
+
+static void hmp_sum(Monitor *mon, const QDict *qdict)
+{
+    uint32_t addr;
+    uint16_t sum;
+    uint32_t start = qdict_get_int(qdict, "start");
+    uint32_t size = qdict_get_int(qdict, "size");
+
+    sum = 0;
+    for(addr = start; addr < (start + size); addr++) {
+        uint8_t val = address_space_ldub(&address_space_memory, addr,
+                                         MEMTXATTRS_UNSPECIFIED, NULL);
+        /* BSD sum algorithm ('sum' Unix command) */
+        sum = (sum >> 1) | (sum << 15);
+        sum += val;
+    }
+    monitor_printf(mon, "%05d\n", sum);
+}
+
+static int mouse_button_state;
+
+static void hmp_mouse_move(Monitor *mon, const QDict *qdict)
+{
+    int dx, dy, dz, button;
+    const char *dx_str = qdict_get_str(qdict, "dx_str");
+    const char *dy_str = qdict_get_str(qdict, "dy_str");
+    const char *dz_str = qdict_get_try_str(qdict, "dz_str");
+
+    dx = strtol(dx_str, NULL, 0);
+    dy = strtol(dy_str, NULL, 0);
+    qemu_input_queue_rel(NULL, INPUT_AXIS_X, dx);
+    qemu_input_queue_rel(NULL, INPUT_AXIS_Y, dy);
+
+    if (dz_str) {
+        dz = strtol(dz_str, NULL, 0);
+        if (dz != 0) {
+            button = (dz > 0) ? INPUT_BUTTON_WHEEL_UP : INPUT_BUTTON_WHEEL_DOWN;
+            qemu_input_queue_btn(NULL, button, true);
+            qemu_input_event_sync();
+            qemu_input_queue_btn(NULL, button, false);
+        }
+    }
+    qemu_input_event_sync();
+}
+
+static void hmp_mouse_button(Monitor *mon, const QDict *qdict)
+{
+    static uint32_t bmap[INPUT_BUTTON__MAX] = {
+        [INPUT_BUTTON_LEFT]       = MOUSE_EVENT_LBUTTON,
+        [INPUT_BUTTON_MIDDLE]     = MOUSE_EVENT_MBUTTON,
+        [INPUT_BUTTON_RIGHT]      = MOUSE_EVENT_RBUTTON,
+    };
+    int button_state = qdict_get_int(qdict, "button_state");
+
+    if (mouse_button_state == button_state) {
+        return;
+    }
+    qemu_input_update_buttons(NULL, bmap, mouse_button_state, button_state);
+    qemu_input_event_sync();
+    mouse_button_state = button_state;
+}
+
+static void hmp_ioport_read(Monitor *mon, const QDict *qdict)
+{
+    int size = qdict_get_int(qdict, "size");
+    int addr = qdict_get_int(qdict, "addr");
+    int has_index = qdict_haskey(qdict, "index");
+    uint32_t val;
+    int suffix;
+
+    if (has_index) {
+        int index = qdict_get_int(qdict, "index");
+        cpu_outb(addr & IOPORTS_MASK, index & 0xff);
+        addr++;
+    }
+    addr &= 0xffff;
+
+    switch(size) {
+    default:
+    case 1:
+        val = cpu_inb(addr);
+        suffix = 'b';
+        break;
+    case 2:
+        val = cpu_inw(addr);
+        suffix = 'w';
+        break;
+    case 4:
+        val = cpu_inl(addr);
+        suffix = 'l';
+        break;
+    }
+    monitor_printf(mon, "port%c[0x%04x] = %#0*x\n",
+                   suffix, addr, size * 2, val);
+}
+
+static void hmp_ioport_write(Monitor *mon, const QDict *qdict)
+{
+    int size = qdict_get_int(qdict, "size");
+    int addr = qdict_get_int(qdict, "addr");
+    int val = qdict_get_int(qdict, "val");
+
+    addr &= IOPORTS_MASK;
+
+    switch (size) {
+    default:
+    case 1:
+        cpu_outb(addr, val);
+        break;
+    case 2:
+        cpu_outw(addr, val);
+        break;
+    case 4:
+        cpu_outl(addr, val);
+        break;
+    }
+}
+
+static void hmp_boot_set(Monitor *mon, const QDict *qdict)
+{
+    Error *local_err = NULL;
+    const char *bootdevice = qdict_get_str(qdict, "bootdevice");
+
+    qemu_boot_set(bootdevice, &local_err);
+    if (local_err) {
+        error_report_err(local_err);
+    } else {
+        monitor_printf(mon, "boot device list now set to %s\n", bootdevice);
+    }
+}
+
+static void hmp_info_mtree(Monitor *mon, const QDict *qdict)
+{
+    bool flatview = qdict_get_try_bool(qdict, "flatview", false);
+    bool dispatch_tree = qdict_get_try_bool(qdict, "dispatch_tree", false);
+    bool owner = qdict_get_try_bool(qdict, "owner", false);
+    bool disabled = qdict_get_try_bool(qdict, "disabled", false);
+
+    mtree_info(flatview, dispatch_tree, owner, disabled);
+}
+
+#ifdef CONFIG_PROFILER
+
+int64_t dev_time;
+
+static void hmp_info_profile(Monitor *mon, const QDict *qdict)
+{
+    static int64_t last_cpu_exec_time;
+    int64_t cpu_exec_time;
+    int64_t delta;
+
+    cpu_exec_time = tcg_cpu_exec_time();
+    delta = cpu_exec_time - last_cpu_exec_time;
+
+    monitor_printf(mon, "async time  %" PRId64 " (%0.3f)\n",
+                   dev_time, dev_time / (double)NANOSECONDS_PER_SECOND);
+    monitor_printf(mon, "qemu time   %" PRId64 " (%0.3f)\n",
+                   delta, delta / (double)NANOSECONDS_PER_SECOND);
+    last_cpu_exec_time = cpu_exec_time;
+    dev_time = 0;
+}
+#else
+static void hmp_info_profile(Monitor *mon, const QDict *qdict)
+{
+    monitor_printf(mon, "Internal profiler not compiled\n");
+}
+#endif
+
+/* Capture support */
+static QLIST_HEAD (capture_list_head, CaptureState) capture_head;
+
+static void hmp_info_capture(Monitor *mon, const QDict *qdict)
+{
+    int i;
+    CaptureState *s;
+
+    for (s = capture_head.lh_first, i = 0; s; s = s->entries.le_next, ++i) {
+        monitor_printf(mon, "[%d]: ", i);
+        s->ops.info (s->opaque);
+    }
+}
+
+static void hmp_stopcapture(Monitor *mon, const QDict *qdict)
+{
+    int i;
+    int n = qdict_get_int(qdict, "n");
+    CaptureState *s;
+
+    for (s = capture_head.lh_first, i = 0; s; s = s->entries.le_next, ++i) {
+        if (i == n) {
+            s->ops.destroy (s->opaque);
+            QLIST_REMOVE (s, entries);
+            g_free (s);
+            return;
+        }
+    }
+}
+
+static void hmp_wavcapture(Monitor *mon, const QDict *qdict)
+{
+    const char *path = qdict_get_str(qdict, "path");
+    int freq = qdict_get_try_int(qdict, "freq", 44100);
+    int bits = qdict_get_try_int(qdict, "bits", 16);
+    int nchannels = qdict_get_try_int(qdict, "nchannels", 2);
+    const char *audiodev = qdict_get_str(qdict, "audiodev");
+    CaptureState *s;
+    AudioState *as = audio_state_by_name(audiodev);
+
+    if (!as) {
+        monitor_printf(mon, "Audiodev '%s' not found\n", audiodev);
+        return;
+    }
+
+    s = g_malloc0 (sizeof (*s));
+
+    if (wav_start_capture(as, s, path, freq, bits, nchannels)) {
+        monitor_printf(mon, "Failed to add wave capture\n");
+        g_free (s);
+        return;
+    }
+    QLIST_INSERT_HEAD (&capture_head, s, entries);
+}
+
+static QAuthZList *find_auth(Monitor *mon, const char *name)
+{
+    Object *obj;
+    Object *container;
+
+    container = object_get_objects_root();
+    obj = object_resolve_path_component(container, name);
+    if (!obj) {
+        monitor_printf(mon, "acl: unknown list '%s'\n", name);
+        return NULL;
+    }
+
+    return QAUTHZ_LIST(obj);
+}
+
+static bool warn_acl;
+static void hmp_warn_acl(void)
+{
+    if (warn_acl) {
+        return;
+    }
+    error_report("The acl_show, acl_reset, acl_policy, acl_add, acl_remove "
+                 "commands are deprecated with no replacement. Authorization "
+                 "for VNC should be performed using the pluggable QAuthZ "
+                 "objects");
+    warn_acl = true;
+}
+
+static void hmp_acl_show(Monitor *mon, const QDict *qdict)
+{
+    const char *aclname = qdict_get_str(qdict, "aclname");
+    QAuthZList *auth = find_auth(mon, aclname);
+    QAuthZListRuleList *rules;
+    size_t i = 0;
+
+    hmp_warn_acl();
+
+    if (!auth) {
+        return;
+    }
+
+    monitor_printf(mon, "policy: %s\n",
+                   QAuthZListPolicy_str(auth->policy));
+
+    rules = auth->rules;
+    while (rules) {
+        QAuthZListRule *rule = rules->value;
+        i++;
+        monitor_printf(mon, "%zu: %s %s\n", i,
+                       QAuthZListPolicy_str(rule->policy),
+                       rule->match);
+        rules = rules->next;
+    }
+}
+
+static void hmp_acl_reset(Monitor *mon, const QDict *qdict)
+{
+    const char *aclname = qdict_get_str(qdict, "aclname");
+    QAuthZList *auth = find_auth(mon, aclname);
+
+    hmp_warn_acl();
+
+    if (!auth) {
+        return;
+    }
+
+    auth->policy = QAUTHZ_LIST_POLICY_DENY;
+    qapi_free_QAuthZListRuleList(auth->rules);
+    auth->rules = NULL;
+    monitor_printf(mon, "acl: removed all rules\n");
+}
+
+static void hmp_acl_policy(Monitor *mon, const QDict *qdict)
+{
+    const char *aclname = qdict_get_str(qdict, "aclname");
+    const char *policy = qdict_get_str(qdict, "policy");
+    QAuthZList *auth = find_auth(mon, aclname);
+    int val;
+    Error *err = NULL;
+
+    hmp_warn_acl();
+
+    if (!auth) {
+        return;
+    }
+
+    val = qapi_enum_parse(&QAuthZListPolicy_lookup,
+                          policy,
+                          QAUTHZ_LIST_POLICY_DENY,
+                          &err);
+    if (err) {
+        error_free(err);
+        monitor_printf(mon, "acl: unknown policy '%s', "
+                       "expected 'deny' or 'allow'\n", policy);
+    } else {
+        auth->policy = val;
+        if (auth->policy == QAUTHZ_LIST_POLICY_ALLOW) {
+            monitor_printf(mon, "acl: policy set to 'allow'\n");
+        } else {
+            monitor_printf(mon, "acl: policy set to 'deny'\n");
+        }
+    }
+}
+
+static QAuthZListFormat hmp_acl_get_format(const char *match)
+{
+    if (strchr(match, '*')) {
+        return QAUTHZ_LIST_FORMAT_GLOB;
+    } else {
+        return QAUTHZ_LIST_FORMAT_EXACT;
+    }
+}
+
+static void hmp_acl_add(Monitor *mon, const QDict *qdict)
+{
+    const char *aclname = qdict_get_str(qdict, "aclname");
+    const char *match = qdict_get_str(qdict, "match");
+    const char *policystr = qdict_get_str(qdict, "policy");
+    int has_index = qdict_haskey(qdict, "index");
+    int index = qdict_get_try_int(qdict, "index", -1);
+    QAuthZList *auth = find_auth(mon, aclname);
+    Error *err = NULL;
+    QAuthZListPolicy policy;
+    QAuthZListFormat format;
+    size_t i = 0;
+
+    hmp_warn_acl();
+
+    if (!auth) {
+        return;
+    }
+
+    policy = qapi_enum_parse(&QAuthZListPolicy_lookup,
+                             policystr,
+                             QAUTHZ_LIST_POLICY_DENY,
+                             &err);
+    if (err) {
+        error_free(err);
+        monitor_printf(mon, "acl: unknown policy '%s', "
+                       "expected 'deny' or 'allow'\n", policystr);
+        return;
+    }
+
+    format = hmp_acl_get_format(match);
+
+    if (has_index && index == 0) {
+        monitor_printf(mon, "acl: unable to add acl entry\n");
+        return;
+    }
+
+    if (has_index) {
+        i = qauthz_list_insert_rule(auth, match, policy,
+                                    format, index - 1, &err);
+    } else {
+        i = qauthz_list_append_rule(auth, match, policy,
+                                    format, &err);
+    }
+    if (err) {
+        monitor_printf(mon, "acl: unable to add rule: %s",
+                       error_get_pretty(err));
+        error_free(err);
+    } else {
+        monitor_printf(mon, "acl: added rule at position %zu\n", i + 1);
+    }
+}
+
+static void hmp_acl_remove(Monitor *mon, const QDict *qdict)
+{
+    const char *aclname = qdict_get_str(qdict, "aclname");
+    const char *match = qdict_get_str(qdict, "match");
+    QAuthZList *auth = find_auth(mon, aclname);
+    ssize_t i = 0;
+
+    hmp_warn_acl();
+
+    if (!auth) {
+        return;
+    }
+
+    i = qauthz_list_delete_rule(auth, match);
+    if (i >= 0) {
+        monitor_printf(mon, "acl: removed rule at position %zu\n", i + 1);
+    } else {
+        monitor_printf(mon, "acl: no matching acl entry\n");
+    }
+}
+
+void qmp_getfd(const char *fdname, Error **errp)
+{
+    Monitor *cur_mon = monitor_cur();
+    mon_fd_t *monfd;
+    int fd, tmp_fd;
+
+    fd = qemu_chr_fe_get_msgfd(&cur_mon->chr);
+    if (fd == -1) {
+        error_setg(errp, QERR_FD_NOT_SUPPLIED);
+        return;
+    }
+
+    if (qemu_isdigit(fdname[0])) {
+        close(fd);
+        error_setg(errp, QERR_INVALID_PARAMETER_VALUE, "fdname",
+                   "a name not starting with a digit");
+        return;
+    }
+
+    QEMU_LOCK_GUARD(&cur_mon->mon_lock);
+    QLIST_FOREACH(monfd, &cur_mon->fds, next) {
+        if (strcmp(monfd->name, fdname) != 0) {
+            continue;
+        }
+
+        tmp_fd = monfd->fd;
+        monfd->fd = fd;
+        /* Make sure close() is outside critical section */
+        close(tmp_fd);
+        return;
+    }
+
+    monfd = g_malloc0(sizeof(mon_fd_t));
+    monfd->name = g_strdup(fdname);
+    monfd->fd = fd;
+
+    QLIST_INSERT_HEAD(&cur_mon->fds, monfd, next);
+}
+
+void qmp_closefd(const char *fdname, Error **errp)
+{
+    Monitor *cur_mon = monitor_cur();
+    mon_fd_t *monfd;
+    int tmp_fd;
+
+    qemu_mutex_lock(&cur_mon->mon_lock);
+    QLIST_FOREACH(monfd, &cur_mon->fds, next) {
+        if (strcmp(monfd->name, fdname) != 0) {
+            continue;
+        }
+
+        QLIST_REMOVE(monfd, next);
+        tmp_fd = monfd->fd;
+        g_free(monfd->name);
+        g_free(monfd);
+        qemu_mutex_unlock(&cur_mon->mon_lock);
+        /* Make sure close() is outside critical section */
+        close(tmp_fd);
+        return;
+    }
+
+    qemu_mutex_unlock(&cur_mon->mon_lock);
+    error_setg(errp, QERR_FD_NOT_FOUND, fdname);
+}
+
+int monitor_get_fd(Monitor *mon, const char *fdname, Error **errp)
+{
+    mon_fd_t *monfd;
+
+    QEMU_LOCK_GUARD(&mon->mon_lock);
+    QLIST_FOREACH(monfd, &mon->fds, next) {
+        int fd;
+
+        if (strcmp(monfd->name, fdname) != 0) {
+            continue;
+        }
+
+        fd = monfd->fd;
+
+        /* caller takes ownership of fd */
+        QLIST_REMOVE(monfd, next);
+        g_free(monfd->name);
+        g_free(monfd);
+
+        return fd;
+    }
+
+    error_setg(errp, "File descriptor named '%s' has not been found", fdname);
+    return -1;
+}
+
+static void monitor_fdset_cleanup(MonFdset *mon_fdset)
+{
+    MonFdsetFd *mon_fdset_fd;
+    MonFdsetFd *mon_fdset_fd_next;
+
+    QLIST_FOREACH_SAFE(mon_fdset_fd, &mon_fdset->fds, next, mon_fdset_fd_next) {
+        if ((mon_fdset_fd->removed ||
+                (QLIST_EMPTY(&mon_fdset->dup_fds) && mon_refcount == 0)) &&
+                runstate_is_running()) {
+            close(mon_fdset_fd->fd);
+            g_free(mon_fdset_fd->opaque);
+            QLIST_REMOVE(mon_fdset_fd, next);
+            g_free(mon_fdset_fd);
+        }
+    }
+
+    if (QLIST_EMPTY(&mon_fdset->fds) && QLIST_EMPTY(&mon_fdset->dup_fds)) {
+        QLIST_REMOVE(mon_fdset, next);
+        g_free(mon_fdset);
+    }
+}
+
+void monitor_fdsets_cleanup(void)
+{
+    MonFdset *mon_fdset;
+    MonFdset *mon_fdset_next;
+
+    QEMU_LOCK_GUARD(&mon_fdsets_lock);
+    QLIST_FOREACH_SAFE(mon_fdset, &mon_fdsets, next, mon_fdset_next) {
+        monitor_fdset_cleanup(mon_fdset);
+    }
+}
+
+AddfdInfo *qmp_add_fd(bool has_fdset_id, int64_t fdset_id, bool has_opaque,
+                      const char *opaque, Error **errp)
+{
+    int fd;
+    Monitor *mon = monitor_cur();
+    AddfdInfo *fdinfo;
+
+    fd = qemu_chr_fe_get_msgfd(&mon->chr);
+    if (fd == -1) {
+        error_setg(errp, QERR_FD_NOT_SUPPLIED);
+        goto error;
+    }
+
+    fdinfo = monitor_fdset_add_fd(fd, has_fdset_id, fdset_id,
+                                  has_opaque, opaque, errp);
+    if (fdinfo) {
+        return fdinfo;
+    }
+
+error:
+    if (fd != -1) {
+        close(fd);
+    }
+    return NULL;
+}
+
+void qmp_remove_fd(int64_t fdset_id, bool has_fd, int64_t fd, Error **errp)
+{
+    MonFdset *mon_fdset;
+    MonFdsetFd *mon_fdset_fd;
+    char fd_str[60];
+
+    QEMU_LOCK_GUARD(&mon_fdsets_lock);
+    QLIST_FOREACH(mon_fdset, &mon_fdsets, next) {
+        if (mon_fdset->id != fdset_id) {
+            continue;
+        }
+        QLIST_FOREACH(mon_fdset_fd, &mon_fdset->fds, next) {
+            if (has_fd) {
+                if (mon_fdset_fd->fd != fd) {
+                    continue;
+                }
+                mon_fdset_fd->removed = true;
+                break;
+            } else {
+                mon_fdset_fd->removed = true;
+            }
+        }
+        if (has_fd && !mon_fdset_fd) {
+            goto error;
+        }
+        monitor_fdset_cleanup(mon_fdset);
+        return;
+    }
+
+error:
+    if (has_fd) {
+        snprintf(fd_str, sizeof(fd_str), "fdset-id:%" PRId64 ", fd:%" PRId64,
+                 fdset_id, fd);
+    } else {
+        snprintf(fd_str, sizeof(fd_str), "fdset-id:%" PRId64, fdset_id);
+    }
+    error_setg(errp, QERR_FD_NOT_FOUND, fd_str);
+}
+
+FdsetInfoList *qmp_query_fdsets(Error **errp)
+{
+    MonFdset *mon_fdset;
+    MonFdsetFd *mon_fdset_fd;
+    FdsetInfoList *fdset_list = NULL;
+
+    QEMU_LOCK_GUARD(&mon_fdsets_lock);
+    QLIST_FOREACH(mon_fdset, &mon_fdsets, next) {
+        FdsetInfoList *fdset_info = g_malloc0(sizeof(*fdset_info));
+        FdsetFdInfoList *fdsetfd_list = NULL;
+
+        fdset_info->value = g_malloc0(sizeof(*fdset_info->value));
+        fdset_info->value->fdset_id = mon_fdset->id;
+
+        QLIST_FOREACH(mon_fdset_fd, &mon_fdset->fds, next) {
+            FdsetFdInfoList *fdsetfd_info;
+
+            fdsetfd_info = g_malloc0(sizeof(*fdsetfd_info));
+            fdsetfd_info->value = g_malloc0(sizeof(*fdsetfd_info->value));
+            fdsetfd_info->value->fd = mon_fdset_fd->fd;
+            if (mon_fdset_fd->opaque) {
+                fdsetfd_info->value->has_opaque = true;
+                fdsetfd_info->value->opaque = g_strdup(mon_fdset_fd->opaque);
+            } else {
+                fdsetfd_info->value->has_opaque = false;
+            }
+
+            fdsetfd_info->next = fdsetfd_list;
+            fdsetfd_list = fdsetfd_info;
+        }
+
+        fdset_info->value->fds = fdsetfd_list;
+
+        fdset_info->next = fdset_list;
+        fdset_list = fdset_info;
+    }
+
+    return fdset_list;
+}
+
+AddfdInfo *monitor_fdset_add_fd(int fd, bool has_fdset_id, int64_t fdset_id,
+                                bool has_opaque, const char *opaque,
+                                Error **errp)
+{
+    MonFdset *mon_fdset = NULL;
+    MonFdsetFd *mon_fdset_fd;
+    AddfdInfo *fdinfo;
+
+    QEMU_LOCK_GUARD(&mon_fdsets_lock);
+    if (has_fdset_id) {
+        QLIST_FOREACH(mon_fdset, &mon_fdsets, next) {
+            /* Break if match found or match impossible due to ordering by ID */
+            if (fdset_id <= mon_fdset->id) {
+                if (fdset_id < mon_fdset->id) {
+                    mon_fdset = NULL;
+                }
+                break;
+            }
+        }
+    }
+
+    if (mon_fdset == NULL) {
+        int64_t fdset_id_prev = -1;
+        MonFdset *mon_fdset_cur = QLIST_FIRST(&mon_fdsets);
+
+        if (has_fdset_id) {
+            if (fdset_id < 0) {
+                error_setg(errp, QERR_INVALID_PARAMETER_VALUE, "fdset-id",
+                           "a non-negative value");
+                return NULL;
+            }
+            /* Use specified fdset ID */
+            QLIST_FOREACH(mon_fdset, &mon_fdsets, next) {
+                mon_fdset_cur = mon_fdset;
+                if (fdset_id < mon_fdset_cur->id) {
+                    break;
+                }
+            }
+        } else {
+            /* Use first available fdset ID */
+            QLIST_FOREACH(mon_fdset, &mon_fdsets, next) {
+                mon_fdset_cur = mon_fdset;
+                if (fdset_id_prev == mon_fdset_cur->id - 1) {
+                    fdset_id_prev = mon_fdset_cur->id;
+                    continue;
+                }
+                break;
+            }
+        }
+
+        mon_fdset = g_malloc0(sizeof(*mon_fdset));
+        if (has_fdset_id) {
+            mon_fdset->id = fdset_id;
+        } else {
+            mon_fdset->id = fdset_id_prev + 1;
+        }
+
+        /* The fdset list is ordered by fdset ID */
+        if (!mon_fdset_cur) {
+            QLIST_INSERT_HEAD(&mon_fdsets, mon_fdset, next);
+        } else if (mon_fdset->id < mon_fdset_cur->id) {
+            QLIST_INSERT_BEFORE(mon_fdset_cur, mon_fdset, next);
+        } else {
+            QLIST_INSERT_AFTER(mon_fdset_cur, mon_fdset, next);
+        }
+    }
+
+    mon_fdset_fd = g_malloc0(sizeof(*mon_fdset_fd));
+    mon_fdset_fd->fd = fd;
+    mon_fdset_fd->removed = false;
+    if (has_opaque) {
+        mon_fdset_fd->opaque = g_strdup(opaque);
+    }
+    QLIST_INSERT_HEAD(&mon_fdset->fds, mon_fdset_fd, next);
+
+    fdinfo = g_malloc0(sizeof(*fdinfo));
+    fdinfo->fdset_id = mon_fdset->id;
+    fdinfo->fd = mon_fdset_fd->fd;
+
+    return fdinfo;
+}
+
+int monitor_fdset_dup_fd_add(int64_t fdset_id, int flags)
+{
+#ifdef _WIN32
+    return -ENOENT;
+#else
+    MonFdset *mon_fdset;
+
+    QEMU_LOCK_GUARD(&mon_fdsets_lock);
+    QLIST_FOREACH(mon_fdset, &mon_fdsets, next) {
+        MonFdsetFd *mon_fdset_fd;
+        MonFdsetFd *mon_fdset_fd_dup;
+        int fd = -1;
+        int dup_fd;
+        int mon_fd_flags;
+
+        if (mon_fdset->id != fdset_id) {
+            continue;
+        }
+
+        QLIST_FOREACH(mon_fdset_fd, &mon_fdset->fds, next) {
+            mon_fd_flags = fcntl(mon_fdset_fd->fd, F_GETFL);
+            if (mon_fd_flags == -1) {
+                return -1;
+            }
+
+            if ((flags & O_ACCMODE) == (mon_fd_flags & O_ACCMODE)) {
+                fd = mon_fdset_fd->fd;
+                break;
+            }
+        }
+
+        if (fd == -1) {
+            errno = EACCES;
+            return -1;
+        }
+
+        dup_fd = qemu_dup_flags(fd, flags);
+        if (dup_fd == -1) {
+            return -1;
+        }
+
+        mon_fdset_fd_dup = g_malloc0(sizeof(*mon_fdset_fd_dup));
+        mon_fdset_fd_dup->fd = dup_fd;
+        QLIST_INSERT_HEAD(&mon_fdset->dup_fds, mon_fdset_fd_dup, next);
+        return dup_fd;
+    }
+
+    errno = ENOENT;
+    return -1;
+#endif
+}
+
+static int64_t monitor_fdset_dup_fd_find_remove(int dup_fd, bool remove)
+{
+    MonFdset *mon_fdset;
+    MonFdsetFd *mon_fdset_fd_dup;
+
+    QEMU_LOCK_GUARD(&mon_fdsets_lock);
+    QLIST_FOREACH(mon_fdset, &mon_fdsets, next) {
+        QLIST_FOREACH(mon_fdset_fd_dup, &mon_fdset->dup_fds, next) {
+            if (mon_fdset_fd_dup->fd == dup_fd) {
+                if (remove) {
+                    QLIST_REMOVE(mon_fdset_fd_dup, next);
+                    g_free(mon_fdset_fd_dup);
+                    if (QLIST_EMPTY(&mon_fdset->dup_fds)) {
+                        monitor_fdset_cleanup(mon_fdset);
+                    }
+                    return -1;
+                } else {
+                    return mon_fdset->id;
+                }
+            }
+        }
+    }
+
+    return -1;
+}
+
+int64_t monitor_fdset_dup_fd_find(int dup_fd)
+{
+    return monitor_fdset_dup_fd_find_remove(dup_fd, false);
+}
+
+void monitor_fdset_dup_fd_remove(int dup_fd)
+{
+    monitor_fdset_dup_fd_find_remove(dup_fd, true);
+}
+
+int monitor_fd_param(Monitor *mon, const char *fdname, Error **errp)
+{
+    int fd;
+    Error *local_err = NULL;
+
+    if (!qemu_isdigit(fdname[0]) && mon) {
+        fd = monitor_get_fd(mon, fdname, &local_err);
+    } else {
+        fd = qemu_parse_fd(fdname);
+        if (fd == -1) {
+            error_setg(&local_err, "Invalid file descriptor number '%s'",
+                       fdname);
+        }
+    }
+    if (local_err) {
+        error_propagate(errp, local_err);
+        assert(fd == -1);
+    } else {
+        assert(fd != -1);
+    }
+
+    return fd;
+}
+
+/* Please update hmp-commands.hx when adding or changing commands */
+static HMPCommand hmp_info_cmds[] = {
+#include "hmp-commands-info.h"
+    { NULL, NULL, },
+};
+
+/* hmp_cmds and hmp_info_cmds would be sorted at runtime */
+HMPCommand hmp_cmds[] = {
+#include "hmp-commands.h"
+    { NULL, NULL, },
+};
+
+/*
+ * Set @pval to the value in the register identified by @name.
+ * return 0 if OK, -1 if not found
+ */
+int get_monitor_def(Monitor *mon, int64_t *pval, const char *name)
+{
+    const MonitorDef *md = target_monitor_defs();
+    CPUState *cs = mon_get_cpu(mon);
+    void *ptr;
+    uint64_t tmp = 0;
+    int ret;
+
+    if (cs == NULL || md == NULL) {
+        return -1;
+    }
+
+    for(; md->name != NULL; md++) {
+        if (hmp_compare_cmd(name, md->name)) {
+            if (md->get_value) {
+                *pval = md->get_value(mon, md, md->offset);
+            } else {
+                CPUArchState *env = mon_get_cpu_env(mon);
+                ptr = (uint8_t *)env + md->offset;
+                switch(md->type) {
+                case MD_I32:
+                    *pval = *(int32_t *)ptr;
+                    break;
+                case MD_TLONG:
+                    *pval = *(target_long *)ptr;
+                    break;
+                default:
+                    *pval = 0;
+                    break;
+                }
+            }
+            return 0;
+        }
+    }
+
+    ret = target_get_monitor_def(cs, name, &tmp);
+    if (!ret) {
+        *pval = (target_long) tmp;
+    }
+
+    return ret;
+}
+
+static void add_completion_option(ReadLineState *rs, const char *str,
+                                  const char *option)
+{
+    if (!str || !option) {
+        return;
+    }
+    if (!strncmp(option, str, strlen(str))) {
+        readline_add_completion(rs, option);
+    }
+}
+
+void chardev_add_completion(ReadLineState *rs, int nb_args, const char *str)
+{
+    size_t len;
+    ChardevBackendInfoList *list, *start;
+
+    if (nb_args != 2) {
+        return;
+    }
+    len = strlen(str);
+    readline_set_completion_index(rs, len);
+
+    start = list = qmp_query_chardev_backends(NULL);
+    while (list) {
+        const char *chr_name = list->value->name;
+
+        if (!strncmp(chr_name, str, len)) {
+            readline_add_completion(rs, chr_name);
+        }
+        list = list->next;
+    }
+    qapi_free_ChardevBackendInfoList(start);
+}
+
+void netdev_add_completion(ReadLineState *rs, int nb_args, const char *str)
+{
+    size_t len;
+    int i;
+
+    if (nb_args != 2) {
+        return;
+    }
+    len = strlen(str);
+    readline_set_completion_index(rs, len);
+    for (i = 0; i < NET_CLIENT_DRIVER__MAX; i++) {
+        add_completion_option(rs, str, NetClientDriver_str(i));
+    }
+}
+
+void device_add_completion(ReadLineState *rs, int nb_args, const char *str)
+{
+    GSList *list, *elt;
+    size_t len;
+
+    if (nb_args != 2) {
+        return;
+    }
+
+    len = strlen(str);
+    readline_set_completion_index(rs, len);
+    list = elt = object_class_get_list(TYPE_DEVICE, false);
+    while (elt) {
+        const char *name;
+        DeviceClass *dc = OBJECT_CLASS_CHECK(DeviceClass, elt->data,
+                                             TYPE_DEVICE);
+        name = object_class_get_name(OBJECT_CLASS(dc));
+
+        if (dc->user_creatable
+            && !strncmp(name, str, len)) {
+            readline_add_completion(rs, name);
+        }
+        elt = elt->next;
+    }
+    g_slist_free(list);
+}
+
+void object_add_completion(ReadLineState *rs, int nb_args, const char *str)
+{
+    GSList *list, *elt;
+    size_t len;
+
+    if (nb_args != 2) {
+        return;
+    }
+
+    len = strlen(str);
+    readline_set_completion_index(rs, len);
+    list = elt = object_class_get_list(TYPE_USER_CREATABLE, false);
+    while (elt) {
+        const char *name;
+
+        name = object_class_get_name(OBJECT_CLASS(elt->data));
+        if (!strncmp(name, str, len) && strcmp(name, TYPE_USER_CREATABLE)) {
+            readline_add_completion(rs, name);
+        }
+        elt = elt->next;
+    }
+    g_slist_free(list);
+}
+
+static int qdev_add_hotpluggable_device(Object *obj, void *opaque)
+{
+    GSList **list = opaque;
+    DeviceState *dev = (DeviceState *)object_dynamic_cast(obj, TYPE_DEVICE);
+
+    if (dev == NULL) {
+        return 0;
+    }
+
+    if (dev->realized && object_property_get_bool(obj, "hotpluggable", NULL)) {
+        *list = g_slist_append(*list, dev);
+    }
+
+    return 0;
+}
+
+static GSList *qdev_build_hotpluggable_device_list(Object *peripheral)
+{
+    GSList *list = NULL;
+
+    object_child_foreach(peripheral, qdev_add_hotpluggable_device, &list);
+
+    return list;
+}
+
+static void peripheral_device_del_completion(ReadLineState *rs,
+                                             const char *str, size_t len)
+{
+    Object *peripheral = container_get(qdev_get_machine(), "/peripheral");
+    GSList *list, *item;
+
+    list = qdev_build_hotpluggable_device_list(peripheral);
+    if (!list) {
+        return;
+    }
+
+    for (item = list; item; item = g_slist_next(item)) {
+        DeviceState *dev = item->data;
+
+        if (dev->id && !strncmp(str, dev->id, len)) {
+            readline_add_completion(rs, dev->id);
+        }
+    }
+
+    g_slist_free(list);
+}
+
+void chardev_remove_completion(ReadLineState *rs, int nb_args, const char *str)
+{
+    size_t len;
+    ChardevInfoList *list, *start;
+
+    if (nb_args != 2) {
+        return;
+    }
+    len = strlen(str);
+    readline_set_completion_index(rs, len);
+
+    start = list = qmp_query_chardev(NULL);
+    while (list) {
+        ChardevInfo *chr = list->value;
+
+        if (!strncmp(chr->label, str, len)) {
+            readline_add_completion(rs, chr->label);
+        }
+        list = list->next;
+    }
+    qapi_free_ChardevInfoList(start);
+}
+
+static void ringbuf_completion(ReadLineState *rs, const char *str)
+{
+    size_t len;
+    ChardevInfoList *list, *start;
+
+    len = strlen(str);
+    readline_set_completion_index(rs, len);
+
+    start = list = qmp_query_chardev(NULL);
+    while (list) {
+        ChardevInfo *chr_info = list->value;
+
+        if (!strncmp(chr_info->label, str, len)) {
+            Chardev *chr = qemu_chr_find(chr_info->label);
+            if (chr && CHARDEV_IS_RINGBUF(chr)) {
+                readline_add_completion(rs, chr_info->label);
+            }
+        }
+        list = list->next;
+    }
+    qapi_free_ChardevInfoList(start);
+}
+
+void ringbuf_write_completion(ReadLineState *rs, int nb_args, const char *str)
+{
+    if (nb_args != 2) {
+        return;
+    }
+    ringbuf_completion(rs, str);
+}
+
+void device_del_completion(ReadLineState *rs, int nb_args, const char *str)
+{
+    size_t len;
+
+    if (nb_args != 2) {
+        return;
+    }
+
+    len = strlen(str);
+    readline_set_completion_index(rs, len);
+    peripheral_device_del_completion(rs, str, len);
+}
+
+void object_del_completion(ReadLineState *rs, int nb_args, const char *str)
+{
+    ObjectPropertyInfoList *list, *start;
+    size_t len;
+
+    if (nb_args != 2) {
+        return;
+    }
+    len = strlen(str);
+    readline_set_completion_index(rs, len);
+
+    start = list = qmp_qom_list("/objects", NULL);
+    while (list) {
+        ObjectPropertyInfo *info = list->value;
+
+        if (!strncmp(info->type, "child<", 5)
+            && !strncmp(info->name, str, len)) {
+            readline_add_completion(rs, info->name);
+        }
+        list = list->next;
+    }
+    qapi_free_ObjectPropertyInfoList(start);
+}
+
+void sendkey_completion(ReadLineState *rs, int nb_args, const char *str)
+{
+    int i;
+    char *sep;
+    size_t len;
+
+    if (nb_args != 2) {
+        return;
+    }
+    sep = strrchr(str, '-');
+    if (sep) {
+        str = sep + 1;
+    }
+    len = strlen(str);
+    readline_set_completion_index(rs, len);
+    for (i = 0; i < Q_KEY_CODE__MAX; i++) {
+        if (!strncmp(str, QKeyCode_str(i), len)) {
+            readline_add_completion(rs, QKeyCode_str(i));
+        }
+    }
+}
+
+void set_link_completion(ReadLineState *rs, int nb_args, const char *str)
+{
+    size_t len;
+
+    len = strlen(str);
+    readline_set_completion_index(rs, len);
+    if (nb_args == 2) {
+        NetClientState *ncs[MAX_QUEUE_NUM];
+        int count, i;
+        count = qemu_find_net_clients_except(NULL, ncs,
+                                             NET_CLIENT_DRIVER_NONE,
+                                             MAX_QUEUE_NUM);
+        for (i = 0; i < MIN(count, MAX_QUEUE_NUM); i++) {
+            const char *name = ncs[i]->name;
+            if (!strncmp(str, name, len)) {
+                readline_add_completion(rs, name);
+            }
+        }
+    } else if (nb_args == 3) {
+        add_completion_option(rs, str, "on");
+        add_completion_option(rs, str, "off");
+    }
+}
+
+void netdev_del_completion(ReadLineState *rs, int nb_args, const char *str)
+{
+    int len, count, i;
+    NetClientState *ncs[MAX_QUEUE_NUM];
+
+    if (nb_args != 2) {
+        return;
+    }
+
+    len = strlen(str);
+    readline_set_completion_index(rs, len);
+    count = qemu_find_net_clients_except(NULL, ncs, NET_CLIENT_DRIVER_NIC,
+                                         MAX_QUEUE_NUM);
+    for (i = 0; i < MIN(count, MAX_QUEUE_NUM); i++) {
+        const char *name = ncs[i]->name;
+        if (strncmp(str, name, len)) {
+            continue;
+        }
+        if (ncs[i]->is_netdev) {
+            readline_add_completion(rs, name);
+        }
+    }
+}
+
+void info_trace_events_completion(ReadLineState *rs, int nb_args, const char *str)
+{
+    size_t len;
+
+    len = strlen(str);
+    readline_set_completion_index(rs, len);
+    if (nb_args == 2) {
+        TraceEventIter iter;
+        TraceEvent *ev;
+        char *pattern = g_strdup_printf("%s*", str);
+        trace_event_iter_init(&iter, pattern);
+        while ((ev = trace_event_iter_next(&iter)) != NULL) {
+            readline_add_completion(rs, trace_event_get_name(ev));
+        }
+        g_free(pattern);
+    }
+}
+
+void trace_event_completion(ReadLineState *rs, int nb_args, const char *str)
+{
+    size_t len;
+
+    len = strlen(str);
+    readline_set_completion_index(rs, len);
+    if (nb_args == 2) {
+        TraceEventIter iter;
+        TraceEvent *ev;
+        char *pattern = g_strdup_printf("%s*", str);
+        trace_event_iter_init(&iter, pattern);
+        while ((ev = trace_event_iter_next(&iter)) != NULL) {
+            readline_add_completion(rs, trace_event_get_name(ev));
+        }
+        g_free(pattern);
+    } else if (nb_args == 3) {
+        add_completion_option(rs, str, "on");
+        add_completion_option(rs, str, "off");
+    }
+}
+
+void watchdog_action_completion(ReadLineState *rs, int nb_args, const char *str)
+{
+    int i;
+
+    if (nb_args != 2) {
+        return;
+    }
+    readline_set_completion_index(rs, strlen(str));
+    for (i = 0; i < WATCHDOG_ACTION__MAX; i++) {
+        add_completion_option(rs, str, WatchdogAction_str(i));
+    }
+}
+
+void migrate_set_capability_completion(ReadLineState *rs, int nb_args,
+                                       const char *str)
+{
+    size_t len;
+
+    len = strlen(str);
+    readline_set_completion_index(rs, len);
+    if (nb_args == 2) {
+        int i;
+        for (i = 0; i < MIGRATION_CAPABILITY__MAX; i++) {
+            const char *name = MigrationCapability_str(i);
+            if (!strncmp(str, name, len)) {
+                readline_add_completion(rs, name);
+            }
+        }
+    } else if (nb_args == 3) {
+        add_completion_option(rs, str, "on");
+        add_completion_option(rs, str, "off");
+    }
+}
+
+void migrate_set_parameter_completion(ReadLineState *rs, int nb_args,
+                                      const char *str)
+{
+    size_t len;
+
+    len = strlen(str);
+    readline_set_completion_index(rs, len);
+    if (nb_args == 2) {
+        int i;
+        for (i = 0; i < MIGRATION_PARAMETER__MAX; i++) {
+            const char *name = MigrationParameter_str(i);
+            if (!strncmp(str, name, len)) {
+                readline_add_completion(rs, name);
+            }
+        }
+    }
+}
+
+static void vm_completion(ReadLineState *rs, const char *str)
+{
+    size_t len;
+    BlockDriverState *bs;
+    BdrvNextIterator it;
+
+    len = strlen(str);
+    readline_set_completion_index(rs, len);
+
+    for (bs = bdrv_first(&it); bs; bs = bdrv_next(&it)) {
+        SnapshotInfoList *snapshots, *snapshot;
+        AioContext *ctx = bdrv_get_aio_context(bs);
+        bool ok = false;
+
+        aio_context_acquire(ctx);
+        if (bdrv_can_snapshot(bs)) {
+            ok = bdrv_query_snapshot_info_list(bs, &snapshots, NULL) == 0;
+        }
+        aio_context_release(ctx);
+        if (!ok) {
+            continue;
+        }
+
+        snapshot = snapshots;
+        while (snapshot) {
+            char *completion = snapshot->value->name;
+            if (!strncmp(str, completion, len)) {
+                readline_add_completion(rs, completion);
+            }
+            completion = snapshot->value->id;
+            if (!strncmp(str, completion, len)) {
+                readline_add_completion(rs, completion);
+            }
+            snapshot = snapshot->next;
+        }
+        qapi_free_SnapshotInfoList(snapshots);
+    }
+
+}
+
+void delvm_completion(ReadLineState *rs, int nb_args, const char *str)
+{
+    if (nb_args == 2) {
+        vm_completion(rs, str);
+    }
+}
+
+void loadvm_completion(ReadLineState *rs, int nb_args, const char *str)
+{
+    if (nb_args == 2) {
+        vm_completion(rs, str);
+    }
+}
+
+static int
+compare_mon_cmd(const void *a, const void *b)
+{
+    return strcmp(((const HMPCommand *)a)->name,
+            ((const HMPCommand *)b)->name);
+}
+
+static void sortcmdlist(void)
+{
+    qsort(hmp_cmds, ARRAY_SIZE(hmp_cmds) - 1,
+          sizeof(*hmp_cmds),
+          compare_mon_cmd);
+    qsort(hmp_info_cmds, ARRAY_SIZE(hmp_info_cmds) - 1,
+          sizeof(*hmp_info_cmds),
+          compare_mon_cmd);
+}
+
+void monitor_init_globals(void)
+{
+    monitor_init_globals_core();
+    monitor_init_qmp_commands();
+    sortcmdlist();
+    qemu_mutex_init(&mon_fdsets_lock);
+}
diff --git a/monitor/monitor-internal.h b/monitor/monitor-internal.h
new file mode 100644
index 000000000..a6131554d
--- /dev/null
+++ b/monitor/monitor-internal.h
@@ -0,0 +1,189 @@
+/*
+ * QEMU monitor
+ *
+ * Copyright (c) 2003-2004 Fabrice Bellard
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+#ifndef MONITOR_INTERNAL_H
+#define MONITOR_INTERNAL_H
+
+#include "chardev/char-fe.h"
+#include "monitor/monitor.h"
+#include "qapi/qapi-types-control.h"
+#include "qapi/qmp/dispatch.h"
+#include "qapi/qmp/json-parser.h"
+#include "qemu/readline.h"
+#include "sysemu/iothread.h"
+
+/*
+ * Supported types:
+ *
+ * 'F'          filename
+ * 'B'          block device name
+ * 's'          string (accept optional quote)
+ * 'S'          it just appends the rest of the string (accept optional quote)
+ * 'O'          option string of the form NAME=VALUE,...
+ *              parsed according to QemuOptsList given by its name
+ *              Example: 'device:O' uses qemu_device_opts.
+ *              Restriction: only lists with empty desc are supported
+ *              TODO lift the restriction
+ * 'i'          32 bit integer
+ * 'l'          target long (32 or 64 bit)
+ * 'M'          Non-negative target long (32 or 64 bit), in user mode the
+ *              value is multiplied by 2^20 (think Mebibyte)
+ * 'o'          octets (aka bytes)
+ *              user mode accepts an optional E, e, P, p, T, t, G, g, M, m,
+ *              K, k suffix, which multiplies the value by 2^60 for suffixes E
+ *              and e, 2^50 for suffixes P and p, 2^40 for suffixes T and t,
+ *              2^30 for suffixes G and g, 2^20 for M and m, 2^10 for K and k
+ * 'T'          double
+ *              user mode accepts an optional ms, us, ns suffix,
+ *              which divides the value by 1e3, 1e6, 1e9, respectively
+ * '/'          optional gdb-like print format (like "/10x")
+ *
+ * '?'          optional type (for all types, except '/')
+ * '.'          other form of optional type (for 'i' and 'l')
+ * 'b'          boolean
+ *              user mode accepts "on" or "off"
+ * '-'          optional parameter (eg. '-f')
+ *
+ */
+
+typedef struct HMPCommand {
+    const char *name;
+    const char *args_type;
+    const char *params;
+    const char *help;
+    const char *flags; /* p=preconfig */
+    void (*cmd)(Monitor *mon, const QDict *qdict);
+    bool coroutine;
+    /*
+     * @sub_table is a list of 2nd level of commands. If it does not exist,
+     * cmd should be used. If it exists, sub_table[?].cmd should be
+     * used, and cmd of 1st level plays the role of help function.
+     */
+    struct HMPCommand *sub_table;
+    void (*command_completion)(ReadLineState *rs, int nb_args, const char *str);
+} HMPCommand;
+
+struct Monitor {
+    CharBackend chr;
+    int reset_seen;
+    int suspend_cnt;            /* Needs to be accessed atomically */
+    bool is_qmp;
+    bool skip_flush;
+    bool use_io_thread;
+
+    char *mon_cpu_path;
+    QTAILQ_ENTRY(Monitor) entry;
+
+    /*
+     * The per-monitor lock. We can't access guest memory when holding
+     * the lock.
+     */
+    QemuMutex mon_lock;
+
+    /*
+     * Members that are protected by the per-monitor lock
+     */
+    QLIST_HEAD(, mon_fd_t) fds;
+    QString *outbuf;
+    guint out_watch;
+    /* Read under either BQL or mon_lock, written with BQL+mon_lock.  */
+    int mux_out;
+};
+
+struct MonitorHMP {
+    Monitor common;
+    bool use_readline;
+    /*
+     * State used only in the thread "owning" the monitor.
+     * If @use_io_thread, this is @mon_iothread. (This does not actually happen
+     * in the current state of the code.)
+     * Else, it's the main thread.
+     * These members can be safely accessed without locks.
+     */
+    ReadLineState *rs;
+};
+
+typedef struct {
+    Monitor common;
+    JSONMessageParser parser;
+    bool pretty;
+    /*
+     * When a client connects, we're in capabilities negotiation mode.
+     * @commands is &qmp_cap_negotiation_commands then.  When command
+     * qmp_capabilities succeeds, we go into command mode, and
+     * @command becomes &qmp_commands.
+     */
+    const QmpCommandList *commands;
+    bool capab_offered[QMP_CAPABILITY__MAX]; /* capabilities offered */
+    bool capab[QMP_CAPABILITY__MAX];         /* offered and accepted */
+    /*
+     * Protects qmp request/response queue.
+     * Take monitor_lock first when you need both.
+     */
+    QemuMutex qmp_queue_lock;
+    /* Input queue that holds all the parsed QMP requests */
+    GQueue *qmp_requests;
+} MonitorQMP;
+
+/**
+ * Is @mon a QMP monitor?
+ */
+static inline bool monitor_is_qmp(const Monitor *mon)
+{
+    return mon->is_qmp;
+}
+
+typedef QTAILQ_HEAD(MonitorList, Monitor) MonitorList;
+extern IOThread *mon_iothread;
+extern Coroutine *qmp_dispatcher_co;
+extern bool qmp_dispatcher_co_shutdown;
+extern bool qmp_dispatcher_co_busy;
+extern QmpCommandList qmp_commands, qmp_cap_negotiation_commands;
+extern QemuMutex monitor_lock;
+extern MonitorList mon_list;
+extern int mon_refcount;
+
+extern HMPCommand hmp_cmds[];
+
+int monitor_puts(Monitor *mon, const char *str);
+void monitor_data_init(Monitor *mon, bool is_qmp, bool skip_flush,
+                       bool use_io_thread);
+void monitor_data_destroy(Monitor *mon);
+int monitor_can_read(void *opaque);
+void monitor_list_append(Monitor *mon);
+void monitor_fdsets_cleanup(void);
+
+void qmp_send_response(MonitorQMP *mon, const QDict *rsp);
+void monitor_data_destroy_qmp(MonitorQMP *mon);
+void coroutine_fn monitor_qmp_dispatcher_co(void *data);
+
+int get_monitor_def(Monitor *mon, int64_t *pval, const char *name);
+void help_cmd(Monitor *mon, const char *name);
+void handle_hmp_command(MonitorHMP *mon, const char *cmdline);
+int hmp_compare_cmd(const char *name, const char *list);
+
+void qmp_query_qmp_schema(QDict *qdict, QObject **ret_data,
+                                 Error **errp);
+
+#endif
diff --git a/monitor/monitor.c b/monitor/monitor.c
new file mode 100644
index 000000000..27573348f
--- /dev/null
+++ b/monitor/monitor.c
@@ -0,0 +1,778 @@
+/*
+ * QEMU monitor
+ *
+ * Copyright (c) 2003-2004 Fabrice Bellard
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+#include "qemu/osdep.h"
+#include "monitor-internal.h"
+#include "qapi/error.h"
+#include "qapi/opts-visitor.h"
+#include "qapi/qapi-emit-events.h"
+#include "qapi/qapi-visit-control.h"
+#include "qapi/qmp/qdict.h"
+#include "qapi/qmp/qstring.h"
+#include "qemu/error-report.h"
+#include "qemu/option.h"
+#include "sysemu/qtest.h"
+#include "sysemu/sysemu.h"
+#include "trace.h"
+
+/*
+ * To prevent flooding clients, events can be throttled. The
+ * throttling is calculated globally, rather than per-Monitor
+ * instance.
+ */
+typedef struct MonitorQAPIEventState {
+    QAPIEvent event;    /* Throttling state for this event type and... */
+    QDict *data;        /* ... data, see qapi_event_throttle_equal() */
+    QEMUTimer *timer;   /* Timer for handling delayed events */
+    QDict *qdict;       /* Delayed event (if any) */
+} MonitorQAPIEventState;
+
+typedef struct {
+    int64_t rate;       /* Minimum time (in ns) between two events */
+} MonitorQAPIEventConf;
+
+/* Shared monitor I/O thread */
+IOThread *mon_iothread;
+
+/* Coroutine to dispatch the requests received from I/O thread */
+Coroutine *qmp_dispatcher_co;
+
+/* Set to true when the dispatcher coroutine should terminate */
+bool qmp_dispatcher_co_shutdown;
+
+/*
+ * qmp_dispatcher_co_busy is used for synchronisation between the
+ * monitor thread and the main thread to ensure that the dispatcher
+ * coroutine never gets scheduled a second time when it's already
+ * scheduled (scheduling the same coroutine twice is forbidden).
+ *
+ * It is true if the coroutine is active and processing requests.
+ * Additional requests may then be pushed onto mon->qmp_requests,
+ * and @qmp_dispatcher_co_shutdown may be set without further ado.
+ * @qmp_dispatcher_co_busy must not be woken up in this case.
+ *
+ * If false, you also have to set @qmp_dispatcher_co_busy to true and
+ * wake up @qmp_dispatcher_co after pushing the new requests.
+ *
+ * The coroutine will automatically change this variable back to false
+ * before it yields.  Nobody else may set the variable to false.
+ *
+ * Access must be atomic for thread safety.
+ */
+bool qmp_dispatcher_co_busy;
+
+/*
+ * Protects mon_list, monitor_qapi_event_state, coroutine_mon,
+ * monitor_destroyed.
+ */
+QemuMutex monitor_lock;
+static GHashTable *monitor_qapi_event_state;
+static GHashTable *coroutine_mon; /* Maps Coroutine* to Monitor* */
+
+MonitorList mon_list;
+int mon_refcount;
+static bool monitor_destroyed;
+
+Monitor *monitor_cur(void)
+{
+    Monitor *mon;
+
+    qemu_mutex_lock(&monitor_lock);
+    mon = g_hash_table_lookup(coroutine_mon, qemu_coroutine_self());
+    qemu_mutex_unlock(&monitor_lock);
+
+    return mon;
+}
+
+/**
+ * Sets a new current monitor and returns the old one.
+ *
+ * If a non-NULL monitor is set for a coroutine, another call
+ * resetting it to NULL is required before the coroutine terminates,
+ * otherwise a stale entry would remain in the hash table.
+ */
+Monitor *monitor_set_cur(Coroutine *co, Monitor *mon)
+{
+    Monitor *old_monitor = monitor_cur();
+
+    qemu_mutex_lock(&monitor_lock);
+    if (mon) {
+        g_hash_table_replace(coroutine_mon, co, mon);
+    } else {
+        g_hash_table_remove(coroutine_mon, co);
+    }
+    qemu_mutex_unlock(&monitor_lock);
+
+    return old_monitor;
+}
+
+/**
+ * Is the current monitor, if any, a QMP monitor?
+ */
+bool monitor_cur_is_qmp(void)
+{
+    Monitor *cur_mon = monitor_cur();
+
+    return cur_mon && monitor_is_qmp(cur_mon);
+}
+
+/**
+ * Is @mon is using readline?
+ * Note: not all HMP monitors use readline, e.g., gdbserver has a
+ * non-interactive HMP monitor, so readline is not used there.
+ */
+static inline bool monitor_uses_readline(const MonitorHMP *mon)
+{
+    return mon->use_readline;
+}
+
+static inline bool monitor_is_hmp_non_interactive(const Monitor *mon)
+{
+    if (monitor_is_qmp(mon)) {
+        return false;
+    }
+
+    return !monitor_uses_readline(container_of(mon, MonitorHMP, common));
+}
+
+static void monitor_flush_locked(Monitor *mon);
+
+static gboolean monitor_unblocked(GIOChannel *chan, GIOCondition cond,
+                                  void *opaque)
+{
+    Monitor *mon = opaque;
+
+    qemu_mutex_lock(&mon->mon_lock);
+    mon->out_watch = 0;
+    monitor_flush_locked(mon);
+    qemu_mutex_unlock(&mon->mon_lock);
+    return FALSE;
+}
+
+/* Caller must hold mon->mon_lock */
+static void monitor_flush_locked(Monitor *mon)
+{
+    int rc;
+    size_t len;
+    const char *buf;
+
+    if (mon->skip_flush) {
+        return;
+    }
+
+    buf = qstring_get_str(mon->outbuf);
+    len = qstring_get_length(mon->outbuf);
+
+    if (len && !mon->mux_out) {
+        rc = qemu_chr_fe_write(&mon->chr, (const uint8_t *) buf, len);
+        if ((rc < 0 && errno != EAGAIN) || (rc == len)) {
+            /* all flushed or error */
+            qobject_unref(mon->outbuf);
+            mon->outbuf = qstring_new();
+            return;
+        }
+        if (rc > 0) {
+            /* partial write */
+            QString *tmp = qstring_from_str(buf + rc);
+            qobject_unref(mon->outbuf);
+            mon->outbuf = tmp;
+        }
+        if (mon->out_watch == 0) {
+            mon->out_watch =
+                qemu_chr_fe_add_watch(&mon->chr, G_IO_OUT | G_IO_HUP,
+                                      monitor_unblocked, mon);
+        }
+    }
+}
+
+void monitor_flush(Monitor *mon)
+{
+    qemu_mutex_lock(&mon->mon_lock);
+    monitor_flush_locked(mon);
+    qemu_mutex_unlock(&mon->mon_lock);
+}
+
+/* flush at every end of line */
+int monitor_puts(Monitor *mon, const char *str)
+{
+    int i;
+    char c;
+
+    qemu_mutex_lock(&mon->mon_lock);
+    for (i = 0; str[i]; i++) {
+        c = str[i];
+        if (c == '\n') {
+            qstring_append_chr(mon->outbuf, '\r');
+        }
+        qstring_append_chr(mon->outbuf, c);
+        if (c == '\n') {
+            monitor_flush_locked(mon);
+        }
+    }
+    qemu_mutex_unlock(&mon->mon_lock);
+
+    return i;
+}
+
+int monitor_vprintf(Monitor *mon, const char *fmt, va_list ap)
+{
+    char *buf;
+    int n;
+
+    if (!mon) {
+        return -1;
+    }
+
+    if (monitor_is_qmp(mon)) {
+        return -1;
+    }
+
+    buf = g_strdup_vprintf(fmt, ap);
+    n = monitor_puts(mon, buf);
+    g_free(buf);
+    return n;
+}
+
+int monitor_printf(Monitor *mon, const char *fmt, ...)
+{
+    int ret;
+
+    va_list ap;
+    va_start(ap, fmt);
+    ret = monitor_vprintf(mon, fmt, ap);
+    va_end(ap);
+    return ret;
+}
+
+/*
+ * Print to current monitor if we have one, else to stderr.
+ */
+int error_vprintf(const char *fmt, va_list ap)
+{
+    Monitor *cur_mon = monitor_cur();
+
+    if (cur_mon && !monitor_cur_is_qmp()) {
+        return monitor_vprintf(cur_mon, fmt, ap);
+    }
+    return vfprintf(stderr, fmt, ap);
+}
+
+int error_vprintf_unless_qmp(const char *fmt, va_list ap)
+{
+    Monitor *cur_mon = monitor_cur();
+
+    if (!cur_mon) {
+        return vfprintf(stderr, fmt, ap);
+    }
+    if (!monitor_cur_is_qmp()) {
+        return monitor_vprintf(cur_mon, fmt, ap);
+    }
+    return -1;
+}
+
+
+static MonitorQAPIEventConf monitor_qapi_event_conf[QAPI_EVENT__MAX] = {
+    /* Limit guest-triggerable events to 1 per second */
+    [QAPI_EVENT_RTC_CHANGE]        = { 1000 * SCALE_MS },
+    [QAPI_EVENT_WATCHDOG]          = { 1000 * SCALE_MS },
+    [QAPI_EVENT_BALLOON_CHANGE]    = { 1000 * SCALE_MS },
+    [QAPI_EVENT_QUORUM_REPORT_BAD] = { 1000 * SCALE_MS },
+    [QAPI_EVENT_QUORUM_FAILURE]    = { 1000 * SCALE_MS },
+    [QAPI_EVENT_VSERPORT_CHANGE]   = { 1000 * SCALE_MS },
+    [QAPI_EVENT_MEMORY_DEVICE_SIZE_CHANGE] = { 1000 * SCALE_MS },
+};
+
+/*
+ * Return the clock to use for recording an event's time.
+ * It's QEMU_CLOCK_REALTIME, except for qtests it's
+ * QEMU_CLOCK_VIRTUAL, to support testing rate limits.
+ * Beware: result is invalid before configure_accelerator().
+ */
+static inline QEMUClockType monitor_get_event_clock(void)
+{
+    return qtest_enabled() ? QEMU_CLOCK_VIRTUAL : QEMU_CLOCK_REALTIME;
+}
+
+/*
+ * Broadcast an event to all monitors.
+ * @qdict is the event object.  Its member "event" must match @event.
+ * Caller must hold monitor_lock.
+ */
+static void monitor_qapi_event_emit(QAPIEvent event, QDict *qdict)
+{
+    Monitor *mon;
+    MonitorQMP *qmp_mon;
+
+    trace_monitor_protocol_event_emit(event, qdict);
+    QTAILQ_FOREACH(mon, &mon_list, entry) {
+        if (!monitor_is_qmp(mon)) {
+            continue;
+        }
+
+        qmp_mon = container_of(mon, MonitorQMP, common);
+        if (qmp_mon->commands != &qmp_cap_negotiation_commands) {
+            qmp_send_response(qmp_mon, qdict);
+        }
+    }
+}
+
+static void monitor_qapi_event_handler(void *opaque);
+
+/*
+ * Queue a new event for emission to Monitor instances,
+ * applying any rate limiting if required.
+ */
+static void
+monitor_qapi_event_queue_no_reenter(QAPIEvent event, QDict *qdict)
+{
+    MonitorQAPIEventConf *evconf;
+    MonitorQAPIEventState *evstate;
+
+    assert(event < QAPI_EVENT__MAX);
+    evconf = &monitor_qapi_event_conf[event];
+    trace_monitor_protocol_event_queue(event, qdict, evconf->rate);
+
+    qemu_mutex_lock(&monitor_lock);
+
+    if (!evconf->rate) {
+        /* Unthrottled event */
+        monitor_qapi_event_emit(event, qdict);
+    } else {
+        QDict *data = qobject_to(QDict, qdict_get(qdict, "data"));
+        MonitorQAPIEventState key = { .event = event, .data = data };
+
+        evstate = g_hash_table_lookup(monitor_qapi_event_state, &key);
+        assert(!evstate || timer_pending(evstate->timer));
+
+        if (evstate) {
+            /*
+             * Timer is pending for (at least) evconf->rate ns after
+             * last send.  Store event for sending when timer fires,
+             * replacing a prior stored event if any.
+             */
+            qobject_unref(evstate->qdict);
+            evstate->qdict = qobject_ref(qdict);
+        } else {
+            /*
+             * Last send was (at least) evconf->rate ns ago.
+             * Send immediately, and arm the timer to call
+             * monitor_qapi_event_handler() in evconf->rate ns.  Any
+             * events arriving before then will be delayed until then.
+             */
+            int64_t now = qemu_clock_get_ns(monitor_get_event_clock());
+
+            monitor_qapi_event_emit(event, qdict);
+
+            evstate = g_new(MonitorQAPIEventState, 1);
+            evstate->event = event;
+            evstate->data = qobject_ref(data);
+            evstate->qdict = NULL;
+            evstate->timer = timer_new_ns(monitor_get_event_clock(),
+                                          monitor_qapi_event_handler,
+                                          evstate);
+            g_hash_table_add(monitor_qapi_event_state, evstate);
+            timer_mod_ns(evstate->timer, now + evconf->rate);
+        }
+    }
+
+    qemu_mutex_unlock(&monitor_lock);
+}
+
+void qapi_event_emit(QAPIEvent event, QDict *qdict)
+{
+    /*
+     * monitor_qapi_event_queue_no_reenter() is not reentrant: it
+     * would deadlock on monitor_lock.  Work around by queueing
+     * events in thread-local storage.
+     * TODO: remove this, make it re-enter safe.
+     */
+    typedef struct MonitorQapiEvent {
+        QAPIEvent event;
+        QDict *qdict;
+        QSIMPLEQ_ENTRY(MonitorQapiEvent) entry;
+    } MonitorQapiEvent;
+    static __thread QSIMPLEQ_HEAD(, MonitorQapiEvent) event_queue;
+    static __thread bool reentered;
+    MonitorQapiEvent *ev;
+
+    if (!reentered) {
+        QSIMPLEQ_INIT(&event_queue);
+    }
+
+    ev = g_new(MonitorQapiEvent, 1);
+    ev->qdict = qobject_ref(qdict);
+    ev->event = event;
+    QSIMPLEQ_INSERT_TAIL(&event_queue, ev, entry);
+    if (reentered) {
+        return;
+    }
+
+    reentered = true;
+
+    while ((ev = QSIMPLEQ_FIRST(&event_queue)) != NULL) {
+        QSIMPLEQ_REMOVE_HEAD(&event_queue, entry);
+        monitor_qapi_event_queue_no_reenter(ev->event, ev->qdict);
+        qobject_unref(ev->qdict);
+        g_free(ev);
+    }
+
+    reentered = false;
+}
+
+/*
+ * This function runs evconf->rate ns after sending a throttled
+ * event.
+ * If another event has since been stored, send it.
+ */
+static void monitor_qapi_event_handler(void *opaque)
+{
+    MonitorQAPIEventState *evstate = opaque;
+    MonitorQAPIEventConf *evconf = &monitor_qapi_event_conf[evstate->event];
+
+    trace_monitor_protocol_event_handler(evstate->event, evstate->qdict);
+    qemu_mutex_lock(&monitor_lock);
+
+    if (evstate->qdict) {
+        int64_t now = qemu_clock_get_ns(monitor_get_event_clock());
+
+        monitor_qapi_event_emit(evstate->event, evstate->qdict);
+        qobject_unref(evstate->qdict);
+        evstate->qdict = NULL;
+        timer_mod_ns(evstate->timer, now + evconf->rate);
+    } else {
+        g_hash_table_remove(monitor_qapi_event_state, evstate);
+        qobject_unref(evstate->data);
+        timer_free(evstate->timer);
+        g_free(evstate);
+    }
+
+    qemu_mutex_unlock(&monitor_lock);
+}
+
+static unsigned int qapi_event_throttle_hash(const void *key)
+{
+    const MonitorQAPIEventState *evstate = key;
+    unsigned int hash = evstate->event * 255;
+
+    if (evstate->event == QAPI_EVENT_VSERPORT_CHANGE) {
+        hash += g_str_hash(qdict_get_str(evstate->data, "id"));
+    }
+
+    if (evstate->event == QAPI_EVENT_QUORUM_REPORT_BAD) {
+        hash += g_str_hash(qdict_get_str(evstate->data, "node-name"));
+    }
+
+    return hash;
+}
+
+static gboolean qapi_event_throttle_equal(const void *a, const void *b)
+{
+    const MonitorQAPIEventState *eva = a;
+    const MonitorQAPIEventState *evb = b;
+
+    if (eva->event != evb->event) {
+        return FALSE;
+    }
+
+    if (eva->event == QAPI_EVENT_VSERPORT_CHANGE) {
+        return !strcmp(qdict_get_str(eva->data, "id"),
+                       qdict_get_str(evb->data, "id"));
+    }
+
+    if (eva->event == QAPI_EVENT_QUORUM_REPORT_BAD) {
+        return !strcmp(qdict_get_str(eva->data, "node-name"),
+                       qdict_get_str(evb->data, "node-name"));
+    }
+
+    return TRUE;
+}
+
+int monitor_suspend(Monitor *mon)
+{
+    if (monitor_is_hmp_non_interactive(mon)) {
+        return -ENOTTY;
+    }
+
+    qatomic_inc(&mon->suspend_cnt);
+
+    if (mon->use_io_thread) {
+        /*
+         * Kick I/O thread to make sure this takes effect.  It'll be
+         * evaluated again in prepare() of the watch object.
+         */
+        aio_notify(iothread_get_aio_context(mon_iothread));
+    }
+
+    trace_monitor_suspend(mon, 1);
+    return 0;
+}
+
+static void monitor_accept_input(void *opaque)
+{
+    Monitor *mon = opaque;
+
+    qemu_chr_fe_accept_input(&mon->chr);
+}
+
+void monitor_resume(Monitor *mon)
+{
+    if (monitor_is_hmp_non_interactive(mon)) {
+        return;
+    }
+
+    if (qatomic_dec_fetch(&mon->suspend_cnt) == 0) {
+        AioContext *ctx;
+
+        if (mon->use_io_thread) {
+            ctx = iothread_get_aio_context(mon_iothread);
+        } else {
+            ctx = qemu_get_aio_context();
+        }
+
+        if (!monitor_is_qmp(mon)) {
+            MonitorHMP *hmp_mon = container_of(mon, MonitorHMP, common);
+            assert(hmp_mon->rs);
+            readline_show_prompt(hmp_mon->rs);
+        }
+
+        aio_bh_schedule_oneshot(ctx, monitor_accept_input, mon);
+    }
+
+    trace_monitor_suspend(mon, -1);
+}
+
+int monitor_can_read(void *opaque)
+{
+    Monitor *mon = opaque;
+
+    return !qatomic_mb_read(&mon->suspend_cnt);
+}
+
+void monitor_list_append(Monitor *mon)
+{
+    qemu_mutex_lock(&monitor_lock);
+    /*
+     * This prevents inserting new monitors during monitor_cleanup().
+     * A cleaner solution would involve the main thread telling other
+     * threads to terminate, waiting for their termination.
+     */
+    if (!monitor_destroyed) {
+        QTAILQ_INSERT_HEAD(&mon_list, mon, entry);
+        mon = NULL;
+    }
+    qemu_mutex_unlock(&monitor_lock);
+
+    if (mon) {
+        monitor_data_destroy(mon);
+        g_free(mon);
+    }
+}
+
+static void monitor_iothread_init(void)
+{
+    mon_iothread = iothread_create("mon_iothread", &error_abort);
+}
+
+void monitor_data_init(Monitor *mon, bool is_qmp, bool skip_flush,
+                       bool use_io_thread)
+{
+    if (use_io_thread && !mon_iothread) {
+        monitor_iothread_init();
+    }
+    qemu_mutex_init(&mon->mon_lock);
+    mon->is_qmp = is_qmp;
+    mon->outbuf = qstring_new();
+    mon->skip_flush = skip_flush;
+    mon->use_io_thread = use_io_thread;
+}
+
+void monitor_data_destroy(Monitor *mon)
+{
+    g_free(mon->mon_cpu_path);
+    qemu_chr_fe_deinit(&mon->chr, false);
+    if (monitor_is_qmp(mon)) {
+        monitor_data_destroy_qmp(container_of(mon, MonitorQMP, common));
+    } else {
+        readline_free(container_of(mon, MonitorHMP, common)->rs);
+    }
+    qobject_unref(mon->outbuf);
+    qemu_mutex_destroy(&mon->mon_lock);
+}
+
+void monitor_cleanup(void)
+{
+    /*
+     * The dispatcher needs to stop before destroying the monitor and
+     * the I/O thread.
+     *
+     * We need to poll both qemu_aio_context and iohandler_ctx to make
+     * sure that the dispatcher coroutine keeps making progress and
+     * eventually terminates.  qemu_aio_context is automatically
+     * polled by calling AIO_WAIT_WHILE on it, but we must poll
+     * iohandler_ctx manually.
+     *
+     * Letting the iothread continue while shutting down the dispatcher
+     * means that new requests may still be coming in. This is okay,
+     * we'll just leave them in the queue without sending a response
+     * and monitor_data_destroy() will free them.
+     */
+    qmp_dispatcher_co_shutdown = true;
+    if (!qatomic_xchg(&qmp_dispatcher_co_busy, true)) {
+        aio_co_wake(qmp_dispatcher_co);
+    }
+
+    AIO_WAIT_WHILE(qemu_get_aio_context(),
+                   (aio_poll(iohandler_get_aio_context(), false),
+                    qatomic_mb_read(&qmp_dispatcher_co_busy)));
+
+    /*
+     * We need to explicitly stop the I/O thread (but not destroy it),
+     * clean up the monitor resources, then destroy the I/O thread since
+     * we need to unregister from chardev below in
+     * monitor_data_destroy(), and chardev is not thread-safe yet
+     */
+    if (mon_iothread) {
+        iothread_stop(mon_iothread);
+    }
+
+    /* Flush output buffers and destroy monitors */
+    qemu_mutex_lock(&monitor_lock);
+    monitor_destroyed = true;
+    while (!QTAILQ_EMPTY(&mon_list)) {
+        Monitor *mon = QTAILQ_FIRST(&mon_list);
+        QTAILQ_REMOVE(&mon_list, mon, entry);
+        /* Permit QAPI event emission from character frontend release */
+        qemu_mutex_unlock(&monitor_lock);
+        monitor_flush(mon);
+        monitor_data_destroy(mon);
+        qemu_mutex_lock(&monitor_lock);
+        g_free(mon);
+    }
+    qemu_mutex_unlock(&monitor_lock);
+
+    if (mon_iothread) {
+        iothread_destroy(mon_iothread);
+        mon_iothread = NULL;
+    }
+}
+
+static void monitor_qapi_event_init(void)
+{
+    monitor_qapi_event_state = g_hash_table_new(qapi_event_throttle_hash,
+                                                qapi_event_throttle_equal);
+}
+
+void monitor_init_globals_core(void)
+{
+    monitor_qapi_event_init();
+    qemu_mutex_init(&monitor_lock);
+    coroutine_mon = g_hash_table_new(NULL, NULL);
+
+    /*
+     * The dispatcher BH must run in the main loop thread, since we
+     * have commands assuming that context.  It would be nice to get
+     * rid of those assumptions.
+     */
+    qmp_dispatcher_co = qemu_coroutine_create(monitor_qmp_dispatcher_co, NULL);
+    qatomic_mb_set(&qmp_dispatcher_co_busy, true);
+    aio_co_schedule(iohandler_get_aio_context(), qmp_dispatcher_co);
+}
+
+int monitor_init(MonitorOptions *opts, bool allow_hmp, Error **errp)
+{
+    Chardev *chr;
+    Error *local_err = NULL;
+
+    chr = qemu_chr_find(opts->chardev);
+    if (chr == NULL) {
+        error_setg(errp, "chardev \"%s\" not found", opts->chardev);
+        return -1;
+    }
+
+    if (!opts->has_mode) {
+        opts->mode = allow_hmp ? MONITOR_MODE_READLINE : MONITOR_MODE_CONTROL;
+    }
+
+    switch (opts->mode) {
+    case MONITOR_MODE_CONTROL:
+        monitor_init_qmp(chr, opts->pretty, &local_err);
+        break;
+    case MONITOR_MODE_READLINE:
+        if (!allow_hmp) {
+            error_setg(errp, "Only QMP is supported");
+            return -1;
+        }
+        if (opts->pretty) {
+            warn_report("'pretty' is deprecated for HMP monitors, it has no "
+                        "effect and will be removed in future versions");
+        }
+        monitor_init_hmp(chr, true, &local_err);
+        break;
+    default:
+        g_assert_not_reached();
+    }
+
+    if (local_err) {
+        error_propagate(errp, local_err);
+        return -1;
+    }
+    return 0;
+}
+
+int monitor_init_opts(QemuOpts *opts, Error **errp)
+{
+    Visitor *v;
+    MonitorOptions *options;
+    int ret;
+
+    v = opts_visitor_new(opts);
+    visit_type_MonitorOptions(v, NULL, &options, errp);
+    visit_free(v);
+    if (!options) {
+        return -1;
+    }
+
+    ret = monitor_init(options, true, errp);
+    qapi_free_MonitorOptions(options);
+    return ret;
+}
+
+QemuOptsList qemu_mon_opts = {
+    .name = "mon",
+    .implied_opt_name = "chardev",
+    .head = QTAILQ_HEAD_INITIALIZER(qemu_mon_opts.head),
+    .desc = {
+        {
+            .name = "mode",
+            .type = QEMU_OPT_STRING,
+        },{
+            .name = "chardev",
+            .type = QEMU_OPT_STRING,
+        },{
+            .name = "pretty",
+            .type = QEMU_OPT_BOOL,
+        },
+        { /* end of list */ }
+    },
+};
diff --git a/monitor/qmp-cmds-control.c b/monitor/qmp-cmds-control.c
new file mode 100644
index 000000000..a456762f6
--- /dev/null
+++ b/monitor/qmp-cmds-control.c
@@ -0,0 +1,171 @@
+/*
+ * QMP commands related to the monitor (common to sysemu and tools)
+ *
+ * Copyright (c) 2003-2004 Fabrice Bellard
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+#include "qemu/osdep.h"
+
+#include "monitor-internal.h"
+#include "qemu-version.h"
+#include "qapi/error.h"
+#include "qapi/qapi-commands-control.h"
+#include "qapi/qapi-emit-events.h"
+#include "qapi/qapi-introspect.h"
+
+/*
+ * Accept QMP capabilities in @list for @mon.
+ * On success, set mon->qmp.capab[], and return true.
+ * On error, set @errp, and return false.
+ */
+static bool qmp_caps_accept(MonitorQMP *mon, QMPCapabilityList *list,
+                            Error **errp)
+{
+    GString *unavailable = NULL;
+    bool capab[QMP_CAPABILITY__MAX];
+
+    memset(capab, 0, sizeof(capab));
+
+    for (; list; list = list->next) {
+        if (!mon->capab_offered[list->value]) {
+            if (!unavailable) {
+                unavailable = g_string_new(QMPCapability_str(list->value));
+            } else {
+                g_string_append_printf(unavailable, ", %s",
+                                      QMPCapability_str(list->value));
+            }
+        }
+        capab[list->value] = true;
+    }
+
+    if (unavailable) {
+        error_setg(errp, "Capability %s not available", unavailable->str);
+        g_string_free(unavailable, true);
+        return false;
+    }
+
+    memcpy(mon->capab, capab, sizeof(capab));
+    return true;
+}
+
+void qmp_qmp_capabilities(bool has_enable, QMPCapabilityList *enable,
+                          Error **errp)
+{
+    Monitor *cur_mon = monitor_cur();
+    MonitorQMP *mon;
+
+    assert(monitor_is_qmp(cur_mon));
+    mon = container_of(cur_mon, MonitorQMP, common);
+
+    if (mon->commands == &qmp_commands) {
+        error_set(errp, ERROR_CLASS_COMMAND_NOT_FOUND,
+                  "Capabilities negotiation is already complete, command "
+                  "ignored");
+        return;
+    }
+
+    if (!qmp_caps_accept(mon, enable, errp)) {
+        return;
+    }
+
+    mon->commands = &qmp_commands;
+}
+
+VersionInfo *qmp_query_version(Error **errp)
+{
+    VersionInfo *info = g_new0(VersionInfo, 1);
+
+    info->qemu = g_new0(VersionTriple, 1);
+    info->qemu->major = QEMU_VERSION_MAJOR;
+    info->qemu->minor = QEMU_VERSION_MINOR;
+    info->qemu->micro = QEMU_VERSION_MICRO;
+    info->package = g_strdup(QEMU_PKGVERSION);
+
+    return info;
+}
+
+static void query_commands_cb(const QmpCommand *cmd, void *opaque)
+{
+    CommandInfoList *info, **list = opaque;
+
+    if (!cmd->enabled) {
+        return;
+    }
+
+    info = g_malloc0(sizeof(*info));
+    info->value = g_malloc0(sizeof(*info->value));
+    info->value->name = g_strdup(cmd->name);
+    info->next = *list;
+    *list = info;
+}
+
+CommandInfoList *qmp_query_commands(Error **errp)
+{
+    CommandInfoList *list = NULL;
+    Monitor *cur_mon = monitor_cur();
+    MonitorQMP *mon;
+
+    assert(monitor_is_qmp(cur_mon));
+    mon = container_of(cur_mon, MonitorQMP, common);
+
+    qmp_for_each_command(mon->commands, query_commands_cb, &list);
+
+    return list;
+}
+
+EventInfoList *qmp_query_events(Error **errp)
+{
+    /*
+     * TODO This deprecated command is the only user of
+     * QAPIEvent_str() and QAPIEvent_lookup[].  When the command goes,
+     * they should go, too.
+     */
+    EventInfoList *info, *ev_list = NULL;
+    QAPIEvent e;
+
+    for (e = 0 ; e < QAPI_EVENT__MAX ; e++) {
+        const char *event_name = QAPIEvent_str(e);
+        assert(event_name != NULL);
+        info = g_malloc0(sizeof(*info));
+        info->value = g_malloc0(sizeof(*info->value));
+        info->value->name = g_strdup(event_name);
+
+        info->next = ev_list;
+        ev_list = info;
+    }
+
+    return ev_list;
+}
+
+/*
+ * Minor hack: generated marshalling suppressed for this command
+ * ('gen': false in the schema) so we can parse the JSON string
+ * directly into QObject instead of first parsing it with
+ * visit_type_SchemaInfoList() into a SchemaInfoList, then marshal it
+ * to QObject with generated output marshallers, every time.  Instead,
+ * we do it in test-qobject-input-visitor.c, just to make sure
+ * qapi-gen.py's output actually conforms to the schema.
+ */
+void qmp_query_qmp_schema(QDict *qdict, QObject **ret_data,
+                                 Error **errp)
+{
+    *ret_data = qobject_from_qlit(&qmp_schema_qlit);
+}
diff --git a/monitor/qmp-cmds.c b/monitor/qmp-cmds.c
new file mode 100644
index 000000000..a08143b32
--- /dev/null
+++ b/monitor/qmp-cmds.c
@@ -0,0 +1,403 @@
+/*
+ * QEMU Management Protocol commands
+ *
+ * Copyright IBM, Corp. 2011
+ *
+ * Authors:
+ *  Anthony Liguori   
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2.  See
+ * the COPYING file in the top-level directory.
+ *
+ * Contributions after 2012-01-13 are licensed under the terms of the
+ * GNU GPL, version 2 or (at your option) any later version.
+ */
+
+#include "qemu/osdep.h"
+#include "qemu-common.h"
+#include "qemu/cutils.h"
+#include "qemu/option.h"
+#include "monitor/monitor.h"
+#include "sysemu/sysemu.h"
+#include "qemu/config-file.h"
+#include "qemu/uuid.h"
+#include "chardev/char.h"
+#include "ui/qemu-spice.h"
+#include "ui/vnc.h"
+#include "sysemu/kvm.h"
+#include "sysemu/runstate.h"
+#include "sysemu/arch_init.h"
+#include "sysemu/blockdev.h"
+#include "sysemu/block-backend.h"
+#include "qapi/error.h"
+#include "qapi/qapi-commands-acpi.h"
+#include "qapi/qapi-commands-block.h"
+#include "qapi/qapi-commands-control.h"
+#include "qapi/qapi-commands-machine.h"
+#include "qapi/qapi-commands-misc.h"
+#include "qapi/qapi-commands-ui.h"
+#include "qapi/qmp/qerror.h"
+#include "hw/mem/memory-device.h"
+#include "hw/acpi/acpi_dev_interface.h"
+
+NameInfo *qmp_query_name(Error **errp)
+{
+    NameInfo *info = g_malloc0(sizeof(*info));
+
+    if (qemu_name) {
+        info->has_name = true;
+        info->name = g_strdup(qemu_name);
+    }
+
+    return info;
+}
+
+KvmInfo *qmp_query_kvm(Error **errp)
+{
+    KvmInfo *info = g_malloc0(sizeof(*info));
+
+    info->enabled = kvm_enabled();
+    info->present = kvm_available();
+
+    return info;
+}
+
+UuidInfo *qmp_query_uuid(Error **errp)
+{
+    UuidInfo *info = g_malloc0(sizeof(*info));
+
+    info->UUID = qemu_uuid_unparse_strdup(&qemu_uuid);
+    return info;
+}
+
+void qmp_quit(Error **errp)
+{
+    no_shutdown = 0;
+    qemu_system_shutdown_request(SHUTDOWN_CAUSE_HOST_QMP_QUIT);
+}
+
+void qmp_stop(Error **errp)
+{
+    /* if there is a dump in background, we should wait until the dump
+     * finished */
+    if (dump_in_progress()) {
+        error_setg(errp, "There is a dump in process, please wait.");
+        return;
+    }
+
+    if (runstate_check(RUN_STATE_INMIGRATE)) {
+        autostart = 0;
+    } else {
+        vm_stop(RUN_STATE_PAUSED);
+    }
+}
+
+void qmp_system_reset(Error **errp)
+{
+    qemu_system_reset_request(SHUTDOWN_CAUSE_HOST_QMP_SYSTEM_RESET);
+}
+
+void qmp_system_powerdown(Error **errp)
+{
+    qemu_system_powerdown_request();
+}
+
+void qmp_x_exit_preconfig(Error **errp)
+{
+    if (!runstate_check(RUN_STATE_PRECONFIG)) {
+        error_setg(errp, "The command is permitted only in '%s' state",
+                   RunState_str(RUN_STATE_PRECONFIG));
+        return;
+    }
+    qemu_exit_preconfig_request();
+}
+
+void qmp_cont(Error **errp)
+{
+    BlockBackend *blk;
+    BlockJob *job;
+    Error *local_err = NULL;
+
+    /* if there is a dump in background, we should wait until the dump
+     * finished */
+    if (dump_in_progress()) {
+        error_setg(errp, "There is a dump in process, please wait.");
+        return;
+    }
+
+    if (runstate_needs_reset()) {
+        error_setg(errp, "Resetting the Virtual Machine is required");
+        return;
+    } else if (runstate_check(RUN_STATE_SUSPENDED)) {
+        return;
+    } else if (runstate_check(RUN_STATE_FINISH_MIGRATE)) {
+        error_setg(errp, "Migration is not finalized yet");
+        return;
+    }
+
+    for (blk = blk_next(NULL); blk; blk = blk_next(blk)) {
+        blk_iostatus_reset(blk);
+    }
+
+    for (job = block_job_next(NULL); job; job = block_job_next(job)) {
+        block_job_iostatus_reset(job);
+    }
+
+    /* Continuing after completed migration. Images have been inactivated to
+     * allow the destination to take control. Need to get control back now.
+     *
+     * If there are no inactive block nodes (e.g. because the VM was just
+     * paused rather than completing a migration), bdrv_inactivate_all() simply
+     * doesn't do anything. */
+    bdrv_invalidate_cache_all(&local_err);
+    if (local_err) {
+        error_propagate(errp, local_err);
+        return;
+    }
+
+    if (runstate_check(RUN_STATE_INMIGRATE)) {
+        autostart = 1;
+    } else {
+        vm_start();
+    }
+}
+
+void qmp_system_wakeup(Error **errp)
+{
+    if (!qemu_wakeup_suspend_enabled()) {
+        error_setg(errp,
+                   "wake-up from suspend is not supported by this guest");
+        return;
+    }
+
+    qemu_system_wakeup_request(QEMU_WAKEUP_REASON_OTHER, errp);
+}
+
+void qmp_set_password(const char *protocol, const char *password,
+                      bool has_connected, const char *connected, Error **errp)
+{
+    int disconnect_if_connected = 0;
+    int fail_if_connected = 0;
+    int rc;
+
+    if (has_connected) {
+        if (strcmp(connected, "fail") == 0) {
+            fail_if_connected = 1;
+        } else if (strcmp(connected, "disconnect") == 0) {
+            disconnect_if_connected = 1;
+        } else if (strcmp(connected, "keep") == 0) {
+            /* nothing */
+        } else {
+            error_setg(errp, QERR_INVALID_PARAMETER, "connected");
+            return;
+        }
+    }
+
+    if (strcmp(protocol, "spice") == 0) {
+        if (!qemu_using_spice(errp)) {
+            return;
+        }
+        rc = qemu_spice.set_passwd(password, fail_if_connected,
+                                   disconnect_if_connected);
+        if (rc != 0) {
+            error_setg(errp, QERR_SET_PASSWD_FAILED);
+        }
+        return;
+    }
+
+    if (strcmp(protocol, "vnc") == 0) {
+        if (fail_if_connected || disconnect_if_connected) {
+            /* vnc supports "connected=keep" only */
+            error_setg(errp, QERR_INVALID_PARAMETER, "connected");
+            return;
+        }
+        /* Note that setting an empty password will not disable login through
+         * this interface. */
+        rc = vnc_display_password(NULL, password);
+        if (rc < 0) {
+            error_setg(errp, QERR_SET_PASSWD_FAILED);
+        }
+        return;
+    }
+
+    error_setg(errp, QERR_INVALID_PARAMETER, "protocol");
+}
+
+void qmp_expire_password(const char *protocol, const char *whenstr,
+                         Error **errp)
+{
+    time_t when;
+    int rc;
+
+    if (strcmp(whenstr, "now") == 0) {
+        when = 0;
+    } else if (strcmp(whenstr, "never") == 0) {
+        when = TIME_MAX;
+    } else if (whenstr[0] == '+') {
+        when = time(NULL) + strtoull(whenstr+1, NULL, 10);
+    } else {
+        when = strtoull(whenstr, NULL, 10);
+    }
+
+    if (strcmp(protocol, "spice") == 0) {
+        if (!qemu_using_spice(errp)) {
+            return;
+        }
+        rc = qemu_spice.set_pw_expire(when);
+        if (rc != 0) {
+            error_setg(errp, QERR_SET_PASSWD_FAILED);
+        }
+        return;
+    }
+
+    if (strcmp(protocol, "vnc") == 0) {
+        rc = vnc_display_pw_expire(NULL, when);
+        if (rc != 0) {
+            error_setg(errp, QERR_SET_PASSWD_FAILED);
+        }
+        return;
+    }
+
+    error_setg(errp, QERR_INVALID_PARAMETER, "protocol");
+}
+
+#ifdef CONFIG_VNC
+void qmp_change_vnc_password(const char *password, Error **errp)
+{
+    if (vnc_display_password(NULL, password) < 0) {
+        error_setg(errp, QERR_SET_PASSWD_FAILED);
+    }
+}
+
+static void qmp_change_vnc_listen(const char *target, Error **errp)
+{
+    QemuOptsList *olist = qemu_find_opts("vnc");
+    QemuOpts *opts;
+
+    if (strstr(target, "id=")) {
+        error_setg(errp, "id not supported");
+        return;
+    }
+
+    opts = qemu_opts_find(olist, "default");
+    if (opts) {
+        qemu_opts_del(opts);
+    }
+    opts = vnc_parse(target, errp);
+    if (!opts) {
+        return;
+    }
+
+    vnc_display_open("default", errp);
+}
+
+static void qmp_change_vnc(const char *target, bool has_arg, const char *arg,
+                           Error **errp)
+{
+    if (strcmp(target, "passwd") == 0 || strcmp(target, "password") == 0) {
+        if (!has_arg) {
+            error_setg(errp, QERR_MISSING_PARAMETER, "password");
+        } else {
+            qmp_change_vnc_password(arg, errp);
+        }
+    } else {
+        qmp_change_vnc_listen(target, errp);
+    }
+}
+#endif /* !CONFIG_VNC */
+
+void qmp_change(const char *device, const char *target,
+                bool has_arg, const char *arg, Error **errp)
+{
+    if (strcmp(device, "vnc") == 0) {
+#ifdef CONFIG_VNC
+        qmp_change_vnc(target, has_arg, arg, errp);
+#else
+        error_setg(errp, QERR_FEATURE_DISABLED, "vnc");
+#endif
+    } else {
+        qmp_blockdev_change_medium(true, device, false, NULL, target,
+                                   has_arg, arg, false, 0, errp);
+    }
+}
+
+void qmp_add_client(const char *protocol, const char *fdname,
+                    bool has_skipauth, bool skipauth, bool has_tls, bool tls,
+                    Error **errp)
+{
+    Chardev *s;
+    int fd;
+
+    fd = monitor_get_fd(monitor_cur(), fdname, errp);
+    if (fd < 0) {
+        return;
+    }
+
+    if (strcmp(protocol, "spice") == 0) {
+        if (!qemu_using_spice(errp)) {
+            close(fd);
+            return;
+        }
+        skipauth = has_skipauth ? skipauth : false;
+        tls = has_tls ? tls : false;
+        if (qemu_spice.display_add_client(fd, skipauth, tls) < 0) {
+            error_setg(errp, "spice failed to add client");
+            close(fd);
+        }
+        return;
+#ifdef CONFIG_VNC
+    } else if (strcmp(protocol, "vnc") == 0) {
+        skipauth = has_skipauth ? skipauth : false;
+        vnc_display_add_client(NULL, fd, skipauth);
+        return;
+#endif
+    } else if ((s = qemu_chr_find(protocol)) != NULL) {
+        if (qemu_chr_add_client(s, fd) < 0) {
+            error_setg(errp, "failed to add client");
+            close(fd);
+            return;
+        }
+        return;
+    }
+
+    error_setg(errp, "protocol '%s' is invalid", protocol);
+    close(fd);
+}
+
+
+MemoryDeviceInfoList *qmp_query_memory_devices(Error **errp)
+{
+    return qmp_memory_device_list();
+}
+
+ACPIOSTInfoList *qmp_query_acpi_ospm_status(Error **errp)
+{
+    bool ambig;
+    ACPIOSTInfoList *head = NULL;
+    ACPIOSTInfoList **prev = &head;
+    Object *obj = object_resolve_path_type("", TYPE_ACPI_DEVICE_IF, &ambig);
+
+    if (obj) {
+        AcpiDeviceIfClass *adevc = ACPI_DEVICE_IF_GET_CLASS(obj);
+        AcpiDeviceIf *adev = ACPI_DEVICE_IF(obj);
+
+        adevc->ospm_status(adev, &prev);
+    } else {
+        error_setg(errp, "command is not supported, missing ACPI device");
+    }
+
+    return head;
+}
+
+MemoryInfo *qmp_query_memory_size_summary(Error **errp)
+{
+    MemoryInfo *mem_info = g_malloc0(sizeof(MemoryInfo));
+
+    mem_info->base_memory = ram_size;
+
+    mem_info->plugged_memory = get_plugged_memory_size();
+    mem_info->has_plugged_memory =
+        mem_info->plugged_memory != (uint64_t)-1;
+
+    return mem_info;
+}
diff --git a/monitor/qmp.c b/monitor/qmp.c
new file mode 100644
index 000000000..b42f8c6af
--- /dev/null
+++ b/monitor/qmp.c
@@ -0,0 +1,498 @@
+/*
+ * QEMU monitor
+ *
+ * Copyright (c) 2003-2004 Fabrice Bellard
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+#include "qemu/osdep.h"
+
+#include "chardev/char-io.h"
+#include "monitor-internal.h"
+#include "qapi/error.h"
+#include "qapi/qapi-commands-control.h"
+#include "qapi/qmp/qdict.h"
+#include "qapi/qmp/qjson.h"
+#include "qapi/qmp/qlist.h"
+#include "qapi/qmp/qstring.h"
+#include "trace.h"
+
+struct QMPRequest {
+    /* Owner of the request */
+    MonitorQMP *mon;
+    /*
+     * Request object to be handled or Error to be reported
+     * (exactly one of them is non-null)
+     */
+    QObject *req;
+    Error *err;
+};
+typedef struct QMPRequest QMPRequest;
+
+QmpCommandList qmp_commands, qmp_cap_negotiation_commands;
+
+static bool qmp_oob_enabled(MonitorQMP *mon)
+{
+    return mon->capab[QMP_CAPABILITY_OOB];
+}
+
+static void monitor_qmp_caps_reset(MonitorQMP *mon)
+{
+    memset(mon->capab_offered, 0, sizeof(mon->capab_offered));
+    memset(mon->capab, 0, sizeof(mon->capab));
+    mon->capab_offered[QMP_CAPABILITY_OOB] = mon->common.use_io_thread;
+}
+
+static void qmp_request_free(QMPRequest *req)
+{
+    qobject_unref(req->req);
+    error_free(req->err);
+    g_free(req);
+}
+
+/* Caller must hold mon->qmp.qmp_queue_lock */
+static void monitor_qmp_cleanup_req_queue_locked(MonitorQMP *mon)
+{
+    while (!g_queue_is_empty(mon->qmp_requests)) {
+        qmp_request_free(g_queue_pop_head(mon->qmp_requests));
+    }
+}
+
+static void monitor_qmp_cleanup_queue_and_resume(MonitorQMP *mon)
+{
+    qemu_mutex_lock(&mon->qmp_queue_lock);
+
+    /*
+     * Same condition as in monitor_qmp_bh_dispatcher(), but before
+     * removing an element from the queue (hence no `- 1`).
+     * Also, the queue should not be empty either, otherwise the
+     * monitor hasn't been suspended yet (or was already resumed).
+     */
+    bool need_resume = (!qmp_oob_enabled(mon) ||
+        mon->qmp_requests->length == QMP_REQ_QUEUE_LEN_MAX)
+        && !g_queue_is_empty(mon->qmp_requests);
+
+    monitor_qmp_cleanup_req_queue_locked(mon);
+
+    if (need_resume) {
+        /*
+         * handle_qmp_command() suspended the monitor because the
+         * request queue filled up, to be resumed when the queue has
+         * space again.  We just emptied it; resume the monitor.
+         *
+         * Without this, the monitor would remain suspended forever
+         * when we get here while the monitor is suspended.  An
+         * unfortunately timed CHR_EVENT_CLOSED can do the trick.
+         */
+        monitor_resume(&mon->common);
+    }
+
+    qemu_mutex_unlock(&mon->qmp_queue_lock);
+}
+
+void qmp_send_response(MonitorQMP *mon, const QDict *rsp)
+{
+    const QObject *data = QOBJECT(rsp);
+    QString *json;
+
+    json = mon->pretty ? qobject_to_json_pretty(data) : qobject_to_json(data);
+    assert(json != NULL);
+
+    qstring_append_chr(json, '\n');
+    monitor_puts(&mon->common, qstring_get_str(json));
+
+    qobject_unref(json);
+}
+
+/*
+ * Emit QMP response @rsp to @mon.
+ * Null @rsp can only happen for commands with QCO_NO_SUCCESS_RESP.
+ * Nothing is emitted then.
+ */
+static void monitor_qmp_respond(MonitorQMP *mon, QDict *rsp)
+{
+    if (rsp) {
+        qmp_send_response(mon, rsp);
+    }
+}
+
+/*
+ * Runs outside of coroutine context for OOB commands, but in
+ * coroutine context for everything else.
+ */
+static void monitor_qmp_dispatch(MonitorQMP *mon, QObject *req)
+{
+    QDict *rsp;
+    QDict *error;
+
+    rsp = qmp_dispatch(mon->commands, req, qmp_oob_enabled(mon),
+                       &mon->common);
+
+    if (mon->commands == &qmp_cap_negotiation_commands) {
+        error = qdict_get_qdict(rsp, "error");
+        if (error
+            && !g_strcmp0(qdict_get_try_str(error, "class"),
+                    QapiErrorClass_str(ERROR_CLASS_COMMAND_NOT_FOUND))) {
+            /* Provide a more useful error message */
+            qdict_del(error, "desc");
+            qdict_put_str(error, "desc", "Expecting capabilities negotiation"
+                          " with 'qmp_capabilities'");
+        }
+    }
+
+    monitor_qmp_respond(mon, rsp);
+    qobject_unref(rsp);
+}
+
+/*
+ * Pop a QMP request from a monitor request queue.
+ * Return the request, or NULL all request queues are empty.
+ * We are using round-robin fashion to pop the request, to avoid
+ * processing commands only on a very busy monitor.  To achieve that,
+ * when we process one request on a specific monitor, we put that
+ * monitor to the end of mon_list queue.
+ *
+ * Note: if the function returned with non-NULL, then the caller will
+ * be with qmp_mon->qmp_queue_lock held, and the caller is responsible
+ * to release it.
+ */
+static QMPRequest *monitor_qmp_requests_pop_any_with_lock(void)
+{
+    QMPRequest *req_obj = NULL;
+    Monitor *mon;
+    MonitorQMP *qmp_mon;
+
+    qemu_mutex_lock(&monitor_lock);
+
+    QTAILQ_FOREACH(mon, &mon_list, entry) {
+        if (!monitor_is_qmp(mon)) {
+            continue;
+        }
+
+        qmp_mon = container_of(mon, MonitorQMP, common);
+        qemu_mutex_lock(&qmp_mon->qmp_queue_lock);
+        req_obj = g_queue_pop_head(qmp_mon->qmp_requests);
+        if (req_obj) {
+            /* With the lock of corresponding queue held */
+            break;
+        }
+        qemu_mutex_unlock(&qmp_mon->qmp_queue_lock);
+    }
+
+    if (req_obj) {
+        /*
+         * We found one request on the monitor. Degrade this monitor's
+         * priority to lowest by re-inserting it to end of queue.
+         */
+        QTAILQ_REMOVE(&mon_list, mon, entry);
+        QTAILQ_INSERT_TAIL(&mon_list, mon, entry);
+    }
+
+    qemu_mutex_unlock(&monitor_lock);
+
+    return req_obj;
+}
+
+void coroutine_fn monitor_qmp_dispatcher_co(void *data)
+{
+    QMPRequest *req_obj = NULL;
+    QDict *rsp;
+    bool need_resume;
+    MonitorQMP *mon;
+
+    while (true) {
+        assert(qatomic_mb_read(&qmp_dispatcher_co_busy) == true);
+
+        /*
+         * Mark the dispatcher as not busy already here so that we
+         * don't miss any new requests coming in the middle of our
+         * processing.
+         */
+        qatomic_mb_set(&qmp_dispatcher_co_busy, false);
+
+        while (!(req_obj = monitor_qmp_requests_pop_any_with_lock())) {
+            /*
+             * No more requests to process.  Wait to be reentered from
+             * handle_qmp_command() when it pushes more requests, or
+             * from monitor_cleanup() when it requests shutdown.
+             */
+            if (!qmp_dispatcher_co_shutdown) {
+                qemu_coroutine_yield();
+
+                /*
+                 * busy must be set to true again by whoever
+                 * rescheduled us to avoid double scheduling
+                 */
+                assert(qatomic_xchg(&qmp_dispatcher_co_busy, false) == true);
+            }
+
+            /*
+             * qmp_dispatcher_co_shutdown may have changed if we
+             * yielded and were reentered from monitor_cleanup()
+             */
+            if (qmp_dispatcher_co_shutdown) {
+                return;
+            }
+        }
+
+        if (qatomic_xchg(&qmp_dispatcher_co_busy, true) == true) {
+            /*
+             * Someone rescheduled us (probably because a new requests
+             * came in), but we didn't actually yield. Do that now,
+             * only to be immediately reentered and removed from the
+             * list of scheduled coroutines.
+             */
+            qemu_coroutine_yield();
+        }
+
+        /*
+         * Move the coroutine from iohandler_ctx to qemu_aio_context for
+         * executing the command handler so that it can make progress if it
+         * involves an AIO_WAIT_WHILE().
+         */
+        aio_co_schedule(qemu_get_aio_context(), qmp_dispatcher_co);
+        qemu_coroutine_yield();
+
+        mon = req_obj->mon;
+        /* qmp_oob_enabled() might change after "qmp_capabilities" */
+        need_resume = !qmp_oob_enabled(mon) ||
+            mon->qmp_requests->length == QMP_REQ_QUEUE_LEN_MAX - 1;
+        qemu_mutex_unlock(&mon->qmp_queue_lock);
+        if (req_obj->req) {
+            QDict *qdict = qobject_to(QDict, req_obj->req);
+            QObject *id = qdict ? qdict_get(qdict, "id") : NULL;
+            trace_monitor_qmp_cmd_in_band(qobject_get_try_str(id) ?: "");
+            monitor_qmp_dispatch(mon, req_obj->req);
+        } else {
+            assert(req_obj->err);
+            rsp = qmp_error_response(req_obj->err);
+            req_obj->err = NULL;
+            monitor_qmp_respond(mon, rsp);
+            qobject_unref(rsp);
+        }
+
+        if (need_resume) {
+            /* Pairs with the monitor_suspend() in handle_qmp_command() */
+            monitor_resume(&mon->common);
+        }
+        qmp_request_free(req_obj);
+
+        /*
+         * Yield and reschedule so the main loop stays responsive.
+         *
+         * Move back to iohandler_ctx so that nested event loops for
+         * qemu_aio_context don't start new monitor commands.
+         */
+        aio_co_schedule(iohandler_get_aio_context(), qmp_dispatcher_co);
+        qemu_coroutine_yield();
+    }
+}
+
+static void handle_qmp_command(void *opaque, QObject *req, Error *err)
+{
+    MonitorQMP *mon = opaque;
+    QObject *id = NULL;
+    QDict *qdict;
+    QMPRequest *req_obj;
+
+    assert(!req != !err);
+
+    qdict = qobject_to(QDict, req);
+    if (qdict) {
+        id = qdict_get(qdict, "id");
+    } /* else will fail qmp_dispatch() */
+
+    if (req && trace_event_get_state_backends(TRACE_HANDLE_QMP_COMMAND)) {
+        QString *req_json = qobject_to_json(req);
+        trace_handle_qmp_command(mon, qstring_get_str(req_json));
+        qobject_unref(req_json);
+    }
+
+    if (qdict && qmp_is_oob(qdict)) {
+        /* OOB commands are executed immediately */
+        trace_monitor_qmp_cmd_out_of_band(qobject_get_try_str(id) ?: "");
+        monitor_qmp_dispatch(mon, req);
+        qobject_unref(req);
+        return;
+    }
+
+    req_obj = g_new0(QMPRequest, 1);
+    req_obj->mon = mon;
+    req_obj->req = req;
+    req_obj->err = err;
+
+    /* Protect qmp_requests and fetching its length. */
+    qemu_mutex_lock(&mon->qmp_queue_lock);
+
+    /*
+     * Suspend the monitor when we can't queue more requests after
+     * this one.  Dequeuing in monitor_qmp_bh_dispatcher() or
+     * monitor_qmp_cleanup_queue_and_resume() will resume it.
+     * Note that when OOB is disabled, we queue at most one command,
+     * for backward compatibility.
+     */
+    if (!qmp_oob_enabled(mon) ||
+        mon->qmp_requests->length == QMP_REQ_QUEUE_LEN_MAX - 1) {
+        monitor_suspend(&mon->common);
+    }
+
+    /*
+     * Put the request to the end of queue so that requests will be
+     * handled in time order.  Ownership for req_obj, req,
+     * etc. will be delivered to the handler side.
+     */
+    assert(mon->qmp_requests->length < QMP_REQ_QUEUE_LEN_MAX);
+    g_queue_push_tail(mon->qmp_requests, req_obj);
+    qemu_mutex_unlock(&mon->qmp_queue_lock);
+
+    /* Kick the dispatcher routine */
+    if (!qatomic_xchg(&qmp_dispatcher_co_busy, true)) {
+        aio_co_wake(qmp_dispatcher_co);
+    }
+}
+
+static void monitor_qmp_read(void *opaque, const uint8_t *buf, int size)
+{
+    MonitorQMP *mon = opaque;
+
+    json_message_parser_feed(&mon->parser, (const char *) buf, size);
+}
+
+static QDict *qmp_greeting(MonitorQMP *mon)
+{
+    QList *cap_list = qlist_new();
+    QObject *ver = NULL;
+    QDict *args;
+    QMPCapability cap;
+
+    args = qdict_new();
+    qmp_marshal_query_version(args, &ver, NULL);
+    qobject_unref(args);
+
+    for (cap = 0; cap < QMP_CAPABILITY__MAX; cap++) {
+        if (mon->capab_offered[cap]) {
+            qlist_append_str(cap_list, QMPCapability_str(cap));
+        }
+    }
+
+    return qdict_from_jsonf_nofail(
+        "{'QMP': {'version': %p, 'capabilities': %p}}",
+        ver, cap_list);
+}
+
+static void monitor_qmp_event(void *opaque, QEMUChrEvent event)
+{
+    QDict *data;
+    MonitorQMP *mon = opaque;
+
+    switch (event) {
+    case CHR_EVENT_OPENED:
+        mon->commands = &qmp_cap_negotiation_commands;
+        monitor_qmp_caps_reset(mon);
+        data = qmp_greeting(mon);
+        qmp_send_response(mon, data);
+        qobject_unref(data);
+        mon_refcount++;
+        break;
+    case CHR_EVENT_CLOSED:
+        /*
+         * Note: this is only useful when the output of the chardev
+         * backend is still open.  For example, when the backend is
+         * stdio, it's possible that stdout is still open when stdin
+         * is closed.
+         */
+        monitor_qmp_cleanup_queue_and_resume(mon);
+        json_message_parser_destroy(&mon->parser);
+        json_message_parser_init(&mon->parser, handle_qmp_command,
+                                 mon, NULL);
+        mon_refcount--;
+        monitor_fdsets_cleanup();
+        break;
+    case CHR_EVENT_BREAK:
+    case CHR_EVENT_MUX_IN:
+    case CHR_EVENT_MUX_OUT:
+        /* Ignore */
+        break;
+    }
+}
+
+void monitor_data_destroy_qmp(MonitorQMP *mon)
+{
+    json_message_parser_destroy(&mon->parser);
+    qemu_mutex_destroy(&mon->qmp_queue_lock);
+    monitor_qmp_cleanup_req_queue_locked(mon);
+    g_queue_free(mon->qmp_requests);
+}
+
+static void monitor_qmp_setup_handlers_bh(void *opaque)
+{
+    MonitorQMP *mon = opaque;
+    GMainContext *context;
+
+    assert(mon->common.use_io_thread);
+    context = iothread_get_g_main_context(mon_iothread);
+    assert(context);
+    qemu_chr_fe_set_handlers(&mon->common.chr, monitor_can_read,
+                             monitor_qmp_read, monitor_qmp_event,
+                             NULL, &mon->common, context, true);
+    monitor_list_append(&mon->common);
+}
+
+void monitor_init_qmp(Chardev *chr, bool pretty, Error **errp)
+{
+    MonitorQMP *mon = g_new0(MonitorQMP, 1);
+
+    if (!qemu_chr_fe_init(&mon->common.chr, chr, errp)) {
+        g_free(mon);
+        return;
+    }
+    qemu_chr_fe_set_echo(&mon->common.chr, true);
+
+    /* Note: we run QMP monitor in I/O thread when @chr supports that */
+    monitor_data_init(&mon->common, true, false,
+                      qemu_chr_has_feature(chr, QEMU_CHAR_FEATURE_GCONTEXT));
+
+    mon->pretty = pretty;
+
+    qemu_mutex_init(&mon->qmp_queue_lock);
+    mon->qmp_requests = g_queue_new();
+
+    json_message_parser_init(&mon->parser, handle_qmp_command, mon, NULL);
+    if (mon->common.use_io_thread) {
+        /*
+         * Make sure the old iowatch is gone.  It's possible when
+         * e.g. the chardev is in client mode, with wait=on.
+         */
+        remove_fd_in_watch(chr);
+        /*
+         * We can't call qemu_chr_fe_set_handlers() directly here
+         * since chardev might be running in the monitor I/O
+         * thread.  Schedule a bottom half.
+         */
+        aio_bh_schedule_oneshot(iothread_get_aio_context(mon_iothread),
+                                monitor_qmp_setup_handlers_bh, mon);
+        /* The bottom half will add @mon to @mon_list */
+    } else {
+        qemu_chr_fe_set_handlers(&mon->common.chr, monitor_can_read,
+                                 monitor_qmp_read, monitor_qmp_event,
+                                 NULL, &mon->common, NULL, true);
+        monitor_list_append(&mon->common);
+    }
+}
diff --git a/monitor/trace-events b/monitor/trace-events
new file mode 100644
index 000000000..0365ac4d9
--- /dev/null
+++ b/monitor/trace-events
@@ -0,0 +1,15 @@
+# See docs/devel/tracing.txt for syntax documentation.
+
+# hmp.c
+handle_hmp_command(void *mon, const char *cmdline) "mon %p cmdline: %s"
+
+# monitor.c
+monitor_protocol_event_handler(uint32_t event, void *qdict) "event=%d data=%p"
+monitor_protocol_event_emit(uint32_t event, void *data) "event=%d data=%p"
+monitor_protocol_event_queue(uint32_t event, void *qdict, uint64_t rate) "event=%d data=%p rate=%" PRId64
+monitor_suspend(void *ptr, int cnt) "mon %p: %d"
+
+# qmp.c
+monitor_qmp_cmd_in_band(const char *id) "%s"
+monitor_qmp_cmd_out_of_band(const char *id) "%s"
+handle_qmp_command(void *mon, const char *req) "mon %p req: %s"
diff --git a/monitor/trace.h b/monitor/trace.h
new file mode 100644
index 000000000..f216e31be
--- /dev/null
+++ b/monitor/trace.h
@@ -0,0 +1 @@
+#include "trace/trace-monitor.h"
diff --git a/nbd/client.c b/nbd/client.c
new file mode 100644
index 000000000..0c2db4bcb
--- /dev/null
+++ b/nbd/client.c
@@ -0,0 +1,1514 @@
+/*
+ *  Copyright (C) 2016-2019 Red Hat, Inc.
+ *  Copyright (C) 2005  Anthony Liguori 
+ *
+ *  Network Block Device Client Side
+ *
+ *  This program is free software; you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License as published by
+ *  the Free Software Foundation; under version 2 of the License.
+ *
+ *  This program is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU General Public License for more details.
+ *
+ *  You should have received a copy of the GNU General Public License
+ *  along with this program; if not, see .
+ */
+
+#include "qemu/osdep.h"
+#include "qapi/error.h"
+#include "qemu/queue.h"
+#include "trace.h"
+#include "nbd-internal.h"
+#include "qemu/cutils.h"
+
+/* Definitions for opaque data types */
+
+static QTAILQ_HEAD(, NBDExport) exports = QTAILQ_HEAD_INITIALIZER(exports);
+
+/* That's all folks */
+
+/* Basic flow for negotiation
+
+   Server         Client
+   Negotiate
+
+   or
+
+   Server         Client
+   Negotiate #1
+                  Option
+   Negotiate #2
+
+   ----
+
+   followed by
+
+   Server         Client
+                  Request
+   Response
+                  Request
+   Response
+                  ...
+   ...
+                  Request (type == 2)
+
+*/
+
+/* Send an option request.
+ *
+ * The request is for option @opt, with @data containing @len bytes of
+ * additional payload for the request (@len may be -1 to treat @data as
+ * a C string; and @data may be NULL if @len is 0).
+ * Return 0 if successful, -1 with errp set if it is impossible to
+ * continue. */
+static int nbd_send_option_request(QIOChannel *ioc, uint32_t opt,
+                                   uint32_t len, const char *data,
+                                   Error **errp)
+{
+    ERRP_GUARD();
+    NBDOption req;
+    QEMU_BUILD_BUG_ON(sizeof(req) != 16);
+
+    if (len == -1) {
+        req.length = len = strlen(data);
+    }
+    trace_nbd_send_option_request(opt, nbd_opt_lookup(opt), len);
+
+    stq_be_p(&req.magic, NBD_OPTS_MAGIC);
+    stl_be_p(&req.option, opt);
+    stl_be_p(&req.length, len);
+
+    if (nbd_write(ioc, &req, sizeof(req), errp) < 0) {
+        error_prepend(errp, "Failed to send option request header: ");
+        return -1;
+    }
+
+    if (len && nbd_write(ioc, (char *) data, len, errp) < 0) {
+        error_prepend(errp, "Failed to send option request data: ");
+        return -1;
+    }
+
+    return 0;
+}
+
+/* Send NBD_OPT_ABORT as a courtesy to let the server know that we are
+ * not going to attempt further negotiation. */
+static void nbd_send_opt_abort(QIOChannel *ioc)
+{
+    /* Technically, a compliant server is supposed to reply to us; but
+     * older servers disconnected instead. At any rate, we're allowed
+     * to disconnect without waiting for the server reply, so we don't
+     * even care if the request makes it to the server, let alone
+     * waiting around for whether the server replies. */
+    nbd_send_option_request(ioc, NBD_OPT_ABORT, 0, NULL, NULL);
+}
+
+
+/* Receive the header of an option reply, which should match the given
+ * opt.  Read through the length field, but NOT the length bytes of
+ * payload. Return 0 if successful, -1 with errp set if it is
+ * impossible to continue. */
+static int nbd_receive_option_reply(QIOChannel *ioc, uint32_t opt,
+                                    NBDOptionReply *reply, Error **errp)
+{
+    QEMU_BUILD_BUG_ON(sizeof(*reply) != 20);
+    if (nbd_read(ioc, reply, sizeof(*reply), "option reply", errp) < 0) {
+        nbd_send_opt_abort(ioc);
+        return -1;
+    }
+    reply->magic = be64_to_cpu(reply->magic);
+    reply->option = be32_to_cpu(reply->option);
+    reply->type = be32_to_cpu(reply->type);
+    reply->length = be32_to_cpu(reply->length);
+
+    trace_nbd_receive_option_reply(reply->option, nbd_opt_lookup(reply->option),
+                                   reply->type, nbd_rep_lookup(reply->type),
+                                   reply->length);
+
+    if (reply->magic != NBD_REP_MAGIC) {
+        error_setg(errp, "Unexpected option reply magic");
+        nbd_send_opt_abort(ioc);
+        return -1;
+    }
+    if (reply->option != opt) {
+        error_setg(errp, "Unexpected option type %u (%s), expected %u (%s)",
+                   reply->option, nbd_opt_lookup(reply->option),
+                   opt, nbd_opt_lookup(opt));
+        nbd_send_opt_abort(ioc);
+        return -1;
+    }
+    return 0;
+}
+
+/*
+ * If reply represents success, return 1 without further action.  If
+ * reply represents an error, consume the optional payload of the
+ * packet on ioc.  Then return 0 for unsupported (so the client can
+ * fall back to other approaches), where @strict determines if only
+ * ERR_UNSUP or all errors fit that category, or -1 with errp set for
+ * other errors.
+ */
+static int nbd_handle_reply_err(QIOChannel *ioc, NBDOptionReply *reply,
+                                bool strict, Error **errp)
+{
+    ERRP_GUARD();
+    g_autofree char *msg = NULL;
+
+    if (!(reply->type & (1 << 31))) {
+        return 1;
+    }
+
+    if (reply->length) {
+        if (reply->length > NBD_MAX_BUFFER_SIZE) {
+            error_setg(errp, "server error %" PRIu32
+                       " (%s) message is too long",
+                       reply->type, nbd_rep_lookup(reply->type));
+            goto err;
+        }
+        msg = g_malloc(reply->length + 1);
+        if (nbd_read(ioc, msg, reply->length, NULL, errp) < 0) {
+            error_prepend(errp, "Failed to read option error %" PRIu32
+                          " (%s) message: ",
+                          reply->type, nbd_rep_lookup(reply->type));
+            goto err;
+        }
+        msg[reply->length] = '\0';
+        trace_nbd_server_error_msg(reply->type,
+                                   nbd_reply_type_lookup(reply->type), msg);
+    }
+
+    if (reply->type == NBD_REP_ERR_UNSUP || !strict) {
+        trace_nbd_reply_err_ignored(reply->option,
+                                    nbd_opt_lookup(reply->option),
+                                    reply->type, nbd_rep_lookup(reply->type));
+        return 0;
+    }
+
+    switch (reply->type) {
+    case NBD_REP_ERR_POLICY:
+        error_setg(errp, "Denied by server for option %" PRIu32 " (%s)",
+                   reply->option, nbd_opt_lookup(reply->option));
+        break;
+
+    case NBD_REP_ERR_INVALID:
+        error_setg(errp, "Invalid parameters for option %" PRIu32 " (%s)",
+                   reply->option, nbd_opt_lookup(reply->option));
+        break;
+
+    case NBD_REP_ERR_PLATFORM:
+        error_setg(errp, "Server lacks support for option %" PRIu32 " (%s)",
+                   reply->option, nbd_opt_lookup(reply->option));
+        break;
+
+    case NBD_REP_ERR_TLS_REQD:
+        error_setg(errp, "TLS negotiation required before option %" PRIu32
+                   " (%s)", reply->option, nbd_opt_lookup(reply->option));
+        error_append_hint(errp, "Did you forget a valid tls-creds?\n");
+        break;
+
+    case NBD_REP_ERR_UNKNOWN:
+        error_setg(errp, "Requested export not available");
+        break;
+
+    case NBD_REP_ERR_SHUTDOWN:
+        error_setg(errp, "Server shutting down before option %" PRIu32 " (%s)",
+                   reply->option, nbd_opt_lookup(reply->option));
+        break;
+
+    case NBD_REP_ERR_BLOCK_SIZE_REQD:
+        error_setg(errp, "Server requires INFO_BLOCK_SIZE for option %" PRIu32
+                   " (%s)", reply->option, nbd_opt_lookup(reply->option));
+        break;
+
+    default:
+        error_setg(errp, "Unknown error code when asking for option %" PRIu32
+                   " (%s)", reply->option, nbd_opt_lookup(reply->option));
+        break;
+    }
+
+    if (msg) {
+        error_append_hint(errp, "server reported: %s\n", msg);
+    }
+
+ err:
+    nbd_send_opt_abort(ioc);
+    return -1;
+}
+
+/* nbd_receive_list:
+ * Process another portion of the NBD_OPT_LIST reply, populating any
+ * name received into *@name. If @description is non-NULL, and the
+ * server provided a description, that is also populated. The caller
+ * must eventually call g_free() on success.
+ * Returns 1 if name and description were set and iteration must continue,
+ *         0 if iteration is complete (including if OPT_LIST unsupported),
+ *         -1 with @errp set if an unrecoverable error occurred.
+ */
+static int nbd_receive_list(QIOChannel *ioc, char **name, char **description,
+                            Error **errp)
+{
+    NBDOptionReply reply;
+    uint32_t len;
+    uint32_t namelen;
+    g_autofree char *local_name = NULL;
+    g_autofree char *local_desc = NULL;
+    int error;
+
+    if (nbd_receive_option_reply(ioc, NBD_OPT_LIST, &reply, errp) < 0) {
+        return -1;
+    }
+    error = nbd_handle_reply_err(ioc, &reply, true, errp);
+    if (error <= 0) {
+        return error;
+    }
+    len = reply.length;
+
+    if (reply.type == NBD_REP_ACK) {
+        if (len != 0) {
+            error_setg(errp, "length too long for option end");
+            nbd_send_opt_abort(ioc);
+            return -1;
+        }
+        return 0;
+    } else if (reply.type != NBD_REP_SERVER) {
+        error_setg(errp, "Unexpected reply type %u (%s), expected %u (%s)",
+                   reply.type, nbd_rep_lookup(reply.type),
+                   NBD_REP_SERVER, nbd_rep_lookup(NBD_REP_SERVER));
+        nbd_send_opt_abort(ioc);
+        return -1;
+    }
+
+    if (len < sizeof(namelen) || len > NBD_MAX_BUFFER_SIZE) {
+        error_setg(errp, "incorrect option length %" PRIu32, len);
+        nbd_send_opt_abort(ioc);
+        return -1;
+    }
+    if (nbd_read32(ioc, &namelen, "option name length", errp) < 0) {
+        nbd_send_opt_abort(ioc);
+        return -1;
+    }
+    len -= sizeof(namelen);
+    if (len < namelen || namelen > NBD_MAX_STRING_SIZE) {
+        error_setg(errp, "incorrect name length in server's list response");
+        nbd_send_opt_abort(ioc);
+        return -1;
+    }
+
+    local_name = g_malloc(namelen + 1);
+    if (nbd_read(ioc, local_name, namelen, "export name", errp) < 0) {
+        nbd_send_opt_abort(ioc);
+        return -1;
+    }
+    local_name[namelen] = '\0';
+    len -= namelen;
+    if (len) {
+        if (len > NBD_MAX_STRING_SIZE) {
+            error_setg(errp, "incorrect description length in server's "
+                       "list response");
+            nbd_send_opt_abort(ioc);
+            return -1;
+        }
+        local_desc = g_malloc(len + 1);
+        if (nbd_read(ioc, local_desc, len, "export description", errp) < 0) {
+            nbd_send_opt_abort(ioc);
+            return -1;
+        }
+        local_desc[len] = '\0';
+    }
+
+    trace_nbd_receive_list(local_name, local_desc ?: "");
+    *name = g_steal_pointer(&local_name);
+    if (description) {
+        *description = g_steal_pointer(&local_desc);
+    }
+    return 1;
+}
+
+
+/*
+ * nbd_opt_info_or_go:
+ * Send option for NBD_OPT_INFO or NBD_OPT_GO and parse the reply.
+ * Returns -1 if the option proves the export @info->name cannot be
+ * used, 0 if the option is unsupported (fall back to NBD_OPT_LIST and
+ * NBD_OPT_EXPORT_NAME in that case), and > 0 if the export is good to
+ * go (with the rest of @info populated).
+ */
+static int nbd_opt_info_or_go(QIOChannel *ioc, uint32_t opt,
+                              NBDExportInfo *info, Error **errp)
+{
+    ERRP_GUARD();
+    NBDOptionReply reply;
+    uint32_t len = strlen(info->name);
+    uint16_t type;
+    int error;
+    char *buf;
+
+    /* The protocol requires that the server send NBD_INFO_EXPORT with
+     * a non-zero flags (at least NBD_FLAG_HAS_FLAGS must be set); so
+     * flags still 0 is a witness of a broken server. */
+    info->flags = 0;
+
+    assert(opt == NBD_OPT_GO || opt == NBD_OPT_INFO);
+    trace_nbd_opt_info_go_start(nbd_opt_lookup(opt), info->name);
+    buf = g_malloc(4 + len + 2 + 2 * info->request_sizes + 1);
+    stl_be_p(buf, len);
+    memcpy(buf + 4, info->name, len);
+    /* At most one request, everything else up to server */
+    stw_be_p(buf + 4 + len, info->request_sizes);
+    if (info->request_sizes) {
+        stw_be_p(buf + 4 + len + 2, NBD_INFO_BLOCK_SIZE);
+    }
+    error = nbd_send_option_request(ioc, opt,
+                                    4 + len + 2 + 2 * info->request_sizes,
+                                    buf, errp);
+    g_free(buf);
+    if (error < 0) {
+        return -1;
+    }
+
+    while (1) {
+        if (nbd_receive_option_reply(ioc, opt, &reply, errp) < 0) {
+            return -1;
+        }
+        error = nbd_handle_reply_err(ioc, &reply, true, errp);
+        if (error <= 0) {
+            return error;
+        }
+        len = reply.length;
+
+        if (reply.type == NBD_REP_ACK) {
+            /*
+             * Server is done sending info, and moved into transmission
+             * phase for NBD_OPT_GO, but make sure it sent flags
+             */
+            if (len) {
+                error_setg(errp, "server sent invalid NBD_REP_ACK");
+                return -1;
+            }
+            if (!info->flags) {
+                error_setg(errp, "broken server omitted NBD_INFO_EXPORT");
+                return -1;
+            }
+            trace_nbd_opt_info_go_success(nbd_opt_lookup(opt));
+            return 1;
+        }
+        if (reply.type != NBD_REP_INFO) {
+            error_setg(errp, "unexpected reply type %u (%s), expected %u (%s)",
+                       reply.type, nbd_rep_lookup(reply.type),
+                       NBD_REP_INFO, nbd_rep_lookup(NBD_REP_INFO));
+            nbd_send_opt_abort(ioc);
+            return -1;
+        }
+        if (len < sizeof(type)) {
+            error_setg(errp, "NBD_REP_INFO length %" PRIu32 " is too short",
+                       len);
+            nbd_send_opt_abort(ioc);
+            return -1;
+        }
+        if (nbd_read16(ioc, &type, "info type", errp) < 0) {
+            nbd_send_opt_abort(ioc);
+            return -1;
+        }
+        len -= sizeof(type);
+        switch (type) {
+        case NBD_INFO_EXPORT:
+            if (len != sizeof(info->size) + sizeof(info->flags)) {
+                error_setg(errp, "remaining export info len %" PRIu32
+                           " is unexpected size", len);
+                nbd_send_opt_abort(ioc);
+                return -1;
+            }
+            if (nbd_read64(ioc, &info->size, "info size", errp) < 0) {
+                nbd_send_opt_abort(ioc);
+                return -1;
+            }
+            if (nbd_read16(ioc, &info->flags, "info flags", errp) < 0) {
+                nbd_send_opt_abort(ioc);
+                return -1;
+            }
+            if (info->min_block &&
+                !QEMU_IS_ALIGNED(info->size, info->min_block)) {
+                error_setg(errp, "export size %" PRIu64 " is not multiple of "
+                           "minimum block size %" PRIu32, info->size,
+                           info->min_block);
+                nbd_send_opt_abort(ioc);
+                return -1;
+            }
+            trace_nbd_receive_negotiate_size_flags(info->size, info->flags);
+            break;
+
+        case NBD_INFO_BLOCK_SIZE:
+            if (len != sizeof(info->min_block) * 3) {
+                error_setg(errp, "remaining export info len %" PRIu32
+                           " is unexpected size", len);
+                nbd_send_opt_abort(ioc);
+                return -1;
+            }
+            if (nbd_read32(ioc, &info->min_block, "info minimum block size",
+                           errp) < 0) {
+                nbd_send_opt_abort(ioc);
+                return -1;
+            }
+            if (!is_power_of_2(info->min_block)) {
+                error_setg(errp, "server minimum block size %" PRIu32
+                           " is not a power of two", info->min_block);
+                nbd_send_opt_abort(ioc);
+                return -1;
+            }
+            if (nbd_read32(ioc, &info->opt_block, "info preferred block size",
+                           errp) < 0)
+            {
+                nbd_send_opt_abort(ioc);
+                return -1;
+            }
+            if (!is_power_of_2(info->opt_block) ||
+                info->opt_block < info->min_block) {
+                error_setg(errp, "server preferred block size %" PRIu32
+                           " is not valid", info->opt_block);
+                nbd_send_opt_abort(ioc);
+                return -1;
+            }
+            if (nbd_read32(ioc, &info->max_block, "info maximum block size",
+                           errp) < 0)
+            {
+                nbd_send_opt_abort(ioc);
+                return -1;
+            }
+            if (info->max_block < info->min_block) {
+                error_setg(errp, "server maximum block size %" PRIu32
+                           " is not valid", info->max_block);
+                nbd_send_opt_abort(ioc);
+                return -1;
+            }
+            trace_nbd_opt_info_block_size(info->min_block, info->opt_block,
+                                          info->max_block);
+            break;
+
+        default:
+            /*
+             * Not worth the bother to check if NBD_INFO_NAME or
+             * NBD_INFO_DESCRIPTION exceed NBD_MAX_STRING_SIZE.
+             */
+            trace_nbd_opt_info_unknown(type, nbd_info_lookup(type));
+            if (nbd_drop(ioc, len, errp) < 0) {
+                error_prepend(errp, "Failed to read info payload: ");
+                nbd_send_opt_abort(ioc);
+                return -1;
+            }
+            break;
+        }
+    }
+}
+
+/* Return -1 on failure, 0 if wantname is an available export. */
+static int nbd_receive_query_exports(QIOChannel *ioc,
+                                     const char *wantname,
+                                     Error **errp)
+{
+    bool list_empty = true;
+    bool found_export = false;
+
+    trace_nbd_receive_query_exports_start(wantname);
+    if (nbd_send_option_request(ioc, NBD_OPT_LIST, 0, NULL, errp) < 0) {
+        return -1;
+    }
+
+    while (1) {
+        char *name;
+        int ret = nbd_receive_list(ioc, &name, NULL, errp);
+
+        if (ret < 0) {
+            /* Server gave unexpected reply */
+            return -1;
+        } else if (ret == 0) {
+            /* Done iterating. */
+            if (list_empty) {
+                /*
+                 * We don't have enough context to tell a server that
+                 * sent an empty list apart from a server that does
+                 * not support the list command; but as this function
+                 * is just used to trigger a nicer error message
+                 * before trying NBD_OPT_EXPORT_NAME, assume the
+                 * export is available.
+                 */
+                return 0;
+            } else if (!found_export) {
+                error_setg(errp, "No export with name '%s' available",
+                           wantname);
+                nbd_send_opt_abort(ioc);
+                return -1;
+            }
+            trace_nbd_receive_query_exports_success(wantname);
+            return 0;
+        }
+        list_empty = false;
+        if (!strcmp(name, wantname)) {
+            found_export = true;
+        }
+        g_free(name);
+    }
+}
+
+/*
+ * nbd_request_simple_option: Send an option request, and parse the reply.
+ * @strict controls whether ERR_UNSUP or all errors produce 0 status.
+ * return 1 for successful negotiation,
+ *        0 if operation is unsupported,
+ *        -1 with errp set for any other error
+ */
+static int nbd_request_simple_option(QIOChannel *ioc, int opt, bool strict,
+                                     Error **errp)
+{
+    NBDOptionReply reply;
+    int error;
+
+    if (nbd_send_option_request(ioc, opt, 0, NULL, errp) < 0) {
+        return -1;
+    }
+
+    if (nbd_receive_option_reply(ioc, opt, &reply, errp) < 0) {
+        return -1;
+    }
+    error = nbd_handle_reply_err(ioc, &reply, strict, errp);
+    if (error <= 0) {
+        return error;
+    }
+
+    if (reply.type != NBD_REP_ACK) {
+        error_setg(errp, "Server answered option %d (%s) with unexpected "
+                   "reply %" PRIu32 " (%s)", opt, nbd_opt_lookup(opt),
+                   reply.type, nbd_rep_lookup(reply.type));
+        nbd_send_opt_abort(ioc);
+        return -1;
+    }
+
+    if (reply.length != 0) {
+        error_setg(errp, "Option %d ('%s') response length is %" PRIu32
+                   " (it should be zero)", opt, nbd_opt_lookup(opt),
+                   reply.length);
+        nbd_send_opt_abort(ioc);
+        return -1;
+    }
+
+    return 1;
+}
+
+static QIOChannel *nbd_receive_starttls(QIOChannel *ioc,
+                                        QCryptoTLSCreds *tlscreds,
+                                        const char *hostname, Error **errp)
+{
+    int ret;
+    QIOChannelTLS *tioc;
+    struct NBDTLSHandshakeData data = { 0 };
+
+    ret = nbd_request_simple_option(ioc, NBD_OPT_STARTTLS, true, errp);
+    if (ret <= 0) {
+        if (ret == 0) {
+            error_setg(errp, "Server don't support STARTTLS option");
+            nbd_send_opt_abort(ioc);
+        }
+        return NULL;
+    }
+
+    trace_nbd_receive_starttls_new_client();
+    tioc = qio_channel_tls_new_client(ioc, tlscreds, hostname, errp);
+    if (!tioc) {
+        return NULL;
+    }
+    qio_channel_set_name(QIO_CHANNEL(tioc), "nbd-client-tls");
+    data.loop = g_main_loop_new(g_main_context_default(), FALSE);
+    trace_nbd_receive_starttls_tls_handshake();
+    qio_channel_tls_handshake(tioc,
+                              nbd_tls_handshake,
+                              &data,
+                              NULL,
+                              NULL);
+
+    if (!data.complete) {
+        g_main_loop_run(data.loop);
+    }
+    g_main_loop_unref(data.loop);
+    if (data.error) {
+        error_propagate(errp, data.error);
+        object_unref(OBJECT(tioc));
+        return NULL;
+    }
+
+    return QIO_CHANNEL(tioc);
+}
+
+/*
+ * nbd_send_meta_query:
+ * Send 0 or 1 set/list meta context queries.
+ * Return 0 on success, -1 with errp set for any error
+ */
+static int nbd_send_meta_query(QIOChannel *ioc, uint32_t opt,
+                               const char *export, const char *query,
+                               Error **errp)
+{
+    int ret;
+    uint32_t export_len = strlen(export);
+    uint32_t queries = !!query;
+    uint32_t query_len = 0;
+    uint32_t data_len;
+    char *data;
+    char *p;
+
+    data_len = sizeof(export_len) + export_len + sizeof(queries);
+    assert(export_len <= NBD_MAX_STRING_SIZE);
+    if (query) {
+        query_len = strlen(query);
+        data_len += sizeof(query_len) + query_len;
+        assert(query_len <= NBD_MAX_STRING_SIZE);
+    } else {
+        assert(opt == NBD_OPT_LIST_META_CONTEXT);
+    }
+    p = data = g_malloc(data_len);
+
+    trace_nbd_opt_meta_request(nbd_opt_lookup(opt), query ?: "(all)", export);
+    stl_be_p(p, export_len);
+    memcpy(p += sizeof(export_len), export, export_len);
+    stl_be_p(p += export_len, queries);
+    if (query) {
+        stl_be_p(p += sizeof(queries), query_len);
+        memcpy(p += sizeof(query_len), query, query_len);
+    }
+
+    ret = nbd_send_option_request(ioc, opt, data_len, data, errp);
+    g_free(data);
+    return ret;
+}
+
+/*
+ * nbd_receive_one_meta_context:
+ * Called in a loop to receive and trace one set/list meta context reply.
+ * Pass non-NULL @name or @id to collect results back to the caller, which
+ * must eventually call g_free().
+ * return 1 if name is set and iteration must continue,
+ *        0 if iteration is complete (including if option is unsupported),
+ *        -1 with errp set for any error
+ */
+static int nbd_receive_one_meta_context(QIOChannel *ioc,
+                                        uint32_t opt,
+                                        char **name,
+                                        uint32_t *id,
+                                        Error **errp)
+{
+    int ret;
+    NBDOptionReply reply;
+    char *local_name = NULL;
+    uint32_t local_id;
+
+    if (nbd_receive_option_reply(ioc, opt, &reply, errp) < 0) {
+        return -1;
+    }
+
+    ret = nbd_handle_reply_err(ioc, &reply, false, errp);
+    if (ret <= 0) {
+        return ret;
+    }
+
+    if (reply.type == NBD_REP_ACK) {
+        if (reply.length != 0) {
+            error_setg(errp, "Unexpected length to ACK response");
+            nbd_send_opt_abort(ioc);
+            return -1;
+        }
+        return 0;
+    } else if (reply.type != NBD_REP_META_CONTEXT) {
+        error_setg(errp, "Unexpected reply type %u (%s), expected %u (%s)",
+                   reply.type, nbd_rep_lookup(reply.type),
+                   NBD_REP_META_CONTEXT, nbd_rep_lookup(NBD_REP_META_CONTEXT));
+        nbd_send_opt_abort(ioc);
+        return -1;
+    }
+
+    if (reply.length <= sizeof(local_id) ||
+        reply.length > NBD_MAX_BUFFER_SIZE) {
+        error_setg(errp, "Failed to negotiate meta context, server "
+                   "answered with unexpected length %" PRIu32,
+                   reply.length);
+        nbd_send_opt_abort(ioc);
+        return -1;
+    }
+
+    if (nbd_read32(ioc, &local_id, "context id", errp) < 0) {
+        return -1;
+    }
+
+    reply.length -= sizeof(local_id);
+    local_name = g_malloc(reply.length + 1);
+    if (nbd_read(ioc, local_name, reply.length, "context name", errp) < 0) {
+        g_free(local_name);
+        return -1;
+    }
+    local_name[reply.length] = '\0';
+    trace_nbd_opt_meta_reply(nbd_opt_lookup(opt), local_name, local_id);
+
+    if (name) {
+        *name = local_name;
+    } else {
+        g_free(local_name);
+    }
+    if (id) {
+        *id = local_id;
+    }
+    return 1;
+}
+
+/*
+ * nbd_negotiate_simple_meta_context:
+ * Request the server to set the meta context for export @info->name
+ * using @info->x_dirty_bitmap with a fallback to "base:allocation",
+ * setting @info->context_id to the resulting id. Fail if the server
+ * responds with more than one context or with a context different
+ * than the query.
+ * return 1 for successful negotiation,
+ *        0 if operation is unsupported,
+ *        -1 with errp set for any other error
+ */
+static int nbd_negotiate_simple_meta_context(QIOChannel *ioc,
+                                             NBDExportInfo *info,
+                                             Error **errp)
+{
+    /*
+     * TODO: Removing the x_dirty_bitmap hack will mean refactoring
+     * this function to request and store ids for multiple contexts
+     * (both base:allocation and a dirty bitmap), at which point this
+     * function should lose the term _simple.
+     */
+    int ret;
+    const char *context = info->x_dirty_bitmap ?: "base:allocation";
+    bool received = false;
+    char *name = NULL;
+
+    if (nbd_send_meta_query(ioc, NBD_OPT_SET_META_CONTEXT,
+                            info->name, context, errp) < 0) {
+        return -1;
+    }
+
+    ret = nbd_receive_one_meta_context(ioc, NBD_OPT_SET_META_CONTEXT,
+                                       &name, &info->context_id, errp);
+    if (ret < 0) {
+        return -1;
+    }
+    if (ret == 1) {
+        if (strcmp(context, name)) {
+            error_setg(errp, "Failed to negotiate meta context '%s', server "
+                       "answered with different context '%s'", context,
+                       name);
+            g_free(name);
+            nbd_send_opt_abort(ioc);
+            return -1;
+        }
+        g_free(name);
+        received = true;
+
+        ret = nbd_receive_one_meta_context(ioc, NBD_OPT_SET_META_CONTEXT,
+                                           NULL, NULL, errp);
+        if (ret < 0) {
+            return -1;
+        }
+    }
+    if (ret != 0) {
+        error_setg(errp, "Server answered with more than one context");
+        nbd_send_opt_abort(ioc);
+        return -1;
+    }
+    return received;
+}
+
+/*
+ * nbd_list_meta_contexts:
+ * Request the server to list all meta contexts for export @info->name.
+ * return 0 if list is complete (even if empty),
+ *        -1 with errp set for any error
+ */
+static int nbd_list_meta_contexts(QIOChannel *ioc,
+                                  NBDExportInfo *info,
+                                  Error **errp)
+{
+    int ret;
+    int seen_any = false;
+    int seen_qemu = false;
+
+    if (nbd_send_meta_query(ioc, NBD_OPT_LIST_META_CONTEXT,
+                            info->name, NULL, errp) < 0) {
+        return -1;
+    }
+
+    while (1) {
+        char *context;
+
+        ret = nbd_receive_one_meta_context(ioc, NBD_OPT_LIST_META_CONTEXT,
+                                           &context, NULL, errp);
+        if (ret == 0 && seen_any && !seen_qemu) {
+            /*
+             * Work around qemu 3.0 bug: the server forgot to send
+             * "qemu:" replies to 0 queries. If we saw at least one
+             * reply (probably base:allocation), but none of them were
+             * qemu:, then run a more specific query to make sure.
+             */
+            seen_qemu = true;
+            if (nbd_send_meta_query(ioc, NBD_OPT_LIST_META_CONTEXT,
+                                    info->name, "qemu:", errp) < 0) {
+                return -1;
+            }
+            continue;
+        }
+        if (ret <= 0) {
+            return ret;
+        }
+        seen_any = true;
+        seen_qemu |= strstart(context, "qemu:", NULL);
+        info->contexts = g_renew(char *, info->contexts, ++info->n_contexts);
+        info->contexts[info->n_contexts - 1] = context;
+    }
+}
+
+/*
+ * nbd_start_negotiate:
+ * Start the handshake to the server.  After a positive return, the server
+ * is ready to accept additional NBD_OPT requests.
+ * Returns: negative errno: failure talking to server
+ *          0: server is oldstyle, must call nbd_negotiate_finish_oldstyle
+ *          1: server is newstyle, but can only accept EXPORT_NAME
+ *          2: server is newstyle, but lacks structured replies
+ *          3: server is newstyle and set up for structured replies
+ */
+static int nbd_start_negotiate(AioContext *aio_context, QIOChannel *ioc,
+                               QCryptoTLSCreds *tlscreds,
+                               const char *hostname, QIOChannel **outioc,
+                               bool structured_reply, bool *zeroes,
+                               Error **errp)
+{
+    ERRP_GUARD();
+    uint64_t magic;
+
+    trace_nbd_start_negotiate(tlscreds, hostname ? hostname : "");
+
+    if (zeroes) {
+        *zeroes = true;
+    }
+    if (outioc) {
+        *outioc = NULL;
+    }
+    if (tlscreds && !outioc) {
+        error_setg(errp, "Output I/O channel required for TLS");
+        return -EINVAL;
+    }
+
+    if (nbd_read64(ioc, &magic, "initial magic", errp) < 0) {
+        return -EINVAL;
+    }
+    trace_nbd_receive_negotiate_magic(magic);
+
+    if (magic != NBD_INIT_MAGIC) {
+        error_setg(errp, "Bad initial magic received: 0x%" PRIx64, magic);
+        return -EINVAL;
+    }
+
+    if (nbd_read64(ioc, &magic, "server magic", errp) < 0) {
+        return -EINVAL;
+    }
+    trace_nbd_receive_negotiate_magic(magic);
+
+    if (magic == NBD_OPTS_MAGIC) {
+        uint32_t clientflags = 0;
+        uint16_t globalflags;
+        bool fixedNewStyle = false;
+
+        if (nbd_read16(ioc, &globalflags, "server flags", errp) < 0) {
+            return -EINVAL;
+        }
+        trace_nbd_receive_negotiate_server_flags(globalflags);
+        if (globalflags & NBD_FLAG_FIXED_NEWSTYLE) {
+            fixedNewStyle = true;
+            clientflags |= NBD_FLAG_C_FIXED_NEWSTYLE;
+        }
+        if (globalflags & NBD_FLAG_NO_ZEROES) {
+            if (zeroes) {
+                *zeroes = false;
+            }
+            clientflags |= NBD_FLAG_C_NO_ZEROES;
+        }
+        /* client requested flags */
+        clientflags = cpu_to_be32(clientflags);
+        if (nbd_write(ioc, &clientflags, sizeof(clientflags), errp) < 0) {
+            error_prepend(errp, "Failed to send clientflags field: ");
+            return -EINVAL;
+        }
+        if (tlscreds) {
+            if (fixedNewStyle) {
+                *outioc = nbd_receive_starttls(ioc, tlscreds, hostname, errp);
+                if (!*outioc) {
+                    return -EINVAL;
+                }
+                ioc = *outioc;
+                if (aio_context) {
+                    qio_channel_set_blocking(ioc, false, NULL);
+                    qio_channel_attach_aio_context(ioc, aio_context);
+                }
+            } else {
+                error_setg(errp, "Server does not support STARTTLS");
+                return -EINVAL;
+            }
+        }
+        if (fixedNewStyle) {
+            int result = 0;
+
+            if (structured_reply) {
+                result = nbd_request_simple_option(ioc,
+                                                   NBD_OPT_STRUCTURED_REPLY,
+                                                   false, errp);
+                if (result < 0) {
+                    return -EINVAL;
+                }
+            }
+            return 2 + result;
+        } else {
+            return 1;
+        }
+    } else if (magic == NBD_CLIENT_MAGIC) {
+        if (tlscreds) {
+            error_setg(errp, "Server does not support STARTTLS");
+            return -EINVAL;
+        }
+        return 0;
+    } else {
+        error_setg(errp, "Bad server magic received: 0x%" PRIx64, magic);
+        return -EINVAL;
+    }
+}
+
+/*
+ * nbd_negotiate_finish_oldstyle:
+ * Populate @info with the size and export flags from an oldstyle server,
+ * but does not consume 124 bytes of reserved zero padding.
+ * Returns 0 on success, -1 with @errp set on failure
+ */
+static int nbd_negotiate_finish_oldstyle(QIOChannel *ioc, NBDExportInfo *info,
+                                         Error **errp)
+{
+    uint32_t oldflags;
+
+    if (nbd_read64(ioc, &info->size, "export length", errp) < 0) {
+        return -EINVAL;
+    }
+
+    if (nbd_read32(ioc, &oldflags, "export flags", errp) < 0) {
+        return -EINVAL;
+    }
+    if (oldflags & ~0xffff) {
+        error_setg(errp, "Unexpected export flags %0x" PRIx32, oldflags);
+        return -EINVAL;
+    }
+    info->flags = oldflags;
+    return 0;
+}
+
+/*
+ * nbd_receive_negotiate:
+ * Connect to server, complete negotiation, and move into transmission phase.
+ * Returns: negative errno: failure talking to server
+ *          0: server is connected
+ */
+int nbd_receive_negotiate(AioContext *aio_context, QIOChannel *ioc,
+                          QCryptoTLSCreds *tlscreds,
+                          const char *hostname, QIOChannel **outioc,
+                          NBDExportInfo *info, Error **errp)
+{
+    ERRP_GUARD();
+    int result;
+    bool zeroes;
+    bool base_allocation = info->base_allocation;
+
+    assert(info->name && strlen(info->name) <= NBD_MAX_STRING_SIZE);
+    trace_nbd_receive_negotiate_name(info->name);
+
+    result = nbd_start_negotiate(aio_context, ioc, tlscreds, hostname, outioc,
+                                 info->structured_reply, &zeroes, errp);
+
+    info->structured_reply = false;
+    info->base_allocation = false;
+    if (tlscreds && *outioc) {
+        ioc = *outioc;
+    }
+
+    switch (result) {
+    case 3: /* newstyle, with structured replies */
+        info->structured_reply = true;
+        if (base_allocation) {
+            result = nbd_negotiate_simple_meta_context(ioc, info, errp);
+            if (result < 0) {
+                return -EINVAL;
+            }
+            info->base_allocation = result == 1;
+        }
+        /* fall through */
+    case 2: /* newstyle, try OPT_GO */
+        /* Try NBD_OPT_GO first - if it works, we are done (it
+         * also gives us a good message if the server requires
+         * TLS).  If it is not available, fall back to
+         * NBD_OPT_LIST for nicer error messages about a missing
+         * export, then use NBD_OPT_EXPORT_NAME.  */
+        result = nbd_opt_info_or_go(ioc, NBD_OPT_GO, info, errp);
+        if (result < 0) {
+            return -EINVAL;
+        }
+        if (result > 0) {
+            return 0;
+        }
+        /* Check our desired export is present in the
+         * server export list. Since NBD_OPT_EXPORT_NAME
+         * cannot return an error message, running this
+         * query gives us better error reporting if the
+         * export name is not available.
+         */
+        if (nbd_receive_query_exports(ioc, info->name, errp) < 0) {
+            return -EINVAL;
+        }
+        /* fall through */
+    case 1: /* newstyle, but limited to EXPORT_NAME */
+        /* write the export name request */
+        if (nbd_send_option_request(ioc, NBD_OPT_EXPORT_NAME, -1, info->name,
+                                    errp) < 0) {
+            return -EINVAL;
+        }
+
+        /* Read the response */
+        if (nbd_read64(ioc, &info->size, "export length", errp) < 0) {
+            return -EINVAL;
+        }
+
+        if (nbd_read16(ioc, &info->flags, "export flags", errp) < 0) {
+            return -EINVAL;
+        }
+        break;
+    case 0: /* oldstyle, parse length and flags */
+        if (*info->name) {
+            error_setg(errp, "Server does not support non-empty export names");
+            return -EINVAL;
+        }
+        if (nbd_negotiate_finish_oldstyle(ioc, info, errp) < 0) {
+            return -EINVAL;
+        }
+        break;
+    default:
+        return result;
+    }
+
+    trace_nbd_receive_negotiate_size_flags(info->size, info->flags);
+    if (zeroes && nbd_drop(ioc, 124, errp) < 0) {
+        error_prepend(errp, "Failed to read reserved block: ");
+        return -EINVAL;
+    }
+    return 0;
+}
+
+/* Clean up result of nbd_receive_export_list */
+void nbd_free_export_list(NBDExportInfo *info, int count)
+{
+    int i, j;
+
+    if (!info) {
+        return;
+    }
+
+    for (i = 0; i < count; i++) {
+        g_free(info[i].name);
+        g_free(info[i].description);
+        for (j = 0; j < info[i].n_contexts; j++) {
+            g_free(info[i].contexts[j]);
+        }
+        g_free(info[i].contexts);
+    }
+    g_free(info);
+}
+
+/*
+ * nbd_receive_export_list:
+ * Query details about a server's exports, then disconnect without
+ * going into transmission phase. Return a count of the exports listed
+ * in @info by the server, or -1 on error. Caller must free @info using
+ * nbd_free_export_list().
+ */
+int nbd_receive_export_list(QIOChannel *ioc, QCryptoTLSCreds *tlscreds,
+                            const char *hostname, NBDExportInfo **info,
+                            Error **errp)
+{
+    int result;
+    int count = 0;
+    int i;
+    int rc;
+    int ret = -1;
+    NBDExportInfo *array = NULL;
+    QIOChannel *sioc = NULL;
+
+    *info = NULL;
+    result = nbd_start_negotiate(NULL, ioc, tlscreds, hostname, &sioc, true,
+                                 NULL, errp);
+    if (tlscreds && sioc) {
+        ioc = sioc;
+    }
+
+    switch (result) {
+    case 2:
+    case 3:
+        /* newstyle - use NBD_OPT_LIST to populate array, then try
+         * NBD_OPT_INFO on each array member. If structured replies
+         * are enabled, also try NBD_OPT_LIST_META_CONTEXT. */
+        if (nbd_send_option_request(ioc, NBD_OPT_LIST, 0, NULL, errp) < 0) {
+            goto out;
+        }
+        while (1) {
+            char *name;
+            char *desc;
+
+            rc = nbd_receive_list(ioc, &name, &desc, errp);
+            if (rc < 0) {
+                goto out;
+            } else if (rc == 0) {
+                break;
+            }
+            array = g_renew(NBDExportInfo, array, ++count);
+            memset(&array[count - 1], 0, sizeof(*array));
+            array[count - 1].name = name;
+            array[count - 1].description = desc;
+            array[count - 1].structured_reply = result == 3;
+        }
+
+        for (i = 0; i < count; i++) {
+            array[i].request_sizes = true;
+            rc = nbd_opt_info_or_go(ioc, NBD_OPT_INFO, &array[i], errp);
+            if (rc < 0) {
+                goto out;
+            } else if (rc == 0) {
+                /*
+                 * Pointless to try rest of loop. If OPT_INFO doesn't work,
+                 * it's unlikely that meta contexts work either
+                 */
+                break;
+            }
+
+            if (result == 3 &&
+                nbd_list_meta_contexts(ioc, &array[i], errp) < 0) {
+                goto out;
+            }
+        }
+
+        /* Send NBD_OPT_ABORT as a courtesy before hanging up */
+        nbd_send_opt_abort(ioc);
+        break;
+    case 1: /* newstyle, but limited to EXPORT_NAME */
+        error_setg(errp, "Server does not support export lists");
+        /* We can't even send NBD_OPT_ABORT, so merely hang up */
+        goto out;
+    case 0: /* oldstyle, parse length and flags */
+        array = g_new0(NBDExportInfo, 1);
+        array->name = g_strdup("");
+        count = 1;
+
+        if (nbd_negotiate_finish_oldstyle(ioc, array, errp) < 0) {
+            goto out;
+        }
+
+        /* Send NBD_CMD_DISC as a courtesy to the server, but ignore all
+         * errors now that we have the information we wanted. */
+        if (nbd_drop(ioc, 124, NULL) == 0) {
+            NBDRequest request = { .type = NBD_CMD_DISC };
+
+            nbd_send_request(ioc, &request);
+        }
+        break;
+    default:
+        goto out;
+    }
+
+    *info = array;
+    array = NULL;
+    ret = count;
+
+ out:
+    qio_channel_shutdown(ioc, QIO_CHANNEL_SHUTDOWN_BOTH, NULL);
+    qio_channel_close(ioc, NULL);
+    object_unref(OBJECT(sioc));
+    nbd_free_export_list(array, count);
+    return ret;
+}
+
+#ifdef __linux__
+int nbd_init(int fd, QIOChannelSocket *sioc, NBDExportInfo *info,
+             Error **errp)
+{
+    unsigned long sector_size = MAX(BDRV_SECTOR_SIZE, info->min_block);
+    unsigned long sectors = info->size / sector_size;
+
+    /* FIXME: Once the kernel module is patched to honor block sizes,
+     * and to advertise that fact to user space, we should update the
+     * hand-off to the kernel to use any block sizes we learned. */
+    assert(!info->request_sizes);
+    if (info->size / sector_size != sectors) {
+        error_setg(errp, "Export size %" PRIu64 " too large for 32-bit kernel",
+                   info->size);
+        return -E2BIG;
+    }
+
+    trace_nbd_init_set_socket();
+
+    if (ioctl(fd, NBD_SET_SOCK, (unsigned long) sioc->fd) < 0) {
+        int serrno = errno;
+        error_setg(errp, "Failed to set NBD socket");
+        return -serrno;
+    }
+
+    trace_nbd_init_set_block_size(sector_size);
+
+    if (ioctl(fd, NBD_SET_BLKSIZE, sector_size) < 0) {
+        int serrno = errno;
+        error_setg(errp, "Failed setting NBD block size");
+        return -serrno;
+    }
+
+    trace_nbd_init_set_size(sectors);
+    if (info->size % sector_size) {
+        trace_nbd_init_trailing_bytes(info->size % sector_size);
+    }
+
+    if (ioctl(fd, NBD_SET_SIZE_BLOCKS, sectors) < 0) {
+        int serrno = errno;
+        error_setg(errp, "Failed setting size (in blocks)");
+        return -serrno;
+    }
+
+    if (ioctl(fd, NBD_SET_FLAGS, (unsigned long) info->flags) < 0) {
+        if (errno == ENOTTY) {
+            int read_only = (info->flags & NBD_FLAG_READ_ONLY) != 0;
+            trace_nbd_init_set_readonly();
+
+            if (ioctl(fd, BLKROSET, (unsigned long) &read_only) < 0) {
+                int serrno = errno;
+                error_setg(errp, "Failed setting read-only attribute");
+                return -serrno;
+            }
+        } else {
+            int serrno = errno;
+            error_setg(errp, "Failed setting flags");
+            return -serrno;
+        }
+    }
+
+    trace_nbd_init_finish();
+
+    return 0;
+}
+
+int nbd_client(int fd)
+{
+    int ret;
+    int serrno;
+
+    trace_nbd_client_loop();
+
+    ret = ioctl(fd, NBD_DO_IT);
+    if (ret < 0 && errno == EPIPE) {
+        /* NBD_DO_IT normally returns EPIPE when someone has disconnected
+         * the socket via NBD_DISCONNECT.  We do not want to return 1 in
+         * that case.
+         */
+        ret = 0;
+    }
+    serrno = errno;
+
+    trace_nbd_client_loop_ret(ret, strerror(serrno));
+
+    trace_nbd_client_clear_queue();
+    ioctl(fd, NBD_CLEAR_QUE);
+
+    trace_nbd_client_clear_socket();
+    ioctl(fd, NBD_CLEAR_SOCK);
+
+    errno = serrno;
+    return ret;
+}
+
+int nbd_disconnect(int fd)
+{
+    ioctl(fd, NBD_CLEAR_QUE);
+    ioctl(fd, NBD_DISCONNECT);
+    ioctl(fd, NBD_CLEAR_SOCK);
+    return 0;
+}
+
+#endif /* __linux__ */
+
+int nbd_send_request(QIOChannel *ioc, NBDRequest *request)
+{
+    uint8_t buf[NBD_REQUEST_SIZE];
+
+    trace_nbd_send_request(request->from, request->len, request->handle,
+                           request->flags, request->type,
+                           nbd_cmd_lookup(request->type));
+
+    stl_be_p(buf, NBD_REQUEST_MAGIC);
+    stw_be_p(buf + 4, request->flags);
+    stw_be_p(buf + 6, request->type);
+    stq_be_p(buf + 8, request->handle);
+    stq_be_p(buf + 16, request->from);
+    stl_be_p(buf + 24, request->len);
+
+    return nbd_write(ioc, buf, sizeof(buf), NULL);
+}
+
+/* nbd_receive_simple_reply
+ * Read simple reply except magic field (which should be already read).
+ * Payload is not read (payload is possible for CMD_READ, but here we even
+ * don't know whether it take place or not).
+ */
+static int nbd_receive_simple_reply(QIOChannel *ioc, NBDSimpleReply *reply,
+                                    Error **errp)
+{
+    int ret;
+
+    assert(reply->magic == NBD_SIMPLE_REPLY_MAGIC);
+
+    ret = nbd_read(ioc, (uint8_t *)reply + sizeof(reply->magic),
+                   sizeof(*reply) - sizeof(reply->magic), "reply", errp);
+    if (ret < 0) {
+        return ret;
+    }
+
+    reply->error = be32_to_cpu(reply->error);
+    reply->handle = be64_to_cpu(reply->handle);
+
+    return 0;
+}
+
+/* nbd_receive_structured_reply_chunk
+ * Read structured reply chunk except magic field (which should be already
+ * read).
+ * Payload is not read.
+ */
+static int nbd_receive_structured_reply_chunk(QIOChannel *ioc,
+                                              NBDStructuredReplyChunk *chunk,
+                                              Error **errp)
+{
+    int ret;
+
+    assert(chunk->magic == NBD_STRUCTURED_REPLY_MAGIC);
+
+    ret = nbd_read(ioc, (uint8_t *)chunk + sizeof(chunk->magic),
+                   sizeof(*chunk) - sizeof(chunk->magic), "structured chunk",
+                   errp);
+    if (ret < 0) {
+        return ret;
+    }
+
+    chunk->flags = be16_to_cpu(chunk->flags);
+    chunk->type = be16_to_cpu(chunk->type);
+    chunk->handle = be64_to_cpu(chunk->handle);
+    chunk->length = be32_to_cpu(chunk->length);
+
+    return 0;
+}
+
+/* nbd_read_eof
+ * Tries to read @size bytes from @ioc.
+ * Returns 1 on success
+ *         0 on eof, when no data was read (errp is not set)
+ *         negative errno on failure (errp is set)
+ */
+static inline int coroutine_fn
+nbd_read_eof(BlockDriverState *bs, QIOChannel *ioc, void *buffer, size_t size,
+             Error **errp)
+{
+    bool partial = false;
+
+    assert(size);
+    while (size > 0) {
+        struct iovec iov = { .iov_base = buffer, .iov_len = size };
+        ssize_t len;
+
+        len = qio_channel_readv(ioc, &iov, 1, errp);
+        if (len == QIO_CHANNEL_ERR_BLOCK) {
+            bdrv_dec_in_flight(bs);
+            qio_channel_yield(ioc, G_IO_IN);
+            bdrv_inc_in_flight(bs);
+            continue;
+        } else if (len < 0) {
+            return -EIO;
+        } else if (len == 0) {
+            if (partial) {
+                error_setg(errp,
+                           "Unexpected end-of-file before all bytes were read");
+                return -EIO;
+            } else {
+                return 0;
+            }
+        }
+
+        partial = true;
+        size -= len;
+        buffer = (uint8_t*) buffer + len;
+    }
+    return 1;
+}
+
+/* nbd_receive_reply
+ *
+ * Decreases bs->in_flight while waiting for a new reply. This yield is where
+ * we wait indefinitely and the coroutine must be able to be safely reentered
+ * for nbd_client_attach_aio_context().
+ *
+ * Returns 1 on success
+ *         0 on eof, when no data was read (errp is not set)
+ *         negative errno on failure (errp is set)
+ */
+int coroutine_fn nbd_receive_reply(BlockDriverState *bs, QIOChannel *ioc,
+                                   NBDReply *reply, Error **errp)
+{
+    int ret;
+    const char *type;
+
+    ret = nbd_read_eof(bs, ioc, &reply->magic, sizeof(reply->magic), errp);
+    if (ret <= 0) {
+        return ret;
+    }
+
+    reply->magic = be32_to_cpu(reply->magic);
+
+    switch (reply->magic) {
+    case NBD_SIMPLE_REPLY_MAGIC:
+        ret = nbd_receive_simple_reply(ioc, &reply->simple, errp);
+        if (ret < 0) {
+            break;
+        }
+        trace_nbd_receive_simple_reply(reply->simple.error,
+                                       nbd_err_lookup(reply->simple.error),
+                                       reply->handle);
+        break;
+    case NBD_STRUCTURED_REPLY_MAGIC:
+        ret = nbd_receive_structured_reply_chunk(ioc, &reply->structured, errp);
+        if (ret < 0) {
+            break;
+        }
+        type = nbd_reply_type_lookup(reply->structured.type);
+        trace_nbd_receive_structured_reply_chunk(reply->structured.flags,
+                                                 reply->structured.type, type,
+                                                 reply->structured.handle,
+                                                 reply->structured.length);
+        break;
+    default:
+        error_setg(errp, "invalid magic (got 0x%" PRIx32 ")", reply->magic);
+        return -EINVAL;
+    }
+    if (ret < 0) {
+        return ret;
+    }
+
+    return 1;
+}
+
diff --git a/nbd/common.c b/nbd/common.c
new file mode 100644
index 000000000..ddfe7d118
--- /dev/null
+++ b/nbd/common.c
@@ -0,0 +1,250 @@
+/*
+ *  Copyright (C) 2005  Anthony Liguori 
+ *
+ *  Network Block Device Common Code
+ *
+ *  This program is free software; you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License as published by
+ *  the Free Software Foundation; under version 2 of the License.
+ *
+ *  This program is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU General Public License for more details.
+ *
+ *  You should have received a copy of the GNU General Public License
+ *  along with this program; if not, see .
+ */
+
+#include "qemu/osdep.h"
+#include "trace.h"
+#include "nbd-internal.h"
+
+/* Discard length bytes from channel.  Return -errno on failure and 0 on
+ * success */
+int nbd_drop(QIOChannel *ioc, size_t size, Error **errp)
+{
+    ssize_t ret = 0;
+    char small[1024];
+    char *buffer;
+
+    buffer = sizeof(small) >= size ? small : g_malloc(MIN(65536, size));
+    while (size > 0) {
+        ssize_t count = MIN(65536, size);
+        ret = nbd_read(ioc, buffer, MIN(65536, size), NULL, errp);
+
+        if (ret < 0) {
+            goto cleanup;
+        }
+        size -= count;
+    }
+
+ cleanup:
+    if (buffer != small) {
+        g_free(buffer);
+    }
+    return ret;
+}
+
+
+void nbd_tls_handshake(QIOTask *task,
+                       void *opaque)
+{
+    struct NBDTLSHandshakeData *data = opaque;
+
+    qio_task_propagate_error(task, &data->error);
+    data->complete = true;
+    g_main_loop_quit(data->loop);
+}
+
+
+const char *nbd_opt_lookup(uint32_t opt)
+{
+    switch (opt) {
+    case NBD_OPT_EXPORT_NAME:
+        return "export name";
+    case NBD_OPT_ABORT:
+        return "abort";
+    case NBD_OPT_LIST:
+        return "list";
+    case NBD_OPT_STARTTLS:
+        return "starttls";
+    case NBD_OPT_INFO:
+        return "info";
+    case NBD_OPT_GO:
+        return "go";
+    case NBD_OPT_STRUCTURED_REPLY:
+        return "structured reply";
+    case NBD_OPT_LIST_META_CONTEXT:
+        return "list meta context";
+    case NBD_OPT_SET_META_CONTEXT:
+        return "set meta context";
+    default:
+        return "";
+    }
+}
+
+
+const char *nbd_rep_lookup(uint32_t rep)
+{
+    switch (rep) {
+    case NBD_REP_ACK:
+        return "ack";
+    case NBD_REP_SERVER:
+        return "server";
+    case NBD_REP_INFO:
+        return "info";
+    case NBD_REP_META_CONTEXT:
+        return "meta context";
+    case NBD_REP_ERR_UNSUP:
+        return "unsupported";
+    case NBD_REP_ERR_POLICY:
+        return "denied by policy";
+    case NBD_REP_ERR_INVALID:
+        return "invalid";
+    case NBD_REP_ERR_PLATFORM:
+        return "platform lacks support";
+    case NBD_REP_ERR_TLS_REQD:
+        return "TLS required";
+    case NBD_REP_ERR_UNKNOWN:
+        return "export unknown";
+    case NBD_REP_ERR_SHUTDOWN:
+        return "server shutting down";
+    case NBD_REP_ERR_BLOCK_SIZE_REQD:
+        return "block size required";
+    default:
+        return "";
+    }
+}
+
+
+const char *nbd_info_lookup(uint16_t info)
+{
+    switch (info) {
+    case NBD_INFO_EXPORT:
+        return "export";
+    case NBD_INFO_NAME:
+        return "name";
+    case NBD_INFO_DESCRIPTION:
+        return "description";
+    case NBD_INFO_BLOCK_SIZE:
+        return "block size";
+    default:
+        return "";
+    }
+}
+
+
+const char *nbd_cmd_lookup(uint16_t cmd)
+{
+    switch (cmd) {
+    case NBD_CMD_READ:
+        return "read";
+    case NBD_CMD_WRITE:
+        return "write";
+    case NBD_CMD_DISC:
+        return "disconnect";
+    case NBD_CMD_FLUSH:
+        return "flush";
+    case NBD_CMD_TRIM:
+        return "trim";
+    case NBD_CMD_CACHE:
+        return "cache";
+    case NBD_CMD_WRITE_ZEROES:
+        return "write zeroes";
+    case NBD_CMD_BLOCK_STATUS:
+        return "block status";
+    default:
+        return "";
+    }
+}
+
+
+const char *nbd_reply_type_lookup(uint16_t type)
+{
+    switch (type) {
+    case NBD_REPLY_TYPE_NONE:
+        return "none";
+    case NBD_REPLY_TYPE_OFFSET_DATA:
+        return "data";
+    case NBD_REPLY_TYPE_OFFSET_HOLE:
+        return "hole";
+    case NBD_REPLY_TYPE_BLOCK_STATUS:
+        return "block status";
+    case NBD_REPLY_TYPE_ERROR:
+        return "generic error";
+    case NBD_REPLY_TYPE_ERROR_OFFSET:
+        return "error at offset";
+    default:
+        if (type & (1 << 15)) {
+            return "";
+        }
+        return "";
+    }
+}
+
+
+const char *nbd_err_lookup(int err)
+{
+    switch (err) {
+    case NBD_SUCCESS:
+        return "success";
+    case NBD_EPERM:
+        return "EPERM";
+    case NBD_EIO:
+        return "EIO";
+    case NBD_ENOMEM:
+        return "ENOMEM";
+    case NBD_EINVAL:
+        return "EINVAL";
+    case NBD_ENOSPC:
+        return "ENOSPC";
+    case NBD_EOVERFLOW:
+        return "EOVERFLOW";
+    case NBD_ENOTSUP:
+        return "ENOTSUP";
+    case NBD_ESHUTDOWN:
+        return "ESHUTDOWN";
+    default:
+        return "";
+    }
+}
+
+
+int nbd_errno_to_system_errno(int err)
+{
+    int ret;
+    switch (err) {
+    case NBD_SUCCESS:
+        ret = 0;
+        break;
+    case NBD_EPERM:
+        ret = EPERM;
+        break;
+    case NBD_EIO:
+        ret = EIO;
+        break;
+    case NBD_ENOMEM:
+        ret = ENOMEM;
+        break;
+    case NBD_ENOSPC:
+        ret = ENOSPC;
+        break;
+    case NBD_EOVERFLOW:
+        ret = EOVERFLOW;
+        break;
+    case NBD_ENOTSUP:
+        ret = ENOTSUP;
+        break;
+    case NBD_ESHUTDOWN:
+        ret = ESHUTDOWN;
+        break;
+    default:
+        trace_nbd_unknown_error(err);
+        /* fallthrough */
+    case NBD_EINVAL:
+        ret = EINVAL;
+        break;
+    }
+    return ret;
+}
diff --git a/nbd/meson.build b/nbd/meson.build
new file mode 100644
index 000000000..2baaa3694
--- /dev/null
+++ b/nbd/meson.build
@@ -0,0 +1,7 @@
+block_ss.add(files(
+  'client.c',
+  'common.c',
+))
+blockdev_ss.add(files(
+  'server.c',
+))
diff --git a/nbd/nbd-internal.h b/nbd/nbd-internal.h
new file mode 100644
index 000000000..1b2141ab4
--- /dev/null
+++ b/nbd/nbd-internal.h
@@ -0,0 +1,86 @@
+/*
+ * NBD Internal Declarations
+ *
+ * Copyright (C) 2016 Red Hat, Inc.
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
+ * See the COPYING file in the top-level directory.
+ */
+
+#ifndef NBD_INTERNAL_H
+#define NBD_INTERNAL_H
+#include "block/nbd.h"
+#include "sysemu/block-backend.h"
+#include "io/channel-tls.h"
+
+#include "qemu/coroutine.h"
+#include "qemu/iov.h"
+
+#ifndef _WIN32
+#include 
+#endif
+#ifdef HAVE_SYS_IOCCOM_H
+#include 
+#endif
+
+#ifdef __linux__
+#include 
+#endif
+
+#include "qemu/bswap.h"
+
+/* This is all part of the "official" NBD API.
+ *
+ * The most up-to-date documentation is available at:
+ * https://github.com/yoe/nbd/blob/master/doc/proto.md
+ */
+
+/* Size of all NBD_OPT_*, without payload */
+#define NBD_REQUEST_SIZE            (4 + 2 + 2 + 8 + 8 + 4)
+/* Size of all NBD_REP_* sent in answer to most NBD_OPT_*, without payload */
+#define NBD_REPLY_SIZE              (4 + 4 + 8)
+/* Size of reply to NBD_OPT_EXPORT_NAME */
+#define NBD_REPLY_EXPORT_NAME_SIZE  (8 + 2 + 124)
+/* Size of oldstyle negotiation */
+#define NBD_OLDSTYLE_NEGOTIATE_SIZE (8 + 8 + 8 + 4 + 124)
+
+#define NBD_INIT_MAGIC              0x4e42444d41474943LL /* ASCII "NBDMAGIC" */
+#define NBD_REQUEST_MAGIC           0x25609513
+#define NBD_OPTS_MAGIC              0x49484156454F5054LL /* ASCII "IHAVEOPT" */
+#define NBD_CLIENT_MAGIC            0x0000420281861253LL
+#define NBD_REP_MAGIC               0x0003e889045565a9LL
+
+#define NBD_SET_SOCK                _IO(0xab, 0)
+#define NBD_SET_BLKSIZE             _IO(0xab, 1)
+#define NBD_SET_SIZE                _IO(0xab, 2)
+#define NBD_DO_IT                   _IO(0xab, 3)
+#define NBD_CLEAR_SOCK              _IO(0xab, 4)
+#define NBD_CLEAR_QUE               _IO(0xab, 5)
+#define NBD_PRINT_DEBUG             _IO(0xab, 6)
+#define NBD_SET_SIZE_BLOCKS         _IO(0xab, 7)
+#define NBD_DISCONNECT              _IO(0xab, 8)
+#define NBD_SET_TIMEOUT             _IO(0xab, 9)
+#define NBD_SET_FLAGS               _IO(0xab, 10)
+
+/* nbd_write
+ * Writes @size bytes to @ioc. Returns 0 on success.
+ */
+static inline int nbd_write(QIOChannel *ioc, const void *buffer, size_t size,
+                            Error **errp)
+{
+    return qio_channel_write_all(ioc, buffer, size, errp) < 0 ? -EIO : 0;
+}
+
+struct NBDTLSHandshakeData {
+    GMainLoop *loop;
+    bool complete;
+    Error *error;
+};
+
+
+void nbd_tls_handshake(QIOTask *task,
+                       void *opaque);
+
+int nbd_drop(QIOChannel *ioc, size_t size, Error **errp);
+
+#endif
diff --git a/nbd/server.c b/nbd/server.c
new file mode 100644
index 000000000..613ed2634
--- /dev/null
+++ b/nbd/server.c
@@ -0,0 +1,2621 @@
+/*
+ *  Copyright (C) 2016-2020 Red Hat, Inc.
+ *  Copyright (C) 2005  Anthony Liguori 
+ *
+ *  Network Block Device Server Side
+ *
+ *  This program is free software; you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License as published by
+ *  the Free Software Foundation; under version 2 of the License.
+ *
+ *  This program is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU General Public License for more details.
+ *
+ *  You should have received a copy of the GNU General Public License
+ *  along with this program; if not, see .
+ */
+
+#include "qemu/osdep.h"
+
+#include "block/export.h"
+#include "qapi/error.h"
+#include "qemu/queue.h"
+#include "trace.h"
+#include "nbd-internal.h"
+#include "qemu/units.h"
+
+#define NBD_META_ID_BASE_ALLOCATION 0
+#define NBD_META_ID_ALLOCATION_DEPTH 1
+/* Dirty bitmaps use 'NBD_META_ID_DIRTY_BITMAP + i', so keep this id last. */
+#define NBD_META_ID_DIRTY_BITMAP 2
+
+/*
+ * NBD_MAX_BLOCK_STATUS_EXTENTS: 1 MiB of extents data. An empirical
+ * constant. If an increase is needed, note that the NBD protocol
+ * recommends no larger than 32 mb, so that the client won't consider
+ * the reply as a denial of service attack.
+ */
+#define NBD_MAX_BLOCK_STATUS_EXTENTS (1 * MiB / 8)
+
+static int system_errno_to_nbd_errno(int err)
+{
+    switch (err) {
+    case 0:
+        return NBD_SUCCESS;
+    case EPERM:
+    case EROFS:
+        return NBD_EPERM;
+    case EIO:
+        return NBD_EIO;
+    case ENOMEM:
+        return NBD_ENOMEM;
+#ifdef EDQUOT
+    case EDQUOT:
+#endif
+    case EFBIG:
+    case ENOSPC:
+        return NBD_ENOSPC;
+    case EOVERFLOW:
+        return NBD_EOVERFLOW;
+    case ENOTSUP:
+#if ENOTSUP != EOPNOTSUPP
+    case EOPNOTSUPP:
+#endif
+        return NBD_ENOTSUP;
+    case ESHUTDOWN:
+        return NBD_ESHUTDOWN;
+    case EINVAL:
+    default:
+        return NBD_EINVAL;
+    }
+}
+
+/* Definitions for opaque data types */
+
+typedef struct NBDRequestData NBDRequestData;
+
+struct NBDRequestData {
+    QSIMPLEQ_ENTRY(NBDRequestData) entry;
+    NBDClient *client;
+    uint8_t *data;
+    bool complete;
+};
+
+struct NBDExport {
+    BlockExport common;
+
+    char *name;
+    char *description;
+    uint64_t size;
+    uint16_t nbdflags;
+    QTAILQ_HEAD(, NBDClient) clients;
+    QTAILQ_ENTRY(NBDExport) next;
+
+    BlockBackend *eject_notifier_blk;
+    Notifier eject_notifier;
+
+    bool allocation_depth;
+    BdrvDirtyBitmap **export_bitmaps;
+    size_t nr_export_bitmaps;
+};
+
+static QTAILQ_HEAD(, NBDExport) exports = QTAILQ_HEAD_INITIALIZER(exports);
+
+/* NBDExportMetaContexts represents a list of contexts to be exported,
+ * as selected by NBD_OPT_SET_META_CONTEXT. Also used for
+ * NBD_OPT_LIST_META_CONTEXT. */
+typedef struct NBDExportMetaContexts {
+    NBDExport *exp;
+    size_t count; /* number of negotiated contexts */
+    bool base_allocation; /* export base:allocation context (block status) */
+    bool allocation_depth; /* export qemu:allocation-depth */
+    bool *bitmaps; /*
+                    * export qemu:dirty-bitmap:,
+                    * sized by exp->nr_export_bitmaps
+                    */
+} NBDExportMetaContexts;
+
+struct NBDClient {
+    int refcount;
+    void (*close_fn)(NBDClient *client, bool negotiated);
+
+    NBDExport *exp;
+    QCryptoTLSCreds *tlscreds;
+    char *tlsauthz;
+    QIOChannelSocket *sioc; /* The underlying data channel */
+    QIOChannel *ioc; /* The current I/O channel which may differ (eg TLS) */
+
+    Coroutine *recv_coroutine;
+
+    CoMutex send_lock;
+    Coroutine *send_coroutine;
+
+    QTAILQ_ENTRY(NBDClient) next;
+    int nb_requests;
+    bool closing;
+
+    uint32_t check_align; /* If non-zero, check for aligned client requests */
+
+    bool structured_reply;
+    NBDExportMetaContexts export_meta;
+
+    uint32_t opt; /* Current option being negotiated */
+    uint32_t optlen; /* remaining length of data in ioc for the option being
+                        negotiated now */
+};
+
+static void nbd_client_receive_next_request(NBDClient *client);
+
+/* Basic flow for negotiation
+
+   Server         Client
+   Negotiate
+
+   or
+
+   Server         Client
+   Negotiate #1
+                  Option
+   Negotiate #2
+
+   ----
+
+   followed by
+
+   Server         Client
+                  Request
+   Response
+                  Request
+   Response
+                  ...
+   ...
+                  Request (type == 2)
+
+*/
+
+static inline void set_be_option_rep(NBDOptionReply *rep, uint32_t option,
+                                     uint32_t type, uint32_t length)
+{
+    stq_be_p(&rep->magic, NBD_REP_MAGIC);
+    stl_be_p(&rep->option, option);
+    stl_be_p(&rep->type, type);
+    stl_be_p(&rep->length, length);
+}
+
+/* Send a reply header, including length, but no payload.
+ * Return -errno on error, 0 on success. */
+static int nbd_negotiate_send_rep_len(NBDClient *client, uint32_t type,
+                                      uint32_t len, Error **errp)
+{
+    NBDOptionReply rep;
+
+    trace_nbd_negotiate_send_rep_len(client->opt, nbd_opt_lookup(client->opt),
+                                     type, nbd_rep_lookup(type), len);
+
+    assert(len < NBD_MAX_BUFFER_SIZE);
+
+    set_be_option_rep(&rep, client->opt, type, len);
+    return nbd_write(client->ioc, &rep, sizeof(rep), errp);
+}
+
+/* Send a reply header with default 0 length.
+ * Return -errno on error, 0 on success. */
+static int nbd_negotiate_send_rep(NBDClient *client, uint32_t type,
+                                  Error **errp)
+{
+    return nbd_negotiate_send_rep_len(client, type, 0, errp);
+}
+
+/* Send an error reply.
+ * Return -errno on error, 0 on success. */
+static int GCC_FMT_ATTR(4, 0)
+nbd_negotiate_send_rep_verr(NBDClient *client, uint32_t type,
+                            Error **errp, const char *fmt, va_list va)
+{
+    ERRP_GUARD();
+    g_autofree char *msg = NULL;
+    int ret;
+    size_t len;
+
+    msg = g_strdup_vprintf(fmt, va);
+    len = strlen(msg);
+    assert(len < NBD_MAX_STRING_SIZE);
+    trace_nbd_negotiate_send_rep_err(msg);
+    ret = nbd_negotiate_send_rep_len(client, type, len, errp);
+    if (ret < 0) {
+        return ret;
+    }
+    if (nbd_write(client->ioc, msg, len, errp) < 0) {
+        error_prepend(errp, "write failed (error message): ");
+        return -EIO;
+    }
+
+    return 0;
+}
+
+/*
+ * Return a malloc'd copy of @name suitable for use in an error reply.
+ */
+static char *
+nbd_sanitize_name(const char *name)
+{
+    if (strnlen(name, 80) < 80) {
+        return g_strdup(name);
+    }
+    /* XXX Should we also try to sanitize any control characters? */
+    return g_strdup_printf("%.80s...", name);
+}
+
+/* Send an error reply.
+ * Return -errno on error, 0 on success. */
+static int GCC_FMT_ATTR(4, 5)
+nbd_negotiate_send_rep_err(NBDClient *client, uint32_t type,
+                           Error **errp, const char *fmt, ...)
+{
+    va_list va;
+    int ret;
+
+    va_start(va, fmt);
+    ret = nbd_negotiate_send_rep_verr(client, type, errp, fmt, va);
+    va_end(va);
+    return ret;
+}
+
+/* Drop remainder of the current option, and send a reply with the
+ * given error type and message. Return -errno on read or write
+ * failure; or 0 if connection is still live. */
+static int GCC_FMT_ATTR(4, 0)
+nbd_opt_vdrop(NBDClient *client, uint32_t type, Error **errp,
+              const char *fmt, va_list va)
+{
+    int ret = nbd_drop(client->ioc, client->optlen, errp);
+
+    client->optlen = 0;
+    if (!ret) {
+        ret = nbd_negotiate_send_rep_verr(client, type, errp, fmt, va);
+    }
+    return ret;
+}
+
+static int GCC_FMT_ATTR(4, 5)
+nbd_opt_drop(NBDClient *client, uint32_t type, Error **errp,
+             const char *fmt, ...)
+{
+    int ret;
+    va_list va;
+
+    va_start(va, fmt);
+    ret = nbd_opt_vdrop(client, type, errp, fmt, va);
+    va_end(va);
+
+    return ret;
+}
+
+static int GCC_FMT_ATTR(3, 4)
+nbd_opt_invalid(NBDClient *client, Error **errp, const char *fmt, ...)
+{
+    int ret;
+    va_list va;
+
+    va_start(va, fmt);
+    ret = nbd_opt_vdrop(client, NBD_REP_ERR_INVALID, errp, fmt, va);
+    va_end(va);
+
+    return ret;
+}
+
+/* Read size bytes from the unparsed payload of the current option.
+ * If @check_nul, require that no NUL bytes appear in buffer.
+ * Return -errno on I/O error, 0 if option was completely handled by
+ * sending a reply about inconsistent lengths, or 1 on success. */
+static int nbd_opt_read(NBDClient *client, void *buffer, size_t size,
+                        bool check_nul, Error **errp)
+{
+    if (size > client->optlen) {
+        return nbd_opt_invalid(client, errp,
+                               "Inconsistent lengths in option %s",
+                               nbd_opt_lookup(client->opt));
+    }
+    client->optlen -= size;
+    if (qio_channel_read_all(client->ioc, buffer, size, errp) < 0) {
+        return -EIO;
+    }
+
+    if (check_nul && strnlen(buffer, size) != size) {
+        return nbd_opt_invalid(client, errp,
+                               "Unexpected embedded NUL in option %s",
+                               nbd_opt_lookup(client->opt));
+    }
+    return 1;
+}
+
+/* Drop size bytes from the unparsed payload of the current option.
+ * Return -errno on I/O error, 0 if option was completely handled by
+ * sending a reply about inconsistent lengths, or 1 on success. */
+static int nbd_opt_skip(NBDClient *client, size_t size, Error **errp)
+{
+    if (size > client->optlen) {
+        return nbd_opt_invalid(client, errp,
+                               "Inconsistent lengths in option %s",
+                               nbd_opt_lookup(client->opt));
+    }
+    client->optlen -= size;
+    return nbd_drop(client->ioc, size, errp) < 0 ? -EIO : 1;
+}
+
+/* nbd_opt_read_name
+ *
+ * Read a string with the format:
+ *   uint32_t len     (<= NBD_MAX_STRING_SIZE)
+ *   len bytes string (not 0-terminated)
+ *
+ * On success, @name will be allocated.
+ * If @length is non-null, it will be set to the actual string length.
+ *
+ * Return -errno on I/O error, 0 if option was completely handled by
+ * sending a reply about inconsistent lengths, or 1 on success.
+ */
+static int nbd_opt_read_name(NBDClient *client, char **name, uint32_t *length,
+                             Error **errp)
+{
+    int ret;
+    uint32_t len;
+    g_autofree char *local_name = NULL;
+
+    *name = NULL;
+    ret = nbd_opt_read(client, &len, sizeof(len), false, errp);
+    if (ret <= 0) {
+        return ret;
+    }
+    len = cpu_to_be32(len);
+
+    if (len > NBD_MAX_STRING_SIZE) {
+        return nbd_opt_invalid(client, errp,
+                               "Invalid name length: %" PRIu32, len);
+    }
+
+    local_name = g_malloc(len + 1);
+    ret = nbd_opt_read(client, local_name, len, true, errp);
+    if (ret <= 0) {
+        return ret;
+    }
+    local_name[len] = '\0';
+
+    if (length) {
+        *length = len;
+    }
+    *name = g_steal_pointer(&local_name);
+
+    return 1;
+}
+
+/* Send a single NBD_REP_SERVER reply to NBD_OPT_LIST, including payload.
+ * Return -errno on error, 0 on success. */
+static int nbd_negotiate_send_rep_list(NBDClient *client, NBDExport *exp,
+                                       Error **errp)
+{
+    ERRP_GUARD();
+    size_t name_len, desc_len;
+    uint32_t len;
+    const char *name = exp->name ? exp->name : "";
+    const char *desc = exp->description ? exp->description : "";
+    QIOChannel *ioc = client->ioc;
+    int ret;
+
+    trace_nbd_negotiate_send_rep_list(name, desc);
+    name_len = strlen(name);
+    desc_len = strlen(desc);
+    assert(name_len <= NBD_MAX_STRING_SIZE && desc_len <= NBD_MAX_STRING_SIZE);
+    len = name_len + desc_len + sizeof(len);
+    ret = nbd_negotiate_send_rep_len(client, NBD_REP_SERVER, len, errp);
+    if (ret < 0) {
+        return ret;
+    }
+
+    len = cpu_to_be32(name_len);
+    if (nbd_write(ioc, &len, sizeof(len), errp) < 0) {
+        error_prepend(errp, "write failed (name length): ");
+        return -EINVAL;
+    }
+
+    if (nbd_write(ioc, name, name_len, errp) < 0) {
+        error_prepend(errp, "write failed (name buffer): ");
+        return -EINVAL;
+    }
+
+    if (nbd_write(ioc, desc, desc_len, errp) < 0) {
+        error_prepend(errp, "write failed (description buffer): ");
+        return -EINVAL;
+    }
+
+    return 0;
+}
+
+/* Process the NBD_OPT_LIST command, with a potential series of replies.
+ * Return -errno on error, 0 on success. */
+static int nbd_negotiate_handle_list(NBDClient *client, Error **errp)
+{
+    NBDExport *exp;
+    assert(client->opt == NBD_OPT_LIST);
+
+    /* For each export, send a NBD_REP_SERVER reply. */
+    QTAILQ_FOREACH(exp, &exports, next) {
+        if (nbd_negotiate_send_rep_list(client, exp, errp)) {
+            return -EINVAL;
+        }
+    }
+    /* Finish with a NBD_REP_ACK. */
+    return nbd_negotiate_send_rep(client, NBD_REP_ACK, errp);
+}
+
+static void nbd_check_meta_export(NBDClient *client)
+{
+    if (client->exp != client->export_meta.exp) {
+        client->export_meta.count = 0;
+    }
+}
+
+/* Send a reply to NBD_OPT_EXPORT_NAME.
+ * Return -errno on error, 0 on success. */
+static int nbd_negotiate_handle_export_name(NBDClient *client, bool no_zeroes,
+                                            Error **errp)
+{
+    ERRP_GUARD();
+    g_autofree char *name = NULL;
+    char buf[NBD_REPLY_EXPORT_NAME_SIZE] = "";
+    size_t len;
+    int ret;
+    uint16_t myflags;
+
+    /* Client sends:
+        [20 ..  xx]   export name (length bytes)
+       Server replies:
+        [ 0 ..   7]   size
+        [ 8 ..   9]   export flags
+        [10 .. 133]   reserved     (0) [unless no_zeroes]
+     */
+    trace_nbd_negotiate_handle_export_name();
+    if (client->optlen > NBD_MAX_STRING_SIZE) {
+        error_setg(errp, "Bad length received");
+        return -EINVAL;
+    }
+    name = g_malloc(client->optlen + 1);
+    if (nbd_read(client->ioc, name, client->optlen, "export name", errp) < 0) {
+        return -EIO;
+    }
+    name[client->optlen] = '\0';
+    client->optlen = 0;
+
+    trace_nbd_negotiate_handle_export_name_request(name);
+
+    client->exp = nbd_export_find(name);
+    if (!client->exp) {
+        error_setg(errp, "export not found");
+        return -EINVAL;
+    }
+
+    myflags = client->exp->nbdflags;
+    if (client->structured_reply) {
+        myflags |= NBD_FLAG_SEND_DF;
+    }
+    trace_nbd_negotiate_new_style_size_flags(client->exp->size, myflags);
+    stq_be_p(buf, client->exp->size);
+    stw_be_p(buf + 8, myflags);
+    len = no_zeroes ? 10 : sizeof(buf);
+    ret = nbd_write(client->ioc, buf, len, errp);
+    if (ret < 0) {
+        error_prepend(errp, "write failed: ");
+        return ret;
+    }
+
+    QTAILQ_INSERT_TAIL(&client->exp->clients, client, next);
+    blk_exp_ref(&client->exp->common);
+    nbd_check_meta_export(client);
+
+    return 0;
+}
+
+/* Send a single NBD_REP_INFO, with a buffer @buf of @length bytes.
+ * The buffer does NOT include the info type prefix.
+ * Return -errno on error, 0 if ready to send more. */
+static int nbd_negotiate_send_info(NBDClient *client,
+                                   uint16_t info, uint32_t length, void *buf,
+                                   Error **errp)
+{
+    int rc;
+
+    trace_nbd_negotiate_send_info(info, nbd_info_lookup(info), length);
+    rc = nbd_negotiate_send_rep_len(client, NBD_REP_INFO,
+                                    sizeof(info) + length, errp);
+    if (rc < 0) {
+        return rc;
+    }
+    info = cpu_to_be16(info);
+    if (nbd_write(client->ioc, &info, sizeof(info), errp) < 0) {
+        return -EIO;
+    }
+    if (nbd_write(client->ioc, buf, length, errp) < 0) {
+        return -EIO;
+    }
+    return 0;
+}
+
+/* nbd_reject_length: Handle any unexpected payload.
+ * @fatal requests that we quit talking to the client, even if we are able
+ * to successfully send an error reply.
+ * Return:
+ * -errno  transmission error occurred or @fatal was requested, errp is set
+ * 0       error message successfully sent to client, errp is not set
+ */
+static int nbd_reject_length(NBDClient *client, bool fatal, Error **errp)
+{
+    int ret;
+
+    assert(client->optlen);
+    ret = nbd_opt_invalid(client, errp, "option '%s' has unexpected length",
+                          nbd_opt_lookup(client->opt));
+    if (fatal && !ret) {
+        error_setg(errp, "option '%s' has unexpected length",
+                   nbd_opt_lookup(client->opt));
+        return -EINVAL;
+    }
+    return ret;
+}
+
+/* Handle NBD_OPT_INFO and NBD_OPT_GO.
+ * Return -errno on error, 0 if ready for next option, and 1 to move
+ * into transmission phase.  */
+static int nbd_negotiate_handle_info(NBDClient *client, Error **errp)
+{
+    int rc;
+    g_autofree char *name = NULL;
+    NBDExport *exp;
+    uint16_t requests;
+    uint16_t request;
+    uint32_t namelen = 0;
+    bool sendname = false;
+    bool blocksize = false;
+    uint32_t sizes[3];
+    char buf[sizeof(uint64_t) + sizeof(uint16_t)];
+    uint32_t check_align = 0;
+    uint16_t myflags;
+
+    /* Client sends:
+        4 bytes: L, name length (can be 0)
+        L bytes: export name
+        2 bytes: N, number of requests (can be 0)
+        N * 2 bytes: N requests
+    */
+    rc = nbd_opt_read_name(client, &name, &namelen, errp);
+    if (rc <= 0) {
+        return rc;
+    }
+    trace_nbd_negotiate_handle_export_name_request(name);
+
+    rc = nbd_opt_read(client, &requests, sizeof(requests), false, errp);
+    if (rc <= 0) {
+        return rc;
+    }
+    requests = be16_to_cpu(requests);
+    trace_nbd_negotiate_handle_info_requests(requests);
+    while (requests--) {
+        rc = nbd_opt_read(client, &request, sizeof(request), false, errp);
+        if (rc <= 0) {
+            return rc;
+        }
+        request = be16_to_cpu(request);
+        trace_nbd_negotiate_handle_info_request(request,
+                                                nbd_info_lookup(request));
+        /* We care about NBD_INFO_NAME and NBD_INFO_BLOCK_SIZE;
+         * everything else is either a request we don't know or
+         * something we send regardless of request */
+        switch (request) {
+        case NBD_INFO_NAME:
+            sendname = true;
+            break;
+        case NBD_INFO_BLOCK_SIZE:
+            blocksize = true;
+            break;
+        }
+    }
+    if (client->optlen) {
+        return nbd_reject_length(client, false, errp);
+    }
+
+    exp = nbd_export_find(name);
+    if (!exp) {
+        g_autofree char *sane_name = nbd_sanitize_name(name);
+
+        return nbd_negotiate_send_rep_err(client, NBD_REP_ERR_UNKNOWN,
+                                          errp, "export '%s' not present",
+                                          sane_name);
+    }
+
+    /* Don't bother sending NBD_INFO_NAME unless client requested it */
+    if (sendname) {
+        rc = nbd_negotiate_send_info(client, NBD_INFO_NAME, namelen, name,
+                                     errp);
+        if (rc < 0) {
+            return rc;
+        }
+    }
+
+    /* Send NBD_INFO_DESCRIPTION only if available, regardless of
+     * client request */
+    if (exp->description) {
+        size_t len = strlen(exp->description);
+
+        assert(len <= NBD_MAX_STRING_SIZE);
+        rc = nbd_negotiate_send_info(client, NBD_INFO_DESCRIPTION,
+                                     len, exp->description, errp);
+        if (rc < 0) {
+            return rc;
+        }
+    }
+
+    /* Send NBD_INFO_BLOCK_SIZE always, but tweak the minimum size
+     * according to whether the client requested it, and according to
+     * whether this is OPT_INFO or OPT_GO. */
+    /* minimum - 1 for back-compat, or actual if client will obey it. */
+    if (client->opt == NBD_OPT_INFO || blocksize) {
+        check_align = sizes[0] = blk_get_request_alignment(exp->common.blk);
+    } else {
+        sizes[0] = 1;
+    }
+    assert(sizes[0] <= NBD_MAX_BUFFER_SIZE);
+    /* preferred - Hard-code to 4096 for now.
+     * TODO: is blk_bs(blk)->bl.opt_transfer appropriate? */
+    sizes[1] = MAX(4096, sizes[0]);
+    /* maximum - At most 32M, but smaller as appropriate. */
+    sizes[2] = MIN(blk_get_max_transfer(exp->common.blk), NBD_MAX_BUFFER_SIZE);
+    trace_nbd_negotiate_handle_info_block_size(sizes[0], sizes[1], sizes[2]);
+    sizes[0] = cpu_to_be32(sizes[0]);
+    sizes[1] = cpu_to_be32(sizes[1]);
+    sizes[2] = cpu_to_be32(sizes[2]);
+    rc = nbd_negotiate_send_info(client, NBD_INFO_BLOCK_SIZE,
+                                 sizeof(sizes), sizes, errp);
+    if (rc < 0) {
+        return rc;
+    }
+
+    /* Send NBD_INFO_EXPORT always */
+    myflags = exp->nbdflags;
+    if (client->structured_reply) {
+        myflags |= NBD_FLAG_SEND_DF;
+    }
+    trace_nbd_negotiate_new_style_size_flags(exp->size, myflags);
+    stq_be_p(buf, exp->size);
+    stw_be_p(buf + 8, myflags);
+    rc = nbd_negotiate_send_info(client, NBD_INFO_EXPORT,
+                                 sizeof(buf), buf, errp);
+    if (rc < 0) {
+        return rc;
+    }
+
+    /*
+     * If the client is just asking for NBD_OPT_INFO, but forgot to
+     * request block sizes in a situation that would impact
+     * performance, then return an error. But for NBD_OPT_GO, we
+     * tolerate all clients, regardless of alignments.
+     */
+    if (client->opt == NBD_OPT_INFO && !blocksize &&
+        blk_get_request_alignment(exp->common.blk) > 1) {
+        return nbd_negotiate_send_rep_err(client,
+                                          NBD_REP_ERR_BLOCK_SIZE_REQD,
+                                          errp,
+                                          "request NBD_INFO_BLOCK_SIZE to "
+                                          "use this export");
+    }
+
+    /* Final reply */
+    rc = nbd_negotiate_send_rep(client, NBD_REP_ACK, errp);
+    if (rc < 0) {
+        return rc;
+    }
+
+    if (client->opt == NBD_OPT_GO) {
+        client->exp = exp;
+        client->check_align = check_align;
+        QTAILQ_INSERT_TAIL(&client->exp->clients, client, next);
+        blk_exp_ref(&client->exp->common);
+        nbd_check_meta_export(client);
+        rc = 1;
+    }
+    return rc;
+}
+
+
+/* Handle NBD_OPT_STARTTLS. Return NULL to drop connection, or else the
+ * new channel for all further (now-encrypted) communication. */
+static QIOChannel *nbd_negotiate_handle_starttls(NBDClient *client,
+                                                 Error **errp)
+{
+    QIOChannel *ioc;
+    QIOChannelTLS *tioc;
+    struct NBDTLSHandshakeData data = { 0 };
+
+    assert(client->opt == NBD_OPT_STARTTLS);
+
+    trace_nbd_negotiate_handle_starttls();
+    ioc = client->ioc;
+
+    if (nbd_negotiate_send_rep(client, NBD_REP_ACK, errp) < 0) {
+        return NULL;
+    }
+
+    tioc = qio_channel_tls_new_server(ioc,
+                                      client->tlscreds,
+                                      client->tlsauthz,
+                                      errp);
+    if (!tioc) {
+        return NULL;
+    }
+
+    qio_channel_set_name(QIO_CHANNEL(tioc), "nbd-server-tls");
+    trace_nbd_negotiate_handle_starttls_handshake();
+    data.loop = g_main_loop_new(g_main_context_default(), FALSE);
+    qio_channel_tls_handshake(tioc,
+                              nbd_tls_handshake,
+                              &data,
+                              NULL,
+                              NULL);
+
+    if (!data.complete) {
+        g_main_loop_run(data.loop);
+    }
+    g_main_loop_unref(data.loop);
+    if (data.error) {
+        object_unref(OBJECT(tioc));
+        error_propagate(errp, data.error);
+        return NULL;
+    }
+
+    return QIO_CHANNEL(tioc);
+}
+
+/* nbd_negotiate_send_meta_context
+ *
+ * Send one chunk of reply to NBD_OPT_{LIST,SET}_META_CONTEXT
+ *
+ * For NBD_OPT_LIST_META_CONTEXT @context_id is ignored, 0 is used instead.
+ */
+static int nbd_negotiate_send_meta_context(NBDClient *client,
+                                           const char *context,
+                                           uint32_t context_id,
+                                           Error **errp)
+{
+    NBDOptionReplyMetaContext opt;
+    struct iovec iov[] = {
+        {.iov_base = &opt, .iov_len = sizeof(opt)},
+        {.iov_base = (void *)context, .iov_len = strlen(context)}
+    };
+
+    assert(iov[1].iov_len <= NBD_MAX_STRING_SIZE);
+    if (client->opt == NBD_OPT_LIST_META_CONTEXT) {
+        context_id = 0;
+    }
+
+    trace_nbd_negotiate_meta_query_reply(context, context_id);
+    set_be_option_rep(&opt.h, client->opt, NBD_REP_META_CONTEXT,
+                      sizeof(opt) - sizeof(opt.h) + iov[1].iov_len);
+    stl_be_p(&opt.context_id, context_id);
+
+    return qio_channel_writev_all(client->ioc, iov, 2, errp) < 0 ? -EIO : 0;
+}
+
+/*
+ * Return true if @query matches @pattern, or if @query is empty when
+ * the @client is performing _LIST_.
+ */
+static bool nbd_meta_empty_or_pattern(NBDClient *client, const char *pattern,
+                                      const char *query)
+{
+    if (!*query) {
+        trace_nbd_negotiate_meta_query_parse("empty");
+        return client->opt == NBD_OPT_LIST_META_CONTEXT;
+    }
+    if (strcmp(query, pattern) == 0) {
+        trace_nbd_negotiate_meta_query_parse(pattern);
+        return true;
+    }
+    trace_nbd_negotiate_meta_query_skip("pattern not matched");
+    return false;
+}
+
+/*
+ * Return true and adjust @str in place if it begins with @prefix.
+ */
+static bool nbd_strshift(const char **str, const char *prefix)
+{
+    size_t len = strlen(prefix);
+
+    if (strncmp(*str, prefix, len) == 0) {
+        *str += len;
+        return true;
+    }
+    return false;
+}
+
+/* nbd_meta_base_query
+ *
+ * Handle queries to 'base' namespace. For now, only the base:allocation
+ * context is available.  Return true if @query has been handled.
+ */
+static bool nbd_meta_base_query(NBDClient *client, NBDExportMetaContexts *meta,
+                                const char *query)
+{
+    if (!nbd_strshift(&query, "base:")) {
+        return false;
+    }
+    trace_nbd_negotiate_meta_query_parse("base:");
+
+    if (nbd_meta_empty_or_pattern(client, "allocation", query)) {
+        meta->base_allocation = true;
+    }
+    return true;
+}
+
+/* nbd_meta_qemu_query
+ *
+ * Handle queries to 'qemu' namespace. For now, only the qemu:dirty-bitmap:
+ * and qemu:allocation-depth contexts are available.  Return true if @query
+ * has been handled.
+ */
+static bool nbd_meta_qemu_query(NBDClient *client, NBDExportMetaContexts *meta,
+                                const char *query)
+{
+    size_t i;
+
+    if (!nbd_strshift(&query, "qemu:")) {
+        return false;
+    }
+    trace_nbd_negotiate_meta_query_parse("qemu:");
+
+    if (!*query) {
+        if (client->opt == NBD_OPT_LIST_META_CONTEXT) {
+            meta->allocation_depth = meta->exp->allocation_depth;
+            memset(meta->bitmaps, 1, meta->exp->nr_export_bitmaps);
+        }
+        trace_nbd_negotiate_meta_query_parse("empty");
+        return true;
+    }
+
+    if (strcmp(query, "allocation-depth") == 0) {
+        trace_nbd_negotiate_meta_query_parse("allocation-depth");
+        meta->allocation_depth = meta->exp->allocation_depth;
+        return true;
+    }
+
+    if (nbd_strshift(&query, "dirty-bitmap:")) {
+        trace_nbd_negotiate_meta_query_parse("dirty-bitmap:");
+        if (!*query) {
+            if (client->opt == NBD_OPT_LIST_META_CONTEXT) {
+                memset(meta->bitmaps, 1, meta->exp->nr_export_bitmaps);
+            }
+            trace_nbd_negotiate_meta_query_parse("empty");
+            return true;
+        }
+
+        for (i = 0; i < meta->exp->nr_export_bitmaps; i++) {
+            const char *bm_name;
+
+            bm_name = bdrv_dirty_bitmap_name(meta->exp->export_bitmaps[i]);
+            if (strcmp(bm_name, query) == 0) {
+                meta->bitmaps[i] = true;
+                trace_nbd_negotiate_meta_query_parse(query);
+                return true;
+            }
+        }
+        trace_nbd_negotiate_meta_query_skip("no dirty-bitmap match");
+        return true;
+    }
+
+    trace_nbd_negotiate_meta_query_skip("unknown qemu context");
+    return true;
+}
+
+/* nbd_negotiate_meta_query
+ *
+ * Parse namespace name and call corresponding function to parse body of the
+ * query.
+ *
+ * The only supported namespaces are 'base' and 'qemu'.
+ *
+ * Return -errno on I/O error, 0 if option was completely handled by
+ * sending a reply about inconsistent lengths, or 1 on success. */
+static int nbd_negotiate_meta_query(NBDClient *client,
+                                    NBDExportMetaContexts *meta, Error **errp)
+{
+    int ret;
+    g_autofree char *query = NULL;
+    uint32_t len;
+
+    ret = nbd_opt_read(client, &len, sizeof(len), false, errp);
+    if (ret <= 0) {
+        return ret;
+    }
+    len = cpu_to_be32(len);
+
+    if (len > NBD_MAX_STRING_SIZE) {
+        trace_nbd_negotiate_meta_query_skip("length too long");
+        return nbd_opt_skip(client, len, errp);
+    }
+
+    query = g_malloc(len + 1);
+    ret = nbd_opt_read(client, query, len, true, errp);
+    if (ret <= 0) {
+        return ret;
+    }
+    query[len] = '\0';
+
+    if (nbd_meta_base_query(client, meta, query)) {
+        return 1;
+    }
+    if (nbd_meta_qemu_query(client, meta, query)) {
+        return 1;
+    }
+
+    trace_nbd_negotiate_meta_query_skip("unknown namespace");
+    return 1;
+}
+
+/* nbd_negotiate_meta_queries
+ * Handle NBD_OPT_LIST_META_CONTEXT and NBD_OPT_SET_META_CONTEXT
+ *
+ * Return -errno on I/O error, or 0 if option was completely handled. */
+static int nbd_negotiate_meta_queries(NBDClient *client,
+                                      NBDExportMetaContexts *meta, Error **errp)
+{
+    int ret;
+    g_autofree char *export_name = NULL;
+    g_autofree bool *bitmaps = NULL;
+    NBDExportMetaContexts local_meta = {0};
+    uint32_t nb_queries;
+    size_t i;
+    size_t count = 0;
+
+    if (!client->structured_reply) {
+        return nbd_opt_invalid(client, errp,
+                               "request option '%s' when structured reply "
+                               "is not negotiated",
+                               nbd_opt_lookup(client->opt));
+    }
+
+    if (client->opt == NBD_OPT_LIST_META_CONTEXT) {
+        /* Only change the caller's meta on SET. */
+        meta = &local_meta;
+    }
+
+    g_free(meta->bitmaps);
+    memset(meta, 0, sizeof(*meta));
+
+    ret = nbd_opt_read_name(client, &export_name, NULL, errp);
+    if (ret <= 0) {
+        return ret;
+    }
+
+    meta->exp = nbd_export_find(export_name);
+    if (meta->exp == NULL) {
+        g_autofree char *sane_name = nbd_sanitize_name(export_name);
+
+        return nbd_opt_drop(client, NBD_REP_ERR_UNKNOWN, errp,
+                            "export '%s' not present", sane_name);
+    }
+    meta->bitmaps = g_new0(bool, meta->exp->nr_export_bitmaps);
+    if (client->opt == NBD_OPT_LIST_META_CONTEXT) {
+        bitmaps = meta->bitmaps;
+    }
+
+    ret = nbd_opt_read(client, &nb_queries, sizeof(nb_queries), false, errp);
+    if (ret <= 0) {
+        return ret;
+    }
+    nb_queries = cpu_to_be32(nb_queries);
+    trace_nbd_negotiate_meta_context(nbd_opt_lookup(client->opt),
+                                     export_name, nb_queries);
+
+    if (client->opt == NBD_OPT_LIST_META_CONTEXT && !nb_queries) {
+        /* enable all known contexts */
+        meta->base_allocation = true;
+        meta->allocation_depth = meta->exp->allocation_depth;
+        memset(meta->bitmaps, 1, meta->exp->nr_export_bitmaps);
+    } else {
+        for (i = 0; i < nb_queries; ++i) {
+            ret = nbd_negotiate_meta_query(client, meta, errp);
+            if (ret <= 0) {
+                return ret;
+            }
+        }
+    }
+
+    if (meta->base_allocation) {
+        ret = nbd_negotiate_send_meta_context(client, "base:allocation",
+                                              NBD_META_ID_BASE_ALLOCATION,
+                                              errp);
+        if (ret < 0) {
+            return ret;
+        }
+        count++;
+    }
+
+    if (meta->allocation_depth) {
+        ret = nbd_negotiate_send_meta_context(client, "qemu:allocation-depth",
+                                              NBD_META_ID_ALLOCATION_DEPTH,
+                                              errp);
+        if (ret < 0) {
+            return ret;
+        }
+        count++;
+    }
+
+    for (i = 0; i < meta->exp->nr_export_bitmaps; i++) {
+        const char *bm_name;
+        g_autofree char *context = NULL;
+
+        if (!meta->bitmaps[i]) {
+            continue;
+        }
+
+        bm_name = bdrv_dirty_bitmap_name(meta->exp->export_bitmaps[i]);
+        context = g_strdup_printf("qemu:dirty-bitmap:%s", bm_name);
+
+        ret = nbd_negotiate_send_meta_context(client, context,
+                                              NBD_META_ID_DIRTY_BITMAP + i,
+                                              errp);
+        if (ret < 0) {
+            return ret;
+        }
+        count++;
+    }
+
+    ret = nbd_negotiate_send_rep(client, NBD_REP_ACK, errp);
+    if (ret == 0) {
+        meta->count = count;
+    }
+
+    return ret;
+}
+
+/* nbd_negotiate_options
+ * Process all NBD_OPT_* client option commands, during fixed newstyle
+ * negotiation.
+ * Return:
+ * -errno  on error, errp is set
+ * 0       on successful negotiation, errp is not set
+ * 1       if client sent NBD_OPT_ABORT, i.e. on valid disconnect,
+ *         errp is not set
+ */
+static int nbd_negotiate_options(NBDClient *client, Error **errp)
+{
+    uint32_t flags;
+    bool fixedNewstyle = false;
+    bool no_zeroes = false;
+
+    /* Client sends:
+        [ 0 ..   3]   client flags
+
+       Then we loop until NBD_OPT_EXPORT_NAME or NBD_OPT_GO:
+        [ 0 ..   7]   NBD_OPTS_MAGIC
+        [ 8 ..  11]   NBD option
+        [12 ..  15]   Data length
+        ...           Rest of request
+
+        [ 0 ..   7]   NBD_OPTS_MAGIC
+        [ 8 ..  11]   Second NBD option
+        [12 ..  15]   Data length
+        ...           Rest of request
+    */
+
+    if (nbd_read32(client->ioc, &flags, "flags", errp) < 0) {
+        return -EIO;
+    }
+    trace_nbd_negotiate_options_flags(flags);
+    if (flags & NBD_FLAG_C_FIXED_NEWSTYLE) {
+        fixedNewstyle = true;
+        flags &= ~NBD_FLAG_C_FIXED_NEWSTYLE;
+    }
+    if (flags & NBD_FLAG_C_NO_ZEROES) {
+        no_zeroes = true;
+        flags &= ~NBD_FLAG_C_NO_ZEROES;
+    }
+    if (flags != 0) {
+        error_setg(errp, "Unknown client flags 0x%" PRIx32 " received", flags);
+        return -EINVAL;
+    }
+
+    while (1) {
+        int ret;
+        uint32_t option, length;
+        uint64_t magic;
+
+        if (nbd_read64(client->ioc, &magic, "opts magic", errp) < 0) {
+            return -EINVAL;
+        }
+        trace_nbd_negotiate_options_check_magic(magic);
+        if (magic != NBD_OPTS_MAGIC) {
+            error_setg(errp, "Bad magic received");
+            return -EINVAL;
+        }
+
+        if (nbd_read32(client->ioc, &option, "option", errp) < 0) {
+            return -EINVAL;
+        }
+        client->opt = option;
+
+        if (nbd_read32(client->ioc, &length, "option length", errp) < 0) {
+            return -EINVAL;
+        }
+        assert(!client->optlen);
+        client->optlen = length;
+
+        if (length > NBD_MAX_BUFFER_SIZE) {
+            error_setg(errp, "len (%" PRIu32" ) is larger than max len (%u)",
+                       length, NBD_MAX_BUFFER_SIZE);
+            return -EINVAL;
+        }
+
+        trace_nbd_negotiate_options_check_option(option,
+                                                 nbd_opt_lookup(option));
+        if (client->tlscreds &&
+            client->ioc == (QIOChannel *)client->sioc) {
+            QIOChannel *tioc;
+            if (!fixedNewstyle) {
+                error_setg(errp, "Unsupported option 0x%" PRIx32, option);
+                return -EINVAL;
+            }
+            switch (option) {
+            case NBD_OPT_STARTTLS:
+                if (length) {
+                    /* Unconditionally drop the connection if the client
+                     * can't start a TLS negotiation correctly */
+                    return nbd_reject_length(client, true, errp);
+                }
+                tioc = nbd_negotiate_handle_starttls(client, errp);
+                if (!tioc) {
+                    return -EIO;
+                }
+                ret = 0;
+                object_unref(OBJECT(client->ioc));
+                client->ioc = QIO_CHANNEL(tioc);
+                break;
+
+            case NBD_OPT_EXPORT_NAME:
+                /* No way to return an error to client, so drop connection */
+                error_setg(errp, "Option 0x%x not permitted before TLS",
+                           option);
+                return -EINVAL;
+
+            default:
+                /* Let the client keep trying, unless they asked to
+                 * quit. Always try to give an error back to the
+                 * client; but when replying to OPT_ABORT, be aware
+                 * that the client may hang up before receiving the
+                 * error, in which case we are fine ignoring the
+                 * resulting EPIPE. */
+                ret = nbd_opt_drop(client, NBD_REP_ERR_TLS_REQD,
+                                   option == NBD_OPT_ABORT ? NULL : errp,
+                                   "Option 0x%" PRIx32
+                                   " not permitted before TLS", option);
+                if (option == NBD_OPT_ABORT) {
+                    return 1;
+                }
+                break;
+            }
+        } else if (fixedNewstyle) {
+            switch (option) {
+            case NBD_OPT_LIST:
+                if (length) {
+                    ret = nbd_reject_length(client, false, errp);
+                } else {
+                    ret = nbd_negotiate_handle_list(client, errp);
+                }
+                break;
+
+            case NBD_OPT_ABORT:
+                /* NBD spec says we must try to reply before
+                 * disconnecting, but that we must also tolerate
+                 * guests that don't wait for our reply. */
+                nbd_negotiate_send_rep(client, NBD_REP_ACK, NULL);
+                return 1;
+
+            case NBD_OPT_EXPORT_NAME:
+                return nbd_negotiate_handle_export_name(client, no_zeroes,
+                                                        errp);
+
+            case NBD_OPT_INFO:
+            case NBD_OPT_GO:
+                ret = nbd_negotiate_handle_info(client, errp);
+                if (ret == 1) {
+                    assert(option == NBD_OPT_GO);
+                    return 0;
+                }
+                break;
+
+            case NBD_OPT_STARTTLS:
+                if (length) {
+                    ret = nbd_reject_length(client, false, errp);
+                } else if (client->tlscreds) {
+                    ret = nbd_negotiate_send_rep_err(client,
+                                                     NBD_REP_ERR_INVALID, errp,
+                                                     "TLS already enabled");
+                } else {
+                    ret = nbd_negotiate_send_rep_err(client,
+                                                     NBD_REP_ERR_POLICY, errp,
+                                                     "TLS not configured");
+                }
+                break;
+
+            case NBD_OPT_STRUCTURED_REPLY:
+                if (length) {
+                    ret = nbd_reject_length(client, false, errp);
+                } else if (client->structured_reply) {
+                    ret = nbd_negotiate_send_rep_err(
+                        client, NBD_REP_ERR_INVALID, errp,
+                        "structured reply already negotiated");
+                } else {
+                    ret = nbd_negotiate_send_rep(client, NBD_REP_ACK, errp);
+                    client->structured_reply = true;
+                }
+                break;
+
+            case NBD_OPT_LIST_META_CONTEXT:
+            case NBD_OPT_SET_META_CONTEXT:
+                ret = nbd_negotiate_meta_queries(client, &client->export_meta,
+                                                 errp);
+                break;
+
+            default:
+                ret = nbd_opt_drop(client, NBD_REP_ERR_UNSUP, errp,
+                                   "Unsupported option %" PRIu32 " (%s)",
+                                   option, nbd_opt_lookup(option));
+                break;
+            }
+        } else {
+            /*
+             * If broken new-style we should drop the connection
+             * for anything except NBD_OPT_EXPORT_NAME
+             */
+            switch (option) {
+            case NBD_OPT_EXPORT_NAME:
+                return nbd_negotiate_handle_export_name(client, no_zeroes,
+                                                        errp);
+
+            default:
+                error_setg(errp, "Unsupported option %" PRIu32 " (%s)",
+                           option, nbd_opt_lookup(option));
+                return -EINVAL;
+            }
+        }
+        if (ret < 0) {
+            return ret;
+        }
+    }
+}
+
+/* nbd_negotiate
+ * Return:
+ * -errno  on error, errp is set
+ * 0       on successful negotiation, errp is not set
+ * 1       if client sent NBD_OPT_ABORT, i.e. on valid disconnect,
+ *         errp is not set
+ */
+static coroutine_fn int nbd_negotiate(NBDClient *client, Error **errp)
+{
+    ERRP_GUARD();
+    char buf[NBD_OLDSTYLE_NEGOTIATE_SIZE] = "";
+    int ret;
+
+    /* Old style negotiation header, no room for options
+        [ 0 ..   7]   passwd       ("NBDMAGIC")
+        [ 8 ..  15]   magic        (NBD_CLIENT_MAGIC)
+        [16 ..  23]   size
+        [24 ..  27]   export flags (zero-extended)
+        [28 .. 151]   reserved     (0)
+
+       New style negotiation header, client can send options
+        [ 0 ..   7]   passwd       ("NBDMAGIC")
+        [ 8 ..  15]   magic        (NBD_OPTS_MAGIC)
+        [16 ..  17]   server flags (0)
+        ....options sent, ending in NBD_OPT_EXPORT_NAME or NBD_OPT_GO....
+     */
+
+    qio_channel_set_blocking(client->ioc, false, NULL);
+
+    trace_nbd_negotiate_begin();
+    memcpy(buf, "NBDMAGIC", 8);
+
+    stq_be_p(buf + 8, NBD_OPTS_MAGIC);
+    stw_be_p(buf + 16, NBD_FLAG_FIXED_NEWSTYLE | NBD_FLAG_NO_ZEROES);
+
+    if (nbd_write(client->ioc, buf, 18, errp) < 0) {
+        error_prepend(errp, "write failed: ");
+        return -EINVAL;
+    }
+    ret = nbd_negotiate_options(client, errp);
+    if (ret != 0) {
+        if (ret < 0) {
+            error_prepend(errp, "option negotiation failed: ");
+        }
+        return ret;
+    }
+
+    /* Attach the channel to the same AioContext as the export */
+    if (client->exp && client->exp->common.ctx) {
+        qio_channel_attach_aio_context(client->ioc, client->exp->common.ctx);
+    }
+
+    assert(!client->optlen);
+    trace_nbd_negotiate_success();
+
+    return 0;
+}
+
+static int nbd_receive_request(QIOChannel *ioc, NBDRequest *request,
+                               Error **errp)
+{
+    uint8_t buf[NBD_REQUEST_SIZE];
+    uint32_t magic;
+    int ret;
+
+    ret = nbd_read(ioc, buf, sizeof(buf), "request", errp);
+    if (ret < 0) {
+        return ret;
+    }
+
+    /* Request
+       [ 0 ..  3]   magic   (NBD_REQUEST_MAGIC)
+       [ 4 ..  5]   flags   (NBD_CMD_FLAG_FUA, ...)
+       [ 6 ..  7]   type    (NBD_CMD_READ, ...)
+       [ 8 .. 15]   handle
+       [16 .. 23]   from
+       [24 .. 27]   len
+     */
+
+    magic = ldl_be_p(buf);
+    request->flags  = lduw_be_p(buf + 4);
+    request->type   = lduw_be_p(buf + 6);
+    request->handle = ldq_be_p(buf + 8);
+    request->from   = ldq_be_p(buf + 16);
+    request->len    = ldl_be_p(buf + 24);
+
+    trace_nbd_receive_request(magic, request->flags, request->type,
+                              request->from, request->len);
+
+    if (magic != NBD_REQUEST_MAGIC) {
+        error_setg(errp, "invalid magic (got 0x%" PRIx32 ")", magic);
+        return -EINVAL;
+    }
+    return 0;
+}
+
+#define MAX_NBD_REQUESTS 16
+
+void nbd_client_get(NBDClient *client)
+{
+    client->refcount++;
+}
+
+void nbd_client_put(NBDClient *client)
+{
+    if (--client->refcount == 0) {
+        /* The last reference should be dropped by client->close,
+         * which is called by client_close.
+         */
+        assert(client->closing);
+
+        qio_channel_detach_aio_context(client->ioc);
+        object_unref(OBJECT(client->sioc));
+        object_unref(OBJECT(client->ioc));
+        if (client->tlscreds) {
+            object_unref(OBJECT(client->tlscreds));
+        }
+        g_free(client->tlsauthz);
+        if (client->exp) {
+            QTAILQ_REMOVE(&client->exp->clients, client, next);
+            blk_exp_unref(&client->exp->common);
+        }
+        g_free(client->export_meta.bitmaps);
+        g_free(client);
+    }
+}
+
+static void client_close(NBDClient *client, bool negotiated)
+{
+    if (client->closing) {
+        return;
+    }
+
+    client->closing = true;
+
+    /* Force requests to finish.  They will drop their own references,
+     * then we'll close the socket and free the NBDClient.
+     */
+    qio_channel_shutdown(client->ioc, QIO_CHANNEL_SHUTDOWN_BOTH,
+                         NULL);
+
+    /* Also tell the client, so that they release their reference.  */
+    if (client->close_fn) {
+        client->close_fn(client, negotiated);
+    }
+}
+
+static NBDRequestData *nbd_request_get(NBDClient *client)
+{
+    NBDRequestData *req;
+
+    assert(client->nb_requests <= MAX_NBD_REQUESTS - 1);
+    client->nb_requests++;
+
+    req = g_new0(NBDRequestData, 1);
+    nbd_client_get(client);
+    req->client = client;
+    return req;
+}
+
+static void nbd_request_put(NBDRequestData *req)
+{
+    NBDClient *client = req->client;
+
+    if (req->data) {
+        qemu_vfree(req->data);
+    }
+    g_free(req);
+
+    client->nb_requests--;
+    nbd_client_receive_next_request(client);
+
+    nbd_client_put(client);
+}
+
+static void blk_aio_attached(AioContext *ctx, void *opaque)
+{
+    NBDExport *exp = opaque;
+    NBDClient *client;
+
+    trace_nbd_blk_aio_attached(exp->name, ctx);
+
+    exp->common.ctx = ctx;
+
+    QTAILQ_FOREACH(client, &exp->clients, next) {
+        qio_channel_attach_aio_context(client->ioc, ctx);
+        if (client->recv_coroutine) {
+            aio_co_schedule(ctx, client->recv_coroutine);
+        }
+        if (client->send_coroutine) {
+            aio_co_schedule(ctx, client->send_coroutine);
+        }
+    }
+}
+
+static void blk_aio_detach(void *opaque)
+{
+    NBDExport *exp = opaque;
+    NBDClient *client;
+
+    trace_nbd_blk_aio_detach(exp->name, exp->common.ctx);
+
+    QTAILQ_FOREACH(client, &exp->clients, next) {
+        qio_channel_detach_aio_context(client->ioc);
+    }
+
+    exp->common.ctx = NULL;
+}
+
+static void nbd_eject_notifier(Notifier *n, void *data)
+{
+    NBDExport *exp = container_of(n, NBDExport, eject_notifier);
+
+    blk_exp_request_shutdown(&exp->common);
+}
+
+void nbd_export_set_on_eject_blk(BlockExport *exp, BlockBackend *blk)
+{
+    NBDExport *nbd_exp = container_of(exp, NBDExport, common);
+    assert(exp->drv == &blk_exp_nbd);
+    assert(nbd_exp->eject_notifier_blk == NULL);
+
+    blk_ref(blk);
+    nbd_exp->eject_notifier_blk = blk;
+    nbd_exp->eject_notifier.notify = nbd_eject_notifier;
+    blk_add_remove_bs_notifier(blk, &nbd_exp->eject_notifier);
+}
+
+static int nbd_export_create(BlockExport *blk_exp, BlockExportOptions *exp_args,
+                             Error **errp)
+{
+    NBDExport *exp = container_of(blk_exp, NBDExport, common);
+    BlockExportOptionsNbd *arg = &exp_args->u.nbd;
+    BlockBackend *blk = blk_exp->blk;
+    int64_t size;
+    uint64_t perm, shared_perm;
+    bool readonly = !exp_args->writable;
+    bool shared = !exp_args->writable;
+    strList *bitmaps;
+    size_t i;
+    int ret;
+
+    assert(exp_args->type == BLOCK_EXPORT_TYPE_NBD);
+
+    if (!nbd_server_is_running()) {
+        error_setg(errp, "NBD server not running");
+        return -EINVAL;
+    }
+
+    if (!arg->has_name) {
+        arg->name = exp_args->node_name;
+    }
+
+    if (strlen(arg->name) > NBD_MAX_STRING_SIZE) {
+        error_setg(errp, "export name '%s' too long", arg->name);
+        return -EINVAL;
+    }
+
+    if (arg->description && strlen(arg->description) > NBD_MAX_STRING_SIZE) {
+        error_setg(errp, "description '%s' too long", arg->description);
+        return -EINVAL;
+    }
+
+    if (nbd_export_find(arg->name)) {
+        error_setg(errp, "NBD server already has export named '%s'", arg->name);
+        return -EEXIST;
+    }
+
+    size = blk_getlength(blk);
+    if (size < 0) {
+        error_setg_errno(errp, -size,
+                         "Failed to determine the NBD export's length");
+        return size;
+    }
+
+    /* Don't allow resize while the NBD server is running, otherwise we don't
+     * care what happens with the node. */
+    blk_get_perm(blk, &perm, &shared_perm);
+    ret = blk_set_perm(blk, perm, shared_perm & ~BLK_PERM_RESIZE, errp);
+    if (ret < 0) {
+        return ret;
+    }
+
+    QTAILQ_INIT(&exp->clients);
+    exp->name = g_strdup(arg->name);
+    exp->description = g_strdup(arg->description);
+    exp->nbdflags = (NBD_FLAG_HAS_FLAGS | NBD_FLAG_SEND_FLUSH |
+                     NBD_FLAG_SEND_FUA | NBD_FLAG_SEND_CACHE);
+    if (readonly) {
+        exp->nbdflags |= NBD_FLAG_READ_ONLY;
+        if (shared) {
+            exp->nbdflags |= NBD_FLAG_CAN_MULTI_CONN;
+        }
+    } else {
+        exp->nbdflags |= (NBD_FLAG_SEND_TRIM | NBD_FLAG_SEND_WRITE_ZEROES |
+                          NBD_FLAG_SEND_FAST_ZERO);
+    }
+    exp->size = QEMU_ALIGN_DOWN(size, BDRV_SECTOR_SIZE);
+
+    for (bitmaps = arg->bitmaps; bitmaps; bitmaps = bitmaps->next) {
+        exp->nr_export_bitmaps++;
+    }
+    exp->export_bitmaps = g_new0(BdrvDirtyBitmap *, exp->nr_export_bitmaps);
+    for (i = 0, bitmaps = arg->bitmaps; bitmaps;
+         i++, bitmaps = bitmaps->next) {
+        const char *bitmap = bitmaps->value;
+        BlockDriverState *bs = blk_bs(blk);
+        BdrvDirtyBitmap *bm = NULL;
+
+        while (bs) {
+            bm = bdrv_find_dirty_bitmap(bs, bitmap);
+            if (bm != NULL) {
+                break;
+            }
+
+            bs = bdrv_filter_or_cow_bs(bs);
+        }
+
+        if (bm == NULL) {
+            ret = -ENOENT;
+            error_setg(errp, "Bitmap '%s' is not found", bitmap);
+            goto fail;
+        }
+
+        if (bdrv_dirty_bitmap_check(bm, BDRV_BITMAP_ALLOW_RO, errp)) {
+            ret = -EINVAL;
+            goto fail;
+        }
+
+        if (readonly && bdrv_is_writable(bs) &&
+            bdrv_dirty_bitmap_enabled(bm)) {
+            ret = -EINVAL;
+            error_setg(errp,
+                       "Enabled bitmap '%s' incompatible with readonly export",
+                       bitmap);
+            goto fail;
+        }
+
+        exp->export_bitmaps[i] = bm;
+        assert(strlen(bitmap) <= BDRV_BITMAP_MAX_NAME_SIZE);
+    }
+
+    /* Mark bitmaps busy in a separate loop, to simplify roll-back concerns. */
+    for (i = 0; i < exp->nr_export_bitmaps; i++) {
+        bdrv_dirty_bitmap_set_busy(exp->export_bitmaps[i], true);
+    }
+
+    exp->allocation_depth = arg->allocation_depth;
+
+    blk_add_aio_context_notifier(blk, blk_aio_attached, blk_aio_detach, exp);
+
+    QTAILQ_INSERT_TAIL(&exports, exp, next);
+
+    return 0;
+
+fail:
+    g_free(exp->export_bitmaps);
+    g_free(exp->name);
+    g_free(exp->description);
+    return ret;
+}
+
+NBDExport *nbd_export_find(const char *name)
+{
+    NBDExport *exp;
+    QTAILQ_FOREACH(exp, &exports, next) {
+        if (strcmp(name, exp->name) == 0) {
+            return exp;
+        }
+    }
+
+    return NULL;
+}
+
+AioContext *
+nbd_export_aio_context(NBDExport *exp)
+{
+    return exp->common.ctx;
+}
+
+static void nbd_export_request_shutdown(BlockExport *blk_exp)
+{
+    NBDExport *exp = container_of(blk_exp, NBDExport, common);
+    NBDClient *client, *next;
+
+    blk_exp_ref(&exp->common);
+    /*
+     * TODO: Should we expand QMP NbdServerRemoveNode enum to allow a
+     * close mode that stops advertising the export to new clients but
+     * still permits existing clients to run to completion? Because of
+     * that possibility, nbd_export_close() can be called more than
+     * once on an export.
+     */
+    QTAILQ_FOREACH_SAFE(client, &exp->clients, next, next) {
+        client_close(client, true);
+    }
+    if (exp->name) {
+        g_free(exp->name);
+        exp->name = NULL;
+        QTAILQ_REMOVE(&exports, exp, next);
+    }
+    blk_exp_unref(&exp->common);
+}
+
+static void nbd_export_delete(BlockExport *blk_exp)
+{
+    size_t i;
+    NBDExport *exp = container_of(blk_exp, NBDExport, common);
+
+    assert(exp->name == NULL);
+    assert(QTAILQ_EMPTY(&exp->clients));
+
+    g_free(exp->description);
+    exp->description = NULL;
+
+    if (exp->common.blk) {
+        if (exp->eject_notifier_blk) {
+            notifier_remove(&exp->eject_notifier);
+            blk_unref(exp->eject_notifier_blk);
+        }
+        blk_remove_aio_context_notifier(exp->common.blk, blk_aio_attached,
+                                        blk_aio_detach, exp);
+    }
+
+    for (i = 0; i < exp->nr_export_bitmaps; i++) {
+        bdrv_dirty_bitmap_set_busy(exp->export_bitmaps[i], false);
+    }
+}
+
+const BlockExportDriver blk_exp_nbd = {
+    .type               = BLOCK_EXPORT_TYPE_NBD,
+    .instance_size      = sizeof(NBDExport),
+    .create             = nbd_export_create,
+    .delete             = nbd_export_delete,
+    .request_shutdown   = nbd_export_request_shutdown,
+};
+
+static int coroutine_fn nbd_co_send_iov(NBDClient *client, struct iovec *iov,
+                                        unsigned niov, Error **errp)
+{
+    int ret;
+
+    g_assert(qemu_in_coroutine());
+    qemu_co_mutex_lock(&client->send_lock);
+    client->send_coroutine = qemu_coroutine_self();
+
+    ret = qio_channel_writev_all(client->ioc, iov, niov, errp) < 0 ? -EIO : 0;
+
+    client->send_coroutine = NULL;
+    qemu_co_mutex_unlock(&client->send_lock);
+
+    return ret;
+}
+
+static inline void set_be_simple_reply(NBDSimpleReply *reply, uint64_t error,
+                                       uint64_t handle)
+{
+    stl_be_p(&reply->magic, NBD_SIMPLE_REPLY_MAGIC);
+    stl_be_p(&reply->error, error);
+    stq_be_p(&reply->handle, handle);
+}
+
+static int nbd_co_send_simple_reply(NBDClient *client,
+                                    uint64_t handle,
+                                    uint32_t error,
+                                    void *data,
+                                    size_t len,
+                                    Error **errp)
+{
+    NBDSimpleReply reply;
+    int nbd_err = system_errno_to_nbd_errno(error);
+    struct iovec iov[] = {
+        {.iov_base = &reply, .iov_len = sizeof(reply)},
+        {.iov_base = data, .iov_len = len}
+    };
+
+    trace_nbd_co_send_simple_reply(handle, nbd_err, nbd_err_lookup(nbd_err),
+                                   len);
+    set_be_simple_reply(&reply, nbd_err, handle);
+
+    return nbd_co_send_iov(client, iov, len ? 2 : 1, errp);
+}
+
+static inline void set_be_chunk(NBDStructuredReplyChunk *chunk, uint16_t flags,
+                                uint16_t type, uint64_t handle, uint32_t length)
+{
+    stl_be_p(&chunk->magic, NBD_STRUCTURED_REPLY_MAGIC);
+    stw_be_p(&chunk->flags, flags);
+    stw_be_p(&chunk->type, type);
+    stq_be_p(&chunk->handle, handle);
+    stl_be_p(&chunk->length, length);
+}
+
+static int coroutine_fn nbd_co_send_structured_done(NBDClient *client,
+                                                    uint64_t handle,
+                                                    Error **errp)
+{
+    NBDStructuredReplyChunk chunk;
+    struct iovec iov[] = {
+        {.iov_base = &chunk, .iov_len = sizeof(chunk)},
+    };
+
+    trace_nbd_co_send_structured_done(handle);
+    set_be_chunk(&chunk, NBD_REPLY_FLAG_DONE, NBD_REPLY_TYPE_NONE, handle, 0);
+
+    return nbd_co_send_iov(client, iov, 1, errp);
+}
+
+static int coroutine_fn nbd_co_send_structured_read(NBDClient *client,
+                                                    uint64_t handle,
+                                                    uint64_t offset,
+                                                    void *data,
+                                                    size_t size,
+                                                    bool final,
+                                                    Error **errp)
+{
+    NBDStructuredReadData chunk;
+    struct iovec iov[] = {
+        {.iov_base = &chunk, .iov_len = sizeof(chunk)},
+        {.iov_base = data, .iov_len = size}
+    };
+
+    assert(size);
+    trace_nbd_co_send_structured_read(handle, offset, data, size);
+    set_be_chunk(&chunk.h, final ? NBD_REPLY_FLAG_DONE : 0,
+                 NBD_REPLY_TYPE_OFFSET_DATA, handle,
+                 sizeof(chunk) - sizeof(chunk.h) + size);
+    stq_be_p(&chunk.offset, offset);
+
+    return nbd_co_send_iov(client, iov, 2, errp);
+}
+
+static int coroutine_fn nbd_co_send_structured_error(NBDClient *client,
+                                                     uint64_t handle,
+                                                     uint32_t error,
+                                                     const char *msg,
+                                                     Error **errp)
+{
+    NBDStructuredError chunk;
+    int nbd_err = system_errno_to_nbd_errno(error);
+    struct iovec iov[] = {
+        {.iov_base = &chunk, .iov_len = sizeof(chunk)},
+        {.iov_base = (char *)msg, .iov_len = msg ? strlen(msg) : 0},
+    };
+
+    assert(nbd_err);
+    trace_nbd_co_send_structured_error(handle, nbd_err,
+                                       nbd_err_lookup(nbd_err), msg ? msg : "");
+    set_be_chunk(&chunk.h, NBD_REPLY_FLAG_DONE, NBD_REPLY_TYPE_ERROR, handle,
+                 sizeof(chunk) - sizeof(chunk.h) + iov[1].iov_len);
+    stl_be_p(&chunk.error, nbd_err);
+    stw_be_p(&chunk.message_length, iov[1].iov_len);
+
+    return nbd_co_send_iov(client, iov, 1 + !!iov[1].iov_len, errp);
+}
+
+/* Do a sparse read and send the structured reply to the client.
+ * Returns -errno if sending fails. bdrv_block_status_above() failure is
+ * reported to the client, at which point this function succeeds.
+ */
+static int coroutine_fn nbd_co_send_sparse_read(NBDClient *client,
+                                                uint64_t handle,
+                                                uint64_t offset,
+                                                uint8_t *data,
+                                                size_t size,
+                                                Error **errp)
+{
+    int ret = 0;
+    NBDExport *exp = client->exp;
+    size_t progress = 0;
+
+    while (progress < size) {
+        int64_t pnum;
+        int status = bdrv_block_status_above(blk_bs(exp->common.blk), NULL,
+                                             offset + progress,
+                                             size - progress, &pnum, NULL,
+                                             NULL);
+        bool final;
+
+        if (status < 0) {
+            char *msg = g_strdup_printf("unable to check for holes: %s",
+                                        strerror(-status));
+
+            ret = nbd_co_send_structured_error(client, handle, -status, msg,
+                                               errp);
+            g_free(msg);
+            return ret;
+        }
+        assert(pnum && pnum <= size - progress);
+        final = progress + pnum == size;
+        if (status & BDRV_BLOCK_ZERO) {
+            NBDStructuredReadHole chunk;
+            struct iovec iov[] = {
+                {.iov_base = &chunk, .iov_len = sizeof(chunk)},
+            };
+
+            trace_nbd_co_send_structured_read_hole(handle, offset + progress,
+                                                   pnum);
+            set_be_chunk(&chunk.h, final ? NBD_REPLY_FLAG_DONE : 0,
+                         NBD_REPLY_TYPE_OFFSET_HOLE,
+                         handle, sizeof(chunk) - sizeof(chunk.h));
+            stq_be_p(&chunk.offset, offset + progress);
+            stl_be_p(&chunk.length, pnum);
+            ret = nbd_co_send_iov(client, iov, 1, errp);
+        } else {
+            ret = blk_pread(exp->common.blk, offset + progress,
+                            data + progress, pnum);
+            if (ret < 0) {
+                error_setg_errno(errp, -ret, "reading from file failed");
+                break;
+            }
+            ret = nbd_co_send_structured_read(client, handle, offset + progress,
+                                              data + progress, pnum, final,
+                                              errp);
+        }
+
+        if (ret < 0) {
+            break;
+        }
+        progress += pnum;
+    }
+    return ret;
+}
+
+typedef struct NBDExtentArray {
+    NBDExtent *extents;
+    unsigned int nb_alloc;
+    unsigned int count;
+    uint64_t total_length;
+    bool can_add;
+    bool converted_to_be;
+} NBDExtentArray;
+
+static NBDExtentArray *nbd_extent_array_new(unsigned int nb_alloc)
+{
+    NBDExtentArray *ea = g_new0(NBDExtentArray, 1);
+
+    ea->nb_alloc = nb_alloc;
+    ea->extents = g_new(NBDExtent, nb_alloc);
+    ea->can_add = true;
+
+    return ea;
+}
+
+static void nbd_extent_array_free(NBDExtentArray *ea)
+{
+    g_free(ea->extents);
+    g_free(ea);
+}
+G_DEFINE_AUTOPTR_CLEANUP_FUNC(NBDExtentArray, nbd_extent_array_free);
+
+/* Further modifications of the array after conversion are abandoned */
+static void nbd_extent_array_convert_to_be(NBDExtentArray *ea)
+{
+    int i;
+
+    assert(!ea->converted_to_be);
+    ea->can_add = false;
+    ea->converted_to_be = true;
+
+    for (i = 0; i < ea->count; i++) {
+        ea->extents[i].flags = cpu_to_be32(ea->extents[i].flags);
+        ea->extents[i].length = cpu_to_be32(ea->extents[i].length);
+    }
+}
+
+/*
+ * Add extent to NBDExtentArray. If extent can't be added (no available space),
+ * return -1.
+ * For safety, when returning -1 for the first time, .can_add is set to false,
+ * further call to nbd_extent_array_add() will crash.
+ * (to avoid the situation, when after failing to add an extent (returned -1),
+ * user miss this failure and add another extent, which is successfully added
+ * (array is full, but new extent may be squashed into the last one), then we
+ * have invalid array with skipped extent)
+ */
+static int nbd_extent_array_add(NBDExtentArray *ea,
+                                uint32_t length, uint32_t flags)
+{
+    assert(ea->can_add);
+
+    if (!length) {
+        return 0;
+    }
+
+    /* Extend previous extent if flags are the same */
+    if (ea->count > 0 && flags == ea->extents[ea->count - 1].flags) {
+        uint64_t sum = (uint64_t)length + ea->extents[ea->count - 1].length;
+
+        if (sum <= UINT32_MAX) {
+            ea->extents[ea->count - 1].length = sum;
+            ea->total_length += length;
+            return 0;
+        }
+    }
+
+    if (ea->count >= ea->nb_alloc) {
+        ea->can_add = false;
+        return -1;
+    }
+
+    ea->total_length += length;
+    ea->extents[ea->count] = (NBDExtent) {.length = length, .flags = flags};
+    ea->count++;
+
+    return 0;
+}
+
+static int blockstatus_to_extents(BlockDriverState *bs, uint64_t offset,
+                                  uint64_t bytes, NBDExtentArray *ea)
+{
+    while (bytes) {
+        uint32_t flags;
+        int64_t num;
+        int ret = bdrv_block_status_above(bs, NULL, offset, bytes, &num,
+                                          NULL, NULL);
+
+        if (ret < 0) {
+            return ret;
+        }
+
+        flags = (ret & BDRV_BLOCK_ALLOCATED ? 0 : NBD_STATE_HOLE) |
+                (ret & BDRV_BLOCK_ZERO      ? NBD_STATE_ZERO : 0);
+
+        if (nbd_extent_array_add(ea, num, flags) < 0) {
+            return 0;
+        }
+
+        offset += num;
+        bytes -= num;
+    }
+
+    return 0;
+}
+
+static int blockalloc_to_extents(BlockDriverState *bs, uint64_t offset,
+                                 uint64_t bytes, NBDExtentArray *ea)
+{
+    while (bytes) {
+        int64_t num;
+        int ret = bdrv_is_allocated_above(bs, NULL, false, offset, bytes,
+                                          &num);
+
+        if (ret < 0) {
+            return ret;
+        }
+
+        if (nbd_extent_array_add(ea, num, ret) < 0) {
+            return 0;
+        }
+
+        offset += num;
+        bytes -= num;
+    }
+
+    return 0;
+}
+
+/*
+ * nbd_co_send_extents
+ *
+ * @ea is converted to BE by the function
+ * @last controls whether NBD_REPLY_FLAG_DONE is sent.
+ */
+static int nbd_co_send_extents(NBDClient *client, uint64_t handle,
+                               NBDExtentArray *ea,
+                               bool last, uint32_t context_id, Error **errp)
+{
+    NBDStructuredMeta chunk;
+    struct iovec iov[] = {
+        {.iov_base = &chunk, .iov_len = sizeof(chunk)},
+        {.iov_base = ea->extents, .iov_len = ea->count * sizeof(ea->extents[0])}
+    };
+
+    nbd_extent_array_convert_to_be(ea);
+
+    trace_nbd_co_send_extents(handle, ea->count, context_id, ea->total_length,
+                              last);
+    set_be_chunk(&chunk.h, last ? NBD_REPLY_FLAG_DONE : 0,
+                 NBD_REPLY_TYPE_BLOCK_STATUS,
+                 handle, sizeof(chunk) - sizeof(chunk.h) + iov[1].iov_len);
+    stl_be_p(&chunk.context_id, context_id);
+
+    return nbd_co_send_iov(client, iov, 2, errp);
+}
+
+/* Get block status from the exported device and send it to the client */
+static int nbd_co_send_block_status(NBDClient *client, uint64_t handle,
+                                    BlockDriverState *bs, uint64_t offset,
+                                    uint32_t length, bool dont_fragment,
+                                    bool last, uint32_t context_id,
+                                    Error **errp)
+{
+    int ret;
+    unsigned int nb_extents = dont_fragment ? 1 : NBD_MAX_BLOCK_STATUS_EXTENTS;
+    g_autoptr(NBDExtentArray) ea = nbd_extent_array_new(nb_extents);
+
+    if (context_id == NBD_META_ID_BASE_ALLOCATION) {
+        ret = blockstatus_to_extents(bs, offset, length, ea);
+    } else {
+        ret = blockalloc_to_extents(bs, offset, length, ea);
+    }
+    if (ret < 0) {
+        return nbd_co_send_structured_error(
+                client, handle, -ret, "can't get block status", errp);
+    }
+
+    return nbd_co_send_extents(client, handle, ea, last, context_id, errp);
+}
+
+/* Populate @ea from a dirty bitmap. */
+static void bitmap_to_extents(BdrvDirtyBitmap *bitmap,
+                              uint64_t offset, uint64_t length,
+                              NBDExtentArray *es)
+{
+    int64_t start, dirty_start, dirty_count;
+    int64_t end = offset + length;
+    bool full = false;
+
+    bdrv_dirty_bitmap_lock(bitmap);
+
+    for (start = offset;
+         bdrv_dirty_bitmap_next_dirty_area(bitmap, start, end, INT32_MAX,
+                                           &dirty_start, &dirty_count);
+         start = dirty_start + dirty_count)
+    {
+        if ((nbd_extent_array_add(es, dirty_start - start, 0) < 0) ||
+            (nbd_extent_array_add(es, dirty_count, NBD_STATE_DIRTY) < 0))
+        {
+            full = true;
+            break;
+        }
+    }
+
+    if (!full) {
+        /* last non dirty extent, nothing to do if array is now full */
+        (void) nbd_extent_array_add(es, end - start, 0);
+    }
+
+    bdrv_dirty_bitmap_unlock(bitmap);
+}
+
+static int nbd_co_send_bitmap(NBDClient *client, uint64_t handle,
+                              BdrvDirtyBitmap *bitmap, uint64_t offset,
+                              uint32_t length, bool dont_fragment, bool last,
+                              uint32_t context_id, Error **errp)
+{
+    unsigned int nb_extents = dont_fragment ? 1 : NBD_MAX_BLOCK_STATUS_EXTENTS;
+    g_autoptr(NBDExtentArray) ea = nbd_extent_array_new(nb_extents);
+
+    bitmap_to_extents(bitmap, offset, length, ea);
+
+    return nbd_co_send_extents(client, handle, ea, last, context_id, errp);
+}
+
+/* nbd_co_receive_request
+ * Collect a client request. Return 0 if request looks valid, -EIO to drop
+ * connection right away, and any other negative value to report an error to
+ * the client (although the caller may still need to disconnect after reporting
+ * the error).
+ */
+static int nbd_co_receive_request(NBDRequestData *req, NBDRequest *request,
+                                  Error **errp)
+{
+    NBDClient *client = req->client;
+    int valid_flags;
+
+    g_assert(qemu_in_coroutine());
+    assert(client->recv_coroutine == qemu_coroutine_self());
+    if (nbd_receive_request(client->ioc, request, errp) < 0) {
+        return -EIO;
+    }
+
+    trace_nbd_co_receive_request_decode_type(request->handle, request->type,
+                                             nbd_cmd_lookup(request->type));
+
+    if (request->type != NBD_CMD_WRITE) {
+        /* No payload, we are ready to read the next request.  */
+        req->complete = true;
+    }
+
+    if (request->type == NBD_CMD_DISC) {
+        /* Special case: we're going to disconnect without a reply,
+         * whether or not flags, from, or len are bogus */
+        return -EIO;
+    }
+
+    if (request->type == NBD_CMD_READ || request->type == NBD_CMD_WRITE ||
+        request->type == NBD_CMD_CACHE)
+    {
+        if (request->len > NBD_MAX_BUFFER_SIZE) {
+            error_setg(errp, "len (%" PRIu32" ) is larger than max len (%u)",
+                       request->len, NBD_MAX_BUFFER_SIZE);
+            return -EINVAL;
+        }
+
+        if (request->type != NBD_CMD_CACHE) {
+            req->data = blk_try_blockalign(client->exp->common.blk,
+                                           request->len);
+            if (req->data == NULL) {
+                error_setg(errp, "No memory");
+                return -ENOMEM;
+            }
+        }
+    }
+
+    if (request->type == NBD_CMD_WRITE) {
+        if (nbd_read(client->ioc, req->data, request->len, "CMD_WRITE data",
+                     errp) < 0)
+        {
+            return -EIO;
+        }
+        req->complete = true;
+
+        trace_nbd_co_receive_request_payload_received(request->handle,
+                                                      request->len);
+    }
+
+    /* Sanity checks. */
+    if (client->exp->nbdflags & NBD_FLAG_READ_ONLY &&
+        (request->type == NBD_CMD_WRITE ||
+         request->type == NBD_CMD_WRITE_ZEROES ||
+         request->type == NBD_CMD_TRIM)) {
+        error_setg(errp, "Export is read-only");
+        return -EROFS;
+    }
+    if (request->from > client->exp->size ||
+        request->len > client->exp->size - request->from) {
+        error_setg(errp, "operation past EOF; From: %" PRIu64 ", Len: %" PRIu32
+                   ", Size: %" PRIu64, request->from, request->len,
+                   client->exp->size);
+        return (request->type == NBD_CMD_WRITE ||
+                request->type == NBD_CMD_WRITE_ZEROES) ? -ENOSPC : -EINVAL;
+    }
+    if (client->check_align && !QEMU_IS_ALIGNED(request->from | request->len,
+                                                client->check_align)) {
+        /*
+         * The block layer gracefully handles unaligned requests, but
+         * it's still worth tracing client non-compliance
+         */
+        trace_nbd_co_receive_align_compliance(nbd_cmd_lookup(request->type),
+                                              request->from,
+                                              request->len,
+                                              client->check_align);
+    }
+    valid_flags = NBD_CMD_FLAG_FUA;
+    if (request->type == NBD_CMD_READ && client->structured_reply) {
+        valid_flags |= NBD_CMD_FLAG_DF;
+    } else if (request->type == NBD_CMD_WRITE_ZEROES) {
+        valid_flags |= NBD_CMD_FLAG_NO_HOLE | NBD_CMD_FLAG_FAST_ZERO;
+    } else if (request->type == NBD_CMD_BLOCK_STATUS) {
+        valid_flags |= NBD_CMD_FLAG_REQ_ONE;
+    }
+    if (request->flags & ~valid_flags) {
+        error_setg(errp, "unsupported flags for command %s (got 0x%x)",
+                   nbd_cmd_lookup(request->type), request->flags);
+        return -EINVAL;
+    }
+
+    return 0;
+}
+
+/* Send simple reply without a payload, or a structured error
+ * @error_msg is ignored if @ret >= 0
+ * Returns 0 if connection is still live, -errno on failure to talk to client
+ */
+static coroutine_fn int nbd_send_generic_reply(NBDClient *client,
+                                               uint64_t handle,
+                                               int ret,
+                                               const char *error_msg,
+                                               Error **errp)
+{
+    if (client->structured_reply && ret < 0) {
+        return nbd_co_send_structured_error(client, handle, -ret, error_msg,
+                                            errp);
+    } else {
+        return nbd_co_send_simple_reply(client, handle, ret < 0 ? -ret : 0,
+                                        NULL, 0, errp);
+    }
+}
+
+/* Handle NBD_CMD_READ request.
+ * Return -errno if sending fails. Other errors are reported directly to the
+ * client as an error reply. */
+static coroutine_fn int nbd_do_cmd_read(NBDClient *client, NBDRequest *request,
+                                        uint8_t *data, Error **errp)
+{
+    int ret;
+    NBDExport *exp = client->exp;
+
+    assert(request->type == NBD_CMD_READ);
+
+    /* XXX: NBD Protocol only documents use of FUA with WRITE */
+    if (request->flags & NBD_CMD_FLAG_FUA) {
+        ret = blk_co_flush(exp->common.blk);
+        if (ret < 0) {
+            return nbd_send_generic_reply(client, request->handle, ret,
+                                          "flush failed", errp);
+        }
+    }
+
+    if (client->structured_reply && !(request->flags & NBD_CMD_FLAG_DF) &&
+        request->len)
+    {
+        return nbd_co_send_sparse_read(client, request->handle, request->from,
+                                       data, request->len, errp);
+    }
+
+    ret = blk_pread(exp->common.blk, request->from, data, request->len);
+    if (ret < 0) {
+        return nbd_send_generic_reply(client, request->handle, ret,
+                                      "reading from file failed", errp);
+    }
+
+    if (client->structured_reply) {
+        if (request->len) {
+            return nbd_co_send_structured_read(client, request->handle,
+                                               request->from, data,
+                                               request->len, true, errp);
+        } else {
+            return nbd_co_send_structured_done(client, request->handle, errp);
+        }
+    } else {
+        return nbd_co_send_simple_reply(client, request->handle, 0,
+                                        data, request->len, errp);
+    }
+}
+
+/*
+ * nbd_do_cmd_cache
+ *
+ * Handle NBD_CMD_CACHE request.
+ * Return -errno if sending fails. Other errors are reported directly to the
+ * client as an error reply.
+ */
+static coroutine_fn int nbd_do_cmd_cache(NBDClient *client, NBDRequest *request,
+                                         Error **errp)
+{
+    int ret;
+    NBDExport *exp = client->exp;
+
+    assert(request->type == NBD_CMD_CACHE);
+
+    ret = blk_co_preadv(exp->common.blk, request->from, request->len,
+                        NULL, BDRV_REQ_COPY_ON_READ | BDRV_REQ_PREFETCH);
+
+    return nbd_send_generic_reply(client, request->handle, ret,
+                                  "caching data failed", errp);
+}
+
+/* Handle NBD request.
+ * Return -errno if sending fails. Other errors are reported directly to the
+ * client as an error reply. */
+static coroutine_fn int nbd_handle_request(NBDClient *client,
+                                           NBDRequest *request,
+                                           uint8_t *data, Error **errp)
+{
+    int ret;
+    int flags;
+    NBDExport *exp = client->exp;
+    char *msg;
+    size_t i;
+
+    switch (request->type) {
+    case NBD_CMD_CACHE:
+        return nbd_do_cmd_cache(client, request, errp);
+
+    case NBD_CMD_READ:
+        return nbd_do_cmd_read(client, request, data, errp);
+
+    case NBD_CMD_WRITE:
+        flags = 0;
+        if (request->flags & NBD_CMD_FLAG_FUA) {
+            flags |= BDRV_REQ_FUA;
+        }
+        ret = blk_pwrite(exp->common.blk, request->from, data, request->len,
+                         flags);
+        return nbd_send_generic_reply(client, request->handle, ret,
+                                      "writing to file failed", errp);
+
+    case NBD_CMD_WRITE_ZEROES:
+        flags = 0;
+        if (request->flags & NBD_CMD_FLAG_FUA) {
+            flags |= BDRV_REQ_FUA;
+        }
+        if (!(request->flags & NBD_CMD_FLAG_NO_HOLE)) {
+            flags |= BDRV_REQ_MAY_UNMAP;
+        }
+        if (request->flags & NBD_CMD_FLAG_FAST_ZERO) {
+            flags |= BDRV_REQ_NO_FALLBACK;
+        }
+        ret = 0;
+        /* FIXME simplify this when blk_pwrite_zeroes switches to 64-bit */
+        while (ret >= 0 && request->len) {
+            int align = client->check_align ?: 1;
+            int len = MIN(request->len, QEMU_ALIGN_DOWN(BDRV_REQUEST_MAX_BYTES,
+                                                        align));
+            ret = blk_pwrite_zeroes(exp->common.blk, request->from, len, flags);
+            request->len -= len;
+            request->from += len;
+        }
+        return nbd_send_generic_reply(client, request->handle, ret,
+                                      "writing to file failed", errp);
+
+    case NBD_CMD_DISC:
+        /* unreachable, thanks to special case in nbd_co_receive_request() */
+        abort();
+
+    case NBD_CMD_FLUSH:
+        ret = blk_co_flush(exp->common.blk);
+        return nbd_send_generic_reply(client, request->handle, ret,
+                                      "flush failed", errp);
+
+    case NBD_CMD_TRIM:
+        ret = 0;
+        /* FIXME simplify this when blk_co_pdiscard switches to 64-bit */
+        while (ret >= 0 && request->len) {
+            int align = client->check_align ?: 1;
+            int len = MIN(request->len, QEMU_ALIGN_DOWN(BDRV_REQUEST_MAX_BYTES,
+                                                        align));
+            ret = blk_co_pdiscard(exp->common.blk, request->from, len);
+            request->len -= len;
+            request->from += len;
+        }
+        if (ret >= 0 && request->flags & NBD_CMD_FLAG_FUA) {
+            ret = blk_co_flush(exp->common.blk);
+        }
+        return nbd_send_generic_reply(client, request->handle, ret,
+                                      "discard failed", errp);
+
+    case NBD_CMD_BLOCK_STATUS:
+        if (!request->len) {
+            return nbd_send_generic_reply(client, request->handle, -EINVAL,
+                                          "need non-zero length", errp);
+        }
+        if (client->export_meta.count) {
+            bool dont_fragment = request->flags & NBD_CMD_FLAG_REQ_ONE;
+            int contexts_remaining = client->export_meta.count;
+
+            if (client->export_meta.base_allocation) {
+                ret = nbd_co_send_block_status(client, request->handle,
+                                               blk_bs(exp->common.blk),
+                                               request->from,
+                                               request->len, dont_fragment,
+                                               !--contexts_remaining,
+                                               NBD_META_ID_BASE_ALLOCATION,
+                                               errp);
+                if (ret < 0) {
+                    return ret;
+                }
+            }
+
+            if (client->export_meta.allocation_depth) {
+                ret = nbd_co_send_block_status(client, request->handle,
+                                               blk_bs(exp->common.blk),
+                                               request->from, request->len,
+                                               dont_fragment,
+                                               !--contexts_remaining,
+                                               NBD_META_ID_ALLOCATION_DEPTH,
+                                               errp);
+                if (ret < 0) {
+                    return ret;
+                }
+            }
+
+            for (i = 0; i < client->exp->nr_export_bitmaps; i++) {
+                if (!client->export_meta.bitmaps[i]) {
+                    continue;
+                }
+                ret = nbd_co_send_bitmap(client, request->handle,
+                                         client->exp->export_bitmaps[i],
+                                         request->from, request->len,
+                                         dont_fragment, !--contexts_remaining,
+                                         NBD_META_ID_DIRTY_BITMAP + i, errp);
+                if (ret < 0) {
+                    return ret;
+                }
+            }
+
+            assert(!contexts_remaining);
+
+            return 0;
+        } else {
+            return nbd_send_generic_reply(client, request->handle, -EINVAL,
+                                          "CMD_BLOCK_STATUS not negotiated",
+                                          errp);
+        }
+
+    default:
+        msg = g_strdup_printf("invalid request type (%" PRIu32 ") received",
+                              request->type);
+        ret = nbd_send_generic_reply(client, request->handle, -EINVAL, msg,
+                                     errp);
+        g_free(msg);
+        return ret;
+    }
+}
+
+/* Owns a reference to the NBDClient passed as opaque.  */
+static coroutine_fn void nbd_trip(void *opaque)
+{
+    NBDClient *client = opaque;
+    NBDRequestData *req;
+    NBDRequest request = { 0 };    /* GCC thinks it can be used uninitialized */
+    int ret;
+    Error *local_err = NULL;
+
+    trace_nbd_trip();
+    if (client->closing) {
+        nbd_client_put(client);
+        return;
+    }
+
+    req = nbd_request_get(client);
+    ret = nbd_co_receive_request(req, &request, &local_err);
+    client->recv_coroutine = NULL;
+
+    if (client->closing) {
+        /*
+         * The client may be closed when we are blocked in
+         * nbd_co_receive_request()
+         */
+        goto done;
+    }
+
+    nbd_client_receive_next_request(client);
+    if (ret == -EIO) {
+        goto disconnect;
+    }
+
+    if (ret < 0) {
+        /* It wans't -EIO, so, according to nbd_co_receive_request()
+         * semantics, we should return the error to the client. */
+        Error *export_err = local_err;
+
+        local_err = NULL;
+        ret = nbd_send_generic_reply(client, request.handle, -EINVAL,
+                                     error_get_pretty(export_err), &local_err);
+        error_free(export_err);
+    } else {
+        ret = nbd_handle_request(client, &request, req->data, &local_err);
+    }
+    if (ret < 0) {
+        error_prepend(&local_err, "Failed to send reply: ");
+        goto disconnect;
+    }
+
+    /* We must disconnect after NBD_CMD_WRITE if we did not
+     * read the payload.
+     */
+    if (!req->complete) {
+        error_setg(&local_err, "Request handling failed in intermediate state");
+        goto disconnect;
+    }
+
+done:
+    nbd_request_put(req);
+    nbd_client_put(client);
+    return;
+
+disconnect:
+    if (local_err) {
+        error_reportf_err(local_err, "Disconnect client, due to: ");
+    }
+    nbd_request_put(req);
+    client_close(client, true);
+    nbd_client_put(client);
+}
+
+static void nbd_client_receive_next_request(NBDClient *client)
+{
+    if (!client->recv_coroutine && client->nb_requests < MAX_NBD_REQUESTS) {
+        nbd_client_get(client);
+        client->recv_coroutine = qemu_coroutine_create(nbd_trip, client);
+        aio_co_schedule(client->exp->common.ctx, client->recv_coroutine);
+    }
+}
+
+static coroutine_fn void nbd_co_client_start(void *opaque)
+{
+    NBDClient *client = opaque;
+    Error *local_err = NULL;
+
+    qemu_co_mutex_init(&client->send_lock);
+
+    if (nbd_negotiate(client, &local_err)) {
+        if (local_err) {
+            error_report_err(local_err);
+        }
+        client_close(client, false);
+        return;
+    }
+
+    nbd_client_receive_next_request(client);
+}
+
+/*
+ * Create a new client listener using the given channel @sioc.
+ * Begin servicing it in a coroutine.  When the connection closes, call
+ * @close_fn with an indication of whether the client completed negotiation.
+ */
+void nbd_client_new(QIOChannelSocket *sioc,
+                    QCryptoTLSCreds *tlscreds,
+                    const char *tlsauthz,
+                    void (*close_fn)(NBDClient *, bool))
+{
+    NBDClient *client;
+    Coroutine *co;
+
+    client = g_new0(NBDClient, 1);
+    client->refcount = 1;
+    client->tlscreds = tlscreds;
+    if (tlscreds) {
+        object_ref(OBJECT(client->tlscreds));
+    }
+    client->tlsauthz = g_strdup(tlsauthz);
+    client->sioc = sioc;
+    object_ref(OBJECT(client->sioc));
+    client->ioc = QIO_CHANNEL(sioc);
+    object_ref(OBJECT(client->ioc));
+    client->close_fn = close_fn;
+
+    co = qemu_coroutine_create(nbd_co_client_start, client);
+    qemu_coroutine_enter(co);
+}
diff --git a/nbd/trace-events b/nbd/trace-events
new file mode 100644
index 000000000..a955918e9
--- /dev/null
+++ b/nbd/trace-events
@@ -0,0 +1,75 @@
+# See docs/devel/tracing.txt for syntax documentation.
+
+# client.c
+nbd_send_option_request(uint32_t opt, const char *name, uint32_t len) "Sending option request %" PRIu32" (%s), len %" PRIu32
+nbd_receive_option_reply(uint32_t option, const char *optname, uint32_t type, const char *typename, uint32_t length) "Received option reply %" PRIu32" (%s), type %" PRIu32" (%s), len %" PRIu32
+nbd_server_error_msg(uint32_t err, const char *type, const char *msg) "server reported error 0x%" PRIx32 " (%s) with additional message: %s"
+nbd_reply_err_ignored(uint32_t option, const char *name, uint32_t reply, const char *reply_name) "server failed request %" PRIu32 " (%s) with error 0x%" PRIx32 " (%s), attempting fallback"
+nbd_receive_list(const char *name, const char *desc) "export list includes '%s', description '%s'"
+nbd_opt_info_go_start(const char *opt, const char *name) "Attempting %s for export '%s'"
+nbd_opt_info_go_success(const char *opt) "Export is ready after %s request"
+nbd_opt_info_unknown(int info, const char *name) "Ignoring unknown info %d (%s)"
+nbd_opt_info_block_size(uint32_t minimum, uint32_t preferred, uint32_t maximum) "Block sizes are 0x%" PRIx32 ", 0x%" PRIx32 ", 0x%" PRIx32
+nbd_receive_query_exports_start(const char *wantname) "Querying export list for '%s'"
+nbd_receive_query_exports_success(const char *wantname) "Found desired export name '%s'"
+nbd_receive_starttls_new_client(void) "Setting up TLS"
+nbd_receive_starttls_tls_handshake(void) "Starting TLS handshake"
+nbd_opt_meta_request(const char *optname, const char *context, const char *export) "Requesting %s %s for export %s"
+nbd_opt_meta_reply(const char *optname, const char *context, uint32_t id) "Received %s mapping of %s to id %" PRIu32
+nbd_start_negotiate(void *tlscreds, const char *hostname) "Receiving negotiation tlscreds=%p hostname=%s"
+nbd_receive_negotiate_magic(uint64_t magic) "Magic is 0x%" PRIx64
+nbd_receive_negotiate_server_flags(uint32_t globalflags) "Global flags are 0x%" PRIx32
+nbd_receive_negotiate_name(const char *name) "Requesting NBD export name '%s'"
+nbd_receive_negotiate_size_flags(uint64_t size, uint16_t flags) "Size is %" PRIu64 ", export flags 0x%" PRIx16
+nbd_init_set_socket(void) "Setting NBD socket"
+nbd_init_set_block_size(unsigned long block_size) "Setting block size to %lu"
+nbd_init_set_size(unsigned long sectors) "Setting size to %lu block(s)"
+nbd_init_trailing_bytes(int ignored_bytes) "Ignoring trailing %d bytes of export"
+nbd_init_set_readonly(void) "Setting readonly attribute"
+nbd_init_finish(void) "Negotiation ended"
+nbd_client_loop(void) "Doing NBD loop"
+nbd_client_loop_ret(int ret, const char *error) "NBD loop returned %d: %s"
+nbd_client_clear_queue(void) "Clearing NBD queue"
+nbd_client_clear_socket(void) "Clearing NBD socket"
+nbd_send_request(uint64_t from, uint32_t len, uint64_t handle, uint16_t flags, uint16_t type, const char *name) "Sending request to server: { .from = %" PRIu64", .len = %" PRIu32 ", .handle = %" PRIu64 ", .flags = 0x%" PRIx16 ", .type = %" PRIu16 " (%s) }"
+nbd_receive_simple_reply(int32_t error, const char *errname, uint64_t handle) "Got simple reply: { .error = %" PRId32 " (%s), handle = %" PRIu64" }"
+nbd_receive_structured_reply_chunk(uint16_t flags, uint16_t type, const char *name, uint64_t handle, uint32_t length) "Got structured reply chunk: { flags = 0x%" PRIx16 ", type = %d (%s), handle = %" PRIu64 ", length = %" PRIu32 " }"
+
+# common.c
+nbd_unknown_error(int err) "Squashing unexpected error %d to EINVAL"
+
+# server.c
+nbd_negotiate_send_rep_len(uint32_t opt, const char *optname, uint32_t type, const char *typename, uint32_t len) "Reply opt=%" PRIu32 " (%s), type=%" PRIu32 " (%s), len=%" PRIu32
+nbd_negotiate_send_rep_err(const char *msg) "sending error message \"%s\""
+nbd_negotiate_send_rep_list(const char *name, const char *desc) "Advertising export name '%s' description '%s'"
+nbd_negotiate_handle_export_name(void) "Checking length"
+nbd_negotiate_handle_export_name_request(const char *name) "Client requested export '%s'"
+nbd_negotiate_send_info(int info, const char *name, uint32_t length) "Sending NBD_REP_INFO type %d (%s) with remaining length %" PRIu32
+nbd_negotiate_handle_info_requests(int requests) "Client requested %d items of info"
+nbd_negotiate_handle_info_request(int request, const char *name) "Client requested info %d (%s)"
+nbd_negotiate_handle_info_block_size(uint32_t minimum, uint32_t preferred, uint32_t maximum) "advertising minimum 0x%" PRIx32 ", preferred 0x%" PRIx32 ", maximum 0x%" PRIx32
+nbd_negotiate_handle_starttls(void) "Setting up TLS"
+nbd_negotiate_handle_starttls_handshake(void) "Starting TLS handshake"
+nbd_negotiate_meta_context(const char *optname, const char *export, uint32_t queries) "Client requested %s for export %s, with %" PRIu32 " queries"
+nbd_negotiate_meta_query_skip(const char *reason) "Skipping meta query: %s"
+nbd_negotiate_meta_query_parse(const char *query) "Parsed meta query '%s'"
+nbd_negotiate_meta_query_reply(const char *context, uint32_t id) "Replying with meta context '%s' id %" PRIu32
+nbd_negotiate_options_flags(uint32_t flags) "Received client flags 0x%" PRIx32
+nbd_negotiate_options_check_magic(uint64_t magic) "Checking opts magic 0x%" PRIx64
+nbd_negotiate_options_check_option(uint32_t option, const char *name) "Checking option %" PRIu32 " (%s)"
+nbd_negotiate_begin(void) "Beginning negotiation"
+nbd_negotiate_new_style_size_flags(uint64_t size, unsigned flags) "advertising size %" PRIu64 " and flags 0x%x"
+nbd_negotiate_success(void) "Negotiation succeeded"
+nbd_receive_request(uint32_t magic, uint16_t flags, uint16_t type, uint64_t from, uint32_t len) "Got request: { magic = 0x%" PRIx32 ", .flags = 0x%" PRIx16 ", .type = 0x%" PRIx16 ", from = %" PRIu64 ", len = %" PRIu32 " }"
+nbd_blk_aio_attached(const char *name, void *ctx) "Export %s: Attaching clients to AIO context %p"
+nbd_blk_aio_detach(const char *name, void *ctx) "Export %s: Detaching clients from AIO context %p"
+nbd_co_send_simple_reply(uint64_t handle, uint32_t error, const char *errname, int len) "Send simple reply: handle = %" PRIu64 ", error = %" PRIu32 " (%s), len = %d"
+nbd_co_send_structured_done(uint64_t handle) "Send structured reply done: handle = %" PRIu64
+nbd_co_send_structured_read(uint64_t handle, uint64_t offset, void *data, size_t size) "Send structured read data reply: handle = %" PRIu64 ", offset = %" PRIu64 ", data = %p, len = %zu"
+nbd_co_send_structured_read_hole(uint64_t handle, uint64_t offset, size_t size) "Send structured read hole reply: handle = %" PRIu64 ", offset = %" PRIu64 ", len = %zu"
+nbd_co_send_extents(uint64_t handle, unsigned int extents, uint32_t id, uint64_t length, int last) "Send block status reply: handle = %" PRIu64 ", extents = %u, context = %d (extents cover %" PRIu64 " bytes, last chunk = %d)"
+nbd_co_send_structured_error(uint64_t handle, int err, const char *errname, const char *msg) "Send structured error reply: handle = %" PRIu64 ", error = %d (%s), msg = '%s'"
+nbd_co_receive_request_decode_type(uint64_t handle, uint16_t type, const char *name) "Decoding type: handle = %" PRIu64 ", type = %" PRIu16 " (%s)"
+nbd_co_receive_request_payload_received(uint64_t handle, uint32_t len) "Payload received: handle = %" PRIu64 ", len = %" PRIu32
+nbd_co_receive_align_compliance(const char *op, uint64_t from, uint32_t len, uint32_t align) "client sent non-compliant unaligned %s request: from=0x%" PRIx64 ", len=0x%" PRIx32 ", align=0x%" PRIx32
+nbd_trip(void) "Reading request"
diff --git a/nbd/trace.h b/nbd/trace.h
new file mode 100644
index 000000000..233d08fdd
--- /dev/null
+++ b/nbd/trace.h
@@ -0,0 +1 @@
+#include "trace/trace-nbd.h"
diff --git a/os-posix.c b/os-posix.c
new file mode 100644
index 000000000..1de283955
--- /dev/null
+++ b/os-posix.c
@@ -0,0 +1,331 @@
+/*
+ * os-posix.c
+ *
+ * Copyright (c) 2003-2008 Fabrice Bellard
+ * Copyright (c) 2010 Red Hat, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+#include "qemu/osdep.h"
+#include 
+#include 
+#include 
+#include 
+
+#include "qemu-common.h"
+/* Needed early for CONFIG_BSD etc. */
+#include "net/slirp.h"
+#include "qemu-options.h"
+#include "qemu/error-report.h"
+#include "qemu/log.h"
+#include "sysemu/runstate.h"
+#include "qemu/cutils.h"
+
+#ifdef CONFIG_LINUX
+#include 
+#endif
+
+/*
+ * Must set all three of these at once.
+ * Legal combinations are              unset   by name   by uid
+ */
+static struct passwd *user_pwd;    /*   NULL   non-NULL   NULL   */
+static uid_t user_uid = (uid_t)-1; /*   -1      -1        >=0    */
+static gid_t user_gid = (gid_t)-1; /*   -1      -1        >=0    */
+
+static const char *chroot_dir;
+static int daemonize;
+static int daemon_pipe;
+
+void os_setup_early_signal_handling(void)
+{
+    struct sigaction act;
+    sigfillset(&act.sa_mask);
+    act.sa_flags = 0;
+    act.sa_handler = SIG_IGN;
+    sigaction(SIGPIPE, &act, NULL);
+}
+
+static void termsig_handler(int signal, siginfo_t *info, void *c)
+{
+    qemu_system_killed(info->si_signo, info->si_pid);
+}
+
+void os_setup_signal_handling(void)
+{
+    struct sigaction act;
+
+    memset(&act, 0, sizeof(act));
+    act.sa_sigaction = termsig_handler;
+    act.sa_flags = SA_SIGINFO;
+    sigaction(SIGINT,  &act, NULL);
+    sigaction(SIGHUP,  &act, NULL);
+    sigaction(SIGTERM, &act, NULL);
+}
+
+void os_set_proc_name(const char *s)
+{
+#if defined(PR_SET_NAME)
+    char name[16];
+    if (!s)
+        return;
+    pstrcpy(name, sizeof(name), s);
+    /* Could rewrite argv[0] too, but that's a bit more complicated.
+       This simple way is enough for `top'. */
+    if (prctl(PR_SET_NAME, name)) {
+        error_report("unable to change process name: %s", strerror(errno));
+        exit(1);
+    }
+#else
+    error_report("Change of process name not supported by your OS");
+    exit(1);
+#endif
+}
+
+
+static bool os_parse_runas_uid_gid(const char *optarg)
+{
+    unsigned long lv;
+    const char *ep;
+    uid_t got_uid;
+    gid_t got_gid;
+    int rc;
+
+    rc = qemu_strtoul(optarg, &ep, 0, &lv);
+    got_uid = lv; /* overflow here is ID in C99 */
+    if (rc || *ep != ':' || got_uid != lv || got_uid == (uid_t)-1) {
+        return false;
+    }
+
+    rc = qemu_strtoul(ep + 1, 0, 0, &lv);
+    got_gid = lv; /* overflow here is ID in C99 */
+    if (rc || got_gid != lv || got_gid == (gid_t)-1) {
+        return false;
+    }
+
+    user_pwd = NULL;
+    user_uid = got_uid;
+    user_gid = got_gid;
+    return true;
+}
+
+/*
+ * Parse OS specific command line options.
+ * return 0 if option handled, -1 otherwise
+ */
+int os_parse_cmd_args(int index, const char *optarg)
+{
+    switch (index) {
+    case QEMU_OPTION_runas:
+        user_pwd = getpwnam(optarg);
+        if (user_pwd) {
+            user_uid = -1;
+            user_gid = -1;
+        } else if (!os_parse_runas_uid_gid(optarg)) {
+            error_report("User \"%s\" doesn't exist"
+                         " (and is not :)",
+                         optarg);
+            exit(1);
+        }
+        break;
+    case QEMU_OPTION_chroot:
+        chroot_dir = optarg;
+        break;
+    case QEMU_OPTION_daemonize:
+        daemonize = 1;
+        break;
+#if defined(CONFIG_LINUX)
+    case QEMU_OPTION_enablefips:
+        fips_set_state(true);
+        break;
+#endif
+    default:
+        return -1;
+    }
+
+    return 0;
+}
+
+static void change_process_uid(void)
+{
+    assert((user_uid == (uid_t)-1) || user_pwd == NULL);
+    assert((user_uid == (uid_t)-1) ==
+           (user_gid == (gid_t)-1));
+
+    if (user_pwd || user_uid != (uid_t)-1) {
+        gid_t intended_gid = user_pwd ? user_pwd->pw_gid : user_gid;
+        uid_t intended_uid = user_pwd ? user_pwd->pw_uid : user_uid;
+        if (setgid(intended_gid) < 0) {
+            error_report("Failed to setgid(%d)", intended_gid);
+            exit(1);
+        }
+        if (user_pwd) {
+            if (initgroups(user_pwd->pw_name, user_pwd->pw_gid) < 0) {
+                error_report("Failed to initgroups(\"%s\", %d)",
+                        user_pwd->pw_name, user_pwd->pw_gid);
+                exit(1);
+            }
+        } else {
+            if (setgroups(1, &user_gid) < 0) {
+                error_report("Failed to setgroups(1, [%d])",
+                        user_gid);
+                exit(1);
+            }
+        }
+        if (setuid(intended_uid) < 0) {
+            error_report("Failed to setuid(%d)", intended_uid);
+            exit(1);
+        }
+        if (setuid(0) != -1) {
+            error_report("Dropping privileges failed");
+            exit(1);
+        }
+    }
+}
+
+static void change_root(void)
+{
+    if (chroot_dir) {
+        if (chroot(chroot_dir) < 0) {
+            error_report("chroot failed");
+            exit(1);
+        }
+        if (chdir("/")) {
+            error_report("not able to chdir to /: %s", strerror(errno));
+            exit(1);
+        }
+    }
+
+}
+
+void os_daemonize(void)
+{
+    if (daemonize) {
+        pid_t pid;
+        int fds[2];
+
+        if (pipe(fds) == -1) {
+            exit(1);
+        }
+
+        pid = fork();
+        if (pid > 0) {
+            uint8_t status;
+            ssize_t len;
+
+            close(fds[1]);
+
+            do {
+                len = read(fds[0], &status, 1);
+            } while (len < 0 && errno == EINTR);
+
+            /* only exit successfully if our child actually wrote
+             * a one-byte zero to our pipe, upon successful init */
+            exit(len == 1 && status == 0 ? 0 : 1);
+
+        } else if (pid < 0) {
+            exit(1);
+        }
+
+        close(fds[0]);
+        daemon_pipe = fds[1];
+        qemu_set_cloexec(daemon_pipe);
+
+        setsid();
+
+        pid = fork();
+        if (pid > 0) {
+            exit(0);
+        } else if (pid < 0) {
+            exit(1);
+        }
+        umask(027);
+
+        signal(SIGTSTP, SIG_IGN);
+        signal(SIGTTOU, SIG_IGN);
+        signal(SIGTTIN, SIG_IGN);
+    }
+}
+
+void os_setup_post(void)
+{
+    int fd = 0;
+
+    if (daemonize) {
+        if (chdir("/")) {
+            error_report("not able to chdir to /: %s", strerror(errno));
+            exit(1);
+        }
+        TFR(fd = qemu_open_old("/dev/null", O_RDWR));
+        if (fd == -1) {
+            exit(1);
+        }
+    }
+
+    change_root();
+    change_process_uid();
+
+    if (daemonize) {
+        uint8_t status = 0;
+        ssize_t len;
+
+        dup2(fd, 0);
+        dup2(fd, 1);
+        /* In case -D is given do not redirect stderr to /dev/null */
+        if (!qemu_logfile) {
+            dup2(fd, 2);
+        }
+
+        close(fd);
+
+        do {        
+            len = write(daemon_pipe, &status, 1);
+        } while (len < 0 && errno == EINTR);
+        if (len != 1) {
+            exit(1);
+        }
+    }
+}
+
+void os_set_line_buffering(void)
+{
+    setvbuf(stdout, NULL, _IOLBF, 0);
+}
+
+bool is_daemonized(void)
+{
+    return daemonize;
+}
+
+int os_mlock(void)
+{
+#ifdef HAVE_MLOCKALL
+    int ret = 0;
+
+    ret = mlockall(MCL_CURRENT | MCL_FUTURE);
+    if (ret < 0) {
+        error_report("mlockall: %s", strerror(errno));
+    }
+
+    return ret;
+#else
+    return -ENOSYS;
+#endif
+}
diff --git a/packaging/bridge.conf b/packaging/bridge.conf
new file mode 100755
index 000000000..d06c62a33
--- /dev/null
+++ b/packaging/bridge.conf
@@ -0,0 +1,11 @@
+# Access control file for qemu bridge helper
+# Syntax consists of:
+#   # comment (ignored)
+#   allow all
+#   allow 
+#   deny all
+#   deny 
+#   include /path/to/additional/ACL/file
+# Users are blacklisted by default and 'deny' takes precedence over 'allow'.
+# Including additional ACL files allows file access permissions to be used as
+# a component of the policy to allow access or deny access to specific bridges.
diff --git a/packaging/qemu-tools.spec b/packaging/qemu-tools.spec
new file mode 100755
index 000000000..de74b4582
--- /dev/null
+++ b/packaging/qemu-tools.spec
@@ -0,0 +1,141 @@
+#!ForceMultiversion
+
+%define _buildshell /bin/bash
+
+%define qemuver 5.2.0
+
+Name:           qemu
+URL:            https://www.qemu.org/
+Summary:        Machine emulator and virtualizer
+License:        BSD-2-Clause AND BSD-3-Clause AND GPL-2.0-only AND GPL-2.0-or-later AND LGPL-2.1-or-later AND MIT
+Group:          System/Emulators/PC
+Version:        %qemuver
+Release:        20.1
+Source:         qemu-%{version}.tar.xz
+Source1:        bridge.conf
+BuildRoot:      %{_tmppath}/%{name}-%{version}-build
+
+BuildRequires:  gcc-c++
+BuildRequires:  meson
+BuildRequires:  ninja >= 1.7
+
+BuildRequires:  pkgconfig
+BuildRequires:  pkgconfig(glib-2.0) >= 2.48
+
+%description
+QEMU provides CPU emulation along with other related capabilities. This package
+provides programs to run user space binaries and libraries meant for another
+architecture. The syscall interface is intercepted and execution below the
+syscall layer occurs on the native hardware and operating system.
+
+%package tools
+Summary:        Tools for QEMU
+Group:          System/Emulators/PC
+Version:        %{qemuver}
+Release:        20.1
+
+%description tools
+This package contains various QEMU related tools, including a bridge helper,
+a virtfs helper, ivshmem, disk utilities and scripts for various purposes.
+
+%prep
+%setup -q -n qemu-%{version}
+
+%build
+
+%define srcdir %{_builddir}/%buildsubdir
+%define blddir %srcdir/build
+mkdir -p %blddir
+cd %blddir
+
+%srcdir/configure \
+	--prefix=%_prefix \
+	--sysconfdir=%_sysconfdir \
+	--libdir=%_libdir \
+	--libexecdir=%_libexecdir \
+	--localstatedir=%_localstatedir \
+	--docdir=%_docdir \
+	--firmwarepath=%_datadir/%name \
+	--python=%_bindir/python3 \
+	--extra-cflags="%{optflags}" \
+	--without-default-devices \
+	--disable-auth-pam \
+	--disable-bochs \
+	--disable-blobs \
+	--disable-capstone \
+	--disable-cloop \
+	--disable-coroutine-pool \
+	--disable-dmg \
+	--disable-docs \
+	--disable-guest-agent \
+	--disable-hax \
+	--disable-hvf \
+	--disable-keyring \
+	--disable-libxml2 \
+	--disable-linux-aio \
+	--disable-linux-io-uring \
+	--disable-linux-user \
+	--disable-live-block-migration \
+	--disable-malloc-trim \
+	--disable-membarrier \
+	--disable-nettle \
+	--disable-parallels \
+	--disable-pie \
+	--disable-qcow1 \
+	--disable-qed \
+	--disable-qom-cast-debug \
+	--disable-rbd \
+	--disable-replication \
+	--disable-safe-stack \
+	--disable-sdl \
+	--disable-sdl-image \
+	--disable-stack-protector \
+	--disable-strip \
+	--disable-system \
+	--disable-tcg \
+	--disable-tpm \
+	--disable-vdi \
+	--disable-vnc \
+	--disable-vhost-crypto \
+	--disable-vhost-kernel \
+	--disable-vhost-net \
+	--disable-vhost-scsi \
+	--disable-vhost-user \
+	--disable-vhost-user-blk-server \
+	--disable-vhost-user-fs \
+	--disable-vhost-vdpa \
+	--disable-vhost-vsock \
+	--disable-vvfat \
+	--disable-whpx \
+	--disable-xen \
+	--disable-xkbcommon \
+	--enable-tools
+
+make %{?_smp_mflags} V=1
+
+%check
+
+%install
+cd %blddir
+
+make %{?_smp_mflags} install DESTDIR=%{buildroot}
+
+install -D -m 0644 %{SOURCE1} %{buildroot}%_sysconfdir/%name/bridge.conf
+install -D -m 0755 scripts/analyze-migration.py  %{buildroot}%_bindir/analyze-migration.py
+install -D -m 0755 scripts/vmstate-static-checker.py  %{buildroot}%_bindir/vmstate-static-checker.py
+
+rm -rf %{buildroot}%_datadir/qemu/keymaps
+unlink %{buildroot}%_datadir/qemu/trace-events-all
+
+%clean
+rm -rf %{buildroot}
+
+%files tools
+%defattr(-, root, root)
+%_bindir/analyze-migration.py
+%_bindir/qemu-img
+%_bindir/qemu-io
+%_bindir/qemu-nbd
+%_bindir/vmstate-static-checker.py
+%dir %_sysconfdir/%name
+%config %_sysconfdir/%name/bridge.conf
diff --git a/qapi/acpi.json b/qapi/acpi.json
new file mode 100644
index 000000000..51f0d55db
--- /dev/null
+++ b/qapi/acpi.json
@@ -0,0 +1,141 @@
+# -*- Mode: Python -*-
+# vim: filetype=python
+#
+# This work is licensed under the terms of the GNU GPL, version 2 or later.
+# See the COPYING file in the top-level directory.
+# SPDX-License-Identifier: GPL-2.0-or-later
+
+##
+# = ACPI
+##
+
+##
+# @AcpiTableOptions:
+#
+# Specify an ACPI table on the command line to load.
+#
+# At most one of @file and @data can be specified. The list of files specified
+# by any one of them is loaded and concatenated in order. If both are omitted,
+# @data is implied.
+#
+# Other fields / optargs can be used to override fields of the generic ACPI
+# table header; refer to the ACPI specification 5.0, section 5.2.6 System
+# Description Table Header. If a header field is not overridden, then the
+# corresponding value from the concatenated blob is used (in case of @file), or
+# it is filled in with a hard-coded value (in case of @data).
+#
+# String fields are copied into the matching ACPI member from lowest address
+# upwards, and silently truncated / NUL-padded to length.
+#
+# @sig: table signature / identifier (4 bytes)
+#
+# @rev: table revision number (dependent on signature, 1 byte)
+#
+# @oem_id: OEM identifier (6 bytes)
+#
+# @oem_table_id: OEM table identifier (8 bytes)
+#
+# @oem_rev: OEM-supplied revision number (4 bytes)
+#
+# @asl_compiler_id: identifier of the utility that created the table
+#                   (4 bytes)
+#
+# @asl_compiler_rev: revision number of the utility that created the
+#                    table (4 bytes)
+#
+# @file: colon (:) separated list of pathnames to load and
+#        concatenate as table data. The resultant binary blob is expected to
+#        have an ACPI table header. At least one file is required. This field
+#        excludes @data.
+#
+# @data: colon (:) separated list of pathnames to load and
+#        concatenate as table data. The resultant binary blob must not have an
+#        ACPI table header. At least one file is required. This field excludes
+#        @file.
+#
+# Since: 1.5
+##
+{ 'struct': 'AcpiTableOptions',
+  'data': {
+    '*sig':               'str',
+    '*rev':               'uint8',
+    '*oem_id':            'str',
+    '*oem_table_id':      'str',
+    '*oem_rev':           'uint32',
+    '*asl_compiler_id':   'str',
+    '*asl_compiler_rev':  'uint32',
+    '*file':              'str',
+    '*data':              'str' }}
+
+##
+# @ACPISlotType:
+#
+# @DIMM: memory slot
+# @CPU: logical CPU slot (since 2.7)
+##
+{ 'enum': 'ACPISlotType', 'data': [ 'DIMM', 'CPU' ] }
+
+##
+# @ACPIOSTInfo:
+#
+# OSPM Status Indication for a device
+# For description of possible values of @source and @status fields
+# see "_OST (OSPM Status Indication)" chapter of ACPI5.0 spec.
+#
+# @device: device ID associated with slot
+#
+# @slot: slot ID, unique per slot of a given @slot-type
+#
+# @slot-type: type of the slot
+#
+# @source: an integer containing the source event
+#
+# @status: an integer containing the status code
+#
+# Since: 2.1
+##
+{ 'struct': 'ACPIOSTInfo',
+  'data'  : { '*device': 'str',
+              'slot': 'str',
+              'slot-type': 'ACPISlotType',
+              'source': 'int',
+              'status': 'int' } }
+
+##
+# @query-acpi-ospm-status:
+#
+# Return a list of ACPIOSTInfo for devices that support status
+# reporting via ACPI _OST method.
+#
+# Since: 2.1
+#
+# Example:
+#
+# -> { "execute": "query-acpi-ospm-status" }
+# <- { "return": [ { "device": "d1", "slot": "0", "slot-type": "DIMM", "source": 1, "status": 0},
+#                  { "slot": "1", "slot-type": "DIMM", "source": 0, "status": 0},
+#                  { "slot": "2", "slot-type": "DIMM", "source": 0, "status": 0},
+#                  { "slot": "3", "slot-type": "DIMM", "source": 0, "status": 0}
+#    ]}
+#
+##
+{ 'command': 'query-acpi-ospm-status', 'returns': ['ACPIOSTInfo'] }
+
+##
+# @ACPI_DEVICE_OST:
+#
+# Emitted when guest executes ACPI _OST method.
+#
+# @info: OSPM Status Indication
+#
+# Since: 2.1
+#
+# Example:
+#
+# <- { "event": "ACPI_DEVICE_OST",
+#      "data": { "device": "d1", "slot": "0",
+#                "slot-type": "DIMM", "source": 1, "status": 0 } }
+#
+##
+{ 'event': 'ACPI_DEVICE_OST',
+     'data': { 'info': 'ACPIOSTInfo' } }
diff --git a/qapi/audio.json b/qapi/audio.json
new file mode 100644
index 000000000..072ed79de
--- /dev/null
+++ b/qapi/audio.json
@@ -0,0 +1,390 @@
+# -*- mode: python -*-
+#
+# Copyright (C) 2015-2019 Zoltán Kővágó 
+#
+# This work is licensed under the terms of the GNU GPL, version 2 or later.
+# See the COPYING file in the top-level directory.
+
+##
+# = Audio
+##
+
+##
+# @AudiodevPerDirectionOptions:
+#
+# General audio backend options that are used for both playback and
+# recording.
+#
+# @mixing-engine: use QEMU's mixing engine to mix all streams inside QEMU and
+#                 convert audio formats when not supported by the backend. When
+#                 set to off, fixed-settings must be also off (default on,
+#                 since 4.2)
+#
+# @fixed-settings: use fixed settings for host input/output. When off,
+#                  frequency, channels and format must not be
+#                  specified (default true)
+#
+# @frequency: frequency to use when using fixed settings
+#             (default 44100)
+#
+# @channels: number of channels when using fixed settings (default 2)
+#
+# @voices: number of voices to use (default 1)
+#
+# @format: sample format to use when using fixed settings
+#          (default s16)
+#
+# @buffer-length: the buffer length in microseconds
+#
+# Since: 4.0
+##
+{ 'struct': 'AudiodevPerDirectionOptions',
+  'data': {
+    '*mixing-engine':  'bool',
+    '*fixed-settings': 'bool',
+    '*frequency':      'uint32',
+    '*channels':       'uint32',
+    '*voices':         'uint32',
+    '*format':         'AudioFormat',
+    '*buffer-length':  'uint32' } }
+
+##
+# @AudiodevGenericOptions:
+#
+# Generic driver-specific options.
+#
+# @in: options of the capture stream
+#
+# @out: options of the playback stream
+#
+# Since: 4.0
+##
+{ 'struct': 'AudiodevGenericOptions',
+  'data': {
+    '*in':  'AudiodevPerDirectionOptions',
+    '*out': 'AudiodevPerDirectionOptions' } }
+
+##
+# @AudiodevAlsaPerDirectionOptions:
+#
+# Options of the ALSA backend that are used for both playback and
+# recording.
+#
+# @dev: the name of the ALSA device to use (default 'default')
+#
+# @period-length: the period length in microseconds
+#
+# @try-poll: attempt to use poll mode, falling back to non-polling
+#            access on failure (default true)
+#
+# Since: 4.0
+##
+{ 'struct': 'AudiodevAlsaPerDirectionOptions',
+  'base': 'AudiodevPerDirectionOptions',
+  'data': {
+    '*dev':           'str',
+    '*period-length': 'uint32',
+    '*try-poll':      'bool' } }
+
+##
+# @AudiodevAlsaOptions:
+#
+# Options of the ALSA audio backend.
+#
+# @in: options of the capture stream
+#
+# @out: options of the playback stream
+#
+# @threshold: set the threshold (in microseconds) when playback starts
+#
+# Since: 4.0
+##
+{ 'struct': 'AudiodevAlsaOptions',
+  'data': {
+    '*in':        'AudiodevAlsaPerDirectionOptions',
+    '*out':       'AudiodevAlsaPerDirectionOptions',
+    '*threshold': 'uint32' } }
+
+##
+# @AudiodevCoreaudioPerDirectionOptions:
+#
+# Options of the Core Audio backend that are used for both playback and
+# recording.
+#
+# @buffer-count: number of buffers
+#
+# Since: 4.0
+##
+{ 'struct': 'AudiodevCoreaudioPerDirectionOptions',
+  'base': 'AudiodevPerDirectionOptions',
+  'data': {
+    '*buffer-count': 'uint32' } }
+
+##
+# @AudiodevCoreaudioOptions:
+#
+# Options of the coreaudio audio backend.
+#
+# @in: options of the capture stream
+#
+# @out: options of the playback stream
+#
+# Since: 4.0
+##
+{ 'struct': 'AudiodevCoreaudioOptions',
+  'data': {
+    '*in':  'AudiodevCoreaudioPerDirectionOptions',
+    '*out': 'AudiodevCoreaudioPerDirectionOptions' } }
+
+##
+# @AudiodevDsoundOptions:
+#
+# Options of the DirectSound audio backend.
+#
+# @in: options of the capture stream
+#
+# @out: options of the playback stream
+#
+# @latency: add extra latency to playback in microseconds
+#           (default 10000)
+#
+# Since: 4.0
+##
+{ 'struct': 'AudiodevDsoundOptions',
+  'data': {
+    '*in':      'AudiodevPerDirectionOptions',
+    '*out':     'AudiodevPerDirectionOptions',
+    '*latency': 'uint32' } }
+
+##
+# @AudiodevJackPerDirectionOptions:
+#
+# Options of the JACK backend that are used for both playback and
+# recording.
+#
+# @server-name: select from among several possible concurrent server instances
+#               (default: environment variable $JACK_DEFAULT_SERVER if set, else "default")
+#
+# @client-name: the client name to use. The server will modify this name to
+#               create a unique variant, if needed unless @exact-name is true (default: the
+#               guest's name)
+#
+# @connect-ports: if set, a regular expression of JACK client port name(s) to
+#                 monitor for and automatically connect to
+#
+# @start-server: start a jack server process if one is not already present
+#                (default: false)
+#
+# @exact-name: use the exact name requested otherwise JACK automatically
+#              generates a unique one, if needed (default: false)
+#
+# Since: 5.1
+##
+{ 'struct': 'AudiodevJackPerDirectionOptions',
+  'base': 'AudiodevPerDirectionOptions',
+  'data': {
+    '*server-name':   'str',
+    '*client-name':   'str',
+    '*connect-ports': 'str',
+    '*start-server':  'bool',
+    '*exact-name':    'bool' } }
+
+##
+# @AudiodevJackOptions:
+#
+# Options of the JACK audio backend.
+#
+# @in: options of the capture stream
+#
+# @out: options of the playback stream
+#
+# Since: 5.1
+##
+{ 'struct': 'AudiodevJackOptions',
+  'data': {
+    '*in':  'AudiodevJackPerDirectionOptions',
+    '*out': 'AudiodevJackPerDirectionOptions' } }
+
+##
+# @AudiodevOssPerDirectionOptions:
+#
+# Options of the OSS backend that are used for both playback and
+# recording.
+#
+# @dev: file name of the OSS device (default '/dev/dsp')
+#
+# @buffer-count: number of buffers
+#
+# @try-poll: attempt to use poll mode, falling back to non-polling
+#            access on failure (default true)
+#
+# Since: 4.0
+##
+{ 'struct': 'AudiodevOssPerDirectionOptions',
+  'base': 'AudiodevPerDirectionOptions',
+  'data': {
+    '*dev':          'str',
+    '*buffer-count': 'uint32',
+    '*try-poll':     'bool' } }
+
+##
+# @AudiodevOssOptions:
+#
+# Options of the OSS audio backend.
+#
+# @in: options of the capture stream
+#
+# @out: options of the playback stream
+#
+# @try-mmap: try using memory-mapped access, falling back to
+#            non-memory-mapped access on failure (default true)
+#
+# @exclusive: open device in exclusive mode (vmix won't work)
+#             (default false)
+#
+# @dsp-policy: set the timing policy of the device (between 0 and 10,
+#              where smaller number means smaller latency but higher
+#              CPU usage) or -1 to use fragment mode (option ignored
+#              on some platforms) (default 5)
+#
+# Since: 4.0
+##
+{ 'struct': 'AudiodevOssOptions',
+  'data': {
+    '*in':         'AudiodevOssPerDirectionOptions',
+    '*out':        'AudiodevOssPerDirectionOptions',
+    '*try-mmap':   'bool',
+    '*exclusive':  'bool',
+    '*dsp-policy': 'uint32' } }
+
+##
+# @AudiodevPaPerDirectionOptions:
+#
+# Options of the Pulseaudio backend that are used for both playback and
+# recording.
+#
+# @name: name of the sink/source to use
+#
+# @stream-name: name of the PulseAudio stream created by qemu.  Can be
+#               used to identify the stream in PulseAudio when you
+#               create multiple PulseAudio devices or run multiple qemu
+#               instances (default: audiodev's id, since 4.2)
+#
+# @latency: latency you want PulseAudio to achieve in microseconds
+#           (default 15000)
+#
+# Since: 4.0
+##
+{ 'struct': 'AudiodevPaPerDirectionOptions',
+  'base': 'AudiodevPerDirectionOptions',
+  'data': {
+    '*name': 'str',
+    '*stream-name': 'str',
+    '*latency': 'uint32' } }
+
+##
+# @AudiodevPaOptions:
+#
+# Options of the PulseAudio audio backend.
+#
+# @in: options of the capture stream
+#
+# @out: options of the playback stream
+#
+# @server: PulseAudio server address (default: let PulseAudio choose)
+#
+# Since: 4.0
+##
+{ 'struct': 'AudiodevPaOptions',
+  'data': {
+    '*in':     'AudiodevPaPerDirectionOptions',
+    '*out':    'AudiodevPaPerDirectionOptions',
+    '*server': 'str' } }
+
+##
+# @AudiodevWavOptions:
+#
+# Options of the wav audio backend.
+#
+# @in: options of the capture stream
+#
+# @out: options of the playback stream
+#
+# @path: name of the wav file to record (default 'qemu.wav')
+#
+# Since: 4.0
+##
+{ 'struct': 'AudiodevWavOptions',
+  'data': {
+    '*in':   'AudiodevPerDirectionOptions',
+    '*out':  'AudiodevPerDirectionOptions',
+    '*path': 'str' } }
+
+
+##
+# @AudioFormat:
+#
+# An enumeration of possible audio formats.
+#
+# @u8: unsigned 8 bit integer
+#
+# @s8: signed 8 bit integer
+#
+# @u16: unsigned 16 bit integer
+#
+# @s16: signed 16 bit integer
+#
+# @u32: unsigned 32 bit integer
+#
+# @s32: signed 32 bit integer
+#
+# @f32: single precision floating-point (since 5.0)
+#
+# Since: 4.0
+##
+{ 'enum': 'AudioFormat',
+  'data': [ 'u8', 's8', 'u16', 's16', 'u32', 's32', 'f32' ] }
+
+##
+# @AudiodevDriver:
+#
+# An enumeration of possible audio backend drivers.
+#
+# @jack: JACK audio backend (since 5.1)
+#
+# Since: 4.0
+##
+{ 'enum': 'AudiodevDriver',
+  'data': [ 'none', 'alsa', 'coreaudio', 'dsound', 'jack', 'oss', 'pa',
+            'sdl', 'spice', 'wav' ] }
+
+##
+# @Audiodev:
+#
+# Options of an audio backend.
+#
+# @id: identifier of the backend
+#
+# @driver: the backend driver to use
+#
+# @timer-period: timer period (in microseconds, 0: use lowest possible)
+#
+# Since: 4.0
+##
+{ 'union': 'Audiodev',
+  'base': {
+    'id':            'str',
+    'driver':        'AudiodevDriver',
+    '*timer-period': 'uint32' },
+  'discriminator': 'driver',
+  'data': {
+    'none':      'AudiodevGenericOptions',
+    'alsa':      'AudiodevAlsaOptions',
+    'coreaudio': 'AudiodevCoreaudioOptions',
+    'dsound':    'AudiodevDsoundOptions',
+    'jack':      'AudiodevJackOptions',
+    'oss':       'AudiodevOssOptions',
+    'pa':        'AudiodevPaOptions',
+    'sdl':       'AudiodevGenericOptions',
+    'spice':     'AudiodevGenericOptions',
+    'wav':       'AudiodevWavOptions' } }
diff --git a/qapi/authz.json b/qapi/authz.json
new file mode 100644
index 000000000..42afe752d
--- /dev/null
+++ b/qapi/authz.json
@@ -0,0 +1,61 @@
+# -*- Mode: Python -*-
+# vim: filetype=python
+
+##
+# = User authorization
+##
+
+##
+# @QAuthZListPolicy:
+#
+# The authorization policy result
+#
+# @deny: deny access
+# @allow: allow access
+#
+# Since: 4.0
+##
+{ 'enum': 'QAuthZListPolicy',
+  'prefix': 'QAUTHZ_LIST_POLICY',
+  'data': ['deny', 'allow']}
+
+##
+# @QAuthZListFormat:
+#
+# The authorization policy match format
+#
+# @exact: an exact string match
+# @glob: string with ? and * shell wildcard support
+#
+# Since: 4.0
+##
+{ 'enum': 'QAuthZListFormat',
+  'prefix': 'QAUTHZ_LIST_FORMAT',
+  'data': ['exact', 'glob']}
+
+##
+# @QAuthZListRule:
+#
+# A single authorization rule.
+#
+# @match: a string or glob to match against a user identity
+# @policy: the result to return if @match evaluates to true
+# @format: the format of the @match rule (default 'exact')
+#
+# Since: 4.0
+##
+{ 'struct': 'QAuthZListRule',
+  'data': {'match': 'str',
+           'policy': 'QAuthZListPolicy',
+           '*format': 'QAuthZListFormat'}}
+
+##
+# @QAuthZListRuleListHack:
+#
+# Not exposed via QMP; hack to generate QAuthZListRuleList
+# for use internally by the code.
+#
+# Since: 4.0
+##
+{ 'struct': 'QAuthZListRuleListHack',
+  'data': { 'unused': ['QAuthZListRule'] } }
diff --git a/qapi/block-core.json b/qapi/block-core.json
new file mode 100644
index 000000000..04ad80bc1
--- /dev/null
+++ b/qapi/block-core.json
@@ -0,0 +1,5385 @@
+# -*- Mode: Python -*-
+# vim: filetype=python
+
+##
+# == Block core (VM unrelated)
+##
+
+{ 'include': 'common.json' }
+{ 'include': 'crypto.json' }
+{ 'include': 'job.json' }
+{ 'include': 'sockets.json' }
+
+##
+# @SnapshotInfo:
+#
+# @id: unique snapshot id
+#
+# @name: user chosen name
+#
+# @vm-state-size: size of the VM state
+#
+# @date-sec: UTC date of the snapshot in seconds
+#
+# @date-nsec: fractional part in nano seconds to be used with date-sec
+#
+# @vm-clock-sec: VM clock relative to boot in seconds
+#
+# @vm-clock-nsec: fractional part in nano seconds to be used with vm-clock-sec
+#
+# @icount: Current instruction count. Appears when execution record/replay
+#          is enabled. Used for "time-traveling" to match the moment
+#          in the recorded execution with the snapshots. This counter may
+#          be obtained through @query-replay command (since 5.2)
+#
+# Since: 1.3
+#
+##
+{ 'struct': 'SnapshotInfo',
+  'data': { 'id': 'str', 'name': 'str', 'vm-state-size': 'int',
+            'date-sec': 'int', 'date-nsec': 'int',
+            'vm-clock-sec': 'int', 'vm-clock-nsec': 'int',
+            '*icount': 'int' } }
+
+##
+# @ImageInfoSpecificQCow2EncryptionBase:
+#
+# @format: The encryption format
+#
+# Since: 2.10
+##
+{ 'struct': 'ImageInfoSpecificQCow2EncryptionBase',
+  'data': { 'format': 'BlockdevQcow2EncryptionFormat'}}
+
+##
+# @ImageInfoSpecificQCow2Encryption:
+#
+# Since: 2.10
+##
+{ 'union': 'ImageInfoSpecificQCow2Encryption',
+  'base': 'ImageInfoSpecificQCow2EncryptionBase',
+  'discriminator': 'format',
+  'data': { 'luks': 'QCryptoBlockInfoLUKS' } }
+
+##
+# @ImageInfoSpecificQCow2:
+#
+# @compat: compatibility level
+#
+# @data-file: the filename of the external data file that is stored in the
+#             image and used as a default for opening the image (since: 4.0)
+#
+# @data-file-raw: True if the external data file must stay valid as a
+#                 standalone (read-only) raw image without looking at qcow2
+#                 metadata (since: 4.0)
+#
+# @extended-l2: true if the image has extended L2 entries; only valid for
+#               compat >= 1.1 (since 5.2)
+#
+# @lazy-refcounts: on or off; only valid for compat >= 1.1
+#
+# @corrupt: true if the image has been marked corrupt; only valid for
+#           compat >= 1.1 (since 2.2)
+#
+# @refcount-bits: width of a refcount entry in bits (since 2.3)
+#
+# @encrypt: details about encryption parameters; only set if image
+#           is encrypted (since 2.10)
+#
+# @bitmaps: A list of qcow2 bitmap details (since 4.0)
+#
+# @compression-type: the image cluster compression method (since 5.1)
+#
+# Since: 1.7
+##
+{ 'struct': 'ImageInfoSpecificQCow2',
+  'data': {
+      'compat': 'str',
+      '*data-file': 'str',
+      '*data-file-raw': 'bool',
+      '*extended-l2': 'bool',
+      '*lazy-refcounts': 'bool',
+      '*corrupt': 'bool',
+      'refcount-bits': 'int',
+      '*encrypt': 'ImageInfoSpecificQCow2Encryption',
+      '*bitmaps': ['Qcow2BitmapInfo'],
+      'compression-type': 'Qcow2CompressionType'
+  } }
+
+##
+# @ImageInfoSpecificVmdk:
+#
+# @create-type: The create type of VMDK image
+#
+# @cid: Content id of image
+#
+# @parent-cid: Parent VMDK image's cid
+#
+# @extents: List of extent files
+#
+# Since: 1.7
+##
+{ 'struct': 'ImageInfoSpecificVmdk',
+  'data': {
+      'create-type': 'str',
+      'cid': 'int',
+      'parent-cid': 'int',
+      'extents': ['ImageInfo']
+  } }
+
+##
+# @ImageInfoSpecific:
+#
+# A discriminated record of image format specific information structures.
+#
+# Since: 1.7
+##
+{ 'union': 'ImageInfoSpecific',
+  'data': {
+      'qcow2': 'ImageInfoSpecificQCow2',
+      'vmdk': 'ImageInfoSpecificVmdk',
+      # If we need to add block driver specific parameters for
+      # LUKS in future, then we'll subclass QCryptoBlockInfoLUKS
+      # to define a ImageInfoSpecificLUKS
+      'luks': 'QCryptoBlockInfoLUKS'
+  } }
+
+##
+# @ImageInfo:
+#
+# Information about a QEMU image file
+#
+# @filename: name of the image file
+#
+# @format: format of the image file
+#
+# @virtual-size: maximum capacity in bytes of the image
+#
+# @actual-size: actual size on disk in bytes of the image
+#
+# @dirty-flag: true if image is not cleanly closed
+#
+# @cluster-size: size of a cluster in bytes
+#
+# @encrypted: true if the image is encrypted
+#
+# @compressed: true if the image is compressed (Since 1.7)
+#
+# @backing-filename: name of the backing file
+#
+# @full-backing-filename: full path of the backing file
+#
+# @backing-filename-format: the format of the backing file
+#
+# @snapshots: list of VM snapshots
+#
+# @backing-image: info of the backing image (since 1.6)
+#
+# @format-specific: structure supplying additional format-specific
+#                   information (since 1.7)
+#
+# Since: 1.3
+#
+##
+{ 'struct': 'ImageInfo',
+  'data': {'filename': 'str', 'format': 'str', '*dirty-flag': 'bool',
+           '*actual-size': 'int', 'virtual-size': 'int',
+           '*cluster-size': 'int', '*encrypted': 'bool', '*compressed': 'bool',
+           '*backing-filename': 'str', '*full-backing-filename': 'str',
+           '*backing-filename-format': 'str', '*snapshots': ['SnapshotInfo'],
+           '*backing-image': 'ImageInfo',
+           '*format-specific': 'ImageInfoSpecific' } }
+
+##
+# @ImageCheck:
+#
+# Information about a QEMU image file check
+#
+# @filename: name of the image file checked
+#
+# @format: format of the image file checked
+#
+# @check-errors: number of unexpected errors occurred during check
+#
+# @image-end-offset: offset (in bytes) where the image ends, this
+#                    field is present if the driver for the image format
+#                    supports it
+#
+# @corruptions: number of corruptions found during the check if any
+#
+# @leaks: number of leaks found during the check if any
+#
+# @corruptions-fixed: number of corruptions fixed during the check
+#                     if any
+#
+# @leaks-fixed: number of leaks fixed during the check if any
+#
+# @total-clusters: total number of clusters, this field is present
+#                  if the driver for the image format supports it
+#
+# @allocated-clusters: total number of allocated clusters, this
+#                      field is present if the driver for the image format
+#                      supports it
+#
+# @fragmented-clusters: total number of fragmented clusters, this
+#                       field is present if the driver for the image format
+#                       supports it
+#
+# @compressed-clusters: total number of compressed clusters, this
+#                       field is present if the driver for the image format
+#                       supports it
+#
+# Since: 1.4
+#
+##
+{ 'struct': 'ImageCheck',
+  'data': {'filename': 'str', 'format': 'str', 'check-errors': 'int',
+           '*image-end-offset': 'int', '*corruptions': 'int', '*leaks': 'int',
+           '*corruptions-fixed': 'int', '*leaks-fixed': 'int',
+           '*total-clusters': 'int', '*allocated-clusters': 'int',
+           '*fragmented-clusters': 'int', '*compressed-clusters': 'int' } }
+
+##
+# @MapEntry:
+#
+# Mapping information from a virtual block range to a host file range
+#
+# @start: virtual (guest) offset of the first byte described by this
+#         entry
+#
+# @length: the number of bytes of the mapped virtual range
+#
+# @data: reading the image will actually read data from a file (in
+#        particular, if @offset is present this means that the sectors
+#        are not simply preallocated, but contain actual data in raw
+#        format)
+#
+# @zero: whether the virtual blocks read as zeroes
+#
+# @depth: number of layers (0 = top image, 1 = top image's backing
+#         file, ..., n - 1 = bottom image (where n is the number of
+#         images in the chain)) before reaching one for which the
+#         range is allocated
+#
+# @offset: if present, the image file stores the data for this range
+#          in raw format at the given (host) offset
+#
+# @filename: filename that is referred to by @offset
+#
+# Since: 2.6
+#
+##
+{ 'struct': 'MapEntry',
+  'data': {'start': 'int', 'length': 'int', 'data': 'bool',
+           'zero': 'bool', 'depth': 'int', '*offset': 'int',
+           '*filename': 'str' } }
+
+##
+# @BlockdevCacheInfo:
+#
+# Cache mode information for a block device
+#
+# @writeback:   true if writeback mode is enabled
+# @direct:      true if the host page cache is bypassed (O_DIRECT)
+# @no-flush:    true if flush requests are ignored for the device
+#
+# Since: 2.3
+##
+{ 'struct': 'BlockdevCacheInfo',
+  'data': { 'writeback': 'bool',
+            'direct': 'bool',
+            'no-flush': 'bool' } }
+
+##
+# @BlockDeviceInfo:
+#
+# Information about the backing device for a block device.
+#
+# @file: the filename of the backing device
+#
+# @node-name: the name of the block driver node (Since 2.0)
+#
+# @ro: true if the backing device was open read-only
+#
+# @drv: the name of the block format used to open the backing device. As of
+#       0.14.0 this can be: 'blkdebug', 'bochs', 'cloop', 'cow', 'dmg',
+#       'file', 'file', 'ftp', 'ftps', 'host_cdrom', 'host_device',
+#       'http', 'https', 'luks', 'nbd', 'parallels', 'qcow',
+#       'qcow2', 'raw', 'vdi', 'vmdk', 'vpc', 'vvfat'
+#       2.2: 'archipelago' added, 'cow' dropped
+#       2.3: 'host_floppy' deprecated
+#       2.5: 'host_floppy' dropped
+#       2.6: 'luks' added
+#       2.8: 'replication' added, 'tftp' dropped
+#       2.9: 'archipelago' dropped
+#
+# @backing_file: the name of the backing file (for copy-on-write)
+#
+# @backing_file_depth: number of files in the backing file chain (since: 1.2)
+#
+# @encrypted: true if the backing device is encrypted
+#
+# @encryption_key_missing: always false
+#
+# @detect_zeroes: detect and optimize zero writes (Since 2.1)
+#
+# @bps: total throughput limit in bytes per second is specified
+#
+# @bps_rd: read throughput limit in bytes per second is specified
+#
+# @bps_wr: write throughput limit in bytes per second is specified
+#
+# @iops: total I/O operations per second is specified
+#
+# @iops_rd: read I/O operations per second is specified
+#
+# @iops_wr: write I/O operations per second is specified
+#
+# @image: the info of image used (since: 1.6)
+#
+# @bps_max: total throughput limit during bursts,
+#                     in bytes (Since 1.7)
+#
+# @bps_rd_max: read throughput limit during bursts,
+#                        in bytes (Since 1.7)
+#
+# @bps_wr_max: write throughput limit during bursts,
+#                        in bytes (Since 1.7)
+#
+# @iops_max: total I/O operations per second during bursts,
+#                      in bytes (Since 1.7)
+#
+# @iops_rd_max: read I/O operations per second during bursts,
+#                         in bytes (Since 1.7)
+#
+# @iops_wr_max: write I/O operations per second during bursts,
+#                         in bytes (Since 1.7)
+#
+# @bps_max_length: maximum length of the @bps_max burst
+#                            period, in seconds. (Since 2.6)
+#
+# @bps_rd_max_length: maximum length of the @bps_rd_max
+#                               burst period, in seconds. (Since 2.6)
+#
+# @bps_wr_max_length: maximum length of the @bps_wr_max
+#                               burst period, in seconds. (Since 2.6)
+#
+# @iops_max_length: maximum length of the @iops burst
+#                             period, in seconds. (Since 2.6)
+#
+# @iops_rd_max_length: maximum length of the @iops_rd_max
+#                                burst period, in seconds. (Since 2.6)
+#
+# @iops_wr_max_length: maximum length of the @iops_wr_max
+#                                burst period, in seconds. (Since 2.6)
+#
+# @iops_size: an I/O size in bytes (Since 1.7)
+#
+# @group: throttle group name (Since 2.4)
+#
+# @cache: the cache mode used for the block device (since: 2.3)
+#
+# @write_threshold: configured write threshold for the device.
+#                   0 if disabled. (Since 2.3)
+#
+# @dirty-bitmaps: dirty bitmaps information (only present if node
+#                 has one or more dirty bitmaps) (Since 4.2)
+#
+# Features:
+# @deprecated: Member @encryption_key_missing is deprecated.  It is
+#              always false.
+#
+# Since: 0.14.0
+#
+##
+{ 'struct': 'BlockDeviceInfo',
+  'data': { 'file': 'str', '*node-name': 'str', 'ro': 'bool', 'drv': 'str',
+            '*backing_file': 'str', 'backing_file_depth': 'int',
+            'encrypted': 'bool',
+            'encryption_key_missing': { 'type': 'bool',
+                                        'features': [ 'deprecated' ] },
+            'detect_zeroes': 'BlockdevDetectZeroesOptions',
+            'bps': 'int', 'bps_rd': 'int', 'bps_wr': 'int',
+            'iops': 'int', 'iops_rd': 'int', 'iops_wr': 'int',
+            'image': 'ImageInfo',
+            '*bps_max': 'int', '*bps_rd_max': 'int',
+            '*bps_wr_max': 'int', '*iops_max': 'int',
+            '*iops_rd_max': 'int', '*iops_wr_max': 'int',
+            '*bps_max_length': 'int', '*bps_rd_max_length': 'int',
+            '*bps_wr_max_length': 'int', '*iops_max_length': 'int',
+            '*iops_rd_max_length': 'int', '*iops_wr_max_length': 'int',
+            '*iops_size': 'int', '*group': 'str', 'cache': 'BlockdevCacheInfo',
+            'write_threshold': 'int', '*dirty-bitmaps': ['BlockDirtyInfo'] } }
+
+##
+# @BlockDeviceIoStatus:
+#
+# An enumeration of block device I/O status.
+#
+# @ok: The last I/O operation has succeeded
+#
+# @failed: The last I/O operation has failed
+#
+# @nospace: The last I/O operation has failed due to a no-space condition
+#
+# Since: 1.0
+##
+{ 'enum': 'BlockDeviceIoStatus', 'data': [ 'ok', 'failed', 'nospace' ] }
+
+##
+# @DirtyBitmapStatus:
+#
+# An enumeration of possible states that a dirty bitmap can report to the user.
+#
+# @frozen: The bitmap is currently in-use by some operation and is immutable.
+#          If the bitmap was @active prior to the operation, new writes by the
+#          guest are being recorded in a temporary buffer, and will not be lost.
+#          Generally, bitmaps are cleared on successful use in an operation and
+#          the temporary buffer is committed into the bitmap. On failure, the
+#          temporary buffer is merged back into the bitmap without first
+#          clearing it.
+#          Please refer to the documentation for each bitmap-using operation,
+#          See also @blockdev-backup, @drive-backup.
+#
+# @disabled: The bitmap is not currently recording new writes by the guest.
+#            This is requested explicitly via @block-dirty-bitmap-disable.
+#            It can still be cleared, deleted, or used for backup operations.
+#
+# @active: The bitmap is actively monitoring for new writes, and can be cleared,
+#          deleted, or used for backup operations.
+#
+# @locked: The bitmap is currently in-use by some operation and is immutable.
+#          If the bitmap was @active prior to the operation, it is still
+#          recording new writes. If the bitmap was @disabled, it is not
+#          recording new writes. (Since 2.12)
+#
+# @inconsistent: This is a persistent dirty bitmap that was marked in-use on
+#                disk, and is unusable by QEMU. It can only be deleted.
+#                Please rely on the inconsistent field in @BlockDirtyInfo
+#                instead, as the status field is deprecated. (Since 4.0)
+#
+# Since: 2.4
+##
+{ 'enum': 'DirtyBitmapStatus',
+  'data': ['active', 'disabled', 'frozen', 'locked', 'inconsistent'] }
+
+##
+# @BlockDirtyInfo:
+#
+# Block dirty bitmap information.
+#
+# @name: the name of the dirty bitmap (Since 2.4)
+#
+# @count: number of dirty bytes according to the dirty bitmap
+#
+# @granularity: granularity of the dirty bitmap in bytes (since 1.4)
+#
+# @status: current status of the dirty bitmap (since 2.4)
+#
+# @recording: true if the bitmap is recording new writes from the guest.
+#             Replaces `active` and `disabled` statuses. (since 4.0)
+#
+# @busy: true if the bitmap is in-use by some operation (NBD or jobs)
+#        and cannot be modified via QMP or used by another operation.
+#        Replaces `locked` and `frozen` statuses. (since 4.0)
+#
+# @persistent: true if the bitmap was stored on disk, is scheduled to be stored
+#              on disk, or both. (since 4.0)
+#
+# @inconsistent: true if this is a persistent bitmap that was improperly
+#                stored. Implies @persistent to be true; @recording and
+#                @busy to be false. This bitmap cannot be used. To remove
+#                it, use @block-dirty-bitmap-remove. (Since 4.0)
+#
+# Features:
+# @deprecated: Member @status is deprecated.  Use @recording and
+#              @locked instead.
+#
+# Since: 1.3
+##
+{ 'struct': 'BlockDirtyInfo',
+  'data': {'*name': 'str', 'count': 'int', 'granularity': 'uint32',
+           'recording': 'bool', 'busy': 'bool',
+           'status': { 'type': 'DirtyBitmapStatus',
+                       'features': [ 'deprecated' ] },
+           'persistent': 'bool', '*inconsistent': 'bool' } }
+
+##
+# @Qcow2BitmapInfoFlags:
+#
+# An enumeration of flags that a bitmap can report to the user.
+#
+# @in-use: This flag is set by any process actively modifying the qcow2 file,
+#          and cleared when the updated bitmap is flushed to the qcow2 image.
+#          The presence of this flag in an offline image means that the bitmap
+#          was not saved correctly after its last usage, and may contain
+#          inconsistent data.
+#
+# @auto: The bitmap must reflect all changes of the virtual disk by any
+#        application that would write to this qcow2 file.
+#
+# Since: 4.0
+##
+{ 'enum': 'Qcow2BitmapInfoFlags',
+  'data': ['in-use', 'auto'] }
+
+##
+# @Qcow2BitmapInfo:
+#
+# Qcow2 bitmap information.
+#
+# @name: the name of the bitmap
+#
+# @granularity: granularity of the bitmap in bytes
+#
+# @flags: flags of the bitmap
+#
+# Since: 4.0
+##
+{ 'struct': 'Qcow2BitmapInfo',
+  'data': {'name': 'str', 'granularity': 'uint32',
+           'flags': ['Qcow2BitmapInfoFlags'] } }
+
+##
+# @BlockLatencyHistogramInfo:
+#
+# Block latency histogram.
+#
+# @boundaries: list of interval boundary values in nanoseconds, all greater
+#              than zero and in ascending order.
+#              For example, the list [10, 50, 100] produces the following
+#              histogram intervals: [0, 10), [10, 50), [50, 100), [100, +inf).
+#
+# @bins: list of io request counts corresponding to histogram intervals.
+#        len(@bins) = len(@boundaries) + 1
+#        For the example above, @bins may be something like [3, 1, 5, 2],
+#        and corresponding histogram looks like:
+#
+# ::
+#
+#        5|           *
+#        4|           *
+#        3| *         *
+#        2| *         *    *
+#        1| *    *    *    *
+#         +------------------
+#             10   50   100
+#
+# Since: 4.0
+##
+{ 'struct': 'BlockLatencyHistogramInfo',
+  'data': {'boundaries': ['uint64'], 'bins': ['uint64'] } }
+
+##
+# @BlockInfo:
+#
+# Block device information.  This structure describes a virtual device and
+# the backing device associated with it.
+#
+# @device: The device name associated with the virtual device.
+#
+# @qdev: The qdev ID, or if no ID is assigned, the QOM path of the block
+#        device. (since 2.10)
+#
+# @type: This field is returned only for compatibility reasons, it should
+#        not be used (always returns 'unknown')
+#
+# @removable: True if the device supports removable media.
+#
+# @locked: True if the guest has locked this device from having its media
+#          removed
+#
+# @tray_open: True if the device's tray is open
+#             (only present if it has a tray)
+#
+# @dirty-bitmaps: dirty bitmaps information (only present if the
+#                 driver has one or more dirty bitmaps) (Since 2.0)
+#
+# @io-status: @BlockDeviceIoStatus. Only present if the device
+#             supports it and the VM is configured to stop on errors
+#             (supported device models: virtio-blk, IDE, SCSI except
+#             scsi-generic)
+#
+# @inserted: @BlockDeviceInfo describing the device if media is
+#            present
+#
+# Features:
+# @deprecated: Member @dirty-bitmaps is deprecated.  Use @inserted
+#              member @dirty-bitmaps instead.
+#
+# Since:  0.14.0
+##
+{ 'struct': 'BlockInfo',
+  'data': {'device': 'str', '*qdev': 'str', 'type': 'str', 'removable': 'bool',
+           'locked': 'bool', '*inserted': 'BlockDeviceInfo',
+           '*tray_open': 'bool', '*io-status': 'BlockDeviceIoStatus',
+           '*dirty-bitmaps': { 'type': ['BlockDirtyInfo'],
+                               'features': [ 'deprecated' ] } } }
+
+##
+# @BlockMeasureInfo:
+#
+# Image file size calculation information.  This structure describes the size
+# requirements for creating a new image file.
+#
+# The size requirements depend on the new image file format.  File size always
+# equals virtual disk size for the 'raw' format, even for sparse POSIX files.
+# Compact formats such as 'qcow2' represent unallocated and zero regions
+# efficiently so file size may be smaller than virtual disk size.
+#
+# The values are upper bounds that are guaranteed to fit the new image file.
+# Subsequent modification, such as internal snapshot or further bitmap
+# creation, may require additional space and is not covered here.
+#
+# @required: Size required for a new image file, in bytes, when copying just
+#            allocated guest-visible contents.
+#
+# @fully-allocated: Image file size, in bytes, once data has been written
+#                   to all sectors, when copying just guest-visible contents.
+#
+# @bitmaps: Additional size required if all the top-level bitmap metadata
+#           in the source image were to be copied to the destination,
+#           present only when source and destination both support
+#           persistent bitmaps. (since 5.1)
+#
+# Since: 2.10
+##
+{ 'struct': 'BlockMeasureInfo',
+  'data': {'required': 'int', 'fully-allocated': 'int', '*bitmaps': 'int'} }
+
+##
+# @query-block:
+#
+# Get a list of BlockInfo for all virtual block devices.
+#
+# Returns: a list of @BlockInfo describing each virtual block device. Filter
+#          nodes that were created implicitly are skipped over.
+#
+# Since: 0.14.0
+#
+# Example:
+#
+# -> { "execute": "query-block" }
+# <- {
+#       "return":[
+#          {
+#             "io-status": "ok",
+#             "device":"ide0-hd0",
+#             "locked":false,
+#             "removable":false,
+#             "inserted":{
+#                "ro":false,
+#                "drv":"qcow2",
+#                "encrypted":false,
+#                "file":"disks/test.qcow2",
+#                "backing_file_depth":1,
+#                "bps":1000000,
+#                "bps_rd":0,
+#                "bps_wr":0,
+#                "iops":1000000,
+#                "iops_rd":0,
+#                "iops_wr":0,
+#                "bps_max": 8000000,
+#                "bps_rd_max": 0,
+#                "bps_wr_max": 0,
+#                "iops_max": 0,
+#                "iops_rd_max": 0,
+#                "iops_wr_max": 0,
+#                "iops_size": 0,
+#                "detect_zeroes": "on",
+#                "write_threshold": 0,
+#                "image":{
+#                   "filename":"disks/test.qcow2",
+#                   "format":"qcow2",
+#                   "virtual-size":2048000,
+#                   "backing_file":"base.qcow2",
+#                   "full-backing-filename":"disks/base.qcow2",
+#                   "backing-filename-format":"qcow2",
+#                   "snapshots":[
+#                      {
+#                         "id": "1",
+#                         "name": "snapshot1",
+#                         "vm-state-size": 0,
+#                         "date-sec": 10000200,
+#                         "date-nsec": 12,
+#                         "vm-clock-sec": 206,
+#                         "vm-clock-nsec": 30
+#                      }
+#                   ],
+#                   "backing-image":{
+#                       "filename":"disks/base.qcow2",
+#                       "format":"qcow2",
+#                       "virtual-size":2048000
+#                   }
+#                }
+#             },
+#             "qdev": "ide_disk",
+#             "type":"unknown"
+#          },
+#          {
+#             "io-status": "ok",
+#             "device":"ide1-cd0",
+#             "locked":false,
+#             "removable":true,
+#             "qdev": "/machine/unattached/device[23]",
+#             "tray_open": false,
+#             "type":"unknown"
+#          },
+#          {
+#             "device":"floppy0",
+#             "locked":false,
+#             "removable":true,
+#             "qdev": "/machine/unattached/device[20]",
+#             "type":"unknown"
+#          },
+#          {
+#             "device":"sd0",
+#             "locked":false,
+#             "removable":true,
+#             "type":"unknown"
+#          }
+#       ]
+#    }
+#
+##
+{ 'command': 'query-block', 'returns': ['BlockInfo'] }
+
+
+##
+# @BlockDeviceTimedStats:
+#
+# Statistics of a block device during a given interval of time.
+#
+# @interval_length: Interval used for calculating the statistics,
+#                   in seconds.
+#
+# @min_rd_latency_ns: Minimum latency of read operations in the
+#                     defined interval, in nanoseconds.
+#
+# @min_wr_latency_ns: Minimum latency of write operations in the
+#                     defined interval, in nanoseconds.
+#
+# @min_flush_latency_ns: Minimum latency of flush operations in the
+#                        defined interval, in nanoseconds.
+#
+# @max_rd_latency_ns: Maximum latency of read operations in the
+#                     defined interval, in nanoseconds.
+#
+# @max_wr_latency_ns: Maximum latency of write operations in the
+#                     defined interval, in nanoseconds.
+#
+# @max_flush_latency_ns: Maximum latency of flush operations in the
+#                        defined interval, in nanoseconds.
+#
+# @avg_rd_latency_ns: Average latency of read operations in the
+#                     defined interval, in nanoseconds.
+#
+# @avg_wr_latency_ns: Average latency of write operations in the
+#                     defined interval, in nanoseconds.
+#
+# @avg_flush_latency_ns: Average latency of flush operations in the
+#                        defined interval, in nanoseconds.
+#
+# @avg_rd_queue_depth: Average number of pending read operations
+#                      in the defined interval.
+#
+# @avg_wr_queue_depth: Average number of pending write operations
+#                      in the defined interval.
+#
+# Since: 2.5
+##
+{ 'struct': 'BlockDeviceTimedStats',
+  'data': { 'interval_length': 'int', 'min_rd_latency_ns': 'int',
+            'max_rd_latency_ns': 'int', 'avg_rd_latency_ns': 'int',
+            'min_wr_latency_ns': 'int', 'max_wr_latency_ns': 'int',
+            'avg_wr_latency_ns': 'int', 'min_flush_latency_ns': 'int',
+            'max_flush_latency_ns': 'int', 'avg_flush_latency_ns': 'int',
+            'avg_rd_queue_depth': 'number', 'avg_wr_queue_depth': 'number' } }
+
+##
+# @BlockDeviceStats:
+#
+# Statistics of a virtual block device or a block backing device.
+#
+# @rd_bytes:      The number of bytes read by the device.
+#
+# @wr_bytes:      The number of bytes written by the device.
+#
+# @unmap_bytes: The number of bytes unmapped by the device (Since 4.2)
+#
+# @rd_operations: The number of read operations performed by the device.
+#
+# @wr_operations: The number of write operations performed by the device.
+#
+# @flush_operations: The number of cache flush operations performed by the
+#                    device (since 0.15.0)
+#
+# @unmap_operations: The number of unmap operations performed by the device
+#                    (Since 4.2)
+#
+# @rd_total_time_ns: Total time spent on reads in nanoseconds (since 0.15.0).
+#
+# @wr_total_time_ns: Total time spent on writes in nanoseconds (since 0.15.0).
+#
+# @flush_total_time_ns: Total time spent on cache flushes in nanoseconds
+#                       (since 0.15.0).
+#
+# @unmap_total_time_ns: Total time spent on unmap operations in nanoseconds
+#                       (Since 4.2)
+#
+# @wr_highest_offset: The offset after the greatest byte written to the
+#                     device.  The intended use of this information is for
+#                     growable sparse files (like qcow2) that are used on top
+#                     of a physical device.
+#
+# @rd_merged: Number of read requests that have been merged into another
+#             request (Since 2.3).
+#
+# @wr_merged: Number of write requests that have been merged into another
+#             request (Since 2.3).
+#
+# @unmap_merged: Number of unmap requests that have been merged into another
+#                request (Since 4.2)
+#
+# @idle_time_ns: Time since the last I/O operation, in
+#                nanoseconds. If the field is absent it means that
+#                there haven't been any operations yet (Since 2.5).
+#
+# @failed_rd_operations: The number of failed read operations
+#                        performed by the device (Since 2.5)
+#
+# @failed_wr_operations: The number of failed write operations
+#                        performed by the device (Since 2.5)
+#
+# @failed_flush_operations: The number of failed flush operations
+#                           performed by the device (Since 2.5)
+#
+# @failed_unmap_operations: The number of failed unmap operations performed
+#                           by the device (Since 4.2)
+#
+# @invalid_rd_operations: The number of invalid read operations
+#                          performed by the device (Since 2.5)
+#
+# @invalid_wr_operations: The number of invalid write operations
+#                         performed by the device (Since 2.5)
+#
+# @invalid_flush_operations: The number of invalid flush operations
+#                            performed by the device (Since 2.5)
+#
+# @invalid_unmap_operations: The number of invalid unmap operations performed
+#                            by the device (Since 4.2)
+#
+# @account_invalid: Whether invalid operations are included in the
+#                   last access statistics (Since 2.5)
+#
+# @account_failed: Whether failed operations are included in the
+#                  latency and last access statistics (Since 2.5)
+#
+# @timed_stats: Statistics specific to the set of previously defined
+#               intervals of time (Since 2.5)
+#
+# @rd_latency_histogram: @BlockLatencyHistogramInfo. (Since 4.0)
+#
+# @wr_latency_histogram: @BlockLatencyHistogramInfo. (Since 4.0)
+#
+# @flush_latency_histogram: @BlockLatencyHistogramInfo. (Since 4.0)
+#
+# Since: 0.14.0
+##
+{ 'struct': 'BlockDeviceStats',
+  'data': {'rd_bytes': 'int', 'wr_bytes': 'int', 'unmap_bytes' : 'int',
+           'rd_operations': 'int', 'wr_operations': 'int',
+           'flush_operations': 'int', 'unmap_operations': 'int',
+           'rd_total_time_ns': 'int', 'wr_total_time_ns': 'int',
+           'flush_total_time_ns': 'int', 'unmap_total_time_ns': 'int',
+           'wr_highest_offset': 'int',
+           'rd_merged': 'int', 'wr_merged': 'int', 'unmap_merged': 'int',
+           '*idle_time_ns': 'int',
+           'failed_rd_operations': 'int', 'failed_wr_operations': 'int',
+           'failed_flush_operations': 'int', 'failed_unmap_operations': 'int',
+           'invalid_rd_operations': 'int', 'invalid_wr_operations': 'int',
+           'invalid_flush_operations': 'int', 'invalid_unmap_operations': 'int',
+           'account_invalid': 'bool', 'account_failed': 'bool',
+           'timed_stats': ['BlockDeviceTimedStats'],
+           '*rd_latency_histogram': 'BlockLatencyHistogramInfo',
+           '*wr_latency_histogram': 'BlockLatencyHistogramInfo',
+           '*flush_latency_histogram': 'BlockLatencyHistogramInfo' } }
+
+##
+# @BlockStatsSpecificFile:
+#
+# File driver statistics
+#
+# @discard-nb-ok: The number of successful discard operations performed by
+#                 the driver.
+#
+# @discard-nb-failed: The number of failed discard operations performed by
+#                     the driver.
+#
+# @discard-bytes-ok: The number of bytes discarded by the driver.
+#
+# Since: 4.2
+##
+{ 'struct': 'BlockStatsSpecificFile',
+  'data': {
+      'discard-nb-ok': 'uint64',
+      'discard-nb-failed': 'uint64',
+      'discard-bytes-ok': 'uint64' } }
+
+##
+# @BlockStatsSpecificNvme:
+#
+# NVMe driver statistics
+#
+# @completion-errors: The number of completion errors.
+#
+# @aligned-accesses: The number of aligned accesses performed by
+#                    the driver.
+#
+# @unaligned-accesses: The number of unaligned accesses performed by
+#                      the driver.
+#
+# Since: 5.2
+##
+{ 'struct': 'BlockStatsSpecificNvme',
+  'data': {
+      'completion-errors': 'uint64',
+      'aligned-accesses': 'uint64',
+      'unaligned-accesses': 'uint64' } }
+
+##
+# @BlockStatsSpecific:
+#
+# Block driver specific statistics
+#
+# Since: 4.2
+##
+{ 'union': 'BlockStatsSpecific',
+  'base': { 'driver': 'BlockdevDriver' },
+  'discriminator': 'driver',
+  'data': {
+      'file': 'BlockStatsSpecificFile',
+      'host_device': 'BlockStatsSpecificFile',
+      'nvme': 'BlockStatsSpecificNvme' } }
+
+##
+# @BlockStats:
+#
+# Statistics of a virtual block device or a block backing device.
+#
+# @device: If the stats are for a virtual block device, the name
+#          corresponding to the virtual block device.
+#
+# @node-name: The node name of the device. (Since 2.3)
+#
+# @qdev: The qdev ID, or if no ID is assigned, the QOM path of the block
+#        device. (since 3.0)
+#
+# @stats:  A @BlockDeviceStats for the device.
+#
+# @driver-specific: Optional driver-specific stats. (Since 4.2)
+#
+# @parent: This describes the file block device if it has one.
+#          Contains recursively the statistics of the underlying
+#          protocol (e.g. the host file for a qcow2 image). If there is
+#          no underlying protocol, this field is omitted
+#
+# @backing: This describes the backing block device if it has one.
+#           (Since 2.0)
+#
+# Since: 0.14.0
+##
+{ 'struct': 'BlockStats',
+  'data': {'*device': 'str', '*qdev': 'str', '*node-name': 'str',
+           'stats': 'BlockDeviceStats',
+           '*driver-specific': 'BlockStatsSpecific',
+           '*parent': 'BlockStats',
+           '*backing': 'BlockStats'} }
+
+##
+# @query-blockstats:
+#
+# Query the @BlockStats for all virtual block devices.
+#
+# @query-nodes: If true, the command will query all the block nodes
+#               that have a node name, in a list which will include "parent"
+#               information, but not "backing".
+#               If false or omitted, the behavior is as before - query all the
+#               device backends, recursively including their "parent" and
+#               "backing". Filter nodes that were created implicitly are
+#               skipped over in this mode. (Since 2.3)
+#
+# Returns: A list of @BlockStats for each virtual block devices.
+#
+# Since: 0.14.0
+#
+# Example:
+#
+# -> { "execute": "query-blockstats" }
+# <- {
+#       "return":[
+#          {
+#             "device":"ide0-hd0",
+#             "parent":{
+#                "stats":{
+#                   "wr_highest_offset":3686448128,
+#                   "wr_bytes":9786368,
+#                   "wr_operations":751,
+#                   "rd_bytes":122567168,
+#                   "rd_operations":36772
+#                   "wr_total_times_ns":313253456
+#                   "rd_total_times_ns":3465673657
+#                   "flush_total_times_ns":49653
+#                   "flush_operations":61,
+#                   "rd_merged":0,
+#                   "wr_merged":0,
+#                   "idle_time_ns":2953431879,
+#                   "account_invalid":true,
+#                   "account_failed":false
+#                }
+#             },
+#             "stats":{
+#                "wr_highest_offset":2821110784,
+#                "wr_bytes":9786368,
+#                "wr_operations":692,
+#                "rd_bytes":122739200,
+#                "rd_operations":36604
+#                "flush_operations":51,
+#                "wr_total_times_ns":313253456
+#                "rd_total_times_ns":3465673657
+#                "flush_total_times_ns":49653,
+#                "rd_merged":0,
+#                "wr_merged":0,
+#                "idle_time_ns":2953431879,
+#                "account_invalid":true,
+#                "account_failed":false
+#             },
+#             "qdev": "/machine/unattached/device[23]"
+#          },
+#          {
+#             "device":"ide1-cd0",
+#             "stats":{
+#                "wr_highest_offset":0,
+#                "wr_bytes":0,
+#                "wr_operations":0,
+#                "rd_bytes":0,
+#                "rd_operations":0
+#                "flush_operations":0,
+#                "wr_total_times_ns":0
+#                "rd_total_times_ns":0
+#                "flush_total_times_ns":0,
+#                "rd_merged":0,
+#                "wr_merged":0,
+#                "account_invalid":false,
+#                "account_failed":false
+#             },
+#             "qdev": "/machine/unattached/device[24]"
+#          },
+#          {
+#             "device":"floppy0",
+#             "stats":{
+#                "wr_highest_offset":0,
+#                "wr_bytes":0,
+#                "wr_operations":0,
+#                "rd_bytes":0,
+#                "rd_operations":0
+#                "flush_operations":0,
+#                "wr_total_times_ns":0
+#                "rd_total_times_ns":0
+#                "flush_total_times_ns":0,
+#                "rd_merged":0,
+#                "wr_merged":0,
+#                "account_invalid":false,
+#                "account_failed":false
+#             },
+#             "qdev": "/machine/unattached/device[16]"
+#          },
+#          {
+#             "device":"sd0",
+#             "stats":{
+#                "wr_highest_offset":0,
+#                "wr_bytes":0,
+#                "wr_operations":0,
+#                "rd_bytes":0,
+#                "rd_operations":0
+#                "flush_operations":0,
+#                "wr_total_times_ns":0
+#                "rd_total_times_ns":0
+#                "flush_total_times_ns":0,
+#                "rd_merged":0,
+#                "wr_merged":0,
+#                "account_invalid":false,
+#                "account_failed":false
+#             }
+#          }
+#       ]
+#    }
+#
+##
+{ 'command': 'query-blockstats',
+  'data': { '*query-nodes': 'bool' },
+  'returns': ['BlockStats'] }
+
+##
+# @BlockdevOnError:
+#
+# An enumeration of possible behaviors for errors on I/O operations.
+# The exact meaning depends on whether the I/O was initiated by a guest
+# or by a block job
+#
+# @report: for guest operations, report the error to the guest;
+#          for jobs, cancel the job
+#
+# @ignore: ignore the error, only report a QMP event (BLOCK_IO_ERROR
+#          or BLOCK_JOB_ERROR). The backup, mirror and commit block jobs retry
+#          the failing request later and may still complete successfully. The
+#          stream block job continues to stream and will complete with an
+#          error.
+#
+# @enospc: same as @stop on ENOSPC, same as @report otherwise.
+#
+# @stop: for guest operations, stop the virtual machine;
+#        for jobs, pause the job
+#
+# @auto: inherit the error handling policy of the backend (since: 2.7)
+#
+# Since: 1.3
+##
+{ 'enum': 'BlockdevOnError',
+  'data': ['report', 'ignore', 'enospc', 'stop', 'auto'] }
+
+##
+# @MirrorSyncMode:
+#
+# An enumeration of possible behaviors for the initial synchronization
+# phase of storage mirroring.
+#
+# @top: copies data in the topmost image to the destination
+#
+# @full: copies data from all images to the destination
+#
+# @none: only copy data written from now on
+#
+# @incremental: only copy data described by the dirty bitmap. (since: 2.4)
+#
+# @bitmap: only copy data described by the dirty bitmap. (since: 4.2)
+#          Behavior on completion is determined by the BitmapSyncMode.
+#
+# Since: 1.3
+##
+{ 'enum': 'MirrorSyncMode',
+  'data': ['top', 'full', 'none', 'incremental', 'bitmap'] }
+
+##
+# @BitmapSyncMode:
+#
+# An enumeration of possible behaviors for the synchronization of a bitmap
+# when used for data copy operations.
+#
+# @on-success: The bitmap is only synced when the operation is successful.
+#              This is the behavior always used for 'INCREMENTAL' backups.
+#
+# @never: The bitmap is never synchronized with the operation, and is
+#         treated solely as a read-only manifest of blocks to copy.
+#
+# @always: The bitmap is always synchronized with the operation,
+#          regardless of whether or not the operation was successful.
+#
+# Since: 4.2
+##
+{ 'enum': 'BitmapSyncMode',
+  'data': ['on-success', 'never', 'always'] }
+
+##
+# @MirrorCopyMode:
+#
+# An enumeration whose values tell the mirror block job when to
+# trigger writes to the target.
+#
+# @background: copy data in background only.
+#
+# @write-blocking: when data is written to the source, write it
+#                  (synchronously) to the target as well.  In
+#                  addition, data is copied in background just like in
+#                  @background mode.
+#
+# Since: 3.0
+##
+{ 'enum': 'MirrorCopyMode',
+  'data': ['background', 'write-blocking'] }
+
+##
+# @BlockJobInfo:
+#
+# Information about a long-running block device operation.
+#
+# @type: the job type ('stream' for image streaming)
+#
+# @device: The job identifier. Originally the device name but other
+#          values are allowed since QEMU 2.7
+#
+# @len: Estimated @offset value at the completion of the job. This value can
+#       arbitrarily change while the job is running, in both directions.
+#
+# @offset: Progress made until now. The unit is arbitrary and the value can
+#          only meaningfully be used for the ratio of @offset to @len. The
+#          value is monotonically increasing.
+#
+# @busy: false if the job is known to be in a quiescent state, with
+#        no pending I/O.  Since 1.3.
+#
+# @paused: whether the job is paused or, if @busy is true, will
+#          pause itself as soon as possible.  Since 1.3.
+#
+# @speed: the rate limit, bytes per second
+#
+# @io-status: the status of the job (since 1.3)
+#
+# @ready: true if the job may be completed (since 2.2)
+#
+# @status: Current job state/status (since 2.12)
+#
+# @auto-finalize: Job will finalize itself when PENDING, moving to
+#                 the CONCLUDED state. (since 2.12)
+#
+# @auto-dismiss: Job will dismiss itself when CONCLUDED, moving to the NULL
+#                state and disappearing from the query list. (since 2.12)
+#
+# @error: Error information if the job did not complete successfully.
+#         Not set if the job completed successfully. (since 2.12.1)
+#
+# Since: 1.1
+##
+{ 'struct': 'BlockJobInfo',
+  'data': {'type': 'str', 'device': 'str', 'len': 'int',
+           'offset': 'int', 'busy': 'bool', 'paused': 'bool', 'speed': 'int',
+           'io-status': 'BlockDeviceIoStatus', 'ready': 'bool',
+           'status': 'JobStatus',
+           'auto-finalize': 'bool', 'auto-dismiss': 'bool',
+           '*error': 'str' } }
+
+##
+# @query-block-jobs:
+#
+# Return information about long-running block device operations.
+#
+# Returns: a list of @BlockJobInfo for each active block job
+#
+# Since: 1.1
+##
+{ 'command': 'query-block-jobs', 'returns': ['BlockJobInfo'] }
+
+##
+# @block_passwd:
+#
+# This command sets the password of a block device that has not been open
+# with a password and requires one.
+#
+# This command is now obsolete and will always return an error since 2.10
+#
+##
+{ 'command': 'block_passwd',
+  'data': { '*device': 'str',
+            '*node-name': 'str',
+            'password': 'str' } }
+
+##
+# @block_resize:
+#
+# Resize a block image while a guest is running.
+#
+# Either @device or @node-name must be set but not both.
+#
+# @device: the name of the device to get the image resized
+#
+# @node-name: graph node name to get the image resized (Since 2.0)
+#
+# @size:  new image size in bytes
+#
+# Returns: - nothing on success
+#          - If @device is not a valid block device, DeviceNotFound
+#
+# Since: 0.14.0
+#
+# Example:
+#
+# -> { "execute": "block_resize",
+#      "arguments": { "device": "scratch", "size": 1073741824 } }
+# <- { "return": {} }
+#
+##
+{ 'command': 'block_resize',
+  'data': { '*device': 'str',
+            '*node-name': 'str',
+            'size': 'int' },
+  'coroutine': true }
+
+##
+# @NewImageMode:
+#
+# An enumeration that tells QEMU how to set the backing file path in
+# a new image file.
+#
+# @existing: QEMU should look for an existing image file.
+#
+# @absolute-paths: QEMU should create a new image with absolute paths
+#                  for the backing file. If there is no backing file available, the new
+#                  image will not be backed either.
+#
+# Since: 1.1
+##
+{ 'enum': 'NewImageMode',
+  'data': [ 'existing', 'absolute-paths' ] }
+
+##
+# @BlockdevSnapshotSync:
+#
+# Either @device or @node-name must be set but not both.
+#
+# @device: the name of the device to take a snapshot of.
+#
+# @node-name: graph node name to generate the snapshot from (Since 2.0)
+#
+# @snapshot-file: the target of the new overlay image. If the file
+#                 exists, or if it is a device, the overlay will be created in the
+#                 existing file/device. Otherwise, a new file will be created.
+#
+# @snapshot-node-name: the graph node name of the new image (Since 2.0)
+#
+# @format: the format of the overlay image, default is 'qcow2'.
+#
+# @mode: whether and how QEMU should create a new image, default is
+#        'absolute-paths'.
+##
+{ 'struct': 'BlockdevSnapshotSync',
+  'data': { '*device': 'str', '*node-name': 'str',
+            'snapshot-file': 'str', '*snapshot-node-name': 'str',
+            '*format': 'str', '*mode': 'NewImageMode' } }
+
+##
+# @BlockdevSnapshot:
+#
+# @node: device or node name that will have a snapshot taken.
+#
+# @overlay: reference to the existing block device that will become
+#           the overlay of @node, as part of taking the snapshot.
+#           It must not have a current backing file (this can be
+#           achieved by passing "backing": null to blockdev-add).
+#
+# Since: 2.5
+##
+{ 'struct': 'BlockdevSnapshot',
+  'data': { 'node': 'str', 'overlay': 'str' } }
+
+##
+# @BackupCommon:
+#
+# @job-id: identifier for the newly-created block job. If
+#          omitted, the device name will be used. (Since 2.7)
+#
+# @device: the device name or node-name of a root node which should be copied.
+#
+# @sync: what parts of the disk image should be copied to the destination
+#        (all the disk, only the sectors allocated in the topmost image, from a
+#        dirty bitmap, or only new I/O).
+#
+# @speed: the maximum speed, in bytes per second. The default is 0,
+#         for unlimited.
+#
+# @bitmap: The name of a dirty bitmap to use.
+#          Must be present if sync is "bitmap" or "incremental".
+#          Can be present if sync is "full" or "top".
+#          Must not be present otherwise.
+#          (Since 2.4 (drive-backup), 3.1 (blockdev-backup))
+#
+# @bitmap-mode: Specifies the type of data the bitmap should contain after
+#               the operation concludes.
+#               Must be present if a bitmap was provided,
+#               Must NOT be present otherwise. (Since 4.2)
+#
+# @compress: true to compress data, if the target format supports it.
+#            (default: false) (since 2.8)
+#
+# @on-source-error: the action to take on an error on the source,
+#                   default 'report'.  'stop' and 'enospc' can only be used
+#                   if the block device supports io-status (see BlockInfo).
+#
+# @on-target-error: the action to take on an error on the target,
+#                   default 'report' (no limitations, since this applies to
+#                   a different block device than @device).
+#
+# @auto-finalize: When false, this job will wait in a PENDING state after it has
+#                 finished its work, waiting for @block-job-finalize before
+#                 making any block graph changes.
+#                 When true, this job will automatically
+#                 perform its abort or commit actions.
+#                 Defaults to true. (Since 2.12)
+#
+# @auto-dismiss: When false, this job will wait in a CONCLUDED state after it
+#                has completely ceased all work, and awaits @block-job-dismiss.
+#                When true, this job will automatically disappear from the query
+#                list without user intervention.
+#                Defaults to true. (Since 2.12)
+#
+# @filter-node-name: the node name that should be assigned to the
+#                    filter driver that the backup job inserts into the graph
+#                    above node specified by @drive. If this option is not given,
+#                    a node name is autogenerated. (Since: 4.2)
+#
+# Note: @on-source-error and @on-target-error only affect background
+#       I/O.  If an error occurs during a guest write request, the device's
+#       rerror/werror actions will be used.
+#
+# Since: 4.2
+##
+{ 'struct': 'BackupCommon',
+  'data': { '*job-id': 'str', 'device': 'str',
+            'sync': 'MirrorSyncMode', '*speed': 'int',
+            '*bitmap': 'str', '*bitmap-mode': 'BitmapSyncMode',
+            '*compress': 'bool',
+            '*on-source-error': 'BlockdevOnError',
+            '*on-target-error': 'BlockdevOnError',
+            '*auto-finalize': 'bool', '*auto-dismiss': 'bool',
+            '*filter-node-name': 'str' } }
+
+##
+# @DriveBackup:
+#
+# @target: the target of the new image. If the file exists, or if it
+#          is a device, the existing file/device will be used as the new
+#          destination.  If it does not exist, a new file will be created.
+#
+# @format: the format of the new destination, default is to
+#          probe if @mode is 'existing', else the format of the source
+#
+# @mode: whether and how QEMU should create a new image, default is
+#        'absolute-paths'.
+#
+# Since: 1.6
+##
+{ 'struct': 'DriveBackup',
+  'base': 'BackupCommon',
+  'data': { 'target': 'str',
+            '*format': 'str',
+            '*mode': 'NewImageMode' } }
+
+##
+# @BlockdevBackup:
+#
+# @target: the device name or node-name of the backup target node.
+#
+# Since: 2.3
+##
+{ 'struct': 'BlockdevBackup',
+  'base': 'BackupCommon',
+  'data': { 'target': 'str' } }
+
+##
+# @blockdev-snapshot-sync:
+#
+# Takes a synchronous snapshot of a block device.
+#
+# For the arguments, see the documentation of BlockdevSnapshotSync.
+#
+# Returns: - nothing on success
+#          - If @device is not a valid block device, DeviceNotFound
+#
+# Since: 0.14.0
+#
+# Example:
+#
+# -> { "execute": "blockdev-snapshot-sync",
+#      "arguments": { "device": "ide-hd0",
+#                     "snapshot-file":
+#                     "/some/place/my-image",
+#                     "format": "qcow2" } }
+# <- { "return": {} }
+#
+##
+{ 'command': 'blockdev-snapshot-sync',
+  'data': 'BlockdevSnapshotSync' }
+
+
+##
+# @blockdev-snapshot:
+#
+# Takes a snapshot of a block device.
+#
+# Take a snapshot, by installing 'node' as the backing image of
+# 'overlay'. Additionally, if 'node' is associated with a block
+# device, the block device changes to using 'overlay' as its new active
+# image.
+#
+# For the arguments, see the documentation of BlockdevSnapshot.
+#
+# Features:
+# @allow-write-only-overlay: If present, the check whether this operation is safe
+#                            was relaxed so that it can be used to change
+#                            backing file of a destination of a blockdev-mirror.
+#                            (since 5.0)
+#
+# Since: 2.5
+#
+# Example:
+#
+# -> { "execute": "blockdev-add",
+#      "arguments": { "driver": "qcow2",
+#                     "node-name": "node1534",
+#                     "file": { "driver": "file",
+#                               "filename": "hd1.qcow2" },
+#                     "backing": null } }
+#
+# <- { "return": {} }
+#
+# -> { "execute": "blockdev-snapshot",
+#      "arguments": { "node": "ide-hd0",
+#                     "overlay": "node1534" } }
+# <- { "return": {} }
+#
+##
+{ 'command': 'blockdev-snapshot',
+  'data': 'BlockdevSnapshot',
+  'features': [ 'allow-write-only-overlay' ] }
+
+##
+# @change-backing-file:
+#
+# Change the backing file in the image file metadata.  This does not
+# cause QEMU to reopen the image file to reparse the backing filename
+# (it may, however, perform a reopen to change permissions from
+# r/o -> r/w -> r/o, if needed). The new backing file string is written
+# into the image file metadata, and the QEMU internal strings are
+# updated.
+#
+# @image-node-name: The name of the block driver state node of the
+#                   image to modify. The "device" argument is used
+#                   to verify "image-node-name" is in the chain
+#                   described by "device".
+#
+# @device: The device name or node-name of the root node that owns
+#          image-node-name.
+#
+# @backing-file: The string to write as the backing file.  This
+#                string is not validated, so care should be taken
+#                when specifying the string or the image chain may
+#                not be able to be reopened again.
+#
+# Returns: - Nothing on success
+#          - If "device" does not exist or cannot be determined, DeviceNotFound
+#
+# Since: 2.1
+##
+{ 'command': 'change-backing-file',
+  'data': { 'device': 'str', 'image-node-name': 'str',
+            'backing-file': 'str' } }
+
+##
+# @block-commit:
+#
+# Live commit of data from overlay image nodes into backing nodes - i.e.,
+# writes data between 'top' and 'base' into 'base'.
+#
+# If top == base, that is an error.
+# If top has no overlays on top of it, or if it is in use by a writer,
+# the job will not be completed by itself.  The user needs to complete
+# the job with the block-job-complete command after getting the ready
+# event. (Since 2.0)
+#
+# If the base image is smaller than top, then the base image will be
+# resized to be the same size as top.  If top is smaller than the base
+# image, the base will not be truncated.  If you want the base image
+# size to match the size of the smaller top, you can safely truncate
+# it yourself once the commit operation successfully completes.
+#
+# @job-id: identifier for the newly-created block job. If
+#          omitted, the device name will be used. (Since 2.7)
+#
+# @device: the device name or node-name of a root node
+#
+# @base-node: The node name of the backing image to write data into.
+#             If not specified, this is the deepest backing image.
+#             (since: 3.1)
+#
+# @base: Same as @base-node, except that it is a file name rather than a node
+#        name. This must be the exact filename string that was used to open the
+#        node; other strings, even if addressing the same file, are not
+#        accepted
+#
+# @top-node: The node name of the backing image within the image chain
+#            which contains the topmost data to be committed down. If
+#            not specified, this is the active layer. (since: 3.1)
+#
+# @top: Same as @top-node, except that it is a file name rather than a node
+#       name. This must be the exact filename string that was used to open the
+#       node; other strings, even if addressing the same file, are not
+#       accepted
+#
+# @backing-file: The backing file string to write into the overlay
+#                image of 'top'.  If 'top' does not have an overlay
+#                image, or if 'top' is in use by a writer, specifying
+#                a backing file string is an error.
+#
+#                This filename is not validated.  If a pathname string
+#                is such that it cannot be resolved by QEMU, that
+#                means that subsequent QMP or HMP commands must use
+#                node-names for the image in question, as filename
+#                lookup methods will fail.
+#
+#                If not specified, QEMU will automatically determine
+#                the backing file string to use, or error out if
+#                there is no obvious choice. Care should be taken
+#                when specifying the string, to specify a valid
+#                filename or protocol.
+#                (Since 2.1)
+#
+# @speed: the maximum speed, in bytes per second
+#
+# @on-error: the action to take on an error. 'ignore' means that the request
+#            should be retried. (default: report; Since: 5.0)
+#
+# @filter-node-name: the node name that should be assigned to the
+#                    filter driver that the commit job inserts into the graph
+#                    above @top. If this option is not given, a node name is
+#                    autogenerated. (Since: 2.9)
+#
+# @auto-finalize: When false, this job will wait in a PENDING state after it has
+#                 finished its work, waiting for @block-job-finalize before
+#                 making any block graph changes.
+#                 When true, this job will automatically
+#                 perform its abort or commit actions.
+#                 Defaults to true. (Since 3.1)
+#
+# @auto-dismiss: When false, this job will wait in a CONCLUDED state after it
+#                has completely ceased all work, and awaits @block-job-dismiss.
+#                When true, this job will automatically disappear from the query
+#                list without user intervention.
+#                Defaults to true. (Since 3.1)
+#
+# Features:
+# @deprecated: Members @base and @top are deprecated.  Use @base-node
+#              and @top-node instead.
+#
+# Returns: - Nothing on success
+#          - If @device does not exist, DeviceNotFound
+#          - Any other error returns a GenericError.
+#
+# Since: 1.3
+#
+# Example:
+#
+# -> { "execute": "block-commit",
+#      "arguments": { "device": "virtio0",
+#                     "top": "/tmp/snap1.qcow2" } }
+# <- { "return": {} }
+#
+##
+{ 'command': 'block-commit',
+  'data': { '*job-id': 'str', 'device': 'str', '*base-node': 'str',
+            '*base': { 'type': 'str', 'features': [ 'deprecated' ] },
+            '*top-node': 'str',
+            '*top': { 'type': 'str', 'features': [ 'deprecated' ] },
+            '*backing-file': 'str', '*speed': 'int',
+            '*on-error': 'BlockdevOnError',
+            '*filter-node-name': 'str',
+            '*auto-finalize': 'bool', '*auto-dismiss': 'bool' } }
+
+##
+# @drive-backup:
+#
+# Start a point-in-time copy of a block device to a new destination.  The
+# status of ongoing drive-backup operations can be checked with
+# query-block-jobs where the BlockJobInfo.type field has the value 'backup'.
+# The operation can be stopped before it has completed using the
+# block-job-cancel command.
+#
+# Returns: - nothing on success
+#          - If @device is not a valid block device, GenericError
+#
+# Since: 1.6
+#
+# Example:
+#
+# -> { "execute": "drive-backup",
+#      "arguments": { "device": "drive0",
+#                     "sync": "full",
+#                     "target": "backup.img" } }
+# <- { "return": {} }
+#
+##
+{ 'command': 'drive-backup', 'boxed': true,
+  'data': 'DriveBackup' }
+
+##
+# @blockdev-backup:
+#
+# Start a point-in-time copy of a block device to a new destination.  The
+# status of ongoing blockdev-backup operations can be checked with
+# query-block-jobs where the BlockJobInfo.type field has the value 'backup'.
+# The operation can be stopped before it has completed using the
+# block-job-cancel command.
+#
+# Returns: - nothing on success
+#          - If @device is not a valid block device, DeviceNotFound
+#
+# Since: 2.3
+#
+# Example:
+# -> { "execute": "blockdev-backup",
+#      "arguments": { "device": "src-id",
+#                     "sync": "full",
+#                     "target": "tgt-id" } }
+# <- { "return": {} }
+#
+##
+{ 'command': 'blockdev-backup', 'boxed': true,
+  'data': 'BlockdevBackup' }
+
+
+##
+# @query-named-block-nodes:
+#
+# Get the named block driver list
+#
+# @flat: Omit the nested data about backing image ("backing-image" key) if true.
+#        Default is false (Since 5.0)
+#
+# Returns: the list of BlockDeviceInfo
+#
+# Since: 2.0
+#
+# Example:
+#
+# -> { "execute": "query-named-block-nodes" }
+# <- { "return": [ { "ro":false,
+#                    "drv":"qcow2",
+#                    "encrypted":false,
+#                    "file":"disks/test.qcow2",
+#                    "node-name": "my-node",
+#                    "backing_file_depth":1,
+#                    "bps":1000000,
+#                    "bps_rd":0,
+#                    "bps_wr":0,
+#                    "iops":1000000,
+#                    "iops_rd":0,
+#                    "iops_wr":0,
+#                    "bps_max": 8000000,
+#                    "bps_rd_max": 0,
+#                    "bps_wr_max": 0,
+#                    "iops_max": 0,
+#                    "iops_rd_max": 0,
+#                    "iops_wr_max": 0,
+#                    "iops_size": 0,
+#                    "write_threshold": 0,
+#                    "image":{
+#                       "filename":"disks/test.qcow2",
+#                       "format":"qcow2",
+#                       "virtual-size":2048000,
+#                       "backing_file":"base.qcow2",
+#                       "full-backing-filename":"disks/base.qcow2",
+#                       "backing-filename-format":"qcow2",
+#                       "snapshots":[
+#                          {
+#                             "id": "1",
+#                             "name": "snapshot1",
+#                             "vm-state-size": 0,
+#                             "date-sec": 10000200,
+#                             "date-nsec": 12,
+#                             "vm-clock-sec": 206,
+#                             "vm-clock-nsec": 30
+#                          }
+#                       ],
+#                       "backing-image":{
+#                           "filename":"disks/base.qcow2",
+#                           "format":"qcow2",
+#                           "virtual-size":2048000
+#                       }
+#                    } } ] }
+#
+##
+{ 'command': 'query-named-block-nodes',
+  'returns': [ 'BlockDeviceInfo' ],
+  'data': { '*flat': 'bool' } }
+
+##
+# @XDbgBlockGraphNodeType:
+#
+# @block-backend: corresponds to BlockBackend
+#
+# @block-job: corresponds to BlockJob
+#
+# @block-driver: corresponds to BlockDriverState
+#
+# Since: 4.0
+##
+{ 'enum': 'XDbgBlockGraphNodeType',
+  'data': [ 'block-backend', 'block-job', 'block-driver' ] }
+
+##
+# @XDbgBlockGraphNode:
+#
+# @id: Block graph node identifier. This @id is generated only for
+#      x-debug-query-block-graph and does not relate to any other identifiers in
+#      Qemu.
+#
+# @type: Type of graph node. Can be one of block-backend, block-job or
+#        block-driver-state.
+#
+# @name: Human readable name of the node. Corresponds to node-name for
+#        block-driver-state nodes; is not guaranteed to be unique in the whole
+#        graph (with block-jobs and block-backends).
+#
+# Since: 4.0
+##
+{ 'struct': 'XDbgBlockGraphNode',
+  'data': { 'id': 'uint64', 'type': 'XDbgBlockGraphNodeType', 'name': 'str' } }
+
+##
+# @BlockPermission:
+#
+# Enum of base block permissions.
+#
+# @consistent-read: A user that has the "permission" of consistent reads is
+#                   guaranteed that their view of the contents of the block
+#                   device is complete and self-consistent, representing the
+#                   contents of a disk at a specific point.
+#                   For most block devices (including their backing files) this
+#                   is true, but the property cannot be maintained in a few
+#                   situations like for intermediate nodes of a commit block
+#                   job.
+#
+# @write: This permission is required to change the visible disk contents.
+#
+# @write-unchanged: This permission (which is weaker than BLK_PERM_WRITE) is
+#                   both enough and required for writes to the block node when
+#                   the caller promises that the visible disk content doesn't
+#                   change.
+#                   As the BLK_PERM_WRITE permission is strictly stronger,
+#                   either is sufficient to perform an unchanging write.
+#
+# @resize: This permission is required to change the size of a block node.
+#
+# @graph-mod: This permission is required to change the node that this
+#             BdrvChild points to.
+#
+# Since: 4.0
+##
+{ 'enum': 'BlockPermission',
+  'data': [ 'consistent-read', 'write', 'write-unchanged', 'resize',
+            'graph-mod' ] }
+##
+# @XDbgBlockGraphEdge:
+#
+# Block Graph edge description for x-debug-query-block-graph.
+#
+# @parent: parent id
+#
+# @child: child id
+#
+# @name: name of the relation (examples are 'file' and 'backing')
+#
+# @perm: granted permissions for the parent operating on the child
+#
+# @shared-perm: permissions that can still be granted to other users of the
+#               child while it is still attached to this parent
+#
+# Since: 4.0
+##
+{ 'struct': 'XDbgBlockGraphEdge',
+  'data': { 'parent': 'uint64', 'child': 'uint64',
+            'name': 'str', 'perm': [ 'BlockPermission' ],
+            'shared-perm': [ 'BlockPermission' ] } }
+
+##
+# @XDbgBlockGraph:
+#
+# Block Graph - list of nodes and list of edges.
+#
+# Since: 4.0
+##
+{ 'struct': 'XDbgBlockGraph',
+  'data': { 'nodes': ['XDbgBlockGraphNode'], 'edges': ['XDbgBlockGraphEdge'] } }
+
+##
+# @x-debug-query-block-graph:
+#
+# Get the block graph.
+#
+# Since: 4.0
+##
+{ 'command': 'x-debug-query-block-graph', 'returns': 'XDbgBlockGraph' }
+
+##
+# @drive-mirror:
+#
+# Start mirroring a block device's writes to a new destination. target
+# specifies the target of the new image. If the file exists, or if it
+# is a device, it will be used as the new destination for writes. If
+# it does not exist, a new file will be created. format specifies the
+# format of the mirror image, default is to probe if mode='existing',
+# else the format of the source.
+#
+# Returns: - nothing on success
+#          - If @device is not a valid block device, GenericError
+#
+# Since: 1.3
+#
+# Example:
+#
+# -> { "execute": "drive-mirror",
+#      "arguments": { "device": "ide-hd0",
+#                     "target": "/some/place/my-image",
+#                     "sync": "full",
+#                     "format": "qcow2" } }
+# <- { "return": {} }
+#
+##
+{ 'command': 'drive-mirror', 'boxed': true,
+  'data': 'DriveMirror' }
+
+##
+# @DriveMirror:
+#
+# A set of parameters describing drive mirror setup.
+#
+# @job-id: identifier for the newly-created block job. If
+#          omitted, the device name will be used. (Since 2.7)
+#
+# @device:  the device name or node-name of a root node whose writes should be
+#           mirrored.
+#
+# @target: the target of the new image. If the file exists, or if it
+#          is a device, the existing file/device will be used as the new
+#          destination.  If it does not exist, a new file will be created.
+#
+# @format: the format of the new destination, default is to
+#          probe if @mode is 'existing', else the format of the source
+#
+# @node-name: the new block driver state node name in the graph
+#             (Since 2.1)
+#
+# @replaces: with sync=full graph node name to be replaced by the new
+#            image when a whole image copy is done. This can be used to repair
+#            broken Quorum files.  By default, @device is replaced, although
+#            implicitly created filters on it are kept. (Since 2.1)
+#
+# @mode: whether and how QEMU should create a new image, default is
+#        'absolute-paths'.
+#
+# @speed:  the maximum speed, in bytes per second
+#
+# @sync: what parts of the disk image should be copied to the destination
+#        (all the disk, only the sectors allocated in the topmost image, or
+#        only new I/O).
+#
+# @granularity: granularity of the dirty bitmap, default is 64K
+#               if the image format doesn't have clusters, 4K if the clusters
+#               are smaller than that, else the cluster size.  Must be a
+#               power of 2 between 512 and 64M (since 1.4).
+#
+# @buf-size: maximum amount of data in flight from source to
+#            target (since 1.4).
+#
+# @on-source-error: the action to take on an error on the source,
+#                   default 'report'.  'stop' and 'enospc' can only be used
+#                   if the block device supports io-status (see BlockInfo).
+#
+# @on-target-error: the action to take on an error on the target,
+#                   default 'report' (no limitations, since this applies to
+#                   a different block device than @device).
+# @unmap: Whether to try to unmap target sectors where source has
+#         only zero. If true, and target unallocated sectors will read as zero,
+#         target image sectors will be unmapped; otherwise, zeroes will be
+#         written. Both will result in identical contents.
+#         Default is true. (Since 2.4)
+#
+# @copy-mode: when to copy data to the destination; defaults to 'background'
+#             (Since: 3.0)
+#
+# @auto-finalize: When false, this job will wait in a PENDING state after it has
+#                 finished its work, waiting for @block-job-finalize before
+#                 making any block graph changes.
+#                 When true, this job will automatically
+#                 perform its abort or commit actions.
+#                 Defaults to true. (Since 3.1)
+#
+# @auto-dismiss: When false, this job will wait in a CONCLUDED state after it
+#                has completely ceased all work, and awaits @block-job-dismiss.
+#                When true, this job will automatically disappear from the query
+#                list without user intervention.
+#                Defaults to true. (Since 3.1)
+# Since: 1.3
+##
+{ 'struct': 'DriveMirror',
+  'data': { '*job-id': 'str', 'device': 'str', 'target': 'str',
+            '*format': 'str', '*node-name': 'str', '*replaces': 'str',
+            'sync': 'MirrorSyncMode', '*mode': 'NewImageMode',
+            '*speed': 'int', '*granularity': 'uint32',
+            '*buf-size': 'int', '*on-source-error': 'BlockdevOnError',
+            '*on-target-error': 'BlockdevOnError',
+            '*unmap': 'bool', '*copy-mode': 'MirrorCopyMode',
+            '*auto-finalize': 'bool', '*auto-dismiss': 'bool' } }
+
+##
+# @BlockDirtyBitmap:
+#
+# @node: name of device/node which the bitmap is tracking
+#
+# @name: name of the dirty bitmap
+#
+# Since: 2.4
+##
+{ 'struct': 'BlockDirtyBitmap',
+  'data': { 'node': 'str', 'name': 'str' } }
+
+##
+# @BlockDirtyBitmapAdd:
+#
+# @node: name of device/node which the bitmap is tracking
+#
+# @name: name of the dirty bitmap (must be less than 1024 bytes)
+#
+# @granularity: the bitmap granularity, default is 64k for
+#               block-dirty-bitmap-add
+#
+# @persistent: the bitmap is persistent, i.e. it will be saved to the
+#              corresponding block device image file on its close. For now only
+#              Qcow2 disks support persistent bitmaps. Default is false for
+#              block-dirty-bitmap-add. (Since: 2.10)
+#
+# @disabled: the bitmap is created in the disabled state, which means that
+#            it will not track drive changes. The bitmap may be enabled with
+#            block-dirty-bitmap-enable. Default is false. (Since: 4.0)
+#
+# Since: 2.4
+##
+{ 'struct': 'BlockDirtyBitmapAdd',
+  'data': { 'node': 'str', 'name': 'str', '*granularity': 'uint32',
+            '*persistent': 'bool', '*disabled': 'bool' } }
+
+##
+# @BlockDirtyBitmapMergeSource:
+#
+# @local: name of the bitmap, attached to the same node as target bitmap.
+#
+# @external: bitmap with specified node
+#
+# Since: 4.1
+##
+{ 'alternate': 'BlockDirtyBitmapMergeSource',
+  'data': { 'local': 'str',
+            'external': 'BlockDirtyBitmap' } }
+
+##
+# @BlockDirtyBitmapMerge:
+#
+# @node: name of device/node which the @target bitmap is tracking
+#
+# @target: name of the destination dirty bitmap
+#
+# @bitmaps: name(s) of the source dirty bitmap(s) at @node and/or fully
+#           specified BlockDirtyBitmap elements. The latter are supported
+#           since 4.1.
+#
+# Since: 4.0
+##
+{ 'struct': 'BlockDirtyBitmapMerge',
+  'data': { 'node': 'str', 'target': 'str',
+            'bitmaps': ['BlockDirtyBitmapMergeSource'] } }
+
+##
+# @block-dirty-bitmap-add:
+#
+# Create a dirty bitmap with a name on the node, and start tracking the writes.
+#
+# Returns: - nothing on success
+#          - If @node is not a valid block device or node, DeviceNotFound
+#          - If @name is already taken, GenericError with an explanation
+#
+# Since: 2.4
+#
+# Example:
+#
+# -> { "execute": "block-dirty-bitmap-add",
+#      "arguments": { "node": "drive0", "name": "bitmap0" } }
+# <- { "return": {} }
+#
+##
+{ 'command': 'block-dirty-bitmap-add',
+  'data': 'BlockDirtyBitmapAdd' }
+
+##
+# @block-dirty-bitmap-remove:
+#
+# Stop write tracking and remove the dirty bitmap that was created
+# with block-dirty-bitmap-add. If the bitmap is persistent, remove it from its
+# storage too.
+#
+# Returns: - nothing on success
+#          - If @node is not a valid block device or node, DeviceNotFound
+#          - If @name is not found, GenericError with an explanation
+#          - if @name is frozen by an operation, GenericError
+#
+# Since: 2.4
+#
+# Example:
+#
+# -> { "execute": "block-dirty-bitmap-remove",
+#      "arguments": { "node": "drive0", "name": "bitmap0" } }
+# <- { "return": {} }
+#
+##
+{ 'command': 'block-dirty-bitmap-remove',
+  'data': 'BlockDirtyBitmap' }
+
+##
+# @block-dirty-bitmap-clear:
+#
+# Clear (reset) a dirty bitmap on the device, so that an incremental
+# backup from this point in time forward will only backup clusters
+# modified after this clear operation.
+#
+# Returns: - nothing on success
+#          - If @node is not a valid block device, DeviceNotFound
+#          - If @name is not found, GenericError with an explanation
+#
+# Since: 2.4
+#
+# Example:
+#
+# -> { "execute": "block-dirty-bitmap-clear",
+#      "arguments": { "node": "drive0", "name": "bitmap0" } }
+# <- { "return": {} }
+#
+##
+{ 'command': 'block-dirty-bitmap-clear',
+  'data': 'BlockDirtyBitmap' }
+
+##
+# @block-dirty-bitmap-enable:
+#
+# Enables a dirty bitmap so that it will begin tracking disk changes.
+#
+# Returns: - nothing on success
+#          - If @node is not a valid block device, DeviceNotFound
+#          - If @name is not found, GenericError with an explanation
+#
+# Since: 4.0
+#
+# Example:
+#
+# -> { "execute": "block-dirty-bitmap-enable",
+#      "arguments": { "node": "drive0", "name": "bitmap0" } }
+# <- { "return": {} }
+#
+##
+{ 'command': 'block-dirty-bitmap-enable',
+  'data': 'BlockDirtyBitmap' }
+
+##
+# @block-dirty-bitmap-disable:
+#
+# Disables a dirty bitmap so that it will stop tracking disk changes.
+#
+# Returns: - nothing on success
+#          - If @node is not a valid block device, DeviceNotFound
+#          - If @name is not found, GenericError with an explanation
+#
+# Since: 4.0
+#
+# Example:
+#
+# -> { "execute": "block-dirty-bitmap-disable",
+#      "arguments": { "node": "drive0", "name": "bitmap0" } }
+# <- { "return": {} }
+#
+##
+{ 'command': 'block-dirty-bitmap-disable',
+  'data': 'BlockDirtyBitmap' }
+
+##
+# @block-dirty-bitmap-merge:
+#
+# Merge dirty bitmaps listed in @bitmaps to the @target dirty bitmap.
+# Dirty bitmaps in @bitmaps will be unchanged, except if it also appears
+# as the @target bitmap. Any bits already set in @target will still be
+# set after the merge, i.e., this operation does not clear the target.
+# On error, @target is unchanged.
+#
+# The resulting bitmap will count as dirty any clusters that were dirty in any
+# of the source bitmaps. This can be used to achieve backup checkpoints, or in
+# simpler usages, to copy bitmaps.
+#
+# Returns: - nothing on success
+#          - If @node is not a valid block device, DeviceNotFound
+#          - If any bitmap in @bitmaps or @target is not found, GenericError
+#          - If any of the bitmaps have different sizes or granularities,
+#            GenericError
+#
+# Since: 4.0
+#
+# Example:
+#
+# -> { "execute": "block-dirty-bitmap-merge",
+#      "arguments": { "node": "drive0", "target": "bitmap0",
+#                     "bitmaps": ["bitmap1"] } }
+# <- { "return": {} }
+#
+##
+{ 'command': 'block-dirty-bitmap-merge',
+  'data': 'BlockDirtyBitmapMerge' }
+
+##
+# @BlockDirtyBitmapSha256:
+#
+# SHA256 hash of dirty bitmap data
+#
+# @sha256: ASCII representation of SHA256 bitmap hash
+#
+# Since: 2.10
+##
+{ 'struct': 'BlockDirtyBitmapSha256',
+  'data': {'sha256': 'str'} }
+
+##
+# @x-debug-block-dirty-bitmap-sha256:
+#
+# Get bitmap SHA256.
+#
+# Returns: - BlockDirtyBitmapSha256 on success
+#          - If @node is not a valid block device, DeviceNotFound
+#          - If @name is not found or if hashing has failed, GenericError with an
+#            explanation
+#
+# Since: 2.10
+##
+{ 'command': 'x-debug-block-dirty-bitmap-sha256',
+  'data': 'BlockDirtyBitmap', 'returns': 'BlockDirtyBitmapSha256' }
+
+##
+# @blockdev-mirror:
+#
+# Start mirroring a block device's writes to a new destination.
+#
+# @job-id: identifier for the newly-created block job. If
+#          omitted, the device name will be used. (Since 2.7)
+#
+# @device: The device name or node-name of a root node whose writes should be
+#          mirrored.
+#
+# @target: the id or node-name of the block device to mirror to. This mustn't be
+#          attached to guest.
+#
+# @replaces: with sync=full graph node name to be replaced by the new
+#            image when a whole image copy is done. This can be used to repair
+#            broken Quorum files.  By default, @device is replaced, although
+#            implicitly created filters on it are kept.
+#
+# @speed:  the maximum speed, in bytes per second
+#
+# @sync: what parts of the disk image should be copied to the destination
+#        (all the disk, only the sectors allocated in the topmost image, or
+#        only new I/O).
+#
+# @granularity: granularity of the dirty bitmap, default is 64K
+#               if the image format doesn't have clusters, 4K if the clusters
+#               are smaller than that, else the cluster size.  Must be a
+#               power of 2 between 512 and 64M
+#
+# @buf-size: maximum amount of data in flight from source to
+#            target
+#
+# @on-source-error: the action to take on an error on the source,
+#                   default 'report'.  'stop' and 'enospc' can only be used
+#                   if the block device supports io-status (see BlockInfo).
+#
+# @on-target-error: the action to take on an error on the target,
+#                   default 'report' (no limitations, since this applies to
+#                   a different block device than @device).
+#
+# @filter-node-name: the node name that should be assigned to the
+#                    filter driver that the mirror job inserts into the graph
+#                    above @device. If this option is not given, a node name is
+#                    autogenerated. (Since: 2.9)
+#
+# @copy-mode: when to copy data to the destination; defaults to 'background'
+#             (Since: 3.0)
+#
+# @auto-finalize: When false, this job will wait in a PENDING state after it has
+#                 finished its work, waiting for @block-job-finalize before
+#                 making any block graph changes.
+#                 When true, this job will automatically
+#                 perform its abort or commit actions.
+#                 Defaults to true. (Since 3.1)
+#
+# @auto-dismiss: When false, this job will wait in a CONCLUDED state after it
+#                has completely ceased all work, and awaits @block-job-dismiss.
+#                When true, this job will automatically disappear from the query
+#                list without user intervention.
+#                Defaults to true. (Since 3.1)
+# Returns: nothing on success.
+#
+# Since: 2.6
+#
+# Example:
+#
+# -> { "execute": "blockdev-mirror",
+#      "arguments": { "device": "ide-hd0",
+#                     "target": "target0",
+#                     "sync": "full" } }
+# <- { "return": {} }
+#
+##
+{ 'command': 'blockdev-mirror',
+  'data': { '*job-id': 'str', 'device': 'str', 'target': 'str',
+            '*replaces': 'str',
+            'sync': 'MirrorSyncMode',
+            '*speed': 'int', '*granularity': 'uint32',
+            '*buf-size': 'int', '*on-source-error': 'BlockdevOnError',
+            '*on-target-error': 'BlockdevOnError',
+            '*filter-node-name': 'str',
+            '*copy-mode': 'MirrorCopyMode',
+            '*auto-finalize': 'bool', '*auto-dismiss': 'bool' } }
+
+##
+# @BlockIOThrottle:
+#
+# A set of parameters describing block throttling.
+#
+# @device: Block device name
+#
+# @id: The name or QOM path of the guest device (since: 2.8)
+#
+# @bps: total throughput limit in bytes per second
+#
+# @bps_rd: read throughput limit in bytes per second
+#
+# @bps_wr: write throughput limit in bytes per second
+#
+# @iops: total I/O operations per second
+#
+# @iops_rd: read I/O operations per second
+#
+# @iops_wr: write I/O operations per second
+#
+# @bps_max: total throughput limit during bursts,
+#           in bytes (Since 1.7)
+#
+# @bps_rd_max: read throughput limit during bursts,
+#              in bytes (Since 1.7)
+#
+# @bps_wr_max: write throughput limit during bursts,
+#              in bytes (Since 1.7)
+#
+# @iops_max: total I/O operations per second during bursts,
+#            in bytes (Since 1.7)
+#
+# @iops_rd_max: read I/O operations per second during bursts,
+#               in bytes (Since 1.7)
+#
+# @iops_wr_max: write I/O operations per second during bursts,
+#               in bytes (Since 1.7)
+#
+# @bps_max_length: maximum length of the @bps_max burst
+#                  period, in seconds. It must only
+#                  be set if @bps_max is set as well.
+#                  Defaults to 1. (Since 2.6)
+#
+# @bps_rd_max_length: maximum length of the @bps_rd_max
+#                     burst period, in seconds. It must only
+#                     be set if @bps_rd_max is set as well.
+#                     Defaults to 1. (Since 2.6)
+#
+# @bps_wr_max_length: maximum length of the @bps_wr_max
+#                     burst period, in seconds. It must only
+#                     be set if @bps_wr_max is set as well.
+#                     Defaults to 1. (Since 2.6)
+#
+# @iops_max_length: maximum length of the @iops burst
+#                   period, in seconds. It must only
+#                   be set if @iops_max is set as well.
+#                   Defaults to 1. (Since 2.6)
+#
+# @iops_rd_max_length: maximum length of the @iops_rd_max
+#                      burst period, in seconds. It must only
+#                      be set if @iops_rd_max is set as well.
+#                      Defaults to 1. (Since 2.6)
+#
+# @iops_wr_max_length: maximum length of the @iops_wr_max
+#                      burst period, in seconds. It must only
+#                      be set if @iops_wr_max is set as well.
+#                      Defaults to 1. (Since 2.6)
+#
+# @iops_size: an I/O size in bytes (Since 1.7)
+#
+# @group: throttle group name (Since 2.4)
+#
+# Features:
+# @deprecated: Member @device is deprecated.  Use @id instead.
+#
+# Since: 1.1
+##
+{ 'struct': 'BlockIOThrottle',
+  'data': { '*device': { 'type': 'str', 'features': [ 'deprecated' ] },
+            '*id': 'str', 'bps': 'int', 'bps_rd': 'int',
+            'bps_wr': 'int', 'iops': 'int', 'iops_rd': 'int', 'iops_wr': 'int',
+            '*bps_max': 'int', '*bps_rd_max': 'int',
+            '*bps_wr_max': 'int', '*iops_max': 'int',
+            '*iops_rd_max': 'int', '*iops_wr_max': 'int',
+            '*bps_max_length': 'int', '*bps_rd_max_length': 'int',
+            '*bps_wr_max_length': 'int', '*iops_max_length': 'int',
+            '*iops_rd_max_length': 'int', '*iops_wr_max_length': 'int',
+            '*iops_size': 'int', '*group': 'str' } }
+
+##
+# @ThrottleLimits:
+#
+# Limit parameters for throttling.
+# Since some limit combinations are illegal, limits should always be set in one
+# transaction. All fields are optional. When setting limits, if a field is
+# missing the current value is not changed.
+#
+# @iops-total: limit total I/O operations per second
+# @iops-total-max: I/O operations burst
+# @iops-total-max-length: length of the iops-total-max burst period, in seconds
+#                         It must only be set if @iops-total-max is set as well.
+# @iops-read: limit read operations per second
+# @iops-read-max: I/O operations read burst
+# @iops-read-max-length: length of the iops-read-max burst period, in seconds
+#                        It must only be set if @iops-read-max is set as well.
+# @iops-write: limit write operations per second
+# @iops-write-max: I/O operations write burst
+# @iops-write-max-length: length of the iops-write-max burst period, in seconds
+#                         It must only be set if @iops-write-max is set as well.
+# @bps-total: limit total bytes per second
+# @bps-total-max: total bytes burst
+# @bps-total-max-length: length of the bps-total-max burst period, in seconds.
+#                        It must only be set if @bps-total-max is set as well.
+# @bps-read: limit read bytes per second
+# @bps-read-max: total bytes read burst
+# @bps-read-max-length: length of the bps-read-max burst period, in seconds
+#                       It must only be set if @bps-read-max is set as well.
+# @bps-write: limit write bytes per second
+# @bps-write-max: total bytes write burst
+# @bps-write-max-length: length of the bps-write-max burst period, in seconds
+#                        It must only be set if @bps-write-max is set as well.
+# @iops-size: when limiting by iops max size of an I/O in bytes
+#
+# Since: 2.11
+##
+{ 'struct': 'ThrottleLimits',
+  'data': { '*iops-total' : 'int', '*iops-total-max' : 'int',
+            '*iops-total-max-length' : 'int', '*iops-read' : 'int',
+            '*iops-read-max' : 'int', '*iops-read-max-length' : 'int',
+            '*iops-write' : 'int', '*iops-write-max' : 'int',
+            '*iops-write-max-length' : 'int', '*bps-total' : 'int',
+            '*bps-total-max' : 'int', '*bps-total-max-length' : 'int',
+            '*bps-read' : 'int', '*bps-read-max' : 'int',
+            '*bps-read-max-length' : 'int', '*bps-write' : 'int',
+            '*bps-write-max' : 'int', '*bps-write-max-length' : 'int',
+            '*iops-size' : 'int' } }
+
+##
+# @block-stream:
+#
+# Copy data from a backing file into a block device.
+#
+# The block streaming operation is performed in the background until the entire
+# backing file has been copied.  This command returns immediately once streaming
+# has started.  The status of ongoing block streaming operations can be checked
+# with query-block-jobs.  The operation can be stopped before it has completed
+# using the block-job-cancel command.
+#
+# The node that receives the data is called the top image, can be located in
+# any part of the chain (but always above the base image; see below) and can be
+# specified using its device or node name. Earlier qemu versions only allowed
+# 'device' to name the top level node; presence of the 'base-node' parameter
+# during introspection can be used as a witness of the enhanced semantics
+# of 'device'.
+#
+# If a base file is specified then sectors are not copied from that base file and
+# its backing chain.  This can be used to stream a subset of the backing file
+# chain instead of flattening the entire image.
+# When streaming completes the image file will have the base file as its backing
+# file, unless that node was changed while the job was running.  In that case,
+# base's parent's backing (or filtered, whichever exists) child (i.e., base at
+# the beginning of the job) will be the new backing file.
+#
+# On successful completion the image file is updated to drop the backing file
+# and the BLOCK_JOB_COMPLETED event is emitted.
+#
+# In case @device is a filter node, block-stream modifies the first non-filter
+# overlay node below it to point to the new backing node instead of modifying
+# @device itself.
+#
+# @job-id: identifier for the newly-created block job. If
+#          omitted, the device name will be used. (Since 2.7)
+#
+# @device: the device or node name of the top image
+#
+# @base: the common backing file name.
+#        It cannot be set if @base-node is also set.
+#
+# @base-node: the node name of the backing file.
+#             It cannot be set if @base is also set. (Since 2.8)
+#
+# @backing-file: The backing file string to write into the top
+#                image. This filename is not validated.
+#
+#                If a pathname string is such that it cannot be
+#                resolved by QEMU, that means that subsequent QMP or
+#                HMP commands must use node-names for the image in
+#                question, as filename lookup methods will fail.
+#
+#                If not specified, QEMU will automatically determine
+#                the backing file string to use, or error out if there
+#                is no obvious choice.  Care should be taken when
+#                specifying the string, to specify a valid filename or
+#                protocol.
+#                (Since 2.1)
+#
+# @speed: the maximum speed, in bytes per second
+#
+# @on-error: the action to take on an error (default report).
+#            'stop' and 'enospc' can only be used if the block device
+#            supports io-status (see BlockInfo).  Since 1.3.
+#
+# @auto-finalize: When false, this job will wait in a PENDING state after it has
+#                 finished its work, waiting for @block-job-finalize before
+#                 making any block graph changes.
+#                 When true, this job will automatically
+#                 perform its abort or commit actions.
+#                 Defaults to true. (Since 3.1)
+#
+# @auto-dismiss: When false, this job will wait in a CONCLUDED state after it
+#                has completely ceased all work, and awaits @block-job-dismiss.
+#                When true, this job will automatically disappear from the query
+#                list without user intervention.
+#                Defaults to true. (Since 3.1)
+#
+# Returns: - Nothing on success.
+#          - If @device does not exist, DeviceNotFound.
+#
+# Since: 1.1
+#
+# Example:
+#
+# -> { "execute": "block-stream",
+#      "arguments": { "device": "virtio0",
+#                     "base": "/tmp/master.qcow2" } }
+# <- { "return": {} }
+#
+##
+{ 'command': 'block-stream',
+  'data': { '*job-id': 'str', 'device': 'str', '*base': 'str',
+            '*base-node': 'str', '*backing-file': 'str', '*speed': 'int',
+            '*on-error': 'BlockdevOnError',
+            '*auto-finalize': 'bool', '*auto-dismiss': 'bool' } }
+
+##
+# @block-job-set-speed:
+#
+# Set maximum speed for a background block operation.
+#
+# This command can only be issued when there is an active block job.
+#
+# Throttling can be disabled by setting the speed to 0.
+#
+# @device: The job identifier. This used to be a device name (hence
+#          the name of the parameter), but since QEMU 2.7 it can have
+#          other values.
+#
+# @speed: the maximum speed, in bytes per second, or 0 for unlimited.
+#         Defaults to 0.
+#
+# Returns: - Nothing on success
+#          - If no background operation is active on this device, DeviceNotActive
+#
+# Since: 1.1
+##
+{ 'command': 'block-job-set-speed',
+  'data': { 'device': 'str', 'speed': 'int' } }
+
+##
+# @block-job-cancel:
+#
+# Stop an active background block operation.
+#
+# This command returns immediately after marking the active background block
+# operation for cancellation.  It is an error to call this command if no
+# operation is in progress.
+#
+# The operation will cancel as soon as possible and then emit the
+# BLOCK_JOB_CANCELLED event.  Before that happens the job is still visible when
+# enumerated using query-block-jobs.
+#
+# Note that if you issue 'block-job-cancel' after 'drive-mirror' has indicated
+# (via the event BLOCK_JOB_READY) that the source and destination are
+# synchronized, then the event triggered by this command changes to
+# BLOCK_JOB_COMPLETED, to indicate that the mirroring has ended and the
+# destination now has a point-in-time copy tied to the time of the cancellation.
+#
+# For streaming, the image file retains its backing file unless the streaming
+# operation happens to complete just as it is being cancelled.  A new streaming
+# operation can be started at a later time to finish copying all data from the
+# backing file.
+#
+# @device: The job identifier. This used to be a device name (hence
+#          the name of the parameter), but since QEMU 2.7 it can have
+#          other values.
+#
+# @force: If true, and the job has already emitted the event BLOCK_JOB_READY,
+#         abandon the job immediately (even if it is paused) instead of waiting
+#         for the destination to complete its final synchronization (since 1.3)
+#
+# Returns: - Nothing on success
+#          - If no background operation is active on this device, DeviceNotActive
+#
+# Since: 1.1
+##
+{ 'command': 'block-job-cancel', 'data': { 'device': 'str', '*force': 'bool' } }
+
+##
+# @block-job-pause:
+#
+# Pause an active background block operation.
+#
+# This command returns immediately after marking the active background block
+# operation for pausing.  It is an error to call this command if no
+# operation is in progress or if the job is already paused.
+#
+# The operation will pause as soon as possible.  No event is emitted when
+# the operation is actually paused.  Cancelling a paused job automatically
+# resumes it.
+#
+# @device: The job identifier. This used to be a device name (hence
+#          the name of the parameter), but since QEMU 2.7 it can have
+#          other values.
+#
+# Returns: - Nothing on success
+#          - If no background operation is active on this device, DeviceNotActive
+#
+# Since: 1.3
+##
+{ 'command': 'block-job-pause', 'data': { 'device': 'str' } }
+
+##
+# @block-job-resume:
+#
+# Resume an active background block operation.
+#
+# This command returns immediately after resuming a paused background block
+# operation.  It is an error to call this command if no operation is in
+# progress or if the job is not paused.
+#
+# This command also clears the error status of the job.
+#
+# @device: The job identifier. This used to be a device name (hence
+#          the name of the parameter), but since QEMU 2.7 it can have
+#          other values.
+#
+# Returns: - Nothing on success
+#          - If no background operation is active on this device, DeviceNotActive
+#
+# Since: 1.3
+##
+{ 'command': 'block-job-resume', 'data': { 'device': 'str' } }
+
+##
+# @block-job-complete:
+#
+# Manually trigger completion of an active background block operation.  This
+# is supported for drive mirroring, where it also switches the device to
+# write to the target path only.  The ability to complete is signaled with
+# a BLOCK_JOB_READY event.
+#
+# This command completes an active background block operation synchronously.
+# The ordering of this command's return with the BLOCK_JOB_COMPLETED event
+# is not defined.  Note that if an I/O error occurs during the processing of
+# this command: 1) the command itself will fail; 2) the error will be processed
+# according to the rerror/werror arguments that were specified when starting
+# the operation.
+#
+# A cancelled or paused job cannot be completed.
+#
+# @device: The job identifier. This used to be a device name (hence
+#          the name of the parameter), but since QEMU 2.7 it can have
+#          other values.
+#
+# Returns: - Nothing on success
+#          - If no background operation is active on this device, DeviceNotActive
+#
+# Since: 1.3
+##
+{ 'command': 'block-job-complete', 'data': { 'device': 'str' } }
+
+##
+# @block-job-dismiss:
+#
+# For jobs that have already concluded, remove them from the block-job-query
+# list. This command only needs to be run for jobs which were started with
+# QEMU 2.12+ job lifetime management semantics.
+#
+# This command will refuse to operate on any job that has not yet reached
+# its terminal state, JOB_STATUS_CONCLUDED. For jobs that make use of the
+# BLOCK_JOB_READY event, block-job-cancel or block-job-complete will still need
+# to be used as appropriate.
+#
+# @id: The job identifier.
+#
+# Returns: Nothing on success
+#
+# Since: 2.12
+##
+{ 'command': 'block-job-dismiss', 'data': { 'id': 'str' } }
+
+##
+# @block-job-finalize:
+#
+# Once a job that has manual=true reaches the pending state, it can be
+# instructed to finalize any graph changes and do any necessary cleanup
+# via this command.
+# For jobs in a transaction, instructing one job to finalize will force
+# ALL jobs in the transaction to finalize, so it is only necessary to instruct
+# a single member job to finalize.
+#
+# @id: The job identifier.
+#
+# Returns: Nothing on success
+#
+# Since: 2.12
+##
+{ 'command': 'block-job-finalize', 'data': { 'id': 'str' } }
+
+##
+# @BlockdevDiscardOptions:
+#
+# Determines how to handle discard requests.
+#
+# @ignore: Ignore the request
+# @unmap: Forward as an unmap request
+#
+# Since: 2.9
+##
+{ 'enum': 'BlockdevDiscardOptions',
+  'data': [ 'ignore', 'unmap' ] }
+
+##
+# @BlockdevDetectZeroesOptions:
+#
+# Describes the operation mode for the automatic conversion of plain
+# zero writes by the OS to driver specific optimized zero write commands.
+#
+# @off: Disabled (default)
+# @on: Enabled
+# @unmap: Enabled and even try to unmap blocks if possible. This requires
+#         also that @BlockdevDiscardOptions is set to unmap for this device.
+#
+# Since: 2.1
+##
+{ 'enum': 'BlockdevDetectZeroesOptions',
+  'data': [ 'off', 'on', 'unmap' ] }
+
+##
+# @BlockdevAioOptions:
+#
+# Selects the AIO backend to handle I/O requests
+#
+# @threads: Use qemu's thread pool
+# @native: Use native AIO backend (only Linux and Windows)
+# @io_uring: Use linux io_uring (since 5.0)
+#
+# Since: 2.9
+##
+{ 'enum': 'BlockdevAioOptions',
+  'data': [ 'threads', 'native',
+            { 'name': 'io_uring', 'if': 'defined(CONFIG_LINUX_IO_URING)' } ] }
+
+##
+# @BlockdevCacheOptions:
+#
+# Includes cache-related options for block devices
+#
+# @direct: enables use of O_DIRECT (bypass the host page cache;
+#          default: false)
+# @no-flush: ignore any flush requests for the device (default:
+#            false)
+#
+# Since: 2.9
+##
+{ 'struct': 'BlockdevCacheOptions',
+  'data': { '*direct': 'bool',
+            '*no-flush': 'bool' } }
+
+##
+# @BlockdevDriver:
+#
+# Drivers that are supported in block device operations.
+#
+# @throttle: Since 2.11
+# @nvme: Since 2.12
+# @copy-on-read: Since 3.0
+# @blklogwrites: Since 3.0
+# @blkreplay: Since 4.2
+# @compress: Since 5.0
+#
+# Since: 2.9
+##
+{ 'enum': 'BlockdevDriver',
+  'data': [ 'blkdebug', 'blklogwrites', 'blkreplay', 'blkverify', 'bochs',
+            'cloop', 'compress', 'copy-on-read', 'dmg', 'file', 'ftp', 'ftps',
+            'gluster', 'host_cdrom', 'host_device', 'http', 'https', 'iscsi',
+            'luks', 'nbd', 'nfs', 'null-aio', 'null-co', 'nvme', 'parallels',
+            'qcow', 'qcow2', 'qed', 'quorum', 'raw', 'rbd',
+            { 'name': 'replication', 'if': 'defined(CONFIG_REPLICATION)' },
+            'sheepdog',
+            'ssh', 'throttle', 'vdi', 'vhdx', 'vmdk', 'vpc', 'vvfat' ] }
+
+##
+# @BlockdevOptionsFile:
+#
+# Driver specific block device options for the file backend.
+#
+# @filename: path to the image file
+# @pr-manager: the id for the object that will handle persistent reservations
+#              for this device (default: none, forward the commands via SG_IO;
+#              since 2.11)
+# @aio: AIO backend (default: threads) (since: 2.8)
+# @locking: whether to enable file locking. If set to 'auto', only enable
+#           when Open File Descriptor (OFD) locking API is available
+#           (default: auto, since 2.10)
+# @drop-cache: invalidate page cache during live migration.  This prevents
+#              stale data on the migration destination with cache.direct=off.
+#              Currently only supported on Linux hosts.
+#              (default: on, since: 4.0)
+# @x-check-cache-dropped: whether to check that page cache was dropped on live
+#                         migration.  May cause noticeable delays if the image
+#                         file is large, do not use in production.
+#                         (default: off) (since: 3.0)
+#
+# Features:
+# @dynamic-auto-read-only: If present, enabled auto-read-only means that the
+#                          driver will open the image read-only at first,
+#                          dynamically reopen the image file read-write when
+#                          the first writer is attached to the node and reopen
+#                          read-only when the last writer is detached. This
+#                          allows giving QEMU write permissions only on demand
+#                          when an operation actually needs write access.
+#
+# Since: 2.9
+##
+{ 'struct': 'BlockdevOptionsFile',
+  'data': { 'filename': 'str',
+            '*pr-manager': 'str',
+            '*locking': 'OnOffAuto',
+            '*aio': 'BlockdevAioOptions',
+            '*drop-cache': {'type': 'bool',
+                            'if': 'defined(CONFIG_LINUX)'},
+            '*x-check-cache-dropped': 'bool' },
+  'features': [ { 'name': 'dynamic-auto-read-only',
+                  'if': 'defined(CONFIG_POSIX)' } ] }
+
+##
+# @BlockdevOptionsNull:
+#
+# Driver specific block device options for the null backend.
+#
+# @size: size of the device in bytes.
+# @latency-ns: emulated latency (in nanoseconds) in processing
+#              requests. Default to zero which completes requests immediately.
+#              (Since 2.4)
+# @read-zeroes: if true, reads from the device produce zeroes; if false, the
+#               buffer is left unchanged. (default: false; since: 4.1)
+#
+# Since: 2.9
+##
+{ 'struct': 'BlockdevOptionsNull',
+  'data': { '*size': 'int', '*latency-ns': 'uint64', '*read-zeroes': 'bool' } }
+
+##
+# @BlockdevOptionsNVMe:
+#
+# Driver specific block device options for the NVMe backend.
+#
+# @device: PCI controller address of the NVMe device in
+#          format hhhh:bb:ss.f (host:bus:slot.function)
+# @namespace: namespace number of the device, starting from 1.
+#
+# Note that the PCI @device must have been unbound from any host
+# kernel driver before instructing QEMU to add the blockdev.
+#
+# Since: 2.12
+##
+{ 'struct': 'BlockdevOptionsNVMe',
+  'data': { 'device': 'str', 'namespace': 'int' } }
+
+##
+# @BlockdevOptionsVVFAT:
+#
+# Driver specific block device options for the vvfat protocol.
+#
+# @dir: directory to be exported as FAT image
+# @fat-type: FAT type: 12, 16 or 32
+# @floppy: whether to export a floppy image (true) or
+#          partitioned hard disk (false; default)
+# @label: set the volume label, limited to 11 bytes. FAT16 and
+#         FAT32 traditionally have some restrictions on labels, which are
+#         ignored by most operating systems. Defaults to "QEMU VVFAT".
+#         (since 2.4)
+# @rw: whether to allow write operations (default: false)
+#
+# Since: 2.9
+##
+{ 'struct': 'BlockdevOptionsVVFAT',
+  'data': { 'dir': 'str', '*fat-type': 'int', '*floppy': 'bool',
+            '*label': 'str', '*rw': 'bool' } }
+
+##
+# @BlockdevOptionsGenericFormat:
+#
+# Driver specific block device options for image format that have no option
+# besides their data source.
+#
+# @file: reference to or definition of the data source block device
+#
+# Since: 2.9
+##
+{ 'struct': 'BlockdevOptionsGenericFormat',
+  'data': { 'file': 'BlockdevRef' } }
+
+##
+# @BlockdevOptionsLUKS:
+#
+# Driver specific block device options for LUKS.
+#
+# @key-secret: the ID of a QCryptoSecret object providing
+#              the decryption key (since 2.6). Mandatory except when
+#              doing a metadata-only probe of the image.
+#
+# Since: 2.9
+##
+{ 'struct': 'BlockdevOptionsLUKS',
+  'base': 'BlockdevOptionsGenericFormat',
+  'data': { '*key-secret': 'str' } }
+
+
+##
+# @BlockdevOptionsGenericCOWFormat:
+#
+# Driver specific block device options for image format that have no option
+# besides their data source and an optional backing file.
+#
+# @backing: reference to or definition of the backing file block
+#           device, null disables the backing file entirely.
+#           Defaults to the backing file stored the image file.
+#
+# Since: 2.9
+##
+{ 'struct': 'BlockdevOptionsGenericCOWFormat',
+  'base': 'BlockdevOptionsGenericFormat',
+  'data': { '*backing': 'BlockdevRefOrNull' } }
+
+##
+# @Qcow2OverlapCheckMode:
+#
+# General overlap check modes.
+#
+# @none: Do not perform any checks
+#
+# @constant: Perform only checks which can be done in constant time and
+#            without reading anything from disk
+#
+# @cached: Perform only checks which can be done without reading anything
+#          from disk
+#
+# @all: Perform all available overlap checks
+#
+# Since: 2.9
+##
+{ 'enum': 'Qcow2OverlapCheckMode',
+  'data': [ 'none', 'constant', 'cached', 'all' ] }
+
+##
+# @Qcow2OverlapCheckFlags:
+#
+# Structure of flags for each metadata structure. Setting a field to 'true'
+# makes qemu guard that structure against unintended overwriting. The default
+# value is chosen according to the template given.
+#
+# @template: Specifies a template mode which can be adjusted using the other
+#            flags, defaults to 'cached'
+#
+# @bitmap-directory: since 3.0
+#
+# Since: 2.9
+##
+{ 'struct': 'Qcow2OverlapCheckFlags',
+  'data': { '*template':         'Qcow2OverlapCheckMode',
+            '*main-header':      'bool',
+            '*active-l1':        'bool',
+            '*active-l2':        'bool',
+            '*refcount-table':   'bool',
+            '*refcount-block':   'bool',
+            '*snapshot-table':   'bool',
+            '*inactive-l1':      'bool',
+            '*inactive-l2':      'bool',
+            '*bitmap-directory': 'bool' } }
+
+##
+# @Qcow2OverlapChecks:
+#
+# Specifies which metadata structures should be guarded against unintended
+# overwriting.
+#
+# @flags: set of flags for separate specification of each metadata structure
+#         type
+#
+# @mode: named mode which chooses a specific set of flags
+#
+# Since: 2.9
+##
+{ 'alternate': 'Qcow2OverlapChecks',
+  'data': { 'flags': 'Qcow2OverlapCheckFlags',
+            'mode':  'Qcow2OverlapCheckMode' } }
+
+##
+# @BlockdevQcowEncryptionFormat:
+#
+# @aes: AES-CBC with plain64 initialization vectors
+#
+# Since: 2.10
+##
+{ 'enum': 'BlockdevQcowEncryptionFormat',
+  'data': [ 'aes' ] }
+
+##
+# @BlockdevQcowEncryption:
+#
+# Since: 2.10
+##
+{ 'union': 'BlockdevQcowEncryption',
+  'base': { 'format': 'BlockdevQcowEncryptionFormat' },
+  'discriminator': 'format',
+  'data': { 'aes': 'QCryptoBlockOptionsQCow' } }
+
+##
+# @BlockdevOptionsQcow:
+#
+# Driver specific block device options for qcow.
+#
+# @encrypt: Image decryption options. Mandatory for
+#           encrypted images, except when doing a metadata-only
+#           probe of the image.
+#
+# Since: 2.10
+##
+{ 'struct': 'BlockdevOptionsQcow',
+  'base': 'BlockdevOptionsGenericCOWFormat',
+  'data': { '*encrypt': 'BlockdevQcowEncryption' } }
+
+
+
+##
+# @BlockdevQcow2EncryptionFormat:
+# @aes: AES-CBC with plain64 initialization vectors
+#
+# Since: 2.10
+##
+{ 'enum': 'BlockdevQcow2EncryptionFormat',
+  'data': [ 'aes', 'luks' ] }
+
+##
+# @BlockdevQcow2Encryption:
+#
+# Since: 2.10
+##
+{ 'union': 'BlockdevQcow2Encryption',
+  'base': { 'format': 'BlockdevQcow2EncryptionFormat' },
+  'discriminator': 'format',
+  'data': { 'aes': 'QCryptoBlockOptionsQCow',
+            'luks': 'QCryptoBlockOptionsLUKS'} }
+
+##
+# @BlockdevOptionsQcow2:
+#
+# Driver specific block device options for qcow2.
+#
+# @lazy-refcounts: whether to enable the lazy refcounts
+#                  feature (default is taken from the image file)
+#
+# @pass-discard-request: whether discard requests to the qcow2
+#                        device should be forwarded to the data source
+#
+# @pass-discard-snapshot: whether discard requests for the data source
+#                         should be issued when a snapshot operation (e.g.
+#                         deleting a snapshot) frees clusters in the qcow2 file
+#
+# @pass-discard-other: whether discard requests for the data source
+#                      should be issued on other occasions where a cluster
+#                      gets freed
+#
+# @overlap-check: which overlap checks to perform for writes
+#                 to the image, defaults to 'cached' (since 2.2)
+#
+# @cache-size: the maximum total size of the L2 table and
+#              refcount block caches in bytes (since 2.2)
+#
+# @l2-cache-size: the maximum size of the L2 table cache in
+#                 bytes (since 2.2)
+#
+# @l2-cache-entry-size: the size of each entry in the L2 cache in
+#                       bytes. It must be a power of two between 512
+#                       and the cluster size. The default value is
+#                       the cluster size (since 2.12)
+#
+# @refcount-cache-size: the maximum size of the refcount block cache
+#                       in bytes (since 2.2)
+#
+# @cache-clean-interval: clean unused entries in the L2 and refcount
+#                        caches. The interval is in seconds. The default value
+#                        is 600 on supporting platforms, and 0 on other
+#                        platforms. 0 disables this feature. (since 2.5)
+#
+# @encrypt: Image decryption options. Mandatory for
+#           encrypted images, except when doing a metadata-only
+#           probe of the image. (since 2.10)
+#
+# @data-file: reference to or definition of the external data file.
+#             This may only be specified for images that require an
+#             external data file. If it is not specified for such
+#             an image, the data file name is loaded from the image
+#             file. (since 4.0)
+#
+# Since: 2.9
+##
+{ 'struct': 'BlockdevOptionsQcow2',
+  'base': 'BlockdevOptionsGenericCOWFormat',
+  'data': { '*lazy-refcounts': 'bool',
+            '*pass-discard-request': 'bool',
+            '*pass-discard-snapshot': 'bool',
+            '*pass-discard-other': 'bool',
+            '*overlap-check': 'Qcow2OverlapChecks',
+            '*cache-size': 'int',
+            '*l2-cache-size': 'int',
+            '*l2-cache-entry-size': 'int',
+            '*refcount-cache-size': 'int',
+            '*cache-clean-interval': 'int',
+            '*encrypt': 'BlockdevQcow2Encryption',
+            '*data-file': 'BlockdevRef' } }
+
+##
+# @SshHostKeyCheckMode:
+#
+# @none: Don't check the host key at all
+# @hash: Compare the host key with a given hash
+# @known_hosts: Check the host key against the known_hosts file
+#
+# Since: 2.12
+##
+{ 'enum': 'SshHostKeyCheckMode',
+  'data': [ 'none', 'hash', 'known_hosts' ] }
+
+##
+# @SshHostKeyCheckHashType:
+#
+# @md5: The given hash is an md5 hash
+# @sha1: The given hash is an sha1 hash
+#
+# Since: 2.12
+##
+{ 'enum': 'SshHostKeyCheckHashType',
+  'data': [ 'md5', 'sha1' ] }
+
+##
+# @SshHostKeyHash:
+#
+# @type: The hash algorithm used for the hash
+# @hash: The expected hash value
+#
+# Since: 2.12
+##
+{ 'struct': 'SshHostKeyHash',
+  'data': { 'type': 'SshHostKeyCheckHashType',
+            'hash': 'str' }}
+
+##
+# @SshHostKeyCheck:
+#
+# Since: 2.12
+##
+{ 'union': 'SshHostKeyCheck',
+  'base': { 'mode': 'SshHostKeyCheckMode' },
+  'discriminator': 'mode',
+  'data': { 'hash': 'SshHostKeyHash' } }
+
+##
+# @BlockdevOptionsSsh:
+#
+# @server:              host address
+#
+# @path:                path to the image on the host
+#
+# @user:                user as which to connect, defaults to current
+#                       local user name
+#
+# @host-key-check:      Defines how and what to check the host key against
+#                       (default: known_hosts)
+#
+# Since: 2.9
+##
+{ 'struct': 'BlockdevOptionsSsh',
+  'data': { 'server': 'InetSocketAddress',
+            'path': 'str',
+            '*user': 'str',
+            '*host-key-check': 'SshHostKeyCheck' } }
+
+
+##
+# @BlkdebugEvent:
+#
+# Trigger events supported by blkdebug.
+#
+# @l1_shrink_write_table: write zeros to the l1 table to shrink image.
+#                         (since 2.11)
+#
+# @l1_shrink_free_l2_clusters: discard the l2 tables. (since 2.11)
+#
+# @cor_write: a write due to copy-on-read (since 2.11)
+#
+# @cluster_alloc_space: an allocation of file space for a cluster (since 4.1)
+#
+# @none: triggers once at creation of the blkdebug node (since 4.1)
+#
+# Since: 2.9
+##
+{ 'enum': 'BlkdebugEvent', 'prefix': 'BLKDBG',
+  'data': [ 'l1_update', 'l1_grow_alloc_table', 'l1_grow_write_table',
+            'l1_grow_activate_table', 'l2_load', 'l2_update',
+            'l2_update_compressed', 'l2_alloc_cow_read', 'l2_alloc_write',
+            'read_aio', 'read_backing_aio', 'read_compressed', 'write_aio',
+            'write_compressed', 'vmstate_load', 'vmstate_save', 'cow_read',
+            'cow_write', 'reftable_load', 'reftable_grow', 'reftable_update',
+            'refblock_load', 'refblock_update', 'refblock_update_part',
+            'refblock_alloc', 'refblock_alloc_hookup', 'refblock_alloc_write',
+            'refblock_alloc_write_blocks', 'refblock_alloc_write_table',
+            'refblock_alloc_switch_table', 'cluster_alloc',
+            'cluster_alloc_bytes', 'cluster_free', 'flush_to_os',
+            'flush_to_disk', 'pwritev_rmw_head', 'pwritev_rmw_after_head',
+            'pwritev_rmw_tail', 'pwritev_rmw_after_tail', 'pwritev',
+            'pwritev_zero', 'pwritev_done', 'empty_image_prepare',
+            'l1_shrink_write_table', 'l1_shrink_free_l2_clusters',
+            'cor_write', 'cluster_alloc_space', 'none'] }
+
+##
+# @BlkdebugIOType:
+#
+# Kinds of I/O that blkdebug can inject errors in.
+#
+# @read: .bdrv_co_preadv()
+#
+# @write: .bdrv_co_pwritev()
+#
+# @write-zeroes: .bdrv_co_pwrite_zeroes()
+#
+# @discard: .bdrv_co_pdiscard()
+#
+# @flush: .bdrv_co_flush_to_disk()
+#
+# @block-status: .bdrv_co_block_status()
+#
+# Since: 4.1
+##
+{ 'enum': 'BlkdebugIOType', 'prefix': 'BLKDEBUG_IO_TYPE',
+  'data': [ 'read', 'write', 'write-zeroes', 'discard', 'flush',
+            'block-status' ] }
+
+##
+# @BlkdebugInjectErrorOptions:
+#
+# Describes a single error injection for blkdebug.
+#
+# @event: trigger event
+#
+# @state: the state identifier blkdebug needs to be in to
+#         actually trigger the event; defaults to "any"
+#
+# @iotype: the type of I/O operations on which this error should
+#          be injected; defaults to "all read, write,
+#          write-zeroes, discard, and flush operations"
+#          (since: 4.1)
+#
+# @errno: error identifier (errno) to be returned; defaults to
+#         EIO
+#
+# @sector: specifies the sector index which has to be affected
+#          in order to actually trigger the event; defaults to "any
+#          sector"
+#
+# @once: disables further events after this one has been
+#        triggered; defaults to false
+#
+# @immediately: fail immediately; defaults to false
+#
+# Since: 2.9
+##
+{ 'struct': 'BlkdebugInjectErrorOptions',
+  'data': { 'event': 'BlkdebugEvent',
+            '*state': 'int',
+            '*iotype': 'BlkdebugIOType',
+            '*errno': 'int',
+            '*sector': 'int',
+            '*once': 'bool',
+            '*immediately': 'bool' } }
+
+##
+# @BlkdebugSetStateOptions:
+#
+# Describes a single state-change event for blkdebug.
+#
+# @event: trigger event
+#
+# @state: the current state identifier blkdebug needs to be in;
+#         defaults to "any"
+#
+# @new_state: the state identifier blkdebug is supposed to assume if
+#             this event is triggered
+#
+# Since: 2.9
+##
+{ 'struct': 'BlkdebugSetStateOptions',
+  'data': { 'event': 'BlkdebugEvent',
+            '*state': 'int',
+            'new_state': 'int' } }
+
+##
+# @BlockdevOptionsBlkdebug:
+#
+# Driver specific block device options for blkdebug.
+#
+# @image: underlying raw block device (or image file)
+#
+# @config: filename of the configuration file
+#
+# @align: required alignment for requests in bytes, must be
+#         positive power of 2, or 0 for default
+#
+# @max-transfer: maximum size for I/O transfers in bytes, must be
+#                positive multiple of @align and of the underlying
+#                file's request alignment (but need not be a power of
+#                2), or 0 for default (since 2.10)
+#
+# @opt-write-zero: preferred alignment for write zero requests in bytes,
+#                  must be positive multiple of @align and of the
+#                  underlying file's request alignment (but need not be a
+#                  power of 2), or 0 for default (since 2.10)
+#
+# @max-write-zero: maximum size for write zero requests in bytes, must be
+#                  positive multiple of @align, of @opt-write-zero, and of
+#                  the underlying file's request alignment (but need not
+#                  be a power of 2), or 0 for default (since 2.10)
+#
+# @opt-discard: preferred alignment for discard requests in bytes, must
+#               be positive multiple of @align and of the underlying
+#               file's request alignment (but need not be a power of
+#               2), or 0 for default (since 2.10)
+#
+# @max-discard: maximum size for discard requests in bytes, must be
+#               positive multiple of @align, of @opt-discard, and of
+#               the underlying file's request alignment (but need not
+#               be a power of 2), or 0 for default (since 2.10)
+#
+# @inject-error: array of error injection descriptions
+#
+# @set-state: array of state-change descriptions
+#
+# @take-child-perms: Permissions to take on @image in addition to what
+#                    is necessary anyway (which depends on how the
+#                    blkdebug node is used).  Defaults to none.
+#                    (since 5.0)
+#
+# @unshare-child-perms: Permissions not to share on @image in addition
+#                       to what cannot be shared anyway (which depends
+#                       on how the blkdebug node is used).  Defaults
+#                       to none.  (since 5.0)
+#
+# Since: 2.9
+##
+{ 'struct': 'BlockdevOptionsBlkdebug',
+  'data': { 'image': 'BlockdevRef',
+            '*config': 'str',
+            '*align': 'int', '*max-transfer': 'int32',
+            '*opt-write-zero': 'int32', '*max-write-zero': 'int32',
+            '*opt-discard': 'int32', '*max-discard': 'int32',
+            '*inject-error': ['BlkdebugInjectErrorOptions'],
+            '*set-state': ['BlkdebugSetStateOptions'],
+            '*take-child-perms': ['BlockPermission'],
+            '*unshare-child-perms': ['BlockPermission'] } }
+
+##
+# @BlockdevOptionsBlklogwrites:
+#
+# Driver specific block device options for blklogwrites.
+#
+# @file: block device
+#
+# @log: block device used to log writes to @file
+#
+# @log-sector-size: sector size used in logging writes to @file, determines
+#                   granularity of offsets and sizes of writes (default: 512)
+#
+# @log-append: append to an existing log (default: false)
+#
+# @log-super-update-interval: interval of write requests after which the log
+#                             super block is updated to disk (default: 4096)
+#
+# Since: 3.0
+##
+{ 'struct': 'BlockdevOptionsBlklogwrites',
+  'data': { 'file': 'BlockdevRef',
+            'log': 'BlockdevRef',
+            '*log-sector-size': 'uint32',
+            '*log-append': 'bool',
+            '*log-super-update-interval': 'uint64' } }
+
+##
+# @BlockdevOptionsBlkverify:
+#
+# Driver specific block device options for blkverify.
+#
+# @test: block device to be tested
+#
+# @raw: raw image used for verification
+#
+# Since: 2.9
+##
+{ 'struct': 'BlockdevOptionsBlkverify',
+  'data': { 'test': 'BlockdevRef',
+            'raw': 'BlockdevRef' } }
+
+##
+# @BlockdevOptionsBlkreplay:
+#
+# Driver specific block device options for blkreplay.
+#
+# @image: disk image which should be controlled with blkreplay
+#
+# Since: 4.2
+##
+{ 'struct': 'BlockdevOptionsBlkreplay',
+  'data': { 'image': 'BlockdevRef' } }
+
+##
+# @QuorumReadPattern:
+#
+# An enumeration of quorum read patterns.
+#
+# @quorum: read all the children and do a quorum vote on reads
+#
+# @fifo: read only from the first child that has not failed
+#
+# Since: 2.9
+##
+{ 'enum': 'QuorumReadPattern', 'data': [ 'quorum', 'fifo' ] }
+
+##
+# @BlockdevOptionsQuorum:
+#
+# Driver specific block device options for Quorum
+#
+# @blkverify: true if the driver must print content mismatch
+#                  set to false by default
+#
+# @children: the children block devices to use
+#
+# @vote-threshold: the vote limit under which a read will fail
+#
+# @rewrite-corrupted: rewrite corrupted data when quorum is reached
+#                     (Since 2.1)
+#
+# @read-pattern: choose read pattern and set to quorum by default
+#                (Since 2.2)
+#
+# Since: 2.9
+##
+{ 'struct': 'BlockdevOptionsQuorum',
+  'data': { '*blkverify': 'bool',
+            'children': [ 'BlockdevRef' ],
+            'vote-threshold': 'int',
+            '*rewrite-corrupted': 'bool',
+            '*read-pattern': 'QuorumReadPattern' } }
+
+##
+# @BlockdevOptionsGluster:
+#
+# Driver specific block device options for Gluster
+#
+# @volume: name of gluster volume where VM image resides
+#
+# @path: absolute path to image file in gluster volume
+#
+# @server: gluster servers description
+#
+# @debug: libgfapi log level (default '4' which is Error)
+#         (Since 2.8)
+#
+# @logfile: libgfapi log file (default /dev/stderr) (Since 2.8)
+#
+# Since: 2.9
+##
+{ 'struct': 'BlockdevOptionsGluster',
+  'data': { 'volume': 'str',
+            'path': 'str',
+            'server': ['SocketAddress'],
+            '*debug': 'int',
+            '*logfile': 'str' } }
+
+##
+# @IscsiTransport:
+#
+# An enumeration of libiscsi transport types
+#
+# Since: 2.9
+##
+{ 'enum': 'IscsiTransport',
+  'data': [ 'tcp', 'iser' ] }
+
+##
+# @IscsiHeaderDigest:
+#
+# An enumeration of header digests supported by libiscsi
+#
+# Since: 2.9
+##
+{ 'enum': 'IscsiHeaderDigest',
+  'prefix': 'QAPI_ISCSI_HEADER_DIGEST',
+  'data': [ 'crc32c', 'none', 'crc32c-none', 'none-crc32c' ] }
+
+##
+# @BlockdevOptionsIscsi:
+#
+# @transport: The iscsi transport type
+#
+# @portal: The address of the iscsi portal
+#
+# @target: The target iqn name
+#
+# @lun: LUN to connect to. Defaults to 0.
+#
+# @user: User name to log in with. If omitted, no CHAP
+#        authentication is performed.
+#
+# @password-secret: The ID of a QCryptoSecret object providing
+#                   the password for the login. This option is required if
+#                   @user is specified.
+#
+# @initiator-name: The iqn name we want to identify to the target
+#                  as. If this option is not specified, an initiator name is
+#                  generated automatically.
+#
+# @header-digest: The desired header digest. Defaults to
+#                 none-crc32c.
+#
+# @timeout: Timeout in seconds after which a request will
+#           timeout. 0 means no timeout and is the default.
+#
+# Driver specific block device options for iscsi
+#
+# Since: 2.9
+##
+{ 'struct': 'BlockdevOptionsIscsi',
+  'data': { 'transport': 'IscsiTransport',
+            'portal': 'str',
+            'target': 'str',
+            '*lun': 'int',
+            '*user': 'str',
+            '*password-secret': 'str',
+            '*initiator-name': 'str',
+            '*header-digest': 'IscsiHeaderDigest',
+            '*timeout': 'int' } }
+
+
+##
+# @RbdAuthMode:
+#
+# Since: 3.0
+##
+{ 'enum': 'RbdAuthMode',
+  'data': [ 'cephx', 'none' ] }
+
+##
+# @BlockdevOptionsRbd:
+#
+# @pool: Ceph pool name.
+#
+# @namespace: Rados namespace name in the Ceph pool. (Since 5.0)
+#
+# @image: Image name in the Ceph pool.
+#
+# @conf: path to Ceph configuration file.  Values
+#        in the configuration file will be overridden by
+#        options specified via QAPI.
+#
+# @snapshot: Ceph snapshot name.
+#
+# @user: Ceph id name.
+#
+# @auth-client-required: Acceptable authentication modes.
+#                        This maps to Ceph configuration option
+#                        "auth_client_required".  (Since 3.0)
+#
+# @key-secret: ID of a QCryptoSecret object providing a key
+#              for cephx authentication.
+#              This maps to Ceph configuration option
+#              "key".  (Since 3.0)
+#
+# @server: Monitor host address and port.  This maps
+#          to the "mon_host" Ceph option.
+#
+# Since: 2.9
+##
+{ 'struct': 'BlockdevOptionsRbd',
+  'data': { 'pool': 'str',
+            '*namespace': 'str',
+            'image': 'str',
+            '*conf': 'str',
+            '*snapshot': 'str',
+            '*user': 'str',
+            '*auth-client-required': ['RbdAuthMode'],
+            '*key-secret': 'str',
+            '*server': ['InetSocketAddressBase'] } }
+
+##
+# @BlockdevOptionsSheepdog:
+#
+# Driver specific block device options for sheepdog
+#
+# @vdi: Virtual disk image name
+# @server: The Sheepdog server to connect to
+# @snap-id: Snapshot ID
+# @tag: Snapshot tag name
+#
+# Only one of @snap-id and @tag may be present.
+#
+# Since: 2.9
+##
+{ 'struct': 'BlockdevOptionsSheepdog',
+  'data': { 'server': 'SocketAddress',
+            'vdi': 'str',
+            '*snap-id': 'uint32',
+            '*tag': 'str' } }
+
+##
+# @ReplicationMode:
+#
+# An enumeration of replication modes.
+#
+# @primary: Primary mode, the vm's state will be sent to secondary QEMU.
+#
+# @secondary: Secondary mode, receive the vm's state from primary QEMU.
+#
+# Since: 2.9
+##
+{ 'enum' : 'ReplicationMode', 'data' : [ 'primary', 'secondary' ],
+  'if': 'defined(CONFIG_REPLICATION)' }
+
+##
+# @BlockdevOptionsReplication:
+#
+# Driver specific block device options for replication
+#
+# @mode: the replication mode
+#
+# @top-id: In secondary mode, node name or device ID of the root
+#          node who owns the replication node chain. Must not be given in
+#          primary mode.
+#
+# Since: 2.9
+##
+{ 'struct': 'BlockdevOptionsReplication',
+  'base': 'BlockdevOptionsGenericFormat',
+  'data': { 'mode': 'ReplicationMode',
+            '*top-id': 'str' },
+  'if': 'defined(CONFIG_REPLICATION)' }
+
+##
+# @NFSTransport:
+#
+# An enumeration of NFS transport types
+#
+# @inet: TCP transport
+#
+# Since: 2.9
+##
+{ 'enum': 'NFSTransport',
+  'data': [ 'inet' ] }
+
+##
+# @NFSServer:
+#
+# Captures the address of the socket
+#
+# @type: transport type used for NFS (only TCP supported)
+#
+# @host: host address for NFS server
+#
+# Since: 2.9
+##
+{ 'struct': 'NFSServer',
+  'data': { 'type': 'NFSTransport',
+            'host': 'str' } }
+
+##
+# @BlockdevOptionsNfs:
+#
+# Driver specific block device option for NFS
+#
+# @server: host address
+#
+# @path: path of the image on the host
+#
+# @user: UID value to use when talking to the
+#        server (defaults to 65534 on Windows and getuid()
+#        on unix)
+#
+# @group: GID value to use when talking to the
+#         server (defaults to 65534 on Windows and getgid()
+#         in unix)
+#
+# @tcp-syn-count: number of SYNs during the session
+#                 establishment (defaults to libnfs default)
+#
+# @readahead-size: set the readahead size in bytes (defaults
+#                  to libnfs default)
+#
+# @page-cache-size: set the pagecache size in bytes (defaults
+#                   to libnfs default)
+#
+# @debug: set the NFS debug level (max 2) (defaults
+#         to libnfs default)
+#
+# Since: 2.9
+##
+{ 'struct': 'BlockdevOptionsNfs',
+  'data': { 'server': 'NFSServer',
+            'path': 'str',
+            '*user': 'int',
+            '*group': 'int',
+            '*tcp-syn-count': 'int',
+            '*readahead-size': 'int',
+            '*page-cache-size': 'int',
+            '*debug': 'int' } }
+
+##
+# @BlockdevOptionsCurlBase:
+#
+# Driver specific block device options shared by all protocols supported by the
+# curl backend.
+#
+# @url: URL of the image file
+#
+# @readahead: Size of the read-ahead cache; must be a multiple of
+#             512 (defaults to 256 kB)
+#
+# @timeout: Timeout for connections, in seconds (defaults to 5)
+#
+# @username: Username for authentication (defaults to none)
+#
+# @password-secret: ID of a QCryptoSecret object providing a password
+#                   for authentication (defaults to no password)
+#
+# @proxy-username: Username for proxy authentication (defaults to none)
+#
+# @proxy-password-secret: ID of a QCryptoSecret object providing a password
+#                         for proxy authentication (defaults to no password)
+#
+# Since: 2.9
+##
+{ 'struct': 'BlockdevOptionsCurlBase',
+  'data': { 'url': 'str',
+            '*readahead': 'int',
+            '*timeout': 'int',
+            '*username': 'str',
+            '*password-secret': 'str',
+            '*proxy-username': 'str',
+            '*proxy-password-secret': 'str' } }
+
+##
+# @BlockdevOptionsCurlHttp:
+#
+# Driver specific block device options for HTTP connections over the curl
+# backend.  URLs must start with "http://".
+#
+# @cookie: List of cookies to set; format is
+#          "name1=content1; name2=content2;" as explained by
+#          CURLOPT_COOKIE(3). Defaults to no cookies.
+#
+# @cookie-secret: ID of a QCryptoSecret object providing the cookie data in a
+#                 secure way. See @cookie for the format. (since 2.10)
+#
+# Since: 2.9
+##
+{ 'struct': 'BlockdevOptionsCurlHttp',
+  'base': 'BlockdevOptionsCurlBase',
+  'data': { '*cookie': 'str',
+            '*cookie-secret': 'str'} }
+
+##
+# @BlockdevOptionsCurlHttps:
+#
+# Driver specific block device options for HTTPS connections over the curl
+# backend.  URLs must start with "https://".
+#
+# @cookie: List of cookies to set; format is
+#          "name1=content1; name2=content2;" as explained by
+#          CURLOPT_COOKIE(3). Defaults to no cookies.
+#
+# @sslverify: Whether to verify the SSL certificate's validity (defaults to
+#             true)
+#
+# @cookie-secret: ID of a QCryptoSecret object providing the cookie data in a
+#                 secure way. See @cookie for the format. (since 2.10)
+#
+# Since: 2.9
+##
+{ 'struct': 'BlockdevOptionsCurlHttps',
+  'base': 'BlockdevOptionsCurlBase',
+  'data': { '*cookie': 'str',
+            '*sslverify': 'bool',
+            '*cookie-secret': 'str'} }
+
+##
+# @BlockdevOptionsCurlFtp:
+#
+# Driver specific block device options for FTP connections over the curl
+# backend.  URLs must start with "ftp://".
+#
+# Since: 2.9
+##
+{ 'struct': 'BlockdevOptionsCurlFtp',
+  'base': 'BlockdevOptionsCurlBase',
+  'data': { } }
+
+##
+# @BlockdevOptionsCurlFtps:
+#
+# Driver specific block device options for FTPS connections over the curl
+# backend.  URLs must start with "ftps://".
+#
+# @sslverify: Whether to verify the SSL certificate's validity (defaults to
+#             true)
+#
+# Since: 2.9
+##
+{ 'struct': 'BlockdevOptionsCurlFtps',
+  'base': 'BlockdevOptionsCurlBase',
+  'data': { '*sslverify': 'bool' } }
+
+##
+# @BlockdevOptionsNbd:
+#
+# Driver specific block device options for NBD.
+#
+# @server: NBD server address
+#
+# @export: export name
+#
+# @tls-creds: TLS credentials ID
+#
+# @x-dirty-bitmap: A metadata context name such as "qemu:dirty-bitmap:NAME"
+#                  or "qemu:allocation-depth" to query in place of the
+#                  traditional "base:allocation" block status (see
+#                  NBD_OPT_LIST_META_CONTEXT in the NBD protocol; and
+#                  yes, naming this option x-context would have made
+#                  more sense) (since 3.0)
+#
+# @reconnect-delay: On an unexpected disconnect, the nbd client tries to
+#                   connect again until succeeding or encountering a serious
+#                   error.  During the first @reconnect-delay seconds, all
+#                   requests are paused and will be rerun on a successful
+#                   reconnect. After that time, any delayed requests and all
+#                   future requests before a successful reconnect will
+#                   immediately fail. Default 0 (Since 4.2)
+#
+# Since: 2.9
+##
+{ 'struct': 'BlockdevOptionsNbd',
+  'data': { 'server': 'SocketAddress',
+            '*export': 'str',
+            '*tls-creds': 'str',
+            '*x-dirty-bitmap': 'str',
+            '*reconnect-delay': 'uint32' } }
+
+##
+# @BlockdevOptionsRaw:
+#
+# Driver specific block device options for the raw driver.
+#
+# @offset: position where the block device starts
+# @size: the assumed size of the device
+#
+# Since: 2.9
+##
+{ 'struct': 'BlockdevOptionsRaw',
+  'base': 'BlockdevOptionsGenericFormat',
+  'data': { '*offset': 'int', '*size': 'int' } }
+
+##
+# @BlockdevOptionsThrottle:
+#
+# Driver specific block device options for the throttle driver
+#
+# @throttle-group: the name of the throttle-group object to use. It
+#                  must already exist.
+# @file: reference to or definition of the data source block device
+# Since: 2.11
+##
+{ 'struct': 'BlockdevOptionsThrottle',
+  'data': { 'throttle-group': 'str',
+            'file' : 'BlockdevRef'
+             } }
+##
+# @BlockdevOptions:
+#
+# Options for creating a block device.  Many options are available for all
+# block devices, independent of the block driver:
+#
+# @driver: block driver name
+# @node-name: the node name of the new node (Since 2.0).
+#             This option is required on the top level of blockdev-add.
+#             Valid node names start with an alphabetic character and may
+#             contain only alphanumeric characters, '-', '.' and '_'. Their
+#             maximum length is 31 characters.
+# @discard: discard-related options (default: ignore)
+# @cache: cache-related options
+# @read-only: whether the block device should be read-only (default: false).
+#             Note that some block drivers support only read-only access,
+#             either generally or in certain configurations. In this case,
+#             the default value does not work and the option must be
+#             specified explicitly.
+# @auto-read-only: if true and @read-only is false, QEMU may automatically
+#                  decide not to open the image read-write as requested, but
+#                  fall back to read-only instead (and switch between the modes
+#                  later), e.g. depending on whether the image file is writable
+#                  or whether a writing user is attached to the node
+#                  (default: false, since 3.1)
+# @detect-zeroes: detect and optimize zero writes (Since 2.1)
+#                 (default: off)
+# @force-share: force share all permission on added nodes.
+#               Requires read-only=true. (Since 2.10)
+#
+# Remaining options are determined by the block driver.
+#
+# Since: 2.9
+##
+{ 'union': 'BlockdevOptions',
+  'base': { 'driver': 'BlockdevDriver',
+            '*node-name': 'str',
+            '*discard': 'BlockdevDiscardOptions',
+            '*cache': 'BlockdevCacheOptions',
+            '*read-only': 'bool',
+            '*auto-read-only': 'bool',
+            '*force-share': 'bool',
+            '*detect-zeroes': 'BlockdevDetectZeroesOptions' },
+  'discriminator': 'driver',
+  'data': {
+      'blkdebug':   'BlockdevOptionsBlkdebug',
+      'blklogwrites':'BlockdevOptionsBlklogwrites',
+      'blkverify':  'BlockdevOptionsBlkverify',
+      'blkreplay':  'BlockdevOptionsBlkreplay',
+      'bochs':      'BlockdevOptionsGenericFormat',
+      'cloop':      'BlockdevOptionsGenericFormat',
+      'compress':   'BlockdevOptionsGenericFormat',
+      'copy-on-read':'BlockdevOptionsGenericFormat',
+      'dmg':        'BlockdevOptionsGenericFormat',
+      'file':       'BlockdevOptionsFile',
+      'ftp':        'BlockdevOptionsCurlFtp',
+      'ftps':       'BlockdevOptionsCurlFtps',
+      'gluster':    'BlockdevOptionsGluster',
+      'host_cdrom': 'BlockdevOptionsFile',
+      'host_device':'BlockdevOptionsFile',
+      'http':       'BlockdevOptionsCurlHttp',
+      'https':      'BlockdevOptionsCurlHttps',
+      'iscsi':      'BlockdevOptionsIscsi',
+      'luks':       'BlockdevOptionsLUKS',
+      'nbd':        'BlockdevOptionsNbd',
+      'nfs':        'BlockdevOptionsNfs',
+      'null-aio':   'BlockdevOptionsNull',
+      'null-co':    'BlockdevOptionsNull',
+      'nvme':       'BlockdevOptionsNVMe',
+      'parallels':  'BlockdevOptionsGenericFormat',
+      'qcow2':      'BlockdevOptionsQcow2',
+      'qcow':       'BlockdevOptionsQcow',
+      'qed':        'BlockdevOptionsGenericCOWFormat',
+      'quorum':     'BlockdevOptionsQuorum',
+      'raw':        'BlockdevOptionsRaw',
+      'rbd':        'BlockdevOptionsRbd',
+      'replication': { 'type': 'BlockdevOptionsReplication',
+                       'if': 'defined(CONFIG_REPLICATION)' },
+      'sheepdog':   'BlockdevOptionsSheepdog',
+      'ssh':        'BlockdevOptionsSsh',
+      'throttle':   'BlockdevOptionsThrottle',
+      'vdi':        'BlockdevOptionsGenericFormat',
+      'vhdx':       'BlockdevOptionsGenericFormat',
+      'vmdk':       'BlockdevOptionsGenericCOWFormat',
+      'vpc':        'BlockdevOptionsGenericFormat',
+      'vvfat':      'BlockdevOptionsVVFAT'
+  } }
+
+##
+# @BlockdevRef:
+#
+# Reference to a block device.
+#
+# @definition: defines a new block device inline
+# @reference: references the ID of an existing block device
+#
+# Since: 2.9
+##
+{ 'alternate': 'BlockdevRef',
+  'data': { 'definition': 'BlockdevOptions',
+            'reference': 'str' } }
+
+##
+# @BlockdevRefOrNull:
+#
+# Reference to a block device.
+#
+# @definition: defines a new block device inline
+# @reference: references the ID of an existing block device.
+#             An empty string means that no block device should
+#             be referenced.  Deprecated; use null instead.
+# @null: No block device should be referenced (since 2.10)
+#
+# Since: 2.9
+##
+{ 'alternate': 'BlockdevRefOrNull',
+  'data': { 'definition': 'BlockdevOptions',
+            'reference': 'str',
+            'null': 'null' } }
+
+##
+# @blockdev-add:
+#
+# Creates a new block device.
+#
+# Since: 2.9
+#
+# Example:
+#
+# 1.
+# -> { "execute": "blockdev-add",
+#      "arguments": {
+#           "driver": "qcow2",
+#           "node-name": "test1",
+#           "file": {
+#               "driver": "file",
+#               "filename": "test.qcow2"
+#            }
+#       }
+#     }
+# <- { "return": {} }
+#
+# 2.
+# -> { "execute": "blockdev-add",
+#      "arguments": {
+#           "driver": "qcow2",
+#           "node-name": "node0",
+#           "discard": "unmap",
+#           "cache": {
+#              "direct": true
+#            },
+#            "file": {
+#              "driver": "file",
+#              "filename": "/tmp/test.qcow2"
+#            },
+#            "backing": {
+#               "driver": "raw",
+#               "file": {
+#                  "driver": "file",
+#                  "filename": "/dev/fdset/4"
+#                }
+#            }
+#        }
+#      }
+#
+# <- { "return": {} }
+#
+##
+{ 'command': 'blockdev-add', 'data': 'BlockdevOptions', 'boxed': true }
+
+##
+# @x-blockdev-reopen:
+#
+# Reopens a block device using the given set of options. Any option
+# not specified will be reset to its default value regardless of its
+# previous status. If an option cannot be changed or a particular
+# driver does not support reopening then the command will return an
+# error.
+#
+# The top-level @node-name option (from BlockdevOptions) must be
+# specified and is used to select the block device to be reopened.
+# Other @node-name options must be either omitted or set to the
+# current name of the appropriate node. This command won't change any
+# node name and any attempt to do it will result in an error.
+#
+# In the case of options that refer to child nodes, the behavior of
+# this command depends on the value:
+#
+#  1) A set of options (BlockdevOptions): the child is reopened with
+#     the specified set of options.
+#
+#  2) A reference to the current child: the child is reopened using
+#     its existing set of options.
+#
+#  3) A reference to a different node: the current child is replaced
+#     with the specified one.
+#
+#  4) NULL: the current child (if any) is detached.
+#
+# Options (1) and (2) are supported in all cases, but at the moment
+# only @backing allows replacing or detaching an existing child.
+#
+# Unlike with blockdev-add, the @backing option must always be present
+# unless the node being reopened does not have a backing file and its
+# image does not have a default backing file name as part of its
+# metadata.
+#
+# Since: 4.0
+##
+{ 'command': 'x-blockdev-reopen',
+  'data': 'BlockdevOptions', 'boxed': true }
+
+##
+# @blockdev-del:
+#
+# Deletes a block device that has been added using blockdev-add.
+# The command will fail if the node is attached to a device or is
+# otherwise being used.
+#
+# @node-name: Name of the graph node to delete.
+#
+# Since: 2.9
+#
+# Example:
+#
+# -> { "execute": "blockdev-add",
+#      "arguments": {
+#           "driver": "qcow2",
+#           "node-name": "node0",
+#           "file": {
+#               "driver": "file",
+#               "filename": "test.qcow2"
+#           }
+#      }
+#    }
+# <- { "return": {} }
+#
+# -> { "execute": "blockdev-del",
+#      "arguments": { "node-name": "node0" }
+#    }
+# <- { "return": {} }
+#
+##
+{ 'command': 'blockdev-del', 'data': { 'node-name': 'str' } }
+
+##
+# @BlockdevCreateOptionsFile:
+#
+# Driver specific image creation options for file.
+#
+# @filename: Filename for the new image file
+# @size: Size of the virtual disk in bytes
+# @preallocation: Preallocation mode for the new image (default: off;
+#                 allowed values: off,
+#                 falloc (if defined CONFIG_POSIX_FALLOCATE),
+#                 full (if defined CONFIG_POSIX))
+# @nocow: Turn off copy-on-write (valid only on btrfs; default: off)
+# @extent-size-hint: Extent size hint to add to the image file; 0 for not
+#                    adding an extent size hint (default: 1 MB, since 5.1)
+#
+# Since: 2.12
+##
+{ 'struct': 'BlockdevCreateOptionsFile',
+  'data': { 'filename':             'str',
+            'size':                 'size',
+            '*preallocation':       'PreallocMode',
+            '*nocow':               'bool',
+            '*extent-size-hint':    'size'} }
+
+##
+# @BlockdevCreateOptionsGluster:
+#
+# Driver specific image creation options for gluster.
+#
+# @location: Where to store the new image file
+# @size: Size of the virtual disk in bytes
+# @preallocation: Preallocation mode for the new image (default: off;
+#                 allowed values: off,
+#                 falloc (if defined CONFIG_GLUSTERFS_FALLOCATE),
+#                 full (if defined CONFIG_GLUSTERFS_ZEROFILL))
+#
+# Since: 2.12
+##
+{ 'struct': 'BlockdevCreateOptionsGluster',
+  'data': { 'location':         'BlockdevOptionsGluster',
+            'size':             'size',
+            '*preallocation':   'PreallocMode' } }
+
+##
+# @BlockdevCreateOptionsLUKS:
+#
+# Driver specific image creation options for LUKS.
+#
+# @file: Node to create the image format on
+# @size: Size of the virtual disk in bytes
+# @preallocation: Preallocation mode for the new image
+#                 (since: 4.2)
+#                 (default: off; allowed values: off, metadata, falloc, full)
+#
+# Since: 2.12
+##
+{ 'struct': 'BlockdevCreateOptionsLUKS',
+  'base': 'QCryptoBlockCreateOptionsLUKS',
+  'data': { 'file':             'BlockdevRef',
+            'size':             'size',
+            '*preallocation':   'PreallocMode' } }
+
+##
+# @BlockdevCreateOptionsNfs:
+#
+# Driver specific image creation options for NFS.
+#
+# @location: Where to store the new image file
+# @size: Size of the virtual disk in bytes
+#
+# Since: 2.12
+##
+{ 'struct': 'BlockdevCreateOptionsNfs',
+  'data': { 'location':         'BlockdevOptionsNfs',
+            'size':             'size' } }
+
+##
+# @BlockdevCreateOptionsParallels:
+#
+# Driver specific image creation options for parallels.
+#
+# @file: Node to create the image format on
+# @size: Size of the virtual disk in bytes
+# @cluster-size: Cluster size in bytes (default: 1 MB)
+#
+# Since: 2.12
+##
+{ 'struct': 'BlockdevCreateOptionsParallels',
+  'data': { 'file':             'BlockdevRef',
+            'size':             'size',
+            '*cluster-size':    'size' } }
+
+##
+# @BlockdevCreateOptionsQcow:
+#
+# Driver specific image creation options for qcow.
+#
+# @file: Node to create the image format on
+# @size: Size of the virtual disk in bytes
+# @backing-file: File name of the backing file if a backing file
+#                should be used
+# @encrypt: Encryption options if the image should be encrypted
+#
+# Since: 2.12
+##
+{ 'struct': 'BlockdevCreateOptionsQcow',
+  'data': { 'file':             'BlockdevRef',
+            'size':             'size',
+            '*backing-file':    'str',
+            '*encrypt':         'QCryptoBlockCreateOptions' } }
+
+##
+# @BlockdevQcow2Version:
+#
+# @v2:  The original QCOW2 format as introduced in qemu 0.10 (version 2)
+# @v3:  The extended QCOW2 format as introduced in qemu 1.1 (version 3)
+#
+# Since: 2.12
+##
+{ 'enum': 'BlockdevQcow2Version',
+  'data': [ 'v2', 'v3' ] }
+
+
+##
+# @Qcow2CompressionType:
+#
+# Compression type used in qcow2 image file
+#
+# @zlib: zlib compression, see 
+# @zstd: zstd compression, see 
+#
+# Since: 5.1
+##
+{ 'enum': 'Qcow2CompressionType',
+  'data': [ 'zlib', { 'name': 'zstd', 'if': 'defined(CONFIG_ZSTD)' } ] }
+
+##
+# @BlockdevCreateOptionsQcow2:
+#
+# Driver specific image creation options for qcow2.
+#
+# @file: Node to create the image format on
+# @data-file: Node to use as an external data file in which all guest
+#             data is stored so that only metadata remains in the qcow2
+#             file (since: 4.0)
+# @data-file-raw: True if the external data file must stay valid as a
+#                 standalone (read-only) raw image without looking at qcow2
+#                 metadata (default: false; since: 4.0)
+# @extended-l2: True to make the image have extended L2 entries
+#               (default: false; since 5.2)
+# @size: Size of the virtual disk in bytes
+# @version: Compatibility level (default: v3)
+# @backing-file: File name of the backing file if a backing file
+#                should be used
+# @backing-fmt: Name of the block driver to use for the backing file
+# @encrypt: Encryption options if the image should be encrypted
+# @cluster-size: qcow2 cluster size in bytes (default: 65536)
+# @preallocation: Preallocation mode for the new image (default: off;
+#                 allowed values: off, falloc, full, metadata)
+# @lazy-refcounts: True if refcounts may be updated lazily (default: off)
+# @refcount-bits: Width of reference counts in bits (default: 16)
+# @compression-type: The image cluster compression method
+#                    (default: zlib, since 5.1)
+#
+# Since: 2.12
+##
+{ 'struct': 'BlockdevCreateOptionsQcow2',
+  'data': { 'file':             'BlockdevRef',
+            '*data-file':       'BlockdevRef',
+            '*data-file-raw':   'bool',
+            '*extended-l2':     'bool',
+            'size':             'size',
+            '*version':         'BlockdevQcow2Version',
+            '*backing-file':    'str',
+            '*backing-fmt':     'BlockdevDriver',
+            '*encrypt':         'QCryptoBlockCreateOptions',
+            '*cluster-size':    'size',
+            '*preallocation':   'PreallocMode',
+            '*lazy-refcounts':  'bool',
+            '*refcount-bits':   'int',
+            '*compression-type':'Qcow2CompressionType' } }
+
+##
+# @BlockdevCreateOptionsQed:
+#
+# Driver specific image creation options for qed.
+#
+# @file: Node to create the image format on
+# @size: Size of the virtual disk in bytes
+# @backing-file: File name of the backing file if a backing file
+#                should be used
+# @backing-fmt: Name of the block driver to use for the backing file
+# @cluster-size: Cluster size in bytes (default: 65536)
+# @table-size: L1/L2 table size (in clusters)
+#
+# Since: 2.12
+##
+{ 'struct': 'BlockdevCreateOptionsQed',
+  'data': { 'file':             'BlockdevRef',
+            'size':             'size',
+            '*backing-file':    'str',
+            '*backing-fmt':     'BlockdevDriver',
+            '*cluster-size':    'size',
+            '*table-size':      'int' } }
+
+##
+# @BlockdevCreateOptionsRbd:
+#
+# Driver specific image creation options for rbd/Ceph.
+#
+# @location: Where to store the new image file. This location cannot
+#            point to a snapshot.
+# @size: Size of the virtual disk in bytes
+# @cluster-size: RBD object size
+#
+# Since: 2.12
+##
+{ 'struct': 'BlockdevCreateOptionsRbd',
+  'data': { 'location':         'BlockdevOptionsRbd',
+            'size':             'size',
+            '*cluster-size' :   'size' } }
+
+##
+# @BlockdevVmdkSubformat:
+#
+# Subformat options for VMDK images
+#
+# @monolithicSparse:     Single file image with sparse cluster allocation
+#
+# @monolithicFlat:       Single flat data image and a descriptor file
+#
+# @twoGbMaxExtentSparse: Data is split into 2GB (per virtual LBA) sparse extent
+#                        files, in addition to a descriptor file
+#
+# @twoGbMaxExtentFlat:   Data is split into 2GB (per virtual LBA) flat extent
+#                        files, in addition to a descriptor file
+#
+# @streamOptimized:      Single file image sparse cluster allocation, optimized
+#                        for streaming over network.
+#
+# Since: 4.0
+##
+{ 'enum': 'BlockdevVmdkSubformat',
+  'data': [ 'monolithicSparse', 'monolithicFlat', 'twoGbMaxExtentSparse',
+            'twoGbMaxExtentFlat', 'streamOptimized'] }
+
+##
+# @BlockdevVmdkAdapterType:
+#
+# Adapter type info for VMDK images
+#
+# Since: 4.0
+##
+{ 'enum': 'BlockdevVmdkAdapterType',
+  'data': [ 'ide', 'buslogic', 'lsilogic', 'legacyESX'] }
+
+##
+# @BlockdevCreateOptionsVmdk:
+#
+# Driver specific image creation options for VMDK.
+#
+# @file: Where to store the new image file. This refers to the image
+#        file for monolithcSparse and streamOptimized format, or the
+#        descriptor file for other formats.
+# @size: Size of the virtual disk in bytes
+# @extents: Where to store the data extents. Required for monolithcFlat,
+#           twoGbMaxExtentSparse and twoGbMaxExtentFlat formats. For
+#           monolithicFlat, only one entry is required; for
+#           twoGbMaxExtent* formats, the number of entries required is
+#           calculated as extent_number = virtual_size / 2GB. Providing
+#           more extents than will be used is an error.
+# @subformat: The subformat of the VMDK image. Default: "monolithicSparse".
+# @backing-file: The path of backing file. Default: no backing file is used.
+# @adapter-type: The adapter type used to fill in the descriptor. Default: ide.
+# @hwversion: Hardware version. The meaningful options are "4" or "6".
+#             Default: "4".
+# @zeroed-grain: Whether to enable zeroed-grain feature for sparse subformats.
+#                Default: false.
+#
+# Since: 4.0
+##
+{ 'struct': 'BlockdevCreateOptionsVmdk',
+  'data': { 'file':             'BlockdevRef',
+            'size':             'size',
+            '*extents':          ['BlockdevRef'],
+            '*subformat':       'BlockdevVmdkSubformat',
+            '*backing-file':    'str',
+            '*adapter-type':    'BlockdevVmdkAdapterType',
+            '*hwversion':       'str',
+            '*zeroed-grain':    'bool' } }
+
+
+##
+# @SheepdogRedundancyType:
+#
+# @full: Create a fully replicated vdi with x copies
+# @erasure-coded: Create an erasure coded vdi with x data strips and
+#                 y parity strips
+#
+# Since: 2.12
+##
+{ 'enum': 'SheepdogRedundancyType',
+  'data': [ 'full', 'erasure-coded' ] }
+
+##
+# @SheepdogRedundancyFull:
+#
+# @copies: Number of copies to use (between 1 and 31)
+#
+# Since: 2.12
+##
+{ 'struct': 'SheepdogRedundancyFull',
+  'data': { 'copies': 'int' }}
+
+##
+# @SheepdogRedundancyErasureCoded:
+#
+# @data-strips: Number of data strips to use (one of {2,4,8,16})
+# @parity-strips: Number of parity strips to use (between 1 and 15)
+#
+# Since: 2.12
+##
+{ 'struct': 'SheepdogRedundancyErasureCoded',
+  'data': { 'data-strips': 'int',
+            'parity-strips': 'int' }}
+
+##
+# @SheepdogRedundancy:
+#
+# Since: 2.12
+##
+{ 'union': 'SheepdogRedundancy',
+  'base': { 'type': 'SheepdogRedundancyType' },
+  'discriminator': 'type',
+  'data': { 'full': 'SheepdogRedundancyFull',
+            'erasure-coded': 'SheepdogRedundancyErasureCoded' } }
+
+##
+# @BlockdevCreateOptionsSheepdog:
+#
+# Driver specific image creation options for Sheepdog.
+#
+# @location: Where to store the new image file
+# @size: Size of the virtual disk in bytes
+# @backing-file: File name of a base image
+# @preallocation: Preallocation mode for the new image (default: off;
+#                 allowed values: off, full)
+# @redundancy: Redundancy of the image
+# @object-size: Object size of the image
+#
+# Since: 2.12
+##
+{ 'struct': 'BlockdevCreateOptionsSheepdog',
+  'data': { 'location':         'BlockdevOptionsSheepdog',
+            'size':             'size',
+            '*backing-file':    'str',
+            '*preallocation':   'PreallocMode',
+            '*redundancy':      'SheepdogRedundancy',
+            '*object-size':     'size' } }
+
+##
+# @BlockdevCreateOptionsSsh:
+#
+# Driver specific image creation options for SSH.
+#
+# @location: Where to store the new image file
+# @size: Size of the virtual disk in bytes
+#
+# Since: 2.12
+##
+{ 'struct': 'BlockdevCreateOptionsSsh',
+  'data': { 'location':         'BlockdevOptionsSsh',
+            'size':             'size' } }
+
+##
+# @BlockdevCreateOptionsVdi:
+#
+# Driver specific image creation options for VDI.
+#
+# @file: Node to create the image format on
+# @size: Size of the virtual disk in bytes
+# @preallocation: Preallocation mode for the new image (default: off;
+#                 allowed values: off, metadata)
+#
+# Since: 2.12
+##
+{ 'struct': 'BlockdevCreateOptionsVdi',
+  'data': { 'file':             'BlockdevRef',
+            'size':             'size',
+            '*preallocation':   'PreallocMode' } }
+
+##
+# @BlockdevVhdxSubformat:
+#
+# @dynamic: Growing image file
+# @fixed:   Preallocated fixed-size image file
+#
+# Since: 2.12
+##
+{ 'enum': 'BlockdevVhdxSubformat',
+  'data': [ 'dynamic', 'fixed' ] }
+
+##
+# @BlockdevCreateOptionsVhdx:
+#
+# Driver specific image creation options for vhdx.
+#
+# @file: Node to create the image format on
+# @size: Size of the virtual disk in bytes
+# @log-size: Log size in bytes, must be a multiple of 1 MB
+#            (default: 1 MB)
+# @block-size: Block size in bytes, must be a multiple of 1 MB and not
+#              larger than 256 MB (default: automatically choose a block
+#              size depending on the image size)
+# @subformat: vhdx subformat (default: dynamic)
+# @block-state-zero: Force use of payload blocks of type 'ZERO'. Non-standard,
+#                    but default.  Do not set to 'off' when using 'qemu-img
+#                    convert' with subformat=dynamic.
+#
+# Since: 2.12
+##
+{ 'struct': 'BlockdevCreateOptionsVhdx',
+  'data': { 'file':                 'BlockdevRef',
+            'size':                 'size',
+            '*log-size':            'size',
+            '*block-size':          'size',
+            '*subformat':           'BlockdevVhdxSubformat',
+            '*block-state-zero':    'bool' } }
+
+##
+# @BlockdevVpcSubformat:
+#
+# @dynamic: Growing image file
+# @fixed:   Preallocated fixed-size image file
+#
+# Since: 2.12
+##
+{ 'enum': 'BlockdevVpcSubformat',
+  'data': [ 'dynamic', 'fixed' ] }
+
+##
+# @BlockdevCreateOptionsVpc:
+#
+# Driver specific image creation options for vpc (VHD).
+#
+# @file: Node to create the image format on
+# @size: Size of the virtual disk in bytes
+# @subformat: vhdx subformat (default: dynamic)
+# @force-size: Force use of the exact byte size instead of rounding to the
+#              next size that can be represented in CHS geometry
+#              (default: false)
+#
+# Since: 2.12
+##
+{ 'struct': 'BlockdevCreateOptionsVpc',
+  'data': { 'file':                 'BlockdevRef',
+            'size':                 'size',
+            '*subformat':           'BlockdevVpcSubformat',
+            '*force-size':          'bool' } }
+
+##
+# @BlockdevCreateOptions:
+#
+# Options for creating an image format on a given node.
+#
+# @driver: block driver to create the image format
+#
+# Since: 2.12
+##
+{ 'union': 'BlockdevCreateOptions',
+  'base': {
+      'driver':         'BlockdevDriver' },
+  'discriminator': 'driver',
+  'data': {
+      'file':           'BlockdevCreateOptionsFile',
+      'gluster':        'BlockdevCreateOptionsGluster',
+      'luks':           'BlockdevCreateOptionsLUKS',
+      'nfs':            'BlockdevCreateOptionsNfs',
+      'parallels':      'BlockdevCreateOptionsParallels',
+      'qcow':           'BlockdevCreateOptionsQcow',
+      'qcow2':          'BlockdevCreateOptionsQcow2',
+      'qed':            'BlockdevCreateOptionsQed',
+      'rbd':            'BlockdevCreateOptionsRbd',
+      'sheepdog':       'BlockdevCreateOptionsSheepdog',
+      'ssh':            'BlockdevCreateOptionsSsh',
+      'vdi':            'BlockdevCreateOptionsVdi',
+      'vhdx':           'BlockdevCreateOptionsVhdx',
+      'vmdk':           'BlockdevCreateOptionsVmdk',
+      'vpc':            'BlockdevCreateOptionsVpc'
+  } }
+
+##
+# @blockdev-create:
+#
+# Starts a job to create an image format on a given node. The job is
+# automatically finalized, but a manual job-dismiss is required.
+#
+# @job-id:          Identifier for the newly created job.
+#
+# @options:         Options for the image creation.
+#
+# Since: 3.0
+##
+{ 'command': 'blockdev-create',
+  'data': { 'job-id': 'str',
+            'options': 'BlockdevCreateOptions' } }
+
+##
+# @BlockdevAmendOptionsLUKS:
+#
+# Driver specific image amend options for LUKS.
+#
+# Since: 5.1
+##
+{ 'struct': 'BlockdevAmendOptionsLUKS',
+  'base': 'QCryptoBlockAmendOptionsLUKS',
+  'data': { }
+}
+
+##
+# @BlockdevAmendOptionsQcow2:
+#
+# Driver specific image amend options for qcow2.
+# For now, only encryption options can be amended
+#
+# @encrypt          Encryption options to be amended
+#
+# Since: 5.1
+##
+{ 'struct': 'BlockdevAmendOptionsQcow2',
+  'data': { '*encrypt':         'QCryptoBlockAmendOptions' } }
+
+##
+# @BlockdevAmendOptions:
+#
+# Options for amending an image format
+#
+# @driver:          Block driver of the node to amend.
+#
+# Since: 5.1
+##
+{ 'union': 'BlockdevAmendOptions',
+  'base': {
+      'driver':         'BlockdevDriver' },
+  'discriminator': 'driver',
+  'data': {
+      'luks':           'BlockdevAmendOptionsLUKS',
+      'qcow2':          'BlockdevAmendOptionsQcow2' } }
+
+##
+# @x-blockdev-amend:
+#
+# Starts a job to amend format specific options of an existing open block device
+# The job is automatically finalized, but a manual job-dismiss is required.
+#
+# @job-id:          Identifier for the newly created job.
+#
+# @node-name:       Name of the block node to work on
+#
+# @options:         Options (driver specific)
+#
+# @force:           Allow unsafe operations, format specific
+#                   For luks that allows erase of the last active keyslot
+#                   (permanent loss of data),
+#                   and replacement of an active keyslot
+#                   (possible loss of data if IO error happens)
+#
+# Since: 5.1
+##
+{ 'command': 'x-blockdev-amend',
+  'data': { 'job-id': 'str',
+            'node-name': 'str',
+            'options': 'BlockdevAmendOptions',
+            '*force': 'bool' } }
+
+##
+# @BlockErrorAction:
+#
+# An enumeration of action that has been taken when a DISK I/O occurs
+#
+# @ignore: error has been ignored
+#
+# @report: error has been reported to the device
+#
+# @stop: error caused VM to be stopped
+#
+# Since: 2.1
+##
+{ 'enum': 'BlockErrorAction',
+  'data': [ 'ignore', 'report', 'stop' ] }
+
+
+##
+# @BLOCK_IMAGE_CORRUPTED:
+#
+# Emitted when a disk image is being marked corrupt. The image can be
+# identified by its device or node name. The 'device' field is always
+# present for compatibility reasons, but it can be empty ("") if the
+# image does not have a device name associated.
+#
+# @device: device name. This is always present for compatibility
+#          reasons, but it can be empty ("") if the image does not
+#          have a device name associated.
+#
+# @node-name: node name (Since: 2.4)
+#
+# @msg: informative message for human consumption, such as the kind of
+#       corruption being detected. It should not be parsed by machine as it is
+#       not guaranteed to be stable
+#
+# @offset: if the corruption resulted from an image access, this is
+#          the host's access offset into the image
+#
+# @size: if the corruption resulted from an image access, this is
+#        the access size
+#
+# @fatal: if set, the image is marked corrupt and therefore unusable after this
+#         event and must be repaired (Since 2.2; before, every
+#         BLOCK_IMAGE_CORRUPTED event was fatal)
+#
+# Note: If action is "stop", a STOP event will eventually follow the
+#       BLOCK_IO_ERROR event.
+#
+# Example:
+#
+# <- { "event": "BLOCK_IMAGE_CORRUPTED",
+#      "data": { "device": "ide0-hd0", "node-name": "node0",
+#                "msg": "Prevented active L1 table overwrite", "offset": 196608,
+#                "size": 65536 },
+#      "timestamp": { "seconds": 1378126126, "microseconds": 966463 } }
+#
+# Since: 1.7
+##
+{ 'event': 'BLOCK_IMAGE_CORRUPTED',
+  'data': { 'device'     : 'str',
+            '*node-name' : 'str',
+            'msg'        : 'str',
+            '*offset'    : 'int',
+            '*size'      : 'int',
+            'fatal'      : 'bool' } }
+
+##
+# @BLOCK_IO_ERROR:
+#
+# Emitted when a disk I/O error occurs
+#
+# @device: device name. This is always present for compatibility
+#          reasons, but it can be empty ("") if the image does not
+#          have a device name associated.
+#
+# @node-name: node name. Note that errors may be reported for the root node
+#             that is directly attached to a guest device rather than for the
+#             node where the error occurred. The node name is not present if
+#             the drive is empty. (Since: 2.8)
+#
+# @operation: I/O operation
+#
+# @action: action that has been taken
+#
+# @nospace: true if I/O error was caused due to a no-space
+#           condition. This key is only present if query-block's
+#           io-status is present, please see query-block documentation
+#           for more information (since: 2.2)
+#
+# @reason: human readable string describing the error cause.
+#          (This field is a debugging aid for humans, it should not
+#          be parsed by applications) (since: 2.2)
+#
+# Note: If action is "stop", a STOP event will eventually follow the
+#       BLOCK_IO_ERROR event
+#
+# Since: 0.13.0
+#
+# Example:
+#
+# <- { "event": "BLOCK_IO_ERROR",
+#      "data": { "device": "ide0-hd1",
+#                "node-name": "#block212",
+#                "operation": "write",
+#                "action": "stop" },
+#      "timestamp": { "seconds": 1265044230, "microseconds": 450486 } }
+#
+##
+{ 'event': 'BLOCK_IO_ERROR',
+  'data': { 'device': 'str', '*node-name': 'str',
+            'operation': 'IoOperationType',
+            'action': 'BlockErrorAction', '*nospace': 'bool',
+            'reason': 'str' } }
+
+##
+# @BLOCK_JOB_COMPLETED:
+#
+# Emitted when a block job has completed
+#
+# @type: job type
+#
+# @device: The job identifier. Originally the device name but other
+#          values are allowed since QEMU 2.7
+#
+# @len: maximum progress value
+#
+# @offset: current progress value. On success this is equal to len.
+#          On failure this is less than len
+#
+# @speed: rate limit, bytes per second
+#
+# @error: error message. Only present on failure. This field
+#         contains a human-readable error message. There are no semantics
+#         other than that streaming has failed and clients should not try to
+#         interpret the error string
+#
+# Since: 1.1
+#
+# Example:
+#
+# <- { "event": "BLOCK_JOB_COMPLETED",
+#      "data": { "type": "stream", "device": "virtio-disk0",
+#                "len": 10737418240, "offset": 10737418240,
+#                "speed": 0 },
+#      "timestamp": { "seconds": 1267061043, "microseconds": 959568 } }
+#
+##
+{ 'event': 'BLOCK_JOB_COMPLETED',
+  'data': { 'type'  : 'JobType',
+            'device': 'str',
+            'len'   : 'int',
+            'offset': 'int',
+            'speed' : 'int',
+            '*error': 'str' } }
+
+##
+# @BLOCK_JOB_CANCELLED:
+#
+# Emitted when a block job has been cancelled
+#
+# @type: job type
+#
+# @device: The job identifier. Originally the device name but other
+#          values are allowed since QEMU 2.7
+#
+# @len: maximum progress value
+#
+# @offset: current progress value. On success this is equal to len.
+#          On failure this is less than len
+#
+# @speed: rate limit, bytes per second
+#
+# Since: 1.1
+#
+# Example:
+#
+# <- { "event": "BLOCK_JOB_CANCELLED",
+#      "data": { "type": "stream", "device": "virtio-disk0",
+#                "len": 10737418240, "offset": 134217728,
+#                "speed": 0 },
+#      "timestamp": { "seconds": 1267061043, "microseconds": 959568 } }
+#
+##
+{ 'event': 'BLOCK_JOB_CANCELLED',
+  'data': { 'type'  : 'JobType',
+            'device': 'str',
+            'len'   : 'int',
+            'offset': 'int',
+            'speed' : 'int' } }
+
+##
+# @BLOCK_JOB_ERROR:
+#
+# Emitted when a block job encounters an error
+#
+# @device: The job identifier. Originally the device name but other
+#          values are allowed since QEMU 2.7
+#
+# @operation: I/O operation
+#
+# @action: action that has been taken
+#
+# Since: 1.3
+#
+# Example:
+#
+# <- { "event": "BLOCK_JOB_ERROR",
+#      "data": { "device": "ide0-hd1",
+#                "operation": "write",
+#                "action": "stop" },
+#      "timestamp": { "seconds": 1265044230, "microseconds": 450486 } }
+#
+##
+{ 'event': 'BLOCK_JOB_ERROR',
+  'data': { 'device'   : 'str',
+            'operation': 'IoOperationType',
+            'action'   : 'BlockErrorAction' } }
+
+##
+# @BLOCK_JOB_READY:
+#
+# Emitted when a block job is ready to complete
+#
+# @type: job type
+#
+# @device: The job identifier. Originally the device name but other
+#          values are allowed since QEMU 2.7
+#
+# @len: maximum progress value
+#
+# @offset: current progress value. On success this is equal to len.
+#          On failure this is less than len
+#
+# @speed: rate limit, bytes per second
+#
+# Note: The "ready to complete" status is always reset by a @BLOCK_JOB_ERROR
+#       event
+#
+# Since: 1.3
+#
+# Example:
+#
+# <- { "event": "BLOCK_JOB_READY",
+#      "data": { "device": "drive0", "type": "mirror", "speed": 0,
+#                "len": 2097152, "offset": 2097152 }
+#      "timestamp": { "seconds": 1265044230, "microseconds": 450486 } }
+#
+##
+{ 'event': 'BLOCK_JOB_READY',
+  'data': { 'type'  : 'JobType',
+            'device': 'str',
+            'len'   : 'int',
+            'offset': 'int',
+            'speed' : 'int' } }
+
+##
+# @BLOCK_JOB_PENDING:
+#
+# Emitted when a block job is awaiting explicit authorization to finalize graph
+# changes via @block-job-finalize. If this job is part of a transaction, it will
+# not emit this event until the transaction has converged first.
+#
+# @type: job type
+#
+# @id: The job identifier.
+#
+# Since: 2.12
+#
+# Example:
+#
+# <- { "event": "BLOCK_JOB_WAITING",
+#      "data": { "device": "drive0", "type": "mirror" },
+#      "timestamp": { "seconds": 1265044230, "microseconds": 450486 } }
+#
+##
+{ 'event': 'BLOCK_JOB_PENDING',
+  'data': { 'type'  : 'JobType',
+            'id'    : 'str' } }
+
+##
+# @PreallocMode:
+#
+# Preallocation mode of QEMU image file
+#
+# @off: no preallocation
+# @metadata: preallocate only for metadata
+# @falloc: like @full preallocation but allocate disk space by
+#          posix_fallocate() rather than writing data.
+# @full: preallocate all data by writing it to the device to ensure
+#        disk space is really available. This data may or may not be
+#        zero, depending on the image format and storage.
+#        @full preallocation also sets up metadata correctly.
+#
+# Since: 2.2
+##
+{ 'enum': 'PreallocMode',
+  'data': [ 'off', 'metadata', 'falloc', 'full' ] }
+
+##
+# @BLOCK_WRITE_THRESHOLD:
+#
+# Emitted when writes on block device reaches or exceeds the
+# configured write threshold. For thin-provisioned devices, this
+# means the device should be extended to avoid pausing for
+# disk exhaustion.
+# The event is one shot. Once triggered, it needs to be
+# re-registered with another block-set-write-threshold command.
+#
+# @node-name: graph node name on which the threshold was exceeded.
+#
+# @amount-exceeded: amount of data which exceeded the threshold, in bytes.
+#
+# @write-threshold: last configured threshold, in bytes.
+#
+# Since: 2.3
+##
+{ 'event': 'BLOCK_WRITE_THRESHOLD',
+  'data': { 'node-name': 'str',
+            'amount-exceeded': 'uint64',
+            'write-threshold': 'uint64' } }
+
+##
+# @block-set-write-threshold:
+#
+# Change the write threshold for a block drive. An event will be
+# delivered if a write to this block drive crosses the configured
+# threshold.  The threshold is an offset, thus must be
+# non-negative. Default is no write threshold. Setting the threshold
+# to zero disables it.
+#
+# This is useful to transparently resize thin-provisioned drives without
+# the guest OS noticing.
+#
+# @node-name: graph node name on which the threshold must be set.
+#
+# @write-threshold: configured threshold for the block device, bytes.
+#                   Use 0 to disable the threshold.
+#
+# Since: 2.3
+#
+# Example:
+#
+# -> { "execute": "block-set-write-threshold",
+#      "arguments": { "node-name": "mydev",
+#                     "write-threshold": 17179869184 } }
+# <- { "return": {} }
+#
+##
+{ 'command': 'block-set-write-threshold',
+  'data': { 'node-name': 'str', 'write-threshold': 'uint64' } }
+
+##
+# @x-blockdev-change:
+#
+# Dynamically reconfigure the block driver state graph. It can be used
+# to add, remove, insert or replace a graph node. Currently only the
+# Quorum driver implements this feature to add or remove its child. This
+# is useful to fix a broken quorum child.
+#
+# If @node is specified, it will be inserted under @parent. @child
+# may not be specified in this case. If both @parent and @child are
+# specified but @node is not, @child will be detached from @parent.
+#
+# @parent: the id or name of the parent node.
+#
+# @child: the name of a child under the given parent node.
+#
+# @node: the name of the node that will be added.
+#
+# Note: this command is experimental, and its API is not stable. It
+#       does not support all kinds of operations, all kinds of children, nor
+#       all block drivers.
+#
+#       FIXME Removing children from a quorum node means introducing gaps in the
+#       child indices. This cannot be represented in the 'children' list of
+#       BlockdevOptionsQuorum, as returned by .bdrv_refresh_filename().
+#
+#       Warning: The data in a new quorum child MUST be consistent with that of
+#       the rest of the array.
+#
+# Since: 2.7
+#
+# Example:
+#
+# 1. Add a new node to a quorum
+# -> { "execute": "blockdev-add",
+#      "arguments": {
+#          "driver": "raw",
+#          "node-name": "new_node",
+#          "file": { "driver": "file",
+#                    "filename": "test.raw" } } }
+# <- { "return": {} }
+# -> { "execute": "x-blockdev-change",
+#      "arguments": { "parent": "disk1",
+#                     "node": "new_node" } }
+# <- { "return": {} }
+#
+# 2. Delete a quorum's node
+# -> { "execute": "x-blockdev-change",
+#      "arguments": { "parent": "disk1",
+#                     "child": "children.1" } }
+# <- { "return": {} }
+#
+##
+{ 'command': 'x-blockdev-change',
+  'data' : { 'parent': 'str',
+             '*child': 'str',
+             '*node': 'str' } }
+
+##
+# @x-blockdev-set-iothread:
+#
+# Move @node and its children into the @iothread.  If @iothread is null then
+# move @node and its children into the main loop.
+#
+# The node must not be attached to a BlockBackend.
+#
+# @node-name: the name of the block driver node
+#
+# @iothread: the name of the IOThread object or null for the main loop
+#
+# @force: true if the node and its children should be moved when a BlockBackend
+#         is already attached
+#
+# Note: this command is experimental and intended for test cases that need
+#       control over IOThreads only.
+#
+# Since: 2.12
+#
+# Example:
+#
+# 1. Move a node into an IOThread
+# -> { "execute": "x-blockdev-set-iothread",
+#      "arguments": { "node-name": "disk1",
+#                     "iothread": "iothread0" } }
+# <- { "return": {} }
+#
+# 2. Move a node into the main loop
+# -> { "execute": "x-blockdev-set-iothread",
+#      "arguments": { "node-name": "disk1",
+#                     "iothread": null } }
+# <- { "return": {} }
+#
+##
+{ 'command': 'x-blockdev-set-iothread',
+  'data' : { 'node-name': 'str',
+             'iothread': 'StrOrNull',
+             '*force': 'bool' } }
+
+##
+# @QuorumOpType:
+#
+# An enumeration of the quorum operation types
+#
+# @read: read operation
+#
+# @write: write operation
+#
+# @flush: flush operation
+#
+# Since: 2.6
+##
+{ 'enum': 'QuorumOpType',
+  'data': [ 'read', 'write', 'flush' ] }
+
+##
+# @QUORUM_FAILURE:
+#
+# Emitted by the Quorum block driver if it fails to establish a quorum
+#
+# @reference: device name if defined else node name
+#
+# @sector-num: number of the first sector of the failed read operation
+#
+# @sectors-count: failed read operation sector count
+#
+# Note: This event is rate-limited.
+#
+# Since: 2.0
+#
+# Example:
+#
+# <- { "event": "QUORUM_FAILURE",
+#      "data": { "reference": "usr1", "sector-num": 345435, "sectors-count": 5 },
+#      "timestamp": { "seconds": 1344522075, "microseconds": 745528 } }
+#
+##
+{ 'event': 'QUORUM_FAILURE',
+  'data': { 'reference': 'str', 'sector-num': 'int', 'sectors-count': 'int' } }
+
+##
+# @QUORUM_REPORT_BAD:
+#
+# Emitted to report a corruption of a Quorum file
+#
+# @type: quorum operation type (Since 2.6)
+#
+# @error: error message. Only present on failure. This field
+#         contains a human-readable error message. There are no semantics other
+#         than that the block layer reported an error and clients should not
+#         try to interpret the error string.
+#
+# @node-name: the graph node name of the block driver state
+#
+# @sector-num: number of the first sector of the failed read operation
+#
+# @sectors-count: failed read operation sector count
+#
+# Note: This event is rate-limited.
+#
+# Since: 2.0
+#
+# Example:
+#
+# 1. Read operation
+#
+# { "event": "QUORUM_REPORT_BAD",
+#      "data": { "node-name": "node0", "sector-num": 345435, "sectors-count": 5,
+#                "type": "read" },
+#      "timestamp": { "seconds": 1344522075, "microseconds": 745528 } }
+#
+# 2. Flush operation
+#
+# { "event": "QUORUM_REPORT_BAD",
+#      "data": { "node-name": "node0", "sector-num": 0, "sectors-count": 2097120,
+#                "type": "flush", "error": "Broken pipe" },
+#      "timestamp": { "seconds": 1456406829, "microseconds": 291763 } }
+#
+##
+{ 'event': 'QUORUM_REPORT_BAD',
+  'data': { 'type': 'QuorumOpType', '*error': 'str', 'node-name': 'str',
+            'sector-num': 'int', 'sectors-count': 'int' } }
+
+##
+# @BlockdevSnapshotInternal:
+#
+# @device: the device name or node-name of a root node to generate the snapshot
+#          from
+#
+# @name: the name of the internal snapshot to be created
+#
+# Notes: In transaction, if @name is empty, or any snapshot matching @name
+#        exists, the operation will fail. Only some image formats support it,
+#        for example, qcow2, rbd, and sheepdog.
+#
+# Since: 1.7
+##
+{ 'struct': 'BlockdevSnapshotInternal',
+  'data': { 'device': 'str', 'name': 'str' } }
+
+##
+# @blockdev-snapshot-internal-sync:
+#
+# Synchronously take an internal snapshot of a block device, when the
+# format of the image used supports it. If the name is an empty
+# string, or a snapshot with name already exists, the operation will
+# fail.
+#
+# For the arguments, see the documentation of BlockdevSnapshotInternal.
+#
+# Returns: - nothing on success
+#          - If @device is not a valid block device, GenericError
+#          - If any snapshot matching @name exists, or @name is empty,
+#            GenericError
+#          - If the format of the image used does not support it,
+#            BlockFormatFeatureNotSupported
+#
+# Since: 1.7
+#
+# Example:
+#
+# -> { "execute": "blockdev-snapshot-internal-sync",
+#      "arguments": { "device": "ide-hd0",
+#                     "name": "snapshot0" }
+#    }
+# <- { "return": {} }
+#
+##
+{ 'command': 'blockdev-snapshot-internal-sync',
+  'data': 'BlockdevSnapshotInternal' }
+
+##
+# @blockdev-snapshot-delete-internal-sync:
+#
+# Synchronously delete an internal snapshot of a block device, when the format
+# of the image used support it. The snapshot is identified by name or id or
+# both. One of the name or id is required. Return SnapshotInfo for the
+# successfully deleted snapshot.
+#
+# @device: the device name or node-name of a root node to delete the snapshot
+#          from
+#
+# @id: optional the snapshot's ID to be deleted
+#
+# @name: optional the snapshot's name to be deleted
+#
+# Returns: - SnapshotInfo on success
+#          - If @device is not a valid block device, GenericError
+#          - If snapshot not found, GenericError
+#          - If the format of the image used does not support it,
+#            BlockFormatFeatureNotSupported
+#          - If @id and @name are both not specified, GenericError
+#
+# Since: 1.7
+#
+# Example:
+#
+# -> { "execute": "blockdev-snapshot-delete-internal-sync",
+#      "arguments": { "device": "ide-hd0",
+#                     "name": "snapshot0" }
+#    }
+# <- { "return": {
+#                    "id": "1",
+#                    "name": "snapshot0",
+#                    "vm-state-size": 0,
+#                    "date-sec": 1000012,
+#                    "date-nsec": 10,
+#                    "vm-clock-sec": 100,
+#                    "vm-clock-nsec": 20,
+#                    "icount": 220414
+#      }
+#    }
+#
+##
+{ 'command': 'blockdev-snapshot-delete-internal-sync',
+  'data': { 'device': 'str', '*id': 'str', '*name': 'str'},
+  'returns': 'SnapshotInfo' }
diff --git a/qapi/block-export.json b/qapi/block-export.json
new file mode 100644
index 000000000..a9f488f99
--- /dev/null
+++ b/qapi/block-export.json
@@ -0,0 +1,345 @@
+# -*- Mode: Python -*-
+# vim: filetype=python
+
+##
+# == Block device exports
+##
+
+{ 'include': 'sockets.json' }
+
+##
+# @NbdServerOptions:
+#
+# Keep this type consistent with the nbd-server-start arguments. The only
+# intended difference is using SocketAddress instead of SocketAddressLegacy.
+#
+# @addr: Address on which to listen.
+# @tls-creds: ID of the TLS credentials object (since 2.6).
+# @tls-authz: ID of the QAuthZ authorization object used to validate
+#             the client's x509 distinguished name. This object is
+#             is only resolved at time of use, so can be deleted and
+#             recreated on the fly while the NBD server is active.
+#             If missing, it will default to denying access (since 4.0).
+# @max-connections: The maximum number of connections to allow at the same
+#                   time, 0 for unlimited. (since 5.2; default: 0)
+#
+# Since: 4.2
+##
+{ 'struct': 'NbdServerOptions',
+  'data': { 'addr': 'SocketAddress',
+            '*tls-creds': 'str',
+            '*tls-authz': 'str',
+            '*max-connections': 'uint32' } }
+
+##
+# @nbd-server-start:
+#
+# Start an NBD server listening on the given host and port.  Block
+# devices can then be exported using @nbd-server-add.  The NBD
+# server will present them as named exports; for example, another
+# QEMU instance could refer to them as "nbd:HOST:PORT:exportname=NAME".
+#
+# Keep this type consistent with the NbdServerOptions type. The only intended
+# difference is using SocketAddressLegacy instead of SocketAddress.
+#
+# @addr: Address on which to listen.
+# @tls-creds: ID of the TLS credentials object (since 2.6).
+# @tls-authz: ID of the QAuthZ authorization object used to validate
+#             the client's x509 distinguished name. This object is
+#             is only resolved at time of use, so can be deleted and
+#             recreated on the fly while the NBD server is active.
+#             If missing, it will default to denying access (since 4.0).
+# @max-connections: The maximum number of connections to allow at the same
+#                   time, 0 for unlimited. (since 5.2; default: 0)
+#
+# Returns: error if the server is already running.
+#
+# Since: 1.3.0
+##
+{ 'command': 'nbd-server-start',
+  'data': { 'addr': 'SocketAddressLegacy',
+            '*tls-creds': 'str',
+            '*tls-authz': 'str',
+            '*max-connections': 'uint32' } }
+
+##
+# @BlockExportOptionsNbdBase:
+#
+# An NBD block export (common options shared between nbd-server-add and
+# the NBD branch of block-export-add).
+#
+# @name: Export name. If unspecified, the @device parameter is used as the
+#        export name. (Since 2.12)
+#
+# @description: Free-form description of the export, up to 4096 bytes.
+#               (Since 5.0)
+#
+# Since: 5.0
+##
+{ 'struct': 'BlockExportOptionsNbdBase',
+  'data': { '*name': 'str', '*description': 'str' } }
+
+##
+# @BlockExportOptionsNbd:
+#
+# An NBD block export (distinct options used in the NBD branch of
+# block-export-add).
+#
+# @bitmaps: Also export each of the named dirty bitmaps reachable from
+#           @device, so the NBD client can use NBD_OPT_SET_META_CONTEXT with
+#           the metadata context name "qemu:dirty-bitmap:BITMAP" to inspect
+#           each bitmap.
+#
+# @allocation-depth: Also export the allocation depth map for @device, so
+#                    the NBD client can use NBD_OPT_SET_META_CONTEXT with
+#                    the metadata context name "qemu:allocation-depth" to
+#                    inspect allocation details. (since 5.2)
+#
+# Since: 5.2
+##
+{ 'struct': 'BlockExportOptionsNbd',
+  'base': 'BlockExportOptionsNbdBase',
+  'data': { '*bitmaps': ['str'], '*allocation-depth': 'bool' } }
+
+##
+# @BlockExportOptionsVhostUserBlk:
+#
+# A vhost-user-blk block export.
+#
+# @addr: The vhost-user socket on which to listen. Both 'unix' and 'fd'
+#        SocketAddress types are supported. Passed fds must be UNIX domain
+#        sockets.
+# @logical-block-size: Logical block size in bytes. Defaults to 512 bytes.
+# @num-queues: Number of request virtqueues. Must be greater than 0. Defaults
+#              to 1.
+#
+# Since: 5.2
+##
+{ 'struct': 'BlockExportOptionsVhostUserBlk',
+  'data': { 'addr': 'SocketAddress',
+	    '*logical-block-size': 'size',
+            '*num-queues': 'uint16'} }
+
+##
+# @NbdServerAddOptions:
+#
+# An NBD block export, per legacy nbd-server-add command.
+#
+# @device: The device name or node name of the node to be exported
+#
+# @writable: Whether clients should be able to write to the device via the
+#            NBD connection (default false).
+#
+# @bitmap: Also export a single dirty bitmap reachable from @device, so the
+#          NBD client can use NBD_OPT_SET_META_CONTEXT with the metadata
+#          context name "qemu:dirty-bitmap:BITMAP" to inspect the bitmap
+#          (since 4.0).
+#
+# Since: 5.0
+##
+{ 'struct': 'NbdServerAddOptions',
+  'base': 'BlockExportOptionsNbdBase',
+  'data': { 'device': 'str',
+            '*writable': 'bool', '*bitmap': 'str' } }
+
+##
+# @nbd-server-add:
+#
+# Export a block node to QEMU's embedded NBD server.
+#
+# The export name will be used as the id for the resulting block export.
+#
+# Features:
+# @deprecated: This command is deprecated. Use @block-export-add instead.
+#
+# Returns: error if the server is not running, or export with the same name
+#          already exists.
+#
+# Since: 1.3.0
+##
+{ 'command': 'nbd-server-add',
+  'data': 'NbdServerAddOptions', 'boxed': true, 'features': ['deprecated'] }
+
+##
+# @BlockExportRemoveMode:
+#
+# Mode for removing a block export.
+#
+# @safe: Remove export if there are no existing connections, fail otherwise.
+#
+# @hard: Drop all connections immediately and remove export.
+#
+# Potential additional modes to be added in the future:
+#
+# hide: Just hide export from new clients, leave existing connections as is.
+# Remove export after all clients are disconnected.
+#
+# soft: Hide export from new clients, answer with ESHUTDOWN for all further
+# requests from existing clients.
+#
+# Since: 2.12
+##
+{'enum': 'BlockExportRemoveMode', 'data': ['safe', 'hard']}
+
+##
+# @nbd-server-remove:
+#
+# Remove NBD export by name.
+#
+# @name: Block export id.
+#
+# @mode: Mode of command operation. See @BlockExportRemoveMode description.
+#        Default is 'safe'.
+#
+# Features:
+# @deprecated: This command is deprecated. Use @block-export-del instead.
+#
+# Returns: error if
+#            - the server is not running
+#            - export is not found
+#            - mode is 'safe' and there are existing connections
+#
+# Since: 2.12
+##
+{ 'command': 'nbd-server-remove',
+  'data': {'name': 'str', '*mode': 'BlockExportRemoveMode'},
+  'features': ['deprecated'] }
+
+##
+# @nbd-server-stop:
+#
+# Stop QEMU's embedded NBD server, and unregister all devices previously
+# added via @nbd-server-add.
+#
+# Since: 1.3.0
+##
+{ 'command': 'nbd-server-stop' }
+
+##
+# @BlockExportType:
+#
+# An enumeration of block export types
+#
+# @nbd: NBD export
+# @vhost-user-blk: vhost-user-blk export (since 5.2)
+#
+# Since: 4.2
+##
+{ 'enum': 'BlockExportType',
+  'data': [ 'nbd', 'vhost-user-blk' ] }
+
+##
+# @BlockExportOptions:
+#
+# Describes a block export, i.e. how single node should be exported on an
+# external interface.
+#
+# @id: A unique identifier for the block export (across all export types)
+#
+# @node-name: The node name of the block node to be exported (since: 5.2)
+#
+# @writable: True if clients should be able to write to the export
+#            (default false)
+#
+# @writethrough: If true, caches are flushed after every write request to the
+#                export before completion is signalled. (since: 5.2;
+#                default: false)
+#
+# @iothread: The name of the iothread object where the export will run. The
+#            default is to use the thread currently associated with the
+#            block node. (since: 5.2)
+#
+# @fixed-iothread: True prevents the block node from being moved to another
+#                  thread while the export is active. If true and @iothread is
+#                  given, export creation fails if the block node cannot be
+#                  moved to the iothread. The default is false. (since: 5.2)
+#
+# Since: 4.2
+##
+{ 'union': 'BlockExportOptions',
+  'base': { 'type': 'BlockExportType',
+            'id': 'str',
+            '*fixed-iothread': 'bool',
+            '*iothread': 'str',
+            'node-name': 'str',
+            '*writable': 'bool',
+            '*writethrough': 'bool' },
+  'discriminator': 'type',
+  'data': {
+      'nbd': 'BlockExportOptionsNbd',
+      'vhost-user-blk': 'BlockExportOptionsVhostUserBlk'
+   } }
+
+##
+# @block-export-add:
+#
+# Creates a new block export.
+#
+# Since: 5.2
+##
+{ 'command': 'block-export-add',
+  'data': 'BlockExportOptions', 'boxed': true }
+
+##
+# @block-export-del:
+#
+# Request to remove a block export. This drops the user's reference to the
+# export, but the export may still stay around after this command returns until
+# the shutdown of the export has completed.
+#
+# @id: Block export id.
+#
+# @mode: Mode of command operation. See @BlockExportRemoveMode description.
+#        Default is 'safe'.
+#
+# Returns: Error if the export is not found or @mode is 'safe' and the export
+#          is still in use (e.g. by existing client connections)
+#
+# Since: 5.2
+##
+{ 'command': 'block-export-del',
+  'data': { 'id': 'str', '*mode': 'BlockExportRemoveMode' } }
+
+##
+# @BLOCK_EXPORT_DELETED:
+#
+# Emitted when a block export is removed and its id can be reused.
+#
+# @id: Block export id.
+#
+# Since: 5.2
+##
+{ 'event': 'BLOCK_EXPORT_DELETED',
+  'data': { 'id': 'str' } }
+
+##
+# @BlockExportInfo:
+#
+# Information about a single block export.
+#
+# @id: The unique identifier for the block export
+#
+# @type: The block export type
+#
+# @node-name: The node name of the block node that is exported
+#
+# @shutting-down: True if the export is shutting down (e.g. after a
+#                 block-export-del command, but before the shutdown has
+#                 completed)
+#
+# Since:  5.2
+##
+{ 'struct': 'BlockExportInfo',
+  'data': { 'id': 'str',
+            'type': 'BlockExportType',
+            'node-name': 'str',
+            'shutting-down': 'bool' } }
+
+##
+# @query-block-exports:
+#
+# Returns: A list of BlockExportInfo describing all block exports
+#
+# Since: 5.2
+##
+{ 'command': 'query-block-exports', 'returns': ['BlockExportInfo'] }
diff --git a/qapi/block.json b/qapi/block.json
new file mode 100644
index 000000000..a009f7d3a
--- /dev/null
+++ b/qapi/block.json
@@ -0,0 +1,572 @@
+# -*- Mode: Python -*-
+# vim: filetype=python
+
+##
+# = Block devices
+##
+
+{ 'include': 'block-core.json' }
+
+##
+# == Additional block stuff (VM related)
+##
+
+##
+# @BiosAtaTranslation:
+#
+# Policy that BIOS should use to interpret cylinder/head/sector
+# addresses.  Note that Bochs BIOS and SeaBIOS will not actually
+# translate logical CHS to physical; instead, they will use logical
+# block addressing.
+#
+# @auto: If cylinder/heads/sizes are passed, choose between none and LBA
+#        depending on the size of the disk.  If they are not passed,
+#        choose none if QEMU can guess that the disk had 16 or fewer
+#        heads, large if QEMU can guess that the disk had 131072 or
+#        fewer tracks across all heads (i.e. cylinders*heads<131072),
+#        otherwise LBA.
+#
+# @none: The physical disk geometry is equal to the logical geometry.
+#
+# @lba: Assume 63 sectors per track and one of 16, 32, 64, 128 or 255
+#       heads (if fewer than 255 are enough to cover the whole disk
+#       with 1024 cylinders/head).  The number of cylinders/head is
+#       then computed based on the number of sectors and heads.
+#
+# @large: The number of cylinders per head is scaled down to 1024
+#         by correspondingly scaling up the number of heads.
+#
+# @rechs: Same as @large, but first convert a 16-head geometry to
+#         15-head, by proportionally scaling up the number of
+#         cylinders/head.
+#
+# Since: 2.0
+##
+{ 'enum': 'BiosAtaTranslation',
+  'data': ['auto', 'none', 'lba', 'large', 'rechs']}
+
+##
+# @FloppyDriveType:
+#
+# Type of Floppy drive to be emulated by the Floppy Disk Controller.
+#
+# @144:  1.44MB 3.5" drive
+# @288:  2.88MB 3.5" drive
+# @120:  1.2MB 5.25" drive
+# @none: No drive connected
+# @auto: Automatically determined by inserted media at boot
+#
+# Since: 2.6
+##
+{ 'enum': 'FloppyDriveType',
+  'data': ['144', '288', '120', 'none', 'auto']}
+
+##
+# @PRManagerInfo:
+#
+# Information about a persistent reservation manager
+#
+# @id: the identifier of the persistent reservation manager
+#
+# @connected: true if the persistent reservation manager is connected to
+#             the underlying storage or helper
+#
+# Since: 3.0
+##
+{ 'struct': 'PRManagerInfo',
+  'data': {'id': 'str', 'connected': 'bool'} }
+
+##
+# @query-pr-managers:
+#
+# Returns a list of information about each persistent reservation manager.
+#
+# Returns: a list of @PRManagerInfo for each persistent reservation manager
+#
+# Since: 3.0
+##
+{ 'command': 'query-pr-managers', 'returns': ['PRManagerInfo'],
+  'allow-preconfig': true }
+
+##
+# @eject:
+#
+# Ejects the medium from a removable drive.
+#
+# @device: Block device name
+#
+# @id: The name or QOM path of the guest device (since: 2.8)
+#
+# @force: If true, eject regardless of whether the drive is locked.
+#         If not specified, the default value is false.
+#
+# Features:
+# @deprecated: Member @device is deprecated.  Use @id instead.
+#
+# Returns: - Nothing on success
+#          - If @device is not a valid block device, DeviceNotFound
+# Notes:    Ejecting a device with no media results in success
+#
+# Since: 0.14.0
+#
+# Example:
+#
+# -> { "execute": "eject", "arguments": { "id": "ide1-0-1" } }
+# <- { "return": {} }
+##
+{ 'command': 'eject',
+  'data': { '*device': { 'type': 'str', 'features': [ 'deprecated' ] },
+            '*id': 'str',
+            '*force': 'bool' } }
+
+##
+# @blockdev-open-tray:
+#
+# Opens a block device's tray. If there is a block driver state tree inserted as
+# a medium, it will become inaccessible to the guest (but it will remain
+# associated to the block device, so closing the tray will make it accessible
+# again).
+#
+# If the tray was already open before, this will be a no-op.
+#
+# Once the tray opens, a DEVICE_TRAY_MOVED event is emitted. There are cases in
+# which no such event will be generated, these include:
+#
+# - if the guest has locked the tray, @force is false and the guest does not
+#   respond to the eject request
+# - if the BlockBackend denoted by @device does not have a guest device attached
+#   to it
+# - if the guest device does not have an actual tray
+#
+# @device: Block device name
+#
+# @id: The name or QOM path of the guest device (since: 2.8)
+#
+# @force: if false (the default), an eject request will be sent to
+#         the guest if it has locked the tray (and the tray will not be opened
+#         immediately); if true, the tray will be opened regardless of whether
+#         it is locked
+#
+# Features:
+# @deprecated: Member @device is deprecated.  Use @id instead.
+#
+# Since: 2.5
+#
+# Example:
+#
+# -> { "execute": "blockdev-open-tray",
+#      "arguments": { "id": "ide0-1-0" } }
+#
+# <- { "timestamp": { "seconds": 1418751016,
+#                     "microseconds": 716996 },
+#      "event": "DEVICE_TRAY_MOVED",
+#      "data": { "device": "ide1-cd0",
+#                "id": "ide0-1-0",
+#                "tray-open": true } }
+#
+# <- { "return": {} }
+#
+##
+{ 'command': 'blockdev-open-tray',
+  'data': { '*device': { 'type': 'str', 'features': [ 'deprecated' ] },
+            '*id': 'str',
+            '*force': 'bool' } }
+
+##
+# @blockdev-close-tray:
+#
+# Closes a block device's tray. If there is a block driver state tree associated
+# with the block device (which is currently ejected), that tree will be loaded
+# as the medium.
+#
+# If the tray was already closed before, this will be a no-op.
+#
+# @device: Block device name
+#
+# @id: The name or QOM path of the guest device (since: 2.8)
+#
+# Features:
+# @deprecated: Member @device is deprecated.  Use @id instead.
+#
+# Since: 2.5
+#
+# Example:
+#
+# -> { "execute": "blockdev-close-tray",
+#      "arguments": { "id": "ide0-1-0" } }
+#
+# <- { "timestamp": { "seconds": 1418751345,
+#                     "microseconds": 272147 },
+#      "event": "DEVICE_TRAY_MOVED",
+#      "data": { "device": "ide1-cd0",
+#                "id": "ide0-1-0",
+#                "tray-open": false } }
+#
+# <- { "return": {} }
+#
+##
+{ 'command': 'blockdev-close-tray',
+  'data': { '*device': { 'type': 'str', 'features': [ 'deprecated' ] },
+            '*id': 'str' } }
+
+##
+# @blockdev-remove-medium:
+#
+# Removes a medium (a block driver state tree) from a block device. That block
+# device's tray must currently be open (unless there is no attached guest
+# device).
+#
+# If the tray is open and there is no medium inserted, this will be a no-op.
+#
+# @id: The name or QOM path of the guest device
+#
+# Since: 2.12
+#
+# Example:
+#
+# -> { "execute": "blockdev-remove-medium",
+#      "arguments": { "id": "ide0-1-0" } }
+#
+# <- { "error": { "class": "GenericError",
+#                 "desc": "Tray of device 'ide0-1-0' is not open" } }
+#
+# -> { "execute": "blockdev-open-tray",
+#      "arguments": { "id": "ide0-1-0" } }
+#
+# <- { "timestamp": { "seconds": 1418751627,
+#                     "microseconds": 549958 },
+#      "event": "DEVICE_TRAY_MOVED",
+#      "data": { "device": "ide1-cd0",
+#                "id": "ide0-1-0",
+#                "tray-open": true } }
+#
+# <- { "return": {} }
+#
+# -> { "execute": "blockdev-remove-medium",
+#      "arguments": { "id": "ide0-1-0" } }
+#
+# <- { "return": {} }
+#
+##
+{ 'command': 'blockdev-remove-medium',
+  'data': { 'id': 'str' } }
+
+##
+# @blockdev-insert-medium:
+#
+# Inserts a medium (a block driver state tree) into a block device. That block
+# device's tray must currently be open (unless there is no attached guest
+# device) and there must be no medium inserted already.
+#
+# @id: The name or QOM path of the guest device
+#
+# @node-name: name of a node in the block driver state graph
+#
+# Since: 2.12
+#
+# Example:
+#
+# -> { "execute": "blockdev-add",
+#      "arguments": {
+#          "node-name": "node0",
+#          "driver": "raw",
+#          "file": { "driver": "file",
+#                    "filename": "fedora.iso" } } }
+# <- { "return": {} }
+#
+# -> { "execute": "blockdev-insert-medium",
+#      "arguments": { "id": "ide0-1-0",
+#                     "node-name": "node0" } }
+#
+# <- { "return": {} }
+#
+##
+{ 'command': 'blockdev-insert-medium',
+  'data': { 'id': 'str',
+            'node-name': 'str'} }
+
+
+##
+# @BlockdevChangeReadOnlyMode:
+#
+# Specifies the new read-only mode of a block device subject to the
+# @blockdev-change-medium command.
+#
+# @retain: Retains the current read-only mode
+#
+# @read-only: Makes the device read-only
+#
+# @read-write: Makes the device writable
+#
+# Since: 2.3
+#
+##
+{ 'enum': 'BlockdevChangeReadOnlyMode',
+  'data': ['retain', 'read-only', 'read-write'] }
+
+
+##
+# @blockdev-change-medium:
+#
+# Changes the medium inserted into a block device by ejecting the current medium
+# and loading a new image file which is inserted as the new medium (this command
+# combines blockdev-open-tray, blockdev-remove-medium, blockdev-insert-medium
+# and blockdev-close-tray).
+#
+# @device: Block device name
+#
+# @id: The name or QOM path of the guest device
+#      (since: 2.8)
+#
+# @filename: filename of the new image to be loaded
+#
+# @format: format to open the new image with (defaults to
+#          the probed format)
+#
+# @read-only-mode: change the read-only mode of the device; defaults
+#                  to 'retain'
+#
+# Features:
+# @deprecated: Member @device is deprecated.  Use @id instead.
+#
+# Since: 2.5
+#
+# Examples:
+#
+# 1. Change a removable medium
+#
+# -> { "execute": "blockdev-change-medium",
+#      "arguments": { "id": "ide0-1-0",
+#                     "filename": "/srv/images/Fedora-12-x86_64-DVD.iso",
+#                     "format": "raw" } }
+# <- { "return": {} }
+#
+# 2. Load a read-only medium into a writable drive
+#
+# -> { "execute": "blockdev-change-medium",
+#      "arguments": { "id": "floppyA",
+#                     "filename": "/srv/images/ro.img",
+#                     "format": "raw",
+#                     "read-only-mode": "retain" } }
+#
+# <- { "error":
+#      { "class": "GenericError",
+#        "desc": "Could not open '/srv/images/ro.img': Permission denied" } }
+#
+# -> { "execute": "blockdev-change-medium",
+#      "arguments": { "id": "floppyA",
+#                     "filename": "/srv/images/ro.img",
+#                     "format": "raw",
+#                     "read-only-mode": "read-only" } }
+#
+# <- { "return": {} }
+#
+##
+{ 'command': 'blockdev-change-medium',
+  'data': { '*device': { 'type': 'str', 'features': [ 'deprecated' ] },
+            '*id': 'str',
+            'filename': 'str',
+            '*format': 'str',
+            '*read-only-mode': 'BlockdevChangeReadOnlyMode' } }
+
+
+##
+# @DEVICE_TRAY_MOVED:
+#
+# Emitted whenever the tray of a removable device is moved by the guest or by
+# HMP/QMP commands
+#
+# @device: Block device name. This is always present for compatibility
+#          reasons, but it can be empty ("") if the image does not
+#          have a device name associated.
+#
+# @id: The name or QOM path of the guest device (since 2.8)
+#
+# @tray-open: true if the tray has been opened or false if it has been closed
+#
+# Since: 1.1
+#
+# Example:
+#
+# <- { "event": "DEVICE_TRAY_MOVED",
+#      "data": { "device": "ide1-cd0",
+#                "id": "/machine/unattached/device[22]",
+#                "tray-open": true
+#      },
+#      "timestamp": { "seconds": 1265044230, "microseconds": 450486 } }
+#
+##
+{ 'event': 'DEVICE_TRAY_MOVED',
+  'data': { 'device': 'str', 'id': 'str', 'tray-open': 'bool' } }
+
+##
+# @PR_MANAGER_STATUS_CHANGED:
+#
+# Emitted whenever the connected status of a persistent reservation
+# manager changes.
+#
+# @id: The id of the PR manager object
+#
+# @connected: true if the PR manager is connected to a backend
+#
+# Since: 3.0
+#
+# Example:
+#
+# <- { "event": "PR_MANAGER_STATUS_CHANGED",
+#      "data": { "id": "pr-helper0",
+#                "connected": true
+#      },
+#      "timestamp": { "seconds": 1519840375, "microseconds": 450486 } }
+#
+##
+{ 'event': 'PR_MANAGER_STATUS_CHANGED',
+  'data': { 'id': 'str', 'connected': 'bool' } }
+
+##
+# @block_set_io_throttle:
+#
+# Change I/O throttle limits for a block drive.
+#
+# Since QEMU 2.4, each device with I/O limits is member of a throttle
+# group.
+#
+# If two or more devices are members of the same group, the limits
+# will apply to the combined I/O of the whole group in a round-robin
+# fashion. Therefore, setting new I/O limits to a device will affect
+# the whole group.
+#
+# The name of the group can be specified using the 'group' parameter.
+# If the parameter is unset, it is assumed to be the current group of
+# that device. If it's not in any group yet, the name of the device
+# will be used as the name for its group.
+#
+# The 'group' parameter can also be used to move a device to a
+# different group. In this case the limits specified in the parameters
+# will be applied to the new group only.
+#
+# I/O limits can be disabled by setting all of them to 0. In this case
+# the device will be removed from its group and the rest of its
+# members will not be affected. The 'group' parameter is ignored.
+#
+# Returns: - Nothing on success
+#          - If @device is not a valid block device, DeviceNotFound
+#
+# Since: 1.1
+#
+# Example:
+#
+# -> { "execute": "block_set_io_throttle",
+#      "arguments": { "id": "virtio-blk-pci0/virtio-backend",
+#                     "bps": 0,
+#                     "bps_rd": 0,
+#                     "bps_wr": 0,
+#                     "iops": 512,
+#                     "iops_rd": 0,
+#                     "iops_wr": 0,
+#                     "bps_max": 0,
+#                     "bps_rd_max": 0,
+#                     "bps_wr_max": 0,
+#                     "iops_max": 0,
+#                     "iops_rd_max": 0,
+#                     "iops_wr_max": 0,
+#                     "bps_max_length": 0,
+#                     "iops_size": 0 } }
+# <- { "return": {} }
+#
+# -> { "execute": "block_set_io_throttle",
+#      "arguments": { "id": "ide0-1-0",
+#                     "bps": 1000000,
+#                     "bps_rd": 0,
+#                     "bps_wr": 0,
+#                     "iops": 0,
+#                     "iops_rd": 0,
+#                     "iops_wr": 0,
+#                     "bps_max": 8000000,
+#                     "bps_rd_max": 0,
+#                     "bps_wr_max": 0,
+#                     "iops_max": 0,
+#                     "iops_rd_max": 0,
+#                     "iops_wr_max": 0,
+#                     "bps_max_length": 60,
+#                     "iops_size": 0 } }
+# <- { "return": {} }
+##
+{ 'command': 'block_set_io_throttle', 'boxed': true,
+  'data': 'BlockIOThrottle' }
+
+##
+# @block-latency-histogram-set:
+#
+# Manage read, write and flush latency histograms for the device.
+#
+# If only @id parameter is specified, remove all present latency histograms
+# for the device. Otherwise, add/reset some of (or all) latency histograms.
+#
+# @id: The name or QOM path of the guest device.
+#
+# @boundaries: list of interval boundary values (see description in
+#              BlockLatencyHistogramInfo definition). If specified, all
+#              latency histograms are removed, and empty ones created for all
+#              io types with intervals corresponding to @boundaries (except for
+#              io types, for which specific boundaries are set through the
+#              following parameters).
+#
+# @boundaries-read: list of interval boundary values for read latency
+#                   histogram. If specified, old read latency histogram is
+#                   removed, and empty one created with intervals
+#                   corresponding to @boundaries-read. The parameter has higher
+#                   priority then @boundaries.
+#
+# @boundaries-write: list of interval boundary values for write latency
+#                    histogram.
+#
+# @boundaries-flush: list of interval boundary values for flush latency
+#                    histogram.
+#
+# Returns: error if device is not found or any boundary arrays are invalid.
+#
+# Since: 4.0
+#
+# Example:
+# set new histograms for all io types with intervals
+# [0, 10), [10, 50), [50, 100), [100, +inf):
+#
+# -> { "execute": "block-latency-histogram-set",
+#      "arguments": { "id": "drive0",
+#                     "boundaries": [10, 50, 100] } }
+# <- { "return": {} }
+#
+# Example:
+# set new histogram only for write, other histograms will remain
+# not changed (or not created):
+#
+# -> { "execute": "block-latency-histogram-set",
+#      "arguments": { "id": "drive0",
+#                     "boundaries-write": [10, 50, 100] } }
+# <- { "return": {} }
+#
+# Example:
+# set new histograms with the following intervals:
+#   read, flush: [0, 10), [10, 50), [50, 100), [100, +inf)
+#   write: [0, 1000), [1000, 5000), [5000, +inf)
+#
+# -> { "execute": "block-latency-histogram-set",
+#      "arguments": { "id": "drive0",
+#                     "boundaries": [10, 50, 100],
+#                     "boundaries-write": [1000, 5000] } }
+# <- { "return": {} }
+#
+# Example:
+# remove all latency histograms:
+#
+# -> { "execute": "block-latency-histogram-set",
+#      "arguments": { "id": "drive0" } }
+# <- { "return": {} }
+##
+{ 'command': 'block-latency-histogram-set',
+  'data': {'id': 'str',
+           '*boundaries': ['uint64'],
+           '*boundaries-read': ['uint64'],
+           '*boundaries-write': ['uint64'],
+           '*boundaries-flush': ['uint64'] } }
diff --git a/qapi/char.json b/qapi/char.json
new file mode 100644
index 000000000..43486d1da
--- /dev/null
+++ b/qapi/char.json
@@ -0,0 +1,577 @@
+# -*- Mode: Python -*-
+# vim: filetype=python
+#
+
+##
+# = Character devices
+##
+
+{ 'include': 'sockets.json' }
+
+##
+# @ChardevInfo:
+#
+# Information about a character device.
+#
+# @label: the label of the character device
+#
+# @filename: the filename of the character device
+#
+# @frontend-open: shows whether the frontend device attached to this backend
+#                 (eg. with the chardev=... option) is in open or closed state
+#                 (since 2.1)
+#
+# Notes: @filename is encoded using the QEMU command line character device
+#        encoding.  See the QEMU man page for details.
+#
+# Since: 0.14.0
+##
+{ 'struct': 'ChardevInfo',
+  'data': { 'label': 'str',
+            'filename': 'str',
+            'frontend-open': 'bool' } }
+
+##
+# @query-chardev:
+#
+# Returns information about current character devices.
+#
+# Returns: a list of @ChardevInfo
+#
+# Since: 0.14.0
+#
+# Example:
+#
+# -> { "execute": "query-chardev" }
+# <- {
+#       "return": [
+#          {
+#             "label": "charchannel0",
+#             "filename": "unix:/var/lib/libvirt/qemu/seabios.rhel6.agent,server",
+#             "frontend-open": false
+#          },
+#          {
+#             "label": "charmonitor",
+#             "filename": "unix:/var/lib/libvirt/qemu/seabios.rhel6.monitor,server",
+#             "frontend-open": true
+#          },
+#          {
+#             "label": "charserial0",
+#             "filename": "pty:/dev/pts/2",
+#             "frontend-open": true
+#          }
+#       ]
+#    }
+#
+##
+{ 'command': 'query-chardev', 'returns': ['ChardevInfo'],
+  'allow-preconfig': true }
+
+##
+# @ChardevBackendInfo:
+#
+# Information about a character device backend
+#
+# @name: The backend name
+#
+# Since: 2.0
+##
+{ 'struct': 'ChardevBackendInfo', 'data': {'name': 'str'} }
+
+##
+# @query-chardev-backends:
+#
+# Returns information about character device backends.
+#
+# Returns: a list of @ChardevBackendInfo
+#
+# Since: 2.0
+#
+# Example:
+#
+# -> { "execute": "query-chardev-backends" }
+# <- {
+#       "return":[
+#          {
+#             "name":"udp"
+#          },
+#          {
+#             "name":"tcp"
+#          },
+#          {
+#             "name":"unix"
+#          },
+#          {
+#             "name":"spiceport"
+#          }
+#       ]
+#    }
+#
+##
+{ 'command': 'query-chardev-backends', 'returns': ['ChardevBackendInfo'] }
+
+##
+# @DataFormat:
+#
+# An enumeration of data format.
+#
+# @utf8: Data is a UTF-8 string (RFC 3629)
+#
+# @base64: Data is Base64 encoded binary (RFC 3548)
+#
+# Since: 1.4
+##
+{ 'enum': 'DataFormat',
+  'data': [ 'utf8', 'base64' ] }
+
+##
+# @ringbuf-write:
+#
+# Write to a ring buffer character device.
+#
+# @device: the ring buffer character device name
+#
+# @data: data to write
+#
+# @format: data encoding (default 'utf8').
+#
+#          - base64: data must be base64 encoded text.  Its binary
+#            decoding gets written.
+#          - utf8: data's UTF-8 encoding is written
+#          - data itself is always Unicode regardless of format, like
+#            any other string.
+#
+# Returns: Nothing on success
+#
+# Since: 1.4
+#
+# Example:
+#
+# -> { "execute": "ringbuf-write",
+#      "arguments": { "device": "foo",
+#                     "data": "abcdefgh",
+#                     "format": "utf8" } }
+# <- { "return": {} }
+#
+##
+{ 'command': 'ringbuf-write',
+  'data': { 'device': 'str',
+            'data': 'str',
+           '*format': 'DataFormat'} }
+
+##
+# @ringbuf-read:
+#
+# Read from a ring buffer character device.
+#
+# @device: the ring buffer character device name
+#
+# @size: how many bytes to read at most
+#
+# @format: data encoding (default 'utf8').
+#
+#          - base64: the data read is returned in base64 encoding.
+#          - utf8: the data read is interpreted as UTF-8.
+#            Bug: can screw up when the buffer contains invalid UTF-8
+#            sequences, NUL characters, after the ring buffer lost
+#            data, and when reading stops because the size limit is
+#            reached.
+#          - The return value is always Unicode regardless of format,
+#            like any other string.
+#
+# Returns: data read from the device
+#
+# Since: 1.4
+#
+# Example:
+#
+# -> { "execute": "ringbuf-read",
+#      "arguments": { "device": "foo",
+#                     "size": 1000,
+#                     "format": "utf8" } }
+# <- { "return": "abcdefgh" }
+#
+##
+{ 'command': 'ringbuf-read',
+  'data': {'device': 'str', 'size': 'int', '*format': 'DataFormat'},
+  'returns': 'str' }
+
+##
+# @ChardevCommon:
+#
+# Configuration shared across all chardev backends
+#
+# @logfile: The name of a logfile to save output
+# @logappend: true to append instead of truncate
+#             (default to false to truncate)
+#
+# Since: 2.6
+##
+{ 'struct': 'ChardevCommon',
+  'data': { '*logfile': 'str',
+            '*logappend': 'bool' } }
+
+##
+# @ChardevFile:
+#
+# Configuration info for file chardevs.
+#
+# @in:  The name of the input file
+# @out: The name of the output file
+# @append: Open the file in append mode (default false to
+#          truncate) (Since 2.6)
+#
+# Since: 1.4
+##
+{ 'struct': 'ChardevFile',
+  'data': { '*in': 'str',
+            'out': 'str',
+            '*append': 'bool' },
+  'base': 'ChardevCommon' }
+
+##
+# @ChardevHostdev:
+#
+# Configuration info for device and pipe chardevs.
+#
+# @device: The name of the special file for the device,
+#          i.e. /dev/ttyS0 on Unix or COM1: on Windows
+#
+# Since: 1.4
+##
+{ 'struct': 'ChardevHostdev',
+  'data': { 'device': 'str' },
+  'base': 'ChardevCommon' }
+
+##
+# @ChardevSocket:
+#
+# Configuration info for (stream) socket chardevs.
+#
+# @addr: socket address to listen on (server=true)
+#        or connect to (server=false)
+# @tls-creds: the ID of the TLS credentials object (since 2.6)
+# @tls-authz: the ID of the QAuthZ authorization object against which
+#             the client's x509 distinguished name will be validated. This
+#             object is only resolved at time of use, so can be deleted
+#             and recreated on the fly while the chardev server is active.
+#             If missing, it will default to denying access (since 4.0)
+# @server: create server socket (default: true)
+# @wait: wait for incoming connection on server
+#        sockets (default: false).
+#        Silently ignored with server: false.  This use is deprecated.
+# @nodelay: set TCP_NODELAY socket option (default: false)
+# @telnet: enable telnet protocol on server
+#          sockets (default: false)
+# @tn3270: enable tn3270 protocol on server
+#          sockets (default: false) (Since: 2.10)
+# @websocket: enable websocket protocol on server
+#             sockets (default: false) (Since: 3.1)
+# @reconnect: For a client socket, if a socket is disconnected,
+#             then attempt a reconnect after the given number of seconds.
+#             Setting this to zero disables this function. (default: 0)
+#             (Since: 2.2)
+#
+# Since: 1.4
+##
+{ 'struct': 'ChardevSocket',
+  'data': { 'addr': 'SocketAddressLegacy',
+            '*tls-creds': 'str',
+            '*tls-authz'  : 'str',
+            '*server': 'bool',
+            '*wait': 'bool',
+            '*nodelay': 'bool',
+            '*telnet': 'bool',
+            '*tn3270': 'bool',
+            '*websocket': 'bool',
+            '*reconnect': 'int' },
+  'base': 'ChardevCommon' }
+
+##
+# @ChardevUdp:
+#
+# Configuration info for datagram socket chardevs.
+#
+# @remote: remote address
+# @local: local address
+#
+# Since: 1.5
+##
+{ 'struct': 'ChardevUdp',
+  'data': { 'remote': 'SocketAddressLegacy',
+            '*local': 'SocketAddressLegacy' },
+  'base': 'ChardevCommon' }
+
+##
+# @ChardevMux:
+#
+# Configuration info for mux chardevs.
+#
+# @chardev: name of the base chardev.
+#
+# Since: 1.5
+##
+{ 'struct': 'ChardevMux',
+  'data': { 'chardev': 'str' },
+  'base': 'ChardevCommon' }
+
+##
+# @ChardevStdio:
+#
+# Configuration info for stdio chardevs.
+#
+# @signal: Allow signals (such as SIGINT triggered by ^C)
+#          be delivered to qemu.  Default: true.
+#
+# Since: 1.5
+##
+{ 'struct': 'ChardevStdio',
+  'data': { '*signal': 'bool' },
+  'base': 'ChardevCommon' }
+
+
+##
+# @ChardevSpiceChannel:
+#
+# Configuration info for spice vm channel chardevs.
+#
+# @type: kind of channel (for example vdagent).
+#
+# Since: 1.5
+##
+{ 'struct': 'ChardevSpiceChannel',
+  'data': { 'type': 'str' },
+  'base': 'ChardevCommon',
+  'if': 'defined(CONFIG_SPICE)' }
+
+##
+# @ChardevSpicePort:
+#
+# Configuration info for spice port chardevs.
+#
+# @fqdn: name of the channel (see docs/spice-port-fqdn.txt)
+#
+# Since: 1.5
+##
+{ 'struct': 'ChardevSpicePort',
+  'data': { 'fqdn': 'str' },
+  'base': 'ChardevCommon',
+  'if': 'defined(CONFIG_SPICE)' }
+
+##
+# @ChardevVC:
+#
+# Configuration info for virtual console chardevs.
+#
+# @width:  console width,  in pixels
+# @height: console height, in pixels
+# @cols:   console width,  in chars
+# @rows:   console height, in chars
+#
+# Since: 1.5
+##
+{ 'struct': 'ChardevVC',
+  'data': { '*width': 'int',
+            '*height': 'int',
+            '*cols': 'int',
+            '*rows': 'int' },
+  'base': 'ChardevCommon' }
+
+##
+# @ChardevRingbuf:
+#
+# Configuration info for ring buffer chardevs.
+#
+# @size: ring buffer size, must be power of two, default is 65536
+#
+# Since: 1.5
+##
+{ 'struct': 'ChardevRingbuf',
+  'data': { '*size': 'int' },
+  'base': 'ChardevCommon' }
+
+##
+# @ChardevBackend:
+#
+# Configuration info for the new chardev backend.
+#
+# Since: 1.4 (testdev since 2.2, wctablet since 2.9)
+##
+{ 'union': 'ChardevBackend',
+  'data': { 'file': 'ChardevFile',
+            'serial': 'ChardevHostdev',
+            'parallel': 'ChardevHostdev',
+            'pipe': 'ChardevHostdev',
+            'socket': 'ChardevSocket',
+            'udp': 'ChardevUdp',
+            'pty': 'ChardevCommon',
+            'null': 'ChardevCommon',
+            'mux': 'ChardevMux',
+            'msmouse': 'ChardevCommon',
+            'wctablet': 'ChardevCommon',
+            'braille': 'ChardevCommon',
+            'testdev': 'ChardevCommon',
+            'stdio': 'ChardevStdio',
+            'console': 'ChardevCommon',
+            'spicevmc': { 'type': 'ChardevSpiceChannel',
+                          'if': 'defined(CONFIG_SPICE)' },
+            'spiceport': { 'type': 'ChardevSpicePort',
+                           'if': 'defined(CONFIG_SPICE)' },
+            'vc': 'ChardevVC',
+            'ringbuf': 'ChardevRingbuf',
+            # next one is just for compatibility
+            'memory': 'ChardevRingbuf' } }
+
+##
+# @ChardevReturn:
+#
+# Return info about the chardev backend just created.
+#
+# @pty: name of the slave pseudoterminal device, present if
+#       and only if a chardev of type 'pty' was created
+#
+# Since: 1.4
+##
+{ 'struct' : 'ChardevReturn',
+  'data': { '*pty': 'str' } }
+
+##
+# @chardev-add:
+#
+# Add a character device backend
+#
+# @id: the chardev's ID, must be unique
+# @backend: backend type and parameters
+#
+# Returns: ChardevReturn.
+#
+# Since: 1.4
+#
+# Example:
+#
+# -> { "execute" : "chardev-add",
+#      "arguments" : { "id" : "foo",
+#                      "backend" : { "type" : "null", "data" : {} } } }
+# <- { "return": {} }
+#
+# -> { "execute" : "chardev-add",
+#      "arguments" : { "id" : "bar",
+#                      "backend" : { "type" : "file",
+#                                    "data" : { "out" : "/tmp/bar.log" } } } }
+# <- { "return": {} }
+#
+# -> { "execute" : "chardev-add",
+#      "arguments" : { "id" : "baz",
+#                      "backend" : { "type" : "pty", "data" : {} } } }
+# <- { "return": { "pty" : "/dev/pty/42" } }
+#
+##
+{ 'command': 'chardev-add',
+  'data': { 'id': 'str',
+            'backend': 'ChardevBackend' },
+  'returns': 'ChardevReturn' }
+
+##
+# @chardev-change:
+#
+# Change a character device backend
+#
+# @id: the chardev's ID, must exist
+# @backend: new backend type and parameters
+#
+# Returns: ChardevReturn.
+#
+# Since: 2.10
+#
+# Example:
+#
+# -> { "execute" : "chardev-change",
+#      "arguments" : { "id" : "baz",
+#                      "backend" : { "type" : "pty", "data" : {} } } }
+# <- { "return": { "pty" : "/dev/pty/42" } }
+#
+# -> {"execute" : "chardev-change",
+#     "arguments" : {
+#         "id" : "charchannel2",
+#         "backend" : {
+#             "type" : "socket",
+#             "data" : {
+#                 "addr" : {
+#                     "type" : "unix" ,
+#                     "data" : {
+#                         "path" : "/tmp/charchannel2.socket"
+#                     }
+#                  },
+#                  "server" : true,
+#                  "wait" : false }}}}
+# <- {"return": {}}
+#
+##
+{ 'command': 'chardev-change',
+  'data': { 'id': 'str',
+            'backend': 'ChardevBackend' },
+  'returns': 'ChardevReturn' }
+
+##
+# @chardev-remove:
+#
+# Remove a character device backend
+#
+# @id: the chardev's ID, must exist and not be in use
+#
+# Returns: Nothing on success
+#
+# Since: 1.4
+#
+# Example:
+#
+# -> { "execute": "chardev-remove", "arguments": { "id" : "foo" } }
+# <- { "return": {} }
+#
+##
+{ 'command': 'chardev-remove',
+  'data': { 'id': 'str' } }
+
+##
+# @chardev-send-break:
+#
+# Send a break to a character device
+#
+# @id: the chardev's ID, must exist
+#
+# Returns: Nothing on success
+#
+# Since: 2.10
+#
+# Example:
+#
+# -> { "execute": "chardev-send-break", "arguments": { "id" : "foo" } }
+# <- { "return": {} }
+#
+##
+{ 'command': 'chardev-send-break',
+  'data': { 'id': 'str' } }
+
+##
+# @VSERPORT_CHANGE:
+#
+# Emitted when the guest opens or closes a virtio-serial port.
+#
+# @id: device identifier of the virtio-serial port
+#
+# @open: true if the guest has opened the virtio-serial port
+#
+# Note: This event is rate-limited.
+#
+# Since: 2.1
+#
+# Example:
+#
+# <- { "event": "VSERPORT_CHANGE",
+#      "data": { "id": "channel0", "open": true },
+#      "timestamp": { "seconds": 1401385907, "microseconds": 422329 } }
+#
+##
+{ 'event': 'VSERPORT_CHANGE',
+  'data': { 'id': 'str',
+            'open': 'bool' } }
diff --git a/qapi/common.json b/qapi/common.json
new file mode 100644
index 000000000..716712d4b
--- /dev/null
+++ b/qapi/common.json
@@ -0,0 +1,147 @@
+# -*- Mode: Python -*-
+# vim: filetype=python
+
+##
+# = Common data types
+##
+
+##
+# @IoOperationType:
+#
+# An enumeration of the I/O operation types
+#
+# @read: read operation
+#
+# @write: write operation
+#
+# Since: 2.1
+##
+{ 'enum': 'IoOperationType',
+  'data': [ 'read', 'write' ] }
+
+##
+# @OnOffAuto:
+#
+# An enumeration of three options: on, off, and auto
+#
+# @auto: QEMU selects the value between on and off
+#
+# @on: Enabled
+#
+# @off: Disabled
+#
+# Since: 2.2
+##
+{ 'enum': 'OnOffAuto',
+  'data': [ 'auto', 'on', 'off' ] }
+
+##
+# @OnOffSplit:
+#
+# An enumeration of three values: on, off, and split
+#
+# @on: Enabled
+#
+# @off: Disabled
+#
+# @split: Mixed
+#
+# Since: 2.6
+##
+{ 'enum': 'OnOffSplit',
+  'data': [ 'on', 'off', 'split' ] }
+
+##
+# @String:
+#
+# A fat type wrapping 'str', to be embedded in lists.
+#
+# Since: 1.2
+##
+{ 'struct': 'String',
+  'data': {
+    'str': 'str' } }
+
+##
+# @StrOrNull:
+#
+# This is a string value or the explicit lack of a string (null
+# pointer in C).  Intended for cases when 'optional absent' already
+# has a different meaning.
+#
+# @s: the string value
+# @n: no string value
+#
+# Since: 2.10
+##
+{ 'alternate': 'StrOrNull',
+  'data': { 's': 'str',
+            'n': 'null' } }
+
+##
+# @OffAutoPCIBAR:
+#
+# An enumeration of options for specifying a PCI BAR
+#
+# @off: The specified feature is disabled
+#
+# @auto: The PCI BAR for the feature is automatically selected
+#
+# @bar0: PCI BAR0 is used for the feature
+#
+# @bar1: PCI BAR1 is used for the feature
+#
+# @bar2: PCI BAR2 is used for the feature
+#
+# @bar3: PCI BAR3 is used for the feature
+#
+# @bar4: PCI BAR4 is used for the feature
+#
+# @bar5: PCI BAR5 is used for the feature
+#
+# Since: 2.12
+##
+{ 'enum': 'OffAutoPCIBAR',
+  'data': [ 'off', 'auto', 'bar0', 'bar1', 'bar2', 'bar3', 'bar4', 'bar5' ] }
+
+##
+# @PCIELinkSpeed:
+#
+# An enumeration of PCIe link speeds in units of GT/s
+#
+# @2_5: 2.5GT/s
+#
+# @5: 5.0GT/s
+#
+# @8: 8.0GT/s
+#
+# @16: 16.0GT/s
+#
+# Since: 4.0
+##
+{ 'enum': 'PCIELinkSpeed',
+  'data': [ '2_5', '5', '8', '16' ] }
+
+##
+# @PCIELinkWidth:
+#
+# An enumeration of PCIe link width
+#
+# @1: x1
+#
+# @2: x2
+#
+# @4: x4
+#
+# @8: x8
+#
+# @12: x12
+#
+# @16: x16
+#
+# @32: x32
+#
+# Since: 4.0
+##
+{ 'enum': 'PCIELinkWidth',
+  'data': [ '1', '2', '4', '8', '12', '16', '32' ] }
diff --git a/qapi/control.json b/qapi/control.json
new file mode 100644
index 000000000..134f842ba
--- /dev/null
+++ b/qapi/control.json
@@ -0,0 +1,259 @@
+# -*- Mode: Python -*-
+# vim: filetype=python
+#
+
+##
+# = QMP monitor control
+##
+
+##
+# @qmp_capabilities:
+#
+# Enable QMP capabilities.
+#
+# Arguments:
+#
+# @enable: An optional list of QMPCapability values to enable.  The
+#          client must not enable any capability that is not
+#          mentioned in the QMP greeting message.  If the field is not
+#          provided, it means no QMP capabilities will be enabled.
+#          (since 2.12)
+#
+# Example:
+#
+# -> { "execute": "qmp_capabilities",
+#      "arguments": { "enable": [ "oob" ] } }
+# <- { "return": {} }
+#
+# Notes: This command is valid exactly when first connecting: it must be
+#        issued before any other command will be accepted, and will fail once the
+#        monitor is accepting other commands. (see qemu docs/interop/qmp-spec.txt)
+#
+#        The QMP client needs to explicitly enable QMP capabilities, otherwise
+#        all the QMP capabilities will be turned off by default.
+#
+# Since: 0.13
+#
+##
+{ 'command': 'qmp_capabilities',
+  'data': { '*enable': [ 'QMPCapability' ] },
+  'allow-preconfig': true }
+
+##
+# @QMPCapability:
+#
+# Enumeration of capabilities to be advertised during initial client
+# connection, used for agreeing on particular QMP extension behaviors.
+#
+# @oob: QMP ability to support out-of-band requests.
+#       (Please refer to qmp-spec.txt for more information on OOB)
+#
+# Since: 2.12
+#
+##
+{ 'enum': 'QMPCapability',
+  'data': [ 'oob' ] }
+
+##
+# @VersionTriple:
+#
+# A three-part version number.
+#
+# @major: The major version number.
+#
+# @minor: The minor version number.
+#
+# @micro: The micro version number.
+#
+# Since: 2.4
+##
+{ 'struct': 'VersionTriple',
+  'data': {'major': 'int', 'minor': 'int', 'micro': 'int'} }
+
+
+##
+# @VersionInfo:
+#
+# A description of QEMU's version.
+#
+# @qemu: The version of QEMU.  By current convention, a micro
+#        version of 50 signifies a development branch.  A micro version
+#        greater than or equal to 90 signifies a release candidate for
+#        the next minor version.  A micro version of less than 50
+#        signifies a stable release.
+#
+# @package: QEMU will always set this field to an empty string.  Downstream
+#           versions of QEMU should set this to a non-empty string.  The
+#           exact format depends on the downstream however it highly
+#           recommended that a unique name is used.
+#
+# Since: 0.14.0
+##
+{ 'struct': 'VersionInfo',
+  'data': {'qemu': 'VersionTriple', 'package': 'str'} }
+
+##
+# @query-version:
+#
+# Returns the current version of QEMU.
+#
+# Returns: A @VersionInfo object describing the current version of QEMU.
+#
+# Since: 0.14.0
+#
+# Example:
+#
+# -> { "execute": "query-version" }
+# <- {
+#       "return":{
+#          "qemu":{
+#             "major":0,
+#             "minor":11,
+#             "micro":5
+#          },
+#          "package":""
+#       }
+#    }
+#
+##
+{ 'command': 'query-version', 'returns': 'VersionInfo',
+  'allow-preconfig': true }
+
+##
+# @CommandInfo:
+#
+# Information about a QMP command
+#
+# @name: The command name
+#
+# Since: 0.14.0
+##
+{ 'struct': 'CommandInfo', 'data': {'name': 'str'} }
+
+##
+# @query-commands:
+#
+# Return a list of supported QMP commands by this server
+#
+# Returns: A list of @CommandInfo for all supported commands
+#
+# Since: 0.14.0
+#
+# Example:
+#
+# -> { "execute": "query-commands" }
+# <- {
+#      "return":[
+#         {
+#            "name":"query-balloon"
+#         },
+#         {
+#            "name":"system_powerdown"
+#         }
+#      ]
+#    }
+#
+# Note: This example has been shortened as the real response is too long.
+#
+##
+{ 'command': 'query-commands', 'returns': ['CommandInfo'],
+  'allow-preconfig': true }
+
+##
+# @EventInfo:
+#
+# Information about a QMP event
+#
+# @name: The event name
+#
+# Since: 1.2.0
+##
+{ 'struct': 'EventInfo', 'data': {'name': 'str'} }
+
+##
+# @query-events:
+#
+# Return information on QMP events.
+#
+# Features:
+# @deprecated: This command is deprecated, because its output doesn't
+#              reflect compile-time configuration.  Use 'query-qmp-schema'
+#              instead.
+#
+# Returns: A list of @EventInfo.
+#
+# Since: 1.2.0
+#
+# Example:
+#
+# -> { "execute": "query-events" }
+# <- {
+#      "return": [
+#          {
+#             "name":"SHUTDOWN"
+#          },
+#          {
+#             "name":"RESET"
+#          }
+#       ]
+#    }
+#
+# Note: This example has been shortened as the real response is too long.
+#
+##
+{ 'command': 'query-events', 'returns': ['EventInfo'],
+  'features': [ 'deprecated' ] }
+
+##
+# @quit:
+#
+# This command will cause the QEMU process to exit gracefully.  While every
+# attempt is made to send the QMP response before terminating, this is not
+# guaranteed.  When using this interface, a premature EOF would not be
+# unexpected.
+#
+# Since: 0.14.0
+#
+# Example:
+#
+# -> { "execute": "quit" }
+# <- { "return": {} }
+##
+{ 'command': 'quit' }
+
+##
+# @MonitorMode:
+#
+# An enumeration of monitor modes.
+#
+# @readline: HMP monitor (human-oriented command line interface)
+#
+# @control: QMP monitor (JSON-based machine interface)
+#
+# Since: 5.0
+##
+{ 'enum': 'MonitorMode', 'data': [ 'readline', 'control' ] }
+
+##
+# @MonitorOptions:
+#
+# Options to be used for adding a new monitor.
+#
+# @id:          Name of the monitor
+#
+# @mode:        Selects the monitor mode (default: readline in the system
+#               emulator, control in qemu-storage-daemon)
+#
+# @pretty:      Enables pretty printing (QMP only)
+#
+# @chardev:     Name of a character device to expose the monitor on
+#
+# Since: 5.0
+##
+{ 'struct': 'MonitorOptions',
+  'data': {
+      '*id': 'str',
+      '*mode': 'MonitorMode',
+      '*pretty': 'bool',
+      'chardev': 'str'
+  } }
diff --git a/qapi/crypto.json b/qapi/crypto.json
new file mode 100644
index 000000000..2aebe6fa2
--- /dev/null
+++ b/qapi/crypto.json
@@ -0,0 +1,383 @@
+# -*- Mode: Python -*-
+# vim: filetype=python
+#
+
+##
+# = Cryptography
+##
+
+##
+# @QCryptoTLSCredsEndpoint:
+#
+# The type of network endpoint that will be using the credentials.
+# Most types of credential require different setup / structures
+# depending on whether they will be used in a server versus a
+# client.
+#
+# @client: the network endpoint is acting as the client
+#
+# @server: the network endpoint is acting as the server
+#
+# Since: 2.5
+##
+{ 'enum': 'QCryptoTLSCredsEndpoint',
+  'prefix': 'QCRYPTO_TLS_CREDS_ENDPOINT',
+  'data': ['client', 'server']}
+
+
+##
+# @QCryptoSecretFormat:
+#
+# The data format that the secret is provided in
+#
+# @raw: raw bytes. When encoded in JSON only valid UTF-8 sequences can be used
+# @base64: arbitrary base64 encoded binary data
+# Since: 2.6
+##
+{ 'enum': 'QCryptoSecretFormat',
+  'prefix': 'QCRYPTO_SECRET_FORMAT',
+  'data': ['raw', 'base64']}
+
+
+##
+# @QCryptoHashAlgorithm:
+#
+# The supported algorithms for computing content digests
+#
+# @md5: MD5. Should not be used in any new code, legacy compat only
+# @sha1: SHA-1. Should not be used in any new code, legacy compat only
+# @sha224: SHA-224. (since 2.7)
+# @sha256: SHA-256. Current recommended strong hash.
+# @sha384: SHA-384. (since 2.7)
+# @sha512: SHA-512. (since 2.7)
+# @ripemd160: RIPEMD-160. (since 2.7)
+# Since: 2.6
+##
+{ 'enum': 'QCryptoHashAlgorithm',
+  'prefix': 'QCRYPTO_HASH_ALG',
+  'data': ['md5', 'sha1', 'sha224', 'sha256', 'sha384', 'sha512', 'ripemd160']}
+
+
+##
+# @QCryptoCipherAlgorithm:
+#
+# The supported algorithms for content encryption ciphers
+#
+# @aes-128: AES with 128 bit / 16 byte keys
+# @aes-192: AES with 192 bit / 24 byte keys
+# @aes-256: AES with 256 bit / 32 byte keys
+# @des-rfb: RFB specific variant of single DES. Do not use except in VNC.
+# @3des: 3DES(EDE) with 192 bit / 24 byte keys (since 2.9)
+# @cast5-128: Cast5 with 128 bit / 16 byte keys
+# @serpent-128: Serpent with 128 bit / 16 byte keys
+# @serpent-192: Serpent with 192 bit / 24 byte keys
+# @serpent-256: Serpent with 256 bit / 32 byte keys
+# @twofish-128: Twofish with 128 bit / 16 byte keys
+# @twofish-192: Twofish with 192 bit / 24 byte keys
+# @twofish-256: Twofish with 256 bit / 32 byte keys
+# Since: 2.6
+##
+{ 'enum': 'QCryptoCipherAlgorithm',
+  'prefix': 'QCRYPTO_CIPHER_ALG',
+  'data': ['aes-128', 'aes-192', 'aes-256',
+           'des-rfb', '3des',
+           'cast5-128',
+           'serpent-128', 'serpent-192', 'serpent-256',
+           'twofish-128', 'twofish-192', 'twofish-256']}
+
+
+##
+# @QCryptoCipherMode:
+#
+# The supported modes for content encryption ciphers
+#
+# @ecb: Electronic Code Book
+# @cbc: Cipher Block Chaining
+# @xts: XEX with tweaked code book and ciphertext stealing
+# @ctr: Counter (Since 2.8)
+# Since: 2.6
+##
+{ 'enum': 'QCryptoCipherMode',
+  'prefix': 'QCRYPTO_CIPHER_MODE',
+  'data': ['ecb', 'cbc', 'xts', 'ctr']}
+
+
+##
+# @QCryptoIVGenAlgorithm:
+#
+# The supported algorithms for generating initialization
+# vectors for full disk encryption. The 'plain' generator
+# should not be used for disks with sector numbers larger
+# than 2^32, except where compatibility with pre-existing
+# Linux dm-crypt volumes is required.
+#
+# @plain: 64-bit sector number truncated to 32-bits
+# @plain64: 64-bit sector number
+# @essiv: 64-bit sector number encrypted with a hash of the encryption key
+# Since: 2.6
+##
+{ 'enum': 'QCryptoIVGenAlgorithm',
+  'prefix': 'QCRYPTO_IVGEN_ALG',
+  'data': ['plain', 'plain64', 'essiv']}
+
+##
+# @QCryptoBlockFormat:
+#
+# The supported full disk encryption formats
+#
+# @qcow: QCow/QCow2 built-in AES-CBC encryption. Use only
+#        for liberating data from old images.
+# @luks: LUKS encryption format. Recommended for new images
+#
+# Since: 2.6
+##
+{ 'enum': 'QCryptoBlockFormat',
+#  'prefix': 'QCRYPTO_BLOCK_FORMAT',
+  'data': ['qcow', 'luks']}
+
+##
+# @QCryptoBlockOptionsBase:
+#
+# The common options that apply to all full disk
+# encryption formats
+#
+# @format: the encryption format
+#
+# Since: 2.6
+##
+{ 'struct': 'QCryptoBlockOptionsBase',
+  'data': { 'format': 'QCryptoBlockFormat' }}
+
+##
+# @QCryptoBlockOptionsQCow:
+#
+# The options that apply to QCow/QCow2 AES-CBC encryption format
+#
+# @key-secret: the ID of a QCryptoSecret object providing the
+#              decryption key. Mandatory except when probing image for
+#              metadata only.
+#
+# Since: 2.6
+##
+{ 'struct': 'QCryptoBlockOptionsQCow',
+  'data': { '*key-secret': 'str' }}
+
+##
+# @QCryptoBlockOptionsLUKS:
+#
+# The options that apply to LUKS encryption format
+#
+# @key-secret: the ID of a QCryptoSecret object providing the
+#              decryption key. Mandatory except when probing image for
+#              metadata only.
+# Since: 2.6
+##
+{ 'struct': 'QCryptoBlockOptionsLUKS',
+  'data': { '*key-secret': 'str' }}
+
+
+##
+# @QCryptoBlockCreateOptionsLUKS:
+#
+# The options that apply to LUKS encryption format initialization
+#
+# @cipher-alg: the cipher algorithm for data encryption
+#              Currently defaults to 'aes-256'.
+# @cipher-mode: the cipher mode for data encryption
+#               Currently defaults to 'xts'
+# @ivgen-alg: the initialization vector generator
+#             Currently defaults to 'plain64'
+# @ivgen-hash-alg: the initialization vector generator hash
+#                  Currently defaults to 'sha256'
+# @hash-alg: the master key hash algorithm
+#            Currently defaults to 'sha256'
+# @iter-time: number of milliseconds to spend in
+#             PBKDF passphrase processing. Currently defaults
+#             to 2000. (since 2.8)
+# Since: 2.6
+##
+{ 'struct': 'QCryptoBlockCreateOptionsLUKS',
+  'base': 'QCryptoBlockOptionsLUKS',
+  'data': { '*cipher-alg': 'QCryptoCipherAlgorithm',
+            '*cipher-mode': 'QCryptoCipherMode',
+            '*ivgen-alg': 'QCryptoIVGenAlgorithm',
+            '*ivgen-hash-alg': 'QCryptoHashAlgorithm',
+            '*hash-alg': 'QCryptoHashAlgorithm',
+            '*iter-time': 'int'}}
+
+
+##
+# @QCryptoBlockOpenOptions:
+#
+# The options that are available for all encryption formats
+# when opening an existing volume
+#
+# Since: 2.6
+##
+{ 'union': 'QCryptoBlockOpenOptions',
+  'base': 'QCryptoBlockOptionsBase',
+  'discriminator': 'format',
+  'data': { 'qcow': 'QCryptoBlockOptionsQCow',
+            'luks': 'QCryptoBlockOptionsLUKS' } }
+
+
+##
+# @QCryptoBlockCreateOptions:
+#
+# The options that are available for all encryption formats
+# when initializing a new volume
+#
+# Since: 2.6
+##
+{ 'union': 'QCryptoBlockCreateOptions',
+  'base': 'QCryptoBlockOptionsBase',
+  'discriminator': 'format',
+  'data': { 'qcow': 'QCryptoBlockOptionsQCow',
+            'luks': 'QCryptoBlockCreateOptionsLUKS' } }
+
+
+##
+# @QCryptoBlockInfoBase:
+#
+# The common information that applies to all full disk
+# encryption formats
+#
+# @format: the encryption format
+#
+# Since: 2.7
+##
+{ 'struct': 'QCryptoBlockInfoBase',
+  'data': { 'format': 'QCryptoBlockFormat' }}
+
+
+##
+# @QCryptoBlockInfoLUKSSlot:
+#
+# Information about the LUKS block encryption key
+# slot options
+#
+# @active: whether the key slot is currently in use
+# @key-offset: offset to the key material in bytes
+# @iters: number of PBKDF2 iterations for key material
+# @stripes: number of stripes for splitting key material
+#
+# Since: 2.7
+##
+{ 'struct': 'QCryptoBlockInfoLUKSSlot',
+  'data': {'active': 'bool',
+           '*iters': 'int',
+           '*stripes': 'int',
+           'key-offset': 'int' } }
+
+
+##
+# @QCryptoBlockInfoLUKS:
+#
+# Information about the LUKS block encryption options
+#
+# @cipher-alg: the cipher algorithm for data encryption
+# @cipher-mode: the cipher mode for data encryption
+# @ivgen-alg: the initialization vector generator
+# @ivgen-hash-alg: the initialization vector generator hash
+# @hash-alg: the master key hash algorithm
+# @payload-offset: offset to the payload data in bytes
+# @master-key-iters: number of PBKDF2 iterations for key material
+# @uuid: unique identifier for the volume
+# @slots: information about each key slot
+#
+# Since: 2.7
+##
+{ 'struct': 'QCryptoBlockInfoLUKS',
+  'data': {'cipher-alg': 'QCryptoCipherAlgorithm',
+           'cipher-mode': 'QCryptoCipherMode',
+           'ivgen-alg': 'QCryptoIVGenAlgorithm',
+           '*ivgen-hash-alg': 'QCryptoHashAlgorithm',
+           'hash-alg': 'QCryptoHashAlgorithm',
+           'payload-offset': 'int',
+           'master-key-iters': 'int',
+           'uuid': 'str',
+           'slots': [ 'QCryptoBlockInfoLUKSSlot' ] }}
+
+##
+# @QCryptoBlockInfo:
+#
+# Information about the block encryption options
+#
+# Since: 2.7
+##
+{ 'union': 'QCryptoBlockInfo',
+  'base': 'QCryptoBlockInfoBase',
+  'discriminator': 'format',
+  'data': { 'luks': 'QCryptoBlockInfoLUKS' } }
+
+##
+# @QCryptoBlockLUKSKeyslotState:
+#
+# Defines state of keyslots that are affected by the update
+#
+# @active:    The slots contain the given password and marked as active
+# @inactive:  The slots are erased (contain garbage) and marked as inactive
+#
+# Since: 5.1
+##
+{ 'enum': 'QCryptoBlockLUKSKeyslotState',
+  'data': [ 'active', 'inactive' ] }
+
+
+##
+# @QCryptoBlockAmendOptionsLUKS:
+#
+# This struct defines the update parameters that activate/de-activate set
+# of keyslots
+#
+# @state: the desired state of the keyslots
+#
+# @new-secret:    The ID of a QCryptoSecret object providing the password to be
+#                 written into added active keyslots
+#
+# @old-secret:    Optional (for deactivation only)
+#                 If given will deactivate all keyslots that
+#                 match password located in QCryptoSecret with this ID
+#
+# @iter-time:     Optional (for activation only)
+#                 Number of milliseconds to spend in
+#                 PBKDF passphrase processing for the newly activated keyslot.
+#                 Currently defaults to 2000.
+#
+# @keyslot:       Optional. ID of the keyslot to activate/deactivate.
+#                 For keyslot activation, keyslot should not be active already
+#                 (this is unsafe to update an active keyslot),
+#                 but possible if 'force' parameter is given.
+#                 If keyslot is not given, first free keyslot will be written.
+#
+#                 For keyslot deactivation, this parameter specifies the exact
+#                 keyslot to deactivate
+#
+# @secret:        Optional. The ID of a QCryptoSecret object providing the
+#                 password to use to retrieve current master key.
+#                 Defaults to the same secret that was used to open the image
+#
+#
+# Since 5.1
+##
+{ 'struct': 'QCryptoBlockAmendOptionsLUKS',
+  'data': { 'state': 'QCryptoBlockLUKSKeyslotState',
+            '*new-secret': 'str',
+            '*old-secret': 'str',
+            '*keyslot': 'int',
+            '*iter-time': 'int',
+            '*secret': 'str' } }
+
+##
+# @QCryptoBlockAmendOptions:
+#
+# The options that are available for all encryption formats
+# when amending encryption settings
+#
+# Since: 5.1
+##
+{ 'union': 'QCryptoBlockAmendOptions',
+  'base': 'QCryptoBlockOptionsBase',
+  'discriminator': 'format',
+  'data': {
+          'luks': 'QCryptoBlockAmendOptionsLUKS' } }
diff --git a/qapi/dump.json b/qapi/dump.json
new file mode 100644
index 000000000..f7c4267e3
--- /dev/null
+++ b/qapi/dump.json
@@ -0,0 +1,201 @@
+# -*- Mode: Python -*-
+# vim: filetype=python
+#
+# This work is licensed under the terms of the GNU GPL, version 2 or later.
+# See the COPYING file in the top-level directory.
+
+##
+# = Dump guest memory
+##
+
+##
+# @DumpGuestMemoryFormat:
+#
+# An enumeration of guest-memory-dump's format.
+#
+# @elf: elf format
+#
+# @kdump-zlib: kdump-compressed format with zlib-compressed
+#
+# @kdump-lzo: kdump-compressed format with lzo-compressed
+#
+# @kdump-snappy: kdump-compressed format with snappy-compressed
+#
+# @win-dmp: Windows full crashdump format,
+#           can be used instead of ELF converting (since 2.13)
+#
+# Since: 2.0
+##
+{ 'enum': 'DumpGuestMemoryFormat',
+  'data': [ 'elf', 'kdump-zlib', 'kdump-lzo', 'kdump-snappy', 'win-dmp' ] }
+
+##
+# @dump-guest-memory:
+#
+# Dump guest's memory to vmcore. It is a synchronous operation that can take
+# very long depending on the amount of guest memory.
+#
+# @paging: if true, do paging to get guest's memory mapping. This allows
+#          using gdb to process the core file.
+#
+#          IMPORTANT: this option can make QEMU allocate several gigabytes
+#          of RAM. This can happen for a large guest, or a
+#          malicious guest pretending to be large.
+#
+#          Also, paging=true has the following limitations:
+#
+#             1. The guest may be in a catastrophic state or can have corrupted
+#                memory, which cannot be trusted
+#             2. The guest can be in real-mode even if paging is enabled. For
+#                example, the guest uses ACPI to sleep, and ACPI sleep state
+#                goes in real-mode
+#             3. Currently only supported on i386 and x86_64.
+#
+# @protocol: the filename or file descriptor of the vmcore. The supported
+#            protocols are:
+#
+#            1. file: the protocol starts with "file:", and the following
+#               string is the file's path.
+#            2. fd: the protocol starts with "fd:", and the following string
+#               is the fd's name.
+#
+# @detach: if true, QMP will return immediately rather than
+#          waiting for the dump to finish. The user can track progress
+#          using "query-dump". (since 2.6).
+#
+# @begin: if specified, the starting physical address.
+#
+# @length: if specified, the memory size, in bytes. If you don't
+#          want to dump all guest's memory, please specify the start @begin
+#          and @length
+#
+# @format: if specified, the format of guest memory dump. But non-elf
+#          format is conflict with paging and filter, ie. @paging, @begin and
+#          @length is not allowed to be specified with non-elf @format at the
+#          same time (since 2.0)
+#
+# Note: All boolean arguments default to false
+#
+# Returns: nothing on success
+#
+# Since: 1.2
+#
+# Example:
+#
+# -> { "execute": "dump-guest-memory",
+#      "arguments": { "protocol": "fd:dump" } }
+# <- { "return": {} }
+#
+##
+{ 'command': 'dump-guest-memory',
+  'data': { 'paging': 'bool', 'protocol': 'str', '*detach': 'bool',
+            '*begin': 'int', '*length': 'int',
+            '*format': 'DumpGuestMemoryFormat'} }
+
+##
+# @DumpStatus:
+#
+# Describe the status of a long-running background guest memory dump.
+#
+# @none: no dump-guest-memory has started yet.
+#
+# @active: there is one dump running in background.
+#
+# @completed: the last dump has finished successfully.
+#
+# @failed: the last dump has failed.
+#
+# Since: 2.6
+##
+{ 'enum': 'DumpStatus',
+  'data': [ 'none', 'active', 'completed', 'failed' ] }
+
+##
+# @DumpQueryResult:
+#
+# The result format for 'query-dump'.
+#
+# @status: enum of @DumpStatus, which shows current dump status
+#
+# @completed: bytes written in latest dump (uncompressed)
+#
+# @total: total bytes to be written in latest dump (uncompressed)
+#
+# Since: 2.6
+##
+{ 'struct': 'DumpQueryResult',
+  'data': { 'status': 'DumpStatus',
+            'completed': 'int',
+            'total': 'int' } }
+
+##
+# @query-dump:
+#
+# Query latest dump status.
+#
+# Returns: A @DumpStatus object showing the dump status.
+#
+# Since: 2.6
+#
+# Example:
+#
+# -> { "execute": "query-dump" }
+# <- { "return": { "status": "active", "completed": 1024000,
+#                  "total": 2048000 } }
+#
+##
+{ 'command': 'query-dump', 'returns': 'DumpQueryResult' }
+
+##
+# @DUMP_COMPLETED:
+#
+# Emitted when background dump has completed
+#
+# @result: final dump status
+#
+# @error: human-readable error string that provides
+#         hint on why dump failed. Only presents on failure. The
+#         user should not try to interpret the error string.
+#
+# Since: 2.6
+#
+# Example:
+#
+# { "event": "DUMP_COMPLETED",
+#   "data": {"result": {"total": 1090650112, "status": "completed",
+#                       "completed": 1090650112} } }
+#
+##
+{ 'event': 'DUMP_COMPLETED' ,
+  'data': { 'result': 'DumpQueryResult', '*error': 'str' } }
+
+##
+# @DumpGuestMemoryCapability:
+#
+# A list of the available formats for dump-guest-memory
+#
+# Since: 2.0
+##
+{ 'struct': 'DumpGuestMemoryCapability',
+  'data': {
+      'formats': ['DumpGuestMemoryFormat'] } }
+
+##
+# @query-dump-guest-memory-capability:
+#
+# Returns the available formats for dump-guest-memory
+#
+# Returns:  A @DumpGuestMemoryCapability object listing available formats for
+#           dump-guest-memory
+#
+# Since: 2.0
+#
+# Example:
+#
+# -> { "execute": "query-dump-guest-memory-capability" }
+# <- { "return": { "formats":
+#                  ["elf", "kdump-zlib", "kdump-lzo", "kdump-snappy"] }
+#
+##
+{ 'command': 'query-dump-guest-memory-capability',
+  'returns': 'DumpGuestMemoryCapability' }
diff --git a/qapi/error.json b/qapi/error.json
new file mode 100644
index 000000000..94a6502de
--- /dev/null
+++ b/qapi/error.json
@@ -0,0 +1,30 @@
+# -*- Mode: Python -*-
+# vim: filetype=python
+
+##
+# = QMP errors
+##
+
+##
+# @QapiErrorClass:
+#
+# QEMU error classes
+#
+# @GenericError: this is used for errors that don't require a specific error
+#                class. This should be the default case for most errors
+#
+# @CommandNotFound: the requested command has not been found
+#
+# @DeviceNotActive: a device has failed to be become active
+#
+# @DeviceNotFound: the requested device has not been found
+#
+# @KVMMissingCap: the requested operation can't be fulfilled because a
+#                 required KVM capability is missing
+#
+# Since: 1.2
+##
+{ 'enum': 'QapiErrorClass',
+  # Keep this in sync with ErrorClass in error.h
+  'data': [ 'GenericError', 'CommandNotFound',
+            'DeviceNotActive', 'DeviceNotFound', 'KVMMissingCap' ] }
diff --git a/qapi/introspect.json b/qapi/introspect.json
new file mode 100644
index 000000000..944bb87a2
--- /dev/null
+++ b/qapi/introspect.json
@@ -0,0 +1,295 @@
+# -*- Mode: Python -*-
+# vim: filetype=python
+#
+# Copyright (C) 2015 Red Hat, Inc.
+#
+# Authors:
+#  Markus Armbruster 
+#
+# This work is licensed under the terms of the GNU GPL, version 2 or later.
+# See the COPYING file in the top-level directory.
+
+##
+# = QMP introspection
+##
+
+##
+# @query-qmp-schema:
+#
+# Command query-qmp-schema exposes the QMP wire ABI as an array of
+# SchemaInfo.  This lets QMP clients figure out what commands and
+# events are available in this QEMU, and their parameters and results.
+#
+# However, the SchemaInfo can't reflect all the rules and restrictions
+# that apply to QMP.  It's interface introspection (figuring out
+# what's there), not interface specification.  The specification is in
+# the QAPI schema.
+#
+# Furthermore, while we strive to keep the QMP wire format
+# backwards-compatible across qemu versions, the introspection output
+# is not guaranteed to have the same stability.  For example, one
+# version of qemu may list an object member as an optional
+# non-variant, while another lists the same member only through the
+# object's variants; or the type of a member may change from a generic
+# string into a specific enum or from one specific type into an
+# alternate that includes the original type alongside something else.
+#
+# Returns: array of @SchemaInfo, where each element describes an
+#          entity in the ABI: command, event, type, ...
+#
+#          The order of the various SchemaInfo is unspecified; however, all
+#          names are guaranteed to be unique (no name will be duplicated with
+#          different meta-types).
+#
+# Note: the QAPI schema is also used to help define *internal*
+#       interfaces, by defining QAPI types.  These are not part of the QMP
+#       wire ABI, and therefore not returned by this command.
+#
+# Since: 2.5
+##
+{ 'command': 'query-qmp-schema',
+  'returns': [ 'SchemaInfo' ],
+  'gen': false }                # just to simplify qmp_query_json()
+
+##
+# @SchemaMetaType:
+#
+# This is a @SchemaInfo's meta type, i.e. the kind of entity it
+# describes.
+#
+# @builtin: a predefined type such as 'int' or 'bool'.
+#
+# @enum: an enumeration type
+#
+# @array: an array type
+#
+# @object: an object type (struct or union)
+#
+# @alternate: an alternate type
+#
+# @command: a QMP command
+#
+# @event: a QMP event
+#
+# Since: 2.5
+##
+{ 'enum': 'SchemaMetaType',
+  'data': [ 'builtin', 'enum', 'array', 'object', 'alternate',
+            'command', 'event' ] }
+
+##
+# @SchemaInfo:
+#
+# @name: the entity's name, inherited from @base.
+#        The SchemaInfo is always referenced by this name.
+#        Commands and events have the name defined in the QAPI schema.
+#        Unlike command and event names, type names are not part of
+#        the wire ABI.  Consequently, type names are meaningless
+#        strings here, although they are still guaranteed unique
+#        regardless of @meta-type.
+#
+# @meta-type: the entity's meta type, inherited from @base.
+#
+# @features: names of features associated with the entity, in no
+#            particular order.
+#            (since 4.1 for object types, 4.2 for commands, 5.0 for
+#            the rest)
+#
+# Additional members depend on the value of @meta-type.
+#
+# Since: 2.5
+##
+{ 'union': 'SchemaInfo',
+  'base': { 'name': 'str', 'meta-type': 'SchemaMetaType',
+            '*features': [ 'str' ] },
+  'discriminator': 'meta-type',
+  'data': {
+      'builtin': 'SchemaInfoBuiltin',
+      'enum': 'SchemaInfoEnum',
+      'array': 'SchemaInfoArray',
+      'object': 'SchemaInfoObject',
+      'alternate': 'SchemaInfoAlternate',
+      'command': 'SchemaInfoCommand',
+      'event': 'SchemaInfoEvent' } }
+
+##
+# @SchemaInfoBuiltin:
+#
+# Additional SchemaInfo members for meta-type 'builtin'.
+#
+# @json-type: the JSON type used for this type on the wire.
+#
+# Since: 2.5
+##
+{ 'struct': 'SchemaInfoBuiltin',
+  'data': { 'json-type': 'JSONType' } }
+
+##
+# @JSONType:
+#
+# The four primitive and two structured types according to RFC 8259
+# section 1, plus 'int' (split off 'number'), plus the obvious top
+# type 'value'.
+#
+# Since: 2.5
+##
+{ 'enum': 'JSONType',
+  'data': [ 'string', 'number', 'int', 'boolean', 'null',
+            'object', 'array', 'value' ] }
+
+##
+# @SchemaInfoEnum:
+#
+# Additional SchemaInfo members for meta-type 'enum'.
+#
+# @values: the enumeration type's values, in no particular order.
+#
+# Values of this type are JSON string on the wire.
+#
+# Since: 2.5
+##
+{ 'struct': 'SchemaInfoEnum',
+  'data': { 'values': ['str'] } }
+
+##
+# @SchemaInfoArray:
+#
+# Additional SchemaInfo members for meta-type 'array'.
+#
+# @element-type: the array type's element type.
+#
+# Values of this type are JSON array on the wire.
+#
+# Since: 2.5
+##
+{ 'struct': 'SchemaInfoArray',
+  'data': { 'element-type': 'str' } }
+
+##
+# @SchemaInfoObject:
+#
+# Additional SchemaInfo members for meta-type 'object'.
+#
+# @members: the object type's (non-variant) members, in no particular order.
+#
+# @tag: the name of the member serving as type tag.
+#       An element of @members with this name must exist.
+#
+# @variants: variant members, i.e. additional members that
+#            depend on the type tag's value.  Present exactly when
+#            @tag is present.  The variants are in no particular order,
+#            and may even differ from the order of the values of the
+#            enum type of the @tag.
+#
+# Values of this type are JSON object on the wire.
+#
+# Since: 2.5
+##
+{ 'struct': 'SchemaInfoObject',
+  'data': { 'members': [ 'SchemaInfoObjectMember' ],
+            '*tag': 'str',
+            '*variants': [ 'SchemaInfoObjectVariant' ] } }
+
+##
+# @SchemaInfoObjectMember:
+#
+# An object member.
+#
+# @name: the member's name, as defined in the QAPI schema.
+#
+# @type: the name of the member's type.
+#
+# @default: default when used as command parameter.
+#           If absent, the parameter is mandatory.
+#           If present, the value must be null.  The parameter is
+#           optional, and behavior when it's missing is not specified
+#           here.
+#           Future extension: if present and non-null, the parameter
+#           is optional, and defaults to this value.
+#
+# @features: names of features associated with the member, in no
+#            particular order.  (since 5.0)
+#
+# Since: 2.5
+##
+{ 'struct': 'SchemaInfoObjectMember',
+  'data': { 'name': 'str', 'type': 'str', '*default': 'any',
+# @default's type must be null or match @type
+            '*features': [ 'str' ] } }
+
+##
+# @SchemaInfoObjectVariant:
+#
+# The variant members for a value of the type tag.
+#
+# @case: a value of the type tag.
+#
+# @type: the name of the object type that provides the variant members
+#        when the type tag has value @case.
+#
+# Since: 2.5
+##
+{ 'struct': 'SchemaInfoObjectVariant',
+  'data': { 'case': 'str', 'type': 'str' } }
+
+##
+# @SchemaInfoAlternate:
+#
+# Additional SchemaInfo members for meta-type 'alternate'.
+#
+# @members: the alternate type's members, in no particular order.
+#           The members' wire encoding is distinct, see
+#           docs/devel/qapi-code-gen.txt section Alternate types.
+#
+# On the wire, this can be any of the members.
+#
+# Since: 2.5
+##
+{ 'struct': 'SchemaInfoAlternate',
+  'data': { 'members': [ 'SchemaInfoAlternateMember' ] } }
+
+##
+# @SchemaInfoAlternateMember:
+#
+# An alternate member.
+#
+# @type: the name of the member's type.
+#
+# Since: 2.5
+##
+{ 'struct': 'SchemaInfoAlternateMember',
+  'data': { 'type': 'str' } }
+
+##
+# @SchemaInfoCommand:
+#
+# Additional SchemaInfo members for meta-type 'command'.
+#
+# @arg-type: the name of the object type that provides the command's
+#            parameters.
+#
+# @ret-type: the name of the command's result type.
+#
+# @allow-oob: whether the command allows out-of-band execution,
+#             defaults to false (Since: 2.12)
+#
+# TODO: @success-response (currently irrelevant, because it's QGA, not QMP)
+#
+# Since: 2.5
+##
+{ 'struct': 'SchemaInfoCommand',
+  'data': { 'arg-type': 'str', 'ret-type': 'str',
+            '*allow-oob': 'bool' } }
+
+##
+# @SchemaInfoEvent:
+#
+# Additional SchemaInfo members for meta-type 'event'.
+#
+# @arg-type: the name of the object type that provides the event's
+#            parameters.
+#
+# Since: 2.5
+##
+{ 'struct': 'SchemaInfoEvent',
+  'data': { 'arg-type': 'str' } }
diff --git a/qapi/job.json b/qapi/job.json
new file mode 100644
index 000000000..280c2f76f
--- /dev/null
+++ b/qapi/job.json
@@ -0,0 +1,259 @@
+# -*- Mode: Python -*-
+# vim: filetype=python
+
+##
+# == Background jobs
+##
+
+##
+# @JobType:
+#
+# Type of a background job.
+#
+# @commit: block commit job type, see "block-commit"
+#
+# @stream: block stream job type, see "block-stream"
+#
+# @mirror: drive mirror job type, see "drive-mirror"
+#
+# @backup: drive backup job type, see "drive-backup"
+#
+# @create: image creation job type, see "blockdev-create" (since 3.0)
+#
+# @amend: image options amend job type, see "x-blockdev-amend" (since 5.1)
+#
+# Since: 1.7
+##
+{ 'enum': 'JobType',
+  'data': ['commit', 'stream', 'mirror', 'backup', 'create', 'amend'] }
+
+##
+# @JobStatus:
+#
+# Indicates the present state of a given job in its lifetime.
+#
+# @undefined: Erroneous, default state. Should not ever be visible.
+#
+# @created: The job has been created, but not yet started.
+#
+# @running: The job is currently running.
+#
+# @paused: The job is running, but paused. The pause may be requested by
+#          either the QMP user or by internal processes.
+#
+# @ready: The job is running, but is ready for the user to signal completion.
+#         This is used for long-running jobs like mirror that are designed to
+#         run indefinitely.
+#
+# @standby: The job is ready, but paused. This is nearly identical to @paused.
+#           The job may return to @ready or otherwise be canceled.
+#
+# @waiting: The job is waiting for other jobs in the transaction to converge
+#           to the waiting state. This status will likely not be visible for
+#           the last job in a transaction.
+#
+# @pending: The job has finished its work, but has finalization steps that it
+#           needs to make prior to completing. These changes will require
+#           manual intervention via @job-finalize if auto-finalize was set to
+#           false. These pending changes may still fail.
+#
+# @aborting: The job is in the process of being aborted, and will finish with
+#            an error. The job will afterwards report that it is @concluded.
+#            This status may not be visible to the management process.
+#
+# @concluded: The job has finished all work. If auto-dismiss was set to false,
+#             the job will remain in the query list until it is dismissed via
+#             @job-dismiss.
+#
+# @null: The job is in the process of being dismantled. This state should not
+#        ever be visible externally.
+#
+# Since: 2.12
+##
+{ 'enum': 'JobStatus',
+  'data': ['undefined', 'created', 'running', 'paused', 'ready', 'standby',
+           'waiting', 'pending', 'aborting', 'concluded', 'null' ] }
+
+##
+# @JobVerb:
+#
+# Represents command verbs that can be applied to a job.
+#
+# @cancel: see @job-cancel
+#
+# @pause: see @job-pause
+#
+# @resume: see @job-resume
+#
+# @set-speed: see @block-job-set-speed
+#
+# @complete: see @job-complete
+#
+# @dismiss: see @job-dismiss
+#
+# @finalize: see @job-finalize
+#
+# Since: 2.12
+##
+{ 'enum': 'JobVerb',
+  'data': ['cancel', 'pause', 'resume', 'set-speed', 'complete', 'dismiss',
+           'finalize' ] }
+
+##
+# @JOB_STATUS_CHANGE:
+#
+# Emitted when a job transitions to a different status.
+#
+# @id: The job identifier
+# @status: The new job status
+#
+# Since: 3.0
+##
+{ 'event': 'JOB_STATUS_CHANGE',
+  'data': { 'id': 'str',
+            'status': 'JobStatus' } }
+
+##
+# @job-pause:
+#
+# Pause an active job.
+#
+# This command returns immediately after marking the active job for pausing.
+# Pausing an already paused job is an error.
+#
+# The job will pause as soon as possible, which means transitioning into the
+# PAUSED state if it was RUNNING, or into STANDBY if it was READY. The
+# corresponding JOB_STATUS_CHANGE event will be emitted.
+#
+# Cancelling a paused job automatically resumes it.
+#
+# @id: The job identifier.
+#
+# Since: 3.0
+##
+{ 'command': 'job-pause', 'data': { 'id': 'str' } }
+
+##
+# @job-resume:
+#
+# Resume a paused job.
+#
+# This command returns immediately after resuming a paused job. Resuming an
+# already running job is an error.
+#
+# @id : The job identifier.
+#
+# Since: 3.0
+##
+{ 'command': 'job-resume', 'data': { 'id': 'str' } }
+
+##
+# @job-cancel:
+#
+# Instruct an active background job to cancel at the next opportunity.
+# This command returns immediately after marking the active job for
+# cancellation.
+#
+# The job will cancel as soon as possible and then emit a JOB_STATUS_CHANGE
+# event. Usually, the status will change to ABORTING, but it is possible that
+# a job successfully completes (e.g. because it was almost done and there was
+# no opportunity to cancel earlier than completing the job) and transitions to
+# PENDING instead.
+#
+# @id: The job identifier.
+#
+# Since: 3.0
+##
+{ 'command': 'job-cancel', 'data': { 'id': 'str' } }
+
+
+##
+# @job-complete:
+#
+# Manually trigger completion of an active job in the READY state.
+#
+# @id: The job identifier.
+#
+# Since: 3.0
+##
+{ 'command': 'job-complete', 'data': { 'id': 'str' } }
+
+##
+# @job-dismiss:
+#
+# Deletes a job that is in the CONCLUDED state. This command only needs to be
+# run explicitly for jobs that don't have automatic dismiss enabled.
+#
+# This command will refuse to operate on any job that has not yet reached its
+# terminal state, JOB_STATUS_CONCLUDED. For jobs that make use of JOB_READY
+# event, job-cancel or job-complete will still need to be used as appropriate.
+#
+# @id: The job identifier.
+#
+# Since: 3.0
+##
+{ 'command': 'job-dismiss', 'data': { 'id': 'str' } }
+
+##
+# @job-finalize:
+#
+# Instructs all jobs in a transaction (or a single job if it is not part of any
+# transaction) to finalize any graph changes and do any necessary cleanup. This
+# command requires that all involved jobs are in the PENDING state.
+#
+# For jobs in a transaction, instructing one job to finalize will force
+# ALL jobs in the transaction to finalize, so it is only necessary to instruct
+# a single member job to finalize.
+#
+# @id: The identifier of any job in the transaction, or of a job that is not
+#      part of any transaction.
+#
+# Since: 3.0
+##
+{ 'command': 'job-finalize', 'data': { 'id': 'str' } }
+
+##
+# @JobInfo:
+#
+# Information about a job.
+#
+# @id: The job identifier
+#
+# @type: The kind of job that is being performed
+#
+# @status: Current job state/status
+#
+# @current-progress: Progress made until now. The unit is arbitrary and the
+#                    value can only meaningfully be used for the ratio of
+#                    @current-progress to @total-progress. The value is
+#                    monotonically increasing.
+#
+# @total-progress: Estimated @current-progress value at the completion of
+#                  the job. This value can arbitrarily change while the
+#                  job is running, in both directions.
+#
+# @error: If this field is present, the job failed; if it is
+#         still missing in the CONCLUDED state, this indicates
+#         successful completion.
+#
+#         The value is a human-readable error message to describe
+#         the reason for the job failure. It should not be parsed
+#         by applications.
+#
+# Since: 3.0
+##
+{ 'struct': 'JobInfo',
+  'data': { 'id': 'str', 'type': 'JobType', 'status': 'JobStatus',
+            'current-progress': 'int', 'total-progress': 'int',
+            '*error': 'str' } }
+
+##
+# @query-jobs:
+#
+# Return information about jobs.
+#
+# Returns: a list with a @JobInfo for each active job
+#
+# Since: 3.0
+##
+{ 'command': 'query-jobs', 'returns': ['JobInfo'] }
diff --git a/qapi/machine-target.json b/qapi/machine-target.json
new file mode 100644
index 000000000..fec3bb867
--- /dev/null
+++ b/qapi/machine-target.json
@@ -0,0 +1,331 @@
+# -*- Mode: Python -*-
+# vim: filetype=python
+#
+# This work is licensed under the terms of the GNU GPL, version 2 or later.
+# See the COPYING file in the top-level directory.
+
+##
+# @CpuModelInfo:
+#
+# Virtual CPU model.
+#
+# A CPU model consists of the name of a CPU definition, to which
+# delta changes are applied (e.g. features added/removed). Most magic values
+# that an architecture might require should be hidden behind the name.
+# However, if required, architectures can expose relevant properties.
+#
+# @name: the name of the CPU definition the model is based on
+# @props: a dictionary of QOM properties to be applied
+#
+# Since: 2.8.0
+##
+{ 'struct': 'CpuModelInfo',
+  'data': { 'name': 'str',
+            '*props': 'any' } }
+
+##
+# @CpuModelExpansionType:
+#
+# An enumeration of CPU model expansion types.
+#
+# @static: Expand to a static CPU model, a combination of a static base
+#          model name and property delta changes. As the static base model will
+#          never change, the expanded CPU model will be the same, independent of
+#          QEMU version, machine type, machine options, and accelerator options.
+#          Therefore, the resulting model can be used by tooling without having
+#          to specify a compatibility machine - e.g. when displaying the "host"
+#          model. The @static CPU models are migration-safe.
+
+# @full: Expand all properties. The produced model is not guaranteed to be
+#        migration-safe, but allows tooling to get an insight and work with
+#        model details.
+#
+# Note: When a non-migration-safe CPU model is expanded in static mode, some
+#       features enabled by the CPU model may be omitted, because they can't be
+#       implemented by a static CPU model definition (e.g. cache info passthrough and
+#       PMU passthrough in x86). If you need an accurate representation of the
+#       features enabled by a non-migration-safe CPU model, use @full. If you need a
+#       static representation that will keep ABI compatibility even when changing QEMU
+#       version or machine-type, use @static (but keep in mind that some features may
+#       be omitted).
+#
+# Since: 2.8.0
+##
+{ 'enum': 'CpuModelExpansionType',
+  'data': [ 'static', 'full' ] }
+
+
+##
+# @CpuModelCompareResult:
+#
+# An enumeration of CPU model comparison results. The result is usually
+# calculated using e.g. CPU features or CPU generations.
+#
+# @incompatible: If model A is incompatible to model B, model A is not
+#                guaranteed to run where model B runs and the other way around.
+#
+# @identical: If model A is identical to model B, model A is guaranteed to run
+#             where model B runs and the other way around.
+#
+# @superset: If model A is a superset of model B, model B is guaranteed to run
+#            where model A runs. There are no guarantees about the other way.
+#
+# @subset: If model A is a subset of model B, model A is guaranteed to run
+#          where model B runs. There are no guarantees about the other way.
+#
+# Since: 2.8.0
+##
+{ 'enum': 'CpuModelCompareResult',
+  'data': [ 'incompatible', 'identical', 'superset', 'subset' ] }
+
+##
+# @CpuModelBaselineInfo:
+#
+# The result of a CPU model baseline.
+#
+# @model: the baselined CpuModelInfo.
+#
+# Since: 2.8.0
+##
+{ 'struct': 'CpuModelBaselineInfo',
+  'data': { 'model': 'CpuModelInfo' },
+  'if': 'defined(TARGET_S390X)' }
+
+##
+# @CpuModelCompareInfo:
+#
+# The result of a CPU model comparison.
+#
+# @result: The result of the compare operation.
+# @responsible-properties: List of properties that led to the comparison result
+#                          not being identical.
+#
+# @responsible-properties is a list of QOM property names that led to
+# both CPUs not being detected as identical. For identical models, this
+# list is empty.
+# If a QOM property is read-only, that means there's no known way to make the
+# CPU models identical. If the special property name "type" is included, the
+# models are by definition not identical and cannot be made identical.
+#
+# Since: 2.8.0
+##
+{ 'struct': 'CpuModelCompareInfo',
+  'data': { 'result': 'CpuModelCompareResult',
+            'responsible-properties': ['str'] },
+  'if': 'defined(TARGET_S390X)' }
+
+##
+# @query-cpu-model-comparison:
+#
+# Compares two CPU models, returning how they compare in a specific
+# configuration. The results indicates how both models compare regarding
+# runnability. This result can be used by tooling to make decisions if a
+# certain CPU model will run in a certain configuration or if a compatible
+# CPU model has to be created by baselining.
+#
+# Usually, a CPU model is compared against the maximum possible CPU model
+# of a certain configuration (e.g. the "host" model for KVM). If that CPU
+# model is identical or a subset, it will run in that configuration.
+#
+# The result returned by this command may be affected by:
+#
+# * QEMU version: CPU models may look different depending on the QEMU version.
+#   (Except for CPU models reported as "static" in query-cpu-definitions.)
+# * machine-type: CPU model may look different depending on the machine-type.
+#   (Except for CPU models reported as "static" in query-cpu-definitions.)
+# * machine options (including accelerator): in some architectures, CPU models
+#   may look different depending on machine and accelerator options. (Except for
+#   CPU models reported as "static" in query-cpu-definitions.)
+# * "-cpu" arguments and global properties: arguments to the -cpu option and
+#   global properties may affect expansion of CPU models. Using
+#   query-cpu-model-expansion while using these is not advised.
+#
+# Some architectures may not support comparing CPU models. s390x supports
+# comparing CPU models.
+#
+# Returns: a CpuModelBaselineInfo. Returns an error if comparing CPU models is
+#          not supported, if a model cannot be used, if a model contains
+#          an unknown cpu definition name, unknown properties or properties
+#          with wrong types.
+#
+# Note: this command isn't specific to s390x, but is only implemented
+#       on this architecture currently.
+#
+# Since: 2.8.0
+##
+{ 'command': 'query-cpu-model-comparison',
+  'data': { 'modela': 'CpuModelInfo', 'modelb': 'CpuModelInfo' },
+  'returns': 'CpuModelCompareInfo',
+  'if': 'defined(TARGET_S390X)' }
+
+##
+# @query-cpu-model-baseline:
+#
+# Baseline two CPU models, creating a compatible third model. The created
+# model will always be a static, migration-safe CPU model (see "static"
+# CPU model expansion for details).
+#
+# This interface can be used by tooling to create a compatible CPU model out
+# two CPU models. The created CPU model will be identical to or a subset of
+# both CPU models when comparing them. Therefore, the created CPU model is
+# guaranteed to run where the given CPU models run.
+#
+# The result returned by this command may be affected by:
+#
+# * QEMU version: CPU models may look different depending on the QEMU version.
+#   (Except for CPU models reported as "static" in query-cpu-definitions.)
+# * machine-type: CPU model may look different depending on the machine-type.
+#   (Except for CPU models reported as "static" in query-cpu-definitions.)
+# * machine options (including accelerator): in some architectures, CPU models
+#   may look different depending on machine and accelerator options. (Except for
+#   CPU models reported as "static" in query-cpu-definitions.)
+# * "-cpu" arguments and global properties: arguments to the -cpu option and
+#   global properties may affect expansion of CPU models. Using
+#   query-cpu-model-expansion while using these is not advised.
+#
+# Some architectures may not support baselining CPU models. s390x supports
+# baselining CPU models.
+#
+# Returns: a CpuModelBaselineInfo. Returns an error if baselining CPU models is
+#          not supported, if a model cannot be used, if a model contains
+#          an unknown cpu definition name, unknown properties or properties
+#          with wrong types.
+#
+# Note: this command isn't specific to s390x, but is only implemented
+#       on this architecture currently.
+#
+# Since: 2.8.0
+##
+{ 'command': 'query-cpu-model-baseline',
+  'data': { 'modela': 'CpuModelInfo',
+            'modelb': 'CpuModelInfo' },
+  'returns': 'CpuModelBaselineInfo',
+  'if': 'defined(TARGET_S390X)' }
+
+##
+# @CpuModelExpansionInfo:
+#
+# The result of a cpu model expansion.
+#
+# @model: the expanded CpuModelInfo.
+#
+# Since: 2.8.0
+##
+{ 'struct': 'CpuModelExpansionInfo',
+  'data': { 'model': 'CpuModelInfo' },
+  'if': 'defined(TARGET_S390X) || defined(TARGET_I386) || defined(TARGET_ARM)' }
+
+##
+# @query-cpu-model-expansion:
+#
+# Expands a given CPU model (or a combination of CPU model + additional options)
+# to different granularities, allowing tooling to get an understanding what a
+# specific CPU model looks like in QEMU under a certain configuration.
+#
+# This interface can be used to query the "host" CPU model.
+#
+# The data returned by this command may be affected by:
+#
+# * QEMU version: CPU models may look different depending on the QEMU version.
+#   (Except for CPU models reported as "static" in query-cpu-definitions.)
+# * machine-type: CPU model  may look different depending on the machine-type.
+#   (Except for CPU models reported as "static" in query-cpu-definitions.)
+# * machine options (including accelerator): in some architectures, CPU models
+#   may look different depending on machine and accelerator options. (Except for
+#   CPU models reported as "static" in query-cpu-definitions.)
+# * "-cpu" arguments and global properties: arguments to the -cpu option and
+#   global properties may affect expansion of CPU models. Using
+#   query-cpu-model-expansion while using these is not advised.
+#
+# Some architectures may not support all expansion types. s390x supports
+# "full" and "static". Arm only supports "full".
+#
+# Returns: a CpuModelExpansionInfo. Returns an error if expanding CPU models is
+#          not supported, if the model cannot be expanded, if the model contains
+#          an unknown CPU definition name, unknown properties or properties
+#          with a wrong type. Also returns an error if an expansion type is
+#          not supported.
+#
+# Since: 2.8.0
+##
+{ 'command': 'query-cpu-model-expansion',
+  'data': { 'type': 'CpuModelExpansionType',
+            'model': 'CpuModelInfo' },
+  'returns': 'CpuModelExpansionInfo',
+  'if': 'defined(TARGET_S390X) || defined(TARGET_I386) || defined(TARGET_ARM)' }
+
+##
+# @CpuDefinitionInfo:
+#
+# Virtual CPU definition.
+#
+# @name: the name of the CPU definition
+#
+# @migration-safe: whether a CPU definition can be safely used for
+#                  migration in combination with a QEMU compatibility machine
+#                  when migrating between different QEMU versions and between
+#                  hosts with different sets of (hardware or software)
+#                  capabilities. If not provided, information is not available
+#                  and callers should not assume the CPU definition to be
+#                  migration-safe. (since 2.8)
+#
+# @static: whether a CPU definition is static and will not change depending on
+#          QEMU version, machine type, machine options and accelerator options.
+#          A static model is always migration-safe. (since 2.8)
+#
+# @unavailable-features: List of properties that prevent
+#                        the CPU model from running in the current
+#                        host. (since 2.8)
+# @typename: Type name that can be used as argument to @device-list-properties,
+#            to introspect properties configurable using -cpu or -global.
+#            (since 2.9)
+#
+# @alias-of: Name of CPU model this model is an alias for.  The target of the
+#            CPU model alias may change depending on the machine type.
+#            Management software is supposed to translate CPU model aliases
+#            in the VM configuration, because aliases may stop being
+#            migration-safe in the future (since 4.1)
+#
+# @deprecated: If true, this CPU model is deprecated and may be removed in
+#              in some future version of QEMU according to the QEMU deprecation
+#              policy. (since 5.2)
+#
+# @unavailable-features is a list of QOM property names that
+# represent CPU model attributes that prevent the CPU from running.
+# If the QOM property is read-only, that means there's no known
+# way to make the CPU model run in the current host. Implementations
+# that choose not to provide specific information return the
+# property name "type".
+# If the property is read-write, it means that it MAY be possible
+# to run the CPU model in the current host if that property is
+# changed. Management software can use it as hints to suggest or
+# choose an alternative for the user, or just to generate meaningful
+# error messages explaining why the CPU model can't be used.
+# If @unavailable-features is an empty list, the CPU model is
+# runnable using the current host and machine-type.
+# If @unavailable-features is not present, runnability
+# information for the CPU is not available.
+#
+# Since: 1.2.0
+##
+{ 'struct': 'CpuDefinitionInfo',
+  'data': { 'name': 'str',
+            '*migration-safe': 'bool',
+            'static': 'bool',
+            '*unavailable-features': [ 'str' ],
+            'typename': 'str',
+            '*alias-of' : 'str',
+            'deprecated' : 'bool' },
+  'if': 'defined(TARGET_PPC) || defined(TARGET_ARM) || defined(TARGET_I386) || defined(TARGET_S390X) || defined(TARGET_MIPS)' }
+
+##
+# @query-cpu-definitions:
+#
+# Return a list of supported virtual CPU definitions
+#
+# Returns: a list of CpuDefInfo
+#
+# Since: 1.2.0
+##
+{ 'command': 'query-cpu-definitions', 'returns': ['CpuDefinitionInfo'],
+  'if': 'defined(TARGET_PPC) || defined(TARGET_ARM) || defined(TARGET_I386) || defined(TARGET_S390X) || defined(TARGET_MIPS)' }
diff --git a/qapi/machine.json b/qapi/machine.json
new file mode 100644
index 000000000..7c9a26377
--- /dev/null
+++ b/qapi/machine.json
@@ -0,0 +1,1473 @@
+# -*- Mode: Python -*-
+# vim: filetype=python
+#
+# This work is licensed under the terms of the GNU GPL, version 2 or later.
+# See the COPYING file in the top-level directory.
+
+##
+# = Machines
+##
+
+##
+# @SysEmuTarget:
+#
+# The comprehensive enumeration of QEMU system emulation ("softmmu")
+# targets. Run "./configure --help" in the project root directory, and
+# look for the \*-softmmu targets near the "--target-list" option. The
+# individual target constants are not documented here, for the time
+# being.
+#
+# @rx: since 5.0
+# @avr: since 5.1
+#
+# Notes: The resulting QMP strings can be appended to the "qemu-system-"
+#        prefix to produce the corresponding QEMU executable name. This
+#        is true even for "qemu-system-x86_64".
+#
+# Since: 3.0
+##
+{ 'enum' : 'SysEmuTarget',
+  'data' : [ 'aarch64', 'alpha', 'arm', 'avr', 'cris', 'hppa', 'i386', 'lm32',
+             'm68k', 'microblaze', 'microblazeel', 'mips', 'mips64',
+             'mips64el', 'mipsel', 'moxie', 'nios2', 'or1k', 'ppc',
+             'ppc64', 'riscv32', 'riscv64', 'rx', 's390x', 'sh4',
+             'sh4eb', 'sparc', 'sparc64', 'tricore', 'unicore32',
+             'x86_64', 'xtensa', 'xtensaeb' ] }
+
+##
+# @CpuInfoArch:
+#
+# An enumeration of cpu types that enable additional information during
+# @query-cpus and @query-cpus-fast.
+#
+# @s390: since 2.12
+#
+# @riscv: since 2.12
+#
+# Since: 2.6
+##
+{ 'enum': 'CpuInfoArch',
+  'data': ['x86', 'sparc', 'ppc', 'mips', 'tricore', 's390', 'riscv', 'other' ] }
+
+##
+# @CpuInfo:
+#
+# Information about a virtual CPU
+#
+# @CPU: the index of the virtual CPU
+#
+# @current: this only exists for backwards compatibility and should be ignored
+#
+# @halted: true if the virtual CPU is in the halt state.  Halt usually refers
+#          to a processor specific low power mode.
+#
+# @qom_path: path to the CPU object in the QOM tree (since 2.4)
+#
+# @thread_id: ID of the underlying host thread
+#
+# @props: properties describing to which node/socket/core/thread
+#         virtual CPU belongs to, provided if supported by board (since 2.10)
+#
+# @arch: architecture of the cpu, which determines which additional fields
+#        will be listed (since 2.6)
+#
+# Since: 0.14.0
+#
+# Notes: @halted is a transient state that changes frequently.  By the time the
+#        data is sent to the client, the guest may no longer be halted.
+##
+{ 'union': 'CpuInfo',
+  'base': {'CPU': 'int', 'current': 'bool', 'halted': 'bool',
+           'qom_path': 'str', 'thread_id': 'int',
+           '*props': 'CpuInstanceProperties', 'arch': 'CpuInfoArch' },
+  'discriminator': 'arch',
+  'data': { 'x86': 'CpuInfoX86',
+            'sparc': 'CpuInfoSPARC',
+            'ppc': 'CpuInfoPPC',
+            'mips': 'CpuInfoMIPS',
+            'tricore': 'CpuInfoTricore',
+            's390': 'CpuInfoS390',
+            'riscv': 'CpuInfoRISCV' } }
+
+##
+# @CpuInfoX86:
+#
+# Additional information about a virtual i386 or x86_64 CPU
+#
+# @pc: the 64-bit instruction pointer
+#
+# Since: 2.6
+##
+{ 'struct': 'CpuInfoX86', 'data': { 'pc': 'int' } }
+
+##
+# @CpuInfoSPARC:
+#
+# Additional information about a virtual SPARC CPU
+#
+# @pc: the PC component of the instruction pointer
+#
+# @npc: the NPC component of the instruction pointer
+#
+# Since: 2.6
+##
+{ 'struct': 'CpuInfoSPARC', 'data': { 'pc': 'int', 'npc': 'int' } }
+
+##
+# @CpuInfoPPC:
+#
+# Additional information about a virtual PPC CPU
+#
+# @nip: the instruction pointer
+#
+# Since: 2.6
+##
+{ 'struct': 'CpuInfoPPC', 'data': { 'nip': 'int' } }
+
+##
+# @CpuInfoMIPS:
+#
+# Additional information about a virtual MIPS CPU
+#
+# @PC: the instruction pointer
+#
+# Since: 2.6
+##
+{ 'struct': 'CpuInfoMIPS', 'data': { 'PC': 'int' } }
+
+##
+# @CpuInfoTricore:
+#
+# Additional information about a virtual Tricore CPU
+#
+# @PC: the instruction pointer
+#
+# Since: 2.6
+##
+{ 'struct': 'CpuInfoTricore', 'data': { 'PC': 'int' } }
+
+##
+# @CpuInfoRISCV:
+#
+# Additional information about a virtual RISCV CPU
+#
+# @pc: the instruction pointer
+#
+# Since 2.12
+##
+{ 'struct': 'CpuInfoRISCV', 'data': { 'pc': 'int' } }
+
+##
+# @CpuS390State:
+#
+# An enumeration of cpu states that can be assumed by a virtual
+# S390 CPU
+#
+# Since: 2.12
+##
+{ 'enum': 'CpuS390State',
+  'prefix': 'S390_CPU_STATE',
+  'data': [ 'uninitialized', 'stopped', 'check-stop', 'operating', 'load' ] }
+
+##
+# @CpuInfoS390:
+#
+# Additional information about a virtual S390 CPU
+#
+# @cpu-state: the virtual CPU's state
+#
+# Since: 2.12
+##
+{ 'struct': 'CpuInfoS390', 'data': { 'cpu-state': 'CpuS390State' } }
+
+##
+# @query-cpus:
+#
+# Returns a list of information about each virtual CPU.
+#
+# This command causes vCPU threads to exit to userspace, which causes
+# a small interruption to guest CPU execution. This will have a negative
+# impact on realtime guests and other latency sensitive guest workloads.
+#
+# Features:
+# @deprecated: This command is deprecated, because it interferes with
+#              the guest.  Use 'query-cpus-fast' instead to avoid the vCPU
+#              interruption.
+#
+# Returns: a list of @CpuInfo for each virtual CPU
+#
+# Since: 0.14.0
+#
+# Example:
+#
+# -> { "execute": "query-cpus" }
+# <- { "return": [
+#          {
+#             "CPU":0,
+#             "current":true,
+#             "halted":false,
+#             "qom_path":"/machine/unattached/device[0]",
+#             "arch":"x86",
+#             "pc":3227107138,
+#             "thread_id":3134
+#          },
+#          {
+#             "CPU":1,
+#             "current":false,
+#             "halted":true,
+#             "qom_path":"/machine/unattached/device[2]",
+#             "arch":"x86",
+#             "pc":7108165,
+#             "thread_id":3135
+#          }
+#       ]
+#    }
+#
+##
+{ 'command': 'query-cpus', 'returns': ['CpuInfo'],
+  'features': [ 'deprecated' ] }
+
+##
+# @CpuInfoFast:
+#
+# Information about a virtual CPU
+#
+# @cpu-index: index of the virtual CPU
+#
+# @qom-path: path to the CPU object in the QOM tree
+#
+# @thread-id: ID of the underlying host thread
+#
+# @props: properties describing to which node/socket/core/thread
+#         virtual CPU belongs to, provided if supported by board
+#
+# @arch: base architecture of the cpu
+#
+# @target: the QEMU system emulation target, which determines which
+#          additional fields will be listed (since 3.0)
+#
+# Features:
+# @deprecated: Member @arch is deprecated.  Use @target instead.
+#
+# Since: 2.12
+#
+##
+{ 'union'         : 'CpuInfoFast',
+  'base'          : { 'cpu-index'    : 'int',
+                      'qom-path'     : 'str',
+                      'thread-id'    : 'int',
+                      '*props'       : 'CpuInstanceProperties',
+                      'arch'         : { 'type': 'CpuInfoArch',
+                                         'features': [ 'deprecated' ] },
+                      'target'       : 'SysEmuTarget' },
+  'discriminator' : 'target',
+  'data'          : { 's390x'        : 'CpuInfoS390' } }
+
+##
+# @query-cpus-fast:
+#
+# Returns information about all virtual CPUs. This command does not
+# incur a performance penalty and should be used in production
+# instead of query-cpus.
+#
+# Returns: list of @CpuInfoFast
+#
+# Since: 2.12
+#
+# Example:
+#
+# -> { "execute": "query-cpus-fast" }
+# <- { "return": [
+#         {
+#             "thread-id": 25627,
+#             "props": {
+#                 "core-id": 0,
+#                 "thread-id": 0,
+#                 "socket-id": 0
+#             },
+#             "qom-path": "/machine/unattached/device[0]",
+#             "arch":"x86",
+#             "target":"x86_64",
+#             "cpu-index": 0
+#         },
+#         {
+#             "thread-id": 25628,
+#             "props": {
+#                 "core-id": 0,
+#                 "thread-id": 0,
+#                 "socket-id": 1
+#             },
+#             "qom-path": "/machine/unattached/device[2]",
+#             "arch":"x86",
+#             "target":"x86_64",
+#             "cpu-index": 1
+#         }
+#     ]
+# }
+##
+{ 'command': 'query-cpus-fast', 'returns': [ 'CpuInfoFast' ] }
+
+##
+# @MachineInfo:
+#
+# Information describing a machine.
+#
+# @name: the name of the machine
+#
+# @alias: an alias for the machine name
+#
+# @is-default: whether the machine is default
+#
+# @cpu-max: maximum number of CPUs supported by the machine type
+#           (since 1.5.0)
+#
+# @hotpluggable-cpus: cpu hotplug via -device is supported (since 2.7.0)
+#
+# @numa-mem-supported: true if '-numa node,mem' option is supported by
+#                      the machine type and false otherwise (since 4.1)
+#
+# @deprecated: if true, the machine type is deprecated and may be removed
+#              in future versions of QEMU according to the QEMU deprecation
+#              policy (since 4.1.0)
+#
+# @default-cpu-type: default CPU model typename if none is requested via
+#                    the -cpu argument. (since 4.2)
+#
+# @default-ram-id: the default ID of initial RAM memory backend (since 5.2)
+#
+# Since: 1.2.0
+##
+{ 'struct': 'MachineInfo',
+  'data': { 'name': 'str', '*alias': 'str',
+            '*is-default': 'bool', 'cpu-max': 'int',
+            'hotpluggable-cpus': 'bool',  'numa-mem-supported': 'bool',
+            'deprecated': 'bool', '*default-cpu-type': 'str',
+            '*default-ram-id': 'str' } }
+
+##
+# @query-machines:
+#
+# Return a list of supported machines
+#
+# Returns: a list of MachineInfo
+#
+# Since: 1.2.0
+##
+{ 'command': 'query-machines', 'returns': ['MachineInfo'] }
+
+##
+# @CurrentMachineParams:
+#
+# Information describing the running machine parameters.
+#
+# @wakeup-suspend-support: true if the machine supports wake up from
+#                          suspend
+#
+# Since: 4.0
+##
+{ 'struct': 'CurrentMachineParams',
+  'data': { 'wakeup-suspend-support': 'bool'} }
+
+##
+# @query-current-machine:
+#
+# Return information on the current virtual machine.
+#
+# Returns: CurrentMachineParams
+#
+# Since: 4.0
+##
+{ 'command': 'query-current-machine', 'returns': 'CurrentMachineParams' }
+
+##
+# @TargetInfo:
+#
+# Information describing the QEMU target.
+#
+# @arch: the target architecture
+#
+# Since: 1.2.0
+##
+{ 'struct': 'TargetInfo',
+  'data': { 'arch': 'SysEmuTarget' } }
+
+##
+# @query-target:
+#
+# Return information about the target for this QEMU
+#
+# Returns: TargetInfo
+#
+# Since: 1.2.0
+##
+{ 'command': 'query-target', 'returns': 'TargetInfo' }
+
+##
+# @UuidInfo:
+#
+# Guest UUID information (Universally Unique Identifier).
+#
+# @UUID: the UUID of the guest
+#
+# Since: 0.14.0
+#
+# Notes: If no UUID was specified for the guest, a null UUID is returned.
+##
+{ 'struct': 'UuidInfo', 'data': {'UUID': 'str'} }
+
+##
+# @query-uuid:
+#
+# Query the guest UUID information.
+#
+# Returns: The @UuidInfo for the guest
+#
+# Since: 0.14.0
+#
+# Example:
+#
+# -> { "execute": "query-uuid" }
+# <- { "return": { "UUID": "550e8400-e29b-41d4-a716-446655440000" } }
+#
+##
+{ 'command': 'query-uuid', 'returns': 'UuidInfo', 'allow-preconfig': true }
+
+##
+# @GuidInfo:
+#
+# GUID information.
+#
+# @guid: the globally unique identifier
+#
+# Since: 2.9
+##
+{ 'struct': 'GuidInfo', 'data': {'guid': 'str'} }
+
+##
+# @query-vm-generation-id:
+#
+# Show Virtual Machine Generation ID
+#
+# Since: 2.9
+##
+{ 'command': 'query-vm-generation-id', 'returns': 'GuidInfo' }
+
+##
+# @system_reset:
+#
+# Performs a hard reset of a guest.
+#
+# Since: 0.14.0
+#
+# Example:
+#
+# -> { "execute": "system_reset" }
+# <- { "return": {} }
+#
+##
+{ 'command': 'system_reset' }
+
+##
+# @system_powerdown:
+#
+# Requests that a guest perform a powerdown operation.
+#
+# Since: 0.14.0
+#
+# Notes: A guest may or may not respond to this command.  This command
+#        returning does not indicate that a guest has accepted the request or
+#        that it has shut down.  Many guests will respond to this command by
+#        prompting the user in some way.
+# Example:
+#
+# -> { "execute": "system_powerdown" }
+# <- { "return": {} }
+#
+##
+{ 'command': 'system_powerdown' }
+
+##
+# @system_wakeup:
+#
+# Wake up guest from suspend. If the guest has wake-up from suspend
+# support enabled (wakeup-suspend-support flag from
+# query-current-machine), wake-up guest from suspend if the guest is
+# in SUSPENDED state. Return an error otherwise.
+#
+# Since:  1.1
+#
+# Returns:  nothing.
+#
+# Note: prior to 4.0, this command does nothing in case the guest
+#       isn't suspended.
+#
+# Example:
+#
+# -> { "execute": "system_wakeup" }
+# <- { "return": {} }
+#
+##
+{ 'command': 'system_wakeup' }
+
+##
+# @LostTickPolicy:
+#
+# Policy for handling lost ticks in timer devices.  Ticks end up getting
+# lost when, for example, the guest is paused.
+#
+# @discard: throw away the missed ticks and continue with future injection
+#           normally.  The guest OS will see the timer jump ahead by a
+#           potentially quite significant amount all at once, as if the
+#           intervening chunk of time had simply not existed; needless to
+#           say, such a sudden jump can easily confuse a guest OS which is
+#           not specifically prepared to deal with it.  Assuming the guest
+#           OS can deal correctly with the time jump, the time in the guest
+#           and in the host should now match.
+#
+# @delay: continue to deliver ticks at the normal rate.  The guest OS will
+#         not notice anything is amiss, as from its point of view time will
+#         have continued to flow normally.  The time in the guest should now
+#         be behind the time in the host by exactly the amount of time during
+#         which ticks have been missed.
+#
+# @slew: deliver ticks at a higher rate to catch up with the missed ticks.
+#        The guest OS will not notice anything is amiss, as from its point
+#        of view time will have continued to flow normally.  Once the timer
+#        has managed to catch up with all the missing ticks, the time in
+#        the guest and in the host should match.
+#
+# Since: 2.0
+##
+{ 'enum': 'LostTickPolicy',
+  'data': ['discard', 'delay', 'slew' ] }
+
+##
+# @inject-nmi:
+#
+# Injects a Non-Maskable Interrupt into the default CPU (x86/s390) or all CPUs (ppc64).
+# The command fails when the guest doesn't support injecting.
+#
+# Returns:  If successful, nothing
+#
+# Since:  0.14.0
+#
+# Note: prior to 2.1, this command was only supported for x86 and s390 VMs
+#
+# Example:
+#
+# -> { "execute": "inject-nmi" }
+# <- { "return": {} }
+#
+##
+{ 'command': 'inject-nmi' }
+
+##
+# @KvmInfo:
+#
+# Information about support for KVM acceleration
+#
+# @enabled: true if KVM acceleration is active
+#
+# @present: true if KVM acceleration is built into this executable
+#
+# Since: 0.14.0
+##
+{ 'struct': 'KvmInfo', 'data': {'enabled': 'bool', 'present': 'bool'} }
+
+##
+# @query-kvm:
+#
+# Returns information about KVM acceleration
+#
+# Returns: @KvmInfo
+#
+# Since: 0.14.0
+#
+# Example:
+#
+# -> { "execute": "query-kvm" }
+# <- { "return": { "enabled": true, "present": true } }
+#
+##
+{ 'command': 'query-kvm', 'returns': 'KvmInfo' }
+
+##
+# @NumaOptionsType:
+#
+# @node: NUMA nodes configuration
+#
+# @dist: NUMA distance configuration (since 2.10)
+#
+# @cpu: property based CPU(s) to node mapping (Since: 2.10)
+#
+# @hmat-lb: memory latency and bandwidth information (Since: 5.0)
+#
+# @hmat-cache: memory side cache information (Since: 5.0)
+#
+# Since: 2.1
+##
+{ 'enum': 'NumaOptionsType',
+  'data': [ 'node', 'dist', 'cpu', 'hmat-lb', 'hmat-cache' ] }
+
+##
+# @NumaOptions:
+#
+# A discriminated record of NUMA options. (for OptsVisitor)
+#
+# Since: 2.1
+##
+{ 'union': 'NumaOptions',
+  'base': { 'type': 'NumaOptionsType' },
+  'discriminator': 'type',
+  'data': {
+    'node': 'NumaNodeOptions',
+    'dist': 'NumaDistOptions',
+    'cpu': 'NumaCpuOptions',
+    'hmat-lb': 'NumaHmatLBOptions',
+    'hmat-cache': 'NumaHmatCacheOptions' }}
+
+##
+# @NumaNodeOptions:
+#
+# Create a guest NUMA node. (for OptsVisitor)
+#
+# @nodeid: NUMA node ID (increase by 1 from 0 if omitted)
+#
+# @cpus: VCPUs belonging to this node (assign VCPUS round-robin
+#         if omitted)
+#
+# @mem: memory size of this node; mutually exclusive with @memdev.
+#       Equally divide total memory among nodes if both @mem and @memdev are
+#       omitted.
+#
+# @memdev: memory backend object.  If specified for one node,
+#          it must be specified for all nodes.
+#
+# @initiator: defined in ACPI 6.3 Chapter 5.2.27.3 Table 5-145,
+#             points to the nodeid which has the memory controller
+#             responsible for this NUMA node. This field provides
+#             additional information as to the initiator node that
+#             is closest (as in directly attached) to this node, and
+#             therefore has the best performance (since 5.0)
+#
+# Since: 2.1
+##
+{ 'struct': 'NumaNodeOptions',
+  'data': {
+   '*nodeid': 'uint16',
+   '*cpus':   ['uint16'],
+   '*mem':    'size',
+   '*memdev': 'str',
+   '*initiator': 'uint16' }}
+
+##
+# @NumaDistOptions:
+#
+# Set the distance between 2 NUMA nodes.
+#
+# @src: source NUMA node.
+#
+# @dst: destination NUMA node.
+#
+# @val: NUMA distance from source node to destination node.
+#       When a node is unreachable from another node, set the distance
+#       between them to 255.
+#
+# Since: 2.10
+##
+{ 'struct': 'NumaDistOptions',
+  'data': {
+   'src': 'uint16',
+   'dst': 'uint16',
+   'val': 'uint8' }}
+
+##
+# @X86CPURegister32:
+#
+# A X86 32-bit register
+#
+# Since: 1.5
+##
+{ 'enum': 'X86CPURegister32',
+  'data': [ 'EAX', 'EBX', 'ECX', 'EDX', 'ESP', 'EBP', 'ESI', 'EDI' ] }
+
+##
+# @X86CPUFeatureWordInfo:
+#
+# Information about a X86 CPU feature word
+#
+# @cpuid-input-eax: Input EAX value for CPUID instruction for that feature word
+#
+# @cpuid-input-ecx: Input ECX value for CPUID instruction for that
+#                   feature word
+#
+# @cpuid-register: Output register containing the feature bits
+#
+# @features: value of output register, containing the feature bits
+#
+# Since: 1.5
+##
+{ 'struct': 'X86CPUFeatureWordInfo',
+  'data': { 'cpuid-input-eax': 'int',
+            '*cpuid-input-ecx': 'int',
+            'cpuid-register': 'X86CPURegister32',
+            'features': 'int' } }
+
+##
+# @DummyForceArrays:
+#
+# Not used by QMP; hack to let us use X86CPUFeatureWordInfoList internally
+#
+# Since: 2.5
+##
+{ 'struct': 'DummyForceArrays',
+  'data': { 'unused': ['X86CPUFeatureWordInfo'] } }
+
+##
+# @NumaCpuOptions:
+#
+# Option "-numa cpu" overrides default cpu to node mapping.
+# It accepts the same set of cpu properties as returned by
+# query-hotpluggable-cpus[].props, where node-id could be used to
+# override default node mapping.
+#
+# Since: 2.10
+##
+{ 'struct': 'NumaCpuOptions',
+   'base': 'CpuInstanceProperties',
+   'data' : {} }
+
+##
+# @HmatLBMemoryHierarchy:
+#
+# The memory hierarchy in the System Locality Latency and Bandwidth
+# Information Structure of HMAT (Heterogeneous Memory Attribute Table)
+#
+# For more information about @HmatLBMemoryHierarchy, see chapter
+# 5.2.27.4: Table 5-146: Field "Flags" of ACPI 6.3 spec.
+#
+# @memory: the structure represents the memory performance
+#
+# @first-level: first level of memory side cache
+#
+# @second-level: second level of memory side cache
+#
+# @third-level: third level of memory side cache
+#
+# Since: 5.0
+##
+{ 'enum': 'HmatLBMemoryHierarchy',
+  'data': [ 'memory', 'first-level', 'second-level', 'third-level' ] }
+
+##
+# @HmatLBDataType:
+#
+# Data type in the System Locality Latency and Bandwidth
+# Information Structure of HMAT (Heterogeneous Memory Attribute Table)
+#
+# For more information about @HmatLBDataType, see chapter
+# 5.2.27.4: Table 5-146:  Field "Data Type" of ACPI 6.3 spec.
+#
+# @access-latency: access latency (nanoseconds)
+#
+# @read-latency: read latency (nanoseconds)
+#
+# @write-latency: write latency (nanoseconds)
+#
+# @access-bandwidth: access bandwidth (Bytes per second)
+#
+# @read-bandwidth: read bandwidth (Bytes per second)
+#
+# @write-bandwidth: write bandwidth (Bytes per second)
+#
+# Since: 5.0
+##
+{ 'enum': 'HmatLBDataType',
+  'data': [ 'access-latency', 'read-latency', 'write-latency',
+            'access-bandwidth', 'read-bandwidth', 'write-bandwidth' ] }
+
+##
+# @NumaHmatLBOptions:
+#
+# Set the system locality latency and bandwidth information
+# between Initiator and Target proximity Domains.
+#
+# For more information about @NumaHmatLBOptions, see chapter
+# 5.2.27.4: Table 5-146 of ACPI 6.3 spec.
+#
+# @initiator: the Initiator Proximity Domain.
+#
+# @target: the Target Proximity Domain.
+#
+# @hierarchy: the Memory Hierarchy. Indicates the performance
+#             of memory or side cache.
+#
+# @data-type: presents the type of data, access/read/write
+#             latency or hit latency.
+#
+# @latency: the value of latency from @initiator to @target
+#           proximity domain, the latency unit is "ns(nanosecond)".
+#
+# @bandwidth: the value of bandwidth between @initiator and @target
+#             proximity domain, the bandwidth unit is
+#             "Bytes per second".
+#
+# Since: 5.0
+##
+{ 'struct': 'NumaHmatLBOptions',
+    'data': {
+    'initiator': 'uint16',
+    'target': 'uint16',
+    'hierarchy': 'HmatLBMemoryHierarchy',
+    'data-type': 'HmatLBDataType',
+    '*latency': 'uint64',
+    '*bandwidth': 'size' }}
+
+##
+# @HmatCacheAssociativity:
+#
+# Cache associativity in the Memory Side Cache Information Structure
+# of HMAT
+#
+# For more information of @HmatCacheAssociativity, see chapter
+# 5.2.27.5: Table 5-147 of ACPI 6.3 spec.
+#
+# @none: None (no memory side cache in this proximity domain,
+#              or cache associativity unknown)
+#
+# @direct: Direct Mapped
+#
+# @complex: Complex Cache Indexing (implementation specific)
+#
+# Since: 5.0
+##
+{ 'enum': 'HmatCacheAssociativity',
+  'data': [ 'none', 'direct', 'complex' ] }
+
+##
+# @HmatCacheWritePolicy:
+#
+# Cache write policy in the Memory Side Cache Information Structure
+# of HMAT
+#
+# For more information of @HmatCacheWritePolicy, see chapter
+# 5.2.27.5: Table 5-147: Field "Cache Attributes" of ACPI 6.3 spec.
+#
+# @none: None (no memory side cache in this proximity domain,
+#        or cache write policy unknown)
+#
+# @write-back: Write Back (WB)
+#
+# @write-through: Write Through (WT)
+#
+# Since: 5.0
+##
+{ 'enum': 'HmatCacheWritePolicy',
+  'data': [ 'none', 'write-back', 'write-through' ] }
+
+##
+# @NumaHmatCacheOptions:
+#
+# Set the memory side cache information for a given memory domain.
+#
+# For more information of @NumaHmatCacheOptions, see chapter
+# 5.2.27.5: Table 5-147: Field "Cache Attributes" of ACPI 6.3 spec.
+#
+# @node-id: the memory proximity domain to which the memory belongs.
+#
+# @size: the size of memory side cache in bytes.
+#
+# @level: the cache level described in this structure.
+#
+# @associativity: the cache associativity,
+#                 none/direct-mapped/complex(complex cache indexing).
+#
+# @policy: the write policy, none/write-back/write-through.
+#
+# @line: the cache Line size in bytes.
+#
+# Since: 5.0
+##
+{ 'struct': 'NumaHmatCacheOptions',
+  'data': {
+   'node-id': 'uint32',
+   'size': 'size',
+   'level': 'uint8',
+   'associativity': 'HmatCacheAssociativity',
+   'policy': 'HmatCacheWritePolicy',
+   'line': 'uint16' }}
+
+##
+# @HostMemPolicy:
+#
+# Host memory policy types
+#
+# @default: restore default policy, remove any nondefault policy
+#
+# @preferred: set the preferred host nodes for allocation
+#
+# @bind: a strict policy that restricts memory allocation to the
+#        host nodes specified
+#
+# @interleave: memory allocations are interleaved across the set
+#              of host nodes specified
+#
+# Since: 2.1
+##
+{ 'enum': 'HostMemPolicy',
+  'data': [ 'default', 'preferred', 'bind', 'interleave' ] }
+
+##
+# @memsave:
+#
+# Save a portion of guest memory to a file.
+#
+# @val: the virtual address of the guest to start from
+#
+# @size: the size of memory region to save
+#
+# @filename: the file to save the memory to as binary data
+#
+# @cpu-index: the index of the virtual CPU to use for translating the
+#             virtual address (defaults to CPU 0)
+#
+# Returns: Nothing on success
+#
+# Since: 0.14.0
+#
+# Notes: Errors were not reliably returned until 1.1
+#
+# Example:
+#
+# -> { "execute": "memsave",
+#      "arguments": { "val": 10,
+#                     "size": 100,
+#                     "filename": "/tmp/virtual-mem-dump" } }
+# <- { "return": {} }
+#
+##
+{ 'command': 'memsave',
+  'data': {'val': 'int', 'size': 'int', 'filename': 'str', '*cpu-index': 'int'} }
+
+##
+# @pmemsave:
+#
+# Save a portion of guest physical memory to a file.
+#
+# @val: the physical address of the guest to start from
+#
+# @size: the size of memory region to save
+#
+# @filename: the file to save the memory to as binary data
+#
+# Returns: Nothing on success
+#
+# Since: 0.14.0
+#
+# Notes: Errors were not reliably returned until 1.1
+#
+# Example:
+#
+# -> { "execute": "pmemsave",
+#      "arguments": { "val": 10,
+#                     "size": 100,
+#                     "filename": "/tmp/physical-mem-dump" } }
+# <- { "return": {} }
+#
+##
+{ 'command': 'pmemsave',
+  'data': {'val': 'int', 'size': 'int', 'filename': 'str'} }
+
+##
+# @Memdev:
+#
+# Information about memory backend
+#
+# @id: backend's ID if backend has 'id' property (since 2.9)
+#
+# @size: memory backend size
+#
+# @merge: enables or disables memory merge support
+#
+# @dump: includes memory backend's memory in a core dump or not
+#
+# @prealloc: enables or disables memory preallocation
+#
+# @host-nodes: host nodes for its memory policy
+#
+# @policy: memory policy of memory backend
+#
+# Since: 2.1
+##
+{ 'struct': 'Memdev',
+  'data': {
+    '*id':        'str',
+    'size':       'size',
+    'merge':      'bool',
+    'dump':       'bool',
+    'prealloc':   'bool',
+    'host-nodes': ['uint16'],
+    'policy':     'HostMemPolicy' }}
+
+##
+# @query-memdev:
+#
+# Returns information for all memory backends.
+#
+# Returns: a list of @Memdev.
+#
+# Since: 2.1
+#
+# Example:
+#
+# -> { "execute": "query-memdev" }
+# <- { "return": [
+#        {
+#          "id": "mem1",
+#          "size": 536870912,
+#          "merge": false,
+#          "dump": true,
+#          "prealloc": false,
+#          "host-nodes": [0, 1],
+#          "policy": "bind"
+#        },
+#        {
+#          "size": 536870912,
+#          "merge": false,
+#          "dump": true,
+#          "prealloc": true,
+#          "host-nodes": [2, 3],
+#          "policy": "preferred"
+#        }
+#      ]
+#    }
+#
+##
+{ 'command': 'query-memdev', 'returns': ['Memdev'], 'allow-preconfig': true }
+
+##
+# @CpuInstanceProperties:
+#
+# List of properties to be used for hotplugging a CPU instance,
+# it should be passed by management with device_add command when
+# a CPU is being hotplugged.
+#
+# @node-id: NUMA node ID the CPU belongs to
+# @socket-id: socket number within node/board the CPU belongs to
+# @die-id: die number within node/board the CPU belongs to (Since 4.1)
+# @core-id: core number within die the CPU belongs to
+# @thread-id: thread number within core the CPU belongs to
+#
+# Note: currently there are 5 properties that could be present
+#       but management should be prepared to pass through other
+#       properties with device_add command to allow for future
+#       interface extension. This also requires the filed names to be kept in
+#       sync with the properties passed to -device/device_add.
+#
+# Since: 2.7
+##
+{ 'struct': 'CpuInstanceProperties',
+  'data': { '*node-id': 'int',
+            '*socket-id': 'int',
+            '*die-id': 'int',
+            '*core-id': 'int',
+            '*thread-id': 'int'
+  }
+}
+
+##
+# @HotpluggableCPU:
+#
+# @type: CPU object type for usage with device_add command
+# @props: list of properties to be used for hotplugging CPU
+# @vcpus-count: number of logical VCPU threads @HotpluggableCPU provides
+# @qom-path: link to existing CPU object if CPU is present or
+#            omitted if CPU is not present.
+#
+# Since: 2.7
+##
+{ 'struct': 'HotpluggableCPU',
+  'data': { 'type': 'str',
+            'vcpus-count': 'int',
+            'props': 'CpuInstanceProperties',
+            '*qom-path': 'str'
+          }
+}
+
+##
+# @query-hotpluggable-cpus:
+#
+# TODO: Better documentation; currently there is none.
+#
+# Returns: a list of HotpluggableCPU objects.
+#
+# Since: 2.7
+#
+# Example:
+#
+# For pseries machine type started with -smp 2,cores=2,maxcpus=4 -cpu POWER8:
+#
+# -> { "execute": "query-hotpluggable-cpus" }
+# <- {"return": [
+#      { "props": { "core": 8 }, "type": "POWER8-spapr-cpu-core",
+#        "vcpus-count": 1 },
+#      { "props": { "core": 0 }, "type": "POWER8-spapr-cpu-core",
+#        "vcpus-count": 1, "qom-path": "/machine/unattached/device[0]"}
+#    ]}'
+#
+# For pc machine type started with -smp 1,maxcpus=2:
+#
+# -> { "execute": "query-hotpluggable-cpus" }
+# <- {"return": [
+#      {
+#         "type": "qemu64-x86_64-cpu", "vcpus-count": 1,
+#         "props": {"core-id": 0, "socket-id": 1, "thread-id": 0}
+#      },
+#      {
+#         "qom-path": "/machine/unattached/device[0]",
+#         "type": "qemu64-x86_64-cpu", "vcpus-count": 1,
+#         "props": {"core-id": 0, "socket-id": 0, "thread-id": 0}
+#      }
+#    ]}
+#
+# For s390x-virtio-ccw machine type started with -smp 1,maxcpus=2 -cpu qemu
+# (Since: 2.11):
+#
+# -> { "execute": "query-hotpluggable-cpus" }
+# <- {"return": [
+#      {
+#         "type": "qemu-s390x-cpu", "vcpus-count": 1,
+#         "props": { "core-id": 1 }
+#      },
+#      {
+#         "qom-path": "/machine/unattached/device[0]",
+#         "type": "qemu-s390x-cpu", "vcpus-count": 1,
+#         "props": { "core-id": 0 }
+#      }
+#    ]}
+#
+##
+{ 'command': 'query-hotpluggable-cpus', 'returns': ['HotpluggableCPU'],
+             'allow-preconfig': true }
+
+##
+# @set-numa-node:
+#
+# Runtime equivalent of '-numa' CLI option, available at
+# preconfigure stage to configure numa mapping before initializing
+# machine.
+#
+# Since 3.0
+##
+{ 'command': 'set-numa-node', 'boxed': true,
+  'data': 'NumaOptions',
+  'allow-preconfig': true
+}
+
+##
+# @balloon:
+#
+# Request the balloon driver to change its balloon size.
+#
+# @value: the target logical size of the VM in bytes.
+#         We can deduce the size of the balloon using this formula:
+#
+#            logical_vm_size = vm_ram_size - balloon_size
+#
+#         From it we have: balloon_size = vm_ram_size - @value
+#
+# Returns: - Nothing on success
+#          - If the balloon driver is enabled but not functional because the KVM
+#            kernel module cannot support it, KvmMissingCap
+#          - If no balloon device is present, DeviceNotActive
+#
+# Notes: This command just issues a request to the guest.  When it returns,
+#        the balloon size may not have changed.  A guest can change the balloon
+#        size independent of this command.
+#
+# Since: 0.14.0
+#
+# Example:
+#
+# -> { "execute": "balloon", "arguments": { "value": 536870912 } }
+# <- { "return": {} }
+#
+# With a 2.5GiB guest this command inflated the ballon to 3GiB.
+#
+##
+{ 'command': 'balloon', 'data': {'value': 'int'} }
+
+##
+# @BalloonInfo:
+#
+# Information about the guest balloon device.
+#
+# @actual: the logical size of the VM in bytes
+#          Formula used: logical_vm_size = vm_ram_size - balloon_size
+#
+# Since: 0.14.0
+#
+##
+{ 'struct': 'BalloonInfo', 'data': {'actual': 'int' } }
+
+##
+# @query-balloon:
+#
+# Return information about the balloon device.
+#
+# Returns: - @BalloonInfo on success
+#          - If the balloon driver is enabled but not functional because the KVM
+#            kernel module cannot support it, KvmMissingCap
+#          - If no balloon device is present, DeviceNotActive
+#
+# Since: 0.14.0
+#
+# Example:
+#
+# -> { "execute": "query-balloon" }
+# <- { "return": {
+#          "actual": 1073741824,
+#       }
+#    }
+#
+##
+{ 'command': 'query-balloon', 'returns': 'BalloonInfo' }
+
+##
+# @BALLOON_CHANGE:
+#
+# Emitted when the guest changes the actual BALLOON level. This value is
+# equivalent to the @actual field return by the 'query-balloon' command
+#
+# @actual: the logical size of the VM in bytes
+#          Formula used: logical_vm_size = vm_ram_size - balloon_size
+#
+# Note: this event is rate-limited.
+#
+# Since: 1.2
+#
+# Example:
+#
+# <- { "event": "BALLOON_CHANGE",
+#      "data": { "actual": 944766976 },
+#      "timestamp": { "seconds": 1267020223, "microseconds": 435656 } }
+#
+##
+{ 'event': 'BALLOON_CHANGE',
+  'data': { 'actual': 'int' } }
+
+##
+# @MemoryInfo:
+#
+# Actual memory information in bytes.
+#
+# @base-memory: size of "base" memory specified with command line
+#               option -m.
+#
+# @plugged-memory: size of memory that can be hot-unplugged. This field
+#                  is omitted if target doesn't support memory hotplug
+#                  (i.e. CONFIG_MEM_DEVICE not defined at build time).
+#
+# Since: 2.11.0
+##
+{ 'struct': 'MemoryInfo',
+  'data'  : { 'base-memory': 'size', '*plugged-memory': 'size' } }
+
+##
+# @query-memory-size-summary:
+#
+# Return the amount of initially allocated and present hotpluggable (if
+# enabled) memory in bytes.
+#
+# Example:
+#
+# -> { "execute": "query-memory-size-summary" }
+# <- { "return": { "base-memory": 4294967296, "plugged-memory": 0 } }
+#
+# Since: 2.11.0
+##
+{ 'command': 'query-memory-size-summary', 'returns': 'MemoryInfo' }
+
+##
+# @PCDIMMDeviceInfo:
+#
+# PCDIMMDevice state information
+#
+# @id: device's ID
+#
+# @addr: physical address, where device is mapped
+#
+# @size: size of memory that the device provides
+#
+# @slot: slot number at which device is plugged in
+#
+# @node: NUMA node number where device is plugged in
+#
+# @memdev: memory backend linked with device
+#
+# @hotplugged: true if device was hotplugged
+#
+# @hotpluggable: true if device if could be added/removed while machine is running
+#
+# Since: 2.1
+##
+{ 'struct': 'PCDIMMDeviceInfo',
+  'data': { '*id': 'str',
+            'addr': 'int',
+            'size': 'int',
+            'slot': 'int',
+            'node': 'int',
+            'memdev': 'str',
+            'hotplugged': 'bool',
+            'hotpluggable': 'bool'
+          }
+}
+
+##
+# @VirtioPMEMDeviceInfo:
+#
+# VirtioPMEM state information
+#
+# @id: device's ID
+#
+# @memaddr: physical address in memory, where device is mapped
+#
+# @size: size of memory that the device provides
+#
+# @memdev: memory backend linked with device
+#
+# Since: 4.1
+##
+{ 'struct': 'VirtioPMEMDeviceInfo',
+  'data': { '*id': 'str',
+            'memaddr': 'size',
+            'size': 'size',
+            'memdev': 'str'
+          }
+}
+
+##
+# @VirtioMEMDeviceInfo:
+#
+# VirtioMEMDevice state information
+#
+# @id: device's ID
+#
+# @memaddr: physical address in memory, where device is mapped
+#
+# @requested-size: the user requested size of the device
+#
+# @size: the (current) size of memory that the device provides
+#
+# @max-size: the maximum size of memory that the device can provide
+#
+# @block-size: the block size of memory that the device provides
+#
+# @node: NUMA node number where device is assigned to
+#
+# @memdev: memory backend linked with the region
+#
+# Since: 5.1
+##
+{ 'struct': 'VirtioMEMDeviceInfo',
+  'data': { '*id': 'str',
+            'memaddr': 'size',
+            'requested-size': 'size',
+            'size': 'size',
+            'max-size': 'size',
+            'block-size': 'size',
+            'node': 'int',
+            'memdev': 'str'
+          }
+}
+
+##
+# @MemoryDeviceInfo:
+#
+# Union containing information about a memory device
+#
+# nvdimm is included since 2.12. virtio-pmem is included since 4.1.
+# virtio-mem is included since 5.1.
+#
+# Since: 2.1
+##
+{ 'union': 'MemoryDeviceInfo',
+  'data': { 'dimm': 'PCDIMMDeviceInfo',
+            'nvdimm': 'PCDIMMDeviceInfo',
+            'virtio-pmem': 'VirtioPMEMDeviceInfo',
+            'virtio-mem': 'VirtioMEMDeviceInfo'
+          }
+}
+
+##
+# @query-memory-devices:
+#
+# Lists available memory devices and their state
+#
+# Since: 2.1
+#
+# Example:
+#
+# -> { "execute": "query-memory-devices" }
+# <- { "return": [ { "data":
+#                       { "addr": 5368709120,
+#                         "hotpluggable": true,
+#                         "hotplugged": true,
+#                         "id": "d1",
+#                         "memdev": "/objects/memX",
+#                         "node": 0,
+#                         "size": 1073741824,
+#                         "slot": 0},
+#                    "type": "dimm"
+#                  } ] }
+#
+##
+{ 'command': 'query-memory-devices', 'returns': ['MemoryDeviceInfo'] }
+
+##
+# @MEMORY_DEVICE_SIZE_CHANGE:
+#
+# Emitted when the size of a memory device changes. Only emitted for memory
+# devices that can actually change the size (e.g., virtio-mem due to guest
+# action).
+#
+# @id: device's ID
+# @size: the new size of memory that the device provides
+#
+# Note: this event is rate-limited.
+#
+# Since: 5.1
+#
+# Example:
+#
+# <- { "event": "MEMORY_DEVICE_SIZE_CHANGE",
+#      "data": { "id": "vm0", "size": 1073741824},
+#      "timestamp": { "seconds": 1588168529, "microseconds": 201316 } }
+#
+##
+{ 'event': 'MEMORY_DEVICE_SIZE_CHANGE',
+  'data': { '*id': 'str', 'size': 'size' } }
+
+
+##
+# @MEM_UNPLUG_ERROR:
+#
+# Emitted when memory hot unplug error occurs.
+#
+# @device: device name
+#
+# @msg: Informative message
+#
+# Since: 2.4
+#
+# Example:
+#
+# <- { "event": "MEM_UNPLUG_ERROR"
+#      "data": { "device": "dimm1",
+#                "msg": "acpi: device unplug for unsupported device"
+#      },
+#      "timestamp": { "seconds": 1265044230, "microseconds": 450486 } }
+#
+##
+{ 'event': 'MEM_UNPLUG_ERROR',
+  'data': { 'device': 'str', 'msg': 'str' } }
diff --git a/qapi/meson.build b/qapi/meson.build
new file mode 100644
index 000000000..0e98146f1
--- /dev/null
+++ b/qapi/meson.build
@@ -0,0 +1,128 @@
+util_ss.add(files(
+  'opts-visitor.c',
+  'qapi-clone-visitor.c',
+  'qapi-dealloc-visitor.c',
+  'qapi-util.c',
+  'qapi-visit-core.c',
+  'qmp-dispatch.c',
+  'qmp-event.c',
+  'qmp-registry.c',
+  'qobject-input-visitor.c',
+  'qobject-output-visitor.c',
+  'string-input-visitor.c',
+  'string-output-visitor.c',
+))
+
+qapi_all_modules = [
+  'acpi',
+  'audio',
+  'authz',
+  'block',
+  'block-core',
+  'block-export',
+  'char',
+  'common',
+  'control',
+  'crypto',
+  'dump',
+  'error',
+  'introspect',
+  'job',
+  'machine',
+  'machine-target',
+  'migration',
+  'misc',
+  'misc-target',
+  'net',
+  'pragma',
+  'qdev',
+  'pci',
+  'qom',
+  'rdma',
+  'replay',
+  'rocker',
+  'run-state',
+  'sockets',
+  'tpm',
+  'trace',
+  'transaction',
+  'ui',
+]
+
+qapi_storage_daemon_modules = [
+  'block-core',
+  'block-export',
+  'char',
+  'common',
+  'control',
+  'crypto',
+  'introspect',
+  'job',
+  'qom',
+  'sockets',
+  'pragma',
+  'transaction',
+]
+
+qapi_nonmodule_outputs = [
+  'qapi-introspect.c', 'qapi-introspect.h',
+  'qapi-types.c', 'qapi-types.h',
+  'qapi-visit.h', 'qapi-visit.c',
+  'qapi-commands.h', 'qapi-commands.c',
+  'qapi-init-commands.h', 'qapi-init-commands.c',
+  'qapi-events.h', 'qapi-events.c',
+  'qapi-emit-events.c', 'qapi-emit-events.h',
+]
+
+# First build all sources
+qapi_util_outputs = [
+  'qapi-builtin-types.c', 'qapi-builtin-visit.c',
+  'qapi-builtin-types.h', 'qapi-builtin-visit.h',
+]
+
+qapi_inputs = []
+qapi_specific_outputs = []
+foreach module : qapi_all_modules
+  qapi_inputs += [ files(module + '.json') ]
+  qapi_module_outputs = [
+    'qapi-types-@0@.c'.format(module),
+    'qapi-types-@0@.h'.format(module),
+    'qapi-visit-@0@.c'.format(module),
+    'qapi-visit-@0@.h'.format(module),
+    'qapi-events-@0@.c'.format(module),
+    'qapi-events-@0@.h'.format(module),
+    'qapi-commands-@0@.c'.format(module),
+    'qapi-commands-@0@.h'.format(module),
+  ]
+  if module.endswith('-target')
+    qapi_specific_outputs += qapi_module_outputs
+  else
+    qapi_util_outputs += qapi_module_outputs
+  endif
+endforeach
+
+qapi_files = custom_target('shared QAPI source files',
+  output: qapi_util_outputs + qapi_specific_outputs + qapi_nonmodule_outputs,
+  input: [ files('qapi-schema.json') ],
+  command: [ qapi_gen, '-o', 'qapi', '-b', '@INPUT0@' ],
+  depend_files: [ qapi_inputs, qapi_gen_depends ])
+
+# Now go through all the outputs and add them to the right sourceset.
+# These loops must be synchronized with the output of the above custom target.
+
+i = 0
+foreach output : qapi_util_outputs
+  if output.endswith('.h')
+    genh += qapi_files[i]
+  endif
+  util_ss.add(qapi_files[i])
+  i = i + 1
+endforeach
+
+foreach output : qapi_specific_outputs + qapi_nonmodule_outputs
+  if output.endswith('.h')
+    genh += qapi_files[i]
+  endif
+  specific_ss.add(when: 'CONFIG_SOFTMMU', if_true: qapi_files[i])
+  i = i + 1
+endforeach
diff --git a/qapi/migration.json b/qapi/migration.json
new file mode 100644
index 000000000..3c7582052
--- /dev/null
+++ b/qapi/migration.json
@@ -0,0 +1,1845 @@
+# -*- Mode: Python -*-
+# vim: filetype=python
+#
+
+##
+# = Migration
+##
+
+{ 'include': 'common.json' }
+{ 'include': 'sockets.json' }
+
+##
+# @MigrationStats:
+#
+# Detailed migration status.
+#
+# @transferred: amount of bytes already transferred to the target VM
+#
+# @remaining: amount of bytes remaining to be transferred to the target VM
+#
+# @total: total amount of bytes involved in the migration process
+#
+# @duplicate: number of duplicate (zero) pages (since 1.2)
+#
+# @skipped: number of skipped zero pages (since 1.5)
+#
+# @normal: number of normal pages (since 1.2)
+#
+# @normal-bytes: number of normal bytes sent (since 1.2)
+#
+# @dirty-pages-rate: number of pages dirtied by second by the
+#                    guest (since 1.3)
+#
+# @mbps: throughput in megabits/sec. (since 1.6)
+#
+# @dirty-sync-count: number of times that dirty ram was synchronized (since 2.1)
+#
+# @postcopy-requests: The number of page requests received from the destination
+#                     (since 2.7)
+#
+# @page-size: The number of bytes per page for the various page-based
+#             statistics (since 2.10)
+#
+# @multifd-bytes: The number of bytes sent through multifd (since 3.0)
+#
+# @pages-per-second: the number of memory pages transferred per second
+#                    (Since 4.0)
+#
+# Since: 0.14.0
+##
+{ 'struct': 'MigrationStats',
+  'data': {'transferred': 'int', 'remaining': 'int', 'total': 'int' ,
+           'duplicate': 'int', 'skipped': 'int', 'normal': 'int',
+           'normal-bytes': 'int', 'dirty-pages-rate' : 'int',
+           'mbps' : 'number', 'dirty-sync-count' : 'int',
+           'postcopy-requests' : 'int', 'page-size' : 'int',
+           'multifd-bytes' : 'uint64', 'pages-per-second' : 'uint64' } }
+
+##
+# @XBZRLECacheStats:
+#
+# Detailed XBZRLE migration cache statistics
+#
+# @cache-size: XBZRLE cache size
+#
+# @bytes: amount of bytes already transferred to the target VM
+#
+# @pages: amount of pages transferred to the target VM
+#
+# @cache-miss: number of cache miss
+#
+# @cache-miss-rate: rate of cache miss (since 2.1)
+#
+# @encoding-rate: rate of encoded bytes (since 5.1)
+#
+# @overflow: number of overflows
+#
+# Since: 1.2
+##
+{ 'struct': 'XBZRLECacheStats',
+  'data': {'cache-size': 'int', 'bytes': 'int', 'pages': 'int',
+           'cache-miss': 'int', 'cache-miss-rate': 'number',
+           'encoding-rate': 'number', 'overflow': 'int' } }
+
+##
+# @CompressionStats:
+#
+# Detailed migration compression statistics
+#
+# @pages: amount of pages compressed and transferred to the target VM
+#
+# @busy: count of times that no free thread was available to compress data
+#
+# @busy-rate: rate of thread busy
+#
+# @compressed-size: amount of bytes after compression
+#
+# @compression-rate: rate of compressed size
+#
+# Since: 3.1
+##
+{ 'struct': 'CompressionStats',
+  'data': {'pages': 'int', 'busy': 'int', 'busy-rate': 'number',
+           'compressed-size': 'int', 'compression-rate': 'number' } }
+
+##
+# @MigrationStatus:
+#
+# An enumeration of migration status.
+#
+# @none: no migration has ever happened.
+#
+# @setup: migration process has been initiated.
+#
+# @cancelling: in the process of cancelling migration.
+#
+# @cancelled: cancelling migration is finished.
+#
+# @active: in the process of doing migration.
+#
+# @postcopy-active: like active, but now in postcopy mode. (since 2.5)
+#
+# @postcopy-paused: during postcopy but paused. (since 3.0)
+#
+# @postcopy-recover: trying to recover from a paused postcopy. (since 3.0)
+#
+# @completed: migration is finished.
+#
+# @failed: some error occurred during migration process.
+#
+# @colo: VM is in the process of fault tolerance, VM can not get into this
+#        state unless colo capability is enabled for migration. (since 2.8)
+#
+# @pre-switchover: Paused before device serialisation. (since 2.11)
+#
+# @device: During device serialisation when pause-before-switchover is enabled
+#          (since 2.11)
+#
+# @wait-unplug: wait for device unplug request by guest OS to be completed.
+#               (since 4.2)
+#
+# Since: 2.3
+#
+##
+{ 'enum': 'MigrationStatus',
+  'data': [ 'none', 'setup', 'cancelling', 'cancelled',
+            'active', 'postcopy-active', 'postcopy-paused',
+            'postcopy-recover', 'completed', 'failed', 'colo',
+            'pre-switchover', 'device', 'wait-unplug' ] }
+##
+# @VfioStats:
+#
+# Detailed VFIO devices migration statistics
+#
+# @transferred: amount of bytes transferred to the target VM by VFIO devices
+#
+# Since: 5.2
+#
+##
+{ 'struct': 'VfioStats',
+  'data': {'transferred': 'int' } }
+
+##
+# @MigrationInfo:
+#
+# Information about current migration process.
+#
+# @status: @MigrationStatus describing the current migration status.
+#          If this field is not returned, no migration process
+#          has been initiated
+#
+# @ram: @MigrationStats containing detailed migration
+#       status, only returned if status is 'active' or
+#       'completed'(since 1.2)
+#
+# @disk: @MigrationStats containing detailed disk migration
+#        status, only returned if status is 'active' and it is a block
+#        migration
+#
+# @xbzrle-cache: @XBZRLECacheStats containing detailed XBZRLE
+#                migration statistics, only returned if XBZRLE feature is on and
+#                status is 'active' or 'completed' (since 1.2)
+#
+# @total-time: total amount of milliseconds since migration started.
+#              If migration has ended, it returns the total migration
+#              time. (since 1.2)
+#
+# @downtime: only present when migration finishes correctly
+#            total downtime in milliseconds for the guest.
+#            (since 1.3)
+#
+# @expected-downtime: only present while migration is active
+#                     expected downtime in milliseconds for the guest in last walk
+#                     of the dirty bitmap. (since 1.3)
+#
+# @setup-time: amount of setup time in milliseconds *before* the
+#              iterations begin but *after* the QMP command is issued. This is designed
+#              to provide an accounting of any activities (such as RDMA pinning) which
+#              may be expensive, but do not actually occur during the iterative
+#              migration rounds themselves. (since 1.6)
+#
+# @cpu-throttle-percentage: percentage of time guest cpus are being
+#                           throttled during auto-converge. This is only present when auto-converge
+#                           has started throttling guest cpus. (Since 2.7)
+#
+# @error-desc: the human readable error description string, when
+#              @status is 'failed'. Clients should not attempt to parse the
+#              error strings. (Since 2.7)
+#
+# @postcopy-blocktime: total time when all vCPU were blocked during postcopy
+#                      live migration. This is only present when the postcopy-blocktime
+#                      migration capability is enabled. (Since 3.0)
+#
+# @postcopy-vcpu-blocktime: list of the postcopy blocktime per vCPU.  This is
+#                           only present when the postcopy-blocktime migration capability
+#                           is enabled. (Since 3.0)
+#
+# @compression: migration compression statistics, only returned if compression
+#               feature is on and status is 'active' or 'completed' (Since 3.1)
+#
+# @socket-address: Only used for tcp, to know what the real port is (Since 4.0)
+#
+# @vfio: @VfioStats containing detailed VFIO devices migration statistics,
+#        only returned if VFIO device is present, migration is supported by all
+#        VFIO devices and status is 'active' or 'completed' (since 5.2)
+#
+# Since: 0.14.0
+##
+{ 'struct': 'MigrationInfo',
+  'data': {'*status': 'MigrationStatus', '*ram': 'MigrationStats',
+           '*disk': 'MigrationStats',
+           '*vfio': 'VfioStats',
+           '*xbzrle-cache': 'XBZRLECacheStats',
+           '*total-time': 'int',
+           '*expected-downtime': 'int',
+           '*downtime': 'int',
+           '*setup-time': 'int',
+           '*cpu-throttle-percentage': 'int',
+           '*error-desc': 'str',
+           '*postcopy-blocktime' : 'uint32',
+           '*postcopy-vcpu-blocktime': ['uint32'],
+           '*compression': 'CompressionStats',
+           '*socket-address': ['SocketAddress'] } }
+
+##
+# @query-migrate:
+#
+# Returns information about current migration process. If migration
+# is active there will be another json-object with RAM migration
+# status and if block migration is active another one with block
+# migration status.
+#
+# Returns: @MigrationInfo
+#
+# Since: 0.14.0
+#
+# Example:
+#
+# 1. Before the first migration
+#
+# -> { "execute": "query-migrate" }
+# <- { "return": {} }
+#
+# 2. Migration is done and has succeeded
+#
+# -> { "execute": "query-migrate" }
+# <- { "return": {
+#         "status": "completed",
+#         "total-time":12345,
+#         "setup-time":12345,
+#         "downtime":12345,
+#         "ram":{
+#           "transferred":123,
+#           "remaining":123,
+#           "total":246,
+#           "duplicate":123,
+#           "normal":123,
+#           "normal-bytes":123456,
+#           "dirty-sync-count":15
+#         }
+#      }
+#    }
+#
+# 3. Migration is done and has failed
+#
+# -> { "execute": "query-migrate" }
+# <- { "return": { "status": "failed" } }
+#
+# 4. Migration is being performed and is not a block migration:
+#
+# -> { "execute": "query-migrate" }
+# <- {
+#       "return":{
+#          "status":"active",
+#          "total-time":12345,
+#          "setup-time":12345,
+#          "expected-downtime":12345,
+#          "ram":{
+#             "transferred":123,
+#             "remaining":123,
+#             "total":246,
+#             "duplicate":123,
+#             "normal":123,
+#             "normal-bytes":123456,
+#             "dirty-sync-count":15
+#          }
+#       }
+#    }
+#
+# 5. Migration is being performed and is a block migration:
+#
+# -> { "execute": "query-migrate" }
+# <- {
+#       "return":{
+#          "status":"active",
+#          "total-time":12345,
+#          "setup-time":12345,
+#          "expected-downtime":12345,
+#          "ram":{
+#             "total":1057024,
+#             "remaining":1053304,
+#             "transferred":3720,
+#             "duplicate":123,
+#             "normal":123,
+#             "normal-bytes":123456,
+#             "dirty-sync-count":15
+#          },
+#          "disk":{
+#             "total":20971520,
+#             "remaining":20880384,
+#             "transferred":91136
+#          }
+#       }
+#    }
+#
+# 6. Migration is being performed and XBZRLE is active:
+#
+# -> { "execute": "query-migrate" }
+# <- {
+#       "return":{
+#          "status":"active",
+#          "total-time":12345,
+#          "setup-time":12345,
+#          "expected-downtime":12345,
+#          "ram":{
+#             "total":1057024,
+#             "remaining":1053304,
+#             "transferred":3720,
+#             "duplicate":10,
+#             "normal":3333,
+#             "normal-bytes":3412992,
+#             "dirty-sync-count":15
+#          },
+#          "xbzrle-cache":{
+#             "cache-size":67108864,
+#             "bytes":20971520,
+#             "pages":2444343,
+#             "cache-miss":2244,
+#             "cache-miss-rate":0.123,
+#             "encoding-rate":80.1,
+#             "overflow":34434
+#          }
+#       }
+#    }
+#
+##
+{ 'command': 'query-migrate', 'returns': 'MigrationInfo' }
+
+##
+# @MigrationCapability:
+#
+# Migration capabilities enumeration
+#
+# @xbzrle: Migration supports xbzrle (Xor Based Zero Run Length Encoding).
+#          This feature allows us to minimize migration traffic for certain work
+#          loads, by sending compressed difference of the pages
+#
+# @rdma-pin-all: Controls whether or not the entire VM memory footprint is
+#                mlock()'d on demand or all at once. Refer to docs/rdma.txt for usage.
+#                Disabled by default. (since 2.0)
+#
+# @zero-blocks: During storage migration encode blocks of zeroes efficiently. This
+#               essentially saves 1MB of zeroes per block on the wire. Enabling requires
+#               source and target VM to support this feature. To enable it is sufficient
+#               to enable the capability on the source VM. The feature is disabled by
+#               default. (since 1.6)
+#
+# @compress: Use multiple compression threads to accelerate live migration.
+#            This feature can help to reduce the migration traffic, by sending
+#            compressed pages. Please note that if compress and xbzrle are both
+#            on, compress only takes effect in the ram bulk stage, after that,
+#            it will be disabled and only xbzrle takes effect, this can help to
+#            minimize migration traffic. The feature is disabled by default.
+#            (since 2.4 )
+#
+# @events: generate events for each migration state change
+#          (since 2.4 )
+#
+# @auto-converge: If enabled, QEMU will automatically throttle down the guest
+#                 to speed up convergence of RAM migration. (since 1.6)
+#
+# @postcopy-ram: Start executing on the migration target before all of RAM has
+#                been migrated, pulling the remaining pages along as needed. The
+#                capacity must have the same setting on both source and target
+#                or migration will not even start. NOTE: If the migration fails during
+#                postcopy the VM will fail.  (since 2.6)
+#
+# @x-colo: If enabled, migration will never end, and the state of the VM on the
+#          primary side will be migrated continuously to the VM on secondary
+#          side, this process is called COarse-Grain LOck Stepping (COLO) for
+#          Non-stop Service. (since 2.8)
+#
+# @release-ram: if enabled, qemu will free the migrated ram pages on the source
+#               during postcopy-ram migration. (since 2.9)
+#
+# @block: If enabled, QEMU will also migrate the contents of all block
+#         devices.  Default is disabled.  A possible alternative uses
+#         mirror jobs to a builtin NBD server on the destination, which
+#         offers more flexibility.
+#         (Since 2.10)
+#
+# @return-path: If enabled, migration will use the return path even
+#               for precopy. (since 2.10)
+#
+# @pause-before-switchover: Pause outgoing migration before serialising device
+#                           state and before disabling block IO (since 2.11)
+#
+# @multifd: Use more than one fd for migration (since 4.0)
+#
+# @dirty-bitmaps: If enabled, QEMU will migrate named dirty bitmaps.
+#                 (since 2.12)
+#
+# @postcopy-blocktime: Calculate downtime for postcopy live migration
+#                      (since 3.0)
+#
+# @late-block-activate: If enabled, the destination will not activate block
+#                       devices (and thus take locks) immediately at the end of migration.
+#                       (since 3.0)
+#
+# @x-ignore-shared: If enabled, QEMU will not migrate shared memory (since 4.0)
+#
+# @validate-uuid: Send the UUID of the source to allow the destination
+#                 to ensure it is the same. (since 4.2)
+#
+# Since: 1.2
+##
+{ 'enum': 'MigrationCapability',
+  'data': ['xbzrle', 'rdma-pin-all', 'auto-converge', 'zero-blocks',
+           'compress', 'events', 'postcopy-ram', 'x-colo', 'release-ram',
+           'block', 'return-path', 'pause-before-switchover', 'multifd',
+           'dirty-bitmaps', 'postcopy-blocktime', 'late-block-activate',
+           'x-ignore-shared', 'validate-uuid' ] }
+
+##
+# @MigrationCapabilityStatus:
+#
+# Migration capability information
+#
+# @capability: capability enum
+#
+# @state: capability state bool
+#
+# Since: 1.2
+##
+{ 'struct': 'MigrationCapabilityStatus',
+  'data': { 'capability' : 'MigrationCapability', 'state' : 'bool' } }
+
+##
+# @migrate-set-capabilities:
+#
+# Enable/Disable the following migration capabilities (like xbzrle)
+#
+# @capabilities: json array of capability modifications to make
+#
+# Since: 1.2
+#
+# Example:
+#
+# -> { "execute": "migrate-set-capabilities" , "arguments":
+#      { "capabilities": [ { "capability": "xbzrle", "state": true } ] } }
+#
+##
+{ 'command': 'migrate-set-capabilities',
+  'data': { 'capabilities': ['MigrationCapabilityStatus'] } }
+
+##
+# @query-migrate-capabilities:
+#
+# Returns information about the current migration capabilities status
+#
+# Returns: @MigrationCapabilitiesStatus
+#
+# Since: 1.2
+#
+# Example:
+#
+# -> { "execute": "query-migrate-capabilities" }
+# <- { "return": [
+#       {"state": false, "capability": "xbzrle"},
+#       {"state": false, "capability": "rdma-pin-all"},
+#       {"state": false, "capability": "auto-converge"},
+#       {"state": false, "capability": "zero-blocks"},
+#       {"state": false, "capability": "compress"},
+#       {"state": true, "capability": "events"},
+#       {"state": false, "capability": "postcopy-ram"},
+#       {"state": false, "capability": "x-colo"}
+#    ]}
+#
+##
+{ 'command': 'query-migrate-capabilities', 'returns':   ['MigrationCapabilityStatus']}
+
+##
+# @MultiFDCompression:
+#
+# An enumeration of multifd compression methods.
+#
+# @none: no compression.
+# @zlib: use zlib compression method.
+# @zstd: use zstd compression method.
+#
+# Since: 5.0
+#
+##
+{ 'enum': 'MultiFDCompression',
+  'data': [ 'none', 'zlib',
+            { 'name': 'zstd', 'if': 'defined(CONFIG_ZSTD)' } ] }
+
+##
+# @BitmapMigrationBitmapAlias:
+#
+# @name: The name of the bitmap.
+#
+# @alias: An alias name for migration (for example the bitmap name on
+#         the opposite site).
+#
+# Since: 5.2
+##
+{ 'struct': 'BitmapMigrationBitmapAlias',
+  'data': {
+      'name': 'str',
+      'alias': 'str'
+  } }
+
+##
+# @BitmapMigrationNodeAlias:
+#
+# Maps a block node name and the bitmaps it has to aliases for dirty
+# bitmap migration.
+#
+# @node-name: A block node name.
+#
+# @alias: An alias block node name for migration (for example the
+#         node name on the opposite site).
+#
+# @bitmaps: Mappings for the bitmaps on this node.
+#
+# Since: 5.2
+##
+{ 'struct': 'BitmapMigrationNodeAlias',
+  'data': {
+      'node-name': 'str',
+      'alias': 'str',
+      'bitmaps': [ 'BitmapMigrationBitmapAlias' ]
+  } }
+
+##
+# @MigrationParameter:
+#
+# Migration parameters enumeration
+#
+# @announce-initial: Initial delay (in milliseconds) before sending the first
+#                    announce (Since 4.0)
+#
+# @announce-max: Maximum delay (in milliseconds) between packets in the
+#                announcement (Since 4.0)
+#
+# @announce-rounds: Number of self-announce packets sent after migration
+#                   (Since 4.0)
+#
+# @announce-step: Increase in delay (in milliseconds) between subsequent
+#                 packets in the announcement (Since 4.0)
+#
+# @compress-level: Set the compression level to be used in live migration,
+#                  the compression level is an integer between 0 and 9, where 0 means
+#                  no compression, 1 means the best compression speed, and 9 means best
+#                  compression ratio which will consume more CPU.
+#
+# @compress-threads: Set compression thread count to be used in live migration,
+#                    the compression thread count is an integer between 1 and 255.
+#
+# @compress-wait-thread: Controls behavior when all compression threads are
+#                        currently busy. If true (default), wait for a free
+#                        compression thread to become available; otherwise,
+#                        send the page uncompressed. (Since 3.1)
+#
+# @decompress-threads: Set decompression thread count to be used in live
+#                      migration, the decompression thread count is an integer between 1
+#                      and 255. Usually, decompression is at least 4 times as fast as
+#                      compression, so set the decompress-threads to the number about 1/4
+#                      of compress-threads is adequate.
+#
+# @throttle-trigger-threshold: The ratio of bytes_dirty_period and bytes_xfer_period
+#                              to trigger throttling. It is expressed as percentage.
+#                              The default value is 50. (Since 5.0)
+#
+# @cpu-throttle-initial: Initial percentage of time guest cpus are throttled
+#                        when migration auto-converge is activated. The
+#                        default value is 20. (Since 2.7)
+#
+# @cpu-throttle-increment: throttle percentage increase each time
+#                          auto-converge detects that migration is not making
+#                          progress. The default value is 10. (Since 2.7)
+#
+# @cpu-throttle-tailslow: Make CPU throttling slower at tail stage
+#                         At the tail stage of throttling, the Guest is very
+#                         sensitive to CPU percentage while the @cpu-throttle
+#                         -increment is excessive usually at tail stage.
+#                         If this parameter is true, we will compute the ideal
+#                         CPU percentage used by the Guest, which may exactly make
+#                         the dirty rate match the dirty rate threshold. Then we
+#                         will choose a smaller throttle increment between the
+#                         one specified by @cpu-throttle-increment and the one
+#                         generated by ideal CPU percentage.
+#                         Therefore, it is compatible to traditional throttling,
+#                         meanwhile the throttle increment won't be excessive
+#                         at tail stage.
+#                         The default value is false. (Since 5.1)
+#
+# @tls-creds: ID of the 'tls-creds' object that provides credentials for
+#             establishing a TLS connection over the migration data channel.
+#             On the outgoing side of the migration, the credentials must
+#             be for a 'client' endpoint, while for the incoming side the
+#             credentials must be for a 'server' endpoint. Setting this
+#             will enable TLS for all migrations. The default is unset,
+#             resulting in unsecured migration at the QEMU level. (Since 2.7)
+#
+# @tls-hostname: hostname of the target host for the migration. This is
+#                required when using x509 based TLS credentials and the
+#                migration URI does not already include a hostname. For
+#                example if using fd: or exec: based migration, the
+#                hostname must be provided so that the server's x509
+#                certificate identity can be validated. (Since 2.7)
+#
+# @tls-authz: ID of the 'authz' object subclass that provides access control
+#             checking of the TLS x509 certificate distinguished name.
+#             This object is only resolved at time of use, so can be deleted
+#             and recreated on the fly while the migration server is active.
+#             If missing, it will default to denying access (Since 4.0)
+#
+# @max-bandwidth: to set maximum speed for migration. maximum speed in
+#                 bytes per second. (Since 2.8)
+#
+# @downtime-limit: set maximum tolerated downtime for migration. maximum
+#                  downtime in milliseconds (Since 2.8)
+#
+# @x-checkpoint-delay: The delay time (in ms) between two COLO checkpoints in
+#                      periodic mode. (Since 2.8)
+#
+# @block-incremental: Affects how much storage is migrated when the
+#                     block migration capability is enabled.  When false, the entire
+#                     storage backing chain is migrated into a flattened image at
+#                     the destination; when true, only the active qcow2 layer is
+#                     migrated and the destination must already have access to the
+#                     same backing chain as was used on the source.  (since 2.10)
+#
+# @multifd-channels: Number of channels used to migrate data in
+#                    parallel. This is the same number that the
+#                    number of sockets used for migration.  The
+#                    default value is 2 (since 4.0)
+#
+# @xbzrle-cache-size: cache size to be used by XBZRLE migration.  It
+#                     needs to be a multiple of the target page size
+#                     and a power of 2
+#                     (Since 2.11)
+#
+# @max-postcopy-bandwidth: Background transfer bandwidth during postcopy.
+#                          Defaults to 0 (unlimited).  In bytes per second.
+#                          (Since 3.0)
+#
+# @max-cpu-throttle: maximum cpu throttle percentage.
+#                    Defaults to 99. (Since 3.1)
+#
+# @multifd-compression: Which compression method to use.
+#                       Defaults to none. (Since 5.0)
+#
+# @multifd-zlib-level: Set the compression level to be used in live
+#                      migration, the compression level is an integer between 0
+#                      and 9, where 0 means no compression, 1 means the best
+#                      compression speed, and 9 means best compression ratio which
+#                      will consume more CPU.
+#                      Defaults to 1. (Since 5.0)
+#
+# @multifd-zstd-level: Set the compression level to be used in live
+#                      migration, the compression level is an integer between 0
+#                      and 20, where 0 means no compression, 1 means the best
+#                      compression speed, and 20 means best compression ratio which
+#                      will consume more CPU.
+#                      Defaults to 1. (Since 5.0)
+#
+# @block-bitmap-mapping: Maps block nodes and bitmaps on them to
+#                        aliases for the purpose of dirty bitmap migration.  Such
+#                        aliases may for example be the corresponding names on the
+#                        opposite site.
+#                        The mapping must be one-to-one, but not necessarily
+#                        complete: On the source, unmapped bitmaps and all bitmaps
+#                        on unmapped nodes will be ignored.  On the destination,
+#                        encountering an unmapped alias in the incoming migration
+#                        stream will result in a report, and all further bitmap
+#                        migration data will then be discarded.
+#                        Note that the destination does not know about bitmaps it
+#                        does not receive, so there is no limitation or requirement
+#                        regarding the number of bitmaps received, or how they are
+#                        named, or on which nodes they are placed.
+#                        By default (when this parameter has never been set), bitmap
+#                        names are mapped to themselves.  Nodes are mapped to their
+#                        block device name if there is one, and to their node name
+#                        otherwise. (Since 5.2)
+#
+# Since: 2.4
+##
+{ 'enum': 'MigrationParameter',
+  'data': ['announce-initial', 'announce-max',
+           'announce-rounds', 'announce-step',
+           'compress-level', 'compress-threads', 'decompress-threads',
+           'compress-wait-thread', 'throttle-trigger-threshold',
+           'cpu-throttle-initial', 'cpu-throttle-increment',
+           'cpu-throttle-tailslow',
+           'tls-creds', 'tls-hostname', 'tls-authz', 'max-bandwidth',
+           'downtime-limit', 'x-checkpoint-delay', 'block-incremental',
+           'multifd-channels',
+           'xbzrle-cache-size', 'max-postcopy-bandwidth',
+           'max-cpu-throttle', 'multifd-compression',
+           'multifd-zlib-level' ,'multifd-zstd-level',
+           'block-bitmap-mapping' ] }
+
+##
+# @MigrateSetParameters:
+#
+# @announce-initial: Initial delay (in milliseconds) before sending the first
+#                    announce (Since 4.0)
+#
+# @announce-max: Maximum delay (in milliseconds) between packets in the
+#                announcement (Since 4.0)
+#
+# @announce-rounds: Number of self-announce packets sent after migration
+#                   (Since 4.0)
+#
+# @announce-step: Increase in delay (in milliseconds) between subsequent
+#                 packets in the announcement (Since 4.0)
+#
+# @compress-level: compression level
+#
+# @compress-threads: compression thread count
+#
+# @compress-wait-thread: Controls behavior when all compression threads are
+#                        currently busy. If true (default), wait for a free
+#                        compression thread to become available; otherwise,
+#                        send the page uncompressed. (Since 3.1)
+#
+# @decompress-threads: decompression thread count
+#
+# @throttle-trigger-threshold: The ratio of bytes_dirty_period and bytes_xfer_period
+#                              to trigger throttling. It is expressed as percentage.
+#                              The default value is 50. (Since 5.0)
+#
+# @cpu-throttle-initial: Initial percentage of time guest cpus are
+#                        throttled when migration auto-converge is activated.
+#                        The default value is 20. (Since 2.7)
+#
+# @cpu-throttle-increment: throttle percentage increase each time
+#                          auto-converge detects that migration is not making
+#                          progress. The default value is 10. (Since 2.7)
+#
+# @cpu-throttle-tailslow: Make CPU throttling slower at tail stage
+#                         At the tail stage of throttling, the Guest is very
+#                         sensitive to CPU percentage while the @cpu-throttle
+#                         -increment is excessive usually at tail stage.
+#                         If this parameter is true, we will compute the ideal
+#                         CPU percentage used by the Guest, which may exactly make
+#                         the dirty rate match the dirty rate threshold. Then we
+#                         will choose a smaller throttle increment between the
+#                         one specified by @cpu-throttle-increment and the one
+#                         generated by ideal CPU percentage.
+#                         Therefore, it is compatible to traditional throttling,
+#                         meanwhile the throttle increment won't be excessive
+#                         at tail stage.
+#                         The default value is false. (Since 5.1)
+#
+# @tls-creds: ID of the 'tls-creds' object that provides credentials
+#             for establishing a TLS connection over the migration data
+#             channel. On the outgoing side of the migration, the credentials
+#             must be for a 'client' endpoint, while for the incoming side the
+#             credentials must be for a 'server' endpoint. Setting this
+#             to a non-empty string enables TLS for all migrations.
+#             An empty string means that QEMU will use plain text mode for
+#             migration, rather than TLS (Since 2.9)
+#             Previously (since 2.7), this was reported by omitting
+#             tls-creds instead.
+#
+# @tls-hostname: hostname of the target host for the migration. This
+#                is required when using x509 based TLS credentials and the
+#                migration URI does not already include a hostname. For
+#                example if using fd: or exec: based migration, the
+#                hostname must be provided so that the server's x509
+#                certificate identity can be validated. (Since 2.7)
+#                An empty string means that QEMU will use the hostname
+#                associated with the migration URI, if any. (Since 2.9)
+#                Previously (since 2.7), this was reported by omitting
+#                tls-hostname instead.
+#
+# @max-bandwidth: to set maximum speed for migration. maximum speed in
+#                 bytes per second. (Since 2.8)
+#
+# @downtime-limit: set maximum tolerated downtime for migration. maximum
+#                  downtime in milliseconds (Since 2.8)
+#
+# @x-checkpoint-delay: the delay time between two COLO checkpoints. (Since 2.8)
+#
+# @block-incremental: Affects how much storage is migrated when the
+#                     block migration capability is enabled.  When false, the entire
+#                     storage backing chain is migrated into a flattened image at
+#                     the destination; when true, only the active qcow2 layer is
+#                     migrated and the destination must already have access to the
+#                     same backing chain as was used on the source.  (since 2.10)
+#
+# @multifd-channels: Number of channels used to migrate data in
+#                    parallel. This is the same number that the
+#                    number of sockets used for migration.  The
+#                    default value is 2 (since 4.0)
+#
+# @xbzrle-cache-size: cache size to be used by XBZRLE migration.  It
+#                     needs to be a multiple of the target page size
+#                     and a power of 2
+#                     (Since 2.11)
+#
+# @max-postcopy-bandwidth: Background transfer bandwidth during postcopy.
+#                          Defaults to 0 (unlimited).  In bytes per second.
+#                          (Since 3.0)
+#
+# @max-cpu-throttle: maximum cpu throttle percentage.
+#                    The default value is 99. (Since 3.1)
+#
+# @multifd-compression: Which compression method to use.
+#                       Defaults to none. (Since 5.0)
+#
+# @multifd-zlib-level: Set the compression level to be used in live
+#                      migration, the compression level is an integer between 0
+#                      and 9, where 0 means no compression, 1 means the best
+#                      compression speed, and 9 means best compression ratio which
+#                      will consume more CPU.
+#                      Defaults to 1. (Since 5.0)
+#
+# @multifd-zstd-level: Set the compression level to be used in live
+#                      migration, the compression level is an integer between 0
+#                      and 20, where 0 means no compression, 1 means the best
+#                      compression speed, and 20 means best compression ratio which
+#                      will consume more CPU.
+#                      Defaults to 1. (Since 5.0)
+#
+# @block-bitmap-mapping: Maps block nodes and bitmaps on them to
+#                        aliases for the purpose of dirty bitmap migration.  Such
+#                        aliases may for example be the corresponding names on the
+#                        opposite site.
+#                        The mapping must be one-to-one, but not necessarily
+#                        complete: On the source, unmapped bitmaps and all bitmaps
+#                        on unmapped nodes will be ignored.  On the destination,
+#                        encountering an unmapped alias in the incoming migration
+#                        stream will result in a report, and all further bitmap
+#                        migration data will then be discarded.
+#                        Note that the destination does not know about bitmaps it
+#                        does not receive, so there is no limitation or requirement
+#                        regarding the number of bitmaps received, or how they are
+#                        named, or on which nodes they are placed.
+#                        By default (when this parameter has never been set), bitmap
+#                        names are mapped to themselves.  Nodes are mapped to their
+#                        block device name if there is one, and to their node name
+#                        otherwise. (Since 5.2)
+#
+# Since: 2.4
+##
+# TODO either fuse back into MigrationParameters, or make
+# MigrationParameters members mandatory
+{ 'struct': 'MigrateSetParameters',
+  'data': { '*announce-initial': 'size',
+            '*announce-max': 'size',
+            '*announce-rounds': 'size',
+            '*announce-step': 'size',
+            '*compress-level': 'int',
+            '*compress-threads': 'int',
+            '*compress-wait-thread': 'bool',
+            '*decompress-threads': 'int',
+            '*throttle-trigger-threshold': 'int',
+            '*cpu-throttle-initial': 'int',
+            '*cpu-throttle-increment': 'int',
+            '*cpu-throttle-tailslow': 'bool',
+            '*tls-creds': 'StrOrNull',
+            '*tls-hostname': 'StrOrNull',
+            '*tls-authz': 'StrOrNull',
+            '*max-bandwidth': 'int',
+            '*downtime-limit': 'int',
+            '*x-checkpoint-delay': 'int',
+            '*block-incremental': 'bool',
+            '*multifd-channels': 'int',
+            '*xbzrle-cache-size': 'size',
+            '*max-postcopy-bandwidth': 'size',
+            '*max-cpu-throttle': 'int',
+            '*multifd-compression': 'MultiFDCompression',
+            '*multifd-zlib-level': 'int',
+            '*multifd-zstd-level': 'int',
+            '*block-bitmap-mapping': [ 'BitmapMigrationNodeAlias' ] } }
+
+##
+# @migrate-set-parameters:
+#
+# Set various migration parameters.
+#
+# Since: 2.4
+#
+# Example:
+#
+# -> { "execute": "migrate-set-parameters" ,
+#      "arguments": { "compress-level": 1 } }
+#
+##
+{ 'command': 'migrate-set-parameters', 'boxed': true,
+  'data': 'MigrateSetParameters' }
+
+##
+# @MigrationParameters:
+#
+# The optional members aren't actually optional.
+#
+# @announce-initial: Initial delay (in milliseconds) before sending the
+#                    first announce (Since 4.0)
+#
+# @announce-max: Maximum delay (in milliseconds) between packets in the
+#                announcement (Since 4.0)
+#
+# @announce-rounds: Number of self-announce packets sent after migration
+#                   (Since 4.0)
+#
+# @announce-step: Increase in delay (in milliseconds) between subsequent
+#                 packets in the announcement (Since 4.0)
+#
+# @compress-level: compression level
+#
+# @compress-threads: compression thread count
+#
+# @compress-wait-thread: Controls behavior when all compression threads are
+#                        currently busy. If true (default), wait for a free
+#                        compression thread to become available; otherwise,
+#                        send the page uncompressed. (Since 3.1)
+#
+# @decompress-threads: decompression thread count
+#
+# @throttle-trigger-threshold: The ratio of bytes_dirty_period and bytes_xfer_period
+#                              to trigger throttling. It is expressed as percentage.
+#                              The default value is 50. (Since 5.0)
+#
+# @cpu-throttle-initial: Initial percentage of time guest cpus are
+#                        throttled when migration auto-converge is activated.
+#                        (Since 2.7)
+#
+# @cpu-throttle-increment: throttle percentage increase each time
+#                          auto-converge detects that migration is not making
+#                          progress. (Since 2.7)
+#
+# @cpu-throttle-tailslow: Make CPU throttling slower at tail stage
+#                         At the tail stage of throttling, the Guest is very
+#                         sensitive to CPU percentage while the @cpu-throttle
+#                         -increment is excessive usually at tail stage.
+#                         If this parameter is true, we will compute the ideal
+#                         CPU percentage used by the Guest, which may exactly make
+#                         the dirty rate match the dirty rate threshold. Then we
+#                         will choose a smaller throttle increment between the
+#                         one specified by @cpu-throttle-increment and the one
+#                         generated by ideal CPU percentage.
+#                         Therefore, it is compatible to traditional throttling,
+#                         meanwhile the throttle increment won't be excessive
+#                         at tail stage.
+#                         The default value is false. (Since 5.1)
+#
+# @tls-creds: ID of the 'tls-creds' object that provides credentials
+#             for establishing a TLS connection over the migration data
+#             channel. On the outgoing side of the migration, the credentials
+#             must be for a 'client' endpoint, while for the incoming side the
+#             credentials must be for a 'server' endpoint.
+#             An empty string means that QEMU will use plain text mode for
+#             migration, rather than TLS (Since 2.7)
+#             Note: 2.8 reports this by omitting tls-creds instead.
+#
+# @tls-hostname: hostname of the target host for the migration. This
+#                is required when using x509 based TLS credentials and the
+#                migration URI does not already include a hostname. For
+#                example if using fd: or exec: based migration, the
+#                hostname must be provided so that the server's x509
+#                certificate identity can be validated. (Since 2.7)
+#                An empty string means that QEMU will use the hostname
+#                associated with the migration URI, if any. (Since 2.9)
+#                Note: 2.8 reports this by omitting tls-hostname instead.
+#
+# @tls-authz: ID of the 'authz' object subclass that provides access control
+#             checking of the TLS x509 certificate distinguished name. (Since
+#             4.0)
+#
+# @max-bandwidth: to set maximum speed for migration. maximum speed in
+#                 bytes per second. (Since 2.8)
+#
+# @downtime-limit: set maximum tolerated downtime for migration. maximum
+#                  downtime in milliseconds (Since 2.8)
+#
+# @x-checkpoint-delay: the delay time between two COLO checkpoints. (Since 2.8)
+#
+# @block-incremental: Affects how much storage is migrated when the
+#                     block migration capability is enabled.  When false, the entire
+#                     storage backing chain is migrated into a flattened image at
+#                     the destination; when true, only the active qcow2 layer is
+#                     migrated and the destination must already have access to the
+#                     same backing chain as was used on the source.  (since 2.10)
+#
+# @multifd-channels: Number of channels used to migrate data in
+#                    parallel. This is the same number that the
+#                    number of sockets used for migration.
+#                    The default value is 2 (since 4.0)
+#
+# @xbzrle-cache-size: cache size to be used by XBZRLE migration.  It
+#                     needs to be a multiple of the target page size
+#                     and a power of 2
+#                     (Since 2.11)
+#
+# @max-postcopy-bandwidth: Background transfer bandwidth during postcopy.
+#                          Defaults to 0 (unlimited).  In bytes per second.
+#                          (Since 3.0)
+#
+# @max-cpu-throttle: maximum cpu throttle percentage.
+#                    Defaults to 99.
+#                    (Since 3.1)
+#
+# @multifd-compression: Which compression method to use.
+#                       Defaults to none. (Since 5.0)
+#
+# @multifd-zlib-level: Set the compression level to be used in live
+#                      migration, the compression level is an integer between 0
+#                      and 9, where 0 means no compression, 1 means the best
+#                      compression speed, and 9 means best compression ratio which
+#                      will consume more CPU.
+#                      Defaults to 1. (Since 5.0)
+#
+# @multifd-zstd-level: Set the compression level to be used in live
+#                      migration, the compression level is an integer between 0
+#                      and 20, where 0 means no compression, 1 means the best
+#                      compression speed, and 20 means best compression ratio which
+#                      will consume more CPU.
+#                      Defaults to 1. (Since 5.0)
+#
+# @block-bitmap-mapping: Maps block nodes and bitmaps on them to
+#                        aliases for the purpose of dirty bitmap migration.  Such
+#                        aliases may for example be the corresponding names on the
+#                        opposite site.
+#                        The mapping must be one-to-one, but not necessarily
+#                        complete: On the source, unmapped bitmaps and all bitmaps
+#                        on unmapped nodes will be ignored.  On the destination,
+#                        encountering an unmapped alias in the incoming migration
+#                        stream will result in a report, and all further bitmap
+#                        migration data will then be discarded.
+#                        Note that the destination does not know about bitmaps it
+#                        does not receive, so there is no limitation or requirement
+#                        regarding the number of bitmaps received, or how they are
+#                        named, or on which nodes they are placed.
+#                        By default (when this parameter has never been set), bitmap
+#                        names are mapped to themselves.  Nodes are mapped to their
+#                        block device name if there is one, and to their node name
+#                        otherwise. (Since 5.2)
+#
+# Since: 2.4
+##
+{ 'struct': 'MigrationParameters',
+  'data': { '*announce-initial': 'size',
+            '*announce-max': 'size',
+            '*announce-rounds': 'size',
+            '*announce-step': 'size',
+            '*compress-level': 'uint8',
+            '*compress-threads': 'uint8',
+            '*compress-wait-thread': 'bool',
+            '*decompress-threads': 'uint8',
+            '*throttle-trigger-threshold': 'uint8',
+            '*cpu-throttle-initial': 'uint8',
+            '*cpu-throttle-increment': 'uint8',
+            '*cpu-throttle-tailslow': 'bool',
+            '*tls-creds': 'str',
+            '*tls-hostname': 'str',
+            '*tls-authz': 'str',
+            '*max-bandwidth': 'size',
+            '*downtime-limit': 'uint64',
+            '*x-checkpoint-delay': 'uint32',
+            '*block-incremental': 'bool' ,
+            '*multifd-channels': 'uint8',
+            '*xbzrle-cache-size': 'size',
+            '*max-postcopy-bandwidth': 'size',
+            '*max-cpu-throttle': 'uint8',
+            '*multifd-compression': 'MultiFDCompression',
+            '*multifd-zlib-level': 'uint8',
+            '*multifd-zstd-level': 'uint8',
+            '*block-bitmap-mapping': [ 'BitmapMigrationNodeAlias' ] } }
+
+##
+# @query-migrate-parameters:
+#
+# Returns information about the current migration parameters
+#
+# Returns: @MigrationParameters
+#
+# Since: 2.4
+#
+# Example:
+#
+# -> { "execute": "query-migrate-parameters" }
+# <- { "return": {
+#          "decompress-threads": 2,
+#          "cpu-throttle-increment": 10,
+#          "compress-threads": 8,
+#          "compress-level": 1,
+#          "cpu-throttle-initial": 20,
+#          "max-bandwidth": 33554432,
+#          "downtime-limit": 300
+#       }
+#    }
+#
+##
+{ 'command': 'query-migrate-parameters',
+  'returns': 'MigrationParameters' }
+
+##
+# @client_migrate_info:
+#
+# Set migration information for remote display.  This makes the server
+# ask the client to automatically reconnect using the new parameters
+# once migration finished successfully.  Only implemented for SPICE.
+#
+# @protocol:     must be "spice"
+# @hostname:     migration target hostname
+# @port:         spice tcp port for plaintext channels
+# @tls-port:     spice tcp port for tls-secured channels
+# @cert-subject: server certificate subject
+#
+# Since: 0.14.0
+#
+# Example:
+#
+# -> { "execute": "client_migrate_info",
+#      "arguments": { "protocol": "spice",
+#                     "hostname": "virt42.lab.kraxel.org",
+#                     "port": 1234 } }
+# <- { "return": {} }
+#
+##
+{ 'command': 'client_migrate_info',
+  'data': { 'protocol': 'str', 'hostname': 'str', '*port': 'int',
+            '*tls-port': 'int', '*cert-subject': 'str' } }
+
+##
+# @migrate-start-postcopy:
+#
+# Followup to a migration command to switch the migration to postcopy mode.
+# The postcopy-ram capability must be set on both source and destination
+# before the original migration command.
+#
+# Since: 2.5
+#
+# Example:
+#
+# -> { "execute": "migrate-start-postcopy" }
+# <- { "return": {} }
+#
+##
+{ 'command': 'migrate-start-postcopy' }
+
+##
+# @MIGRATION:
+#
+# Emitted when a migration event happens
+#
+# @status: @MigrationStatus describing the current migration status.
+#
+# Since: 2.4
+#
+# Example:
+#
+# <- {"timestamp": {"seconds": 1432121972, "microseconds": 744001},
+#     "event": "MIGRATION",
+#     "data": {"status": "completed"} }
+#
+##
+{ 'event': 'MIGRATION',
+  'data': {'status': 'MigrationStatus'}}
+
+##
+# @MIGRATION_PASS:
+#
+# Emitted from the source side of a migration at the start of each pass
+# (when it syncs the dirty bitmap)
+#
+# @pass: An incrementing count (starting at 1 on the first pass)
+#
+# Since: 2.6
+#
+# Example:
+#
+# { "timestamp": {"seconds": 1449669631, "microseconds": 239225},
+#   "event": "MIGRATION_PASS", "data": {"pass": 2} }
+#
+##
+{ 'event': 'MIGRATION_PASS',
+  'data': { 'pass': 'int' } }
+
+##
+# @COLOMessage:
+#
+# The message transmission between Primary side and Secondary side.
+#
+# @checkpoint-ready: Secondary VM (SVM) is ready for checkpointing
+#
+# @checkpoint-request: Primary VM (PVM) tells SVM to prepare for checkpointing
+#
+# @checkpoint-reply: SVM gets PVM's checkpoint request
+#
+# @vmstate-send: VM's state will be sent by PVM.
+#
+# @vmstate-size: The total size of VMstate.
+#
+# @vmstate-received: VM's state has been received by SVM.
+#
+# @vmstate-loaded: VM's state has been loaded by SVM.
+#
+# Since: 2.8
+##
+{ 'enum': 'COLOMessage',
+  'data': [ 'checkpoint-ready', 'checkpoint-request', 'checkpoint-reply',
+            'vmstate-send', 'vmstate-size', 'vmstate-received',
+            'vmstate-loaded' ] }
+
+##
+# @COLOMode:
+#
+# The COLO current mode.
+#
+# @none: COLO is disabled.
+#
+# @primary: COLO node in primary side.
+#
+# @secondary: COLO node in slave side.
+#
+# Since: 2.8
+##
+{ 'enum': 'COLOMode',
+  'data': [ 'none', 'primary', 'secondary'] }
+
+##
+# @FailoverStatus:
+#
+# An enumeration of COLO failover status
+#
+# @none: no failover has ever happened
+#
+# @require: got failover requirement but not handled
+#
+# @active: in the process of doing failover
+#
+# @completed: finish the process of failover
+#
+# @relaunch: restart the failover process, from 'none' -> 'completed' (Since 2.9)
+#
+# Since: 2.8
+##
+{ 'enum': 'FailoverStatus',
+  'data': [ 'none', 'require', 'active', 'completed', 'relaunch' ] }
+
+##
+# @COLO_EXIT:
+#
+# Emitted when VM finishes COLO mode due to some errors happening or
+# at the request of users.
+#
+# @mode: report COLO mode when COLO exited.
+#
+# @reason: describes the reason for the COLO exit.
+#
+# Since: 3.1
+#
+# Example:
+#
+# <- { "timestamp": {"seconds": 2032141960, "microseconds": 417172},
+#      "event": "COLO_EXIT", "data": {"mode": "primary", "reason": "request" } }
+#
+##
+{ 'event': 'COLO_EXIT',
+  'data': {'mode': 'COLOMode', 'reason': 'COLOExitReason' } }
+
+##
+# @COLOExitReason:
+#
+# The reason for a COLO exit.
+#
+# @none: failover has never happened. This state does not occur
+#        in the COLO_EXIT event, and is only visible in the result of
+#        query-colo-status.
+#
+# @request: COLO exit is due to an external request.
+#
+# @error: COLO exit is due to an internal error.
+#
+# @processing: COLO is currently handling a failover (since 4.0).
+#
+# Since: 3.1
+##
+{ 'enum': 'COLOExitReason',
+  'data': [ 'none', 'request', 'error' , 'processing' ] }
+
+##
+# @x-colo-lost-heartbeat:
+#
+# Tell qemu that heartbeat is lost, request it to do takeover procedures.
+# If this command is sent to the PVM, the Primary side will exit COLO mode.
+# If sent to the Secondary, the Secondary side will run failover work,
+# then takes over server operation to become the service VM.
+#
+# Since: 2.8
+#
+# Example:
+#
+# -> { "execute": "x-colo-lost-heartbeat" }
+# <- { "return": {} }
+#
+##
+{ 'command': 'x-colo-lost-heartbeat' }
+
+##
+# @migrate_cancel:
+#
+# Cancel the current executing migration process.
+#
+# Returns: nothing on success
+#
+# Notes: This command succeeds even if there is no migration process running.
+#
+# Since: 0.14.0
+#
+# Example:
+#
+# -> { "execute": "migrate_cancel" }
+# <- { "return": {} }
+#
+##
+{ 'command': 'migrate_cancel' }
+
+##
+# @migrate-continue:
+#
+# Continue migration when it's in a paused state.
+#
+# @state: The state the migration is currently expected to be in
+#
+# Returns: nothing on success
+# Since: 2.11
+# Example:
+#
+# -> { "execute": "migrate-continue" , "arguments":
+#      { "state": "pre-switchover" } }
+# <- { "return": {} }
+##
+{ 'command': 'migrate-continue', 'data': {'state': 'MigrationStatus'} }
+
+##
+# @migrate_set_downtime:
+#
+# Set maximum tolerated downtime for migration.
+#
+# @value: maximum downtime in seconds
+#
+# Features:
+# @deprecated: This command is deprecated.  Use
+#              'migrate-set-parameters' instead.
+#
+# Returns: nothing on success
+#
+# Since: 0.14.0
+#
+# Example:
+#
+# -> { "execute": "migrate_set_downtime", "arguments": { "value": 0.1 } }
+# <- { "return": {} }
+#
+##
+{ 'command': 'migrate_set_downtime', 'data': {'value': 'number'},
+  'features': [ 'deprecated' ] }
+
+##
+# @migrate_set_speed:
+#
+# Set maximum speed for migration.
+#
+# @value: maximum speed in bytes per second.
+#
+# Features:
+# @deprecated: This command is deprecated.  Use
+#              'migrate-set-parameters' instead.
+#
+# Returns: nothing on success
+#
+# Since: 0.14.0
+#
+# Example:
+#
+# -> { "execute": "migrate_set_speed", "arguments": { "value": 1024 } }
+# <- { "return": {} }
+#
+##
+{ 'command': 'migrate_set_speed', 'data': {'value': 'int'},
+  'features': [ 'deprecated' ] }
+
+##
+# @migrate-set-cache-size:
+#
+# Set cache size to be used by XBZRLE migration
+#
+# @value: cache size in bytes
+#
+# Features:
+# @deprecated: This command is deprecated.  Use
+#              'migrate-set-parameters' instead.
+#
+# The size will be rounded down to the nearest power of 2.
+# The cache size can be modified before and during ongoing migration
+#
+# Returns: nothing on success
+#
+# Since: 1.2
+#
+# Example:
+#
+# -> { "execute": "migrate-set-cache-size",
+#      "arguments": { "value": 536870912 } }
+# <- { "return": {} }
+#
+##
+{ 'command': 'migrate-set-cache-size', 'data': {'value': 'int'},
+  'features': [ 'deprecated' ] }
+
+##
+# @query-migrate-cache-size:
+#
+# Query migration XBZRLE cache size
+#
+# Features:
+# @deprecated: This command is deprecated.  Use
+#              'query-migrate-parameters' instead.
+#
+# Returns: XBZRLE cache size in bytes
+#
+# Since: 1.2
+#
+# Example:
+#
+# -> { "execute": "query-migrate-cache-size" }
+# <- { "return": 67108864 }
+#
+##
+{ 'command': 'query-migrate-cache-size', 'returns': 'int',
+  'features': [ 'deprecated' ] }
+
+##
+# @migrate:
+#
+# Migrates the current running guest to another Virtual Machine.
+#
+# @uri: the Uniform Resource Identifier of the destination VM
+#
+# @blk: do block migration (full disk copy)
+#
+# @inc: incremental disk copy migration
+#
+# @detach: this argument exists only for compatibility reasons and
+#          is ignored by QEMU
+#
+# @resume: resume one paused migration, default "off". (since 3.0)
+#
+# Returns: nothing on success
+#
+# Since: 0.14.0
+#
+# Notes:
+#
+# 1. The 'query-migrate' command should be used to check migration's progress
+#    and final result (this information is provided by the 'status' member)
+#
+# 2. All boolean arguments default to false
+#
+# 3. The user Monitor's "detach" argument is invalid in QMP and should not
+#    be used
+#
+# Example:
+#
+# -> { "execute": "migrate", "arguments": { "uri": "tcp:0:4446" } }
+# <- { "return": {} }
+#
+##
+{ 'command': 'migrate',
+  'data': {'uri': 'str', '*blk': 'bool', '*inc': 'bool',
+           '*detach': 'bool', '*resume': 'bool' } }
+
+##
+# @migrate-incoming:
+#
+# Start an incoming migration, the qemu must have been started
+# with -incoming defer
+#
+# @uri: The Uniform Resource Identifier identifying the source or
+#       address to listen on
+#
+# Returns: nothing on success
+#
+# Since: 2.3
+#
+# Notes:
+#
+# 1. It's a bad idea to use a string for the uri, but it needs to stay
+#    compatible with -incoming and the format of the uri is already exposed
+#    above libvirt.
+#
+# 2. QEMU must be started with -incoming defer to allow migrate-incoming to
+#    be used.
+#
+# 3. The uri format is the same as for -incoming
+#
+# Example:
+#
+# -> { "execute": "migrate-incoming",
+#      "arguments": { "uri": "tcp::4446" } }
+# <- { "return": {} }
+#
+##
+{ 'command': 'migrate-incoming', 'data': {'uri': 'str' } }
+
+##
+# @xen-save-devices-state:
+#
+# Save the state of all devices to file. The RAM and the block devices
+# of the VM are not saved by this command.
+#
+# @filename: the file to save the state of the devices to as binary
+#            data. See xen-save-devices-state.txt for a description of the binary
+#            format.
+#
+# @live: Optional argument to ask QEMU to treat this command as part of a live
+#        migration. Default to true. (since 2.11)
+#
+# Returns: Nothing on success
+#
+# Since: 1.1
+#
+# Example:
+#
+# -> { "execute": "xen-save-devices-state",
+#      "arguments": { "filename": "/tmp/save" } }
+# <- { "return": {} }
+#
+##
+{ 'command': 'xen-save-devices-state',
+  'data': {'filename': 'str', '*live':'bool' } }
+
+##
+# @xen-set-global-dirty-log:
+#
+# Enable or disable the global dirty log mode.
+#
+# @enable: true to enable, false to disable.
+#
+# Returns: nothing
+#
+# Since: 1.3
+#
+# Example:
+#
+# -> { "execute": "xen-set-global-dirty-log",
+#      "arguments": { "enable": true } }
+# <- { "return": {} }
+#
+##
+{ 'command': 'xen-set-global-dirty-log', 'data': { 'enable': 'bool' } }
+
+##
+# @xen-load-devices-state:
+#
+# Load the state of all devices from file. The RAM and the block devices
+# of the VM are not loaded by this command.
+#
+# @filename: the file to load the state of the devices from as binary
+#            data. See xen-save-devices-state.txt for a description of the binary
+#            format.
+#
+# Since: 2.7
+#
+# Example:
+#
+# -> { "execute": "xen-load-devices-state",
+#      "arguments": { "filename": "/tmp/resume" } }
+# <- { "return": {} }
+#
+##
+{ 'command': 'xen-load-devices-state', 'data': {'filename': 'str'} }
+
+##
+# @xen-set-replication:
+#
+# Enable or disable replication.
+#
+# @enable: true to enable, false to disable.
+#
+# @primary: true for primary or false for secondary.
+#
+# @failover: true to do failover, false to stop. but cannot be
+#            specified if 'enable' is true. default value is false.
+#
+# Returns: nothing.
+#
+# Example:
+#
+# -> { "execute": "xen-set-replication",
+#      "arguments": {"enable": true, "primary": false} }
+# <- { "return": {} }
+#
+# Since: 2.9
+##
+{ 'command': 'xen-set-replication',
+  'data': { 'enable': 'bool', 'primary': 'bool', '*failover' : 'bool' },
+  'if': 'defined(CONFIG_REPLICATION)' }
+
+##
+# @ReplicationStatus:
+#
+# The result format for 'query-xen-replication-status'.
+#
+# @error: true if an error happened, false if replication is normal.
+#
+# @desc: the human readable error description string, when
+#        @error is 'true'.
+#
+# Since: 2.9
+##
+{ 'struct': 'ReplicationStatus',
+  'data': { 'error': 'bool', '*desc': 'str' },
+  'if': 'defined(CONFIG_REPLICATION)' }
+
+##
+# @query-xen-replication-status:
+#
+# Query replication status while the vm is running.
+#
+# Returns: A @ReplicationResult object showing the status.
+#
+# Example:
+#
+# -> { "execute": "query-xen-replication-status" }
+# <- { "return": { "error": false } }
+#
+# Since: 2.9
+##
+{ 'command': 'query-xen-replication-status',
+  'returns': 'ReplicationStatus',
+  'if': 'defined(CONFIG_REPLICATION)' }
+
+##
+# @xen-colo-do-checkpoint:
+#
+# Xen uses this command to notify replication to trigger a checkpoint.
+#
+# Returns: nothing.
+#
+# Example:
+#
+# -> { "execute": "xen-colo-do-checkpoint" }
+# <- { "return": {} }
+#
+# Since: 2.9
+##
+{ 'command': 'xen-colo-do-checkpoint',
+  'if': 'defined(CONFIG_REPLICATION)' }
+
+##
+# @COLOStatus:
+#
+# The result format for 'query-colo-status'.
+#
+# @mode: COLO running mode. If COLO is running, this field will return
+#        'primary' or 'secondary'.
+#
+# @last-mode: COLO last running mode. If COLO is running, this field
+#             will return same like mode field, after failover we can
+#             use this field to get last colo mode. (since 4.0)
+#
+# @reason: describes the reason for the COLO exit.
+#
+# Since: 3.1
+##
+{ 'struct': 'COLOStatus',
+  'data': { 'mode': 'COLOMode', 'last-mode': 'COLOMode',
+            'reason': 'COLOExitReason' } }
+
+##
+# @query-colo-status:
+#
+# Query COLO status while the vm is running.
+#
+# Returns: A @COLOStatus object showing the status.
+#
+# Example:
+#
+# -> { "execute": "query-colo-status" }
+# <- { "return": { "mode": "primary", "reason": "request" } }
+#
+# Since: 3.1
+##
+{ 'command': 'query-colo-status',
+  'returns': 'COLOStatus' }
+
+##
+# @migrate-recover:
+#
+# Provide a recovery migration stream URI.
+#
+# @uri: the URI to be used for the recovery of migration stream.
+#
+# Returns: nothing.
+#
+# Example:
+#
+# -> { "execute": "migrate-recover",
+#      "arguments": { "uri": "tcp:192.168.1.200:12345" } }
+# <- { "return": {} }
+#
+# Since: 3.0
+##
+{ 'command': 'migrate-recover',
+  'data': { 'uri': 'str' },
+  'allow-oob': true }
+
+##
+# @migrate-pause:
+#
+# Pause a migration.  Currently it only supports postcopy.
+#
+# Returns: nothing.
+#
+# Example:
+#
+# -> { "execute": "migrate-pause" }
+# <- { "return": {} }
+#
+# Since: 3.0
+##
+{ 'command': 'migrate-pause', 'allow-oob': true }
+
+##
+# @UNPLUG_PRIMARY:
+#
+# Emitted from source side of a migration when migration state is
+# WAIT_UNPLUG. Device was unplugged by guest operating system.
+# Device resources in QEMU are kept on standby to be able to re-plug it in case
+# of migration failure.
+#
+# @device-id: QEMU device id of the unplugged device
+#
+# Since: 4.2
+#
+# Example:
+#   {"event": "UNPLUG_PRIMARY", "data": {"device-id": "hostdev0"} }
+#
+##
+{ 'event': 'UNPLUG_PRIMARY',
+  'data': { 'device-id': 'str' } }
+
+##
+# @DirtyRateStatus:
+#
+# An enumeration of dirtyrate status.
+#
+# @unstarted: the dirtyrate thread has not been started.
+#
+# @measuring: the dirtyrate thread is measuring.
+#
+# @measured: the dirtyrate thread has measured and results are available.
+#
+# Since: 5.2
+#
+##
+{ 'enum': 'DirtyRateStatus',
+  'data': [ 'unstarted', 'measuring', 'measured'] }
+
+##
+# @DirtyRateInfo:
+#
+# Information about current dirty page rate of vm.
+#
+# @dirty-rate: an estimate of the dirty page rate of the VM in units of
+#              MB/s, present only when estimating the rate has completed.
+#
+# @status: status containing dirtyrate query status includes
+#          'unstarted' or 'measuring' or 'measured'
+#
+# @start-time: start time in units of second for calculation
+#
+# @calc-time: time in units of second for sample dirty pages
+#
+# Since: 5.2
+#
+##
+{ 'struct': 'DirtyRateInfo',
+  'data': {'*dirty-rate': 'int64',
+           'status': 'DirtyRateStatus',
+           'start-time': 'int64',
+           'calc-time': 'int64'} }
+
+##
+# @calc-dirty-rate:
+#
+# start calculating dirty page rate for vm
+#
+# @calc-time: time in units of second for sample dirty pages
+#
+# Since: 5.2
+#
+# Example:
+#   {"command": "calc-dirty-rate", "data": {"calc-time": 1} }
+#
+##
+{ 'command': 'calc-dirty-rate', 'data': {'calc-time': 'int64'} }
+
+##
+# @query-dirty-rate:
+#
+# query dirty page rate in units of MB/s for vm
+#
+# Since: 5.2
+##
+{ 'command': 'query-dirty-rate', 'returns': 'DirtyRateInfo' }
diff --git a/qapi/misc-target.json b/qapi/misc-target.json
new file mode 100644
index 000000000..1e561fa97
--- /dev/null
+++ b/qapi/misc-target.json
@@ -0,0 +1,269 @@
+# -*- Mode: Python -*-
+# vim: filetype=python
+#
+
+##
+# @RTC_CHANGE:
+#
+# Emitted when the guest changes the RTC time.
+#
+# @offset: offset between base RTC clock (as specified by -rtc base), and
+#          new RTC clock value
+#
+# Note: This event is rate-limited.
+#
+# Since: 0.13.0
+#
+# Example:
+#
+# <-   { "event": "RTC_CHANGE",
+#        "data": { "offset": 78 },
+#        "timestamp": { "seconds": 1267020223, "microseconds": 435656 } }
+#
+##
+{ 'event': 'RTC_CHANGE',
+  'data': { 'offset': 'int' },
+  'if': 'defined(TARGET_ALPHA) || defined(TARGET_ARM) || defined(TARGET_HPPA) || defined(TARGET_I386) || defined(TARGET_MIPS) || defined(TARGET_MIPS64) || defined(TARGET_MOXIE) || defined(TARGET_PPC) || defined(TARGET_PPC64) || defined(TARGET_S390X) || defined(TARGET_SH4) || defined(TARGET_SPARC)' }
+
+##
+# @rtc-reset-reinjection:
+#
+# This command will reset the RTC interrupt reinjection backlog.
+# Can be used if another mechanism to synchronize guest time
+# is in effect, for example QEMU guest agent's guest-set-time
+# command.
+#
+# Since: 2.1
+#
+# Example:
+#
+# -> { "execute": "rtc-reset-reinjection" }
+# <- { "return": {} }
+#
+##
+{ 'command': 'rtc-reset-reinjection',
+  'if': 'defined(TARGET_I386)' }
+
+
+##
+# @SevState:
+#
+# An enumeration of SEV state information used during @query-sev.
+#
+# @uninit: The guest is uninitialized.
+#
+# @launch-update: The guest is currently being launched; plaintext data and
+#                 register state is being imported.
+#
+# @launch-secret: The guest is currently being launched; ciphertext data
+#                 is being imported.
+#
+# @running: The guest is fully launched or migrated in.
+#
+# @send-update: The guest is currently being migrated out to another machine.
+#
+# @receive-update: The guest is currently being migrated from another machine.
+#
+# Since: 2.12
+##
+{ 'enum': 'SevState',
+  'data': ['uninit', 'launch-update', 'launch-secret', 'running',
+           'send-update', 'receive-update' ],
+  'if': 'defined(TARGET_I386)' }
+
+##
+# @SevInfo:
+#
+# Information about Secure Encrypted Virtualization (SEV) support
+#
+# @enabled: true if SEV is active
+#
+# @api-major: SEV API major version
+#
+# @api-minor: SEV API minor version
+#
+# @build-id: SEV FW build id
+#
+# @policy: SEV policy value
+#
+# @state: SEV guest state
+#
+# @handle: SEV firmware handle
+#
+# Since: 2.12
+##
+{ 'struct': 'SevInfo',
+    'data': { 'enabled': 'bool',
+              'api-major': 'uint8',
+              'api-minor' : 'uint8',
+              'build-id' : 'uint8',
+              'policy' : 'uint32',
+              'state' : 'SevState',
+              'handle' : 'uint32'
+            },
+  'if': 'defined(TARGET_I386)'
+}
+
+##
+# @query-sev:
+#
+# Returns information about SEV
+#
+# Returns: @SevInfo
+#
+# Since: 2.12
+#
+# Example:
+#
+# -> { "execute": "query-sev" }
+# <- { "return": { "enabled": true, "api-major" : 0, "api-minor" : 0,
+#                  "build-id" : 0, "policy" : 0, "state" : "running",
+#                  "handle" : 1 } }
+#
+##
+{ 'command': 'query-sev', 'returns': 'SevInfo',
+  'if': 'defined(TARGET_I386)' }
+
+
+##
+# @SevLaunchMeasureInfo:
+#
+# SEV Guest Launch measurement information
+#
+# @data: the measurement value encoded in base64
+#
+# Since: 2.12
+#
+##
+{ 'struct': 'SevLaunchMeasureInfo', 'data': {'data': 'str'},
+  'if': 'defined(TARGET_I386)' }
+
+##
+# @query-sev-launch-measure:
+#
+# Query the SEV guest launch information.
+#
+# Returns: The @SevLaunchMeasureInfo for the guest
+#
+# Since: 2.12
+#
+# Example:
+#
+# -> { "execute": "query-sev-launch-measure" }
+# <- { "return": { "data": "4l8LXeNlSPUDlXPJG5966/8%YZ" } }
+#
+##
+{ 'command': 'query-sev-launch-measure', 'returns': 'SevLaunchMeasureInfo',
+  'if': 'defined(TARGET_I386)' }
+
+
+##
+# @SevCapability:
+#
+# The struct describes capability for a Secure Encrypted Virtualization
+# feature.
+#
+# @pdh:  Platform Diffie-Hellman key (base64 encoded)
+#
+# @cert-chain:  PDH certificate chain (base64 encoded)
+#
+# @cbitpos: C-bit location in page table entry
+#
+# @reduced-phys-bits: Number of physical Address bit reduction when SEV is
+#                     enabled
+#
+# Since: 2.12
+##
+{ 'struct': 'SevCapability',
+  'data': { 'pdh': 'str',
+            'cert-chain': 'str',
+            'cbitpos': 'int',
+            'reduced-phys-bits': 'int'},
+  'if': 'defined(TARGET_I386)' }
+
+##
+# @query-sev-capabilities:
+#
+# This command is used to get the SEV capabilities, and is supported on AMD
+# X86 platforms only.
+#
+# Returns: SevCapability objects.
+#
+# Since: 2.12
+#
+# Example:
+#
+# -> { "execute": "query-sev-capabilities" }
+# <- { "return": { "pdh": "8CCDD8DDD", "cert-chain": "888CCCDDDEE",
+#                  "cbitpos": 47, "reduced-phys-bits": 5}}
+#
+##
+{ 'command': 'query-sev-capabilities', 'returns': 'SevCapability',
+  'if': 'defined(TARGET_I386)' }
+
+##
+# @dump-skeys:
+#
+# Dump guest's storage keys
+#
+# @filename: the path to the file to dump to
+#
+# This command is only supported on s390 architecture.
+#
+# Since: 2.5
+#
+# Example:
+#
+# -> { "execute": "dump-skeys",
+#      "arguments": { "filename": "/tmp/skeys" } }
+# <- { "return": {} }
+#
+##
+{ 'command': 'dump-skeys',
+  'data': { 'filename': 'str' },
+  'if': 'defined(TARGET_S390X)' }
+
+##
+# @GICCapability:
+#
+# The struct describes capability for a specific GIC (Generic
+# Interrupt Controller) version. These bits are not only decided by
+# QEMU/KVM software version, but also decided by the hardware that
+# the program is running upon.
+#
+# @version: version of GIC to be described. Currently, only 2 and 3
+#           are supported.
+#
+# @emulated: whether current QEMU/hardware supports emulated GIC
+#            device in user space.
+#
+# @kernel: whether current QEMU/hardware supports hardware
+#          accelerated GIC device in kernel.
+#
+# Since: 2.6
+##
+{ 'struct': 'GICCapability',
+  'data': { 'version': 'int',
+            'emulated': 'bool',
+            'kernel': 'bool' },
+  'if': 'defined(TARGET_ARM)' }
+
+##
+# @query-gic-capabilities:
+#
+# This command is ARM-only. It will return a list of GICCapability
+# objects that describe its capability bits.
+#
+# Returns: a list of GICCapability objects.
+#
+# Since: 2.6
+#
+# Example:
+#
+# -> { "execute": "query-gic-capabilities" }
+# <- { "return": [{ "version": 2, "emulated": true, "kernel": false },
+#                 { "version": 3, "emulated": false, "kernel": true } ] }
+#
+##
+{ 'command': 'query-gic-capabilities', 'returns': ['GICCapability'],
+  'if': 'defined(TARGET_ARM)' }
diff --git a/qapi/misc.json b/qapi/misc.json
new file mode 100644
index 000000000..40df51385
--- /dev/null
+++ b/qapi/misc.json
@@ -0,0 +1,570 @@
+# -*- Mode: Python -*-
+# vim: filetype=python
+#
+
+##
+# = Miscellanea
+##
+
+{ 'include': 'common.json' }
+
+##
+# @add_client:
+#
+# Allow client connections for VNC, Spice and socket based
+# character devices to be passed in to QEMU via SCM_RIGHTS.
+#
+# @protocol: protocol name. Valid names are "vnc", "spice" or the
+#            name of a character device (eg. from -chardev id=XXXX)
+#
+# @fdname: file descriptor name previously passed via 'getfd' command
+#
+# @skipauth: whether to skip authentication. Only applies
+#            to "vnc" and "spice" protocols
+#
+# @tls: whether to perform TLS. Only applies to the "spice"
+#       protocol
+#
+# Returns: nothing on success.
+#
+# Since: 0.14.0
+#
+# Example:
+#
+# -> { "execute": "add_client", "arguments": { "protocol": "vnc",
+#                                              "fdname": "myclient" } }
+# <- { "return": {} }
+#
+##
+{ 'command': 'add_client',
+  'data': { 'protocol': 'str', 'fdname': 'str', '*skipauth': 'bool',
+            '*tls': 'bool' } }
+
+##
+# @NameInfo:
+#
+# Guest name information.
+#
+# @name: The name of the guest
+#
+# Since: 0.14.0
+##
+{ 'struct': 'NameInfo', 'data': {'*name': 'str'} }
+
+##
+# @query-name:
+#
+# Return the name information of a guest.
+#
+# Returns: @NameInfo of the guest
+#
+# Since: 0.14.0
+#
+# Example:
+#
+# -> { "execute": "query-name" }
+# <- { "return": { "name": "qemu-name" } }
+#
+##
+{ 'command': 'query-name', 'returns': 'NameInfo', 'allow-preconfig': true }
+
+##
+# @IOThreadInfo:
+#
+# Information about an iothread
+#
+# @id: the identifier of the iothread
+#
+# @thread-id: ID of the underlying host thread
+#
+# @poll-max-ns: maximum polling time in ns, 0 means polling is disabled
+#               (since 2.9)
+#
+# @poll-grow: how many ns will be added to polling time, 0 means that it's not
+#             configured (since 2.9)
+#
+# @poll-shrink: how many ns will be removed from polling time, 0 means that
+#               it's not configured (since 2.9)
+#
+# Since: 2.0
+##
+{ 'struct': 'IOThreadInfo',
+  'data': {'id': 'str',
+           'thread-id': 'int',
+           'poll-max-ns': 'int',
+           'poll-grow': 'int',
+           'poll-shrink': 'int' } }
+
+##
+# @query-iothreads:
+#
+# Returns a list of information about each iothread.
+#
+# Note: this list excludes the QEMU main loop thread, which is not declared
+#       using the -object iothread command-line option.  It is always the main thread
+#       of the process.
+#
+# Returns: a list of @IOThreadInfo for each iothread
+#
+# Since: 2.0
+#
+# Example:
+#
+# -> { "execute": "query-iothreads" }
+# <- { "return": [
+#          {
+#             "id":"iothread0",
+#             "thread-id":3134
+#          },
+#          {
+#             "id":"iothread1",
+#             "thread-id":3135
+#          }
+#       ]
+#    }
+#
+##
+{ 'command': 'query-iothreads', 'returns': ['IOThreadInfo'],
+  'allow-preconfig': true }
+
+##
+# @stop:
+#
+# Stop all guest VCPU execution.
+#
+# Since:  0.14.0
+#
+# Notes: This function will succeed even if the guest is already in the stopped
+#        state.  In "inmigrate" state, it will ensure that the guest
+#        remains paused once migration finishes, as if the -S option was
+#        passed on the command line.
+#
+# Example:
+#
+# -> { "execute": "stop" }
+# <- { "return": {} }
+#
+##
+{ 'command': 'stop' }
+
+##
+# @cont:
+#
+# Resume guest VCPU execution.
+#
+# Since:  0.14.0
+#
+# Returns:  If successful, nothing
+#
+# Notes: This command will succeed if the guest is currently running.  It
+#        will also succeed if the guest is in the "inmigrate" state; in
+#        this case, the effect of the command is to make sure the guest
+#        starts once migration finishes, removing the effect of the -S
+#        command line option if it was passed.
+#
+# Example:
+#
+# -> { "execute": "cont" }
+# <- { "return": {} }
+#
+##
+{ 'command': 'cont' }
+
+##
+# @x-exit-preconfig:
+#
+# Exit from "preconfig" state
+#
+# This command makes QEMU exit the preconfig state and proceed with
+# VM initialization using configuration data provided on the command line
+# and via the QMP monitor during the preconfig state. The command is only
+# available during the preconfig state (i.e. when the --preconfig command
+# line option was in use).
+#
+# Since 3.0
+#
+# Returns: nothing
+#
+# Example:
+#
+# -> { "execute": "x-exit-preconfig" }
+# <- { "return": {} }
+#
+##
+{ 'command': 'x-exit-preconfig', 'allow-preconfig': true }
+
+##
+# @human-monitor-command:
+#
+# Execute a command on the human monitor and return the output.
+#
+# @command-line: the command to execute in the human monitor
+#
+# @cpu-index: The CPU to use for commands that require an implicit CPU
+#
+# Features:
+# @savevm-monitor-nodes: If present, HMP command savevm only snapshots
+#                        monitor-owned nodes if they have no parents.
+#                        This allows the use of 'savevm' with
+#                        -blockdev. (since 4.2)
+#
+# Returns: the output of the command as a string
+#
+# Since: 0.14.0
+#
+# Notes: This command only exists as a stop-gap.  Its use is highly
+#        discouraged.  The semantics of this command are not
+#        guaranteed: this means that command names, arguments and
+#        responses can change or be removed at ANY time.  Applications
+#        that rely on long term stability guarantees should NOT
+#        use this command.
+#
+#        Known limitations:
+#
+#        * This command is stateless, this means that commands that depend
+#          on state information (such as getfd) might not work
+#
+#        * Commands that prompt the user for data don't currently work
+#
+# Example:
+#
+# -> { "execute": "human-monitor-command",
+#      "arguments": { "command-line": "info kvm" } }
+# <- { "return": "kvm support: enabled\r\n" }
+#
+##
+{ 'command': 'human-monitor-command',
+  'data': {'command-line': 'str', '*cpu-index': 'int'},
+  'returns': 'str',
+  'features': [ 'savevm-monitor-nodes' ] }
+
+##
+# @change:
+#
+# This command is multiple commands multiplexed together.
+#
+# @device: This is normally the name of a block device but it may also be 'vnc'.
+#          when it's 'vnc', then sub command depends on @target
+#
+# @target: If @device is a block device, then this is the new filename.
+#          If @device is 'vnc', then if the value 'password' selects the vnc
+#          change password command.   Otherwise, this specifies a new server URI
+#          address to listen to for VNC connections.
+#
+# @arg: If @device is a block device, then this is an optional format to open
+#       the device with.
+#       If @device is 'vnc' and @target is 'password', this is the new VNC
+#       password to set.  See change-vnc-password for additional notes.
+#
+# Features:
+# @deprecated: This command is deprecated.  For changing block
+#              devices, use 'blockdev-change-medium' instead; for changing VNC
+#              parameters, use 'change-vnc-password' instead.
+#
+# Returns: - Nothing on success.
+#          - If @device is not a valid block device, DeviceNotFound
+#
+# Since: 0.14.0
+#
+# Example:
+#
+# 1. Change a removable medium
+#
+# -> { "execute": "change",
+#      "arguments": { "device": "ide1-cd0",
+#                     "target": "/srv/images/Fedora-12-x86_64-DVD.iso" } }
+# <- { "return": {} }
+#
+# 2. Change VNC password
+#
+# -> { "execute": "change",
+#      "arguments": { "device": "vnc", "target": "password",
+#                     "arg": "foobar1" } }
+# <- { "return": {} }
+#
+##
+{ 'command': 'change',
+  'data': {'device': 'str', 'target': 'str', '*arg': 'str'},
+  'features': [ 'deprecated' ] }
+
+##
+# @getfd:
+#
+# Receive a file descriptor via SCM rights and assign it a name
+#
+# @fdname: file descriptor name
+#
+# Returns: Nothing on success
+#
+# Since: 0.14.0
+#
+# Notes: If @fdname already exists, the file descriptor assigned to
+#        it will be closed and replaced by the received file
+#        descriptor.
+#
+#        The 'closefd' command can be used to explicitly close the
+#        file descriptor when it is no longer needed.
+#
+# Example:
+#
+# -> { "execute": "getfd", "arguments": { "fdname": "fd1" } }
+# <- { "return": {} }
+#
+##
+{ 'command': 'getfd', 'data': {'fdname': 'str'} }
+
+##
+# @closefd:
+#
+# Close a file descriptor previously passed via SCM rights
+#
+# @fdname: file descriptor name
+#
+# Returns: Nothing on success
+#
+# Since: 0.14.0
+#
+# Example:
+#
+# -> { "execute": "closefd", "arguments": { "fdname": "fd1" } }
+# <- { "return": {} }
+#
+##
+{ 'command': 'closefd', 'data': {'fdname': 'str'} }
+
+##
+# @AddfdInfo:
+#
+# Information about a file descriptor that was added to an fd set.
+#
+# @fdset-id: The ID of the fd set that @fd was added to.
+#
+# @fd: The file descriptor that was received via SCM rights and
+#      added to the fd set.
+#
+# Since: 1.2.0
+##
+{ 'struct': 'AddfdInfo', 'data': {'fdset-id': 'int', 'fd': 'int'} }
+
+##
+# @add-fd:
+#
+# Add a file descriptor, that was passed via SCM rights, to an fd set.
+#
+# @fdset-id: The ID of the fd set to add the file descriptor to.
+#
+# @opaque: A free-form string that can be used to describe the fd.
+#
+# Returns: - @AddfdInfo on success
+#          - If file descriptor was not received, FdNotSupplied
+#          - If @fdset-id is a negative value, InvalidParameterValue
+#
+# Notes: The list of fd sets is shared by all monitor connections.
+#
+#        If @fdset-id is not specified, a new fd set will be created.
+#
+# Since: 1.2.0
+#
+# Example:
+#
+# -> { "execute": "add-fd", "arguments": { "fdset-id": 1 } }
+# <- { "return": { "fdset-id": 1, "fd": 3 } }
+#
+##
+{ 'command': 'add-fd',
+  'data': { '*fdset-id': 'int',
+            '*opaque': 'str' },
+  'returns': 'AddfdInfo' }
+
+##
+# @remove-fd:
+#
+# Remove a file descriptor from an fd set.
+#
+# @fdset-id: The ID of the fd set that the file descriptor belongs to.
+#
+# @fd: The file descriptor that is to be removed.
+#
+# Returns: - Nothing on success
+#          - If @fdset-id or @fd is not found, FdNotFound
+#
+# Since: 1.2.0
+#
+# Notes: The list of fd sets is shared by all monitor connections.
+#
+#        If @fd is not specified, all file descriptors in @fdset-id
+#        will be removed.
+#
+# Example:
+#
+# -> { "execute": "remove-fd", "arguments": { "fdset-id": 1, "fd": 3 } }
+# <- { "return": {} }
+#
+##
+{ 'command': 'remove-fd', 'data': {'fdset-id': 'int', '*fd': 'int'} }
+
+##
+# @FdsetFdInfo:
+#
+# Information about a file descriptor that belongs to an fd set.
+#
+# @fd: The file descriptor value.
+#
+# @opaque: A free-form string that can be used to describe the fd.
+#
+# Since: 1.2.0
+##
+{ 'struct': 'FdsetFdInfo',
+  'data': {'fd': 'int', '*opaque': 'str'} }
+
+##
+# @FdsetInfo:
+#
+# Information about an fd set.
+#
+# @fdset-id: The ID of the fd set.
+#
+# @fds: A list of file descriptors that belong to this fd set.
+#
+# Since: 1.2.0
+##
+{ 'struct': 'FdsetInfo',
+  'data': {'fdset-id': 'int', 'fds': ['FdsetFdInfo']} }
+
+##
+# @query-fdsets:
+#
+# Return information describing all fd sets.
+#
+# Returns: A list of @FdsetInfo
+#
+# Since: 1.2.0
+#
+# Note: The list of fd sets is shared by all monitor connections.
+#
+# Example:
+#
+# -> { "execute": "query-fdsets" }
+# <- { "return": [
+#        {
+#          "fds": [
+#            {
+#              "fd": 30,
+#              "opaque": "rdonly:/path/to/file"
+#            },
+#            {
+#              "fd": 24,
+#              "opaque": "rdwr:/path/to/file"
+#            }
+#          ],
+#          "fdset-id": 1
+#        },
+#        {
+#          "fds": [
+#            {
+#              "fd": 28
+#            },
+#            {
+#              "fd": 29
+#            }
+#          ],
+#          "fdset-id": 0
+#        }
+#      ]
+#    }
+#
+##
+{ 'command': 'query-fdsets', 'returns': ['FdsetInfo'] }
+
+##
+# @CommandLineParameterType:
+#
+# Possible types for an option parameter.
+#
+# @string: accepts a character string
+#
+# @boolean: accepts "on" or "off"
+#
+# @number: accepts a number
+#
+# @size: accepts a number followed by an optional suffix (K)ilo,
+#        (M)ega, (G)iga, (T)era
+#
+# Since: 1.5
+##
+{ 'enum': 'CommandLineParameterType',
+  'data': ['string', 'boolean', 'number', 'size'] }
+
+##
+# @CommandLineParameterInfo:
+#
+# Details about a single parameter of a command line option.
+#
+# @name: parameter name
+#
+# @type: parameter @CommandLineParameterType
+#
+# @help: human readable text string, not suitable for parsing.
+#
+# @default: default value string (since 2.1)
+#
+# Since: 1.5
+##
+{ 'struct': 'CommandLineParameterInfo',
+  'data': { 'name': 'str',
+            'type': 'CommandLineParameterType',
+            '*help': 'str',
+            '*default': 'str' } }
+
+##
+# @CommandLineOptionInfo:
+#
+# Details about a command line option, including its list of parameter details
+#
+# @option: option name
+#
+# @parameters: an array of @CommandLineParameterInfo
+#
+# Since: 1.5
+##
+{ 'struct': 'CommandLineOptionInfo',
+  'data': { 'option': 'str', 'parameters': ['CommandLineParameterInfo'] } }
+
+##
+# @query-command-line-options:
+#
+# Query command line option schema.
+#
+# @option: option name
+#
+# Returns: list of @CommandLineOptionInfo for all options (or for the given
+#          @option).  Returns an error if the given @option doesn't exist.
+#
+# Since: 1.5
+#
+# Example:
+#
+# -> { "execute": "query-command-line-options",
+#      "arguments": { "option": "option-rom" } }
+# <- { "return": [
+#         {
+#             "parameters": [
+#                 {
+#                     "name": "romfile",
+#                     "type": "string"
+#                 },
+#                 {
+#                     "name": "bootindex",
+#                     "type": "number"
+#                 }
+#             ],
+#             "option": "option-rom"
+#         }
+#      ]
+#    }
+#
+##
+{'command': 'query-command-line-options',
+ 'data': { '*option': 'str' },
+ 'returns': ['CommandLineOptionInfo'],
+ 'allow-preconfig': true }
diff --git a/qapi/net.json b/qapi/net.json
new file mode 100644
index 000000000..a3a133600
--- /dev/null
+++ b/qapi/net.json
@@ -0,0 +1,716 @@
+# -*- Mode: Python -*-
+# vim: filetype=python
+#
+
+##
+# = Net devices
+##
+
+{ 'include': 'common.json' }
+
+##
+# @set_link:
+#
+# Sets the link status of a virtual network adapter.
+#
+# @name: the device name of the virtual network adapter
+#
+# @up: true to set the link status to be up
+#
+# Returns: Nothing on success
+#          If @name is not a valid network device, DeviceNotFound
+#
+# Since: 0.14.0
+#
+# Notes: Not all network adapters support setting link status.  This command
+#        will succeed even if the network adapter does not support link status
+#        notification.
+#
+# Example:
+#
+# -> { "execute": "set_link",
+#      "arguments": { "name": "e1000.0", "up": false } }
+# <- { "return": {} }
+#
+##
+{ 'command': 'set_link', 'data': {'name': 'str', 'up': 'bool'} }
+
+##
+# @netdev_add:
+#
+# Add a network backend.
+#
+# Additional arguments depend on the type.
+#
+# Since: 0.14.0
+#
+# Returns: Nothing on success
+#          If @type is not a valid network backend, DeviceNotFound
+#
+# Example:
+#
+# -> { "execute": "netdev_add",
+#      "arguments": { "type": "user", "id": "netdev1",
+#                     "dnssearch": "example.org" } }
+# <- { "return": {} }
+#
+##
+{ 'command': 'netdev_add', 'data': 'Netdev', 'boxed': true }
+
+##
+# @netdev_del:
+#
+# Remove a network backend.
+#
+# @id: the name of the network backend to remove
+#
+# Returns: Nothing on success
+#          If @id is not a valid network backend, DeviceNotFound
+#
+# Since: 0.14.0
+#
+# Example:
+#
+# -> { "execute": "netdev_del", "arguments": { "id": "netdev1" } }
+# <- { "return": {} }
+#
+##
+{ 'command': 'netdev_del', 'data': {'id': 'str'} }
+
+##
+# @NetLegacyNicOptions:
+#
+# Create a new Network Interface Card.
+#
+# @netdev: id of -netdev to connect to
+#
+# @macaddr: MAC address
+#
+# @model: device model (e1000, rtl8139, virtio etc.)
+#
+# @addr: PCI device address
+#
+# @vectors: number of MSI-x vectors, 0 to disable MSI-X
+#
+# Since: 1.2
+##
+{ 'struct': 'NetLegacyNicOptions',
+  'data': {
+    '*netdev':  'str',
+    '*macaddr': 'str',
+    '*model':   'str',
+    '*addr':    'str',
+    '*vectors': 'uint32' } }
+
+##
+# @NetdevUserOptions:
+#
+# Use the user mode network stack which requires no administrator privilege to
+# run.
+#
+# @hostname: client hostname reported by the builtin DHCP server
+#
+# @restrict: isolate the guest from the host
+#
+# @ipv4: whether to support IPv4, default true for enabled
+#        (since 2.6)
+#
+# @ipv6: whether to support IPv6, default true for enabled
+#        (since 2.6)
+#
+# @ip: legacy parameter, use net= instead
+#
+# @net: IP network address that the guest will see, in the
+#       form addr[/netmask] The netmask is optional, and can be
+#       either in the form a.b.c.d or as a number of valid top-most
+#       bits. Default is 10.0.2.0/24.
+#
+# @host: guest-visible address of the host
+#
+# @tftp: root directory of the built-in TFTP server
+#
+# @bootfile: BOOTP filename, for use with tftp=
+#
+# @dhcpstart: the first of the 16 IPs the built-in DHCP server can
+#             assign
+#
+# @dns: guest-visible address of the virtual nameserver
+#
+# @dnssearch: list of DNS suffixes to search, passed as DHCP option
+#             to the guest
+#
+# @domainname: guest-visible domain name of the virtual nameserver
+#              (since 3.0)
+#
+# @ipv6-prefix: IPv6 network prefix (default is fec0::) (since
+#               2.6). The network prefix is given in the usual
+#               hexadecimal IPv6 address notation.
+#
+# @ipv6-prefixlen: IPv6 network prefix length (default is 64)
+#                  (since 2.6)
+#
+# @ipv6-host: guest-visible IPv6 address of the host (since 2.6)
+#
+# @ipv6-dns: guest-visible IPv6 address of the virtual
+#            nameserver (since 2.6)
+#
+# @smb: root directory of the built-in SMB server
+#
+# @smbserver: IP address of the built-in SMB server
+#
+# @hostfwd: redirect incoming TCP or UDP host connections to guest
+#           endpoints
+#
+# @guestfwd: forward guest TCP connections
+#
+# @tftp-server-name: RFC2132 "TFTP server name" string (Since 3.1)
+#
+# Since: 1.2
+##
+{ 'struct': 'NetdevUserOptions',
+  'data': {
+    '*hostname':  'str',
+    '*restrict':  'bool',
+    '*ipv4':      'bool',
+    '*ipv6':      'bool',
+    '*ip':        'str',
+    '*net':       'str',
+    '*host':      'str',
+    '*tftp':      'str',
+    '*bootfile':  'str',
+    '*dhcpstart': 'str',
+    '*dns':       'str',
+    '*dnssearch': ['String'],
+    '*domainname': 'str',
+    '*ipv6-prefix':      'str',
+    '*ipv6-prefixlen':   'int',
+    '*ipv6-host':        'str',
+    '*ipv6-dns':         'str',
+    '*smb':       'str',
+    '*smbserver': 'str',
+    '*hostfwd':   ['String'],
+    '*guestfwd':  ['String'],
+    '*tftp-server-name': 'str' } }
+
+##
+# @NetdevTapOptions:
+#
+# Used to configure a host TAP network interface backend.
+#
+# @ifname: interface name
+#
+# @fd: file descriptor of an already opened tap
+#
+# @fds: multiple file descriptors of already opened multiqueue capable
+#       tap
+#
+# @script: script to initialize the interface
+#
+# @downscript: script to shut down the interface
+#
+# @br: bridge name (since 2.8)
+#
+# @helper: command to execute to configure bridge
+#
+# @sndbuf: send buffer limit. Understands [TGMKkb] suffixes.
+#
+# @vnet_hdr: enable the IFF_VNET_HDR flag on the tap interface
+#
+# @vhost: enable vhost-net network accelerator
+#
+# @vhostfd: file descriptor of an already opened vhost net device
+#
+# @vhostfds: file descriptors of multiple already opened vhost net
+#            devices
+#
+# @vhostforce: vhost on for non-MSIX virtio guests
+#
+# @queues: number of queues to be created for multiqueue capable tap
+#
+# @poll-us: maximum number of microseconds that could
+#           be spent on busy polling for tap (since 2.7)
+#
+# Since: 1.2
+##
+{ 'struct': 'NetdevTapOptions',
+  'data': {
+    '*ifname':     'str',
+    '*fd':         'str',
+    '*fds':        'str',
+    '*script':     'str',
+    '*downscript': 'str',
+    '*br':         'str',
+    '*helper':     'str',
+    '*sndbuf':     'size',
+    '*vnet_hdr':   'bool',
+    '*vhost':      'bool',
+    '*vhostfd':    'str',
+    '*vhostfds':   'str',
+    '*vhostforce': 'bool',
+    '*queues':     'uint32',
+    '*poll-us':    'uint32'} }
+
+##
+# @NetdevSocketOptions:
+#
+# Socket netdevs are used to establish a network connection to another
+# QEMU virtual machine via a TCP socket.
+#
+# @fd: file descriptor of an already opened socket
+#
+# @listen: port number, and optional hostname, to listen on
+#
+# @connect: port number, and optional hostname, to connect to
+#
+# @mcast: UDP multicast address and port number
+#
+# @localaddr: source address and port for multicast and udp packets
+#
+# @udp: UDP unicast address and port number
+#
+# Since: 1.2
+##
+{ 'struct': 'NetdevSocketOptions',
+  'data': {
+    '*fd':        'str',
+    '*listen':    'str',
+    '*connect':   'str',
+    '*mcast':     'str',
+    '*localaddr': 'str',
+    '*udp':       'str' } }
+
+##
+# @NetdevL2TPv3Options:
+#
+# Configure an Ethernet over L2TPv3 tunnel.
+#
+# @src: source address
+#
+# @dst: destination address
+#
+# @srcport: source port - mandatory for udp, optional for ip
+#
+# @dstport: destination port - mandatory for udp, optional for ip
+#
+# @ipv6: force the use of ipv6
+#
+# @udp: use the udp version of l2tpv3 encapsulation
+#
+# @cookie64: use 64 bit coookies
+#
+# @counter: have sequence counter
+#
+# @pincounter: pin sequence counter to zero -
+#              workaround for buggy implementations or
+#              networks with packet reorder
+#
+# @txcookie: 32 or 64 bit transmit cookie
+#
+# @rxcookie: 32 or 64 bit receive cookie
+#
+# @txsession: 32 bit transmit session
+#
+# @rxsession: 32 bit receive session - if not specified
+#             set to the same value as transmit
+#
+# @offset: additional offset - allows the insertion of
+#          additional application-specific data before the packet payload
+#
+# Since: 2.1
+##
+{ 'struct': 'NetdevL2TPv3Options',
+  'data': {
+    'src':          'str',
+    'dst':          'str',
+    '*srcport':     'str',
+    '*dstport':     'str',
+    '*ipv6':        'bool',
+    '*udp':         'bool',
+    '*cookie64':    'bool',
+    '*counter':     'bool',
+    '*pincounter':  'bool',
+    '*txcookie':    'uint64',
+    '*rxcookie':    'uint64',
+    'txsession':    'uint32',
+    '*rxsession':   'uint32',
+    '*offset':      'uint32' } }
+
+##
+# @NetdevVdeOptions:
+#
+# Connect to a vde switch running on the host.
+#
+# @sock: socket path
+#
+# @port: port number
+#
+# @group: group owner of socket
+#
+# @mode: permissions for socket
+#
+# Since: 1.2
+##
+{ 'struct': 'NetdevVdeOptions',
+  'data': {
+    '*sock':  'str',
+    '*port':  'uint16',
+    '*group': 'str',
+    '*mode':  'uint16' } }
+
+##
+# @NetdevBridgeOptions:
+#
+# Connect a host TAP network interface to a host bridge device.
+#
+# @br: bridge name
+#
+# @helper: command to execute to configure bridge
+#
+# Since: 1.2
+##
+{ 'struct': 'NetdevBridgeOptions',
+  'data': {
+    '*br':     'str',
+    '*helper': 'str' } }
+
+##
+# @NetdevHubPortOptions:
+#
+# Connect two or more net clients through a software hub.
+#
+# @hubid: hub identifier number
+# @netdev: used to connect hub to a netdev instead of a device (since 2.12)
+#
+# Since: 1.2
+##
+{ 'struct': 'NetdevHubPortOptions',
+  'data': {
+    'hubid':     'int32',
+    '*netdev':    'str' } }
+
+##
+# @NetdevNetmapOptions:
+#
+# Connect a client to a netmap-enabled NIC or to a VALE switch port
+#
+# @ifname: Either the name of an existing network interface supported by
+#          netmap, or the name of a VALE port (created on the fly).
+#          A VALE port name is in the form 'valeXXX:YYY', where XXX and
+#          YYY are non-negative integers. XXX identifies a switch and
+#          YYY identifies a port of the switch. VALE ports having the
+#          same XXX are therefore connected to the same switch.
+#
+# @devname: path of the netmap device (default: '/dev/netmap').
+#
+# Since: 2.0
+##
+{ 'struct': 'NetdevNetmapOptions',
+  'data': {
+    'ifname':     'str',
+    '*devname':    'str' } }
+
+##
+# @NetdevVhostUserOptions:
+#
+# Vhost-user network backend
+#
+# @chardev: name of a unix socket chardev
+#
+# @vhostforce: vhost on for non-MSIX virtio guests (default: false).
+#
+# @queues: number of queues to be created for multiqueue vhost-user
+#          (default: 1) (Since 2.5)
+#
+# Since: 2.1
+##
+{ 'struct': 'NetdevVhostUserOptions',
+  'data': {
+    'chardev':        'str',
+    '*vhostforce':    'bool',
+    '*queues':        'int' } }
+
+##
+# @NetdevVhostVDPAOptions:
+#
+# Vhost-vdpa network backend
+#
+# vDPA device is a device that uses a datapath which complies with the virtio
+# specifications with a vendor specific control path.
+#
+# @vhostdev: path of vhost-vdpa device
+#            (default:'/dev/vhost-vdpa-0')
+#
+# @queues: number of queues to be created for multiqueue vhost-vdpa
+#          (default: 1)
+#
+# Since: 5.1
+##
+{ 'struct': 'NetdevVhostVDPAOptions',
+  'data': {
+    '*vhostdev':     'str',
+    '*queues':       'int' } }
+
+##
+# @NetClientDriver:
+#
+# Available netdev drivers.
+#
+# Since: 2.7
+#
+#        @vhost-vdpa since 5.1
+##
+{ 'enum': 'NetClientDriver',
+  'data': [ 'none', 'nic', 'user', 'tap', 'l2tpv3', 'socket', 'vde',
+            'bridge', 'hubport', 'netmap', 'vhost-user', 'vhost-vdpa' ] }
+
+##
+# @Netdev:
+#
+# Captures the configuration of a network device.
+#
+# @id: identifier for monitor commands.
+#
+# @type: Specify the driver used for interpreting remaining arguments.
+#
+# Since: 1.2
+#
+#        'l2tpv3' - since 2.1
+##
+{ 'union': 'Netdev',
+  'base': { 'id': 'str', 'type': 'NetClientDriver' },
+  'discriminator': 'type',
+  'data': {
+    'nic':      'NetLegacyNicOptions',
+    'user':     'NetdevUserOptions',
+    'tap':      'NetdevTapOptions',
+    'l2tpv3':   'NetdevL2TPv3Options',
+    'socket':   'NetdevSocketOptions',
+    'vde':      'NetdevVdeOptions',
+    'bridge':   'NetdevBridgeOptions',
+    'hubport':  'NetdevHubPortOptions',
+    'netmap':   'NetdevNetmapOptions',
+    'vhost-user': 'NetdevVhostUserOptions',
+    'vhost-vdpa': 'NetdevVhostVDPAOptions' } }
+
+##
+# @NetFilterDirection:
+#
+# Indicates whether a netfilter is attached to a netdev's transmit queue or
+# receive queue or both.
+#
+# @all: the filter is attached both to the receive and the transmit
+#       queue of the netdev (default).
+#
+# @rx: the filter is attached to the receive queue of the netdev,
+#      where it will receive packets sent to the netdev.
+#
+# @tx: the filter is attached to the transmit queue of the netdev,
+#      where it will receive packets sent by the netdev.
+#
+# Since: 2.5
+##
+{ 'enum': 'NetFilterDirection',
+  'data': [ 'all', 'rx', 'tx' ] }
+
+##
+# @RxState:
+#
+# Packets receiving state
+#
+# @normal: filter assigned packets according to the mac-table
+#
+# @none: don't receive any assigned packet
+#
+# @all: receive all assigned packets
+#
+# Since: 1.6
+##
+{ 'enum': 'RxState', 'data': [ 'normal', 'none', 'all' ] }
+
+##
+# @RxFilterInfo:
+#
+# Rx-filter information for a NIC.
+#
+# @name: net client name
+#
+# @promiscuous: whether promiscuous mode is enabled
+#
+# @multicast: multicast receive state
+#
+# @unicast: unicast receive state
+#
+# @vlan: vlan receive state (Since 2.0)
+#
+# @broadcast-allowed: whether to receive broadcast
+#
+# @multicast-overflow: multicast table is overflowed or not
+#
+# @unicast-overflow: unicast table is overflowed or not
+#
+# @main-mac: the main macaddr string
+#
+# @vlan-table: a list of active vlan id
+#
+# @unicast-table: a list of unicast macaddr string
+#
+# @multicast-table: a list of multicast macaddr string
+#
+# Since: 1.6
+##
+{ 'struct': 'RxFilterInfo',
+  'data': {
+    'name':               'str',
+    'promiscuous':        'bool',
+    'multicast':          'RxState',
+    'unicast':            'RxState',
+    'vlan':               'RxState',
+    'broadcast-allowed':  'bool',
+    'multicast-overflow': 'bool',
+    'unicast-overflow':   'bool',
+    'main-mac':           'str',
+    'vlan-table':         ['int'],
+    'unicast-table':      ['str'],
+    'multicast-table':    ['str'] }}
+
+##
+# @query-rx-filter:
+#
+# Return rx-filter information for all NICs (or for the given NIC).
+#
+# @name: net client name
+#
+# Returns: list of @RxFilterInfo for all NICs (or for the given NIC).
+#          Returns an error if the given @name doesn't exist, or given
+#          NIC doesn't support rx-filter querying, or given net client
+#          isn't a NIC.
+#
+# Since: 1.6
+#
+# Example:
+#
+# -> { "execute": "query-rx-filter", "arguments": { "name": "vnet0" } }
+# <- { "return": [
+#         {
+#             "promiscuous": true,
+#             "name": "vnet0",
+#             "main-mac": "52:54:00:12:34:56",
+#             "unicast": "normal",
+#             "vlan": "normal",
+#             "vlan-table": [
+#                 4,
+#                 0
+#             ],
+#             "unicast-table": [
+#             ],
+#             "multicast": "normal",
+#             "multicast-overflow": false,
+#             "unicast-overflow": false,
+#             "multicast-table": [
+#                 "01:00:5e:00:00:01",
+#                 "33:33:00:00:00:01",
+#                 "33:33:ff:12:34:56"
+#             ],
+#             "broadcast-allowed": false
+#         }
+#       ]
+#    }
+#
+##
+{ 'command': 'query-rx-filter',
+  'data': { '*name': 'str' },
+  'returns': ['RxFilterInfo'] }
+
+##
+# @NIC_RX_FILTER_CHANGED:
+#
+# Emitted once until the 'query-rx-filter' command is executed, the first event
+# will always be emitted
+#
+# @name: net client name
+#
+# @path: device path
+#
+# Since: 1.6
+#
+# Example:
+#
+# <- { "event": "NIC_RX_FILTER_CHANGED",
+#      "data": { "name": "vnet0",
+#                "path": "/machine/peripheral/vnet0/virtio-backend" },
+#      "timestamp": { "seconds": 1368697518, "microseconds": 326866 } }
+#    }
+#
+##
+{ 'event': 'NIC_RX_FILTER_CHANGED',
+  'data': { '*name': 'str', 'path': 'str' } }
+
+##
+# @AnnounceParameters:
+#
+# Parameters for self-announce timers
+#
+# @initial: Initial delay (in ms) before sending the first GARP/RARP
+#           announcement
+#
+# @max: Maximum delay (in ms) between GARP/RARP announcement packets
+#
+# @rounds: Number of self-announcement attempts
+#
+# @step: Delay increase (in ms) after each self-announcement attempt
+#
+# @interfaces: An optional list of interface names, which restricts the
+#              announcement to the listed interfaces. (Since 4.1)
+#
+# @id: A name to be used to identify an instance of announce-timers
+#      and to allow it to modified later.  Not for use as
+#      part of the migration parameters. (Since 4.1)
+#
+# Since: 4.0
+##
+
+{ 'struct': 'AnnounceParameters',
+  'data': { 'initial': 'int',
+            'max': 'int',
+            'rounds': 'int',
+            'step': 'int',
+            '*interfaces': ['str'],
+            '*id' : 'str' } }
+
+##
+# @announce-self:
+#
+# Trigger generation of broadcast RARP frames to update network switches.
+# This can be useful when network bonds fail-over the active slave.
+#
+# Example:
+#
+# -> { "execute": "announce-self",
+#      "arguments": {
+#          "initial": 50, "max": 550, "rounds": 10, "step": 50,
+#          "interfaces": ["vn2", "vn3"], "id": "bob" } }
+# <- { "return": {} }
+#
+# Since: 4.0
+##
+{ 'command': 'announce-self', 'boxed': true,
+  'data' : 'AnnounceParameters'}
+
+##
+# @FAILOVER_NEGOTIATED:
+#
+# Emitted when VIRTIO_NET_F_STANDBY was enabled during feature negotiation.
+# Failover primary devices which were hidden (not hotplugged when requested)
+# before will now be hotplugged by the virtio-net standby device.
+#
+# device-id: QEMU device id of the unplugged device
+# Since: 4.2
+#
+# Example:
+#
+# <- { "event": "FAILOVER_NEGOTIATED",
+#      "data": "net1" }
+#
+##
+{ 'event': 'FAILOVER_NEGOTIATED',
+  'data': {'device-id': 'str'} }
diff --git a/qapi/opts-visitor.c b/qapi/opts-visitor.c
new file mode 100644
index 000000000..587f31baf
--- /dev/null
+++ b/qapi/opts-visitor.c
@@ -0,0 +1,586 @@
+/*
+ * Options Visitor
+ *
+ * Copyright Red Hat, Inc. 2012-2016
+ *
+ * Author: Laszlo Ersek 
+ *
+ * This work is licensed under the terms of the GNU LGPL, version 2.1 or later.
+ * See the COPYING.LIB file in the top-level directory.
+ *
+ */
+
+#include "qemu/osdep.h"
+#include "qapi/error.h"
+#include "qemu/cutils.h"
+#include "qapi/qmp/qerror.h"
+#include "qapi/opts-visitor.h"
+#include "qemu/queue.h"
+#include "qemu/option_int.h"
+#include "qapi/visitor-impl.h"
+
+
+enum ListMode
+{
+    LM_NONE,             /* not traversing a list of repeated options */
+
+    LM_IN_PROGRESS,      /*
+                          * opts_next_list() ready to be called.
+                          *
+                          * Generating the next list link will consume the most
+                          * recently parsed QemuOpt instance of the repeated
+                          * option.
+                          *
+                          * Parsing a value into the list link will examine the
+                          * next QemuOpt instance of the repeated option, and
+                          * possibly enter LM_SIGNED_INTERVAL or
+                          * LM_UNSIGNED_INTERVAL.
+                          */
+
+    LM_SIGNED_INTERVAL,  /*
+                          * opts_next_list() has been called.
+                          *
+                          * Generating the next list link will consume the most
+                          * recently stored element from the signed interval,
+                          * parsed from the most recent QemuOpt instance of the
+                          * repeated option. This may consume QemuOpt itself
+                          * and return to LM_IN_PROGRESS.
+                          *
+                          * Parsing a value into the list link will store the
+                          * next element of the signed interval.
+                          */
+
+    LM_UNSIGNED_INTERVAL, /* Same as above, only for an unsigned interval. */
+
+    LM_TRAVERSED          /*
+                           * opts_next_list() has been called.
+                           *
+                           * No more QemuOpt instance in the list.
+                           * The traversal has been completed.
+                           */
+};
+
+typedef enum ListMode ListMode;
+
+struct OptsVisitor
+{
+    Visitor visitor;
+
+    /* Ownership remains with opts_visitor_new()'s caller. */
+    const QemuOpts *opts_root;
+
+    unsigned depth;
+
+    /* Non-null iff depth is positive. Each key is a QemuOpt name. Each value
+     * is a non-empty GQueue, enumerating all QemuOpt occurrences with that
+     * name. */
+    GHashTable *unprocessed_opts;
+
+    /* The list currently being traversed with opts_start_list() /
+     * opts_next_list(). The list must have a struct element type in the
+     * schema, with a single mandatory scalar member. */
+    ListMode list_mode;
+    GQueue *repeated_opts;
+
+    /* When parsing a list of repeating options as integers, values of the form
+     * "a-b", representing a closed interval, are allowed. Elements in the
+     * range are generated individually.
+     */
+    union {
+        int64_t s;
+        uint64_t u;
+    } range_next, range_limit;
+
+    /* If "opts_root->id" is set, reinstantiate it as a fake QemuOpt for
+     * uniformity. Only its "name" and "str" fields are set. "fake_id_opt" does
+     * not survive or escape the OptsVisitor object.
+     */
+    QemuOpt *fake_id_opt;
+};
+
+
+static OptsVisitor *to_ov(Visitor *v)
+{
+    return container_of(v, OptsVisitor, visitor);
+}
+
+
+static void
+destroy_list(gpointer list)
+{
+  g_queue_free(list);
+}
+
+
+static void
+opts_visitor_insert(GHashTable *unprocessed_opts, const QemuOpt *opt)
+{
+    GQueue *list;
+
+    list = g_hash_table_lookup(unprocessed_opts, opt->name);
+    if (list == NULL) {
+        list = g_queue_new();
+
+        /* GHashTable will never try to free the keys -- we supply NULL as
+         * "key_destroy_func" in opts_start_struct(). Thus cast away key
+         * const-ness in order to suppress gcc's warning.
+         */
+        g_hash_table_insert(unprocessed_opts, (gpointer)opt->name, list);
+    }
+
+    /* Similarly, destroy_list() doesn't call g_queue_free_full(). */
+    g_queue_push_tail(list, (gpointer)opt);
+}
+
+
+static bool
+opts_start_struct(Visitor *v, const char *name, void **obj,
+                  size_t size, Error **errp)
+{
+    OptsVisitor *ov = to_ov(v);
+    const QemuOpt *opt;
+
+    if (obj) {
+        *obj = g_malloc0(size);
+    }
+    if (ov->depth++ > 0) {
+        return true;
+    }
+
+    ov->unprocessed_opts = g_hash_table_new_full(&g_str_hash, &g_str_equal,
+                                                 NULL, &destroy_list);
+    QTAILQ_FOREACH(opt, &ov->opts_root->head, next) {
+        /* ensured by qemu-option.c::opts_do_parse() */
+        assert(strcmp(opt->name, "id") != 0);
+
+        opts_visitor_insert(ov->unprocessed_opts, opt);
+    }
+
+    if (ov->opts_root->id != NULL) {
+        ov->fake_id_opt = g_malloc0(sizeof *ov->fake_id_opt);
+
+        ov->fake_id_opt->name = g_strdup("id");
+        ov->fake_id_opt->str = g_strdup(ov->opts_root->id);
+        opts_visitor_insert(ov->unprocessed_opts, ov->fake_id_opt);
+    }
+    return true;
+}
+
+
+static bool
+opts_check_struct(Visitor *v, Error **errp)
+{
+    OptsVisitor *ov = to_ov(v);
+    GHashTableIter iter;
+    GQueue *any;
+
+    if (ov->depth > 1) {
+        return true;
+    }
+
+    /* we should have processed all (distinct) QemuOpt instances */
+    g_hash_table_iter_init(&iter, ov->unprocessed_opts);
+    if (g_hash_table_iter_next(&iter, NULL, (void **)&any)) {
+        const QemuOpt *first;
+
+        first = g_queue_peek_head(any);
+        error_setg(errp, QERR_INVALID_PARAMETER, first->name);
+        return false;
+    }
+    return true;
+}
+
+
+static void
+opts_end_struct(Visitor *v, void **obj)
+{
+    OptsVisitor *ov = to_ov(v);
+
+    if (--ov->depth > 0) {
+        return;
+    }
+
+    g_hash_table_destroy(ov->unprocessed_opts);
+    ov->unprocessed_opts = NULL;
+    if (ov->fake_id_opt) {
+        g_free(ov->fake_id_opt->name);
+        g_free(ov->fake_id_opt->str);
+        g_free(ov->fake_id_opt);
+    }
+    ov->fake_id_opt = NULL;
+}
+
+
+static GQueue *
+lookup_distinct(const OptsVisitor *ov, const char *name, Error **errp)
+{
+    GQueue *list;
+
+    list = g_hash_table_lookup(ov->unprocessed_opts, name);
+    if (!list) {
+        error_setg(errp, QERR_MISSING_PARAMETER, name);
+    }
+    return list;
+}
+
+
+static bool
+opts_start_list(Visitor *v, const char *name, GenericList **list, size_t size,
+                Error **errp)
+{
+    OptsVisitor *ov = to_ov(v);
+
+    /* we can't traverse a list in a list */
+    assert(ov->list_mode == LM_NONE);
+    /* we don't support visits without a list */
+    assert(list);
+    ov->repeated_opts = lookup_distinct(ov, name, errp);
+    if (!ov->repeated_opts) {
+        *list = NULL;
+        return false;
+    }
+    ov->list_mode = LM_IN_PROGRESS;
+    *list = g_malloc0(size);
+    return true;
+}
+
+
+static GenericList *
+opts_next_list(Visitor *v, GenericList *tail, size_t size)
+{
+    OptsVisitor *ov = to_ov(v);
+
+    switch (ov->list_mode) {
+    case LM_TRAVERSED:
+        return NULL;
+    case LM_SIGNED_INTERVAL:
+    case LM_UNSIGNED_INTERVAL:
+        if (ov->list_mode == LM_SIGNED_INTERVAL) {
+            if (ov->range_next.s < ov->range_limit.s) {
+                ++ov->range_next.s;
+                break;
+            }
+        } else if (ov->range_next.u < ov->range_limit.u) {
+            ++ov->range_next.u;
+            break;
+        }
+        ov->list_mode = LM_IN_PROGRESS;
+        /* range has been completed, fall through in order to pop option */
+
+    case LM_IN_PROGRESS: {
+        const QemuOpt *opt;
+
+        opt = g_queue_pop_head(ov->repeated_opts);
+        if (g_queue_is_empty(ov->repeated_opts)) {
+            g_hash_table_remove(ov->unprocessed_opts, opt->name);
+            ov->repeated_opts = NULL;
+            ov->list_mode = LM_TRAVERSED;
+            return NULL;
+        }
+        break;
+    }
+
+    default:
+        abort();
+    }
+
+    tail->next = g_malloc0(size);
+    return tail->next;
+}
+
+
+static bool
+opts_check_list(Visitor *v, Error **errp)
+{
+    /*
+     * Unvisited list elements will be reported later when checking
+     * whether unvisited struct members remain.
+     */
+    return true;
+}
+
+
+static void
+opts_end_list(Visitor *v, void **obj)
+{
+    OptsVisitor *ov = to_ov(v);
+
+    assert(ov->list_mode == LM_IN_PROGRESS ||
+           ov->list_mode == LM_SIGNED_INTERVAL ||
+           ov->list_mode == LM_UNSIGNED_INTERVAL ||
+           ov->list_mode == LM_TRAVERSED);
+    ov->repeated_opts = NULL;
+    ov->list_mode = LM_NONE;
+}
+
+
+static const QemuOpt *
+lookup_scalar(const OptsVisitor *ov, const char *name, Error **errp)
+{
+    if (ov->list_mode == LM_NONE) {
+        GQueue *list;
+
+        /* the last occurrence of any QemuOpt takes effect when queried by name
+         */
+        list = lookup_distinct(ov, name, errp);
+        return list ? g_queue_peek_tail(list) : NULL;
+    }
+    if (ov->list_mode == LM_TRAVERSED) {
+        error_setg(errp, "Fewer list elements than expected");
+        return NULL;
+    }
+    assert(ov->list_mode == LM_IN_PROGRESS);
+    return g_queue_peek_head(ov->repeated_opts);
+}
+
+
+static void
+processed(OptsVisitor *ov, const char *name)
+{
+    if (ov->list_mode == LM_NONE) {
+        g_hash_table_remove(ov->unprocessed_opts, name);
+        return;
+    }
+    assert(ov->list_mode == LM_IN_PROGRESS);
+    /* do nothing */
+}
+
+
+static bool
+opts_type_str(Visitor *v, const char *name, char **obj, Error **errp)
+{
+    OptsVisitor *ov = to_ov(v);
+    const QemuOpt *opt;
+
+    opt = lookup_scalar(ov, name, errp);
+    if (!opt) {
+        *obj = NULL;
+        return false;
+    }
+    *obj = g_strdup(opt->str ? opt->str : "");
+    /* Note that we consume a string even if this is called as part of
+     * an enum visit that later fails because the string is not a
+     * valid enum value; this is harmless because tracking what gets
+     * consumed only matters to visit_end_struct() as the final error
+     * check if there were no other failures during the visit.  */
+    processed(ov, name);
+    return true;
+}
+
+
+static bool
+opts_type_bool(Visitor *v, const char *name, bool *obj, Error **errp)
+{
+    OptsVisitor *ov = to_ov(v);
+    const QemuOpt *opt;
+
+    opt = lookup_scalar(ov, name, errp);
+    if (!opt) {
+        return false;
+    }
+    if (opt->str) {
+        if (!qapi_bool_parse(opt->name, opt->str, obj, errp)) {
+            return false;
+        }
+    } else {
+        *obj = true;
+    }
+
+    processed(ov, name);
+    return true;
+}
+
+
+static bool
+opts_type_int64(Visitor *v, const char *name, int64_t *obj, Error **errp)
+{
+    OptsVisitor *ov = to_ov(v);
+    const QemuOpt *opt;
+    const char *str;
+    long long val;
+    char *endptr;
+
+    if (ov->list_mode == LM_SIGNED_INTERVAL) {
+        *obj = ov->range_next.s;
+        return true;
+    }
+
+    opt = lookup_scalar(ov, name, errp);
+    if (!opt) {
+        return false;
+    }
+    str = opt->str ? opt->str : "";
+
+    /* we've gotten past lookup_scalar() */
+    assert(ov->list_mode == LM_NONE || ov->list_mode == LM_IN_PROGRESS);
+
+    errno = 0;
+    val = strtoll(str, &endptr, 0);
+    if (errno == 0 && endptr > str && INT64_MIN <= val && val <= INT64_MAX) {
+        if (*endptr == '\0') {
+            *obj = val;
+            processed(ov, name);
+            return true;
+        }
+        if (*endptr == '-' && ov->list_mode == LM_IN_PROGRESS) {
+            long long val2;
+
+            str = endptr + 1;
+            val2 = strtoll(str, &endptr, 0);
+            if (errno == 0 && endptr > str && *endptr == '\0' &&
+                INT64_MIN <= val2 && val2 <= INT64_MAX && val <= val2 &&
+                (val > INT64_MAX - OPTS_VISITOR_RANGE_MAX ||
+                 val2 < val + OPTS_VISITOR_RANGE_MAX)) {
+                ov->range_next.s = val;
+                ov->range_limit.s = val2;
+                ov->list_mode = LM_SIGNED_INTERVAL;
+
+                /* as if entering on the top */
+                *obj = ov->range_next.s;
+                return true;
+            }
+        }
+    }
+    error_setg(errp, QERR_INVALID_PARAMETER_VALUE, opt->name,
+               (ov->list_mode == LM_NONE) ? "an int64 value" :
+                                            "an int64 value or range");
+    return false;
+}
+
+
+static bool
+opts_type_uint64(Visitor *v, const char *name, uint64_t *obj, Error **errp)
+{
+    OptsVisitor *ov = to_ov(v);
+    const QemuOpt *opt;
+    const char *str;
+    unsigned long long val;
+    char *endptr;
+
+    if (ov->list_mode == LM_UNSIGNED_INTERVAL) {
+        *obj = ov->range_next.u;
+        return true;
+    }
+
+    opt = lookup_scalar(ov, name, errp);
+    if (!opt) {
+        return false;
+    }
+    str = opt->str;
+
+    /* we've gotten past lookup_scalar() */
+    assert(ov->list_mode == LM_NONE || ov->list_mode == LM_IN_PROGRESS);
+
+    if (parse_uint(str, &val, &endptr, 0) == 0 && val <= UINT64_MAX) {
+        if (*endptr == '\0') {
+            *obj = val;
+            processed(ov, name);
+            return true;
+        }
+        if (*endptr == '-' && ov->list_mode == LM_IN_PROGRESS) {
+            unsigned long long val2;
+
+            str = endptr + 1;
+            if (parse_uint_full(str, &val2, 0) == 0 &&
+                val2 <= UINT64_MAX && val <= val2 &&
+                val2 - val < OPTS_VISITOR_RANGE_MAX) {
+                ov->range_next.u = val;
+                ov->range_limit.u = val2;
+                ov->list_mode = LM_UNSIGNED_INTERVAL;
+
+                /* as if entering on the top */
+                *obj = ov->range_next.u;
+                return true;
+            }
+        }
+    }
+    error_setg(errp, QERR_INVALID_PARAMETER_VALUE, opt->name,
+               (ov->list_mode == LM_NONE) ? "a uint64 value" :
+                                            "a uint64 value or range");
+    return false;
+}
+
+
+static bool
+opts_type_size(Visitor *v, const char *name, uint64_t *obj, Error **errp)
+{
+    OptsVisitor *ov = to_ov(v);
+    const QemuOpt *opt;
+    int err;
+
+    opt = lookup_scalar(ov, name, errp);
+    if (!opt) {
+        return false;
+    }
+
+    err = qemu_strtosz(opt->str ? opt->str : "", NULL, obj);
+    if (err < 0) {
+        error_setg(errp, QERR_INVALID_PARAMETER_VALUE, opt->name,
+                   "a size value");
+        return false;
+    }
+
+    processed(ov, name);
+    return true;
+}
+
+
+static void
+opts_optional(Visitor *v, const char *name, bool *present)
+{
+    OptsVisitor *ov = to_ov(v);
+
+    /* we only support a single mandatory scalar field in a list node */
+    assert(ov->list_mode == LM_NONE);
+    *present = (lookup_distinct(ov, name, NULL) != NULL);
+}
+
+
+static void
+opts_free(Visitor *v)
+{
+    OptsVisitor *ov = to_ov(v);
+
+    if (ov->unprocessed_opts != NULL) {
+        g_hash_table_destroy(ov->unprocessed_opts);
+    }
+    g_free(ov->fake_id_opt);
+    g_free(ov);
+}
+
+
+Visitor *
+opts_visitor_new(const QemuOpts *opts)
+{
+    OptsVisitor *ov;
+
+    assert(opts);
+    ov = g_malloc0(sizeof *ov);
+
+    ov->visitor.type = VISITOR_INPUT;
+
+    ov->visitor.start_struct = &opts_start_struct;
+    ov->visitor.check_struct = &opts_check_struct;
+    ov->visitor.end_struct   = &opts_end_struct;
+
+    ov->visitor.start_list = &opts_start_list;
+    ov->visitor.next_list  = &opts_next_list;
+    ov->visitor.check_list = &opts_check_list;
+    ov->visitor.end_list   = &opts_end_list;
+
+    ov->visitor.type_int64  = &opts_type_int64;
+    ov->visitor.type_uint64 = &opts_type_uint64;
+    ov->visitor.type_size   = &opts_type_size;
+    ov->visitor.type_bool   = &opts_type_bool;
+    ov->visitor.type_str    = &opts_type_str;
+
+    /* type_number() is not filled in, but this is not the first visitor to
+     * skip some mandatory methods... */
+
+    ov->visitor.optional = &opts_optional;
+    ov->visitor.free = opts_free;
+
+    ov->opts_root = opts;
+
+    return &ov->visitor;
+}
diff --git a/qapi/pci.json b/qapi/pci.json
new file mode 100644
index 000000000..b79cbd787
--- /dev/null
+++ b/qapi/pci.json
@@ -0,0 +1,316 @@
+# -*- Mode: Python -*-
+# vim: filetype=python
+#
+# This work is licensed under the terms of the GNU GPL, version 2 or later.
+# See the COPYING file in the top-level directory.
+# SPDX-License-Identifier: GPL-2.0-or-later
+
+##
+# = PCI
+##
+
+##
+# @PciMemoryRange:
+#
+# A PCI device memory region
+#
+# @base: the starting address (guest physical)
+#
+# @limit: the ending address (guest physical)
+#
+# Since: 0.14.0
+##
+{ 'struct': 'PciMemoryRange', 'data': {'base': 'int', 'limit': 'int'} }
+
+##
+# @PciMemoryRegion:
+#
+# Information about a PCI device I/O region.
+#
+# @bar: the index of the Base Address Register for this region
+#
+# @type: - 'io' if the region is a PIO region
+#        - 'memory' if the region is a MMIO region
+#
+# @size: memory size
+#
+# @prefetch: if @type is 'memory', true if the memory is prefetchable
+#
+# @mem_type_64: if @type is 'memory', true if the BAR is 64-bit
+#
+# Since: 0.14.0
+##
+{ 'struct': 'PciMemoryRegion',
+  'data': {'bar': 'int', 'type': 'str', 'address': 'int', 'size': 'int',
+           '*prefetch': 'bool', '*mem_type_64': 'bool' } }
+
+##
+# @PciBusInfo:
+#
+# Information about a bus of a PCI Bridge device
+#
+# @number: primary bus interface number.  This should be the number of the
+#          bus the device resides on.
+#
+# @secondary: secondary bus interface number.  This is the number of the
+#             main bus for the bridge
+#
+# @subordinate: This is the highest number bus that resides below the
+#               bridge.
+#
+# @io_range: The PIO range for all devices on this bridge
+#
+# @memory_range: The MMIO range for all devices on this bridge
+#
+# @prefetchable_range: The range of prefetchable MMIO for all devices on
+#                      this bridge
+#
+# Since: 2.4
+##
+{ 'struct': 'PciBusInfo',
+  'data': {'number': 'int', 'secondary': 'int', 'subordinate': 'int',
+           'io_range': 'PciMemoryRange',
+           'memory_range': 'PciMemoryRange',
+           'prefetchable_range': 'PciMemoryRange' } }
+
+##
+# @PciBridgeInfo:
+#
+# Information about a PCI Bridge device
+#
+# @bus: information about the bus the device resides on
+#
+# @devices: a list of @PciDeviceInfo for each device on this bridge
+#
+# Since: 0.14.0
+##
+{ 'struct': 'PciBridgeInfo',
+  'data': {'bus': 'PciBusInfo', '*devices': ['PciDeviceInfo']} }
+
+##
+# @PciDeviceClass:
+#
+# Information about the Class of a PCI device
+#
+# @desc: a string description of the device's class
+#
+# @class: the class code of the device
+#
+# Since: 2.4
+##
+{ 'struct': 'PciDeviceClass',
+  'data': {'*desc': 'str', 'class': 'int'} }
+
+##
+# @PciDeviceId:
+#
+# Information about the Id of a PCI device
+#
+# @device: the PCI device id
+#
+# @vendor: the PCI vendor id
+#
+# @subsystem: the PCI subsystem id (since 3.1)
+#
+# @subsystem-vendor: the PCI subsystem vendor id (since 3.1)
+#
+# Since: 2.4
+##
+{ 'struct': 'PciDeviceId',
+  'data': {'device': 'int', 'vendor': 'int', '*subsystem': 'int',
+            '*subsystem-vendor': 'int'} }
+
+##
+# @PciDeviceInfo:
+#
+# Information about a PCI device
+#
+# @bus: the bus number of the device
+#
+# @slot: the slot the device is located in
+#
+# @function: the function of the slot used by the device
+#
+# @class_info: the class of the device
+#
+# @id: the PCI device id
+#
+# @irq: if an IRQ is assigned to the device, the IRQ number
+#
+# @irq_pin: the IRQ pin, zero means no IRQ (since 5.1)
+#
+# @qdev_id: the device name of the PCI device
+#
+# @pci_bridge: if the device is a PCI bridge, the bridge information
+#
+# @regions: a list of the PCI I/O regions associated with the device
+#
+# Notes: the contents of @class_info.desc are not stable and should only be
+#        treated as informational.
+#
+# Since: 0.14.0
+##
+{ 'struct': 'PciDeviceInfo',
+  'data': {'bus': 'int', 'slot': 'int', 'function': 'int',
+           'class_info': 'PciDeviceClass', 'id': 'PciDeviceId',
+           '*irq': 'int', 'irq_pin': 'int', 'qdev_id': 'str',
+           '*pci_bridge': 'PciBridgeInfo', 'regions': ['PciMemoryRegion'] }}
+
+##
+# @PciInfo:
+#
+# Information about a PCI bus
+#
+# @bus: the bus index
+#
+# @devices: a list of devices on this bus
+#
+# Since: 0.14.0
+##
+{ 'struct': 'PciInfo', 'data': {'bus': 'int', 'devices': ['PciDeviceInfo']} }
+
+##
+# @query-pci:
+#
+# Return information about the PCI bus topology of the guest.
+#
+# Returns: a list of @PciInfo for each PCI bus. Each bus is
+#          represented by a json-object, which has a key with a json-array of
+#          all PCI devices attached to it. Each device is represented by a
+#          json-object.
+#
+# Since: 0.14.0
+#
+# Example:
+#
+# -> { "execute": "query-pci" }
+# <- { "return": [
+#          {
+#             "bus": 0,
+#             "devices": [
+#                {
+#                   "bus": 0,
+#                   "qdev_id": "",
+#                   "slot": 0,
+#                   "class_info": {
+#                      "class": 1536,
+#                      "desc": "Host bridge"
+#                   },
+#                   "id": {
+#                      "device": 32902,
+#                      "vendor": 4663
+#                   },
+#                   "function": 0,
+#                   "regions": [
+#                   ]
+#                },
+#                {
+#                   "bus": 0,
+#                   "qdev_id": "",
+#                   "slot": 1,
+#                   "class_info": {
+#                      "class": 1537,
+#                      "desc": "ISA bridge"
+#                   },
+#                   "id": {
+#                      "device": 32902,
+#                      "vendor": 28672
+#                   },
+#                   "function": 0,
+#                   "regions": [
+#                   ]
+#                },
+#                {
+#                   "bus": 0,
+#                   "qdev_id": "",
+#                   "slot": 1,
+#                   "class_info": {
+#                      "class": 257,
+#                      "desc": "IDE controller"
+#                   },
+#                   "id": {
+#                      "device": 32902,
+#                      "vendor": 28688
+#                   },
+#                   "function": 1,
+#                   "regions": [
+#                      {
+#                         "bar": 4,
+#                         "size": 16,
+#                         "address": 49152,
+#                         "type": "io"
+#                      }
+#                   ]
+#                },
+#                {
+#                   "bus": 0,
+#                   "qdev_id": "",
+#                   "slot": 2,
+#                   "class_info": {
+#                      "class": 768,
+#                      "desc": "VGA controller"
+#                   },
+#                   "id": {
+#                      "device": 4115,
+#                      "vendor": 184
+#                   },
+#                   "function": 0,
+#                   "regions": [
+#                      {
+#                         "prefetch": true,
+#                         "mem_type_64": false,
+#                         "bar": 0,
+#                         "size": 33554432,
+#                         "address": 4026531840,
+#                         "type": "memory"
+#                      },
+#                      {
+#                         "prefetch": false,
+#                         "mem_type_64": false,
+#                         "bar": 1,
+#                         "size": 4096,
+#                         "address": 4060086272,
+#                         "type": "memory"
+#                      },
+#                      {
+#                         "prefetch": false,
+#                         "mem_type_64": false,
+#                         "bar": 6,
+#                         "size": 65536,
+#                         "address": -1,
+#                         "type": "memory"
+#                      }
+#                   ]
+#                },
+#                {
+#                   "bus": 0,
+#                   "qdev_id": "",
+#                   "irq": 11,
+#                   "slot": 4,
+#                   "class_info": {
+#                      "class": 1280,
+#                      "desc": "RAM controller"
+#                   },
+#                   "id": {
+#                      "device": 6900,
+#                      "vendor": 4098
+#                   },
+#                   "function": 0,
+#                   "regions": [
+#                      {
+#                         "bar": 0,
+#                         "size": 32,
+#                         "address": 49280,
+#                         "type": "io"
+#                      }
+#                   ]
+#                }
+#             ]
+#          }
+#       ]
+#    }
+#
+# Note: This example has been shortened as the real response is too long.
+#
+##
+{ 'command': 'query-pci', 'returns': ['PciInfo'] }
diff --git a/qapi/pragma.json b/qapi/pragma.json
new file mode 100644
index 000000000..cffae2766
--- /dev/null
+++ b/qapi/pragma.json
@@ -0,0 +1,24 @@
+{ 'pragma': { 'doc-required': true } }
+
+# Whitelists to permit QAPI rule violations; think twice before you
+# add to them!
+{ 'pragma': {
+    # Commands allowed to return a non-dictionary:
+    'returns-whitelist': [
+        'human-monitor-command',
+        'qom-get',
+        'query-migrate-cache-size',
+        'query-tpm-models',
+        'query-tpm-types',
+        'ringbuf-read' ],
+    'name-case-whitelist': [
+        'ACPISlotType',             # DIMM, visible through query-acpi-ospm-status
+        'CpuInfoMIPS',              # PC, visible through query-cpu
+        'CpuInfoTricore',           # PC, visible through query-cpu
+        'BlockdevVmdkSubformat',    # all members, to match VMDK spec spellings
+        'BlockdevVmdkAdapterType',  # legacyESX, to match VMDK spec spellings
+        'QapiErrorClass',           # all members, visible through errors
+        'UuidInfo',                 # UUID, visible through query-uuid
+        'X86CPURegister32',         # all members, visible indirectly through qom-get
+        'CpuInfo'                   # CPU, visible through query-cpu
+    ] } }
diff --git a/qapi/qapi-clone-visitor.c b/qapi/qapi-clone-visitor.c
new file mode 100644
index 000000000..c45c5caa3
--- /dev/null
+++ b/qapi/qapi-clone-visitor.c
@@ -0,0 +1,204 @@
+/*
+ * Copy one QAPI object to another
+ *
+ * Copyright (C) 2016 Red Hat, Inc.
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
+ * See the COPYING file in the top-level directory.
+ *
+ */
+
+#include "qemu/osdep.h"
+#include "qapi/clone-visitor.h"
+#include "qapi/visitor-impl.h"
+#include "qapi/error.h"
+#include "qapi/qmp/qnull.h"
+
+struct QapiCloneVisitor {
+    Visitor visitor;
+    size_t depth;
+};
+
+static QapiCloneVisitor *to_qcv(Visitor *v)
+{
+    return container_of(v, QapiCloneVisitor, visitor);
+}
+
+static bool qapi_clone_start_struct(Visitor *v, const char *name, void **obj,
+                                    size_t size, Error **errp)
+{
+    QapiCloneVisitor *qcv = to_qcv(v);
+
+    if (!obj) {
+        assert(qcv->depth);
+        /* Only possible when visiting an alternate's object
+         * branch. Nothing further to do here, since the earlier
+         * visit_start_alternate() already copied memory. */
+        return true;
+    }
+
+    *obj = g_memdup(*obj, size);
+    qcv->depth++;
+    return true;
+}
+
+static void qapi_clone_end(Visitor *v, void **obj)
+{
+    QapiCloneVisitor *qcv = to_qcv(v);
+
+    assert(qcv->depth);
+    if (obj) {
+        qcv->depth--;
+    }
+}
+
+static bool qapi_clone_start_list(Visitor *v, const char *name,
+                                  GenericList **listp, size_t size,
+                                  Error **errp)
+{
+    return qapi_clone_start_struct(v, name, (void **)listp, size, errp);
+}
+
+static GenericList *qapi_clone_next_list(Visitor *v, GenericList *tail,
+                                         size_t size)
+{
+    QapiCloneVisitor *qcv = to_qcv(v);
+
+    assert(qcv->depth);
+    /* Unshare the tail of the list cloned by g_memdup() */
+    tail->next = g_memdup(tail->next, size);
+    return tail->next;
+}
+
+static bool qapi_clone_start_alternate(Visitor *v, const char *name,
+                                       GenericAlternate **obj, size_t size,
+                                       Error **errp)
+{
+    return qapi_clone_start_struct(v, name, (void **)obj, size, errp);
+}
+
+static bool qapi_clone_type_int64(Visitor *v, const char *name, int64_t *obj,
+                                  Error **errp)
+{
+    QapiCloneVisitor *qcv = to_qcv(v);
+
+    assert(qcv->depth);
+    /* Value was already cloned by g_memdup() */
+    return true;
+}
+
+static bool qapi_clone_type_uint64(Visitor *v, const char *name,
+                                   uint64_t *obj, Error **errp)
+{
+    QapiCloneVisitor *qcv = to_qcv(v);
+
+    assert(qcv->depth);
+    /* Value was already cloned by g_memdup() */
+    return true;
+}
+
+static bool qapi_clone_type_bool(Visitor *v, const char *name, bool *obj,
+                                 Error **errp)
+{
+    QapiCloneVisitor *qcv = to_qcv(v);
+
+    assert(qcv->depth);
+    /* Value was already cloned by g_memdup() */
+    return true;
+}
+
+static bool qapi_clone_type_str(Visitor *v, const char *name, char **obj,
+                                Error **errp)
+{
+    QapiCloneVisitor *qcv = to_qcv(v);
+
+    assert(qcv->depth);
+    /*
+     * Pointer was already cloned by g_memdup; create fresh copy.
+     * Note that as long as qobject-output-visitor accepts NULL instead of
+     * "", then we must do likewise. However, we want to obey the
+     * input visitor semantics of never producing NULL when the empty
+     * string is intended.
+     */
+    *obj = g_strdup(*obj ?: "");
+    return true;
+}
+
+static bool qapi_clone_type_number(Visitor *v, const char *name, double *obj,
+                                   Error **errp)
+{
+    QapiCloneVisitor *qcv = to_qcv(v);
+
+    assert(qcv->depth);
+    /* Value was already cloned by g_memdup() */
+    return true;
+}
+
+static bool qapi_clone_type_null(Visitor *v, const char *name, QNull **obj,
+                                 Error **errp)
+{
+    QapiCloneVisitor *qcv = to_qcv(v);
+
+    assert(qcv->depth);
+    *obj = qnull();
+    return true;
+}
+
+static void qapi_clone_free(Visitor *v)
+{
+    g_free(v);
+}
+
+static Visitor *qapi_clone_visitor_new(void)
+{
+    QapiCloneVisitor *v;
+
+    v = g_malloc0(sizeof(*v));
+
+    v->visitor.type = VISITOR_CLONE;
+    v->visitor.start_struct = qapi_clone_start_struct;
+    v->visitor.end_struct = qapi_clone_end;
+    v->visitor.start_list = qapi_clone_start_list;
+    v->visitor.next_list = qapi_clone_next_list;
+    v->visitor.end_list = qapi_clone_end;
+    v->visitor.start_alternate = qapi_clone_start_alternate;
+    v->visitor.end_alternate = qapi_clone_end;
+    v->visitor.type_int64 = qapi_clone_type_int64;
+    v->visitor.type_uint64 = qapi_clone_type_uint64;
+    v->visitor.type_bool = qapi_clone_type_bool;
+    v->visitor.type_str = qapi_clone_type_str;
+    v->visitor.type_number = qapi_clone_type_number;
+    v->visitor.type_null = qapi_clone_type_null;
+    v->visitor.free = qapi_clone_free;
+
+    return &v->visitor;
+}
+
+void *qapi_clone(const void *src, bool (*visit_type)(Visitor *, const char *,
+                                                     void **, Error **))
+{
+    Visitor *v;
+    void *dst = (void *) src; /* Cast away const */
+
+    if (!src) {
+        return NULL;
+    }
+
+    v = qapi_clone_visitor_new();
+    visit_type(v, NULL, &dst, &error_abort);
+    visit_free(v);
+    return dst;
+}
+
+void qapi_clone_members(void *dst, const void *src, size_t sz,
+                        bool (*visit_type_members)(Visitor *, void *,
+                                                   Error **))
+{
+    Visitor *v;
+
+    v = qapi_clone_visitor_new();
+    memcpy(dst, src, sz);
+    to_qcv(v)->depth++;
+    visit_type_members(v, dst, &error_abort);
+    visit_free(v);
+}
diff --git a/qapi/qapi-dealloc-visitor.c b/qapi/qapi-dealloc-visitor.c
new file mode 100644
index 000000000..ef283f296
--- /dev/null
+++ b/qapi/qapi-dealloc-visitor.c
@@ -0,0 +1,143 @@
+/*
+ * Dealloc Visitor
+ *
+ * Copyright (C) 2012-2016 Red Hat, Inc.
+ * Copyright IBM, Corp. 2011
+ *
+ * Authors:
+ *  Michael Roth   
+ *
+ * This work is licensed under the terms of the GNU LGPL, version 2.1 or later.
+ * See the COPYING.LIB file in the top-level directory.
+ *
+ */
+
+#include "qemu/osdep.h"
+#include "qapi/dealloc-visitor.h"
+#include "qapi/qmp/qnull.h"
+#include "qapi/visitor-impl.h"
+
+struct QapiDeallocVisitor
+{
+    Visitor visitor;
+};
+
+static bool qapi_dealloc_start_struct(Visitor *v, const char *name, void **obj,
+                                      size_t unused, Error **errp)
+{
+    return true;
+}
+
+static void qapi_dealloc_end_struct(Visitor *v, void **obj)
+{
+    if (obj) {
+        g_free(*obj);
+    }
+}
+
+static void qapi_dealloc_end_alternate(Visitor *v, void **obj)
+{
+    if (obj) {
+        g_free(*obj);
+    }
+}
+
+static bool qapi_dealloc_start_list(Visitor *v, const char *name,
+                                    GenericList **list, size_t size,
+                                    Error **errp)
+{
+    return true;
+}
+
+static GenericList *qapi_dealloc_next_list(Visitor *v, GenericList *tail,
+                                           size_t size)
+{
+    GenericList *next = tail->next;
+    g_free(tail);
+    return next;
+}
+
+static void qapi_dealloc_end_list(Visitor *v, void **obj)
+{
+}
+
+static bool qapi_dealloc_type_str(Visitor *v, const char *name, char **obj,
+                                  Error **errp)
+{
+    if (obj) {
+        g_free(*obj);
+    }
+    return true;
+}
+
+static bool qapi_dealloc_type_int64(Visitor *v, const char *name, int64_t *obj,
+                                    Error **errp)
+{
+    return true;
+}
+
+static bool qapi_dealloc_type_uint64(Visitor *v, const char *name,
+                                     uint64_t *obj, Error **errp)
+{
+    return true;
+}
+
+static bool qapi_dealloc_type_bool(Visitor *v, const char *name, bool *obj,
+                                   Error **errp)
+{
+    return true;
+}
+
+static bool qapi_dealloc_type_number(Visitor *v, const char *name, double *obj,
+                                     Error **errp)
+{
+    return true;
+}
+
+static bool qapi_dealloc_type_anything(Visitor *v, const char *name,
+                                       QObject **obj, Error **errp)
+{
+    if (obj) {
+        qobject_unref(*obj);
+    }
+    return true;
+}
+
+static bool qapi_dealloc_type_null(Visitor *v, const char *name,
+                                   QNull **obj, Error **errp)
+{
+    if (obj) {
+        qobject_unref(*obj);
+    }
+    return true;
+}
+
+static void qapi_dealloc_free(Visitor *v)
+{
+    g_free(container_of(v, QapiDeallocVisitor, visitor));
+}
+
+Visitor *qapi_dealloc_visitor_new(void)
+{
+    QapiDeallocVisitor *v;
+
+    v = g_malloc0(sizeof(*v));
+
+    v->visitor.type = VISITOR_DEALLOC;
+    v->visitor.start_struct = qapi_dealloc_start_struct;
+    v->visitor.end_struct = qapi_dealloc_end_struct;
+    v->visitor.end_alternate = qapi_dealloc_end_alternate;
+    v->visitor.start_list = qapi_dealloc_start_list;
+    v->visitor.next_list = qapi_dealloc_next_list;
+    v->visitor.end_list = qapi_dealloc_end_list;
+    v->visitor.type_int64 = qapi_dealloc_type_int64;
+    v->visitor.type_uint64 = qapi_dealloc_type_uint64;
+    v->visitor.type_bool = qapi_dealloc_type_bool;
+    v->visitor.type_str = qapi_dealloc_type_str;
+    v->visitor.type_number = qapi_dealloc_type_number;
+    v->visitor.type_any = qapi_dealloc_type_anything;
+    v->visitor.type_null = qapi_dealloc_type_null;
+    v->visitor.free = qapi_dealloc_free;
+
+    return &v->visitor;
+}
diff --git a/qapi/qapi-schema.json b/qapi/qapi-schema.json
new file mode 100644
index 000000000..0b444b76d
--- /dev/null
+++ b/qapi/qapi-schema.json
@@ -0,0 +1,93 @@
+# -*- Mode: Python -*-
+# vim: filetype=python
+##
+# = Introduction
+#
+# This document describes all commands currently supported by QMP.
+#
+# Most of the time their usage is exactly the same as in the user Monitor, this
+# means that any other document which also describe commands (the manpage,
+# QEMU's manual, etc) can and should be consulted.
+#
+# QMP has two types of commands: regular and query commands. Regular commands
+# usually change the Virtual Machine's state someway, while query commands just
+# return information. The sections below are divided accordingly.
+#
+# It's important to observe that all communication examples are formatted in
+# a reader-friendly way, so that they're easier to understand. However, in real
+# protocol usage, they're emitted as a single line.
+#
+# Also, the following notation is used to denote data flow:
+#
+# Example:
+#
+# ::
+#
+#   -> data issued by the Client
+#   <- Server data response
+#
+# Please, refer to the QMP specification (docs/interop/qmp-spec.txt) for
+# detailed information on the Server command and response formats.
+#
+# = Stability Considerations
+#
+# The current QMP command set (described in this file) may be useful for a
+# number of use cases, however it's limited and several commands have bad
+# defined semantics, specially with regard to command completion.
+#
+# These problems are going to be solved incrementally in the next QEMU releases
+# and we're going to establish a deprecation policy for badly defined commands.
+#
+# If you're planning to adopt QMP, please observe the following:
+#
+#     1. The deprecation policy will take effect and be documented soon, please
+#        check the documentation of each used command as soon as a new release of
+#        QEMU is available
+#
+#     2. DO NOT rely on anything which is not explicit documented
+#
+#     3. Errors, in special, are not documented. Applications should NOT check
+#        for specific errors classes or data (it's strongly recommended to only
+#        check for the "error" key)
+#
+##
+
+{ 'include': 'pragma.json' }
+
+# Documentation generated with qapi-gen.py is in source order, with
+# included sub-schemas inserted at the first include directive
+# (subsequent include directives have no effect).  To get a sane and
+# stable order, it's best to include each sub-schema just once, or
+# include it first right here.
+
+{ 'include': 'error.json' }
+{ 'include': 'common.json' }
+{ 'include': 'sockets.json' }
+{ 'include': 'run-state.json' }
+{ 'include': 'crypto.json' }
+{ 'include': 'block.json' }
+{ 'include': 'block-export.json' }
+{ 'include': 'char.json' }
+{ 'include': 'dump.json' }
+{ 'include': 'job.json' }
+{ 'include': 'net.json' }
+{ 'include': 'rdma.json' }
+{ 'include': 'rocker.json' }
+{ 'include': 'tpm.json' }
+{ 'include': 'ui.json' }
+{ 'include': 'authz.json' }
+{ 'include': 'migration.json' }
+{ 'include': 'transaction.json' }
+{ 'include': 'trace.json' }
+{ 'include': 'control.json' }
+{ 'include': 'introspect.json' }
+{ 'include': 'qom.json' }
+{ 'include': 'qdev.json' }
+{ 'include': 'machine.json' }
+{ 'include': 'machine-target.json' }
+{ 'include': 'replay.json' }
+{ 'include': 'misc.json' }
+{ 'include': 'misc-target.json' }
+{ 'include': 'audio.json' }
+{ 'include': 'acpi.json' }
+{ 'include': 'pci.json' }
diff --git a/qapi/qapi-util.c b/qapi/qapi-util.c
new file mode 100644
index 000000000..3c24bb3d4
--- /dev/null
+++ b/qapi/qapi-util.c
@@ -0,0 +1,111 @@
+/*
+ * QAPI util functions
+ *
+ * Authors:
+ *  Hu Tao       
+ *  Peter Lieven 
+ * 
+ * This work is licensed under the terms of the GNU LGPL, version 2.1 or later.
+ * See the COPYING.LIB file in the top-level directory.
+ *
+ */
+
+#include "qemu/osdep.h"
+#include "qapi/error.h"
+#include "qemu/ctype.h"
+#include "qapi/qmp/qerror.h"
+
+const char *qapi_enum_lookup(const QEnumLookup *lookup, int val)
+{
+    assert(val >= 0 && val < lookup->size);
+
+    return lookup->array[val];
+}
+
+int qapi_enum_parse(const QEnumLookup *lookup, const char *buf,
+                    int def, Error **errp)
+{
+    int i;
+
+    if (!buf) {
+        return def;
+    }
+
+    for (i = 0; i < lookup->size; i++) {
+        if (!strcmp(buf, lookup->array[i])) {
+            return i;
+        }
+    }
+
+    error_setg(errp, "invalid parameter value: %s", buf);
+    return def;
+}
+
+bool qapi_bool_parse(const char *name, const char *value, bool *obj, Error **errp)
+{
+    if (g_str_equal(value, "on") ||
+        g_str_equal(value, "yes") ||
+        g_str_equal(value, "true") ||
+        g_str_equal(value, "y")) {
+        *obj = true;
+        return true;
+    }
+    if (g_str_equal(value, "off") ||
+        g_str_equal(value, "no") ||
+        g_str_equal(value, "false") ||
+        g_str_equal(value, "n")) {
+        *obj = false;
+        return true;
+    }
+
+    error_setg(errp, QERR_INVALID_PARAMETER_VALUE, name,
+               "'on' or 'off'");
+    return false;
+}
+
+/*
+ * Parse a valid QAPI name from @str.
+ * A valid name consists of letters, digits, hyphen and underscore.
+ * It may be prefixed by __RFQDN_ (downstream extension), where RFQDN
+ * may contain only letters, digits, hyphen and period.
+ * The special exception for enumeration names is not implemented.
+ * See docs/devel/qapi-code-gen.txt for more on QAPI naming rules.
+ * Keep this consistent with scripts/qapi.py!
+ * If @complete, the parse fails unless it consumes @str completely.
+ * Return its length on success, -1 on failure.
+ */
+int parse_qapi_name(const char *str, bool complete)
+{
+    const char *p = str;
+
+    if (*p == '_') {            /* Downstream __RFQDN_ */
+        p++;
+        if (*p != '_') {
+            return -1;
+        }
+        while (*++p) {
+            if (!qemu_isalnum(*p) && *p != '-' && *p != '.') {
+                break;
+            }
+        }
+
+        if (*p != '_') {
+            return -1;
+        }
+        p++;
+    }
+
+    if (!qemu_isalpha(*p)) {
+        return -1;
+    }
+    while (*++p) {
+        if (!qemu_isalnum(*p) && *p != '-' && *p != '_') {
+            break;
+        }
+    }
+
+    if (complete && *p) {
+        return -1;
+    }
+    return p - str;
+}
diff --git a/qapi/qapi-visit-core.c b/qapi/qapi-visit-core.c
new file mode 100644
index 000000000..7e5f40e7f
--- /dev/null
+++ b/qapi/qapi-visit-core.c
@@ -0,0 +1,407 @@
+/*
+ * Core Definitions for QAPI Visitor Classes
+ *
+ * Copyright (C) 2012-2016 Red Hat, Inc.
+ * Copyright IBM, Corp. 2011
+ *
+ * Authors:
+ *  Anthony Liguori   
+ *
+ * This work is licensed under the terms of the GNU LGPL, version 2.1 or later.
+ * See the COPYING.LIB file in the top-level directory.
+ *
+ */
+
+#include "qemu/osdep.h"
+#include "qapi/error.h"
+#include "qapi/qmp/qerror.h"
+#include "qapi/visitor.h"
+#include "qapi/visitor-impl.h"
+#include "trace.h"
+
+void visit_complete(Visitor *v, void *opaque)
+{
+    assert(v->type != VISITOR_OUTPUT || v->complete);
+    trace_visit_complete(v, opaque);
+    if (v->complete) {
+        v->complete(v, opaque);
+    }
+}
+
+void visit_free(Visitor *v)
+{
+    trace_visit_free(v);
+    if (v) {
+        v->free(v);
+    }
+}
+
+bool visit_start_struct(Visitor *v, const char *name, void **obj,
+                        size_t size, Error **errp)
+{
+    bool ok;
+
+    trace_visit_start_struct(v, name, obj, size);
+    if (obj) {
+        assert(size);
+        assert(!(v->type & VISITOR_OUTPUT) || *obj);
+    }
+    ok = v->start_struct(v, name, obj, size, errp);
+    if (obj && (v->type & VISITOR_INPUT)) {
+        assert(ok != !*obj);
+    }
+    return ok;
+}
+
+bool visit_check_struct(Visitor *v, Error **errp)
+{
+    trace_visit_check_struct(v);
+    return v->check_struct ? v->check_struct(v, errp) : true;
+}
+
+void visit_end_struct(Visitor *v, void **obj)
+{
+    trace_visit_end_struct(v, obj);
+    v->end_struct(v, obj);
+}
+
+bool visit_start_list(Visitor *v, const char *name, GenericList **list,
+                      size_t size, Error **errp)
+{
+    bool ok;
+
+    assert(!list || size >= sizeof(GenericList));
+    trace_visit_start_list(v, name, list, size);
+    ok = v->start_list(v, name, list, size, errp);
+    if (list && (v->type & VISITOR_INPUT)) {
+        assert(ok || !*list);
+    }
+    return ok;
+}
+
+GenericList *visit_next_list(Visitor *v, GenericList *tail, size_t size)
+{
+    assert(tail && size >= sizeof(GenericList));
+    trace_visit_next_list(v, tail, size);
+    return v->next_list(v, tail, size);
+}
+
+bool visit_check_list(Visitor *v, Error **errp)
+{
+    trace_visit_check_list(v);
+    return v->check_list ? v->check_list(v, errp) : true;
+}
+
+void visit_end_list(Visitor *v, void **obj)
+{
+    trace_visit_end_list(v, obj);
+    v->end_list(v, obj);
+}
+
+bool visit_start_alternate(Visitor *v, const char *name,
+                           GenericAlternate **obj, size_t size,
+                           Error **errp)
+{
+    bool ok;
+
+    assert(obj && size >= sizeof(GenericAlternate));
+    assert(!(v->type & VISITOR_OUTPUT) || *obj);
+    trace_visit_start_alternate(v, name, obj, size);
+    if (!v->start_alternate) {
+        assert(!(v->type & VISITOR_INPUT));
+        return true;
+    }
+    ok = v->start_alternate(v, name, obj, size, errp);
+    if (v->type & VISITOR_INPUT) {
+        assert(ok != !*obj);
+    }
+    return ok;
+}
+
+void visit_end_alternate(Visitor *v, void **obj)
+{
+    trace_visit_end_alternate(v, obj);
+    if (v->end_alternate) {
+        v->end_alternate(v, obj);
+    }
+}
+
+bool visit_optional(Visitor *v, const char *name, bool *present)
+{
+    trace_visit_optional(v, name, present);
+    if (v->optional) {
+        v->optional(v, name, present);
+    }
+    return *present;
+}
+
+bool visit_is_input(Visitor *v)
+{
+    return v->type == VISITOR_INPUT;
+}
+
+bool visit_is_dealloc(Visitor *v)
+{
+    return v->type == VISITOR_DEALLOC;
+}
+
+bool visit_type_int(Visitor *v, const char *name, int64_t *obj, Error **errp)
+{
+    assert(obj);
+    trace_visit_type_int(v, name, obj);
+    return v->type_int64(v, name, obj, errp);
+}
+
+static bool visit_type_uintN(Visitor *v, uint64_t *obj, const char *name,
+                             uint64_t max, const char *type, Error **errp)
+{
+    uint64_t value = *obj;
+
+    assert(v->type == VISITOR_INPUT || value <= max);
+
+    if (!v->type_uint64(v, name, &value, errp)) {
+        return false;
+    }
+    if (value > max) {
+        assert(v->type == VISITOR_INPUT);
+        error_setg(errp, QERR_INVALID_PARAMETER_VALUE,
+                   name ? name : "null", type);
+        return false;
+    }
+    *obj = value;
+    return true;
+}
+
+bool visit_type_uint8(Visitor *v, const char *name, uint8_t *obj,
+                      Error **errp)
+{
+    uint64_t value;
+    bool ok;
+
+    trace_visit_type_uint8(v, name, obj);
+    value = *obj;
+    ok = visit_type_uintN(v, &value, name, UINT8_MAX, "uint8_t", errp);
+    *obj = value;
+    return ok;
+}
+
+bool visit_type_uint16(Visitor *v, const char *name, uint16_t *obj,
+                       Error **errp)
+{
+    uint64_t value;
+    bool ok;
+
+    trace_visit_type_uint16(v, name, obj);
+    value = *obj;
+    ok = visit_type_uintN(v, &value, name, UINT16_MAX, "uint16_t", errp);
+    *obj = value;
+    return ok;
+}
+
+bool visit_type_uint32(Visitor *v, const char *name, uint32_t *obj,
+                       Error **errp)
+{
+    uint64_t value;
+    bool ok;
+
+    trace_visit_type_uint32(v, name, obj);
+    value = *obj;
+    ok = visit_type_uintN(v, &value, name, UINT32_MAX, "uint32_t", errp);
+    *obj = value;
+    return ok;
+}
+
+bool visit_type_uint64(Visitor *v, const char *name, uint64_t *obj,
+                       Error **errp)
+{
+    assert(obj);
+    trace_visit_type_uint64(v, name, obj);
+    return v->type_uint64(v, name, obj, errp);
+}
+
+static bool visit_type_intN(Visitor *v, int64_t *obj, const char *name,
+                            int64_t min, int64_t max, const char *type,
+                            Error **errp)
+{
+    int64_t value = *obj;
+
+    assert(v->type == VISITOR_INPUT || (value >= min && value <= max));
+
+    if (!v->type_int64(v, name, &value, errp)) {
+        return false;
+    }
+    if (value < min || value > max) {
+        assert(v->type == VISITOR_INPUT);
+        error_setg(errp, QERR_INVALID_PARAMETER_VALUE,
+                   name ? name : "null", type);
+        return false;
+    }
+    *obj = value;
+    return true;
+}
+
+bool visit_type_int8(Visitor *v, const char *name, int8_t *obj, Error **errp)
+{
+    int64_t value;
+    bool ok;
+
+    trace_visit_type_int8(v, name, obj);
+    value = *obj;
+    ok = visit_type_intN(v, &value, name, INT8_MIN, INT8_MAX, "int8_t", errp);
+    *obj = value;
+    return ok;
+}
+
+bool visit_type_int16(Visitor *v, const char *name, int16_t *obj,
+                      Error **errp)
+{
+    int64_t value;
+    bool ok;
+
+    trace_visit_type_int16(v, name, obj);
+    value = *obj;
+    ok = visit_type_intN(v, &value, name, INT16_MIN, INT16_MAX, "int16_t",
+                         errp);
+    *obj = value;
+    return ok;
+}
+
+bool visit_type_int32(Visitor *v, const char *name, int32_t *obj,
+                      Error **errp)
+{
+    int64_t value;
+    bool ok;
+
+    trace_visit_type_int32(v, name, obj);
+    value = *obj;
+    ok = visit_type_intN(v, &value, name, INT32_MIN, INT32_MAX, "int32_t",
+                        errp);
+    *obj = value;
+    return ok;
+}
+
+bool visit_type_int64(Visitor *v, const char *name, int64_t *obj,
+                      Error **errp)
+{
+    assert(obj);
+    trace_visit_type_int64(v, name, obj);
+    return v->type_int64(v, name, obj, errp);
+}
+
+bool visit_type_size(Visitor *v, const char *name, uint64_t *obj,
+                     Error **errp)
+{
+    assert(obj);
+    trace_visit_type_size(v, name, obj);
+    if (v->type_size) {
+        return v->type_size(v, name, obj, errp);
+    }
+    return v->type_uint64(v, name, obj, errp);
+}
+
+bool visit_type_bool(Visitor *v, const char *name, bool *obj, Error **errp)
+{
+    assert(obj);
+    trace_visit_type_bool(v, name, obj);
+    return v->type_bool(v, name, obj, errp);
+}
+
+bool visit_type_str(Visitor *v, const char *name, char **obj, Error **errp)
+{
+    bool ok;
+
+    assert(obj);
+    /* TODO: Fix callers to not pass NULL when they mean "", so that we
+     * can enable:
+    assert(!(v->type & VISITOR_OUTPUT) || *obj);
+     */
+    trace_visit_type_str(v, name, obj);
+    ok = v->type_str(v, name, obj, errp);
+    if (v->type & VISITOR_INPUT) {
+        assert(ok != !*obj);
+    }
+    return ok;
+}
+
+bool visit_type_number(Visitor *v, const char *name, double *obj,
+                       Error **errp)
+{
+    assert(obj);
+    trace_visit_type_number(v, name, obj);
+    return v->type_number(v, name, obj, errp);
+}
+
+bool visit_type_any(Visitor *v, const char *name, QObject **obj, Error **errp)
+{
+    bool ok;
+
+    assert(obj);
+    assert(v->type != VISITOR_OUTPUT || *obj);
+    trace_visit_type_any(v, name, obj);
+    ok = v->type_any(v, name, obj, errp);
+    if (v->type == VISITOR_INPUT) {
+        assert(ok != !*obj);
+    }
+    return ok;
+}
+
+bool visit_type_null(Visitor *v, const char *name, QNull **obj,
+                     Error **errp)
+{
+    trace_visit_type_null(v, name, obj);
+    return v->type_null(v, name, obj, errp);
+}
+
+static bool output_type_enum(Visitor *v, const char *name, int *obj,
+                             const QEnumLookup *lookup, Error **errp)
+{
+    int value = *obj;
+    char *enum_str;
+
+    enum_str = (char *)qapi_enum_lookup(lookup, value);
+    return visit_type_str(v, name, &enum_str, errp);
+}
+
+static bool input_type_enum(Visitor *v, const char *name, int *obj,
+                            const QEnumLookup *lookup, Error **errp)
+{
+    int64_t value;
+    char *enum_str;
+
+    if (!visit_type_str(v, name, &enum_str, errp)) {
+        return false;
+    }
+
+    value = qapi_enum_parse(lookup, enum_str, -1, NULL);
+    if (value < 0) {
+        error_setg(errp, QERR_INVALID_PARAMETER, enum_str);
+        g_free(enum_str);
+        return false;
+    }
+
+    g_free(enum_str);
+    *obj = value;
+    return true;
+}
+
+bool visit_type_enum(Visitor *v, const char *name, int *obj,
+                     const QEnumLookup *lookup, Error **errp)
+{
+    assert(obj && lookup);
+    trace_visit_type_enum(v, name, obj);
+    switch (v->type) {
+    case VISITOR_INPUT:
+        return input_type_enum(v, name, obj, lookup, errp);
+    case VISITOR_OUTPUT:
+        return output_type_enum(v, name, obj, lookup, errp);
+    case VISITOR_CLONE:
+        /* nothing further to do, scalar value was already copied by
+         * g_memdup() during visit_start_*() */
+        return true;
+    case VISITOR_DEALLOC:
+        /* nothing to deallocate for a scalar */
+        return true;
+    default:
+        abort();
+    }
+}
diff --git a/qapi/qdev.json b/qapi/qdev.json
new file mode 100644
index 000000000..13254529b
--- /dev/null
+++ b/qapi/qdev.json
@@ -0,0 +1,126 @@
+# -*- Mode: Python -*-
+# vim: filetype=python
+#
+# This work is licensed under the terms of the GNU GPL, version 2 or later.
+# See the COPYING file in the top-level directory.
+
+##
+# = Device infrastructure (qdev)
+##
+
+{ 'include': 'qom.json' }
+
+##
+# @device-list-properties:
+#
+# List properties associated with a device.
+#
+# @typename: the type name of a device
+#
+# Returns: a list of ObjectPropertyInfo describing a devices properties
+#
+# Note: objects can create properties at runtime, for example to describe
+#       links between different devices and/or objects. These properties
+#       are not included in the output of this command.
+#
+# Since: 1.2
+##
+{ 'command': 'device-list-properties',
+  'data': { 'typename': 'str'},
+  'returns': [ 'ObjectPropertyInfo' ] }
+
+##
+# @device_add:
+#
+# @driver: the name of the new device's driver
+#
+# @bus: the device's parent bus (device tree path)
+#
+# @id: the device's ID, must be unique
+#
+# Additional arguments depend on the type.
+#
+# Add a device.
+#
+# Notes:
+# 1. For detailed information about this command, please refer to the
+#    'docs/qdev-device-use.txt' file.
+#
+# 2. It's possible to list device properties by running QEMU with the
+#    "-device DEVICE,help" command-line argument, where DEVICE is the
+#    device's name
+#
+# Example:
+#
+# -> { "execute": "device_add",
+#      "arguments": { "driver": "e1000", "id": "net1",
+#                     "bus": "pci.0",
+#                     "mac": "52:54:00:12:34:56" } }
+# <- { "return": {} }
+#
+# TODO: This command effectively bypasses QAPI completely due to its
+#       "additional arguments" business.  It shouldn't have been added to
+#       the schema in this form.  It should be qapified properly, or
+#       replaced by a properly qapified command.
+#
+# Since: 0.13
+##
+{ 'command': 'device_add',
+  'data': {'driver': 'str', '*bus': 'str', '*id': 'str'},
+  'gen': false } # so we can get the additional arguments
+
+##
+# @device_del:
+#
+# Remove a device from a guest
+#
+# @id: the device's ID or QOM path
+#
+# Returns: Nothing on success
+#          If @id is not a valid device, DeviceNotFound
+#
+# Notes: When this command completes, the device may not be removed from the
+#        guest.  Hot removal is an operation that requires guest cooperation.
+#        This command merely requests that the guest begin the hot removal
+#        process.  Completion of the device removal process is signaled with a
+#        DEVICE_DELETED event. Guest reset will automatically complete removal
+#        for all devices.
+#
+# Since: 0.14.0
+#
+# Example:
+#
+# -> { "execute": "device_del",
+#      "arguments": { "id": "net1" } }
+# <- { "return": {} }
+#
+# -> { "execute": "device_del",
+#      "arguments": { "id": "/machine/peripheral-anon/device[0]" } }
+# <- { "return": {} }
+#
+##
+{ 'command': 'device_del', 'data': {'id': 'str'} }
+
+##
+# @DEVICE_DELETED:
+#
+# Emitted whenever the device removal completion is acknowledged by the guest.
+# At this point, it's safe to reuse the specified device ID. Device removal can
+# be initiated by the guest or by HMP/QMP commands.
+#
+# @device: device name
+#
+# @path: device path
+#
+# Since: 1.5
+#
+# Example:
+#
+# <- { "event": "DEVICE_DELETED",
+#      "data": { "device": "virtio-net-pci-0",
+#                "path": "/machine/peripheral/virtio-net-pci-0" },
+#      "timestamp": { "seconds": 1265044230, "microseconds": 450486 } }
+#
+##
+{ 'event': 'DEVICE_DELETED',
+  'data': { '*device': 'str', 'path': 'str' } }
diff --git a/qapi/qmp-dispatch.c b/qapi/qmp-dispatch.c
new file mode 100644
index 000000000..9a2d7dd29
--- /dev/null
+++ b/qapi/qmp-dispatch.c
@@ -0,0 +1,252 @@
+/*
+ * Core Definitions for QAPI/QMP Dispatch
+ *
+ * Copyright IBM, Corp. 2011
+ *
+ * Authors:
+ *  Anthony Liguori   
+ *
+ * This work is licensed under the terms of the GNU LGPL, version 2.1 or later.
+ * See the COPYING.LIB file in the top-level directory.
+ *
+ */
+
+#include "qemu/osdep.h"
+
+#include "block/aio.h"
+#include "qapi/error.h"
+#include "qapi/qmp/dispatch.h"
+#include "qapi/qmp/qdict.h"
+#include "qapi/qmp/qjson.h"
+#include "sysemu/runstate.h"
+#include "qapi/qmp/qbool.h"
+#include "qemu/coroutine.h"
+#include "qemu/main-loop.h"
+
+static QDict *qmp_dispatch_check_obj(QDict *dict, bool allow_oob,
+                                     Error **errp)
+{
+    const char *exec_key = NULL;
+    const QDictEntry *ent;
+    const char *arg_name;
+    const QObject *arg_obj;
+
+    for (ent = qdict_first(dict); ent;
+         ent = qdict_next(dict, ent)) {
+        arg_name = qdict_entry_key(ent);
+        arg_obj = qdict_entry_value(ent);
+
+        if (!strcmp(arg_name, "execute")
+            || (!strcmp(arg_name, "exec-oob") && allow_oob)) {
+            if (qobject_type(arg_obj) != QTYPE_QSTRING) {
+                error_setg(errp, "QMP input member '%s' must be a string",
+                           arg_name);
+                return NULL;
+            }
+            if (exec_key) {
+                error_setg(errp, "QMP input member '%s' clashes with '%s'",
+                           arg_name, exec_key);
+                return NULL;
+            }
+            exec_key = arg_name;
+        } else if (!strcmp(arg_name, "arguments")) {
+            if (qobject_type(arg_obj) != QTYPE_QDICT) {
+                error_setg(errp,
+                           "QMP input member 'arguments' must be an object");
+                return NULL;
+            }
+        } else if (!strcmp(arg_name, "id")) {
+            continue;
+        } else {
+            error_setg(errp, "QMP input member '%s' is unexpected",
+                       arg_name);
+            return NULL;
+        }
+    }
+
+    if (!exec_key) {
+        error_setg(errp, "QMP input lacks member 'execute'");
+        return NULL;
+    }
+
+    return dict;
+}
+
+QDict *qmp_error_response(Error *err)
+{
+    QDict *rsp;
+
+    rsp = qdict_from_jsonf_nofail("{ 'error': { 'class': %s, 'desc': %s } }",
+                                  QapiErrorClass_str(error_get_class(err)),
+                                  error_get_pretty(err));
+    error_free(err);
+    return rsp;
+}
+
+/*
+ * Does @qdict look like a command to be run out-of-band?
+ */
+bool qmp_is_oob(const QDict *dict)
+{
+    return qdict_haskey(dict, "exec-oob")
+        && !qdict_haskey(dict, "execute");
+}
+
+typedef struct QmpDispatchBH {
+    const QmpCommand *cmd;
+    Monitor *cur_mon;
+    QDict *args;
+    QObject **ret;
+    Error **errp;
+    Coroutine *co;
+} QmpDispatchBH;
+
+static void do_qmp_dispatch_bh(void *opaque)
+{
+    QmpDispatchBH *data = opaque;
+
+    assert(monitor_cur() == NULL);
+    monitor_set_cur(qemu_coroutine_self(), data->cur_mon);
+    data->cmd->fn(data->args, data->ret, data->errp);
+    monitor_set_cur(qemu_coroutine_self(), NULL);
+    aio_co_wake(data->co);
+}
+
+/*
+ * Runs outside of coroutine context for OOB commands, but in coroutine
+ * context for everything else.
+ */
+QDict *qmp_dispatch(const QmpCommandList *cmds, QObject *request,
+                    bool allow_oob, Monitor *cur_mon)
+{
+    Error *err = NULL;
+    bool oob;
+    const char *command;
+    QDict *args;
+    const QmpCommand *cmd;
+    QDict *dict;
+    QObject *id;
+    QObject *ret = NULL;
+    QDict *rsp = NULL;
+
+    dict = qobject_to(QDict, request);
+    if (!dict) {
+        id = NULL;
+        error_setg(&err, "QMP input must be a JSON object");
+        goto out;
+    }
+
+    id = qdict_get(dict, "id");
+
+    if (!qmp_dispatch_check_obj(dict, allow_oob, &err)) {
+        goto out;
+    }
+
+    command = qdict_get_try_str(dict, "execute");
+    oob = false;
+    if (!command) {
+        assert(allow_oob);
+        command = qdict_get_str(dict, "exec-oob");
+        oob = true;
+    }
+    cmd = qmp_find_command(cmds, command);
+    if (cmd == NULL) {
+        error_set(&err, ERROR_CLASS_COMMAND_NOT_FOUND,
+                  "The command %s has not been found", command);
+        goto out;
+    }
+    if (!cmd->enabled) {
+        error_set(&err, ERROR_CLASS_COMMAND_NOT_FOUND,
+                  "The command %s has been disabled for this instance",
+                  command);
+        goto out;
+    }
+    if (oob && !(cmd->options & QCO_ALLOW_OOB)) {
+        error_setg(&err, "The command %s does not support OOB",
+                   command);
+        goto out;
+    }
+
+    if (runstate_check(RUN_STATE_PRECONFIG) &&
+        !(cmd->options & QCO_ALLOW_PRECONFIG)) {
+        error_setg(&err, "The command '%s' isn't permitted in '%s' state",
+                   cmd->name, RunState_str(RUN_STATE_PRECONFIG));
+        goto out;
+    }
+
+    if (!qdict_haskey(dict, "arguments")) {
+        args = qdict_new();
+    } else {
+        args = qdict_get_qdict(dict, "arguments");
+        qobject_ref(args);
+    }
+
+    assert(!(oob && qemu_in_coroutine()));
+    assert(monitor_cur() == NULL);
+    if (!!(cmd->options & QCO_COROUTINE) == qemu_in_coroutine()) {
+        monitor_set_cur(qemu_coroutine_self(), cur_mon);
+        cmd->fn(args, &ret, &err);
+        monitor_set_cur(qemu_coroutine_self(), NULL);
+    } else {
+       /*
+        * Actual context doesn't match the one the command needs.
+        *
+        * Case 1: we are in coroutine context, but command does not
+        * have QCO_COROUTINE.  We need to drop out of coroutine
+        * context for executing it.
+        *
+        * Case 2: we are outside coroutine context, but command has
+        * QCO_COROUTINE.  Can't actually happen, because we get here
+        * outside coroutine context only when executing a command
+        * out of band, and OOB commands never have QCO_COROUTINE.
+        */
+        assert(!oob && qemu_in_coroutine() && !(cmd->options & QCO_COROUTINE));
+
+        QmpDispatchBH data = {
+            .cur_mon    = cur_mon,
+            .cmd        = cmd,
+            .args       = args,
+            .ret        = &ret,
+            .errp       = &err,
+            .co         = qemu_coroutine_self(),
+        };
+        aio_bh_schedule_oneshot(qemu_get_aio_context(), do_qmp_dispatch_bh,
+                                &data);
+        qemu_coroutine_yield();
+    }
+    qobject_unref(args);
+    if (err) {
+        /* or assert(!ret) after reviewing all handlers: */
+        qobject_unref(ret);
+        goto out;
+    }
+
+    if (cmd->options & QCO_NO_SUCCESS_RESP) {
+        g_assert(!ret);
+        return NULL;
+    } else if (!ret) {
+        /*
+         * When the command's schema has no 'returns', cmd->fn()
+         * leaves @ret null.  The QMP spec calls for an empty object
+         * then; supply it.
+         */
+        ret = QOBJECT(qdict_new());
+    }
+
+    rsp = qdict_new();
+    qdict_put_obj(rsp, "return", ret);
+
+out:
+    if (err) {
+        assert(!rsp);
+        rsp = qmp_error_response(err);
+    }
+
+    assert(rsp);
+
+    if (id) {
+        qdict_put_obj(rsp, "id", qobject_ref(id));
+    }
+
+    return rsp;
+}
diff --git a/qapi/qmp-event.c b/qapi/qmp-event.c
new file mode 100644
index 000000000..19d3cd003
--- /dev/null
+++ b/qapi/qmp-event.c
@@ -0,0 +1,45 @@
+/*
+ * QMP Event related
+ *
+ * Copyright (c) 2014 Wenchao Xia
+ *
+ * Authors:
+ *  Wenchao Xia   
+ *
+ * This work is licensed under the terms of the GNU LGPL, version 2.1 or later.
+ * See the COPYING.LIB file in the top-level directory.
+ *
+ */
+
+#include "qemu/osdep.h"
+
+#include "qapi/qmp-event.h"
+#include "qapi/qmp/qstring.h"
+#include "qapi/qmp/qdict.h"
+#include "qapi/qmp/qjson.h"
+
+static void timestamp_put(QDict *qdict)
+{
+    int err;
+    QDict *ts;
+    qemu_timeval tv;
+
+    err = qemu_gettimeofday(&tv);
+    /* Put -1 to indicate failure of getting host time */
+    ts = qdict_from_jsonf_nofail("{ 'seconds': %lld, 'microseconds': %lld }",
+                                 err < 0 ? -1LL : (long long)tv.tv_sec,
+                                 err < 0 ? -1LL : (long long)tv.tv_usec);
+    qdict_put(qdict, "timestamp", ts);
+}
+
+/*
+ * Build a QDict, then fill event name and time stamp, caller should free the
+ * QDict after usage.
+ */
+QDict *qmp_event_build_dict(const char *event_name)
+{
+    QDict *dict = qdict_new();
+    qdict_put_str(dict, "event", event_name);
+    timestamp_put(dict);
+    return dict;
+}
diff --git a/qapi/qmp-registry.c b/qapi/qmp-registry.c
new file mode 100644
index 000000000..58c65b505
--- /dev/null
+++ b/qapi/qmp-registry.c
@@ -0,0 +1,91 @@
+/*
+ * Core Definitions for QAPI/QMP Dispatch
+ *
+ * Copyright IBM, Corp. 2011
+ *
+ * Authors:
+ *  Anthony Liguori   
+ *  Michael Roth      
+ *
+ * This work is licensed under the terms of the GNU LGPL, version 2.1 or later.
+ * See the COPYING.LIB file in the top-level directory.
+ *
+ */
+
+#include "qemu/osdep.h"
+#include "qapi/qmp/dispatch.h"
+
+void qmp_register_command(QmpCommandList *cmds, const char *name,
+                          QmpCommandFunc *fn, QmpCommandOptions options)
+{
+    QmpCommand *cmd = g_malloc0(sizeof(*cmd));
+
+    /* QCO_COROUTINE and QCO_ALLOW_OOB are incompatible for now */
+    assert(!((options & QCO_COROUTINE) && (options & QCO_ALLOW_OOB)));
+
+    cmd->name = name;
+    cmd->fn = fn;
+    cmd->enabled = true;
+    cmd->options = options;
+    QTAILQ_INSERT_TAIL(cmds, cmd, node);
+}
+
+const QmpCommand *qmp_find_command(const QmpCommandList *cmds, const char *name)
+{
+    QmpCommand *cmd;
+
+    QTAILQ_FOREACH(cmd, cmds, node) {
+        if (strcmp(cmd->name, name) == 0) {
+            return cmd;
+        }
+    }
+    return NULL;
+}
+
+static void qmp_toggle_command(QmpCommandList *cmds, const char *name,
+                               bool enabled)
+{
+    QmpCommand *cmd;
+
+    QTAILQ_FOREACH(cmd, cmds, node) {
+        if (strcmp(cmd->name, name) == 0) {
+            cmd->enabled = enabled;
+            return;
+        }
+    }
+}
+
+void qmp_disable_command(QmpCommandList *cmds, const char *name)
+{
+    qmp_toggle_command(cmds, name, false);
+}
+
+void qmp_enable_command(QmpCommandList *cmds, const char *name)
+{
+    qmp_toggle_command(cmds, name, true);
+}
+
+bool qmp_command_is_enabled(const QmpCommand *cmd)
+{
+    return cmd->enabled;
+}
+
+const char *qmp_command_name(const QmpCommand *cmd)
+{
+    return cmd->name;
+}
+
+bool qmp_has_success_response(const QmpCommand *cmd)
+{
+    return !(cmd->options & QCO_NO_SUCCESS_RESP);
+}
+
+void qmp_for_each_command(const QmpCommandList *cmds, qmp_cmd_callback_fn fn,
+                          void *opaque)
+{
+    const QmpCommand *cmd;
+
+    QTAILQ_FOREACH(cmd, cmds, node) {
+        fn(cmd, opaque);
+    }
+}
diff --git a/qapi/qobject-input-visitor.c b/qapi/qobject-input-visitor.c
new file mode 100644
index 000000000..23843b242
--- /dev/null
+++ b/qapi/qobject-input-visitor.c
@@ -0,0 +1,765 @@
+/*
+ * Input Visitor
+ *
+ * Copyright (C) 2012-2017 Red Hat, Inc.
+ * Copyright IBM, Corp. 2011
+ *
+ * Authors:
+ *  Anthony Liguori   
+ *
+ * This work is licensed under the terms of the GNU LGPL, version 2.1 or later.
+ * See the COPYING.LIB file in the top-level directory.
+ *
+ */
+
+#include "qemu/osdep.h"
+#include 
+#include "qapi/error.h"
+#include "qapi/qobject-input-visitor.h"
+#include "qapi/visitor-impl.h"
+#include "qemu/queue.h"
+#include "qapi/qmp/qjson.h"
+#include "qapi/qmp/qbool.h"
+#include "qapi/qmp/qdict.h"
+#include "qapi/qmp/qerror.h"
+#include "qapi/qmp/qlist.h"
+#include "qapi/qmp/qnull.h"
+#include "qapi/qmp/qnum.h"
+#include "qapi/qmp/qstring.h"
+#include "qemu/cutils.h"
+#include "qemu/option.h"
+
+typedef struct StackObject {
+    const char *name;            /* Name of @obj in its parent, if any */
+    QObject *obj;                /* QDict or QList being visited */
+    void *qapi; /* sanity check that caller uses same pointer */
+
+    GHashTable *h;              /* If @obj is QDict: unvisited keys */
+    const QListEntry *entry;    /* If @obj is QList: unvisited tail */
+    unsigned index;             /* If @obj is QList: list index of @entry */
+
+    QSLIST_ENTRY(StackObject) node; /* parent */
+} StackObject;
+
+struct QObjectInputVisitor {
+    Visitor visitor;
+
+    /* Root of visit at visitor creation. */
+    QObject *root;
+    bool keyval;                /* Assume @root made with keyval_parse() */
+
+    /* Stack of objects being visited (all entries will be either
+     * QDict or QList). */
+    QSLIST_HEAD(, StackObject) stack;
+
+    GString *errname;           /* Accumulator for full_name() */
+};
+
+static QObjectInputVisitor *to_qiv(Visitor *v)
+{
+    return container_of(v, QObjectInputVisitor, visitor);
+}
+
+/*
+ * Find the full name of something @qiv is currently visiting.
+ * @qiv is visiting something named @name in the stack of containers
+ * @qiv->stack.
+ * If @n is zero, return its full name.
+ * If @n is positive, return the full name of the @n-th container
+ * counting from the top.  The stack of containers must have at least
+ * @n elements.
+ * The returned string is valid until the next full_name_nth(@v) or
+ * destruction of @v.
+ */
+static const char *full_name_nth(QObjectInputVisitor *qiv, const char *name,
+                                 int n)
+{
+    StackObject *so;
+    char buf[32];
+
+    if (qiv->errname) {
+        g_string_truncate(qiv->errname, 0);
+    } else {
+        qiv->errname = g_string_new("");
+    }
+
+    QSLIST_FOREACH(so , &qiv->stack, node) {
+        if (n) {
+            n--;
+        } else if (qobject_type(so->obj) == QTYPE_QDICT) {
+            g_string_prepend(qiv->errname, name ?: "");
+            g_string_prepend_c(qiv->errname, '.');
+        } else {
+            snprintf(buf, sizeof(buf),
+                     qiv->keyval ? ".%u" : "[%u]",
+                     so->index);
+            g_string_prepend(qiv->errname, buf);
+        }
+        name = so->name;
+    }
+    assert(!n);
+
+    if (name) {
+        g_string_prepend(qiv->errname, name);
+    } else if (qiv->errname->str[0] == '.') {
+        g_string_erase(qiv->errname, 0, 1);
+    } else if (!qiv->errname->str[0]) {
+        return "";
+    }
+
+    return qiv->errname->str;
+}
+
+static const char *full_name(QObjectInputVisitor *qiv, const char *name)
+{
+    return full_name_nth(qiv, name, 0);
+}
+
+static QObject *qobject_input_try_get_object(QObjectInputVisitor *qiv,
+                                             const char *name,
+                                             bool consume)
+{
+    StackObject *tos;
+    QObject *qobj;
+    QObject *ret;
+
+    if (QSLIST_EMPTY(&qiv->stack)) {
+        /* Starting at root, name is ignored. */
+        assert(qiv->root);
+        return qiv->root;
+    }
+
+    /* We are in a container; find the next element. */
+    tos = QSLIST_FIRST(&qiv->stack);
+    qobj = tos->obj;
+    assert(qobj);
+
+    if (qobject_type(qobj) == QTYPE_QDICT) {
+        assert(name);
+        ret = qdict_get(qobject_to(QDict, qobj), name);
+        if (tos->h && consume && ret) {
+            bool removed = g_hash_table_remove(tos->h, name);
+            assert(removed);
+        }
+    } else {
+        assert(qobject_type(qobj) == QTYPE_QLIST);
+        assert(!name);
+        if (tos->entry) {
+            ret = qlist_entry_obj(tos->entry);
+            if (consume) {
+                tos->entry = qlist_next(tos->entry);
+            }
+        } else {
+            ret = NULL;
+        }
+        if (consume) {
+            tos->index++;
+        }
+    }
+
+    return ret;
+}
+
+static QObject *qobject_input_get_object(QObjectInputVisitor *qiv,
+                                         const char *name,
+                                         bool consume, Error **errp)
+{
+    QObject *obj = qobject_input_try_get_object(qiv, name, consume);
+
+    if (!obj) {
+        error_setg(errp, QERR_MISSING_PARAMETER, full_name(qiv, name));
+    }
+    return obj;
+}
+
+static const char *qobject_input_get_keyval(QObjectInputVisitor *qiv,
+                                            const char *name,
+                                            Error **errp)
+{
+    QObject *qobj;
+    QString *qstr;
+
+    qobj = qobject_input_get_object(qiv, name, true, errp);
+    if (!qobj) {
+        return NULL;
+    }
+
+    qstr = qobject_to(QString, qobj);
+    if (!qstr) {
+        switch (qobject_type(qobj)) {
+        case QTYPE_QDICT:
+        case QTYPE_QLIST:
+            error_setg(errp, "Parameters '%s.*' are unexpected",
+                       full_name(qiv, name));
+            return NULL;
+        default:
+            /* Non-string scalar (should this be an assertion?) */
+            error_setg(errp, "Internal error: parameter %s invalid",
+                       full_name(qiv, name));
+            return NULL;
+        }
+    }
+
+    return qstring_get_str(qstr);
+}
+
+static const QListEntry *qobject_input_push(QObjectInputVisitor *qiv,
+                                            const char *name,
+                                            QObject *obj, void *qapi)
+{
+    GHashTable *h;
+    StackObject *tos = g_new0(StackObject, 1);
+    QDict *qdict = qobject_to(QDict, obj);
+    QList *qlist = qobject_to(QList, obj);
+    const QDictEntry *entry;
+
+    assert(obj);
+    tos->name = name;
+    tos->obj = obj;
+    tos->qapi = qapi;
+
+    if (qdict) {
+        h = g_hash_table_new(g_str_hash, g_str_equal);
+        for (entry = qdict_first(qdict);
+             entry;
+             entry = qdict_next(qdict, entry)) {
+            g_hash_table_insert(h, (void *)qdict_entry_key(entry), NULL);
+        }
+        tos->h = h;
+    } else {
+        assert(qlist);
+        tos->entry = qlist_first(qlist);
+        tos->index = -1;
+    }
+
+    QSLIST_INSERT_HEAD(&qiv->stack, tos, node);
+    return tos->entry;
+}
+
+
+static bool qobject_input_check_struct(Visitor *v, Error **errp)
+{
+    QObjectInputVisitor *qiv = to_qiv(v);
+    StackObject *tos = QSLIST_FIRST(&qiv->stack);
+    GHashTableIter iter;
+    const char *key;
+
+    assert(tos && !tos->entry);
+
+    g_hash_table_iter_init(&iter, tos->h);
+    if (g_hash_table_iter_next(&iter, (void **)&key, NULL)) {
+        error_setg(errp, "Parameter '%s' is unexpected",
+                   full_name(qiv, key));
+        return false;
+    }
+    return true;
+}
+
+static void qobject_input_stack_object_free(StackObject *tos)
+{
+    if (tos->h) {
+        g_hash_table_unref(tos->h);
+    }
+
+    g_free(tos);
+}
+
+static void qobject_input_pop(Visitor *v, void **obj)
+{
+    QObjectInputVisitor *qiv = to_qiv(v);
+    StackObject *tos = QSLIST_FIRST(&qiv->stack);
+
+    assert(tos && tos->qapi == obj);
+    QSLIST_REMOVE_HEAD(&qiv->stack, node);
+    qobject_input_stack_object_free(tos);
+}
+
+static bool qobject_input_start_struct(Visitor *v, const char *name, void **obj,
+                                       size_t size, Error **errp)
+{
+    QObjectInputVisitor *qiv = to_qiv(v);
+    QObject *qobj = qobject_input_get_object(qiv, name, true, errp);
+
+    if (obj) {
+        *obj = NULL;
+    }
+    if (!qobj) {
+        return false;
+    }
+    if (qobject_type(qobj) != QTYPE_QDICT) {
+        error_setg(errp, QERR_INVALID_PARAMETER_TYPE,
+                   full_name(qiv, name), "object");
+        return false;
+    }
+
+    qobject_input_push(qiv, name, qobj, obj);
+
+    if (obj) {
+        *obj = g_malloc0(size);
+    }
+    return true;
+}
+
+static void qobject_input_end_struct(Visitor *v, void **obj)
+{
+    QObjectInputVisitor *qiv = to_qiv(v);
+    StackObject *tos = QSLIST_FIRST(&qiv->stack);
+
+    assert(qobject_type(tos->obj) == QTYPE_QDICT && tos->h);
+    qobject_input_pop(v, obj);
+}
+
+
+static bool qobject_input_start_list(Visitor *v, const char *name,
+                                     GenericList **list, size_t size,
+                                     Error **errp)
+{
+    QObjectInputVisitor *qiv = to_qiv(v);
+    QObject *qobj = qobject_input_get_object(qiv, name, true, errp);
+    const QListEntry *entry;
+
+    if (list) {
+        *list = NULL;
+    }
+    if (!qobj) {
+        return false;
+    }
+    if (qobject_type(qobj) != QTYPE_QLIST) {
+        error_setg(errp, QERR_INVALID_PARAMETER_TYPE,
+                   full_name(qiv, name), "array");
+        return false;
+    }
+
+    entry = qobject_input_push(qiv, name, qobj, list);
+    if (entry && list) {
+        *list = g_malloc0(size);
+    }
+    return true;
+}
+
+static GenericList *qobject_input_next_list(Visitor *v, GenericList *tail,
+                                            size_t size)
+{
+    QObjectInputVisitor *qiv = to_qiv(v);
+    StackObject *tos = QSLIST_FIRST(&qiv->stack);
+
+    assert(tos && qobject_to(QList, tos->obj));
+
+    if (!tos->entry) {
+        return NULL;
+    }
+    tail->next = g_malloc0(size);
+    return tail->next;
+}
+
+static bool qobject_input_check_list(Visitor *v, Error **errp)
+{
+    QObjectInputVisitor *qiv = to_qiv(v);
+    StackObject *tos = QSLIST_FIRST(&qiv->stack);
+
+    assert(tos && qobject_to(QList, tos->obj));
+
+    if (tos->entry) {
+        error_setg(errp, "Only %u list elements expected in %s",
+                   tos->index + 1, full_name_nth(qiv, NULL, 1));
+        return false;
+    }
+    return true;
+}
+
+static void qobject_input_end_list(Visitor *v, void **obj)
+{
+    QObjectInputVisitor *qiv = to_qiv(v);
+    StackObject *tos = QSLIST_FIRST(&qiv->stack);
+
+    assert(qobject_type(tos->obj) == QTYPE_QLIST && !tos->h);
+    qobject_input_pop(v, obj);
+}
+
+static bool qobject_input_start_alternate(Visitor *v, const char *name,
+                                          GenericAlternate **obj, size_t size,
+                                          Error **errp)
+{
+    QObjectInputVisitor *qiv = to_qiv(v);
+    QObject *qobj = qobject_input_get_object(qiv, name, false, errp);
+
+    if (!qobj) {
+        *obj = NULL;
+        return false;
+    }
+    *obj = g_malloc0(size);
+    (*obj)->type = qobject_type(qobj);
+    return true;
+}
+
+static bool qobject_input_type_int64(Visitor *v, const char *name, int64_t *obj,
+                                     Error **errp)
+{
+    QObjectInputVisitor *qiv = to_qiv(v);
+    QObject *qobj = qobject_input_get_object(qiv, name, true, errp);
+    QNum *qnum;
+
+    if (!qobj) {
+        return false;
+    }
+    qnum = qobject_to(QNum, qobj);
+    if (!qnum || !qnum_get_try_int(qnum, obj)) {
+        error_setg(errp, QERR_INVALID_PARAMETER_TYPE,
+                   full_name(qiv, name), "integer");
+        return false;
+    }
+    return true;
+}
+
+static bool qobject_input_type_int64_keyval(Visitor *v, const char *name,
+                                            int64_t *obj, Error **errp)
+{
+    QObjectInputVisitor *qiv = to_qiv(v);
+    const char *str = qobject_input_get_keyval(qiv, name, errp);
+
+    if (!str) {
+        return false;
+    }
+
+    if (qemu_strtoi64(str, NULL, 0, obj) < 0) {
+        /* TODO report -ERANGE more nicely */
+        error_setg(errp, QERR_INVALID_PARAMETER_VALUE,
+                   full_name(qiv, name), "integer");
+        return false;
+    }
+    return true;
+}
+
+static bool qobject_input_type_uint64(Visitor *v, const char *name,
+                                      uint64_t *obj, Error **errp)
+{
+    QObjectInputVisitor *qiv = to_qiv(v);
+    QObject *qobj = qobject_input_get_object(qiv, name, true, errp);
+    QNum *qnum;
+    int64_t val;
+
+    if (!qobj) {
+        return false;
+    }
+    qnum = qobject_to(QNum, qobj);
+    if (!qnum) {
+        goto err;
+    }
+
+    if (qnum_get_try_uint(qnum, obj)) {
+        return true;
+    }
+
+    /* Need to accept negative values for backward compatibility */
+    if (qnum_get_try_int(qnum, &val)) {
+        *obj = val;
+        return true;
+    }
+
+err:
+    error_setg(errp, QERR_INVALID_PARAMETER_VALUE,
+               full_name(qiv, name), "uint64");
+    return false;
+}
+
+static bool qobject_input_type_uint64_keyval(Visitor *v, const char *name,
+                                             uint64_t *obj, Error **errp)
+{
+    QObjectInputVisitor *qiv = to_qiv(v);
+    const char *str = qobject_input_get_keyval(qiv, name, errp);
+
+    if (!str) {
+        return false;
+    }
+
+    if (qemu_strtou64(str, NULL, 0, obj) < 0) {
+        /* TODO report -ERANGE more nicely */
+        error_setg(errp, QERR_INVALID_PARAMETER_VALUE,
+                   full_name(qiv, name), "integer");
+        return false;
+    }
+    return true;
+}
+
+static bool qobject_input_type_bool(Visitor *v, const char *name, bool *obj,
+                                    Error **errp)
+{
+    QObjectInputVisitor *qiv = to_qiv(v);
+    QObject *qobj = qobject_input_get_object(qiv, name, true, errp);
+    QBool *qbool;
+
+    if (!qobj) {
+        return false;
+    }
+    qbool = qobject_to(QBool, qobj);
+    if (!qbool) {
+        error_setg(errp, QERR_INVALID_PARAMETER_TYPE,
+                   full_name(qiv, name), "boolean");
+        return false;
+    }
+
+    *obj = qbool_get_bool(qbool);
+    return true;
+}
+
+static bool qobject_input_type_bool_keyval(Visitor *v, const char *name,
+                                           bool *obj, Error **errp)
+{
+    QObjectInputVisitor *qiv = to_qiv(v);
+    const char *str = qobject_input_get_keyval(qiv, name, errp);
+
+    if (!str) {
+        return false;
+    }
+
+    if (!qapi_bool_parse(name, str, obj, NULL)) {
+        error_setg(errp, QERR_INVALID_PARAMETER_VALUE,
+                   full_name(qiv, name), "'on' or 'off'");
+        return false;
+    }
+    return true;
+}
+
+static bool qobject_input_type_str(Visitor *v, const char *name, char **obj,
+                                   Error **errp)
+{
+    QObjectInputVisitor *qiv = to_qiv(v);
+    QObject *qobj = qobject_input_get_object(qiv, name, true, errp);
+    QString *qstr;
+
+    *obj = NULL;
+    if (!qobj) {
+        return false;
+    }
+    qstr = qobject_to(QString, qobj);
+    if (!qstr) {
+        error_setg(errp, QERR_INVALID_PARAMETER_TYPE,
+                   full_name(qiv, name), "string");
+        return false;
+    }
+
+    *obj = g_strdup(qstring_get_str(qstr));
+    return true;
+}
+
+static bool qobject_input_type_str_keyval(Visitor *v, const char *name,
+                                          char **obj, Error **errp)
+{
+    QObjectInputVisitor *qiv = to_qiv(v);
+    const char *str = qobject_input_get_keyval(qiv, name, errp);
+
+    *obj = g_strdup(str);
+    return !!str;
+}
+
+static bool qobject_input_type_number(Visitor *v, const char *name, double *obj,
+                                      Error **errp)
+{
+    QObjectInputVisitor *qiv = to_qiv(v);
+    QObject *qobj = qobject_input_get_object(qiv, name, true, errp);
+    QNum *qnum;
+
+    if (!qobj) {
+        return false;
+    }
+    qnum = qobject_to(QNum, qobj);
+    if (!qnum) {
+        error_setg(errp, QERR_INVALID_PARAMETER_TYPE,
+                   full_name(qiv, name), "number");
+        return false;
+    }
+
+    *obj = qnum_get_double(qnum);
+    return true;
+}
+
+static bool qobject_input_type_number_keyval(Visitor *v, const char *name,
+                                             double *obj, Error **errp)
+{
+    QObjectInputVisitor *qiv = to_qiv(v);
+    const char *str = qobject_input_get_keyval(qiv, name, errp);
+    double val;
+
+    if (!str) {
+        return false;
+    }
+
+    if (qemu_strtod_finite(str, NULL, &val)) {
+        /* TODO report -ERANGE more nicely */
+        error_setg(errp, QERR_INVALID_PARAMETER_TYPE,
+                   full_name(qiv, name), "number");
+        return false;
+    }
+
+    *obj = val;
+    return true;
+}
+
+static bool qobject_input_type_any(Visitor *v, const char *name, QObject **obj,
+                                   Error **errp)
+{
+    QObjectInputVisitor *qiv = to_qiv(v);
+    QObject *qobj = qobject_input_get_object(qiv, name, true, errp);
+
+    *obj = NULL;
+    if (!qobj) {
+        return false;
+    }
+
+    *obj = qobject_ref(qobj);
+    return true;
+}
+
+static bool qobject_input_type_null(Visitor *v, const char *name,
+                                    QNull **obj, Error **errp)
+{
+    QObjectInputVisitor *qiv = to_qiv(v);
+    QObject *qobj = qobject_input_get_object(qiv, name, true, errp);
+
+    *obj = NULL;
+    if (!qobj) {
+        return false;
+    }
+
+    if (qobject_type(qobj) != QTYPE_QNULL) {
+        error_setg(errp, QERR_INVALID_PARAMETER_TYPE,
+                   full_name(qiv, name), "null");
+        return false;
+    }
+    *obj = qnull();
+    return true;
+}
+
+static bool qobject_input_type_size_keyval(Visitor *v, const char *name,
+                                           uint64_t *obj, Error **errp)
+{
+    QObjectInputVisitor *qiv = to_qiv(v);
+    const char *str = qobject_input_get_keyval(qiv, name, errp);
+
+    if (!str) {
+        return false;
+    }
+
+    if (qemu_strtosz(str, NULL, obj) < 0) {
+        /* TODO report -ERANGE more nicely */
+        error_setg(errp, QERR_INVALID_PARAMETER_VALUE,
+                   full_name(qiv, name), "size");
+        return false;
+    }
+    return true;
+}
+
+static void qobject_input_optional(Visitor *v, const char *name, bool *present)
+{
+    QObjectInputVisitor *qiv = to_qiv(v);
+    QObject *qobj = qobject_input_try_get_object(qiv, name, false);
+
+    if (!qobj) {
+        *present = false;
+        return;
+    }
+
+    *present = true;
+}
+
+static void qobject_input_free(Visitor *v)
+{
+    QObjectInputVisitor *qiv = to_qiv(v);
+
+    while (!QSLIST_EMPTY(&qiv->stack)) {
+        StackObject *tos = QSLIST_FIRST(&qiv->stack);
+
+        QSLIST_REMOVE_HEAD(&qiv->stack, node);
+        qobject_input_stack_object_free(tos);
+    }
+
+    qobject_unref(qiv->root);
+    if (qiv->errname) {
+        g_string_free(qiv->errname, TRUE);
+    }
+    g_free(qiv);
+}
+
+static QObjectInputVisitor *qobject_input_visitor_base_new(QObject *obj)
+{
+    QObjectInputVisitor *v = g_malloc0(sizeof(*v));
+
+    assert(obj);
+
+    v->visitor.type = VISITOR_INPUT;
+    v->visitor.start_struct = qobject_input_start_struct;
+    v->visitor.check_struct = qobject_input_check_struct;
+    v->visitor.end_struct = qobject_input_end_struct;
+    v->visitor.start_list = qobject_input_start_list;
+    v->visitor.next_list = qobject_input_next_list;
+    v->visitor.check_list = qobject_input_check_list;
+    v->visitor.end_list = qobject_input_end_list;
+    v->visitor.start_alternate = qobject_input_start_alternate;
+    v->visitor.optional = qobject_input_optional;
+    v->visitor.free = qobject_input_free;
+
+    v->root = qobject_ref(obj);
+
+    return v;
+}
+
+Visitor *qobject_input_visitor_new(QObject *obj)
+{
+    QObjectInputVisitor *v = qobject_input_visitor_base_new(obj);
+
+    v->visitor.type_int64 = qobject_input_type_int64;
+    v->visitor.type_uint64 = qobject_input_type_uint64;
+    v->visitor.type_bool = qobject_input_type_bool;
+    v->visitor.type_str = qobject_input_type_str;
+    v->visitor.type_number = qobject_input_type_number;
+    v->visitor.type_any = qobject_input_type_any;
+    v->visitor.type_null = qobject_input_type_null;
+
+    return &v->visitor;
+}
+
+Visitor *qobject_input_visitor_new_keyval(QObject *obj)
+{
+    QObjectInputVisitor *v = qobject_input_visitor_base_new(obj);
+
+    v->visitor.type_int64 = qobject_input_type_int64_keyval;
+    v->visitor.type_uint64 = qobject_input_type_uint64_keyval;
+    v->visitor.type_bool = qobject_input_type_bool_keyval;
+    v->visitor.type_str = qobject_input_type_str_keyval;
+    v->visitor.type_number = qobject_input_type_number_keyval;
+    v->visitor.type_any = qobject_input_type_any;
+    v->visitor.type_null = qobject_input_type_null;
+    v->visitor.type_size = qobject_input_type_size_keyval;
+    v->keyval = true;
+
+    return &v->visitor;
+}
+
+Visitor *qobject_input_visitor_new_str(const char *str,
+                                       const char *implied_key,
+                                       Error **errp)
+{
+    bool is_json = str[0] == '{';
+    QObject *obj;
+    QDict *args;
+    Visitor *v;
+
+    if (is_json) {
+        obj = qobject_from_json(str, errp);
+        if (!obj) {
+            return NULL;
+        }
+        args = qobject_to(QDict, obj);
+        assert(args);
+        v = qobject_input_visitor_new(QOBJECT(args));
+    } else {
+        args = keyval_parse(str, implied_key, NULL, errp);
+        if (!args) {
+            return NULL;
+        }
+        v = qobject_input_visitor_new_keyval(QOBJECT(args));
+    }
+    qobject_unref(args);
+
+    return v;
+}
diff --git a/qapi/qobject-output-visitor.c b/qapi/qobject-output-visitor.c
new file mode 100644
index 000000000..ba6f6ac8a
--- /dev/null
+++ b/qapi/qobject-output-visitor.c
@@ -0,0 +1,266 @@
+/*
+ * Core Definitions for QAPI/QMP Command Registry
+ *
+ * Copyright (C) 2012-2016 Red Hat, Inc.
+ * Copyright IBM, Corp. 2011
+ *
+ * Authors:
+ *  Anthony Liguori   
+ *
+ * This work is licensed under the terms of the GNU LGPL, version 2.1 or later.
+ * See the COPYING.LIB file in the top-level directory.
+ *
+ */
+
+#include "qemu/osdep.h"
+#include "qapi/qobject-output-visitor.h"
+#include "qapi/visitor-impl.h"
+#include "qemu/queue.h"
+#include "qapi/qmp/qbool.h"
+#include "qapi/qmp/qdict.h"
+#include "qapi/qmp/qlist.h"
+#include "qapi/qmp/qnull.h"
+#include "qapi/qmp/qnum.h"
+#include "qapi/qmp/qstring.h"
+
+typedef struct QStackEntry {
+    QObject *value;
+    void *qapi; /* sanity check that caller uses same pointer */
+    QSLIST_ENTRY(QStackEntry) node;
+} QStackEntry;
+
+struct QObjectOutputVisitor {
+    Visitor visitor;
+    QSLIST_HEAD(, QStackEntry) stack; /* Stack of unfinished containers */
+    QObject *root; /* Root of the output visit */
+    QObject **result; /* User's storage location for result */
+};
+
+#define qobject_output_add(qov, name, value) \
+    qobject_output_add_obj(qov, name, QOBJECT(value))
+#define qobject_output_push(qov, value, qapi) \
+    qobject_output_push_obj(qov, QOBJECT(value), qapi)
+
+static QObjectOutputVisitor *to_qov(Visitor *v)
+{
+    return container_of(v, QObjectOutputVisitor, visitor);
+}
+
+/* Push @value onto the stack of current QObjects being built */
+static void qobject_output_push_obj(QObjectOutputVisitor *qov, QObject *value,
+                                    void *qapi)
+{
+    QStackEntry *e = g_malloc0(sizeof(*e));
+
+    assert(qov->root);
+    assert(value);
+    e->value = value;
+    e->qapi = qapi;
+    QSLIST_INSERT_HEAD(&qov->stack, e, node);
+}
+
+/* Pop a value off the stack of QObjects being built, and return it. */
+static QObject *qobject_output_pop(QObjectOutputVisitor *qov, void *qapi)
+{
+    QStackEntry *e = QSLIST_FIRST(&qov->stack);
+    QObject *value;
+
+    assert(e);
+    assert(e->qapi == qapi);
+    QSLIST_REMOVE_HEAD(&qov->stack, node);
+    value = e->value;
+    assert(value);
+    g_free(e);
+    return value;
+}
+
+/* Add @value to the current QObject being built.
+ * If the stack is visiting a dictionary or list, @value is now owned
+ * by that container. Otherwise, @value is now the root.  */
+static void qobject_output_add_obj(QObjectOutputVisitor *qov, const char *name,
+                                   QObject *value)
+{
+    QStackEntry *e = QSLIST_FIRST(&qov->stack);
+    QObject *cur = e ? e->value : NULL;
+
+    if (!cur) {
+        /* Don't allow reuse of visitor on more than one root */
+        assert(!qov->root);
+        qov->root = value;
+    } else {
+        switch (qobject_type(cur)) {
+        case QTYPE_QDICT:
+            assert(name);
+            qdict_put_obj(qobject_to(QDict, cur), name, value);
+            break;
+        case QTYPE_QLIST:
+            assert(!name);
+            qlist_append_obj(qobject_to(QList, cur), value);
+            break;
+        default:
+            g_assert_not_reached();
+        }
+    }
+}
+
+static bool qobject_output_start_struct(Visitor *v, const char *name,
+                                        void **obj, size_t unused, Error **errp)
+{
+    QObjectOutputVisitor *qov = to_qov(v);
+    QDict *dict = qdict_new();
+
+    qobject_output_add(qov, name, dict);
+    qobject_output_push(qov, dict, obj);
+    return true;
+}
+
+static void qobject_output_end_struct(Visitor *v, void **obj)
+{
+    QObjectOutputVisitor *qov = to_qov(v);
+    QObject *value = qobject_output_pop(qov, obj);
+    assert(qobject_type(value) == QTYPE_QDICT);
+}
+
+static bool qobject_output_start_list(Visitor *v, const char *name,
+                                      GenericList **listp, size_t size,
+                                      Error **errp)
+{
+    QObjectOutputVisitor *qov = to_qov(v);
+    QList *list = qlist_new();
+
+    qobject_output_add(qov, name, list);
+    qobject_output_push(qov, list, listp);
+    return true;
+}
+
+static GenericList *qobject_output_next_list(Visitor *v, GenericList *tail,
+                                             size_t size)
+{
+    return tail->next;
+}
+
+static void qobject_output_end_list(Visitor *v, void **obj)
+{
+    QObjectOutputVisitor *qov = to_qov(v);
+    QObject *value = qobject_output_pop(qov, obj);
+    assert(qobject_type(value) == QTYPE_QLIST);
+}
+
+static bool qobject_output_type_int64(Visitor *v, const char *name,
+                                      int64_t *obj, Error **errp)
+{
+    QObjectOutputVisitor *qov = to_qov(v);
+    qobject_output_add(qov, name, qnum_from_int(*obj));
+    return true;
+}
+
+static bool qobject_output_type_uint64(Visitor *v, const char *name,
+                                       uint64_t *obj, Error **errp)
+{
+    QObjectOutputVisitor *qov = to_qov(v);
+    qobject_output_add(qov, name, qnum_from_uint(*obj));
+    return true;
+}
+
+static bool qobject_output_type_bool(Visitor *v, const char *name, bool *obj,
+                                     Error **errp)
+{
+    QObjectOutputVisitor *qov = to_qov(v);
+    qobject_output_add(qov, name, qbool_from_bool(*obj));
+    return true;
+}
+
+static bool qobject_output_type_str(Visitor *v, const char *name, char **obj,
+                                    Error **errp)
+{
+    QObjectOutputVisitor *qov = to_qov(v);
+    if (*obj) {
+        qobject_output_add(qov, name, qstring_from_str(*obj));
+    } else {
+        qobject_output_add(qov, name, qstring_from_str(""));
+    }
+    return true;
+}
+
+static bool qobject_output_type_number(Visitor *v, const char *name,
+                                       double *obj, Error **errp)
+{
+    QObjectOutputVisitor *qov = to_qov(v);
+    qobject_output_add(qov, name, qnum_from_double(*obj));
+    return true;
+}
+
+static bool qobject_output_type_any(Visitor *v, const char *name,
+                                    QObject **obj, Error **errp)
+{
+    QObjectOutputVisitor *qov = to_qov(v);
+
+    qobject_output_add_obj(qov, name, qobject_ref(*obj));
+    return true;
+}
+
+static bool qobject_output_type_null(Visitor *v, const char *name,
+                                     QNull **obj, Error **errp)
+{
+    QObjectOutputVisitor *qov = to_qov(v);
+    qobject_output_add(qov, name, qnull());
+    return true;
+}
+
+/* Finish building, and return the root object.
+ * The root object is never null. The caller becomes the object's
+ * owner, and should use qobject_unref() when done with it.  */
+static void qobject_output_complete(Visitor *v, void *opaque)
+{
+    QObjectOutputVisitor *qov = to_qov(v);
+
+    /* A visit must have occurred, with each start paired with end.  */
+    assert(qov->root && QSLIST_EMPTY(&qov->stack));
+    assert(opaque == qov->result);
+
+    *qov->result = qobject_ref(qov->root);
+    qov->result = NULL;
+}
+
+static void qobject_output_free(Visitor *v)
+{
+    QObjectOutputVisitor *qov = to_qov(v);
+    QStackEntry *e;
+
+    while (!QSLIST_EMPTY(&qov->stack)) {
+        e = QSLIST_FIRST(&qov->stack);
+        QSLIST_REMOVE_HEAD(&qov->stack, node);
+        g_free(e);
+    }
+
+    qobject_unref(qov->root);
+    g_free(qov);
+}
+
+Visitor *qobject_output_visitor_new(QObject **result)
+{
+    QObjectOutputVisitor *v;
+
+    v = g_malloc0(sizeof(*v));
+
+    v->visitor.type = VISITOR_OUTPUT;
+    v->visitor.start_struct = qobject_output_start_struct;
+    v->visitor.end_struct = qobject_output_end_struct;
+    v->visitor.start_list = qobject_output_start_list;
+    v->visitor.next_list = qobject_output_next_list;
+    v->visitor.end_list = qobject_output_end_list;
+    v->visitor.type_int64 = qobject_output_type_int64;
+    v->visitor.type_uint64 = qobject_output_type_uint64;
+    v->visitor.type_bool = qobject_output_type_bool;
+    v->visitor.type_str = qobject_output_type_str;
+    v->visitor.type_number = qobject_output_type_number;
+    v->visitor.type_any = qobject_output_type_any;
+    v->visitor.type_null = qobject_output_type_null;
+    v->visitor.complete = qobject_output_complete;
+    v->visitor.free = qobject_output_free;
+
+    *result = NULL;
+    v->result = result;
+
+    return &v->visitor;
+}
diff --git a/qapi/qom.json b/qapi/qom.json
new file mode 100644
index 000000000..0b0b92944
--- /dev/null
+++ b/qapi/qom.json
@@ -0,0 +1,256 @@
+# -*- Mode: Python -*-
+# vim: filetype=python
+#
+# This work is licensed under the terms of the GNU GPL, version 2 or later.
+# See the COPYING file in the top-level directory.
+
+##
+# = QEMU Object Model (QOM)
+##
+
+##
+# @ObjectPropertyInfo:
+#
+# @name: the name of the property
+#
+# @type: the type of the property.  This will typically come in one of four
+#        forms:
+#
+#        1) A primitive type such as 'u8', 'u16', 'bool', 'str', or 'double'.
+#           These types are mapped to the appropriate JSON type.
+#
+#        2) A child type in the form 'child' where subtype is a qdev
+#           device type name.  Child properties create the composition tree.
+#
+#        3) A link type in the form 'link' where subtype is a qdev
+#           device type name.  Link properties form the device model graph.
+#
+# @description: if specified, the description of the property.
+#
+# @default-value: the default value, if any (since 5.0)
+#
+# Since: 1.2
+##
+{ 'struct': 'ObjectPropertyInfo',
+  'data': { 'name': 'str',
+            'type': 'str',
+            '*description': 'str',
+            '*default-value': 'any' } }
+
+##
+# @qom-list:
+#
+# This command will list any properties of a object given a path in the object
+# model.
+#
+# @path: the path within the object model.  See @qom-get for a description of
+#        this parameter.
+#
+# Returns: a list of @ObjectPropertyInfo that describe the properties of the
+#          object.
+#
+# Since: 1.2
+#
+# Example:
+#
+# -> { "execute": "qom-list",
+#      "arguments": { "path": "/chardevs" } }
+# <- { "return": [ { "name": "type", "type": "string" },
+#                  { "name": "parallel0", "type": "child" },
+#                  { "name": "serial0", "type": "child" },
+#                  { "name": "mon0", "type": "child" } ] }
+#
+##
+{ 'command': 'qom-list',
+  'data': { 'path': 'str' },
+  'returns': [ 'ObjectPropertyInfo' ],
+  'allow-preconfig': true }
+
+##
+# @qom-get:
+#
+# This command will get a property from a object model path and return the
+# value.
+#
+# @path: The path within the object model.  There are two forms of supported
+#        paths--absolute and partial paths.
+#
+#        Absolute paths are derived from the root object and can follow child<>
+#        or link<> properties.  Since they can follow link<> properties, they
+#        can be arbitrarily long.  Absolute paths look like absolute filenames
+#        and are prefixed  with a leading slash.
+#
+#        Partial paths look like relative filenames.  They do not begin
+#        with a prefix.  The matching rules for partial paths are subtle but
+#        designed to make specifying objects easy.  At each level of the
+#        composition tree, the partial path is matched as an absolute path.
+#        The first match is not returned.  At least two matches are searched
+#        for.  A successful result is only returned if only one match is
+#        found.  If more than one match is found, a flag is return to
+#        indicate that the match was ambiguous.
+#
+# @property: The property name to read
+#
+# Returns: The property value.  The type depends on the property
+#          type. child<> and link<> properties are returned as #str
+#          pathnames.  All integer property types (u8, u16, etc) are
+#          returned as #int.
+#
+# Since: 1.2
+#
+# Example:
+#
+# 1. Use absolute path
+#
+# -> { "execute": "qom-get",
+#      "arguments": { "path": "/machine/unattached/device[0]",
+#                     "property": "hotplugged" } }
+# <- { "return": false }
+#
+# 2. Use partial path
+#
+# -> { "execute": "qom-get",
+#      "arguments": { "path": "unattached/sysbus",
+#                     "property": "type" } }
+# <- { "return": "System" }
+#
+##
+{ 'command': 'qom-get',
+  'data': { 'path': 'str', 'property': 'str' },
+  'returns': 'any',
+  'allow-preconfig': true }
+
+##
+# @qom-set:
+#
+# This command will set a property from a object model path.
+#
+# @path: see @qom-get for a description of this parameter
+#
+# @property: the property name to set
+#
+# @value: a value who's type is appropriate for the property type.  See @qom-get
+#         for a description of type mapping.
+#
+# Since: 1.2
+#
+# Example:
+#
+# -> { "execute": "qom-set",
+#      "arguments": { "path": "/machine",
+#                     "property": "graphics",
+#                     "value": false } }
+# <- { "return": {} }
+#
+##
+{ 'command': 'qom-set',
+  'data': { 'path': 'str', 'property': 'str', 'value': 'any' },
+  'allow-preconfig': true }
+
+##
+# @ObjectTypeInfo:
+#
+# This structure describes a search result from @qom-list-types
+#
+# @name: the type name found in the search
+#
+# @abstract: the type is abstract and can't be directly instantiated.
+#            Omitted if false. (since 2.10)
+#
+# @parent: Name of parent type, if any (since 2.10)
+#
+# Since: 1.1
+##
+{ 'struct': 'ObjectTypeInfo',
+  'data': { 'name': 'str', '*abstract': 'bool', '*parent': 'str' } }
+
+##
+# @qom-list-types:
+#
+# This command will return a list of types given search parameters
+#
+# @implements: if specified, only return types that implement this type name
+#
+# @abstract: if true, include abstract types in the results
+#
+# Returns: a list of @ObjectTypeInfo or an empty list if no results are found
+#
+# Since: 1.1
+##
+{ 'command': 'qom-list-types',
+  'data': { '*implements': 'str', '*abstract': 'bool' },
+  'returns': [ 'ObjectTypeInfo' ],
+  'allow-preconfig': true }
+
+##
+# @qom-list-properties:
+#
+# List properties associated with a QOM object.
+#
+# @typename: the type name of an object
+#
+# Note: objects can create properties at runtime, for example to describe
+#       links between different devices and/or objects. These properties
+#       are not included in the output of this command.
+#
+# Returns: a list of ObjectPropertyInfo describing object properties
+#
+# Since: 2.12
+##
+{ 'command': 'qom-list-properties',
+  'data': { 'typename': 'str'},
+  'returns': [ 'ObjectPropertyInfo' ],
+  'allow-preconfig': true }
+
+##
+# @object-add:
+#
+# Create a QOM object.
+#
+# @qom-type: the class name for the object to be created
+#
+# @id: the name of the new object
+#
+# @props: a dictionary of properties to be passed to the backend. Deprecated
+#         since 5.0, specify the properties on the top level instead. It is an
+#         error to specify the same option both on the top level and in @props.
+#
+# Additional arguments depend on qom-type and are passed to the backend
+# unchanged.
+#
+# Returns: Nothing on success
+#          Error if @qom-type is not a valid class name
+#
+# Since: 2.0
+#
+# Example:
+#
+# -> { "execute": "object-add",
+#      "arguments": { "qom-type": "rng-random", "id": "rng1",
+#                     "filename": "/dev/hwrng" } }
+# <- { "return": {} }
+#
+##
+{ 'command': 'object-add',
+  'data': {'qom-type': 'str', 'id': 'str', '*props': 'any'},
+  'gen': false } # so we can get the additional arguments
+
+##
+# @object-del:
+#
+# Remove a QOM object.
+#
+# @id: the name of the QOM object to remove
+#
+# Returns: Nothing on success
+#          Error if @id is not a valid id for a QOM object
+#
+# Since: 2.0
+#
+# Example:
+#
+# -> { "execute": "object-del", "arguments": { "id": "rng1" } }
+# <- { "return": {} }
+#
+##
+{ 'command': 'object-del', 'data': {'id': 'str'} }
diff --git a/qapi/rdma.json b/qapi/rdma.json
new file mode 100644
index 000000000..a1d2175a8
--- /dev/null
+++ b/qapi/rdma.json
@@ -0,0 +1,39 @@
+# -*- Mode: Python -*-
+# vim: filetype=python
+#
+
+##
+# = RDMA device
+##
+
+##
+# @RDMA_GID_STATUS_CHANGED:
+#
+# Emitted when guest driver adds/deletes GID to/from device
+#
+# @netdev: RoCE Network Device name
+#
+# @gid-status: Add or delete indication
+#
+# @subnet-prefix: Subnet Prefix
+#
+# @interface-id : Interface ID
+#
+# Since: 4.0
+#
+# Example:
+#
+# <- {"timestamp": {"seconds": 1541579657, "microseconds": 986760},
+#     "event": "RDMA_GID_STATUS_CHANGED",
+#     "data":
+#         {"netdev": "bridge0",
+#         "interface-id": 15880512517475447892,
+#         "gid-status": true,
+#         "subnet-prefix": 33022}}
+#
+##
+{ 'event': 'RDMA_GID_STATUS_CHANGED',
+  'data': { 'netdev'        : 'str',
+            'gid-status'    : 'bool',
+            'subnet-prefix' : 'uint64',
+            'interface-id'  : 'uint64' } }
diff --git a/qapi/replay.json b/qapi/replay.json
new file mode 100644
index 000000000..bfd83d759
--- /dev/null
+++ b/qapi/replay.json
@@ -0,0 +1,121 @@
+# -*- Mode: Python -*-
+#
+
+##
+# = Record/replay
+##
+
+{ 'include': 'common.json' }
+
+##
+# @ReplayMode:
+#
+# Mode of the replay subsystem.
+#
+# @none: normal execution mode. Replay or record are not enabled.
+#
+# @record: record mode. All non-deterministic data is written into the
+#          replay log.
+#
+# @play: replay mode. Non-deterministic data required for system execution
+#        is read from the log.
+#
+# Since: 2.5
+##
+{ 'enum': 'ReplayMode',
+  'data': [ 'none', 'record', 'play' ] }
+
+##
+# @ReplayInfo:
+#
+# Record/replay information.
+#
+# @mode: current mode.
+#
+# @filename: name of the record/replay log file.
+#            It is present only in record or replay modes, when the log
+#            is recorded or replayed.
+#
+# @icount: current number of executed instructions.
+#
+# Since: 5.2
+#
+##
+{ 'struct': 'ReplayInfo',
+  'data': { 'mode': 'ReplayMode', '*filename': 'str', 'icount': 'int' } }
+
+##
+# @query-replay:
+#
+# Retrieve the record/replay information.
+# It includes current instruction count which may be used for
+# @replay-break and @replay-seek commands.
+#
+# Returns: record/replay information.
+#
+# Since: 5.2
+#
+# Example:
+#
+# -> { "execute": "query-replay" }
+# <- { "return": { "mode": "play", "filename": "log.rr", "icount": 220414 } }
+#
+##
+{ 'command': 'query-replay',
+  'returns': 'ReplayInfo' }
+
+##
+# @replay-break:
+#
+# Set replay breakpoint at instruction count @icount.
+# Execution stops when the specified instruction is reached.
+# There can be at most one breakpoint. When breakpoint is set, any prior
+# one is removed.  The breakpoint may be set only in replay mode and only
+# "in the future", i.e. at instruction counts greater than the current one.
+# The current instruction count can be observed with @query-replay.
+#
+# @icount: instruction count to stop at
+#
+# Since: 5.2
+#
+# Example:
+#
+# -> { "execute": "replay-break", "data": { "icount": 220414 } }
+#
+##
+{ 'command': 'replay-break', 'data': { 'icount': 'int' } }
+
+##
+# @replay-delete-break:
+#
+# Remove replay breakpoint which was set with @replay-break.
+# The command is ignored when there are no replay breakpoints.
+#
+# Since: 5.2
+#
+# Example:
+#
+# -> { "execute": "replay-delete-break" }
+#
+##
+{ 'command': 'replay-delete-break' }
+
+##
+# @replay-seek:
+#
+# Automatically proceed to the instruction count @icount, when
+# replaying the execution. The command automatically loads nearest
+# snapshot and replays the execution to find the desired instruction.
+# When there is no preceding snapshot or the execution is not replayed,
+# then the command fails.
+# icount for the reference may be obtained with @query-replay command.
+#
+# @icount: target instruction count
+#
+# Since: 5.2
+#
+# Example:
+#
+# -> { "execute": "replay-seek", "data": { "icount": 220414 } }
+##
+{ 'command': 'replay-seek', 'data': { 'icount': 'int' } }
diff --git a/qapi/rocker.json b/qapi/rocker.json
new file mode 100644
index 000000000..b48e49a89
--- /dev/null
+++ b/qapi/rocker.json
@@ -0,0 +1,342 @@
+# -*- Mode: Python -*-
+# vim: filetype=python
+
+##
+# = Rocker switch device
+##
+
+##
+# @RockerSwitch:
+#
+# Rocker switch information.
+#
+# @name: switch name
+#
+# @id: switch ID
+#
+# @ports: number of front-panel ports
+#
+# Since: 2.4
+##
+{ 'struct': 'RockerSwitch',
+  'data': { 'name': 'str', 'id': 'uint64', 'ports': 'uint32' } }
+
+##
+# @query-rocker:
+#
+# Return rocker switch information.
+#
+# Returns: @Rocker information
+#
+# Since: 2.4
+#
+# Example:
+#
+# -> { "execute": "query-rocker", "arguments": { "name": "sw1" } }
+# <- { "return": {"name": "sw1", "ports": 2, "id": 1327446905938}}
+#
+##
+{ 'command': 'query-rocker',
+  'data': { 'name': 'str' },
+  'returns': 'RockerSwitch' }
+
+##
+# @RockerPortDuplex:
+#
+# An eumeration of port duplex states.
+#
+# @half: half duplex
+#
+# @full: full duplex
+#
+# Since: 2.4
+##
+{ 'enum': 'RockerPortDuplex', 'data': [ 'half', 'full' ] }
+
+##
+# @RockerPortAutoneg:
+#
+# An eumeration of port autoneg states.
+#
+# @off: autoneg is off
+#
+# @on: autoneg is on
+#
+# Since: 2.4
+##
+{ 'enum': 'RockerPortAutoneg', 'data': [ 'off', 'on' ] }
+
+##
+# @RockerPort:
+#
+# Rocker switch port information.
+#
+# @name: port name
+#
+# @enabled: port is enabled for I/O
+#
+# @link-up: physical link is UP on port
+#
+# @speed: port link speed in Mbps
+#
+# @duplex: port link duplex
+#
+# @autoneg: port link autoneg
+#
+# Since: 2.4
+##
+{ 'struct': 'RockerPort',
+  'data': { 'name': 'str', 'enabled': 'bool', 'link-up': 'bool',
+            'speed': 'uint32', 'duplex': 'RockerPortDuplex',
+            'autoneg': 'RockerPortAutoneg' } }
+
+##
+# @query-rocker-ports:
+#
+# Return rocker switch port information.
+#
+# Returns: a list of @RockerPort information
+#
+# Since: 2.4
+#
+# Example:
+#
+# -> { "execute": "query-rocker-ports", "arguments": { "name": "sw1" } }
+# <- { "return": [ {"duplex": "full", "enabled": true, "name": "sw1.1",
+#                   "autoneg": "off", "link-up": true, "speed": 10000},
+#                  {"duplex": "full", "enabled": true, "name": "sw1.2",
+#                   "autoneg": "off", "link-up": true, "speed": 10000}
+#    ]}
+#
+##
+{ 'command': 'query-rocker-ports',
+  'data': { 'name': 'str' },
+  'returns': ['RockerPort'] }
+
+##
+# @RockerOfDpaFlowKey:
+#
+# Rocker switch OF-DPA flow key
+#
+# @priority: key priority, 0 being lowest priority
+#
+# @tbl-id: flow table ID
+#
+# @in-pport: physical input port
+#
+# @tunnel-id: tunnel ID
+#
+# @vlan-id: VLAN ID
+#
+# @eth-type: Ethernet header type
+#
+# @eth-src: Ethernet header source MAC address
+#
+# @eth-dst: Ethernet header destination MAC address
+#
+# @ip-proto: IP Header protocol field
+#
+# @ip-tos: IP header TOS field
+#
+# @ip-dst: IP header destination address
+#
+# Note: optional members may or may not appear in the flow key
+#       depending if they're relevant to the flow key.
+#
+# Since: 2.4
+##
+{ 'struct': 'RockerOfDpaFlowKey',
+  'data' : { 'priority': 'uint32', 'tbl-id': 'uint32', '*in-pport': 'uint32',
+             '*tunnel-id': 'uint32', '*vlan-id': 'uint16',
+             '*eth-type': 'uint16', '*eth-src': 'str', '*eth-dst': 'str',
+             '*ip-proto': 'uint8', '*ip-tos': 'uint8', '*ip-dst': 'str' } }
+
+##
+# @RockerOfDpaFlowMask:
+#
+# Rocker switch OF-DPA flow mask
+#
+# @in-pport: physical input port
+#
+# @tunnel-id: tunnel ID
+#
+# @vlan-id: VLAN ID
+#
+# @eth-src: Ethernet header source MAC address
+#
+# @eth-dst: Ethernet header destination MAC address
+#
+# @ip-proto: IP Header protocol field
+#
+# @ip-tos: IP header TOS field
+#
+# Note: optional members may or may not appear in the flow mask
+#       depending if they're relevant to the flow mask.
+#
+# Since: 2.4
+##
+{ 'struct': 'RockerOfDpaFlowMask',
+  'data' : { '*in-pport': 'uint32', '*tunnel-id': 'uint32',
+             '*vlan-id': 'uint16', '*eth-src': 'str', '*eth-dst': 'str',
+             '*ip-proto': 'uint8', '*ip-tos': 'uint8' } }
+
+##
+# @RockerOfDpaFlowAction:
+#
+# Rocker switch OF-DPA flow action
+#
+# @goto-tbl: next table ID
+#
+# @group-id: group ID
+#
+# @tunnel-lport: tunnel logical port ID
+#
+# @vlan-id: VLAN ID
+#
+# @new-vlan-id: new VLAN ID
+#
+# @out-pport: physical output port
+#
+# Note: optional members may or may not appear in the flow action
+#       depending if they're relevant to the flow action.
+#
+# Since: 2.4
+##
+{ 'struct': 'RockerOfDpaFlowAction',
+  'data' : { '*goto-tbl': 'uint32', '*group-id': 'uint32',
+             '*tunnel-lport': 'uint32', '*vlan-id': 'uint16',
+             '*new-vlan-id': 'uint16', '*out-pport': 'uint32' } }
+
+##
+# @RockerOfDpaFlow:
+#
+# Rocker switch OF-DPA flow
+#
+# @cookie: flow unique cookie ID
+#
+# @hits: count of matches (hits) on flow
+#
+# @key: flow key
+#
+# @mask: flow mask
+#
+# @action: flow action
+#
+# Since: 2.4
+##
+{ 'struct': 'RockerOfDpaFlow',
+  'data': { 'cookie': 'uint64', 'hits': 'uint64', 'key': 'RockerOfDpaFlowKey',
+            'mask': 'RockerOfDpaFlowMask', 'action': 'RockerOfDpaFlowAction' } }
+
+##
+# @query-rocker-of-dpa-flows:
+#
+# Return rocker OF-DPA flow information.
+#
+# @name: switch name
+#
+# @tbl-id: flow table ID.  If tbl-id is not specified, returns
+#          flow information for all tables.
+#
+# Returns: rocker OF-DPA flow information
+#
+# Since: 2.4
+#
+# Example:
+#
+# -> { "execute": "query-rocker-of-dpa-flows",
+#      "arguments": { "name": "sw1" } }
+# <- { "return": [ {"key": {"in-pport": 0, "priority": 1, "tbl-id": 0},
+#                   "hits": 138,
+#                   "cookie": 0,
+#                   "action": {"goto-tbl": 10},
+#                   "mask": {"in-pport": 4294901760}
+#                  },
+#                  {...more...},
+#    ]}
+#
+##
+{ 'command': 'query-rocker-of-dpa-flows',
+  'data': { 'name': 'str', '*tbl-id': 'uint32' },
+  'returns': ['RockerOfDpaFlow'] }
+
+##
+# @RockerOfDpaGroup:
+#
+# Rocker switch OF-DPA group
+#
+# @id: group unique ID
+#
+# @type: group type
+#
+# @vlan-id: VLAN ID
+#
+# @pport: physical port number
+#
+# @index: group index, unique with group type
+#
+# @out-pport: output physical port number
+#
+# @group-id: next group ID
+#
+# @set-vlan-id: VLAN ID to set
+#
+# @pop-vlan: pop VLAN headr from packet
+#
+# @group-ids: list of next group IDs
+#
+# @set-eth-src: set source MAC address in Ethernet header
+#
+# @set-eth-dst: set destination MAC address in Ethernet header
+#
+# @ttl-check: perform TTL check
+#
+# Note: optional members may or may not appear in the group depending
+#       if they're relevant to the group type.
+#
+# Since: 2.4
+##
+{ 'struct': 'RockerOfDpaGroup',
+  'data': { 'id': 'uint32',  'type': 'uint8', '*vlan-id': 'uint16',
+            '*pport': 'uint32', '*index': 'uint32', '*out-pport': 'uint32',
+            '*group-id': 'uint32', '*set-vlan-id': 'uint16',
+            '*pop-vlan': 'uint8', '*group-ids': ['uint32'],
+            '*set-eth-src': 'str', '*set-eth-dst': 'str',
+            '*ttl-check': 'uint8' } }
+
+##
+# @query-rocker-of-dpa-groups:
+#
+# Return rocker OF-DPA group information.
+#
+# @name: switch name
+#
+# @type: group type.  If type is not specified, returns
+#        group information for all group types.
+#
+# Returns: rocker OF-DPA group information
+#
+# Since: 2.4
+#
+# Example:
+#
+# -> { "execute": "query-rocker-of-dpa-groups",
+#      "arguments": { "name": "sw1" } }
+# <- { "return": [ {"type": 0, "out-pport": 2,
+#                   "pport": 2, "vlan-id": 3841,
+#                   "pop-vlan": 1, "id": 251723778},
+#                  {"type": 0, "out-pport": 0,
+#                   "pport": 0, "vlan-id": 3841,
+#                   "pop-vlan": 1, "id": 251723776},
+#                  {"type": 0, "out-pport": 1,
+#                   "pport": 1, "vlan-id": 3840,
+#                   "pop-vlan": 1, "id": 251658241},
+#                  {"type": 0, "out-pport": 0,
+#                   "pport": 0, "vlan-id": 3840,
+#                   "pop-vlan": 1, "id": 251658240}
+#    ]}
+#
+##
+{ 'command': 'query-rocker-of-dpa-groups',
+  'data': { 'name': 'str', '*type': 'uint8' },
+  'returns': ['RockerOfDpaGroup'] }
diff --git a/qapi/run-state.json b/qapi/run-state.json
new file mode 100644
index 000000000..964c8ef39
--- /dev/null
+++ b/qapi/run-state.json
@@ -0,0 +1,567 @@
+# -*- Mode: Python -*-
+# vim: filetype=python
+#
+
+##
+# = VM run state
+##
+
+##
+# @RunState:
+#
+# An enumeration of VM run states.
+#
+# @debug: QEMU is running on a debugger
+#
+# @finish-migrate: guest is paused to finish the migration process
+#
+# @inmigrate: guest is paused waiting for an incoming migration.  Note
+#             that this state does not tell whether the machine will start at the
+#             end of the migration.  This depends on the command-line -S option and
+#             any invocation of 'stop' or 'cont' that has happened since QEMU was
+#             started.
+#
+# @internal-error: An internal error that prevents further guest execution
+#                  has occurred
+#
+# @io-error: the last IOP has failed and the device is configured to pause
+#            on I/O errors
+#
+# @paused: guest has been paused via the 'stop' command
+#
+# @postmigrate: guest is paused following a successful 'migrate'
+#
+# @prelaunch: QEMU was started with -S and guest has not started
+#
+# @restore-vm: guest is paused to restore VM state
+#
+# @running: guest is actively running
+#
+# @save-vm: guest is paused to save the VM state
+#
+# @shutdown: guest is shut down (and -no-shutdown is in use)
+#
+# @suspended: guest is suspended (ACPI S3)
+#
+# @watchdog: the watchdog action is configured to pause and has been triggered
+#
+# @guest-panicked: guest has been panicked as a result of guest OS panic
+#
+# @colo: guest is paused to save/restore VM state under colo checkpoint,
+#        VM can not get into this state unless colo capability is enabled
+#        for migration. (since 2.8)
+# @preconfig: QEMU is paused before board specific init callback is executed.
+#             The state is reachable only if the --preconfig CLI option is used.
+#             (Since 3.0)
+##
+{ 'enum': 'RunState',
+  'data': [ 'debug', 'inmigrate', 'internal-error', 'io-error', 'paused',
+            'postmigrate', 'prelaunch', 'finish-migrate', 'restore-vm',
+            'running', 'save-vm', 'shutdown', 'suspended', 'watchdog',
+            'guest-panicked', 'colo', 'preconfig' ] }
+
+##
+# @ShutdownCause:
+#
+# An enumeration of reasons for a Shutdown.
+#
+# @none: No shutdown request pending
+#
+# @host-error: An error prevents further use of guest
+#
+# @host-qmp-quit: Reaction to the QMP command 'quit'
+#
+# @host-qmp-system-reset: Reaction to the QMP command 'system_reset'
+#
+# @host-signal: Reaction to a signal, such as SIGINT
+#
+# @host-ui: Reaction to a UI event, like window close
+#
+# @guest-shutdown: Guest shutdown/suspend request, via ACPI or other
+#                  hardware-specific means
+#
+# @guest-reset: Guest reset request, and command line turns that into
+#               a shutdown
+#
+# @guest-panic: Guest panicked, and command line turns that into a shutdown
+#
+# @subsystem-reset: Partial guest reset that does not trigger QMP events and
+#                   ignores --no-reboot. This is useful for sanitizing
+#                   hypercalls on s390 that are used during kexec/kdump/boot
+#
+##
+{ 'enum': 'ShutdownCause',
+  # Beware, shutdown_caused_by_guest() depends on enumeration order
+  'data': [ 'none', 'host-error', 'host-qmp-quit', 'host-qmp-system-reset',
+            'host-signal', 'host-ui', 'guest-shutdown', 'guest-reset',
+            'guest-panic', 'subsystem-reset'] }
+
+##
+# @StatusInfo:
+#
+# Information about VCPU run state
+#
+# @running: true if all VCPUs are runnable, false if not runnable
+#
+# @singlestep: true if VCPUs are in single-step mode
+#
+# @status: the virtual machine @RunState
+#
+# Since:  0.14.0
+#
+# Notes: @singlestep is enabled through the GDB stub
+##
+{ 'struct': 'StatusInfo',
+  'data': {'running': 'bool', 'singlestep': 'bool', 'status': 'RunState'} }
+
+##
+# @query-status:
+#
+# Query the run status of all VCPUs
+#
+# Returns: @StatusInfo reflecting all VCPUs
+#
+# Since:  0.14.0
+#
+# Example:
+#
+# -> { "execute": "query-status" }
+# <- { "return": { "running": true,
+#                  "singlestep": false,
+#                  "status": "running" } }
+#
+##
+{ 'command': 'query-status', 'returns': 'StatusInfo',
+  'allow-preconfig': true }
+
+##
+# @SHUTDOWN:
+#
+# Emitted when the virtual machine has shut down, indicating that qemu is
+# about to exit.
+#
+# @guest: If true, the shutdown was triggered by a guest request (such as
+#         a guest-initiated ACPI shutdown request or other hardware-specific action)
+#         rather than a host request (such as sending qemu a SIGINT). (since 2.10)
+#
+# @reason: The @ShutdownCause which resulted in the SHUTDOWN. (since 4.0)
+#
+# Note: If the command-line option "-no-shutdown" has been specified, qemu will
+#       not exit, and a STOP event will eventually follow the SHUTDOWN event
+#
+# Since: 0.12.0
+#
+# Example:
+#
+# <- { "event": "SHUTDOWN", "data": { "guest": true },
+#      "timestamp": { "seconds": 1267040730, "microseconds": 682951 } }
+#
+##
+{ 'event': 'SHUTDOWN', 'data': { 'guest': 'bool', 'reason': 'ShutdownCause' } }
+
+##
+# @POWERDOWN:
+#
+# Emitted when the virtual machine is powered down through the power control
+# system, such as via ACPI.
+#
+# Since: 0.12.0
+#
+# Example:
+#
+# <- { "event": "POWERDOWN",
+#      "timestamp": { "seconds": 1267040730, "microseconds": 682951 } }
+#
+##
+{ 'event': 'POWERDOWN' }
+
+##
+# @RESET:
+#
+# Emitted when the virtual machine is reset
+#
+# @guest: If true, the reset was triggered by a guest request (such as
+#         a guest-initiated ACPI reboot request or other hardware-specific action)
+#         rather than a host request (such as the QMP command system_reset).
+#         (since 2.10)
+#
+# @reason: The @ShutdownCause of the RESET. (since 4.0)
+#
+# Since: 0.12.0
+#
+# Example:
+#
+# <- { "event": "RESET", "data": { "guest": false },
+#      "timestamp": { "seconds": 1267041653, "microseconds": 9518 } }
+#
+##
+{ 'event': 'RESET', 'data': { 'guest': 'bool', 'reason': 'ShutdownCause' } }
+
+##
+# @STOP:
+#
+# Emitted when the virtual machine is stopped
+#
+# Since: 0.12.0
+#
+# Example:
+#
+# <- { "event": "STOP",
+#      "timestamp": { "seconds": 1267041730, "microseconds": 281295 } }
+#
+##
+{ 'event': 'STOP' }
+
+##
+# @RESUME:
+#
+# Emitted when the virtual machine resumes execution
+#
+# Since: 0.12.0
+#
+# Example:
+#
+# <- { "event": "RESUME",
+#      "timestamp": { "seconds": 1271770767, "microseconds": 582542 } }
+#
+##
+{ 'event': 'RESUME' }
+
+##
+# @SUSPEND:
+#
+# Emitted when guest enters a hardware suspension state, for example, S3 state,
+# which is sometimes called standby state
+#
+# Since: 1.1
+#
+# Example:
+#
+# <- { "event": "SUSPEND",
+#      "timestamp": { "seconds": 1344456160, "microseconds": 309119 } }
+#
+##
+{ 'event': 'SUSPEND' }
+
+##
+# @SUSPEND_DISK:
+#
+# Emitted when guest enters a hardware suspension state with data saved on
+# disk, for example, S4 state, which is sometimes called hibernate state
+#
+# Note: QEMU shuts down (similar to event @SHUTDOWN) when entering this state
+#
+# Since: 1.2
+#
+# Example:
+#
+# <-   { "event": "SUSPEND_DISK",
+#        "timestamp": { "seconds": 1344456160, "microseconds": 309119 } }
+#
+##
+{ 'event': 'SUSPEND_DISK' }
+
+##
+# @WAKEUP:
+#
+# Emitted when the guest has woken up from suspend state and is running
+#
+# Since: 1.1
+#
+# Example:
+#
+# <- { "event": "WAKEUP",
+#      "timestamp": { "seconds": 1344522075, "microseconds": 745528 } }
+#
+##
+{ 'event': 'WAKEUP' }
+
+##
+# @WATCHDOG:
+#
+# Emitted when the watchdog device's timer is expired
+#
+# @action: action that has been taken
+#
+# Note: If action is "reset", "shutdown", or "pause" the WATCHDOG event is
+#       followed respectively by the RESET, SHUTDOWN, or STOP events
+#
+# Note: This event is rate-limited.
+#
+# Since: 0.13.0
+#
+# Example:
+#
+# <- { "event": "WATCHDOG",
+#      "data": { "action": "reset" },
+#      "timestamp": { "seconds": 1267061043, "microseconds": 959568 } }
+#
+##
+{ 'event': 'WATCHDOG',
+  'data': { 'action': 'WatchdogAction' } }
+
+##
+# @WatchdogAction:
+#
+# An enumeration of the actions taken when the watchdog device's timer is
+# expired
+#
+# @reset: system resets
+#
+# @shutdown: system shutdown, note that it is similar to @powerdown, which
+#            tries to set to system status and notify guest
+#
+# @poweroff: system poweroff, the emulator program exits
+#
+# @pause: system pauses, similar to @stop
+#
+# @debug: system enters debug state
+#
+# @none: nothing is done
+#
+# @inject-nmi: a non-maskable interrupt is injected into the first VCPU (all
+#              VCPUS on x86) (since 2.4)
+#
+# Since: 2.1
+##
+{ 'enum': 'WatchdogAction',
+  'data': [ 'reset', 'shutdown', 'poweroff', 'pause', 'debug', 'none',
+            'inject-nmi' ] }
+
+##
+# @watchdog-set-action:
+#
+# Set watchdog action
+#
+# Since: 2.11
+##
+{ 'command': 'watchdog-set-action', 'data' : {'action': 'WatchdogAction'} }
+
+##
+# @GUEST_PANICKED:
+#
+# Emitted when guest OS panic is detected
+#
+# @action: action that has been taken, currently always "pause"
+#
+# @info: information about a panic (since 2.9)
+#
+# Since: 1.5
+#
+# Example:
+#
+# <- { "event": "GUEST_PANICKED",
+#      "data": { "action": "pause" } }
+#
+##
+{ 'event': 'GUEST_PANICKED',
+  'data': { 'action': 'GuestPanicAction', '*info': 'GuestPanicInformation' } }
+
+##
+# @GUEST_CRASHLOADED:
+#
+# Emitted when guest OS crash loaded is detected
+#
+# @action: action that has been taken, currently always "run"
+#
+# @info: information about a panic
+#
+# Since: 5.0
+#
+# Example:
+#
+# <- { "event": "GUEST_CRASHLOADED",
+#      "data": { "action": "run" } }
+#
+##
+{ 'event': 'GUEST_CRASHLOADED',
+  'data': { 'action': 'GuestPanicAction', '*info': 'GuestPanicInformation' } }
+
+##
+# @GuestPanicAction:
+#
+# An enumeration of the actions taken when guest OS panic is detected
+#
+# @pause: system pauses
+#
+# Since: 2.1 (poweroff since 2.8, run since 5.0)
+##
+{ 'enum': 'GuestPanicAction',
+  'data': [ 'pause', 'poweroff', 'run' ] }
+
+##
+# @GuestPanicInformationType:
+#
+# An enumeration of the guest panic information types
+#
+# @hyper-v: hyper-v guest panic information type
+#
+# @s390: s390 guest panic information type (Since: 2.12)
+#
+# Since: 2.9
+##
+{ 'enum': 'GuestPanicInformationType',
+  'data': [ 'hyper-v', 's390' ] }
+
+##
+# @GuestPanicInformation:
+#
+# Information about a guest panic
+#
+# @type: Crash type that defines the hypervisor specific information
+#
+# Since: 2.9
+##
+{'union': 'GuestPanicInformation',
+ 'base': {'type': 'GuestPanicInformationType'},
+ 'discriminator': 'type',
+ 'data': { 'hyper-v': 'GuestPanicInformationHyperV',
+           's390': 'GuestPanicInformationS390' } }
+
+##
+# @GuestPanicInformationHyperV:
+#
+# Hyper-V specific guest panic information (HV crash MSRs)
+#
+# Since: 2.9
+##
+{'struct': 'GuestPanicInformationHyperV',
+ 'data': { 'arg1': 'uint64',
+           'arg2': 'uint64',
+           'arg3': 'uint64',
+           'arg4': 'uint64',
+           'arg5': 'uint64' } }
+
+##
+# @S390CrashReason:
+#
+# Reason why the CPU is in a crashed state.
+#
+# @unknown: no crash reason was set
+#
+# @disabled-wait: the CPU has entered a disabled wait state
+#
+# @extint-loop: clock comparator or cpu timer interrupt with new PSW enabled
+#               for external interrupts
+#
+# @pgmint-loop: program interrupt with BAD new PSW
+#
+# @opint-loop: operation exception interrupt with invalid code at the program
+#              interrupt new PSW
+#
+# Since: 2.12
+##
+{ 'enum': 'S390CrashReason',
+  'data': [ 'unknown',
+            'disabled-wait',
+            'extint-loop',
+            'pgmint-loop',
+            'opint-loop' ] }
+
+##
+# @GuestPanicInformationS390:
+#
+# S390 specific guest panic information (PSW)
+#
+# @core: core id of the CPU that crashed
+# @psw-mask: control fields of guest PSW
+# @psw-addr: guest instruction address
+# @reason: guest crash reason
+#
+# Since: 2.12
+##
+{'struct': 'GuestPanicInformationS390',
+ 'data': { 'core': 'uint32',
+           'psw-mask': 'uint64',
+           'psw-addr': 'uint64',
+           'reason': 'S390CrashReason' } }
+
+##
+# @MEMORY_FAILURE:
+#
+# Emitted when a memory failure occurs on host side.
+#
+# @recipient: recipient is defined as @MemoryFailureRecipient.
+#
+# @action: action that has been taken. action is defined as @MemoryFailureAction.
+#
+# @flags: flags for MemoryFailureAction. action is defined as @MemoryFailureFlags.
+#
+# Since: 5.2
+#
+# Example:
+#
+# <- { "event": "MEMORY_FAILURE",
+#      "data": { "recipient": "hypervisor",
+#                "action": "fatal",
+#                "flags": { 'action-required': false } }
+#
+##
+{ 'event': 'MEMORY_FAILURE',
+  'data': { 'recipient': 'MemoryFailureRecipient',
+            'action': 'MemoryFailureAction',
+            'flags': 'MemoryFailureFlags'} }
+
+##
+# @MemoryFailureRecipient:
+#
+# Hardware memory failure occurs, handled by recipient.
+#
+# @hypervisor: memory failure at QEMU process address space.
+#              (none guest memory, but used by QEMU itself).
+#
+# @guest: memory failure at guest memory,
+#
+# Since: 5.2
+#
+##
+{ 'enum': 'MemoryFailureRecipient',
+  'data': [ 'hypervisor',
+            'guest' ] }
+
+
+##
+# @MemoryFailureAction:
+#
+# Actions taken by QEMU in response to a hardware memory failure.
+#
+# @ignore: the memory failure could be ignored.  This will only be the case
+#          for action-optional failures.
+#
+# @inject: memory failure occurred in guest memory, the guest enabled MCE
+#          handling mechanism, and QEMU could inject the MCE into the guest
+#          successfully.
+#
+# @fatal: the failure is unrecoverable.  This occurs for action-required
+#         failures if the recipient is the hypervisor; QEMU will exit.
+#
+# @reset: the failure is unrecoverable but confined to the guest.  This
+#         occurs if the recipient is a guest guest which is not ready
+#         to handle memory failures.
+#
+# Since: 5.2
+#
+##
+{ 'enum': 'MemoryFailureAction',
+  'data': [ 'ignore',
+            'inject',
+            'fatal',
+            'reset' ] }
+
+##
+# @MemoryFailureFlags:
+#
+# Additional information on memory failures.
+#
+# @action-required: whether a memory failure event is action-required
+#                   or action-optional (e.g. a failure during memory scrub).
+#
+# @recursive: whether the failure occurred while the previous
+#             failure was still in progress.
+#
+# Since: 5.2
+#
+##
+{ 'struct': 'MemoryFailureFlags',
+  'data': { 'action-required': 'bool',
+            'recursive': 'bool'} }
diff --git a/qapi/sockets.json b/qapi/sockets.json
new file mode 100644
index 000000000..2e8345279
--- /dev/null
+++ b/qapi/sockets.json
@@ -0,0 +1,167 @@
+# -*- Mode: Python -*-
+# vim: filetype=python
+
+##
+# = Socket data types
+##
+
+{ 'include': 'common.json' }
+
+##
+# @NetworkAddressFamily:
+#
+# The network address family
+#
+# @ipv4: IPV4 family
+#
+# @ipv6: IPV6 family
+#
+# @unix: unix socket
+#
+# @vsock: vsock family (since 2.8)
+#
+# @unknown: otherwise
+#
+# Since: 2.1
+##
+{ 'enum': 'NetworkAddressFamily',
+  'data': [ 'ipv4', 'ipv6', 'unix', 'vsock', 'unknown' ] }
+
+##
+# @InetSocketAddressBase:
+#
+# @host: host part of the address
+# @port: port part of the address
+##
+{ 'struct': 'InetSocketAddressBase',
+  'data': {
+    'host': 'str',
+    'port': 'str' } }
+
+##
+# @InetSocketAddress:
+#
+# Captures a socket address or address range in the Internet namespace.
+#
+# @numeric: true if the host/port are guaranteed to be numeric,
+#           false if name resolution should be attempted. Defaults to false.
+#           (Since 2.9)
+#
+# @to: If present, this is range of possible addresses, with port
+#      between @port and @to.
+#
+# @ipv4: whether to accept IPv4 addresses, default try both IPv4 and IPv6
+#
+# @ipv6: whether to accept IPv6 addresses, default try both IPv4 and IPv6
+#
+# @keep-alive: enable keep-alive when connecting to this socket. Not supported
+#              for passive sockets. (Since 4.2)
+#
+# Since: 1.3
+##
+{ 'struct': 'InetSocketAddress',
+  'base': 'InetSocketAddressBase',
+  'data': {
+    '*numeric':  'bool',
+    '*to': 'uint16',
+    '*ipv4': 'bool',
+    '*ipv6': 'bool',
+    '*keep-alive': 'bool' } }
+
+##
+# @UnixSocketAddress:
+#
+# Captures a socket address in the local ("Unix socket") namespace.
+#
+# @path: filesystem path to use
+# @abstract: if true, this is a Linux abstract socket address.  @path
+#            will be prefixed by a null byte, and optionally padded
+#            with null bytes.  Defaults to false.  (Since 5.1)
+# @tight: if false, pad an abstract socket address with enough null
+#         bytes to make it fill struct sockaddr_un member sun_path.
+#         Defaults to true.  (Since 5.1)
+#
+# Since: 1.3
+##
+{ 'struct': 'UnixSocketAddress',
+  'data': {
+    'path': 'str',
+    '*abstract': { 'type': 'bool', 'if': 'defined(CONFIG_LINUX)' },
+    '*tight': { 'type': 'bool', 'if': 'defined(CONFIG_LINUX)' } } }
+
+##
+# @VsockSocketAddress:
+#
+# Captures a socket address in the vsock namespace.
+#
+# @cid: unique host identifier
+# @port: port
+#
+# Note: string types are used to allow for possible future hostname or
+#       service resolution support.
+#
+# Since: 2.8
+##
+{ 'struct': 'VsockSocketAddress',
+  'data': {
+    'cid': 'str',
+    'port': 'str' } }
+
+##
+# @SocketAddressLegacy:
+#
+# Captures the address of a socket, which could also be a named file descriptor
+#
+# Note: This type is deprecated in favor of SocketAddress.  The
+#       difference between SocketAddressLegacy and SocketAddress is that the
+#       latter is a flat union rather than a simple union. Flat is nicer
+#       because it avoids nesting on the wire, i.e. that form has fewer {}.
+
+#
+# Since: 1.3
+##
+{ 'union': 'SocketAddressLegacy',
+  'data': {
+    'inet': 'InetSocketAddress',
+    'unix': 'UnixSocketAddress',
+    'vsock': 'VsockSocketAddress',
+    'fd': 'String' } }
+
+##
+# @SocketAddressType:
+#
+# Available SocketAddress types
+#
+# @inet:  Internet address
+#
+# @unix:  Unix domain socket
+#
+# @vsock: VMCI address
+#
+# @fd: decimal is for file descriptor number, otherwise a file descriptor name.
+#      Named file descriptors are permitted in monitor commands, in combination
+#      with the 'getfd' command. Decimal file descriptors are permitted at
+#      startup or other contexts where no monitor context is active.
+#
+# Since: 2.9
+##
+{ 'enum': 'SocketAddressType',
+  'data': [ 'inet', 'unix', 'vsock', 'fd' ] }
+
+##
+# @SocketAddress:
+#
+# Captures the address of a socket, which could also be a named file
+# descriptor
+#
+# @type:       Transport type
+#
+# Since: 2.9
+##
+{ 'union': 'SocketAddress',
+  'base': { 'type': 'SocketAddressType' },
+  'discriminator': 'type',
+  'data': { 'inet': 'InetSocketAddress',
+            'unix': 'UnixSocketAddress',
+            'vsock': 'VsockSocketAddress',
+            'fd': 'String' } }
diff --git a/qapi/string-input-visitor.c b/qapi/string-input-visitor.c
new file mode 100644
index 000000000..197139c1c
--- /dev/null
+++ b/qapi/string-input-visitor.c
@@ -0,0 +1,414 @@
+/*
+ * String parsing visitor
+ *
+ * Copyright Red Hat, Inc. 2012-2016
+ *
+ * Author: Paolo Bonzini 
+ *         David Hildenbrand 
+ *
+ * This work is licensed under the terms of the GNU LGPL, version 2.1 or later.
+ * See the COPYING.LIB file in the top-level directory.
+ */
+
+#include "qemu/osdep.h"
+#include "qapi/error.h"
+#include "qapi/string-input-visitor.h"
+#include "qapi/visitor-impl.h"
+#include "qapi/qmp/qerror.h"
+#include "qapi/qmp/qnull.h"
+#include "qemu/option.h"
+#include "qemu/cutils.h"
+
+typedef enum ListMode {
+    /* no list parsing active / no list expected */
+    LM_NONE,
+    /* we have an unparsed string remaining */
+    LM_UNPARSED,
+    /* we have an unfinished int64 range */
+    LM_INT64_RANGE,
+    /* we have an unfinished uint64 range */
+    LM_UINT64_RANGE,
+    /* we have parsed the string completely and no range is remaining */
+    LM_END,
+} ListMode;
+
+/* protect against DOS attacks, limit the amount of elements per range */
+#define RANGE_MAX_ELEMENTS 65536
+
+typedef union RangeElement {
+    int64_t i64;
+    uint64_t u64;
+} RangeElement;
+
+struct StringInputVisitor
+{
+    Visitor visitor;
+
+    /* List parsing state */
+    ListMode lm;
+    RangeElement rangeNext;
+    RangeElement rangeEnd;
+    const char *unparsed_string;
+    void *list;
+
+    /* The original string to parse */
+    const char *string;
+};
+
+static StringInputVisitor *to_siv(Visitor *v)
+{
+    return container_of(v, StringInputVisitor, visitor);
+}
+
+static bool start_list(Visitor *v, const char *name, GenericList **list,
+                       size_t size, Error **errp)
+{
+    StringInputVisitor *siv = to_siv(v);
+
+    assert(siv->lm == LM_NONE);
+    siv->list = list;
+    siv->unparsed_string = siv->string;
+
+    if (!siv->string[0]) {
+        if (list) {
+            *list = NULL;
+        }
+        siv->lm = LM_END;
+    } else {
+        if (list) {
+            *list = g_malloc0(size);
+        }
+        siv->lm = LM_UNPARSED;
+    }
+    return true;
+}
+
+static GenericList *next_list(Visitor *v, GenericList *tail, size_t size)
+{
+    StringInputVisitor *siv = to_siv(v);
+
+    switch (siv->lm) {
+    case LM_END:
+        return NULL;
+    case LM_INT64_RANGE:
+    case LM_UINT64_RANGE:
+    case LM_UNPARSED:
+        /* we have an unparsed string or something left in a range */
+        break;
+    default:
+        abort();
+    }
+
+    tail->next = g_malloc0(size);
+    return tail->next;
+}
+
+static bool check_list(Visitor *v, Error **errp)
+{
+    const StringInputVisitor *siv = to_siv(v);
+
+    switch (siv->lm) {
+    case LM_INT64_RANGE:
+    case LM_UINT64_RANGE:
+    case LM_UNPARSED:
+        error_setg(errp, "Fewer list elements expected");
+        return false;
+    case LM_END:
+        return true;
+    default:
+        abort();
+    }
+}
+
+static void end_list(Visitor *v, void **obj)
+{
+    StringInputVisitor *siv = to_siv(v);
+
+    assert(siv->lm != LM_NONE);
+    assert(siv->list == obj);
+    siv->list = NULL;
+    siv->unparsed_string = NULL;
+    siv->lm = LM_NONE;
+}
+
+static int try_parse_int64_list_entry(StringInputVisitor *siv, int64_t *obj)
+{
+    const char *endptr;
+    int64_t start, end;
+
+    /* parse a simple int64 or range */
+    if (qemu_strtoi64(siv->unparsed_string, &endptr, 0, &start)) {
+        return -EINVAL;
+    }
+    end = start;
+
+    switch (endptr[0]) {
+    case '\0':
+        siv->unparsed_string = endptr;
+        break;
+    case ',':
+        siv->unparsed_string = endptr + 1;
+        break;
+    case '-':
+        /* parse the end of the range */
+        if (qemu_strtoi64(endptr + 1, &endptr, 0, &end)) {
+            return -EINVAL;
+        }
+        if (start > end || end - start >= RANGE_MAX_ELEMENTS) {
+            return -EINVAL;
+        }
+        switch (endptr[0]) {
+        case '\0':
+            siv->unparsed_string = endptr;
+            break;
+        case ',':
+            siv->unparsed_string = endptr + 1;
+            break;
+        default:
+            return -EINVAL;
+        }
+        break;
+    default:
+        return -EINVAL;
+    }
+
+    /* we have a proper range (with maybe only one element) */
+    siv->lm = LM_INT64_RANGE;
+    siv->rangeNext.i64 = start;
+    siv->rangeEnd.i64 = end;
+    return 0;
+}
+
+static bool parse_type_int64(Visitor *v, const char *name, int64_t *obj,
+                             Error **errp)
+{
+    StringInputVisitor *siv = to_siv(v);
+    int64_t val;
+
+    switch (siv->lm) {
+    case LM_NONE:
+        /* just parse a simple int64, bail out if not completely consumed */
+        if (qemu_strtoi64(siv->string, NULL, 0, &val)) {
+            error_setg(errp, QERR_INVALID_PARAMETER_VALUE,
+                       name ? name : "null", "int64");
+            return false;
+        }
+        *obj = val;
+        return true;
+    case LM_UNPARSED:
+        if (try_parse_int64_list_entry(siv, obj)) {
+            error_setg(errp, QERR_INVALID_PARAMETER_VALUE, name ? name : "null",
+                       "list of int64 values or ranges");
+            return false;
+        }
+        assert(siv->lm == LM_INT64_RANGE);
+        /* fall through */
+    case LM_INT64_RANGE:
+        /* return the next element in the range */
+        assert(siv->rangeNext.i64 <= siv->rangeEnd.i64);
+        *obj = siv->rangeNext.i64++;
+
+        if (siv->rangeNext.i64 > siv->rangeEnd.i64 || *obj == INT64_MAX) {
+            /* end of range, check if there is more to parse */
+            siv->lm = siv->unparsed_string[0] ? LM_UNPARSED : LM_END;
+        }
+        return true;
+    case LM_END:
+        error_setg(errp, "Fewer list elements expected");
+        return false;
+    default:
+        abort();
+    }
+}
+
+static int try_parse_uint64_list_entry(StringInputVisitor *siv, uint64_t *obj)
+{
+    const char *endptr;
+    uint64_t start, end;
+
+    /* parse a simple uint64 or range */
+    if (qemu_strtou64(siv->unparsed_string, &endptr, 0, &start)) {
+        return -EINVAL;
+    }
+    end = start;
+
+    switch (endptr[0]) {
+    case '\0':
+        siv->unparsed_string = endptr;
+        break;
+    case ',':
+        siv->unparsed_string = endptr + 1;
+        break;
+    case '-':
+        /* parse the end of the range */
+        if (qemu_strtou64(endptr + 1, &endptr, 0, &end)) {
+            return -EINVAL;
+        }
+        if (start > end || end - start >= RANGE_MAX_ELEMENTS) {
+            return -EINVAL;
+        }
+        switch (endptr[0]) {
+        case '\0':
+            siv->unparsed_string = endptr;
+            break;
+        case ',':
+            siv->unparsed_string = endptr + 1;
+            break;
+        default:
+            return -EINVAL;
+        }
+        break;
+    default:
+        return -EINVAL;
+    }
+
+    /* we have a proper range (with maybe only one element) */
+    siv->lm = LM_UINT64_RANGE;
+    siv->rangeNext.u64 = start;
+    siv->rangeEnd.u64 = end;
+    return 0;
+}
+
+static bool parse_type_uint64(Visitor *v, const char *name, uint64_t *obj,
+                              Error **errp)
+{
+    StringInputVisitor *siv = to_siv(v);
+    uint64_t val;
+
+    switch (siv->lm) {
+    case LM_NONE:
+        /* just parse a simple uint64, bail out if not completely consumed */
+        if (qemu_strtou64(siv->string, NULL, 0, &val)) {
+            error_setg(errp, QERR_INVALID_PARAMETER_VALUE, name ? name : "null",
+                       "uint64");
+            return false;
+        }
+        *obj = val;
+        return true;
+    case LM_UNPARSED:
+        if (try_parse_uint64_list_entry(siv, obj)) {
+            error_setg(errp, QERR_INVALID_PARAMETER_VALUE, name ? name : "null",
+                       "list of uint64 values or ranges");
+            return false;
+        }
+        assert(siv->lm == LM_UINT64_RANGE);
+        /* fall through */
+    case LM_UINT64_RANGE:
+        /* return the next element in the range */
+        assert(siv->rangeNext.u64 <= siv->rangeEnd.u64);
+        *obj = siv->rangeNext.u64++;
+
+        if (siv->rangeNext.u64 > siv->rangeEnd.u64 || *obj == UINT64_MAX) {
+            /* end of range, check if there is more to parse */
+            siv->lm = siv->unparsed_string[0] ? LM_UNPARSED : LM_END;
+        }
+        return true;
+    case LM_END:
+        error_setg(errp, "Fewer list elements expected");
+        return false;
+    default:
+        abort();
+    }
+}
+
+static bool parse_type_size(Visitor *v, const char *name, uint64_t *obj,
+                            Error **errp)
+{
+    StringInputVisitor *siv = to_siv(v);
+    uint64_t val;
+
+    assert(siv->lm == LM_NONE);
+    if (!parse_option_size(name, siv->string, &val, errp)) {
+        return false;
+    }
+
+    *obj = val;
+    return true;
+}
+
+static bool parse_type_bool(Visitor *v, const char *name, bool *obj,
+                            Error **errp)
+{
+    StringInputVisitor *siv = to_siv(v);
+
+    assert(siv->lm == LM_NONE);
+    return qapi_bool_parse(name ? name : "null", siv->string, obj, errp);
+}
+
+static bool parse_type_str(Visitor *v, const char *name, char **obj,
+                           Error **errp)
+{
+    StringInputVisitor *siv = to_siv(v);
+
+    assert(siv->lm == LM_NONE);
+    *obj = g_strdup(siv->string);
+    return true;
+}
+
+static bool parse_type_number(Visitor *v, const char *name, double *obj,
+                              Error **errp)
+{
+    StringInputVisitor *siv = to_siv(v);
+    double val;
+
+    assert(siv->lm == LM_NONE);
+    if (qemu_strtod_finite(siv->string, NULL, &val)) {
+        error_setg(errp, QERR_INVALID_PARAMETER_TYPE, name ? name : "null",
+                   "number");
+        return false;
+    }
+
+    *obj = val;
+    return true;
+}
+
+static bool parse_type_null(Visitor *v, const char *name, QNull **obj,
+                            Error **errp)
+{
+    StringInputVisitor *siv = to_siv(v);
+
+    assert(siv->lm == LM_NONE);
+    *obj = NULL;
+
+    if (siv->string[0]) {
+        error_setg(errp, QERR_INVALID_PARAMETER_TYPE, name ? name : "null",
+                   "null");
+        return false;
+    }
+
+    *obj = qnull();
+    return true;
+}
+
+static void string_input_free(Visitor *v)
+{
+    StringInputVisitor *siv = to_siv(v);
+
+    g_free(siv);
+}
+
+Visitor *string_input_visitor_new(const char *str)
+{
+    StringInputVisitor *v;
+
+    assert(str);
+    v = g_malloc0(sizeof(*v));
+
+    v->visitor.type = VISITOR_INPUT;
+    v->visitor.type_int64 = parse_type_int64;
+    v->visitor.type_uint64 = parse_type_uint64;
+    v->visitor.type_size = parse_type_size;
+    v->visitor.type_bool = parse_type_bool;
+    v->visitor.type_str = parse_type_str;
+    v->visitor.type_number = parse_type_number;
+    v->visitor.type_null = parse_type_null;
+    v->visitor.start_list = start_list;
+    v->visitor.next_list = next_list;
+    v->visitor.check_list = check_list;
+    v->visitor.end_list = end_list;
+    v->visitor.free = string_input_free;
+
+    v->string = str;
+    v->lm = LM_NONE;
+    return &v->visitor;
+}
diff --git a/qapi/string-output-visitor.c b/qapi/string-output-visitor.c
new file mode 100644
index 000000000..b74aa4d44
--- /dev/null
+++ b/qapi/string-output-visitor.c
@@ -0,0 +1,374 @@
+/*
+ * String printing Visitor
+ *
+ * Copyright Red Hat, Inc. 2012-2016
+ *
+ * Author: Paolo Bonzini 
+ *
+ * This work is licensed under the terms of the GNU LGPL, version 2.1 or later.
+ * See the COPYING.LIB file in the top-level directory.
+ *
+ */
+
+#include "qemu/osdep.h"
+#include "qemu/cutils.h"
+#include "qapi/string-output-visitor.h"
+#include "qapi/visitor-impl.h"
+#include "qemu/host-utils.h"
+#include 
+#include "qemu/range.h"
+
+enum ListMode {
+    LM_NONE,             /* not traversing a list of repeated options */
+    LM_STARTED,          /* next_list() ready to be called */
+
+    LM_IN_PROGRESS,      /* next_list() has been called.
+                          *
+                          * Generating the next list link will consume the most
+                          * recently parsed QemuOpt instance of the repeated
+                          * option.
+                          *
+                          * Parsing a value into the list link will examine the
+                          * next QemuOpt instance of the repeated option, and
+                          * possibly enter LM_SIGNED_INTERVAL or
+                          * LM_UNSIGNED_INTERVAL.
+                          */
+
+    LM_SIGNED_INTERVAL,  /* next_list() has been called.
+                          *
+                          * Generating the next list link will consume the most
+                          * recently stored element from the signed interval,
+                          * parsed from the most recent QemuOpt instance of the
+                          * repeated option. This may consume QemuOpt itself
+                          * and return to LM_IN_PROGRESS.
+                          *
+                          * Parsing a value into the list link will store the
+                          * next element of the signed interval.
+                          */
+
+    LM_UNSIGNED_INTERVAL,/* Same as above, only for an unsigned interval. */
+
+    LM_END,              /* next_list() called, about to see last element. */
+};
+
+typedef enum ListMode ListMode;
+
+struct StringOutputVisitor
+{
+    Visitor visitor;
+    bool human;
+    GString *string;
+    char **result;
+    ListMode list_mode;
+    union {
+        int64_t s;
+        uint64_t u;
+    } range_start, range_end;
+    GList *ranges;
+    void *list; /* Only needed for sanity checking the caller */
+};
+
+static StringOutputVisitor *to_sov(Visitor *v)
+{
+    return container_of(v, StringOutputVisitor, visitor);
+}
+
+static void string_output_set(StringOutputVisitor *sov, char *string)
+{
+    if (sov->string) {
+        g_string_free(sov->string, true);
+    }
+    sov->string = g_string_new(string);
+    g_free(string);
+}
+
+static void string_output_append(StringOutputVisitor *sov, int64_t a)
+{
+    Range *r = g_malloc0(sizeof(*r));
+
+    range_set_bounds(r, a, a);
+    sov->ranges = range_list_insert(sov->ranges, r);
+}
+
+static void string_output_append_range(StringOutputVisitor *sov,
+                                       int64_t s, int64_t e)
+{
+    Range *r = g_malloc0(sizeof(*r));
+
+    range_set_bounds(r, s, e);
+    sov->ranges = range_list_insert(sov->ranges, r);
+}
+
+static void format_string(StringOutputVisitor *sov, Range *r, bool next,
+                          bool human)
+{
+    if (range_lob(r) != range_upb(r)) {
+        if (human) {
+            g_string_append_printf(sov->string, "0x%" PRIx64 "-0x%" PRIx64,
+                                   range_lob(r), range_upb(r));
+
+        } else {
+            g_string_append_printf(sov->string, "%" PRId64 "-%" PRId64,
+                                   range_lob(r), range_upb(r));
+        }
+    } else {
+        if (human) {
+            g_string_append_printf(sov->string, "0x%" PRIx64, range_lob(r));
+        } else {
+            g_string_append_printf(sov->string, "%" PRId64, range_lob(r));
+        }
+    }
+    if (next) {
+        g_string_append(sov->string, ",");
+    }
+}
+
+static bool print_type_int64(Visitor *v, const char *name, int64_t *obj,
+                             Error **errp)
+{
+    StringOutputVisitor *sov = to_sov(v);
+    GList *l;
+
+    switch (sov->list_mode) {
+    case LM_NONE:
+        string_output_append(sov, *obj);
+        break;
+
+    case LM_STARTED:
+        sov->range_start.s = *obj;
+        sov->range_end.s = *obj;
+        sov->list_mode = LM_IN_PROGRESS;
+        return true;
+
+    case LM_IN_PROGRESS:
+        if (sov->range_end.s + 1 == *obj) {
+            sov->range_end.s++;
+        } else {
+            if (sov->range_start.s == sov->range_end.s) {
+                string_output_append(sov, sov->range_end.s);
+            } else {
+                assert(sov->range_start.s < sov->range_end.s);
+                string_output_append_range(sov, sov->range_start.s,
+                                           sov->range_end.s);
+            }
+
+            sov->range_start.s = *obj;
+            sov->range_end.s = *obj;
+        }
+        return true;
+
+    case LM_END:
+        if (sov->range_end.s + 1 == *obj) {
+            sov->range_end.s++;
+            assert(sov->range_start.s < sov->range_end.s);
+            string_output_append_range(sov, sov->range_start.s,
+                                       sov->range_end.s);
+        } else {
+            if (sov->range_start.s == sov->range_end.s) {
+                string_output_append(sov, sov->range_end.s);
+            } else {
+                assert(sov->range_start.s < sov->range_end.s);
+
+                string_output_append_range(sov, sov->range_start.s,
+                                           sov->range_end.s);
+            }
+            string_output_append(sov, *obj);
+        }
+        break;
+
+    default:
+        abort();
+    }
+
+    l = sov->ranges;
+    while (l) {
+        Range *r = l->data;
+        format_string(sov, r, l->next != NULL, false);
+        l = l->next;
+    }
+
+    if (sov->human) {
+        l = sov->ranges;
+        g_string_append(sov->string, " (");
+        while (l) {
+            Range *r = l->data;
+            format_string(sov, r, l->next != NULL, true);
+            l = l->next;
+        }
+        g_string_append(sov->string, ")");
+    }
+
+    return true;
+}
+
+static bool print_type_uint64(Visitor *v, const char *name, uint64_t *obj,
+                             Error **errp)
+{
+    /* FIXME: print_type_int64 mishandles values over INT64_MAX */
+    int64_t i = *obj;
+    return print_type_int64(v, name, &i, errp);
+}
+
+static bool print_type_size(Visitor *v, const char *name, uint64_t *obj,
+                            Error **errp)
+{
+    StringOutputVisitor *sov = to_sov(v);
+    uint64_t val;
+    char *out, *psize;
+
+    if (!sov->human) {
+        out = g_strdup_printf("%"PRIu64, *obj);
+        string_output_set(sov, out);
+        return true;
+    }
+
+    val = *obj;
+    psize = size_to_str(val);
+    out = g_strdup_printf("%"PRIu64" (%s)", val, psize);
+    string_output_set(sov, out);
+
+    g_free(psize);
+    return true;
+}
+
+static bool print_type_bool(Visitor *v, const char *name, bool *obj,
+                            Error **errp)
+{
+    StringOutputVisitor *sov = to_sov(v);
+    string_output_set(sov, g_strdup(*obj ? "true" : "false"));
+    return true;
+}
+
+static bool print_type_str(Visitor *v, const char *name, char **obj,
+                           Error **errp)
+{
+    StringOutputVisitor *sov = to_sov(v);
+    char *out;
+
+    if (sov->human) {
+        out = *obj ? g_strdup_printf("\"%s\"", *obj) : g_strdup("");
+    } else {
+        out = g_strdup(*obj ? *obj : "");
+    }
+    string_output_set(sov, out);
+    return true;
+}
+
+static bool print_type_number(Visitor *v, const char *name, double *obj,
+                              Error **errp)
+{
+    StringOutputVisitor *sov = to_sov(v);
+    string_output_set(sov, g_strdup_printf("%f", *obj));
+    return true;
+}
+
+static bool print_type_null(Visitor *v, const char *name, QNull **obj,
+                            Error **errp)
+{
+    StringOutputVisitor *sov = to_sov(v);
+    char *out;
+
+    if (sov->human) {
+        out = g_strdup("");
+    } else {
+        out = g_strdup("");
+    }
+    string_output_set(sov, out);
+    return true;
+}
+
+static bool
+start_list(Visitor *v, const char *name, GenericList **list, size_t size,
+           Error **errp)
+{
+    StringOutputVisitor *sov = to_sov(v);
+
+    /* we can't traverse a list in a list */
+    assert(sov->list_mode == LM_NONE);
+    /* We don't support visits without a list */
+    assert(list);
+    sov->list = list;
+    /* List handling is only needed if there are at least two elements */
+    if (*list && (*list)->next) {
+        sov->list_mode = LM_STARTED;
+    }
+    return true;
+}
+
+static GenericList *next_list(Visitor *v, GenericList *tail, size_t size)
+{
+    StringOutputVisitor *sov = to_sov(v);
+    GenericList *ret = tail->next;
+
+    if (ret && !ret->next) {
+        sov->list_mode = LM_END;
+    }
+    return ret;
+}
+
+static void end_list(Visitor *v, void **obj)
+{
+    StringOutputVisitor *sov = to_sov(v);
+
+    assert(sov->list == obj);
+    assert(sov->list_mode == LM_STARTED ||
+           sov->list_mode == LM_END ||
+           sov->list_mode == LM_NONE ||
+           sov->list_mode == LM_IN_PROGRESS);
+    sov->list_mode = LM_NONE;
+}
+
+static void string_output_complete(Visitor *v, void *opaque)
+{
+    StringOutputVisitor *sov = to_sov(v);
+
+    assert(opaque == sov->result);
+    *sov->result = g_string_free(sov->string, false);
+    sov->string = NULL;
+}
+
+static void free_range(void *range, void *dummy)
+{
+    g_free(range);
+}
+
+static void string_output_free(Visitor *v)
+{
+    StringOutputVisitor *sov = to_sov(v);
+
+    if (sov->string) {
+        g_string_free(sov->string, true);
+    }
+
+    g_list_foreach(sov->ranges, free_range, NULL);
+    g_list_free(sov->ranges);
+    g_free(sov);
+}
+
+Visitor *string_output_visitor_new(bool human, char **result)
+{
+    StringOutputVisitor *v;
+
+    v = g_malloc0(sizeof(*v));
+
+    v->string = g_string_new(NULL);
+    v->human = human;
+    v->result = result;
+    *result = NULL;
+
+    v->visitor.type = VISITOR_OUTPUT;
+    v->visitor.type_int64 = print_type_int64;
+    v->visitor.type_uint64 = print_type_uint64;
+    v->visitor.type_size = print_type_size;
+    v->visitor.type_bool = print_type_bool;
+    v->visitor.type_str = print_type_str;
+    v->visitor.type_number = print_type_number;
+    v->visitor.type_null = print_type_null;
+    v->visitor.start_list = start_list;
+    v->visitor.next_list = next_list;
+    v->visitor.end_list = end_list;
+    v->visitor.complete = string_output_complete;
+    v->visitor.free = string_output_free;
+
+    return &v->visitor;
+}
diff --git a/qapi/tpm.json b/qapi/tpm.json
new file mode 100644
index 000000000..6a10c9ed8
--- /dev/null
+++ b/qapi/tpm.json
@@ -0,0 +1,155 @@
+# -*- Mode: Python -*-
+# vim: filetype=python
+#
+
+##
+# = TPM (trusted platform module) devices
+##
+
+##
+# @TpmModel:
+#
+# An enumeration of TPM models
+#
+# @tpm-tis: TPM TIS model
+# @tpm-crb: TPM CRB model (since 2.12)
+# @tpm-spapr: TPM SPAPR model (since 5.0)
+#
+# Since: 1.5
+##
+{ 'enum': 'TpmModel', 'data': [ 'tpm-tis', 'tpm-crb', 'tpm-spapr' ] }
+##
+# @query-tpm-models:
+#
+# Return a list of supported TPM models
+#
+# Returns: a list of TpmModel
+#
+# Since: 1.5
+#
+# Example:
+#
+# -> { "execute": "query-tpm-models" }
+# <- { "return": [ "tpm-tis", "tpm-crb", "tpm-spapr" ] }
+#
+##
+{ 'command': 'query-tpm-models', 'returns': ['TpmModel'] }
+
+##
+# @TpmType:
+#
+# An enumeration of TPM types
+#
+# @passthrough: TPM passthrough type
+# @emulator: Software Emulator TPM type
+#            Since: 2.11
+#
+# Since: 1.5
+##
+{ 'enum': 'TpmType', 'data': [ 'passthrough', 'emulator' ] }
+
+##
+# @query-tpm-types:
+#
+# Return a list of supported TPM types
+#
+# Returns: a list of TpmType
+#
+# Since: 1.5
+#
+# Example:
+#
+# -> { "execute": "query-tpm-types" }
+# <- { "return": [ "passthrough", "emulator" ] }
+#
+##
+{ 'command': 'query-tpm-types', 'returns': ['TpmType'] }
+
+##
+# @TPMPassthroughOptions:
+#
+# Information about the TPM passthrough type
+#
+# @path: string describing the path used for accessing the TPM device
+#
+# @cancel-path: string showing the TPM's sysfs cancel file
+#               for cancellation of TPM commands while they are executing
+#
+# Since: 1.5
+##
+{ 'struct': 'TPMPassthroughOptions',
+  'data': { '*path': 'str',
+            '*cancel-path': 'str' } }
+
+##
+# @TPMEmulatorOptions:
+#
+# Information about the TPM emulator type
+#
+# @chardev: Name of a unix socket chardev
+#
+# Since: 2.11
+##
+{ 'struct': 'TPMEmulatorOptions', 'data': { 'chardev' : 'str' } }
+
+##
+# @TpmTypeOptions:
+#
+# A union referencing different TPM backend types' configuration options
+#
+# @type: - 'passthrough' The configuration options for the TPM passthrough type
+#        - 'emulator' The configuration options for TPM emulator backend type
+#
+# Since: 1.5
+##
+{ 'union': 'TpmTypeOptions',
+   'data': { 'passthrough' : 'TPMPassthroughOptions',
+             'emulator': 'TPMEmulatorOptions' } }
+
+##
+# @TPMInfo:
+#
+# Information about the TPM
+#
+# @id: The Id of the TPM
+#
+# @model: The TPM frontend model
+#
+# @options: The TPM (backend) type configuration options
+#
+# Since: 1.5
+##
+{ 'struct': 'TPMInfo',
+  'data': {'id': 'str',
+           'model': 'TpmModel',
+           'options': 'TpmTypeOptions' } }
+
+##
+# @query-tpm:
+#
+# Return information about the TPM device
+#
+# Returns: @TPMInfo on success
+#
+# Since: 1.5
+#
+# Example:
+#
+# -> { "execute": "query-tpm" }
+# <- { "return":
+#      [
+#        { "model": "tpm-tis",
+#          "options":
+#            { "type": "passthrough",
+#              "data":
+#                { "cancel-path": "/sys/class/misc/tpm0/device/cancel",
+#                  "path": "/dev/tpm0"
+#                }
+#            },
+#          "id": "tpm0"
+#        }
+#      ]
+#    }
+#
+##
+{ 'command': 'query-tpm', 'returns': ['TPMInfo'] }
diff --git a/qapi/trace-events b/qapi/trace-events
new file mode 100644
index 000000000..5eb4afa11
--- /dev/null
+++ b/qapi/trace-events
@@ -0,0 +1,36 @@
+# See docs/devel/tracing.txt for syntax documentation.
+
+# qapi-visit-core.c
+visit_free(void *v) "v=%p"
+visit_complete(void *v, void *opaque) "v=%p opaque=%p"
+
+visit_start_struct(void *v, const char *name, void *obj, size_t size) "v=%p name=%s obj=%p size=%zu"
+visit_check_struct(void *v) "v=%p"
+visit_end_struct(void *v, void *obj) "v=%p obj=%p"
+
+visit_start_list(void *v, const char *name, void *obj, size_t size) "v=%p name=%s obj=%p size=%zu"
+visit_next_list(void *v, void *tail, size_t size) "v=%p tail=%p size=%zu"
+visit_check_list(void *v) "v=%p"
+visit_end_list(void *v, void *obj) "v=%p obj=%p"
+
+visit_start_alternate(void *v, const char *name, void *obj, size_t size) "v=%p name=%s obj=%p size=%zu"
+visit_end_alternate(void *v, void *obj) "v=%p obj=%p"
+
+visit_optional(void *v, const char *name, bool *present) "v=%p name=%s present=%p"
+
+visit_type_enum(void *v, const char *name, int *obj) "v=%p name=%s obj=%p"
+visit_type_int(void *v, const char *name, int64_t *obj) "v=%p name=%s obj=%p"
+visit_type_uint8(void *v, const char *name, uint8_t *obj) "v=%p name=%s obj=%p"
+visit_type_uint16(void *v, const char *name, uint16_t *obj) "v=%p name=%s obj=%p"
+visit_type_uint32(void *v, const char *name, uint32_t *obj) "v=%p name=%s obj=%p"
+visit_type_uint64(void *v, const char *name, uint64_t *obj) "v=%p name=%s obj=%p"
+visit_type_int8(void *v, const char *name, int8_t *obj) "v=%p name=%s obj=%p"
+visit_type_int16(void *v, const char *name, int16_t *obj) "v=%p name=%s obj=%p"
+visit_type_int32(void *v, const char *name, int32_t *obj) "v=%p name=%s obj=%p"
+visit_type_int64(void *v, const char *name, int64_t *obj) "v=%p name=%s obj=%p"
+visit_type_size(void *v, const char *name, uint64_t *obj) "v=%p name=%s obj=%p"
+visit_type_bool(void *v, const char *name, bool *obj) "v=%p name=%s obj=%p"
+visit_type_str(void *v, const char *name, char **obj) "v=%p name=%s obj=%p"
+visit_type_number(void *v, const char *name, void *obj) "v=%p name=%s obj=%p"
+visit_type_any(void *v, const char *name, void *obj) "v=%p name=%s obj=%p"
+visit_type_null(void *v, const char *name, void *obj) "v=%p name=%s obj=%p"
diff --git a/qapi/trace.h b/qapi/trace.h
new file mode 100644
index 000000000..5c3fb674d
--- /dev/null
+++ b/qapi/trace.h
@@ -0,0 +1 @@
+#include "trace/trace-qapi.h"
diff --git a/qapi/trace.json b/qapi/trace.json
new file mode 100644
index 000000000..47c68f04d
--- /dev/null
+++ b/qapi/trace.json
@@ -0,0 +1,108 @@
+# -*- mode: python -*-
+#
+# Copyright (C) 2011-2016 Lluís Vilanova 
+#
+# This work is licensed under the terms of the GNU GPL, version 2 or later.
+# See the COPYING file in the top-level directory.
+
+##
+# = Tracing
+##
+
+##
+# @TraceEventState:
+#
+# State of a tracing event.
+#
+# @unavailable: The event is statically disabled.
+#
+# @disabled: The event is dynamically disabled.
+#
+# @enabled: The event is dynamically enabled.
+#
+# Since: 2.2
+##
+{ 'enum': 'TraceEventState',
+  'data': ['unavailable', 'disabled', 'enabled'] }
+
+##
+# @TraceEventInfo:
+#
+# Information of a tracing event.
+#
+# @name: Event name.
+# @state: Tracing state.
+# @vcpu: Whether this is a per-vCPU event (since 2.7).
+#
+# An event is per-vCPU if it has the "vcpu" property in the "trace-events"
+# files.
+#
+# Since: 2.2
+##
+{ 'struct': 'TraceEventInfo',
+  'data': {'name': 'str', 'state': 'TraceEventState', 'vcpu': 'bool'} }
+
+##
+# @trace-event-get-state:
+#
+# Query the state of events.
+#
+# @name: Event name pattern (case-sensitive glob).
+# @vcpu: The vCPU to query (any by default; since 2.7).
+#
+# Returns: a list of @TraceEventInfo for the matching events
+#
+#          An event is returned if:
+#
+#          - its name matches the @name pattern, and
+#          - if @vcpu is given, the event has the "vcpu" property.
+#
+#          Therefore, if @vcpu is given, the operation will only match per-vCPU events,
+#          returning their state on the specified vCPU. Special case: if @name is an
+#          exact match, @vcpu is given and the event does not have the "vcpu" property,
+#          an error is returned.
+#
+# Since: 2.2
+#
+# Example:
+#
+# -> { "execute": "trace-event-get-state",
+#      "arguments": { "name": "qemu_memalign" } }
+# <- { "return": [ { "name": "qemu_memalign", "state": "disabled" } ] }
+#
+##
+{ 'command': 'trace-event-get-state',
+  'data': {'name': 'str', '*vcpu': 'int'},
+  'returns': ['TraceEventInfo'] }
+
+##
+# @trace-event-set-state:
+#
+# Set the dynamic tracing state of events.
+#
+# @name: Event name pattern (case-sensitive glob).
+# @enable: Whether to enable tracing.
+# @ignore-unavailable: Do not match unavailable events with @name.
+# @vcpu: The vCPU to act upon (all by default; since 2.7).
+#
+# An event's state is modified if:
+# - its name matches the @name pattern, and
+# - if @vcpu is given, the event has the "vcpu" property.
+#
+# Therefore, if @vcpu is given, the operation will only match per-vCPU events,
+# setting their state on the specified vCPU. Special case: if @name is an exact
+# match, @vcpu is given and the event does not have the "vcpu" property, an
+# error is returned.
+#
+# Since: 2.2
+#
+# Example:
+#
+# -> { "execute": "trace-event-set-state",
+#      "arguments": { "name": "qemu_memalign", "enable": "true" } }
+# <- { "return": {} }
+#
+##
+{ 'command': 'trace-event-set-state',
+  'data': {'name': 'str', 'enable': 'bool', '*ignore-unavailable': 'bool',
+           '*vcpu': 'int'} }
diff --git a/qapi/transaction.json b/qapi/transaction.json
new file mode 100644
index 000000000..15ddebdbc
--- /dev/null
+++ b/qapi/transaction.json
@@ -0,0 +1,167 @@
+# -*- Mode: Python -*-
+# vim: filetype=python
+#
+
+##
+# = Transactions
+##
+
+{ 'include': 'block-core.json' }
+
+##
+# @Abort:
+#
+# This action can be used to test transaction failure.
+#
+# Since: 1.6
+##
+{ 'struct': 'Abort',
+  'data': { } }
+
+##
+# @ActionCompletionMode:
+#
+# An enumeration of Transactional completion modes.
+#
+# @individual: Do not attempt to cancel any other Actions if any Actions fail
+#              after the Transaction request succeeds. All Actions that
+#              can complete successfully will do so without waiting on others.
+#              This is the default.
+#
+# @grouped: If any Action fails after the Transaction succeeds, cancel all
+#           Actions. Actions do not complete until all Actions are ready to
+#           complete. May be rejected by Actions that do not support this
+#           completion mode.
+#
+# Since: 2.5
+##
+{ 'enum': 'ActionCompletionMode',
+  'data': [ 'individual', 'grouped' ] }
+
+##
+# @TransactionAction:
+#
+# A discriminated record of operations that can be performed with
+# @transaction. Action @type can be:
+#
+# - @abort: since 1.6
+# - @block-dirty-bitmap-add: since 2.5
+# - @block-dirty-bitmap-remove: since 4.2
+# - @block-dirty-bitmap-clear: since 2.5
+# - @block-dirty-bitmap-enable: since 4.0
+# - @block-dirty-bitmap-disable: since 4.0
+# - @block-dirty-bitmap-merge: since 4.0
+# - @blockdev-backup: since 2.3
+# - @blockdev-snapshot: since 2.5
+# - @blockdev-snapshot-internal-sync: since 1.7
+# - @blockdev-snapshot-sync: since 1.1
+# - @drive-backup: since 1.6
+#
+# Since: 1.1
+##
+{ 'union': 'TransactionAction',
+  'data': {
+       'abort': 'Abort',
+       'block-dirty-bitmap-add': 'BlockDirtyBitmapAdd',
+       'block-dirty-bitmap-remove': 'BlockDirtyBitmap',
+       'block-dirty-bitmap-clear': 'BlockDirtyBitmap',
+       'block-dirty-bitmap-enable': 'BlockDirtyBitmap',
+       'block-dirty-bitmap-disable': 'BlockDirtyBitmap',
+       'block-dirty-bitmap-merge': 'BlockDirtyBitmapMerge',
+       'blockdev-backup': 'BlockdevBackup',
+       'blockdev-snapshot': 'BlockdevSnapshot',
+       'blockdev-snapshot-internal-sync': 'BlockdevSnapshotInternal',
+       'blockdev-snapshot-sync': 'BlockdevSnapshotSync',
+       'drive-backup': 'DriveBackup'
+   } }
+
+##
+# @TransactionProperties:
+#
+# Optional arguments to modify the behavior of a Transaction.
+#
+# @completion-mode: Controls how jobs launched asynchronously by
+#                   Actions will complete or fail as a group.
+#                   See @ActionCompletionMode for details.
+#
+# Since: 2.5
+##
+{ 'struct': 'TransactionProperties',
+  'data': {
+       '*completion-mode': 'ActionCompletionMode'
+  }
+}
+
+##
+# @transaction:
+#
+# Executes a number of transactionable QMP commands atomically. If any
+# operation fails, then the entire set of actions will be abandoned and the
+# appropriate error returned.
+#
+# For external snapshots, the dictionary contains the device, the file to use for
+# the new snapshot, and the format.  The default format, if not specified, is
+# qcow2.
+#
+# Each new snapshot defaults to being created by QEMU (wiping any
+# contents if the file already exists), but it is also possible to reuse
+# an externally-created file.  In the latter case, you should ensure that
+# the new image file has the same contents as the current one; QEMU cannot
+# perform any meaningful check.  Typically this is achieved by using the
+# current image file as the backing file for the new image.
+#
+# On failure, the original disks pre-snapshot attempt will be used.
+#
+# For internal snapshots, the dictionary contains the device and the snapshot's
+# name.  If an internal snapshot matching name already exists, the request will
+# be rejected.  Only some image formats support it, for example, qcow2, rbd,
+# and sheepdog.
+#
+# On failure, qemu will try delete the newly created internal snapshot in the
+# transaction.  When an I/O error occurs during deletion, the user needs to fix
+# it later with qemu-img or other command.
+#
+# @actions: List of @TransactionAction;
+#           information needed for the respective operations.
+#
+# @properties: structure of additional options to control the
+#              execution of the transaction. See @TransactionProperties
+#              for additional detail.
+#
+# Returns: nothing on success
+#
+#          Errors depend on the operations of the transaction
+#
+# Note: The transaction aborts on the first failure.  Therefore, there will be
+#       information on only one failed operation returned in an error condition, and
+#       subsequent actions will not have been attempted.
+#
+# Since: 1.1
+#
+# Example:
+#
+# -> { "execute": "transaction",
+#      "arguments": { "actions": [
+#          { "type": "blockdev-snapshot-sync", "data" : { "device": "ide-hd0",
+#                                      "snapshot-file": "/some/place/my-image",
+#                                      "format": "qcow2" } },
+#          { "type": "blockdev-snapshot-sync", "data" : { "node-name": "myfile",
+#                                      "snapshot-file": "/some/place/my-image2",
+#                                      "snapshot-node-name": "node3432",
+#                                      "mode": "existing",
+#                                      "format": "qcow2" } },
+#          { "type": "blockdev-snapshot-sync", "data" : { "device": "ide-hd1",
+#                                      "snapshot-file": "/some/place/my-image2",
+#                                      "mode": "existing",
+#                                      "format": "qcow2" } },
+#          { "type": "blockdev-snapshot-internal-sync", "data" : {
+#                                      "device": "ide-hd2",
+#                                      "name": "snapshot0" } } ] } }
+# <- { "return": {} }
+#
+##
+{ 'command': 'transaction',
+  'data': { 'actions': [ 'TransactionAction' ],
+            '*properties': 'TransactionProperties'
+          }
+}
diff --git a/qapi/ui.json b/qapi/ui.json
new file mode 100644
index 000000000..6c7b33cb7
--- /dev/null
+++ b/qapi/ui.json
@@ -0,0 +1,1181 @@
+# -*- Mode: Python -*-
+# vim: filetype=python
+#
+
+##
+# = Remote desktop
+##
+
+{ 'include': 'sockets.json' }
+
+##
+# @set_password:
+#
+# Sets the password of a remote display session.
+#
+# @protocol: - 'vnc' to modify the VNC server password
+#            - 'spice' to modify the Spice server password
+#
+# @password: the new password
+#
+# @connected: how to handle existing clients when changing the
+#             password.  If nothing is specified, defaults to 'keep'
+#             'fail' to fail the command if clients are connected
+#             'disconnect' to disconnect existing clients
+#             'keep' to maintain existing clients
+#
+# Returns: - Nothing on success
+#          - If Spice is not enabled, DeviceNotFound
+#
+# Since: 0.14.0
+#
+# Example:
+#
+# -> { "execute": "set_password", "arguments": { "protocol": "vnc",
+#                                                "password": "secret" } }
+# <- { "return": {} }
+#
+##
+{ 'command': 'set_password',
+  'data': {'protocol': 'str', 'password': 'str', '*connected': 'str'} }
+
+##
+# @expire_password:
+#
+# Expire the password of a remote display server.
+#
+# @protocol: the name of the remote display protocol 'vnc' or 'spice'
+#
+# @time: when to expire the password.
+#
+#        - 'now' to expire the password immediately
+#        - 'never' to cancel password expiration
+#        - '+INT' where INT is the number of seconds from now (integer)
+#        - 'INT' where INT is the absolute time in seconds
+#
+# Returns: - Nothing on success
+#          - If @protocol is 'spice' and Spice is not active, DeviceNotFound
+#
+# Since: 0.14.0
+#
+# Notes: Time is relative to the server and currently there is no way to
+#        coordinate server time with client time.  It is not recommended to
+#        use the absolute time version of the @time parameter unless you're
+#        sure you are on the same machine as the QEMU instance.
+#
+# Example:
+#
+# -> { "execute": "expire_password", "arguments": { "protocol": "vnc",
+#                                                   "time": "+60" } }
+# <- { "return": {} }
+#
+##
+{ 'command': 'expire_password', 'data': {'protocol': 'str', 'time': 'str'} }
+
+##
+# @screendump:
+#
+# Write a PPM of the VGA screen to a file.
+#
+# @filename: the path of a new PPM file to store the image
+#
+# @device: ID of the display device that should be dumped. If this parameter
+#          is missing, the primary display will be used. (Since 2.12)
+#
+# @head: head to use in case the device supports multiple heads. If this
+#        parameter is missing, head #0 will be used. Also note that the head
+#        can only be specified in conjunction with the device ID. (Since 2.12)
+#
+# Returns: Nothing on success
+#
+# Since: 0.14.0
+#
+# Example:
+#
+# -> { "execute": "screendump",
+#      "arguments": { "filename": "/tmp/image" } }
+# <- { "return": {} }
+#
+##
+{ 'command': 'screendump',
+  'data': {'filename': 'str', '*device': 'str', '*head': 'int'},
+  'coroutine': true }
+
+##
+# == Spice
+##
+
+##
+# @SpiceBasicInfo:
+#
+# The basic information for SPICE network connection
+#
+# @host: IP address
+#
+# @port: port number
+#
+# @family: address family
+#
+# Since: 2.1
+##
+{ 'struct': 'SpiceBasicInfo',
+  'data': { 'host': 'str',
+            'port': 'str',
+            'family': 'NetworkAddressFamily' },
+  'if': 'defined(CONFIG_SPICE)' }
+
+##
+# @SpiceServerInfo:
+#
+# Information about a SPICE server
+#
+# @auth: authentication method
+#
+# Since: 2.1
+##
+{ 'struct': 'SpiceServerInfo',
+  'base': 'SpiceBasicInfo',
+  'data': { '*auth': 'str' },
+  'if': 'defined(CONFIG_SPICE)' }
+
+##
+# @SpiceChannel:
+#
+# Information about a SPICE client channel.
+#
+# @connection-id: SPICE connection id number.  All channels with the same id
+#                 belong to the same SPICE session.
+#
+# @channel-type: SPICE channel type number.  "1" is the main control
+#                channel, filter for this one if you want to track spice
+#                sessions only
+#
+# @channel-id: SPICE channel ID number.  Usually "0", might be different when
+#              multiple channels of the same type exist, such as multiple
+#              display channels in a multihead setup
+#
+# @tls: true if the channel is encrypted, false otherwise.
+#
+# Since: 0.14.0
+##
+{ 'struct': 'SpiceChannel',
+  'base': 'SpiceBasicInfo',
+  'data': {'connection-id': 'int', 'channel-type': 'int', 'channel-id': 'int',
+           'tls': 'bool'},
+  'if': 'defined(CONFIG_SPICE)' }
+
+##
+# @SpiceQueryMouseMode:
+#
+# An enumeration of Spice mouse states.
+#
+# @client: Mouse cursor position is determined by the client.
+#
+# @server: Mouse cursor position is determined by the server.
+#
+# @unknown: No information is available about mouse mode used by
+#           the spice server.
+#
+# Note: spice/enums.h has a SpiceMouseMode already, hence the name.
+#
+# Since: 1.1
+##
+{ 'enum': 'SpiceQueryMouseMode',
+  'data': [ 'client', 'server', 'unknown' ],
+  'if': 'defined(CONFIG_SPICE)' }
+
+##
+# @SpiceInfo:
+#
+# Information about the SPICE session.
+#
+# @enabled: true if the SPICE server is enabled, false otherwise
+#
+# @migrated: true if the last guest migration completed and spice
+#            migration had completed as well. false otherwise. (since 1.4)
+#
+# @host: The hostname the SPICE server is bound to.  This depends on
+#        the name resolution on the host and may be an IP address.
+#
+# @port: The SPICE server's port number.
+#
+# @compiled-version: SPICE server version.
+#
+# @tls-port: The SPICE server's TLS port number.
+#
+# @auth: the current authentication type used by the server
+#
+#        - 'none'  if no authentication is being used
+#        - 'spice' uses SASL or direct TLS authentication, depending on command
+#          line options
+#
+# @mouse-mode: The mode in which the mouse cursor is displayed currently. Can
+#              be determined by the client or the server, or unknown if spice
+#              server doesn't provide this information. (since: 1.1)
+#
+# @channels: a list of @SpiceChannel for each active spice channel
+#
+# Since: 0.14.0
+##
+{ 'struct': 'SpiceInfo',
+  'data': {'enabled': 'bool', 'migrated': 'bool', '*host': 'str', '*port': 'int',
+           '*tls-port': 'int', '*auth': 'str', '*compiled-version': 'str',
+           'mouse-mode': 'SpiceQueryMouseMode', '*channels': ['SpiceChannel']},
+  'if': 'defined(CONFIG_SPICE)' }
+
+##
+# @query-spice:
+#
+# Returns information about the current SPICE server
+#
+# Returns: @SpiceInfo
+#
+# Since: 0.14.0
+#
+# Example:
+#
+# -> { "execute": "query-spice" }
+# <- { "return": {
+#          "enabled": true,
+#          "auth": "spice",
+#          "port": 5920,
+#          "tls-port": 5921,
+#          "host": "0.0.0.0",
+#          "channels": [
+#             {
+#                "port": "54924",
+#                "family": "ipv4",
+#                "channel-type": 1,
+#                "connection-id": 1804289383,
+#                "host": "127.0.0.1",
+#                "channel-id": 0,
+#                "tls": true
+#             },
+#             {
+#                "port": "36710",
+#                "family": "ipv4",
+#                "channel-type": 4,
+#                "connection-id": 1804289383,
+#                "host": "127.0.0.1",
+#                "channel-id": 0,
+#                "tls": false
+#             },
+#             [ ... more channels follow ... ]
+#          ]
+#       }
+#    }
+#
+##
+{ 'command': 'query-spice', 'returns': 'SpiceInfo',
+  'if': 'defined(CONFIG_SPICE)' }
+
+##
+# @SPICE_CONNECTED:
+#
+# Emitted when a SPICE client establishes a connection
+#
+# @server: server information
+#
+# @client: client information
+#
+# Since: 0.14.0
+#
+# Example:
+#
+# <- { "timestamp": {"seconds": 1290688046, "microseconds": 388707},
+#      "event": "SPICE_CONNECTED",
+#      "data": {
+#        "server": { "port": "5920", "family": "ipv4", "host": "127.0.0.1"},
+#        "client": {"port": "52873", "family": "ipv4", "host": "127.0.0.1"}
+#    }}
+#
+##
+{ 'event': 'SPICE_CONNECTED',
+  'data': { 'server': 'SpiceBasicInfo',
+            'client': 'SpiceBasicInfo' },
+  'if': 'defined(CONFIG_SPICE)' }
+
+##
+# @SPICE_INITIALIZED:
+#
+# Emitted after initial handshake and authentication takes place (if any)
+# and the SPICE channel is up and running
+#
+# @server: server information
+#
+# @client: client information
+#
+# Since: 0.14.0
+#
+# Example:
+#
+# <- { "timestamp": {"seconds": 1290688046, "microseconds": 417172},
+#      "event": "SPICE_INITIALIZED",
+#      "data": {"server": {"auth": "spice", "port": "5921",
+#                          "family": "ipv4", "host": "127.0.0.1"},
+#               "client": {"port": "49004", "family": "ipv4", "channel-type": 3,
+#                          "connection-id": 1804289383, "host": "127.0.0.1",
+#                          "channel-id": 0, "tls": true}
+#    }}
+#
+##
+{ 'event': 'SPICE_INITIALIZED',
+  'data': { 'server': 'SpiceServerInfo',
+            'client': 'SpiceChannel' },
+  'if': 'defined(CONFIG_SPICE)' }
+
+##
+# @SPICE_DISCONNECTED:
+#
+# Emitted when the SPICE connection is closed
+#
+# @server: server information
+#
+# @client: client information
+#
+# Since: 0.14.0
+#
+# Example:
+#
+# <- { "timestamp": {"seconds": 1290688046, "microseconds": 388707},
+#      "event": "SPICE_DISCONNECTED",
+#      "data": {
+#        "server": { "port": "5920", "family": "ipv4", "host": "127.0.0.1"},
+#        "client": {"port": "52873", "family": "ipv4", "host": "127.0.0.1"}
+#    }}
+#
+##
+{ 'event': 'SPICE_DISCONNECTED',
+  'data': { 'server': 'SpiceBasicInfo',
+            'client': 'SpiceBasicInfo' },
+  'if': 'defined(CONFIG_SPICE)' }
+
+##
+# @SPICE_MIGRATE_COMPLETED:
+#
+# Emitted when SPICE migration has completed
+#
+# Since: 1.3
+#
+# Example:
+#
+# <- { "timestamp": {"seconds": 1290688046, "microseconds": 417172},
+#      "event": "SPICE_MIGRATE_COMPLETED" }
+#
+##
+{ 'event': 'SPICE_MIGRATE_COMPLETED',
+  'if': 'defined(CONFIG_SPICE)' }
+
+##
+# == VNC
+##
+
+##
+# @VncBasicInfo:
+#
+# The basic information for vnc network connection
+#
+# @host: IP address
+#
+# @service: The service name of the vnc port. This may depend on the host
+#           system's service database so symbolic names should not be relied
+#           on.
+#
+# @family: address family
+#
+# @websocket: true in case the socket is a websocket (since 2.3).
+#
+# Since: 2.1
+##
+{ 'struct': 'VncBasicInfo',
+  'data': { 'host': 'str',
+            'service': 'str',
+            'family': 'NetworkAddressFamily',
+            'websocket': 'bool' },
+  'if': 'defined(CONFIG_VNC)' }
+
+##
+# @VncServerInfo:
+#
+# The network connection information for server
+#
+# @auth: authentication method used for
+#        the plain (non-websocket) VNC server
+#
+# Since: 2.1
+##
+{ 'struct': 'VncServerInfo',
+  'base': 'VncBasicInfo',
+  'data': { '*auth': 'str' },
+  'if': 'defined(CONFIG_VNC)' }
+
+##
+# @VncClientInfo:
+#
+# Information about a connected VNC client.
+#
+# @x509_dname: If x509 authentication is in use, the Distinguished
+#              Name of the client.
+#
+# @sasl_username: If SASL authentication is in use, the SASL username
+#                 used for authentication.
+#
+# Since: 0.14.0
+##
+{ 'struct': 'VncClientInfo',
+  'base': 'VncBasicInfo',
+  'data': { '*x509_dname': 'str', '*sasl_username': 'str' },
+  'if': 'defined(CONFIG_VNC)' }
+
+##
+# @VncInfo:
+#
+# Information about the VNC session.
+#
+# @enabled: true if the VNC server is enabled, false otherwise
+#
+# @host: The hostname the VNC server is bound to.  This depends on
+#        the name resolution on the host and may be an IP address.
+#
+# @family: - 'ipv6' if the host is listening for IPv6 connections
+#          - 'ipv4' if the host is listening for IPv4 connections
+#          - 'unix' if the host is listening on a unix domain socket
+#          - 'unknown' otherwise
+#
+# @service: The service name of the server's port.  This may depends
+#           on the host system's service database so symbolic names should not
+#           be relied on.
+#
+# @auth: the current authentication type used by the server
+#
+#        - 'none' if no authentication is being used
+#        - 'vnc' if VNC authentication is being used
+#        - 'vencrypt+plain' if VEncrypt is used with plain text authentication
+#        - 'vencrypt+tls+none' if VEncrypt is used with TLS and no authentication
+#        - 'vencrypt+tls+vnc' if VEncrypt is used with TLS and VNC authentication
+#        - 'vencrypt+tls+plain' if VEncrypt is used with TLS and plain text auth
+#        - 'vencrypt+x509+none' if VEncrypt is used with x509 and no auth
+#        - 'vencrypt+x509+vnc' if VEncrypt is used with x509 and VNC auth
+#        - 'vencrypt+x509+plain' if VEncrypt is used with x509 and plain text auth
+#        - 'vencrypt+tls+sasl' if VEncrypt is used with TLS and SASL auth
+#        - 'vencrypt+x509+sasl' if VEncrypt is used with x509 and SASL auth
+#
+# @clients: a list of @VncClientInfo of all currently connected clients
+#
+# Since: 0.14.0
+##
+{ 'struct': 'VncInfo',
+  'data': {'enabled': 'bool', '*host': 'str',
+           '*family': 'NetworkAddressFamily',
+           '*service': 'str', '*auth': 'str', '*clients': ['VncClientInfo']},
+  'if': 'defined(CONFIG_VNC)' }
+
+##
+# @VncPrimaryAuth:
+#
+# vnc primary authentication method.
+#
+# Since: 2.3
+##
+{ 'enum': 'VncPrimaryAuth',
+  'data': [ 'none', 'vnc', 'ra2', 'ra2ne', 'tight', 'ultra',
+            'tls', 'vencrypt', 'sasl' ],
+  'if': 'defined(CONFIG_VNC)' }
+
+##
+# @VncVencryptSubAuth:
+#
+# vnc sub authentication method with vencrypt.
+#
+# Since: 2.3
+##
+{ 'enum': 'VncVencryptSubAuth',
+  'data': [ 'plain',
+            'tls-none',  'x509-none',
+            'tls-vnc',   'x509-vnc',
+            'tls-plain', 'x509-plain',
+            'tls-sasl',  'x509-sasl' ],
+  'if': 'defined(CONFIG_VNC)' }
+
+##
+# @VncServerInfo2:
+#
+# The network connection information for server
+#
+# @auth: The current authentication type used by the servers
+#
+# @vencrypt: The vencrypt sub authentication type used by the
+#            servers, only specified in case auth == vencrypt.
+#
+# Since: 2.9
+##
+{ 'struct': 'VncServerInfo2',
+  'base': 'VncBasicInfo',
+  'data': { 'auth'      : 'VncPrimaryAuth',
+            '*vencrypt' : 'VncVencryptSubAuth' },
+  'if': 'defined(CONFIG_VNC)' }
+
+##
+# @VncInfo2:
+#
+# Information about a vnc server
+#
+# @id: vnc server name.
+#
+# @server: A list of @VncBasincInfo describing all listening sockets.
+#          The list can be empty (in case the vnc server is disabled).
+#          It also may have multiple entries: normal + websocket,
+#          possibly also ipv4 + ipv6 in the future.
+#
+# @clients: A list of @VncClientInfo of all currently connected clients.
+#           The list can be empty, for obvious reasons.
+#
+# @auth: The current authentication type used by the non-websockets servers
+#
+# @vencrypt: The vencrypt authentication type used by the servers,
+#            only specified in case auth == vencrypt.
+#
+# @display: The display device the vnc server is linked to.
+#
+# Since: 2.3
+##
+{ 'struct': 'VncInfo2',
+  'data': { 'id'        : 'str',
+            'server'    : ['VncServerInfo2'],
+            'clients'   : ['VncClientInfo'],
+            'auth'      : 'VncPrimaryAuth',
+            '*vencrypt' : 'VncVencryptSubAuth',
+            '*display'  : 'str' },
+  'if': 'defined(CONFIG_VNC)' }
+
+##
+# @query-vnc:
+#
+# Returns information about the current VNC server
+#
+# Returns: @VncInfo
+#
+# Since: 0.14.0
+#
+# Example:
+#
+# -> { "execute": "query-vnc" }
+# <- { "return": {
+#          "enabled":true,
+#          "host":"0.0.0.0",
+#          "service":"50402",
+#          "auth":"vnc",
+#          "family":"ipv4",
+#          "clients":[
+#             {
+#                "host":"127.0.0.1",
+#                "service":"50401",
+#                "family":"ipv4"
+#             }
+#          ]
+#       }
+#    }
+#
+##
+{ 'command': 'query-vnc', 'returns': 'VncInfo',
+  'if': 'defined(CONFIG_VNC)' }
+##
+# @query-vnc-servers:
+#
+# Returns a list of vnc servers.  The list can be empty.
+#
+# Returns: a list of @VncInfo2
+#
+# Since: 2.3
+##
+{ 'command': 'query-vnc-servers', 'returns': ['VncInfo2'],
+  'if': 'defined(CONFIG_VNC)' }
+
+##
+# @change-vnc-password:
+#
+# Change the VNC server password.
+#
+# @password: the new password to use with VNC authentication
+#
+# Since: 1.1
+#
+# Notes: An empty password in this command will set the password to the empty
+#        string.  Existing clients are unaffected by executing this command.
+##
+{ 'command': 'change-vnc-password',
+  'data': { 'password': 'str' },
+  'if': 'defined(CONFIG_VNC)' }
+
+##
+# @VNC_CONNECTED:
+#
+# Emitted when a VNC client establishes a connection
+#
+# @server: server information
+#
+# @client: client information
+#
+# Note: This event is emitted before any authentication takes place, thus
+#       the authentication ID is not provided
+#
+# Since: 0.13.0
+#
+# Example:
+#
+# <- { "event": "VNC_CONNECTED",
+#      "data": {
+#            "server": { "auth": "sasl", "family": "ipv4",
+#                        "service": "5901", "host": "0.0.0.0" },
+#            "client": { "family": "ipv4", "service": "58425",
+#                        "host": "127.0.0.1" } },
+#      "timestamp": { "seconds": 1262976601, "microseconds": 975795 } }
+#
+##
+{ 'event': 'VNC_CONNECTED',
+  'data': { 'server': 'VncServerInfo',
+            'client': 'VncBasicInfo' },
+  'if': 'defined(CONFIG_VNC)' }
+
+##
+# @VNC_INITIALIZED:
+#
+# Emitted after authentication takes place (if any) and the VNC session is
+# made active
+#
+# @server: server information
+#
+# @client: client information
+#
+# Since: 0.13.0
+#
+# Example:
+#
+# <-  { "event": "VNC_INITIALIZED",
+#       "data": {
+#            "server": { "auth": "sasl", "family": "ipv4",
+#                        "service": "5901", "host": "0.0.0.0"},
+#            "client": { "family": "ipv4", "service": "46089",
+#                        "host": "127.0.0.1", "sasl_username": "luiz" } },
+#       "timestamp": { "seconds": 1263475302, "microseconds": 150772 } }
+#
+##
+{ 'event': 'VNC_INITIALIZED',
+  'data': { 'server': 'VncServerInfo',
+            'client': 'VncClientInfo' },
+  'if': 'defined(CONFIG_VNC)' }
+
+##
+# @VNC_DISCONNECTED:
+#
+# Emitted when the connection is closed
+#
+# @server: server information
+#
+# @client: client information
+#
+# Since: 0.13.0
+#
+# Example:
+#
+# <- { "event": "VNC_DISCONNECTED",
+#      "data": {
+#            "server": { "auth": "sasl", "family": "ipv4",
+#                        "service": "5901", "host": "0.0.0.0" },
+#            "client": { "family": "ipv4", "service": "58425",
+#                        "host": "127.0.0.1", "sasl_username": "luiz" } },
+#      "timestamp": { "seconds": 1262976601, "microseconds": 975795 } }
+#
+##
+{ 'event': 'VNC_DISCONNECTED',
+  'data': { 'server': 'VncServerInfo',
+            'client': 'VncClientInfo' },
+  'if': 'defined(CONFIG_VNC)' }
+
+##
+# = Input
+##
+
+##
+# @MouseInfo:
+#
+# Information about a mouse device.
+#
+# @name: the name of the mouse device
+#
+# @index: the index of the mouse device
+#
+# @current: true if this device is currently receiving mouse events
+#
+# @absolute: true if this device supports absolute coordinates as input
+#
+# Since: 0.14.0
+##
+{ 'struct': 'MouseInfo',
+  'data': {'name': 'str', 'index': 'int', 'current': 'bool',
+           'absolute': 'bool'} }
+
+##
+# @query-mice:
+#
+# Returns information about each active mouse device
+#
+# Returns: a list of @MouseInfo for each device
+#
+# Since: 0.14.0
+#
+# Example:
+#
+# -> { "execute": "query-mice" }
+# <- { "return": [
+#          {
+#             "name":"QEMU Microsoft Mouse",
+#             "index":0,
+#             "current":false,
+#             "absolute":false
+#          },
+#          {
+#             "name":"QEMU PS/2 Mouse",
+#             "index":1,
+#             "current":true,
+#             "absolute":true
+#          }
+#       ]
+#    }
+#
+##
+{ 'command': 'query-mice', 'returns': ['MouseInfo'] }
+
+##
+# @QKeyCode:
+#
+# An enumeration of key name.
+#
+# This is used by the @send-key command.
+#
+# @unmapped: since 2.0
+# @pause: since 2.0
+# @ro: since 2.4
+# @kp_comma: since 2.4
+# @kp_equals: since 2.6
+# @power: since 2.6
+# @hiragana: since 2.9
+# @henkan: since 2.9
+# @yen: since 2.9
+#
+# @sleep: since 2.10
+# @wake: since 2.10
+# @audionext: since 2.10
+# @audioprev: since 2.10
+# @audiostop: since 2.10
+# @audioplay: since 2.10
+# @audiomute: since 2.10
+# @volumeup: since 2.10
+# @volumedown: since 2.10
+# @mediaselect: since 2.10
+# @mail: since 2.10
+# @calculator: since 2.10
+# @computer: since 2.10
+# @ac_home: since 2.10
+# @ac_back: since 2.10
+# @ac_forward: since 2.10
+# @ac_refresh: since 2.10
+# @ac_bookmarks: since 2.10
+#
+# @muhenkan: since 2.12
+# @katakanahiragana: since 2.12
+#
+# 'sysrq' was mistakenly added to hack around the fact that
+# the ps2 driver was not generating correct scancodes sequences
+# when 'alt+print' was pressed. This flaw is now fixed and the
+# 'sysrq' key serves no further purpose. Any further use of
+# 'sysrq' will be transparently changed to 'print', so they
+# are effectively synonyms.
+#
+# Since: 1.3.0
+#
+##
+{ 'enum': 'QKeyCode',
+  'data': [ 'unmapped',
+            'shift', 'shift_r', 'alt', 'alt_r', 'ctrl',
+            'ctrl_r', 'menu', 'esc', '1', '2', '3', '4', '5', '6', '7', '8',
+            '9', '0', 'minus', 'equal', 'backspace', 'tab', 'q', 'w', 'e',
+            'r', 't', 'y', 'u', 'i', 'o', 'p', 'bracket_left', 'bracket_right',
+            'ret', 'a', 's', 'd', 'f', 'g', 'h', 'j', 'k', 'l', 'semicolon',
+            'apostrophe', 'grave_accent', 'backslash', 'z', 'x', 'c', 'v', 'b',
+            'n', 'm', 'comma', 'dot', 'slash', 'asterisk', 'spc', 'caps_lock',
+            'f1', 'f2', 'f3', 'f4', 'f5', 'f6', 'f7', 'f8', 'f9', 'f10',
+            'num_lock', 'scroll_lock', 'kp_divide', 'kp_multiply',
+            'kp_subtract', 'kp_add', 'kp_enter', 'kp_decimal', 'sysrq', 'kp_0',
+            'kp_1', 'kp_2', 'kp_3', 'kp_4', 'kp_5', 'kp_6', 'kp_7', 'kp_8',
+            'kp_9', 'less', 'f11', 'f12', 'print', 'home', 'pgup', 'pgdn', 'end',
+            'left', 'up', 'down', 'right', 'insert', 'delete', 'stop', 'again',
+            'props', 'undo', 'front', 'copy', 'open', 'paste', 'find', 'cut',
+            'lf', 'help', 'meta_l', 'meta_r', 'compose', 'pause',
+            'ro', 'hiragana', 'henkan', 'yen', 'muhenkan', 'katakanahiragana',
+            'kp_comma', 'kp_equals', 'power', 'sleep', 'wake',
+            'audionext', 'audioprev', 'audiostop', 'audioplay', 'audiomute',
+            'volumeup', 'volumedown', 'mediaselect',
+            'mail', 'calculator', 'computer',
+            'ac_home', 'ac_back', 'ac_forward', 'ac_refresh', 'ac_bookmarks' ] }
+
+##
+# @KeyValue:
+#
+# Represents a keyboard key.
+#
+# Since: 1.3.0
+##
+{ 'union': 'KeyValue',
+  'data': {
+    'number': 'int',
+    'qcode': 'QKeyCode' } }
+
+##
+# @send-key:
+#
+# Send keys to guest.
+#
+# @keys: An array of @KeyValue elements. All @KeyValues in this array are
+#        simultaneously sent to the guest. A @KeyValue.number value is sent
+#        directly to the guest, while @KeyValue.qcode must be a valid
+#        @QKeyCode value
+#
+# @hold-time: time to delay key up events, milliseconds. Defaults
+#             to 100
+#
+# Returns: - Nothing on success
+#          - If key is unknown or redundant, InvalidParameter
+#
+# Since: 1.3.0
+#
+# Example:
+#
+# -> { "execute": "send-key",
+#      "arguments": { "keys": [ { "type": "qcode", "data": "ctrl" },
+#                               { "type": "qcode", "data": "alt" },
+#                               { "type": "qcode", "data": "delete" } ] } }
+# <- { "return": {} }
+#
+##
+{ 'command': 'send-key',
+  'data': { 'keys': ['KeyValue'], '*hold-time': 'int' } }
+
+##
+# @InputButton:
+#
+# Button of a pointer input device (mouse, tablet).
+#
+# @side: front side button of a 5-button mouse (since 2.9)
+#
+# @extra: rear side button of a 5-button mouse (since 2.9)
+#
+# Since: 2.0
+##
+{ 'enum'  : 'InputButton',
+  'data'  : [ 'left', 'middle', 'right', 'wheel-up', 'wheel-down', 'side',
+  'extra' ] }
+
+##
+# @InputAxis:
+#
+# Position axis of a pointer input device (mouse, tablet).
+#
+# Since: 2.0
+##
+{ 'enum'  : 'InputAxis',
+  'data'  : [ 'x', 'y' ] }
+
+##
+# @InputKeyEvent:
+#
+# Keyboard input event.
+#
+# @key:    Which key this event is for.
+# @down:   True for key-down and false for key-up events.
+#
+# Since: 2.0
+##
+{ 'struct'  : 'InputKeyEvent',
+  'data'  : { 'key'     : 'KeyValue',
+              'down'    : 'bool' } }
+
+##
+# @InputBtnEvent:
+#
+# Pointer button input event.
+#
+# @button: Which button this event is for.
+# @down:   True for key-down and false for key-up events.
+#
+# Since: 2.0
+##
+{ 'struct'  : 'InputBtnEvent',
+  'data'  : { 'button'  : 'InputButton',
+              'down'    : 'bool' } }
+
+##
+# @InputMoveEvent:
+#
+# Pointer motion input event.
+#
+# @axis: Which axis is referenced by @value.
+# @value: Pointer position.  For absolute coordinates the
+#         valid range is 0 -> 0x7ffff
+#
+# Since: 2.0
+##
+{ 'struct'  : 'InputMoveEvent',
+  'data'  : { 'axis'    : 'InputAxis',
+              'value'   : 'int' } }
+
+##
+# @InputEvent:
+#
+# Input event union.
+#
+# @type: the input type, one of:
+#
+#        - 'key': Input event of Keyboard
+#        - 'btn': Input event of pointer buttons
+#        - 'rel': Input event of relative pointer motion
+#        - 'abs': Input event of absolute pointer motion
+#
+# Since: 2.0
+##
+{ 'union' : 'InputEvent',
+  'data'  : { 'key'     : 'InputKeyEvent',
+              'btn'     : 'InputBtnEvent',
+              'rel'     : 'InputMoveEvent',
+              'abs'     : 'InputMoveEvent' } }
+
+##
+# @input-send-event:
+#
+# Send input event(s) to guest.
+#
+# The @device and @head parameters can be used to send the input event
+# to specific input devices in case (a) multiple input devices of the
+# same kind are added to the virtual machine and (b) you have
+# configured input routing (see docs/multiseat.txt) for those input
+# devices.  The parameters work exactly like the device and head
+# properties of input devices.  If @device is missing, only devices
+# that have no input routing config are admissible.  If @device is
+# specified, both input devices with and without input routing config
+# are admissible, but devices with input routing config take
+# precedence.
+#
+# @device: display device to send event(s) to.
+# @head: head to send event(s) to, in case the
+#        display device supports multiple scanouts.
+# @events: List of InputEvent union.
+#
+# Returns: Nothing on success.
+#
+# Since: 2.6
+#
+# Note: The consoles are visible in the qom tree, under
+#       /backend/console[$index]. They have a device link and head property,
+#       so it is possible to map which console belongs to which device and
+#       display.
+#
+# Example:
+#
+# 1. Press left mouse button.
+#
+# -> { "execute": "input-send-event",
+#     "arguments": { "device": "video0",
+#                    "events": [ { "type": "btn",
+#                    "data" : { "down": true, "button": "left" } } ] } }
+# <- { "return": {} }
+#
+# -> { "execute": "input-send-event",
+#     "arguments": { "device": "video0",
+#                    "events": [ { "type": "btn",
+#                    "data" : { "down": false, "button": "left" } } ] } }
+# <- { "return": {} }
+#
+# 2. Press ctrl-alt-del.
+#
+# -> { "execute": "input-send-event",
+#      "arguments": { "events": [
+#         { "type": "key", "data" : { "down": true,
+#           "key": {"type": "qcode", "data": "ctrl" } } },
+#         { "type": "key", "data" : { "down": true,
+#           "key": {"type": "qcode", "data": "alt" } } },
+#         { "type": "key", "data" : { "down": true,
+#           "key": {"type": "qcode", "data": "delete" } } } ] } }
+# <- { "return": {} }
+#
+# 3. Move mouse pointer to absolute coordinates (20000, 400).
+#
+# -> { "execute": "input-send-event" ,
+#   "arguments": { "events": [
+#                { "type": "abs", "data" : { "axis": "x", "value" : 20000 } },
+#                { "type": "abs", "data" : { "axis": "y", "value" : 400 } } ] } }
+# <- { "return": {} }
+#
+##
+{ 'command': 'input-send-event',
+  'data': { '*device': 'str',
+            '*head'  : 'int',
+            'events' : [ 'InputEvent' ] } }
+
+##
+# @GrabToggleKeys:
+#
+# Keys to toggle input-linux between host and guest.
+#
+# Since: 4.0
+#
+##
+{ 'enum': 'GrabToggleKeys',
+  'data': [ 'ctrl-ctrl', 'alt-alt', 'shift-shift','meta-meta', 'scrolllock',
+            'ctrl-scrolllock' ] }
+
+##
+# @DisplayGTK:
+#
+# GTK display options.
+#
+# @grab-on-hover: Grab keyboard input on mouse hover.
+# @zoom-to-fit: Zoom guest display to fit into the host window.  When
+#               turned off the host window will be resized instead.
+#               In case the display device can notify the guest on
+#               window resizes (virtio-gpu) this will default to "on",
+#               assuming the guest will resize the display to match
+#               the window size then.  Otherwise it defaults to "off".
+#               Since 3.1
+#
+# Since: 2.12
+#
+##
+{ 'struct'  : 'DisplayGTK',
+  'data'    : { '*grab-on-hover' : 'bool',
+                '*zoom-to-fit'   : 'bool'  } }
+
+##
+# @DisplayEGLHeadless:
+#
+# EGL headless display options.
+#
+# @rendernode: Which DRM render node should be used. Default is the first
+#              available node on the host.
+#
+# Since: 3.1
+#
+##
+{ 'struct'  : 'DisplayEGLHeadless',
+  'data'    : { '*rendernode' : 'str' } }
+
+ ##
+ # @DisplayGLMode:
+ #
+ # Display OpenGL mode.
+ #
+ # @off: Disable OpenGL (default).
+ # @on: Use OpenGL, pick context type automatically.
+ #      Would better be named 'auto' but is called 'on' for backward
+ #      compatibility with bool type.
+ # @core: Use OpenGL with Core (desktop) Context.
+ # @es: Use OpenGL with ES (embedded systems) Context.
+ #
+ # Since: 3.0
+ #
+ ##
+{ 'enum'    : 'DisplayGLMode',
+  'data'    : [ 'off', 'on', 'core', 'es' ] }
+
+##
+# @DisplayCurses:
+#
+# Curses display options.
+#
+# @charset:       Font charset used by guest (default: CP437).
+#
+# Since: 4.0
+#
+##
+{ 'struct'  : 'DisplayCurses',
+  'data'    : { '*charset'       : 'str' } }
+
+##
+# @DisplayType:
+#
+# Display (user interface) type.
+#
+# @default: The default user interface, selecting from the first available
+#           of gtk, sdl, cocoa, and vnc.
+#
+# @none: No user interface or video output display. The guest will
+#        still see an emulated graphics card, but its output will not
+#        be displayed to the QEMU user.
+#
+# @gtk: The GTK user interface.
+#
+# @sdl: The SDL user interface.
+#
+# @egl-headless: No user interface, offload GL operations to a local
+#                DRI device. Graphical display need to be paired with
+#                VNC or Spice. (Since 3.1)
+#
+# @curses: Display video output via curses.  For graphics device
+#          models which support a text mode, QEMU can display this
+#          output using a curses/ncurses interface. Nothing is
+#          displayed when the graphics device is in graphical mode or
+#          if the graphics device does not support a text
+#          mode. Generally only the VGA device models support text
+#          mode.
+#
+# @cocoa: The Cocoa user interface.
+#
+# @spice-app: Set up a Spice server and run the default associated
+#             application to connect to it. The server will redirect
+#             the serial console and QEMU monitors. (Since 4.0)
+#
+# Since: 2.12
+#
+##
+{ 'enum'    : 'DisplayType',
+  'data'    : [ 'default', 'none', 'gtk', 'sdl',
+                'egl-headless', 'curses', 'cocoa',
+                'spice-app'] }
+
+##
+# @DisplayOptions:
+#
+# Display (user interface) options.
+#
+# @type:          Which DisplayType qemu should use.
+# @full-screen:   Start user interface in fullscreen mode (default: off).
+# @window-close:  Allow to quit qemu with window close button (default: on).
+# @show-cursor:   Force showing the mouse cursor (default: off).
+#                 (since: 5.0)
+# @gl:            Enable OpenGL support (default: off).
+#
+# Since: 2.12
+#
+##
+{ 'union'   : 'DisplayOptions',
+  'base'    : { 'type'           : 'DisplayType',
+                '*full-screen'   : 'bool',
+                '*window-close'  : 'bool',
+                '*show-cursor'   : 'bool',
+                '*gl'            : 'DisplayGLMode' },
+  'discriminator' : 'type',
+  'data'    : { 'gtk'            : 'DisplayGTK',
+                'curses'         : 'DisplayCurses',
+                'egl-headless'   : 'DisplayEGLHeadless'} }
+
+##
+# @query-display-options:
+#
+# Returns information about display configuration
+#
+# Returns: @DisplayOptions
+#
+# Since: 3.1
+#
+##
+{ 'command': 'query-display-options',
+  'returns': 'DisplayOptions' }
diff --git a/qemu-img-cmds.hx b/qemu-img-cmds.hx
new file mode 100644
index 000000000..b3620f29e
--- /dev/null
+++ b/qemu-img-cmds.hx
@@ -0,0 +1,100 @@
+HXCOMM Keep the list of subcommands sorted by name.
+HXCOMM Use DEFHEADING() to define headings in both help text and rST
+HXCOMM Text between SRST and ERST are copied to rST version and
+HXCOMM discarded from C version
+HXCOMM DEF(command, callback, arg_string) is used to construct
+HXCOMM command structures and help message.
+HXCOMM HXCOMM can be used for comments, discarded from both rST and C
+
+HXCOMM When amending the rST sections, please remember to copy the usage
+HXCOMM over to the per-command sections in docs/tools/qemu-img.rst.
+
+DEF("amend", img_amend,
+    "amend [--object objectdef] [--image-opts] [-p] [-q] [-f fmt] [-t cache] [--force] -o options filename")
+SRST
+.. option:: amend [--object OBJECTDEF] [--image-opts] [-p] [-q] [-f FMT] [-t CACHE] [--force] -o OPTIONS FILENAME
+ERST
+
+DEF("bench", img_bench,
+    "bench [-c count] [-d depth] [-f fmt] [--flush-interval=flush_interval] [-i aio] [-n] [--no-drain] [-o offset] [--pattern=pattern] [-q] [-s buffer_size] [-S step_size] [-t cache] [-w] [-U] filename")
+SRST
+.. option:: bench [-c COUNT] [-d DEPTH] [-f FMT] [--flush-interval=FLUSH_INTERVAL] [-i AIO] [-n] [--no-drain] [-o OFFSET] [--pattern=PATTERN] [-q] [-s BUFFER_SIZE] [-S STEP_SIZE] [-t CACHE] [-w] [-U] FILENAME
+ERST
+
+DEF("bitmap", img_bitmap,
+    "bitmap (--merge SOURCE | --add | --remove | --clear | --enable | --disable)... [-b source_file [-F source_fmt]] [-g granularity] [--object objectdef] [--image-opts | -f fmt] filename bitmap")
+SRST
+.. option:: bitmap (--merge SOURCE | --add | --remove | --clear | --enable | --disable)... [-b SOURCE_FILE [-F SOURCE_FMT]] [-g GRANULARITY] [--object OBJECTDEF] [--image-opts | -f FMT] FILENAME BITMAP
+ERST
+
+DEF("check", img_check,
+    "check [--object objectdef] [--image-opts] [-q] [-f fmt] [--output=ofmt] [-r [leaks | all]] [-T src_cache] [-U] filename")
+SRST
+.. option:: check [--object OBJECTDEF] [--image-opts] [-q] [-f FMT] [--output=OFMT] [-r [leaks | all]] [-T SRC_CACHE] [-U] FILENAME
+ERST
+
+DEF("commit", img_commit,
+    "commit [--object objectdef] [--image-opts] [-q] [-f fmt] [-t cache] [-b base] [-r rate_limit] [-d] [-p] filename")
+SRST
+.. option:: commit [--object OBJECTDEF] [--image-opts] [-q] [-f FMT] [-t CACHE] [-b BASE] [-r RATE_LIMIT] [-d] [-p] FILENAME
+ERST
+
+DEF("compare", img_compare,
+    "compare [--object objectdef] [--image-opts] [-f fmt] [-F fmt] [-T src_cache] [-p] [-q] [-s] [-U] filename1 filename2")
+SRST
+.. option:: compare [--object OBJECTDEF] [--image-opts] [-f FMT] [-F FMT] [-T SRC_CACHE] [-p] [-q] [-s] [-U] FILENAME1 FILENAME2
+ERST
+
+DEF("convert", img_convert,
+    "convert [--object objectdef] [--image-opts] [--target-image-opts] [--target-is-zero] [--bitmaps] [-U] [-C] [-c] [-p] [-q] [-n] [-f fmt] [-t cache] [-T src_cache] [-O output_fmt] [-B backing_file] [-o options] [-l snapshot_param] [-S sparse_size] [-r rate_limit] [-m num_coroutines] [-W] [--salvage] filename [filename2 [...]] output_filename")
+SRST
+.. option:: convert [--object OBJECTDEF] [--image-opts] [--target-image-opts] [--target-is-zero] [--bitmaps] [-U] [-C] [-c] [-p] [-q] [-n] [-f FMT] [-t CACHE] [-T SRC_CACHE] [-O OUTPUT_FMT] [-B BACKING_FILE] [-o OPTIONS] [-l SNAPSHOT_PARAM] [-S SPARSE_SIZE] [-r RATE_LIMIT] [-m NUM_COROUTINES] [-W] [--salvage] FILENAME [FILENAME2 [...]] OUTPUT_FILENAME
+ERST
+
+DEF("create", img_create,
+    "create [--object objectdef] [-q] [-f fmt] [-b backing_file] [-F backing_fmt] [-u] [-o options] filename [size]")
+SRST
+.. option:: create [--object OBJECTDEF] [-q] [-f FMT] [-b BACKING_FILE] [-F BACKING_FMT] [-u] [-o OPTIONS] FILENAME [SIZE]
+ERST
+
+DEF("dd", img_dd,
+    "dd [--image-opts] [-U] [-f fmt] [-O output_fmt] [bs=block_size] [count=blocks] [skip=blocks] if=input of=output")
+SRST
+.. option:: dd [--image-opts] [-U] [-f FMT] [-O OUTPUT_FMT] [bs=BLOCK_SIZE] [count=BLOCKS] [skip=BLOCKS] if=INPUT of=OUTPUT
+ERST
+
+DEF("info", img_info,
+    "info [--object objectdef] [--image-opts] [-f fmt] [--output=ofmt] [--backing-chain] [-U] filename")
+SRST
+.. option:: info [--object OBJECTDEF] [--image-opts] [-f FMT] [--output=OFMT] [--backing-chain] [-U] FILENAME
+ERST
+
+DEF("map", img_map,
+    "map [--object objectdef] [--image-opts] [-f fmt] [--start-offset=offset] [--max-length=len] [--output=ofmt] [-U] filename")
+SRST
+.. option:: map [--object OBJECTDEF] [--image-opts] [-f FMT] [--start-offset=OFFSET] [--max-length=LEN] [--output=OFMT] [-U] FILENAME
+ERST
+
+DEF("measure", img_measure,
+"measure [--output=ofmt] [-O output_fmt] [-o options] [--size N | [--object objectdef] [--image-opts] [-f fmt] [-l snapshot_param] filename]")
+SRST
+.. option:: measure [--output=OFMT] [-O OUTPUT_FMT] [-o OPTIONS] [--size N | [--object OBJECTDEF] [--image-opts] [-f FMT] [-l SNAPSHOT_PARAM] FILENAME]
+ERST
+
+DEF("snapshot", img_snapshot,
+    "snapshot [--object objectdef] [--image-opts] [-U] [-q] [-l | -a snapshot | -c snapshot | -d snapshot] filename")
+SRST
+.. option:: snapshot [--object OBJECTDEF] [--image-opts] [-U] [-q] [-l | -a SNAPSHOT | -c SNAPSHOT | -d SNAPSHOT] FILENAME
+ERST
+
+DEF("rebase", img_rebase,
+    "rebase [--object objectdef] [--image-opts] [-U] [-q] [-f fmt] [-t cache] [-T src_cache] [-p] [-u] -b backing_file [-F backing_fmt] filename")
+SRST
+.. option:: rebase [--object OBJECTDEF] [--image-opts] [-U] [-q] [-f FMT] [-t CACHE] [-T SRC_CACHE] [-p] [-u] -b BACKING_FILE [-F BACKING_FMT] FILENAME
+ERST
+
+DEF("resize", img_resize,
+    "resize [--object objectdef] [--image-opts] [-f fmt] [--preallocation=prealloc] [-q] [--shrink] filename [+ | -]size")
+SRST
+.. option:: resize [--object OBJECTDEF] [--image-opts] [-f FMT] [--preallocation=PREALLOC] [-q] [--shrink] FILENAME [+ | -]SIZE
+ERST
diff --git a/qemu-img.c b/qemu-img.c
new file mode 100644
index 000000000..8bdea40b5
--- /dev/null
+++ b/qemu-img.c
@@ -0,0 +1,5548 @@
+/*
+ * QEMU disk image utility
+ *
+ * Copyright (c) 2003-2008 Fabrice Bellard
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+#include "qemu/osdep.h"
+#include 
+
+#include "qemu-common.h"
+#include "qemu-version.h"
+#include "qapi/error.h"
+#include "qapi/qapi-commands-block-core.h"
+#include "qapi/qapi-visit-block-core.h"
+#include "qapi/qobject-output-visitor.h"
+#include "qapi/qmp/qjson.h"
+#include "qapi/qmp/qdict.h"
+#include "qapi/qmp/qstring.h"
+#include "qemu/cutils.h"
+#include "qemu/config-file.h"
+#include "qemu/option.h"
+#include "qemu/error-report.h"
+#include "qemu/log.h"
+#include "qemu/main-loop.h"
+#include "qemu/module.h"
+#include "qemu/sockets.h"
+#include "qemu/units.h"
+#include "qom/object_interfaces.h"
+#include "sysemu/block-backend.h"
+#include "block/block_int.h"
+#include "block/blockjob.h"
+#include "block/qapi.h"
+#include "crypto/init.h"
+#include "trace/control.h"
+#include "qemu/throttle.h"
+#include "block/throttle-groups.h"
+
+#define QEMU_IMG_VERSION "qemu-img version " QEMU_FULL_VERSION \
+                          "\n" QEMU_COPYRIGHT "\n"
+
+typedef struct img_cmd_t {
+    const char *name;
+    int (*handler)(int argc, char **argv);
+} img_cmd_t;
+
+enum {
+    OPTION_OUTPUT = 256,
+    OPTION_BACKING_CHAIN = 257,
+    OPTION_OBJECT = 258,
+    OPTION_IMAGE_OPTS = 259,
+    OPTION_PATTERN = 260,
+    OPTION_FLUSH_INTERVAL = 261,
+    OPTION_NO_DRAIN = 262,
+    OPTION_TARGET_IMAGE_OPTS = 263,
+    OPTION_SIZE = 264,
+    OPTION_PREALLOCATION = 265,
+    OPTION_SHRINK = 266,
+    OPTION_SALVAGE = 267,
+    OPTION_TARGET_IS_ZERO = 268,
+    OPTION_ADD = 269,
+    OPTION_REMOVE = 270,
+    OPTION_CLEAR = 271,
+    OPTION_ENABLE = 272,
+    OPTION_DISABLE = 273,
+    OPTION_MERGE = 274,
+    OPTION_BITMAPS = 275,
+    OPTION_FORCE = 276,
+};
+
+typedef enum OutputFormat {
+    OFORMAT_JSON,
+    OFORMAT_HUMAN,
+} OutputFormat;
+
+/* Default to cache=writeback as data integrity is not important for qemu-img */
+#define BDRV_DEFAULT_CACHE "writeback"
+
+static void format_print(void *opaque, const char *name)
+{
+    printf(" %s", name);
+}
+
+static void QEMU_NORETURN GCC_FMT_ATTR(1, 2) error_exit(const char *fmt, ...)
+{
+    va_list ap;
+
+    va_start(ap, fmt);
+    error_vreport(fmt, ap);
+    va_end(ap);
+
+    error_printf("Try 'qemu-img --help' for more information\n");
+    exit(EXIT_FAILURE);
+}
+
+static void QEMU_NORETURN missing_argument(const char *option)
+{
+    error_exit("missing argument for option '%s'", option);
+}
+
+static void QEMU_NORETURN unrecognized_option(const char *option)
+{
+    error_exit("unrecognized option '%s'", option);
+}
+
+/* Please keep in synch with docs/tools/qemu-img.rst */
+static void QEMU_NORETURN help(void)
+{
+    const char *help_msg =
+           QEMU_IMG_VERSION
+           "usage: qemu-img [standard options] command [command options]\n"
+           "QEMU disk image utility\n"
+           "\n"
+           "    '-h', '--help'       display this help and exit\n"
+           "    '-V', '--version'    output version information and exit\n"
+           "    '-T', '--trace'      [[enable=]][,events=][,file=]\n"
+           "                         specify tracing options\n"
+           "\n"
+           "Command syntax:\n"
+#define DEF(option, callback, arg_string)        \
+           "  " arg_string "\n"
+#include "qemu-img-cmds.h"
+#undef DEF
+           "\n"
+           "Command parameters:\n"
+           "  'filename' is a disk image filename\n"
+           "  'objectdef' is a QEMU user creatable object definition. See the qemu(1)\n"
+           "    manual page for a description of the object properties. The most common\n"
+           "    object type is a 'secret', which is used to supply passwords and/or\n"
+           "    encryption keys.\n"
+           "  'fmt' is the disk image format. It is guessed automatically in most cases\n"
+           "  'cache' is the cache mode used to write the output disk image, the valid\n"
+           "    options are: 'none', 'writeback' (default, except for convert), 'writethrough',\n"
+           "    'directsync' and 'unsafe' (default for convert)\n"
+           "  'src_cache' is the cache mode used to read input disk images, the valid\n"
+           "    options are the same as for the 'cache' option\n"
+           "  'size' is the disk image size in bytes. Optional suffixes\n"
+           "    'k' or 'K' (kilobyte, 1024), 'M' (megabyte, 1024k), 'G' (gigabyte, 1024M),\n"
+           "    'T' (terabyte, 1024G), 'P' (petabyte, 1024T) and 'E' (exabyte, 1024P)  are\n"
+           "    supported. 'b' is ignored.\n"
+           "  'output_filename' is the destination disk image filename\n"
+           "  'output_fmt' is the destination format\n"
+           "  'options' is a comma separated list of format specific options in a\n"
+           "    name=value format. Use -o ? for an overview of the options supported by the\n"
+           "    used format\n"
+           "  'snapshot_param' is param used for internal snapshot, format\n"
+           "    is 'snapshot.id=[ID],snapshot.name=[NAME]', or\n"
+           "    '[ID_OR_NAME]'\n"
+           "  '-c' indicates that target image must be compressed (qcow format only)\n"
+           "  '-u' allows unsafe backing chains. For rebasing, it is assumed that old and\n"
+           "       new backing file match exactly. The image doesn't need a working\n"
+           "       backing file before rebasing in this case (useful for renaming the\n"
+           "       backing file). For image creation, allow creating without attempting\n"
+           "       to open the backing file.\n"
+           "  '-h' with or without a command shows this help and lists the supported formats\n"
+           "  '-p' show progress of command (only certain commands)\n"
+           "  '-q' use Quiet mode - do not print any output (except errors)\n"
+           "  '-S' indicates the consecutive number of bytes (defaults to 4k) that must\n"
+           "       contain only zeros for qemu-img to create a sparse image during\n"
+           "       conversion. If the number of bytes is 0, the source will not be scanned for\n"
+           "       unallocated or zero sectors, and the destination image will always be\n"
+           "       fully allocated\n"
+           "  '--output' takes the format in which the output must be done (human or json)\n"
+           "  '-n' skips the target volume creation (useful if the volume is created\n"
+           "       prior to running qemu-img)\n"
+           "\n"
+           "Parameters to bitmap subcommand:\n"
+           "  'bitmap' is the name of the bitmap to manipulate, through one or more\n"
+           "       actions from '--add', '--remove', '--clear', '--enable', '--disable',\n"
+           "       or '--merge source'\n"
+           "  '-g granularity' sets the granularity for '--add' actions\n"
+           "  '-b source' and '-F src_fmt' tell '--merge' actions to find the source\n"
+           "       bitmaps from an alternative file\n"
+           "\n"
+           "Parameters to check subcommand:\n"
+           "  '-r' tries to repair any inconsistencies that are found during the check.\n"
+           "       '-r leaks' repairs only cluster leaks, whereas '-r all' fixes all\n"
+           "       kinds of errors, with a higher risk of choosing the wrong fix or\n"
+           "       hiding corruption that has already occurred.\n"
+           "\n"
+           "Parameters to convert subcommand:\n"
+           "  '--bitmaps' copies all top-level persistent bitmaps to destination\n"
+           "  '-m' specifies how many coroutines work in parallel during the convert\n"
+           "       process (defaults to 8)\n"
+           "  '-W' allow to write to the target out of order rather than sequential\n"
+           "\n"
+           "Parameters to snapshot subcommand:\n"
+           "  'snapshot' is the name of the snapshot to create, apply or delete\n"
+           "  '-a' applies a snapshot (revert disk to saved state)\n"
+           "  '-c' creates a snapshot\n"
+           "  '-d' deletes a snapshot\n"
+           "  '-l' lists all snapshots in the given image\n"
+           "\n"
+           "Parameters to compare subcommand:\n"
+           "  '-f' first image format\n"
+           "  '-F' second image format\n"
+           "  '-s' run in Strict mode - fail on different image size or sector allocation\n"
+           "\n"
+           "Parameters to dd subcommand:\n"
+           "  'bs=BYTES' read and write up to BYTES bytes at a time "
+           "(default: 512)\n"
+           "  'count=N' copy only N input blocks\n"
+           "  'if=FILE' read from FILE\n"
+           "  'of=FILE' write to FILE\n"
+           "  'skip=N' skip N bs-sized blocks at the start of input\n";
+
+    printf("%s\nSupported formats:", help_msg);
+    bdrv_iterate_format(format_print, NULL, false);
+    printf("\n\n" QEMU_HELP_BOTTOM "\n");
+    exit(EXIT_SUCCESS);
+}
+
+static QemuOptsList qemu_object_opts = {
+    .name = "object",
+    .implied_opt_name = "qom-type",
+    .head = QTAILQ_HEAD_INITIALIZER(qemu_object_opts.head),
+    .desc = {
+        { }
+    },
+};
+
+static bool qemu_img_object_print_help(const char *type, QemuOpts *opts)
+{
+    if (user_creatable_print_help(type, opts)) {
+        exit(0);
+    }
+    return true;
+}
+
+/*
+ * Is @optarg safe for accumulate_options()?
+ * It is when multiple of them can be joined together separated by ','.
+ * To make that work, @optarg must not start with ',' (or else a
+ * separating ',' preceding it gets escaped), and it must not end with
+ * an odd number of ',' (or else a separating ',' following it gets
+ * escaped), or be empty (or else a separating ',' preceding it can
+ * escape a separating ',' following it).
+ * 
+ */
+static bool is_valid_option_list(const char *optarg)
+{
+    size_t len = strlen(optarg);
+    size_t i;
+
+    if (!optarg[0] || optarg[0] == ',') {
+        return false;
+    }
+
+    for (i = len; i > 0 && optarg[i - 1] == ','; i--) {
+    }
+    if ((len - i) % 2) {
+        return false;
+    }
+
+    return true;
+}
+
+static int accumulate_options(char **options, char *optarg)
+{
+    char *new_options;
+
+    if (!is_valid_option_list(optarg)) {
+        error_report("Invalid option list: %s", optarg);
+        return -1;
+    }
+
+    if (!*options) {
+        *options = g_strdup(optarg);
+    } else {
+        new_options = g_strdup_printf("%s,%s", *options, optarg);
+        g_free(*options);
+        *options = new_options;
+    }
+    return 0;
+}
+
+static QemuOptsList qemu_source_opts = {
+    .name = "source",
+    .implied_opt_name = "file",
+    .head = QTAILQ_HEAD_INITIALIZER(qemu_source_opts.head),
+    .desc = {
+        { }
+    },
+};
+
+static int GCC_FMT_ATTR(2, 3) qprintf(bool quiet, const char *fmt, ...)
+{
+    int ret = 0;
+    if (!quiet) {
+        va_list args;
+        va_start(args, fmt);
+        ret = vprintf(fmt, args);
+        va_end(args);
+    }
+    return ret;
+}
+
+
+static int print_block_option_help(const char *filename, const char *fmt)
+{
+    BlockDriver *drv, *proto_drv;
+    QemuOptsList *create_opts = NULL;
+    Error *local_err = NULL;
+
+    /* Find driver and parse its options */
+    drv = bdrv_find_format(fmt);
+    if (!drv) {
+        error_report("Unknown file format '%s'", fmt);
+        return 1;
+    }
+
+    if (!drv->create_opts) {
+        error_report("Format driver '%s' does not support image creation", fmt);
+        return 1;
+    }
+
+    create_opts = qemu_opts_append(create_opts, drv->create_opts);
+    if (filename) {
+        proto_drv = bdrv_find_protocol(filename, true, &local_err);
+        if (!proto_drv) {
+            error_report_err(local_err);
+            qemu_opts_free(create_opts);
+            return 1;
+        }
+        if (!proto_drv->create_opts) {
+            error_report("Protocol driver '%s' does not support image creation",
+                         proto_drv->format_name);
+            qemu_opts_free(create_opts);
+            return 1;
+        }
+        create_opts = qemu_opts_append(create_opts, proto_drv->create_opts);
+    }
+
+    if (filename) {
+        printf("Supported options:\n");
+    } else {
+        printf("Supported %s options:\n", fmt);
+    }
+    qemu_opts_print_help(create_opts, false);
+    qemu_opts_free(create_opts);
+
+    if (!filename) {
+        printf("\n"
+               "The protocol level may support further options.\n"
+               "Specify the target filename to include those options.\n");
+    }
+
+    return 0;
+}
+
+
+static BlockBackend *img_open_opts(const char *optstr,
+                                   QemuOpts *opts, int flags, bool writethrough,
+                                   bool quiet, bool force_share)
+{
+    QDict *options;
+    Error *local_err = NULL;
+    BlockBackend *blk;
+    options = qemu_opts_to_qdict(opts, NULL);
+    if (force_share) {
+        if (qdict_haskey(options, BDRV_OPT_FORCE_SHARE)
+            && strcmp(qdict_get_str(options, BDRV_OPT_FORCE_SHARE), "on")) {
+            error_report("--force-share/-U conflicts with image options");
+            qobject_unref(options);
+            return NULL;
+        }
+        qdict_put_str(options, BDRV_OPT_FORCE_SHARE, "on");
+    }
+    blk = blk_new_open(NULL, NULL, options, flags, &local_err);
+    if (!blk) {
+        error_reportf_err(local_err, "Could not open '%s': ", optstr);
+        return NULL;
+    }
+    blk_set_enable_write_cache(blk, !writethrough);
+
+    return blk;
+}
+
+static BlockBackend *img_open_file(const char *filename,
+                                   QDict *options,
+                                   const char *fmt, int flags,
+                                   bool writethrough, bool quiet,
+                                   bool force_share)
+{
+    BlockBackend *blk;
+    Error *local_err = NULL;
+
+    if (!options) {
+        options = qdict_new();
+    }
+    if (fmt) {
+        qdict_put_str(options, "driver", fmt);
+    }
+
+    if (force_share) {
+        qdict_put_bool(options, BDRV_OPT_FORCE_SHARE, true);
+    }
+    blk = blk_new_open(filename, NULL, options, flags, &local_err);
+    if (!blk) {
+        error_reportf_err(local_err, "Could not open '%s': ", filename);
+        return NULL;
+    }
+    blk_set_enable_write_cache(blk, !writethrough);
+
+    return blk;
+}
+
+
+static int img_add_key_secrets(void *opaque,
+                               const char *name, const char *value,
+                               Error **errp)
+{
+    QDict *options = opaque;
+
+    if (g_str_has_suffix(name, "key-secret")) {
+        qdict_put_str(options, name, value);
+    }
+
+    return 0;
+}
+
+
+static BlockBackend *img_open(bool image_opts,
+                              const char *filename,
+                              const char *fmt, int flags, bool writethrough,
+                              bool quiet, bool force_share)
+{
+    BlockBackend *blk;
+    if (image_opts) {
+        QemuOpts *opts;
+        if (fmt) {
+            error_report("--image-opts and --format are mutually exclusive");
+            return NULL;
+        }
+        opts = qemu_opts_parse_noisily(qemu_find_opts("source"),
+                                       filename, true);
+        if (!opts) {
+            return NULL;
+        }
+        blk = img_open_opts(filename, opts, flags, writethrough, quiet,
+                            force_share);
+    } else {
+        blk = img_open_file(filename, NULL, fmt, flags, writethrough, quiet,
+                            force_share);
+    }
+    return blk;
+}
+
+
+static int add_old_style_options(const char *fmt, QemuOpts *opts,
+                                 const char *base_filename,
+                                 const char *base_fmt)
+{
+    if (base_filename) {
+        if (!qemu_opt_set(opts, BLOCK_OPT_BACKING_FILE, base_filename,
+                          NULL)) {
+            error_report("Backing file not supported for file format '%s'",
+                         fmt);
+            return -1;
+        }
+    }
+    if (base_fmt) {
+        if (!qemu_opt_set(opts, BLOCK_OPT_BACKING_FMT, base_fmt, NULL)) {
+            error_report("Backing file format not supported for file "
+                         "format '%s'", fmt);
+            return -1;
+        }
+    }
+    return 0;
+}
+
+static int64_t cvtnum_full(const char *name, const char *value, int64_t min,
+                           int64_t max)
+{
+    int err;
+    uint64_t res;
+
+    err = qemu_strtosz(value, NULL, &res);
+    if (err < 0 && err != -ERANGE) {
+        error_report("Invalid %s specified. You may use "
+                     "k, M, G, T, P or E suffixes for", name);
+        error_report("kilobytes, megabytes, gigabytes, terabytes, "
+                     "petabytes and exabytes.");
+        return err;
+    }
+    if (err == -ERANGE || res > max || res < min) {
+        error_report("Invalid %s specified. Must be between %" PRId64
+                     " and %" PRId64 ".", name, min, max);
+        return -ERANGE;
+    }
+    return res;
+}
+
+static int64_t cvtnum(const char *name, const char *value)
+{
+    return cvtnum_full(name, value, 0, INT64_MAX);
+}
+
+static int img_create(int argc, char **argv)
+{
+    int c;
+    uint64_t img_size = -1;
+    const char *fmt = "raw";
+    const char *base_fmt = NULL;
+    const char *filename;
+    const char *base_filename = NULL;
+    char *options = NULL;
+    Error *local_err = NULL;
+    bool quiet = false;
+    int flags = 0;
+
+    for(;;) {
+        static const struct option long_options[] = {
+            {"help", no_argument, 0, 'h'},
+            {"object", required_argument, 0, OPTION_OBJECT},
+            {0, 0, 0, 0}
+        };
+        c = getopt_long(argc, argv, ":F:b:f:ho:qu",
+                        long_options, NULL);
+        if (c == -1) {
+            break;
+        }
+        switch(c) {
+        case ':':
+            missing_argument(argv[optind - 1]);
+            break;
+        case '?':
+            unrecognized_option(argv[optind - 1]);
+            break;
+        case 'h':
+            help();
+            break;
+        case 'F':
+            base_fmt = optarg;
+            break;
+        case 'b':
+            base_filename = optarg;
+            break;
+        case 'f':
+            fmt = optarg;
+            break;
+        case 'o':
+            if (accumulate_options(&options, optarg) < 0) {
+                goto fail;
+            }
+            break;
+        case 'q':
+            quiet = true;
+            break;
+        case 'u':
+            flags |= BDRV_O_NO_BACKING;
+            break;
+        case OPTION_OBJECT: {
+            QemuOpts *opts;
+            opts = qemu_opts_parse_noisily(&qemu_object_opts,
+                                           optarg, true);
+            if (!opts) {
+                goto fail;
+            }
+        }   break;
+        }
+    }
+
+    /* Get the filename */
+    filename = (optind < argc) ? argv[optind] : NULL;
+    if (options && has_help_option(options)) {
+        g_free(options);
+        return print_block_option_help(filename, fmt);
+    }
+
+    if (optind >= argc) {
+        error_exit("Expecting image file name");
+    }
+    optind++;
+
+    if (qemu_opts_foreach(&qemu_object_opts,
+                          user_creatable_add_opts_foreach,
+                          qemu_img_object_print_help, &error_fatal)) {
+        goto fail;
+    }
+
+    /* Get image size, if specified */
+    if (optind < argc) {
+        int64_t sval;
+
+        sval = cvtnum("image size", argv[optind++]);
+        if (sval < 0) {
+            goto fail;
+        }
+        img_size = (uint64_t)sval;
+    }
+    if (optind != argc) {
+        error_exit("Unexpected argument: %s", argv[optind]);
+    }
+
+    bdrv_img_create(filename, fmt, base_filename, base_fmt,
+                    options, img_size, flags, quiet, &local_err);
+    if (local_err) {
+        error_reportf_err(local_err, "%s: ", filename);
+        goto fail;
+    }
+
+    g_free(options);
+    return 0;
+
+fail:
+    g_free(options);
+    return 1;
+}
+
+static void dump_json_image_check(ImageCheck *check, bool quiet)
+{
+    QString *str;
+    QObject *obj;
+    Visitor *v = qobject_output_visitor_new(&obj);
+
+    visit_type_ImageCheck(v, NULL, &check, &error_abort);
+    visit_complete(v, &obj);
+    str = qobject_to_json_pretty(obj);
+    assert(str != NULL);
+    qprintf(quiet, "%s\n", qstring_get_str(str));
+    qobject_unref(obj);
+    visit_free(v);
+    qobject_unref(str);
+}
+
+static void dump_human_image_check(ImageCheck *check, bool quiet)
+{
+    if (!(check->corruptions || check->leaks || check->check_errors)) {
+        qprintf(quiet, "No errors were found on the image.\n");
+    } else {
+        if (check->corruptions) {
+            qprintf(quiet, "\n%" PRId64 " errors were found on the image.\n"
+                    "Data may be corrupted, or further writes to the image "
+                    "may corrupt it.\n",
+                    check->corruptions);
+        }
+
+        if (check->leaks) {
+            qprintf(quiet,
+                    "\n%" PRId64 " leaked clusters were found on the image.\n"
+                    "This means waste of disk space, but no harm to data.\n",
+                    check->leaks);
+        }
+
+        if (check->check_errors) {
+            qprintf(quiet,
+                    "\n%" PRId64
+                    " internal errors have occurred during the check.\n",
+                    check->check_errors);
+        }
+    }
+
+    if (check->total_clusters != 0 && check->allocated_clusters != 0) {
+        qprintf(quiet, "%" PRId64 "/%" PRId64 " = %0.2f%% allocated, "
+                "%0.2f%% fragmented, %0.2f%% compressed clusters\n",
+                check->allocated_clusters, check->total_clusters,
+                check->allocated_clusters * 100.0 / check->total_clusters,
+                check->fragmented_clusters * 100.0 / check->allocated_clusters,
+                check->compressed_clusters * 100.0 /
+                check->allocated_clusters);
+    }
+
+    if (check->image_end_offset) {
+        qprintf(quiet,
+                "Image end offset: %" PRId64 "\n", check->image_end_offset);
+    }
+}
+
+static int collect_image_check(BlockDriverState *bs,
+                   ImageCheck *check,
+                   const char *filename,
+                   const char *fmt,
+                   int fix)
+{
+    int ret;
+    BdrvCheckResult result;
+
+    ret = bdrv_check(bs, &result, fix);
+    if (ret < 0) {
+        return ret;
+    }
+
+    check->filename                 = g_strdup(filename);
+    check->format                   = g_strdup(bdrv_get_format_name(bs));
+    check->check_errors             = result.check_errors;
+    check->corruptions              = result.corruptions;
+    check->has_corruptions          = result.corruptions != 0;
+    check->leaks                    = result.leaks;
+    check->has_leaks                = result.leaks != 0;
+    check->corruptions_fixed        = result.corruptions_fixed;
+    check->has_corruptions_fixed    = result.corruptions_fixed != 0;
+    check->leaks_fixed              = result.leaks_fixed;
+    check->has_leaks_fixed          = result.leaks_fixed != 0;
+    check->image_end_offset         = result.image_end_offset;
+    check->has_image_end_offset     = result.image_end_offset != 0;
+    check->total_clusters           = result.bfi.total_clusters;
+    check->has_total_clusters       = result.bfi.total_clusters != 0;
+    check->allocated_clusters       = result.bfi.allocated_clusters;
+    check->has_allocated_clusters   = result.bfi.allocated_clusters != 0;
+    check->fragmented_clusters      = result.bfi.fragmented_clusters;
+    check->has_fragmented_clusters  = result.bfi.fragmented_clusters != 0;
+    check->compressed_clusters      = result.bfi.compressed_clusters;
+    check->has_compressed_clusters  = result.bfi.compressed_clusters != 0;
+
+    return 0;
+}
+
+/*
+ * Checks an image for consistency. Exit codes:
+ *
+ *  0 - Check completed, image is good
+ *  1 - Check not completed because of internal errors
+ *  2 - Check completed, image is corrupted
+ *  3 - Check completed, image has leaked clusters, but is good otherwise
+ * 63 - Checks are not supported by the image format
+ */
+static int img_check(int argc, char **argv)
+{
+    int c, ret;
+    OutputFormat output_format = OFORMAT_HUMAN;
+    const char *filename, *fmt, *output, *cache;
+    BlockBackend *blk;
+    BlockDriverState *bs;
+    int fix = 0;
+    int flags = BDRV_O_CHECK;
+    bool writethrough;
+    ImageCheck *check;
+    bool quiet = false;
+    bool image_opts = false;
+    bool force_share = false;
+
+    fmt = NULL;
+    output = NULL;
+    cache = BDRV_DEFAULT_CACHE;
+
+    for(;;) {
+        int option_index = 0;
+        static const struct option long_options[] = {
+            {"help", no_argument, 0, 'h'},
+            {"format", required_argument, 0, 'f'},
+            {"repair", required_argument, 0, 'r'},
+            {"output", required_argument, 0, OPTION_OUTPUT},
+            {"object", required_argument, 0, OPTION_OBJECT},
+            {"image-opts", no_argument, 0, OPTION_IMAGE_OPTS},
+            {"force-share", no_argument, 0, 'U'},
+            {0, 0, 0, 0}
+        };
+        c = getopt_long(argc, argv, ":hf:r:T:qU",
+                        long_options, &option_index);
+        if (c == -1) {
+            break;
+        }
+        switch(c) {
+        case ':':
+            missing_argument(argv[optind - 1]);
+            break;
+        case '?':
+            unrecognized_option(argv[optind - 1]);
+            break;
+        case 'h':
+            help();
+            break;
+        case 'f':
+            fmt = optarg;
+            break;
+        case 'r':
+            flags |= BDRV_O_RDWR;
+
+            if (!strcmp(optarg, "leaks")) {
+                fix = BDRV_FIX_LEAKS;
+            } else if (!strcmp(optarg, "all")) {
+                fix = BDRV_FIX_LEAKS | BDRV_FIX_ERRORS;
+            } else {
+                error_exit("Unknown option value for -r "
+                           "(expecting 'leaks' or 'all'): %s", optarg);
+            }
+            break;
+        case OPTION_OUTPUT:
+            output = optarg;
+            break;
+        case 'T':
+            cache = optarg;
+            break;
+        case 'q':
+            quiet = true;
+            break;
+        case 'U':
+            force_share = true;
+            break;
+        case OPTION_OBJECT: {
+            QemuOpts *opts;
+            opts = qemu_opts_parse_noisily(&qemu_object_opts,
+                                           optarg, true);
+            if (!opts) {
+                return 1;
+            }
+        }   break;
+        case OPTION_IMAGE_OPTS:
+            image_opts = true;
+            break;
+        }
+    }
+    if (optind != argc - 1) {
+        error_exit("Expecting one image file name");
+    }
+    filename = argv[optind++];
+
+    if (output && !strcmp(output, "json")) {
+        output_format = OFORMAT_JSON;
+    } else if (output && !strcmp(output, "human")) {
+        output_format = OFORMAT_HUMAN;
+    } else if (output) {
+        error_report("--output must be used with human or json as argument.");
+        return 1;
+    }
+
+    if (qemu_opts_foreach(&qemu_object_opts,
+                          user_creatable_add_opts_foreach,
+                          qemu_img_object_print_help, &error_fatal)) {
+        return 1;
+    }
+
+    ret = bdrv_parse_cache_mode(cache, &flags, &writethrough);
+    if (ret < 0) {
+        error_report("Invalid source cache option: %s", cache);
+        return 1;
+    }
+
+    blk = img_open(image_opts, filename, fmt, flags, writethrough, quiet,
+                   force_share);
+    if (!blk) {
+        return 1;
+    }
+    bs = blk_bs(blk);
+
+    check = g_new0(ImageCheck, 1);
+    ret = collect_image_check(bs, check, filename, fmt, fix);
+
+    if (ret == -ENOTSUP) {
+        error_report("This image format does not support checks");
+        ret = 63;
+        goto fail;
+    }
+
+    if (check->corruptions_fixed || check->leaks_fixed) {
+        int corruptions_fixed, leaks_fixed;
+        bool has_leaks_fixed, has_corruptions_fixed;
+
+        leaks_fixed         = check->leaks_fixed;
+        has_leaks_fixed     = check->has_leaks_fixed;
+        corruptions_fixed   = check->corruptions_fixed;
+        has_corruptions_fixed = check->has_corruptions_fixed;
+
+        if (output_format == OFORMAT_HUMAN) {
+            qprintf(quiet,
+                    "The following inconsistencies were found and repaired:\n\n"
+                    "    %" PRId64 " leaked clusters\n"
+                    "    %" PRId64 " corruptions\n\n"
+                    "Double checking the fixed image now...\n",
+                    check->leaks_fixed,
+                    check->corruptions_fixed);
+        }
+
+        qapi_free_ImageCheck(check);
+        check = g_new0(ImageCheck, 1);
+        ret = collect_image_check(bs, check, filename, fmt, 0);
+
+        check->leaks_fixed          = leaks_fixed;
+        check->has_leaks_fixed      = has_leaks_fixed;
+        check->corruptions_fixed    = corruptions_fixed;
+        check->has_corruptions_fixed = has_corruptions_fixed;
+    }
+
+    if (!ret) {
+        switch (output_format) {
+        case OFORMAT_HUMAN:
+            dump_human_image_check(check, quiet);
+            break;
+        case OFORMAT_JSON:
+            dump_json_image_check(check, quiet);
+            break;
+        }
+    }
+
+    if (ret || check->check_errors) {
+        if (ret) {
+            error_report("Check failed: %s", strerror(-ret));
+        } else {
+            error_report("Check failed");
+        }
+        ret = 1;
+        goto fail;
+    }
+
+    if (check->corruptions) {
+        ret = 2;
+    } else if (check->leaks) {
+        ret = 3;
+    } else {
+        ret = 0;
+    }
+
+fail:
+    qapi_free_ImageCheck(check);
+    blk_unref(blk);
+    return ret;
+}
+
+typedef struct CommonBlockJobCBInfo {
+    BlockDriverState *bs;
+    Error **errp;
+} CommonBlockJobCBInfo;
+
+static void common_block_job_cb(void *opaque, int ret)
+{
+    CommonBlockJobCBInfo *cbi = opaque;
+
+    if (ret < 0) {
+        error_setg_errno(cbi->errp, -ret, "Block job failed");
+    }
+}
+
+static void run_block_job(BlockJob *job, Error **errp)
+{
+    AioContext *aio_context = blk_get_aio_context(job->blk);
+    int ret = 0;
+
+    aio_context_acquire(aio_context);
+    job_ref(&job->job);
+    do {
+        float progress = 0.0f;
+        aio_poll(aio_context, true);
+        if (job->job.progress.total) {
+            progress = (float)job->job.progress.current /
+                       job->job.progress.total * 100.f;
+        }
+        qemu_progress_print(progress, 0);
+    } while (!job_is_ready(&job->job) && !job_is_completed(&job->job));
+
+    if (!job_is_completed(&job->job)) {
+        ret = job_complete_sync(&job->job, errp);
+    } else {
+        ret = job->job.ret;
+    }
+    job_unref(&job->job);
+    aio_context_release(aio_context);
+
+    /* publish completion progress only when success */
+    if (!ret) {
+        qemu_progress_print(100.f, 0);
+    }
+}
+
+static int img_commit(int argc, char **argv)
+{
+    int c, ret, flags;
+    const char *filename, *fmt, *cache, *base;
+    BlockBackend *blk;
+    BlockDriverState *bs, *base_bs;
+    BlockJob *job;
+    bool progress = false, quiet = false, drop = false;
+    bool writethrough;
+    Error *local_err = NULL;
+    CommonBlockJobCBInfo cbi;
+    bool image_opts = false;
+    AioContext *aio_context;
+    int64_t rate_limit = 0;
+
+    fmt = NULL;
+    cache = BDRV_DEFAULT_CACHE;
+    base = NULL;
+    for(;;) {
+        static const struct option long_options[] = {
+            {"help", no_argument, 0, 'h'},
+            {"object", required_argument, 0, OPTION_OBJECT},
+            {"image-opts", no_argument, 0, OPTION_IMAGE_OPTS},
+            {0, 0, 0, 0}
+        };
+        c = getopt_long(argc, argv, ":f:ht:b:dpqr:",
+                        long_options, NULL);
+        if (c == -1) {
+            break;
+        }
+        switch(c) {
+        case ':':
+            missing_argument(argv[optind - 1]);
+            break;
+        case '?':
+            unrecognized_option(argv[optind - 1]);
+            break;
+        case 'h':
+            help();
+            break;
+        case 'f':
+            fmt = optarg;
+            break;
+        case 't':
+            cache = optarg;
+            break;
+        case 'b':
+            base = optarg;
+            /* -b implies -d */
+            drop = true;
+            break;
+        case 'd':
+            drop = true;
+            break;
+        case 'p':
+            progress = true;
+            break;
+        case 'q':
+            quiet = true;
+            break;
+        case 'r':
+            rate_limit = cvtnum("rate limit", optarg);
+            if (rate_limit < 0) {
+                return 1;
+            }
+            break;
+        case OPTION_OBJECT: {
+            QemuOpts *opts;
+            opts = qemu_opts_parse_noisily(&qemu_object_opts,
+                                           optarg, true);
+            if (!opts) {
+                return 1;
+            }
+        }   break;
+        case OPTION_IMAGE_OPTS:
+            image_opts = true;
+            break;
+        }
+    }
+
+    /* Progress is not shown in Quiet mode */
+    if (quiet) {
+        progress = false;
+    }
+
+    if (optind != argc - 1) {
+        error_exit("Expecting one image file name");
+    }
+    filename = argv[optind++];
+
+    if (qemu_opts_foreach(&qemu_object_opts,
+                          user_creatable_add_opts_foreach,
+                          qemu_img_object_print_help, &error_fatal)) {
+        return 1;
+    }
+
+    flags = BDRV_O_RDWR | BDRV_O_UNMAP;
+    ret = bdrv_parse_cache_mode(cache, &flags, &writethrough);
+    if (ret < 0) {
+        error_report("Invalid cache option: %s", cache);
+        return 1;
+    }
+
+    blk = img_open(image_opts, filename, fmt, flags, writethrough, quiet,
+                   false);
+    if (!blk) {
+        return 1;
+    }
+    bs = blk_bs(blk);
+
+    qemu_progress_init(progress, 1.f);
+    qemu_progress_print(0.f, 100);
+
+    if (base) {
+        base_bs = bdrv_find_backing_image(bs, base);
+        if (!base_bs) {
+            error_setg(&local_err,
+                       "Did not find '%s' in the backing chain of '%s'",
+                       base, filename);
+            goto done;
+        }
+    } else {
+        /* This is different from QMP, which by default uses the deepest file in
+         * the backing chain (i.e., the very base); however, the traditional
+         * behavior of qemu-img commit is using the immediate backing file. */
+        base_bs = bdrv_backing_chain_next(bs);
+        if (!base_bs) {
+            error_setg(&local_err, "Image does not have a backing file");
+            goto done;
+        }
+    }
+
+    cbi = (CommonBlockJobCBInfo){
+        .errp = &local_err,
+        .bs   = bs,
+    };
+
+    aio_context = bdrv_get_aio_context(bs);
+    aio_context_acquire(aio_context);
+    commit_active_start("commit", bs, base_bs, JOB_DEFAULT, rate_limit,
+                        BLOCKDEV_ON_ERROR_REPORT, NULL, common_block_job_cb,
+                        &cbi, false, &local_err);
+    aio_context_release(aio_context);
+    if (local_err) {
+        goto done;
+    }
+
+    /* When the block job completes, the BlockBackend reference will point to
+     * the old backing file. In order to avoid that the top image is already
+     * deleted, so we can still empty it afterwards, increment the reference
+     * counter here preemptively. */
+    if (!drop) {
+        bdrv_ref(bs);
+    }
+
+    job = block_job_get("commit");
+    assert(job);
+    run_block_job(job, &local_err);
+    if (local_err) {
+        goto unref_backing;
+    }
+
+    if (!drop) {
+        BlockBackend *old_backing_blk;
+
+        old_backing_blk = blk_new_with_bs(bs, BLK_PERM_WRITE, BLK_PERM_ALL,
+                                          &local_err);
+        if (!old_backing_blk) {
+            goto unref_backing;
+        }
+        ret = blk_make_empty(old_backing_blk, &local_err);
+        blk_unref(old_backing_blk);
+        if (ret == -ENOTSUP) {
+            error_free(local_err);
+            local_err = NULL;
+        } else if (ret < 0) {
+            goto unref_backing;
+        }
+    }
+
+unref_backing:
+    if (!drop) {
+        bdrv_unref(bs);
+    }
+
+done:
+    qemu_progress_end();
+
+    blk_unref(blk);
+
+    if (local_err) {
+        error_report_err(local_err);
+        return 1;
+    }
+
+    qprintf(quiet, "Image committed.\n");
+    return 0;
+}
+
+/*
+ * Returns -1 if 'buf' contains only zeroes, otherwise the byte index
+ * of the first sector boundary within buf where the sector contains a
+ * non-zero byte.  This function is robust to a buffer that is not
+ * sector-aligned.
+ */
+static int64_t find_nonzero(const uint8_t *buf, int64_t n)
+{
+    int64_t i;
+    int64_t end = QEMU_ALIGN_DOWN(n, BDRV_SECTOR_SIZE);
+
+    for (i = 0; i < end; i += BDRV_SECTOR_SIZE) {
+        if (!buffer_is_zero(buf + i, BDRV_SECTOR_SIZE)) {
+            return i;
+        }
+    }
+    if (i < n && !buffer_is_zero(buf + i, n - end)) {
+        return i;
+    }
+    return -1;
+}
+
+/*
+ * Returns true iff the first sector pointed to by 'buf' contains at least
+ * a non-NUL byte.
+ *
+ * 'pnum' is set to the number of sectors (including and immediately following
+ * the first one) that are known to be in the same allocated/unallocated state.
+ * The function will try to align the end offset to alignment boundaries so
+ * that the request will at least end aligned and consecutive requests will
+ * also start at an aligned offset.
+ */
+static int is_allocated_sectors(const uint8_t *buf, int n, int *pnum,
+                                int64_t sector_num, int alignment)
+{
+    bool is_zero;
+    int i, tail;
+
+    if (n <= 0) {
+        *pnum = 0;
+        return 0;
+    }
+    is_zero = buffer_is_zero(buf, BDRV_SECTOR_SIZE);
+    for(i = 1; i < n; i++) {
+        buf += BDRV_SECTOR_SIZE;
+        if (is_zero != buffer_is_zero(buf, BDRV_SECTOR_SIZE)) {
+            break;
+        }
+    }
+
+    tail = (sector_num + i) & (alignment - 1);
+    if (tail) {
+        if (is_zero && i <= tail) {
+            /* treat unallocated areas which only consist
+             * of a small tail as allocated. */
+            is_zero = false;
+        }
+        if (!is_zero) {
+            /* align up end offset of allocated areas. */
+            i += alignment - tail;
+            i = MIN(i, n);
+        } else {
+            /* align down end offset of zero areas. */
+            i -= tail;
+        }
+    }
+    *pnum = i;
+    return !is_zero;
+}
+
+/*
+ * Like is_allocated_sectors, but if the buffer starts with a used sector,
+ * up to 'min' consecutive sectors containing zeros are ignored. This avoids
+ * breaking up write requests for only small sparse areas.
+ */
+static int is_allocated_sectors_min(const uint8_t *buf, int n, int *pnum,
+    int min, int64_t sector_num, int alignment)
+{
+    int ret;
+    int num_checked, num_used;
+
+    if (n < min) {
+        min = n;
+    }
+
+    ret = is_allocated_sectors(buf, n, pnum, sector_num, alignment);
+    if (!ret) {
+        return ret;
+    }
+
+    num_used = *pnum;
+    buf += BDRV_SECTOR_SIZE * *pnum;
+    n -= *pnum;
+    sector_num += *pnum;
+    num_checked = num_used;
+
+    while (n > 0) {
+        ret = is_allocated_sectors(buf, n, pnum, sector_num, alignment);
+
+        buf += BDRV_SECTOR_SIZE * *pnum;
+        n -= *pnum;
+        sector_num += *pnum;
+        num_checked += *pnum;
+        if (ret) {
+            num_used = num_checked;
+        } else if (*pnum >= min) {
+            break;
+        }
+    }
+
+    *pnum = num_used;
+    return 1;
+}
+
+/*
+ * Compares two buffers sector by sector. Returns 0 if the first
+ * sector of each buffer matches, non-zero otherwise.
+ *
+ * pnum is set to the sector-aligned size of the buffer prefix that
+ * has the same matching status as the first sector.
+ */
+static int compare_buffers(const uint8_t *buf1, const uint8_t *buf2,
+                           int64_t bytes, int64_t *pnum)
+{
+    bool res;
+    int64_t i = MIN(bytes, BDRV_SECTOR_SIZE);
+
+    assert(bytes > 0);
+
+    res = !!memcmp(buf1, buf2, i);
+    while (i < bytes) {
+        int64_t len = MIN(bytes - i, BDRV_SECTOR_SIZE);
+
+        if (!!memcmp(buf1 + i, buf2 + i, len) != res) {
+            break;
+        }
+        i += len;
+    }
+
+    *pnum = i;
+    return res;
+}
+
+#define IO_BUF_SIZE (2 * MiB)
+
+/*
+ * Check if passed sectors are empty (not allocated or contain only 0 bytes)
+ *
+ * Intended for use by 'qemu-img compare': Returns 0 in case sectors are
+ * filled with 0, 1 if sectors contain non-zero data (this is a comparison
+ * failure), and 4 on error (the exit status for read errors), after emitting
+ * an error message.
+ *
+ * @param blk:  BlockBackend for the image
+ * @param offset: Starting offset to check
+ * @param bytes: Number of bytes to check
+ * @param filename: Name of disk file we are checking (logging purpose)
+ * @param buffer: Allocated buffer for storing read data
+ * @param quiet: Flag for quiet mode
+ */
+static int check_empty_sectors(BlockBackend *blk, int64_t offset,
+                               int64_t bytes, const char *filename,
+                               uint8_t *buffer, bool quiet)
+{
+    int ret = 0;
+    int64_t idx;
+
+    ret = blk_pread(blk, offset, buffer, bytes);
+    if (ret < 0) {
+        error_report("Error while reading offset %" PRId64 " of %s: %s",
+                     offset, filename, strerror(-ret));
+        return 4;
+    }
+    idx = find_nonzero(buffer, bytes);
+    if (idx >= 0) {
+        qprintf(quiet, "Content mismatch at offset %" PRId64 "!\n",
+                offset + idx);
+        return 1;
+    }
+
+    return 0;
+}
+
+/*
+ * Compares two images. Exit codes:
+ *
+ * 0 - Images are identical
+ * 1 - Images differ
+ * >1 - Error occurred
+ */
+static int img_compare(int argc, char **argv)
+{
+    const char *fmt1 = NULL, *fmt2 = NULL, *cache, *filename1, *filename2;
+    BlockBackend *blk1, *blk2;
+    BlockDriverState *bs1, *bs2;
+    int64_t total_size1, total_size2;
+    uint8_t *buf1 = NULL, *buf2 = NULL;
+    int64_t pnum1, pnum2;
+    int allocated1, allocated2;
+    int ret = 0; /* return value - 0 Ident, 1 Different, >1 Error */
+    bool progress = false, quiet = false, strict = false;
+    int flags;
+    bool writethrough;
+    int64_t total_size;
+    int64_t offset = 0;
+    int64_t chunk;
+    int c;
+    uint64_t progress_base;
+    bool image_opts = false;
+    bool force_share = false;
+
+    cache = BDRV_DEFAULT_CACHE;
+    for (;;) {
+        static const struct option long_options[] = {
+            {"help", no_argument, 0, 'h'},
+            {"object", required_argument, 0, OPTION_OBJECT},
+            {"image-opts", no_argument, 0, OPTION_IMAGE_OPTS},
+            {"force-share", no_argument, 0, 'U'},
+            {0, 0, 0, 0}
+        };
+        c = getopt_long(argc, argv, ":hf:F:T:pqsU",
+                        long_options, NULL);
+        if (c == -1) {
+            break;
+        }
+        switch (c) {
+        case ':':
+            missing_argument(argv[optind - 1]);
+            break;
+        case '?':
+            unrecognized_option(argv[optind - 1]);
+            break;
+        case 'h':
+            help();
+            break;
+        case 'f':
+            fmt1 = optarg;
+            break;
+        case 'F':
+            fmt2 = optarg;
+            break;
+        case 'T':
+            cache = optarg;
+            break;
+        case 'p':
+            progress = true;
+            break;
+        case 'q':
+            quiet = true;
+            break;
+        case 's':
+            strict = true;
+            break;
+        case 'U':
+            force_share = true;
+            break;
+        case OPTION_OBJECT: {
+            QemuOpts *opts;
+            opts = qemu_opts_parse_noisily(&qemu_object_opts,
+                                           optarg, true);
+            if (!opts) {
+                ret = 2;
+                goto out4;
+            }
+        }   break;
+        case OPTION_IMAGE_OPTS:
+            image_opts = true;
+            break;
+        }
+    }
+
+    /* Progress is not shown in Quiet mode */
+    if (quiet) {
+        progress = false;
+    }
+
+
+    if (optind != argc - 2) {
+        error_exit("Expecting two image file names");
+    }
+    filename1 = argv[optind++];
+    filename2 = argv[optind++];
+
+    if (qemu_opts_foreach(&qemu_object_opts,
+                          user_creatable_add_opts_foreach,
+                          qemu_img_object_print_help, &error_fatal)) {
+        ret = 2;
+        goto out4;
+    }
+
+    /* Initialize before goto out */
+    qemu_progress_init(progress, 2.0);
+
+    flags = 0;
+    ret = bdrv_parse_cache_mode(cache, &flags, &writethrough);
+    if (ret < 0) {
+        error_report("Invalid source cache option: %s", cache);
+        ret = 2;
+        goto out3;
+    }
+
+    blk1 = img_open(image_opts, filename1, fmt1, flags, writethrough, quiet,
+                    force_share);
+    if (!blk1) {
+        ret = 2;
+        goto out3;
+    }
+
+    blk2 = img_open(image_opts, filename2, fmt2, flags, writethrough, quiet,
+                    force_share);
+    if (!blk2) {
+        ret = 2;
+        goto out2;
+    }
+    bs1 = blk_bs(blk1);
+    bs2 = blk_bs(blk2);
+
+    buf1 = blk_blockalign(blk1, IO_BUF_SIZE);
+    buf2 = blk_blockalign(blk2, IO_BUF_SIZE);
+    total_size1 = blk_getlength(blk1);
+    if (total_size1 < 0) {
+        error_report("Can't get size of %s: %s",
+                     filename1, strerror(-total_size1));
+        ret = 4;
+        goto out;
+    }
+    total_size2 = blk_getlength(blk2);
+    if (total_size2 < 0) {
+        error_report("Can't get size of %s: %s",
+                     filename2, strerror(-total_size2));
+        ret = 4;
+        goto out;
+    }
+    total_size = MIN(total_size1, total_size2);
+    progress_base = MAX(total_size1, total_size2);
+
+    qemu_progress_print(0, 100);
+
+    if (strict && total_size1 != total_size2) {
+        ret = 1;
+        qprintf(quiet, "Strict mode: Image size mismatch!\n");
+        goto out;
+    }
+
+    while (offset < total_size) {
+        int status1, status2;
+
+        status1 = bdrv_block_status_above(bs1, NULL, offset,
+                                          total_size1 - offset, &pnum1, NULL,
+                                          NULL);
+        if (status1 < 0) {
+            ret = 3;
+            error_report("Sector allocation test failed for %s", filename1);
+            goto out;
+        }
+        allocated1 = status1 & BDRV_BLOCK_ALLOCATED;
+
+        status2 = bdrv_block_status_above(bs2, NULL, offset,
+                                          total_size2 - offset, &pnum2, NULL,
+                                          NULL);
+        if (status2 < 0) {
+            ret = 3;
+            error_report("Sector allocation test failed for %s", filename2);
+            goto out;
+        }
+        allocated2 = status2 & BDRV_BLOCK_ALLOCATED;
+
+        assert(pnum1 && pnum2);
+        chunk = MIN(pnum1, pnum2);
+
+        if (strict) {
+            if (status1 != status2) {
+                ret = 1;
+                qprintf(quiet, "Strict mode: Offset %" PRId64
+                        " block status mismatch!\n", offset);
+                goto out;
+            }
+        }
+        if ((status1 & BDRV_BLOCK_ZERO) && (status2 & BDRV_BLOCK_ZERO)) {
+            /* nothing to do */
+        } else if (allocated1 == allocated2) {
+            if (allocated1) {
+                int64_t pnum;
+
+                chunk = MIN(chunk, IO_BUF_SIZE);
+                ret = blk_pread(blk1, offset, buf1, chunk);
+                if (ret < 0) {
+                    error_report("Error while reading offset %" PRId64
+                                 " of %s: %s",
+                                 offset, filename1, strerror(-ret));
+                    ret = 4;
+                    goto out;
+                }
+                ret = blk_pread(blk2, offset, buf2, chunk);
+                if (ret < 0) {
+                    error_report("Error while reading offset %" PRId64
+                                 " of %s: %s",
+                                 offset, filename2, strerror(-ret));
+                    ret = 4;
+                    goto out;
+                }
+                ret = compare_buffers(buf1, buf2, chunk, &pnum);
+                if (ret || pnum != chunk) {
+                    qprintf(quiet, "Content mismatch at offset %" PRId64 "!\n",
+                            offset + (ret ? 0 : pnum));
+                    ret = 1;
+                    goto out;
+                }
+            }
+        } else {
+            chunk = MIN(chunk, IO_BUF_SIZE);
+            if (allocated1) {
+                ret = check_empty_sectors(blk1, offset, chunk,
+                                          filename1, buf1, quiet);
+            } else {
+                ret = check_empty_sectors(blk2, offset, chunk,
+                                          filename2, buf1, quiet);
+            }
+            if (ret) {
+                goto out;
+            }
+        }
+        offset += chunk;
+        qemu_progress_print(((float) chunk / progress_base) * 100, 100);
+    }
+
+    if (total_size1 != total_size2) {
+        BlockBackend *blk_over;
+        const char *filename_over;
+
+        qprintf(quiet, "Warning: Image size mismatch!\n");
+        if (total_size1 > total_size2) {
+            blk_over = blk1;
+            filename_over = filename1;
+        } else {
+            blk_over = blk2;
+            filename_over = filename2;
+        }
+
+        while (offset < progress_base) {
+            ret = bdrv_block_status_above(blk_bs(blk_over), NULL, offset,
+                                          progress_base - offset, &chunk,
+                                          NULL, NULL);
+            if (ret < 0) {
+                ret = 3;
+                error_report("Sector allocation test failed for %s",
+                             filename_over);
+                goto out;
+
+            }
+            if (ret & BDRV_BLOCK_ALLOCATED && !(ret & BDRV_BLOCK_ZERO)) {
+                chunk = MIN(chunk, IO_BUF_SIZE);
+                ret = check_empty_sectors(blk_over, offset, chunk,
+                                          filename_over, buf1, quiet);
+                if (ret) {
+                    goto out;
+                }
+            }
+            offset += chunk;
+            qemu_progress_print(((float) chunk / progress_base) * 100, 100);
+        }
+    }
+
+    qprintf(quiet, "Images are identical.\n");
+    ret = 0;
+
+out:
+    qemu_vfree(buf1);
+    qemu_vfree(buf2);
+    blk_unref(blk2);
+out2:
+    blk_unref(blk1);
+out3:
+    qemu_progress_end();
+out4:
+    return ret;
+}
+
+/* Convenience wrapper around qmp_block_dirty_bitmap_merge */
+static void do_dirty_bitmap_merge(const char *dst_node, const char *dst_name,
+                                  const char *src_node, const char *src_name,
+                                  Error **errp)
+{
+    BlockDirtyBitmapMergeSource *merge_src;
+    BlockDirtyBitmapMergeSourceList *list;
+
+    merge_src = g_new0(BlockDirtyBitmapMergeSource, 1);
+    merge_src->type = QTYPE_QDICT;
+    merge_src->u.external.node = g_strdup(src_node);
+    merge_src->u.external.name = g_strdup(src_name);
+    list = g_new0(BlockDirtyBitmapMergeSourceList, 1);
+    list->value = merge_src;
+    qmp_block_dirty_bitmap_merge(dst_node, dst_name, list, errp);
+    qapi_free_BlockDirtyBitmapMergeSourceList(list);
+}
+
+enum ImgConvertBlockStatus {
+    BLK_DATA,
+    BLK_ZERO,
+    BLK_BACKING_FILE,
+};
+
+#define MAX_COROUTINES 16
+#define CONVERT_THROTTLE_GROUP "img_convert"
+
+typedef struct ImgConvertState {
+    BlockBackend **src;
+    int64_t *src_sectors;
+    int *src_alignment;
+    int src_num;
+    int64_t total_sectors;
+    int64_t allocated_sectors;
+    int64_t allocated_done;
+    int64_t sector_num;
+    int64_t wr_offs;
+    enum ImgConvertBlockStatus status;
+    int64_t sector_next_status;
+    BlockBackend *target;
+    bool has_zero_init;
+    bool compressed;
+    bool target_is_new;
+    bool target_has_backing;
+    int64_t target_backing_sectors; /* negative if unknown */
+    bool wr_in_order;
+    bool copy_range;
+    bool salvage;
+    bool quiet;
+    int min_sparse;
+    int alignment;
+    size_t cluster_sectors;
+    size_t buf_sectors;
+    long num_coroutines;
+    int running_coroutines;
+    Coroutine *co[MAX_COROUTINES];
+    int64_t wait_sector_num[MAX_COROUTINES];
+    CoMutex lock;
+    int ret;
+} ImgConvertState;
+
+static void convert_select_part(ImgConvertState *s, int64_t sector_num,
+                                int *src_cur, int64_t *src_cur_offset)
+{
+    *src_cur = 0;
+    *src_cur_offset = 0;
+    while (sector_num - *src_cur_offset >= s->src_sectors[*src_cur]) {
+        *src_cur_offset += s->src_sectors[*src_cur];
+        (*src_cur)++;
+        assert(*src_cur < s->src_num);
+    }
+}
+
+static int convert_iteration_sectors(ImgConvertState *s, int64_t sector_num)
+{
+    int64_t src_cur_offset;
+    int ret, n, src_cur;
+    bool post_backing_zero = false;
+
+    convert_select_part(s, sector_num, &src_cur, &src_cur_offset);
+
+    assert(s->total_sectors > sector_num);
+    n = MIN(s->total_sectors - sector_num, BDRV_REQUEST_MAX_SECTORS);
+
+    if (s->target_backing_sectors >= 0) {
+        if (sector_num >= s->target_backing_sectors) {
+            post_backing_zero = true;
+        } else if (sector_num + n > s->target_backing_sectors) {
+            /* Split requests around target_backing_sectors (because
+             * starting from there, zeros are handled differently) */
+            n = s->target_backing_sectors - sector_num;
+        }
+    }
+
+    if (s->sector_next_status <= sector_num) {
+        uint64_t offset = (sector_num - src_cur_offset) * BDRV_SECTOR_SIZE;
+        int64_t count;
+        int tail;
+        BlockDriverState *src_bs = blk_bs(s->src[src_cur]);
+        BlockDriverState *base;
+
+        if (s->target_has_backing) {
+            base = bdrv_cow_bs(bdrv_skip_filters(src_bs));
+        } else {
+            base = NULL;
+        }
+
+        do {
+            count = n * BDRV_SECTOR_SIZE;
+
+            ret = bdrv_block_status_above(src_bs, base, offset, count, &count,
+                                          NULL, NULL);
+
+            if (ret < 0) {
+                if (s->salvage) {
+                    if (n == 1) {
+                        if (!s->quiet) {
+                            warn_report("error while reading block status at "
+                                        "offset %" PRIu64 ": %s", offset,
+                                        strerror(-ret));
+                        }
+                        /* Just try to read the data, then */
+                        ret = BDRV_BLOCK_DATA;
+                        count = BDRV_SECTOR_SIZE;
+                    } else {
+                        /* Retry on a shorter range */
+                        n = DIV_ROUND_UP(n, 4);
+                    }
+                } else {
+                    error_report("error while reading block status at offset "
+                                 "%" PRIu64 ": %s", offset, strerror(-ret));
+                    return ret;
+                }
+            }
+        } while (ret < 0);
+
+        n = DIV_ROUND_UP(count, BDRV_SECTOR_SIZE);
+
+        /*
+         * Avoid that s->sector_next_status becomes unaligned to the source
+         * request alignment and/or cluster size to avoid unnecessary read
+         * cycles.
+         */
+        tail = (sector_num - src_cur_offset + n) % s->src_alignment[src_cur];
+        if (n > tail) {
+            n -= tail;
+        }
+
+        if (ret & BDRV_BLOCK_ZERO) {
+            s->status = post_backing_zero ? BLK_BACKING_FILE : BLK_ZERO;
+        } else if (ret & BDRV_BLOCK_DATA) {
+            s->status = BLK_DATA;
+        } else {
+            s->status = s->target_has_backing ? BLK_BACKING_FILE : BLK_DATA;
+        }
+
+        s->sector_next_status = sector_num + n;
+    }
+
+    n = MIN(n, s->sector_next_status - sector_num);
+    if (s->status == BLK_DATA) {
+        n = MIN(n, s->buf_sectors);
+    }
+
+    /* We need to write complete clusters for compressed images, so if an
+     * unallocated area is shorter than that, we must consider the whole
+     * cluster allocated. */
+    if (s->compressed) {
+        if (n < s->cluster_sectors) {
+            n = MIN(s->cluster_sectors, s->total_sectors - sector_num);
+            s->status = BLK_DATA;
+        } else {
+            n = QEMU_ALIGN_DOWN(n, s->cluster_sectors);
+        }
+    }
+
+    return n;
+}
+
+static int coroutine_fn convert_co_read(ImgConvertState *s, int64_t sector_num,
+                                        int nb_sectors, uint8_t *buf)
+{
+    uint64_t single_read_until = 0;
+    int n, ret;
+
+    assert(nb_sectors <= s->buf_sectors);
+    while (nb_sectors > 0) {
+        BlockBackend *blk;
+        int src_cur;
+        int64_t bs_sectors, src_cur_offset;
+        uint64_t offset;
+
+        /* In the case of compression with multiple source files, we can get a
+         * nb_sectors that spreads into the next part. So we must be able to
+         * read across multiple BDSes for one convert_read() call. */
+        convert_select_part(s, sector_num, &src_cur, &src_cur_offset);
+        blk = s->src[src_cur];
+        bs_sectors = s->src_sectors[src_cur];
+
+        offset = (sector_num - src_cur_offset) << BDRV_SECTOR_BITS;
+
+        n = MIN(nb_sectors, bs_sectors - (sector_num - src_cur_offset));
+        if (single_read_until > offset) {
+            n = 1;
+        }
+
+        ret = blk_co_pread(blk, offset, n << BDRV_SECTOR_BITS, buf, 0);
+        if (ret < 0) {
+            if (s->salvage) {
+                if (n > 1) {
+                    single_read_until = offset + (n << BDRV_SECTOR_BITS);
+                    continue;
+                } else {
+                    if (!s->quiet) {
+                        warn_report("error while reading offset %" PRIu64
+                                    ": %s", offset, strerror(-ret));
+                    }
+                    memset(buf, 0, BDRV_SECTOR_SIZE);
+                }
+            } else {
+                return ret;
+            }
+        }
+
+        sector_num += n;
+        nb_sectors -= n;
+        buf += n * BDRV_SECTOR_SIZE;
+    }
+
+    return 0;
+}
+
+
+static int coroutine_fn convert_co_write(ImgConvertState *s, int64_t sector_num,
+                                         int nb_sectors, uint8_t *buf,
+                                         enum ImgConvertBlockStatus status)
+{
+    int ret;
+
+    while (nb_sectors > 0) {
+        int n = nb_sectors;
+        BdrvRequestFlags flags = s->compressed ? BDRV_REQ_WRITE_COMPRESSED : 0;
+
+        switch (status) {
+        case BLK_BACKING_FILE:
+            /* If we have a backing file, leave clusters unallocated that are
+             * unallocated in the source image, so that the backing file is
+             * visible at the respective offset. */
+            assert(s->target_has_backing);
+            break;
+
+        case BLK_DATA:
+            /* If we're told to keep the target fully allocated (-S 0) or there
+             * is real non-zero data, we must write it. Otherwise we can treat
+             * it as zero sectors.
+             * Compressed clusters need to be written as a whole, so in that
+             * case we can only save the write if the buffer is completely
+             * zeroed. */
+            if (!s->min_sparse ||
+                (!s->compressed &&
+                 is_allocated_sectors_min(buf, n, &n, s->min_sparse,
+                                          sector_num, s->alignment)) ||
+                (s->compressed &&
+                 !buffer_is_zero(buf, n * BDRV_SECTOR_SIZE)))
+            {
+                ret = blk_co_pwrite(s->target, sector_num << BDRV_SECTOR_BITS,
+                                    n << BDRV_SECTOR_BITS, buf, flags);
+                if (ret < 0) {
+                    return ret;
+                }
+                break;
+            }
+            /* fall-through */
+
+        case BLK_ZERO:
+            if (s->has_zero_init) {
+                assert(!s->target_has_backing);
+                break;
+            }
+            ret = blk_co_pwrite_zeroes(s->target,
+                                       sector_num << BDRV_SECTOR_BITS,
+                                       n << BDRV_SECTOR_BITS,
+                                       BDRV_REQ_MAY_UNMAP);
+            if (ret < 0) {
+                return ret;
+            }
+            break;
+        }
+
+        sector_num += n;
+        nb_sectors -= n;
+        buf += n * BDRV_SECTOR_SIZE;
+    }
+
+    return 0;
+}
+
+static int coroutine_fn convert_co_copy_range(ImgConvertState *s, int64_t sector_num,
+                                              int nb_sectors)
+{
+    int n, ret;
+
+    while (nb_sectors > 0) {
+        BlockBackend *blk;
+        int src_cur;
+        int64_t bs_sectors, src_cur_offset;
+        int64_t offset;
+
+        convert_select_part(s, sector_num, &src_cur, &src_cur_offset);
+        offset = (sector_num - src_cur_offset) << BDRV_SECTOR_BITS;
+        blk = s->src[src_cur];
+        bs_sectors = s->src_sectors[src_cur];
+
+        n = MIN(nb_sectors, bs_sectors - (sector_num - src_cur_offset));
+
+        ret = blk_co_copy_range(blk, offset, s->target,
+                                sector_num << BDRV_SECTOR_BITS,
+                                n << BDRV_SECTOR_BITS, 0, 0);
+        if (ret < 0) {
+            return ret;
+        }
+
+        sector_num += n;
+        nb_sectors -= n;
+    }
+    return 0;
+}
+
+static void coroutine_fn convert_co_do_copy(void *opaque)
+{
+    ImgConvertState *s = opaque;
+    uint8_t *buf = NULL;
+    int ret, i;
+    int index = -1;
+
+    for (i = 0; i < s->num_coroutines; i++) {
+        if (s->co[i] == qemu_coroutine_self()) {
+            index = i;
+            break;
+        }
+    }
+    assert(index >= 0);
+
+    s->running_coroutines++;
+    buf = blk_blockalign(s->target, s->buf_sectors * BDRV_SECTOR_SIZE);
+
+    while (1) {
+        int n;
+        int64_t sector_num;
+        enum ImgConvertBlockStatus status;
+        bool copy_range;
+
+        qemu_co_mutex_lock(&s->lock);
+        if (s->ret != -EINPROGRESS || s->sector_num >= s->total_sectors) {
+            qemu_co_mutex_unlock(&s->lock);
+            break;
+        }
+        n = convert_iteration_sectors(s, s->sector_num);
+        if (n < 0) {
+            qemu_co_mutex_unlock(&s->lock);
+            s->ret = n;
+            break;
+        }
+        /* save current sector and allocation status to local variables */
+        sector_num = s->sector_num;
+        status = s->status;
+        if (!s->min_sparse && s->status == BLK_ZERO) {
+            n = MIN(n, s->buf_sectors);
+        }
+        /* increment global sector counter so that other coroutines can
+         * already continue reading beyond this request */
+        s->sector_num += n;
+        qemu_co_mutex_unlock(&s->lock);
+
+        if (status == BLK_DATA || (!s->min_sparse && status == BLK_ZERO)) {
+            s->allocated_done += n;
+            qemu_progress_print(100.0 * s->allocated_done /
+                                        s->allocated_sectors, 0);
+        }
+
+retry:
+        copy_range = s->copy_range && s->status == BLK_DATA;
+        if (status == BLK_DATA && !copy_range) {
+            ret = convert_co_read(s, sector_num, n, buf);
+            if (ret < 0) {
+                error_report("error while reading at byte %lld: %s",
+                             sector_num * BDRV_SECTOR_SIZE, strerror(-ret));
+                s->ret = ret;
+            }
+        } else if (!s->min_sparse && status == BLK_ZERO) {
+            status = BLK_DATA;
+            memset(buf, 0x00, n * BDRV_SECTOR_SIZE);
+        }
+
+        if (s->wr_in_order) {
+            /* keep writes in order */
+            while (s->wr_offs != sector_num && s->ret == -EINPROGRESS) {
+                s->wait_sector_num[index] = sector_num;
+                qemu_coroutine_yield();
+            }
+            s->wait_sector_num[index] = -1;
+        }
+
+        if (s->ret == -EINPROGRESS) {
+            if (copy_range) {
+                ret = convert_co_copy_range(s, sector_num, n);
+                if (ret) {
+                    s->copy_range = false;
+                    goto retry;
+                }
+            } else {
+                ret = convert_co_write(s, sector_num, n, buf, status);
+            }
+            if (ret < 0) {
+                error_report("error while writing at byte %lld: %s",
+                             sector_num * BDRV_SECTOR_SIZE, strerror(-ret));
+                s->ret = ret;
+            }
+        }
+
+        if (s->wr_in_order) {
+            /* reenter the coroutine that might have waited
+             * for this write to complete */
+            s->wr_offs = sector_num + n;
+            for (i = 0; i < s->num_coroutines; i++) {
+                if (s->co[i] && s->wait_sector_num[i] == s->wr_offs) {
+                    /*
+                     * A -> B -> A cannot occur because A has
+                     * s->wait_sector_num[i] == -1 during A -> B.  Therefore
+                     * B will never enter A during this time window.
+                     */
+                    qemu_coroutine_enter(s->co[i]);
+                    break;
+                }
+            }
+        }
+    }
+
+    qemu_vfree(buf);
+    s->co[index] = NULL;
+    s->running_coroutines--;
+    if (!s->running_coroutines && s->ret == -EINPROGRESS) {
+        /* the convert job finished successfully */
+        s->ret = 0;
+    }
+}
+
+static int convert_do_copy(ImgConvertState *s)
+{
+    int ret, i, n;
+    int64_t sector_num = 0;
+
+    /* Check whether we have zero initialisation or can get it efficiently */
+    if (!s->has_zero_init && s->target_is_new && s->min_sparse &&
+        !s->target_has_backing) {
+        s->has_zero_init = bdrv_has_zero_init(blk_bs(s->target));
+    }
+
+    /* Allocate buffer for copied data. For compressed images, only one cluster
+     * can be copied at a time. */
+    if (s->compressed) {
+        if (s->cluster_sectors <= 0 || s->cluster_sectors > s->buf_sectors) {
+            error_report("invalid cluster size");
+            return -EINVAL;
+        }
+        s->buf_sectors = s->cluster_sectors;
+    }
+
+    while (sector_num < s->total_sectors) {
+        n = convert_iteration_sectors(s, sector_num);
+        if (n < 0) {
+            return n;
+        }
+        if (s->status == BLK_DATA || (!s->min_sparse && s->status == BLK_ZERO))
+        {
+            s->allocated_sectors += n;
+        }
+        sector_num += n;
+    }
+
+    /* Do the copy */
+    s->sector_next_status = 0;
+    s->ret = -EINPROGRESS;
+
+    qemu_co_mutex_init(&s->lock);
+    for (i = 0; i < s->num_coroutines; i++) {
+        s->co[i] = qemu_coroutine_create(convert_co_do_copy, s);
+        s->wait_sector_num[i] = -1;
+        qemu_coroutine_enter(s->co[i]);
+    }
+
+    while (s->running_coroutines) {
+        main_loop_wait(false);
+    }
+
+    if (s->compressed && !s->ret) {
+        /* signal EOF to align */
+        ret = blk_pwrite_compressed(s->target, 0, NULL, 0);
+        if (ret < 0) {
+            return ret;
+        }
+    }
+
+    return s->ret;
+}
+
+static int convert_copy_bitmaps(BlockDriverState *src, BlockDriverState *dst)
+{
+    BdrvDirtyBitmap *bm;
+    Error *err = NULL;
+
+    FOR_EACH_DIRTY_BITMAP(src, bm) {
+        const char *name;
+
+        if (!bdrv_dirty_bitmap_get_persistence(bm)) {
+            continue;
+        }
+        name = bdrv_dirty_bitmap_name(bm);
+        qmp_block_dirty_bitmap_add(dst->node_name, name,
+                                   true, bdrv_dirty_bitmap_granularity(bm),
+                                   true, true,
+                                   true, !bdrv_dirty_bitmap_enabled(bm),
+                                   &err);
+        if (err) {
+            error_reportf_err(err, "Failed to create bitmap %s: ", name);
+            return -1;
+        }
+
+        do_dirty_bitmap_merge(dst->node_name, name, src->node_name, name,
+                              &err);
+        if (err) {
+            error_reportf_err(err, "Failed to populate bitmap %s: ", name);
+            return -1;
+        }
+    }
+
+    return 0;
+}
+
+#define MAX_BUF_SECTORS 32768
+
+static void set_rate_limit(BlockBackend *blk, int64_t rate_limit)
+{
+    ThrottleConfig cfg;
+
+    throttle_config_init(&cfg);
+    cfg.buckets[THROTTLE_BPS_WRITE].avg = rate_limit;
+
+    blk_io_limits_enable(blk, CONVERT_THROTTLE_GROUP);
+    blk_set_io_limits(blk, &cfg);
+}
+
+static int img_convert(int argc, char **argv)
+{
+    int c, bs_i, flags, src_flags = 0;
+    const char *fmt = NULL, *out_fmt = NULL, *cache = "unsafe",
+               *src_cache = BDRV_DEFAULT_CACHE, *out_baseimg = NULL,
+               *out_filename, *out_baseimg_param, *snapshot_name = NULL;
+    BlockDriver *drv = NULL, *proto_drv = NULL;
+    BlockDriverInfo bdi;
+    BlockDriverState *out_bs;
+    QemuOpts *opts = NULL, *sn_opts = NULL;
+    QemuOptsList *create_opts = NULL;
+    QDict *open_opts = NULL;
+    char *options = NULL;
+    Error *local_err = NULL;
+    bool writethrough, src_writethrough, image_opts = false,
+         skip_create = false, progress = false, tgt_image_opts = false;
+    int64_t ret = -EINVAL;
+    bool force_share = false;
+    bool explict_min_sparse = false;
+    bool bitmaps = false;
+    int64_t rate_limit = 0;
+
+    ImgConvertState s = (ImgConvertState) {
+        /* Need at least 4k of zeros for sparse detection */
+        .min_sparse         = 8,
+        .copy_range         = false,
+        .buf_sectors        = IO_BUF_SIZE / BDRV_SECTOR_SIZE,
+        .wr_in_order        = true,
+        .num_coroutines     = 8,
+    };
+
+    for(;;) {
+        static const struct option long_options[] = {
+            {"help", no_argument, 0, 'h'},
+            {"object", required_argument, 0, OPTION_OBJECT},
+            {"image-opts", no_argument, 0, OPTION_IMAGE_OPTS},
+            {"force-share", no_argument, 0, 'U'},
+            {"target-image-opts", no_argument, 0, OPTION_TARGET_IMAGE_OPTS},
+            {"salvage", no_argument, 0, OPTION_SALVAGE},
+            {"target-is-zero", no_argument, 0, OPTION_TARGET_IS_ZERO},
+            {"bitmaps", no_argument, 0, OPTION_BITMAPS},
+            {0, 0, 0, 0}
+        };
+        c = getopt_long(argc, argv, ":hf:O:B:Cco:l:S:pt:T:qnm:WUr:",
+                        long_options, NULL);
+        if (c == -1) {
+            break;
+        }
+        switch(c) {
+        case ':':
+            missing_argument(argv[optind - 1]);
+            break;
+        case '?':
+            unrecognized_option(argv[optind - 1]);
+            break;
+        case 'h':
+            help();
+            break;
+        case 'f':
+            fmt = optarg;
+            break;
+        case 'O':
+            out_fmt = optarg;
+            break;
+        case 'B':
+            out_baseimg = optarg;
+            break;
+        case 'C':
+            s.copy_range = true;
+            break;
+        case 'c':
+            s.compressed = true;
+            break;
+        case 'o':
+            if (accumulate_options(&options, optarg) < 0) {
+                goto fail_getopt;
+            }
+            break;
+        case 'l':
+            if (strstart(optarg, SNAPSHOT_OPT_BASE, NULL)) {
+                sn_opts = qemu_opts_parse_noisily(&internal_snapshot_opts,
+                                                  optarg, false);
+                if (!sn_opts) {
+                    error_report("Failed in parsing snapshot param '%s'",
+                                 optarg);
+                    goto fail_getopt;
+                }
+            } else {
+                snapshot_name = optarg;
+            }
+            break;
+        case 'S':
+        {
+            int64_t sval;
+
+            sval = cvtnum("buffer size for sparse output", optarg);
+            if (sval < 0) {
+                goto fail_getopt;
+            } else if (!QEMU_IS_ALIGNED(sval, BDRV_SECTOR_SIZE) ||
+                sval / BDRV_SECTOR_SIZE > MAX_BUF_SECTORS) {
+                error_report("Invalid buffer size for sparse output specified. "
+                    "Valid sizes are multiples of %llu up to %llu. Select "
+                    "0 to disable sparse detection (fully allocates output).",
+                    BDRV_SECTOR_SIZE, MAX_BUF_SECTORS * BDRV_SECTOR_SIZE);
+                goto fail_getopt;
+            }
+
+            s.min_sparse = sval / BDRV_SECTOR_SIZE;
+            explict_min_sparse = true;
+            break;
+        }
+        case 'p':
+            progress = true;
+            break;
+        case 't':
+            cache = optarg;
+            break;
+        case 'T':
+            src_cache = optarg;
+            break;
+        case 'q':
+            s.quiet = true;
+            break;
+        case 'n':
+            skip_create = true;
+            break;
+        case 'm':
+            if (qemu_strtol(optarg, NULL, 0, &s.num_coroutines) ||
+                s.num_coroutines < 1 || s.num_coroutines > MAX_COROUTINES) {
+                error_report("Invalid number of coroutines. Allowed number of"
+                             " coroutines is between 1 and %d", MAX_COROUTINES);
+                goto fail_getopt;
+            }
+            break;
+        case 'W':
+            s.wr_in_order = false;
+            break;
+        case 'U':
+            force_share = true;
+            break;
+        case 'r':
+            rate_limit = cvtnum("rate limit", optarg);
+            if (rate_limit < 0) {
+                goto fail_getopt;
+            }
+            break;
+        case OPTION_OBJECT: {
+            QemuOpts *object_opts;
+            object_opts = qemu_opts_parse_noisily(&qemu_object_opts,
+                                                  optarg, true);
+            if (!object_opts) {
+                goto fail_getopt;
+            }
+            break;
+        }
+        case OPTION_IMAGE_OPTS:
+            image_opts = true;
+            break;
+        case OPTION_SALVAGE:
+            s.salvage = true;
+            break;
+        case OPTION_TARGET_IMAGE_OPTS:
+            tgt_image_opts = true;
+            break;
+        case OPTION_TARGET_IS_ZERO:
+            /*
+             * The user asserting that the target is blank has the
+             * same effect as the target driver supporting zero
+             * initialisation.
+             */
+            s.has_zero_init = true;
+            break;
+        case OPTION_BITMAPS:
+            bitmaps = true;
+            break;
+        }
+    }
+
+    if (!out_fmt && !tgt_image_opts) {
+        out_fmt = "raw";
+    }
+
+    if (qemu_opts_foreach(&qemu_object_opts,
+                          user_creatable_add_opts_foreach,
+                          qemu_img_object_print_help, &error_fatal)) {
+        goto fail_getopt;
+    }
+
+    if (s.compressed && s.copy_range) {
+        error_report("Cannot enable copy offloading when -c is used");
+        goto fail_getopt;
+    }
+
+    if (explict_min_sparse && s.copy_range) {
+        error_report("Cannot enable copy offloading when -S is used");
+        goto fail_getopt;
+    }
+
+    if (s.copy_range && s.salvage) {
+        error_report("Cannot use copy offloading in salvaging mode");
+        goto fail_getopt;
+    }
+
+    if (tgt_image_opts && !skip_create) {
+        error_report("--target-image-opts requires use of -n flag");
+        goto fail_getopt;
+    }
+
+    if (skip_create && options) {
+        error_report("-o has no effect when skipping image creation");
+        goto fail_getopt;
+    }
+
+    if (s.has_zero_init && !skip_create) {
+        error_report("--target-is-zero requires use of -n flag");
+        goto fail_getopt;
+    }
+
+    s.src_num = argc - optind - 1;
+    out_filename = s.src_num >= 1 ? argv[argc - 1] : NULL;
+
+    if (options && has_help_option(options)) {
+        if (out_fmt) {
+            ret = print_block_option_help(out_filename, out_fmt);
+            goto fail_getopt;
+        } else {
+            error_report("Option help requires a format be specified");
+            goto fail_getopt;
+        }
+    }
+
+    if (s.src_num < 1) {
+        error_report("Must specify image file name");
+        goto fail_getopt;
+    }
+
+    /* ret is still -EINVAL until here */
+    ret = bdrv_parse_cache_mode(src_cache, &src_flags, &src_writethrough);
+    if (ret < 0) {
+        error_report("Invalid source cache option: %s", src_cache);
+        goto fail_getopt;
+    }
+
+    /* Initialize before goto out */
+    if (s.quiet) {
+        progress = false;
+    }
+    qemu_progress_init(progress, 1.0);
+    qemu_progress_print(0, 100);
+
+    s.src = g_new0(BlockBackend *, s.src_num);
+    s.src_sectors = g_new(int64_t, s.src_num);
+    s.src_alignment = g_new(int, s.src_num);
+
+    for (bs_i = 0; bs_i < s.src_num; bs_i++) {
+        BlockDriverState *src_bs;
+        s.src[bs_i] = img_open(image_opts, argv[optind + bs_i],
+                               fmt, src_flags, src_writethrough, s.quiet,
+                               force_share);
+        if (!s.src[bs_i]) {
+            ret = -1;
+            goto out;
+        }
+        s.src_sectors[bs_i] = blk_nb_sectors(s.src[bs_i]);
+        if (s.src_sectors[bs_i] < 0) {
+            error_report("Could not get size of %s: %s",
+                         argv[optind + bs_i], strerror(-s.src_sectors[bs_i]));
+            ret = -1;
+            goto out;
+        }
+        src_bs = blk_bs(s.src[bs_i]);
+        s.src_alignment[bs_i] = DIV_ROUND_UP(src_bs->bl.request_alignment,
+                                             BDRV_SECTOR_SIZE);
+        if (!bdrv_get_info(src_bs, &bdi)) {
+            s.src_alignment[bs_i] = MAX(s.src_alignment[bs_i],
+                                        bdi.cluster_size / BDRV_SECTOR_SIZE);
+        }
+        s.total_sectors += s.src_sectors[bs_i];
+    }
+
+    if (sn_opts) {
+        bdrv_snapshot_load_tmp(blk_bs(s.src[0]),
+                               qemu_opt_get(sn_opts, SNAPSHOT_OPT_ID),
+                               qemu_opt_get(sn_opts, SNAPSHOT_OPT_NAME),
+                               &local_err);
+    } else if (snapshot_name != NULL) {
+        if (s.src_num > 1) {
+            error_report("No support for concatenating multiple snapshot");
+            ret = -1;
+            goto out;
+        }
+
+        bdrv_snapshot_load_tmp_by_id_or_name(blk_bs(s.src[0]), snapshot_name,
+                                             &local_err);
+    }
+    if (local_err) {
+        error_reportf_err(local_err, "Failed to load snapshot: ");
+        ret = -1;
+        goto out;
+    }
+
+    if (!skip_create) {
+        /* Find driver and parse its options */
+        drv = bdrv_find_format(out_fmt);
+        if (!drv) {
+            error_report("Unknown file format '%s'", out_fmt);
+            ret = -1;
+            goto out;
+        }
+
+        proto_drv = bdrv_find_protocol(out_filename, true, &local_err);
+        if (!proto_drv) {
+            error_report_err(local_err);
+            ret = -1;
+            goto out;
+        }
+
+        if (!drv->create_opts) {
+            error_report("Format driver '%s' does not support image creation",
+                         drv->format_name);
+            ret = -1;
+            goto out;
+        }
+
+        if (!proto_drv->create_opts) {
+            error_report("Protocol driver '%s' does not support image creation",
+                         proto_drv->format_name);
+            ret = -1;
+            goto out;
+        }
+
+        create_opts = qemu_opts_append(create_opts, drv->create_opts);
+        create_opts = qemu_opts_append(create_opts, proto_drv->create_opts);
+
+        opts = qemu_opts_create(create_opts, NULL, 0, &error_abort);
+        if (options) {
+            if (!qemu_opts_do_parse(opts, options, NULL, &local_err)) {
+                error_report_err(local_err);
+                ret = -1;
+                goto out;
+            }
+        }
+
+        qemu_opt_set_number(opts, BLOCK_OPT_SIZE,
+                            s.total_sectors * BDRV_SECTOR_SIZE, &error_abort);
+        ret = add_old_style_options(out_fmt, opts, out_baseimg, NULL);
+        if (ret < 0) {
+            goto out;
+        }
+    }
+
+    /* Get backing file name if -o backing_file was used */
+    out_baseimg_param = qemu_opt_get(opts, BLOCK_OPT_BACKING_FILE);
+    if (out_baseimg_param) {
+        out_baseimg = out_baseimg_param;
+    }
+    s.target_has_backing = (bool) out_baseimg;
+
+    if (s.has_zero_init && s.target_has_backing) {
+        error_report("Cannot use --target-is-zero when the destination "
+                     "image has a backing file");
+        goto out;
+    }
+
+    if (s.src_num > 1 && out_baseimg) {
+        error_report("Having a backing file for the target makes no sense when "
+                     "concatenating multiple input images");
+        ret = -1;
+        goto out;
+    }
+
+    if (out_baseimg_param) {
+        if (!qemu_opt_get(opts, BLOCK_OPT_BACKING_FMT)) {
+            warn_report("Deprecated use of backing file without explicit "
+                        "backing format");
+        }
+    }
+
+    /* Check if compression is supported */
+    if (s.compressed) {
+        bool encryption =
+            qemu_opt_get_bool(opts, BLOCK_OPT_ENCRYPT, false);
+        const char *encryptfmt =
+            qemu_opt_get(opts, BLOCK_OPT_ENCRYPT_FORMAT);
+        const char *preallocation =
+            qemu_opt_get(opts, BLOCK_OPT_PREALLOC);
+
+        if (drv && !block_driver_can_compress(drv)) {
+            error_report("Compression not supported for this file format");
+            ret = -1;
+            goto out;
+        }
+
+        if (encryption || encryptfmt) {
+            error_report("Compression and encryption not supported at "
+                         "the same time");
+            ret = -1;
+            goto out;
+        }
+
+        if (preallocation
+            && strcmp(preallocation, "off"))
+        {
+            error_report("Compression and preallocation not supported at "
+                         "the same time");
+            ret = -1;
+            goto out;
+        }
+    }
+
+    /* Determine if bitmaps need copying */
+    if (bitmaps) {
+        if (s.src_num > 1) {
+            error_report("Copying bitmaps only possible with single source");
+            ret = -1;
+            goto out;
+        }
+        if (!bdrv_supports_persistent_dirty_bitmap(blk_bs(s.src[0]))) {
+            error_report("Source lacks bitmap support");
+            ret = -1;
+            goto out;
+        }
+    }
+
+    /*
+     * The later open call will need any decryption secrets, and
+     * bdrv_create() will purge "opts", so extract them now before
+     * they are lost.
+     */
+    if (!skip_create) {
+        open_opts = qdict_new();
+        qemu_opt_foreach(opts, img_add_key_secrets, open_opts, &error_abort);
+
+        /* Create the new image */
+        ret = bdrv_create(drv, out_filename, opts, &local_err);
+        if (ret < 0) {
+            error_reportf_err(local_err, "%s: error while converting %s: ",
+                              out_filename, out_fmt);
+            goto out;
+        }
+    }
+
+    s.target_is_new = !skip_create;
+
+    flags = s.min_sparse ? (BDRV_O_RDWR | BDRV_O_UNMAP) : BDRV_O_RDWR;
+    ret = bdrv_parse_cache_mode(cache, &flags, &writethrough);
+    if (ret < 0) {
+        error_report("Invalid cache option: %s", cache);
+        goto out;
+    }
+
+    if (skip_create) {
+        s.target = img_open(tgt_image_opts, out_filename, out_fmt,
+                            flags, writethrough, s.quiet, false);
+    } else {
+        /* TODO ultimately we should allow --target-image-opts
+         * to be used even when -n is not given.
+         * That has to wait for bdrv_create to be improved
+         * to allow filenames in option syntax
+         */
+        s.target = img_open_file(out_filename, open_opts, out_fmt,
+                                 flags, writethrough, s.quiet, false);
+        open_opts = NULL; /* blk_new_open will have freed it */
+    }
+    if (!s.target) {
+        ret = -1;
+        goto out;
+    }
+    out_bs = blk_bs(s.target);
+
+    if (bitmaps && !bdrv_supports_persistent_dirty_bitmap(out_bs)) {
+        error_report("Format driver '%s' does not support bitmaps",
+                     out_bs->drv->format_name);
+        ret = -1;
+        goto out;
+    }
+
+    if (s.compressed && !block_driver_can_compress(out_bs->drv)) {
+        error_report("Compression not supported for this file format");
+        ret = -1;
+        goto out;
+    }
+
+    /* increase bufsectors from the default 4096 (2M) if opt_transfer
+     * or discard_alignment of the out_bs is greater. Limit to
+     * MAX_BUF_SECTORS as maximum which is currently 32768 (16MB). */
+    s.buf_sectors = MIN(MAX_BUF_SECTORS,
+                        MAX(s.buf_sectors,
+                            MAX(out_bs->bl.opt_transfer >> BDRV_SECTOR_BITS,
+                                out_bs->bl.pdiscard_alignment >>
+                                BDRV_SECTOR_BITS)));
+
+    /* try to align the write requests to the destination to avoid unnecessary
+     * RMW cycles. */
+    s.alignment = MAX(pow2floor(s.min_sparse),
+                      DIV_ROUND_UP(out_bs->bl.request_alignment,
+                                   BDRV_SECTOR_SIZE));
+    assert(is_power_of_2(s.alignment));
+
+    if (skip_create) {
+        int64_t output_sectors = blk_nb_sectors(s.target);
+        if (output_sectors < 0) {
+            error_report("unable to get output image length: %s",
+                         strerror(-output_sectors));
+            ret = -1;
+            goto out;
+        } else if (output_sectors < s.total_sectors) {
+            error_report("output file is smaller than input file");
+            ret = -1;
+            goto out;
+        }
+    }
+
+    if (s.target_has_backing && s.target_is_new) {
+        /* Errors are treated as "backing length unknown" (which means
+         * s.target_backing_sectors has to be negative, which it will
+         * be automatically).  The backing file length is used only
+         * for optimizations, so such a case is not fatal. */
+        s.target_backing_sectors =
+            bdrv_nb_sectors(bdrv_backing_chain_next(out_bs));
+    } else {
+        s.target_backing_sectors = -1;
+    }
+
+    ret = bdrv_get_info(out_bs, &bdi);
+    if (ret < 0) {
+        if (s.compressed) {
+            error_report("could not get block driver info");
+            goto out;
+        }
+    } else {
+        s.compressed = s.compressed || bdi.needs_compressed_writes;
+        s.cluster_sectors = bdi.cluster_size / BDRV_SECTOR_SIZE;
+    }
+
+    if (rate_limit) {
+        set_rate_limit(s.target, rate_limit);
+    }
+
+    ret = convert_do_copy(&s);
+
+    /* Now copy the bitmaps */
+    if (bitmaps && ret == 0) {
+        ret = convert_copy_bitmaps(blk_bs(s.src[0]), out_bs);
+    }
+
+out:
+    if (!ret) {
+        qemu_progress_print(100, 0);
+    }
+    qemu_progress_end();
+    qemu_opts_del(opts);
+    qemu_opts_free(create_opts);
+    qobject_unref(open_opts);
+    blk_unref(s.target);
+    if (s.src) {
+        for (bs_i = 0; bs_i < s.src_num; bs_i++) {
+            blk_unref(s.src[bs_i]);
+        }
+        g_free(s.src);
+    }
+    g_free(s.src_sectors);
+    g_free(s.src_alignment);
+fail_getopt:
+    qemu_opts_del(sn_opts);
+    g_free(options);
+
+    return !!ret;
+}
+
+
+static void dump_snapshots(BlockDriverState *bs)
+{
+    QEMUSnapshotInfo *sn_tab, *sn;
+    int nb_sns, i;
+
+    nb_sns = bdrv_snapshot_list(bs, &sn_tab);
+    if (nb_sns <= 0)
+        return;
+    printf("Snapshot list:\n");
+    bdrv_snapshot_dump(NULL);
+    printf("\n");
+    for(i = 0; i < nb_sns; i++) {
+        sn = &sn_tab[i];
+        bdrv_snapshot_dump(sn);
+        printf("\n");
+    }
+    g_free(sn_tab);
+}
+
+static void dump_json_image_info_list(ImageInfoList *list)
+{
+    QString *str;
+    QObject *obj;
+    Visitor *v = qobject_output_visitor_new(&obj);
+
+    visit_type_ImageInfoList(v, NULL, &list, &error_abort);
+    visit_complete(v, &obj);
+    str = qobject_to_json_pretty(obj);
+    assert(str != NULL);
+    printf("%s\n", qstring_get_str(str));
+    qobject_unref(obj);
+    visit_free(v);
+    qobject_unref(str);
+}
+
+static void dump_json_image_info(ImageInfo *info)
+{
+    QString *str;
+    QObject *obj;
+    Visitor *v = qobject_output_visitor_new(&obj);
+
+    visit_type_ImageInfo(v, NULL, &info, &error_abort);
+    visit_complete(v, &obj);
+    str = qobject_to_json_pretty(obj);
+    assert(str != NULL);
+    printf("%s\n", qstring_get_str(str));
+    qobject_unref(obj);
+    visit_free(v);
+    qobject_unref(str);
+}
+
+static void dump_human_image_info_list(ImageInfoList *list)
+{
+    ImageInfoList *elem;
+    bool delim = false;
+
+    for (elem = list; elem; elem = elem->next) {
+        if (delim) {
+            printf("\n");
+        }
+        delim = true;
+
+        bdrv_image_info_dump(elem->value);
+    }
+}
+
+static gboolean str_equal_func(gconstpointer a, gconstpointer b)
+{
+    return strcmp(a, b) == 0;
+}
+
+/**
+ * Open an image file chain and return an ImageInfoList
+ *
+ * @filename: topmost image filename
+ * @fmt: topmost image format (may be NULL to autodetect)
+ * @chain: true  - enumerate entire backing file chain
+ *         false - only topmost image file
+ *
+ * Returns a list of ImageInfo objects or NULL if there was an error opening an
+ * image file.  If there was an error a message will have been printed to
+ * stderr.
+ */
+static ImageInfoList *collect_image_info_list(bool image_opts,
+                                              const char *filename,
+                                              const char *fmt,
+                                              bool chain, bool force_share)
+{
+    ImageInfoList *head = NULL;
+    ImageInfoList **last = &head;
+    GHashTable *filenames;
+    Error *err = NULL;
+
+    filenames = g_hash_table_new_full(g_str_hash, str_equal_func, NULL, NULL);
+
+    while (filename) {
+        BlockBackend *blk;
+        BlockDriverState *bs;
+        ImageInfo *info;
+        ImageInfoList *elem;
+
+        if (g_hash_table_lookup_extended(filenames, filename, NULL, NULL)) {
+            error_report("Backing file '%s' creates an infinite loop.",
+                         filename);
+            goto err;
+        }
+        g_hash_table_insert(filenames, (gpointer)filename, NULL);
+
+        blk = img_open(image_opts, filename, fmt,
+                       BDRV_O_NO_BACKING | BDRV_O_NO_IO, false, false,
+                       force_share);
+        if (!blk) {
+            goto err;
+        }
+        bs = blk_bs(blk);
+
+        bdrv_query_image_info(bs, &info, &err);
+        if (err) {
+            error_report_err(err);
+            blk_unref(blk);
+            goto err;
+        }
+
+        elem = g_new0(ImageInfoList, 1);
+        elem->value = info;
+        *last = elem;
+        last = &elem->next;
+
+        blk_unref(blk);
+
+        /* Clear parameters that only apply to the topmost image */
+        filename = fmt = NULL;
+        image_opts = false;
+
+        if (chain) {
+            if (info->has_full_backing_filename) {
+                filename = info->full_backing_filename;
+            } else if (info->has_backing_filename) {
+                error_report("Could not determine absolute backing filename,"
+                             " but backing filename '%s' present",
+                             info->backing_filename);
+                goto err;
+            }
+            if (info->has_backing_filename_format) {
+                fmt = info->backing_filename_format;
+            }
+        }
+    }
+    g_hash_table_destroy(filenames);
+    return head;
+
+err:
+    qapi_free_ImageInfoList(head);
+    g_hash_table_destroy(filenames);
+    return NULL;
+}
+
+static int img_info(int argc, char **argv)
+{
+    int c;
+    OutputFormat output_format = OFORMAT_HUMAN;
+    bool chain = false;
+    const char *filename, *fmt, *output;
+    ImageInfoList *list;
+    bool image_opts = false;
+    bool force_share = false;
+
+    fmt = NULL;
+    output = NULL;
+    for(;;) {
+        int option_index = 0;
+        static const struct option long_options[] = {
+            {"help", no_argument, 0, 'h'},
+            {"format", required_argument, 0, 'f'},
+            {"output", required_argument, 0, OPTION_OUTPUT},
+            {"backing-chain", no_argument, 0, OPTION_BACKING_CHAIN},
+            {"object", required_argument, 0, OPTION_OBJECT},
+            {"image-opts", no_argument, 0, OPTION_IMAGE_OPTS},
+            {"force-share", no_argument, 0, 'U'},
+            {0, 0, 0, 0}
+        };
+        c = getopt_long(argc, argv, ":f:hU",
+                        long_options, &option_index);
+        if (c == -1) {
+            break;
+        }
+        switch(c) {
+        case ':':
+            missing_argument(argv[optind - 1]);
+            break;
+        case '?':
+            unrecognized_option(argv[optind - 1]);
+            break;
+        case 'h':
+            help();
+            break;
+        case 'f':
+            fmt = optarg;
+            break;
+        case 'U':
+            force_share = true;
+            break;
+        case OPTION_OUTPUT:
+            output = optarg;
+            break;
+        case OPTION_BACKING_CHAIN:
+            chain = true;
+            break;
+        case OPTION_OBJECT: {
+            QemuOpts *opts;
+            opts = qemu_opts_parse_noisily(&qemu_object_opts,
+                                           optarg, true);
+            if (!opts) {
+                return 1;
+            }
+        }   break;
+        case OPTION_IMAGE_OPTS:
+            image_opts = true;
+            break;
+        }
+    }
+    if (optind != argc - 1) {
+        error_exit("Expecting one image file name");
+    }
+    filename = argv[optind++];
+
+    if (output && !strcmp(output, "json")) {
+        output_format = OFORMAT_JSON;
+    } else if (output && !strcmp(output, "human")) {
+        output_format = OFORMAT_HUMAN;
+    } else if (output) {
+        error_report("--output must be used with human or json as argument.");
+        return 1;
+    }
+
+    if (qemu_opts_foreach(&qemu_object_opts,
+                          user_creatable_add_opts_foreach,
+                          qemu_img_object_print_help, &error_fatal)) {
+        return 1;
+    }
+
+    list = collect_image_info_list(image_opts, filename, fmt, chain,
+                                   force_share);
+    if (!list) {
+        return 1;
+    }
+
+    switch (output_format) {
+    case OFORMAT_HUMAN:
+        dump_human_image_info_list(list);
+        break;
+    case OFORMAT_JSON:
+        if (chain) {
+            dump_json_image_info_list(list);
+        } else {
+            dump_json_image_info(list->value);
+        }
+        break;
+    }
+
+    qapi_free_ImageInfoList(list);
+    return 0;
+}
+
+static int dump_map_entry(OutputFormat output_format, MapEntry *e,
+                          MapEntry *next)
+{
+    switch (output_format) {
+    case OFORMAT_HUMAN:
+        if (e->data && !e->has_offset) {
+            error_report("File contains external, encrypted or compressed clusters.");
+            return -1;
+        }
+        if (e->data && !e->zero) {
+            printf("%#-16"PRIx64"%#-16"PRIx64"%#-16"PRIx64"%s\n",
+                   e->start, e->length,
+                   e->has_offset ? e->offset : 0,
+                   e->has_filename ? e->filename : "");
+        }
+        /* This format ignores the distinction between 0, ZERO and ZERO|DATA.
+         * Modify the flags here to allow more coalescing.
+         */
+        if (next && (!next->data || next->zero)) {
+            next->data = false;
+            next->zero = true;
+        }
+        break;
+    case OFORMAT_JSON:
+        printf("{ \"start\": %"PRId64", \"length\": %"PRId64","
+               " \"depth\": %"PRId64", \"zero\": %s, \"data\": %s",
+               e->start, e->length, e->depth,
+               e->zero ? "true" : "false",
+               e->data ? "true" : "false");
+        if (e->has_offset) {
+            printf(", \"offset\": %"PRId64"", e->offset);
+        }
+        putchar('}');
+
+        if (next) {
+            puts(",");
+        }
+        break;
+    }
+    return 0;
+}
+
+static int get_block_status(BlockDriverState *bs, int64_t offset,
+                            int64_t bytes, MapEntry *e)
+{
+    int ret;
+    int depth;
+    BlockDriverState *file;
+    bool has_offset;
+    int64_t map;
+    char *filename = NULL;
+
+    /* As an optimization, we could cache the current range of unallocated
+     * clusters in each file of the chain, and avoid querying the same
+     * range repeatedly.
+     */
+
+    depth = 0;
+    for (;;) {
+        bs = bdrv_skip_filters(bs);
+        ret = bdrv_block_status(bs, offset, bytes, &bytes, &map, &file);
+        if (ret < 0) {
+            return ret;
+        }
+        assert(bytes);
+        if (ret & (BDRV_BLOCK_ZERO|BDRV_BLOCK_DATA)) {
+            break;
+        }
+        bs = bdrv_cow_bs(bs);
+        if (bs == NULL) {
+            ret = 0;
+            break;
+        }
+
+        depth++;
+    }
+
+    has_offset = !!(ret & BDRV_BLOCK_OFFSET_VALID);
+
+    if (file && has_offset) {
+        bdrv_refresh_filename(file);
+        filename = file->filename;
+    }
+
+    *e = (MapEntry) {
+        .start = offset,
+        .length = bytes,
+        .data = !!(ret & BDRV_BLOCK_DATA),
+        .zero = !!(ret & BDRV_BLOCK_ZERO),
+        .offset = map,
+        .has_offset = has_offset,
+        .depth = depth,
+        .has_filename = filename,
+        .filename = filename,
+    };
+
+    return 0;
+}
+
+static inline bool entry_mergeable(const MapEntry *curr, const MapEntry *next)
+{
+    if (curr->length == 0) {
+        return false;
+    }
+    if (curr->zero != next->zero ||
+        curr->data != next->data ||
+        curr->depth != next->depth ||
+        curr->has_filename != next->has_filename ||
+        curr->has_offset != next->has_offset) {
+        return false;
+    }
+    if (curr->has_filename && strcmp(curr->filename, next->filename)) {
+        return false;
+    }
+    if (curr->has_offset && curr->offset + curr->length != next->offset) {
+        return false;
+    }
+    return true;
+}
+
+static int img_map(int argc, char **argv)
+{
+    int c;
+    OutputFormat output_format = OFORMAT_HUMAN;
+    BlockBackend *blk;
+    BlockDriverState *bs;
+    const char *filename, *fmt, *output;
+    int64_t length;
+    MapEntry curr = { .length = 0 }, next;
+    int ret = 0;
+    bool image_opts = false;
+    bool force_share = false;
+    int64_t start_offset = 0;
+    int64_t max_length = -1;
+
+    fmt = NULL;
+    output = NULL;
+    for (;;) {
+        int option_index = 0;
+        static const struct option long_options[] = {
+            {"help", no_argument, 0, 'h'},
+            {"format", required_argument, 0, 'f'},
+            {"output", required_argument, 0, OPTION_OUTPUT},
+            {"object", required_argument, 0, OPTION_OBJECT},
+            {"image-opts", no_argument, 0, OPTION_IMAGE_OPTS},
+            {"force-share", no_argument, 0, 'U'},
+            {"start-offset", required_argument, 0, 's'},
+            {"max-length", required_argument, 0, 'l'},
+            {0, 0, 0, 0}
+        };
+        c = getopt_long(argc, argv, ":f:s:l:hU",
+                        long_options, &option_index);
+        if (c == -1) {
+            break;
+        }
+        switch (c) {
+        case ':':
+            missing_argument(argv[optind - 1]);
+            break;
+        case '?':
+            unrecognized_option(argv[optind - 1]);
+            break;
+        case 'h':
+            help();
+            break;
+        case 'f':
+            fmt = optarg;
+            break;
+        case 'U':
+            force_share = true;
+            break;
+        case OPTION_OUTPUT:
+            output = optarg;
+            break;
+        case 's':
+            start_offset = cvtnum("start offset", optarg);
+            if (start_offset < 0) {
+                return 1;
+            }
+            break;
+        case 'l':
+            max_length = cvtnum("max length", optarg);
+            if (max_length < 0) {
+                return 1;
+            }
+            break;
+        case OPTION_OBJECT: {
+            QemuOpts *opts;
+            opts = qemu_opts_parse_noisily(&qemu_object_opts,
+                                           optarg, true);
+            if (!opts) {
+                return 1;
+            }
+        }   break;
+        case OPTION_IMAGE_OPTS:
+            image_opts = true;
+            break;
+        }
+    }
+    if (optind != argc - 1) {
+        error_exit("Expecting one image file name");
+    }
+    filename = argv[optind];
+
+    if (output && !strcmp(output, "json")) {
+        output_format = OFORMAT_JSON;
+    } else if (output && !strcmp(output, "human")) {
+        output_format = OFORMAT_HUMAN;
+    } else if (output) {
+        error_report("--output must be used with human or json as argument.");
+        return 1;
+    }
+
+    if (qemu_opts_foreach(&qemu_object_opts,
+                          user_creatable_add_opts_foreach,
+                          qemu_img_object_print_help, &error_fatal)) {
+        return 1;
+    }
+
+    blk = img_open(image_opts, filename, fmt, 0, false, false, force_share);
+    if (!blk) {
+        return 1;
+    }
+    bs = blk_bs(blk);
+
+    if (output_format == OFORMAT_HUMAN) {
+        printf("%-16s%-16s%-16s%s\n", "Offset", "Length", "Mapped to", "File");
+    } else if (output_format == OFORMAT_JSON) {
+        putchar('[');
+    }
+
+    length = blk_getlength(blk);
+    if (length < 0) {
+        error_report("Failed to get size for '%s'", filename);
+        return 1;
+    }
+    if (max_length != -1) {
+        length = MIN(start_offset + max_length, length);
+    }
+
+    curr.start = start_offset;
+    while (curr.start + curr.length < length) {
+        int64_t offset = curr.start + curr.length;
+        int64_t n = length - offset;
+
+        ret = get_block_status(bs, offset, n, &next);
+        if (ret < 0) {
+            error_report("Could not read file metadata: %s", strerror(-ret));
+            goto out;
+        }
+
+        if (entry_mergeable(&curr, &next)) {
+            curr.length += next.length;
+            continue;
+        }
+
+        if (curr.length > 0) {
+            ret = dump_map_entry(output_format, &curr, &next);
+            if (ret < 0) {
+                goto out;
+            }
+        }
+        curr = next;
+    }
+
+    ret = dump_map_entry(output_format, &curr, NULL);
+    if (output_format == OFORMAT_JSON) {
+        puts("]");
+    }
+
+out:
+    blk_unref(blk);
+    return ret < 0;
+}
+
+#define SNAPSHOT_LIST   1
+#define SNAPSHOT_CREATE 2
+#define SNAPSHOT_APPLY  3
+#define SNAPSHOT_DELETE 4
+
+static int img_snapshot(int argc, char **argv)
+{
+    BlockBackend *blk;
+    BlockDriverState *bs;
+    QEMUSnapshotInfo sn;
+    char *filename, *snapshot_name = NULL;
+    int c, ret = 0, bdrv_oflags;
+    int action = 0;
+    qemu_timeval tv;
+    bool quiet = false;
+    Error *err = NULL;
+    bool image_opts = false;
+    bool force_share = false;
+
+    bdrv_oflags = BDRV_O_RDWR;
+    /* Parse commandline parameters */
+    for(;;) {
+        static const struct option long_options[] = {
+            {"help", no_argument, 0, 'h'},
+            {"object", required_argument, 0, OPTION_OBJECT},
+            {"image-opts", no_argument, 0, OPTION_IMAGE_OPTS},
+            {"force-share", no_argument, 0, 'U'},
+            {0, 0, 0, 0}
+        };
+        c = getopt_long(argc, argv, ":la:c:d:hqU",
+                        long_options, NULL);
+        if (c == -1) {
+            break;
+        }
+        switch(c) {
+        case ':':
+            missing_argument(argv[optind - 1]);
+            break;
+        case '?':
+            unrecognized_option(argv[optind - 1]);
+            break;
+        case 'h':
+            help();
+            return 0;
+        case 'l':
+            if (action) {
+                error_exit("Cannot mix '-l', '-a', '-c', '-d'");
+                return 0;
+            }
+            action = SNAPSHOT_LIST;
+            bdrv_oflags &= ~BDRV_O_RDWR; /* no need for RW */
+            break;
+        case 'a':
+            if (action) {
+                error_exit("Cannot mix '-l', '-a', '-c', '-d'");
+                return 0;
+            }
+            action = SNAPSHOT_APPLY;
+            snapshot_name = optarg;
+            break;
+        case 'c':
+            if (action) {
+                error_exit("Cannot mix '-l', '-a', '-c', '-d'");
+                return 0;
+            }
+            action = SNAPSHOT_CREATE;
+            snapshot_name = optarg;
+            break;
+        case 'd':
+            if (action) {
+                error_exit("Cannot mix '-l', '-a', '-c', '-d'");
+                return 0;
+            }
+            action = SNAPSHOT_DELETE;
+            snapshot_name = optarg;
+            break;
+        case 'q':
+            quiet = true;
+            break;
+        case 'U':
+            force_share = true;
+            break;
+        case OPTION_OBJECT: {
+            QemuOpts *opts;
+            opts = qemu_opts_parse_noisily(&qemu_object_opts,
+                                           optarg, true);
+            if (!opts) {
+                return 1;
+            }
+        }   break;
+        case OPTION_IMAGE_OPTS:
+            image_opts = true;
+            break;
+        }
+    }
+
+    if (optind != argc - 1) {
+        error_exit("Expecting one image file name");
+    }
+    filename = argv[optind++];
+
+    if (qemu_opts_foreach(&qemu_object_opts,
+                          user_creatable_add_opts_foreach,
+                          qemu_img_object_print_help, &error_fatal)) {
+        return 1;
+    }
+
+    /* Open the image */
+    blk = img_open(image_opts, filename, NULL, bdrv_oflags, false, quiet,
+                   force_share);
+    if (!blk) {
+        return 1;
+    }
+    bs = blk_bs(blk);
+
+    /* Perform the requested action */
+    switch(action) {
+    case SNAPSHOT_LIST:
+        dump_snapshots(bs);
+        break;
+
+    case SNAPSHOT_CREATE:
+        memset(&sn, 0, sizeof(sn));
+        pstrcpy(sn.name, sizeof(sn.name), snapshot_name);
+
+        qemu_gettimeofday(&tv);
+        sn.date_sec = tv.tv_sec;
+        sn.date_nsec = tv.tv_usec * 1000;
+
+        ret = bdrv_snapshot_create(bs, &sn);
+        if (ret) {
+            error_report("Could not create snapshot '%s': %d (%s)",
+                snapshot_name, ret, strerror(-ret));
+        }
+        break;
+
+    case SNAPSHOT_APPLY:
+        ret = bdrv_snapshot_goto(bs, snapshot_name, &err);
+        if (ret) {
+            error_reportf_err(err, "Could not apply snapshot '%s': ",
+                              snapshot_name);
+        }
+        break;
+
+    case SNAPSHOT_DELETE:
+        ret = bdrv_snapshot_find(bs, &sn, snapshot_name);
+        if (ret < 0) {
+            error_report("Could not delete snapshot '%s': snapshot not "
+                         "found", snapshot_name);
+            ret = 1;
+        } else {
+            ret = bdrv_snapshot_delete(bs, sn.id_str, sn.name, &err);
+            if (ret < 0) {
+                error_reportf_err(err, "Could not delete snapshot '%s': ",
+                                  snapshot_name);
+                ret = 1;
+            }
+        }
+        break;
+    }
+
+    /* Cleanup */
+    blk_unref(blk);
+    if (ret) {
+        return 1;
+    }
+    return 0;
+}
+
+static int img_rebase(int argc, char **argv)
+{
+    BlockBackend *blk = NULL, *blk_old_backing = NULL, *blk_new_backing = NULL;
+    uint8_t *buf_old = NULL;
+    uint8_t *buf_new = NULL;
+    BlockDriverState *bs = NULL, *prefix_chain_bs = NULL;
+    BlockDriverState *unfiltered_bs;
+    char *filename;
+    const char *fmt, *cache, *src_cache, *out_basefmt, *out_baseimg;
+    int c, flags, src_flags, ret;
+    bool writethrough, src_writethrough;
+    int unsafe = 0;
+    bool force_share = false;
+    int progress = 0;
+    bool quiet = false;
+    Error *local_err = NULL;
+    bool image_opts = false;
+
+    /* Parse commandline parameters */
+    fmt = NULL;
+    cache = BDRV_DEFAULT_CACHE;
+    src_cache = BDRV_DEFAULT_CACHE;
+    out_baseimg = NULL;
+    out_basefmt = NULL;
+    for(;;) {
+        static const struct option long_options[] = {
+            {"help", no_argument, 0, 'h'},
+            {"object", required_argument, 0, OPTION_OBJECT},
+            {"image-opts", no_argument, 0, OPTION_IMAGE_OPTS},
+            {"force-share", no_argument, 0, 'U'},
+            {0, 0, 0, 0}
+        };
+        c = getopt_long(argc, argv, ":hf:F:b:upt:T:qU",
+                        long_options, NULL);
+        if (c == -1) {
+            break;
+        }
+        switch(c) {
+        case ':':
+            missing_argument(argv[optind - 1]);
+            break;
+        case '?':
+            unrecognized_option(argv[optind - 1]);
+            break;
+        case 'h':
+            help();
+            return 0;
+        case 'f':
+            fmt = optarg;
+            break;
+        case 'F':
+            out_basefmt = optarg;
+            break;
+        case 'b':
+            out_baseimg = optarg;
+            break;
+        case 'u':
+            unsafe = 1;
+            break;
+        case 'p':
+            progress = 1;
+            break;
+        case 't':
+            cache = optarg;
+            break;
+        case 'T':
+            src_cache = optarg;
+            break;
+        case 'q':
+            quiet = true;
+            break;
+        case OPTION_OBJECT: {
+            QemuOpts *opts;
+            opts = qemu_opts_parse_noisily(&qemu_object_opts,
+                                           optarg, true);
+            if (!opts) {
+                return 1;
+            }
+        }   break;
+        case OPTION_IMAGE_OPTS:
+            image_opts = true;
+            break;
+        case 'U':
+            force_share = true;
+            break;
+        }
+    }
+
+    if (quiet) {
+        progress = 0;
+    }
+
+    if (optind != argc - 1) {
+        error_exit("Expecting one image file name");
+    }
+    if (!unsafe && !out_baseimg) {
+        error_exit("Must specify backing file (-b) or use unsafe mode (-u)");
+    }
+    filename = argv[optind++];
+
+    if (qemu_opts_foreach(&qemu_object_opts,
+                          user_creatable_add_opts_foreach,
+                          qemu_img_object_print_help, &error_fatal)) {
+        return 1;
+    }
+
+    qemu_progress_init(progress, 2.0);
+    qemu_progress_print(0, 100);
+
+    flags = BDRV_O_RDWR | (unsafe ? BDRV_O_NO_BACKING : 0);
+    ret = bdrv_parse_cache_mode(cache, &flags, &writethrough);
+    if (ret < 0) {
+        error_report("Invalid cache option: %s", cache);
+        goto out;
+    }
+
+    src_flags = 0;
+    ret = bdrv_parse_cache_mode(src_cache, &src_flags, &src_writethrough);
+    if (ret < 0) {
+        error_report("Invalid source cache option: %s", src_cache);
+        goto out;
+    }
+
+    /* The source files are opened read-only, don't care about WCE */
+    assert((src_flags & BDRV_O_RDWR) == 0);
+    (void) src_writethrough;
+
+    /*
+     * Open the images.
+     *
+     * Ignore the old backing file for unsafe rebase in case we want to correct
+     * the reference to a renamed or moved backing file.
+     */
+    blk = img_open(image_opts, filename, fmt, flags, writethrough, quiet,
+                   false);
+    if (!blk) {
+        ret = -1;
+        goto out;
+    }
+    bs = blk_bs(blk);
+
+    unfiltered_bs = bdrv_skip_filters(bs);
+
+    if (out_basefmt != NULL) {
+        if (bdrv_find_format(out_basefmt) == NULL) {
+            error_report("Invalid format name: '%s'", out_basefmt);
+            ret = -1;
+            goto out;
+        }
+    }
+
+    /* For safe rebasing we need to compare old and new backing file */
+    if (!unsafe) {
+        QDict *options = NULL;
+        BlockDriverState *base_bs = bdrv_cow_bs(unfiltered_bs);
+
+        if (base_bs) {
+            blk_old_backing = blk_new(qemu_get_aio_context(),
+                                      BLK_PERM_CONSISTENT_READ,
+                                      BLK_PERM_ALL);
+            ret = blk_insert_bs(blk_old_backing, base_bs,
+                                &local_err);
+            if (ret < 0) {
+                error_reportf_err(local_err,
+                                  "Could not reuse old backing file '%s': ",
+                                  base_bs->filename);
+                goto out;
+            }
+        } else {
+            blk_old_backing = NULL;
+        }
+
+        if (out_baseimg[0]) {
+            const char *overlay_filename;
+            char *out_real_path;
+
+            options = qdict_new();
+            if (out_basefmt) {
+                qdict_put_str(options, "driver", out_basefmt);
+            }
+            if (force_share) {
+                qdict_put_bool(options, BDRV_OPT_FORCE_SHARE, true);
+            }
+
+            bdrv_refresh_filename(bs);
+            overlay_filename = bs->exact_filename[0] ? bs->exact_filename
+                                                     : bs->filename;
+            out_real_path =
+                bdrv_get_full_backing_filename_from_filename(overlay_filename,
+                                                             out_baseimg,
+                                                             &local_err);
+            if (local_err) {
+                qobject_unref(options);
+                error_reportf_err(local_err,
+                                  "Could not resolve backing filename: ");
+                ret = -1;
+                goto out;
+            }
+
+            /*
+             * Find out whether we rebase an image on top of a previous image
+             * in its chain.
+             */
+            prefix_chain_bs = bdrv_find_backing_image(bs, out_real_path);
+            if (prefix_chain_bs) {
+                qobject_unref(options);
+                g_free(out_real_path);
+
+                blk_new_backing = blk_new(qemu_get_aio_context(),
+                                          BLK_PERM_CONSISTENT_READ,
+                                          BLK_PERM_ALL);
+                ret = blk_insert_bs(blk_new_backing, prefix_chain_bs,
+                                    &local_err);
+                if (ret < 0) {
+                    error_reportf_err(local_err,
+                                      "Could not reuse backing file '%s': ",
+                                      out_baseimg);
+                    goto out;
+                }
+            } else {
+                blk_new_backing = blk_new_open(out_real_path, NULL,
+                                               options, src_flags, &local_err);
+                g_free(out_real_path);
+                if (!blk_new_backing) {
+                    error_reportf_err(local_err,
+                                      "Could not open new backing file '%s': ",
+                                      out_baseimg);
+                    ret = -1;
+                    goto out;
+                }
+            }
+        }
+    }
+
+    /*
+     * Check each unallocated cluster in the COW file. If it is unallocated,
+     * accesses go to the backing file. We must therefore compare this cluster
+     * in the old and new backing file, and if they differ we need to copy it
+     * from the old backing file into the COW file.
+     *
+     * If qemu-img crashes during this step, no harm is done. The content of
+     * the image is the same as the original one at any time.
+     */
+    if (!unsafe) {
+        int64_t size;
+        int64_t old_backing_size = 0;
+        int64_t new_backing_size = 0;
+        uint64_t offset;
+        int64_t n;
+        float local_progress = 0;
+
+        buf_old = blk_blockalign(blk, IO_BUF_SIZE);
+        buf_new = blk_blockalign(blk, IO_BUF_SIZE);
+
+        size = blk_getlength(blk);
+        if (size < 0) {
+            error_report("Could not get size of '%s': %s",
+                         filename, strerror(-size));
+            ret = -1;
+            goto out;
+        }
+        if (blk_old_backing) {
+            old_backing_size = blk_getlength(blk_old_backing);
+            if (old_backing_size < 0) {
+                char backing_name[PATH_MAX];
+
+                bdrv_get_backing_filename(bs, backing_name,
+                                          sizeof(backing_name));
+                error_report("Could not get size of '%s': %s",
+                             backing_name, strerror(-old_backing_size));
+                ret = -1;
+                goto out;
+            }
+        }
+        if (blk_new_backing) {
+            new_backing_size = blk_getlength(blk_new_backing);
+            if (new_backing_size < 0) {
+                error_report("Could not get size of '%s': %s",
+                             out_baseimg, strerror(-new_backing_size));
+                ret = -1;
+                goto out;
+            }
+        }
+
+        if (size != 0) {
+            local_progress = (float)100 / (size / MIN(size, IO_BUF_SIZE));
+        }
+
+        for (offset = 0; offset < size; offset += n) {
+            bool buf_old_is_zero = false;
+
+            /* How many bytes can we handle with the next read? */
+            n = MIN(IO_BUF_SIZE, size - offset);
+
+            /* If the cluster is allocated, we don't need to take action */
+            ret = bdrv_is_allocated(unfiltered_bs, offset, n, &n);
+            if (ret < 0) {
+                error_report("error while reading image metadata: %s",
+                             strerror(-ret));
+                goto out;
+            }
+            if (ret) {
+                continue;
+            }
+
+            if (prefix_chain_bs) {
+                /*
+                 * If cluster wasn't changed since prefix_chain, we don't need
+                 * to take action
+                 */
+                ret = bdrv_is_allocated_above(bdrv_cow_bs(unfiltered_bs),
+                                              prefix_chain_bs, false,
+                                              offset, n, &n);
+                if (ret < 0) {
+                    error_report("error while reading image metadata: %s",
+                                 strerror(-ret));
+                    goto out;
+                }
+                if (!ret) {
+                    continue;
+                }
+            }
+
+            /*
+             * Read old and new backing file and take into consideration that
+             * backing files may be smaller than the COW image.
+             */
+            if (offset >= old_backing_size) {
+                memset(buf_old, 0, n);
+                buf_old_is_zero = true;
+            } else {
+                if (offset + n > old_backing_size) {
+                    n = old_backing_size - offset;
+                }
+
+                ret = blk_pread(blk_old_backing, offset, buf_old, n);
+                if (ret < 0) {
+                    error_report("error while reading from old backing file");
+                    goto out;
+                }
+            }
+
+            if (offset >= new_backing_size || !blk_new_backing) {
+                memset(buf_new, 0, n);
+            } else {
+                if (offset + n > new_backing_size) {
+                    n = new_backing_size - offset;
+                }
+
+                ret = blk_pread(blk_new_backing, offset, buf_new, n);
+                if (ret < 0) {
+                    error_report("error while reading from new backing file");
+                    goto out;
+                }
+            }
+
+            /* If they differ, we need to write to the COW file */
+            uint64_t written = 0;
+
+            while (written < n) {
+                int64_t pnum;
+
+                if (compare_buffers(buf_old + written, buf_new + written,
+                                    n - written, &pnum))
+                {
+                    if (buf_old_is_zero) {
+                        ret = blk_pwrite_zeroes(blk, offset + written, pnum, 0);
+                    } else {
+                        ret = blk_pwrite(blk, offset + written,
+                                         buf_old + written, pnum, 0);
+                    }
+                    if (ret < 0) {
+                        error_report("Error while writing to COW image: %s",
+                            strerror(-ret));
+                        goto out;
+                    }
+                }
+
+                written += pnum;
+            }
+            qemu_progress_print(local_progress, 100);
+        }
+    }
+
+    /*
+     * Change the backing file. All clusters that are different from the old
+     * backing file are overwritten in the COW file now, so the visible content
+     * doesn't change when we switch the backing file.
+     */
+    if (out_baseimg && *out_baseimg) {
+        ret = bdrv_change_backing_file(unfiltered_bs, out_baseimg, out_basefmt,
+                                       true);
+    } else {
+        ret = bdrv_change_backing_file(unfiltered_bs, NULL, NULL, false);
+    }
+
+    if (ret == -ENOSPC) {
+        error_report("Could not change the backing file to '%s': No "
+                     "space left in the file header", out_baseimg);
+    } else if (ret < 0) {
+        error_report("Could not change the backing file to '%s': %s",
+            out_baseimg, strerror(-ret));
+    }
+
+    qemu_progress_print(100, 0);
+    /*
+     * TODO At this point it is possible to check if any clusters that are
+     * allocated in the COW file are the same in the backing file. If so, they
+     * could be dropped from the COW file. Don't do this before switching the
+     * backing file, in case of a crash this would lead to corruption.
+     */
+out:
+    qemu_progress_end();
+    /* Cleanup */
+    if (!unsafe) {
+        blk_unref(blk_old_backing);
+        blk_unref(blk_new_backing);
+    }
+    qemu_vfree(buf_old);
+    qemu_vfree(buf_new);
+
+    blk_unref(blk);
+    if (ret) {
+        return 1;
+    }
+    return 0;
+}
+
+static int img_resize(int argc, char **argv)
+{
+    Error *err = NULL;
+    int c, ret, relative;
+    const char *filename, *fmt, *size;
+    int64_t n, total_size, current_size;
+    bool quiet = false;
+    BlockBackend *blk = NULL;
+    PreallocMode prealloc = PREALLOC_MODE_OFF;
+    QemuOpts *param;
+
+    static QemuOptsList resize_options = {
+        .name = "resize_options",
+        .head = QTAILQ_HEAD_INITIALIZER(resize_options.head),
+        .desc = {
+            {
+                .name = BLOCK_OPT_SIZE,
+                .type = QEMU_OPT_SIZE,
+                .help = "Virtual disk size"
+            }, {
+                /* end of list */
+            }
+        },
+    };
+    bool image_opts = false;
+    bool shrink = false;
+
+    /* Remove size from argv manually so that negative numbers are not treated
+     * as options by getopt. */
+    if (argc < 3) {
+        error_exit("Not enough arguments");
+        return 1;
+    }
+
+    size = argv[--argc];
+
+    /* Parse getopt arguments */
+    fmt = NULL;
+    for(;;) {
+        static const struct option long_options[] = {
+            {"help", no_argument, 0, 'h'},
+            {"object", required_argument, 0, OPTION_OBJECT},
+            {"image-opts", no_argument, 0, OPTION_IMAGE_OPTS},
+            {"preallocation", required_argument, 0, OPTION_PREALLOCATION},
+            {"shrink", no_argument, 0, OPTION_SHRINK},
+            {0, 0, 0, 0}
+        };
+        c = getopt_long(argc, argv, ":f:hq",
+                        long_options, NULL);
+        if (c == -1) {
+            break;
+        }
+        switch(c) {
+        case ':':
+            missing_argument(argv[optind - 1]);
+            break;
+        case '?':
+            unrecognized_option(argv[optind - 1]);
+            break;
+        case 'h':
+            help();
+            break;
+        case 'f':
+            fmt = optarg;
+            break;
+        case 'q':
+            quiet = true;
+            break;
+        case OPTION_OBJECT: {
+            QemuOpts *opts;
+            opts = qemu_opts_parse_noisily(&qemu_object_opts,
+                                           optarg, true);
+            if (!opts) {
+                return 1;
+            }
+        }   break;
+        case OPTION_IMAGE_OPTS:
+            image_opts = true;
+            break;
+        case OPTION_PREALLOCATION:
+            prealloc = qapi_enum_parse(&PreallocMode_lookup, optarg,
+                                       PREALLOC_MODE__MAX, NULL);
+            if (prealloc == PREALLOC_MODE__MAX) {
+                error_report("Invalid preallocation mode '%s'", optarg);
+                return 1;
+            }
+            break;
+        case OPTION_SHRINK:
+            shrink = true;
+            break;
+        }
+    }
+    if (optind != argc - 1) {
+        error_exit("Expecting image file name and size");
+    }
+    filename = argv[optind++];
+
+    if (qemu_opts_foreach(&qemu_object_opts,
+                          user_creatable_add_opts_foreach,
+                          qemu_img_object_print_help, &error_fatal)) {
+        return 1;
+    }
+
+    /* Choose grow, shrink, or absolute resize mode */
+    switch (size[0]) {
+    case '+':
+        relative = 1;
+        size++;
+        break;
+    case '-':
+        relative = -1;
+        size++;
+        break;
+    default:
+        relative = 0;
+        break;
+    }
+
+    /* Parse size */
+    param = qemu_opts_create(&resize_options, NULL, 0, &error_abort);
+    if (!qemu_opt_set(param, BLOCK_OPT_SIZE, size, &err)) {
+        error_report_err(err);
+        ret = -1;
+        qemu_opts_del(param);
+        goto out;
+    }
+    n = qemu_opt_get_size(param, BLOCK_OPT_SIZE, 0);
+    qemu_opts_del(param);
+
+    blk = img_open(image_opts, filename, fmt,
+                   BDRV_O_RDWR | BDRV_O_RESIZE, false, quiet,
+                   false);
+    if (!blk) {
+        ret = -1;
+        goto out;
+    }
+
+    current_size = blk_getlength(blk);
+    if (current_size < 0) {
+        error_report("Failed to inquire current image length: %s",
+                     strerror(-current_size));
+        ret = -1;
+        goto out;
+    }
+
+    if (relative) {
+        total_size = current_size + n * relative;
+    } else {
+        total_size = n;
+    }
+    if (total_size <= 0) {
+        error_report("New image size must be positive");
+        ret = -1;
+        goto out;
+    }
+
+    if (total_size <= current_size && prealloc != PREALLOC_MODE_OFF) {
+        error_report("Preallocation can only be used for growing images");
+        ret = -1;
+        goto out;
+    }
+
+    if (total_size < current_size && !shrink) {
+        error_report("Use the --shrink option to perform a shrink operation.");
+        warn_report("Shrinking an image will delete all data beyond the "
+                    "shrunken image's end. Before performing such an "
+                    "operation, make sure there is no important data there.");
+        ret = -1;
+        goto out;
+    }
+
+    /*
+     * The user expects the image to have the desired size after
+     * resizing, so pass @exact=true.  It is of no use to report
+     * success when the image has not actually been resized.
+     */
+    ret = blk_truncate(blk, total_size, true, prealloc, 0, &err);
+    if (!ret) {
+        qprintf(quiet, "Image resized.\n");
+    } else {
+        error_report_err(err);
+    }
+out:
+    blk_unref(blk);
+    if (ret) {
+        return 1;
+    }
+    return 0;
+}
+
+static void amend_status_cb(BlockDriverState *bs,
+                            int64_t offset, int64_t total_work_size,
+                            void *opaque)
+{
+    qemu_progress_print(100.f * offset / total_work_size, 0);
+}
+
+static int print_amend_option_help(const char *format)
+{
+    BlockDriver *drv;
+
+    /* Find driver and parse its options */
+    drv = bdrv_find_format(format);
+    if (!drv) {
+        error_report("Unknown file format '%s'", format);
+        return 1;
+    }
+
+    if (!drv->bdrv_amend_options) {
+        error_report("Format driver '%s' does not support option amendment",
+                     format);
+        return 1;
+    }
+
+    /* Every driver supporting amendment must have amend_opts */
+    assert(drv->amend_opts);
+
+    printf("Amend options for '%s':\n", format);
+    qemu_opts_print_help(drv->amend_opts, false);
+    return 0;
+}
+
+static int img_amend(int argc, char **argv)
+{
+    Error *err = NULL;
+    int c, ret = 0;
+    char *options = NULL;
+    QemuOptsList *amend_opts = NULL;
+    QemuOpts *opts = NULL;
+    const char *fmt = NULL, *filename, *cache;
+    int flags;
+    bool writethrough;
+    bool quiet = false, progress = false;
+    BlockBackend *blk = NULL;
+    BlockDriverState *bs = NULL;
+    bool image_opts = false;
+    bool force = false;
+
+    cache = BDRV_DEFAULT_CACHE;
+    for (;;) {
+        static const struct option long_options[] = {
+            {"help", no_argument, 0, 'h'},
+            {"object", required_argument, 0, OPTION_OBJECT},
+            {"image-opts", no_argument, 0, OPTION_IMAGE_OPTS},
+            {"force", no_argument, 0, OPTION_FORCE},
+            {0, 0, 0, 0}
+        };
+        c = getopt_long(argc, argv, ":ho:f:t:pq",
+                        long_options, NULL);
+        if (c == -1) {
+            break;
+        }
+
+        switch (c) {
+        case ':':
+            missing_argument(argv[optind - 1]);
+            break;
+        case '?':
+            unrecognized_option(argv[optind - 1]);
+            break;
+        case 'h':
+            help();
+            break;
+        case 'o':
+            if (accumulate_options(&options, optarg) < 0) {
+                ret = -1;
+                goto out_no_progress;
+            }
+            break;
+        case 'f':
+            fmt = optarg;
+            break;
+        case 't':
+            cache = optarg;
+            break;
+        case 'p':
+            progress = true;
+            break;
+        case 'q':
+            quiet = true;
+            break;
+        case OPTION_OBJECT:
+            opts = qemu_opts_parse_noisily(&qemu_object_opts,
+                                           optarg, true);
+            if (!opts) {
+                ret = -1;
+                goto out_no_progress;
+            }
+            break;
+        case OPTION_IMAGE_OPTS:
+            image_opts = true;
+            break;
+        case OPTION_FORCE:
+            force = true;
+            break;
+        }
+    }
+
+    if (!options) {
+        error_exit("Must specify options (-o)");
+    }
+
+    if (qemu_opts_foreach(&qemu_object_opts,
+                          user_creatable_add_opts_foreach,
+                          qemu_img_object_print_help, &error_fatal)) {
+        ret = -1;
+        goto out_no_progress;
+    }
+
+    if (quiet) {
+        progress = false;
+    }
+    qemu_progress_init(progress, 1.0);
+
+    filename = (optind == argc - 1) ? argv[argc - 1] : NULL;
+    if (fmt && has_help_option(options)) {
+        /* If a format is explicitly specified (and possibly no filename is
+         * given), print option help here */
+        ret = print_amend_option_help(fmt);
+        goto out;
+    }
+
+    if (optind != argc - 1) {
+        error_report("Expecting one image file name");
+        ret = -1;
+        goto out;
+    }
+
+    flags = BDRV_O_RDWR;
+    ret = bdrv_parse_cache_mode(cache, &flags, &writethrough);
+    if (ret < 0) {
+        error_report("Invalid cache option: %s", cache);
+        goto out;
+    }
+
+    blk = img_open(image_opts, filename, fmt, flags, writethrough, quiet,
+                   false);
+    if (!blk) {
+        ret = -1;
+        goto out;
+    }
+    bs = blk_bs(blk);
+
+    fmt = bs->drv->format_name;
+
+    if (has_help_option(options)) {
+        /* If the format was auto-detected, print option help here */
+        ret = print_amend_option_help(fmt);
+        goto out;
+    }
+
+    if (!bs->drv->bdrv_amend_options) {
+        error_report("Format driver '%s' does not support option amendment",
+                     fmt);
+        ret = -1;
+        goto out;
+    }
+
+    /* Every driver supporting amendment must have amend_opts */
+    assert(bs->drv->amend_opts);
+
+    amend_opts = qemu_opts_append(amend_opts, bs->drv->amend_opts);
+    opts = qemu_opts_create(amend_opts, NULL, 0, &error_abort);
+    if (!qemu_opts_do_parse(opts, options, NULL, &err)) {
+        /* Try to parse options using the create options */
+        amend_opts = qemu_opts_append(amend_opts, bs->drv->create_opts);
+        qemu_opts_del(opts);
+        opts = qemu_opts_create(amend_opts, NULL, 0, &error_abort);
+        if (qemu_opts_do_parse(opts, options, NULL, NULL)) {
+            error_append_hint(&err,
+                              "This option is only supported for image creation\n");
+        }
+
+        error_report_err(err);
+        ret = -1;
+        goto out;
+    }
+
+    /* In case the driver does not call amend_status_cb() */
+    qemu_progress_print(0.f, 0);
+    ret = bdrv_amend_options(bs, opts, &amend_status_cb, NULL, force, &err);
+    qemu_progress_print(100.f, 0);
+    if (ret < 0) {
+        error_report_err(err);
+        goto out;
+    }
+
+out:
+    qemu_progress_end();
+
+out_no_progress:
+    blk_unref(blk);
+    qemu_opts_del(opts);
+    qemu_opts_free(amend_opts);
+    g_free(options);
+
+    if (ret) {
+        return 1;
+    }
+    return 0;
+}
+
+typedef struct BenchData {
+    BlockBackend *blk;
+    uint64_t image_size;
+    bool write;
+    int bufsize;
+    int step;
+    int nrreq;
+    int n;
+    int flush_interval;
+    bool drain_on_flush;
+    uint8_t *buf;
+    QEMUIOVector *qiov;
+
+    int in_flight;
+    bool in_flush;
+    uint64_t offset;
+} BenchData;
+
+static void bench_undrained_flush_cb(void *opaque, int ret)
+{
+    if (ret < 0) {
+        error_report("Failed flush request: %s", strerror(-ret));
+        exit(EXIT_FAILURE);
+    }
+}
+
+static void bench_cb(void *opaque, int ret)
+{
+    BenchData *b = opaque;
+    BlockAIOCB *acb;
+
+    if (ret < 0) {
+        error_report("Failed request: %s", strerror(-ret));
+        exit(EXIT_FAILURE);
+    }
+
+    if (b->in_flush) {
+        /* Just finished a flush with drained queue: Start next requests */
+        assert(b->in_flight == 0);
+        b->in_flush = false;
+    } else if (b->in_flight > 0) {
+        int remaining = b->n - b->in_flight;
+
+        b->n--;
+        b->in_flight--;
+
+        /* Time for flush? Drain queue if requested, then flush */
+        if (b->flush_interval && remaining % b->flush_interval == 0) {
+            if (!b->in_flight || !b->drain_on_flush) {
+                BlockCompletionFunc *cb;
+
+                if (b->drain_on_flush) {
+                    b->in_flush = true;
+                    cb = bench_cb;
+                } else {
+                    cb = bench_undrained_flush_cb;
+                }
+
+                acb = blk_aio_flush(b->blk, cb, b);
+                if (!acb) {
+                    error_report("Failed to issue flush request");
+                    exit(EXIT_FAILURE);
+                }
+            }
+            if (b->drain_on_flush) {
+                return;
+            }
+        }
+    }
+
+    while (b->n > b->in_flight && b->in_flight < b->nrreq) {
+        int64_t offset = b->offset;
+        /* blk_aio_* might look for completed I/Os and kick bench_cb
+         * again, so make sure this operation is counted by in_flight
+         * and b->offset is ready for the next submission.
+         */
+        b->in_flight++;
+        b->offset += b->step;
+        b->offset %= b->image_size;
+        if (b->write) {
+            acb = blk_aio_pwritev(b->blk, offset, b->qiov, 0, bench_cb, b);
+        } else {
+            acb = blk_aio_preadv(b->blk, offset, b->qiov, 0, bench_cb, b);
+        }
+        if (!acb) {
+            error_report("Failed to issue request");
+            exit(EXIT_FAILURE);
+        }
+    }
+}
+
+static int img_bench(int argc, char **argv)
+{
+    int c, ret = 0;
+    const char *fmt = NULL, *filename;
+    bool quiet = false;
+    bool image_opts = false;
+    bool is_write = false;
+    int count = 75000;
+    int depth = 64;
+    int64_t offset = 0;
+    size_t bufsize = 4096;
+    int pattern = 0;
+    size_t step = 0;
+    int flush_interval = 0;
+    bool drain_on_flush = true;
+    int64_t image_size;
+    BlockBackend *blk = NULL;
+    BenchData data = {};
+    int flags = 0;
+    bool writethrough = false;
+    struct timeval t1, t2;
+    int i;
+    bool force_share = false;
+    size_t buf_size;
+
+    for (;;) {
+        static const struct option long_options[] = {
+            {"help", no_argument, 0, 'h'},
+            {"flush-interval", required_argument, 0, OPTION_FLUSH_INTERVAL},
+            {"image-opts", no_argument, 0, OPTION_IMAGE_OPTS},
+            {"pattern", required_argument, 0, OPTION_PATTERN},
+            {"no-drain", no_argument, 0, OPTION_NO_DRAIN},
+            {"force-share", no_argument, 0, 'U'},
+            {0, 0, 0, 0}
+        };
+        c = getopt_long(argc, argv, ":hc:d:f:ni:o:qs:S:t:wU", long_options,
+                        NULL);
+        if (c == -1) {
+            break;
+        }
+
+        switch (c) {
+        case ':':
+            missing_argument(argv[optind - 1]);
+            break;
+        case '?':
+            unrecognized_option(argv[optind - 1]);
+            break;
+        case 'h':
+            help();
+            break;
+        case 'c':
+        {
+            unsigned long res;
+
+            if (qemu_strtoul(optarg, NULL, 0, &res) < 0 || res > INT_MAX) {
+                error_report("Invalid request count specified");
+                return 1;
+            }
+            count = res;
+            break;
+        }
+        case 'd':
+        {
+            unsigned long res;
+
+            if (qemu_strtoul(optarg, NULL, 0, &res) < 0 || res > INT_MAX) {
+                error_report("Invalid queue depth specified");
+                return 1;
+            }
+            depth = res;
+            break;
+        }
+        case 'f':
+            fmt = optarg;
+            break;
+        case 'n':
+            flags |= BDRV_O_NATIVE_AIO;
+            break;
+        case 'i':
+            ret = bdrv_parse_aio(optarg, &flags);
+            if (ret < 0) {
+                error_report("Invalid aio option: %s", optarg);
+                ret = -1;
+                goto out;
+            }
+            break;
+        case 'o':
+        {
+            offset = cvtnum("offset", optarg);
+            if (offset < 0) {
+                return 1;
+            }
+            break;
+        }
+            break;
+        case 'q':
+            quiet = true;
+            break;
+        case 's':
+        {
+            int64_t sval;
+
+            sval = cvtnum_full("buffer size", optarg, 0, INT_MAX);
+            if (sval < 0) {
+                return 1;
+            }
+
+            bufsize = sval;
+            break;
+        }
+        case 'S':
+        {
+            int64_t sval;
+
+            sval = cvtnum_full("step_size", optarg, 0, INT_MAX);
+            if (sval < 0) {
+                return 1;
+            }
+
+            step = sval;
+            break;
+        }
+        case 't':
+            ret = bdrv_parse_cache_mode(optarg, &flags, &writethrough);
+            if (ret < 0) {
+                error_report("Invalid cache mode");
+                ret = -1;
+                goto out;
+            }
+            break;
+        case 'w':
+            flags |= BDRV_O_RDWR;
+            is_write = true;
+            break;
+        case 'U':
+            force_share = true;
+            break;
+        case OPTION_PATTERN:
+        {
+            unsigned long res;
+
+            if (qemu_strtoul(optarg, NULL, 0, &res) < 0 || res > 0xff) {
+                error_report("Invalid pattern byte specified");
+                return 1;
+            }
+            pattern = res;
+            break;
+        }
+        case OPTION_FLUSH_INTERVAL:
+        {
+            unsigned long res;
+
+            if (qemu_strtoul(optarg, NULL, 0, &res) < 0 || res > INT_MAX) {
+                error_report("Invalid flush interval specified");
+                return 1;
+            }
+            flush_interval = res;
+            break;
+        }
+        case OPTION_NO_DRAIN:
+            drain_on_flush = false;
+            break;
+        case OPTION_IMAGE_OPTS:
+            image_opts = true;
+            break;
+        }
+    }
+
+    if (optind != argc - 1) {
+        error_exit("Expecting one image file name");
+    }
+    filename = argv[argc - 1];
+
+    if (!is_write && flush_interval) {
+        error_report("--flush-interval is only available in write tests");
+        ret = -1;
+        goto out;
+    }
+    if (flush_interval && flush_interval < depth) {
+        error_report("Flush interval can't be smaller than depth");
+        ret = -1;
+        goto out;
+    }
+
+    blk = img_open(image_opts, filename, fmt, flags, writethrough, quiet,
+                   force_share);
+    if (!blk) {
+        ret = -1;
+        goto out;
+    }
+
+    image_size = blk_getlength(blk);
+    if (image_size < 0) {
+        ret = image_size;
+        goto out;
+    }
+
+    data = (BenchData) {
+        .blk            = blk,
+        .image_size     = image_size,
+        .bufsize        = bufsize,
+        .step           = step ?: bufsize,
+        .nrreq          = depth,
+        .n              = count,
+        .offset         = offset,
+        .write          = is_write,
+        .flush_interval = flush_interval,
+        .drain_on_flush = drain_on_flush,
+    };
+    printf("Sending %d %s requests, %d bytes each, %d in parallel "
+           "(starting at offset %" PRId64 ", step size %d)\n",
+           data.n, data.write ? "write" : "read", data.bufsize, data.nrreq,
+           data.offset, data.step);
+    if (flush_interval) {
+        printf("Sending flush every %d requests\n", flush_interval);
+    }
+
+    buf_size = data.nrreq * data.bufsize;
+    data.buf = blk_blockalign(blk, buf_size);
+    memset(data.buf, pattern, data.nrreq * data.bufsize);
+
+    blk_register_buf(blk, data.buf, buf_size);
+
+    data.qiov = g_new(QEMUIOVector, data.nrreq);
+    for (i = 0; i < data.nrreq; i++) {
+        qemu_iovec_init(&data.qiov[i], 1);
+        qemu_iovec_add(&data.qiov[i],
+                       data.buf + i * data.bufsize, data.bufsize);
+    }
+
+    gettimeofday(&t1, NULL);
+    bench_cb(&data, 0);
+
+    while (data.n > 0) {
+        main_loop_wait(false);
+    }
+    gettimeofday(&t2, NULL);
+
+    printf("Run completed in %3.3f seconds.\n",
+           (t2.tv_sec - t1.tv_sec)
+           + ((double)(t2.tv_usec - t1.tv_usec) / 1000000));
+
+out:
+    if (data.buf) {
+        blk_unregister_buf(blk, data.buf);
+    }
+    qemu_vfree(data.buf);
+    blk_unref(blk);
+
+    if (ret) {
+        return 1;
+    }
+    return 0;
+}
+
+enum ImgBitmapAct {
+    BITMAP_ADD,
+    BITMAP_REMOVE,
+    BITMAP_CLEAR,
+    BITMAP_ENABLE,
+    BITMAP_DISABLE,
+    BITMAP_MERGE,
+};
+typedef struct ImgBitmapAction {
+    enum ImgBitmapAct act;
+    const char *src; /* only used for merge */
+    QSIMPLEQ_ENTRY(ImgBitmapAction) next;
+} ImgBitmapAction;
+
+static int img_bitmap(int argc, char **argv)
+{
+    Error *err = NULL;
+    int c, ret = 1;
+    QemuOpts *opts = NULL;
+    const char *fmt = NULL, *src_fmt = NULL, *src_filename = NULL;
+    const char *filename, *bitmap;
+    BlockBackend *blk = NULL, *src = NULL;
+    BlockDriverState *bs = NULL, *src_bs = NULL;
+    bool image_opts = false;
+    int64_t granularity = 0;
+    bool add = false, merge = false;
+    QSIMPLEQ_HEAD(, ImgBitmapAction) actions;
+    ImgBitmapAction *act, *act_next;
+    const char *op;
+
+    QSIMPLEQ_INIT(&actions);
+
+    for (;;) {
+        static const struct option long_options[] = {
+            {"help", no_argument, 0, 'h'},
+            {"object", required_argument, 0, OPTION_OBJECT},
+            {"image-opts", no_argument, 0, OPTION_IMAGE_OPTS},
+            {"add", no_argument, 0, OPTION_ADD},
+            {"remove", no_argument, 0, OPTION_REMOVE},
+            {"clear", no_argument, 0, OPTION_CLEAR},
+            {"enable", no_argument, 0, OPTION_ENABLE},
+            {"disable", no_argument, 0, OPTION_DISABLE},
+            {"merge", required_argument, 0, OPTION_MERGE},
+            {"granularity", required_argument, 0, 'g'},
+            {"source-file", required_argument, 0, 'b'},
+            {"source-format", required_argument, 0, 'F'},
+            {0, 0, 0, 0}
+        };
+        c = getopt_long(argc, argv, ":b:f:F:g:h", long_options, NULL);
+        if (c == -1) {
+            break;
+        }
+
+        switch (c) {
+        case ':':
+            missing_argument(argv[optind - 1]);
+            break;
+        case '?':
+            unrecognized_option(argv[optind - 1]);
+            break;
+        case 'h':
+            help();
+            break;
+        case 'b':
+            src_filename = optarg;
+            break;
+        case 'f':
+            fmt = optarg;
+            break;
+        case 'F':
+            src_fmt = optarg;
+            break;
+        case 'g':
+            granularity = cvtnum("granularity", optarg);
+            if (granularity < 0) {
+                return 1;
+            }
+            break;
+        case OPTION_ADD:
+            act = g_new0(ImgBitmapAction, 1);
+            act->act = BITMAP_ADD;
+            QSIMPLEQ_INSERT_TAIL(&actions, act, next);
+            add = true;
+            break;
+        case OPTION_REMOVE:
+            act = g_new0(ImgBitmapAction, 1);
+            act->act = BITMAP_REMOVE;
+            QSIMPLEQ_INSERT_TAIL(&actions, act, next);
+            break;
+        case OPTION_CLEAR:
+            act = g_new0(ImgBitmapAction, 1);
+            act->act = BITMAP_CLEAR;
+            QSIMPLEQ_INSERT_TAIL(&actions, act, next);
+            break;
+        case OPTION_ENABLE:
+            act = g_new0(ImgBitmapAction, 1);
+            act->act = BITMAP_ENABLE;
+            QSIMPLEQ_INSERT_TAIL(&actions, act, next);
+            break;
+        case OPTION_DISABLE:
+            act = g_new0(ImgBitmapAction, 1);
+            act->act = BITMAP_DISABLE;
+            QSIMPLEQ_INSERT_TAIL(&actions, act, next);
+            break;
+        case OPTION_MERGE:
+            act = g_new0(ImgBitmapAction, 1);
+            act->act = BITMAP_MERGE;
+            act->src = optarg;
+            QSIMPLEQ_INSERT_TAIL(&actions, act, next);
+            merge = true;
+            break;
+        case OPTION_OBJECT:
+            opts = qemu_opts_parse_noisily(&qemu_object_opts, optarg, true);
+            if (!opts) {
+                goto out;
+            }
+            break;
+        case OPTION_IMAGE_OPTS:
+            image_opts = true;
+            break;
+        }
+    }
+
+    if (qemu_opts_foreach(&qemu_object_opts,
+                          user_creatable_add_opts_foreach,
+                          qemu_img_object_print_help, &error_fatal)) {
+        goto out;
+    }
+
+    if (QSIMPLEQ_EMPTY(&actions)) {
+        error_report("Need at least one of --add, --remove, --clear, "
+                     "--enable, --disable, or --merge");
+        goto out;
+    }
+
+    if (granularity && !add) {
+        error_report("granularity only supported with --add");
+        goto out;
+    }
+    if (src_fmt && !src_filename) {
+        error_report("-F only supported with -b");
+        goto out;
+    }
+    if (src_filename && !merge) {
+        error_report("Merge bitmap source file only supported with "
+                     "--merge");
+        goto out;
+    }
+
+    if (optind != argc - 2) {
+        error_report("Expecting filename and bitmap name");
+        goto out;
+    }
+
+    filename = argv[optind];
+    bitmap = argv[optind + 1];
+
+    /*
+     * No need to open backing chains; we will be manipulating bitmaps
+     * directly in this image without reference to image contents.
+     */
+    blk = img_open(image_opts, filename, fmt, BDRV_O_RDWR | BDRV_O_NO_BACKING,
+                   false, false, false);
+    if (!blk) {
+        goto out;
+    }
+    bs = blk_bs(blk);
+    if (src_filename) {
+        src = img_open(false, src_filename, src_fmt, BDRV_O_NO_BACKING,
+                       false, false, false);
+        if (!src) {
+            goto out;
+        }
+        src_bs = blk_bs(src);
+    } else {
+        src_bs = bs;
+    }
+
+    QSIMPLEQ_FOREACH_SAFE(act, &actions, next, act_next) {
+        switch (act->act) {
+        case BITMAP_ADD:
+            qmp_block_dirty_bitmap_add(bs->node_name, bitmap,
+                                       !!granularity, granularity, true, true,
+                                       false, false, &err);
+            op = "add";
+            break;
+        case BITMAP_REMOVE:
+            qmp_block_dirty_bitmap_remove(bs->node_name, bitmap, &err);
+            op = "remove";
+            break;
+        case BITMAP_CLEAR:
+            qmp_block_dirty_bitmap_clear(bs->node_name, bitmap, &err);
+            op = "clear";
+            break;
+        case BITMAP_ENABLE:
+            qmp_block_dirty_bitmap_enable(bs->node_name, bitmap, &err);
+            op = "enable";
+            break;
+        case BITMAP_DISABLE:
+            qmp_block_dirty_bitmap_disable(bs->node_name, bitmap, &err);
+            op = "disable";
+            break;
+        case BITMAP_MERGE:
+            do_dirty_bitmap_merge(bs->node_name, bitmap, src_bs->node_name,
+                                  act->src, &err);
+            op = "merge";
+            break;
+        default:
+            g_assert_not_reached();
+        }
+
+        if (err) {
+            error_reportf_err(err, "Operation %s on bitmap %s failed: ",
+                              op, bitmap);
+            goto out;
+        }
+        g_free(act);
+    }
+
+    ret = 0;
+
+ out:
+    blk_unref(src);
+    blk_unref(blk);
+    qemu_opts_del(opts);
+    return ret;
+}
+
+#define C_BS      01
+#define C_COUNT   02
+#define C_IF      04
+#define C_OF      010
+#define C_SKIP    020
+
+struct DdInfo {
+    unsigned int flags;
+    int64_t count;
+};
+
+struct DdIo {
+    int bsz;    /* Block size */
+    char *filename;
+    uint8_t *buf;
+    int64_t offset;
+};
+
+struct DdOpts {
+    const char *name;
+    int (*f)(const char *, struct DdIo *, struct DdIo *, struct DdInfo *);
+    unsigned int flag;
+};
+
+static int img_dd_bs(const char *arg,
+                     struct DdIo *in, struct DdIo *out,
+                     struct DdInfo *dd)
+{
+    int64_t res;
+
+    res = cvtnum_full("bs", arg, 1, INT_MAX);
+
+    if (res < 0) {
+        return 1;
+    }
+    in->bsz = out->bsz = res;
+
+    return 0;
+}
+
+static int img_dd_count(const char *arg,
+                        struct DdIo *in, struct DdIo *out,
+                        struct DdInfo *dd)
+{
+    dd->count = cvtnum("count", arg);
+
+    if (dd->count < 0) {
+        return 1;
+    }
+
+    return 0;
+}
+
+static int img_dd_if(const char *arg,
+                     struct DdIo *in, struct DdIo *out,
+                     struct DdInfo *dd)
+{
+    in->filename = g_strdup(arg);
+
+    return 0;
+}
+
+static int img_dd_of(const char *arg,
+                     struct DdIo *in, struct DdIo *out,
+                     struct DdInfo *dd)
+{
+    out->filename = g_strdup(arg);
+
+    return 0;
+}
+
+static int img_dd_skip(const char *arg,
+                       struct DdIo *in, struct DdIo *out,
+                       struct DdInfo *dd)
+{
+    in->offset = cvtnum("skip", arg);
+
+    if (in->offset < 0) {
+        return 1;
+    }
+
+    return 0;
+}
+
+static int img_dd(int argc, char **argv)
+{
+    int ret = 0;
+    char *arg = NULL;
+    char *tmp;
+    BlockDriver *drv = NULL, *proto_drv = NULL;
+    BlockBackend *blk1 = NULL, *blk2 = NULL;
+    QemuOpts *opts = NULL;
+    QemuOptsList *create_opts = NULL;
+    Error *local_err = NULL;
+    bool image_opts = false;
+    int c, i;
+    const char *out_fmt = "raw";
+    const char *fmt = NULL;
+    int64_t size = 0;
+    int64_t block_count = 0, out_pos, in_pos;
+    bool force_share = false;
+    struct DdInfo dd = {
+        .flags = 0,
+        .count = 0,
+    };
+    struct DdIo in = {
+        .bsz = 512, /* Block size is by default 512 bytes */
+        .filename = NULL,
+        .buf = NULL,
+        .offset = 0
+    };
+    struct DdIo out = {
+        .bsz = 512,
+        .filename = NULL,
+        .buf = NULL,
+        .offset = 0
+    };
+
+    const struct DdOpts options[] = {
+        { "bs", img_dd_bs, C_BS },
+        { "count", img_dd_count, C_COUNT },
+        { "if", img_dd_if, C_IF },
+        { "of", img_dd_of, C_OF },
+        { "skip", img_dd_skip, C_SKIP },
+        { NULL, NULL, 0 }
+    };
+    const struct option long_options[] = {
+        { "help", no_argument, 0, 'h'},
+        { "object", required_argument, 0, OPTION_OBJECT},
+        { "image-opts", no_argument, 0, OPTION_IMAGE_OPTS},
+        { "force-share", no_argument, 0, 'U'},
+        { 0, 0, 0, 0 }
+    };
+
+    while ((c = getopt_long(argc, argv, ":hf:O:U", long_options, NULL))) {
+        if (c == EOF) {
+            break;
+        }
+        switch (c) {
+        case 'O':
+            out_fmt = optarg;
+            break;
+        case 'f':
+            fmt = optarg;
+            break;
+        case ':':
+            missing_argument(argv[optind - 1]);
+            break;
+        case '?':
+            unrecognized_option(argv[optind - 1]);
+            break;
+        case 'h':
+            help();
+            break;
+        case 'U':
+            force_share = true;
+            break;
+        case OPTION_OBJECT:
+            if (!qemu_opts_parse_noisily(&qemu_object_opts, optarg, true)) {
+                ret = -1;
+                goto out;
+            }
+            break;
+        case OPTION_IMAGE_OPTS:
+            image_opts = true;
+            break;
+        }
+    }
+
+    for (i = optind; i < argc; i++) {
+        int j;
+        arg = g_strdup(argv[i]);
+
+        tmp = strchr(arg, '=');
+        if (tmp == NULL) {
+            error_report("unrecognized operand %s", arg);
+            ret = -1;
+            goto out;
+        }
+
+        *tmp++ = '\0';
+
+        for (j = 0; options[j].name != NULL; j++) {
+            if (!strcmp(arg, options[j].name)) {
+                break;
+            }
+        }
+        if (options[j].name == NULL) {
+            error_report("unrecognized operand %s", arg);
+            ret = -1;
+            goto out;
+        }
+
+        if (options[j].f(tmp, &in, &out, &dd) != 0) {
+            ret = -1;
+            goto out;
+        }
+        dd.flags |= options[j].flag;
+        g_free(arg);
+        arg = NULL;
+    }
+
+    if (!(dd.flags & C_IF && dd.flags & C_OF)) {
+        error_report("Must specify both input and output files");
+        ret = -1;
+        goto out;
+    }
+
+    if (qemu_opts_foreach(&qemu_object_opts,
+                          user_creatable_add_opts_foreach,
+                          qemu_img_object_print_help, &error_fatal)) {
+        ret = -1;
+        goto out;
+    }
+
+    blk1 = img_open(image_opts, in.filename, fmt, 0, false, false,
+                    force_share);
+
+    if (!blk1) {
+        ret = -1;
+        goto out;
+    }
+
+    drv = bdrv_find_format(out_fmt);
+    if (!drv) {
+        error_report("Unknown file format");
+        ret = -1;
+        goto out;
+    }
+    proto_drv = bdrv_find_protocol(out.filename, true, &local_err);
+
+    if (!proto_drv) {
+        error_report_err(local_err);
+        ret = -1;
+        goto out;
+    }
+    if (!drv->create_opts) {
+        error_report("Format driver '%s' does not support image creation",
+                     drv->format_name);
+        ret = -1;
+        goto out;
+    }
+    if (!proto_drv->create_opts) {
+        error_report("Protocol driver '%s' does not support image creation",
+                     proto_drv->format_name);
+        ret = -1;
+        goto out;
+    }
+    create_opts = qemu_opts_append(create_opts, drv->create_opts);
+    create_opts = qemu_opts_append(create_opts, proto_drv->create_opts);
+
+    opts = qemu_opts_create(create_opts, NULL, 0, &error_abort);
+
+    size = blk_getlength(blk1);
+    if (size < 0) {
+        error_report("Failed to get size for '%s'", in.filename);
+        ret = -1;
+        goto out;
+    }
+
+    if (dd.flags & C_COUNT && dd.count <= INT64_MAX / in.bsz &&
+        dd.count * in.bsz < size) {
+        size = dd.count * in.bsz;
+    }
+
+    /* Overflow means the specified offset is beyond input image's size */
+    if (dd.flags & C_SKIP && (in.offset > INT64_MAX / in.bsz ||
+                              size < in.bsz * in.offset)) {
+        qemu_opt_set_number(opts, BLOCK_OPT_SIZE, 0, &error_abort);
+    } else {
+        qemu_opt_set_number(opts, BLOCK_OPT_SIZE,
+                            size - in.bsz * in.offset, &error_abort);
+    }
+
+    ret = bdrv_create(drv, out.filename, opts, &local_err);
+    if (ret < 0) {
+        error_reportf_err(local_err,
+                          "%s: error while creating output image: ",
+                          out.filename);
+        ret = -1;
+        goto out;
+    }
+
+    /* TODO, we can't honour --image-opts for the target,
+     * since it needs to be given in a format compatible
+     * with the bdrv_create() call above which does not
+     * support image-opts style.
+     */
+    blk2 = img_open_file(out.filename, NULL, out_fmt, BDRV_O_RDWR,
+                         false, false, false);
+
+    if (!blk2) {
+        ret = -1;
+        goto out;
+    }
+
+    if (dd.flags & C_SKIP && (in.offset > INT64_MAX / in.bsz ||
+                              size < in.offset * in.bsz)) {
+        /* We give a warning if the skip option is bigger than the input
+         * size and create an empty output disk image (i.e. like dd(1)).
+         */
+        error_report("%s: cannot skip to specified offset", in.filename);
+        in_pos = size;
+    } else {
+        in_pos = in.offset * in.bsz;
+    }
+
+    in.buf = g_new(uint8_t, in.bsz);
+
+    for (out_pos = 0; in_pos < size; block_count++) {
+        int in_ret, out_ret;
+
+        if (in_pos + in.bsz > size) {
+            in_ret = blk_pread(blk1, in_pos, in.buf, size - in_pos);
+        } else {
+            in_ret = blk_pread(blk1, in_pos, in.buf, in.bsz);
+        }
+        if (in_ret < 0) {
+            error_report("error while reading from input image file: %s",
+                         strerror(-in_ret));
+            ret = -1;
+            goto out;
+        }
+        in_pos += in_ret;
+
+        out_ret = blk_pwrite(blk2, out_pos, in.buf, in_ret, 0);
+
+        if (out_ret < 0) {
+            error_report("error while writing to output image file: %s",
+                         strerror(-out_ret));
+            ret = -1;
+            goto out;
+        }
+        out_pos += out_ret;
+    }
+
+out:
+    g_free(arg);
+    qemu_opts_del(opts);
+    qemu_opts_free(create_opts);
+    blk_unref(blk1);
+    blk_unref(blk2);
+    g_free(in.filename);
+    g_free(out.filename);
+    g_free(in.buf);
+    g_free(out.buf);
+
+    if (ret) {
+        return 1;
+    }
+    return 0;
+}
+
+static void dump_json_block_measure_info(BlockMeasureInfo *info)
+{
+    QString *str;
+    QObject *obj;
+    Visitor *v = qobject_output_visitor_new(&obj);
+
+    visit_type_BlockMeasureInfo(v, NULL, &info, &error_abort);
+    visit_complete(v, &obj);
+    str = qobject_to_json_pretty(obj);
+    assert(str != NULL);
+    printf("%s\n", qstring_get_str(str));
+    qobject_unref(obj);
+    visit_free(v);
+    qobject_unref(str);
+}
+
+static int img_measure(int argc, char **argv)
+{
+    static const struct option long_options[] = {
+        {"help", no_argument, 0, 'h'},
+        {"image-opts", no_argument, 0, OPTION_IMAGE_OPTS},
+        {"object", required_argument, 0, OPTION_OBJECT},
+        {"output", required_argument, 0, OPTION_OUTPUT},
+        {"size", required_argument, 0, OPTION_SIZE},
+        {"force-share", no_argument, 0, 'U'},
+        {0, 0, 0, 0}
+    };
+    OutputFormat output_format = OFORMAT_HUMAN;
+    BlockBackend *in_blk = NULL;
+    BlockDriver *drv;
+    const char *filename = NULL;
+    const char *fmt = NULL;
+    const char *out_fmt = "raw";
+    char *options = NULL;
+    char *snapshot_name = NULL;
+    bool force_share = false;
+    QemuOpts *opts = NULL;
+    QemuOpts *object_opts = NULL;
+    QemuOpts *sn_opts = NULL;
+    QemuOptsList *create_opts = NULL;
+    bool image_opts = false;
+    uint64_t img_size = UINT64_MAX;
+    BlockMeasureInfo *info = NULL;
+    Error *local_err = NULL;
+    int ret = 1;
+    int c;
+
+    while ((c = getopt_long(argc, argv, "hf:O:o:l:U",
+                            long_options, NULL)) != -1) {
+        switch (c) {
+        case '?':
+        case 'h':
+            help();
+            break;
+        case 'f':
+            fmt = optarg;
+            break;
+        case 'O':
+            out_fmt = optarg;
+            break;
+        case 'o':
+            if (accumulate_options(&options, optarg) < 0) {
+                goto out;
+            }
+            break;
+        case 'l':
+            if (strstart(optarg, SNAPSHOT_OPT_BASE, NULL)) {
+                sn_opts = qemu_opts_parse_noisily(&internal_snapshot_opts,
+                                                  optarg, false);
+                if (!sn_opts) {
+                    error_report("Failed in parsing snapshot param '%s'",
+                                 optarg);
+                    goto out;
+                }
+            } else {
+                snapshot_name = optarg;
+            }
+            break;
+        case 'U':
+            force_share = true;
+            break;
+        case OPTION_OBJECT:
+            object_opts = qemu_opts_parse_noisily(&qemu_object_opts,
+                                                  optarg, true);
+            if (!object_opts) {
+                goto out;
+            }
+            break;
+        case OPTION_IMAGE_OPTS:
+            image_opts = true;
+            break;
+        case OPTION_OUTPUT:
+            if (!strcmp(optarg, "json")) {
+                output_format = OFORMAT_JSON;
+            } else if (!strcmp(optarg, "human")) {
+                output_format = OFORMAT_HUMAN;
+            } else {
+                error_report("--output must be used with human or json "
+                             "as argument.");
+                goto out;
+            }
+            break;
+        case OPTION_SIZE:
+        {
+            int64_t sval;
+
+            sval = cvtnum("image size", optarg);
+            if (sval < 0) {
+                goto out;
+            }
+            img_size = (uint64_t)sval;
+        }
+        break;
+        }
+    }
+
+    if (qemu_opts_foreach(&qemu_object_opts,
+                          user_creatable_add_opts_foreach,
+                          qemu_img_object_print_help, &error_fatal)) {
+        goto out;
+    }
+
+    if (argc - optind > 1) {
+        error_report("At most one filename argument is allowed.");
+        goto out;
+    } else if (argc - optind == 1) {
+        filename = argv[optind];
+    }
+
+    if (!filename && (image_opts || fmt || snapshot_name || sn_opts)) {
+        error_report("--image-opts, -f, and -l require a filename argument.");
+        goto out;
+    }
+    if (filename && img_size != UINT64_MAX) {
+        error_report("--size N cannot be used together with a filename.");
+        goto out;
+    }
+    if (!filename && img_size == UINT64_MAX) {
+        error_report("Either --size N or one filename must be specified.");
+        goto out;
+    }
+
+    if (filename) {
+        in_blk = img_open(image_opts, filename, fmt, 0,
+                          false, false, force_share);
+        if (!in_blk) {
+            goto out;
+        }
+
+        if (sn_opts) {
+            bdrv_snapshot_load_tmp(blk_bs(in_blk),
+                    qemu_opt_get(sn_opts, SNAPSHOT_OPT_ID),
+                    qemu_opt_get(sn_opts, SNAPSHOT_OPT_NAME),
+                    &local_err);
+        } else if (snapshot_name != NULL) {
+            bdrv_snapshot_load_tmp_by_id_or_name(blk_bs(in_blk),
+                    snapshot_name, &local_err);
+        }
+        if (local_err) {
+            error_reportf_err(local_err, "Failed to load snapshot: ");
+            goto out;
+        }
+    }
+
+    drv = bdrv_find_format(out_fmt);
+    if (!drv) {
+        error_report("Unknown file format '%s'", out_fmt);
+        goto out;
+    }
+    if (!drv->create_opts) {
+        error_report("Format driver '%s' does not support image creation",
+                     drv->format_name);
+        goto out;
+    }
+
+    create_opts = qemu_opts_append(create_opts, drv->create_opts);
+    create_opts = qemu_opts_append(create_opts, bdrv_file.create_opts);
+    opts = qemu_opts_create(create_opts, NULL, 0, &error_abort);
+    if (options) {
+        if (!qemu_opts_do_parse(opts, options, NULL, &local_err)) {
+            error_report_err(local_err);
+            error_report("Invalid options for file format '%s'", out_fmt);
+            goto out;
+        }
+    }
+    if (img_size != UINT64_MAX) {
+        qemu_opt_set_number(opts, BLOCK_OPT_SIZE, img_size, &error_abort);
+    }
+
+    info = bdrv_measure(drv, opts, in_blk ? blk_bs(in_blk) : NULL, &local_err);
+    if (local_err) {
+        error_report_err(local_err);
+        goto out;
+    }
+
+    if (output_format == OFORMAT_HUMAN) {
+        printf("required size: %" PRIu64 "\n", info->required);
+        printf("fully allocated size: %" PRIu64 "\n", info->fully_allocated);
+        if (info->has_bitmaps) {
+            printf("bitmaps size: %" PRIu64 "\n", info->bitmaps);
+        }
+    } else {
+        dump_json_block_measure_info(info);
+    }
+
+    ret = 0;
+
+out:
+    qapi_free_BlockMeasureInfo(info);
+    qemu_opts_del(object_opts);
+    qemu_opts_del(opts);
+    qemu_opts_del(sn_opts);
+    qemu_opts_free(create_opts);
+    g_free(options);
+    blk_unref(in_blk);
+    return ret;
+}
+
+static const img_cmd_t img_cmds[] = {
+#define DEF(option, callback, arg_string)        \
+    { option, callback },
+#include "qemu-img-cmds.h"
+#undef DEF
+    { NULL, NULL, },
+};
+
+int main(int argc, char **argv)
+{
+    const img_cmd_t *cmd;
+    const char *cmdname;
+    Error *local_error = NULL;
+    int c;
+    static const struct option long_options[] = {
+        {"help", no_argument, 0, 'h'},
+        {"version", no_argument, 0, 'V'},
+        {"trace", required_argument, NULL, 'T'},
+        {0, 0, 0, 0}
+    };
+
+#ifdef CONFIG_POSIX
+    signal(SIGPIPE, SIG_IGN);
+#endif
+
+    socket_init();
+    error_init(argv[0]);
+    module_call_init(MODULE_INIT_TRACE);
+    qemu_init_exec_dir(argv[0]);
+
+    if (qemu_init_main_loop(&local_error)) {
+        error_report_err(local_error);
+        exit(EXIT_FAILURE);
+    }
+
+    qcrypto_init(&error_fatal);
+
+    module_call_init(MODULE_INIT_QOM);
+    bdrv_init();
+    if (argc < 2) {
+        error_exit("Not enough arguments");
+    }
+
+    qemu_add_opts(&qemu_object_opts);
+    qemu_add_opts(&qemu_source_opts);
+    qemu_add_opts(&qemu_trace_opts);
+
+    while ((c = getopt_long(argc, argv, "+:hVT:", long_options, NULL)) != -1) {
+        switch (c) {
+        case ':':
+            missing_argument(argv[optind - 1]);
+            return 0;
+        case '?':
+            unrecognized_option(argv[optind - 1]);
+            return 0;
+        case 'h':
+            help();
+            return 0;
+        case 'V':
+            printf(QEMU_IMG_VERSION);
+            return 0;
+        case 'T':
+            trace_opt_parse(optarg);
+            break;
+        }
+    }
+
+    cmdname = argv[optind];
+
+    /* reset getopt_long scanning */
+    argc -= optind;
+    if (argc < 1) {
+        return 0;
+    }
+    argv += optind;
+    qemu_reset_optind();
+
+    if (!trace_init_backends()) {
+        exit(1);
+    }
+    trace_init_file();
+    qemu_set_log(LOG_TRACE);
+
+    /* find the command */
+    for (cmd = img_cmds; cmd->name != NULL; cmd++) {
+        if (!strcmp(cmdname, cmd->name)) {
+            return cmd->handler(argc, argv);
+        }
+    }
+
+    /* not found */
+    error_exit("Command not found: %s", cmdname);
+}
diff --git a/qemu-io-cmds.c b/qemu-io-cmds.c
new file mode 100644
index 000000000..4153f1c0b
--- /dev/null
+++ b/qemu-io-cmds.c
@@ -0,0 +1,2497 @@
+/*
+ * Command line utility to exercise the QEMU I/O path.
+ *
+ * Copyright (C) 2009-2016 Red Hat, Inc.
+ * Copyright (c) 2003-2005 Silicon Graphics, Inc.
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
+ * See the COPYING file in the top-level directory.
+ */
+
+#include "qemu/osdep.h"
+#include "qapi/error.h"
+#include "qapi/qmp/qdict.h"
+#include "qemu-io.h"
+#include "sysemu/block-backend.h"
+#include "block/block.h"
+#include "block/block_int.h" /* for info_f() */
+#include "block/qapi.h"
+#include "qemu/error-report.h"
+#include "qemu/main-loop.h"
+#include "qemu/option.h"
+#include "qemu/timer.h"
+#include "qemu/cutils.h"
+
+#define CMD_NOFILE_OK   0x01
+
+bool qemuio_misalign;
+
+static cmdinfo_t *cmdtab;
+static int ncmds;
+
+static int compare_cmdname(const void *a, const void *b)
+{
+    return strcmp(((const cmdinfo_t *)a)->name,
+                  ((const cmdinfo_t *)b)->name);
+}
+
+void qemuio_add_command(const cmdinfo_t *ci)
+{
+    /* ci->perm assumes a file is open, but the GLOBAL and NOFILE_OK
+     * flags allow it not to be, so that combination is invalid.
+     * Catch it now rather than letting it manifest as a crash if a
+     * particular set of command line options are used.
+     */
+    assert(ci->perm == 0 ||
+           (ci->flags & (CMD_FLAG_GLOBAL | CMD_NOFILE_OK)) == 0);
+    cmdtab = g_renew(cmdinfo_t, cmdtab, ++ncmds);
+    cmdtab[ncmds - 1] = *ci;
+    qsort(cmdtab, ncmds, sizeof(*cmdtab), compare_cmdname);
+}
+
+void qemuio_command_usage(const cmdinfo_t *ci)
+{
+    printf("%s %s -- %s\n", ci->name, ci->args, ci->oneline);
+}
+
+static int init_check_command(BlockBackend *blk, const cmdinfo_t *ct)
+{
+    if (ct->flags & CMD_FLAG_GLOBAL) {
+        return 1;
+    }
+    if (!(ct->flags & CMD_NOFILE_OK) && !blk) {
+        fprintf(stderr, "no file open, try 'help open'\n");
+        return 0;
+    }
+    return 1;
+}
+
+static int command(BlockBackend *blk, const cmdinfo_t *ct, int argc,
+                   char **argv)
+{
+    char *cmd = argv[0];
+
+    if (!init_check_command(blk, ct)) {
+        return -EINVAL;
+    }
+
+    if (argc - 1 < ct->argmin || (ct->argmax != -1 && argc - 1 > ct->argmax)) {
+        if (ct->argmax == -1) {
+            fprintf(stderr,
+                    "bad argument count %d to %s, expected at least %d arguments\n",
+                    argc-1, cmd, ct->argmin);
+        } else if (ct->argmin == ct->argmax) {
+            fprintf(stderr,
+                    "bad argument count %d to %s, expected %d arguments\n",
+                    argc-1, cmd, ct->argmin);
+        } else {
+            fprintf(stderr,
+                    "bad argument count %d to %s, expected between %d and %d arguments\n",
+                    argc-1, cmd, ct->argmin, ct->argmax);
+        }
+        return -EINVAL;
+    }
+
+    /* Request additional permissions if necessary for this command. The caller
+     * is responsible for restoring the original permissions afterwards if this
+     * is what it wants. */
+    if (ct->perm && blk_is_available(blk)) {
+        uint64_t orig_perm, orig_shared_perm;
+        blk_get_perm(blk, &orig_perm, &orig_shared_perm);
+
+        if (ct->perm & ~orig_perm) {
+            uint64_t new_perm;
+            Error *local_err = NULL;
+            int ret;
+
+            new_perm = orig_perm | ct->perm;
+
+            ret = blk_set_perm(blk, new_perm, orig_shared_perm, &local_err);
+            if (ret < 0) {
+                error_report_err(local_err);
+                return ret;
+            }
+        }
+    }
+
+    qemu_reset_optind();
+    return ct->cfunc(blk, argc, argv);
+}
+
+static const cmdinfo_t *find_command(const char *cmd)
+{
+    cmdinfo_t *ct;
+
+    for (ct = cmdtab; ct < &cmdtab[ncmds]; ct++) {
+        if (strcmp(ct->name, cmd) == 0 ||
+            (ct->altname && strcmp(ct->altname, cmd) == 0))
+        {
+            return (const cmdinfo_t *)ct;
+        }
+    }
+    return NULL;
+}
+
+/* Invoke fn() for commands with a matching prefix */
+void qemuio_complete_command(const char *input,
+                             void (*fn)(const char *cmd, void *opaque),
+                             void *opaque)
+{
+    cmdinfo_t *ct;
+    size_t input_len = strlen(input);
+
+    for (ct = cmdtab; ct < &cmdtab[ncmds]; ct++) {
+        if (strncmp(input, ct->name, input_len) == 0) {
+            fn(ct->name, opaque);
+        }
+    }
+}
+
+static char **breakline(char *input, int *count)
+{
+    int c = 0;
+    char *p;
+    char **rval = g_new0(char *, 1);
+
+    while (rval && (p = qemu_strsep(&input, " ")) != NULL) {
+        if (!*p) {
+            continue;
+        }
+        c++;
+        rval = g_renew(char *, rval, (c + 1));
+        rval[c - 1] = p;
+        rval[c] = NULL;
+    }
+    *count = c;
+    return rval;
+}
+
+static int64_t cvtnum(const char *s)
+{
+    int err;
+    uint64_t value;
+
+    err = qemu_strtosz(s, NULL, &value);
+    if (err < 0) {
+        return err;
+    }
+    if (value > INT64_MAX) {
+        return -ERANGE;
+    }
+    return value;
+}
+
+static void print_cvtnum_err(int64_t rc, const char *arg)
+{
+    switch (rc) {
+    case -EINVAL:
+        printf("Parsing error: non-numeric argument,"
+               " or extraneous/unrecognized suffix -- %s\n", arg);
+        break;
+    case -ERANGE:
+        printf("Parsing error: argument too large -- %s\n", arg);
+        break;
+    default:
+        printf("Parsing error: %s\n", arg);
+    }
+}
+
+#define EXABYTES(x)     ((long long)(x) << 60)
+#define PETABYTES(x)    ((long long)(x) << 50)
+#define TERABYTES(x)    ((long long)(x) << 40)
+#define GIGABYTES(x)    ((long long)(x) << 30)
+#define MEGABYTES(x)    ((long long)(x) << 20)
+#define KILOBYTES(x)    ((long long)(x) << 10)
+
+#define TO_EXABYTES(x)  ((x) / EXABYTES(1))
+#define TO_PETABYTES(x) ((x) / PETABYTES(1))
+#define TO_TERABYTES(x) ((x) / TERABYTES(1))
+#define TO_GIGABYTES(x) ((x) / GIGABYTES(1))
+#define TO_MEGABYTES(x) ((x) / MEGABYTES(1))
+#define TO_KILOBYTES(x) ((x) / KILOBYTES(1))
+
+static void cvtstr(double value, char *str, size_t size)
+{
+    char *trim;
+    const char *suffix;
+
+    if (value >= EXABYTES(1)) {
+        suffix = " EiB";
+        snprintf(str, size - 4, "%.3f", TO_EXABYTES(value));
+    } else if (value >= PETABYTES(1)) {
+        suffix = " PiB";
+        snprintf(str, size - 4, "%.3f", TO_PETABYTES(value));
+    } else if (value >= TERABYTES(1)) {
+        suffix = " TiB";
+        snprintf(str, size - 4, "%.3f", TO_TERABYTES(value));
+    } else if (value >= GIGABYTES(1)) {
+        suffix = " GiB";
+        snprintf(str, size - 4, "%.3f", TO_GIGABYTES(value));
+    } else if (value >= MEGABYTES(1)) {
+        suffix = " MiB";
+        snprintf(str, size - 4, "%.3f", TO_MEGABYTES(value));
+    } else if (value >= KILOBYTES(1)) {
+        suffix = " KiB";
+        snprintf(str, size - 4, "%.3f", TO_KILOBYTES(value));
+    } else {
+        suffix = " bytes";
+        snprintf(str, size - 6, "%f", value);
+    }
+
+    trim = strstr(str, ".000");
+    if (trim) {
+        strcpy(trim, suffix);
+    } else {
+        strcat(str, suffix);
+    }
+}
+
+
+
+static struct timespec tsub(struct timespec t1, struct timespec t2)
+{
+    t1.tv_nsec -= t2.tv_nsec;
+    if (t1.tv_nsec < 0) {
+        t1.tv_nsec += NANOSECONDS_PER_SECOND;
+        t1.tv_sec--;
+    }
+    t1.tv_sec -= t2.tv_sec;
+    return t1;
+}
+
+static double tdiv(double value, struct timespec tv)
+{
+    double seconds = tv.tv_sec + (tv.tv_nsec / 1e9);
+    return value / seconds;
+}
+
+#define HOURS(sec)      ((sec) / (60 * 60))
+#define MINUTES(sec)    (((sec) % (60 * 60)) / 60)
+#define SECONDS(sec)    ((sec) % 60)
+
+enum {
+    DEFAULT_TIME        = 0x0,
+    TERSE_FIXED_TIME    = 0x1,
+    VERBOSE_FIXED_TIME  = 0x2,
+};
+
+static void timestr(struct timespec *tv, char *ts, size_t size, int format)
+{
+    double frac_sec = tv->tv_nsec / 1e9;
+
+    if (format & TERSE_FIXED_TIME) {
+        if (!HOURS(tv->tv_sec)) {
+            snprintf(ts, size, "%u:%05.2f",
+                     (unsigned int) MINUTES(tv->tv_sec),
+                     SECONDS(tv->tv_sec) + frac_sec);
+            return;
+        }
+        format |= VERBOSE_FIXED_TIME; /* fallback if hours needed */
+    }
+
+    if ((format & VERBOSE_FIXED_TIME) || tv->tv_sec) {
+        snprintf(ts, size, "%u:%02u:%05.2f",
+                (unsigned int) HOURS(tv->tv_sec),
+                (unsigned int) MINUTES(tv->tv_sec),
+                 SECONDS(tv->tv_sec) + frac_sec);
+    } else {
+        snprintf(ts, size, "%05.2f sec", frac_sec);
+    }
+}
+
+/*
+ * Parse the pattern argument to various sub-commands.
+ *
+ * Because the pattern is used as an argument to memset it must evaluate
+ * to an unsigned integer that fits into a single byte.
+ */
+static int parse_pattern(const char *arg)
+{
+    char *endptr = NULL;
+    long pattern;
+
+    pattern = strtol(arg, &endptr, 0);
+    if (pattern < 0 || pattern > UCHAR_MAX || *endptr != '\0') {
+        printf("%s is not a valid pattern byte\n", arg);
+        return -1;
+    }
+
+    return pattern;
+}
+
+/*
+ * Memory allocation helpers.
+ *
+ * Make sure memory is aligned by default, or purposefully misaligned if
+ * that is specified on the command line.
+ */
+
+#define MISALIGN_OFFSET     16
+static void *qemu_io_alloc(BlockBackend *blk, size_t len, int pattern)
+{
+    void *buf;
+
+    if (qemuio_misalign) {
+        len += MISALIGN_OFFSET;
+    }
+    buf = blk_blockalign(blk, len);
+    memset(buf, pattern, len);
+    if (qemuio_misalign) {
+        buf += MISALIGN_OFFSET;
+    }
+    return buf;
+}
+
+static void qemu_io_free(void *p)
+{
+    if (qemuio_misalign) {
+        p -= MISALIGN_OFFSET;
+    }
+    qemu_vfree(p);
+}
+
+/*
+ * qemu_io_alloc_from_file()
+ *
+ * Allocates the buffer and populates it with the content of the given file
+ * up to @len bytes. If the file length is less than @len, then the buffer
+ * is populated with the file content cyclically.
+ *
+ * @blk - the block backend where the buffer content is going to be written to
+ * @len - the buffer length
+ * @file_name - the file to read the content from
+ *
+ * Returns: the buffer pointer on success
+ *          NULL on error
+ */
+static void *qemu_io_alloc_from_file(BlockBackend *blk, size_t len,
+                                     const char *file_name)
+{
+    char *buf, *buf_origin;
+    FILE *f = fopen(file_name, "r");
+    int pattern_len;
+
+    if (!f) {
+        perror(file_name);
+        return NULL;
+    }
+
+    if (qemuio_misalign) {
+        len += MISALIGN_OFFSET;
+    }
+
+    buf_origin = buf = blk_blockalign(blk, len);
+
+    if (qemuio_misalign) {
+        buf_origin += MISALIGN_OFFSET;
+        buf += MISALIGN_OFFSET;
+        len -= MISALIGN_OFFSET;
+    }
+
+    pattern_len = fread(buf_origin, 1, len, f);
+
+    if (ferror(f)) {
+        perror(file_name);
+        goto error;
+    }
+
+    if (pattern_len == 0) {
+        fprintf(stderr, "%s: file is empty\n", file_name);
+        goto error;
+    }
+
+    fclose(f);
+    f = NULL;
+
+    if (len > pattern_len) {
+        len -= pattern_len;
+        buf += pattern_len;
+
+        while (len > 0) {
+            size_t len_to_copy = MIN(pattern_len, len);
+
+            memcpy(buf, buf_origin, len_to_copy);
+
+            len -= len_to_copy;
+            buf += len_to_copy;
+        }
+    }
+
+    return buf_origin;
+
+error:
+    qemu_io_free(buf_origin);
+    if (f) {
+        fclose(f);
+    }
+    return NULL;
+}
+
+static void dump_buffer(const void *buffer, int64_t offset, int64_t len)
+{
+    uint64_t i;
+    int j;
+    const uint8_t *p;
+
+    for (i = 0, p = buffer; i < len; i += 16) {
+        const uint8_t *s = p;
+
+        printf("%08" PRIx64 ":  ", offset + i);
+        for (j = 0; j < 16 && i + j < len; j++, p++) {
+            printf("%02x ", *p);
+        }
+        printf(" ");
+        for (j = 0; j < 16 && i + j < len; j++, s++) {
+            if (isalnum(*s)) {
+                printf("%c", *s);
+            } else {
+                printf(".");
+            }
+        }
+        printf("\n");
+    }
+}
+
+static void print_report(const char *op, struct timespec *t, int64_t offset,
+                         int64_t count, int64_t total, int cnt, bool Cflag)
+{
+    char s1[64], s2[64], ts[64];
+
+    timestr(t, ts, sizeof(ts), Cflag ? VERBOSE_FIXED_TIME : 0);
+    if (!Cflag) {
+        cvtstr((double)total, s1, sizeof(s1));
+        cvtstr(tdiv((double)total, *t), s2, sizeof(s2));
+        printf("%s %"PRId64"/%"PRId64" bytes at offset %" PRId64 "\n",
+               op, total, count, offset);
+        printf("%s, %d ops; %s (%s/sec and %.4f ops/sec)\n",
+               s1, cnt, ts, s2, tdiv((double)cnt, *t));
+    } else {/* bytes,ops,time,bytes/sec,ops/sec */
+        printf("%"PRId64",%d,%s,%.3f,%.3f\n",
+            total, cnt, ts,
+            tdiv((double)total, *t),
+            tdiv((double)cnt, *t));
+    }
+}
+
+/*
+ * Parse multiple length statements for vectored I/O, and construct an I/O
+ * vector matching it.
+ */
+static void *
+create_iovec(BlockBackend *blk, QEMUIOVector *qiov, char **argv, int nr_iov,
+             int pattern)
+{
+    size_t *sizes = g_new0(size_t, nr_iov);
+    size_t count = 0;
+    void *buf = NULL;
+    void *p;
+    int i;
+
+    for (i = 0; i < nr_iov; i++) {
+        char *arg = argv[i];
+        int64_t len;
+
+        len = cvtnum(arg);
+        if (len < 0) {
+            print_cvtnum_err(len, arg);
+            goto fail;
+        }
+
+        if (len > BDRV_REQUEST_MAX_BYTES) {
+            printf("Argument '%s' exceeds maximum size %" PRIu64 "\n", arg,
+                   (uint64_t)BDRV_REQUEST_MAX_BYTES);
+            goto fail;
+        }
+
+        if (count > BDRV_REQUEST_MAX_BYTES - len) {
+            printf("The total number of bytes exceed the maximum size %" PRIu64
+                   "\n", (uint64_t)BDRV_REQUEST_MAX_BYTES);
+            goto fail;
+        }
+
+        sizes[i] = len;
+        count += len;
+    }
+
+    qemu_iovec_init(qiov, nr_iov);
+
+    buf = p = qemu_io_alloc(blk, count, pattern);
+
+    for (i = 0; i < nr_iov; i++) {
+        qemu_iovec_add(qiov, p, sizes[i]);
+        p += sizes[i];
+    }
+
+fail:
+    g_free(sizes);
+    return buf;
+}
+
+static int do_pread(BlockBackend *blk, char *buf, int64_t offset,
+                    int64_t bytes, int64_t *total)
+{
+    if (bytes > INT_MAX) {
+        return -ERANGE;
+    }
+
+    *total = blk_pread(blk, offset, (uint8_t *)buf, bytes);
+    if (*total < 0) {
+        return *total;
+    }
+    return 1;
+}
+
+static int do_pwrite(BlockBackend *blk, char *buf, int64_t offset,
+                     int64_t bytes, int flags, int64_t *total)
+{
+    if (bytes > INT_MAX) {
+        return -ERANGE;
+    }
+
+    *total = blk_pwrite(blk, offset, (uint8_t *)buf, bytes, flags);
+    if (*total < 0) {
+        return *total;
+    }
+    return 1;
+}
+
+typedef struct {
+    BlockBackend *blk;
+    int64_t offset;
+    int64_t bytes;
+    int64_t *total;
+    int flags;
+    int ret;
+    bool done;
+} CoWriteZeroes;
+
+static void coroutine_fn co_pwrite_zeroes_entry(void *opaque)
+{
+    CoWriteZeroes *data = opaque;
+
+    data->ret = blk_co_pwrite_zeroes(data->blk, data->offset, data->bytes,
+                                     data->flags);
+    data->done = true;
+    if (data->ret < 0) {
+        *data->total = data->ret;
+        return;
+    }
+
+    *data->total = data->bytes;
+}
+
+static int do_co_pwrite_zeroes(BlockBackend *blk, int64_t offset,
+                               int64_t bytes, int flags, int64_t *total)
+{
+    Coroutine *co;
+    CoWriteZeroes data = {
+        .blk    = blk,
+        .offset = offset,
+        .bytes  = bytes,
+        .total  = total,
+        .flags  = flags,
+        .done   = false,
+    };
+
+    if (bytes > INT_MAX) {
+        return -ERANGE;
+    }
+
+    co = qemu_coroutine_create(co_pwrite_zeroes_entry, &data);
+    bdrv_coroutine_enter(blk_bs(blk), co);
+    while (!data.done) {
+        aio_poll(blk_get_aio_context(blk), true);
+    }
+    if (data.ret < 0) {
+        return data.ret;
+    } else {
+        return 1;
+    }
+}
+
+static int do_write_compressed(BlockBackend *blk, char *buf, int64_t offset,
+                               int64_t bytes, int64_t *total)
+{
+    int ret;
+
+    if (bytes > BDRV_REQUEST_MAX_BYTES) {
+        return -ERANGE;
+    }
+
+    ret = blk_pwrite_compressed(blk, offset, buf, bytes);
+    if (ret < 0) {
+        return ret;
+    }
+    *total = bytes;
+    return 1;
+}
+
+static int do_load_vmstate(BlockBackend *blk, char *buf, int64_t offset,
+                           int64_t count, int64_t *total)
+{
+    if (count > INT_MAX) {
+        return -ERANGE;
+    }
+
+    *total = blk_load_vmstate(blk, (uint8_t *)buf, offset, count);
+    if (*total < 0) {
+        return *total;
+    }
+    return 1;
+}
+
+static int do_save_vmstate(BlockBackend *blk, char *buf, int64_t offset,
+                           int64_t count, int64_t *total)
+{
+    if (count > INT_MAX) {
+        return -ERANGE;
+    }
+
+    *total = blk_save_vmstate(blk, (uint8_t *)buf, offset, count);
+    if (*total < 0) {
+        return *total;
+    }
+    return 1;
+}
+
+#define NOT_DONE 0x7fffffff
+static void aio_rw_done(void *opaque, int ret)
+{
+    *(int *)opaque = ret;
+}
+
+static int do_aio_readv(BlockBackend *blk, QEMUIOVector *qiov,
+                        int64_t offset, int *total)
+{
+    int async_ret = NOT_DONE;
+
+    blk_aio_preadv(blk, offset, qiov, 0, aio_rw_done, &async_ret);
+    while (async_ret == NOT_DONE) {
+        main_loop_wait(false);
+    }
+
+    *total = qiov->size;
+    return async_ret < 0 ? async_ret : 1;
+}
+
+static int do_aio_writev(BlockBackend *blk, QEMUIOVector *qiov,
+                         int64_t offset, int flags, int *total)
+{
+    int async_ret = NOT_DONE;
+
+    blk_aio_pwritev(blk, offset, qiov, flags, aio_rw_done, &async_ret);
+    while (async_ret == NOT_DONE) {
+        main_loop_wait(false);
+    }
+
+    *total = qiov->size;
+    return async_ret < 0 ? async_ret : 1;
+}
+
+static void read_help(void)
+{
+    printf(
+"\n"
+" reads a range of bytes from the given offset\n"
+"\n"
+" Example:\n"
+" 'read -v 512 1k' - dumps 1 kilobyte read from 512 bytes into the file\n"
+"\n"
+" Reads a segment of the currently open file, optionally dumping it to the\n"
+" standard output stream (with -v option) for subsequent inspection.\n"
+" -b, -- read from the VM state rather than the virtual disk\n"
+" -C, -- report statistics in a machine parsable format\n"
+" -l, -- length for pattern verification (only with -P)\n"
+" -p, -- ignored for backwards compatibility\n"
+" -P, -- use a pattern to verify read data\n"
+" -q, -- quiet mode, do not show I/O statistics\n"
+" -s, -- start offset for pattern verification (only with -P)\n"
+" -v, -- dump buffer to standard output\n"
+"\n");
+}
+
+static int read_f(BlockBackend *blk, int argc, char **argv);
+
+static const cmdinfo_t read_cmd = {
+    .name       = "read",
+    .altname    = "r",
+    .cfunc      = read_f,
+    .argmin     = 2,
+    .argmax     = -1,
+    .args       = "[-abCqv] [-P pattern [-s off] [-l len]] off len",
+    .oneline    = "reads a number of bytes at a specified offset",
+    .help       = read_help,
+};
+
+static int read_f(BlockBackend *blk, int argc, char **argv)
+{
+    struct timespec t1, t2;
+    bool Cflag = false, qflag = false, vflag = false;
+    bool Pflag = false, sflag = false, lflag = false, bflag = false;
+    int c, cnt, ret;
+    char *buf;
+    int64_t offset;
+    int64_t count;
+    /* Some compilers get confused and warn if this is not initialized.  */
+    int64_t total = 0;
+    int pattern = 0;
+    int64_t pattern_offset = 0, pattern_count = 0;
+
+    while ((c = getopt(argc, argv, "bCl:pP:qs:v")) != -1) {
+        switch (c) {
+        case 'b':
+            bflag = true;
+            break;
+        case 'C':
+            Cflag = true;
+            break;
+        case 'l':
+            lflag = true;
+            pattern_count = cvtnum(optarg);
+            if (pattern_count < 0) {
+                print_cvtnum_err(pattern_count, optarg);
+                return pattern_count;
+            }
+            break;
+        case 'p':
+            /* Ignored for backwards compatibility */
+            break;
+        case 'P':
+            Pflag = true;
+            pattern = parse_pattern(optarg);
+            if (pattern < 0) {
+                return -EINVAL;
+            }
+            break;
+        case 'q':
+            qflag = true;
+            break;
+        case 's':
+            sflag = true;
+            pattern_offset = cvtnum(optarg);
+            if (pattern_offset < 0) {
+                print_cvtnum_err(pattern_offset, optarg);
+                return pattern_offset;
+            }
+            break;
+        case 'v':
+            vflag = true;
+            break;
+        default:
+            qemuio_command_usage(&read_cmd);
+            return -EINVAL;
+        }
+    }
+
+    if (optind != argc - 2) {
+        qemuio_command_usage(&read_cmd);
+        return -EINVAL;
+    }
+
+    offset = cvtnum(argv[optind]);
+    if (offset < 0) {
+        print_cvtnum_err(offset, argv[optind]);
+        return offset;
+    }
+
+    optind++;
+    count = cvtnum(argv[optind]);
+    if (count < 0) {
+        print_cvtnum_err(count, argv[optind]);
+        return count;
+    } else if (count > BDRV_REQUEST_MAX_BYTES) {
+        printf("length cannot exceed %" PRIu64 ", given %s\n",
+               (uint64_t)BDRV_REQUEST_MAX_BYTES, argv[optind]);
+        return -EINVAL;
+    }
+
+    if (!Pflag && (lflag || sflag)) {
+        qemuio_command_usage(&read_cmd);
+        return -EINVAL;
+    }
+
+    if (!lflag) {
+        pattern_count = count - pattern_offset;
+    }
+
+    if ((pattern_count < 0) || (pattern_count + pattern_offset > count))  {
+        printf("pattern verification range exceeds end of read data\n");
+        return -EINVAL;
+    }
+
+    if (bflag) {
+        if (!QEMU_IS_ALIGNED(offset, BDRV_SECTOR_SIZE)) {
+            printf("%" PRId64 " is not a sector-aligned value for 'offset'\n",
+                   offset);
+            return -EINVAL;
+        }
+        if (!QEMU_IS_ALIGNED(count, BDRV_SECTOR_SIZE)) {
+            printf("%"PRId64" is not a sector-aligned value for 'count'\n",
+                   count);
+            return -EINVAL;
+        }
+    }
+
+    buf = qemu_io_alloc(blk, count, 0xab);
+
+    clock_gettime(CLOCK_MONOTONIC, &t1);
+    if (bflag) {
+        ret = do_load_vmstate(blk, buf, offset, count, &total);
+    } else {
+        ret = do_pread(blk, buf, offset, count, &total);
+    }
+    clock_gettime(CLOCK_MONOTONIC, &t2);
+
+    if (ret < 0) {
+        printf("read failed: %s\n", strerror(-ret));
+        goto out;
+    }
+    cnt = ret;
+
+    ret = 0;
+
+    if (Pflag) {
+        void *cmp_buf = g_malloc(pattern_count);
+        memset(cmp_buf, pattern, pattern_count);
+        if (memcmp(buf + pattern_offset, cmp_buf, pattern_count)) {
+            printf("Pattern verification failed at offset %"
+                   PRId64 ", %"PRId64" bytes\n",
+                   offset + pattern_offset, pattern_count);
+            ret = -EINVAL;
+        }
+        g_free(cmp_buf);
+    }
+
+    if (qflag) {
+        goto out;
+    }
+
+    if (vflag) {
+        dump_buffer(buf, offset, count);
+    }
+
+    /* Finally, report back -- -C gives a parsable format */
+    t2 = tsub(t2, t1);
+    print_report("read", &t2, offset, count, total, cnt, Cflag);
+
+out:
+    qemu_io_free(buf);
+    return ret;
+}
+
+static void readv_help(void)
+{
+    printf(
+"\n"
+" reads a range of bytes from the given offset into multiple buffers\n"
+"\n"
+" Example:\n"
+" 'readv -v 512 1k 1k ' - dumps 2 kilobytes read from 512 bytes into the file\n"
+"\n"
+" Reads a segment of the currently open file, optionally dumping it to the\n"
+" standard output stream (with -v option) for subsequent inspection.\n"
+" Uses multiple iovec buffers if more than one byte range is specified.\n"
+" -C, -- report statistics in a machine parsable format\n"
+" -P, -- use a pattern to verify read data\n"
+" -v, -- dump buffer to standard output\n"
+" -q, -- quiet mode, do not show I/O statistics\n"
+"\n");
+}
+
+static int readv_f(BlockBackend *blk, int argc, char **argv);
+
+static const cmdinfo_t readv_cmd = {
+    .name       = "readv",
+    .cfunc      = readv_f,
+    .argmin     = 2,
+    .argmax     = -1,
+    .args       = "[-Cqv] [-P pattern] off len [len..]",
+    .oneline    = "reads a number of bytes at a specified offset",
+    .help       = readv_help,
+};
+
+static int readv_f(BlockBackend *blk, int argc, char **argv)
+{
+    struct timespec t1, t2;
+    bool Cflag = false, qflag = false, vflag = false;
+    int c, cnt, ret;
+    char *buf;
+    int64_t offset;
+    /* Some compilers get confused and warn if this is not initialized.  */
+    int total = 0;
+    int nr_iov;
+    QEMUIOVector qiov;
+    int pattern = 0;
+    bool Pflag = false;
+
+    while ((c = getopt(argc, argv, "CP:qv")) != -1) {
+        switch (c) {
+        case 'C':
+            Cflag = true;
+            break;
+        case 'P':
+            Pflag = true;
+            pattern = parse_pattern(optarg);
+            if (pattern < 0) {
+                return -EINVAL;
+            }
+            break;
+        case 'q':
+            qflag = true;
+            break;
+        case 'v':
+            vflag = true;
+            break;
+        default:
+            qemuio_command_usage(&readv_cmd);
+            return -EINVAL;
+        }
+    }
+
+    if (optind > argc - 2) {
+        qemuio_command_usage(&readv_cmd);
+        return -EINVAL;
+    }
+
+
+    offset = cvtnum(argv[optind]);
+    if (offset < 0) {
+        print_cvtnum_err(offset, argv[optind]);
+        return offset;
+    }
+    optind++;
+
+    nr_iov = argc - optind;
+    buf = create_iovec(blk, &qiov, &argv[optind], nr_iov, 0xab);
+    if (buf == NULL) {
+        return -EINVAL;
+    }
+
+    clock_gettime(CLOCK_MONOTONIC, &t1);
+    ret = do_aio_readv(blk, &qiov, offset, &total);
+    clock_gettime(CLOCK_MONOTONIC, &t2);
+
+    if (ret < 0) {
+        printf("readv failed: %s\n", strerror(-ret));
+        goto out;
+    }
+    cnt = ret;
+
+    ret = 0;
+
+    if (Pflag) {
+        void *cmp_buf = g_malloc(qiov.size);
+        memset(cmp_buf, pattern, qiov.size);
+        if (memcmp(buf, cmp_buf, qiov.size)) {
+            printf("Pattern verification failed at offset %"
+                   PRId64 ", %zu bytes\n", offset, qiov.size);
+            ret = -EINVAL;
+        }
+        g_free(cmp_buf);
+    }
+
+    if (qflag) {
+        goto out;
+    }
+
+    if (vflag) {
+        dump_buffer(buf, offset, qiov.size);
+    }
+
+    /* Finally, report back -- -C gives a parsable format */
+    t2 = tsub(t2, t1);
+    print_report("read", &t2, offset, qiov.size, total, cnt, Cflag);
+
+out:
+    qemu_iovec_destroy(&qiov);
+    qemu_io_free(buf);
+    return ret;
+}
+
+static void write_help(void)
+{
+    printf(
+"\n"
+" writes a range of bytes from the given offset\n"
+"\n"
+" Example:\n"
+" 'write 512 1k' - writes 1 kilobyte at 512 bytes into the open file\n"
+"\n"
+" Writes into a segment of the currently open file, using a buffer\n"
+" filled with a set pattern (0xcdcdcdcd).\n"
+" -b, -- write to the VM state rather than the virtual disk\n"
+" -c, -- write compressed data with blk_write_compressed\n"
+" -f, -- use Force Unit Access semantics\n"
+" -n, -- with -z, don't allow slow fallback\n"
+" -p, -- ignored for backwards compatibility\n"
+" -P, -- use different pattern to fill file\n"
+" -s, -- use a pattern file to fill the write buffer\n"
+" -C, -- report statistics in a machine parsable format\n"
+" -q, -- quiet mode, do not show I/O statistics\n"
+" -u, -- with -z, allow unmapping\n"
+" -z, -- write zeroes using blk_co_pwrite_zeroes\n"
+"\n");
+}
+
+static int write_f(BlockBackend *blk, int argc, char **argv);
+
+static const cmdinfo_t write_cmd = {
+    .name       = "write",
+    .altname    = "w",
+    .cfunc      = write_f,
+    .perm       = BLK_PERM_WRITE,
+    .argmin     = 2,
+    .argmax     = -1,
+    .args       = "[-bcCfnquz] [-P pattern | -s source_file] off len",
+    .oneline    = "writes a number of bytes at a specified offset",
+    .help       = write_help,
+};
+
+static int write_f(BlockBackend *blk, int argc, char **argv)
+{
+    struct timespec t1, t2;
+    bool Cflag = false, qflag = false, bflag = false;
+    bool Pflag = false, zflag = false, cflag = false, sflag = false;
+    int flags = 0;
+    int c, cnt, ret;
+    char *buf = NULL;
+    int64_t offset;
+    int64_t count;
+    /* Some compilers get confused and warn if this is not initialized.  */
+    int64_t total = 0;
+    int pattern = 0xcd;
+    const char *file_name = NULL;
+
+    while ((c = getopt(argc, argv, "bcCfnpP:qs:uz")) != -1) {
+        switch (c) {
+        case 'b':
+            bflag = true;
+            break;
+        case 'c':
+            cflag = true;
+            break;
+        case 'C':
+            Cflag = true;
+            break;
+        case 'f':
+            flags |= BDRV_REQ_FUA;
+            break;
+        case 'n':
+            flags |= BDRV_REQ_NO_FALLBACK;
+            break;
+        case 'p':
+            /* Ignored for backwards compatibility */
+            break;
+        case 'P':
+            Pflag = true;
+            pattern = parse_pattern(optarg);
+            if (pattern < 0) {
+                return -EINVAL;
+            }
+            break;
+        case 'q':
+            qflag = true;
+            break;
+        case 's':
+            sflag = true;
+            file_name = optarg;
+            break;
+        case 'u':
+            flags |= BDRV_REQ_MAY_UNMAP;
+            break;
+        case 'z':
+            zflag = true;
+            break;
+        default:
+            qemuio_command_usage(&write_cmd);
+            return -EINVAL;
+        }
+    }
+
+    if (optind != argc - 2) {
+        qemuio_command_usage(&write_cmd);
+        return -EINVAL;
+    }
+
+    if (bflag && zflag) {
+        printf("-b and -z cannot be specified at the same time\n");
+        return -EINVAL;
+    }
+
+    if ((flags & BDRV_REQ_FUA) && (bflag || cflag)) {
+        printf("-f and -b or -c cannot be specified at the same time\n");
+        return -EINVAL;
+    }
+
+    if ((flags & BDRV_REQ_NO_FALLBACK) && !zflag) {
+        printf("-n requires -z to be specified\n");
+        return -EINVAL;
+    }
+
+    if ((flags & BDRV_REQ_MAY_UNMAP) && !zflag) {
+        printf("-u requires -z to be specified\n");
+        return -EINVAL;
+    }
+
+    if (zflag + Pflag + sflag > 1) {
+        printf("Only one of -z, -P, and -s "
+               "can be specified at the same time\n");
+        return -EINVAL;
+    }
+
+    offset = cvtnum(argv[optind]);
+    if (offset < 0) {
+        print_cvtnum_err(offset, argv[optind]);
+        return offset;
+    }
+
+    optind++;
+    count = cvtnum(argv[optind]);
+    if (count < 0) {
+        print_cvtnum_err(count, argv[optind]);
+        return count;
+    } else if (count > BDRV_REQUEST_MAX_BYTES) {
+        printf("length cannot exceed %" PRIu64 ", given %s\n",
+               (uint64_t)BDRV_REQUEST_MAX_BYTES, argv[optind]);
+        return -EINVAL;
+    }
+
+    if (bflag || cflag) {
+        if (!QEMU_IS_ALIGNED(offset, BDRV_SECTOR_SIZE)) {
+            printf("%" PRId64 " is not a sector-aligned value for 'offset'\n",
+                   offset);
+            return -EINVAL;
+        }
+
+        if (!QEMU_IS_ALIGNED(count, BDRV_SECTOR_SIZE)) {
+            printf("%"PRId64" is not a sector-aligned value for 'count'\n",
+                   count);
+            return -EINVAL;
+        }
+    }
+
+    if (!zflag) {
+        if (sflag) {
+            buf = qemu_io_alloc_from_file(blk, count, file_name);
+            if (!buf) {
+                return -EINVAL;
+            }
+        } else {
+            buf = qemu_io_alloc(blk, count, pattern);
+        }
+    }
+
+    clock_gettime(CLOCK_MONOTONIC, &t1);
+    if (bflag) {
+        ret = do_save_vmstate(blk, buf, offset, count, &total);
+    } else if (zflag) {
+        ret = do_co_pwrite_zeroes(blk, offset, count, flags, &total);
+    } else if (cflag) {
+        ret = do_write_compressed(blk, buf, offset, count, &total);
+    } else {
+        ret = do_pwrite(blk, buf, offset, count, flags, &total);
+    }
+    clock_gettime(CLOCK_MONOTONIC, &t2);
+
+    if (ret < 0) {
+        printf("write failed: %s\n", strerror(-ret));
+        goto out;
+    }
+    cnt = ret;
+
+    ret = 0;
+
+    if (qflag) {
+        goto out;
+    }
+
+    /* Finally, report back -- -C gives a parsable format */
+    t2 = tsub(t2, t1);
+    print_report("wrote", &t2, offset, count, total, cnt, Cflag);
+
+out:
+    if (!zflag) {
+        qemu_io_free(buf);
+    }
+    return ret;
+}
+
+static void
+writev_help(void)
+{
+    printf(
+"\n"
+" writes a range of bytes from the given offset source from multiple buffers\n"
+"\n"
+" Example:\n"
+" 'writev 512 1k 1k' - writes 2 kilobytes at 512 bytes into the open file\n"
+"\n"
+" Writes into a segment of the currently open file, using a buffer\n"
+" filled with a set pattern (0xcdcdcdcd).\n"
+" -P, -- use different pattern to fill file\n"
+" -C, -- report statistics in a machine parsable format\n"
+" -f, -- use Force Unit Access semantics\n"
+" -q, -- quiet mode, do not show I/O statistics\n"
+"\n");
+}
+
+static int writev_f(BlockBackend *blk, int argc, char **argv);
+
+static const cmdinfo_t writev_cmd = {
+    .name       = "writev",
+    .cfunc      = writev_f,
+    .perm       = BLK_PERM_WRITE,
+    .argmin     = 2,
+    .argmax     = -1,
+    .args       = "[-Cfq] [-P pattern] off len [len..]",
+    .oneline    = "writes a number of bytes at a specified offset",
+    .help       = writev_help,
+};
+
+static int writev_f(BlockBackend *blk, int argc, char **argv)
+{
+    struct timespec t1, t2;
+    bool Cflag = false, qflag = false;
+    int flags = 0;
+    int c, cnt, ret;
+    char *buf;
+    int64_t offset;
+    /* Some compilers get confused and warn if this is not initialized.  */
+    int total = 0;
+    int nr_iov;
+    int pattern = 0xcd;
+    QEMUIOVector qiov;
+
+    while ((c = getopt(argc, argv, "CfqP:")) != -1) {
+        switch (c) {
+        case 'C':
+            Cflag = true;
+            break;
+        case 'f':
+            flags |= BDRV_REQ_FUA;
+            break;
+        case 'q':
+            qflag = true;
+            break;
+        case 'P':
+            pattern = parse_pattern(optarg);
+            if (pattern < 0) {
+                return -EINVAL;
+            }
+            break;
+        default:
+            qemuio_command_usage(&writev_cmd);
+            return -EINVAL;
+        }
+    }
+
+    if (optind > argc - 2) {
+        qemuio_command_usage(&writev_cmd);
+        return -EINVAL;
+    }
+
+    offset = cvtnum(argv[optind]);
+    if (offset < 0) {
+        print_cvtnum_err(offset, argv[optind]);
+        return offset;
+    }
+    optind++;
+
+    nr_iov = argc - optind;
+    buf = create_iovec(blk, &qiov, &argv[optind], nr_iov, pattern);
+    if (buf == NULL) {
+        return -EINVAL;
+    }
+
+    clock_gettime(CLOCK_MONOTONIC, &t1);
+    ret = do_aio_writev(blk, &qiov, offset, flags, &total);
+    clock_gettime(CLOCK_MONOTONIC, &t2);
+
+    if (ret < 0) {
+        printf("writev failed: %s\n", strerror(-ret));
+        goto out;
+    }
+    cnt = ret;
+
+    ret = 0;
+
+    if (qflag) {
+        goto out;
+    }
+
+    /* Finally, report back -- -C gives a parsable format */
+    t2 = tsub(t2, t1);
+    print_report("wrote", &t2, offset, qiov.size, total, cnt, Cflag);
+out:
+    qemu_iovec_destroy(&qiov);
+    qemu_io_free(buf);
+    return ret;
+}
+
+struct aio_ctx {
+    BlockBackend *blk;
+    QEMUIOVector qiov;
+    int64_t offset;
+    char *buf;
+    bool qflag;
+    bool vflag;
+    bool Cflag;
+    bool Pflag;
+    bool zflag;
+    BlockAcctCookie acct;
+    int pattern;
+    struct timespec t1;
+};
+
+static void aio_write_done(void *opaque, int ret)
+{
+    struct aio_ctx *ctx = opaque;
+    struct timespec t2;
+
+    clock_gettime(CLOCK_MONOTONIC, &t2);
+
+
+    if (ret < 0) {
+        printf("aio_write failed: %s\n", strerror(-ret));
+        block_acct_failed(blk_get_stats(ctx->blk), &ctx->acct);
+        goto out;
+    }
+
+    block_acct_done(blk_get_stats(ctx->blk), &ctx->acct);
+
+    if (ctx->qflag) {
+        goto out;
+    }
+
+    /* Finally, report back -- -C gives a parsable format */
+    t2 = tsub(t2, ctx->t1);
+    print_report("wrote", &t2, ctx->offset, ctx->qiov.size,
+                 ctx->qiov.size, 1, ctx->Cflag);
+out:
+    if (!ctx->zflag) {
+        qemu_io_free(ctx->buf);
+        qemu_iovec_destroy(&ctx->qiov);
+    }
+    g_free(ctx);
+}
+
+static void aio_read_done(void *opaque, int ret)
+{
+    struct aio_ctx *ctx = opaque;
+    struct timespec t2;
+
+    clock_gettime(CLOCK_MONOTONIC, &t2);
+
+    if (ret < 0) {
+        printf("readv failed: %s\n", strerror(-ret));
+        block_acct_failed(blk_get_stats(ctx->blk), &ctx->acct);
+        goto out;
+    }
+
+    if (ctx->Pflag) {
+        void *cmp_buf = g_malloc(ctx->qiov.size);
+
+        memset(cmp_buf, ctx->pattern, ctx->qiov.size);
+        if (memcmp(ctx->buf, cmp_buf, ctx->qiov.size)) {
+            printf("Pattern verification failed at offset %"
+                   PRId64 ", %zu bytes\n", ctx->offset, ctx->qiov.size);
+        }
+        g_free(cmp_buf);
+    }
+
+    block_acct_done(blk_get_stats(ctx->blk), &ctx->acct);
+
+    if (ctx->qflag) {
+        goto out;
+    }
+
+    if (ctx->vflag) {
+        dump_buffer(ctx->buf, ctx->offset, ctx->qiov.size);
+    }
+
+    /* Finally, report back -- -C gives a parsable format */
+    t2 = tsub(t2, ctx->t1);
+    print_report("read", &t2, ctx->offset, ctx->qiov.size,
+                 ctx->qiov.size, 1, ctx->Cflag);
+out:
+    qemu_io_free(ctx->buf);
+    qemu_iovec_destroy(&ctx->qiov);
+    g_free(ctx);
+}
+
+static void aio_read_help(void)
+{
+    printf(
+"\n"
+" asynchronously reads a range of bytes from the given offset\n"
+"\n"
+" Example:\n"
+" 'aio_read -v 512 1k 1k ' - dumps 2 kilobytes read from 512 bytes into the file\n"
+"\n"
+" Reads a segment of the currently open file, optionally dumping it to the\n"
+" standard output stream (with -v option) for subsequent inspection.\n"
+" The read is performed asynchronously and the aio_flush command must be\n"
+" used to ensure all outstanding aio requests have been completed.\n"
+" Note that due to its asynchronous nature, this command will be\n"
+" considered successful once the request is submitted, independently\n"
+" of potential I/O errors or pattern mismatches.\n"
+" -C, -- report statistics in a machine parsable format\n"
+" -P, -- use a pattern to verify read data\n"
+" -i, -- treat request as invalid, for exercising stats\n"
+" -v, -- dump buffer to standard output\n"
+" -q, -- quiet mode, do not show I/O statistics\n"
+"\n");
+}
+
+static int aio_read_f(BlockBackend *blk, int argc, char **argv);
+
+static const cmdinfo_t aio_read_cmd = {
+    .name       = "aio_read",
+    .cfunc      = aio_read_f,
+    .argmin     = 2,
+    .argmax     = -1,
+    .args       = "[-Ciqv] [-P pattern] off len [len..]",
+    .oneline    = "asynchronously reads a number of bytes",
+    .help       = aio_read_help,
+};
+
+static int aio_read_f(BlockBackend *blk, int argc, char **argv)
+{
+    int nr_iov, c;
+    struct aio_ctx *ctx = g_new0(struct aio_ctx, 1);
+
+    ctx->blk = blk;
+    while ((c = getopt(argc, argv, "CP:iqv")) != -1) {
+        switch (c) {
+        case 'C':
+            ctx->Cflag = true;
+            break;
+        case 'P':
+            ctx->Pflag = true;
+            ctx->pattern = parse_pattern(optarg);
+            if (ctx->pattern < 0) {
+                g_free(ctx);
+                return -EINVAL;
+            }
+            break;
+        case 'i':
+            printf("injecting invalid read request\n");
+            block_acct_invalid(blk_get_stats(blk), BLOCK_ACCT_READ);
+            g_free(ctx);
+            return 0;
+        case 'q':
+            ctx->qflag = true;
+            break;
+        case 'v':
+            ctx->vflag = true;
+            break;
+        default:
+            g_free(ctx);
+            qemuio_command_usage(&aio_read_cmd);
+            return -EINVAL;
+        }
+    }
+
+    if (optind > argc - 2) {
+        g_free(ctx);
+        qemuio_command_usage(&aio_read_cmd);
+        return -EINVAL;
+    }
+
+    ctx->offset = cvtnum(argv[optind]);
+    if (ctx->offset < 0) {
+        int ret = ctx->offset;
+        print_cvtnum_err(ret, argv[optind]);
+        g_free(ctx);
+        return ret;
+    }
+    optind++;
+
+    nr_iov = argc - optind;
+    ctx->buf = create_iovec(blk, &ctx->qiov, &argv[optind], nr_iov, 0xab);
+    if (ctx->buf == NULL) {
+        block_acct_invalid(blk_get_stats(blk), BLOCK_ACCT_READ);
+        g_free(ctx);
+        return -EINVAL;
+    }
+
+    clock_gettime(CLOCK_MONOTONIC, &ctx->t1);
+    block_acct_start(blk_get_stats(blk), &ctx->acct, ctx->qiov.size,
+                     BLOCK_ACCT_READ);
+    blk_aio_preadv(blk, ctx->offset, &ctx->qiov, 0, aio_read_done, ctx);
+    return 0;
+}
+
+static void aio_write_help(void)
+{
+    printf(
+"\n"
+" asynchronously writes a range of bytes from the given offset source\n"
+" from multiple buffers\n"
+"\n"
+" Example:\n"
+" 'aio_write 512 1k 1k' - writes 2 kilobytes at 512 bytes into the open file\n"
+"\n"
+" Writes into a segment of the currently open file, using a buffer\n"
+" filled with a set pattern (0xcdcdcdcd).\n"
+" The write is performed asynchronously and the aio_flush command must be\n"
+" used to ensure all outstanding aio requests have been completed.\n"
+" Note that due to its asynchronous nature, this command will be\n"
+" considered successful once the request is submitted, independently\n"
+" of potential I/O errors or pattern mismatches.\n"
+" -P, -- use different pattern to fill file\n"
+" -C, -- report statistics in a machine parsable format\n"
+" -f, -- use Force Unit Access semantics\n"
+" -i, -- treat request as invalid, for exercising stats\n"
+" -q, -- quiet mode, do not show I/O statistics\n"
+" -u, -- with -z, allow unmapping\n"
+" -z, -- write zeroes using blk_aio_pwrite_zeroes\n"
+"\n");
+}
+
+static int aio_write_f(BlockBackend *blk, int argc, char **argv);
+
+static const cmdinfo_t aio_write_cmd = {
+    .name       = "aio_write",
+    .cfunc      = aio_write_f,
+    .perm       = BLK_PERM_WRITE,
+    .argmin     = 2,
+    .argmax     = -1,
+    .args       = "[-Cfiquz] [-P pattern] off len [len..]",
+    .oneline    = "asynchronously writes a number of bytes",
+    .help       = aio_write_help,
+};
+
+static int aio_write_f(BlockBackend *blk, int argc, char **argv)
+{
+    int nr_iov, c;
+    int pattern = 0xcd;
+    struct aio_ctx *ctx = g_new0(struct aio_ctx, 1);
+    int flags = 0;
+
+    ctx->blk = blk;
+    while ((c = getopt(argc, argv, "CfiqP:uz")) != -1) {
+        switch (c) {
+        case 'C':
+            ctx->Cflag = true;
+            break;
+        case 'f':
+            flags |= BDRV_REQ_FUA;
+            break;
+        case 'q':
+            ctx->qflag = true;
+            break;
+        case 'u':
+            flags |= BDRV_REQ_MAY_UNMAP;
+            break;
+        case 'P':
+            pattern = parse_pattern(optarg);
+            if (pattern < 0) {
+                g_free(ctx);
+                return -EINVAL;
+            }
+            break;
+        case 'i':
+            printf("injecting invalid write request\n");
+            block_acct_invalid(blk_get_stats(blk), BLOCK_ACCT_WRITE);
+            g_free(ctx);
+            return 0;
+        case 'z':
+            ctx->zflag = true;
+            break;
+        default:
+            g_free(ctx);
+            qemuio_command_usage(&aio_write_cmd);
+            return -EINVAL;
+        }
+    }
+
+    if (optind > argc - 2) {
+        g_free(ctx);
+        qemuio_command_usage(&aio_write_cmd);
+        return -EINVAL;
+    }
+
+    if (ctx->zflag && optind != argc - 2) {
+        printf("-z supports only a single length parameter\n");
+        g_free(ctx);
+        return -EINVAL;
+    }
+
+    if ((flags & BDRV_REQ_MAY_UNMAP) && !ctx->zflag) {
+        printf("-u requires -z to be specified\n");
+        g_free(ctx);
+        return -EINVAL;
+    }
+
+    if (ctx->zflag && ctx->Pflag) {
+        printf("-z and -P cannot be specified at the same time\n");
+        g_free(ctx);
+        return -EINVAL;
+    }
+
+    ctx->offset = cvtnum(argv[optind]);
+    if (ctx->offset < 0) {
+        int ret = ctx->offset;
+        print_cvtnum_err(ret, argv[optind]);
+        g_free(ctx);
+        return ret;
+    }
+    optind++;
+
+    if (ctx->zflag) {
+        int64_t count = cvtnum(argv[optind]);
+        if (count < 0) {
+            print_cvtnum_err(count, argv[optind]);
+            g_free(ctx);
+            return count;
+        }
+
+        ctx->qiov.size = count;
+        blk_aio_pwrite_zeroes(blk, ctx->offset, count, flags, aio_write_done,
+                              ctx);
+    } else {
+        nr_iov = argc - optind;
+        ctx->buf = create_iovec(blk, &ctx->qiov, &argv[optind], nr_iov,
+                                pattern);
+        if (ctx->buf == NULL) {
+            block_acct_invalid(blk_get_stats(blk), BLOCK_ACCT_WRITE);
+            g_free(ctx);
+            return -EINVAL;
+        }
+
+        clock_gettime(CLOCK_MONOTONIC, &ctx->t1);
+        block_acct_start(blk_get_stats(blk), &ctx->acct, ctx->qiov.size,
+                         BLOCK_ACCT_WRITE);
+
+        blk_aio_pwritev(blk, ctx->offset, &ctx->qiov, flags, aio_write_done,
+                        ctx);
+    }
+
+    return 0;
+}
+
+static int aio_flush_f(BlockBackend *blk, int argc, char **argv)
+{
+    BlockAcctCookie cookie;
+    block_acct_start(blk_get_stats(blk), &cookie, 0, BLOCK_ACCT_FLUSH);
+    blk_drain_all();
+    block_acct_done(blk_get_stats(blk), &cookie);
+    return 0;
+}
+
+static const cmdinfo_t aio_flush_cmd = {
+    .name       = "aio_flush",
+    .cfunc      = aio_flush_f,
+    .oneline    = "completes all outstanding aio requests"
+};
+
+static int flush_f(BlockBackend *blk, int argc, char **argv)
+{
+    return blk_flush(blk);
+}
+
+static const cmdinfo_t flush_cmd = {
+    .name       = "flush",
+    .altname    = "f",
+    .cfunc      = flush_f,
+    .oneline    = "flush all in-core file state to disk",
+};
+
+static int truncate_f(BlockBackend *blk, int argc, char **argv)
+{
+    Error *local_err = NULL;
+    int64_t offset;
+    int ret;
+
+    offset = cvtnum(argv[1]);
+    if (offset < 0) {
+        print_cvtnum_err(offset, argv[1]);
+        return offset;
+    }
+
+    /*
+     * qemu-io is a debugging tool, so let us be strict here and pass
+     * exact=true.  It is better to err on the "emit more errors" side
+     * than to be overly permissive.
+     */
+    ret = blk_truncate(blk, offset, false, PREALLOC_MODE_OFF, 0, &local_err);
+    if (ret < 0) {
+        error_report_err(local_err);
+        return ret;
+    }
+
+    return 0;
+}
+
+static const cmdinfo_t truncate_cmd = {
+    .name       = "truncate",
+    .altname    = "t",
+    .cfunc      = truncate_f,
+    .perm       = BLK_PERM_WRITE | BLK_PERM_RESIZE,
+    .argmin     = 1,
+    .argmax     = 1,
+    .args       = "off",
+    .oneline    = "truncates the current file at the given offset",
+};
+
+static int length_f(BlockBackend *blk, int argc, char **argv)
+{
+    int64_t size;
+    char s1[64];
+
+    size = blk_getlength(blk);
+    if (size < 0) {
+        printf("getlength: %s\n", strerror(-size));
+        return size;
+    }
+
+    cvtstr(size, s1, sizeof(s1));
+    printf("%s\n", s1);
+    return 0;
+}
+
+
+static const cmdinfo_t length_cmd = {
+    .name   = "length",
+    .altname    = "l",
+    .cfunc      = length_f,
+    .oneline    = "gets the length of the current file",
+};
+
+
+static int info_f(BlockBackend *blk, int argc, char **argv)
+{
+    BlockDriverState *bs = blk_bs(blk);
+    BlockDriverInfo bdi;
+    ImageInfoSpecific *spec_info;
+    Error *local_err = NULL;
+    char s1[64], s2[64];
+    int ret;
+
+    if (bs->drv && bs->drv->format_name) {
+        printf("format name: %s\n", bs->drv->format_name);
+    }
+    if (bs->drv && bs->drv->protocol_name) {
+        printf("format name: %s\n", bs->drv->protocol_name);
+    }
+
+    ret = bdrv_get_info(bs, &bdi);
+    if (ret) {
+        return ret;
+    }
+
+    cvtstr(bdi.cluster_size, s1, sizeof(s1));
+    cvtstr(bdi.vm_state_offset, s2, sizeof(s2));
+
+    printf("cluster size: %s\n", s1);
+    printf("vm state offset: %s\n", s2);
+
+    spec_info = bdrv_get_specific_info(bs, &local_err);
+    if (local_err) {
+        error_report_err(local_err);
+        return -EIO;
+    }
+    if (spec_info) {
+        printf("Format specific information:\n");
+        bdrv_image_info_specific_dump(spec_info);
+        qapi_free_ImageInfoSpecific(spec_info);
+    }
+
+    return 0;
+}
+
+
+
+static const cmdinfo_t info_cmd = {
+    .name       = "info",
+    .altname    = "i",
+    .cfunc      = info_f,
+    .oneline    = "prints information about the current file",
+};
+
+static void discard_help(void)
+{
+    printf(
+"\n"
+" discards a range of bytes from the given offset\n"
+"\n"
+" Example:\n"
+" 'discard 512 1k' - discards 1 kilobyte from 512 bytes into the file\n"
+"\n"
+" Discards a segment of the currently open file.\n"
+" -C, -- report statistics in a machine parsable format\n"
+" -q, -- quiet mode, do not show I/O statistics\n"
+"\n");
+}
+
+static int discard_f(BlockBackend *blk, int argc, char **argv);
+
+static const cmdinfo_t discard_cmd = {
+    .name       = "discard",
+    .altname    = "d",
+    .cfunc      = discard_f,
+    .perm       = BLK_PERM_WRITE,
+    .argmin     = 2,
+    .argmax     = -1,
+    .args       = "[-Cq] off len",
+    .oneline    = "discards a number of bytes at a specified offset",
+    .help       = discard_help,
+};
+
+static int discard_f(BlockBackend *blk, int argc, char **argv)
+{
+    struct timespec t1, t2;
+    bool Cflag = false, qflag = false;
+    int c, ret;
+    int64_t offset, bytes;
+
+    while ((c = getopt(argc, argv, "Cq")) != -1) {
+        switch (c) {
+        case 'C':
+            Cflag = true;
+            break;
+        case 'q':
+            qflag = true;
+            break;
+        default:
+            qemuio_command_usage(&discard_cmd);
+            return -EINVAL;
+        }
+    }
+
+    if (optind != argc - 2) {
+        qemuio_command_usage(&discard_cmd);
+        return -EINVAL;
+    }
+
+    offset = cvtnum(argv[optind]);
+    if (offset < 0) {
+        print_cvtnum_err(offset, argv[optind]);
+        return offset;
+    }
+
+    optind++;
+    bytes = cvtnum(argv[optind]);
+    if (bytes < 0) {
+        print_cvtnum_err(bytes, argv[optind]);
+        return bytes;
+    } else if (bytes > BDRV_REQUEST_MAX_BYTES) {
+        printf("length cannot exceed %"PRIu64", given %s\n",
+               (uint64_t)BDRV_REQUEST_MAX_BYTES, argv[optind]);
+        return -EINVAL;
+    }
+
+    clock_gettime(CLOCK_MONOTONIC, &t1);
+    ret = blk_pdiscard(blk, offset, bytes);
+    clock_gettime(CLOCK_MONOTONIC, &t2);
+
+    if (ret < 0) {
+        printf("discard failed: %s\n", strerror(-ret));
+        return ret;
+    }
+
+    /* Finally, report back -- -C gives a parsable format */
+    if (!qflag) {
+        t2 = tsub(t2, t1);
+        print_report("discard", &t2, offset, bytes, bytes, 1, Cflag);
+    }
+
+    return 0;
+}
+
+static int alloc_f(BlockBackend *blk, int argc, char **argv)
+{
+    BlockDriverState *bs = blk_bs(blk);
+    int64_t offset, start, remaining, count;
+    char s1[64];
+    int ret;
+    int64_t num, sum_alloc;
+
+    start = offset = cvtnum(argv[1]);
+    if (offset < 0) {
+        print_cvtnum_err(offset, argv[1]);
+        return offset;
+    }
+
+    if (argc == 3) {
+        count = cvtnum(argv[2]);
+        if (count < 0) {
+            print_cvtnum_err(count, argv[2]);
+            return count;
+        }
+    } else {
+        count = BDRV_SECTOR_SIZE;
+    }
+
+    remaining = count;
+    sum_alloc = 0;
+    while (remaining) {
+        ret = bdrv_is_allocated(bs, offset, remaining, &num);
+        if (ret < 0) {
+            printf("is_allocated failed: %s\n", strerror(-ret));
+            return ret;
+        }
+        offset += num;
+        remaining -= num;
+        if (ret) {
+            sum_alloc += num;
+        }
+        if (num == 0) {
+            count -= remaining;
+            remaining = 0;
+        }
+    }
+
+    cvtstr(start, s1, sizeof(s1));
+
+    printf("%"PRId64"/%"PRId64" bytes allocated at offset %s\n",
+           sum_alloc, count, s1);
+    return 0;
+}
+
+static const cmdinfo_t alloc_cmd = {
+    .name       = "alloc",
+    .altname    = "a",
+    .argmin     = 1,
+    .argmax     = 2,
+    .cfunc      = alloc_f,
+    .args       = "offset [count]",
+    .oneline    = "checks if offset is allocated in the file",
+};
+
+
+static int map_is_allocated(BlockDriverState *bs, int64_t offset,
+                            int64_t bytes, int64_t *pnum)
+{
+    int64_t num;
+    int num_checked;
+    int ret, firstret;
+
+    num_checked = MIN(bytes, BDRV_REQUEST_MAX_BYTES);
+    ret = bdrv_is_allocated(bs, offset, num_checked, &num);
+    if (ret < 0) {
+        return ret;
+    }
+
+    firstret = ret;
+    *pnum = num;
+
+    while (bytes > 0 && ret == firstret) {
+        offset += num;
+        bytes -= num;
+
+        num_checked = MIN(bytes, BDRV_REQUEST_MAX_BYTES);
+        ret = bdrv_is_allocated(bs, offset, num_checked, &num);
+        if (ret == firstret && num) {
+            *pnum += num;
+        } else {
+            break;
+        }
+    }
+
+    return firstret;
+}
+
+static int map_f(BlockBackend *blk, int argc, char **argv)
+{
+    int64_t offset, bytes;
+    char s1[64], s2[64];
+    int64_t num;
+    int ret;
+    const char *retstr;
+
+    offset = 0;
+    bytes = blk_getlength(blk);
+    if (bytes < 0) {
+        error_report("Failed to query image length: %s", strerror(-bytes));
+        return bytes;
+    }
+
+    while (bytes) {
+        ret = map_is_allocated(blk_bs(blk), offset, bytes, &num);
+        if (ret < 0) {
+            error_report("Failed to get allocation status: %s", strerror(-ret));
+            return ret;
+        } else if (!num) {
+            error_report("Unexpected end of image");
+            return -EIO;
+        }
+
+        retstr = ret ? "    allocated" : "not allocated";
+        cvtstr(num, s1, sizeof(s1));
+        cvtstr(offset, s2, sizeof(s2));
+        printf("%s (0x%" PRIx64 ") bytes %s at offset %s (0x%" PRIx64 ")\n",
+               s1, num, retstr, s2, offset);
+
+        offset += num;
+        bytes -= num;
+    }
+
+    return 0;
+}
+
+static const cmdinfo_t map_cmd = {
+       .name           = "map",
+       .argmin         = 0,
+       .argmax         = 0,
+       .cfunc          = map_f,
+       .args           = "",
+       .oneline        = "prints the allocated areas of a file",
+};
+
+static void reopen_help(void)
+{
+    printf(
+"\n"
+" Changes the open options of an already opened image\n"
+"\n"
+" Example:\n"
+" 'reopen -o lazy-refcounts=on' - activates lazy refcount writeback on a qcow2 image\n"
+"\n"
+" -r, -- Reopen the image read-only\n"
+" -w, -- Reopen the image read-write\n"
+" -c, -- Change the cache mode to the given value\n"
+" -o, -- Changes block driver options (cf. 'open' command)\n"
+"\n");
+}
+
+static int reopen_f(BlockBackend *blk, int argc, char **argv);
+
+static QemuOptsList reopen_opts = {
+    .name = "reopen",
+    .merge_lists = true,
+    .head = QTAILQ_HEAD_INITIALIZER(reopen_opts.head),
+    .desc = {
+        /* no elements => accept any params */
+        { /* end of list */ }
+    },
+};
+
+static const cmdinfo_t reopen_cmd = {
+       .name           = "reopen",
+       .argmin         = 0,
+       .argmax         = -1,
+       .cfunc          = reopen_f,
+       .args           = "[(-r|-w)] [-c cache] [-o options]",
+       .oneline        = "reopens an image with new options",
+       .help           = reopen_help,
+};
+
+static int reopen_f(BlockBackend *blk, int argc, char **argv)
+{
+    BlockDriverState *bs = blk_bs(blk);
+    QemuOpts *qopts;
+    QDict *opts;
+    int c;
+    int flags = bs->open_flags;
+    bool writethrough = !blk_enable_write_cache(blk);
+    bool has_rw_option = false;
+    bool has_cache_option = false;
+
+    BlockReopenQueue *brq;
+    Error *local_err = NULL;
+
+    while ((c = getopt(argc, argv, "c:o:rw")) != -1) {
+        switch (c) {
+        case 'c':
+            if (bdrv_parse_cache_mode(optarg, &flags, &writethrough) < 0) {
+                error_report("Invalid cache option: %s", optarg);
+                return -EINVAL;
+            }
+            has_cache_option = true;
+            break;
+        case 'o':
+            if (!qemu_opts_parse_noisily(&reopen_opts, optarg, 0)) {
+                qemu_opts_reset(&reopen_opts);
+                return -EINVAL;
+            }
+            break;
+        case 'r':
+            if (has_rw_option) {
+                error_report("Only one -r/-w option may be given");
+                return -EINVAL;
+            }
+            flags &= ~BDRV_O_RDWR;
+            has_rw_option = true;
+            break;
+        case 'w':
+            if (has_rw_option) {
+                error_report("Only one -r/-w option may be given");
+                return -EINVAL;
+            }
+            flags |= BDRV_O_RDWR;
+            has_rw_option = true;
+            break;
+        default:
+            qemu_opts_reset(&reopen_opts);
+            qemuio_command_usage(&reopen_cmd);
+            return -EINVAL;
+        }
+    }
+
+    if (optind != argc) {
+        qemu_opts_reset(&reopen_opts);
+        qemuio_command_usage(&reopen_cmd);
+        return -EINVAL;
+    }
+
+    if (!writethrough != blk_enable_write_cache(blk) &&
+        blk_get_attached_dev(blk))
+    {
+        error_report("Cannot change cache.writeback: Device attached");
+        qemu_opts_reset(&reopen_opts);
+        return -EBUSY;
+    }
+
+    if (!(flags & BDRV_O_RDWR)) {
+        uint64_t orig_perm, orig_shared_perm;
+
+        bdrv_drain(bs);
+
+        blk_get_perm(blk, &orig_perm, &orig_shared_perm);
+        blk_set_perm(blk,
+                     orig_perm & ~(BLK_PERM_WRITE | BLK_PERM_WRITE_UNCHANGED),
+                     orig_shared_perm,
+                     &error_abort);
+    }
+
+    qopts = qemu_opts_find(&reopen_opts, NULL);
+    opts = qopts ? qemu_opts_to_qdict(qopts, NULL) : qdict_new();
+    qemu_opts_reset(&reopen_opts);
+
+    if (qdict_haskey(opts, BDRV_OPT_READ_ONLY)) {
+        if (has_rw_option) {
+            error_report("Cannot set both -r/-w and '" BDRV_OPT_READ_ONLY "'");
+            qobject_unref(opts);
+            return -EINVAL;
+        }
+    } else {
+        qdict_put_bool(opts, BDRV_OPT_READ_ONLY, !(flags & BDRV_O_RDWR));
+    }
+
+    if (qdict_haskey(opts, BDRV_OPT_CACHE_DIRECT) ||
+        qdict_haskey(opts, BDRV_OPT_CACHE_NO_FLUSH)) {
+        if (has_cache_option) {
+            error_report("Cannot set both -c and the cache options");
+            qobject_unref(opts);
+            return -EINVAL;
+        }
+    } else {
+        qdict_put_bool(opts, BDRV_OPT_CACHE_DIRECT, flags & BDRV_O_NOCACHE);
+        qdict_put_bool(opts, BDRV_OPT_CACHE_NO_FLUSH, flags & BDRV_O_NO_FLUSH);
+    }
+
+    bdrv_subtree_drained_begin(bs);
+    brq = bdrv_reopen_queue(NULL, bs, opts, true);
+    bdrv_reopen_multiple(brq, &local_err);
+    bdrv_subtree_drained_end(bs);
+
+    if (local_err) {
+        error_report_err(local_err);
+        return -EINVAL;
+    }
+
+    blk_set_enable_write_cache(blk, !writethrough);
+    return 0;
+}
+
+static int break_f(BlockBackend *blk, int argc, char **argv)
+{
+    int ret;
+
+    ret = bdrv_debug_breakpoint(blk_bs(blk), argv[1], argv[2]);
+    if (ret < 0) {
+        printf("Could not set breakpoint: %s\n", strerror(-ret));
+        return ret;
+    }
+
+    return 0;
+}
+
+static int remove_break_f(BlockBackend *blk, int argc, char **argv)
+{
+    int ret;
+
+    ret = bdrv_debug_remove_breakpoint(blk_bs(blk), argv[1]);
+    if (ret < 0) {
+        printf("Could not remove breakpoint %s: %s\n", argv[1], strerror(-ret));
+        return ret;
+    }
+
+    return 0;
+}
+
+static const cmdinfo_t break_cmd = {
+       .name           = "break",
+       .argmin         = 2,
+       .argmax         = 2,
+       .cfunc          = break_f,
+       .args           = "event tag",
+       .oneline        = "sets a breakpoint on event and tags the stopped "
+                         "request as tag",
+};
+
+static const cmdinfo_t remove_break_cmd = {
+       .name           = "remove_break",
+       .argmin         = 1,
+       .argmax         = 1,
+       .cfunc          = remove_break_f,
+       .args           = "tag",
+       .oneline        = "remove a breakpoint by tag",
+};
+
+static int resume_f(BlockBackend *blk, int argc, char **argv)
+{
+    int ret;
+
+    ret = bdrv_debug_resume(blk_bs(blk), argv[1]);
+    if (ret < 0) {
+        printf("Could not resume request: %s\n", strerror(-ret));
+        return ret;
+    }
+
+    return 0;
+}
+
+static const cmdinfo_t resume_cmd = {
+       .name           = "resume",
+       .argmin         = 1,
+       .argmax         = 1,
+       .cfunc          = resume_f,
+       .args           = "tag",
+       .oneline        = "resumes the request tagged as tag",
+};
+
+static int wait_break_f(BlockBackend *blk, int argc, char **argv)
+{
+    while (!bdrv_debug_is_suspended(blk_bs(blk), argv[1])) {
+        aio_poll(blk_get_aio_context(blk), true);
+    }
+    return 0;
+}
+
+static const cmdinfo_t wait_break_cmd = {
+       .name           = "wait_break",
+       .argmin         = 1,
+       .argmax         = 1,
+       .cfunc          = wait_break_f,
+       .args           = "tag",
+       .oneline        = "waits for the suspension of a request",
+};
+
+static int abort_f(BlockBackend *blk, int argc, char **argv)
+{
+    abort();
+}
+
+static const cmdinfo_t abort_cmd = {
+       .name           = "abort",
+       .cfunc          = abort_f,
+       .flags          = CMD_NOFILE_OK,
+       .oneline        = "simulate a program crash using abort(3)",
+};
+
+static void sigraise_help(void)
+{
+    printf(
+"\n"
+" raises the given signal\n"
+"\n"
+" Example:\n"
+" 'sigraise %i' - raises SIGTERM\n"
+"\n"
+" Invokes raise(signal), where \"signal\" is the mandatory integer argument\n"
+" given to sigraise.\n"
+"\n", SIGTERM);
+}
+
+static int sigraise_f(BlockBackend *blk, int argc, char **argv);
+
+static const cmdinfo_t sigraise_cmd = {
+    .name       = "sigraise",
+    .cfunc      = sigraise_f,
+    .argmin     = 1,
+    .argmax     = 1,
+    .flags      = CMD_NOFILE_OK,
+    .args       = "signal",
+    .oneline    = "raises a signal",
+    .help       = sigraise_help,
+};
+
+static int sigraise_f(BlockBackend *blk, int argc, char **argv)
+{
+    int64_t sig = cvtnum(argv[1]);
+    if (sig < 0) {
+        print_cvtnum_err(sig, argv[1]);
+        return sig;
+    } else if (sig > NSIG) {
+        printf("signal argument '%s' is too large to be a valid signal\n",
+               argv[1]);
+        return -EINVAL;
+    }
+
+    /* Using raise() to kill this process does not necessarily flush all open
+     * streams. At least stdout and stderr (although the latter should be
+     * non-buffered anyway) should be flushed, though. */
+    fflush(stdout);
+    fflush(stderr);
+
+    raise(sig);
+
+    return 0;
+}
+
+static void sleep_cb(void *opaque)
+{
+    bool *expired = opaque;
+    *expired = true;
+}
+
+static int sleep_f(BlockBackend *blk, int argc, char **argv)
+{
+    char *endptr;
+    long ms;
+    struct QEMUTimer *timer;
+    bool expired = false;
+
+    ms = strtol(argv[1], &endptr, 0);
+    if (ms < 0 || *endptr != '\0') {
+        printf("%s is not a valid number\n", argv[1]);
+        return -EINVAL;
+    }
+
+    timer = timer_new_ns(QEMU_CLOCK_HOST, sleep_cb, &expired);
+    timer_mod(timer, qemu_clock_get_ns(QEMU_CLOCK_HOST) + SCALE_MS * ms);
+
+    while (!expired) {
+        main_loop_wait(false);
+    }
+
+    timer_free(timer);
+    return 0;
+}
+
+static const cmdinfo_t sleep_cmd = {
+       .name           = "sleep",
+       .argmin         = 1,
+       .argmax         = 1,
+       .cfunc          = sleep_f,
+       .flags          = CMD_NOFILE_OK,
+       .oneline        = "waits for the given value in milliseconds",
+};
+
+static void help_oneline(const char *cmd, const cmdinfo_t *ct)
+{
+    printf("%s ", cmd);
+
+    if (ct->args) {
+        printf("%s ", ct->args);
+    }
+    printf("-- %s\n", ct->oneline);
+}
+
+static void help_onecmd(const char *cmd, const cmdinfo_t *ct)
+{
+    help_oneline(cmd, ct);
+    if (ct->help) {
+        ct->help();
+    }
+}
+
+static void help_all(void)
+{
+    const cmdinfo_t *ct;
+
+    for (ct = cmdtab; ct < &cmdtab[ncmds]; ct++) {
+        help_oneline(ct->name, ct);
+    }
+    printf("\nUse 'help commandname' for extended help.\n");
+}
+
+static int help_f(BlockBackend *blk, int argc, char **argv)
+{
+    const cmdinfo_t *ct;
+
+    if (argc < 2) {
+        help_all();
+        return 0;
+    }
+
+    ct = find_command(argv[1]);
+    if (ct == NULL) {
+        printf("command %s not found\n", argv[1]);
+        return -EINVAL;
+    }
+
+    help_onecmd(argv[1], ct);
+    return 0;
+}
+
+static const cmdinfo_t help_cmd = {
+    .name       = "help",
+    .altname    = "?",
+    .cfunc      = help_f,
+    .argmin     = 0,
+    .argmax     = 1,
+    .flags      = CMD_FLAG_GLOBAL,
+    .args       = "[command]",
+    .oneline    = "help for one or all commands",
+};
+
+int qemuio_command(BlockBackend *blk, const char *cmd)
+{
+    AioContext *ctx;
+    char *input;
+    const cmdinfo_t *ct;
+    char **v;
+    int c;
+    int ret = 0;
+
+    input = g_strdup(cmd);
+    v = breakline(input, &c);
+    if (c) {
+        ct = find_command(v[0]);
+        if (ct) {
+            ctx = blk ? blk_get_aio_context(blk) : qemu_get_aio_context();
+            aio_context_acquire(ctx);
+            ret = command(blk, ct, c, v);
+            aio_context_release(ctx);
+        } else {
+            fprintf(stderr, "command \"%s\" not found\n", v[0]);
+            ret = -EINVAL;
+        }
+    }
+    g_free(input);
+    g_free(v);
+
+    return ret;
+}
+
+static void __attribute((constructor)) init_qemuio_commands(void)
+{
+    /* initialize commands */
+    qemuio_add_command(&help_cmd);
+    qemuio_add_command(&read_cmd);
+    qemuio_add_command(&readv_cmd);
+    qemuio_add_command(&write_cmd);
+    qemuio_add_command(&writev_cmd);
+    qemuio_add_command(&aio_read_cmd);
+    qemuio_add_command(&aio_write_cmd);
+    qemuio_add_command(&aio_flush_cmd);
+    qemuio_add_command(&flush_cmd);
+    qemuio_add_command(&truncate_cmd);
+    qemuio_add_command(&length_cmd);
+    qemuio_add_command(&info_cmd);
+    qemuio_add_command(&discard_cmd);
+    qemuio_add_command(&alloc_cmd);
+    qemuio_add_command(&map_cmd);
+    qemuio_add_command(&reopen_cmd);
+    qemuio_add_command(&break_cmd);
+    qemuio_add_command(&remove_break_cmd);
+    qemuio_add_command(&resume_cmd);
+    qemuio_add_command(&wait_break_cmd);
+    qemuio_add_command(&abort_cmd);
+    qemuio_add_command(&sleep_cmd);
+    qemuio_add_command(&sigraise_cmd);
+}
diff --git a/qemu-io.c b/qemu-io.c
new file mode 100644
index 000000000..ac88d8bd4
--- /dev/null
+++ b/qemu-io.c
@@ -0,0 +1,714 @@
+/*
+ * Command line utility to exercise the QEMU I/O path.
+ *
+ * Copyright (C) 2009 Red Hat, Inc.
+ * Copyright (c) 2003-2005 Silicon Graphics, Inc.
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
+ * See the COPYING file in the top-level directory.
+ */
+
+#include "qemu/osdep.h"
+#include 
+#include 
+#ifndef _WIN32
+#include 
+#endif
+
+#include "qemu-common.h"
+#include "qapi/error.h"
+#include "qemu-io.h"
+#include "qemu/error-report.h"
+#include "qemu/main-loop.h"
+#include "qemu/module.h"
+#include "qemu/option.h"
+#include "qemu/config-file.h"
+#include "qemu/readline.h"
+#include "qemu/log.h"
+#include "qemu/sockets.h"
+#include "qapi/qmp/qstring.h"
+#include "qapi/qmp/qdict.h"
+#include "qom/object_interfaces.h"
+#include "sysemu/block-backend.h"
+#include "block/block_int.h"
+#include "trace/control.h"
+#include "crypto/init.h"
+#include "qemu-version.h"
+
+#define CMD_NOFILE_OK   0x01
+
+static BlockBackend *qemuio_blk;
+static bool quit_qemu_io;
+
+/* qemu-io commands passed using -c */
+static int ncmdline;
+static char **cmdline;
+static bool imageOpts;
+
+static ReadLineState *readline_state;
+
+static int ttyEOF;
+
+static int get_eof_char(void)
+{
+#ifdef _WIN32
+    return 0x4; /* Ctrl-D */
+#else
+    struct termios tty;
+    if (tcgetattr(STDIN_FILENO, &tty) != 0) {
+        if (errno == ENOTTY) {
+            return 0x0; /* just expect read() == 0 */
+        } else {
+            return 0x4; /* Ctrl-D */
+        }
+    }
+
+    return tty.c_cc[VEOF];
+#endif
+}
+
+static int close_f(BlockBackend *blk, int argc, char **argv)
+{
+    blk_unref(qemuio_blk);
+    qemuio_blk = NULL;
+    return 0;
+}
+
+static const cmdinfo_t close_cmd = {
+    .name       = "close",
+    .altname    = "c",
+    .cfunc      = close_f,
+    .oneline    = "close the current open file",
+};
+
+static int openfile(char *name, int flags, bool writethrough, bool force_share,
+                    QDict *opts)
+{
+    Error *local_err = NULL;
+
+    if (qemuio_blk) {
+        error_report("file open already, try 'help close'");
+        qobject_unref(opts);
+        return 1;
+    }
+
+    if (force_share) {
+        if (!opts) {
+            opts = qdict_new();
+        }
+        if (qdict_haskey(opts, BDRV_OPT_FORCE_SHARE)
+            && strcmp(qdict_get_str(opts, BDRV_OPT_FORCE_SHARE), "on")) {
+            error_report("-U conflicts with image options");
+            qobject_unref(opts);
+            return 1;
+        }
+        qdict_put_str(opts, BDRV_OPT_FORCE_SHARE, "on");
+    }
+    qemuio_blk = blk_new_open(name, NULL, opts, flags, &local_err);
+    if (!qemuio_blk) {
+        error_reportf_err(local_err, "can't open%s%s: ",
+                          name ? " device " : "", name ?: "");
+        return 1;
+    }
+
+    blk_set_enable_write_cache(qemuio_blk, !writethrough);
+
+    return 0;
+}
+
+static void open_help(void)
+{
+    printf(
+"\n"
+" opens a new file in the requested mode\n"
+"\n"
+" Example:\n"
+" 'open -n -o driver=raw /tmp/data' - opens raw data file read-write, uncached\n"
+"\n"
+" Opens a file for subsequent use by all of the other qemu-io commands.\n"
+" -r, -- open file read-only\n"
+" -s, -- use snapshot file\n"
+" -C, -- use copy-on-read\n"
+" -n, -- disable host cache, short for -t none\n"
+" -U, -- force shared permissions\n"
+" -k, -- use kernel AIO implementation (Linux only, prefer use of -i)\n"
+" -i, -- use AIO mode (threads, native or io_uring)\n"
+" -t, -- use the given cache mode for the image\n"
+" -d, -- use the given discard mode for the image\n"
+" -o, -- options to be given to the block driver"
+"\n");
+}
+
+static int open_f(BlockBackend *blk, int argc, char **argv);
+
+static const cmdinfo_t open_cmd = {
+    .name       = "open",
+    .altname    = "o",
+    .cfunc      = open_f,
+    .argmin     = 1,
+    .argmax     = -1,
+    .flags      = CMD_NOFILE_OK,
+    .args       = "[-rsCnkU] [-t cache] [-d discard] [-o options] [path]",
+    .oneline    = "open the file specified by path",
+    .help       = open_help,
+};
+
+static QemuOptsList empty_opts = {
+    .name = "drive",
+    .merge_lists = true,
+    .head = QTAILQ_HEAD_INITIALIZER(empty_opts.head),
+    .desc = {
+        /* no elements => accept any params */
+        { /* end of list */ }
+    },
+};
+
+static int open_f(BlockBackend *blk, int argc, char **argv)
+{
+    int flags = BDRV_O_UNMAP;
+    int readonly = 0;
+    bool writethrough = true;
+    int c;
+    int ret;
+    QemuOpts *qopts;
+    QDict *opts;
+    bool force_share = false;
+
+    while ((c = getopt(argc, argv, "snCro:ki:t:d:U")) != -1) {
+        switch (c) {
+        case 's':
+            flags |= BDRV_O_SNAPSHOT;
+            break;
+        case 'n':
+            flags |= BDRV_O_NOCACHE;
+            writethrough = false;
+            break;
+        case 'C':
+            flags |= BDRV_O_COPY_ON_READ;
+            break;
+        case 'r':
+            readonly = 1;
+            break;
+        case 'k':
+            flags |= BDRV_O_NATIVE_AIO;
+            break;
+        case 't':
+            if (bdrv_parse_cache_mode(optarg, &flags, &writethrough) < 0) {
+                error_report("Invalid cache option: %s", optarg);
+                qemu_opts_reset(&empty_opts);
+                return -EINVAL;
+            }
+            break;
+        case 'd':
+            if (bdrv_parse_discard_flags(optarg, &flags) < 0) {
+                error_report("Invalid discard option: %s", optarg);
+                qemu_opts_reset(&empty_opts);
+                return -EINVAL;
+            }
+            break;
+        case 'i':
+            if (bdrv_parse_aio(optarg, &flags) < 0) {
+                error_report("Invalid aio option: %s", optarg);
+                qemu_opts_reset(&empty_opts);
+                return -EINVAL;
+            }
+            break;
+        case 'o':
+            if (imageOpts) {
+                printf("--image-opts and 'open -o' are mutually exclusive\n");
+                qemu_opts_reset(&empty_opts);
+                return -EINVAL;
+            }
+            if (!qemu_opts_parse_noisily(&empty_opts, optarg, false)) {
+                qemu_opts_reset(&empty_opts);
+                return -EINVAL;
+            }
+            break;
+        case 'U':
+            force_share = true;
+            break;
+        default:
+            qemu_opts_reset(&empty_opts);
+            qemuio_command_usage(&open_cmd);
+            return -EINVAL;
+        }
+    }
+
+    if (!readonly) {
+        flags |= BDRV_O_RDWR;
+    }
+
+    if (imageOpts && (optind == argc - 1)) {
+        if (!qemu_opts_parse_noisily(&empty_opts, argv[optind], false)) {
+            qemu_opts_reset(&empty_opts);
+            return -EINVAL;
+        }
+        optind++;
+    }
+
+    qopts = qemu_opts_find(&empty_opts, NULL);
+    opts = qopts ? qemu_opts_to_qdict(qopts, NULL) : NULL;
+    qemu_opts_reset(&empty_opts);
+
+    if (optind == argc - 1) {
+        ret = openfile(argv[optind], flags, writethrough, force_share, opts);
+    } else if (optind == argc) {
+        ret = openfile(NULL, flags, writethrough, force_share, opts);
+    } else {
+        qobject_unref(opts);
+        qemuio_command_usage(&open_cmd);
+        return -EINVAL;
+    }
+
+    if (ret) {
+        return -EINVAL;
+    }
+
+    return 0;
+}
+
+static int quit_f(BlockBackend *blk, int argc, char **argv)
+{
+    quit_qemu_io = true;
+    return 0;
+}
+
+static const cmdinfo_t quit_cmd = {
+    .name       = "quit",
+    .altname    = "q",
+    .cfunc      = quit_f,
+    .argmin     = -1,
+    .argmax     = -1,
+    .flags      = CMD_FLAG_GLOBAL,
+    .oneline    = "exit the program",
+};
+
+static void usage(const char *name)
+{
+    printf(
+"Usage: %s [OPTIONS]... [-c STRING]... [file]\n"
+"QEMU Disk exerciser\n"
+"\n"
+"  --object OBJECTDEF   define an object such as 'secret' for\n"
+"                       passwords and/or encryption keys\n"
+"  --image-opts         treat file as option string\n"
+"  -c, --cmd STRING     execute command with its arguments\n"
+"                       from the given string\n"
+"  -f, --format FMT     specifies the block driver to use\n"
+"  -r, --read-only      export read-only\n"
+"  -s, --snapshot       use snapshot file\n"
+"  -n, --nocache        disable host cache, short for -t none\n"
+"  -C, --copy-on-read   enable copy-on-read\n"
+"  -m, --misalign       misalign allocations for O_DIRECT\n"
+"  -k, --native-aio     use kernel AIO implementation\n"
+"                       (Linux only, prefer use of -i)\n"
+"  -i, --aio=MODE       use AIO mode (threads, native or io_uring)\n"
+"  -t, --cache=MODE     use the given cache mode for the image\n"
+"  -d, --discard=MODE   use the given discard mode for the image\n"
+"  -T, --trace [[enable=]][,events=][,file=]\n"
+"                       specify tracing options\n"
+"                       see qemu-img(1) man page for full description\n"
+"  -U, --force-share    force shared permissions\n"
+"  -h, --help           display this help and exit\n"
+"  -V, --version        output version information and exit\n"
+"\n"
+"See '%s -c help' for information on available commands.\n"
+"\n"
+QEMU_HELP_BOTTOM "\n",
+    name, name);
+}
+
+static char *get_prompt(void)
+{
+    static char prompt[FILENAME_MAX + 2 /*"> "*/ + 1 /*"\0"*/ ];
+
+    if (!prompt[0]) {
+        snprintf(prompt, sizeof(prompt), "%s> ", error_get_progname());
+    }
+
+    return prompt;
+}
+
+static void GCC_FMT_ATTR(2, 3) readline_printf_func(void *opaque,
+                                                    const char *fmt, ...)
+{
+    va_list ap;
+    va_start(ap, fmt);
+    vprintf(fmt, ap);
+    va_end(ap);
+}
+
+static void readline_flush_func(void *opaque)
+{
+    fflush(stdout);
+}
+
+static void readline_func(void *opaque, const char *str, void *readline_opaque)
+{
+    char **line = readline_opaque;
+    *line = g_strdup(str);
+}
+
+static void completion_match(const char *cmd, void *opaque)
+{
+    readline_add_completion(readline_state, cmd);
+}
+
+static void readline_completion_func(void *opaque, const char *str)
+{
+    readline_set_completion_index(readline_state, strlen(str));
+    qemuio_complete_command(str, completion_match, NULL);
+}
+
+static char *fetchline_readline(void)
+{
+    char *line = NULL;
+
+    readline_start(readline_state, get_prompt(), 0, readline_func, &line);
+    while (!line) {
+        int ch = getchar();
+        if (ttyEOF != 0x0 && ch == ttyEOF) {
+            printf("\n");
+            break;
+        }
+        readline_handle_byte(readline_state, ch);
+    }
+    return line;
+}
+
+#define MAXREADLINESZ 1024
+static char *fetchline_fgets(void)
+{
+    char *p, *line = g_malloc(MAXREADLINESZ);
+
+    if (!fgets(line, MAXREADLINESZ, stdin)) {
+        g_free(line);
+        return NULL;
+    }
+
+    p = line + strlen(line);
+    if (p != line && p[-1] == '\n') {
+        p[-1] = '\0';
+    }
+
+    return line;
+}
+
+static char *fetchline(void)
+{
+    if (readline_state) {
+        return fetchline_readline();
+    } else {
+        return fetchline_fgets();
+    }
+}
+
+static void prep_fetchline(void *opaque)
+{
+    int *fetchable = opaque;
+
+    qemu_set_fd_handler(STDIN_FILENO, NULL, NULL, NULL);
+    *fetchable= 1;
+}
+
+static int command_loop(void)
+{
+    int i, fetchable = 0, prompted = 0;
+    int ret, last_error = 0;
+    char *input;
+
+    for (i = 0; !quit_qemu_io && i < ncmdline; i++) {
+        ret = qemuio_command(qemuio_blk, cmdline[i]);
+        if (ret < 0) {
+            last_error = ret;
+        }
+    }
+    if (cmdline) {
+        g_free(cmdline);
+        return last_error;
+    }
+
+    while (!quit_qemu_io) {
+        if (!prompted) {
+            printf("%s", get_prompt());
+            fflush(stdout);
+            qemu_set_fd_handler(STDIN_FILENO, prep_fetchline, NULL, &fetchable);
+            prompted = 1;
+        }
+
+        main_loop_wait(false);
+
+        if (!fetchable) {
+            continue;
+        }
+
+        input = fetchline();
+        if (input == NULL) {
+            break;
+        }
+        ret = qemuio_command(qemuio_blk, input);
+        g_free(input);
+
+        if (ret < 0) {
+            last_error = ret;
+        }
+
+        prompted = 0;
+        fetchable = 0;
+    }
+    qemu_set_fd_handler(STDIN_FILENO, NULL, NULL, NULL);
+
+    return last_error;
+}
+
+static void add_user_command(char *optarg)
+{
+    cmdline = g_renew(char *, cmdline, ++ncmdline);
+    cmdline[ncmdline-1] = optarg;
+}
+
+static void reenable_tty_echo(void)
+{
+    qemu_set_tty_echo(STDIN_FILENO, true);
+}
+
+enum {
+    OPTION_OBJECT = 256,
+    OPTION_IMAGE_OPTS = 257,
+};
+
+static QemuOptsList qemu_object_opts = {
+    .name = "object",
+    .implied_opt_name = "qom-type",
+    .head = QTAILQ_HEAD_INITIALIZER(qemu_object_opts.head),
+    .desc = {
+        { }
+    },
+};
+
+static bool qemu_io_object_print_help(const char *type, QemuOpts *opts)
+{
+    if (user_creatable_print_help(type, opts)) {
+        exit(0);
+    }
+    return true;
+}
+
+static QemuOptsList file_opts = {
+    .name = "file",
+    .implied_opt_name = "file",
+    .head = QTAILQ_HEAD_INITIALIZER(file_opts.head),
+    .desc = {
+        /* no elements => accept any params */
+        { /* end of list */ }
+    },
+};
+
+int main(int argc, char **argv)
+{
+    int readonly = 0;
+    const char *sopt = "hVc:d:f:rsnCmki:t:T:U";
+    const struct option lopt[] = {
+        { "help", no_argument, NULL, 'h' },
+        { "version", no_argument, NULL, 'V' },
+        { "cmd", required_argument, NULL, 'c' },
+        { "format", required_argument, NULL, 'f' },
+        { "read-only", no_argument, NULL, 'r' },
+        { "snapshot", no_argument, NULL, 's' },
+        { "nocache", no_argument, NULL, 'n' },
+        { "copy-on-read", no_argument, NULL, 'C' },
+        { "misalign", no_argument, NULL, 'm' },
+        { "native-aio", no_argument, NULL, 'k' },
+        { "aio", required_argument, NULL, 'i' },
+        { "discard", required_argument, NULL, 'd' },
+        { "cache", required_argument, NULL, 't' },
+        { "trace", required_argument, NULL, 'T' },
+        { "object", required_argument, NULL, OPTION_OBJECT },
+        { "image-opts", no_argument, NULL, OPTION_IMAGE_OPTS },
+        { "force-share", no_argument, 0, 'U'},
+        { NULL, 0, NULL, 0 }
+    };
+    int c;
+    int opt_index = 0;
+    int flags = BDRV_O_UNMAP;
+    int ret;
+    bool writethrough = true;
+    Error *local_error = NULL;
+    QDict *opts = NULL;
+    const char *format = NULL;
+    bool force_share = false;
+
+#ifdef CONFIG_POSIX
+    signal(SIGPIPE, SIG_IGN);
+#endif
+
+    socket_init();
+    error_init(argv[0]);
+    module_call_init(MODULE_INIT_TRACE);
+    qemu_init_exec_dir(argv[0]);
+
+    qcrypto_init(&error_fatal);
+
+    module_call_init(MODULE_INIT_QOM);
+    qemu_add_opts(&qemu_object_opts);
+    qemu_add_opts(&qemu_trace_opts);
+    bdrv_init();
+
+    while ((c = getopt_long(argc, argv, sopt, lopt, &opt_index)) != -1) {
+        switch (c) {
+        case 's':
+            flags |= BDRV_O_SNAPSHOT;
+            break;
+        case 'n':
+            flags |= BDRV_O_NOCACHE;
+            writethrough = false;
+            break;
+        case 'C':
+            flags |= BDRV_O_COPY_ON_READ;
+            break;
+        case 'd':
+            if (bdrv_parse_discard_flags(optarg, &flags) < 0) {
+                error_report("Invalid discard option: %s", optarg);
+                exit(1);
+            }
+            break;
+        case 'f':
+            format = optarg;
+            break;
+        case 'c':
+            add_user_command(optarg);
+            break;
+        case 'r':
+            readonly = 1;
+            break;
+        case 'm':
+            qemuio_misalign = true;
+            break;
+        case 'k':
+            flags |= BDRV_O_NATIVE_AIO;
+            break;
+        case 'i':
+            if (bdrv_parse_aio(optarg, &flags) < 0) {
+                error_report("Invalid aio option: %s", optarg);
+                exit(1);
+            }
+            break;
+        case 't':
+            if (bdrv_parse_cache_mode(optarg, &flags, &writethrough) < 0) {
+                error_report("Invalid cache option: %s", optarg);
+                exit(1);
+            }
+            break;
+        case 'T':
+            trace_opt_parse(optarg);
+            break;
+        case 'V':
+            printf("%s version " QEMU_FULL_VERSION "\n"
+                   QEMU_COPYRIGHT "\n", error_get_progname());
+            exit(0);
+        case 'h':
+            usage(error_get_progname());
+            exit(0);
+        case 'U':
+            force_share = true;
+            break;
+        case OPTION_OBJECT: {
+            QemuOpts *qopts;
+            qopts = qemu_opts_parse_noisily(&qemu_object_opts,
+                                            optarg, true);
+            if (!qopts) {
+                exit(1);
+            }
+        }   break;
+        case OPTION_IMAGE_OPTS:
+            imageOpts = true;
+            break;
+        default:
+            usage(error_get_progname());
+            exit(1);
+        }
+    }
+
+    if ((argc - optind) > 1) {
+        usage(error_get_progname());
+        exit(1);
+    }
+
+    if (format && imageOpts) {
+        error_report("--image-opts and -f are mutually exclusive");
+        exit(1);
+    }
+
+    if (qemu_init_main_loop(&local_error)) {
+        error_report_err(local_error);
+        exit(1);
+    }
+
+    qemu_opts_foreach(&qemu_object_opts,
+                      user_creatable_add_opts_foreach,
+                      qemu_io_object_print_help, &error_fatal);
+
+    if (!trace_init_backends()) {
+        exit(1);
+    }
+    trace_init_file();
+    qemu_set_log(LOG_TRACE);
+
+    /* initialize commands */
+    qemuio_add_command(&quit_cmd);
+    qemuio_add_command(&open_cmd);
+    qemuio_add_command(&close_cmd);
+
+    if (isatty(STDIN_FILENO)) {
+        ttyEOF = get_eof_char();
+        readline_state = readline_init(readline_printf_func,
+                                       readline_flush_func,
+                                       NULL,
+                                       readline_completion_func);
+        qemu_set_tty_echo(STDIN_FILENO, false);
+        atexit(reenable_tty_echo);
+    }
+
+    /* open the device */
+    if (!readonly) {
+        flags |= BDRV_O_RDWR;
+    }
+
+    if ((argc - optind) == 1) {
+        if (imageOpts) {
+            QemuOpts *qopts = NULL;
+            qopts = qemu_opts_parse_noisily(&file_opts, argv[optind], false);
+            if (!qopts) {
+                exit(1);
+            }
+            opts = qemu_opts_to_qdict(qopts, NULL);
+            if (openfile(NULL, flags, writethrough, force_share, opts)) {
+                exit(1);
+            }
+        } else {
+            if (format) {
+                opts = qdict_new();
+                qdict_put_str(opts, "driver", format);
+            }
+            if (openfile(argv[optind], flags, writethrough,
+                         force_share, opts)) {
+                exit(1);
+            }
+        }
+    }
+    ret = command_loop();
+
+    /*
+     * Make sure all outstanding requests complete before the program exits.
+     */
+    bdrv_drain_all();
+
+    blk_unref(qemuio_blk);
+    g_free(readline_state);
+
+    if (ret < 0) {
+        return 1;
+    } else {
+        return 0;
+    }
+}
diff --git a/qemu-nbd.c b/qemu-nbd.c
new file mode 100644
index 000000000..39b517c94
--- /dev/null
+++ b/qemu-nbd.c
@@ -0,0 +1,1152 @@
+/*
+ *  Copyright (C) 2005  Anthony Liguori 
+ *
+ *  Network Block Device
+ *
+ *  This program is free software; you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License as published by
+ *  the Free Software Foundation; under version 2 of the License.
+ *
+ *  This program is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU General Public License for more details.
+ *
+ *  You should have received a copy of the GNU General Public License
+ *  along with this program; if not, see .
+ */
+
+#include "qemu/osdep.h"
+#include 
+#include 
+#include 
+
+#include "qemu-common.h"
+#include "qapi/error.h"
+#include "qemu/cutils.h"
+#include "sysemu/block-backend.h"
+#include "sysemu/runstate.h" /* for qemu_system_killed() prototype */
+#include "block/block_int.h"
+#include "block/nbd.h"
+#include "qemu/main-loop.h"
+#include "qemu/module.h"
+#include "qemu/option.h"
+#include "qemu/error-report.h"
+#include "qemu/config-file.h"
+#include "qemu/bswap.h"
+#include "qemu/log.h"
+#include "qemu/systemd.h"
+#include "block/snapshot.h"
+#include "qapi/qmp/qdict.h"
+#include "qapi/qmp/qstring.h"
+#include "qom/object_interfaces.h"
+#include "io/channel-socket.h"
+#include "io/net-listener.h"
+#include "crypto/init.h"
+#include "trace/control.h"
+#include "qemu-version.h"
+
+#ifdef __linux__
+#define HAVE_NBD_DEVICE 1
+#else
+#define HAVE_NBD_DEVICE 0
+#endif
+
+#define SOCKET_PATH                "/var/lock/qemu-nbd-%s"
+#define QEMU_NBD_OPT_CACHE         256
+#define QEMU_NBD_OPT_AIO           257
+#define QEMU_NBD_OPT_DISCARD       258
+#define QEMU_NBD_OPT_DETECT_ZEROES 259
+#define QEMU_NBD_OPT_OBJECT        260
+#define QEMU_NBD_OPT_TLSCREDS      261
+#define QEMU_NBD_OPT_IMAGE_OPTS    262
+#define QEMU_NBD_OPT_FORK          263
+#define QEMU_NBD_OPT_TLSAUTHZ      264
+#define QEMU_NBD_OPT_PID_FILE      265
+
+#define MBR_SIZE 512
+
+static int verbose;
+static char *srcpath;
+static SocketAddress *saddr;
+static int persistent = 0;
+static enum { RUNNING, TERMINATE, TERMINATED } state;
+static int shared = 1;
+static int nb_fds;
+static QIONetListener *server;
+static QCryptoTLSCreds *tlscreds;
+static const char *tlsauthz;
+
+static void usage(const char *name)
+{
+    (printf) (
+"Usage: %s [OPTIONS] FILE\n"
+"  or:  %s -L [OPTIONS]\n"
+"QEMU Disk Network Block Device Utility\n"
+"\n"
+"  -h, --help                display this help and exit\n"
+"  -V, --version             output version information and exit\n"
+"\n"
+"Connection properties:\n"
+"  -p, --port=PORT           port to listen on (default `%d')\n"
+"  -b, --bind=IFACE          interface to bind to (default `0.0.0.0')\n"
+"  -k, --socket=PATH         path to the unix socket\n"
+"                            (default '"SOCKET_PATH"')\n"
+"  -e, --shared=NUM          device can be shared by NUM clients (default '1')\n"
+"  -t, --persistent          don't exit on the last connection\n"
+"  -v, --verbose             display extra debugging information\n"
+"  -x, --export-name=NAME    expose export by name (default is empty string)\n"
+"  -D, --description=TEXT    export a human-readable description\n"
+"\n"
+"Exposing part of the image:\n"
+"  -o, --offset=OFFSET       offset into the image\n"
+"  -A, --allocation-depth    expose the allocation depth\n"
+"  -B, --bitmap=NAME         expose a persistent dirty bitmap\n"
+"\n"
+"General purpose options:\n"
+"  -L, --list                list exports available from another NBD server\n"
+"  --object type,id=ID,...   define an object such as 'secret' for providing\n"
+"                            passwords and/or encryption keys\n"
+"  --tls-creds=ID            use id of an earlier --object to provide TLS\n"
+"  --tls-authz=ID            use id of an earlier --object to provide\n"
+"                            authorization\n"
+"  -T, --trace [[enable=]][,events=][,file=]\n"
+"                            specify tracing options\n"
+"  --fork                    fork off the server process and exit the parent\n"
+"                            once the server is running\n"
+"  --pid-file=PATH           store the server's process ID in the given file\n"
+#if HAVE_NBD_DEVICE
+"\n"
+"Kernel NBD client support:\n"
+"  -c, --connect=DEV         connect FILE to the local NBD device DEV\n"
+"  -d, --disconnect          disconnect the specified device\n"
+#endif
+"\n"
+"Block device options:\n"
+"  -f, --format=FORMAT       set image format (raw, qcow2, ...)\n"
+"  -r, --read-only           export read-only\n"
+"  -s, --snapshot            use FILE as an external snapshot, create a temporary\n"
+"                            file with backing_file=FILE, redirect the write to\n"
+"                            the temporary one\n"
+"  -l, --load-snapshot=SNAPSHOT_PARAM\n"
+"                            load an internal snapshot inside FILE and export it\n"
+"                            as an read-only device, SNAPSHOT_PARAM format is\n"
+"                            'snapshot.id=[ID],snapshot.name=[NAME]', or\n"
+"                            '[ID_OR_NAME]'\n"
+"  -n, --nocache             disable host cache\n"
+"      --cache=MODE          set cache mode (none, writeback, ...)\n"
+"      --aio=MODE            set AIO mode (native, io_uring or threads)\n"
+"      --discard=MODE        set discard mode (ignore, unmap)\n"
+"      --detect-zeroes=MODE  set detect-zeroes mode (off, on, unmap)\n"
+"      --image-opts          treat FILE as a full set of image options\n"
+"\n"
+QEMU_HELP_BOTTOM "\n"
+    , name, name, NBD_DEFAULT_PORT, "DEVICE");
+}
+
+static void version(const char *name)
+{
+    printf(
+"%s " QEMU_FULL_VERSION "\n"
+"Written by Anthony Liguori.\n"
+"\n"
+QEMU_COPYRIGHT "\n"
+"This is free software; see the source for copying conditions.  There is NO\n"
+"warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.\n"
+    , name);
+}
+
+#ifdef CONFIG_POSIX
+/*
+ * The client thread uses SIGTERM to interrupt the server.  A signal
+ * handler ensures that "qemu-nbd -v -c" exits with a nice status code.
+ */
+void qemu_system_killed(int signum, pid_t pid)
+{
+    qatomic_cmpxchg(&state, RUNNING, TERMINATE);
+    qemu_notify_event();
+}
+#endif /* CONFIG_POSIX */
+
+static int qemu_nbd_client_list(SocketAddress *saddr, QCryptoTLSCreds *tls,
+                                const char *hostname)
+{
+    int ret = EXIT_FAILURE;
+    int rc;
+    Error *err = NULL;
+    QIOChannelSocket *sioc;
+    NBDExportInfo *list;
+    int i, j;
+
+    sioc = qio_channel_socket_new();
+    if (qio_channel_socket_connect_sync(sioc, saddr, &err) < 0) {
+        error_report_err(err);
+        return EXIT_FAILURE;
+    }
+    rc = nbd_receive_export_list(QIO_CHANNEL(sioc), tls, hostname, &list,
+                                 &err);
+    if (rc < 0) {
+        if (err) {
+            error_report_err(err);
+        }
+        goto out;
+    }
+    printf("exports available: %d\n", rc);
+    for (i = 0; i < rc; i++) {
+        printf(" export: '%s'\n", list[i].name);
+        if (list[i].description && *list[i].description) {
+            printf("  description: %s\n", list[i].description);
+        }
+        if (list[i].flags & NBD_FLAG_HAS_FLAGS) {
+            static const char *const flag_names[] = {
+                [NBD_FLAG_READ_ONLY_BIT]            = "readonly",
+                [NBD_FLAG_SEND_FLUSH_BIT]           = "flush",
+                [NBD_FLAG_SEND_FUA_BIT]             = "fua",
+                [NBD_FLAG_ROTATIONAL_BIT]           = "rotational",
+                [NBD_FLAG_SEND_TRIM_BIT]            = "trim",
+                [NBD_FLAG_SEND_WRITE_ZEROES_BIT]    = "zeroes",
+                [NBD_FLAG_SEND_DF_BIT]              = "df",
+                [NBD_FLAG_CAN_MULTI_CONN_BIT]       = "multi",
+                [NBD_FLAG_SEND_RESIZE_BIT]          = "resize",
+                [NBD_FLAG_SEND_CACHE_BIT]           = "cache",
+                [NBD_FLAG_SEND_FAST_ZERO_BIT]       = "fast-zero",
+            };
+
+            printf("  size:  %" PRIu64 "\n", list[i].size);
+            printf("  flags: 0x%x (", list[i].flags);
+            for (size_t bit = 0; bit < ARRAY_SIZE(flag_names); bit++) {
+                if (flag_names[bit] && (list[i].flags & (1 << bit))) {
+                    printf(" %s", flag_names[bit]);
+                }
+            }
+            printf(" )\n");
+        }
+        if (list[i].min_block) {
+            printf("  min block: %u\n", list[i].min_block);
+            printf("  opt block: %u\n", list[i].opt_block);
+            printf("  max block: %u\n", list[i].max_block);
+        }
+        if (list[i].n_contexts) {
+            printf("  available meta contexts: %d\n", list[i].n_contexts);
+            for (j = 0; j < list[i].n_contexts; j++) {
+                printf("   %s\n", list[i].contexts[j]);
+            }
+        }
+    }
+    nbd_free_export_list(list, rc);
+
+    ret = EXIT_SUCCESS;
+ out:
+    object_unref(OBJECT(sioc));
+    return ret;
+}
+
+
+#if HAVE_NBD_DEVICE
+static void *show_parts(void *arg)
+{
+    char *device = arg;
+    int nbd;
+
+    /* linux just needs an open() to trigger
+     * the partition table update
+     * but remember to load the module with max_part != 0 :
+     *     modprobe nbd max_part=63
+     */
+    nbd = open(device, O_RDWR);
+    if (nbd >= 0) {
+        close(nbd);
+    }
+    return NULL;
+}
+
+static void *nbd_client_thread(void *arg)
+{
+    char *device = arg;
+    NBDExportInfo info = { .request_sizes = false, .name = g_strdup("") };
+    QIOChannelSocket *sioc;
+    int fd;
+    int ret;
+    pthread_t show_parts_thread;
+    Error *local_error = NULL;
+
+    sioc = qio_channel_socket_new();
+    if (qio_channel_socket_connect_sync(sioc,
+                                        saddr,
+                                        &local_error) < 0) {
+        error_report_err(local_error);
+        goto out;
+    }
+
+    ret = nbd_receive_negotiate(NULL, QIO_CHANNEL(sioc),
+                                NULL, NULL, NULL, &info, &local_error);
+    if (ret < 0) {
+        if (local_error) {
+            error_report_err(local_error);
+        }
+        goto out_socket;
+    }
+
+    fd = open(device, O_RDWR);
+    if (fd < 0) {
+        /* Linux-only, we can use %m in printf.  */
+        error_report("Failed to open %s: %m", device);
+        goto out_socket;
+    }
+
+    ret = nbd_init(fd, sioc, &info, &local_error);
+    if (ret < 0) {
+        error_report_err(local_error);
+        goto out_fd;
+    }
+
+    /* update partition table */
+    pthread_create(&show_parts_thread, NULL, show_parts, device);
+
+    if (verbose) {
+        fprintf(stderr, "NBD device %s is now connected to %s\n",
+                device, srcpath);
+    } else {
+        /* Close stderr so that the qemu-nbd process exits.  */
+        dup2(STDOUT_FILENO, STDERR_FILENO);
+    }
+
+    ret = nbd_client(fd);
+    if (ret) {
+        goto out_fd;
+    }
+    close(fd);
+    object_unref(OBJECT(sioc));
+    g_free(info.name);
+    kill(getpid(), SIGTERM);
+    return (void *) EXIT_SUCCESS;
+
+out_fd:
+    close(fd);
+out_socket:
+    object_unref(OBJECT(sioc));
+out:
+    g_free(info.name);
+    kill(getpid(), SIGTERM);
+    return (void *) EXIT_FAILURE;
+}
+#endif /* HAVE_NBD_DEVICE */
+
+static int nbd_can_accept(void)
+{
+    return state == RUNNING && nb_fds < shared;
+}
+
+static void nbd_update_server_watch(void);
+
+static void nbd_client_closed(NBDClient *client, bool negotiated)
+{
+    nb_fds--;
+    if (negotiated && nb_fds == 0 && !persistent && state == RUNNING) {
+        state = TERMINATE;
+    }
+    nbd_update_server_watch();
+    nbd_client_put(client);
+}
+
+static void nbd_accept(QIONetListener *listener, QIOChannelSocket *cioc,
+                       gpointer opaque)
+{
+    if (state >= TERMINATE) {
+        return;
+    }
+
+    nb_fds++;
+    nbd_update_server_watch();
+    nbd_client_new(cioc, tlscreds, tlsauthz, nbd_client_closed);
+}
+
+static void nbd_update_server_watch(void)
+{
+    if (nbd_can_accept()) {
+        qio_net_listener_set_client_func(server, nbd_accept, NULL, NULL);
+    } else {
+        qio_net_listener_set_client_func(server, NULL, NULL, NULL);
+    }
+}
+
+
+static SocketAddress *nbd_build_socket_address(const char *sockpath,
+                                               const char *bindto,
+                                               const char *port)
+{
+    SocketAddress *saddr;
+
+    saddr = g_new0(SocketAddress, 1);
+    if (sockpath) {
+        saddr->type = SOCKET_ADDRESS_TYPE_UNIX;
+        saddr->u.q_unix.path = g_strdup(sockpath);
+    } else {
+        InetSocketAddress *inet;
+        saddr->type = SOCKET_ADDRESS_TYPE_INET;
+        inet = &saddr->u.inet;
+        inet->host = g_strdup(bindto);
+        if (port) {
+            inet->port = g_strdup(port);
+        } else  {
+            inet->port = g_strdup_printf("%d", NBD_DEFAULT_PORT);
+        }
+    }
+
+    return saddr;
+}
+
+
+static QemuOptsList file_opts = {
+    .name = "file",
+    .implied_opt_name = "file",
+    .head = QTAILQ_HEAD_INITIALIZER(file_opts.head),
+    .desc = {
+        /* no elements => accept any params */
+        { /* end of list */ }
+    },
+};
+
+static QemuOptsList qemu_object_opts = {
+    .name = "object",
+    .implied_opt_name = "qom-type",
+    .head = QTAILQ_HEAD_INITIALIZER(qemu_object_opts.head),
+    .desc = {
+        { }
+    },
+};
+
+static bool qemu_nbd_object_print_help(const char *type, QemuOpts *opts)
+{
+    if (user_creatable_print_help(type, opts)) {
+        exit(0);
+    }
+    return true;
+}
+
+
+static QCryptoTLSCreds *nbd_get_tls_creds(const char *id, bool list,
+                                          Error **errp)
+{
+    Object *obj;
+    QCryptoTLSCreds *creds;
+
+    obj = object_resolve_path_component(
+        object_get_objects_root(), id);
+    if (!obj) {
+        error_setg(errp, "No TLS credentials with id '%s'",
+                   id);
+        return NULL;
+    }
+    creds = (QCryptoTLSCreds *)
+        object_dynamic_cast(obj, TYPE_QCRYPTO_TLS_CREDS);
+    if (!creds) {
+        error_setg(errp, "Object with id '%s' is not TLS credentials",
+                   id);
+        return NULL;
+    }
+
+    if (list) {
+        if (creds->endpoint != QCRYPTO_TLS_CREDS_ENDPOINT_CLIENT) {
+            error_setg(errp,
+                       "Expecting TLS credentials with a client endpoint");
+            return NULL;
+        }
+    } else {
+        if (creds->endpoint != QCRYPTO_TLS_CREDS_ENDPOINT_SERVER) {
+            error_setg(errp,
+                       "Expecting TLS credentials with a server endpoint");
+            return NULL;
+        }
+    }
+    object_ref(obj);
+    return creds;
+}
+
+static void setup_address_and_port(const char **address, const char **port)
+{
+    if (*address == NULL) {
+        *address = "0.0.0.0";
+    }
+
+    if (*port == NULL) {
+        *port = stringify(NBD_DEFAULT_PORT);
+    }
+}
+
+/*
+ * Check socket parameters compatibility when socket activation is used.
+ */
+static const char *socket_activation_validate_opts(const char *device,
+                                                   const char *sockpath,
+                                                   const char *address,
+                                                   const char *port,
+                                                   bool list)
+{
+    if (device != NULL) {
+        return "NBD device can't be set when using socket activation";
+    }
+
+    if (sockpath != NULL) {
+        return "Unix socket can't be set when using socket activation";
+    }
+
+    if (address != NULL) {
+        return "The interface can't be set when using socket activation";
+    }
+
+    if (port != NULL) {
+        return "TCP port number can't be set when using socket activation";
+    }
+
+    if (list) {
+        return "List mode is incompatible with socket activation";
+    }
+
+    return NULL;
+}
+
+static void qemu_nbd_shutdown(void)
+{
+    job_cancel_sync_all();
+    bdrv_close_all();
+}
+
+int main(int argc, char **argv)
+{
+    BlockBackend *blk;
+    BlockDriverState *bs;
+    uint64_t dev_offset = 0;
+    bool readonly = false;
+    bool disconnect = false;
+    const char *bindto = NULL;
+    const char *port = NULL;
+    char *sockpath = NULL;
+    char *device = NULL;
+    QemuOpts *sn_opts = NULL;
+    const char *sn_id_or_name = NULL;
+    const char *sopt = "hVb:o:p:rsnc:dvk:e:f:tl:x:T:D:AB:L";
+    struct option lopt[] = {
+        { "help", no_argument, NULL, 'h' },
+        { "version", no_argument, NULL, 'V' },
+        { "bind", required_argument, NULL, 'b' },
+        { "port", required_argument, NULL, 'p' },
+        { "socket", required_argument, NULL, 'k' },
+        { "offset", required_argument, NULL, 'o' },
+        { "read-only", no_argument, NULL, 'r' },
+        { "allocation-depth", no_argument, NULL, 'A' },
+        { "bitmap", required_argument, NULL, 'B' },
+        { "connect", required_argument, NULL, 'c' },
+        { "disconnect", no_argument, NULL, 'd' },
+        { "list", no_argument, NULL, 'L' },
+        { "snapshot", no_argument, NULL, 's' },
+        { "load-snapshot", required_argument, NULL, 'l' },
+        { "nocache", no_argument, NULL, 'n' },
+        { "cache", required_argument, NULL, QEMU_NBD_OPT_CACHE },
+        { "aio", required_argument, NULL, QEMU_NBD_OPT_AIO },
+        { "discard", required_argument, NULL, QEMU_NBD_OPT_DISCARD },
+        { "detect-zeroes", required_argument, NULL,
+          QEMU_NBD_OPT_DETECT_ZEROES },
+        { "shared", required_argument, NULL, 'e' },
+        { "format", required_argument, NULL, 'f' },
+        { "persistent", no_argument, NULL, 't' },
+        { "verbose", no_argument, NULL, 'v' },
+        { "object", required_argument, NULL, QEMU_NBD_OPT_OBJECT },
+        { "export-name", required_argument, NULL, 'x' },
+        { "description", required_argument, NULL, 'D' },
+        { "tls-creds", required_argument, NULL, QEMU_NBD_OPT_TLSCREDS },
+        { "tls-authz", required_argument, NULL, QEMU_NBD_OPT_TLSAUTHZ },
+        { "image-opts", no_argument, NULL, QEMU_NBD_OPT_IMAGE_OPTS },
+        { "trace", required_argument, NULL, 'T' },
+        { "fork", no_argument, NULL, QEMU_NBD_OPT_FORK },
+        { "pid-file", required_argument, NULL, QEMU_NBD_OPT_PID_FILE },
+        { NULL, 0, NULL, 0 }
+    };
+    int ch;
+    int opt_ind = 0;
+    int flags = BDRV_O_RDWR;
+    int ret = 0;
+    bool seen_cache = false;
+    bool seen_discard = false;
+    bool seen_aio = false;
+    pthread_t client_thread;
+    const char *fmt = NULL;
+    Error *local_err = NULL;
+    BlockdevDetectZeroesOptions detect_zeroes = BLOCKDEV_DETECT_ZEROES_OPTIONS_OFF;
+    QDict *options = NULL;
+    const char *export_name = NULL; /* defaults to "" later for server mode */
+    const char *export_description = NULL;
+    strList *bitmaps = NULL;
+    bool alloc_depth = false;
+    const char *tlscredsid = NULL;
+    bool imageOpts = false;
+    bool writethrough = true;
+    bool fork_process = false;
+    bool list = false;
+    int old_stderr = -1;
+    unsigned socket_activation;
+    const char *pid_file_name = NULL;
+    BlockExportOptions *export_opts;
+
+#ifdef CONFIG_POSIX
+    os_setup_early_signal_handling();
+    os_setup_signal_handling();
+#endif
+
+    socket_init();
+    error_init(argv[0]);
+    module_call_init(MODULE_INIT_TRACE);
+    qcrypto_init(&error_fatal);
+
+    module_call_init(MODULE_INIT_QOM);
+    qemu_add_opts(&qemu_object_opts);
+    qemu_add_opts(&qemu_trace_opts);
+    qemu_init_exec_dir(argv[0]);
+
+    while ((ch = getopt_long(argc, argv, sopt, lopt, &opt_ind)) != -1) {
+        switch (ch) {
+        case 's':
+            flags |= BDRV_O_SNAPSHOT;
+            break;
+        case 'n':
+            optarg = (char *) "none";
+            /* fallthrough */
+        case QEMU_NBD_OPT_CACHE:
+            if (seen_cache) {
+                error_report("-n and --cache can only be specified once");
+                exit(EXIT_FAILURE);
+            }
+            seen_cache = true;
+            if (bdrv_parse_cache_mode(optarg, &flags, &writethrough) == -1) {
+                error_report("Invalid cache mode `%s'", optarg);
+                exit(EXIT_FAILURE);
+            }
+            break;
+        case QEMU_NBD_OPT_AIO:
+            if (seen_aio) {
+                error_report("--aio can only be specified once");
+                exit(EXIT_FAILURE);
+            }
+            seen_aio = true;
+            if (bdrv_parse_aio(optarg, &flags) < 0) {
+                error_report("Invalid aio mode '%s'", optarg);
+                exit(EXIT_FAILURE);
+            }
+            break;
+        case QEMU_NBD_OPT_DISCARD:
+            if (seen_discard) {
+                error_report("--discard can only be specified once");
+                exit(EXIT_FAILURE);
+            }
+            seen_discard = true;
+            if (bdrv_parse_discard_flags(optarg, &flags) == -1) {
+                error_report("Invalid discard mode `%s'", optarg);
+                exit(EXIT_FAILURE);
+            }
+            break;
+        case QEMU_NBD_OPT_DETECT_ZEROES:
+            detect_zeroes =
+                qapi_enum_parse(&BlockdevDetectZeroesOptions_lookup,
+                                optarg,
+                                BLOCKDEV_DETECT_ZEROES_OPTIONS_OFF,
+                                &local_err);
+            if (local_err) {
+                error_reportf_err(local_err,
+                                  "Failed to parse detect_zeroes mode: ");
+                exit(EXIT_FAILURE);
+            }
+            if (detect_zeroes == BLOCKDEV_DETECT_ZEROES_OPTIONS_UNMAP &&
+                !(flags & BDRV_O_UNMAP)) {
+                error_report("setting detect-zeroes to unmap is not allowed "
+                             "without setting discard operation to unmap");
+                exit(EXIT_FAILURE);
+            }
+            break;
+        case 'b':
+            bindto = optarg;
+            break;
+        case 'p':
+            port = optarg;
+            break;
+        case 'o':
+            if (qemu_strtou64(optarg, NULL, 0, &dev_offset) < 0) {
+                error_report("Invalid offset '%s'", optarg);
+                exit(EXIT_FAILURE);
+            }
+            break;
+        case 'l':
+            if (strstart(optarg, SNAPSHOT_OPT_BASE, NULL)) {
+                sn_opts = qemu_opts_parse_noisily(&internal_snapshot_opts,
+                                                  optarg, false);
+                if (!sn_opts) {
+                    error_report("Failed in parsing snapshot param `%s'",
+                                 optarg);
+                    exit(EXIT_FAILURE);
+                }
+            } else {
+                sn_id_or_name = optarg;
+            }
+            /* fall through */
+        case 'r':
+            readonly = true;
+            flags &= ~BDRV_O_RDWR;
+            break;
+        case 'A':
+            alloc_depth = true;
+            break;
+        case 'B':
+            QAPI_LIST_PREPEND(bitmaps, g_strdup(optarg));
+            break;
+        case 'k':
+            sockpath = optarg;
+            if (sockpath[0] != '/') {
+                error_report("socket path must be absolute");
+                exit(EXIT_FAILURE);
+            }
+            break;
+        case 'd':
+            disconnect = true;
+            break;
+        case 'c':
+            device = optarg;
+            break;
+        case 'e':
+            if (qemu_strtoi(optarg, NULL, 0, &shared) < 0 ||
+                shared < 1) {
+                error_report("Invalid shared device number '%s'", optarg);
+                exit(EXIT_FAILURE);
+            }
+            break;
+        case 'f':
+            fmt = optarg;
+            break;
+        case 't':
+            persistent = 1;
+            break;
+        case 'x':
+            export_name = optarg;
+            if (strlen(export_name) > NBD_MAX_STRING_SIZE) {
+                error_report("export name '%s' too long", export_name);
+                exit(EXIT_FAILURE);
+            }
+            break;
+        case 'D':
+            export_description = optarg;
+            if (strlen(export_description) > NBD_MAX_STRING_SIZE) {
+                error_report("export description '%s' too long",
+                             export_description);
+                exit(EXIT_FAILURE);
+            }
+            break;
+        case 'v':
+            verbose = 1;
+            break;
+        case 'V':
+            version(argv[0]);
+            exit(0);
+            break;
+        case 'h':
+            usage(argv[0]);
+            exit(0);
+            break;
+        case '?':
+            error_report("Try `%s --help' for more information.", argv[0]);
+            exit(EXIT_FAILURE);
+        case QEMU_NBD_OPT_OBJECT: {
+            QemuOpts *opts;
+            opts = qemu_opts_parse_noisily(&qemu_object_opts,
+                                           optarg, true);
+            if (!opts) {
+                exit(EXIT_FAILURE);
+            }
+        }   break;
+        case QEMU_NBD_OPT_TLSCREDS:
+            tlscredsid = optarg;
+            break;
+        case QEMU_NBD_OPT_IMAGE_OPTS:
+            imageOpts = true;
+            break;
+        case 'T':
+            trace_opt_parse(optarg);
+            break;
+        case QEMU_NBD_OPT_TLSAUTHZ:
+            tlsauthz = optarg;
+            break;
+        case QEMU_NBD_OPT_FORK:
+            fork_process = true;
+            break;
+        case 'L':
+            list = true;
+            break;
+        case QEMU_NBD_OPT_PID_FILE:
+            pid_file_name = optarg;
+            break;
+        }
+    }
+
+    if (list) {
+        if (argc != optind) {
+            error_report("List mode is incompatible with a file name");
+            exit(EXIT_FAILURE);
+        }
+        if (export_name || export_description || dev_offset ||
+            device || disconnect || fmt || sn_id_or_name || bitmaps ||
+            alloc_depth || seen_aio || seen_discard || seen_cache) {
+            error_report("List mode is incompatible with per-device settings");
+            exit(EXIT_FAILURE);
+        }
+        if (fork_process) {
+            error_report("List mode is incompatible with forking");
+            exit(EXIT_FAILURE);
+        }
+    } else if ((argc - optind) != 1) {
+        error_report("Invalid number of arguments");
+        error_printf("Try `%s --help' for more information.\n", argv[0]);
+        exit(EXIT_FAILURE);
+    } else if (!export_name) {
+        export_name = "";
+    }
+
+    qemu_opts_foreach(&qemu_object_opts,
+                      user_creatable_add_opts_foreach,
+                      qemu_nbd_object_print_help, &error_fatal);
+
+    if (!trace_init_backends()) {
+        exit(1);
+    }
+    trace_init_file();
+    qemu_set_log(LOG_TRACE);
+
+    socket_activation = check_socket_activation();
+    if (socket_activation == 0) {
+        setup_address_and_port(&bindto, &port);
+    } else {
+        /* Using socket activation - check user didn't use -p etc. */
+        const char *err_msg = socket_activation_validate_opts(device, sockpath,
+                                                              bindto, port,
+                                                              list);
+        if (err_msg != NULL) {
+            error_report("%s", err_msg);
+            exit(EXIT_FAILURE);
+        }
+
+        /* qemu-nbd can only listen on a single socket.  */
+        if (socket_activation > 1) {
+            error_report("qemu-nbd does not support socket activation with %s > 1",
+                         "LISTEN_FDS");
+            exit(EXIT_FAILURE);
+        }
+    }
+
+    if (tlscredsid) {
+        if (sockpath) {
+            error_report("TLS is only supported with IPv4/IPv6");
+            exit(EXIT_FAILURE);
+        }
+        if (device) {
+            error_report("TLS is not supported with a host device");
+            exit(EXIT_FAILURE);
+        }
+        if (tlsauthz && list) {
+            error_report("TLS authorization is incompatible with export list");
+            exit(EXIT_FAILURE);
+        }
+        tlscreds = nbd_get_tls_creds(tlscredsid, list, &local_err);
+        if (local_err) {
+            error_reportf_err(local_err, "Failed to get TLS creds: ");
+            exit(EXIT_FAILURE);
+        }
+    } else {
+        if (tlsauthz) {
+            error_report("--tls-authz is not permitted without --tls-creds");
+            exit(EXIT_FAILURE);
+        }
+    }
+
+    if (list) {
+        saddr = nbd_build_socket_address(sockpath, bindto, port);
+        return qemu_nbd_client_list(saddr, tlscreds, bindto);
+    }
+
+#if !HAVE_NBD_DEVICE
+    if (disconnect || device) {
+        error_report("Kernel /dev/nbdN support not available");
+        exit(EXIT_FAILURE);
+    }
+#else /* HAVE_NBD_DEVICE */
+    if (disconnect) {
+        int nbdfd = open(argv[optind], O_RDWR);
+        if (nbdfd < 0) {
+            error_report("Cannot open %s: %s", argv[optind],
+                         strerror(errno));
+            exit(EXIT_FAILURE);
+        }
+        nbd_disconnect(nbdfd);
+
+        close(nbdfd);
+
+        printf("%s disconnected\n", argv[optind]);
+
+        return 0;
+    }
+#endif
+
+    if ((device && !verbose) || fork_process) {
+#ifndef WIN32
+        int stderr_fd[2];
+        pid_t pid;
+        int ret;
+
+        if (qemu_pipe(stderr_fd) < 0) {
+            error_report("Error setting up communication pipe: %s",
+                         strerror(errno));
+            exit(EXIT_FAILURE);
+        }
+
+        /* Now daemonize, but keep a communication channel open to
+         * print errors and exit with the proper status code.
+         */
+        pid = fork();
+        if (pid < 0) {
+            error_report("Failed to fork: %s", strerror(errno));
+            exit(EXIT_FAILURE);
+        } else if (pid == 0) {
+            close(stderr_fd[0]);
+
+            /* Remember parent's stderr if we will be restoring it. */
+            if (fork_process) {
+                old_stderr = dup(STDERR_FILENO);
+            }
+
+            ret = qemu_daemon(1, 0);
+
+            /* Temporarily redirect stderr to the parent's pipe...  */
+            dup2(stderr_fd[1], STDERR_FILENO);
+            if (ret < 0) {
+                error_report("Failed to daemonize: %s", strerror(errno));
+                exit(EXIT_FAILURE);
+            }
+
+            /* ... close the descriptor we inherited and go on.  */
+            close(stderr_fd[1]);
+        } else {
+            bool errors = false;
+            char *buf;
+
+            /* In the parent.  Print error messages from the child until
+             * it closes the pipe.
+             */
+            close(stderr_fd[1]);
+            buf = g_malloc(1024);
+            while ((ret = read(stderr_fd[0], buf, 1024)) > 0) {
+                errors = true;
+                ret = qemu_write_full(STDERR_FILENO, buf, ret);
+                if (ret < 0) {
+                    exit(EXIT_FAILURE);
+                }
+            }
+            if (ret < 0) {
+                error_report("Cannot read from daemon: %s",
+                             strerror(errno));
+                exit(EXIT_FAILURE);
+            }
+
+            /* Usually the daemon should not print any message.
+             * Exit with zero status in that case.
+             */
+            exit(errors);
+        }
+#else /* WIN32 */
+        error_report("Unable to fork into background on Windows hosts");
+        exit(EXIT_FAILURE);
+#endif /* WIN32 */
+    }
+
+    if (device != NULL && sockpath == NULL) {
+        sockpath = g_malloc(128);
+        snprintf(sockpath, 128, SOCKET_PATH, basename(device));
+    }
+
+    server = qio_net_listener_new();
+    if (socket_activation == 0) {
+        int backlog;
+
+        if (persistent) {
+            backlog = SOMAXCONN;
+        } else {
+            backlog = MIN(shared, SOMAXCONN);
+        }
+        saddr = nbd_build_socket_address(sockpath, bindto, port);
+        if (qio_net_listener_open_sync(server, saddr, backlog,
+                                       &local_err) < 0) {
+            object_unref(OBJECT(server));
+            error_report_err(local_err);
+            exit(EXIT_FAILURE);
+        }
+    } else {
+        size_t i;
+        /* See comment in check_socket_activation above. */
+        for (i = 0; i < socket_activation; i++) {
+            QIOChannelSocket *sioc;
+            sioc = qio_channel_socket_new_fd(FIRST_SOCKET_ACTIVATION_FD + i,
+                                             &local_err);
+            if (sioc == NULL) {
+                object_unref(OBJECT(server));
+                error_reportf_err(local_err,
+                                  "Failed to use socket activation: ");
+                exit(EXIT_FAILURE);
+            }
+            qio_net_listener_add(server, sioc);
+            object_unref(OBJECT(sioc));
+        }
+    }
+
+    if (qemu_init_main_loop(&local_err)) {
+        error_report_err(local_err);
+        exit(EXIT_FAILURE);
+    }
+    bdrv_init();
+    atexit(qemu_nbd_shutdown);
+
+    srcpath = argv[optind];
+    if (imageOpts) {
+        QemuOpts *opts;
+        if (fmt) {
+            error_report("--image-opts and -f are mutually exclusive");
+            exit(EXIT_FAILURE);
+        }
+        opts = qemu_opts_parse_noisily(&file_opts, srcpath, true);
+        if (!opts) {
+            qemu_opts_reset(&file_opts);
+            exit(EXIT_FAILURE);
+        }
+        options = qemu_opts_to_qdict(opts, NULL);
+        qemu_opts_reset(&file_opts);
+        blk = blk_new_open(NULL, NULL, options, flags, &local_err);
+    } else {
+        if (fmt) {
+            options = qdict_new();
+            qdict_put_str(options, "driver", fmt);
+        }
+        blk = blk_new_open(srcpath, NULL, options, flags, &local_err);
+    }
+
+    if (!blk) {
+        error_reportf_err(local_err, "Failed to blk_new_open '%s': ",
+                          argv[optind]);
+        exit(EXIT_FAILURE);
+    }
+    bs = blk_bs(blk);
+
+    if (dev_offset) {
+        QDict *raw_opts = qdict_new();
+        qdict_put_str(raw_opts, "driver", "raw");
+        qdict_put_str(raw_opts, "file", bs->node_name);
+        qdict_put_int(raw_opts, "offset", dev_offset);
+        bs = bdrv_open(NULL, NULL, raw_opts, flags, &error_fatal);
+        blk_remove_bs(blk);
+        blk_insert_bs(blk, bs, &error_fatal);
+        bdrv_unref(bs);
+    }
+
+    blk_set_enable_write_cache(blk, !writethrough);
+
+    if (sn_opts) {
+        ret = bdrv_snapshot_load_tmp(bs,
+                                     qemu_opt_get(sn_opts, SNAPSHOT_OPT_ID),
+                                     qemu_opt_get(sn_opts, SNAPSHOT_OPT_NAME),
+                                     &local_err);
+    } else if (sn_id_or_name) {
+        ret = bdrv_snapshot_load_tmp_by_id_or_name(bs, sn_id_or_name,
+                                                   &local_err);
+    }
+    if (ret < 0) {
+        error_reportf_err(local_err, "Failed to load snapshot: ");
+        exit(EXIT_FAILURE);
+    }
+
+    bs->detect_zeroes = detect_zeroes;
+
+    nbd_server_is_qemu_nbd(true);
+
+    export_opts = g_new(BlockExportOptions, 1);
+    *export_opts = (BlockExportOptions) {
+        .type               = BLOCK_EXPORT_TYPE_NBD,
+        .id                 = g_strdup("qemu-nbd-export"),
+        .node_name          = g_strdup(bdrv_get_node_name(bs)),
+        .has_writethrough   = true,
+        .writethrough       = writethrough,
+        .has_writable       = true,
+        .writable           = !readonly,
+        .u.nbd = {
+            .has_name             = true,
+            .name                 = g_strdup(export_name),
+            .has_description      = !!export_description,
+            .description          = g_strdup(export_description),
+            .has_bitmaps          = !!bitmaps,
+            .bitmaps              = bitmaps,
+            .has_allocation_depth = alloc_depth,
+            .allocation_depth     = alloc_depth,
+        },
+    };
+    blk_exp_add(export_opts, &error_fatal);
+    qapi_free_BlockExportOptions(export_opts);
+
+    if (device) {
+#if HAVE_NBD_DEVICE
+        int ret;
+
+        ret = pthread_create(&client_thread, NULL, nbd_client_thread, device);
+        if (ret != 0) {
+            error_report("Failed to create client thread: %s", strerror(ret));
+            exit(EXIT_FAILURE);
+        }
+#endif
+    } else {
+        /* Shut up GCC warnings.  */
+        memset(&client_thread, 0, sizeof(client_thread));
+    }
+
+    nbd_update_server_watch();
+
+    if (pid_file_name) {
+        qemu_write_pidfile(pid_file_name, &error_fatal);
+    }
+
+    /* now when the initialization is (almost) complete, chdir("/")
+     * to free any busy filesystems */
+    if (chdir("/") < 0) {
+        error_report("Could not chdir to root directory: %s",
+                     strerror(errno));
+        exit(EXIT_FAILURE);
+    }
+
+    if (fork_process) {
+        dup2(old_stderr, STDERR_FILENO);
+        close(old_stderr);
+    }
+
+    state = RUNNING;
+    do {
+        main_loop_wait(false);
+        if (state == TERMINATE) {
+            blk_exp_close_all();
+            state = TERMINATED;
+        }
+    } while (state != TERMINATED);
+
+    blk_unref(blk);
+    if (sockpath) {
+        unlink(sockpath);
+    }
+
+    qemu_opts_del(sn_opts);
+
+    if (device) {
+        void *ret;
+        pthread_join(client_thread, &ret);
+        exit(ret != NULL);
+    } else {
+        exit(EXIT_SUCCESS);
+    }
+}
diff --git a/qemu-options-wrapper.h b/qemu-options-wrapper.h
new file mode 100644
index 000000000..6f548e392
--- /dev/null
+++ b/qemu-options-wrapper.h
@@ -0,0 +1,40 @@
+
+#if defined(QEMU_OPTIONS_GENERATE_ENUM)
+
+#define DEF(option, opt_arg, opt_enum, opt_help, arch_mask)     \
+    opt_enum,
+#define DEFHEADING(text)
+#define ARCHHEADING(text, arch_mask)
+
+#elif defined(QEMU_OPTIONS_GENERATE_HELP)
+
+#define DEF(option, opt_arg, opt_enum, opt_help, arch_mask)    \
+    if ((arch_mask) & arch_type)                               \
+        fputs(opt_help, stdout);
+
+#define ARCHHEADING(text, arch_mask) \
+    if ((arch_mask) & arch_type)    \
+        puts(stringify(text));
+
+#define DEFHEADING(text) ARCHHEADING(text, QEMU_ARCH_ALL)
+
+#elif defined(QEMU_OPTIONS_GENERATE_OPTIONS)
+
+#define DEF(option, opt_arg, opt_enum, opt_help, arch_mask)     \
+    { option, opt_arg, opt_enum, arch_mask },
+#define DEFHEADING(text)
+#define ARCHHEADING(text, arch_mask)
+
+#else
+#error "qemu-options-wrapper.h included with no option defined"
+#endif
+
+#include "qemu-options.def"
+
+#undef DEF
+#undef DEFHEADING
+#undef ARCHHEADING
+
+#undef QEMU_OPTIONS_GENERATE_ENUM
+#undef QEMU_OPTIONS_GENERATE_HELP
+#undef QEMU_OPTIONS_GENERATE_OPTIONS
diff --git a/qemu-options.h b/qemu-options.h
new file mode 100644
index 000000000..b4ee63cd6
--- /dev/null
+++ b/qemu-options.h
@@ -0,0 +1,36 @@
+/*
+ * qemu-options.h
+ *
+ * Defines needed for command line argument processing.
+ *
+ * Copyright (c) 2003-2008 Fabrice Bellard
+ * Copyright (c) 2010 Jes Sorensen 
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+#ifndef QEMU_OPTIONS_H
+#define QEMU_OPTIONS_H
+
+enum {
+#define QEMU_OPTIONS_GENERATE_ENUM
+#include "qemu-options-wrapper.h"
+};
+
+#endif
diff --git a/qemu-options.hx b/qemu-options.hx
new file mode 100644
index 000000000..104632ea3
--- /dev/null
+++ b/qemu-options.hx
@@ -0,0 +1,5135 @@
+HXCOMM Use DEFHEADING() to define headings in both help text and rST.
+HXCOMM Text between SRST and ERST is copied to the rST version and
+HXCOMM discarded from C version.
+HXCOMM DEF(option, HAS_ARG/0, opt_enum, opt_help, arch_mask) is used to
+HXCOMM construct option structures, enums and help message for specified
+HXCOMM architectures.
+HXCOMM HXCOMM can be used for comments, discarded from both rST and C.
+
+DEFHEADING(Standard options:)
+
+DEF("help", 0, QEMU_OPTION_h,
+    "-h or -help     display this help and exit\n", QEMU_ARCH_ALL)
+SRST
+``-h``
+    Display help and exit
+ERST
+
+DEF("version", 0, QEMU_OPTION_version,
+    "-version        display version information and exit\n", QEMU_ARCH_ALL)
+SRST
+``-version``
+    Display version information and exit
+ERST
+
+DEF("machine", HAS_ARG, QEMU_OPTION_machine, \
+    "-machine [type=]name[,prop[=value][,...]]\n"
+    "                selects emulated machine ('-machine help' for list)\n"
+    "                property accel=accel1[:accel2[:...]] selects accelerator\n"
+    "                supported accelerators are kvm, xen, hax, hvf, whpx or tcg (default: tcg)\n"
+    "                vmport=on|off|auto controls emulation of vmport (default: auto)\n"
+    "                dump-guest-core=on|off include guest memory in a core dump (default=on)\n"
+    "                mem-merge=on|off controls memory merge support (default: on)\n"
+    "                aes-key-wrap=on|off controls support for AES key wrapping (default=on)\n"
+    "                dea-key-wrap=on|off controls support for DEA key wrapping (default=on)\n"
+    "                suppress-vmdesc=on|off disables self-describing migration (default=off)\n"
+    "                nvdimm=on|off controls NVDIMM support (default=off)\n"
+    "                memory-encryption=@var{} memory encryption object to use (default=none)\n"
+    "                hmat=on|off controls ACPI HMAT support (default=off)\n",
+    QEMU_ARCH_ALL)
+SRST
+``-machine [type=]name[,prop=value[,...]]``
+    Select the emulated machine by name. Use ``-machine help`` to list
+    available machines.
+
+    For architectures which aim to support live migration compatibility
+    across releases, each release will introduce a new versioned machine
+    type. For example, the 2.8.0 release introduced machine types
+    "pc-i440fx-2.8" and "pc-q35-2.8" for the x86\_64/i686 architectures.
+
+    To allow live migration of guests from QEMU version 2.8.0, to QEMU
+    version 2.9.0, the 2.9.0 version must support the "pc-i440fx-2.8"
+    and "pc-q35-2.8" machines too. To allow users live migrating VMs to
+    skip multiple intermediate releases when upgrading, new releases of
+    QEMU will support machine types from many previous versions.
+
+    Supported machine properties are:
+
+    ``accel=accels1[:accels2[:...]]``
+        This is used to enable an accelerator. Depending on the target
+        architecture, kvm, xen, hax, hvf, whpx or tcg can be available.
+        By default, tcg is used. If there is more than one accelerator
+        specified, the next one is used if the previous one fails to
+        initialize.
+
+    ``vmport=on|off|auto``
+        Enables emulation of VMWare IO port, for vmmouse etc. auto says
+        to select the value based on accel. For accel=xen the default is
+        off otherwise the default is on.
+
+    ``dump-guest-core=on|off``
+        Include guest memory in a core dump. The default is on.
+
+    ``mem-merge=on|off``
+        Enables or disables memory merge support. This feature, when
+        supported by the host, de-duplicates identical memory pages
+        among VMs instances (enabled by default).
+
+    ``aes-key-wrap=on|off``
+        Enables or disables AES key wrapping support on s390-ccw hosts.
+        This feature controls whether AES wrapping keys will be created
+        to allow execution of AES cryptographic functions. The default
+        is on.
+
+    ``dea-key-wrap=on|off``
+        Enables or disables DEA key wrapping support on s390-ccw hosts.
+        This feature controls whether DEA wrapping keys will be created
+        to allow execution of DEA cryptographic functions. The default
+        is on.
+
+    ``nvdimm=on|off``
+        Enables or disables NVDIMM support. The default is off.
+
+    ``memory-encryption=``
+        Memory encryption object to use. The default is none.
+
+    ``hmat=on|off``
+        Enables or disables ACPI Heterogeneous Memory Attribute Table
+        (HMAT) support. The default is off.
+ERST
+
+HXCOMM Deprecated by -machine
+DEF("M", HAS_ARG, QEMU_OPTION_M, "", QEMU_ARCH_ALL)
+
+DEF("cpu", HAS_ARG, QEMU_OPTION_cpu,
+    "-cpu cpu        select CPU ('-cpu help' for list)\n", QEMU_ARCH_ALL)
+SRST
+``-cpu model``
+    Select CPU model (``-cpu help`` for list and additional feature
+    selection)
+ERST
+
+DEF("accel", HAS_ARG, QEMU_OPTION_accel,
+    "-accel [accel=]accelerator[,prop[=value][,...]]\n"
+    "                select accelerator (kvm, xen, hax, hvf, whpx or tcg; use 'help' for a list)\n"
+    "                igd-passthru=on|off (enable Xen integrated Intel graphics passthrough, default=off)\n"
+    "                kernel-irqchip=on|off|split controls accelerated irqchip support (default=on)\n"
+    "                kvm-shadow-mem=size of KVM shadow MMU in bytes\n"
+    "                tb-size=n (TCG translation block cache size)\n"
+    "                thread=single|multi (enable multi-threaded TCG)\n", QEMU_ARCH_ALL)
+SRST
+``-accel name[,prop=value[,...]]``
+    This is used to enable an accelerator. Depending on the target
+    architecture, kvm, xen, hax, hvf, whpx or tcg can be available. By
+    default, tcg is used. If there is more than one accelerator
+    specified, the next one is used if the previous one fails to
+    initialize.
+
+    ``igd-passthru=on|off``
+        When Xen is in use, this option controls whether Intel
+        integrated graphics devices can be passed through to the guest
+        (default=off)
+
+    ``kernel-irqchip=on|off|split``
+        Controls KVM in-kernel irqchip support. The default is full
+        acceleration of the interrupt controllers. On x86, split irqchip
+        reduces the kernel attack surface, at a performance cost for
+        non-MSI interrupts. Disabling the in-kernel irqchip completely
+        is not recommended except for debugging purposes.
+
+    ``kvm-shadow-mem=size``
+        Defines the size of the KVM shadow MMU.
+
+    ``tb-size=n``
+        Controls the size (in MiB) of the TCG translation block cache.
+
+    ``thread=single|multi``
+        Controls number of TCG threads. When the TCG is multi-threaded
+        there will be one thread per vCPU therefor taking advantage of
+        additional host cores. The default is to enable multi-threading
+        where both the back-end and front-ends support it and no
+        incompatible TCG features have been enabled (e.g.
+        icount/replay).
+ERST
+
+DEF("smp", HAS_ARG, QEMU_OPTION_smp,
+    "-smp [cpus=]n[,maxcpus=cpus][,cores=cores][,threads=threads][,dies=dies][,sockets=sockets]\n"
+    "                set the number of CPUs to 'n' [default=1]\n"
+    "                maxcpus= maximum number of total cpus, including\n"
+    "                offline CPUs for hotplug, etc\n"
+    "                cores= number of CPU cores on one socket (for PC, it's on one die)\n"
+    "                threads= number of threads on one CPU core\n"
+    "                dies= number of CPU dies on one socket (for PC only)\n"
+    "                sockets= number of discrete sockets in the system\n",
+        QEMU_ARCH_ALL)
+SRST
+``-smp [cpus=]n[,cores=cores][,threads=threads][,dies=dies][,sockets=sockets][,maxcpus=maxcpus]``
+    Simulate an SMP system with n CPUs. On the PC target, up to 255 CPUs
+    are supported. On Sparc32 target, Linux limits the number of usable
+    CPUs to 4. For the PC target, the number of cores per die, the
+    number of threads per cores, the number of dies per packages and the
+    total number of sockets can be specified. Missing values will be
+    computed. If any on the three values is given, the total number of
+    CPUs n can be omitted. maxcpus specifies the maximum number of
+    hotpluggable CPUs.
+ERST
+
+DEF("numa", HAS_ARG, QEMU_OPTION_numa,
+    "-numa node[,mem=size][,cpus=firstcpu[-lastcpu]][,nodeid=node][,initiator=node]\n"
+    "-numa node[,memdev=id][,cpus=firstcpu[-lastcpu]][,nodeid=node][,initiator=node]\n"
+    "-numa dist,src=source,dst=destination,val=distance\n"
+    "-numa cpu,node-id=node[,socket-id=x][,core-id=y][,thread-id=z]\n"
+    "-numa hmat-lb,initiator=node,target=node,hierarchy=memory|first-level|second-level|third-level,data-type=access-latency|read-latency|write-latency[,latency=lat][,bandwidth=bw]\n"
+    "-numa hmat-cache,node-id=node,size=size,level=level[,associativity=none|direct|complex][,policy=none|write-back|write-through][,line=size]\n",
+    QEMU_ARCH_ALL)
+SRST
+``-numa node[,mem=size][,cpus=firstcpu[-lastcpu]][,nodeid=node][,initiator=initiator]``
+  \ 
+``-numa node[,memdev=id][,cpus=firstcpu[-lastcpu]][,nodeid=node][,initiator=initiator]``
+  \
+``-numa dist,src=source,dst=destination,val=distance``
+  \ 
+``-numa cpu,node-id=node[,socket-id=x][,core-id=y][,thread-id=z]``
+  \ 
+``-numa hmat-lb,initiator=node,target=node,hierarchy=hierarchy,data-type=tpye[,latency=lat][,bandwidth=bw]``
+  \ 
+``-numa hmat-cache,node-id=node,size=size,level=level[,associativity=str][,policy=str][,line=size]``
+    Define a NUMA node and assign RAM and VCPUs to it. Set the NUMA
+    distance from a source node to a destination node. Set the ACPI
+    Heterogeneous Memory Attributes for the given nodes.
+
+    Legacy VCPU assignment uses '\ ``cpus``\ ' option where firstcpu and
+    lastcpu are CPU indexes. Each '\ ``cpus``\ ' option represent a
+    contiguous range of CPU indexes (or a single VCPU if lastcpu is
+    omitted). A non-contiguous set of VCPUs can be represented by
+    providing multiple '\ ``cpus``\ ' options. If '\ ``cpus``\ ' is
+    omitted on all nodes, VCPUs are automatically split between them.
+
+    For example, the following option assigns VCPUs 0, 1, 2 and 5 to a
+    NUMA node:
+
+    ::
+
+        -numa node,cpus=0-2,cpus=5
+
+    '\ ``cpu``\ ' option is a new alternative to '\ ``cpus``\ ' option
+    which uses '\ ``socket-id|core-id|thread-id``\ ' properties to
+    assign CPU objects to a node using topology layout properties of
+    CPU. The set of properties is machine specific, and depends on used
+    machine type/'\ ``smp``\ ' options. It could be queried with
+    '\ ``hotpluggable-cpus``\ ' monitor command. '\ ``node-id``\ '
+    property specifies node to which CPU object will be assigned, it's
+    required for node to be declared with '\ ``node``\ ' option before
+    it's used with '\ ``cpu``\ ' option.
+
+    For example:
+
+    ::
+
+        -M pc \
+        -smp 1,sockets=2,maxcpus=2 \
+        -numa node,nodeid=0 -numa node,nodeid=1 \
+        -numa cpu,node-id=0,socket-id=0 -numa cpu,node-id=1,socket-id=1
+
+    Legacy '\ ``mem``\ ' assigns a given RAM amount to a node (not supported
+    for 5.1 and newer machine types). '\ ``memdev``\ ' assigns RAM from
+    a given memory backend device to a node. If '\ ``mem``\ ' and
+    '\ ``memdev``\ ' are omitted in all nodes, RAM is split equally between them.
+
+
+    '\ ``mem``\ ' and '\ ``memdev``\ ' are mutually exclusive.
+    Furthermore, if one node uses '\ ``memdev``\ ', all of them have to
+    use it.
+
+    '\ ``initiator``\ ' is an additional option that points to an
+    initiator NUMA node that has best performance (the lowest latency or
+    largest bandwidth) to this NUMA node. Note that this option can be
+    set only when the machine property 'hmat' is set to 'on'.
+
+    Following example creates a machine with 2 NUMA nodes, node 0 has
+    CPU. node 1 has only memory, and its initiator is node 0. Note that
+    because node 0 has CPU, by default the initiator of node 0 is itself
+    and must be itself.
+
+    ::
+
+        -machine hmat=on \
+        -m 2G,slots=2,maxmem=4G \
+        -object memory-backend-ram,size=1G,id=m0 \
+        -object memory-backend-ram,size=1G,id=m1 \
+        -numa node,nodeid=0,memdev=m0 \
+        -numa node,nodeid=1,memdev=m1,initiator=0 \
+        -smp 2,sockets=2,maxcpus=2  \
+        -numa cpu,node-id=0,socket-id=0 \
+        -numa cpu,node-id=0,socket-id=1
+
+    source and destination are NUMA node IDs. distance is the NUMA
+    distance from source to destination. The distance from a node to
+    itself is always 10. If any pair of nodes is given a distance, then
+    all pairs must be given distances. Although, when distances are only
+    given in one direction for each pair of nodes, then the distances in
+    the opposite directions are assumed to be the same. If, however, an
+    asymmetrical pair of distances is given for even one node pair, then
+    all node pairs must be provided distance values for both directions,
+    even when they are symmetrical. When a node is unreachable from
+    another node, set the pair's distance to 255.
+
+    Note that the -``numa`` option doesn't allocate any of the specified
+    resources, it just assigns existing resources to NUMA nodes. This
+    means that one still has to use the ``-m``, ``-smp`` options to
+    allocate RAM and VCPUs respectively.
+
+    Use '\ ``hmat-lb``\ ' to set System Locality Latency and Bandwidth
+    Information between initiator and target NUMA nodes in ACPI
+    Heterogeneous Attribute Memory Table (HMAT). Initiator NUMA node can
+    create memory requests, usually it has one or more processors.
+    Target NUMA node contains addressable memory.
+
+    In '\ ``hmat-lb``\ ' option, node are NUMA node IDs. hierarchy is
+    the memory hierarchy of the target NUMA node: if hierarchy is
+    'memory', the structure represents the memory performance; if
+    hierarchy is 'first-level\|second-level\|third-level', this
+    structure represents aggregated performance of memory side caches
+    for each domain. type of 'data-type' is type of data represented by
+    this structure instance: if 'hierarchy' is 'memory', 'data-type' is
+    'access\|read\|write' latency or 'access\|read\|write' bandwidth of
+    the target memory; if 'hierarchy' is
+    'first-level\|second-level\|third-level', 'data-type' is
+    'access\|read\|write' hit latency or 'access\|read\|write' hit
+    bandwidth of the target memory side cache.
+
+    lat is latency value in nanoseconds. bw is bandwidth value, the
+    possible value and units are NUM[M\|G\|T], mean that the bandwidth
+    value are NUM byte per second (or MB/s, GB/s or TB/s depending on
+    used suffix). Note that if latency or bandwidth value is 0, means
+    the corresponding latency or bandwidth information is not provided.
+
+    In '\ ``hmat-cache``\ ' option, node-id is the NUMA-id of the memory
+    belongs. size is the size of memory side cache in bytes. level is
+    the cache level described in this structure, note that the cache
+    level 0 should not be used with '\ ``hmat-cache``\ ' option.
+    associativity is the cache associativity, the possible value is
+    'none/direct(direct-mapped)/complex(complex cache indexing)'. policy
+    is the write policy. line is the cache Line size in bytes.
+
+    For example, the following options describe 2 NUMA nodes. Node 0 has
+    2 cpus and a ram, node 1 has only a ram. The processors in node 0
+    access memory in node 0 with access-latency 5 nanoseconds,
+    access-bandwidth is 200 MB/s; The processors in NUMA node 0 access
+    memory in NUMA node 1 with access-latency 10 nanoseconds,
+    access-bandwidth is 100 MB/s. And for memory side cache information,
+    NUMA node 0 and 1 both have 1 level memory cache, size is 10KB,
+    policy is write-back, the cache Line size is 8 bytes:
+
+    ::
+
+        -machine hmat=on \
+        -m 2G \
+        -object memory-backend-ram,size=1G,id=m0 \
+        -object memory-backend-ram,size=1G,id=m1 \
+        -smp 2 \
+        -numa node,nodeid=0,memdev=m0 \
+        -numa node,nodeid=1,memdev=m1,initiator=0 \
+        -numa cpu,node-id=0,socket-id=0 \
+        -numa cpu,node-id=0,socket-id=1 \
+        -numa hmat-lb,initiator=0,target=0,hierarchy=memory,data-type=access-latency,latency=5 \
+        -numa hmat-lb,initiator=0,target=0,hierarchy=memory,data-type=access-bandwidth,bandwidth=200M \
+        -numa hmat-lb,initiator=0,target=1,hierarchy=memory,data-type=access-latency,latency=10 \
+        -numa hmat-lb,initiator=0,target=1,hierarchy=memory,data-type=access-bandwidth,bandwidth=100M \
+        -numa hmat-cache,node-id=0,size=10K,level=1,associativity=direct,policy=write-back,line=8 \
+        -numa hmat-cache,node-id=1,size=10K,level=1,associativity=direct,policy=write-back,line=8
+ERST
+
+DEF("add-fd", HAS_ARG, QEMU_OPTION_add_fd,
+    "-add-fd fd=fd,set=set[,opaque=opaque]\n"
+    "                Add 'fd' to fd 'set'\n", QEMU_ARCH_ALL)
+SRST
+``-add-fd fd=fd,set=set[,opaque=opaque]``
+    Add a file descriptor to an fd set. Valid options are:
+
+    ``fd=fd``
+        This option defines the file descriptor of which a duplicate is
+        added to fd set. The file descriptor cannot be stdin, stdout, or
+        stderr.
+
+    ``set=set``
+        This option defines the ID of the fd set to add the file
+        descriptor to.
+
+    ``opaque=opaque``
+        This option defines a free-form string that can be used to
+        describe fd.
+
+    You can open an image using pre-opened file descriptors from an fd
+    set:
+
+    .. parsed-literal::
+
+        |qemu_system| \\
+         -add-fd fd=3,set=2,opaque="rdwr:/path/to/file" \\
+         -add-fd fd=4,set=2,opaque="rdonly:/path/to/file" \\
+         -drive file=/dev/fdset/2,index=0,media=disk
+ERST
+
+DEF("set", HAS_ARG, QEMU_OPTION_set,
+    "-set group.id.arg=value\n"
+    "                set  parameter for item  of type \n"
+    "                i.e. -set drive.$id.file=/path/to/image\n", QEMU_ARCH_ALL)
+SRST
+``-set group.id.arg=value``
+    Set parameter arg for item id of type group
+ERST
+
+DEF("global", HAS_ARG, QEMU_OPTION_global,
+    "-global driver.property=value\n"
+    "-global driver=driver,property=property,value=value\n"
+    "                set a global default for a driver property\n",
+    QEMU_ARCH_ALL)
+SRST
+``-global driver.prop=value``
+  \ 
+``-global driver=driver,property=property,value=value``
+    Set default value of driver's property prop to value, e.g.:
+
+    .. parsed-literal::
+
+        |qemu_system_x86| -global ide-hd.physical_block_size=4096 disk-image.img
+
+    In particular, you can use this to set driver properties for devices
+    which are created automatically by the machine model. To create a
+    device which is not created automatically and set properties on it,
+    use -``device``.
+
+    -global driver.prop=value is shorthand for -global
+    driver=driver,property=prop,value=value. The longhand syntax works
+    even when driver contains a dot.
+ERST
+
+DEF("boot", HAS_ARG, QEMU_OPTION_boot,
+    "-boot [order=drives][,once=drives][,menu=on|off]\n"
+    "      [,splash=sp_name][,splash-time=sp_time][,reboot-timeout=rb_time][,strict=on|off]\n"
+    "                'drives': floppy (a), hard disk (c), CD-ROM (d), network (n)\n"
+    "                'sp_name': the file's name that would be passed to bios as logo picture, if menu=on\n"
+    "                'sp_time': the period that splash picture last if menu=on, unit is ms\n"
+    "                'rb_timeout': the timeout before guest reboot when boot failed, unit is ms\n",
+    QEMU_ARCH_ALL)
+SRST
+``-boot [order=drives][,once=drives][,menu=on|off][,splash=sp_name][,splash-time=sp_time][,reboot-timeout=rb_timeout][,strict=on|off]``
+    Specify boot order drives as a string of drive letters. Valid drive
+    letters depend on the target architecture. The x86 PC uses: a, b
+    (floppy 1 and 2), c (first hard disk), d (first CD-ROM), n-p
+    (Etherboot from network adapter 1-4), hard disk boot is the default.
+    To apply a particular boot order only on the first startup, specify
+    it via ``once``. Note that the ``order`` or ``once`` parameter
+    should not be used together with the ``bootindex`` property of
+    devices, since the firmware implementations normally do not support
+    both at the same time.
+
+    Interactive boot menus/prompts can be enabled via ``menu=on`` as far
+    as firmware/BIOS supports them. The default is non-interactive boot.
+
+    A splash picture could be passed to bios, enabling user to show it
+    as logo, when option splash=sp\_name is given and menu=on, If
+    firmware/BIOS supports them. Currently Seabios for X86 system
+    support it. limitation: The splash file could be a jpeg file or a
+    BMP file in 24 BPP format(true color). The resolution should be
+    supported by the SVGA mode, so the recommended is 320x240, 640x480,
+    800x640.
+
+    A timeout could be passed to bios, guest will pause for rb\_timeout
+    ms when boot failed, then reboot. If rb\_timeout is '-1', guest will
+    not reboot, qemu passes '-1' to bios by default. Currently Seabios
+    for X86 system support it.
+
+    Do strict boot via ``strict=on`` as far as firmware/BIOS supports
+    it. This only effects when boot priority is changed by bootindex
+    options. The default is non-strict boot.
+
+    .. parsed-literal::
+
+        # try to boot from network first, then from hard disk
+        |qemu_system_x86| -boot order=nc
+        # boot from CD-ROM first, switch back to default order after reboot
+        |qemu_system_x86| -boot once=d
+        # boot with a splash picture for 5 seconds.
+        |qemu_system_x86| -boot menu=on,splash=/root/boot.bmp,splash-time=5000
+
+    Note: The legacy format '-boot drives' is still supported but its
+    use is discouraged as it may be removed from future versions.
+ERST
+
+DEF("m", HAS_ARG, QEMU_OPTION_m,
+    "-m [size=]megs[,slots=n,maxmem=size]\n"
+    "                configure guest RAM\n"
+    "                size: initial amount of guest memory\n"
+    "                slots: number of hotplug slots (default: none)\n"
+    "                maxmem: maximum amount of guest memory (default: none)\n"
+    "NOTE: Some architectures might enforce a specific granularity\n",
+    QEMU_ARCH_ALL)
+SRST
+``-m [size=]megs[,slots=n,maxmem=size]``
+    Sets guest startup RAM size to megs megabytes. Default is 128 MiB.
+    Optionally, a suffix of "M" or "G" can be used to signify a value in
+    megabytes or gigabytes respectively. Optional pair slots, maxmem
+    could be used to set amount of hotpluggable memory slots and maximum
+    amount of memory. Note that maxmem must be aligned to the page size.
+
+    For example, the following command-line sets the guest startup RAM
+    size to 1GB, creates 3 slots to hotplug additional memory and sets
+    the maximum memory the guest can reach to 4GB:
+
+    .. parsed-literal::
+
+        |qemu_system| -m 1G,slots=3,maxmem=4G
+
+    If slots and maxmem are not specified, memory hotplug won't be
+    enabled and the guest startup RAM will never increase.
+ERST
+
+DEF("mem-path", HAS_ARG, QEMU_OPTION_mempath,
+    "-mem-path FILE  provide backing storage for guest RAM\n", QEMU_ARCH_ALL)
+SRST
+``-mem-path path``
+    Allocate guest RAM from a temporarily created file in path.
+ERST
+
+DEF("mem-prealloc", 0, QEMU_OPTION_mem_prealloc,
+    "-mem-prealloc   preallocate guest memory (use with -mem-path)\n",
+    QEMU_ARCH_ALL)
+SRST
+``-mem-prealloc``
+    Preallocate memory when using -mem-path.
+ERST
+
+DEF("k", HAS_ARG, QEMU_OPTION_k,
+    "-k language     use keyboard layout (for example 'fr' for French)\n",
+    QEMU_ARCH_ALL)
+SRST
+``-k language``
+    Use keyboard layout language (for example ``fr`` for French). This
+    option is only needed where it is not easy to get raw PC keycodes
+    (e.g. on Macs, with some X11 servers or with a VNC or curses
+    display). You don't normally need to use it on PC/Linux or
+    PC/Windows hosts.
+
+    The available layouts are:
+
+    ::
+
+        ar  de-ch  es  fo     fr-ca  hu  ja  mk     no  pt-br  sv
+        da  en-gb  et  fr     fr-ch  is  lt  nl     pl  ru     th
+        de  en-us  fi  fr-be  hr     it  lv  nl-be  pt  sl     tr
+
+    The default is ``en-us``.
+ERST
+
+
+HXCOMM Deprecated by -audiodev
+DEF("audio-help", 0, QEMU_OPTION_audio_help,
+    "-audio-help     show -audiodev equivalent of the currently specified audio settings\n",
+    QEMU_ARCH_ALL)
+SRST
+``-audio-help``
+    Will show the -audiodev equivalent of the currently specified
+    (deprecated) environment variables.
+ERST
+
+DEF("audiodev", HAS_ARG, QEMU_OPTION_audiodev,
+    "-audiodev [driver=]driver,id=id[,prop[=value][,...]]\n"
+    "                specifies the audio backend to use\n"
+    "                id= identifier of the backend\n"
+    "                timer-period= timer period in microseconds\n"
+    "                in|out.mixing-engine= use mixing engine to mix streams inside QEMU\n"
+    "                in|out.fixed-settings= use fixed settings for host audio\n"
+    "                in|out.frequency= frequency to use with fixed settings\n"
+    "                in|out.channels= number of channels to use with fixed settings\n"
+    "                in|out.format= sample format to use with fixed settings\n"
+    "                valid values: s8, s16, s32, u8, u16, u32, f32\n"
+    "                in|out.voices= number of voices to use\n"
+    "                in|out.buffer-length= length of buffer in microseconds\n"
+    "-audiodev none,id=id,[,prop[=value][,...]]\n"
+    "                dummy driver that discards all output\n"
+#ifdef CONFIG_AUDIO_ALSA
+    "-audiodev alsa,id=id[,prop[=value][,...]]\n"
+    "                in|out.dev= name of the audio device to use\n"
+    "                in|out.period-length= length of period in microseconds\n"
+    "                in|out.try-poll= attempt to use poll mode\n"
+    "                threshold= threshold (in microseconds) when playback starts\n"
+#endif
+#ifdef CONFIG_AUDIO_COREAUDIO
+    "-audiodev coreaudio,id=id[,prop[=value][,...]]\n"
+    "                in|out.buffer-count= number of buffers\n"
+#endif
+#ifdef CONFIG_AUDIO_DSOUND
+    "-audiodev dsound,id=id[,prop[=value][,...]]\n"
+    "                latency= add extra latency to playback in microseconds\n"
+#endif
+#ifdef CONFIG_AUDIO_OSS
+    "-audiodev oss,id=id[,prop[=value][,...]]\n"
+    "                in|out.dev= path of the audio device to use\n"
+    "                in|out.buffer-count= number of buffers\n"
+    "                in|out.try-poll= attempt to use poll mode\n"
+    "                try-mmap= try using memory mapped access\n"
+    "                exclusive= open device in exclusive mode\n"
+    "                dsp-policy= set timing policy (0..10), -1 to use fragment mode\n"
+#endif
+#ifdef CONFIG_AUDIO_PA
+    "-audiodev pa,id=id[,prop[=value][,...]]\n"
+    "                server= PulseAudio server address\n"
+    "                in|out.name= source/sink device name\n"
+    "                in|out.latency= desired latency in microseconds\n"
+#endif
+#ifdef CONFIG_AUDIO_SDL
+    "-audiodev sdl,id=id[,prop[=value][,...]]\n"
+#endif
+#ifdef CONFIG_SPICE
+    "-audiodev spice,id=id[,prop[=value][,...]]\n"
+#endif
+    "-audiodev wav,id=id[,prop[=value][,...]]\n"
+    "                path= path of wav file to record\n",
+    QEMU_ARCH_ALL)
+SRST
+``-audiodev [driver=]driver,id=id[,prop[=value][,...]]``
+    Adds a new audio backend driver identified by id. There are global
+    and driver specific properties. Some values can be set differently
+    for input and output, they're marked with ``in|out.``. You can set
+    the input's property with ``in.prop`` and the output's property with
+    ``out.prop``. For example:
+
+    ::
+
+        -audiodev alsa,id=example,in.frequency=44110,out.frequency=8000
+        -audiodev alsa,id=example,out.channels=1 # leaves in.channels unspecified
+
+    NOTE: parameter validation is known to be incomplete, in many cases
+    specifying an invalid option causes QEMU to print an error message
+    and continue emulation without sound.
+
+    Valid global options are:
+
+    ``id=identifier``
+        Identifies the audio backend.
+
+    ``timer-period=period``
+        Sets the timer period used by the audio subsystem in
+        microseconds. Default is 10000 (10 ms).
+
+    ``in|out.mixing-engine=on|off``
+        Use QEMU's mixing engine to mix all streams inside QEMU and
+        convert audio formats when not supported by the backend. When
+        off, fixed-settings must be off too. Note that disabling this
+        option means that the selected backend must support multiple
+        streams and the audio formats used by the virtual cards,
+        otherwise you'll get no sound. It's not recommended to disable
+        this option unless you want to use 5.1 or 7.1 audio, as mixing
+        engine only supports mono and stereo audio. Default is on.
+
+    ``in|out.fixed-settings=on|off``
+        Use fixed settings for host audio. When off, it will change
+        based on how the guest opens the sound card. In this case you
+        must not specify frequency, channels or format. Default is on.
+
+    ``in|out.frequency=frequency``
+        Specify the frequency to use when using fixed-settings. Default
+        is 44100Hz.
+
+    ``in|out.channels=channels``
+        Specify the number of channels to use when using fixed-settings.
+        Default is 2 (stereo).
+
+    ``in|out.format=format``
+        Specify the sample format to use when using fixed-settings.
+        Valid values are: ``s8``, ``s16``, ``s32``, ``u8``, ``u16``,
+        ``u32``, ``f32``. Default is ``s16``.
+
+    ``in|out.voices=voices``
+        Specify the number of voices to use. Default is 1.
+
+    ``in|out.buffer-length=usecs``
+        Sets the size of the buffer in microseconds.
+
+``-audiodev none,id=id[,prop[=value][,...]]``
+    Creates a dummy backend that discards all outputs. This backend has
+    no backend specific properties.
+
+``-audiodev alsa,id=id[,prop[=value][,...]]``
+    Creates backend using the ALSA. This backend is only available on
+    Linux.
+
+    ALSA specific options are:
+
+    ``in|out.dev=device``
+        Specify the ALSA device to use for input and/or output. Default
+        is ``default``.
+
+    ``in|out.period-length=usecs``
+        Sets the period length in microseconds.
+
+    ``in|out.try-poll=on|off``
+        Attempt to use poll mode with the device. Default is on.
+
+    ``threshold=threshold``
+        Threshold (in microseconds) when playback starts. Default is 0.
+
+``-audiodev coreaudio,id=id[,prop[=value][,...]]``
+    Creates a backend using Apple's Core Audio. This backend is only
+    available on Mac OS and only supports playback.
+
+    Core Audio specific options are:
+
+    ``in|out.buffer-count=count``
+        Sets the count of the buffers.
+
+``-audiodev dsound,id=id[,prop[=value][,...]]``
+    Creates a backend using Microsoft's DirectSound. This backend is
+    only available on Windows and only supports playback.
+
+    DirectSound specific options are:
+
+    ``latency=usecs``
+        Add extra usecs microseconds latency to playback. Default is
+        10000 (10 ms).
+
+``-audiodev oss,id=id[,prop[=value][,...]]``
+    Creates a backend using OSS. This backend is available on most
+    Unix-like systems.
+
+    OSS specific options are:
+
+    ``in|out.dev=device``
+        Specify the file name of the OSS device to use. Default is
+        ``/dev/dsp``.
+
+    ``in|out.buffer-count=count``
+        Sets the count of the buffers.
+
+    ``in|out.try-poll=on|of``
+        Attempt to use poll mode with the device. Default is on.
+
+    ``try-mmap=on|off``
+        Try using memory mapped device access. Default is off.
+
+    ``exclusive=on|off``
+        Open the device in exclusive mode (vmix won't work in this
+        case). Default is off.
+
+    ``dsp-policy=policy``
+        Sets the timing policy (between 0 and 10, where smaller number
+        means smaller latency but higher CPU usage). Use -1 to use
+        buffer sizes specified by ``buffer`` and ``buffer-count``. This
+        option is ignored if you do not have OSS 4. Default is 5.
+
+``-audiodev pa,id=id[,prop[=value][,...]]``
+    Creates a backend using PulseAudio. This backend is available on
+    most systems.
+
+    PulseAudio specific options are:
+
+    ``server=server``
+        Sets the PulseAudio server to connect to.
+
+    ``in|out.name=sink``
+        Use the specified source/sink for recording/playback.
+
+    ``in|out.latency=usecs``
+        Desired latency in microseconds. The PulseAudio server will try
+        to honor this value but actual latencies may be lower or higher.
+
+``-audiodev sdl,id=id[,prop[=value][,...]]``
+    Creates a backend using SDL. This backend is available on most
+    systems, but you should use your platform's native backend if
+    possible. This backend has no backend specific properties.
+
+``-audiodev spice,id=id[,prop[=value][,...]]``
+    Creates a backend that sends audio through SPICE. This backend
+    requires ``-spice`` and automatically selected in that case, so
+    usually you can ignore this option. This backend has no backend
+    specific properties.
+
+``-audiodev wav,id=id[,prop[=value][,...]]``
+    Creates a backend that writes audio to a WAV file.
+
+    Backend specific options are:
+
+    ``path=path``
+        Write recorded audio into the specified file. Default is
+        ``qemu.wav``.
+ERST
+
+DEF("soundhw", HAS_ARG, QEMU_OPTION_soundhw,
+    "-soundhw c1,... enable audio support\n"
+    "                and only specified sound cards (comma separated list)\n"
+    "                use '-soundhw help' to get the list of supported cards\n"
+    "                use '-soundhw all' to enable all of them\n", QEMU_ARCH_ALL)
+SRST
+``-soundhw card1[,card2,...] or -soundhw all``
+    Enable audio and selected sound hardware. Use 'help' to print all
+    available sound hardware. For example:
+
+    .. parsed-literal::
+
+        |qemu_system_x86| -soundhw sb16,adlib disk.img
+        |qemu_system_x86| -soundhw es1370 disk.img
+        |qemu_system_x86| -soundhw ac97 disk.img
+        |qemu_system_x86| -soundhw hda disk.img
+        |qemu_system_x86| -soundhw all disk.img
+        |qemu_system_x86| -soundhw help
+
+    Note that Linux's i810\_audio OSS kernel (for AC97) module might
+    require manually specifying clocking.
+
+    ::
+
+        modprobe i810_audio clocking=48000
+ERST
+
+DEF("device", HAS_ARG, QEMU_OPTION_device,
+    "-device driver[,prop[=value][,...]]\n"
+    "                add device (based on driver)\n"
+    "                prop=value,... sets driver properties\n"
+    "                use '-device help' to print all possible drivers\n"
+    "                use '-device driver,help' to print all possible properties\n",
+    QEMU_ARCH_ALL)
+SRST
+``-device driver[,prop[=value][,...]]``
+    Add device driver. prop=value sets driver properties. Valid
+    properties depend on the driver. To get help on possible drivers and
+    properties, use ``-device help`` and ``-device driver,help``.
+
+    Some drivers are:
+
+``-device ipmi-bmc-sim,id=id[,prop[=value][,...]]``
+    Add an IPMI BMC. This is a simulation of a hardware management
+    interface processor that normally sits on a system. It provides a
+    watchdog and the ability to reset and power control the system. You
+    need to connect this to an IPMI interface to make it useful
+
+    The IPMI slave address to use for the BMC. The default is 0x20. This
+    address is the BMC's address on the I2C network of management
+    controllers. If you don't know what this means, it is safe to ignore
+    it.
+
+    ``id=id``
+        The BMC id for interfaces to use this device.
+
+    ``slave_addr=val``
+        Define slave address to use for the BMC. The default is 0x20.
+
+    ``sdrfile=file``
+        file containing raw Sensor Data Records (SDR) data. The default
+        is none.
+
+    ``fruareasize=val``
+        size of a Field Replaceable Unit (FRU) area. The default is
+        1024.
+
+    ``frudatafile=file``
+        file containing raw Field Replaceable Unit (FRU) inventory data.
+        The default is none.
+
+    ``guid=uuid``
+        value for the GUID for the BMC, in standard UUID format. If this
+        is set, get "Get GUID" command to the BMC will return it.
+        Otherwise "Get GUID" will return an error.
+
+``-device ipmi-bmc-extern,id=id,chardev=id[,slave_addr=val]``
+    Add a connection to an external IPMI BMC simulator. Instead of
+    locally emulating the BMC like the above item, instead connect to an
+    external entity that provides the IPMI services.
+
+    A connection is made to an external BMC simulator. If you do this,
+    it is strongly recommended that you use the "reconnect=" chardev
+    option to reconnect to the simulator if the connection is lost. Note
+    that if this is not used carefully, it can be a security issue, as
+    the interface has the ability to send resets, NMIs, and power off
+    the VM. It's best if QEMU makes a connection to an external
+    simulator running on a secure port on localhost, so neither the
+    simulator nor QEMU is exposed to any outside network.
+
+    See the "lanserv/README.vm" file in the OpenIPMI library for more
+    details on the external interface.
+
+``-device isa-ipmi-kcs,bmc=id[,ioport=val][,irq=val]``
+    Add a KCS IPMI interafce on the ISA bus. This also adds a
+    corresponding ACPI and SMBIOS entries, if appropriate.
+
+    ``bmc=id``
+        The BMC to connect to, one of ipmi-bmc-sim or ipmi-bmc-extern
+        above.
+
+    ``ioport=val``
+        Define the I/O address of the interface. The default is 0xca0
+        for KCS.
+
+    ``irq=val``
+        Define the interrupt to use. The default is 5. To disable
+        interrupts, set this to 0.
+
+``-device isa-ipmi-bt,bmc=id[,ioport=val][,irq=val]``
+    Like the KCS interface, but defines a BT interface. The default port
+    is 0xe4 and the default interrupt is 5.
+
+``-device pci-ipmi-kcs,bmc=id``
+    Add a KCS IPMI interafce on the PCI bus.
+
+    ``bmc=id``
+        The BMC to connect to, one of ipmi-bmc-sim or ipmi-bmc-extern above.
+
+``-device pci-ipmi-bt,bmc=id``
+    Like the KCS interface, but defines a BT interface on the PCI bus.
+ERST
+
+DEF("name", HAS_ARG, QEMU_OPTION_name,
+    "-name string1[,process=string2][,debug-threads=on|off]\n"
+    "                set the name of the guest\n"
+    "                string1 sets the window title and string2 the process name\n"
+    "                When debug-threads is enabled, individual threads are given a separate name\n"
+    "                NOTE: The thread names are for debugging and not a stable API.\n",
+    QEMU_ARCH_ALL)
+SRST
+``-name name``
+    Sets the name of the guest. This name will be displayed in the SDL
+    window caption. The name will also be used for the VNC server. Also
+    optionally set the top visible process name in Linux. Naming of
+    individual threads can also be enabled on Linux to aid debugging.
+ERST
+
+DEF("uuid", HAS_ARG, QEMU_OPTION_uuid,
+    "-uuid %08x-%04x-%04x-%04x-%012x\n"
+    "                specify machine UUID\n", QEMU_ARCH_ALL)
+SRST
+``-uuid uuid``
+    Set system UUID.
+ERST
+
+DEFHEADING()
+
+DEFHEADING(Block device options:)
+
+DEF("fda", HAS_ARG, QEMU_OPTION_fda,
+    "-fda/-fdb file  use 'file' as floppy disk 0/1 image\n", QEMU_ARCH_ALL)
+DEF("fdb", HAS_ARG, QEMU_OPTION_fdb, "", QEMU_ARCH_ALL)
+SRST
+``-fda file``
+  \
+``-fdb file``
+    Use file as floppy disk 0/1 image (see the :ref:`disk images` chapter in
+    the System Emulation Users Guide).
+ERST
+
+DEF("hda", HAS_ARG, QEMU_OPTION_hda,
+    "-hda/-hdb file  use 'file' as IDE hard disk 0/1 image\n", QEMU_ARCH_ALL)
+DEF("hdb", HAS_ARG, QEMU_OPTION_hdb, "", QEMU_ARCH_ALL)
+DEF("hdc", HAS_ARG, QEMU_OPTION_hdc,
+    "-hdc/-hdd file  use 'file' as IDE hard disk 2/3 image\n", QEMU_ARCH_ALL)
+DEF("hdd", HAS_ARG, QEMU_OPTION_hdd, "", QEMU_ARCH_ALL)
+SRST
+``-hda file``
+  \
+``-hdb file``
+  \ 
+``-hdc file``
+  \ 
+``-hdd file``
+    Use file as hard disk 0, 1, 2 or 3 image (see the :ref:`disk images`
+    chapter in the System Emulation Users Guide).
+ERST
+
+DEF("cdrom", HAS_ARG, QEMU_OPTION_cdrom,
+    "-cdrom file     use 'file' as IDE cdrom image (cdrom is ide1 master)\n",
+    QEMU_ARCH_ALL)
+SRST
+``-cdrom file``
+    Use file as CD-ROM image (you cannot use ``-hdc`` and ``-cdrom`` at
+    the same time). You can use the host CD-ROM by using ``/dev/cdrom``
+    as filename.
+ERST
+
+DEF("blockdev", HAS_ARG, QEMU_OPTION_blockdev,
+    "-blockdev [driver=]driver[,node-name=N][,discard=ignore|unmap]\n"
+    "          [,cache.direct=on|off][,cache.no-flush=on|off]\n"
+    "          [,read-only=on|off][,auto-read-only=on|off]\n"
+    "          [,force-share=on|off][,detect-zeroes=on|off|unmap]\n"
+    "          [,driver specific parameters...]\n"
+    "                configure a block backend\n", QEMU_ARCH_ALL)
+SRST
+``-blockdev option[,option[,option[,...]]]``
+    Define a new block driver node. Some of the options apply to all
+    block drivers, other options are only accepted for a specific block
+    driver. See below for a list of generic options and options for the
+    most common block drivers.
+
+    Options that expect a reference to another node (e.g. ``file``) can
+    be given in two ways. Either you specify the node name of an already
+    existing node (file=node-name), or you define a new node inline,
+    adding options for the referenced node after a dot
+    (file.filename=path,file.aio=native).
+
+    A block driver node created with ``-blockdev`` can be used for a
+    guest device by specifying its node name for the ``drive`` property
+    in a ``-device`` argument that defines a block device.
+
+    ``Valid options for any block driver node:``
+        ``driver``
+            Specifies the block driver to use for the given node.
+
+        ``node-name``
+            This defines the name of the block driver node by which it
+            will be referenced later. The name must be unique, i.e. it
+            must not match the name of a different block driver node, or
+            (if you use ``-drive`` as well) the ID of a drive.
+
+            If no node name is specified, it is automatically generated.
+            The generated node name is not intended to be predictable
+            and changes between QEMU invocations. For the top level, an
+            explicit node name must be specified.
+
+        ``read-only``
+            Open the node read-only. Guest write attempts will fail.
+
+            Note that some block drivers support only read-only access,
+            either generally or in certain configurations. In this case,
+            the default value ``read-only=off`` does not work and the
+            option must be specified explicitly.
+
+        ``auto-read-only``
+            If ``auto-read-only=on`` is set, QEMU may fall back to
+            read-only usage even when ``read-only=off`` is requested, or
+            even switch between modes as needed, e.g. depending on
+            whether the image file is writable or whether a writing user
+            is attached to the node.
+
+        ``force-share``
+            Override the image locking system of QEMU by forcing the
+            node to utilize weaker shared access for permissions where
+            it would normally request exclusive access. When there is
+            the potential for multiple instances to have the same file
+            open (whether this invocation of QEMU is the first or the
+            second instance), both instances must permit shared access
+            for the second instance to succeed at opening the file.
+
+            Enabling ``force-share=on`` requires ``read-only=on``.
+
+        ``cache.direct``
+            The host page cache can be avoided with ``cache.direct=on``.
+            This will attempt to do disk IO directly to the guest's
+            memory. QEMU may still perform an internal copy of the data.
+
+        ``cache.no-flush``
+            In case you don't care about data integrity over host
+            failures, you can use ``cache.no-flush=on``. This option
+            tells QEMU that it never needs to write any data to the disk
+            but can instead keep things in cache. If anything goes
+            wrong, like your host losing power, the disk storage getting
+            disconnected accidentally, etc. your image will most
+            probably be rendered unusable.
+
+        ``discard=discard``
+            discard is one of "ignore" (or "off") or "unmap" (or "on")
+            and controls whether ``discard`` (also known as ``trim`` or
+            ``unmap``) requests are ignored or passed to the filesystem.
+            Some machine types may not support discard requests.
+
+        ``detect-zeroes=detect-zeroes``
+            detect-zeroes is "off", "on" or "unmap" and enables the
+            automatic conversion of plain zero writes by the OS to
+            driver specific optimized zero write commands. You may even
+            choose "unmap" if discard is set to "unmap" to allow a zero
+            write to be converted to an ``unmap`` operation.
+
+    ``Driver-specific options for file``
+        This is the protocol-level block driver for accessing regular
+        files.
+
+        ``filename``
+            The path to the image file in the local filesystem
+
+        ``aio``
+            Specifies the AIO backend (threads/native/io_uring,
+            default: threads)
+
+        ``locking``
+            Specifies whether the image file is protected with Linux OFD
+            / POSIX locks. The default is to use the Linux Open File
+            Descriptor API if available, otherwise no lock is applied.
+            (auto/on/off, default: auto)
+
+        Example:
+
+        ::
+
+            -blockdev driver=file,node-name=disk,filename=disk.img
+
+    ``Driver-specific options for raw``
+        This is the image format block driver for raw images. It is
+        usually stacked on top of a protocol level block driver such as
+        ``file``.
+
+        ``file``
+            Reference to or definition of the data source block driver
+            node (e.g. a ``file`` driver node)
+
+        Example 1:
+
+        ::
+
+            -blockdev driver=file,node-name=disk_file,filename=disk.img
+            -blockdev driver=raw,node-name=disk,file=disk_file
+
+        Example 2:
+
+        ::
+
+            -blockdev driver=raw,node-name=disk,file.driver=file,file.filename=disk.img
+
+    ``Driver-specific options for qcow2``
+        This is the image format block driver for qcow2 images. It is
+        usually stacked on top of a protocol level block driver such as
+        ``file``.
+
+        ``file``
+            Reference to or definition of the data source block driver
+            node (e.g. a ``file`` driver node)
+
+        ``backing``
+            Reference to or definition of the backing file block device
+            (default is taken from the image file). It is allowed to
+            pass ``null`` here in order to disable the default backing
+            file.
+
+        ``lazy-refcounts``
+            Whether to enable the lazy refcounts feature (on/off;
+            default is taken from the image file)
+
+        ``cache-size``
+            The maximum total size of the L2 table and refcount block
+            caches in bytes (default: the sum of l2-cache-size and
+            refcount-cache-size)
+
+        ``l2-cache-size``
+            The maximum size of the L2 table cache in bytes (default: if
+            cache-size is not specified - 32M on Linux platforms, and 8M
+            on non-Linux platforms; otherwise, as large as possible
+            within the cache-size, while permitting the requested or the
+            minimal refcount cache size)
+
+        ``refcount-cache-size``
+            The maximum size of the refcount block cache in bytes
+            (default: 4 times the cluster size; or if cache-size is
+            specified, the part of it which is not used for the L2
+            cache)
+
+        ``cache-clean-interval``
+            Clean unused entries in the L2 and refcount caches. The
+            interval is in seconds. The default value is 600 on
+            supporting platforms, and 0 on other platforms. Setting it
+            to 0 disables this feature.
+
+        ``pass-discard-request``
+            Whether discard requests to the qcow2 device should be
+            forwarded to the data source (on/off; default: on if
+            discard=unmap is specified, off otherwise)
+
+        ``pass-discard-snapshot``
+            Whether discard requests for the data source should be
+            issued when a snapshot operation (e.g. deleting a snapshot)
+            frees clusters in the qcow2 file (on/off; default: on)
+
+        ``pass-discard-other``
+            Whether discard requests for the data source should be
+            issued on other occasions where a cluster gets freed
+            (on/off; default: off)
+
+        ``overlap-check``
+            Which overlap checks to perform for writes to the image
+            (none/constant/cached/all; default: cached). For details or
+            finer granularity control refer to the QAPI documentation of
+            ``blockdev-add``.
+
+        Example 1:
+
+        ::
+
+            -blockdev driver=file,node-name=my_file,filename=/tmp/disk.qcow2
+            -blockdev driver=qcow2,node-name=hda,file=my_file,overlap-check=none,cache-size=16777216
+
+        Example 2:
+
+        ::
+
+            -blockdev driver=qcow2,node-name=disk,file.driver=http,file.filename=http://example.com/image.qcow2
+
+    ``Driver-specific options for other drivers``
+        Please refer to the QAPI documentation of the ``blockdev-add``
+        QMP command.
+ERST
+
+DEF("drive", HAS_ARG, QEMU_OPTION_drive,
+    "-drive [file=file][,if=type][,bus=n][,unit=m][,media=d][,index=i]\n"
+    "       [,cache=writethrough|writeback|none|directsync|unsafe][,format=f]\n"
+    "       [,snapshot=on|off][,rerror=ignore|stop|report]\n"
+    "       [,werror=ignore|stop|report|enospc][,id=name]\n"
+    "       [,aio=threads|native|io_uring]\n"
+    "       [,readonly=on|off][,copy-on-read=on|off]\n"
+    "       [,discard=ignore|unmap][,detect-zeroes=on|off|unmap]\n"
+    "       [[,bps=b]|[[,bps_rd=r][,bps_wr=w]]]\n"
+    "       [[,iops=i]|[[,iops_rd=r][,iops_wr=w]]]\n"
+    "       [[,bps_max=bm]|[[,bps_rd_max=rm][,bps_wr_max=wm]]]\n"
+    "       [[,iops_max=im]|[[,iops_rd_max=irm][,iops_wr_max=iwm]]]\n"
+    "       [[,iops_size=is]]\n"
+    "       [[,group=g]]\n"
+    "                use 'file' as a drive image\n", QEMU_ARCH_ALL)
+SRST
+``-drive option[,option[,option[,...]]]``
+    Define a new drive. This includes creating a block driver node (the
+    backend) as well as a guest device, and is mostly a shortcut for
+    defining the corresponding ``-blockdev`` and ``-device`` options.
+
+    ``-drive`` accepts all options that are accepted by ``-blockdev``.
+    In addition, it knows the following options:
+
+    ``file=file``
+        This option defines which disk image (see the :ref:`disk images`
+        chapter in the System Emulation Users Guide) to use with this drive.
+        If the filename contains comma, you must double it (for instance,
+        "file=my,,file" to use file "my,file").
+
+        Special files such as iSCSI devices can be specified using
+        protocol specific URLs. See the section for "Device URL Syntax"
+        for more information.
+
+    ``if=interface``
+        This option defines on which type on interface the drive is
+        connected. Available types are: ide, scsi, sd, mtd, floppy,
+        pflash, virtio, none.
+
+    ``bus=bus,unit=unit``
+        These options define where is connected the drive by defining
+        the bus number and the unit id.
+
+    ``index=index``
+        This option defines where is connected the drive by using an
+        index in the list of available connectors of a given interface
+        type.
+
+    ``media=media``
+        This option defines the type of the media: disk or cdrom.
+
+    ``snapshot=snapshot``
+        snapshot is "on" or "off" and controls snapshot mode for the
+        given drive (see ``-snapshot``).
+
+    ``cache=cache``
+        cache is "none", "writeback", "unsafe", "directsync" or
+        "writethrough" and controls how the host cache is used to access
+        block data. This is a shortcut that sets the ``cache.direct``
+        and ``cache.no-flush`` options (as in ``-blockdev``), and
+        additionally ``cache.writeback``, which provides a default for
+        the ``write-cache`` option of block guest devices (as in
+        ``-device``). The modes correspond to the following settings:
+
+        =============  ===============   ============   ==============
+        \              cache.writeback   cache.direct   cache.no-flush
+        =============  ===============   ============   ==============
+        writeback      on                off            off
+        none           on                on             off
+        writethrough   off               off            off
+        directsync     off               on             off
+        unsafe         on                off            on
+        =============  ===============   ============   ==============
+
+        The default mode is ``cache=writeback``.
+
+    ``aio=aio``
+        aio is "threads", "native", or "io_uring" and selects between pthread
+        based disk I/O, native Linux AIO, or Linux io_uring API.
+
+    ``format=format``
+        Specify which disk format will be used rather than detecting the
+        format. Can be used to specify format=raw to avoid interpreting
+        an untrusted format header.
+
+    ``werror=action,rerror=action``
+        Specify which action to take on write and read errors. Valid
+        actions are: "ignore" (ignore the error and try to continue),
+        "stop" (pause QEMU), "report" (report the error to the guest),
+        "enospc" (pause QEMU only if the host disk is full; report the
+        error to the guest otherwise). The default setting is
+        ``werror=enospc`` and ``rerror=report``.
+
+    ``copy-on-read=copy-on-read``
+        copy-on-read is "on" or "off" and enables whether to copy read
+        backing file sectors into the image file.
+
+    ``bps=b,bps_rd=r,bps_wr=w``
+        Specify bandwidth throttling limits in bytes per second, either
+        for all request types or for reads or writes only. Small values
+        can lead to timeouts or hangs inside the guest. A safe minimum
+        for disks is 2 MB/s.
+
+    ``bps_max=bm,bps_rd_max=rm,bps_wr_max=wm``
+        Specify bursts in bytes per second, either for all request types
+        or for reads or writes only. Bursts allow the guest I/O to spike
+        above the limit temporarily.
+
+    ``iops=i,iops_rd=r,iops_wr=w``
+        Specify request rate limits in requests per second, either for
+        all request types or for reads or writes only.
+
+    ``iops_max=bm,iops_rd_max=rm,iops_wr_max=wm``
+        Specify bursts in requests per second, either for all request
+        types or for reads or writes only. Bursts allow the guest I/O to
+        spike above the limit temporarily.
+
+    ``iops_size=is``
+        Let every is bytes of a request count as a new request for iops
+        throttling purposes. Use this option to prevent guests from
+        circumventing iops limits by sending fewer but larger requests.
+
+    ``group=g``
+        Join a throttling quota group with given name g. All drives that
+        are members of the same group are accounted for together. Use
+        this option to prevent guests from circumventing throttling
+        limits by using many small disks instead of a single larger
+        disk.
+
+    By default, the ``cache.writeback=on`` mode is used. It will report
+    data writes as completed as soon as the data is present in the host
+    page cache. This is safe as long as your guest OS makes sure to
+    correctly flush disk caches where needed. If your guest OS does not
+    handle volatile disk write caches correctly and your host crashes or
+    loses power, then the guest may experience data corruption.
+
+    For such guests, you should consider using ``cache.writeback=off``.
+    This means that the host page cache will be used to read and write
+    data, but write notification will be sent to the guest only after
+    QEMU has made sure to flush each write to the disk. Be aware that
+    this has a major impact on performance.
+
+    When using the ``-snapshot`` option, unsafe caching is always used.
+
+    Copy-on-read avoids accessing the same backing file sectors
+    repeatedly and is useful when the backing file is over a slow
+    network. By default copy-on-read is off.
+
+    Instead of ``-cdrom`` you can use:
+
+    .. parsed-literal::
+
+        |qemu_system| -drive file=file,index=2,media=cdrom
+
+    Instead of ``-hda``, ``-hdb``, ``-hdc``, ``-hdd``, you can use:
+
+    .. parsed-literal::
+
+        |qemu_system| -drive file=file,index=0,media=disk
+        |qemu_system| -drive file=file,index=1,media=disk
+        |qemu_system| -drive file=file,index=2,media=disk
+        |qemu_system| -drive file=file,index=3,media=disk
+
+    You can open an image using pre-opened file descriptors from an fd
+    set:
+
+    .. parsed-literal::
+
+        |qemu_system| \\
+         -add-fd fd=3,set=2,opaque="rdwr:/path/to/file" \\
+         -add-fd fd=4,set=2,opaque="rdonly:/path/to/file" \\
+         -drive file=/dev/fdset/2,index=0,media=disk
+
+    You can connect a CDROM to the slave of ide0:
+
+    .. parsed-literal::
+
+        |qemu_system_x86| -drive file=file,if=ide,index=1,media=cdrom
+
+    If you don't specify the "file=" argument, you define an empty
+    drive:
+
+    .. parsed-literal::
+
+        |qemu_system_x86| -drive if=ide,index=1,media=cdrom
+
+    Instead of ``-fda``, ``-fdb``, you can use:
+
+    .. parsed-literal::
+
+        |qemu_system_x86| -drive file=file,index=0,if=floppy
+        |qemu_system_x86| -drive file=file,index=1,if=floppy
+
+    By default, interface is "ide" and index is automatically
+    incremented:
+
+    .. parsed-literal::
+
+        |qemu_system_x86| -drive file=a -drive file=b"
+
+    is interpreted like:
+
+    .. parsed-literal::
+
+        |qemu_system_x86| -hda a -hdb b
+ERST
+
+DEF("mtdblock", HAS_ARG, QEMU_OPTION_mtdblock,
+    "-mtdblock file  use 'file' as on-board Flash memory image\n",
+    QEMU_ARCH_ALL)
+SRST
+``-mtdblock file``
+    Use file as on-board Flash memory image.
+ERST
+
+DEF("sd", HAS_ARG, QEMU_OPTION_sd,
+    "-sd file        use 'file' as SecureDigital card image\n", QEMU_ARCH_ALL)
+SRST
+``-sd file``
+    Use file as SecureDigital card image.
+ERST
+
+DEF("pflash", HAS_ARG, QEMU_OPTION_pflash,
+    "-pflash file    use 'file' as a parallel flash image\n", QEMU_ARCH_ALL)
+SRST
+``-pflash file``
+    Use file as a parallel flash image.
+ERST
+
+DEF("snapshot", 0, QEMU_OPTION_snapshot,
+    "-snapshot       write to temporary files instead of disk image files\n",
+    QEMU_ARCH_ALL)
+SRST
+``-snapshot``
+    Write to temporary files instead of disk image files. In this case,
+    the raw disk image you use is not written back. You can however
+    force the write back by pressing C-a s (see the :ref:`disk images`
+    chapter in the System Emulation Users Guide).
+ERST
+
+DEF("fsdev", HAS_ARG, QEMU_OPTION_fsdev,
+    "-fsdev local,id=id,path=path,security_model=mapped-xattr|mapped-file|passthrough|none\n"
+    " [,writeout=immediate][,readonly][,fmode=fmode][,dmode=dmode]\n"
+    " [[,throttling.bps-total=b]|[[,throttling.bps-read=r][,throttling.bps-write=w]]]\n"
+    " [[,throttling.iops-total=i]|[[,throttling.iops-read=r][,throttling.iops-write=w]]]\n"
+    " [[,throttling.bps-total-max=bm]|[[,throttling.bps-read-max=rm][,throttling.bps-write-max=wm]]]\n"
+    " [[,throttling.iops-total-max=im]|[[,throttling.iops-read-max=irm][,throttling.iops-write-max=iwm]]]\n"
+    " [[,throttling.iops-size=is]]\n"
+    "-fsdev proxy,id=id,socket=socket[,writeout=immediate][,readonly]\n"
+    "-fsdev proxy,id=id,sock_fd=sock_fd[,writeout=immediate][,readonly]\n"
+    "-fsdev synth,id=id\n",
+    QEMU_ARCH_ALL)
+
+SRST
+``-fsdev local,id=id,path=path,security_model=security_model [,writeout=writeout][,readonly][,fmode=fmode][,dmode=dmode] [,throttling.option=value[,throttling.option=value[,...]]]``
+  \ 
+``-fsdev proxy,id=id,socket=socket[,writeout=writeout][,readonly]``
+  \
+``-fsdev proxy,id=id,sock_fd=sock_fd[,writeout=writeout][,readonly]``
+  \
+``-fsdev synth,id=id[,readonly]``
+    Define a new file system device. Valid options are:
+
+    ``local``
+        Accesses to the filesystem are done by QEMU.
+
+    ``proxy``
+        Accesses to the filesystem are done by virtfs-proxy-helper(1).
+
+    ``synth``
+        Synthetic filesystem, only used by QTests.
+
+    ``id=id``
+        Specifies identifier for this device.
+
+    ``path=path``
+        Specifies the export path for the file system device. Files
+        under this path will be available to the 9p client on the guest.
+
+    ``security_model=security_model``
+        Specifies the security model to be used for this export path.
+        Supported security models are "passthrough", "mapped-xattr",
+        "mapped-file" and "none". In "passthrough" security model, files
+        are stored using the same credentials as they are created on the
+        guest. This requires QEMU to run as root. In "mapped-xattr"
+        security model, some of the file attributes like uid, gid, mode
+        bits and link target are stored as file attributes. For
+        "mapped-file" these attributes are stored in the hidden
+        .virtfs\_metadata directory. Directories exported by this
+        security model cannot interact with other unix tools. "none"
+        security model is same as passthrough except the sever won't
+        report failures if it fails to set file attributes like
+        ownership. Security model is mandatory only for local fsdriver.
+        Other fsdrivers (like proxy) don't take security model as a
+        parameter.
+
+    ``writeout=writeout``
+        This is an optional argument. The only supported value is
+        "immediate". This means that host page cache will be used to
+        read and write data but write notification will be sent to the
+        guest only when the data has been reported as written by the
+        storage subsystem.
+
+    ``readonly``
+        Enables exporting 9p share as a readonly mount for guests. By
+        default read-write access is given.
+
+    ``socket=socket``
+        Enables proxy filesystem driver to use passed socket file for
+        communicating with virtfs-proxy-helper(1).
+
+    ``sock_fd=sock_fd``
+        Enables proxy filesystem driver to use passed socket descriptor
+        for communicating with virtfs-proxy-helper(1). Usually a helper
+        like libvirt will create socketpair and pass one of the fds as
+        sock\_fd.
+
+    ``fmode=fmode``
+        Specifies the default mode for newly created files on the host.
+        Works only with security models "mapped-xattr" and
+        "mapped-file".
+
+    ``dmode=dmode``
+        Specifies the default mode for newly created directories on the
+        host. Works only with security models "mapped-xattr" and
+        "mapped-file".
+
+    ``throttling.bps-total=b,throttling.bps-read=r,throttling.bps-write=w``
+        Specify bandwidth throttling limits in bytes per second, either
+        for all request types or for reads or writes only.
+
+    ``throttling.bps-total-max=bm,bps-read-max=rm,bps-write-max=wm``
+        Specify bursts in bytes per second, either for all request types
+        or for reads or writes only. Bursts allow the guest I/O to spike
+        above the limit temporarily.
+
+    ``throttling.iops-total=i,throttling.iops-read=r, throttling.iops-write=w``
+        Specify request rate limits in requests per second, either for
+        all request types or for reads or writes only.
+
+    ``throttling.iops-total-max=im,throttling.iops-read-max=irm, throttling.iops-write-max=iwm``
+        Specify bursts in requests per second, either for all request
+        types or for reads or writes only. Bursts allow the guest I/O to
+        spike above the limit temporarily.
+
+    ``throttling.iops-size=is``
+        Let every is bytes of a request count as a new request for iops
+        throttling purposes.
+
+    -fsdev option is used along with -device driver "virtio-9p-...".
+
+``-device virtio-9p-type,fsdev=id,mount_tag=mount_tag``
+    Options for virtio-9p-... driver are:
+
+    ``type``
+        Specifies the variant to be used. Supported values are "pci",
+        "ccw" or "device", depending on the machine type.
+
+    ``fsdev=id``
+        Specifies the id value specified along with -fsdev option.
+
+    ``mount_tag=mount_tag``
+        Specifies the tag name to be used by the guest to mount this
+        export point.
+ERST
+
+DEF("virtfs", HAS_ARG, QEMU_OPTION_virtfs,
+    "-virtfs local,path=path,mount_tag=tag,security_model=mapped-xattr|mapped-file|passthrough|none\n"
+    "        [,id=id][,writeout=immediate][,readonly][,fmode=fmode][,dmode=dmode][,multidevs=remap|forbid|warn]\n"
+    "-virtfs proxy,mount_tag=tag,socket=socket[,id=id][,writeout=immediate][,readonly]\n"
+    "-virtfs proxy,mount_tag=tag,sock_fd=sock_fd[,id=id][,writeout=immediate][,readonly]\n"
+    "-virtfs synth,mount_tag=tag[,id=id][,readonly]\n",
+    QEMU_ARCH_ALL)
+
+SRST
+``-virtfs local,path=path,mount_tag=mount_tag ,security_model=security_model[,writeout=writeout][,readonly] [,fmode=fmode][,dmode=dmode][,multidevs=multidevs]``
+  \ 
+``-virtfs proxy,socket=socket,mount_tag=mount_tag [,writeout=writeout][,readonly]``
+  \ 
+``-virtfs proxy,sock_fd=sock_fd,mount_tag=mount_tag [,writeout=writeout][,readonly]``
+  \
+``-virtfs synth,mount_tag=mount_tag``
+    Define a new virtual filesystem device and expose it to the guest using
+    a virtio-9p-device (a.k.a. 9pfs), which essentially means that a certain
+    directory on host is made directly accessible by guest as a pass-through
+    file system by using the 9P network protocol for communication between
+    host and guests, if desired even accessible, shared by several guests
+    simultaniously.
+
+    Note that ``-virtfs`` is actually just a convenience shortcut for its
+    generalized form ``-fsdev -device virtio-9p-pci``.
+
+    The general form of pass-through file system options are:
+
+    ``local``
+        Accesses to the filesystem are done by QEMU.
+
+    ``proxy``
+        Accesses to the filesystem are done by virtfs-proxy-helper(1).
+
+    ``synth``
+        Synthetic filesystem, only used by QTests.
+
+    ``id=id``
+        Specifies identifier for the filesystem device
+
+    ``path=path``
+        Specifies the export path for the file system device. Files
+        under this path will be available to the 9p client on the guest.
+
+    ``security_model=security_model``
+        Specifies the security model to be used for this export path.
+        Supported security models are "passthrough", "mapped-xattr",
+        "mapped-file" and "none". In "passthrough" security model, files
+        are stored using the same credentials as they are created on the
+        guest. This requires QEMU to run as root. In "mapped-xattr"
+        security model, some of the file attributes like uid, gid, mode
+        bits and link target are stored as file attributes. For
+        "mapped-file" these attributes are stored in the hidden
+        .virtfs\_metadata directory. Directories exported by this
+        security model cannot interact with other unix tools. "none"
+        security model is same as passthrough except the sever won't
+        report failures if it fails to set file attributes like
+        ownership. Security model is mandatory only for local fsdriver.
+        Other fsdrivers (like proxy) don't take security model as a
+        parameter.
+
+    ``writeout=writeout``
+        This is an optional argument. The only supported value is
+        "immediate". This means that host page cache will be used to
+        read and write data but write notification will be sent to the
+        guest only when the data has been reported as written by the
+        storage subsystem.
+
+    ``readonly``
+        Enables exporting 9p share as a readonly mount for guests. By
+        default read-write access is given.
+
+    ``socket=socket``
+        Enables proxy filesystem driver to use passed socket file for
+        communicating with virtfs-proxy-helper(1). Usually a helper like
+        libvirt will create socketpair and pass one of the fds as
+        sock\_fd.
+
+    ``sock_fd``
+        Enables proxy filesystem driver to use passed 'sock\_fd' as the
+        socket descriptor for interfacing with virtfs-proxy-helper(1).
+
+    ``fmode=fmode``
+        Specifies the default mode for newly created files on the host.
+        Works only with security models "mapped-xattr" and
+        "mapped-file".
+
+    ``dmode=dmode``
+        Specifies the default mode for newly created directories on the
+        host. Works only with security models "mapped-xattr" and
+        "mapped-file".
+
+    ``mount_tag=mount_tag``
+        Specifies the tag name to be used by the guest to mount this
+        export point.
+
+    ``multidevs=multidevs``
+        Specifies how to deal with multiple devices being shared with a
+        9p export. Supported behaviours are either "remap", "forbid" or
+        "warn". The latter is the default behaviour on which virtfs 9p
+        expects only one device to be shared with the same export, and
+        if more than one device is shared and accessed via the same 9p
+        export then only a warning message is logged (once) by qemu on
+        host side. In order to avoid file ID collisions on guest you
+        should either create a separate virtfs export for each device to
+        be shared with guests (recommended way) or you might use "remap"
+        instead which allows you to share multiple devices with only one
+        export instead, which is achieved by remapping the original
+        inode numbers from host to guest in a way that would prevent
+        such collisions. Remapping inodes in such use cases is required
+        because the original device IDs from host are never passed and
+        exposed on guest. Instead all files of an export shared with
+        virtfs always share the same device id on guest. So two files
+        with identical inode numbers but from actually different devices
+        on host would otherwise cause a file ID collision and hence
+        potential misbehaviours on guest. "forbid" on the other hand
+        assumes like "warn" that only one device is shared by the same
+        export, however it will not only log a warning message but also
+        deny access to additional devices on guest. Note though that
+        "forbid" does currently not block all possible file access
+        operations (e.g. readdir() would still return entries from other
+        devices).
+ERST
+
+DEF("iscsi", HAS_ARG, QEMU_OPTION_iscsi,
+    "-iscsi [user=user][,password=password]\n"
+    "       [,header-digest=CRC32C|CR32C-NONE|NONE-CRC32C|NONE\n"
+    "       [,initiator-name=initiator-iqn][,id=target-iqn]\n"
+    "       [,timeout=timeout]\n"
+    "                iSCSI session parameters\n", QEMU_ARCH_ALL)
+
+SRST
+``-iscsi``
+    Configure iSCSI session parameters.
+ERST
+
+DEFHEADING()
+
+DEFHEADING(USB options:)
+
+DEF("usb", 0, QEMU_OPTION_usb,
+    "-usb            enable on-board USB host controller (if not enabled by default)\n",
+    QEMU_ARCH_ALL)
+SRST
+``-usb``
+    Enable USB emulation on machine types with an on-board USB host
+    controller (if not enabled by default). Note that on-board USB host
+    controllers may not support USB 3.0. In this case
+    ``-device qemu-xhci`` can be used instead on machines with PCI.
+ERST
+
+DEF("usbdevice", HAS_ARG, QEMU_OPTION_usbdevice,
+    "-usbdevice name add the host or guest USB device 'name'\n",
+    QEMU_ARCH_ALL)
+SRST
+``-usbdevice devname``
+    Add the USB device devname. Note that this option is deprecated,
+    please use ``-device usb-...`` instead. See the chapter about
+    :ref:`Connecting USB devices` in the System Emulation Users Guide.
+
+    ``mouse``
+        Virtual Mouse. This will override the PS/2 mouse emulation when
+        activated.
+
+    ``tablet``
+        Pointer device that uses absolute coordinates (like a
+        touchscreen). This means QEMU is able to report the mouse
+        position without having to grab the mouse. Also overrides the
+        PS/2 mouse emulation when activated.
+
+    ``braille``
+        Braille device. This will use BrlAPI to display the braille
+        output on a real or fake device.
+ERST
+
+DEFHEADING()
+
+DEFHEADING(Display options:)
+
+DEF("display", HAS_ARG, QEMU_OPTION_display,
+#if defined(CONFIG_SPICE)
+    "-display spice-app[,gl=on|off]\n"
+#endif
+#if defined(CONFIG_SDL)
+    "-display sdl[,alt_grab=on|off][,ctrl_grab=on|off]\n"
+    "            [,window_close=on|off][,gl=on|core|es|off]\n"
+#endif
+#if defined(CONFIG_GTK)
+    "-display gtk[,grab_on_hover=on|off][,gl=on|off]|\n"
+#endif
+#if defined(CONFIG_VNC)
+    "-display vnc=[,]\n"
+#endif
+#if defined(CONFIG_CURSES)
+    "-display curses[,charset=]\n"
+#endif
+#if defined(CONFIG_OPENGL)
+    "-display egl-headless[,rendernode=]\n"
+#endif
+    "-display none\n"
+    "                select display backend type\n"
+    "                The default display is equivalent to\n                "
+#if defined(CONFIG_GTK)
+            "\"-display gtk\"\n"
+#elif defined(CONFIG_SDL)
+            "\"-display sdl\"\n"
+#elif defined(CONFIG_COCOA)
+            "\"-display cocoa\"\n"
+#elif defined(CONFIG_VNC)
+            "\"-vnc localhost:0,to=99,id=default\"\n"
+#else
+            "\"-display none\"\n"
+#endif
+    , QEMU_ARCH_ALL)
+SRST
+``-display type``
+    Select type of display to use. This option is a replacement for the
+    old style -sdl/-curses/... options. Use ``-display help`` to list
+    the available display types. Valid values for type are
+
+    ``sdl``
+        Display video output via SDL (usually in a separate graphics
+        window; see the SDL documentation for other possibilities).
+
+    ``curses``
+        Display video output via curses. For graphics device models
+        which support a text mode, QEMU can display this output using a
+        curses/ncurses interface. Nothing is displayed when the graphics
+        device is in graphical mode or if the graphics device does not
+        support a text mode. Generally only the VGA device models
+        support text mode. The font charset used by the guest can be
+        specified with the ``charset`` option, for example
+        ``charset=CP850`` for IBM CP850 encoding. The default is
+        ``CP437``.
+
+    ``none``
+        Do not display video output. The guest will still see an
+        emulated graphics card, but its output will not be displayed to
+        the QEMU user. This option differs from the -nographic option in
+        that it only affects what is done with video output; -nographic
+        also changes the destination of the serial and parallel port
+        data.
+
+    ``gtk``
+        Display video output in a GTK window. This interface provides
+        drop-down menus and other UI elements to configure and control
+        the VM during runtime.
+
+    ``vnc``
+        Start a VNC server on display 
+
+    ``egl-headless``
+        Offload all OpenGL operations to a local DRI device. For any
+        graphical display, this display needs to be paired with either
+        VNC or SPICE displays.
+
+    ``spice-app``
+        Start QEMU as a Spice server and launch the default Spice client
+        application. The Spice server will redirect the serial consoles
+        and QEMU monitors. (Since 4.0)
+ERST
+
+DEF("nographic", 0, QEMU_OPTION_nographic,
+    "-nographic      disable graphical output and redirect serial I/Os to console\n",
+    QEMU_ARCH_ALL)
+SRST
+``-nographic``
+    Normally, if QEMU is compiled with graphical window support, it
+    displays output such as guest graphics, guest console, and the QEMU
+    monitor in a window. With this option, you can totally disable
+    graphical output so that QEMU is a simple command line application.
+    The emulated serial port is redirected on the console and muxed with
+    the monitor (unless redirected elsewhere explicitly). Therefore, you
+    can still use QEMU to debug a Linux kernel with a serial console.
+    Use C-a h for help on switching between the console and monitor.
+ERST
+
+DEF("curses", 0, QEMU_OPTION_curses,
+    "-curses         shorthand for -display curses\n",
+    QEMU_ARCH_ALL)
+SRST
+``-curses``
+    Normally, if QEMU is compiled with graphical window support, it
+    displays output such as guest graphics, guest console, and the QEMU
+    monitor in a window. With this option, QEMU can display the VGA
+    output when in text mode using a curses/ncurses interface. Nothing
+    is displayed in graphical mode.
+ERST
+
+DEF("alt-grab", 0, QEMU_OPTION_alt_grab,
+    "-alt-grab       use Ctrl-Alt-Shift to grab mouse (instead of Ctrl-Alt)\n",
+    QEMU_ARCH_ALL)
+SRST
+``-alt-grab``
+    Use Ctrl-Alt-Shift to grab mouse (instead of Ctrl-Alt). Note that
+    this also affects the special keys (for fullscreen, monitor-mode
+    switching, etc).
+ERST
+
+DEF("ctrl-grab", 0, QEMU_OPTION_ctrl_grab,
+    "-ctrl-grab      use Right-Ctrl to grab mouse (instead of Ctrl-Alt)\n",
+    QEMU_ARCH_ALL)
+SRST
+``-ctrl-grab``
+    Use Right-Ctrl to grab mouse (instead of Ctrl-Alt). Note that this
+    also affects the special keys (for fullscreen, monitor-mode
+    switching, etc).
+ERST
+
+DEF("no-quit", 0, QEMU_OPTION_no_quit,
+    "-no-quit        disable SDL window close capability\n", QEMU_ARCH_ALL)
+SRST
+``-no-quit``
+    Disable SDL window close capability.
+ERST
+
+DEF("sdl", 0, QEMU_OPTION_sdl,
+    "-sdl            shorthand for -display sdl\n", QEMU_ARCH_ALL)
+SRST
+``-sdl``
+    Enable SDL.
+ERST
+
+DEF("spice", HAS_ARG, QEMU_OPTION_spice,
+    "-spice [port=port][,tls-port=secured-port][,x509-dir=]\n"
+    "       [,x509-key-file=][,x509-key-password=]\n"
+    "       [,x509-cert-file=][,x509-cacert-file=]\n"
+    "       [,x509-dh-key-file=][,addr=addr][,ipv4|ipv6|unix]\n"
+    "       [,tls-ciphers=]\n"
+    "       [,tls-channel=[main|display|cursor|inputs|record|playback]]\n"
+    "       [,plaintext-channel=[main|display|cursor|inputs|record|playback]]\n"
+    "       [,sasl][,password=][,disable-ticketing]\n"
+    "       [,image-compression=[auto_glz|auto_lz|quic|glz|lz|off]]\n"
+    "       [,jpeg-wan-compression=[auto|never|always]]\n"
+    "       [,zlib-glz-wan-compression=[auto|never|always]]\n"
+    "       [,streaming-video=[off|all|filter]][,disable-copy-paste]\n"
+    "       [,disable-agent-file-xfer][,agent-mouse=[on|off]]\n"
+    "       [,playback-compression=[on|off]][,seamless-migration=[on|off]]\n"
+    "       [,gl=[on|off]][,rendernode=]\n"
+    "   enable spice\n"
+    "   at least one of {port, tls-port} is mandatory\n",
+    QEMU_ARCH_ALL)
+SRST
+``-spice option[,option[,...]]``
+    Enable the spice remote desktop protocol. Valid options are
+
+    ``port=``
+        Set the TCP port spice is listening on for plaintext channels.
+
+    ``addr=``
+        Set the IP address spice is listening on. Default is any
+        address.
+
+    ``ipv4``; \ ``ipv6``; \ ``unix``
+        Force using the specified IP version.
+
+    ``password=``
+        Set the password you need to authenticate.
+
+    ``sasl``
+        Require that the client use SASL to authenticate with the spice.
+        The exact choice of authentication method used is controlled
+        from the system / user's SASL configuration file for the 'qemu'
+        service. This is typically found in /etc/sasl2/qemu.conf. If
+        running QEMU as an unprivileged user, an environment variable
+        SASL\_CONF\_PATH can be used to make it search alternate
+        locations for the service config. While some SASL auth methods
+        can also provide data encryption (eg GSSAPI), it is recommended
+        that SASL always be combined with the 'tls' and 'x509' settings
+        to enable use of SSL and server certificates. This ensures a
+        data encryption preventing compromise of authentication
+        credentials.
+
+    ``disable-ticketing``
+        Allow client connects without authentication.
+
+    ``disable-copy-paste``
+        Disable copy paste between the client and the guest.
+
+    ``disable-agent-file-xfer``
+        Disable spice-vdagent based file-xfer between the client and the
+        guest.
+
+    ``tls-port=``
+        Set the TCP port spice is listening on for encrypted channels.
+
+    ``x509-dir=``
+        Set the x509 file directory. Expects same filenames as -vnc
+        $display,x509=$dir
+
+    ``x509-key-file=``; \ ``x509-key-password=``; \ ``x509-cert-file=``; \ ``x509-cacert-file=``; \ ``x509-dh-key-file=``
+        The x509 file names can also be configured individually.
+
+    ``tls-ciphers=``
+        Specify which ciphers to use.
+
+    ``tls-channel=[main|display|cursor|inputs|record|playback]``; \ ``plaintext-channel=[main|display|cursor|inputs|record|playback]``
+        Force specific channel to be used with or without TLS
+        encryption. The options can be specified multiple times to
+        configure multiple channels. The special name "default" can be
+        used to set the default mode. For channels which are not
+        explicitly forced into one mode the spice client is allowed to
+        pick tls/plaintext as he pleases.
+
+    ``image-compression=[auto_glz|auto_lz|quic|glz|lz|off]``
+        Configure image compression (lossless). Default is auto\_glz.
+
+    ``jpeg-wan-compression=[auto|never|always]``; \ ``zlib-glz-wan-compression=[auto|never|always]``
+        Configure wan image compression (lossy for slow links). Default
+        is auto.
+
+    ``streaming-video=[off|all|filter]``
+        Configure video stream detection. Default is off.
+
+    ``agent-mouse=[on|off]``
+        Enable/disable passing mouse events via vdagent. Default is on.
+
+    ``playback-compression=[on|off]``
+        Enable/disable audio stream compression (using celt 0.5.1).
+        Default is on.
+
+    ``seamless-migration=[on|off]``
+        Enable/disable spice seamless migration. Default is off.
+
+    ``gl=[on|off]``
+        Enable/disable OpenGL context. Default is off.
+
+    ``rendernode=``
+        DRM render node for OpenGL rendering. If not specified, it will
+        pick the first available. (Since 2.9)
+ERST
+
+DEF("portrait", 0, QEMU_OPTION_portrait,
+    "-portrait       rotate graphical output 90 deg left (only PXA LCD)\n",
+    QEMU_ARCH_ALL)
+SRST
+``-portrait``
+    Rotate graphical output 90 deg left (only PXA LCD).
+ERST
+
+DEF("rotate", HAS_ARG, QEMU_OPTION_rotate,
+    "-rotate    rotate graphical output some deg left (only PXA LCD)\n",
+    QEMU_ARCH_ALL)
+SRST
+``-rotate deg``
+    Rotate graphical output some deg left (only PXA LCD).
+ERST
+
+DEF("vga", HAS_ARG, QEMU_OPTION_vga,
+    "-vga [std|cirrus|vmware|qxl|xenfb|tcx|cg3|virtio|none]\n"
+    "                select video card type\n", QEMU_ARCH_ALL)
+SRST
+``-vga type``
+    Select type of VGA card to emulate. Valid values for type are
+
+    ``cirrus``
+        Cirrus Logic GD5446 Video card. All Windows versions starting
+        from Windows 95 should recognize and use this graphic card. For
+        optimal performances, use 16 bit color depth in the guest and
+        the host OS. (This card was the default before QEMU 2.2)
+
+    ``std``
+        Standard VGA card with Bochs VBE extensions. If your guest OS
+        supports the VESA 2.0 VBE extensions (e.g. Windows XP) and if
+        you want to use high resolution modes (>= 1280x1024x16) then you
+        should use this option. (This card is the default since QEMU
+        2.2)
+
+    ``vmware``
+        VMWare SVGA-II compatible adapter. Use it if you have
+        sufficiently recent XFree86/XOrg server or Windows guest with a
+        driver for this card.
+
+    ``qxl``
+        QXL paravirtual graphic card. It is VGA compatible (including
+        VESA 2.0 VBE support). Works best with qxl guest drivers
+        installed though. Recommended choice when using the spice
+        protocol.
+
+    ``tcx``
+        (sun4m only) Sun TCX framebuffer. This is the default
+        framebuffer for sun4m machines and offers both 8-bit and 24-bit
+        colour depths at a fixed resolution of 1024x768.
+
+    ``cg3``
+        (sun4m only) Sun cgthree framebuffer. This is a simple 8-bit
+        framebuffer for sun4m machines available in both 1024x768
+        (OpenBIOS) and 1152x900 (OBP) resolutions aimed at people
+        wishing to run older Solaris versions.
+
+    ``virtio``
+        Virtio VGA card.
+
+    ``none``
+        Disable VGA card.
+ERST
+
+DEF("full-screen", 0, QEMU_OPTION_full_screen,
+    "-full-screen    start in full screen\n", QEMU_ARCH_ALL)
+SRST
+``-full-screen``
+    Start in full screen.
+ERST
+
+DEF("g", HAS_ARG, QEMU_OPTION_g ,
+    "-g WxH[xDEPTH]  Set the initial graphical resolution and depth\n",
+    QEMU_ARCH_PPC | QEMU_ARCH_SPARC | QEMU_ARCH_M68K)
+SRST
+``-g`` *width*\ ``x``\ *height*\ ``[x``\ *depth*\ ``]``
+    Set the initial graphical resolution and depth (PPC, SPARC only).
+
+    For PPC the default is 800x600x32.
+
+    For SPARC with the TCX graphics device, the default is 1024x768x8
+    with the option of 1024x768x24. For cgthree, the default is
+    1024x768x8 with the option of 1152x900x8 for people who wish to use
+    OBP.
+ERST
+
+DEF("vnc", HAS_ARG, QEMU_OPTION_vnc ,
+    "-vnc   shorthand for -display vnc=\n", QEMU_ARCH_ALL)
+SRST
+``-vnc display[,option[,option[,...]]]``
+    Normally, if QEMU is compiled with graphical window support, it
+    displays output such as guest graphics, guest console, and the QEMU
+    monitor in a window. With this option, you can have QEMU listen on
+    VNC display display and redirect the VGA display over the VNC
+    session. It is very useful to enable the usb tablet device when
+    using this option (option ``-device usb-tablet``). When using the
+    VNC display, you must use the ``-k`` parameter to set the keyboard
+    layout if you are not using en-us. Valid syntax for the display is
+
+    ``to=L``
+        With this option, QEMU will try next available VNC displays,
+        until the number L, if the origianlly defined "-vnc display" is
+        not available, e.g. port 5900+display is already used by another
+        application. By default, to=0.
+
+    ``host:d``
+        TCP connections will only be allowed from host on display d. By
+        convention the TCP port is 5900+d. Optionally, host can be
+        omitted in which case the server will accept connections from
+        any host.
+
+    ``unix:path``
+        Connections will be allowed over UNIX domain sockets where path
+        is the location of a unix socket to listen for connections on.
+
+    ``none``
+        VNC is initialized but not started. The monitor ``change``
+        command can be used to later start the VNC server.
+
+    Following the display value there may be one or more option flags
+    separated by commas. Valid options are
+
+    ``reverse``
+        Connect to a listening VNC client via a "reverse" connection.
+        The client is specified by the display. For reverse network
+        connections (host:d,``reverse``), the d argument is a TCP port
+        number, not a display number.
+
+    ``websocket``
+        Opens an additional TCP listening port dedicated to VNC
+        Websocket connections. If a bare websocket option is given, the
+        Websocket port is 5700+display. An alternative port can be
+        specified with the syntax ``websocket``\ =port.
+
+        If host is specified connections will only be allowed from this
+        host. It is possible to control the websocket listen address
+        independently, using the syntax ``websocket``\ =host:port.
+
+        If no TLS credentials are provided, the websocket connection
+        runs in unencrypted mode. If TLS credentials are provided, the
+        websocket connection requires encrypted client connections.
+
+    ``password``
+        Require that password based authentication is used for client
+        connections.
+
+        The password must be set separately using the ``set_password``
+        command in the :ref:`QEMU monitor`. The
+        syntax to change your password is:
+        ``set_password  `` where  could be
+        either "vnc" or "spice".
+
+        If you would like to change  password expiration, you
+        should use ``expire_password  ``
+        where expiration time could be one of the following options:
+        now, never, +seconds or UNIX time of expiration, e.g. +60 to
+        make password expire in 60 seconds, or 1335196800 to make
+        password expire on "Mon Apr 23 12:00:00 EDT 2012" (UNIX time for
+        this date and time).
+
+        You can also use keywords "now" or "never" for the expiration
+        time to allow  password to expire immediately or never
+        expire.
+
+    ``tls-creds=ID``
+        Provides the ID of a set of TLS credentials to use to secure the
+        VNC server. They will apply to both the normal VNC server socket
+        and the websocket socket (if enabled). Setting TLS credentials
+        will cause the VNC server socket to enable the VeNCrypt auth
+        mechanism. The credentials should have been previously created
+        using the ``-object tls-creds`` argument.
+
+    ``tls-authz=ID``
+        Provides the ID of the QAuthZ authorization object against which
+        the client's x509 distinguished name will validated. This object
+        is only resolved at time of use, so can be deleted and recreated
+        on the fly while the VNC server is active. If missing, it will
+        default to denying access.
+
+    ``sasl``
+        Require that the client use SASL to authenticate with the VNC
+        server. The exact choice of authentication method used is
+        controlled from the system / user's SASL configuration file for
+        the 'qemu' service. This is typically found in
+        /etc/sasl2/qemu.conf. If running QEMU as an unprivileged user,
+        an environment variable SASL\_CONF\_PATH can be used to make it
+        search alternate locations for the service config. While some
+        SASL auth methods can also provide data encryption (eg GSSAPI),
+        it is recommended that SASL always be combined with the 'tls'
+        and 'x509' settings to enable use of SSL and server
+        certificates. This ensures a data encryption preventing
+        compromise of authentication credentials. See the
+        :ref:`VNC security` section in the System Emulation Users Guide
+        for details on using SASL authentication.
+
+    ``sasl-authz=ID``
+        Provides the ID of the QAuthZ authorization object against which
+        the client's SASL username will validated. This object is only
+        resolved at time of use, so can be deleted and recreated on the
+        fly while the VNC server is active. If missing, it will default
+        to denying access.
+
+    ``acl``
+        Legacy method for enabling authorization of clients against the
+        x509 distinguished name and SASL username. It results in the
+        creation of two ``authz-list`` objects with IDs of
+        ``vnc.username`` and ``vnc.x509dname``. The rules for these
+        objects must be configured with the HMP ACL commands.
+
+        This option is deprecated and should no longer be used. The new
+        ``sasl-authz`` and ``tls-authz`` options are a replacement.
+
+    ``lossy``
+        Enable lossy compression methods (gradient, JPEG, ...). If this
+        option is set, VNC client may receive lossy framebuffer updates
+        depending on its encoding settings. Enabling this option can
+        save a lot of bandwidth at the expense of quality.
+
+    ``non-adaptive``
+        Disable adaptive encodings. Adaptive encodings are enabled by
+        default. An adaptive encoding will try to detect frequently
+        updated screen regions, and send updates in these regions using
+        a lossy encoding (like JPEG). This can be really helpful to save
+        bandwidth when playing videos. Disabling adaptive encodings
+        restores the original static behavior of encodings like Tight.
+
+    ``share=[allow-exclusive|force-shared|ignore]``
+        Set display sharing policy. 'allow-exclusive' allows clients to
+        ask for exclusive access. As suggested by the rfb spec this is
+        implemented by dropping other connections. Connecting multiple
+        clients in parallel requires all clients asking for a shared
+        session (vncviewer: -shared switch). This is the default.
+        'force-shared' disables exclusive client access. Useful for
+        shared desktop sessions, where you don't want someone forgetting
+        specify -shared disconnect everybody else. 'ignore' completely
+        ignores the shared flag and allows everybody connect
+        unconditionally. Doesn't conform to the rfb spec but is
+        traditional QEMU behavior.
+
+    ``key-delay-ms``
+        Set keyboard delay, for key down and key up events, in
+        milliseconds. Default is 10. Keyboards are low-bandwidth
+        devices, so this slowdown can help the device and guest to keep
+        up and not lose events in case events are arriving in bulk.
+        Possible causes for the latter are flaky network connections, or
+        scripts for automated testing.
+
+    ``audiodev=audiodev``
+        Use the specified audiodev when the VNC client requests audio
+        transmission. When not using an -audiodev argument, this option
+        must be omitted, otherwise is must be present and specify a
+        valid audiodev.
+ERST
+
+ARCHHEADING(, QEMU_ARCH_I386)
+
+ARCHHEADING(i386 target only:, QEMU_ARCH_I386)
+
+DEF("win2k-hack", 0, QEMU_OPTION_win2k_hack,
+    "-win2k-hack     use it when installing Windows 2000 to avoid a disk full bug\n",
+    QEMU_ARCH_I386)
+SRST
+``-win2k-hack``
+    Use it when installing Windows 2000 to avoid a disk full bug. After
+    Windows 2000 is installed, you no longer need this option (this
+    option slows down the IDE transfers).
+ERST
+
+DEF("no-fd-bootchk", 0, QEMU_OPTION_no_fd_bootchk,
+    "-no-fd-bootchk  disable boot signature checking for floppy disks\n",
+    QEMU_ARCH_I386)
+SRST
+``-no-fd-bootchk``
+    Disable boot signature checking for floppy disks in BIOS. May be
+    needed to boot from old floppy disks.
+ERST
+
+DEF("no-acpi", 0, QEMU_OPTION_no_acpi,
+           "-no-acpi        disable ACPI\n", QEMU_ARCH_I386 | QEMU_ARCH_ARM)
+SRST
+``-no-acpi``
+    Disable ACPI (Advanced Configuration and Power Interface) support.
+    Use it if your guest OS complains about ACPI problems (PC target
+    machine only).
+ERST
+
+DEF("no-hpet", 0, QEMU_OPTION_no_hpet,
+    "-no-hpet        disable HPET\n", QEMU_ARCH_I386)
+SRST
+``-no-hpet``
+    Disable HPET support.
+ERST
+
+DEF("acpitable", HAS_ARG, QEMU_OPTION_acpitable,
+    "-acpitable [sig=str][,rev=n][,oem_id=str][,oem_table_id=str][,oem_rev=n][,asl_compiler_id=str][,asl_compiler_rev=n][,{data|file}=file1[:file2]...]\n"
+    "                ACPI table description\n", QEMU_ARCH_I386)
+SRST
+``-acpitable [sig=str][,rev=n][,oem_id=str][,oem_table_id=str][,oem_rev=n] [,asl_compiler_id=str][,asl_compiler_rev=n][,data=file1[:file2]...]``
+    Add ACPI table with specified header fields and context from
+    specified files. For file=, take whole ACPI table from the specified
+    files, including all ACPI headers (possible overridden by other
+    options). For data=, only data portion of the table is used, all
+    header information is specified in the command line. If a SLIC table
+    is supplied to QEMU, then the SLIC's oem\_id and oem\_table\_id
+    fields will override the same in the RSDT and the FADT (a.k.a.
+    FACP), in order to ensure the field matches required by the
+    Microsoft SLIC spec and the ACPI spec.
+ERST
+
+DEF("smbios", HAS_ARG, QEMU_OPTION_smbios,
+    "-smbios file=binary\n"
+    "                load SMBIOS entry from binary file\n"
+    "-smbios type=0[,vendor=str][,version=str][,date=str][,release=%d.%d]\n"
+    "              [,uefi=on|off]\n"
+    "                specify SMBIOS type 0 fields\n"
+    "-smbios type=1[,manufacturer=str][,product=str][,version=str][,serial=str]\n"
+    "              [,uuid=uuid][,sku=str][,family=str]\n"
+    "                specify SMBIOS type 1 fields\n"
+    "-smbios type=2[,manufacturer=str][,product=str][,version=str][,serial=str]\n"
+    "              [,asset=str][,location=str]\n"
+    "                specify SMBIOS type 2 fields\n"
+    "-smbios type=3[,manufacturer=str][,version=str][,serial=str][,asset=str]\n"
+    "              [,sku=str]\n"
+    "                specify SMBIOS type 3 fields\n"
+    "-smbios type=4[,sock_pfx=str][,manufacturer=str][,version=str][,serial=str]\n"
+    "              [,asset=str][,part=str][,max-speed=%d][,current-speed=%d]\n"
+    "                specify SMBIOS type 4 fields\n"
+    "-smbios type=11[,value=str][,path=filename]\n"
+    "                specify SMBIOS type 11 fields\n"
+    "-smbios type=17[,loc_pfx=str][,bank=str][,manufacturer=str][,serial=str]\n"
+    "               [,asset=str][,part=str][,speed=%d]\n"
+    "                specify SMBIOS type 17 fields\n",
+    QEMU_ARCH_I386 | QEMU_ARCH_ARM)
+SRST
+``-smbios file=binary``
+    Load SMBIOS entry from binary file.
+
+``-smbios type=0[,vendor=str][,version=str][,date=str][,release=%d.%d][,uefi=on|off]``
+    Specify SMBIOS type 0 fields
+
+``-smbios type=1[,manufacturer=str][,product=str][,version=str][,serial=str][,uuid=uuid][,sku=str][,family=str]``
+    Specify SMBIOS type 1 fields
+
+``-smbios type=2[,manufacturer=str][,product=str][,version=str][,serial=str][,asset=str][,location=str]``
+    Specify SMBIOS type 2 fields
+
+``-smbios type=3[,manufacturer=str][,version=str][,serial=str][,asset=str][,sku=str]``
+    Specify SMBIOS type 3 fields
+
+``-smbios type=4[,sock_pfx=str][,manufacturer=str][,version=str][,serial=str][,asset=str][,part=str]``
+    Specify SMBIOS type 4 fields
+
+``-smbios type=11[,value=str][,path=filename]``
+    Specify SMBIOS type 11 fields
+
+    This argument can be repeated multiple times, and values are added in the order they are parsed.
+    Applications intending to use OEM strings data are encouraged to use their application name as
+    a prefix for the value string. This facilitates passing information for multiple applications
+    concurrently.
+
+    The ``value=str`` syntax provides the string data inline, while the ``path=filename`` syntax
+    loads data from a file on disk. Note that the file is not permitted to contain any NUL bytes.
+
+    Both the ``value`` and ``path`` options can be repeated multiple times and will be added to
+    the SMBIOS table in the order in which they appear.
+
+    Note that on the x86 architecture, the total size of all SMBIOS tables is limited to 65535
+    bytes. Thus the OEM strings data is not suitable for passing large amounts of data into the
+    guest. Instead it should be used as a indicator to inform the guest where to locate the real
+    data set, for example, by specifying the serial ID of a block device.
+
+    An example passing three strings is
+
+    .. parsed-literal::
+
+        -smbios type=11,value=cloud-init:ds=nocloud-net;s=http://10.10.0.1:8000/,\\
+                        value=anaconda:method=http://dl.fedoraproject.org/pub/fedora/linux/releases/25/x86_64/os,\\
+                        path=/some/file/with/oemstringsdata.txt
+
+    In the guest OS this is visible with the ``dmidecode`` command
+
+     .. parsed-literal::
+
+         $ dmidecode -t 11
+         Handle 0x0E00, DMI type 11, 5 bytes
+         OEM Strings
+              String 1: cloud-init:ds=nocloud-net;s=http://10.10.0.1:8000/
+              String 2: anaconda:method=http://dl.fedoraproject.org/pub/fedora/linux/releases/25/x86_64/os
+              String 3: myapp:some extra data
+
+
+``-smbios type=17[,loc_pfx=str][,bank=str][,manufacturer=str][,serial=str][,asset=str][,part=str][,speed=%d]``
+    Specify SMBIOS type 17 fields
+ERST
+
+DEFHEADING()
+
+DEFHEADING(Network options:)
+
+DEF("netdev", HAS_ARG, QEMU_OPTION_netdev,
+#ifdef CONFIG_SLIRP
+    "-netdev user,id=str[,ipv4[=on|off]][,net=addr[/mask]][,host=addr]\n"
+    "         [,ipv6[=on|off]][,ipv6-net=addr[/int]][,ipv6-host=addr]\n"
+    "         [,restrict=on|off][,hostname=host][,dhcpstart=addr]\n"
+    "         [,dns=addr][,ipv6-dns=addr][,dnssearch=domain][,domainname=domain]\n"
+    "         [,tftp=dir][,tftp-server-name=name][,bootfile=f][,hostfwd=rule][,guestfwd=rule]"
+#ifndef _WIN32
+                                             "[,smb=dir[,smbserver=addr]]\n"
+#endif
+    "                configure a user mode network backend with ID 'str',\n"
+    "                its DHCP server and optional services\n"
+#endif
+#ifdef _WIN32
+    "-netdev tap,id=str,ifname=name\n"
+    "                configure a host TAP network backend with ID 'str'\n"
+#else
+    "-netdev tap,id=str[,fd=h][,fds=x:y:...:z][,ifname=name][,script=file][,downscript=dfile]\n"
+    "         [,br=bridge][,helper=helper][,sndbuf=nbytes][,vnet_hdr=on|off][,vhost=on|off]\n"
+    "         [,vhostfd=h][,vhostfds=x:y:...:z][,vhostforce=on|off][,queues=n]\n"
+    "         [,poll-us=n]\n"
+    "                configure a host TAP network backend with ID 'str'\n"
+    "                connected to a bridge (default=" DEFAULT_BRIDGE_INTERFACE ")\n"
+    "                use network scripts 'file' (default=" DEFAULT_NETWORK_SCRIPT ")\n"
+    "                to configure it and 'dfile' (default=" DEFAULT_NETWORK_DOWN_SCRIPT ")\n"
+    "                to deconfigure it\n"
+    "                use '[down]script=no' to disable script execution\n"
+    "                use network helper 'helper' (default=" DEFAULT_BRIDGE_HELPER ") to\n"
+    "                configure it\n"
+    "                use 'fd=h' to connect to an already opened TAP interface\n"
+    "                use 'fds=x:y:...:z' to connect to already opened multiqueue capable TAP interfaces\n"
+    "                use 'sndbuf=nbytes' to limit the size of the send buffer (the\n"
+    "                default is disabled 'sndbuf=0' to enable flow control set 'sndbuf=1048576')\n"
+    "                use vnet_hdr=off to avoid enabling the IFF_VNET_HDR tap flag\n"
+    "                use vnet_hdr=on to make the lack of IFF_VNET_HDR support an error condition\n"
+    "                use vhost=on to enable experimental in kernel accelerator\n"
+    "                    (only has effect for virtio guests which use MSIX)\n"
+    "                use vhostforce=on to force vhost on for non-MSIX virtio guests\n"
+    "                use 'vhostfd=h' to connect to an already opened vhost net device\n"
+    "                use 'vhostfds=x:y:...:z to connect to multiple already opened vhost net devices\n"
+    "                use 'queues=n' to specify the number of queues to be created for multiqueue TAP\n"
+    "                use 'poll-us=n' to speciy the maximum number of microseconds that could be\n"
+    "                spent on busy polling for vhost net\n"
+    "-netdev bridge,id=str[,br=bridge][,helper=helper]\n"
+    "                configure a host TAP network backend with ID 'str' that is\n"
+    "                connected to a bridge (default=" DEFAULT_BRIDGE_INTERFACE ")\n"
+    "                using the program 'helper (default=" DEFAULT_BRIDGE_HELPER ")\n"
+#endif
+#ifdef __linux__
+    "-netdev l2tpv3,id=str,src=srcaddr,dst=dstaddr[,srcport=srcport][,dstport=dstport]\n"
+    "         [,rxsession=rxsession],txsession=txsession[,ipv6=on/off][,udp=on/off]\n"
+    "         [,cookie64=on/off][,counter][,pincounter][,txcookie=txcookie]\n"
+    "         [,rxcookie=rxcookie][,offset=offset]\n"
+    "                configure a network backend with ID 'str' connected to\n"
+    "                an Ethernet over L2TPv3 pseudowire.\n"
+    "                Linux kernel 3.3+ as well as most routers can talk\n"
+    "                L2TPv3. This transport allows connecting a VM to a VM,\n"
+    "                VM to a router and even VM to Host. It is a nearly-universal\n"
+    "                standard (RFC3931). Note - this implementation uses static\n"
+    "                pre-configured tunnels (same as the Linux kernel).\n"
+    "                use 'src=' to specify source address\n"
+    "                use 'dst=' to specify destination address\n"
+    "                use 'udp=on' to specify udp encapsulation\n"
+    "                use 'srcport=' to specify source udp port\n"
+    "                use 'dstport=' to specify destination udp port\n"
+    "                use 'ipv6=on' to force v6\n"
+    "                L2TPv3 uses cookies to prevent misconfiguration as\n"
+    "                well as a weak security measure\n"
+    "                use 'rxcookie=0x012345678' to specify a rxcookie\n"
+    "                use 'txcookie=0x012345678' to specify a txcookie\n"
+    "                use 'cookie64=on' to set cookie size to 64 bit, otherwise 32\n"
+    "                use 'counter=off' to force a 'cut-down' L2TPv3 with no counter\n"
+    "                use 'pincounter=on' to work around broken counter handling in peer\n"
+    "                use 'offset=X' to add an extra offset between header and data\n"
+#endif
+    "-netdev socket,id=str[,fd=h][,listen=[host]:port][,connect=host:port]\n"
+    "                configure a network backend to connect to another network\n"
+    "                using a socket connection\n"
+    "-netdev socket,id=str[,fd=h][,mcast=maddr:port[,localaddr=addr]]\n"
+    "                configure a network backend to connect to a multicast maddr and port\n"
+    "                use 'localaddr=addr' to specify the host address to send packets from\n"
+    "-netdev socket,id=str[,fd=h][,udp=host:port][,localaddr=host:port]\n"
+    "                configure a network backend to connect to another network\n"
+    "                using an UDP tunnel\n"
+#ifdef CONFIG_VDE
+    "-netdev vde,id=str[,sock=socketpath][,port=n][,group=groupname][,mode=octalmode]\n"
+    "                configure a network backend to connect to port 'n' of a vde switch\n"
+    "                running on host and listening for incoming connections on 'socketpath'.\n"
+    "                Use group 'groupname' and mode 'octalmode' to change default\n"
+    "                ownership and permissions for communication port.\n"
+#endif
+#ifdef CONFIG_NETMAP
+    "-netdev netmap,id=str,ifname=name[,devname=nmname]\n"
+    "                attach to the existing netmap-enabled network interface 'name', or to a\n"
+    "                VALE port (created on the fly) called 'name' ('nmname' is name of the \n"
+    "                netmap device, defaults to '/dev/netmap')\n"
+#endif
+#ifdef CONFIG_POSIX
+    "-netdev vhost-user,id=str,chardev=dev[,vhostforce=on|off]\n"
+    "                configure a vhost-user network, backed by a chardev 'dev'\n"
+#endif
+#ifdef __linux__
+    "-netdev vhost-vdpa,id=str,vhostdev=/path/to/dev\n"
+    "                configure a vhost-vdpa network,Establish a vhost-vdpa netdev\n"
+#endif
+    "-netdev hubport,id=str,hubid=n[,netdev=nd]\n"
+    "                configure a hub port on the hub with ID 'n'\n", QEMU_ARCH_ALL)
+DEF("nic", HAS_ARG, QEMU_OPTION_nic,
+    "-nic [tap|bridge|"
+#ifdef CONFIG_SLIRP
+    "user|"
+#endif
+#ifdef __linux__
+    "l2tpv3|"
+#endif
+#ifdef CONFIG_VDE
+    "vde|"
+#endif
+#ifdef CONFIG_NETMAP
+    "netmap|"
+#endif
+#ifdef CONFIG_POSIX
+    "vhost-user|"
+#endif
+    "socket][,option][,...][mac=macaddr]\n"
+    "                initialize an on-board / default host NIC (using MAC address\n"
+    "                macaddr) and connect it to the given host network backend\n"
+    "-nic none       use it alone to have zero network devices (the default is to\n"
+    "                provided a 'user' network connection)\n",
+    QEMU_ARCH_ALL)
+DEF("net", HAS_ARG, QEMU_OPTION_net,
+    "-net nic[,macaddr=mac][,model=type][,name=str][,addr=str][,vectors=v]\n"
+    "                configure or create an on-board (or machine default) NIC and\n"
+    "                connect it to hub 0 (please use -nic unless you need a hub)\n"
+    "-net ["
+#ifdef CONFIG_SLIRP
+    "user|"
+#endif
+    "tap|"
+    "bridge|"
+#ifdef CONFIG_VDE
+    "vde|"
+#endif
+#ifdef CONFIG_NETMAP
+    "netmap|"
+#endif
+    "socket][,option][,option][,...]\n"
+    "                old way to initialize a host network interface\n"
+    "                (use the -netdev option if possible instead)\n", QEMU_ARCH_ALL)
+SRST
+``-nic [tap|bridge|user|l2tpv3|vde|netmap|vhost-user|socket][,...][,mac=macaddr][,model=mn]``
+    This option is a shortcut for configuring both the on-board
+    (default) guest NIC hardware and the host network backend in one go.
+    The host backend options are the same as with the corresponding
+    ``-netdev`` options below. The guest NIC model can be set with
+    ``model=modelname``. Use ``model=help`` to list the available device
+    types. The hardware MAC address can be set with ``mac=macaddr``.
+
+    The following two example do exactly the same, to show how ``-nic``
+    can be used to shorten the command line length:
+
+    .. parsed-literal::
+
+        |qemu_system| -netdev user,id=n1,ipv6=off -device e1000,netdev=n1,mac=52:54:98:76:54:32
+        |qemu_system| -nic user,ipv6=off,model=e1000,mac=52:54:98:76:54:32
+
+``-nic none``
+    Indicate that no network devices should be configured. It is used to
+    override the default configuration (default NIC with "user" host
+    network backend) which is activated if no other networking options
+    are provided.
+
+``-netdev user,id=id[,option][,option][,...]``
+    Configure user mode host network backend which requires no
+    administrator privilege to run. Valid options are:
+
+    ``id=id``
+        Assign symbolic name for use in monitor commands.
+
+    ``ipv4=on|off and ipv6=on|off``
+        Specify that either IPv4 or IPv6 must be enabled. If neither is
+        specified both protocols are enabled.
+
+    ``net=addr[/mask]``
+        Set IP network address the guest will see. Optionally specify
+        the netmask, either in the form a.b.c.d or as number of valid
+        top-most bits. Default is 10.0.2.0/24.
+
+    ``host=addr``
+        Specify the guest-visible address of the host. Default is the
+        2nd IP in the guest network, i.e. x.x.x.2.
+
+    ``ipv6-net=addr[/int]``
+        Set IPv6 network address the guest will see (default is
+        fec0::/64). The network prefix is given in the usual hexadecimal
+        IPv6 address notation. The prefix size is optional, and is given
+        as the number of valid top-most bits (default is 64).
+
+    ``ipv6-host=addr``
+        Specify the guest-visible IPv6 address of the host. Default is
+        the 2nd IPv6 in the guest network, i.e. xxxx::2.
+
+    ``restrict=on|off``
+        If this option is enabled, the guest will be isolated, i.e. it
+        will not be able to contact the host and no guest IP packets
+        will be routed over the host to the outside. This option does
+        not affect any explicitly set forwarding rules.
+
+    ``hostname=name``
+        Specifies the client hostname reported by the built-in DHCP
+        server.
+
+    ``dhcpstart=addr``
+        Specify the first of the 16 IPs the built-in DHCP server can
+        assign. Default is the 15th to 31st IP in the guest network,
+        i.e. x.x.x.15 to x.x.x.31.
+
+    ``dns=addr``
+        Specify the guest-visible address of the virtual nameserver. The
+        address must be different from the host address. Default is the
+        3rd IP in the guest network, i.e. x.x.x.3.
+
+    ``ipv6-dns=addr``
+        Specify the guest-visible address of the IPv6 virtual
+        nameserver. The address must be different from the host address.
+        Default is the 3rd IP in the guest network, i.e. xxxx::3.
+
+    ``dnssearch=domain``
+        Provides an entry for the domain-search list sent by the
+        built-in DHCP server. More than one domain suffix can be
+        transmitted by specifying this option multiple times. If
+        supported, this will cause the guest to automatically try to
+        append the given domain suffix(es) in case a domain name can not
+        be resolved.
+
+        Example:
+
+        .. parsed-literal::
+
+            |qemu_system| -nic user,dnssearch=mgmt.example.org,dnssearch=example.org
+
+    ``domainname=domain``
+        Specifies the client domain name reported by the built-in DHCP
+        server.
+
+    ``tftp=dir``
+        When using the user mode network stack, activate a built-in TFTP
+        server. The files in dir will be exposed as the root of a TFTP
+        server. The TFTP client on the guest must be configured in
+        binary mode (use the command ``bin`` of the Unix TFTP client).
+
+    ``tftp-server-name=name``
+        In BOOTP reply, broadcast name as the "TFTP server name"
+        (RFC2132 option 66). This can be used to advise the guest to
+        load boot files or configurations from a different server than
+        the host address.
+
+    ``bootfile=file``
+        When using the user mode network stack, broadcast file as the
+        BOOTP filename. In conjunction with ``tftp``, this can be used
+        to network boot a guest from a local directory.
+
+        Example (using pxelinux):
+
+        .. parsed-literal::
+
+            |qemu_system| -hda linux.img -boot n -device e1000,netdev=n1 \\
+                -netdev user,id=n1,tftp=/path/to/tftp/files,bootfile=/pxelinux.0
+
+    ``smb=dir[,smbserver=addr]``
+        When using the user mode network stack, activate a built-in SMB
+        server so that Windows OSes can access to the host files in
+        ``dir`` transparently. The IP address of the SMB server can be
+        set to addr. By default the 4th IP in the guest network is used,
+        i.e. x.x.x.4.
+
+        In the guest Windows OS, the line:
+
+        ::
+
+            10.0.2.4 smbserver
+
+        must be added in the file ``C:\WINDOWS\LMHOSTS`` (for windows
+        9x/Me) or ``C:\WINNT\SYSTEM32\DRIVERS\ETC\LMHOSTS`` (Windows
+        NT/2000).
+
+        Then ``dir`` can be accessed in ``\\smbserver\qemu``.
+
+        Note that a SAMBA server must be installed on the host OS.
+
+    ``hostfwd=[tcp|udp]:[hostaddr]:hostport-[guestaddr]:guestport``
+        Redirect incoming TCP or UDP connections to the host port
+        hostport to the guest IP address guestaddr on guest port
+        guestport. If guestaddr is not specified, its value is x.x.x.15
+        (default first address given by the built-in DHCP server). By
+        specifying hostaddr, the rule can be bound to a specific host
+        interface. If no connection type is set, TCP is used. This
+        option can be given multiple times.
+
+        For example, to redirect host X11 connection from screen 1 to
+        guest screen 0, use the following:
+
+        .. parsed-literal::
+
+            # on the host
+            |qemu_system| -nic user,hostfwd=tcp:127.0.0.1:6001-:6000
+            # this host xterm should open in the guest X11 server
+            xterm -display :1
+
+        To redirect telnet connections from host port 5555 to telnet
+        port on the guest, use the following:
+
+        .. parsed-literal::
+
+            # on the host
+            |qemu_system| -nic user,hostfwd=tcp::5555-:23
+            telnet localhost 5555
+
+        Then when you use on the host ``telnet localhost 5555``, you
+        connect to the guest telnet server.
+
+    ``guestfwd=[tcp]:server:port-dev``; \ ``guestfwd=[tcp]:server:port-cmd:command``
+        Forward guest TCP connections to the IP address server on port
+        port to the character device dev or to a program executed by
+        cmd:command which gets spawned for each connection. This option
+        can be given multiple times.
+
+        You can either use a chardev directly and have that one used
+        throughout QEMU's lifetime, like in the following example:
+
+        .. parsed-literal::
+
+            # open 10.10.1.1:4321 on bootup, connect 10.0.2.100:1234 to it whenever
+            # the guest accesses it
+            |qemu_system| -nic user,guestfwd=tcp:10.0.2.100:1234-tcp:10.10.1.1:4321
+
+        Or you can execute a command on every TCP connection established
+        by the guest, so that QEMU behaves similar to an inetd process
+        for that virtual server:
+
+        .. parsed-literal::
+
+            # call "netcat 10.10.1.1 4321" on every TCP connection to 10.0.2.100:1234
+            # and connect the TCP stream to its stdin/stdout
+            |qemu_system| -nic  'user,id=n1,guestfwd=tcp:10.0.2.100:1234-cmd:netcat 10.10.1.1 4321'
+
+``-netdev tap,id=id[,fd=h][,ifname=name][,script=file][,downscript=dfile][,br=bridge][,helper=helper]``
+    Configure a host TAP network backend with ID id.
+
+    Use the network script file to configure it and the network script
+    dfile to deconfigure it. If name is not provided, the OS
+    automatically provides one. The default network configure script is
+    ``/etc/qemu-ifup`` and the default network deconfigure script is
+    ``/etc/qemu-ifdown``. Use ``script=no`` or ``downscript=no`` to
+    disable script execution.
+
+    If running QEMU as an unprivileged user, use the network helper
+    to configure the TAP interface and attach it to the bridge.
+    The default network helper executable is
+    ``/path/to/qemu-bridge-helper`` and the default bridge device is
+    ``br0``.
+
+    ``fd``\ =h can be used to specify the handle of an already opened
+    host TAP interface.
+
+    Examples:
+
+    .. parsed-literal::
+
+        #launch a QEMU instance with the default network script
+        |qemu_system| linux.img -nic tap
+
+    .. parsed-literal::
+
+        #launch a QEMU instance with two NICs, each one connected
+        #to a TAP device
+        |qemu_system| linux.img \\
+                -netdev tap,id=nd0,ifname=tap0 -device e1000,netdev=nd0 \\
+                -netdev tap,id=nd1,ifname=tap1 -device rtl8139,netdev=nd1
+
+    .. parsed-literal::
+
+        #launch a QEMU instance with the default network helper to
+        #connect a TAP device to bridge br0
+        |qemu_system| linux.img -device virtio-net-pci,netdev=n1 \\
+                -netdev tap,id=n1,"helper=/path/to/qemu-bridge-helper"
+
+``-netdev bridge,id=id[,br=bridge][,helper=helper]``
+    Connect a host TAP network interface to a host bridge device.
+
+    Use the network helper helper to configure the TAP interface and
+    attach it to the bridge. The default network helper executable is
+    ``/path/to/qemu-bridge-helper`` and the default bridge device is
+    ``br0``.
+
+    Examples:
+
+    .. parsed-literal::
+
+        #launch a QEMU instance with the default network helper to
+        #connect a TAP device to bridge br0
+        |qemu_system| linux.img -netdev bridge,id=n1 -device virtio-net,netdev=n1
+
+    .. parsed-literal::
+
+        #launch a QEMU instance with the default network helper to
+        #connect a TAP device to bridge qemubr0
+        |qemu_system| linux.img -netdev bridge,br=qemubr0,id=n1 -device virtio-net,netdev=n1
+
+``-netdev socket,id=id[,fd=h][,listen=[host]:port][,connect=host:port]``
+    This host network backend can be used to connect the guest's network
+    to another QEMU virtual machine using a TCP socket connection. If
+    ``listen`` is specified, QEMU waits for incoming connections on port
+    (host is optional). ``connect`` is used to connect to another QEMU
+    instance using the ``listen`` option. ``fd``\ =h specifies an
+    already opened TCP socket.
+
+    Example:
+
+    .. parsed-literal::
+
+        # launch a first QEMU instance
+        |qemu_system| linux.img \\
+                         -device e1000,netdev=n1,mac=52:54:00:12:34:56 \\
+                         -netdev socket,id=n1,listen=:1234
+        # connect the network of this instance to the network of the first instance
+        |qemu_system| linux.img \\
+                         -device e1000,netdev=n2,mac=52:54:00:12:34:57 \\
+                         -netdev socket,id=n2,connect=127.0.0.1:1234
+
+``-netdev socket,id=id[,fd=h][,mcast=maddr:port[,localaddr=addr]]``
+    Configure a socket host network backend to share the guest's network
+    traffic with another QEMU virtual machines using a UDP multicast
+    socket, effectively making a bus for every QEMU with same multicast
+    address maddr and port. NOTES:
+
+    1. Several QEMU can be running on different hosts and share same bus
+       (assuming correct multicast setup for these hosts).
+
+    2. mcast support is compatible with User Mode Linux (argument
+       ``ethN=mcast``), see http://user-mode-linux.sf.net.
+
+    3. Use ``fd=h`` to specify an already opened UDP multicast socket.
+
+    Example:
+
+    .. parsed-literal::
+
+        # launch one QEMU instance
+        |qemu_system| linux.img \\
+                         -device e1000,netdev=n1,mac=52:54:00:12:34:56 \\
+                         -netdev socket,id=n1,mcast=230.0.0.1:1234
+        # launch another QEMU instance on same "bus"
+        |qemu_system| linux.img \\
+                         -device e1000,netdev=n2,mac=52:54:00:12:34:57 \\
+                         -netdev socket,id=n2,mcast=230.0.0.1:1234
+        # launch yet another QEMU instance on same "bus"
+        |qemu_system| linux.img \\
+                         -device e1000,netdev=n3,mac=52:54:00:12:34:58 \\
+                         -netdev socket,id=n3,mcast=230.0.0.1:1234
+
+    Example (User Mode Linux compat.):
+
+    .. parsed-literal::
+
+        # launch QEMU instance (note mcast address selected is UML's default)
+        |qemu_system| linux.img \\
+                         -device e1000,netdev=n1,mac=52:54:00:12:34:56 \\
+                         -netdev socket,id=n1,mcast=239.192.168.1:1102
+        # launch UML
+        /path/to/linux ubd0=/path/to/root_fs eth0=mcast
+
+    Example (send packets from host's 1.2.3.4):
+
+    .. parsed-literal::
+
+        |qemu_system| linux.img \\
+                         -device e1000,netdev=n1,mac=52:54:00:12:34:56 \\
+                         -netdev socket,id=n1,mcast=239.192.168.1:1102,localaddr=1.2.3.4
+
+``-netdev l2tpv3,id=id,src=srcaddr,dst=dstaddr[,srcport=srcport][,dstport=dstport],txsession=txsession[,rxsession=rxsession][,ipv6][,udp][,cookie64][,counter][,pincounter][,txcookie=txcookie][,rxcookie=rxcookie][,offset=offset]``
+    Configure a L2TPv3 pseudowire host network backend. L2TPv3 (RFC3931)
+    is a popular protocol to transport Ethernet (and other Layer 2) data
+    frames between two systems. It is present in routers, firewalls and
+    the Linux kernel (from version 3.3 onwards).
+
+    This transport allows a VM to communicate to another VM, router or
+    firewall directly.
+
+    ``src=srcaddr``
+        source address (mandatory)
+
+    ``dst=dstaddr``
+        destination address (mandatory)
+
+    ``udp``
+        select udp encapsulation (default is ip).
+
+    ``srcport=srcport``
+        source udp port.
+
+    ``dstport=dstport``
+        destination udp port.
+
+    ``ipv6``
+        force v6, otherwise defaults to v4.
+
+    ``rxcookie=rxcookie``; \ ``txcookie=txcookie``
+        Cookies are a weak form of security in the l2tpv3 specification.
+        Their function is mostly to prevent misconfiguration. By default
+        they are 32 bit.
+
+    ``cookie64``
+        Set cookie size to 64 bit instead of the default 32
+
+    ``counter=off``
+        Force a 'cut-down' L2TPv3 with no counter as in
+        draft-mkonstan-l2tpext-keyed-ipv6-tunnel-00
+
+    ``pincounter=on``
+        Work around broken counter handling in peer. This may also help
+        on networks which have packet reorder.
+
+    ``offset=offset``
+        Add an extra offset between header and data
+
+    For example, to attach a VM running on host 4.3.2.1 via L2TPv3 to
+    the bridge br-lan on the remote Linux host 1.2.3.4:
+
+    .. parsed-literal::
+
+        # Setup tunnel on linux host using raw ip as encapsulation
+        # on 1.2.3.4
+        ip l2tp add tunnel remote 4.3.2.1 local 1.2.3.4 tunnel_id 1 peer_tunnel_id 1 \\
+            encap udp udp_sport 16384 udp_dport 16384
+        ip l2tp add session tunnel_id 1 name vmtunnel0 session_id \\
+            0xFFFFFFFF peer_session_id 0xFFFFFFFF
+        ifconfig vmtunnel0 mtu 1500
+        ifconfig vmtunnel0 up
+        brctl addif br-lan vmtunnel0
+
+
+        # on 4.3.2.1
+        # launch QEMU instance - if your network has reorder or is very lossy add ,pincounter
+
+        |qemu_system| linux.img -device e1000,netdev=n1 \\
+            -netdev l2tpv3,id=n1,src=4.2.3.1,dst=1.2.3.4,udp,srcport=16384,dstport=16384,rxsession=0xffffffff,txsession=0xffffffff,counter
+
+``-netdev vde,id=id[,sock=socketpath][,port=n][,group=groupname][,mode=octalmode]``
+    Configure VDE backend to connect to PORT n of a vde switch running
+    on host and listening for incoming connections on socketpath. Use
+    GROUP groupname and MODE octalmode to change default ownership and
+    permissions for communication port. This option is only available if
+    QEMU has been compiled with vde support enabled.
+
+    Example:
+
+    .. parsed-literal::
+
+        # launch vde switch
+        vde_switch -F -sock /tmp/myswitch
+        # launch QEMU instance
+        |qemu_system| linux.img -nic vde,sock=/tmp/myswitch
+
+``-netdev vhost-user,chardev=id[,vhostforce=on|off][,queues=n]``
+    Establish a vhost-user netdev, backed by a chardev id. The chardev
+    should be a unix domain socket backed one. The vhost-user uses a
+    specifically defined protocol to pass vhost ioctl replacement
+    messages to an application on the other end of the socket. On
+    non-MSIX guests, the feature can be forced with vhostforce. Use
+    'queues=n' to specify the number of queues to be created for
+    multiqueue vhost-user.
+
+    Example:
+
+    ::
+
+        qemu -m 512 -object memory-backend-file,id=mem,size=512M,mem-path=/hugetlbfs,share=on \
+             -numa node,memdev=mem \
+             -chardev socket,id=chr0,path=/path/to/socket \
+             -netdev type=vhost-user,id=net0,chardev=chr0 \
+             -device virtio-net-pci,netdev=net0
+
+``-netdev vhost-vdpa,vhostdev=/path/to/dev``
+    Establish a vhost-vdpa netdev.
+
+    vDPA device is a device that uses a datapath which complies with
+    the virtio specifications with a vendor specific control path.
+    vDPA devices can be both physically located on the hardware or
+    emulated by software.
+
+``-netdev hubport,id=id,hubid=hubid[,netdev=nd]``
+    Create a hub port on the emulated hub with ID hubid.
+
+    The hubport netdev lets you connect a NIC to a QEMU emulated hub
+    instead of a single netdev. Alternatively, you can also connect the
+    hubport to another netdev with ID nd by using the ``netdev=nd``
+    option.
+
+``-net nic[,netdev=nd][,macaddr=mac][,model=type] [,name=name][,addr=addr][,vectors=v]``
+    Legacy option to configure or create an on-board (or machine
+    default) Network Interface Card(NIC) and connect it either to the
+    emulated hub with ID 0 (i.e. the default hub), or to the netdev nd.
+    If model is omitted, then the default NIC model associated with the
+    machine type is used. Note that the default NIC model may change in
+    future QEMU releases, so it is highly recommended to always specify
+    a model. Optionally, the MAC address can be changed to mac, the
+    device address set to addr (PCI cards only), and a name can be
+    assigned for use in monitor commands. Optionally, for PCI cards, you
+    can specify the number v of MSI-X vectors that the card should have;
+    this option currently only affects virtio cards; set v = 0 to
+    disable MSI-X. If no ``-net`` option is specified, a single NIC is
+    created. QEMU can emulate several different models of network card.
+    Use ``-net nic,model=help`` for a list of available devices for your
+    target.
+
+``-net user|tap|bridge|socket|l2tpv3|vde[,...][,name=name]``
+    Configure a host network backend (with the options corresponding to
+    the same ``-netdev`` option) and connect it to the emulated hub 0
+    (the default hub). Use name to specify the name of the hub port.
+ERST
+
+DEFHEADING()
+
+DEFHEADING(Character device options:)
+
+DEF("chardev", HAS_ARG, QEMU_OPTION_chardev,
+    "-chardev help\n"
+    "-chardev null,id=id[,mux=on|off][,logfile=PATH][,logappend=on|off]\n"
+    "-chardev socket,id=id[,host=host],port=port[,to=to][,ipv4][,ipv6][,nodelay][,reconnect=seconds]\n"
+    "         [,server][,nowait][,telnet][,websocket][,reconnect=seconds][,mux=on|off]\n"
+    "         [,logfile=PATH][,logappend=on|off][,tls-creds=ID][,tls-authz=ID] (tcp)\n"
+    "-chardev socket,id=id,path=path[,server][,nowait][,telnet][,websocket][,reconnect=seconds]\n"
+    "         [,mux=on|off][,logfile=PATH][,logappend=on|off][,abstract=on|off][,tight=on|off] (unix)\n"
+    "-chardev udp,id=id[,host=host],port=port[,localaddr=localaddr]\n"
+    "         [,localport=localport][,ipv4][,ipv6][,mux=on|off]\n"
+    "         [,logfile=PATH][,logappend=on|off]\n"
+    "-chardev msmouse,id=id[,mux=on|off][,logfile=PATH][,logappend=on|off]\n"
+    "-chardev vc,id=id[[,width=width][,height=height]][[,cols=cols][,rows=rows]]\n"
+    "         [,mux=on|off][,logfile=PATH][,logappend=on|off]\n"
+    "-chardev ringbuf,id=id[,size=size][,logfile=PATH][,logappend=on|off]\n"
+    "-chardev file,id=id,path=path[,mux=on|off][,logfile=PATH][,logappend=on|off]\n"
+    "-chardev pipe,id=id,path=path[,mux=on|off][,logfile=PATH][,logappend=on|off]\n"
+#ifdef _WIN32
+    "-chardev console,id=id[,mux=on|off][,logfile=PATH][,logappend=on|off]\n"
+    "-chardev serial,id=id,path=path[,mux=on|off][,logfile=PATH][,logappend=on|off]\n"
+#else
+    "-chardev pty,id=id[,mux=on|off][,logfile=PATH][,logappend=on|off]\n"
+    "-chardev stdio,id=id[,mux=on|off][,signal=on|off][,logfile=PATH][,logappend=on|off]\n"
+#endif
+#ifdef CONFIG_BRLAPI
+    "-chardev braille,id=id[,mux=on|off][,logfile=PATH][,logappend=on|off]\n"
+#endif
+#if defined(__linux__) || defined(__sun__) || defined(__FreeBSD__) \
+        || defined(__NetBSD__) || defined(__OpenBSD__) || defined(__DragonFly__)
+    "-chardev serial,id=id,path=path[,mux=on|off][,logfile=PATH][,logappend=on|off]\n"
+    "-chardev tty,id=id,path=path[,mux=on|off][,logfile=PATH][,logappend=on|off]\n"
+#endif
+#if defined(__linux__) || defined(__FreeBSD__) || defined(__DragonFly__)
+    "-chardev parallel,id=id,path=path[,mux=on|off][,logfile=PATH][,logappend=on|off]\n"
+    "-chardev parport,id=id,path=path[,mux=on|off][,logfile=PATH][,logappend=on|off]\n"
+#endif
+#if defined(CONFIG_SPICE)
+    "-chardev spicevmc,id=id,name=name[,debug=debug][,logfile=PATH][,logappend=on|off]\n"
+    "-chardev spiceport,id=id,name=name[,debug=debug][,logfile=PATH][,logappend=on|off]\n"
+#endif
+    , QEMU_ARCH_ALL
+)
+
+SRST
+The general form of a character device option is:
+
+``-chardev backend,id=id[,mux=on|off][,options]``
+    Backend is one of: ``null``, ``socket``, ``udp``, ``msmouse``,
+    ``vc``, ``ringbuf``, ``file``, ``pipe``, ``console``, ``serial``,
+    ``pty``, ``stdio``, ``braille``, ``tty``, ``parallel``, ``parport``,
+    ``spicevmc``, ``spiceport``. The specific backend will determine the
+    applicable options.
+
+    Use ``-chardev help`` to print all available chardev backend types.
+
+    All devices must have an id, which can be any string up to 127
+    characters long. It is used to uniquely identify this device in
+    other command line directives.
+
+    A character device may be used in multiplexing mode by multiple
+    front-ends. Specify ``mux=on`` to enable this mode. A multiplexer is
+    a "1:N" device, and here the "1" end is your specified chardev
+    backend, and the "N" end is the various parts of QEMU that can talk
+    to a chardev. If you create a chardev with ``id=myid`` and
+    ``mux=on``, QEMU will create a multiplexer with your specified ID,
+    and you can then configure multiple front ends to use that chardev
+    ID for their input/output. Up to four different front ends can be
+    connected to a single multiplexed chardev. (Without multiplexing
+    enabled, a chardev can only be used by a single front end.) For
+    instance you could use this to allow a single stdio chardev to be
+    used by two serial ports and the QEMU monitor:
+
+    ::
+
+        -chardev stdio,mux=on,id=char0 \
+        -mon chardev=char0,mode=readline \
+        -serial chardev:char0 \
+        -serial chardev:char0
+
+    You can have more than one multiplexer in a system configuration;
+    for instance you could have a TCP port multiplexed between UART 0
+    and UART 1, and stdio multiplexed between the QEMU monitor and a
+    parallel port:
+
+    ::
+
+        -chardev stdio,mux=on,id=char0 \
+        -mon chardev=char0,mode=readline \
+        -parallel chardev:char0 \
+        -chardev tcp,...,mux=on,id=char1 \
+        -serial chardev:char1 \
+        -serial chardev:char1
+
+    When you're using a multiplexed character device, some escape
+    sequences are interpreted in the input. See the chapter about
+    :ref:`keys in the character backend multiplexer` in the
+    System Emulation Users Guide for more details.
+
+    Note that some other command line options may implicitly create
+    multiplexed character backends; for instance ``-serial mon:stdio``
+    creates a multiplexed stdio backend connected to the serial port and
+    the QEMU monitor, and ``-nographic`` also multiplexes the console
+    and the monitor to stdio.
+
+    There is currently no support for multiplexing in the other
+    direction (where a single QEMU front end takes input and output from
+    multiple chardevs).
+
+    Every backend supports the ``logfile`` option, which supplies the
+    path to a file to record all data transmitted via the backend. The
+    ``logappend`` option controls whether the log file will be truncated
+    or appended to when opened.
+
+The available backends are:
+
+``-chardev null,id=id``
+    A void device. This device will not emit any data, and will drop any
+    data it receives. The null backend does not take any options.
+
+``-chardev socket,id=id[,TCP options or unix options][,server][,nowait][,telnet][,websocket][,reconnect=seconds][,tls-creds=id][,tls-authz=id]``
+    Create a two-way stream socket, which can be either a TCP or a unix
+    socket. A unix socket will be created if ``path`` is specified.
+    Behaviour is undefined if TCP options are specified for a unix
+    socket.
+
+    ``server`` specifies that the socket shall be a listening socket.
+
+    ``nowait`` specifies that QEMU should not block waiting for a client
+    to connect to a listening socket.
+
+    ``telnet`` specifies that traffic on the socket should interpret
+    telnet escape sequences.
+
+    ``websocket`` specifies that the socket uses WebSocket protocol for
+    communication.
+
+    ``reconnect`` sets the timeout for reconnecting on non-server
+    sockets when the remote end goes away. qemu will delay this many
+    seconds and then attempt to reconnect. Zero disables reconnecting,
+    and is the default.
+
+    ``tls-creds`` requests enablement of the TLS protocol for
+    encryption, and specifies the id of the TLS credentials to use for
+    the handshake. The credentials must be previously created with the
+    ``-object tls-creds`` argument.
+
+    ``tls-auth`` provides the ID of the QAuthZ authorization object
+    against which the client's x509 distinguished name will be
+    validated. This object is only resolved at time of use, so can be
+    deleted and recreated on the fly while the chardev server is active.
+    If missing, it will default to denying access.
+
+    TCP and unix socket options are given below:
+
+    ``TCP options: port=port[,host=host][,to=to][,ipv4][,ipv6][,nodelay]``
+        ``host`` for a listening socket specifies the local address to
+        be bound. For a connecting socket species the remote host to
+        connect to. ``host`` is optional for listening sockets. If not
+        specified it defaults to ``0.0.0.0``.
+
+        ``port`` for a listening socket specifies the local port to be
+        bound. For a connecting socket specifies the port on the remote
+        host to connect to. ``port`` can be given as either a port
+        number or a service name. ``port`` is required.
+
+        ``to`` is only relevant to listening sockets. If it is
+        specified, and ``port`` cannot be bound, QEMU will attempt to
+        bind to subsequent ports up to and including ``to`` until it
+        succeeds. ``to`` must be specified as a port number.
+
+        ``ipv4`` and ``ipv6`` specify that either IPv4 or IPv6 must be
+        used. If neither is specified the socket may use either
+        protocol.
+
+        ``nodelay`` disables the Nagle algorithm.
+
+    ``unix options: path=path[,abstract=on|off][,tight=on|off]``
+        ``path`` specifies the local path of the unix socket. ``path``
+        is required.
+        ``abstract`` specifies the use of the abstract socket namespace,
+        rather than the filesystem.  Optional, defaults to false.
+        ``tight`` sets the socket length of abstract sockets to their minimum,
+        rather than the full sun_path length.  Optional, defaults to true.
+
+``-chardev udp,id=id[,host=host],port=port[,localaddr=localaddr][,localport=localport][,ipv4][,ipv6]``
+    Sends all traffic from the guest to a remote host over UDP.
+
+    ``host`` specifies the remote host to connect to. If not specified
+    it defaults to ``localhost``.
+
+    ``port`` specifies the port on the remote host to connect to.
+    ``port`` is required.
+
+    ``localaddr`` specifies the local address to bind to. If not
+    specified it defaults to ``0.0.0.0``.
+
+    ``localport`` specifies the local port to bind to. If not specified
+    any available local port will be used.
+
+    ``ipv4`` and ``ipv6`` specify that either IPv4 or IPv6 must be used.
+    If neither is specified the device may use either protocol.
+
+``-chardev msmouse,id=id``
+    Forward QEMU's emulated msmouse events to the guest. ``msmouse``
+    does not take any options.
+
+``-chardev vc,id=id[[,width=width][,height=height]][[,cols=cols][,rows=rows]]``
+    Connect to a QEMU text console. ``vc`` may optionally be given a
+    specific size.
+
+    ``width`` and ``height`` specify the width and height respectively
+    of the console, in pixels.
+
+    ``cols`` and ``rows`` specify that the console be sized to fit a
+    text console with the given dimensions.
+
+``-chardev ringbuf,id=id[,size=size]``
+    Create a ring buffer with fixed size ``size``. size must be a power
+    of two and defaults to ``64K``.
+
+``-chardev file,id=id,path=path``
+    Log all traffic received from the guest to a file.
+
+    ``path`` specifies the path of the file to be opened. This file will
+    be created if it does not already exist, and overwritten if it does.
+    ``path`` is required.
+
+``-chardev pipe,id=id,path=path``
+    Create a two-way connection to the guest. The behaviour differs
+    slightly between Windows hosts and other hosts:
+
+    On Windows, a single duplex pipe will be created at
+    ``\\.pipe\path``.
+
+    On other hosts, 2 pipes will be created called ``path.in`` and
+    ``path.out``. Data written to ``path.in`` will be received by the
+    guest. Data written by the guest can be read from ``path.out``. QEMU
+    will not create these fifos, and requires them to be present.
+
+    ``path`` forms part of the pipe path as described above. ``path`` is
+    required.
+
+``-chardev console,id=id``
+    Send traffic from the guest to QEMU's standard output. ``console``
+    does not take any options.
+
+    ``console`` is only available on Windows hosts.
+
+``-chardev serial,id=id,path=path``
+    Send traffic from the guest to a serial device on the host.
+
+    On Unix hosts serial will actually accept any tty device, not only
+    serial lines.
+
+    ``path`` specifies the name of the serial device to open.
+
+``-chardev pty,id=id``
+    Create a new pseudo-terminal on the host and connect to it. ``pty``
+    does not take any options.
+
+    ``pty`` is not available on Windows hosts.
+
+``-chardev stdio,id=id[,signal=on|off]``
+    Connect to standard input and standard output of the QEMU process.
+
+    ``signal`` controls if signals are enabled on the terminal, that
+    includes exiting QEMU with the key sequence Control-c. This option
+    is enabled by default, use ``signal=off`` to disable it.
+
+``-chardev braille,id=id``
+    Connect to a local BrlAPI server. ``braille`` does not take any
+    options.
+
+``-chardev tty,id=id,path=path``
+    ``tty`` is only available on Linux, Sun, FreeBSD, NetBSD, OpenBSD
+    and DragonFlyBSD hosts. It is an alias for ``serial``.
+
+    ``path`` specifies the path to the tty. ``path`` is required.
+
+``-chardev parallel,id=id,path=path``
+  \
+``-chardev parport,id=id,path=path``
+    ``parallel`` is only available on Linux, FreeBSD and DragonFlyBSD
+    hosts.
+
+    Connect to a local parallel port.
+
+    ``path`` specifies the path to the parallel port device. ``path`` is
+    required.
+
+``-chardev spicevmc,id=id,debug=debug,name=name``
+    ``spicevmc`` is only available when spice support is built in.
+
+    ``debug`` debug level for spicevmc
+
+    ``name`` name of spice channel to connect to
+
+    Connect to a spice virtual machine channel, such as vdiport.
+
+``-chardev spiceport,id=id,debug=debug,name=name``
+    ``spiceport`` is only available when spice support is built in.
+
+    ``debug`` debug level for spicevmc
+
+    ``name`` name of spice port to connect to
+
+    Connect to a spice port, allowing a Spice client to handle the
+    traffic identified by a name (preferably a fqdn).
+ERST
+
+DEFHEADING()
+
+#ifdef CONFIG_TPM
+DEFHEADING(TPM device options:)
+
+DEF("tpmdev", HAS_ARG, QEMU_OPTION_tpmdev, \
+    "-tpmdev passthrough,id=id[,path=path][,cancel-path=path]\n"
+    "                use path to provide path to a character device; default is /dev/tpm0\n"
+    "                use cancel-path to provide path to TPM's cancel sysfs entry; if\n"
+    "                not provided it will be searched for in /sys/class/misc/tpm?/device\n"
+    "-tpmdev emulator,id=id,chardev=dev\n"
+    "                configure the TPM device using chardev backend\n",
+    QEMU_ARCH_ALL)
+SRST
+The general form of a TPM device option is:
+
+``-tpmdev backend,id=id[,options]``
+    The specific backend type will determine the applicable options. The
+    ``-tpmdev`` option creates the TPM backend and requires a
+    ``-device`` option that specifies the TPM frontend interface model.
+
+    Use ``-tpmdev help`` to print all available TPM backend types.
+
+The available backends are:
+
+``-tpmdev passthrough,id=id,path=path,cancel-path=cancel-path``
+    (Linux-host only) Enable access to the host's TPM using the
+    passthrough driver.
+
+    ``path`` specifies the path to the host's TPM device, i.e., on a
+    Linux host this would be ``/dev/tpm0``. ``path`` is optional and by
+    default ``/dev/tpm0`` is used.
+
+    ``cancel-path`` specifies the path to the host TPM device's sysfs
+    entry allowing for cancellation of an ongoing TPM command.
+    ``cancel-path`` is optional and by default QEMU will search for the
+    sysfs entry to use.
+
+    Some notes about using the host's TPM with the passthrough driver:
+
+    The TPM device accessed by the passthrough driver must not be used
+    by any other application on the host.
+
+    Since the host's firmware (BIOS/UEFI) has already initialized the
+    TPM, the VM's firmware (BIOS/UEFI) will not be able to initialize
+    the TPM again and may therefore not show a TPM-specific menu that
+    would otherwise allow the user to configure the TPM, e.g., allow the
+    user to enable/disable or activate/deactivate the TPM. Further, if
+    TPM ownership is released from within a VM then the host's TPM will
+    get disabled and deactivated. To enable and activate the TPM again
+    afterwards, the host has to be rebooted and the user is required to
+    enter the firmware's menu to enable and activate the TPM. If the TPM
+    is left disabled and/or deactivated most TPM commands will fail.
+
+    To create a passthrough TPM use the following two options:
+
+    ::
+
+        -tpmdev passthrough,id=tpm0 -device tpm-tis,tpmdev=tpm0
+
+    Note that the ``-tpmdev`` id is ``tpm0`` and is referenced by
+    ``tpmdev=tpm0`` in the device option.
+
+``-tpmdev emulator,id=id,chardev=dev``
+    (Linux-host only) Enable access to a TPM emulator using Unix domain
+    socket based chardev backend.
+
+    ``chardev`` specifies the unique ID of a character device backend
+    that provides connection to the software TPM server.
+
+    To create a TPM emulator backend device with chardev socket backend:
+
+    ::
+
+        -chardev socket,id=chrtpm,path=/tmp/swtpm-sock -tpmdev emulator,id=tpm0,chardev=chrtpm -device tpm-tis,tpmdev=tpm0
+ERST
+
+DEFHEADING()
+
+#endif
+
+DEFHEADING(Linux/Multiboot boot specific:)
+SRST
+When using these options, you can use a given Linux or Multiboot kernel
+without installing it in the disk image. It can be useful for easier
+testing of various kernels.
+
+
+ERST
+
+DEF("kernel", HAS_ARG, QEMU_OPTION_kernel, \
+    "-kernel bzImage use 'bzImage' as kernel image\n", QEMU_ARCH_ALL)
+SRST
+``-kernel bzImage``
+    Use bzImage as kernel image. The kernel can be either a Linux kernel
+    or in multiboot format.
+ERST
+
+DEF("append", HAS_ARG, QEMU_OPTION_append, \
+    "-append cmdline use 'cmdline' as kernel command line\n", QEMU_ARCH_ALL)
+SRST
+``-append cmdline``
+    Use cmdline as kernel command line
+ERST
+
+DEF("initrd", HAS_ARG, QEMU_OPTION_initrd, \
+           "-initrd file    use 'file' as initial ram disk\n", QEMU_ARCH_ALL)
+SRST
+``-initrd file``
+    Use file as initial ram disk.
+
+``-initrd "file1 arg=foo,file2"``
+    This syntax is only available with multiboot.
+
+    Use file1 and file2 as modules and pass arg=foo as parameter to the
+    first module.
+ERST
+
+DEF("dtb", HAS_ARG, QEMU_OPTION_dtb, \
+    "-dtb    file    use 'file' as device tree image\n", QEMU_ARCH_ALL)
+SRST
+``-dtb file``
+    Use file as a device tree binary (dtb) image and pass it to the
+    kernel on boot.
+ERST
+
+DEFHEADING()
+
+DEFHEADING(Debug/Expert options:)
+
+DEF("fw_cfg", HAS_ARG, QEMU_OPTION_fwcfg,
+    "-fw_cfg [name=],file=\n"
+    "                add named fw_cfg entry with contents from file\n"
+    "-fw_cfg [name=],string=\n"
+    "                add named fw_cfg entry with contents from string\n",
+    QEMU_ARCH_ALL)
+SRST
+``-fw_cfg [name=]name,file=file``
+    Add named fw\_cfg entry with contents from file file.
+
+``-fw_cfg [name=]name,string=str``
+    Add named fw\_cfg entry with contents from string str.
+
+    The terminating NUL character of the contents of str will not be
+    included as part of the fw\_cfg item data. To insert contents with
+    embedded NUL characters, you have to use the file parameter.
+
+    The fw\_cfg entries are passed by QEMU through to the guest.
+
+    Example:
+
+    ::
+
+            -fw_cfg name=opt/com.mycompany/blob,file=./my_blob.bin
+
+    creates an fw\_cfg entry named opt/com.mycompany/blob with contents
+    from ./my\_blob.bin.
+ERST
+
+DEF("serial", HAS_ARG, QEMU_OPTION_serial, \
+    "-serial dev     redirect the serial port to char device 'dev'\n",
+    QEMU_ARCH_ALL)
+SRST
+``-serial dev``
+    Redirect the virtual serial port to host character device dev. The
+    default device is ``vc`` in graphical mode and ``stdio`` in non
+    graphical mode.
+
+    This option can be used several times to simulate up to 4 serial
+    ports.
+
+    Use ``-serial none`` to disable all serial ports.
+
+    Available character devices are:
+
+    ``vc[:WxH]``
+        Virtual console. Optionally, a width and height can be given in
+        pixel with
+
+        ::
+
+            vc:800x600
+
+        It is also possible to specify width or height in characters:
+
+        ::
+
+            vc:80Cx24C
+
+    ``pty``
+        [Linux only] Pseudo TTY (a new PTY is automatically allocated)
+
+    ``none``
+        No device is allocated.
+
+    ``null``
+        void device
+
+    ``chardev:id``
+        Use a named character device defined with the ``-chardev``
+        option.
+
+    ``/dev/XXX``
+        [Linux only] Use host tty, e.g. ``/dev/ttyS0``. The host serial
+        port parameters are set according to the emulated ones.
+
+    ``/dev/parportN``
+        [Linux only, parallel port only] Use host parallel port N.
+        Currently SPP and EPP parallel port features can be used.
+
+    ``file:filename``
+        Write output to filename. No character can be read.
+
+    ``stdio``
+        [Unix only] standard input/output
+
+    ``pipe:filename``
+        name pipe filename
+
+    ``COMn``
+        [Windows only] Use host serial port n
+
+    ``udp:[remote_host]:remote_port[@[src_ip]:src_port]``
+        This implements UDP Net Console. When remote\_host or src\_ip
+        are not specified they default to ``0.0.0.0``. When not using a
+        specified src\_port a random port is automatically chosen.
+
+        If you just want a simple readonly console you can use
+        ``netcat`` or ``nc``, by starting QEMU with:
+        ``-serial udp::4555`` and nc as: ``nc -u -l -p 4555``. Any time
+        QEMU writes something to that port it will appear in the
+        netconsole session.
+
+        If you plan to send characters back via netconsole or you want
+        to stop and start QEMU a lot of times, you should have QEMU use
+        the same source port each time by using something like ``-serial
+        udp::4555@:4556`` to QEMU. Another approach is to use a patched
+        version of netcat which can listen to a TCP port and send and
+        receive characters via udp. If you have a patched version of
+        netcat which activates telnet remote echo and single char
+        transfer, then you can use the following options to set up a
+        netcat redirector to allow telnet on port 5555 to access the
+        QEMU port.
+
+        ``QEMU Options:``
+            -serial udp::4555@:4556
+
+        ``netcat options:``
+            -u -P 4555 -L 0.0.0.0:4556 -t -p 5555 -I -T
+
+        ``telnet options:``
+            localhost 5555
+
+    ``tcp:[host]:port[,server][,nowait][,nodelay][,reconnect=seconds]``
+        The TCP Net Console has two modes of operation. It can send the
+        serial I/O to a location or wait for a connection from a
+        location. By default the TCP Net Console is sent to host at the
+        port. If you use the server option QEMU will wait for a client
+        socket application to connect to the port before continuing,
+        unless the ``nowait`` option was specified. The ``nodelay``
+        option disables the Nagle buffering algorithm. The ``reconnect``
+        option only applies if noserver is set, if the connection goes
+        down it will attempt to reconnect at the given interval. If host
+        is omitted, 0.0.0.0 is assumed. Only one TCP connection at a
+        time is accepted. You can use ``telnet`` to connect to the
+        corresponding character device.
+
+        ``Example to send tcp console to 192.168.0.2 port 4444``
+            -serial tcp:192.168.0.2:4444
+
+        ``Example to listen and wait on port 4444 for connection``
+            -serial tcp::4444,server
+
+        ``Example to not wait and listen on ip 192.168.0.100 port 4444``
+            -serial tcp:192.168.0.100:4444,server,nowait
+
+    ``telnet:host:port[,server][,nowait][,nodelay]``
+        The telnet protocol is used instead of raw tcp sockets. The
+        options work the same as if you had specified ``-serial tcp``.
+        The difference is that the port acts like a telnet server or
+        client using telnet option negotiation. This will also allow you
+        to send the MAGIC\_SYSRQ sequence if you use a telnet that
+        supports sending the break sequence. Typically in unix telnet
+        you do it with Control-] and then type "send break" followed by
+        pressing the enter key.
+
+    ``websocket:host:port,server[,nowait][,nodelay]``
+        The WebSocket protocol is used instead of raw tcp socket. The
+        port acts as a WebSocket server. Client mode is not supported.
+
+    ``unix:path[,server][,nowait][,reconnect=seconds]``
+        A unix domain socket is used instead of a tcp socket. The option
+        works the same as if you had specified ``-serial tcp`` except
+        the unix domain socket path is used for connections.
+
+    ``mon:dev_string``
+        This is a special option to allow the monitor to be multiplexed
+        onto another serial port. The monitor is accessed with key
+        sequence of Control-a and then pressing c. dev\_string should be
+        any one of the serial devices specified above. An example to
+        multiplex the monitor onto a telnet server listening on port
+        4444 would be:
+
+        ``-serial mon:telnet::4444,server,nowait``
+
+        When the monitor is multiplexed to stdio in this way, Ctrl+C
+        will not terminate QEMU any more but will be passed to the guest
+        instead.
+
+    ``braille``
+        Braille device. This will use BrlAPI to display the braille
+        output on a real or fake device.
+
+    ``msmouse``
+        Three button serial mouse. Configure the guest to use Microsoft
+        protocol.
+ERST
+
+DEF("parallel", HAS_ARG, QEMU_OPTION_parallel, \
+    "-parallel dev   redirect the parallel port to char device 'dev'\n",
+    QEMU_ARCH_ALL)
+SRST
+``-parallel dev``
+    Redirect the virtual parallel port to host device dev (same devices
+    as the serial port). On Linux hosts, ``/dev/parportN`` can be used
+    to use hardware devices connected on the corresponding host parallel
+    port.
+
+    This option can be used several times to simulate up to 3 parallel
+    ports.
+
+    Use ``-parallel none`` to disable all parallel ports.
+ERST
+
+DEF("monitor", HAS_ARG, QEMU_OPTION_monitor, \
+    "-monitor dev    redirect the monitor to char device 'dev'\n",
+    QEMU_ARCH_ALL)
+SRST
+``-monitor dev``
+    Redirect the monitor to host device dev (same devices as the serial
+    port). The default device is ``vc`` in graphical mode and ``stdio``
+    in non graphical mode. Use ``-monitor none`` to disable the default
+    monitor.
+ERST
+DEF("qmp", HAS_ARG, QEMU_OPTION_qmp, \
+    "-qmp dev        like -monitor but opens in 'control' mode\n",
+    QEMU_ARCH_ALL)
+SRST
+``-qmp dev``
+    Like -monitor but opens in 'control' mode.
+ERST
+DEF("qmp-pretty", HAS_ARG, QEMU_OPTION_qmp_pretty, \
+    "-qmp-pretty dev like -qmp but uses pretty JSON formatting\n",
+    QEMU_ARCH_ALL)
+SRST
+``-qmp-pretty dev``
+    Like -qmp but uses pretty JSON formatting.
+ERST
+
+DEF("mon", HAS_ARG, QEMU_OPTION_mon, \
+    "-mon [chardev=]name[,mode=readline|control][,pretty[=on|off]]\n", QEMU_ARCH_ALL)
+SRST
+``-mon [chardev=]name[,mode=readline|control][,pretty[=on|off]]``
+    Setup monitor on chardev name. ``pretty`` turns on JSON pretty
+    printing easing human reading and debugging.
+ERST
+
+DEF("debugcon", HAS_ARG, QEMU_OPTION_debugcon, \
+    "-debugcon dev   redirect the debug console to char device 'dev'\n",
+    QEMU_ARCH_ALL)
+SRST
+``-debugcon dev``
+    Redirect the debug console to host device dev (same devices as the
+    serial port). The debug console is an I/O port which is typically
+    port 0xe9; writing to that I/O port sends output to this device. The
+    default device is ``vc`` in graphical mode and ``stdio`` in non
+    graphical mode.
+ERST
+
+DEF("pidfile", HAS_ARG, QEMU_OPTION_pidfile, \
+    "-pidfile file   write PID to 'file'\n", QEMU_ARCH_ALL)
+SRST
+``-pidfile file``
+    Store the QEMU process PID in file. It is useful if you launch QEMU
+    from a script.
+ERST
+
+DEF("singlestep", 0, QEMU_OPTION_singlestep, \
+    "-singlestep     always run in singlestep mode\n", QEMU_ARCH_ALL)
+SRST
+``-singlestep``
+    Run the emulation in single step mode.
+ERST
+
+DEF("preconfig", 0, QEMU_OPTION_preconfig, \
+    "--preconfig     pause QEMU before machine is initialized (experimental)\n",
+    QEMU_ARCH_ALL)
+SRST
+``--preconfig``
+    Pause QEMU for interactive configuration before the machine is
+    created, which allows querying and configuring properties that will
+    affect machine initialization. Use QMP command 'x-exit-preconfig' to
+    exit the preconfig state and move to the next state (i.e. run guest
+    if -S isn't used or pause the second time if -S is used). This
+    option is experimental.
+ERST
+
+DEF("S", 0, QEMU_OPTION_S, \
+    "-S              freeze CPU at startup (use 'c' to start execution)\n",
+    QEMU_ARCH_ALL)
+SRST
+``-S``
+    Do not start CPU at startup (you must type 'c' in the monitor).
+ERST
+
+DEF("realtime", HAS_ARG, QEMU_OPTION_realtime,
+    "-realtime [mlock=on|off]\n"
+    "                run qemu with realtime features\n"
+    "                mlock=on|off controls mlock support (default: on)\n",
+    QEMU_ARCH_ALL)
+SRST
+``-realtime mlock=on|off``
+    Run qemu with realtime features. mlocking qemu and guest memory can
+    be enabled via ``mlock=on`` (enabled by default).
+ERST
+
+DEF("overcommit", HAS_ARG, QEMU_OPTION_overcommit,
+    "-overcommit [mem-lock=on|off][cpu-pm=on|off]\n"
+    "                run qemu with overcommit hints\n"
+    "                mem-lock=on|off controls memory lock support (default: off)\n"
+    "                cpu-pm=on|off controls cpu power management (default: off)\n",
+    QEMU_ARCH_ALL)
+SRST
+``-overcommit mem-lock=on|off``
+  \ 
+``-overcommit cpu-pm=on|off``
+    Run qemu with hints about host resource overcommit. The default is
+    to assume that host overcommits all resources.
+
+    Locking qemu and guest memory can be enabled via ``mem-lock=on``
+    (disabled by default). This works when host memory is not
+    overcommitted and reduces the worst-case latency for guest. This is
+    equivalent to ``realtime``.
+
+    Guest ability to manage power state of host cpus (increasing latency
+    for other processes on the same host cpu, but decreasing latency for
+    guest) can be enabled via ``cpu-pm=on`` (disabled by default). This
+    works best when host CPU is not overcommitted. When used, host
+    estimates of CPU cycle and power utilization will be incorrect, not
+    taking into account guest idle time.
+ERST
+
+DEF("gdb", HAS_ARG, QEMU_OPTION_gdb, \
+    "-gdb dev        accept gdb connection on 'dev'. (QEMU defaults to starting\n"
+    "                the guest without waiting for gdb to connect; use -S too\n"
+    "                if you want it to not start execution.)\n",
+    QEMU_ARCH_ALL)
+SRST
+``-gdb dev``
+    Accept a gdb connection on device dev (see the :ref:`GDB usage` chapter
+    in the System Emulation Users Guide). Note that this option does not pause QEMU
+    execution -- if you want QEMU to not start the guest until you
+    connect with gdb and issue a ``continue`` command, you will need to
+    also pass the ``-S`` option to QEMU.
+
+    The most usual configuration is to listen on a local TCP socket::
+
+        -gdb tcp::3117
+
+    but you can specify other backends; UDP, pseudo TTY, or even stdio
+    are all reasonable use cases. For example, a stdio connection
+    allows you to start QEMU from within gdb and establish the
+    connection via a pipe:
+
+    .. parsed-literal::
+
+        (gdb) target remote | exec |qemu_system| -gdb stdio ...
+ERST
+
+DEF("s", 0, QEMU_OPTION_s, \
+    "-s              shorthand for -gdb tcp::" DEFAULT_GDBSTUB_PORT "\n",
+    QEMU_ARCH_ALL)
+SRST
+``-s``
+    Shorthand for -gdb tcp::1234, i.e. open a gdbserver on TCP port 1234
+    (see the :ref:`GDB usage` chapter in the System Emulation Users Guide).
+ERST
+
+DEF("d", HAS_ARG, QEMU_OPTION_d, \
+    "-d item1,...    enable logging of specified items (use '-d help' for a list of log items)\n",
+    QEMU_ARCH_ALL)
+SRST
+``-d item1[,...]``
+    Enable logging of specified items. Use '-d help' for a list of log
+    items.
+ERST
+
+DEF("D", HAS_ARG, QEMU_OPTION_D, \
+    "-D logfile      output log to logfile (default stderr)\n",
+    QEMU_ARCH_ALL)
+SRST
+``-D logfile``
+    Output log in logfile instead of to stderr
+ERST
+
+DEF("dfilter", HAS_ARG, QEMU_OPTION_DFILTER, \
+    "-dfilter range,..  filter debug output to range of addresses (useful for -d cpu,exec,etc..)\n",
+    QEMU_ARCH_ALL)
+SRST
+``-dfilter range1[,...]``
+    Filter debug output to that relevant to a range of target addresses.
+    The filter spec can be either start+size, start-size or start..end
+    where start end and size are the addresses and sizes required. For
+    example:
+
+    ::
+
+            -dfilter 0x8000..0x8fff,0xffffffc000080000+0x200,0xffffffc000060000-0x1000
+
+    Will dump output for any code in the 0x1000 sized block starting at
+    0x8000 and the 0x200 sized block starting at 0xffffffc000080000 and
+    another 0x1000 sized block starting at 0xffffffc00005f000.
+ERST
+
+DEF("seed", HAS_ARG, QEMU_OPTION_seed, \
+    "-seed number       seed the pseudo-random number generator\n",
+    QEMU_ARCH_ALL)
+SRST
+``-seed number``
+    Force the guest to use a deterministic pseudo-random number
+    generator, seeded with number. This does not affect crypto routines
+    within the host.
+ERST
+
+DEF("L", HAS_ARG, QEMU_OPTION_L, \
+    "-L path         set the directory for the BIOS, VGA BIOS and keymaps\n",
+    QEMU_ARCH_ALL)
+SRST
+``-L  path``
+    Set the directory for the BIOS, VGA BIOS and keymaps.
+
+    To list all the data directories, use ``-L help``.
+ERST
+
+DEF("bios", HAS_ARG, QEMU_OPTION_bios, \
+    "-bios file      set the filename for the BIOS\n", QEMU_ARCH_ALL)
+SRST
+``-bios file``
+    Set the filename for the BIOS.
+ERST
+
+DEF("enable-kvm", 0, QEMU_OPTION_enable_kvm, \
+    "-enable-kvm     enable KVM full virtualization support\n", QEMU_ARCH_ALL)
+SRST
+``-enable-kvm``
+    Enable KVM full virtualization support. This option is only
+    available if KVM support is enabled when compiling.
+ERST
+
+DEF("xen-domid", HAS_ARG, QEMU_OPTION_xen_domid,
+    "-xen-domid id   specify xen guest domain id\n", QEMU_ARCH_ALL)
+DEF("xen-attach", 0, QEMU_OPTION_xen_attach,
+    "-xen-attach     attach to existing xen domain\n"
+    "                libxl will use this when starting QEMU\n",
+    QEMU_ARCH_ALL)
+DEF("xen-domid-restrict", 0, QEMU_OPTION_xen_domid_restrict,
+    "-xen-domid-restrict     restrict set of available xen operations\n"
+    "                        to specified domain id. (Does not affect\n"
+    "                        xenpv machine type).\n",
+    QEMU_ARCH_ALL)
+SRST
+``-xen-domid id``
+    Specify xen guest domain id (XEN only).
+
+``-xen-attach``
+    Attach to existing xen domain. libxl will use this when starting
+    QEMU (XEN only). Restrict set of available xen operations to
+    specified domain id (XEN only).
+ERST
+
+DEF("no-reboot", 0, QEMU_OPTION_no_reboot, \
+    "-no-reboot      exit instead of rebooting\n", QEMU_ARCH_ALL)
+SRST
+``-no-reboot``
+    Exit instead of rebooting.
+ERST
+
+DEF("no-shutdown", 0, QEMU_OPTION_no_shutdown, \
+    "-no-shutdown    stop before shutdown\n", QEMU_ARCH_ALL)
+SRST
+``-no-shutdown``
+    Don't exit QEMU on guest shutdown, but instead only stop the
+    emulation. This allows for instance switching to monitor to commit
+    changes to the disk image.
+ERST
+
+DEF("loadvm", HAS_ARG, QEMU_OPTION_loadvm, \
+    "-loadvm [tag|id]\n" \
+    "                start right away with a saved state (loadvm in monitor)\n",
+    QEMU_ARCH_ALL)
+SRST
+``-loadvm file``
+    Start right away with a saved state (``loadvm`` in monitor)
+ERST
+
+#ifndef _WIN32
+DEF("daemonize", 0, QEMU_OPTION_daemonize, \
+    "-daemonize      daemonize QEMU after initializing\n", QEMU_ARCH_ALL)
+#endif
+SRST
+``-daemonize``
+    Daemonize the QEMU process after initialization. QEMU will not
+    detach from standard IO until it is ready to receive connections on
+    any of its devices. This option is a useful way for external
+    programs to launch QEMU without having to cope with initialization
+    race conditions.
+ERST
+
+DEF("option-rom", HAS_ARG, QEMU_OPTION_option_rom, \
+    "-option-rom rom load a file, rom, into the option ROM space\n",
+    QEMU_ARCH_ALL)
+SRST
+``-option-rom file``
+    Load the contents of file as an option ROM. This option is useful to
+    load things like EtherBoot.
+ERST
+
+DEF("rtc", HAS_ARG, QEMU_OPTION_rtc, \
+    "-rtc [base=utc|localtime|][,clock=host|rt|vm][,driftfix=none|slew]\n" \
+    "                set the RTC base and clock, enable drift fix for clock ticks (x86 only)\n",
+    QEMU_ARCH_ALL)
+
+SRST
+``-rtc [base=utc|localtime|datetime][,clock=host|rt|vm][,driftfix=none|slew]``
+    Specify ``base`` as ``utc`` or ``localtime`` to let the RTC start at
+    the current UTC or local time, respectively. ``localtime`` is
+    required for correct date in MS-DOS or Windows. To start at a
+    specific point in time, provide datetime in the format
+    ``2006-06-17T16:01:21`` or ``2006-06-17``. The default base is UTC.
+
+    By default the RTC is driven by the host system time. This allows
+    using of the RTC as accurate reference clock inside the guest,
+    specifically if the host time is smoothly following an accurate
+    external reference clock, e.g. via NTP. If you want to isolate the
+    guest time from the host, you can set ``clock`` to ``rt`` instead,
+    which provides a host monotonic clock if host support it. To even
+    prevent the RTC from progressing during suspension, you can set
+    ``clock`` to ``vm`` (virtual clock). '\ ``clock=vm``\ ' is
+    recommended especially in icount mode in order to preserve
+    determinism; however, note that in icount mode the speed of the
+    virtual clock is variable and can in general differ from the host
+    clock.
+
+    Enable ``driftfix`` (i386 targets only) if you experience time drift
+    problems, specifically with Windows' ACPI HAL. This option will try
+    to figure out how many timer interrupts were not processed by the
+    Windows guest and will re-inject them.
+ERST
+
+DEF("icount", HAS_ARG, QEMU_OPTION_icount, \
+    "-icount [shift=N|auto][,align=on|off][,sleep=on|off,rr=record|replay,rrfile=,rrsnapshot=]\n" \
+    "                enable virtual instruction counter with 2^N clock ticks per\n" \
+    "                instruction, enable aligning the host and virtual clocks\n" \
+    "                or disable real time cpu sleeping\n", QEMU_ARCH_ALL)
+SRST
+``-icount [shift=N|auto][,rr=record|replay,rrfile=filename,rrsnapshot=snapshot]``
+    Enable virtual instruction counter. The virtual cpu will execute one
+    instruction every 2^N ns of virtual time. If ``auto`` is specified
+    then the virtual cpu speed will be automatically adjusted to keep
+    virtual time within a few seconds of real time.
+
+    When the virtual cpu is sleeping, the virtual time will advance at
+    default speed unless ``sleep=on|off`` is specified. With
+    ``sleep=on|off``, the virtual time will jump to the next timer
+    deadline instantly whenever the virtual cpu goes to sleep mode and
+    will not advance if no timer is enabled. This behavior give
+    deterministic execution times from the guest point of view.
+
+    Note that while this option can give deterministic behavior, it does
+    not provide cycle accurate emulation. Modern CPUs contain
+    superscalar out of order cores with complex cache hierarchies. The
+    number of instructions executed often has little or no correlation
+    with actual performance.
+
+    ``align=on`` will activate the delay algorithm which will try to
+    synchronise the host clock and the virtual clock. The goal is to
+    have a guest running at the real frequency imposed by the shift
+    option. Whenever the guest clock is behind the host clock and if
+    ``align=on`` is specified then we print a message to the user to
+    inform about the delay. Currently this option does not work when
+    ``shift`` is ``auto``. Note: The sync algorithm will work for those
+    shift values for which the guest clock runs ahead of the host clock.
+    Typically this happens when the shift value is high (how high
+    depends on the host machine).
+
+    When ``rr`` option is specified deterministic record/replay is
+    enabled. Replay log is written into filename file in record mode and
+    read from this file in replay mode.
+
+    Option rrsnapshot is used to create new vm snapshot named snapshot
+    at the start of execution recording. In replay mode this option is
+    used to load the initial VM state.
+ERST
+
+DEF("watchdog", HAS_ARG, QEMU_OPTION_watchdog, \
+    "-watchdog model\n" \
+    "                enable virtual hardware watchdog [default=none]\n",
+    QEMU_ARCH_ALL)
+SRST
+``-watchdog model``
+    Create a virtual hardware watchdog device. Once enabled (by a guest
+    action), the watchdog must be periodically polled by an agent inside
+    the guest or else the guest will be restarted. Choose a model for
+    which your guest has drivers.
+
+    The model is the model of hardware watchdog to emulate. Use
+    ``-watchdog help`` to list available hardware models. Only one
+    watchdog can be enabled for a guest.
+
+    The following models may be available:
+
+    ``ib700``
+        iBASE 700 is a very simple ISA watchdog with a single timer.
+
+    ``i6300esb``
+        Intel 6300ESB I/O controller hub is a much more featureful
+        PCI-based dual-timer watchdog.
+
+    ``diag288``
+        A virtual watchdog for s390x backed by the diagnose 288
+        hypercall (currently KVM only).
+ERST
+
+DEF("watchdog-action", HAS_ARG, QEMU_OPTION_watchdog_action, \
+    "-watchdog-action reset|shutdown|poweroff|inject-nmi|pause|debug|none\n" \
+    "                action when watchdog fires [default=reset]\n",
+    QEMU_ARCH_ALL)
+SRST
+``-watchdog-action action``
+    The action controls what QEMU will do when the watchdog timer
+    expires. The default is ``reset`` (forcefully reset the guest).
+    Other possible actions are: ``shutdown`` (attempt to gracefully
+    shutdown the guest), ``poweroff`` (forcefully poweroff the guest),
+    ``inject-nmi`` (inject a NMI into the guest), ``pause`` (pause the
+    guest), ``debug`` (print a debug message and continue), or ``none``
+    (do nothing).
+
+    Note that the ``shutdown`` action requires that the guest responds
+    to ACPI signals, which it may not be able to do in the sort of
+    situations where the watchdog would have expired, and thus
+    ``-watchdog-action shutdown`` is not recommended for production use.
+
+    Examples:
+
+    ``-watchdog i6300esb -watchdog-action pause``; \ ``-watchdog ib700``
+
+ERST
+
+DEF("echr", HAS_ARG, QEMU_OPTION_echr, \
+    "-echr chr       set terminal escape character instead of ctrl-a\n",
+    QEMU_ARCH_ALL)
+SRST
+``-echr numeric_ascii_value``
+    Change the escape character used for switching to the monitor when
+    using monitor and serial sharing. The default is ``0x01`` when using
+    the ``-nographic`` option. ``0x01`` is equal to pressing
+    ``Control-a``. You can select a different character from the ascii
+    control keys where 1 through 26 map to Control-a through Control-z.
+    For instance you could use the either of the following to change the
+    escape character to Control-t.
+
+    ``-echr 0x14``; \ ``-echr 20``
+
+ERST
+
+DEF("show-cursor", 0, QEMU_OPTION_show_cursor, \
+    "-show-cursor    show cursor\n", QEMU_ARCH_ALL)
+SRST
+``-show-cursor``
+    Show cursor.
+ERST
+
+DEF("tb-size", HAS_ARG, QEMU_OPTION_tb_size, \
+    "-tb-size n      set TB size\n", QEMU_ARCH_ALL)
+SRST
+``-tb-size n``
+    Set TCG translation block cache size. Deprecated, use
+    '\ ``-accel tcg,tb-size=n``\ ' instead.
+ERST
+
+DEF("incoming", HAS_ARG, QEMU_OPTION_incoming, \
+    "-incoming tcp:[host]:port[,to=maxport][,ipv4][,ipv6]\n" \
+    "-incoming rdma:host:port[,ipv4][,ipv6]\n" \
+    "-incoming unix:socketpath\n" \
+    "                prepare for incoming migration, listen on\n" \
+    "                specified protocol and socket address\n" \
+    "-incoming fd:fd\n" \
+    "-incoming exec:cmdline\n" \
+    "                accept incoming migration on given file descriptor\n" \
+    "                or from given external command\n" \
+    "-incoming defer\n" \
+    "                wait for the URI to be specified via migrate_incoming\n",
+    QEMU_ARCH_ALL)
+SRST
+``-incoming tcp:[host]:port[,to=maxport][,ipv4][,ipv6]``
+  \ 
+``-incoming rdma:host:port[,ipv4][,ipv6]``
+    Prepare for incoming migration, listen on a given tcp port.
+
+``-incoming unix:socketpath``
+    Prepare for incoming migration, listen on a given unix socket.
+
+``-incoming fd:fd``
+    Accept incoming migration from a given filedescriptor.
+
+``-incoming exec:cmdline``
+    Accept incoming migration as an output from specified external
+    command.
+
+``-incoming defer``
+    Wait for the URI to be specified via migrate\_incoming. The monitor
+    can be used to change settings (such as migration parameters) prior
+    to issuing the migrate\_incoming to allow the migration to begin.
+ERST
+
+DEF("only-migratable", 0, QEMU_OPTION_only_migratable, \
+    "-only-migratable     allow only migratable devices\n", QEMU_ARCH_ALL)
+SRST
+``-only-migratable``
+    Only allow migratable devices. Devices will not be allowed to enter
+    an unmigratable state.
+ERST
+
+DEF("nodefaults", 0, QEMU_OPTION_nodefaults, \
+    "-nodefaults     don't create default devices\n", QEMU_ARCH_ALL)
+SRST
+``-nodefaults``
+    Don't create default devices. Normally, QEMU sets the default
+    devices like serial port, parallel port, virtual console, monitor
+    device, VGA adapter, floppy and CD-ROM drive and others. The
+    ``-nodefaults`` option will disable all those default devices.
+ERST
+
+#ifndef _WIN32
+DEF("chroot", HAS_ARG, QEMU_OPTION_chroot, \
+    "-chroot dir     chroot to dir just before starting the VM\n",
+    QEMU_ARCH_ALL)
+#endif
+SRST
+``-chroot dir``
+    Immediately before starting guest execution, chroot to the specified
+    directory. Especially useful in combination with -runas.
+ERST
+
+#ifndef _WIN32
+DEF("runas", HAS_ARG, QEMU_OPTION_runas, \
+    "-runas user     change to user id user just before starting the VM\n" \
+    "                user can be numeric uid:gid instead\n",
+    QEMU_ARCH_ALL)
+#endif
+SRST
+``-runas user``
+    Immediately before starting guest execution, drop root privileges,
+    switching to the specified user.
+ERST
+
+DEF("prom-env", HAS_ARG, QEMU_OPTION_prom_env,
+    "-prom-env variable=value\n"
+    "                set OpenBIOS nvram variables\n",
+    QEMU_ARCH_PPC | QEMU_ARCH_SPARC)
+SRST
+``-prom-env variable=value``
+    Set OpenBIOS nvram variable to given value (PPC, SPARC only).
+
+    ::
+
+        qemu-system-sparc -prom-env 'auto-boot?=false' \
+         -prom-env 'boot-device=sd(0,2,0):d' -prom-env 'boot-args=linux single'
+
+    ::
+
+        qemu-system-ppc -prom-env 'auto-boot?=false' \
+         -prom-env 'boot-device=hd:2,\yaboot' \
+         -prom-env 'boot-args=conf=hd:2,\yaboot.conf'
+ERST
+DEF("semihosting", 0, QEMU_OPTION_semihosting,
+    "-semihosting    semihosting mode\n",
+    QEMU_ARCH_ARM | QEMU_ARCH_M68K | QEMU_ARCH_XTENSA | QEMU_ARCH_LM32 |
+    QEMU_ARCH_MIPS | QEMU_ARCH_NIOS2)
+SRST
+``-semihosting``
+    Enable semihosting mode (ARM, M68K, Xtensa, MIPS, Nios II only).
+
+    Note that this allows guest direct access to the host filesystem, so
+    should only be used with a trusted guest OS.
+
+    See the -semihosting-config option documentation for further
+    information about the facilities this enables.
+ERST
+DEF("semihosting-config", HAS_ARG, QEMU_OPTION_semihosting_config,
+    "-semihosting-config [enable=on|off][,target=native|gdb|auto][,chardev=id][,arg=str[,...]]\n" \
+    "                semihosting configuration\n",
+QEMU_ARCH_ARM | QEMU_ARCH_M68K | QEMU_ARCH_XTENSA | QEMU_ARCH_LM32 |
+QEMU_ARCH_MIPS | QEMU_ARCH_NIOS2)
+SRST
+``-semihosting-config [enable=on|off][,target=native|gdb|auto][,chardev=id][,arg=str[,...]]``
+    Enable and configure semihosting (ARM, M68K, Xtensa, MIPS, Nios II
+    only).
+
+    Note that this allows guest direct access to the host filesystem, so
+    should only be used with a trusted guest OS.
+
+    On Arm this implements the standard semihosting API, version 2.0.
+
+    On M68K this implements the "ColdFire GDB" interface used by
+    libgloss.
+
+    Xtensa semihosting provides basic file IO calls, such as
+    open/read/write/seek/select. Tensilica baremetal libc for ISS and
+    linux platform "sim" use this interface.
+
+    ``target=native|gdb|auto``
+        Defines where the semihosting calls will be addressed, to QEMU
+        (``native``) or to GDB (``gdb``). The default is ``auto``, which
+        means ``gdb`` during debug sessions and ``native`` otherwise.
+
+    ``chardev=str1``
+        Send the output to a chardev backend output for native or auto
+        output when not in gdb
+
+    ``arg=str1,arg=str2,...``
+        Allows the user to pass input arguments, and can be used
+        multiple times to build up a list. The old-style
+        ``-kernel``/``-append`` method of passing a command line is
+        still supported for backward compatibility. If both the
+        ``--semihosting-config arg`` and the ``-kernel``/``-append`` are
+        specified, the former is passed to semihosting as it always
+        takes precedence.
+ERST
+DEF("old-param", 0, QEMU_OPTION_old_param,
+    "-old-param      old param mode\n", QEMU_ARCH_ARM)
+SRST
+``-old-param``
+    Old param mode (ARM only).
+ERST
+
+DEF("sandbox", HAS_ARG, QEMU_OPTION_sandbox, \
+    "-sandbox on[,obsolete=allow|deny][,elevateprivileges=allow|deny|children]\n" \
+    "          [,spawn=allow|deny][,resourcecontrol=allow|deny]\n" \
+    "                Enable seccomp mode 2 system call filter (default 'off').\n" \
+    "                use 'obsolete' to allow obsolete system calls that are provided\n" \
+    "                    by the kernel, but typically no longer used by modern\n" \
+    "                    C library implementations.\n" \
+    "                use 'elevateprivileges' to allow or deny QEMU process to elevate\n" \
+    "                    its privileges by blacklisting all set*uid|gid system calls.\n" \
+    "                    The value 'children' will deny set*uid|gid system calls for\n" \
+    "                    main QEMU process but will allow forks and execves to run unprivileged\n" \
+    "                use 'spawn' to avoid QEMU to spawn new threads or processes by\n" \
+    "                     blacklisting *fork and execve\n" \
+    "                use 'resourcecontrol' to disable process affinity and schedular priority\n",
+    QEMU_ARCH_ALL)
+SRST
+``-sandbox arg[,obsolete=string][,elevateprivileges=string][,spawn=string][,resourcecontrol=string]``
+    Enable Seccomp mode 2 system call filter. 'on' will enable syscall
+    filtering and 'off' will disable it. The default is 'off'.
+
+    ``obsolete=string``
+        Enable Obsolete system calls
+
+    ``elevateprivileges=string``
+        Disable set\*uid\|gid system calls
+
+    ``spawn=string``
+        Disable \*fork and execve
+
+    ``resourcecontrol=string``
+        Disable process affinity and schedular priority
+ERST
+
+DEF("readconfig", HAS_ARG, QEMU_OPTION_readconfig,
+    "-readconfig \n", QEMU_ARCH_ALL)
+SRST
+``-readconfig file``
+    Read device configuration from file. This approach is useful when
+    you want to spawn QEMU process with many command line options but
+    you don't want to exceed the command line character limit.
+ERST
+DEF("writeconfig", HAS_ARG, QEMU_OPTION_writeconfig,
+    "-writeconfig \n"
+    "                read/write config file\n", QEMU_ARCH_ALL)
+SRST
+``-writeconfig file``
+    Write device configuration to file. The file can be either filename
+    to save command line and device configuration into file or dash
+    ``-``) character to print the output to stdout. This can be later
+    used as input file for ``-readconfig`` option.
+ERST
+
+DEF("no-user-config", 0, QEMU_OPTION_nouserconfig,
+    "-no-user-config\n"
+    "                do not load default user-provided config files at startup\n",
+    QEMU_ARCH_ALL)
+SRST
+``-no-user-config``
+    The ``-no-user-config`` option makes QEMU not load any of the
+    user-provided config files on sysconfdir.
+ERST
+
+DEF("trace", HAS_ARG, QEMU_OPTION_trace,
+    "-trace [[enable=]][,events=][,file=]\n"
+    "                specify tracing options\n",
+    QEMU_ARCH_ALL)
+SRST
+``-trace [[enable=]pattern][,events=file][,file=file]``
+  .. include:: ../qemu-option-trace.rst.inc
+
+ERST
+DEF("plugin", HAS_ARG, QEMU_OPTION_plugin,
+    "-plugin [file=][,arg=]\n"
+    "                load a plugin\n",
+    QEMU_ARCH_ALL)
+SRST
+``-plugin file=file[,arg=string]``
+    Load a plugin.
+
+    ``file=file``
+        Load the given plugin from a shared library file.
+
+    ``arg=string``
+        Argument string passed to the plugin. (Can be given multiple
+        times.)
+ERST
+
+HXCOMM Internal use
+DEF("qtest", HAS_ARG, QEMU_OPTION_qtest, "", QEMU_ARCH_ALL)
+DEF("qtest-log", HAS_ARG, QEMU_OPTION_qtest_log, "", QEMU_ARCH_ALL)
+
+#ifdef __linux__
+DEF("enable-fips", 0, QEMU_OPTION_enablefips,
+    "-enable-fips    enable FIPS 140-2 compliance\n",
+    QEMU_ARCH_ALL)
+#endif
+SRST
+``-enable-fips``
+    Enable FIPS 140-2 compliance mode.
+ERST
+
+DEF("msg", HAS_ARG, QEMU_OPTION_msg,
+    "-msg [timestamp[=on|off]][,guest-name=[on|off]]\n"
+    "                control error message format\n"
+    "                timestamp=on enables timestamps (default: off)\n"
+    "                guest-name=on enables guest name prefix but only if\n"
+    "                              -name guest option is set (default: off)\n",
+    QEMU_ARCH_ALL)
+SRST
+``-msg [timestamp[=on|off]][,guest-name[=on|off]]``
+    Control error message format.
+
+    ``timestamp=on|off``
+        Prefix messages with a timestamp. Default is off.
+
+    ``guest-name=on|off``
+        Prefix messages with guest name but only if -name guest option is set
+        otherwise the option is ignored. Default is off.
+ERST
+
+DEF("dump-vmstate", HAS_ARG, QEMU_OPTION_dump_vmstate,
+    "-dump-vmstate \n"
+    "                Output vmstate information in JSON format to file.\n"
+    "                Use the scripts/vmstate-static-checker.py file to\n"
+    "                check for possible regressions in migration code\n"
+    "                by comparing two such vmstate dumps.\n",
+    QEMU_ARCH_ALL)
+SRST
+``-dump-vmstate file``
+    Dump json-encoded vmstate information for current machine type to
+    file in file
+ERST
+
+DEF("enable-sync-profile", 0, QEMU_OPTION_enable_sync_profile,
+    "-enable-sync-profile\n"
+    "                enable synchronization profiling\n",
+    QEMU_ARCH_ALL)
+SRST
+``-enable-sync-profile``
+    Enable synchronization profiling.
+ERST
+
+DEFHEADING()
+
+DEFHEADING(Generic object creation:)
+
+DEF("object", HAS_ARG, QEMU_OPTION_object,
+    "-object TYPENAME[,PROP1=VALUE1,...]\n"
+    "                create a new object of type TYPENAME setting properties\n"
+    "                in the order they are specified.  Note that the 'id'\n"
+    "                property must be set.  These objects are placed in the\n"
+    "                '/objects' path.\n",
+    QEMU_ARCH_ALL)
+SRST
+``-object typename[,prop1=value1,...]``
+    Create a new object of type typename setting properties in the order
+    they are specified. Note that the 'id' property must be set. These
+    objects are placed in the '/objects' path.
+
+    ``-object memory-backend-file,id=id,size=size,mem-path=dir,share=on|off,discard-data=on|off,merge=on|off,dump=on|off,prealloc=on|off,host-nodes=host-nodes,policy=default|preferred|bind|interleave,align=align``
+        Creates a memory file backend object, which can be used to back
+        the guest RAM with huge pages.
+
+        The ``id`` parameter is a unique ID that will be used to
+        reference this memory region when configuring the ``-numa``
+        argument.
+
+        The ``size`` option provides the size of the memory region, and
+        accepts common suffixes, eg ``500M``.
+
+        The ``mem-path`` provides the path to either a shared memory or
+        huge page filesystem mount.
+
+        The ``share`` boolean option determines whether the memory
+        region is marked as private to QEMU, or shared. The latter
+        allows a co-operating external process to access the QEMU memory
+        region.
+
+        The ``share`` is also required for pvrdma devices due to
+        limitations in the RDMA API provided by Linux.
+
+        Setting share=on might affect the ability to configure NUMA
+        bindings for the memory backend under some circumstances, see
+        Documentation/vm/numa\_memory\_policy.txt on the Linux kernel
+        source tree for additional details.
+
+        Setting the ``discard-data`` boolean option to on indicates that
+        file contents can be destroyed when QEMU exits, to avoid
+        unnecessarily flushing data to the backing file. Note that
+        ``discard-data`` is only an optimization, and QEMU might not
+        discard file contents if it aborts unexpectedly or is terminated
+        using SIGKILL.
+
+        The ``merge`` boolean option enables memory merge, also known as
+        MADV\_MERGEABLE, so that Kernel Samepage Merging will consider
+        the pages for memory deduplication.
+
+        Setting the ``dump`` boolean option to off excludes the memory
+        from core dumps. This feature is also known as MADV\_DONTDUMP.
+
+        The ``prealloc`` boolean option enables memory preallocation.
+
+        The ``host-nodes`` option binds the memory range to a list of
+        NUMA host nodes.
+
+        The ``policy`` option sets the NUMA policy to one of the
+        following values:
+
+        ``default``
+            default host policy
+
+        ``preferred``
+            prefer the given host node list for allocation
+
+        ``bind``
+            restrict memory allocation to the given host node list
+
+        ``interleave``
+            interleave memory allocations across the given host node
+            list
+
+        The ``align`` option specifies the base address alignment when
+        QEMU mmap(2) ``mem-path``, and accepts common suffixes, eg
+        ``2M``. Some backend store specified by ``mem-path`` requires an
+        alignment different than the default one used by QEMU, eg the
+        device DAX /dev/dax0.0 requires 2M alignment rather than 4K. In
+        such cases, users can specify the required alignment via this
+        option.
+
+        The ``pmem`` option specifies whether the backing file specified
+        by ``mem-path`` is in host persistent memory that can be
+        accessed using the SNIA NVM programming model (e.g. Intel
+        NVDIMM). If ``pmem`` is set to 'on', QEMU will take necessary
+        operations to guarantee the persistence of its own writes to
+        ``mem-path`` (e.g. in vNVDIMM label emulation and live
+        migration). Also, we will map the backend-file with MAP\_SYNC
+        flag, which ensures the file metadata is in sync for
+        ``mem-path`` in case of host crash or a power failure. MAP\_SYNC
+        requires support from both the host kernel (since Linux kernel
+        4.15) and the filesystem of ``mem-path`` mounted with DAX
+        option.
+
+    ``-object memory-backend-ram,id=id,merge=on|off,dump=on|off,share=on|off,prealloc=on|off,size=size,host-nodes=host-nodes,policy=default|preferred|bind|interleave``
+        Creates a memory backend object, which can be used to back the
+        guest RAM. Memory backend objects offer more control than the
+        ``-m`` option that is traditionally used to define guest RAM.
+        Please refer to ``memory-backend-file`` for a description of the
+        options.
+
+    ``-object memory-backend-memfd,id=id,merge=on|off,dump=on|off,share=on|off,prealloc=on|off,size=size,host-nodes=host-nodes,policy=default|preferred|bind|interleave,seal=on|off,hugetlb=on|off,hugetlbsize=size``
+        Creates an anonymous memory file backend object, which allows
+        QEMU to share the memory with an external process (e.g. when
+        using vhost-user). The memory is allocated with memfd and
+        optional sealing. (Linux only)
+
+        The ``seal`` option creates a sealed-file, that will block
+        further resizing the memory ('on' by default).
+
+        The ``hugetlb`` option specify the file to be created resides in
+        the hugetlbfs filesystem (since Linux 4.14). Used in conjunction
+        with the ``hugetlb`` option, the ``hugetlbsize`` option specify
+        the hugetlb page size on systems that support multiple hugetlb
+        page sizes (it must be a power of 2 value supported by the
+        system).
+
+        In some versions of Linux, the ``hugetlb`` option is
+        incompatible with the ``seal`` option (requires at least Linux
+        4.16).
+
+        Please refer to ``memory-backend-file`` for a description of the
+        other options.
+
+        The ``share`` boolean option is on by default with memfd.
+
+    ``-object rng-builtin,id=id``
+        Creates a random number generator backend which obtains entropy
+        from QEMU builtin functions. The ``id`` parameter is a unique ID
+        that will be used to reference this entropy backend from the
+        ``virtio-rng`` device. By default, the ``virtio-rng`` device
+        uses this RNG backend.
+
+    ``-object rng-random,id=id,filename=/dev/random``
+        Creates a random number generator backend which obtains entropy
+        from a device on the host. The ``id`` parameter is a unique ID
+        that will be used to reference this entropy backend from the
+        ``virtio-rng`` device. The ``filename`` parameter specifies
+        which file to obtain entropy from and if omitted defaults to
+        ``/dev/urandom``.
+
+    ``-object rng-egd,id=id,chardev=chardevid``
+        Creates a random number generator backend which obtains entropy
+        from an external daemon running on the host. The ``id``
+        parameter is a unique ID that will be used to reference this
+        entropy backend from the ``virtio-rng`` device. The ``chardev``
+        parameter is the unique ID of a character device backend that
+        provides the connection to the RNG daemon.
+
+    ``-object tls-creds-anon,id=id,endpoint=endpoint,dir=/path/to/cred/dir,verify-peer=on|off``
+        Creates a TLS anonymous credentials object, which can be used to
+        provide TLS support on network backends. The ``id`` parameter is
+        a unique ID which network backends will use to access the
+        credentials. The ``endpoint`` is either ``server`` or ``client``
+        depending on whether the QEMU network backend that uses the
+        credentials will be acting as a client or as a server. If
+        ``verify-peer`` is enabled (the default) then once the handshake
+        is completed, the peer credentials will be verified, though this
+        is a no-op for anonymous credentials.
+
+        The dir parameter tells QEMU where to find the credential files.
+        For server endpoints, this directory may contain a file
+        dh-params.pem providing diffie-hellman parameters to use for the
+        TLS server. If the file is missing, QEMU will generate a set of
+        DH parameters at startup. This is a computationally expensive
+        operation that consumes random pool entropy, so it is
+        recommended that a persistent set of parameters be generated
+        upfront and saved.
+
+    ``-object tls-creds-psk,id=id,endpoint=endpoint,dir=/path/to/keys/dir[,username=username]``
+        Creates a TLS Pre-Shared Keys (PSK) credentials object, which
+        can be used to provide TLS support on network backends. The
+        ``id`` parameter is a unique ID which network backends will use
+        to access the credentials. The ``endpoint`` is either ``server``
+        or ``client`` depending on whether the QEMU network backend that
+        uses the credentials will be acting as a client or as a server.
+        For clients only, ``username`` is the username which will be
+        sent to the server. If omitted it defaults to "qemu".
+
+        The dir parameter tells QEMU where to find the keys file. It is
+        called "dir/keys.psk" and contains "username:key" pairs. This
+        file can most easily be created using the GnuTLS ``psktool``
+        program.
+
+        For server endpoints, dir may also contain a file dh-params.pem
+        providing diffie-hellman parameters to use for the TLS server.
+        If the file is missing, QEMU will generate a set of DH
+        parameters at startup. This is a computationally expensive
+        operation that consumes random pool entropy, so it is
+        recommended that a persistent set of parameters be generated up
+        front and saved.
+
+    ``-object tls-creds-x509,id=id,endpoint=endpoint,dir=/path/to/cred/dir,priority=priority,verify-peer=on|off,passwordid=id``
+        Creates a TLS anonymous credentials object, which can be used to
+        provide TLS support on network backends. The ``id`` parameter is
+        a unique ID which network backends will use to access the
+        credentials. The ``endpoint`` is either ``server`` or ``client``
+        depending on whether the QEMU network backend that uses the
+        credentials will be acting as a client or as a server. If
+        ``verify-peer`` is enabled (the default) then once the handshake
+        is completed, the peer credentials will be verified. With x509
+        certificates, this implies that the clients must be provided
+        with valid client certificates too.
+
+        The dir parameter tells QEMU where to find the credential files.
+        For server endpoints, this directory may contain a file
+        dh-params.pem providing diffie-hellman parameters to use for the
+        TLS server. If the file is missing, QEMU will generate a set of
+        DH parameters at startup. This is a computationally expensive
+        operation that consumes random pool entropy, so it is
+        recommended that a persistent set of parameters be generated
+        upfront and saved.
+
+        For x509 certificate credentials the directory will contain
+        further files providing the x509 certificates. The certificates
+        must be stored in PEM format, in filenames ca-cert.pem,
+        ca-crl.pem (optional), server-cert.pem (only servers),
+        server-key.pem (only servers), client-cert.pem (only clients),
+        and client-key.pem (only clients).
+
+        For the server-key.pem and client-key.pem files which contain
+        sensitive private keys, it is possible to use an encrypted
+        version by providing the passwordid parameter. This provides the
+        ID of a previously created ``secret`` object containing the
+        password for decryption.
+
+        The priority parameter allows to override the global default
+        priority used by gnutls. This can be useful if the system
+        administrator needs to use a weaker set of crypto priorities for
+        QEMU without potentially forcing the weakness onto all
+        applications. Or conversely if one wants wants a stronger
+        default for QEMU than for all other applications, they can do
+        this through this parameter. Its format is a gnutls priority
+        string as described at
+        https://gnutls.org/manual/html_node/Priority-Strings.html.
+
+    ``-object tls-cipher-suites,id=id,priority=priority``
+        Creates a TLS cipher suites object, which can be used to control
+        the TLS cipher/protocol algorithms that applications are permitted
+        to use.
+
+        The ``id`` parameter is a unique ID which frontends will use to
+        access the ordered list of permitted TLS cipher suites from the
+        host.
+
+        The ``priority`` parameter allows to override the global default
+        priority used by gnutls. This can be useful if the system
+        administrator needs to use a weaker set of crypto priorities for
+        QEMU without potentially forcing the weakness onto all
+        applications. Or conversely if one wants wants a stronger
+        default for QEMU than for all other applications, they can do
+        this through this parameter. Its format is a gnutls priority
+        string as described at
+        https://gnutls.org/manual/html_node/Priority-Strings.html.
+
+        An example of use of this object is to control UEFI HTTPS Boot.
+        The tls-cipher-suites object exposes the ordered list of permitted
+        TLS cipher suites from the host side to the guest firmware, via
+        fw_cfg. The list is represented as an array of IANA_TLS_CIPHER
+        objects. The firmware uses the IANA_TLS_CIPHER array for configuring
+        guest-side TLS.
+
+        In the following example, the priority at which the host-side policy
+        is retrieved is given by the ``priority`` property.
+        Given that QEMU uses GNUTLS, ``priority=@SYSTEM`` may be used to
+        refer to /etc/crypto-policies/back-ends/gnutls.config.
+
+        .. parsed-literal::
+
+             # |qemu_system| \\
+                 -object tls-cipher-suites,id=mysuite0,priority=@SYSTEM \\
+                 -fw_cfg name=etc/edk2/https/ciphers,gen_id=mysuite0
+
+    ``-object filter-buffer,id=id,netdev=netdevid,interval=t[,queue=all|rx|tx][,status=on|off][,position=head|tail|id=][,insert=behind|before]``
+        Interval t can't be 0, this filter batches the packet delivery:
+        all packets arriving in a given interval on netdev netdevid are
+        delayed until the end of the interval. Interval is in
+        microseconds. ``status`` is optional that indicate whether the
+        netfilter is on (enabled) or off (disabled), the default status
+        for netfilter will be 'on'.
+
+        queue all\|rx\|tx is an option that can be applied to any
+        netfilter.
+
+        ``all``: the filter is attached both to the receive and the
+        transmit queue of the netdev (default).
+
+        ``rx``: the filter is attached to the receive queue of the
+        netdev, where it will receive packets sent to the netdev.
+
+        ``tx``: the filter is attached to the transmit queue of the
+        netdev, where it will receive packets sent by the netdev.
+
+        position head\|tail\|id= is an option to specify where the
+        filter should be inserted in the filter list. It can be applied
+        to any netfilter.
+
+        ``head``: the filter is inserted at the head of the filter list,
+        before any existing filters.
+
+        ``tail``: the filter is inserted at the tail of the filter list,
+        behind any existing filters (default).
+
+        ``id=``: the filter is inserted before or behind the filter
+        specified by , see the insert option below.
+
+        insert behind\|before is an option to specify where to insert
+        the new filter relative to the one specified with
+        position=id=. It can be applied to any netfilter.
+
+        ``before``: insert before the specified filter.
+
+        ``behind``: insert behind the specified filter (default).
+
+    ``-object filter-mirror,id=id,netdev=netdevid,outdev=chardevid,queue=all|rx|tx[,vnet_hdr_support][,position=head|tail|id=][,insert=behind|before]``
+        filter-mirror on netdev netdevid,mirror net packet to
+        chardevchardevid, if it has the vnet\_hdr\_support flag,
+        filter-mirror will mirror packet with vnet\_hdr\_len.
+
+    ``-object filter-redirector,id=id,netdev=netdevid,indev=chardevid,outdev=chardevid,queue=all|rx|tx[,vnet_hdr_support][,position=head|tail|id=][,insert=behind|before]``
+        filter-redirector on netdev netdevid,redirect filter's net
+        packet to chardev chardevid,and redirect indev's packet to
+        filter.if it has the vnet\_hdr\_support flag, filter-redirector
+        will redirect packet with vnet\_hdr\_len. Create a
+        filter-redirector we need to differ outdev id from indev id, id
+        can not be the same. we can just use indev or outdev, but at
+        least one of indev or outdev need to be specified.
+
+    ``-object filter-rewriter,id=id,netdev=netdevid,queue=all|rx|tx,[vnet_hdr_support][,position=head|tail|id=][,insert=behind|before]``
+        Filter-rewriter is a part of COLO project.It will rewrite tcp
+        packet to secondary from primary to keep secondary tcp
+        connection,and rewrite tcp packet to primary from secondary make
+        tcp packet can be handled by client.if it has the
+        vnet\_hdr\_support flag, we can parse packet with vnet header.
+
+        usage: colo secondary: -object
+        filter-redirector,id=f1,netdev=hn0,queue=tx,indev=red0 -object
+        filter-redirector,id=f2,netdev=hn0,queue=rx,outdev=red1 -object
+        filter-rewriter,id=rew0,netdev=hn0,queue=all
+
+    ``-object filter-dump,id=id,netdev=dev[,file=filename][,maxlen=len][,position=head|tail|id=][,insert=behind|before]``
+        Dump the network traffic on netdev dev to the file specified by
+        filename. At most len bytes (64k by default) per packet are
+        stored. The file format is libpcap, so it can be analyzed with
+        tools such as tcpdump or Wireshark.
+
+    ``-object colo-compare,id=id,primary_in=chardevid,secondary_in=chardevid,outdev=chardevid,iothread=id[,vnet_hdr_support][,notify_dev=id][,compare_timeout=@var{ms}][,expired_scan_cycle=@var{ms}][,max_queue_size=@var{size}]``
+        Colo-compare gets packet from primary\_in chardevid and
+        secondary\_in, then compare whether the payload of primary packet
+        and secondary packet are the same. If same, it will output
+        primary packet to out\_dev, else it will notify COLO-framework to do
+        checkpoint and send primary packet to out\_dev. In order to
+        improve efficiency, we need to put the task of comparison in
+        another iothread. If it has the vnet\_hdr\_support flag,
+        colo compare will send/recv packet with vnet\_hdr\_len.
+        The compare\_timeout=@var{ms} determines the maximum time of the
+        colo-compare hold the packet. The expired\_scan\_cycle=@var{ms}
+        is to set the period of scanning expired primary node network packets.
+        The max\_queue\_size=@var{size} is to set the max compare queue
+        size depend on user environment.
+        If user want to use Xen COLO, need to add the notify\_dev to
+        notify Xen colo-frame to do checkpoint.
+
+        COLO-compare must be used with the help of filter-mirror,
+        filter-redirector and filter-rewriter.
+
+        ::
+
+            KVM COLO
+
+            primary:
+            -netdev tap,id=hn0,vhost=off,script=/etc/qemu-ifup,downscript=/etc/qemu-ifdown
+            -device e1000,id=e0,netdev=hn0,mac=52:a4:00:12:78:66
+            -chardev socket,id=mirror0,host=3.3.3.3,port=9003,server,nowait
+            -chardev socket,id=compare1,host=3.3.3.3,port=9004,server,nowait
+            -chardev socket,id=compare0,host=3.3.3.3,port=9001,server,nowait
+            -chardev socket,id=compare0-0,host=3.3.3.3,port=9001
+            -chardev socket,id=compare_out,host=3.3.3.3,port=9005,server,nowait
+            -chardev socket,id=compare_out0,host=3.3.3.3,port=9005
+            -object iothread,id=iothread1
+            -object filter-mirror,id=m0,netdev=hn0,queue=tx,outdev=mirror0
+            -object filter-redirector,netdev=hn0,id=redire0,queue=rx,indev=compare_out
+            -object filter-redirector,netdev=hn0,id=redire1,queue=rx,outdev=compare0
+            -object colo-compare,id=comp0,primary_in=compare0-0,secondary_in=compare1,outdev=compare_out0,iothread=iothread1
+
+            secondary:
+            -netdev tap,id=hn0,vhost=off,script=/etc/qemu-ifup,down script=/etc/qemu-ifdown
+            -device e1000,netdev=hn0,mac=52:a4:00:12:78:66
+            -chardev socket,id=red0,host=3.3.3.3,port=9003
+            -chardev socket,id=red1,host=3.3.3.3,port=9004
+            -object filter-redirector,id=f1,netdev=hn0,queue=tx,indev=red0
+            -object filter-redirector,id=f2,netdev=hn0,queue=rx,outdev=red1
+
+
+            Xen COLO
+
+            primary:
+            -netdev tap,id=hn0,vhost=off,script=/etc/qemu-ifup,downscript=/etc/qemu-ifdown
+            -device e1000,id=e0,netdev=hn0,mac=52:a4:00:12:78:66
+            -chardev socket,id=mirror0,host=3.3.3.3,port=9003,server,nowait
+            -chardev socket,id=compare1,host=3.3.3.3,port=9004,server,nowait
+            -chardev socket,id=compare0,host=3.3.3.3,port=9001,server,nowait
+            -chardev socket,id=compare0-0,host=3.3.3.3,port=9001
+            -chardev socket,id=compare_out,host=3.3.3.3,port=9005,server,nowait
+            -chardev socket,id=compare_out0,host=3.3.3.3,port=9005
+            -chardev socket,id=notify_way,host=3.3.3.3,port=9009,server,nowait
+            -object filter-mirror,id=m0,netdev=hn0,queue=tx,outdev=mirror0
+            -object filter-redirector,netdev=hn0,id=redire0,queue=rx,indev=compare_out
+            -object filter-redirector,netdev=hn0,id=redire1,queue=rx,outdev=compare0
+            -object iothread,id=iothread1
+            -object colo-compare,id=comp0,primary_in=compare0-0,secondary_in=compare1,outdev=compare_out0,notify_dev=nofity_way,iothread=iothread1
+
+            secondary:
+            -netdev tap,id=hn0,vhost=off,script=/etc/qemu-ifup,down script=/etc/qemu-ifdown
+            -device e1000,netdev=hn0,mac=52:a4:00:12:78:66
+            -chardev socket,id=red0,host=3.3.3.3,port=9003
+            -chardev socket,id=red1,host=3.3.3.3,port=9004
+            -object filter-redirector,id=f1,netdev=hn0,queue=tx,indev=red0
+            -object filter-redirector,id=f2,netdev=hn0,queue=rx,outdev=red1
+
+        If you want to know the detail of above command line, you can
+        read the colo-compare git log.
+
+    ``-object cryptodev-backend-builtin,id=id[,queues=queues]``
+        Creates a cryptodev backend which executes crypto opreation from
+        the QEMU cipher APIS. The id parameter is a unique ID that will
+        be used to reference this cryptodev backend from the
+        ``virtio-crypto`` device. The queues parameter is optional,
+        which specify the queue number of cryptodev backend, the default
+        of queues is 1.
+
+        .. parsed-literal::
+
+             # |qemu_system| \\
+               [...] \\
+                   -object cryptodev-backend-builtin,id=cryptodev0 \\
+                   -device virtio-crypto-pci,id=crypto0,cryptodev=cryptodev0 \\
+               [...]
+
+    ``-object cryptodev-vhost-user,id=id,chardev=chardevid[,queues=queues]``
+        Creates a vhost-user cryptodev backend, backed by a chardev
+        chardevid. The id parameter is a unique ID that will be used to
+        reference this cryptodev backend from the ``virtio-crypto``
+        device. The chardev should be a unix domain socket backed one.
+        The vhost-user uses a specifically defined protocol to pass
+        vhost ioctl replacement messages to an application on the other
+        end of the socket. The queues parameter is optional, which
+        specify the queue number of cryptodev backend for multiqueue
+        vhost-user, the default of queues is 1.
+
+        .. parsed-literal::
+
+             # |qemu_system| \\
+               [...] \\
+                   -chardev socket,id=chardev0,path=/path/to/socket \\
+                   -object cryptodev-vhost-user,id=cryptodev0,chardev=chardev0 \\
+                   -device virtio-crypto-pci,id=crypto0,cryptodev=cryptodev0 \\
+               [...]
+
+    ``-object secret,id=id,data=string,format=raw|base64[,keyid=secretid,iv=string]``
+      \ 
+    ``-object secret,id=id,file=filename,format=raw|base64[,keyid=secretid,iv=string]``
+        Defines a secret to store a password, encryption key, or some
+        other sensitive data. The sensitive data can either be passed
+        directly via the data parameter, or indirectly via the file
+        parameter. Using the data parameter is insecure unless the
+        sensitive data is encrypted.
+
+        The sensitive data can be provided in raw format (the default),
+        or base64. When encoded as JSON, the raw format only supports
+        valid UTF-8 characters, so base64 is recommended for sending
+        binary data. QEMU will convert from which ever format is
+        provided to the format it needs internally. eg, an RBD password
+        can be provided in raw format, even though it will be base64
+        encoded when passed onto the RBD sever.
+
+        For added protection, it is possible to encrypt the data
+        associated with a secret using the AES-256-CBC cipher. Use of
+        encryption is indicated by providing the keyid and iv
+        parameters. The keyid parameter provides the ID of a previously
+        defined secret that contains the AES-256 decryption key. This
+        key should be 32-bytes long and be base64 encoded. The iv
+        parameter provides the random initialization vector used for
+        encryption of this particular secret and should be a base64
+        encrypted string of the 16-byte IV.
+
+        The simplest (insecure) usage is to provide the secret inline
+
+        .. parsed-literal::
+
+             # |qemu_system| -object secret,id=sec0,data=letmein,format=raw
+
+        The simplest secure usage is to provide the secret via a file
+
+        # printf "letmein" > mypasswd.txt # QEMU\_SYSTEM\_MACRO -object
+        secret,id=sec0,file=mypasswd.txt,format=raw
+
+        For greater security, AES-256-CBC should be used. To illustrate
+        usage, consider the openssl command line tool which can encrypt
+        the data. Note that when encrypting, the plaintext must be
+        padded to the cipher block size (32 bytes) using the standard
+        PKCS#5/6 compatible padding algorithm.
+
+        First a master key needs to be created in base64 encoding:
+
+        ::
+
+             # openssl rand -base64 32 > key.b64
+             # KEY=$(base64 -d key.b64 | hexdump  -v -e '/1 "%02X"')
+
+        Each secret to be encrypted needs to have a random
+        initialization vector generated. These do not need to be kept
+        secret
+
+        ::
+
+             # openssl rand -base64 16 > iv.b64
+             # IV=$(base64 -d iv.b64 | hexdump  -v -e '/1 "%02X"')
+
+        The secret to be defined can now be encrypted, in this case
+        we're telling openssl to base64 encode the result, but it could
+        be left as raw bytes if desired.
+
+        ::
+
+             # SECRET=$(printf "letmein" |
+                        openssl enc -aes-256-cbc -a -K $KEY -iv $IV)
+
+        When launching QEMU, create a master secret pointing to
+        ``key.b64`` and specify that to be used to decrypt the user
+        password. Pass the contents of ``iv.b64`` to the second secret
+
+        .. parsed-literal::
+
+             # |qemu_system| \\
+                 -object secret,id=secmaster0,format=base64,file=key.b64 \\
+                 -object secret,id=sec0,keyid=secmaster0,format=base64,\\
+                     data=$SECRET,iv=$(key);
+        } else {
+            key = entry->key;
+            new_key = NULL;
+        }
+
+        /*
+         * Flatten non-empty QDict and QList recursively into @target,
+         * copy other objects to @target.
+         * On the root level (if @qdict == @target), remove flattened
+         * nested QDicts and QLists from @qdict.
+         *
+         * (Note that we do not need to remove entries from nested
+         * dicts or lists.  Their reference count is decremented on
+         * the root level, so there are no leaks.  In fact, if they
+         * have a reference count greater than one, we are probably
+         * well advised not to modify them altogether.)
+         */
+        if (dict_val && qdict_size(dict_val)) {
+            qdict_flatten_qdict(dict_val, target, key);
+            if (target == qdict) {
+                qdict_del(qdict, entry->key);
+            }
+        } else if (list_val && !qlist_empty(list_val)) {
+            qdict_flatten_qlist(list_val, target, key);
+            if (target == qdict) {
+                qdict_del(qdict, entry->key);
+            }
+        } else if (target != qdict) {
+            qdict_put_obj(target, key, qobject_ref(value));
+        }
+
+        g_free(new_key);
+        entry = next;
+    }
+}
+
+/**
+ * qdict_flatten(): For each nested non-empty QDict with key x, all
+ * fields with key y are moved to this QDict and their key is renamed
+ * to "x.y". For each nested non-empty QList with key x, the field at
+ * index y is moved to this QDict with the key "x.y" (i.e., the
+ * reverse of what qdict_array_split() does).
+ * This operation is applied recursively for nested QDicts and QLists.
+ */
+void qdict_flatten(QDict *qdict)
+{
+    qdict_flatten_qdict(qdict, qdict, NULL);
+}
+
+/* extract all the src QDict entries starting by start into dst.
+ * If dst is NULL then the entries are simply removed from src. */
+void qdict_extract_subqdict(QDict *src, QDict **dst, const char *start)
+
+{
+    const QDictEntry *entry, *next;
+    const char *p;
+
+    if (dst) {
+        *dst = qdict_new();
+    }
+    entry = qdict_first(src);
+
+    while (entry != NULL) {
+        next = qdict_next(src, entry);
+        if (strstart(entry->key, start, &p)) {
+            if (dst) {
+                qdict_put_obj(*dst, p, qobject_ref(entry->value));
+            }
+            qdict_del(src, entry->key);
+        }
+        entry = next;
+    }
+}
+
+static int qdict_count_prefixed_entries(const QDict *src, const char *start)
+{
+    const QDictEntry *entry;
+    int count = 0;
+
+    for (entry = qdict_first(src); entry; entry = qdict_next(src, entry)) {
+        if (strstart(entry->key, start, NULL)) {
+            if (count == INT_MAX) {
+                return -ERANGE;
+            }
+            count++;
+        }
+    }
+
+    return count;
+}
+
+/**
+ * qdict_array_split(): This function moves array-like elements of a QDict into
+ * a new QList. Every entry in the original QDict with a key "%u" or one
+ * prefixed "%u.", where %u designates an unsigned integer starting at 0 and
+ * incrementally counting up, will be moved to a new QDict at index %u in the
+ * output QList with the key prefix removed, if that prefix is "%u.". If the
+ * whole key is just "%u", the whole QObject will be moved unchanged without
+ * creating a new QDict. The function terminates when there is no entry in the
+ * QDict with a prefix directly (incrementally) following the last one; it also
+ * returns if there are both entries with "%u" and "%u." for the same index %u.
+ * Example: {"0.a": 42, "0.b": 23, "1.x": 0, "4.y": 1, "o.o": 7, "2": 66}
+ *      (or {"1.x": 0, "4.y": 1, "0.a": 42, "o.o": 7, "0.b": 23, "2": 66})
+ *       => [{"a": 42, "b": 23}, {"x": 0}, 66]
+ *      and {"4.y": 1, "o.o": 7} (remainder of the old QDict)
+ */
+void qdict_array_split(QDict *src, QList **dst)
+{
+    unsigned i;
+
+    *dst = qlist_new();
+
+    for (i = 0; i < UINT_MAX; i++) {
+        QObject *subqobj;
+        bool is_subqdict;
+        QDict *subqdict;
+        char indexstr[32], prefix[32];
+        size_t snprintf_ret;
+
+        snprintf_ret = snprintf(indexstr, 32, "%u", i);
+        assert(snprintf_ret < 32);
+
+        subqobj = qdict_get(src, indexstr);
+
+        snprintf_ret = snprintf(prefix, 32, "%u.", i);
+        assert(snprintf_ret < 32);
+
+        /* Overflow is the same as positive non-zero results */
+        is_subqdict = qdict_count_prefixed_entries(src, prefix);
+
+        /*
+         * There may be either a single subordinate object (named
+         * "%u") or multiple objects (each with a key prefixed "%u."),
+         * but not both.
+         */
+        if (!subqobj == !is_subqdict) {
+            break;
+        }
+
+        if (is_subqdict) {
+            qdict_extract_subqdict(src, &subqdict, prefix);
+            assert(qdict_size(subqdict) > 0);
+        } else {
+            qobject_ref(subqobj);
+            qdict_del(src, indexstr);
+        }
+
+        qlist_append_obj(*dst, subqobj ?: QOBJECT(subqdict));
+    }
+}
+
+/**
+ * qdict_split_flat_key:
+ * @key: the key string to split
+ * @prefix: non-NULL pointer to hold extracted prefix
+ * @suffix: non-NULL pointer to remaining suffix
+ *
+ * Given a flattened key such as 'foo.0.bar', split it into two parts
+ * at the first '.' separator. Allows double dot ('..') to escape the
+ * normal separator.
+ *
+ * e.g.
+ *    'foo.0.bar' -> prefix='foo' and suffix='0.bar'
+ *    'foo..0.bar' -> prefix='foo.0' and suffix='bar'
+ *
+ * The '..' sequence will be unescaped in the returned 'prefix'
+ * string. The 'suffix' string will be left in escaped format, so it
+ * can be fed back into the qdict_split_flat_key() key as the input
+ * later.
+ *
+ * The caller is responsible for freeing the string returned in @prefix
+ * using g_free().
+ */
+static void qdict_split_flat_key(const char *key, char **prefix,
+                                 const char **suffix)
+{
+    const char *separator;
+    size_t i, j;
+
+    /* Find first '.' separator, but if there is a pair '..'
+     * that acts as an escape, so skip over '..' */
+    separator = NULL;
+    do {
+        if (separator) {
+            separator += 2;
+        } else {
+            separator = key;
+        }
+        separator = strchr(separator, '.');
+    } while (separator && separator[1] == '.');
+
+    if (separator) {
+        *prefix = g_strndup(key, separator - key);
+        *suffix = separator + 1;
+    } else {
+        *prefix = g_strdup(key);
+        *suffix = NULL;
+    }
+
+    /* Unescape the '..' sequence into '.' */
+    for (i = 0, j = 0; (*prefix)[i] != '\0'; i++, j++) {
+        if ((*prefix)[i] == '.') {
+            assert((*prefix)[i + 1] == '.');
+            i++;
+        }
+        (*prefix)[j] = (*prefix)[i];
+    }
+    (*prefix)[j] = '\0';
+}
+
+/**
+ * qdict_is_list:
+ * @maybe_list: dict to check if keys represent list elements.
+ *
+ * Determine whether all keys in @maybe_list are valid list elements.
+ * If @maybe_list is non-zero in length and all the keys look like
+ * valid list indexes, this will return 1. If @maybe_list is zero
+ * length or all keys are non-numeric then it will return 0 to indicate
+ * it is a normal qdict. If there is a mix of numeric and non-numeric
+ * keys, or the list indexes are non-contiguous, an error is reported.
+ *
+ * Returns: 1 if a valid list, 0 if a dict, -1 on error
+ */
+static int qdict_is_list(QDict *maybe_list, Error **errp)
+{
+    const QDictEntry *ent;
+    ssize_t len = 0;
+    ssize_t max = -1;
+    int is_list = -1;
+    int64_t val;
+
+    for (ent = qdict_first(maybe_list); ent != NULL;
+         ent = qdict_next(maybe_list, ent)) {
+        int is_index = !qemu_strtoi64(ent->key, NULL, 10, &val);
+
+        if (is_list == -1) {
+            is_list = is_index;
+        }
+
+        if (is_index != is_list) {
+            error_setg(errp, "Cannot mix list and non-list keys");
+            return -1;
+        }
+
+        if (is_index) {
+            len++;
+            if (val > max) {
+                max = val;
+            }
+        }
+    }
+
+    if (is_list == -1) {
+        assert(!qdict_size(maybe_list));
+        is_list = 0;
+    }
+
+    /* NB this isn't a perfect check - e.g. it won't catch
+     * a list containing '1', '+1', '01', '3', but that
+     * does not matter - we've still proved that the
+     * input is a list. It is up the caller to do a
+     * stricter check if desired */
+    if (len != (max + 1)) {
+        error_setg(errp, "List indices are not contiguous, "
+                   "saw %zd elements but %zd largest index",
+                   len, max);
+        return -1;
+    }
+
+    return is_list;
+}
+
+/**
+ * qdict_crumple:
+ * @src: the original flat dictionary (only scalar values) to crumple
+ *
+ * Takes a flat dictionary whose keys use '.' separator to indicate
+ * nesting, and values are scalars, empty dictionaries or empty lists,
+ * and crumples it into a nested structure.
+ *
+ * To include a literal '.' in a key name, it must be escaped as '..'
+ *
+ * For example, an input of:
+ *
+ * { 'foo.0.bar': 'one', 'foo.0.wizz': '1',
+ *   'foo.1.bar': 'two', 'foo.1.wizz': '2' }
+ *
+ * will result in an output of:
+ *
+ * {
+ *   'foo': [
+ *      { 'bar': 'one', 'wizz': '1' },
+ *      { 'bar': 'two', 'wizz': '2' }
+ *   ],
+ * }
+ *
+ * The following scenarios in the input dict will result in an
+ * error being returned:
+ *
+ *  - Any values in @src are non-scalar types
+ *  - If keys in @src imply that a particular level is both a
+ *    list and a dict. e.g., "foo.0.bar" and "foo.eek.bar".
+ *  - If keys in @src imply that a particular level is a list,
+ *    but the indices are non-contiguous. e.g. "foo.0.bar" and
+ *    "foo.2.bar" without any "foo.1.bar" present.
+ *  - If keys in @src represent list indexes, but are not in
+ *    the "%zu" format. e.g. "foo.+0.bar"
+ *
+ * Returns: either a QDict or QList for the nested data structure, or NULL
+ * on error
+ */
+QObject *qdict_crumple(const QDict *src, Error **errp)
+{
+    const QDictEntry *ent;
+    QDict *two_level, *multi_level = NULL, *child_dict;
+    QDict *dict_val;
+    QList *list_val;
+    QObject *dst = NULL, *child;
+    size_t i;
+    char *prefix = NULL;
+    const char *suffix = NULL;
+    int is_list;
+
+    two_level = qdict_new();
+
+    /* Step 1: split our totally flat dict into a two level dict */
+    for (ent = qdict_first(src); ent != NULL; ent = qdict_next(src, ent)) {
+        dict_val = qobject_to(QDict, ent->value);
+        list_val = qobject_to(QList, ent->value);
+        if ((dict_val && qdict_size(dict_val))
+            || (list_val && !qlist_empty(list_val))) {
+            error_setg(errp, "Value %s is not flat", ent->key);
+            goto error;
+        }
+
+        qdict_split_flat_key(ent->key, &prefix, &suffix);
+        child = qdict_get(two_level, prefix);
+        child_dict = qobject_to(QDict, child);
+
+        if (child) {
+            /*
+             * If @child_dict, then all previous keys with this prefix
+             * had a suffix.  If @suffix, this one has one as well,
+             * and we're good, else there's a clash.
+             */
+            if (!child_dict || !suffix) {
+                error_setg(errp, "Cannot mix scalar and non-scalar keys");
+                goto error;
+            }
+        }
+
+        if (suffix) {
+            if (!child_dict) {
+                child_dict = qdict_new();
+                qdict_put(two_level, prefix, child_dict);
+            }
+            qdict_put_obj(child_dict, suffix, qobject_ref(ent->value));
+        } else {
+            qdict_put_obj(two_level, prefix, qobject_ref(ent->value));
+        }
+
+        g_free(prefix);
+        prefix = NULL;
+    }
+
+    /* Step 2: optionally process the two level dict recursively
+     * into a multi-level dict */
+    multi_level = qdict_new();
+    for (ent = qdict_first(two_level); ent != NULL;
+         ent = qdict_next(two_level, ent)) {
+        dict_val = qobject_to(QDict, ent->value);
+        if (dict_val && qdict_size(dict_val)) {
+            child = qdict_crumple(dict_val, errp);
+            if (!child) {
+                goto error;
+            }
+
+            qdict_put_obj(multi_level, ent->key, child);
+        } else {
+            qdict_put_obj(multi_level, ent->key, qobject_ref(ent->value));
+        }
+    }
+    qobject_unref(two_level);
+    two_level = NULL;
+
+    /* Step 3: detect if we need to turn our dict into list */
+    is_list = qdict_is_list(multi_level, errp);
+    if (is_list < 0) {
+        goto error;
+    }
+
+    if (is_list) {
+        dst = QOBJECT(qlist_new());
+
+        for (i = 0; i < qdict_size(multi_level); i++) {
+            char *key = g_strdup_printf("%zu", i);
+
+            child = qdict_get(multi_level, key);
+            g_free(key);
+
+            if (!child) {
+                error_setg(errp, "Missing list index %zu", i);
+                goto error;
+            }
+
+            qlist_append_obj(qobject_to(QList, dst), qobject_ref(child));
+        }
+        qobject_unref(multi_level);
+        multi_level = NULL;
+    } else {
+        dst = QOBJECT(multi_level);
+    }
+
+    return dst;
+
+ error:
+    g_free(prefix);
+    qobject_unref(multi_level);
+    qobject_unref(two_level);
+    qobject_unref(dst);
+    return NULL;
+}
+
+/**
+ * qdict_crumple_for_keyval_qiv:
+ * @src: the flat dictionary (only scalar values) to crumple
+ * @errp: location to store error
+ *
+ * Like qdict_crumple(), but additionally transforms scalar values so
+ * the result can be passed to qobject_input_visitor_new_keyval().
+ *
+ * The block subsystem uses this function to prepare its flat QDict
+ * with possibly confused scalar types for a visit.  It should not be
+ * used for anything else, and it should go away once the block
+ * subsystem has been cleaned up.
+ */
+static QObject *qdict_crumple_for_keyval_qiv(QDict *src, Error **errp)
+{
+    QDict *tmp = NULL;
+    char *buf;
+    const char *s;
+    const QDictEntry *ent;
+    QObject *dst;
+
+    for (ent = qdict_first(src); ent; ent = qdict_next(src, ent)) {
+        buf = NULL;
+        switch (qobject_type(ent->value)) {
+        case QTYPE_QNULL:
+        case QTYPE_QSTRING:
+            continue;
+        case QTYPE_QNUM:
+            s = buf = qnum_to_string(qobject_to(QNum, ent->value));
+            break;
+        case QTYPE_QDICT:
+        case QTYPE_QLIST:
+            /* @src isn't flat; qdict_crumple() will fail */
+            continue;
+        case QTYPE_QBOOL:
+            s = qbool_get_bool(qobject_to(QBool, ent->value))
+                ? "on" : "off";
+            break;
+        default:
+            abort();
+        }
+
+        if (!tmp) {
+            tmp = qdict_clone_shallow(src);
+        }
+        qdict_put_str(tmp, ent->key, s);
+        g_free(buf);
+    }
+
+    dst = qdict_crumple(tmp ?: src, errp);
+    qobject_unref(tmp);
+    return dst;
+}
+
+/**
+ * qdict_array_entries(): Returns the number of direct array entries if the
+ * sub-QDict of src specified by the prefix in subqdict (or src itself for
+ * prefix == "") is valid as an array, i.e. the length of the created list if
+ * the sub-QDict would become empty after calling qdict_array_split() on it. If
+ * the array is not valid, -EINVAL is returned.
+ */
+int qdict_array_entries(QDict *src, const char *subqdict)
+{
+    const QDictEntry *entry;
+    unsigned i;
+    unsigned entries = 0;
+    size_t subqdict_len = strlen(subqdict);
+
+    assert(!subqdict_len || subqdict[subqdict_len - 1] == '.');
+
+    /* qdict_array_split() loops until UINT_MAX, but as we want to return
+     * negative errors, we only have a signed return value here. Any additional
+     * entries will lead to -EINVAL. */
+    for (i = 0; i < INT_MAX; i++) {
+        QObject *subqobj;
+        int subqdict_entries;
+        char *prefix = g_strdup_printf("%s%u.", subqdict, i);
+
+        subqdict_entries = qdict_count_prefixed_entries(src, prefix);
+
+        /* Remove ending "." */
+        prefix[strlen(prefix) - 1] = 0;
+        subqobj = qdict_get(src, prefix);
+
+        g_free(prefix);
+
+        if (subqdict_entries < 0) {
+            return subqdict_entries;
+        }
+
+        /* There may be either a single subordinate object (named "%u") or
+         * multiple objects (each with a key prefixed "%u."), but not both. */
+        if (subqobj && subqdict_entries) {
+            return -EINVAL;
+        } else if (!subqobj && !subqdict_entries) {
+            break;
+        }
+
+        entries += subqdict_entries ? subqdict_entries : 1;
+    }
+
+    /* Consider everything handled that isn't part of the given sub-QDict */
+    for (entry = qdict_first(src); entry; entry = qdict_next(src, entry)) {
+        if (!strstart(qdict_entry_key(entry), subqdict, NULL)) {
+            entries++;
+        }
+    }
+
+    /* Anything left in the sub-QDict that wasn't handled? */
+    if (qdict_size(src) != entries) {
+        return -EINVAL;
+    }
+
+    return i;
+}
+
+/**
+ * qdict_join(): Absorb the src QDict into the dest QDict, that is, move all
+ * elements from src to dest.
+ *
+ * If an element from src has a key already present in dest, it will not be
+ * moved unless overwrite is true.
+ *
+ * If overwrite is true, the conflicting values in dest will be discarded and
+ * replaced by the corresponding values from src.
+ *
+ * Therefore, with overwrite being true, the src QDict will always be empty when
+ * this function returns. If overwrite is false, the src QDict will be empty
+ * iff there were no conflicts.
+ */
+void qdict_join(QDict *dest, QDict *src, bool overwrite)
+{
+    const QDictEntry *entry, *next;
+
+    entry = qdict_first(src);
+    while (entry) {
+        next = qdict_next(src, entry);
+
+        if (overwrite || !qdict_haskey(dest, entry->key)) {
+            qdict_put_obj(dest, entry->key, qobject_ref(entry->value));
+            qdict_del(src, entry->key);
+        }
+
+        entry = next;
+    }
+}
+
+/**
+ * qdict_rename_keys(): Rename keys in qdict according to the replacements
+ * specified in the array renames. The array must be terminated by an entry
+ * with from = NULL.
+ *
+ * The renames are performed individually in the order of the array, so entries
+ * may be renamed multiple times and may or may not conflict depending on the
+ * order of the renames array.
+ *
+ * Returns true for success, false in error cases.
+ */
+bool qdict_rename_keys(QDict *qdict, const QDictRenames *renames, Error **errp)
+{
+    QObject *qobj;
+
+    while (renames->from) {
+        if (qdict_haskey(qdict, renames->from)) {
+            if (qdict_haskey(qdict, renames->to)) {
+                error_setg(errp, "'%s' and its alias '%s' can't be used at the "
+                           "same time", renames->to, renames->from);
+                return false;
+            }
+
+            qobj = qdict_get(qdict, renames->from);
+            qdict_put_obj(qdict, renames->to, qobject_ref(qobj));
+            qdict_del(qdict, renames->from);
+        }
+
+        renames++;
+    }
+    return true;
+}
+
+/*
+ * Create a QObject input visitor for flat @qdict with possibly
+ * confused scalar types.
+ *
+ * The block subsystem uses this function to visit its flat QDict with
+ * possibly confused scalar types.  It should not be used for anything
+ * else, and it should go away once the block subsystem has been
+ * cleaned up.
+ */
+Visitor *qobject_input_visitor_new_flat_confused(QDict *qdict,
+                                                 Error **errp)
+{
+    QObject *crumpled;
+    Visitor *v;
+
+    crumpled = qdict_crumple_for_keyval_qiv(qdict, errp);
+    if (!crumpled) {
+        return NULL;
+    }
+
+    v = qobject_input_visitor_new_keyval(crumpled);
+    qobject_unref(crumpled);
+    return v;
+}
diff --git a/qobject/json-lexer.c b/qobject/json-lexer.c
new file mode 100644
index 000000000..632320d72
--- /dev/null
+++ b/qobject/json-lexer.c
@@ -0,0 +1,365 @@
+/*
+ * JSON lexer
+ *
+ * Copyright IBM, Corp. 2009
+ *
+ * Authors:
+ *  Anthony Liguori   
+ *
+ * This work is licensed under the terms of the GNU LGPL, version 2.1 or later.
+ * See the COPYING.LIB file in the top-level directory.
+ *
+ */
+
+#include "qemu/osdep.h"
+#include "json-parser-int.h"
+
+#define MAX_TOKEN_SIZE (64ULL << 20)
+
+/*
+ * From RFC 8259 "The JavaScript Object Notation (JSON) Data
+ * Interchange Format", with [comments in brackets]:
+ *
+ * The set of tokens includes six structural characters, strings,
+ * numbers, and three literal names.
+ *
+ * These are the six structural characters:
+ *
+ *    begin-array     = ws %x5B ws  ; [ left square bracket
+ *    begin-object    = ws %x7B ws  ; { left curly bracket
+ *    end-array       = ws %x5D ws  ; ] right square bracket
+ *    end-object      = ws %x7D ws  ; } right curly bracket
+ *    name-separator  = ws %x3A ws  ; : colon
+ *    value-separator = ws %x2C ws  ; , comma
+ *
+ * Insignificant whitespace is allowed before or after any of the six
+ * structural characters.
+ * [This lexer accepts it before or after any token, which is actually
+ * the same, as the grammar always has structural characters between
+ * other tokens.]
+ *
+ *    ws = *(
+ *           %x20 /              ; Space
+ *           %x09 /              ; Horizontal tab
+ *           %x0A /              ; Line feed or New line
+ *           %x0D )              ; Carriage return
+ *
+ * [...] three literal names:
+ *    false null true
+ *  [This lexer accepts [a-z]+, and leaves rejecting unknown literal
+ *  names to the parser.]
+ *
+ * [Numbers:]
+ *
+ *    number = [ minus ] int [ frac ] [ exp ]
+ *    decimal-point = %x2E       ; .
+ *    digit1-9 = %x31-39         ; 1-9
+ *    e = %x65 / %x45            ; e E
+ *    exp = e [ minus / plus ] 1*DIGIT
+ *    frac = decimal-point 1*DIGIT
+ *    int = zero / ( digit1-9 *DIGIT )
+ *    minus = %x2D               ; -
+ *    plus = %x2B                ; +
+ *    zero = %x30                ; 0
+ *
+ * [Strings:]
+ *    string = quotation-mark *char quotation-mark
+ *
+ *    char = unescaped /
+ *        escape (
+ *            %x22 /          ; "    quotation mark  U+0022
+ *            %x5C /          ; \    reverse solidus U+005C
+ *            %x2F /          ; /    solidus         U+002F
+ *            %x62 /          ; b    backspace       U+0008
+ *            %x66 /          ; f    form feed       U+000C
+ *            %x6E /          ; n    line feed       U+000A
+ *            %x72 /          ; r    carriage return U+000D
+ *            %x74 /          ; t    tab             U+0009
+ *            %x75 4HEXDIG )  ; uXXXX                U+XXXX
+ *    escape = %x5C              ; \
+ *    quotation-mark = %x22      ; "
+ *    unescaped = %x20-21 / %x23-5B / %x5D-10FFFF
+ *    [This lexer accepts any non-control character after escape, and
+ *    leaves rejecting invalid ones to the parser.]
+ *
+ *
+ * Extensions over RFC 8259:
+ * - Extra escape sequence in strings:
+ *   0x27 (apostrophe) is recognized after escape, too
+ * - Single-quoted strings:
+ *   Like double-quoted strings, except they're delimited by %x27
+ *   (apostrophe) instead of %x22 (quotation mark), and can't contain
+ *   unescaped apostrophe, but can contain unescaped quotation mark.
+ * - Interpolation, if enabled:
+ *   The lexer accepts %[A-Za-z0-9]*, and leaves rejecting invalid
+ *   ones to the parser.
+ *
+ * Note:
+ * - Input must be encoded in modified UTF-8.
+ * - Decoding and validating is left to the parser.
+ */
+
+enum json_lexer_state {
+    IN_RECOVERY = 1,
+    IN_DQ_STRING_ESCAPE,
+    IN_DQ_STRING,
+    IN_SQ_STRING_ESCAPE,
+    IN_SQ_STRING,
+    IN_ZERO,
+    IN_EXP_DIGITS,
+    IN_EXP_SIGN,
+    IN_EXP_E,
+    IN_MANTISSA,
+    IN_MANTISSA_DIGITS,
+    IN_DIGITS,
+    IN_SIGN,
+    IN_KEYWORD,
+    IN_INTERP,
+    IN_START,
+    IN_START_INTERP,            /* must be IN_START + 1 */
+};
+
+QEMU_BUILD_BUG_ON(JSON_ERROR != 0);
+QEMU_BUILD_BUG_ON(IN_RECOVERY != JSON_ERROR + 1);
+QEMU_BUILD_BUG_ON((int)JSON_MIN <= (int)IN_START_INTERP);
+QEMU_BUILD_BUG_ON(JSON_MAX >= 0x80);
+QEMU_BUILD_BUG_ON(IN_START_INTERP != IN_START + 1);
+
+#define LOOKAHEAD 0x80
+#define TERMINAL(state) [0 ... 0xFF] = ((state) | LOOKAHEAD)
+
+static const uint8_t json_lexer[][256] =  {
+    /* Relies on default initialization to IN_ERROR! */
+
+    /* error recovery */
+    [IN_RECOVERY] = {
+        /*
+         * Skip characters until a structural character, an ASCII
+         * control character other than '\t', or impossible UTF-8
+         * bytes '\xFE', '\xFF'.  Structural characters and line
+         * endings are promising resynchronization points.  Clients
+         * may use the others to force the JSON parser into known-good
+         * state; see docs/interop/qmp-spec.txt.
+         */
+        [0 ... 0x1F] = IN_START | LOOKAHEAD,
+        [0x20 ... 0xFD] = IN_RECOVERY,
+        [0xFE ... 0xFF] = IN_START | LOOKAHEAD,
+        ['\t'] = IN_RECOVERY,
+        ['['] = IN_START | LOOKAHEAD,
+        [']'] = IN_START | LOOKAHEAD,
+        ['{'] = IN_START | LOOKAHEAD,
+        ['}'] = IN_START | LOOKAHEAD,
+        [':'] = IN_START | LOOKAHEAD,
+        [','] = IN_START | LOOKAHEAD,
+    },
+
+    /* double quote string */
+    [IN_DQ_STRING_ESCAPE] = {
+        [0x20 ... 0xFD] = IN_DQ_STRING,
+    },
+    [IN_DQ_STRING] = {
+        [0x20 ... 0xFD] = IN_DQ_STRING,
+        ['\\'] = IN_DQ_STRING_ESCAPE,
+        ['"'] = JSON_STRING,
+    },
+
+    /* single quote string */
+    [IN_SQ_STRING_ESCAPE] = {
+        [0x20 ... 0xFD] = IN_SQ_STRING,
+    },
+    [IN_SQ_STRING] = {
+        [0x20 ... 0xFD] = IN_SQ_STRING,
+        ['\\'] = IN_SQ_STRING_ESCAPE,
+        ['\''] = JSON_STRING,
+    },
+
+    /* Zero */
+    [IN_ZERO] = {
+        TERMINAL(JSON_INTEGER),
+        ['0' ... '9'] = JSON_ERROR,
+        ['.'] = IN_MANTISSA,
+    },
+
+    /* Float */
+    [IN_EXP_DIGITS] = {
+        TERMINAL(JSON_FLOAT),
+        ['0' ... '9'] = IN_EXP_DIGITS,
+    },
+
+    [IN_EXP_SIGN] = {
+        ['0' ... '9'] = IN_EXP_DIGITS,
+    },
+
+    [IN_EXP_E] = {
+        ['-'] = IN_EXP_SIGN,
+        ['+'] = IN_EXP_SIGN,
+        ['0' ... '9'] = IN_EXP_DIGITS,
+    },
+
+    [IN_MANTISSA_DIGITS] = {
+        TERMINAL(JSON_FLOAT),
+        ['0' ... '9'] = IN_MANTISSA_DIGITS,
+        ['e'] = IN_EXP_E,
+        ['E'] = IN_EXP_E,
+    },
+
+    [IN_MANTISSA] = {
+        ['0' ... '9'] = IN_MANTISSA_DIGITS,
+    },
+
+    /* Number */
+    [IN_DIGITS] = {
+        TERMINAL(JSON_INTEGER),
+        ['0' ... '9'] = IN_DIGITS,
+        ['e'] = IN_EXP_E,
+        ['E'] = IN_EXP_E,
+        ['.'] = IN_MANTISSA,
+    },
+
+    [IN_SIGN] = {
+        ['0'] = IN_ZERO,
+        ['1' ... '9'] = IN_DIGITS,
+    },
+
+    /* keywords */
+    [IN_KEYWORD] = {
+        TERMINAL(JSON_KEYWORD),
+        ['a' ... 'z'] = IN_KEYWORD,
+    },
+
+    /* interpolation */
+    [IN_INTERP] = {
+        TERMINAL(JSON_INTERP),
+        ['A' ... 'Z'] = IN_INTERP,
+        ['a' ... 'z'] = IN_INTERP,
+        ['0' ... '9'] = IN_INTERP,
+    },
+
+    /*
+     * Two start states:
+     * - IN_START recognizes JSON tokens with our string extensions
+     * - IN_START_INTERP additionally recognizes interpolation.
+     */
+    [IN_START ... IN_START_INTERP] = {
+        ['"'] = IN_DQ_STRING,
+        ['\''] = IN_SQ_STRING,
+        ['0'] = IN_ZERO,
+        ['1' ... '9'] = IN_DIGITS,
+        ['-'] = IN_SIGN,
+        ['{'] = JSON_LCURLY,
+        ['}'] = JSON_RCURLY,
+        ['['] = JSON_LSQUARE,
+        [']'] = JSON_RSQUARE,
+        [','] = JSON_COMMA,
+        [':'] = JSON_COLON,
+        ['a' ... 'z'] = IN_KEYWORD,
+        [' '] = IN_START,
+        ['\t'] = IN_START,
+        ['\r'] = IN_START,
+        ['\n'] = IN_START,
+    },
+    [IN_START_INTERP]['%'] = IN_INTERP,
+};
+
+static inline uint8_t next_state(JSONLexer *lexer, char ch, bool flush,
+                                 bool *char_consumed)
+{
+    uint8_t next;
+
+    assert(lexer->state < ARRAY_SIZE(json_lexer));
+    next = json_lexer[lexer->state][(uint8_t)ch];
+    *char_consumed = !flush && !(next & LOOKAHEAD);
+    return next & ~LOOKAHEAD;
+}
+
+void json_lexer_init(JSONLexer *lexer, bool enable_interpolation)
+{
+    lexer->start_state = lexer->state = enable_interpolation
+        ? IN_START_INTERP : IN_START;
+    lexer->token = g_string_sized_new(3);
+    lexer->x = lexer->y = 0;
+}
+
+static void json_lexer_feed_char(JSONLexer *lexer, char ch, bool flush)
+{
+    int new_state;
+    bool char_consumed = false;
+
+    lexer->x++;
+    if (ch == '\n') {
+        lexer->x = 0;
+        lexer->y++;
+    }
+
+    while (flush ? lexer->state != lexer->start_state : !char_consumed) {
+        new_state = next_state(lexer, ch, flush, &char_consumed);
+        if (char_consumed) {
+            assert(!flush);
+            g_string_append_c(lexer->token, ch);
+        }
+
+        switch (new_state) {
+        case JSON_LCURLY:
+        case JSON_RCURLY:
+        case JSON_LSQUARE:
+        case JSON_RSQUARE:
+        case JSON_COLON:
+        case JSON_COMMA:
+        case JSON_INTERP:
+        case JSON_INTEGER:
+        case JSON_FLOAT:
+        case JSON_KEYWORD:
+        case JSON_STRING:
+            json_message_process_token(lexer, lexer->token, new_state,
+                                       lexer->x, lexer->y);
+            /* fall through */
+        case IN_START:
+            g_string_truncate(lexer->token, 0);
+            new_state = lexer->start_state;
+            break;
+        case JSON_ERROR:
+            json_message_process_token(lexer, lexer->token, JSON_ERROR,
+                                       lexer->x, lexer->y);
+            new_state = IN_RECOVERY;
+            /* fall through */
+        case IN_RECOVERY:
+            g_string_truncate(lexer->token, 0);
+            break;
+        default:
+            break;
+        }
+        lexer->state = new_state;
+    }
+
+    /* Do not let a single token grow to an arbitrarily large size,
+     * this is a security consideration.
+     */
+    if (lexer->token->len > MAX_TOKEN_SIZE) {
+        json_message_process_token(lexer, lexer->token, lexer->state,
+                                   lexer->x, lexer->y);
+        g_string_truncate(lexer->token, 0);
+        lexer->state = lexer->start_state;
+    }
+}
+
+void json_lexer_feed(JSONLexer *lexer, const char *buffer, size_t size)
+{
+    size_t i;
+
+    for (i = 0; i < size; i++) {
+        json_lexer_feed_char(lexer, buffer[i], false);
+    }
+}
+
+void json_lexer_flush(JSONLexer *lexer)
+{
+    json_lexer_feed_char(lexer, 0, true);
+    assert(lexer->state == lexer->start_state);
+    json_message_process_token(lexer, lexer->token, JSON_END_OF_INPUT,
+                               lexer->x, lexer->y);
+}
+
+void json_lexer_destroy(JSONLexer *lexer)
+{
+    g_string_free(lexer->token, true);
+}
diff --git a/qobject/json-parser-int.h b/qobject/json-parser-int.h
new file mode 100644
index 000000000..16a25d00b
--- /dev/null
+++ b/qobject/json-parser-int.h
@@ -0,0 +1,54 @@
+/*
+ * JSON Parser
+ *
+ * Copyright IBM, Corp. 2009
+ *
+ * Authors:
+ *  Anthony Liguori   
+ *
+ * This work is licensed under the terms of the GNU LGPL, version 2.1 or later.
+ * See the COPYING.LIB file in the top-level directory.
+ *
+ */
+
+#ifndef JSON_PARSER_INT_H
+#define JSON_PARSER_INT_H
+
+#include "qapi/qmp/json-parser.h"
+
+typedef enum json_token_type {
+    JSON_ERROR = 0,             /* must be zero, see json_lexer[] */
+    /* Gap for lexer states */
+    JSON_LCURLY = 100,
+    JSON_MIN = JSON_LCURLY,
+    JSON_RCURLY,
+    JSON_LSQUARE,
+    JSON_RSQUARE,
+    JSON_COLON,
+    JSON_COMMA,
+    JSON_INTEGER,
+    JSON_FLOAT,
+    JSON_KEYWORD,
+    JSON_STRING,
+    JSON_INTERP,
+    JSON_END_OF_INPUT,
+    JSON_MAX = JSON_END_OF_INPUT
+} JSONTokenType;
+
+typedef struct JSONToken JSONToken;
+
+/* json-lexer.c */
+void json_lexer_init(JSONLexer *lexer, bool enable_interpolation);
+void json_lexer_feed(JSONLexer *lexer, const char *buffer, size_t size);
+void json_lexer_flush(JSONLexer *lexer);
+void json_lexer_destroy(JSONLexer *lexer);
+
+/* json-streamer.c */
+void json_message_process_token(JSONLexer *lexer, GString *input,
+                                JSONTokenType type, int x, int y);
+
+/* json-parser.c */
+JSONToken *json_token(JSONTokenType type, int x, int y, GString *tokstr);
+QObject *json_parser_parse(GQueue *tokens, va_list *ap, Error **errp);
+
+#endif
diff --git a/qobject/json-parser.c b/qobject/json-parser.c
new file mode 100644
index 000000000..c0f521b56
--- /dev/null
+++ b/qobject/json-parser.c
@@ -0,0 +1,591 @@
+/*
+ * JSON Parser
+ *
+ * Copyright IBM, Corp. 2009
+ *
+ * Authors:
+ *  Anthony Liguori   
+ *
+ * This work is licensed under the terms of the GNU LGPL, version 2.1 or later.
+ * See the COPYING.LIB file in the top-level directory.
+ *
+ */
+
+#include "qemu/osdep.h"
+#include "qemu/ctype.h"
+#include "qemu/cutils.h"
+#include "qemu/unicode.h"
+#include "qapi/error.h"
+#include "qapi/qmp/qbool.h"
+#include "qapi/qmp/qdict.h"
+#include "qapi/qmp/qlist.h"
+#include "qapi/qmp/qnull.h"
+#include "qapi/qmp/qnum.h"
+#include "qapi/qmp/qstring.h"
+#include "json-parser-int.h"
+
+struct JSONToken {
+    JSONTokenType type;
+    int x;
+    int y;
+    char str[];
+};
+
+typedef struct JSONParserContext
+{
+    Error *err;
+    JSONToken *current;
+    GQueue *buf;
+    va_list *ap;
+} JSONParserContext;
+
+#define BUG_ON(cond) assert(!(cond))
+
+/**
+ * TODO
+ *
+ * 0) make errors meaningful again
+ * 1) add geometry information to tokens
+ * 3) should we return a parsed size?
+ * 4) deal with premature EOI
+ */
+
+static QObject *parse_value(JSONParserContext *ctxt);
+
+/**
+ * Error handler
+ */
+static void GCC_FMT_ATTR(3, 4) parse_error(JSONParserContext *ctxt,
+                                           JSONToken *token, const char *msg, ...)
+{
+    va_list ap;
+    char message[1024];
+
+    if (ctxt->err) {
+        return;
+    }
+    va_start(ap, msg);
+    vsnprintf(message, sizeof(message), msg, ap);
+    va_end(ap);
+    error_setg(&ctxt->err, "JSON parse error, %s", message);
+}
+
+static int cvt4hex(const char *s)
+{
+    int cp, i;
+
+    cp = 0;
+    for (i = 0; i < 4; i++) {
+        if (!qemu_isxdigit(s[i])) {
+            return -1;
+        }
+        cp <<= 4;
+        if (s[i] >= '0' && s[i] <= '9') {
+            cp |= s[i] - '0';
+        } else if (s[i] >= 'a' && s[i] <= 'f') {
+            cp |= 10 + s[i] - 'a';
+        } else if (s[i] >= 'A' && s[i] <= 'F') {
+            cp |= 10 + s[i] - 'A';
+        } else {
+            return -1;
+        }
+    }
+    return cp;
+}
+
+/**
+ * parse_string(): Parse a JSON string
+ *
+ * From RFC 8259 "The JavaScript Object Notation (JSON) Data
+ * Interchange Format":
+ *
+ *    char = unescaped /
+ *        escape (
+ *            %x22 /          ; "    quotation mark  U+0022
+ *            %x5C /          ; \    reverse solidus U+005C
+ *            %x2F /          ; /    solidus         U+002F
+ *            %x62 /          ; b    backspace       U+0008
+ *            %x66 /          ; f    form feed       U+000C
+ *            %x6E /          ; n    line feed       U+000A
+ *            %x72 /          ; r    carriage return U+000D
+ *            %x74 /          ; t    tab             U+0009
+ *            %x75 4HEXDIG )  ; uXXXX                U+XXXX
+ *    escape = %x5C              ; \
+ *    quotation-mark = %x22      ; "
+ *    unescaped = %x20-21 / %x23-5B / %x5D-10FFFF
+ *
+ * Extensions over RFC 8259:
+ * - Extra escape sequence in strings:
+ *   0x27 (apostrophe) is recognized after escape, too
+ * - Single-quoted strings:
+ *   Like double-quoted strings, except they're delimited by %x27
+ *   (apostrophe) instead of %x22 (quotation mark), and can't contain
+ *   unescaped apostrophe, but can contain unescaped quotation mark.
+ *
+ * Note:
+ * - Encoding is modified UTF-8.
+ * - Invalid Unicode characters are rejected.
+ * - Control characters \x00..\x1F are rejected by the lexer.
+ */
+static QString *parse_string(JSONParserContext *ctxt, JSONToken *token)
+{
+    const char *ptr = token->str;
+    QString *str;
+    char quote;
+    const char *beg;
+    int cp, trailing;
+    char *end;
+    ssize_t len;
+    char utf8_buf[5];
+
+    assert(*ptr == '"' || *ptr == '\'');
+    quote = *ptr++;
+    str = qstring_new();
+
+    while (*ptr != quote) {
+        assert(*ptr);
+        switch (*ptr) {
+        case '\\':
+            beg = ptr++;
+            switch (*ptr++) {
+            case '"':
+                qstring_append_chr(str, '"');
+                break;
+            case '\'':
+                qstring_append_chr(str, '\'');
+                break;
+            case '\\':
+                qstring_append_chr(str, '\\');
+                break;
+            case '/':
+                qstring_append_chr(str, '/');
+                break;
+            case 'b':
+                qstring_append_chr(str, '\b');
+                break;
+            case 'f':
+                qstring_append_chr(str, '\f');
+                break;
+            case 'n':
+                qstring_append_chr(str, '\n');
+                break;
+            case 'r':
+                qstring_append_chr(str, '\r');
+                break;
+            case 't':
+                qstring_append_chr(str, '\t');
+                break;
+            case 'u':
+                cp = cvt4hex(ptr);
+                ptr += 4;
+
+                /* handle surrogate pairs */
+                if (cp >= 0xD800 && cp <= 0xDBFF
+                    && ptr[0] == '\\' && ptr[1] == 'u') {
+                    /* leading surrogate followed by \u */
+                    cp = 0x10000 + ((cp & 0x3FF) << 10);
+                    trailing = cvt4hex(ptr + 2);
+                    if (trailing >= 0xDC00 && trailing <= 0xDFFF) {
+                        /* followed by trailing surrogate */
+                        cp |= trailing & 0x3FF;
+                        ptr += 6;
+                    } else {
+                        cp = -1; /* invalid */
+                    }
+                }
+
+                if (mod_utf8_encode(utf8_buf, sizeof(utf8_buf), cp) < 0) {
+                    parse_error(ctxt, token,
+                                "%.*s is not a valid Unicode character",
+                                (int)(ptr - beg), beg);
+                    goto out;
+                }
+                qstring_append(str, utf8_buf);
+                break;
+            default:
+                parse_error(ctxt, token, "invalid escape sequence in string");
+                goto out;
+            }
+            break;
+        case '%':
+            if (ctxt->ap) {
+                if (ptr[1] != '%') {
+                    parse_error(ctxt, token, "can't interpolate into string");
+                    goto out;
+                }
+                ptr++;
+            }
+            /* fall through */
+        default:
+            cp = mod_utf8_codepoint(ptr, 6, &end);
+            if (cp < 0) {
+                parse_error(ctxt, token, "invalid UTF-8 sequence in string");
+                goto out;
+            }
+            ptr = end;
+            len = mod_utf8_encode(utf8_buf, sizeof(utf8_buf), cp);
+            assert(len >= 0);
+            qstring_append(str, utf8_buf);
+        }
+    }
+
+    return str;
+
+out:
+    qobject_unref(str);
+    return NULL;
+}
+
+/* Note: the token object returned by parser_context_peek_token or
+ * parser_context_pop_token is deleted as soon as parser_context_pop_token
+ * is called again.
+ */
+static JSONToken *parser_context_pop_token(JSONParserContext *ctxt)
+{
+    g_free(ctxt->current);
+    ctxt->current = g_queue_pop_head(ctxt->buf);
+    return ctxt->current;
+}
+
+static JSONToken *parser_context_peek_token(JSONParserContext *ctxt)
+{
+    return g_queue_peek_head(ctxt->buf);
+}
+
+/**
+ * Parsing rules
+ */
+static int parse_pair(JSONParserContext *ctxt, QDict *dict)
+{
+    QObject *key_obj = NULL;
+    QString *key;
+    QObject *value;
+    JSONToken *peek, *token;
+
+    peek = parser_context_peek_token(ctxt);
+    if (peek == NULL) {
+        parse_error(ctxt, NULL, "premature EOI");
+        goto out;
+    }
+
+    key_obj = parse_value(ctxt);
+    key = qobject_to(QString, key_obj);
+    if (!key) {
+        parse_error(ctxt, peek, "key is not a string in object");
+        goto out;
+    }
+
+    token = parser_context_pop_token(ctxt);
+    if (token == NULL) {
+        parse_error(ctxt, NULL, "premature EOI");
+        goto out;
+    }
+
+    if (token->type != JSON_COLON) {
+        parse_error(ctxt, token, "missing : in object pair");
+        goto out;
+    }
+
+    value = parse_value(ctxt);
+    if (value == NULL) {
+        parse_error(ctxt, token, "Missing value in dict");
+        goto out;
+    }
+
+    if (qdict_haskey(dict, qstring_get_str(key))) {
+        parse_error(ctxt, token, "duplicate key");
+        goto out;
+    }
+
+    qdict_put_obj(dict, qstring_get_str(key), value);
+
+    qobject_unref(key_obj);
+    return 0;
+
+out:
+    qobject_unref(key_obj);
+    return -1;
+}
+
+static QObject *parse_object(JSONParserContext *ctxt)
+{
+    QDict *dict = NULL;
+    JSONToken *token, *peek;
+
+    token = parser_context_pop_token(ctxt);
+    assert(token && token->type == JSON_LCURLY);
+
+    dict = qdict_new();
+
+    peek = parser_context_peek_token(ctxt);
+    if (peek == NULL) {
+        parse_error(ctxt, NULL, "premature EOI");
+        goto out;
+    }
+
+    if (peek->type != JSON_RCURLY) {
+        if (parse_pair(ctxt, dict) == -1) {
+            goto out;
+        }
+
+        token = parser_context_pop_token(ctxt);
+        if (token == NULL) {
+            parse_error(ctxt, NULL, "premature EOI");
+            goto out;
+        }
+
+        while (token->type != JSON_RCURLY) {
+            if (token->type != JSON_COMMA) {
+                parse_error(ctxt, token, "expected separator in dict");
+                goto out;
+            }
+
+            if (parse_pair(ctxt, dict) == -1) {
+                goto out;
+            }
+
+            token = parser_context_pop_token(ctxt);
+            if (token == NULL) {
+                parse_error(ctxt, NULL, "premature EOI");
+                goto out;
+            }
+        }
+    } else {
+        (void)parser_context_pop_token(ctxt);
+    }
+
+    return QOBJECT(dict);
+
+out:
+    qobject_unref(dict);
+    return NULL;
+}
+
+static QObject *parse_array(JSONParserContext *ctxt)
+{
+    QList *list = NULL;
+    JSONToken *token, *peek;
+
+    token = parser_context_pop_token(ctxt);
+    assert(token && token->type == JSON_LSQUARE);
+
+    list = qlist_new();
+
+    peek = parser_context_peek_token(ctxt);
+    if (peek == NULL) {
+        parse_error(ctxt, NULL, "premature EOI");
+        goto out;
+    }
+
+    if (peek->type != JSON_RSQUARE) {
+        QObject *obj;
+
+        obj = parse_value(ctxt);
+        if (obj == NULL) {
+            parse_error(ctxt, token, "expecting value");
+            goto out;
+        }
+
+        qlist_append_obj(list, obj);
+
+        token = parser_context_pop_token(ctxt);
+        if (token == NULL) {
+            parse_error(ctxt, NULL, "premature EOI");
+            goto out;
+        }
+
+        while (token->type != JSON_RSQUARE) {
+            if (token->type != JSON_COMMA) {
+                parse_error(ctxt, token, "expected separator in list");
+                goto out;
+            }
+
+            obj = parse_value(ctxt);
+            if (obj == NULL) {
+                parse_error(ctxt, token, "expecting value");
+                goto out;
+            }
+
+            qlist_append_obj(list, obj);
+
+            token = parser_context_pop_token(ctxt);
+            if (token == NULL) {
+                parse_error(ctxt, NULL, "premature EOI");
+                goto out;
+            }
+        }
+    } else {
+        (void)parser_context_pop_token(ctxt);
+    }
+
+    return QOBJECT(list);
+
+out:
+    qobject_unref(list);
+    return NULL;
+}
+
+static QObject *parse_keyword(JSONParserContext *ctxt)
+{
+    JSONToken *token;
+
+    token = parser_context_pop_token(ctxt);
+    assert(token && token->type == JSON_KEYWORD);
+
+    if (!strcmp(token->str, "true")) {
+        return QOBJECT(qbool_from_bool(true));
+    } else if (!strcmp(token->str, "false")) {
+        return QOBJECT(qbool_from_bool(false));
+    } else if (!strcmp(token->str, "null")) {
+        return QOBJECT(qnull());
+    }
+    parse_error(ctxt, token, "invalid keyword '%s'", token->str);
+    return NULL;
+}
+
+static QObject *parse_interpolation(JSONParserContext *ctxt)
+{
+    JSONToken *token;
+
+    token = parser_context_pop_token(ctxt);
+    assert(token && token->type == JSON_INTERP);
+
+    if (!strcmp(token->str, "%p")) {
+        return va_arg(*ctxt->ap, QObject *);
+    } else if (!strcmp(token->str, "%i")) {
+        return QOBJECT(qbool_from_bool(va_arg(*ctxt->ap, int)));
+    } else if (!strcmp(token->str, "%d")) {
+        return QOBJECT(qnum_from_int(va_arg(*ctxt->ap, int)));
+    } else if (!strcmp(token->str, "%ld")) {
+        return QOBJECT(qnum_from_int(va_arg(*ctxt->ap, long)));
+    } else if (!strcmp(token->str, "%lld")) {
+        return QOBJECT(qnum_from_int(va_arg(*ctxt->ap, long long)));
+    } else if (!strcmp(token->str, "%" PRId64)) {
+        return QOBJECT(qnum_from_int(va_arg(*ctxt->ap, int64_t)));
+    } else if (!strcmp(token->str, "%u")) {
+        return QOBJECT(qnum_from_uint(va_arg(*ctxt->ap, unsigned int)));
+    } else if (!strcmp(token->str, "%lu")) {
+        return QOBJECT(qnum_from_uint(va_arg(*ctxt->ap, unsigned long)));
+    } else if (!strcmp(token->str, "%llu")) {
+        return QOBJECT(qnum_from_uint(va_arg(*ctxt->ap, unsigned long long)));
+    } else if (!strcmp(token->str, "%" PRIu64)) {
+        return QOBJECT(qnum_from_uint(va_arg(*ctxt->ap, uint64_t)));
+    } else if (!strcmp(token->str, "%s")) {
+        return QOBJECT(qstring_from_str(va_arg(*ctxt->ap, const char *)));
+    } else if (!strcmp(token->str, "%f")) {
+        return QOBJECT(qnum_from_double(va_arg(*ctxt->ap, double)));
+    }
+    parse_error(ctxt, token, "invalid interpolation '%s'", token->str);
+    return NULL;
+}
+
+static QObject *parse_literal(JSONParserContext *ctxt)
+{
+    JSONToken *token;
+
+    token = parser_context_pop_token(ctxt);
+    assert(token);
+
+    switch (token->type) {
+    case JSON_STRING:
+        return QOBJECT(parse_string(ctxt, token));
+    case JSON_INTEGER: {
+        /*
+         * Represent JSON_INTEGER as QNUM_I64 if possible, else as
+         * QNUM_U64, else as QNUM_DOUBLE.  Note that qemu_strtoi64()
+         * and qemu_strtou64() fail with ERANGE when it's not
+         * possible.
+         *
+         * qnum_get_int() will then work for any signed 64-bit
+         * JSON_INTEGER, qnum_get_uint() for any unsigned 64-bit
+         * integer, and qnum_get_double() both for any JSON_INTEGER
+         * and any JSON_FLOAT (with precision loss for integers beyond
+         * 53 bits)
+         */
+        int ret;
+        int64_t value;
+        uint64_t uvalue;
+
+        ret = qemu_strtoi64(token->str, NULL, 10, &value);
+        if (!ret) {
+            return QOBJECT(qnum_from_int(value));
+        }
+        assert(ret == -ERANGE);
+
+        if (token->str[0] != '-') {
+            ret = qemu_strtou64(token->str, NULL, 10, &uvalue);
+            if (!ret) {
+                return QOBJECT(qnum_from_uint(uvalue));
+            }
+            assert(ret == -ERANGE);
+        }
+    }
+    /* fall through to JSON_FLOAT */
+    case JSON_FLOAT:
+        /* FIXME dependent on locale; a pervasive issue in QEMU */
+        /* FIXME our lexer matches RFC 8259 in forbidding Inf or NaN,
+         * but those might be useful extensions beyond JSON */
+        return QOBJECT(qnum_from_double(strtod(token->str, NULL)));
+    default:
+        abort();
+    }
+}
+
+static QObject *parse_value(JSONParserContext *ctxt)
+{
+    JSONToken *token;
+
+    token = parser_context_peek_token(ctxt);
+    if (token == NULL) {
+        parse_error(ctxt, NULL, "premature EOI");
+        return NULL;
+    }
+
+    switch (token->type) {
+    case JSON_LCURLY:
+        return parse_object(ctxt);
+    case JSON_LSQUARE:
+        return parse_array(ctxt);
+    case JSON_INTERP:
+        return parse_interpolation(ctxt);
+    case JSON_INTEGER:
+    case JSON_FLOAT:
+    case JSON_STRING:
+        return parse_literal(ctxt);
+    case JSON_KEYWORD:
+        return parse_keyword(ctxt);
+    default:
+        parse_error(ctxt, token, "expecting value");
+        return NULL;
+    }
+}
+
+JSONToken *json_token(JSONTokenType type, int x, int y, GString *tokstr)
+{
+    JSONToken *token = g_malloc(sizeof(JSONToken) + tokstr->len + 1);
+
+    token->type = type;
+    memcpy(token->str, tokstr->str, tokstr->len);
+    token->str[tokstr->len] = 0;
+    token->x = x;
+    token->y = y;
+    return token;
+}
+
+QObject *json_parser_parse(GQueue *tokens, va_list *ap, Error **errp)
+{
+    JSONParserContext ctxt = { .buf = tokens, .ap = ap };
+    QObject *result;
+
+    result = parse_value(&ctxt);
+    assert(ctxt.err || g_queue_is_empty(ctxt.buf));
+
+    error_propagate(errp, ctxt.err);
+
+    while (!g_queue_is_empty(ctxt.buf)) {
+        parser_context_pop_token(&ctxt);
+    }
+    g_free(ctxt.current);
+
+    return result;
+}
diff --git a/qobject/json-streamer.c b/qobject/json-streamer.c
new file mode 100644
index 000000000..b93d97b99
--- /dev/null
+++ b/qobject/json-streamer.c
@@ -0,0 +1,134 @@
+/*
+ * JSON streaming support
+ *
+ * Copyright IBM, Corp. 2009
+ *
+ * Authors:
+ *  Anthony Liguori   
+ *
+ * This work is licensed under the terms of the GNU LGPL, version 2.1 or later.
+ * See the COPYING.LIB file in the top-level directory.
+ *
+ */
+
+#include "qemu/osdep.h"
+#include "qapi/error.h"
+#include "json-parser-int.h"
+
+#define MAX_TOKEN_SIZE (64ULL << 20)
+#define MAX_TOKEN_COUNT (2ULL << 20)
+#define MAX_NESTING (1 << 10)
+
+static void json_message_free_tokens(JSONMessageParser *parser)
+{
+    JSONToken *token;
+
+    while ((token = g_queue_pop_head(&parser->tokens))) {
+        g_free(token);
+    }
+}
+
+void json_message_process_token(JSONLexer *lexer, GString *input,
+                                JSONTokenType type, int x, int y)
+{
+    JSONMessageParser *parser = container_of(lexer, JSONMessageParser, lexer);
+    QObject *json = NULL;
+    Error *err = NULL;
+    JSONToken *token;
+
+    switch (type) {
+    case JSON_LCURLY:
+        parser->brace_count++;
+        break;
+    case JSON_RCURLY:
+        parser->brace_count--;
+        break;
+    case JSON_LSQUARE:
+        parser->bracket_count++;
+        break;
+    case JSON_RSQUARE:
+        parser->bracket_count--;
+        break;
+    case JSON_ERROR:
+        error_setg(&err, "JSON parse error, stray '%s'", input->str);
+        goto out_emit;
+    case JSON_END_OF_INPUT:
+        if (g_queue_is_empty(&parser->tokens)) {
+            return;
+        }
+        json = json_parser_parse(&parser->tokens, parser->ap, &err);
+        goto out_emit;
+    default:
+        break;
+    }
+
+    /*
+     * Security consideration, we limit total memory allocated per object
+     * and the maximum recursion depth that a message can force.
+     */
+    if (parser->token_size + input->len + 1 > MAX_TOKEN_SIZE) {
+        error_setg(&err, "JSON token size limit exceeded");
+        goto out_emit;
+    }
+    if (g_queue_get_length(&parser->tokens) + 1 > MAX_TOKEN_COUNT) {
+        error_setg(&err, "JSON token count limit exceeded");
+        goto out_emit;
+    }
+    if (parser->bracket_count + parser->brace_count > MAX_NESTING) {
+        error_setg(&err, "JSON nesting depth limit exceeded");
+        goto out_emit;
+    }
+
+    token = json_token(type, x, y, input);
+    parser->token_size += input->len;
+
+    g_queue_push_tail(&parser->tokens, token);
+
+    if ((parser->brace_count > 0 || parser->bracket_count > 0)
+        && parser->brace_count >= 0 && parser->bracket_count >= 0) {
+        return;
+    }
+
+    json = json_parser_parse(&parser->tokens, parser->ap, &err);
+
+out_emit:
+    parser->brace_count = 0;
+    parser->bracket_count = 0;
+    json_message_free_tokens(parser);
+    parser->token_size = 0;
+    parser->emit(parser->opaque, json, err);
+}
+
+void json_message_parser_init(JSONMessageParser *parser,
+                              void (*emit)(void *opaque, QObject *json,
+                                           Error *err),
+                              void *opaque, va_list *ap)
+{
+    parser->emit = emit;
+    parser->opaque = opaque;
+    parser->ap = ap;
+    parser->brace_count = 0;
+    parser->bracket_count = 0;
+    g_queue_init(&parser->tokens);
+    parser->token_size = 0;
+
+    json_lexer_init(&parser->lexer, !!ap);
+}
+
+void json_message_parser_feed(JSONMessageParser *parser,
+                             const char *buffer, size_t size)
+{
+    json_lexer_feed(&parser->lexer, buffer, size);
+}
+
+void json_message_parser_flush(JSONMessageParser *parser)
+{
+    json_lexer_flush(&parser->lexer);
+    assert(g_queue_is_empty(&parser->tokens));
+}
+
+void json_message_parser_destroy(JSONMessageParser *parser)
+{
+    json_lexer_destroy(&parser->lexer);
+    json_message_free_tokens(parser);
+}
diff --git a/qobject/meson.build b/qobject/meson.build
new file mode 100644
index 000000000..bb63c06b6
--- /dev/null
+++ b/qobject/meson.build
@@ -0,0 +1,3 @@
+util_ss.add(files('qnull.c', 'qnum.c', 'qstring.c', 'qdict.c', 'qlist.c', 'qbool.c',
+  'qlit.c', 'qjson.c', 'qobject.c', 'json-lexer.c', 'json-streamer.c', 'json-parser.c',
+  'block-qdict.c'))
diff --git a/qobject/qbool.c b/qobject/qbool.c
new file mode 100644
index 000000000..06dfc4349
--- /dev/null
+++ b/qobject/qbool.c
@@ -0,0 +1,57 @@
+/*
+ * QBool Module
+ *
+ * Copyright IBM, Corp. 2009
+ *
+ * Authors:
+ *  Anthony Liguori   
+ *
+ * This work is licensed under the terms of the GNU LGPL, version 2.1 or later.
+ * See the COPYING.LIB file in the top-level directory.
+ *
+ */
+
+#include "qemu/osdep.h"
+#include "qapi/qmp/qbool.h"
+
+/**
+ * qbool_from_bool(): Create a new QBool from a bool
+ *
+ * Return strong reference.
+ */
+QBool *qbool_from_bool(bool value)
+{
+    QBool *qb;
+
+    qb = g_malloc(sizeof(*qb));
+    qobject_init(QOBJECT(qb), QTYPE_QBOOL);
+    qb->value = value;
+
+    return qb;
+}
+
+/**
+ * qbool_get_bool(): Get the stored bool
+ */
+bool qbool_get_bool(const QBool *qb)
+{
+    return qb->value;
+}
+
+/**
+ * qbool_is_equal(): Test whether the two QBools are equal
+ */
+bool qbool_is_equal(const QObject *x, const QObject *y)
+{
+    return qobject_to(QBool, x)->value == qobject_to(QBool, y)->value;
+}
+
+/**
+ * qbool_destroy_obj(): Free all memory allocated by a
+ * QBool object
+ */
+void qbool_destroy_obj(QObject *obj)
+{
+    assert(obj != NULL);
+    g_free(qobject_to(QBool, obj));
+}
diff --git a/qobject/qdict.c b/qobject/qdict.c
new file mode 100644
index 000000000..1079bd3f6
--- /dev/null
+++ b/qobject/qdict.c
@@ -0,0 +1,441 @@
+/*
+ * QDict Module
+ *
+ * Copyright (C) 2009 Red Hat Inc.
+ *
+ * Authors:
+ *  Luiz Capitulino 
+ *
+ * This work is licensed under the terms of the GNU LGPL, version 2.1 or later.
+ * See the COPYING.LIB file in the top-level directory.
+ */
+
+#include "qemu/osdep.h"
+#include "qapi/qmp/qnum.h"
+#include "qapi/qmp/qdict.h"
+#include "qapi/qmp/qbool.h"
+#include "qapi/qmp/qnull.h"
+#include "qapi/qmp/qstring.h"
+
+/**
+ * qdict_new(): Create a new QDict
+ *
+ * Return strong reference.
+ */
+QDict *qdict_new(void)
+{
+    QDict *qdict;
+
+    qdict = g_malloc0(sizeof(*qdict));
+    qobject_init(QOBJECT(qdict), QTYPE_QDICT);
+
+    return qdict;
+}
+
+/**
+ * tdb_hash(): based on the hash algorithm from gdbm, via tdb
+ * (from module-init-tools)
+ */
+static unsigned int tdb_hash(const char *name)
+{
+    unsigned value;	/* Used to compute the hash value.  */
+    unsigned   i;	/* Used to cycle through random values. */
+
+    /* Set the initial value from the key size. */
+    for (value = 0x238F13AF * strlen(name), i=0; name[i]; i++)
+        value = (value + (((const unsigned char *)name)[i] << (i*5 % 24)));
+
+    return (1103515243 * value + 12345);
+}
+
+/**
+ * alloc_entry(): allocate a new QDictEntry
+ */
+static QDictEntry *alloc_entry(const char *key, QObject *value)
+{
+    QDictEntry *entry;
+
+    entry = g_malloc0(sizeof(*entry));
+    entry->key = g_strdup(key);
+    entry->value = value;
+
+    return entry;
+}
+
+/**
+ * qdict_entry_value(): Return qdict entry value
+ *
+ * Return weak reference.
+ */
+QObject *qdict_entry_value(const QDictEntry *entry)
+{
+    return entry->value;
+}
+
+/**
+ * qdict_entry_key(): Return qdict entry key
+ *
+ * Return a *pointer* to the string, it has to be duplicated before being
+ * stored.
+ */
+const char *qdict_entry_key(const QDictEntry *entry)
+{
+    return entry->key;
+}
+
+/**
+ * qdict_find(): List lookup function
+ */
+static QDictEntry *qdict_find(const QDict *qdict,
+                              const char *key, unsigned int bucket)
+{
+    QDictEntry *entry;
+
+    QLIST_FOREACH(entry, &qdict->table[bucket], next)
+        if (!strcmp(entry->key, key))
+            return entry;
+
+    return NULL;
+}
+
+/**
+ * qdict_put_obj(): Put a new QObject into the dictionary
+ *
+ * Insert the pair 'key:value' into 'qdict', if 'key' already exists
+ * its 'value' will be replaced.
+ *
+ * This is done by freeing the reference to the stored QObject and
+ * storing the new one in the same entry.
+ *
+ * NOTE: ownership of 'value' is transferred to the QDict
+ */
+void qdict_put_obj(QDict *qdict, const char *key, QObject *value)
+{
+    unsigned int bucket;
+    QDictEntry *entry;
+
+    bucket = tdb_hash(key) % QDICT_BUCKET_MAX;
+    entry = qdict_find(qdict, key, bucket);
+    if (entry) {
+        /* replace key's value */
+        qobject_unref(entry->value);
+        entry->value = value;
+    } else {
+        /* allocate a new entry */
+        entry = alloc_entry(key, value);
+        QLIST_INSERT_HEAD(&qdict->table[bucket], entry, next);
+        qdict->size++;
+    }
+}
+
+void qdict_put_int(QDict *qdict, const char *key, int64_t value)
+{
+    qdict_put(qdict, key, qnum_from_int(value));
+}
+
+void qdict_put_bool(QDict *qdict, const char *key, bool value)
+{
+    qdict_put(qdict, key, qbool_from_bool(value));
+}
+
+void qdict_put_str(QDict *qdict, const char *key, const char *value)
+{
+    qdict_put(qdict, key, qstring_from_str(value));
+}
+
+void qdict_put_null(QDict *qdict, const char *key)
+{
+    qdict_put(qdict, key, qnull());
+}
+
+/**
+ * qdict_get(): Lookup for a given 'key'
+ *
+ * Return a weak reference to the QObject associated with 'key' if
+ * 'key' is present in the dictionary, NULL otherwise.
+ */
+QObject *qdict_get(const QDict *qdict, const char *key)
+{
+    QDictEntry *entry;
+
+    entry = qdict_find(qdict, key, tdb_hash(key) % QDICT_BUCKET_MAX);
+    return (entry == NULL ? NULL : entry->value);
+}
+
+/**
+ * qdict_haskey(): Check if 'key' exists
+ *
+ * Return 1 if 'key' exists in the dict, 0 otherwise
+ */
+int qdict_haskey(const QDict *qdict, const char *key)
+{
+    unsigned int bucket = tdb_hash(key) % QDICT_BUCKET_MAX;
+    return (qdict_find(qdict, key, bucket) == NULL ? 0 : 1);
+}
+
+/**
+ * qdict_size(): Return the size of the dictionary
+ */
+size_t qdict_size(const QDict *qdict)
+{
+    return qdict->size;
+}
+
+/**
+ * qdict_get_double(): Get an number mapped by 'key'
+ *
+ * This function assumes that 'key' exists and it stores a QNum.
+ *
+ * Return number mapped by 'key'.
+ */
+double qdict_get_double(const QDict *qdict, const char *key)
+{
+    return qnum_get_double(qobject_to(QNum, qdict_get(qdict, key)));
+}
+
+/**
+ * qdict_get_int(): Get an integer mapped by 'key'
+ *
+ * This function assumes that 'key' exists and it stores a
+ * QNum representable as int.
+ *
+ * Return integer mapped by 'key'.
+ */
+int64_t qdict_get_int(const QDict *qdict, const char *key)
+{
+    return qnum_get_int(qobject_to(QNum, qdict_get(qdict, key)));
+}
+
+/**
+ * qdict_get_bool(): Get a bool mapped by 'key'
+ *
+ * This function assumes that 'key' exists and it stores a
+ * QBool object.
+ *
+ * Return bool mapped by 'key'.
+ */
+bool qdict_get_bool(const QDict *qdict, const char *key)
+{
+    return qbool_get_bool(qobject_to(QBool, qdict_get(qdict, key)));
+}
+
+/**
+ * qdict_get_qlist(): If @qdict maps @key to a QList, return it, else NULL.
+ */
+QList *qdict_get_qlist(const QDict *qdict, const char *key)
+{
+    return qobject_to(QList, qdict_get(qdict, key));
+}
+
+/**
+ * qdict_get_qdict(): If @qdict maps @key to a QDict, return it, else NULL.
+ */
+QDict *qdict_get_qdict(const QDict *qdict, const char *key)
+{
+    return qobject_to(QDict, qdict_get(qdict, key));
+}
+
+/**
+ * qdict_get_str(): Get a pointer to the stored string mapped
+ * by 'key'
+ *
+ * This function assumes that 'key' exists and it stores a
+ * QString object.
+ *
+ * Return pointer to the string mapped by 'key'.
+ */
+const char *qdict_get_str(const QDict *qdict, const char *key)
+{
+    return qstring_get_str(qobject_to(QString, qdict_get(qdict, key)));
+}
+
+/**
+ * qdict_get_try_int(): Try to get integer mapped by 'key'
+ *
+ * Return integer mapped by 'key', if it is not present in the
+ * dictionary or if the stored object is not a QNum representing an
+ * integer, 'def_value' will be returned.
+ */
+int64_t qdict_get_try_int(const QDict *qdict, const char *key,
+                          int64_t def_value)
+{
+    QNum *qnum = qobject_to(QNum, qdict_get(qdict, key));
+    int64_t val;
+
+    if (!qnum || !qnum_get_try_int(qnum, &val)) {
+        return def_value;
+    }
+
+    return val;
+}
+
+/**
+ * qdict_get_try_bool(): Try to get a bool mapped by 'key'
+ *
+ * Return bool mapped by 'key', if it is not present in the
+ * dictionary or if the stored object is not of QBool type
+ * 'def_value' will be returned.
+ */
+bool qdict_get_try_bool(const QDict *qdict, const char *key, bool def_value)
+{
+    QBool *qbool = qobject_to(QBool, qdict_get(qdict, key));
+
+    return qbool ? qbool_get_bool(qbool) : def_value;
+}
+
+/**
+ * qdict_get_try_str(): Try to get a pointer to the stored string
+ * mapped by 'key'
+ *
+ * Return a pointer to the string mapped by 'key', if it is not present
+ * in the dictionary or if the stored object is not of QString type
+ * NULL will be returned.
+ */
+const char *qdict_get_try_str(const QDict *qdict, const char *key)
+{
+    QString *qstr = qobject_to(QString, qdict_get(qdict, key));
+
+    return qstr ? qstring_get_str(qstr) : NULL;
+}
+
+static QDictEntry *qdict_next_entry(const QDict *qdict, int first_bucket)
+{
+    int i;
+
+    for (i = first_bucket; i < QDICT_BUCKET_MAX; i++) {
+        if (!QLIST_EMPTY(&qdict->table[i])) {
+            return QLIST_FIRST(&qdict->table[i]);
+        }
+    }
+
+    return NULL;
+}
+
+/**
+ * qdict_first(): Return first qdict entry for iteration.
+ */
+const QDictEntry *qdict_first(const QDict *qdict)
+{
+    return qdict_next_entry(qdict, 0);
+}
+
+/**
+ * qdict_next(): Return next qdict entry in an iteration.
+ */
+const QDictEntry *qdict_next(const QDict *qdict, const QDictEntry *entry)
+{
+    QDictEntry *ret;
+
+    ret = QLIST_NEXT(entry, next);
+    if (!ret) {
+        unsigned int bucket = tdb_hash(entry->key) % QDICT_BUCKET_MAX;
+        ret = qdict_next_entry(qdict, bucket + 1);
+    }
+
+    return ret;
+}
+
+/**
+ * qdict_clone_shallow(): Clones a given QDict. Its entries are not copied, but
+ * another reference is added.
+ */
+QDict *qdict_clone_shallow(const QDict *src)
+{
+    QDict *dest;
+    QDictEntry *entry;
+    int i;
+
+    dest = qdict_new();
+
+    for (i = 0; i < QDICT_BUCKET_MAX; i++) {
+        QLIST_FOREACH(entry, &src->table[i], next) {
+            qdict_put_obj(dest, entry->key, qobject_ref(entry->value));
+        }
+    }
+
+    return dest;
+}
+
+/**
+ * qentry_destroy(): Free all the memory allocated by a QDictEntry
+ */
+static void qentry_destroy(QDictEntry *e)
+{
+    assert(e != NULL);
+    assert(e->key != NULL);
+    assert(e->value != NULL);
+
+    qobject_unref(e->value);
+    g_free(e->key);
+    g_free(e);
+}
+
+/**
+ * qdict_del(): Delete a 'key:value' pair from the dictionary
+ *
+ * This will destroy all data allocated by this entry.
+ */
+void qdict_del(QDict *qdict, const char *key)
+{
+    QDictEntry *entry;
+
+    entry = qdict_find(qdict, key, tdb_hash(key) % QDICT_BUCKET_MAX);
+    if (entry) {
+        QLIST_REMOVE(entry, next);
+        qentry_destroy(entry);
+        qdict->size--;
+    }
+}
+
+/**
+ * qdict_is_equal(): Test whether the two QDicts are equal
+ *
+ * Here, equality means whether they contain the same keys and whether
+ * the respective values are in turn equal (i.e. invoking
+ * qobject_is_equal() on them yields true).
+ */
+bool qdict_is_equal(const QObject *x, const QObject *y)
+{
+    const QDict *dict_x = qobject_to(QDict, x);
+    const QDict *dict_y = qobject_to(QDict, y);
+    const QDictEntry *e;
+
+    if (qdict_size(dict_x) != qdict_size(dict_y)) {
+        return false;
+    }
+
+    for (e = qdict_first(dict_x); e; e = qdict_next(dict_x, e)) {
+        const QObject *obj_x = qdict_entry_value(e);
+        const QObject *obj_y = qdict_get(dict_y, qdict_entry_key(e));
+
+        if (!qobject_is_equal(obj_x, obj_y)) {
+            return false;
+        }
+    }
+
+    return true;
+}
+
+/**
+ * qdict_destroy_obj(): Free all the memory allocated by a QDict
+ */
+void qdict_destroy_obj(QObject *obj)
+{
+    int i;
+    QDict *qdict;
+
+    assert(obj != NULL);
+    qdict = qobject_to(QDict, obj);
+
+    for (i = 0; i < QDICT_BUCKET_MAX; i++) {
+        QDictEntry *entry = QLIST_FIRST(&qdict->table[i]);
+        while (entry) {
+            QDictEntry *tmp = QLIST_NEXT(entry, next);
+            QLIST_REMOVE(entry, next);
+            qentry_destroy(entry);
+            entry = tmp;
+        }
+    }
+
+    g_free(qdict);
+}
diff --git a/qobject/qjson.c b/qobject/qjson.c
new file mode 100644
index 000000000..f1f2c6970
--- /dev/null
+++ b/qobject/qjson.c
@@ -0,0 +1,313 @@
+/*
+ * QObject JSON integration
+ *
+ * Copyright IBM, Corp. 2009
+ *
+ * Authors:
+ *  Anthony Liguori   
+ *
+ * This work is licensed under the terms of the GNU LGPL, version 2.1 or later.
+ * See the COPYING.LIB file in the top-level directory.
+ *
+ */
+
+#include "qemu/osdep.h"
+#include "qapi/error.h"
+#include "qapi/qmp/json-parser.h"
+#include "qapi/qmp/qjson.h"
+#include "qapi/qmp/qbool.h"
+#include "qapi/qmp/qdict.h"
+#include "qapi/qmp/qlist.h"
+#include "qapi/qmp/qnum.h"
+#include "qapi/qmp/qstring.h"
+#include "qemu/unicode.h"
+
+typedef struct JSONParsingState
+{
+    JSONMessageParser parser;
+    QObject *result;
+    Error *err;
+} JSONParsingState;
+
+static void consume_json(void *opaque, QObject *json, Error *err)
+{
+    JSONParsingState *s = opaque;
+
+    assert(!json != !err);
+    assert(!s->result || !s->err);
+
+    if (s->result) {
+        qobject_unref(s->result);
+        s->result = NULL;
+        error_setg(&s->err, "Expecting at most one JSON value");
+    }
+    if (s->err) {
+        qobject_unref(json);
+        error_free(err);
+        return;
+    }
+    s->result = json;
+    s->err = err;
+}
+
+/*
+ * Parse @string as JSON value.
+ * If @ap is non-null, interpolate %-escapes.
+ * Takes ownership of %p arguments.
+ * On success, return the JSON value.
+ * On failure, store an error through @errp and return NULL.
+ * Ownership of %p arguments becomes indeterminate then.  To avoid
+ * leaks, callers passing %p must terminate on error, e.g. by passing
+ * &error_abort.
+ */
+static QObject *qobject_from_jsonv(const char *string, va_list *ap,
+                                   Error **errp)
+{
+    JSONParsingState state = {};
+
+    json_message_parser_init(&state.parser, consume_json, &state, ap);
+    json_message_parser_feed(&state.parser, string, strlen(string));
+    json_message_parser_flush(&state.parser);
+    json_message_parser_destroy(&state.parser);
+
+    if (!state.result && !state.err) {
+        error_setg(&state.err, "Expecting a JSON value");
+    }
+
+    error_propagate(errp, state.err);
+    return state.result;
+}
+
+QObject *qobject_from_json(const char *string, Error **errp)
+{
+    return qobject_from_jsonv(string, NULL, errp);
+}
+
+/*
+ * Parse @string as JSON value with %-escapes interpolated.
+ * Abort on error.  Do not use with untrusted @string.
+ * Return the resulting QObject.  It is never null.
+ */
+QObject *qobject_from_vjsonf_nofail(const char *string, va_list ap)
+{
+    va_list ap_copy;
+    QObject *obj;
+
+    /* va_copy() is needed when va_list is an array type */
+    va_copy(ap_copy, ap);
+    obj = qobject_from_jsonv(string, &ap_copy, &error_abort);
+    va_end(ap_copy);
+
+    assert(obj);
+    return obj;
+}
+
+/*
+ * Parse @string as JSON value with %-escapes interpolated.
+ * Abort on error.  Do not use with untrusted @string.
+ * Return the resulting QObject.  It is never null.
+ */
+QObject *qobject_from_jsonf_nofail(const char *string, ...)
+{
+    QObject *obj;
+    va_list ap;
+
+    va_start(ap, string);
+    obj = qobject_from_vjsonf_nofail(string, ap);
+    va_end(ap);
+
+    return obj;
+}
+
+/*
+ * Parse @string as JSON object with %-escapes interpolated.
+ * Abort on error.  Do not use with untrusted @string.
+ * Return the resulting QDict.  It is never null.
+ */
+QDict *qdict_from_vjsonf_nofail(const char *string, va_list ap)
+{
+    QDict *qdict;
+
+    qdict = qobject_to(QDict, qobject_from_vjsonf_nofail(string, ap));
+    assert(qdict);
+    return qdict;
+}
+
+/*
+ * Parse @string as JSON object with %-escapes interpolated.
+ * Abort on error.  Do not use with untrusted @string.
+ * Return the resulting QDict.  It is never null.
+ */
+QDict *qdict_from_jsonf_nofail(const char *string, ...)
+{
+    QDict *qdict;
+    va_list ap;
+
+    va_start(ap, string);
+    qdict = qdict_from_vjsonf_nofail(string, ap);
+    va_end(ap);
+    return qdict;
+}
+
+static void to_json(const QObject *obj, QString *str, int pretty, int indent);
+
+static void json_pretty_newline(QString *str, bool pretty, int indent)
+{
+    int i;
+
+    if (pretty) {
+        qstring_append(str, "\n");
+        for (i = 0; i < indent; i++) {
+            qstring_append(str, "    ");
+        }
+    }
+}
+
+static void to_json(const QObject *obj, QString *str, int pretty, int indent)
+{
+    switch (qobject_type(obj)) {
+    case QTYPE_QNULL:
+        qstring_append(str, "null");
+        break;
+    case QTYPE_QNUM: {
+        QNum *val = qobject_to(QNum, obj);
+        char *buffer = qnum_to_string(val);
+        qstring_append(str, buffer);
+        g_free(buffer);
+        break;
+    }
+    case QTYPE_QSTRING: {
+        QString *val = qobject_to(QString, obj);
+        const char *ptr;
+        int cp;
+        char buf[16];
+        char *end;
+
+        ptr = qstring_get_str(val);
+        qstring_append(str, "\"");
+
+        for (; *ptr; ptr = end) {
+            cp = mod_utf8_codepoint(ptr, 6, &end);
+            switch (cp) {
+            case '\"':
+                qstring_append(str, "\\\"");
+                break;
+            case '\\':
+                qstring_append(str, "\\\\");
+                break;
+            case '\b':
+                qstring_append(str, "\\b");
+                break;
+            case '\f':
+                qstring_append(str, "\\f");
+                break;
+            case '\n':
+                qstring_append(str, "\\n");
+                break;
+            case '\r':
+                qstring_append(str, "\\r");
+                break;
+            case '\t':
+                qstring_append(str, "\\t");
+                break;
+            default:
+                if (cp < 0) {
+                    cp = 0xFFFD; /* replacement character */
+                }
+                if (cp > 0xFFFF) {
+                    /* beyond BMP; need a surrogate pair */
+                    snprintf(buf, sizeof(buf), "\\u%04X\\u%04X",
+                             0xD800 + ((cp - 0x10000) >> 10),
+                             0xDC00 + ((cp - 0x10000) & 0x3FF));
+                } else if (cp < 0x20 || cp >= 0x7F) {
+                    snprintf(buf, sizeof(buf), "\\u%04X", cp);
+                } else {
+                    buf[0] = cp;
+                    buf[1] = 0;
+                }
+                qstring_append(str, buf);
+            }
+        };
+
+        qstring_append(str, "\"");
+        break;
+    }
+    case QTYPE_QDICT: {
+        QDict *val = qobject_to(QDict, obj);
+        const char *comma = pretty ? "," : ", ";
+        const char *sep = "";
+        const QDictEntry *entry;
+        QString *qkey;
+
+        qstring_append(str, "{");
+
+        for (entry = qdict_first(val);
+             entry;
+             entry = qdict_next(val, entry)) {
+            qstring_append(str, sep);
+            json_pretty_newline(str, pretty, indent + 1);
+
+            qkey = qstring_from_str(qdict_entry_key(entry));
+            to_json(QOBJECT(qkey), str, pretty, indent + 1);
+            qobject_unref(qkey);
+
+            qstring_append(str, ": ");
+            to_json(qdict_entry_value(entry), str, pretty, indent + 1);
+            sep = comma;
+        }
+
+        json_pretty_newline(str, pretty, indent);
+        qstring_append(str, "}");
+        break;
+    }
+    case QTYPE_QLIST: {
+        QList *val = qobject_to(QList, obj);
+        const char *comma = pretty ? "," : ", ";
+        const char *sep = "";
+        QListEntry *entry;
+
+        qstring_append(str, "[");
+
+        QLIST_FOREACH_ENTRY(val, entry) {
+            qstring_append(str, sep);
+            json_pretty_newline(str, pretty, indent + 1);
+            to_json(qlist_entry_obj(entry), str, pretty, indent + 1);
+            sep = comma;
+        }
+
+        json_pretty_newline(str, pretty, indent);
+        qstring_append(str, "]");
+        break;
+    }
+    case QTYPE_QBOOL: {
+        QBool *val = qobject_to(QBool, obj);
+
+        if (qbool_get_bool(val)) {
+            qstring_append(str, "true");
+        } else {
+            qstring_append(str, "false");
+        }
+        break;
+    }
+    default:
+        abort();
+    }
+}
+
+QString *qobject_to_json(const QObject *obj)
+{
+    QString *str = qstring_new();
+
+    to_json(obj, str, 0, 0);
+
+    return str;
+}
+
+QString *qobject_to_json_pretty(const QObject *obj)
+{
+    QString *str = qstring_new();
+
+    to_json(obj, str, 1, 0);
+
+    return str;
+}
diff --git a/qobject/qlist.c b/qobject/qlist.c
new file mode 100644
index 000000000..1be95367d
--- /dev/null
+++ b/qobject/qlist.c
@@ -0,0 +1,183 @@
+/*
+ * QList Module
+ *
+ * Copyright (C) 2009 Red Hat Inc.
+ *
+ * Authors:
+ *  Luiz Capitulino 
+ *
+ * This work is licensed under the terms of the GNU LGPL, version 2.1 or later.
+ * See the COPYING.LIB file in the top-level directory.
+ */
+
+#include "qemu/osdep.h"
+#include "qapi/qmp/qbool.h"
+#include "qapi/qmp/qlist.h"
+#include "qapi/qmp/qnull.h"
+#include "qapi/qmp/qnum.h"
+#include "qapi/qmp/qstring.h"
+#include "qemu/queue.h"
+
+/**
+ * qlist_new(): Create a new QList
+ *
+ * Return strong reference.
+ */
+QList *qlist_new(void)
+{
+    QList *qlist;
+
+    qlist = g_malloc(sizeof(*qlist));
+    qobject_init(QOBJECT(qlist), QTYPE_QLIST);
+    QTAILQ_INIT(&qlist->head);
+
+    return qlist;
+}
+
+QList *qlist_copy(QList *src)
+{
+    QList *dst = qlist_new();
+    QListEntry *entry;
+    QObject *elt;
+
+    QLIST_FOREACH_ENTRY(src, entry) {
+        elt = qlist_entry_obj(entry);
+        qobject_ref(elt);
+        qlist_append_obj(dst, elt);
+    }
+    return dst;
+}
+
+/**
+ * qlist_append_obj(): Append an QObject into QList
+ *
+ * NOTE: ownership of 'value' is transferred to the QList
+ */
+void qlist_append_obj(QList *qlist, QObject *value)
+{
+    QListEntry *entry;
+
+    entry = g_malloc(sizeof(*entry));
+    entry->value = value;
+
+    QTAILQ_INSERT_TAIL(&qlist->head, entry, next);
+}
+
+void qlist_append_int(QList *qlist, int64_t value)
+{
+    qlist_append(qlist, qnum_from_int(value));
+}
+
+void qlist_append_bool(QList *qlist, bool value)
+{
+    qlist_append(qlist, qbool_from_bool(value));
+}
+
+void qlist_append_str(QList *qlist, const char *value)
+{
+    qlist_append(qlist, qstring_from_str(value));
+}
+
+void qlist_append_null(QList *qlist)
+{
+    qlist_append(qlist, qnull());
+}
+
+QObject *qlist_pop(QList *qlist)
+{
+    QListEntry *entry;
+    QObject *ret;
+
+    if (qlist == NULL || QTAILQ_EMPTY(&qlist->head)) {
+        return NULL;
+    }
+
+    entry = QTAILQ_FIRST(&qlist->head);
+    QTAILQ_REMOVE(&qlist->head, entry, next);
+
+    ret = entry->value;
+    g_free(entry);
+
+    return ret;
+}
+
+QObject *qlist_peek(QList *qlist)
+{
+    QListEntry *entry;
+
+    if (qlist == NULL || QTAILQ_EMPTY(&qlist->head)) {
+        return NULL;
+    }
+
+    entry = QTAILQ_FIRST(&qlist->head);
+
+    return entry->value;
+}
+
+int qlist_empty(const QList *qlist)
+{
+    return QTAILQ_EMPTY(&qlist->head);
+}
+
+size_t qlist_size(const QList *qlist)
+{
+    size_t count = 0;
+    QListEntry *entry;
+
+    QLIST_FOREACH_ENTRY(qlist, entry) {
+        count++;
+    }
+    return count;
+}
+
+/**
+ * qlist_is_equal(): Test whether the two QLists are equal
+ *
+ * In order to be considered equal, the respective two objects at each
+ * index of the two lists have to compare equal (regarding
+ * qobject_is_equal()), and both lists have to have the same number of
+ * elements.
+ * That means both lists have to contain equal objects in equal order.
+ */
+bool qlist_is_equal(const QObject *x, const QObject *y)
+{
+    const QList *list_x = qobject_to(QList, x);
+    const QList *list_y = qobject_to(QList, y);
+    const QListEntry *entry_x, *entry_y;
+
+    entry_x = qlist_first(list_x);
+    entry_y = qlist_first(list_y);
+
+    while (entry_x && entry_y) {
+        if (!qobject_is_equal(qlist_entry_obj(entry_x),
+                              qlist_entry_obj(entry_y)))
+        {
+            return false;
+        }
+
+        entry_x = qlist_next(entry_x);
+        entry_y = qlist_next(entry_y);
+    }
+
+    return !entry_x && !entry_y;
+}
+
+/**
+ * qlist_destroy_obj(): Free all the memory allocated by a QList
+ */
+void qlist_destroy_obj(QObject *obj)
+{
+    QList *qlist;
+    QListEntry *entry, *next_entry;
+
+    assert(obj != NULL);
+    qlist = qobject_to(QList, obj);
+
+    QTAILQ_FOREACH_SAFE(entry, &qlist->head, next, next_entry) {
+        QTAILQ_REMOVE(&qlist->head, entry, next);
+        qobject_unref(entry->value);
+        g_free(entry);
+    }
+
+    g_free(qlist);
+}
diff --git a/qobject/qlit.c b/qobject/qlit.c
new file mode 100644
index 000000000..be8332136
--- /dev/null
+++ b/qobject/qlit.c
@@ -0,0 +1,125 @@
+/*
+ * QLit literal qobject
+ *
+ * Copyright IBM, Corp. 2009
+ * Copyright (c) 2013, 2015, 2017 Red Hat Inc.
+ *
+ * Authors:
+ *  Anthony Liguori   
+ *  Markus Armbruster 
+ *  Marc-André Lureau 
+ *
+ * This work is licensed under the terms of the GNU LGPL, version 2.1 or later.
+ * See the COPYING.LIB file in the top-level directory.
+ */
+
+#include "qemu/osdep.h"
+
+#include "qapi/qmp/qlit.h"
+#include "qapi/qmp/qbool.h"
+#include "qapi/qmp/qlist.h"
+#include "qapi/qmp/qnum.h"
+#include "qapi/qmp/qdict.h"
+#include "qapi/qmp/qstring.h"
+#include "qapi/qmp/qnull.h"
+
+static bool qlit_equal_qdict(const QLitObject *lhs, const QDict *qdict)
+{
+    int i;
+
+    for (i = 0; lhs->value.qdict[i].key; i++) {
+        QObject *obj = qdict_get(qdict, lhs->value.qdict[i].key);
+
+        if (!qlit_equal_qobject(&lhs->value.qdict[i].value, obj)) {
+            return false;
+        }
+    }
+
+    /* Note: the literal qdict must not contain duplicates, this is
+     * considered a programming error and it isn't checked here. */
+    if (qdict_size(qdict) != i) {
+        return false;
+    }
+
+    return true;
+}
+
+static bool qlit_equal_qlist(const QLitObject *lhs, const QList *qlist)
+{
+    QListEntry *e;
+    int i = 0;
+
+    QLIST_FOREACH_ENTRY(qlist, e) {
+        QObject *obj = qlist_entry_obj(e);
+
+        if (!qlit_equal_qobject(&lhs->value.qlist[i], obj)) {
+            return false;
+        }
+        i++;
+    }
+
+    return !e && lhs->value.qlist[i].type == QTYPE_NONE;
+}
+
+bool qlit_equal_qobject(const QLitObject *lhs, const QObject *rhs)
+{
+    if (!rhs || lhs->type != qobject_type(rhs)) {
+        return false;
+    }
+
+    switch (lhs->type) {
+    case QTYPE_QBOOL:
+        return lhs->value.qbool == qbool_get_bool(qobject_to(QBool, rhs));
+    case QTYPE_QNUM:
+        return lhs->value.qnum ==  qnum_get_int(qobject_to(QNum, rhs));
+    case QTYPE_QSTRING:
+        return (strcmp(lhs->value.qstr,
+                       qstring_get_str(qobject_to(QString, rhs))) == 0);
+    case QTYPE_QDICT:
+        return qlit_equal_qdict(lhs, qobject_to(QDict, rhs));
+    case QTYPE_QLIST:
+        return qlit_equal_qlist(lhs, qobject_to(QList, rhs));
+    case QTYPE_QNULL:
+        return true;
+    default:
+        break;
+    }
+
+    return false;
+}
+
+QObject *qobject_from_qlit(const QLitObject *qlit)
+{
+    switch (qlit->type) {
+    case QTYPE_QNULL:
+        return QOBJECT(qnull());
+    case QTYPE_QNUM:
+        return QOBJECT(qnum_from_int(qlit->value.qnum));
+    case QTYPE_QSTRING:
+        return QOBJECT(qstring_from_str(qlit->value.qstr));
+    case QTYPE_QDICT: {
+        QDict *qdict = qdict_new();
+        QLitDictEntry *e;
+
+        for (e = qlit->value.qdict; e->key; e++) {
+            qdict_put_obj(qdict, e->key, qobject_from_qlit(&e->value));
+        }
+        return QOBJECT(qdict);
+    }
+    case QTYPE_QLIST: {
+        QList *qlist = qlist_new();
+        QLitObject *e;
+
+        for (e = qlit->value.qlist; e->type != QTYPE_NONE; e++) {
+            qlist_append_obj(qlist, qobject_from_qlit(e));
+        }
+        return QOBJECT(qlist);
+    }
+    case QTYPE_QBOOL:
+        return QOBJECT(qbool_from_bool(qlit->value.qbool));
+    default:
+        assert(0);
+    }
+
+    return NULL;
+}
diff --git a/qobject/qnull.c b/qobject/qnull.c
new file mode 100644
index 000000000..00870a182
--- /dev/null
+++ b/qobject/qnull.c
@@ -0,0 +1,30 @@
+/*
+ * QNull
+ *
+ * Copyright (C) 2015 Red Hat, Inc.
+ *
+ * Authors:
+ *  Markus Armbruster 
+ *
+ * This work is licensed under the terms of the GNU LGPL, version 2.1
+ * or later.  See the COPYING.LIB file in the top-level directory.
+ */
+
+#include "qemu/osdep.h"
+#include "qapi/qmp/qnull.h"
+
+QNull qnull_ = {
+    .base = {
+        .type = QTYPE_QNULL,
+        .refcnt = 1,
+    },
+};
+
+/**
+ * qnull_is_equal(): Always return true because any two QNull objects
+ * are equal.
+ */
+bool qnull_is_equal(const QObject *x, const QObject *y)
+{
+    return true;
+}
diff --git a/qobject/qnum.c b/qobject/qnum.c
new file mode 100644
index 000000000..7012fc57f
--- /dev/null
+++ b/qobject/qnum.c
@@ -0,0 +1,263 @@
+/*
+ * QNum Module
+ *
+ * Copyright (C) 2009 Red Hat Inc.
+ *
+ * Authors:
+ *  Luiz Capitulino 
+ *  Anthony Liguori 
+ *  Marc-André Lureau 
+ *
+ * This work is licensed under the terms of the GNU LGPL, version 2.1 or later.
+ * See the COPYING.LIB file in the top-level directory.
+ */
+
+#include "qemu/osdep.h"
+#include "qapi/qmp/qnum.h"
+
+/**
+ * qnum_from_int(): Create a new QNum from an int64_t
+ *
+ * Return strong reference.
+ */
+QNum *qnum_from_int(int64_t value)
+{
+    QNum *qn = g_new(QNum, 1);
+
+    qobject_init(QOBJECT(qn), QTYPE_QNUM);
+    qn->kind = QNUM_I64;
+    qn->u.i64 = value;
+
+    return qn;
+}
+
+/**
+ * qnum_from_uint(): Create a new QNum from an uint64_t
+ *
+ * Return strong reference.
+ */
+QNum *qnum_from_uint(uint64_t value)
+{
+    QNum *qn = g_new(QNum, 1);
+
+    qobject_init(QOBJECT(qn), QTYPE_QNUM);
+    qn->kind = QNUM_U64;
+    qn->u.u64 = value;
+
+    return qn;
+}
+
+/**
+ * qnum_from_double(): Create a new QNum from a double
+ *
+ * Return strong reference.
+ */
+QNum *qnum_from_double(double value)
+{
+    QNum *qn = g_new(QNum, 1);
+
+    qobject_init(QOBJECT(qn), QTYPE_QNUM);
+    qn->kind = QNUM_DOUBLE;
+    qn->u.dbl = value;
+
+    return qn;
+}
+
+/**
+ * qnum_get_try_int(): Get an integer representation of the number
+ *
+ * Return true on success.
+ */
+bool qnum_get_try_int(const QNum *qn, int64_t *val)
+{
+    switch (qn->kind) {
+    case QNUM_I64:
+        *val = qn->u.i64;
+        return true;
+    case QNUM_U64:
+        if (qn->u.u64 > INT64_MAX) {
+            return false;
+        }
+        *val = qn->u.u64;
+        return true;
+    case QNUM_DOUBLE:
+        return false;
+    }
+
+    assert(0);
+    return false;
+}
+
+/**
+ * qnum_get_int(): Get an integer representation of the number
+ *
+ * assert() on failure.
+ */
+int64_t qnum_get_int(const QNum *qn)
+{
+    int64_t val;
+    bool success = qnum_get_try_int(qn, &val);
+    assert(success);
+    return val;
+}
+
+/**
+ * qnum_get_uint(): Get an unsigned integer from the number
+ *
+ * Return true on success.
+ */
+bool qnum_get_try_uint(const QNum *qn, uint64_t *val)
+{
+    switch (qn->kind) {
+    case QNUM_I64:
+        if (qn->u.i64 < 0) {
+            return false;
+        }
+        *val = qn->u.i64;
+        return true;
+    case QNUM_U64:
+        *val = qn->u.u64;
+        return true;
+    case QNUM_DOUBLE:
+        return false;
+    }
+
+    assert(0);
+    return false;
+}
+
+/**
+ * qnum_get_uint(): Get an unsigned integer from the number
+ *
+ * assert() on failure.
+ */
+uint64_t qnum_get_uint(const QNum *qn)
+{
+    uint64_t val;
+    bool success = qnum_get_try_uint(qn, &val);
+    assert(success);
+    return val;
+}
+
+/**
+ * qnum_get_double(): Get a float representation of the number
+ *
+ * qnum_get_double() loses precision for integers beyond 53 bits.
+ */
+double qnum_get_double(QNum *qn)
+{
+    switch (qn->kind) {
+    case QNUM_I64:
+        return qn->u.i64;
+    case QNUM_U64:
+        return qn->u.u64;
+    case QNUM_DOUBLE:
+        return qn->u.dbl;
+    }
+
+    assert(0);
+    return 0.0;
+}
+
+char *qnum_to_string(QNum *qn)
+{
+    char *buffer;
+    int len;
+
+    switch (qn->kind) {
+    case QNUM_I64:
+        return g_strdup_printf("%" PRId64, qn->u.i64);
+    case QNUM_U64:
+        return g_strdup_printf("%" PRIu64, qn->u.u64);
+    case QNUM_DOUBLE:
+        /* FIXME: snprintf() is locale dependent; but JSON requires
+         * numbers to be formatted as if in the C locale. Dependence
+         * on C locale is a pervasive issue in QEMU. */
+        /* FIXME: This risks printing Inf or NaN, which are not valid
+         * JSON values. */
+        /* FIXME: the default precision of 6 for %f often causes
+         * rounding errors; we should be using DBL_DECIMAL_DIG (17),
+         * and only rounding to a shorter number if the result would
+         * still produce the same floating point value.  */
+        buffer = g_strdup_printf("%f" , qn->u.dbl);
+        len = strlen(buffer);
+        while (len > 0 && buffer[len - 1] == '0') {
+            len--;
+        }
+
+        if (len && buffer[len - 1] == '.') {
+            buffer[len - 1] = 0;
+        } else {
+            buffer[len] = 0;
+        }
+
+        return buffer;
+    }
+
+    assert(0);
+    return NULL;
+}
+
+/**
+ * qnum_is_equal(): Test whether the two QNums are equal
+ *
+ * Negative integers are never considered equal to unsigned integers,
+ * but positive integers in the range [0, INT64_MAX] are considered
+ * equal independently of whether the QNum's kind is i64 or u64.
+ *
+ * Doubles are never considered equal to integers.
+ */
+bool qnum_is_equal(const QObject *x, const QObject *y)
+{
+    QNum *num_x = qobject_to(QNum, x);
+    QNum *num_y = qobject_to(QNum, y);
+
+    switch (num_x->kind) {
+    case QNUM_I64:
+        switch (num_y->kind) {
+        case QNUM_I64:
+            /* Comparison in native int64_t type */
+            return num_x->u.i64 == num_y->u.i64;
+        case QNUM_U64:
+            /* Implicit conversion of x to uin64_t, so we have to
+             * check its sign before */
+            return num_x->u.i64 >= 0 && num_x->u.i64 == num_y->u.u64;
+        case QNUM_DOUBLE:
+            return false;
+        }
+        abort();
+    case QNUM_U64:
+        switch (num_y->kind) {
+        case QNUM_I64:
+            return qnum_is_equal(y, x);
+        case QNUM_U64:
+            /* Comparison in native uint64_t type */
+            return num_x->u.u64 == num_y->u.u64;
+        case QNUM_DOUBLE:
+            return false;
+        }
+        abort();
+    case QNUM_DOUBLE:
+        switch (num_y->kind) {
+        case QNUM_I64:
+        case QNUM_U64:
+            return false;
+        case QNUM_DOUBLE:
+            /* Comparison in native double type */
+            return num_x->u.dbl == num_y->u.dbl;
+        }
+        abort();
+    }
+
+    abort();
+}
+
+/**
+ * qnum_destroy_obj(): Free all memory allocated by a
+ * QNum object
+ */
+void qnum_destroy_obj(QObject *obj)
+{
+    assert(obj != NULL);
+    g_free(qobject_to(QNum, obj));
+}
diff --git a/qobject/qobject.c b/qobject/qobject.c
new file mode 100644
index 000000000..878dd76e7
--- /dev/null
+++ b/qobject/qobject.c
@@ -0,0 +1,71 @@
+/*
+ * QObject
+ *
+ * Copyright (C) 2015 Red Hat, Inc.
+ *
+ * This work is licensed under the terms of the GNU LGPL, version 2.1
+ * or later.  See the COPYING.LIB file in the top-level directory.
+ */
+
+#include "qemu/osdep.h"
+#include "qapi/qmp/qbool.h"
+#include "qapi/qmp/qnull.h"
+#include "qapi/qmp/qnum.h"
+#include "qapi/qmp/qdict.h"
+#include "qapi/qmp/qlist.h"
+#include "qapi/qmp/qstring.h"
+
+QEMU_BUILD_BUG_MSG(
+    offsetof(QNull, base) != 0 ||
+    offsetof(QNum, base) != 0 ||
+    offsetof(QString, base) != 0 ||
+    offsetof(QDict, base) != 0 ||
+    offsetof(QList, base) != 0 ||
+    offsetof(QBool, base) != 0,
+    "base qobject must be at offset 0");
+
+static void (*qdestroy[QTYPE__MAX])(QObject *) = {
+    [QTYPE_NONE] = NULL,               /* No such object exists */
+    [QTYPE_QNULL] = NULL,              /* qnull_ is indestructible */
+    [QTYPE_QNUM] = qnum_destroy_obj,
+    [QTYPE_QSTRING] = qstring_destroy_obj,
+    [QTYPE_QDICT] = qdict_destroy_obj,
+    [QTYPE_QLIST] = qlist_destroy_obj,
+    [QTYPE_QBOOL] = qbool_destroy_obj,
+};
+
+void qobject_destroy(QObject *obj)
+{
+    assert(!obj->base.refcnt);
+    assert(QTYPE_QNULL < obj->base.type && obj->base.type < QTYPE__MAX);
+    qdestroy[obj->base.type](obj);
+}
+
+
+static bool (*qis_equal[QTYPE__MAX])(const QObject *, const QObject *) = {
+    [QTYPE_NONE] = NULL,               /* No such object exists */
+    [QTYPE_QNULL] = qnull_is_equal,
+    [QTYPE_QNUM] = qnum_is_equal,
+    [QTYPE_QSTRING] = qstring_is_equal,
+    [QTYPE_QDICT] = qdict_is_equal,
+    [QTYPE_QLIST] = qlist_is_equal,
+    [QTYPE_QBOOL] = qbool_is_equal,
+};
+
+bool qobject_is_equal(const QObject *x, const QObject *y)
+{
+    /* We cannot test x == y because an object does not need to be
+     * equal to itself (e.g. NaN floats are not). */
+
+    if (!x && !y) {
+        return true;
+    }
+
+    if (!x || !y || x->base.type != y->base.type) {
+        return false;
+    }
+
+    assert(QTYPE_NONE < x->base.type && x->base.type < QTYPE__MAX);
+
+    return qis_equal[x->base.type](x, y);
+}
diff --git a/qobject/qstring.c b/qobject/qstring.c
new file mode 100644
index 000000000..b66a2c35f
--- /dev/null
+++ b/qobject/qstring.c
@@ -0,0 +1,181 @@
+/*
+ * QString Module
+ *
+ * Copyright (C) 2009 Red Hat Inc.
+ *
+ * Authors:
+ *  Luiz Capitulino 
+ *
+ * This work is licensed under the terms of the GNU LGPL, version 2.1 or later.
+ * See the COPYING.LIB file in the top-level directory.
+ */
+
+#include "qemu/osdep.h"
+#include "qapi/qmp/qstring.h"
+
+/**
+ * qstring_new(): Create a new empty QString
+ *
+ * Return strong reference.
+ */
+QString *qstring_new(void)
+{
+    return qstring_from_str("");
+}
+
+/**
+ * qstring_get_length(): Get the length of a QString
+ */
+size_t qstring_get_length(const QString *qstring)
+{
+    return qstring->length;
+}
+
+/**
+ * qstring_from_substr(): Create a new QString from a C string substring
+ *
+ * Return string reference
+ */
+QString *qstring_from_substr(const char *str, size_t start, size_t end)
+{
+    QString *qstring;
+
+    assert(start <= end);
+
+    qstring = g_malloc(sizeof(*qstring));
+    qobject_init(QOBJECT(qstring), QTYPE_QSTRING);
+
+    qstring->length = end - start;
+    qstring->capacity = qstring->length;
+
+    assert(qstring->capacity < SIZE_MAX);
+    qstring->string = g_malloc(qstring->capacity + 1);
+    memcpy(qstring->string, str + start, qstring->length);
+    qstring->string[qstring->length] = 0;
+
+    return qstring;
+}
+
+/**
+ * qstring_from_str(): Create a new QString from a regular C string
+ *
+ * Return strong reference.
+ */
+QString *qstring_from_str(const char *str)
+{
+    return qstring_from_substr(str, 0, strlen(str));
+}
+
+static void capacity_increase(QString *qstring, size_t len)
+{
+    if (qstring->capacity < (qstring->length + len)) {
+        assert(len <= SIZE_MAX - qstring->capacity);
+        qstring->capacity += len;
+        assert(qstring->capacity <= SIZE_MAX / 2);
+        qstring->capacity *= 2; /* use exponential growth */
+
+        qstring->string = g_realloc(qstring->string, qstring->capacity + 1);
+    }
+}
+
+/* qstring_append(): Append a C string to a QString
+ */
+void qstring_append(QString *qstring, const char *str)
+{
+    size_t len = strlen(str);
+
+    capacity_increase(qstring, len);
+    memcpy(qstring->string + qstring->length, str, len);
+    qstring->length += len;
+    qstring->string[qstring->length] = 0;
+}
+
+void qstring_append_int(QString *qstring, int64_t value)
+{
+    char num[32];
+
+    snprintf(num, sizeof(num), "%" PRId64, value);
+    qstring_append(qstring, num);
+}
+
+/**
+ * qstring_append_chr(): Append a C char to a QString
+ */
+void qstring_append_chr(QString *qstring, int c)
+{
+    capacity_increase(qstring, 1);
+    qstring->string[qstring->length++] = c;
+    qstring->string[qstring->length] = 0;
+}
+
+/**
+ * qstring_get_str(): Return a pointer to the stored string
+ *
+ * NOTE: Should be used with caution, if the object is deallocated
+ * this pointer becomes invalid.
+ */
+const char *qstring_get_str(const QString *qstring)
+{
+    return qstring->string;
+}
+
+/**
+ * qstring_get_try_str(): Return a pointer to the stored string
+ *
+ * NOTE: will return NULL if qstring is not provided.
+ */
+const char *qstring_get_try_str(const QString *qstring)
+{
+    return qstring ? qstring_get_str(qstring) : NULL;
+}
+
+/**
+ * qobject_get_try_str(): Return a pointer to the corresponding string
+ *
+ * NOTE: the string will only be returned if the object is valid, and
+ * its type is QString, otherwise NULL is returned.
+ */
+const char *qobject_get_try_str(const QObject *qstring)
+{
+    return qstring_get_try_str(qobject_to(QString, qstring));
+}
+
+/**
+ * qstring_is_equal(): Test whether the two QStrings are equal
+ */
+bool qstring_is_equal(const QObject *x, const QObject *y)
+{
+    return !strcmp(qobject_to(QString, x)->string,
+                   qobject_to(QString, y)->string);
+}
+
+/**
+ * qstring_free(): Free the memory allocated by a QString object
+ *
+ * Return: if @return_str, return the underlying string, to be
+ * g_free(), otherwise NULL is returned.
+ */
+char *qstring_free(QString *qstring, bool return_str)
+{
+    char *rv = NULL;
+
+    if (return_str) {
+        rv = qstring->string;
+    } else {
+        g_free(qstring->string);
+    }
+
+    g_free(qstring);
+
+    return rv;
+}
+
+/**
+ * qstring_destroy_obj(): Free all memory allocated by a QString
+ * object
+ */
+void qstring_destroy_obj(QObject *obj)
+{
+    assert(obj != NULL);
+    qstring_free(qobject_to(QString, obj), FALSE);
+}
diff --git a/qom/container.c b/qom/container.c
new file mode 100644
index 000000000..455e8410c
--- /dev/null
+++ b/qom/container.c
@@ -0,0 +1,52 @@
+/*
+ * Device Container
+ *
+ * Copyright IBM, Corp. 2012
+ *
+ * Authors:
+ *  Anthony Liguori   
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
+ * See the COPYING file in the top-level directory.
+ */
+
+#include "qemu/osdep.h"
+#include "qom/object.h"
+#include "qemu/module.h"
+
+static const TypeInfo container_info = {
+    .name          = "container",
+    .parent        = TYPE_OBJECT,
+};
+
+static void container_register_types(void)
+{
+    type_register_static(&container_info);
+}
+
+Object *container_get(Object *root, const char *path)
+{
+    Object *obj, *child;
+    char **parts;
+    int i;
+
+    parts = g_strsplit(path, "/", 0);
+    assert(parts != NULL && parts[0] != NULL && !parts[0][0]);
+    obj = root;
+
+    for (i = 1; parts[i] != NULL; i++, obj = child) {
+        child = object_resolve_path_component(obj, parts[i]);
+        if (!child) {
+            child = object_new("container");
+            object_property_add_child(obj, parts[i], child);
+            object_unref(child);
+        }
+    }
+
+    g_strfreev(parts);
+
+    return obj;
+}
+
+
+type_init(container_register_types)
diff --git a/qom/meson.build b/qom/meson.build
new file mode 100644
index 000000000..062a3789d
--- /dev/null
+++ b/qom/meson.build
@@ -0,0 +1,10 @@
+qom_ss.add(genh)
+qom_ss.add(files(
+  'container.c',
+  'object.c',
+  'object_interfaces.c',
+  'qom-qobject.c',
+))
+
+qmp_ss.add(files('qom-qmp-cmds.c'))
+softmmu_ss.add(files('qom-hmp-cmds.c'))
diff --git a/qom/object.c b/qom/object.c
new file mode 100644
index 000000000..0dec16419
--- /dev/null
+++ b/qom/object.c
@@ -0,0 +1,2831 @@
+/*
+ * QEMU Object Model
+ *
+ * Copyright IBM, Corp. 2011
+ *
+ * Authors:
+ *  Anthony Liguori   
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
+ * See the COPYING file in the top-level directory.
+ */
+
+#include "qemu/osdep.h"
+#include "hw/qdev-core.h"
+#include "qapi/error.h"
+#include "qom/object.h"
+#include "qom/object_interfaces.h"
+#include "qemu/cutils.h"
+#include "qapi/visitor.h"
+#include "qapi/string-input-visitor.h"
+#include "qapi/string-output-visitor.h"
+#include "qapi/qobject-input-visitor.h"
+#include "qapi/qapi-builtin-visit.h"
+#include "qapi/qmp/qerror.h"
+#include "qapi/qmp/qjson.h"
+#include "trace.h"
+
+/* TODO: replace QObject with a simpler visitor to avoid a dependency
+ * of the QOM core on QObject?  */
+#include "qom/qom-qobject.h"
+#include "qapi/qmp/qbool.h"
+#include "qapi/qmp/qnum.h"
+#include "qapi/qmp/qstring.h"
+#include "qemu/error-report.h"
+
+#define MAX_INTERFACES 32
+
+typedef struct InterfaceImpl InterfaceImpl;
+typedef struct TypeImpl TypeImpl;
+
+struct InterfaceImpl
+{
+    const char *typename;
+};
+
+struct TypeImpl
+{
+    const char *name;
+
+    size_t class_size;
+
+    size_t instance_size;
+    size_t instance_align;
+
+    void (*class_init)(ObjectClass *klass, void *data);
+    void (*class_base_init)(ObjectClass *klass, void *data);
+
+    void *class_data;
+
+    void (*instance_init)(Object *obj);
+    void (*instance_post_init)(Object *obj);
+    void (*instance_finalize)(Object *obj);
+
+    bool abstract;
+
+    const char *parent;
+    TypeImpl *parent_type;
+
+    ObjectClass *class;
+
+    int num_interfaces;
+    InterfaceImpl interfaces[MAX_INTERFACES];
+};
+
+static Type type_interface;
+
+static GHashTable *type_table_get(void)
+{
+    static GHashTable *type_table;
+
+    if (type_table == NULL) {
+        type_table = g_hash_table_new(g_str_hash, g_str_equal);
+    }
+
+    return type_table;
+}
+
+static bool enumerating_types;
+
+static void type_table_add(TypeImpl *ti)
+{
+    assert(!enumerating_types);
+    g_hash_table_insert(type_table_get(), (void *)ti->name, ti);
+}
+
+static TypeImpl *type_table_lookup(const char *name)
+{
+    return g_hash_table_lookup(type_table_get(), name);
+}
+
+static TypeImpl *type_new(const TypeInfo *info)
+{
+    TypeImpl *ti = g_malloc0(sizeof(*ti));
+    int i;
+
+    g_assert(info->name != NULL);
+
+    if (type_table_lookup(info->name) != NULL) {
+        fprintf(stderr, "Registering `%s' which already exists\n", info->name);
+        abort();
+    }
+
+    ti->name = g_strdup(info->name);
+    ti->parent = g_strdup(info->parent);
+
+    ti->class_size = info->class_size;
+    ti->instance_size = info->instance_size;
+    ti->instance_align = info->instance_align;
+
+    ti->class_init = info->class_init;
+    ti->class_base_init = info->class_base_init;
+    ti->class_data = info->class_data;
+
+    ti->instance_init = info->instance_init;
+    ti->instance_post_init = info->instance_post_init;
+    ti->instance_finalize = info->instance_finalize;
+
+    ti->abstract = info->abstract;
+
+    for (i = 0; info->interfaces && info->interfaces[i].type; i++) {
+        ti->interfaces[i].typename = g_strdup(info->interfaces[i].type);
+    }
+    ti->num_interfaces = i;
+
+    return ti;
+}
+
+static TypeImpl *type_register_internal(const TypeInfo *info)
+{
+    TypeImpl *ti;
+    ti = type_new(info);
+
+    type_table_add(ti);
+    return ti;
+}
+
+TypeImpl *type_register(const TypeInfo *info)
+{
+    assert(info->parent);
+    return type_register_internal(info);
+}
+
+TypeImpl *type_register_static(const TypeInfo *info)
+{
+    return type_register(info);
+}
+
+void type_register_static_array(const TypeInfo *infos, int nr_infos)
+{
+    int i;
+
+    for (i = 0; i < nr_infos; i++) {
+        type_register_static(&infos[i]);
+    }
+}
+
+static TypeImpl *type_get_by_name(const char *name)
+{
+    if (name == NULL) {
+        return NULL;
+    }
+
+    return type_table_lookup(name);
+}
+
+static TypeImpl *type_get_parent(TypeImpl *type)
+{
+    if (!type->parent_type && type->parent) {
+        type->parent_type = type_get_by_name(type->parent);
+        if (!type->parent_type) {
+            fprintf(stderr, "Type '%s' is missing its parent '%s'\n",
+                    type->name, type->parent);
+            abort();
+        }
+    }
+
+    return type->parent_type;
+}
+
+static bool type_has_parent(TypeImpl *type)
+{
+    return (type->parent != NULL);
+}
+
+static size_t type_class_get_size(TypeImpl *ti)
+{
+    if (ti->class_size) {
+        return ti->class_size;
+    }
+
+    if (type_has_parent(ti)) {
+        return type_class_get_size(type_get_parent(ti));
+    }
+
+    return sizeof(ObjectClass);
+}
+
+static size_t type_object_get_size(TypeImpl *ti)
+{
+    if (ti->instance_size) {
+        return ti->instance_size;
+    }
+
+    if (type_has_parent(ti)) {
+        return type_object_get_size(type_get_parent(ti));
+    }
+
+    return 0;
+}
+
+size_t object_type_get_instance_size(const char *typename)
+{
+    TypeImpl *type = type_get_by_name(typename);
+
+    g_assert(type != NULL);
+    return type_object_get_size(type);
+}
+
+static bool type_is_ancestor(TypeImpl *type, TypeImpl *target_type)
+{
+    assert(target_type);
+
+    /* Check if target_type is a direct ancestor of type */
+    while (type) {
+        if (type == target_type) {
+            return true;
+        }
+
+
+        if (type->parent && !strcmp(type->parent, "chardev-spiceport")) {
+            if (!type->parent_type && !type_get_by_name(type->parent)) {
+                return false;
+            }
+        }
+        type = type_get_parent(type);
+    }
+
+    return false;
+}
+
+static void type_initialize(TypeImpl *ti);
+
+static void type_initialize_interface(TypeImpl *ti, TypeImpl *interface_type,
+                                      TypeImpl *parent_type)
+{
+    InterfaceClass *new_iface;
+    TypeInfo info = { };
+    TypeImpl *iface_impl;
+
+    info.parent = parent_type->name;
+    info.name = g_strdup_printf("%s::%s", ti->name, interface_type->name);
+    info.abstract = true;
+
+    iface_impl = type_new(&info);
+    iface_impl->parent_type = parent_type;
+    type_initialize(iface_impl);
+    g_free((char *)info.name);
+
+    new_iface = (InterfaceClass *)iface_impl->class;
+    new_iface->concrete_class = ti->class;
+    new_iface->interface_type = interface_type;
+
+    ti->class->interfaces = g_slist_append(ti->class->interfaces, new_iface);
+}
+
+static void object_property_free(gpointer data)
+{
+    ObjectProperty *prop = data;
+
+    if (prop->defval) {
+        qobject_unref(prop->defval);
+        prop->defval = NULL;
+    }
+    g_free(prop->name);
+    g_free(prop->type);
+    g_free(prop->description);
+    g_free(prop);
+}
+
+static void type_initialize(TypeImpl *ti)
+{
+    TypeImpl *parent;
+
+    if (ti->class) {
+        return;
+    }
+
+    ti->class_size = type_class_get_size(ti);
+    ti->instance_size = type_object_get_size(ti);
+    /* Any type with zero instance_size is implicitly abstract.
+     * This means interface types are all abstract.
+     */
+    if (ti->instance_size == 0) {
+        ti->abstract = true;
+    }
+    if (type_is_ancestor(ti, type_interface)) {
+        assert(ti->instance_size == 0);
+        assert(ti->abstract);
+        assert(!ti->instance_init);
+        assert(!ti->instance_post_init);
+        assert(!ti->instance_finalize);
+        assert(!ti->num_interfaces);
+    }
+    ti->class = g_malloc0(ti->class_size);
+
+    parent = type_get_parent(ti);
+    if (parent) {
+        type_initialize(parent);
+        GSList *e;
+        int i;
+
+        g_assert(parent->class_size <= ti->class_size);
+        g_assert(parent->instance_size <= ti->instance_size);
+        memcpy(ti->class, parent->class, parent->class_size);
+        ti->class->interfaces = NULL;
+
+        for (e = parent->class->interfaces; e; e = e->next) {
+            InterfaceClass *iface = e->data;
+            ObjectClass *klass = OBJECT_CLASS(iface);
+
+            type_initialize_interface(ti, iface->interface_type, klass->type);
+        }
+
+        for (i = 0; i < ti->num_interfaces; i++) {
+            TypeImpl *t = type_get_by_name(ti->interfaces[i].typename);
+            if (!t) {
+                error_report("missing interface '%s' for object '%s'",
+                             ti->interfaces[i].typename, parent->name);
+                abort();
+            }
+            for (e = ti->class->interfaces; e; e = e->next) {
+                TypeImpl *target_type = OBJECT_CLASS(e->data)->type;
+
+                if (type_is_ancestor(target_type, t)) {
+                    break;
+                }
+            }
+
+            if (e) {
+                continue;
+            }
+
+            type_initialize_interface(ti, t, t);
+        }
+    }
+
+    ti->class->properties = g_hash_table_new_full(g_str_hash, g_str_equal, NULL,
+                                                  object_property_free);
+
+    ti->class->type = ti;
+
+    while (parent) {
+        if (parent->class_base_init) {
+            parent->class_base_init(ti->class, ti->class_data);
+        }
+        parent = type_get_parent(parent);
+    }
+
+    if (ti->class_init) {
+        ti->class_init(ti->class, ti->class_data);
+    }
+}
+
+static void object_init_with_type(Object *obj, TypeImpl *ti)
+{
+    if (type_has_parent(ti)) {
+        object_init_with_type(obj, type_get_parent(ti));
+    }
+
+    if (ti->instance_init) {
+        ti->instance_init(obj);
+    }
+}
+
+static void object_post_init_with_type(Object *obj, TypeImpl *ti)
+{
+    if (ti->instance_post_init) {
+        ti->instance_post_init(obj);
+    }
+
+    if (type_has_parent(ti)) {
+        object_post_init_with_type(obj, type_get_parent(ti));
+    }
+}
+
+bool object_apply_global_props(Object *obj, const GPtrArray *props,
+                               Error **errp)
+{
+    int i;
+
+    if (!props) {
+        return true;
+    }
+
+    for (i = 0; i < props->len; i++) {
+        GlobalProperty *p = g_ptr_array_index(props, i);
+        Error *err = NULL;
+
+        if (object_dynamic_cast(obj, p->driver) == NULL) {
+            continue;
+        }
+        if (p->optional && !object_property_find(obj, p->property)) {
+            continue;
+        }
+        p->used = true;
+        if (!object_property_parse(obj, p->property, p->value, &err)) {
+            error_prepend(&err, "can't apply global %s.%s=%s: ",
+                          p->driver, p->property, p->value);
+            /*
+             * If errp != NULL, propagate error and return.
+             * If errp == NULL, report a warning, but keep going
+             * with the remaining globals.
+             */
+            if (errp) {
+                error_propagate(errp, err);
+                return false;
+            } else {
+                warn_report_err(err);
+            }
+        }
+    }
+
+    return true;
+}
+
+/*
+ * Global property defaults
+ * Slot 0: accelerator's global property defaults
+ * Slot 1: machine's global property defaults
+ * Slot 2: global properties from legacy command line option
+ * Each is a GPtrArray of of GlobalProperty.
+ * Applied in order, later entries override earlier ones.
+ */
+static GPtrArray *object_compat_props[3];
+
+/*
+ * Retrieve @GPtrArray for global property defined with options
+ * other than "-global".  These are generally used for syntactic
+ * sugar and legacy command line options.
+ */
+void object_register_sugar_prop(const char *driver, const char *prop, const char *value)
+{
+    GlobalProperty *g;
+    if (!object_compat_props[2]) {
+        object_compat_props[2] = g_ptr_array_new();
+    }
+    g = g_new0(GlobalProperty, 1);
+    g->driver = g_strdup(driver);
+    g->property = g_strdup(prop);
+    g->value = g_strdup(value);
+    g_ptr_array_add(object_compat_props[2], g);
+}
+
+/*
+ * Set machine's global property defaults to @compat_props.
+ * May be called at most once.
+ */
+void object_set_machine_compat_props(GPtrArray *compat_props)
+{
+    assert(!object_compat_props[1]);
+    object_compat_props[1] = compat_props;
+}
+
+/*
+ * Set accelerator's global property defaults to @compat_props.
+ * May be called at most once.
+ */
+void object_set_accelerator_compat_props(GPtrArray *compat_props)
+{
+    assert(!object_compat_props[0]);
+    object_compat_props[0] = compat_props;
+}
+
+void object_apply_compat_props(Object *obj)
+{
+    int i;
+
+    for (i = 0; i < ARRAY_SIZE(object_compat_props); i++) {
+        object_apply_global_props(obj, object_compat_props[i],
+                                  i == 2 ? &error_fatal : &error_abort);
+    }
+}
+
+static void object_class_property_init_all(Object *obj)
+{
+    ObjectPropertyIterator iter;
+    ObjectProperty *prop;
+
+    object_class_property_iter_init(&iter, object_get_class(obj));
+    while ((prop = object_property_iter_next(&iter))) {
+        if (prop->init) {
+            prop->init(obj, prop);
+        }
+    }
+}
+
+static void object_initialize_with_type(Object *obj, size_t size, TypeImpl *type)
+{
+    type_initialize(type);
+
+    g_assert(type->instance_size >= sizeof(Object));
+    g_assert(type->abstract == false);
+    g_assert(size >= type->instance_size);
+
+    memset(obj, 0, type->instance_size);
+    obj->class = type->class;
+    object_ref(obj);
+    object_class_property_init_all(obj);
+    obj->properties = g_hash_table_new_full(g_str_hash, g_str_equal,
+                                            NULL, object_property_free);
+    object_init_with_type(obj, type);
+    object_post_init_with_type(obj, type);
+}
+
+#ifdef CONFIG_MODULES
+int module_load_check(const char *name)
+{
+    TypeImpl *type = type_get_by_name(name);
+    if (!type) {
+        module_load_qom_one(name);
+        type = type_get_by_name(name);
+    }
+    return type == NULL;
+}
+#endif
+
+void object_initialize(void *data, size_t size, const char *typename)
+{
+    TypeImpl *type = type_get_by_name(typename);
+
+#ifdef CONFIG_MODULES
+    if (!type) {
+        module_load_qom_one(typename);
+        type = type_get_by_name(typename);
+    }
+#endif
+    if (!type) {
+        error_report("missing object type '%s'", typename);
+        abort();
+    }
+
+    object_initialize_with_type(data, size, type);
+}
+
+bool object_initialize_child_with_props(Object *parentobj,
+                                        const char *propname,
+                                        void *childobj, size_t size,
+                                        const char *type,
+                                        Error **errp, ...)
+{
+    va_list vargs;
+    bool ok;
+
+    va_start(vargs, errp);
+    ok = object_initialize_child_with_propsv(parentobj, propname,
+                                             childobj, size, type, errp,
+                                             vargs);
+    va_end(vargs);
+    return ok;
+}
+
+bool object_initialize_child_with_propsv(Object *parentobj,
+                                         const char *propname,
+                                         void *childobj, size_t size,
+                                         const char *type,
+                                         Error **errp, va_list vargs)
+{
+    bool ok = false;
+    Object *obj;
+    UserCreatable *uc;
+
+    object_initialize(childobj, size, type);
+    obj = OBJECT(childobj);
+
+    if (!object_set_propv(obj, errp, vargs)) {
+        goto out;
+    }
+
+    object_property_add_child(parentobj, propname, obj);
+
+    uc = (UserCreatable *)object_dynamic_cast(obj, TYPE_USER_CREATABLE);
+    if (uc) {
+        if (!user_creatable_complete(uc, errp)) {
+            object_unparent(obj);
+            goto out;
+        }
+    }
+
+    ok = true;
+
+out:
+    /*
+     * We want @obj's reference to be 1 on success, 0 on failure.
+     * On success, it's 2: one taken by object_initialize(), and one
+     * by object_property_add_child().
+     * On failure in object_initialize() or earlier, it's 1.
+     * On failure afterwards, it's also 1: object_unparent() releases
+     * the reference taken by object_property_add_child().
+     */
+    object_unref(obj);
+    return ok;
+}
+
+void object_initialize_child_internal(Object *parent,
+                                      const char *propname,
+                                      void *child, size_t size,
+                                      const char *type)
+{
+    object_initialize_child_with_props(parent, propname, child, size, type,
+                                       &error_abort, NULL);
+}
+
+static inline bool object_property_is_child(ObjectProperty *prop)
+{
+    return strstart(prop->type, "child<", NULL);
+}
+
+static void object_property_del_all(Object *obj)
+{
+    g_autoptr(GHashTable) done = g_hash_table_new(NULL, NULL);
+    ObjectProperty *prop;
+    ObjectPropertyIterator iter;
+    bool released;
+
+    do {
+        released = false;
+        object_property_iter_init(&iter, obj);
+        while ((prop = object_property_iter_next(&iter)) != NULL) {
+            if (g_hash_table_add(done, prop)) {
+                if (prop->release) {
+                    prop->release(obj, prop->name, prop->opaque);
+                    released = true;
+                    break;
+                }
+            }
+        }
+    } while (released);
+
+    g_hash_table_unref(obj->properties);
+}
+
+static void object_property_del_child(Object *obj, Object *child)
+{
+    ObjectProperty *prop;
+    GHashTableIter iter;
+    gpointer key, value;
+
+    g_hash_table_iter_init(&iter, obj->properties);
+    while (g_hash_table_iter_next(&iter, &key, &value)) {
+        prop = value;
+        if (object_property_is_child(prop) && prop->opaque == child) {
+            if (prop->release) {
+                prop->release(obj, prop->name, prop->opaque);
+                prop->release = NULL;
+            }
+            break;
+        }
+    }
+    g_hash_table_iter_init(&iter, obj->properties);
+    while (g_hash_table_iter_next(&iter, &key, &value)) {
+        prop = value;
+        if (object_property_is_child(prop) && prop->opaque == child) {
+            g_hash_table_iter_remove(&iter);
+            break;
+        }
+    }
+}
+
+void object_unparent(Object *obj)
+{
+    if (obj->parent) {
+        object_property_del_child(obj->parent, obj);
+    }
+}
+
+static void object_deinit(Object *obj, TypeImpl *type)
+{
+    if (type->instance_finalize) {
+        type->instance_finalize(obj);
+    }
+
+    if (type_has_parent(type)) {
+        object_deinit(obj, type_get_parent(type));
+    }
+}
+
+static void object_finalize(void *data)
+{
+    Object *obj = data;
+    TypeImpl *ti = obj->class->type;
+
+    object_property_del_all(obj);
+    object_deinit(obj, ti);
+
+    g_assert(obj->ref == 0);
+    if (obj->free) {
+        obj->free(obj);
+    }
+}
+
+/* Find the minimum alignment guaranteed by the system malloc. */
+#if __STDC_VERSION__ >= 201112L
+typddef max_align_t qemu_max_align_t;
+#else
+typedef union {
+    long l;
+    void *p;
+    double d;
+    long double ld;
+} qemu_max_align_t;
+#endif
+
+static Object *object_new_with_type(Type type)
+{
+    Object *obj;
+    size_t size, align;
+    void (*obj_free)(void *);
+
+    g_assert(type != NULL);
+    type_initialize(type);
+
+    size = type->instance_size;
+    align = type->instance_align;
+
+    /*
+     * Do not use qemu_memalign unless required.  Depending on the
+     * implementation, extra alignment implies extra overhead.
+     */
+    if (likely(align <= __alignof__(qemu_max_align_t))) {
+        obj = g_malloc(size);
+        obj_free = g_free;
+    } else {
+        obj = qemu_memalign(align, size);
+        obj_free = qemu_vfree;
+    }
+
+    object_initialize_with_type(obj, size, type);
+    obj->free = obj_free;
+
+    return obj;
+}
+
+Object *object_new_with_class(ObjectClass *klass)
+{
+    return object_new_with_type(klass->type);
+}
+
+Object *object_new(const char *typename)
+{
+    TypeImpl *ti = type_get_by_name(typename);
+
+    return object_new_with_type(ti);
+}
+
+
+Object *object_new_with_props(const char *typename,
+                              Object *parent,
+                              const char *id,
+                              Error **errp,
+                              ...)
+{
+    va_list vargs;
+    Object *obj;
+
+    va_start(vargs, errp);
+    obj = object_new_with_propv(typename, parent, id, errp, vargs);
+    va_end(vargs);
+
+    return obj;
+}
+
+
+Object *object_new_with_propv(const char *typename,
+                              Object *parent,
+                              const char *id,
+                              Error **errp,
+                              va_list vargs)
+{
+    Object *obj;
+    ObjectClass *klass;
+    UserCreatable *uc;
+
+    klass = object_class_by_name(typename);
+    if (!klass) {
+        error_setg(errp, "invalid object type: %s", typename);
+        return NULL;
+    }
+
+    if (object_class_is_abstract(klass)) {
+        error_setg(errp, "object type '%s' is abstract", typename);
+        return NULL;
+    }
+    obj = object_new_with_type(klass->type);
+
+    if (!object_set_propv(obj, errp, vargs)) {
+        goto error;
+    }
+
+    if (id != NULL) {
+        object_property_add_child(parent, id, obj);
+    }
+
+    uc = (UserCreatable *)object_dynamic_cast(obj, TYPE_USER_CREATABLE);
+    if (uc) {
+        if (!user_creatable_complete(uc, errp)) {
+            if (id != NULL) {
+                object_unparent(obj);
+            }
+            goto error;
+        }
+    }
+
+    object_unref(obj);
+    return obj;
+
+ error:
+    object_unref(obj);
+    return NULL;
+}
+
+
+bool object_set_props(Object *obj,
+                     Error **errp,
+                     ...)
+{
+    va_list vargs;
+    bool ret;
+
+    va_start(vargs, errp);
+    ret = object_set_propv(obj, errp, vargs);
+    va_end(vargs);
+
+    return ret;
+}
+
+
+bool object_set_propv(Object *obj,
+                     Error **errp,
+                     va_list vargs)
+{
+    const char *propname;
+
+    propname = va_arg(vargs, char *);
+    while (propname != NULL) {
+        const char *value = va_arg(vargs, char *);
+
+        g_assert(value != NULL);
+        if (!object_property_parse(obj, propname, value, errp)) {
+            return false;
+        }
+        propname = va_arg(vargs, char *);
+    }
+
+    return true;
+}
+
+
+Object *object_dynamic_cast(Object *obj, const char *typename)
+{
+    if (obj && object_class_dynamic_cast(object_get_class(obj), typename)) {
+        return obj;
+    }
+
+    return NULL;
+}
+
+Object *object_dynamic_cast_assert(Object *obj, const char *typename,
+                                   const char *file, int line, const char *func)
+{
+    trace_object_dynamic_cast_assert(obj ? obj->class->type->name : "(null)",
+                                     typename, file, line, func);
+
+#ifdef CONFIG_QOM_CAST_DEBUG
+    int i;
+    Object *inst;
+
+    for (i = 0; obj && i < OBJECT_CLASS_CAST_CACHE; i++) {
+        if (qatomic_read(&obj->class->object_cast_cache[i]) == typename) {
+            goto out;
+        }
+    }
+
+    inst = object_dynamic_cast(obj, typename);
+
+    if (!inst && obj) {
+        fprintf(stderr, "%s:%d:%s: Object %p is not an instance of type %s\n",
+                file, line, func, obj, typename);
+        abort();
+    }
+
+    assert(obj == inst);
+
+    if (obj && obj == inst) {
+        for (i = 1; i < OBJECT_CLASS_CAST_CACHE; i++) {
+            qatomic_set(&obj->class->object_cast_cache[i - 1],
+                       qatomic_read(&obj->class->object_cast_cache[i]));
+        }
+        qatomic_set(&obj->class->object_cast_cache[i - 1], typename);
+    }
+
+out:
+#endif
+    return obj;
+}
+
+ObjectClass *object_class_dynamic_cast(ObjectClass *class,
+                                       const char *typename)
+{
+    ObjectClass *ret = NULL;
+    TypeImpl *target_type;
+    TypeImpl *type;
+
+    if (!class) {
+        return NULL;
+    }
+
+    /* A simple fast path that can trigger a lot for leaf classes.  */
+    type = class->type;
+    if (type->name == typename) {
+        return class;
+    }
+
+    target_type = type_get_by_name(typename);
+    if (!target_type) {
+        /* target class type unknown, so fail the cast */
+        return NULL;
+    }
+
+    if (type->class->interfaces &&
+            type_is_ancestor(target_type, type_interface)) {
+        int found = 0;
+        GSList *i;
+
+        for (i = class->interfaces; i; i = i->next) {
+            ObjectClass *target_class = i->data;
+
+            if (type_is_ancestor(target_class->type, target_type)) {
+                ret = target_class;
+                found++;
+            }
+         }
+
+        /* The match was ambiguous, don't allow a cast */
+        if (found > 1) {
+            ret = NULL;
+        }
+    } else if (type_is_ancestor(type, target_type)) {
+        ret = class;
+    }
+
+    return ret;
+}
+
+ObjectClass *object_class_dynamic_cast_assert(ObjectClass *class,
+                                              const char *typename,
+                                              const char *file, int line,
+                                              const char *func)
+{
+    ObjectClass *ret;
+
+    trace_object_class_dynamic_cast_assert(class ? class->type->name : "(null)",
+                                           typename, file, line, func);
+
+#ifdef CONFIG_QOM_CAST_DEBUG
+    int i;
+
+    for (i = 0; class && i < OBJECT_CLASS_CAST_CACHE; i++) {
+        if (qatomic_read(&class->class_cast_cache[i]) == typename) {
+            ret = class;
+            goto out;
+        }
+    }
+#else
+    if (!class || !class->interfaces) {
+        return class;
+    }
+#endif
+
+    ret = object_class_dynamic_cast(class, typename);
+    if (!ret && class) {
+        fprintf(stderr, "%s:%d:%s: Object %p is not an instance of type %s\n",
+                file, line, func, class, typename);
+        abort();
+    }
+
+#ifdef CONFIG_QOM_CAST_DEBUG
+    if (class && ret == class) {
+        for (i = 1; i < OBJECT_CLASS_CAST_CACHE; i++) {
+            qatomic_set(&class->class_cast_cache[i - 1],
+                       qatomic_read(&class->class_cast_cache[i]));
+        }
+        qatomic_set(&class->class_cast_cache[i - 1], typename);
+    }
+out:
+#endif
+    return ret;
+}
+
+const char *object_get_typename(const Object *obj)
+{
+    return obj->class->type->name;
+}
+
+ObjectClass *object_get_class(Object *obj)
+{
+    return obj->class;
+}
+
+bool object_class_is_abstract(ObjectClass *klass)
+{
+    return klass->type->abstract;
+}
+
+const char *object_class_get_name(ObjectClass *klass)
+{
+    return klass->type->name;
+}
+
+ObjectClass *object_class_by_name(const char *typename)
+{
+    TypeImpl *type = type_get_by_name(typename);
+
+    if (!type) {
+        return NULL;
+    }
+
+    type_initialize(type);
+
+    return type->class;
+}
+
+ObjectClass *module_object_class_by_name(const char *typename)
+{
+    ObjectClass *oc;
+
+    oc = object_class_by_name(typename);
+#ifdef CONFIG_MODULES
+    if (!oc) {
+        module_load_qom_one(typename);
+        oc = object_class_by_name(typename);
+    }
+#endif
+    return oc;
+}
+
+ObjectClass *object_class_get_parent(ObjectClass *class)
+{
+    TypeImpl *type = type_get_parent(class->type);
+
+    if (!type) {
+        return NULL;
+    }
+
+    type_initialize(type);
+
+    return type->class;
+}
+
+typedef struct OCFData
+{
+    void (*fn)(ObjectClass *klass, void *opaque);
+    const char *implements_type;
+    bool include_abstract;
+    void *opaque;
+} OCFData;
+
+static void object_class_foreach_tramp(gpointer key, gpointer value,
+                                       gpointer opaque)
+{
+    OCFData *data = opaque;
+    TypeImpl *type = value;
+    ObjectClass *k;
+
+    type_initialize(type);
+    k = type->class;
+
+    if (!data->include_abstract && type->abstract) {
+        return;
+    }
+
+    if (data->implements_type && 
+        !object_class_dynamic_cast(k, data->implements_type)) {
+        return;
+    }
+
+    data->fn(k, data->opaque);
+}
+
+void object_class_foreach(void (*fn)(ObjectClass *klass, void *opaque),
+                          const char *implements_type, bool include_abstract,
+                          void *opaque)
+{
+    OCFData data = { fn, implements_type, include_abstract, opaque };
+
+    enumerating_types = true;
+    g_hash_table_foreach(type_table_get(), object_class_foreach_tramp, &data);
+    enumerating_types = false;
+}
+
+static int do_object_child_foreach(Object *obj,
+                                   int (*fn)(Object *child, void *opaque),
+                                   void *opaque, bool recurse)
+{
+    GHashTableIter iter;
+    ObjectProperty *prop;
+    int ret = 0;
+
+    g_hash_table_iter_init(&iter, obj->properties);
+    while (g_hash_table_iter_next(&iter, NULL, (gpointer *)&prop)) {
+        if (object_property_is_child(prop)) {
+            Object *child = prop->opaque;
+
+            ret = fn(child, opaque);
+            if (ret != 0) {
+                break;
+            }
+            if (recurse) {
+                ret = do_object_child_foreach(child, fn, opaque, true);
+                if (ret != 0) {
+                    break;
+                }
+            }
+        }
+    }
+    return ret;
+}
+
+int object_child_foreach(Object *obj, int (*fn)(Object *child, void *opaque),
+                         void *opaque)
+{
+    return do_object_child_foreach(obj, fn, opaque, false);
+}
+
+int object_child_foreach_recursive(Object *obj,
+                                   int (*fn)(Object *child, void *opaque),
+                                   void *opaque)
+{
+    return do_object_child_foreach(obj, fn, opaque, true);
+}
+
+static void object_class_get_list_tramp(ObjectClass *klass, void *opaque)
+{
+    GSList **list = opaque;
+
+    *list = g_slist_prepend(*list, klass);
+}
+
+GSList *object_class_get_list(const char *implements_type,
+                              bool include_abstract)
+{
+    GSList *list = NULL;
+
+    object_class_foreach(object_class_get_list_tramp,
+                         implements_type, include_abstract, &list);
+    return list;
+}
+
+static gint object_class_cmp(gconstpointer a, gconstpointer b)
+{
+    return strcasecmp(object_class_get_name((ObjectClass *)a),
+                      object_class_get_name((ObjectClass *)b));
+}
+
+GSList *object_class_get_list_sorted(const char *implements_type,
+                                     bool include_abstract)
+{
+    return g_slist_sort(object_class_get_list(implements_type, include_abstract),
+                        object_class_cmp);
+}
+
+Object *object_ref(void *objptr)
+{
+    Object *obj = OBJECT(objptr);
+    if (!obj) {
+        return NULL;
+    }
+    qatomic_inc(&obj->ref);
+    return obj;
+}
+
+void object_unref(void *objptr)
+{
+    Object *obj = OBJECT(objptr);
+    if (!obj) {
+        return;
+    }
+    g_assert(obj->ref > 0);
+
+    /* parent always holds a reference to its children */
+    if (qatomic_fetch_dec(&obj->ref) == 1) {
+        object_finalize(obj);
+    }
+}
+
+ObjectProperty *
+object_property_try_add(Object *obj, const char *name, const char *type,
+                        ObjectPropertyAccessor *get,
+                        ObjectPropertyAccessor *set,
+                        ObjectPropertyRelease *release,
+                        void *opaque, Error **errp)
+{
+    ObjectProperty *prop;
+    size_t name_len = strlen(name);
+
+    if (name_len >= 3 && !memcmp(name + name_len - 3, "[*]", 4)) {
+        int i;
+        ObjectProperty *ret;
+        char *name_no_array = g_strdup(name);
+
+        name_no_array[name_len - 3] = '\0';
+        for (i = 0; ; ++i) {
+            char *full_name = g_strdup_printf("%s[%d]", name_no_array, i);
+
+            ret = object_property_try_add(obj, full_name, type, get, set,
+                                          release, opaque, NULL);
+            g_free(full_name);
+            if (ret) {
+                break;
+            }
+        }
+        g_free(name_no_array);
+        return ret;
+    }
+
+    if (object_property_find(obj, name) != NULL) {
+        error_setg(errp, "attempt to add duplicate property '%s' to object (type '%s')",
+                   name, object_get_typename(obj));
+        return NULL;
+    }
+
+    prop = g_malloc0(sizeof(*prop));
+
+    prop->name = g_strdup(name);
+    prop->type = g_strdup(type);
+
+    prop->get = get;
+    prop->set = set;
+    prop->release = release;
+    prop->opaque = opaque;
+
+    g_hash_table_insert(obj->properties, prop->name, prop);
+    return prop;
+}
+
+ObjectProperty *
+object_property_add(Object *obj, const char *name, const char *type,
+                    ObjectPropertyAccessor *get,
+                    ObjectPropertyAccessor *set,
+                    ObjectPropertyRelease *release,
+                    void *opaque)
+{
+    return object_property_try_add(obj, name, type, get, set, release,
+                                   opaque, &error_abort);
+}
+
+ObjectProperty *
+object_class_property_add(ObjectClass *klass,
+                          const char *name,
+                          const char *type,
+                          ObjectPropertyAccessor *get,
+                          ObjectPropertyAccessor *set,
+                          ObjectPropertyRelease *release,
+                          void *opaque)
+{
+    ObjectProperty *prop;
+
+    assert(!object_class_property_find(klass, name));
+
+    prop = g_malloc0(sizeof(*prop));
+
+    prop->name = g_strdup(name);
+    prop->type = g_strdup(type);
+
+    prop->get = get;
+    prop->set = set;
+    prop->release = release;
+    prop->opaque = opaque;
+
+    g_hash_table_insert(klass->properties, prop->name, prop);
+
+    return prop;
+}
+
+ObjectProperty *object_property_find(Object *obj, const char *name)
+{
+    ObjectProperty *prop;
+    ObjectClass *klass = object_get_class(obj);
+
+    prop = object_class_property_find(klass, name);
+    if (prop) {
+        return prop;
+    }
+
+    return g_hash_table_lookup(obj->properties, name);
+}
+
+ObjectProperty *object_property_find_err(Object *obj, const char *name,
+                                         Error **errp)
+{
+    ObjectProperty *prop = object_property_find(obj, name);
+    if (!prop) {
+        error_setg(errp, "Property '%s.%s' not found",
+                   object_get_typename(obj), name);
+    }
+    return prop;
+}
+
+void object_property_iter_init(ObjectPropertyIterator *iter,
+                               Object *obj)
+{
+    g_hash_table_iter_init(&iter->iter, obj->properties);
+    iter->nextclass = object_get_class(obj);
+}
+
+ObjectProperty *object_property_iter_next(ObjectPropertyIterator *iter)
+{
+    gpointer key, val;
+    while (!g_hash_table_iter_next(&iter->iter, &key, &val)) {
+        if (!iter->nextclass) {
+            return NULL;
+        }
+        g_hash_table_iter_init(&iter->iter, iter->nextclass->properties);
+        iter->nextclass = object_class_get_parent(iter->nextclass);
+    }
+    return val;
+}
+
+void object_class_property_iter_init(ObjectPropertyIterator *iter,
+                                     ObjectClass *klass)
+{
+    g_hash_table_iter_init(&iter->iter, klass->properties);
+    iter->nextclass = object_class_get_parent(klass);
+}
+
+ObjectProperty *object_class_property_find(ObjectClass *klass, const char *name)
+{
+    ObjectClass *parent_klass;
+
+    parent_klass = object_class_get_parent(klass);
+    if (parent_klass) {
+        ObjectProperty *prop =
+            object_class_property_find(parent_klass, name);
+        if (prop) {
+            return prop;
+        }
+    }
+
+    return g_hash_table_lookup(klass->properties, name);
+}
+
+ObjectProperty *object_class_property_find_err(ObjectClass *klass,
+                                               const char *name,
+                                               Error **errp)
+{
+    ObjectProperty *prop = object_class_property_find(klass, name);
+    if (!prop) {
+        error_setg(errp, "Property '.%s' not found", name);
+    }
+    return prop;
+}
+
+
+void object_property_del(Object *obj, const char *name)
+{
+    ObjectProperty *prop = g_hash_table_lookup(obj->properties, name);
+
+    if (prop->release) {
+        prop->release(obj, name, prop->opaque);
+    }
+    g_hash_table_remove(obj->properties, name);
+}
+
+bool object_property_get(Object *obj, const char *name, Visitor *v,
+                         Error **errp)
+{
+    Error *err = NULL;
+    ObjectProperty *prop = object_property_find_err(obj, name, errp);
+
+    if (prop == NULL) {
+        return false;
+    }
+
+    if (!prop->get) {
+        error_setg(errp, QERR_PERMISSION_DENIED);
+        return false;
+    }
+    prop->get(obj, v, name, prop->opaque, &err);
+    error_propagate(errp, err);
+    return !err;
+}
+
+bool object_property_set(Object *obj, const char *name, Visitor *v,
+                         Error **errp)
+{
+    Error *err = NULL;
+    ObjectProperty *prop = object_property_find_err(obj, name, errp);
+
+    if (prop == NULL) {
+        return false;
+    }
+
+    if (!prop->set) {
+        error_setg(errp, QERR_PERMISSION_DENIED);
+        return false;
+    }
+    prop->set(obj, v, name, prop->opaque, &err);
+    error_propagate(errp, err);
+    return !err;
+}
+
+bool object_property_set_str(Object *obj, const char *name,
+                             const char *value, Error **errp)
+{
+    QString *qstr = qstring_from_str(value);
+    bool ok = object_property_set_qobject(obj, name, QOBJECT(qstr), errp);
+
+    qobject_unref(qstr);
+    return ok;
+}
+
+char *object_property_get_str(Object *obj, const char *name,
+                              Error **errp)
+{
+    QObject *ret = object_property_get_qobject(obj, name, errp);
+    char *retval;
+
+    if (!ret) {
+        return NULL;
+    }
+
+    retval = g_strdup(qobject_get_try_str(ret));
+    if (!retval) {
+        error_setg(errp, QERR_INVALID_PARAMETER_TYPE, name, "string");
+    }
+
+    qobject_unref(ret);
+    return retval;
+}
+
+bool object_property_set_link(Object *obj, const char *name,
+                              Object *value, Error **errp)
+{
+    g_autofree char *path = NULL;
+
+    if (value) {
+        path = object_get_canonical_path(value);
+    }
+    return object_property_set_str(obj, name, path ?: "", errp);
+}
+
+Object *object_property_get_link(Object *obj, const char *name,
+                                 Error **errp)
+{
+    char *str = object_property_get_str(obj, name, errp);
+    Object *target = NULL;
+
+    if (str && *str) {
+        target = object_resolve_path(str, NULL);
+        if (!target) {
+            error_set(errp, ERROR_CLASS_DEVICE_NOT_FOUND,
+                      "Device '%s' not found", str);
+        }
+    }
+
+    g_free(str);
+    return target;
+}
+
+bool object_property_set_bool(Object *obj, const char *name,
+                              bool value, Error **errp)
+{
+    QBool *qbool = qbool_from_bool(value);
+    bool ok = object_property_set_qobject(obj, name, QOBJECT(qbool), errp);
+
+    qobject_unref(qbool);
+    return ok;
+}
+
+bool object_property_get_bool(Object *obj, const char *name,
+                              Error **errp)
+{
+    QObject *ret = object_property_get_qobject(obj, name, errp);
+    QBool *qbool;
+    bool retval;
+
+    if (!ret) {
+        return false;
+    }
+    qbool = qobject_to(QBool, ret);
+    if (!qbool) {
+        error_setg(errp, QERR_INVALID_PARAMETER_TYPE, name, "boolean");
+        retval = false;
+    } else {
+        retval = qbool_get_bool(qbool);
+    }
+
+    qobject_unref(ret);
+    return retval;
+}
+
+bool object_property_set_int(Object *obj, const char *name,
+                             int64_t value, Error **errp)
+{
+    QNum *qnum = qnum_from_int(value);
+    bool ok = object_property_set_qobject(obj, name, QOBJECT(qnum), errp);
+
+    qobject_unref(qnum);
+    return ok;
+}
+
+int64_t object_property_get_int(Object *obj, const char *name,
+                                Error **errp)
+{
+    QObject *ret = object_property_get_qobject(obj, name, errp);
+    QNum *qnum;
+    int64_t retval;
+
+    if (!ret) {
+        return -1;
+    }
+
+    qnum = qobject_to(QNum, ret);
+    if (!qnum || !qnum_get_try_int(qnum, &retval)) {
+        error_setg(errp, QERR_INVALID_PARAMETER_TYPE, name, "int");
+        retval = -1;
+    }
+
+    qobject_unref(ret);
+    return retval;
+}
+
+static void object_property_init_defval(Object *obj, ObjectProperty *prop)
+{
+    Visitor *v = qobject_input_visitor_new(prop->defval);
+
+    assert(prop->set != NULL);
+    prop->set(obj, v, prop->name, prop->opaque, &error_abort);
+
+    visit_free(v);
+}
+
+static void object_property_set_default(ObjectProperty *prop, QObject *defval)
+{
+    assert(!prop->defval);
+    assert(!prop->init);
+
+    prop->defval = defval;
+    prop->init = object_property_init_defval;
+}
+
+void object_property_set_default_bool(ObjectProperty *prop, bool value)
+{
+    object_property_set_default(prop, QOBJECT(qbool_from_bool(value)));
+}
+
+void object_property_set_default_str(ObjectProperty *prop, const char *value)
+{
+    object_property_set_default(prop, QOBJECT(qstring_from_str(value)));
+}
+
+void object_property_set_default_int(ObjectProperty *prop, int64_t value)
+{
+    object_property_set_default(prop, QOBJECT(qnum_from_int(value)));
+}
+
+void object_property_set_default_uint(ObjectProperty *prop, uint64_t value)
+{
+    object_property_set_default(prop, QOBJECT(qnum_from_uint(value)));
+}
+
+bool object_property_set_uint(Object *obj, const char *name,
+                              uint64_t value, Error **errp)
+{
+    QNum *qnum = qnum_from_uint(value);
+    bool ok = object_property_set_qobject(obj, name, QOBJECT(qnum), errp);
+
+    qobject_unref(qnum);
+    return ok;
+}
+
+uint64_t object_property_get_uint(Object *obj, const char *name,
+                                  Error **errp)
+{
+    QObject *ret = object_property_get_qobject(obj, name, errp);
+    QNum *qnum;
+    uint64_t retval;
+
+    if (!ret) {
+        return 0;
+    }
+    qnum = qobject_to(QNum, ret);
+    if (!qnum || !qnum_get_try_uint(qnum, &retval)) {
+        error_setg(errp, QERR_INVALID_PARAMETER_TYPE, name, "uint");
+        retval = 0;
+    }
+
+    qobject_unref(ret);
+    return retval;
+}
+
+typedef struct EnumProperty {
+    const QEnumLookup *lookup;
+    int (*get)(Object *, Error **);
+    void (*set)(Object *, int, Error **);
+} EnumProperty;
+
+int object_property_get_enum(Object *obj, const char *name,
+                             const char *typename, Error **errp)
+{
+    char *str;
+    int ret;
+    ObjectProperty *prop = object_property_find_err(obj, name, errp);
+    EnumProperty *enumprop;
+
+    if (prop == NULL) {
+        return -1;
+    }
+
+    if (!g_str_equal(prop->type, typename)) {
+        error_setg(errp, "Property %s on %s is not '%s' enum type",
+                   name, object_class_get_name(
+                       object_get_class(obj)), typename);
+        return -1;
+    }
+
+    enumprop = prop->opaque;
+
+    str = object_property_get_str(obj, name, errp);
+    if (!str) {
+        return -1;
+    }
+
+    ret = qapi_enum_parse(enumprop->lookup, str, -1, errp);
+    g_free(str);
+
+    return ret;
+}
+
+bool object_property_parse(Object *obj, const char *name,
+                           const char *string, Error **errp)
+{
+    Visitor *v = string_input_visitor_new(string);
+    bool ok = object_property_set(obj, name, v, errp);
+
+    visit_free(v);
+    return ok;
+}
+
+char *object_property_print(Object *obj, const char *name, bool human,
+                            Error **errp)
+{
+    Visitor *v;
+    char *string = NULL;
+
+    v = string_output_visitor_new(human, &string);
+    if (!object_property_get(obj, name, v, errp)) {
+        goto out;
+    }
+
+    visit_complete(v, &string);
+
+out:
+    visit_free(v);
+    return string;
+}
+
+const char *object_property_get_type(Object *obj, const char *name, Error **errp)
+{
+    ObjectProperty *prop = object_property_find_err(obj, name, errp);
+    if (prop == NULL) {
+        return NULL;
+    }
+
+    return prop->type;
+}
+
+Object *object_get_root(void)
+{
+    static Object *root;
+
+    if (!root) {
+        root = object_new("container");
+    }
+
+    return root;
+}
+
+Object *object_get_objects_root(void)
+{
+    return container_get(object_get_root(), "/objects");
+}
+
+Object *object_get_internal_root(void)
+{
+    static Object *internal_root;
+
+    if (!internal_root) {
+        internal_root = object_new("container");
+    }
+
+    return internal_root;
+}
+
+static void object_get_child_property(Object *obj, Visitor *v,
+                                      const char *name, void *opaque,
+                                      Error **errp)
+{
+    Object *child = opaque;
+    char *path;
+
+    path = object_get_canonical_path(child);
+    visit_type_str(v, name, &path, errp);
+    g_free(path);
+}
+
+static Object *object_resolve_child_property(Object *parent, void *opaque,
+                                             const char *part)
+{
+    return opaque;
+}
+
+static void object_finalize_child_property(Object *obj, const char *name,
+                                           void *opaque)
+{
+    Object *child = opaque;
+
+    if (child->class->unparent) {
+        (child->class->unparent)(child);
+    }
+    child->parent = NULL;
+    object_unref(child);
+}
+
+ObjectProperty *
+object_property_try_add_child(Object *obj, const char *name,
+                              Object *child, Error **errp)
+{
+    g_autofree char *type = NULL;
+    ObjectProperty *op;
+
+    assert(!child->parent);
+
+    type = g_strdup_printf("child<%s>", object_get_typename(child));
+
+    op = object_property_try_add(obj, name, type, object_get_child_property,
+                                 NULL, object_finalize_child_property,
+                                 child, errp);
+    if (!op) {
+        return NULL;
+    }
+    op->resolve = object_resolve_child_property;
+    object_ref(child);
+    child->parent = obj;
+    return op;
+}
+
+ObjectProperty *
+object_property_add_child(Object *obj, const char *name,
+                          Object *child)
+{
+    return object_property_try_add_child(obj, name, child, &error_abort);
+}
+
+void object_property_allow_set_link(const Object *obj, const char *name,
+                                    Object *val, Error **errp)
+{
+    /* Allow the link to be set, always */
+}
+
+typedef struct {
+    union {
+        Object **targetp;
+        Object *target; /* if OBJ_PROP_LINK_DIRECT, when holding the pointer  */
+        ptrdiff_t offset; /* if OBJ_PROP_LINK_CLASS */
+    };
+    void (*check)(const Object *, const char *, Object *, Error **);
+    ObjectPropertyLinkFlags flags;
+} LinkProperty;
+
+static Object **
+object_link_get_targetp(Object *obj, LinkProperty *lprop)
+{
+    if (lprop->flags & OBJ_PROP_LINK_DIRECT) {
+        return &lprop->target;
+    } else if (lprop->flags & OBJ_PROP_LINK_CLASS) {
+        return (void *)obj + lprop->offset;
+    } else {
+        return lprop->targetp;
+    }
+}
+
+static void object_get_link_property(Object *obj, Visitor *v,
+                                     const char *name, void *opaque,
+                                     Error **errp)
+{
+    LinkProperty *lprop = opaque;
+    Object **targetp = object_link_get_targetp(obj, lprop);
+    char *path;
+
+    if (*targetp) {
+        path = object_get_canonical_path(*targetp);
+        visit_type_str(v, name, &path, errp);
+        g_free(path);
+    } else {
+        path = (char *)"";
+        visit_type_str(v, name, &path, errp);
+    }
+}
+
+/*
+ * object_resolve_link:
+ *
+ * Lookup an object and ensure its type matches the link property type.  This
+ * is similar to object_resolve_path() except type verification against the
+ * link property is performed.
+ *
+ * Returns: The matched object or NULL on path lookup failures.
+ */
+static Object *object_resolve_link(Object *obj, const char *name,
+                                   const char *path, Error **errp)
+{
+    const char *type;
+    char *target_type;
+    bool ambiguous = false;
+    Object *target;
+
+    /* Go from link to FOO.  */
+    type = object_property_get_type(obj, name, NULL);
+    target_type = g_strndup(&type[5], strlen(type) - 6);
+    target = object_resolve_path_type(path, target_type, &ambiguous);
+
+    if (ambiguous) {
+        error_setg(errp, "Path '%s' does not uniquely identify an object",
+                   path);
+    } else if (!target) {
+        target = object_resolve_path(path, &ambiguous);
+        if (target || ambiguous) {
+            error_setg(errp, QERR_INVALID_PARAMETER_TYPE, name, target_type);
+        } else {
+            error_set(errp, ERROR_CLASS_DEVICE_NOT_FOUND,
+                      "Device '%s' not found", path);
+        }
+        target = NULL;
+    }
+    g_free(target_type);
+
+    return target;
+}
+
+static void object_set_link_property(Object *obj, Visitor *v,
+                                     const char *name, void *opaque,
+                                     Error **errp)
+{
+    Error *local_err = NULL;
+    LinkProperty *prop = opaque;
+    Object **targetp = object_link_get_targetp(obj, prop);
+    Object *old_target = *targetp;
+    Object *new_target;
+    char *path = NULL;
+
+    if (!visit_type_str(v, name, &path, errp)) {
+        return;
+    }
+
+    if (*path) {
+        new_target = object_resolve_link(obj, name, path, errp);
+        if (!new_target) {
+            g_free(path);
+            return;
+        }
+    } else {
+        new_target = NULL;
+    }
+
+    g_free(path);
+
+    prop->check(obj, name, new_target, &local_err);
+    if (local_err) {
+        error_propagate(errp, local_err);
+        return;
+    }
+
+    *targetp = new_target;
+    if (prop->flags & OBJ_PROP_LINK_STRONG) {
+        object_ref(new_target);
+        object_unref(old_target);
+    }
+}
+
+static Object *object_resolve_link_property(Object *parent, void *opaque,
+                                            const char *part)
+{
+    LinkProperty *lprop = opaque;
+
+    return *object_link_get_targetp(parent, lprop);
+}
+
+static void object_release_link_property(Object *obj, const char *name,
+                                         void *opaque)
+{
+    LinkProperty *prop = opaque;
+    Object **targetp = object_link_get_targetp(obj, prop);
+
+    if ((prop->flags & OBJ_PROP_LINK_STRONG) && *targetp) {
+        object_unref(*targetp);
+    }
+    if (!(prop->flags & OBJ_PROP_LINK_CLASS)) {
+        g_free(prop);
+    }
+}
+
+static ObjectProperty *
+object_add_link_prop(Object *obj, const char *name,
+                     const char *type, void *ptr,
+                     void (*check)(const Object *, const char *,
+                                   Object *, Error **),
+                     ObjectPropertyLinkFlags flags)
+{
+    LinkProperty *prop = g_malloc(sizeof(*prop));
+    g_autofree char *full_type = NULL;
+    ObjectProperty *op;
+
+    if (flags & OBJ_PROP_LINK_DIRECT) {
+        prop->target = ptr;
+    } else {
+        prop->targetp = ptr;
+    }
+    prop->check = check;
+    prop->flags = flags;
+
+    full_type = g_strdup_printf("link<%s>", type);
+
+    op = object_property_add(obj, name, full_type,
+                             object_get_link_property,
+                             check ? object_set_link_property : NULL,
+                             object_release_link_property,
+                             prop);
+    op->resolve = object_resolve_link_property;
+    return op;
+}
+
+ObjectProperty *
+object_property_add_link(Object *obj, const char *name,
+                         const char *type, Object **targetp,
+                         void (*check)(const Object *, const char *,
+                                       Object *, Error **),
+                         ObjectPropertyLinkFlags flags)
+{
+    return object_add_link_prop(obj, name, type, targetp, check, flags);
+}
+
+ObjectProperty *
+object_class_property_add_link(ObjectClass *oc,
+    const char *name,
+    const char *type, ptrdiff_t offset,
+    void (*check)(const Object *obj, const char *name,
+                  Object *val, Error **errp),
+    ObjectPropertyLinkFlags flags)
+{
+    LinkProperty *prop = g_new0(LinkProperty, 1);
+    char *full_type;
+    ObjectProperty *op;
+
+    prop->offset = offset;
+    prop->check = check;
+    prop->flags = flags | OBJ_PROP_LINK_CLASS;
+
+    full_type = g_strdup_printf("link<%s>", type);
+
+    op = object_class_property_add(oc, name, full_type,
+                                   object_get_link_property,
+                                   check ? object_set_link_property : NULL,
+                                   object_release_link_property,
+                                   prop);
+
+    op->resolve = object_resolve_link_property;
+
+    g_free(full_type);
+    return op;
+}
+
+ObjectProperty *
+object_property_add_const_link(Object *obj, const char *name,
+                               Object *target)
+{
+    return object_add_link_prop(obj, name,
+                                object_get_typename(target), target,
+                                NULL, OBJ_PROP_LINK_DIRECT);
+}
+
+const char *object_get_canonical_path_component(const Object *obj)
+{
+    ObjectProperty *prop = NULL;
+    GHashTableIter iter;
+
+    if (obj->parent == NULL) {
+        return NULL;
+    }
+
+    g_hash_table_iter_init(&iter, obj->parent->properties);
+    while (g_hash_table_iter_next(&iter, NULL, (gpointer *)&prop)) {
+        if (!object_property_is_child(prop)) {
+            continue;
+        }
+
+        if (prop->opaque == obj) {
+            return prop->name;
+        }
+    }
+
+    /* obj had a parent but was not a child, should never happen */
+    g_assert_not_reached();
+    return NULL;
+}
+
+char *object_get_canonical_path(const Object *obj)
+{
+    Object *root = object_get_root();
+    char *newpath, *path = NULL;
+
+    if (obj == root) {
+        return g_strdup("/");
+    }
+
+    do {
+        const char *component = object_get_canonical_path_component(obj);
+
+        if (!component) {
+            /* A canonical path must be complete, so discard what was
+             * collected so far.
+             */
+            g_free(path);
+            return NULL;
+        }
+
+        newpath = g_strdup_printf("/%s%s", component, path ? path : "");
+        g_free(path);
+        path = newpath;
+        obj = obj->parent;
+    } while (obj != root);
+
+    return path;
+}
+
+Object *object_resolve_path_component(Object *parent, const char *part)
+{
+    ObjectProperty *prop = object_property_find(parent, part);
+    if (prop == NULL) {
+        return NULL;
+    }
+
+    if (prop->resolve) {
+        return prop->resolve(parent, prop->opaque, part);
+    } else {
+        return NULL;
+    }
+}
+
+static Object *object_resolve_abs_path(Object *parent,
+                                          char **parts,
+                                          const char *typename)
+{
+    Object *child;
+
+    if (*parts == NULL) {
+        return object_dynamic_cast(parent, typename);
+    }
+
+    if (strcmp(*parts, "") == 0) {
+        return object_resolve_abs_path(parent, parts + 1, typename);
+    }
+
+    child = object_resolve_path_component(parent, *parts);
+    if (!child) {
+        return NULL;
+    }
+
+    return object_resolve_abs_path(child, parts + 1, typename);
+}
+
+static Object *object_resolve_partial_path(Object *parent,
+                                           char **parts,
+                                           const char *typename,
+                                           bool *ambiguous)
+{
+    Object *obj;
+    GHashTableIter iter;
+    ObjectProperty *prop;
+
+    obj = object_resolve_abs_path(parent, parts, typename);
+
+    g_hash_table_iter_init(&iter, parent->properties);
+    while (g_hash_table_iter_next(&iter, NULL, (gpointer *)&prop)) {
+        Object *found;
+
+        if (!object_property_is_child(prop)) {
+            continue;
+        }
+
+        found = object_resolve_partial_path(prop->opaque, parts,
+                                            typename, ambiguous);
+        if (found) {
+            if (obj) {
+                *ambiguous = true;
+                return NULL;
+            }
+            obj = found;
+        }
+
+        if (*ambiguous) {
+            return NULL;
+        }
+    }
+
+    return obj;
+}
+
+Object *object_resolve_path_type(const char *path, const char *typename,
+                                 bool *ambiguousp)
+{
+    Object *obj;
+    char **parts;
+
+    parts = g_strsplit(path, "/", 0);
+    assert(parts);
+
+    if (parts[0] == NULL || strcmp(parts[0], "") != 0) {
+        bool ambiguous = false;
+        obj = object_resolve_partial_path(object_get_root(), parts,
+                                          typename, &ambiguous);
+        if (ambiguousp) {
+            *ambiguousp = ambiguous;
+        }
+    } else {
+        obj = object_resolve_abs_path(object_get_root(), parts + 1, typename);
+    }
+
+    g_strfreev(parts);
+
+    return obj;
+}
+
+Object *object_resolve_path(const char *path, bool *ambiguous)
+{
+    return object_resolve_path_type(path, TYPE_OBJECT, ambiguous);
+}
+
+typedef struct StringProperty
+{
+    char *(*get)(Object *, Error **);
+    void (*set)(Object *, const char *, Error **);
+} StringProperty;
+
+static void property_get_str(Object *obj, Visitor *v, const char *name,
+                             void *opaque, Error **errp)
+{
+    StringProperty *prop = opaque;
+    char *value;
+    Error *err = NULL;
+
+    value = prop->get(obj, &err);
+    if (err) {
+        error_propagate(errp, err);
+        return;
+    }
+
+    visit_type_str(v, name, &value, errp);
+    g_free(value);
+}
+
+static void property_set_str(Object *obj, Visitor *v, const char *name,
+                             void *opaque, Error **errp)
+{
+    StringProperty *prop = opaque;
+    char *value;
+
+    if (!visit_type_str(v, name, &value, errp)) {
+        return;
+    }
+
+    prop->set(obj, value, errp);
+    g_free(value);
+}
+
+static void property_release_str(Object *obj, const char *name,
+                                 void *opaque)
+{
+    StringProperty *prop = opaque;
+    g_free(prop);
+}
+
+ObjectProperty *
+object_property_add_str(Object *obj, const char *name,
+                        char *(*get)(Object *, Error **),
+                        void (*set)(Object *, const char *, Error **))
+{
+    StringProperty *prop = g_malloc0(sizeof(*prop));
+
+    prop->get = get;
+    prop->set = set;
+
+    return object_property_add(obj, name, "string",
+                               get ? property_get_str : NULL,
+                               set ? property_set_str : NULL,
+                               property_release_str,
+                               prop);
+}
+
+ObjectProperty *
+object_class_property_add_str(ObjectClass *klass, const char *name,
+                                   char *(*get)(Object *, Error **),
+                                   void (*set)(Object *, const char *,
+                                               Error **))
+{
+    StringProperty *prop = g_malloc0(sizeof(*prop));
+
+    prop->get = get;
+    prop->set = set;
+
+    return object_class_property_add(klass, name, "string",
+                                     get ? property_get_str : NULL,
+                                     set ? property_set_str : NULL,
+                                     NULL,
+                                     prop);
+}
+
+typedef struct BoolProperty
+{
+    bool (*get)(Object *, Error **);
+    void (*set)(Object *, bool, Error **);
+} BoolProperty;
+
+static void property_get_bool(Object *obj, Visitor *v, const char *name,
+                              void *opaque, Error **errp)
+{
+    BoolProperty *prop = opaque;
+    bool value;
+    Error *err = NULL;
+
+    value = prop->get(obj, &err);
+    if (err) {
+        error_propagate(errp, err);
+        return;
+    }
+
+    visit_type_bool(v, name, &value, errp);
+}
+
+static void property_set_bool(Object *obj, Visitor *v, const char *name,
+                              void *opaque, Error **errp)
+{
+    BoolProperty *prop = opaque;
+    bool value;
+
+    if (!visit_type_bool(v, name, &value, errp)) {
+        return;
+    }
+
+    prop->set(obj, value, errp);
+}
+
+static void property_release_bool(Object *obj, const char *name,
+                                  void *opaque)
+{
+    BoolProperty *prop = opaque;
+    g_free(prop);
+}
+
+ObjectProperty *
+object_property_add_bool(Object *obj, const char *name,
+                         bool (*get)(Object *, Error **),
+                         void (*set)(Object *, bool, Error **))
+{
+    BoolProperty *prop = g_malloc0(sizeof(*prop));
+
+    prop->get = get;
+    prop->set = set;
+
+    return object_property_add(obj, name, "bool",
+                               get ? property_get_bool : NULL,
+                               set ? property_set_bool : NULL,
+                               property_release_bool,
+                               prop);
+}
+
+ObjectProperty *
+object_class_property_add_bool(ObjectClass *klass, const char *name,
+                                    bool (*get)(Object *, Error **),
+                                    void (*set)(Object *, bool, Error **))
+{
+    BoolProperty *prop = g_malloc0(sizeof(*prop));
+
+    prop->get = get;
+    prop->set = set;
+
+    return object_class_property_add(klass, name, "bool",
+                                     get ? property_get_bool : NULL,
+                                     set ? property_set_bool : NULL,
+                                     NULL,
+                                     prop);
+}
+
+static void property_get_enum(Object *obj, Visitor *v, const char *name,
+                              void *opaque, Error **errp)
+{
+    EnumProperty *prop = opaque;
+    int value;
+    Error *err = NULL;
+
+    value = prop->get(obj, &err);
+    if (err) {
+        error_propagate(errp, err);
+        return;
+    }
+
+    visit_type_enum(v, name, &value, prop->lookup, errp);
+}
+
+static void property_set_enum(Object *obj, Visitor *v, const char *name,
+                              void *opaque, Error **errp)
+{
+    EnumProperty *prop = opaque;
+    int value;
+
+    if (!visit_type_enum(v, name, &value, prop->lookup, errp)) {
+        return;
+    }
+    prop->set(obj, value, errp);
+}
+
+static void property_release_enum(Object *obj, const char *name,
+                                  void *opaque)
+{
+    EnumProperty *prop = opaque;
+    g_free(prop);
+}
+
+ObjectProperty *
+object_property_add_enum(Object *obj, const char *name,
+                         const char *typename,
+                         const QEnumLookup *lookup,
+                         int (*get)(Object *, Error **),
+                         void (*set)(Object *, int, Error **))
+{
+    EnumProperty *prop = g_malloc(sizeof(*prop));
+
+    prop->lookup = lookup;
+    prop->get = get;
+    prop->set = set;
+
+    return object_property_add(obj, name, typename,
+                               get ? property_get_enum : NULL,
+                               set ? property_set_enum : NULL,
+                               property_release_enum,
+                               prop);
+}
+
+ObjectProperty *
+object_class_property_add_enum(ObjectClass *klass, const char *name,
+                                    const char *typename,
+                                    const QEnumLookup *lookup,
+                                    int (*get)(Object *, Error **),
+                                    void (*set)(Object *, int, Error **))
+{
+    EnumProperty *prop = g_malloc(sizeof(*prop));
+
+    prop->lookup = lookup;
+    prop->get = get;
+    prop->set = set;
+
+    return object_class_property_add(klass, name, typename,
+                                     get ? property_get_enum : NULL,
+                                     set ? property_set_enum : NULL,
+                                     NULL,
+                                     prop);
+}
+
+typedef struct TMProperty {
+    void (*get)(Object *, struct tm *, Error **);
+} TMProperty;
+
+static void property_get_tm(Object *obj, Visitor *v, const char *name,
+                            void *opaque, Error **errp)
+{
+    TMProperty *prop = opaque;
+    Error *err = NULL;
+    struct tm value;
+
+    prop->get(obj, &value, &err);
+    if (err) {
+        error_propagate(errp, err);
+        return;
+    }
+
+    if (!visit_start_struct(v, name, NULL, 0, errp)) {
+        return;
+    }
+    if (!visit_type_int32(v, "tm_year", &value.tm_year, errp)) {
+        goto out_end;
+    }
+    if (!visit_type_int32(v, "tm_mon", &value.tm_mon, errp)) {
+        goto out_end;
+    }
+    if (!visit_type_int32(v, "tm_mday", &value.tm_mday, errp)) {
+        goto out_end;
+    }
+    if (!visit_type_int32(v, "tm_hour", &value.tm_hour, errp)) {
+        goto out_end;
+    }
+    if (!visit_type_int32(v, "tm_min", &value.tm_min, errp)) {
+        goto out_end;
+    }
+    if (!visit_type_int32(v, "tm_sec", &value.tm_sec, errp)) {
+        goto out_end;
+    }
+    visit_check_struct(v, errp);
+out_end:
+    visit_end_struct(v, NULL);
+}
+
+static void property_release_tm(Object *obj, const char *name,
+                                void *opaque)
+{
+    TMProperty *prop = opaque;
+    g_free(prop);
+}
+
+ObjectProperty *
+object_property_add_tm(Object *obj, const char *name,
+                       void (*get)(Object *, struct tm *, Error **))
+{
+    TMProperty *prop = g_malloc0(sizeof(*prop));
+
+    prop->get = get;
+
+    return object_property_add(obj, name, "struct tm",
+                               get ? property_get_tm : NULL, NULL,
+                               property_release_tm,
+                               prop);
+}
+
+ObjectProperty *
+object_class_property_add_tm(ObjectClass *klass, const char *name,
+                             void (*get)(Object *, struct tm *, Error **))
+{
+    TMProperty *prop = g_malloc0(sizeof(*prop));
+
+    prop->get = get;
+
+    return object_class_property_add(klass, name, "struct tm",
+                                     get ? property_get_tm : NULL,
+                                     NULL, NULL, prop);
+}
+
+static char *object_get_type(Object *obj, Error **errp)
+{
+    return g_strdup(object_get_typename(obj));
+}
+
+static void property_get_uint8_ptr(Object *obj, Visitor *v, const char *name,
+                                   void *opaque, Error **errp)
+{
+    uint8_t value = *(uint8_t *)opaque;
+    visit_type_uint8(v, name, &value, errp);
+}
+
+static void property_set_uint8_ptr(Object *obj, Visitor *v, const char *name,
+                                   void *opaque, Error **errp)
+{
+    uint8_t *field = opaque;
+    uint8_t value;
+
+    if (!visit_type_uint8(v, name, &value, errp)) {
+        return;
+    }
+
+    *field = value;
+}
+
+static void property_get_uint16_ptr(Object *obj, Visitor *v, const char *name,
+                                    void *opaque, Error **errp)
+{
+    uint16_t value = *(uint16_t *)opaque;
+    visit_type_uint16(v, name, &value, errp);
+}
+
+static void property_set_uint16_ptr(Object *obj, Visitor *v, const char *name,
+                                    void *opaque, Error **errp)
+{
+    uint16_t *field = opaque;
+    uint16_t value;
+
+    if (!visit_type_uint16(v, name, &value, errp)) {
+        return;
+    }
+
+    *field = value;
+}
+
+static void property_get_uint32_ptr(Object *obj, Visitor *v, const char *name,
+                                    void *opaque, Error **errp)
+{
+    uint32_t value = *(uint32_t *)opaque;
+    visit_type_uint32(v, name, &value, errp);
+}
+
+static void property_set_uint32_ptr(Object *obj, Visitor *v, const char *name,
+                                    void *opaque, Error **errp)
+{
+    uint32_t *field = opaque;
+    uint32_t value;
+
+    if (!visit_type_uint32(v, name, &value, errp)) {
+        return;
+    }
+
+    *field = value;
+}
+
+static void property_get_uint64_ptr(Object *obj, Visitor *v, const char *name,
+                                    void *opaque, Error **errp)
+{
+    uint64_t value = *(uint64_t *)opaque;
+    visit_type_uint64(v, name, &value, errp);
+}
+
+static void property_set_uint64_ptr(Object *obj, Visitor *v, const char *name,
+                                    void *opaque, Error **errp)
+{
+    uint64_t *field = opaque;
+    uint64_t value;
+
+    if (!visit_type_uint64(v, name, &value, errp)) {
+        return;
+    }
+
+    *field = value;
+}
+
+ObjectProperty *
+object_property_add_uint8_ptr(Object *obj, const char *name,
+                              const uint8_t *v,
+                              ObjectPropertyFlags flags)
+{
+    ObjectPropertyAccessor *getter = NULL;
+    ObjectPropertyAccessor *setter = NULL;
+
+    if ((flags & OBJ_PROP_FLAG_READ) == OBJ_PROP_FLAG_READ) {
+        getter = property_get_uint8_ptr;
+    }
+
+    if ((flags & OBJ_PROP_FLAG_WRITE) == OBJ_PROP_FLAG_WRITE) {
+        setter = property_set_uint8_ptr;
+    }
+
+    return object_property_add(obj, name, "uint8",
+                               getter, setter, NULL, (void *)v);
+}
+
+ObjectProperty *
+object_class_property_add_uint8_ptr(ObjectClass *klass, const char *name,
+                                    const uint8_t *v,
+                                    ObjectPropertyFlags flags)
+{
+    ObjectPropertyAccessor *getter = NULL;
+    ObjectPropertyAccessor *setter = NULL;
+
+    if ((flags & OBJ_PROP_FLAG_READ) == OBJ_PROP_FLAG_READ) {
+        getter = property_get_uint8_ptr;
+    }
+
+    if ((flags & OBJ_PROP_FLAG_WRITE) == OBJ_PROP_FLAG_WRITE) {
+        setter = property_set_uint8_ptr;
+    }
+
+    return object_class_property_add(klass, name, "uint8",
+                                     getter, setter, NULL, (void *)v);
+}
+
+ObjectProperty *
+object_property_add_uint16_ptr(Object *obj, const char *name,
+                               const uint16_t *v,
+                               ObjectPropertyFlags flags)
+{
+    ObjectPropertyAccessor *getter = NULL;
+    ObjectPropertyAccessor *setter = NULL;
+
+    if ((flags & OBJ_PROP_FLAG_READ) == OBJ_PROP_FLAG_READ) {
+        getter = property_get_uint16_ptr;
+    }
+
+    if ((flags & OBJ_PROP_FLAG_WRITE) == OBJ_PROP_FLAG_WRITE) {
+        setter = property_set_uint16_ptr;
+    }
+
+    return object_property_add(obj, name, "uint16",
+                               getter, setter, NULL, (void *)v);
+}
+
+ObjectProperty *
+object_class_property_add_uint16_ptr(ObjectClass *klass, const char *name,
+                                     const uint16_t *v,
+                                     ObjectPropertyFlags flags)
+{
+    ObjectPropertyAccessor *getter = NULL;
+    ObjectPropertyAccessor *setter = NULL;
+
+    if ((flags & OBJ_PROP_FLAG_READ) == OBJ_PROP_FLAG_READ) {
+        getter = property_get_uint16_ptr;
+    }
+
+    if ((flags & OBJ_PROP_FLAG_WRITE) == OBJ_PROP_FLAG_WRITE) {
+        setter = property_set_uint16_ptr;
+    }
+
+    return object_class_property_add(klass, name, "uint16",
+                                     getter, setter, NULL, (void *)v);
+}
+
+ObjectProperty *
+object_property_add_uint32_ptr(Object *obj, const char *name,
+                               const uint32_t *v,
+                               ObjectPropertyFlags flags)
+{
+    ObjectPropertyAccessor *getter = NULL;
+    ObjectPropertyAccessor *setter = NULL;
+
+    if ((flags & OBJ_PROP_FLAG_READ) == OBJ_PROP_FLAG_READ) {
+        getter = property_get_uint32_ptr;
+    }
+
+    if ((flags & OBJ_PROP_FLAG_WRITE) == OBJ_PROP_FLAG_WRITE) {
+        setter = property_set_uint32_ptr;
+    }
+
+    return object_property_add(obj, name, "uint32",
+                               getter, setter, NULL, (void *)v);
+}
+
+ObjectProperty *
+object_class_property_add_uint32_ptr(ObjectClass *klass, const char *name,
+                                     const uint32_t *v,
+                                     ObjectPropertyFlags flags)
+{
+    ObjectPropertyAccessor *getter = NULL;
+    ObjectPropertyAccessor *setter = NULL;
+
+    if ((flags & OBJ_PROP_FLAG_READ) == OBJ_PROP_FLAG_READ) {
+        getter = property_get_uint32_ptr;
+    }
+
+    if ((flags & OBJ_PROP_FLAG_WRITE) == OBJ_PROP_FLAG_WRITE) {
+        setter = property_set_uint32_ptr;
+    }
+
+    return object_class_property_add(klass, name, "uint32",
+                                     getter, setter, NULL, (void *)v);
+}
+
+ObjectProperty *
+object_property_add_uint64_ptr(Object *obj, const char *name,
+                               const uint64_t *v,
+                               ObjectPropertyFlags flags)
+{
+    ObjectPropertyAccessor *getter = NULL;
+    ObjectPropertyAccessor *setter = NULL;
+
+    if ((flags & OBJ_PROP_FLAG_READ) == OBJ_PROP_FLAG_READ) {
+        getter = property_get_uint64_ptr;
+    }
+
+    if ((flags & OBJ_PROP_FLAG_WRITE) == OBJ_PROP_FLAG_WRITE) {
+        setter = property_set_uint64_ptr;
+    }
+
+    return object_property_add(obj, name, "uint64",
+                               getter, setter, NULL, (void *)v);
+}
+
+ObjectProperty *
+object_class_property_add_uint64_ptr(ObjectClass *klass, const char *name,
+                                     const uint64_t *v,
+                                     ObjectPropertyFlags flags)
+{
+    ObjectPropertyAccessor *getter = NULL;
+    ObjectPropertyAccessor *setter = NULL;
+
+    if ((flags & OBJ_PROP_FLAG_READ) == OBJ_PROP_FLAG_READ) {
+        getter = property_get_uint64_ptr;
+    }
+
+    if ((flags & OBJ_PROP_FLAG_WRITE) == OBJ_PROP_FLAG_WRITE) {
+        setter = property_set_uint64_ptr;
+    }
+
+    return object_class_property_add(klass, name, "uint64",
+                                     getter, setter, NULL, (void *)v);
+}
+
+typedef struct {
+    Object *target_obj;
+    char *target_name;
+} AliasProperty;
+
+static void property_get_alias(Object *obj, Visitor *v, const char *name,
+                               void *opaque, Error **errp)
+{
+    AliasProperty *prop = opaque;
+
+    object_property_get(prop->target_obj, prop->target_name, v, errp);
+}
+
+static void property_set_alias(Object *obj, Visitor *v, const char *name,
+                               void *opaque, Error **errp)
+{
+    AliasProperty *prop = opaque;
+
+    object_property_set(prop->target_obj, prop->target_name, v, errp);
+}
+
+static Object *property_resolve_alias(Object *obj, void *opaque,
+                                      const char *part)
+{
+    AliasProperty *prop = opaque;
+
+    return object_resolve_path_component(prop->target_obj, prop->target_name);
+}
+
+static void property_release_alias(Object *obj, const char *name, void *opaque)
+{
+    AliasProperty *prop = opaque;
+
+    g_free(prop->target_name);
+    g_free(prop);
+}
+
+ObjectProperty *
+object_property_add_alias(Object *obj, const char *name,
+                          Object *target_obj, const char *target_name)
+{
+    AliasProperty *prop;
+    ObjectProperty *op;
+    ObjectProperty *target_prop;
+    g_autofree char *prop_type = NULL;
+
+    target_prop = object_property_find_err(target_obj, target_name,
+                                           &error_abort);
+
+    if (object_property_is_child(target_prop)) {
+        prop_type = g_strdup_printf("link%s",
+                                    target_prop->type + strlen("child"));
+    } else {
+        prop_type = g_strdup(target_prop->type);
+    }
+
+    prop = g_malloc(sizeof(*prop));
+    prop->target_obj = target_obj;
+    prop->target_name = g_strdup(target_name);
+
+    op = object_property_add(obj, name, prop_type,
+                             property_get_alias,
+                             property_set_alias,
+                             property_release_alias,
+                             prop);
+    op->resolve = property_resolve_alias;
+    if (target_prop->defval) {
+        op->defval = qobject_ref(target_prop->defval);
+    }
+
+    object_property_set_description(obj, op->name,
+                                    target_prop->description);
+    return op;
+}
+
+void object_property_set_description(Object *obj, const char *name,
+                                     const char *description)
+{
+    ObjectProperty *op;
+
+    op = object_property_find_err(obj, name, &error_abort);
+    g_free(op->description);
+    op->description = g_strdup(description);
+}
+
+void object_class_property_set_description(ObjectClass *klass,
+                                           const char *name,
+                                           const char *description)
+{
+    ObjectProperty *op;
+
+    op = g_hash_table_lookup(klass->properties, name);
+    g_free(op->description);
+    op->description = g_strdup(description);
+}
+
+static void object_class_init(ObjectClass *klass, void *data)
+{
+    object_class_property_add_str(klass, "type", object_get_type,
+                                  NULL);
+}
+
+static void register_types(void)
+{
+    static TypeInfo interface_info = {
+        .name = TYPE_INTERFACE,
+        .class_size = sizeof(InterfaceClass),
+        .abstract = true,
+    };
+
+    static TypeInfo object_info = {
+        .name = TYPE_OBJECT,
+        .instance_size = sizeof(Object),
+        .class_init = object_class_init,
+        .abstract = true,
+    };
+
+    type_interface = type_register_internal(&interface_info);
+    type_register_internal(&object_info);
+}
+
+type_init(register_types)
diff --git a/qom/object_interfaces.c b/qom/object_interfaces.c
new file mode 100644
index 000000000..ed896fe76
--- /dev/null
+++ b/qom/object_interfaces.c
@@ -0,0 +1,335 @@
+#include "qemu/osdep.h"
+
+#include "qemu/cutils.h"
+#include "qapi/error.h"
+#include "qapi/qmp/qdict.h"
+#include "qapi/qmp/qerror.h"
+#include "qapi/qmp/qjson.h"
+#include "qapi/qmp/qstring.h"
+#include "qapi/qobject-input-visitor.h"
+#include "qom/object_interfaces.h"
+#include "qemu/help_option.h"
+#include "qemu/module.h"
+#include "qemu/option.h"
+#include "qapi/opts-visitor.h"
+#include "qemu/config-file.h"
+
+bool user_creatable_complete(UserCreatable *uc, Error **errp)
+{
+    UserCreatableClass *ucc = USER_CREATABLE_GET_CLASS(uc);
+    Error *err = NULL;
+
+    if (ucc->complete) {
+        ucc->complete(uc, &err);
+        error_propagate(errp, err);
+    }
+    return !err;
+}
+
+bool user_creatable_can_be_deleted(UserCreatable *uc)
+{
+
+    UserCreatableClass *ucc = USER_CREATABLE_GET_CLASS(uc);
+
+    if (ucc->can_be_deleted) {
+        return ucc->can_be_deleted(uc);
+    } else {
+        return true;
+    }
+}
+
+Object *user_creatable_add_type(const char *type, const char *id,
+                                const QDict *qdict,
+                                Visitor *v, Error **errp)
+{
+    Object *obj;
+    ObjectClass *klass;
+    const QDictEntry *e;
+    Error *local_err = NULL;
+
+    klass = object_class_by_name(type);
+    if (!klass) {
+        error_setg(errp, "invalid object type: %s", type);
+        return NULL;
+    }
+
+    if (!object_class_dynamic_cast(klass, TYPE_USER_CREATABLE)) {
+        error_setg(errp, "object type '%s' isn't supported by object-add",
+                   type);
+        return NULL;
+    }
+
+    if (object_class_is_abstract(klass)) {
+        error_setg(errp, "object type '%s' is abstract", type);
+        return NULL;
+    }
+
+    assert(qdict);
+    obj = object_new(type);
+    if (!visit_start_struct(v, NULL, NULL, 0, &local_err)) {
+        goto out;
+    }
+    for (e = qdict_first(qdict); e; e = qdict_next(qdict, e)) {
+        if (!object_property_set(obj, e->key, v, &local_err)) {
+            break;
+        }
+    }
+    if (!local_err) {
+        visit_check_struct(v, &local_err);
+    }
+    visit_end_struct(v, NULL);
+    if (local_err) {
+        goto out;
+    }
+
+    if (id != NULL) {
+        object_property_try_add_child(object_get_objects_root(),
+                                      id, obj, &local_err);
+        if (local_err) {
+            goto out;
+        }
+    }
+
+    if (!user_creatable_complete(USER_CREATABLE(obj), &local_err)) {
+        if (id != NULL) {
+            object_property_del(object_get_objects_root(), id);
+        }
+        goto out;
+    }
+out:
+    if (local_err) {
+        error_propagate(errp, local_err);
+        object_unref(obj);
+        return NULL;
+    }
+    return obj;
+}
+
+bool user_creatable_add_dict(QDict *qdict, bool keyval, Error **errp)
+{
+    Visitor *v;
+    Object *obj;
+    g_autofree char *type = NULL;
+    g_autofree char *id = NULL;
+
+    type = g_strdup(qdict_get_try_str(qdict, "qom-type"));
+    if (!type) {
+        error_setg(errp, QERR_MISSING_PARAMETER, "qom-type");
+        return false;
+    }
+    qdict_del(qdict, "qom-type");
+
+    id = g_strdup(qdict_get_try_str(qdict, "id"));
+    if (!id) {
+        error_setg(errp, QERR_MISSING_PARAMETER, "id");
+        return false;
+    }
+    qdict_del(qdict, "id");
+
+    if (keyval) {
+        v = qobject_input_visitor_new_keyval(QOBJECT(qdict));
+    } else {
+        v = qobject_input_visitor_new(QOBJECT(qdict));
+    }
+    obj = user_creatable_add_type(type, id, qdict, v, errp);
+    visit_free(v);
+    object_unref(obj);
+    return !!obj;
+}
+
+Object *user_creatable_add_opts(QemuOpts *opts, Error **errp)
+{
+    Visitor *v;
+    QDict *pdict;
+    Object *obj;
+    const char *id = qemu_opts_id(opts);
+    char *type = qemu_opt_get_del(opts, "qom-type");
+
+    if (!type) {
+        error_setg(errp, QERR_MISSING_PARAMETER, "qom-type");
+        return NULL;
+    }
+    if (!id) {
+        error_setg(errp, QERR_MISSING_PARAMETER, "id");
+        qemu_opt_set(opts, "qom-type", type, &error_abort);
+        g_free(type);
+        return NULL;
+    }
+
+    qemu_opts_set_id(opts, NULL);
+    pdict = qemu_opts_to_qdict(opts, NULL);
+
+    v = opts_visitor_new(opts);
+    obj = user_creatable_add_type(type, id, pdict, v, errp);
+    visit_free(v);
+
+    qemu_opts_set_id(opts, (char *) id);
+    qemu_opt_set(opts, "qom-type", type, &error_abort);
+    g_free(type);
+    qobject_unref(pdict);
+    return obj;
+}
+
+
+int user_creatable_add_opts_foreach(void *opaque, QemuOpts *opts, Error **errp)
+{
+    bool (*type_opt_predicate)(const char *, QemuOpts *) = opaque;
+    Object *obj = NULL;
+    const char *type;
+
+    type = qemu_opt_get(opts, "qom-type");
+    if (type && type_opt_predicate &&
+        !type_opt_predicate(type, opts)) {
+        return 0;
+    }
+
+    obj = user_creatable_add_opts(opts, errp);
+    if (!obj) {
+        return -1;
+    }
+    object_unref(obj);
+    return 0;
+}
+
+char *object_property_help(const char *name, const char *type,
+                           QObject *defval, const char *description)
+{
+    GString *str = g_string_new(NULL);
+
+    g_string_append_printf(str, "  %s=<%s>", name, type);
+    if (description || defval) {
+        if (str->len < 24) {
+            g_string_append_printf(str, "%*s", 24 - (int)str->len, "");
+        }
+        g_string_append(str, " - ");
+    }
+    if (description) {
+        g_string_append(str, description);
+    }
+    if (defval) {
+        g_autofree char *def_json = qstring_free(qobject_to_json(defval), TRUE);
+        g_string_append_printf(str, " (default: %s)", def_json);
+    }
+
+    return g_string_free(str, false);
+}
+
+static void user_creatable_print_types(void)
+{
+    GSList *l, *list;
+
+    printf("List of user creatable objects:\n");
+    list = object_class_get_list_sorted(TYPE_USER_CREATABLE, false);
+    for (l = list; l != NULL; l = l->next) {
+        ObjectClass *oc = OBJECT_CLASS(l->data);
+        printf("  %s\n", object_class_get_name(oc));
+    }
+    g_slist_free(list);
+}
+
+static bool user_creatable_print_type_properites(const char *type)
+{
+    ObjectClass *klass;
+    ObjectPropertyIterator iter;
+    ObjectProperty *prop;
+    GPtrArray *array;
+    int i;
+
+    klass = object_class_by_name(type);
+    if (!klass) {
+        return false;
+    }
+
+    array = g_ptr_array_new();
+    object_class_property_iter_init(&iter, klass);
+    while ((prop = object_property_iter_next(&iter))) {
+        if (!prop->set) {
+            continue;
+        }
+
+        g_ptr_array_add(array,
+                        object_property_help(prop->name, prop->type,
+                                             prop->defval, prop->description));
+    }
+    g_ptr_array_sort(array, (GCompareFunc)qemu_pstrcmp0);
+    if (array->len > 0) {
+        printf("%s options:\n", type);
+    } else {
+        printf("There are no options for %s.\n", type);
+    }
+    for (i = 0; i < array->len; i++) {
+        printf("%s\n", (char *)array->pdata[i]);
+    }
+    g_ptr_array_set_free_func(array, g_free);
+    g_ptr_array_free(array, true);
+    return true;
+}
+
+bool user_creatable_print_help(const char *type, QemuOpts *opts)
+{
+    if (is_help_option(type)) {
+        user_creatable_print_types();
+        return true;
+    }
+
+    if (qemu_opt_has_help_opt(opts)) {
+        return user_creatable_print_type_properites(type);
+    }
+
+    return false;
+}
+
+void user_creatable_print_help_from_qdict(QDict *args)
+{
+    const char *type = qdict_get_try_str(args, "qom-type");
+
+    if (!type || !user_creatable_print_type_properites(type)) {
+        user_creatable_print_types();
+    }
+}
+
+bool user_creatable_del(const char *id, Error **errp)
+{
+    Object *container;
+    Object *obj;
+
+    container = object_get_objects_root();
+    obj = object_resolve_path_component(container, id);
+    if (!obj) {
+        error_setg(errp, "object '%s' not found", id);
+        return false;
+    }
+
+    if (!user_creatable_can_be_deleted(USER_CREATABLE(obj))) {
+        error_setg(errp, "object '%s' is in use, can not be deleted", id);
+        return false;
+    }
+
+    /*
+     * if object was defined on the command-line, remove its corresponding
+     * option group entry
+     */
+    qemu_opts_del(qemu_opts_find(qemu_find_opts_err("object", &error_abort),
+                                 id));
+
+    object_unparent(obj);
+    return true;
+}
+
+void user_creatable_cleanup(void)
+{
+    object_unparent(object_get_objects_root());
+}
+
+static void register_types(void)
+{
+    static const TypeInfo uc_interface_info = {
+        .name          = TYPE_USER_CREATABLE,
+        .parent        = TYPE_INTERFACE,
+        .class_size = sizeof(UserCreatableClass),
+    };
+
+    type_register_static(&uc_interface_info);
+}
+
+type_init(register_types)
diff --git a/qom/qom-hmp-cmds.c b/qom/qom-hmp-cmds.c
new file mode 100644
index 000000000..8861a109d
--- /dev/null
+++ b/qom/qom-hmp-cmds.c
@@ -0,0 +1,153 @@
+/*
+ * HMP commands related to QOM
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or
+ * later.  See the COPYING file in the top-level directory.
+ */
+
+#include "qemu/osdep.h"
+#include "hw/qdev-core.h"
+#include "monitor/hmp.h"
+#include "monitor/monitor.h"
+#include "qapi/error.h"
+#include "qapi/qapi-commands-qom.h"
+#include "qapi/qmp/qdict.h"
+#include "qapi/qmp/qjson.h"
+#include "qapi/qmp/qstring.h"
+#include "qom/object.h"
+
+void hmp_qom_list(Monitor *mon, const QDict *qdict)
+{
+    const char *path = qdict_get_try_str(qdict, "path");
+    ObjectPropertyInfoList *list;
+    Error *err = NULL;
+
+    if (path == NULL) {
+        monitor_printf(mon, "/\n");
+        return;
+    }
+
+    list = qmp_qom_list(path, &err);
+    if (err == NULL) {
+        ObjectPropertyInfoList *start = list;
+        while (list != NULL) {
+            ObjectPropertyInfo *value = list->value;
+
+            monitor_printf(mon, "%s (%s)\n",
+                           value->name, value->type);
+            list = list->next;
+        }
+        qapi_free_ObjectPropertyInfoList(start);
+    }
+    hmp_handle_error(mon, err);
+}
+
+void hmp_qom_set(Monitor *mon, const QDict *qdict)
+{
+    const bool json = qdict_get_try_bool(qdict, "json", false);
+    const char *path = qdict_get_str(qdict, "path");
+    const char *property = qdict_get_str(qdict, "property");
+    const char *value = qdict_get_str(qdict, "value");
+    Error *err = NULL;
+
+    if (!json) {
+        Object *obj = object_resolve_path(path, NULL);
+
+        if (!obj) {
+            error_set(&err, ERROR_CLASS_DEVICE_NOT_FOUND,
+                      "Device '%s' not found", path);
+        } else {
+            object_property_parse(obj, property, value, &err);
+        }
+    } else {
+        QObject *obj = qobject_from_json(value, &err);
+
+        if (!err) {
+            qmp_qom_set(path, property, obj, &err);
+        }
+    }
+
+    hmp_handle_error(mon, err);
+}
+
+void hmp_qom_get(Monitor *mon, const QDict *qdict)
+{
+    const char *path = qdict_get_str(qdict, "path");
+    const char *property = qdict_get_str(qdict, "property");
+    Error *err = NULL;
+    QObject *obj = qmp_qom_get(path, property, &err);
+
+    if (err == NULL) {
+        QString *str = qobject_to_json_pretty(obj);
+        monitor_printf(mon, "%s\n", qstring_get_str(str));
+        qobject_unref(str);
+    }
+
+    qobject_unref(obj);
+    hmp_handle_error(mon, err);
+}
+
+typedef struct QOMCompositionState {
+    Monitor *mon;
+    int indent;
+} QOMCompositionState;
+
+static void print_qom_composition(Monitor *mon, Object *obj, int indent);
+
+static int qom_composition_compare(const void *a, const void *b)
+{
+    return g_strcmp0(object_get_canonical_path_component(*(Object **)a),
+                     object_get_canonical_path_component(*(Object **)b));
+}
+
+static int insert_qom_composition_child(Object *obj, void *opaque)
+{
+    g_array_append_val(opaque, obj);
+    return 0;
+}
+
+static void print_qom_composition(Monitor *mon, Object *obj, int indent)
+{
+    GArray *children = g_array_new(false, false, sizeof(Object *));
+    const char *name;
+    int i;
+
+    if (obj == object_get_root()) {
+        name = "";
+    } else {
+        name = object_get_canonical_path_component(obj);
+    }
+    monitor_printf(mon, "%*s/%s (%s)\n", indent, "", name,
+                   object_get_typename(obj));
+
+    object_child_foreach(obj, insert_qom_composition_child, children);
+    g_array_sort(children, qom_composition_compare);
+
+    for (i = 0; i < children->len; i++) {
+        print_qom_composition(mon, g_array_index(children, Object *, i),
+                              indent + 2);
+    }
+    g_array_free(children, TRUE);
+}
+
+void hmp_info_qom_tree(Monitor *mon, const QDict *dict)
+{
+    const char *path = qdict_get_try_str(dict, "path");
+    Object *obj;
+    bool ambiguous = false;
+
+    if (path) {
+        obj = object_resolve_path(path, &ambiguous);
+        if (!obj) {
+            monitor_printf(mon, "Path '%s' could not be resolved.\n", path);
+            return;
+        }
+        if (ambiguous) {
+            monitor_printf(mon, "Warning: Path '%s' is ambiguous.\n", path);
+            return;
+        }
+    } else {
+        obj = qdev_get_machine();
+    }
+    print_qom_composition(mon, obj, 0);
+}
diff --git a/qom/qom-qmp-cmds.c b/qom/qom-qmp-cmds.c
new file mode 100644
index 000000000..cf130dc87
--- /dev/null
+++ b/qom/qom-qmp-cmds.c
@@ -0,0 +1,290 @@
+/*
+ * QMP commands related to QOM
+ *
+ * Copyright IBM, Corp. 2011
+ *
+ * Authors:
+ *  Anthony Liguori   
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2.  See
+ * the COPYING file in the top-level directory.
+ *
+ * Contributions after 2012-01-13 are licensed under the terms of the
+ * GNU GPL, version 2 or (at your option) any later version.
+ */
+
+#include "qemu/osdep.h"
+#include "block/qdict.h"
+#include "hw/qdev-core.h"
+#include "qapi/error.h"
+#include "qapi/qapi-commands-qdev.h"
+#include "qapi/qapi-commands-qom.h"
+#include "qapi/qmp/qdict.h"
+#include "qapi/qmp/qerror.h"
+#include "qemu/cutils.h"
+#include "qom/object_interfaces.h"
+#include "qom/qom-qobject.h"
+
+ObjectPropertyInfoList *qmp_qom_list(const char *path, Error **errp)
+{
+    Object *obj;
+    bool ambiguous = false;
+    ObjectPropertyInfoList *props = NULL;
+    ObjectProperty *prop;
+    ObjectPropertyIterator iter;
+
+    obj = object_resolve_path(path, &ambiguous);
+    if (obj == NULL) {
+        if (ambiguous) {
+            error_setg(errp, "Path '%s' is ambiguous", path);
+        } else {
+            error_set(errp, ERROR_CLASS_DEVICE_NOT_FOUND,
+                      "Device '%s' not found", path);
+        }
+        return NULL;
+    }
+
+    object_property_iter_init(&iter, obj);
+    while ((prop = object_property_iter_next(&iter))) {
+        ObjectPropertyInfoList *entry = g_malloc0(sizeof(*entry));
+
+        entry->value = g_malloc0(sizeof(ObjectPropertyInfo));
+        entry->next = props;
+        props = entry;
+
+        entry->value->name = g_strdup(prop->name);
+        entry->value->type = g_strdup(prop->type);
+    }
+
+    return props;
+}
+
+void qmp_qom_set(const char *path, const char *property, QObject *value,
+                 Error **errp)
+{
+    Object *obj;
+
+    obj = object_resolve_path(path, NULL);
+    if (!obj) {
+        error_set(errp, ERROR_CLASS_DEVICE_NOT_FOUND,
+                  "Device '%s' not found", path);
+        return;
+    }
+
+    object_property_set_qobject(obj, property, value, errp);
+}
+
+QObject *qmp_qom_get(const char *path, const char *property, Error **errp)
+{
+    Object *obj;
+
+    obj = object_resolve_path(path, NULL);
+    if (!obj) {
+        error_set(errp, ERROR_CLASS_DEVICE_NOT_FOUND,
+                  "Device '%s' not found", path);
+        return NULL;
+    }
+
+    return object_property_get_qobject(obj, property, errp);
+}
+
+static void qom_list_types_tramp(ObjectClass *klass, void *data)
+{
+    ObjectTypeInfoList *e, **pret = data;
+    ObjectTypeInfo *info;
+    ObjectClass *parent = object_class_get_parent(klass);
+
+    info = g_malloc0(sizeof(*info));
+    info->name = g_strdup(object_class_get_name(klass));
+    info->has_abstract = info->abstract = object_class_is_abstract(klass);
+    if (parent) {
+        info->has_parent = true;
+        info->parent = g_strdup(object_class_get_name(parent));
+    }
+
+    e = g_malloc0(sizeof(*e));
+    e->value = info;
+    e->next = *pret;
+    *pret = e;
+}
+
+ObjectTypeInfoList *qmp_qom_list_types(bool has_implements,
+                                       const char *implements,
+                                       bool has_abstract,
+                                       bool abstract,
+                                       Error **errp)
+{
+    ObjectTypeInfoList *ret = NULL;
+
+    module_load_qom_all();
+    object_class_foreach(qom_list_types_tramp, implements, abstract, &ret);
+
+    return ret;
+}
+
+ObjectPropertyInfoList *qmp_device_list_properties(const char *typename,
+                                                Error **errp)
+{
+    ObjectClass *klass;
+    Object *obj;
+    ObjectProperty *prop;
+    ObjectPropertyIterator iter;
+    ObjectPropertyInfoList *prop_list = NULL;
+
+#ifdef CONFIG_MODULES
+    if (!strcmp(typename, "virtio-gpu-pci") || !strcmp(typename, "virtio-gpu-ccw")) {
+        if (module_load_check("virtio-gpu-device")) {
+            ObjectPropertyInfo *info;
+            info = g_new0(ObjectPropertyInfo, 1);
+            info->name = g_strdup("dummy");
+            info->type = g_strdup("dummy");
+            info->has_description = false;
+            info->description = NULL;
+            info->default_value = 0;
+            info->has_default_value = 0;
+            QAPI_LIST_PREPEND(prop_list, info);
+            return prop_list;
+        }
+    }
+#endif
+
+    klass = module_object_class_by_name(typename);
+    if (klass == NULL) {
+        error_set(errp, ERROR_CLASS_DEVICE_NOT_FOUND,
+                  "Device '%s' not found", typename);
+        return NULL;
+    }
+
+    klass = object_class_dynamic_cast(klass, TYPE_DEVICE);
+    if (klass == NULL) {
+        error_setg(errp, QERR_INVALID_PARAMETER_VALUE, "typename", TYPE_DEVICE);
+        return NULL;
+    }
+
+    if (object_class_is_abstract(klass)) {
+        error_setg(errp, QERR_INVALID_PARAMETER_VALUE, "typename",
+                   "non-abstract device type");
+        return NULL;
+    }
+
+    obj = object_new(typename);
+
+    object_property_iter_init(&iter, obj);
+    while ((prop = object_property_iter_next(&iter))) {
+        ObjectPropertyInfo *info;
+        ObjectPropertyInfoList *entry;
+
+        /* Skip Object and DeviceState properties */
+        if (strcmp(prop->name, "type") == 0 ||
+            strcmp(prop->name, "realized") == 0 ||
+            strcmp(prop->name, "hotpluggable") == 0 ||
+            strcmp(prop->name, "hotplugged") == 0 ||
+            strcmp(prop->name, "parent_bus") == 0) {
+            continue;
+        }
+
+        /* Skip legacy properties since they are just string versions of
+         * properties that we already list.
+         */
+        if (strstart(prop->name, "legacy-", NULL)) {
+            continue;
+        }
+
+        info = g_new0(ObjectPropertyInfo, 1);
+        info->name = g_strdup(prop->name);
+        info->type = g_strdup(prop->type);
+        info->has_description = !!prop->description;
+        info->description = g_strdup(prop->description);
+        info->default_value = qobject_ref(prop->defval);
+        info->has_default_value = !!info->default_value;
+
+        entry = g_malloc0(sizeof(*entry));
+        entry->value = info;
+        entry->next = prop_list;
+        prop_list = entry;
+    }
+
+    object_unref(obj);
+
+    return prop_list;
+}
+
+ObjectPropertyInfoList *qmp_qom_list_properties(const char *typename,
+                                             Error **errp)
+{
+    ObjectClass *klass;
+    Object *obj = NULL;
+    ObjectProperty *prop;
+    ObjectPropertyIterator iter;
+    ObjectPropertyInfoList *prop_list = NULL;
+
+    klass = object_class_by_name(typename);
+    if (klass == NULL) {
+        error_set(errp, ERROR_CLASS_DEVICE_NOT_FOUND,
+                  "Class '%s' not found", typename);
+        return NULL;
+    }
+
+    klass = object_class_dynamic_cast(klass, TYPE_OBJECT);
+    if (klass == NULL) {
+        error_setg(errp, QERR_INVALID_PARAMETER_VALUE, "typename", TYPE_OBJECT);
+        return NULL;
+    }
+
+    if (object_class_is_abstract(klass)) {
+        object_class_property_iter_init(&iter, klass);
+    } else {
+        obj = object_new(typename);
+        object_property_iter_init(&iter, obj);
+    }
+    while ((prop = object_property_iter_next(&iter))) {
+        ObjectPropertyInfo *info;
+        ObjectPropertyInfoList *entry;
+
+        info = g_malloc0(sizeof(*info));
+        info->name = g_strdup(prop->name);
+        info->type = g_strdup(prop->type);
+        info->has_description = !!prop->description;
+        info->description = g_strdup(prop->description);
+
+        entry = g_malloc0(sizeof(*entry));
+        entry->value = info;
+        entry->next = prop_list;
+        prop_list = entry;
+    }
+
+    object_unref(obj);
+
+    return prop_list;
+}
+
+void qmp_object_add(QDict *qdict, QObject **ret_data, Error **errp)
+{
+    QObject *props;
+    QDict *pdict;
+
+    props = qdict_get(qdict, "props");
+    if (props) {
+        pdict = qobject_to(QDict, props);
+        if (!pdict) {
+            error_setg(errp, QERR_INVALID_PARAMETER_TYPE, "props", "dict");
+            return;
+        }
+        qobject_ref(pdict);
+        qdict_del(qdict, "props");
+        qdict_join(qdict, pdict, false);
+        if (qdict_size(pdict) != 0) {
+            error_setg(errp, "Option in 'props' conflicts with top level");
+            qobject_unref(pdict);
+            return;
+        }
+        qobject_unref(pdict);
+    }
+
+    user_creatable_add_dict(qdict, false, errp);
+}
+
+void qmp_object_del(const char *id, Error **errp)
+{
+    user_creatable_del(id, errp);
+}
diff --git a/qom/qom-qobject.c b/qom/qom-qobject.c
new file mode 100644
index 000000000..21ce22de9
--- /dev/null
+++ b/qom/qom-qobject.c
@@ -0,0 +1,45 @@
+/*
+ * QEMU Object Model - QObject wrappers
+ *
+ * Copyright (C) 2012 Red Hat, Inc.
+ *
+ * Author: Paolo Bonzini 
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
+ * See the COPYING file in the top-level directory.
+ */
+
+#include "qemu/osdep.h"
+#include "qapi/error.h"
+#include "qom/object.h"
+#include "qom/qom-qobject.h"
+#include "qapi/visitor.h"
+#include "qapi/qobject-input-visitor.h"
+#include "qapi/qobject-output-visitor.h"
+
+bool object_property_set_qobject(Object *obj,
+                                 const char *name, QObject *value,
+                                 Error **errp)
+{
+    Visitor *v;
+    bool ok;
+
+    v = qobject_input_visitor_new(value);
+    ok = object_property_set(obj, name, v, errp);
+    visit_free(v);
+    return ok;
+}
+
+QObject *object_property_get_qobject(Object *obj, const char *name,
+                                     Error **errp)
+{
+    QObject *ret = NULL;
+    Visitor *v;
+
+    v = qobject_output_visitor_new(&ret);
+    if (object_property_get(obj, name, v, errp)) {
+        visit_complete(v, &ret);
+    }
+    visit_free(v);
+    return ret;
+}
diff --git a/qom/trace-events b/qom/trace-events
new file mode 100644
index 000000000..945205bd1
--- /dev/null
+++ b/qom/trace-events
@@ -0,0 +1,5 @@
+# See docs/devel/tracing.txt for syntax documentation.
+
+# object.c
+object_dynamic_cast_assert(const char *type, const char *target, const char *file, int line, const char *func) "%s->%s (%s:%d:%s)"
+object_class_dynamic_cast_assert(const char *type, const char *target, const char *file, int line, const char *func) "%s->%s (%s:%d:%s)"
diff --git a/qom/trace.h b/qom/trace.h
new file mode 100644
index 000000000..f2895e699
--- /dev/null
+++ b/qom/trace.h
@@ -0,0 +1 @@
+#include "trace/trace-qom.h"
diff --git a/scripts/analyse-9p-simpletrace.py b/scripts/analyse-9p-simpletrace.py
new file mode 100755
index 000000000..7dfcb6ba2
--- /dev/null
+++ b/scripts/analyse-9p-simpletrace.py
@@ -0,0 +1,213 @@
+#!/usr/bin/env python3
+# Pretty print 9p simpletrace log
+# Usage: ./analyse-9p-simpletrace  
+#
+# Author: Harsh Prateek Bora
+import os
+import simpletrace
+
+symbol_9p = {
+    6   : 'TLERROR',
+    7   : 'RLERROR',
+    8   : 'TSTATFS',
+    9   : 'RSTATFS',
+    12  : 'TLOPEN',
+    13  : 'RLOPEN',
+    14  : 'TLCREATE',
+    15  : 'RLCREATE',
+    16  : 'TSYMLINK',
+    17  : 'RSYMLINK',
+    18  : 'TMKNOD',
+    19  : 'RMKNOD',
+    20  : 'TRENAME',
+    21  : 'RRENAME',
+    22  : 'TREADLINK',
+    23  : 'RREADLINK',
+    24  : 'TGETATTR',
+    25  : 'RGETATTR',
+    26  : 'TSETATTR',
+    27  : 'RSETATTR',
+    30  : 'TXATTRWALK',
+    31  : 'RXATTRWALK',
+    32  : 'TXATTRCREATE',
+    33  : 'RXATTRCREATE',
+    40  : 'TREADDIR',
+    41  : 'RREADDIR',
+    50  : 'TFSYNC',
+    51  : 'RFSYNC',
+    52  : 'TLOCK',
+    53  : 'RLOCK',
+    54  : 'TGETLOCK',
+    55  : 'RGETLOCK',
+    70  : 'TLINK',
+    71  : 'RLINK',
+    72  : 'TMKDIR',
+    73  : 'RMKDIR',
+    74  : 'TRENAMEAT',
+    75  : 'RRENAMEAT',
+    76  : 'TUNLINKAT',
+    77  : 'RUNLINKAT',
+    100 : 'TVERSION',
+    101 : 'RVERSION',
+    102 : 'TAUTH',
+    103 : 'RAUTH',
+    104 : 'TATTACH',
+    105 : 'RATTACH',
+    106 : 'TERROR',
+    107 : 'RERROR',
+    108 : 'TFLUSH',
+    109 : 'RFLUSH',
+    110 : 'TWALK',
+    111 : 'RWALK',
+    112 : 'TOPEN',
+    113 : 'ROPEN',
+    114 : 'TCREATE',
+    115 : 'RCREATE',
+    116 : 'TREAD',
+    117 : 'RREAD',
+    118 : 'TWRITE',
+    119 : 'RWRITE',
+    120 : 'TCLUNK',
+    121 : 'RCLUNK',
+    122 : 'TREMOVE',
+    123 : 'RREMOVE',
+    124 : 'TSTAT',
+    125 : 'RSTAT',
+    126 : 'TWSTAT',
+    127 : 'RWSTAT'
+}
+
+class VirtFSRequestTracker(simpletrace.Analyzer):
+        def begin(self):
+                print("Pretty printing 9p simpletrace log ...")
+
+        def v9fs_rerror(self, tag, id, err):
+                print("RERROR (tag =", tag, ", id =", symbol_9p[id], ", err = \"", os.strerror(err), "\")")
+
+        def v9fs_version(self, tag, id, msize, version):
+                print("TVERSION (tag =", tag, ", msize =", msize, ", version =", version, ")")
+
+        def v9fs_version_return(self, tag, id, msize, version):
+                print("RVERSION (tag =", tag, ", msize =", msize, ", version =", version, ")")
+
+        def v9fs_attach(self, tag, id, fid, afid, uname, aname):
+                print("TATTACH (tag =", tag, ", fid =", fid, ", afid =", afid, ", uname =", uname, ", aname =", aname, ")")
+
+        def v9fs_attach_return(self, tag, id, type, version, path):
+                print("RATTACH (tag =", tag, ", qid={type =", type, ", version =", version, ", path =", path, "})")
+
+        def v9fs_stat(self, tag, id, fid):
+                print("TSTAT (tag =", tag, ", fid =", fid, ")")
+
+        def v9fs_stat_return(self, tag, id, mode, atime, mtime, length):
+                print("RSTAT (tag =", tag, ", mode =", mode, ", atime =", atime, ", mtime =", mtime, ", length =", length, ")")
+
+        def v9fs_getattr(self, tag, id, fid, request_mask):
+                print("TGETATTR (tag =", tag, ", fid =", fid, ", request_mask =", hex(request_mask), ")")
+
+        def v9fs_getattr_return(self, tag, id, result_mask, mode, uid, gid):
+                print("RGETATTR (tag =", tag, ", result_mask =", hex(result_mask), ", mode =", oct(mode), ", uid =", uid, ", gid =", gid, ")")
+
+        def v9fs_walk(self, tag, id, fid, newfid, nwnames):
+                print("TWALK (tag =", tag, ", fid =", fid, ", newfid =", newfid, ", nwnames =", nwnames, ")")
+
+        def v9fs_walk_return(self, tag, id, nwnames, qids):
+                print("RWALK (tag =", tag, ", nwnames =", nwnames, ", qids =", hex(qids), ")")
+
+        def v9fs_open(self, tag, id, fid, mode):
+                print("TOPEN (tag =", tag, ", fid =", fid, ", mode =", oct(mode), ")")
+
+        def v9fs_open_return(self, tag, id, type, version, path, iounit):
+                print("ROPEN (tag =", tag,  ", qid={type =", type, ", version =", version, ", path =", path, "}, iounit =", iounit, ")")
+
+        def v9fs_lcreate(self, tag, id, dfid, flags, mode, gid):
+                print("TLCREATE (tag =", tag, ", dfid =", dfid, ", flags =", oct(flags), ", mode =", oct(mode), ", gid =", gid, ")")
+
+        def v9fs_lcreate_return(self, tag, id, type, version, path, iounit):
+                print("RLCREATE (tag =", tag,  ", qid={type =", type, ", version =", version, ", path =", path, "}, iounit =", iounit, ")")
+
+        def v9fs_fsync(self, tag, id, fid, datasync):
+                print("TFSYNC (tag =", tag, ", fid =", fid, ", datasync =", datasync, ")")
+
+        def v9fs_clunk(self, tag, id, fid):
+                print("TCLUNK (tag =", tag, ", fid =", fid, ")")
+
+        def v9fs_read(self, tag, id, fid, off, max_count):
+                print("TREAD (tag =", tag, ", fid =", fid, ", off =", off, ", max_count =", max_count, ")")
+
+        def v9fs_read_return(self, tag, id, count, err):
+                print("RREAD (tag =", tag, ", count =", count, ", err =", err, ")")
+
+        def v9fs_readdir(self, tag, id, fid, offset, max_count):
+                print("TREADDIR (tag =", tag, ", fid =", fid, ", offset =", offset, ", max_count =", max_count, ")")
+
+        def v9fs_readdir_return(self, tag, id, count, retval):
+                print("RREADDIR (tag =", tag, ", count =", count, ", retval =", retval, ")")
+
+        def v9fs_write(self, tag, id, fid, off, count, cnt):
+                print("TWRITE (tag =", tag, ", fid =", fid, ", off =", off, ", count =", count, ", cnt =", cnt, ")")
+
+        def v9fs_write_return(self, tag, id, total, err):
+                print("RWRITE (tag =", tag, ", total =", total, ", err =", err, ")")
+
+        def v9fs_create(self, tag, id, fid, name, perm, mode):
+                print("TCREATE (tag =", tag, ", fid =", fid, ", perm =", oct(perm), ", name =", name, ", mode =", oct(mode), ")")
+
+        def v9fs_create_return(self, tag, id, type, version, path, iounit):
+                print("RCREATE (tag =", tag,  ", qid={type =", type, ", version =", version, ", path =", path, "}, iounit =", iounit, ")")
+
+        def v9fs_symlink(self, tag, id, fid, name, symname, gid):
+                print("TSYMLINK (tag =", tag, ", fid =", fid, ", name =", name, ", symname =", symname, ", gid =", gid, ")")
+
+        def v9fs_symlink_return(self, tag, id, type, version, path):
+                print("RSYMLINK (tag =", tag,  ", qid={type =", type, ", version =", version, ", path =", path, "})")
+
+        def v9fs_flush(self, tag, id, flush_tag):
+                print("TFLUSH (tag =", tag, ", flush_tag =", flush_tag, ")")
+
+        def v9fs_link(self, tag, id, dfid, oldfid, name):
+                print("TLINK (tag =", tag, ", dfid =", dfid, ", oldfid =", oldfid, ", name =", name, ")")
+
+        def v9fs_remove(self, tag, id, fid):
+                print("TREMOVE (tag =", tag, ", fid =", fid, ")")
+
+        def v9fs_wstat(self, tag, id, fid, mode, atime, mtime):
+                print("TWSTAT (tag =", tag, ", fid =", fid, ", mode =", oct(mode), ", atime =", atime, "mtime =", mtime, ")")
+
+        def v9fs_mknod(self, tag, id, fid, mode, major, minor):
+                print("TMKNOD (tag =", tag, ", fid =", fid, ", mode =", oct(mode), ", major =", major, ", minor =", minor, ")")
+
+        def v9fs_lock(self, tag, id, fid, type, start, length):
+                print("TLOCK (tag =", tag, ", fid =", fid, "type =", type, ", start =", start, ", length =", length, ")")
+
+        def v9fs_lock_return(self, tag, id, status):
+                print("RLOCK (tag =", tag, ", status =", status, ")")
+
+        def v9fs_getlock(self, tag, id, fid, type, start, length):
+                print("TGETLOCK (tag =", tag, ", fid =", fid, "type =", type, ", start =", start, ", length =", length, ")")
+
+        def v9fs_getlock_return(self, tag, id, type, start, length, proc_id):
+                print("RGETLOCK (tag =", tag, "type =", type, ", start =", start, ", length =", length, ", proc_id =", proc_id,  ")")
+
+        def v9fs_mkdir(self, tag, id, fid, name, mode, gid):
+                print("TMKDIR (tag =", tag, ", fid =", fid, ", name =", name, ", mode =", mode, ", gid =", gid, ")")
+
+        def v9fs_mkdir_return(self, tag, id, type, version, path, err):
+                print("RMKDIR (tag =", tag,  ", qid={type =", type, ", version =", version, ", path =", path, "}, err =", err, ")")
+
+        def v9fs_xattrwalk(self, tag, id, fid, newfid, name):
+                print("TXATTRWALK (tag =", tag, ", fid =", fid, ", newfid =", newfid, ", xattr name =", name, ")")
+
+        def v9fs_xattrwalk_return(self, tag, id, size):
+                print("RXATTRWALK (tag =", tag, ", xattrsize  =", size, ")")
+
+        def v9fs_xattrcreate(self, tag, id, fid, name, size, flags):
+                print("TXATTRCREATE (tag =", tag, ", fid =", fid, ", name =", name, ", xattrsize =", size, ", flags =", flags, ")")
+
+        def v9fs_readlink(self, tag, id, fid):
+                print("TREADLINK (tag =", tag, ", fid =", fid, ")")
+
+        def v9fs_readlink_return(self, tag, id, target):
+                print("RREADLINK (tag =", tag, ", target =", target, ")")
+
+simpletrace.run(VirtFSRequestTracker())
diff --git a/scripts/analyse-locks-simpletrace.py b/scripts/analyse-locks-simpletrace.py
new file mode 100755
index 000000000..63c11f4fc
--- /dev/null
+++ b/scripts/analyse-locks-simpletrace.py
@@ -0,0 +1,98 @@
+#!/usr/bin/env python3
+# -*- coding: utf-8 -*-
+#
+# Analyse lock events and compute statistics
+#
+# Author: Alex Bennée 
+#
+
+import simpletrace
+import argparse
+import numpy as np
+
+class MutexAnalyser(simpletrace.Analyzer):
+    "A simpletrace Analyser for checking locks."
+
+    def __init__(self):
+        self.locks = 0
+        self.locked = 0
+        self.unlocks = 0
+        self.mutex_records = {}
+
+    def _get_mutex(self, mutex):
+        if not mutex in self.mutex_records:
+            self.mutex_records[mutex] = {"locks": 0,
+                                         "lock_time": 0,
+                                         "acquire_times": [],
+                                         "locked": 0,
+                                         "locked_time": 0,
+                                         "held_times": [],
+                                         "unlocked": 0}
+
+        return self.mutex_records[mutex]
+
+    def qemu_mutex_lock(self, timestamp, mutex, filename, line):
+        self.locks += 1
+        rec = self._get_mutex(mutex)
+        rec["locks"] += 1
+        rec["lock_time"] = timestamp[0]
+        rec["lock_loc"] = (filename, line)
+
+    def qemu_mutex_locked(self, timestamp, mutex, filename, line):
+        self.locked += 1
+        rec = self._get_mutex(mutex)
+        rec["locked"] += 1
+        rec["locked_time"] = timestamp[0]
+        acquire_time = rec["locked_time"] - rec["lock_time"]
+        rec["locked_loc"] = (filename, line)
+        rec["acquire_times"].append(acquire_time)
+
+    def qemu_mutex_unlock(self, timestamp, mutex, filename, line):
+        self.unlocks += 1
+        rec = self._get_mutex(mutex)
+        rec["unlocked"] += 1
+        held_time = timestamp[0] - rec["locked_time"]
+        rec["held_times"].append(held_time)
+        rec["unlock_loc"] = (filename, line)
+
+
+def get_args():
+    "Grab options"
+    parser = argparse.ArgumentParser()
+    parser.add_argument("--output", "-o", type=str, help="Render plot to file")
+    parser.add_argument("events", type=str, help='trace file read from')
+    parser.add_argument("tracefile", type=str, help='trace file read from')
+    return parser.parse_args()
+
+if __name__ == '__main__':
+    args = get_args()
+
+    # Gather data from the trace
+    analyser = MutexAnalyser()
+    simpletrace.process(args.events, args.tracefile, analyser)
+
+    print ("Total locks: %d, locked: %d, unlocked: %d" %
+           (analyser.locks, analyser.locked, analyser.unlocks))
+
+    # Now dump the individual lock stats
+    for key, val in sorted(analyser.mutex_records.iteritems(),
+                           key=lambda k_v: k_v[1]["locks"]):
+        print ("Lock: %#x locks: %d, locked: %d, unlocked: %d" %
+               (key, val["locks"], val["locked"], val["unlocked"]))
+
+        acquire_times = np.array(val["acquire_times"])
+        if len(acquire_times) > 0:
+            print ("  Acquire Time: min:%d median:%d avg:%.2f max:%d" %
+                   (acquire_times.min(), np.median(acquire_times),
+                    acquire_times.mean(), acquire_times.max()))
+
+        held_times = np.array(val["held_times"])
+        if len(held_times) > 0:
+            print ("  Held Time: min:%d median:%d avg:%.2f max:%d" %
+                   (held_times.min(), np.median(held_times),
+                    held_times.mean(), held_times.max()))
+
+        # Check if any locks still held
+        if val["locks"] > val["locked"]:
+            print ("  LOCK HELD (%s:%s)" % (val["locked_loc"]))
+            print ("  BLOCKED   (%s:%s)" % (val["lock_loc"]))
diff --git a/scripts/analyze-inclusions b/scripts/analyze-inclusions
new file mode 100644
index 000000000..14806e18c
--- /dev/null
+++ b/scripts/analyze-inclusions
@@ -0,0 +1,102 @@
+#! /bin/sh
+#
+# Copyright (C) 2016 Red Hat, Inc.
+#
+# Author: Paolo Bonzini 
+#
+# Print statistics about header file inclusions.
+#
+# The script has two modes of execution:
+#
+# 1) if invoked with a path on the command line (possibly
+# preceded by a "--" argument), it will run the analysis on
+# an existing build directory
+#
+# 2) otherwise, it will configure and builds QEMU itself in a
+# "+build" subdirectory which is left around when the script
+# exits.  In this case the command line is passed directly to
+# "make" (typically used for a "-j" argument suitable for your
+# system).
+#
+# Inspired by a post by Markus Armbruster.
+
+case "x$1" in
+x--)
+  shift
+  cd "$1" || exit $?
+  ;;
+x-* | x)
+  mkdir -p +build
+  cd +build
+  test -f Makefile && make distclean
+  ../configure
+  make "$@"
+  ;;
+*)
+  cd "$1" || exit $?
+esac
+
+QEMU_CFLAGS=$(sed -n s/^QEMU_CFLAGS=//p config-host.mak)
+QEMU_INCLUDES=$(sed -n s/^QEMU_INCLUDES=//p config-host.mak | \
+    sed 's/$(SRC_PATH)/../g' )
+CFLAGS=$(sed -n s/^CFLAGS=//p config-host.mak)
+
+grep_include() {
+  find . -name "*.d" -exec grep -l "$@" {} + | wc -l
+}
+
+echo Found $(find . -name "*.d" | wc -l) object files
+echo $(grep_include -F 'include/qemu-common.h') files include qemu-common.h
+echo $(grep_include -F 'hw/hw.h') files include hw/hw.h
+echo $(grep_include 'target/[a-z0-9]*/cpu\.h') files include cpu.h
+echo $(grep_include -F 'qapi-types.h') files include qapi-types.h
+echo $(grep_include -F 'trace/generated-tracers.h') files include generated-tracers.h
+echo $(grep_include -F 'qapi/error.h') files include qapi/error.h
+echo $(grep_include -F 'qom/object.h') files include qom/object.h
+echo $(grep_include -F 'block/aio.h') files include block/aio.h
+echo $(grep_include -F 'exec/memory.h') files include exec/memory.h
+echo $(grep_include -F 'fpu/softfloat.h') files include fpu/softfloat.h
+echo $(grep_include -F 'qemu/bswap.h') files include qemu/bswap.h
+echo
+
+awk1='
+    /^# / { file = $3;next }
+    NR>1 { bytes[file]+=length()+1; lines[file]++ }
+    END { for(i in lines) print i,lines[i],bytes[i] }'
+
+awk2='
+    {tot_l+=$2;tot_b+=$3;tot_f++}
+    /\/usr.*\/glib/ {glib_l+=$2;glib_b+=$3;glib_f++;next}
+    /\/usr/ {sys_l+=$2;sys_b+=$3;sys_f++;next}
+    {qemu_l+=$2;qemu_b+=$3;qemu_f++;next}
+    END {
+      printf "%s\t %s\t %s\t %s\n", "lines", "bytes", "files", "source"
+      printf "%s\t %s\t %s\t %s\n", qemu_l, qemu_b, qemu_f, "QEMU"
+      printf "%s\t %s\t %s\t %s\n", sys_l, sys_b, sys_f, "system"
+      printf "%s\t %s\t %s\t %s\n", glib_l, glib_b, glib_f, "glib"
+      printf "%s\t %s\t %s\t %s\n", tot_l, tot_b, tot_f, "total"
+    }'
+
+analyze() {
+  cc $QEMU_CFLAGS $QEMU_INCLUDES $CFLAGS  -E -o - "$@" | \
+    awk "$awk1" | awk "$awk2"
+  echo
+}
+
+echo osdep.h:
+analyze ../include/qemu/osdep.h
+
+echo qemu-common.h:
+analyze  -include ../include/qemu/osdep.h ../include/qemu-common.h
+
+echo hw/hw.h:
+analyze -include ../include/qemu/osdep.h ../include/hw/hw.h
+
+echo trace/generated-tracers.h:
+analyze -include ../include/qemu/osdep.h trace/generated-tracers.h
+
+echo target/i386/cpu.h:
+analyze -DNEED_CPU_H -I../target/i386 -Ii386-softmmu -include ../include/qemu/osdep.h ../target/i386/cpu.h
+
+echo hw/hw.h + NEED_CPU_H:
+analyze -DNEED_CPU_H -I../target/i386 -Ii386-softmmu -include ../include/qemu/osdep.h ../include/hw/hw.h
diff --git a/scripts/analyze-migration.py b/scripts/analyze-migration.py
new file mode 100755
index 000000000..502ae1411
--- /dev/null
+++ b/scripts/analyze-migration.py
@@ -0,0 +1,613 @@
+#!/usr/bin/python3
+#
+#  Migration Stream Analyzer
+#
+#  Copyright (c) 2015 Alexander Graf 
+#
+# This library is free software; you can redistribute it and/or
+# modify it under the terms of the GNU Lesser General Public
+# License as published by the Free Software Foundation; either
+# version 2.1 of the License, or (at your option) any later version.
+#
+# This library is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+# Lesser General Public License for more details.
+#
+# You should have received a copy of the GNU Lesser General Public
+# License along with this library; if not, see .
+
+import json
+import os
+import argparse
+import collections
+import struct
+import sys
+
+
+def mkdir_p(path):
+    try:
+        os.makedirs(path)
+    except OSError:
+        pass
+
+
+class MigrationFile(object):
+    def __init__(self, filename):
+        self.filename = filename
+        self.file = open(self.filename, "rb")
+
+    def read64(self):
+        return int.from_bytes(self.file.read(8), byteorder='big', signed=True)
+
+    def read32(self):
+        return int.from_bytes(self.file.read(4), byteorder='big', signed=True)
+
+    def read16(self):
+        return int.from_bytes(self.file.read(2), byteorder='big', signed=True)
+
+    def read8(self):
+        return int.from_bytes(self.file.read(1), byteorder='big', signed=True)
+
+    def readstr(self, len = None):
+        return self.readvar(len).decode('utf-8')
+
+    def readvar(self, size = None):
+        if size is None:
+            size = self.read8()
+        if size == 0:
+            return ""
+        value = self.file.read(size)
+        if len(value) != size:
+            raise Exception("Unexpected end of %s at 0x%x" % (self.filename, self.file.tell()))
+        return value
+
+    def tell(self):
+        return self.file.tell()
+
+    # The VMSD description is at the end of the file, after EOF. Look for
+    # the last NULL byte, then for the beginning brace of JSON.
+    def read_migration_debug_json(self):
+        QEMU_VM_VMDESCRIPTION = 0x06
+
+        # Remember the offset in the file when we started
+        entrypos = self.file.tell()
+
+        # Read the last 10MB
+        self.file.seek(0, os.SEEK_END)
+        endpos = self.file.tell()
+        self.file.seek(max(-endpos, -10 * 1024 * 1024), os.SEEK_END)
+        datapos = self.file.tell()
+        data = self.file.read()
+        # The full file read closed the file as well, reopen it
+        self.file = open(self.filename, "rb")
+
+        # Find the last NULL byte, then the first brace after that. This should
+        # be the beginning of our JSON data.
+        nulpos = data.rfind(b'\0')
+        jsonpos = data.find(b'{', nulpos)
+
+        # Check backwards from there and see whether we guessed right
+        self.file.seek(datapos + jsonpos - 5, 0)
+        if self.read8() != QEMU_VM_VMDESCRIPTION:
+            raise Exception("No Debug Migration device found")
+
+        jsonlen = self.read32()
+
+        # Seek back to where we were at the beginning
+        self.file.seek(entrypos, 0)
+
+        # explicit decode() needed for Python 3.5 compatibility
+        return data[jsonpos:jsonpos + jsonlen].decode("utf-8")
+
+    def close(self):
+        self.file.close()
+
+class RamSection(object):
+    RAM_SAVE_FLAG_COMPRESS = 0x02
+    RAM_SAVE_FLAG_MEM_SIZE = 0x04
+    RAM_SAVE_FLAG_PAGE     = 0x08
+    RAM_SAVE_FLAG_EOS      = 0x10
+    RAM_SAVE_FLAG_CONTINUE = 0x20
+    RAM_SAVE_FLAG_XBZRLE   = 0x40
+    RAM_SAVE_FLAG_HOOK     = 0x80
+
+    def __init__(self, file, version_id, ramargs, section_key):
+        if version_id != 4:
+            raise Exception("Unknown RAM version %d" % version_id)
+
+        self.file = file
+        self.section_key = section_key
+        self.TARGET_PAGE_SIZE = ramargs['page_size']
+        self.dump_memory = ramargs['dump_memory']
+        self.write_memory = ramargs['write_memory']
+        self.sizeinfo = collections.OrderedDict()
+        self.data = collections.OrderedDict()
+        self.data['section sizes'] = self.sizeinfo
+        self.name = ''
+        if self.write_memory:
+            self.files = { }
+        if self.dump_memory:
+            self.memory = collections.OrderedDict()
+            self.data['memory'] = self.memory
+
+    def __repr__(self):
+        return self.data.__repr__()
+
+    def __str__(self):
+        return self.data.__str__()
+
+    def getDict(self):
+        return self.data
+
+    def read(self):
+        # Read all RAM sections
+        while True:
+            addr = self.file.read64()
+            flags = addr & (self.TARGET_PAGE_SIZE - 1)
+            addr &= ~(self.TARGET_PAGE_SIZE - 1)
+
+            if flags & self.RAM_SAVE_FLAG_MEM_SIZE:
+                while True:
+                    namelen = self.file.read8()
+                    # We assume that no RAM chunk is big enough to ever
+                    # hit the first byte of the address, so when we see
+                    # a zero here we know it has to be an address, not the
+                    # length of the next block.
+                    if namelen == 0:
+                        self.file.file.seek(-1, 1)
+                        break
+                    self.name = self.file.readstr(len = namelen)
+                    len = self.file.read64()
+                    self.sizeinfo[self.name] = '0x%016x' % len
+                    if self.write_memory:
+                        print(self.name)
+                        mkdir_p('./' + os.path.dirname(self.name))
+                        f = open('./' + self.name, "wb")
+                        f.truncate(0)
+                        f.truncate(len)
+                        self.files[self.name] = f
+                flags &= ~self.RAM_SAVE_FLAG_MEM_SIZE
+
+            if flags & self.RAM_SAVE_FLAG_COMPRESS:
+                if flags & self.RAM_SAVE_FLAG_CONTINUE:
+                    flags &= ~self.RAM_SAVE_FLAG_CONTINUE
+                else:
+                    self.name = self.file.readstr()
+                fill_char = self.file.read8()
+                # The page in question is filled with fill_char now
+                if self.write_memory and fill_char != 0:
+                    self.files[self.name].seek(addr, os.SEEK_SET)
+                    self.files[self.name].write(chr(fill_char) * self.TARGET_PAGE_SIZE)
+                if self.dump_memory:
+                    self.memory['%s (0x%016x)' % (self.name, addr)] = 'Filled with 0x%02x' % fill_char
+                flags &= ~self.RAM_SAVE_FLAG_COMPRESS
+            elif flags & self.RAM_SAVE_FLAG_PAGE:
+                if flags & self.RAM_SAVE_FLAG_CONTINUE:
+                    flags &= ~self.RAM_SAVE_FLAG_CONTINUE
+                else:
+                    self.name = self.file.readstr()
+
+                if self.write_memory or self.dump_memory:
+                    data = self.file.readvar(size = self.TARGET_PAGE_SIZE)
+                else: # Just skip RAM data
+                    self.file.file.seek(self.TARGET_PAGE_SIZE, 1)
+
+                if self.write_memory:
+                    self.files[self.name].seek(addr, os.SEEK_SET)
+                    self.files[self.name].write(data)
+                if self.dump_memory:
+                    hexdata = " ".join("{0:02x}".format(ord(c)) for c in data)
+                    self.memory['%s (0x%016x)' % (self.name, addr)] = hexdata
+
+                flags &= ~self.RAM_SAVE_FLAG_PAGE
+            elif flags & self.RAM_SAVE_FLAG_XBZRLE:
+                raise Exception("XBZRLE RAM compression is not supported yet")
+            elif flags & self.RAM_SAVE_FLAG_HOOK:
+                raise Exception("RAM hooks don't make sense with files")
+
+            # End of RAM section
+            if flags & self.RAM_SAVE_FLAG_EOS:
+                break
+
+            if flags != 0:
+                raise Exception("Unknown RAM flags: %x" % flags)
+
+    def __del__(self):
+        if self.write_memory:
+            for key in self.files:
+                self.files[key].close()
+
+
+class HTABSection(object):
+    HASH_PTE_SIZE_64       = 16
+
+    def __init__(self, file, version_id, device, section_key):
+        if version_id != 1:
+            raise Exception("Unknown HTAB version %d" % version_id)
+
+        self.file = file
+        self.section_key = section_key
+
+    def read(self):
+
+        header = self.file.read32()
+
+        if (header == -1):
+            # "no HPT" encoding
+            return
+
+        if (header > 0):
+            # First section, just the hash shift
+            return
+
+        # Read until end marker
+        while True:
+            index = self.file.read32()
+            n_valid = self.file.read16()
+            n_invalid = self.file.read16()
+
+            if index == 0 and n_valid == 0 and n_invalid == 0:
+                break
+
+            self.file.readvar(n_valid * self.HASH_PTE_SIZE_64)
+
+    def getDict(self):
+        return ""
+
+
+class ConfigurationSection(object):
+    def __init__(self, file):
+        self.file = file
+
+    def read(self):
+        name_len = self.file.read32()
+        name = self.file.readstr(len = name_len)
+
+class VMSDFieldGeneric(object):
+    def __init__(self, desc, file):
+        self.file = file
+        self.desc = desc
+        self.data = ""
+
+    def __repr__(self):
+        return str(self.__str__())
+
+    def __str__(self):
+        return " ".join("{0:02x}".format(c) for c in self.data)
+
+    def getDict(self):
+        return self.__str__()
+
+    def read(self):
+        size = int(self.desc['size'])
+        self.data = self.file.readvar(size)
+        return self.data
+
+class VMSDFieldInt(VMSDFieldGeneric):
+    def __init__(self, desc, file):
+        super(VMSDFieldInt, self).__init__(desc, file)
+        self.size = int(desc['size'])
+        self.format = '0x%%0%dx' % (self.size * 2)
+        self.sdtype = '>i%d' % self.size
+        self.udtype = '>u%d' % self.size
+
+    def __repr__(self):
+        if self.data < 0:
+            return ('%s (%d)' % ((self.format % self.udata), self.data))
+        else:
+            return self.format % self.data
+
+    def __str__(self):
+        return self.__repr__()
+
+    def getDict(self):
+        return self.__str__()
+
+    def read(self):
+        super(VMSDFieldInt, self).read()
+        self.sdata = int.from_bytes(self.data, byteorder='big', signed=True)
+        self.udata = int.from_bytes(self.data, byteorder='big', signed=False)
+        self.data = self.sdata
+        return self.data
+
+class VMSDFieldUInt(VMSDFieldInt):
+    def __init__(self, desc, file):
+        super(VMSDFieldUInt, self).__init__(desc, file)
+
+    def read(self):
+        super(VMSDFieldUInt, self).read()
+        self.data = self.udata
+        return self.data
+
+class VMSDFieldIntLE(VMSDFieldInt):
+    def __init__(self, desc, file):
+        super(VMSDFieldIntLE, self).__init__(desc, file)
+        self.dtype = '
+#
+# Archive source tree, including submodules. This is created for test code to
+# export the source files, in order to be built in a different environment,
+# such as in a docker instance or VM.
+#
+# This code is licensed under the GPL version 2 or later.  See
+# the COPYING file in the top-level directory.
+
+error() {
+    printf %s\\n "$*" >&2
+    exit 1
+}
+
+if test $# -lt 1; then
+    error "Usage: $0 "
+fi
+
+tar_file=$(realpath "$1")
+sub_tdir=$(mktemp -d "${tar_file%.tar}.sub.XXXXXXXX")
+sub_file="${sub_tdir}/submodule.tar"
+
+# We want a predictable list of submodules for builds, that is
+# independent of what the developer currently has initialized
+# in their checkout, because the build environment is completely
+# different to the host OS.
+submodules="dtc slirp meson ui/keycodemapdb"
+submodules="$submodules tests/fp/berkeley-softfloat-3 tests/fp/berkeley-testfloat-3"
+sub_deinit=""
+
+function cleanup() {
+    local status=$?
+    rm -rf "$sub_tdir"
+    if test "$sub_deinit" != ""; then
+        git submodule deinit $sub_deinit
+    fi
+    exit $status
+}
+trap "cleanup" 0 1 2 3 15
+
+function tree_ish() {
+    local retval='HEAD'
+    if ! git diff-index --quiet --ignore-submodules=all HEAD -- &>/dev/null
+    then
+        retval=$(git stash create)
+    fi
+    echo "$retval"
+}
+
+git archive --format tar "$(tree_ish)" > "$tar_file"
+test $? -ne 0 && error "failed to archive qemu"
+for sm in $submodules; do
+    status="$(git submodule status "$sm")"
+    smhash="${status#[ +-]}"
+    smhash="${smhash%% *}"
+    case "$status" in
+        -*)
+            sub_deinit="$sub_deinit $sm"
+            git submodule update --init "$sm"
+            test $? -ne 0 && error "failed to update submodule $sm"
+            ;;
+        +*)
+            echo "WARNING: submodule $sm is out of sync"
+            ;;
+    esac
+    (cd $sm; git archive --format tar --prefix "$sm/" $(tree_ish)) > "$sub_file"
+    test $? -ne 0 && error "failed to archive submodule $sm ($smhash)"
+    tar --concatenate --file "$tar_file" "$sub_file"
+    test $? -ne 0 && error "failed append submodule $sm to $tar_file"
+done
+exit 0
diff --git a/scripts/block-coroutine-wrapper.py b/scripts/block-coroutine-wrapper.py
new file mode 100644
index 000000000..0461fd1c4
--- /dev/null
+++ b/scripts/block-coroutine-wrapper.py
@@ -0,0 +1,167 @@
+#! /usr/bin/env python3
+"""Generate coroutine wrappers for block subsystem.
+
+The program parses one or several concatenated c files from stdin,
+searches for functions with the 'generated_co_wrapper' specifier
+and generates corresponding wrappers on stdout.
+
+Usage: block-coroutine-wrapper.py generated-file.c FILE.[ch]...
+
+Copyright (c) 2020 Virtuozzo International GmbH.
+
+This program is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 2 of the License, or
+(at your option) any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with this program.  If not, see .
+"""
+
+import sys
+import re
+from typing import Iterator
+
+
+def gen_header():
+    copyright = re.sub('^.*Copyright', 'Copyright', __doc__, flags=re.DOTALL)
+    copyright = re.sub('^(?=.)', ' * ', copyright.strip(), flags=re.MULTILINE)
+    copyright = re.sub('^$', ' *', copyright, flags=re.MULTILINE)
+    return f"""\
+/*
+ * File is generated by scripts/block-coroutine-wrapper.py
+ *
+{copyright}
+ */
+
+#include "qemu/osdep.h"
+#include "block/coroutines.h"
+#include "block/block-gen.h"
+#include "block/block_int.h"\
+"""
+
+
+class ParamDecl:
+    param_re = re.compile(r'(?P'
+                          r'(?P.*[ *])'
+                          r'(?P[a-z][a-z0-9_]*)'
+                          r')')
+
+    def __init__(self, param_decl: str) -> None:
+        m = self.param_re.match(param_decl.strip())
+        if m is None:
+            raise ValueError(f'Wrong parameter declaration: "{param_decl}"')
+        self.decl = m.group('decl')
+        self.type = m.group('type')
+        self.name = m.group('name')
+
+
+class FuncDecl:
+    def __init__(self, return_type: str, name: str, args: str) -> None:
+        self.return_type = return_type.strip()
+        self.name = name.strip()
+        self.args = [ParamDecl(arg.strip()) for arg in args.split(',')]
+
+    def gen_list(self, format: str) -> str:
+        return ', '.join(format.format_map(arg.__dict__) for arg in self.args)
+
+    def gen_block(self, format: str) -> str:
+        return '\n'.join(format.format_map(arg.__dict__) for arg in self.args)
+
+
+# Match wrappers declared with a generated_co_wrapper mark
+func_decl_re = re.compile(r'^int\s*generated_co_wrapper\s*'
+                          r'(?P[a-z][a-z0-9_]*)'
+                          r'\((?P[^)]*)\);$', re.MULTILINE)
+
+
+def func_decl_iter(text: str) -> Iterator:
+    for m in func_decl_re.finditer(text):
+        yield FuncDecl(return_type='int',
+                       name=m.group('wrapper_name'),
+                       args=m.group('args'))
+
+
+def snake_to_camel(func_name: str) -> str:
+    """
+    Convert underscore names like 'some_function_name' to camel-case like
+    'SomeFunctionName'
+    """
+    words = func_name.split('_')
+    words = [w[0].upper() + w[1:] for w in words]
+    return ''.join(words)
+
+
+def gen_wrapper(func: FuncDecl) -> str:
+    assert func.name.startswith('bdrv_')
+    assert not func.name.startswith('bdrv_co_')
+    assert func.return_type == 'int'
+    assert func.args[0].type in ['BlockDriverState *', 'BdrvChild *']
+
+    name = 'bdrv_co_' + func.name[5:]
+    bs = 'bs' if func.args[0].type == 'BlockDriverState *' else 'child->bs'
+    struct_name = snake_to_camel(name)
+
+    return f"""\
+/*
+ * Wrappers for {name}
+ */
+
+typedef struct {struct_name} {{
+    BdrvPollCo poll_state;
+{ func.gen_block('    {decl};') }
+}} {struct_name};
+
+static void coroutine_fn {name}_entry(void *opaque)
+{{
+    {struct_name} *s = opaque;
+
+    s->poll_state.ret = {name}({ func.gen_list('s->{name}') });
+    s->poll_state.in_progress = false;
+
+    aio_wait_kick();
+}}
+
+int {func.name}({ func.gen_list('{decl}') })
+{{
+    if (qemu_in_coroutine()) {{
+        return {name}({ func.gen_list('{name}') });
+    }} else {{
+        {struct_name} s = {{
+            .poll_state.bs = {bs},
+            .poll_state.in_progress = true,
+
+{ func.gen_block('            .{name} = {name},') }
+        }};
+
+        s.poll_state.co = qemu_coroutine_create({name}_entry, &s);
+
+        return bdrv_poll_co(&s.poll_state);
+    }}
+}}"""
+
+
+def gen_wrappers(input_code: str) -> str:
+    res = ''
+    for func in func_decl_iter(input_code):
+        res += '\n\n\n'
+        res += gen_wrapper(func)
+
+    return res
+
+
+if __name__ == '__main__':
+    if len(sys.argv) < 3:
+        exit(f'Usage: {sys.argv[0]} OUT_FILE.c IN_FILE.[ch]...')
+
+    with open(sys.argv[1], 'w', encoding='utf-8') as f_out:
+        f_out.write(gen_header())
+        for fname in sys.argv[2:]:
+            with open(fname, encoding='utf-8') as f_in:
+                f_out.write(gen_wrappers(f_in.read()))
+                f_out.write('\n')
diff --git a/scripts/check_sparse.py b/scripts/check_sparse.py
new file mode 100644
index 000000000..295612444
--- /dev/null
+++ b/scripts/check_sparse.py
@@ -0,0 +1,59 @@
+#! /usr/bin/env python3
+
+# Invoke sparse based on the contents of compile_commands.json,
+# also working around several deficiencies in cgcc's command line
+# parsing
+
+import json
+import subprocess
+import os
+import sys
+import shlex
+
+def cmdline_for_sparse(sparse, cmdline):
+    # Do not include the C compiler executable
+    skip = True
+    arg = False
+    out = sparse + ['-no-compile']
+    for x in cmdline:
+        if arg:
+            out.append(x)
+            arg = False
+            continue
+        if skip:
+            skip = False
+            continue
+        # prevent sparse from treating output files as inputs
+        if x == '-MF' or x == '-MQ' or x == '-o':
+            skip = True
+            continue
+        # cgcc ignores -no-compile if it sees -M or -MM?
+        if x.startswith('-M'):
+            continue
+        # sparse does not understand these!
+        if x == '-iquote' or x == '-isystem':
+            x = '-I'
+        if x == '-I':
+            arg = True
+        out.append(x)
+    return out
+
+root_path = os.getenv('MESON_BUILD_ROOT')
+def build_path(s):
+    return s if not root_path else os.path.join(root_path, s)
+
+ccjson_path = build_path(sys.argv[1])
+with open(ccjson_path, 'r') as fd:
+    compile_commands = json.load(fd)
+
+sparse = sys.argv[2:]
+sparse_env = os.environ.copy()
+for cmd in compile_commands:
+    cmdline = shlex.split(cmd['command'])
+    cmd = cmdline_for_sparse(sparse, cmdline)
+    print('REAL_CC=%s' % shlex.quote(cmdline[0]),
+          ' '.join((shlex.quote(x) for x in cmd)))
+    sparse_env['REAL_CC'] = cmdline[0]
+    r = subprocess.run(cmd, env=sparse_env, cwd=root_path)
+    if r.returncode != 0:
+        sys.exit(r.returncode)
diff --git a/scripts/checkpatch.pl b/scripts/checkpatch.pl
new file mode 100755
index 000000000..88c858f67
--- /dev/null
+++ b/scripts/checkpatch.pl
@@ -0,0 +1,3045 @@
+#!/usr/bin/env perl
+# (c) 2001, Dave Jones. (the file handling bit)
+# (c) 2005, Joel Schopp  (the ugly bit)
+# (c) 2007,2008, Andy Whitcroft  (new conditions, test suite)
+# (c) 2008-2010 Andy Whitcroft 
+# Licensed under the terms of the GNU GPL License version 2
+
+use strict;
+use warnings;
+use Term::ANSIColor qw(:constants);
+
+my $P = $0;
+$P =~ s@.*/@@g;
+
+our $SrcFile    = qr{\.(?:h|c|cpp|s|S|pl|py|sh)$};
+
+my $V = '0.31';
+
+use Getopt::Long qw(:config no_auto_abbrev);
+
+my $quiet = 0;
+my $tree = 1;
+my $chk_signoff = 1;
+my $chk_patch = undef;
+my $chk_branch = undef;
+my $tst_only;
+my $emacs = 0;
+my $terse = 0;
+my $file = undef;
+my $color = "auto";
+my $no_warnings = 0;
+my $summary = 1;
+my $mailback = 0;
+my $summary_file = 0;
+my $root;
+my %debug;
+my $help = 0;
+
+sub help {
+	my ($exitcode) = @_;
+
+	print << "EOM";
+Usage:
+
+    $P [OPTION]... [FILE]...
+    $P [OPTION]... [GIT-REV-LIST]
+
+Version: $V
+
+Options:
+  -q, --quiet                quiet
+  --no-tree                  run without a qemu tree
+  --no-signoff               do not check for 'Signed-off-by' line
+  --patch                    treat FILE as patchfile
+  --branch                   treat args as GIT revision list
+  --emacs                    emacs compile window format
+  --terse                    one line per report
+  -f, --file                 treat FILE as regular source file
+  --strict                   fail if only warnings are found
+  --root=PATH                PATH to the qemu tree root
+  --no-summary               suppress the per-file summary
+  --mailback                 only produce a report in case of warnings/errors
+  --summary-file             include the filename in summary
+  --debug KEY=[0|1]          turn on/off debugging of KEY, where KEY is one of
+                             'values', 'possible', 'type', and 'attr' (default
+                             is all off)
+  --test-only=WORD           report only warnings/errors containing WORD
+                             literally
+  --color[=WHEN]             Use colors 'always', 'never', or only when output
+                             is a terminal ('auto'). Default is 'auto'.
+  -h, --help, --version      display this help and exit
+
+When FILE is - read standard input.
+EOM
+
+	exit($exitcode);
+}
+
+# Perl's Getopt::Long allows options to take optional arguments after a space.
+# Prevent --color by itself from consuming other arguments
+foreach (@ARGV) {
+	if ($_ eq "--color" || $_ eq "-color") {
+		$_ = "--color=$color";
+	}
+}
+
+GetOptions(
+	'q|quiet+'	=> \$quiet,
+	'tree!'		=> \$tree,
+	'signoff!'	=> \$chk_signoff,
+	'patch!'	=> \$chk_patch,
+	'branch!'	=> \$chk_branch,
+	'emacs!'	=> \$emacs,
+	'terse!'	=> \$terse,
+	'f|file!'	=> \$file,
+	'strict!'	=> \$no_warnings,
+	'root=s'	=> \$root,
+	'summary!'	=> \$summary,
+	'mailback!'	=> \$mailback,
+	'summary-file!'	=> \$summary_file,
+
+	'debug=s'	=> \%debug,
+	'test-only=s'	=> \$tst_only,
+	'color=s'       => \$color,
+	'no-color'      => sub { $color = 'never'; },
+	'h|help'	=> \$help,
+	'version'	=> \$help
+) or help(1);
+
+help(0) if ($help);
+
+my $exit = 0;
+
+if ($#ARGV < 0) {
+	print "$P: no input files\n";
+	exit(1);
+}
+
+if (!defined $chk_branch && !defined $chk_patch && !defined $file) {
+	$chk_branch = $ARGV[0] =~ /.\.\./ ? 1 : 0;
+	$file = $ARGV[0] =~ /$SrcFile/ ? 1 : 0;
+	$chk_patch = $chk_branch || $file ? 0 : 1;
+} elsif (!defined $chk_branch && !defined $chk_patch) {
+	if ($file) {
+		$chk_branch = $chk_patch = 0;
+	} else {
+		$chk_branch = $ARGV[0] =~ /.\.\./ ? 1 : 0;
+		$chk_patch = $chk_branch ? 0 : 1;
+	}
+} elsif (!defined $chk_branch && !defined $file) {
+	if ($chk_patch) {
+		$chk_branch = $file = 0;
+	} else {
+		$chk_branch = $ARGV[0] =~ /.\.\./ ? 1 : 0;
+		$file = $chk_branch ? 0 : 1;
+	}
+} elsif (!defined $chk_patch && !defined $file) {
+	if ($chk_branch) {
+		$chk_patch = $file = 0;
+	} else {
+		$file = $ARGV[0] =~ /$SrcFile/ ? 1 : 0;
+		$chk_patch = $file ? 0 : 1;
+	}
+} elsif (!defined $chk_branch) {
+	$chk_branch = $chk_patch || $file ? 0 : 1;
+} elsif (!defined $chk_patch) {
+	$chk_patch = $chk_branch || $file ? 0 : 1;
+} elsif (!defined $file) {
+	$file = $chk_patch || $chk_branch ? 0 : 1;
+}
+
+if (($chk_patch && $chk_branch) ||
+    ($chk_patch && $file) ||
+    ($chk_branch && $file)) {
+	die "Only one of --file, --branch, --patch is permitted\n";
+}
+if (!$chk_patch && !$chk_branch && !$file) {
+	die "One of --file, --branch, --patch is required\n";
+}
+
+if ($color =~ /^always$/i) {
+	$color = 1;
+} elsif ($color =~ /^never$/i) {
+	$color = 0;
+} elsif ($color =~ /^auto$/i) {
+	$color = (-t STDOUT);
+} else {
+	die "Invalid color mode: $color\n";
+}
+
+my $dbg_values = 0;
+my $dbg_possible = 0;
+my $dbg_type = 0;
+my $dbg_attr = 0;
+my $dbg_adv_dcs = 0;
+my $dbg_adv_checking = 0;
+my $dbg_adv_apw = 0;
+for my $key (keys %debug) {
+	## no critic
+	eval "\${dbg_$key} = '$debug{$key}';";
+	die "$@" if ($@);
+}
+
+my $rpt_cleaners = 0;
+
+if ($terse) {
+	$emacs = 1;
+	$quiet++;
+}
+
+if ($tree) {
+	if (defined $root) {
+		if (!top_of_kernel_tree($root)) {
+			die "$P: $root: --root does not point at a valid tree\n";
+		}
+	} else {
+		if (top_of_kernel_tree('.')) {
+			$root = '.';
+		} elsif ($0 =~ m@(.*)/scripts/[^/]*$@ &&
+						top_of_kernel_tree($1)) {
+			$root = $1;
+		}
+	}
+
+	if (!defined $root) {
+		print "Must be run from the top-level dir. of a qemu tree\n";
+		exit(2);
+	}
+}
+
+my $emitted_corrupt = 0;
+
+our $Ident	= qr{
+			[A-Za-z_][A-Za-z\d_]*
+			(?:\s*\#\#\s*[A-Za-z_][A-Za-z\d_]*)*
+		}x;
+our $Storage	= qr{extern|static|asmlinkage};
+our $Sparse	= qr{
+			__force
+		}x;
+
+# Notes to $Attribute:
+our $Attribute	= qr{
+			const|
+			volatile|
+			QEMU_NORETURN|
+			QEMU_WARN_UNUSED_RESULT|
+			QEMU_SENTINEL|
+			QEMU_PACKED|
+			GCC_FMT_ATTR
+		  }x;
+our $Modifier;
+our $Inline	= qr{inline};
+our $Member	= qr{->$Ident|\.$Ident|\[[^]]*\]};
+our $Lval	= qr{$Ident(?:$Member)*};
+
+our $Constant	= qr{(?:[0-9]+|0x[0-9a-fA-F]+)[UL]*};
+our $Assignment	= qr{(?:\*\=|/=|%=|\+=|-=|<<=|>>=|&=|\^=|\|=|=)};
+our $Compare    = qr{<=|>=|==|!=|<|>};
+our $Operators	= qr{
+			<=|>=|==|!=|
+			=>|->|<<|>>|<|>|!|~|
+			&&|\|\||,|\^|\+\+|--|&|\||\+|-|\*|\/|%
+		  }x;
+
+our $NonptrType;
+our $Type;
+our $Declare;
+
+our $NON_ASCII_UTF8	= qr{
+	[\xC2-\xDF][\x80-\xBF]               # non-overlong 2-byte
+	|  \xE0[\xA0-\xBF][\x80-\xBF]        # excluding overlongs
+	| [\xE1-\xEC\xEE\xEF][\x80-\xBF]{2}  # straight 3-byte
+	|  \xED[\x80-\x9F][\x80-\xBF]        # excluding surrogates
+	|  \xF0[\x90-\xBF][\x80-\xBF]{2}     # planes 1-3
+	| [\xF1-\xF3][\x80-\xBF]{3}          # planes 4-15
+	|  \xF4[\x80-\x8F][\x80-\xBF]{2}     # plane 16
+}x;
+
+our $UTF8	= qr{
+	[\x09\x0A\x0D\x20-\x7E]              # ASCII
+	| $NON_ASCII_UTF8
+}x;
+
+# some readers default to ISO-8859-1 when showing email source. detect
+# when UTF-8 is incorrectly interpreted as ISO-8859-1 and reencoded back.
+# False positives are possible but very unlikely.
+our $UTF8_MOJIBAKE = qr{
+	\xC3[\x82-\x9F] \xC2[\x80-\xBF]                    # c2-df 80-bf
+	| \xC3\xA0 \xC2[\xA0-\xBF] \xC2[\x80-\xBF]         # e0 a0-bf 80-bf
+	| \xC3[\xA1-\xAC\xAE\xAF] (?: \xC2[\x80-\xBF]){2}  # e1-ec/ee/ef 80-bf 80-bf
+	| \xC3\xAD \xC2[\x80-\x9F] \xC2[\x80-\xBF]         # ed 80-9f 80-bf
+	| \xC3\xB0 \xC2[\x90-\xBF] (?: \xC2[\x80-\xBF]){2} # f0 90-bf 80-bf 80-bf
+	| \xC3[\xB1-\xB3] (?: \xC2[\x80-\xBF]){3}          # f1-f3 80-bf 80-bf 80-bf
+	| \xC3\xB4 \xC2[\x80-\x8F] (?: \xC2[\x80-\xBF]){2} # f4 80-b8 80-bf 80-bf
+}x;
+
+# There are still some false positives, but this catches most
+# common cases.
+our $typeTypedefs = qr{(?x:
+        (?![KMGTPE]iB)                      # IEC binary prefix (do not match)
+        [A-Z][A-Z\d_]*[a-z][A-Za-z\d_]*     # camelcase
+        | [A-Z][A-Z\d_]*AIOCB               # all uppercase
+        | [A-Z][A-Z\d_]*CPU                 # all uppercase
+        | QEMUBH                            # all uppercase
+)};
+
+our @typeList = (
+	qr{void},
+	qr{(?:unsigned\s+)?char},
+	qr{(?:unsigned\s+)?short},
+	qr{(?:unsigned\s+)?int},
+	qr{(?:unsigned\s+)?long},
+	qr{(?:unsigned\s+)?long\s+int},
+	qr{(?:unsigned\s+)?long\s+long},
+	qr{(?:unsigned\s+)?long\s+long\s+int},
+	qr{unsigned},
+	qr{float},
+	qr{double},
+	qr{bool},
+	qr{struct\s+$Ident},
+	qr{union\s+$Ident},
+	qr{enum\s+$Ident},
+	qr{${Ident}_t},
+	qr{${Ident}_handler},
+	qr{${Ident}_handler_fn},
+	qr{target_(?:u)?long},
+	qr{hwaddr},
+        # external libraries
+	qr{xml${Ident}},
+	qr{xen\w+_handle},
+	# Glib definitions
+	qr{gchar},
+	qr{gshort},
+	qr{glong},
+	qr{gint},
+	qr{gboolean},
+	qr{guchar},
+	qr{gushort},
+	qr{gulong},
+	qr{guint},
+	qr{gfloat},
+	qr{gdouble},
+	qr{gpointer},
+	qr{gconstpointer},
+	qr{gint8},
+	qr{guint8},
+	qr{gint16},
+	qr{guint16},
+	qr{gint32},
+	qr{guint32},
+	qr{gint64},
+	qr{guint64},
+	qr{gsize},
+	qr{gssize},
+	qr{goffset},
+	qr{gintptr},
+	qr{guintptr},
+);
+
+# This can be modified by sub possible.  Since it can be empty, be careful
+# about regexes that always match, because they can cause infinite loops.
+our @modifierList = (
+);
+
+sub build_types {
+	my $all = "(?x:  \n" . join("|\n  ", @typeList) . "\n)";
+	if (@modifierList > 0) {
+		my $mods = "(?x:  \n" . join("|\n  ", @modifierList) . "\n)";
+		$Modifier = qr{(?:$Attribute|$Sparse|$mods)};
+	} else {
+		$Modifier = qr{(?:$Attribute|$Sparse)};
+	}
+	$NonptrType	= qr{
+			(?:$Modifier\s+|const\s+)*
+			(?:
+				(?:typeof|__typeof__)\s*\(\s*\**\s*$Ident\s*\)|
+				(?:$typeTypedefs\b)|
+				(?:${all}\b)
+			)
+			(?:\s+$Modifier|\s+const)*
+		  }x;
+	$Type	= qr{
+			$NonptrType
+			(?:[\s\*]+\s*const|[\s\*]+|(?:\s*\[\s*\])+)?
+			(?:\s+$Inline|\s+$Modifier)*
+		  }x;
+	$Declare	= qr{(?:$Storage\s+)?$Type};
+}
+build_types();
+
+$chk_signoff = 0 if ($file);
+
+my @rawlines = ();
+my @lines = ();
+my $vname;
+if ($chk_branch) {
+	my @patches;
+	my %git_commits = ();
+	my $HASH;
+	open($HASH, "-|", "git", "log", "--reverse", "--no-merges", "--format=%H %s", $ARGV[0]) ||
+		die "$P: git log --reverse --no-merges --format='%H %s' $ARGV[0] failed - $!\n";
+
+	for my $line (<$HASH>) {
+		$line =~ /^([0-9a-fA-F]{40,40}) (.*)$/;
+		next if (!defined($1) || !defined($2));
+		my $sha1 = $1;
+		my $subject = $2;
+		push(@patches, $sha1);
+		$git_commits{$sha1} = $subject;
+	}
+
+	close $HASH;
+
+	die "$P: no revisions returned for revlist '$ARGV[0]'\n"
+	    unless @patches;
+
+	my $i = 1;
+	my $num_patches = @patches;
+	for my $hash (@patches) {
+		my $FILE;
+		open($FILE, '-|', "git", "show", $hash) ||
+			die "$P: git show $hash - $!\n";
+		while (<$FILE>) {
+			chomp;
+			push(@rawlines, $_);
+		}
+		close($FILE);
+		$vname = substr($hash, 0, 12) . ' (' . $git_commits{$hash} . ')';
+		if ($num_patches > 1 && $quiet == 0) {
+			my $prefix = "$i/$num_patches";
+			$prefix = BLUE . BOLD . $prefix . RESET if $color;
+			print "$prefix Checking commit $vname\n";
+			$vname = "Patch $i/$num_patches";
+		} else {
+			$vname = "Commit " . $vname;
+		}
+		if (!process($hash)) {
+			$exit = 1;
+			print "\n" if ($num_patches > 1 && $quiet == 0);
+		}
+		@rawlines = ();
+		@lines = ();
+		$i++;
+	}
+} else {
+	for my $filename (@ARGV) {
+		my $FILE;
+		if ($file) {
+			open($FILE, '-|', "diff -u /dev/null $filename") ||
+				die "$P: $filename: diff failed - $!\n";
+		} elsif ($filename eq '-') {
+			open($FILE, '<&STDIN');
+		} else {
+			open($FILE, '<', "$filename") ||
+				die "$P: $filename: open failed - $!\n";
+		}
+		if ($filename eq '-') {
+			$vname = 'Your patch';
+		} else {
+			$vname = $filename;
+		}
+		print "Checking $filename...\n" if @ARGV > 1 && $quiet == 0;
+		while (<$FILE>) {
+			chomp;
+			push(@rawlines, $_);
+		}
+		close($FILE);
+		if (!process($filename)) {
+			$exit = 1;
+		}
+		@rawlines = ();
+		@lines = ();
+	}
+}
+
+exit($exit);
+
+sub top_of_kernel_tree {
+	my ($root) = @_;
+
+	my @tree_check = (
+		"COPYING", "MAINTAINERS", "Makefile",
+		"README.rst", "docs", "VERSION",
+		"linux-user", "softmmu"
+	);
+
+	foreach my $check (@tree_check) {
+		if (! -e $root . '/' . $check) {
+			return 0;
+		}
+	}
+	return 1;
+}
+
+sub expand_tabs {
+	my ($str) = @_;
+
+	my $res = '';
+	my $n = 0;
+	for my $c (split(//, $str)) {
+		if ($c eq "\t") {
+			$res .= ' ';
+			$n++;
+			for (; ($n % 8) != 0; $n++) {
+				$res .= ' ';
+			}
+			next;
+		}
+		$res .= $c;
+		$n++;
+	}
+
+	return $res;
+}
+sub copy_spacing {
+	(my $res = shift) =~ tr/\t/ /c;
+	return $res;
+}
+
+sub line_stats {
+	my ($line) = @_;
+
+	# Drop the diff line leader and expand tabs
+	$line =~ s/^.//;
+	$line = expand_tabs($line);
+
+	# Pick the indent from the front of the line.
+	my ($white) = ($line =~ /^(\s*)/);
+
+	return (length($line), length($white));
+}
+
+my $sanitise_quote = '';
+
+sub sanitise_line_reset {
+	my ($in_comment) = @_;
+
+	if ($in_comment) {
+		$sanitise_quote = '*/';
+	} else {
+		$sanitise_quote = '';
+	}
+}
+sub sanitise_line {
+	my ($line) = @_;
+
+	my $res = '';
+	my $l = '';
+
+	my $qlen = 0;
+	my $off = 0;
+	my $c;
+
+	# Always copy over the diff marker.
+	$res = substr($line, 0, 1);
+
+	for ($off = 1; $off < length($line); $off++) {
+		$c = substr($line, $off, 1);
+
+		# Comments we are wacking completely including the begin
+		# and end, all to $;.
+		if ($sanitise_quote eq '' && substr($line, $off, 2) eq '/*') {
+			$sanitise_quote = '*/';
+
+			substr($res, $off, 2, "$;$;");
+			$off++;
+			next;
+		}
+		if ($sanitise_quote eq '*/' && substr($line, $off, 2) eq '*/') {
+			$sanitise_quote = '';
+			substr($res, $off, 2, "$;$;");
+			$off++;
+			next;
+		}
+		if ($sanitise_quote eq '' && substr($line, $off, 2) eq '//') {
+			$sanitise_quote = '//';
+
+			substr($res, $off, 2, $sanitise_quote);
+			$off++;
+			next;
+		}
+
+		# A \ in a string means ignore the next character.
+		if (($sanitise_quote eq "'" || $sanitise_quote eq '"') &&
+		    $c eq "\\") {
+			substr($res, $off, 2, 'XX');
+			$off++;
+			next;
+		}
+		# Regular quotes.
+		if ($c eq "'" || $c eq '"') {
+			if ($sanitise_quote eq '') {
+				$sanitise_quote = $c;
+
+				substr($res, $off, 1, $c);
+				next;
+			} elsif ($sanitise_quote eq $c) {
+				$sanitise_quote = '';
+			}
+		}
+
+		#print "c<$c> SQ<$sanitise_quote>\n";
+		if ($off != 0 && $sanitise_quote eq '*/' && $c ne "\t") {
+			substr($res, $off, 1, $;);
+		} elsif ($off != 0 && $sanitise_quote eq '//' && $c ne "\t") {
+			substr($res, $off, 1, $;);
+		} elsif ($off != 0 && $sanitise_quote && $c ne "\t") {
+			substr($res, $off, 1, 'X');
+		} else {
+			substr($res, $off, 1, $c);
+		}
+	}
+
+	if ($sanitise_quote eq '//') {
+		$sanitise_quote = '';
+	}
+
+	# The pathname on a #include may be surrounded by '<' and '>'.
+	if ($res =~ /^.\s*\#\s*include\s+\<(.*)\>/) {
+		my $clean = 'X' x length($1);
+		$res =~ s@\<.*\>@<$clean>@;
+
+	# The whole of a #error is a string.
+	} elsif ($res =~ /^.\s*\#\s*(?:error|warning)\s+(.*)\b/) {
+		my $clean = 'X' x length($1);
+		$res =~ s@(\#\s*(?:error|warning)\s+).*@$1$clean@;
+	}
+
+	return $res;
+}
+
+sub ctx_statement_block {
+	my ($linenr, $remain, $off) = @_;
+	my $line = $linenr - 1;
+	my $blk = '';
+	my $soff = $off;
+	my $coff = $off - 1;
+	my $coff_set = 0;
+
+	my $loff = 0;
+
+	my $type = '';
+	my $level = 0;
+	my @stack = ();
+	my $p;
+	my $c;
+	my $len = 0;
+
+	my $remainder;
+	while (1) {
+		@stack = (['', 0]) if ($#stack == -1);
+
+		#warn "CSB: blk<$blk> remain<$remain>\n";
+		# If we are about to drop off the end, pull in more
+		# context.
+		if ($off >= $len) {
+			for (; $remain > 0; $line++) {
+				last if (!defined $lines[$line]);
+				next if ($lines[$line] =~ /^-/);
+				$remain--;
+				$loff = $len;
+				$blk .= $lines[$line] . "\n";
+				$len = length($blk);
+				$line++;
+				last;
+			}
+			# Bail if there is no further context.
+			#warn "CSB: blk<$blk> off<$off> len<$len>\n";
+			if ($off >= $len) {
+				last;
+			}
+		}
+		$p = $c;
+		$c = substr($blk, $off, 1);
+		$remainder = substr($blk, $off);
+
+		#warn "CSB: c<$c> type<$type> level<$level> remainder<$remainder> coff_set<$coff_set>\n";
+
+		# Handle nested #if/#else.
+		if ($remainder =~ /^#\s*(?:ifndef|ifdef|if)\s/) {
+			push(@stack, [ $type, $level ]);
+		} elsif ($remainder =~ /^#\s*(?:else|elif)\b/) {
+			($type, $level) = @{$stack[$#stack - 1]};
+		} elsif ($remainder =~ /^#\s*endif\b/) {
+			($type, $level) = @{pop(@stack)};
+		}
+
+		# Statement ends at the ';' or a close '}' at the
+		# outermost level.
+		if ($level == 0 && $c eq ';') {
+			last;
+		}
+
+		# An else is really a conditional as long as its not else if
+		if ($level == 0 && $coff_set == 0 &&
+				(!defined($p) || $p =~ /(?:\s|\}|\+)/) &&
+				$remainder =~ /^(else)(?:\s|{)/ &&
+				$remainder !~ /^else\s+if\b/) {
+			$coff = $off + length($1) - 1;
+			$coff_set = 1;
+			#warn "CSB: mark coff<$coff> soff<$soff> 1<$1>\n";
+			#warn "[" . substr($blk, $soff, $coff - $soff + 1) . "]\n";
+		}
+
+		if (($type eq '' || $type eq '(') && $c eq '(') {
+			$level++;
+			$type = '(';
+		}
+		if ($type eq '(' && $c eq ')') {
+			$level--;
+			$type = ($level != 0)? '(' : '';
+
+			if ($level == 0 && $coff < $soff) {
+				$coff = $off;
+				$coff_set = 1;
+				#warn "CSB: mark coff<$coff>\n";
+			}
+		}
+		if (($type eq '' || $type eq '{') && $c eq '{') {
+			$level++;
+			$type = '{';
+		}
+		if ($type eq '{' && $c eq '}') {
+			$level--;
+			$type = ($level != 0)? '{' : '';
+
+			if ($level == 0) {
+				if (substr($blk, $off + 1, 1) eq ';') {
+					$off++;
+				}
+				last;
+			}
+		}
+		$off++;
+	}
+	# We are truly at the end, so shuffle to the next line.
+	if ($off == $len) {
+		$loff = $len + 1;
+		$line++;
+		$remain--;
+	}
+
+	my $statement = substr($blk, $soff, $off - $soff + 1);
+	my $condition = substr($blk, $soff, $coff - $soff + 1);
+
+	#warn "STATEMENT<$statement>\n";
+	#warn "CONDITION<$condition>\n";
+
+	#print "coff<$coff> soff<$off> loff<$loff>\n";
+
+	return ($statement, $condition,
+			$line, $remain + 1, $off - $loff + 1, $level);
+}
+
+sub statement_lines {
+	my ($stmt) = @_;
+
+	# Strip the diff line prefixes and rip blank lines at start and end.
+	$stmt =~ s/(^|\n)./$1/g;
+	$stmt =~ s/^\s*//;
+	$stmt =~ s/\s*$//;
+
+	my @stmt_lines = ($stmt =~ /\n/g);
+
+	return $#stmt_lines + 2;
+}
+
+sub statement_rawlines {
+	my ($stmt) = @_;
+
+	my @stmt_lines = ($stmt =~ /\n/g);
+
+	return $#stmt_lines + 2;
+}
+
+sub statement_block_size {
+	my ($stmt) = @_;
+
+	$stmt =~ s/(^|\n)./$1/g;
+	$stmt =~ s/^\s*\{//;
+	$stmt =~ s/}\s*$//;
+	$stmt =~ s/^\s*//;
+	$stmt =~ s/\s*$//;
+
+	my @stmt_lines = ($stmt =~ /\n/g);
+	my @stmt_statements = ($stmt =~ /;/g);
+
+	my $stmt_lines = $#stmt_lines + 2;
+	my $stmt_statements = $#stmt_statements + 1;
+
+	if ($stmt_lines > $stmt_statements) {
+		return $stmt_lines;
+	} else {
+		return $stmt_statements;
+	}
+}
+
+sub ctx_statement_full {
+	my ($linenr, $remain, $off) = @_;
+	my ($statement, $condition, $level);
+
+	my (@chunks);
+
+	# Grab the first conditional/block pair.
+	($statement, $condition, $linenr, $remain, $off, $level) =
+				ctx_statement_block($linenr, $remain, $off);
+	#print "F: c<$condition> s<$statement> remain<$remain>\n";
+	push(@chunks, [ $condition, $statement ]);
+	if (!($remain > 0 && $condition =~ /^\s*(?:\n[+-])?\s*(?:if|else|do)\b/s)) {
+		return ($level, $linenr, @chunks);
+	}
+
+	# Pull in the following conditional/block pairs and see if they
+	# could continue the statement.
+	for (;;) {
+		($statement, $condition, $linenr, $remain, $off, $level) =
+				ctx_statement_block($linenr, $remain, $off);
+		#print "C: c<$condition> s<$statement> remain<$remain>\n";
+		last if (!($remain > 0 && $condition =~ /^(?:\s*\n[+-])*\s*(?:else|do)\b/s));
+		#print "C: push\n";
+		push(@chunks, [ $condition, $statement ]);
+	}
+
+	return ($level, $linenr, @chunks);
+}
+
+sub ctx_block_get {
+	my ($linenr, $remain, $outer, $open, $close, $off) = @_;
+	my $line;
+	my $start = $linenr - 1;
+	my $blk = '';
+	my @o;
+	my @c;
+	my @res = ();
+
+	my $level = 0;
+	my @stack = ($level);
+	for ($line = $start; $remain > 0; $line++) {
+		next if ($rawlines[$line] =~ /^-/);
+		$remain--;
+
+		$blk .= $rawlines[$line];
+
+		# Handle nested #if/#else.
+		if ($lines[$line] =~ /^.\s*#\s*(?:ifndef|ifdef|if)\s/) {
+			push(@stack, $level);
+		} elsif ($lines[$line] =~ /^.\s*#\s*(?:else|elif)\b/) {
+			$level = $stack[$#stack - 1];
+		} elsif ($lines[$line] =~ /^.\s*#\s*endif\b/) {
+			$level = pop(@stack);
+		}
+
+		foreach my $c (split(//, $lines[$line])) {
+			##print "C<$c>L<$level><$open$close>O<$off>\n";
+			if ($off > 0) {
+				$off--;
+				next;
+			}
+
+			if ($c eq $close && $level > 0) {
+				$level--;
+				last if ($level == 0);
+			} elsif ($c eq $open) {
+				$level++;
+			}
+		}
+
+		if (!$outer || $level <= 1) {
+			push(@res, $rawlines[$line]);
+		}
+
+		last if ($level == 0);
+	}
+
+	return ($level, @res);
+}
+sub ctx_block_outer {
+	my ($linenr, $remain) = @_;
+
+	my ($level, @r) = ctx_block_get($linenr, $remain, 1, '{', '}', 0);
+	return @r;
+}
+sub ctx_block {
+	my ($linenr, $remain) = @_;
+
+	my ($level, @r) = ctx_block_get($linenr, $remain, 0, '{', '}', 0);
+	return @r;
+}
+sub ctx_statement {
+	my ($linenr, $remain, $off) = @_;
+
+	my ($level, @r) = ctx_block_get($linenr, $remain, 0, '(', ')', $off);
+	return @r;
+}
+sub ctx_block_level {
+	my ($linenr, $remain) = @_;
+
+	return ctx_block_get($linenr, $remain, 0, '{', '}', 0);
+}
+sub ctx_statement_level {
+	my ($linenr, $remain, $off) = @_;
+
+	return ctx_block_get($linenr, $remain, 0, '(', ')', $off);
+}
+
+sub ctx_locate_comment {
+	my ($first_line, $end_line) = @_;
+
+	# Catch a comment on the end of the line itself.
+	my ($current_comment) = ($rawlines[$end_line - 1] =~ m@.*(/\*.*\*/)\s*(?:\\\s*)?$@);
+	return $current_comment if (defined $current_comment);
+
+	# Look through the context and try and figure out if there is a
+	# comment.
+	my $in_comment = 0;
+	$current_comment = '';
+	for (my $linenr = $first_line; $linenr < $end_line; $linenr++) {
+		my $line = $rawlines[$linenr - 1];
+		#warn "           $line\n";
+		if ($linenr == $first_line and $line =~ m@^.\s*\*@) {
+			$in_comment = 1;
+		}
+		if ($line =~ m@/\*@) {
+			$in_comment = 1;
+		}
+		if (!$in_comment && $current_comment ne '') {
+			$current_comment = '';
+		}
+		$current_comment .= $line . "\n" if ($in_comment);
+		if ($line =~ m@\*/@) {
+			$in_comment = 0;
+		}
+	}
+
+	chomp($current_comment);
+	return($current_comment);
+}
+sub ctx_has_comment {
+	my ($first_line, $end_line) = @_;
+	my $cmt = ctx_locate_comment($first_line, $end_line);
+
+	##print "LINE: $rawlines[$end_line - 1 ]\n";
+	##print "CMMT: $cmt\n";
+
+	return ($cmt ne '');
+}
+
+sub raw_line {
+	my ($linenr, $cnt) = @_;
+
+	my $offset = $linenr - 1;
+	$cnt++;
+
+	my $line;
+	while ($cnt) {
+		$line = $rawlines[$offset++];
+		next if (defined($line) && $line =~ /^-/);
+		$cnt--;
+	}
+
+	return $line;
+}
+
+sub cat_vet {
+	my ($vet) = @_;
+	my ($res, $coded);
+
+	$res = '';
+	while ($vet =~ /([^[:cntrl:]]*)([[:cntrl:]]|$)/g) {
+		$res .= $1;
+		if ($2 ne '') {
+			$coded = sprintf("^%c", unpack('C', $2) + 64);
+			$res .= $coded;
+		}
+	}
+	$res =~ s/$/\$/;
+
+	return $res;
+}
+
+my $av_preprocessor = 0;
+my $av_pending;
+my @av_paren_type;
+my $av_pend_colon;
+
+sub annotate_reset {
+	$av_preprocessor = 0;
+	$av_pending = '_';
+	@av_paren_type = ('E');
+	$av_pend_colon = 'O';
+}
+
+sub annotate_values {
+	my ($stream, $type) = @_;
+
+	my $res;
+	my $var = '_' x length($stream);
+	my $cur = $stream;
+
+	print "$stream\n" if ($dbg_values > 1);
+
+	while (length($cur)) {
+		@av_paren_type = ('E') if ($#av_paren_type < 0);
+		print " <" . join('', @av_paren_type) .
+				"> <$type> <$av_pending>" if ($dbg_values > 1);
+		if ($cur =~ /^(\s+)/o) {
+			print "WS($1)\n" if ($dbg_values > 1);
+			if ($1 =~ /\n/ && $av_preprocessor) {
+				$type = pop(@av_paren_type);
+				$av_preprocessor = 0;
+			}
+
+		} elsif ($cur =~ /^(\(\s*$Type\s*)\)/ && $av_pending eq '_') {
+			print "CAST($1)\n" if ($dbg_values > 1);
+			push(@av_paren_type, $type);
+			$type = 'C';
+
+		} elsif ($cur =~ /^($Type)\s*(?:$Ident|,|\)|\(|\s*$)/) {
+			print "DECLARE($1)\n" if ($dbg_values > 1);
+			$type = 'T';
+
+		} elsif ($cur =~ /^($Modifier)\s*/) {
+			print "MODIFIER($1)\n" if ($dbg_values > 1);
+			$type = 'T';
+
+		} elsif ($cur =~ /^(\#\s*define\s*$Ident)(\(?)/o) {
+			print "DEFINE($1,$2)\n" if ($dbg_values > 1);
+			$av_preprocessor = 1;
+			push(@av_paren_type, $type);
+			if ($2 ne '') {
+				$av_pending = 'N';
+			}
+			$type = 'E';
+
+		} elsif ($cur =~ /^(\#\s*(?:undef\s*$Ident|include\b))/o) {
+			print "UNDEF($1)\n" if ($dbg_values > 1);
+			$av_preprocessor = 1;
+			push(@av_paren_type, $type);
+
+		} elsif ($cur =~ /^(\#\s*(?:ifdef|ifndef|if))/o) {
+			print "PRE_START($1)\n" if ($dbg_values > 1);
+			$av_preprocessor = 1;
+
+			push(@av_paren_type, $type);
+			push(@av_paren_type, $type);
+			$type = 'E';
+
+		} elsif ($cur =~ /^(\#\s*(?:else|elif))/o) {
+			print "PRE_RESTART($1)\n" if ($dbg_values > 1);
+			$av_preprocessor = 1;
+
+			push(@av_paren_type, $av_paren_type[$#av_paren_type]);
+
+			$type = 'E';
+
+		} elsif ($cur =~ /^(\#\s*(?:endif))/o) {
+			print "PRE_END($1)\n" if ($dbg_values > 1);
+
+			$av_preprocessor = 1;
+
+			# Assume all arms of the conditional end as this
+			# one does, and continue as if the #endif was not here.
+			pop(@av_paren_type);
+			push(@av_paren_type, $type);
+			$type = 'E';
+
+		} elsif ($cur =~ /^(\\\n)/o) {
+			print "PRECONT($1)\n" if ($dbg_values > 1);
+
+		} elsif ($cur =~ /^(__attribute__)\s*\(?/o) {
+			print "ATTR($1)\n" if ($dbg_values > 1);
+			$av_pending = $type;
+			$type = 'N';
+
+		} elsif ($cur =~ /^(sizeof)\s*(\()?/o) {
+			print "SIZEOF($1)\n" if ($dbg_values > 1);
+			if (defined $2) {
+				$av_pending = 'V';
+			}
+			$type = 'N';
+
+		} elsif ($cur =~ /^(if|while|for)\b/o) {
+			print "COND($1)\n" if ($dbg_values > 1);
+			$av_pending = 'E';
+			$type = 'N';
+
+		} elsif ($cur =~/^(case)/o) {
+			print "CASE($1)\n" if ($dbg_values > 1);
+			$av_pend_colon = 'C';
+			$type = 'N';
+
+		} elsif ($cur =~/^(return|else|goto|typeof|__typeof__)\b/o) {
+			print "KEYWORD($1)\n" if ($dbg_values > 1);
+			$type = 'N';
+
+		} elsif ($cur =~ /^(\()/o) {
+			print "PAREN('$1')\n" if ($dbg_values > 1);
+			push(@av_paren_type, $av_pending);
+			$av_pending = '_';
+			$type = 'N';
+
+		} elsif ($cur =~ /^(\))/o) {
+			my $new_type = pop(@av_paren_type);
+			if ($new_type ne '_') {
+				$type = $new_type;
+				print "PAREN('$1') -> $type\n"
+							if ($dbg_values > 1);
+			} else {
+				print "PAREN('$1')\n" if ($dbg_values > 1);
+			}
+
+		} elsif ($cur =~ /^($Ident)\s*\(/o) {
+			print "FUNC($1)\n" if ($dbg_values > 1);
+			$type = 'V';
+			$av_pending = 'V';
+
+		} elsif ($cur =~ /^($Ident\s*):(?:\s*\d+\s*(,|=|;))?/) {
+			if (defined $2 && $type eq 'C' || $type eq 'T') {
+				$av_pend_colon = 'B';
+			} elsif ($type eq 'E') {
+				$av_pend_colon = 'L';
+			}
+			print "IDENT_COLON($1,$type>$av_pend_colon)\n" if ($dbg_values > 1);
+			$type = 'V';
+
+		} elsif ($cur =~ /^($Ident|$Constant)/o) {
+			print "IDENT($1)\n" if ($dbg_values > 1);
+			$type = 'V';
+
+		} elsif ($cur =~ /^($Assignment)/o) {
+			print "ASSIGN($1)\n" if ($dbg_values > 1);
+			$type = 'N';
+
+		} elsif ($cur =~/^(;|{|})/) {
+			print "END($1)\n" if ($dbg_values > 1);
+			$type = 'E';
+			$av_pend_colon = 'O';
+
+		} elsif ($cur =~/^(,)/) {
+			print "COMMA($1)\n" if ($dbg_values > 1);
+			$type = 'C';
+
+		} elsif ($cur =~ /^(\?)/o) {
+			print "QUESTION($1)\n" if ($dbg_values > 1);
+			$type = 'N';
+
+		} elsif ($cur =~ /^(:)/o) {
+			print "COLON($1,$av_pend_colon)\n" if ($dbg_values > 1);
+
+			substr($var, length($res), 1, $av_pend_colon);
+			if ($av_pend_colon eq 'C' || $av_pend_colon eq 'L') {
+				$type = 'E';
+			} else {
+				$type = 'N';
+			}
+			$av_pend_colon = 'O';
+
+		} elsif ($cur =~ /^(\[)/o) {
+			print "CLOSE($1)\n" if ($dbg_values > 1);
+			$type = 'N';
+
+		} elsif ($cur =~ /^(-(?![->])|\+(?!\+)|\*|\&\&|\&)/o) {
+			my $variant;
+
+			print "OPV($1)\n" if ($dbg_values > 1);
+			if ($type eq 'V') {
+				$variant = 'B';
+			} else {
+				$variant = 'U';
+			}
+
+			substr($var, length($res), 1, $variant);
+			$type = 'N';
+
+		} elsif ($cur =~ /^($Operators)/o) {
+			print "OP($1)\n" if ($dbg_values > 1);
+			if ($1 ne '++' && $1 ne '--') {
+				$type = 'N';
+			}
+
+		} elsif ($cur =~ /(^.)/o) {
+			print "C($1)\n" if ($dbg_values > 1);
+		}
+		if (defined $1) {
+			$cur = substr($cur, length($1));
+			$res .= $type x length($1);
+		}
+	}
+
+	return ($res, $var);
+}
+
+sub possible {
+	my ($possible, $line) = @_;
+	my $notPermitted = qr{(?:
+		^(?:
+			$Modifier|
+			$Storage|
+			$Type|
+			DEFINE_\S+
+		)$|
+		^(?:
+			goto|
+			return|
+			case|
+			else|
+			asm|__asm__|
+			do
+		)(?:\s|$)|
+		^(?:typedef|struct|enum)\b|
+		^\#
+	    )}x;
+	warn "CHECK<$possible> ($line)\n" if ($dbg_possible > 2);
+	if ($possible !~ $notPermitted) {
+		# Check for modifiers.
+		$possible =~ s/\s*$Storage\s*//g;
+		$possible =~ s/\s*$Sparse\s*//g;
+		if ($possible =~ /^\s*$/) {
+
+		} elsif ($possible =~ /\s/) {
+			$possible =~ s/\s*(?:$Type|\#\#)\s*//g;
+			for my $modifier (split(' ', $possible)) {
+				if ($modifier !~ $notPermitted) {
+					warn "MODIFIER: $modifier ($possible) ($line)\n" if ($dbg_possible);
+					push(@modifierList, $modifier);
+				}
+			}
+
+		} else {
+			warn "POSSIBLE: $possible ($line)\n" if ($dbg_possible);
+			push(@typeList, $possible);
+		}
+		build_types();
+	} else {
+		warn "NOTPOSS: $possible ($line)\n" if ($dbg_possible > 1);
+	}
+}
+
+my $prefix = '';
+
+sub report {
+	my ($level, $msg) = @_;
+	if (defined $tst_only && $msg !~ /\Q$tst_only\E/) {
+		return 0;
+	}
+
+	my $output = '';
+	$output .= BOLD if $color;
+	$output .= $prefix;
+	$output .= RED if $color && $level eq 'ERROR';
+	$output .= MAGENTA if $color && $level eq 'WARNING';
+	$output .= $level . ':';
+	$output .= RESET if $color;
+	$output .= ' ' . $msg . "\n";
+
+	$output = (split('\n', $output))[0] . "\n" if ($terse);
+
+	push(our @report, $output);
+
+	return 1;
+}
+sub report_dump {
+	our @report;
+}
+sub ERROR {
+	if (report("ERROR", $_[0])) {
+		our $clean = 0;
+		our $cnt_error++;
+	}
+}
+sub WARN {
+	if (report("WARNING", $_[0])) {
+		our $clean = 0;
+		our $cnt_warn++;
+	}
+}
+
+# According to tests/qtest/bios-tables-test.c: do not
+# change expected file in the same commit with adding test
+sub checkfilename {
+	my ($name, $acpi_testexpected, $acpi_nontestexpected) = @_;
+
+        # Note: shell script that rebuilds the expected files is in the same
+        # directory as files themselves.
+        # Note: allowed diff list can be changed both when changing expected
+        # files and when changing tests.
+	if ($name =~ m#^tests/data/acpi/# and not $name =~ m#^\.sh$#) {
+		$$acpi_testexpected = $name;
+	} elsif ($name !~ m#^tests/qtest/bios-tables-test-allowed-diff.h$#) {
+		$$acpi_nontestexpected = $name;
+	}
+	if (defined $$acpi_testexpected and defined $$acpi_nontestexpected) {
+		ERROR("Do not add expected files together with tests, " .
+		      "follow instructions in " .
+		      "tests/qtest/bios-tables-test.c: both " .
+		      $$acpi_testexpected . " and " .
+		      $$acpi_nontestexpected . " found\n");
+	}
+}
+
+sub process {
+	my $filename = shift;
+
+	my $linenr=0;
+	my $prevline="";
+	my $prevrawline="";
+	my $stashline="";
+	my $stashrawline="";
+
+	my $length;
+	my $indent;
+	my $previndent=0;
+	my $stashindent=0;
+
+	our $clean = 1;
+	my $signoff = 0;
+	my $is_patch = 0;
+
+	my $in_header_lines = $file ? 0 : 1;
+	my $in_commit_log = 0;		#Scanning lines before patch
+	my $reported_maintainer_file = 0;
+	my $non_utf8_charset = 0;
+
+	our @report = ();
+	our $cnt_lines = 0;
+	our $cnt_error = 0;
+	our $cnt_warn = 0;
+	our $cnt_chk = 0;
+
+	# Trace the real file/line as we go.
+	my $realfile = '';
+	my $realline = 0;
+	my $realcnt = 0;
+	my $here = '';
+	my $in_comment = 0;
+	my $comment_edge = 0;
+	my $first_line = 0;
+	my $p1_prefix = '';
+
+	my $prev_values = 'E';
+
+	# suppression flags
+	my %suppress_ifbraces;
+	my %suppress_whiletrailers;
+	my %suppress_export;
+
+        my $acpi_testexpected;
+        my $acpi_nontestexpected;
+
+	# Pre-scan the patch sanitizing the lines.
+
+	sanitise_line_reset();
+	my $line;
+	foreach my $rawline (@rawlines) {
+		$linenr++;
+		$line = $rawline;
+
+		if ($rawline=~/^\@\@ -\d+(?:,\d+)? \+(\d+)(,(\d+))? \@\@/) {
+			$realline=$1-1;
+			if (defined $2) {
+				$realcnt=$3+1;
+			} else {
+				$realcnt=1+1;
+			}
+			$in_comment = 0;
+
+			# Guestimate if this is a continuing comment.  Run
+			# the context looking for a comment "edge".  If this
+			# edge is a close comment then we must be in a comment
+			# at context start.
+			my $edge;
+			my $cnt = $realcnt;
+			for (my $ln = $linenr + 1; $cnt > 0; $ln++) {
+				next if (defined $rawlines[$ln - 1] &&
+					 $rawlines[$ln - 1] =~ /^-/);
+				$cnt--;
+				#print "RAW<$rawlines[$ln - 1]>\n";
+				last if (!defined $rawlines[$ln - 1]);
+				if ($rawlines[$ln - 1] =~ m@(/\*|\*/)@ &&
+				    $rawlines[$ln - 1] !~ m@"[^"]*(?:/\*|\*/)[^"]*"@) {
+					($edge) = $1;
+					last;
+				}
+			}
+			if (defined $edge && $edge eq '*/') {
+				$in_comment = 1;
+			}
+
+			# Guestimate if this is a continuing comment.  If this
+			# is the start of a diff block and this line starts
+			# ' *' then it is very likely a comment.
+			if (!defined $edge &&
+			    $rawlines[$linenr] =~ m@^.\s*(?:\*\*+| \*)(?:\s|$)@)
+			{
+				$in_comment = 1;
+			}
+
+			##print "COMMENT:$in_comment edge<$edge> $rawline\n";
+			sanitise_line_reset($in_comment);
+
+		} elsif ($realcnt && $rawline =~ /^(?:\+| |$)/) {
+			# Standardise the strings and chars within the input to
+			# simplify matching -- only bother with positive lines.
+			$line = sanitise_line($rawline);
+		}
+		push(@lines, $line);
+
+		if ($realcnt > 1) {
+			$realcnt-- if ($line =~ /^(?:\+| |$)/);
+		} else {
+			$realcnt = 0;
+		}
+
+		#print "==>$rawline\n";
+		#print "-->$line\n";
+	}
+
+	$prefix = '';
+
+	$realcnt = 0;
+	$linenr = 0;
+	foreach my $line (@lines) {
+		$linenr++;
+
+		my $rawline = $rawlines[$linenr - 1];
+
+#extract the line range in the file after the patch is applied
+		if ($line=~/^\@\@ -\d+(?:,\d+)? \+(\d+)(,(\d+))? \@\@/) {
+			$is_patch = 1;
+			$first_line = $linenr + 1;
+			$realline=$1-1;
+			if (defined $2) {
+				$realcnt=$3+1;
+			} else {
+				$realcnt=1+1;
+			}
+			annotate_reset();
+			$prev_values = 'E';
+
+			%suppress_ifbraces = ();
+			%suppress_whiletrailers = ();
+			%suppress_export = ();
+			next;
+
+# track the line number as we move through the hunk, note that
+# new versions of GNU diff omit the leading space on completely
+# blank context lines so we need to count that too.
+		} elsif ($line =~ /^( |\+|$)/) {
+			$realline++;
+			$realcnt-- if ($realcnt != 0);
+
+			# Measure the line length and indent.
+			($length, $indent) = line_stats($rawline);
+
+			# Track the previous line.
+			($prevline, $stashline) = ($stashline, $line);
+			($previndent, $stashindent) = ($stashindent, $indent);
+			($prevrawline, $stashrawline) = ($stashrawline, $rawline);
+
+			#warn "line<$line>\n";
+
+		} elsif ($realcnt == 1) {
+			$realcnt--;
+		}
+
+		my $hunk_line = ($realcnt != 0);
+
+#make up the handle for any error we report on this line
+		$prefix = "$filename:$realline: " if ($emacs && $file);
+		$prefix = "$filename:$linenr: " if ($emacs && !$file);
+
+		$here = "#$linenr: " if (!$file);
+		$here = "#$realline: " if ($file);
+
+		# extract the filename as it passes
+		if ($line =~ /^diff --git.*?(\S+)$/) {
+			$realfile = $1;
+			$realfile =~ s@^([^/]*)/@@ if (!$file);
+	                checkfilename($realfile, \$acpi_testexpected, \$acpi_nontestexpected);
+		} elsif ($line =~ /^\+\+\+\s+(\S+)/) {
+			$realfile = $1;
+			$realfile =~ s@^([^/]*)/@@ if (!$file);
+	                checkfilename($realfile, \$acpi_testexpected, \$acpi_nontestexpected);
+
+			$p1_prefix = $1;
+			if (!$file && $tree && $p1_prefix ne '' &&
+			    -e "$root/$p1_prefix") {
+				WARN("patch prefix '$p1_prefix' exists, appears to be a -p0 patch\n");
+			}
+
+			next;
+		}
+
+		$here .= "FILE: $realfile:$realline:" if ($realcnt != 0);
+
+		my $hereline = "$here\n$rawline\n";
+		my $herecurr = "$here\n$rawline\n";
+		my $hereprev = "$here\n$prevrawline\n$rawline\n";
+
+		$cnt_lines++ if ($realcnt != 0);
+
+# Check for incorrect file permissions
+		if ($line =~ /^new (file )?mode.*[7531]\d{0,2}$/) {
+			my $permhere = $here . "FILE: $realfile\n";
+			if ($realfile =~ /(\bMakefile(?:\.objs)?|\.c|\.cc|\.cpp|\.h|\.mak|\.[sS])$/) {
+				ERROR("do not set execute permissions for source files\n" . $permhere);
+			}
+		}
+
+# Only allow Python 3 interpreter
+		if ($realline == 1 &&
+			$line =~ /^\+#!\ *\/usr\/bin\/(?:env )?python$/) {
+			ERROR("please use python3 interpreter\n" . $herecurr);
+		}
+
+# Accept git diff extended headers as valid patches
+		if ($line =~ /^(?:rename|copy) (?:from|to) [\w\/\.\-]+\s*$/) {
+			$is_patch = 1;
+		}
+
+		if ($line =~ /^Author: .*via Qemu-devel.*/) {
+		    ERROR("Author email address is mangled by the mailing list\n" . $herecurr);
+		}
+
+#check the patch for a signoff:
+		if ($line =~ /^\s*signed-off-by:/i) {
+			# This is a signoff, if ugly, so do not double report.
+			$signoff++;
+			$in_commit_log = 0;
+
+			if (!($line =~ /^\s*Signed-off-by:/)) {
+				ERROR("The correct form is \"Signed-off-by\"\n" .
+					$herecurr);
+			}
+			if ($line =~ /^\s*signed-off-by:\S/i) {
+				ERROR("space required after Signed-off-by:\n" .
+					$herecurr);
+			}
+		}
+
+# Check if MAINTAINERS is being updated.  If so, there's probably no need to
+# emit the "does MAINTAINERS need updating?" message on file add/move/delete
+		if ($line =~ /^\s*MAINTAINERS\s*\|/) {
+			$reported_maintainer_file = 1;
+		}
+
+# Check for added, moved or deleted files
+		if (!$reported_maintainer_file && !$in_commit_log &&
+		    ($line =~ /^(?:new|deleted) file mode\s*\d+\s*$/ ||
+		     $line =~ /^rename (?:from|to) [\w\/\.\-]+\s*$/ ||
+		     ($line =~ /\{\s*([\w\/\.\-]*)\s*\=\>\s*([\w\/\.\-]*)\s*\}/ &&
+		      (defined($1) || defined($2))))) {
+			$reported_maintainer_file = 1;
+			WARN("added, moved or deleted file(s), does MAINTAINERS need updating?\n" . $herecurr);
+		}
+
+# Check for wrappage within a valid hunk of the file
+		if ($realcnt != 0 && $line !~ m{^(?:\+|-| |\\ No newline|$)}) {
+			ERROR("patch seems to be corrupt (line wrapped?)\n" .
+				$herecurr) if (!$emitted_corrupt++);
+		}
+
+# UTF-8 regex found at http://www.w3.org/International/questions/qa-forms-utf-8.en.php
+		if (($realfile =~ /^$/ || $line =~ /^\+/) &&
+		    $rawline !~ m/^$UTF8*$/) {
+			my ($utf8_prefix) = ($rawline =~ /^($UTF8*)/);
+
+			my $blank = copy_spacing($rawline);
+			my $ptr = substr($blank, 0, length($utf8_prefix)) . "^";
+			my $hereptr = "$hereline$ptr\n";
+
+			ERROR("Invalid UTF-8, patch and commit message should be encoded in UTF-8\n" . $hereptr);
+		}
+
+		if ($rawline =~ m/$UTF8_MOJIBAKE/) {
+			ERROR("Doubly-encoded UTF-8\n" . $herecurr);
+		}
+# Check if it's the start of a commit log
+# (not a header line and we haven't seen the patch filename)
+		if ($in_header_lines && $realfile =~ /^$/ &&
+		    !($rawline =~ /^\s+\S/ ||
+		      $rawline =~ /^(commit\b|from\b|[\w-]+:).*$/i)) {
+			$in_header_lines = 0;
+			$in_commit_log = 1;
+		}
+
+# Check if there is UTF-8 in a commit log when a mail header has explicitly
+# declined it, i.e defined some charset where it is missing.
+		if ($in_header_lines &&
+		    $rawline =~ /^Content-Type:.+charset="(.+)".*$/ &&
+		    $1 !~ /utf-8/i) {
+			$non_utf8_charset = 1;
+		}
+
+		if ($in_commit_log && $non_utf8_charset && $realfile =~ /^$/ &&
+		    $rawline =~ /$NON_ASCII_UTF8/) {
+			WARN("8-bit UTF-8 used in possible commit log\n" . $herecurr);
+		}
+
+# ignore non-hunk lines and lines being removed
+		next if (!$hunk_line || $line =~ /^-/);
+
+# ignore files that are being periodically imported from Linux
+		next if ($realfile =~ /^(linux-headers|include\/standard-headers)\//);
+
+#trailing whitespace
+		if ($line =~ /^\+.*\015/) {
+			my $herevet = "$here\n" . cat_vet($rawline) . "\n";
+			ERROR("DOS line endings\n" . $herevet);
+
+		} elsif ($realfile =~ /^docs\/.+\.txt/ ||
+			 $realfile =~ /^docs\/.+\.md/) {
+		    if ($rawline =~ /^\+\s+$/ && $rawline !~ /^\+ {4}$/) {
+			# TODO: properly check we're in a code block
+			#       (surrounding text is 4-column aligned)
+			my $herevet = "$here\n" . cat_vet($rawline) . "\n";
+			ERROR("code blocks in documentation should have " .
+			      "empty lines with exactly 4 columns of " .
+			      "whitespace\n" . $herevet);
+		    }
+		} elsif ($rawline =~ /^\+.*\S\s+$/ || $rawline =~ /^\+\s+$/) {
+			my $herevet = "$here\n" . cat_vet($rawline) . "\n";
+			ERROR("trailing whitespace\n" . $herevet);
+			$rpt_cleaners = 1;
+		}
+
+# checks for trace-events files
+		if ($realfile =~ /trace-events$/ && $line =~ /^\+/) {
+			if ($rawline =~ /%[-+ 0]*#/) {
+				ERROR("Don't use '#' flag of printf format ('%#') in " .
+				      "trace-events, use '0x' prefix instead\n" . $herecurr);
+			} else {
+				my $hex =
+					qr/%[-+ *.0-9]*([hljztL]|ll|hh)?(x|X|"\s*PRI[xX][^"]*"?)/;
+
+				# don't consider groups splitted by [.:/ ], like 2A.20:12ab
+				my $tmpline = $rawline;
+				$tmpline =~ s/($hex[.:\/ ])+$hex//g;
+
+				if ($tmpline =~ /(? 80)
+		{
+			if ($length > 90) {
+				ERROR("line over 90 characters\n" . $herecurr);
+			} else {
+				WARN("line over 80 characters\n" . $herecurr);
+			}
+		}
+
+# check for spaces before a quoted newline
+		if ($rawline =~ /^.*\".*\s\\n/) {
+			ERROR("unnecessary whitespace before a quoted newline\n" . $herecurr);
+		}
+
+# check for adding lines without a newline.
+		if ($line =~ /^\+/ && defined $lines[$linenr] && $lines[$linenr] =~ /^\\ No newline at end of file/) {
+			ERROR("adding a line without newline at end of file\n" . $herecurr);
+		}
+
+# check for RCS/CVS revision markers
+		if ($rawline =~ /^\+.*\$(Revision|Log|Id)(?:\$|\b)/) {
+			ERROR("CVS style keyword markers, these will _not_ be updated\n". $herecurr);
+		}
+
+# tabs are only allowed in assembly source code, and in
+# some scripts we imported from other projects.
+		next if ($realfile =~ /\.(s|S)$/);
+		next if ($realfile =~ /(checkpatch|get_maintainer)\.pl$/);
+
+		if ($rawline =~ /^\+.*\t/) {
+			my $herevet = "$here\n" . cat_vet($rawline) . "\n";
+			ERROR("code indent should never use tabs\n" . $herevet);
+			$rpt_cleaners = 1;
+		}
+
+# check we are in a valid C source file if not then ignore this hunk
+		next if ($realfile !~ /\.(h|c|cpp)$/);
+
+# Block comment styles
+
+		# Block comments use /* on a line of its own
+		if ($rawline !~ m@^\+.*/\*.*\*/[ \t)}]*$@ &&	#inline /*...*/
+		    $rawline =~ m@^\+.*/\*\*?+[ \t]*[^ \t]@) { # /* or /** non-blank
+			WARN("Block comments use a leading /* on a separate line\n" . $herecurr);
+		}
+
+# Block comments use * on subsequent lines
+		if ($prevline =~ /$;[ \t]*$/ &&			#ends in comment
+		    $prevrawline =~ /^\+.*?\/\*/ &&		#starting /*
+		    $prevrawline !~ /\*\/[ \t]*$/ &&		#no trailing */
+		    $rawline =~ /^\+/ &&			#line is new
+		    $rawline !~ /^\+[ \t]*\*/) {		#no leading *
+			WARN("Block comments use * on subsequent lines\n" . $hereprev);
+		}
+
+# Block comments use */ on trailing lines
+		if ($rawline !~ m@^\+[ \t]*\*/[ \t]*$@ &&	#trailing */
+		    $rawline !~ m@^\+.*/\*.*\*/[ \t]*$@ &&	#inline /*...*/
+		    $rawline !~ m@^\+.*\*{2,}/[ \t]*$@ &&	#trailing **/
+		    $rawline =~ m@^\+[ \t]*.+\*\/[ \t]*$@) {	#non blank */
+			WARN("Block comments use a trailing */ on a separate line\n" . $herecurr);
+		}
+
+# Block comment * alignment
+		if ($prevline =~ /$;[ \t]*$/ &&			#ends in comment
+		    $line =~ /^\+[ \t]*$;/ &&			#leading comment
+		    $rawline =~ /^\+[ \t]*\*/ &&		#leading *
+		    (($prevrawline =~ /^\+.*?\/\*/ &&		#leading /*
+		      $prevrawline !~ /\*\/[ \t]*$/) ||		#no trailing */
+		     $prevrawline =~ /^\+[ \t]*\*/)) {		#leading *
+			my $oldindent;
+			$prevrawline =~ m@^\+([ \t]*/?)\*@;
+			if (defined($1)) {
+				$oldindent = expand_tabs($1);
+			} else {
+				$prevrawline =~ m@^\+(.*/?)\*@;
+				$oldindent = expand_tabs($1);
+			}
+			$rawline =~ m@^\+([ \t]*)\*@;
+			my $newindent = $1;
+			$newindent = expand_tabs($newindent);
+			if (length($oldindent) ne length($newindent)) {
+				WARN("Block comments should align the * on each line\n" . $hereprev);
+			}
+		}
+
+# Check for potential 'bare' types
+		my ($stat, $cond, $line_nr_next, $remain_next, $off_next,
+		    $realline_next);
+		if ($realcnt && $line =~ /.\s*\S/) {
+			($stat, $cond, $line_nr_next, $remain_next, $off_next) =
+				ctx_statement_block($linenr, $realcnt, 0);
+			$stat =~ s/\n./\n /g;
+			$cond =~ s/\n./\n /g;
+
+			# Find the real next line.
+			$realline_next = $line_nr_next;
+			if (defined $realline_next &&
+			    (!defined $lines[$realline_next - 1] ||
+			     substr($lines[$realline_next - 1], $off_next) =~ /^\s*$/)) {
+				$realline_next++;
+			}
+
+			my $s = $stat;
+			$s =~ s/{.*$//s;
+
+			# Ignore goto labels.
+			if ($s =~ /$Ident:\*$/s) {
+
+			# Ignore functions being called
+			} elsif ($s =~ /^.\s*$Ident\s*\(/s) {
+
+			} elsif ($s =~ /^.\s*else\b/s) {
+
+			# declarations always start with types
+			} elsif ($prev_values eq 'E' && $s =~ /^.\s*(?:$Storage\s+)?(?:$Inline\s+)?(?:const\s+)?((?:\s*$Ident)+?)\b(?:\s+$Sparse)?\s*\**\s*(?:$Ident|\(\*[^\)]*\))(?:\s*$Modifier)?\s*(?:;|=|,|\()/s) {
+				my $type = $1;
+				$type =~ s/\s+/ /g;
+				possible($type, "A:" . $s);
+
+			# definitions in global scope can only start with types
+			} elsif ($s =~ /^.(?:$Storage\s+)?(?:$Inline\s+)?(?:const\s+)?($Ident)\b\s*(?!:)/s) {
+				possible($1, "B:" . $s);
+			}
+
+			# any (foo ... *) is a pointer cast, and foo is a type
+			while ($s =~ /\(($Ident)(?:\s+$Sparse)*[\s\*]+\s*\)/sg) {
+				possible($1, "C:" . $s);
+			}
+
+			# Check for any sort of function declaration.
+			# int foo(something bar, other baz);
+			# void (*store_gdt)(x86_descr_ptr *);
+			if ($prev_values eq 'E' && $s =~ /^(.(?:typedef\s*)?(?:(?:$Storage|$Inline)\s*)*\s*$Type\s*(?:\b$Ident|\(\*\s*$Ident\))\s*)\(/s) {
+				my ($name_len) = length($1);
+
+				my $ctx = $s;
+				substr($ctx, 0, $name_len + 1, '');
+				$ctx =~ s/\)[^\)]*$//;
+
+				for my $arg (split(/\s*,\s*/, $ctx)) {
+					if ($arg =~ /^(?:const\s+)?($Ident)(?:\s+$Sparse)*\s*\**\s*(:?\b$Ident)?$/s || $arg =~ /^($Ident)$/s) {
+
+						possible($1, "D:" . $s);
+					}
+				}
+			}
+
+		}
+
+#
+# Checks which may be anchored in the context.
+#
+
+# Check for switch () and associated case and default
+# statements should be at the same indent.
+		if ($line=~/\bswitch\s*\(.*\)/) {
+			my $err = '';
+			my $sep = '';
+			my @ctx = ctx_block_outer($linenr, $realcnt);
+			shift(@ctx);
+			for my $ctx (@ctx) {
+				my ($clen, $cindent) = line_stats($ctx);
+				if ($ctx =~ /^\+\s*(case\s+|default:)/ &&
+							$indent != $cindent) {
+					$err .= "$sep$ctx\n";
+					$sep = '';
+				} else {
+					$sep = "[...]\n";
+				}
+			}
+			if ($err ne '') {
+				ERROR("switch and case should be at the same indent\n$hereline$err");
+			}
+		}
+
+# if/while/etc brace do not go on next line, unless defining a do while loop,
+# or if that brace on the next line is for something else
+		if ($line =~ /(.*)\b((?:if|while|for|switch)\s*\(|do\b|else\b)/ && $line !~ /^.\s*\#/) {
+			my $pre_ctx = "$1$2";
+
+			my ($level, @ctx) = ctx_statement_level($linenr, $realcnt, 0);
+			my $ctx_cnt = $realcnt - $#ctx - 1;
+			my $ctx = join("\n", @ctx);
+
+			my $ctx_ln = $linenr;
+			my $ctx_skip = $realcnt;
+
+			while ($ctx_skip > $ctx_cnt || ($ctx_skip == $ctx_cnt &&
+					defined $lines[$ctx_ln - 1] &&
+					$lines[$ctx_ln - 1] =~ /^-/)) {
+				##print "SKIP<$ctx_skip> CNT<$ctx_cnt>\n";
+				$ctx_skip-- if (!defined $lines[$ctx_ln - 1] || $lines[$ctx_ln - 1] !~ /^-/);
+				$ctx_ln++;
+			}
+
+			#print "realcnt<$realcnt> ctx_cnt<$ctx_cnt>\n";
+			#print "pre<$pre_ctx>\nline<$line>\nctx<$ctx>\nnext<$lines[$ctx_ln - 1]>\n";
+
+			# The length of the "previous line" is checked against 80 because it
+			# includes the + at the beginning of the line (if the actual line has
+			# 79 or 80 characters, it is no longer possible to add a space and an
+			# opening brace there)
+			if ($#ctx == 0 && $ctx !~ /{\s*/ &&
+			    defined($lines[$ctx_ln - 1]) && $lines[$ctx_ln - 1] =~ /^\+\s*\{/ &&
+			    defined($lines[$ctx_ln - 2]) && length($lines[$ctx_ln - 2]) < 80) {
+				ERROR("that open brace { should be on the previous line\n" .
+					"$here\n$ctx\n$rawlines[$ctx_ln - 1]\n");
+			}
+			if ($level == 0 && $pre_ctx !~ /}\s*while\s*\($/ &&
+			    $ctx =~ /\)\s*\;\s*$/ &&
+			    defined $lines[$ctx_ln - 1])
+			{
+				my ($nlength, $nindent) = line_stats($lines[$ctx_ln - 1]);
+				if ($nindent > $indent) {
+					ERROR("trailing semicolon indicates no statements, indent implies otherwise\n" .
+						"$here\n$ctx\n$rawlines[$ctx_ln - 1]\n");
+				}
+			}
+		}
+
+# 'do ... while (0/false)' only makes sense in macros, without trailing ';'
+		if ($line =~ /while\s*\((0|false)\);/) {
+			ERROR("suspicious ; after while (0)\n" . $herecurr);
+		}
+
+# Check superfluous trailing ';'
+		if ($line =~ /;;$/) {
+			ERROR("superfluous trailing semicolon\n" . $herecurr);
+		}
+
+# Check relative indent for conditionals and blocks.
+		if ($line =~ /\b(?:(?:if|while|for)\s*\(|do\b)/ && $line !~ /^.\s*#/ && $line !~ /\}\s*while\s*/) {
+			my ($s, $c) = ($stat, $cond);
+
+			substr($s, 0, length($c), '');
+
+			# Make sure we remove the line prefixes as we have
+			# none on the first line, and are going to re-add them
+			# where necessary.
+			$s =~ s/\n./\n/gs;
+
+			# Find out how long the conditional actually is.
+			my @newlines = ($c =~ /\n/gs);
+			my $cond_lines = 1 + $#newlines;
+
+			# We want to check the first line inside the block
+			# starting at the end of the conditional, so remove:
+			#  1) any blank line termination
+			#  2) any opening brace { on end of the line
+			#  3) any do (...) {
+			my $continuation = 0;
+			my $check = 0;
+			$s =~ s/^.*\bdo\b//;
+			$s =~ s/^\s*\{//;
+			if ($s =~ s/^\s*\\//) {
+				$continuation = 1;
+			}
+			if ($s =~ s/^\s*?\n//) {
+				$check = 1;
+				$cond_lines++;
+			}
+
+			# Also ignore a loop construct at the end of a
+			# preprocessor statement.
+			if (($prevline =~ /^.\s*#\s*define\s/ ||
+			    $prevline =~ /\\\s*$/) && $continuation == 0) {
+				$check = 0;
+			}
+
+			my $cond_ptr = -1;
+			$continuation = 0;
+			while ($cond_ptr != $cond_lines) {
+				$cond_ptr = $cond_lines;
+
+				# If we see an #else/#elif then the code
+				# is not linear.
+				if ($s =~ /^\s*\#\s*(?:else|elif)/) {
+					$check = 0;
+				}
+
+				# Ignore:
+				#  1) blank lines, they should be at 0,
+				#  2) preprocessor lines, and
+				#  3) labels.
+				if ($continuation ||
+				    $s =~ /^\s*?\n/ ||
+				    $s =~ /^\s*#\s*?/ ||
+				    $s =~ /^\s*$Ident\s*:/) {
+					$continuation = ($s =~ /^.*?\\\n/) ? 1 : 0;
+					if ($s =~ s/^.*?\n//) {
+						$cond_lines++;
+					}
+				}
+			}
+
+			my (undef, $sindent) = line_stats("+" . $s);
+			my $stat_real = raw_line($linenr, $cond_lines);
+
+			# Check if either of these lines are modified, else
+			# this is not this patch's fault.
+			if (!defined($stat_real) ||
+			    $stat !~ /^\+/ && $stat_real !~ /^\+/) {
+				$check = 0;
+			}
+			if (defined($stat_real) && $cond_lines > 1) {
+				$stat_real = "[...]\n$stat_real";
+			}
+
+			#print "line<$line> prevline<$prevline> indent<$indent> sindent<$sindent> check<$check> continuation<$continuation> s<$s> cond_lines<$cond_lines> stat_real<$stat_real> stat<$stat>\n";
+
+			if ($check && (($sindent % 4) != 0 ||
+			    ($sindent <= $indent && $s ne ''))) {
+				ERROR("suspect code indent for conditional statements ($indent, $sindent)\n" . $herecurr . "$stat_real\n");
+			}
+		}
+
+		# Track the 'values' across context and added lines.
+		my $opline = $line; $opline =~ s/^./ /;
+		my ($curr_values, $curr_vars) =
+				annotate_values($opline . "\n", $prev_values);
+		$curr_values = $prev_values . $curr_values;
+		if ($dbg_values) {
+			my $outline = $opline; $outline =~ s/\t/ /g;
+			print "$linenr > .$outline\n";
+			print "$linenr > $curr_values\n";
+			print "$linenr >  $curr_vars\n";
+		}
+		$prev_values = substr($curr_values, -1);
+
+#ignore lines not being added
+		if ($line=~/^[^\+]/) {next;}
+
+# TEST: allow direct testing of the type matcher.
+		if ($dbg_type) {
+			if ($line =~ /^.\s*$Declare\s*$/) {
+				ERROR("TEST: is type\n" . $herecurr);
+			} elsif ($dbg_type > 1 && $line =~ /^.+($Declare)/) {
+				ERROR("TEST: is not type ($1 is)\n". $herecurr);
+			}
+			next;
+		}
+# TEST: allow direct testing of the attribute matcher.
+		if ($dbg_attr) {
+			if ($line =~ /^.\s*$Modifier\s*$/) {
+				ERROR("TEST: is attr\n" . $herecurr);
+			} elsif ($dbg_attr > 1 && $line =~ /^.+($Modifier)/) {
+				ERROR("TEST: is not attr ($1 is)\n". $herecurr);
+			}
+			next;
+		}
+
+# check for initialisation to aggregates open brace on the next line
+		if ($line =~ /^.\s*\{/ &&
+		    $prevline =~ /(?:^|[^=])=\s*$/) {
+			ERROR("that open brace { should be on the previous line\n" . $hereprev);
+		}
+
+#
+# Checks which are anchored on the added line.
+#
+
+# check for malformed paths in #include statements (uses RAW line)
+		if ($rawline =~ m{^.\s*\#\s*include\s+[<"](.*)[">]}) {
+			my $path = $1;
+			if ($path =~ m{//}) {
+				ERROR("malformed #include filename\n" .
+					$herecurr);
+			}
+		}
+
+# no C99 // comments
+		if ($line =~ m{//} &&
+		    $rawline !~ m{// SPDX-License-Identifier: }) {
+			ERROR("do not use C99 // comments\n" . $herecurr);
+		}
+		# Remove C99 comments.
+		$line =~ s@//.*@@;
+		$opline =~ s@//.*@@;
+
+# check for global initialisers.
+		if ($line =~ /^.$Type\s*$Ident\s*(?:\s+$Modifier)*\s*=\s*(0|NULL|false)\s*;/) {
+			ERROR("do not initialise globals to 0 or NULL\n" .
+				$herecurr);
+		}
+# check for static initialisers.
+		if ($line =~ /\bstatic\s.*=\s*(0|NULL|false)\s*;/) {
+			ERROR("do not initialise statics to 0 or NULL\n" .
+				$herecurr);
+		}
+
+# * goes on variable not on type
+		# (char*[ const])
+		if ($line =~ m{\($NonptrType(\s*(?:$Modifier\b\s*|\*\s*)+)\)}) {
+			my ($from, $to) = ($1, $1);
+
+			# Should start with a space.
+			$to =~ s/^(\S)/ $1/;
+			# Should not end with a space.
+			$to =~ s/\s+$//;
+			# '*'s should not have spaces between.
+			while ($to =~ s/\*\s+\*/\*\*/) {
+			}
+
+			#print "from<$from> to<$to>\n";
+			if ($from ne $to) {
+				ERROR("\"(foo$from)\" should be \"(foo$to)\"\n" .  $herecurr);
+			}
+		} elsif ($line =~ m{\b$NonptrType(\s*(?:$Modifier\b\s*|\*\s*)+)($Ident)}) {
+			my ($from, $to, $ident) = ($1, $1, $2);
+
+			# Should start with a space.
+			$to =~ s/^(\S)/ $1/;
+			# Should not end with a space.
+			$to =~ s/\s+$//;
+			# '*'s should not have spaces between.
+			while ($to =~ s/\*\s+\*/\*\*/) {
+			}
+			# Modifiers should have spaces.
+			$to =~ s/(\b$Modifier$)/$1 /;
+
+			#print "from<$from> to<$to> ident<$ident>\n";
+			if ($from ne $to && $ident !~ /^$Modifier$/) {
+				ERROR("\"foo${from}bar\" should be \"foo${to}bar\"\n" .  $herecurr);
+			}
+		}
+
+# function brace can't be on same line, except for #defines of do while,
+# or if closed on same line
+		if (($line=~/$Type\s*$Ident\(.*\).*\s\{/) and
+		    !($line=~/\#\s*define.*do\s\{/) and !($line=~/}/)) {
+			ERROR("open brace '{' following function declarations go on the next line\n" . $herecurr);
+		}
+
+# open braces for enum, union and struct go on the same line.
+		if ($line =~ /^.\s*\{/ &&
+		    $prevline =~ /^.\s*(?:typedef\s+)?(enum|union|struct)(?:\s+$Ident)?\s*$/) {
+			ERROR("open brace '{' following $1 go on the same line\n" . $hereprev);
+		}
+
+# missing space after union, struct or enum definition
+		if ($line =~ /^.\s*(?:typedef\s+)?(enum|union|struct)(?:\s+$Ident)?(?:\s+$Ident)?[=\{]/) {
+		    ERROR("missing space after $1 definition\n" . $herecurr);
+		}
+
+# check for spacing round square brackets; allowed:
+#  1. with a type on the left -- int [] a;
+#  2. at the beginning of a line for slice initialisers -- [0...10] = 5,
+#  3. inside a curly brace -- = { [0...10] = 5 }
+#  4. after a comma -- [1] = 5, [2] = 6
+#  5. in a macro definition -- #define abc(x) [x] = y
+		while ($line =~ /(.*?\s)\[/g) {
+			my ($where, $prefix) = ($-[1], $1);
+			if ($prefix !~ /$Type\s+$/ &&
+			    ($where != 0 || $prefix !~ /^.\s+$/) &&
+			    $prefix !~ /\#\s*define[^(]*\([^)]*\)\s+$/ &&
+			    $prefix !~ /[,{:]\s+$/) {
+				ERROR("space prohibited before open square bracket '['\n" . $herecurr);
+			}
+		}
+
+# check for spaces between functions and their parentheses.
+		while ($line =~ /($Ident)\s+\(/g) {
+			my $name = $1;
+			my $ctx_before = substr($line, 0, $-[1]);
+			my $ctx = "$ctx_before$name";
+
+			# Ignore those directives where spaces _are_ permitted.
+			if ($name =~ /^(?:
+				if|for|while|switch|return|case|
+				volatile|__volatile__|coroutine_fn|
+				__attribute__|format|__extension__|
+				asm|__asm__)$/x)
+			{
+
+			# Ignore 'catch (...)' in C++
+			} elsif ($name =~ /^catch$/ && $realfile =~ /(\.cpp|\.h)$/) {
+
+			# cpp #define statements have non-optional spaces, ie
+			# if there is a space between the name and the open
+			# parenthesis it is simply not a parameter group.
+			} elsif ($ctx_before =~ /^.\s*\#\s*define\s*$/) {
+
+			# cpp #elif statement condition may start with a (
+			} elsif ($ctx =~ /^.\s*\#\s*elif\s*$/) {
+
+			# If this whole things ends with a type its most
+			# likely a typedef for a function.
+			} elsif ($ctx =~ /$Type$/) {
+
+			} else {
+				ERROR("space prohibited between function name and open parenthesis '('\n" . $herecurr);
+			}
+		}
+# Check operator spacing.
+		if (!($line=~/\#\s*include/)) {
+			my $ops = qr{
+				<<=|>>=|<=|>=|==|!=|
+				\+=|-=|\*=|\/=|%=|\^=|\|=|&=|
+				=>|->|<<|>>|<|>|=|!|~|
+				&&|\|\||,|\^|\+\+|--|&|\||\+|-|\*|\/|%|
+				\?|::|:
+			}x;
+			my @elements = split(/($ops|;)/, $opline);
+			my $off = 0;
+
+			my $blank = copy_spacing($opline);
+
+			for (my $n = 0; $n < $#elements; $n += 2) {
+				$off += length($elements[$n]);
+
+				# Pick up the preceding and succeeding characters.
+				my $ca = substr($opline, 0, $off);
+				my $cc = '';
+				if (length($opline) >= ($off + length($elements[$n + 1]))) {
+					$cc = substr($opline, $off + length($elements[$n + 1]));
+				}
+				my $cb = "$ca$;$cc";
+
+				my $a = '';
+				$a = 'V' if ($elements[$n] ne '');
+				$a = 'W' if ($elements[$n] =~ /\s$/);
+				$a = 'C' if ($elements[$n] =~ /$;$/);
+				$a = 'B' if ($elements[$n] =~ /(\[|\()$/);
+				$a = 'O' if ($elements[$n] eq '');
+				$a = 'E' if ($ca =~ /^\s*$/);
+
+				my $op = $elements[$n + 1];
+
+				my $c = '';
+				if (defined $elements[$n + 2]) {
+					$c = 'V' if ($elements[$n + 2] ne '');
+					$c = 'W' if ($elements[$n + 2] =~ /^\s/);
+					$c = 'C' if ($elements[$n + 2] =~ /^$;/);
+					$c = 'B' if ($elements[$n + 2] =~ /^(\)|\]|;)/);
+					$c = 'O' if ($elements[$n + 2] eq '');
+					$c = 'E' if ($elements[$n + 2] =~ /^\s*\\$/);
+				} else {
+					$c = 'E';
+				}
+
+				my $ctx = "${a}x${c}";
+
+				my $at = "(ctx:$ctx)";
+
+				my $ptr = substr($blank, 0, $off) . "^";
+				my $hereptr = "$hereline$ptr\n";
+
+				# Pull out the value of this operator.
+				my $op_type = substr($curr_values, $off + 1, 1);
+
+				# Get the full operator variant.
+				my $opv = $op . substr($curr_vars, $off, 1);
+
+				# Ignore operators passed as parameters.
+				if ($op_type ne 'V' &&
+				    $ca =~ /\s$/ && $cc =~ /^\s*,/) {
+
+#				# Ignore comments
+#				} elsif ($op =~ /^$;+$/) {
+
+				# ; should have either the end of line or a space or \ after it
+				} elsif ($op eq ';') {
+					if ($ctx !~ /.x[WEBC]/ &&
+					    $cc !~ /^\\/ && $cc !~ /^;/) {
+						ERROR("space required after that '$op' $at\n" . $hereptr);
+					}
+
+				# // is a comment
+				} elsif ($op eq '//') {
+
+				# Ignore : used in class declaration in C++
+				} elsif ($opv eq ':B' && $ctx =~ /Wx[WE]/ &&
+						 $line =~ /class/ && $realfile =~ /(\.cpp|\.h)$/) {
+
+				# No spaces for:
+				#   ->
+				#   :   when part of a bitfield
+				} elsif ($op eq '->' || $opv eq ':B') {
+					if ($ctx =~ /Wx.|.xW/) {
+						ERROR("spaces prohibited around that '$op' $at\n" . $hereptr);
+					}
+
+				# , must have a space on the right.
+                                # not required when having a single },{ on one line
+				} elsif ($op eq ',') {
+					if ($ctx !~ /.x[WEC]/ && $cc !~ /^}/ &&
+                                            ($elements[$n] . $elements[$n + 2]) !~ " *}\\{") {
+						ERROR("space required after that '$op' $at\n" . $hereptr);
+					}
+
+				# '*' as part of a type definition -- reported already.
+				} elsif ($opv eq '*_') {
+					#warn "'*' is part of type\n";
+
+				# unary operators should have a space before and
+				# none after.  May be left adjacent to another
+				# unary operator, or a cast
+				} elsif ($op eq '!' || $op eq '~' ||
+					 $opv eq '*U' || $opv eq '-U' ||
+					 $opv eq '&U' || $opv eq '&&U') {
+					if ($op eq '~' && $ca =~ /::$/ && $realfile =~ /(\.cpp|\.h)$/) {
+						# '~' used as a name of Destructor
+
+					} elsif ($ctx !~ /[WEBC]x./ && $ca !~ /(?:\)|!|~|\*|-|\&|\||\+\+|\-\-|\{)$/) {
+						ERROR("space required before that '$op' $at\n" . $hereptr);
+					}
+					if ($op eq '*' && $cc =~/\s*$Modifier\b/) {
+						# A unary '*' may be const
+
+					} elsif ($ctx =~ /.xW/) {
+						ERROR("space prohibited after that '$op' $at\n" . $hereptr);
+					}
+
+				# unary ++ and unary -- are allowed no space on one side.
+				} elsif ($op eq '++' or $op eq '--') {
+					if ($ctx !~ /[WEOBC]x[^W]/ && $ctx !~ /[^W]x[WOBEC]/) {
+						ERROR("space required one side of that '$op' $at\n" . $hereptr);
+					}
+					if ($ctx =~ /Wx[BE]/ ||
+					    ($ctx =~ /Wx./ && $cc =~ /^;/)) {
+						ERROR("space prohibited before that '$op' $at\n" . $hereptr);
+					}
+					if ($ctx =~ /ExW/) {
+						ERROR("space prohibited after that '$op' $at\n" . $hereptr);
+					}
+
+				# A colon needs no spaces before when it is
+				# terminating a case value or a label.
+				} elsif ($opv eq ':C' || $opv eq ':L') {
+					if ($ctx =~ /Wx./) {
+						ERROR("space prohibited before that '$op' $at\n" . $hereptr);
+					}
+
+				# All the others need spaces both sides.
+				} elsif ($ctx !~ /[EWC]x[CWE]/) {
+					my $ok = 0;
+
+					if ($realfile =~ /\.cpp|\.h$/) {
+						# Ignore template arguments <...> in C++
+						if (($op eq '<' || $op eq '>') && $line =~ /<.*>/) {
+							$ok = 1;
+						}
+
+						# Ignore :: in C++
+						if ($op eq '::') {
+							$ok = 1;
+						}
+					}
+
+					# Ignore email addresses 
+					if (($op eq '<' &&
+					     $cc =~ /^\S+\@\S+>/) ||
+					    ($op eq '>' &&
+					     $ca =~ /<\S+\@\S+$/))
+					{
+						$ok = 1;
+					}
+
+					# Ignore ?:
+					if (($opv eq ':O' && $ca =~ /\?$/) ||
+					    ($op eq '?' && $cc =~ /^:/)) {
+						$ok = 1;
+					}
+
+					if ($ok == 0) {
+						ERROR("spaces required around that '$op' $at\n" . $hereptr);
+					}
+				}
+				$off += length($elements[$n + 1]);
+			}
+		}
+
+#need space before brace following if, while, etc
+		if (($line =~ /\(.*\)\{/ && $line !~ /\($Type\)\{/) ||
+		    $line =~ /do\{/) {
+			ERROR("space required before the open brace '{'\n" . $herecurr);
+		}
+
+# closing brace should have a space following it when it has anything
+# on the line
+		if ($line =~ /}(?!(?:,|;|\)))\S/) {
+			ERROR("space required after that close brace '}'\n" . $herecurr);
+		}
+
+# check spacing on square brackets
+		if ($line =~ /\[\s/ && $line !~ /\[\s*$/) {
+			ERROR("space prohibited after that open square bracket '['\n" . $herecurr);
+		}
+		if ($line =~ /\s\]/) {
+			ERROR("space prohibited before that close square bracket ']'\n" . $herecurr);
+		}
+
+# check spacing on parentheses
+		if ($line =~ /\(\s/ && $line !~ /\(\s*(?:\\)?$/ &&
+		    $line !~ /for\s*\(\s+;/) {
+			ERROR("space prohibited after that open parenthesis '('\n" . $herecurr);
+		}
+		if ($line =~ /(\s+)\)/ && $line !~ /^.\s*\)/ &&
+		    $line !~ /for\s*\(.*;\s+\)/ &&
+		    $line !~ /:\s+\)/) {
+			ERROR("space prohibited before that close parenthesis ')'\n" . $herecurr);
+		}
+
+# Return is not a function.
+		if (defined($stat) && $stat =~ /^.\s*return(\s*)(\(.*);/s) {
+			my $spacing = $1;
+			my $value = $2;
+
+			# Flatten any parentheses
+			$value =~ s/\(/ \(/g;
+			$value =~ s/\)/\) /g;
+			while ($value =~ s/\[[^\{\}]*\]/1/ ||
+			       $value !~ /(?:$Ident|-?$Constant)\s*
+					     $Compare\s*
+					     (?:$Ident|-?$Constant)/x &&
+			       $value =~ s/\([^\(\)]*\)/1/) {
+			}
+#print "value<$value>\n";
+			if ($value =~ /^\s*(?:$Ident|-?$Constant)\s*$/ &&
+			    $line =~ /;$/) {
+				ERROR("return is not a function, parentheses are not required\n" . $herecurr);
+
+			} elsif ($spacing !~ /\s+/) {
+				ERROR("space required before the open parenthesis '('\n" . $herecurr);
+			}
+		}
+# Return of what appears to be an errno should normally be -'ve
+		if ($line =~ /^.\s*return\s*(E[A-Z]*)\s*;/) {
+			my $name = $1;
+			if ($name ne 'EOF' && $name ne 'ERROR') {
+				ERROR("return of an errno should typically be -ve (return -$1)\n" . $herecurr);
+			}
+		}
+
+		if ($line =~ /^.\s*(Q(?:S?LIST|SIMPLEQ|TAILQ)_HEAD)\s*\(\s*[^,]/ &&
+		    $line !~ /^.typedef/) {
+		    ERROR("named $1 should be typedefed separately\n" . $herecurr);
+		}
+
+# Need a space before open parenthesis after if, while etc
+		if ($line=~/\b(if|while|for|switch)\(/) {
+			ERROR("space required before the open parenthesis '('\n" . $herecurr);
+		}
+
+# Check for illegal assignment in if conditional -- and check for trailing
+# statements after the conditional.
+		if ($line =~ /do\s*(?!{)/) {
+			my ($stat_next) = ctx_statement_block($line_nr_next,
+						$remain_next, $off_next);
+			$stat_next =~ s/\n./\n /g;
+			##print "stat<$stat> stat_next<$stat_next>\n";
+
+			if ($stat_next =~ /^\s*while\b/) {
+				# If the statement carries leading newlines,
+				# then count those as offsets.
+				my ($whitespace) =
+					($stat_next =~ /^((?:\s*\n[+-])*\s*)/s);
+				my $offset =
+					statement_rawlines($whitespace) - 1;
+
+				$suppress_whiletrailers{$line_nr_next +
+								$offset} = 1;
+			}
+		}
+		if (!defined $suppress_whiletrailers{$linenr} &&
+		    $line =~ /\b(?:if|while|for)\s*\(/ && $line !~ /^.\s*#/) {
+			my ($s, $c) = ($stat, $cond);
+
+			if ($c =~ /\bif\s*\(.*[^<>!=]=[^=].*/s) {
+				ERROR("do not use assignment in if condition\n" . $herecurr);
+			}
+
+			# Find out what is on the end of the line after the
+			# conditional.
+			substr($s, 0, length($c), '');
+			$s =~ s/\n.*//g;
+			$s =~ s/$;//g; 	# Remove any comments
+			if (length($c) && $s !~ /^\s*{?\s*\\*\s*$/ &&
+			    $c !~ /}\s*while\s*/)
+			{
+				# Find out how long the conditional actually is.
+				my @newlines = ($c =~ /\n/gs);
+				my $cond_lines = 1 + $#newlines;
+				my $stat_real = '';
+
+				$stat_real = raw_line($linenr, $cond_lines)
+							. "\n" if ($cond_lines);
+				if (defined($stat_real) && $cond_lines > 1) {
+					$stat_real = "[...]\n$stat_real";
+				}
+
+				ERROR("trailing statements should be on next line\n" . $herecurr . $stat_real);
+			}
+		}
+
+# Check for bitwise tests written as boolean
+		if ($line =~ /
+			(?:
+				(?:\[|\(|\&\&|\|\|)
+				\s*0[xX][0-9]+\s*
+				(?:\&\&|\|\|)
+			|
+				(?:\&\&|\|\|)
+				\s*0[xX][0-9]+\s*
+				(?:\&\&|\|\||\)|\])
+			)/x)
+		{
+			ERROR("boolean test with hexadecimal, perhaps just 1 \& or \|?\n" . $herecurr);
+		}
+
+# if and else should not have general statements after it
+		if ($line =~ /^.\s*(?:}\s*)?else\b(.*)/) {
+			my $s = $1;
+			$s =~ s/$;//g; 	# Remove any comments
+			if ($s !~ /^\s*(?:\sif|(?:{|)\s*\\?\s*$)/) {
+				ERROR("trailing statements should be on next line\n" . $herecurr);
+			}
+		}
+# if should not continue a brace
+		if ($line =~ /}\s*if\b/) {
+			ERROR("trailing statements should be on next line\n" .
+				$herecurr);
+		}
+# case and default should not have general statements after them
+		if ($line =~ /^.\s*(?:case\s*.*|default\s*):/g &&
+		    $line !~ /\G(?:
+			(?:\s*$;*)(?:\s*{)?(?:\s*$;*)(?:\s*\\)?\s*$|
+			\s*return\s+
+		    )/xg)
+		{
+			ERROR("trailing statements should be on next line\n" . $herecurr);
+		}
+
+		# Check for }else {, these must be at the same
+		# indent level to be relevant to each other.
+		if ($prevline=~/}\s*$/ and $line=~/^.\s*else\s*/ and
+						$previndent == $indent) {
+			ERROR("else should follow close brace '}'\n" . $hereprev);
+		}
+
+		if ($prevline=~/}\s*$/ and $line=~/^.\s*while\s*/ and
+						$previndent == $indent) {
+			my ($s, $c) = ctx_statement_block($linenr, $realcnt, 0);
+
+			# Find out what is on the end of the line after the
+			# conditional.
+			substr($s, 0, length($c), '');
+			$s =~ s/\n.*//g;
+
+			if ($s =~ /^\s*;/) {
+				ERROR("while should follow close brace '}'\n" . $hereprev);
+			}
+		}
+
+#studly caps, commented out until figure out how to distinguish between use of existing and adding new
+#		if (($line=~/[\w_][a-z\d]+[A-Z]/) and !($line=~/print/)) {
+#		    print "No studly caps, use _\n";
+#		    print "$herecurr";
+#		    $clean = 0;
+#		}
+
+#no spaces allowed after \ in define
+		if ($line=~/\#\s*define.*\\\s$/) {
+			ERROR("Whitespace after \\ makes next lines useless\n" . $herecurr);
+		}
+
+# multi-statement macros should be enclosed in a do while loop, grab the
+# first statement and ensure its the whole macro if its not enclosed
+# in a known good container
+		if ($realfile !~ m@/vmlinux.lds.h$@ &&
+		    $line =~ /^.\s*\#\s*define\s*$Ident(\()?/) {
+			my $ln = $linenr;
+			my $cnt = $realcnt;
+			my ($off, $dstat, $dcond, $rest);
+			my $ctx = '';
+
+			my $args = defined($1);
+
+			# Find the end of the macro and limit our statement
+			# search to that.
+			while ($cnt > 0 && defined $lines[$ln - 1] &&
+				$lines[$ln - 1] =~ /^(?:-|..*\\$)/)
+			{
+				$ctx .= $rawlines[$ln - 1] . "\n";
+				$cnt-- if ($lines[$ln - 1] !~ /^-/);
+				$ln++;
+			}
+			$ctx .= $rawlines[$ln - 1];
+
+			($dstat, $dcond, $ln, $cnt, $off) =
+				ctx_statement_block($linenr, $ln - $linenr + 1, 0);
+			#print "dstat<$dstat> dcond<$dcond> cnt<$cnt> off<$off>\n";
+			#print "LINE<$lines[$ln-1]> len<" . length($lines[$ln-1]) . "\n";
+
+			# Extract the remainder of the define (if any) and
+			# rip off surrounding spaces, and trailing \'s.
+			$rest = '';
+			while ($off != 0 || ($cnt > 0 && $rest =~ /\\\s*$/)) {
+				#print "ADDING cnt<$cnt> $off <" . substr($lines[$ln - 1], $off) . "> rest<$rest>\n";
+				if ($off != 0 || $lines[$ln - 1] !~ /^-/) {
+					$rest .= substr($lines[$ln - 1], $off) . "\n";
+					$cnt--;
+				}
+				$ln++;
+				$off = 0;
+			}
+			$rest =~ s/\\\n.//g;
+			$rest =~ s/^\s*//s;
+			$rest =~ s/\s*$//s;
+
+			# Clean up the original statement.
+			if ($args) {
+				substr($dstat, 0, length($dcond), '');
+			} else {
+				$dstat =~ s/^.\s*\#\s*define\s+$Ident\s*//;
+			}
+			$dstat =~ s/$;//g;
+			$dstat =~ s/\\\n.//g;
+			$dstat =~ s/^\s*//s;
+			$dstat =~ s/\s*$//s;
+
+			# Flatten any parentheses and braces
+			while ($dstat =~ s/\([^\(\)]*\)/1/ ||
+			       $dstat =~ s/\{[^\{\}]*\}/1/ ||
+			       $dstat =~ s/\[[^\{\}]*\]/1/)
+			{
+			}
+
+			my $exceptions = qr{
+				$Declare|
+				module_param_named|
+				MODULE_PARAM_DESC|
+				DECLARE_PER_CPU|
+				DEFINE_PER_CPU|
+				__typeof__\(|
+				union|
+				struct|
+				\.$Ident\s*=\s*|
+				^\"|\"$
+			}x;
+			#print "REST<$rest> dstat<$dstat> ctx<$ctx>\n";
+			if ($rest ne '' && $rest ne ',') {
+				if ($rest !~ /while\s*\(/ &&
+				    $dstat !~ /$exceptions/)
+				{
+					ERROR("Macros with multiple statements should be enclosed in a do - while loop\n" . "$here\n$ctx\n");
+				}
+
+			} elsif ($ctx !~ /;/) {
+				if ($dstat ne '' &&
+				    $dstat !~ /^(?:$Ident|-?$Constant)$/ &&
+				    $dstat !~ /$exceptions/ &&
+				    $dstat !~ /^\.$Ident\s*=/ &&
+				    $dstat =~ /$Operators/)
+				{
+					ERROR("Macros with complex values should be enclosed in parenthesis\n" . "$here\n$ctx\n");
+				}
+			}
+		}
+
+# check for missing bracing around if etc
+		if ($line =~ /(^.*)\b(?:if|while|for)\b/ &&
+			$line !~ /\#\s*if/) {
+			my $allowed = 0;
+
+			# Check the pre-context.
+			if ($line =~ /(\}.*?)$/) {
+				my $pre = $1;
+
+				if ($line !~ /else/) {
+					print "APW: ALLOWED: pre<$pre> line<$line>\n"
+						if $dbg_adv_apw;
+					$allowed = 1;
+				}
+			}
+			my ($level, $endln, @chunks) =
+				ctx_statement_full($linenr, $realcnt, 1);
+                        if ($dbg_adv_apw) {
+                            print "APW: chunks<$#chunks> linenr<$linenr> endln<$endln> level<$level>\n";
+                            print "APW: <<$chunks[1][0]>><<$chunks[1][1]>>\n"
+                                if $#chunks >= 1;
+                        }
+			if ($#chunks >= 0 && $level == 0) {
+				my $seen = 0;
+				my $herectx = $here . "\n";
+				my $ln = $linenr - 1;
+				for my $chunk (@chunks) {
+					my ($cond, $block) = @{$chunk};
+
+					# If the condition carries leading newlines, then count those as offsets.
+					my ($whitespace) = ($cond =~ /^((?:\s*\n[+-])*\s*)/s);
+					my $offset = statement_rawlines($whitespace) - 1;
+
+					#print "COND<$cond> whitespace<$whitespace> offset<$offset>\n";
+
+					# We have looked at and allowed this specific line.
+					$suppress_ifbraces{$ln + $offset} = 1;
+
+					$herectx .= "$rawlines[$ln + $offset]\n[...]\n";
+					$ln += statement_rawlines($block) - 1;
+
+					substr($block, 0, length($cond), '');
+
+					my $spaced_block = $block;
+					$spaced_block =~ s/\n\+/ /g;
+
+					$seen++ if ($spaced_block =~ /^\s*\{/);
+
+                                        print "APW: cond<$cond> block<$block> allowed<$allowed>\n"
+                                            if $dbg_adv_apw;
+					if (statement_lines($cond) > 1) {
+                                            print "APW: ALLOWED: cond<$cond>\n"
+                                                if $dbg_adv_apw;
+                                            $allowed = 1;
+					}
+					if ($block =~/\b(?:if|for|while)\b/) {
+                                            print "APW: ALLOWED: block<$block>\n"
+                                                if $dbg_adv_apw;
+                                            $allowed = 1;
+					}
+					if (statement_block_size($block) > 1) {
+                                            print "APW: ALLOWED: lines block<$block>\n"
+                                                if $dbg_adv_apw;
+                                            $allowed = 1;
+					}
+				}
+				if ($seen != ($#chunks + 1) && !$allowed) {
+					ERROR("braces {} are necessary for all arms of this statement\n" . $herectx);
+				}
+			}
+		}
+		if (!defined $suppress_ifbraces{$linenr - 1} &&
+					$line =~ /\b(if|while|for|else)\b/ &&
+					$line !~ /\#\s*if/ &&
+					$line !~ /\#\s*else/) {
+			my $allowed = 0;
+
+                        # Check the pre-context.
+                        if (substr($line, 0, $-[0]) =~ /(\}\s*)$/) {
+                            my $pre = $1;
+
+                            if ($line !~ /else/) {
+                                print "APW: ALLOWED: pre<$pre> line<$line>\n"
+                                    if $dbg_adv_apw;
+                                $allowed = 1;
+                            }
+                        }
+
+			my ($level, $endln, @chunks) =
+				ctx_statement_full($linenr, $realcnt, $-[0]);
+
+			# Check the condition.
+			my ($cond, $block) = @{$chunks[0]};
+                        print "CHECKING<$linenr> cond<$cond> block<$block>\n"
+                            if $dbg_adv_checking;
+			if (defined $cond) {
+				substr($block, 0, length($cond), '');
+			}
+			if (statement_lines($cond) > 1) {
+                            print "APW: ALLOWED: cond<$cond>\n"
+                                if $dbg_adv_apw;
+                            $allowed = 1;
+			}
+			if ($block =~/\b(?:if|for|while)\b/) {
+                            print "APW: ALLOWED: block<$block>\n"
+                                if $dbg_adv_apw;
+                            $allowed = 1;
+			}
+			if (statement_block_size($block) > 1) {
+                            print "APW: ALLOWED: lines block<$block>\n"
+                                if $dbg_adv_apw;
+                            $allowed = 1;
+			}
+			# Check the post-context.
+			if (defined $chunks[1]) {
+				my ($cond, $block) = @{$chunks[1]};
+				if (defined $cond) {
+					substr($block, 0, length($cond), '');
+				}
+				if ($block =~ /^\s*\{/) {
+                                    print "APW: ALLOWED: chunk-1 block<$block>\n"
+                                        if $dbg_adv_apw;
+                                    $allowed = 1;
+				}
+			}
+                        print "DCS: level=$level block<$block> allowed=$allowed\n"
+                            if $dbg_adv_dcs;
+			if ($level == 0 && $block !~ /^\s*\{/ && !$allowed) {
+				my $herectx = $here . "\n";;
+				my $cnt = statement_rawlines($block);
+
+				for (my $n = 0; $n < $cnt; $n++) {
+					$herectx .= raw_line($linenr, $n) . "\n";;
+				}
+
+				ERROR("braces {} are necessary even for single statement blocks\n" . $herectx);
+			}
+		}
+
+# no volatiles please
+		my $asm_volatile = qr{\b(__asm__|asm)\s+(__volatile__|volatile)\b};
+		if ($line =~ /\bvolatile\b/ && $line !~ /$asm_volatile/ &&
+                    $line !~ /sig_atomic_t/ &&
+                    !ctx_has_comment($first_line, $linenr)) {
+			my $msg = "Use of volatile is usually wrong, please add a comment\n" . $herecurr;
+                        ERROR($msg);
+		}
+
+# warn about #if 0
+		if ($line =~ /^.\s*\#\s*if\s+0\b/) {
+			ERROR("if this code is redundant consider removing it\n" .
+				$herecurr);
+		}
+
+# check for needless g_free() checks
+		if ($prevline =~ /\bif\s*\(([^\)]*)\)/) {
+			my $expr = $1;
+			if ($line =~ /\bg_free\(\Q$expr\E\);/) {
+				ERROR("g_free(NULL) is safe this check is probably not required\n" . $hereprev);
+			}
+		}
+
+# warn about #ifdefs in C files
+#		if ($line =~ /^.\s*\#\s*if(|n)def/ && ($realfile =~ /\.c$/)) {
+#			print "#ifdef in C files should be avoided\n";
+#			print "$herecurr";
+#			$clean = 0;
+#		}
+
+# warn about spacing in #ifdefs
+		if ($line =~ /^.\s*\#\s*(ifdef|ifndef|elif)\s\s+/) {
+			ERROR("exactly one space required after that #$1\n" . $herecurr);
+		}
+# check for memory barriers without a comment.
+		if ($line =~ /\b(smp_mb|smp_rmb|smp_wmb|smp_read_barrier_depends)\(/) {
+			if (!ctx_has_comment($first_line, $linenr)) {
+				ERROR("memory barrier without comment\n" . $herecurr);
+			}
+		}
+# check of hardware specific defines
+# we have e.g. CONFIG_LINUX and CONFIG_WIN32 for common cases
+# where they might be necessary.
+		if ($line =~ m@^.\s*\#\s*if.*\b__@) {
+			WARN("architecture specific defines should be avoided\n" .  $herecurr);
+		}
+
+# Check that the storage class is at the beginning of a declaration
+		if ($line =~ /\b$Storage\b/ && $line !~ /^.\s*$Storage\b/) {
+			ERROR("storage class should be at the beginning of the declaration\n" . $herecurr)
+		}
+
+# check the location of the inline attribute, that it is between
+# storage class and type.
+		if ($line =~ /\b$Type\s+$Inline\b/ ||
+		    $line =~ /\b$Inline\s+$Storage\b/) {
+			ERROR("inline keyword should sit between storage class and type\n" . $herecurr);
+		}
+
+# check for sizeof(&)
+		if ($line =~ /\bsizeof\s*\(\s*\&/) {
+			ERROR("sizeof(& should be avoided\n" . $herecurr);
+		}
+
+# check for new externs in .c files.
+		if ($realfile =~ /\.c$/ && defined $stat &&
+		    $stat =~ /^.\s*(?:extern\s+)?$Type\s+($Ident)(\s*)\(/s)
+		{
+			my $function_name = $1;
+			my $paren_space = $2;
+
+			my $s = $stat;
+			if (defined $cond) {
+				substr($s, 0, length($cond), '');
+			}
+			if ($s =~ /^\s*;/ &&
+			    $function_name ne 'uninitialized_var')
+			{
+				ERROR("externs should be avoided in .c files\n" .  $herecurr);
+			}
+
+			if ($paren_space =~ /\n/) {
+				ERROR("arguments for function declarations should follow identifier\n" . $herecurr);
+			}
+
+		} elsif ($realfile =~ /\.c$/ && defined $stat &&
+		    $stat =~ /^.\s*extern\s+/)
+		{
+			ERROR("externs should be avoided in .c files\n" .  $herecurr);
+		}
+
+# check for pointless casting of g_malloc return
+		if ($line =~ /\*\s*\)\s*g_(try)?(m|re)alloc(0?)(_n)?\b/) {
+			if ($2 == 'm') {
+				ERROR("unnecessary cast may hide bugs, use g_$1new$3 instead\n" . $herecurr);
+			} else {
+				ERROR("unnecessary cast may hide bugs, use g_$1renew$3 instead\n" . $herecurr);
+			}
+		}
+
+# check for gcc specific __FUNCTION__
+		if ($line =~ /__FUNCTION__/) {
+			ERROR("__func__ should be used instead of gcc specific __FUNCTION__\n"  . $herecurr);
+		}
+
+# recommend g_path_get_* over g_strdup(basename/dirname(...))
+		if ($line =~ /\bg_strdup\s*\(\s*(basename|dirname)\s*\(/) {
+			WARN("consider using g_path_get_$1() in preference to g_strdup($1())\n" . $herecurr);
+		}
+
+# recommend qemu_strto* over strto* for numeric conversions
+		if ($line =~ /\b(strto[^kd].*?)\s*\(/) {
+			ERROR("consider using qemu_$1 in preference to $1\n" . $herecurr);
+		}
+# recommend sigaction over signal for portability, when establishing a handler
+		if ($line =~ /\bsignal\s*\(/ && !($line =~ /SIG_(?:IGN|DFL)/)) {
+			ERROR("use sigaction to establish signal handlers; signal is not portable\n" . $herecurr);
+		}
+# check for module_init(), use category-specific init macros explicitly please
+		if ($line =~ /^module_init\s*\(/) {
+			ERROR("please use block_init(), type_init() etc. instead of module_init()\n" . $herecurr);
+		}
+# check for various ops structs, ensure they are const.
+		my $struct_ops = qr{AIOCBInfo|
+				BdrvActionOps|
+				BlockDevOps|
+				BlockJobDriver|
+				DisplayChangeListenerOps|
+				GraphicHwOps|
+				IDEDMAOps|
+				KVMCapabilityInfo|
+				MemoryRegionIOMMUOps|
+				MemoryRegionOps|
+				MemoryRegionPortio|
+				QEMUFileOps|
+				SCSIBusInfo|
+				SCSIReqOps|
+				Spice[A-Z][a-zA-Z0-9]*Interface|
+				USBDesc[A-Z][a-zA-Z0-9]*|
+				VhostOps|
+				VMStateDescription|
+				VMStateInfo}x;
+		if ($line !~ /\bconst\b/ &&
+		    $line =~ /\b($struct_ops)\b.*=/) {
+			ERROR("initializer for struct $1 should normally be const\n" .
+				$herecurr);
+		}
+
+# format strings checks
+		my $string;
+		while ($line =~ /(?:^|")([X\t]*)(?:"|$)/g) {
+			$string = substr($rawline, $-[1], $+[1] - $-[1]);
+			$string =~ s/%%/__/g;
+			# check for %L{u,d,i} in strings
+			if ($string =~ /(?= 0) & $rawlines[$i] =~ /$continued_str_literal/) {
+			$i--;
+		}
+
+		if ($rawlines[$i] =~ /\b(?:$qemu_error_funcs)\s*\(/) {
+			ERROR("Error messages should not contain newlines\n" . $herecurr);
+		}
+	}
+
+# check for non-portable libc calls that have portable alternatives in QEMU
+		if ($line =~ /\bffs\(/) {
+			ERROR("use ctz32() instead of ffs()\n" . $herecurr);
+		}
+		if ($line =~ /\bffsl\(/) {
+			ERROR("use ctz32() or ctz64() instead of ffsl()\n" . $herecurr);
+		}
+		if ($line =~ /\bffsll\(/) {
+			ERROR("use ctz64() instead of ffsll()\n" . $herecurr);
+		}
+		if ($line =~ /\bbzero\(/) {
+			ERROR("use memset() instead of bzero()\n" . $herecurr);
+		}
+		if ($line =~ /\bgetpagesize\(\)/) {
+			ERROR("use qemu_real_host_page_size instead of getpagesize()\n" . $herecurr);
+		}
+		if ($line =~ /\bsysconf\(_SC_PAGESIZE\)/) {
+			ERROR("use qemu_real_host_page_size instead of sysconf(_SC_PAGESIZE)\n" . $herecurr);
+		}
+		my $non_exit_glib_asserts = qr{g_assert_cmpstr|
+						g_assert_cmpint|
+						g_assert_cmpuint|
+						g_assert_cmphex|
+						g_assert_cmpfloat|
+						g_assert_true|
+						g_assert_false|
+						g_assert_nonnull|
+						g_assert_null|
+						g_assert_no_error|
+						g_assert_error|
+						g_test_assert_expected_messages|
+						g_test_trap_assert_passed|
+						g_test_trap_assert_stdout|
+						g_test_trap_assert_stdout_unmatched|
+						g_test_trap_assert_stderr|
+						g_test_trap_assert_stderr_unmatched}x;
+		if ($realfile !~ /^tests\// &&
+			$line =~ /\b(?:$non_exit_glib_asserts)\(/) {
+			ERROR("Use g_assert or g_assert_not_reached\n". $herecurr);
+		}
+	}
+
+	if ($is_patch && $chk_signoff && $signoff == 0) {
+		ERROR("Missing Signed-off-by: line(s)\n");
+	}
+
+	# If we have no input at all, then there is nothing to report on
+	# so just keep quiet.
+	if ($#rawlines == -1) {
+		return 1;
+	}
+
+	# In mailback mode only produce a report in the negative, for
+	# things that appear to be patches.
+	if ($mailback && ($clean == 1 || !$is_patch)) {
+		return 1;
+	}
+
+	# This is not a patch, and we are are in 'no-patch' mode so
+	# just keep quiet.
+	if (!$chk_patch && !$is_patch) {
+		return 1;
+	}
+
+	if (!$is_patch && $filename !~ /cover-letter\.patch$/) {
+		ERROR("Does not appear to be a unified-diff format patch\n");
+	}
+
+	print report_dump();
+	if ($summary && !($clean == 1 && $quiet == 1)) {
+		print "$filename " if ($summary_file);
+		print "total: $cnt_error errors, $cnt_warn warnings, " .
+			"$cnt_lines lines checked\n";
+		print "\n" if ($quiet == 0);
+	}
+
+	if ($quiet == 0) {
+		# If there were whitespace errors which cleanpatch can fix
+		# then suggest that.
+#		if ($rpt_cleaners) {
+#			print "NOTE: whitespace errors detected, you may wish to use scripts/cleanpatch or\n";
+#			print "      scripts/cleanfile\n\n";
+#		}
+	}
+
+	if ($clean == 1 && $quiet == 0) {
+		print "$vname has no obvious style problems and is ready for submission.\n"
+	}
+	if ($clean == 0 && $quiet == 0) {
+		print "$vname has style problems, please review.  If any of these errors\n";
+		print "are false positives report them to the maintainer, see\n";
+		print "CHECKPATCH in MAINTAINERS.\n";
+	}
+
+	return ($no_warnings ? $clean : $cnt_error == 0);
+}
diff --git a/scripts/ci/gitlab-pipeline-status b/scripts/ci/gitlab-pipeline-status
new file mode 100755
index 000000000..78e72f600
--- /dev/null
+++ b/scripts/ci/gitlab-pipeline-status
@@ -0,0 +1,184 @@
+#!/usr/bin/env python3
+#
+# Copyright (c) 2019-2020 Red Hat, Inc.
+#
+# Author:
+#  Cleber Rosa 
+#
+# This work is licensed under the terms of the GNU GPL, version 2 or
+# later.  See the COPYING file in the top-level directory.
+
+"""
+Checks the GitLab pipeline status for a given commit ID
+"""
+
+# pylint: disable=C0103
+
+import argparse
+import http.client
+import json
+import os
+import subprocess
+import time
+import sys
+
+
+class CommunicationFailure(Exception):
+    """Failed to communicate to gitlab.com APIs."""
+
+
+class NoPipelineFound(Exception):
+    """Communication is successfull but pipeline is not found."""
+
+
+def get_local_branch_commit(branch):
+    """
+    Returns the commit sha1 for the *local* branch named "staging"
+    """
+    result = subprocess.run(['git', 'rev-parse', branch],
+                            stdin=subprocess.DEVNULL,
+                            stdout=subprocess.PIPE,
+                            stderr=subprocess.DEVNULL,
+                            cwd=os.path.dirname(__file__),
+                            universal_newlines=True).stdout.strip()
+    if result == branch:
+        raise ValueError("There's no local branch named '%s'" % branch)
+    if len(result) != 40:
+        raise ValueError("Branch '%s' HEAD doesn't look like a sha1" % branch)
+    return result
+
+
+def get_pipeline_status(project_id, commit_sha1):
+    """
+    Returns the JSON content of the pipeline status API response
+    """
+    url = '/api/v4/projects/{}/pipelines?sha={}'.format(project_id,
+                                                        commit_sha1)
+    connection = http.client.HTTPSConnection('gitlab.com')
+    connection.request('GET', url=url)
+    response = connection.getresponse()
+    if response.code != http.HTTPStatus.OK:
+        raise CommunicationFailure("Failed to receive a successful response")
+    json_response = json.loads(response.read())
+
+    # As far as I can tell, there should be only one pipeline for the same
+    # project + commit. If this assumption is false, we can add further
+    # filters to the url, such as username, and order_by.
+    if not json_response:
+        raise NoPipelineFound("No pipeline found")
+    return json_response[0]
+
+
+def wait_on_pipeline_success(timeout, interval,
+                             project_id, commit_sha):
+    """
+    Waits for the pipeline to finish within the given timeout
+    """
+    start = time.time()
+    while True:
+        if time.time() >= (start + timeout):
+            msg = ("Timeout (-t/--timeout) of %i seconds reached, "
+                   "won't wait any longer for the pipeline to complete")
+            msg %= timeout
+            print(msg)
+            return False
+
+        try:
+            status = get_pipeline_status(project_id, commit_sha)
+        except NoPipelineFound:
+            print('Pipeline has not been found, it may not have been created yet.')
+            time.sleep(1)
+            continue
+
+        pipeline_status = status['status']
+        status_to_wait = ('created', 'waiting_for_resource', 'preparing',
+                          'pending', 'running')
+        if pipeline_status in status_to_wait:
+            print('%s...' % pipeline_status)
+            time.sleep(interval)
+            continue
+
+        if pipeline_status == 'success':
+            return True
+
+        msg = "Pipeline failed, check: %s" % status['web_url']
+        print(msg)
+        return False
+
+
+def create_parser():
+    parser = argparse.ArgumentParser(
+        prog='pipeline-status',
+        description='check or wait on a pipeline status')
+
+    parser.add_argument('-t', '--timeout', type=int, default=7200,
+                        help=('Amount of time (in seconds) to wait for the '
+                              'pipeline to complete.  Defaults to '
+                              '%(default)s'))
+    parser.add_argument('-i', '--interval', type=int, default=60,
+                        help=('Amount of time (in seconds) to wait between '
+                              'checks of the pipeline status.  Defaults '
+                              'to %(default)s'))
+    parser.add_argument('-w', '--wait', action='store_true', default=False,
+                        help=('Wether to wait, instead of checking only once '
+                              'the status of a pipeline'))
+    parser.add_argument('-p', '--project-id', type=int, default=11167699,
+                        help=('The GitLab project ID. Defaults to the project '
+                              'for https://gitlab.com/qemu-project/qemu, that '
+                              'is, "%(default)s"'))
+    parser.add_argument('-b', '--branch', type=str, default="staging",
+                        help=('Specify the branch to check. '
+                              'Use HEAD for your current branch. '
+                              'Otherwise looks at "%(default)s"'))
+    parser.add_argument('-c', '--commit',
+                        default=None,
+                        help=('Look for a pipeline associated with the given '
+                              'commit.  If one is not explicitly given, the '
+                              'commit associated with the default branch '
+                              'is used.'))
+    parser.add_argument('--verbose', action='store_true', default=False,
+                        help=('A minimal verbosity level that prints the '
+                              'overall result of the check/wait'))
+    return parser
+
+def main():
+    """
+    Script entry point
+    """
+    parser = create_parser()
+    args = parser.parse_args()
+
+    if not args.commit:
+        args.commit = get_local_branch_commit(args.branch)
+
+    success = False
+    try:
+        if args.wait:
+            success = wait_on_pipeline_success(
+                args.timeout,
+                args.interval,
+                args.project_id,
+                args.commit)
+        else:
+            status = get_pipeline_status(args.project_id,
+                                         args.commit)
+            success = status['status'] == 'success'
+    except Exception as error:      # pylint: disable=W0703
+        if args.verbose:
+            print("ERROR: %s" % error.args[0])
+    except KeyboardInterrupt:
+        if args.verbose:
+            print("Exiting on user's request")
+
+    if success:
+        if args.verbose:
+            print('success')
+        sys.exit(0)
+    else:
+        if args.verbose:
+            print('failure')
+        sys.exit(1)
+
+
+if __name__ == '__main__':
+    main()
diff --git a/scripts/clean-header-guards.pl b/scripts/clean-header-guards.pl
new file mode 100755
index 000000000..a6680253b
--- /dev/null
+++ b/scripts/clean-header-guards.pl
@@ -0,0 +1,216 @@
+#!/usr/bin/env perl
+#
+# Clean up include guards in headers
+#
+# Copyright (C) 2016 Red Hat, Inc.
+#
+# Authors:
+#  Markus Armbruster 
+#
+# This work is licensed under the terms of the GNU GPL, version 2 or
+# (at your option) any later version. See the COPYING file in the
+# top-level directory.
+#
+# Usage: scripts/clean-header-guards.pl [OPTION]... [FILE]...
+#     -c CC     Use a compiler other than cc
+#     -n        Suppress actual cleanup
+#     -v        Show which files are cleaned up, and which are skipped
+#
+# Does the following:
+# - Header files without a recognizable header guard are skipped.
+# - Clean up any untidy header guards in-place.  Warn if the cleanup
+#   renames guard symbols, and explain how to find occurrences of these
+#   symbols that may have to be updated manually.
+# - Warn about duplicate header guard symbols.  To make full use of
+#   this warning, you should clean up *all* headers in one run.
+# - Warn when preprocessing a header with its guard symbol defined
+#   produces anything but whitespace.  The preprocessor is run like
+#   "cc -E -DGUARD_H -c -P -", and fed the test program on stdin.
+
+use strict;
+use warnings;
+use Getopt::Std;
+
+# Stuff we don't want to clean because we import it into our tree:
+my $exclude = qr,^(disas/libvixl/|include/standard-headers/
+    |linux-headers/|pc-bios/|tests/tcg/|tests/multiboot/),x;
+# Stuff that is expected to fail the preprocessing test:
+my $exclude_cpp = qr,^include/libdecnumber/decNumberLocal.h,;
+
+my %guarded = ();
+my %old_guard = ();
+
+our $opt_c = "cc";
+our $opt_n = 0;
+our $opt_v = 0;
+getopts("c:nv");
+
+sub skipping {
+    my ($fname, $msg, $line1, $line2) = @_;
+
+    return if !$opt_v or $fname =~ $exclude;
+    print "$fname skipped: $msg\n";
+    print "    $line1" if defined $line1;
+    print "    $line2" if defined $line2;
+}
+
+sub gripe {
+    my ($fname, $msg) = @_;
+    return if $fname =~ $exclude;
+    print STDERR "$fname: warning: $msg\n";
+}
+
+sub slurp {
+    my ($fname) = @_;
+    local $/;                   # slurp
+    open(my $in, "<", $fname)
+        or die "can't open $fname for reading: $!";
+    return <$in>;
+}
+
+sub unslurp {
+    my ($fname, $contents) = @_;
+    open (my $out, ">", $fname)
+        or die "can't open $fname for writing: $!";
+    print $out $contents
+        or die "error writing $fname: $!";
+    close $out
+        or die "error writing $fname: $!";
+}
+
+sub fname2guard {
+    my ($fname) = @_;
+    $fname =~ tr/a-z/A-Z/;
+    $fname =~ tr/A-Z0-9/_/cs;
+    return $fname;
+}
+
+sub preprocess {
+    my ($fname, $guard) = @_;
+
+    open(my $pipe, "-|", "$opt_c -E -D$guard -c -P - <$fname")
+        or die "can't run $opt_c: $!";
+    while (<$pipe>) {
+        if ($_ =~ /\S/) {
+            gripe($fname, "not blank after preprocessing");
+            last;
+        }
+    }
+    close $pipe
+        or gripe($fname, "preprocessing failed ($opt_c exit status $?)");
+}
+
+for my $fname (@ARGV) {
+    my $text = slurp($fname);
+
+    $text =~ m,\A(\s*\n|\s*//\N*\n|\s*/\*.*?\*/\s*\n)*|,sg;
+    my $pre = $&;
+    unless ($text =~ /\G(.*\n)/g) {
+        $text =~ /\G.*/;
+        skipping($fname, "no recognizable header guard", "$&\n");
+        next;
+    }
+    my $line1 = $1;
+    unless ($text =~ /\G(.*\n)/g) {
+        $text =~ /\G.*/;
+        skipping($fname, "no recognizable header guard", "$&\n");
+        next;
+    }
+    my $line2 = $1;
+    my $body = substr($text, pos($text));
+
+    unless ($line1 =~ /^\s*\#\s*(if\s*\!\s*defined(\s*\()?|ifndef)\s*
+                       ([A-Za-z0-9_]+)/x) {
+        skipping($fname, "no recognizable header guard", $line1, $line2);
+        next;
+    }
+    my $guard = $3;
+    unless ($line2 =~ /^\s*\#\s*define\s+([A-Za-z0-9_]+)/) {
+        skipping($fname, "no recognizable header guard", $line1, $line2);
+        next;
+    }
+    my $guard2 = $1;
+    unless ($guard2 eq $guard) {
+        skipping($fname, "mismatched header guard ($guard vs. $guard2) ",
+                 $line1, $line2);
+        next;
+    }
+
+    unless ($body =~ m,\A((.*\n)*)
+                       ([ \t]*\#[ \t]*endif([ \t]*\N*)\n)
+                       ((?s)(\s*\n|\s*//\N*\n|\s*/\*.*?\*/\s*\n)*)
+                       \Z,x) {
+        skipping($fname, "can't find end of header guard");
+        next;
+    }
+    $body = $1;
+    my $line3 = $3;
+    my $endif_comment = $4;
+    my $post = $5;
+
+    my $oldg = $guard;
+
+    unless ($fname =~ $exclude) {
+        my @issues = ();
+        $guard =~ tr/a-z/A-Z/
+            and push @issues, "contains lowercase letters";
+        $guard =~ s/^_+//
+            and push @issues, "is a reserved identifier";
+        $guard =~ s/(_H)?_*$/_H/
+            and $& ne "_H" and push @issues, "doesn't end with _H";
+        unless ($guard =~ /^[A-Z][A-Z0-9_]*_H/) {
+            skipping($fname, "can't clean up odd guard symbol $oldg\n",
+                     $line1, $line2);
+            next;
+        }
+
+        my $exp = fname2guard($fname =~ s,.*/,,r);
+        unless ($guard =~ /\Q$exp\E\Z/) {
+            $guard = fname2guard($fname =~ s,^include/,,r);
+            push @issues, "doesn't match the file name";
+        }
+        if (@issues and $opt_v) {
+            print "$fname guard $oldg needs cleanup:\n    ",
+                join(", ", @issues), "\n";
+        }
+    }
+
+    $old_guard{$guard} = $oldg
+        if $guard ne $oldg;
+
+    if (exists $guarded{$guard}) {
+        gripe($fname, "guard $guard also used by $guarded{$guard}");
+    } else {
+        $guarded{$guard} = $fname;
+    }
+
+    unless ($fname =~ $exclude) {
+        my $newl1 = "#ifndef $guard\n";
+        my $newl2 = "#define $guard\n";
+        my $newl3 = "#endif\n";
+        $newl3 =~ s,\Z, /* $guard */, if $endif_comment;
+        if ($line1 ne $newl1 or $line2 ne $newl2 or $line3 ne $newl3) {
+            $pre =~ s/\n*\Z/\n\n/ if $pre =~ /\N/;
+            $body =~ s/\A\n*/\n/;
+            if ($opt_n) {
+                print "$fname would be cleaned up\n" if $opt_v;
+            } else {
+                unslurp($fname, "$pre$newl1$newl2$body$newl3$post");
+                print "$fname cleaned up\n" if $opt_v;
+            }
+        }
+    }
+
+    preprocess($fname, $opt_n ? $oldg : $guard)
+        unless $fname =~ $exclude or $fname =~ $exclude_cpp;
+}
+
+if (%old_guard) {
+    print STDERR "warning: guard symbol renaming may break things\n";
+    for my $guard (sort keys %old_guard) {
+        print STDERR "    $old_guard{$guard} -> $guard\n";
+    }
+    print STDERR "To find uses that may have to be updated try:\n";
+    print STDERR "    git grep -Ew '", join("|", sort values %old_guard),
+        "'\n";
+}
diff --git a/scripts/clean-includes b/scripts/clean-includes
new file mode 100755
index 000000000..aaa7d4ceb
--- /dev/null
+++ b/scripts/clean-includes
@@ -0,0 +1,198 @@
+#!/bin/sh -e
+#
+# Clean up QEMU #include lines by ensuring that qemu/osdep.h
+# is the first include listed in .c files, and no headers provided
+# by osdep.h itself are redundantly included in either .c or .h files.
+#
+# Copyright (c) 2015 Linaro Limited
+#
+# Authors:
+#  Peter Maydell 
+#
+# This work is licensed under the terms of the GNU GPL, version 2
+# or (at your option) any later version. See the COPYING file in
+# the top-level directory.
+
+# Usage:
+#   clean-includes [--git subjectprefix] [--check-dup-head] file ...
+# or
+#   clean-includes [--git subjectprefix] [--check-dup-head] --all
+#
+# If the --git subjectprefix option is given, then after making
+# the changes to the files this script will create a git commit
+# with the subject line "subjectprefix: Clean up includes"
+# and a boilerplate commit message.
+#
+# If --check-dup-head is specified, additionally check for duplicate
+# header includes.
+#
+# Using --all will cause clean-includes to run on the whole source
+# tree (excluding certain directories which are known not to need
+# handling).
+
+# This script requires Coccinelle to be installed.
+
+# .c files will have the osdep.h included added, and redundant
+# includes removed.
+# .h files will have redundant includes (including includes of osdep.h)
+# removed.
+# Other files (including C++ and ObjectiveC) can't be handled by this script.
+
+# The following one-liner may be handy for finding files to run this on.
+# However some caution is required regarding files that might be part
+# of the guest agent or standalone tests.
+
+# for i in $(git ls-tree --name-only HEAD) ; do test -f $i && \
+#   grep -E '^# *include' $i | head -1 | grep 'osdep.h' ; test $? != 0 && \
+#   echo $i ; done
+
+
+GIT=no
+DUPHEAD=no
+
+# Extended regular expression defining files to ignore when using --all
+XDIRREGEX='^(tests/tcg|tests/multiboot|pc-bios|disas/libvixl)'
+
+while true
+do
+    case $1 in
+    "--git")
+         if [ $# -eq 1 ]; then
+             echo "--git option requires an argument"
+             exit 1
+         fi
+         GITSUBJ="$2"
+         GIT=yes
+         shift
+         shift
+         ;;
+    "--check-dup-head")
+        DUPHEAD=yes
+        shift
+        ;;
+    "--")
+        shift
+        break
+        ;;
+    *)
+        break
+        ;;
+   esac
+done
+
+if [ $# -eq 0 ]; then
+    echo "Usage: clean-includes [--git subjectprefix] [--check-dup-head] [--all | foo.c ...]"
+    echo "(modifies the files in place)"
+    exit 1
+fi
+
+if [ "$1" = "--all" ]; then
+    # We assume there are no files in the tree with spaces in their name
+    set -- $(git ls-files '*.[ch]' | grep -E -v "$XDIRREGEX")
+fi
+
+# Annoyingly coccinelle won't read a scriptfile unless its
+# name ends '.cocci', so write it out to a tempfile with the
+# right kind of name.
+COCCIFILE="$(mktemp --suffix=.cocci)"
+
+trap 'rm -f -- "$COCCIFILE"' INT TERM HUP EXIT
+
+cat >"$COCCIFILE" <
+)
+EOT
+
+for f in "$@"; do
+  case "$f" in
+    *.c.inc)
+      # These aren't standalone C source files
+      echo "SKIPPING $f (not a standalone source file)"
+      continue
+      ;;
+    *.c)
+      MODE=c
+      ;;
+    *include/qemu/osdep.h | \
+    *include/qemu/compiler.h | \
+    *include/qemu/qemu-plugin.h | \
+    *include/glib-compat.h | \
+    *include/sysemu/os-posix.h | \
+    *include/sysemu/os-win32.h | \
+    *include/standard-headers/ )
+      # Removing include lines from osdep.h itself would be counterproductive.
+      echo "SKIPPING $f (special case header)"
+      continue
+      ;;
+    *include/standard-headers/*)
+      echo "SKIPPING $f (autogenerated header)"
+      continue
+      ;;
+    *.h)
+      MODE=h
+      ;;
+    *)
+      echo "WARNING: ignoring $f (cannot handle non-C files)"
+      continue
+      ;;
+  esac
+
+  if [ "$MODE" = "c" ]; then
+    # First, use Coccinelle to add qemu/osdep.h before the first existing include
+    # (this will add two lines if the file uses both "..." and <...> #includes,
+    # but we will remove the extras in the next step)
+    spatch  --in-place --no-show-diff --cocci-file "$COCCIFILE" "$f"
+
+    # Now remove any duplicate osdep.h includes
+    perl -n -i -e 'print if !/#include "qemu\/osdep.h"/ || !$n++;' "$f"
+  else
+    # Remove includes of osdep.h itself
+    perl -n -i -e 'print if !/\s*#\s*include\s*(["<][^>"]*[">])/ ||
+                            ! (grep { $_ eq $1 } qw ("qemu/osdep.h"))' "$f"
+  fi
+
+  # Remove includes that osdep.h already provides
+  perl -n -i -e 'print if !/\s*#\s*include\s*(["<][^>"]*[">])/ ||
+                          ! (grep { $_ eq $1 } qw (
+           "config-host.h" "config-target.h" "qemu/compiler.h"
+                
+               
+                
+               
+                
+           "sysemu/os-posix.h, sysemu/os-win32.h "glib-compat.h"
+           "qemu/typedefs.h"
+            ))' "$f"
+
+done
+
+if [ "$DUPHEAD" = "yes" ]; then
+    egrep "^[[:space:]]*#[[:space:]]*include" "$@" | tr -d '[:blank:]' \
+        | sort | uniq -c | awk '{if ($1 > 1) print $0}'
+    if [ $? -eq 0 ]; then
+        echo "Found duplicate header file includes. Please check the above files manually."
+        exit 1
+    fi
+fi
+
+if [ "$GIT" = "yes" ]; then
+    git add -- "$@"
+    git commit --signoff -F - <
+#
+# This work is licensed under the terms of the GNU GPL, version 2 or
+# later.  See the COPYING file in the top-level directory.
+
+# Usage: cleanup-trace-events.pl trace-events
+#
+# Print cleaned up trace-events to standard output.
+
+use warnings;
+use strict;
+use File::Basename;
+
+my @files = ();
+my $events = '';
+my %seen = ();
+
+sub out {
+    print sort @files;
+    print $events;
+    @files = ();
+    $events = '';
+    %seen = ();
+}
+
+$#ARGV == 0 or die "usage: $0 FILE";
+my $in = $ARGV[0];
+my $dir = dirname($in);
+open(IN, $in) or die "open $in: $!";
+chdir($dir) or die "chdir $dir: $!";
+
+while () {
+    if (/^(disable |(tcg) |(vcpu) )*([a-z_0-9]+)\(/i) {
+        my $pat = "trace_$4";
+        $pat .= '_tcg' if defined $2;
+        open GREP, '-|', 'git', 'grep', '-lw',
+	    defined $3 ? () : ('--max-depth', '1'),
+	    $pat
+            or die "run git grep: $!";
+        while (my $fname = ) {
+            chomp $fname;
+            next if $seen{$fname} || $fname eq 'trace-events';
+            $seen{$fname} = 1;
+            push @files, "# $fname\n";
+        }
+        unless (close GREP) {
+            die "close git grep: $!"
+                if $!;
+            next;
+        }
+    } elsif (/^# ([^ ]*\.[ch])$/) {
+        out;
+        next;
+    } elsif (!/^#|^$/) {
+        warn "unintelligible line";
+    }
+    $events .= $_;
+}
+
+out;
+close(IN) or die "close $in: $!";
diff --git a/scripts/cocci-macro-file.h b/scripts/cocci-macro-file.h
new file mode 100644
index 000000000..c6bbc05ba
--- /dev/null
+++ b/scripts/cocci-macro-file.h
@@ -0,0 +1,119 @@
+/* Macro file for Coccinelle
+ *
+ * Copyright (C) 2015 Red Hat, Inc.
+ *
+ * Authors:
+ *  Paolo Bonzini 
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or, at your
+ * option, any later version.  See the COPYING file in the top-level directory.
+ */
+
+/* Coccinelle only does limited parsing of headers, and chokes on some idioms
+ * defined in compiler.h and queue.h.  Macros that Coccinelle must know about
+ * in order to parse .c files must be in a separate macro file---which is
+ * exactly what you're staring at now.
+ *
+ * To use this file, add the "--macro-file scripts/cocci-macro-file.h" to the
+ * Coccinelle command line.
+ */
+
+/* From qemu/compiler.h */
+#define QEMU_GNUC_PREREQ(maj, min) 1
+#define QEMU_NORETURN __attribute__ ((__noreturn__))
+#define QEMU_WARN_UNUSED_RESULT __attribute__((warn_unused_result))
+#define QEMU_SENTINEL __attribute__((sentinel))
+
+#if defined(_WIN32) && (defined(__x86_64__) || defined(__i386__))
+# define QEMU_PACKED __attribute__((gcc_struct, packed))
+#else
+# define QEMU_PACKED __attribute__((packed))
+#endif
+
+#define cat(x,y) x ## y
+#define cat2(x,y) cat(x,y)
+#define QEMU_BUILD_BUG_ON(x) \
+    typedef char cat2(qemu_build_bug_on__,__LINE__)[(x)?-1:1] __attribute__((unused));
+
+#define GCC_FMT_ATTR(n, m) __attribute__((format(gnu_printf, n, m)))
+
+#define xglue(x, y) x ## y
+#define glue(x, y) xglue(x, y)
+#define stringify(s)	tostring(s)
+#define tostring(s)	#s
+
+#define typeof_field(type, field) typeof(((type *)0)->field)
+#define type_check(t1,t2) ((t1*)0 - (t2*)0)
+
+/* From qemu/queue.h */
+
+#define QLIST_HEAD(name, type)                                          \
+struct name {                                                           \
+        struct type *lh_first;  /* first element */                     \
+}
+
+#define QLIST_HEAD_INITIALIZER(head)                                    \
+        { NULL }
+
+#define QLIST_ENTRY(type)                                               \
+struct {                                                                \
+        struct type *le_next;   /* next element */                      \
+        struct type **le_prev;  /* address of previous next element */  \
+}
+
+/*
+ * Singly-linked List definitions.
+ */
+#define QSLIST_HEAD(name, type)                                          \
+struct name {                                                           \
+        struct type *slh_first; /* first element */                     \
+}
+
+#define QSLIST_HEAD_INITIALIZER(head)                                    \
+        { NULL }
+
+#define QSLIST_ENTRY(type)                                               \
+struct {                                                                \
+        struct type *sle_next;  /* next element */                      \
+}
+
+/*
+ * Simple queue definitions.
+ */
+#define QSIMPLEQ_HEAD(name, type)                                       \
+struct name {                                                           \
+    struct type *sqh_first;    /* first element */                      \
+    struct type **sqh_last;    /* addr of last next element */          \
+}
+
+#define QSIMPLEQ_HEAD_INITIALIZER(head)                                 \
+    { NULL, &(head).sqh_first }
+
+#define QSIMPLEQ_ENTRY(type)                                            \
+struct {                                                                \
+    struct type *sqe_next;    /* next element */                        \
+}
+
+/*
+ * Tail queue definitions.
+ */
+#define QTAILQ_HEAD(name, type)                                         \
+union name {                                                            \
+        struct type *tqh_first;       /* first element */               \
+        QTailQLink tqh_circ;          /* link for last element */       \
+}
+
+#define QTAILQ_HEAD_INITIALIZER(head)                                   \
+        { .tqh_circ = { NULL, &(head).tqh_circ } }
+
+#define QTAILQ_ENTRY(type)                                              \
+union {                                                                 \
+        struct type *tqe_next;        /* next element */                \
+        QTailQLink tqe_circ;          /* link for prev element */       \
+}
+
+/* From glib */
+#define g_assert_cmpint(a, op, b)   g_assert(a op b)
+#define g_assert_cmpuint(a, op, b)   g_assert(a op b)
+#define g_assert_cmphex(a, op, b)   g_assert(a op b)
+#define g_assert_cmpstr(a, op, b)   g_assert(strcmp(a, b) op 0)
diff --git a/scripts/coccinelle/cpu-reset.cocci b/scripts/coccinelle/cpu-reset.cocci
new file mode 100644
index 000000000..396a724e5
--- /dev/null
+++ b/scripts/coccinelle/cpu-reset.cocci
@@ -0,0 +1,47 @@
+// Convert targets using the old CPUState reset to DeviceState reset
+//
+// Copyright Linaro Ltd 2020
+// This work is licensed under the terms of the GNU GPLv2 or later.
+//
+// spatch --macro-file scripts/cocci-macro-file.h \
+//        --sp-file scripts/coccinelle/cpu-reset.cocci \
+//        --keep-comments --smpl-spacing --in-place --include-headers --dir target
+//
+// For simplicity we assume some things about the code we're modifying
+// that happen to be true for all our targets:
+//  * all cpu_class_set_parent_reset() callsites have a 'DeviceClass *dc' local
+//  * the parent reset field in the target CPU class is 'parent_reset'
+//  * no reset function already has a 'dev' local
+
+@@
+identifier cpu, x;
+typedef CPUState;
+@@
+struct x {
+...
+- void (*parent_reset)(CPUState *cpu);
++ DeviceReset parent_reset;
+...
+};
+@ rule1 @
+identifier resetfn;
+expression resetfield;
+identifier cc;
+@@
+- cpu_class_set_parent_reset(cc, resetfn, resetfield)
++ device_class_set_parent_reset(dc, resetfn, resetfield)
+@@
+identifier rule1.resetfn;
+identifier cpu, cc;
+typedef CPUState, DeviceState;
+@@
+-resetfn(CPUState *cpu)
+-{
++resetfn(DeviceState *dev)
++{
++    CPUState *cpu = CPU(dev);
+<...
+-    cc->parent_reset(cpu);
++    cc->parent_reset(dev);
+...>
+}
diff --git a/scripts/coccinelle/cpu_restore_state.cocci b/scripts/coccinelle/cpu_restore_state.cocci
new file mode 100644
index 000000000..61bc749d1
--- /dev/null
+++ b/scripts/coccinelle/cpu_restore_state.cocci
@@ -0,0 +1,19 @@
+// Remove unneeded tests before calling cpu_restore_state
+//
+// spatch --macro-file scripts/cocci-macro-file.h \
+//        --sp-file ./scripts/coccinelle/cpu_restore_state.cocci \
+//        --keep-comments --in-place --use-gitgrep --dir target
+@@
+expression A;
+expression C;
+@@
+-if (A) {
+     cpu_restore_state(C, A);
+-}
+@@
+expression A;
+expression C;
+@@
+- cpu_restore_state(C, A);
+- cpu_loop_exit(C);
++ cpu_loop_exit_restore(C, A);
diff --git a/scripts/coccinelle/err-bad-newline.cocci b/scripts/coccinelle/err-bad-newline.cocci
new file mode 100644
index 000000000..539442187
--- /dev/null
+++ b/scripts/coccinelle/err-bad-newline.cocci
@@ -0,0 +1,49 @@
+// Error messages should not contain newlines.  This script finds
+// messages that do.  Fixing them is manual.
+@r@
+expression errp, err, eno, cls, fmt, ap;
+position p;
+@@
+(
+error_vreport(fmt, ap)@p
+|
+warn_vreport(fmt, ap)@p
+|
+info_vreport(fmt, ap)@p
+|
+error_report(fmt, ...)@p
+|
+warn_report(fmt, ...)@p
+|
+info_report(fmt, ...)@p
+|
+error_report_once(fmt, ...)@p
+|
+warn_report_once(fmt, ...)@p
+|
+error_setg(errp, fmt, ...)@p
+|
+error_setg_errno(errp, eno, fmt, ...)@p
+|
+error_setg_win32(errp, eno, cls, fmt, ...)@p
+|
+error_propagate_prepend(errp, err, fmt, ...)@p
+|
+error_vprepend(errp, fmt, ap)@p
+|
+error_prepend(errp, fmt, ...)@p
+|
+error_setg_file_open(errp, eno, cls, fmt, ...)@p
+|
+warn_reportf_err(errp, fmt, ...)@p
+|
+error_reportf_err(errp, fmt, ...)@p
+|
+error_set(errp, cls, fmt, ...)@p
+)
+@script:python@
+fmt << r.fmt;
+p << r.p;
+@@
+if "\\n" in str(fmt):
+    print("%s:%s:%s:%s" % (p[0].file, p[0].line, p[0].column, fmt))
diff --git a/scripts/coccinelle/error-use-after-free.cocci b/scripts/coccinelle/error-use-after-free.cocci
new file mode 100644
index 000000000..72ae9fdeb
--- /dev/null
+++ b/scripts/coccinelle/error-use-after-free.cocci
@@ -0,0 +1,52 @@
+// Find and fix trivial use-after-free of Error objects
+//
+// Copyright (c) 2020 Virtuozzo International GmbH.
+//
+// This program is free software; you can redistribute it and/or
+// modify it under the terms of the GNU General Public License as
+// published by the Free Software Foundation; either version 2 of the
+// License, or (at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with this program.  If not, see
+// .
+//
+// How to use:
+// spatch --sp-file scripts/coccinelle/error-use-after-free.cocci \
+//  --macro-file scripts/cocci-macro-file.h --in-place \
+//  --no-show-diff ( FILES... | --use-gitgrep . )
+
+@ exists@
+identifier fn, fn2;
+expression err;
+@@
+
+ fn(...)
+ {
+     <...
+(
+     error_free(err);
++    err = NULL;
+|
+     error_report_err(err);
++    err = NULL;
+|
+     error_reportf_err(err, ...);
++    err = NULL;
+|
+     warn_report_err(err);
++    err = NULL;
+|
+     warn_reportf_err(err, ...);
++    err = NULL;
+)
+     ... when != err = NULL
+         when != exit(...)
+     fn2(..., err, ...)
+     ...>
+ }
diff --git a/scripts/coccinelle/error_propagate_null.cocci b/scripts/coccinelle/error_propagate_null.cocci
new file mode 100644
index 000000000..c23638007
--- /dev/null
+++ b/scripts/coccinelle/error_propagate_null.cocci
@@ -0,0 +1,10 @@
+// error_propagate() already ignores local_err==NULL, so there's
+// no need to check it before calling.
+
+@@
+identifier L;
+expression E;
+@@
+-if (L) {
+     error_propagate(E, L);
+-}
diff --git a/scripts/coccinelle/errp-guard.cocci b/scripts/coccinelle/errp-guard.cocci
new file mode 100644
index 000000000..6e789acf2
--- /dev/null
+++ b/scripts/coccinelle/errp-guard.cocci
@@ -0,0 +1,336 @@
+// Use ERRP_GUARD() (see include/qapi/error.h)
+//
+// Copyright (c) 2020 Virtuozzo International GmbH.
+//
+// This program is free software; you can redistribute it and/or
+// modify it under the terms of the GNU General Public License as
+// published by the Free Software Foundation; either version 2 of the
+// License, or (at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with this program.  If not, see
+// .
+//
+// Usage example:
+// spatch --sp-file scripts/coccinelle/errp-guard.cocci \
+//  --macro-file scripts/cocci-macro-file.h --in-place \
+//  --no-show-diff --max-width 80 FILES...
+//
+// Note: --max-width 80 is needed because coccinelle default is less
+// than 80, and without this parameter coccinelle may reindent some
+// lines which fit into 80 characters but not to coccinelle default,
+// which in turn produces extra patch hunks for no reason.
+
+// Switch unusual Error ** parameter names to errp
+// (this is necessary to use ERRP_GUARD).
+//
+// Disable optional_qualifier to skip functions with
+// "Error *const *errp" parameter.
+//
+// Skip functions with "assert(_errp && *_errp)" statement, because
+// that signals unusual semantics, and the parameter name may well
+// serve a purpose. (like nbd_iter_channel_error()).
+//
+// Skip util/error.c to not touch, for example, error_propagate() and
+// error_propagate_prepend().
+@ depends on !(file in "util/error.c") disable optional_qualifier@
+identifier fn;
+identifier _errp != errp;
+@@
+
+ fn(...,
+-   Error **_errp
++   Error **errp
+    ,...)
+ {
+(
+     ... when != assert(_errp && *_errp)
+&
+     <...
+-    _errp
++    errp
+     ...>
+)
+ }
+
+// Add invocation of ERRP_GUARD() to errp-functions where // necessary
+//
+// Note, that without "when any" the final "..." does not mach
+// something matched by previous pattern, i.e. the rule will not match
+// double error_prepend in control flow like in
+// vfio_set_irq_signaling().
+//
+// Note, "exists" says that we want apply rule even if it does not
+// match on all possible control flows (otherwise, it will not match
+// standard pattern when error_propagate() call is in if branch).
+@ disable optional_qualifier exists@
+identifier fn, local_err;
+symbol errp;
+@@
+
+ fn(..., Error **errp, ...)
+ {
++   ERRP_GUARD();
+    ...  when != ERRP_GUARD();
+(
+(
+    error_append_hint(errp, ...);
+|
+    error_prepend(errp, ...);
+|
+    error_vprepend(errp, ...);
+)
+    ... when any
+|
+    Error *local_err = NULL;
+    ...
+(
+    error_propagate_prepend(errp, local_err, ...);
+|
+    error_propagate(errp, local_err);
+)
+    ...
+)
+ }
+
+// Warn when several Error * definitions are in the control flow.
+// This rule is not chained to rule1 and less restrictive, to cover more
+// functions to warn (even those we are not going to convert).
+//
+// Note, that even with one (or zero) Error * definition in the each
+// control flow we may have several (in total) Error * definitions in
+// the function. This case deserves attention too, but I don't see
+// simple way to match with help of coccinelle.
+@check1 disable optional_qualifier exists@
+identifier fn, _errp, local_err, local_err2;
+position p1, p2;
+@@
+
+ fn(..., Error **_errp, ...)
+ {
+     ...
+     Error *local_err = NULL;@p1
+     ... when any
+     Error *local_err2 = NULL;@p2
+     ... when any
+ }
+
+@ script:python @
+fn << check1.fn;
+p1 << check1.p1;
+p2 << check1.p2;
+@@
+
+print('Warning: function {} has several definitions of '
+      'Error * local variable: at {}:{} and then at {}:{}'.format(
+          fn, p1[0].file, p1[0].line, p2[0].file, p2[0].line))
+
+// Warn when several propagations are in the control flow.
+@check2 disable optional_qualifier exists@
+identifier fn, _errp;
+position p1, p2;
+@@
+
+ fn(..., Error **_errp, ...)
+ {
+     ...
+(
+     error_propagate_prepend(_errp, ...);@p1
+|
+     error_propagate(_errp, ...);@p1
+)
+     ...
+(
+     error_propagate_prepend(_errp, ...);@p2
+|
+     error_propagate(_errp, ...);@p2
+)
+     ... when any
+ }
+
+@ script:python @
+fn << check2.fn;
+p1 << check2.p1;
+p2 << check2.p2;
+@@
+
+print('Warning: function {} propagates to errp several times in '
+      'one control flow: at {}:{} and then at {}:{}'.format(
+          fn, p1[0].file, p1[0].line, p2[0].file, p2[0].line))
+
+// Match functions with propagation of local error to errp.
+// We want to refer these functions in several following rules, but I
+// don't know a proper way to inherit a function, not just its name
+// (to not match another functions with same name in following rules).
+// Not-proper way is as follows: rename errp parameter in functions
+// header and match it in following rules. Rename it back after all
+// transformations.
+//
+// The common case is a single definition of local_err with at most one
+// error_propagate_prepend() or error_propagate() on each control-flow
+// path. Functions with multiple definitions or propagates we want to
+// examine manually. Rules check1 and check2 emit warnings to guide us
+// to them.
+//
+// Note that we match not only this "common case", but any function,
+// which has the "common case" on at least one control-flow path.
+@rule1 disable optional_qualifier exists@
+identifier fn, local_err;
+symbol errp;
+@@
+
+ fn(..., Error **
+-    errp
++    ____
+    , ...)
+ {
+     ...
+     Error *local_err = NULL;
+     ...
+(
+     error_propagate_prepend(errp, local_err, ...);
+|
+     error_propagate(errp, local_err);
+)
+     ...
+ }
+
+// Convert special case with goto separately.
+// I tried merging this into the following rule the obvious way, but
+// it made Coccinelle hang on block.c
+//
+// Note interesting thing: if we don't do it here, and try to fixup
+// "out: }" things later after all transformations (the rule will be
+// the same, just without error_propagate() call), coccinelle fails to
+// match this "out: }".
+@ disable optional_qualifier@
+identifier rule1.fn, rule1.local_err, out;
+symbol errp;
+@@
+
+ fn(..., Error ** ____, ...)
+ {
+     <...
+-    goto out;
++    return;
+     ...>
+- out:
+-    error_propagate(errp, local_err);
+ }
+
+// Convert most of local_err related stuff.
+//
+// Note, that we inherit rule1.fn and rule1.local_err names, not
+// objects themselves. We may match something not related to the
+// pattern matched by rule1. For example, local_err may be defined with
+// the same name in different blocks inside one function, and in one
+// block follow the propagation pattern and in other block doesn't.
+//
+// Note also that errp-cleaning functions
+//   error_free_errp
+//   error_report_errp
+//   error_reportf_errp
+//   warn_report_errp
+//   warn_reportf_errp
+// are not yet implemented. They must call corresponding Error* -
+// freeing function and then set *errp to NULL, to avoid further
+// propagation to original errp (consider ERRP_GUARD in use).
+// For example, error_free_errp may look like this:
+//
+//    void error_free_errp(Error **errp)
+//    {
+//        error_free(*errp);
+//        *errp = NULL;
+//    }
+@ disable optional_qualifier exists@
+identifier rule1.fn, rule1.local_err;
+expression list args;
+symbol errp;
+@@
+
+ fn(..., Error ** ____, ...)
+ {
+     <...
+(
+-    Error *local_err = NULL;
+|
+
+// Convert error clearing functions
+(
+-    error_free(local_err);
++    error_free_errp(errp);
+|
+-    error_report_err(local_err);
++    error_report_errp(errp);
+|
+-    error_reportf_err(local_err, args);
++    error_reportf_errp(errp, args);
+|
+-    warn_report_err(local_err);
++    warn_report_errp(errp);
+|
+-    warn_reportf_err(local_err, args);
++    warn_reportf_errp(errp, args);
+)
+?-    local_err = NULL;
+
+|
+-    error_propagate_prepend(errp, local_err, args);
++    error_prepend(errp, args);
+|
+-    error_propagate(errp, local_err);
+|
+-    &local_err
++    errp
+)
+     ...>
+ }
+
+// Convert remaining local_err usage. For example, different kinds of
+// error checking in if conditionals. We can't merge this into
+// previous hunk, as this conflicts with other substitutions in it (at
+// least with "- local_err = NULL").
+@ disable optional_qualifier@
+identifier rule1.fn, rule1.local_err;
+symbol errp;
+@@
+
+ fn(..., Error ** ____, ...)
+ {
+     <...
+-    local_err
++    *errp
+     ...>
+ }
+
+// Always use the same pattern for checking error
+@ disable optional_qualifier@
+identifier rule1.fn;
+symbol errp;
+@@
+
+ fn(..., Error ** ____, ...)
+ {
+     <...
+-    *errp != NULL
++    *errp
+     ...>
+ }
+
+// Revert temporary ___ identifier.
+@ disable optional_qualifier@
+identifier rule1.fn;
+@@
+
+ fn(..., Error **
+-   ____
++   errp
+    , ...)
+ {
+     ...
+ }
diff --git a/scripts/coccinelle/exec_rw_const.cocci b/scripts/coccinelle/exec_rw_const.cocci
new file mode 100644
index 000000000..1a2029695
--- /dev/null
+++ b/scripts/coccinelle/exec_rw_const.cocci
@@ -0,0 +1,111 @@
+/*
+  Usage:
+
+    spatch \
+           --macro-file scripts/cocci-macro-file.h \
+           --sp-file scripts/coccinelle/exec_rw_const.cocci \
+           --keep-comments \
+           --in-place \
+           --dir .
+*/
+
+// Convert to boolean
+@@
+expression E1, E2, E3, E4, E5;
+@@
+(
+- address_space_rw(E1, E2, E3, E4, E5, 0)
++ address_space_rw(E1, E2, E3, E4, E5, false)
+|
+- address_space_rw(E1, E2, E3, E4, E5, 1)
++ address_space_rw(E1, E2, E3, E4, E5, true)
+|
+
+- cpu_physical_memory_rw(E1, E2, E3, 0)
++ cpu_physical_memory_rw(E1, E2, E3, false)
+|
+- cpu_physical_memory_rw(E1, E2, E3, 1)
++ cpu_physical_memory_rw(E1, E2, E3, true)
+|
+
+- cpu_physical_memory_map(E1, E2, 0)
++ cpu_physical_memory_map(E1, E2, false)
+|
+- cpu_physical_memory_map(E1, E2, 1)
++ cpu_physical_memory_map(E1, E2, true)
+)
+
+// Use address_space_write instead of casting to non-const
+@@
+type T;
+const T *V;
+expression E1, E2, E3, E4;
+@@
+(
+- address_space_rw(E1, E2, E3, (T *)V, E4, 1)
++ address_space_write(E1, E2, E3, V, E4)
+|
+- address_space_rw(E1, E2, E3, (void *)V, E4, 1)
++ address_space_write(E1, E2, E3, V, E4)
+)
+
+// Avoid uses of address_space_rw() with a constant is_write argument.
+@@
+expression E1, E2, E3, E4, E5;
+symbol true, false;
+@@
+(
+- address_space_rw(E1, E2, E3, E4, E5, false)
++ address_space_read(E1, E2, E3, E4, E5)
+|
+- address_space_rw(E1, E2, E3, E4, E5, true)
++ address_space_write(E1, E2, E3, E4, E5)
+)
+
+// Avoid uses of cpu_physical_memory_rw() with a constant is_write argument.
+@@
+expression E1, E2, E3;
+@@
+(
+- cpu_physical_memory_rw(E1, E2, E3, false)
++ cpu_physical_memory_read(E1, E2, E3)
+|
+- cpu_physical_memory_rw(E1, E2, E3, true)
++ cpu_physical_memory_write(E1, E2, E3)
+)
+
+// Remove useless cast
+@@
+expression E1, E2, E3, E4, E5, E6;
+type T;
+@@
+(
+- address_space_rw(E1, E2, E3, (T *)(E4), E5, E6)
++ address_space_rw(E1, E2, E3, E4, E5, E6)
+|
+- address_space_read(E1, E2, E3, (T *)(E4), E5)
++ address_space_read(E1, E2, E3, E4, E5)
+|
+- address_space_write(E1, E2, E3, (T *)(E4), E5)
++ address_space_write(E1, E2, E3, E4, E5)
+|
+- address_space_write_rom(E1, E2, E3, (T *)(E4), E5)
++ address_space_write_rom(E1, E2, E3, E4, E5)
+|
+
+- cpu_physical_memory_rw(E1, (T *)(E2), E3, E4)
++ cpu_physical_memory_rw(E1, E2, E3, E4)
+|
+- cpu_physical_memory_read(E1, (T *)(E2), E3)
++ cpu_physical_memory_read(E1, E2, E3)
+|
+- cpu_physical_memory_write(E1, (T *)(E2), E3)
++ cpu_physical_memory_write(E1, E2, E3)
+|
+
+- dma_memory_read(E1, E2, (T *)(E3), E4)
++ dma_memory_read(E1, E2, E3, E4)
+|
+- dma_memory_write(E1, E2, (T *)(E3), E4)
++ dma_memory_write(E1, E2, E3, E4)
+)
diff --git a/scripts/coccinelle/inplace-byteswaps.cocci b/scripts/coccinelle/inplace-byteswaps.cocci
new file mode 100644
index 000000000..a869a90cb
--- /dev/null
+++ b/scripts/coccinelle/inplace-byteswaps.cocci
@@ -0,0 +1,65 @@
+// Replace uses of in-place byteswapping functions with calls to the
+// equivalent not-in-place functions.  This is necessary to avoid
+// undefined behaviour if the expression being swapped is a field in a
+// packed struct.
+
+@@
+expression E;
+@@
+-be16_to_cpus(&E);
++E = be16_to_cpu(E);
+@@
+expression E;
+@@
+-be32_to_cpus(&E);
++E = be32_to_cpu(E);
+@@
+expression E;
+@@
+-be64_to_cpus(&E);
++E = be64_to_cpu(E);
+@@
+expression E;
+@@
+-cpu_to_be16s(&E);
++E = cpu_to_be16(E);
+@@
+expression E;
+@@
+-cpu_to_be32s(&E);
++E = cpu_to_be32(E);
+@@
+expression E;
+@@
+-cpu_to_be64s(&E);
++E = cpu_to_be64(E);
+@@
+expression E;
+@@
+-le16_to_cpus(&E);
++E = le16_to_cpu(E);
+@@
+expression E;
+@@
+-le32_to_cpus(&E);
++E = le32_to_cpu(E);
+@@
+expression E;
+@@
+-le64_to_cpus(&E);
++E = le64_to_cpu(E);
+@@
+expression E;
+@@
+-cpu_to_le16s(&E);
++E = cpu_to_le16(E);
+@@
+expression E;
+@@
+-cpu_to_le32s(&E);
++E = cpu_to_le32(E);
+@@
+expression E;
+@@
+-cpu_to_le64s(&E);
++E = cpu_to_le64(E);
diff --git a/scripts/coccinelle/memory-region-housekeeping.cocci b/scripts/coccinelle/memory-region-housekeeping.cocci
new file mode 100644
index 000000000..c768d8140
--- /dev/null
+++ b/scripts/coccinelle/memory-region-housekeeping.cocci
@@ -0,0 +1,159 @@
+/*
+  Usage:
+
+    spatch \
+        --macro-file scripts/cocci-macro-file.h \
+        --sp-file scripts/coccinelle/memory-region-housekeeping.cocci \
+        --keep-comments \
+        --in-place \
+        --dir .
+
+*/
+
+
+// Replace memory_region_init_ram(readonly) by memory_region_init_rom()
+@@
+expression E1, E2, E3, E4, E5;
+symbol true;
+@@
+(
+- memory_region_init_ram(E1, E2, E3, E4, E5);
++ memory_region_init_rom(E1, E2, E3, E4, E5);
+  ... WHEN != E1
+- memory_region_set_readonly(E1, true);
+|
+- memory_region_init_ram_nomigrate(E1, E2, E3, E4, E5);
++ memory_region_init_rom_nomigrate(E1, E2, E3, E4, E5);
+  ... WHEN != E1
+- memory_region_set_readonly(E1, true);
+)
+
+
+@possible_memory_region_init_rom@
+expression E1, E2, E3, E4, E5;
+position p;
+@@
+(
+  memory_region_init_ram@p(E1, E2, E3, E4, E5);
+  ...
+  memory_region_set_readonly(E1, true);
+|
+  memory_region_init_ram_nomigrate@p(E1, E2, E3, E4, E5);
+  ...
+  memory_region_set_readonly(E1, true);
+)
+@script:python@
+p << possible_memory_region_init_rom.p;
+@@
+cocci.print_main("potential use of memory_region_init_rom*() in ", p)
+
+
+// Do not call memory_region_set_readonly() on ROM alias
+@@
+expression ROM, E1, E2, E3, E4;
+expression ALIAS, E5, E6, E7, E8;
+@@
+(
+  memory_region_init_rom(ROM, E1, E2, E3, E4);
+|
+  memory_region_init_rom_nomigrate(ROM, E1, E2, E3, E4);
+)
+  ...
+   memory_region_init_alias(ALIAS, E5, E6, ROM, E7, E8);
+-  memory_region_set_readonly(ALIAS, true);
+
+
+// Replace by-hand memory_region_init_ram_nomigrate/vmstate_register_ram
+// code sequences with use of the new memory_region_init_ram function.
+// Similarly for the _rom and _rom_device functions.
+// We don't try to replace sequences with a non-NULL owner, because
+// there are none in the tree that can be automatically converted
+// (and only a handful that can be manually converted).
+@@
+expression MR;
+expression NAME;
+expression SIZE;
+expression ERRP;
+@@
+-memory_region_init_ram_nomigrate(MR, NULL, NAME, SIZE, ERRP);
++memory_region_init_ram(MR, NULL, NAME, SIZE, ERRP);
+ ...
+-vmstate_register_ram_global(MR);
+@@
+expression MR;
+expression NAME;
+expression SIZE;
+expression ERRP;
+@@
+-memory_region_init_rom_nomigrate(MR, NULL, NAME, SIZE, ERRP);
++memory_region_init_rom(MR, NULL, NAME, SIZE, ERRP);
+ ...
+-vmstate_register_ram_global(MR);
+@@
+expression MR;
+expression OPS;
+expression OPAQUE;
+expression NAME;
+expression SIZE;
+expression ERRP;
+@@
+-memory_region_init_rom_device_nomigrate(MR, NULL, OPS, OPAQUE, NAME, SIZE, ERRP);
++memory_region_init_rom_device(MR, NULL, OPS, OPAQUE, NAME, SIZE, ERRP);
+ ...
+-vmstate_register_ram_global(MR);
+
+
+// Device is owner
+@@
+typedef DeviceState;
+identifier device_fn, dev, obj;
+expression E1, E2, E3, E4, E5;
+@@
+static void device_fn(DeviceState *dev, ...)
+{
+  ...
+  Object *obj = OBJECT(dev);
+  <+...
+(
+- memory_region_init(E1, NULL, E2, E3);
++ memory_region_init(E1, obj, E2, E3);
+|
+- memory_region_init_io(E1, NULL, E2, E3, E4, E5);
++ memory_region_init_io(E1, obj, E2, E3, E4, E5);
+|
+- memory_region_init_alias(E1, NULL, E2, E3, E4, E5);
++ memory_region_init_alias(E1, obj, E2, E3, E4, E5);
+|
+- memory_region_init_rom(E1, NULL, E2, E3, E4);
++ memory_region_init_rom(E1, obj, E2, E3, E4);
+|
+- memory_region_init_ram_shared_nomigrate(E1, NULL, E2, E3, E4, E5);
++ memory_region_init_ram_shared_nomigrate(E1, obj, E2, E3, E4, E5);
+)
+  ...+>
+}
+@@
+identifier device_fn, dev;
+expression E1, E2, E3, E4, E5;
+@@
+static void device_fn(DeviceState *dev, ...)
+{
+  <+...
+(
+- memory_region_init(E1, NULL, E2, E3);
++ memory_region_init(E1, OBJECT(dev), E2, E3);
+|
+- memory_region_init_io(E1, NULL, E2, E3, E4, E5);
++ memory_region_init_io(E1, OBJECT(dev), E2, E3, E4, E5);
+|
+- memory_region_init_alias(E1, NULL, E2, E3, E4, E5);
++ memory_region_init_alias(E1, OBJECT(dev), E2, E3, E4, E5);
+|
+- memory_region_init_rom(E1, NULL, E2, E3, E4);
++ memory_region_init_rom(E1, OBJECT(dev), E2, E3, E4);
+|
+- memory_region_init_ram_shared_nomigrate(E1, NULL, E2, E3, E4, E5);
++ memory_region_init_ram_shared_nomigrate(E1, OBJECT(dev), E2, E3, E4, E5);
+)
+  ...+>
+}
diff --git a/scripts/coccinelle/overflow_muldiv64.cocci b/scripts/coccinelle/overflow_muldiv64.cocci
new file mode 100644
index 000000000..08ec4a8de
--- /dev/null
+++ b/scripts/coccinelle/overflow_muldiv64.cocci
@@ -0,0 +1,16 @@
+// Find muldiv64(i64, i64, x) for potential overflow
+@filter@
+typedef uint64_t;
+typedef int64_t;
+{ uint64_t, int64_t, long, unsigned long } a, b;
+expression c;
+position p;
+@@
+
+muldiv64(a,b,c)@p
+
+@script:python@
+p << filter.p;
+@@
+
+cocci.print_main("potential muldiv64() overflow", p)
diff --git a/scripts/coccinelle/qobject.cocci b/scripts/coccinelle/qobject.cocci
new file mode 100644
index 000000000..9fee9c0d9
--- /dev/null
+++ b/scripts/coccinelle/qobject.cocci
@@ -0,0 +1,47 @@
+// Use QDict macros where they make sense
+@@
+expression Obj, Key, E;
+@@
+(
+- qobject_ref(QOBJECT(E));
++ qobject_ref(E);
+|
+- qobject_unref(QOBJECT(E));
++ qobject_unref(E);
+|
+- qdict_put_obj(Obj, Key, QOBJECT(E));
++ qdict_put(Obj, Key, E);
+|
+- qdict_put(Obj, Key, qnum_from_int(E));
++ qdict_put_int(Obj, Key, E);
+|
+- qdict_put(Obj, Key, qbool_from_bool(E));
++ qdict_put_bool(Obj, Key, E);
+|
+- qdict_put(Obj, Key, qstring_from_str(E));
++ qdict_put_str(Obj, Key, E);
+|
+- qdict_put(Obj, Key, qnull());
++ qdict_put_null(Obj, Key);
+)
+
+// Use QList macros where they make sense
+@@
+expression Obj, E;
+@@
+(
+- qlist_append_obj(Obj, QOBJECT(E));
++ qlist_append(Obj, E);
+|
+- qlist_append(Obj, qnum_from_int(E));
++ qlist_append_int(Obj, E);
+|
+- qlist_append(Obj, qbool_from_bool(E));
++ qlist_append_bool(Obj, E);
+|
+- qlist_append(Obj, qstring_from_str(E));
++ qlist_append_str(Obj, E);
+|
+- qlist_append(Obj, qnull());
++ qlist_append_null(Obj);
+)
diff --git a/scripts/coccinelle/qom-parent-type.cocci b/scripts/coccinelle/qom-parent-type.cocci
new file mode 100644
index 000000000..9afb3edd9
--- /dev/null
+++ b/scripts/coccinelle/qom-parent-type.cocci
@@ -0,0 +1,26 @@
+// Highlight object declarations that don't look like object class but
+// accidentally inherit from it.
+
+@match@
+identifier obj_t, fld;
+type parent_t =~ ".*Class$";
+@@
+struct obj_t {
+    parent_t fld;
+    ...
+};
+
+@script:python filter depends on match@
+obj_t << match.obj_t;
+@@
+is_class_obj = obj_t.endswith('Class')
+cocci.include_match(not is_class_obj)
+
+@replacement depends on filter@
+identifier match.obj_t, match.fld;
+type match.parent_t;
+@@
+struct obj_t {
+*   parent_t fld;
+    ...
+};
diff --git a/scripts/coccinelle/remove_local_err.cocci b/scripts/coccinelle/remove_local_err.cocci
new file mode 100644
index 000000000..9261c9968
--- /dev/null
+++ b/scripts/coccinelle/remove_local_err.cocci
@@ -0,0 +1,29 @@
+// Replace unnecessary usage of local_err variable with
+// direct usage of errp argument
+
+@@
+identifier F;
+expression list ARGS;
+expression F2;
+identifier LOCAL_ERR;
+identifier ERRP;
+idexpression V;
+typedef Error;
+@@
+ F(..., Error **ERRP)
+ {
+     ...
+-    Error *LOCAL_ERR;
+     ... when != LOCAL_ERR
+         when != ERRP
+(
+-    F2(ARGS, &LOCAL_ERR);
+-    error_propagate(ERRP, LOCAL_ERR);
++    F2(ARGS, ERRP);
+|
+-    V = F2(ARGS, &LOCAL_ERR);
+-    error_propagate(ERRP, LOCAL_ERR);
++    V = F2(ARGS, ERRP);
+)
+     ... when != LOCAL_ERR
+ }
diff --git a/scripts/coccinelle/remove_muldiv64.cocci b/scripts/coccinelle/remove_muldiv64.cocci
new file mode 100644
index 000000000..4c10bd57d
--- /dev/null
+++ b/scripts/coccinelle/remove_muldiv64.cocci
@@ -0,0 +1,6 @@
+// replace muldiv64(a, 1, b) by "a / b"
+@@
+expression a, b;
+@@
+-muldiv64(a, 1, b)
++a / b
diff --git a/scripts/coccinelle/return_directly.cocci b/scripts/coccinelle/return_directly.cocci
new file mode 100644
index 000000000..4cf50e75e
--- /dev/null
+++ b/scripts/coccinelle/return_directly.cocci
@@ -0,0 +1,19 @@
+// replace 'R = X; return R;' with 'return X;'
+@@
+identifier VAR;
+expression E;
+type T;
+identifier F;
+@@
+ T F(...)
+ {
+     ...
+-    T VAR;
+     ... when != VAR
+
+-    VAR =
++    return
+     E;
+-    return VAR;
+     ... when != VAR
+ }
diff --git a/scripts/coccinelle/round.cocci b/scripts/coccinelle/round.cocci
new file mode 100644
index 000000000..ed0677328
--- /dev/null
+++ b/scripts/coccinelle/round.cocci
@@ -0,0 +1,19 @@
+// Use macro DIV_ROUND_UP instead of (((n) + (d) - 1) /(d))
+@@
+expression e1;
+expression e2;
+@@
+(
+- ((e1) + e2 - 1) / (e2)
++ DIV_ROUND_UP(e1,e2)
+|
+- ((e1) + (e2 - 1)) / (e2)
++ DIV_ROUND_UP(e1,e2)
+)
+
+@@
+expression e1;
+expression e2;
+@@
+-(DIV_ROUND_UP(e1,e2))
++DIV_ROUND_UP(e1,e2)
diff --git a/scripts/coccinelle/simplify_muldiv64.cocci b/scripts/coccinelle/simplify_muldiv64.cocci
new file mode 100644
index 000000000..3d7c9744a
--- /dev/null
+++ b/scripts/coccinelle/simplify_muldiv64.cocci
@@ -0,0 +1,11 @@
+// replace muldiv64(i32, i32, x) by (uint64_t)i32 * i32 / x
+@@
+typedef uint32_t;
+typedef int32_t;
+{ uint32_t, int32_t, int, unsigned int } a, b;
+typedef uint64_t;
+expression c;
+@@
+
+-muldiv64(a,b,c)
++(uint64_t) a * b / c
diff --git a/scripts/coccinelle/swap_muldiv64.cocci b/scripts/coccinelle/swap_muldiv64.cocci
new file mode 100644
index 000000000..b48b0d084
--- /dev/null
+++ b/scripts/coccinelle/swap_muldiv64.cocci
@@ -0,0 +1,13 @@
+// replace muldiv64(i32, i64, x) by muldiv64(i64, i32, x)
+@@
+typedef uint64_t;
+typedef int64_t;
+typedef uint32_t;
+typedef int32_t;
+{ uint32_t, int32_t, int, unsigned int } a;
+{ uint64_t, int64_t, long, unsigned long } b;
+expression c;
+@@
+
+-muldiv64(a,b,c)
++muldiv64(b,a,c)
diff --git a/scripts/coccinelle/tcg_gen_extract.cocci b/scripts/coccinelle/tcg_gen_extract.cocci
new file mode 100644
index 000000000..c10c86348
--- /dev/null
+++ b/scripts/coccinelle/tcg_gen_extract.cocci
@@ -0,0 +1,107 @@
+// optimize TCG using extract op
+//
+// Copyright: (C) 2017 Philippe Mathieu-Daudé. GPLv2+.
+// Confidence: High
+// Options: --macro-file scripts/cocci-macro-file.h
+//
+// Nikunj A Dadhania optimization:
+// http://lists.nongnu.org/archive/html/qemu-devel/2017-02/msg05211.html
+// Aurelien Jarno optimization:
+// http://lists.nongnu.org/archive/html/qemu-devel/2017-05/msg01466.html
+//
+// This script can be run either using spatch locally or via a docker image:
+//
+// $ spatch \
+//     --macro-file scripts/cocci-macro-file.h \
+//     --sp-file scripts/coccinelle/tcg_gen_extract.cocci \
+//     --keep-comments --in-place \
+//     --use-gitgrep --dir target
+//
+// $ docker run --rm -v $PWD:$PWD -w $PWD philmd/coccinelle \
+//     --macro-file scripts/cocci-macro-file.h \
+//     --sp-file scripts/coccinelle/tcg_gen_extract.cocci \
+//     --keep-comments --in-place \
+//     --use-gitgrep --dir target
+
+@initialize:python@
+@@
+import sys
+fd = sys.stderr
+def debug(msg="", trailer="\n"):
+    fd.write("[DBG] " + msg + trailer)
+def low_bits_count(value):
+    bits_count = 0
+    while (value & (1 << bits_count)):
+        bits_count += 1
+    return bits_count
+def Mn(order): # Mersenne number
+    return (1 << order) - 1
+
+@match@
+identifier ret;
+metavariable arg;
+constant ofs, msk;
+position shr_p, and_p;
+@@
+(
+    tcg_gen_shri_i32@shr_p
+|
+    tcg_gen_shri_i64@shr_p
+|
+    tcg_gen_shri_tl@shr_p
+)(ret, arg, ofs);
+...  WHEN != ret
+(
+    tcg_gen_andi_i32@and_p
+|
+    tcg_gen_andi_i64@and_p
+|
+    tcg_gen_andi_tl@and_p
+)(ret, ret, msk);
+
+@script:python verify_len depends on match@
+ret_s << match.ret;
+msk_s << match.msk;
+shr_p << match.shr_p;
+extract_len;
+@@
+is_optimizable = False
+debug("candidate at %s:%s" % (shr_p[0].file, shr_p[0].line))
+try: # only eval integer, no #define like 'SR_M' (cpp did this, else some headers are missing).
+    msk_v = long(msk_s.strip("UL"), 0)
+    msk_b = low_bits_count(msk_v)
+    if msk_b == 0:
+        debug("  value: 0x%x low_bits: %d" % (msk_v, msk_b))
+    else:
+        debug("  value: 0x%x low_bits: %d [Mersenne number: 0x%x]" % (msk_v, msk_b, Mn(msk_b)))
+        is_optimizable = Mn(msk_b) == msk_v # check low_bits
+        coccinelle.extract_len = "%d" % msk_b
+    debug("  candidate %s optimizable" % ("IS" if is_optimizable else "is NOT"))
+except:
+    debug("  ERROR (check included headers?)")
+cocci.include_match(is_optimizable)
+debug()
+
+@replacement depends on verify_len@
+identifier match.ret;
+metavariable match.arg;
+constant match.ofs, match.msk;
+position match.shr_p, match.and_p;
+identifier verify_len.extract_len;
+@@
+(
+-tcg_gen_shri_i32@shr_p(ret, arg, ofs);
++tcg_gen_extract_i32(ret, arg, ofs, extract_len);
+...  WHEN != ret
+-tcg_gen_andi_i32@and_p(ret, ret, msk);
+|
+-tcg_gen_shri_i64@shr_p(ret, arg, ofs);
++tcg_gen_extract_i64(ret, arg, ofs, extract_len);
+...  WHEN != ret
+-tcg_gen_andi_i64@and_p(ret, ret, msk);
+|
+-tcg_gen_shri_tl@shr_p(ret, arg, ofs);
++tcg_gen_extract_tl(ret, arg, ofs, extract_len);
+...  WHEN != ret
+-tcg_gen_andi_tl@and_p(ret, ret, msk);
+)
diff --git a/scripts/coccinelle/typecast.cocci b/scripts/coccinelle/typecast.cocci
new file mode 100644
index 000000000..be2183ee4
--- /dev/null
+++ b/scripts/coccinelle/typecast.cocci
@@ -0,0 +1,7 @@
+// Remove useless casts
+@@
+type T;
+T v;
+@@
+-	(T *)&v
++	&v
diff --git a/scripts/coccinelle/use-error_fatal.cocci b/scripts/coccinelle/use-error_fatal.cocci
new file mode 100644
index 000000000..10fff0aec
--- /dev/null
+++ b/scripts/coccinelle/use-error_fatal.cocci
@@ -0,0 +1,20 @@
+@@
+type T;
+identifier FUN, RET;
+expression list ARGS;
+expression ERR, EC, FAIL;
+@@
+(
+-    T RET = FUN(ARGS, &ERR);
++    T RET = FUN(ARGS, &error_fatal);
+|
+-    RET = FUN(ARGS, &ERR);
++    RET = FUN(ARGS, &error_fatal);
+|
+-    FUN(ARGS, &ERR);
++    FUN(ARGS, &error_fatal);
+)
+-    if (FAIL) {
+-        error_report_err(ERR);
+-        exit(EC);
+-    }
diff --git a/scripts/codeconverter/codeconverter/__init__.py b/scripts/codeconverter/codeconverter/__init__.py
new file mode 100644
index 000000000..e69de29bb
diff --git a/scripts/codeconverter/codeconverter/patching.py b/scripts/codeconverter/codeconverter/patching.py
new file mode 100644
index 000000000..9e92505d3
--- /dev/null
+++ b/scripts/codeconverter/codeconverter/patching.py
@@ -0,0 +1,466 @@
+# Copyright (C) 2020 Red Hat Inc.
+#
+# Authors:
+#  Eduardo Habkost 
+#
+# This work is licensed under the terms of the GNU GPL, version 2.  See
+# the COPYING file in the top-level directory.
+from typing import IO, Match, NamedTuple, Optional, Literal, Iterable, Type, Dict, List, Any, TypeVar, NewType, Tuple, Union
+from pathlib import Path
+from itertools import chain
+from tempfile import NamedTemporaryFile
+import os
+import re
+import subprocess
+from io import StringIO
+
+import logging
+logger = logging.getLogger(__name__)
+DBG = logger.debug
+INFO = logger.info
+WARN = logger.warning
+ERROR = logger.error
+
+from .utils import *
+
+T = TypeVar('T')
+
+class Patch(NamedTuple):
+    # start inside file.original_content
+    start: int
+    # end position inside file.original_content
+    end: int
+    # replacement string for file.original_content[start:end]
+    replacement: str
+
+IdentifierType = Literal['type', 'symbol', 'include', 'constant']
+class RequiredIdentifier(NamedTuple):
+    type: IdentifierType
+    name: str
+
+class FileMatch:
+    """Base class for regex matches
+
+    Subclasses just need to set the `regexp` class attribute
+    """
+    regexp: Optional[str] = None
+
+    def __init__(self, f: 'FileInfo', m: Match) -> None:
+        self.file: 'FileInfo' = f
+        self.match: Match[str] = m
+
+    @property
+    def name(self) -> str:
+        if 'name' not in self.match.groupdict():
+            return '[no name]'
+        return self.group('name')
+
+    @classmethod
+    def compiled_re(klass):
+        return re.compile(klass.regexp, re.MULTILINE)
+
+    def start(self) -> int:
+        return self.match.start()
+
+    def end(self) -> int:
+        return self.match.end()
+
+    def line_col(self) -> LineAndColumn:
+        return self.file.line_col(self.start())
+
+    def group(self, group: Union[int, str]) -> str:
+        return self.match.group(group)
+
+    def getgroup(self, group: str) -> Optional[str]:
+        if group not in self.match.groupdict():
+            return None
+        return self.match.group(group)
+
+    def log(self, level, fmt, *args) -> None:
+        pos = self.line_col()
+        logger.log(level, '%s:%d:%d: '+fmt, self.file.filename, pos.line, pos.col, *args)
+
+    def debug(self, fmt, *args) -> None:
+        self.log(logging.DEBUG, fmt, *args)
+
+    def info(self, fmt, *args) -> None:
+        self.log(logging.INFO, fmt, *args)
+
+    def warn(self, fmt, *args) -> None:
+        self.log(logging.WARNING, fmt, *args)
+
+    def error(self, fmt, *args) -> None:
+        self.log(logging.ERROR, fmt, *args)
+
+    def sub(self, original: str, replacement: str) -> str:
+        """Replace content
+
+        XXX: this won't use the match position, but will just
+        replace all strings that look like the original match.
+        This should be enough for all the patterns used in this
+        script.
+        """
+        return original.replace(self.group(0), replacement)
+
+    def sanity_check(self) -> None:
+        """Sanity check match, and print warnings if necessary"""
+        pass
+
+    def replacement(self) -> Optional[str]:
+        """Return replacement text for pattern, to use new code conventions"""
+        return None
+
+    def make_patch(self, replacement: str) -> 'Patch':
+        """Make patch replacing the content of this match"""
+        return Patch(self.start(), self.end(), replacement)
+
+    def make_subpatch(self, start: int, end: int, replacement: str) -> 'Patch':
+        return Patch(self.start() + start, self.start() + end, replacement)
+
+    def make_removal_patch(self) -> 'Patch':
+        """Make patch removing contents of match completely"""
+        return self.make_patch('')
+
+    def append(self, s: str) -> 'Patch':
+        """Make patch appending string after this match"""
+        return Patch(self.end(), self.end(), s)
+
+    def prepend(self, s: str) -> 'Patch':
+        """Make patch prepending string before this match"""
+        return Patch(self.start(), self.start(), s)
+
+    def gen_patches(self) -> Iterable['Patch']:
+        """Patch source code contents to use new code patterns"""
+        replacement = self.replacement()
+        if replacement is not None:
+            yield self.make_patch(replacement)
+
+    @classmethod
+    def has_replacement_rule(klass) -> bool:
+        return (klass.gen_patches is not FileMatch.gen_patches
+                or klass.replacement is not FileMatch.replacement)
+
+    def contains(self, other: 'FileMatch') -> bool:
+        return other.start() >= self.start() and other.end() <= self.end()
+
+    def __repr__(self) -> str:
+        start = self.file.line_col(self.start())
+        end = self.file.line_col(self.end() - 1)
+        return '<%s %s at %d:%d-%d:%d: %r>' % (self.__class__.__name__,
+                                                    self.name,
+                                                    start.line, start.col,
+                                                    end.line, end.col, self.group(0)[:100])
+
+    def required_identifiers(self) -> Iterable[RequiredIdentifier]:
+        """Can be implemented by subclasses to keep track of identifier references
+
+        This method will be used by the code that moves declarations around the file,
+        to make sure we find the right spot for them.
+        """
+        raise NotImplementedError()
+
+    def provided_identifiers(self) -> Iterable[RequiredIdentifier]:
+        """Can be implemented by subclasses to keep track of identifier references
+
+        This method will be used by the code that moves declarations around the file,
+        to make sure we find the right spot for them.
+        """
+        raise NotImplementedError()
+
+    @classmethod
+    def finditer(klass, content: str, pos=0, endpos=-1) -> Iterable[Match]:
+        """Helper for re.finditer()"""
+        if endpos >= 0:
+            content = content[:endpos]
+        return klass.compiled_re().finditer(content, pos)
+
+    @classmethod
+    def domatch(klass, content: str, pos=0, endpos=-1) -> Optional[Match]:
+        """Helper for re.match()"""
+        if endpos >= 0:
+            content = content[:endpos]
+        return klass.compiled_re().match(content, pos)
+
+    def group_finditer(self, klass: Type['FileMatch'], group: Union[str, int]) -> Iterable['FileMatch']:
+        assert self.file.original_content
+        return (klass(self.file, m)
+                for m in klass.finditer(self.file.original_content,
+                                        self.match.start(group),
+                                        self.match.end(group)))
+
+    def try_group_match(self, klass: Type['FileMatch'], group: Union[str, int]) -> Optional['FileMatch']:
+        assert self.file.original_content
+        m = klass.domatch(self.file.original_content,
+                          self.match.start(group),
+                          self.match.end(group))
+        if not m:
+            return None
+        else:
+            return klass(self.file, m)
+
+    def group_match(self, group: Union[str, int]) -> 'FileMatch':
+        m = self.try_group_match(FullMatch, group)
+        assert m
+        return m
+
+    @property
+    def allfiles(self) -> 'FileList':
+        return self.file.allfiles
+
+class FullMatch(FileMatch):
+    """Regexp that will match all contents of string
+    Useful when used with group_match()
+    """
+    regexp = r'(?s).*' # (?s) is re.DOTALL
+
+def all_subclasses(c: Type[FileMatch]) -> Iterable[Type[FileMatch]]:
+    for sc in c.__subclasses__():
+        yield sc
+        yield from all_subclasses(sc)
+
+def match_class_dict() -> Dict[str, Type[FileMatch]]:
+    d = dict((t.__name__, t) for t in all_subclasses(FileMatch))
+    return d
+
+def names(matches: Iterable[FileMatch]) -> Iterable[str]:
+    return [m.name for m in matches]
+
+class PatchingError(Exception):
+    pass
+
+class OverLappingPatchesError(PatchingError):
+    pass
+
+def apply_patches(s: str, patches: Iterable[Patch]) -> str:
+    """Apply a sequence of patches to string
+
+    >>> apply_patches('abcdefg', [Patch(2,2,'xxx'), Patch(0, 1, 'yy')])
+    'yybxxxcdefg'
+    """
+    r = StringIO()
+    last = 0
+    def patch_sort_key(item: Tuple[int, Patch]) -> Tuple[int, int, int]:
+        """Patches are sorted by byte position,
+        patches at the same byte position are applied in the order
+        they were generated.
+        """
+        i,p = item
+        return (p.start, p.end, i)
+
+    for i,p in sorted(enumerate(patches), key=patch_sort_key):
+        DBG("Applying patch at position %d (%s) - %d (%s): %r",
+            p.start, line_col(s, p.start),
+            p.end, line_col(s, p.end),
+            p.replacement)
+        if last > p.start:
+            raise OverLappingPatchesError("Overlapping patch at position %d (%s), last patch at %d (%s)" % \
+                (p.start, line_col(s, p.start), last, line_col(s, last)))
+        r.write(s[last:p.start])
+        r.write(p.replacement)
+        last = p.end
+    r.write(s[last:])
+    return r.getvalue()
+
+class RegexpScanner:
+    def __init__(self) -> None:
+        self.match_index: Dict[Type[Any], List[FileMatch]] = {}
+        self.match_name_index: Dict[Tuple[Type[Any], str, str], Optional[FileMatch]] = {}
+
+    def _matches_of_type(self, klass: Type[Any]) -> Iterable[FileMatch]:
+        raise NotImplementedError()
+
+    def matches_of_type(self, t: Type[T]) -> List[T]:
+        if t not in self.match_index:
+            self.match_index[t] = list(self._matches_of_type(t))
+        return self.match_index[t] # type: ignore
+
+    def find_matches(self, t: Type[T], name: str, group: str='name') -> List[T]:
+        indexkey = (t, name, group)
+        if indexkey in self.match_name_index:
+            return self.match_name_index[indexkey] # type: ignore
+        r: List[T] = []
+        for m in self.matches_of_type(t):
+            assert isinstance(m, FileMatch)
+            if m.getgroup(group) == name:
+                r.append(m) # type: ignore
+        self.match_name_index[indexkey] = r # type: ignore
+        return r
+
+    def find_match(self, t: Type[T], name: str, group: str='name') -> Optional[T]:
+        l = self.find_matches(t, name, group)
+        if not l:
+            return None
+        if len(l) > 1:
+            logger.warn("multiple matches found for %r (%s=%r)", t, group, name)
+            return None
+        return l[0]
+
+    def reset_index(self) -> None:
+        self.match_index.clear()
+        self.match_name_index.clear()
+
+class FileInfo(RegexpScanner):
+    filename: Path
+    original_content: Optional[str] = None
+
+    def __init__(self, files: 'FileList', filename: os.PathLike, force:bool=False) -> None:
+        super().__init__()
+        self.allfiles = files
+        self.filename = Path(filename)
+        self.patches: List[Patch] = []
+        self.force = force
+
+    def __repr__(self) -> str:
+        return f''
+
+    def filename_matches(self, name: str) -> bool:
+        nameparts = Path(name).parts
+        return self.filename.parts[-len(nameparts):] == nameparts
+
+    def line_col(self, start: int) -> LineAndColumn:
+        """Return line and column for a match object inside original_content"""
+        return line_col(self.original_content, start)
+
+    def _matches_of_type(self, klass: Type[Any]) -> List[FileMatch]:
+        """Build FileMatch objects for each match of regexp"""
+        if not hasattr(klass, 'regexp') or klass.regexp is None:
+            return []
+        assert hasattr(klass, 'regexp')
+        DBG("%s: scanning for %s", self.filename, klass.__name__)
+        DBG("regexp: %s", klass.regexp)
+        matches = [klass(self, m) for m in klass.finditer(self.original_content)]
+        DBG('%s: %d matches found for %s: %s', self.filename, len(matches),
+            klass.__name__,' '.join(names(matches)))
+        return matches
+
+    def find_match(self, t: Type[T], name: str, group: str='name') -> Optional[T]:
+        for m in self.matches_of_type(t):
+            assert isinstance(m, FileMatch)
+            if m.getgroup(group) == name:
+                return m # type: ignore
+        return None
+
+    def reset_content(self, s:str):
+        self.original_content = s
+        self.patches.clear()
+        self.reset_index()
+        self.allfiles.reset_index()
+
+    def load(self) -> None:
+        if self.original_content is not None:
+            return
+        with open(self.filename, 'rt') as f:
+            self.reset_content(f.read())
+
+    @property
+    def all_matches(self) -> Iterable[FileMatch]:
+        lists = list(self.match_index.values())
+        return (m for l in lists
+                  for m in l)
+
+    def gen_patches(self, matches: List[FileMatch]) -> None:
+        for m in matches:
+            DBG("Generating patches for %r", m)
+            for i,p in enumerate(m.gen_patches()):
+                DBG("patch %d generated by %r:", i, m)
+                DBG("replace contents at %s-%s with %r",
+                    self.line_col(p.start), self.line_col(p.end), p.replacement)
+                self.patches.append(p)
+
+    def scan_for_matches(self, class_names: Optional[List[str]]=None) -> Iterable[FileMatch]:
+        DBG("class names: %r", class_names)
+        class_dict = match_class_dict()
+        if class_names is None:
+            DBG("default class names")
+            class_names = list(name for name,klass in class_dict.items()
+                               if klass.has_replacement_rule())
+        DBG("class_names: %r", class_names)
+        for cn in class_names:
+            matches = self.matches_of_type(class_dict[cn])
+            DBG('%d matches found for %s: %s',
+                    len(matches), cn, ' '.join(names(matches)))
+            yield from matches
+
+    def apply_patches(self) -> None:
+        """Replace self.original_content after applying patches from self.patches"""
+        self.reset_content(self.get_patched_content())
+
+    def get_patched_content(self) -> str:
+        assert self.original_content is not None
+        return apply_patches(self.original_content, self.patches)
+
+    def write_to_file(self, f: IO[str]) -> None:
+        f.write(self.get_patched_content())
+
+    def write_to_filename(self, filename: os.PathLike) -> None:
+        with open(filename, 'wt') as of:
+            self.write_to_file(of)
+
+    def patch_inplace(self) -> None:
+        newfile = self.filename.with_suffix('.changed')
+        self.write_to_filename(newfile)
+        os.rename(newfile, self.filename)
+
+    def show_diff(self) -> None:
+        with NamedTemporaryFile('wt') as f:
+            self.write_to_file(f)
+            f.flush()
+            subprocess.call(['diff', '-u', self.filename, f.name])
+
+    def ref(self):
+        return TypeInfoReference
+
+class FileList(RegexpScanner):
+    def __init__(self):
+        super().__init__()
+        self.files: List[FileInfo] = []
+
+    def extend(self, *args, **kwargs):
+        self.files.extend(*args, **kwargs)
+
+    def __iter__(self):
+        return iter(self.files)
+
+    def _matches_of_type(self, klass: Type[Any]) -> Iterable[FileMatch]:
+        return chain(*(f._matches_of_type(klass) for f in self.files))
+
+    def find_file(self, name: str) -> Optional[FileInfo]:
+        """Get file with path ending with @name"""
+        for f in self.files:
+            if f.filename_matches(name):
+                return f
+        else:
+            return None
+
+    def one_pass(self, class_names: List[str]) -> int:
+        total_patches = 0
+        for f in self.files:
+            INFO("Scanning file %s", f.filename)
+            matches = list(f.scan_for_matches(class_names))
+            INFO("Generating patches for file %s", f.filename)
+            f.gen_patches(matches)
+            total_patches += len(f.patches)
+        if total_patches:
+            for f in self.files:
+                try:
+                    f.apply_patches()
+                except PatchingError:
+                    logger.exception("%s: failed to patch file", f.filename)
+        return total_patches
+
+    def patch_content(self, max_passes, class_names: List[str]) -> None:
+        """Multi-pass content patching loop
+
+        We run multiple passes because there are rules that will
+        delete init functions once they become empty.
+        """
+        passes = 0
+        total_patches  = 0
+        DBG("max_passes: %r", max_passes)
+        while not max_passes or max_passes <= 0 or passes < max_passes:
+            passes += 1
+            INFO("Running pass: %d", passes)
+            count = self.one_pass(class_names)
+            DBG("patch content: pass %d: %d patches generated", passes, count)
+            total_patches += count
+        DBG("%d patches applied total in %d passes", total_patches, passes)
diff --git a/scripts/codeconverter/codeconverter/qom_macros.py b/scripts/codeconverter/codeconverter/qom_macros.py
new file mode 100644
index 000000000..2d2f2055a
--- /dev/null
+++ b/scripts/codeconverter/codeconverter/qom_macros.py
@@ -0,0 +1,861 @@
+# Copyright (C) 2020 Red Hat Inc.
+#
+# Authors:
+#  Eduardo Habkost 
+#
+# This work is licensed under the terms of the GNU GPL, version 2.  See
+# the COPYING file in the top-level directory.
+import re
+from itertools import chain
+from typing import *
+
+from .regexps import *
+from .patching import *
+from .utils import *
+
+import logging
+logger = logging.getLogger(__name__)
+DBG = logger.debug
+INFO = logger.info
+WARN = logger.warning
+
+# simple expressions:
+
+RE_CONSTANT = OR(RE_STRING, RE_NUMBER)
+
+class DefineDirective(FileMatch):
+    """Match any #define directive"""
+    regexp = S(r'^[ \t]*#[ \t]*define', CPP_SPACE, NAMED('name', RE_IDENTIFIER), r'\b')
+
+class ExpressionDefine(FileMatch):
+    """Simple #define preprocessor directive for an expression"""
+    regexp = S(r'^[ \t]*#[ \t]*define', CPP_SPACE, NAMED('name', RE_IDENTIFIER),
+               CPP_SPACE, NAMED('value', RE_EXPRESSION), r'[ \t]*\n')
+
+    def provided_identifiers(self) -> Iterable[RequiredIdentifier]:
+        yield RequiredIdentifier('constant', self.group('name'))
+
+class ConstantDefine(ExpressionDefine):
+    """Simple #define preprocessor directive for a number or string constant"""
+    regexp = S(r'^[ \t]*#[ \t]*define', CPP_SPACE, NAMED('name', RE_IDENTIFIER),
+               CPP_SPACE, NAMED('value', RE_CONSTANT), r'[ \t]*\n')
+
+
+class TypeIdentifiers(NamedTuple):
+    """Type names found in type declarations"""
+    # TYPE_MYDEVICE
+    typename: Optional[str]
+    # MYDEVICE
+    uppercase: Optional[str] = None
+    # MyDevice
+    instancetype: Optional[str] = None
+    # MyDeviceClass
+    classtype: Optional[str] = None
+    # my_device
+    lowercase: Optional[str] = None
+
+    def allfields(self):
+        return tuple(getattr(self, f) for f in self._fields)
+
+    def merge(self, other: 'TypeIdentifiers') -> Optional['TypeIdentifiers']:
+        """Check if identifiers match, return new identifier with complete list"""
+        if any(not opt_compare(a, b) for a,b in zip(self, other)):
+            return None
+        return TypeIdentifiers(*(merge(a, b) for a,b in zip(self, other)))
+
+    def __str__(self) -> str:
+        values = ((f, getattr(self, f)) for f in self._fields)
+        s = ', '.join('%s=%s' % (f,v) for f,v in values if v is not None)
+        return f'{s}'
+
+    def check_consistency(self) -> List[str]:
+        """Check if identifiers are consistent with each other,
+        return list of problems (or empty list if everything seems consistent)
+        """
+        r = []
+        if self.typename is None:
+            r.append("typename (TYPE_MYDEVICE) is unavailable")
+
+        if self.uppercase is None:
+            r.append("uppercase name is unavailable")
+
+        if (self.instancetype is not None
+            and self.classtype is not None
+            and self.classtype != f'{self.instancetype}Class'):
+                r.append("class typedef %s doesn't match instance typedef %s" %
+                         (self.classtype, self.instancetype))
+
+        if (self.uppercase is not None
+            and self.typename is not None
+            and f'TYPE_{self.uppercase}' != self.typename):
+            r.append("uppercase name (%s) doesn't match type name (%s)" %
+                     (self.uppercase, self.typename))
+
+        return r
+
+class TypedefMatch(FileMatch):
+    """typedef declaration"""
+    def provided_identifiers(self) -> Iterable[RequiredIdentifier]:
+        yield RequiredIdentifier('type', self.group('name'))
+
+class SimpleTypedefMatch(TypedefMatch):
+    """Simple typedef declaration
+    (no replacement rules)"""
+    regexp = S(r'^[ \t]*typedef', SP,
+               NAMED('typedef_type', RE_TYPE), SP,
+               NAMED('name', RE_IDENTIFIER), r'\s*;[ \t]*\n')
+
+RE_MACRO_DEFINE = S(r'^[ \t]*#\s*define\s+', NAMED('name', RE_IDENTIFIER),
+                    r'\s*\(\s*', RE_IDENTIFIER, r'\s*\)', CPP_SPACE)
+
+RE_STRUCT_ATTRIBUTE = r'QEMU_PACKED'
+
+# This doesn't parse the struct definitions completely, it just assumes
+# the closing brackets are going to be in an unindented line:
+RE_FULL_STRUCT = S('struct', SP, M(RE_IDENTIFIER, n='?', name='structname'), SP,
+                   NAMED('body', r'{\n',
+                         # acceptable inside the struct body:
+                         # - lines starting with space or tab
+                         # - empty lines
+                         # - preprocessor directives
+                         # - comments
+                         OR(r'[ \t][^\n]*\n',
+                            r'#[^\n]*\n',
+                            r'\n',
+                            S(r'[ \t]*', RE_COMMENT, r'[ \t]*\n'),
+                            repeat='*?'),
+                         r'}', M(RE_STRUCT_ATTRIBUTE, SP, n='*')))
+RE_STRUCT_TYPEDEF = S(r'^[ \t]*typedef', SP, RE_FULL_STRUCT, SP,
+                      NAMED('name', RE_IDENTIFIER), r'\s*;[ \t]*\n')
+
+class FullStructTypedefMatch(TypedefMatch):
+    """typedef struct [SomeStruct] { ...} SomeType
+    Will be replaced by separate struct declaration + typedef
+    """
+    regexp = RE_STRUCT_TYPEDEF
+
+    def make_structname(self) -> str:
+        """Make struct name for struct+typedef split"""
+        name = self.group('structname')
+        if not name:
+            name = self.name
+        return name
+
+    def strip_typedef(self) -> Patch:
+        """generate patch that will strip typedef from the struct declartion
+
+        The caller is responsible for readding the typedef somewhere else.
+        """
+        name = self.make_structname()
+        body = self.group('body')
+        return self.make_patch(f'struct {name} {body};\n')
+
+    def make_simple_typedef(self) -> str:
+        structname = self.make_structname()
+        name = self.name
+        return f'typedef struct {structname} {name};\n'
+
+    def move_typedef(self, position) -> Iterator[Patch]:
+        """Generate patches to move typedef elsewhere"""
+        yield self.strip_typedef()
+        yield Patch(position, position, self.make_simple_typedef())
+
+    def split_typedef(self) -> Iterator[Patch]:
+        """Split into struct definition + typedef in-place"""
+        yield self.strip_typedef()
+        yield self.append(self.make_simple_typedef())
+
+class StructTypedefSplit(FullStructTypedefMatch):
+    """split struct+typedef declaration"""
+    def gen_patches(self) -> Iterator[Patch]:
+        if self.group('structname'):
+            yield from self.split_typedef()
+
+class DuplicatedTypedefs(SimpleTypedefMatch):
+    """Delete ALL duplicate typedefs (unsafe)"""
+    def gen_patches(self) -> Iterable[Patch]:
+        other_td = [td for td in chain(self.file.matches_of_type(SimpleTypedefMatch),
+                                       self.file.matches_of_type(FullStructTypedefMatch))
+                    if td.name == self.name]
+        DBG("other_td: %r", other_td)
+        if any(td.start() < self.start() for td in other_td):
+            # patch only if handling the first typedef
+            return
+        for td in other_td:
+            if isinstance(td, SimpleTypedefMatch):
+                DBG("other td: %r", td.match.groupdict())
+                if td.group('typedef_type') != self.group('typedef_type'):
+                    yield td.make_removal_patch()
+            elif isinstance(td, FullStructTypedefMatch):
+                DBG("other td: %r", td.match.groupdict())
+                if self.group('typedef_type') == 'struct '+td.group('structname'):
+                    yield td.strip_typedef()
+
+class QOMDuplicatedTypedefs(DuplicatedTypedefs):
+    """Delete duplicate typedefs if used by QOM type"""
+    def gen_patches(self) -> Iterable[Patch]:
+        qom_macros = [TypeCheckMacro, DeclareInstanceChecker, DeclareClassCheckers, DeclareObjCheckers]
+        qom_matches = chain(*(self.file.matches_of_type(t) for t in qom_macros))
+        in_use = any(RequiredIdentifier('type', self.name) in m.required_identifiers()
+                     for m in qom_matches)
+        if in_use:
+            yield from DuplicatedTypedefs.gen_patches(self)
+
+class QOMStructTypedefSplit(FullStructTypedefMatch):
+    """split struct+typedef declaration if used by QOM type"""
+    def gen_patches(self) -> Iterator[Patch]:
+        qom_macros = [TypeCheckMacro, DeclareInstanceChecker, DeclareClassCheckers, DeclareObjCheckers]
+        qom_matches = chain(*(self.file.matches_of_type(t) for t in qom_macros))
+        in_use = any(RequiredIdentifier('type', self.name) in m.required_identifiers()
+                     for m in qom_matches)
+        if in_use:
+            yield from self.split_typedef()
+
+def typedefs(file: FileInfo) -> Iterable[TypedefMatch]:
+    return (cast(TypedefMatch, m)
+            for m in chain(file.matches_of_type(SimpleTypedefMatch),
+                           file.matches_of_type(FullStructTypedefMatch)))
+
+def find_typedef(f: FileInfo, name: Optional[str]) -> Optional[TypedefMatch]:
+    if not name:
+        return None
+    for td in typedefs(f):
+        if td.name == name:
+            return td
+    return None
+
+CHECKER_MACROS = ['OBJECT_CHECK', 'OBJECT_CLASS_CHECK', 'OBJECT_GET_CLASS']
+CheckerMacroName = Literal['OBJECT_CHECK', 'OBJECT_CLASS_CHECK', 'OBJECT_GET_CLASS']
+
+RE_CHECK_MACRO = \
+    S(RE_MACRO_DEFINE,
+      OR(*CHECKER_MACROS, name='checker'),
+      M(r'\s*\(\s*', OR(NAMED('typedefname', RE_IDENTIFIER), RE_TYPE, name='c_type'), r'\s*,', CPP_SPACE,
+        OPTIONAL_PARS(RE_IDENTIFIER), r',', CPP_SPACE,
+        NAMED('qom_typename', RE_IDENTIFIER), r'\s*\)\n',
+        n='?', name='check_args'))
+
+EXPECTED_CHECKER_SUFFIXES: List[Tuple[CheckerMacroName, str]] = [
+    ('OBJECT_GET_CLASS', '_GET_CLASS'),
+    ('OBJECT_CLASS_CHECK', '_CLASS'),
+]
+
+class TypeCheckMacro(FileMatch):
+    """OBJECT_CHECK/OBJECT_CLASS_CHECK/OBJECT_GET_CLASS macro definitions
+    Will be replaced by DECLARE_*_CHECKERS macro
+    """
+    regexp = RE_CHECK_MACRO
+
+    @property
+    def checker(self) -> CheckerMacroName:
+        """Name of checker macro being used"""
+        return self.group('checker') # type: ignore
+
+    @property
+    def typedefname(self) -> Optional[str]:
+        return self.group('typedefname')
+
+    def find_typedef(self) -> Optional[TypedefMatch]:
+        return find_typedef(self.file, self.typedefname)
+
+    def sanity_check(self) -> None:
+        DBG("groups: %r", self.match.groups())
+        if not self.group('check_args'):
+            self.warn("type check macro not parsed completely: %s", self.name)
+            return
+        DBG("type identifiers: %r", self.type_identifiers)
+        if self.typedefname and self.find_typedef() is None:
+            self.warn("typedef used by %s not found", self.name)
+
+    def find_matching_macros(self) -> List['TypeCheckMacro']:
+        """Find other check macros that generate the same macro names
+
+        The returned list will always be sorted.
+        """
+        my_ids = self.type_identifiers
+        assert my_ids
+        return [m for m in self.file.matches_of_type(TypeCheckMacro)
+                if m.type_identifiers is not None
+                   and my_ids.uppercase is not None
+                   and (my_ids.uppercase == m.type_identifiers.uppercase
+                        or my_ids.typename == m.type_identifiers.typename)]
+
+    def merge_ids(self, matches: List['TypeCheckMacro']) -> Optional[TypeIdentifiers]:
+        """Try to merge info about type identifiers from all matches in a list"""
+        if not matches:
+            return None
+        r = matches[0].type_identifiers
+        if r is None:
+            return None
+        for m in matches[1:]:
+            assert m.type_identifiers
+            new = r.merge(m.type_identifiers)
+            if new is None:
+                self.warn("macro %s identifiers (%s) don't match macro %s (%s)",
+                          matches[0].name, r, m.name, m.type_identifiers)
+                return None
+            r = new
+        return r
+
+    def required_identifiers(self) -> Iterable[RequiredIdentifier]:
+        yield RequiredIdentifier('include', '"qom/object.h"')
+        if self.type_identifiers is None:
+            return
+        # to make sure typedefs will be moved above all related macros,
+        # return dependencies from all of them, not just this match
+        for m in self.find_matching_macros():
+            yield RequiredIdentifier('type', m.group('c_type'))
+            yield RequiredIdentifier('constant', m.group('qom_typename'))
+
+    @property
+    def type_identifiers(self) -> Optional[TypeIdentifiers]:
+        """Extract type identifier information from match"""
+        typename = self.group('qom_typename')
+        c_type = self.group('c_type')
+        if not typename or not c_type:
+            return None
+        typedef = self.group('typedefname')
+        classtype = None
+        instancetype = None
+        uppercase = None
+        expected_suffix = dict(EXPECTED_CHECKER_SUFFIXES).get(self.checker)
+
+        # here the available data depends on the checker macro being called:
+        # - we need to remove the suffix from the macro name
+        # - depending on the macro type, we know the class type name, or
+        #   the instance type name
+        if self.checker in ('OBJECT_GET_CLASS', 'OBJECT_CLASS_CHECK'):
+            classtype = c_type
+        elif self.checker == 'OBJECT_CHECK':
+            instancetype = c_type
+            uppercase = self.name
+        else:
+            assert False
+        if expected_suffix and self.name.endswith(expected_suffix):
+            uppercase = self.name[:-len(expected_suffix)]
+        return TypeIdentifiers(typename=typename, classtype=classtype,
+                               instancetype=instancetype, uppercase=uppercase)
+
+    def gen_patches(self) -> Iterable[Patch]:
+        # the implementation is a bit tricky because we need to group
+        # macros dealing with the same type into a single declaration
+        if self.type_identifiers is None:
+            self.warn("couldn't extract type information from macro %s", self.name)
+            return
+
+        if self.name == 'INTERFACE_CLASS':
+            # INTERFACE_CLASS is special and won't be patched
+            return
+
+        for checker,suffix in EXPECTED_CHECKER_SUFFIXES:
+            if self.name.endswith(suffix):
+                if self.checker != checker:
+                    self.warn("macro %s is using macro %s instead of %s", self.name, self.checker, checker)
+                    return
+                break
+
+        matches = self.find_matching_macros()
+        DBG("found %d matching macros: %s", len(matches), ' '.join(m.name for m in matches))
+        # we will generate patches only when processing the first macro:
+        if matches[0].start != self.start:
+            DBG("skipping %s (will patch when handling %s)", self.name, matches[0].name)
+            return
+
+
+        ids = self.merge_ids(matches)
+        if ids is None:
+            DBG("type identifier mismatch, won't patch %s", self.name)
+            return
+
+        if not ids.uppercase:
+            self.warn("macro %s doesn't follow the expected name pattern", self.name)
+            return
+        if not ids.typename:
+            self.warn("macro %s: couldn't extract type name", self.name)
+            return
+
+        #issues = ids.check_consistency()
+        #if issues:
+        #    for i in issues:
+        #        self.warn("inconsistent identifiers: %s", i)
+
+        names = [n for n in (ids.instancetype, ids.classtype, ids.uppercase, ids.typename)
+                 if n is not None]
+        if len(set(names)) != len(names):
+            self.warn("duplicate names used by macro: %r", ids)
+            return
+
+        assert ids.classtype or ids.instancetype
+        assert ids.typename
+        assert ids.uppercase
+        if ids.classtype and ids.instancetype:
+            new_decl = (f'DECLARE_OBJ_CHECKERS({ids.instancetype}, {ids.classtype},\n'
+                        f'                     {ids.uppercase}, {ids.typename})\n')
+        elif ids.classtype:
+            new_decl = (f'DECLARE_CLASS_CHECKERS({ids.classtype}, {ids.uppercase},\n'
+                        f'                       {ids.typename})\n')
+        elif ids.instancetype:
+            new_decl = (f'DECLARE_INSTANCE_CHECKER({ids.instancetype}, {ids.uppercase},\n'
+                        f'                         {ids.typename})\n')
+        else:
+            assert False
+
+        # we need to ensure the typedefs are already available
+        issues = []
+        for t in [ids.instancetype, ids.classtype]:
+            if not t:
+                continue
+            if re.fullmatch(RE_STRUCT_TYPE, t):
+                self.info("type %s is not a typedef", t)
+                continue
+            td = find_typedef(self.file, t)
+            #if not td and self.allfiles.find_file('include/qemu/typedefs.h'):
+            #
+            if not td:
+                # it is OK if the typedef is in typedefs.h
+                f = self.allfiles.find_file('include/qemu/typedefs.h')
+                if f and find_typedef(f, t):
+                    self.info("typedef %s found in typedefs.h", t)
+                    continue
+
+                issues.append("couldn't find typedef %s" % (t))
+            elif td.start() > self.start():
+                issues.append("typedef %s need to be moved earlier in the file" % (td.name))
+
+        for issue in issues:
+            self.warn(issue)
+
+        if issues and not self.file.force:
+            return
+
+        # delete all matching macros and add new declaration:
+        for m in matches:
+            yield m.make_patch('')
+        for issue in issues:
+            yield self.prepend("/* FIXME: %s */\n" % (issue))
+        yield self.append(new_decl)
+
+class InterfaceCheckMacro(FileMatch):
+    """Type checking macro using INTERFACE_CHECK
+    Will be replaced by DECLARE_INTERFACE_CHECKER
+    """
+    regexp = S(RE_MACRO_DEFINE,
+               'INTERFACE_CHECK',
+               r'\s*\(\s*', OR(NAMED('instancetype', RE_IDENTIFIER), RE_TYPE, name='c_type'),
+               r'\s*,', CPP_SPACE,
+               OPTIONAL_PARS(RE_IDENTIFIER), r',', CPP_SPACE,
+               NAMED('qom_typename', RE_IDENTIFIER), r'\s*\)\n')
+
+    def required_identifiers(self) -> Iterable[RequiredIdentifier]:
+        yield RequiredIdentifier('include', '"qom/object.h"')
+        yield RequiredIdentifier('type', self.group('instancetype'))
+        yield RequiredIdentifier('constant', self.group('qom_typename'))
+
+    def gen_patches(self) -> Iterable[Patch]:
+        if self.file.filename_matches('qom/object.h'):
+            self.debug("skipping object.h")
+            return
+
+        typename = self.group('qom_typename')
+        uppercase = self.name
+        instancetype = self.group('instancetype')
+        c = f"DECLARE_INTERFACE_CHECKER({instancetype}, {uppercase},\n"+\
+            f"                          {typename})\n"
+        yield self.make_patch(c)
+
+
+class TypeDeclaration(FileMatch):
+    """Parent class to all type declarations"""
+    @property
+    def instancetype(self) -> Optional[str]:
+        return self.getgroup('instancetype')
+
+    @property
+    def classtype(self) -> Optional[str]:
+        return self.getgroup('classtype')
+
+    @property
+    def typename(self) -> Optional[str]:
+        return self.getgroup('typename')
+
+class TypeCheckerDeclaration(TypeDeclaration):
+    """Parent class to all type checker declarations"""
+    @property
+    def typename(self) -> str:
+        return self.group('typename')
+
+    @property
+    def uppercase(self) -> str:
+        return self.group('uppercase')
+
+class DeclareInstanceChecker(TypeCheckerDeclaration):
+    """DECLARE_INSTANCE_CHECKER use"""
+    #TODO: replace lonely DECLARE_INSTANCE_CHECKER with DECLARE_OBJ_CHECKERS
+    #      if all types are found.
+    #      This will require looking up the correct class type in the TypeInfo
+    #      structs in another file
+    regexp = S(r'^[ \t]*DECLARE_INSTANCE_CHECKER\s*\(\s*',
+               NAMED('instancetype', RE_TYPE), r'\s*,\s*',
+               NAMED('uppercase', RE_IDENTIFIER), r'\s*,\s*',
+               OR(RE_IDENTIFIER, RE_STRING, RE_MACRO_CONCAT, RE_FUN_CALL, name='typename'), SP,
+               r'\)[ \t]*;?[ \t]*\n')
+
+    def required_identifiers(self) -> Iterable[RequiredIdentifier]:
+        yield RequiredIdentifier('include', '"qom/object.h"')
+        yield RequiredIdentifier('constant', self.group('typename'))
+        yield RequiredIdentifier('type', self.group('instancetype'))
+
+class DeclareInterfaceChecker(TypeCheckerDeclaration):
+    """DECLARE_INTERFACE_CHECKER use"""
+    regexp = S(r'^[ \t]*DECLARE_INTERFACE_CHECKER\s*\(\s*',
+               NAMED('instancetype', RE_TYPE), r'\s*,\s*',
+               NAMED('uppercase', RE_IDENTIFIER), r'\s*,\s*',
+               OR(RE_IDENTIFIER, RE_STRING, RE_MACRO_CONCAT, RE_FUN_CALL, name='typename'), SP,
+               r'\)[ \t]*;?[ \t]*\n')
+
+    def required_identifiers(self) -> Iterable[RequiredIdentifier]:
+        yield RequiredIdentifier('include', '"qom/object.h"')
+        yield RequiredIdentifier('constant', self.group('typename'))
+        yield RequiredIdentifier('type', self.group('instancetype'))
+
+class DeclareInstanceType(TypeDeclaration):
+    """DECLARE_INSTANCE_TYPE use"""
+    regexp = S(r'^[ \t]*DECLARE_INSTANCE_TYPE\s*\(\s*',
+               NAMED('uppercase', RE_IDENTIFIER), r'\s*,\s*',
+               NAMED('instancetype', RE_TYPE), SP,
+               r'\)[ \t]*;?[ \t]*\n')
+
+    def required_identifiers(self) -> Iterable[RequiredIdentifier]:
+        yield RequiredIdentifier('include', '"qom/object.h"')
+        yield RequiredIdentifier('type', self.group('instancetype'))
+
+class DeclareClassType(TypeDeclaration):
+    """DECLARE_CLASS_TYPE use"""
+    regexp = S(r'^[ \t]*DECLARE_CLASS_TYPE\s*\(\s*',
+               NAMED('uppercase', RE_IDENTIFIER), r'\s*,\s*',
+               NAMED('classtype', RE_TYPE), SP,
+               r'\)[ \t]*;?[ \t]*\n')
+
+    def required_identifiers(self) -> Iterable[RequiredIdentifier]:
+        yield RequiredIdentifier('include', '"qom/object.h"')
+        yield RequiredIdentifier('type', self.group('classtype'))
+
+
+
+class DeclareClassCheckers(TypeCheckerDeclaration):
+    """DECLARE_CLASS_CHECKER use"""
+    regexp = S(r'^[ \t]*DECLARE_CLASS_CHECKERS\s*\(\s*',
+               NAMED('classtype', RE_TYPE), r'\s*,\s*',
+               NAMED('uppercase', RE_IDENTIFIER), r'\s*,\s*',
+               OR(RE_IDENTIFIER, RE_STRING, RE_MACRO_CONCAT, RE_FUN_CALL, name='typename'), SP,
+               r'\)[ \t]*;?[ \t]*\n')
+
+    def required_identifiers(self) -> Iterable[RequiredIdentifier]:
+        yield RequiredIdentifier('include', '"qom/object.h"')
+        yield RequiredIdentifier('constant', self.group('typename'))
+        yield RequiredIdentifier('type', self.group('classtype'))
+
+class DeclareObjCheckers(TypeCheckerDeclaration):
+    """DECLARE_OBJ_CHECKERS use"""
+    #TODO: detect when OBJECT_DECLARE_SIMPLE_TYPE can be used
+    regexp = S(r'^[ \t]*DECLARE_OBJ_CHECKERS\s*\(\s*',
+               NAMED('instancetype', RE_TYPE), r'\s*,\s*',
+               NAMED('classtype', RE_TYPE), r'\s*,\s*',
+               NAMED('uppercase', RE_IDENTIFIER), r'\s*,\s*',
+               OR(RE_IDENTIFIER, RE_STRING, RE_MACRO_CONCAT, RE_FUN_CALL, name='typename'), SP,
+               r'\)[ \t]*;?[ \t]*\n')
+
+    def required_identifiers(self) -> Iterable[RequiredIdentifier]:
+        yield RequiredIdentifier('include', '"qom/object.h"')
+        yield RequiredIdentifier('constant', self.group('typename'))
+        yield RequiredIdentifier('type', self.group('classtype'))
+        yield RequiredIdentifier('type', self.group('instancetype'))
+
+class TypeDeclarationFixup(FileMatch):
+    """Common base class for code that will look at a set of type declarations"""
+    regexp = RE_FILE_BEGIN
+    def gen_patches(self) -> Iterable[Patch]:
+        if self.file.filename_matches('qom/object.h'):
+            self.debug("skipping object.h")
+            return
+
+        # group checkers by uppercase name:
+        decl_types: List[Type[TypeDeclaration]] = [DeclareInstanceChecker, DeclareInstanceType,
+                                                   DeclareClassCheckers, DeclareClassType,
+                                                   DeclareObjCheckers]
+        checker_dict: Dict[str, List[TypeDeclaration]] = {}
+        for t in decl_types:
+            for m in self.file.matches_of_type(t):
+                checker_dict.setdefault(m.group('uppercase'), []).append(m)
+        self.debug("checker_dict: %r", checker_dict)
+        for uppercase,checkers in checker_dict.items():
+            fields = ('instancetype', 'classtype', 'uppercase', 'typename')
+            fvalues = dict((field, set(getattr(m, field) for m in checkers
+                                       if getattr(m, field, None) is not None))
+                            for field in fields)
+            for field,values in fvalues.items():
+                if len(values) > 1:
+                    for c in checkers:
+                        c.warn("%s mismatch (%s)", field, ' '.join(values))
+                    return
+
+            field_dict = dict((f, v.pop() if v else None) for f,v in fvalues.items())
+            yield from self.gen_patches_for_type(uppercase, checkers, field_dict)
+
+    def find_conflicts(self, uppercase: str, checkers: List[TypeDeclaration]) -> bool:
+        """Look for conflicting declarations that would make it unsafe to add new ones"""
+        conflicting: List[FileMatch] = []
+        # conflicts in the same file:
+        conflicting.extend(chain(self.file.find_matches(DefineDirective, uppercase),
+                                 self.file.find_matches(DeclareInterfaceChecker, uppercase, 'uppercase'),
+                                 self.file.find_matches(DeclareClassType, uppercase, 'uppercase'),
+                                 self.file.find_matches(DeclareInstanceType, uppercase, 'uppercase')))
+
+        # conflicts in another file:
+        conflicting.extend(o for o in chain(self.allfiles.find_matches(DeclareInstanceChecker, uppercase, 'uppercase'),
+                                            self.allfiles.find_matches(DeclareClassCheckers, uppercase, 'uppercase'),
+                                            self.allfiles.find_matches(DeclareInterfaceChecker, uppercase, 'uppercase'),
+                                            self.allfiles.find_matches(DefineDirective, uppercase))
+                           if o is not None and o.file != self.file
+                               # if both are .c files, there's no conflict at all:
+                               and not (o.file.filename.suffix == '.c' and
+                                       self.file.filename.suffix == '.c'))
+
+        if conflicting:
+            for c in checkers:
+                c.warn("skipping due to conflicting %s macro", uppercase)
+            for o in conflicting:
+                if o is None:
+                    continue
+                o.warn("conflicting %s macro is here", uppercase)
+            return True
+
+        return False
+
+    def gen_patches_for_type(self, uppercase: str,
+                             checkers: List[TypeDeclaration],
+                             fields: Dict[str, Optional[str]]) -> Iterable[Patch]:
+        """Should be reimplemented by subclasses"""
+        return
+        yield
+
+class DeclareVoidTypes(TypeDeclarationFixup):
+    """Add DECLARE_*_TYPE(..., void) when there's no declared type"""
+    regexp = RE_FILE_BEGIN
+    def gen_patches_for_type(self, uppercase: str,
+                             checkers: List[TypeDeclaration],
+                             fields: Dict[str, Optional[str]]) -> Iterable[Patch]:
+        if self.find_conflicts(uppercase, checkers):
+            return
+
+        #_,last_checker = max((m.start(), m) for m in checkers)
+        _,first_checker = min((m.start(), m) for m in checkers)
+
+        if not any(m.instancetype for m in checkers):
+            yield first_checker.prepend(f'DECLARE_INSTANCE_TYPE({uppercase}, void)\n')
+        if not any(m.classtype for m in checkers):
+            yield first_checker.prepend(f'DECLARE_CLASS_TYPE({uppercase}, void)\n')
+
+        #if not all(len(v) == 1 for v in fvalues.values()):
+        #    return
+        #
+        #final_values = dict((field, values.pop())
+        #                    for field,values in fvalues.items())
+        #s = (f"DECLARE_OBJ_CHECKERS({final_values['instancetype']}, {final_values['classtype']},\n"+
+        #        f"                     {final_values['uppercase']}, {final_values['typename']})\n")
+        #for c in checkers:
+        #    yield c.make_removal_patch()
+        #yield last_checker.append(s)
+
+
+class AddDeclareTypeName(TypeDeclarationFixup):
+    """Add DECLARE_TYPE_NAME declarations if necessary"""
+    def gen_patches_for_type(self, uppercase: str,
+                             checkers: List[TypeDeclaration],
+                             fields: Dict[str, Optional[str]]) -> Iterable[Patch]:
+        typename = fields.get('typename')
+        if typename is None:
+            self.warn("typename unavailable")
+            return
+        if typename == f'TYPE_{uppercase}':
+            self.info("already using TYPE_%s as type name", uppercase)
+            return
+        if self.file.find_match(DeclareTypeName, uppercase, 'uppercase'):
+            self.info("type name for %s already declared", uppercase)
+            return
+        _,first_checker = min((m.start(), m) for m in checkers)
+        s = f'DECLARE_TYPE_NAME({uppercase}, {typename})\n'
+        yield first_checker.prepend(s)
+
+class TrivialClassStruct(FileMatch):
+    """Trivial class struct"""
+    regexp = S(r'^[ \t]*struct\s*', NAMED('name', RE_IDENTIFIER),
+               r'\s*{\s*', NAMED('parent_struct', RE_IDENTIFIER), r'\s*parent(_class)?\s*;\s*};\n')
+
+class DeclareTypeName(FileMatch):
+    """DECLARE_TYPE_NAME usage"""
+    regexp = S(r'^[ \t]*DECLARE_TYPE_NAME\s*\(',
+               NAMED('uppercase', RE_IDENTIFIER), r'\s*,\s*',
+               OR(RE_IDENTIFIER, RE_STRING, RE_MACRO_CONCAT, RE_FUN_CALL, name='typename'),
+               r'\s*\);?[ \t]*\n')
+
+class ObjectDeclareType(TypeCheckerDeclaration):
+    """OBJECT_DECLARE_TYPE usage
+    Will be replaced with OBJECT_DECLARE_SIMPLE_TYPE if possible
+    """
+    regexp = S(r'^[ \t]*OBJECT_DECLARE_TYPE\s*\(',
+               NAMED('instancetype', RE_TYPE), r'\s*,\s*',
+               NAMED('classtype', RE_TYPE), r'\s*,\s*',
+               NAMED('uppercase', RE_IDENTIFIER), SP,
+               r'\)[ \t]*;?[ \t]*\n')
+
+    def gen_patches(self):
+        DBG("groups: %r", self.match.groupdict())
+        trivial_struct = self.file.find_match(TrivialClassStruct, self.group('classtype'))
+        if trivial_struct:
+            d = self.match.groupdict().copy()
+            d['parent_struct'] = trivial_struct.group("parent_struct")
+            yield trivial_struct.make_removal_patch()
+            c = ("OBJECT_DECLARE_SIMPLE_TYPE(%(instancetype)s, %(lowercase)s,\n"
+                 "                           %(uppercase)s, %(parent_struct)s)\n" % d)
+            yield self.make_patch(c)
+
+class ObjectDeclareSimpleType(TypeCheckerDeclaration):
+    """OBJECT_DECLARE_SIMPLE_TYPE usage"""
+    regexp = S(r'^[ \t]*OBJECT_DECLARE_SIMPLE_TYPE\s*\(',
+               NAMED('instancetype', RE_TYPE), r'\s*,\s*',
+               NAMED('uppercase', RE_IDENTIFIER), SP,
+               r'\)[ \t]*;?[ \t]*\n')
+
+class OldStyleObjectDeclareSimpleType(TypeCheckerDeclaration):
+    """OBJECT_DECLARE_SIMPLE_TYPE usage (old API)"""
+    regexp = S(r'^[ \t]*OBJECT_DECLARE_SIMPLE_TYPE\s*\(',
+               NAMED('instancetype', RE_TYPE), r'\s*,\s*',
+               NAMED('lowercase', RE_IDENTIFIER), r'\s*,\s*',
+               NAMED('uppercase', RE_IDENTIFIER), r'\s*,\s*',
+               NAMED('parent_classtype', RE_TYPE), SP,
+               r'\)[ \t]*;?[ \t]*\n')
+
+    @property
+    def classtype(self) -> Optional[str]:
+        instancetype = self.instancetype
+        assert instancetype
+        return f"{instancetype}Class"
+
+def find_typename_uppercase(files: FileList, typename: str) -> Optional[str]:
+    """Try to find what's the right MODULE_OBJ_NAME for a given type name"""
+    decl = files.find_match(DeclareTypeName, name=typename, group='typename')
+    if decl:
+        return decl.group('uppercase')
+    if typename.startswith('TYPE_'):
+        return typename[len('TYPE_'):]
+    return None
+
+def find_type_checkers(files:FileList, name:str, group:str='uppercase') -> Iterable[TypeCheckerDeclaration]:
+    """Find usage of DECLARE*CHECKER macro"""
+    c: Type[TypeCheckerDeclaration]
+    for c in (DeclareInstanceChecker, DeclareClassCheckers, DeclareObjCheckers, ObjectDeclareType, ObjectDeclareSimpleType):
+        yield from files.find_matches(c, name=name, group=group)
+
+class Include(FileMatch):
+    """#include directive"""
+    regexp = RE_INCLUDE
+    def provided_identifiers(self) -> Iterable[RequiredIdentifier]:
+        yield RequiredIdentifier('include', self.group('includepath'))
+
+class InitialIncludes(FileMatch):
+    """Initial #include block"""
+    regexp = S(RE_FILE_BEGIN,
+               M(SP, RE_COMMENTS,
+                 r'^[ \t]*#[ \t]*ifndef[ \t]+', RE_IDENTIFIER, r'[ \t]*\n',
+                 n='?', name='ifndef_block'),
+               M(SP, RE_COMMENTS,
+                 OR(RE_INCLUDE, RE_SIMPLEDEFINE),
+                 n='*', name='includes'))
+
+class SymbolUserList(NamedTuple):
+    definitions: List[FileMatch]
+    users: List[FileMatch]
+
+class MoveSymbols(FileMatch):
+    """Handle missing symbols
+    - Move typedefs and defines when necessary
+    - Add missing #include lines when necessary
+    """
+    regexp = RE_FILE_BEGIN
+
+    def gen_patches(self) -> Iterator[Patch]:
+        if self.file.filename_matches('qom/object.h'):
+            self.debug("skipping object.h")
+            return
+
+        index: Dict[RequiredIdentifier, SymbolUserList] = {}
+        definition_classes = [SimpleTypedefMatch, FullStructTypedefMatch, ConstantDefine, Include]
+        user_classes = [TypeCheckMacro, DeclareObjCheckers, DeclareInstanceChecker, DeclareClassCheckers, InterfaceCheckMacro]
+
+        # first we scan for all symbol definitions and usage:
+        for dc in definition_classes:
+            defs = self.file.matches_of_type(dc)
+            for d in defs:
+                DBG("scanning %r", d)
+                for i in d.provided_identifiers():
+                    index.setdefault(i, SymbolUserList([], [])).definitions.append(d)
+        DBG("index: %r", list(index.keys()))
+        for uc in user_classes:
+            users = self.file.matches_of_type(uc)
+            for u in users:
+                for i in u.required_identifiers():
+                    index.setdefault(i, SymbolUserList([], [])).users.append(u)
+
+        # validate all symbols:
+        for i,ul in index.items():
+            if not ul.users:
+                # unused symbol
+                continue
+
+            # symbol not defined
+            if len(ul.definitions) == 0:
+                if i.type == 'include':
+                   includes, = self.file.matches_of_type(InitialIncludes)
+                   #FIXME: don't do this if we're already inside qom/object.h
+                   yield includes.append(f'#include {i.name}\n')
+                else:
+                    u.warn("definition of %s %s not found in file", i.type, i.name)
+                continue
+
+            # symbol defined twice:
+            if len(ul.definitions) > 1:
+                ul.definitions[1].warn("%s defined twice", i.name)
+                ul.definitions[0].warn("previously defined here")
+                continue
+
+            # symbol defined.  check if all users are after its definition:
+            assert len(ul.definitions) == 1
+            definition = ul.definitions[0]
+            DBG("handling repositioning of %r", definition)
+            earliest = min(ul.users, key=lambda u: u.start())
+            if earliest.start() > definition.start():
+                DBG("%r is OK", definition)
+                continue
+
+            DBG("%r needs to be moved", definition)
+            if isinstance(definition, SimpleTypedefMatch) \
+               or isinstance(definition, ConstantDefine):
+                # simple typedef or define can be moved directly:
+                yield definition.make_removal_patch()
+                yield earliest.prepend(definition.group(0))
+            elif isinstance(definition, FullStructTypedefMatch) \
+                 and definition.group('structname'):
+                # full struct typedef is more complex: we need to remove
+                # the typedef
+                yield from definition.move_typedef(earliest.start())
+            else:
+                definition.warn("definition of %s %s needs to be moved earlier in the file", i.type, i.name)
+                earliest.warn("definition of %s %s is used here", i.type, i.name)
+
+
+class EmptyPreprocessorConditional(FileMatch):
+    """Delete empty preprocessor conditionals"""
+    regexp = r'^[ \t]*#(if|ifdef)[ \t].*\n+[ \t]*#endif[ \t]*\n'
+    def gen_patches(self) -> Iterable[Patch]:
+        yield self.make_removal_patch()
diff --git a/scripts/codeconverter/codeconverter/qom_type_info.py b/scripts/codeconverter/codeconverter/qom_type_info.py
new file mode 100644
index 000000000..255cb5992
--- /dev/null
+++ b/scripts/codeconverter/codeconverter/qom_type_info.py
@@ -0,0 +1,969 @@
+# Copyright (C) 2020 Red Hat Inc.
+#
+# Authors:
+#  Eduardo Habkost 
+#
+# This work is licensed under the terms of the GNU GPL, version 2.  See
+# the COPYING file in the top-level directory.
+import re
+from .regexps import *
+from .patching import *
+from .utils import *
+from .qom_macros import *
+
+TI_FIELDS = [ 'name', 'parent', 'abstract', 'interfaces',
+    'instance_size', 'instance_init', 'instance_post_init', 'instance_finalize',
+    'class_size', 'class_init', 'class_base_init', 'class_data']
+
+RE_TI_FIELD_NAME = OR(*TI_FIELDS)
+
+RE_TI_FIELD_INIT = S(r'[ \t]*', NAMED('comments', RE_COMMENTS),
+                     r'\.', NAMED('field', RE_TI_FIELD_NAME), r'\s*=\s*',
+                     NAMED('value', RE_EXPRESSION), r'[ \t]*,?[ \t]*\n')
+RE_TI_FIELDS = M(RE_TI_FIELD_INIT)
+
+RE_TYPEINFO_START = S(r'^[ \t]*', M(r'(static|const)\s+', name='modifiers'), r'TypeInfo\s+',
+                      NAMED('name', RE_IDENTIFIER), r'\s*=\s*{[ \t]*\n')
+
+ParsedArray = List[str]
+ParsedInitializerValue = Union[str, ParsedArray]
+class InitializerValue(NamedTuple):
+    raw: str
+    parsed: Optional[ParsedInitializerValue]
+    match: Optional[Match]
+
+class ArrayItem(FileMatch):
+    regexp = RE_ARRAY_ITEM
+
+class ArrayInitializer(FileMatch):
+    regexp = RE_ARRAY
+
+    def parsed(self) -> ParsedArray:
+        #DBG('parse_array: %r', m.group(0))
+        return [m.group('arrayitem') for m in self.group_finditer(ArrayItem, 'arrayitems')]
+
+class FieldInitializer(FileMatch):
+    regexp = RE_TI_FIELD_INIT
+
+    @property
+    def raw(self) -> str:
+        return self.group('value')
+
+    @property
+    def parsed(self) -> ParsedInitializerValue:
+        parsed: ParsedInitializerValue = self.raw
+        #DBG("parse_initializer_value: %r", s)
+        array = self.try_group_match(ArrayInitializer, 'value')
+        if array:
+            assert isinstance(array, ArrayInitializer)
+            return array.parsed()
+        return parsed
+
+TypeInfoInitializers = Dict[str, FieldInitializer]
+
+class TypeDefinition(FileMatch):
+    """
+    Common base class for type definitions (TypeInfo variables or OBJECT_DEFINE* macros)
+    """
+    @property
+    def instancetype(self) -> Optional[str]:
+        return self.group('instancetype')
+
+    @property
+    def classtype(self) -> Optional[str]:
+        return self.group('classtype')
+
+    @property
+    def uppercase(self) -> Optional[str]:
+        return self.group('uppercase')
+
+    @property
+    def parent_uppercase(self) -> str:
+        return self.group('parent_uppercase')
+
+    @property
+    def initializers(self) -> Optional[TypeInfoInitializers]:
+        if getattr(self, '_inititalizers', None):
+            self._initializers: TypeInfoInitializers
+            return self._initializers
+        fields = self.group('fields')
+        if fields is None:
+            return None
+        d = dict((fm.group('field'), fm)
+                  for fm in self.group_finditer(FieldInitializer, 'fields'))
+        self._initializers = d # type: ignore
+        return self._initializers
+
+
+class TypeInfoVar(TypeDefinition):
+    """TypeInfo variable declaration with initializer"""
+    regexp = S(NAMED('begin', RE_TYPEINFO_START),
+               M(NAMED('fields', RE_TI_FIELDS),
+                 NAMED('endcomments', SP, RE_COMMENTS),
+                 NAMED('end', r'};?\n'),
+                 n='?', name='fullspec'))
+
+    def is_static(self) -> bool:
+        return 'static' in self.group('modifiers')
+
+    def is_const(self) -> bool:
+        return 'const' in self.group('modifiers')
+
+    def is_full(self) -> bool:
+        return bool(self.group('fullspec'))
+
+    def get_initializers(self) -> TypeInfoInitializers:
+        """Helper for code that needs to deal with missing initializer info"""
+        if self.initializers is None:
+            return {}
+        return self.initializers
+
+    def get_raw_initializer_value(self, field: str, default: str = '') -> str:
+        initializers = self.get_initializers()
+        if field in initializers:
+            return initializers[field].raw
+        else:
+            return default
+
+    @property
+    def typename(self) -> Optional[str]:
+        return self.get_raw_initializer_value('name')
+
+    @property
+    def uppercase(self) -> Optional[str]:
+        typename = self.typename
+        if not typename:
+            return None
+        if not typename.startswith('TYPE_'):
+            return None
+        return typename[len('TYPE_'):]
+
+    @property
+    def classtype(self) -> Optional[str]:
+        class_size = self.get_raw_initializer_value('class_size')
+        if not class_size:
+            return None
+        m = re.fullmatch(RE_SIZEOF, class_size)
+        if not m:
+            return None
+        return m.group('sizeoftype')
+
+    @property
+    def instancetype(self) -> Optional[str]:
+        instance_size = self.get_raw_initializer_value('instance_size')
+        if not instance_size:
+            return None
+        m = re.fullmatch(RE_SIZEOF, instance_size)
+        if not m:
+            return None
+        return m.group('sizeoftype')
+
+
+    #def extract_identifiers(self) -> Optional[TypeIdentifiers]:
+    #    """Try to extract identifiers from names being used"""
+    #    DBG("extracting idenfiers from %s", self.name)
+        #uppercase = None
+        #if typename and re.fullmatch(RE_IDENTIFIER, typename) and typename.startswith("TYPE_"):
+        #    uppercase = typename[len('TYPE_'):]
+        #lowercase = None
+        #funcs = set()
+        #prefixes = set()
+        #for field,suffix in [('instance_init', '_init'),
+        #                     ('instance_finalize', '_finalize'),
+        #                     ('class_init', '_class_init')]:
+        #    if field not in values:
+        #        continue
+        #    func = values[field].raw
+        #    funcs.add(func)
+        #    if func.endswith(suffix):
+        #        prefixes.add(func[:-len(suffix)])
+        #    else:
+        #        self.warn("function name %s doesn't have expected %s suffix",
+        #                  func, suffix)
+        #if len(prefixes) == 1:
+        #    lowercase = prefixes.pop()
+        #elif len(prefixes) > 1:
+        #    self.warn("inconsistent function names: %s", ' '.join(funcs))
+
+        #.parent = TYPE_##PARENT_MODULE_OBJ_NAME, \
+        #return TypeIdentifiers(typename=typename,
+        #                       uppercase=uppercase, lowercase=lowercase,
+        #                       instancetype=instancetype, classtype=classtype)
+
+    def append_field(self, field: str, value: str) -> Patch:
+        """Generate patch appending a field initializer"""
+        content = f'    .{field} = {value},\n'
+        fm = self.group_match('fields')
+        assert fm
+        return fm.append(content)
+
+    def patch_field(self, field: str, replacement: str) -> Patch:
+        """Generate patch replacing a field initializer"""
+        initializers = self.initializers
+        assert initializers
+        value = initializers.get(field)
+        assert value
+        return value.make_patch(replacement)
+
+    def remove_field(self, field: str) -> Iterable[Patch]:
+        initializers = self.initializers
+        assert initializers
+        if field in initializers:
+            yield self.patch_field(field, '')
+
+    def remove_fields(self, *fields: str) -> Iterable[Patch]:
+        for f in fields:
+            yield from self.remove_field(f)
+
+    def patch_field_value(self, field: str, replacement: str) -> Patch:
+        """Replace just the value of a field initializer"""
+        initializers = self.initializers
+        assert initializers
+        value = initializers.get(field)
+        assert value
+        vm = value.group_match('value')
+        assert vm
+        return vm.make_patch(replacement)
+
+
+class RemoveRedundantClassSize(TypeInfoVar):
+    """Remove class_size when using OBJECT_DECLARE_SIMPLE_TYPE"""
+    def gen_patches(self) -> Iterable[Patch]:
+        initializers = self.initializers
+        if initializers is None:
+            return
+        if 'class_size' not in initializers:
+            return
+
+        self.debug("Handling %s", self.name)
+        m = re.fullmatch(RE_SIZEOF, initializers['class_size'].raw)
+        if not m:
+            self.warn("%s class_size is not sizeof?", self.name)
+            return
+        classtype = m.group('sizeoftype')
+        if not classtype.endswith('Class'):
+            self.warn("%s class size type (%s) is not *Class?", self.name, classtype)
+            return
+        self.debug("classtype is %s", classtype)
+        instancetype = classtype[:-len('Class')]
+        self.debug("intanceypte is %s", instancetype)
+        self.debug("searching for simpletype declaration using %s as InstanceType", instancetype)
+        decl = self.allfiles.find_match(OldStyleObjectDeclareSimpleType,
+                                        instancetype, 'instancetype')
+        if not decl:
+            self.debug("No simpletype declaration found for %s", instancetype)
+            return
+        self.debug("Found simple type declaration")
+        decl.debug("declaration is here")
+        yield from self.remove_field('class_size')
+
+class RemoveDeclareSimpleTypeArg(OldStyleObjectDeclareSimpleType):
+    """Remove class_size when using OBJECT_DECLARE_SIMPLE_TYPE"""
+    def gen_patches(self) -> Iterable[Patch]:
+        c = (f'OBJECT_DECLARE_SIMPLE_TYPE({self.group("instancetype")}, {self.group("lowercase")},\n'
+             f'                           {self.group("uppercase")})\n')
+        yield self.make_patch(c)
+
+class UseDeclareTypeExtended(TypeInfoVar):
+    """Replace TypeInfo variable with OBJECT_DEFINE_TYPE_EXTENDED"""
+    def gen_patches(self) -> Iterable[Patch]:
+        # this will just ensure the caches for find_match() and matches_for_type()
+        # will be loaded in advance:
+        find_type_checkers(self.allfiles, 'xxxxxxxxxxxxxxxxx')
+
+        if not self.is_static():
+            self.info("Skipping non-static TypeInfo variable")
+            return
+
+        type_info_macro = self.file.find_match(TypeInfoMacro, self.name)
+        if not type_info_macro:
+            self.warn("TYPE_INFO(%s) line not found", self.name)
+            return
+
+        values = self.initializers
+        if values is None:
+            return
+        if 'name' not in values:
+            self.warn("name not set in TypeInfo variable %s", self.name)
+            return
+
+        typename = values['name'].raw
+
+        if 'parent' not in values:
+            self.warn("parent not set in TypeInfo variable %s", self.name)
+            return
+        parent_typename = values['parent'].raw
+
+        instancetype = None
+        if 'instance_size' in values:
+            m = re.fullmatch(RE_SIZEOF, values['instance_size'].raw)
+            if m:
+                instancetype = m.group('sizeoftype')
+            else:
+                self.warn("can't extract instance type in TypeInfo variable %s", self.name)
+                self.warn("instance_size is set to: %r", values['instance_size'].raw)
+                return
+
+        classtype = None
+        if 'class_size' in values:
+            m = re.fullmatch(RE_SIZEOF, values['class_size'].raw)
+            if m:
+                classtype = m.group('sizeoftype')
+            else:
+                self.warn("can't extract class type in TypeInfo variable %s", self.name)
+                self.warn("class_size is set to: %r", values['class_size'].raw)
+                return
+
+        #for t in (typename, parent_typename):
+        #    if not re.fullmatch(RE_IDENTIFIER, t):
+        #        self.info("type name is not a macro/constant")
+        #        if instancetype or classtype:
+        #            self.warn("macro/constant type name is required for instance/class type")
+        #        if not self.file.force:
+        #            return
+
+        # Now, the challenge is to find out the right MODULE_OBJ_NAME for the
+        # type and for the parent type
+        self.info("TypeInfo variable for %s is here", typename)
+        uppercase = find_typename_uppercase(self.allfiles, typename)
+        if not uppercase:
+            self.info("Can't find right uppercase name for %s", typename)
+            if instancetype or classtype:
+                self.warn("Can't find right uppercase name for %s", typename)
+                self.warn("This will make type validation difficult in the future")
+            return
+
+        parent_uppercase = find_typename_uppercase(self.allfiles, parent_typename)
+        if not parent_uppercase:
+            self.info("Can't find right uppercase name for parent type (%s)", parent_typename)
+            if instancetype or classtype:
+                self.warn("Can't find right uppercase name for parent type (%s)", parent_typename)
+                self.warn("This will make type validation difficult in the future")
+            return
+
+        ok = True
+
+        #checkers: List[TypeCheckerDeclaration] = list(find_type_checkers(self.allfiles, uppercase))
+        #for c in checkers:
+        #    c.info("instance type checker declaration (%s) is here", c.group('uppercase'))
+        #if not checkers:
+        #    self.info("No type checkers declared for %s", uppercase)
+        #    if instancetype or classtype:
+        #        self.warn("Can't find where type checkers for %s (%s) are declared.  We will need them to validate sizes of %s",
+        #                  typename, uppercase, self.name)
+
+        if not instancetype:
+            instancetype = 'void'
+        if not classtype:
+            classtype = 'void'
+
+        #checker_instancetypes = set(c.instancetype for c in checkers
+        #                            if c.instancetype is not None)
+        #if len(checker_instancetypes) > 1:
+        #    self.warn("ambiguous set of type checkers")
+        #    for c in checkers:
+        #        c.warn("instancetype is %s here", c.instancetype)
+        #    ok = False
+        #elif len(checker_instancetypes) == 1:
+        #    checker_instancetype = checker_instancetypes.pop()
+        #    DBG("checker instance type: %r", checker_instancetype)
+        #    if instancetype != checker_instancetype:
+        #        self.warn("type at instance_size is %r.  Should instance_size be set to sizeof(%s) ?",
+        #                instancetype, checker_instancetype)
+        #        ok = False
+        #else:
+        #    if instancetype != 'void':
+        #        self.warn("instance type checker for %s (%s) not found", typename, instancetype)
+        #        ok = False
+
+        #checker_classtypes = set(c.classtype for c in checkers
+        #                         if c.classtype is not None)
+        #if len(checker_classtypes) > 1:
+        #    self.warn("ambiguous set of type checkers")
+        #    for c in checkers:
+        #        c.warn("classtype is %s here", c.classtype)
+        #    ok = False
+        #elif len(checker_classtypes) == 1:
+        #    checker_classtype = checker_classtypes.pop()
+        #    DBG("checker class type: %r", checker_classtype)
+        #    if classtype != checker_classtype:
+        #        self.warn("type at class_size is %r.  Should class_size be set to sizeof(%s) ?",
+        #                classtype, checker_classtype)
+        #        ok = False
+        #else:
+        #    if classtype != 'void':
+        #        self.warn("class type checker for %s (%s) not found", typename, classtype)
+        #        ok = False
+
+        #if not ok:
+        #    for c in checkers:
+        #        c.warn("Type checker declaration for %s (%s) is here",
+        #                           typename, type(c).__name__)
+        #    return
+
+        #if parent_decl is None:
+        #    self.warn("Can't find where parent type %s is declared", parent_typename)
+
+        #yield self.prepend(f'DECLARE_TYPE_NAME({uppercase}, {typename})\n')
+        #if not instancetype:
+        #    yield self.prepend(f'DECLARE_INSTANCE_TYPE({uppercase}, void)\n')
+        #if not classtype:
+        #    yield self.prepend(f'DECLARE_CLASS_TYPE({uppercase}, void)\n')
+        self.info("%s can be patched!", self.name)
+        replaced_fields = ['name', 'parent', 'instance_size', 'class_size']
+        begin = self.group_match('begin')
+        newbegin =  f'OBJECT_DEFINE_TYPE_EXTENDED({self.name},\n'
+        newbegin += f'                            {instancetype}, {classtype},\n'
+        newbegin += f'                            {uppercase}, {parent_uppercase}'
+        if set(values.keys()) - set(replaced_fields):
+            newbegin += ',\n'
+        yield begin.make_patch(newbegin)
+        yield from self.remove_fields(*replaced_fields)
+        end = self.group_match('end')
+        yield end.make_patch(')\n')
+        yield type_info_macro.make_removal_patch()
+
+class ObjectDefineTypeExtended(TypeDefinition):
+    """OBJECT_DEFINE_TYPE_EXTENDED usage"""
+    regexp = S(r'^[ \t]*OBJECT_DEFINE_TYPE_EXTENDED\s*\(\s*',
+               NAMED('name', RE_IDENTIFIER), r'\s*,\s*',
+               NAMED('instancetype', RE_IDENTIFIER), r'\s*,\s*',
+               NAMED('classtype', RE_IDENTIFIER), r'\s*,\s*',
+               NAMED('uppercase', RE_IDENTIFIER), r'\s*,\s*',
+               NAMED('parent_uppercase', RE_IDENTIFIER),
+               M(r',\s*\n',
+                 NAMED('fields', RE_TI_FIELDS),
+                 n='?'),
+               r'\s*\);?\n?')
+
+class ObjectDefineType(TypeDefinition):
+    """OBJECT_DEFINE_TYPE usage"""
+    regexp = S(r'^[ \t]*OBJECT_DEFINE_TYPE\s*\(\s*',
+               NAMED('lowercase', RE_IDENTIFIER), r'\s*,\s*',
+               NAMED('uppercase', RE_IDENTIFIER), r'\s*,\s*',
+               NAMED('parent_uppercase', RE_IDENTIFIER),
+               M(r',\s*\n',
+                 NAMED('fields', RE_TI_FIELDS),
+                 n='?'),
+               r'\s*\);?\n?')
+
+def find_type_definitions(files: FileList, uppercase: str) -> Iterable[TypeDefinition]:
+    types: List[Type[TypeDefinition]] = [TypeInfoVar, ObjectDefineType, ObjectDefineTypeExtended]
+    for t in types:
+        for m in files.matches_of_type(t):
+            m.debug("uppercase: %s", m.uppercase)
+    yield from (m for t in types
+                  for m in files.matches_of_type(t)
+                if m.uppercase == uppercase)
+
+class AddDeclareVoidClassType(TypeDeclarationFixup):
+    """Will add DECLARE_CLASS_TYPE(..., void) if possible"""
+    def gen_patches_for_type(self, uppercase: str,
+                             checkers: List[TypeDeclaration],
+                             fields: Dict[str, Optional[str]]) -> Iterable[Patch]:
+        defs = list(find_type_definitions(self.allfiles, uppercase))
+        if len(defs) > 1:
+            self.warn("multiple definitions for %s", uppercase)
+            for d in defs:
+                d.warn("definition found here")
+            return
+        elif len(defs) == 0:
+            self.warn("type definition for %s not found", uppercase)
+            return
+        d = defs[0]
+        if d.classtype is None:
+            d.info("definition for %s has classtype, skipping", uppercase)
+            return
+        class_type_checkers = [c for c in checkers
+                               if c.classtype is not None]
+        if class_type_checkers:
+            for c in class_type_checkers:
+                c.warn("class type checker for %s is present here", uppercase)
+            return
+
+        _,last_checker = max((m.start(), m) for m in checkers)
+        s = f'DECLARE_CLASS_TYPE({uppercase}, void)\n'
+        yield last_checker.append(s)
+
+class AddDeclareVoidInstanceType(FileMatch):
+    """Will add DECLARE_INSTANCE_TYPE(..., void) if possible"""
+    regexp = S(r'^[ \t]*#[ \t]*define', CPP_SPACE,
+               NAMED('name', r'TYPE_[a-zA-Z0-9_]+\b'),
+               CPP_SPACE, r'.*\n')
+
+    def gen_patches(self) -> Iterable[Patch]:
+        assert self.name.startswith('TYPE_')
+        uppercase = self.name[len('TYPE_'):]
+        defs = list(find_type_definitions(self.allfiles, uppercase))
+        if len(defs) > 1:
+            self.warn("multiple definitions for %s", uppercase)
+            for d in defs:
+                d.warn("definition found here")
+            return
+        elif len(defs) == 0:
+            self.warn("type definition for %s not found", uppercase)
+            return
+        d = defs[0]
+        instancetype = d.instancetype
+        if instancetype is not None and instancetype != 'void':
+            return
+
+        instance_checkers = [c for c in find_type_checkers(self.allfiles, uppercase)
+                             if c.instancetype]
+        if instance_checkers:
+            d.warn("instance type checker for %s already declared", uppercase)
+            for c in instance_checkers:
+                c.warn("instance checker for %s is here", uppercase)
+            return
+
+        s = f'DECLARE_INSTANCE_TYPE({uppercase}, void)\n'
+        yield self.append(s)
+
+class AddObjectDeclareType(DeclareObjCheckers):
+    """Will add OBJECT_DECLARE_TYPE(...) if possible"""
+    def gen_patches(self) -> Iterable[Patch]:
+        uppercase = self.uppercase
+        typename = self.group('typename')
+        instancetype = self.group('instancetype')
+        classtype = self.group('classtype')
+
+        if typename != f'TYPE_{uppercase}':
+            self.warn("type name mismatch: %s vs %s", typename, uppercase)
+            return
+
+        typedefs = [(t,self.allfiles.find_matches(SimpleTypedefMatch, t))
+                    for t in (instancetype, classtype)]
+        for t,tds in typedefs:
+            if not tds:
+                self.warn("typedef %s not found", t)
+                return
+            for td in tds:
+                td_type = td.group('typedef_type')
+                if td_type != f'struct {t}':
+                    self.warn("typedef mismatch: %s is defined as %s", t, td_type)
+                    td.warn("typedef is here")
+                    return
+
+        # look for reuse of same struct type
+        other_instance_checkers = [c for c in find_type_checkers(self.allfiles, instancetype, 'instancetype')
+                                if c.uppercase != uppercase]
+        if other_instance_checkers:
+            self.warn("typedef %s is being reused", instancetype)
+            for ic in other_instance_checkers:
+                ic.warn("%s is reused here", instancetype)
+            if not self.file.force:
+                return
+
+        decl_types: List[Type[TypeDeclaration]] = [DeclareClassCheckers, DeclareObjCheckers]
+        class_decls = [m for t in decl_types
+                       for m in self.allfiles.find_matches(t, uppercase, 'uppercase')]
+
+        defs = list(find_type_definitions(self.allfiles, uppercase))
+        if len(defs) > 1:
+            self.warn("multiple definitions for %s", uppercase)
+            for d in defs:
+                d.warn("definition found here")
+            if not self.file.force:
+                return
+        elif len(defs) == 0:
+            self.warn("type definition for %s not found", uppercase)
+            if not self.file.force:
+                return
+        else:
+            d = defs[0]
+            if d.instancetype != instancetype:
+                self.warn("mismatching instance type for %s (%s)", uppercase, instancetype)
+                d.warn("instance type declared here (%s)", d.instancetype)
+                if not self.file.force:
+                    return
+            if d.classtype != classtype:
+                self.warn("mismatching class type for %s (%s)", uppercase, classtype)
+                d.warn("class type declared here (%s)", d.classtype)
+                if not self.file.force:
+                    return
+
+        assert self.file.original_content
+        for t,tds in typedefs:
+            assert tds
+            for td in tds:
+                if td.file is not self.file:
+                    continue
+
+                # delete typedefs that are truly redundant:
+                # 1) defined after DECLARE_OBJ_CHECKERS
+                if td.start() > self.start():
+                    yield td.make_removal_patch()
+                # 2) defined before DECLARE_OBJ_CHECKERS, but unused
+                elif not re.search(r'\b'+t+r'\b', self.file.original_content[td.end():self.start()]):
+                    yield td.make_removal_patch()
+
+        c = (f'OBJECT_DECLARE_TYPE({instancetype}, {classtype}, {uppercase})\n')
+        yield self.make_patch(c)
+
+class AddObjectDeclareSimpleType(DeclareInstanceChecker):
+    """Will add OBJECT_DECLARE_SIMPLE_TYPE(...) if possible"""
+    def gen_patches(self) -> Iterable[Patch]:
+        uppercase = self.uppercase
+        typename = self.group('typename')
+        instancetype = self.group('instancetype')
+
+        if typename != f'TYPE_{uppercase}':
+            self.warn("type name mismatch: %s vs %s", typename, uppercase)
+            return
+
+        typedefs = [(t,self.allfiles.find_matches(SimpleTypedefMatch, t))
+                    for t in (instancetype,)]
+        for t,tds in typedefs:
+            if not tds:
+                self.warn("typedef %s not found", t)
+                return
+            for td in tds:
+                td_type = td.group('typedef_type')
+                if td_type != f'struct {t}':
+                    self.warn("typedef mismatch: %s is defined as %s", t, td_type)
+                    td.warn("typedef is here")
+                    return
+
+        # look for reuse of same struct type
+        other_instance_checkers = [c for c in find_type_checkers(self.allfiles, instancetype, 'instancetype')
+                                if c.uppercase != uppercase]
+        if other_instance_checkers:
+            self.warn("typedef %s is being reused", instancetype)
+            for ic in other_instance_checkers:
+                ic.warn("%s is reused here", instancetype)
+            if not self.file.force:
+                return
+
+        decl_types: List[Type[TypeDeclaration]] = [DeclareClassCheckers, DeclareObjCheckers]
+        class_decls = [m for t in decl_types
+                       for m in self.allfiles.find_matches(t, uppercase, 'uppercase')]
+        if class_decls:
+            self.warn("class type declared for %s", uppercase)
+            for cd in class_decls:
+                cd.warn("class declaration found here")
+            return
+
+        defs = list(find_type_definitions(self.allfiles, uppercase))
+        if len(defs) > 1:
+            self.warn("multiple definitions for %s", uppercase)
+            for d in defs:
+                d.warn("definition found here")
+            if not self.file.force:
+                return
+        elif len(defs) == 0:
+            self.warn("type definition for %s not found", uppercase)
+            if not self.file.force:
+                return
+        else:
+            d = defs[0]
+            if d.instancetype != instancetype:
+                self.warn("mismatching instance type for %s (%s)", uppercase, instancetype)
+                d.warn("instance type declared here (%s)", d.instancetype)
+                if not self.file.force:
+                    return
+            if d.classtype:
+                self.warn("class type set for %s", uppercase)
+                d.warn("class type declared here")
+                if not self.file.force:
+                    return
+
+        assert self.file.original_content
+        for t,tds in typedefs:
+            assert tds
+            for td in tds:
+                if td.file is not self.file:
+                    continue
+
+                # delete typedefs that are truly redundant:
+                # 1) defined after DECLARE_OBJ_CHECKERS
+                if td.start() > self.start():
+                    yield td.make_removal_patch()
+                # 2) defined before DECLARE_OBJ_CHECKERS, but unused
+                elif not re.search(r'\b'+t+r'\b', self.file.original_content[td.end():self.start()]):
+                    yield td.make_removal_patch()
+
+        c = (f'OBJECT_DECLARE_SIMPLE_TYPE({instancetype}, {uppercase})\n')
+        yield self.make_patch(c)
+
+
+class TypeInfoStringName(TypeInfoVar):
+    """Replace hardcoded type names with TYPE_ constant"""
+    def gen_patches(self) -> Iterable[Patch]:
+        values = self.initializers
+        if values is None:
+            return
+        if 'name' not in values:
+            self.warn("name not set in TypeInfo variable %s", self.name)
+            return
+        typename = values['name'].raw
+        if re.fullmatch(RE_IDENTIFIER, typename):
+            return
+
+        self.warn("name %s is not an identifier", typename)
+        #all_defines = [m for m in self.allfiles.matches_of_type(ExpressionDefine)]
+        #self.debug("all_defines: %r", all_defines)
+        constants = [m for m in self.allfiles.matches_of_type(ExpressionDefine)
+                     if m.group('value').strip() == typename.strip()]
+        if not constants:
+            self.warn("No macro for %s found", typename)
+            return
+        if len(constants) > 1:
+            self.warn("I don't know which macro to use: %r", constants)
+            return
+        yield self.patch_field_value('name', constants[0].name)
+
+class RedundantTypeSizes(TypeInfoVar):
+    """Remove redundant instance_size/class_size from TypeInfo vars"""
+    def gen_patches(self) -> Iterable[Patch]:
+        values = self.initializers
+        if values is None:
+            return
+        if 'name' not in values:
+            self.warn("name not set in TypeInfo variable %s", self.name)
+            return
+        typename = values['name'].raw
+        if 'parent' not in values:
+            self.warn("parent not set in TypeInfo variable %s", self.name)
+            return
+        parent_typename = values['parent'].raw
+
+        if 'instance_size' not in values and 'class_size' not in values:
+            self.debug("no need to validate %s", self.name)
+            return
+
+        instance_decls = find_type_checkers(self.allfiles, typename)
+        if instance_decls:
+            self.debug("won't touch TypeInfo var that has type checkers")
+            return
+
+        parent = find_type_info(self.allfiles, parent_typename)
+        if not parent:
+            self.warn("Can't find TypeInfo for %s", parent_typename)
+            return
+
+        if 'instance_size' in values and parent.get_raw_initializer_value('instance_size') != values['instance_size'].raw:
+            self.info("instance_size mismatch")
+            parent.info("parent type declared here")
+            return
+
+        if 'class_size' in values and parent.get_raw_initializer_value('class_size') != values['class_size'].raw:
+            self.info("class_size mismatch")
+            parent.info("parent type declared here")
+            return
+
+        self.debug("will patch variable %s", self.name)
+
+        if 'instance_size' in values:
+            self.debug("deleting instance_size")
+            yield self.patch_field('instance_size', '')
+
+        if 'class_size' in values:
+            self.debug("deleting class_size")
+            yield self.patch_field('class_size', '')
+
+
+#class TypeInfoVarInitFuncs(TypeInfoVar):
+#    """TypeInfo variable
+#    Will create missing init functions
+#    """
+#    def gen_patches(self) -> Iterable[Patch]:
+#        values = self.initializers
+#        if values is None:
+#            self.warn("type not parsed completely: %s", self.name)
+#            return
+#
+#        macro = self.file.find_match(TypeInfoVar, self.name)
+#        if macro is None:
+#            self.warn("No TYPE_INFO macro for %s", self.name)
+#            return
+#
+#        ids = self.extract_identifiers()
+#        if ids is None:
+#            return
+#
+#        DBG("identifiers extracted: %r", ids)
+#        fields = set(values.keys())
+#        if ids.lowercase:
+#            if 'instance_init' not in fields:
+#                yield self.prepend(('static void %s_init(Object *obj)\n'
+#                                    '{\n'
+#                                    '}\n\n') % (ids.lowercase))
+#                yield self.append_field('instance_init', ids.lowercase+'_init')
+#
+#            if 'instance_finalize' not in fields:
+#                yield self.prepend(('static void %s_finalize(Object *obj)\n'
+#                                    '{\n'
+#                                    '}\n\n') % (ids.lowercase))
+#                yield self.append_field('instance_finalize', ids.lowercase+'_finalize')
+#
+#
+#            if 'class_init' not in fields:
+#                yield self.prepend(('static void %s_class_init(ObjectClass *oc, void *data)\n'
+#                                    '{\n'
+#                                    '}\n\n') % (ids.lowercase))
+#                yield self.append_field('class_init', ids.lowercase+'_class_init')
+
+class TypeInitMacro(FileMatch):
+    """Use of type_init(...) macro"""
+    regexp = S(r'^[ \t]*type_init\s*\(\s*', NAMED('name', RE_IDENTIFIER), r'\s*\);?[ \t]*\n')
+
+class DeleteEmptyTypeInitFunc(TypeInitMacro):
+    """Delete empty function declared using type_init(...)"""
+    def gen_patches(self) -> Iterable[Patch]:
+        fn = self.file.find_match(StaticVoidFunction, self.name)
+        DBG("function for %s: %s", self.name, fn)
+        if fn and fn.body == '':
+            yield fn.make_patch('')
+            yield self.make_patch('')
+
+class StaticVoidFunction(FileMatch):
+    """simple static void function
+    (no replacement rules)
+    """
+    #NOTE: just like RE_FULL_STRUCT, this doesn't parse any of the body contents
+    #      of the function.  Tt will just look for "}" in the beginning of a line
+    regexp = S(r'static\s+void\s+', NAMED('name', RE_IDENTIFIER), r'\s*\(\s*void\s*\)\n',
+               r'{\n',
+               NAMED('body',
+                     # acceptable inside the function body:
+                     # - lines starting with space or tab
+                     # - empty lines
+                     # - preprocessor directives
+                     OR(r'[ \t][^\n]*\n',
+                        r'#[^\n]*\n',
+                        r'\n',
+                        repeat='*')),
+               r'};?\n')
+
+    @property
+    def body(self) -> str:
+        return self.group('body')
+
+    def has_preprocessor_directive(self) -> bool:
+        return bool(re.search(r'^[ \t]*#', self.body, re.MULTILINE))
+
+def find_containing_func(m: FileMatch) -> Optional['StaticVoidFunction']:
+    """Return function containing this match"""
+    for fn in m.file.matches_of_type(StaticVoidFunction):
+        if fn.contains(m):
+            return fn
+    return None
+
+class TypeRegisterStaticCall(FileMatch):
+    """type_register_static() call
+    Will be replaced by TYPE_INFO() macro
+    """
+    regexp = S(r'^[ \t]*', NAMED('func_name', 'type_register_static'),
+               r'\s*\(&\s*', NAMED('name', RE_IDENTIFIER), r'\s*\);[ \t]*\n')
+
+class UseTypeInfo(TypeRegisterStaticCall):
+    """Replace type_register_static() call with TYPE_INFO declaration"""
+    def gen_patches(self) -> Iterable[Patch]:
+        fn = find_containing_func(self)
+        if fn:
+            DBG("%r is inside %r", self, fn)
+            type_init = self.file.find_match(TypeInitMacro, fn.name)
+            if type_init is None:
+                self.warn("can't find type_init(%s) line", fn.name)
+                if not self.file.force:
+                    return
+        else:
+            self.warn("can't identify the function where type_register_static(&%s) is called", self.name)
+            if not self.file.force:
+                return
+
+        #if fn.has_preprocessor_directive() and not self.file.force:
+        #    self.warn("function %s has preprocessor directives, this requires --force", fn.name)
+        #    return
+
+        var = self.file.find_match(TypeInfoVar, self.name)
+        if var is None:
+            self.warn("can't find TypeInfo var declaration for %s", self.name)
+            return
+
+        if not var.is_full():
+            self.warn("variable declaration %s wasn't parsed fully", var.name)
+            if not self.file.force:
+                return
+
+        if fn and fn.contains(var):
+            self.warn("TypeInfo %s variable is inside a function", self.name)
+            if not self.file.force:
+                return
+
+        # delete type_register_static() call:
+        yield self.make_patch('')
+        # append TYPE_REGISTER(...) after variable declaration:
+        yield var.append(f'TYPE_INFO({self.name})\n')
+
+class TypeRegisterCall(FileMatch):
+    """type_register_static() call"""
+    regexp = S(r'^[ \t]*', NAMED('func_name', 'type_register'),
+               r'\s*\(&\s*', NAMED('name', RE_IDENTIFIER), r'\s*\);[ \t]*\n')
+
+class MakeTypeRegisterStatic(TypeRegisterCall):
+    """Make type_register() call static if variable is static const"""
+    def gen_patches(self):
+        var = self.file.find_match(TypeInfoVar, self.name)
+        if var is None:
+            self.warn("can't find TypeInfo var declaration for %s", self.name)
+            return
+        if var.is_static() and var.is_const():
+            yield self.group_match('func_name').make_patch('type_register_static')
+
+class MakeTypeRegisterNotStatic(TypeRegisterStaticCall):
+    """Make type_register() call static if variable is static const"""
+    def gen_patches(self):
+        var = self.file.find_match(TypeInfoVar, self.name)
+        if var is None:
+            self.warn("can't find TypeInfo var declaration for %s", self.name)
+            return
+        if not var.is_static() or not var.is_const():
+            yield self.group_match('func_name').make_patch('type_register')
+
+class TypeInfoMacro(FileMatch):
+    """TYPE_INFO macro usage"""
+    regexp = S(r'^[ \t]*TYPE_INFO\s*\(\s*', NAMED('name', RE_IDENTIFIER), r'\s*\)[ \t]*;?[ \t]*\n')
+
+def find_type_info(files: RegexpScanner, name: str) -> Optional[TypeInfoVar]:
+    ti = [ti for ti in files.matches_of_type(TypeInfoVar)
+            if ti.get_raw_initializer_value('name') == name]
+    DBG("type info vars: %r", ti)
+    if len(ti) > 1:
+        DBG("multiple TypeInfo vars found for %s", name)
+        return None
+    if len(ti) == 0:
+        DBG("no TypeInfo var found for %s", name)
+        return None
+    return ti[0]
+
+class CreateClassStruct(DeclareInstanceChecker):
+    """Replace DECLARE_INSTANCE_CHECKER with OBJECT_DECLARE_SIMPLE_TYPE"""
+    def gen_patches(self) -> Iterable[Patch]:
+        typename = self.group('typename')
+        DBG("looking for TypeInfo variable for %s", typename)
+        var = find_type_info(self.allfiles, typename)
+        if var is None:
+            self.warn("no TypeInfo var found for %s", typename)
+            return
+        assert var.initializers
+        if 'class_size' in var.initializers:
+            self.warn("class size already set for TypeInfo %s", var.name)
+            return
+        classtype = self.group('instancetype')+'Class'
+        return
+        yield
+        #TODO: need to find out what's the parent class type...
+        #yield var.append_field('class_size', f'sizeof({classtype})')
+        #c = (f'OBJECT_DECLARE_SIMPLE_TYPE({instancetype}, {lowercase},\n'
+        #     f'                           MODULE_OBJ_NAME, ParentClassType)\n')
+        #yield self.make_patch(c)
+
+def type_infos(file: FileInfo) -> Iterable[TypeInfoVar]:
+    return file.matches_of_type(TypeInfoVar)
+
+def full_types(file: FileInfo) -> Iterable[TypeInfoVar]:
+    return [t for t in type_infos(file) if t.is_full()]
+
+def partial_types(file: FileInfo) -> Iterable[TypeInfoVar]:
+    return [t for t in type_infos(file) if not t.is_full()]
diff --git a/scripts/codeconverter/codeconverter/regexps.py b/scripts/codeconverter/codeconverter/regexps.py
new file mode 100644
index 000000000..77993cc3b
--- /dev/null
+++ b/scripts/codeconverter/codeconverter/regexps.py
@@ -0,0 +1,118 @@
+# Copyright (C) 2020 Red Hat Inc.
+#
+# Authors:
+#  Eduardo Habkost 
+#
+# This work is licensed under the terms of the GNU GPL, version 2.  See
+# the COPYING file in the top-level directory.
+"""Helpers for creation of regular expressions"""
+import re
+
+import logging
+logger = logging.getLogger(__name__)
+DBG = logger.debug
+INFO = logger.info
+WARN = logger.warning
+
+def S(*regexps) -> str:
+    """Just a shortcut to concatenate multiple regexps more easily"""
+    return ''.join(regexps)
+
+def P(*regexps, name=None, capture=False, repeat='') -> str:
+    """Just add parenthesis around regexp(s), with optional name or repeat suffix"""
+    s = S(*regexps)
+    if name:
+        return f'(?P<{name}>{s}){repeat}'
+    elif capture:
+        return f'({s}){repeat}'
+    else:
+        return f'(?:{s}){repeat}'
+
+def NAMED(name, *regexps) -> str:
+    """Make named group using ...) syntax
+
+    >>> NAMED('mygroup', 'xyz', 'abc')
+    '(?Pxyzabc)'
+    """
+    return P(*regexps, name=name)
+
+def OR(*regexps, **kwargs) -> str:
+    """Build (a|b|c) regexp"""
+    return P('|'.join(regexps), **kwargs)
+
+def M(*regexps, n='*', name=None) -> str:
+    """Add repetition qualifier to regexp(s)
+
+    >>> M('a', 'b')
+    '(?:ab)*'
+    >>> M('a' , 'b', n='+')
+    '(?:ab)+'
+    >>> M('a' , 'b', n='{2,3}', name='name')
+    '(?P(?:ab){2,3})'
+    """
+    r = P(*regexps, repeat=n)
+    if name:
+        r = NAMED(name, r)
+    return r
+
+# helper to make parenthesis optional around regexp
+OPTIONAL_PARS = lambda R: OR(S(r'\(\s*', R, r'\s*\)'), R)
+def test_optional_pars():
+    r = OPTIONAL_PARS('abc')+'$'
+    assert re.match(r, 'abc')
+    assert re.match(r, '(abc)')
+    assert not re.match(r, '(abcd)')
+    assert not re.match(r, '(abc')
+    assert not re.match(r, 'abc)')
+
+
+# this disables the MULTILINE flag, so it will match at the
+# beginning of the file:
+RE_FILE_BEGIN = r'(?-m:^)'
+
+# C primitives:
+
+SP = r'\s*'
+
+RE_COMMENT = r'//[^\n]*$|/\*([^*]|\*[^/])*\*/'
+RE_COMMENTS = M(RE_COMMENT + SP)
+
+RE_IDENTIFIER = r'[a-zA-Z_][a-zA-Z0-9_]*(?![a-zA-Z0-9])'
+RE_STRING = r'\"([^\"\\]|\\[a-z\"])*\"'
+RE_NUMBER = r'[0-9]+|0x[0-9a-fA-F]+'
+
+# space or escaped newlines:
+CPP_SPACE = OR(r'\s', r'\\\n', repeat='+')
+
+RE_PATH = '[a-zA-Z0-9/_.-]+'
+
+RE_INCLUDEPATH = OR(S(r'\"', RE_PATH, r'\"'),
+                    S(r'<', RE_PATH, r'>'))
+
+RE_INCLUDE = S(r'^[ \t]*#[ \t]*include[ \t]+', NAMED('includepath', RE_INCLUDEPATH), r'[ \t]*\n')
+RE_SIMPLEDEFINE = S(r'^[ \t]*#[ \t]*define[ \t]+', RE_IDENTIFIER, r'[ \t]*\n')
+
+RE_STRUCT_TYPE = S(r'struct\s+', RE_IDENTIFIER)
+RE_TYPE = OR(RE_IDENTIFIER, RE_STRUCT_TYPE)
+
+RE_MACRO_CONCAT = M(S(OR(RE_IDENTIFIER, RE_STRING), SP), n='{2,}')
+
+RE_SIMPLE_VALUE = OR(RE_IDENTIFIER, RE_STRING, RE_NUMBER)
+
+RE_FUN_CALL = S(RE_IDENTIFIER, r'\s*\(\s*', RE_SIMPLE_VALUE, r'\s*\)')
+RE_SIZEOF = S(r'sizeof\s*\(\s*', NAMED('sizeoftype', RE_TYPE), r'\s*\)')
+
+RE_ADDRESS = S(r'&\s*', RE_IDENTIFIER)
+
+RE_ARRAY_ITEM = S(r'{\s*', NAMED('arrayitem', M(RE_SIMPLE_VALUE, n='?')), r'\s*}\s*,?')
+RE_ARRAY_CAST = S(r'\(\s*', RE_IDENTIFIER, r'\s*\[\s*\]\)')
+RE_ARRAY_ITEMS = M(S(RE_ARRAY_ITEM, SP))
+RE_ARRAY = S(M(RE_ARRAY_CAST, n='?'), r'\s*{\s*',
+             NAMED('arrayitems', RE_ARRAY_ITEMS),
+             r'}')
+
+# NOTE: this covers a very small subset of valid expressions
+
+RE_EXPRESSION = OR(RE_SIZEOF, RE_FUN_CALL, RE_MACRO_CONCAT, RE_SIMPLE_VALUE,
+                   RE_ARRAY, RE_ADDRESS)
+
diff --git a/scripts/codeconverter/codeconverter/test_patching.py b/scripts/codeconverter/codeconverter/test_patching.py
new file mode 100644
index 000000000..71dfbd47e
--- /dev/null
+++ b/scripts/codeconverter/codeconverter/test_patching.py
@@ -0,0 +1,104 @@
+# Copyright (C) 2020 Red Hat Inc.
+#
+# Authors:
+#  Eduardo Habkost 
+#
+# This work is licensed under the terms of the GNU GPL, version 2.  See
+# the COPYING file in the top-level directory.
+from tempfile import NamedTemporaryFile
+from .patching import FileInfo, FileMatch, Patch, FileList
+from .regexps import *
+
+class BasicPattern(FileMatch):
+    regexp = '[abc]{3}'
+
+    @property
+    def name(self):
+        return self.group(0)
+
+    def replacement(self) -> str:
+        # replace match with the middle character repeated 5 times
+        return self.group(0)[1].upper()*5
+
+def test_pattern_patching():
+    of = NamedTemporaryFile('wt')
+    of.writelines(['one line\n',
+                  'this pattern will be patched: defbbahij\n',
+                  'third line\n',
+                  'another pattern: jihaabfed'])
+    of.flush()
+
+    files = FileList()
+    f = FileInfo(files, of.name)
+    f.load()
+    matches = f.matches_of_type(BasicPattern)
+    assert len(matches) == 2
+    p2 = matches[1]
+
+    # manually add patch, to see if .append() works:
+    f.patches.append(p2.append('XXX'))
+
+    # apply all patches:
+    f.gen_patches(matches)
+    patched = f.get_patched_content()
+    assert patched == ('one line\n'+
+                       'this pattern will be patched: defBBBBBhij\n'+
+                       'third line\n'+
+                       'another pattern: jihAAAAAXXXfed')
+
+class Function(FileMatch):
+    regexp = S(r'BEGIN\s+', NAMED('name', RE_IDENTIFIER), r'\n',
+               r'(.*\n)*?END\n')
+
+class Statement(FileMatch):
+    regexp = S(r'^\s*', NAMED('name', RE_IDENTIFIER), r'\(\)\n')
+
+def test_container_match():
+    of = NamedTemporaryFile('wt')
+    of.writelines(['statement1()\n',
+                   'statement2()\n',
+                   'BEGIN function1\n',
+                   '  statement3()\n',
+                   '  statement4()\n',
+                   'END\n',
+                   'BEGIN function2\n',
+                   '  statement5()\n',
+                   '  statement6()\n',
+                   'END\n',
+                   'statement7()\n'])
+    of.flush()
+
+    files = FileList()
+    f = FileInfo(files, of.name)
+    f.load()
+    assert len(f.matches_of_type(Function)) == 2
+    print(' '.join(m.name for m in f.matches_of_type(Statement)))
+    assert len(f.matches_of_type(Statement)) == 7
+
+    f1 = f.find_match(Function, 'function1')
+    f2 = f.find_match(Function, 'function2')
+    st1 = f.find_match(Statement, 'statement1')
+    st2 = f.find_match(Statement, 'statement2')
+    st3 = f.find_match(Statement, 'statement3')
+    st4 = f.find_match(Statement, 'statement4')
+    st5 = f.find_match(Statement, 'statement5')
+    st6 = f.find_match(Statement, 'statement6')
+    st7 = f.find_match(Statement, 'statement7')
+
+    assert not f1.contains(st1)
+    assert not f1.contains(st2)
+    assert not f1.contains(st2)
+    assert f1.contains(st3)
+    assert f1.contains(st4)
+    assert not f1.contains(st5)
+    assert not f1.contains(st6)
+    assert not f1.contains(st7)
+
+    assert not f2.contains(st1)
+    assert not f2.contains(st2)
+    assert not f2.contains(st2)
+    assert not f2.contains(st3)
+    assert not f2.contains(st4)
+    assert f2.contains(st5)
+    assert f2.contains(st6)
+    assert not f2.contains(st7)
diff --git a/scripts/codeconverter/codeconverter/test_regexps.py b/scripts/codeconverter/codeconverter/test_regexps.py
new file mode 100644
index 000000000..a445634d8
--- /dev/null
+++ b/scripts/codeconverter/codeconverter/test_regexps.py
@@ -0,0 +1,282 @@
+# Copyright (C) 2020 Red Hat Inc.
+#
+# Authors:
+#  Eduardo Habkost 
+#
+# This work is licensed under the terms of the GNU GPL, version 2.  See
+# the COPYING file in the top-level directory.
+from .regexps import *
+from .qom_macros import *
+from .qom_type_info import *
+
+def test_res() -> None:
+    def fullmatch(regexp, s):
+        return re.fullmatch(regexp, s, re.MULTILINE)
+
+    assert fullmatch(RE_IDENTIFIER, 'sizeof')
+    assert fullmatch(RE_IDENTIFIER, 'X86CPU')
+    assert fullmatch(RE_FUN_CALL, 'sizeof(X86CPU)')
+    assert fullmatch(RE_IDENTIFIER, 'X86_CPU_TYPE_NAME')
+    assert fullmatch(RE_SIMPLE_VALUE, '"base"')
+    print(RE_FUN_CALL)
+    assert fullmatch(RE_FUN_CALL, 'X86_CPU_TYPE_NAME("base")')
+    print(RE_TI_FIELD_INIT)
+    assert fullmatch(RE_TI_FIELD_INIT, '.name = X86_CPU_TYPE_NAME("base"),\n')
+
+
+    assert fullmatch(RE_MACRO_CONCAT, 'TYPE_ASPEED_GPIO "-ast2600"')
+    assert fullmatch(RE_EXPRESSION, 'TYPE_ASPEED_GPIO "-ast2600"')
+
+    print(RE_MACRO_DEFINE)
+    assert re.search(RE_MACRO_DEFINE, r'''
+    #define OFFSET_CHECK(c)                     \
+    do {                                        \
+        if (!(c)) {                             \
+            goto bad_offset;                    \
+        }                                       \
+    } while (0)
+    ''', re.MULTILINE)
+
+    print(RE_CHECK_MACRO)
+    print(CPP_SPACE)
+    assert not re.match(RE_CHECK_MACRO, r'''
+    #define OFFSET_CHECK(c)                     \
+    do {                                        \
+        if (!(c)) {                             \
+            goto bad_offset;                    \
+        }                                       \
+    } while (0)''', re.MULTILINE)
+
+    print(RE_CHECK_MACRO)
+    assert fullmatch(RE_CHECK_MACRO, r'''#define PCI_DEVICE(obj) \
+                     OBJECT_CHECK(PCIDevice, (obj), TYPE_PCI_DEVICE)
+''')
+    assert fullmatch(RE_CHECK_MACRO, r'''#define COLLIE_MACHINE(obj) \
+                     OBJECT_CHECK(CollieMachineState, obj, TYPE_COLLIE_MACHINE)
+''')
+
+    print(RE_TYPEINFO_START)
+    assert re.search(RE_TYPEINFO_START, r'''
+    cc->open = qmp_chardev_open_file;
+}
+
+static const TypeInfo char_file_type_info = {
+    .name = TYPE_CHARDEV_FILE,
+#ifdef _WIN32
+    .parent = TYPE_CHARDEV_WIN,
+''', re.MULTILINE)
+    assert re.search(RE_TYPEINFO_START, r'''
+        TypeInfo ti = {
+            .name = armsse_variants[i].name,
+            .parent = TYPE_ARMSSE,
+            .class_init = armsse_class_init,
+            .class_data = (void *)&armsse_variants[i],
+        };''', re.MULTILINE)
+
+    print(RE_ARRAY_ITEM)
+    assert fullmatch(RE_ARRAY_ITEM, '{ TYPE_HOTPLUG_HANDLER },')
+    assert fullmatch(RE_ARRAY_ITEM, '{ TYPE_ACPI_DEVICE_IF },')
+    assert fullmatch(RE_ARRAY_ITEM, '{ }')
+    assert fullmatch(RE_ARRAY_CAST, '(InterfaceInfo[])')
+    assert fullmatch(RE_ARRAY, '''(InterfaceInfo[]) {
+            { TYPE_HOTPLUG_HANDLER },
+            { TYPE_ACPI_DEVICE_IF },
+            { }
+    }''')
+    print(RE_COMMENT)
+    assert fullmatch(RE_COMMENT, r'''/* multi-line
+                                      * comment
+                                      */''')
+
+    print(RE_TI_FIELDS)
+    assert fullmatch(RE_TI_FIELDS,
+    r'''/* could be TYPE_SYS_BUS_DEVICE (or LPC etc) */
+        .parent = TYPE_DEVICE,
+''')
+    assert fullmatch(RE_TI_FIELDS, r'''.name = TYPE_TPM_CRB,
+        /* could be TYPE_SYS_BUS_DEVICE (or LPC etc) */
+        .parent = TYPE_DEVICE,
+        .instance_size = sizeof(CRBState),
+        .class_init  = tpm_crb_class_init,
+        .interfaces = (InterfaceInfo[]) {
+            { TYPE_TPM_IF },
+            { }
+        }
+''')
+    assert fullmatch(RE_TI_FIELDS + SP + RE_COMMENTS,
+        r'''.name = TYPE_PALM_MISC_GPIO,
+            .parent = TYPE_SYS_BUS_DEVICE,
+            .instance_size = sizeof(PalmMiscGPIOState),
+            .instance_init = palm_misc_gpio_init,
+            /*
+             * No class init required: device has no internal state so does not
+             * need to set up reset or vmstate, and has no realize method.
+             */''')
+
+    print(TypeInfoVar.regexp)
+    test_empty = 'static const TypeInfo x86_base_cpu_type_info = {\n'+\
+                 '};\n';
+    assert fullmatch(TypeInfoVar.regexp, test_empty)
+
+    test_simple = r'''
+    static const TypeInfo x86_base_cpu_type_info = {
+        .name = X86_CPU_TYPE_NAME("base"),
+        .parent = TYPE_X86_CPU,
+        .class_init = x86_cpu_base_class_init,
+    };
+    '''
+    assert re.search(TypeInfoVar.regexp, test_simple, re.MULTILINE)
+
+    test_interfaces = r'''
+    static const TypeInfo acpi_ged_info = {
+        .name          = TYPE_ACPI_GED,
+        .parent        = TYPE_SYS_BUS_DEVICE,
+        .instance_size = sizeof(AcpiGedState),
+        .instance_init  = acpi_ged_initfn,
+        .class_init    = acpi_ged_class_init,
+        .interfaces = (InterfaceInfo[]) {
+            { TYPE_HOTPLUG_HANDLER },
+            { TYPE_ACPI_DEVICE_IF },
+            { }
+        }
+    };
+    '''
+    assert re.search(TypeInfoVar.regexp, test_interfaces, re.MULTILINE)
+
+    test_comments = r'''
+    static const TypeInfo palm_misc_gpio_info = {
+        .name = TYPE_PALM_MISC_GPIO,
+        .parent = TYPE_SYS_BUS_DEVICE,
+        .instance_size = sizeof(PalmMiscGPIOState),
+        .instance_init = palm_misc_gpio_init,
+        /*
+         * No class init required: device has no internal state so does not
+         * need to set up reset or vmstate, and has no realize method.
+         */
+    };
+    '''
+    assert re.search(TypeInfoVar.regexp, test_comments, re.MULTILINE)
+
+    test_comments = r'''
+    static const TypeInfo tpm_crb_info = {
+        .name = TYPE_TPM_CRB,
+        /* could be TYPE_SYS_BUS_DEVICE (or LPC etc) */
+        .parent = TYPE_DEVICE,
+        .instance_size = sizeof(CRBState),
+        .class_init  = tpm_crb_class_init,
+        .interfaces = (InterfaceInfo[]) {
+            { TYPE_TPM_IF },
+            { }
+        }
+    };
+    '''
+    assert re.search(TypeInfoVar.regexp, test_comments, re.MULTILINE)
+
+def test_struct_re():
+    print('---')
+    print(RE_STRUCT_TYPEDEF)
+    assert re.search(RE_STRUCT_TYPEDEF, r'''
+typedef struct TCGState {
+    AccelState parent_obj;
+
+    bool mttcg_enabled;
+    unsigned long tb_size;
+} TCGState;
+''', re.MULTILINE)
+
+    assert re.search(RE_STRUCT_TYPEDEF, r'''
+typedef struct {
+    ISADevice parent_obj;
+
+    QEMUSoundCard card;
+    uint32_t freq;
+    uint32_t port;
+    int ticking[2];
+    int enabled;
+    int active;
+    int bufpos;
+#ifdef DEBUG
+    int64_t exp[2];
+#endif
+    int16_t *mixbuf;
+    uint64_t dexp[2];
+    SWVoiceOut *voice;
+    int left, pos, samples;
+    QEMUAudioTimeStamp ats;
+    FM_OPL *opl;
+    PortioList port_list;
+} AdlibState;
+''', re.MULTILINE)
+
+    false_positive = r'''
+typedef struct dma_pagetable_entry {
+    int32_t frame;
+    int32_t owner;
+} A B C D E;
+struct foo {
+    int x;
+} some_variable;
+'''
+    assert not re.search(RE_STRUCT_TYPEDEF, false_positive, re.MULTILINE)
+
+def test_initial_includes():
+    print(InitialIncludes.regexp)
+    c = '''
+#ifndef HW_FLASH_H
+#define HW_FLASH_H
+
+/* NOR flash devices */
+
+#include "qom/object.h"
+#include "exec/hwaddr.h"
+
+/* pflash_cfi01.c */
+'''
+    print(repr(list(m.groupdict() for m in InitialIncludes.finditer(c))))
+    m = InitialIncludes.domatch(c)
+    assert m
+    print(repr(m.group(0)))
+    assert m.group(0).endswith('#include "exec/hwaddr.h"\n')
+
+    c = '''#ifndef QEMU_VIRTIO_9P_H
+#define QEMU_VIRTIO_9P_H
+
+#include "standard-headers/linux/virtio_9p.h"
+#include "hw/virtio/virtio.h"
+#include "9p.h"
+
+
+'''
+    print(repr(list(m.groupdict() for m in InitialIncludes.finditer(c))))
+    m = InitialIncludes.domatch(c)
+    assert m
+    print(repr(m.group(0)))
+    assert m.group(0).endswith('#include "9p.h"\n')
+
+    c = '''#include "qom/object.h"
+/*
+ * QEMU ES1370 emulation
+...
+ */
+
+/* #define DEBUG_ES1370 */
+/* #define VERBOSE_ES1370 */
+#define SILENT_ES1370
+
+#include "qemu/osdep.h"
+#include "hw/audio/soundhw.h"
+#include "audio/audio.h"
+#include "hw/pci/pci.h"
+#include "migration/vmstate.h"
+#include "qemu/module.h"
+#include "sysemu/dma.h"
+
+/* Missing stuff:
+   SCTRL_P[12](END|ST)INC
+'''
+    print(repr(list(m.groupdict() for m in InitialIncludes.finditer(c))))
+    m = InitialIncludes.domatch(c)
+    assert m
+    print(repr(m.group(0)))
+    assert m.group(0).endswith('#include "sysemu/dma.h"\n')
+
diff --git a/scripts/codeconverter/codeconverter/utils.py b/scripts/codeconverter/codeconverter/utils.py
new file mode 100644
index 000000000..760ab7eec
--- /dev/null
+++ b/scripts/codeconverter/codeconverter/utils.py
@@ -0,0 +1,72 @@
+# Copyright (C) 2020 Red Hat Inc.
+#
+# Authors:
+#  Eduardo Habkost 
+#
+# This work is licensed under the terms of the GNU GPL, version 2.  See
+# the COPYING file in the top-level directory.
+from typing import *
+
+import logging
+logger = logging.getLogger(__name__)
+DBG = logger.debug
+INFO = logger.info
+WARN = logger.warning
+
+T = TypeVar('T')
+def opt_compare(a: T, b: T) -> bool:
+    """Compare two values, ignoring mismatches if one of them is None"""
+    return (a is None) or (b is None) or (a == b)
+
+def merge(a: T, b: T) -> T:
+    """Merge two values if they matched using opt_compare()"""
+    assert opt_compare(a, b)
+    if a is None:
+        return b
+    else:
+        return a
+
+def test_comp_merge():
+    assert opt_compare(None, 1) == True
+    assert opt_compare(2, None) == True
+    assert opt_compare(1, 1) == True
+    assert opt_compare(1, 2) == False
+
+    assert merge(None, None) is None
+    assert merge(None, 10) == 10
+    assert merge(10, None) == 10
+    assert merge(10, 10) == 10
+
+
+LineNumber = NewType('LineNumber', int)
+ColumnNumber = NewType('ColumnNumber', int)
+class LineAndColumn(NamedTuple):
+    line: int
+    col: int
+
+    def __str__(self):
+        return '%d:%d' % (self.line, self.col)
+
+def line_col(s, position: int) -> LineAndColumn:
+    """Return line and column for a char position in string
+
+    Character position starts in 0, but lines and columns start in 1.
+    """
+    before = s[:position]
+    lines = before.split('\n')
+    line = len(lines)
+    col = len(lines[-1]) + 1
+    return LineAndColumn(line, col)
+
+def test_line_col():
+    assert line_col('abc\ndefg\nhijkl', 0) == (1, 1)
+    assert line_col('abc\ndefg\nhijkl', 2) == (1, 3)
+    assert line_col('abc\ndefg\nhijkl', 3) == (1, 4)
+    assert line_col('abc\ndefg\nhijkl', 4) == (2, 1)
+    assert line_col('abc\ndefg\nhijkl', 10) == (3, 2)
+
+def not_optional(arg: Optional[T]) -> T:
+    assert arg is not None
+    return arg
+
+__all__ = ['not_optional', 'opt_compare', 'merge', 'line_col', 'LineAndColumn']
\ No newline at end of file
diff --git a/scripts/codeconverter/converter.py b/scripts/codeconverter/converter.py
new file mode 100755
index 000000000..75cb515d9
--- /dev/null
+++ b/scripts/codeconverter/converter.py
@@ -0,0 +1,123 @@
+#!/usr/bin/env python3
+# QEMU library
+#
+# Copyright (C) 2020 Red Hat Inc.
+#
+# Authors:
+#  Eduardo Habkost 
+#
+# This work is licensed under the terms of the GNU GPL, version 2.  See
+# the COPYING file in the top-level directory.
+#
+import sys
+import argparse
+import os
+import os.path
+import re
+from typing import *
+
+from codeconverter.patching import FileInfo, match_class_dict, FileList
+import codeconverter.qom_macros
+from codeconverter.qom_type_info import TI_FIELDS, type_infos, TypeInfoVar
+
+import logging
+logger = logging.getLogger(__name__)
+DBG = logger.debug
+INFO = logger.info
+WARN = logger.warning
+
+def process_all_files(parser: argparse.ArgumentParser, args: argparse.Namespace) -> None:
+    DBG("filenames: %r", args.filenames)
+
+    files = FileList()
+    files.extend(FileInfo(files, fn, args.force) for fn in args.filenames)
+    for f in files:
+        DBG('opening %s', f.filename)
+        f.load()
+
+    if args.table:
+        fields = ['filename', 'variable_name'] + TI_FIELDS
+        print('\t'.join(fields))
+        for f in files:
+            for t in f.matches_of_type(TypeInfoVar):
+                assert isinstance(t, TypeInfoVar)
+                values = [f.filename, t.name] + \
+                         [t.get_raw_initializer_value(f)
+                          for f in TI_FIELDS]
+                DBG('values: %r', values)
+                assert all('\t' not in v for v in values)
+                values = [v.replace('\n', ' ').replace('"', '') for v in values]
+                print('\t'.join(values))
+        return
+
+    match_classes = match_class_dict()
+    if not args.patterns:
+        parser.error("--pattern is required")
+
+    classes = [p for arg in args.patterns
+               for p in re.split(r'[\s,]', arg)
+               if p.strip()]
+    for c in classes:
+        if c not in match_classes \
+           or not match_classes[c].regexp:
+            print("Invalid pattern name: %s" % (c), file=sys.stderr)
+            print("Valid patterns:", file=sys.stderr)
+            print(PATTERN_HELP, file=sys.stderr)
+            sys.exit(1)
+
+    DBG("classes: %r", classes)
+    files.patch_content(max_passes=args.passes, class_names=classes)
+
+    for f in files:
+        #alltypes.extend(f.type_infos)
+        #full_types.extend(f.full_types())
+
+        if not args.dry_run:
+            if args.inplace:
+                f.patch_inplace()
+            if args.diff:
+                f.show_diff()
+            if not args.diff and not args.inplace:
+                f.write_to_file(sys.stdout)
+                sys.stdout.flush()
+
+
+PATTERN_HELP = ('\n'.join("  %s: %s" % (n, str(c.__doc__).strip())
+                for (n,c) in sorted(match_class_dict().items())
+                if c.has_replacement_rule()))
+
+def main() -> None:
+    p = argparse.ArgumentParser(formatter_class=argparse.RawDescriptionHelpFormatter)
+    p.add_argument('filenames', nargs='+')
+    p.add_argument('--passes', type=int, default=1,
+                   help="Number of passes (0 means unlimited)")
+    p.add_argument('--pattern', required=True, action='append',
+                   default=[], dest='patterns',
+                   help="Pattern to scan for")
+    p.add_argument('--inplace', '-i', action='store_true',
+                   help="Patch file in place")
+    p.add_argument('--dry-run', action='store_true',
+                   help="Don't patch files or print patching results")
+    p.add_argument('--force', '-f', action='store_true',
+                   help="Perform changes even if not completely safe")
+    p.add_argument('--diff', action='store_true',
+                   help="Print diff output on stdout")
+    p.add_argument('--debug', '-d', action='store_true',
+                   help="Enable debugging")
+    p.add_argument('--verbose', '-v', action='store_true',
+                   help="Verbose logging on stderr")
+    p.add_argument('--table', action='store_true',
+                   help="Print CSV table of type information")
+    p.add_argument_group("Valid pattern names",
+                         PATTERN_HELP)
+    args = p.parse_args()
+
+    loglevel = (logging.DEBUG if args.debug
+             else logging.INFO if args.verbose
+             else logging.WARN)
+    logging.basicConfig(format='%(levelname)s: %(message)s', level=loglevel)
+    DBG("args: %r", args)
+    process_all_files(p, args)
+
+if __name__ == '__main__':
+    main()
\ No newline at end of file
diff --git a/scripts/coverity-model.c b/scripts/coverity-model.c
new file mode 100644
index 000000000..2c0346ff2
--- /dev/null
+++ b/scripts/coverity-model.c
@@ -0,0 +1,386 @@
+/* Coverity Scan model
+ *
+ * Copyright (C) 2014 Red Hat, Inc.
+ *
+ * Authors:
+ *  Markus Armbruster 
+ *  Paolo Bonzini 
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or, at your
+ * option, any later version.  See the COPYING file in the top-level directory.
+ */
+
+
+/*
+ * This is the source code for our Coverity user model file.  The
+ * purpose of user models is to increase scanning accuracy by explaining
+ * code Coverity can't see (out of tree libraries) or doesn't
+ * sufficiently understand.  Better accuracy means both fewer false
+ * positives and more true defects.  Memory leaks in particular.
+ *
+ * - A model file can't import any header files.  Some built-in primitives are
+ *   available but not wchar_t, NULL etc.
+ * - Modeling doesn't need full structs and typedefs. Rudimentary structs
+ *   and similar types are sufficient.
+ * - An uninitialized local variable signifies that the variable could be
+ *   any value.
+ *
+ * The model file must be uploaded by an admin in the analysis settings of
+ * http://scan.coverity.com/projects/378
+ */
+
+#define NULL ((void *)0)
+
+typedef unsigned char uint8_t;
+typedef char int8_t;
+typedef unsigned int uint32_t;
+typedef int int32_t;
+typedef long ssize_t;
+typedef unsigned long long uint64_t;
+typedef long long int64_t;
+typedef _Bool bool;
+
+typedef struct va_list_str *va_list;
+
+/* exec.c */
+
+typedef struct AddressSpace AddressSpace;
+typedef uint64_t hwaddr;
+typedef uint32_t MemTxResult;
+typedef uint64_t MemTxAttrs;
+
+static void __bufwrite(uint8_t *buf, ssize_t len)
+{
+    int first, last;
+    __coverity_negative_sink__(len);
+    if (len == 0) return;
+    buf[0] = first;
+    buf[len-1] = last;
+    __coverity_writeall__(buf);
+}
+
+static void __bufread(uint8_t *buf, ssize_t len)
+{
+    __coverity_negative_sink__(len);
+    if (len == 0) return;
+    int first = buf[0];
+    int last = buf[len-1];
+}
+
+MemTxResult address_space_read(AddressSpace *as, hwaddr addr,
+                               MemTxAttrs attrs,
+                               uint8_t *buf, int len)
+{
+    MemTxResult result;
+    // TODO: investigate impact of treating reads as producing
+    // tainted data, with __coverity_tainted_data_argument__(buf).
+    __bufwrite(buf, len);
+    return result;
+}
+
+MemTxResult address_space_write(AddressSpace *as, hwaddr addr,
+                                MemTxAttrs attrs,
+                                const uint8_t *buf, int len)
+{
+    MemTxResult result;
+    __bufread(buf, len);
+    return result;
+}
+
+
+/* Tainting */
+
+typedef struct {} name2keysym_t;
+static int get_keysym(const name2keysym_t *table,
+                      const char *name)
+{
+    int result;
+    if (result > 0) {
+        __coverity_tainted_string_sanitize_content__(name);
+        return result;
+    } else {
+        return 0;
+    }
+}
+
+/* Replay data is considered trusted.  */
+uint8_t replay_get_byte(void)
+{
+    uint8_t byte;
+    return byte;
+}
+
+
+/*
+ * GLib memory allocation functions.
+ *
+ * Note that we ignore the fact that g_malloc of 0 bytes returns NULL,
+ * and g_realloc of 0 bytes frees the pointer.
+ *
+ * Modeling this would result in Coverity flagging a lot of memory
+ * allocations as potentially returning NULL, and asking us to check
+ * whether the result of the allocation is NULL or not.  However, the
+ * resulting pointer should never be dereferenced anyway, and in fact
+ * it is not in the vast majority of cases.
+ *
+ * If a dereference did happen, this would suppress a defect report
+ * for an actual null pointer dereference.  But it's too unlikely to
+ * be worth wading through the false positives, and with some luck
+ * we'll get a buffer overflow reported anyway.
+ */
+
+/*
+ * Allocation primitives, cannot return NULL
+ * See also Coverity's library/generic/libc/all/all.c
+ */
+
+void *g_malloc_n(size_t nmemb, size_t size)
+{
+    size_t sz;
+    void *ptr;
+
+    __coverity_negative_sink__(nmemb);
+    __coverity_negative_sink__(size);
+    sz = nmemb * size;
+    ptr = __coverity_alloc__(sz);
+    __coverity_mark_as_uninitialized_buffer__(ptr);
+    __coverity_mark_as_afm_allocated__(ptr, "g_free");
+    return ptr;
+}
+
+void *g_malloc0_n(size_t nmemb, size_t size)
+{
+    size_t sz;
+    void *ptr;
+
+    __coverity_negative_sink__(nmemb);
+    __coverity_negative_sink__(size);
+    sz = nmemb * size;
+    ptr = __coverity_alloc__(sz);
+    __coverity_writeall0__(ptr);
+    __coverity_mark_as_afm_allocated__(ptr, "g_free");
+    return ptr;
+}
+
+void *g_realloc_n(void *ptr, size_t nmemb, size_t size)
+{
+    size_t sz;
+
+    __coverity_negative_sink__(nmemb);
+    __coverity_negative_sink__(size);
+    sz = nmemb * size;
+    __coverity_escape__(ptr);
+    ptr = __coverity_alloc__(sz);
+    /*
+     * Memory beyond the old size isn't actually initialized.  Can't
+     * model that.  See Coverity's realloc() model
+     */
+    __coverity_writeall__(ptr);
+    __coverity_mark_as_afm_allocated__(ptr, "g_free");
+    return ptr;
+}
+
+void g_free(void *ptr)
+{
+    __coverity_free__(ptr);
+    __coverity_mark_as_afm_freed__(ptr, "g_free");
+}
+
+/*
+ * Derive the g_try_FOO_n() from the g_FOO_n() by adding indeterminate
+ * out of memory conditions
+ */
+
+void *g_try_malloc_n(size_t nmemb, size_t size)
+{
+    int nomem;
+
+    if (nomem) {
+        return NULL;
+    }
+    return g_malloc_n(nmemb, size);
+}
+
+void *g_try_malloc0_n(size_t nmemb, size_t size)
+{
+    int nomem;
+
+    if (nomem) {
+        return NULL;
+    }
+    return g_malloc0_n(nmemb, size);
+}
+
+void *g_try_realloc_n(void *ptr, size_t nmemb, size_t size)
+{
+    int nomem;
+
+    if (nomem) {
+        return NULL;
+    }
+    return g_realloc_n(ptr, nmemb, size);
+}
+
+/* Trivially derive the g_FOO() from the g_FOO_n() */
+
+void *g_malloc(size_t size)
+{
+    return g_malloc_n(1, size);
+}
+
+void *g_malloc0(size_t size)
+{
+    return g_malloc0_n(1, size);
+}
+
+void *g_realloc(void *ptr, size_t size)
+{
+    return g_realloc_n(ptr, 1, size);
+}
+
+void *g_try_malloc(size_t size)
+{
+    return g_try_malloc_n(1, size);
+}
+
+void *g_try_malloc0(size_t size)
+{
+    return g_try_malloc0_n(1, size);
+}
+
+void *g_try_realloc(void *ptr, size_t size)
+{
+    return g_try_realloc_n(ptr, 1, size);
+}
+
+/* Other memory allocation functions */
+
+void *g_memdup(const void *ptr, unsigned size)
+{
+    unsigned char *dup;
+    unsigned i;
+
+    if (!ptr) {
+        return NULL;
+    }
+
+    dup = g_malloc(size);
+    for (i = 0; i < size; i++)
+        dup[i] = ((unsigned char *)ptr)[i];
+    return dup;
+}
+
+/*
+ * GLib string allocation functions
+ */
+
+char *g_strdup(const char *s)
+{
+    char *dup;
+    size_t i;
+
+    if (!s) {
+        return NULL;
+    }
+
+    __coverity_string_null_sink__(s);
+    __coverity_string_size_sink__(s);
+    dup = __coverity_alloc_nosize__();
+    __coverity_mark_as_afm_allocated__(dup, "g_free");
+    for (i = 0; (dup[i] = s[i]); i++) ;
+    return dup;
+}
+
+char *g_strndup(const char *s, size_t n)
+{
+    char *dup;
+    size_t i;
+
+    __coverity_negative_sink__(n);
+
+    if (!s) {
+        return NULL;
+    }
+
+    dup = g_malloc(n + 1);
+    for (i = 0; i < n && (dup[i] = s[i]); i++) ;
+    dup[i] = 0;
+    return dup;
+}
+
+char *g_strdup_printf(const char *format, ...)
+{
+    char ch, *s;
+    size_t len;
+
+    __coverity_string_null_sink__(format);
+    __coverity_string_size_sink__(format);
+
+    ch = *format;
+
+    s = __coverity_alloc_nosize__();
+    __coverity_writeall__(s);
+    __coverity_mark_as_afm_allocated__(s, "g_free");
+    return s;
+}
+
+char *g_strdup_vprintf(const char *format, va_list ap)
+{
+    char ch, *s;
+    size_t len;
+
+    __coverity_string_null_sink__(format);
+    __coverity_string_size_sink__(format);
+
+    ch = *format;
+    ch = *(char *)ap;
+
+    s = __coverity_alloc_nosize__();
+    __coverity_writeall__(s);
+    __coverity_mark_as_afm_allocated__(s, "g_free");
+
+    return len;
+}
+
+char *g_strconcat(const char *s, ...)
+{
+    char *s;
+
+    /*
+     * Can't model: last argument must be null, the others
+     * null-terminated strings
+     */
+
+    s = __coverity_alloc_nosize__();
+    __coverity_writeall__(s);
+    __coverity_mark_as_afm_allocated__(s, "g_free");
+    return s;
+}
+
+/* Other glib functions */
+
+typedef struct pollfd GPollFD;
+
+int poll();
+
+int g_poll (GPollFD *fds, unsigned nfds, int timeout)
+{
+    return poll(fds, nfds, timeout);
+}
+
+typedef struct _GIOChannel GIOChannel;
+GIOChannel *g_io_channel_unix_new(int fd)
+{
+    GIOChannel *c = g_malloc0(sizeof(GIOChannel));
+    __coverity_escape__(fd);
+    return c;
+}
+
+void g_assertion_message_expr(const char     *domain,
+                              const char     *file,
+                              int             line,
+                              const char     *func,
+                              const char     *expr)
+{
+    __coverity_panic__();
+}
diff --git a/scripts/coverity-scan/coverity-scan.docker b/scripts/coverity-scan/coverity-scan.docker
new file mode 100644
index 000000000..501ac6723
--- /dev/null
+++ b/scripts/coverity-scan/coverity-scan.docker
@@ -0,0 +1,129 @@
+# syntax=docker/dockerfile:1.0.0-experimental
+#
+# Docker setup for running the "Coverity Scan" tools over the source
+# tree and uploading them to the website, as per
+# https://scan.coverity.com/projects/qemu/builds/new
+# We do this on a fixed config (currently Fedora 30 with a known
+# set of dependencies and a configure command that enables a specific
+# set of options) so that random changes don't result in our accidentally
+# dropping some files from the scan.
+#
+# We don't build on top of the fedora.docker file because we don't
+# want to accidentally change or break the scan config when that
+# is updated.
+
+# The work of actually doing the build is handled by the
+# run-coverity-scan script.
+
+FROM fedora:30
+ENV PACKAGES \
+    alsa-lib-devel \
+    bc \
+    brlapi-devel \
+    bzip2 \
+    bzip2-devel \
+    ccache \
+    clang \
+    curl \
+    cyrus-sasl-devel \
+    dbus-daemon \
+    device-mapper-multipath-devel \
+    findutils \
+    gcc \
+    gcc-c++ \
+    gettext \
+    git \
+    glib2-devel \
+    glusterfs-api-devel \
+    gnutls-devel \
+    gtk3-devel \
+    hostname \
+    libaio-devel \
+    libasan \
+    libattr-devel \
+    libblockdev-mpath-devel \
+    libcap-devel \
+    libcap-ng-devel \
+    libcurl-devel \
+    libepoxy-devel \
+    libfdt-devel \
+    libgbm-devel \
+    libiscsi-devel \
+    libjpeg-devel \
+    libpmem-devel \
+    libnfs-devel \
+    libpng-devel \
+    librbd-devel \
+    libseccomp-devel \
+    libssh-devel \
+    libubsan \
+    libudev-devel \
+    libusbx-devel \
+    libxml2-devel \
+    libzstd-devel \
+    llvm \
+    lzo-devel \
+    make \
+    mingw32-bzip2 \
+    mingw32-curl \
+    mingw32-glib2 \
+    mingw32-gmp \
+    mingw32-gnutls \
+    mingw32-gtk3 \
+    mingw32-libjpeg-turbo \
+    mingw32-libpng \
+    mingw32-libtasn1 \
+    mingw32-nettle \
+    mingw32-nsis \
+    mingw32-pixman \
+    mingw32-pkg-config \
+    mingw32-SDL2 \
+    mingw64-bzip2 \
+    mingw64-curl \
+    mingw64-glib2 \
+    mingw64-gmp \
+    mingw64-gnutls \
+    mingw64-gtk3 \
+    mingw64-libjpeg-turbo \
+    mingw64-libpng \
+    mingw64-libtasn1 \
+    mingw64-nettle \
+    mingw64-pixman \
+    mingw64-pkg-config \
+    mingw64-SDL2 \
+    ncurses-devel \
+    nettle-devel \
+    nss-devel \
+    numactl-devel \
+    perl \
+    perl-Test-Harness \
+    pixman-devel \
+    pulseaudio-libs-devel \
+    python3 \
+    python3-sphinx \
+    PyYAML \
+    rdma-core-devel \
+    SDL2-devel \
+    snappy-devel \
+    sparse \
+    spice-server-devel \
+    systemd-devel \
+    systemtap-sdt-devel \
+    tar \
+    usbredir-devel \
+    virglrenderer-devel \
+    vte291-devel \
+    wget \
+    which \
+    xen-devel \
+    xfsprogs-devel \
+    zlib-devel
+ENV QEMU_CONFIGURE_OPTS --python=/usr/bin/python3
+
+RUN dnf install -y $PACKAGES
+RUN rpm -q $PACKAGES | sort > /packages.txt
+ENV PATH $PATH:/usr/libexec/python3-sphinx/
+ENV COVERITY_TOOL_BASE=/coverity-tools
+COPY coverity_tool.tgz coverity_tool.tgz
+RUN mkdir -p /coverity-tools/coverity_tool && cd /coverity-tools/coverity_tool && tar xf /coverity_tool.tgz
+COPY run-coverity-scan run-coverity-scan
diff --git a/scripts/coverity-scan/run-coverity-scan b/scripts/coverity-scan/run-coverity-scan
new file mode 100755
index 000000000..6eefb4b55
--- /dev/null
+++ b/scripts/coverity-scan/run-coverity-scan
@@ -0,0 +1,431 @@
+#!/bin/sh -e
+
+# Upload a created tarball to Coverity Scan, as per
+# https://scan.coverity.com/projects/qemu/builds/new
+
+# This work is licensed under the terms of the GNU GPL version 2,
+# or (at your option) any later version.
+# See the COPYING file in the top-level directory.
+#
+# Copyright (c) 2017-2020 Linaro Limited
+# Written by Peter Maydell
+
+# Note that this script will automatically download and
+# run the (closed-source) coverity build tools, so don't
+# use it if you don't trust them!
+
+# This script assumes that you're running it from a QEMU source
+# tree, and that tree is a fresh clean one, because we do an in-tree
+# build. (This is necessary so that the filenames that the Coverity
+# Scan server sees are relative paths that match up with the component
+# regular expressions it uses; an out-of-tree build won't work for this.)
+# The host machine should have as many of QEMU's dependencies
+# installed as possible, for maximum coverity coverage.
+
+# To do an upload you need to be a maintainer in the Coverity online
+# service, and you will need to know the "Coverity token", which is a
+# secret 8 digit hex string. You can find that from the web UI in the
+# project settings, if you have maintainer access there.
+
+# Command line options:
+#   --dry-run : run the tools, but don't actually do the upload
+#   --docker : create and work inside a container
+#   --docker-engine : specify the container engine to use (docker/podman/auto);
+#                     implies --docker
+#   --update-tools-only : update the cached copy of the tools, but don't run them
+#   --no-update-tools : do not update the cached copy of the tools
+#   --tokenfile : file to read Coverity token from
+#   --version ver : specify version being analyzed (default: ask git)
+#   --description desc : specify description of this version (default: ask git)
+#   --srcdir : QEMU source tree to analyze (default: current working dir)
+#   --results-tarball : path to copy the results tarball to (default: don't
+#                       copy it anywhere, just upload it)
+#   --src-tarball : tarball to untar into src dir (default: none); this
+#                   is intended mainly for internal use by the Docker support
+#
+# User-specifiable environment variables:
+#  COVERITY_TOKEN -- Coverity token (default: looks at your
+#                    coverity.token config)
+#  COVERITY_EMAIL -- the email address to use for uploads (default:
+#                    looks at your git coverity.email or user.email config)
+#  COVERITY_BUILD_CMD -- make command (default: 'make -jN' where N is
+#                    number of CPUs as determined by 'nproc')
+#  COVERITY_TOOL_BASE -- set to directory to put coverity tools
+#                        (default: /tmp/coverity-tools)
+#
+# You must specify the token, either by environment variable or by
+# putting it in a file and using --tokenfile. Everything else has
+# a reasonable default if this is run from a git tree.
+
+check_upload_permissions() {
+    # Check whether we can do an upload to the server; will exit the script
+    # with status 1 if the check failed (usually a bad token);
+    # will exit the script with status 0 if the check indicated that we
+    # can't upload yet (ie we are at quota)
+    # Assumes that COVERITY_TOKEN, PROJNAME and DRYRUN have been initialized.
+
+    echo "Checking upload permissions..."
+
+    if ! up_perm="$(wget https://scan.coverity.com/api/upload_permitted --post-data "token=$COVERITY_TOKEN&project=$PROJNAME" -q -O -)"; then
+        echo "Coverity Scan API access denied: bad token?"
+        exit 1
+    fi
+
+    # Really up_perm is a JSON response with either
+    # {upload_permitted:true} or {next_upload_permitted_at:}
+    # We do some hacky string parsing instead of properly parsing it.
+    case "$up_perm" in
+        *upload_permitted*true*)
+            echo "Coverity Scan: upload permitted"
+            ;;
+        *next_upload_permitted_at*)
+            if [ "$DRYRUN" = yes ]; then
+                echo "Coverity Scan: upload quota reached, continuing dry run"
+            else
+                echo "Coverity Scan: upload quota reached; stopping here"
+                # Exit success as this isn't a build error.
+                exit 0
+            fi
+            ;;
+        *)
+            echo "Coverity Scan upload check: unexpected result $up_perm"
+            exit 1
+            ;;
+    esac
+}
+
+
+build_docker_image() {
+    # build docker container including the coverity-scan tools
+    echo "Building docker container..."
+    # TODO: This re-unpacks the tools every time, rather than caching
+    # and reusing the image produced by the COPY of the .tgz file.
+    # Not sure why.
+    tests/docker/docker.py --engine ${DOCKER_ENGINE} build \
+                   -t coverity-scanner -f scripts/coverity-scan/coverity-scan.docker \
+                   --extra-files scripts/coverity-scan/run-coverity-scan \
+                                 "$COVERITY_TOOL_BASE"/coverity_tool.tgz
+}
+
+update_coverity_tools () {
+    # Check for whether we need to download the Coverity tools
+    # (either because we don't have a copy, or because it's out of date)
+    # Assumes that COVERITY_TOOL_BASE, COVERITY_TOKEN and PROJNAME are set.
+
+    mkdir -p "$COVERITY_TOOL_BASE"
+    cd "$COVERITY_TOOL_BASE"
+
+    echo "Checking for new version of coverity build tools..."
+    wget https://scan.coverity.com/download/linux64 --post-data "token=$COVERITY_TOKEN&project=$PROJNAME&md5=1" -O coverity_tool.md5.new
+
+    if ! cmp -s coverity_tool.md5 coverity_tool.md5.new; then
+        # out of date md5 or no md5: download new build tool
+        # blow away the old build tool
+        echo "Downloading coverity build tools..."
+        rm -rf coverity_tool coverity_tool.tgz
+        wget https://scan.coverity.com/download/linux64 --post-data "token=$COVERITY_TOKEN&project=$PROJNAME" -O coverity_tool.tgz
+        if ! (cat coverity_tool.md5.new; echo "  coverity_tool.tgz") | md5sum -c --status; then
+            echo "Downloaded tarball didn't match md5sum!"
+            exit 1
+        fi
+
+        if [ "$DOCKER" != yes ]; then
+            # extract the new one, keeping it corralled in a 'coverity_tool' directory
+            echo "Unpacking coverity build tools..."
+            mkdir -p coverity_tool
+            cd coverity_tool
+            tar xf ../coverity_tool.tgz
+            cd ..
+            mv coverity_tool.md5.new coverity_tool.md5
+        fi
+    fi
+    rm -f coverity_tool.md5.new
+    cd "$SRCDIR"
+
+    if [ "$DOCKER" = yes ]; then
+        build_docker_image
+    fi
+}
+
+
+# Check user-provided environment variables and arguments
+DRYRUN=no
+UPDATE=yes
+DOCKER=no
+
+while [ "$#" -ge 1 ]; do
+    case "$1" in
+        --dry-run)
+            shift
+            DRYRUN=yes
+            ;;
+        --no-update-tools)
+            shift
+            UPDATE=no
+            ;;
+        --update-tools-only)
+            shift
+            UPDATE=only
+            ;;
+        --version)
+            shift
+            if [ $# -eq 0 ]; then
+                echo "--version needs an argument"
+                exit 1
+            fi
+            VERSION="$1"
+            shift
+            ;;
+        --description)
+            shift
+            if [ $# -eq 0 ]; then
+                echo "--description needs an argument"
+                exit 1
+            fi
+            DESCRIPTION="$1"
+            shift
+            ;;
+        --tokenfile)
+            shift
+            if [ $# -eq 0 ]; then
+                echo "--tokenfile needs an argument"
+                exit 1
+            fi
+            COVERITY_TOKEN="$(cat "$1")"
+            shift
+            ;;
+        --srcdir)
+            shift
+            if [ $# -eq 0 ]; then
+                echo "--srcdir needs an argument"
+                exit 1
+            fi
+            SRCDIR="$1"
+            shift
+            ;;
+        --results-tarball)
+            shift
+            if [ $# -eq 0 ]; then
+                echo "--results-tarball needs an argument"
+                exit 1
+            fi
+            RESULTSTARBALL="$1"
+            shift
+            ;;
+        --src-tarball)
+            shift
+            if [ $# -eq 0 ]; then
+                echo "--src-tarball needs an argument"
+                exit 1
+            fi
+            SRCTARBALL="$1"
+            shift
+            ;;
+        --docker)
+            DOCKER=yes
+            DOCKER_ENGINE=auto
+            shift
+            ;;
+        --docker-engine)
+            shift
+            if [ $# -eq 0 ]; then
+                echo "--docker-engine needs an argument"
+                exit 1
+            fi
+            DOCKER=yes
+            DOCKER_ENGINE="$1"
+            shift
+            ;;
+        *)
+            echo "Unexpected argument '$1'"
+            exit 1
+            ;;
+    esac
+done
+
+if [ -z "$COVERITY_TOKEN" ]; then
+    COVERITY_TOKEN="$(git config coverity.token)"
+fi
+if [ -z "$COVERITY_TOKEN" ]; then
+    echo "COVERITY_TOKEN environment variable not set"
+    exit 1
+fi
+
+if [ -z "$COVERITY_BUILD_CMD" ]; then
+    NPROC=$(nproc)
+    COVERITY_BUILD_CMD="make -j$NPROC"
+    echo "COVERITY_BUILD_CMD: using default '$COVERITY_BUILD_CMD'"
+fi
+
+if [ -z "$COVERITY_TOOL_BASE" ]; then
+    echo "COVERITY_TOOL_BASE: using default /tmp/coverity-tools"
+    COVERITY_TOOL_BASE=/tmp/coverity-tools
+fi
+
+if [ -z "$SRCDIR" ]; then
+    SRCDIR="$PWD"
+fi
+
+PROJNAME=QEMU
+TARBALL=cov-int.tar.xz
+
+if [ "$UPDATE" = only ]; then
+    # Just do the tools update; we don't need to check whether
+    # we are in a source tree or have upload rights for this,
+    # so do it before some of the command line and source tree checks.
+
+    if [ "$DOCKER" = yes ] && [ ! -z "$SRCTARBALL" ]; then
+        echo --update-tools-only --docker is incompatible with --src-tarball.
+        exit 1
+    fi
+
+    update_coverity_tools
+    exit 0
+fi
+
+if [ ! -e "$SRCDIR" ]; then
+    mkdir "$SRCDIR"
+fi
+
+cd "$SRCDIR"
+
+if [ ! -z "$SRCTARBALL" ]; then
+    echo "Untarring source tarball into $SRCDIR..."
+    tar xvf "$SRCTARBALL"
+fi
+
+echo "Checking this is a QEMU source tree..."
+if ! [ -e "$SRCDIR/VERSION" ]; then
+    echo "Not in a QEMU source tree?"
+    exit 1
+fi
+
+# Fill in defaults used by the non-update-only process
+if [ -z "$VERSION" ]; then
+    VERSION="$(git describe --always HEAD)"
+fi
+
+if [ -z "$DESCRIPTION" ]; then
+    DESCRIPTION="$(git rev-parse HEAD)"
+fi
+
+if [ -z "$COVERITY_EMAIL" ]; then
+    COVERITY_EMAIL="$(git config coverity.email)"
+fi
+if [ -z "$COVERITY_EMAIL" ]; then
+    COVERITY_EMAIL="$(git config user.email)"
+fi
+
+# Otherwise, continue with the full build and upload process.
+
+check_upload_permissions
+
+if [ "$UPDATE" != no ]; then
+    update_coverity_tools
+fi
+
+# Run ourselves inside docker if that's what the user wants
+if [ "$DOCKER" = yes ]; then
+    # Put the Coverity token into a temporary file that only
+    # we have read access to, and then pass it to docker build
+    # using a volume.  A volume is enough for the token not to
+    # leak into the Docker image.
+    umask 077
+    SECRETDIR=$(mktemp -d)
+    if [ -z "$SECRETDIR" ]; then
+        echo "Failed to create temporary directory"
+        exit 1
+    fi
+    trap 'rm -rf "$SECRETDIR"' INT TERM EXIT
+    echo "Created temporary directory $SECRETDIR"
+    SECRET="$SECRETDIR/token"
+    echo "$COVERITY_TOKEN" > "$SECRET"
+    echo "Archiving sources to be analyzed..."
+    ./scripts/archive-source.sh "$SECRETDIR/qemu-sources.tgz"
+    ARGS="--no-update-tools"
+    if [ "$DRYRUN" = yes ]; then
+        ARGS="$ARGS --dry-run"
+    fi
+    echo "Running scanner..."
+    # If we need to capture the output tarball, get the inner run to
+    # save it to the secrets directory so we can copy it out before the
+    # directory is cleaned up.
+    if [ ! -z "$RESULTSTARBALL" ]; then
+        ARGS="$ARGS --results-tarball /work/cov-int.tar.xz"
+    fi
+    # Arrange for this docker run to get access to the sources with -v.
+    # We pass through all the configuration from the outer script to the inner.
+    export COVERITY_EMAIL COVERITY_BUILD_CMD
+    tests/docker/docker.py run -it --env COVERITY_EMAIL --env COVERITY_BUILD_CMD \
+           -v "$SECRETDIR:/work" coverity-scanner \
+           ./run-coverity-scan --version "$VERSION" \
+           --description "$DESCRIPTION" $ARGS --tokenfile /work/token \
+           --srcdir /qemu --src-tarball /work/qemu-sources.tgz
+    if [ ! -z "$RESULTSTARBALL" ]; then
+        echo "Copying results tarball to $RESULTSTARBALL..."
+        cp "$SECRETDIR/cov-int.tar.xz" "$RESULTSTARBALL"
+    fi
+    echo "Docker work complete."
+    exit 0
+fi
+
+TOOLBIN="$(cd "$COVERITY_TOOL_BASE" && echo $PWD/coverity_tool/cov-analysis-*/bin)"
+
+if ! test -x "$TOOLBIN/cov-build"; then
+    echo "Couldn't find cov-build in the coverity build-tool directory??"
+    exit 1
+fi
+
+export PATH="$TOOLBIN:$PATH"
+
+cd "$SRCDIR"
+
+echo "Doing make distclean..."
+make distclean
+
+echo "Configuring..."
+# We configure with a fixed set of enables here to ensure that we don't
+# accidentally reduce the scope of the analysis by doing the build on
+# the system that's missing a dependency that we need to build part of
+# the codebase.
+./configure --disable-modules --enable-sdl --enable-gtk \
+    --enable-opengl --enable-vte --enable-gnutls \
+    --enable-nettle --enable-curses --enable-curl \
+    --audio-drv-list=oss,alsa,sdl,pa --enable-virtfs \
+    --enable-vnc --enable-vnc-sasl --enable-vnc-jpeg --enable-vnc-png \
+    --enable-xen --enable-brlapi \
+    --enable-linux-aio --enable-attr \
+    --enable-cap-ng --enable-trace-backends=log --enable-spice --enable-rbd \
+    --enable-xfsctl --enable-libusb --enable-usb-redir \
+    --enable-libiscsi --enable-libnfs --enable-seccomp \
+    --enable-tpm --enable-libssh --enable-lzo --enable-snappy --enable-bzip2 \
+    --enable-numa --enable-rdma --enable-smartcard --enable-virglrenderer \
+    --enable-mpath --enable-libxml2 --enable-glusterfs \
+    --enable-virtfs --enable-zstd
+
+echo "Running cov-build..."
+rm -rf cov-int
+mkdir cov-int
+cov-build --dir cov-int $COVERITY_BUILD_CMD
+
+echo "Creating results tarball..."
+tar cvf - cov-int | xz > "$TARBALL"
+
+if [ ! -z "$RESULTSTARBALL" ]; then
+    echo "Copying results tarball to $RESULTSTARBALL..."
+    cp "$TARBALL" "$RESULTSTARBALL"
+fi
+
+echo "Uploading results tarball..."
+
+if [ "$DRYRUN" = yes ]; then
+    echo "Dry run only, not uploading $TARBALL"
+    exit 0
+fi
+
+curl --form token="$COVERITY_TOKEN" --form email="$COVERITY_EMAIL" \
+     --form file=@"$TARBALL" --form version="$VERSION" \
+     --form description="$DESCRIPTION" \
+     https://scan.coverity.com/builds?project="$PROJNAME"
+
+echo "Done."
diff --git a/scripts/decodetree.py b/scripts/decodetree.py
new file mode 100644
index 000000000..47aa9caf6
--- /dev/null
+++ b/scripts/decodetree.py
@@ -0,0 +1,1397 @@
+#!/usr/bin/env python3
+# Copyright (c) 2018 Linaro Limited
+#
+# This library is free software; you can redistribute it and/or
+# modify it under the terms of the GNU Lesser General Public
+# License as published by the Free Software Foundation; either
+# version 2.1 of the License, or (at your option) any later version.
+#
+# This library is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+# Lesser General Public License for more details.
+#
+# You should have received a copy of the GNU Lesser General Public
+# License along with this library; if not, see .
+#
+
+#
+# Generate a decoding tree from a specification file.
+# See the syntax and semantics in docs/devel/decodetree.rst.
+#
+
+import os
+import re
+import sys
+import getopt
+
+insnwidth = 32
+insnmask = 0xffffffff
+variablewidth = False
+fields = {}
+arguments = {}
+formats = {}
+allpatterns = []
+anyextern = False
+
+translate_prefix = 'trans'
+translate_scope = 'static '
+input_file = ''
+output_file = None
+output_fd = None
+insntype = 'uint32_t'
+decode_function = 'decode'
+
+# An identifier for C.
+re_C_ident = '[a-zA-Z][a-zA-Z0-9_]*'
+
+# Identifiers for Arguments, Fields, Formats and Patterns.
+re_arg_ident = '&[a-zA-Z0-9_]*'
+re_fld_ident = '%[a-zA-Z0-9_]*'
+re_fmt_ident = '@[a-zA-Z0-9_]*'
+re_pat_ident = '[a-zA-Z0-9_]*'
+
+def error_with_file(file, lineno, *args):
+    """Print an error message from file:line and args and exit."""
+    global output_file
+    global output_fd
+
+    prefix = ''
+    if file:
+        prefix += '{0}:'.format(file)
+    if lineno:
+        prefix += '{0}:'.format(lineno)
+    if prefix:
+        prefix += ' '
+    print(prefix, end='error: ', file=sys.stderr)
+    print(*args, file=sys.stderr)
+
+    if output_file and output_fd:
+        output_fd.close()
+        os.remove(output_file)
+    exit(1)
+# end error_with_file
+
+
+def error(lineno, *args):
+    error_with_file(input_file, lineno, *args)
+# end error
+
+
+def output(*args):
+    global output_fd
+    for a in args:
+        output_fd.write(a)
+
+
+def output_autogen():
+    output('/* This file is autogenerated by scripts/decodetree.py.  */\n\n')
+
+
+def str_indent(c):
+    """Return a string with C spaces"""
+    return ' ' * c
+
+
+def str_fields(fields):
+    """Return a string uniquely identifying FIELDS"""
+    r = ''
+    for n in sorted(fields.keys()):
+        r += '_' + n
+    return r[1:]
+
+
+def str_match_bits(bits, mask):
+    """Return a string pretty-printing BITS/MASK"""
+    global insnwidth
+
+    i = 1 << (insnwidth - 1)
+    space = 0x01010100
+    r = ''
+    while i != 0:
+        if i & mask:
+            if i & bits:
+                r += '1'
+            else:
+                r += '0'
+        else:
+            r += '.'
+        if i & space:
+            r += ' '
+        i >>= 1
+    return r
+
+
+def is_pow2(x):
+    """Return true iff X is equal to a power of 2."""
+    return (x & (x - 1)) == 0
+
+
+def ctz(x):
+    """Return the number of times 2 factors into X."""
+    assert x != 0
+    r = 0
+    while ((x >> r) & 1) == 0:
+        r += 1
+    return r
+
+
+def is_contiguous(bits):
+    if bits == 0:
+        return -1
+    shift = ctz(bits)
+    if is_pow2((bits >> shift) + 1):
+        return shift
+    else:
+        return -1
+
+
+def eq_fields_for_args(flds_a, flds_b):
+    if len(flds_a) != len(flds_b):
+        return False
+    for k, a in flds_a.items():
+        if k not in flds_b:
+            return False
+    return True
+
+
+def eq_fields_for_fmts(flds_a, flds_b):
+    if len(flds_a) != len(flds_b):
+        return False
+    for k, a in flds_a.items():
+        if k not in flds_b:
+            return False
+        b = flds_b[k]
+        if a.__class__ != b.__class__ or a != b:
+            return False
+    return True
+
+
+class Field:
+    """Class representing a simple instruction field"""
+    def __init__(self, sign, pos, len):
+        self.sign = sign
+        self.pos = pos
+        self.len = len
+        self.mask = ((1 << len) - 1) << pos
+
+    def __str__(self):
+        if self.sign:
+            s = 's'
+        else:
+            s = ''
+        return str(self.pos) + ':' + s + str(self.len)
+
+    def str_extract(self):
+        if self.sign:
+            extr = 'sextract32'
+        else:
+            extr = 'extract32'
+        return '{0}(insn, {1}, {2})'.format(extr, self.pos, self.len)
+
+    def __eq__(self, other):
+        return self.sign == other.sign and self.mask == other.mask
+
+    def __ne__(self, other):
+        return not self.__eq__(other)
+# end Field
+
+
+class MultiField:
+    """Class representing a compound instruction field"""
+    def __init__(self, subs, mask):
+        self.subs = subs
+        self.sign = subs[0].sign
+        self.mask = mask
+
+    def __str__(self):
+        return str(self.subs)
+
+    def str_extract(self):
+        ret = '0'
+        pos = 0
+        for f in reversed(self.subs):
+            if pos == 0:
+                ret = f.str_extract()
+            else:
+                ret = 'deposit32({0}, {1}, {2}, {3})' \
+                      .format(ret, pos, 32 - pos, f.str_extract())
+            pos += f.len
+        return ret
+
+    def __ne__(self, other):
+        if len(self.subs) != len(other.subs):
+            return True
+        for a, b in zip(self.subs, other.subs):
+            if a.__class__ != b.__class__ or a != b:
+                return True
+        return False
+
+    def __eq__(self, other):
+        return not self.__ne__(other)
+# end MultiField
+
+
+class ConstField:
+    """Class representing an argument field with constant value"""
+    def __init__(self, value):
+        self.value = value
+        self.mask = 0
+        self.sign = value < 0
+
+    def __str__(self):
+        return str(self.value)
+
+    def str_extract(self):
+        return str(self.value)
+
+    def __cmp__(self, other):
+        return self.value - other.value
+# end ConstField
+
+
+class FunctionField:
+    """Class representing a field passed through a function"""
+    def __init__(self, func, base):
+        self.mask = base.mask
+        self.sign = base.sign
+        self.base = base
+        self.func = func
+
+    def __str__(self):
+        return self.func + '(' + str(self.base) + ')'
+
+    def str_extract(self):
+        return self.func + '(ctx, ' + self.base.str_extract() + ')'
+
+    def __eq__(self, other):
+        return self.func == other.func and self.base == other.base
+
+    def __ne__(self, other):
+        return not self.__eq__(other)
+# end FunctionField
+
+
+class ParameterField:
+    """Class representing a pseudo-field read from a function"""
+    def __init__(self, func):
+        self.mask = 0
+        self.sign = 0
+        self.func = func
+
+    def __str__(self):
+        return self.func
+
+    def str_extract(self):
+        return self.func + '(ctx)'
+
+    def __eq__(self, other):
+        return self.func == other.func
+
+    def __ne__(self, other):
+        return not self.__eq__(other)
+# end ParameterField
+
+
+class Arguments:
+    """Class representing the extracted fields of a format"""
+    def __init__(self, nm, flds, extern):
+        self.name = nm
+        self.extern = extern
+        self.fields = sorted(flds)
+
+    def __str__(self):
+        return self.name + ' ' + str(self.fields)
+
+    def struct_name(self):
+        return 'arg_' + self.name
+
+    def output_def(self):
+        if not self.extern:
+            output('typedef struct {\n')
+            for n in self.fields:
+                output('    int ', n, ';\n')
+            output('} ', self.struct_name(), ';\n\n')
+# end Arguments
+
+
+class General:
+    """Common code between instruction formats and instruction patterns"""
+    def __init__(self, name, lineno, base, fixb, fixm, udfm, fldm, flds, w):
+        self.name = name
+        self.file = input_file
+        self.lineno = lineno
+        self.base = base
+        self.fixedbits = fixb
+        self.fixedmask = fixm
+        self.undefmask = udfm
+        self.fieldmask = fldm
+        self.fields = flds
+        self.width = w
+
+    def __str__(self):
+        return self.name + ' ' + str_match_bits(self.fixedbits, self.fixedmask)
+
+    def str1(self, i):
+        return str_indent(i) + self.__str__()
+# end General
+
+
+class Format(General):
+    """Class representing an instruction format"""
+
+    def extract_name(self):
+        global decode_function
+        return decode_function + '_extract_' + self.name
+
+    def output_extract(self):
+        output('static void ', self.extract_name(), '(DisasContext *ctx, ',
+               self.base.struct_name(), ' *a, ', insntype, ' insn)\n{\n')
+        for n, f in self.fields.items():
+            output('    a->', n, ' = ', f.str_extract(), ';\n')
+        output('}\n\n')
+# end Format
+
+
+class Pattern(General):
+    """Class representing an instruction pattern"""
+
+    def output_decl(self):
+        global translate_scope
+        global translate_prefix
+        output('typedef ', self.base.base.struct_name(),
+               ' arg_', self.name, ';\n')
+        output(translate_scope, 'bool ', translate_prefix, '_', self.name,
+               '(DisasContext *ctx, arg_', self.name, ' *a);\n')
+
+    def output_code(self, i, extracted, outerbits, outermask):
+        global translate_prefix
+        ind = str_indent(i)
+        arg = self.base.base.name
+        output(ind, '/* ', self.file, ':', str(self.lineno), ' */\n')
+        if not extracted:
+            output(ind, self.base.extract_name(),
+                   '(ctx, &u.f_', arg, ', insn);\n')
+        for n, f in self.fields.items():
+            output(ind, 'u.f_', arg, '.', n, ' = ', f.str_extract(), ';\n')
+        output(ind, 'if (', translate_prefix, '_', self.name,
+               '(ctx, &u.f_', arg, ')) return true;\n')
+
+    # Normal patterns do not have children.
+    def build_tree(self):
+        return
+    def prop_masks(self):
+        return
+    def prop_format(self):
+        return
+    def prop_width(self):
+        return
+
+# end Pattern
+
+
+class MultiPattern(General):
+    """Class representing a set of instruction patterns"""
+
+    def __init__(self, lineno):
+        self.file = input_file
+        self.lineno = lineno
+        self.pats = []
+        self.base = None
+        self.fixedbits = 0
+        self.fixedmask = 0
+        self.undefmask = 0
+        self.width = None
+
+    def __str__(self):
+        r = 'group'
+        if self.fixedbits is not None:
+            r += ' ' + str_match_bits(self.fixedbits, self.fixedmask)
+        return r
+
+    def output_decl(self):
+        for p in self.pats:
+            p.output_decl()
+
+    def prop_masks(self):
+        global insnmask
+
+        fixedmask = insnmask
+        undefmask = insnmask
+
+        # Collect fixedmask/undefmask for all of the children.
+        for p in self.pats:
+            p.prop_masks()
+            fixedmask &= p.fixedmask
+            undefmask &= p.undefmask
+
+        # Widen fixedmask until all fixedbits match
+        repeat = True
+        fixedbits = 0
+        while repeat and fixedmask != 0:
+            fixedbits = None
+            for p in self.pats:
+                thisbits = p.fixedbits & fixedmask
+                if fixedbits is None:
+                    fixedbits = thisbits
+                elif fixedbits != thisbits:
+                    fixedmask &= ~(fixedbits ^ thisbits)
+                    break
+            else:
+                repeat = False
+
+        self.fixedbits = fixedbits
+        self.fixedmask = fixedmask
+        self.undefmask = undefmask
+
+    def build_tree(self):
+        for p in self.pats:
+            p.build_tree()
+
+    def prop_format(self):
+        for p in self.pats:
+            p.build_tree()
+
+    def prop_width(self):
+        width = None
+        for p in self.pats:
+            p.prop_width()
+            if width is None:
+                width = p.width
+            elif width != p.width:
+                error_with_file(self.file, self.lineno,
+                                'width mismatch in patterns within braces')
+        self.width = width
+
+# end MultiPattern
+
+
+class IncMultiPattern(MultiPattern):
+    """Class representing an overlapping set of instruction patterns"""
+
+    def output_code(self, i, extracted, outerbits, outermask):
+        global translate_prefix
+        ind = str_indent(i)
+        for p in self.pats:
+            if outermask != p.fixedmask:
+                innermask = p.fixedmask & ~outermask
+                innerbits = p.fixedbits & ~outermask
+                output(ind, 'if ((insn & ',
+                       '0x{0:08x}) == 0x{1:08x}'.format(innermask, innerbits),
+                       ') {\n')
+                output(ind, '    /* ',
+                       str_match_bits(p.fixedbits, p.fixedmask), ' */\n')
+                p.output_code(i + 4, extracted, p.fixedbits, p.fixedmask)
+                output(ind, '}\n')
+            else:
+                p.output_code(i, extracted, p.fixedbits, p.fixedmask)
+#end IncMultiPattern
+
+
+class Tree:
+    """Class representing a node in a decode tree"""
+
+    def __init__(self, fm, tm):
+        self.fixedmask = fm
+        self.thismask = tm
+        self.subs = []
+        self.base = None
+
+    def str1(self, i):
+        ind = str_indent(i)
+        r = '{0}{1:08x}'.format(ind, self.fixedmask)
+        if self.format:
+            r += ' ' + self.format.name
+        r += ' [\n'
+        for (b, s) in self.subs:
+            r += '{0}  {1:08x}:\n'.format(ind, b)
+            r += s.str1(i + 4) + '\n'
+        r += ind + ']'
+        return r
+
+    def __str__(self):
+        return self.str1(0)
+
+    def output_code(self, i, extracted, outerbits, outermask):
+        ind = str_indent(i)
+
+        # If we identified all nodes below have the same format,
+        # extract the fields now.
+        if not extracted and self.base:
+            output(ind, self.base.extract_name(),
+                   '(ctx, &u.f_', self.base.base.name, ', insn);\n')
+            extracted = True
+
+        # Attempt to aid the compiler in producing compact switch statements.
+        # If the bits in the mask are contiguous, extract them.
+        sh = is_contiguous(self.thismask)
+        if sh > 0:
+            # Propagate SH down into the local functions.
+            def str_switch(b, sh=sh):
+                return '(insn >> {0}) & 0x{1:x}'.format(sh, b >> sh)
+
+            def str_case(b, sh=sh):
+                return '0x{0:x}'.format(b >> sh)
+        else:
+            def str_switch(b):
+                return 'insn & 0x{0:08x}'.format(b)
+
+            def str_case(b):
+                return '0x{0:08x}'.format(b)
+
+        output(ind, 'switch (', str_switch(self.thismask), ') {\n')
+        for b, s in sorted(self.subs):
+            assert (self.thismask & ~s.fixedmask) == 0
+            innermask = outermask | self.thismask
+            innerbits = outerbits | b
+            output(ind, 'case ', str_case(b), ':\n')
+            output(ind, '    /* ',
+                   str_match_bits(innerbits, innermask), ' */\n')
+            s.output_code(i + 4, extracted, innerbits, innermask)
+            output(ind, '    break;\n')
+        output(ind, '}\n')
+# end Tree
+
+
+class ExcMultiPattern(MultiPattern):
+    """Class representing a non-overlapping set of instruction patterns"""
+
+    def output_code(self, i, extracted, outerbits, outermask):
+        # Defer everything to our decomposed Tree node
+        self.tree.output_code(i, extracted, outerbits, outermask)
+
+    @staticmethod
+    def __build_tree(pats, outerbits, outermask):
+        # Find the intersection of all remaining fixedmask.
+        innermask = ~outermask & insnmask
+        for i in pats:
+            innermask &= i.fixedmask
+
+        if innermask == 0:
+            # Edge condition: One pattern covers the entire insnmask
+            if len(pats) == 1:
+                t = Tree(outermask, innermask)
+                t.subs.append((0, pats[0]))
+                return t
+
+            text = 'overlapping patterns:'
+            for p in pats:
+                text += '\n' + p.file + ':' + str(p.lineno) + ': ' + str(p)
+            error_with_file(pats[0].file, pats[0].lineno, text)
+
+        fullmask = outermask | innermask
+
+        # Sort each element of pats into the bin selected by the mask.
+        bins = {}
+        for i in pats:
+            fb = i.fixedbits & innermask
+            if fb in bins:
+                bins[fb].append(i)
+            else:
+                bins[fb] = [i]
+
+        # We must recurse if any bin has more than one element or if
+        # the single element in the bin has not been fully matched.
+        t = Tree(fullmask, innermask)
+
+        for b, l in bins.items():
+            s = l[0]
+            if len(l) > 1 or s.fixedmask & ~fullmask != 0:
+                s = ExcMultiPattern.__build_tree(l, b | outerbits, fullmask)
+            t.subs.append((b, s))
+
+        return t
+
+    def build_tree(self):
+        super().prop_format()
+        self.tree = self.__build_tree(self.pats, self.fixedbits,
+                                      self.fixedmask)
+
+    @staticmethod
+    def __prop_format(tree):
+        """Propagate Format objects into the decode tree"""
+
+        # Depth first search.
+        for (b, s) in tree.subs:
+            if isinstance(s, Tree):
+                ExcMultiPattern.__prop_format(s)
+
+        # If all entries in SUBS have the same format, then
+        # propagate that into the tree.
+        f = None
+        for (b, s) in tree.subs:
+            if f is None:
+                f = s.base
+                if f is None:
+                    return
+            if f is not s.base:
+                return
+        tree.base = f
+
+    def prop_format(self):
+        super().prop_format()
+        self.__prop_format(self.tree)
+
+# end ExcMultiPattern
+
+
+def parse_field(lineno, name, toks):
+    """Parse one instruction field from TOKS at LINENO"""
+    global fields
+    global insnwidth
+
+    # A "simple" field will have only one entry;
+    # a "multifield" will have several.
+    subs = []
+    width = 0
+    func = None
+    for t in toks:
+        if re.match('^!function=', t):
+            if func:
+                error(lineno, 'duplicate function')
+            func = t.split('=')
+            func = func[1]
+            continue
+
+        if re.fullmatch('[0-9]+:s[0-9]+', t):
+            # Signed field extract
+            subtoks = t.split(':s')
+            sign = True
+        elif re.fullmatch('[0-9]+:[0-9]+', t):
+            # Unsigned field extract
+            subtoks = t.split(':')
+            sign = False
+        else:
+            error(lineno, 'invalid field token "{0}"'.format(t))
+        po = int(subtoks[0])
+        le = int(subtoks[1])
+        if po + le > insnwidth:
+            error(lineno, 'field {0} too large'.format(t))
+        f = Field(sign, po, le)
+        subs.append(f)
+        width += le
+
+    if width > insnwidth:
+        error(lineno, 'field too large')
+    if len(subs) == 0:
+        if func:
+            f = ParameterField(func)
+        else:
+            error(lineno, 'field with no value')
+    else:
+        if len(subs) == 1:
+            f = subs[0]
+        else:
+            mask = 0
+            for s in subs:
+                if mask & s.mask:
+                    error(lineno, 'field components overlap')
+                mask |= s.mask
+            f = MultiField(subs, mask)
+        if func:
+            f = FunctionField(func, f)
+
+    if name in fields:
+        error(lineno, 'duplicate field', name)
+    fields[name] = f
+# end parse_field
+
+
+def parse_arguments(lineno, name, toks):
+    """Parse one argument set from TOKS at LINENO"""
+    global arguments
+    global re_C_ident
+    global anyextern
+
+    flds = []
+    extern = False
+    for t in toks:
+        if re.fullmatch('!extern', t):
+            extern = True
+            anyextern = True
+            continue
+        if not re.fullmatch(re_C_ident, t):
+            error(lineno, 'invalid argument set token "{0}"'.format(t))
+        if t in flds:
+            error(lineno, 'duplicate argument "{0}"'.format(t))
+        flds.append(t)
+
+    if name in arguments:
+        error(lineno, 'duplicate argument set', name)
+    arguments[name] = Arguments(name, flds, extern)
+# end parse_arguments
+
+
+def lookup_field(lineno, name):
+    global fields
+    if name in fields:
+        return fields[name]
+    error(lineno, 'undefined field', name)
+
+
+def add_field(lineno, flds, new_name, f):
+    if new_name in flds:
+        error(lineno, 'duplicate field', new_name)
+    flds[new_name] = f
+    return flds
+
+
+def add_field_byname(lineno, flds, new_name, old_name):
+    return add_field(lineno, flds, new_name, lookup_field(lineno, old_name))
+
+
+def infer_argument_set(flds):
+    global arguments
+    global decode_function
+
+    for arg in arguments.values():
+        if eq_fields_for_args(flds, arg.fields):
+            return arg
+
+    name = decode_function + str(len(arguments))
+    arg = Arguments(name, flds.keys(), False)
+    arguments[name] = arg
+    return arg
+
+
+def infer_format(arg, fieldmask, flds, width):
+    global arguments
+    global formats
+    global decode_function
+
+    const_flds = {}
+    var_flds = {}
+    for n, c in flds.items():
+        if c is ConstField:
+            const_flds[n] = c
+        else:
+            var_flds[n] = c
+
+    # Look for an existing format with the same argument set and fields
+    for fmt in formats.values():
+        if arg and fmt.base != arg:
+            continue
+        if fieldmask != fmt.fieldmask:
+            continue
+        if width != fmt.width:
+            continue
+        if not eq_fields_for_fmts(flds, fmt.fields):
+            continue
+        return (fmt, const_flds)
+
+    name = decode_function + '_Fmt_' + str(len(formats))
+    if not arg:
+        arg = infer_argument_set(flds)
+
+    fmt = Format(name, 0, arg, 0, 0, 0, fieldmask, var_flds, width)
+    formats[name] = fmt
+
+    return (fmt, const_flds)
+# end infer_format
+
+
+def parse_generic(lineno, parent_pat, name, toks):
+    """Parse one instruction format from TOKS at LINENO"""
+    global fields
+    global arguments
+    global formats
+    global allpatterns
+    global re_arg_ident
+    global re_fld_ident
+    global re_fmt_ident
+    global re_C_ident
+    global insnwidth
+    global insnmask
+    global variablewidth
+
+    is_format = parent_pat is None
+
+    fixedmask = 0
+    fixedbits = 0
+    undefmask = 0
+    width = 0
+    flds = {}
+    arg = None
+    fmt = None
+    for t in toks:
+        # '&Foo' gives a format an explicit argument set.
+        if re.fullmatch(re_arg_ident, t):
+            tt = t[1:]
+            if arg:
+                error(lineno, 'multiple argument sets')
+            if tt in arguments:
+                arg = arguments[tt]
+            else:
+                error(lineno, 'undefined argument set', t)
+            continue
+
+        # '@Foo' gives a pattern an explicit format.
+        if re.fullmatch(re_fmt_ident, t):
+            tt = t[1:]
+            if fmt:
+                error(lineno, 'multiple formats')
+            if tt in formats:
+                fmt = formats[tt]
+            else:
+                error(lineno, 'undefined format', t)
+            continue
+
+        # '%Foo' imports a field.
+        if re.fullmatch(re_fld_ident, t):
+            tt = t[1:]
+            flds = add_field_byname(lineno, flds, tt, tt)
+            continue
+
+        # 'Foo=%Bar' imports a field with a different name.
+        if re.fullmatch(re_C_ident + '=' + re_fld_ident, t):
+            (fname, iname) = t.split('=%')
+            flds = add_field_byname(lineno, flds, fname, iname)
+            continue
+
+        # 'Foo=number' sets an argument field to a constant value
+        if re.fullmatch(re_C_ident + '=[+-]?[0-9]+', t):
+            (fname, value) = t.split('=')
+            value = int(value)
+            flds = add_field(lineno, flds, fname, ConstField(value))
+            continue
+
+        # Pattern of 0s, 1s, dots and dashes indicate required zeros,
+        # required ones, or dont-cares.
+        if re.fullmatch('[01.-]+', t):
+            shift = len(t)
+            fms = t.replace('0', '1')
+            fms = fms.replace('.', '0')
+            fms = fms.replace('-', '0')
+            fbs = t.replace('.', '0')
+            fbs = fbs.replace('-', '0')
+            ubm = t.replace('1', '0')
+            ubm = ubm.replace('.', '0')
+            ubm = ubm.replace('-', '1')
+            fms = int(fms, 2)
+            fbs = int(fbs, 2)
+            ubm = int(ubm, 2)
+            fixedbits = (fixedbits << shift) | fbs
+            fixedmask = (fixedmask << shift) | fms
+            undefmask = (undefmask << shift) | ubm
+        # Otherwise, fieldname:fieldwidth
+        elif re.fullmatch(re_C_ident + ':s?[0-9]+', t):
+            (fname, flen) = t.split(':')
+            sign = False
+            if flen[0] == 's':
+                sign = True
+                flen = flen[1:]
+            shift = int(flen, 10)
+            if shift + width > insnwidth:
+                error(lineno, 'field {0} exceeds insnwidth'.format(fname))
+            f = Field(sign, insnwidth - width - shift, shift)
+            flds = add_field(lineno, flds, fname, f)
+            fixedbits <<= shift
+            fixedmask <<= shift
+            undefmask <<= shift
+        else:
+            error(lineno, 'invalid token "{0}"'.format(t))
+        width += shift
+
+    if variablewidth and width < insnwidth and width % 8 == 0:
+        shift = insnwidth - width
+        fixedbits <<= shift
+        fixedmask <<= shift
+        undefmask <<= shift
+        undefmask |= (1 << shift) - 1
+
+    # We should have filled in all of the bits of the instruction.
+    elif not (is_format and width == 0) and width != insnwidth:
+        error(lineno, 'definition has {0} bits'.format(width))
+
+    # Do not check for fields overlapping fields; one valid usage
+    # is to be able to duplicate fields via import.
+    fieldmask = 0
+    for f in flds.values():
+        fieldmask |= f.mask
+
+    # Fix up what we've parsed to match either a format or a pattern.
+    if is_format:
+        # Formats cannot reference formats.
+        if fmt:
+            error(lineno, 'format referencing format')
+        # If an argument set is given, then there should be no fields
+        # without a place to store it.
+        if arg:
+            for f in flds.keys():
+                if f not in arg.fields:
+                    error(lineno, 'field {0} not in argument set {1}'
+                                  .format(f, arg.name))
+        else:
+            arg = infer_argument_set(flds)
+        if name in formats:
+            error(lineno, 'duplicate format name', name)
+        fmt = Format(name, lineno, arg, fixedbits, fixedmask,
+                     undefmask, fieldmask, flds, width)
+        formats[name] = fmt
+    else:
+        # Patterns can reference a format ...
+        if fmt:
+            # ... but not an argument simultaneously
+            if arg:
+                error(lineno, 'pattern specifies both format and argument set')
+            if fixedmask & fmt.fixedmask:
+                error(lineno, 'pattern fixed bits overlap format fixed bits')
+            if width != fmt.width:
+                error(lineno, 'pattern uses format of different width')
+            fieldmask |= fmt.fieldmask
+            fixedbits |= fmt.fixedbits
+            fixedmask |= fmt.fixedmask
+            undefmask |= fmt.undefmask
+        else:
+            (fmt, flds) = infer_format(arg, fieldmask, flds, width)
+        arg = fmt.base
+        for f in flds.keys():
+            if f not in arg.fields:
+                error(lineno, 'field {0} not in argument set {1}'
+                              .format(f, arg.name))
+            if f in fmt.fields.keys():
+                error(lineno, 'field {0} set by format and pattern'.format(f))
+        for f in arg.fields:
+            if f not in flds.keys() and f not in fmt.fields.keys():
+                error(lineno, 'field {0} not initialized'.format(f))
+        pat = Pattern(name, lineno, fmt, fixedbits, fixedmask,
+                      undefmask, fieldmask, flds, width)
+        parent_pat.pats.append(pat)
+        allpatterns.append(pat)
+
+    # Validate the masks that we have assembled.
+    if fieldmask & fixedmask:
+        error(lineno, 'fieldmask overlaps fixedmask (0x{0:08x} & 0x{1:08x})'
+                      .format(fieldmask, fixedmask))
+    if fieldmask & undefmask:
+        error(lineno, 'fieldmask overlaps undefmask (0x{0:08x} & 0x{1:08x})'
+                      .format(fieldmask, undefmask))
+    if fixedmask & undefmask:
+        error(lineno, 'fixedmask overlaps undefmask (0x{0:08x} & 0x{1:08x})'
+                      .format(fixedmask, undefmask))
+    if not is_format:
+        allbits = fieldmask | fixedmask | undefmask
+        if allbits != insnmask:
+            error(lineno, 'bits left unspecified (0x{0:08x})'
+                          .format(allbits ^ insnmask))
+# end parse_general
+
+
+def parse_file(f, parent_pat):
+    """Parse all of the patterns within a file"""
+    global re_arg_ident
+    global re_fld_ident
+    global re_fmt_ident
+    global re_pat_ident
+
+    # Read all of the lines of the file.  Concatenate lines
+    # ending in backslash; discard empty lines and comments.
+    toks = []
+    lineno = 0
+    nesting = 0
+    nesting_pats = []
+
+    for line in f:
+        lineno += 1
+
+        # Expand and strip spaces, to find indent.
+        line = line.rstrip()
+        line = line.expandtabs()
+        len1 = len(line)
+        line = line.lstrip()
+        len2 = len(line)
+
+        # Discard comments
+        end = line.find('#')
+        if end >= 0:
+            line = line[:end]
+
+        t = line.split()
+        if len(toks) != 0:
+            # Next line after continuation
+            toks.extend(t)
+        else:
+            # Allow completely blank lines.
+            if len1 == 0:
+                continue
+            indent = len1 - len2
+            # Empty line due to comment.
+            if len(t) == 0:
+                # Indentation must be correct, even for comment lines.
+                if indent != nesting:
+                    error(lineno, 'indentation ', indent, ' != ', nesting)
+                continue
+            start_lineno = lineno
+            toks = t
+
+        # Continuation?
+        if toks[-1] == '\\':
+            toks.pop()
+            continue
+
+        name = toks[0]
+        del toks[0]
+
+        # End nesting?
+        if name == '}' or name == ']':
+            if len(toks) != 0:
+                error(start_lineno, 'extra tokens after close brace')
+
+            # Make sure { } and [ ] nest properly.
+            if (name == '}') != isinstance(parent_pat, IncMultiPattern):
+                error(lineno, 'mismatched close brace')
+
+            try:
+                parent_pat = nesting_pats.pop()
+            except:
+                error(lineno, 'extra close brace')
+
+            nesting -= 2
+            if indent != nesting:
+                error(lineno, 'indentation ', indent, ' != ', nesting)
+
+            toks = []
+            continue
+
+        # Everything else should have current indentation.
+        if indent != nesting:
+            error(start_lineno, 'indentation ', indent, ' != ', nesting)
+
+        # Start nesting?
+        if name == '{' or name == '[':
+            if len(toks) != 0:
+                error(start_lineno, 'extra tokens after open brace')
+
+            if name == '{':
+                nested_pat = IncMultiPattern(start_lineno)
+            else:
+                nested_pat = ExcMultiPattern(start_lineno)
+            parent_pat.pats.append(nested_pat)
+            nesting_pats.append(parent_pat)
+            parent_pat = nested_pat
+
+            nesting += 2
+            toks = []
+            continue
+
+        # Determine the type of object needing to be parsed.
+        if re.fullmatch(re_fld_ident, name):
+            parse_field(start_lineno, name[1:], toks)
+        elif re.fullmatch(re_arg_ident, name):
+            parse_arguments(start_lineno, name[1:], toks)
+        elif re.fullmatch(re_fmt_ident, name):
+            parse_generic(start_lineno, None, name[1:], toks)
+        elif re.fullmatch(re_pat_ident, name):
+            parse_generic(start_lineno, parent_pat, name, toks)
+        else:
+            error(lineno, 'invalid token "{0}"'.format(name))
+        toks = []
+
+    if nesting != 0:
+        error(lineno, 'missing close brace')
+# end parse_file
+
+
+class SizeTree:
+    """Class representing a node in a size decode tree"""
+
+    def __init__(self, m, w):
+        self.mask = m
+        self.subs = []
+        self.base = None
+        self.width = w
+
+    def str1(self, i):
+        ind = str_indent(i)
+        r = '{0}{1:08x}'.format(ind, self.mask)
+        r += ' [\n'
+        for (b, s) in self.subs:
+            r += '{0}  {1:08x}:\n'.format(ind, b)
+            r += s.str1(i + 4) + '\n'
+        r += ind + ']'
+        return r
+
+    def __str__(self):
+        return self.str1(0)
+
+    def output_code(self, i, extracted, outerbits, outermask):
+        ind = str_indent(i)
+
+        # If we need to load more bytes to test, do so now.
+        if extracted < self.width:
+            output(ind, 'insn = ', decode_function,
+                   '_load_bytes(ctx, insn, {0}, {1});\n'
+                   .format(extracted // 8, self.width // 8));
+            extracted = self.width
+
+        # Attempt to aid the compiler in producing compact switch statements.
+        # If the bits in the mask are contiguous, extract them.
+        sh = is_contiguous(self.mask)
+        if sh > 0:
+            # Propagate SH down into the local functions.
+            def str_switch(b, sh=sh):
+                return '(insn >> {0}) & 0x{1:x}'.format(sh, b >> sh)
+
+            def str_case(b, sh=sh):
+                return '0x{0:x}'.format(b >> sh)
+        else:
+            def str_switch(b):
+                return 'insn & 0x{0:08x}'.format(b)
+
+            def str_case(b):
+                return '0x{0:08x}'.format(b)
+
+        output(ind, 'switch (', str_switch(self.mask), ') {\n')
+        for b, s in sorted(self.subs):
+            innermask = outermask | self.mask
+            innerbits = outerbits | b
+            output(ind, 'case ', str_case(b), ':\n')
+            output(ind, '    /* ',
+                   str_match_bits(innerbits, innermask), ' */\n')
+            s.output_code(i + 4, extracted, innerbits, innermask)
+        output(ind, '}\n')
+        output(ind, 'return insn;\n')
+# end SizeTree
+
+class SizeLeaf:
+    """Class representing a leaf node in a size decode tree"""
+
+    def __init__(self, m, w):
+        self.mask = m
+        self.width = w
+
+    def str1(self, i):
+        ind = str_indent(i)
+        return '{0}{1:08x}'.format(ind, self.mask)
+
+    def __str__(self):
+        return self.str1(0)
+
+    def output_code(self, i, extracted, outerbits, outermask):
+        global decode_function
+        ind = str_indent(i)
+
+        # If we need to load more bytes, do so now.
+        if extracted < self.width:
+            output(ind, 'insn = ', decode_function,
+                   '_load_bytes(ctx, insn, {0}, {1});\n'
+                   .format(extracted // 8, self.width // 8));
+            extracted = self.width
+        output(ind, 'return insn;\n')
+# end SizeLeaf
+
+
+def build_size_tree(pats, width, outerbits, outermask):
+    global insnwidth
+
+    # Collect the mask of bits that are fixed in this width
+    innermask = 0xff << (insnwidth - width)
+    innermask &= ~outermask
+    minwidth = None
+    onewidth = True
+    for i in pats:
+        innermask &= i.fixedmask
+        if minwidth is None:
+            minwidth = i.width
+        elif minwidth != i.width:
+            onewidth = False;
+            if minwidth < i.width:
+                minwidth = i.width
+
+    if onewidth:
+        return SizeLeaf(innermask, minwidth)
+
+    if innermask == 0:
+        if width < minwidth:
+            return build_size_tree(pats, width + 8, outerbits, outermask)
+
+        pnames = []
+        for p in pats:
+            pnames.append(p.name + ':' + p.file + ':' + str(p.lineno))
+        error_with_file(pats[0].file, pats[0].lineno,
+                        'overlapping patterns size {0}:'.format(width), pnames)
+
+    bins = {}
+    for i in pats:
+        fb = i.fixedbits & innermask
+        if fb in bins:
+            bins[fb].append(i)
+        else:
+            bins[fb] = [i]
+
+    fullmask = outermask | innermask
+    lens = sorted(bins.keys())
+    if len(lens) == 1:
+        b = lens[0]
+        return build_size_tree(bins[b], width + 8, b | outerbits, fullmask)
+
+    r = SizeTree(innermask, width)
+    for b, l in bins.items():
+        s = build_size_tree(l, width, b | outerbits, fullmask)
+        r.subs.append((b, s))
+    return r
+# end build_size_tree
+
+
+def prop_size(tree):
+    """Propagate minimum widths up the decode size tree"""
+
+    if isinstance(tree, SizeTree):
+        min = None
+        for (b, s) in tree.subs:
+            width = prop_size(s)
+            if min is None or min > width:
+                min = width
+        assert min >= tree.width
+        tree.width = min
+    else:
+        min = tree.width
+    return min
+# end prop_size
+
+
+def main():
+    global arguments
+    global formats
+    global allpatterns
+    global translate_scope
+    global translate_prefix
+    global output_fd
+    global output_file
+    global input_file
+    global insnwidth
+    global insntype
+    global insnmask
+    global decode_function
+    global variablewidth
+    global anyextern
+
+    decode_scope = 'static '
+
+    long_opts = ['decode=', 'translate=', 'output=', 'insnwidth=',
+                 'static-decode=', 'varinsnwidth=']
+    try:
+        (opts, args) = getopt.gnu_getopt(sys.argv[1:], 'o:vw:', long_opts)
+    except getopt.GetoptError as err:
+        error(0, err)
+    for o, a in opts:
+        if o in ('-o', '--output'):
+            output_file = a
+        elif o == '--decode':
+            decode_function = a
+            decode_scope = ''
+        elif o == '--static-decode':
+            decode_function = a
+        elif o == '--translate':
+            translate_prefix = a
+            translate_scope = ''
+        elif o in ('-w', '--insnwidth', '--varinsnwidth'):
+            if o == '--varinsnwidth':
+                variablewidth = True
+            insnwidth = int(a)
+            if insnwidth == 16:
+                insntype = 'uint16_t'
+                insnmask = 0xffff
+            elif insnwidth != 32:
+                error(0, 'cannot handle insns of width', insnwidth)
+        else:
+            assert False, 'unhandled option'
+
+    if len(args) < 1:
+        error(0, 'missing input file')
+
+    toppat = ExcMultiPattern(0)
+
+    for filename in args:
+        input_file = filename
+        f = open(filename, 'r')
+        parse_file(f, toppat)
+        f.close()
+
+    # We do not want to compute masks for toppat, because those masks
+    # are used as a starting point for build_tree.  For toppat, we must
+    # insist that decode begins from naught.
+    for i in toppat.pats:
+        i.prop_masks()
+
+    toppat.build_tree()
+    toppat.prop_format()
+
+    if variablewidth:
+        for i in toppat.pats:
+            i.prop_width()
+        stree = build_size_tree(toppat.pats, 8, 0, 0)
+        prop_size(stree)
+
+    if output_file:
+        output_fd = open(output_file, 'w')
+    else:
+        output_fd = sys.stdout
+
+    output_autogen()
+    for n in sorted(arguments.keys()):
+        f = arguments[n]
+        f.output_def()
+
+    # A single translate function can be invoked for different patterns.
+    # Make sure that the argument sets are the same, and declare the
+    # function only once.
+    #
+    # If we're sharing formats, we're likely also sharing trans_* functions,
+    # but we can't tell which ones.  Prevent issues from the compiler by
+    # suppressing redundant declaration warnings.
+    if anyextern:
+        output("#pragma GCC diagnostic push\n",
+               "#pragma GCC diagnostic ignored \"-Wredundant-decls\"\n",
+               "#ifdef __clang__\n"
+               "#  pragma GCC diagnostic ignored \"-Wtypedef-redefinition\"\n",
+               "#endif\n\n")
+
+    out_pats = {}
+    for i in allpatterns:
+        if i.name in out_pats:
+            p = out_pats[i.name]
+            if i.base.base != p.base.base:
+                error(0, i.name, ' has conflicting argument sets')
+        else:
+            i.output_decl()
+            out_pats[i.name] = i
+    output('\n')
+
+    if anyextern:
+        output("#pragma GCC diagnostic pop\n\n")
+
+    for n in sorted(formats.keys()):
+        f = formats[n]
+        f.output_extract()
+
+    output(decode_scope, 'bool ', decode_function,
+           '(DisasContext *ctx, ', insntype, ' insn)\n{\n')
+
+    i4 = str_indent(4)
+
+    if len(allpatterns) != 0:
+        output(i4, 'union {\n')
+        for n in sorted(arguments.keys()):
+            f = arguments[n]
+            output(i4, i4, f.struct_name(), ' f_', f.name, ';\n')
+        output(i4, '} u;\n\n')
+        toppat.output_code(4, False, 0, 0)
+
+    output(i4, 'return false;\n')
+    output('}\n')
+
+    if variablewidth:
+        output('\n', decode_scope, insntype, ' ', decode_function,
+               '_load(DisasContext *ctx)\n{\n',
+               '    ', insntype, ' insn = 0;\n\n')
+        stree.output_code(4, 0, 0, 0)
+        output('}\n')
+
+    if output_file:
+        output_fd.close()
+# end main
+
+
+if __name__ == '__main__':
+    main()
diff --git a/scripts/device-crash-test b/scripts/device-crash-test
new file mode 100755
index 000000000..04118669b
--- /dev/null
+++ b/scripts/device-crash-test
@@ -0,0 +1,576 @@
+#!/usr/bin/env python3
+#
+#  Copyright (c) 2017 Red Hat Inc
+#
+# Author:
+#  Eduardo Habkost 
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License along
+# with this program; if not, write to the Free Software Foundation, Inc.,
+# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+
+"""
+Run QEMU with all combinations of -machine and -device types,
+check for crashes and unexpected errors.
+"""
+
+import os
+import sys
+import glob
+import logging
+import traceback
+import re
+import random
+import argparse
+from itertools import chain
+
+sys.path.append(os.path.join(os.path.dirname(__file__), '..', 'python'))
+from qemu.machine import QEMUMachine
+
+logger = logging.getLogger('device-crash-test')
+dbg = logger.debug
+
+
+# Purposes of the following whitelist:
+# * Avoiding verbose log messages when we find known non-fatal
+#   (exitcode=1) errors
+# * Avoiding fatal errors when we find known crashes
+# * Skipping machines/devices that are known not to work out of
+#   the box, when running in --quick mode
+#
+# Keeping the whitelist updated is desirable, but not required,
+# because unexpected cases where QEMU exits with exitcode=1 will
+# just trigger a INFO message.
+
+# Valid whitelist entry keys:
+# * accel: regexp, full match only
+# * machine: regexp, full match only
+# * device: regexp, full match only
+# * log: regexp, partial match allowed
+# * exitcode: if not present, defaults to 1. If None, matches any exitcode
+# * warn: if True, matching failures will be logged as warnings
+# * expected: if True, QEMU is expected to always fail every time
+#   when testing the corresponding test case
+# * loglevel: log level of log output when there's a match.
+ERROR_WHITELIST = [
+    # Machines that won't work out of the box:
+    #             MACHINE                         | ERROR MESSAGE
+    {'machine':'niagara', 'expected':True},       # Unable to load a firmware for -M niagara
+    {'machine':'boston', 'expected':True},        # Please provide either a -kernel or -bios argument
+    {'machine':'leon3_generic', 'expected':True}, # Can't read bios image (null)
+
+    # devices that don't work out of the box because they require extra options to "-device DEV":
+    #            DEVICE                                    | ERROR MESSAGE
+    {'device':'.*-(i386|x86_64)-cpu', 'expected':True},    # CPU socket-id is not set
+    {'device':'icp', 'expected':True},                     # icp_realize: required link 'xics' not found: Property '.xics' not found
+    {'device':'ics', 'expected':True},                     # ics_base_realize: required link 'xics' not found: Property '.xics' not found
+    # "-device ide-cd" does work on more recent QEMU versions, so it doesn't have expected=True
+    {'device':'ide-cd'},                                 # No drive specified
+    {'device':'ide-drive', 'expected':True},               # No drive specified
+    {'device':'ide-hd', 'expected':True},                  # No drive specified
+    {'device':'ipmi-bmc-extern', 'expected':True},         # IPMI external bmc requires chardev attribute
+    {'device':'isa-debugcon', 'expected':True},            # Can't create serial device, empty char device
+    {'device':'isa-ipmi-bt', 'expected':True},             # IPMI device requires a bmc attribute to be set
+    {'device':'isa-ipmi-kcs', 'expected':True},            # IPMI device requires a bmc attribute to be set
+    {'device':'isa-parallel', 'expected':True},            # Can't create serial device, empty char device
+    {'device':'ivshmem-doorbell', 'expected':True},        # You must specify a 'chardev'
+    {'device':'ivshmem-plain', 'expected':True},           # You must specify a 'memdev'
+    {'device':'loader', 'expected':True},                  # please include valid arguments
+    {'device':'nand', 'expected':True},                    # Unsupported NAND block size 0x1
+    {'device':'nvdimm', 'expected':True},                  # 'memdev' property is not set
+    {'device':'nvme', 'expected':True},                    # Device initialization failed
+    {'device':'pc-dimm', 'expected':True},                 # 'memdev' property is not set
+    {'device':'pci-bridge', 'expected':True},              # Bridge chassis not specified. Each bridge is required to be assigned a unique chassis id > 0.
+    {'device':'pci-bridge-seat', 'expected':True},         # Bridge chassis not specified. Each bridge is required to be assigned a unique chassis id > 0.
+    {'device':'pxb', 'expected':True},                     # Bridge chassis not specified. Each bridge is required to be assigned a unique chassis id > 0.
+    {'device':'scsi-block', 'expected':True},              # drive property not set
+    {'device':'scsi-disk', 'expected':True},               # drive property not set
+    {'device':'scsi-generic', 'expected':True},            # drive property not set
+    {'device':'scsi-hd', 'expected':True},                 # drive property not set
+    {'device':'spapr-pci-host-bridge', 'expected':True},   # BUID not specified for PHB
+    {'device':'spapr-rng', 'expected':True},               # spapr-rng needs an RNG backend!
+    {'device':'spapr-vty', 'expected':True},               # chardev property not set
+    {'device':'tpm-tis', 'expected':True},                 # tpm_tis: backend driver with id (null) could not be found
+    {'device':'unimplemented-device', 'expected':True},    # property 'size' not specified or zero
+    {'device':'usb-braille', 'expected':True},             # Property chardev is required
+    {'device':'usb-mtp', 'expected':True},                 # rootdir property must be configured
+    {'device':'usb-redir', 'expected':True},               # Parameter 'chardev' is missing
+    {'device':'usb-serial', 'expected':True},              # Property chardev is required
+    {'device':'usb-storage', 'expected':True},             # drive property not set
+    {'device':'vfio-amd-xgbe', 'expected':True},           # -device vfio-amd-xgbe: vfio error: wrong host device name
+    {'device':'vfio-calxeda-xgmac', 'expected':True},      # -device vfio-calxeda-xgmac: vfio error: wrong host device name
+    {'device':'vfio-pci', 'expected':True},                # No provided host device
+    {'device':'vfio-pci-igd-lpc-bridge', 'expected':True}, # VFIO dummy ISA/LPC bridge must have address 1f.0
+    {'device':'vhost-scsi.*', 'expected':True},            # vhost-scsi: missing wwpn
+    {'device':'vhost-vsock-device', 'expected':True},      # guest-cid property must be greater than 2
+    {'device':'vhost-vsock-pci', 'expected':True},         # guest-cid property must be greater than 2
+    {'device':'virtio-9p-ccw', 'expected':True},           # 9pfs device couldn't find fsdev with the id = NULL
+    {'device':'virtio-9p-device', 'expected':True},        # 9pfs device couldn't find fsdev with the id = NULL
+    {'device':'virtio-9p-pci', 'expected':True},           # 9pfs device couldn't find fsdev with the id = NULL
+    {'device':'virtio-blk-ccw', 'expected':True},          # drive property not set
+    {'device':'virtio-blk-device', 'expected':True},       # drive property not set
+    {'device':'virtio-blk-device', 'expected':True},       # drive property not set
+    {'device':'virtio-blk-pci', 'expected':True},          # drive property not set
+    {'device':'virtio-crypto-ccw', 'expected':True},       # 'cryptodev' parameter expects a valid object
+    {'device':'virtio-crypto-device', 'expected':True},    # 'cryptodev' parameter expects a valid object
+    {'device':'virtio-crypto-pci', 'expected':True},       # 'cryptodev' parameter expects a valid object
+    {'device':'virtio-input-host-device', 'expected':True}, # evdev property is required
+    {'device':'virtio-input-host-pci', 'expected':True},   # evdev property is required
+    {'device':'xen-pvdevice', 'expected':True},            # Device ID invalid, it must always be supplied
+    {'device':'vhost-vsock-ccw', 'expected':True},         # guest-cid property must be greater than 2
+    {'device':'zpci', 'expected':True},                    # target must be defined
+    {'device':'pnv-(occ|icp|lpc)', 'expected':True},       # required link 'xics' not found: Property '.xics' not found
+    {'device':'powernv-cpu-.*', 'expected':True},          # pnv_core_realize: required link 'xics' not found: Property '.xics' not found
+
+    # ioapic devices are already created by pc and will fail:
+    {'machine':'q35|pc.*', 'device':'kvm-ioapic', 'expected':True}, # Only 1 ioapics allowed
+    {'machine':'q35|pc.*', 'device':'ioapic', 'expected':True},     # Only 1 ioapics allowed
+
+    # "spapr-cpu-core needs a pseries machine"
+    {'machine':'(?!pseries).*', 'device':'.*-spapr-cpu-core', 'expected':True},
+
+    # KVM-specific devices shouldn't be tried without accel=kvm:
+    {'accel':'(?!kvm).*', 'device':'kvmclock', 'expected':True},
+
+    # xen-specific machines and devices:
+    {'accel':'(?!xen).*', 'machine':'xen.*', 'expected':True},
+    {'accel':'(?!xen).*', 'device':'xen-.*', 'expected':True},
+
+    # this fails on some machine-types, but not all, so they don't have expected=True:
+    {'device':'vmgenid'}, # vmgenid requires DMA write support in fw_cfg, which this machine type does not provide
+
+    # Silence INFO messages for errors that are common on multiple
+    # devices/machines:
+    {'log':r"No '[\w-]+' bus found for device '[\w-]+'"},
+    {'log':r"images* must be given with the 'pflash' parameter"},
+    {'log':r"(Guest|ROM|Flash|Kernel) image must be specified"},
+    {'log':r"[cC]ould not load [\w ]+ (BIOS|bios) '[\w-]+\.bin'"},
+    {'log':r"Couldn't find rom image '[\w-]+\.bin'"},
+    {'log':r"speed mismatch trying to attach usb device"},
+    {'log':r"Can't create a second ISA bus"},
+    {'log':r"duplicate fw_cfg file name"},
+    # sysbus-related error messages: most machines reject most dynamic sysbus devices:
+    {'log':r"Option '-device [\w.,-]+' cannot be handled by this machine"},
+    {'log':r"Device [\w.,-]+ is not supported by this machine yet"},
+    {'log':r"Device [\w.,-]+ can not be dynamically instantiated"},
+    {'log':r"Platform Bus: Can not fit MMIO region of size "},
+    # other more specific errors we will ignore:
+    {'device':'.*-spapr-cpu-core', 'log':r"CPU core type should be"},
+    {'log':r"MSI(-X)? is not supported by interrupt controller"},
+    {'log':r"pxb-pcie? devices cannot reside on a PCIe? bus"},
+    {'log':r"Ignoring smp_cpus value"},
+    {'log':r"sd_init failed: Drive 'sd0' is already in use because it has been automatically connected to another device"},
+    {'log':r"This CPU requires a smaller page size than the system is using"},
+    {'log':r"MSI-X support is mandatory in the S390 architecture"},
+    {'log':r"rom check and register reset failed"},
+    {'log':r"Unable to initialize GIC, CPUState for CPU#0 not valid"},
+    {'log':r"Multiple VT220 operator consoles are not supported"},
+    {'log':r"core 0 already populated"},
+    {'log':r"could not find stage1 bootloader"},
+
+    # other exitcode=1 failures not listed above will just generate INFO messages:
+    {'exitcode':1, 'loglevel':logging.INFO},
+
+    # everything else (including SIGABRT and SIGSEGV) will be a fatal error:
+    {'exitcode':None, 'fatal':True, 'loglevel':logging.FATAL},
+]
+
+
+def whitelistTestCaseMatch(wl, t):
+    """Check if a test case specification can match a whitelist entry
+
+    This only checks if a whitelist entry is a candidate match
+    for a given test case, it won't check if the test case
+    results/output match the entry.  See whitelistResultMatch().
+    """
+    return (('machine' not in wl or
+             'machine' not in t or
+             re.match(wl['machine'] + '$', t['machine'])) and
+            ('accel' not in wl or
+             'accel' not in t or
+             re.match(wl['accel'] + '$', t['accel'])) and
+            ('device' not in wl or
+             'device' not in t or
+             re.match(wl['device'] + '$', t['device'])))
+
+
+def whitelistCandidates(t):
+    """Generate the list of candidates that can match a test case"""
+    for i, wl in enumerate(ERROR_WHITELIST):
+        if whitelistTestCaseMatch(wl, t):
+            yield (i, wl)
+
+
+def findExpectedResult(t):
+    """Check if there's an expected=True whitelist entry for a test case
+
+    Returns (i, wl) tuple, where i is the index in
+    ERROR_WHITELIST and wl is the whitelist entry itself.
+    """
+    for i, wl in whitelistCandidates(t):
+        if wl.get('expected'):
+            return (i, wl)
+
+
+def whitelistResultMatch(wl, r):
+    """Check if test case results/output match a whitelist entry
+
+    It is valid to call this function only if
+    whitelistTestCaseMatch() is True for the entry (e.g. on
+    entries returned by whitelistCandidates())
+    """
+    assert whitelistTestCaseMatch(wl, r['testcase'])
+    return ((wl.get('exitcode', 1) is None or
+             r['exitcode'] == wl.get('exitcode', 1)) and
+            ('log' not in wl or
+             re.search(wl['log'], r['log'], re.MULTILINE)))
+
+
+def checkResultWhitelist(r):
+    """Look up whitelist entry for a given test case result
+
+    Returns (i, wl) tuple, where i is the index in
+    ERROR_WHITELIST and wl is the whitelist entry itself.
+    """
+    for i, wl in whitelistCandidates(r['testcase']):
+        if whitelistResultMatch(wl, r):
+            return i, wl
+
+    raise Exception("this should never happen")
+
+
+def qemuOptsEscape(s):
+    """Escape option value QemuOpts"""
+    return s.replace(",", ",,")
+
+
+def formatTestCase(t):
+    """Format test case info as "key=value key=value" for prettier logging output"""
+    return ' '.join('%s=%s' % (k, v) for k, v in t.items())
+
+
+def qomListTypeNames(vm, **kwargs):
+    """Run qom-list-types QMP command, return type names"""
+    types = vm.command('qom-list-types', **kwargs)
+    return [t['name'] for t in types]
+
+
+def infoQDM(vm):
+    """Parse 'info qdm' output"""
+    args = {'command-line': 'info qdm'}
+    devhelp = vm.command('human-monitor-command', **args)
+    for l in devhelp.split('\n'):
+        l = l.strip()
+        if l == '' or l.endswith(':'):
+            continue
+        d = {'name': re.search(r'name "([^"]+)"', l).group(1),
+             'no-user': (re.search(', no-user', l) is not None)}
+        yield d
+
+
+class QemuBinaryInfo(object):
+    def __init__(self, binary, devtype):
+        if devtype is None:
+            devtype = 'device'
+
+        self.binary = binary
+        self._machine_info = {}
+
+        dbg("devtype: %r", devtype)
+        args = ['-S', '-machine', 'none,accel=kvm:tcg']
+        dbg("querying info for QEMU binary: %s", binary)
+        vm = QEMUMachine(binary=binary, args=args)
+        vm.launch()
+        try:
+            self.alldevs = set(qomListTypeNames(vm, implements=devtype, abstract=False))
+            # there's no way to query DeviceClass::user_creatable using QMP,
+            # so use 'info qdm':
+            self.no_user_devs = set([d['name'] for d in infoQDM(vm, ) if d['no-user']])
+            self.machines = list(m['name'] for m in vm.command('query-machines'))
+            self.user_devs = self.alldevs.difference(self.no_user_devs)
+            self.kvm_available = vm.command('query-kvm')['enabled']
+        finally:
+            vm.shutdown()
+
+    def machineInfo(self, machine):
+        """Query for information on a specific machine-type
+
+        Results are cached internally, in case the same machine-
+        type is queried multiple times.
+        """
+        if machine in self._machine_info:
+            return self._machine_info[machine]
+
+        mi = {}
+        args = ['-S', '-machine', '%s' % (machine)]
+        dbg("querying machine info for binary=%s machine=%s", self.binary, machine)
+        vm = QEMUMachine(binary=self.binary, args=args)
+        try:
+            vm.launch()
+            mi['runnable'] = True
+        except KeyboardInterrupt:
+            raise
+        except:
+            dbg("exception trying to run binary=%s machine=%s", self.binary, machine, exc_info=sys.exc_info())
+            dbg("log: %r", vm.get_log())
+            mi['runnable'] = False
+
+        vm.shutdown()
+        self._machine_info[machine] = mi
+        return mi
+
+
+BINARY_INFO = {}
+
+
+def getBinaryInfo(args, binary):
+    if binary not in BINARY_INFO:
+        BINARY_INFO[binary] = QemuBinaryInfo(binary, args.devtype)
+    return BINARY_INFO[binary]
+
+
+def checkOneCase(args, testcase):
+    """Check one specific case
+
+    Returns a dictionary containing failure information on error,
+    or None on success
+    """
+    binary = testcase['binary']
+    accel = testcase['accel']
+    machine = testcase['machine']
+    device = testcase['device']
+
+    dbg("will test: %r", testcase)
+
+    args = ['-S', '-machine', '%s,accel=%s' % (machine, accel),
+            '-device', qemuOptsEscape(device)]
+    cmdline = ' '.join([binary] + args)
+    dbg("will launch QEMU: %s", cmdline)
+    vm = QEMUMachine(binary=binary, args=args)
+
+    exc_traceback = None
+    try:
+        vm.launch()
+    except KeyboardInterrupt:
+        raise
+    except:
+        exc_traceback = traceback.format_exc()
+        dbg("Exception while running test case")
+    finally:
+        vm.shutdown()
+        ec = vm.exitcode()
+        log = vm.get_log()
+
+    if exc_traceback is not None or ec != 0:
+        return {'exc_traceback':exc_traceback,
+                'exitcode':ec,
+                'log':log,
+                'testcase':testcase,
+                'cmdline':cmdline}
+
+
+def binariesToTest(args, testcase):
+    if args.qemu:
+        r = args.qemu
+    else:
+        r = [f.path for f in os.scandir('.')
+             if f.name.startswith('qemu-system-') and
+                f.is_file() and os.access(f, os.X_OK)]
+    return r
+
+
+def accelsToTest(args, testcase):
+    if getBinaryInfo(args, testcase['binary']).kvm_available:
+        yield 'kvm'
+    yield 'tcg'
+
+
+def machinesToTest(args, testcase):
+    return getBinaryInfo(args, testcase['binary']).machines
+
+
+def devicesToTest(args, testcase):
+    return getBinaryInfo(args, testcase['binary']).user_devs
+
+
+TESTCASE_VARIABLES = [
+    ('binary', binariesToTest),
+    ('accel', accelsToTest),
+    ('machine', machinesToTest),
+    ('device', devicesToTest),
+]
+
+
+def genCases1(args, testcases, var, fn):
+    """Generate new testcases for one variable
+
+    If an existing item already has a variable set, don't
+    generate new items and just return it directly. This
+    allows the "-t" command-line option to be used to choose
+    a specific test case.
+    """
+    for testcase in testcases:
+        if var in testcase:
+            yield testcase.copy()
+        else:
+            for i in fn(args, testcase):
+                t = testcase.copy()
+                t[var] = i
+                yield t
+
+
+def genCases(args, testcase):
+    """Generate test cases for all variables
+    """
+    cases = [testcase.copy()]
+    for var, fn in TESTCASE_VARIABLES:
+        dbg("var: %r, fn: %r", var, fn)
+        cases = genCases1(args, cases, var, fn)
+    return cases
+
+
+def casesToTest(args, testcase):
+    cases = genCases(args, testcase)
+    if args.random:
+        cases = list(cases)
+        cases = random.sample(cases, min(args.random, len(cases)))
+    if args.debug:
+        cases = list(cases)
+        dbg("%d test cases to test", len(cases))
+    if args.shuffle:
+        cases = list(cases)
+        random.shuffle(cases)
+    return cases
+
+
+def logFailure(f, level):
+    t = f['testcase']
+    logger.log(level, "failed: %s", formatTestCase(t))
+    logger.log(level, "cmdline: %s", f['cmdline'])
+    for l in f['log'].strip().split('\n'):
+        logger.log(level, "log: %s", l)
+    logger.log(level, "exit code: %r", f['exitcode'])
+    if f['exc_traceback']:
+        logger.log(level, "exception:")
+        for l in f['exc_traceback'].split('\n'):
+            logger.log(level, "  %s", l.rstrip('\n'))
+
+
+def main():
+    parser = argparse.ArgumentParser(description="QEMU -device crash test")
+    parser.add_argument('-t', metavar='KEY=VALUE', nargs='*',
+                        help="Limit test cases to KEY=VALUE",
+                        action='append', dest='testcases', default=[])
+    parser.add_argument('-d', '--debug', action='store_true',
+                        help='debug output')
+    parser.add_argument('-v', '--verbose', action='store_true', default=True,
+                        help='verbose output')
+    parser.add_argument('-q', '--quiet', dest='verbose', action='store_false',
+                        help='non-verbose output')
+    parser.add_argument('-r', '--random', type=int, metavar='COUNT',
+                        help='run a random sample of COUNT test cases',
+                        default=0)
+    parser.add_argument('--shuffle', action='store_true',
+                        help='Run test cases in random order')
+    parser.add_argument('--dry-run', action='store_true',
+                        help="Don't run any tests, just generate list")
+    parser.add_argument('-D', '--devtype', metavar='TYPE',
+                        help="Test only device types that implement TYPE")
+    parser.add_argument('-Q', '--quick', action='store_true', default=True,
+                        help="Quick mode: skip test cases that are expected to fail")
+    parser.add_argument('-F', '--full', action='store_false', dest='quick',
+                        help="Full mode: test cases that are expected to fail")
+    parser.add_argument('--strict', action='store_true', dest='strict',
+                        help="Treat all warnings as fatal")
+    parser.add_argument('qemu', nargs='*', metavar='QEMU',
+                        help='QEMU binary to run')
+    args = parser.parse_args()
+
+    if args.debug:
+        lvl = logging.DEBUG
+    elif args.verbose:
+        lvl = logging.INFO
+    else:
+        lvl = logging.WARN
+    logging.basicConfig(stream=sys.stdout, level=lvl, format='%(levelname)s: %(message)s')
+
+    fatal_failures = []
+    wl_stats = {}
+    skipped = 0
+    total = 0
+
+    tc = {}
+    dbg("testcases: %r", args.testcases)
+    if args.testcases:
+        for t in chain(*args.testcases):
+            for kv in t.split():
+                k, v = kv.split('=', 1)
+                tc[k] = v
+
+    if len(binariesToTest(args, tc)) == 0:
+        print("No QEMU binary found", file=sys.stderr)
+        parser.print_usage(sys.stderr)
+        return 1
+
+    for t in casesToTest(args, tc):
+        logger.info("running test case: %s", formatTestCase(t))
+        total += 1
+
+        expected_match = findExpectedResult(t)
+        if (args.quick and
+                (expected_match or
+                 not getBinaryInfo(args, t['binary']).machineInfo(t['machine'])['runnable'])):
+            dbg("skipped: %s", formatTestCase(t))
+            skipped += 1
+            continue
+
+        if args.dry_run:
+            continue
+
+        try:
+            f = checkOneCase(args, t)
+        except KeyboardInterrupt:
+            break
+
+        if f:
+            i, wl = checkResultWhitelist(f)
+            dbg("testcase: %r, whitelist match: %r", t, wl)
+            wl_stats.setdefault(i, []).append(f)
+            level = wl.get('loglevel', logging.DEBUG)
+            logFailure(f, level)
+            if wl.get('fatal') or (args.strict and level >= logging.WARN):
+                fatal_failures.append(f)
+        else:
+            dbg("success: %s", formatTestCase(t))
+            if expected_match:
+                logger.warn("Didn't fail as expected: %s", formatTestCase(t))
+
+    logger.info("Total: %d test cases", total)
+    if skipped:
+        logger.info("Skipped %d test cases", skipped)
+
+    if args.debug:
+        stats = sorted([(len(wl_stats.get(i, [])), wl) for i, wl in
+                         enumerate(ERROR_WHITELIST)], key=lambda x: x[0])
+        for count, wl in stats:
+            dbg("whitelist entry stats: %d: %r", count, wl)
+
+    if fatal_failures:
+        for f in fatal_failures:
+            t = f['testcase']
+            logger.error("Fatal failure: %s", formatTestCase(t))
+        logger.error("Fatal failures on some machine/device combinations")
+        return 1
+
+if __name__ == '__main__':
+    sys.exit(main())
diff --git a/scripts/disas-objdump.pl b/scripts/disas-objdump.pl
new file mode 100755
index 000000000..bec905f04
--- /dev/null
+++ b/scripts/disas-objdump.pl
@@ -0,0 +1,101 @@
+#!/usr/bin/env perl
+
+use warnings;
+
+use File::Temp qw/ tempfile /;
+use Getopt::Long;
+
+# Default to the system objdump if a cross-compiler edition not given.
+my $aobjdump = "objdump";
+my $hobjdump = "";
+my $tobjdump = "";
+my $hmachine = "";
+my $tmachine = "";
+
+GetOptions ('O|objdump=s' => \$aobjdump,
+            'host-objdump=s' => \$hobjdump,
+            'target-objdump=s' => \$tobjdump,
+            'h|host-machine=s' => \$hmachine,
+            't|target-machine=s' => \$tmachine);
+
+# But we can't default the machines.  Sanity check that we've at least one.
+die "No host or target machine type" if !$hmachine && !$tmachine;
+
+# Reuse one temp file for all of the hunks.
+my ($outh, $outname) = tempfile();
+binmode($outh);
+END { unlink $outname; }
+
+# Pre-construct the command-lines for executing the dump.
+sub mkobjcommand ($$) {
+    my ($cmd, $mach) = @_;
+    return 0 if !$mach;
+    $cmd = $aobjdump if !$cmd;
+    return "$cmd -m $mach --disassemble-all -b binary";
+}
+
+$objdump[1] = mkobjcommand($hobjdump, $hmachine);
+$objdump[2] = mkobjcommand($tobjdump, $tmachine);
+
+# Zero-initialize current dumping state.
+my $mem = "";
+my $inobjd = 0;
+my $vma = 0;
+
+sub objcommand {
+    my $ret = $objdump[$inobjd];
+    if (!$ret) {
+        die "Host machine type not specified" if $inobjd == 1;
+        die "Target machine type not specified" if $inobjd == 2;
+        die "Internal error";
+    }
+    return $ret;
+}
+
+while (<>) {
+    # Collect the data from the relevant OBJD-* lines ...
+    if (/^OBJD-H: /) {
+        die "Internal error" if $inobjd == 2;
+        $mem = $mem . pack("H*", substr($_, 8, -1));
+        $inobjd = 1;
+    } elsif (/^OBJD-T: /) {
+        die "Internal error" if $inobjd == 1;
+        $mem = $mem . pack("H*", substr($_, 8, -1));
+        $inobjd = 2;
+    }
+    # ... which will always be followed by a blank line,
+    # at which point we should produce our dump.
+    elsif ($inobjd) {
+        # Rewrite the temp file in one go; it will usually be small.
+        sysseek $outh, 0, 0;
+        truncate $outh, 0;
+        syswrite $outh, $mem;
+
+        my $cmd = objcommand();
+        $cmd = $cmd . " --adjust-vma=" . $vma if $vma;
+        $cmd = $cmd . " " . $outname;
+
+        # Pipe from objdump...
+        open IN, "-|", $cmd;
+
+        # ... copying all but the first 7 lines of boilerplate to our stdout.
+	my $i = 0;
+	while () {
+	    print if (++$i > 7);
+        }
+        close IN;
+        print "\n";
+
+        $mem = "";
+        $inobjd = 0;
+        $vma = 0;
+    }
+    # The line before "OBJD-*" will be of the form "0x+: +\n".
+    # Extract the value for passing to --adjust-vma.
+    elsif (/^(0x[0-9a-fA-F]+):\s*$/) {
+        $vma = $1;
+        print;
+    } else {
+        print;
+    }
+}
diff --git a/scripts/dump-guest-memory.py b/scripts/dump-guest-memory.py
new file mode 100644
index 000000000..4177261d3
--- /dev/null
+++ b/scripts/dump-guest-memory.py
@@ -0,0 +1,598 @@
+"""
+This python script adds a new gdb command, "dump-guest-memory". It
+should be loaded with "source dump-guest-memory.py" at the (gdb)
+prompt.
+
+Copyright (C) 2013, Red Hat, Inc.
+
+Authors:
+   Laszlo Ersek 
+   Janosch Frank 
+
+This work is licensed under the terms of the GNU GPL, version 2 or later. See
+the COPYING file in the top-level directory.
+"""
+
+import ctypes
+import struct
+
+try:
+    UINTPTR_T = gdb.lookup_type("uintptr_t")
+except Exception as inst:
+    raise gdb.GdbError("Symbols must be loaded prior to sourcing dump-guest-memory.\n"
+                       "Symbols may be loaded by 'attach'ing a QEMU process id or by "
+                       "'load'ing a QEMU binary.")
+
+TARGET_PAGE_SIZE = 0x1000
+TARGET_PAGE_MASK = 0xFFFFFFFFFFFFF000
+
+# Special value for e_phnum. This indicates that the real number of
+# program headers is too large to fit into e_phnum. Instead the real
+# value is in the field sh_info of section 0.
+PN_XNUM = 0xFFFF
+
+EV_CURRENT = 1
+
+ELFCLASS32 = 1
+ELFCLASS64 = 2
+
+ELFDATA2LSB = 1
+ELFDATA2MSB = 2
+
+ET_CORE = 4
+
+PT_LOAD = 1
+PT_NOTE = 4
+
+EM_386 = 3
+EM_PPC = 20
+EM_PPC64 = 21
+EM_S390 = 22
+EM_AARCH = 183
+EM_X86_64 = 62
+
+VMCOREINFO_FORMAT_ELF = 1
+
+def le16_to_cpu(val):
+    return struct.unpack(" 1 << 20:
+            print('warning: invalid vmcoreinfo size')
+            return
+        # now get the full note
+        note = get_arch_note(self.endianness,
+                             header.n_namesz - 1, header.n_descsz)
+        ctypes.memmove(ctypes.pointer(note), vmcoreinfo, ctypes.sizeof(note))
+
+        self.notes.append(note)
+        self.segments[0].p_filesz += ctypes.sizeof(note)
+        self.segments[0].p_memsz += ctypes.sizeof(note)
+
+    def add_segment(self, p_type, p_paddr, p_size):
+        """Adds a segment to the elf."""
+
+        phdr = get_arch_phdr(self.endianness, self.elfclass)
+        phdr.p_type = p_type
+        phdr.p_paddr = p_paddr
+        phdr.p_vaddr = p_paddr
+        phdr.p_filesz = p_size
+        phdr.p_memsz = p_size
+        self.segments.append(phdr)
+        self.ehdr.e_phnum += 1
+
+    def to_file(self, elf_file):
+        """Writes all ELF structures to the passed file.
+
+        Structure:
+        Ehdr
+        Segment 0:PT_NOTE
+        Segment 1:PT_LOAD
+        Segment N:PT_LOAD
+        Note    0..N
+        Dump contents
+        """
+        elf_file.write(self.ehdr)
+        off = ctypes.sizeof(self.ehdr) + \
+              len(self.segments) * ctypes.sizeof(self.segments[0])
+
+        for phdr in self.segments:
+            phdr.p_offset = off
+            elf_file.write(phdr)
+            off += phdr.p_filesz
+
+        for note in self.notes:
+            elf_file.write(note)
+
+
+def get_arch_note(endianness, len_name, len_desc):
+    """Returns a Note class with the specified endianness."""
+
+    if endianness == ELFDATA2LSB:
+        superclass = ctypes.LittleEndianStructure
+    else:
+        superclass = ctypes.BigEndianStructure
+
+    len_name = len_name + 1
+
+    class Note(superclass):
+        """Represents an ELF note, includes the content."""
+
+        _fields_ = [("n_namesz", ctypes.c_uint32),
+                    ("n_descsz", ctypes.c_uint32),
+                    ("n_type", ctypes.c_uint32),
+                    ("n_name", ctypes.c_char * len_name),
+                    ("n_desc", ctypes.c_uint32 * ((len_desc + 3) // 4))]
+    return Note()
+
+
+class Ident(ctypes.Structure):
+    """Represents the ELF ident array in the ehdr structure."""
+
+    _fields_ = [('ei_mag0', ctypes.c_ubyte),
+                ('ei_mag1', ctypes.c_ubyte),
+                ('ei_mag2', ctypes.c_ubyte),
+                ('ei_mag3', ctypes.c_ubyte),
+                ('ei_class', ctypes.c_ubyte),
+                ('ei_data', ctypes.c_ubyte),
+                ('ei_version', ctypes.c_ubyte),
+                ('ei_osabi', ctypes.c_ubyte),
+                ('ei_abiversion', ctypes.c_ubyte),
+                ('ei_pad', ctypes.c_ubyte * 7)]
+
+    def __init__(self, endianness, elfclass):
+        self.ei_mag0 = 0x7F
+        self.ei_mag1 = ord('E')
+        self.ei_mag2 = ord('L')
+        self.ei_mag3 = ord('F')
+        self.ei_class = elfclass
+        self.ei_data = endianness
+        self.ei_version = EV_CURRENT
+
+
+def get_arch_ehdr(endianness, elfclass):
+    """Returns a EHDR64 class with the specified endianness."""
+
+    if endianness == ELFDATA2LSB:
+        superclass = ctypes.LittleEndianStructure
+    else:
+        superclass = ctypes.BigEndianStructure
+
+    class EHDR64(superclass):
+        """Represents the 64 bit ELF header struct."""
+
+        _fields_ = [('e_ident', Ident),
+                    ('e_type', ctypes.c_uint16),
+                    ('e_machine', ctypes.c_uint16),
+                    ('e_version', ctypes.c_uint32),
+                    ('e_entry', ctypes.c_uint64),
+                    ('e_phoff', ctypes.c_uint64),
+                    ('e_shoff', ctypes.c_uint64),
+                    ('e_flags', ctypes.c_uint32),
+                    ('e_ehsize', ctypes.c_uint16),
+                    ('e_phentsize', ctypes.c_uint16),
+                    ('e_phnum', ctypes.c_uint16),
+                    ('e_shentsize', ctypes.c_uint16),
+                    ('e_shnum', ctypes.c_uint16),
+                    ('e_shstrndx', ctypes.c_uint16)]
+
+        def __init__(self):
+            super(superclass, self).__init__()
+            self.e_ident = Ident(endianness, elfclass)
+            self.e_type = ET_CORE
+            self.e_version = EV_CURRENT
+            self.e_ehsize = ctypes.sizeof(self)
+            self.e_phoff = ctypes.sizeof(self)
+            self.e_phentsize = ctypes.sizeof(get_arch_phdr(endianness, elfclass))
+            self.e_phnum = 0
+
+
+    class EHDR32(superclass):
+        """Represents the 32 bit ELF header struct."""
+
+        _fields_ = [('e_ident', Ident),
+                    ('e_type', ctypes.c_uint16),
+                    ('e_machine', ctypes.c_uint16),
+                    ('e_version', ctypes.c_uint32),
+                    ('e_entry', ctypes.c_uint32),
+                    ('e_phoff', ctypes.c_uint32),
+                    ('e_shoff', ctypes.c_uint32),
+                    ('e_flags', ctypes.c_uint32),
+                    ('e_ehsize', ctypes.c_uint16),
+                    ('e_phentsize', ctypes.c_uint16),
+                    ('e_phnum', ctypes.c_uint16),
+                    ('e_shentsize', ctypes.c_uint16),
+                    ('e_shnum', ctypes.c_uint16),
+                    ('e_shstrndx', ctypes.c_uint16)]
+
+        def __init__(self):
+            super(superclass, self).__init__()
+            self.e_ident = Ident(endianness, elfclass)
+            self.e_type = ET_CORE
+            self.e_version = EV_CURRENT
+            self.e_ehsize = ctypes.sizeof(self)
+            self.e_phoff = ctypes.sizeof(self)
+            self.e_phentsize = ctypes.sizeof(get_arch_phdr(endianness, elfclass))
+            self.e_phnum = 0
+
+    # End get_arch_ehdr
+    if elfclass == ELFCLASS64:
+        return EHDR64()
+    else:
+        return EHDR32()
+
+
+def get_arch_phdr(endianness, elfclass):
+    """Returns a 32 or 64 bit PHDR class with the specified endianness."""
+
+    if endianness == ELFDATA2LSB:
+        superclass = ctypes.LittleEndianStructure
+    else:
+        superclass = ctypes.BigEndianStructure
+
+    class PHDR64(superclass):
+        """Represents the 64 bit ELF program header struct."""
+
+        _fields_ = [('p_type', ctypes.c_uint32),
+                    ('p_flags', ctypes.c_uint32),
+                    ('p_offset', ctypes.c_uint64),
+                    ('p_vaddr', ctypes.c_uint64),
+                    ('p_paddr', ctypes.c_uint64),
+                    ('p_filesz', ctypes.c_uint64),
+                    ('p_memsz', ctypes.c_uint64),
+                    ('p_align', ctypes.c_uint64)]
+
+    class PHDR32(superclass):
+        """Represents the 32 bit ELF program header struct."""
+
+        _fields_ = [('p_type', ctypes.c_uint32),
+                    ('p_offset', ctypes.c_uint32),
+                    ('p_vaddr', ctypes.c_uint32),
+                    ('p_paddr', ctypes.c_uint32),
+                    ('p_filesz', ctypes.c_uint32),
+                    ('p_memsz', ctypes.c_uint32),
+                    ('p_flags', ctypes.c_uint32),
+                    ('p_align', ctypes.c_uint32)]
+
+    # End get_arch_phdr
+    if elfclass == ELFCLASS64:
+        return PHDR64()
+    else:
+        return PHDR32()
+
+
+def int128_get64(val):
+    """Returns low 64bit part of Int128 struct."""
+
+    try:
+        assert val["hi"] == 0
+        return val["lo"]
+    except gdb.error:
+        u64t = gdb.lookup_type('uint64_t').array(2)
+        u64 = val.cast(u64t)
+        if sys.byteorder == 'little':
+            assert u64[1] == 0
+            return u64[0]
+        else:
+            assert u64[0] == 0
+            return u64[1]
+
+
+def qlist_foreach(head, field_str):
+    """Generator for qlists."""
+
+    var_p = head["lh_first"]
+    while var_p != 0:
+        var = var_p.dereference()
+        var_p = var[field_str]["le_next"]
+        yield var
+
+
+def qemu_map_ram_ptr(block, offset):
+    """Returns qemu vaddr for given guest physical address."""
+
+    return block["host"] + offset
+
+
+def memory_region_get_ram_ptr(memory_region):
+    if memory_region["alias"] != 0:
+        return (memory_region_get_ram_ptr(memory_region["alias"].dereference())
+                + memory_region["alias_offset"])
+
+    return qemu_map_ram_ptr(memory_region["ram_block"], 0)
+
+
+def get_guest_phys_blocks():
+    """Returns a list of ram blocks.
+
+    Each block entry contains:
+    'target_start': guest block phys start address
+    'target_end':   guest block phys end address
+    'host_addr':    qemu vaddr of the block's start
+    """
+
+    guest_phys_blocks = []
+
+    print("guest RAM blocks:")
+    print("target_start     target_end       host_addr        message "
+          "count")
+    print("---------------- ---------------- ---------------- ------- "
+          "-----")
+
+    current_map_p = gdb.parse_and_eval("address_space_memory.current_map")
+    current_map = current_map_p.dereference()
+
+    # Conversion to int is needed for python 3
+    # compatibility. Otherwise range doesn't cast the value itself and
+    # breaks.
+    for cur in range(int(current_map["nr"])):
+        flat_range = (current_map["ranges"] + cur).dereference()
+        memory_region = flat_range["mr"].dereference()
+
+        # we only care about RAM
+        if (not memory_region["ram"] or
+            memory_region["ram_device"] or
+            memory_region["nonvolatile"]):
+            continue
+
+        section_size = int128_get64(flat_range["addr"]["size"])
+        target_start = int128_get64(flat_range["addr"]["start"])
+        target_end = target_start + section_size
+        host_addr = (memory_region_get_ram_ptr(memory_region)
+                     + flat_range["offset_in_region"])
+        predecessor = None
+
+        # find continuity in guest physical address space
+        if len(guest_phys_blocks) > 0:
+            predecessor = guest_phys_blocks[-1]
+            predecessor_size = (predecessor["target_end"] -
+                                predecessor["target_start"])
+
+            # the memory API guarantees monotonically increasing
+            # traversal
+            assert predecessor["target_end"] <= target_start
+
+            # we want continuity in both guest-physical and
+            # host-virtual memory
+            if (predecessor["target_end"] < target_start or
+                predecessor["host_addr"] + predecessor_size != host_addr):
+                predecessor = None
+
+        if predecessor is None:
+            # isolated mapping, add it to the list
+            guest_phys_blocks.append({"target_start": target_start,
+                                      "target_end":   target_end,
+                                      "host_addr":    host_addr})
+            message = "added"
+        else:
+            # expand predecessor until @target_end; predecessor's
+            # start doesn't change
+            predecessor["target_end"] = target_end
+            message = "joined"
+
+        print("%016x %016x %016x %-7s %5u" %
+              (target_start, target_end, host_addr.cast(UINTPTR_T),
+               message, len(guest_phys_blocks)))
+
+    return guest_phys_blocks
+
+
+# The leading docstring doesn't have idiomatic Python formatting. It is
+# printed by gdb's "help" command (the first line is printed in the
+# "help data" summary), and it should match how other help texts look in
+# gdb.
+class DumpGuestMemory(gdb.Command):
+    """Extract guest vmcore from qemu process coredump.
+
+The two required arguments are FILE and ARCH:
+FILE identifies the target file to write the guest vmcore to.
+ARCH specifies the architecture for which the core will be generated.
+
+This GDB command reimplements the dump-guest-memory QMP command in
+python, using the representation of guest memory as captured in the qemu
+coredump. The qemu process that has been dumped must have had the
+command line option "-machine dump-guest-core=on" which is the default.
+
+For simplicity, the "paging", "begin" and "end" parameters of the QMP
+command are not supported -- no attempt is made to get the guest's
+internal paging structures (ie. paging=false is hard-wired), and guest
+memory is always fully dumped.
+
+Currently aarch64-be, aarch64-le, X86_64, 386, s390, ppc64-be,
+ppc64-le guests are supported.
+
+The CORE/NT_PRSTATUS and QEMU notes (that is, the VCPUs' statuses) are
+not written to the vmcore. Preparing these would require context that is
+only present in the KVM host kernel module when the guest is alive. A
+fake ELF note is written instead, only to keep the ELF parser of "crash"
+happy.
+
+Dependent on how busted the qemu process was at the time of the
+coredump, this command might produce unpredictable results. If qemu
+deliberately called abort(), or it was dumped in response to a signal at
+a halfway fortunate point, then its coredump should be in reasonable
+shape and this command should mostly work."""
+
+    def __init__(self):
+        super(DumpGuestMemory, self).__init__("dump-guest-memory",
+                                              gdb.COMMAND_DATA,
+                                              gdb.COMPLETE_FILENAME)
+        self.elf = None
+        self.guest_phys_blocks = None
+
+    def dump_init(self, vmcore):
+        """Prepares and writes ELF structures to core file."""
+
+        # Needed to make crash happy, data for more useful notes is
+        # not available in a qemu core.
+        self.elf.add_note("NONE", "EMPTY", 0)
+
+        # We should never reach PN_XNUM for paging=false dumps,
+        # there's just a handful of discontiguous ranges after
+        # merging.
+        # The constant is needed to account for the PT_NOTE segment.
+        phdr_num = len(self.guest_phys_blocks) + 1
+        assert phdr_num < PN_XNUM
+
+        for block in self.guest_phys_blocks:
+            block_size = block["target_end"] - block["target_start"]
+            self.elf.add_segment(PT_LOAD, block["target_start"], block_size)
+
+        self.elf.to_file(vmcore)
+
+    def dump_iterate(self, vmcore):
+        """Writes guest core to file."""
+
+        qemu_core = gdb.inferiors()[0]
+        for block in self.guest_phys_blocks:
+            cur = block["host_addr"]
+            left = block["target_end"] - block["target_start"]
+            print("dumping range at %016x for length %016x" %
+                  (cur.cast(UINTPTR_T), left))
+
+            while left > 0:
+                chunk_size = min(TARGET_PAGE_SIZE, left)
+                chunk = qemu_core.read_memory(cur, chunk_size)
+                vmcore.write(chunk)
+                cur += chunk_size
+                left -= chunk_size
+
+    def phys_memory_read(self, addr, size):
+        qemu_core = gdb.inferiors()[0]
+        for block in self.guest_phys_blocks:
+            if block["target_start"] <= addr \
+               and addr + size <= block["target_end"]:
+                haddr = block["host_addr"] + (addr - block["target_start"])
+                return qemu_core.read_memory(haddr, size)
+        return None
+
+    def add_vmcoreinfo(self):
+        if gdb.lookup_symbol("vmcoreinfo_realize")[0] is None:
+            return
+        vmci = 'vmcoreinfo_realize::vmcoreinfo_state'
+        if not gdb.parse_and_eval("%s" % vmci) \
+           or not gdb.parse_and_eval("(%s)->has_vmcoreinfo" % vmci):
+            return
+
+        fmt = gdb.parse_and_eval("(%s)->vmcoreinfo.guest_format" % vmci)
+        addr = gdb.parse_and_eval("(%s)->vmcoreinfo.paddr" % vmci)
+        size = gdb.parse_and_eval("(%s)->vmcoreinfo.size" % vmci)
+
+        fmt = le16_to_cpu(fmt)
+        addr = le64_to_cpu(addr)
+        size = le32_to_cpu(size)
+
+        if fmt != VMCOREINFO_FORMAT_ELF:
+            return
+
+        vmcoreinfo = self.phys_memory_read(addr, size)
+        if vmcoreinfo:
+            self.elf.add_vmcoreinfo_note(bytes(vmcoreinfo))
+
+    def invoke(self, args, from_tty):
+        """Handles command invocation from gdb."""
+
+        # Unwittingly pressing the Enter key after the command should
+        # not dump the same multi-gig coredump to the same file.
+        self.dont_repeat()
+
+        argv = gdb.string_to_argv(args)
+        if len(argv) != 2:
+            raise gdb.GdbError("usage: dump-guest-memory FILE ARCH")
+
+        self.elf = ELF(argv[1])
+        self.guest_phys_blocks = get_guest_phys_blocks()
+        self.add_vmcoreinfo()
+
+        with open(argv[0], "wb") as vmcore:
+            self.dump_init(vmcore)
+            self.dump_iterate(vmcore)
+
+DumpGuestMemory()
diff --git a/scripts/extract-vsssdk-headers b/scripts/extract-vsssdk-headers
new file mode 100755
index 000000000..9e38510f0
--- /dev/null
+++ b/scripts/extract-vsssdk-headers
@@ -0,0 +1,35 @@
+#! /bin/bash
+
+# extract-vsssdk-headers
+# Author: Paolo Bonzini 
+
+set -e
+if test $# != 1 || ! test -f "$1"; then
+  echo 'Usage: extract-vsssdk-headers /path/to/setup.exe' >&2
+  exit 1
+fi
+
+if ! command -v msiextract > /dev/null; then
+  echo 'msiextract not found. Please install msitools.' >&2
+  exit 1
+fi
+
+if test -e inc; then
+  echo '"inc" already exists.' >&2
+  exit 1
+fi
+
+# Extract .MSI file in the .exe, looking for the OLE compound
+# document signature.  Extra data at the end does not matter.
+export LC_ALL=C
+MAGIC=$'\xd0\xcf\x11\xe0\xa1\xb1\x1a\xe1'
+offset=$(grep -abom1 "$MAGIC" "$1" | sed -n 's/:/\n/; P')
+tmpdir=$(mktemp -d)
+trap 'rm -fr -- "$tmpdir" vsssdk.msi' EXIT HUP INT QUIT ALRM TERM
+tail -c +$(($offset+1)) -- "$1" > vsssdk.msi
+
+# Now extract the files.
+msiextract -C $tmpdir vsssdk.msi
+mv "$tmpdir/Program Files/Microsoft/VSSSDK72/inc" inc
+echo 'Extracted SDK headers into "inc" directory.'
+exit 0
diff --git a/scripts/feature_to_c.sh b/scripts/feature_to_c.sh
new file mode 100644
index 000000000..b1169899c
--- /dev/null
+++ b/scripts/feature_to_c.sh
@@ -0,0 +1,68 @@
+#!/bin/sh
+
+# Convert text files to compilable C arrays.
+#
+# Copyright (C) 2007 Free Software Foundation, Inc.
+#
+# This file is part of GDB.
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, see .
+
+if test -z "$1"; then
+  echo "Usage: $0 INPUTFILE..."
+  exit 1
+fi
+
+for input; do
+  arrayname=xml_feature_$(echo $input | sed 's,.*/,,; s/[-.]/_/g')
+
+  ${AWK:-awk} 'BEGIN { n = 0
+      printf "#include \"qemu/osdep.h\"\n"
+      print "static const char '$arrayname'[] = {"
+      for (i = 0; i < 255; i++)
+        _ord_[sprintf("%c", i)] = i
+    } {
+      split($0, line, "");
+      printf "  "
+      for (i = 1; i <= length($0); i++) {
+        c = line[i]
+        if (c == "'\''") {
+          printf "'\''\\'\'''\'', "
+        } else if (c == "\\") {
+          printf "'\''\\\\'\'', "
+        } else if (_ord_[c] >= 32 && _ord_[c] < 127) {
+	  printf "'\''%s'\'', ", c
+        } else {
+          printf "'\''\\%03o'\'', ", _ord_[c]
+        }
+        if (i % 10 == 0)
+          printf "\n   "
+      }
+      printf "'\''\\n'\'', \n"
+    } END {
+      print "  0 };"
+    }' < $input
+done
+
+echo
+echo "const char *const xml_builtin[][2] = {"
+
+for input; do
+  basename=$(echo $input | sed 's,.*/,,')
+  arrayname=xml_feature_$(echo $input | sed 's,.*/,,; s/[-.]/_/g')
+  echo "  { \"$basename\", $arrayname },"
+done
+
+echo "  { (char *)0, (char *)0 }"
+echo "};"
diff --git a/scripts/fix-multiline-comments.sh b/scripts/fix-multiline-comments.sh
new file mode 100755
index 000000000..93f9b1066
--- /dev/null
+++ b/scripts/fix-multiline-comments.sh
@@ -0,0 +1,62 @@
+#! /bin/sh
+#
+# Fix multiline comments to match CODING_STYLE
+#
+# Copyright (C) 2018 Red Hat, Inc.
+#
+# Author: Paolo Bonzini
+#
+# Usage: scripts/fix-multiline-comments.sh [-i] FILE...
+#
+# -i edits the file in place (requires gawk 4.1.0).
+#
+# Set the AWK environment variable to choose the awk interpreter to use
+# (default 'awk')
+
+if test "$1" = -i; then
+  # gawk extension
+  inplace="-i inplace"
+  shift
+fi
+${AWK-awk} $inplace 'BEGIN { indent = -1 }
+{
+    line = $0
+    # apply a star to the indent on lines after the first
+    if (indent != -1) {
+        if (line == "") {
+            line = sp " *"
+        } else if (substr(line, 1, indent + 2) == sp "  ") {
+            line = sp " *" substr(line, indent + 3)
+        }
+    }
+
+    is_lead = (line ~ /^[ \t]*\/\*/)
+    is_trail = (line ~ /\*\//)
+    if (is_lead && !is_trail) {
+        # grab the indent at the start of a comment, but not for
+        # single-line comments
+        match(line, /^[ \t]*\/\*/)
+        indent = RLENGTH - 2
+        sp = substr(line, 1, indent)
+    }
+
+    # the regular expression filters out lone /*, /**, or */
+    if (indent != -1 && !(line ~ /^[ \t]*(\/\*+|\*\/)[ \t]*$/)) {
+        if (is_lead) {
+            # split the leading /* or /** on a separate line
+            match(line, /^[ \t]*\/\*+/)
+            lead = substr(line, 1, RLENGTH)
+            match(line, /^[ \t]*\/\*+[ \t]*/)
+            line = lead "\n" sp " *" substr(line, RLENGTH)
+        }
+        if (is_trail) {
+            # split the trailing */ on a separate line
+            match(line, /[ \t]*\*\//)
+            line = substr(line, 1, RSTART - 1) "\n" sp " */"
+        }
+    }
+    if (is_trail) {
+        indent = -1
+    }
+    print line
+}' "$@"
diff --git a/scripts/gensyscalls.sh b/scripts/gensyscalls.sh
new file mode 100755
index 000000000..bba9fb052
--- /dev/null
+++ b/scripts/gensyscalls.sh
@@ -0,0 +1,101 @@
+#!/bin/sh
+#
+# Update syscall_nr.h files from linux headers asm-generic/unistd.h
+#
+# This code is licensed under the GPL version 2 or later.  See
+# the COPYING file in the top-level directory.
+#
+
+linux="$1"
+output="$2"
+
+TMP=$(mktemp -d)
+
+if [ "$linux" = "" ] ; then
+    echo "Needs path to linux source tree" 1>&2
+    exit 1
+fi
+
+if [ "$output" = "" ] ; then
+    output="$PWD"
+fi
+
+upper()
+{
+    echo "$1" | tr "[:lower:]" "[:upper:]" | tr "[:punct:]" "_"
+}
+
+qemu_arch()
+{
+    case "$1" in
+    arm64)
+        echo "aarch64"
+        ;;
+    *)
+        echo "$1"
+        ;;
+    esac
+}
+
+read_includes()
+{
+    arch=$1
+    bits=$2
+
+     cpp -P -nostdinc -fdirectives-only \
+        -D_UAPI_ASM_$(upper ${arch})_BITSPERLONG_H \
+        -D__BITS_PER_LONG=${bits} \
+        -I${linux}/arch/${arch}/include/uapi/ \
+        -I${linux}/include/uapi \
+        -I${TMP} \
+        "${linux}/arch/${arch}/include/uapi/asm/unistd.h"
+}
+
+filter_defines()
+{
+    grep -e "#define __NR_" -e "#define __NR3264"
+}
+
+rename_defines()
+{
+    sed "s/ __NR_/ TARGET_NR_/g;s/(__NR_/(TARGET_NR_/g"
+}
+
+evaluate_values()
+{
+    sed "s/#define TARGET_NR_/QEMU TARGET_NR_/" | \
+    cpp -P -nostdinc | \
+    sed "s/^QEMU /#define /"
+}
+
+generate_syscall_nr()
+{
+    arch=$1
+    bits=$2
+    file="$3"
+    guard="$(upper LINUX_USER_$(qemu_arch $arch)_$(basename "$file"))"
+
+    (echo "/*"
+    echo " * This file contains the system call numbers."
+    echo " * Do not modify."
+    echo " * This file is generated by scripts/gensyscalls.sh"
+    echo " */"
+    echo "#ifndef ${guard}"
+    echo "#define ${guard}"
+    echo
+    read_includes $arch $bits | filter_defines | rename_defines | \
+                                evaluate_values | sort -n -k 3
+    echo
+    echo "#endif /* ${guard} */") > "$file"
+}
+
+mkdir "$TMP/asm"
+> "$TMP/asm/bitsperlong.h"
+
+generate_syscall_nr arm64 64 "$output/linux-user/aarch64/syscall_nr.h"
+generate_syscall_nr nios2 32 "$output/linux-user/nios2/syscall_nr.h"
+generate_syscall_nr openrisc 32 "$output/linux-user/openrisc/syscall_nr.h"
+
+generate_syscall_nr riscv 32 "$output/linux-user/riscv/syscall32_nr.h"
+generate_syscall_nr riscv 64 "$output/linux-user/riscv/syscall64_nr.h"
+rm -fr "$TMP"
diff --git a/scripts/get_maintainer.pl b/scripts/get_maintainer.pl
new file mode 100755
index 000000000..271f5ff42
--- /dev/null
+++ b/scripts/get_maintainer.pl
@@ -0,0 +1,2144 @@
+#!/usr/bin/env perl
+# (c) 2007, Joe Perches 
+#           created from checkpatch.pl
+#
+# Print selected MAINTAINERS information for
+# the files modified in a patch or for a file
+#
+# usage: perl scripts/get_maintainer.pl [OPTIONS] 
+#        perl scripts/get_maintainer.pl [OPTIONS] -f 
+#
+# Licensed under the terms of the GNU GPL License version 2
+
+use strict;
+use warnings;
+
+my $P = $0;
+my $V = '0.26';
+
+use Getopt::Long qw(:config no_auto_abbrev);
+
+my $lk_path = "./";
+my $email = 1;
+my $email_usename = 1;
+my $email_maintainer = 1;
+my $email_reviewer = 1;
+my $email_list = 1;
+my $email_subscriber_list = 0;
+my $email_git = 0;
+my $email_git_all_signature_types = 0;
+my $email_git_blame = 0;
+my $email_git_blame_signatures = 1;
+my $email_git_fallback = 1;
+my $email_git_min_signatures = 1;
+my $email_git_max_maintainers = 5;
+my $email_git_min_percent = 5;
+my $email_git_since = "1-year-ago";
+my $email_hg_since = "-365";
+my $interactive = 0;
+my $email_remove_duplicates = 1;
+my $email_use_mailmap = 1;
+my $output_multiline = 1;
+my $output_separator = ", ";
+my $output_roles = 0;
+my $output_rolestats = 1;
+my $scm = 0;
+my $web = 0;
+my $subsystem = 0;
+my $status = 0;
+my $keywords = 1;
+my $sections = 0;
+my $file_emails = 0;
+my $from_filename = 0;
+my $pattern_depth = 0;
+my $version = 0;
+my $help = 0;
+
+my $vcs_used = 0;
+
+my $exit = 0;
+
+my %commit_author_hash;
+my %commit_signer_hash;
+
+# Signature types of people who are either
+# 	a) responsible for the code in question, or
+# 	b) familiar enough with it to give relevant feedback
+my @signature_tags = ();
+push(@signature_tags, "Signed-off-by:");
+push(@signature_tags, "Reviewed-by:");
+push(@signature_tags, "Acked-by:");
+
+my $signature_pattern = "\(" . join("|", @signature_tags) . "\)";
+
+# rfc822 email address - preloaded methods go here.
+my $rfc822_lwsp = "(?:(?:\\r\\n)?[ \\t])";
+my $rfc822_char = '[\\000-\\377]';
+
+# VCS command support: class-like functions and strings
+
+my %VCS_cmds;
+
+my %VCS_cmds_git = (
+    "execute_cmd" => \&git_execute_cmd,
+    "available" => '(which("git") ne "") && (-e ".git")',
+    "find_signers_cmd" =>
+	"git log --no-color --follow --since=\$email_git_since " .
+	    '--format="GitCommit: %H%n' .
+		      'GitAuthor: %an <%ae>%n' .
+		      'GitDate: %aD%n' .
+		      'GitSubject: %s%n' .
+		      '%b%n"' .
+	    " -- \$file",
+    "find_commit_signers_cmd" =>
+	"git log --no-color " .
+	    '--format="GitCommit: %H%n' .
+		      'GitAuthor: %an <%ae>%n' .
+		      'GitDate: %aD%n' .
+		      'GitSubject: %s%n' .
+		      '%b%n"' .
+	    " -1 \$commit",
+    "find_commit_author_cmd" =>
+	"git log --no-color " .
+	    '--format="GitCommit: %H%n' .
+		      'GitAuthor: %an <%ae>%n' .
+		      'GitDate: %aD%n' .
+		      'GitSubject: %s%n"' .
+	    " -1 \$commit",
+    "blame_range_cmd" => "git blame -l -L \$diff_start,+\$diff_length \$file",
+    "blame_file_cmd" => "git blame -l \$file",
+    "commit_pattern" => "^GitCommit: ([0-9a-f]{40,40})",
+    "blame_commit_pattern" => "^([0-9a-f]+) ",
+    "author_pattern" => "^GitAuthor: (.*)",
+    "subject_pattern" => "^GitSubject: (.*)",
+);
+
+my %VCS_cmds_hg = (
+    "execute_cmd" => \&hg_execute_cmd,
+    "available" => '(which("hg") ne "") && (-d ".hg")',
+    "find_signers_cmd" =>
+	"hg log --date=\$email_hg_since " .
+	    "--template='HgCommit: {node}\\n" .
+	                "HgAuthor: {author}\\n" .
+			"HgSubject: {desc}\\n'" .
+	    " -- \$file",
+    "find_commit_signers_cmd" =>
+	"hg log " .
+	    "--template='HgSubject: {desc}\\n'" .
+	    " -r \$commit",
+    "find_commit_author_cmd" =>
+	"hg log " .
+	    "--template='HgCommit: {node}\\n" .
+		        "HgAuthor: {author}\\n" .
+			"HgSubject: {desc|firstline}\\n'" .
+	    " -r \$commit",
+    "blame_range_cmd" => "",		# not supported
+    "blame_file_cmd" => "hg blame -n \$file",
+    "commit_pattern" => "^HgCommit: ([0-9a-f]{40,40})",
+    "blame_commit_pattern" => "^([ 0-9a-f]+):",
+    "author_pattern" => "^HgAuthor: (.*)",
+    "subject_pattern" => "^HgSubject: (.*)",
+);
+
+my $conf = which_conf(".get_maintainer.conf");
+if (-f $conf) {
+    my @conf_args;
+    open(my $conffile, '<', "$conf")
+	or warn "$P: Can't find a readable .get_maintainer.conf file $!\n";
+
+    while (<$conffile>) {
+	my $line = $_;
+
+	$line =~ s/\s*\n?$//g;
+	$line =~ s/^\s*//g;
+	$line =~ s/\s+/ /g;
+
+	next if ($line =~ m/^\s*#/);
+	next if ($line =~ m/^\s*$/);
+
+	my @words = split(" ", $line);
+	foreach my $word (@words) {
+	    last if ($word =~ m/^#/);
+	    push (@conf_args, $word);
+	}
+    }
+    close($conffile);
+    unshift(@ARGV, @conf_args) if @conf_args;
+}
+
+if (!GetOptions(
+		'email!' => \$email,
+		'git!' => \$email_git,
+		'git-all-signature-types!' => \$email_git_all_signature_types,
+		'git-blame!' => \$email_git_blame,
+		'git-blame-signatures!' => \$email_git_blame_signatures,
+		'git-fallback!' => \$email_git_fallback,
+		'git-min-signatures=i' => \$email_git_min_signatures,
+		'git-max-maintainers=i' => \$email_git_max_maintainers,
+		'git-min-percent=i' => \$email_git_min_percent,
+		'git-since=s' => \$email_git_since,
+		'hg-since=s' => \$email_hg_since,
+		'i|interactive!' => \$interactive,
+		'remove-duplicates!' => \$email_remove_duplicates,
+		'mailmap!' => \$email_use_mailmap,
+		'm!' => \$email_maintainer,
+		'r!' => \$email_reviewer,
+		'n!' => \$email_usename,
+		'l!' => \$email_list,
+		's!' => \$email_subscriber_list,
+		'multiline!' => \$output_multiline,
+		'roles!' => \$output_roles,
+		'rolestats!' => \$output_rolestats,
+		'separator=s' => \$output_separator,
+		'subsystem!' => \$subsystem,
+		'status!' => \$status,
+		'scm!' => \$scm,
+		'web!' => \$web,
+		'pattern-depth=i' => \$pattern_depth,
+		'k|keywords!' => \$keywords,
+		'sections!' => \$sections,
+		'fe|file-emails!' => \$file_emails,
+		'f|file' => \$from_filename,
+		'v|version' => \$version,
+		'h|help|usage' => \$help,
+		)) {
+    die "$P: invalid argument - use --help if necessary\n";
+}
+
+if ($help != 0) {
+    usage();
+    exit 0;
+}
+
+if ($version != 0) {
+    print("${P} ${V}\n");
+    exit 0;
+}
+
+if (-t STDIN && !@ARGV) {
+    # We're talking to a terminal, but have no command line arguments.
+    die "$P: missing patchfile or -f file - use --help if necessary\n";
+}
+
+$output_multiline = 0 if ($output_separator ne ", ");
+$output_rolestats = 1 if ($interactive);
+$output_roles = 1 if ($output_rolestats);
+
+if ($sections) {
+    $email = 0;
+    $email_list = 0;
+    $scm = 0;
+    $status = 0;
+    $subsystem = 0;
+    $web = 0;
+    $keywords = 0;
+    $interactive = 0;
+} else {
+    my $selections = $email + $scm + $status + $subsystem + $web;
+    if ($selections == 0) {
+	die "$P:  Missing required option: email, scm, status, subsystem or web\n";
+    }
+}
+
+if ($email &&
+    ($email_maintainer + $email_reviewer +
+     $email_list + $email_subscriber_list +
+     $email_git + $email_git_blame) == 0) {
+    die "$P: Please select at least 1 email option\n";
+}
+
+if (!top_of_tree($lk_path)) {
+    die "$P: The current directory does not appear to be "
+	. "a QEMU source tree.\n";
+}
+
+## Read MAINTAINERS for type/value pairs
+
+my @typevalue = ();
+my %keyword_hash;
+
+open (my $maint, '<', "${lk_path}MAINTAINERS")
+    or die "$P: Can't open MAINTAINERS: $!\n";
+while (<$maint>) {
+    my $line = $_;
+
+    if ($line =~ m/^(.):\s*(.*)/) {
+	my $type = $1;
+	my $value = $2;
+
+	##Filename pattern matching
+	if ($type eq "F" || $type eq "X") {
+	    $value =~ s@\.@\\\.@g;       ##Convert . to \.
+	    $value =~ s/\*/\.\*/g;       ##Convert * to .*
+	    $value =~ s/\?/\./g;         ##Convert ? to .
+	    ##if pattern is a directory and it lacks a trailing slash, add one
+	    if ((-d $value)) {
+		$value =~ s@([^/])$@$1/@;
+	    }
+	} elsif ($type eq "K") {
+	    $keyword_hash{@typevalue} = $value;
+	}
+	push(@typevalue, "$type:$value");
+    } elsif (!/^(\s)*$/) {
+	$line =~ s/\n$//g;
+	push(@typevalue, $line);
+    }
+}
+close($maint);
+
+
+#
+# Read mail address map
+#
+
+my $mailmap;
+
+read_mailmap();
+
+sub read_mailmap {
+    $mailmap = {
+	names => {},
+	addresses => {}
+    };
+
+    return if (!$email_use_mailmap || !(-f "${lk_path}.mailmap"));
+
+    open(my $mailmap_file, '<', "${lk_path}.mailmap")
+	or warn "$P: Can't open .mailmap: $!\n";
+
+    while (<$mailmap_file>) {
+	s/#.*$//; #strip comments
+	s/^\s+|\s+$//g; #trim
+
+	next if (/^\s*$/); #skip empty lines
+	#entries have one of the following formats:
+	# name1 
+	#  
+	# name1  
+	# name1  name2 
+	# (see man git-shortlog)
+
+	if (/^([^<]+)<([^>]+)>$/) {
+	    my $real_name = $1;
+	    my $address = $2;
+
+	    $real_name =~ s/\s+$//;
+	    ($real_name, $address) = parse_email("$real_name <$address>");
+	    $mailmap->{names}->{$address} = $real_name;
+
+	} elsif (/^<([^>]+)>\s*<([^>]+)>$/) {
+	    my $real_address = $1;
+	    my $wrong_address = $2;
+
+	    $mailmap->{addresses}->{$wrong_address} = $real_address;
+
+	} elsif (/^(.+)<([^>]+)>\s*<([^>]+)>$/) {
+	    my $real_name = $1;
+	    my $real_address = $2;
+	    my $wrong_address = $3;
+
+	    $real_name =~ s/\s+$//;
+	    ($real_name, $real_address) =
+		parse_email("$real_name <$real_address>");
+	    $mailmap->{names}->{$wrong_address} = $real_name;
+	    $mailmap->{addresses}->{$wrong_address} = $real_address;
+
+	} elsif (/^(.+)<([^>]+)>\s*(.+)\s*<([^>]+)>$/) {
+	    my $real_name = $1;
+	    my $real_address = $2;
+	    my $wrong_name = $3;
+	    my $wrong_address = $4;
+
+	    $real_name =~ s/\s+$//;
+	    ($real_name, $real_address) =
+		parse_email("$real_name <$real_address>");
+
+	    $wrong_name =~ s/\s+$//;
+	    ($wrong_name, $wrong_address) =
+		parse_email("$wrong_name <$wrong_address>");
+
+	    my $wrong_email = format_email($wrong_name, $wrong_address, 1);
+	    $mailmap->{names}->{$wrong_email} = $real_name;
+	    $mailmap->{addresses}->{$wrong_email} = $real_address;
+	}
+    }
+    close($mailmap_file);
+}
+
+## use the filenames on the command line or find the filenames in the patchfiles
+
+my @files = ();
+my @range = ();
+my @keyword_tvi = ();
+my @file_emails = ();
+
+if (!@ARGV) {
+    push(@ARGV, "&STDIN");
+}
+
+foreach my $file (@ARGV) {
+    if ($file ne "&STDIN") {
+	##if $file is a directory and it lacks a trailing slash, add one
+	if ((-d $file)) {
+	    $file =~ s@([^/])$@$1/@;
+	} elsif (!(stat $file)) {
+	    die "$P: file '${file}' not found: $!\n";
+	}
+    }
+    if ($from_filename) {
+	push(@files, $file);
+	if ($file ne "MAINTAINERS" && -f $file && ($keywords || $file_emails)) {
+	    open(my $f, '<', $file)
+		or die "$P: Can't open $file: $!\n";
+	    my $text = do { local($/) ; <$f> };
+	    close($f);
+	    if ($keywords) {
+		foreach my $line (keys %keyword_hash) {
+		    if ($text =~ m/$keyword_hash{$line}/x) {
+			push(@keyword_tvi, $line);
+		    }
+		}
+	    }
+	    if ($file_emails) {
+		my @poss_addr = $text =~ m$[A-Za-zÀ-ÿ\"\' \,\.\+-]*\s*[\,]*\s*[\(\<\{]{0,1}[A-Za-z0-9_\.\+-]+\@[A-Za-z0-9\.-]+\.[A-Za-z0-9]+[\)\>\}]{0,1}$g;
+		push(@file_emails, clean_file_emails(@poss_addr));
+	    }
+	}
+    } else {
+	my $file_cnt = @files;
+	my $lastfile;
+
+	open(my $patch, "< $file")
+	    or die "$P: Can't open $file: $!\n";
+
+	# We can check arbitrary information before the patch
+	# like the commit message, mail headers, etc...
+	# This allows us to match arbitrary keywords against any part
+	# of a git format-patch generated file (subject tags, etc...)
+
+	my $patch_prefix = "";			#Parsing the intro
+
+	while (<$patch>) {
+	    my $patch_line = $_;
+	    if (m/^\+\+\+\s+(\S+)/) {
+		my $filename = $1;
+		$filename =~ s@^[^/]*/@@;
+		$filename =~ s@\n@@;
+		$lastfile = $filename;
+		push(@files, $filename);
+		$patch_prefix = "^[+-].*";	#Now parsing the actual patch
+	    } elsif (m/^\@\@ -(\d+),(\d+)/) {
+		if ($email_git_blame) {
+		    push(@range, "$lastfile:$1:$2");
+		}
+	    } elsif ($keywords) {
+		foreach my $line (keys %keyword_hash) {
+		    if ($patch_line =~ m/${patch_prefix}$keyword_hash{$line}/x) {
+			push(@keyword_tvi, $line);
+		    }
+		}
+	    }
+	}
+	close($patch);
+
+	if ($file_cnt == @files) {
+	    warn "$P: file '${file}' doesn't appear to be a patch.  "
+		. "Add -f to options?\n";
+	}
+	@files = sort_and_uniq(@files);
+    }
+}
+
+@file_emails = uniq(@file_emails);
+
+my %email_hash_name;
+my %email_hash_address;
+my @email_to = ();
+my %hash_list_to;
+my @list_to = ();
+my @scm = ();
+my @web = ();
+my @subsystem = ();
+my @status = ();
+my %deduplicate_name_hash = ();
+my %deduplicate_address_hash = ();
+
+my @maintainers = get_maintainers();
+
+if (@maintainers) {
+    @maintainers = merge_email(@maintainers);
+    output(@maintainers);
+}
+
+if ($scm) {
+    @scm = uniq(@scm);
+    output(@scm);
+}
+
+if ($status) {
+    @status = uniq(@status);
+    output(@status);
+}
+
+if ($subsystem) {
+    @subsystem = uniq(@subsystem);
+    output(@subsystem);
+}
+
+if ($web) {
+    @web = uniq(@web);
+    output(@web);
+}
+
+exit($exit);
+
+sub range_is_maintained {
+    my ($start, $end) = @_;
+
+    for (my $i = $start; $i < $end; $i++) {
+	my $line = $typevalue[$i];
+	if ($line =~ m/^(.):\s*(.*)/) {
+	    my $type = $1;
+	    my $value = $2;
+	    if ($type eq 'S') {
+		if ($value =~ /(maintain|support)/i) {
+		    return 1;
+		}
+	    }
+	}
+    }
+    return 0;
+}
+
+sub range_has_maintainer {
+    my ($start, $end) = @_;
+
+    for (my $i = $start; $i < $end; $i++) {
+	my $line = $typevalue[$i];
+	if ($line =~ m/^(.):\s*(.*)/) {
+	    my $type = $1;
+	    my $value = $2;
+	    if ($type eq 'M') {
+		return 1;
+	    }
+	}
+    }
+    return 0;
+}
+
+sub get_maintainers {
+    %email_hash_name = ();
+    %email_hash_address = ();
+    %commit_author_hash = ();
+    %commit_signer_hash = ();
+    @email_to = ();
+    %hash_list_to = ();
+    @list_to = ();
+    @scm = ();
+    @web = ();
+    @subsystem = ();
+    @status = ();
+    %deduplicate_name_hash = ();
+    %deduplicate_address_hash = ();
+    if ($email_git_all_signature_types) {
+	$signature_pattern = "(.+?)[Bb][Yy]:";
+    } else {
+	$signature_pattern = "\(" . join("|", @signature_tags) . "\)";
+    }
+
+    # Find responsible parties
+
+    my %exact_pattern_match_hash = ();
+
+    foreach my $file (@files) {
+
+	my %hash;
+	my $tvi = find_first_section();
+	while ($tvi < @typevalue) {
+	    my $start = find_starting_index($tvi);
+	    my $end = find_ending_index($tvi);
+	    my $exclude = 0;
+	    my $i;
+
+	    #Do not match excluded file patterns
+
+	    for ($i = $start; $i < $end; $i++) {
+		my $line = $typevalue[$i];
+		if ($line =~ m/^(.):\s*(.*)/) {
+		    my $type = $1;
+		    my $value = $2;
+		    if ($type eq 'X') {
+			if (file_match_pattern($file, $value)) {
+			    $exclude = 1;
+			    last;
+			}
+		    }
+		}
+	    }
+
+	    if (!$exclude) {
+		for ($i = $start; $i < $end; $i++) {
+		    my $line = $typevalue[$i];
+		    if ($line =~ m/^(.):\s*(.*)/) {
+			my $type = $1;
+			my $value = $2;
+			if ($type eq 'F') {
+			    if (file_match_pattern($file, $value)) {
+				my $value_pd = ($value =~ tr@/@@);
+				my $file_pd = ($file  =~ tr@/@@);
+				$value_pd++ if (substr($value,-1,1) ne "/");
+				$value_pd = -1 if ($value =~ /^\.\*/);
+				if ($value_pd >= $file_pd &&
+				    range_is_maintained($start, $end) &&
+				    range_has_maintainer($start, $end)) {
+				    $exact_pattern_match_hash{$file} = 1;
+				}
+				if ($pattern_depth == 0 ||
+				    (($file_pd - $value_pd) < $pattern_depth)) {
+				    $hash{$tvi} = $value_pd;
+				}
+			    }
+			}
+		    }
+		}
+	    }
+	    $tvi = $end + 1;
+	}
+
+	foreach my $line (sort {$hash{$b} <=> $hash{$a}} keys %hash) {
+	    add_categories($line);
+	    if ($sections) {
+		my $i;
+		my $start = find_starting_index($line);
+		my $end = find_ending_index($line);
+		for ($i = $start; $i < $end; $i++) {
+		    my $line = $typevalue[$i];
+		    if ($line =~ /^[FX]:/) {		##Restore file patterns
+			$line =~ s/([^\\])\.([^\*])/$1\?$2/g;
+			$line =~ s/([^\\])\.$/$1\?/g;	##Convert . back to ?
+			$line =~ s/\\\./\./g;       	##Convert \. to .
+			$line =~ s/\.\*/\*/g;       	##Convert .* to *
+		    }
+		    $line =~ s/^([A-Z]):/$1:\t/g;
+		    print("$line\n");
+		}
+		print("\n");
+	    }
+	}
+    }
+
+    if ($keywords) {
+	@keyword_tvi = sort_and_uniq(@keyword_tvi);
+	foreach my $line (@keyword_tvi) {
+	    add_categories($line);
+	}
+    }
+
+    foreach my $email (@email_to, @list_to) {
+	$email->[0] = deduplicate_email($email->[0]);
+    }
+
+    if ($email) {
+	if (! $interactive) {
+	    $email_git_fallback = 0 if @email_to > 0 || $email_git || $email_git_blame;
+	    if ($email_git_fallback) {
+	        print STDERR "get_maintainer.pl: No maintainers found, printing recent contributors.\n";
+	        print STDERR "get_maintainer.pl: Do not blindly cc: them on patches!  Use common sense.\n";
+	        print STDERR "\n";
+            }
+        }
+
+	foreach my $file (@files) {
+	    if ($email_git || ($email_git_fallback &&
+			       !$exact_pattern_match_hash{$file})) {
+	        vcs_file_signoffs($file);
+	    }
+	    if ($email_git_blame) {
+	        vcs_file_blame($file);
+	    }
+	}
+
+	foreach my $email (@file_emails) {
+	    my ($name, $address) = parse_email($email);
+
+	    my $tmp_email = format_email($name, $address, $email_usename);
+	    push_email_address($tmp_email, '');
+	    add_role($tmp_email, 'in file');
+	}
+    }
+
+    my @to = ();
+    if ($email || $email_list) {
+	if ($email) {
+	    @to = (@to, @email_to);
+	}
+	if ($email_list) {
+	    @to = (@to, @list_to);
+	}
+    }
+
+    if ($interactive) {
+	@to = interactive_get_maintainers(\@to);
+    }
+
+    return @to;
+}
+
+sub file_match_pattern {
+    my ($file, $pattern) = @_;
+    if (substr($pattern, -1) eq "/") {
+	if ($file =~ m@^$pattern@) {
+	    return 1;
+	}
+    } else {
+	if ($file =~ m@^$pattern@) {
+	    my $s1 = ($file =~ tr@/@@);
+	    my $s2 = ($pattern =~ tr@/@@);
+	    if ($s1 == $s2) {
+		return 1;
+	    }
+	}
+    }
+    return 0;
+}
+
+sub usage {
+    print < print email address(es) if any
+    --git => include recent git \*-by: signers
+    --git-all-signature-types => include signers regardless of signature type
+        or use only ${signature_pattern} signers (default: $email_git_all_signature_types)
+    --git-fallback => use git when no exact MAINTAINERS pattern (default: $email_git_fallback)
+    --git-min-signatures => number of signatures required (default: $email_git_min_signatures)
+    --git-max-maintainers => maximum maintainers to add (default: $email_git_max_maintainers)
+    --git-min-percent => minimum percentage of commits required (default: $email_git_min_percent)
+    --git-blame => use git blame to find modified commits for patch or file
+    --git-since => git history to use (default: $email_git_since)
+    --hg-since => hg history to use (default: $email_hg_since)
+    --interactive => display a menu (mostly useful if used with the --git option)
+    --m => include maintainer(s) if any
+    --r => include reviewer(s) if any
+    --n => include name 'Full Name '
+    --l => include list(s) if any
+    --s => include subscriber only list(s) if any
+    --remove-duplicates => minimize duplicate email names/addresses
+    --roles => show roles (status:subsystem, git-signer, list, etc...)
+    --rolestats => show roles and statistics (commits/total_commits, %)
+    --file-emails => add email addresses found in -f file (default: 0 (off))
+  --scm => print SCM tree(s) if any
+  --status => print status if any
+  --subsystem => print subsystem name if any
+  --web => print website(s) if any
+
+Output type options:
+  --separator [, ] => separator for multiple entries on 1 line
+    using --separator also sets --nomultiline if --separator is not [, ]
+  --multiline => print 1 entry per line
+
+Other options:
+  --pattern-depth => Number of pattern directory traversals (default: 0 (all))
+  --keywords => scan patch for keywords (default: $keywords)
+  --sections => print all of the subsystem sections with pattern matches
+  --mailmap => use .mailmap file (default: $email_use_mailmap)
+  --version => show version
+  --help => show this help information
+
+Default options:
+  [--email --nogit --git-fallback --m --r --n --l --multiline --pattern-depth=0
+   --remove-duplicates --rolestats]
+
+Notes:
+  Using "-f directory" may give unexpected results:
+      Used with "--git", git signators for _all_ files in and below
+          directory are examined as git recurses directories.
+          Any specified X: (exclude) pattern matches are _not_ ignored.
+      Used with "--nogit", directory is used as a pattern match,
+          no individual file within the directory or subdirectory
+          is matched.
+      Used with "--git-blame", does not iterate all files in directory
+  Using "--git-blame" is slow and may add old committers and authors
+      that are no longer active maintainers to the output.
+  Using "--roles" or "--rolestats" with git send-email --cc-cmd or any
+      other automated tools that expect only ["name"] 
+      may not work because of additional output after .
+  Using "--rolestats" and "--git-blame" shows the #/total=% commits,
+      not the percentage of the entire file authored.  # of commits is
+      not a good measure of amount of code authored.  1 major commit may
+      contain a thousand lines, 5 trivial commits may modify a single line.
+  If git is not installed, but mercurial (hg) is installed and an .hg
+      repository exists, the following options apply to mercurial:
+          --git,
+          --git-min-signatures, --git-max-maintainers, --git-min-percent, and
+          --git-blame
+      Use --hg-since not --git-since to control date selection
+  File ".get_maintainer.conf", if it exists in the QEMU source root
+      directory, can change whatever get_maintainer defaults are desired.
+      Entries in this file can be any command line argument.
+      This file is prepended to any additional command line arguments.
+      Multiple lines and # comments are allowed.
+EOT
+}
+
+sub top_of_tree {
+    my ($lk_path) = @_;
+
+    if ($lk_path ne "" && substr($lk_path,length($lk_path)-1,1) ne "/") {
+	$lk_path .= "/";
+    }
+    if (    (-f "${lk_path}COPYING")
+        && (-f "${lk_path}MAINTAINERS")
+        && (-f "${lk_path}Makefile")
+        && (-d "${lk_path}docs")
+        && (-f "${lk_path}VERSION")
+        && (-d "${lk_path}linux-user/")
+        && (-d "${lk_path}softmmu/")) {
+	return 1;
+    }
+    return 0;
+}
+
+sub parse_email {
+    my ($formatted_email) = @_;
+
+    my $name = "";
+    my $address = "";
+
+    if ($formatted_email =~ /^([^<]+)<(.+\@.*)>.*$/) {
+	$name = $1;
+	$address = $2;
+    } elsif ($formatted_email =~ /^\s*<(.+\@\S*)>.*$/) {
+	$address = $1;
+    } elsif ($formatted_email =~ /^(.+\@\S*).*$/) {
+	$address = $1;
+    }
+
+    $name =~ s/^\s+|\s+$//g;
+    $name =~ s/^\"|\"$//g;
+    $address =~ s/^\s+|\s+$//g;
+
+    if ($name =~ /[^\w \-]/i) {  	 ##has "must quote" chars
+	$name =~ s/(?";
+	}
+    } else {
+	$formatted_email = $address;
+    }
+
+    return $formatted_email;
+}
+
+sub find_first_section {
+    my $index = 0;
+
+    while ($index < @typevalue) {
+	my $tv = $typevalue[$index];
+	if (($tv =~ m/^(.):\s*(.*)/)) {
+	    last;
+	}
+	$index++;
+    }
+
+    return $index;
+}
+
+sub find_starting_index {
+    my ($index) = @_;
+
+    while ($index > 0) {
+	my $tv = $typevalue[$index];
+	if (!($tv =~ m/^(.):\s*(.*)/)) {
+	    last;
+	}
+	$index--;
+    }
+
+    return $index;
+}
+
+sub find_ending_index {
+    my ($index) = @_;
+
+    while ($index < @typevalue) {
+	my $tv = $typevalue[$index];
+	if (!($tv =~ m/^(.):\s*(.*)/)) {
+	    last;
+	}
+	$index++;
+    }
+
+    return $index;
+}
+
+sub get_subsystem_name {
+    my ($index) = @_;
+
+    my $start = find_starting_index($index);
+
+    my $subsystem = $typevalue[$start];
+    if (length($subsystem) > 20) {
+	$subsystem = substr($subsystem, 0, 17);
+	$subsystem =~ s/\s*$//;
+	$subsystem = $subsystem . "...";
+    }
+    return $subsystem;
+}
+
+sub get_maintainer_role {
+    my ($index) = @_;
+
+    my $i;
+    my $start = find_starting_index($index);
+    my $end = find_ending_index($index);
+
+    my $role = "unknown";
+    my $subsystem = get_subsystem_name($index);
+
+    for ($i = $start + 1; $i < $end; $i++) {
+	my $tv = $typevalue[$i];
+	if ($tv =~ m/^(.):\s*(.*)/) {
+	    my $ptype = $1;
+	    my $pvalue = $2;
+	    if ($ptype eq "S") {
+		$role = $pvalue;
+	    }
+	}
+    }
+
+    $role = lc($role);
+    if      ($role eq "supported") {
+	$role = "supporter";
+    } elsif ($role eq "maintained") {
+	$role = "maintainer";
+    } elsif ($role eq "odd fixes") {
+	$role = "odd fixer";
+    } elsif ($role eq "orphan") {
+	$role = "orphan minder";
+    } elsif ($role eq "obsolete") {
+	$role = "obsolete minder";
+    } elsif ($role eq "buried alive in reporters") {
+	$role = "chief penguin";
+    }
+
+    return $role . ":" . $subsystem;
+}
+
+sub get_list_role {
+    my ($index) = @_;
+
+    my $subsystem = get_subsystem_name($index);
+
+    if ($subsystem eq "THE REST") {
+	$subsystem = "";
+    }
+
+    return $subsystem;
+}
+
+sub add_categories {
+    my ($index) = @_;
+
+    my $i;
+    my $start = find_starting_index($index);
+    my $end = find_ending_index($index);
+
+    push(@subsystem, $typevalue[$start]);
+
+    for ($i = $start + 1; $i < $end; $i++) {
+	my $tv = $typevalue[$i];
+	if ($tv =~ m/^(.):\s*(.*)/) {
+	    my $ptype = $1;
+	    my $pvalue = $2;
+	    if ($ptype eq "L") {
+		my $list_address = $pvalue;
+		my $list_additional = "";
+		my $list_role = get_list_role($i);
+
+		if ($list_role ne "") {
+		    $list_role = ":" . $list_role;
+		}
+		if ($list_address =~ m/([^\s]+)\s+(.*)$/) {
+		    $list_address = $1;
+		    $list_additional = $2;
+		}
+		if ($list_additional =~ m/subscribers-only/) {
+		    if ($email_subscriber_list) {
+			if (!$hash_list_to{lc($list_address)}) {
+			    $hash_list_to{lc($list_address)} = 1;
+			    push(@list_to, [$list_address,
+					    "subscriber list${list_role}"]);
+			}
+		    }
+		} else {
+		    if ($email_list) {
+			if (!$hash_list_to{lc($list_address)}) {
+			    $hash_list_to{lc($list_address)} = 1;
+			    if ($list_additional =~ m/moderated/) {
+				push(@list_to, [$list_address,
+						"moderated list${list_role}"]);
+			    } else {
+				push(@list_to, [$list_address,
+						"open list${list_role}"]);
+			    }
+			}
+		    }
+		}
+	    } elsif ($ptype eq "M") {
+		my ($name, $address) = parse_email($pvalue);
+		if ($name eq "") {
+		    if ($i > 0) {
+			my $tv = $typevalue[$i - 1];
+			if ($tv =~ m/^(.):\s*(.*)/) {
+			    if ($1 eq "P") {
+				$name = $2;
+				$pvalue = format_email($name, $address, $email_usename);
+			    }
+			}
+		    }
+		}
+		if ($email_maintainer) {
+		    my $role = get_maintainer_role($i);
+		    push_email_addresses($pvalue, $role);
+		}
+	    } elsif ($ptype eq "R") {
+		my ($name, $address) = parse_email($pvalue);
+		if ($name eq "") {
+		    if ($i > 0) {
+			my $tv = $typevalue[$i - 1];
+			if ($tv =~ m/^(.):\s*(.*)/) {
+			    if ($1 eq "P") {
+				$name = $2;
+				$pvalue = format_email($name, $address, $email_usename);
+			    }
+			}
+		    }
+		}
+		if ($email_reviewer) {
+		    my $subsystem = get_subsystem_name($i);
+		    push_email_addresses($pvalue, "reviewer:$subsystem");
+		}
+	    } elsif ($ptype eq "T") {
+		push(@scm, $pvalue);
+	    } elsif ($ptype eq "W") {
+		push(@web, $pvalue);
+	    } elsif ($ptype eq "S") {
+		push(@status, $pvalue);
+	    }
+	}
+    }
+}
+
+sub email_inuse {
+    my ($name, $address) = @_;
+
+    return 1 if (($name eq "") && ($address eq ""));
+    return 1 if (($name ne "") && exists($email_hash_name{lc($name)}));
+    return 1 if (($address ne "") && exists($email_hash_address{lc($address)}));
+
+    return 0;
+}
+
+sub push_email_address {
+    my ($line, $role) = @_;
+
+    my ($name, $address) = parse_email($line);
+
+    if ($address eq "") {
+	return 0;
+    }
+
+    if (!$email_remove_duplicates) {
+	push(@email_to, [format_email($name, $address, $email_usename), $role]);
+    } elsif (!email_inuse($name, $address)) {
+	push(@email_to, [format_email($name, $address, $email_usename), $role]);
+	$email_hash_name{lc($name)}++ if ($name ne "");
+	$email_hash_address{lc($address)}++;
+    }
+
+    return 1;
+}
+
+sub push_email_addresses {
+    my ($address, $role) = @_;
+
+    my @address_list = ();
+
+    if (rfc822_valid($address)) {
+	push_email_address($address, $role);
+    } elsif (@address_list = rfc822_validlist($address)) {
+	my $array_count = shift(@address_list);
+	while (my $entry = shift(@address_list)) {
+	    push_email_address($entry, $role);
+	}
+    } else {
+	if (!push_email_address($address, $role)) {
+	    warn("Invalid MAINTAINERS address: '" . $address . "'\n");
+	}
+    }
+}
+
+sub add_role {
+    my ($line, $role) = @_;
+
+    my ($name, $address) = parse_email($line);
+    my $email = format_email($name, $address, $email_usename);
+
+    foreach my $entry (@email_to) {
+	if ($email_remove_duplicates) {
+	    my ($entry_name, $entry_address) = parse_email($entry->[0]);
+	    if (($name eq $entry_name || $address eq $entry_address)
+		&& ($role eq "" || !($entry->[1] =~ m/$role/))
+	    ) {
+		if ($entry->[1] eq "") {
+		    $entry->[1] = "$role";
+		} else {
+		    $entry->[1] = "$entry->[1],$role";
+		}
+	    }
+	} else {
+	    if ($email eq $entry->[0]
+		&& ($role eq "" || !($entry->[1] =~ m/$role/))
+	    ) {
+		if ($entry->[1] eq "") {
+		    $entry->[1] = "$role";
+		} else {
+		    $entry->[1] = "$entry->[1],$role";
+		}
+	    }
+	}
+    }
+}
+
+sub which {
+    my ($bin) = @_;
+
+    foreach my $path (split(/:/, $ENV{PATH})) {
+	if (-e "$path/$bin") {
+	    return "$path/$bin";
+	}
+    }
+
+    return "";
+}
+
+sub which_conf {
+    my ($conf) = @_;
+
+    foreach my $path (split(/:/, ".:$ENV{HOME}:.scripts")) {
+	if (-e "$path/$conf") {
+	    return "$path/$conf";
+	}
+    }
+
+    return "";
+}
+
+sub mailmap_email {
+    my ($line) = @_;
+
+    my ($name, $address) = parse_email($line);
+    my $email = format_email($name, $address, 1);
+    my $real_name = $name;
+    my $real_address = $address;
+
+    if (exists $mailmap->{names}->{$email} ||
+	exists $mailmap->{addresses}->{$email}) {
+	if (exists $mailmap->{names}->{$email}) {
+	    $real_name = $mailmap->{names}->{$email};
+	}
+	if (exists $mailmap->{addresses}->{$email}) {
+	    $real_address = $mailmap->{addresses}->{$email};
+	}
+    } else {
+	if (exists $mailmap->{names}->{$address}) {
+	    $real_name = $mailmap->{names}->{$address};
+	}
+	if (exists $mailmap->{addresses}->{$address}) {
+	    $real_address = $mailmap->{addresses}->{$address};
+	}
+    }
+    return format_email($real_name, $real_address, 1);
+}
+
+sub mailmap {
+    my (@addresses) = @_;
+
+    my @mapped_emails = ();
+    foreach my $line (@addresses) {
+	push(@mapped_emails, mailmap_email($line));
+    }
+    merge_by_realname(@mapped_emails) if ($email_use_mailmap);
+    return @mapped_emails;
+}
+
+sub merge_by_realname {
+    my %address_map;
+    my (@emails) = @_;
+
+    foreach my $email (@emails) {
+	my ($name, $address) = parse_email($email);
+	if (exists $address_map{$name}) {
+	    $address = $address_map{$name};
+	    $email = format_email($name, $address, 1);
+	} else {
+	    $address_map{$name} = $address;
+	}
+    }
+}
+
+sub git_execute_cmd {
+    my ($cmd) = @_;
+    my @lines = ();
+
+    my $output = `$cmd`;
+    $output =~ s/^\s*//gm;
+    @lines = split("\n", $output);
+
+    return @lines;
+}
+
+sub hg_execute_cmd {
+    my ($cmd) = @_;
+    my @lines = ();
+
+    my $output = `$cmd`;
+    @lines = split("\n", $output);
+
+    return @lines;
+}
+
+sub extract_formatted_signatures {
+    my (@signature_lines) = @_;
+
+    my @type = @signature_lines;
+
+    s/\s*(.*):.*/$1/ for (@type);
+
+    # cut -f2- -d":"
+    s/\s*.*:\s*(.+)\s*/$1/ for (@signature_lines);
+
+## Reformat email addresses (with names) to avoid badly written signatures
+
+    foreach my $signer (@signature_lines) {
+	$signer = deduplicate_email($signer);
+    }
+
+    return (\@type, \@signature_lines);
+}
+
+sub vcs_find_signers {
+    my ($cmd) = @_;
+    my $commits;
+    my @lines = ();
+    my @signatures = ();
+
+    @lines = &{$VCS_cmds{"execute_cmd"}}($cmd);
+
+    my $pattern = $VCS_cmds{"commit_pattern"};
+
+    $commits = grep(/$pattern/, @lines);	# of commits
+
+    @signatures = grep(/^[ \t]*${signature_pattern}.*\@.*$/, @lines);
+
+    return (0, @signatures) if !@signatures;
+
+    save_commits_by_author(@lines) if ($interactive);
+    save_commits_by_signer(@lines) if ($interactive);
+
+    my ($types_ref, $signers_ref) = extract_formatted_signatures(@signatures);
+
+    return ($commits, @$signers_ref);
+}
+
+sub vcs_find_author {
+    my ($cmd) = @_;
+    my @lines = ();
+
+    @lines = &{$VCS_cmds{"execute_cmd"}}($cmd);
+
+    return @lines if !@lines;
+
+    my @authors = ();
+    foreach my $line (@lines) {
+	if ($line =~ m/$VCS_cmds{"author_pattern"}/) {
+	    my $author = $1;
+	    my ($name, $address) = parse_email($author);
+	    $author = format_email($name, $address, 1);
+	    push(@authors, $author);
+	}
+    }
+
+    save_commits_by_author(@lines) if ($interactive);
+    save_commits_by_signer(@lines) if ($interactive);
+
+    return @authors;
+}
+
+sub vcs_save_commits {
+    my ($cmd) = @_;
+    my @lines = ();
+    my @commits = ();
+
+    @lines = &{$VCS_cmds{"execute_cmd"}}($cmd);
+
+    foreach my $line (@lines) {
+	if ($line =~ m/$VCS_cmds{"blame_commit_pattern"}/) {
+	    push(@commits, $1);
+	}
+    }
+
+    return @commits;
+}
+
+sub vcs_blame {
+    my ($file) = @_;
+    my $cmd;
+    my @commits = ();
+
+    return @commits if (!(-f $file));
+
+    if (@range && $VCS_cmds{"blame_range_cmd"} eq "") {
+	my @all_commits = ();
+
+	$cmd = $VCS_cmds{"blame_file_cmd"};
+	$cmd =~ s/(\$\w+)/$1/eeg;		#interpolate $cmd
+	@all_commits = vcs_save_commits($cmd);
+
+	foreach my $file_range_diff (@range) {
+	    next if (!($file_range_diff =~ m/(.+):(.+):(.+)/));
+	    my $diff_file = $1;
+	    my $diff_start = $2;
+	    my $diff_length = $3;
+	    next if ("$file" ne "$diff_file");
+	    for (my $i = $diff_start; $i < $diff_start + $diff_length; $i++) {
+		push(@commits, $all_commits[$i]);
+	    }
+	}
+    } elsif (@range) {
+	foreach my $file_range_diff (@range) {
+	    next if (!($file_range_diff =~ m/(.+):(.+):(.+)/));
+	    my $diff_file = $1;
+	    my $diff_start = $2;
+	    my $diff_length = $3;
+	    next if ("$file" ne "$diff_file");
+	    $cmd = $VCS_cmds{"blame_range_cmd"};
+	    $cmd =~ s/(\$\w+)/$1/eeg;		#interpolate $cmd
+	    push(@commits, vcs_save_commits($cmd));
+	}
+    } else {
+	$cmd = $VCS_cmds{"blame_file_cmd"};
+	$cmd =~ s/(\$\w+)/$1/eeg;		#interpolate $cmd
+	@commits = vcs_save_commits($cmd);
+    }
+
+    foreach my $commit (@commits) {
+	$commit =~ s/^\^//g;
+    }
+
+    return @commits;
+}
+
+my $printed_novcs = 0;
+sub vcs_exists {
+    %VCS_cmds = %VCS_cmds_git;
+    return 1 if eval $VCS_cmds{"available"};
+    %VCS_cmds = %VCS_cmds_hg;
+    return 2 if eval $VCS_cmds{"available"};
+    %VCS_cmds = ();
+    if (!$printed_novcs) {
+	warn("$P: No supported VCS found.  Add --nogit to options?\n");
+	warn("Using a git repository produces better results.\n");
+	warn("Try latest git repository using:\n");
+	warn("git clone https://git.qemu.org/git/qemu.git\n");
+	$printed_novcs = 1;
+    }
+    return 0;
+}
+
+sub vcs_is_git {
+    vcs_exists();
+    return $vcs_used == 1;
+}
+
+sub vcs_is_hg {
+    return $vcs_used == 2;
+}
+
+sub interactive_get_maintainers {
+    my ($list_ref) = @_;
+    my @list = @$list_ref;
+
+    vcs_exists();
+
+    my %selected;
+    my %authored;
+    my %signed;
+    my $count = 0;
+    my $maintained = 0;
+    foreach my $entry (@list) {
+	$maintained = 1 if ($entry->[1] =~ /^(maintainer|supporter)/i);
+	$selected{$count} = 1;
+	$authored{$count} = 0;
+	$signed{$count} = 0;
+	$count++;
+    }
+
+    #menu loop
+    my $done = 0;
+    my $print_options = 0;
+    my $redraw = 1;
+    while (!$done) {
+	$count = 0;
+	if ($redraw) {
+	    printf STDERR "\n%1s %2s %-65s",
+			  "*", "#", "email/list and role:stats";
+	    if ($email_git ||
+		($email_git_fallback && !$maintained) ||
+		$email_git_blame) {
+		print STDERR "auth sign";
+	    }
+	    print STDERR "\n";
+	    foreach my $entry (@list) {
+		my $email = $entry->[0];
+		my $role = $entry->[1];
+		my $sel = "";
+		$sel = "*" if ($selected{$count});
+		my $commit_author = $commit_author_hash{$email};
+		my $commit_signer = $commit_signer_hash{$email};
+		my $authored = 0;
+		my $signed = 0;
+		$authored++ for (@{$commit_author});
+		$signed++ for (@{$commit_signer});
+		printf STDERR "%1s %2d %-65s", $sel, $count + 1, $email;
+		printf STDERR "%4d %4d", $authored, $signed
+		    if ($authored > 0 || $signed > 0);
+		printf STDERR "\n     %s\n", $role;
+		if ($authored{$count}) {
+		    my $commit_author = $commit_author_hash{$email};
+		    foreach my $ref (@{$commit_author}) {
+			print STDERR "     Author: @{$ref}[1]\n";
+		    }
+		}
+		if ($signed{$count}) {
+		    my $commit_signer = $commit_signer_hash{$email};
+		    foreach my $ref (@{$commit_signer}) {
+			print STDERR "     @{$ref}[2]: @{$ref}[1]\n";
+		    }
+		}
+
+		$count++;
+	    }
+	}
+	my $date_ref = \$email_git_since;
+	$date_ref = \$email_hg_since if (vcs_is_hg());
+	if ($print_options) {
+	    $print_options = 0;
+	    if (vcs_exists()) {
+		print STDERR <;
+	chomp($input);
+
+	$redraw = 1;
+	my $rerun = 0;
+	my @wish = split(/[, ]+/, $input);
+	foreach my $nr (@wish) {
+	    $nr = lc($nr);
+	    my $sel = substr($nr, 0, 1);
+	    my $str = substr($nr, 1);
+	    my $val = 0;
+	    $val = $1 if $str =~ /^(\d+)$/;
+
+	    if ($sel eq "y") {
+		$interactive = 0;
+		$done = 1;
+		$output_rolestats = 0;
+		$output_roles = 0;
+		last;
+	    } elsif ($nr =~ /^\d+$/ && $nr > 0 && $nr <= $count) {
+		$selected{$nr - 1} = !$selected{$nr - 1};
+	    } elsif ($sel eq "*" || $sel eq '^') {
+		my $toggle = 0;
+		$toggle = 1 if ($sel eq '*');
+		for (my $i = 0; $i < $count; $i++) {
+		    $selected{$i} = $toggle;
+		}
+	    } elsif ($sel eq "0") {
+		for (my $i = 0; $i < $count; $i++) {
+		    $selected{$i} = !$selected{$i};
+		}
+	    } elsif ($sel eq "t") {
+		if (lc($str) eq "m") {
+		    for (my $i = 0; $i < $count; $i++) {
+			$selected{$i} = !$selected{$i}
+			    if ($list[$i]->[1] =~ /^(maintainer|supporter)/i);
+		    }
+		} elsif (lc($str) eq "g") {
+		    for (my $i = 0; $i < $count; $i++) {
+			$selected{$i} = !$selected{$i}
+			    if ($list[$i]->[1] =~ /^(author|commit|signer)/i);
+		    }
+		} elsif (lc($str) eq "l") {
+		    for (my $i = 0; $i < $count; $i++) {
+			$selected{$i} = !$selected{$i}
+			    if ($list[$i]->[1] =~ /^(open list)/i);
+		    }
+		} elsif (lc($str) eq "s") {
+		    for (my $i = 0; $i < $count; $i++) {
+			$selected{$i} = !$selected{$i}
+			    if ($list[$i]->[1] =~ /^(subscriber list)/i);
+		    }
+		}
+	    } elsif ($sel eq "a") {
+		if ($val > 0 && $val <= $count) {
+		    $authored{$val - 1} = !$authored{$val - 1};
+		} elsif ($str eq '*' || $str eq '^') {
+		    my $toggle = 0;
+		    $toggle = 1 if ($str eq '*');
+		    for (my $i = 0; $i < $count; $i++) {
+			$authored{$i} = $toggle;
+		    }
+		}
+	    } elsif ($sel eq "s") {
+		if ($val > 0 && $val <= $count) {
+		    $signed{$val - 1} = !$signed{$val - 1};
+		} elsif ($str eq '*' || $str eq '^') {
+		    my $toggle = 0;
+		    $toggle = 1 if ($str eq '*');
+		    for (my $i = 0; $i < $count; $i++) {
+			$signed{$i} = $toggle;
+		    }
+		}
+	    } elsif ($sel eq "o") {
+		$print_options = 1;
+		$redraw = 1;
+	    } elsif ($sel eq "g") {
+		if ($str eq "f") {
+		    bool_invert(\$email_git_fallback);
+		} else {
+		    bool_invert(\$email_git);
+		}
+		$rerun = 1;
+	    } elsif ($sel eq "b") {
+		if ($str eq "s") {
+		    bool_invert(\$email_git_blame_signatures);
+		} else {
+		    bool_invert(\$email_git_blame);
+		}
+		$rerun = 1;
+	    } elsif ($sel eq "c") {
+		if ($val > 0) {
+		    $email_git_min_signatures = $val;
+		    $rerun = 1;
+		}
+	    } elsif ($sel eq "x") {
+		if ($val > 0) {
+		    $email_git_max_maintainers = $val;
+		    $rerun = 1;
+		}
+	    } elsif ($sel eq "%") {
+		if ($str ne "" && $val >= 0) {
+		    $email_git_min_percent = $val;
+		    $rerun = 1;
+		}
+	    } elsif ($sel eq "d") {
+		if (vcs_is_git()) {
+		    $email_git_since = $str;
+		} elsif (vcs_is_hg()) {
+		    $email_hg_since = $str;
+		}
+		$rerun = 1;
+	    } elsif ($sel eq "t") {
+		bool_invert(\$email_git_all_signature_types);
+		$rerun = 1;
+	    } elsif ($sel eq "f") {
+		bool_invert(\$file_emails);
+		$rerun = 1;
+	    } elsif ($sel eq "r") {
+		bool_invert(\$email_remove_duplicates);
+		$rerun = 1;
+	    } elsif ($sel eq "m") {
+		bool_invert(\$email_use_mailmap);
+		read_mailmap();
+		$rerun = 1;
+	    } elsif ($sel eq "k") {
+		bool_invert(\$keywords);
+		$rerun = 1;
+	    } elsif ($sel eq "p") {
+		if ($str ne "" && $val >= 0) {
+		    $pattern_depth = $val;
+		    $rerun = 1;
+		}
+	    } elsif ($sel eq "h" || $sel eq "?") {
+		print STDERR <[0];
+	$address = $deduplicate_name_hash{lc($name)}->[1];
+	$matched = 1;
+    } elsif ($deduplicate_address_hash{lc($address)}) {
+	$name = $deduplicate_address_hash{lc($address)}->[0];
+	$address = $deduplicate_address_hash{lc($address)}->[1];
+	$matched = 1;
+    }
+    if (!$matched) {
+	$deduplicate_name_hash{lc($name)} = [ $name, $address ];
+	$deduplicate_address_hash{lc($address)} = [ $name, $address ];
+    }
+    $email = format_email($name, $address, 1);
+    $email = mailmap_email($email);
+    return $email;
+}
+
+sub save_commits_by_author {
+    my (@lines) = @_;
+
+    my @authors = ();
+    my @commits = ();
+    my @subjects = ();
+
+    foreach my $line (@lines) {
+	if ($line =~ m/$VCS_cmds{"author_pattern"}/) {
+	    my $author = $1;
+	    $author = deduplicate_email($author);
+	    push(@authors, $author);
+	}
+	push(@commits, $1) if ($line =~ m/$VCS_cmds{"commit_pattern"}/);
+	push(@subjects, $1) if ($line =~ m/$VCS_cmds{"subject_pattern"}/);
+    }
+
+    for (my $i = 0; $i < @authors; $i++) {
+	my $exists = 0;
+	foreach my $ref(@{$commit_author_hash{$authors[$i]}}) {
+	    if (@{$ref}[0] eq $commits[$i] &&
+		@{$ref}[1] eq $subjects[$i]) {
+		$exists = 1;
+		last;
+	    }
+	}
+	if (!$exists) {
+	    push(@{$commit_author_hash{$authors[$i]}},
+		 [ ($commits[$i], $subjects[$i]) ]);
+	}
+    }
+}
+
+sub save_commits_by_signer {
+    my (@lines) = @_;
+
+    my $commit = "";
+    my $subject = "";
+
+    foreach my $line (@lines) {
+	$commit = $1 if ($line =~ m/$VCS_cmds{"commit_pattern"}/);
+	$subject = $1 if ($line =~ m/$VCS_cmds{"subject_pattern"}/);
+	if ($line =~ /^[ \t]*${signature_pattern}.*\@.*$/) {
+	    my @signatures = ($line);
+	    my ($types_ref, $signers_ref) = extract_formatted_signatures(@signatures);
+	    my @types = @$types_ref;
+	    my @signers = @$signers_ref;
+
+	    my $type = $types[0];
+	    my $signer = $signers[0];
+
+	    $signer = deduplicate_email($signer);
+
+	    my $exists = 0;
+	    foreach my $ref(@{$commit_signer_hash{$signer}}) {
+		if (@{$ref}[0] eq $commit &&
+		    @{$ref}[1] eq $subject &&
+		    @{$ref}[2] eq $type) {
+		    $exists = 1;
+		    last;
+		}
+	    }
+	    if (!$exists) {
+		push(@{$commit_signer_hash{$signer}},
+		     [ ($commit, $subject, $type) ]);
+	    }
+	}
+    }
+}
+
+sub vcs_assign {
+    my ($role, $divisor, @lines) = @_;
+
+    my %hash;
+    my $count = 0;
+
+    return if (@lines <= 0);
+
+    if ($divisor <= 0) {
+	warn("Bad divisor in " . (caller(0))[3] . ": $divisor\n");
+	$divisor = 1;
+    }
+
+    @lines = mailmap(@lines);
+
+    return if (@lines <= 0);
+
+    @lines = sort(@lines);
+
+    # uniq -c
+    $hash{$_}++ for @lines;
+
+    # sort -rn
+    foreach my $line (sort {$hash{$b} <=> $hash{$a}} keys %hash) {
+	my $sign_offs = $hash{$line};
+	my $percent = $sign_offs * 100 / $divisor;
+
+	$percent = 100 if ($percent > 100);
+	$count++;
+	last if ($sign_offs < $email_git_min_signatures ||
+		 $count > $email_git_max_maintainers ||
+		 $percent < $email_git_min_percent);
+	push_email_address($line, '');
+	if ($output_rolestats) {
+	    my $fmt_percent = sprintf("%.0f", $percent);
+	    add_role($line, "$role:$sign_offs/$divisor=$fmt_percent%");
+	} else {
+	    add_role($line, $role);
+	}
+    }
+}
+
+sub vcs_file_signoffs {
+    my ($file) = @_;
+
+    my @signers = ();
+    my $commits;
+
+    $vcs_used = vcs_exists();
+    return if (!$vcs_used);
+
+    my $cmd = $VCS_cmds{"find_signers_cmd"};
+    $cmd =~ s/(\$\w+)/$1/eeg;		# interpolate $cmd
+
+    ($commits, @signers) = vcs_find_signers($cmd);
+
+    foreach my $signer (@signers) {
+	$signer = deduplicate_email($signer);
+    }
+
+    vcs_assign("commit_signer", $commits, @signers);
+}
+
+sub vcs_file_blame {
+    my ($file) = @_;
+
+    my @signers = ();
+    my @all_commits = ();
+    my @commits = ();
+    my $total_commits;
+    my $total_lines;
+
+    $vcs_used = vcs_exists();
+    return if (!$vcs_used);
+
+    @all_commits = vcs_blame($file);
+    @commits = uniq(@all_commits);
+    $total_commits = @commits;
+    $total_lines = @all_commits;
+
+    if ($email_git_blame_signatures) {
+	if (vcs_is_hg()) {
+	    my $commit_count;
+	    my @commit_signers = ();
+	    my $commit = join(" -r ", @commits);
+	    my $cmd;
+
+	    $cmd = $VCS_cmds{"find_commit_signers_cmd"};
+	    $cmd =~ s/(\$\w+)/$1/eeg;	#substitute variables in $cmd
+
+	    ($commit_count, @commit_signers) = vcs_find_signers($cmd);
+
+	    push(@signers, @commit_signers);
+	} else {
+	    foreach my $commit (@commits) {
+		my $commit_count;
+		my @commit_signers = ();
+		my $cmd;
+
+		$cmd = $VCS_cmds{"find_commit_signers_cmd"};
+		$cmd =~ s/(\$\w+)/$1/eeg;	#substitute variables in $cmd
+
+		($commit_count, @commit_signers) = vcs_find_signers($cmd);
+
+		push(@signers, @commit_signers);
+	    }
+	}
+    }
+
+    if ($from_filename) {
+	if ($output_rolestats) {
+	    my @blame_signers;
+	    if (vcs_is_hg()) {{		# Double brace for last exit
+		my $commit_count;
+		my @commit_signers = ();
+		@commits = uniq(@commits);
+		@commits = sort(@commits);
+		my $commit = join(" -r ", @commits);
+		my $cmd;
+
+		$cmd = $VCS_cmds{"find_commit_author_cmd"};
+		$cmd =~ s/(\$\w+)/$1/eeg;	#substitute variables in $cmd
+
+		my @lines = ();
+
+		@lines = &{$VCS_cmds{"execute_cmd"}}($cmd);
+
+		last if !@lines;
+
+		my @authors = ();
+		foreach my $line (@lines) {
+		    if ($line =~ m/$VCS_cmds{"author_pattern"}/) {
+			my $author = $1;
+			$author = deduplicate_email($author);
+			push(@authors, $author);
+		    }
+		}
+
+		save_commits_by_author(@lines) if ($interactive);
+		save_commits_by_signer(@lines) if ($interactive);
+
+		push(@signers, @authors);
+	    }}
+	    else {
+		foreach my $commit (@commits) {
+		    my $i;
+		    my $cmd = $VCS_cmds{"find_commit_author_cmd"};
+		    $cmd =~ s/(\$\w+)/$1/eeg;	#interpolate $cmd
+		    my @author = vcs_find_author($cmd);
+		    next if !@author;
+
+		    my $formatted_author = deduplicate_email($author[0]);
+
+		    my $count = grep(/$commit/, @all_commits);
+		    for ($i = 0; $i < $count ; $i++) {
+			push(@blame_signers, $formatted_author);
+		    }
+		}
+	    }
+	    if (@blame_signers) {
+		vcs_assign("authored lines", $total_lines, @blame_signers);
+	    }
+	}
+	foreach my $signer (@signers) {
+	    $signer = deduplicate_email($signer);
+	}
+	vcs_assign("commits", $total_commits, @signers);
+    } else {
+	foreach my $signer (@signers) {
+	    $signer = deduplicate_email($signer);
+	}
+	vcs_assign("modified commits", $total_commits, @signers);
+    }
+}
+
+sub uniq {
+    my (@parms) = @_;
+
+    my %saw;
+    @parms = grep(!$saw{$_}++, @parms);
+    return @parms;
+}
+
+sub sort_and_uniq {
+    my (@parms) = @_;
+
+    my %saw;
+    @parms = sort @parms;
+    @parms = grep(!$saw{$_}++, @parms);
+    return @parms;
+}
+
+sub clean_file_emails {
+    my (@file_emails) = @_;
+    my @fmt_emails = ();
+
+    foreach my $email (@file_emails) {
+	$email =~ s/[\(\<\{]{0,1}([A-Za-z0-9_\.\+-]+\@[A-Za-z0-9\.-]+)[\)\>\}]{0,1}/\<$1\>/g;
+	my ($name, $address) = parse_email($email);
+	if ($name eq '"[,\.]"') {
+	    $name = "";
+	}
+
+	my @nw = split(/[^A-Za-zÀ-ÿ\'\,\.\+-]/, $name);
+	if (@nw > 2) {
+	    my $first = $nw[@nw - 3];
+	    my $middle = $nw[@nw - 2];
+	    my $last = $nw[@nw - 1];
+
+	    if (((length($first) == 1 && $first =~ m/[A-Za-z]/) ||
+		 (length($first) == 2 && substr($first, -1) eq ".")) ||
+		(length($middle) == 1 ||
+		 (length($middle) == 2 && substr($middle, -1) eq "."))) {
+		$name = "$first $middle $last";
+	    } else {
+		$name = "$middle $last";
+	    }
+	}
+
+	if (substr($name, -1) =~ /[,\.]/) {
+	    $name = substr($name, 0, length($name) - 1);
+	} elsif (substr($name, -2) =~ /[,\.]"/) {
+	    $name = substr($name, 0, length($name) - 2) . '"';
+	}
+
+	if (substr($name, 0, 1) =~ /[,\.]/) {
+	    $name = substr($name, 1, length($name) - 1);
+	} elsif (substr($name, 0, 2) =~ /"[,\.]/) {
+	    $name = '"' . substr($name, 2, length($name) - 2);
+	}
+
+	my $fmt_email = format_email($name, $address, $email_usename);
+	push(@fmt_emails, $fmt_email);
+    }
+    return @fmt_emails;
+}
+
+sub merge_email {
+    my @lines;
+    my %saw;
+
+    for (@_) {
+	my ($address, $role) = @$_;
+	if (!$saw{$address}) {
+	    if ($output_roles) {
+		push(@lines, "$address ($role)");
+	    } else {
+		push(@lines, $address);
+	    }
+	    $saw{$address} = 1;
+	}
+    }
+
+    return @lines;
+}
+
+sub output {
+    my (@parms) = @_;
+
+    if ($output_multiline) {
+	foreach my $line (@parms) {
+	    print("${line}\n");
+	}
+    } else {
+	print(join($output_separator, @parms));
+	print("\n");
+    }
+}
+
+my $rfc822re;
+
+sub make_rfc822re {
+#   Basic lexical tokens are specials, domain_literal, quoted_string, atom, and
+#   comment.  We must allow for rfc822_lwsp (or comments) after each of these.
+#   This regexp will only work on addresses which have had comments stripped
+#   and replaced with rfc822_lwsp.
+
+    my $specials = '()<>@,;:\\\\".\\[\\]';
+    my $controls = '\\000-\\037\\177';
+
+    my $dtext = "[^\\[\\]\\r\\\\]";
+    my $domain_literal = "\\[(?:$dtext|\\\\.)*\\]$rfc822_lwsp*";
+
+    my $quoted_string = "\"(?:[^\\\"\\r\\\\]|\\\\.|$rfc822_lwsp)*\"$rfc822_lwsp*";
+
+#   Use zero-width assertion to spot the limit of an atom.  A simple
+#   $rfc822_lwsp* causes the regexp engine to hang occasionally.
+    my $atom = "[^$specials $controls]+(?:$rfc822_lwsp+|\\Z|(?=[\\[\"$specials]))";
+    my $word = "(?:$atom|$quoted_string)";
+    my $localpart = "$word(?:\\.$rfc822_lwsp*$word)*";
+
+    my $sub_domain = "(?:$atom|$domain_literal)";
+    my $domain = "$sub_domain(?:\\.$rfc822_lwsp*$sub_domain)*";
+
+    my $addr_spec = "$localpart\@$rfc822_lwsp*$domain";
+
+    my $phrase = "$word*";
+    my $route = "(?:\@$domain(?:,\@$rfc822_lwsp*$domain)*:$rfc822_lwsp*)";
+    my $route_addr = "\\<$rfc822_lwsp*$route?$addr_spec\\>$rfc822_lwsp*";
+    my $mailbox = "(?:$addr_spec|$phrase$route_addr)";
+
+    my $group = "$phrase:$rfc822_lwsp*(?:$mailbox(?:,\\s*$mailbox)*)?;\\s*";
+    my $address = "(?:$mailbox|$group)";
+
+    return "$rfc822_lwsp*$address";
+}
+
+sub rfc822_strip_comments {
+    my $s = shift;
+#   Recursively remove comments, and replace with a single space.  The simpler
+#   regexps in the Email Addressing FAQ are imperfect - they will miss escaped
+#   chars in atoms, for example.
+
+    while ($s =~ s/^((?:[^"\\]|\\.)*
+                    (?:"(?:[^"\\]|\\.)*"(?:[^"\\]|\\.)*)*)
+                    \((?:[^()\\]|\\.)*\)/$1 /osx) {}
+    return $s;
+}
+
+#   valid: returns true if the parameter is an RFC822 valid address
+#
+sub rfc822_valid {
+    my $s = rfc822_strip_comments(shift);
+
+    if (!$rfc822re) {
+        $rfc822re = make_rfc822re();
+    }
+
+    return $s =~ m/^$rfc822re$/so && $s =~ m/^$rfc822_char*$/;
+}
+
+#   validlist: In scalar context, returns true if the parameter is an RFC822
+#              valid list of addresses.
+#
+#              In list context, returns an empty list on failure (an invalid
+#              address was found); otherwise a list whose first element is the
+#              number of addresses found and whose remaining elements are the
+#              addresses.  This is needed to disambiguate failure (invalid)
+#              from success with no addresses found, because an empty string is
+#              a valid list.
+
+sub rfc822_validlist {
+    my $s = rfc822_strip_comments(shift);
+
+    if (!$rfc822re) {
+        $rfc822re = make_rfc822re();
+    }
+    # * null list items are valid according to the RFC
+    # * the '1' business is to aid in distinguishing failure from no results
+
+    my @r;
+    if ($s =~ m/^(?:$rfc822re)?(?:,(?:$rfc822re)?)*$/so &&
+	$s =~ m/^$rfc822_char*$/) {
+        while ($s =~ m/(?:^|,$rfc822_lwsp*)($rfc822re)/gos) {
+            push(@r, $1);
+        }
+        return wantarray ? (scalar(@r), @r) : 1;
+    }
+    return wantarray ? () : 0;
+}
diff --git a/scripts/git-submodule.sh b/scripts/git-submodule.sh
new file mode 100755
index 000000000..65ed877ae
--- /dev/null
+++ b/scripts/git-submodule.sh
@@ -0,0 +1,86 @@
+#!/bin/sh
+#
+# This code is licensed under the GPL version 2 or later.  See
+# the COPYING file in the top-level directory.
+
+substat=".git-submodule-status"
+
+command=$1
+shift
+maybe_modules="$@"
+
+test -z "$GIT" && GIT=git
+
+error() {
+    echo "$0: $*"
+    echo
+    echo "Unable to automatically checkout GIT submodules '$modules'."
+    echo "If you require use of an alternative GIT binary (for example to"
+    echo "enable use of a transparent proxy), then please specify it by"
+    echo "running configure by with the '--with-git' argument. e.g."
+    echo
+    echo " $ ./configure --with-git='tsocks git'"
+    echo
+    echo "Alternatively you may disable automatic GIT submodule checkout"
+    echo "with:"
+    echo
+    echo " $ ./configure --disable-git-update"
+    echo
+    echo "and then manually update submodules prior to running make, with:"
+    echo
+    echo " $ scripts/git-submodule.sh update $modules"
+    echo
+    exit 1
+}
+
+modules=""
+for m in $maybe_modules
+do
+    $GIT submodule status $m 1> /dev/null 2>&1
+    if test $? = 0
+    then
+        modules="$modules $m"
+    else
+        echo "warn: ignoring non-existent submodule $m"
+    fi
+done
+
+if test -n "$maybe_modules" && ! test -e ".git"
+then
+    echo "$0: unexpectedly called with submodules but no git checkout exists"
+    exit 1
+fi
+
+case "$command" in
+status)
+    if test -z "$maybe_modules"
+    then
+         test -s ${substat} && exit 1 || exit 0
+    fi
+
+    test -f "$substat" || exit 1
+    for module in $modules; do
+        CURSTATUS=$($GIT submodule status $module)
+        OLDSTATUS=$(cat $substat | grep $module)
+        if test "$CURSTATUS" != "$OLDSTATUS"; then
+            exit 1
+        fi
+    done
+    exit 0
+    ;;
+update)
+    if test -z "$maybe_modules"
+    then
+        test -e $substat || touch $substat
+        exit 0
+    fi
+
+    $GIT submodule update --init $modules 1>/dev/null
+    test $? -ne 0 && error "failed to update modules"
+
+    $GIT submodule status $modules > "${substat}"
+    test $? -ne 0 && error "failed to save git submodule status" >&2
+    ;;
+esac
+
+exit 0
diff --git a/scripts/git.orderfile b/scripts/git.orderfile
new file mode 100644
index 000000000..3736c1d6a
--- /dev/null
+++ b/scripts/git.orderfile
@@ -0,0 +1,36 @@
+#
+# order file for git, to produce patches which are easier to review
+# by diffing the important stuff like interface changes first.
+#
+# one-off usage:
+#   git diff -O scripts/git.orderfile ...
+#
+# add to git config:
+#   git config diff.orderFile scripts/git.orderfile
+#
+
+# Documentation
+docs/*
+*.rst
+
+# build system
+configure
+Makefile*
+*.mak
+meson.build
+
+# qapi schema
+qapi/*.json
+qga/*.json
+
+# semantic patches
+*.cocci
+
+# headers
+*.h
+
+# decoding tree specification
+*.decode
+
+# code
+*.c
diff --git a/scripts/hxtool b/scripts/hxtool
new file mode 100755
index 000000000..80516b943
--- /dev/null
+++ b/scripts/hxtool
@@ -0,0 +1,24 @@
+#!/bin/sh
+
+hxtoh()
+{
+    flag=1
+    while read -r str; do
+        case $str in
+            HXCOMM*)
+            ;;
+            SRST*|ERST*) flag=$(($flag^1))
+            ;;
+            *)
+            test $flag -eq 1 && printf "%s\n" "$str"
+            ;;
+        esac
+    done
+}
+
+case "$1" in
+"-h") hxtoh ;;
+*) exit 1 ;;
+esac < "$2"
+
+exit 0
diff --git a/scripts/hxtool-conv.pl b/scripts/hxtool-conv.pl
new file mode 100755
index 000000000..eede40b34
--- /dev/null
+++ b/scripts/hxtool-conv.pl
@@ -0,0 +1,137 @@
+#!/usr/bin/perl -w
+#
+# Script to convert .hx file STEXI/ETEXI blocks to SRST/ERST
+#
+# Copyright (C) 2020 Linaro
+#
+# This work is licensed under the terms of the GNU GPL, version 2 or
+# (at your option) any later version. See the COPYING file in the
+# top-level directory.
+
+# This script was only ever intended as a one-off conversion operation.
+# Please excuse the places where it is a bit hacky.
+# Some manual intervention after the conversion is expected, as are
+# some warnings from makeinfo.
+# Warning: this script is not idempotent: don't try to run it on
+# a .hx file that already has SRST/ERST sections.
+
+# Expected usage:
+# scripts/hxtool-conv.pl file.hx > file.hx.new
+
+use utf8;
+
+my $reading_texi = 0;
+my $texiblock = '';
+my @tables = ();
+
+sub update_tables($) {
+    my ($texi) = @_;
+    # Update our list of open table directives: every @table
+    # line in the texi fragment is added to the list, and every
+    # @end table line means we remove an entry from the list.
+    # If this fragment had a completely self contained table with
+    # both the @table and @end table lines, this will be a no-op.
+    foreach (split(/\n/, $texi)) {
+        push @tables, $_ if /^\@table/;
+        pop @tables if /^\@end table/;
+    }
+}
+
+sub only_table_directives($) {
+    # Return true if every line in the fragment is a start or end table directive
+    my ($texi) = @_;
+    foreach (split(/\n/, $texi)) {
+        return 0 unless /^\@table/ or /^\@end table/;
+    }
+    return 1;
+}
+
+sub output_rstblock($) {
+    # Write the output to /tmp/frag.texi, wrapped in whatever current @table
+    # lines we need.
+    my ($texi) = @_;
+
+    # As a special case, if this fragment is only table directives and
+    # nothing else, update our set of open table directives but otherwise
+    # ignore it. This avoids emitting an empty SRST/ERST block.
+    if (only_table_directives($texi)) {
+        update_tables($texi);
+        return;
+    }
+
+    open(my $fragfh, '>', '/tmp/frag.texi');
+    # First output the currently active set of open table directives
+    print $fragfh join("\n", @tables);
+    # Next, update our list of open table directives.
+    # We need to do this before we emit the closing table directives
+    # so that we emit the right number if this fragment had an
+    # unbalanced set of directives.
+    update_tables($texi);
+    # Then emit the texi fragment itself.
+    print $fragfh "\n$texi\n";
+    # Finally, add the necessary closing table directives.
+    print $fragfh "\@end table\n" x scalar @tables;
+    close $fragfh;
+
+    # Now invoke makeinfo/pandoc on it and slurp the results into a string
+    open(my $fh, '-|', "makeinfo --force -o - --docbook "
+         . "-D 'qemu_system_x86 QEMU_SYSTEM_X86_MACRO' "
+         . "-D 'qemu_system     QEMU_SYSTEM_MACRO'  /tmp/frag.texi "
+         . " | pandoc  -f docbook -t rst")
+        or die "can't start makeinfo/pandoc: $!";
+
+    binmode $fh, ':encoding(utf8)';
+
+    print "SRST\n";
+
+    # Slurp the whole thing into a string so we can do multiline
+    # string matches on it.
+    my $rst = do {
+        local $/ = undef;
+        <$fh>;
+    };
+    $rst =~ s/^-  − /-  /gm;
+    $rst =~ s/“/"/gm;
+    $rst =~ s/”/"/gm;
+    $rst =~ s/‘/'/gm;
+    $rst =~ s/’/'/gm;
+    $rst =~ s/QEMU_SYSTEM_MACRO/|qemu_system|/g;
+    $rst =~ s/QEMU_SYSTEM_X86_MACRO/|qemu_system_x86|/g;
+    $rst =~ s/(?=::\n\n +\|qemu)/.. parsed-literal/g;
+    $rst =~ s/:\n\n::$/::/gm;
+
+    # Fix up the invalid reference format makeinfo/pandoc emit:
+    # `Some string here <#anchorname>`__
+    # should be:
+    # :ref:`anchorname`
+    $rst =~ s/\`[^<`]+\<\#([^>]+)\>\`__/:ref:`$1`/gm;
+    print $rst;
+
+    close $fh or die "error on close: $!";
+    print "ERST\n";
+}
+
+# Read the whole .hx input file.
+while (<>) {
+    # Always print the current line
+    print;
+    if (/STEXI/) {
+        $reading_texi = 1;
+        $texiblock = '';
+        next;
+    }
+    if (/ETEXI/) {
+        $reading_texi = 0;
+        # dump RST version of block
+        output_rstblock($texiblock);
+        next;
+    }
+    if ($reading_texi) {
+        # Accumulate the texi into a string
+        # but drop findex entries as they will confuse makeinfo
+        next if /^\@findex/;
+        $texiblock .= $_;
+    }
+}
+
+die "Unexpectedly still in texi block at EOF" if $reading_texi;
diff --git a/scripts/kernel-doc b/scripts/kernel-doc
new file mode 100755
index 000000000..4fbaaa05e
--- /dev/null
+++ b/scripts/kernel-doc
@@ -0,0 +1,2256 @@
+#!/usr/bin/env perl
+# SPDX-License-Identifier: GPL-2.0
+
+use warnings;
+use strict;
+
+## Copyright (c) 1998 Michael Zucchi, All Rights Reserved        ##
+## Copyright (C) 2000, 1  Tim Waugh           ##
+## Copyright (C) 2001  Simon Huggins                             ##
+## Copyright (C) 2005-2012  Randy Dunlap                         ##
+## Copyright (C) 2012  Dan Luedtke                               ##
+## 								 ##
+## #define enhancements by Armin Kuster 	 ##
+## Copyright (c) 2000 MontaVista Software, Inc.			 ##
+## 								 ##
+## This software falls under the GNU General Public License.     ##
+## Please read the COPYING file for more information             ##
+
+# 18/01/2001 - 	Cleanups
+# 		Functions prototyped as foo(void) same as foo()
+# 		Stop eval'ing where we don't need to.
+# -- huggie@earth.li
+
+# 27/06/2001 -  Allowed whitespace after initial "/**" and
+#               allowed comments before function declarations.
+# -- Christian Kreibich 
+
+# Still to do:
+# 	- add perldoc documentation
+# 	- Look more closely at some of the scarier bits :)
+
+# 26/05/2001 - 	Support for separate source and object trees.
+#		Return error code.
+# 		Keith Owens 
+
+# 23/09/2001 - Added support for typedefs, structs, enums and unions
+#              Support for Context section; can be terminated using empty line
+#              Small fixes (like spaces vs. \s in regex)
+# -- Tim Jansen 
+
+# 25/07/2012 - Added support for HTML5
+# -- Dan Luedtke 
+
+sub usage {
+    my $message = <<"EOF";
+Usage: $0 [OPTION ...] FILE ...
+
+Read C language source or header FILEs, extract embedded documentation comments,
+and print formatted documentation to standard output.
+
+The documentation comments are identified by "/**" opening comment mark. See
+Documentation/doc-guide/kernel-doc.rst for the documentation comment syntax.
+
+Output format selection (mutually exclusive):
+  -man			Output troff manual page format. This is the default.
+  -rst			Output reStructuredText format.
+  -none			Do not output documentation, only warnings.
+
+Output selection (mutually exclusive):
+  -export		Only output documentation for symbols that have been
+			exported using EXPORT_SYMBOL() or EXPORT_SYMBOL_GPL()
+                        in any input FILE or -export-file FILE.
+  -internal		Only output documentation for symbols that have NOT been
+			exported using EXPORT_SYMBOL() or EXPORT_SYMBOL_GPL()
+                        in any input FILE or -export-file FILE.
+  -function NAME	Only output documentation for the given function(s)
+			or DOC: section title(s). All other functions and DOC:
+			sections are ignored. May be specified multiple times.
+  -nofunction NAME	Do NOT output documentation for the given function(s);
+			only output documentation for the other functions and
+			DOC: sections. May be specified multiple times.
+
+Output selection modifiers:
+  -sphinx-version VER   Generate rST syntax for the specified Sphinx version.
+                        Only works with reStructuredTextFormat.
+  -no-doc-sections	Do not output DOC: sections.
+  -enable-lineno        Enable output of #define LINENO lines. Only works with
+                        reStructuredText format.
+  -export-file FILE     Specify an additional FILE in which to look for
+                        EXPORT_SYMBOL() and EXPORT_SYMBOL_GPL(). To be used with
+                        -export or -internal. May be specified multiple times.
+
+Other parameters:
+  -v			Verbose output, more warnings and other information.
+  -h			Print this help.
+
+EOF
+    print $message;
+    exit 1;
+}
+
+#
+# format of comments.
+# In the following table, (...)? signifies optional structure.
+#                         (...)* signifies 0 or more structure elements
+# /**
+#  * function_name(:)? (- short description)?
+# (* @parameterx: (description of parameter x)?)*
+# (* a blank line)?
+#  * (Description:)? (Description of function)?
+#  * (section header: (section description)? )*
+#  (*)?*/
+#
+# So .. the trivial example would be:
+#
+# /**
+#  * my_function
+#  */
+#
+# If the Description: header tag is omitted, then there must be a blank line
+# after the last parameter specification.
+# e.g.
+# /**
+#  * my_function - does my stuff
+#  * @my_arg: its mine damnit
+#  *
+#  * Does my stuff explained.
+#  */
+#
+#  or, could also use:
+# /**
+#  * my_function - does my stuff
+#  * @my_arg: its mine damnit
+#  * Description: Does my stuff explained.
+#  */
+# etc.
+#
+# Besides functions you can also write documentation for structs, unions,
+# enums and typedefs. Instead of the function name you must write the name
+# of the declaration;  the struct/union/enum/typedef must always precede
+# the name. Nesting of declarations is not supported.
+# Use the argument mechanism to document members or constants.
+# e.g.
+# /**
+#  * struct my_struct - short description
+#  * @a: first member
+#  * @b: second member
+#  *
+#  * Longer description
+#  */
+# struct my_struct {
+#     int a;
+#     int b;
+# /* private: */
+#     int c;
+# };
+#
+# All descriptions can be multiline, except the short function description.
+#
+# For really longs structs, you can also describe arguments inside the
+# body of the struct.
+# eg.
+# /**
+#  * struct my_struct - short description
+#  * @a: first member
+#  * @b: second member
+#  *
+#  * Longer description
+#  */
+# struct my_struct {
+#     int a;
+#     int b;
+#     /**
+#      * @c: This is longer description of C
+#      *
+#      * You can use paragraphs to describe arguments
+#      * using this method.
+#      */
+#     int c;
+# };
+#
+# This should be use only for struct/enum members.
+#
+# You can also add additional sections. When documenting kernel functions you
+# should document the "Context:" of the function, e.g. whether the functions
+# can be called form interrupts. Unlike other sections you can end it with an
+# empty line.
+# A non-void function should have a "Return:" section describing the return
+# value(s).
+# Example-sections should contain the string EXAMPLE so that they are marked
+# appropriately in DocBook.
+#
+# Example:
+# /**
+#  * user_function - function that can only be called in user context
+#  * @a: some argument
+#  * Context: !in_interrupt()
+#  *
+#  * Some description
+#  * Example:
+#  *    user_function(22);
+#  */
+# ...
+#
+#
+# All descriptive text is further processed, scanning for the following special
+# patterns, which are highlighted appropriately.
+#
+# 'funcname()' - function
+# '$ENVVAR' - environmental variable
+# '&struct_name' - name of a structure (up to two words including 'struct')
+# '&struct_name.member' - name of a structure member
+# '@parameter' - name of a parameter
+# '%CONST' - name of a constant.
+# '``LITERAL``' - literal string without any spaces on it.
+
+## init lots of data
+
+my $errors = 0;
+my $warnings = 0;
+my $anon_struct_union = 0;
+
+# match expressions used to find embedded type information
+my $type_constant = '\b``([^\`]+)``\b';
+my $type_constant2 = '\%([-_\w]+)';
+my $type_func = '(\w+)\(\)';
+my $type_param = '\@(\w*((\.\w+)|(->\w+))*(\.\.\.)?)';
+my $type_fp_param = '\@(\w+)\(\)';  # Special RST handling for func ptr params
+my $type_env = '(\$\w+)';
+my $type_enum = '#(enum\s*([_\w]+))';
+my $type_struct = '#(struct\s*([_\w]+))';
+my $type_typedef = '#(([A-Z][_\w]*))';
+my $type_union = '#(union\s*([_\w]+))';
+my $type_member = '#([_\w]+)(\.|->)([_\w]+)';
+my $type_fallback = '(?!)';    # this never matches
+my $type_member_func = $type_member . '\(\)';
+
+# Output conversion substitutions.
+#  One for each output format
+
+# these are pretty rough
+my @highlights_man = (
+                      [$type_constant, "\$1"],
+                      [$type_constant2, "\$1"],
+                      [$type_func, "\\\\fB\$1\\\\fP"],
+                      [$type_enum, "\\\\fI\$1\\\\fP"],
+                      [$type_struct, "\\\\fI\$1\\\\fP"],
+                      [$type_typedef, "\\\\fI\$1\\\\fP"],
+                      [$type_union, "\\\\fI\$1\\\\fP"],
+                      [$type_param, "\\\\fI\$1\\\\fP"],
+                      [$type_member, "\\\\fI\$1\$2\$3\\\\fP"],
+                      [$type_fallback, "\\\\fI\$1\\\\fP"]
+		     );
+my $blankline_man = "";
+
+# rst-mode
+my @highlights_rst = (
+                       [$type_constant, "``\$1``"],
+                       [$type_constant2, "``\$1``"],
+                       # Note: need to escape () to avoid func matching later
+                       [$type_member_func, "\\:c\\:type\\:`\$1\$2\$3\\\\(\\\\) <\$1>`"],
+                       [$type_member, "\\:c\\:type\\:`\$1\$2\$3 <\$1>`"],
+		       [$type_fp_param, "**\$1\\\\(\\\\)**"],
+                       [$type_func, "\$1()"],
+                       [$type_enum, "\\:c\\:type\\:`\$1 <\$2>`"],
+                       [$type_struct, "\\:c\\:type\\:`\$1 <\$2>`"],
+                       [$type_typedef, "\\:c\\:type\\:`\$1 <\$2>`"],
+                       [$type_union, "\\:c\\:type\\:`\$1 <\$2>`"],
+                       # in rst this can refer to any type
+                       [$type_fallback, "\\:c\\:type\\:`\$1`"],
+                       [$type_param, "**\$1**"]
+		      );
+my $blankline_rst = "\n";
+
+# read arguments
+if ($#ARGV == -1) {
+    usage();
+}
+
+my $kernelversion;
+my $dohighlight = "";
+
+my $verbose = 0;
+my $output_mode = "rst";
+my $output_preformatted = 0;
+my $no_doc_sections = 0;
+my $enable_lineno = 0;
+my @highlights = @highlights_rst;
+my $blankline = $blankline_rst;
+my $modulename = "Kernel API";
+
+use constant {
+    OUTPUT_ALL          => 0, # output all symbols and doc sections
+    OUTPUT_INCLUDE      => 1, # output only specified symbols
+    OUTPUT_EXCLUDE      => 2, # output everything except specified symbols
+    OUTPUT_EXPORTED     => 3, # output exported symbols
+    OUTPUT_INTERNAL     => 4, # output non-exported symbols
+};
+my $output_selection = OUTPUT_ALL;
+my $show_not_found = 0;	# No longer used
+my $sphinx_version = "0.0"; # if not specified, assume old
+
+my @export_file_list;
+
+my @build_time;
+if (defined($ENV{'KBUILD_BUILD_TIMESTAMP'}) &&
+    (my $seconds = `date -d"${ENV{'KBUILD_BUILD_TIMESTAMP'}}" +%s`) ne '') {
+    @build_time = gmtime($seconds);
+} else {
+    @build_time = localtime;
+}
+
+my $man_date = ('January', 'February', 'March', 'April', 'May', 'June',
+		'July', 'August', 'September', 'October',
+		'November', 'December')[$build_time[4]] .
+  " " . ($build_time[5]+1900);
+
+# Essentially these are globals.
+# They probably want to be tidied up, made more localised or something.
+# CAVEAT EMPTOR!  Some of the others I localised may not want to be, which
+# could cause "use of undefined value" or other bugs.
+my ($function, %function_table, %parametertypes, $declaration_purpose);
+my $declaration_start_line;
+my ($type, $declaration_name, $return_type);
+my ($newsection, $newcontents, $prototype, $brcount, %source_map);
+
+if (defined($ENV{'KBUILD_VERBOSE'})) {
+	$verbose = "$ENV{'KBUILD_VERBOSE'}";
+}
+
+# Generated docbook code is inserted in a template at a point where
+# docbook v3.1 requires a non-zero sequence of RefEntry's; see:
+# http://www.oasis-open.org/docbook/documentation/reference/html/refentry.html
+# We keep track of number of generated entries and generate a dummy
+# if needs be to ensure the expanded template can be postprocessed
+# into html.
+my $section_counter = 0;
+
+my $lineprefix="";
+
+# Parser states
+use constant {
+    STATE_NORMAL        => 0, # normal code
+    STATE_NAME          => 1, # looking for function name
+    STATE_BODY_MAYBE    => 2, # body - or maybe more description
+    STATE_BODY          => 3, # the body of the comment
+    STATE_PROTO         => 4, # scanning prototype
+    STATE_DOCBLOCK      => 5, # documentation block
+    STATE_INLINE        => 6, # gathering documentation outside main block
+};
+my $state;
+my $in_doc_sect;
+my $leading_space;
+
+# Inline documentation state
+use constant {
+    STATE_INLINE_NA     => 0, # not applicable ($state != STATE_INLINE)
+    STATE_INLINE_NAME   => 1, # looking for member name (@foo:)
+    STATE_INLINE_TEXT   => 2, # looking for member documentation
+    STATE_INLINE_END    => 3, # done
+    STATE_INLINE_ERROR  => 4, # error - Comment without header was found.
+                              # Spit a warning as it's not
+                              # proper kernel-doc and ignore the rest.
+};
+my $inline_doc_state;
+
+#declaration types: can be
+# 'function', 'struct', 'union', 'enum', 'typedef'
+my $decl_type;
+
+my $doc_start = '^/\*\*\s*$'; # Allow whitespace at end of comment start.
+my $doc_end = '\*/';
+my $doc_com = '\s*\*\s*';
+my $doc_com_body = '\s*\* ?';
+my $doc_decl = $doc_com . '(\w+)';
+# @params and a strictly limited set of supported section names
+my $doc_sect = $doc_com .
+    '\s*(\@[.\w]+|\@\.\.\.|description|context|returns?|notes?|examples?)\s*:(.*)';
+my $doc_content = $doc_com_body . '(.*)';
+my $doc_block = $doc_com . 'DOC:\s*(.*)?';
+my $doc_inline_start = '^\s*/\*\*\s*$';
+my $doc_inline_sect = '\s*\*\s*(@\s*[\w][\w\.]*\s*):(.*)';
+my $doc_inline_end = '^\s*\*/\s*$';
+my $doc_inline_oneline = '^\s*/\*\*\s*(@[\w\s]+):\s*(.*)\s*\*/\s*$';
+my $export_symbol = '^\s*EXPORT_SYMBOL(_GPL)?\s*\(\s*(\w+)\s*\)\s*;';
+
+my %parameterdescs;
+my %parameterdesc_start_lines;
+my @parameterlist;
+my %sections;
+my @sectionlist;
+my %section_start_lines;
+my $sectcheck;
+my $struct_actual;
+
+my $contents = "";
+my $new_start_line = 0;
+
+# the canonical section names. see also $doc_sect above.
+my $section_default = "Description";	# default section
+my $section_intro = "Introduction";
+my $section = $section_default;
+my $section_context = "Context";
+my $section_return = "Return";
+
+my $undescribed = "-- undescribed --";
+
+reset_state();
+
+while ($ARGV[0] =~ m/^--?(.*)/) {
+    my $cmd = $1;
+    shift @ARGV;
+    if ($cmd eq "man") {
+	$output_mode = "man";
+	@highlights = @highlights_man;
+	$blankline = $blankline_man;
+    } elsif ($cmd eq "rst") {
+	$output_mode = "rst";
+	@highlights = @highlights_rst;
+	$blankline = $blankline_rst;
+    } elsif ($cmd eq "none") {
+	$output_mode = "none";
+    } elsif ($cmd eq "module") { # not needed for XML, inherits from calling document
+	$modulename = shift @ARGV;
+    } elsif ($cmd eq "function") { # to only output specific functions
+	$output_selection = OUTPUT_INCLUDE;
+	$function = shift @ARGV;
+	$function_table{$function} = 1;
+    } elsif ($cmd eq "nofunction") { # output all except specific functions
+	$output_selection = OUTPUT_EXCLUDE;
+	$function = shift @ARGV;
+	$function_table{$function} = 1;
+    } elsif ($cmd eq "export") { # only exported symbols
+	$output_selection = OUTPUT_EXPORTED;
+	%function_table = ();
+    } elsif ($cmd eq "internal") { # only non-exported symbols
+	$output_selection = OUTPUT_INTERNAL;
+	%function_table = ();
+    } elsif ($cmd eq "export-file") {
+	my $file = shift @ARGV;
+	push(@export_file_list, $file);
+    } elsif ($cmd eq "v") {
+	$verbose = 1;
+    } elsif (($cmd eq "h") || ($cmd eq "help")) {
+	usage();
+    } elsif ($cmd eq 'no-doc-sections') {
+	    $no_doc_sections = 1;
+    } elsif ($cmd eq 'enable-lineno') {
+	    $enable_lineno = 1;
+    } elsif ($cmd eq 'show-not-found') {
+	$show_not_found = 1;  # A no-op but don't fail
+    } elsif ($cmd eq 'sphinx-version') {
+        $sphinx_version = shift @ARGV;
+    } else {
+	# Unknown argument
+        usage();
+    }
+}
+
+# continue execution near EOF;
+
+# get kernel version from env
+sub get_kernel_version() {
+    my $version = 'unknown kernel version';
+
+    if (defined($ENV{'KERNELVERSION'})) {
+	$version = $ENV{'KERNELVERSION'};
+    }
+    return $version;
+}
+
+#
+sub print_lineno {
+    my $lineno = shift;
+    if ($enable_lineno && defined($lineno)) {
+        print "#define LINENO " . $lineno . "\n";
+    }
+}
+##
+# dumps section contents to arrays/hashes intended for that purpose.
+#
+sub dump_section {
+    my $file = shift;
+    my $name = shift;
+    my $contents = join "\n", @_;
+
+    if ($name =~ m/$type_param/) {
+	$name = $1;
+	$parameterdescs{$name} = $contents;
+	$sectcheck = $sectcheck . $name . " ";
+        $parameterdesc_start_lines{$name} = $new_start_line;
+        $new_start_line = 0;
+    } elsif ($name eq "@\.\.\.") {
+	$name = "...";
+	$parameterdescs{$name} = $contents;
+	$sectcheck = $sectcheck . $name . " ";
+        $parameterdesc_start_lines{$name} = $new_start_line;
+        $new_start_line = 0;
+    } else {
+	if (defined($sections{$name}) && ($sections{$name} ne "")) {
+	    # Only warn on user specified duplicate section names.
+	    if ($name ne $section_default) {
+		print STDERR "${file}:$.: warning: duplicate section name '$name'\n";
+		++$warnings;
+	    }
+	    $sections{$name} .= $contents;
+	} else {
+	    $sections{$name} = $contents;
+	    push @sectionlist, $name;
+            $section_start_lines{$name} = $new_start_line;
+            $new_start_line = 0;
+	}
+    }
+}
+
+##
+# dump DOC: section after checking that it should go out
+#
+sub dump_doc_section {
+    my $file = shift;
+    my $name = shift;
+    my $contents = join "\n", @_;
+
+    if ($no_doc_sections) {
+        return;
+    }
+
+    if (($output_selection == OUTPUT_ALL) ||
+	($output_selection == OUTPUT_INCLUDE &&
+	 defined($function_table{$name})) ||
+	($output_selection == OUTPUT_EXCLUDE &&
+	 !defined($function_table{$name})))
+    {
+	dump_section($file, $name, $contents);
+	output_blockhead({'sectionlist' => \@sectionlist,
+			  'sections' => \%sections,
+			  'module' => $modulename,
+			  'content-only' => ($output_selection != OUTPUT_ALL), });
+    }
+}
+
+##
+# output function
+#
+# parameterdescs, a hash.
+#  function => "function name"
+#  parameterlist => @list of parameters
+#  parameterdescs => %parameter descriptions
+#  sectionlist => @list of sections
+#  sections => %section descriptions
+#
+
+sub output_highlight {
+    my $contents = join "\n",@_;
+    my $line;
+
+#   DEBUG
+#   if (!defined $contents) {
+#	use Carp;
+#	confess "output_highlight got called with no args?\n";
+#   }
+
+#   print STDERR "contents b4:$contents\n";
+    eval $dohighlight;
+    die $@ if $@;
+#   print STDERR "contents af:$contents\n";
+
+    foreach $line (split "\n", $contents) {
+	if (! $output_preformatted) {
+	    $line =~ s/^\s*//;
+	}
+	if ($line eq ""){
+	    if (! $output_preformatted) {
+		print $lineprefix, $blankline;
+	    }
+	} else {
+	    if ($output_mode eq "man" && substr($line, 0, 1) eq ".") {
+		print "\\&$line";
+	    } else {
+		print $lineprefix, $line;
+	    }
+	}
+	print "\n";
+    }
+}
+
+##
+# output function in man
+sub output_function_man(%) {
+    my %args = %{$_[0]};
+    my ($parameter, $section);
+    my $count;
+
+    print ".TH \"$args{'function'}\" 9 \"$args{'function'}\" \"$man_date\" \"Kernel Hacker's Manual\" LINUX\n";
+
+    print ".SH NAME\n";
+    print $args{'function'} . " \\- " . $args{'purpose'} . "\n";
+
+    print ".SH SYNOPSIS\n";
+    if ($args{'functiontype'} ne "") {
+	print ".B \"" . $args{'functiontype'} . "\" " . $args{'function'} . "\n";
+    } else {
+	print ".B \"" . $args{'function'} . "\n";
+    }
+    $count = 0;
+    my $parenth = "(";
+    my $post = ",";
+    foreach my $parameter (@{$args{'parameterlist'}}) {
+	if ($count == $#{$args{'parameterlist'}}) {
+	    $post = ");";
+	}
+	$type = $args{'parametertypes'}{$parameter};
+	if ($type =~ m/([^\(]*\(\*)\s*\)\s*\(([^\)]*)\)/) {
+	    # pointer-to-function
+	    print ".BI \"" . $parenth . $1 . "\" " . $parameter . " \") (" . $2 . ")" . $post . "\"\n";
+	} else {
+	    $type =~ s/([^\*])$/$1 /;
+	    print ".BI \"" . $parenth . $type . "\" " . $parameter . " \"" . $post . "\"\n";
+	}
+	$count++;
+	$parenth = "";
+    }
+
+    print ".SH ARGUMENTS\n";
+    foreach $parameter (@{$args{'parameterlist'}}) {
+	my $parameter_name = $parameter;
+	$parameter_name =~ s/\[.*//;
+
+	print ".IP \"" . $parameter . "\" 12\n";
+	output_highlight($args{'parameterdescs'}{$parameter_name});
+    }
+    foreach $section (@{$args{'sectionlist'}}) {
+	print ".SH \"", uc $section, "\"\n";
+	output_highlight($args{'sections'}{$section});
+    }
+}
+
+##
+# output enum in man
+sub output_enum_man(%) {
+    my %args = %{$_[0]};
+    my ($parameter, $section);
+    my $count;
+
+    print ".TH \"$args{'module'}\" 9 \"enum $args{'enum'}\" \"$man_date\" \"API Manual\" LINUX\n";
+
+    print ".SH NAME\n";
+    print "enum " . $args{'enum'} . " \\- " . $args{'purpose'} . "\n";
+
+    print ".SH SYNOPSIS\n";
+    print "enum " . $args{'enum'} . " {\n";
+    $count = 0;
+    foreach my $parameter (@{$args{'parameterlist'}}) {
+	print ".br\n.BI \"    $parameter\"\n";
+	if ($count == $#{$args{'parameterlist'}}) {
+	    print "\n};\n";
+	    last;
+	}
+	else {
+	    print ", \n.br\n";
+	}
+	$count++;
+    }
+
+    print ".SH Constants\n";
+    foreach $parameter (@{$args{'parameterlist'}}) {
+	my $parameter_name = $parameter;
+	$parameter_name =~ s/\[.*//;
+
+	print ".IP \"" . $parameter . "\" 12\n";
+	output_highlight($args{'parameterdescs'}{$parameter_name});
+    }
+    foreach $section (@{$args{'sectionlist'}}) {
+	print ".SH \"$section\"\n";
+	output_highlight($args{'sections'}{$section});
+    }
+}
+
+##
+# output struct in man
+sub output_struct_man(%) {
+    my %args = %{$_[0]};
+    my ($parameter, $section);
+
+    print ".TH \"$args{'module'}\" 9 \"" . $args{'type'} . " " . $args{'struct'} . "\" \"$man_date\" \"API Manual\" LINUX\n";
+
+    print ".SH NAME\n";
+    print $args{'type'} . " " . $args{'struct'} . " \\- " . $args{'purpose'} . "\n";
+
+    my $declaration = $args{'definition'};
+    $declaration =~ s/\t/  /g;
+    $declaration =~ s/\n/"\n.br\n.BI \"/g;
+    print ".SH SYNOPSIS\n";
+    print $args{'type'} . " " . $args{'struct'} . " {\n.br\n";
+    print ".BI \"$declaration\n};\n.br\n\n";
+
+    print ".SH Members\n";
+    foreach $parameter (@{$args{'parameterlist'}}) {
+	($parameter =~ /^#/) && next;
+
+	my $parameter_name = $parameter;
+	$parameter_name =~ s/\[.*//;
+
+	($args{'parameterdescs'}{$parameter_name} ne $undescribed) || next;
+	print ".IP \"" . $parameter . "\" 12\n";
+	output_highlight($args{'parameterdescs'}{$parameter_name});
+    }
+    foreach $section (@{$args{'sectionlist'}}) {
+	print ".SH \"$section\"\n";
+	output_highlight($args{'sections'}{$section});
+    }
+}
+
+##
+# output typedef in man
+sub output_typedef_man(%) {
+    my %args = %{$_[0]};
+    my ($parameter, $section);
+
+    print ".TH \"$args{'module'}\" 9 \"$args{'typedef'}\" \"$man_date\" \"API Manual\" LINUX\n";
+
+    print ".SH NAME\n";
+    print "typedef " . $args{'typedef'} . " \\- " . $args{'purpose'} . "\n";
+
+    foreach $section (@{$args{'sectionlist'}}) {
+	print ".SH \"$section\"\n";
+	output_highlight($args{'sections'}{$section});
+    }
+}
+
+sub output_blockhead_man(%) {
+    my %args = %{$_[0]};
+    my ($parameter, $section);
+    my $count;
+
+    print ".TH \"$args{'module'}\" 9 \"$args{'module'}\" \"$man_date\" \"API Manual\" LINUX\n";
+
+    foreach $section (@{$args{'sectionlist'}}) {
+	print ".SH \"$section\"\n";
+	output_highlight($args{'sections'}{$section});
+    }
+}
+
+##
+# output in restructured text
+#
+
+#
+# This could use some work; it's used to output the DOC: sections, and
+# starts by putting out the name of the doc section itself, but that tends
+# to duplicate a header already in the template file.
+#
+sub output_blockhead_rst(%) {
+    my %args = %{$_[0]};
+    my ($parameter, $section);
+
+    foreach $section (@{$args{'sectionlist'}}) {
+	if ($output_selection != OUTPUT_INCLUDE) {
+	    print "**$section**\n\n";
+	}
+        print_lineno($section_start_lines{$section});
+	output_highlight_rst($args{'sections'}{$section});
+	print "\n";
+    }
+}
+
+#
+# Apply the RST highlights to a sub-block of text.
+#
+sub highlight_block($) {
+    # The dohighlight kludge requires the text be called $contents
+    my $contents = shift;
+    eval $dohighlight;
+    die $@ if $@;
+    return $contents;
+}
+
+#
+# Regexes used only here.
+#
+my $sphinx_literal = '^[^.].*::$';
+my $sphinx_cblock = '^\.\.\ +code-block::';
+
+sub output_highlight_rst {
+    my $input = join "\n",@_;
+    my $output = "";
+    my $line;
+    my $in_literal = 0;
+    my $litprefix;
+    my $block = "";
+
+    foreach $line (split "\n",$input) {
+	#
+	# If we're in a literal block, see if we should drop out
+	# of it.  Otherwise pass the line straight through unmunged.
+	#
+	if ($in_literal) {
+	    if (! ($line =~ /^\s*$/)) {
+		#
+		# If this is the first non-blank line in a literal
+		# block we need to figure out what the proper indent is.
+		#
+		if ($litprefix eq "") {
+		    $line =~ /^(\s*)/;
+		    $litprefix = '^' . $1;
+		    $output .= $line . "\n";
+		} elsif (! ($line =~ /$litprefix/)) {
+		    $in_literal = 0;
+		} else {
+		    $output .= $line . "\n";
+		}
+	    } else {
+		$output .= $line . "\n";
+	    }
+	}
+	#
+	# Not in a literal block (or just dropped out)
+	#
+	if (! $in_literal) {
+	    $block .= $line . "\n";
+	    if (($line =~ /$sphinx_literal/) || ($line =~ /$sphinx_cblock/)) {
+		$in_literal = 1;
+		$litprefix = "";
+		$output .= highlight_block($block);
+		$block = ""
+	    }
+	}
+    }
+
+    if ($block) {
+	$output .= highlight_block($block);
+    }
+    foreach $line (split "\n", $output) {
+	print $lineprefix . $line . "\n";
+    }
+}
+
+sub output_function_rst(%) {
+    my %args = %{$_[0]};
+    my ($parameter, $section);
+    my $oldprefix = $lineprefix;
+    my $start = "";
+
+    if ($args{'typedef'}) {
+	print ".. c:type:: ". $args{'function'} . "\n\n";
+	print_lineno($declaration_start_line);
+	print "   **Typedef**: ";
+	$lineprefix = "";
+	output_highlight_rst($args{'purpose'});
+	$start = "\n\n**Syntax**\n\n  ``";
+    } else {
+        if ((split(/\./, $sphinx_version))[0] >= 3) {
+            # Sphinx 3 and later distinguish macros and functions and
+            # complain if you use c:function with something that's not
+            # syntactically valid as a function declaration.
+            # We assume that anything with a return type is a function
+            # and anything without is a macro.
+            if ($args{'functiontype'} ne "") {
+                print ".. c:function:: ";
+            } else {
+                print ".. c:macro:: ";
+            }
+        } else {
+            # Older Sphinx don't support documenting macros that take
+            # arguments with c:macro, and don't complain about the use
+            # of c:function for this.
+            print ".. c:function:: ";
+        }
+    }
+    if ($args{'functiontype'} ne "") {
+	$start .= $args{'functiontype'} . " " . $args{'function'} . " (";
+    } else {
+	$start .= $args{'function'} . " (";
+    }
+    print $start;
+
+    my $count = 0;
+    foreach my $parameter (@{$args{'parameterlist'}}) {
+	if ($count ne 0) {
+	    print ", ";
+	}
+	$count++;
+	$type = $args{'parametertypes'}{$parameter};
+
+	if ($type =~ m/([^\(]*\(\*)\s*\)\s*\(([^\)]*)\)/) {
+	    # pointer-to-function
+	    print $1 . $parameter . ") (" . $2 . ")";
+	} else {
+	    print $type . " " . $parameter;
+	}
+    }
+    if ($args{'typedef'}) {
+	print ");``\n\n";
+    } else {
+	print ")\n\n";
+	print_lineno($declaration_start_line);
+	$lineprefix = "   ";
+	output_highlight_rst($args{'purpose'});
+	print "\n";
+    }
+
+    print "**Parameters**\n\n";
+    $lineprefix = "  ";
+    foreach $parameter (@{$args{'parameterlist'}}) {
+	my $parameter_name = $parameter;
+	$parameter_name =~ s/\[.*//;
+	$type = $args{'parametertypes'}{$parameter};
+
+	if ($type ne "") {
+	    print "``$type $parameter``\n";
+	} else {
+	    print "``$parameter``\n";
+	}
+
+        print_lineno($parameterdesc_start_lines{$parameter_name});
+
+	if (defined($args{'parameterdescs'}{$parameter_name}) &&
+	    $args{'parameterdescs'}{$parameter_name} ne $undescribed) {
+	    output_highlight_rst($args{'parameterdescs'}{$parameter_name});
+	} else {
+	    print "  *undescribed*\n";
+	}
+	print "\n";
+    }
+
+    $lineprefix = $oldprefix;
+    output_section_rst(@_);
+}
+
+sub output_section_rst(%) {
+    my %args = %{$_[0]};
+    my $section;
+    my $oldprefix = $lineprefix;
+    $lineprefix = "";
+
+    foreach $section (@{$args{'sectionlist'}}) {
+	print "**$section**\n\n";
+        print_lineno($section_start_lines{$section});
+	output_highlight_rst($args{'sections'}{$section});
+	print "\n";
+    }
+    print "\n";
+    $lineprefix = $oldprefix;
+}
+
+sub output_enum_rst(%) {
+    my %args = %{$_[0]};
+    my ($parameter);
+    my $oldprefix = $lineprefix;
+    my $count;
+    my $name = "enum " . $args{'enum'};
+
+    print "\n\n.. c:type:: " . $name . "\n\n";
+    print_lineno($declaration_start_line);
+    $lineprefix = "   ";
+    output_highlight_rst($args{'purpose'});
+    print "\n";
+
+    print "**Constants**\n\n";
+    $lineprefix = "  ";
+    foreach $parameter (@{$args{'parameterlist'}}) {
+	print "``$parameter``\n";
+	if ($args{'parameterdescs'}{$parameter} ne $undescribed) {
+	    output_highlight_rst($args{'parameterdescs'}{$parameter});
+	} else {
+	    print "  *undescribed*\n";
+	}
+	print "\n";
+    }
+
+    $lineprefix = $oldprefix;
+    output_section_rst(@_);
+}
+
+sub output_typedef_rst(%) {
+    my %args = %{$_[0]};
+    my ($parameter);
+    my $oldprefix = $lineprefix;
+    my $name = "typedef " . $args{'typedef'};
+
+    print "\n\n.. c:type:: " . $name . "\n\n";
+    print_lineno($declaration_start_line);
+    $lineprefix = "   ";
+    output_highlight_rst($args{'purpose'});
+    print "\n";
+
+    $lineprefix = $oldprefix;
+    output_section_rst(@_);
+}
+
+sub output_struct_rst(%) {
+    my %args = %{$_[0]};
+    my ($parameter);
+    my $oldprefix = $lineprefix;
+    my $name = $args{'type'} . " " . $args{'struct'};
+
+    # Sphinx 3.0 and up will emit warnings for "c:type:: struct Foo".
+    # It wants to see "c:struct:: Foo" (and will add the word 'struct' in
+    # the rendered output).
+    if ((split(/\./, $sphinx_version))[0] >= 3) {
+        my $sname = $name;
+        $sname =~ s/^struct //;
+        print "\n\n.. c:struct:: " . $sname . "\n\n";
+    } else {
+        print "\n\n.. c:type:: " . $name . "\n\n";
+    }
+    print_lineno($declaration_start_line);
+    $lineprefix = "   ";
+    output_highlight_rst($args{'purpose'});
+    print "\n";
+
+    print "**Definition**\n\n";
+    print "::\n\n";
+    my $declaration = $args{'definition'};
+    $declaration =~ s/\t/  /g;
+    print "  " . $args{'type'} . " " . $args{'struct'} . " {\n$declaration  };\n\n";
+
+    print "**Members**\n\n";
+    $lineprefix = "  ";
+    foreach $parameter (@{$args{'parameterlist'}}) {
+	($parameter =~ /^#/) && next;
+
+	my $parameter_name = $parameter;
+	$parameter_name =~ s/\[.*//;
+
+	($args{'parameterdescs'}{$parameter_name} ne $undescribed) || next;
+	$type = $args{'parametertypes'}{$parameter};
+        print_lineno($parameterdesc_start_lines{$parameter_name});
+	print "``" . $parameter . "``\n";
+	output_highlight_rst($args{'parameterdescs'}{$parameter_name});
+	print "\n";
+    }
+    print "\n";
+
+    $lineprefix = $oldprefix;
+    output_section_rst(@_);
+}
+
+## none mode output functions
+
+sub output_function_none(%) {
+}
+
+sub output_enum_none(%) {
+}
+
+sub output_typedef_none(%) {
+}
+
+sub output_struct_none(%) {
+}
+
+sub output_blockhead_none(%) {
+}
+
+##
+# generic output function for all types (function, struct/union, typedef, enum);
+# calls the generated, variable output_ function name based on
+# functype and output_mode
+sub output_declaration {
+    no strict 'refs';
+    my $name = shift;
+    my $functype = shift;
+    my $func = "output_${functype}_$output_mode";
+    if (($output_selection == OUTPUT_ALL) ||
+	(($output_selection == OUTPUT_INCLUDE ||
+	  $output_selection == OUTPUT_EXPORTED) &&
+	 defined($function_table{$name})) ||
+	(($output_selection == OUTPUT_EXCLUDE ||
+	  $output_selection == OUTPUT_INTERNAL) &&
+	 !($functype eq "function" && defined($function_table{$name}))))
+    {
+	&$func(@_);
+	$section_counter++;
+    }
+}
+
+##
+# generic output function - calls the right one based on current output mode.
+sub output_blockhead {
+    no strict 'refs';
+    my $func = "output_blockhead_" . $output_mode;
+    &$func(@_);
+    $section_counter++;
+}
+
+##
+# takes a declaration (struct, union, enum, typedef) and
+# invokes the right handler. NOT called for functions.
+sub dump_declaration($$) {
+    no strict 'refs';
+    my ($prototype, $file) = @_;
+    my $func = "dump_" . $decl_type;
+    &$func(@_);
+}
+
+sub dump_union($$) {
+    dump_struct(@_);
+}
+
+sub dump_struct($$) {
+    my $x = shift;
+    my $file = shift;
+
+    if ($x =~ /(struct|union)\s+(\w+)\s*\{(.*)\}(\s*(__packed|__aligned|__attribute__\s*\(\([a-z0-9,_\s\(\)]*\)\)))*/) {
+	my $decl_type = $1;
+	$declaration_name = $2;
+	my $members = $3;
+
+	# ignore members marked private:
+	$members =~ s/\/\*\s*private:.*?\/\*\s*public:.*?\*\///gosi;
+	$members =~ s/\/\*\s*private:.*//gosi;
+	# strip comments:
+	$members =~ s/\/\*.*?\*\///gos;
+	# strip attributes
+	$members =~ s/\s*__attribute__\s*\(\([a-z0-9,_\*\s\(\)]*\)\)//gi;
+	$members =~ s/\s*__aligned\s*\([^;]*\)//gos;
+	$members =~ s/\s*__packed\s*//gos;
+	$members =~ s/\s*CRYPTO_MINALIGN_ATTR//gos;
+	# replace DECLARE_BITMAP
+	$members =~ s/DECLARE_BITMAP\s*\(([^,)]+),\s*([^,)]+)\)/unsigned long $1\[BITS_TO_LONGS($2)\]/gos;
+	# replace DECLARE_HASHTABLE
+	$members =~ s/DECLARE_HASHTABLE\s*\(([^,)]+),\s*([^,)]+)\)/unsigned long $1\[1 << (($2) - 1)\]/gos;
+	# replace DECLARE_KFIFO
+	$members =~ s/DECLARE_KFIFO\s*\(([^,)]+),\s*([^,)]+),\s*([^,)]+)\)/$2 \*$1/gos;
+	# replace DECLARE_KFIFO_PTR
+	$members =~ s/DECLARE_KFIFO_PTR\s*\(([^,)]+),\s*([^,)]+)\)/$2 \*$1/gos;
+
+	my $declaration = $members;
+
+	# Split nested struct/union elements as newer ones
+	while ($members =~ m/(struct|union)([^\{\};]+)\{([^\{\}]*)\}([^\{\}\;]*)\;/) {
+		my $newmember;
+		my $maintype = $1;
+		my $ids = $4;
+		my $content = $3;
+		foreach my $id(split /,/, $ids) {
+			$newmember .= "$maintype $id; ";
+
+			$id =~ s/[:\[].*//;
+			$id =~ s/^\s*\**(\S+)\s*/$1/;
+			foreach my $arg (split /;/, $content) {
+				next if ($arg =~ m/^\s*$/);
+				if ($arg =~ m/^([^\(]+\(\*?\s*)([\w\.]*)(\s*\).*)/) {
+					# pointer-to-function
+					my $type = $1;
+					my $name = $2;
+					my $extra = $3;
+					next if (!$name);
+					if ($id =~ m/^\s*$/) {
+						# anonymous struct/union
+						$newmember .= "$type$name$extra; ";
+					} else {
+						$newmember .= "$type$id.$name$extra; ";
+					}
+				} else {
+					my $type;
+					my $names;
+					$arg =~ s/^\s+//;
+					$arg =~ s/\s+$//;
+					# Handle bitmaps
+					$arg =~ s/:\s*\d+\s*//g;
+					# Handle arrays
+					$arg =~ s/\[.*\]//g;
+					# The type may have multiple words,
+					# and multiple IDs can be defined, like:
+					#	const struct foo, *bar, foobar
+					# So, we remove spaces when parsing the
+					# names, in order to match just names
+					# and commas for the names
+					$arg =~ s/\s*,\s*/,/g;
+					if ($arg =~ m/(.*)\s+([\S+,]+)/) {
+						$type = $1;
+						$names = $2;
+					} else {
+						$newmember .= "$arg; ";
+						next;
+					}
+					foreach my $name (split /,/, $names) {
+						$name =~ s/^\s*\**(\S+)\s*/$1/;
+						next if (($name =~ m/^\s*$/));
+						if ($id =~ m/^\s*$/) {
+							# anonymous struct/union
+							$newmember .= "$type $name; ";
+						} else {
+							$newmember .= "$type $id.$name; ";
+						}
+					}
+				}
+			}
+		}
+		$members =~ s/(struct|union)([^\{\};]+)\{([^\{\}]*)\}([^\{\}\;]*)\;/$newmember/;
+	}
+
+	# Ignore other nested elements, like enums
+	$members =~ s/(\{[^\{\}]*\})//g;
+
+	create_parameterlist($members, ';', $file, $declaration_name);
+	check_sections($file, $declaration_name, $decl_type, $sectcheck, $struct_actual);
+
+	# Adjust declaration for better display
+	$declaration =~ s/([\{;])/$1\n/g;
+	$declaration =~ s/\}\s+;/};/g;
+	# Better handle inlined enums
+	do {} while ($declaration =~ s/(enum\s+\{[^\}]+),([^\n])/$1,\n$2/);
+
+	my @def_args = split /\n/, $declaration;
+	my $level = 1;
+	$declaration = "";
+	foreach my $clause (@def_args) {
+		$clause =~ s/^\s+//;
+		$clause =~ s/\s+$//;
+		$clause =~ s/\s+/ /;
+		next if (!$clause);
+		$level-- if ($clause =~ m/(\})/ && $level > 1);
+		if (!($clause =~ m/^\s*#/)) {
+			$declaration .= "\t" x $level;
+		}
+		$declaration .= "\t" . $clause . "\n";
+		$level++ if ($clause =~ m/(\{)/ && !($clause =~m/\}/));
+	}
+	output_declaration($declaration_name,
+			   'struct',
+			   {'struct' => $declaration_name,
+			    'module' => $modulename,
+			    'definition' => $declaration,
+			    'parameterlist' => \@parameterlist,
+			    'parameterdescs' => \%parameterdescs,
+			    'parametertypes' => \%parametertypes,
+			    'sectionlist' => \@sectionlist,
+			    'sections' => \%sections,
+			    'purpose' => $declaration_purpose,
+			    'type' => $decl_type
+			   });
+    }
+    else {
+	print STDERR "${file}:$.: error: Cannot parse struct or union!\n";
+	++$errors;
+    }
+}
+
+
+sub show_warnings($$) {
+	my $functype = shift;
+	my $name = shift;
+
+	return 1 if ($output_selection == OUTPUT_ALL);
+
+	if ($output_selection == OUTPUT_EXPORTED) {
+		if (defined($function_table{$name})) {
+			return 1;
+		} else {
+			return 0;
+		}
+	}
+        if ($output_selection == OUTPUT_INTERNAL) {
+		if (!($functype eq "function" && defined($function_table{$name}))) {
+			return 1;
+		} else {
+			return 0;
+		}
+	}
+	if ($output_selection == OUTPUT_INCLUDE) {
+		if (defined($function_table{$name})) {
+			return 1;
+		} else {
+			return 0;
+		}
+	}
+	if ($output_selection == OUTPUT_EXCLUDE) {
+		if (!defined($function_table{$name})) {
+			return 1;
+		} else {
+			return 0;
+		}
+	}
+	die("Please add the new output type at show_warnings()");
+}
+
+sub dump_enum($$) {
+    my $x = shift;
+    my $file = shift;
+
+    $x =~ s@/\*.*?\*/@@gos;	# strip comments.
+    # strip #define macros inside enums
+    $x =~ s@#\s*((define|ifdef)\s+|endif)[^;]*;@@gos;
+
+    if ($x =~ /enum\s+(\w*)\s*\{(.*)\}/) {
+	$declaration_name = $1;
+	my $members = $2;
+	my %_members;
+
+	$members =~ s/\s+$//;
+
+	foreach my $arg (split ',', $members) {
+	    $arg =~ s/^\s*(\w+).*/$1/;
+	    push @parameterlist, $arg;
+	    if (!$parameterdescs{$arg}) {
+		$parameterdescs{$arg} = $undescribed;
+	        if (show_warnings("enum", $declaration_name)) {
+			print STDERR "${file}:$.: warning: Enum value '$arg' not described in enum '$declaration_name'\n";
+		}
+	    }
+	    $_members{$arg} = 1;
+	}
+
+	while (my ($k, $v) = each %parameterdescs) {
+	    if (!exists($_members{$k})) {
+	        if (show_warnings("enum", $declaration_name)) {
+		     print STDERR "${file}:$.: warning: Excess enum value '$k' description in '$declaration_name'\n";
+		}
+	    }
+        }
+
+	output_declaration($declaration_name,
+			   'enum',
+			   {'enum' => $declaration_name,
+			    'module' => $modulename,
+			    'parameterlist' => \@parameterlist,
+			    'parameterdescs' => \%parameterdescs,
+			    'sectionlist' => \@sectionlist,
+			    'sections' => \%sections,
+			    'purpose' => $declaration_purpose
+			   });
+    }
+    else {
+	print STDERR "${file}:$.: error: Cannot parse enum!\n";
+	++$errors;
+    }
+}
+
+sub dump_typedef($$) {
+    my $x = shift;
+    my $file = shift;
+
+    $x =~ s@/\*.*?\*/@@gos;	# strip comments.
+
+    # Parse function prototypes
+    if ($x =~ /typedef\s+(\w+\s*\**)\s*\(\*?\s*(\w\S+)\s*\)\s*\((.*)\);/ ||
+	$x =~ /typedef\s+(\w+\s*\**)\s*(\w\S+)\s*\s*\((.*)\);/) {
+
+	# Function typedefs
+	$return_type = $1;
+	$declaration_name = $2;
+	my $args = $3;
+
+	create_parameterlist($args, ',', $file, $declaration_name);
+
+	output_declaration($declaration_name,
+			   'function',
+			   {'function' => $declaration_name,
+			    'typedef' => 1,
+			    'module' => $modulename,
+			    'functiontype' => $return_type,
+			    'parameterlist' => \@parameterlist,
+			    'parameterdescs' => \%parameterdescs,
+			    'parametertypes' => \%parametertypes,
+			    'sectionlist' => \@sectionlist,
+			    'sections' => \%sections,
+			    'purpose' => $declaration_purpose
+			   });
+	return;
+    }
+
+    while (($x =~ /\(*.\)\s*;$/) || ($x =~ /\[*.\]\s*;$/)) {
+	$x =~ s/\(*.\)\s*;$/;/;
+	$x =~ s/\[*.\]\s*;$/;/;
+    }
+
+    if ($x =~ /typedef.*\s+(\w+)\s*;/) {
+	$declaration_name = $1;
+
+	output_declaration($declaration_name,
+			   'typedef',
+			   {'typedef' => $declaration_name,
+			    'module' => $modulename,
+			    'sectionlist' => \@sectionlist,
+			    'sections' => \%sections,
+			    'purpose' => $declaration_purpose
+			   });
+    }
+    else {
+	print STDERR "${file}:$.: error: Cannot parse typedef!\n";
+	++$errors;
+    }
+}
+
+sub save_struct_actual($) {
+    my $actual = shift;
+
+    # strip all spaces from the actual param so that it looks like one string item
+    $actual =~ s/\s*//g;
+    $struct_actual = $struct_actual . $actual . " ";
+}
+
+sub create_parameterlist($$$$) {
+    my $args = shift;
+    my $splitter = shift;
+    my $file = shift;
+    my $declaration_name = shift;
+    my $type;
+    my $param;
+
+    # temporarily replace commas inside function pointer definition
+    while ($args =~ /(\([^\),]+),/) {
+	$args =~ s/(\([^\),]+),/$1#/g;
+    }
+
+    foreach my $arg (split($splitter, $args)) {
+	# strip comments
+	$arg =~ s/\/\*.*\*\///;
+	# strip leading/trailing spaces
+	$arg =~ s/^\s*//;
+	$arg =~ s/\s*$//;
+	$arg =~ s/\s+/ /;
+
+	if ($arg =~ /^#/) {
+	    # Treat preprocessor directive as a typeless variable just to fill
+	    # corresponding data structures "correctly". Catch it later in
+	    # output_* subs.
+	    push_parameter($arg, "", $file);
+	} elsif ($arg =~ m/\(.+\)\s*\(/) {
+	    # pointer-to-function
+	    $arg =~ tr/#/,/;
+	    $arg =~ m/[^\(]+\(\*?\s*([\w\.]*)\s*\)/;
+	    $param = $1;
+	    $type = $arg;
+	    $type =~ s/([^\(]+\(\*?)\s*$param/$1/;
+	    save_struct_actual($param);
+	    push_parameter($param, $type, $file, $declaration_name);
+	} elsif ($arg) {
+	    $arg =~ s/\s*:\s*/:/g;
+	    $arg =~ s/\s*\[/\[/g;
+
+	    my @args = split('\s*,\s*', $arg);
+	    if ($args[0] =~ m/\*/) {
+		$args[0] =~ s/(\*+)\s*/ $1/;
+	    }
+
+	    my @first_arg;
+	    if ($args[0] =~ /^(.*\s+)(.*?\[.*\].*)$/) {
+		    shift @args;
+		    push(@first_arg, split('\s+', $1));
+		    push(@first_arg, $2);
+	    } else {
+		    @first_arg = split('\s+', shift @args);
+	    }
+
+	    unshift(@args, pop @first_arg);
+	    $type = join " ", @first_arg;
+
+	    foreach $param (@args) {
+		if ($param =~ m/^(\*+)\s*(.*)/) {
+		    save_struct_actual($2);
+		    push_parameter($2, "$type $1", $file, $declaration_name);
+		}
+		elsif ($param =~ m/(.*?):(\d+)/) {
+		    if ($type ne "") { # skip unnamed bit-fields
+			save_struct_actual($1);
+			push_parameter($1, "$type:$2", $file, $declaration_name)
+		    }
+		}
+		else {
+		    save_struct_actual($param);
+		    push_parameter($param, $type, $file, $declaration_name);
+		}
+	    }
+	}
+    }
+}
+
+sub push_parameter($$$$) {
+	my $param = shift;
+	my $type = shift;
+	my $file = shift;
+	my $declaration_name = shift;
+
+	if (($anon_struct_union == 1) && ($type eq "") &&
+	    ($param eq "}")) {
+		return;		# ignore the ending }; from anon. struct/union
+	}
+
+	$anon_struct_union = 0;
+	$param =~ s/[\[\)].*//;
+
+	if ($type eq "" && $param =~ /\.\.\.$/)
+	{
+	    if (!$param =~ /\w\.\.\.$/) {
+	      # handles unnamed variable parameters
+	      $param = "...";
+	    }
+	    if (!defined $parameterdescs{$param} || $parameterdescs{$param} eq "") {
+		$parameterdescs{$param} = "variable arguments";
+	    }
+	}
+	elsif ($type eq "" && ($param eq "" or $param eq "void"))
+	{
+	    $param="void";
+	    $parameterdescs{void} = "no arguments";
+	}
+	elsif ($type eq "" && ($param eq "struct" or $param eq "union"))
+	# handle unnamed (anonymous) union or struct:
+	{
+		$type = $param;
+		$param = "{unnamed_" . $param . "}";
+		$parameterdescs{$param} = "anonymous\n";
+		$anon_struct_union = 1;
+	}
+
+	# warn if parameter has no description
+	# (but ignore ones starting with # as these are not parameters
+	# but inline preprocessor statements);
+	# Note: It will also ignore void params and unnamed structs/unions
+	if (!defined $parameterdescs{$param} && $param !~ /^#/) {
+		$parameterdescs{$param} = $undescribed;
+
+	        if (show_warnings($type, $declaration_name) && $param !~ /\./) {
+			print STDERR
+			      "${file}:$.: warning: Function parameter or member '$param' not described in '$declaration_name'\n";
+			++$warnings;
+		}
+	}
+
+	# strip spaces from $param so that it is one continuous string
+	# on @parameterlist;
+	# this fixes a problem where check_sections() cannot find
+	# a parameter like "addr[6 + 2]" because it actually appears
+	# as "addr[6", "+", "2]" on the parameter list;
+	# but it's better to maintain the param string unchanged for output,
+	# so just weaken the string compare in check_sections() to ignore
+	# "[blah" in a parameter string;
+	###$param =~ s/\s*//g;
+	push @parameterlist, $param;
+	$type =~ s/\s\s+/ /g;
+	$parametertypes{$param} = $type;
+}
+
+sub check_sections($$$$$) {
+	my ($file, $decl_name, $decl_type, $sectcheck, $prmscheck) = @_;
+	my @sects = split ' ', $sectcheck;
+	my @prms = split ' ', $prmscheck;
+	my $err;
+	my ($px, $sx);
+	my $prm_clean;		# strip trailing "[array size]" and/or beginning "*"
+
+	foreach $sx (0 .. $#sects) {
+		$err = 1;
+		foreach $px (0 .. $#prms) {
+			$prm_clean = $prms[$px];
+			$prm_clean =~ s/\[.*\]//;
+			$prm_clean =~ s/__attribute__\s*\(\([a-z,_\*\s\(\)]*\)\)//i;
+			# ignore array size in a parameter string;
+			# however, the original param string may contain
+			# spaces, e.g.:  addr[6 + 2]
+			# and this appears in @prms as "addr[6" since the
+			# parameter list is split at spaces;
+			# hence just ignore "[..." for the sections check;
+			$prm_clean =~ s/\[.*//;
+
+			##$prm_clean =~ s/^\**//;
+			if ($prm_clean eq $sects[$sx]) {
+				$err = 0;
+				last;
+			}
+		}
+		if ($err) {
+			if ($decl_type eq "function") {
+				print STDERR "${file}:$.: warning: " .
+					"Excess function parameter " .
+					"'$sects[$sx]' " .
+					"description in '$decl_name'\n";
+				++$warnings;
+			}
+		}
+	}
+}
+
+##
+# Checks the section describing the return value of a function.
+sub check_return_section {
+        my $file = shift;
+        my $declaration_name = shift;
+        my $return_type = shift;
+
+        # Ignore an empty return type (It's a macro)
+        # Ignore functions with a "void" return type. (But don't ignore "void *")
+        if (($return_type eq "") || ($return_type =~ /void\s*\w*\s*$/)) {
+                return;
+        }
+
+        if (!defined($sections{$section_return}) ||
+            $sections{$section_return} eq "") {
+                print STDERR "${file}:$.: warning: " .
+                        "No description found for return value of " .
+                        "'$declaration_name'\n";
+                ++$warnings;
+        }
+}
+
+##
+# takes a function prototype and the name of the current file being
+# processed and spits out all the details stored in the global
+# arrays/hashes.
+sub dump_function($$) {
+    my $prototype = shift;
+    my $file = shift;
+    my $noret = 0;
+
+    $prototype =~ s/^static +//;
+    $prototype =~ s/^extern +//;
+    $prototype =~ s/^asmlinkage +//;
+    $prototype =~ s/^inline +//;
+    $prototype =~ s/^__inline__ +//;
+    $prototype =~ s/^__inline +//;
+    $prototype =~ s/^__always_inline +//;
+    $prototype =~ s/^noinline +//;
+    $prototype =~ s/__init +//;
+    $prototype =~ s/__init_or_module +//;
+    $prototype =~ s/__meminit +//;
+    $prototype =~ s/__must_check +//;
+    $prototype =~ s/__weak +//;
+    $prototype =~ s/__sched +//;
+    $prototype =~ s/__printf\s*\(\s*\d*\s*,\s*\d*\s*\) +//;
+    my $define = $prototype =~ s/^#\s*define\s+//; #ak added
+    $prototype =~ s/__attribute__\s*\(\(
+            (?:
+                 [\w\s]++          # attribute name
+                 (?:\([^)]*+\))?   # attribute arguments
+                 \s*+,?            # optional comma at the end
+            )+
+          \)\)\s+//x;
+
+    # Yes, this truly is vile.  We are looking for:
+    # 1. Return type (may be nothing if we're looking at a macro)
+    # 2. Function name
+    # 3. Function parameters.
+    #
+    # All the while we have to watch out for function pointer parameters
+    # (which IIRC is what the two sections are for), C types (these
+    # regexps don't even start to express all the possibilities), and
+    # so on.
+    #
+    # If you mess with these regexps, it's a good idea to check that
+    # the following functions' documentation still comes out right:
+    # - parport_register_device (function pointer parameters)
+    # - qatomic_set (macro)
+    # - pci_match_device, __copy_to_user (long return type)
+
+    if ($define && $prototype =~ m/^()([a-zA-Z0-9_~:]+)\s+/) {
+        # This is an object-like macro, it has no return type and no parameter
+        # list.
+        # Function-like macros are not allowed to have spaces between
+        # declaration_name and opening parenthesis (notice the \s+).
+        $return_type = $1;
+        $declaration_name = $2;
+        $noret = 1;
+    } elsif ($prototype =~ m/^()([a-zA-Z0-9_~:]+)\s*\(([^\(]*)\)/ ||
+	$prototype =~ m/^(\w+)\s+([a-zA-Z0-9_~:]+)\s*\(([^\(]*)\)/ ||
+	$prototype =~ m/^(\w+\s*\*+)\s*([a-zA-Z0-9_~:]+)\s*\(([^\(]*)\)/ ||
+	$prototype =~ m/^(\w+\s+\w+)\s+([a-zA-Z0-9_~:]+)\s*\(([^\(]*)\)/ ||
+	$prototype =~ m/^(\w+\s+\w+\s*\*+)\s*([a-zA-Z0-9_~:]+)\s*\(([^\(]*)\)/ ||
+	$prototype =~ m/^(\w+\s+\w+\s+\w+)\s+([a-zA-Z0-9_~:]+)\s*\(([^\(]*)\)/ ||
+	$prototype =~ m/^(\w+\s+\w+\s+\w+\s*\*+)\s*([a-zA-Z0-9_~:]+)\s*\(([^\(]*)\)/ ||
+	$prototype =~ m/^()([a-zA-Z0-9_~:]+)\s*\(([^\{]*)\)/ ||
+	$prototype =~ m/^(\w+)\s+([a-zA-Z0-9_~:]+)\s*\(([^\{]*)\)/ ||
+	$prototype =~ m/^(\w+\s*\*+)\s*([a-zA-Z0-9_~:]+)\s*\(([^\{]*)\)/ ||
+	$prototype =~ m/^(\w+\s+\w+)\s+([a-zA-Z0-9_~:]+)\s*\(([^\{]*)\)/ ||
+	$prototype =~ m/^(\w+\s+\w+\s*\*+)\s*([a-zA-Z0-9_~:]+)\s*\(([^\{]*)\)/ ||
+	$prototype =~ m/^(\w+\s+\w+\s+\w+)\s+([a-zA-Z0-9_~:]+)\s*\(([^\{]*)\)/ ||
+	$prototype =~ m/^(\w+\s+\w+\s+\w+\s*\*+)\s*([a-zA-Z0-9_~:]+)\s*\(([^\{]*)\)/ ||
+	$prototype =~ m/^(\w+\s+\w+\s+\w+\s+\w+)\s+([a-zA-Z0-9_~:]+)\s*\(([^\{]*)\)/ ||
+	$prototype =~ m/^(\w+\s+\w+\s+\w+\s+\w+\s*\*+)\s*([a-zA-Z0-9_~:]+)\s*\(([^\{]*)\)/ ||
+	$prototype =~ m/^(\w+\s+\w+\s*\*+\s*\w+\s*\*+\s*)\s*([a-zA-Z0-9_~:]+)\s*\(([^\{]*)\)/)  {
+	$return_type = $1;
+	$declaration_name = $2;
+	my $args = $3;
+
+	create_parameterlist($args, ',', $file, $declaration_name);
+    } else {
+	print STDERR "${file}:$.: warning: cannot understand function prototype: '$prototype'\n";
+	return;
+    }
+
+	my $prms = join " ", @parameterlist;
+	check_sections($file, $declaration_name, "function", $sectcheck, $prms);
+
+        # This check emits a lot of warnings at the moment, because many
+        # functions don't have a 'Return' doc section. So until the number
+        # of warnings goes sufficiently down, the check is only performed in
+        # verbose mode.
+        # TODO: always perform the check.
+        if ($verbose && !$noret) {
+                check_return_section($file, $declaration_name, $return_type);
+        }
+
+    output_declaration($declaration_name,
+		       'function',
+		       {'function' => $declaration_name,
+			'module' => $modulename,
+			'functiontype' => $return_type,
+			'parameterlist' => \@parameterlist,
+			'parameterdescs' => \%parameterdescs,
+			'parametertypes' => \%parametertypes,
+			'sectionlist' => \@sectionlist,
+			'sections' => \%sections,
+			'purpose' => $declaration_purpose
+		       });
+}
+
+sub reset_state {
+    $function = "";
+    %parameterdescs = ();
+    %parametertypes = ();
+    @parameterlist = ();
+    %sections = ();
+    @sectionlist = ();
+    $sectcheck = "";
+    $struct_actual = "";
+    $prototype = "";
+
+    $state = STATE_NORMAL;
+    $inline_doc_state = STATE_INLINE_NA;
+}
+
+sub tracepoint_munge($) {
+	my $file = shift;
+	my $tracepointname = 0;
+	my $tracepointargs = 0;
+
+	if ($prototype =~ m/TRACE_EVENT\((.*?),/) {
+		$tracepointname = $1;
+	}
+	if ($prototype =~ m/DEFINE_SINGLE_EVENT\((.*?),/) {
+		$tracepointname = $1;
+	}
+	if ($prototype =~ m/DEFINE_EVENT\((.*?),(.*?),/) {
+		$tracepointname = $2;
+	}
+	$tracepointname =~ s/^\s+//; #strip leading whitespace
+	if ($prototype =~ m/TP_PROTO\((.*?)\)/) {
+		$tracepointargs = $1;
+	}
+	if (($tracepointname eq 0) || ($tracepointargs eq 0)) {
+		print STDERR "${file}:$.: warning: Unrecognized tracepoint format: \n".
+			     "$prototype\n";
+	} else {
+		$prototype = "static inline void trace_$tracepointname($tracepointargs)";
+	}
+}
+
+sub syscall_munge() {
+	my $void = 0;
+
+	$prototype =~ s@[\r\n]+@ @gos; # strip newlines/CR's
+##	if ($prototype =~ m/SYSCALL_DEFINE0\s*\(\s*(a-zA-Z0-9_)*\s*\)/) {
+	if ($prototype =~ m/SYSCALL_DEFINE0/) {
+		$void = 1;
+##		$prototype = "long sys_$1(void)";
+	}
+
+	$prototype =~ s/SYSCALL_DEFINE.*\(/long sys_/; # fix return type & func name
+	if ($prototype =~ m/long (sys_.*?),/) {
+		$prototype =~ s/,/\(/;
+	} elsif ($void) {
+		$prototype =~ s/\)/\(void\)/;
+	}
+
+	# now delete all of the odd-number commas in $prototype
+	# so that arg types & arg names don't have a comma between them
+	my $count = 0;
+	my $len = length($prototype);
+	if ($void) {
+		$len = 0;	# skip the for-loop
+	}
+	for (my $ix = 0; $ix < $len; $ix++) {
+		if (substr($prototype, $ix, 1) eq ',') {
+			$count++;
+			if ($count % 2 == 1) {
+				substr($prototype, $ix, 1) = ' ';
+			}
+		}
+	}
+}
+
+sub process_proto_function($$) {
+    my $x = shift;
+    my $file = shift;
+
+    $x =~ s@\/\/.*$@@gos; # strip C99-style comments to end of line
+
+    if ($x =~ m#\s*/\*\s+MACDOC\s*#io || ($x =~ /^#/ && $x !~ /^#\s*define/)) {
+	# do nothing
+    }
+    elsif ($x =~ /([^\{]*)/) {
+	$prototype .= $1;
+    }
+
+    if (($x =~ /\{/) || ($x =~ /\#\s*define/) || ($x =~ /;/)) {
+	$prototype =~ s@/\*.*?\*/@@gos;	# strip comments.
+	$prototype =~ s@[\r\n]+@ @gos; # strip newlines/cr's.
+	$prototype =~ s@^\s+@@gos; # strip leading spaces
+	if ($prototype =~ /SYSCALL_DEFINE/) {
+		syscall_munge();
+	}
+	if ($prototype =~ /TRACE_EVENT/ || $prototype =~ /DEFINE_EVENT/ ||
+	    $prototype =~ /DEFINE_SINGLE_EVENT/)
+	{
+		tracepoint_munge($file);
+	}
+	dump_function($prototype, $file);
+	reset_state();
+    }
+}
+
+sub process_proto_type($$) {
+    my $x = shift;
+    my $file = shift;
+
+    $x =~ s@[\r\n]+@ @gos; # strip newlines/cr's.
+    $x =~ s@^\s+@@gos; # strip leading spaces
+    $x =~ s@\s+$@@gos; # strip trailing spaces
+    $x =~ s@\/\/.*$@@gos; # strip C99-style comments to end of line
+
+    if ($x =~ /^#/) {
+	# To distinguish preprocessor directive from regular declaration later.
+	$x .= ";";
+    }
+
+    while (1) {
+	if ( $x =~ /([^\{\};]*)([\{\};])(.*)/ ) {
+            if( length $prototype ) {
+                $prototype .= " "
+            }
+	    $prototype .= $1 . $2;
+	    ($2 eq '{') && $brcount++;
+	    ($2 eq '}') && $brcount--;
+	    if (($2 eq ';') && ($brcount == 0)) {
+		dump_declaration($prototype, $file);
+		reset_state();
+		last;
+	    }
+	    $x = $3;
+	} else {
+	    $prototype .= $x;
+	    last;
+	}
+    }
+}
+
+
+sub map_filename($) {
+    my $file;
+    my ($orig_file) = @_;
+
+    if (defined($ENV{'SRCTREE'})) {
+	$file = "$ENV{'SRCTREE'}" . "/" . $orig_file;
+    } else {
+	$file = $orig_file;
+    }
+
+    if (defined($source_map{$file})) {
+	$file = $source_map{$file};
+    }
+
+    return $file;
+}
+
+sub process_export_file($) {
+    my ($orig_file) = @_;
+    my $file = map_filename($orig_file);
+
+    if (!open(IN,"<$file")) {
+	print STDERR "Error: Cannot open file $file\n";
+	++$errors;
+	return;
+    }
+
+    while () {
+	if (/$export_symbol/) {
+	    $function_table{$2} = 1;
+	}
+    }
+
+    close(IN);
+}
+
+#
+# Parsers for the various processing states.
+#
+# STATE_NORMAL: looking for the /** to begin everything.
+#
+sub process_normal() {
+    if (/$doc_start/o) {
+	$state = STATE_NAME;	# next line is always the function name
+	$in_doc_sect = 0;
+	$declaration_start_line = $. + 1;
+    }
+}
+
+#
+# STATE_NAME: Looking for the "name - description" line
+#
+sub process_name($$) {
+    my $file = shift;
+    my $identifier;
+    my $descr;
+
+    if (/$doc_block/o) {
+	$state = STATE_DOCBLOCK;
+	$contents = "";
+	$new_start_line = $. + 1;
+
+	if ( $1 eq "" ) {
+	    $section = $section_intro;
+	} else {
+	    $section = $1;
+	}
+    }
+    elsif (/$doc_decl/o) {
+	$identifier = $1;
+	if (/\s*([\w\s]+?)(\s*-|:)/) {
+	    $identifier = $1;
+	}
+
+	$state = STATE_BODY;
+	# if there's no @param blocks need to set up default section
+	# here
+	$contents = "";
+	$section = $section_default;
+	$new_start_line = $. + 1;
+	if (/[-:](.*)/) {
+	    # strip leading/trailing/multiple spaces
+	    $descr= $1;
+	    $descr =~ s/^\s*//;
+	    $descr =~ s/\s*$//;
+	    $descr =~ s/\s+/ /g;
+	    $declaration_purpose = $descr;
+	    $state = STATE_BODY_MAYBE;
+	} else {
+	    $declaration_purpose = "";
+	}
+
+	if (($declaration_purpose eq "") && $verbose) {
+	    print STDERR "${file}:$.: warning: missing initial short description on line:\n";
+	    print STDERR $_;
+	    ++$warnings;
+	}
+
+	if ($identifier =~ m/^struct\b/) {
+	    $decl_type = 'struct';
+	} elsif ($identifier =~ m/^union\b/) {
+	    $decl_type = 'union';
+	} elsif ($identifier =~ m/^enum\b/) {
+	    $decl_type = 'enum';
+	} elsif ($identifier =~ m/^typedef\b/) {
+	    $decl_type = 'typedef';
+	} else {
+	    $decl_type = 'function';
+	}
+
+	if ($verbose) {
+	    print STDERR "${file}:$.: info: Scanning doc for $identifier\n";
+	}
+    } else {
+	print STDERR "${file}:$.: warning: Cannot understand $_ on line $.",
+	    " - I thought it was a doc line\n";
+	++$warnings;
+	$state = STATE_NORMAL;
+    }
+}
+
+
+#
+# STATE_BODY and STATE_BODY_MAYBE: the bulk of a kerneldoc comment.
+#
+sub process_body($$) {
+    my $file = shift;
+
+    if (/$doc_sect/i) { # case insensitive for supported section names
+	$newsection = $1;
+	$newcontents = $2;
+
+	# map the supported section names to the canonical names
+	if ($newsection =~ m/^description$/i) {
+	    $newsection = $section_default;
+	} elsif ($newsection =~ m/^context$/i) {
+	    $newsection = $section_context;
+	} elsif ($newsection =~ m/^returns?$/i) {
+	    $newsection = $section_return;
+	} elsif ($newsection =~ m/^\@return$/) {
+	    # special: @return is a section, not a param description
+	    $newsection = $section_return;
+	}
+
+	if (($contents ne "") && ($contents ne "\n")) {
+	    if (!$in_doc_sect && $verbose) {
+		print STDERR "${file}:$.: warning: contents before sections\n";
+		++$warnings;
+	    }
+	    dump_section($file, $section, $contents);
+	    $section = $section_default;
+	}
+
+	$in_doc_sect = 1;
+	$state = STATE_BODY;
+	$contents = $newcontents;
+	$new_start_line = $.;
+	while (substr($contents, 0, 1) eq " ") {
+	    $contents = substr($contents, 1);
+	}
+	if ($contents ne "") {
+	    $contents .= "\n";
+	}
+	$section = $newsection;
+	$leading_space = undef;
+    } elsif (/$doc_end/) {
+	if (($contents ne "") && ($contents ne "\n")) {
+	    dump_section($file, $section, $contents);
+	    $section = $section_default;
+	    $contents = "";
+	}
+	# look for doc_com +  + doc_end:
+	if ($_ =~ m'\s*\*\s*[a-zA-Z_0-9:\.]+\*/') {
+	    print STDERR "${file}:$.: warning: suspicious ending line: $_";
+	    ++$warnings;
+	}
+
+	$prototype = "";
+	$state = STATE_PROTO;
+	$brcount = 0;
+    } elsif (/$doc_content/) {
+	# miguel-style comment kludge, look for blank lines after
+	# @parameter line to signify start of description
+	if ($1 eq "") {
+	    if ($section =~ m/^@/ || $section eq $section_context) {
+		dump_section($file, $section, $contents);
+		$section = $section_default;
+		$contents = "";
+		$new_start_line = $.;
+	    } else {
+		$contents .= "\n";
+	    }
+	    $state = STATE_BODY;
+	} elsif ($state == STATE_BODY_MAYBE) {
+	    # Continued declaration purpose
+	    chomp($declaration_purpose);
+	    $declaration_purpose .= " " . $1;
+	    $declaration_purpose =~ s/\s+/ /g;
+	} else {
+	    my $cont = $1;
+	    if ($section =~ m/^@/ || $section eq $section_context) {
+		if (!defined $leading_space) {
+		    if ($cont =~ m/^(\s+)/) {
+			$leading_space = $1;
+		    } else {
+			$leading_space = "";
+		    }
+		}
+		$cont =~ s/^$leading_space//;
+	    }
+	    $contents .= $cont . "\n";
+	}
+    } else {
+	# i dont know - bad line?  ignore.
+	print STDERR "${file}:$.: warning: bad line: $_";
+	++$warnings;
+    }
+}
+
+
+#
+# STATE_PROTO: reading a function/whatever prototype.
+#
+sub process_proto($$) {
+    my $file = shift;
+
+    if (/$doc_inline_oneline/) {
+	$section = $1;
+	$contents = $2;
+	if ($contents ne "") {
+	    $contents .= "\n";
+	    dump_section($file, $section, $contents);
+	    $section = $section_default;
+	    $contents = "";
+	}
+    } elsif (/$doc_inline_start/) {
+	$state = STATE_INLINE;
+	$inline_doc_state = STATE_INLINE_NAME;
+    } elsif ($decl_type eq 'function') {
+	process_proto_function($_, $file);
+    } else {
+	process_proto_type($_, $file);
+    }
+}
+
+#
+# STATE_DOCBLOCK: within a DOC: block.
+#
+sub process_docblock($$) {
+    my $file = shift;
+
+    if (/$doc_end/) {
+	dump_doc_section($file, $section, $contents);
+	$section = $section_default;
+	$contents = "";
+	$function = "";
+	%parameterdescs = ();
+	%parametertypes = ();
+	@parameterlist = ();
+	%sections = ();
+	@sectionlist = ();
+	$prototype = "";
+	$state = STATE_NORMAL;
+    } elsif (/$doc_content/) {
+	if ( $1 eq "" )	{
+	    $contents .= $blankline;
+	} else {
+	    $contents .= $1 . "\n";
+	}
+    }
+}
+
+#
+# STATE_INLINE: docbook comments within a prototype.
+#
+sub process_inline($$) {
+    my $file = shift;
+
+    # First line (state 1) needs to be a @parameter
+    if ($inline_doc_state == STATE_INLINE_NAME && /$doc_inline_sect/o) {
+	$section = $1;
+	$contents = $2;
+	$new_start_line = $.;
+	if ($contents ne "") {
+	    while (substr($contents, 0, 1) eq " ") {
+		$contents = substr($contents, 1);
+	    }
+	    $contents .= "\n";
+	}
+	$inline_doc_state = STATE_INLINE_TEXT;
+	# Documentation block end */
+    } elsif (/$doc_inline_end/) {
+	if (($contents ne "") && ($contents ne "\n")) {
+	    dump_section($file, $section, $contents);
+	    $section = $section_default;
+	    $contents = "";
+	}
+	$state = STATE_PROTO;
+	$inline_doc_state = STATE_INLINE_NA;
+	# Regular text
+    } elsif (/$doc_content/) {
+	if ($inline_doc_state == STATE_INLINE_TEXT) {
+	    $contents .= $1 . "\n";
+	    # nuke leading blank lines
+	    if ($contents =~ /^\s*$/) {
+		$contents = "";
+	    }
+	} elsif ($inline_doc_state == STATE_INLINE_NAME) {
+	    $inline_doc_state = STATE_INLINE_ERROR;
+	    print STDERR "${file}:$.: warning: ";
+	    print STDERR "Incorrect use of kernel-doc format: $_";
+	    ++$warnings;
+	}
+    }
+}
+
+
+sub process_file($) {
+    my $file;
+    my $initial_section_counter = $section_counter;
+    my ($orig_file) = @_;
+
+    $file = map_filename($orig_file);
+
+    if (!open(IN,"<$file")) {
+	print STDERR "Error: Cannot open file $file\n";
+	++$errors;
+	return;
+    }
+
+    $. = 1;
+
+    $section_counter = 0;
+    while () {
+	while (s/\\\s*$//) {
+	    $_ .= ;
+	}
+	# Replace tabs by spaces
+        while ($_ =~ s/\t+/' ' x (length($&) * 8 - length($`) % 8)/e) {};
+	# Hand this line to the appropriate state handler
+	if ($state == STATE_NORMAL) {
+	    process_normal();
+	} elsif ($state == STATE_NAME) {
+	    process_name($file, $_);
+	} elsif ($state == STATE_BODY || $state == STATE_BODY_MAYBE) {
+	    process_body($file, $_);
+	} elsif ($state == STATE_INLINE) { # scanning for inline parameters
+	    process_inline($file, $_);
+	} elsif ($state == STATE_PROTO) {
+	    process_proto($file, $_);
+	} elsif ($state == STATE_DOCBLOCK) {
+	    process_docblock($file, $_);
+	}
+    }
+
+    # Make sure we got something interesting.
+    if ($initial_section_counter == $section_counter && $
+	output_mode ne "none") {
+	if ($output_selection == OUTPUT_INCLUDE) {
+	    print STDERR "${file}:1: warning: '$_' not found\n"
+		for keys %function_table;
+	}
+	else {
+	    print STDERR "${file}:1: warning: no structured comments found\n";
+	}
+    }
+}
+
+
+$kernelversion = get_kernel_version();
+
+# generate a sequence of code that will splice in highlighting information
+# using the s// operator.
+for (my $k = 0; $k < @highlights; $k++) {
+    my $pattern = $highlights[$k][0];
+    my $result = $highlights[$k][1];
+#   print STDERR "scanning pattern:$pattern, highlight:($result)\n";
+    $dohighlight .=  "\$contents =~ s:$pattern:$result:gs;\n";
+}
+
+# Read the file that maps relative names to absolute names for
+# separate source and object directories and for shadow trees.
+if (open(SOURCE_MAP, "<.tmp_filelist.txt")) {
+	my ($relname, $absname);
+	while() {
+		chop();
+		($relname, $absname) = (split())[0..1];
+		$relname =~ s:^/+::;
+		$source_map{$relname} = $absname;
+	}
+	close(SOURCE_MAP);
+}
+
+if ($output_selection == OUTPUT_EXPORTED ||
+    $output_selection == OUTPUT_INTERNAL) {
+
+    push(@export_file_list, @ARGV);
+
+    foreach (@export_file_list) {
+	chomp;
+	process_export_file($_);
+    }
+}
+
+foreach (@ARGV) {
+    chomp;
+    process_file($_);
+}
+if ($verbose && $errors) {
+  print STDERR "$errors errors\n";
+}
+if ($verbose && $warnings) {
+  print STDERR "$warnings warnings\n";
+}
+
+exit($output_mode eq "none" ? 0 : $errors);
diff --git a/scripts/kvm/kvm_flightrecorder b/scripts/kvm/kvm_flightrecorder
new file mode 100755
index 000000000..78ca3af9c
--- /dev/null
+++ b/scripts/kvm/kvm_flightrecorder
@@ -0,0 +1,126 @@
+#!/usr/bin/env python3
+#
+# KVM Flight Recorder - ring buffer tracing script
+#
+# Copyright (C) 2012 IBM Corp
+#
+# Author: Stefan Hajnoczi 
+#
+# This script provides a command-line interface to kvm ftrace and is designed
+# to be used as a flight recorder that is always running.  To start in-memory
+# recording:
+#
+# sudo kvm_flightrecorder start 8192  # 8 MB per-cpu ring buffers
+#
+# The per-cpu ring buffer size can be given in KB as an optional argument to
+# the 'start' subcommand.
+#
+# To stop the flight recorder:
+#
+# sudo kvm_flightrecorder stop
+#
+# To dump the contents of the flight recorder (this can be done when the
+# recorder is stopped or while it is running):
+#
+# sudo kvm_flightrecorder dump >/path/to/dump.txt
+#
+# To observe the trace while it is running, use the 'tail' subcommand:
+#
+# sudo kvm_flightrecorder tail
+#
+# Note that the flight recorder may impact overall system performance by
+# consuming CPU cycles.  No disk I/O is performed since the ring buffer holds a
+# fixed-size in-memory trace.
+
+import sys
+import os
+
+tracing_dir = '/sys/kernel/debug/tracing'
+
+def trace_path(*args):
+    return os.path.join(tracing_dir, *args)
+
+def write_file(path, data):
+    open(path, 'wb').write(data)
+
+def enable_event(subsystem, event, enable):
+    write_file(trace_path('events', subsystem, event, 'enable'), '1' if enable else '0')
+
+def enable_subsystem(subsystem, enable):
+    write_file(trace_path('events', subsystem, 'enable'), '1' if enable else '0')
+
+def start_tracing():
+    enable_subsystem('kvm', True)
+    write_file(trace_path('tracing_on'), '1')
+
+def stop_tracing():
+    write_file(trace_path('tracing_on'), '0')
+    enable_subsystem('kvm', False)
+    write_file(trace_path('events', 'enable'), '0')
+    write_file(trace_path('current_tracer'), 'nop')
+
+def dump_trace():
+    tracefile = open(trace_path('trace'), 'r')
+    try:
+        lines = True
+        while lines:
+            lines = tracefile.readlines(64 * 1024)
+            sys.stdout.writelines(lines)
+    except KeyboardInterrupt:
+        pass
+
+def tail_trace():
+    try:
+        for line in open(trace_path('trace_pipe'), 'r'):
+            sys.stdout.write(line)
+    except KeyboardInterrupt:
+        pass
+
+def usage():
+    print('Usage: %s start [buffer_size_kb] | stop | dump | tail' % sys.argv[0])
+    print('Control the KVM flight recorder tracing.')
+    sys.exit(0)
+
+def main():
+    if len(sys.argv) < 2:
+        usage()
+
+    cmd = sys.argv[1]
+    if cmd == '--version':
+        print('kvm_flightrecorder version 1.0')
+        sys.exit(0)
+
+    if not os.path.isdir(tracing_dir):
+        print('Unable to tracing debugfs directory, try:')
+        print('mount -t debugfs none /sys/kernel/debug')
+        sys.exit(1)
+    if not os.access(tracing_dir, os.W_OK):
+        print('Unable to write to tracing debugfs directory, please run as root')
+        sys.exit(1)
+
+    if cmd == 'start':
+        stop_tracing() # clean up first
+
+        if len(sys.argv) == 3:
+            try:
+                buffer_size_kb = int(sys.argv[2])
+            except ValueError:
+                print('Invalid per-cpu trace buffer size in KB')
+                sys.exit(1)
+            write_file(trace_path('buffer_size_kb'), str(buffer_size_kb))
+            print('Per-CPU ring buffer size set to %d KB' % buffer_size_kb)
+
+        start_tracing()
+        print('KVM flight recorder enabled')
+    elif cmd == 'stop':
+        stop_tracing()
+        print('KVM flight recorder disabled')
+    elif cmd == 'dump':
+        dump_trace()
+    elif cmd == 'tail':
+        tail_trace()
+    else:
+        usage()
+
+if __name__ == '__main__':
+    sys.exit(main())
diff --git a/scripts/kvm/vmxcap b/scripts/kvm/vmxcap
new file mode 100755
index 000000000..6fe66d5f5
--- /dev/null
+++ b/scripts/kvm/vmxcap
@@ -0,0 +1,280 @@
+#!/usr/bin/env python3
+#
+# tool for querying VMX capabilities
+#
+# Copyright 2009-2010 Red Hat, Inc.
+#
+# Authors:
+#  Avi Kivity 
+#
+# This work is licensed under the terms of the GNU GPL, version 2.  See
+# the COPYING file in the top-level directory.
+
+MSR_IA32_VMX_BASIC = 0x480
+MSR_IA32_VMX_PINBASED_CTLS = 0x481
+MSR_IA32_VMX_PROCBASED_CTLS = 0x482
+MSR_IA32_VMX_EXIT_CTLS = 0x483
+MSR_IA32_VMX_ENTRY_CTLS = 0x484
+MSR_IA32_VMX_MISC_CTLS = 0x485
+MSR_IA32_VMX_PROCBASED_CTLS2 = 0x48B
+MSR_IA32_VMX_EPT_VPID_CAP = 0x48C
+MSR_IA32_VMX_TRUE_PINBASED_CTLS = 0x48D
+MSR_IA32_VMX_TRUE_PROCBASED_CTLS = 0x48E
+MSR_IA32_VMX_TRUE_EXIT_CTLS = 0x48F
+MSR_IA32_VMX_TRUE_ENTRY_CTLS = 0x490
+MSR_IA32_VMX_VMFUNC = 0x491
+
+class msr(object):
+    def __init__(self):
+        try:
+            self.f = open('/dev/cpu/0/msr', 'rb', 0)
+        except:
+            self.f = open('/dev/msr0', 'rb', 0)
+    def read(self, index, default = None):
+        import struct
+        self.f.seek(index)
+        try:
+            return struct.unpack('Q', self.f.read(8))[0]
+        except:
+            return default
+
+class Control(object):
+    def __init__(self, name, bits, cap_msr, true_cap_msr = None):
+        self.name = name
+        self.bits = bits
+        self.cap_msr = cap_msr
+        self.true_cap_msr = true_cap_msr
+    def read2(self, nr):
+        m = msr()
+        val = m.read(nr, 0)
+        return (val & 0xffffffff, val >> 32)
+    def show(self):
+        print(self.name)
+        mb1, cb1 = self.read2(self.cap_msr)
+        tmb1, tcb1 = 0, 0
+        if self.true_cap_msr:
+            tmb1, tcb1 = self.read2(self.true_cap_msr)
+        for bit in sorted(self.bits.keys()):
+            zero = not (mb1 & (1 << bit))
+            one = cb1 & (1 << bit)
+            true_zero = not (tmb1 & (1 << bit))
+            true_one = tcb1 & (1 << bit)
+            s= '?'
+            if (self.true_cap_msr and true_zero and true_one
+                and one and not zero):
+                s = 'default'
+            elif zero and not one:
+                s = 'no'
+            elif one and not zero:
+                s = 'forced'
+            elif one and zero:
+                s = 'yes'
+            print('  %-40s %s' % (self.bits[bit], s))
+
+class Misc(object):
+    def __init__(self, name, bits, msr):
+        self.name = name
+        self.bits = bits
+        self.msr = msr
+    def show(self):
+        print(self.name)
+        value = msr().read(self.msr, 0)
+        print('  Hex: 0x%x' % (value))
+        def first_bit(key):
+            if type(key) is tuple:
+                return key[0]
+            else:
+                return key
+        for bits in sorted(self.bits.keys(), key = first_bit):
+            if type(bits) is tuple:
+                lo, hi = bits
+                fmt = int
+            else:
+                lo = hi = bits
+                def fmt(x):
+                    return { True: 'yes', False: 'no' }[x]
+            v = (value >> lo) & ((1 << (hi - lo + 1)) - 1)
+            print('  %-40s %s' % (self.bits[bits], fmt(v)))
+
+controls = [
+    Misc(
+        name = 'Basic VMX Information',
+        bits = {
+            (0, 30): 'Revision',
+            (32,44): 'VMCS size',
+            48: 'VMCS restricted to 32 bit addresses',
+            49: 'Dual-monitor support',
+            (50, 53): 'VMCS memory type',
+            54: 'INS/OUTS instruction information',
+            55: 'IA32_VMX_TRUE_*_CTLS support',
+            },
+        msr = MSR_IA32_VMX_BASIC,
+        ),
+    Control(
+        name = 'pin-based controls',
+        bits = {
+            0: 'External interrupt exiting',
+            3: 'NMI exiting',
+            5: 'Virtual NMIs',
+            6: 'Activate VMX-preemption timer',
+            7: 'Process posted interrupts',
+            },
+        cap_msr = MSR_IA32_VMX_PINBASED_CTLS,
+        true_cap_msr = MSR_IA32_VMX_TRUE_PINBASED_CTLS,
+        ),
+
+    Control(
+        name = 'primary processor-based controls',
+        bits = {
+            2: 'Interrupt window exiting',
+            3: 'Use TSC offsetting',
+            7: 'HLT exiting',
+            9: 'INVLPG exiting',
+            10: 'MWAIT exiting',
+            11: 'RDPMC exiting',
+            12: 'RDTSC exiting',
+            15: 'CR3-load exiting',
+            16: 'CR3-store exiting',
+            19: 'CR8-load exiting',
+            20: 'CR8-store exiting',
+            21: 'Use TPR shadow',
+            22: 'NMI-window exiting',
+            23: 'MOV-DR exiting',
+            24: 'Unconditional I/O exiting',
+            25: 'Use I/O bitmaps',
+            27: 'Monitor trap flag',
+            28: 'Use MSR bitmaps',
+            29: 'MONITOR exiting',
+            30: 'PAUSE exiting',
+            31: 'Activate secondary control',
+            },
+        cap_msr = MSR_IA32_VMX_PROCBASED_CTLS,
+        true_cap_msr = MSR_IA32_VMX_TRUE_PROCBASED_CTLS,
+        ),
+
+    Control(
+        name = 'secondary processor-based controls',
+        bits = {
+            0: 'Virtualize APIC accesses',
+            1: 'Enable EPT',
+            2: 'Descriptor-table exiting',
+            3: 'Enable RDTSCP',
+            4: 'Virtualize x2APIC mode',
+            5: 'Enable VPID',
+            6: 'WBINVD exiting',
+            7: 'Unrestricted guest',
+            8: 'APIC register emulation',
+            9: 'Virtual interrupt delivery',
+            10: 'PAUSE-loop exiting',
+            11: 'RDRAND exiting',
+            12: 'Enable INVPCID',
+            13: 'Enable VM functions',
+            14: 'VMCS shadowing',
+            15: 'Enable ENCLS exiting',
+            16: 'RDSEED exiting',
+            17: 'Enable PML',
+            18: 'EPT-violation #VE',
+            19: 'Conceal non-root operation from PT',
+            20: 'Enable XSAVES/XRSTORS',
+            22: 'Mode-based execute control (XS/XU)',
+            23: 'Sub-page write permissions',
+            24: 'GPA translation for PT',
+            25: 'TSC scaling',
+            26: 'User wait and pause',
+            28: 'ENCLV exiting',
+            },
+        cap_msr = MSR_IA32_VMX_PROCBASED_CTLS2,
+        ),
+
+    Control(
+        name = 'VM-Exit controls',
+        bits = {
+            2: 'Save debug controls',
+            9: 'Host address-space size',
+            12: 'Load IA32_PERF_GLOBAL_CTRL',
+            15: 'Acknowledge interrupt on exit',
+            18: 'Save IA32_PAT',
+            19: 'Load IA32_PAT',
+            20: 'Save IA32_EFER',
+            21: 'Load IA32_EFER',
+            22: 'Save VMX-preemption timer value',
+            23: 'Clear IA32_BNDCFGS',
+            24: 'Conceal VM exits from PT',
+            25: 'Clear IA32_RTIT_CTL',
+            },
+        cap_msr = MSR_IA32_VMX_EXIT_CTLS,
+        true_cap_msr = MSR_IA32_VMX_TRUE_EXIT_CTLS,
+        ),
+
+    Control(
+        name = 'VM-Entry controls',
+        bits = {
+            2: 'Load debug controls',
+            9: 'IA-32e mode guest',
+            10: 'Entry to SMM',
+            11: 'Deactivate dual-monitor treatment',
+            13: 'Load IA32_PERF_GLOBAL_CTRL',
+            14: 'Load IA32_PAT',
+            15: 'Load IA32_EFER',
+            16: 'Load IA32_BNDCFGS',
+            17: 'Conceal VM entries from PT',
+            18: 'Load IA32_RTIT_CTL',
+            },
+        cap_msr = MSR_IA32_VMX_ENTRY_CTLS,
+        true_cap_msr = MSR_IA32_VMX_TRUE_ENTRY_CTLS,
+        ),
+
+    Misc(
+        name = 'Miscellaneous data',
+        bits = {
+            (0,4): 'VMX-preemption timer scale (log2)',
+            5: 'Store EFER.LMA into IA-32e mode guest control',
+            6: 'HLT activity state',
+            7: 'Shutdown activity state',
+            8: 'Wait-for-SIPI activity state',
+            14: 'PT in VMX operation',
+            15: 'IA32_SMBASE support',
+            (16,24): 'Number of CR3-target values',
+            (25,27): 'MSR-load/store count recommendation',
+            28: 'IA32_SMM_MONITOR_CTL[2] can be set to 1',
+            29: 'VMWRITE to VM-exit information fields',
+            30: 'Inject event with insn length=0',
+            (32,63): 'MSEG revision identifier',
+            },
+        msr = MSR_IA32_VMX_MISC_CTLS,
+        ),
+
+    Misc(
+        name = 'VPID and EPT capabilities',
+        bits = {
+            0: 'Execute-only EPT translations',
+            6: 'Page-walk length 4',
+            8: 'Paging-structure memory type UC',
+            14: 'Paging-structure memory type WB',
+            16: '2MB EPT pages',
+            17: '1GB EPT pages',
+            20: 'INVEPT supported',
+            21: 'EPT accessed and dirty flags',
+            22: 'Advanced VM-exit information for EPT violations',
+            25: 'Single-context INVEPT',
+            26: 'All-context INVEPT',
+            32: 'INVVPID supported',
+            40: 'Individual-address INVVPID',
+            41: 'Single-context INVVPID',
+            42: 'All-context INVVPID',
+            43: 'Single-context-retaining-globals INVVPID',
+            },
+        msr = MSR_IA32_VMX_EPT_VPID_CAP,
+        ),
+    Misc(
+        name = 'VM Functions',
+        bits = {
+            0: 'EPTP Switching',
+            },
+        msr = MSR_IA32_VMX_VMFUNC,
+        ),
+    ]
+
+if __name__ == '__main__':
+    for c in controls:
+        c.show()
diff --git a/scripts/make-release b/scripts/make-release
new file mode 100755
index 000000000..a2a8cda33
--- /dev/null
+++ b/scripts/make-release
@@ -0,0 +1,33 @@
+#!/bin/bash -e
+#
+# QEMU Release Script
+#
+# Copyright IBM, Corp. 2012
+#
+# Authors:
+#  Anthony Liguori 
+#
+# This work is licensed under the terms of the GNU GPLv2 or later.
+# See the COPYING file in the top-level directory.
+
+src="$1"
+version="$2"
+destination=qemu-${version}
+
+git clone "${src}" ${destination}
+pushd ${destination}
+git checkout "v${version}"
+git submodule update --init
+(cd roms/seabios && git describe --tags --long --dirty > .version)
+(cd roms/skiboot && ./make_version.sh > .version)
+# Fetch edk2 submodule's submodules, since it won't have access to them via
+# the tarball later.
+#
+# A more uniform way to handle this sort of situation would be nice, but we
+# don't necessarily have much control over how a submodule handles its
+# submodule dependencies, so we continue to handle these on a case-by-case
+# basis for now.
+(cd roms/edk2 && git submodule update --init)
+popd
+tar --exclude=.git -cjf ${destination}.tar.bz2 ${destination}
+rm -rf ${destination}
diff --git a/scripts/meson.build b/scripts/meson.build
new file mode 100644
index 000000000..e8cc63896
--- /dev/null
+++ b/scripts/meson.build
@@ -0,0 +1,3 @@
+if 'CONFIG_TRACE_SYSTEMTAP' in config_host
+  install_data('qemu-trace-stap', install_dir: get_option('bindir'))
+endif
diff --git a/scripts/minikconf.py b/scripts/minikconf.py
new file mode 100644
index 000000000..bcd91015d
--- /dev/null
+++ b/scripts/minikconf.py
@@ -0,0 +1,711 @@
+#!/usr/bin/env python3
+#
+# Mini-Kconfig parser
+#
+# Copyright (c) 2015 Red Hat Inc.
+#
+# Authors:
+#  Paolo Bonzini 
+#
+# This work is licensed under the terms of the GNU GPL, version 2
+# or, at your option, any later version.  See the COPYING file in
+# the top-level directory.
+
+import os
+import sys
+import re
+import random
+
+__all__ = [ 'KconfigDataError', 'KconfigParserError',
+            'KconfigData', 'KconfigParser' ,
+            'defconfig', 'allyesconfig', 'allnoconfig', 'randconfig' ]
+
+def debug_print(*args):
+    #print('# ' + (' '.join(str(x) for x in args)))
+    pass
+
+# -------------------------------------------
+# KconfigData implements the Kconfig semantics.  For now it can only
+# detect undefined symbols, i.e. symbols that were referenced in
+# assignments or dependencies but were not declared with "config FOO".
+#
+# Semantic actions are represented by methods called do_*.  The do_var
+# method return the semantic value of a variable (which right now is
+# just its name).
+# -------------------------------------------
+
+class KconfigDataError(Exception):
+    def __init__(self, msg):
+        self.msg = msg
+
+    def __str__(self):
+        return self.msg
+
+allyesconfig = lambda x: True
+allnoconfig = lambda x: False
+defconfig = lambda x: x
+randconfig = lambda x: random.randint(0, 1) == 1
+
+class KconfigData:
+    class Expr:
+        def __and__(self, rhs):
+            return KconfigData.AND(self, rhs)
+        def __or__(self, rhs):
+            return KconfigData.OR(self, rhs)
+        def __invert__(self):
+            return KconfigData.NOT(self)
+
+        # Abstract methods
+        def add_edges_to(self, var):
+            pass
+        def evaluate(self):
+            assert False
+
+    class AND(Expr):
+        def __init__(self, lhs, rhs):
+            self.lhs = lhs
+            self.rhs = rhs
+        def __str__(self):
+            return "(%s && %s)" % (self.lhs, self.rhs)
+
+        def add_edges_to(self, var):
+            self.lhs.add_edges_to(var)
+            self.rhs.add_edges_to(var)
+        def evaluate(self):
+            return self.lhs.evaluate() and self.rhs.evaluate()
+
+    class OR(Expr):
+        def __init__(self, lhs, rhs):
+            self.lhs = lhs
+            self.rhs = rhs
+        def __str__(self):
+            return "(%s || %s)" % (self.lhs, self.rhs)
+
+        def add_edges_to(self, var):
+            self.lhs.add_edges_to(var)
+            self.rhs.add_edges_to(var)
+        def evaluate(self):
+            return self.lhs.evaluate() or self.rhs.evaluate()
+
+    class NOT(Expr):
+        def __init__(self, lhs):
+            self.lhs = lhs
+        def __str__(self):
+            return "!%s" % (self.lhs)
+
+        def add_edges_to(self, var):
+            self.lhs.add_edges_to(var)
+        def evaluate(self):
+            return not self.lhs.evaluate()
+
+    class Var(Expr):
+        def __init__(self, name):
+            self.name = name
+            self.value = None
+            self.outgoing = set()
+            self.clauses_for_var = list()
+        def __str__(self):
+            return self.name
+
+        def has_value(self):
+            return not (self.value is None)
+        def set_value(self, val, clause):
+            self.clauses_for_var.append(clause)
+            if self.has_value() and self.value != val:
+                print("The following clauses were found for " + self.name)
+                for i in self.clauses_for_var:
+                    print("    " + str(i), file=sys.stderr)
+                raise KconfigDataError('contradiction between clauses when setting %s' % self)
+            debug_print("=> %s is now %s" % (self.name, val))
+            self.value = val
+
+        # depth first search of the dependency graph
+        def dfs(self, visited, f):
+            if self in visited:
+                return
+            visited.add(self)
+            for v in self.outgoing:
+                v.dfs(visited, f)
+            f(self)
+
+        def add_edges_to(self, var):
+            self.outgoing.add(var)
+        def evaluate(self):
+            if not self.has_value():
+                raise KconfigDataError('cycle found including %s' % self)
+            return self.value
+
+    class Clause:
+        def __init__(self, dest):
+            self.dest = dest
+        def priority(self):
+            return 0
+        def process(self):
+            pass
+
+    class AssignmentClause(Clause):
+        def __init__(self, dest, value):
+            KconfigData.Clause.__init__(self, dest)
+            self.value = value
+        def __str__(self):
+            return "CONFIG_%s=%s" % (self.dest, 'y' if self.value else 'n')
+
+        def process(self):
+            self.dest.set_value(self.value, self)
+
+    class DefaultClause(Clause):
+        def __init__(self, dest, value, cond=None):
+            KconfigData.Clause.__init__(self, dest)
+            self.value = value
+            self.cond = cond
+            if not (self.cond is None):
+                self.cond.add_edges_to(self.dest)
+        def __str__(self):
+            value = 'y' if self.value else 'n'
+            if self.cond is None:
+                return "config %s default %s" % (self.dest, value)
+            else:
+                return "config %s default %s if %s" % (self.dest, value, self.cond)
+
+        def priority(self):
+            # Defaults are processed just before leaving the variable
+            return -1
+        def process(self):
+            if not self.dest.has_value() and \
+                    (self.cond is None or self.cond.evaluate()):
+                self.dest.set_value(self.value, self)
+
+    class DependsOnClause(Clause):
+        def __init__(self, dest, expr):
+            KconfigData.Clause.__init__(self, dest)
+            self.expr = expr
+            self.expr.add_edges_to(self.dest)
+        def __str__(self):
+            return "config %s depends on %s" % (self.dest, self.expr)
+
+        def process(self):
+            if not self.expr.evaluate():
+                self.dest.set_value(False, self)
+
+    class SelectClause(Clause):
+        def __init__(self, dest, cond):
+            KconfigData.Clause.__init__(self, dest)
+            self.cond = cond
+            self.cond.add_edges_to(self.dest)
+        def __str__(self):
+            return "select %s if %s" % (self.dest, self.cond)
+
+        def process(self):
+            if self.cond.evaluate():
+                self.dest.set_value(True, self)
+
+    def __init__(self, value_mangler=defconfig):
+        self.value_mangler = value_mangler
+        self.previously_included = []
+        self.incl_info = None
+        self.defined_vars = set()
+        self.referenced_vars = dict()
+        self.clauses = list()
+
+    # semantic analysis -------------
+
+    def check_undefined(self):
+        undef = False
+        for i in self.referenced_vars:
+            if not (i in self.defined_vars):
+                print("undefined symbol %s" % (i), file=sys.stderr)
+                undef = True
+        return undef
+
+    def compute_config(self):
+        if self.check_undefined():
+            raise KconfigDataError("there were undefined symbols")
+            return None
+
+        debug_print("Input:")
+        for clause in self.clauses:
+            debug_print(clause)
+
+        debug_print("\nDependency graph:")
+        for i in self.referenced_vars:
+            debug_print(i, "->", [str(x) for x in self.referenced_vars[i].outgoing])
+
+        # The reverse of the depth-first order is the topological sort
+        dfo = dict()
+        visited = set()
+        debug_print("\n")
+        def visit_fn(var):
+            debug_print(var, "has DFS number", len(dfo))
+            dfo[var] = len(dfo)
+
+        for name, v in self.referenced_vars.items():
+            self.do_default(v, False)
+            v.dfs(visited, visit_fn)
+
+        # Put higher DFS numbers and higher priorities first.  This
+        # places the clauses in topological order and places defaults
+        # after assignments and dependencies.
+        self.clauses.sort(key=lambda x: (-dfo[x.dest], -x.priority()))
+
+        debug_print("\nSorted clauses:")
+        for clause in self.clauses:
+            debug_print(clause)
+            clause.process()
+
+        debug_print("")
+        values = dict()
+        for name, v in self.referenced_vars.items():
+            debug_print("Evaluating", name)
+            values[name] = v.evaluate()
+
+        return values
+
+    # semantic actions -------------
+
+    def do_declaration(self, var):
+        if (var in self.defined_vars):
+            raise KconfigDataError('variable "' + var + '" defined twice')
+
+        self.defined_vars.add(var.name)
+
+    # var is a string with the variable's name.
+    def do_var(self, var):
+        if (var in self.referenced_vars):
+            return self.referenced_vars[var]
+
+        var_obj = self.referenced_vars[var] = KconfigData.Var(var)
+        return var_obj
+
+    def do_assignment(self, var, val):
+        self.clauses.append(KconfigData.AssignmentClause(var, val))
+
+    def do_default(self, var, val, cond=None):
+        val = self.value_mangler(val)
+        self.clauses.append(KconfigData.DefaultClause(var, val, cond))
+
+    def do_depends_on(self, var, expr):
+        self.clauses.append(KconfigData.DependsOnClause(var, expr))
+
+    def do_select(self, var, symbol, cond=None):
+        cond = (cond & var) if cond is not None else var
+        self.clauses.append(KconfigData.SelectClause(symbol, cond))
+
+    def do_imply(self, var, symbol, cond=None):
+        # "config X imply Y [if COND]" is the same as
+        # "config Y default y if X [&& COND]"
+        cond = (cond & var) if cond is not None else var
+        self.do_default(symbol, True, cond)
+
+# -------------------------------------------
+# KconfigParser implements a recursive descent parser for (simplified)
+# Kconfig syntax.
+# -------------------------------------------
+
+# tokens table
+TOKENS = {}
+TOK_NONE = -1
+TOK_LPAREN = 0;   TOKENS[TOK_LPAREN] = '"("';
+TOK_RPAREN = 1;   TOKENS[TOK_RPAREN] = '")"';
+TOK_EQUAL = 2;    TOKENS[TOK_EQUAL] = '"="';
+TOK_AND = 3;      TOKENS[TOK_AND] = '"&&"';
+TOK_OR = 4;       TOKENS[TOK_OR] = '"||"';
+TOK_NOT = 5;      TOKENS[TOK_NOT] = '"!"';
+TOK_DEPENDS = 6;  TOKENS[TOK_DEPENDS] = '"depends"';
+TOK_ON = 7;       TOKENS[TOK_ON] = '"on"';
+TOK_SELECT = 8;   TOKENS[TOK_SELECT] = '"select"';
+TOK_IMPLY = 9;    TOKENS[TOK_IMPLY] = '"imply"';
+TOK_CONFIG = 10;  TOKENS[TOK_CONFIG] = '"config"';
+TOK_DEFAULT = 11; TOKENS[TOK_DEFAULT] = '"default"';
+TOK_Y = 12;       TOKENS[TOK_Y] = '"y"';
+TOK_N = 13;       TOKENS[TOK_N] = '"n"';
+TOK_SOURCE = 14;  TOKENS[TOK_SOURCE] = '"source"';
+TOK_BOOL = 15;    TOKENS[TOK_BOOL] = '"bool"';
+TOK_IF = 16;      TOKENS[TOK_IF] = '"if"';
+TOK_ID = 17;      TOKENS[TOK_ID] = 'identifier';
+TOK_EOF = 18;     TOKENS[TOK_EOF] = 'end of file';
+
+class KconfigParserError(Exception):
+    def __init__(self, parser, msg, tok=None):
+        self.loc = parser.location()
+        tok = tok or parser.tok
+        if tok != TOK_NONE:
+            location = TOKENS.get(tok, None) or ('"%s"' % tok)
+            msg = '%s before %s' % (msg, location)
+        self.msg = msg
+
+    def __str__(self):
+        return "%s: %s" % (self.loc, self.msg)
+
+class KconfigParser:
+
+    @classmethod
+    def parse(self, fp, mode=None):
+        data = KconfigData(mode or KconfigParser.defconfig)
+        parser = KconfigParser(data)
+        parser.parse_file(fp)
+        return data
+
+    def __init__(self, data):
+        self.data = data
+
+    def parse_file(self, fp):
+        self.abs_fname = os.path.abspath(fp.name)
+        self.fname = fp.name
+        self.data.previously_included.append(self.abs_fname)
+        self.src = fp.read()
+        if self.src == '' or self.src[-1] != '\n':
+            self.src += '\n'
+        self.cursor = 0
+        self.line = 1
+        self.line_pos = 0
+        self.get_token()
+        self.parse_config()
+
+    def do_assignment(self, var, val):
+        if not var.startswith("CONFIG_"):
+            raise Error('assigned variable should start with CONFIG_')
+        var = self.data.do_var(var[7:])
+        self.data.do_assignment(var, val)
+
+    # file management -----
+
+    def error_path(self):
+        inf = self.data.incl_info
+        res = ""
+        while inf:
+            res = ("In file included from %s:%d:\n" % (inf['file'],
+                                                       inf['line'])) + res
+            inf = inf['parent']
+        return res
+
+    def location(self):
+        col = 1
+        for ch in self.src[self.line_pos:self.pos]:
+            if ch == '\t':
+                col += 8 - ((col - 1) % 8)
+            else:
+                col += 1
+        return '%s%s:%d:%d' %(self.error_path(), self.fname, self.line, col)
+
+    def do_include(self, include):
+        incl_abs_fname = os.path.join(os.path.dirname(self.abs_fname),
+                                      include)
+        # catch inclusion cycle
+        inf = self.data.incl_info
+        while inf:
+            if incl_abs_fname == os.path.abspath(inf['file']):
+                raise KconfigParserError(self, "Inclusion loop for %s"
+                                    % include)
+            inf = inf['parent']
+
+        # skip multiple include of the same file
+        if incl_abs_fname in self.data.previously_included:
+            return
+        try:
+            fp = open(incl_abs_fname, 'rt', encoding='utf-8')
+        except IOError as e:
+            raise KconfigParserError(self,
+                                '%s: %s' % (e.strerror, include))
+
+        inf = self.data.incl_info
+        self.data.incl_info = { 'file': self.fname, 'line': self.line,
+                'parent': inf }
+        KconfigParser(self.data).parse_file(fp)
+        self.data.incl_info = inf
+
+    # recursive descent parser -----
+
+    # y_or_n: Y | N
+    def parse_y_or_n(self):
+        if self.tok == TOK_Y:
+            self.get_token()
+            return True
+        if self.tok == TOK_N:
+            self.get_token()
+            return False
+        raise KconfigParserError(self, 'Expected "y" or "n"')
+
+    # var: ID
+    def parse_var(self):
+        if self.tok == TOK_ID:
+            val = self.val
+            self.get_token()
+            return self.data.do_var(val)
+        else:
+            raise KconfigParserError(self, 'Expected identifier')
+
+    # assignment_var: ID (starting with "CONFIG_")
+    def parse_assignment_var(self):
+        if self.tok == TOK_ID:
+            val = self.val
+            if not val.startswith("CONFIG_"):
+                raise KconfigParserError(self,
+                           'Expected identifier starting with "CONFIG_"', TOK_NONE)
+            self.get_token()
+            return self.data.do_var(val[7:])
+        else:
+            raise KconfigParserError(self, 'Expected identifier')
+
+    # assignment: var EQUAL y_or_n
+    def parse_assignment(self):
+        var = self.parse_assignment_var()
+        if self.tok != TOK_EQUAL:
+            raise KconfigParserError(self, 'Expected "="')
+        self.get_token()
+        self.data.do_assignment(var, self.parse_y_or_n())
+
+    # primary: NOT primary
+    #       | LPAREN expr RPAREN
+    #       | var
+    def parse_primary(self):
+        if self.tok == TOK_NOT:
+            self.get_token()
+            val = ~self.parse_primary()
+        elif self.tok == TOK_LPAREN:
+            self.get_token()
+            val = self.parse_expr()
+            if self.tok != TOK_RPAREN:
+                raise KconfigParserError(self, 'Expected ")"')
+            self.get_token()
+        elif self.tok == TOK_ID:
+            val = self.parse_var()
+        else:
+            raise KconfigParserError(self, 'Expected "!" or "(" or identifier')
+        return val
+
+    # disj: primary (OR primary)*
+    def parse_disj(self):
+        lhs = self.parse_primary()
+        while self.tok == TOK_OR:
+            self.get_token()
+            lhs = lhs | self.parse_primary()
+        return lhs
+
+    # expr: disj (AND disj)*
+    def parse_expr(self):
+        lhs = self.parse_disj()
+        while self.tok == TOK_AND:
+            self.get_token()
+            lhs = lhs & self.parse_disj()
+        return lhs
+
+    # condition: IF expr
+    #       | empty
+    def parse_condition(self):
+        if self.tok == TOK_IF:
+            self.get_token()
+            return self.parse_expr()
+        else:
+            return None
+
+    # property: DEFAULT y_or_n condition
+    #       | DEPENDS ON expr
+    #       | SELECT var condition
+    #       | BOOL
+    def parse_property(self, var):
+        if self.tok == TOK_DEFAULT:
+            self.get_token()
+            val = self.parse_y_or_n()
+            cond = self.parse_condition()
+            self.data.do_default(var, val, cond)
+        elif self.tok == TOK_DEPENDS:
+            self.get_token()
+            if self.tok != TOK_ON:
+                raise KconfigParserError(self, 'Expected "on"')
+            self.get_token()
+            self.data.do_depends_on(var, self.parse_expr())
+        elif self.tok == TOK_SELECT:
+            self.get_token()
+            symbol = self.parse_var()
+            cond = self.parse_condition()
+            self.data.do_select(var, symbol, cond)
+        elif self.tok == TOK_IMPLY:
+            self.get_token()
+            symbol = self.parse_var()
+            cond = self.parse_condition()
+            self.data.do_imply(var, symbol, cond)
+        elif self.tok == TOK_BOOL:
+            self.get_token()
+        else:
+            raise KconfigParserError(self, 'Error in recursive descent?')
+
+    # properties: properties property
+    #       | /* empty */
+    def parse_properties(self, var):
+        had_default = False
+        while self.tok == TOK_DEFAULT or self.tok == TOK_DEPENDS or \
+              self.tok == TOK_SELECT or self.tok == TOK_BOOL or \
+              self.tok == TOK_IMPLY:
+            self.parse_property(var)
+
+        # for nicer error message
+        if self.tok != TOK_SOURCE and self.tok != TOK_CONFIG and \
+           self.tok != TOK_ID and self.tok != TOK_EOF:
+            raise KconfigParserError(self, 'expected "source", "config", identifier, '
+                    + '"default", "depends on", "imply" or "select"')
+
+    # declaration: config var properties
+    def parse_declaration(self):
+        if self.tok == TOK_CONFIG:
+            self.get_token()
+            var = self.parse_var()
+            self.data.do_declaration(var)
+            self.parse_properties(var)
+        else:
+            raise KconfigParserError(self, 'Error in recursive descent?')
+
+    # clause: SOURCE
+    #       | declaration
+    #       | assignment
+    def parse_clause(self):
+        if self.tok == TOK_SOURCE:
+            val = self.val
+            self.get_token()
+            self.do_include(val)
+        elif self.tok == TOK_CONFIG:
+            self.parse_declaration()
+        elif self.tok == TOK_ID:
+            self.parse_assignment()
+        else:
+            raise KconfigParserError(self, 'expected "source", "config" or identifier')
+
+    # config: clause+ EOF
+    def parse_config(self):
+        while self.tok != TOK_EOF:
+            self.parse_clause()
+        return self.data
+
+    # scanner -----
+
+    def get_token(self):
+        while True:
+            self.tok = self.src[self.cursor]
+            self.pos = self.cursor
+            self.cursor += 1
+
+            self.val = None
+            self.tok = self.scan_token()
+            if self.tok is not None:
+                return
+
+    def check_keyword(self, rest):
+        if not self.src.startswith(rest, self.cursor):
+            return False
+        length = len(rest)
+        if self.src[self.cursor + length].isalnum() or self.src[self.cursor + length] == '_':
+            return False
+        self.cursor += length
+        return True
+
+    def scan_token(self):
+        if self.tok == '#':
+            self.cursor = self.src.find('\n', self.cursor)
+            return None
+        elif self.tok == '=':
+            return TOK_EQUAL
+        elif self.tok == '(':
+            return TOK_LPAREN
+        elif self.tok == ')':
+            return TOK_RPAREN
+        elif self.tok == '&' and self.src[self.pos+1] == '&':
+            self.cursor += 1
+            return TOK_AND
+        elif self.tok == '|' and self.src[self.pos+1] == '|':
+            self.cursor += 1
+            return TOK_OR
+        elif self.tok == '!':
+            return TOK_NOT
+        elif self.tok == 'd' and self.check_keyword("epends"):
+            return TOK_DEPENDS
+        elif self.tok == 'o' and self.check_keyword("n"):
+            return TOK_ON
+        elif self.tok == 's' and self.check_keyword("elect"):
+            return TOK_SELECT
+        elif self.tok == 'i' and self.check_keyword("mply"):
+            return TOK_IMPLY
+        elif self.tok == 'c' and self.check_keyword("onfig"):
+            return TOK_CONFIG
+        elif self.tok == 'd' and self.check_keyword("efault"):
+            return TOK_DEFAULT
+        elif self.tok == 'b' and self.check_keyword("ool"):
+            return TOK_BOOL
+        elif self.tok == 'i' and self.check_keyword("f"):
+            return TOK_IF
+        elif self.tok == 'y' and self.check_keyword(""):
+            return TOK_Y
+        elif self.tok == 'n' and self.check_keyword(""):
+            return TOK_N
+        elif (self.tok == 's' and self.check_keyword("ource")) or \
+              self.tok == 'i' and self.check_keyword("nclude"):
+            # source FILENAME
+            # include FILENAME
+            while self.src[self.cursor].isspace():
+                self.cursor += 1
+            start = self.cursor
+            self.cursor = self.src.find('\n', self.cursor)
+            self.val = self.src[start:self.cursor]
+            return TOK_SOURCE
+        elif self.tok.isalnum():
+            # identifier
+            while self.src[self.cursor].isalnum() or self.src[self.cursor] == '_':
+                self.cursor += 1
+            self.val = self.src[self.pos:self.cursor]
+            return TOK_ID
+        elif self.tok == '\n':
+            if self.cursor == len(self.src):
+                return TOK_EOF
+            self.line += 1
+            self.line_pos = self.cursor
+        elif not self.tok.isspace():
+            raise KconfigParserError(self, 'invalid input')
+
+        return None
+
+if __name__ == '__main__':
+    argv = sys.argv
+    mode = defconfig
+    if len(sys.argv) > 1:
+        if argv[1] == '--defconfig':
+            del argv[1]
+        elif argv[1] == '--randconfig':
+            random.seed()
+            mode = randconfig
+            del argv[1]
+        elif argv[1] == '--allyesconfig':
+            mode = allyesconfig
+            del argv[1]
+        elif argv[1] == '--allnoconfig':
+            mode = allnoconfig
+            del argv[1]
+
+    if len(argv) == 1:
+        print ("%s: at least one argument is required" % argv[0], file=sys.stderr)
+        sys.exit(1)
+
+    if argv[1].startswith('-'):
+        print ("%s: invalid option %s" % (argv[0], argv[1]), file=sys.stderr)
+        sys.exit(1)
+
+    data = KconfigData(mode)
+    parser = KconfigParser(data)
+    external_vars = set()
+    for arg in argv[3:]:
+        m = re.match(r'^(CONFIG_[A-Z0-9_]+)=([yn]?)$', arg)
+        if m is not None:
+            name, value = m.groups()
+            parser.do_assignment(name, value == 'y')
+            external_vars.add(name[7:])
+        else:
+            fp = open(arg, 'rt', encoding='utf-8')
+            parser.parse_file(fp)
+            fp.close()
+
+    config = data.compute_config()
+    for key in sorted(config.keys()):
+        if key not in external_vars and config[key]:
+            print ('CONFIG_%s=y' % key)
+
+    deps = open(argv[2], 'wt', encoding='utf-8')
+    for fname in data.previously_included:
+        print ('%s: %s' % (argv[1], fname), file=deps)
+    deps.close()
diff --git a/scripts/modules/module_block.py b/scripts/modules/module_block.py
new file mode 100644
index 000000000..1109df827
--- /dev/null
+++ b/scripts/modules/module_block.py
@@ -0,0 +1,99 @@
+#!/usr/bin/env python3
+#
+# Module information generator
+#
+# Copyright Red Hat, Inc. 2015 - 2016
+#
+# Authors:
+#  Marc Mari 
+#
+# This work is licensed under the terms of the GNU GPL, version 2.
+# See the COPYING file in the top-level directory.
+
+import sys
+import os
+
+def get_string_struct(line):
+    data = line.split()
+
+    # data[0] -> struct element name
+    # data[1] -> =
+    # data[2] -> value
+
+    return data[2].replace('"', '')[:-1]
+
+def add_module(fheader, library, format_name, protocol_name):
+    lines = []
+    lines.append('.library_name = "' + library + '",')
+    if format_name != "":
+        lines.append('.format_name = "' + format_name + '",')
+    if protocol_name != "":
+        lines.append('.protocol_name = "' + protocol_name + '",')
+
+    text = '\n        '.join(lines)
+    fheader.write('\n    {\n        ' + text + '\n    },')
+
+def process_file(fheader, filename):
+    # This parser assumes the coding style rules are being followed
+    with open(filename, "r") as cfile:
+        found_start = False
+        library, _ = os.path.splitext(os.path.basename(filename))
+        for line in cfile:
+            if found_start:
+                line = line.replace('\n', '')
+                if line.find(".format_name") != -1:
+                    format_name = get_string_struct(line)
+                elif line.find(".protocol_name") != -1:
+                    protocol_name = get_string_struct(line)
+                elif line == "};":
+                    add_module(fheader, library, format_name, protocol_name)
+                    found_start = False
+            elif line.find("static BlockDriver") != -1:
+                found_start = True
+                format_name = ""
+                protocol_name = ""
+
+def print_top(fheader):
+    fheader.write('''/* AUTOMATICALLY GENERATED, DO NOT MODIFY */
+/*
+ * QEMU Block Module Infrastructure
+ *
+ * Authors:
+ *  Marc Mari       
+ */
+
+''')
+
+    fheader.write('''#ifndef QEMU_MODULE_BLOCK_H
+#define QEMU_MODULE_BLOCK_H
+
+static const struct {
+    const char *format_name;
+    const char *protocol_name;
+    const char *library_name;
+} block_driver_modules[] = {''')
+
+def print_bottom(fheader):
+    fheader.write('''
+};
+
+#endif
+''')
+
+if __name__ == '__main__':
+    # First argument: output file
+    # All other arguments: modules source files (.c)
+    output_file = sys.argv[1]
+    with open(output_file, 'w') as fheader:
+        print_top(fheader)
+
+        for filename in sys.argv[2:]:
+            if os.path.isfile(filename):
+                process_file(fheader, filename)
+            else:
+                print("File " + filename + " does not exist.", file=sys.stderr)
+                sys.exit(1)
+
+        print_bottom(fheader)
+
+    sys.exit(0)
diff --git a/scripts/mtest2make.py b/scripts/mtest2make.py
new file mode 100644
index 000000000..25ee6887c
--- /dev/null
+++ b/scripts/mtest2make.py
@@ -0,0 +1,131 @@
+#! /usr/bin/env python3
+
+# Create Makefile targets to run tests, from Meson's test introspection data.
+#
+# Author: Paolo Bonzini 
+
+from collections import defaultdict
+import itertools
+import json
+import os
+import shlex
+import sys
+
+class Suite(object):
+    def __init__(self):
+        self.tests = list()
+        self.slow_tests = list()
+        self.executables = set()
+
+print('''
+SPEED = quick
+
+# $1 = environment, $2 = test command, $3 = test name, $4 = dir
+.test-human-tap = $1 $(if $4,(cd $4 && $2),$2) < /dev/null | ./scripts/tap-driver.pl --test-name="$3" $(if $(V),,--show-failures-only)
+.test-human-exitcode = $1 $(PYTHON) scripts/test-driver.py $(if $4,-C$4) $(if $(V),--verbose) -- $2 < /dev/null
+.test-tap-tap = $1 $(if $4,(cd $4 && $2),$2) < /dev/null | sed "s/^[a-z][a-z]* [0-9]*/& $3/" || true
+.test-tap-exitcode = printf "%s\\n" 1..1 "`$1 $(if $4,(cd $4 && $2),$2) < /dev/null > /dev/null || echo "not "`ok 1 $3"
+.test.human-print = echo $(if $(V),'$1 $2','Running test $3') &&
+.test.env = MALLOC_PERTURB_=$${MALLOC_PERTURB_:-$$(( $${RANDOM:-0} % 255 + 1))}
+
+# $1 = test name, $2 = test target (human or tap)
+.test.run = $(call .test.$2-print,$(.test.env.$1),$(.test.cmd.$1),$(.test.name.$1)) $(call .test-$2-$(.test.driver.$1),$(.test.env.$1),$(.test.cmd.$1),$(.test.name.$1),$(.test.dir.$1))
+
+.test.output-format = human
+''')
+
+introspect = json.load(sys.stdin)
+i = 0
+
+def process_tests(test, targets, suites):
+    global i
+    env = ' '.join(('%s=%s' % (shlex.quote(k), shlex.quote(v))
+                    for k, v in test['env'].items()))
+    executable = test['cmd'][0]
+    try:
+        executable = os.path.relpath(executable)
+    except:
+        pass
+    if test['workdir'] is not None:
+        try:
+            test['cmd'][0] = os.path.relpath(executable, test['workdir'])
+        except:
+            test['cmd'][0] = executable
+    else:
+        test['cmd'][0] = executable
+    cmd = ' '.join((shlex.quote(x) for x in test['cmd']))
+    driver = test['protocol'] if 'protocol' in test else 'exitcode'
+
+    i += 1
+    if test['workdir'] is not None:
+        print('.test.dir.%d := %s' % (i, shlex.quote(test['workdir'])))
+
+    if 'depends' in test:
+        deps = (targets.get(x, []) for x in test['depends'])
+        deps = itertools.chain.from_iterable(deps)
+    else:
+        deps = ['all']
+
+    print('.test.name.%d := %s' % (i, test['name']))
+    print('.test.driver.%d := %s' % (i, driver))
+    print('.test.env.%d := $(.test.env) %s' % (i, env))
+    print('.test.cmd.%d := %s' % (i, cmd))
+    print('.test.deps.%d := %s' % (i, ' '.join(deps)))
+    print('.PHONY: run-test-%d' % (i,))
+    print('run-test-%d: $(.test.deps.%d)' % (i,i))
+    print('\t@$(call .test.run,%d,$(.test.output-format))' % (i,))
+
+    test_suites = test['suite'] or ['default']
+    is_slow = any(s.endswith('-slow') for s in test_suites)
+    for s in test_suites:
+        # The suite name in the introspection info is "PROJECT:SUITE"
+        s = s.split(':')[1]
+        if s.endswith('-slow'):
+            s = s[:-5]
+        if is_slow:
+            suites[s].slow_tests.append(i)
+        else:
+            suites[s].tests.append(i)
+        suites[s].executables.add(executable)
+
+def emit_prolog(suites, prefix):
+    all_tap = ' '.join(('%s-report-%s.tap' % (prefix, k) for k in suites.keys()))
+    print('.PHONY: %s %s-report.tap %s' % (prefix, prefix, all_tap))
+    print('%s: run-tests' % (prefix,))
+    print('%s-report.tap %s: %s-report%%.tap: all' % (prefix, all_tap, prefix))
+    print('''\t$(MAKE) .test.output-format=tap --quiet -Otarget V=1 %s$* | ./scripts/tap-merge.pl | tee "$@" \\
+              | ./scripts/tap-driver.pl $(if $(V),, --show-failures-only)''' % (prefix, ))
+
+def emit_suite(name, suite, prefix):
+    executables = ' '.join(suite.executables)
+    slow_test_numbers = ' '.join((str(x) for x in suite.slow_tests))
+    test_numbers = ' '.join((str(x) for x in suite.tests))
+    target = '%s-%s' % (prefix, name)
+    print('.test.quick.%s := %s' % (target, test_numbers))
+    print('.test.slow.%s := $(.test.quick.%s) %s' % (target, target, slow_test_numbers))
+    print('%s-build: %s' % (prefix, executables))
+    print('.PHONY: %s' % (target, ))
+    print('.PHONY: %s-report-%s.tap' % (prefix, name))
+    print('%s: run-tests' % (target, ))
+    print('ifneq ($(filter %s %s, $(MAKECMDGOALS)),)' % (target, prefix))
+    print('.tests += $(.test.$(SPEED).%s)' % (target, ))
+    print('endif')
+
+targets = {t['id']: [os.path.relpath(f) for f in t['filename']]
+           for t in introspect['targets']}
+
+testsuites = defaultdict(Suite)
+for test in introspect['tests']:
+    process_tests(test, targets, testsuites)
+emit_prolog(testsuites, 'check')
+for name, suite in testsuites.items():
+    emit_suite(name, suite, 'check')
+
+benchsuites = defaultdict(Suite)
+for test in introspect['benchmarks']:
+    process_tests(test, targets, benchsuites)
+emit_prolog(benchsuites, 'bench')
+for name, suite in benchsuites.items():
+    emit_suite(name, suite, 'bench')
+
+print('run-tests: $(patsubst %, run-test-%, $(.tests))')
diff --git a/scripts/nsis.py b/scripts/nsis.py
new file mode 100644
index 000000000..5135a0583
--- /dev/null
+++ b/scripts/nsis.py
@@ -0,0 +1,78 @@
+#!/usr/bin/env python3
+#
+# Copyright (C) 2020 Red Hat, Inc.
+#
+# SPDX-License-Identifier: GPL-2.0-or-later
+
+import argparse
+import glob
+import os
+import shutil
+import subprocess
+import tempfile
+
+
+def signcode(path):
+    cmd = os.environ.get("SIGNCODE")
+    if not cmd:
+        return
+    subprocess.run([cmd, path])
+
+
+def main():
+    parser = argparse.ArgumentParser(description="QEMU NSIS build helper.")
+    parser.add_argument("outfile")
+    parser.add_argument("prefix")
+    parser.add_argument("srcdir")
+    parser.add_argument("cpu")
+    parser.add_argument("nsisargs", nargs="*")
+    args = parser.parse_args()
+
+    destdir = tempfile.mkdtemp()
+    try:
+        subprocess.run(["make", "install", "DESTDIR=" + destdir + os.path.sep])
+        with open(
+            os.path.join(destdir + args.prefix, "system-emulations.nsh"), "w"
+        ) as nsh:
+            for exe in glob.glob(
+                os.path.join(destdir + args.prefix, "qemu-system-*.exe")
+            ):
+                exe = os.path.basename(exe)
+                arch = exe[12:-4]
+                nsh.write(
+                    """
+                Section "{0}" Section_{0}
+                SetOutPath "$INSTDIR"
+                File "${{BINDIR}}\\{1}"
+                SectionEnd
+                """.format(
+                        arch, exe
+                    )
+                )
+
+        for exe in glob.glob(os.path.join(destdir + args.prefix, "*.exe")):
+            signcode(exe)
+
+        makensis = [
+            "makensis",
+            "-V2",
+            "-NOCD",
+            "-DSRCDIR=" + args.srcdir,
+            "-DBINDIR=" + destdir + args.prefix,
+        ]
+        dlldir = "w32"
+        if args.cpu == "x86_64":
+            dlldir = "w64"
+            makensis += ["-DW64"]
+        if os.path.exists(os.path.join(args.srcdir, "dll")):
+            makensis += ["-DDLLDIR={0}/dll/{1}".format(args.srcdir, dlldir)]
+
+        makensis += ["-DOUTFILE=" + args.outfile] + args.nsisargs
+        subprocess.run(makensis)
+        signcode(args.outfile)
+    finally:
+        shutil.rmtree(destdir)
+
+
+if __name__ == "__main__":
+    main()
diff --git a/scripts/oss-fuzz/build.sh b/scripts/oss-fuzz/build.sh
new file mode 100755
index 000000000..c1af43fde
--- /dev/null
+++ b/scripts/oss-fuzz/build.sh
@@ -0,0 +1,113 @@
+#!/bin/sh -e
+#
+# OSS-Fuzz build script. See:
+# https://google.github.io/oss-fuzz/getting-started/new-project-guide/#buildsh
+#
+# The file is consumed by:
+# https://github.com/google/oss-fuzz/blob/master/projects/qemu/Dockerfiles
+#
+# This code is licensed under the GPL version 2 or later.  See
+# the COPYING file in the top-level directory.
+#
+
+# build project
+# e.g.
+# ./autogen.sh
+# ./configure
+# make -j$(nproc) all
+
+# build fuzzers
+# e.g.
+# $CXX $CXXFLAGS -std=c++11 -Iinclude \
+#     /path/to/name_of_fuzzer.cc -o $OUT/name_of_fuzzer \
+#     -fsanitize=fuzzer /path/to/library.a
+
+fatal () {
+    echo "Error : ${*}, exiting."
+    exit 1
+}
+
+OSS_FUZZ_BUILD_DIR="./build-oss-fuzz/"
+
+# There seems to be a bug in clang-11 (used for builds on oss-fuzz) :
+#   accel/tcg/cputlb.o: In function `load_memop':
+#   accel/tcg/cputlb.c:1505: undefined reference to `qemu_build_not_reached'
+#
+# When building with optimization, the compiler is expected to prove that the
+# statement cannot be reached, and remove it. For some reason clang-11 doesn't
+# remove it, resulting in an unresolved reference to qemu_build_not_reached
+# Undefine the __OPTIMIZE__ macro which compiler.h relies on to choose whether
+# to " #define qemu_build_not_reached()  g_assert_not_reached() "
+EXTRA_CFLAGS="$CFLAGS -U __OPTIMIZE__"
+
+if ! { [ -e "./COPYING" ] &&
+   [ -e "./MAINTAINERS" ] &&
+   [ -e "./Makefile" ] &&
+   [ -e "./docs" ] &&
+   [ -e "./VERSION" ] &&
+   [ -e "./linux-user" ] &&
+   [ -e "./softmmu" ];} ; then
+    fatal "Please run the script from the top of the QEMU tree"
+fi
+
+mkdir -p $OSS_FUZZ_BUILD_DIR || fatal "mkdir $OSS_FUZZ_BUILD_DIR failed"
+cd $OSS_FUZZ_BUILD_DIR || fatal "cd $OSS_FUZZ_BUILD_DIR failed"
+
+
+if [ -z ${OUT+x} ]; then
+    DEST_DIR=$(realpath "./DEST_DIR")
+else
+    DEST_DIR=$OUT
+fi
+
+mkdir -p "$DEST_DIR/lib/"  # Copy the shared libraries here
+
+# Build once to get the list of dynamic lib paths, and copy them over
+../configure --disable-werror --cc="$CC" --cxx="$CXX" --enable-fuzzing \
+    --prefix="$DEST_DIR" --bindir="$DEST_DIR" --datadir="$DEST_DIR/data/" \
+    --extra-cflags="$EXTRA_CFLAGS" --target-list="i386-softmmu"
+
+if ! make "-j$(nproc)" qemu-fuzz-i386; then
+    fatal "Build failed. Please specify a compiler with fuzzing support"\
+          "using the \$CC and \$CXX environment variables"\
+          "\nFor example: CC=clang CXX=clang++ $0"
+fi
+
+for i in $(ldd ./qemu-fuzz-i386 | cut -f3 -d' '); do
+    cp "$i" "$DEST_DIR/lib/"
+done
+rm qemu-fuzz-i386
+
+# Build a second time to build the final binary with correct rpath
+../configure --disable-werror --cc="$CC" --cxx="$CXX" --enable-fuzzing \
+    --prefix="$DEST_DIR" --bindir="$DEST_DIR" --datadir="$DEST_DIR/data/" \
+    --extra-cflags="$EXTRA_CFLAGS" --extra-ldflags="-Wl,-rpath,\$ORIGIN/lib" \
+    --target-list="i386-softmmu"
+make "-j$(nproc)" qemu-fuzz-i386 V=1
+
+# Copy over the datadir
+cp  -r ../pc-bios/ "$DEST_DIR/pc-bios"
+
+targets=$(./qemu-fuzz-i386 | awk '$1 ~ /\*/  {print $2}')
+base_copy="$DEST_DIR/qemu-fuzz-i386-target-$(echo "$targets" | head -n 1)"
+
+cp "./qemu-fuzz-i386" "$base_copy"
+
+# Run the fuzzer with no arguments, to print the help-string and get the list
+# of available fuzz-targets. Copy over the qemu-fuzz-i386, naming it according
+# to each available fuzz target (See 05509c8e6d fuzz: select fuzz target using
+# executable name)
+for target in $(echo "$targets" | tail -n +2);
+do
+    # Ignore the generic-fuzz target, as it requires some environment variables
+    # to be configured. We have some generic-fuzz-{pc-q35, floppy, ...} targets
+    # that are thin wrappers around this target that set the required
+    # environment variables according to predefined configs.
+    if [ "$target" != "generic-fuzz" ]; then
+        ln  $base_copy \
+            "$DEST_DIR/qemu-fuzz-i386-target-$target"
+    fi
+done
+
+echo "Done. The fuzzers are located in $DEST_DIR"
+exit 0
diff --git a/scripts/oss-fuzz/minimize_qtest_trace.py b/scripts/oss-fuzz/minimize_qtest_trace.py
new file mode 100755
index 000000000..5e405a0d5
--- /dev/null
+++ b/scripts/oss-fuzz/minimize_qtest_trace.py
@@ -0,0 +1,157 @@
+#!/usr/bin/env python3
+# -*- coding: utf-8 -*-
+
+"""
+This takes a crashing qtest trace and tries to remove superflous operations
+"""
+
+import sys
+import os
+import subprocess
+import time
+import struct
+
+QEMU_ARGS = None
+QEMU_PATH = None
+TIMEOUT = 5
+CRASH_TOKEN = None
+
+write_suffix_lookup = {"b": (1, "B"),
+                       "w": (2, "H"),
+                       "l": (4, "L"),
+                       "q": (8, "Q")}
+
+def usage():
+    sys.exit("""\
+Usage: QEMU_PATH="/path/to/qemu" QEMU_ARGS="args" {} input_trace output_trace
+By default, will try to use the second-to-last line in the output to identify
+whether the crash occred. Optionally, manually set a string that idenitifes the
+crash by setting CRASH_TOKEN=
+""".format((sys.argv[0])))
+
+def check_if_trace_crashes(trace, path):
+    global CRASH_TOKEN
+    with open(path, "w") as tracefile:
+        tracefile.write("".join(trace))
+
+    rc = subprocess.Popen("timeout -s 9 {timeout}s {qemu_path} {qemu_args} 2>&1\
+    < {trace_path}".format(timeout=TIMEOUT,
+                           qemu_path=QEMU_PATH,
+                           qemu_args=QEMU_ARGS,
+                           trace_path=path),
+                          shell=True,
+                          stdin=subprocess.PIPE,
+                          stdout=subprocess.PIPE)
+    stdo = rc.communicate()[0]
+    output = stdo.decode('unicode_escape')
+    if rc.returncode == 137:    # Timed Out
+        return False
+    if len(output.splitlines()) < 2:
+        return False
+
+    if CRASH_TOKEN is None:
+        CRASH_TOKEN = output.splitlines()[-2]
+
+    return CRASH_TOKEN in output
+
+
+def minimize_trace(inpath, outpath):
+    global TIMEOUT
+    with open(inpath) as f:
+        trace = f.readlines()
+    start = time.time()
+    if not check_if_trace_crashes(trace, outpath):
+        sys.exit("The input qtest trace didn't cause a crash...")
+    end = time.time()
+    print("Crashed in {} seconds".format(end-start))
+    TIMEOUT = (end-start)*5
+    print("Setting the timeout for {} seconds".format(TIMEOUT))
+    print("Identifying Crashes by this string: {}".format(CRASH_TOKEN))
+
+    i = 0
+    newtrace = trace[:]
+    # For each line
+    while i < len(newtrace):
+        # 1.) Try to remove it completely and reproduce the crash. If it works,
+        # we're done.
+        prior = newtrace[i]
+        print("Trying to remove {}".format(newtrace[i]))
+        # Try to remove the line completely
+        newtrace[i] = ""
+        if check_if_trace_crashes(newtrace, outpath):
+            i += 1
+            continue
+        newtrace[i] = prior
+
+        # 2.) Try to replace write{bwlq} commands with a write addr, len
+        # command. Since this can require swapping endianness, try both LE and
+        # BE options. We do this, so we can "trim" the writes in (3)
+        if (newtrace[i].startswith("write") and not
+            newtrace[i].startswith("write ")):
+            suffix = newtrace[i].split()[0][-1]
+            assert(suffix in write_suffix_lookup)
+            addr = int(newtrace[i].split()[1], 16)
+            value = int(newtrace[i].split()[2], 16)
+            for endianness in ['<', '>']:
+                data = struct.pack("{end}{size}".format(end=endianness,
+                                   size=write_suffix_lookup[suffix][1]),
+                                   value)
+                newtrace[i] = "write {addr} {size} 0x{data}\n".format(
+                    addr=hex(addr),
+                    size=hex(write_suffix_lookup[suffix][0]),
+                    data=data.hex())
+                if(check_if_trace_crashes(newtrace, outpath)):
+                    break
+            else:
+                newtrace[i] = prior
+
+        # 3.) If it is a qtest write command: write addr len data, try to split
+        # it into two separate write commands. If splitting the write down the
+        # middle does not work, try to move the pivot "left" and retry, until
+        # there is no space left. The idea is to prune unneccessary bytes from
+        # long writes, while accommodating arbitrary MemoryRegion access sizes
+        # and alignments.
+        if newtrace[i].startswith("write "):
+            addr = int(newtrace[i].split()[1], 16)
+            length = int(newtrace[i].split()[2], 16)
+            data = newtrace[i].split()[3][2:]
+            if length > 1:
+                leftlength = int(length/2)
+                rightlength = length - leftlength
+                newtrace.insert(i+1, "")
+                while leftlength > 0:
+                    newtrace[i] = "write {addr} {size} 0x{data}\n".format(
+                            addr=hex(addr),
+                            size=hex(leftlength),
+                            data=data[:leftlength*2])
+                    newtrace[i+1] = "write {addr} {size} 0x{data}\n".format(
+                            addr=hex(addr+leftlength),
+                            size=hex(rightlength),
+                            data=data[leftlength*2:])
+                    if check_if_trace_crashes(newtrace, outpath):
+                        break
+                    else:
+                        leftlength -= 1
+                        rightlength += 1
+                if check_if_trace_crashes(newtrace, outpath):
+                    i -= 1
+                else:
+                    newtrace[i] = prior
+                    del newtrace[i+1]
+        i += 1
+    check_if_trace_crashes(newtrace, outpath)
+
+
+if __name__ == '__main__':
+    if len(sys.argv) < 3:
+        usage()
+
+    QEMU_PATH = os.getenv("QEMU_PATH")
+    QEMU_ARGS = os.getenv("QEMU_ARGS")
+    if QEMU_PATH is None or QEMU_ARGS is None:
+        usage()
+    # if "accel" not in QEMU_ARGS:
+    #     QEMU_ARGS += " -accel qtest"
+    CRASH_TOKEN = os.getenv("CRASH_TOKEN")
+    QEMU_ARGS += " -qtest stdio -monitor none -serial none "
+    minimize_trace(sys.argv[1], sys.argv[2])
diff --git a/scripts/oss-fuzz/reorder_fuzzer_qtest_trace.py b/scripts/oss-fuzz/reorder_fuzzer_qtest_trace.py
new file mode 100755
index 000000000..890e1def8
--- /dev/null
+++ b/scripts/oss-fuzz/reorder_fuzzer_qtest_trace.py
@@ -0,0 +1,103 @@
+#!/usr/bin/env python3
+# -*- coding: utf-8 -*-
+
+"""
+Use this to convert qtest log info from a generic fuzzer input into a qtest
+trace that you can feed into a standard qemu-system process. Example usage:
+
+QEMU_FUZZ_ARGS="-machine q35,accel=qtest" QEMU_FUZZ_OBJECTS="*" \
+        ./i386-softmmu/qemu-fuzz-i386 --fuzz-target=generic-pci-fuzz
+# .. Finds some crash
+QTEST_LOG=1 FUZZ_SERIALIZE_QTEST=1 \
+QEMU_FUZZ_ARGS="-machine q35,accel=qtest" QEMU_FUZZ_OBJECTS="*" \
+        ./i386-softmmu/qemu-fuzz-i386 --fuzz-target=generic-pci-fuzz
+        /path/to/crash 2> qtest_log_output
+scripts/oss-fuzz/reorder_fuzzer_qtest_trace.py qtest_log_output > qtest_trace
+./i386-softmmu/qemu-fuzz-i386 -machine q35,accel=qtest \
+        -qtest stdin < qtest_trace
+
+### Details ###
+
+Some fuzzer make use of hooks that allow us to populate some memory range, just
+before a DMA read from that range. This means that the fuzzer can produce
+activity that looks like:
+    [start] read from mmio addr
+    [end]   read from mmio addr
+    [start] write to pio addr
+        [start] fill a DMA buffer just in time
+        [end]   fill a DMA buffer just in time
+        [start] fill a DMA buffer just in time
+        [end]   fill a DMA buffer just in time
+    [end]   write to pio addr
+    [start] read from mmio addr
+    [end]   read from mmio addr
+
+We annotate these "nested" DMA writes, so with QTEST_LOG=1 the QTest trace
+might look something like:
+[R +0.028431] readw 0x10000
+[R +0.028434] outl 0xc000 0xbeef  # Triggers a DMA read from 0xbeef and 0xbf00
+[DMA][R +0.034639] write 0xbeef 0x2 0xAAAA
+[DMA][R +0.034639] write 0xbf00 0x2 0xBBBB
+[R +0.028431] readw 0xfc000
+
+This script would reorder the above trace so it becomes:
+readw 0x10000
+write 0xbeef 0x2 0xAAAA
+write 0xbf00 0x2 0xBBBB
+outl 0xc000 0xbeef
+readw 0xfc000
+
+I.e. by the time, 0xc000 tries to read from DMA, those DMA buffers have already
+been set up, removing the need for the DMA hooks. We can simply provide this
+reordered trace via -qtest stdio to reproduce the input
+
+Note: this won't work for traces where the device tries to read from the same
+DMA region twice in between MMIO/PIO commands. E.g:
+    [R +0.028434] outl 0xc000 0xbeef
+    [DMA][R +0.034639] write 0xbeef 0x2 0xAAAA
+    [DMA][R +0.034639] write 0xbeef 0x2 0xBBBB
+
+The fuzzer will annotate suspected double-fetches with [DOUBLE-FETCH]. This
+script looks for these tags and warns the users that the resulting trace might
+not reproduce the bug.
+"""
+
+import sys
+
+__author__     = "Alexander Bulekov "
+__copyright__  = "Copyright (C) 2020, Red Hat, Inc."
+__license__    = "GPL version 2 or (at your option) any later version"
+
+__maintainer__ = "Alexander Bulekov"
+__email__      = "alxndr@bu.edu"
+
+
+def usage():
+    sys.exit("Usage: {} /path/to/qtest_log_output".format((sys.argv[0])))
+
+
+def main(filename):
+    with open(filename, "r") as f:
+        trace = f.readlines()
+
+    # Leave only lines that look like logged qtest commands
+    trace[:] = [x.strip() for x in trace if "[R +" in x
+                or "[S +" in x and "CLOSED" not in x]
+
+    for i in range(len(trace)):
+        if i+1 < len(trace):
+            if "[DMA]" in trace[i+1]:
+                if "[DOUBLE-FETCH]" in trace[i+1]:
+                    sys.stderr.write("Warning: Likely double fetch on line"
+                                     "{}.\n There will likely be problems "
+                                     "reproducing behavior with the "
+                                     "resulting qtest trace\n\n".format(i+1))
+                trace[i], trace[i+1] = trace[i+1], trace[i]
+    for line in trace:
+        print(line.split("]")[-1].strip())
+
+
+if __name__ == '__main__':
+    if len(sys.argv) == 1:
+        usage()
+    main(sys.argv[1])
diff --git a/scripts/performance/dissect.py b/scripts/performance/dissect.py
new file mode 100755
index 000000000..bf24f5092
--- /dev/null
+++ b/scripts/performance/dissect.py
@@ -0,0 +1,166 @@
+#!/usr/bin/env python3
+
+#  Print the percentage of instructions spent in each phase of QEMU
+#  execution.
+#
+#  Syntax:
+#  dissect.py [-h] --  [] \
+#                    []
+#
+#  [-h] - Print the script arguments help message.
+#
+#  Example of usage:
+#  dissect.py -- qemu-arm coulomb_double-arm
+#
+#  This file is a part of the project "TCG Continuous Benchmarking".
+#
+#  Copyright (C) 2020  Ahmed Karaman 
+#  Copyright (C) 2020  Aleksandar Markovic 
+#
+#  This program is free software: you can redistribute it and/or modify
+#  it under the terms of the GNU General Public License as published by
+#  the Free Software Foundation, either version 2 of the License, or
+#  (at your option) any later version.
+#
+#  This program is distributed in the hope that it will be useful,
+#  but WITHOUT ANY WARRANTY; without even the implied warranty of
+#  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+#  GNU General Public License for more details.
+#
+#  You should have received a copy of the GNU General Public License
+#  along with this program. If not, see .
+
+import argparse
+import os
+import subprocess
+import sys
+import tempfile
+
+
+def get_JIT_line(callgrind_data):
+    """
+    Search for the first instance of the JIT call in
+    the callgrind_annotate output when ran using --tree=caller
+    This is equivalent to the self number of instructions of JIT.
+
+    Parameters:
+    callgrind_data (list): callgrind_annotate output
+
+    Returns:
+    (int): Line number
+    """
+    line = -1
+    for i in range(len(callgrind_data)):
+        if callgrind_data[i].strip('\n') and \
+                callgrind_data[i].split()[-1] == "[???]":
+            line = i
+            break
+    if line == -1:
+        sys.exit("Couldn't locate the JIT call ... Exiting.")
+    return line
+
+
+def main():
+    # Parse the command line arguments
+    parser = argparse.ArgumentParser(
+        usage='dissect.py [-h] -- '
+        ' [] '
+        ' []')
+
+    parser.add_argument('command', type=str, nargs='+', help=argparse.SUPPRESS)
+
+    args = parser.parse_args()
+
+    # Extract the needed variables from the args
+    command = args.command
+
+    # Insure that valgrind is installed
+    check_valgrind = subprocess.run(
+        ["which", "valgrind"], stdout=subprocess.DEVNULL)
+    if check_valgrind.returncode:
+        sys.exit("Please install valgrind before running the script.")
+
+    # Save all intermediate files in a temporary directory
+    with tempfile.TemporaryDirectory() as tmpdirname:
+        # callgrind output file path
+        data_path = os.path.join(tmpdirname, "callgrind.data")
+        # callgrind_annotate output file path
+        annotate_out_path = os.path.join(tmpdirname, "callgrind_annotate.out")
+
+        # Run callgrind
+        callgrind = subprocess.run((["valgrind",
+                                     "--tool=callgrind",
+                                     "--callgrind-out-file=" + data_path]
+                                    + command),
+                                   stdout=subprocess.DEVNULL,
+                                   stderr=subprocess.PIPE)
+        if callgrind.returncode:
+            sys.exit(callgrind.stderr.decode("utf-8"))
+
+        # Save callgrind_annotate output
+        with open(annotate_out_path, "w") as output:
+            callgrind_annotate = subprocess.run(
+                ["callgrind_annotate", data_path, "--tree=caller"],
+                stdout=output,
+                stderr=subprocess.PIPE)
+            if callgrind_annotate.returncode:
+                sys.exit(callgrind_annotate.stderr.decode("utf-8"))
+
+        # Read the callgrind_annotate output to callgrind_data[]
+        callgrind_data = []
+        with open(annotate_out_path, 'r') as data:
+            callgrind_data = data.readlines()
+
+        # Line number with the total number of instructions
+        total_instructions_line_number = 20
+        # Get the total number of instructions
+        total_instructions_line_data = \
+            callgrind_data[total_instructions_line_number]
+        total_instructions = total_instructions_line_data.split()[0]
+        total_instructions = int(total_instructions.replace(',', ''))
+
+        # Line number with the JIT self number of instructions
+        JIT_self_instructions_line_number = get_JIT_line(callgrind_data)
+        # Get the JIT self number of instructions
+        JIT_self_instructions_line_data = \
+            callgrind_data[JIT_self_instructions_line_number]
+        JIT_self_instructions = JIT_self_instructions_line_data.split()[0]
+        JIT_self_instructions = int(JIT_self_instructions.replace(',', ''))
+
+        # Line number with the JIT self + inclusive number of instructions
+        # It's the line above the first JIT call when running with --tree=caller
+        JIT_total_instructions_line_number = JIT_self_instructions_line_number-1
+        # Get the JIT self + inclusive number of instructions
+        JIT_total_instructions_line_data = \
+            callgrind_data[JIT_total_instructions_line_number]
+        JIT_total_instructions = JIT_total_instructions_line_data.split()[0]
+        JIT_total_instructions = int(JIT_total_instructions.replace(',', ''))
+
+        # Calculate number of instructions in helpers and code generation
+        helpers_instructions = JIT_total_instructions-JIT_self_instructions
+        code_generation_instructions = total_instructions-JIT_total_instructions
+
+        # Print results (Insert commas in large numbers)
+        # Print total number of instructions
+        print('{:<20}{:>20}\n'.
+              format("Total Instructions:",
+                     format(total_instructions, ',')))
+        # Print code generation instructions and percentage
+        print('{:<20}{:>20}\t{:>6.3f}%'.
+              format("Code Generation:",
+                     format(code_generation_instructions, ","),
+                     (code_generation_instructions / total_instructions) * 100))
+        # Print JIT instructions and percentage
+        print('{:<20}{:>20}\t{:>6.3f}%'.
+              format("JIT Execution:",
+                     format(JIT_self_instructions, ","),
+                     (JIT_self_instructions / total_instructions) * 100))
+        # Print helpers instructions and percentage
+        print('{:<20}{:>20}\t{:>6.3f}%'.
+              format("Helpers:",
+                     format(helpers_instructions, ","),
+                     (helpers_instructions/total_instructions)*100))
+
+
+if __name__ == "__main__":
+    main()
diff --git a/scripts/performance/topN_callgrind.py b/scripts/performance/topN_callgrind.py
new file mode 100755
index 000000000..67c59197a
--- /dev/null
+++ b/scripts/performance/topN_callgrind.py
@@ -0,0 +1,140 @@
+#!/usr/bin/env python3
+
+#  Print the top N most executed functions in QEMU using callgrind.
+#  Syntax:
+#  topN_callgrind.py [-h] [-n]   -- \
+#            [] \
+#            []
+#
+#  [-h] - Print the script arguments help message.
+#  [-n] - Specify the number of top functions to print.
+#       - If this flag is not specified, the tool defaults to 25.
+#
+#  Example of usage:
+#  topN_callgrind.py -n 20 -- qemu-arm coulomb_double-arm
+#
+#  This file is a part of the project "TCG Continuous Benchmarking".
+#
+#  Copyright (C) 2020  Ahmed Karaman 
+#  Copyright (C) 2020  Aleksandar Markovic 
+#
+#  This program is free software: you can redistribute it and/or modify
+#  it under the terms of the GNU General Public License as published by
+#  the Free Software Foundation, either version 2 of the License, or
+#  (at your option) any later version.
+#
+#  This program is distributed in the hope that it will be useful,
+#  but WITHOUT ANY WARRANTY; without even the implied warranty of
+#  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+#  GNU General Public License for more details.
+#
+#  You should have received a copy of the GNU General Public License
+#  along with this program. If not, see .
+
+import argparse
+import os
+import subprocess
+import sys
+
+
+# Parse the command line arguments
+parser = argparse.ArgumentParser(
+    usage='topN_callgrind.py [-h] [-n]   -- '
+          ' [] '
+          ' []')
+
+parser.add_argument('-n', dest='top', type=int, default=25,
+                    help='Specify the number of top functions to print.')
+
+parser.add_argument('command', type=str, nargs='+', help=argparse.SUPPRESS)
+
+args = parser.parse_args()
+
+# Extract the needed variables from the args
+command = args.command
+top = args.top
+
+# Insure that valgrind is installed
+check_valgrind_presence = subprocess.run(["which", "valgrind"],
+                                         stdout=subprocess.DEVNULL)
+if check_valgrind_presence.returncode:
+    sys.exit("Please install valgrind before running the script!")
+
+# Run callgrind
+callgrind = subprocess.run((
+    ["valgrind", "--tool=callgrind", "--callgrind-out-file=/tmp/callgrind.data"]
+    + command),
+    stdout=subprocess.DEVNULL,
+    stderr=subprocess.PIPE)
+if callgrind.returncode:
+    sys.exit(callgrind.stderr.decode("utf-8"))
+
+# Save callgrind_annotate output to /tmp/callgrind_annotate.out
+with open("/tmp/callgrind_annotate.out", "w") as output:
+    callgrind_annotate = subprocess.run(["callgrind_annotate",
+                                         "/tmp/callgrind.data"],
+                                        stdout=output,
+                                        stderr=subprocess.PIPE)
+    if callgrind_annotate.returncode:
+        os.unlink('/tmp/callgrind.data')
+        output.close()
+        os.unlink('/tmp/callgrind_annotate.out')
+        sys.exit(callgrind_annotate.stderr.decode("utf-8"))
+
+# Read the callgrind_annotate output to callgrind_data[]
+callgrind_data = []
+with open('/tmp/callgrind_annotate.out', 'r') as data:
+    callgrind_data = data.readlines()
+
+# Line number with the total number of instructions
+total_instructions_line_number = 20
+
+# Get the total number of instructions
+total_instructions_line_data = callgrind_data[total_instructions_line_number]
+total_number_of_instructions = total_instructions_line_data.split(' ')[0]
+total_number_of_instructions = int(
+    total_number_of_instructions.replace(',', ''))
+
+# Line number with the top function
+first_func_line = 25
+
+# Number of functions recorded by callgrind, last two lines are always empty
+number_of_functions = len(callgrind_data) - first_func_line - 2
+
+# Limit the number of top functions to "top"
+number_of_top_functions = (top if number_of_functions >
+                           top else number_of_functions)
+
+# Store the data of the top functions in top_functions[]
+top_functions = callgrind_data[first_func_line:
+                               first_func_line + number_of_top_functions]
+
+# Print table header
+print('{:>4}  {:>10}  {:<30}  {}\n{}  {}  {}  {}'.format('No.',
+                                                         'Percentage',
+                                                         'Function Name',
+                                                         'Source File',
+                                                         '-' * 4,
+                                                         '-' * 10,
+                                                         '-' * 30,
+                                                         '-' * 30,
+                                                         ))
+
+# Print top N functions
+for (index, function) in enumerate(top_functions, start=1):
+    function_data = function.split()
+    # Calculate function percentage
+    function_instructions = float(function_data[0].replace(',', ''))
+    function_percentage = (function_instructions /
+                           total_number_of_instructions)*100
+    # Get function name and source files path
+    function_source_file, function_name = function_data[1].split(':')
+    # Print extracted data
+    print('{:>4}  {:>9.3f}%  {:<30}  {}'.format(index,
+                                                round(function_percentage, 3),
+                                                function_name,
+                                                function_source_file))
+
+# Remove intermediate files
+os.unlink('/tmp/callgrind.data')
+os.unlink('/tmp/callgrind_annotate.out')
diff --git a/scripts/performance/topN_perf.py b/scripts/performance/topN_perf.py
new file mode 100755
index 000000000..07be195fc
--- /dev/null
+++ b/scripts/performance/topN_perf.py
@@ -0,0 +1,149 @@
+#!/usr/bin/env python3
+
+#  Print the top N most executed functions in QEMU using perf.
+#  Syntax:
+#  topN_perf.py [-h] [-n]   -- \
+#            [] \
+#            []
+#
+#  [-h] - Print the script arguments help message.
+#  [-n] - Specify the number of top functions to print.
+#       - If this flag is not specified, the tool defaults to 25.
+#
+#  Example of usage:
+#  topN_perf.py -n 20 -- qemu-arm coulomb_double-arm
+#
+#  This file is a part of the project "TCG Continuous Benchmarking".
+#
+#  Copyright (C) 2020  Ahmed Karaman 
+#  Copyright (C) 2020  Aleksandar Markovic 
+#
+#  This program is free software: you can redistribute it and/or modify
+#  it under the terms of the GNU General Public License as published by
+#  the Free Software Foundation, either version 2 of the License, or
+#  (at your option) any later version.
+#
+#  This program is distributed in the hope that it will be useful,
+#  but WITHOUT ANY WARRANTY; without even the implied warranty of
+#  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+#  GNU General Public License for more details.
+#
+#  You should have received a copy of the GNU General Public License
+#  along with this program. If not, see .
+
+import argparse
+import os
+import subprocess
+import sys
+
+
+# Parse the command line arguments
+parser = argparse.ArgumentParser(
+    usage='topN_perf.py [-h] [-n]   -- '
+          ' [] '
+          ' []')
+
+parser.add_argument('-n', dest='top', type=int, default=25,
+                    help='Specify the number of top functions to print.')
+
+parser.add_argument('command', type=str, nargs='+', help=argparse.SUPPRESS)
+
+args = parser.parse_args()
+
+# Extract the needed variables from the args
+command = args.command
+top = args.top
+
+# Insure that perf is installed
+check_perf_presence = subprocess.run(["which", "perf"],
+                                     stdout=subprocess.DEVNULL)
+if check_perf_presence.returncode:
+    sys.exit("Please install perf before running the script!")
+
+# Insure user has previllage to run perf
+check_perf_executability = subprocess.run(["perf", "stat", "ls", "/"],
+                                          stdout=subprocess.DEVNULL,
+                                          stderr=subprocess.DEVNULL)
+if check_perf_executability.returncode:
+    sys.exit(
+"""
+Error:
+You may not have permission to collect stats.
+
+Consider tweaking /proc/sys/kernel/perf_event_paranoid,
+which controls use of the performance events system by
+unprivileged users (without CAP_SYS_ADMIN).
+
+  -1: Allow use of (almost) all events by all users
+      Ignore mlock limit after perf_event_mlock_kb without CAP_IPC_LOCK
+   0: Disallow ftrace function tracepoint by users without CAP_SYS_ADMIN
+      Disallow raw tracepoint access by users without CAP_SYS_ADMIN
+   1: Disallow CPU event access by users without CAP_SYS_ADMIN
+   2: Disallow kernel profiling by users without CAP_SYS_ADMIN
+
+To make this setting permanent, edit /etc/sysctl.conf too, e.g.:
+   kernel.perf_event_paranoid = -1
+
+* Alternatively, you can run this script under sudo privileges.
+"""
+)
+
+# Run perf record
+perf_record = subprocess.run((["perf", "record", "--output=/tmp/perf.data"] +
+                              command),
+                             stdout=subprocess.DEVNULL,
+                             stderr=subprocess.PIPE)
+if perf_record.returncode:
+    os.unlink('/tmp/perf.data')
+    sys.exit(perf_record.stderr.decode("utf-8"))
+
+# Save perf report output to /tmp/perf_report.out
+with open("/tmp/perf_report.out", "w") as output:
+    perf_report = subprocess.run(
+        ["perf", "report", "--input=/tmp/perf.data", "--stdio"],
+        stdout=output,
+        stderr=subprocess.PIPE)
+    if perf_report.returncode:
+        os.unlink('/tmp/perf.data')
+        output.close()
+        os.unlink('/tmp/perf_report.out')
+        sys.exit(perf_report.stderr.decode("utf-8"))
+
+# Read the reported data to functions[]
+functions = []
+with open("/tmp/perf_report.out", "r") as data:
+    # Only read lines that are not comments (comments start with #)
+    # Only read lines that are not empty
+    functions = [line for line in data.readlines() if line and line[0]
+                 != '#' and line[0] != "\n"]
+
+# Limit the number of top functions to "top"
+number_of_top_functions = top if len(functions) > top else len(functions)
+
+# Store the data of the top functions in top_functions[]
+top_functions = functions[:number_of_top_functions]
+
+# Print table header
+print('{:>4}  {:>10}  {:<30}  {}\n{}  {}  {}  {}'.format('No.',
+                                                         'Percentage',
+                                                         'Name',
+                                                         'Invoked by',
+                                                         '-' * 4,
+                                                         '-' * 10,
+                                                         '-' * 30,
+                                                         '-' * 25))
+
+# Print top N functions
+for (index, function) in enumerate(top_functions, start=1):
+    function_data = function.split()
+    function_percentage = function_data[0]
+    function_name = function_data[-1]
+    function_invoker = ' '.join(function_data[2:-2])
+    print('{:>4}  {:>10}  {:<30}  {}'.format(index,
+                                             function_percentage,
+                                             function_name,
+                                             function_invoker))
+
+# Remove intermediate files
+os.unlink('/tmp/perf.data')
+os.unlink('/tmp/perf_report.out')
diff --git a/scripts/qapi-gen.py b/scripts/qapi-gen.py
new file mode 100644
index 000000000..f3518d29a
--- /dev/null
+++ b/scripts/qapi-gen.py
@@ -0,0 +1,19 @@
+#!/usr/bin/env python3
+
+# This work is licensed under the terms of the GNU GPL, version 2 or later.
+# See the COPYING file in the top-level directory.
+
+"""
+QAPI code generation execution shim.
+
+This standalone script exists primarily to facilitate the running of the QAPI
+code generator without needing to install the python module to the current
+execution environment.
+"""
+
+import sys
+
+from qapi import main
+
+if __name__ == '__main__':
+    sys.exit(main.main())
diff --git a/scripts/qapi/.flake8 b/scripts/qapi/.flake8
new file mode 100644
index 000000000..6b158c68b
--- /dev/null
+++ b/scripts/qapi/.flake8
@@ -0,0 +1,2 @@
+[flake8]
+extend-ignore = E722  # Prefer pylint's bare-except checks to flake8's
diff --git a/scripts/qapi/.isort.cfg b/scripts/qapi/.isort.cfg
new file mode 100644
index 000000000..643caa1fb
--- /dev/null
+++ b/scripts/qapi/.isort.cfg
@@ -0,0 +1,7 @@
+[settings]
+force_grid_wrap=4
+force_sort_within_sections=True
+include_trailing_comma=True
+line_length=72
+lines_after_imports=2
+multi_line_output=3
diff --git a/scripts/qapi/__init__.py b/scripts/qapi/__init__.py
new file mode 100644
index 000000000..e69de29bb
diff --git a/scripts/qapi/commands.py b/scripts/qapi/commands.py
new file mode 100644
index 000000000..50978090b
--- /dev/null
+++ b/scripts/qapi/commands.py
@@ -0,0 +1,340 @@
+"""
+QAPI command marshaller generator
+
+Copyright IBM, Corp. 2011
+Copyright (C) 2014-2018 Red Hat, Inc.
+
+Authors:
+ Anthony Liguori 
+ Michael Roth 
+ Markus Armbruster 
+
+This work is licensed under the terms of the GNU GPL, version 2.
+See the COPYING file in the top-level directory.
+"""
+
+from typing import (
+    Dict,
+    List,
+    Optional,
+    Set,
+)
+
+from .common import c_name, mcgen
+from .gen import (
+    QAPIGenC,
+    QAPIGenCCode,
+    QAPISchemaModularCVisitor,
+    build_params,
+    ifcontext,
+)
+from .schema import (
+    QAPISchema,
+    QAPISchemaFeature,
+    QAPISchemaObjectType,
+    QAPISchemaType,
+)
+from .source import QAPISourceInfo
+
+
+def gen_command_decl(name: str,
+                     arg_type: Optional[QAPISchemaObjectType],
+                     boxed: bool,
+                     ret_type: Optional[QAPISchemaType]) -> str:
+    return mcgen('''
+%(c_type)s qmp_%(c_name)s(%(params)s);
+''',
+                 c_type=(ret_type and ret_type.c_type()) or 'void',
+                 c_name=c_name(name),
+                 params=build_params(arg_type, boxed, 'Error **errp'))
+
+
+def gen_call(name: str,
+             arg_type: Optional[QAPISchemaObjectType],
+             boxed: bool,
+             ret_type: Optional[QAPISchemaType]) -> str:
+    ret = ''
+
+    argstr = ''
+    if boxed:
+        assert arg_type
+        argstr = '&arg, '
+    elif arg_type:
+        assert not arg_type.variants
+        for memb in arg_type.members:
+            if memb.optional:
+                argstr += 'arg.has_%s, ' % c_name(memb.name)
+            argstr += 'arg.%s, ' % c_name(memb.name)
+
+    lhs = ''
+    if ret_type:
+        lhs = 'retval = '
+
+    ret = mcgen('''
+
+    %(lhs)sqmp_%(c_name)s(%(args)s&err);
+    error_propagate(errp, err);
+''',
+                c_name=c_name(name), args=argstr, lhs=lhs)
+    if ret_type:
+        ret += mcgen('''
+    if (err) {
+        goto out;
+    }
+
+    qmp_marshal_output_%(c_name)s(retval, ret, errp);
+''',
+                     c_name=ret_type.c_name())
+    return ret
+
+
+def gen_marshal_output(ret_type: QAPISchemaType) -> str:
+    return mcgen('''
+
+static void qmp_marshal_output_%(c_name)s(%(c_type)s ret_in,
+                                QObject **ret_out, Error **errp)
+{
+    Visitor *v;
+
+    v = qobject_output_visitor_new(ret_out);
+    if (visit_type_%(c_name)s(v, "unused", &ret_in, errp)) {
+        visit_complete(v, ret_out);
+    }
+    visit_free(v);
+    v = qapi_dealloc_visitor_new();
+    visit_type_%(c_name)s(v, "unused", &ret_in, NULL);
+    visit_free(v);
+}
+''',
+                 c_type=ret_type.c_type(), c_name=ret_type.c_name())
+
+
+def build_marshal_proto(name: str) -> str:
+    return ('void qmp_marshal_%s(QDict *args, QObject **ret, Error **errp)'
+            % c_name(name))
+
+
+def gen_marshal_decl(name: str) -> str:
+    return mcgen('''
+%(proto)s;
+''',
+                 proto=build_marshal_proto(name))
+
+
+def gen_marshal(name: str,
+                arg_type: Optional[QAPISchemaObjectType],
+                boxed: bool,
+                ret_type: Optional[QAPISchemaType]) -> str:
+    have_args = boxed or (arg_type and not arg_type.is_empty())
+
+    ret = mcgen('''
+
+%(proto)s
+{
+    Error *err = NULL;
+    bool ok = false;
+    Visitor *v;
+''',
+                proto=build_marshal_proto(name))
+
+    if ret_type:
+        ret += mcgen('''
+    %(c_type)s retval;
+''',
+                     c_type=ret_type.c_type())
+
+    if have_args:
+        ret += mcgen('''
+    %(c_name)s arg = {0};
+''',
+                     c_name=arg_type.c_name())
+
+    ret += mcgen('''
+
+    v = qobject_input_visitor_new(QOBJECT(args));
+    if (!visit_start_struct(v, NULL, NULL, 0, errp)) {
+        goto out;
+    }
+''')
+
+    if have_args:
+        ret += mcgen('''
+    if (visit_type_%(c_arg_type)s_members(v, &arg, errp)) {
+        ok = visit_check_struct(v, errp);
+    }
+''',
+                     c_arg_type=arg_type.c_name())
+    else:
+        ret += mcgen('''
+    ok = visit_check_struct(v, errp);
+''')
+
+    ret += mcgen('''
+    visit_end_struct(v, NULL);
+    if (!ok) {
+        goto out;
+    }
+''')
+
+    ret += gen_call(name, arg_type, boxed, ret_type)
+
+    ret += mcgen('''
+
+out:
+    visit_free(v);
+''')
+
+    ret += mcgen('''
+    v = qapi_dealloc_visitor_new();
+    visit_start_struct(v, NULL, NULL, 0, NULL);
+''')
+
+    if have_args:
+        ret += mcgen('''
+    visit_type_%(c_arg_type)s_members(v, &arg, NULL);
+''',
+                     c_arg_type=arg_type.c_name())
+
+    ret += mcgen('''
+    visit_end_struct(v, NULL);
+    visit_free(v);
+''')
+
+    ret += mcgen('''
+}
+''')
+    return ret
+
+
+def gen_register_command(name: str,
+                         success_response: bool,
+                         allow_oob: bool,
+                         allow_preconfig: bool,
+                         coroutine: bool) -> str:
+    options = []
+
+    if not success_response:
+        options += ['QCO_NO_SUCCESS_RESP']
+    if allow_oob:
+        options += ['QCO_ALLOW_OOB']
+    if allow_preconfig:
+        options += ['QCO_ALLOW_PRECONFIG']
+    if coroutine:
+        options += ['QCO_COROUTINE']
+
+    if not options:
+        options = ['QCO_NO_OPTIONS']
+
+    ret = mcgen('''
+    qmp_register_command(cmds, "%(name)s",
+                         qmp_marshal_%(c_name)s, %(opts)s);
+''',
+                name=name, c_name=c_name(name),
+                opts=" | ".join(options))
+    return ret
+
+
+def gen_registry(registry: str, prefix: str) -> str:
+    ret = mcgen('''
+
+void %(c_prefix)sqmp_init_marshal(QmpCommandList *cmds)
+{
+    QTAILQ_INIT(cmds);
+
+''',
+                c_prefix=c_name(prefix, protect=False))
+    ret += registry
+    ret += mcgen('''
+}
+''')
+    return ret
+
+
+class QAPISchemaGenCommandVisitor(QAPISchemaModularCVisitor):
+    def __init__(self, prefix: str):
+        super().__init__(
+            prefix, 'qapi-commands',
+            ' * Schema-defined QAPI/QMP commands', None, __doc__)
+        self._regy = QAPIGenCCode(None)
+        self._visited_ret_types: Dict[QAPIGenC, Set[QAPISchemaType]] = {}
+
+    def _begin_user_module(self, name: str) -> None:
+        self._visited_ret_types[self._genc] = set()
+        commands = self._module_basename('qapi-commands', name)
+        types = self._module_basename('qapi-types', name)
+        visit = self._module_basename('qapi-visit', name)
+        self._genc.add(mcgen('''
+#include "qemu/osdep.h"
+#include "qapi/visitor.h"
+#include "qapi/qmp/qdict.h"
+#include "qapi/qobject-output-visitor.h"
+#include "qapi/qobject-input-visitor.h"
+#include "qapi/dealloc-visitor.h"
+#include "qapi/error.h"
+#include "%(visit)s.h"
+#include "%(commands)s.h"
+
+''',
+                             commands=commands, visit=visit))
+        self._genh.add(mcgen('''
+#include "%(types)s.h"
+
+''',
+                             types=types))
+
+    def visit_end(self) -> None:
+        self._add_system_module('init', ' * QAPI Commands initialization')
+        self._genh.add(mcgen('''
+#include "qapi/qmp/dispatch.h"
+
+void %(c_prefix)sqmp_init_marshal(QmpCommandList *cmds);
+''',
+                             c_prefix=c_name(self._prefix, protect=False)))
+        self._genc.preamble_add(mcgen('''
+#include "qemu/osdep.h"
+#include "%(prefix)sqapi-commands.h"
+#include "%(prefix)sqapi-init-commands.h"
+''',
+                                      prefix=self._prefix))
+        self._genc.add(gen_registry(self._regy.get_content(), self._prefix))
+
+    def visit_command(self,
+                      name: str,
+                      info: QAPISourceInfo,
+                      ifcond: List[str],
+                      features: List[QAPISchemaFeature],
+                      arg_type: Optional[QAPISchemaObjectType],
+                      ret_type: Optional[QAPISchemaType],
+                      gen: bool,
+                      success_response: bool,
+                      boxed: bool,
+                      allow_oob: bool,
+                      allow_preconfig: bool,
+                      coroutine: bool) -> None:
+        if not gen:
+            return
+        # FIXME: If T is a user-defined type, the user is responsible
+        # for making this work, i.e. to make T's condition the
+        # conjunction of the T-returning commands' conditions.  If T
+        # is a built-in type, this isn't possible: the
+        # qmp_marshal_output_T() will be generated unconditionally.
+        if ret_type and ret_type not in self._visited_ret_types[self._genc]:
+            self._visited_ret_types[self._genc].add(ret_type)
+            with ifcontext(ret_type.ifcond,
+                           self._genh, self._genc, self._regy):
+                self._genc.add(gen_marshal_output(ret_type))
+        with ifcontext(ifcond, self._genh, self._genc, self._regy):
+            self._genh.add(gen_command_decl(name, arg_type, boxed, ret_type))
+            self._genh.add(gen_marshal_decl(name))
+            self._genc.add(gen_marshal(name, arg_type, boxed, ret_type))
+            self._regy.add(gen_register_command(name, success_response,
+                                                allow_oob, allow_preconfig,
+                                                coroutine))
+
+
+def gen_commands(schema: QAPISchema,
+                 output_dir: str,
+                 prefix: str) -> None:
+    vis = QAPISchemaGenCommandVisitor(prefix)
+    schema.visit(vis)
+    vis.write(output_dir)
diff --git a/scripts/qapi/common.py b/scripts/qapi/common.py
new file mode 100644
index 000000000..11b86beea
--- /dev/null
+++ b/scripts/qapi/common.py
@@ -0,0 +1,212 @@
+#
+# QAPI helper library
+#
+# Copyright IBM, Corp. 2011
+# Copyright (c) 2013-2018 Red Hat Inc.
+#
+# Authors:
+#  Anthony Liguori 
+#  Markus Armbruster 
+#
+# This work is licensed under the terms of the GNU GPL, version 2.
+# See the COPYING file in the top-level directory.
+
+import re
+from typing import Optional, Sequence
+
+
+#: Magic string that gets removed along with all space to its right.
+EATSPACE = '\033EATSPACE.'
+POINTER_SUFFIX = ' *' + EATSPACE
+_C_NAME_TRANS = str.maketrans('.-', '__')
+
+
+def camel_to_upper(value: str) -> str:
+    """
+    Converts CamelCase to CAMEL_CASE.
+
+    Examples::
+
+        ENUMName -> ENUM_NAME
+        EnumName1 -> ENUM_NAME1
+        ENUM_NAME -> ENUM_NAME
+        ENUM_NAME1 -> ENUM_NAME1
+        ENUM_Name2 -> ENUM_NAME2
+        ENUM24_Name -> ENUM24_NAME
+    """
+    c_fun_str = c_name(value, False)
+    if value.isupper():
+        return c_fun_str
+
+    new_name = ''
+    length = len(c_fun_str)
+    for i in range(length):
+        char = c_fun_str[i]
+        # When char is upper case and no '_' appears before, do more checks
+        if char.isupper() and (i > 0) and c_fun_str[i - 1] != '_':
+            if i < length - 1 and c_fun_str[i + 1].islower():
+                new_name += '_'
+            elif c_fun_str[i - 1].isdigit():
+                new_name += '_'
+        new_name += char
+    return new_name.lstrip('_').upper()
+
+
+def c_enum_const(type_name: str,
+                 const_name: str,
+                 prefix: Optional[str] = None) -> str:
+    """
+    Generate a C enumeration constant name.
+
+    :param type_name: The name of the enumeration.
+    :param const_name: The name of this constant.
+    :param prefix: Optional, prefix that overrides the type_name.
+    """
+    if prefix is not None:
+        type_name = prefix
+    return camel_to_upper(type_name) + '_' + c_name(const_name, False).upper()
+
+
+def c_name(name: str, protect: bool = True) -> str:
+    """
+    Map ``name`` to a valid C identifier.
+
+    Used for converting 'name' from a 'name':'type' qapi definition
+    into a generated struct member, as well as converting type names
+    into substrings of a generated C function name.
+
+    '__a.b_c' -> '__a_b_c', 'x-foo' -> 'x_foo'
+    protect=True: 'int' -> 'q_int'; protect=False: 'int' -> 'int'
+
+    :param name: The name to map.
+    :param protect: If true, avoid returning certain ticklish identifiers
+                    (like C keywords) by prepending ``q_``.
+    """
+    # ANSI X3J11/88-090, 3.1.1
+    c89_words = set(['auto', 'break', 'case', 'char', 'const', 'continue',
+                     'default', 'do', 'double', 'else', 'enum', 'extern',
+                     'float', 'for', 'goto', 'if', 'int', 'long', 'register',
+                     'return', 'short', 'signed', 'sizeof', 'static',
+                     'struct', 'switch', 'typedef', 'union', 'unsigned',
+                     'void', 'volatile', 'while'])
+    # ISO/IEC 9899:1999, 6.4.1
+    c99_words = set(['inline', 'restrict', '_Bool', '_Complex', '_Imaginary'])
+    # ISO/IEC 9899:2011, 6.4.1
+    c11_words = set(['_Alignas', '_Alignof', '_Atomic', '_Generic',
+                     '_Noreturn', '_Static_assert', '_Thread_local'])
+    # GCC http://gcc.gnu.org/onlinedocs/gcc-4.7.1/gcc/C-Extensions.html
+    # excluding _.*
+    gcc_words = set(['asm', 'typeof'])
+    # C++ ISO/IEC 14882:2003 2.11
+    cpp_words = set(['bool', 'catch', 'class', 'const_cast', 'delete',
+                     'dynamic_cast', 'explicit', 'false', 'friend', 'mutable',
+                     'namespace', 'new', 'operator', 'private', 'protected',
+                     'public', 'reinterpret_cast', 'static_cast', 'template',
+                     'this', 'throw', 'true', 'try', 'typeid', 'typename',
+                     'using', 'virtual', 'wchar_t',
+                     # alternative representations
+                     'and', 'and_eq', 'bitand', 'bitor', 'compl', 'not',
+                     'not_eq', 'or', 'or_eq', 'xor', 'xor_eq'])
+    # namespace pollution:
+    polluted_words = set(['unix', 'errno', 'mips', 'sparc', 'i386'])
+    name = name.translate(_C_NAME_TRANS)
+    if protect and (name in c89_words | c99_words | c11_words | gcc_words
+                    | cpp_words | polluted_words):
+        return 'q_' + name
+    return name
+
+
+class Indentation:
+    """
+    Indentation level management.
+
+    :param initial: Initial number of spaces, default 0.
+    """
+    def __init__(self, initial: int = 0) -> None:
+        self._level = initial
+
+    def __int__(self) -> int:
+        return self._level
+
+    def __repr__(self) -> str:
+        return "{}({:d})".format(type(self).__name__, self._level)
+
+    def __str__(self) -> str:
+        """Return the current indentation as a string of spaces."""
+        return ' ' * self._level
+
+    def __bool__(self) -> bool:
+        """True when there is a non-zero indentation."""
+        return bool(self._level)
+
+    def increase(self, amount: int = 4) -> None:
+        """Increase the indentation level by ``amount``, default 4."""
+        self._level += amount
+
+    def decrease(self, amount: int = 4) -> None:
+        """Decrease the indentation level by ``amount``, default 4."""
+        if self._level < amount:
+            raise ArithmeticError(
+                f"Can't remove {amount:d} spaces from {self!r}")
+        self._level -= amount
+
+
+#: Global, current indent level for code generation.
+indent = Indentation()
+
+
+def cgen(code: str, **kwds: object) -> str:
+    """
+    Generate ``code`` with ``kwds`` interpolated.
+
+    Obey `indent`, and strip `EATSPACE`.
+    """
+    raw = code % kwds
+    if indent:
+        raw = re.sub(r'^(?!(#|$))', str(indent), raw, flags=re.MULTILINE)
+    return re.sub(re.escape(EATSPACE) + r' *', '', raw)
+
+
+def mcgen(code: str, **kwds: object) -> str:
+    if code[0] == '\n':
+        code = code[1:]
+    return cgen(code, **kwds)
+
+
+def c_fname(filename: str) -> str:
+    return re.sub(r'[^A-Za-z0-9_]', '_', filename)
+
+
+def guardstart(name: str) -> str:
+    return mcgen('''
+#ifndef %(name)s
+#define %(name)s
+
+''',
+                 name=c_fname(name).upper())
+
+
+def guardend(name: str) -> str:
+    return mcgen('''
+
+#endif /* %(name)s */
+''',
+                 name=c_fname(name).upper())
+
+
+def gen_if(ifcond: Sequence[str]) -> str:
+    ret = ''
+    for ifc in ifcond:
+        ret += mcgen('''
+#if %(cond)s
+''', cond=ifc)
+    return ret
+
+
+def gen_endif(ifcond: Sequence[str]) -> str:
+    ret = ''
+    for ifc in reversed(ifcond):
+        ret += mcgen('''
+#endif /* %(cond)s */
+''', cond=ifc)
+    return ret
diff --git a/scripts/qapi/error.py b/scripts/qapi/error.py
new file mode 100644
index 000000000..ae60d9e2f
--- /dev/null
+++ b/scripts/qapi/error.py
@@ -0,0 +1,43 @@
+# -*- coding: utf-8 -*-
+#
+# QAPI error classes
+#
+# Copyright (c) 2017-2019 Red Hat Inc.
+#
+# Authors:
+#  Markus Armbruster 
+#  Marc-André Lureau 
+#
+# This work is licensed under the terms of the GNU GPL, version 2.
+# See the COPYING file in the top-level directory.
+
+
+class QAPIError(Exception):
+    def __init__(self, info, col, msg):
+        Exception.__init__(self)
+        self.info = info
+        self.col = col
+        self.msg = msg
+
+    def __str__(self):
+        loc = str(self.info)
+        if self.col is not None:
+            assert self.info.line is not None
+            loc += ':%s' % self.col
+        return loc + ': ' + self.msg
+
+
+class QAPIParseError(QAPIError):
+    def __init__(self, parser, msg):
+        col = 1
+        for ch in parser.src[parser.line_pos:parser.pos]:
+            if ch == '\t':
+                col = (col + 7) % 8 + 1
+            else:
+                col += 1
+        super().__init__(parser.info, col, msg)
+
+
+class QAPISemError(QAPIError):
+    def __init__(self, info, msg):
+        super().__init__(info, None, msg)
diff --git a/scripts/qapi/events.py b/scripts/qapi/events.py
new file mode 100644
index 000000000..599f3d1f5
--- /dev/null
+++ b/scripts/qapi/events.py
@@ -0,0 +1,234 @@
+"""
+QAPI event generator
+
+Copyright (c) 2014 Wenchao Xia
+Copyright (c) 2015-2018 Red Hat Inc.
+
+Authors:
+ Wenchao Xia 
+ Markus Armbruster 
+
+This work is licensed under the terms of the GNU GPL, version 2.
+See the COPYING file in the top-level directory.
+"""
+
+from typing import List
+
+from .common import c_enum_const, c_name, mcgen
+from .gen import QAPISchemaModularCVisitor, build_params, ifcontext
+from .schema import (
+    QAPISchema,
+    QAPISchemaEnumMember,
+    QAPISchemaFeature,
+    QAPISchemaObjectType,
+)
+from .source import QAPISourceInfo
+from .types import gen_enum, gen_enum_lookup
+
+
+def build_event_send_proto(name: str,
+                           arg_type: QAPISchemaObjectType,
+                           boxed: bool) -> str:
+    return 'void qapi_event_send_%(c_name)s(%(param)s)' % {
+        'c_name': c_name(name.lower()),
+        'param': build_params(arg_type, boxed)}
+
+
+def gen_event_send_decl(name: str,
+                        arg_type: QAPISchemaObjectType,
+                        boxed: bool) -> str:
+    return mcgen('''
+
+%(proto)s;
+''',
+                 proto=build_event_send_proto(name, arg_type, boxed))
+
+
+def gen_param_var(typ: QAPISchemaObjectType) -> str:
+    """
+    Generate a struct variable holding the event parameters.
+
+    Initialize it with the function arguments defined in `gen_event_send`.
+    """
+    assert not typ.variants
+    ret = mcgen('''
+    %(c_name)s param = {
+''',
+                c_name=typ.c_name())
+    sep = '        '
+    for memb in typ.members:
+        ret += sep
+        sep = ', '
+        if memb.optional:
+            ret += 'has_' + c_name(memb.name) + sep
+        if memb.type.name == 'str':
+            # Cast away const added in build_params()
+            ret += '(char *)'
+        ret += c_name(memb.name)
+    ret += mcgen('''
+
+    };
+''')
+    if not typ.is_implicit():
+        ret += mcgen('''
+    %(c_name)s *arg = ¶m;
+''',
+                     c_name=typ.c_name())
+    return ret
+
+
+def gen_event_send(name: str,
+                   arg_type: QAPISchemaObjectType,
+                   boxed: bool,
+                   event_enum_name: str,
+                   event_emit: str) -> str:
+    # FIXME: Our declaration of local variables (and of 'errp' in the
+    # parameter list) can collide with exploded members of the event's
+    # data type passed in as parameters.  If this collision ever hits in
+    # practice, we can rename our local variables with a leading _ prefix,
+    # or split the code into a wrapper function that creates a boxed
+    # 'param' object then calls another to do the real work.
+    have_args = boxed or (arg_type and not arg_type.is_empty())
+
+    ret = mcgen('''
+
+%(proto)s
+{
+    QDict *qmp;
+''',
+                proto=build_event_send_proto(name, arg_type, boxed))
+
+    if have_args:
+        ret += mcgen('''
+    QObject *obj;
+    Visitor *v;
+''')
+        if not boxed:
+            ret += gen_param_var(arg_type)
+
+    ret += mcgen('''
+
+    qmp = qmp_event_build_dict("%(name)s");
+
+''',
+                 name=name)
+
+    if have_args:
+        ret += mcgen('''
+    v = qobject_output_visitor_new(&obj);
+''')
+        if not arg_type.is_implicit():
+            ret += mcgen('''
+    visit_type_%(c_name)s(v, "%(name)s", &arg, &error_abort);
+''',
+                         name=name, c_name=arg_type.c_name())
+        else:
+            ret += mcgen('''
+
+    visit_start_struct(v, "%(name)s", NULL, 0, &error_abort);
+    visit_type_%(c_name)s_members(v, ¶m, &error_abort);
+    visit_check_struct(v, &error_abort);
+    visit_end_struct(v, NULL);
+''',
+                         name=name, c_name=arg_type.c_name())
+        ret += mcgen('''
+
+    visit_complete(v, &obj);
+    qdict_put_obj(qmp, "data", obj);
+''')
+
+    ret += mcgen('''
+    %(event_emit)s(%(c_enum)s, qmp);
+
+''',
+                 event_emit=event_emit,
+                 c_enum=c_enum_const(event_enum_name, name))
+
+    if have_args:
+        ret += mcgen('''
+    visit_free(v);
+''')
+    ret += mcgen('''
+    qobject_unref(qmp);
+}
+''')
+    return ret
+
+
+class QAPISchemaGenEventVisitor(QAPISchemaModularCVisitor):
+
+    def __init__(self, prefix: str):
+        super().__init__(
+            prefix, 'qapi-events',
+            ' * Schema-defined QAPI/QMP events', None, __doc__)
+        self._event_enum_name = c_name(prefix + 'QAPIEvent', protect=False)
+        self._event_enum_members: List[QAPISchemaEnumMember] = []
+        self._event_emit_name = c_name(prefix + 'qapi_event_emit')
+
+    def _begin_user_module(self, name: str) -> None:
+        events = self._module_basename('qapi-events', name)
+        types = self._module_basename('qapi-types', name)
+        visit = self._module_basename('qapi-visit', name)
+        self._genc.add(mcgen('''
+#include "qemu/osdep.h"
+#include "%(prefix)sqapi-emit-events.h"
+#include "%(events)s.h"
+#include "%(visit)s.h"
+#include "qapi/error.h"
+#include "qapi/qmp/qdict.h"
+#include "qapi/qobject-output-visitor.h"
+#include "qapi/qmp-event.h"
+
+''',
+                             events=events, visit=visit,
+                             prefix=self._prefix))
+        self._genh.add(mcgen('''
+#include "qapi/util.h"
+#include "%(types)s.h"
+''',
+                             types=types))
+
+    def visit_end(self) -> None:
+        self._add_system_module('emit', ' * QAPI Events emission')
+        self._genc.preamble_add(mcgen('''
+#include "qemu/osdep.h"
+#include "%(prefix)sqapi-emit-events.h"
+''',
+                                      prefix=self._prefix))
+        self._genh.preamble_add(mcgen('''
+#include "qapi/util.h"
+'''))
+        self._genh.add(gen_enum(self._event_enum_name,
+                                self._event_enum_members))
+        self._genc.add(gen_enum_lookup(self._event_enum_name,
+                                       self._event_enum_members))
+        self._genh.add(mcgen('''
+
+void %(event_emit)s(%(event_enum)s event, QDict *qdict);
+''',
+                             event_emit=self._event_emit_name,
+                             event_enum=self._event_enum_name))
+
+    def visit_event(self,
+                    name: str,
+                    info: QAPISourceInfo,
+                    ifcond: List[str],
+                    features: List[QAPISchemaFeature],
+                    arg_type: QAPISchemaObjectType,
+                    boxed: bool) -> None:
+        with ifcontext(ifcond, self._genh, self._genc):
+            self._genh.add(gen_event_send_decl(name, arg_type, boxed))
+            self._genc.add(gen_event_send(name, arg_type, boxed,
+                                          self._event_enum_name,
+                                          self._event_emit_name))
+        # Note: we generate the enum member regardless of @ifcond, to
+        # keep the enumeration usable in target-independent code.
+        self._event_enum_members.append(QAPISchemaEnumMember(name, None))
+
+
+def gen_events(schema: QAPISchema,
+               output_dir: str,
+               prefix: str) -> None:
+    vis = QAPISchemaGenEventVisitor(prefix)
+    schema.visit(vis)
+    vis.write(output_dir)
diff --git a/scripts/qapi/expr.py b/scripts/qapi/expr.py
new file mode 100644
index 000000000..2fcaaa249
--- /dev/null
+++ b/scripts/qapi/expr.py
@@ -0,0 +1,368 @@
+# -*- coding: utf-8 -*-
+#
+# Check (context-free) QAPI schema expression structure
+#
+# Copyright IBM, Corp. 2011
+# Copyright (c) 2013-2019 Red Hat Inc.
+#
+# Authors:
+#  Anthony Liguori 
+#  Markus Armbruster 
+#  Eric Blake 
+#  Marc-André Lureau 
+#
+# This work is licensed under the terms of the GNU GPL, version 2.
+# See the COPYING file in the top-level directory.
+
+from collections import OrderedDict
+import re
+
+from .common import c_name
+from .error import QAPISemError
+
+
+# Names must be letters, numbers, -, and _.  They must start with letter,
+# except for downstream extensions which must start with __RFQDN_.
+# Dots are only valid in the downstream extension prefix.
+valid_name = re.compile(r'^(__[a-zA-Z0-9.-]+_)?'
+                        '[a-zA-Z][a-zA-Z0-9_-]*$')
+
+
+def check_name_is_str(name, info, source):
+    if not isinstance(name, str):
+        raise QAPISemError(info, "%s requires a string name" % source)
+
+
+def check_name_str(name, info, source,
+                   allow_optional=False, enum_member=False,
+                   permit_upper=False):
+    membername = name
+
+    if allow_optional and name.startswith('*'):
+        membername = name[1:]
+    # Enum members can start with a digit, because the generated C
+    # code always prefixes it with the enum name
+    if enum_member and membername[0].isdigit():
+        membername = 'D' + membername
+    # Reserve the entire 'q_' namespace for c_name(), and for 'q_empty'
+    # and 'q_obj_*' implicit type names.
+    if not valid_name.match(membername) or \
+       c_name(membername, False).startswith('q_'):
+        raise QAPISemError(info, "%s has an invalid name" % source)
+    if not permit_upper and name.lower() != name:
+        raise QAPISemError(
+            info, "%s uses uppercase in name" % source)
+    assert not membername.startswith('*')
+
+
+def check_defn_name_str(name, info, meta):
+    check_name_str(name, info, meta, permit_upper=True)
+    if name.endswith('Kind') or name.endswith('List'):
+        raise QAPISemError(
+            info, "%s name should not end in '%s'" % (meta, name[-4:]))
+
+
+def check_keys(value, info, source, required, optional):
+
+    def pprint(elems):
+        return ', '.join("'" + e + "'" for e in sorted(elems))
+
+    missing = set(required) - set(value)
+    if missing:
+        raise QAPISemError(
+            info,
+            "%s misses key%s %s"
+            % (source, 's' if len(missing) > 1 else '',
+               pprint(missing)))
+    allowed = set(required + optional)
+    unknown = set(value) - allowed
+    if unknown:
+        raise QAPISemError(
+            info,
+            "%s has unknown key%s %s\nValid keys are %s."
+            % (source, 's' if len(unknown) > 1 else '',
+               pprint(unknown), pprint(allowed)))
+
+
+def check_flags(expr, info):
+    for key in ['gen', 'success-response']:
+        if key in expr and expr[key] is not False:
+            raise QAPISemError(
+                info, "flag '%s' may only use false value" % key)
+    for key in ['boxed', 'allow-oob', 'allow-preconfig', 'coroutine']:
+        if key in expr and expr[key] is not True:
+            raise QAPISemError(
+                info, "flag '%s' may only use true value" % key)
+    if 'allow-oob' in expr and 'coroutine' in expr:
+        # This is not necessarily a fundamental incompatibility, but
+        # we don't have a use case and the desired semantics isn't
+        # obvious.  The simplest solution is to forbid it until we get
+        # a use case for it.
+        raise QAPISemError(info, "flags 'allow-oob' and 'coroutine' "
+                                 "are incompatible")
+
+
+def check_if(expr, info, source):
+
+    def check_if_str(ifcond, info):
+        if not isinstance(ifcond, str):
+            raise QAPISemError(
+                info,
+                "'if' condition of %s must be a string or a list of strings"
+                % source)
+        if ifcond.strip() == '':
+            raise QAPISemError(
+                info,
+                "'if' condition '%s' of %s makes no sense"
+                % (ifcond, source))
+
+    ifcond = expr.get('if')
+    if ifcond is None:
+        return
+    if isinstance(ifcond, list):
+        if ifcond == []:
+            raise QAPISemError(
+                info, "'if' condition [] of %s is useless" % source)
+        for elt in ifcond:
+            check_if_str(elt, info)
+    else:
+        check_if_str(ifcond, info)
+        expr['if'] = [ifcond]
+
+
+def normalize_members(members):
+    if isinstance(members, OrderedDict):
+        for key, arg in members.items():
+            if isinstance(arg, dict):
+                continue
+            members[key] = {'type': arg}
+
+
+def check_type(value, info, source,
+               allow_array=False, allow_dict=False):
+    if value is None:
+        return
+
+    # Array type
+    if isinstance(value, list):
+        if not allow_array:
+            raise QAPISemError(info, "%s cannot be an array" % source)
+        if len(value) != 1 or not isinstance(value[0], str):
+            raise QAPISemError(info,
+                               "%s: array type must contain single type name" %
+                               source)
+        return
+
+    # Type name
+    if isinstance(value, str):
+        return
+
+    # Anonymous type
+
+    if not allow_dict:
+        raise QAPISemError(info, "%s should be a type name" % source)
+
+    if not isinstance(value, OrderedDict):
+        raise QAPISemError(info,
+                           "%s should be an object or type name" % source)
+
+    permit_upper = allow_dict in info.pragma.name_case_whitelist
+
+    # value is a dictionary, check that each member is okay
+    for (key, arg) in value.items():
+        key_source = "%s member '%s'" % (source, key)
+        check_name_str(key, info, key_source,
+                       allow_optional=True, permit_upper=permit_upper)
+        if c_name(key, False) == 'u' or c_name(key, False).startswith('has_'):
+            raise QAPISemError(info, "%s uses reserved name" % key_source)
+        check_keys(arg, info, key_source, ['type'], ['if', 'features'])
+        check_if(arg, info, key_source)
+        check_features(arg.get('features'), info)
+        check_type(arg['type'], info, key_source, allow_array=True)
+
+
+def check_features(features, info):
+    if features is None:
+        return
+    if not isinstance(features, list):
+        raise QAPISemError(info, "'features' must be an array")
+    features[:] = [f if isinstance(f, dict) else {'name': f}
+                   for f in features]
+    for f in features:
+        source = "'features' member"
+        assert isinstance(f, dict)
+        check_keys(f, info, source, ['name'], ['if'])
+        check_name_is_str(f['name'], info, source)
+        source = "%s '%s'" % (source, f['name'])
+        check_name_str(f['name'], info, source)
+        check_if(f, info, source)
+
+
+def check_enum(expr, info):
+    name = expr['enum']
+    members = expr['data']
+    prefix = expr.get('prefix')
+
+    if not isinstance(members, list):
+        raise QAPISemError(info, "'data' must be an array")
+    if prefix is not None and not isinstance(prefix, str):
+        raise QAPISemError(info, "'prefix' must be a string")
+
+    permit_upper = name in info.pragma.name_case_whitelist
+
+    members[:] = [m if isinstance(m, dict) else {'name': m}
+                  for m in members]
+    for member in members:
+        source = "'data' member"
+        check_keys(member, info, source, ['name'], ['if'])
+        check_name_is_str(member['name'], info, source)
+        source = "%s '%s'" % (source, member['name'])
+        check_name_str(member['name'], info, source,
+                       enum_member=True, permit_upper=permit_upper)
+        check_if(member, info, source)
+
+
+def check_struct(expr, info):
+    name = expr['struct']
+    members = expr['data']
+
+    check_type(members, info, "'data'", allow_dict=name)
+    check_type(expr.get('base'), info, "'base'")
+
+
+def check_union(expr, info):
+    name = expr['union']
+    base = expr.get('base')
+    discriminator = expr.get('discriminator')
+    members = expr['data']
+
+    if discriminator is None:   # simple union
+        if base is not None:
+            raise QAPISemError(info, "'base' requires 'discriminator'")
+    else:                       # flat union
+        check_type(base, info, "'base'", allow_dict=name)
+        if not base:
+            raise QAPISemError(info, "'discriminator' requires 'base'")
+        check_name_is_str(discriminator, info, "'discriminator'")
+
+    for (key, value) in members.items():
+        source = "'data' member '%s'" % key
+        check_name_str(key, info, source)
+        check_keys(value, info, source, ['type'], ['if'])
+        check_if(value, info, source)
+        check_type(value['type'], info, source, allow_array=not base)
+
+
+def check_alternate(expr, info):
+    members = expr['data']
+
+    if not members:
+        raise QAPISemError(info, "'data' must not be empty")
+    for (key, value) in members.items():
+        source = "'data' member '%s'" % key
+        check_name_str(key, info, source)
+        check_keys(value, info, source, ['type'], ['if'])
+        check_if(value, info, source)
+        check_type(value['type'], info, source)
+
+
+def check_command(expr, info):
+    args = expr.get('data')
+    rets = expr.get('returns')
+    boxed = expr.get('boxed', False)
+
+    if boxed and args is None:
+        raise QAPISemError(info, "'boxed': true requires 'data'")
+    check_type(args, info, "'data'", allow_dict=not boxed)
+    check_type(rets, info, "'returns'", allow_array=True)
+
+
+def check_event(expr, info):
+    args = expr.get('data')
+    boxed = expr.get('boxed', False)
+
+    if boxed and args is None:
+        raise QAPISemError(info, "'boxed': true requires 'data'")
+    check_type(args, info, "'data'", allow_dict=not boxed)
+
+
+def check_exprs(exprs):
+    for expr_elem in exprs:
+        expr = expr_elem['expr']
+        info = expr_elem['info']
+        doc = expr_elem.get('doc')
+
+        if 'include' in expr:
+            continue
+
+        if 'enum' in expr:
+            meta = 'enum'
+        elif 'union' in expr:
+            meta = 'union'
+        elif 'alternate' in expr:
+            meta = 'alternate'
+        elif 'struct' in expr:
+            meta = 'struct'
+        elif 'command' in expr:
+            meta = 'command'
+        elif 'event' in expr:
+            meta = 'event'
+        else:
+            raise QAPISemError(info, "expression is missing metatype")
+
+        name = expr[meta]
+        check_name_is_str(name, info, "'%s'" % meta)
+        info.set_defn(meta, name)
+        check_defn_name_str(name, info, meta)
+
+        if doc:
+            if doc.symbol != name:
+                raise QAPISemError(
+                    info, "documentation comment is for '%s'" % doc.symbol)
+            doc.check_expr(expr)
+        elif info.pragma.doc_required:
+            raise QAPISemError(info,
+                               "documentation comment required")
+
+        if meta == 'enum':
+            check_keys(expr, info, meta,
+                       ['enum', 'data'], ['if', 'features', 'prefix'])
+            check_enum(expr, info)
+        elif meta == 'union':
+            check_keys(expr, info, meta,
+                       ['union', 'data'],
+                       ['base', 'discriminator', 'if', 'features'])
+            normalize_members(expr.get('base'))
+            normalize_members(expr['data'])
+            check_union(expr, info)
+        elif meta == 'alternate':
+            check_keys(expr, info, meta,
+                       ['alternate', 'data'], ['if', 'features'])
+            normalize_members(expr['data'])
+            check_alternate(expr, info)
+        elif meta == 'struct':
+            check_keys(expr, info, meta,
+                       ['struct', 'data'], ['base', 'if', 'features'])
+            normalize_members(expr['data'])
+            check_struct(expr, info)
+        elif meta == 'command':
+            check_keys(expr, info, meta,
+                       ['command'],
+                       ['data', 'returns', 'boxed', 'if', 'features',
+                        'gen', 'success-response', 'allow-oob',
+                        'allow-preconfig', 'coroutine'])
+            normalize_members(expr.get('data'))
+            check_command(expr, info)
+        elif meta == 'event':
+            check_keys(expr, info, meta,
+                       ['event'], ['data', 'boxed', 'if', 'features'])
+            normalize_members(expr.get('data'))
+            check_event(expr, info)
+        else:
+            assert False, 'unexpected meta type'
+
+        check_if(expr, info, meta)
+        check_features(expr.get('features'), info)
+        check_flags(expr, info)
+
+    return exprs
diff --git a/scripts/qapi/gen.py b/scripts/qapi/gen.py
new file mode 100644
index 000000000..b40f18eee
--- /dev/null
+++ b/scripts/qapi/gen.py
@@ -0,0 +1,330 @@
+# -*- coding: utf-8 -*-
+#
+# QAPI code generation
+#
+# Copyright (c) 2015-2019 Red Hat Inc.
+#
+# Authors:
+#  Markus Armbruster 
+#  Marc-André Lureau 
+#
+# This work is licensed under the terms of the GNU GPL, version 2.
+# See the COPYING file in the top-level directory.
+
+from contextlib import contextmanager
+import os
+import re
+from typing import (
+    Dict,
+    Iterator,
+    List,
+    Optional,
+    Tuple,
+)
+
+from .common import (
+    c_fname,
+    c_name,
+    gen_endif,
+    gen_if,
+    guardend,
+    guardstart,
+    mcgen,
+)
+from .schema import QAPISchemaObjectType, QAPISchemaVisitor
+from .source import QAPISourceInfo
+
+
+class QAPIGen:
+    def __init__(self, fname: Optional[str]):
+        self.fname = fname
+        self._preamble = ''
+        self._body = ''
+
+    def preamble_add(self, text: str) -> None:
+        self._preamble += text
+
+    def add(self, text: str) -> None:
+        self._body += text
+
+    def get_content(self) -> str:
+        return self._top() + self._preamble + self._body + self._bottom()
+
+    def _top(self) -> str:
+        # pylint: disable=no-self-use
+        return ''
+
+    def _bottom(self) -> str:
+        # pylint: disable=no-self-use
+        return ''
+
+    def write(self, output_dir: str) -> None:
+        # Include paths starting with ../ are used to reuse modules of the main
+        # schema in specialised schemas. Don't overwrite the files that are
+        # already generated for the main schema.
+        if self.fname.startswith('../'):
+            return
+        pathname = os.path.join(output_dir, self.fname)
+        odir = os.path.dirname(pathname)
+
+        if odir:
+            os.makedirs(odir, exist_ok=True)
+
+        # use os.open for O_CREAT to create and read a non-existant file
+        fd = os.open(pathname, os.O_RDWR | os.O_CREAT, 0o666)
+        with os.fdopen(fd, 'r+', encoding='utf-8') as fp:
+            text = self.get_content()
+            oldtext = fp.read(len(text) + 1)
+            if text != oldtext:
+                fp.seek(0)
+                fp.truncate(0)
+                fp.write(text)
+
+
+def _wrap_ifcond(ifcond: List[str], before: str, after: str) -> str:
+    if before == after:
+        return after   # suppress empty #if ... #endif
+
+    assert after.startswith(before)
+    out = before
+    added = after[len(before):]
+    if added[0] == '\n':
+        out += '\n'
+        added = added[1:]
+    out += gen_if(ifcond)
+    out += added
+    out += gen_endif(ifcond)
+    return out
+
+
+def build_params(arg_type: Optional[QAPISchemaObjectType],
+                 boxed: bool,
+                 extra: Optional[str] = None) -> str:
+    ret = ''
+    sep = ''
+    if boxed:
+        assert arg_type
+        ret += '%s arg' % arg_type.c_param_type()
+        sep = ', '
+    elif arg_type:
+        assert not arg_type.variants
+        for memb in arg_type.members:
+            ret += sep
+            sep = ', '
+            if memb.optional:
+                ret += 'bool has_%s, ' % c_name(memb.name)
+            ret += '%s %s' % (memb.type.c_param_type(),
+                              c_name(memb.name))
+    if extra:
+        ret += sep + extra
+    return ret if ret else 'void'
+
+
+class QAPIGenCCode(QAPIGen):
+    def __init__(self, fname: Optional[str]):
+        super().__init__(fname)
+        self._start_if: Optional[Tuple[List[str], str, str]] = None
+
+    def start_if(self, ifcond: List[str]) -> None:
+        assert self._start_if is None
+        self._start_if = (ifcond, self._body, self._preamble)
+
+    def end_if(self) -> None:
+        assert self._start_if
+        self._wrap_ifcond()
+        self._start_if = None
+
+    def _wrap_ifcond(self) -> None:
+        self._body = _wrap_ifcond(self._start_if[0],
+                                  self._start_if[1], self._body)
+        self._preamble = _wrap_ifcond(self._start_if[0],
+                                      self._start_if[2], self._preamble)
+
+    def get_content(self) -> str:
+        assert self._start_if is None
+        return super().get_content()
+
+
+class QAPIGenC(QAPIGenCCode):
+    def __init__(self, fname: str, blurb: str, pydoc: str):
+        super().__init__(fname)
+        self._blurb = blurb
+        self._copyright = '\n * '.join(re.findall(r'^Copyright .*', pydoc,
+                                                  re.MULTILINE))
+
+    def _top(self) -> str:
+        return mcgen('''
+/* AUTOMATICALLY GENERATED, DO NOT MODIFY */
+
+/*
+%(blurb)s
+ *
+ * %(copyright)s
+ *
+ * This work is licensed under the terms of the GNU LGPL, version 2.1 or later.
+ * See the COPYING.LIB file in the top-level directory.
+ */
+
+''',
+                     blurb=self._blurb, copyright=self._copyright)
+
+    def _bottom(self) -> str:
+        return mcgen('''
+
+/* Dummy declaration to prevent empty .o file */
+char qapi_dummy_%(name)s;
+''',
+                     name=c_fname(self.fname))
+
+
+class QAPIGenH(QAPIGenC):
+    def _top(self) -> str:
+        return super()._top() + guardstart(self.fname)
+
+    def _bottom(self) -> str:
+        return guardend(self.fname)
+
+
+@contextmanager
+def ifcontext(ifcond: List[str], *args: QAPIGenCCode) -> Iterator[None]:
+    """
+    A with-statement context manager that wraps with `start_if()` / `end_if()`.
+
+    :param ifcond: A list of conditionals, passed to `start_if()`.
+    :param args: any number of `QAPIGenCCode`.
+
+    Example::
+
+        with ifcontext(ifcond, self._genh, self._genc):
+            modify self._genh and self._genc ...
+
+    Is equivalent to calling::
+
+        self._genh.start_if(ifcond)
+        self._genc.start_if(ifcond)
+        modify self._genh and self._genc ...
+        self._genh.end_if()
+        self._genc.end_if()
+    """
+    for arg in args:
+        arg.start_if(ifcond)
+    yield
+    for arg in args:
+        arg.end_if()
+
+
+class QAPISchemaMonolithicCVisitor(QAPISchemaVisitor):
+    def __init__(self,
+                 prefix: str,
+                 what: str,
+                 blurb: str,
+                 pydoc: str):
+        self._prefix = prefix
+        self._what = what
+        self._genc = QAPIGenC(self._prefix + self._what + '.c',
+                              blurb, pydoc)
+        self._genh = QAPIGenH(self._prefix + self._what + '.h',
+                              blurb, pydoc)
+
+    def write(self, output_dir: str) -> None:
+        self._genc.write(output_dir)
+        self._genh.write(output_dir)
+
+
+class QAPISchemaModularCVisitor(QAPISchemaVisitor):
+    def __init__(self,
+                 prefix: str,
+                 what: str,
+                 user_blurb: str,
+                 builtin_blurb: Optional[str],
+                 pydoc: str):
+        self._prefix = prefix
+        self._what = what
+        self._user_blurb = user_blurb
+        self._builtin_blurb = builtin_blurb
+        self._pydoc = pydoc
+        self._genc: Optional[QAPIGenC] = None
+        self._genh: Optional[QAPIGenH] = None
+        self._module: Dict[Optional[str], Tuple[QAPIGenC, QAPIGenH]] = {}
+        self._main_module: Optional[str] = None
+
+    @staticmethod
+    def _is_user_module(name: Optional[str]) -> bool:
+        return bool(name and not name.startswith('./'))
+
+    @staticmethod
+    def _is_builtin_module(name: Optional[str]) -> bool:
+        return not name
+
+    def _module_dirname(self, name: Optional[str]) -> str:
+        if self._is_user_module(name):
+            return os.path.dirname(name)
+        return ''
+
+    def _module_basename(self, what: str, name: Optional[str]) -> str:
+        ret = '' if self._is_builtin_module(name) else self._prefix
+        if self._is_user_module(name):
+            basename = os.path.basename(name)
+            ret += what
+            if name != self._main_module:
+                ret += '-' + os.path.splitext(basename)[0]
+        else:
+            name = name[2:] if name else 'builtin'
+            ret += re.sub(r'-', '-' + name + '-', what)
+        return ret
+
+    def _module_filename(self, what: str, name: Optional[str]) -> str:
+        return os.path.join(self._module_dirname(name),
+                            self._module_basename(what, name))
+
+    def _add_module(self, name: Optional[str], blurb: str) -> None:
+        basename = self._module_filename(self._what, name)
+        genc = QAPIGenC(basename + '.c', blurb, self._pydoc)
+        genh = QAPIGenH(basename + '.h', blurb, self._pydoc)
+        self._module[name] = (genc, genh)
+        self._genc, self._genh = self._module[name]
+
+    def _add_user_module(self, name: str, blurb: str) -> None:
+        assert self._is_user_module(name)
+        if self._main_module is None:
+            self._main_module = name
+        self._add_module(name, blurb)
+
+    def _add_system_module(self, name: Optional[str], blurb: str) -> None:
+        self._add_module(name and './' + name, blurb)
+
+    def write(self, output_dir: str, opt_builtins: bool = False) -> None:
+        for name in self._module:
+            if self._is_builtin_module(name) and not opt_builtins:
+                continue
+            (genc, genh) = self._module[name]
+            genc.write(output_dir)
+            genh.write(output_dir)
+
+    def _begin_system_module(self, name: None) -> None:
+        pass
+
+    def _begin_user_module(self, name: str) -> None:
+        pass
+
+    def visit_module(self, name: Optional[str]) -> None:
+        if name is None:
+            if self._builtin_blurb:
+                self._add_system_module(None, self._builtin_blurb)
+                self._begin_system_module(name)
+            else:
+                # The built-in module has not been created.  No code may
+                # be generated.
+                self._genc = None
+                self._genh = None
+        else:
+            self._add_user_module(name, self._user_blurb)
+            self._begin_user_module(name)
+
+    def visit_include(self, name: str, info: QAPISourceInfo) -> None:
+        relname = os.path.relpath(self._module_filename(self._what, name),
+                                  os.path.dirname(self._genh.fname))
+        self._genh.preamble_add(mcgen('''
+#include "%(relname)s.h"
+''',
+                                      relname=relname))
diff --git a/scripts/qapi/introspect.py b/scripts/qapi/introspect.py
new file mode 100644
index 000000000..fafec94e0
--- /dev/null
+++ b/scripts/qapi/introspect.py
@@ -0,0 +1,245 @@
+"""
+QAPI introspection generator
+
+Copyright (C) 2015-2018 Red Hat, Inc.
+
+Authors:
+ Markus Armbruster 
+
+This work is licensed under the terms of the GNU GPL, version 2.
+See the COPYING file in the top-level directory.
+"""
+
+from .common import (
+    c_name,
+    gen_endif,
+    gen_if,
+    mcgen,
+)
+from .gen import QAPISchemaMonolithicCVisitor
+from .schema import (
+    QAPISchemaArrayType,
+    QAPISchemaBuiltinType,
+    QAPISchemaType,
+)
+
+
+def _make_tree(obj, ifcond, features, extra=None):
+    if extra is None:
+        extra = {}
+    if ifcond:
+        extra['if'] = ifcond
+    if features:
+        obj['features'] = [(f.name, {'if': f.ifcond}) for f in features]
+    if extra:
+        return (obj, extra)
+    return obj
+
+
+def _tree_to_qlit(obj, level=0, suppress_first_indent=False):
+
+    def indent(level):
+        return level * 4 * ' '
+
+    if isinstance(obj, tuple):
+        ifobj, extra = obj
+        ifcond = extra.get('if')
+        comment = extra.get('comment')
+        ret = ''
+        if comment:
+            ret += indent(level) + '/* %s */\n' % comment
+        if ifcond:
+            ret += gen_if(ifcond)
+        ret += _tree_to_qlit(ifobj, level)
+        if ifcond:
+            ret += '\n' + gen_endif(ifcond)
+        return ret
+
+    ret = ''
+    if not suppress_first_indent:
+        ret += indent(level)
+    if obj is None:
+        ret += 'QLIT_QNULL'
+    elif isinstance(obj, str):
+        ret += 'QLIT_QSTR(' + to_c_string(obj) + ')'
+    elif isinstance(obj, list):
+        elts = [_tree_to_qlit(elt, level + 1).strip('\n')
+                for elt in obj]
+        elts.append(indent(level + 1) + "{}")
+        ret += 'QLIT_QLIST(((QLitObject[]) {\n'
+        ret += '\n'.join(elts) + '\n'
+        ret += indent(level) + '}))'
+    elif isinstance(obj, dict):
+        elts = []
+        for key, value in sorted(obj.items()):
+            elts.append(indent(level + 1) + '{ %s, %s }' %
+                        (to_c_string(key),
+                         _tree_to_qlit(value, level + 1, True)))
+        elts.append(indent(level + 1) + '{}')
+        ret += 'QLIT_QDICT(((QLitDictEntry[]) {\n'
+        ret += ',\n'.join(elts) + '\n'
+        ret += indent(level) + '}))'
+    elif isinstance(obj, bool):
+        ret += 'QLIT_QBOOL(%s)' % ('true' if obj else 'false')
+    else:
+        assert False                # not implemented
+    if level > 0:
+        ret += ','
+    return ret
+
+
+def to_c_string(string):
+    return '"' + string.replace('\\', r'\\').replace('"', r'\"') + '"'
+
+
+class QAPISchemaGenIntrospectVisitor(QAPISchemaMonolithicCVisitor):
+
+    def __init__(self, prefix, unmask):
+        super().__init__(
+            prefix, 'qapi-introspect',
+            ' * QAPI/QMP schema introspection', __doc__)
+        self._unmask = unmask
+        self._schema = None
+        self._trees = []
+        self._used_types = []
+        self._name_map = {}
+        self._genc.add(mcgen('''
+#include "qemu/osdep.h"
+#include "%(prefix)sqapi-introspect.h"
+
+''',
+                             prefix=prefix))
+
+    def visit_begin(self, schema):
+        self._schema = schema
+
+    def visit_end(self):
+        # visit the types that are actually used
+        for typ in self._used_types:
+            typ.visit(self)
+        # generate C
+        name = c_name(self._prefix, protect=False) + 'qmp_schema_qlit'
+        self._genh.add(mcgen('''
+#include "qapi/qmp/qlit.h"
+
+extern const QLitObject %(c_name)s;
+''',
+                             c_name=c_name(name)))
+        self._genc.add(mcgen('''
+const QLitObject %(c_name)s = %(c_string)s;
+''',
+                             c_name=c_name(name),
+                             c_string=_tree_to_qlit(self._trees)))
+        self._schema = None
+        self._trees = []
+        self._used_types = []
+        self._name_map = {}
+
+    def visit_needed(self, entity):
+        # Ignore types on first pass; visit_end() will pick up used types
+        return not isinstance(entity, QAPISchemaType)
+
+    def _name(self, name):
+        if self._unmask:
+            return name
+        if name not in self._name_map:
+            self._name_map[name] = '%d' % len(self._name_map)
+        return self._name_map[name]
+
+    def _use_type(self, typ):
+        # Map the various integer types to plain int
+        if typ.json_type() == 'int':
+            typ = self._schema.lookup_type('int')
+        elif (isinstance(typ, QAPISchemaArrayType) and
+              typ.element_type.json_type() == 'int'):
+            typ = self._schema.lookup_type('intList')
+        # Add type to work queue if new
+        if typ not in self._used_types:
+            self._used_types.append(typ)
+        # Clients should examine commands and events, not types.  Hide
+        # type names as integers to reduce the temptation.  Also, it
+        # saves a few characters on the wire.
+        if isinstance(typ, QAPISchemaBuiltinType):
+            return typ.name
+        if isinstance(typ, QAPISchemaArrayType):
+            return '[' + self._use_type(typ.element_type) + ']'
+        return self._name(typ.name)
+
+    def _gen_tree(self, name, mtype, obj, ifcond, features):
+        extra = None
+        if mtype not in ('command', 'event', 'builtin', 'array'):
+            if not self._unmask:
+                # Output a comment to make it easy to map masked names
+                # back to the source when reading the generated output.
+                extra = {'comment': '"%s" = %s' % (self._name(name), name)}
+            name = self._name(name)
+        obj['name'] = name
+        obj['meta-type'] = mtype
+        self._trees.append(_make_tree(obj, ifcond, features, extra))
+
+    def _gen_member(self, member):
+        obj = {'name': member.name, 'type': self._use_type(member.type)}
+        if member.optional:
+            obj['default'] = None
+        return _make_tree(obj, member.ifcond, member.features)
+
+    def _gen_variants(self, tag_name, variants):
+        return {'tag': tag_name,
+                'variants': [self._gen_variant(v) for v in variants]}
+
+    def _gen_variant(self, variant):
+        obj = {'case': variant.name, 'type': self._use_type(variant.type)}
+        return _make_tree(obj, variant.ifcond, None)
+
+    def visit_builtin_type(self, name, info, json_type):
+        self._gen_tree(name, 'builtin', {'json-type': json_type}, [], None)
+
+    def visit_enum_type(self, name, info, ifcond, features, members, prefix):
+        self._gen_tree(name, 'enum',
+                       {'values': [_make_tree(m.name, m.ifcond, None)
+                                   for m in members]},
+                       ifcond, features)
+
+    def visit_array_type(self, name, info, ifcond, element_type):
+        element = self._use_type(element_type)
+        self._gen_tree('[' + element + ']', 'array', {'element-type': element},
+                       ifcond, None)
+
+    def visit_object_type_flat(self, name, info, ifcond, features,
+                               members, variants):
+        obj = {'members': [self._gen_member(m) for m in members]}
+        if variants:
+            obj.update(self._gen_variants(variants.tag_member.name,
+                                          variants.variants))
+
+        self._gen_tree(name, 'object', obj, ifcond, features)
+
+    def visit_alternate_type(self, name, info, ifcond, features, variants):
+        self._gen_tree(name, 'alternate',
+                       {'members': [
+                           _make_tree({'type': self._use_type(m.type)},
+                                      m.ifcond, None)
+                           for m in variants.variants]},
+                       ifcond, features)
+
+    def visit_command(self, name, info, ifcond, features,
+                      arg_type, ret_type, gen, success_response, boxed,
+                      allow_oob, allow_preconfig, coroutine):
+        arg_type = arg_type or self._schema.the_empty_object_type
+        ret_type = ret_type or self._schema.the_empty_object_type
+        obj = {'arg-type': self._use_type(arg_type),
+               'ret-type': self._use_type(ret_type)}
+        if allow_oob:
+            obj['allow-oob'] = allow_oob
+        self._gen_tree(name, 'command', obj, ifcond, features)
+
+    def visit_event(self, name, info, ifcond, features, arg_type, boxed):
+        arg_type = arg_type or self._schema.the_empty_object_type
+        self._gen_tree(name, 'event', {'arg-type': self._use_type(arg_type)},
+                       ifcond, features)
+
+
+def gen_introspect(schema, output_dir, prefix, opt_unmask):
+    vis = QAPISchemaGenIntrospectVisitor(prefix, opt_unmask)
+    schema.visit(vis)
+    vis.write(output_dir)
diff --git a/scripts/qapi/main.py b/scripts/qapi/main.py
new file mode 100644
index 000000000..42517210b
--- /dev/null
+++ b/scripts/qapi/main.py
@@ -0,0 +1,95 @@
+# This work is licensed under the terms of the GNU GPL, version 2 or later.
+# See the COPYING file in the top-level directory.
+
+"""
+QAPI Generator
+
+This is the main entry point for generating C code from the QAPI schema.
+"""
+
+import argparse
+import re
+import sys
+from typing import Optional
+
+from .commands import gen_commands
+from .error import QAPIError
+from .events import gen_events
+from .introspect import gen_introspect
+from .schema import QAPISchema
+from .types import gen_types
+from .visit import gen_visit
+
+
+def invalid_prefix_char(prefix: str) -> Optional[str]:
+    match = re.match(r'([A-Za-z_.-][A-Za-z0-9_.-]*)?', prefix)
+    if match.end() != len(prefix):
+        return prefix[match.end()]
+    return None
+
+
+def generate(schema_file: str,
+             output_dir: str,
+             prefix: str,
+             unmask: bool = False,
+             builtins: bool = False) -> None:
+    """
+    Generate C code for the given schema into the target directory.
+
+    :param schema_file: The primary QAPI schema file.
+    :param output_dir: The output directory to store generated code.
+    :param prefix: Optional C-code prefix for symbol names.
+    :param unmask: Expose non-ABI names through introspection?
+    :param builtins: Generate code for built-in types?
+
+    :raise QAPIError: On failures.
+    """
+    assert invalid_prefix_char(prefix) is None
+
+    schema = QAPISchema(schema_file)
+    gen_types(schema, output_dir, prefix, builtins)
+    gen_visit(schema, output_dir, prefix, builtins)
+    gen_commands(schema, output_dir, prefix)
+    gen_events(schema, output_dir, prefix)
+    gen_introspect(schema, output_dir, prefix, unmask)
+
+
+def main() -> int:
+    """
+    gapi-gen executable entry point.
+    Expects arguments via sys.argv, see --help for details.
+
+    :return: int, 0 on success, 1 on failure.
+    """
+    parser = argparse.ArgumentParser(
+        description='Generate code from a QAPI schema')
+    parser.add_argument('-b', '--builtins', action='store_true',
+                        help="generate code for built-in types")
+    parser.add_argument('-o', '--output-dir', action='store',
+                        default='',
+                        help="write output to directory OUTPUT_DIR")
+    parser.add_argument('-p', '--prefix', action='store',
+                        default='',
+                        help="prefix for symbols")
+    parser.add_argument('-u', '--unmask-non-abi-names', action='store_true',
+                        dest='unmask',
+                        help="expose non-ABI names in introspection")
+    parser.add_argument('schema', action='store')
+    args = parser.parse_args()
+
+    funny_char = invalid_prefix_char(args.prefix)
+    if funny_char:
+        msg = f"funny character '{funny_char}' in argument of --prefix"
+        print(f"{sys.argv[0]}: {msg}", file=sys.stderr)
+        return 1
+
+    try:
+        generate(args.schema,
+                 output_dir=args.output_dir,
+                 prefix=args.prefix,
+                 unmask=args.unmask,
+                 builtins=args.builtins)
+    except QAPIError as err:
+        print(f"{sys.argv[0]}: {str(err)}", file=sys.stderr)
+        return 1
+    return 0
diff --git a/scripts/qapi/mypy.ini b/scripts/qapi/mypy.ini
new file mode 100644
index 000000000..74fc6c821
--- /dev/null
+++ b/scripts/qapi/mypy.ini
@@ -0,0 +1,30 @@
+[mypy]
+strict = True
+strict_optional = False
+disallow_untyped_calls = False
+python_version = 3.6
+
+[mypy-qapi.error]
+disallow_untyped_defs = False
+disallow_incomplete_defs = False
+check_untyped_defs = False
+
+[mypy-qapi.expr]
+disallow_untyped_defs = False
+disallow_incomplete_defs = False
+check_untyped_defs = False
+
+[mypy-qapi.introspect]
+disallow_untyped_defs = False
+disallow_incomplete_defs = False
+check_untyped_defs = False
+
+[mypy-qapi.parser]
+disallow_untyped_defs = False
+disallow_incomplete_defs = False
+check_untyped_defs = False
+
+[mypy-qapi.schema]
+disallow_untyped_defs = False
+disallow_incomplete_defs = False
+check_untyped_defs = False
diff --git a/scripts/qapi/parser.py b/scripts/qapi/parser.py
new file mode 100644
index 000000000..e7b9d670a
--- /dev/null
+++ b/scripts/qapi/parser.py
@@ -0,0 +1,640 @@
+# -*- coding: utf-8 -*-
+#
+# QAPI schema parser
+#
+# Copyright IBM, Corp. 2011
+# Copyright (c) 2013-2019 Red Hat Inc.
+#
+# Authors:
+#  Anthony Liguori 
+#  Markus Armbruster 
+#  Marc-André Lureau 
+#  Kevin Wolf 
+#
+# This work is licensed under the terms of the GNU GPL, version 2.
+# See the COPYING file in the top-level directory.
+
+from collections import OrderedDict
+import os
+import re
+
+from .error import QAPIParseError, QAPISemError
+from .source import QAPISourceInfo
+
+
+class QAPISchemaParser:
+
+    def __init__(self, fname, previously_included=None, incl_info=None):
+        previously_included = previously_included or set()
+        previously_included.add(os.path.abspath(fname))
+
+        try:
+            fp = open(fname, 'r', encoding='utf-8')
+            self.src = fp.read()
+        except IOError as e:
+            raise QAPISemError(incl_info or QAPISourceInfo(None, None, None),
+                               "can't read %s file '%s': %s"
+                               % ("include" if incl_info else "schema",
+                                  fname,
+                                  e.strerror))
+
+        if self.src == '' or self.src[-1] != '\n':
+            self.src += '\n'
+        self.cursor = 0
+        self.info = QAPISourceInfo(fname, 1, incl_info)
+        self.line_pos = 0
+        self.exprs = []
+        self.docs = []
+        self.accept()
+        cur_doc = None
+
+        while self.tok is not None:
+            info = self.info
+            if self.tok == '#':
+                self.reject_expr_doc(cur_doc)
+                for cur_doc in self.get_doc(info):
+                    self.docs.append(cur_doc)
+                continue
+
+            expr = self.get_expr(False)
+            if 'include' in expr:
+                self.reject_expr_doc(cur_doc)
+                if len(expr) != 1:
+                    raise QAPISemError(info, "invalid 'include' directive")
+                include = expr['include']
+                if not isinstance(include, str):
+                    raise QAPISemError(info,
+                                       "value of 'include' must be a string")
+                incl_fname = os.path.join(os.path.dirname(fname),
+                                          include)
+                self.exprs.append({'expr': {'include': incl_fname},
+                                   'info': info})
+                exprs_include = self._include(include, info, incl_fname,
+                                              previously_included)
+                if exprs_include:
+                    self.exprs.extend(exprs_include.exprs)
+                    self.docs.extend(exprs_include.docs)
+            elif "pragma" in expr:
+                self.reject_expr_doc(cur_doc)
+                if len(expr) != 1:
+                    raise QAPISemError(info, "invalid 'pragma' directive")
+                pragma = expr['pragma']
+                if not isinstance(pragma, dict):
+                    raise QAPISemError(
+                        info, "value of 'pragma' must be an object")
+                for name, value in pragma.items():
+                    self._pragma(name, value, info)
+            else:
+                expr_elem = {'expr': expr,
+                             'info': info}
+                if cur_doc:
+                    if not cur_doc.symbol:
+                        raise QAPISemError(
+                            cur_doc.info, "definition documentation required")
+                    expr_elem['doc'] = cur_doc
+                self.exprs.append(expr_elem)
+            cur_doc = None
+        self.reject_expr_doc(cur_doc)
+
+    @staticmethod
+    def reject_expr_doc(doc):
+        if doc and doc.symbol:
+            raise QAPISemError(
+                doc.info,
+                "documentation for '%s' is not followed by the definition"
+                % doc.symbol)
+
+    def _include(self, include, info, incl_fname, previously_included):
+        incl_abs_fname = os.path.abspath(incl_fname)
+        # catch inclusion cycle
+        inf = info
+        while inf:
+            if incl_abs_fname == os.path.abspath(inf.fname):
+                raise QAPISemError(info, "inclusion loop for %s" % include)
+            inf = inf.parent
+
+        # skip multiple include of the same file
+        if incl_abs_fname in previously_included:
+            return None
+
+        return QAPISchemaParser(incl_fname, previously_included, info)
+
+    def _pragma(self, name, value, info):
+        if name == 'doc-required':
+            if not isinstance(value, bool):
+                raise QAPISemError(info,
+                                   "pragma 'doc-required' must be boolean")
+            info.pragma.doc_required = value
+        elif name == 'returns-whitelist':
+            if (not isinstance(value, list)
+                    or any([not isinstance(elt, str) for elt in value])):
+                raise QAPISemError(
+                    info,
+                    "pragma returns-whitelist must be a list of strings")
+            info.pragma.returns_whitelist = value
+        elif name == 'name-case-whitelist':
+            if (not isinstance(value, list)
+                    or any([not isinstance(elt, str) for elt in value])):
+                raise QAPISemError(
+                    info,
+                    "pragma name-case-whitelist must be a list of strings")
+            info.pragma.name_case_whitelist = value
+        else:
+            raise QAPISemError(info, "unknown pragma '%s'" % name)
+
+    def accept(self, skip_comment=True):
+        while True:
+            self.tok = self.src[self.cursor]
+            self.pos = self.cursor
+            self.cursor += 1
+            self.val = None
+
+            if self.tok == '#':
+                if self.src[self.cursor] == '#':
+                    # Start of doc comment
+                    skip_comment = False
+                self.cursor = self.src.find('\n', self.cursor)
+                if not skip_comment:
+                    self.val = self.src[self.pos:self.cursor]
+                    return
+            elif self.tok in '{}:,[]':
+                return
+            elif self.tok == "'":
+                # Note: we accept only printable ASCII
+                string = ''
+                esc = False
+                while True:
+                    ch = self.src[self.cursor]
+                    self.cursor += 1
+                    if ch == '\n':
+                        raise QAPIParseError(self, "missing terminating \"'\"")
+                    if esc:
+                        # Note: we recognize only \\ because we have
+                        # no use for funny characters in strings
+                        if ch != '\\':
+                            raise QAPIParseError(self,
+                                                 "unknown escape \\%s" % ch)
+                        esc = False
+                    elif ch == '\\':
+                        esc = True
+                        continue
+                    elif ch == "'":
+                        self.val = string
+                        return
+                    if ord(ch) < 32 or ord(ch) >= 127:
+                        raise QAPIParseError(
+                            self, "funny character in string")
+                    string += ch
+            elif self.src.startswith('true', self.pos):
+                self.val = True
+                self.cursor += 3
+                return
+            elif self.src.startswith('false', self.pos):
+                self.val = False
+                self.cursor += 4
+                return
+            elif self.tok == '\n':
+                if self.cursor == len(self.src):
+                    self.tok = None
+                    return
+                self.info = self.info.next_line()
+                self.line_pos = self.cursor
+            elif not self.tok.isspace():
+                # Show up to next structural, whitespace or quote
+                # character
+                match = re.match('[^[\\]{}:,\\s\'"]+',
+                                 self.src[self.cursor-1:])
+                raise QAPIParseError(self, "stray '%s'" % match.group(0))
+
+    def get_members(self):
+        expr = OrderedDict()
+        if self.tok == '}':
+            self.accept()
+            return expr
+        if self.tok != "'":
+            raise QAPIParseError(self, "expected string or '}'")
+        while True:
+            key = self.val
+            self.accept()
+            if self.tok != ':':
+                raise QAPIParseError(self, "expected ':'")
+            self.accept()
+            if key in expr:
+                raise QAPIParseError(self, "duplicate key '%s'" % key)
+            expr[key] = self.get_expr(True)
+            if self.tok == '}':
+                self.accept()
+                return expr
+            if self.tok != ',':
+                raise QAPIParseError(self, "expected ',' or '}'")
+            self.accept()
+            if self.tok != "'":
+                raise QAPIParseError(self, "expected string")
+
+    def get_values(self):
+        expr = []
+        if self.tok == ']':
+            self.accept()
+            return expr
+        if self.tok not in "{['tfn":
+            raise QAPIParseError(
+                self, "expected '{', '[', ']', string, boolean or 'null'")
+        while True:
+            expr.append(self.get_expr(True))
+            if self.tok == ']':
+                self.accept()
+                return expr
+            if self.tok != ',':
+                raise QAPIParseError(self, "expected ',' or ']'")
+            self.accept()
+
+    def get_expr(self, nested):
+        if self.tok != '{' and not nested:
+            raise QAPIParseError(self, "expected '{'")
+        if self.tok == '{':
+            self.accept()
+            expr = self.get_members()
+        elif self.tok == '[':
+            self.accept()
+            expr = self.get_values()
+        elif self.tok in "'tfn":
+            expr = self.val
+            self.accept()
+        else:
+            raise QAPIParseError(
+                self, "expected '{', '[', string, boolean or 'null'")
+        return expr
+
+    def get_doc(self, info):
+        if self.val != '##':
+            raise QAPIParseError(
+                self, "junk after '##' at start of documentation comment")
+
+        docs = []
+        cur_doc = QAPIDoc(self, info)
+        self.accept(False)
+        while self.tok == '#':
+            if self.val.startswith('##'):
+                # End of doc comment
+                if self.val != '##':
+                    raise QAPIParseError(
+                        self,
+                        "junk after '##' at end of documentation comment")
+                cur_doc.end_comment()
+                docs.append(cur_doc)
+                self.accept()
+                return docs
+            if self.val.startswith('# ='):
+                if cur_doc.symbol:
+                    raise QAPIParseError(
+                        self,
+                        "unexpected '=' markup in definition documentation")
+                if cur_doc.body.text:
+                    cur_doc.end_comment()
+                    docs.append(cur_doc)
+                    cur_doc = QAPIDoc(self, info)
+            cur_doc.append(self.val)
+            self.accept(False)
+
+        raise QAPIParseError(self, "documentation comment must end with '##'")
+
+
+class QAPIDoc:
+    """
+    A documentation comment block, either definition or free-form
+
+    Definition documentation blocks consist of
+
+    * a body section: one line naming the definition, followed by an
+      overview (any number of lines)
+
+    * argument sections: a description of each argument (for commands
+      and events) or member (for structs, unions and alternates)
+
+    * features sections: a description of each feature flag
+
+    * additional (non-argument) sections, possibly tagged
+
+    Free-form documentation blocks consist only of a body section.
+    """
+
+    class Section:
+        def __init__(self, parser, name=None, indent=0):
+            # parser, for error messages about indentation
+            self._parser = parser
+            # optional section name (argument/member or section name)
+            self.name = name
+            self.text = ''
+            # the expected indent level of the text of this section
+            self._indent = indent
+
+        def append(self, line):
+            # Strip leading spaces corresponding to the expected indent level
+            # Blank lines are always OK.
+            if line:
+                indent = re.match(r'\s*', line).end()
+                if indent < self._indent:
+                    raise QAPIParseError(
+                        self._parser,
+                        "unexpected de-indent (expected at least %d spaces)" %
+                        self._indent)
+                line = line[self._indent:]
+
+            self.text += line.rstrip() + '\n'
+
+    class ArgSection(Section):
+        def __init__(self, parser, name, indent=0):
+            super().__init__(parser, name, indent)
+            self.member = None
+
+        def connect(self, member):
+            self.member = member
+
+    def __init__(self, parser, info):
+        # self._parser is used to report errors with QAPIParseError.  The
+        # resulting error position depends on the state of the parser.
+        # It happens to be the beginning of the comment.  More or less
+        # servicable, but action at a distance.
+        self._parser = parser
+        self.info = info
+        self.symbol = None
+        self.body = QAPIDoc.Section(parser)
+        # dict mapping parameter name to ArgSection
+        self.args = OrderedDict()
+        self.features = OrderedDict()
+        # a list of Section
+        self.sections = []
+        # the current section
+        self._section = self.body
+        self._append_line = self._append_body_line
+
+    def has_section(self, name):
+        """Return True if we have a section with this name."""
+        for i in self.sections:
+            if i.name == name:
+                return True
+        return False
+
+    def append(self, line):
+        """
+        Parse a comment line and add it to the documentation.
+
+        The way that the line is dealt with depends on which part of
+        the documentation we're parsing right now:
+        * The body section: ._append_line is ._append_body_line
+        * An argument section: ._append_line is ._append_args_line
+        * A features section: ._append_line is ._append_features_line
+        * An additional section: ._append_line is ._append_various_line
+        """
+        line = line[1:]
+        if not line:
+            self._append_freeform(line)
+            return
+
+        if line[0] != ' ':
+            raise QAPIParseError(self._parser, "missing space after #")
+        line = line[1:]
+        self._append_line(line)
+
+    def end_comment(self):
+        self._end_section()
+
+    @staticmethod
+    def _is_section_tag(name):
+        return name in ('Returns:', 'Since:',
+                        # those are often singular or plural
+                        'Note:', 'Notes:',
+                        'Example:', 'Examples:',
+                        'TODO:')
+
+    def _append_body_line(self, line):
+        """
+        Process a line of documentation text in the body section.
+
+        If this a symbol line and it is the section's first line, this
+        is a definition documentation block for that symbol.
+
+        If it's a definition documentation block, another symbol line
+        begins the argument section for the argument named by it, and
+        a section tag begins an additional section.  Start that
+        section and append the line to it.
+
+        Else, append the line to the current section.
+        """
+        name = line.split(' ', 1)[0]
+        # FIXME not nice: things like '#  @foo:' and '# @foo: ' aren't
+        # recognized, and get silently treated as ordinary text
+        if not self.symbol and not self.body.text and line.startswith('@'):
+            if not line.endswith(':'):
+                raise QAPIParseError(self._parser, "line should end with ':'")
+            self.symbol = line[1:-1]
+            # FIXME invalid names other than the empty string aren't flagged
+            if not self.symbol:
+                raise QAPIParseError(self._parser, "invalid name")
+        elif self.symbol:
+            # This is a definition documentation block
+            if name.startswith('@') and name.endswith(':'):
+                self._append_line = self._append_args_line
+                self._append_args_line(line)
+            elif line == 'Features:':
+                self._append_line = self._append_features_line
+            elif self._is_section_tag(name):
+                self._append_line = self._append_various_line
+                self._append_various_line(line)
+            else:
+                self._append_freeform(line)
+        else:
+            # This is a free-form documentation block
+            self._append_freeform(line)
+
+    def _append_args_line(self, line):
+        """
+        Process a line of documentation text in an argument section.
+
+        A symbol line begins the next argument section, a section tag
+        section or a non-indented line after a blank line begins an
+        additional section.  Start that section and append the line to
+        it.
+
+        Else, append the line to the current section.
+
+        """
+        name = line.split(' ', 1)[0]
+
+        if name.startswith('@') and name.endswith(':'):
+            # If line is "@arg:   first line of description", find
+            # the index of 'f', which is the indent we expect for any
+            # following lines.  We then remove the leading "@arg:"
+            # from line and replace it with spaces so that 'f' has the
+            # same index as it did in the original line and can be
+            # handled the same way we will handle following lines.
+            indent = re.match(r'@\S*:\s*', line).end()
+            line = line[indent:]
+            if not line:
+                # Line was just the "@arg:" header; following lines
+                # are not indented
+                indent = 0
+            else:
+                line = ' ' * indent + line
+            self._start_args_section(name[1:-1], indent)
+        elif self._is_section_tag(name):
+            self._append_line = self._append_various_line
+            self._append_various_line(line)
+            return
+        elif (self._section.text.endswith('\n\n')
+              and line and not line[0].isspace()):
+            if line == 'Features:':
+                self._append_line = self._append_features_line
+            else:
+                self._start_section()
+                self._append_line = self._append_various_line
+                self._append_various_line(line)
+            return
+
+        self._append_freeform(line)
+
+    def _append_features_line(self, line):
+        name = line.split(' ', 1)[0]
+
+        if name.startswith('@') and name.endswith(':'):
+            # If line is "@arg:   first line of description", find
+            # the index of 'f', which is the indent we expect for any
+            # following lines.  We then remove the leading "@arg:"
+            # from line and replace it with spaces so that 'f' has the
+            # same index as it did in the original line and can be
+            # handled the same way we will handle following lines.
+            indent = re.match(r'@\S*:\s*', line).end()
+            line = line[indent:]
+            if not line:
+                # Line was just the "@arg:" header; following lines
+                # are not indented
+                indent = 0
+            else:
+                line = ' ' * indent + line
+            self._start_features_section(name[1:-1], indent)
+        elif self._is_section_tag(name):
+            self._append_line = self._append_various_line
+            self._append_various_line(line)
+            return
+        elif (self._section.text.endswith('\n\n')
+              and line and not line[0].isspace()):
+            self._start_section()
+            self._append_line = self._append_various_line
+            self._append_various_line(line)
+            return
+
+        self._append_freeform(line)
+
+    def _append_various_line(self, line):
+        """
+        Process a line of documentation text in an additional section.
+
+        A symbol line is an error.
+
+        A section tag begins an additional section.  Start that
+        section and append the line to it.
+
+        Else, append the line to the current section.
+        """
+        name = line.split(' ', 1)[0]
+
+        if name.startswith('@') and name.endswith(':'):
+            raise QAPIParseError(self._parser,
+                                 "'%s' can't follow '%s' section"
+                                 % (name, self.sections[0].name))
+        if self._is_section_tag(name):
+            # If line is "Section:   first line of description", find
+            # the index of 'f', which is the indent we expect for any
+            # following lines.  We then remove the leading "Section:"
+            # from line and replace it with spaces so that 'f' has the
+            # same index as it did in the original line and can be
+            # handled the same way we will handle following lines.
+            indent = re.match(r'\S*:\s*', line).end()
+            line = line[indent:]
+            if not line:
+                # Line was just the "Section:" header; following lines
+                # are not indented
+                indent = 0
+            else:
+                line = ' ' * indent + line
+            self._start_section(name[:-1], indent)
+
+        self._append_freeform(line)
+
+    def _start_symbol_section(self, symbols_dict, name, indent):
+        # FIXME invalid names other than the empty string aren't flagged
+        if not name:
+            raise QAPIParseError(self._parser, "invalid parameter name")
+        if name in symbols_dict:
+            raise QAPIParseError(self._parser,
+                                 "'%s' parameter name duplicated" % name)
+        assert not self.sections
+        self._end_section()
+        self._section = QAPIDoc.ArgSection(self._parser, name, indent)
+        symbols_dict[name] = self._section
+
+    def _start_args_section(self, name, indent):
+        self._start_symbol_section(self.args, name, indent)
+
+    def _start_features_section(self, name, indent):
+        self._start_symbol_section(self.features, name, indent)
+
+    def _start_section(self, name=None, indent=0):
+        if name in ('Returns', 'Since') and self.has_section(name):
+            raise QAPIParseError(self._parser,
+                                 "duplicated '%s' section" % name)
+        self._end_section()
+        self._section = QAPIDoc.Section(self._parser, name, indent)
+        self.sections.append(self._section)
+
+    def _end_section(self):
+        if self._section:
+            text = self._section.text = self._section.text.strip()
+            if self._section.name and (not text or text.isspace()):
+                raise QAPIParseError(
+                    self._parser,
+                    "empty doc section '%s'" % self._section.name)
+            self._section = None
+
+    def _append_freeform(self, line):
+        match = re.match(r'(@\S+:)', line)
+        if match:
+            raise QAPIParseError(self._parser,
+                                 "'%s' not allowed in free-form documentation"
+                                 % match.group(1))
+        self._section.append(line)
+
+    def connect_member(self, member):
+        if member.name not in self.args:
+            # Undocumented TODO outlaw
+            self.args[member.name] = QAPIDoc.ArgSection(self._parser,
+                                                        member.name)
+        self.args[member.name].connect(member)
+
+    def connect_feature(self, feature):
+        if feature.name not in self.features:
+            raise QAPISemError(feature.info,
+                               "feature '%s' lacks documentation"
+                               % feature.name)
+        self.features[feature.name].connect(feature)
+
+    def check_expr(self, expr):
+        if self.has_section('Returns') and 'command' not in expr:
+            raise QAPISemError(self.info,
+                               "'Returns:' is only valid for commands")
+
+    def check(self):
+
+        def check_args_section(args, info, what):
+            bogus = [name for name, section in args.items()
+                     if not section.member]
+            if bogus:
+                raise QAPISemError(
+                    self.info,
+                    "documented member%s '%s' %s not exist"
+                    % ("s" if len(bogus) > 1 else "",
+                       "', '".join(bogus),
+                       "do" if len(bogus) > 1 else "does"))
+
+        check_args_section(self.args, self.info, 'members')
+        check_args_section(self.features, self.info, 'features')
diff --git a/scripts/qapi/pylintrc b/scripts/qapi/pylintrc
new file mode 100644
index 000000000..b9e077a16
--- /dev/null
+++ b/scripts/qapi/pylintrc
@@ -0,0 +1,70 @@
+[MASTER]
+
+# Add files or directories matching the regex patterns to the ignore list.
+# The regex matches against base names, not paths.
+ignore-patterns=error.py,
+                expr.py,
+                parser.py,
+                schema.py,
+
+
+[MESSAGES CONTROL]
+
+# Disable the message, report, category or checker with the given id(s). You
+# can either give multiple identifiers separated by comma (,) or put this
+# option multiple times (only on the command line, not in the configuration
+# file where it should appear only once). You can also use "--disable=all" to
+# disable everything first and then reenable specific checks. For example, if
+# you want to run only the similarities checker, you can use "--disable=all
+# --enable=similarities". If you want to run only the classes checker, but have
+# no Warning level messages displayed, use "--disable=all --enable=classes
+# --disable=W".
+disable=fixme,
+        missing-docstring,
+        too-many-arguments,
+        too-many-branches,
+        too-many-statements,
+        too-many-instance-attributes,
+
+[REPORTS]
+
+[REFACTORING]
+
+[MISCELLANEOUS]
+
+[LOGGING]
+
+[BASIC]
+
+# Good variable names which should always be accepted, separated by a comma.
+good-names=i,
+           j,
+           k,
+           ex,
+           Run,
+           _,
+           fp,  # fp = open(...)
+           fd,  # fd = os.open(...)
+
+[VARIABLES]
+
+[STRING]
+
+[SPELLING]
+
+[FORMAT]
+
+[SIMILARITIES]
+
+# Ignore import statements themselves when computing similarities.
+ignore-imports=yes
+
+[TYPECHECK]
+
+[CLASSES]
+
+[IMPORTS]
+
+[DESIGN]
+
+[EXCEPTIONS]
diff --git a/scripts/qapi/schema.py b/scripts/qapi/schema.py
new file mode 100644
index 000000000..720449fee
--- /dev/null
+++ b/scripts/qapi/schema.py
@@ -0,0 +1,1146 @@
+# -*- coding: utf-8 -*-
+#
+# QAPI schema internal representation
+#
+# Copyright (c) 2015-2019 Red Hat Inc.
+#
+# Authors:
+#  Markus Armbruster 
+#  Eric Blake 
+#  Marc-André Lureau 
+#
+# This work is licensed under the terms of the GNU GPL, version 2.
+# See the COPYING file in the top-level directory.
+
+# TODO catching name collisions in generated code would be nice
+
+from collections import OrderedDict
+import os
+import re
+from typing import Optional
+
+from .common import POINTER_SUFFIX, c_name
+from .error import QAPIError, QAPISemError
+from .expr import check_exprs
+from .parser import QAPISchemaParser
+
+
+class QAPISchemaEntity:
+    meta: Optional[str] = None
+
+    def __init__(self, name, info, doc, ifcond=None, features=None):
+        assert name is None or isinstance(name, str)
+        for f in features or []:
+            assert isinstance(f, QAPISchemaFeature)
+            f.set_defined_in(name)
+        self.name = name
+        self._module = None
+        # For explicitly defined entities, info points to the (explicit)
+        # definition.  For builtins (and their arrays), info is None.
+        # For implicitly defined entities, info points to a place that
+        # triggered the implicit definition (there may be more than one
+        # such place).
+        self.info = info
+        self.doc = doc
+        self._ifcond = ifcond or []
+        self.features = features or []
+        self._checked = False
+
+    def c_name(self):
+        return c_name(self.name)
+
+    def check(self, schema):
+        assert not self._checked
+        seen = {}
+        for f in self.features:
+            f.check_clash(self.info, seen)
+        self._checked = True
+
+    def connect_doc(self, doc=None):
+        doc = doc or self.doc
+        if doc:
+            for f in self.features:
+                doc.connect_feature(f)
+
+    def check_doc(self):
+        if self.doc:
+            self.doc.check()
+
+    def _set_module(self, schema, info):
+        assert self._checked
+        self._module = schema.module_by_fname(info and info.fname)
+        self._module.add_entity(self)
+
+    def set_module(self, schema):
+        self._set_module(schema, self.info)
+
+    @property
+    def ifcond(self):
+        assert self._checked
+        return self._ifcond
+
+    def is_implicit(self):
+        return not self.info
+
+    def visit(self, visitor):
+        assert self._checked
+
+    def describe(self):
+        assert self.meta
+        return "%s '%s'" % (self.meta, self.name)
+
+
+class QAPISchemaVisitor:
+    def visit_begin(self, schema):
+        pass
+
+    def visit_end(self):
+        pass
+
+    def visit_module(self, name):
+        pass
+
+    def visit_needed(self, entity):
+        # Default to visiting everything
+        return True
+
+    def visit_include(self, name, info):
+        pass
+
+    def visit_builtin_type(self, name, info, json_type):
+        pass
+
+    def visit_enum_type(self, name, info, ifcond, features, members, prefix):
+        pass
+
+    def visit_array_type(self, name, info, ifcond, element_type):
+        pass
+
+    def visit_object_type(self, name, info, ifcond, features,
+                          base, members, variants):
+        pass
+
+    def visit_object_type_flat(self, name, info, ifcond, features,
+                               members, variants):
+        pass
+
+    def visit_alternate_type(self, name, info, ifcond, features, variants):
+        pass
+
+    def visit_command(self, name, info, ifcond, features,
+                      arg_type, ret_type, gen, success_response, boxed,
+                      allow_oob, allow_preconfig, coroutine):
+        pass
+
+    def visit_event(self, name, info, ifcond, features, arg_type, boxed):
+        pass
+
+
+class QAPISchemaModule:
+    def __init__(self, name):
+        self.name = name
+        self._entity_list = []
+
+    def add_entity(self, ent):
+        self._entity_list.append(ent)
+
+    def visit(self, visitor):
+        visitor.visit_module(self.name)
+        for entity in self._entity_list:
+            if visitor.visit_needed(entity):
+                entity.visit(visitor)
+
+
+class QAPISchemaInclude(QAPISchemaEntity):
+    def __init__(self, sub_module, info):
+        super().__init__(None, info, None)
+        self._sub_module = sub_module
+
+    def visit(self, visitor):
+        super().visit(visitor)
+        visitor.visit_include(self._sub_module.name, self.info)
+
+
+class QAPISchemaType(QAPISchemaEntity):
+    # Return the C type for common use.
+    # For the types we commonly box, this is a pointer type.
+    def c_type(self):
+        pass
+
+    # Return the C type to be used in a parameter list.
+    def c_param_type(self):
+        return self.c_type()
+
+    # Return the C type to be used where we suppress boxing.
+    def c_unboxed_type(self):
+        return self.c_type()
+
+    def json_type(self):
+        pass
+
+    def alternate_qtype(self):
+        json2qtype = {
+            'null':    'QTYPE_QNULL',
+            'string':  'QTYPE_QSTRING',
+            'number':  'QTYPE_QNUM',
+            'int':     'QTYPE_QNUM',
+            'boolean': 'QTYPE_QBOOL',
+            'object':  'QTYPE_QDICT'
+        }
+        return json2qtype.get(self.json_type())
+
+    def doc_type(self):
+        if self.is_implicit():
+            return None
+        return self.name
+
+    def check(self, schema):
+        QAPISchemaEntity.check(self, schema)
+        if 'deprecated' in [f.name for f in self.features]:
+            raise QAPISemError(
+                self.info, "feature 'deprecated' is not supported for types")
+
+    def describe(self):
+        assert self.meta
+        return "%s type '%s'" % (self.meta, self.name)
+
+
+class QAPISchemaBuiltinType(QAPISchemaType):
+    meta = 'built-in'
+
+    def __init__(self, name, json_type, c_type):
+        super().__init__(name, None, None)
+        assert not c_type or isinstance(c_type, str)
+        assert json_type in ('string', 'number', 'int', 'boolean', 'null',
+                             'value')
+        self._json_type_name = json_type
+        self._c_type_name = c_type
+
+    def c_name(self):
+        return self.name
+
+    def c_type(self):
+        return self._c_type_name
+
+    def c_param_type(self):
+        if self.name == 'str':
+            return 'const ' + self._c_type_name
+        return self._c_type_name
+
+    def json_type(self):
+        return self._json_type_name
+
+    def doc_type(self):
+        return self.json_type()
+
+    def visit(self, visitor):
+        super().visit(visitor)
+        visitor.visit_builtin_type(self.name, self.info, self.json_type())
+
+
+class QAPISchemaEnumType(QAPISchemaType):
+    meta = 'enum'
+
+    def __init__(self, name, info, doc, ifcond, features, members, prefix):
+        super().__init__(name, info, doc, ifcond, features)
+        for m in members:
+            assert isinstance(m, QAPISchemaEnumMember)
+            m.set_defined_in(name)
+        assert prefix is None or isinstance(prefix, str)
+        self.members = members
+        self.prefix = prefix
+
+    def check(self, schema):
+        super().check(schema)
+        seen = {}
+        for m in self.members:
+            m.check_clash(self.info, seen)
+
+    def connect_doc(self, doc=None):
+        super().connect_doc(doc)
+        doc = doc or self.doc
+        for m in self.members:
+            m.connect_doc(doc)
+
+    def is_implicit(self):
+        # See QAPISchema._make_implicit_enum_type() and ._def_predefineds()
+        return self.name.endswith('Kind') or self.name == 'QType'
+
+    def c_type(self):
+        return c_name(self.name)
+
+    def member_names(self):
+        return [m.name for m in self.members]
+
+    def json_type(self):
+        return 'string'
+
+    def visit(self, visitor):
+        super().visit(visitor)
+        visitor.visit_enum_type(
+            self.name, self.info, self.ifcond, self.features,
+            self.members, self.prefix)
+
+
+class QAPISchemaArrayType(QAPISchemaType):
+    meta = 'array'
+
+    def __init__(self, name, info, element_type):
+        super().__init__(name, info, None)
+        assert isinstance(element_type, str)
+        self._element_type_name = element_type
+        self.element_type = None
+
+    def check(self, schema):
+        super().check(schema)
+        self.element_type = schema.resolve_type(
+            self._element_type_name, self.info,
+            self.info and self.info.defn_meta)
+        assert not isinstance(self.element_type, QAPISchemaArrayType)
+
+    def set_module(self, schema):
+        self._set_module(schema, self.element_type.info)
+
+    @property
+    def ifcond(self):
+        assert self._checked
+        return self.element_type.ifcond
+
+    def is_implicit(self):
+        return True
+
+    def c_type(self):
+        return c_name(self.name) + POINTER_SUFFIX
+
+    def json_type(self):
+        return 'array'
+
+    def doc_type(self):
+        elt_doc_type = self.element_type.doc_type()
+        if not elt_doc_type:
+            return None
+        return 'array of ' + elt_doc_type
+
+    def visit(self, visitor):
+        super().visit(visitor)
+        visitor.visit_array_type(self.name, self.info, self.ifcond,
+                                 self.element_type)
+
+    def describe(self):
+        assert self.meta
+        return "%s type ['%s']" % (self.meta, self._element_type_name)
+
+
+class QAPISchemaObjectType(QAPISchemaType):
+    def __init__(self, name, info, doc, ifcond, features,
+                 base, local_members, variants):
+        # struct has local_members, optional base, and no variants
+        # flat union has base, variants, and no local_members
+        # simple union has local_members, variants, and no base
+        super().__init__(name, info, doc, ifcond, features)
+        self.meta = 'union' if variants else 'struct'
+        assert base is None or isinstance(base, str)
+        for m in local_members:
+            assert isinstance(m, QAPISchemaObjectTypeMember)
+            m.set_defined_in(name)
+        if variants is not None:
+            assert isinstance(variants, QAPISchemaVariants)
+            variants.set_defined_in(name)
+        self._base_name = base
+        self.base = None
+        self.local_members = local_members
+        self.variants = variants
+        self.members = None
+
+    def check(self, schema):
+        # This calls another type T's .check() exactly when the C
+        # struct emitted by gen_object() contains that T's C struct
+        # (pointers don't count).
+        if self.members is not None:
+            # A previous .check() completed: nothing to do
+            return
+        if self._checked:
+            # Recursed: C struct contains itself
+            raise QAPISemError(self.info,
+                               "object %s contains itself" % self.name)
+
+        super().check(schema)
+        assert self._checked and self.members is None
+
+        seen = OrderedDict()
+        if self._base_name:
+            self.base = schema.resolve_type(self._base_name, self.info,
+                                            "'base'")
+            if (not isinstance(self.base, QAPISchemaObjectType)
+                    or self.base.variants):
+                raise QAPISemError(
+                    self.info,
+                    "'base' requires a struct type, %s isn't"
+                    % self.base.describe())
+            self.base.check(schema)
+            self.base.check_clash(self.info, seen)
+        for m in self.local_members:
+            m.check(schema)
+            m.check_clash(self.info, seen)
+        members = seen.values()
+
+        if self.variants:
+            self.variants.check(schema, seen)
+            self.variants.check_clash(self.info, seen)
+
+        self.members = members  # mark completed
+
+    # Check that the members of this type do not cause duplicate JSON members,
+    # and update seen to track the members seen so far. Report any errors
+    # on behalf of info, which is not necessarily self.info
+    def check_clash(self, info, seen):
+        assert self._checked
+        assert not self.variants       # not implemented
+        for m in self.members:
+            m.check_clash(info, seen)
+
+    def connect_doc(self, doc=None):
+        super().connect_doc(doc)
+        doc = doc or self.doc
+        if self.base and self.base.is_implicit():
+            self.base.connect_doc(doc)
+        for m in self.local_members:
+            m.connect_doc(doc)
+
+    @property
+    def ifcond(self):
+        assert self._checked
+        if isinstance(self._ifcond, QAPISchemaType):
+            # Simple union wrapper type inherits from wrapped type;
+            # see _make_implicit_object_type()
+            return self._ifcond.ifcond
+        return self._ifcond
+
+    def is_implicit(self):
+        # See QAPISchema._make_implicit_object_type(), as well as
+        # _def_predefineds()
+        return self.name.startswith('q_')
+
+    def is_empty(self):
+        assert self.members is not None
+        return not self.members and not self.variants
+
+    def c_name(self):
+        assert self.name != 'q_empty'
+        return super().c_name()
+
+    def c_type(self):
+        assert not self.is_implicit()
+        return c_name(self.name) + POINTER_SUFFIX
+
+    def c_unboxed_type(self):
+        return c_name(self.name)
+
+    def json_type(self):
+        return 'object'
+
+    def visit(self, visitor):
+        super().visit(visitor)
+        visitor.visit_object_type(
+            self.name, self.info, self.ifcond, self.features,
+            self.base, self.local_members, self.variants)
+        visitor.visit_object_type_flat(
+            self.name, self.info, self.ifcond, self.features,
+            self.members, self.variants)
+
+
+class QAPISchemaAlternateType(QAPISchemaType):
+    meta = 'alternate'
+
+    def __init__(self, name, info, doc, ifcond, features, variants):
+        super().__init__(name, info, doc, ifcond, features)
+        assert isinstance(variants, QAPISchemaVariants)
+        assert variants.tag_member
+        variants.set_defined_in(name)
+        variants.tag_member.set_defined_in(self.name)
+        self.variants = variants
+
+    def check(self, schema):
+        super().check(schema)
+        self.variants.tag_member.check(schema)
+        # Not calling self.variants.check_clash(), because there's nothing
+        # to clash with
+        self.variants.check(schema, {})
+        # Alternate branch names have no relation to the tag enum values;
+        # so we have to check for potential name collisions ourselves.
+        seen = {}
+        types_seen = {}
+        for v in self.variants.variants:
+            v.check_clash(self.info, seen)
+            qtype = v.type.alternate_qtype()
+            if not qtype:
+                raise QAPISemError(
+                    self.info,
+                    "%s cannot use %s"
+                    % (v.describe(self.info), v.type.describe()))
+            conflicting = set([qtype])
+            if qtype == 'QTYPE_QSTRING':
+                if isinstance(v.type, QAPISchemaEnumType):
+                    for m in v.type.members:
+                        if m.name in ['on', 'off']:
+                            conflicting.add('QTYPE_QBOOL')
+                        if re.match(r'[-+0-9.]', m.name):
+                            # lazy, could be tightened
+                            conflicting.add('QTYPE_QNUM')
+                else:
+                    conflicting.add('QTYPE_QNUM')
+                    conflicting.add('QTYPE_QBOOL')
+            for qt in conflicting:
+                if qt in types_seen:
+                    raise QAPISemError(
+                        self.info,
+                        "%s can't be distinguished from '%s'"
+                        % (v.describe(self.info), types_seen[qt]))
+                types_seen[qt] = v.name
+
+    def connect_doc(self, doc=None):
+        super().connect_doc(doc)
+        doc = doc or self.doc
+        for v in self.variants.variants:
+            v.connect_doc(doc)
+
+    def c_type(self):
+        return c_name(self.name) + POINTER_SUFFIX
+
+    def json_type(self):
+        return 'value'
+
+    def visit(self, visitor):
+        super().visit(visitor)
+        visitor.visit_alternate_type(
+            self.name, self.info, self.ifcond, self.features, self.variants)
+
+
+class QAPISchemaVariants:
+    def __init__(self, tag_name, info, tag_member, variants):
+        # Flat unions pass tag_name but not tag_member.
+        # Simple unions and alternates pass tag_member but not tag_name.
+        # After check(), tag_member is always set, and tag_name remains
+        # a reliable witness of being used by a flat union.
+        assert bool(tag_member) != bool(tag_name)
+        assert (isinstance(tag_name, str) or
+                isinstance(tag_member, QAPISchemaObjectTypeMember))
+        for v in variants:
+            assert isinstance(v, QAPISchemaVariant)
+        self._tag_name = tag_name
+        self.info = info
+        self.tag_member = tag_member
+        self.variants = variants
+
+    def set_defined_in(self, name):
+        for v in self.variants:
+            v.set_defined_in(name)
+
+    def check(self, schema, seen):
+        if not self.tag_member:  # flat union
+            self.tag_member = seen.get(c_name(self._tag_name))
+            base = "'base'"
+            # Pointing to the base type when not implicit would be
+            # nice, but we don't know it here
+            if not self.tag_member or self._tag_name != self.tag_member.name:
+                raise QAPISemError(
+                    self.info,
+                    "discriminator '%s' is not a member of %s"
+                    % (self._tag_name, base))
+            # Here we do:
+            base_type = schema.lookup_type(self.tag_member.defined_in)
+            assert base_type
+            if not base_type.is_implicit():
+                base = "base type '%s'" % self.tag_member.defined_in
+            if not isinstance(self.tag_member.type, QAPISchemaEnumType):
+                raise QAPISemError(
+                    self.info,
+                    "discriminator member '%s' of %s must be of enum type"
+                    % (self._tag_name, base))
+            if self.tag_member.optional:
+                raise QAPISemError(
+                    self.info,
+                    "discriminator member '%s' of %s must not be optional"
+                    % (self._tag_name, base))
+            if self.tag_member.ifcond:
+                raise QAPISemError(
+                    self.info,
+                    "discriminator member '%s' of %s must not be conditional"
+                    % (self._tag_name, base))
+        else:                   # simple union
+            assert isinstance(self.tag_member.type, QAPISchemaEnumType)
+            assert not self.tag_member.optional
+            assert self.tag_member.ifcond == []
+        if self._tag_name:    # flat union
+            # branches that are not explicitly covered get an empty type
+            cases = {v.name for v in self.variants}
+            for m in self.tag_member.type.members:
+                if m.name not in cases:
+                    v = QAPISchemaVariant(m.name, self.info,
+                                          'q_empty', m.ifcond)
+                    v.set_defined_in(self.tag_member.defined_in)
+                    self.variants.append(v)
+        if not self.variants:
+            raise QAPISemError(self.info, "union has no branches")
+        for v in self.variants:
+            v.check(schema)
+            # Union names must match enum values; alternate names are
+            # checked separately. Use 'seen' to tell the two apart.
+            if seen:
+                if v.name not in self.tag_member.type.member_names():
+                    raise QAPISemError(
+                        self.info,
+                        "branch '%s' is not a value of %s"
+                        % (v.name, self.tag_member.type.describe()))
+                if (not isinstance(v.type, QAPISchemaObjectType)
+                        or v.type.variants):
+                    raise QAPISemError(
+                        self.info,
+                        "%s cannot use %s"
+                        % (v.describe(self.info), v.type.describe()))
+                v.type.check(schema)
+
+    def check_clash(self, info, seen):
+        for v in self.variants:
+            # Reset seen map for each variant, since qapi names from one
+            # branch do not affect another branch
+            v.type.check_clash(info, dict(seen))
+
+
+class QAPISchemaMember:
+    """ Represents object members, enum members and features """
+    role = 'member'
+
+    def __init__(self, name, info, ifcond=None):
+        assert isinstance(name, str)
+        self.name = name
+        self.info = info
+        self.ifcond = ifcond or []
+        self.defined_in = None
+
+    def set_defined_in(self, name):
+        assert not self.defined_in
+        self.defined_in = name
+
+    def check_clash(self, info, seen):
+        cname = c_name(self.name)
+        if cname in seen:
+            raise QAPISemError(
+                info,
+                "%s collides with %s"
+                % (self.describe(info), seen[cname].describe(info)))
+        seen[cname] = self
+
+    def connect_doc(self, doc):
+        if doc:
+            doc.connect_member(self)
+
+    def describe(self, info):
+        role = self.role
+        defined_in = self.defined_in
+        assert defined_in
+
+        if defined_in.startswith('q_obj_'):
+            # See QAPISchema._make_implicit_object_type() - reverse the
+            # mapping there to create a nice human-readable description
+            defined_in = defined_in[6:]
+            if defined_in.endswith('-arg'):
+                # Implicit type created for a command's dict 'data'
+                assert role == 'member'
+                role = 'parameter'
+            elif defined_in.endswith('-base'):
+                # Implicit type created for a flat union's dict 'base'
+                role = 'base ' + role
+            else:
+                # Implicit type created for a simple union's branch
+                assert defined_in.endswith('-wrapper')
+                # Unreachable and not implemented
+                assert False
+        elif defined_in.endswith('Kind'):
+            # See QAPISchema._make_implicit_enum_type()
+            # Implicit enum created for simple union's branches
+            assert role == 'value'
+            role = 'branch'
+        elif defined_in != info.defn_name:
+            return "%s '%s' of type '%s'" % (role, self.name, defined_in)
+        return "%s '%s'" % (role, self.name)
+
+
+class QAPISchemaEnumMember(QAPISchemaMember):
+    role = 'value'
+
+
+class QAPISchemaFeature(QAPISchemaMember):
+    role = 'feature'
+
+
+class QAPISchemaObjectTypeMember(QAPISchemaMember):
+    def __init__(self, name, info, typ, optional, ifcond=None, features=None):
+        super().__init__(name, info, ifcond)
+        assert isinstance(typ, str)
+        assert isinstance(optional, bool)
+        for f in features or []:
+            assert isinstance(f, QAPISchemaFeature)
+            f.set_defined_in(name)
+        self._type_name = typ
+        self.type = None
+        self.optional = optional
+        self.features = features or []
+
+    def check(self, schema):
+        assert self.defined_in
+        self.type = schema.resolve_type(self._type_name, self.info,
+                                        self.describe)
+        seen = {}
+        for f in self.features:
+            f.check_clash(self.info, seen)
+
+    def connect_doc(self, doc):
+        super().connect_doc(doc)
+        if doc:
+            for f in self.features:
+                doc.connect_feature(f)
+
+
+class QAPISchemaVariant(QAPISchemaObjectTypeMember):
+    role = 'branch'
+
+    def __init__(self, name, info, typ, ifcond=None):
+        super().__init__(name, info, typ, False, ifcond)
+
+
+class QAPISchemaCommand(QAPISchemaEntity):
+    meta = 'command'
+
+    def __init__(self, name, info, doc, ifcond, features,
+                 arg_type, ret_type,
+                 gen, success_response, boxed, allow_oob, allow_preconfig,
+                 coroutine):
+        super().__init__(name, info, doc, ifcond, features)
+        assert not arg_type or isinstance(arg_type, str)
+        assert not ret_type or isinstance(ret_type, str)
+        self._arg_type_name = arg_type
+        self.arg_type = None
+        self._ret_type_name = ret_type
+        self.ret_type = None
+        self.gen = gen
+        self.success_response = success_response
+        self.boxed = boxed
+        self.allow_oob = allow_oob
+        self.allow_preconfig = allow_preconfig
+        self.coroutine = coroutine
+
+    def check(self, schema):
+        super().check(schema)
+        if self._arg_type_name:
+            self.arg_type = schema.resolve_type(
+                self._arg_type_name, self.info, "command's 'data'")
+            if not isinstance(self.arg_type, QAPISchemaObjectType):
+                raise QAPISemError(
+                    self.info,
+                    "command's 'data' cannot take %s"
+                    % self.arg_type.describe())
+            if self.arg_type.variants and not self.boxed:
+                raise QAPISemError(
+                    self.info,
+                    "command's 'data' can take %s only with 'boxed': true"
+                    % self.arg_type.describe())
+        if self._ret_type_name:
+            self.ret_type = schema.resolve_type(
+                self._ret_type_name, self.info, "command's 'returns'")
+            if self.name not in self.info.pragma.returns_whitelist:
+                typ = self.ret_type
+                if isinstance(typ, QAPISchemaArrayType):
+                    typ = self.ret_type.element_type
+                    assert typ
+                if not isinstance(typ, QAPISchemaObjectType):
+                    raise QAPISemError(
+                        self.info,
+                        "command's 'returns' cannot take %s"
+                        % self.ret_type.describe())
+
+    def connect_doc(self, doc=None):
+        super().connect_doc(doc)
+        doc = doc or self.doc
+        if doc:
+            if self.arg_type and self.arg_type.is_implicit():
+                self.arg_type.connect_doc(doc)
+
+    def visit(self, visitor):
+        super().visit(visitor)
+        visitor.visit_command(
+            self.name, self.info, self.ifcond, self.features,
+            self.arg_type, self.ret_type, self.gen, self.success_response,
+            self.boxed, self.allow_oob, self.allow_preconfig,
+            self.coroutine)
+
+
+class QAPISchemaEvent(QAPISchemaEntity):
+    meta = 'event'
+
+    def __init__(self, name, info, doc, ifcond, features, arg_type, boxed):
+        super().__init__(name, info, doc, ifcond, features)
+        assert not arg_type or isinstance(arg_type, str)
+        self._arg_type_name = arg_type
+        self.arg_type = None
+        self.boxed = boxed
+
+    def check(self, schema):
+        super().check(schema)
+        if self._arg_type_name:
+            self.arg_type = schema.resolve_type(
+                self._arg_type_name, self.info, "event's 'data'")
+            if not isinstance(self.arg_type, QAPISchemaObjectType):
+                raise QAPISemError(
+                    self.info,
+                    "event's 'data' cannot take %s"
+                    % self.arg_type.describe())
+            if self.arg_type.variants and not self.boxed:
+                raise QAPISemError(
+                    self.info,
+                    "event's 'data' can take %s only with 'boxed': true"
+                    % self.arg_type.describe())
+
+    def connect_doc(self, doc=None):
+        super().connect_doc(doc)
+        doc = doc or self.doc
+        if doc:
+            if self.arg_type and self.arg_type.is_implicit():
+                self.arg_type.connect_doc(doc)
+
+    def visit(self, visitor):
+        super().visit(visitor)
+        visitor.visit_event(
+            self.name, self.info, self.ifcond, self.features,
+            self.arg_type, self.boxed)
+
+
+class QAPISchema:
+    def __init__(self, fname):
+        self.fname = fname
+        parser = QAPISchemaParser(fname)
+        exprs = check_exprs(parser.exprs)
+        self.docs = parser.docs
+        self._entity_list = []
+        self._entity_dict = {}
+        self._module_dict = OrderedDict()
+        self._schema_dir = os.path.dirname(fname)
+        self._make_module(None)  # built-ins
+        self._make_module(fname)
+        self._predefining = True
+        self._def_predefineds()
+        self._predefining = False
+        self._def_exprs(exprs)
+        self.check()
+
+    def _def_entity(self, ent):
+        # Only the predefined types are allowed to not have info
+        assert ent.info or self._predefining
+        self._entity_list.append(ent)
+        if ent.name is None:
+            return
+        # TODO reject names that differ only in '_' vs. '.'  vs. '-',
+        # because they're liable to clash in generated C.
+        other_ent = self._entity_dict.get(ent.name)
+        if other_ent:
+            if other_ent.info:
+                where = QAPIError(other_ent.info, None, "previous definition")
+                raise QAPISemError(
+                    ent.info,
+                    "'%s' is already defined\n%s" % (ent.name, where))
+            raise QAPISemError(
+                ent.info, "%s is already defined" % other_ent.describe())
+        self._entity_dict[ent.name] = ent
+
+    def lookup_entity(self, name, typ=None):
+        ent = self._entity_dict.get(name)
+        if typ and not isinstance(ent, typ):
+            return None
+        return ent
+
+    def lookup_type(self, name):
+        return self.lookup_entity(name, QAPISchemaType)
+
+    def resolve_type(self, name, info, what):
+        typ = self.lookup_type(name)
+        if not typ:
+            if callable(what):
+                what = what(info)
+            raise QAPISemError(
+                info, "%s uses unknown type '%s'" % (what, name))
+        return typ
+
+    def _module_name(self, fname):
+        if fname is None:
+            return None
+        return os.path.relpath(fname, self._schema_dir)
+
+    def _make_module(self, fname):
+        name = self._module_name(fname)
+        if name not in self._module_dict:
+            self._module_dict[name] = QAPISchemaModule(name)
+        return self._module_dict[name]
+
+    def module_by_fname(self, fname):
+        name = self._module_name(fname)
+        assert name in self._module_dict
+        return self._module_dict[name]
+
+    def _def_include(self, expr, info, doc):
+        include = expr['include']
+        assert doc is None
+        self._def_entity(QAPISchemaInclude(self._make_module(include), info))
+
+    def _def_builtin_type(self, name, json_type, c_type):
+        self._def_entity(QAPISchemaBuiltinType(name, json_type, c_type))
+        # Instantiating only the arrays that are actually used would
+        # be nice, but we can't as long as their generated code
+        # (qapi-builtin-types.[ch]) may be shared by some other
+        # schema.
+        self._make_array_type(name, None)
+
+    def _def_predefineds(self):
+        for t in [('str',    'string',  'char' + POINTER_SUFFIX),
+                  ('number', 'number',  'double'),
+                  ('int',    'int',     'int64_t'),
+                  ('int8',   'int',     'int8_t'),
+                  ('int16',  'int',     'int16_t'),
+                  ('int32',  'int',     'int32_t'),
+                  ('int64',  'int',     'int64_t'),
+                  ('uint8',  'int',     'uint8_t'),
+                  ('uint16', 'int',     'uint16_t'),
+                  ('uint32', 'int',     'uint32_t'),
+                  ('uint64', 'int',     'uint64_t'),
+                  ('size',   'int',     'uint64_t'),
+                  ('bool',   'boolean', 'bool'),
+                  ('any',    'value',   'QObject' + POINTER_SUFFIX),
+                  ('null',   'null',    'QNull' + POINTER_SUFFIX)]:
+            self._def_builtin_type(*t)
+        self.the_empty_object_type = QAPISchemaObjectType(
+            'q_empty', None, None, None, None, None, [], None)
+        self._def_entity(self.the_empty_object_type)
+
+        qtypes = ['none', 'qnull', 'qnum', 'qstring', 'qdict', 'qlist',
+                  'qbool']
+        qtype_values = self._make_enum_members(
+            [{'name': n} for n in qtypes], None)
+
+        self._def_entity(QAPISchemaEnumType('QType', None, None, None, None,
+                                            qtype_values, 'QTYPE'))
+
+    def _make_features(self, features, info):
+        if features is None:
+            return []
+        return [QAPISchemaFeature(f['name'], info, f.get('if'))
+                for f in features]
+
+    def _make_enum_members(self, values, info):
+        return [QAPISchemaEnumMember(v['name'], info, v.get('if'))
+                for v in values]
+
+    def _make_implicit_enum_type(self, name, info, ifcond, values):
+        # See also QAPISchemaObjectTypeMember.describe()
+        name = name + 'Kind'    # reserved by check_defn_name_str()
+        self._def_entity(QAPISchemaEnumType(
+            name, info, None, ifcond, None,
+            self._make_enum_members(values, info),
+            None))
+        return name
+
+    def _make_array_type(self, element_type, info):
+        name = element_type + 'List'    # reserved by check_defn_name_str()
+        if not self.lookup_type(name):
+            self._def_entity(QAPISchemaArrayType(name, info, element_type))
+        return name
+
+    def _make_implicit_object_type(self, name, info, ifcond, role, members):
+        if not members:
+            return None
+        # See also QAPISchemaObjectTypeMember.describe()
+        name = 'q_obj_%s-%s' % (name, role)
+        typ = self.lookup_entity(name, QAPISchemaObjectType)
+        if typ:
+            # The implicit object type has multiple users.  This can
+            # happen only for simple unions' implicit wrapper types.
+            # Its ifcond should be the disjunction of its user's
+            # ifconds.  Not implemented.  Instead, we always pass the
+            # wrapped type's ifcond, which is trivially the same for all
+            # users.  It's also necessary for the wrapper to compile.
+            # But it's not tight: the disjunction need not imply it.  We
+            # may end up compiling useless wrapper types.
+            # TODO kill simple unions or implement the disjunction
+
+            # pylint: disable=protected-access
+            assert (ifcond or []) == typ._ifcond
+        else:
+            self._def_entity(QAPISchemaObjectType(
+                name, info, None, ifcond, None, None, members, None))
+        return name
+
+    def _def_enum_type(self, expr, info, doc):
+        name = expr['enum']
+        data = expr['data']
+        prefix = expr.get('prefix')
+        ifcond = expr.get('if')
+        features = self._make_features(expr.get('features'), info)
+        self._def_entity(QAPISchemaEnumType(
+            name, info, doc, ifcond, features,
+            self._make_enum_members(data, info), prefix))
+
+    def _make_member(self, name, typ, ifcond, features, info):
+        optional = False
+        if name.startswith('*'):
+            name = name[1:]
+            optional = True
+        if isinstance(typ, list):
+            assert len(typ) == 1
+            typ = self._make_array_type(typ[0], info)
+        return QAPISchemaObjectTypeMember(name, info, typ, optional, ifcond,
+                                          self._make_features(features, info))
+
+    def _make_members(self, data, info):
+        return [self._make_member(key, value['type'], value.get('if'),
+                                  value.get('features'), info)
+                for (key, value) in data.items()]
+
+    def _def_struct_type(self, expr, info, doc):
+        name = expr['struct']
+        base = expr.get('base')
+        data = expr['data']
+        ifcond = expr.get('if')
+        features = self._make_features(expr.get('features'), info)
+        self._def_entity(QAPISchemaObjectType(
+            name, info, doc, ifcond, features, base,
+            self._make_members(data, info),
+            None))
+
+    def _make_variant(self, case, typ, ifcond, info):
+        return QAPISchemaVariant(case, info, typ, ifcond)
+
+    def _make_simple_variant(self, case, typ, ifcond, info):
+        if isinstance(typ, list):
+            assert len(typ) == 1
+            typ = self._make_array_type(typ[0], info)
+        typ = self._make_implicit_object_type(
+            typ, info, self.lookup_type(typ),
+            'wrapper', [self._make_member('data', typ, None, None, info)])
+        return QAPISchemaVariant(case, info, typ, ifcond)
+
+    def _def_union_type(self, expr, info, doc):
+        name = expr['union']
+        data = expr['data']
+        base = expr.get('base')
+        ifcond = expr.get('if')
+        features = self._make_features(expr.get('features'), info)
+        tag_name = expr.get('discriminator')
+        tag_member = None
+        if isinstance(base, dict):
+            base = self._make_implicit_object_type(
+                name, info, ifcond,
+                'base', self._make_members(base, info))
+        if tag_name:
+            variants = [self._make_variant(key, value['type'],
+                                           value.get('if'), info)
+                        for (key, value) in data.items()]
+            members = []
+        else:
+            variants = [self._make_simple_variant(key, value['type'],
+                                                  value.get('if'), info)
+                        for (key, value) in data.items()]
+            enum = [{'name': v.name, 'if': v.ifcond} for v in variants]
+            typ = self._make_implicit_enum_type(name, info, ifcond, enum)
+            tag_member = QAPISchemaObjectTypeMember('type', info, typ, False)
+            members = [tag_member]
+        self._def_entity(
+            QAPISchemaObjectType(name, info, doc, ifcond, features,
+                                 base, members,
+                                 QAPISchemaVariants(
+                                     tag_name, info, tag_member, variants)))
+
+    def _def_alternate_type(self, expr, info, doc):
+        name = expr['alternate']
+        data = expr['data']
+        ifcond = expr.get('if')
+        features = self._make_features(expr.get('features'), info)
+        variants = [self._make_variant(key, value['type'], value.get('if'),
+                                       info)
+                    for (key, value) in data.items()]
+        tag_member = QAPISchemaObjectTypeMember('type', info, 'QType', False)
+        self._def_entity(
+            QAPISchemaAlternateType(name, info, doc, ifcond, features,
+                                    QAPISchemaVariants(
+                                        None, info, tag_member, variants)))
+
+    def _def_command(self, expr, info, doc):
+        name = expr['command']
+        data = expr.get('data')
+        rets = expr.get('returns')
+        gen = expr.get('gen', True)
+        success_response = expr.get('success-response', True)
+        boxed = expr.get('boxed', False)
+        allow_oob = expr.get('allow-oob', False)
+        allow_preconfig = expr.get('allow-preconfig', False)
+        coroutine = expr.get('coroutine', False)
+        ifcond = expr.get('if')
+        features = self._make_features(expr.get('features'), info)
+        if isinstance(data, OrderedDict):
+            data = self._make_implicit_object_type(
+                name, info, ifcond,
+                'arg', self._make_members(data, info))
+        if isinstance(rets, list):
+            assert len(rets) == 1
+            rets = self._make_array_type(rets[0], info)
+        self._def_entity(QAPISchemaCommand(name, info, doc, ifcond, features,
+                                           data, rets,
+                                           gen, success_response,
+                                           boxed, allow_oob, allow_preconfig,
+                                           coroutine))
+
+    def _def_event(self, expr, info, doc):
+        name = expr['event']
+        data = expr.get('data')
+        boxed = expr.get('boxed', False)
+        ifcond = expr.get('if')
+        features = self._make_features(expr.get('features'), info)
+        if isinstance(data, OrderedDict):
+            data = self._make_implicit_object_type(
+                name, info, ifcond,
+                'arg', self._make_members(data, info))
+        self._def_entity(QAPISchemaEvent(name, info, doc, ifcond, features,
+                                         data, boxed))
+
+    def _def_exprs(self, exprs):
+        for expr_elem in exprs:
+            expr = expr_elem['expr']
+            info = expr_elem['info']
+            doc = expr_elem.get('doc')
+            if 'enum' in expr:
+                self._def_enum_type(expr, info, doc)
+            elif 'struct' in expr:
+                self._def_struct_type(expr, info, doc)
+            elif 'union' in expr:
+                self._def_union_type(expr, info, doc)
+            elif 'alternate' in expr:
+                self._def_alternate_type(expr, info, doc)
+            elif 'command' in expr:
+                self._def_command(expr, info, doc)
+            elif 'event' in expr:
+                self._def_event(expr, info, doc)
+            elif 'include' in expr:
+                self._def_include(expr, info, doc)
+            else:
+                assert False
+
+    def check(self):
+        for ent in self._entity_list:
+            ent.check(self)
+            ent.connect_doc()
+            ent.check_doc()
+        for ent in self._entity_list:
+            ent.set_module(self)
+
+    def visit(self, visitor):
+        visitor.visit_begin(self)
+        for mod in self._module_dict.values():
+            mod.visit(visitor)
+        visitor.visit_end()
diff --git a/scripts/qapi/source.py b/scripts/qapi/source.py
new file mode 100644
index 000000000..d7a79a9b8
--- /dev/null
+++ b/scripts/qapi/source.py
@@ -0,0 +1,76 @@
+#
+# QAPI frontend source file info
+#
+# Copyright (c) 2019 Red Hat Inc.
+#
+# Authors:
+#  Markus Armbruster 
+#
+# This work is licensed under the terms of the GNU GPL, version 2.
+# See the COPYING file in the top-level directory.
+
+import copy
+import sys
+from typing import List, Optional, TypeVar
+
+
+class QAPISchemaPragma:
+    # Replace with @dataclass in Python 3.7+
+    # pylint: disable=too-few-public-methods
+
+    def __init__(self) -> None:
+        # Are documentation comments required?
+        self.doc_required = False
+        # Whitelist of commands allowed to return a non-dictionary
+        self.returns_whitelist: List[str] = []
+        # Whitelist of entities allowed to violate case conventions
+        self.name_case_whitelist: List[str] = []
+
+
+class QAPISourceInfo:
+    T = TypeVar('T', bound='QAPISourceInfo')
+
+    def __init__(self, fname: str, line: int,
+                 parent: Optional['QAPISourceInfo']):
+        self.fname = fname
+        self.line = line
+        self.parent = parent
+        self.pragma: QAPISchemaPragma = (
+            parent.pragma if parent else QAPISchemaPragma()
+        )
+        self.defn_meta: Optional[str] = None
+        self.defn_name: Optional[str] = None
+
+    def set_defn(self, meta: str, name: str) -> None:
+        self.defn_meta = meta
+        self.defn_name = name
+
+    def next_line(self: T) -> T:
+        info = copy.copy(self)
+        info.line += 1
+        return info
+
+    def loc(self) -> str:
+        if self.fname is None:
+            return sys.argv[0]
+        ret = self.fname
+        if self.line is not None:
+            ret += ':%d' % self.line
+        return ret
+
+    def in_defn(self) -> str:
+        if self.defn_name:
+            return "%s: In %s '%s':\n" % (self.fname,
+                                          self.defn_meta, self.defn_name)
+        return ''
+
+    def include_path(self) -> str:
+        ret = ''
+        parent = self.parent
+        while parent:
+            ret = 'In file included from %s:\n' % parent.loc() + ret
+            parent = parent.parent
+        return ret
+
+    def __str__(self) -> str:
+        return self.include_path() + self.in_defn() + self.loc()
diff --git a/scripts/qapi/types.py b/scripts/qapi/types.py
new file mode 100644
index 000000000..2b4916cda
--- /dev/null
+++ b/scripts/qapi/types.py
@@ -0,0 +1,371 @@
+"""
+QAPI types generator
+
+Copyright IBM, Corp. 2011
+Copyright (c) 2013-2018 Red Hat Inc.
+
+Authors:
+ Anthony Liguori 
+ Michael Roth 
+ Markus Armbruster 
+
+This work is licensed under the terms of the GNU GPL, version 2.
+# See the COPYING file in the top-level directory.
+"""
+
+from typing import List, Optional
+
+from .common import (
+    c_enum_const,
+    c_name,
+    gen_endif,
+    gen_if,
+    mcgen,
+)
+from .gen import QAPISchemaModularCVisitor, ifcontext
+from .schema import (
+    QAPISchema,
+    QAPISchemaEnumMember,
+    QAPISchemaFeature,
+    QAPISchemaObjectType,
+    QAPISchemaObjectTypeMember,
+    QAPISchemaType,
+    QAPISchemaVariants,
+)
+from .source import QAPISourceInfo
+
+
+# variants must be emitted before their container; track what has already
+# been output
+objects_seen = set()
+
+
+def gen_enum_lookup(name: str,
+                    members: List[QAPISchemaEnumMember],
+                    prefix: Optional[str] = None) -> str:
+    ret = mcgen('''
+
+const QEnumLookup %(c_name)s_lookup = {
+    .array = (const char *const[]) {
+''',
+                c_name=c_name(name))
+    for memb in members:
+        ret += gen_if(memb.ifcond)
+        index = c_enum_const(name, memb.name, prefix)
+        ret += mcgen('''
+        [%(index)s] = "%(name)s",
+''',
+                     index=index, name=memb.name)
+        ret += gen_endif(memb.ifcond)
+
+    ret += mcgen('''
+    },
+    .size = %(max_index)s
+};
+''',
+                 max_index=c_enum_const(name, '_MAX', prefix))
+    return ret
+
+
+def gen_enum(name: str,
+             members: List[QAPISchemaEnumMember],
+             prefix: Optional[str] = None) -> str:
+    # append automatically generated _MAX value
+    enum_members = members + [QAPISchemaEnumMember('_MAX', None)]
+
+    ret = mcgen('''
+
+typedef enum %(c_name)s {
+''',
+                c_name=c_name(name))
+
+    for memb in enum_members:
+        ret += gen_if(memb.ifcond)
+        ret += mcgen('''
+    %(c_enum)s,
+''',
+                     c_enum=c_enum_const(name, memb.name, prefix))
+        ret += gen_endif(memb.ifcond)
+
+    ret += mcgen('''
+} %(c_name)s;
+''',
+                 c_name=c_name(name))
+
+    ret += mcgen('''
+
+#define %(c_name)s_str(val) \\
+    qapi_enum_lookup(&%(c_name)s_lookup, (val))
+
+extern const QEnumLookup %(c_name)s_lookup;
+''',
+                 c_name=c_name(name))
+    return ret
+
+
+def gen_fwd_object_or_array(name: str) -> str:
+    return mcgen('''
+
+typedef struct %(c_name)s %(c_name)s;
+''',
+                 c_name=c_name(name))
+
+
+def gen_array(name: str, element_type: QAPISchemaType) -> str:
+    return mcgen('''
+
+struct %(c_name)s {
+    %(c_name)s *next;
+    %(c_type)s value;
+};
+''',
+                 c_name=c_name(name), c_type=element_type.c_type())
+
+
+def gen_struct_members(members: List[QAPISchemaObjectTypeMember]) -> str:
+    ret = ''
+    for memb in members:
+        ret += gen_if(memb.ifcond)
+        if memb.optional:
+            ret += mcgen('''
+    bool has_%(c_name)s;
+''',
+                         c_name=c_name(memb.name))
+        ret += mcgen('''
+    %(c_type)s %(c_name)s;
+''',
+                     c_type=memb.type.c_type(), c_name=c_name(memb.name))
+        ret += gen_endif(memb.ifcond)
+    return ret
+
+
+def gen_object(name: str, ifcond: List[str],
+               base: Optional[QAPISchemaObjectType],
+               members: List[QAPISchemaObjectTypeMember],
+               variants: Optional[QAPISchemaVariants]) -> str:
+    if name in objects_seen:
+        return ''
+    objects_seen.add(name)
+
+    ret = ''
+    for var in variants.variants if variants else ():
+        obj = var.type
+        if not isinstance(obj, QAPISchemaObjectType):
+            continue
+        ret += gen_object(obj.name, obj.ifcond, obj.base,
+                          obj.local_members, obj.variants)
+
+    ret += mcgen('''
+
+''')
+    ret += gen_if(ifcond)
+    ret += mcgen('''
+struct %(c_name)s {
+''',
+                 c_name=c_name(name))
+
+    if base:
+        if not base.is_implicit():
+            ret += mcgen('''
+    /* Members inherited from %(c_name)s: */
+''',
+                         c_name=base.c_name())
+        ret += gen_struct_members(base.members)
+        if not base.is_implicit():
+            ret += mcgen('''
+    /* Own members: */
+''')
+    ret += gen_struct_members(members)
+
+    if variants:
+        ret += gen_variants(variants)
+
+    # Make sure that all structs have at least one member; this avoids
+    # potential issues with attempting to malloc space for zero-length
+    # structs in C, and also incompatibility with C++ (where an empty
+    # struct is size 1).
+    if (not base or base.is_empty()) and not members and not variants:
+        ret += mcgen('''
+    char qapi_dummy_for_empty_struct;
+''')
+
+    ret += mcgen('''
+};
+''')
+    ret += gen_endif(ifcond)
+
+    return ret
+
+
+def gen_upcast(name: str, base: QAPISchemaObjectType) -> str:
+    # C makes const-correctness ugly.  We have to cast away const to let
+    # this function work for both const and non-const obj.
+    return mcgen('''
+
+static inline %(base)s *qapi_%(c_name)s_base(const %(c_name)s *obj)
+{
+    return (%(base)s *)obj;
+}
+''',
+                 c_name=c_name(name), base=base.c_name())
+
+
+def gen_variants(variants: QAPISchemaVariants) -> str:
+    ret = mcgen('''
+    union { /* union tag is @%(c_name)s */
+''',
+                c_name=c_name(variants.tag_member.name))
+
+    for var in variants.variants:
+        if var.type.name == 'q_empty':
+            continue
+        ret += gen_if(var.ifcond)
+        ret += mcgen('''
+        %(c_type)s %(c_name)s;
+''',
+                     c_type=var.type.c_unboxed_type(),
+                     c_name=c_name(var.name))
+        ret += gen_endif(var.ifcond)
+
+    ret += mcgen('''
+    } u;
+''')
+
+    return ret
+
+
+def gen_type_cleanup_decl(name: str) -> str:
+    ret = mcgen('''
+
+void qapi_free_%(c_name)s(%(c_name)s *obj);
+G_DEFINE_AUTOPTR_CLEANUP_FUNC(%(c_name)s, qapi_free_%(c_name)s)
+''',
+                c_name=c_name(name))
+    return ret
+
+
+def gen_type_cleanup(name: str) -> str:
+    ret = mcgen('''
+
+void qapi_free_%(c_name)s(%(c_name)s *obj)
+{
+    Visitor *v;
+
+    if (!obj) {
+        return;
+    }
+
+    v = qapi_dealloc_visitor_new();
+    visit_type_%(c_name)s(v, NULL, &obj, NULL);
+    visit_free(v);
+}
+''',
+                c_name=c_name(name))
+    return ret
+
+
+class QAPISchemaGenTypeVisitor(QAPISchemaModularCVisitor):
+
+    def __init__(self, prefix: str):
+        super().__init__(
+            prefix, 'qapi-types', ' * Schema-defined QAPI types',
+            ' * Built-in QAPI types', __doc__)
+
+    def _begin_system_module(self, name: None) -> None:
+        self._genc.preamble_add(mcgen('''
+#include "qemu/osdep.h"
+#include "qapi/dealloc-visitor.h"
+#include "qapi/qapi-builtin-types.h"
+#include "qapi/qapi-builtin-visit.h"
+'''))
+        self._genh.preamble_add(mcgen('''
+#include "qapi/util.h"
+'''))
+
+    def _begin_user_module(self, name: str) -> None:
+        types = self._module_basename('qapi-types', name)
+        visit = self._module_basename('qapi-visit', name)
+        self._genc.preamble_add(mcgen('''
+#include "qemu/osdep.h"
+#include "qapi/dealloc-visitor.h"
+#include "%(types)s.h"
+#include "%(visit)s.h"
+''',
+                                      types=types, visit=visit))
+        self._genh.preamble_add(mcgen('''
+#include "qapi/qapi-builtin-types.h"
+'''))
+
+    def visit_begin(self, schema: QAPISchema) -> None:
+        # gen_object() is recursive, ensure it doesn't visit the empty type
+        objects_seen.add(schema.the_empty_object_type.name)
+
+    def _gen_type_cleanup(self, name: str) -> None:
+        self._genh.add(gen_type_cleanup_decl(name))
+        self._genc.add(gen_type_cleanup(name))
+
+    def visit_enum_type(self,
+                        name: str,
+                        info: Optional[QAPISourceInfo],
+                        ifcond: List[str],
+                        features: List[QAPISchemaFeature],
+                        members: List[QAPISchemaEnumMember],
+                        prefix: Optional[str]) -> None:
+        with ifcontext(ifcond, self._genh, self._genc):
+            self._genh.preamble_add(gen_enum(name, members, prefix))
+            self._genc.add(gen_enum_lookup(name, members, prefix))
+
+    def visit_array_type(self,
+                         name: str,
+                         info: Optional[QAPISourceInfo],
+                         ifcond: List[str],
+                         element_type: QAPISchemaType) -> None:
+        with ifcontext(ifcond, self._genh, self._genc):
+            self._genh.preamble_add(gen_fwd_object_or_array(name))
+            self._genh.add(gen_array(name, element_type))
+            self._gen_type_cleanup(name)
+
+    def visit_object_type(self,
+                          name: str,
+                          info: Optional[QAPISourceInfo],
+                          ifcond: List[str],
+                          features: List[QAPISchemaFeature],
+                          base: Optional[QAPISchemaObjectType],
+                          members: List[QAPISchemaObjectTypeMember],
+                          variants: Optional[QAPISchemaVariants]) -> None:
+        # Nothing to do for the special empty builtin
+        if name == 'q_empty':
+            return
+        with ifcontext(ifcond, self._genh):
+            self._genh.preamble_add(gen_fwd_object_or_array(name))
+        self._genh.add(gen_object(name, ifcond, base, members, variants))
+        with ifcontext(ifcond, self._genh, self._genc):
+            if base and not base.is_implicit():
+                self._genh.add(gen_upcast(name, base))
+            # TODO Worth changing the visitor signature, so we could
+            # directly use rather than repeat type.is_implicit()?
+            if not name.startswith('q_'):
+                # implicit types won't be directly allocated/freed
+                self._gen_type_cleanup(name)
+
+    def visit_alternate_type(self,
+                             name: str,
+                             info: QAPISourceInfo,
+                             ifcond: List[str],
+                             features: List[QAPISchemaFeature],
+                             variants: QAPISchemaVariants) -> None:
+        with ifcontext(ifcond, self._genh):
+            self._genh.preamble_add(gen_fwd_object_or_array(name))
+        self._genh.add(gen_object(name, ifcond, None,
+                                  [variants.tag_member], variants))
+        with ifcontext(ifcond, self._genh, self._genc):
+            self._gen_type_cleanup(name)
+
+
+def gen_types(schema: QAPISchema,
+              output_dir: str,
+              prefix: str,
+              opt_builtins: bool) -> None:
+    vis = QAPISchemaGenTypeVisitor(prefix)
+    schema.visit(vis)
+    vis.write(output_dir, opt_builtins)
diff --git a/scripts/qapi/visit.py b/scripts/qapi/visit.py
new file mode 100644
index 000000000..339f15215
--- /dev/null
+++ b/scripts/qapi/visit.py
@@ -0,0 +1,396 @@
+"""
+QAPI visitor generator
+
+Copyright IBM, Corp. 2011
+Copyright (C) 2014-2018 Red Hat, Inc.
+
+Authors:
+ Anthony Liguori 
+ Michael Roth    
+ Markus Armbruster 
+
+This work is licensed under the terms of the GNU GPL, version 2.
+See the COPYING file in the top-level directory.
+"""
+
+from typing import List, Optional
+
+from .common import (
+    c_enum_const,
+    c_name,
+    gen_endif,
+    gen_if,
+    indent,
+    mcgen,
+)
+from .gen import QAPISchemaModularCVisitor, ifcontext
+from .schema import (
+    QAPISchema,
+    QAPISchemaEnumMember,
+    QAPISchemaEnumType,
+    QAPISchemaFeature,
+    QAPISchemaObjectType,
+    QAPISchemaObjectTypeMember,
+    QAPISchemaType,
+    QAPISchemaVariants,
+)
+from .source import QAPISourceInfo
+
+
+def gen_visit_decl(name: str, scalar: bool = False) -> str:
+    c_type = c_name(name) + ' *'
+    if not scalar:
+        c_type += '*'
+    return mcgen('''
+
+bool visit_type_%(c_name)s(Visitor *v, const char *name,
+                 %(c_type)sobj, Error **errp);
+''',
+                 c_name=c_name(name), c_type=c_type)
+
+
+def gen_visit_members_decl(name: str) -> str:
+    return mcgen('''
+
+bool visit_type_%(c_name)s_members(Visitor *v, %(c_name)s *obj, Error **errp);
+''',
+                 c_name=c_name(name))
+
+
+def gen_visit_object_members(name: str,
+                             base: Optional[QAPISchemaObjectType],
+                             members: List[QAPISchemaObjectTypeMember],
+                             variants: Optional[QAPISchemaVariants]) -> str:
+    ret = mcgen('''
+
+bool visit_type_%(c_name)s_members(Visitor *v, %(c_name)s *obj, Error **errp)
+{
+''',
+                c_name=c_name(name))
+
+    if base:
+        ret += mcgen('''
+    if (!visit_type_%(c_type)s_members(v, (%(c_type)s *)obj, errp)) {
+        return false;
+    }
+''',
+                     c_type=base.c_name())
+
+    for memb in members:
+        ret += gen_if(memb.ifcond)
+        if memb.optional:
+            ret += mcgen('''
+    if (visit_optional(v, "%(name)s", &obj->has_%(c_name)s)) {
+''',
+                         name=memb.name, c_name=c_name(memb.name))
+            indent.increase()
+        ret += mcgen('''
+    if (!visit_type_%(c_type)s(v, "%(name)s", &obj->%(c_name)s, errp)) {
+        return false;
+    }
+''',
+                     c_type=memb.type.c_name(), name=memb.name,
+                     c_name=c_name(memb.name))
+        if memb.optional:
+            indent.decrease()
+            ret += mcgen('''
+    }
+''')
+        ret += gen_endif(memb.ifcond)
+
+    if variants:
+        tag_member = variants.tag_member
+        assert isinstance(tag_member.type, QAPISchemaEnumType)
+
+        ret += mcgen('''
+    switch (obj->%(c_name)s) {
+''',
+                     c_name=c_name(tag_member.name))
+
+        for var in variants.variants:
+            case_str = c_enum_const(tag_member.type.name, var.name,
+                                    tag_member.type.prefix)
+            ret += gen_if(var.ifcond)
+            if var.type.name == 'q_empty':
+                # valid variant and nothing to do
+                ret += mcgen('''
+    case %(case)s:
+        break;
+''',
+                             case=case_str)
+            else:
+                ret += mcgen('''
+    case %(case)s:
+        return visit_type_%(c_type)s_members(v, &obj->u.%(c_name)s, errp);
+''',
+                             case=case_str,
+                             c_type=var.type.c_name(), c_name=c_name(var.name))
+
+            ret += gen_endif(var.ifcond)
+        ret += mcgen('''
+    default:
+        abort();
+    }
+''')
+
+    ret += mcgen('''
+    return true;
+}
+''')
+    return ret
+
+
+def gen_visit_list(name: str, element_type: QAPISchemaType) -> str:
+    return mcgen('''
+
+bool visit_type_%(c_name)s(Visitor *v, const char *name,
+                 %(c_name)s **obj, Error **errp)
+{
+    bool ok = false;
+    %(c_name)s *tail;
+    size_t size = sizeof(**obj);
+
+    if (!visit_start_list(v, name, (GenericList **)obj, size, errp)) {
+        return false;
+    }
+
+    for (tail = *obj; tail;
+         tail = (%(c_name)s *)visit_next_list(v, (GenericList *)tail, size)) {
+        if (!visit_type_%(c_elt_type)s(v, NULL, &tail->value, errp)) {
+            goto out_obj;
+        }
+    }
+
+    ok = visit_check_list(v, errp);
+out_obj:
+    visit_end_list(v, (void **)obj);
+    if (!ok && visit_is_input(v)) {
+        qapi_free_%(c_name)s(*obj);
+        *obj = NULL;
+    }
+    return ok;
+}
+''',
+                 c_name=c_name(name), c_elt_type=element_type.c_name())
+
+
+def gen_visit_enum(name: str) -> str:
+    return mcgen('''
+
+bool visit_type_%(c_name)s(Visitor *v, const char *name,
+                 %(c_name)s *obj, Error **errp)
+{
+    int value = *obj;
+    bool ok = visit_type_enum(v, name, &value, &%(c_name)s_lookup, errp);
+    *obj = value;
+    return ok;
+}
+''',
+                 c_name=c_name(name))
+
+
+def gen_visit_alternate(name: str, variants: QAPISchemaVariants) -> str:
+    ret = mcgen('''
+
+bool visit_type_%(c_name)s(Visitor *v, const char *name,
+                 %(c_name)s **obj, Error **errp)
+{
+    bool ok = false;
+
+    if (!visit_start_alternate(v, name, (GenericAlternate **)obj,
+                               sizeof(**obj), errp)) {
+        return false;
+    }
+    if (!*obj) {
+        /* incomplete */
+        assert(visit_is_dealloc(v));
+        ok = true;
+        goto out_obj;
+    }
+    switch ((*obj)->type) {
+''',
+                c_name=c_name(name))
+
+    for var in variants.variants:
+        ret += gen_if(var.ifcond)
+        ret += mcgen('''
+    case %(case)s:
+''',
+                     case=var.type.alternate_qtype())
+        if isinstance(var.type, QAPISchemaObjectType):
+            ret += mcgen('''
+        if (!visit_start_struct(v, name, NULL, 0, errp)) {
+            break;
+        }
+        if (visit_type_%(c_type)s_members(v, &(*obj)->u.%(c_name)s, errp)) {
+            ok = visit_check_struct(v, errp);
+        }
+        visit_end_struct(v, NULL);
+''',
+                         c_type=var.type.c_name(),
+                         c_name=c_name(var.name))
+        else:
+            ret += mcgen('''
+        ok = visit_type_%(c_type)s(v, name, &(*obj)->u.%(c_name)s, errp);
+''',
+                         c_type=var.type.c_name(),
+                         c_name=c_name(var.name))
+        ret += mcgen('''
+        break;
+''')
+        ret += gen_endif(var.ifcond)
+
+    ret += mcgen('''
+    case QTYPE_NONE:
+        abort();
+    default:
+        assert(visit_is_input(v));
+        error_setg(errp, QERR_INVALID_PARAMETER_TYPE, name ? name : "null",
+                   "%(name)s");
+        /* Avoid passing invalid *obj to qapi_free_%(c_name)s() */
+        g_free(*obj);
+        *obj = NULL;
+    }
+out_obj:
+    visit_end_alternate(v, (void **)obj);
+    if (!ok && visit_is_input(v)) {
+        qapi_free_%(c_name)s(*obj);
+        *obj = NULL;
+    }
+    return ok;
+}
+''',
+                 name=name, c_name=c_name(name))
+
+    return ret
+
+
+def gen_visit_object(name: str) -> str:
+    return mcgen('''
+
+bool visit_type_%(c_name)s(Visitor *v, const char *name,
+                 %(c_name)s **obj, Error **errp)
+{
+    bool ok = false;
+
+    if (!visit_start_struct(v, name, (void **)obj, sizeof(%(c_name)s), errp)) {
+        return false;
+    }
+    if (!*obj) {
+        /* incomplete */
+        assert(visit_is_dealloc(v));
+        ok = true;
+        goto out_obj;
+    }
+    if (!visit_type_%(c_name)s_members(v, *obj, errp)) {
+        goto out_obj;
+    }
+    ok = visit_check_struct(v, errp);
+out_obj:
+    visit_end_struct(v, (void **)obj);
+    if (!ok && visit_is_input(v)) {
+        qapi_free_%(c_name)s(*obj);
+        *obj = NULL;
+    }
+    return ok;
+}
+''',
+                 c_name=c_name(name))
+
+
+class QAPISchemaGenVisitVisitor(QAPISchemaModularCVisitor):
+
+    def __init__(self, prefix: str):
+        super().__init__(
+            prefix, 'qapi-visit', ' * Schema-defined QAPI visitors',
+            ' * Built-in QAPI visitors', __doc__)
+
+    def _begin_system_module(self, name: None) -> None:
+        self._genc.preamble_add(mcgen('''
+#include "qemu/osdep.h"
+#include "qapi/error.h"
+#include "qapi/qapi-builtin-visit.h"
+'''))
+        self._genh.preamble_add(mcgen('''
+#include "qapi/visitor.h"
+#include "qapi/qapi-builtin-types.h"
+
+'''))
+
+    def _begin_user_module(self, name: str) -> None:
+        types = self._module_basename('qapi-types', name)
+        visit = self._module_basename('qapi-visit', name)
+        self._genc.preamble_add(mcgen('''
+#include "qemu/osdep.h"
+#include "qapi/error.h"
+#include "qapi/qmp/qerror.h"
+#include "%(visit)s.h"
+''',
+                                      visit=visit))
+        self._genh.preamble_add(mcgen('''
+#include "qapi/qapi-builtin-visit.h"
+#include "%(types)s.h"
+
+''',
+                                      types=types))
+
+    def visit_enum_type(self,
+                        name: str,
+                        info: QAPISourceInfo,
+                        ifcond: List[str],
+                        features: List[QAPISchemaFeature],
+                        members: List[QAPISchemaEnumMember],
+                        prefix: Optional[str]) -> None:
+        with ifcontext(ifcond, self._genh, self._genc):
+            self._genh.add(gen_visit_decl(name, scalar=True))
+            self._genc.add(gen_visit_enum(name))
+
+    def visit_array_type(self,
+                         name: str,
+                         info: Optional[QAPISourceInfo],
+                         ifcond: List[str],
+                         element_type: QAPISchemaType) -> None:
+        with ifcontext(ifcond, self._genh, self._genc):
+            self._genh.add(gen_visit_decl(name))
+            self._genc.add(gen_visit_list(name, element_type))
+
+    def visit_object_type(self,
+                          name: str,
+                          info: Optional[QAPISourceInfo],
+                          ifcond: List[str],
+                          features: List[QAPISchemaFeature],
+                          base: Optional[QAPISchemaObjectType],
+                          members: List[QAPISchemaObjectTypeMember],
+                          variants: Optional[QAPISchemaVariants]) -> None:
+        # Nothing to do for the special empty builtin
+        if name == 'q_empty':
+            return
+        with ifcontext(ifcond, self._genh, self._genc):
+            self._genh.add(gen_visit_members_decl(name))
+            self._genc.add(gen_visit_object_members(name, base,
+                                                    members, variants))
+            # TODO Worth changing the visitor signature, so we could
+            # directly use rather than repeat type.is_implicit()?
+            if not name.startswith('q_'):
+                # only explicit types need an allocating visit
+                self._genh.add(gen_visit_decl(name))
+                self._genc.add(gen_visit_object(name))
+
+    def visit_alternate_type(self,
+                             name: str,
+                             info: QAPISourceInfo,
+                             ifcond: List[str],
+                             features: List[QAPISchemaFeature],
+                             variants: QAPISchemaVariants) -> None:
+        with ifcontext(ifcond, self._genh, self._genc):
+            self._genh.add(gen_visit_decl(name))
+            self._genc.add(gen_visit_alternate(name, variants))
+
+
+def gen_visit(schema: QAPISchema,
+              output_dir: str,
+              prefix: str,
+              opt_builtins: bool) -> None:
+    vis = QAPISchemaGenVisitVisitor(prefix)
+    schema.visit(vis)
+    vis.write(output_dir, opt_builtins)
diff --git a/scripts/qemu-binfmt-conf.sh b/scripts/qemu-binfmt-conf.sh
new file mode 100755
index 000000000..e0666a3af
--- /dev/null
+++ b/scripts/qemu-binfmt-conf.sh
@@ -0,0 +1,395 @@
+#!/bin/sh
+# Enable automatic program execution by the kernel.
+
+qemu_target_list="i386 i486 alpha arm armeb sparc sparc32plus sparc64 \
+ppc ppc64 ppc64le m68k mips mipsel mipsn32 mipsn32el mips64 mips64el \
+sh4 sh4eb s390x aarch64 aarch64_be hppa riscv32 riscv64 xtensa xtensaeb \
+microblaze microblazeel or1k x86_64"
+
+i386_magic='\x7fELF\x01\x01\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x02\x00\x03\x00'
+i386_mask='\xff\xff\xff\xff\xff\xfe\xfe\x00\xff\xff\xff\xff\xff\xff\xff\xff\xfe\xff\xff\xff'
+i386_family=i386
+
+i486_magic='\x7fELF\x01\x01\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x02\x00\x06\x00'
+i486_mask='\xff\xff\xff\xff\xff\xfe\xfe\x00\xff\xff\xff\xff\xff\xff\xff\xff\xfe\xff\xff\xff'
+i486_family=i386
+
+x86_64_magic='\x7fELF\x02\x01\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x02\x00\x3e\x00'
+x86_64_mask='\xff\xff\xff\xff\xff\xfe\xfe\x00\xff\xff\xff\xff\xff\xff\xff\xff\xfe\xff\xff\xff'
+x86_64_family=i386
+
+alpha_magic='\x7fELF\x02\x01\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x02\x00\x26\x90'
+alpha_mask='\xff\xff\xff\xff\xff\xfe\xfe\x00\xff\xff\xff\xff\xff\xff\xff\xff\xfe\xff\xff\xff'
+alpha_family=alpha
+
+arm_magic='\x7fELF\x01\x01\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x02\x00\x28\x00'
+arm_mask='\xff\xff\xff\xff\xff\xff\xff\x00\xff\xff\xff\xff\xff\xff\xff\xff\xfe\xff\xff\xff'
+arm_family=arm
+
+armeb_magic='\x7fELF\x01\x02\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x02\x00\x28'
+armeb_mask='\xff\xff\xff\xff\xff\xff\xff\x00\xff\xff\xff\xff\xff\xff\xff\xff\xff\xfe\xff\xff'
+armeb_family=armeb
+
+sparc_magic='\x7fELF\x01\x02\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x02\x00\x02'
+sparc_mask='\xff\xff\xff\xff\xff\xff\xff\x00\xff\xff\xff\xff\xff\xff\xff\xff\xff\xfe\xff\xff'
+sparc_family=sparc
+
+sparc32plus_magic='\x7fELF\x01\x02\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x02\x00\x12'
+sparc32plus_mask='\xff\xff\xff\xff\xff\xff\xff\x00\xff\xff\xff\xff\xff\xff\xff\xff\xff\xfe\xff\xff'
+sparc32plus_family=sparc
+
+sparc64_magic='\x7fELF\x02\x02\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x02\x00\x2b'
+sparc64_mask='\xff\xff\xff\xff\xff\xff\xff\x00\xff\xff\xff\xff\xff\xff\xff\xff\xff\xfe\xff\xff'
+sparc64_family=sparc
+
+ppc_magic='\x7fELF\x01\x02\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x02\x00\x14'
+ppc_mask='\xff\xff\xff\xff\xff\xff\xff\x00\xff\xff\xff\xff\xff\xff\xff\xff\xff\xfe\xff\xff'
+ppc_family=ppc
+
+ppc64_magic='\x7fELF\x02\x02\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x02\x00\x15'
+ppc64_mask='\xff\xff\xff\xff\xff\xff\xff\x00\xff\xff\xff\xff\xff\xff\xff\xff\xff\xfe\xff\xff'
+ppc64_family=ppc
+
+ppc64le_magic='\x7fELF\x02\x01\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x02\x00\x15\x00'
+ppc64le_mask='\xff\xff\xff\xff\xff\xff\xff\x00\xff\xff\xff\xff\xff\xff\xff\xff\xfe\xff\xff\x00'
+ppc64le_family=ppcle
+
+m68k_magic='\x7fELF\x01\x02\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x02\x00\x04'
+m68k_mask='\xff\xff\xff\xff\xff\xff\xfe\x00\xff\xff\xff\xff\xff\xff\xff\xff\xff\xfe\xff\xff'
+m68k_family=m68k
+
+# FIXME: We could use the other endianness on a MIPS host.
+
+mips_magic='\x7fELF\x01\x02\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x02\x00\x08'
+mips_mask='\xff\xff\xff\xff\xff\xff\xff\x00\xff\xff\xff\xff\xff\xff\xff\xff\xff\xfe\xff\xff'
+mips_family=mips
+
+mipsel_magic='\x7fELF\x01\x01\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x02\x00\x08\x00'
+mipsel_mask='\xff\xff\xff\xff\xff\xff\xff\x00\xff\xff\xff\xff\xff\xff\xff\xff\xfe\xff\xff\xff'
+mipsel_family=mips
+
+mipsn32_magic='\x7fELF\x01\x02\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x02\x00\x08'
+mipsn32_mask='\xff\xff\xff\xff\xff\xff\xff\x00\xff\xff\xff\xff\xff\xff\xff\xff\xff\xfe\xff\xff'
+mipsn32_family=mips
+
+mipsn32el_magic='\x7fELF\x01\x01\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x02\x00\x08\x00'
+mipsn32el_mask='\xff\xff\xff\xff\xff\xff\xff\x00\xff\xff\xff\xff\xff\xff\xff\xff\xfe\xff\xff\xff'
+mipsn32el_family=mips
+
+mips64_magic='\x7fELF\x02\x02\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x02\x00\x08'
+mips64_mask='\xff\xff\xff\xff\xff\xff\xff\x00\xff\xff\xff\xff\xff\xff\xff\xff\xff\xfe\xff\xff'
+mips64_family=mips
+
+mips64el_magic='\x7fELF\x02\x01\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x02\x00\x08\x00'
+mips64el_mask='\xff\xff\xff\xff\xff\xff\xff\x00\xff\xff\xff\xff\xff\xff\xff\xff\xfe\xff\xff\xff'
+mips64el_family=mips
+
+sh4_magic='\x7fELF\x01\x01\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x02\x00\x2a\x00'
+sh4_mask='\xff\xff\xff\xff\xff\xff\xff\x00\xff\xff\xff\xff\xff\xff\xff\xff\xfe\xff\xff\xff'
+sh4_family=sh4
+
+sh4eb_magic='\x7fELF\x01\x02\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x02\x00\x2a'
+sh4eb_mask='\xff\xff\xff\xff\xff\xff\xff\x00\xff\xff\xff\xff\xff\xff\xff\xff\xff\xfe\xff\xff'
+sh4eb_family=sh4
+
+s390x_magic='\x7fELF\x02\x02\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x02\x00\x16'
+s390x_mask='\xff\xff\xff\xff\xff\xff\xff\x00\xff\xff\xff\xff\xff\xff\xff\xff\xff\xfe\xff\xff'
+s390x_family=s390x
+
+aarch64_magic='\x7fELF\x02\x01\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x02\x00\xb7\x00'
+aarch64_mask='\xff\xff\xff\xff\xff\xff\xff\x00\xff\xff\xff\xff\xff\xff\xff\xff\xfe\xff\xff\xff'
+aarch64_family=arm
+
+aarch64_be_magic='\x7fELF\x02\x02\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x02\x00\xb7'
+aarch64_be_mask='\xff\xff\xff\xff\xff\xff\xff\x00\xff\xff\xff\xff\xff\xff\xff\xff\xff\xfe\xff\xff'
+aarch64_be_family=armeb
+
+hppa_magic='\x7f\x45\x4c\x46\x01\x02\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x02\x00\x0f'
+hppa_mask='\xff\xff\xff\xff\xff\xff\xff\x00\xff\xff\xff\xff\xff\xff\xff\xff\xff\xfe\xff\xff'
+hppa_family=hppa
+
+riscv32_magic='\x7fELF\x01\x01\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x02\x00\xf3\x00'
+riscv32_mask='\xff\xff\xff\xff\xff\xff\xff\x00\xff\xff\xff\xff\xff\xff\xff\xff\xfe\xff\xff\xff'
+riscv32_family=riscv
+
+riscv64_magic='\x7fELF\x02\x01\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x02\x00\xf3\x00'
+riscv64_mask='\xff\xff\xff\xff\xff\xff\xff\x00\xff\xff\xff\xff\xff\xff\xff\xff\xfe\xff\xff\xff'
+riscv64_family=riscv
+
+xtensa_magic='\x7fELF\x01\x01\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x02\x00\x5e\x00'
+xtensa_mask='\xff\xff\xff\xff\xff\xff\xff\x00\xff\xff\xff\xff\xff\xff\xff\xff\xfe\xff\xff\xff'
+xtensa_family=xtensa
+
+xtensaeb_magic='\x7fELF\x01\x02\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x02\x00\x5e'
+xtensaeb_mask='\xff\xff\xff\xff\xff\xff\xff\x00\xff\xff\xff\xff\xff\xff\xff\xff\xff\xfe\xff\xff'
+xtensaeb_family=xtensaeb
+
+microblaze_magic='\x7fELF\x01\x02\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x02\x00\xba\xab'
+microblaze_mask='\xff\xff\xff\xff\xff\xff\xff\x00\xff\xff\xff\xff\xff\xff\xff\xff\xfe\xff\xff\xff'
+microblaze_family=microblaze
+
+microblazeel_magic='\x7fELF\x01\x01\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x02\x00\xab\xba'
+microblazeel_mask='\xff\xff\xff\xff\xff\xff\xff\x00\xff\xff\xff\xff\xff\xff\xff\xff\xfe\xff\xff\xff'
+microblazeel_family=microblazeel
+
+or1k_magic='\x7fELF\x01\x02\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x02\x00\x5c'
+or1k_mask='\xff\xff\xff\xff\xff\xff\xff\x00\xff\xff\xff\xff\xff\xff\xff\xff\xff\xfe\xff\xff'
+or1k_family=or1k
+
+qemu_get_family() {
+    cpu=${HOST_ARCH:-$(uname -m)}
+    case "$cpu" in
+    amd64|i386|i486|i586|i686|i86pc|BePC|x86_64)
+        echo "i386"
+        ;;
+    mips*)
+        echo "mips"
+        ;;
+    "Power Macintosh"|ppc64|powerpc|ppc)
+        echo "ppc"
+        ;;
+    ppc64el|ppc64le)
+        echo "ppcle"
+        ;;
+    arm|armel|armhf|arm64|armv[4-9]*l|aarch64)
+        echo "arm"
+        ;;
+    armeb|armv[4-9]*b|aarch64_be)
+        echo "armeb"
+        ;;
+    sparc*)
+        echo "sparc"
+        ;;
+    riscv*)
+        echo "riscv"
+        ;;
+    *)
+        echo "$cpu"
+        ;;
+    esac
+}
+
+usage() {
+    cat <&2
+        exit 1
+    fi
+}
+
+qemu_check_bintfmt_misc() {
+    # load the binfmt_misc module
+    if [ ! -d /proc/sys/fs/binfmt_misc ]; then
+      if ! /sbin/modprobe binfmt_misc ; then
+          exit 1
+      fi
+    fi
+    if [ ! -f /proc/sys/fs/binfmt_misc/register ]; then
+      if ! mount binfmt_misc -t binfmt_misc /proc/sys/fs/binfmt_misc ; then
+          exit 1
+      fi
+    fi
+
+    qemu_check_access /proc/sys/fs/binfmt_misc/register
+}
+
+installed_dpkg() {
+    dpkg --status "$1" > /dev/null 2>&1
+}
+
+qemu_check_debian() {
+    if [ ! -e /etc/debian_version ] ; then
+        echo "WARNING: your system is not a Debian based distro" 1>&2
+    elif ! installed_dpkg binfmt-support ; then
+        echo "WARNING: package binfmt-support is needed" 1>&2
+    fi
+    qemu_check_access "$EXPORTDIR"
+}
+
+qemu_check_systemd() {
+    if ! systemctl -q is-enabled systemd-binfmt.service ; then
+        echo "WARNING: systemd-binfmt.service is missing or disabled" 1>&2
+    fi
+    qemu_check_access "$EXPORTDIR"
+}
+
+qemu_generate_register() {
+    flags=""
+    if [ "$CREDENTIAL" = "yes" ] ; then
+        flags="OC"
+    fi
+    if [ "$PERSISTENT" = "yes" ] ; then
+        flags="${flags}F"
+    fi
+
+    echo ":qemu-$cpu:M::$magic:$mask:$qemu:P$flags"
+}
+
+qemu_register_interpreter() {
+    echo "Setting $qemu as binfmt interpreter for $cpu"
+    qemu_generate_register > /proc/sys/fs/binfmt_misc/register
+}
+
+qemu_generate_systemd() {
+    echo "Setting $qemu as binfmt interpreter for $cpu for systemd-binfmt.service"
+    qemu_generate_register > "$EXPORTDIR/qemu-$cpu.conf"
+}
+
+qemu_generate_debian() {
+    cat > "$EXPORTDIR/qemu-$cpu" <&2
+            continue
+        fi
+
+        qemu="$QEMU_PATH/qemu-$cpu-binfmt"
+        if [ "$cpu" = "i486" ] ; then
+            qemu="$QEMU_PATH/qemu-i386-binfmt"
+        fi
+
+        qemu="$qemu$QEMU_SUFFIX"
+        if [ "$host_family" != "$family" ] ; then
+            $BINFMT_SET
+        fi
+    done
+}
+
+CHECK=qemu_check_bintfmt_misc
+BINFMT_SET=qemu_register_interpreter
+
+SYSTEMDDIR="/etc/binfmt.d"
+DEBIANDIR="/usr/share/binfmts"
+
+QEMU_PATH=/usr/bin
+CREDENTIAL=no
+PERSISTENT=no
+QEMU_SUFFIX=""
+
+options=$(getopt -o ds:Q:S:e:hc:p: -l debian,systemd:,qemu-path:,qemu-suffix:,exportdir:,help,credential:,persistent: -- "$@")
+eval set -- "$options"
+
+while true ; do
+    case "$1" in
+    -d|--debian)
+        CHECK=qemu_check_debian
+        BINFMT_SET=qemu_generate_debian
+        EXPORTDIR=${EXPORTDIR:-$DEBIANDIR}
+        ;;
+    -s|--systemd)
+        CHECK=qemu_check_systemd
+        BINFMT_SET=qemu_generate_systemd
+        EXPORTDIR=${EXPORTDIR:-$SYSTEMDDIR}
+        shift
+        # check given cpu is in the supported CPU list
+        if [ "$1" != "ALL" ] ; then
+            for cpu in ${qemu_target_list} ; do
+                if [ "$cpu" = "$1" ] ; then
+                    break
+                fi
+            done
+
+            if [ "$cpu" = "$1" ] ; then
+                qemu_target_list="$1"
+            else
+                echo "ERROR: unknown CPU \"$1\"" 1>&2
+                usage
+                exit 1
+            fi
+        fi
+        ;;
+    -Q|--qemu-path)
+        shift
+        QEMU_PATH="$1"
+        ;;
+    -F|--qemu-suffix)
+        shift
+        QEMU_SUFFIX="$1"
+        ;;
+    -e|--exportdir)
+        shift
+        EXPORTDIR="$1"
+        ;;
+    -h|--help)
+        usage
+        exit 1
+        ;;
+    -c|--credential)
+        shift
+        CREDENTIAL="$1"
+        ;;
+    -p|--persistent)
+        shift
+        PERSISTENT="$1"
+        ;;
+    *)
+        break
+        ;;
+    esac
+    shift
+done
+
+$CHECK
+qemu_set_binfmts
diff --git a/scripts/qemu-gdb.py b/scripts/qemu-gdb.py
new file mode 100644
index 000000000..e0bfa7b5a
--- /dev/null
+++ b/scripts/qemu-gdb.py
@@ -0,0 +1,46 @@
+#!/usr/bin/env python3
+#
+# GDB debugging support
+#
+# Copyright 2012 Red Hat, Inc. and/or its affiliates
+#
+# Authors:
+#  Avi Kivity 
+#
+# This work is licensed under the terms of the GNU GPL, version 2 or
+# later.  See the COPYING file in the top-level directory.
+
+# Usage:
+# At the (gdb) prompt, type "source scripts/qemu-gdb.py".
+# "help qemu" should then list the supported QEMU debug support commands.
+
+import gdb
+
+import os, sys
+
+# Annoyingly, gdb doesn't put the directory of scripts onto the
+# module search path. Do it manually.
+
+sys.path.append(os.path.dirname(__file__))
+
+from qemugdb import aio, mtree, coroutine, tcg, timers
+
+class QemuCommand(gdb.Command):
+    '''Prefix for QEMU debug support commands'''
+    def __init__(self):
+        gdb.Command.__init__(self, 'qemu', gdb.COMMAND_DATA,
+                             gdb.COMPLETE_NONE, True)
+
+QemuCommand()
+coroutine.CoroutineCommand()
+mtree.MtreeCommand()
+aio.HandlersCommand()
+tcg.TCGLockStatusCommand()
+timers.TimersCommand()
+
+coroutine.CoroutineSPFunction()
+coroutine.CoroutinePCFunction()
+
+# Default to silently passing through SIGUSR1, because QEMU sends it
+# to itself a lot.
+gdb.execute('handle SIGUSR1 pass noprint nostop')
diff --git a/scripts/qemu-guest-agent/fsfreeze-hook b/scripts/qemu-guest-agent/fsfreeze-hook
new file mode 100755
index 000000000..13aafd484
--- /dev/null
+++ b/scripts/qemu-guest-agent/fsfreeze-hook
@@ -0,0 +1,33 @@
+#!/bin/sh
+
+# This script is executed when a guest agent receives fsfreeze-freeze and
+# fsfreeze-thaw command, if it is specified in --fsfreeze-hook (-F)
+# option of qemu-ga or placed in default path (/etc/qemu/fsfreeze-hook).
+# When the agent receives fsfreeze-freeze request, this script is issued with
+# "freeze" argument before the filesystem is frozen. And for fsfreeze-thaw
+# request, it is issued with "thaw" argument after filesystem is thawed.
+
+LOGFILE=/var/log/qga-fsfreeze-hook.log
+FSFREEZE_D=$(dirname -- "$0")/fsfreeze-hook.d
+
+# Check whether file $1 is a backup or rpm-generated file and should be ignored
+is_ignored_file() {
+    case "$1" in
+        *~ | *.bak | *.orig | *.rpmnew | *.rpmorig | *.rpmsave | *.sample | *.dpkg-old | *.dpkg-new | *.dpkg-tmp | *.dpkg-dist | *.dpkg-bak | *.dpkg-backup | *.dpkg-remove)
+            return 0 ;;
+    esac
+    return 1
+}
+
+# Iterate executables in directory "fsfreeze-hook.d" with the specified args
+[ ! -d "$FSFREEZE_D" ] && exit 0
+for file in "$FSFREEZE_D"/* ; do
+    is_ignored_file "$file" && continue
+    [ -x "$file" ] || continue
+    printf "$(date): execute $file $@\n" >>$LOGFILE
+    "$file" "$@" >>$LOGFILE 2>&1
+    STATUS=$?
+    printf "$(date): $file finished with status=$STATUS\n" >>$LOGFILE
+done
+
+exit 0
diff --git a/scripts/qemu-guest-agent/fsfreeze-hook.d/mysql-flush.sh.sample b/scripts/qemu-guest-agent/fsfreeze-hook.d/mysql-flush.sh.sample
new file mode 100755
index 000000000..2b4fa3aeb
--- /dev/null
+++ b/scripts/qemu-guest-agent/fsfreeze-hook.d/mysql-flush.sh.sample
@@ -0,0 +1,56 @@
+#!/bin/sh
+
+# Flush MySQL tables to the disk before the filesystem is frozen.
+# At the same time, this keeps a read lock in order to avoid write accesses
+# from the other clients until the filesystem is thawed.
+
+MYSQL="/usr/bin/mysql"
+MYSQL_OPTS="-uroot" #"-prootpassword"
+FIFO=/var/run/mysql-flush.fifo
+
+# Check mysql is installed and the server running
+[ -x "$MYSQL" ] && "$MYSQL" $MYSQL_OPTS < /dev/null || exit 0
+
+flush_and_wait() {
+    printf "FLUSH TABLES WITH READ LOCK \\G\n"
+    trap 'printf "$(date): $0 is killed\n">&2' HUP INT QUIT ALRM TERM
+    read < $FIFO
+    printf "UNLOCK TABLES \\G\n"
+    rm -f $FIFO
+}
+
+case "$1" in
+    freeze)
+        mkfifo $FIFO || exit 1
+        flush_and_wait | "$MYSQL" $MYSQL_OPTS &
+        # wait until every block is flushed
+        while [ "$(echo 'SHOW STATUS LIKE "Key_blocks_not_flushed"' |\
+                 "$MYSQL" $MYSQL_OPTS | tail -1 | cut -f 2)" -gt 0 ]; do
+            sleep 1
+        done
+        # for InnoDB, wait until every log is flushed
+        INNODB_STATUS=$(mktemp /tmp/mysql-flush.XXXXXX)
+        [ $? -ne 0 ] && exit 2
+        trap "rm -f $INNODB_STATUS; exit 1" HUP INT QUIT ALRM TERM
+        while :; do
+            printf "SHOW ENGINE INNODB STATUS \\G" |\
+                "$MYSQL" $MYSQL_OPTS > $INNODB_STATUS
+            LOG_CURRENT=$(grep 'Log sequence number' $INNODB_STATUS |\
+                          tr -s ' ' | cut -d' ' -f4)
+            LOG_FLUSHED=$(grep 'Log flushed up to' $INNODB_STATUS |\
+                          tr -s ' ' | cut -d' ' -f5)
+            [ "$LOG_CURRENT" = "$LOG_FLUSHED" ] && break
+            sleep 1
+        done
+        rm -f $INNODB_STATUS
+        ;;
+
+    thaw)
+        [ ! -p $FIFO ] && exit 1
+        echo > $FIFO
+        ;;
+
+    *)
+        exit 1
+        ;;
+esac
diff --git a/scripts/qemu-trace-stap b/scripts/qemu-trace-stap
new file mode 100755
index 000000000..90527eb97
--- /dev/null
+++ b/scripts/qemu-trace-stap
@@ -0,0 +1,175 @@
+#!/usr/bin/env python3
+# -*- python -*-
+#
+# Copyright (C) 2019 Red Hat, Inc
+#
+# QEMU SystemTap Trace Tool
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, see .
+
+import argparse
+import copy
+import os.path
+import re
+import subprocess
+import sys
+
+
+def probe_prefix(binary):
+    dirname, filename = os.path.split(binary)
+    return re.sub("-", ".", filename) + ".log"
+
+
+def which(binary):
+    for path in os.environ["PATH"].split(os.pathsep):
+        if os.path.exists(os.path.join(path, binary)):
+                return os.path.join(path, binary)
+
+    print("Unable to find '%s' in $PATH" % binary)
+    sys.exit(1)
+
+
+def tapset_dir(binary):
+    dirname, filename = os.path.split(binary)
+    if dirname == '':
+        thisfile = which(binary)
+    else:
+        thisfile = os.path.realpath(binary)
+        if not os.path.exists(thisfile):
+            print("Unable to find '%s'" % thisfile)
+            sys.exit(1)
+
+    basedir = os.path.split(thisfile)[0]
+    tapset = os.path.join(basedir, "..", "share", "systemtap", "tapset")
+    return os.path.realpath(tapset)
+
+
+def tapset_env(tapset_dir):
+    tenv = copy.copy(os.environ)
+    tenv["SYSTEMTAP_TAPSET"] = tapset_dir
+    return tenv
+
+def cmd_run(args):
+    prefix = probe_prefix(args.binary)
+    tapsets = tapset_dir(args.binary)
+
+    if args.verbose:
+        print("Using tapset dir '%s' for binary '%s'" % (tapsets, args.binary))
+
+    probes = []
+    for probe in args.probes:
+        probes.append("probe %s.%s {}" % (prefix, probe))
+    if len(probes) == 0:
+        print("At least one probe pattern must be specified")
+        sys.exit(1)
+
+    script = " ".join(probes)
+    if args.verbose:
+        print("Compiling script '%s'" % script)
+        script = """probe begin { print("Running script, -c to quit\\n") } """ + script
+
+    # We request an 8MB buffer, since the stap default 1MB buffer
+    # can be easily overflowed by frequently firing QEMU traces
+    stapargs = ["stap", "-s", "8"]
+    if args.pid is not None:
+        stapargs.extend(["-x", args.pid])
+    stapargs.extend(["-e", script])
+    subprocess.call(stapargs, env=tapset_env(tapsets))
+
+
+def cmd_list(args):
+    tapsets = tapset_dir(args.binary)
+
+    if args.verbose:
+        print("Using tapset dir '%s' for binary '%s'" % (tapsets, args.binary))
+
+    def print_probes(verbose, name):
+        prefix = probe_prefix(args.binary)
+        offset = len(prefix) + 1
+        script = prefix + "." + name
+
+        if verbose:
+            print("Listing probes with name '%s'" % script)
+        proc = subprocess.Popen(["stap", "-l", script],
+                                stdout=subprocess.PIPE,
+                                universal_newlines=True,
+                                env=tapset_env(tapsets))
+        out, err = proc.communicate()
+        if proc.returncode != 0:
+            print("No probes found, are the tapsets installed in %s" % tapset_dir(args.binary))
+            sys.exit(1)
+
+        for line in out.splitlines():
+            if line.startswith(prefix):
+                print("%s" % line[offset:])
+
+    if len(args.probes) == 0:
+        print_probes(args.verbose, "*")
+    else:
+        for probe in args.probes:
+            print_probes(args.verbose, probe)
+
+
+def main():
+    parser = argparse.ArgumentParser(description="QEMU SystemTap trace tool")
+    parser.add_argument("-v", "--verbose", help="Print verbose progress info",
+                        action='store_true')
+
+    subparser = parser.add_subparsers(help="commands")
+    subparser.required = True
+    subparser.dest = "command"
+
+    runparser = subparser.add_parser("run", help="Run a trace session",
+                                     formatter_class=argparse.RawDescriptionHelpFormatter,
+                                     epilog="""
+
+To watch all trace points on the qemu-system-x86_64 binary:
+
+   %(argv0)s run qemu-system-x86_64
+
+To only watch the trace points matching the qio* and qcrypto* patterns
+
+   %(argv0)s run qemu-system-x86_64 'qio*' 'qcrypto*'
+""" % {"argv0": sys.argv[0]})
+    runparser.set_defaults(func=cmd_run)
+    runparser.add_argument("--pid", "-p", dest="pid",
+                           help="Restrict tracing to a specific process ID")
+    runparser.add_argument("binary", help="QEMU system or user emulator binary")
+    runparser.add_argument("probes", help="Probe names or wildcards",
+                           nargs=argparse.REMAINDER)
+
+    listparser = subparser.add_parser("list", help="List probe points",
+                                      formatter_class=argparse.RawDescriptionHelpFormatter,
+                                      epilog="""
+
+To list all trace points on the qemu-system-x86_64 binary:
+
+   %(argv0)s list qemu-system-x86_64
+
+To only list the trace points matching the qio* and qcrypto* patterns
+
+   %(argv0)s list qemu-system-x86_64 'qio*' 'qcrypto*'
+""" % {"argv0": sys.argv[0]})
+    listparser.set_defaults(func=cmd_list)
+    listparser.add_argument("binary", help="QEMU system or user emulator binary")
+    listparser.add_argument("probes", help="Probe names or wildcards",
+                            nargs=argparse.REMAINDER)
+
+    args = parser.parse_args()
+
+    args.func(args)
+    sys.exit(0)
+
+if __name__ == '__main__':
+    main()
diff --git a/scripts/qemu-version.sh b/scripts/qemu-version.sh
new file mode 100755
index 000000000..3f6e7e6d4
--- /dev/null
+++ b/scripts/qemu-version.sh
@@ -0,0 +1,25 @@
+#!/bin/sh
+
+set -eu
+
+dir="$1"
+pkgversion="$2"
+version="$3"
+
+if [ -z "$pkgversion" ]; then
+    cd "$dir"
+    if [ -e .git ]; then
+        pkgversion=$(git describe --match 'v*' --dirty) || :
+    fi
+fi
+
+if [ -n "$pkgversion" ]; then
+    fullversion="$version ($pkgversion)"
+else
+    fullversion="$version"
+fi
+
+cat <
+#
+
+# We don't need to do anything in our init file currently.
+
+"""
+Support routines for debugging QEMU under GDB
+"""
+
+__license__    = "GPL version 2 or (at your option) any later version"
diff --git a/scripts/qemugdb/aio.py b/scripts/qemugdb/aio.py
new file mode 100644
index 000000000..d7c1ba0c2
--- /dev/null
+++ b/scripts/qemugdb/aio.py
@@ -0,0 +1,57 @@
+#
+# GDB debugging support: aio/iohandler debug
+#
+# Copyright (c) 2015 Red Hat, Inc.
+#
+# Author: Dr. David Alan Gilbert 
+#
+# This work is licensed under the terms of the GNU GPL, version 2 or
+# later.  See the COPYING file in the top-level directory.
+#
+
+import gdb
+from qemugdb import coroutine
+
+def isnull(ptr):
+    return ptr == gdb.Value(0).cast(ptr.type)
+
+def dump_aiocontext(context, verbose):
+    '''Display a dump and backtrace for an aiocontext'''
+    cur = context['aio_handlers']['lh_first']
+    # Get pointers to functions we're going to process specially
+    sym_fd_coroutine_enter = gdb.parse_and_eval('fd_coroutine_enter')
+
+    while not isnull(cur):
+        entry = cur.dereference()
+        gdb.write('----\n%s\n' % entry)
+        if verbose and cur['io_read'] == sym_fd_coroutine_enter:
+            coptr = (cur['opaque'].cast(gdb.lookup_type('FDYieldUntilData').pointer()))['co']
+            coptr = coptr.cast(gdb.lookup_type('CoroutineUContext').pointer())
+            coroutine.bt_jmpbuf(coptr['env']['__jmpbuf'])
+        cur = cur['node']['le_next'];
+
+    gdb.write('----\n')
+
+class HandlersCommand(gdb.Command):
+    '''Display aio handlers'''
+    def __init__(self):
+        gdb.Command.__init__(self, 'qemu handlers', gdb.COMMAND_DATA,
+                             gdb.COMPLETE_NONE)
+
+    def invoke(self, arg, from_tty):
+        verbose = False
+        argv = gdb.string_to_argv(arg)
+
+        if len(argv) > 0 and argv[0] == '--verbose':
+            verbose = True
+            argv.pop(0)
+
+        if len(argv) > 1:
+            gdb.write('usage: qemu handlers [--verbose] [handler]\n')
+            return
+
+        if len(argv) == 1:
+            handlers_name = argv[0]
+        else:
+            handlers_name = 'qemu_aio_context'
+        dump_aiocontext(gdb.parse_and_eval(handlers_name), verbose)
diff --git a/scripts/qemugdb/coroutine.py b/scripts/qemugdb/coroutine.py
new file mode 100644
index 000000000..db6138902
--- /dev/null
+++ b/scripts/qemugdb/coroutine.py
@@ -0,0 +1,115 @@
+#
+# GDB debugging support
+#
+# Copyright 2012 Red Hat, Inc. and/or its affiliates
+#
+# Authors:
+#  Avi Kivity 
+#
+# This work is licensed under the terms of the GNU GPL, version 2
+# or later.  See the COPYING file in the top-level directory.
+
+import gdb
+
+VOID_PTR = gdb.lookup_type('void').pointer()
+
+def get_fs_base():
+    '''Fetch %fs base value using arch_prctl(ARCH_GET_FS).  This is
+       pthread_self().'''
+    # %rsp - 120 is scratch space according to the SystemV ABI
+    old = gdb.parse_and_eval('*(uint64_t*)($rsp - 120)')
+    gdb.execute('call (int)arch_prctl(0x1003, $rsp - 120)', False, True)
+    fs_base = gdb.parse_and_eval('*(uint64_t*)($rsp - 120)')
+    gdb.execute('set *(uint64_t*)($rsp - 120) = %s' % old, False, True)
+    return fs_base
+
+def pthread_self():
+    '''Fetch pthread_self() from the glibc start_thread function.'''
+    f = gdb.newest_frame()
+    while f.name() != 'start_thread':
+        f = f.older()
+        if f is None:
+            return get_fs_base()
+
+    try:
+        return f.read_var("arg")
+    except ValueError:
+        return get_fs_base()
+
+def get_glibc_pointer_guard():
+    '''Fetch glibc pointer guard value'''
+    fs_base = pthread_self()
+    return gdb.parse_and_eval('*(uint64_t*)((uint64_t)%s + 0x30)' % fs_base)
+
+def glibc_ptr_demangle(val, pointer_guard):
+    '''Undo effect of glibc's PTR_MANGLE()'''
+    return gdb.parse_and_eval('(((uint64_t)%s >> 0x11) | ((uint64_t)%s << (64 - 0x11))) ^ (uint64_t)%s' % (val, val, pointer_guard))
+
+def get_jmpbuf_regs(jmpbuf):
+    JB_RBX  = 0
+    JB_RBP  = 1
+    JB_R12  = 2
+    JB_R13  = 3
+    JB_R14  = 4
+    JB_R15  = 5
+    JB_RSP  = 6
+    JB_PC   = 7
+
+    pointer_guard = get_glibc_pointer_guard()
+    return {'rbx': jmpbuf[JB_RBX],
+        'rbp': glibc_ptr_demangle(jmpbuf[JB_RBP], pointer_guard),
+        'rsp': glibc_ptr_demangle(jmpbuf[JB_RSP], pointer_guard),
+        'r12': jmpbuf[JB_R12],
+        'r13': jmpbuf[JB_R13],
+        'r14': jmpbuf[JB_R14],
+        'r15': jmpbuf[JB_R15],
+        'rip': glibc_ptr_demangle(jmpbuf[JB_PC], pointer_guard) }
+
+def bt_jmpbuf(jmpbuf):
+    '''Backtrace a jmpbuf'''
+    regs = get_jmpbuf_regs(jmpbuf)
+    old = dict()
+
+    for i in regs:
+        old[i] = gdb.parse_and_eval('(uint64_t)$%s' % i)
+
+    for i in regs:
+        gdb.execute('set $%s = %s' % (i, regs[i]))
+
+    gdb.execute('bt')
+
+    for i in regs:
+        gdb.execute('set $%s = %s' % (i, old[i]))
+
+def coroutine_to_jmpbuf(co):
+    coroutine_pointer = co.cast(gdb.lookup_type('CoroutineUContext').pointer())
+    return coroutine_pointer['env']['__jmpbuf']
+
+
+class CoroutineCommand(gdb.Command):
+    '''Display coroutine backtrace'''
+    def __init__(self):
+        gdb.Command.__init__(self, 'qemu coroutine', gdb.COMMAND_DATA,
+                             gdb.COMPLETE_NONE)
+
+    def invoke(self, arg, from_tty):
+        argv = gdb.string_to_argv(arg)
+        if len(argv) != 1:
+            gdb.write('usage: qemu coroutine \n')
+            return
+
+        bt_jmpbuf(coroutine_to_jmpbuf(gdb.parse_and_eval(argv[0])))
+
+class CoroutineSPFunction(gdb.Function):
+    def __init__(self):
+        gdb.Function.__init__(self, 'qemu_coroutine_sp')
+
+    def invoke(self, addr):
+        return get_jmpbuf_regs(coroutine_to_jmpbuf(addr))['rsp'].cast(VOID_PTR)
+
+class CoroutinePCFunction(gdb.Function):
+    def __init__(self):
+        gdb.Function.__init__(self, 'qemu_coroutine_pc')
+
+    def invoke(self, addr):
+        return get_jmpbuf_regs(coroutine_to_jmpbuf(addr))['rip'].cast(VOID_PTR)
diff --git a/scripts/qemugdb/mtree.py b/scripts/qemugdb/mtree.py
new file mode 100644
index 000000000..8fe42c3c1
--- /dev/null
+++ b/scripts/qemugdb/mtree.py
@@ -0,0 +1,85 @@
+#
+# GDB debugging support
+#
+# Copyright 2012 Red Hat, Inc. and/or its affiliates
+#
+# Authors:
+#  Avi Kivity 
+#
+# This work is licensed under the terms of the GNU GPL, version 2 or
+# later.  See the COPYING file in the top-level directory.
+
+# 'qemu mtree' -- display the memory hierarchy
+
+import gdb
+
+def isnull(ptr):
+    return ptr == gdb.Value(0).cast(ptr.type)
+
+def int128(p):
+    '''Read an Int128 type to a python integer.
+
+    QEMU can be built with native Int128 support so we need to detect
+    if the value is a structure or the native type.
+    '''
+    if p.type.code == gdb.TYPE_CODE_STRUCT:
+        return int(p['lo']) + (int(p['hi']) << 64)
+    else:
+        return int(("%s" % p), 16)
+
+class MtreeCommand(gdb.Command):
+    '''Display the memory tree hierarchy'''
+    def __init__(self):
+        gdb.Command.__init__(self, 'qemu mtree', gdb.COMMAND_DATA,
+                             gdb.COMPLETE_NONE)
+        self.queue = []
+    def invoke(self, arg, from_tty):
+        self.seen = set()
+        self.queue_root('address_space_memory')
+        self.queue_root('address_space_io')
+        self.process_queue()
+    def queue_root(self, varname):
+        ptr = gdb.parse_and_eval(varname)['root']
+        self.queue.append(ptr)
+    def process_queue(self):
+        while self.queue:
+            ptr = self.queue.pop(0)
+            if int(ptr) in self.seen:
+                continue
+            self.print_item(ptr)
+    def print_item(self, ptr, offset = gdb.Value(0), level = 0):
+        self.seen.add(int(ptr))
+        addr = ptr['addr']
+        addr += offset
+        size = int128(ptr['size'])
+        alias = ptr['alias']
+        klass = ''
+        if not isnull(alias):
+            klass = ' (alias)'
+        elif not isnull(ptr['ops']):
+            klass = ' (I/O)'
+        elif bool(ptr['ram']):
+            klass = ' (RAM)'
+        gdb.write('%s%016x-%016x %s%s (@ %s)\n'
+                  % ('  ' * level,
+                     int(addr),
+                     int(addr + (size - 1)),
+                     ptr['name'].string(),
+                     klass,
+                     ptr,
+                     ),
+                  gdb.STDOUT)
+        if not isnull(alias):
+            gdb.write('%s    alias: %s@%016x (@ %s)\n' %
+                      ('  ' * level,
+                       alias['name'].string(),
+                       int(ptr['alias_offset']),
+                       alias,
+                       ),
+                      gdb.STDOUT)
+            self.queue.append(alias)
+        subregion = ptr['subregions']['tqh_first']
+        level += 1
+        while not isnull(subregion):
+            self.print_item(subregion, addr, level)
+            subregion = subregion['subregions_link']['tqe_next']
diff --git a/scripts/qemugdb/tcg.py b/scripts/qemugdb/tcg.py
new file mode 100644
index 000000000..16c03c06a
--- /dev/null
+++ b/scripts/qemugdb/tcg.py
@@ -0,0 +1,42 @@
+# -*- coding: utf-8 -*-
+#
+# GDB debugging support, TCG status
+#
+# Copyright 2016 Linaro Ltd
+#
+# Authors:
+#  Alex Bennée 
+#
+# This work is licensed under the terms of the GNU GPL, version 2 or
+# later.  See the COPYING file in the top-level directory.
+
+# 'qemu tcg-lock-status' -- display the TCG lock status across threads
+
+import gdb
+
+class TCGLockStatusCommand(gdb.Command):
+    '''Display TCG Execution Status'''
+    def __init__(self):
+        gdb.Command.__init__(self, 'qemu tcg-lock-status', gdb.COMMAND_DATA,
+                             gdb.COMPLETE_NONE)
+
+    def invoke(self, arg, from_tty):
+        gdb.write("Thread, BQL (iothread_mutex), Replay, Blocked?\n")
+        for thread in gdb.inferiors()[0].threads():
+            thread.switch()
+
+            iothread = gdb.parse_and_eval("iothread_locked")
+            replay = gdb.parse_and_eval("replay_locked")
+
+            frame = gdb.selected_frame()
+            if frame.name() == "__lll_lock_wait":
+                frame.older().select()
+                mutex = gdb.parse_and_eval("mutex")
+                owner = gdb.parse_and_eval("mutex->__data.__owner")
+                blocked = ("__lll_lock_wait waiting on %s from %d" %
+                           (mutex, owner))
+            else:
+                blocked = "not blocked"
+
+            gdb.write("%d/%d, %s, %s, %s\n" % (thread.num, thread.ptid[1],
+                                               iothread, replay, blocked))
diff --git a/scripts/qemugdb/timers.py b/scripts/qemugdb/timers.py
new file mode 100644
index 000000000..46537b27c
--- /dev/null
+++ b/scripts/qemugdb/timers.py
@@ -0,0 +1,56 @@
+# -*- coding: utf-8 -*-
+# GDB debugging support
+#
+# Copyright 2017 Linaro Ltd
+#
+# Author: Alex Bennée 
+#
+# This work is licensed under the terms of the GNU GPL, version 2 or later.
+# See the COPYING file in the top-level directory.
+#
+# SPDX-License-Identifier: GPL-2.0-or-later
+
+# 'qemu timers' -- display the current timerlists
+
+import gdb
+
+class TimersCommand(gdb.Command):
+    '''Display the current QEMU timers'''
+
+    def __init__(self):
+        'Register the class as a gdb command'
+        gdb.Command.__init__(self, 'qemu timers', gdb.COMMAND_DATA,
+                             gdb.COMPLETE_NONE)
+
+    def dump_timers(self, timer):
+        "Follow a timer and recursively dump each one in the list."
+        # timer should be of type QemuTimer
+        gdb.write("    timer %s/%s (cb:%s,opq:%s)\n" % (
+            timer['expire_time'],
+            timer['scale'],
+            timer['cb'],
+            timer['opaque']))
+
+        if int(timer['next']) > 0:
+            self.dump_timers(timer['next'])
+
+
+    def process_timerlist(self, tlist, ttype):
+        gdb.write("Processing %s timers\n" % (ttype))
+        gdb.write("  clock %s is enabled:%s, last:%s\n" % (
+            tlist['clock']['type'],
+            tlist['clock']['enabled'],
+            tlist['clock']['last']))
+        if int(tlist['active_timers']) > 0:
+            self.dump_timers(tlist['active_timers'])
+
+
+    def invoke(self, arg, from_tty):
+        'Run the command'
+        main_timers = gdb.parse_and_eval("main_loop_tlg")
+
+        # This will break if QEMUClockType in timer.h is redfined
+        self.process_timerlist(main_timers['tl'][0], "Realtime")
+        self.process_timerlist(main_timers['tl'][1], "Virtual")
+        self.process_timerlist(main_timers['tl'][2], "Host")
+        self.process_timerlist(main_timers['tl'][3], "Virtual RT")
diff --git a/scripts/qmp/qemu-ga-client b/scripts/qmp/qemu-ga-client
new file mode 100755
index 000000000..ce122984a
--- /dev/null
+++ b/scripts/qmp/qemu-ga-client
@@ -0,0 +1,304 @@
+#!/usr/bin/env python3
+
+# QEMU Guest Agent Client
+#
+# Copyright (C) 2012 Ryota Ozaki 
+#
+# This work is licensed under the terms of the GNU GPL, version 2.  See
+# the COPYING file in the top-level directory.
+#
+# Usage:
+#
+# Start QEMU with:
+#
+# # qemu [...] -chardev socket,path=/tmp/qga.sock,server,nowait,id=qga0 \
+#   -device virtio-serial -device virtserialport,chardev=qga0,name=org.qemu.guest_agent.0
+#
+# Run the script:
+#
+# $ qemu-ga-client --address=/tmp/qga.sock  [args...]
+#
+# or
+#
+# $ export QGA_CLIENT_ADDRESS=/tmp/qga.sock
+# $ qemu-ga-client  [args...]
+#
+# For example:
+#
+# $ qemu-ga-client cat /etc/resolv.conf
+# # Generated by NetworkManager
+# nameserver 10.0.2.3
+# $ qemu-ga-client fsfreeze status
+# thawed
+# $ qemu-ga-client fsfreeze freeze
+# 2 filesystems frozen
+#
+# See also: https://wiki.qemu.org/Features/QAPI/GuestAgent
+#
+
+import os
+import sys
+import base64
+import random
+
+sys.path.append(os.path.join(os.path.dirname(__file__), '..', '..', 'python'))
+from qemu import qmp
+
+
+class QemuGuestAgent(qmp.QEMUMonitorProtocol):
+    def __getattr__(self, name):
+        def wrapper(**kwds):
+            return self.command('guest-' + name.replace('_', '-'), **kwds)
+        return wrapper
+
+
+class QemuGuestAgentClient:
+    error = QemuGuestAgent.error
+
+    def __init__(self, address):
+        self.qga = QemuGuestAgent(address)
+        self.qga.connect(negotiate=False)
+
+    def sync(self, timeout=3):
+        # Avoid being blocked forever
+        if not self.ping(timeout):
+            raise EnvironmentError('Agent seems not alive')
+        uid = random.randint(0, (1 << 32) - 1)
+        while True:
+            ret = self.qga.sync(id=uid)
+            if isinstance(ret, int) and int(ret) == uid:
+                break
+
+    def __file_read_all(self, handle):
+        eof = False
+        data = ''
+        while not eof:
+            ret = self.qga.file_read(handle=handle, count=1024)
+            _data = base64.b64decode(ret['buf-b64'])
+            data += _data
+            eof = ret['eof']
+        return data
+
+    def read(self, path):
+        handle = self.qga.file_open(path=path)
+        try:
+            data = self.__file_read_all(handle)
+        finally:
+            self.qga.file_close(handle=handle)
+        return data
+
+    def info(self):
+        info = self.qga.info()
+
+        msgs = []
+        msgs.append('version: ' + info['version'])
+        msgs.append('supported_commands:')
+        enabled = [c['name'] for c in info['supported_commands'] if c['enabled']]
+        msgs.append('\tenabled: ' + ', '.join(enabled))
+        disabled = [c['name'] for c in info['supported_commands'] if not c['enabled']]
+        msgs.append('\tdisabled: ' + ', '.join(disabled))
+
+        return '\n'.join(msgs)
+
+    def __gen_ipv4_netmask(self, prefixlen):
+        mask = int('1' * prefixlen + '0' * (32 - prefixlen), 2)
+        return '.'.join([str(mask >> 24),
+                         str((mask >> 16) & 0xff),
+                         str((mask >> 8) & 0xff),
+                         str(mask & 0xff)])
+
+    def ifconfig(self):
+        nifs = self.qga.network_get_interfaces()
+
+        msgs = []
+        for nif in nifs:
+            msgs.append(nif['name'] + ':')
+            if 'ip-addresses' in nif:
+                for ipaddr in nif['ip-addresses']:
+                    if ipaddr['ip-address-type'] == 'ipv4':
+                        addr = ipaddr['ip-address']
+                        mask = self.__gen_ipv4_netmask(int(ipaddr['prefix']))
+                        msgs.append("\tinet %s  netmask %s" % (addr, mask))
+                    elif ipaddr['ip-address-type'] == 'ipv6':
+                        addr = ipaddr['ip-address']
+                        prefix = ipaddr['prefix']
+                        msgs.append("\tinet6 %s  prefixlen %s" % (addr, prefix))
+            if nif['hardware-address'] != '00:00:00:00:00:00':
+                msgs.append("\tether " + nif['hardware-address'])
+
+        return '\n'.join(msgs)
+
+    def ping(self, timeout):
+        self.qga.settimeout(timeout)
+        try:
+            self.qga.ping()
+        except self.qga.timeout:
+            return False
+        return True
+
+    def fsfreeze(self, cmd):
+        if cmd not in ['status', 'freeze', 'thaw']:
+            raise Exception('Invalid command: ' + cmd)
+
+        return getattr(self.qga, 'fsfreeze' + '_' + cmd)()
+
+    def fstrim(self, minimum=0):
+        return getattr(self.qga, 'fstrim')(minimum=minimum)
+
+    def suspend(self, mode):
+        if mode not in ['disk', 'ram', 'hybrid']:
+            raise Exception('Invalid mode: ' + mode)
+
+        try:
+            getattr(self.qga, 'suspend' + '_' + mode)()
+            # On error exception will raise
+        except self.qga.timeout:
+            # On success command will timed out
+            return
+
+    def shutdown(self, mode='powerdown'):
+        if mode not in ['powerdown', 'halt', 'reboot']:
+            raise Exception('Invalid mode: ' + mode)
+
+        try:
+            self.qga.shutdown(mode=mode)
+        except self.qga.timeout:
+            return
+
+
+def _cmd_cat(client, args):
+    if len(args) != 1:
+        print('Invalid argument')
+        print('Usage: cat ')
+        sys.exit(1)
+    print(client.read(args[0]))
+
+
+def _cmd_fsfreeze(client, args):
+    usage = 'Usage: fsfreeze status|freeze|thaw'
+    if len(args) != 1:
+        print('Invalid argument')
+        print(usage)
+        sys.exit(1)
+    if args[0] not in ['status', 'freeze', 'thaw']:
+        print('Invalid command: ' + args[0])
+        print(usage)
+        sys.exit(1)
+    cmd = args[0]
+    ret = client.fsfreeze(cmd)
+    if cmd == 'status':
+        print(ret)
+    elif cmd == 'freeze':
+        print("%d filesystems frozen" % ret)
+    else:
+        print("%d filesystems thawed" % ret)
+
+
+def _cmd_fstrim(client, args):
+    if len(args) == 0:
+        minimum = 0
+    else:
+        minimum = int(args[0])
+    print(client.fstrim(minimum))
+
+
+def _cmd_ifconfig(client, args):
+    print(client.ifconfig())
+
+
+def _cmd_info(client, args):
+    print(client.info())
+
+
+def _cmd_ping(client, args):
+    if len(args) == 0:
+        timeout = 3
+    else:
+        timeout = float(args[0])
+    alive = client.ping(timeout)
+    if not alive:
+        print("Not responded in %s sec" % args[0])
+        sys.exit(1)
+
+
+def _cmd_suspend(client, args):
+    usage = 'Usage: suspend disk|ram|hybrid'
+    if len(args) != 1:
+        print('Less argument')
+        print(usage)
+        sys.exit(1)
+    if args[0] not in ['disk', 'ram', 'hybrid']:
+        print('Invalid command: ' + args[0])
+        print(usage)
+        sys.exit(1)
+    client.suspend(args[0])
+
+
+def _cmd_shutdown(client, args):
+    client.shutdown()
+_cmd_powerdown = _cmd_shutdown
+
+
+def _cmd_halt(client, args):
+    client.shutdown('halt')
+
+
+def _cmd_reboot(client, args):
+    client.shutdown('reboot')
+
+
+commands = [m.replace('_cmd_', '') for m in dir() if '_cmd_' in m]
+
+
+def main(address, cmd, args):
+    if not os.path.exists(address):
+        print('%s not found' % address)
+        sys.exit(1)
+
+    if cmd not in commands:
+        print('Invalid command: ' + cmd)
+        print('Available commands: ' + ', '.join(commands))
+        sys.exit(1)
+
+    try:
+        client = QemuGuestAgentClient(address)
+    except QemuGuestAgent.error as e:
+        import errno
+
+        print(e)
+        if e.errno == errno.ECONNREFUSED:
+            print('Hint: qemu is not running?')
+        sys.exit(1)
+
+    if cmd == 'fsfreeze' and args[0] == 'freeze':
+        client.sync(60)
+    elif cmd != 'ping':
+        client.sync()
+
+    globals()['_cmd_' + cmd](client, args)
+
+
+if __name__ == '__main__':
+    import sys
+    import os
+    import optparse
+
+    address = os.environ['QGA_CLIENT_ADDRESS'] if 'QGA_CLIENT_ADDRESS' in os.environ else None
+
+    usage = "%prog [--address=|]  [args...]\n"
+    usage += ': ' + ', '.join(commands)
+    parser = optparse.OptionParser(usage=usage)
+    parser.add_option('--address', action='store', type='string',
+                      default=address, help='Specify a ip:port pair or a unix socket path')
+    options, args = parser.parse_args()
+
+    address = options.address
+    if address is None:
+        parser.error('address is not specified')
+        sys.exit(1)
+
+    if len(args) == 0:
+        parser.error('Less argument')
+        sys.exit(1)
+
+    main(address, args[0], args[1:])
diff --git a/scripts/qmp/qmp b/scripts/qmp/qmp
new file mode 100755
index 000000000..0f12307c8
--- /dev/null
+++ b/scripts/qmp/qmp
@@ -0,0 +1,11 @@
+#!/usr/bin/env python3
+
+import sys
+
+print('''This unmaintained and undocumented script was removed in preference
+for qmp-shell. The assumption is that most users are using either
+qmp-shell, socat, or pasting/piping JSON into stdio. The duplication of
+facilities here is unwanted, and the divergence of syntax harmful.''',
+      file=sys.stderr)
+
+sys.exit(1)
diff --git a/scripts/qmp/qmp-shell b/scripts/qmp/qmp-shell
new file mode 100755
index 000000000..b4d06096a
--- /dev/null
+++ b/scripts/qmp/qmp-shell
@@ -0,0 +1,459 @@
+#!/usr/bin/env python3
+#
+# Low-level QEMU shell on top of QMP.
+#
+# Copyright (C) 2009, 2010 Red Hat Inc.
+#
+# Authors:
+#  Luiz Capitulino 
+#
+# This work is licensed under the terms of the GNU GPL, version 2.  See
+# the COPYING file in the top-level directory.
+#
+# Usage:
+#
+# Start QEMU with:
+#
+# # qemu [...] -qmp unix:./qmp-sock,server
+#
+# Run the shell:
+#
+# $ qmp-shell ./qmp-sock
+#
+# Commands have the following format:
+#
+#    < command-name > [ arg-name1=arg1 ] ... [ arg-nameN=argN ]
+#
+# For example:
+#
+# (QEMU) device_add driver=e1000 id=net1
+# {u'return': {}}
+# (QEMU)
+#
+# key=value pairs also support Python or JSON object literal subset notations,
+# without spaces. Dictionaries/objects {} are supported as are arrays [].
+#
+#    example-command arg-name1={'key':'value','obj'={'prop':"value"}}
+#
+# Both JSON and Python formatting should work, including both styles of
+# string literal quotes. Both paradigms of literal values should work,
+# including null/true/false for JSON and None/True/False for Python.
+#
+#
+# Transactions have the following multi-line format:
+#
+#    transaction(
+#    action-name1 [ arg-name1=arg1 ] ... [arg-nameN=argN ]
+#    ...
+#    action-nameN [ arg-name1=arg1 ] ... [arg-nameN=argN ]
+#    )
+#
+# One line transactions are also supported:
+#
+#    transaction( action-name1 ... )
+#
+# For example:
+#
+#     (QEMU) transaction(
+#     TRANS> block-dirty-bitmap-add node=drive0 name=bitmap1
+#     TRANS> block-dirty-bitmap-clear node=drive0 name=bitmap0
+#     TRANS> )
+#     {"return": {}}
+#     (QEMU)
+#
+# Use the -v and -p options to activate the verbose and pretty-print options,
+# which will echo back the properly formatted JSON-compliant QMP that is being
+# sent to QEMU, which is useful for debugging and documentation generation.
+
+import json
+import ast
+import readline
+import sys
+import os
+import errno
+import atexit
+import re
+
+sys.path.append(os.path.join(os.path.dirname(__file__), '..', '..', 'python'))
+from qemu import qmp
+
+class QMPCompleter(list):
+    def complete(self, text, state):
+        for cmd in self:
+            if cmd.startswith(text):
+                if not state:
+                    return cmd
+                else:
+                    state -= 1
+
+class QMPShellError(Exception):
+    pass
+
+class QMPShellBadPort(QMPShellError):
+    pass
+
+class FuzzyJSON(ast.NodeTransformer):
+    '''This extension of ast.NodeTransformer filters literal "true/false/null"
+    values in an AST and replaces them by proper "True/False/None" values that
+    Python can properly evaluate.'''
+    def visit_Name(self, node):
+        if node.id == 'true':
+            node.id = 'True'
+        if node.id == 'false':
+            node.id = 'False'
+        if node.id == 'null':
+            node.id = 'None'
+        return node
+
+# TODO: QMPShell's interface is a bit ugly (eg. _fill_completion() and
+#       _execute_cmd()). Let's design a better one.
+class QMPShell(qmp.QEMUMonitorProtocol):
+    def __init__(self, address, pretty=False):
+        super(QMPShell, self).__init__(self.__get_address(address))
+        self._greeting = None
+        self._completer = None
+        self._pretty = pretty
+        self._transmode = False
+        self._actions = list()
+        self._histfile = os.path.join(os.path.expanduser('~'),
+                                      '.qmp-shell_history')
+
+    def __get_address(self, arg):
+        """
+        Figure out if the argument is in the port:host form, if it's not it's
+        probably a file path.
+        """
+        addr = arg.split(':')
+        if len(addr) == 2:
+            try:
+                port = int(addr[1])
+            except ValueError:
+                raise QMPShellBadPort
+            return ( addr[0], port )
+        # socket path
+        return arg
+
+    def _fill_completion(self):
+        cmds = self.cmd('query-commands')
+        if 'error' in cmds:
+            return
+        for cmd in cmds['return']:
+            self._completer.append(cmd['name'])
+
+    def __completer_setup(self):
+        self._completer = QMPCompleter()
+        self._fill_completion()
+        readline.set_history_length(1024)
+        readline.set_completer(self._completer.complete)
+        readline.parse_and_bind("tab: complete")
+        # XXX: default delimiters conflict with some command names (eg. query-),
+        # clearing everything as it doesn't seem to matter
+        readline.set_completer_delims('')
+        try:
+            readline.read_history_file(self._histfile)
+        except Exception as e:
+            if isinstance(e, IOError) and e.errno == errno.ENOENT:
+                # File not found. No problem.
+                pass
+            else:
+                print("Failed to read history '%s'; %s" % (self._histfile, e))
+        atexit.register(self.__save_history)
+
+    def __save_history(self):
+        try:
+            readline.write_history_file(self._histfile)
+        except Exception as e:
+            print("Failed to save history file '%s'; %s" % (self._histfile, e))
+
+    def __parse_value(self, val):
+        try:
+            return int(val)
+        except ValueError:
+            pass
+
+        if val.lower() == 'true':
+            return True
+        if val.lower() == 'false':
+            return False
+        if val.startswith(('{', '[')):
+            # Try first as pure JSON:
+            try:
+                return json.loads(val)
+            except ValueError:
+                pass
+            # Try once again as FuzzyJSON:
+            try:
+                st = ast.parse(val, mode='eval')
+                return ast.literal_eval(FuzzyJSON().visit(st))
+            except SyntaxError:
+                pass
+            except ValueError:
+                pass
+        return val
+
+    def __cli_expr(self, tokens, parent):
+        for arg in tokens:
+            (key, sep, val) = arg.partition('=')
+            if sep != '=':
+                raise QMPShellError("Expected a key=value pair, got '%s'" % arg)
+
+            value = self.__parse_value(val)
+            optpath = key.split('.')
+            curpath = []
+            for p in optpath[:-1]:
+                curpath.append(p)
+                d = parent.get(p, {})
+                if type(d) is not dict:
+                    raise QMPShellError('Cannot use "%s" as both leaf and non-leaf key' % '.'.join(curpath))
+                parent[p] = d
+                parent = d
+            if optpath[-1] in parent:
+                if type(parent[optpath[-1]]) is dict:
+                    raise QMPShellError('Cannot use "%s" as both leaf and non-leaf key' % '.'.join(curpath))
+                else:
+                    raise QMPShellError('Cannot set "%s" multiple times' % key)
+            parent[optpath[-1]] = value
+
+    def __build_cmd(self, cmdline):
+        """
+        Build a QMP input object from a user provided command-line in the
+        following format:
+
+            < command-name > [ arg-name1=arg1 ] ... [ arg-nameN=argN ]
+        """
+        cmdargs = re.findall(r'''(?:[^\s"']|"(?:\\.|[^"])*"|'(?:\\.|[^'])*')+''', cmdline)
+
+        # Transactional CLI entry/exit:
+        if cmdargs[0] == 'transaction(':
+            self._transmode = True
+            cmdargs.pop(0)
+        elif cmdargs[0] == ')' and self._transmode:
+            self._transmode = False
+            if len(cmdargs) > 1:
+                raise QMPShellError("Unexpected input after close of Transaction sub-shell")
+            qmpcmd = { 'execute': 'transaction',
+                       'arguments': { 'actions': self._actions } }
+            self._actions = list()
+            return qmpcmd
+
+        # Nothing to process?
+        if not cmdargs:
+            return None
+
+        # Parse and then cache this Transactional Action
+        if self._transmode:
+            finalize = False
+            action = { 'type': cmdargs[0], 'data': {} }
+            if cmdargs[-1] == ')':
+                cmdargs.pop(-1)
+                finalize = True
+            self.__cli_expr(cmdargs[1:], action['data'])
+            self._actions.append(action)
+            return self.__build_cmd(')') if finalize else None
+
+        # Standard command: parse and return it to be executed.
+        qmpcmd = { 'execute': cmdargs[0], 'arguments': {} }
+        self.__cli_expr(cmdargs[1:], qmpcmd['arguments'])
+        return qmpcmd
+
+    def _print(self, qmp):
+        indent = None
+        if self._pretty:
+            indent = 4
+        jsobj = json.dumps(qmp, indent=indent, sort_keys=self._pretty)
+        print(str(jsobj))
+
+    def _execute_cmd(self, cmdline):
+        try:
+            qmpcmd = self.__build_cmd(cmdline)
+        except Exception as e:
+            print('Error while parsing command line: %s' % e)
+            print('command format:  ', end=' ')
+            print('[arg-name1=arg1] ... [arg-nameN=argN]')
+            return True
+        # For transaction mode, we may have just cached the action:
+        if qmpcmd is None:
+            return True
+        if self._verbose:
+            self._print(qmpcmd)
+        resp = self.cmd_obj(qmpcmd)
+        if resp is None:
+            print('Disconnected')
+            return False
+        self._print(resp)
+        return True
+
+    def connect(self, negotiate):
+        self._greeting = super(QMPShell, self).connect(negotiate)
+        self.__completer_setup()
+
+    def show_banner(self, msg='Welcome to the QMP low-level shell!'):
+        print(msg)
+        if not self._greeting:
+            print('Connected')
+            return
+        version = self._greeting['QMP']['version']['qemu']
+        print('Connected to QEMU %d.%d.%d\n' % (version['major'],version['minor'],version['micro']))
+
+    def get_prompt(self):
+        if self._transmode:
+            return "TRANS> "
+        return "(QEMU) "
+
+    def read_exec_command(self, prompt):
+        """
+        Read and execute a command.
+
+        @return True if execution was ok, return False if disconnected.
+        """
+        try:
+            cmdline = input(prompt)
+        except EOFError:
+            print()
+            return False
+        if cmdline == '':
+            for ev in self.get_events():
+                print(ev)
+            self.clear_events()
+            return True
+        else:
+            return self._execute_cmd(cmdline)
+
+    def set_verbosity(self, verbose):
+        self._verbose = verbose
+
+class HMPShell(QMPShell):
+    def __init__(self, address):
+        QMPShell.__init__(self, address)
+        self.__cpu_index = 0
+
+    def __cmd_completion(self):
+        for cmd in self.__cmd_passthrough('help')['return'].split('\r\n'):
+            if cmd and cmd[0] != '[' and cmd[0] != '\t':
+                name = cmd.split()[0] # drop help text
+                if name == 'info':
+                    continue
+                if name.find('|') != -1:
+                    # Command in the form 'foobar|f' or 'f|foobar', take the
+                    # full name
+                    opt = name.split('|')
+                    if len(opt[0]) == 1:
+                        name = opt[1]
+                    else:
+                        name = opt[0]
+                self._completer.append(name)
+                self._completer.append('help ' + name) # help completion
+
+    def __info_completion(self):
+        for cmd in self.__cmd_passthrough('info')['return'].split('\r\n'):
+            if cmd:
+                self._completer.append('info ' + cmd.split()[1])
+
+    def __other_completion(self):
+        # special cases
+        self._completer.append('help info')
+
+    def _fill_completion(self):
+        self.__cmd_completion()
+        self.__info_completion()
+        self.__other_completion()
+
+    def __cmd_passthrough(self, cmdline, cpu_index = 0):
+        return self.cmd_obj({ 'execute': 'human-monitor-command', 'arguments':
+                              { 'command-line': cmdline,
+                                'cpu-index': cpu_index } })
+
+    def _execute_cmd(self, cmdline):
+        if cmdline.split()[0] == "cpu":
+            # trap the cpu command, it requires special setting
+            try:
+                idx = int(cmdline.split()[1])
+                if not 'return' in self.__cmd_passthrough('info version', idx):
+                    print('bad CPU index')
+                    return True
+                self.__cpu_index = idx
+            except ValueError:
+                print('cpu command takes an integer argument')
+                return True
+        resp = self.__cmd_passthrough(cmdline, self.__cpu_index)
+        if resp is None:
+            print('Disconnected')
+            return False
+        assert 'return' in resp or 'error' in resp
+        if 'return' in resp:
+            # Success
+            if len(resp['return']) > 0:
+                print(resp['return'], end=' ')
+        else:
+            # Error
+            print('%s: %s' % (resp['error']['class'], resp['error']['desc']))
+        return True
+
+    def show_banner(self):
+        QMPShell.show_banner(self, msg='Welcome to the HMP shell!')
+
+def die(msg):
+    sys.stderr.write('ERROR: %s\n' % msg)
+    sys.exit(1)
+
+def fail_cmdline(option=None):
+    if option:
+        sys.stderr.write('ERROR: bad command-line option \'%s\'\n' % option)
+    sys.stderr.write('qmp-shell [ -v ] [ -p ] [ -H ] [ -N ] < UNIX socket path> | < TCP address:port >\n')
+    sys.stderr.write('    -v     Verbose (echo command sent and received)\n')
+    sys.stderr.write('    -p     Pretty-print JSON\n')
+    sys.stderr.write('    -H     Use HMP interface\n')
+    sys.stderr.write('    -N     Skip negotiate (for qemu-ga)\n')
+    sys.exit(1)
+
+def main():
+    addr = ''
+    qemu = None
+    hmp = False
+    pretty = False
+    verbose = False
+    negotiate = True
+
+    try:
+        for arg in sys.argv[1:]:
+            if arg == "-H":
+                if qemu is not None:
+                    fail_cmdline(arg)
+                hmp = True
+            elif arg == "-p":
+                pretty = True
+            elif arg == "-N":
+                negotiate = False
+            elif arg == "-v":
+                verbose = True
+            else:
+                if qemu is not None:
+                    fail_cmdline(arg)
+                if hmp:
+                    qemu = HMPShell(arg)
+                else:
+                    qemu = QMPShell(arg, pretty)
+                addr = arg
+
+        if qemu is None:
+            fail_cmdline()
+    except QMPShellBadPort:
+        die('bad port number in command-line')
+
+    try:
+        qemu.connect(negotiate)
+    except qmp.QMPConnectError:
+        die('Didn\'t get QMP greeting message')
+    except qmp.QMPCapabilitiesError:
+        die('Could not negotiate capabilities')
+    except qemu.error:
+        die('Could not connect to %s' % addr)
+
+    qemu.show_banner()
+    qemu.set_verbosity(verbose)
+    while qemu.read_exec_command(qemu.get_prompt()):
+        pass
+    qemu.close()
+
+if __name__ == '__main__':
+    main()
diff --git a/scripts/qmp/qom-fuse b/scripts/qmp/qom-fuse
new file mode 100755
index 000000000..7c7cff8ed
--- /dev/null
+++ b/scripts/qmp/qom-fuse
@@ -0,0 +1,147 @@
+#!/usr/bin/env python3
+##
+# QEMU Object Model test tools
+#
+# Copyright IBM, Corp. 2012
+# Copyright (C) 2020 Red Hat, Inc.
+#
+# Authors:
+#  Anthony Liguori   
+#  Markus Armbruster 
+#
+# This work is licensed under the terms of the GNU GPL, version 2 or later.  See
+# the COPYING file in the top-level directory.
+##
+
+import fuse, stat
+from fuse import FUSE, FuseOSError, Operations
+import os, posix, sys
+from errno import *
+
+sys.path.append(os.path.join(os.path.dirname(__file__), '..', '..', 'python'))
+from qemu.qmp import QEMUMonitorProtocol
+
+fuse.fuse_python_api = (0, 2)
+
+class QOMFS(Operations):
+    def __init__(self, qmp):
+        self.qmp = qmp
+        self.qmp.connect()
+        self.ino_map = {}
+        self.ino_count = 1
+
+    def get_ino(self, path):
+        if path in self.ino_map:
+            return self.ino_map[path]
+        self.ino_map[path] = self.ino_count
+        self.ino_count += 1
+        return self.ino_map[path]
+
+    def is_object(self, path):
+        try:
+            items = self.qmp.command('qom-list', path=path)
+            return True
+        except:
+            return False
+
+    def is_property(self, path):
+        path, prop = path.rsplit('/', 1)
+        if path == '':
+            path = '/'
+        try:
+            for item in self.qmp.command('qom-list', path=path):
+                if item['name'] == prop:
+                    return True
+            return False
+        except:
+            return False
+
+    def is_link(self, path):
+        path, prop = path.rsplit('/', 1)
+        if path == '':
+            path = '/'
+        try:
+            for item in self.qmp.command('qom-list', path=path):
+                if item['name'] == prop:
+                    if item['type'].startswith('link<'):
+                        return True
+                    return False
+            return False
+        except:
+            return False
+
+    def read(self, path, length, offset, fh):
+        if not self.is_property(path):
+            return -ENOENT
+
+        path, prop = path.rsplit('/', 1)
+        if path == '':
+            path = '/'
+        try:
+            data = self.qmp.command('qom-get', path=path, property=prop)
+            data += '\n' # make values shell friendly
+        except:
+            raise FuseOSError(EPERM)
+
+        if offset > len(data):
+            return ''
+
+        return bytes(data[offset:][:length], encoding='utf-8')
+
+    def readlink(self, path):
+        if not self.is_link(path):
+            return False
+        path, prop = path.rsplit('/', 1)
+        prefix = '/'.join(['..'] * (len(path.split('/')) - 1))
+        return prefix + str(self.qmp.command('qom-get', path=path,
+                                             property=prop))
+
+    def getattr(self, path, fh=None):
+        if self.is_link(path):
+            value = { 'st_mode': 0o755 | stat.S_IFLNK,
+                      'st_ino': self.get_ino(path),
+                      'st_dev': 0,
+                      'st_nlink': 2,
+                      'st_uid': 1000,
+                      'st_gid': 1000,
+                      'st_size': 4096,
+                      'st_atime': 0,
+                      'st_mtime': 0,
+                      'st_ctime': 0 }
+        elif self.is_object(path):
+            value = { 'st_mode': 0o755 | stat.S_IFDIR,
+                      'st_ino': self.get_ino(path),
+                      'st_dev': 0,
+                      'st_nlink': 2,
+                      'st_uid': 1000,
+                      'st_gid': 1000,
+                      'st_size': 4096,
+                      'st_atime': 0,
+                      'st_mtime': 0,
+                      'st_ctime': 0 }
+        elif self.is_property(path):
+            value = { 'st_mode': 0o644 | stat.S_IFREG,
+                      'st_ino': self.get_ino(path),
+                      'st_dev': 0,
+                      'st_nlink': 1,
+                      'st_uid': 1000,
+                      'st_gid': 1000,
+                      'st_size': 4096,
+                      'st_atime': 0,
+                      'st_mtime': 0,
+                      'st_ctime': 0 }
+        else:
+            raise FuseOSError(ENOENT)
+        return value
+
+    def readdir(self, path, fh):
+        yield '.'
+        yield '..'
+        for item in self.qmp.command('qom-list', path=path):
+            yield str(item['name'])
+
+if __name__ == '__main__':
+    import os
+
+    fuse = FUSE(QOMFS(QEMUMonitorProtocol(os.environ['QMP_SOCKET'])),
+                sys.argv[1], foreground=True)
diff --git a/scripts/qmp/qom-get b/scripts/qmp/qom-get
new file mode 100755
index 000000000..666df7183
--- /dev/null
+++ b/scripts/qmp/qom-get
@@ -0,0 +1,69 @@
+#!/usr/bin/env python3
+##
+# QEMU Object Model test tools
+#
+# Copyright IBM, Corp. 2011
+#
+# Authors:
+#  Anthony Liguori   
+#
+# This work is licensed under the terms of the GNU GPL, version 2 or later.  See
+# the COPYING file in the top-level directory.
+##
+
+import sys
+import os
+
+sys.path.append(os.path.join(os.path.dirname(__file__), '..', '..', 'python'))
+from qemu.qmp import QEMUMonitorProtocol
+
+cmd, args = sys.argv[0], sys.argv[1:]
+socket_path = None
+path = None
+prop = None
+
+def usage():
+    return '''environment variables:
+    QMP_SOCKET=
+usage:
+    %s [-h] [-s ] .
+''' % cmd
+
+def usage_error(error_msg = "unspecified error"):
+    sys.stderr.write('%s\nERROR: %s\n' % (usage(), error_msg))
+    exit(1)
+
+if len(args) > 0:
+    if args[0] == "-h":
+        print(usage())
+        exit(0);
+    elif args[0] == "-s":
+        try:
+            socket_path = args[1]
+        except:
+            usage_error("missing argument: QMP socket path or address");
+        args = args[2:]
+
+if not socket_path:
+    if 'QMP_SOCKET' in os.environ:
+        socket_path = os.environ['QMP_SOCKET']
+    else:
+        usage_error("no QMP socket path or address given");
+
+if len(args) > 0:
+    try:
+        path, prop = args[0].rsplit('.', 1)
+    except:
+        usage_error("invalid format for path/property/value")
+else:
+    usage_error("not enough arguments")
+
+srv = QEMUMonitorProtocol(socket_path)
+srv.connect()
+
+rsp = srv.command('qom-get', path=path, property=prop)
+if type(rsp) == dict:
+    for i in rsp.keys():
+        print('%s: %s' % (i, rsp[i]))
+else:
+    print(rsp)
diff --git a/scripts/qmp/qom-list b/scripts/qmp/qom-list
new file mode 100755
index 000000000..5074fd939
--- /dev/null
+++ b/scripts/qmp/qom-list
@@ -0,0 +1,66 @@
+#!/usr/bin/env python3
+##
+# QEMU Object Model test tools
+#
+# Copyright IBM, Corp. 2011
+#
+# Authors:
+#  Anthony Liguori   
+#
+# This work is licensed under the terms of the GNU GPL, version 2 or later.  See
+# the COPYING file in the top-level directory.
+##
+
+import sys
+import os
+
+sys.path.append(os.path.join(os.path.dirname(__file__), '..', '..', 'python'))
+from qemu.qmp import QEMUMonitorProtocol
+
+cmd, args = sys.argv[0], sys.argv[1:]
+socket_path = None
+path = None
+prop = None
+
+def usage():
+    return '''environment variables:
+    QMP_SOCKET=
+usage:
+    %s [-h] [-s ] []
+''' % cmd
+
+def usage_error(error_msg = "unspecified error"):
+    sys.stderr.write('%s\nERROR: %s\n' % (usage(), error_msg))
+    exit(1)
+
+if len(args) > 0:
+    if args[0] == "-h":
+        print(usage())
+        exit(0);
+    elif args[0] == "-s":
+        try:
+            socket_path = args[1]
+        except:
+            usage_error("missing argument: QMP socket path or address");
+        args = args[2:]
+
+if not socket_path:
+    if 'QMP_SOCKET' in os.environ:
+        socket_path = os.environ['QMP_SOCKET']
+    else:
+        usage_error("no QMP socket path or address given");
+
+srv = QEMUMonitorProtocol(socket_path)
+srv.connect()
+
+if len(args) == 0:
+    print('/')
+    sys.exit(0)
+
+for item in srv.command('qom-list', path=args[0]):
+    if item['type'].startswith('child<'):
+        print('%s/' % item['name'])
+    elif item['type'].startswith('link<'):
+        print('@%s/' % item['name'])
+    else:
+        print('%s' % item['name'])
diff --git a/scripts/qmp/qom-set b/scripts/qmp/qom-set
new file mode 100755
index 000000000..240a78187
--- /dev/null
+++ b/scripts/qmp/qom-set
@@ -0,0 +1,66 @@
+#!/usr/bin/env python3
+##
+# QEMU Object Model test tools
+#
+# Copyright IBM, Corp. 2011
+#
+# Authors:
+#  Anthony Liguori   
+#
+# This work is licensed under the terms of the GNU GPL, version 2 or later.  See
+# the COPYING file in the top-level directory.
+##
+
+import sys
+import os
+
+sys.path.append(os.path.join(os.path.dirname(__file__), '..', '..', 'python'))
+from qemu.qmp import QEMUMonitorProtocol
+
+cmd, args = sys.argv[0], sys.argv[1:]
+socket_path = None
+path = None
+prop = None
+value = None
+
+def usage():
+    return '''environment variables:
+    QMP_SOCKET=
+usage:
+    %s [-h] [-s ] . 
+''' % cmd
+
+def usage_error(error_msg = "unspecified error"):
+    sys.stderr.write('%s\nERROR: %s\n' % (usage(), error_msg))
+    exit(1)
+
+if len(args) > 0:
+    if args[0] == "-h":
+        print(usage())
+        exit(0);
+    elif args[0] == "-s":
+        try:
+            socket_path = args[1]
+        except:
+            usage_error("missing argument: QMP socket path or address");
+        args = args[2:]
+
+if not socket_path:
+    if 'QMP_SOCKET' in os.environ:
+        socket_path = os.environ['QMP_SOCKET']
+    else:
+        usage_error("no QMP socket path or address given");
+
+if len(args) > 1:
+    try:
+        path, prop = args[0].rsplit('.', 1)
+    except:
+        usage_error("invalid format for path/property/value")
+    value = args[1]
+else:
+    usage_error("not enough arguments")
+
+srv = QEMUMonitorProtocol(socket_path)
+srv.connect()
+
+print(srv.command('qom-set', path=path, property=prop, value=value))
diff --git a/scripts/qmp/qom-tree b/scripts/qmp/qom-tree
new file mode 100755
index 000000000..25b078132
--- /dev/null
+++ b/scripts/qmp/qom-tree
@@ -0,0 +1,77 @@
+#!/usr/bin/env python3
+##
+# QEMU Object Model test tools
+#
+# Copyright IBM, Corp. 2011
+# Copyright (c) 2013 SUSE LINUX Products GmbH
+#
+# Authors:
+#  Anthony Liguori   
+#  Andreas Faerber   
+#
+# This work is licensed under the terms of the GNU GPL, version 2 or later.  See
+# the COPYING file in the top-level directory.
+##
+
+import sys
+import os
+
+sys.path.append(os.path.join(os.path.dirname(__file__), '..', '..', 'python'))
+from qemu.qmp import QEMUMonitorProtocol
+
+cmd, args = sys.argv[0], sys.argv[1:]
+socket_path = None
+path = None
+prop = None
+
+def usage():
+    return '''environment variables:
+    QMP_SOCKET=
+usage:
+    %s [-h] [-s ] []
+''' % cmd
+
+def usage_error(error_msg = "unspecified error"):
+    sys.stderr.write('%s\nERROR: %s\n' % (usage(), error_msg))
+    exit(1)
+
+if len(args) > 0:
+    if args[0] == "-h":
+        print(usage())
+        exit(0);
+    elif args[0] == "-s":
+        try:
+            socket_path = args[1]
+        except:
+            usage_error("missing argument: QMP socket path or address");
+        args = args[2:]
+
+if not socket_path:
+    if 'QMP_SOCKET' in os.environ:
+        socket_path = os.environ['QMP_SOCKET']
+    else:
+        usage_error("no QMP socket path or address given");
+
+srv = QEMUMonitorProtocol(socket_path)
+srv.connect()
+
+def list_node(path):
+    print('%s' % path)
+    items = srv.command('qom-list', path=path)
+    for item in items:
+        if not item['type'].startswith('child<'):
+            try:
+                print('  %s: %s (%s)' % (item['name'], srv.command('qom-get', path=path, property=item['name']), item['type']))
+            except:
+                print('  %s:  (%s)' % (item['name'], item['type']))
+    print('')
+    for item in items:
+        if item['type'].startswith('child<'):
+            list_node((path if (path != '/') else '')  + '/' + item['name'])
+
+if len(args) == 0:
+    path = '/'
+else:
+    path = args[0]
+
+list_node(path)
diff --git a/scripts/refresh-pxe-roms.sh b/scripts/refresh-pxe-roms.sh
new file mode 100755
index 000000000..90fc0b374
--- /dev/null
+++ b/scripts/refresh-pxe-roms.sh
@@ -0,0 +1,31 @@
+#!/bin/bash
+
+# PXE ROM build script
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, see .
+#
+# Copyright (C) 2011 Red Hat, Inc.
+#   Authors: Alex Williamson 
+#
+# Usage: Run from root of qemu tree
+# ./scripts/refresh-pxe-roms.sh
+
+targets="pxerom"
+if test -x "$(which EfiRom 2>/dev/null)"; then
+    targets="$targets efirom"
+fi
+
+cd roms
+make -j4 $targets || exit 1
+make clean
diff --git a/scripts/render_block_graph.py b/scripts/render_block_graph.py
new file mode 100755
index 000000000..da6acf050
--- /dev/null
+++ b/scripts/render_block_graph.py
@@ -0,0 +1,125 @@
+#!/usr/bin/env python3
+#
+# Render Qemu Block Graph
+#
+# Copyright (c) 2018 Virtuozzo International GmbH. All rights reserved.
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program.  If not, see .
+#
+
+import os
+import sys
+import subprocess
+import json
+from graphviz import Digraph
+
+sys.path.append(os.path.join(os.path.dirname(__file__), '..', 'python'))
+from qemu.qmp import (
+    QEMUMonitorProtocol,
+    QMPResponseError,
+)
+
+
+def perm(arr):
+    s = 'w' if 'write' in arr else '_'
+    s += 'r' if 'consistent-read' in arr else '_'
+    s += 'u' if 'write-unchanged' in arr else '_'
+    s += 'g' if 'graph-mod' in arr else '_'
+    s += 's' if 'resize' in arr else '_'
+    return s
+
+
+def render_block_graph(qmp, filename, format='png'):
+    '''
+    Render graph in text (dot) representation into "@filename" and
+    representation in @format into "@filename.@format"
+    '''
+
+    bds_nodes = qmp.command('query-named-block-nodes')
+    bds_nodes = {n['node-name']: n for n in bds_nodes}
+
+    job_nodes = qmp.command('query-block-jobs')
+    job_nodes = {n['device']: n for n in job_nodes}
+
+    block_graph = qmp.command('x-debug-query-block-graph')
+
+    graph = Digraph(comment='Block Nodes Graph')
+    graph.format = format
+    graph.node('permission symbols:\l'
+               '  w - Write\l'
+               '  r - consistent-Read\l'
+               '  u - write - Unchanged\l'
+               '  g - Graph-mod\l'
+               '  s - reSize\l'
+               'edge label scheme:\l'
+               '  \l'
+               '  \l'
+               '  \l', shape='none')
+
+    for n in block_graph['nodes']:
+        if n['type'] == 'block-driver':
+            info = bds_nodes[n['name']]
+            label = n['name'] + ' [' + info['drv'] + ']'
+            if info['drv'] == 'file':
+                label += '\n' + os.path.basename(info['file'])
+            shape = 'ellipse'
+        elif n['type'] == 'block-job':
+            info = job_nodes[n['name']]
+            label = info['type'] + ' job (' + n['name'] + ')'
+            shape = 'box'
+        else:
+            assert n['type'] == 'block-backend'
+            label = n['name'] if n['name'] else 'unnamed blk'
+            shape = 'box'
+
+        graph.node(str(n['id']), label, shape=shape)
+
+    for e in block_graph['edges']:
+        label = '%s\l%s\l%s\l' % (e['name'], perm(e['perm']),
+                                  perm(e['shared-perm']))
+        graph.edge(str(e['parent']), str(e['child']), label=label)
+
+    graph.render(filename)
+
+
+class LibvirtGuest():
+    def __init__(self, name):
+        self.name = name
+
+    def command(self, cmd):
+        # only supports qmp commands without parameters
+        m = {'execute': cmd}
+        ar = ['virsh', 'qemu-monitor-command', self.name, json.dumps(m)]
+
+        reply = json.loads(subprocess.check_output(ar))
+
+        if 'error' in reply:
+            raise QMPResponseError(reply)
+
+        return reply['return']
+
+
+if __name__ == '__main__':
+    obj = sys.argv[1]
+    out = sys.argv[2]
+
+    if os.path.exists(obj):
+        # assume unix socket
+        qmp = QEMUMonitorProtocol(obj)
+        qmp.connect()
+    else:
+        # assume libvirt guest name
+        qmp = LibvirtGuest(obj)
+
+    render_block_graph(qmp, out)
diff --git a/scripts/replay-dump.py b/scripts/replay-dump.py
new file mode 100755
index 000000000..3ba97a6d3
--- /dev/null
+++ b/scripts/replay-dump.py
@@ -0,0 +1,308 @@
+#!/usr/bin/env python3
+# -*- coding: utf-8 -*-
+#
+# Dump the contents of a recorded execution stream
+#
+#  Copyright (c) 2017 Alex Bennée 
+#
+# This library is free software; you can redistribute it and/or
+# modify it under the terms of the GNU Lesser General Public
+# License as published by the Free Software Foundation; either
+# version 2.1 of the License, or (at your option) any later version.
+#
+# This library is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+# Lesser General Public License for more details.
+#
+# You should have received a copy of the GNU Lesser General Public
+# License along with this library; if not, see .
+
+import argparse
+import struct
+from collections import namedtuple
+
+# This mirrors some of the global replay state which some of the
+# stream loading refers to. Some decoders may read the next event so
+# we need handle that case. Calling reuse_event will ensure the next
+# event is read from the cache rather than advancing the file.
+
+class ReplayState(object):
+    def __init__(self):
+        self.event = -1
+        self.event_count = 0
+        self.already_read = False
+        self.current_checkpoint = 0
+        self.checkpoint = 0
+
+    def set_event(self, ev):
+        self.event = ev
+        self.event_count += 1
+
+    def get_event(self):
+        self.already_read = False
+        return self.event
+
+    def reuse_event(self, ev):
+        self.event = ev
+        self.already_read = True
+
+    def set_checkpoint(self):
+        self.checkpoint = self.event - self.checkpoint_start
+
+    def get_checkpoint(self):
+        return self.checkpoint
+
+replay_state = ReplayState()
+
+# Simple read functions that mirror replay-internal.c
+# The file-stream is big-endian and manually written out a byte at a time.
+
+def read_byte(fin):
+    "Read a single byte"
+    return struct.unpack('>B', fin.read(1))[0]
+
+def read_event(fin):
+    "Read a single byte event, but save some state"
+    if replay_state.already_read:
+        return replay_state.get_event()
+    else:
+        replay_state.set_event(read_byte(fin))
+        return replay_state.event
+
+def read_word(fin):
+    "Read a 16 bit word"
+    return struct.unpack('>H', fin.read(2))[0]
+
+def read_dword(fin):
+    "Read a 32 bit word"
+    return struct.unpack('>I', fin.read(4))[0]
+
+def read_qword(fin):
+    "Read a 64 bit word"
+    return struct.unpack('>Q', fin.read(8))[0]
+
+# Generic decoder structure
+Decoder = namedtuple("Decoder", "eid name fn")
+
+def call_decode(table, index, dumpfile):
+    "Search decode table for next step"
+    decoder = next((d for d in table if d.eid == index), None)
+    if not decoder:
+        print("Could not decode index: %d" % (index))
+        print("Entry is: %s" % (decoder))
+        print("Decode Table is:\n%s" % (table))
+        return False
+    else:
+        return decoder.fn(decoder.eid, decoder.name, dumpfile)
+
+# Print event
+def print_event(eid, name, string=None, event_count=None):
+    "Print event with count"
+    if not event_count:
+        event_count = replay_state.event_count
+
+    if string:
+        print("%d:%s(%d) %s" % (event_count, name, eid, string))
+    else:
+        print("%d:%s(%d)" % (event_count, name, eid))
+
+
+# Decoders for each event type
+
+def decode_unimp(eid, name, _unused_dumpfile):
+    "Unimplimented decoder, will trigger exit"
+    print("%s not handled - will now stop" % (name))
+    return False
+
+# Checkpoint decoder
+def swallow_async_qword(eid, name, dumpfile):
+    "Swallow a qword of data without looking at it"
+    step_id = read_qword(dumpfile)
+    print("  %s(%d) @ %d" % (name, eid, step_id))
+    return True
+
+async_decode_table = [ Decoder(0, "REPLAY_ASYNC_EVENT_BH", swallow_async_qword),
+                       Decoder(1, "REPLAY_ASYNC_INPUT", decode_unimp),
+                       Decoder(2, "REPLAY_ASYNC_INPUT_SYNC", decode_unimp),
+                       Decoder(3, "REPLAY_ASYNC_CHAR_READ", decode_unimp),
+                       Decoder(4, "REPLAY_ASYNC_EVENT_BLOCK", decode_unimp),
+                       Decoder(5, "REPLAY_ASYNC_EVENT_NET", decode_unimp),
+]
+# See replay_read_events/replay_read_event
+def decode_async(eid, name, dumpfile):
+    """Decode an ASYNC event"""
+
+    print_event(eid, name)
+
+    async_event_kind = read_byte(dumpfile)
+    async_event_checkpoint = read_byte(dumpfile)
+
+    if async_event_checkpoint != replay_state.current_checkpoint:
+        print("  mismatch between checkpoint %d and async data %d" % (
+            replay_state.current_checkpoint, async_event_checkpoint))
+        return True
+
+    return call_decode(async_decode_table, async_event_kind, dumpfile)
+
+
+def decode_instruction(eid, name, dumpfile):
+    ins_diff = read_dword(dumpfile)
+    print_event(eid, name, "0x%x" % (ins_diff))
+    return True
+
+def decode_audio_out(eid, name, dumpfile):
+    audio_data = read_dword(dumpfile)
+    print_event(eid, name, "%d" % (audio_data))
+    return True
+
+def decode_checkpoint(eid, name, dumpfile):
+    """Decode a checkpoint.
+
+    Checkpoints contain a series of async events with their own specific data.
+    """
+    replay_state.set_checkpoint()
+    # save event count as we peek ahead
+    event_number = replay_state.event_count
+    next_event = read_event(dumpfile)
+
+    # if the next event is EVENT_ASYNC there are a bunch of
+    # async events to read, otherwise we are done
+    if next_event != 3:
+        print_event(eid, name, "no additional data", event_number)
+    else:
+        print_event(eid, name, "more data follows", event_number)
+
+    replay_state.reuse_event(next_event)
+    return True
+
+def decode_checkpoint_init(eid, name, dumpfile):
+    print_event(eid, name)
+    return True
+
+def decode_interrupt(eid, name, dumpfile):
+    print_event(eid, name)
+    return True
+
+def decode_clock(eid, name, dumpfile):
+    clock_data = read_qword(dumpfile)
+    print_event(eid, name, "0x%x" % (clock_data))
+    return True
+
+
+# pre-MTTCG merge
+v5_event_table = [Decoder(0, "EVENT_INSTRUCTION", decode_instruction),
+                  Decoder(1, "EVENT_INTERRUPT", decode_interrupt),
+                  Decoder(2, "EVENT_EXCEPTION", decode_unimp),
+                  Decoder(3, "EVENT_ASYNC", decode_async),
+                  Decoder(4, "EVENT_SHUTDOWN", decode_unimp),
+                  Decoder(5, "EVENT_CHAR_WRITE", decode_unimp),
+                  Decoder(6, "EVENT_CHAR_READ_ALL", decode_unimp),
+                  Decoder(7, "EVENT_CHAR_READ_ALL_ERROR", decode_unimp),
+                  Decoder(8, "EVENT_CLOCK_HOST", decode_clock),
+                  Decoder(9, "EVENT_CLOCK_VIRTUAL_RT", decode_clock),
+                  Decoder(10, "EVENT_CP_CLOCK_WARP_START", decode_checkpoint),
+                  Decoder(11, "EVENT_CP_CLOCK_WARP_ACCOUNT", decode_checkpoint),
+                  Decoder(12, "EVENT_CP_RESET_REQUESTED", decode_checkpoint),
+                  Decoder(13, "EVENT_CP_SUSPEND_REQUESTED", decode_checkpoint),
+                  Decoder(14, "EVENT_CP_CLOCK_VIRTUAL", decode_checkpoint),
+                  Decoder(15, "EVENT_CP_CLOCK_HOST", decode_checkpoint),
+                  Decoder(16, "EVENT_CP_CLOCK_VIRTUAL_RT", decode_checkpoint),
+                  Decoder(17, "EVENT_CP_INIT", decode_checkpoint_init),
+                  Decoder(18, "EVENT_CP_RESET", decode_checkpoint),
+]
+
+# post-MTTCG merge, AUDIO support added
+v6_event_table = [Decoder(0, "EVENT_INSTRUCTION", decode_instruction),
+                  Decoder(1, "EVENT_INTERRUPT", decode_interrupt),
+                  Decoder(2, "EVENT_EXCEPTION", decode_unimp),
+                  Decoder(3, "EVENT_ASYNC", decode_async),
+                  Decoder(4, "EVENT_SHUTDOWN", decode_unimp),
+                  Decoder(5, "EVENT_CHAR_WRITE", decode_unimp),
+                  Decoder(6, "EVENT_CHAR_READ_ALL", decode_unimp),
+                  Decoder(7, "EVENT_CHAR_READ_ALL_ERROR", decode_unimp),
+                  Decoder(8, "EVENT_AUDIO_OUT", decode_audio_out),
+                  Decoder(9, "EVENT_AUDIO_IN", decode_unimp),
+                  Decoder(10, "EVENT_CLOCK_HOST", decode_clock),
+                  Decoder(11, "EVENT_CLOCK_VIRTUAL_RT", decode_clock),
+                  Decoder(12, "EVENT_CP_CLOCK_WARP_START", decode_checkpoint),
+                  Decoder(13, "EVENT_CP_CLOCK_WARP_ACCOUNT", decode_checkpoint),
+                  Decoder(14, "EVENT_CP_RESET_REQUESTED", decode_checkpoint),
+                  Decoder(15, "EVENT_CP_SUSPEND_REQUESTED", decode_checkpoint),
+                  Decoder(16, "EVENT_CP_CLOCK_VIRTUAL", decode_checkpoint),
+                  Decoder(17, "EVENT_CP_CLOCK_HOST", decode_checkpoint),
+                  Decoder(18, "EVENT_CP_CLOCK_VIRTUAL_RT", decode_checkpoint),
+                  Decoder(19, "EVENT_CP_INIT", decode_checkpoint_init),
+                  Decoder(20, "EVENT_CP_RESET", decode_checkpoint),
+]
+
+# Shutdown cause added
+v7_event_table = [Decoder(0, "EVENT_INSTRUCTION", decode_instruction),
+                  Decoder(1, "EVENT_INTERRUPT", decode_interrupt),
+                  Decoder(2, "EVENT_EXCEPTION", decode_unimp),
+                  Decoder(3, "EVENT_ASYNC", decode_async),
+                  Decoder(4, "EVENT_SHUTDOWN", decode_unimp),
+                  Decoder(5, "EVENT_SHUTDOWN_HOST_ERR", decode_unimp),
+                  Decoder(6, "EVENT_SHUTDOWN_HOST_QMP", decode_unimp),
+                  Decoder(7, "EVENT_SHUTDOWN_HOST_SIGNAL", decode_unimp),
+                  Decoder(8, "EVENT_SHUTDOWN_HOST_UI", decode_unimp),
+                  Decoder(9, "EVENT_SHUTDOWN_GUEST_SHUTDOWN", decode_unimp),
+                  Decoder(10, "EVENT_SHUTDOWN_GUEST_RESET", decode_unimp),
+                  Decoder(11, "EVENT_SHUTDOWN_GUEST_PANIC", decode_unimp),
+                  Decoder(12, "EVENT_SHUTDOWN___MAX", decode_unimp),
+                  Decoder(13, "EVENT_CHAR_WRITE", decode_unimp),
+                  Decoder(14, "EVENT_CHAR_READ_ALL", decode_unimp),
+                  Decoder(15, "EVENT_CHAR_READ_ALL_ERROR", decode_unimp),
+                  Decoder(16, "EVENT_AUDIO_OUT", decode_audio_out),
+                  Decoder(17, "EVENT_AUDIO_IN", decode_unimp),
+                  Decoder(18, "EVENT_CLOCK_HOST", decode_clock),
+                  Decoder(19, "EVENT_CLOCK_VIRTUAL_RT", decode_clock),
+                  Decoder(20, "EVENT_CP_CLOCK_WARP_START", decode_checkpoint),
+                  Decoder(21, "EVENT_CP_CLOCK_WARP_ACCOUNT", decode_checkpoint),
+                  Decoder(22, "EVENT_CP_RESET_REQUESTED", decode_checkpoint),
+                  Decoder(23, "EVENT_CP_SUSPEND_REQUESTED", decode_checkpoint),
+                  Decoder(24, "EVENT_CP_CLOCK_VIRTUAL", decode_checkpoint),
+                  Decoder(25, "EVENT_CP_CLOCK_HOST", decode_checkpoint),
+                  Decoder(26, "EVENT_CP_CLOCK_VIRTUAL_RT", decode_checkpoint),
+                  Decoder(27, "EVENT_CP_INIT", decode_checkpoint_init),
+                  Decoder(28, "EVENT_CP_RESET", decode_checkpoint),
+]
+
+def parse_arguments():
+    "Grab arguments for script"
+    parser = argparse.ArgumentParser()
+    parser.add_argument("-f", "--file", help='record/replay dump to read from',
+                        required=True)
+    return parser.parse_args()
+
+def decode_file(filename):
+    "Decode a record/replay dump"
+    dumpfile = open(filename, "rb")
+
+    # read and throwaway the header
+    version = read_dword(dumpfile)
+    junk = read_qword(dumpfile)
+
+    print("HEADER: version 0x%x" % (version))
+
+    if version == 0xe02007:
+        event_decode_table = v7_event_table
+        replay_state.checkpoint_start = 12
+    elif version == 0xe02006:
+        event_decode_table = v6_event_table
+        replay_state.checkpoint_start = 12
+    else:
+        event_decode_table = v5_event_table
+        replay_state.checkpoint_start = 10
+
+    try:
+        decode_ok = True
+        while decode_ok:
+            event = read_event(dumpfile)
+            decode_ok = call_decode(event_decode_table, event, dumpfile)
+    finally:
+        dumpfile.close()
+
+if __name__ == "__main__":
+    args = parse_arguments()
+    decode_file(args.file)
diff --git a/scripts/shaderinclude.pl b/scripts/shaderinclude.pl
new file mode 100644
index 000000000..cd3bb40b1
--- /dev/null
+++ b/scripts/shaderinclude.pl
@@ -0,0 +1,16 @@
+#!/usr/bin/env perl
+use strict;
+use warnings;
+
+my $file = shift;
+open FILE, "<", $file or die "open $file: $!";
+my $name = $file;
+$name =~ s|.*/||;
+$name =~ s/[-.]/_/g;
+print "static GLchar ${name}_src[] =\n";
+while () {
+    chomp;
+    printf "    \"%s\\n\"\n", $_;
+}
+print "    \"\\n\";\n";
+close FILE;
diff --git a/scripts/show-fixed-bugs.sh b/scripts/show-fixed-bugs.sh
new file mode 100755
index 000000000..a095a4d6b
--- /dev/null
+++ b/scripts/show-fixed-bugs.sh
@@ -0,0 +1,91 @@
+#!/bin/sh
+
+# This script checks the git log for URLs to the QEMU launchpad bugtracker
+# and optionally checks whether the corresponding bugs are not closed yet.
+
+show_help () {
+    echo "Usage:"
+    echo "  -s   : Start searching at this commit"
+    echo "  -e   : End searching at this commit"
+    echo "  -c           : Check if bugs are still open"
+    echo "  -b           : Open bugs in browser"
+}
+
+while getopts "s:e:cbh" opt; do
+   case "$opt" in
+    s)  start="$OPTARG" ;;
+    e)  end="$OPTARG" ;;
+    c)  check_if_open=1 ;;
+    b)  show_in_browser=1 ;;
+    h)  show_help ; exit 0 ;;
+    *)   echo "Use -h for help." ; exit 1 ;;
+   esac
+done
+
+if [ "x$start" = "x" ]; then
+    start=$(git tag -l 'v[0-9]*\.[0-9]*\.0' | tail -n 2 | head -n 1)
+fi
+if [ "x$end" = "x" ]; then
+    end=$(git tag -l  'v[0-9]*\.[0-9]*\.0' | tail -n 1)
+fi
+
+if [ "x$start" = "x" ] || [ "x$end" = "x" ]; then
+    echo "Could not determine start or end revision ... Please note that this"
+    echo "script must be run from a checked out git repository of QEMU."
+    exit 1
+fi
+
+echo "Searching git log for bugs in the range $start..$end"
+
+urlstr='https://bugs.launchpad.net/\(bugs\|qemu/+bug\)/'
+bug_urls=$(git log $start..$end \
+  | sed -n '\,'"$urlstr"', s,\(.*\)\('"$urlstr"'\)\([0-9]*\).*,\2\4,p' \
+  | sort -u)
+
+echo Found bug URLs:
+for i in $bug_urls ; do echo " $i" ; done
+
+if [ "x$check_if_open" = "x1" ]; then
+    echo
+    echo "Checking which ones are still open..."
+    for i in $bug_urls ; do
+        if ! curl -s -L "$i" | grep "value status" | grep -q "Fix Released" ; then
+            echo " $i"
+            final_bug_urls="$final_bug_urls $i"
+        fi
+    done
+else
+    final_bug_urls=$bug_urls
+fi
+
+if [ "x$final_bug_urls" = "x" ]; then
+    echo "No open bugs found."
+elif [ "x$show_in_browser" = "x1" ]; then
+    # Try to determine which browser we should use
+    if [ "x$BROWSER" != "x" ]; then
+        bugbrowser="$BROWSER"
+    elif command -v xdg-open >/dev/null 2>&1; then
+        bugbrowser=xdg-open
+    elif command -v gnome-open >/dev/null 2>&1; then
+        bugbrowser=gnome-open
+    elif [ "$(uname)" = "Darwin" ]; then
+        bugbrowser=open
+    elif command -v sensible-browser >/dev/null 2>&1; then
+        bugbrowser=sensible-browser
+    else
+        echo "Please set the BROWSER variable to the browser of your choice."
+        exit 1
+    fi
+    # Now show the bugs in the browser
+    first=1
+    for i in $final_bug_urls; do
+        "$bugbrowser" "$i"
+        if [ $first = 1 ]; then
+            # if it is the first entry, give the browser some time to start
+            # (to avoid messages like "Firefox is already running, but is
+            # not responding...")
+            sleep 4
+            first=0
+        fi
+    done
+fi
diff --git a/scripts/signrom.py b/scripts/signrom.py
new file mode 100755
index 000000000..43693dba5
--- /dev/null
+++ b/scripts/signrom.py
@@ -0,0 +1,53 @@
+#!/usr/bin/env python3
+
+#
+# Option ROM signing utility
+#
+# Authors:
+#  Jan Kiszka 
+#
+# This work is licensed under the terms of the GNU GPL, version 2 or later.
+# See the COPYING file in the top-level directory.
+
+import sys
+import struct
+
+if len(sys.argv) < 3:
+    print('usage: signrom.py input output')
+    sys.exit(1)
+
+fin = open(sys.argv[1], 'rb')
+fout = open(sys.argv[2], 'wb')
+
+magic = fin.read(2)
+if magic != b'\x55\xaa':
+    sys.exit("%s: option ROM does not begin with magic 55 aa" % sys.argv[1])
+
+size_byte = ord(fin.read(1))
+fin.seek(0)
+data = fin.read()
+
+size = size_byte * 512
+if len(data) > size:
+    sys.stderr.write('error: ROM is too large (%d > %d)\n' % (len(data), size))
+    sys.exit(1)
+elif len(data) < size:
+    # Add padding if necessary, rounding the whole input to a multiple of
+    # 512 bytes according to the third byte of the input.
+    # size-1 because a final byte is added below to store the checksum.
+    data = data.ljust(size-1, b'\0')
+else:
+    if ord(data[-1:]) != 0:
+        sys.stderr.write('WARNING: ROM includes nonzero checksum\n')
+    data = data[:size-1]
+
+fout.write(data)
+
+checksum = 0
+for b in data:
+    checksum = (checksum - b) & 255
+
+fout.write(struct.pack('B', checksum))
+
+fin.close()
+fout.close()
diff --git a/scripts/simplebench/bench-example.py b/scripts/simplebench/bench-example.py
new file mode 100644
index 000000000..c642a5b89
--- /dev/null
+++ b/scripts/simplebench/bench-example.py
@@ -0,0 +1,80 @@
+#!/usr/bin/env python3
+#
+# Benchmark example
+#
+# Copyright (c) 2019 Virtuozzo International GmbH.
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program.  If not, see .
+#
+
+import simplebench
+from bench_block_job import bench_block_copy, drv_file, drv_nbd
+
+
+def bench_func(env, case):
+    """ Handle one "cell" of benchmarking table. """
+    return bench_block_copy(env['qemu_binary'], env['cmd'],
+                            case['source'], case['target'])
+
+
+# You may set the following five variables to correct values, to turn this
+# example to real benchmark.
+ssd_source = '/path-to-raw-source-image-at-ssd'
+ssd_target = '/path-to-raw-target-image-at-ssd'
+hdd_target = '/path-to-raw-source-image-at-hdd'
+nbd_ip = 'nbd-ip-addr'
+nbd_port = 'nbd-port-number'
+
+# Test-cases are "rows" in benchmark resulting table, 'id' is a caption for
+# the row, other fields are handled by bench_func.
+test_cases = [
+    {
+        'id': 'ssd -> ssd',
+        'source': drv_file(ssd_source),
+        'target': drv_file(ssd_target)
+    },
+    {
+        'id': 'ssd -> hdd',
+        'source': drv_file(ssd_source),
+        'target': drv_file(hdd_target)
+    },
+    {
+        'id': 'ssd -> nbd',
+        'source': drv_file(ssd_source),
+        'target': drv_nbd(nbd_ip, nbd_port)
+    },
+]
+
+# Test-envs are "columns" in benchmark resulting table, 'id is a caption for
+# the column, other fields are handled by bench_func.
+test_envs = [
+    {
+        'id': 'backup-1',
+        'cmd': 'blockdev-backup',
+        'qemu_binary': '/path-to-qemu-binary-1'
+    },
+    {
+        'id': 'backup-2',
+        'cmd': 'blockdev-backup',
+        'qemu_binary': '/path-to-qemu-binary-2'
+    },
+    {
+        'id': 'mirror',
+        'cmd': 'blockdev-mirror',
+        'qemu_binary': '/path-to-qemu-binary-1'
+    }
+]
+
+result = simplebench.bench(bench_func, test_envs, test_cases, count=3)
+print(simplebench.ascii(result))
diff --git a/scripts/simplebench/bench_block_job.py b/scripts/simplebench/bench_block_job.py
new file mode 100755
index 000000000..9808d696c
--- /dev/null
+++ b/scripts/simplebench/bench_block_job.py
@@ -0,0 +1,119 @@
+#!/usr/bin/env python
+#
+# Benchmark block jobs
+#
+# Copyright (c) 2019 Virtuozzo International GmbH.
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program.  If not, see .
+#
+
+
+import sys
+import os
+import socket
+import json
+
+sys.path.append(os.path.join(os.path.dirname(__file__), '..', '..', 'python'))
+from qemu.machine import QEMUMachine
+from qemu.qmp import QMPConnectError
+
+
+def bench_block_job(cmd, cmd_args, qemu_args):
+    """Benchmark block-job
+
+    cmd       -- qmp command to run block-job (like blockdev-backup)
+    cmd_args  -- dict of qmp command arguments
+    qemu_args -- list of Qemu command line arguments, including path to Qemu
+                 binary
+
+    Returns {'seconds': int} on success and {'error': str} on failure, dict may
+    contain addional 'vm-log' field. Return value is compatible with
+    simplebench lib.
+    """
+
+    vm = QEMUMachine(qemu_args[0], args=qemu_args[1:])
+
+    try:
+        vm.launch()
+    except OSError as e:
+        return {'error': 'popen failed: ' + str(e)}
+    except (QMPConnectError, socket.timeout):
+        return {'error': 'qemu failed: ' + str(vm.get_log())}
+
+    try:
+        res = vm.qmp(cmd, **cmd_args)
+        if res != {'return': {}}:
+            vm.shutdown()
+            return {'error': '"{}" command failed: {}'.format(cmd, str(res))}
+
+        e = vm.event_wait('JOB_STATUS_CHANGE')
+        assert e['data']['status'] == 'created'
+        start_ms = e['timestamp']['seconds'] * 1000000 + \
+            e['timestamp']['microseconds']
+
+        e = vm.events_wait((('BLOCK_JOB_READY', None),
+                            ('BLOCK_JOB_COMPLETED', None),
+                            ('BLOCK_JOB_FAILED', None)), timeout=True)
+        if e['event'] not in ('BLOCK_JOB_READY', 'BLOCK_JOB_COMPLETED'):
+            vm.shutdown()
+            return {'error': 'block-job failed: ' + str(e),
+                    'vm-log': vm.get_log()}
+        end_ms = e['timestamp']['seconds'] * 1000000 + \
+            e['timestamp']['microseconds']
+    finally:
+        vm.shutdown()
+
+    return {'seconds': (end_ms - start_ms) / 1000000.0}
+
+
+# Bench backup or mirror
+def bench_block_copy(qemu_binary, cmd, source, target):
+    """Helper to run bench_block_job() for mirror or backup"""
+    assert cmd in ('blockdev-backup', 'blockdev-mirror')
+
+    source['node-name'] = 'source'
+    target['node-name'] = 'target'
+
+    return bench_block_job(cmd,
+                           {'job-id': 'job0', 'device': 'source',
+                            'target': 'target', 'sync': 'full'},
+                           [qemu_binary,
+                            '-blockdev', json.dumps(source),
+                            '-blockdev', json.dumps(target)])
+
+
+def drv_file(filename):
+    return {'driver': 'file', 'filename': filename,
+            'cache': {'direct': True}, 'aio': 'native'}
+
+
+def drv_nbd(host, port):
+    return {'driver': 'nbd',
+            'server': {'type': 'inet', 'host': host, 'port': port}}
+
+
+if __name__ == '__main__':
+    import sys
+
+    if len(sys.argv) < 4:
+        print('USAGE: {}  '
+              ' '
+              ''.format(sys.argv[0]))
+        exit(1)
+
+    res = bench_block_job(sys.argv[1], json.loads(sys.argv[2]), sys.argv[3:])
+    if 'seconds' in res:
+        print('{:.2f}'.format(res['seconds']))
+    else:
+        print(res)
diff --git a/scripts/simplebench/bench_write_req.py b/scripts/simplebench/bench_write_req.py
new file mode 100755
index 000000000..ca1178fd6
--- /dev/null
+++ b/scripts/simplebench/bench_write_req.py
@@ -0,0 +1,170 @@
+#!/usr/bin/env python3
+#
+# Test to compare performance of write requests for two qemu-img binary files.
+#
+# The idea of the test comes from intention to check the benefit of c8bb23cbdbe
+# "qcow2: skip writing zero buffers to empty COW areas".
+#
+# Copyright (c) 2020 Virtuozzo International GmbH.
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program.  If not, see .
+#
+
+
+import sys
+import os
+import subprocess
+import simplebench
+
+
+def bench_func(env, case):
+    """ Handle one "cell" of benchmarking table. """
+    return bench_write_req(env['qemu_img'], env['image_name'],
+                           case['block_size'], case['block_offset'],
+                           case['cluster_size'])
+
+
+def qemu_img_pipe(*args):
+    '''Run qemu-img and return its output'''
+    subp = subprocess.Popen(list(args),
+                            stdout=subprocess.PIPE,
+                            stderr=subprocess.STDOUT,
+                            universal_newlines=True)
+    exitcode = subp.wait()
+    if exitcode < 0:
+        sys.stderr.write('qemu-img received signal %i: %s\n'
+                         % (-exitcode, ' '.join(list(args))))
+    return subp.communicate()[0]
+
+
+def bench_write_req(qemu_img, image_name, block_size, block_offset,
+                    cluster_size):
+    """Benchmark write requests
+
+    The function creates a QCOW2 image with the given path/name. Then it runs
+    the 'qemu-img bench' command and makes series of write requests on the
+    image clusters. Finally, it returns the total time of the write operations
+    on the disk.
+
+    qemu_img     -- path to qemu_img executable file
+    image_name   -- QCOW2 image name to create
+    block_size   -- size of a block to write to clusters
+    block_offset -- offset of the block in clusters
+    cluster_size -- size of the image cluster
+
+    Returns {'seconds': int} on success and {'error': str} on failure.
+    Return value is compatible with simplebench lib.
+    """
+
+    if not os.path.isfile(qemu_img):
+        print(f'File not found: {qemu_img}')
+        sys.exit(1)
+
+    image_dir = os.path.dirname(os.path.abspath(image_name))
+    if not os.path.isdir(image_dir):
+        print(f'Path not found: {image_name}')
+        sys.exit(1)
+
+    image_size = 1024 * 1024 * 1024
+
+    args_create = [qemu_img, 'create', '-f', 'qcow2', '-o',
+                   f'cluster_size={cluster_size}',
+                   image_name, str(image_size)]
+
+    count = int(image_size / cluster_size) - 1
+    step = str(cluster_size)
+
+    args_bench = [qemu_img, 'bench', '-w', '-n', '-t', 'none', '-c',
+                  str(count), '-s', f'{block_size}', '-o', str(block_offset),
+                  '-S', step, '-f', 'qcow2', image_name]
+
+    try:
+        qemu_img_pipe(*args_create)
+    except OSError as e:
+        os.remove(image_name)
+        return {'error': 'qemu_img create failed: ' + str(e)}
+
+    try:
+        ret = qemu_img_pipe(*args_bench)
+    except OSError as e:
+        os.remove(image_name)
+        return {'error': 'qemu_img bench failed: ' + str(e)}
+
+    os.remove(image_name)
+
+    if 'seconds' in ret:
+        ret_list = ret.split()
+        index = ret_list.index('seconds.')
+        return {'seconds': float(ret_list[index-1])}
+    else:
+        return {'error': 'qemu_img bench failed: ' + ret}
+
+
+if __name__ == '__main__':
+
+    if len(sys.argv) < 4:
+        program = os.path.basename(sys.argv[0])
+        print(f'USAGE: {program}  '
+              ' '
+              '')
+        exit(1)
+
+    # Test-cases are "rows" in benchmark resulting table, 'id' is a caption
+    # for the row, other fields are handled by bench_func.
+    test_cases = [
+        {
+            'id': '',
+            'block_size': 4096,
+            'block_offset': 0,
+            'cluster_size': 1048576
+        },
+        {
+            'id': '',
+            'block_size': 4096,
+            'block_offset': 524288,
+            'cluster_size': 1048576
+        },
+        {
+            'id': '',
+            'block_size': 1048576,
+            'block_offset': 4096,
+            'cluster_size': 1048576
+        },
+        {
+            'id': '',
+            'block_size': 4096,
+            'block_offset': 0,
+            'cluster_size': 65536
+        },
+    ]
+
+    # Test-envs are "columns" in benchmark resulting table, 'id is a caption
+    # for the column, other fields are handled by bench_func.
+    # Set the paths below to desired values
+    test_envs = [
+        {
+            'id': '',
+            'qemu_img': f'{sys.argv[1]}',
+            'image_name': f'{sys.argv[3]}'
+        },
+        {
+            'id': '',
+            'qemu_img': f'{sys.argv[2]}',
+            'image_name': f'{sys.argv[3]}'
+        },
+    ]
+
+    result = simplebench.bench(bench_func, test_envs, test_cases, count=3,
+                               initial_run=False)
+    print(simplebench.ascii(result))
diff --git a/scripts/simplebench/simplebench.py b/scripts/simplebench/simplebench.py
new file mode 100644
index 000000000..59e7314ff
--- /dev/null
+++ b/scripts/simplebench/simplebench.py
@@ -0,0 +1,128 @@
+#!/usr/bin/env python
+#
+# Simple benchmarking framework
+#
+# Copyright (c) 2019 Virtuozzo International GmbH.
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program.  If not, see .
+#
+
+
+def bench_one(test_func, test_env, test_case, count=5, initial_run=True):
+    """Benchmark one test-case
+
+    test_func   -- benchmarking function with prototype
+                   test_func(env, case), which takes test_env and test_case
+                   arguments and returns {'seconds': int} (which is benchmark
+                   result) on success and {'error': str} on error. Returned
+                   dict may contain any other additional fields.
+    test_env    -- test environment - opaque first argument for test_func
+    test_case   -- test case - opaque second argument for test_func
+    count       -- how many times to call test_func, to calculate average
+    initial_run -- do initial run of test_func, which don't get into result
+
+    Returns dict with the following fields:
+        'runs':     list of test_func results
+        'average':  average seconds per run (exists only if at least one run
+                    succeeded)
+        'delta':    maximum delta between test_func result and the average
+                    (exists only if at least one run succeeded)
+        'n-failed': number of failed runs (exists only if at least one run
+                    failed)
+    """
+    if initial_run:
+        print('  #initial run:')
+        print('   ', test_func(test_env, test_case))
+
+    runs = []
+    for i in range(count):
+        print('  #run {}'.format(i+1))
+        res = test_func(test_env, test_case)
+        print('   ', res)
+        runs.append(res)
+
+    result = {'runs': runs}
+
+    successed = [r for r in runs if ('seconds' in r)]
+    if successed:
+        avg = sum(r['seconds'] for r in successed) / len(successed)
+        result['average'] = avg
+        result['delta'] = max(abs(r['seconds'] - avg) for r in successed)
+
+    if len(successed) < count:
+        result['n-failed'] = count - len(successed)
+
+    return result
+
+
+def ascii_one(result):
+    """Return ASCII representation of bench_one() returned dict."""
+    if 'average' in result:
+        s = '{:.2f} +- {:.2f}'.format(result['average'], result['delta'])
+        if 'n-failed' in result:
+            s += '\n({} failed)'.format(result['n-failed'])
+        return s
+    else:
+        return 'FAILED'
+
+
+def bench(test_func, test_envs, test_cases, *args, **vargs):
+    """Fill benchmark table
+
+    test_func -- benchmarking function, see bench_one for description
+    test_envs -- list of test environments, see bench_one
+    test_cases -- list of test cases, see bench_one
+    args, vargs -- additional arguments for bench_one
+
+    Returns dict with the following fields:
+        'envs':  test_envs
+        'cases': test_cases
+        'tab':   filled 2D array, where cell [i][j] is bench_one result for
+                 test_cases[i] for test_envs[j] (i.e., rows are test cases and
+                 columns are test environments)
+    """
+    tab = {}
+    results = {
+        'envs': test_envs,
+        'cases': test_cases,
+        'tab': tab
+    }
+    n = 1
+    n_tests = len(test_envs) * len(test_cases)
+    for env in test_envs:
+        for case in test_cases:
+            print('Testing {}/{}: {} :: {}'.format(n, n_tests,
+                                                   env['id'], case['id']))
+            if case['id'] not in tab:
+                tab[case['id']] = {}
+            tab[case['id']][env['id']] = bench_one(test_func, env, case,
+                                                   *args, **vargs)
+            n += 1
+
+    print('Done')
+    return results
+
+
+def ascii(results):
+    """Return ASCII representation of bench() returned dict."""
+    from tabulate import tabulate
+
+    tab = [[""] + [c['id'] for c in results['envs']]]
+    for case in results['cases']:
+        row = [case['id']]
+        for env in results['envs']:
+            row.append(ascii_one(results['tab'][case['id']][env['id']]))
+        tab.append(row)
+
+    return tabulate(tab)
diff --git a/scripts/simpletrace.py b/scripts/simpletrace.py
new file mode 100755
index 000000000..20f002606
--- /dev/null
+++ b/scripts/simpletrace.py
@@ -0,0 +1,261 @@
+#!/usr/bin/env python3
+#
+# Pretty-printer for simple trace backend binary trace files
+#
+# Copyright IBM, Corp. 2010
+#
+# This work is licensed under the terms of the GNU GPL, version 2.  See
+# the COPYING file in the top-level directory.
+#
+# For help see docs/devel/tracing.txt
+
+import struct
+import inspect
+from tracetool import read_events, Event
+from tracetool.backend.simple import is_string
+
+header_event_id = 0xffffffffffffffff
+header_magic    = 0xf2b177cb0aa429b4
+dropped_event_id = 0xfffffffffffffffe
+
+record_type_mapping = 0
+record_type_event = 1
+
+log_header_fmt = '=QQQ'
+rec_header_fmt = '=QQII'
+
+def read_header(fobj, hfmt):
+    '''Read a trace record header'''
+    hlen = struct.calcsize(hfmt)
+    hdr = fobj.read(hlen)
+    if len(hdr) != hlen:
+        return None
+    return struct.unpack(hfmt, hdr)
+
+def get_record(edict, idtoname, rechdr, fobj):
+    """Deserialize a trace record from a file into a tuple
+       (name, timestamp, pid, arg1, ..., arg6)."""
+    if rechdr is None:
+        return None
+    if rechdr[0] != dropped_event_id:
+        event_id = rechdr[0]
+        name = idtoname[event_id]
+        rec = (name, rechdr[1], rechdr[3])
+        try:
+            event = edict[name]
+        except KeyError as e:
+            import sys
+            sys.stderr.write('%s event is logged but is not declared ' \
+                             'in the trace events file, try using ' \
+                             'trace-events-all instead.\n' % str(e))
+            sys.exit(1)
+
+        for type, name in event.args:
+            if is_string(type):
+                l = fobj.read(4)
+                (len,) = struct.unpack('=L', l)
+                s = fobj.read(len)
+                rec = rec + (s,)
+            else:
+                (value,) = struct.unpack('=Q', fobj.read(8))
+                rec = rec + (value,)
+    else:
+        rec = ("dropped", rechdr[1], rechdr[3])
+        (value,) = struct.unpack('=Q', fobj.read(8))
+        rec = rec + (value,)
+    return rec
+
+def get_mapping(fobj):
+    (event_id, ) = struct.unpack('=Q', fobj.read(8))
+    (len, ) = struct.unpack('=L', fobj.read(4))
+    name = fobj.read(len).decode()
+
+    return (event_id, name)
+
+def read_record(edict, idtoname, fobj):
+    """Deserialize a trace record from a file into a tuple (event_num, timestamp, pid, arg1, ..., arg6)."""
+    rechdr = read_header(fobj, rec_header_fmt)
+    return get_record(edict, idtoname, rechdr, fobj)
+
+def read_trace_header(fobj):
+    """Read and verify trace file header"""
+    header = read_header(fobj, log_header_fmt)
+    if header is None:
+        raise ValueError('Not a valid trace file!')
+    if header[0] != header_event_id:
+        raise ValueError('Not a valid trace file, header id %d != %d' %
+                         (header[0], header_event_id))
+    if header[1] != header_magic:
+        raise ValueError('Not a valid trace file, header magic %d != %d' %
+                         (header[1], header_magic))
+
+    log_version = header[2]
+    if log_version not in [0, 2, 3, 4]:
+        raise ValueError('Unknown version of tracelog format!')
+    if log_version != 4:
+        raise ValueError('Log format %d not supported with this QEMU release!'
+                         % log_version)
+
+def read_trace_records(edict, idtoname, fobj):
+    """Deserialize trace records from a file, yielding record tuples (event_num, timestamp, pid, arg1, ..., arg6).
+
+    Note that `idtoname` is modified if the file contains mapping records.
+
+    Args:
+        edict (str -> Event): events dict, indexed by name
+        idtoname (int -> str): event names dict, indexed by event ID
+        fobj (file): input file
+
+    """
+    while True:
+        t = fobj.read(8)
+        if len(t) == 0:
+            break
+
+        (rectype, ) = struct.unpack('=Q', t)
+        if rectype == record_type_mapping:
+            event_id, name = get_mapping(fobj)
+            idtoname[event_id] = name
+        else:
+            rec = read_record(edict, idtoname, fobj)
+
+            yield rec
+
+class Analyzer(object):
+    """A trace file analyzer which processes trace records.
+
+    An analyzer can be passed to run() or process().  The begin() method is
+    invoked, then each trace record is processed, and finally the end() method
+    is invoked.
+
+    If a method matching a trace event name exists, it is invoked to process
+    that trace record.  Otherwise the catchall() method is invoked.
+
+    Example:
+    The following method handles the runstate_set(int new_state) trace event::
+
+      def runstate_set(self, new_state):
+          ...
+
+    The method can also take a timestamp argument before the trace event
+    arguments::
+
+      def runstate_set(self, timestamp, new_state):
+          ...
+
+    Timestamps have the uint64_t type and are in nanoseconds.
+
+    The pid can be included in addition to the timestamp and is useful when
+    dealing with traces from multiple processes::
+
+      def runstate_set(self, timestamp, pid, new_state):
+          ...
+    """
+
+    def begin(self):
+        """Called at the start of the trace."""
+        pass
+
+    def catchall(self, event, rec):
+        """Called if no specific method for processing a trace event has been found."""
+        pass
+
+    def end(self):
+        """Called at the end of the trace."""
+        pass
+
+def process(events, log, analyzer, read_header=True):
+    """Invoke an analyzer on each event in a log."""
+    if isinstance(events, str):
+        events = read_events(open(events, 'r'), events)
+    if isinstance(log, str):
+        log = open(log, 'rb')
+
+    if read_header:
+        read_trace_header(log)
+
+    dropped_event = Event.build("Dropped_Event(uint64_t num_events_dropped)")
+    edict = {"dropped": dropped_event}
+    idtoname = {dropped_event_id: "dropped"}
+
+    for event in events:
+        edict[event.name] = event
+
+    # If there is no header assume event ID mapping matches events list
+    if not read_header:
+        for event_id, event in enumerate(events):
+            idtoname[event_id] = event.name
+
+    def build_fn(analyzer, event):
+        if isinstance(event, str):
+            return analyzer.catchall
+
+        fn = getattr(analyzer, event.name, None)
+        if fn is None:
+            return analyzer.catchall
+
+        event_argcount = len(event.args)
+        fn_argcount = len(inspect.getargspec(fn)[0]) - 1
+        if fn_argcount == event_argcount + 1:
+            # Include timestamp as first argument
+            return lambda _, rec: fn(*(rec[1:2] + rec[3:3 + event_argcount]))
+        elif fn_argcount == event_argcount + 2:
+            # Include timestamp and pid
+            return lambda _, rec: fn(*rec[1:3 + event_argcount])
+        else:
+            # Just arguments, no timestamp or pid
+            return lambda _, rec: fn(*rec[3:3 + event_argcount])
+
+    analyzer.begin()
+    fn_cache = {}
+    for rec in read_trace_records(edict, idtoname, log):
+        event_num = rec[0]
+        event = edict[event_num]
+        if event_num not in fn_cache:
+            fn_cache[event_num] = build_fn(analyzer, event)
+        fn_cache[event_num](event, rec)
+    analyzer.end()
+
+def run(analyzer):
+    """Execute an analyzer on a trace file given on the command-line.
+
+    This function is useful as a driver for simple analysis scripts.  More
+    advanced scripts will want to call process() instead."""
+    import sys
+
+    read_header = True
+    if len(sys.argv) == 4 and sys.argv[1] == '--no-header':
+        read_header = False
+        del sys.argv[1]
+    elif len(sys.argv) != 3:
+        sys.stderr.write('usage: %s [--no-header]  ' \
+                         '\n' % sys.argv[0])
+        sys.exit(1)
+
+    events = read_events(open(sys.argv[1], 'r'), sys.argv[1])
+    process(events, sys.argv[2], analyzer, read_header=read_header)
+
+if __name__ == '__main__':
+    class Formatter(Analyzer):
+        def __init__(self):
+            self.last_timestamp = None
+
+        def catchall(self, event, rec):
+            timestamp = rec[1]
+            if self.last_timestamp is None:
+                self.last_timestamp = timestamp
+            delta_ns = timestamp - self.last_timestamp
+            self.last_timestamp = timestamp
+
+            fields = [event.name, '%0.3f' % (delta_ns / 1000.0),
+                      'pid=%d' % rec[2]]
+            i = 3
+            for type, name in event.args:
+                if is_string(type):
+                    fields.append('%s=%s' % (name, rec[i]))
+                else:
+                    fields.append('%s=0x%x' % (name, rec[i]))
+                i += 1
+            print(' '.join(fields))
+
+    run(Formatter())
diff --git a/scripts/switch-timer-api b/scripts/switch-timer-api
new file mode 100755
index 000000000..41736d11d
--- /dev/null
+++ b/scripts/switch-timer-api
@@ -0,0 +1,178 @@
+#!/usr/bin/env perl
+
+use strict;
+use warnings;
+use Getopt::Long;
+use FindBin;
+
+my @legacy = qw(qemu_clock_ptr qemu_get_clock_ns qemu_get_clock_ms qemu_register_clock_reset_notifier qemu_unregister_clock_reset_notifier qemu_new_timer qemu_free_timer qemu_del_timer qemu_mod_timer_ns qemu_mod_timer qemu_run_timers qemu_new_timer_ns qemu_new_timer_us qemu_new_timer_ms);
+my $legacyre = '\b('.join('|', @legacy).')\b';
+my $option_git;
+my $option_dryrun;
+my $option_quiet;
+my $option_rtc;
+my $suffix=".tmp.$$";
+my @files;
+my $getfiles = 'git grep -l -E \'\b((host|rt|vm|rtc)_clock\b|qemu_\w*timer)\' | egrep \'\.[ch]$\' | egrep -v \'qemu-timer\.c$|include/qemu/timer\.h$\'';
+
+sub Syntax
+{
+    print STDERR < \$option_dryrun,
+             "git|g" => \$option_git,
+	     "quiet|q" => \$option_quiet,
+	     "rtc|r" => \$option_rtc,
+             "help|h" => sub { Syntax(); exit(0); }
+        ))
+    {
+        Syntax();
+        die "Bad options";
+    }
+
+    if ($#ARGV >=0)
+    {
+	@files = @ARGV;
+    }
+    else
+    {
+	@files = split(/\s+/, `$getfiles`);
+    }
+
+    foreach my $file (@files)
+    {
+	die "Cannot find $file" unless (-f $file && -r $file);
+    }
+}
+
+sub DoWarn
+{
+    my $text = shift @_;
+    my $line = shift @_;
+    return if ($option_quiet);
+    chomp ($line);
+    print STDERR "$text\n";
+    print STDERR "$line\n\n";
+}
+
+sub Process
+{
+    my $ifn = shift @_;
+    my $ofn = $ifn.$suffix;
+
+    my $intext;
+    my $outtext;
+    my $linenum = 0;
+
+    open my $input, "<", $ifn || die "Cannot open $ifn for read: $!";
+
+    while (<$input>)
+    {
+	my $line = $_;
+	$intext .= $line;
+	$linenum++;
+
+	# fix the specific uses
+	unless ($option_rtc)
+	{
+	    $line =~ s/\bqemu_new_timer(_[num]s)\s*\((vm_|rt_|host_)clock\b/timer_new$1(XXX_$2clock/g;
+	    $line =~ s/\bqemu_new_timer\s*\((vm_|rt_|host_)clock\b/timer_new(XXX_$1clock/g;
+	    $line =~ s/\bqemu_get_clock(_[num]s)\s*\((vm_|rt_|host_)clock\b/qemu_clock_get$1(XXX_$2clock/g;
+	}
+
+	# rtc is different
+	$line =~ s/\bqemu_new_timer(_[num]s)\s*\(rtc_clock\b/timer_new$1(rtc_clock/g;
+	$line =~ s/\bqemu_new_timer\s*\(rtc_clock\b/timer_new(rtc_clock/g;
+	$line =~ s/\bqemu_get_clock(_[num]s)\s*\(rtc_clock\b/qemu_clock_get$1(rtc_clock/g;
+	$line =~ s/\bqemu_register_clock_reset_notifier\s*\(rtc_clock\b/qemu_register_clock_reset_notifier(qemu_clock_ptr(rtc_clock)/g;
+
+	unless ($option_rtc)
+	{
+	    # fix up comments
+	    $line =~ s/\b(vm_|rt_|host_)clock\b/XXX_$1clock/g if ($line =~ m,^[/ ]+\*,);
+
+	    # spurious fprintf error reporting
+	    $line =~ s/: qemu_new_timer_ns failed/: timer_new_ns failed/g;
+
+	    # these have just changed name
+	    $line =~ s/\bqemu_mod_timer\b/timer_mod/g;
+	    $line =~ s/\bqemu_mod_timer_(ns|us|ms)\b/timer_mod_$1/g;
+	    $line =~ s/\bqemu_free_timer\b/timer_free/g;
+	    $line =~ s/\bqemu_del_timer\b/timer_del/g;
+	}
+
+	# fix up rtc_clock
+	$line =~ s/QEMUClock \*rtc_clock;/QEMUClockType rtc_clock;/g;
+	$line =~ s/\brtc_clock = (vm_|rt_|host_)clock\b/rtc_clock = XXX_$1clock/g;
+
+	unless ($option_rtc)
+	{
+	    # replace any more general uses
+	    $line =~ s/\b(vm_|rt_|host_)clock\b/qemu_clock_ptr(XXX_$1clock)/g;
+	}
+
+	# fix up the place holders
+	$line =~ s/\bXXX_vm_clock\b/QEMU_CLOCK_VIRTUAL/g;
+	$line =~ s/\bXXX_rt_clock\b/QEMU_CLOCK_REALTIME/g;
+	$line =~ s/\bXXX_host_clock\b/QEMU_CLOCK_HOST/g;
+
+	unless ($option_rtc)
+	{
+	    DoWarn("$ifn:$linenum WARNING: timer $1 not fixed up", $line) if ($line =~ /\b((vm_|rt_|host_)clock)\b/);
+	    DoWarn("$ifn:$linenum WARNING: function $1 not fixed up", $line) if ($line =~ /\b(qemu_new_timer\w+)\b/);
+	    DoWarn("$ifn:$linenum WARNING: legacy function $1 remains", $line) if ($line =~ /$legacyre/o);
+	}
+
+	$outtext .= $line;
+    }
+
+    close $input;
+
+    if ($intext ne $outtext)
+    {
+	print STDERR "Patching $ifn\n" unless ($option_quiet);
+	unless ($option_dryrun)
+	{
+	    open my $output, ">", $ofn || die "Cannot open $ofn for write: $!";
+	    print $output $outtext;
+	    close $output;
+	    rename ($ofn, $ifn) || die "Cannot rename temp file to $ifn: $!";
+	    return 1;
+	}
+    }
+    return 0;
+}
+
+sub DoCommit
+{
+    my $file = shift @_;
+    open (my $git, "| git commit -F - $file") || die "Cannot run git commit on $file: $!";
+    print $git "timers api: use new timer api in $file\n\nConvert $file to use new timer API.\nThis is an automated commit made by scripts/switch-timer-api\n";
+    close ($git);
+}
+
+ParseOptions;
+
+foreach my $file (@files)
+{
+    my $changed = Process ($file);
+    DoCommit($file) if ($changed && $option_git);
+}
diff --git a/scripts/tap-driver.pl b/scripts/tap-driver.pl
new file mode 100755
index 000000000..b1d3880c5
--- /dev/null
+++ b/scripts/tap-driver.pl
@@ -0,0 +1,379 @@
+#! /usr/bin/env perl
+# Copyright (C) 2011-2013 Free Software Foundation, Inc.
+# Copyright (C) 2018 Red Hat, Inc.
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2, or (at your option)
+# any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program.  If not, see .
+
+# As a special exception to the GNU General Public License, if you
+# distribute this file as part of a program that contains a
+# configuration script generated by Autoconf, you may include it under
+# the same distribution terms that you use for the rest of that program.
+
+# ---------------------------------- #
+#  Imports, static data, and setup.  #
+# ---------------------------------- #
+
+use warnings FATAL => 'all';
+use strict;
+use Getopt::Long ();
+use TAP::Parser;
+use Term::ANSIColor qw(:constants);
+
+my $ME = "tap-driver.pl";
+my $VERSION = "2018-11-30";
+
+my $USAGE = <<'END';
+Usage:
+  tap-driver [--test-name=TEST] [--color={always|never|auto}]
+             [--verbose] [--show-failures-only]
+END
+
+my $HELP = "$ME: TAP-aware test driver for QEMU testsuite harness." .
+           "\n" . $USAGE;
+
+# It's important that NO_PLAN evaluates "false" as a boolean.
+use constant NO_PLAN => 0;
+use constant EARLY_PLAN => 1;
+use constant LATE_PLAN => 2;
+
+use constant DIAG_STRING => "#";
+
+# ------------------- #
+#  Global variables.  #
+# ------------------- #
+
+my $testno = 0;     # Number of test results seen so far.
+my $bailed_out = 0; # Whether a "Bail out!" directive has been seen.
+my $failed = 0;     # Final exit code
+
+# Whether the TAP plan has been seen or not, and if yes, which kind
+# it is ("early" is seen before any test result, "late" otherwise).
+my $plan_seen = NO_PLAN;
+
+# ----------------- #
+#  Option parsing.  #
+# ----------------- #
+
+my %cfg = (
+  "color" => 0,
+  "verbose" => 0,
+  "show-failures-only" => 0,
+);
+
+my $color = "auto";
+my $test_name = undef;
+
+# Perl's Getopt::Long allows options to take optional arguments after a space.
+# Prevent --color by itself from consuming other arguments
+foreach (@ARGV) {
+  if ($_ eq "--color" || $_ eq "-color") {
+    $_ = "--color=$color";
+  }
+}
+
+Getopt::Long::GetOptions
+  (
+    'help' => sub { print $HELP; exit 0; },
+    'version' => sub { print "$ME $VERSION\n"; exit 0; },
+    'test-name=s' => \$test_name,
+    'color=s'  => \$color,
+    'show-failures-only' => sub { $cfg{"show-failures-only"} = 1; },
+    'verbose' => sub { $cfg{"verbose"} = 1; },
+  ) or exit 1;
+
+if ($color =~ /^always$/i) {
+  $cfg{'color'} = 1;
+} elsif ($color =~ /^never$/i) {
+  $cfg{'color'} = 0;
+} elsif ($color =~ /^auto$/i) {
+  $cfg{'color'} = (-t STDOUT);
+} else {
+  die "Invalid color mode: $color\n";
+}
+
+# ------------- #
+#  Prototypes.  #
+# ------------- #
+
+sub colored ($$);
+sub decorate_result ($);
+sub extract_tap_comment ($);
+sub handle_tap_bailout ($);
+sub handle_tap_plan ($);
+sub handle_tap_result ($);
+sub is_null_string ($);
+sub main ();
+sub report ($;$);
+sub stringify_result_obj ($);
+sub testsuite_error ($);
+
+# -------------- #
+#  Subroutines.  #
+# -------------- #
+
+# If the given string is undefined or empty, return true, otherwise
+# return false.  This function is useful to avoid pitfalls like:
+#   if ($message) { print "$message\n"; }
+# which wouldn't print anything if $message is the literal "0".
+sub is_null_string ($)
+{
+  my $str = shift;
+  return ! (defined $str and length $str);
+}
+
+sub stringify_result_obj ($)
+{
+  my $result_obj = shift;
+  if ($result_obj->is_unplanned || $result_obj->number != $testno)
+    {
+      return "ERROR";
+    }
+  elsif ($plan_seen == LATE_PLAN)
+    {
+      return "ERROR";
+    }
+  elsif (!$result_obj->directive)
+    {
+      return $result_obj->is_ok ? "PASS" : "FAIL";
+    }
+  elsif ($result_obj->has_todo)
+    {
+      return $result_obj->is_actual_ok ? "XPASS" : "XFAIL";
+    }
+  elsif ($result_obj->has_skip)
+    {
+      return $result_obj->is_ok ? "SKIP" : "FAIL";
+    }
+  die "$ME: INTERNAL ERROR"; # NOTREACHED
+}
+
+sub colored ($$)
+{
+  my ($color_string, $text) = @_;
+  return $color_string . $text . RESET;
+}
+
+sub decorate_result ($)
+{
+  my $result = shift;
+  return $result unless $cfg{"color"};
+  my %color_for_result =
+    (
+      "ERROR" => BOLD.MAGENTA,
+      "PASS"  => GREEN,
+      "XPASS" => BOLD.YELLOW,
+      "FAIL"  => BOLD.RED,
+      "XFAIL" => YELLOW,
+      "SKIP"  => BLUE,
+    );
+  if (my $color = $color_for_result{$result})
+    {
+      return colored ($color, $result);
+    }
+  else
+    {
+      return $result; # Don't colorize unknown stuff.
+    }
+}
+
+sub report ($;$)
+{
+  my ($msg, $result, $explanation) = (undef, @_);
+  if ($result =~ /^(?:X?(?:PASS|FAIL)|SKIP|ERROR)/)
+    {
+      # Output on console might be colorized.
+      $msg = decorate_result($result);
+      if ($result =~ /^(?:PASS|XFAIL|SKIP)/)
+        {
+          return if $cfg{"show-failures-only"};
+        }
+      else
+        {
+          $failed = 1;
+        }
+    }
+  elsif ($result eq "#")
+    {
+      $msg = "  ";
+    }
+  else
+    {
+      die "$ME: INTERNAL ERROR"; # NOTREACHED
+    }
+  $msg .= " $explanation" if defined $explanation;
+  print $msg . "\n";
+}
+
+sub testsuite_error ($)
+{
+  report "ERROR", "$test_name - $_[0]";
+}
+
+sub handle_tap_result ($)
+{
+  $testno++;
+  my $result_obj = shift;
+
+  my $test_result = stringify_result_obj $result_obj;
+  my $string = $result_obj->number;
+
+  my $description = $result_obj->description;
+  $string .= " $test_name" unless is_null_string $test_name;
+  $string .= " $description" unless is_null_string $description;
+
+  if ($plan_seen == LATE_PLAN)
+    {
+      $string .= " # AFTER LATE PLAN";
+    }
+  elsif ($result_obj->is_unplanned)
+    {
+      $string .= " # UNPLANNED";
+    }
+  elsif ($result_obj->number != $testno)
+    {
+      $string .= " # OUT-OF-ORDER (expecting $testno)";
+    }
+  elsif (my $directive = $result_obj->directive)
+    {
+      $string .= " # $directive";
+      my $explanation = $result_obj->explanation;
+      $string .= " $explanation"
+        unless is_null_string $explanation;
+    }
+
+  report $test_result, $string;
+}
+
+sub handle_tap_plan ($)
+{
+  my $plan = shift;
+  if ($plan_seen)
+    {
+      # Error, only one plan per stream is acceptable.
+      testsuite_error "multiple test plans";
+      return;
+    }
+  # The TAP plan can come before or after *all* the TAP results; we speak
+  # respectively of an "early" or a "late" plan.  If we see the plan line
+  # after at least one TAP result has been seen, assume we have a late
+  # plan; in this case, any further test result seen after the plan will
+  # be flagged as an error.
+  $plan_seen = ($testno >= 1 ? LATE_PLAN : EARLY_PLAN);
+  # If $testno > 0, we have an error ("too many tests run") that will be
+  # automatically dealt with later, so don't worry about it here.  If
+  # $plan_seen is true, we have an error due to a repeated plan, and that
+  # has already been dealt with above.  Otherwise, we have a valid "plan
+  # with SKIP" specification, and should report it as a particular kind
+  # of SKIP result.
+  if ($plan->directive && $testno == 0)
+    {
+      my $explanation = is_null_string ($plan->explanation) ?
+                        undef : "- " . $plan->explanation;
+      report "SKIP", $explanation;
+    }
+}
+
+sub handle_tap_bailout ($)
+{
+  my ($bailout, $msg) = ($_[0], "Bail out!");
+  $bailed_out = 1;
+  $msg .= " " . $bailout->explanation
+    unless is_null_string $bailout->explanation;
+  testsuite_error $msg;
+}
+
+sub extract_tap_comment ($)
+{
+  my $line = shift;
+  if (index ($line, DIAG_STRING) == 0)
+    {
+      # Strip leading `DIAG_STRING' from `$line'.
+      $line = substr ($line, length (DIAG_STRING));
+      # And strip any leading and trailing whitespace left.
+      $line =~ s/(?:^\s*|\s*$)//g;
+      # Return what is left (if any).
+      return $line;
+    }
+  return "";
+}
+
+sub main ()
+{
+  my $iterator = TAP::Parser::Iterator::Stream->new(\*STDIN);
+  my $parser = TAP::Parser->new ({iterator => $iterator });
+
+  STDOUT->autoflush(1);
+  while (defined (my $cur = $parser->next))
+    {
+      # Parsing of TAP input should stop after a "Bail out!" directive.
+      next if $bailed_out;
+
+      if ($cur->is_plan)
+        {
+          handle_tap_plan ($cur);
+        }
+      elsif ($cur->is_test)
+        {
+          handle_tap_result ($cur);
+        }
+      elsif ($cur->is_bailout)
+        {
+          handle_tap_bailout ($cur);
+        }
+      elsif ($cfg{"verbose"})
+        {
+          my $comment = extract_tap_comment ($cur->raw);
+          report "#", "$comment" if length $comment;
+       }
+    }
+  # A "Bail out!" directive should cause us to ignore any following TAP
+  # error.
+  if (!$bailed_out)
+    {
+      if (!$plan_seen)
+        {
+          testsuite_error "missing test plan";
+        }
+      elsif ($parser->tests_planned != $parser->tests_run)
+        {
+          my ($planned, $run) = ($parser->tests_planned, $parser->tests_run);
+          my $bad_amount = $run > $planned ? "many" : "few";
+          testsuite_error (sprintf "too %s tests run (expected %d, got %d)",
+                                   $bad_amount, $planned, $run);
+        }
+    }
+}
+
+# ----------- #
+#  Main code. #
+# ----------- #
+
+main;
+exit($failed);
+
+# Local Variables:
+# perl-indent-level: 2
+# perl-continued-statement-offset: 2
+# perl-continued-brace-offset: 0
+# perl-brace-offset: 0
+# perl-brace-imaginary-offset: 0
+# perl-label-offset: -2
+# cperl-indent-level: 2
+# cperl-brace-offset: 0
+# cperl-continued-brace-offset: 0
+# cperl-label-offset: -2
+# cperl-extra-newline-before-brace: t
+# cperl-merge-trailing-else: nil
+# cperl-continued-statement-offset: 2
+# End:
diff --git a/scripts/tap-merge.pl b/scripts/tap-merge.pl
new file mode 100755
index 000000000..10ccf57bb
--- /dev/null
+++ b/scripts/tap-merge.pl
@@ -0,0 +1,111 @@
+#! /usr/bin/env perl
+# Copyright (C) 2018 Red Hat, Inc.
+#
+# Author: Paolo Bonzini 
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2, or (at your option)
+# any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program.  If not, see .
+
+# ---------------------------------- #
+#  Imports, static data, and setup.  #
+# ---------------------------------- #
+
+use warnings FATAL => 'all';
+use strict;
+use Getopt::Long ();
+use TAP::Parser;
+
+my $ME = "tap-merge.pl";
+my $VERSION = "2018-11-30";
+
+my $HELP = "$ME: merge multiple TAP inputs from stdin.";
+
+use constant DIAG_STRING => "#";
+
+# ----------------- #
+#  Option parsing.  #
+# ----------------- #
+
+Getopt::Long::GetOptions
+  (
+    'help' => sub { print $HELP; exit 0; },
+    'version' => sub { print "$ME $VERSION\n"; exit 0; },
+  );
+
+# -------------- #
+#  Subroutines.  #
+# -------------- #
+
+sub main ()
+{
+  my $iterator = TAP::Parser::Iterator::Stream->new(\*STDIN);
+  my $parser = TAP::Parser->new ({iterator => $iterator });
+  my $testno = 0;     # Number of test results seen so far.
+  my $bailed_out = 0; # Whether a "Bail out!" directive has been seen.
+
+  STDOUT->autoflush(1);
+  while (defined (my $cur = $parser->next))
+    {
+      if ($cur->is_bailout)
+        {
+          $bailed_out = 1;
+          print DIAG_STRING . " " . $cur->as_string . "\n";
+          next;
+        }
+      elsif ($cur->is_plan)
+        {
+          $bailed_out = 0;
+          next;
+        }
+      elsif ($cur->is_test)
+        {
+          $bailed_out = 0 if $cur->number == 1;
+          $testno++;
+          $cur = TAP::Parser::Result::Test->new({
+                          ok => $cur->ok,
+                          test_num => $testno,
+                          directive => $cur->directive,
+                          explanation => $cur->explanation,
+                          description => $cur->description
+                  });
+        }
+      elsif ($cur->is_version)
+        {
+          next if $testno > 0;
+        }
+      print $cur->as_string . "\n" unless $bailed_out;
+    }
+  print "1..$testno\n";
+}
+
+# ----------- #
+#  Main code. #
+# ----------- #
+
+main;
+
+# Local Variables:
+# perl-indent-level: 2
+# perl-continued-statement-offset: 2
+# perl-continued-brace-offset: 0
+# perl-brace-offset: 0
+# perl-brace-imaginary-offset: 0
+# perl-label-offset: -2
+# cperl-indent-level: 2
+# cperl-brace-offset: 0
+# cperl-continued-brace-offset: 0
+# cperl-label-offset: -2
+# cperl-extra-newline-before-brace: t
+# cperl-merge-trailing-else: nil
+# cperl-continued-statement-offset: 2
+# End:
diff --git a/scripts/test-driver.py b/scripts/test-driver.py
new file mode 100644
index 000000000..eef74b29a
--- /dev/null
+++ b/scripts/test-driver.py
@@ -0,0 +1,35 @@
+#! /usr/bin/env python3
+
+# Wrapper for tests that hides the output if they succeed.
+# Used by "make check"
+#
+# Copyright (C) 2020 Red Hat, Inc.
+#
+# Author: Paolo Bonzini 
+
+import subprocess
+import sys
+import os
+import argparse
+
+parser = argparse.ArgumentParser(description='Test driver for QEMU')
+parser.add_argument('-C', metavar='DIR', dest='dir', default='.',
+                    help='change to DIR before doing anything else')
+parser.add_argument('-v', '--verbose', dest='verbose', action='store_true',
+                    help='be more verbose')
+parser.add_argument('test_args', nargs=argparse.REMAINDER)
+
+args = parser.parse_args()
+os.chdir(args.dir)
+
+test_args = args.test_args
+if test_args[0] == '--':
+    test_args = test_args[1:]
+
+if args.verbose:
+    result = subprocess.run(test_args, stdout=None, stderr=None)
+else:
+    result = subprocess.run(test_args, stdout=subprocess.PIPE, stderr=subprocess.STDOUT)
+    if result.returncode:
+        sys.stdout.buffer.write(result.stdout)
+sys.exit(result.returncode)
diff --git a/scripts/tracetool.py b/scripts/tracetool.py
new file mode 100755
index 000000000..31146242b
--- /dev/null
+++ b/scripts/tracetool.py
@@ -0,0 +1,152 @@
+#!/usr/bin/env python3
+# -*- coding: utf-8 -*-
+
+"""
+Command-line wrapper for the tracetool machinery.
+"""
+
+__author__     = "Lluís Vilanova "
+__copyright__  = "Copyright 2012-2014, Lluís Vilanova "
+__license__    = "GPL version 2 or (at your option) any later version"
+
+__maintainer__ = "Stefan Hajnoczi"
+__email__      = "stefanha@redhat.com"
+
+
+import sys
+import getopt
+
+from tracetool import error_write, out
+import tracetool.backend
+import tracetool.format
+
+
+_SCRIPT = ""
+
+def error_opt(msg = None):
+    if msg is not None:
+        error_write("Error: " + msg + "\n")
+
+    backend_descr = "\n".join([ "    %-15s %s" % (n, d)
+                                for n,d in tracetool.backend.get_list() ])
+    format_descr = "\n".join([ "    %-15s %s" % (n, d)
+                               for n,d in tracetool.format.get_list() ])
+    error_write("""\
+Usage: %(script)s --format= --backends= []
+
+Backends:
+%(backends)s
+
+Formats:
+%(formats)s
+
+Options:
+    --help                   This help message.
+    --list-backends          Print list of available backends.
+    --check-backends         Check if the given backend is valid.
+    --binary           Full path to QEMU binary.
+    --target-type      QEMU emulator target type ('system' or 'user').
+    --target-name      QEMU emulator target name.
+    --group            Name of the event group
+    --probe-prefix   Prefix for dtrace probe names
+                             (default: qemu--).\
+""" % {
+            "script" : _SCRIPT,
+            "backends" : backend_descr,
+            "formats" : format_descr,
+            })
+
+    if msg is None:
+        sys.exit(0)
+    else:
+        sys.exit(1)
+
+def main(args):
+    global _SCRIPT
+    _SCRIPT = args[0]
+
+    long_opts = ["backends=", "format=", "help", "list-backends",
+                 "check-backends", "group="]
+    long_opts += ["binary=", "target-type=", "target-name=", "probe-prefix="]
+
+    try:
+        opts, args = getopt.getopt(args[1:], "", long_opts)
+    except getopt.GetoptError as err:
+        error_opt(str(err))
+
+    check_backends = False
+    arg_backends = []
+    arg_format = ""
+    arg_group = None
+    binary = None
+    target_type = None
+    target_name = None
+    probe_prefix = None
+    for opt, arg in opts:
+        if opt == "--help":
+            error_opt()
+
+        elif opt == "--backends":
+            arg_backends = arg.split(",")
+        elif opt == "--group":
+            arg_group = arg
+        elif opt == "--format":
+            arg_format = arg
+
+        elif opt == "--list-backends":
+            public_backends = tracetool.backend.get_list(only_public = True)
+            out(", ".join([ b for b,_ in public_backends ]))
+            sys.exit(0)
+        elif opt == "--check-backends":
+            check_backends = True
+
+        elif opt == "--binary":
+            binary = arg
+        elif opt == '--target-type':
+            target_type = arg
+        elif opt == '--target-name':
+            target_name = arg
+        elif opt == '--probe-prefix':
+            probe_prefix = arg
+
+        else:
+            error_opt("unhandled option: %s" % opt)
+
+    if len(arg_backends) == 0:
+        error_opt("no backends specified")
+
+    if check_backends:
+        for backend in arg_backends:
+            if not tracetool.backend.exists(backend):
+                sys.exit(1)
+        sys.exit(0)
+
+    if arg_group is None:
+        error_opt("group name is required")
+
+    if arg_format == "stap":
+        if binary is None:
+            error_opt("--binary is required for SystemTAP tapset generator")
+        if probe_prefix is None and target_type is None:
+            error_opt("--target-type is required for SystemTAP tapset generator")
+        if probe_prefix is None and target_name is None:
+            error_opt("--target-name is required for SystemTAP tapset generator")
+
+        if probe_prefix is None:
+            probe_prefix = ".".join(["qemu", target_type, target_name])
+
+    if len(args) < 1:
+        error_opt("missing trace-events filepath")
+    events = []
+    for arg in args:
+        with open(arg, "r") as fh:
+            events.extend(tracetool.read_events(fh, arg))
+
+    try:
+        tracetool.generate(events, arg_group, arg_format, arg_backends,
+                           binary=binary, probe_prefix=probe_prefix)
+    except tracetool.TracetoolError as e:
+        error_opt(str(e))
+
+if __name__ == "__main__":
+    main(sys.argv)
diff --git a/scripts/tracetool/__init__.py b/scripts/tracetool/__init__.py
new file mode 100644
index 000000000..3ee54be22
--- /dev/null
+++ b/scripts/tracetool/__init__.py
@@ -0,0 +1,474 @@
+# -*- coding: utf-8 -*-
+
+"""
+Machinery for generating tracing-related intermediate files.
+"""
+
+__author__     = "Lluís Vilanova "
+__copyright__  = "Copyright 2012-2017, Lluís Vilanova "
+__license__    = "GPL version 2 or (at your option) any later version"
+
+__maintainer__ = "Stefan Hajnoczi"
+__email__      = "stefanha@redhat.com"
+
+
+import re
+import sys
+import weakref
+
+import tracetool.format
+import tracetool.backend
+import tracetool.transform
+
+
+def error_write(*lines):
+    """Write a set of error lines."""
+    sys.stderr.writelines("\n".join(lines) + "\n")
+
+def error(*lines):
+    """Write a set of error lines and exit."""
+    error_write(*lines)
+    sys.exit(1)
+
+
+def out(*lines, **kwargs):
+    """Write a set of output lines.
+
+    You can use kwargs as a shorthand for mapping variables when formatting all
+    the strings in lines.
+    """
+    lines = [ l % kwargs for l in lines ]
+    sys.stdout.writelines("\n".join(lines) + "\n")
+
+# We only want to allow standard C types or fixed sized
+# integer types. We don't want QEMU specific types
+# as we can't assume trace backends can resolve all the
+# typedefs
+ALLOWED_TYPES = [
+    "int",
+    "long",
+    "short",
+    "char",
+    "bool",
+    "unsigned",
+    "signed",
+    "int8_t",
+    "uint8_t",
+    "int16_t",
+    "uint16_t",
+    "int32_t",
+    "uint32_t",
+    "int64_t",
+    "uint64_t",
+    "void",
+    "size_t",
+    "ssize_t",
+    "uintptr_t",
+    "ptrdiff_t",
+    # Magic substitution is done by tracetool
+    "TCGv",
+]
+
+def validate_type(name):
+    bits = name.split(" ")
+    for bit in bits:
+        bit = re.sub("\*", "", bit)
+        if bit == "":
+            continue
+        if bit == "const":
+            continue
+        if bit not in ALLOWED_TYPES:
+            raise ValueError("Argument type '%s' is not in whitelist. "
+                             "Only standard C types and fixed size integer "
+                             "types should be used. struct, union, and "
+                             "other complex pointer types should be "
+                             "declared as 'void *'" % name)
+
+class Arguments:
+    """Event arguments description."""
+
+    def __init__(self, args):
+        """
+        Parameters
+        ----------
+        args :
+            List of (type, name) tuples or Arguments objects.
+        """
+        self._args = []
+        for arg in args:
+            if isinstance(arg, Arguments):
+                self._args.extend(arg._args)
+            else:
+                self._args.append(arg)
+
+    def copy(self):
+        """Create a new copy."""
+        return Arguments(list(self._args))
+
+    @staticmethod
+    def build(arg_str):
+        """Build and Arguments instance from an argument string.
+
+        Parameters
+        ----------
+        arg_str : str
+            String describing the event arguments.
+        """
+        res = []
+        for arg in arg_str.split(","):
+            arg = arg.strip()
+            if not arg:
+                raise ValueError("Empty argument (did you forget to use 'void'?)")
+            if arg == 'void':
+                continue
+
+            if '*' in arg:
+                arg_type, identifier = arg.rsplit('*', 1)
+                arg_type += '*'
+                identifier = identifier.strip()
+            else:
+                arg_type, identifier = arg.rsplit(None, 1)
+
+            validate_type(arg_type)
+            res.append((arg_type, identifier))
+        return Arguments(res)
+
+    def __getitem__(self, index):
+        if isinstance(index, slice):
+            return Arguments(self._args[index])
+        else:
+            return self._args[index]
+
+    def __iter__(self):
+        """Iterate over the (type, name) pairs."""
+        return iter(self._args)
+
+    def __len__(self):
+        """Number of arguments."""
+        return len(self._args)
+
+    def __str__(self):
+        """String suitable for declaring function arguments."""
+        if len(self._args) == 0:
+            return "void"
+        else:
+            return ", ".join([ " ".join([t, n]) for t,n in self._args ])
+
+    def __repr__(self):
+        """Evaluable string representation for this object."""
+        return "Arguments(\"%s\")" % str(self)
+
+    def names(self):
+        """List of argument names."""
+        return [ name for _, name in self._args ]
+
+    def types(self):
+        """List of argument types."""
+        return [ type_ for type_, _ in self._args ]
+
+    def casted(self):
+        """List of argument names casted to their type."""
+        return ["(%s)%s" % (type_, name) for type_, name in self._args]
+
+    def transform(self, *trans):
+        """Return a new Arguments instance with transformed types.
+
+        The types in the resulting Arguments instance are transformed according
+        to tracetool.transform.transform_type.
+        """
+        res = []
+        for type_, name in self._args:
+            res.append((tracetool.transform.transform_type(type_, *trans),
+                        name))
+        return Arguments(res)
+
+
+class Event(object):
+    """Event description.
+
+    Attributes
+    ----------
+    name : str
+        The event name.
+    fmt : str
+        The event format string.
+    properties : set(str)
+        Properties of the event.
+    args : Arguments
+        The event arguments.
+
+    """
+
+    _CRE = re.compile("((?P[\w\s]+)\s+)?"
+                      "(?P\w+)"
+                      "\((?P[^)]*)\)"
+                      "\s*"
+                      "(?:(?:(?P\".+),)?\s*(?P\".+))?"
+                      "\s*")
+
+    _VALID_PROPS = set(["disable", "tcg", "tcg-trans", "tcg-exec", "vcpu"])
+
+    def __init__(self, name, props, fmt, args, orig=None,
+                 event_trans=None, event_exec=None):
+        """
+        Parameters
+        ----------
+        name : string
+            Event name.
+        props : list of str
+            Property names.
+        fmt : str, list of str
+            Event printing format string(s).
+        args : Arguments
+            Event arguments.
+        orig : Event or None
+            Original Event before transformation/generation.
+        event_trans : Event or None
+            Generated translation-time event ("tcg" property).
+        event_exec : Event or None
+            Generated execution-time event ("tcg" property).
+
+        """
+        self.name = name
+        self.properties = props
+        self.fmt = fmt
+        self.args = args
+        self.event_trans = event_trans
+        self.event_exec = event_exec
+
+        if len(args) > 10:
+            raise ValueError("Event '%s' has more than maximum permitted "
+                             "argument count" % name)
+
+        if orig is None:
+            self.original = weakref.ref(self)
+        else:
+            self.original = orig
+
+        unknown_props = set(self.properties) - self._VALID_PROPS
+        if len(unknown_props) > 0:
+            raise ValueError("Unknown properties: %s"
+                             % ", ".join(unknown_props))
+        assert isinstance(self.fmt, str) or len(self.fmt) == 2
+
+    def copy(self):
+        """Create a new copy."""
+        return Event(self.name, list(self.properties), self.fmt,
+                     self.args.copy(), self, self.event_trans, self.event_exec)
+
+    @staticmethod
+    def build(line_str):
+        """Build an Event instance from a string.
+
+        Parameters
+        ----------
+        line_str : str
+            Line describing the event.
+        """
+        m = Event._CRE.match(line_str)
+        assert m is not None
+        groups = m.groupdict('')
+
+        name = groups["name"]
+        props = groups["props"].split()
+        fmt = groups["fmt"]
+        fmt_trans = groups["fmt_trans"]
+        if fmt.find("%m") != -1 or fmt_trans.find("%m") != -1:
+            raise ValueError("Event format '%m' is forbidden, pass the error "
+                             "as an explicit trace argument")
+        if fmt.endswith(r'\n"'):
+            raise ValueError("Event format must not end with a newline "
+                             "character")
+
+        if len(fmt_trans) > 0:
+            fmt = [fmt_trans, fmt]
+        args = Arguments.build(groups["args"])
+
+        if "tcg-trans" in props:
+            raise ValueError("Invalid property 'tcg-trans'")
+        if "tcg-exec" in props:
+            raise ValueError("Invalid property 'tcg-exec'")
+        if "tcg" not in props and not isinstance(fmt, str):
+            raise ValueError("Only events with 'tcg' property can have two format strings")
+        if "tcg" in props and isinstance(fmt, str):
+            raise ValueError("Events with 'tcg' property must have two format strings")
+
+        event = Event(name, props, fmt, args)
+
+        # add implicit arguments when using the 'vcpu' property
+        import tracetool.vcpu
+        event = tracetool.vcpu.transform_event(event)
+
+        return event
+
+    def __repr__(self):
+        """Evaluable string representation for this object."""
+        if isinstance(self.fmt, str):
+            fmt = self.fmt
+        else:
+            fmt = "%s, %s" % (self.fmt[0], self.fmt[1])
+        return "Event('%s %s(%s) %s')" % (" ".join(self.properties),
+                                          self.name,
+                                          self.args,
+                                          fmt)
+    # Star matching on PRI is dangerous as one might have multiple
+    # arguments with that format, hence the non-greedy version of it.
+    _FMT = re.compile("(%[\d\.]*\w+|%.*?PRI\S+)")
+
+    def formats(self):
+        """List conversion specifiers in the argument print format string."""
+        assert not isinstance(self.fmt, list)
+        return self._FMT.findall(self.fmt)
+
+    QEMU_TRACE               = "trace_%(name)s"
+    QEMU_TRACE_NOCHECK       = "_nocheck__" + QEMU_TRACE
+    QEMU_TRACE_TCG           = QEMU_TRACE + "_tcg"
+    QEMU_DSTATE              = "_TRACE_%(NAME)s_DSTATE"
+    QEMU_BACKEND_DSTATE      = "TRACE_%(NAME)s_BACKEND_DSTATE"
+    QEMU_EVENT               = "_TRACE_%(NAME)s_EVENT"
+
+    def api(self, fmt=None):
+        if fmt is None:
+            fmt = Event.QEMU_TRACE
+        return fmt % {"name": self.name, "NAME": self.name.upper()}
+
+    def transform(self, *trans):
+        """Return a new Event with transformed Arguments."""
+        return Event(self.name,
+                     list(self.properties),
+                     self.fmt,
+                     self.args.transform(*trans),
+                     self)
+
+
+def read_events(fobj, fname):
+    """Generate the output for the given (format, backends) pair.
+
+    Parameters
+    ----------
+    fobj : file
+        Event description file.
+    fname : str
+        Name of event file
+
+    Returns a list of Event objects
+    """
+
+    events = []
+    for lineno, line in enumerate(fobj, 1):
+        if line[-1] != '\n':
+            raise ValueError("%s does not end with a new line" % fname)
+        if not line.strip():
+            continue
+        if line.lstrip().startswith('#'):
+            continue
+
+        try:
+            event = Event.build(line)
+        except ValueError as e:
+            arg0 = 'Error at %s:%d: %s' % (fname, lineno, e.args[0])
+            e.args = (arg0,) + e.args[1:]
+            raise
+
+        # transform TCG-enabled events
+        if "tcg" not in event.properties:
+            events.append(event)
+        else:
+            event_trans = event.copy()
+            event_trans.name += "_trans"
+            event_trans.properties += ["tcg-trans"]
+            event_trans.fmt = event.fmt[0]
+            # ignore TCG arguments
+            args_trans = []
+            for atrans, aorig in zip(
+                    event_trans.transform(tracetool.transform.TCG_2_HOST).args,
+                    event.args):
+                if atrans == aorig:
+                    args_trans.append(atrans)
+            event_trans.args = Arguments(args_trans)
+
+            event_exec = event.copy()
+            event_exec.name += "_exec"
+            event_exec.properties += ["tcg-exec"]
+            event_exec.fmt = event.fmt[1]
+            event_exec.args = event_exec.args.transform(tracetool.transform.TCG_2_HOST)
+
+            new_event = [event_trans, event_exec]
+            event.event_trans, event.event_exec = new_event
+
+            events.extend(new_event)
+
+    return events
+
+
+class TracetoolError (Exception):
+    """Exception for calls to generate."""
+    pass
+
+
+def try_import(mod_name, attr_name=None, attr_default=None):
+    """Try to import a module and get an attribute from it.
+
+    Parameters
+    ----------
+    mod_name : str
+        Module name.
+    attr_name : str, optional
+        Name of an attribute in the module.
+    attr_default : optional
+        Default value if the attribute does not exist in the module.
+
+    Returns
+    -------
+    A pair indicating whether the module could be imported and the module or
+    object or attribute value.
+    """
+    try:
+        module = __import__(mod_name, globals(), locals(), ["__package__"])
+        if attr_name is None:
+            return True, module
+        return True, getattr(module, str(attr_name), attr_default)
+    except ImportError:
+        return False, None
+
+
+def generate(events, group, format, backends,
+             binary=None, probe_prefix=None):
+    """Generate the output for the given (format, backends) pair.
+
+    Parameters
+    ----------
+    events : list
+        list of Event objects to generate for
+    group: str
+        Name of the tracing group
+    format : str
+        Output format name.
+    backends : list
+        Output backend names.
+    binary : str or None
+        See tracetool.backend.dtrace.BINARY.
+    probe_prefix : str or None
+        See tracetool.backend.dtrace.PROBEPREFIX.
+    """
+    # fix strange python error (UnboundLocalError tracetool)
+    import tracetool
+
+    format = str(format)
+    if len(format) == 0:
+        raise TracetoolError("format not set")
+    if not tracetool.format.exists(format):
+        raise TracetoolError("unknown format: %s" % format)
+
+    if len(backends) == 0:
+        raise TracetoolError("no backends specified")
+    for backend in backends:
+        if not tracetool.backend.exists(backend):
+            raise TracetoolError("unknown backend: %s" % backend)
+    backend = tracetool.backend.Wrapper(backends, format)
+
+    import tracetool.backend.dtrace
+    tracetool.backend.dtrace.BINARY = binary
+    tracetool.backend.dtrace.PROBEPREFIX = probe_prefix
+
+    tracetool.format.generate(events, format, backend, group)
diff --git a/scripts/tracetool/backend/__init__.py b/scripts/tracetool/backend/__init__.py
new file mode 100644
index 000000000..7bfcc86cc
--- /dev/null
+++ b/scripts/tracetool/backend/__init__.py
@@ -0,0 +1,125 @@
+# -*- coding: utf-8 -*-
+
+"""
+Backend management.
+
+
+Creating new backends
+---------------------
+
+A new backend named 'foo-bar' corresponds to Python module
+'tracetool/backend/foo_bar.py'.
+
+A backend module should provide a docstring, whose first non-empty line will be
+considered its short description.
+
+All backends must generate their contents through the 'tracetool.out' routine.
+
+
+Backend attributes
+------------------
+
+========= ====================================================================
+Attribute Description
+========= ====================================================================
+PUBLIC    If exists and is set to 'True', the backend is considered "public".
+========= ====================================================================
+
+
+Backend functions
+-----------------
+
+All the following functions are optional, and no output will be generated if
+they do not exist.
+
+=============================== ==============================================
+Function                        Description
+=============================== ==============================================
+generate__begin(events) Generate backend- and format-specific file
+                                header contents.
+generate__end(events)   Generate backend- and format-specific file
+                                footer contents.
+generate_(event)        Generate backend- and format-specific contents
+                                for the given event.
+=============================== ==============================================
+
+"""
+
+__author__     = "Lluís Vilanova "
+__copyright__  = "Copyright 2012-2014, Lluís Vilanova "
+__license__    = "GPL version 2 or (at your option) any later version"
+
+__maintainer__ = "Stefan Hajnoczi"
+__email__      = "stefanha@redhat.com"
+
+
+import os
+
+import tracetool
+
+
+def get_list(only_public = False):
+    """Get a list of (name, description) pairs."""
+    res = [("nop", "Tracing disabled.")]
+    modnames = []
+    for filename in os.listdir(tracetool.backend.__path__[0]):
+        if filename.endswith('.py') and filename != '__init__.py':
+            modnames.append(filename.rsplit('.', 1)[0])
+    for modname in sorted(modnames):
+        module = tracetool.try_import("tracetool.backend." + modname)
+
+        # just in case; should never fail unless non-module files are put there
+        if not module[0]:
+            continue
+        module = module[1]
+
+        public = getattr(module, "PUBLIC", False)
+        if only_public and not public:
+            continue
+
+        doc = module.__doc__
+        if doc is None:
+            doc = ""
+        doc = doc.strip().split("\n")[0]
+
+        name = modname.replace("_", "-")
+        res.append((name, doc))
+    return res
+
+
+def exists(name):
+    """Return whether the given backend exists."""
+    if len(name) == 0:
+        return False
+    if name == "nop":
+        return True
+    name = name.replace("-", "_")
+    return tracetool.try_import("tracetool.backend." + name)[1]
+
+
+class Wrapper:
+    def __init__(self, backends, format):
+        self._backends = [backend.replace("-", "_") for backend in backends]
+        self._format = format.replace("-", "_")
+        for backend in self._backends:
+            assert exists(backend)
+        assert tracetool.format.exists(self._format)
+
+    def _run_function(self, name, *args, **kwargs):
+        for backend in self._backends:
+            func = tracetool.try_import("tracetool.backend." + backend,
+                                        name % self._format, None)[1]
+            if func is not None:
+                func(*args, **kwargs)
+
+    def generate_begin(self, events, group):
+        self._run_function("generate_%s_begin", events, group)
+
+    def generate(self, event, group):
+        self._run_function("generate_%s", event, group)
+
+    def generate_backend_dstate(self, event, group):
+        self._run_function("generate_%s_backend_dstate", event, group)
+
+    def generate_end(self, events, group):
+        self._run_function("generate_%s_end", events, group)
diff --git a/scripts/tracetool/backend/dtrace.py b/scripts/tracetool/backend/dtrace.py
new file mode 100644
index 000000000..e17edc9b9
--- /dev/null
+++ b/scripts/tracetool/backend/dtrace.py
@@ -0,0 +1,72 @@
+# -*- coding: utf-8 -*-
+
+"""
+DTrace/SystemTAP backend.
+"""
+
+__author__     = "Lluís Vilanova "
+__copyright__  = "Copyright 2012-2017, Lluís Vilanova "
+__license__    = "GPL version 2 or (at your option) any later version"
+
+__maintainer__ = "Stefan Hajnoczi"
+__email__      = "stefanha@redhat.com"
+
+
+from tracetool import out
+
+
+PUBLIC = True
+
+
+PROBEPREFIX = None
+
+def probeprefix():
+    if PROBEPREFIX is None:
+        raise ValueError("you must set PROBEPREFIX")
+    return PROBEPREFIX
+
+
+BINARY = None
+
+def binary():
+    if BINARY is None:
+        raise ValueError("you must set BINARY")
+    return BINARY
+
+
+def generate_h_begin(events, group):
+    if group == "root":
+        header = "trace-dtrace-root.h"
+    else:
+        header = "trace-dtrace-%s.h" % group
+
+    # Workaround for ust backend, which also includes  and may
+    # require SDT_USE_VARIADIC to be defined. If dtrace includes 
+    # first without defining SDT_USE_VARIADIC then ust breaks because the
+    # STAP_PROBEV() macro is not defined.
+    out('#ifndef SDT_USE_VARIADIC')
+    out('#define SDT_USE_VARIADIC 1')
+    out('#endif')
+
+    out('#include "%s"' % header,
+        '')
+
+    out('#undef SDT_USE_VARIADIC')
+
+    # SystemTap defines __ENABLED() but other DTrace
+    # implementations might not.
+    for e in events:
+        out('#ifndef QEMU_%(uppername)s_ENABLED',
+            '#define QEMU_%(uppername)s_ENABLED() true',
+            '#endif',
+            uppername=e.name.upper())
+
+def generate_h(event, group):
+    out('    QEMU_%(uppername)s(%(argnames)s);',
+        uppername=event.name.upper(),
+        argnames=", ".join(event.args.names()))
+
+
+def generate_h_backend_dstate(event, group):
+    out('    QEMU_%(uppername)s_ENABLED() || \\',
+        uppername=event.name.upper())
diff --git a/scripts/tracetool/backend/ftrace.py b/scripts/tracetool/backend/ftrace.py
new file mode 100644
index 000000000..e9844dd33
--- /dev/null
+++ b/scripts/tracetool/backend/ftrace.py
@@ -0,0 +1,51 @@
+# -*- coding: utf-8 -*-
+
+"""
+Ftrace built-in backend.
+"""
+
+__author__     = "Eiichi Tsukata "
+__copyright__  = "Copyright (C) 2013 Hitachi, Ltd."
+__license__    = "GPL version 2 or (at your option) any later version"
+
+__maintainer__ = "Stefan Hajnoczi"
+__email__      = "stefanha@redhat.com"
+
+
+from tracetool import out
+
+
+PUBLIC = True
+
+
+def generate_h_begin(events, group):
+    out('#include "trace/ftrace.h"',
+        '')
+
+
+def generate_h(event, group):
+    argnames = ", ".join(event.args.names())
+    if len(event.args) > 0:
+        argnames = ", " + argnames
+
+    out('    {',
+        '        char ftrace_buf[MAX_TRACE_STRLEN];',
+        '        int unused __attribute__ ((unused));',
+        '        int trlen;',
+        '        if (trace_event_get_state(%(event_id)s)) {',
+        '            trlen = snprintf(ftrace_buf, MAX_TRACE_STRLEN,',
+        '                             "%(name)s " %(fmt)s "\\n" %(argnames)s);',
+        '            trlen = MIN(trlen, MAX_TRACE_STRLEN - 1);',
+        '            unused = write(trace_marker_fd, ftrace_buf, trlen);',
+        '        }',
+        '    }',
+        name=event.name,
+        args=event.args,
+        event_id="TRACE_" + event.name.upper(),
+        fmt=event.fmt.rstrip("\n"),
+        argnames=argnames)
+
+
+def generate_h_backend_dstate(event, group):
+    out('    trace_event_get_state_dynamic_by_id(%(event_id)s) || \\',
+        event_id="TRACE_" + event.name.upper())
diff --git a/scripts/tracetool/backend/log.py b/scripts/tracetool/backend/log.py
new file mode 100644
index 000000000..877222bbe
--- /dev/null
+++ b/scripts/tracetool/backend/log.py
@@ -0,0 +1,53 @@
+# -*- coding: utf-8 -*-
+
+"""
+Stderr built-in backend.
+"""
+
+__author__     = "Lluís Vilanova "
+__copyright__  = "Copyright 2012-2017, Lluís Vilanova "
+__license__    = "GPL version 2 or (at your option) any later version"
+
+__maintainer__ = "Stefan Hajnoczi"
+__email__      = "stefanha@redhat.com"
+
+
+from tracetool import out
+
+
+PUBLIC = True
+
+
+def generate_h_begin(events, group):
+    out('#include "qemu/log-for-trace.h"',
+        '')
+
+
+def generate_h(event, group):
+    argnames = ", ".join(event.args.names())
+    if len(event.args) > 0:
+        argnames = ", " + argnames
+
+    if "vcpu" in event.properties:
+        # already checked on the generic format code
+        cond = "true"
+    else:
+        cond = "trace_event_get_state(%s)" % ("TRACE_" + event.name.upper())
+
+    out('    if (%(cond)s && qemu_loglevel_mask(LOG_TRACE)) {',
+        '        struct timeval _now;',
+        '        gettimeofday(&_now, NULL);',
+        '        qemu_log("%%d@%%zu.%%06zu:%(name)s " %(fmt)s "\\n",',
+        '                 qemu_get_thread_id(),',
+        '                 (size_t)_now.tv_sec, (size_t)_now.tv_usec',
+        '                 %(argnames)s);',
+        '    }',
+        cond=cond,
+        name=event.name,
+        fmt=event.fmt.rstrip("\n"),
+        argnames=argnames)
+
+
+def generate_h_backend_dstate(event, group):
+    out('    trace_event_get_state_dynamic_by_id(%(event_id)s) || \\',
+        event_id="TRACE_" + event.name.upper())
diff --git a/scripts/tracetool/backend/simple.py b/scripts/tracetool/backend/simple.py
new file mode 100644
index 000000000..a74d61fcd
--- /dev/null
+++ b/scripts/tracetool/backend/simple.py
@@ -0,0 +1,111 @@
+# -*- coding: utf-8 -*-
+
+"""
+Simple built-in backend.
+"""
+
+__author__     = "Lluís Vilanova "
+__copyright__  = "Copyright 2012-2017, Lluís Vilanova "
+__license__    = "GPL version 2 or (at your option) any later version"
+
+__maintainer__ = "Stefan Hajnoczi"
+__email__      = "stefanha@redhat.com"
+
+
+from tracetool import out
+
+
+PUBLIC = True
+
+
+def is_string(arg):
+    strtype = ('const char*', 'char*', 'const char *', 'char *')
+    arg_strip = arg.lstrip()
+    if arg_strip.startswith(strtype) and arg_strip.count('*') == 1:
+        return True
+    else:
+        return False
+
+
+def generate_h_begin(events, group):
+    for event in events:
+        out('void _simple_%(api)s(%(args)s);',
+            api=event.api(),
+            args=event.args)
+    out('')
+
+
+def generate_h(event, group):
+    out('    _simple_%(api)s(%(args)s);',
+        api=event.api(),
+        args=", ".join(event.args.names()))
+
+
+def generate_h_backend_dstate(event, group):
+    out('    trace_event_get_state_dynamic_by_id(%(event_id)s) || \\',
+        event_id="TRACE_" + event.name.upper())
+
+
+def generate_c_begin(events, group):
+    out('#include "qemu/osdep.h"',
+        '#include "trace/control.h"',
+        '#include "trace/simple.h"',
+        '')
+
+
+def generate_c(event, group):
+    out('void _simple_%(api)s(%(args)s)',
+        '{',
+        '    TraceBufferRecord rec;',
+        api=event.api(),
+        args=event.args)
+    sizes = []
+    for type_, name in event.args:
+        if is_string(type_):
+            out('    size_t arg%(name)s_len = %(name)s ? MIN(strlen(%(name)s), MAX_TRACE_STRLEN) : 0;',
+                name=name)
+            strsizeinfo = "4 + arg%s_len" % name
+            sizes.append(strsizeinfo)
+        else:
+            sizes.append("8")
+    sizestr = " + ".join(sizes)
+    if len(event.args) == 0:
+        sizestr = '0'
+
+    event_id = 'TRACE_' + event.name.upper()
+    if "vcpu" in event.properties:
+        # already checked on the generic format code
+        cond = "true"
+    else:
+        cond = "trace_event_get_state(%s)" % event_id
+
+    out('',
+        '    if (!%(cond)s) {',
+        '        return;',
+        '    }',
+        '',
+        '    if (trace_record_start(&rec, %(event_obj)s.id, %(size_str)s)) {',
+        '        return; /* Trace Buffer Full, Event Dropped ! */',
+        '    }',
+        cond=cond,
+        event_obj=event.api(event.QEMU_EVENT),
+        size_str=sizestr)
+
+    if len(event.args) > 0:
+        for type_, name in event.args:
+            # string
+            if is_string(type_):
+                out('    trace_record_write_str(&rec, %(name)s, arg%(name)s_len);',
+                    name=name)
+            # pointer var (not string)
+            elif type_.endswith('*'):
+                out('    trace_record_write_u64(&rec, (uintptr_t)(uint64_t *)%(name)s);',
+                    name=name)
+            # primitive data type
+            else:
+                out('    trace_record_write_u64(&rec, (uint64_t)%(name)s);',
+                   name=name)
+
+    out('    trace_record_finish(&rec);',
+        '}',
+        '')
diff --git a/scripts/tracetool/backend/syslog.py b/scripts/tracetool/backend/syslog.py
new file mode 100644
index 000000000..1373a9019
--- /dev/null
+++ b/scripts/tracetool/backend/syslog.py
@@ -0,0 +1,48 @@
+# -*- coding: utf-8 -*-
+
+"""
+Syslog built-in backend.
+"""
+
+__author__     = "Paul Durrant "
+__copyright__  = "Copyright 2016, Citrix Systems Inc."
+__license__    = "GPL version 2 or (at your option) any later version"
+
+__maintainer__ = "Stefan Hajnoczi"
+__email__      = "stefanha@redhat.com"
+
+
+from tracetool import out
+
+
+PUBLIC = True
+
+
+def generate_h_begin(events, group):
+    out('#include ',
+        '')
+
+
+def generate_h(event, group):
+    argnames = ", ".join(event.args.names())
+    if len(event.args) > 0:
+        argnames = ", " + argnames
+
+    if "vcpu" in event.properties:
+        # already checked on the generic format code
+        cond = "true"
+    else:
+        cond = "trace_event_get_state(%s)" % ("TRACE_" + event.name.upper())
+
+    out('    if (%(cond)s) {',
+        '        syslog(LOG_INFO, "%(name)s " %(fmt)s %(argnames)s);',
+        '    }',
+        cond=cond,
+        name=event.name,
+        fmt=event.fmt.rstrip("\n"),
+        argnames=argnames)
+
+
+def generate_h_backend_dstate(event, group):
+    out('    trace_event_get_state_dynamic_by_id(%(event_id)s) || \\',
+        event_id="TRACE_" + event.name.upper())
diff --git a/scripts/tracetool/backend/ust.py b/scripts/tracetool/backend/ust.py
new file mode 100644
index 000000000..c857516f2
--- /dev/null
+++ b/scripts/tracetool/backend/ust.py
@@ -0,0 +1,45 @@
+# -*- coding: utf-8 -*-
+
+"""
+LTTng User Space Tracing backend.
+"""
+
+__author__     = "Lluís Vilanova "
+__copyright__  = "Copyright 2012-2017, Lluís Vilanova "
+__license__    = "GPL version 2 or (at your option) any later version"
+
+__maintainer__ = "Stefan Hajnoczi"
+__email__      = "stefanha@redhat.com"
+
+
+from tracetool import out
+
+
+PUBLIC = True
+
+
+def generate_h_begin(events, group):
+    header = 'trace-ust-' + group + '.h'
+    out('#include ',
+        '#include "%s"' % header,
+        '',
+        '/* tracepoint_enabled() was introduced in LTTng UST 2.7 */',
+        '#ifndef tracepoint_enabled',
+        '#define tracepoint_enabled(a, b) true',
+        '#endif',
+        '')
+
+
+def generate_h(event, group):
+    argnames = ", ".join(event.args.names())
+    if len(event.args) > 0:
+        argnames = ", " + argnames
+
+    out('    tracepoint(qemu, %(name)s%(tp_args)s);',
+        name=event.name,
+        tp_args=argnames)
+
+
+def generate_h_backend_dstate(event, group):
+    out('    tracepoint_enabled(qemu, %(name)s) || \\',
+        name=event.name)
diff --git a/scripts/tracetool/format/__init__.py b/scripts/tracetool/format/__init__.py
new file mode 100644
index 000000000..2dc46f3dd
--- /dev/null
+++ b/scripts/tracetool/format/__init__.py
@@ -0,0 +1,84 @@
+# -*- coding: utf-8 -*-
+
+"""
+Format management.
+
+
+Creating new formats
+--------------------
+
+A new format named 'foo-bar' corresponds to Python module
+'tracetool/format/foo_bar.py'.
+
+A format module should provide a docstring, whose first non-empty line will be
+considered its short description.
+
+All formats must generate their contents through the 'tracetool.out' routine.
+
+
+Format functions
+----------------
+
+======== ==================================================================
+Function Description
+======== ==================================================================
+generate Called to generate a format-specific file.
+======== ==================================================================
+
+"""
+
+__author__     = "Lluís Vilanova "
+__copyright__  = "Copyright 2012-2014, Lluís Vilanova "
+__license__    = "GPL version 2 or (at your option) any later version"
+
+__maintainer__ = "Stefan Hajnoczi"
+__email__      = "stefanha@redhat.com"
+
+
+import os
+
+import tracetool
+
+
+def get_list():
+    """Get a list of (name, description) pairs."""
+    res = []
+    modnames = []
+    for filename in os.listdir(tracetool.format.__path__[0]):
+        if filename.endswith('.py') and filename != '__init__.py':
+            modnames.append(filename.rsplit('.', 1)[0])
+    for modname in sorted(modnames):
+        module = tracetool.try_import("tracetool.format." + modname)
+
+        # just in case; should never fail unless non-module files are put there
+        if not module[0]:
+            continue
+        module = module[1]
+
+        doc = module.__doc__
+        if doc is None:
+            doc = ""
+        doc = doc.strip().split("\n")[0]
+
+        name = modname.replace("_", "-")
+        res.append((name, doc))
+    return res
+
+
+def exists(name):
+    """Return whether the given format exists."""
+    if len(name) == 0:
+        return False
+    name = name.replace("-", "_")
+    return tracetool.try_import("tracetool.format." + name)[1]
+
+
+def generate(events, format, backend, group):
+    if not exists(format):
+        raise ValueError("unknown format: %s" % format)
+    format = format.replace("-", "_")
+    func = tracetool.try_import("tracetool.format." + format,
+                                "generate")[1]
+    if func is None:
+        raise AttributeError("format has no 'generate': %s" % format)
+    func(events, backend, group)
diff --git a/scripts/tracetool/format/c.py b/scripts/tracetool/format/c.py
new file mode 100644
index 000000000..c390c1844
--- /dev/null
+++ b/scripts/tracetool/format/c.py
@@ -0,0 +1,72 @@
+# -*- coding: utf-8 -*-
+
+"""
+trace/generated-tracers.c
+"""
+
+__author__     = "Lluís Vilanova "
+__copyright__  = "Copyright 2012-2014, Lluís Vilanova "
+__license__    = "GPL version 2 or (at your option) any later version"
+
+__maintainer__ = "Stefan Hajnoczi"
+__email__      = "stefanha@redhat.com"
+
+
+from tracetool import out
+
+
+def generate(events, backend, group):
+    active_events = [e for e in events
+                     if "disable" not in e.properties]
+
+    header = "trace-" + group + ".h"
+
+    out('/* This file is autogenerated by tracetool, do not edit. */',
+        '',
+        '#include "qemu/osdep.h"',
+        '#include "qemu/module.h"',
+        '#include "%s"' % header,
+        '')
+
+    for e in events:
+        out('uint16_t %s;' % e.api(e.QEMU_DSTATE))
+
+    for e in events:
+        if "vcpu" in e.properties:
+            vcpu_id = 0
+        else:
+            vcpu_id = "TRACE_VCPU_EVENT_NONE"
+        out('TraceEvent %(event)s = {',
+            '    .id = 0,',
+            '    .vcpu_id = %(vcpu_id)s,',
+            '    .name = \"%(name)s\",',
+            '    .sstate = %(sstate)s,',
+            '    .dstate = &%(dstate)s ',
+            '};',
+            event = e.api(e.QEMU_EVENT),
+            vcpu_id = vcpu_id,
+            name = e.name,
+            sstate = "TRACE_%s_ENABLED" % e.name.upper(),
+            dstate = e.api(e.QEMU_DSTATE))
+
+    out('TraceEvent *%(group)s_trace_events[] = {',
+        group = group.lower())
+
+    for e in events:
+        out('    &%(event)s,', event = e.api(e.QEMU_EVENT))
+
+    out('  NULL,',
+        '};',
+        '')
+
+    out('static void trace_%(group)s_register_events(void)',
+        '{',
+        '    trace_event_register_group(%(group)s_trace_events);',
+        '}',
+        'trace_init(trace_%(group)s_register_events)',
+        group = group.lower())
+
+    backend.generate_begin(active_events, group)
+    for event in active_events:
+        backend.generate(event, group)
+    backend.generate_end(active_events, group)
diff --git a/scripts/tracetool/format/d.py b/scripts/tracetool/format/d.py
new file mode 100644
index 000000000..ebfb71420
--- /dev/null
+++ b/scripts/tracetool/format/d.py
@@ -0,0 +1,77 @@
+# -*- coding: utf-8 -*-
+
+"""
+trace/generated-tracers.dtrace (DTrace only).
+"""
+
+__author__     = "Lluís Vilanova "
+__copyright__  = "Copyright 2012-2014, Lluís Vilanova "
+__license__    = "GPL version 2 or (at your option) any later version"
+
+__maintainer__ = "Stefan Hajnoczi"
+__email__      = "stefanha@redhat.com"
+
+
+from tracetool import out
+from sys import platform
+
+
+# Reserved keywords from
+# https://wikis.oracle.com/display/DTrace/Types,+Operators+and+Expressions
+RESERVED_WORDS = (
+    'auto', 'goto', 'sizeof', 'break', 'if', 'static', 'case', 'import',
+    'string', 'char', 'inline', 'stringof', 'const', 'int', 'struct',
+    'continue', 'long', 'switch', 'counter', 'offsetof', 'this',
+    'default', 'probe', 'translator', 'do', 'provider', 'typedef',
+    'double', 'register', 'union', 'else', 'restrict', 'unsigned',
+    'enum', 'return', 'void', 'extern', 'self', 'volatile', 'float',
+    'short', 'while', 'for', 'signed', 'xlate',
+)
+
+
+def generate(events, backend, group):
+    events = [e for e in events
+              if "disable" not in e.properties]
+
+    # SystemTap's dtrace(1) warns about empty "provider qemu {}" but is happy
+    # with an empty file.  Avoid the warning.
+    # But dtrace on macOS can't deal with empty files.
+    if not events and platform != "darwin":
+        return
+
+    out('/* This file is autogenerated by tracetool, do not edit. */'
+        '',
+        'provider qemu {')
+
+    for e in events:
+        args = []
+        for type_, name in e.args:
+            if platform == "darwin":
+                # macOS dtrace accepts only C99 _Bool
+                if type_ == 'bool':
+                    type_ = '_Bool'
+                if type_ == 'bool *':
+                    type_ = '_Bool *'
+                # It converts int8_t * in probe points to char * in header
+                # files and introduces [-Wpointer-sign] warning.
+                # Avoid it by changing probe type to signed char * beforehand.
+                if type_ == 'int8_t *':
+                    type_ = 'signed char *'
+
+            # SystemTap dtrace(1) emits a warning when long long is used
+            type_ = type_.replace('unsigned long long', 'uint64_t')
+            type_ = type_.replace('signed long long', 'int64_t')
+            type_ = type_.replace('long long', 'int64_t')
+
+            if name in RESERVED_WORDS:
+                name += '_'
+            args.append(type_ + ' ' + name)
+
+        # Define prototype for probe arguments
+        out('',
+            'probe %(name)s(%(args)s);',
+            name=e.name,
+            args=','.join(args))
+
+    out('',
+        '};')
diff --git a/scripts/tracetool/format/h.py b/scripts/tracetool/format/h.py
new file mode 100644
index 000000000..e94f0be7d
--- /dev/null
+++ b/scripts/tracetool/format/h.py
@@ -0,0 +1,103 @@
+# -*- coding: utf-8 -*-
+
+"""
+trace/generated-tracers.h
+"""
+
+__author__     = "Lluís Vilanova "
+__copyright__  = "Copyright 2012-2017, Lluís Vilanova "
+__license__    = "GPL version 2 or (at your option) any later version"
+
+__maintainer__ = "Stefan Hajnoczi"
+__email__      = "stefanha@redhat.com"
+
+
+from tracetool import out
+
+
+def generate(events, backend, group):
+    if group == "root":
+        header = "trace/control-vcpu.h"
+    else:
+        header = "trace/control.h"
+
+    out('/* This file is autogenerated by tracetool, do not edit. */',
+        '',
+        '#ifndef TRACE_%s_GENERATED_TRACERS_H' % group.upper(),
+        '#define TRACE_%s_GENERATED_TRACERS_H' % group.upper(),
+        '',
+        '#include "%s"' % header,
+        '')
+
+    for e in events:
+        out('extern TraceEvent %(event)s;',
+            event = e.api(e.QEMU_EVENT))
+
+    for e in events:
+        out('extern uint16_t %s;' % e.api(e.QEMU_DSTATE))
+
+    # static state
+    for e in events:
+        if 'disable' in e.properties:
+            enabled = 0
+        else:
+            enabled = 1
+        if "tcg-exec" in e.properties:
+            # a single define for the two "sub-events"
+            out('#define TRACE_%(name)s_ENABLED %(enabled)d',
+                name=e.original.name.upper(),
+                enabled=enabled)
+        out('#define TRACE_%s_ENABLED %d' % (e.name.upper(), enabled))
+
+    backend.generate_begin(events, group)
+
+    for e in events:
+        # tracer-specific dstate
+        out('',
+            '#define %(api)s() ( \\',
+            api=e.api(e.QEMU_BACKEND_DSTATE))
+
+        if "disable" not in e.properties:
+            backend.generate_backend_dstate(e, group)
+
+        out('    false)')
+
+        # tracer without checks
+        out('',
+            'static inline void %(api)s(%(args)s)',
+            '{',
+            api=e.api(e.QEMU_TRACE_NOCHECK),
+            args=e.args)
+
+        if "disable" not in e.properties:
+            backend.generate(e, group)
+
+        out('}')
+
+        # tracer wrapper with checks (per-vCPU tracing)
+        if "vcpu" in e.properties:
+            trace_cpu = next(iter(e.args))[1]
+            cond = "trace_event_get_vcpu_state(%(cpu)s,"\
+                   " TRACE_%(id)s)"\
+                   % dict(
+                       cpu=trace_cpu,
+                       id=e.name.upper())
+        else:
+            cond = "true"
+
+        out('',
+            'static inline void %(api)s(%(args)s)',
+            '{',
+            '    if (%(cond)s) {',
+            '        %(api_nocheck)s(%(names)s);',
+            '    }',
+            '}',
+            api=e.api(),
+            api_nocheck=e.api(e.QEMU_TRACE_NOCHECK),
+            args=e.args,
+            names=", ".join(e.args.names()),
+            cond=cond)
+
+    backend.generate_end(events, group)
+
+    out('#endif /* TRACE_%s_GENERATED_TRACERS_H */' % group.upper())
diff --git a/scripts/tracetool/format/log_stap.py b/scripts/tracetool/format/log_stap.py
new file mode 100644
index 000000000..b486beb67
--- /dev/null
+++ b/scripts/tracetool/format/log_stap.py
@@ -0,0 +1,123 @@
+# -*- coding: utf-8 -*-
+
+"""
+Generate .stp file that printfs log messages (DTrace with SystemTAP only).
+"""
+
+__author__     = "Daniel P. Berrange "
+__copyright__  = "Copyright (C) 2014-2019, Red Hat, Inc."
+__license__    = "GPL version 2 or (at your option) any later version"
+
+__maintainer__ = "Daniel Berrange"
+__email__      = "berrange@redhat.com"
+
+import re
+
+from tracetool import out
+from tracetool.backend.dtrace import binary, probeprefix
+from tracetool.backend.simple import is_string
+from tracetool.format.stap import stap_escape
+
+def global_var_name(name):
+    return probeprefix().replace(".", "_") + "_" + name
+
+STATE_SKIP = 0
+STATE_LITERAL = 1
+STATE_MACRO = 2
+
+def c_macro_to_format(macro):
+    if macro.startswith("PRI"):
+        return macro[3]
+
+    raise Exception("Unhandled macro '%s'" % macro)
+
+def c_fmt_to_stap(fmt):
+    state = 0
+    bits = []
+    literal = ""
+    macro = ""
+    escape = 0;
+    for i in range(len(fmt)):
+        if fmt[i] == '\\':
+            if escape:
+                escape = 0
+            else:
+                escape = 1
+            if state != STATE_LITERAL:
+                raise Exception("Unexpected escape outside string literal")
+            literal = literal + fmt[i]
+        elif fmt[i] == '"' and not escape:
+            if state == STATE_LITERAL:
+                state = STATE_SKIP
+                bits.append(literal)
+                literal = ""
+            else:
+                if state == STATE_MACRO:
+                    bits.append(c_macro_to_format(macro))
+                state = STATE_LITERAL
+        elif fmt[i] == ' ' or fmt[i] == '\t':
+            if state == STATE_MACRO:
+                bits.append(c_macro_to_format(macro))
+                macro = ""
+                state = STATE_SKIP
+            elif state == STATE_LITERAL:
+                literal = literal + fmt[i]
+        else:
+            escape = 0
+            if state == STATE_SKIP:
+                state = STATE_MACRO
+
+            if state == STATE_LITERAL:
+                literal = literal + fmt[i]
+            else:
+                macro = macro + fmt[i]
+
+    if state == STATE_MACRO:
+        bits.append(c_macro_to_format(macro))
+    elif state == STATE_LITERAL:
+        bits.append(literal)
+
+    fmt = re.sub("%(\d*)z(x|u|d)", "%\\1\\2", "".join(bits))
+    return fmt
+
+def generate(events, backend, group):
+    out('/* This file is autogenerated by tracetool, do not edit. */',
+        '')
+
+    for event_id, e in enumerate(events):
+        if 'disable' in e.properties:
+            continue
+
+        out('probe %(probeprefix)s.log.%(name)s = %(probeprefix)s.%(name)s ?',
+            '{',
+            probeprefix=probeprefix(),
+            name=e.name)
+
+        # Get references to userspace strings
+        for type_, name in e.args:
+            name = stap_escape(name)
+            if is_string(type_):
+                out('    try {',
+                    '        arg%(name)s_str = %(name)s ? ' +
+                    'user_string_n(%(name)s, 512) : ""',
+                    '    } catch {}',
+                    name=name)
+
+        # Determine systemtap's view of variable names
+        fields = ["pid()", "gettimeofday_ns()"]
+        for type_, name in e.args:
+            name = stap_escape(name)
+            if is_string(type_):
+                fields.append("arg" + name + "_str")
+            else:
+                fields.append(name)
+
+        # Emit the entire record in a single SystemTap printf()
+        arg_str = ', '.join(arg for arg in fields)
+        fmt_str = "%d@%d " + e.name + " " + c_fmt_to_stap(e.fmt) + "\\n"
+        out('    printf("%(fmt_str)s", %(arg_str)s)',
+            fmt_str=fmt_str, arg_str=arg_str)
+
+        out('}')
+
+    out()
diff --git a/scripts/tracetool/format/simpletrace_stap.py b/scripts/tracetool/format/simpletrace_stap.py
new file mode 100644
index 000000000..4f4633b4e
--- /dev/null
+++ b/scripts/tracetool/format/simpletrace_stap.py
@@ -0,0 +1,72 @@
+# -*- coding: utf-8 -*-
+
+"""
+Generate .stp file that outputs simpletrace binary traces (DTrace with SystemTAP only).
+"""
+
+__author__     = "Stefan Hajnoczi "
+__copyright__  = "Copyright (C) 2014, Red Hat, Inc."
+__license__    = "GPL version 2 or (at your option) any later version"
+
+__maintainer__ = "Stefan Hajnoczi"
+__email__      = "stefanha@redhat.com"
+
+
+from tracetool import out
+from tracetool.backend.dtrace import probeprefix
+from tracetool.backend.simple import is_string
+from tracetool.format.stap import stap_escape
+
+def global_var_name(name):
+    return probeprefix().replace(".", "_") + "_" + name
+
+def generate(events, backend, group):
+    out('/* This file is autogenerated by tracetool, do not edit. */',
+        '')
+
+    for event_id, e in enumerate(events):
+        if 'disable' in e.properties:
+            continue
+
+        out('probe %(probeprefix)s.simpletrace.%(name)s = %(probeprefix)s.%(name)s ?',
+            '{',
+            probeprefix=probeprefix(),
+            name=e.name)
+
+        # Calculate record size
+        sizes = ['24'] # sizeof(TraceRecord)
+        for type_, name in e.args:
+            name = stap_escape(name)
+            if is_string(type_):
+                out('    try {',
+                    '        arg%(name)s_str = %(name)s ? user_string_n(%(name)s, 512) : ""',
+                    '    } catch {}',
+                    '    arg%(name)s_len = strlen(arg%(name)s_str)',
+                    name=name)
+                sizes.append('4 + arg%s_len' % name)
+            else:
+                sizes.append('8')
+        sizestr = ' + '.join(sizes)
+
+        # Generate format string and value pairs for record header and arguments
+        fields = [('8b', str(event_id)),
+                  ('8b', 'gettimeofday_ns()'),
+                  ('4b', sizestr),
+                  ('4b', 'pid()')]
+        for type_, name in e.args:
+            name = stap_escape(name)
+            if is_string(type_):
+                fields.extend([('4b', 'arg%s_len' % name),
+                               ('.*s', 'arg%s_len, arg%s_str' % (name, name))])
+            else:
+                fields.append(('8b', name))
+
+        # Emit the entire record in a single SystemTap printf()
+        fmt_str = '%'.join(fmt for fmt, _ in fields)
+        arg_str = ', '.join(arg for _, arg in fields)
+        out('    printf("%%8b%%%(fmt_str)s", 1, %(arg_str)s)',
+            fmt_str=fmt_str, arg_str=arg_str)
+
+        out('}')
+
+    out()
diff --git a/scripts/tracetool/format/stap.py b/scripts/tracetool/format/stap.py
new file mode 100644
index 000000000..a218b0445
--- /dev/null
+++ b/scripts/tracetool/format/stap.py
@@ -0,0 +1,60 @@
+# -*- coding: utf-8 -*-
+
+"""
+Generate .stp file (DTrace with SystemTAP only).
+"""
+
+__author__     = "Lluís Vilanova "
+__copyright__  = "Copyright 2012-2014, Lluís Vilanova "
+__license__    = "GPL version 2 or (at your option) any later version"
+
+__maintainer__ = "Stefan Hajnoczi"
+__email__      = "stefanha@redhat.com"
+
+
+from tracetool import out
+from tracetool.backend.dtrace import binary, probeprefix
+
+
+# Technically 'self' is not used by systemtap yet, but
+# they recommended we keep it in the reserved list anyway
+RESERVED_WORDS = (
+    'break', 'catch', 'continue', 'delete', 'else', 'for',
+    'foreach', 'function', 'global', 'if', 'in', 'limit',
+    'long', 'next', 'probe', 'return', 'self', 'string',
+    'try', 'while'
+    )
+
+
+def stap_escape(identifier):
+    # Append underscore to reserved keywords
+    if identifier in RESERVED_WORDS:
+        return identifier + '_'
+    return identifier
+
+
+def generate(events, backend, group):
+    events = [e for e in events
+              if "disable" not in e.properties]
+
+    out('/* This file is autogenerated by tracetool, do not edit. */',
+        '')
+
+    for e in events:
+        # Define prototype for probe arguments
+        out('probe %(probeprefix)s.%(name)s = process("%(binary)s").mark("%(name)s")',
+            '{',
+            probeprefix=probeprefix(),
+            name=e.name,
+            binary=binary())
+
+        i = 1
+        if len(e.args) > 0:
+            for name in e.args.names():
+                name = stap_escape(name)
+                out('  %s = $arg%d;' % (name, i))
+                i += 1
+
+        out('}')
+
+    out()
diff --git a/scripts/tracetool/format/tcg_h.py b/scripts/tracetool/format/tcg_h.py
new file mode 100644
index 000000000..4d84440af
--- /dev/null
+++ b/scripts/tracetool/format/tcg_h.py
@@ -0,0 +1,83 @@
+# -*- coding: utf-8 -*-
+
+"""
+Generate .h file for TCG code generation.
+"""
+
+__author__     = "Lluís Vilanova "
+__copyright__  = "Copyright 2012-2017, Lluís Vilanova "
+__license__    = "GPL version 2 or (at your option) any later version"
+
+__maintainer__ = "Stefan Hajnoczi"
+__email__      = "stefanha@redhat.com"
+
+
+from tracetool import out, Arguments
+import tracetool.vcpu
+
+
+def vcpu_transform_args(args):
+    assert len(args) == 1
+    return Arguments([
+        args,
+        # NOTE: this name must be kept in sync with the one in "tcg_h"
+        # NOTE: Current helper code uses TCGv_env (CPUArchState*)
+        ("TCGv_env", "__tcg_" + args.names()[0]),
+    ])
+
+
+def generate(events, backend, group):
+    if group == "root":
+        header = "trace/trace-root.h"
+    else:
+        header = "trace.h"
+
+    out('/* This file is autogenerated by tracetool, do not edit. */',
+        '/* You must include this file after the inclusion of helper.h */',
+        '',
+        '#ifndef TRACE_%s_GENERATED_TCG_TRACERS_H' % group.upper(),
+        '#define TRACE_%s_GENERATED_TCG_TRACERS_H' % group.upper(),
+        '',
+        '#include "exec/helper-proto.h"',
+        '#include "%s"' % header,
+        '',
+        )
+
+    for e in events:
+        # just keep one of them
+        if "tcg-exec" not in e.properties:
+            continue
+
+        out('static inline void %(name_tcg)s(%(args)s)',
+            '{',
+            name_tcg=e.original.api(e.QEMU_TRACE_TCG),
+            args=tracetool.vcpu.transform_args("tcg_h", e.original))
+
+        if "disable" not in e.properties:
+            args_trans = e.original.event_trans.args
+            args_exec = tracetool.vcpu.transform_args(
+                "tcg_helper_c", e.original.event_exec, "wrapper")
+            if "vcpu" in e.properties:
+                trace_cpu = e.args.names()[0]
+                cond = "trace_event_get_vcpu_state(%(cpu)s,"\
+                       " TRACE_%(id)s)"\
+                       % dict(
+                           cpu=trace_cpu,
+                           id=e.original.event_exec.name.upper())
+            else:
+                cond = "true"
+
+            out('    %(name_trans)s(%(argnames_trans)s);',
+                '    if (%(cond)s) {',
+                '        gen_helper_%(name_exec)s(%(argnames_exec)s);',
+                '    }',
+                name_trans=e.original.event_trans.api(e.QEMU_TRACE),
+                name_exec=e.original.event_exec.api(e.QEMU_TRACE),
+                argnames_trans=", ".join(args_trans.names()),
+                argnames_exec=", ".join(args_exec.names()),
+                cond=cond)
+
+        out('}')
+
+    out('',
+        '#endif /* TRACE_%s_GENERATED_TCG_TRACERS_H */' % group.upper())
diff --git a/scripts/tracetool/format/tcg_helper_c.py b/scripts/tracetool/format/tcg_helper_c.py
new file mode 100644
index 000000000..72576e67d
--- /dev/null
+++ b/scripts/tracetool/format/tcg_helper_c.py
@@ -0,0 +1,79 @@
+# -*- coding: utf-8 -*-
+
+"""
+Generate trace/generated-helpers.c.
+"""
+
+__author__     = "Lluís Vilanova "
+__copyright__  = "Copyright 2012-2017, Lluís Vilanova "
+__license__    = "GPL version 2 or (at your option) any later version"
+
+__maintainer__ = "Stefan Hajnoczi"
+__email__      = "stefanha@redhat.com"
+
+
+from tracetool import Arguments, out
+from tracetool.transform import *
+import tracetool.vcpu
+
+
+def vcpu_transform_args(args, mode):
+    assert len(args) == 1
+    # NOTE: this name must be kept in sync with the one in "tcg_h"
+    args = Arguments([(args.types()[0], "__tcg_" + args.names()[0])])
+    if mode == "code":
+        return Arguments([
+            # Does cast from helper requirements to tracing types
+            ("CPUState *", "env_cpu(%s)" % args.names()[0]),
+        ])
+    else:
+        args = Arguments([
+            # NOTE: Current helper code uses TCGv_env (CPUArchState*)
+            ("CPUArchState *", args.names()[0]),
+        ])
+        if mode == "header":
+            return args
+        elif mode == "wrapper":
+            return args.transform(HOST_2_TCG)
+        else:
+            assert False
+
+
+def generate(events, backend, group):
+    if group == "root":
+        header = "trace/trace-root.h"
+    else:
+        header = "trace.h"
+
+    events = [e for e in events
+              if "disable" not in e.properties]
+
+    out('/* This file is autogenerated by tracetool, do not edit. */',
+        '',
+        '#include "qemu/osdep.h"',
+        '#include "cpu.h"',
+        '#include "exec/helper-proto.h"',
+        '#include "%s"' % header,
+        '',
+        )
+
+    for e in events:
+        if "tcg-exec" not in e.properties:
+            continue
+
+        e_args_api = tracetool.vcpu.transform_args(
+            "tcg_helper_c", e.original, "header").transform(
+                HOST_2_TCG_COMPAT, TCG_2_TCG_HELPER_DEF)
+        e_args_call = tracetool.vcpu.transform_args(
+            "tcg_helper_c", e, "code")
+
+        out('void %(name_tcg)s(%(args_api)s)',
+            '{',
+            # NOTE: the check was already performed at TCG-generation time
+            '    %(name)s(%(args_call)s);',
+            '}',
+            name_tcg="helper_%s_proxy" % e.api(),
+            name=e.api(e.QEMU_TRACE_NOCHECK),
+            args_api=e_args_api,
+            args_call=", ".join(e_args_call.casted()),
+            )
diff --git a/scripts/tracetool/format/tcg_helper_h.py b/scripts/tracetool/format/tcg_helper_h.py
new file mode 100644
index 000000000..08554fbc8
--- /dev/null
+++ b/scripts/tracetool/format/tcg_helper_h.py
@@ -0,0 +1,48 @@
+# -*- coding: utf-8 -*-
+
+"""
+Generate trace/generated-helpers.h.
+"""
+
+__author__     = "Lluís Vilanova "
+__copyright__  = "Copyright 2012-2016, Lluís Vilanova "
+__license__    = "GPL version 2 or (at your option) any later version"
+
+__maintainer__ = "Stefan Hajnoczi"
+__email__      = "stefanha@redhat.com"
+
+
+from tracetool import out
+from tracetool.transform import *
+import tracetool.vcpu
+
+
+def generate(events, backend, group):
+    events = [e for e in events
+              if "disable" not in e.properties]
+
+    out('/* This file is autogenerated by tracetool, do not edit. */',
+        '',
+        )
+
+    for e in events:
+        if "tcg-exec" not in e.properties:
+            continue
+
+        # TCG helper proxy declaration
+        fmt = "DEF_HELPER_FLAGS_%(argc)d(%(name)s, %(flags)svoid%(types)s)"
+        e_args = tracetool.vcpu.transform_args("tcg_helper_c", e.original, "header")
+        args = e_args.transform(HOST_2_TCG_COMPAT, HOST_2_TCG,
+                                TCG_2_TCG_HELPER_DECL)
+        types = ", ".join(args.types())
+        if types != "":
+            types = ", " + types
+
+        flags = "TCG_CALL_NO_RWG, "
+
+        out(fmt,
+            flags=flags,
+            argc=len(args),
+            name=e.api() + "_proxy",
+            types=types,
+            )
diff --git a/scripts/tracetool/format/tcg_helper_wrapper_h.py b/scripts/tracetool/format/tcg_helper_wrapper_h.py
new file mode 100644
index 000000000..0c5a9797d
--- /dev/null
+++ b/scripts/tracetool/format/tcg_helper_wrapper_h.py
@@ -0,0 +1,70 @@
+# -*- coding: utf-8 -*-
+
+"""
+Generate trace/generated-helpers-wrappers.h.
+"""
+
+__author__     = "Lluís Vilanova "
+__copyright__  = "Copyright 2012-2016, Lluís Vilanova "
+__license__    = "GPL version 2 or (at your option) any later version"
+
+__maintainer__ = "Stefan Hajnoczi"
+__email__      = "stefanha@redhat.com"
+
+
+from tracetool import out
+from tracetool.transform import *
+import tracetool.vcpu
+
+
+def generate(events, backend, group):
+    events = [e for e in events
+              if "disable" not in e.properties]
+
+    out('/* This file is autogenerated by tracetool, do not edit. */',
+        '',
+        '#define tcg_temp_new_nop(v) (v)',
+        '#define tcg_temp_free_nop(v)',
+        '',
+        )
+
+    for e in events:
+        if "tcg-exec" not in e.properties:
+            continue
+
+        # tracetool.generate always transforms types to host
+        e_args = tracetool.vcpu.transform_args("tcg_helper_c", e.original, "wrapper")
+
+        # mixed-type to TCG helper bridge
+        args_tcg_compat = e_args.transform(HOST_2_TCG_COMPAT)
+
+        code_new = [
+            "%(tcg_type)s __%(name)s = %(tcg_func)s(%(name)s);" %
+            {"tcg_type": transform_type(type_, HOST_2_TCG),
+             "tcg_func": transform_type(type_, HOST_2_TCG_TMP_NEW),
+             "name": name}
+            for (type_, name) in args_tcg_compat
+        ]
+
+        code_free = [
+            "%(tcg_func)s(__%(name)s);" %
+            {"tcg_func": transform_type(type_, HOST_2_TCG_TMP_FREE),
+             "name": name}
+            for (type_, name) in args_tcg_compat
+        ]
+
+        gen_name = "gen_helper_" + e.api()
+
+        out('static inline void %(name)s(%(args)s)',
+            '{',
+            '    %(code_new)s',
+            '    %(proxy_name)s(%(tmp_names)s);',
+            '    %(code_free)s',
+            '}',
+            name=gen_name,
+            args=e_args,
+            proxy_name=gen_name + "_proxy",
+            code_new="\n    ".join(code_new),
+            code_free="\n    ".join(code_free),
+            tmp_names=", ".join(["__%s" % name for _, name in e_args]),
+            )
diff --git a/scripts/tracetool/format/ust_events_c.py b/scripts/tracetool/format/ust_events_c.py
new file mode 100644
index 000000000..deced9533
--- /dev/null
+++ b/scripts/tracetool/format/ust_events_c.py
@@ -0,0 +1,34 @@
+# -*- coding: utf-8 -*-
+
+"""
+trace/generated-ust.c
+"""
+
+__author__     = "Mohamad Gebai "
+__copyright__  = "Copyright 2012, Mohamad Gebai "
+__license__    = "GPL version 2 or (at your option) any later version"
+
+__maintainer__ = "Stefan Hajnoczi"
+__email__      = "stefanha@redhat.com"
+
+
+from tracetool import out
+
+
+def generate(events, backend, group):
+    events = [e for e in events
+              if "disabled" not in e.properties]
+
+    out('/* This file is autogenerated by tracetool, do not edit. */',
+        '',
+        '#include "qemu/osdep.h"',
+        '',
+        '#define TRACEPOINT_DEFINE',
+        '#define TRACEPOINT_CREATE_PROBES',
+        '',
+        '/* If gcc version 4.7 or older is used, LTTng ust gives a warning when compiling with',
+        '   -Wredundant-decls.',
+        ' */',
+        '#pragma GCC diagnostic ignored "-Wredundant-decls"',
+        '',
+        '#include "trace-ust-all.h"')
diff --git a/scripts/tracetool/format/ust_events_h.py b/scripts/tracetool/format/ust_events_h.py
new file mode 100644
index 000000000..6ce559f6c
--- /dev/null
+++ b/scripts/tracetool/format/ust_events_h.py
@@ -0,0 +1,105 @@
+# -*- coding: utf-8 -*-
+
+"""
+trace/generated-ust-provider.h
+"""
+
+__author__     = "Mohamad Gebai "
+__copyright__  = "Copyright 2012, Mohamad Gebai "
+__license__    = "GPL version 2 or (at your option) any later version"
+
+__maintainer__ = "Stefan Hajnoczi"
+__email__      = "stefanha@redhat.com"
+
+
+from tracetool import out
+
+
+def generate(events, backend, group):
+    events = [e for e in events
+              if "disabled" not in e.properties]
+
+    if group == "all":
+        include = "trace-ust-all.h"
+    else:
+        include = "trace-ust.h"
+
+    out('/* This file is autogenerated by tracetool, do not edit. */',
+        '',
+        '#undef TRACEPOINT_PROVIDER',
+        '#define TRACEPOINT_PROVIDER qemu',
+        '',
+        '#undef TRACEPOINT_INCLUDE_FILE',
+        '#define TRACEPOINT_INCLUDE_FILE ./%s' % include,
+        '',
+        '#if !defined (TRACE_%s_GENERATED_UST_H) || \\'  % group.upper(),
+        '     defined(TRACEPOINT_HEADER_MULTI_READ)',
+        '#define TRACE_%s_GENERATED_UST_H' % group.upper(),
+        '',
+        '#include ',
+        '',
+        '/*',
+        ' * LTTng ust 2.0 does not allow you to use TP_ARGS(void) for tracepoints',
+        ' * requiring no arguments. We define these macros introduced in more recent'
+        ' * versions of LTTng ust as a workaround',
+        ' */',
+        '#ifndef _TP_EXPROTO1',
+        '#define _TP_EXPROTO1(a)               void',
+        '#endif',
+        '#ifndef _TP_EXDATA_PROTO1',
+        '#define _TP_EXDATA_PROTO1(a)          void *__tp_data',
+        '#endif',
+        '#ifndef _TP_EXDATA_VAR1',
+        '#define _TP_EXDATA_VAR1(a)            __tp_data',
+        '#endif',
+        '#ifndef _TP_EXVAR1',
+        '#define _TP_EXVAR1(a)',
+        '#endif',
+        '')
+
+    for e in events:
+        if len(e.args) > 0:
+            out('TRACEPOINT_EVENT(',
+                '   qemu,',
+                '   %(name)s,',
+                '   TP_ARGS(%(args)s),',
+                '   TP_FIELDS(',
+                name=e.name,
+                args=", ".join(", ".join(i) for i in e.args))
+
+            types = e.args.types()
+            names = e.args.names()
+            fmts = e.formats()
+            for t,n,f in zip(types, names, fmts):
+                if ('char *' in t) or ('char*' in t):
+                    out('       ctf_string(' + n + ', ' + n + ')')
+                elif ("%p" in f) or ("x" in f) or ("PRIx" in f):
+                    out('       ctf_integer_hex('+ t + ', ' + n + ', ' + n + ')')
+                elif ("ptr" in t) or ("*" in t):
+                    out('       ctf_integer_hex('+ t + ', ' + n + ', ' + n + ')')
+                elif ('int' in t) or ('long' in t) or ('unsigned' in t) \
+                        or ('size_t' in t) or ('bool' in t):
+                    out('       ctf_integer(' + t + ', ' + n + ', ' + n + ')')
+                elif ('double' in t) or ('float' in t):
+                    out('       ctf_float(' + t + ', ' + n + ', ' + n + ')')
+                elif ('void *' in t) or ('void*' in t):
+                    out('       ctf_integer_hex(unsigned long, ' + n + ', ' + n + ')')
+
+            out('   )',
+                ')',
+                '')
+
+        else:
+            out('TRACEPOINT_EVENT(',
+                '   qemu,',
+                '   %(name)s,',
+                '   TP_ARGS(void),',
+                '   TP_FIELDS()',
+                ')',
+                '',
+                name=e.name)
+
+    out('#endif /* TRACE_%s_GENERATED_UST_H */' % group.upper(),
+        '',
+        '/* This part must be outside ifdef protection */',
+        '#include ')
diff --git a/scripts/tracetool/transform.py b/scripts/tracetool/transform.py
new file mode 100644
index 000000000..ea8b27799
--- /dev/null
+++ b/scripts/tracetool/transform.py
@@ -0,0 +1,168 @@
+# -*- coding: utf-8 -*-
+
+"""
+Type-transformation rules.
+"""
+
+__author__     = "Lluís Vilanova "
+__copyright__  = "Copyright 2012-2016, Lluís Vilanova "
+__license__    = "GPL version 2 or (at your option) any later version"
+
+__maintainer__ = "Stefan Hajnoczi"
+__email__      = "stefanha@redhat.com"
+
+
+def _transform_type(type_, trans):
+    if isinstance(trans, str):
+        return trans
+    elif isinstance(trans, dict):
+        if type_ in trans:
+            return _transform_type(type_, trans[type_])
+        elif None in trans:
+            return _transform_type(type_, trans[None])
+        else:
+            return type_
+    elif callable(trans):
+        return trans(type_)
+    else:
+        raise ValueError("Invalid type transformation rule: %s" % trans)
+
+
+def transform_type(type_, *trans):
+    """Return a new type transformed according to the given rules.
+
+    Applies each of the transformation rules in trans in order.
+
+    If an element of trans is a string, return it.
+
+    If an element of trans is a function, call it with type_ as its only
+    argument.
+
+    If an element of trans is a dict, search type_ in its keys. If type_ is
+    a key, use the value as a transformation rule for type_. Otherwise, if
+    None is a key use the value as a transformation rule for type_.
+
+    Otherwise, return type_.
+
+    Parameters
+    ----------
+    type_ : str
+        Type to transform.
+    trans : list of function or dict
+        Type transformation rules.
+    """
+    if len(trans) == 0:
+        raise ValueError
+    res = type_
+    for t in trans:
+        res = _transform_type(res, t)
+    return res
+
+
+##################################################
+# tcg -> host
+
+def _tcg_2_host(type_):
+    if type_ == "TCGv":
+        # force a fixed-size type (target-independent)
+        return "uint64_t"
+    else:
+        return type_
+
+TCG_2_HOST = {
+    "TCGv_i32": "uint32_t",
+    "TCGv_i64": "uint64_t",
+    "TCGv_ptr": "void *",
+    None: _tcg_2_host,
+    }
+
+
+##################################################
+# host -> host compatible with tcg sizes
+
+HOST_2_TCG_COMPAT = {
+    "uint8_t": "uint32_t",
+    "uint16_t": "uint32_t",
+    }
+
+
+##################################################
+# host/tcg -> tcg
+
+def _host_2_tcg(type_):
+    if type_.startswith("TCGv"):
+        return type_
+    raise ValueError("Don't know how to translate '%s' into a TCG type\n" % type_)
+
+HOST_2_TCG = {
+    "uint32_t": "TCGv_i32",
+    "uint64_t": "TCGv_i64",
+    "void *"  : "TCGv_ptr",
+    "CPUArchState *": "TCGv_env",
+    None: _host_2_tcg,
+    }
+
+
+##################################################
+# tcg -> tcg helper definition
+
+def _tcg_2_helper_def(type_):
+    if type_ == "TCGv":
+        return "target_ulong"
+    else:
+        return type_
+
+TCG_2_TCG_HELPER_DEF = {
+    "TCGv_i32": "uint32_t",
+    "TCGv_i64": "uint64_t",
+    "TCGv_ptr": "void *",
+    None: _tcg_2_helper_def,
+    }
+
+
+##################################################
+# tcg -> tcg helper declaration
+
+def _tcg_2_tcg_helper_decl_error(type_):
+    raise ValueError("Don't know how to translate type '%s' into a TCG helper declaration type\n" % type_)
+
+TCG_2_TCG_HELPER_DECL = {
+    "TCGv"    : "tl",
+    "TCGv_ptr": "ptr",
+    "TCGv_i32": "i32",
+    "TCGv_i64": "i64",
+    "TCGv_env": "env",
+    None: _tcg_2_tcg_helper_decl_error,
+    }
+
+
+##################################################
+# host/tcg -> tcg temporal constant allocation
+
+def _host_2_tcg_tmp_new(type_):
+    if type_.startswith("TCGv"):
+        return "tcg_temp_new_nop"
+    raise ValueError("Don't know how to translate type '%s' into a TCG temporal allocation" % type_)
+
+HOST_2_TCG_TMP_NEW = {
+    "uint32_t": "tcg_const_i32",
+    "uint64_t": "tcg_const_i64",
+    "void *"  : "tcg_const_ptr",
+    None: _host_2_tcg_tmp_new,
+    }
+
+
+##################################################
+# host/tcg -> tcg temporal constant deallocation
+
+def _host_2_tcg_tmp_free(type_):
+    if type_.startswith("TCGv"):
+        return "tcg_temp_free_nop"
+    raise ValueError("Don't know how to translate type '%s' into a TCG temporal deallocation" % type_)
+
+HOST_2_TCG_TMP_FREE = {
+    "uint32_t": "tcg_temp_free_i32",
+    "uint64_t": "tcg_temp_free_i64",
+    "void *"  : "tcg_temp_free_ptr",
+    None: _host_2_tcg_tmp_free,
+    }
diff --git a/scripts/tracetool/vcpu.py b/scripts/tracetool/vcpu.py
new file mode 100644
index 000000000..868b4cb04
--- /dev/null
+++ b/scripts/tracetool/vcpu.py
@@ -0,0 +1,69 @@
+# -*- coding: utf-8 -*-
+
+"""
+Generic management for the 'vcpu' property.
+
+"""
+
+__author__     = "Lluís Vilanova "
+__copyright__  = "Copyright 2016, Lluís Vilanova "
+__license__    = "GPL version 2 or (at your option) any later version"
+
+__maintainer__ = "Stefan Hajnoczi"
+__email__      = "stefanha@redhat.com"
+
+
+from tracetool import Arguments, try_import
+
+
+def transform_event(event):
+    """Transform event to comply with the 'vcpu' property (if present)."""
+    if "vcpu" in event.properties:
+        # events with 'tcg-trans' and 'tcg-exec' are auto-generated from
+        # already-patched events
+        assert "tcg-trans" not in event.properties
+        assert "tcg-exec" not in event.properties
+
+        event.args = Arguments([("void *", "__cpu"), event.args])
+        if "tcg" in event.properties:
+            fmt = "\"cpu=%p \""
+            event.fmt = [fmt + event.fmt[0],
+                         fmt + event.fmt[1]]
+        else:
+            fmt = "\"cpu=%p \""
+            event.fmt = fmt + event.fmt
+    return event
+
+
+def transform_args(format, event, *args, **kwargs):
+    """Transforms the arguments to suit the specified format.
+
+    The format module must implement function 'vcpu_args', which receives the
+    implicit arguments added by the 'vcpu' property, and must return suitable
+    arguments for the given format.
+
+    The function is only called for events with the 'vcpu' property.
+
+    Parameters
+    ==========
+    format : str
+        Format module name.
+    event : Event
+    args, kwargs
+        Passed to 'vcpu_transform_args'.
+
+    Returns
+    =======
+    Arguments
+        The transformed arguments, including the non-implicit ones.
+
+    """
+    if "vcpu" in event.properties:
+        ok, func = try_import("tracetool.format." + format,
+                              "vcpu_transform_args")
+        assert ok
+        assert func
+        return Arguments([func(event.args[:1], *args, **kwargs),
+                          event.args[1:]])
+    else:
+        return event.args
diff --git a/scripts/travis/coverage-summary.sh b/scripts/travis/coverage-summary.sh
new file mode 100755
index 000000000..d7086cf9c
--- /dev/null
+++ b/scripts/travis/coverage-summary.sh
@@ -0,0 +1,27 @@
+#!/bin/sh
+#
+# Author: Alex Bennée 
+#
+# Summerise the state of code coverage with gcovr and tweak the output
+# to be more sane on Travis hosts. As we expect to be executed on a
+# throw away CI instance we do spam temp files all over the shop. You
+# most likely don't want to execute this script but just call gcovr
+# directly. See also "make coverage-report"
+#
+# This code is licensed under the GPL version 2 or later.  See
+# the COPYING file in the top-level directory.
+
+# first generate the coverage report
+gcovr -p -o raw-report.txt
+
+# strip the full-path and line markers
+sed s@$PWD\/@@ raw-report.txt | sed s/[0-9]\*[,-]//g > simplified.txt
+
+# reflow lines that got split
+awk '/.[ch]$/ { printf("%s", $0); next } 1' simplified.txt > rejoined.txt
+
+# columnify
+column -t rejoined.txt > final.txt
+
+# and dump, stripping out 0% coverage
+grep -v "0%" final.txt
diff --git a/scripts/u2f-setup-gen.py b/scripts/u2f-setup-gen.py
new file mode 100755
index 000000000..2122598fe
--- /dev/null
+++ b/scripts/u2f-setup-gen.py
@@ -0,0 +1,170 @@
+#!/usr/bin/env python3
+#
+# Libu2f-emu setup directory generator for USB U2F key emulation.
+#
+# Copyright (c) 2020 César Belley 
+# Written by César Belley 
+#
+# This work is licensed under the terms of the GNU GPL, version 2
+# or, at your option, any later version.  See the COPYING file in
+# the top-level directory.
+
+import sys
+import os
+from random import randint
+from typing import Tuple
+
+from cryptography.hazmat.backends import default_backend
+from cryptography.hazmat.primitives.asymmetric import ec
+from cryptography.hazmat.primitives.serialization import Encoding, \
+    NoEncryption, PrivateFormat, PublicFormat
+from OpenSSL import crypto
+
+
+def write_setup_dir(dirpath: str, privkey_pem: bytes, cert_pem: bytes,
+                    entropy: bytes, counter: int) -> None:
+    """
+    Write the setup directory.
+
+    Args:
+        dirpath: The directory path.
+        key_pem: The private key PEM.
+        cert_pem: The certificate PEM.
+        entropy: The 48 bytes of entropy.
+        counter: The counter value.
+    """
+    # Directory
+    os.mkdir(dirpath)
+
+    # Private key
+    with open(f'{dirpath}/private-key.pem', 'bw') as f:
+        f.write(privkey_pem)
+
+    # Certificate
+    with open(f'{dirpath}/certificate.pem', 'bw') as f:
+        f.write(cert_pem)
+
+    # Entropy
+    with open(f'{dirpath}/entropy', 'wb') as f:
+        f.write(entropy)
+
+    # Counter
+    with open(f'{dirpath}/counter', 'w') as f:
+        f.write(f'{str(counter)}\n')
+
+
+def generate_ec_key_pair() -> Tuple[str, str]:
+    """
+    Generate an ec key pair.
+
+    Returns:
+        The private and public key PEM.
+    """
+    # Key generation
+    privkey = ec.generate_private_key(ec.SECP256R1, default_backend())
+    pubkey = privkey.public_key()
+
+    # PEM serialization
+    privkey_pem = privkey.private_bytes(encoding=Encoding.PEM,
+                                        format=PrivateFormat.TraditionalOpenSSL,
+                                        encryption_algorithm=NoEncryption())
+    pubkey_pem = pubkey.public_bytes(encoding=Encoding.PEM,
+                                     format=PublicFormat.SubjectPublicKeyInfo)
+    return privkey_pem, pubkey_pem
+
+
+def generate_certificate(privkey_pem: str, pubkey_pem: str) -> str:
+    """
+    Generate a x509 certificate from a key pair.
+
+    Args:
+        privkey_pem: The private key PEM.
+        pubkey_pem: The public key PEM.
+
+    Returns:
+        The certificate PEM.
+    """
+    # Convert key pair
+    privkey = crypto.load_privatekey(crypto.FILETYPE_PEM, privkey_pem)
+    pubkey = crypto.load_publickey(crypto.FILETYPE_PEM, pubkey_pem)
+
+    # New x509v3 certificate
+    cert = crypto.X509()
+    cert.set_version(0x2)
+
+    # Serial number
+    cert.set_serial_number(randint(1, 2 ** 64))
+
+    # Before / After
+    cert.gmtime_adj_notBefore(0)
+    cert.gmtime_adj_notAfter(4 * (365 * 24 * 60 * 60))
+
+    # Public key
+    cert.set_pubkey(pubkey)
+
+    # Subject name and issueer
+    cert.get_subject().CN = "U2F emulated"
+    cert.set_issuer(cert.get_subject())
+
+    # Extensions
+    cert.add_extensions([
+        crypto.X509Extension(b"subjectKeyIdentifier",
+                             False, b"hash", subject=cert),
+    ])
+    cert.add_extensions([
+        crypto.X509Extension(b"authorityKeyIdentifier",
+                             False, b"keyid:always", issuer=cert),
+    ])
+    cert.add_extensions([
+        crypto.X509Extension(b"basicConstraints", True, b"CA:TRUE")
+    ])
+
+    # Signature
+    cert.sign(privkey, 'sha256')
+
+    return crypto.dump_certificate(crypto.FILETYPE_PEM, cert)
+
+
+def generate_setup_dir(dirpath: str) -> None:
+    """
+    Generates the setup directory.
+
+    Args:
+        dirpath: The directory path.
+    """
+    # Key pair
+    privkey_pem, pubkey_pem = generate_ec_key_pair()
+
+    # Certificate
+    certificate_pem = generate_certificate(privkey_pem, pubkey_pem)
+
+    # Entropy
+    entropy = os.urandom(48)
+
+    # Counter
+    counter = 0
+
+    # Write
+    write_setup_dir(dirpath, privkey_pem, certificate_pem, entropy, counter)
+
+
+def main() -> None:
+    """
+    Main function
+    """
+    # Dir path
+    if len(sys.argv) != 2:
+        sys.stderr.write(f'Usage: {sys.argv[0]} \n')
+        exit(2)
+    dirpath = sys.argv[1]
+
+    # Dir non existence
+    if os.path.exists(dirpath):
+        sys.stderr.write(f'Directory: {dirpath} already exists.\n')
+        exit(1)
+
+    generate_setup_dir(dirpath)
+
+
+if __name__ == '__main__':
+    main()
diff --git a/scripts/undefsym.py b/scripts/undefsym.py
new file mode 100644
index 000000000..4b6a72d95
--- /dev/null
+++ b/scripts/undefsym.py
@@ -0,0 +1,48 @@
+#!/usr/bin/env python3
+
+# Before a shared module's DSO is produced, a static library is built for it
+# and passed to this script.  The script generates -Wl,-u options to force
+# the inclusion of symbol from libqemuutil.a if the shared modules need them,
+# This is necessary because the modules may use functions not needed by the
+# executable itself, which would cause the function to not be linked in.
+# Then the DSO loading would fail because of the missing symbol.
+
+
+import sys
+import subprocess
+
+def filter_lines_set(stdout, from_staticlib):
+    linesSet = set()
+    for line in stdout.splitlines():
+        tokens = line.split(b' ')
+        if len(tokens) >= 2:
+            if from_staticlib and tokens[1] == b'U':
+                continue
+            if not from_staticlib and tokens[1] != b'U':
+                continue
+            new_line = b'-Wl,-u,' + tokens[0]
+            if not new_line in linesSet:
+                linesSet.add(new_line)
+    return linesSet
+
+def main(args):
+    if len(args) <= 3:
+        sys.exit(0)
+
+    nm = args[1]
+    staticlib = args[2]
+    pc = subprocess.run([nm, "-P", "-g", staticlib], stdout=subprocess.PIPE)
+    if pc.returncode != 0:
+        sys.exit(1)
+    staticlib_syms = filter_lines_set(pc.stdout, True)
+
+    shared_modules = args[3:]
+    pc = subprocess.run([nm, "-P", "-g"] + shared_modules, stdout=subprocess.PIPE)
+    if pc.returncode != 0:
+        sys.exit(1)
+    modules_undef_syms = filter_lines_set(pc.stdout, False)
+    lines = sorted(staticlib_syms.intersection(modules_undef_syms))
+    sys.stdout.buffer.write(b'\n'.join(lines))
+
+if __name__ == "__main__":
+    main(sys.argv)
diff --git a/scripts/update-linux-headers.sh b/scripts/update-linux-headers.sh
new file mode 100755
index 000000000..fa6f2b627
--- /dev/null
+++ b/scripts/update-linux-headers.sh
@@ -0,0 +1,241 @@
+#!/bin/sh -e
+#
+# Update Linux kernel headers QEMU requires from a specified kernel tree.
+#
+# Copyright (C) 2011 Siemens AG
+#
+# Authors:
+#  Jan Kiszka        
+#
+# This work is licensed under the terms of the GNU GPL version 2.
+# See the COPYING file in the top-level directory.
+
+tmpdir=$(mktemp -d)
+linux="$1"
+output="$2"
+
+if [ -z "$linux" ] || ! [ -d "$linux" ]; then
+    cat << EOF
+usage: update-kernel-headers.sh LINUX_PATH [OUTPUT_PATH]
+
+LINUX_PATH      Linux kernel directory to obtain the headers from
+OUTPUT_PATH     output directory, usually the qemu source tree (default: $PWD)
+EOF
+    exit 1
+fi
+
+if [ -z "$output" ]; then
+    output="$PWD"
+fi
+
+cp_portable() {
+    f=$1
+    to=$2
+    if
+        grep '#include' "$f" | grep -v -e 'linux/virtio' \
+                                     -e 'linux/types' \
+                                     -e 'stdint' \
+                                     -e 'linux/if_ether' \
+                                     -e 'input-event-codes' \
+                                     -e 'sys/' \
+                                     -e 'pvrdma_verbs' \
+                                     -e 'drm.h' \
+                                     -e 'limits' \
+                                     -e 'linux/const' \
+                                     -e 'linux/kernel' \
+                                     -e 'linux/sysinfo' \
+                                     -e 'asm-generic/kvm_para' \
+                                     > /dev/null
+    then
+        echo "Unexpected #include in input file $f".
+        exit 2
+    fi
+
+    header=$(basename "$f");
+    sed -e 's/__aligned_u64/__u64 __attribute__((aligned(8)))/g' \
+        -e 's/__u\([0-9][0-9]*\)/uint\1_t/g' \
+        -e 's/u\([0-9][0-9]*\)/uint\1_t/g' \
+        -e 's/__s\([0-9][0-9]*\)/int\1_t/g' \
+        -e 's/__le\([0-9][0-9]*\)/uint\1_t/g' \
+        -e 's/__be\([0-9][0-9]*\)/uint\1_t/g' \
+        -e 's/"\(input-event-codes\.h\)"/"standard-headers\/linux\/\1"/' \
+        -e 's/]*\)>/"standard-headers\/linux\/\1"/' \
+        -e 's/__bitwise//' \
+        -e 's/__attribute__((packed))/QEMU_PACKED/' \
+        -e 's/__inline__/inline/' \
+        -e 's/__BITS_PER_LONG/HOST_LONG_BITS/' \
+        -e '/\"drm.h\"/d' \
+        -e '/sys\/ioctl.h/d' \
+        -e 's/SW_MAX/SW_MAX_/' \
+        -e 's/atomic_t/int/' \
+        -e 's/__kernel_long_t/long/' \
+        -e 's/__kernel_ulong_t/unsigned long/' \
+        -e 's/struct ethhdr/struct eth_header/' \
+        -e '/\#define _LINUX_ETHTOOL_H/a \\n\#include "net/eth.h"' \
+        "$f" > "$to/$header";
+}
+
+# This will pick up non-directories too (eg "Kconfig") but we will
+# ignore them in the next loop.
+ARCHLIST=$(cd "$linux/arch" && echo *)
+
+for arch in $ARCHLIST; do
+    # Discard anything which isn't a KVM-supporting architecture
+    if ! [ -e "$linux/arch/$arch/include/asm/kvm.h" ] &&
+        ! [ -e "$linux/arch/$arch/include/uapi/asm/kvm.h" ] ; then
+        continue
+    fi
+
+    if [ "$arch" = x86 ]; then
+        arch_var=SRCARCH
+    else
+        arch_var=ARCH
+    fi
+
+    make -C "$linux" INSTALL_HDR_PATH="$tmpdir" $arch_var=$arch headers_install
+
+    rm -rf "$output/linux-headers/asm-$arch"
+    mkdir -p "$output/linux-headers/asm-$arch"
+    for header in kvm.h unistd.h bitsperlong.h mman.h; do
+        cp "$tmpdir/include/asm/$header" "$output/linux-headers/asm-$arch"
+    done
+
+    if [ $arch = mips ]; then
+        cp "$tmpdir/include/asm/sgidefs.h" "$output/linux-headers/asm-mips/"
+        cp "$tmpdir/include/asm/unistd_o32.h" "$output/linux-headers/asm-mips/"
+        cp "$tmpdir/include/asm/unistd_n32.h" "$output/linux-headers/asm-mips/"
+        cp "$tmpdir/include/asm/unistd_n64.h" "$output/linux-headers/asm-mips/"
+    fi
+    if [ $arch = powerpc ]; then
+        cp "$tmpdir/include/asm/unistd_32.h" "$output/linux-headers/asm-powerpc/"
+        cp "$tmpdir/include/asm/unistd_64.h" "$output/linux-headers/asm-powerpc/"
+    fi
+
+    rm -rf "$output/include/standard-headers/asm-$arch"
+    mkdir -p "$output/include/standard-headers/asm-$arch"
+    if [ $arch = s390 ]; then
+        cp_portable "$tmpdir/include/asm/virtio-ccw.h" "$output/include/standard-headers/asm-s390/"
+        cp "$tmpdir/include/asm/unistd_32.h" "$output/linux-headers/asm-s390/"
+        cp "$tmpdir/include/asm/unistd_64.h" "$output/linux-headers/asm-s390/"
+    fi
+    if [ $arch = arm ]; then
+        cp "$tmpdir/include/asm/unistd-eabi.h" "$output/linux-headers/asm-arm/"
+        cp "$tmpdir/include/asm/unistd-oabi.h" "$output/linux-headers/asm-arm/"
+        cp "$tmpdir/include/asm/unistd-common.h" "$output/linux-headers/asm-arm/"
+    fi
+    if [ $arch = arm64 ]; then
+        cp "$tmpdir/include/asm/sve_context.h" "$output/linux-headers/asm-arm64/"
+    fi
+    if [ $arch = x86 ]; then
+        cp "$tmpdir/include/asm/unistd_32.h" "$output/linux-headers/asm-x86/"
+        cp "$tmpdir/include/asm/unistd_x32.h" "$output/linux-headers/asm-x86/"
+        cp "$tmpdir/include/asm/unistd_64.h" "$output/linux-headers/asm-x86/"
+        cp_portable "$tmpdir/include/asm/kvm_para.h" "$output/include/standard-headers/asm-$arch"
+        # Remove everything except the macros from bootparam.h avoiding the
+        # unnecessary import of several video/ist/etc headers
+        sed -e '/__ASSEMBLY__/,/__ASSEMBLY__/d' \
+               "$tmpdir/include/asm/bootparam.h" > "$tmpdir/bootparam.h"
+        cp_portable "$tmpdir/bootparam.h" \
+                    "$output/include/standard-headers/asm-$arch"
+    fi
+done
+
+rm -rf "$output/linux-headers/linux"
+mkdir -p "$output/linux-headers/linux"
+for header in kvm.h vfio.h vfio_ccw.h vfio_zdev.h vhost.h \
+              psci.h psp-sev.h userfaultfd.h mman.h; do
+    cp "$tmpdir/include/linux/$header" "$output/linux-headers/linux"
+done
+
+rm -rf "$output/linux-headers/asm-generic"
+mkdir -p "$output/linux-headers/asm-generic"
+for header in unistd.h bitsperlong.h mman-common.h mman.h hugetlb_encode.h; do
+    cp "$tmpdir/include/asm-generic/$header" "$output/linux-headers/asm-generic"
+done
+
+if [ -L "$linux/source" ]; then
+    cp "$linux/source/COPYING" "$output/linux-headers"
+else
+    cp "$linux/COPYING" "$output/linux-headers"
+fi
+
+# Recent kernel sources split the copyright/license info into multiple
+# files, which we need to copy. This set of licenses is the set that
+# are referred to by SPDX lines in the headers we currently copy.
+# We don't copy the Documentation/process/license-rules.rst which
+# is also referred to by COPYING, since it's explanatory rather than license.
+if [ -d "$linux/LICENSES" ]; then
+    mkdir -p "$output/linux-headers/LICENSES/preferred" \
+             "$output/linux-headers/LICENSES/exceptions"
+    for l in preferred/GPL-2.0 preferred/BSD-2-Clause preferred/BSD-3-Clause \
+             exceptions/Linux-syscall-note; do
+        cp "$linux/LICENSES/$l" "$output/linux-headers/LICENSES/$l"
+    done
+fi
+
+cat <$output/linux-headers/linux/virtio_config.h
+#include "standard-headers/linux/virtio_config.h"
+EOF
+cat <$output/linux-headers/linux/virtio_ring.h
+#include "standard-headers/linux/virtio_ring.h"
+EOF
+cat <$output/linux-headers/linux/vhost_types.h
+#include "standard-headers/linux/vhost_types.h"
+EOF
+
+rm -rf "$output/include/standard-headers/linux"
+mkdir -p "$output/include/standard-headers/linux"
+for i in "$tmpdir"/include/linux/*virtio*.h \
+         "$tmpdir/include/linux/qemu_fw_cfg.h" \
+         "$tmpdir/include/linux/fuse.h" \
+         "$tmpdir/include/linux/input.h" \
+         "$tmpdir/include/linux/input-event-codes.h" \
+         "$tmpdir/include/linux/pci_regs.h" \
+         "$tmpdir/include/linux/ethtool.h" \
+         "$tmpdir/include/linux/const.h" \
+         "$tmpdir/include/linux/kernel.h" \
+         "$tmpdir/include/linux/vhost_types.h" \
+         "$tmpdir/include/linux/sysinfo.h"; do
+    cp_portable "$i" "$output/include/standard-headers/linux"
+done
+mkdir -p "$output/include/standard-headers/drm"
+cp_portable "$tmpdir/include/drm/drm_fourcc.h" \
+            "$output/include/standard-headers/drm"
+
+rm -rf "$output/include/standard-headers/drivers/infiniband/hw/vmw_pvrdma"
+mkdir -p "$output/include/standard-headers/drivers/infiniband/hw/vmw_pvrdma"
+
+# Remove the unused functions from pvrdma_verbs.h avoiding the unnecessary
+# import of several infiniband/networking/other headers
+tmp_pvrdma_verbs="$tmpdir/pvrdma_verbs.h"
+# Parse the entire file instead of single lines to match
+# function declarations expanding over multiple lines
+# and strip the declarations starting with pvrdma prefix.
+sed  -e '1h;2,$H;$!d;g'  -e 's/[^};]*pvrdma[^(| ]*([^)]*);//g' \
+    "$linux/drivers/infiniband/hw/vmw_pvrdma/pvrdma_verbs.h" > \
+    "$tmp_pvrdma_verbs";
+
+for i in "$linux/drivers/infiniband/hw/vmw_pvrdma/pvrdma_ring.h" \
+         "$linux/drivers/infiniband/hw/vmw_pvrdma/pvrdma_dev_api.h" \
+         "$tmp_pvrdma_verbs"; do \
+    cp_portable "$i" \
+         "$output/include/standard-headers/drivers/infiniband/hw/vmw_pvrdma/"
+done
+
+rm -rf "$output/include/standard-headers/rdma/"
+mkdir -p "$output/include/standard-headers/rdma/"
+for i in "$tmpdir/include/rdma/vmw_pvrdma-abi.h"; do
+    cp_portable "$i" \
+         "$output/include/standard-headers/rdma/"
+done
+
+cat <$output/include/standard-headers/linux/types.h
+/* For QEMU all types are already defined via osdep.h, so this
+ * header does not need to do anything.
+ */
+EOF
+cat <$output/include/standard-headers/linux/if_ether.h
+#define ETH_ALEN    6
+EOF
+
+rm -rf "$tmpdir"
diff --git a/scripts/update-mips-syscall-args.sh b/scripts/update-mips-syscall-args.sh
new file mode 100755
index 000000000..4f0dda4b8
--- /dev/null
+++ b/scripts/update-mips-syscall-args.sh
@@ -0,0 +1,57 @@
+#!/bin/sh
+
+URL=https://raw.githubusercontent.com/strace/strace/master
+FILES="sysent.h sysent_shorthand_defs.h linux/mips/syscallent-compat.h \
+       linux/mips/syscallent-o32.h linux/syscallent-common-32.h \
+       linux/syscallent-common.h"
+
+output="$1"
+if [ "$output" = "" ] ; then
+    output="$PWD"
+fi
+
+INC=linux-user/mips/syscall-args-o32.c.inc
+
+TMP=$(mktemp -d)
+cd $TMP
+
+for file in $FILES; do
+    curl -O $URL/$file
+done
+
+> subcall32.h
+
+cat > gen_mips_o32.c <
+
+#define LINUX_MIPSO32
+#define MAX_ARGS 7
+
+#include "sysent.h"
+#include "sysent_shorthand_defs.h"
+
+#define SEN(syscall_name) 0,0
+const struct_sysent sysent0[] = {
+#include  "syscallent-o32.h"
+};
+
+int main(void)
+{
+    int i;
+
+    for (i = 4000; i < sizeof(sysent0) / sizeof(struct_sysent); i++) {
+        if (sysent0[i].sys_name == NULL) {
+            printf("    [% 4d] = MIPS_SYSCALL_NUMBER_UNUSED,\n", i - 4000);
+        } else {
+            printf("    [% 4d] = %d, /* %s */\n", i - 4000,
+                   sysent0[i].nargs, sysent0[i].sys_name);
+        }
+    }
+
+    return 0;
+}
+EOF
+
+cc -o gen_mips_o32 gen_mips_o32.c && ./gen_mips_o32 > "$output/$INC"
+
+rm -fr "$TMP"
diff --git a/scripts/update-syscalltbl.sh b/scripts/update-syscalltbl.sh
new file mode 100755
index 000000000..2d23e5680
--- /dev/null
+++ b/scripts/update-syscalltbl.sh
@@ -0,0 +1,49 @@
+TBL_LIST="\
+arch/alpha/kernel/syscalls/syscall.tbl,linux-user/alpha/syscall.tbl \
+arch/arm/tools/syscall.tbl,linux-user/arm/syscall.tbl \
+arch/m68k/kernel/syscalls/syscall.tbl,linux-user/m68k/syscall.tbl \
+arch/microblaze/kernel/syscalls/syscall.tbl,linux-user/microblaze/syscall.tbl \
+arch/mips/kernel/syscalls/syscall_n32.tbl,linux-user/mips64/syscall_n32.tbl \
+arch/mips/kernel/syscalls/syscall_n64.tbl,linux-user/mips64/syscall_n64.tbl \
+arch/mips/kernel/syscalls/syscall_o32.tbl,linux-user/mips/syscall_o32.tbl \
+arch/parisc/kernel/syscalls/syscall.tbl,linux-user/hppa/syscall.tbl \
+arch/powerpc/kernel/syscalls/syscall.tbl,linux-user/ppc/syscall.tbl \
+arch/s390/kernel/syscalls/syscall.tbl,linux-user/s390x/syscall.tbl \
+arch/sh/kernel/syscalls/syscall.tbl,linux-user/sh4/syscall.tbl \
+arch/sparc/kernel/syscalls/syscall.tbl,linux-user/sparc64/syscall.tbl \
+arch/sparc/kernel/syscalls/syscall.tbl,linux-user/sparc/syscall.tbl \
+arch/x86/entry/syscalls/syscall_32.tbl,linux-user/i386/syscall_32.tbl \
+arch/x86/entry/syscalls/syscall_64.tbl,linux-user/x86_64/syscall_64.tbl \
+arch/xtensa/kernel/syscalls/syscall.tbl,linux-user/xtensa/syscall.tbl\
+"
+
+linux="$1"
+output="$2"
+
+if [ -z "$linux" ] || ! [ -d "$linux" ]; then
+    cat << EOF
+usage: update-syscalltbl.sh LINUX_PATH [OUTPUT_PATH]
+
+LINUX_PATH      Linux kernel directory to obtain the syscall.tbl from
+OUTPUT_PATH     output directory, usually the qemu source tree (default: $PWD)
+EOF
+    exit 1
+fi
+
+if [ -z "$output" ]; then
+    output="$PWD"
+fi
+
+for entry in $TBL_LIST; do
+    OFS="$IFS"
+    IFS=,
+    set $entry
+    src=$1
+    dst=$2
+    IFS="$OFS"
+    if ! cp "$linux/$src" "$output/$dst" ; then
+        echo "Cannot copy $linux/$src to $output/$dst" 1>&2
+        exit 1
+    fi
+done
+
diff --git a/scripts/vmstate-static-checker.py b/scripts/vmstate-static-checker.py
new file mode 100755
index 000000000..0b7d30eef
--- /dev/null
+++ b/scripts/vmstate-static-checker.py
@@ -0,0 +1,432 @@
+#!/usr/bin/python3
+#
+# Compares vmstate information stored in JSON format, obtained from
+# the -dump-vmstate QEMU command.
+#
+# Copyright 2014 Amit Shah 
+# Copyright 2014 Red Hat, Inc.
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License along
+# with this program; if not, see .
+
+import argparse
+import json
+import sys
+
+# Count the number of errors found
+taint = 0
+
+def bump_taint():
+    global taint
+
+    # Ensure we don't wrap around or reset to 0 -- the shell only has
+    # an 8-bit return value.
+    if taint < 255:
+        taint = taint + 1
+
+
+def check_fields_match(name, s_field, d_field):
+    if s_field == d_field:
+        return True
+
+    # Some fields changed names between qemu versions.  This list
+    # is used to whitelist such changes in each section / description.
+    changed_names = {
+        'apic': ['timer', 'timer_expiry'],
+        'e1000': ['dev', 'parent_obj'],
+        'ehci': ['dev', 'pcidev'],
+        'I440FX': ['dev', 'parent_obj'],
+        'ich9_ahci': ['card', 'parent_obj'],
+        'ich9-ahci': ['ahci', 'ich9_ahci'],
+        'ioh3420': ['PCIDevice', 'PCIEDevice'],
+        'ioh-3240-express-root-port': ['port.br.dev',
+                                       'parent_obj.parent_obj.parent_obj',
+                                       'port.br.dev.exp.aer_log',
+                                'parent_obj.parent_obj.parent_obj.exp.aer_log'],
+        'cirrus_vga': ['hw_cursor_x', 'vga.hw_cursor_x',
+                       'hw_cursor_y', 'vga.hw_cursor_y'],
+        'lsiscsi': ['dev', 'parent_obj'],
+        'mch': ['d', 'parent_obj'],
+        'pci_bridge': ['bridge.dev', 'parent_obj', 'bridge.dev.shpc', 'shpc'],
+        'pcnet': ['pci_dev', 'parent_obj'],
+        'PIIX3': ['pci_irq_levels', 'pci_irq_levels_vmstate'],
+        'piix4_pm': ['dev', 'parent_obj', 'pci0_status',
+                     'acpi_pci_hotplug.acpi_pcihp_pci_status[0x0]',
+                     'pm1a.sts', 'ar.pm1.evt.sts', 'pm1a.en', 'ar.pm1.evt.en',
+                     'pm1_cnt.cnt', 'ar.pm1.cnt.cnt',
+                     'tmr.timer', 'ar.tmr.timer',
+                     'tmr.overflow_time', 'ar.tmr.overflow_time',
+                     'gpe', 'ar.gpe'],
+        'rtl8139': ['dev', 'parent_obj'],
+        'qxl': ['num_surfaces', 'ssd.num_surfaces'],
+        'usb-ccid': ['abProtocolDataStructure', 'abProtocolDataStructure.data'],
+        'usb-host': ['dev', 'parent_obj'],
+        'usb-mouse': ['usb-ptr-queue', 'HIDPointerEventQueue'],
+        'usb-tablet': ['usb-ptr-queue', 'HIDPointerEventQueue'],
+        'vmware_vga': ['card', 'parent_obj'],
+        'vmware_vga_internal': ['depth', 'new_depth'],
+        'xhci': ['pci_dev', 'parent_obj'],
+        'x3130-upstream': ['PCIDevice', 'PCIEDevice'],
+        'xio3130-express-downstream-port': ['port.br.dev',
+                                            'parent_obj.parent_obj.parent_obj',
+                                            'port.br.dev.exp.aer_log',
+                                'parent_obj.parent_obj.parent_obj.exp.aer_log'],
+        'xio3130-downstream': ['PCIDevice', 'PCIEDevice'],
+        'xio3130-express-upstream-port': ['br.dev', 'parent_obj.parent_obj',
+                                          'br.dev.exp.aer_log',
+                                          'parent_obj.parent_obj.exp.aer_log'],
+        'spapr_pci': ['dma_liobn[0]', 'mig_liobn',
+                      'mem_win_addr', 'mig_mem_win_addr',
+                      'mem_win_size', 'mig_mem_win_size',
+                      'io_win_addr', 'mig_io_win_addr',
+                      'io_win_size', 'mig_io_win_size'],
+    }
+
+    if not name in changed_names:
+        return False
+
+    if s_field in changed_names[name] and d_field in changed_names[name]:
+        return True
+
+    return False
+
+def get_changed_sec_name(sec):
+    # Section names can change -- see commit 292b1634 for an example.
+    changes = {
+        "ICH9 LPC": "ICH9-LPC",
+        "e1000-82540em": "e1000",
+    }
+
+    for item in changes:
+        if item == sec:
+            return changes[item]
+        if changes[item] == sec:
+            return item
+    return ""
+
+def exists_in_substruct(fields, item):
+    # Some QEMU versions moved a few fields inside a substruct.  This
+    # kept the on-wire format the same.  This function checks if
+    # something got shifted inside a substruct.  For example, the
+    # change in commit 1f42d22233b4f3d1a2933ff30e8d6a6d9ee2d08f
+
+    if not "Description" in fields:
+        return False
+
+    if not "Fields" in fields["Description"]:
+        return False
+
+    substruct_fields = fields["Description"]["Fields"]
+
+    if substruct_fields == []:
+        return False
+
+    return check_fields_match(fields["Description"]["name"],
+                              substruct_fields[0]["field"], item)
+
+
+def check_fields(src_fields, dest_fields, desc, sec):
+    # This function checks for all the fields in a section.  If some
+    # fields got embedded into a substruct, this function will also
+    # attempt to check inside the substruct.
+
+    d_iter = iter(dest_fields)
+    s_iter = iter(src_fields)
+
+    # Using these lists as stacks to store previous value of s_iter
+    # and d_iter, so that when time comes to exit out of a substruct,
+    # we can go back one level up and continue from where we left off.
+
+    s_iter_list = []
+    d_iter_list = []
+
+    advance_src = True
+    advance_dest = True
+    unused_count = 0
+
+    while True:
+        if advance_src:
+            try:
+                s_item = next(s_iter)
+            except StopIteration:
+                if s_iter_list == []:
+                    break
+
+                s_iter = s_iter_list.pop()
+                continue
+        else:
+            if unused_count == 0:
+                # We want to avoid advancing just once -- when entering a
+                # dest substruct, or when exiting one.
+                advance_src = True
+
+        if advance_dest:
+            try:
+                d_item = next(d_iter)
+            except StopIteration:
+                if d_iter_list == []:
+                    # We were not in a substruct
+                    print("Section \"" + sec + "\",", end=' ')
+                    print("Description " + "\"" + desc + "\":", end=' ')
+                    print("expected field \"" + s_item["field"] + "\",", end=' ')
+                    print("while dest has no further fields")
+                    bump_taint()
+                    break
+
+                d_iter = d_iter_list.pop()
+                advance_src = False
+                continue
+        else:
+            if unused_count == 0:
+                advance_dest = True
+
+        if unused_count != 0:
+            if advance_dest == False:
+                unused_count = unused_count - s_item["size"]
+                if unused_count == 0:
+                    advance_dest = True
+                    continue
+                if unused_count < 0:
+                    print("Section \"" + sec + "\",", end=' ')
+                    print("Description \"" + desc + "\":", end=' ')
+                    print("unused size mismatch near \"", end=' ')
+                    print(s_item["field"] + "\"")
+                    bump_taint()
+                    break
+                continue
+
+            if advance_src == False:
+                unused_count = unused_count - d_item["size"]
+                if unused_count == 0:
+                    advance_src = True
+                    continue
+                if unused_count < 0:
+                    print("Section \"" + sec + "\",", end=' ')
+                    print("Description \"" + desc + "\":", end=' ')
+                    print("unused size mismatch near \"", end=' ')
+                    print(d_item["field"] + "\"")
+                    bump_taint()
+                    break
+                continue
+
+        if not check_fields_match(desc, s_item["field"], d_item["field"]):
+            # Some fields were put in substructs, keeping the
+            # on-wire format the same, but breaking static tools
+            # like this one.
+
+            # First, check if dest has a new substruct.
+            if exists_in_substruct(d_item, s_item["field"]):
+                # listiterators don't have a prev() function, so we
+                # have to store our current location, descend into the
+                # substruct, and ensure we come out as if nothing
+                # happened when the substruct is over.
+                #
+                # Essentially we're opening the substructs that got
+                # added which didn't change the wire format.
+                d_iter_list.append(d_iter)
+                substruct_fields = d_item["Description"]["Fields"]
+                d_iter = iter(substruct_fields)
+                advance_src = False
+                continue
+
+            # Next, check if src has substruct that dest removed
+            # (can happen in backward migration: 2.0 -> 1.5)
+            if exists_in_substruct(s_item, d_item["field"]):
+                s_iter_list.append(s_iter)
+                substruct_fields = s_item["Description"]["Fields"]
+                s_iter = iter(substruct_fields)
+                advance_dest = False
+                continue
+
+            if s_item["field"] == "unused" or d_item["field"] == "unused":
+                if s_item["size"] == d_item["size"]:
+                    continue
+
+                if d_item["field"] == "unused":
+                    advance_dest = False
+                    unused_count = d_item["size"] - s_item["size"]
+                    continue
+
+                if s_item["field"] == "unused":
+                    advance_src = False
+                    unused_count = s_item["size"] - d_item["size"]
+                    continue
+
+            print("Section \"" + sec + "\",", end=' ')
+            print("Description \"" + desc + "\":", end=' ')
+            print("expected field \"" + s_item["field"] + "\",", end=' ')
+            print("got \"" + d_item["field"] + "\"; skipping rest")
+            bump_taint()
+            break
+
+        check_version(s_item, d_item, sec, desc)
+
+        if not "Description" in s_item:
+            # Check size of this field only if it's not a VMSTRUCT entry
+            check_size(s_item, d_item, sec, desc, s_item["field"])
+
+        check_description_in_list(s_item, d_item, sec, desc)
+
+
+def check_subsections(src_sub, dest_sub, desc, sec):
+    for s_item in src_sub:
+        found = False
+        for d_item in dest_sub:
+            if s_item["name"] != d_item["name"]:
+                continue
+
+            found = True
+            check_descriptions(s_item, d_item, sec)
+
+        if not found:
+            print("Section \"" + sec + "\", Description \"" + desc + "\":", end=' ')
+            print("Subsection \"" + s_item["name"] + "\" not found")
+            bump_taint()
+
+
+def check_description_in_list(s_item, d_item, sec, desc):
+    if not "Description" in s_item:
+        return
+
+    if not "Description" in d_item:
+        print("Section \"" + sec + "\", Description \"" + desc + "\",", end=' ')
+        print("Field \"" + s_item["field"] + "\": missing description")
+        bump_taint()
+        return
+
+    check_descriptions(s_item["Description"], d_item["Description"], sec)
+
+
+def check_descriptions(src_desc, dest_desc, sec):
+    check_version(src_desc, dest_desc, sec, src_desc["name"])
+
+    if not check_fields_match(sec, src_desc["name"], dest_desc["name"]):
+        print("Section \"" + sec + "\":", end=' ')
+        print("Description \"" + src_desc["name"] + "\"", end=' ')
+        print("missing, got \"" + dest_desc["name"] + "\" instead; skipping")
+        bump_taint()
+        return
+
+    for f in src_desc:
+        if not f in dest_desc:
+            print("Section \"" + sec + "\"", end=' ')
+            print("Description \"" + src_desc["name"] + "\":", end=' ')
+            print("Entry \"" + f + "\" missing")
+            bump_taint()
+            continue
+
+        if f == 'Fields':
+            check_fields(src_desc[f], dest_desc[f], src_desc["name"], sec)
+
+        if f == 'Subsections':
+            check_subsections(src_desc[f], dest_desc[f], src_desc["name"], sec)
+
+
+def check_version(s, d, sec, desc=None):
+    if s["version_id"] > d["version_id"]:
+        print("Section \"" + sec + "\"", end=' ')
+        if desc:
+            print("Description \"" + desc + "\":", end=' ')
+        print("version error:", s["version_id"], ">", d["version_id"])
+        bump_taint()
+
+    if not "minimum_version_id" in d:
+        return
+
+    if s["version_id"] < d["minimum_version_id"]:
+        print("Section \"" + sec + "\"", end=' ')
+        if desc:
+            print("Description \"" + desc + "\":", end=' ')
+            print("minimum version error:", s["version_id"], "<", end=' ')
+            print(d["minimum_version_id"])
+            bump_taint()
+
+
+def check_size(s, d, sec, desc=None, field=None):
+    if s["size"] != d["size"]:
+        print("Section \"" + sec + "\"", end=' ')
+        if desc:
+            print("Description \"" + desc + "\"", end=' ')
+        if field:
+            print("Field \"" + field + "\"", end=' ')
+        print("size mismatch:", s["size"], ",", d["size"])
+        bump_taint()
+
+
+def check_machine_type(s, d):
+    if s["Name"] != d["Name"]:
+        print("Warning: checking incompatible machine types:", end=' ')
+        print("\"" + s["Name"] + "\", \"" + d["Name"] + "\"")
+    return
+
+
+def main():
+    help_text = "Parse JSON-formatted vmstate dumps from QEMU in files SRC and DEST.  Checks whether migration from SRC to DEST QEMU versions would break based on the VMSTATE information contained within the JSON outputs.  The JSON output is created from a QEMU invocation with the -dump-vmstate parameter and a filename argument to it.  Other parameters to QEMU do not matter, except the -M (machine type) parameter."
+
+    parser = argparse.ArgumentParser(description=help_text)
+    parser.add_argument('-s', '--src', type=argparse.FileType('r'),
+                        required=True,
+                        help='json dump from src qemu')
+    parser.add_argument('-d', '--dest', type=argparse.FileType('r'),
+                        required=True,
+                        help='json dump from dest qemu')
+    parser.add_argument('--reverse', required=False, default=False,
+                        action='store_true',
+                        help='reverse the direction')
+    args = parser.parse_args()
+
+    src_data = json.load(args.src)
+    dest_data = json.load(args.dest)
+    args.src.close()
+    args.dest.close()
+
+    if args.reverse:
+        temp = src_data
+        src_data = dest_data
+        dest_data = temp
+
+    for sec in src_data:
+        dest_sec = sec
+        if not dest_sec in dest_data:
+            # Either the section name got changed, or the section
+            # doesn't exist in dest.
+            dest_sec = get_changed_sec_name(sec)
+            if not dest_sec in dest_data:
+                print("Section \"" + sec + "\" does not exist in dest")
+                bump_taint()
+                continue
+
+        s = src_data[sec]
+        d = dest_data[dest_sec]
+
+        if sec == "vmschkmachine":
+            check_machine_type(s, d)
+            continue
+
+        check_version(s, d, sec)
+
+        for entry in s:
+            if not entry in d:
+                print("Section \"" + sec + "\": Entry \"" + entry + "\"", end=' ')
+                print("missing")
+                bump_taint()
+                continue
+
+            if entry == "Description":
+                check_descriptions(s[entry], d[entry], sec)
+
+    return taint
+
+
+if __name__ == '__main__':
+    sys.exit(main())
diff --git a/scsi/meson.build b/scsi/meson.build
new file mode 100644
index 000000000..53f3a1f71
--- /dev/null
+++ b/scsi/meson.build
@@ -0,0 +1,4 @@
+block_ss.add(files('utils.c'))
+block_ss.add(when: 'CONFIG_LINUX',
+             if_true: files('pr-manager.c', 'pr-manager-helper.c'),
+             if_false: files('pr-manager-stub.c'))
diff --git a/scsi/pr-helper.h b/scsi/pr-helper.h
new file mode 100644
index 000000000..e26e104ec
--- /dev/null
+++ b/scsi/pr-helper.h
@@ -0,0 +1,40 @@
+/* Definitions for QEMU's persistent reservation helper daemon
+ *
+ * Copyright (C) 2017 Red Hat, Inc.
+ *
+ * Author:
+ *   Paolo Bonzini 
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#ifndef QEMU_PR_HELPER_H
+#define QEMU_PR_HELPER_H
+
+#define PR_HELPER_CDB_SIZE     16
+#define PR_HELPER_SENSE_SIZE   96
+#define PR_HELPER_DATA_SIZE    8192
+
+typedef struct PRHelperResponse {
+    int32_t result;
+    int32_t sz;
+    uint8_t sense[PR_HELPER_SENSE_SIZE];
+} PRHelperResponse;
+
+#endif
diff --git a/scsi/pr-manager-helper.c b/scsi/pr-manager-helper.c
new file mode 100644
index 000000000..451c7631b
--- /dev/null
+++ b/scsi/pr-manager-helper.c
@@ -0,0 +1,328 @@
+/*
+ * Persistent reservation manager that talks to qemu-pr-helper
+ *
+ * Copyright (c) 2017 Red Hat, Inc.
+ *
+ * Author: Paolo Bonzini 
+ *
+ * This code is licensed under the LGPL v2.1 or later.
+ *
+ */
+
+#include "qemu/osdep.h"
+#include "qapi/error.h"
+#include "scsi/constants.h"
+#include "scsi/pr-manager.h"
+#include "scsi/utils.h"
+#include "io/channel.h"
+#include "io/channel-socket.h"
+#include "pr-helper.h"
+#include "qapi/qapi-events-block.h"
+#include "qemu/module.h"
+
+#include 
+#include "qom/object.h"
+
+#define PR_MAX_RECONNECT_ATTEMPTS 5
+
+#define TYPE_PR_MANAGER_HELPER "pr-manager-helper"
+
+OBJECT_DECLARE_SIMPLE_TYPE(PRManagerHelper, PR_MANAGER_HELPER)
+
+struct PRManagerHelper {
+    /*  */
+    PRManager parent;
+
+    char *path;
+
+    QemuMutex lock;
+    QIOChannel *ioc;
+};
+
+static void pr_manager_send_status_changed_event(PRManagerHelper *pr_mgr)
+{
+    const char *id = object_get_canonical_path_component(OBJECT(pr_mgr));
+
+    if (id) {
+        qapi_event_send_pr_manager_status_changed(id, !!pr_mgr->ioc);
+    }
+}
+
+/* Called with lock held.  */
+static int pr_manager_helper_read(PRManagerHelper *pr_mgr,
+                                  void *buf, int sz, Error **errp)
+{
+    ssize_t r = qio_channel_read_all(pr_mgr->ioc, buf, sz, errp);
+
+    if (r < 0) {
+        object_unref(OBJECT(pr_mgr->ioc));
+        pr_mgr->ioc = NULL;
+        pr_manager_send_status_changed_event(pr_mgr);
+        return -EINVAL;
+    }
+
+    return 0;
+}
+
+/* Called with lock held.  */
+static int pr_manager_helper_write(PRManagerHelper *pr_mgr,
+                                   int fd,
+                                   const void *buf, int sz, Error **errp)
+{
+    size_t nfds = (fd != -1);
+    while (sz > 0) {
+        struct iovec iov;
+        ssize_t n_written;
+
+        iov.iov_base = (void *)buf;
+        iov.iov_len = sz;
+        n_written = qio_channel_writev_full(QIO_CHANNEL(pr_mgr->ioc), &iov, 1,
+                                            nfds ? &fd : NULL, nfds, errp);
+
+        if (n_written <= 0) {
+            assert(n_written != QIO_CHANNEL_ERR_BLOCK);
+            object_unref(OBJECT(pr_mgr->ioc));
+            pr_mgr->ioc = NULL;
+            pr_manager_send_status_changed_event(pr_mgr);
+            return n_written < 0 ? -EINVAL : 0;
+        }
+
+        nfds = 0;
+        buf += n_written;
+        sz -= n_written;
+    }
+
+    return 0;
+}
+
+/* Called with lock held.  */
+static int pr_manager_helper_initialize(PRManagerHelper *pr_mgr,
+                                        Error **errp)
+{
+    char *path = g_strdup(pr_mgr->path);
+    SocketAddress saddr = {
+        .type = SOCKET_ADDRESS_TYPE_UNIX,
+        .u.q_unix.path = path
+    };
+    QIOChannelSocket *sioc = qio_channel_socket_new();
+    Error *local_err = NULL;
+
+    uint32_t flags;
+    int r;
+
+    assert(!pr_mgr->ioc);
+    qio_channel_set_name(QIO_CHANNEL(sioc), "pr-manager-helper");
+    qio_channel_socket_connect_sync(sioc,
+                                    &saddr,
+                                    &local_err);
+    g_free(path);
+    if (local_err) {
+        object_unref(OBJECT(sioc));
+        error_propagate(errp, local_err);
+        return -ENOTCONN;
+    }
+
+    qio_channel_set_delay(QIO_CHANNEL(sioc), false);
+    pr_mgr->ioc = QIO_CHANNEL(sioc);
+
+    /* A simple feature negotiation protocol, even though there is
+     * no optional feature right now.
+     */
+    r = pr_manager_helper_read(pr_mgr, &flags, sizeof(flags), errp);
+    if (r < 0) {
+        goto out_close;
+    }
+
+    flags = 0;
+    r = pr_manager_helper_write(pr_mgr, -1, &flags, sizeof(flags), errp);
+    if (r < 0) {
+        goto out_close;
+    }
+
+    pr_manager_send_status_changed_event(pr_mgr);
+    return 0;
+
+out_close:
+    object_unref(OBJECT(pr_mgr->ioc));
+    pr_mgr->ioc = NULL;
+    return r;
+}
+
+static int pr_manager_helper_run(PRManager *p,
+                                 int fd, struct sg_io_hdr *io_hdr)
+{
+    PRManagerHelper *pr_mgr = PR_MANAGER_HELPER(p);
+
+    uint32_t len;
+    PRHelperResponse resp;
+    int ret;
+    int expected_dir;
+    int attempts;
+    uint8_t cdb[PR_HELPER_CDB_SIZE] = { 0 };
+
+    if (!io_hdr->cmd_len || io_hdr->cmd_len > PR_HELPER_CDB_SIZE) {
+        return -EINVAL;
+    }
+
+    memcpy(cdb, io_hdr->cmdp, io_hdr->cmd_len);
+    assert(cdb[0] == PERSISTENT_RESERVE_OUT || cdb[0] == PERSISTENT_RESERVE_IN);
+    expected_dir =
+        (cdb[0] == PERSISTENT_RESERVE_OUT ? SG_DXFER_TO_DEV : SG_DXFER_FROM_DEV);
+    if (io_hdr->dxfer_direction != expected_dir) {
+        return -EINVAL;
+    }
+
+    len = scsi_cdb_xfer(cdb);
+    if (io_hdr->dxfer_len < len || len > PR_HELPER_DATA_SIZE) {
+        return -EINVAL;
+    }
+
+    qemu_mutex_lock(&pr_mgr->lock);
+
+    /* Try to reconnect while sending the CDB.  */
+    for (attempts = 0; attempts < PR_MAX_RECONNECT_ATTEMPTS; attempts++) {
+        if (!pr_mgr->ioc) {
+            ret = pr_manager_helper_initialize(pr_mgr, NULL);
+            if (ret < 0) {
+                qemu_mutex_unlock(&pr_mgr->lock);
+                g_usleep(G_USEC_PER_SEC);
+                qemu_mutex_lock(&pr_mgr->lock);
+                continue;
+            }
+        }
+
+        ret = pr_manager_helper_write(pr_mgr, fd, cdb, ARRAY_SIZE(cdb), NULL);
+        if (ret >= 0) {
+            break;
+        }
+    }
+    if (ret < 0) {
+        goto out;
+    }
+
+    /* After sending the CDB, any communications failure causes the
+     * command to fail.  The failure is transient, retrying the command
+     * will invoke pr_manager_helper_initialize again.
+     */
+    if (expected_dir == SG_DXFER_TO_DEV) {
+        io_hdr->resid = io_hdr->dxfer_len - len;
+        ret = pr_manager_helper_write(pr_mgr, -1, io_hdr->dxferp, len, NULL);
+        if (ret < 0) {
+            goto out;
+        }
+    }
+    ret = pr_manager_helper_read(pr_mgr, &resp, sizeof(resp), NULL);
+    if (ret < 0) {
+        goto out;
+    }
+
+    resp.result = be32_to_cpu(resp.result);
+    resp.sz = be32_to_cpu(resp.sz);
+    if (io_hdr->dxfer_direction == SG_DXFER_FROM_DEV) {
+        assert(resp.sz <= io_hdr->dxfer_len);
+        ret = pr_manager_helper_read(pr_mgr, io_hdr->dxferp, resp.sz, NULL);
+        if (ret < 0) {
+            goto out;
+        }
+        io_hdr->resid = io_hdr->dxfer_len - resp.sz;
+    } else {
+        assert(resp.sz == 0);
+    }
+
+    io_hdr->status = resp.result;
+    if (resp.result == CHECK_CONDITION) {
+        io_hdr->driver_status = SG_ERR_DRIVER_SENSE;
+        io_hdr->sb_len_wr = MIN(io_hdr->mx_sb_len, PR_HELPER_SENSE_SIZE);
+        memcpy(io_hdr->sbp, resp.sense, io_hdr->sb_len_wr);
+    }
+
+out:
+    if (ret < 0) {
+        int sense_len = scsi_build_sense(io_hdr->sbp,
+                                         SENSE_CODE(LUN_COMM_FAILURE));
+        io_hdr->driver_status = SG_ERR_DRIVER_SENSE;
+        io_hdr->sb_len_wr = MIN(io_hdr->mx_sb_len, sense_len);
+        io_hdr->status = CHECK_CONDITION;
+    }
+    qemu_mutex_unlock(&pr_mgr->lock);
+    return ret;
+}
+
+static bool pr_manager_helper_is_connected(PRManager *p)
+{
+    PRManagerHelper *pr_mgr = PR_MANAGER_HELPER(p);
+    bool result;
+
+    qemu_mutex_lock(&pr_mgr->lock);
+    result = (pr_mgr->ioc != NULL);
+    qemu_mutex_unlock(&pr_mgr->lock);
+
+    return result;
+}
+
+static void pr_manager_helper_complete(UserCreatable *uc, Error **errp)
+{
+    PRManagerHelper *pr_mgr = PR_MANAGER_HELPER(uc);
+
+    qemu_mutex_lock(&pr_mgr->lock);
+    pr_manager_helper_initialize(pr_mgr, errp);
+    qemu_mutex_unlock(&pr_mgr->lock);
+}
+
+static char *get_path(Object *obj, Error **errp)
+{
+    PRManagerHelper *pr_mgr = PR_MANAGER_HELPER(obj);
+
+    return g_strdup(pr_mgr->path);
+}
+
+static void set_path(Object *obj, const char *str, Error **errp)
+{
+    PRManagerHelper *pr_mgr = PR_MANAGER_HELPER(obj);
+
+    g_free(pr_mgr->path);
+    pr_mgr->path = g_strdup(str);
+}
+
+static void pr_manager_helper_instance_finalize(Object *obj)
+{
+    PRManagerHelper *pr_mgr = PR_MANAGER_HELPER(obj);
+
+    object_unref(OBJECT(pr_mgr->ioc));
+    qemu_mutex_destroy(&pr_mgr->lock);
+}
+
+static void pr_manager_helper_instance_init(Object *obj)
+{
+    PRManagerHelper *pr_mgr = PR_MANAGER_HELPER(obj);
+
+    qemu_mutex_init(&pr_mgr->lock);
+}
+
+static void pr_manager_helper_class_init(ObjectClass *klass,
+                                         void *class_data G_GNUC_UNUSED)
+{
+    PRManagerClass *prmgr_klass = PR_MANAGER_CLASS(klass);
+    UserCreatableClass *uc_klass = USER_CREATABLE_CLASS(klass);
+
+    object_class_property_add_str(klass, "path", get_path, set_path);
+    uc_klass->complete = pr_manager_helper_complete;
+    prmgr_klass->run = pr_manager_helper_run;
+    prmgr_klass->is_connected = pr_manager_helper_is_connected;
+}
+
+static const TypeInfo pr_manager_helper_info = {
+    .parent = TYPE_PR_MANAGER,
+    .name = TYPE_PR_MANAGER_HELPER,
+    .instance_size = sizeof(PRManagerHelper),
+    .instance_init = pr_manager_helper_instance_init,
+    .instance_finalize = pr_manager_helper_instance_finalize,
+    .class_init = pr_manager_helper_class_init,
+};
+
+static void pr_manager_helper_register_types(void)
+{
+    type_register_static(&pr_manager_helper_info);
+}
+
+type_init(pr_manager_helper_register_types);
diff --git a/scsi/pr-manager-stub.c b/scsi/pr-manager-stub.c
new file mode 100644
index 000000000..738b6d742
--- /dev/null
+++ b/scsi/pr-manager-stub.c
@@ -0,0 +1,30 @@
+/*
+ * Persistent reservation manager - stub for non-Linux platforms
+ *
+ * Copyright (c) 2018 Red Hat, Inc.
+ *
+ * Author: Paolo Bonzini 
+ *
+ * This code is licensed under the LGPL.
+ *
+ */
+
+#include "qemu/osdep.h"
+#include "qapi/error.h"
+#include "scsi/pr-manager.h"
+#include "trace.h"
+#include "qapi/qapi-types-block.h"
+#include "qapi/qapi-commands-block.h"
+
+PRManager *pr_manager_lookup(const char *id, Error **errp)
+{
+    /* The classes do not exist at all!  */
+    error_setg(errp, "No persistent reservation manager with id '%s'", id);
+        return NULL;
+}
+
+
+PRManagerInfoList *qmp_query_pr_managers(Error **errp)
+{
+    return NULL;
+}
diff --git a/scsi/pr-manager.c b/scsi/pr-manager.c
new file mode 100644
index 000000000..32b9287e6
--- /dev/null
+++ b/scsi/pr-manager.c
@@ -0,0 +1,151 @@
+/*
+ * Persistent reservation manager abstract class
+ *
+ * Copyright (c) 2017 Red Hat, Inc.
+ *
+ * Author: Paolo Bonzini 
+ *
+ * This code is licensed under the LGPL.
+ *
+ */
+
+#include "qemu/osdep.h"
+#include 
+
+#include "qapi/error.h"
+#include "block/aio.h"
+#include "block/thread-pool.h"
+#include "scsi/pr-manager.h"
+#include "trace.h"
+#include "qapi/qapi-types-block.h"
+#include "qemu/module.h"
+#include "qapi/qapi-commands-block.h"
+
+#define PR_MANAGER_PATH     "/objects"
+
+typedef struct PRManagerData {
+    PRManager *pr_mgr;
+    struct sg_io_hdr *hdr;
+    int fd;
+} PRManagerData;
+
+static int pr_manager_worker(void *opaque)
+{
+    PRManagerData *data = opaque;
+    PRManager *pr_mgr = data->pr_mgr;
+    PRManagerClass *pr_mgr_class =
+        PR_MANAGER_GET_CLASS(pr_mgr);
+    struct sg_io_hdr *hdr = data->hdr;
+    int fd = data->fd;
+    int r;
+
+    trace_pr_manager_run(fd, hdr->cmdp[0], hdr->cmdp[1]);
+
+    /* The reference was taken in pr_manager_execute.  */
+    r = pr_mgr_class->run(pr_mgr, fd, hdr);
+    object_unref(OBJECT(pr_mgr));
+    return r;
+}
+
+
+int coroutine_fn pr_manager_execute(PRManager *pr_mgr, AioContext *ctx, int fd,
+                                    struct sg_io_hdr *hdr)
+{
+    ThreadPool *pool = aio_get_thread_pool(ctx);
+    PRManagerData data = {
+        .pr_mgr = pr_mgr,
+        .fd     = fd,
+        .hdr    = hdr,
+    };
+
+    trace_pr_manager_execute(fd, hdr->cmdp[0], hdr->cmdp[1]);
+
+    /* The matching object_unref is in pr_manager_worker.  */
+    object_ref(OBJECT(pr_mgr));
+    return thread_pool_submit_co(pool, pr_manager_worker, &data);
+}
+
+bool pr_manager_is_connected(PRManager *pr_mgr)
+{
+    PRManagerClass *pr_mgr_class =
+        PR_MANAGER_GET_CLASS(pr_mgr);
+
+    return !pr_mgr_class->is_connected || pr_mgr_class->is_connected(pr_mgr);
+}
+
+static const TypeInfo pr_manager_info = {
+    .parent = TYPE_OBJECT,
+    .name = TYPE_PR_MANAGER,
+    .class_size = sizeof(PRManagerClass),
+    .abstract = true,
+    .interfaces = (InterfaceInfo[]) {
+        { TYPE_USER_CREATABLE },
+        { }
+    }
+};
+
+PRManager *pr_manager_lookup(const char *id, Error **errp)
+{
+    Object *obj;
+    PRManager *pr_mgr;
+
+    obj = object_resolve_path_component(object_get_objects_root(), id);
+    if (!obj) {
+        error_setg(errp, "No persistent reservation manager with id '%s'", id);
+        return NULL;
+    }
+
+    pr_mgr = (PRManager *)
+        object_dynamic_cast(obj,
+                            TYPE_PR_MANAGER);
+    if (!pr_mgr) {
+        error_setg(errp,
+                   "Object with id '%s' is not a persistent reservation manager",
+                   id);
+        return NULL;
+    }
+
+    return pr_mgr;
+}
+
+static void
+pr_manager_register_types(void)
+{
+    type_register_static(&pr_manager_info);
+}
+
+static int query_one_pr_manager(Object *object, void *opaque)
+{
+    PRManagerInfoList ***prev = opaque;
+    PRManagerInfoList *elem;
+    PRManagerInfo *info;
+    PRManager *pr_mgr;
+
+    pr_mgr = (PRManager *)object_dynamic_cast(object, TYPE_PR_MANAGER);
+    if (!pr_mgr) {
+        return 0;
+    }
+
+    elem = g_new0(PRManagerInfoList, 1);
+    info = g_new0(PRManagerInfo, 1);
+    info->id = g_strdup(object_get_canonical_path_component(object));
+    info->connected = pr_manager_is_connected(pr_mgr);
+    elem->value = info;
+    elem->next = NULL;
+
+    **prev = elem;
+    *prev = &elem->next;
+    return 0;
+}
+
+PRManagerInfoList *qmp_query_pr_managers(Error **errp)
+{
+    PRManagerInfoList *head = NULL;
+    PRManagerInfoList **prev = &head;
+    Object *container = container_get(object_get_root(), PR_MANAGER_PATH);
+
+    object_child_foreach(container, query_one_pr_manager, &prev);
+    return head;
+}
+
+type_init(pr_manager_register_types);
diff --git a/scsi/qemu-pr-helper.c b/scsi/qemu-pr-helper.c
new file mode 100644
index 000000000..2733d92f2
--- /dev/null
+++ b/scsi/qemu-pr-helper.c
@@ -0,0 +1,1077 @@
+/*
+ * Privileged helper to handle persistent reservation commands for QEMU
+ *
+ * Copyright (C) 2017 Red Hat, Inc. 
+ *
+ * Author: Paolo Bonzini 
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; under version 2 of the License.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, see .
+ */
+
+#include "qemu/osdep.h"
+#include 
+#include 
+#include 
+#include 
+
+#ifdef CONFIG_LIBCAP_NG
+#include 
+#endif
+#include 
+#include 
+
+#ifdef CONFIG_MPATH
+#include 
+#include 
+#include 
+#endif
+
+#include "qemu-common.h"
+#include "qapi/error.h"
+#include "qemu/cutils.h"
+#include "qemu/main-loop.h"
+#include "qemu/module.h"
+#include "qemu/error-report.h"
+#include "qemu/config-file.h"
+#include "qemu/bswap.h"
+#include "qemu/log.h"
+#include "qemu/systemd.h"
+#include "qapi/util.h"
+#include "qapi/qmp/qstring.h"
+#include "io/channel-socket.h"
+#include "trace/control.h"
+#include "qemu-version.h"
+
+#include "block/aio.h"
+#include "block/thread-pool.h"
+
+#include "scsi/constants.h"
+#include "scsi/utils.h"
+#include "pr-helper.h"
+
+#define PR_OUT_FIXED_PARAM_SIZE 24
+
+static char *socket_path;
+static char *pidfile;
+static enum { RUNNING, TERMINATE, TERMINATING } state;
+static QIOChannelSocket *server_ioc;
+static int server_watch;
+static int num_active_sockets = 1;
+static int noisy;
+static int verbose;
+
+#ifdef CONFIG_LIBCAP_NG
+static int uid = -1;
+static int gid = -1;
+#endif
+
+static void compute_default_paths(void)
+{
+    socket_path = qemu_get_local_state_pathname("run/qemu-pr-helper.sock");
+    pidfile = qemu_get_local_state_pathname("run/qemu-pr-helper.pid");
+}
+
+static void usage(const char *name)
+{
+    (printf) (
+"Usage: %s [OPTIONS] FILE\n"
+"Persistent Reservation helper program for QEMU\n"
+"\n"
+"  -h, --help                display this help and exit\n"
+"  -V, --version             output version information and exit\n"
+"\n"
+"  -d, --daemon              run in the background\n"
+"  -f, --pidfile=PATH        PID file when running as a daemon\n"
+"                            (default '%s')\n"
+"  -k, --socket=PATH         path to the unix socket\n"
+"                            (default '%s')\n"
+"  -T, --trace [[enable=]][,events=][,file=]\n"
+"                            specify tracing options\n"
+#ifdef CONFIG_LIBCAP_NG
+"  -u, --user=USER           user to drop privileges to\n"
+"  -g, --group=GROUP         group to drop privileges to\n"
+#endif
+"\n"
+QEMU_HELP_BOTTOM "\n"
+    , name, pidfile, socket_path);
+}
+
+static void version(const char *name)
+{
+    printf(
+"%s " QEMU_FULL_VERSION "\n"
+"Written by Paolo Bonzini.\n"
+"\n"
+QEMU_COPYRIGHT "\n"
+"This is free software; see the source for copying conditions.  There is NO\n"
+"warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.\n"
+    , name);
+}
+
+/* SG_IO support */
+
+typedef struct PRHelperSGIOData {
+    int fd;
+    const uint8_t *cdb;
+    uint8_t *sense;
+    uint8_t *buf;
+    int sz;              /* input/output */
+    int dir;
+} PRHelperSGIOData;
+
+static int do_sgio_worker(void *opaque)
+{
+    PRHelperSGIOData *data = opaque;
+    struct sg_io_hdr io_hdr;
+    int ret;
+    int status;
+    SCSISense sense_code;
+
+    memset(data->sense, 0, PR_HELPER_SENSE_SIZE);
+    memset(&io_hdr, 0, sizeof(io_hdr));
+    io_hdr.interface_id = 'S';
+    io_hdr.cmd_len = PR_HELPER_CDB_SIZE;
+    io_hdr.cmdp = (uint8_t *)data->cdb;
+    io_hdr.sbp = data->sense;
+    io_hdr.mx_sb_len = PR_HELPER_SENSE_SIZE;
+    io_hdr.timeout = 1;
+    io_hdr.dxfer_direction = data->dir;
+    io_hdr.dxferp = (char *)data->buf;
+    io_hdr.dxfer_len = data->sz;
+    ret = ioctl(data->fd, SG_IO, &io_hdr);
+    status = sg_io_sense_from_errno(ret < 0 ? errno : 0, &io_hdr,
+                                    &sense_code);
+    if (status == GOOD) {
+        data->sz -= io_hdr.resid;
+    } else {
+        data->sz = 0;
+    }
+
+    if (status == CHECK_CONDITION &&
+        !(io_hdr.driver_status & SG_ERR_DRIVER_SENSE)) {
+        scsi_build_sense(data->sense, sense_code);
+    }
+
+    return status;
+}
+
+static int do_sgio(int fd, const uint8_t *cdb, uint8_t *sense,
+                    uint8_t *buf, int *sz, int dir)
+{
+    ThreadPool *pool = aio_get_thread_pool(qemu_get_aio_context());
+    int r;
+
+    PRHelperSGIOData data = {
+        .fd = fd,
+        .cdb = cdb,
+        .sense = sense,
+        .buf = buf,
+        .sz = *sz,
+        .dir = dir,
+    };
+
+    r = thread_pool_submit_co(pool, do_sgio_worker, &data);
+    *sz = data.sz;
+    return r;
+}
+
+/* Device mapper interface */
+
+#ifdef CONFIG_MPATH
+#define CONTROL_PATH "/dev/mapper/control"
+
+typedef struct DMData {
+    struct dm_ioctl dm;
+    uint8_t data[1024];
+} DMData;
+
+static int control_fd;
+
+static void *dm_ioctl(int ioc, struct dm_ioctl *dm)
+{
+    static DMData d;
+    memcpy(&d.dm, dm, sizeof(d.dm));
+    QEMU_BUILD_BUG_ON(sizeof(d.data) < sizeof(struct dm_target_spec));
+
+    d.dm.version[0] = DM_VERSION_MAJOR;
+    d.dm.version[1] = 0;
+    d.dm.version[2] = 0;
+    d.dm.data_size = 1024;
+    d.dm.data_start = offsetof(DMData, data);
+    if (ioctl(control_fd, ioc, &d) < 0) {
+        return NULL;
+    }
+    memcpy(dm, &d.dm, sizeof(d.dm));
+    return &d.data;
+}
+
+static void *dm_dev_ioctl(int fd, int ioc, struct dm_ioctl *dm)
+{
+    struct stat st;
+    int r;
+
+    r = fstat(fd, &st);
+    if (r < 0) {
+        perror("fstat");
+        exit(1);
+    }
+
+    dm->dev = st.st_rdev;
+    return dm_ioctl(ioc, dm);
+}
+
+static void dm_init(void)
+{
+    control_fd = open(CONTROL_PATH, O_RDWR);
+    if (control_fd < 0) {
+        perror("Cannot open " CONTROL_PATH);
+        exit(1);
+    }
+    struct dm_ioctl dm = { };
+    if (!dm_ioctl(DM_VERSION, &dm)) {
+        perror("ioctl");
+        exit(1);
+    }
+    if (dm.version[0] != DM_VERSION_MAJOR) {
+        fprintf(stderr, "Unsupported device mapper interface");
+        exit(1);
+    }
+}
+
+/* Variables required by libmultipath and libmpathpersist.  */
+QEMU_BUILD_BUG_ON(PR_HELPER_DATA_SIZE > MPATH_MAX_PARAM_LEN);
+static struct config *multipath_conf;
+unsigned mpath_mx_alloc_len = PR_HELPER_DATA_SIZE;
+int logsink;
+struct udev *udev;
+
+extern struct config *get_multipath_config(void);
+struct config *get_multipath_config(void)
+{
+    return multipath_conf;
+}
+
+extern void put_multipath_config(struct config *conf);
+void put_multipath_config(struct config *conf)
+{
+}
+
+static void multipath_pr_init(void)
+{
+    udev = udev_new();
+#ifdef CONFIG_MPATH_NEW_API
+    multipath_conf = mpath_lib_init();
+#else
+    mpath_lib_init(udev);
+#endif
+}
+
+static int is_mpath(int fd)
+{
+    struct dm_ioctl dm = { .flags = DM_NOFLUSH_FLAG };
+    struct dm_target_spec *tgt;
+
+    tgt = dm_dev_ioctl(fd, DM_TABLE_STATUS, &dm);
+    if (!tgt) {
+        if (errno == ENXIO) {
+            return 0;
+        }
+        perror("ioctl");
+        exit(EXIT_FAILURE);
+    }
+    return !strncmp(tgt->target_type, "multipath", DM_MAX_TYPE_NAME);
+}
+
+static SCSISense mpath_generic_sense(int r)
+{
+    switch (r) {
+    case MPATH_PR_SENSE_NOT_READY:
+         return SENSE_CODE(NOT_READY);
+    case MPATH_PR_SENSE_MEDIUM_ERROR:
+         return SENSE_CODE(READ_ERROR);
+    case MPATH_PR_SENSE_HARDWARE_ERROR:
+         return SENSE_CODE(TARGET_FAILURE);
+    case MPATH_PR_SENSE_ABORTED_COMMAND:
+         return SENSE_CODE(IO_ERROR);
+    default:
+         abort();
+    }
+}
+
+static int mpath_reconstruct_sense(int fd, int r, uint8_t *sense)
+{
+    switch (r) {
+    case MPATH_PR_SUCCESS:
+        return GOOD;
+    case MPATH_PR_SENSE_NOT_READY:
+    case MPATH_PR_SENSE_MEDIUM_ERROR:
+    case MPATH_PR_SENSE_HARDWARE_ERROR:
+    case MPATH_PR_SENSE_ABORTED_COMMAND:
+        {
+            /* libmpathpersist ate the exact sense.  Try to find it by
+             * issuing TEST UNIT READY.
+             */
+            uint8_t cdb[6] = { TEST_UNIT_READY };
+            int sz = 0;
+            int ret = do_sgio(fd, cdb, sense, NULL, &sz, SG_DXFER_NONE);
+
+            if (ret != GOOD) {
+                return ret;
+            }
+            scsi_build_sense(sense, mpath_generic_sense(r));
+            return CHECK_CONDITION;
+        }
+
+    case MPATH_PR_SENSE_UNIT_ATTENTION:
+        /* Congratulations libmpathpersist, you ruined the Unit Attention...
+         * Return a heavyweight one.
+         */
+        scsi_build_sense(sense, SENSE_CODE(SCSI_BUS_RESET));
+        return CHECK_CONDITION;
+    case MPATH_PR_SENSE_INVALID_OP:
+        /* Only one valid sense.  */
+        scsi_build_sense(sense, SENSE_CODE(INVALID_OPCODE));
+        return CHECK_CONDITION;
+    case MPATH_PR_ILLEGAL_REQ:
+        /* Guess.  */
+        scsi_build_sense(sense, SENSE_CODE(INVALID_PARAM));
+        return CHECK_CONDITION;
+    case MPATH_PR_NO_SENSE:
+        scsi_build_sense(sense, SENSE_CODE(NO_SENSE));
+        return CHECK_CONDITION;
+
+    case MPATH_PR_RESERV_CONFLICT:
+        return RESERVATION_CONFLICT;
+
+    case MPATH_PR_OTHER:
+    default:
+        scsi_build_sense(sense, SENSE_CODE(LUN_COMM_FAILURE));
+        return CHECK_CONDITION;
+    }
+}
+
+static int multipath_pr_in(int fd, const uint8_t *cdb, uint8_t *sense,
+                           uint8_t *data, int sz)
+{
+    int rq_servact = cdb[1];
+    struct prin_resp resp;
+    size_t written;
+    int r;
+
+    switch (rq_servact) {
+    case MPATH_PRIN_RKEY_SA:
+    case MPATH_PRIN_RRES_SA:
+    case MPATH_PRIN_RCAP_SA:
+        break;
+    case MPATH_PRIN_RFSTAT_SA:
+        /* Nobody implements it anyway, so bail out. */
+    default:
+        /* Cannot parse any other output.  */
+        scsi_build_sense(sense, SENSE_CODE(INVALID_FIELD));
+        return CHECK_CONDITION;
+    }
+
+    r = mpath_persistent_reserve_in(fd, rq_servact, &resp, noisy, verbose);
+    if (r == MPATH_PR_SUCCESS) {
+        switch (rq_servact) {
+        case MPATH_PRIN_RKEY_SA:
+        case MPATH_PRIN_RRES_SA: {
+            struct prin_readdescr *out = &resp.prin_descriptor.prin_readkeys;
+            assert(sz >= 8);
+            written = MIN(out->additional_length + 8, sz);
+            stl_be_p(&data[0], out->prgeneration);
+            stl_be_p(&data[4], out->additional_length);
+            memcpy(&data[8], out->key_list, written - 8);
+            break;
+        }
+        case MPATH_PRIN_RCAP_SA: {
+            struct prin_capdescr *out = &resp.prin_descriptor.prin_readcap;
+            assert(sz >= 6);
+            written = 6;
+            stw_be_p(&data[0], out->length);
+            data[2] = out->flags[0];
+            data[3] = out->flags[1];
+            stw_be_p(&data[4], out->pr_type_mask);
+            break;
+        }
+        default:
+            scsi_build_sense(sense, SENSE_CODE(INVALID_OPCODE));
+            return CHECK_CONDITION;
+        }
+        assert(written <= sz);
+        memset(data + written, 0, sz - written);
+    }
+
+    return mpath_reconstruct_sense(fd, r, sense);
+}
+
+static int multipath_pr_out(int fd, const uint8_t *cdb, uint8_t *sense,
+                            const uint8_t *param, int sz)
+{
+    int rq_servact = cdb[1];
+    int rq_scope = cdb[2] >> 4;
+    int rq_type = cdb[2] & 0xf;
+    g_autofree struct prout_param_descriptor *paramp = NULL;
+    char transportids[PR_HELPER_DATA_SIZE];
+    int r;
+
+    paramp = g_malloc0(sizeof(struct prout_param_descriptor)
+                       + sizeof(struct transportid *) * MPATH_MX_TIDS);
+
+    if (sz < PR_OUT_FIXED_PARAM_SIZE) {
+        /* Illegal request, Parameter list length error.  This isn't fatal;
+         * we have read the data, send an error without closing the socket.
+         */
+        scsi_build_sense(sense, SENSE_CODE(INVALID_PARAM_LEN));
+        return CHECK_CONDITION;
+    }
+
+    switch (rq_servact) {
+    case MPATH_PROUT_REG_SA:
+    case MPATH_PROUT_RES_SA:
+    case MPATH_PROUT_REL_SA:
+    case MPATH_PROUT_CLEAR_SA:
+    case MPATH_PROUT_PREE_SA:
+    case MPATH_PROUT_PREE_AB_SA:
+    case MPATH_PROUT_REG_IGN_SA:
+        break;
+    case MPATH_PROUT_REG_MOV_SA:
+        /* Not supported by struct prout_param_descriptor.  */
+    default:
+        /* Cannot parse any other input.  */
+        scsi_build_sense(sense, SENSE_CODE(INVALID_FIELD));
+        return CHECK_CONDITION;
+    }
+
+    /* Convert input data, especially transport IDs, to the structs
+     * used by libmpathpersist (which, of course, will immediately
+     * do the opposite).
+     */
+    memcpy(¶mp->key, ¶m[0], 8);
+    memcpy(¶mp->sa_key, ¶m[8], 8);
+    paramp->sa_flags = param[20];
+    if (sz > PR_OUT_FIXED_PARAM_SIZE) {
+        size_t transportid_len;
+        int i, j;
+        if (sz < PR_OUT_FIXED_PARAM_SIZE + 4) {
+            scsi_build_sense(sense, SENSE_CODE(INVALID_PARAM_LEN));
+            return CHECK_CONDITION;
+        }
+        transportid_len = ldl_be_p(¶m[24]) + PR_OUT_FIXED_PARAM_SIZE + 4;
+        if (transportid_len > sz) {
+            scsi_build_sense(sense, SENSE_CODE(INVALID_PARAM));
+            return CHECK_CONDITION;
+        }
+        for (i = PR_OUT_FIXED_PARAM_SIZE + 4, j = 0; i < transportid_len; ) {
+            struct transportid *id = (struct transportid *) &transportids[j];
+            int len;
+
+            id->format_code = param[i] & 0xc0;
+            id->protocol_id = param[i] & 0x0f;
+            switch (param[i] & 0xcf) {
+            case 0:
+                /* FC transport.  */
+                if (i + 24 > transportid_len) {
+                    goto illegal_req;
+                }
+                memcpy(id->n_port_name, ¶m[i + 8], 8);
+                j += offsetof(struct transportid, n_port_name[8]);
+                i += 24;
+                break;
+            case 5:
+            case 0x45:
+                /* iSCSI transport.  */
+                len = lduw_be_p(¶m[i + 2]);
+                if (len > 252 || (len & 3) || i + len + 4 > transportid_len) {
+                    /* For format code 00, the standard says the maximum is 223
+                     * plus the NUL terminator.  For format code 01 there is no
+                     * maximum length, but libmpathpersist ignores the first
+                     * byte of id->iscsi_name so our maximum is 252.
+                     */
+                    goto illegal_req;
+                }
+                if (memchr(¶m[i + 4], 0, len) == NULL) {
+                    goto illegal_req;
+                }
+                memcpy(id->iscsi_name, ¶m[i + 2], len + 2);
+                j += offsetof(struct transportid, iscsi_name[len + 2]);
+                i += len + 4;
+                break;
+            case 6:
+                /* SAS transport.  */
+                if (i + 24 > transportid_len) {
+                    goto illegal_req;
+                }
+                memcpy(id->sas_address, ¶m[i + 4], 8);
+                j += offsetof(struct transportid, sas_address[8]);
+                i += 24;
+                break;
+            default:
+            illegal_req:
+                scsi_build_sense(sense, SENSE_CODE(INVALID_PARAM));
+                return CHECK_CONDITION;
+            }
+
+            assert(paramp->num_transportid < MPATH_MX_TIDS);
+            paramp->trnptid_list[paramp->num_transportid++] = id;
+        }
+    }
+
+    r = mpath_persistent_reserve_out(fd, rq_servact, rq_scope, rq_type,
+                                     paramp, noisy, verbose);
+    return mpath_reconstruct_sense(fd, r, sense);
+}
+#endif
+
+static int do_pr_in(int fd, const uint8_t *cdb, uint8_t *sense,
+                    uint8_t *data, int *resp_sz)
+{
+#ifdef CONFIG_MPATH
+    if (is_mpath(fd)) {
+        /* multipath_pr_in fills the whole input buffer.  */
+        int r = multipath_pr_in(fd, cdb, sense, data, *resp_sz);
+        if (r != GOOD) {
+            *resp_sz = 0;
+        }
+        return r;
+    }
+#endif
+
+    return do_sgio(fd, cdb, sense, data, resp_sz,
+                   SG_DXFER_FROM_DEV);
+}
+
+static int do_pr_out(int fd, const uint8_t *cdb, uint8_t *sense,
+                     const uint8_t *param, int sz)
+{
+    int resp_sz;
+
+    if ((fcntl(fd, F_GETFL) & O_ACCMODE) == O_RDONLY) {
+        scsi_build_sense(sense, SENSE_CODE(INVALID_OPCODE));
+        return CHECK_CONDITION;
+    }
+
+#ifdef CONFIG_MPATH
+    if (is_mpath(fd)) {
+        return multipath_pr_out(fd, cdb, sense, param, sz);
+    }
+#endif
+
+    resp_sz = sz;
+    return do_sgio(fd, cdb, sense, (uint8_t *)param, &resp_sz,
+                   SG_DXFER_TO_DEV);
+}
+
+/* Client */
+
+typedef struct PRHelperClient {
+    QIOChannelSocket *ioc;
+    Coroutine *co;
+    int fd;
+    uint8_t data[PR_HELPER_DATA_SIZE];
+} PRHelperClient;
+
+typedef struct PRHelperRequest {
+    int fd;
+    size_t sz;
+    uint8_t cdb[PR_HELPER_CDB_SIZE];
+} PRHelperRequest;
+
+static int coroutine_fn prh_read(PRHelperClient *client, void *buf, int sz,
+                                 Error **errp)
+{
+    int ret = 0;
+
+    while (sz > 0) {
+        int *fds = NULL;
+        size_t nfds = 0;
+        int i;
+        struct iovec iov;
+        ssize_t n_read;
+
+        iov.iov_base = buf;
+        iov.iov_len = sz;
+        n_read = qio_channel_readv_full(QIO_CHANNEL(client->ioc), &iov, 1,
+                                        &fds, &nfds, errp);
+
+        if (n_read == QIO_CHANNEL_ERR_BLOCK) {
+            qio_channel_yield(QIO_CHANNEL(client->ioc), G_IO_IN);
+            continue;
+        }
+        if (n_read <= 0) {
+            ret = n_read ? n_read : -1;
+            goto err;
+        }
+
+        /* Stash one file descriptor per request.  */
+        if (nfds) {
+            bool too_many = false;
+            for (i = 0; i < nfds; i++) {
+                if (client->fd == -1) {
+                    client->fd = fds[i];
+                } else {
+                    close(fds[i]);
+                    too_many = true;
+                }
+            }
+            g_free(fds);
+            if (too_many) {
+                ret = -1;
+                goto err;
+            }
+        }
+
+        buf += n_read;
+        sz -= n_read;
+    }
+
+    return 0;
+
+err:
+    if (client->fd != -1) {
+        close(client->fd);
+        client->fd = -1;
+    }
+    return ret;
+}
+
+static int coroutine_fn prh_read_request(PRHelperClient *client,
+                                         PRHelperRequest *req,
+                                         PRHelperResponse *resp, Error **errp)
+{
+    uint32_t sz;
+
+    if (prh_read(client, req->cdb, sizeof(req->cdb), NULL) < 0) {
+        return -1;
+    }
+
+    if (client->fd == -1) {
+        error_setg(errp, "No file descriptor in request.");
+        return -1;
+    }
+
+    if (req->cdb[0] != PERSISTENT_RESERVE_OUT &&
+        req->cdb[0] != PERSISTENT_RESERVE_IN) {
+        error_setg(errp, "Invalid CDB, closing socket.");
+        goto out_close;
+    }
+
+    sz = scsi_cdb_xfer(req->cdb);
+    if (sz > sizeof(client->data)) {
+        goto out_close;
+    }
+
+    if (req->cdb[0] == PERSISTENT_RESERVE_OUT) {
+        if (qio_channel_read_all(QIO_CHANNEL(client->ioc),
+                                 (char *)client->data, sz,
+                                 errp) < 0) {
+            goto out_close;
+        }
+    }
+
+    req->fd = client->fd;
+    req->sz = sz;
+    client->fd = -1;
+    return sz;
+
+out_close:
+    close(client->fd);
+    client->fd = -1;
+    return -1;
+}
+
+static int coroutine_fn prh_write_response(PRHelperClient *client,
+                                           PRHelperRequest *req,
+                                           PRHelperResponse *resp, Error **errp)
+{
+    ssize_t r;
+    size_t sz;
+
+    if (req->cdb[0] == PERSISTENT_RESERVE_IN && resp->result == GOOD) {
+        assert(resp->sz <= req->sz && resp->sz <= sizeof(client->data));
+    } else {
+        assert(resp->sz == 0);
+    }
+
+    sz = resp->sz;
+
+    resp->result = cpu_to_be32(resp->result);
+    resp->sz = cpu_to_be32(resp->sz);
+    r = qio_channel_write_all(QIO_CHANNEL(client->ioc),
+                              (char *) resp, sizeof(*resp), errp);
+    if (r < 0) {
+        return r;
+    }
+
+    r = qio_channel_write_all(QIO_CHANNEL(client->ioc),
+                              (char *) client->data,
+                              sz, errp);
+    return r < 0 ? r : 0;
+}
+
+static void coroutine_fn prh_co_entry(void *opaque)
+{
+    PRHelperClient *client = opaque;
+    Error *local_err = NULL;
+    uint32_t flags;
+    int r;
+
+    qio_channel_set_blocking(QIO_CHANNEL(client->ioc),
+                             false, NULL);
+    qio_channel_attach_aio_context(QIO_CHANNEL(client->ioc),
+                                   qemu_get_aio_context());
+
+    /* A very simple negotiation for future extensibility.  No features
+     * are defined so write 0.
+     */
+    flags = cpu_to_be32(0);
+    r = qio_channel_write_all(QIO_CHANNEL(client->ioc),
+                             (char *) &flags, sizeof(flags), NULL);
+    if (r < 0) {
+        goto out;
+    }
+
+    r = qio_channel_read_all(QIO_CHANNEL(client->ioc),
+                             (char *) &flags, sizeof(flags), NULL);
+    if (be32_to_cpu(flags) != 0 || r < 0) {
+        goto out;
+    }
+
+    while (qatomic_read(&state) == RUNNING) {
+        PRHelperRequest req;
+        PRHelperResponse resp;
+        int sz;
+
+        sz = prh_read_request(client, &req, &resp, &local_err);
+        if (sz < 0) {
+            break;
+        }
+
+        num_active_sockets++;
+        if (req.cdb[0] == PERSISTENT_RESERVE_OUT) {
+            r = do_pr_out(req.fd, req.cdb, resp.sense,
+                          client->data, sz);
+            resp.sz = 0;
+        } else {
+            resp.sz = sizeof(client->data);
+            r = do_pr_in(req.fd, req.cdb, resp.sense,
+                         client->data, &resp.sz);
+            resp.sz = MIN(resp.sz, sz);
+        }
+        num_active_sockets--;
+        close(req.fd);
+        if (r == -1) {
+            break;
+        }
+        resp.result = r;
+
+        if (prh_write_response(client, &req, &resp, &local_err) < 0) {
+            break;
+        }
+    }
+
+    if (local_err) {
+        if (verbose == 0) {
+            error_free(local_err);
+        } else {
+            error_report_err(local_err);
+        }
+    }
+
+out:
+    qio_channel_detach_aio_context(QIO_CHANNEL(client->ioc));
+    object_unref(OBJECT(client->ioc));
+    g_free(client);
+}
+
+static gboolean accept_client(QIOChannel *ioc, GIOCondition cond, gpointer opaque)
+{
+    QIOChannelSocket *cioc;
+    PRHelperClient *prh;
+
+    cioc = qio_channel_socket_accept(QIO_CHANNEL_SOCKET(ioc),
+                                     NULL);
+    if (!cioc) {
+        return TRUE;
+    }
+
+    prh = g_new(PRHelperClient, 1);
+    prh->ioc = cioc;
+    prh->fd = -1;
+    prh->co = qemu_coroutine_create(prh_co_entry, prh);
+    qemu_coroutine_enter(prh->co);
+
+    return TRUE;
+}
+
+static void termsig_handler(int signum)
+{
+    qatomic_cmpxchg(&state, RUNNING, TERMINATE);
+    qemu_notify_event();
+}
+
+static void close_server_socket(void)
+{
+    assert(server_ioc);
+
+    g_source_remove(server_watch);
+    server_watch = -1;
+    object_unref(OBJECT(server_ioc));
+    num_active_sockets--;
+}
+
+#ifdef CONFIG_LIBCAP_NG
+static int drop_privileges(void)
+{
+    /* clear all capabilities */
+    capng_clear(CAPNG_SELECT_BOTH);
+
+    if (capng_update(CAPNG_ADD, CAPNG_EFFECTIVE | CAPNG_PERMITTED,
+                     CAP_SYS_RAWIO) < 0) {
+        return -1;
+    }
+
+#ifdef CONFIG_MPATH
+    /* For /dev/mapper/control ioctls */
+    if (capng_update(CAPNG_ADD, CAPNG_EFFECTIVE | CAPNG_PERMITTED,
+                     CAP_SYS_ADMIN) < 0) {
+        return -1;
+    }
+#endif
+
+    /* Change user/group id, retaining the capabilities.  Because file descriptors
+     * are passed via SCM_RIGHTS, we don't need supplementary groups (and in
+     * fact the helper can run as "nobody").
+     */
+    if (capng_change_id(uid != -1 ? uid : getuid(),
+                        gid != -1 ? gid : getgid(),
+                        CAPNG_DROP_SUPP_GRP | CAPNG_CLEAR_BOUNDING)) {
+        return -1;
+    }
+
+    return 0;
+}
+#endif
+
+int main(int argc, char **argv)
+{
+    const char *sopt = "hVk:f:dT:u:g:vq";
+    struct option lopt[] = {
+        { "help", no_argument, NULL, 'h' },
+        { "version", no_argument, NULL, 'V' },
+        { "socket", required_argument, NULL, 'k' },
+        { "pidfile", required_argument, NULL, 'f' },
+        { "daemon", no_argument, NULL, 'd' },
+        { "trace", required_argument, NULL, 'T' },
+        { "user", required_argument, NULL, 'u' },
+        { "group", required_argument, NULL, 'g' },
+        { "verbose", no_argument, NULL, 'v' },
+        { "quiet", no_argument, NULL, 'q' },
+        { NULL, 0, NULL, 0 }
+    };
+    int opt_ind = 0;
+    int loglevel = 1;
+    int quiet = 0;
+    int ch;
+    Error *local_err = NULL;
+    bool daemonize = false;
+    bool pidfile_specified = false;
+    bool socket_path_specified = false;
+    unsigned socket_activation;
+
+    struct sigaction sa_sigterm;
+    memset(&sa_sigterm, 0, sizeof(sa_sigterm));
+    sa_sigterm.sa_handler = termsig_handler;
+    sigaction(SIGTERM, &sa_sigterm, NULL);
+    sigaction(SIGINT, &sa_sigterm, NULL);
+    sigaction(SIGHUP, &sa_sigterm, NULL);
+
+    signal(SIGPIPE, SIG_IGN);
+
+    error_init(argv[0]);
+    module_call_init(MODULE_INIT_TRACE);
+    module_call_init(MODULE_INIT_QOM);
+    qemu_add_opts(&qemu_trace_opts);
+    qemu_init_exec_dir(argv[0]);
+
+    compute_default_paths();
+
+    while ((ch = getopt_long(argc, argv, sopt, lopt, &opt_ind)) != -1) {
+        switch (ch) {
+        case 'k':
+            g_free(socket_path);
+            socket_path = g_strdup(optarg);
+            socket_path_specified = true;
+            if (socket_path[0] != '/') {
+                error_report("socket path must be absolute");
+                exit(EXIT_FAILURE);
+            }
+            break;
+        case 'f':
+            g_free(pidfile);
+            pidfile = g_strdup(optarg);
+            pidfile_specified = true;
+            break;
+#ifdef CONFIG_LIBCAP_NG
+        case 'u': {
+            unsigned long res;
+            struct passwd *userinfo = getpwnam(optarg);
+            if (userinfo) {
+                uid = userinfo->pw_uid;
+            } else if (qemu_strtoul(optarg, NULL, 10, &res) == 0 &&
+                       (uid_t)res == res) {
+                uid = res;
+            } else {
+                error_report("invalid user '%s'", optarg);
+                exit(EXIT_FAILURE);
+            }
+            break;
+        }
+        case 'g': {
+            unsigned long res;
+            struct group *groupinfo = getgrnam(optarg);
+            if (groupinfo) {
+                gid = groupinfo->gr_gid;
+            } else if (qemu_strtoul(optarg, NULL, 10, &res) == 0 &&
+                       (gid_t)res == res) {
+                gid = res;
+            } else {
+                error_report("invalid group '%s'", optarg);
+                exit(EXIT_FAILURE);
+            }
+            break;
+        }
+#else
+        case 'u':
+        case 'g':
+            error_report("-%c not supported by this %s", ch, argv[0]);
+            exit(1);
+#endif
+        case 'd':
+            daemonize = true;
+            break;
+        case 'q':
+            quiet = 1;
+            break;
+        case 'v':
+            ++loglevel;
+            break;
+        case 'T':
+            trace_opt_parse(optarg);
+            break;
+        case 'V':
+            version(argv[0]);
+            exit(EXIT_SUCCESS);
+            break;
+        case 'h':
+            usage(argv[0]);
+            exit(EXIT_SUCCESS);
+            break;
+        case '?':
+            error_report("Try `%s --help' for more information.", argv[0]);
+            exit(EXIT_FAILURE);
+        }
+    }
+
+    /* set verbosity */
+    noisy = !quiet && (loglevel >= 3);
+    verbose = quiet ? 0 : MIN(loglevel, 3);
+
+    if (!trace_init_backends()) {
+        exit(EXIT_FAILURE);
+    }
+    trace_init_file();
+    qemu_set_log(LOG_TRACE);
+
+#ifdef CONFIG_MPATH
+    dm_init();
+    multipath_pr_init();
+#endif
+
+    socket_activation = check_socket_activation();
+    if (socket_activation == 0) {
+        SocketAddress saddr;
+        saddr = (SocketAddress){
+            .type = SOCKET_ADDRESS_TYPE_UNIX,
+            .u.q_unix.path = socket_path,
+        };
+        server_ioc = qio_channel_socket_new();
+        if (qio_channel_socket_listen_sync(server_ioc, &saddr,
+                                           1, &local_err) < 0) {
+            object_unref(OBJECT(server_ioc));
+            error_report_err(local_err);
+            return 1;
+        }
+    } else {
+        /* Using socket activation - check user didn't use -p etc. */
+        if (socket_path_specified) {
+            error_report("Unix socket can't be set when using socket activation");
+            exit(EXIT_FAILURE);
+        }
+
+        /* Can only listen on a single socket.  */
+        if (socket_activation > 1) {
+            error_report("%s does not support socket activation with LISTEN_FDS > 1",
+                         argv[0]);
+            exit(EXIT_FAILURE);
+        }
+        server_ioc = qio_channel_socket_new_fd(FIRST_SOCKET_ACTIVATION_FD,
+                                               &local_err);
+        if (server_ioc == NULL) {
+            error_reportf_err(local_err,
+                              "Failed to use socket activation: ");
+            exit(EXIT_FAILURE);
+        }
+    }
+
+    if (qemu_init_main_loop(&local_err)) {
+        error_report_err(local_err);
+        exit(EXIT_FAILURE);
+    }
+
+    server_watch = qio_channel_add_watch(QIO_CHANNEL(server_ioc),
+                                         G_IO_IN,
+                                         accept_client,
+                                         NULL, NULL);
+
+    if (daemonize) {
+        if (daemon(0, 0) < 0) {
+            error_report("Failed to daemonize: %s", strerror(errno));
+            exit(EXIT_FAILURE);
+        }
+    }
+
+    if ((daemonize || pidfile_specified) &&
+        !qemu_write_pidfile(pidfile, &local_err)) {
+        error_report_err(local_err);
+        exit(EXIT_FAILURE);
+    }
+
+#ifdef CONFIG_LIBCAP_NG
+    if (drop_privileges() < 0) {
+        error_report("Failed to drop privileges: %s", strerror(errno));
+        exit(EXIT_FAILURE);
+    }
+#endif
+
+    state = RUNNING;
+    do {
+        main_loop_wait(false);
+        if (state == TERMINATE) {
+            state = TERMINATING;
+            close_server_socket();
+        }
+    } while (num_active_sockets > 0);
+
+    exit(EXIT_SUCCESS);
+}
diff --git a/scsi/trace-events b/scsi/trace-events
new file mode 100644
index 000000000..6dbfeae79
--- /dev/null
+++ b/scsi/trace-events
@@ -0,0 +1,5 @@
+# See docs/devel/tracing.txt for syntax documentation.
+
+# pr-manager.c
+pr_manager_execute(int fd, int cmd, int sa) "fd=%d cmd=0x%02x service action=0x%02x"
+pr_manager_run(int fd, int cmd, int sa) "fd=%d cmd=0x%02x service action=0x%02x"
diff --git a/scsi/trace.h b/scsi/trace.h
new file mode 100644
index 000000000..3e4d89a3a
--- /dev/null
+++ b/scsi/trace.h
@@ -0,0 +1 @@
+#include "trace/trace-scsi.h"
diff --git a/scsi/utils.c b/scsi/utils.c
new file mode 100644
index 000000000..b37c28301
--- /dev/null
+++ b/scsi/utils.c
@@ -0,0 +1,596 @@
+/*
+ *  SCSI helpers
+ *
+ *  Copyright 2017 Red Hat, Inc.
+ *
+ *  Authors:
+ *   Fam Zheng 
+ *   Paolo Bonzini 
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the Free
+ * Software Foundation; either version 2 of the License, or (at your option)
+ * any later version.
+ */
+
+#include "qemu/osdep.h"
+#include "scsi/constants.h"
+#include "scsi/utils.h"
+#include "qemu/bswap.h"
+
+uint32_t scsi_data_cdb_xfer(uint8_t *buf)
+{
+    if ((buf[0] >> 5) == 0 && buf[4] == 0) {
+        return 256;
+    } else {
+        return scsi_cdb_xfer(buf);
+    }
+}
+
+uint32_t scsi_cdb_xfer(uint8_t *buf)
+{
+    switch (buf[0] >> 5) {
+    case 0:
+        return buf[4];
+    case 1:
+    case 2:
+        return lduw_be_p(&buf[7]);
+    case 4:
+        return ldl_be_p(&buf[10]) & 0xffffffffULL;
+    case 5:
+        return ldl_be_p(&buf[6]) & 0xffffffffULL;
+    default:
+        return -1;
+    }
+}
+
+uint64_t scsi_cmd_lba(SCSICommand *cmd)
+{
+    uint8_t *buf = cmd->buf;
+    uint64_t lba;
+
+    switch (buf[0] >> 5) {
+    case 0:
+        lba = ldl_be_p(&buf[0]) & 0x1fffff;
+        break;
+    case 1:
+    case 2:
+    case 5:
+        lba = ldl_be_p(&buf[2]) & 0xffffffffULL;
+        break;
+    case 4:
+        lba = ldq_be_p(&buf[2]);
+        break;
+    default:
+        lba = -1;
+
+    }
+    return lba;
+}
+
+int scsi_cdb_length(uint8_t *buf)
+{
+    int cdb_len;
+
+    switch (buf[0] >> 5) {
+    case 0:
+        cdb_len = 6;
+        break;
+    case 1:
+    case 2:
+        cdb_len = 10;
+        break;
+    case 4:
+        cdb_len = 16;
+        break;
+    case 5:
+        cdb_len = 12;
+        break;
+    default:
+        cdb_len = -1;
+    }
+    return cdb_len;
+}
+
+SCSISense scsi_parse_sense_buf(const uint8_t *in_buf, int in_len)
+{
+    bool fixed_in;
+    SCSISense sense;
+
+    assert(in_len > 0);
+    fixed_in = (in_buf[0] & 2) == 0;
+    if (fixed_in) {
+        if (in_len < 14) {
+            return SENSE_CODE(IO_ERROR);
+        }
+        sense.key = in_buf[2];
+        sense.asc = in_buf[12];
+        sense.ascq = in_buf[13];
+    } else {
+        if (in_len < 4) {
+            return SENSE_CODE(IO_ERROR);
+        }
+        sense.key = in_buf[1];
+        sense.asc = in_buf[2];
+        sense.ascq = in_buf[3];
+    }
+
+    return sense;
+}
+
+int scsi_build_sense_buf(uint8_t *out_buf, size_t size, SCSISense sense,
+                         bool fixed_sense)
+{
+    int len;
+    uint8_t buf[SCSI_SENSE_LEN] = { 0 };
+
+    if (fixed_sense) {
+        buf[0] = 0x70;
+        buf[2] = sense.key;
+        buf[7] = 10;
+        buf[12] = sense.asc;
+        buf[13] = sense.ascq;
+        len = 18;
+    } else {
+        buf[0] = 0x72;
+        buf[1] = sense.key;
+        buf[2] = sense.asc;
+        buf[3] = sense.ascq;
+        len = 8;
+    }
+    len = MIN(len, size);
+    memcpy(out_buf, buf, len);
+    return len;
+}
+
+int scsi_build_sense(uint8_t *buf, SCSISense sense)
+{
+    return scsi_build_sense_buf(buf, SCSI_SENSE_LEN, sense, true);
+}
+
+/*
+ * Predefined sense codes
+ */
+
+/* No sense data available */
+const struct SCSISense sense_code_NO_SENSE = {
+    .key = NO_SENSE , .asc = 0x00 , .ascq = 0x00
+};
+
+/* LUN not ready, Manual intervention required */
+const struct SCSISense sense_code_LUN_NOT_READY = {
+    .key = NOT_READY, .asc = 0x04, .ascq = 0x03
+};
+
+/* LUN not ready, Medium not present */
+const struct SCSISense sense_code_NO_MEDIUM = {
+    .key = NOT_READY, .asc = 0x3a, .ascq = 0x00
+};
+
+/* LUN not ready, medium removal prevented */
+const struct SCSISense sense_code_NOT_READY_REMOVAL_PREVENTED = {
+    .key = NOT_READY, .asc = 0x53, .ascq = 0x02
+};
+
+/* Hardware error, internal target failure */
+const struct SCSISense sense_code_TARGET_FAILURE = {
+    .key = HARDWARE_ERROR, .asc = 0x44, .ascq = 0x00
+};
+
+/* Illegal request, invalid command operation code */
+const struct SCSISense sense_code_INVALID_OPCODE = {
+    .key = ILLEGAL_REQUEST, .asc = 0x20, .ascq = 0x00
+};
+
+/* Illegal request, LBA out of range */
+const struct SCSISense sense_code_LBA_OUT_OF_RANGE = {
+    .key = ILLEGAL_REQUEST, .asc = 0x21, .ascq = 0x00
+};
+
+/* Illegal request, Invalid field in CDB */
+const struct SCSISense sense_code_INVALID_FIELD = {
+    .key = ILLEGAL_REQUEST, .asc = 0x24, .ascq = 0x00
+};
+
+/* Illegal request, Invalid field in parameter list */
+const struct SCSISense sense_code_INVALID_PARAM = {
+    .key = ILLEGAL_REQUEST, .asc = 0x26, .ascq = 0x00
+};
+
+/* Illegal request, Parameter list length error */
+const struct SCSISense sense_code_INVALID_PARAM_LEN = {
+    .key = ILLEGAL_REQUEST, .asc = 0x1a, .ascq = 0x00
+};
+
+/* Illegal request, LUN not supported */
+const struct SCSISense sense_code_LUN_NOT_SUPPORTED = {
+    .key = ILLEGAL_REQUEST, .asc = 0x25, .ascq = 0x00
+};
+
+/* Illegal request, Saving parameters not supported */
+const struct SCSISense sense_code_SAVING_PARAMS_NOT_SUPPORTED = {
+    .key = ILLEGAL_REQUEST, .asc = 0x39, .ascq = 0x00
+};
+
+/* Illegal request, Incompatible medium installed */
+const struct SCSISense sense_code_INCOMPATIBLE_FORMAT = {
+    .key = ILLEGAL_REQUEST, .asc = 0x30, .ascq = 0x00
+};
+
+/* Illegal request, medium removal prevented */
+const struct SCSISense sense_code_ILLEGAL_REQ_REMOVAL_PREVENTED = {
+    .key = ILLEGAL_REQUEST, .asc = 0x53, .ascq = 0x02
+};
+
+/* Illegal request, Invalid Transfer Tag */
+const struct SCSISense sense_code_INVALID_TAG = {
+    .key = ILLEGAL_REQUEST, .asc = 0x4b, .ascq = 0x01
+};
+
+/* Command aborted, I/O process terminated */
+const struct SCSISense sense_code_IO_ERROR = {
+    .key = ABORTED_COMMAND, .asc = 0x00, .ascq = 0x06
+};
+
+/* Command aborted, I_T Nexus loss occurred */
+const struct SCSISense sense_code_I_T_NEXUS_LOSS = {
+    .key = ABORTED_COMMAND, .asc = 0x29, .ascq = 0x07
+};
+
+/* Command aborted, Logical Unit failure */
+const struct SCSISense sense_code_LUN_FAILURE = {
+    .key = ABORTED_COMMAND, .asc = 0x3e, .ascq = 0x01
+};
+
+/* Command aborted, Overlapped Commands Attempted */
+const struct SCSISense sense_code_OVERLAPPED_COMMANDS = {
+    .key = ABORTED_COMMAND, .asc = 0x4e, .ascq = 0x00
+};
+
+/* Command aborted, LUN Communication Failure */
+const struct SCSISense sense_code_LUN_COMM_FAILURE = {
+    .key = ABORTED_COMMAND, .asc = 0x08, .ascq = 0x00
+};
+
+/* Medium Error, Unrecovered read error */
+const struct SCSISense sense_code_READ_ERROR = {
+    .key = MEDIUM_ERROR, .asc = 0x11, .ascq = 0x00
+};
+
+/* Not ready, Cause not reportable */
+const struct SCSISense sense_code_NOT_READY = {
+    .key = NOT_READY, .asc = 0x04, .ascq = 0x00
+};
+
+/* Unit attention, Capacity data has changed */
+const struct SCSISense sense_code_CAPACITY_CHANGED = {
+    .key = UNIT_ATTENTION, .asc = 0x2a, .ascq = 0x09
+};
+
+/* Unit attention, Power on, reset or bus device reset occurred */
+const struct SCSISense sense_code_RESET = {
+    .key = UNIT_ATTENTION, .asc = 0x29, .ascq = 0x00
+};
+
+/* Unit attention, SCSI bus reset */
+const struct SCSISense sense_code_SCSI_BUS_RESET = {
+    .key = UNIT_ATTENTION, .asc = 0x29, .ascq = 0x02
+};
+
+/* Unit attention, No medium */
+const struct SCSISense sense_code_UNIT_ATTENTION_NO_MEDIUM = {
+    .key = UNIT_ATTENTION, .asc = 0x3a, .ascq = 0x00
+};
+
+/* Unit attention, Medium may have changed */
+const struct SCSISense sense_code_MEDIUM_CHANGED = {
+    .key = UNIT_ATTENTION, .asc = 0x28, .ascq = 0x00
+};
+
+/* Unit attention, Reported LUNs data has changed */
+const struct SCSISense sense_code_REPORTED_LUNS_CHANGED = {
+    .key = UNIT_ATTENTION, .asc = 0x3f, .ascq = 0x0e
+};
+
+/* Unit attention, Device internal reset */
+const struct SCSISense sense_code_DEVICE_INTERNAL_RESET = {
+    .key = UNIT_ATTENTION, .asc = 0x29, .ascq = 0x04
+};
+
+/* Data Protection, Write Protected */
+const struct SCSISense sense_code_WRITE_PROTECTED = {
+    .key = DATA_PROTECT, .asc = 0x27, .ascq = 0x00
+};
+
+/* Data Protection, Space Allocation Failed Write Protect */
+const struct SCSISense sense_code_SPACE_ALLOC_FAILED = {
+    .key = DATA_PROTECT, .asc = 0x27, .ascq = 0x07
+};
+
+/*
+ * scsi_convert_sense
+ *
+ * Convert between fixed and descriptor sense buffers
+ */
+int scsi_convert_sense(uint8_t *in_buf, int in_len,
+                       uint8_t *buf, int len, bool fixed)
+{
+    SCSISense sense;
+    bool fixed_in;
+
+    if (in_len == 0) {
+        return scsi_build_sense_buf(buf, len, SENSE_CODE(NO_SENSE), fixed);
+    }
+
+    fixed_in = (in_buf[0] & 2) == 0;
+    if (fixed == fixed_in) {
+        memcpy(buf, in_buf, MIN(len, in_len));
+        return MIN(len, in_len);
+    } else {
+        sense = scsi_parse_sense_buf(in_buf, in_len);
+        return scsi_build_sense_buf(buf, len, sense, fixed);
+    }
+}
+
+static bool scsi_sense_is_guest_recoverable(int key, int asc, int ascq)
+{
+    switch (key) {
+    case NO_SENSE:
+    case RECOVERED_ERROR:
+    case UNIT_ATTENTION:
+    case ABORTED_COMMAND:
+        return true;
+    case NOT_READY:
+    case ILLEGAL_REQUEST:
+    case DATA_PROTECT:
+        /* Parse ASCQ */
+        break;
+    default:
+        return false;
+    }
+
+    switch ((asc << 8) | ascq) {
+    case 0x1a00: /* PARAMETER LIST LENGTH ERROR */
+    case 0x2000: /* INVALID OPERATION CODE */
+    case 0x2400: /* INVALID FIELD IN CDB */
+    case 0x2500: /* LOGICAL UNIT NOT SUPPORTED */
+    case 0x2600: /* INVALID FIELD IN PARAMETER LIST */
+
+    case 0x2104: /* UNALIGNED WRITE COMMAND */
+    case 0x2105: /* WRITE BOUNDARY VIOLATION */
+    case 0x2106: /* ATTEMPT TO READ INVALID DATA */
+    case 0x550e: /* INSUFFICIENT ZONE RESOURCES */
+
+    case 0x0401: /* NOT READY, IN PROGRESS OF BECOMING READY */
+    case 0x0402: /* NOT READY, INITIALIZING COMMAND REQUIRED */
+        return true;
+    default:
+        return false;
+    }
+}
+
+int scsi_sense_to_errno(int key, int asc, int ascq)
+{
+    switch (key) {
+    case NO_SENSE:
+    case RECOVERED_ERROR:
+    case UNIT_ATTENTION:
+        return EAGAIN;
+    case ABORTED_COMMAND: /* COMMAND ABORTED */
+        return ECANCELED;
+    case NOT_READY:
+    case ILLEGAL_REQUEST:
+    case DATA_PROTECT:
+        /* Parse ASCQ */
+        break;
+    default:
+        return EIO;
+    }
+    switch ((asc << 8) | ascq) {
+    case 0x1a00: /* PARAMETER LIST LENGTH ERROR */
+    case 0x2000: /* INVALID OPERATION CODE */
+    case 0x2400: /* INVALID FIELD IN CDB */
+    case 0x2600: /* INVALID FIELD IN PARAMETER LIST */
+        return EINVAL;
+    case 0x2100: /* LBA OUT OF RANGE */
+    case 0x2707: /* SPACE ALLOC FAILED */
+        return ENOSPC;
+    case 0x2500: /* LOGICAL UNIT NOT SUPPORTED */
+        return ENOTSUP;
+    case 0x3a00: /* MEDIUM NOT PRESENT */
+    case 0x3a01: /* MEDIUM NOT PRESENT TRAY CLOSED */
+    case 0x3a02: /* MEDIUM NOT PRESENT TRAY OPEN */
+        return ENOMEDIUM;
+    case 0x2700: /* WRITE PROTECTED */
+        return EACCES;
+    case 0x0401: /* NOT READY, IN PROGRESS OF BECOMING READY */
+        return EINPROGRESS;
+    case 0x0402: /* NOT READY, INITIALIZING COMMAND REQUIRED */
+        return ENOTCONN;
+    default:
+        return EIO;
+    }
+}
+
+int scsi_sense_buf_to_errno(const uint8_t *in_buf, size_t in_len)
+{
+    SCSISense sense;
+    if (in_len < 1) {
+        return EIO;
+    }
+
+    sense = scsi_parse_sense_buf(in_buf, in_len);
+    return scsi_sense_to_errno(sense.key, sense.asc, sense.ascq);
+}
+
+bool scsi_sense_buf_is_guest_recoverable(const uint8_t *in_buf, size_t in_len)
+{
+    SCSISense sense;
+    if (in_len < 1) {
+        return false;
+    }
+
+    sense = scsi_parse_sense_buf(in_buf, in_len);
+    return scsi_sense_is_guest_recoverable(sense.key, sense.asc, sense.ascq);
+}
+
+const char *scsi_command_name(uint8_t cmd)
+{
+    static const char *names[] = {
+        [ TEST_UNIT_READY          ] = "TEST_UNIT_READY",
+        [ REWIND                   ] = "REWIND",
+        [ REQUEST_SENSE            ] = "REQUEST_SENSE",
+        [ FORMAT_UNIT              ] = "FORMAT_UNIT",
+        [ READ_BLOCK_LIMITS        ] = "READ_BLOCK_LIMITS",
+        [ REASSIGN_BLOCKS          ] = "REASSIGN_BLOCKS/INITIALIZE ELEMENT STATUS",
+        /* LOAD_UNLOAD and INITIALIZE_ELEMENT_STATUS use the same operation code */
+        [ READ_6                   ] = "READ_6",
+        [ WRITE_6                  ] = "WRITE_6",
+        [ SET_CAPACITY             ] = "SET_CAPACITY",
+        [ READ_REVERSE             ] = "READ_REVERSE",
+        [ WRITE_FILEMARKS          ] = "WRITE_FILEMARKS",
+        [ SPACE                    ] = "SPACE",
+        [ INQUIRY                  ] = "INQUIRY",
+        [ RECOVER_BUFFERED_DATA    ] = "RECOVER_BUFFERED_DATA",
+        [ MAINTENANCE_IN           ] = "MAINTENANCE_IN",
+        [ MAINTENANCE_OUT          ] = "MAINTENANCE_OUT",
+        [ MODE_SELECT              ] = "MODE_SELECT",
+        [ RESERVE                  ] = "RESERVE",
+        [ RELEASE                  ] = "RELEASE",
+        [ COPY                     ] = "COPY",
+        [ ERASE                    ] = "ERASE",
+        [ MODE_SENSE               ] = "MODE_SENSE",
+        [ START_STOP               ] = "START_STOP/LOAD_UNLOAD",
+        /* LOAD_UNLOAD and START_STOP use the same operation code */
+        [ RECEIVE_DIAGNOSTIC       ] = "RECEIVE_DIAGNOSTIC",
+        [ SEND_DIAGNOSTIC          ] = "SEND_DIAGNOSTIC",
+        [ ALLOW_MEDIUM_REMOVAL     ] = "ALLOW_MEDIUM_REMOVAL",
+        [ READ_CAPACITY_10         ] = "READ_CAPACITY_10",
+        [ READ_10                  ] = "READ_10",
+        [ WRITE_10                 ] = "WRITE_10",
+        [ SEEK_10                  ] = "SEEK_10/POSITION_TO_ELEMENT",
+        /* SEEK_10 and POSITION_TO_ELEMENT use the same operation code */
+        [ WRITE_VERIFY_10          ] = "WRITE_VERIFY_10",
+        [ VERIFY_10                ] = "VERIFY_10",
+        [ SEARCH_HIGH              ] = "SEARCH_HIGH",
+        [ SEARCH_EQUAL             ] = "SEARCH_EQUAL",
+        [ SEARCH_LOW               ] = "SEARCH_LOW",
+        [ SET_LIMITS               ] = "SET_LIMITS",
+        [ PRE_FETCH                ] = "PRE_FETCH/READ_POSITION",
+        /* READ_POSITION and PRE_FETCH use the same operation code */
+        [ SYNCHRONIZE_CACHE        ] = "SYNCHRONIZE_CACHE",
+        [ LOCK_UNLOCK_CACHE        ] = "LOCK_UNLOCK_CACHE",
+        [ READ_DEFECT_DATA         ] = "READ_DEFECT_DATA/INITIALIZE_ELEMENT_STATUS_WITH_RANGE",
+        /* READ_DEFECT_DATA and INITIALIZE_ELEMENT_STATUS_WITH_RANGE use the same operation code */
+        [ MEDIUM_SCAN              ] = "MEDIUM_SCAN",
+        [ COMPARE                  ] = "COMPARE",
+        [ COPY_VERIFY              ] = "COPY_VERIFY",
+        [ WRITE_BUFFER             ] = "WRITE_BUFFER",
+        [ READ_BUFFER              ] = "READ_BUFFER",
+        [ UPDATE_BLOCK             ] = "UPDATE_BLOCK",
+        [ READ_LONG_10             ] = "READ_LONG_10",
+        [ WRITE_LONG_10            ] = "WRITE_LONG_10",
+        [ CHANGE_DEFINITION        ] = "CHANGE_DEFINITION",
+        [ WRITE_SAME_10            ] = "WRITE_SAME_10",
+        [ UNMAP                    ] = "UNMAP",
+        [ READ_TOC                 ] = "READ_TOC",
+        [ REPORT_DENSITY_SUPPORT   ] = "REPORT_DENSITY_SUPPORT",
+        [ SANITIZE                 ] = "SANITIZE",
+        [ GET_CONFIGURATION        ] = "GET_CONFIGURATION",
+        [ LOG_SELECT               ] = "LOG_SELECT",
+        [ LOG_SENSE                ] = "LOG_SENSE",
+        [ MODE_SELECT_10           ] = "MODE_SELECT_10",
+        [ RESERVE_10               ] = "RESERVE_10",
+        [ RELEASE_10               ] = "RELEASE_10",
+        [ MODE_SENSE_10            ] = "MODE_SENSE_10",
+        [ PERSISTENT_RESERVE_IN    ] = "PERSISTENT_RESERVE_IN",
+        [ PERSISTENT_RESERVE_OUT   ] = "PERSISTENT_RESERVE_OUT",
+        [ WRITE_FILEMARKS_16       ] = "WRITE_FILEMARKS_16",
+        [ EXTENDED_COPY            ] = "EXTENDED_COPY",
+        [ ATA_PASSTHROUGH_16       ] = "ATA_PASSTHROUGH_16",
+        [ ACCESS_CONTROL_IN        ] = "ACCESS_CONTROL_IN",
+        [ ACCESS_CONTROL_OUT       ] = "ACCESS_CONTROL_OUT",
+        [ READ_16                  ] = "READ_16",
+        [ COMPARE_AND_WRITE        ] = "COMPARE_AND_WRITE",
+        [ WRITE_16                 ] = "WRITE_16",
+        [ WRITE_VERIFY_16          ] = "WRITE_VERIFY_16",
+        [ VERIFY_16                ] = "VERIFY_16",
+        [ PRE_FETCH_16             ] = "PRE_FETCH_16",
+        [ SYNCHRONIZE_CACHE_16     ] = "SPACE_16/SYNCHRONIZE_CACHE_16",
+        /* SPACE_16 and SYNCHRONIZE_CACHE_16 use the same operation code */
+        [ LOCATE_16                ] = "LOCATE_16",
+        [ WRITE_SAME_16            ] = "ERASE_16/WRITE_SAME_16",
+        /* ERASE_16 and WRITE_SAME_16 use the same operation code */
+        [ SERVICE_ACTION_IN_16     ] = "SERVICE_ACTION_IN_16",
+        [ WRITE_LONG_16            ] = "WRITE_LONG_16",
+        [ REPORT_LUNS              ] = "REPORT_LUNS",
+        [ ATA_PASSTHROUGH_12       ] = "BLANK/ATA_PASSTHROUGH_12",
+        [ MOVE_MEDIUM              ] = "MOVE_MEDIUM",
+        [ EXCHANGE_MEDIUM          ] = "EXCHANGE MEDIUM",
+        [ READ_12                  ] = "READ_12",
+        [ WRITE_12                 ] = "WRITE_12",
+        [ ERASE_12                 ] = "ERASE_12/GET_PERFORMANCE",
+        /* ERASE_12 and GET_PERFORMANCE use the same operation code */
+        [ SERVICE_ACTION_IN_12     ] = "SERVICE_ACTION_IN_12",
+        [ WRITE_VERIFY_12          ] = "WRITE_VERIFY_12",
+        [ VERIFY_12                ] = "VERIFY_12",
+        [ SEARCH_HIGH_12           ] = "SEARCH_HIGH_12",
+        [ SEARCH_EQUAL_12          ] = "SEARCH_EQUAL_12",
+        [ SEARCH_LOW_12            ] = "SEARCH_LOW_12",
+        [ READ_ELEMENT_STATUS      ] = "READ_ELEMENT_STATUS",
+        [ SEND_VOLUME_TAG          ] = "SEND_VOLUME_TAG/SET_STREAMING",
+        /* SEND_VOLUME_TAG and SET_STREAMING use the same operation code */
+        [ READ_CD                  ] = "READ_CD",
+        [ READ_DEFECT_DATA_12      ] = "READ_DEFECT_DATA_12",
+        [ READ_DVD_STRUCTURE       ] = "READ_DVD_STRUCTURE",
+        [ RESERVE_TRACK            ] = "RESERVE_TRACK",
+        [ SEND_CUE_SHEET           ] = "SEND_CUE_SHEET",
+        [ SEND_DVD_STRUCTURE       ] = "SEND_DVD_STRUCTURE",
+        [ SET_CD_SPEED             ] = "SET_CD_SPEED",
+        [ SET_READ_AHEAD           ] = "SET_READ_AHEAD",
+        [ ALLOW_OVERWRITE          ] = "ALLOW_OVERWRITE",
+        [ MECHANISM_STATUS         ] = "MECHANISM_STATUS",
+        [ GET_EVENT_STATUS_NOTIFICATION ] = "GET_EVENT_STATUS_NOTIFICATION",
+        [ READ_DISC_INFORMATION    ] = "READ_DISC_INFORMATION",
+    };
+
+    if (cmd >= ARRAY_SIZE(names) || names[cmd] == NULL) {
+        return "*UNKNOWN*";
+    }
+    return names[cmd];
+}
+
+#ifdef CONFIG_LINUX
+int sg_io_sense_from_errno(int errno_value, struct sg_io_hdr *io_hdr,
+                           SCSISense *sense)
+{
+    if (errno_value != 0) {
+        switch (errno_value) {
+        case EDOM:
+            return TASK_SET_FULL;
+        case ENOMEM:
+            *sense = SENSE_CODE(TARGET_FAILURE);
+            return CHECK_CONDITION;
+        default:
+            *sense = SENSE_CODE(IO_ERROR);
+            return CHECK_CONDITION;
+        }
+    } else {
+        if (io_hdr->host_status == SG_ERR_DID_NO_CONNECT ||
+            io_hdr->host_status == SG_ERR_DID_BUS_BUSY ||
+            io_hdr->host_status == SG_ERR_DID_TIME_OUT ||
+            (io_hdr->driver_status & SG_ERR_DRIVER_TIMEOUT)) {
+            return BUSY;
+        } else if (io_hdr->host_status) {
+            *sense = SENSE_CODE(I_T_NEXUS_LOSS);
+            return CHECK_CONDITION;
+        } else if (io_hdr->status) {
+            return io_hdr->status;
+        } else if (io_hdr->driver_status & SG_ERR_DRIVER_SENSE) {
+            return CHECK_CONDITION;
+        } else {
+            return GOOD;
+        }
+    }
+}
+#endif
diff --git a/stubs/arch_type.c b/stubs/arch_type.c
new file mode 100644
index 000000000..fc5423bc9
--- /dev/null
+++ b/stubs/arch_type.c
@@ -0,0 +1,4 @@
+#include "qemu/osdep.h"
+#include "sysemu/arch_init.h"
+
+const uint32_t arch_type = QEMU_ARCH_NONE;
diff --git a/stubs/bdrv-next-monitor-owned.c b/stubs/bdrv-next-monitor-owned.c
new file mode 100644
index 000000000..e62c28986
--- /dev/null
+++ b/stubs/bdrv-next-monitor-owned.c
@@ -0,0 +1,7 @@
+#include "qemu/osdep.h"
+#include "block/block.h"
+
+BlockDriverState *bdrv_next_monitor_owned(BlockDriverState *bs)
+{
+    return NULL;
+}
diff --git a/stubs/blk-commit-all.c b/stubs/blk-commit-all.c
new file mode 100644
index 000000000..e156c57f8
--- /dev/null
+++ b/stubs/blk-commit-all.c
@@ -0,0 +1,7 @@
+#include "qemu/osdep.h"
+#include "sysemu/block-backend.h"
+
+int blk_commit_all(void)
+{
+    return 0;
+}
diff --git a/stubs/blk-exp-close-all.c b/stubs/blk-exp-close-all.c
new file mode 100644
index 000000000..1c7131676
--- /dev/null
+++ b/stubs/blk-exp-close-all.c
@@ -0,0 +1,7 @@
+#include "qemu/osdep.h"
+#include "block/export.h"
+
+/* Only used in programs that support block exports (libblockdev.fa) */
+void blk_exp_close_all(void)
+{
+}
diff --git a/stubs/blockdev-close-all-bdrv-states.c b/stubs/blockdev-close-all-bdrv-states.c
new file mode 100644
index 000000000..f1f1d9cdc
--- /dev/null
+++ b/stubs/blockdev-close-all-bdrv-states.c
@@ -0,0 +1,6 @@
+#include "qemu/osdep.h"
+#include "block/block_int.h"
+
+void blockdev_close_all_bdrv_states(void)
+{
+}
diff --git a/stubs/change-state-handler.c b/stubs/change-state-handler.c
new file mode 100644
index 000000000..d1ed46bfb
--- /dev/null
+++ b/stubs/change-state-handler.c
@@ -0,0 +1,13 @@
+#include "qemu/osdep.h"
+#include "sysemu/runstate.h"
+
+VMChangeStateEntry *qemu_add_vm_change_state_handler(VMChangeStateHandler *cb,
+                                                     void *opaque)
+{
+    return NULL;
+}
+
+void qemu_del_vm_change_state_handler(VMChangeStateEntry *e)
+{
+    /* Nothing to do. */
+}
diff --git a/stubs/cmos.c b/stubs/cmos.c
new file mode 100644
index 000000000..3fdbae2c6
--- /dev/null
+++ b/stubs/cmos.c
@@ -0,0 +1,7 @@
+#include "qemu/osdep.h"
+#include "hw/block/fdc.h"
+
+int cmos_get_fd_drive_type(FloppyDriveType fd0)
+{
+    return 0;
+}
diff --git a/stubs/cpu-get-clock.c b/stubs/cpu-get-clock.c
new file mode 100644
index 000000000..9e9240481
--- /dev/null
+++ b/stubs/cpu-get-clock.c
@@ -0,0 +1,8 @@
+#include "qemu/osdep.h"
+#include "sysemu/cpu-timers.h"
+#include "qemu/main-loop.h"
+
+int64_t cpu_get_clock(void)
+{
+    return get_clock_realtime();
+}
diff --git a/stubs/cpu-synchronize-state.c b/stubs/cpu-synchronize-state.c
new file mode 100644
index 000000000..d9211da66
--- /dev/null
+++ b/stubs/cpu-synchronize-state.c
@@ -0,0 +1,9 @@
+#include "qemu/osdep.h"
+#include "sysemu/hw_accel.h"
+
+void cpu_synchronize_state(CPUState *cpu)
+{
+}
+void cpu_synchronize_post_init(CPUState *cpu)
+{
+}
diff --git a/stubs/cpus-get-virtual-clock.c b/stubs/cpus-get-virtual-clock.c
new file mode 100644
index 000000000..fd447d53f
--- /dev/null
+++ b/stubs/cpus-get-virtual-clock.c
@@ -0,0 +1,8 @@
+#include "qemu/osdep.h"
+#include "sysemu/cpu-timers.h"
+#include "qemu/main-loop.h"
+
+int64_t cpus_get_virtual_clock(void)
+{
+    return cpu_get_clock();
+}
diff --git a/stubs/dump.c b/stubs/dump.c
new file mode 100644
index 000000000..1f28ec2be
--- /dev/null
+++ b/stubs/dump.c
@@ -0,0 +1,27 @@
+/*
+ * QEMU dump
+ *
+ * Copyright Fujitsu, Corp. 2011, 2012
+ *
+ * Authors:
+ *     Wen Congyang 
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
+ * See the COPYING file in the top-level directory.
+ *
+ */
+
+#include "qemu/osdep.h"
+#include "sysemu/dump-arch.h"
+
+int cpu_get_dump_info(ArchDumpInfo *info,
+                      const struct GuestPhysBlockList *guest_phys_blocks)
+{
+    return -1;
+}
+
+ssize_t cpu_get_note_size(int class, int machine, int nr_cpus)
+{
+    return -1;
+}
+
diff --git a/stubs/error-printf.c b/stubs/error-printf.c
new file mode 100644
index 000000000..a2f61521a
--- /dev/null
+++ b/stubs/error-printf.c
@@ -0,0 +1,22 @@
+#include "qemu/osdep.h"
+#include "qemu/error-report.h"
+
+int error_vprintf(const char *fmt, va_list ap)
+{
+    int ret;
+
+    if (g_test_initialized() && !g_test_subprocess() &&
+        getenv("QTEST_SILENT_ERRORS")) {
+        char *msg = g_strdup_vprintf(fmt, ap);
+        g_test_message("%s", msg);
+        ret = strlen(msg);
+        g_free(msg);
+        return ret;
+    }
+    return vfprintf(stderr, fmt, ap);
+}
+
+int error_vprintf_unless_qmp(const char *fmt, va_list ap)
+{
+    return error_vprintf(fmt, ap);
+}
diff --git a/stubs/fdset.c b/stubs/fdset.c
new file mode 100644
index 000000000..56b3663d5
--- /dev/null
+++ b/stubs/fdset.c
@@ -0,0 +1,17 @@
+#include "qemu/osdep.h"
+#include "monitor/monitor.h"
+
+int monitor_fdset_dup_fd_add(int64_t fdset_id, int flags)
+{
+    errno = ENOSYS;
+    return -1;
+}
+
+int64_t monitor_fdset_dup_fd_find(int dup_fd)
+{
+    return -1;
+}
+
+void monitor_fdset_dup_fd_remove(int dupfd)
+{
+}
diff --git a/stubs/fw_cfg.c b/stubs/fw_cfg.c
new file mode 100644
index 000000000..bb1e3c8aa
--- /dev/null
+++ b/stubs/fw_cfg.c
@@ -0,0 +1,21 @@
+/*
+ * fw_cfg stubs
+ *
+ * Copyright (c) 2019 Red Hat, Inc.
+ *
+ * Author:
+ *   Philippe Mathieu-Daudé 
+ *
+ * SPDX-License-Identifier: GPL-2.0-or-later
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
+ * See the COPYING file in the top-level directory.
+ */
+
+#include "qemu/osdep.h"
+#include "hw/nvram/fw_cfg.h"
+
+const char *fw_cfg_arch_key_name(uint16_t key)
+{
+    return NULL;
+}
diff --git a/stubs/gdbstub.c b/stubs/gdbstub.c
new file mode 100644
index 000000000..2b7aee50d
--- /dev/null
+++ b/stubs/gdbstub.c
@@ -0,0 +1,6 @@
+#include "qemu/osdep.h"
+#include "exec/gdbstub.h"       /* xml_builtin */
+
+const char *const xml_builtin[][2] = {
+  { NULL, NULL }
+};
diff --git a/stubs/get-vm-name.c b/stubs/get-vm-name.c
new file mode 100644
index 000000000..fa990136b
--- /dev/null
+++ b/stubs/get-vm-name.c
@@ -0,0 +1,8 @@
+#include "qemu/osdep.h"
+#include "qemu-common.h"
+
+const char *qemu_get_vm_name(void)
+{
+    return NULL;
+}
+
diff --git a/stubs/icount.c b/stubs/icount.c
new file mode 100644
index 000000000..f13c43568
--- /dev/null
+++ b/stubs/icount.c
@@ -0,0 +1,45 @@
+#include "qemu/osdep.h"
+#include "qapi/error.h"
+#include "sysemu/cpu-timers.h"
+
+/* icount - Instruction Counter API */
+
+int use_icount;
+
+void icount_update(CPUState *cpu)
+{
+    abort();
+}
+void icount_configure(QemuOpts *opts, Error **errp)
+{
+    /* signal error */
+    error_setg(errp, "cannot configure icount, TCG support not available");
+}
+int64_t icount_get_raw(void)
+{
+    abort();
+    return 0;
+}
+int64_t icount_get(void)
+{
+    abort();
+    return 0;
+}
+int64_t icount_to_ns(int64_t icount)
+{
+    abort();
+    return 0;
+}
+int64_t icount_round(int64_t count)
+{
+    abort();
+    return 0;
+}
+void icount_start_warp_timer(void)
+{
+    abort();
+}
+void icount_account_warp_timer(void)
+{
+    abort();
+}
diff --git a/stubs/io_uring.c b/stubs/io_uring.c
new file mode 100644
index 000000000..622d1e464
--- /dev/null
+++ b/stubs/io_uring.c
@@ -0,0 +1,32 @@
+/*
+ * Linux io_uring support.
+ *
+ * Copyright (C) 2009 IBM, Corp.
+ * Copyright (C) 2009 Red Hat, Inc.
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
+ * See the COPYING file in the top-level directory.
+ */
+#include "qemu/osdep.h"
+#include "block/aio.h"
+#include "block/raw-aio.h"
+
+void luring_detach_aio_context(LuringState *s, AioContext *old_context)
+{
+    abort();
+}
+
+void luring_attach_aio_context(LuringState *s, AioContext *new_context)
+{
+    abort();
+}
+
+LuringState *luring_init(Error **errp)
+{
+    abort();
+}
+
+void luring_cleanup(LuringState *s)
+{
+    abort();
+}
diff --git a/stubs/iothread-lock.c b/stubs/iothread-lock.c
new file mode 100644
index 000000000..2a6efad64
--- /dev/null
+++ b/stubs/iothread-lock.c
@@ -0,0 +1,15 @@
+#include "qemu/osdep.h"
+#include "qemu/main-loop.h"
+
+bool qemu_mutex_iothread_locked(void)
+{
+    return true;
+}
+
+void qemu_mutex_lock_iothread_impl(const char *file, int line)
+{
+}
+
+void qemu_mutex_unlock_iothread(void)
+{
+}
diff --git a/stubs/iothread.c b/stubs/iothread.c
new file mode 100644
index 000000000..8cc9e28c5
--- /dev/null
+++ b/stubs/iothread.c
@@ -0,0 +1,8 @@
+#include "qemu/osdep.h"
+#include "block/aio.h"
+#include "qemu/main-loop.h"
+
+AioContext *qemu_get_current_aio_context(void)
+{
+    return qemu_get_aio_context();
+}
diff --git a/stubs/is-daemonized.c b/stubs/is-daemonized.c
new file mode 100644
index 000000000..8f63325bb
--- /dev/null
+++ b/stubs/is-daemonized.c
@@ -0,0 +1,9 @@
+#include "qemu/osdep.h"
+
+/* Win32 has its own inline stub */
+#ifndef _WIN32
+bool is_daemonized(void)
+{
+    return false;
+}
+#endif
diff --git a/stubs/isa-bus.c b/stubs/isa-bus.c
new file mode 100644
index 000000000..522f44899
--- /dev/null
+++ b/stubs/isa-bus.c
@@ -0,0 +1,7 @@
+#include "qemu/osdep.h"
+#include "hw/isa/isa.h"
+
+ISADevice *isa_create_simple(ISABus *bus, const char *name)
+{
+    g_assert_not_reached();
+}
diff --git a/stubs/linux-aio.c b/stubs/linux-aio.c
new file mode 100644
index 000000000..84d1f784a
--- /dev/null
+++ b/stubs/linux-aio.c
@@ -0,0 +1,32 @@
+/*
+ * Linux native AIO support.
+ *
+ * Copyright (C) 2009 IBM, Corp.
+ * Copyright (C) 2009 Red Hat, Inc.
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
+ * See the COPYING file in the top-level directory.
+ */
+#include "qemu/osdep.h"
+#include "block/aio.h"
+#include "block/raw-aio.h"
+
+void laio_detach_aio_context(LinuxAioState *s, AioContext *old_context)
+{
+    abort();
+}
+
+void laio_attach_aio_context(LinuxAioState *s, AioContext *new_context)
+{
+    abort();
+}
+
+LinuxAioState *laio_init(Error **errp)
+{
+    abort();
+}
+
+void laio_cleanup(LinuxAioState *s)
+{
+    abort();
+}
diff --git a/stubs/machine-init-done.c b/stubs/machine-init-done.c
new file mode 100644
index 000000000..cd8e81392
--- /dev/null
+++ b/stubs/machine-init-done.c
@@ -0,0 +1,8 @@
+#include "qemu/osdep.h"
+#include "sysemu/sysemu.h"
+
+bool machine_init_done = true;
+
+void qemu_add_machine_init_done_notifier(Notifier *notify)
+{
+}
diff --git a/stubs/meson.build b/stubs/meson.build
new file mode 100644
index 000000000..82b7ba60a
--- /dev/null
+++ b/stubs/meson.build
@@ -0,0 +1,55 @@
+stub_ss.add(files('arch_type.c'))
+stub_ss.add(files('bdrv-next-monitor-owned.c'))
+stub_ss.add(files('blk-commit-all.c'))
+stub_ss.add(files('blk-exp-close-all.c'))
+stub_ss.add(files('blockdev-close-all-bdrv-states.c'))
+stub_ss.add(files('change-state-handler.c'))
+stub_ss.add(files('cmos.c'))
+stub_ss.add(files('cpu-get-clock.c'))
+stub_ss.add(files('cpus-get-virtual-clock.c'))
+stub_ss.add(files('qemu-timer-notify-cb.c'))
+stub_ss.add(files('icount.c'))
+stub_ss.add(files('dump.c'))
+stub_ss.add(files('error-printf.c'))
+stub_ss.add(files('fdset.c'))
+stub_ss.add(files('fw_cfg.c'))
+stub_ss.add(files('gdbstub.c'))
+stub_ss.add(files('get-vm-name.c'))
+stub_ss.add(when: 'CONFIG_LINUX_IO_URING', if_true: files('io_uring.c'))
+stub_ss.add(files('iothread.c'))
+stub_ss.add(files('iothread-lock.c'))
+stub_ss.add(files('isa-bus.c'))
+stub_ss.add(files('is-daemonized.c'))
+stub_ss.add(when: 'CONFIG_LINUX_AIO', if_true: files('linux-aio.c'))
+stub_ss.add(files('machine-init-done.c'))
+stub_ss.add(files('migr-blocker.c'))
+stub_ss.add(files('monitor.c'))
+stub_ss.add(files('monitor-core.c'))
+stub_ss.add(files('pci-bus.c'))
+stub_ss.add(files('pci-host-piix.c'))
+stub_ss.add(files('qemu-timer-notify-cb.c'))
+stub_ss.add(files('qmp_memory_device.c'))
+stub_ss.add(files('qtest.c'))
+stub_ss.add(files('ram-block.c'))
+stub_ss.add(files('ramfb.c'))
+stub_ss.add(files('replay.c'))
+stub_ss.add(files('runstate-check.c'))
+stub_ss.add(files('set-fd-handler.c'))
+stub_ss.add(files('sysbus.c'))
+stub_ss.add(files('target-get-monitor-def.c'))
+stub_ss.add(files('target-monitor-defs.c'))
+stub_ss.add(files('tpm.c'))
+stub_ss.add(files('trace-control.c'))
+stub_ss.add(files('uuid.c'))
+stub_ss.add(files('vmgenid.c'))
+stub_ss.add(files('vmstate.c'))
+stub_ss.add(files('vm-stop.c'))
+stub_ss.add(files('win32-kbd-hook.c'))
+stub_ss.add(files('cpu-synchronize-state.c'))
+if have_block
+  stub_ss.add(files('replay-tools.c'))
+endif
+if have_system
+  stub_ss.add(files('semihost.c'))
+  stub_ss.add(files('xen-hw-stub.c'))
+endif
diff --git a/stubs/migr-blocker.c b/stubs/migr-blocker.c
new file mode 100644
index 000000000..5676a2f93
--- /dev/null
+++ b/stubs/migr-blocker.c
@@ -0,0 +1,11 @@
+#include "qemu/osdep.h"
+#include "migration/blocker.h"
+
+int migrate_add_blocker(Error *reason, Error **errp)
+{
+    return 0;
+}
+
+void migrate_del_blocker(Error *reason)
+{
+}
diff --git a/stubs/monitor-core.c b/stubs/monitor-core.c
new file mode 100644
index 000000000..d058a2a00
--- /dev/null
+++ b/stubs/monitor-core.c
@@ -0,0 +1,29 @@
+#include "qemu/osdep.h"
+#include "monitor/monitor.h"
+#include "qemu-common.h"
+#include "qapi/qapi-emit-events.h"
+
+Monitor *monitor_cur(void)
+{
+    return NULL;
+}
+
+Monitor *monitor_set_cur(Coroutine *co, Monitor *mon)
+{
+    return NULL;
+}
+
+void monitor_init_qmp(Chardev *chr, bool pretty, Error **errp)
+{
+}
+
+void qapi_event_emit(QAPIEvent event, QDict *qdict)
+{
+}
+
+int monitor_vprintf(Monitor *mon, const char *fmt, va_list ap)
+{
+    abort();
+}
+
+
diff --git a/stubs/monitor.c b/stubs/monitor.c
new file mode 100644
index 000000000..20786ac4f
--- /dev/null
+++ b/stubs/monitor.c
@@ -0,0 +1,18 @@
+#include "qemu/osdep.h"
+#include "qapi/error.h"
+#include "monitor/monitor.h"
+#include "../monitor/monitor-internal.h"
+
+int monitor_get_fd(Monitor *mon, const char *name, Error **errp)
+{
+    error_setg(errp, "only QEMU supports file descriptor passing");
+    return -1;
+}
+
+void monitor_init_hmp(Chardev *chr, bool use_readline, Error **errp)
+{
+}
+
+void monitor_fdsets_cleanup(void)
+{
+}
diff --git a/stubs/pci-bus.c b/stubs/pci-bus.c
new file mode 100644
index 000000000..a8932fa93
--- /dev/null
+++ b/stubs/pci-bus.c
@@ -0,0 +1,7 @@
+#include "qemu/osdep.h"
+#include "hw/pci/pci.h"
+
+PCIDevice *pci_create_simple(PCIBus *bus, int devfn, const char *name)
+{
+    g_assert_not_reached();
+}
diff --git a/stubs/pci-host-piix.c b/stubs/pci-host-piix.c
new file mode 100644
index 000000000..93975adbf
--- /dev/null
+++ b/stubs/pci-host-piix.c
@@ -0,0 +1,7 @@
+#include "qemu/osdep.h"
+#include "hw/pci-host/i440fx.h"
+
+PCIBus *find_i440fx(void)
+{
+    return NULL;
+}
diff --git a/stubs/qemu-timer-notify-cb.c b/stubs/qemu-timer-notify-cb.c
new file mode 100644
index 000000000..845e46f8e
--- /dev/null
+++ b/stubs/qemu-timer-notify-cb.c
@@ -0,0 +1,8 @@
+#include "qemu/osdep.h"
+#include "sysemu/cpu-timers.h"
+#include "qemu/main-loop.h"
+
+void qemu_timer_notify_cb(void *opaque, QEMUClockType type)
+{
+    qemu_notify_event();
+}
diff --git a/stubs/qmp_memory_device.c b/stubs/qmp_memory_device.c
new file mode 100644
index 000000000..e75cac62d
--- /dev/null
+++ b/stubs/qmp_memory_device.c
@@ -0,0 +1,12 @@
+#include "qemu/osdep.h"
+#include "hw/mem/memory-device.h"
+
+MemoryDeviceInfoList *qmp_memory_device_list(void)
+{
+   return NULL;
+}
+
+uint64_t get_plugged_memory_size(void)
+{
+    return (uint64_t)-1;
+}
diff --git a/stubs/qtest.c b/stubs/qtest.c
new file mode 100644
index 000000000..4666a49d7
--- /dev/null
+++ b/stubs/qtest.c
@@ -0,0 +1,25 @@
+/*
+ * qtest stubs
+ *
+ * Copyright (c) 2014 Linaro Limited
+ * Written by Peter Maydell
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
+ * See the COPYING file in the top-level directory.
+ */
+
+#include "qemu/osdep.h"
+#include "sysemu/qtest.h"
+
+/* Needed for qtest_allowed() */
+bool qtest_allowed;
+
+bool qtest_driver(void)
+{
+    return false;
+}
+
+int64_t qtest_get_virtual_clock(void)
+{
+    return 0;
+}
diff --git a/stubs/ram-block.c b/stubs/ram-block.c
new file mode 100644
index 000000000..108197683
--- /dev/null
+++ b/stubs/ram-block.c
@@ -0,0 +1,37 @@
+#include "qemu/osdep.h"
+#include "exec/ramlist.h"
+#include "exec/cpu-common.h"
+#include "exec/memory.h"
+
+void *qemu_ram_get_host_addr(RAMBlock *rb)
+{
+    return 0;
+}
+
+ram_addr_t qemu_ram_get_offset(RAMBlock *rb)
+{
+    return 0;
+}
+
+ram_addr_t qemu_ram_get_used_length(RAMBlock *rb)
+{
+    return 0;
+}
+
+void ram_block_notifier_add(RAMBlockNotifier *n)
+{
+}
+
+void ram_block_notifier_remove(RAMBlockNotifier *n)
+{
+}
+
+int qemu_ram_foreach_block(RAMBlockIterFunc func, void *opaque)
+{
+    return 0;
+}
+
+int ram_block_discard_disable(bool state)
+{
+    return 0;
+}
diff --git a/stubs/ramfb.c b/stubs/ramfb.c
new file mode 100644
index 000000000..48143f335
--- /dev/null
+++ b/stubs/ramfb.c
@@ -0,0 +1,13 @@
+#include "qemu/osdep.h"
+#include "qapi/error.h"
+#include "hw/display/ramfb.h"
+
+void ramfb_display_update(QemuConsole *con, RAMFBState *s)
+{
+}
+
+RAMFBState *ramfb_setup(Error **errp)
+{
+    error_setg(errp, "ramfb support not available");
+    return NULL;
+}
diff --git a/stubs/replay-tools.c b/stubs/replay-tools.c
new file mode 100644
index 000000000..c06b360e2
--- /dev/null
+++ b/stubs/replay-tools.c
@@ -0,0 +1,83 @@
+#include "qemu/osdep.h"
+#include "sysemu/replay.h"
+#include "block/aio.h"
+
+bool replay_events_enabled(void)
+{
+    return false;
+}
+
+int64_t replay_save_clock(unsigned int kind, int64_t clock, int64_t raw_icount)
+{
+    abort();
+    return 0;
+}
+
+int64_t replay_read_clock(unsigned int kind)
+{
+    abort();
+    return 0;
+}
+
+uint64_t replay_get_current_icount(void)
+{
+    return 0;
+}
+
+void replay_bh_schedule_event(QEMUBH *bh)
+{
+    qemu_bh_schedule(bh);
+}
+
+void replay_bh_schedule_oneshot_event(AioContext *ctx,
+     QEMUBHFunc *cb, void *opaque)
+{
+    aio_bh_schedule_oneshot(ctx, cb, opaque);
+}
+
+bool replay_checkpoint(ReplayCheckpoint checkpoint)
+{
+    return true;
+}
+
+void replay_mutex_lock(void)
+{
+}
+
+void replay_mutex_unlock(void)
+{
+}
+
+void replay_register_char_driver(Chardev *chr)
+{
+}
+
+void replay_chr_be_write(Chardev *s, uint8_t *buf, int len)
+{
+    abort();
+}
+
+void replay_char_write_event_save(int res, int offset)
+{
+    abort();
+}
+
+void replay_char_write_event_load(int *res, int *offset)
+{
+    abort();
+}
+
+int replay_char_read_all_load(uint8_t *buf)
+{
+    abort();
+}
+
+void replay_char_read_all_save_error(int res)
+{
+    abort();
+}
+
+void replay_char_read_all_save_buf(uint8_t *buf, int offset)
+{
+    abort();
+}
diff --git a/stubs/replay.c b/stubs/replay.c
new file mode 100644
index 000000000..9d5b4be33
--- /dev/null
+++ b/stubs/replay.c
@@ -0,0 +1,27 @@
+#include "qemu/osdep.h"
+#include "sysemu/replay.h"
+
+ReplayMode replay_mode;
+
+void replay_finish(void)
+{
+}
+
+void replay_save_random(int ret, void *buf, size_t len)
+{
+}
+
+int replay_read_random(void *buf, size_t len)
+{
+    return 0;
+}
+
+bool replay_reverse_step(void)
+{
+    return false;
+}
+
+bool replay_reverse_continue(void)
+{
+    return false;
+}
diff --git a/stubs/runstate-check.c b/stubs/runstate-check.c
new file mode 100644
index 000000000..2ccda2b70
--- /dev/null
+++ b/stubs/runstate-check.c
@@ -0,0 +1,7 @@
+#include "qemu/osdep.h"
+
+#include "sysemu/runstate.h"
+bool runstate_check(RunState state)
+{
+    return state == RUN_STATE_PRELAUNCH;
+}
diff --git a/stubs/semihost.c b/stubs/semihost.c
new file mode 100644
index 000000000..1d8b37f7b
--- /dev/null
+++ b/stubs/semihost.c
@@ -0,0 +1,75 @@
+/*
+ * Semihosting Stubs for SoftMMU
+ *
+ * Copyright (c) 2019 Linaro Ltd
+ *
+ * Stubs for SoftMMU targets that don't actually do semihosting.
+ *
+ * SPDX-License-Identifier: GPL-2.0-or-later
+ */
+
+#include "qemu/osdep.h"
+#include "qemu/option.h"
+#include "qemu/error-report.h"
+#include "hw/semihosting/semihost.h"
+#include "sysemu/sysemu.h"
+
+/* Empty config */
+QemuOptsList qemu_semihosting_config_opts = {
+    .name = "",
+    .head = QTAILQ_HEAD_INITIALIZER(qemu_semihosting_config_opts.head),
+    .desc = {
+        { /* end of list */ }
+    },
+};
+
+/* Queries to config status default to off */
+bool semihosting_enabled(void)
+{
+    return false;
+}
+
+SemihostingTarget semihosting_get_target(void)
+{
+    return SEMIHOSTING_TARGET_AUTO;
+}
+
+/*
+ * All the rest are empty subs. We could g_assert_not_reached() but
+ * that adds extra weight to the final binary. Waste not want not.
+ */
+void qemu_semihosting_enable(void)
+{
+}
+
+int qemu_semihosting_config_options(const char *optarg)
+{
+    return 1;
+}
+
+const char *semihosting_get_arg(int i)
+{
+    return NULL;
+}
+
+int semihosting_get_argc(void)
+{
+    return 0;
+}
+
+const char *semihosting_get_cmdline(void)
+{
+    return NULL;
+}
+
+void semihosting_arg_fallback(const char *file, const char *cmd)
+{
+}
+
+void qemu_semihosting_connect_chardevs(void)
+{
+}
+
+void qemu_semihosting_console_init(void)
+{
+}
diff --git a/stubs/set-fd-handler.c b/stubs/set-fd-handler.c
new file mode 100644
index 000000000..bff7e0a45
--- /dev/null
+++ b/stubs/set-fd-handler.c
@@ -0,0 +1,10 @@
+#include "qemu/osdep.h"
+#include "qemu/main-loop.h"
+
+void qemu_set_fd_handler(int fd,
+                         IOHandler *fd_read,
+                         IOHandler *fd_write,
+                         void *opaque)
+{
+    abort();
+}
diff --git a/stubs/sysbus.c b/stubs/sysbus.c
new file mode 100644
index 000000000..d8da90caa
--- /dev/null
+++ b/stubs/sysbus.c
@@ -0,0 +1,7 @@
+#include "qemu/osdep.h"
+#include "hw/qdev-core.h"
+
+BusState *sysbus_get_default(void)
+{
+    return NULL;
+}
diff --git a/stubs/target-get-monitor-def.c b/stubs/target-get-monitor-def.c
new file mode 100644
index 000000000..394e0f9a7
--- /dev/null
+++ b/stubs/target-get-monitor-def.c
@@ -0,0 +1,29 @@
+/*
+ *  Stub for target_get_monitor_def.
+ *
+ *  Copyright IBM Corp., 2015
+ *
+ *  Author: Alexey Kardashevskiy 
+ *
+ *  This program is free software; you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License as published by
+ *  the Free Software Foundation; either version 2 of the License,
+ *  or (at your option) any later version.
+ *
+ *  This program is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU General Public License for more details.
+ *
+ *  You should have received a copy of the GNU General Public License
+ *  along with this program; if not, see .
+ */
+
+#include "qemu/osdep.h"
+
+int target_get_monitor_def(CPUState *cs, const char *name, uint64_t *pval);
+
+int target_get_monitor_def(CPUState *cs, const char *name, uint64_t *pval)
+{
+    return -1;
+}
diff --git a/stubs/target-monitor-defs.c b/stubs/target-monitor-defs.c
new file mode 100644
index 000000000..ac07b1906
--- /dev/null
+++ b/stubs/target-monitor-defs.c
@@ -0,0 +1,8 @@
+#include "qemu/osdep.h"
+
+const MonitorDef *target_monitor_defs(void);
+
+const MonitorDef *target_monitor_defs(void)
+{
+    return NULL;
+}
diff --git a/stubs/tpm.c b/stubs/tpm.c
new file mode 100644
index 000000000..9bded191d
--- /dev/null
+++ b/stubs/tpm.c
@@ -0,0 +1,39 @@
+/*
+ * TPM stubs
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
+ * See the COPYING file in the top-level directory.
+ */
+
+#include "qemu/osdep.h"
+#include "qapi/qapi-commands-tpm.h"
+#include "sysemu/tpm.h"
+#include "hw/acpi/tpm.h"
+
+int tpm_init(void)
+{
+    return 0;
+}
+
+void tpm_cleanup(void)
+{
+}
+
+TPMInfoList *qmp_query_tpm(Error **errp)
+{
+    return NULL;
+}
+
+TpmTypeList *qmp_query_tpm_types(Error **errp)
+{
+    return NULL;
+}
+
+TpmModelList *qmp_query_tpm_models(Error **errp)
+{
+    return NULL;
+}
+
+void tpm_build_ppi_acpi(TPMIf *tpm, Aml *dev)
+{
+}
diff --git a/stubs/trace-control.c b/stubs/trace-control.c
new file mode 100644
index 000000000..7f856e5c2
--- /dev/null
+++ b/stubs/trace-control.c
@@ -0,0 +1,51 @@
+/*
+ * Interface for configuring and controlling the state of tracing events.
+ *
+ * Copyright (C) 2014-2016 Lluís Vilanova 
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
+ * See the COPYING file in the top-level directory.
+ */
+
+#include "qemu/osdep.h"
+#include "trace/control.h"
+
+
+void trace_event_set_state_dynamic_init(TraceEvent *ev, bool state)
+{
+    trace_event_set_state_dynamic(ev, state);
+}
+
+void trace_event_set_state_dynamic(TraceEvent *ev, bool state)
+{
+    bool state_pre;
+    assert(trace_event_get_state_static(ev));
+
+    /*
+     * We ignore the "vcpu" property here, since there's no target code. Then
+     * dstate can only be 1 or 0.
+     */
+    state_pre = *(ev->dstate);
+    if (state_pre != state) {
+        if (state) {
+            trace_events_enabled_count++;
+            *(ev->dstate) = 1;
+        } else {
+            trace_events_enabled_count--;
+            *(ev->dstate) = 0;
+        }
+    }
+}
+
+void trace_event_set_vcpu_state_dynamic(CPUState *vcpu,
+                                        TraceEvent *ev, bool state)
+{
+    /* should never be called on non-target binaries */
+    abort();
+}
+
+void trace_init_vcpu(CPUState *vcpu)
+{
+    /* should never be called on non-target binaries */
+    abort();
+}
diff --git a/stubs/uuid.c b/stubs/uuid.c
new file mode 100644
index 000000000..e5112eb3f
--- /dev/null
+++ b/stubs/uuid.c
@@ -0,0 +1,12 @@
+#include "qemu/osdep.h"
+#include "qapi/qapi-commands-machine.h"
+#include "qemu/uuid.h"
+
+UuidInfo *qmp_query_uuid(Error **errp)
+{
+    UuidInfo *info = g_malloc0(sizeof(*info));
+
+    info->UUID = g_strdup(UUID_NONE);
+    return info;
+}
+
diff --git a/stubs/vm-stop.c b/stubs/vm-stop.c
new file mode 100644
index 000000000..7f8a9da8a
--- /dev/null
+++ b/stubs/vm-stop.c
@@ -0,0 +1,12 @@
+#include "qemu/osdep.h"
+
+#include "sysemu/runstate.h"
+void qemu_system_vmstop_request_prepare(void)
+{
+    abort();
+}
+
+void qemu_system_vmstop_request(RunState state)
+{
+    abort();
+}
diff --git a/stubs/vmgenid.c b/stubs/vmgenid.c
new file mode 100644
index 000000000..bfad656c6
--- /dev/null
+++ b/stubs/vmgenid.c
@@ -0,0 +1,10 @@
+#include "qemu/osdep.h"
+#include "qapi/error.h"
+#include "qapi/qapi-commands-machine.h"
+#include "qapi/qmp/qerror.h"
+
+GuidInfo *qmp_query_vm_generation_id(Error **errp)
+{
+    error_setg(errp, QERR_UNSUPPORTED);
+    return NULL;
+}
diff --git a/stubs/vmstate.c b/stubs/vmstate.c
new file mode 100644
index 000000000..cc4fe41df
--- /dev/null
+++ b/stubs/vmstate.c
@@ -0,0 +1,25 @@
+#include "qemu/osdep.h"
+#include "migration/vmstate.h"
+
+const VMStateDescription vmstate_dummy = {};
+
+int vmstate_register_with_alias_id(VMStateIf *obj,
+                                   uint32_t instance_id,
+                                   const VMStateDescription *vmsd,
+                                   void *base, int alias_id,
+                                   int required_for_version,
+                                   Error **errp)
+{
+    return 0;
+}
+
+void vmstate_unregister(VMStateIf *obj,
+                        const VMStateDescription *vmsd,
+                        void *opaque)
+{
+}
+
+bool vmstate_check_only_migratable(const VMStateDescription *vmsd)
+{
+    return true;
+}
diff --git a/stubs/win32-kbd-hook.c b/stubs/win32-kbd-hook.c
new file mode 100644
index 000000000..1a084b081
--- /dev/null
+++ b/stubs/win32-kbd-hook.c
@@ -0,0 +1,18 @@
+/*
+ * Win32 keyboard hook stubs
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or
+ * (at your option) any later version.  See the COPYING file in the
+ * top-level directory.
+ */
+
+#include "qemu/osdep.h"
+#include "ui/win32-kbd-hook.h"
+
+void win32_kbd_set_window(void *hwnd)
+{
+}
+
+void win32_kbd_set_grab(bool grab)
+{
+}
diff --git a/stubs/xen-hw-stub.c b/stubs/xen-hw-stub.c
new file mode 100644
index 000000000..15f3921a7
--- /dev/null
+++ b/stubs/xen-hw-stub.c
@@ -0,0 +1,46 @@
+/*
+ * Copyright (C) 2014       Citrix Systems UK Ltd.
+ *
+ * SPDX-License-Identifier: GPL-2.0-or-later
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
+ * See the COPYING file in the top-level directory.
+ */
+
+#include "qemu/osdep.h"
+#include "hw/xen/xen.h"
+#include "hw/xen/xen-x86.h"
+
+int xen_pci_slot_get_pirq(PCIDevice *pci_dev, int irq_num)
+{
+    return -1;
+}
+
+void xen_piix3_set_irq(void *opaque, int irq_num, int level)
+{
+}
+
+void xen_piix_pci_write_config_client(uint32_t address, uint32_t val, int len)
+{
+}
+
+void xen_hvm_inject_msi(uint64_t addr, uint32_t data)
+{
+}
+
+int xen_is_pirq_msi(uint32_t msi_data)
+{
+    return 0;
+}
+
+qemu_irq *xen_interrupt_controller_init(void)
+{
+    return NULL;
+}
+
+void xen_register_framebuffer(MemoryRegion *mr)
+{
+}
+
+void xen_hvm_init_pc(PCMachineState *pcms, MemoryRegion **ram_memory)
+{
+}
diff --git a/trace-events b/trace-events
new file mode 100644
index 000000000..ac7cef933
--- /dev/null
+++ b/trace-events
@@ -0,0 +1,157 @@
+# See docs/devel/tracing.txt for syntax documentation.
+#
+# This file is processed by the tracetool script during the build.
+#
+# To add a new trace event:
+#
+# 1. Choose a name for the trace event.  Declare its arguments and format
+#    string.
+#
+# 2. Call the trace event from code using trace_##name, e.g. multiwrite_cb() ->
+#    trace_multiwrite_cb().  The source file must #include "trace.h".
+#
+# Format of a trace event:
+#
+# [disable] ( [,  ] ...) ""
+#
+# Example: g_malloc(size_t size) "size %zu"
+#
+# The "disable" keyword will build without the trace event.
+#
+# The  must be a valid as a C function name.
+#
+# Types should be standard C types.  Use void * for pointers because the trace
+# system may not have the necessary headers included.
+#
+# The  should be a sprintf()-compatible format string.
+
+# dma-helpers.c
+dma_blk_io(void *dbs, void *bs, int64_t offset, bool to_dev) "dbs=%p bs=%p offset=%" PRId64 " to_dev=%d"
+dma_aio_cancel(void *dbs) "dbs=%p"
+dma_complete(void *dbs, int ret, void *cb) "dbs=%p ret=%d cb=%p"
+dma_blk_cb(void *dbs, int ret) "dbs=%p ret=%d"
+dma_map_wait(void *dbs) "dbs=%p"
+
+# exec.c
+find_ram_offset(uint64_t size, uint64_t offset) "size: 0x%" PRIx64 " @ 0x%" PRIx64
+find_ram_offset_loop(uint64_t size, uint64_t candidate, uint64_t offset, uint64_t next, uint64_t mingap) "trying size: 0x%" PRIx64 " @ 0x%" PRIx64 ", offset: 0x%" PRIx64" next: 0x%" PRIx64 " mingap: 0x%" PRIx64
+ram_block_discard_range(const char *rbname, void *hva, size_t length, bool need_madvise, bool need_fallocate, int ret) "%s@%p + 0x%zx: madvise: %d fallocate: %d ret: %d"
+
+# accel/tcg/cputlb.c
+memory_notdirty_write_access(uint64_t vaddr, uint64_t ram_addr, unsigned size) "0x%" PRIx64 " ram_addr 0x%" PRIx64 " size %u"
+memory_notdirty_set_dirty(uint64_t vaddr) "0x%" PRIx64
+
+# gdbstub.c
+gdbstub_op_start(const char *device) "Starting gdbstub using device %s"
+gdbstub_op_exiting(uint8_t code) "notifying exit with code=0x%02x"
+gdbstub_op_continue(void) "Continuing all CPUs"
+gdbstub_op_continue_cpu(int cpu_index) "Continuing CPU %d"
+gdbstub_op_stepping(int cpu_index) "Stepping CPU %d"
+gdbstub_op_extra_info(const char *info) "Thread extra info: %s"
+gdbstub_hit_watchpoint(const char *type, int cpu_gdb_index, uint64_t vaddr) "Watchpoint hit, type=\"%s\" cpu=%d, vaddr=0x%" PRIx64 ""
+gdbstub_hit_internal_error(void) "RUN_STATE_INTERNAL_ERROR"
+gdbstub_hit_break(void) "RUN_STATE_DEBUG"
+gdbstub_hit_paused(void) "RUN_STATE_PAUSED"
+gdbstub_hit_shutdown(void) "RUN_STATE_SHUTDOWN"
+gdbstub_hit_io_error(void) "RUN_STATE_IO_ERROR"
+gdbstub_hit_watchdog(void) "RUN_STATE_WATCHDOG"
+gdbstub_hit_unknown(int state) "Unknown run state=0x%x"
+gdbstub_io_reply(const char *message) "Sent: %s"
+gdbstub_io_binaryreply(size_t ofs, const char *line) "0x%04zx: %s"
+gdbstub_io_command(const char *command) "Received: %s"
+gdbstub_io_got_ack(void) "Got ACK"
+gdbstub_io_got_unexpected(uint8_t ch) "Got 0x%02x when expecting ACK/NACK"
+gdbstub_err_got_nack(void) "Got NACK, retransmitting"
+gdbstub_err_garbage(uint8_t ch) "received garbage between packets: 0x%02x"
+gdbstub_err_overrun(void) "command buffer overrun, dropping command"
+gdbstub_err_invalid_repeat(uint8_t ch) "got invalid RLE count: 0x%02x"
+gdbstub_err_invalid_rle(void) "got invalid RLE sequence"
+gdbstub_err_checksum_invalid(uint8_t ch) "got invalid command checksum digit: 0x%02x"
+gdbstub_err_checksum_incorrect(uint8_t expected, uint8_t got) "got command packet with incorrect checksum, expected=0x%02x, received=0x%02x"
+
+# job.c
+job_state_transition(void *job,  int ret, const char *legal, const char *s0, const char *s1) "job %p (ret: %d) attempting %s transition (%s-->%s)"
+job_apply_verb(void *job, const char *state, const char *verb, const char *legal) "job %p in state %s; applying verb %s (%s)"
+job_completed(void *job, int ret) "job %p ret %d"
+
+# job-qmp.c
+qmp_job_cancel(void *job) "job %p"
+qmp_job_pause(void *job) "job %p"
+qmp_job_resume(void *job) "job %p"
+qmp_job_complete(void *job) "job %p"
+qmp_job_finalize(void *job) "job %p"
+qmp_job_dismiss(void *job) "job %p"
+
+
+### Guest events, keep at bottom
+
+
+## vCPU
+
+# trace/control-target.c
+
+# Hot-plug a new virtual (guest) CPU
+#
+# Mode: user, softmmu
+# Targets: all
+vcpu guest_cpu_enter(void)
+
+# trace/control.c
+
+# Hot-unplug a virtual (guest) CPU
+#
+# Mode: user, softmmu
+# Targets: all
+vcpu guest_cpu_exit(void)
+
+# hw/core/cpu.c
+
+# Reset the state of a virtual (guest) CPU
+#
+# Mode: user, softmmu
+# Targets: all
+vcpu guest_cpu_reset(void)
+
+# tcg/tcg-op.c
+
+# @vaddr: Access' virtual address.
+# @info : Access' information (see below).
+#
+# Start virtual memory access (before any potential access violation).
+#
+# Does not include memory accesses performed by devices.
+#
+# Access information can be parsed as:
+#
+# struct mem_info {
+#     uint8_t size_shift : 4; /* interpreted as "1 << size_shift" bytes */
+#     bool    sign_extend: 1; /* sign-extended */
+#     uint8_t endianness : 1; /* 0: little, 1: big */
+#     bool    store      : 1; /* whether it is a store operation */
+#             pad        : 1;
+#     uint8_t mmuidx     : 4; /* mmuidx (softmmu only)  */
+# };
+#
+# Mode: user, softmmu
+# Targets: TCG(all)
+vcpu tcg guest_mem_before(TCGv vaddr, uint16_t info) "info=%d", "vaddr=0x%016"PRIx64" info=%d"
+
+# include/user/syscall-trace.h
+
+# @num: System call number.
+# @arg*: System call argument value.
+#
+# Start executing a guest system call in syscall emulation mode.
+#
+# Mode: user
+# Targets: TCG(all)
+vcpu guest_user_syscall(uint64_t num, uint64_t arg1, uint64_t arg2, uint64_t arg3, uint64_t arg4, uint64_t arg5, uint64_t arg6, uint64_t arg7, uint64_t arg8) "num=0x%016"PRIx64" arg1=0x%016"PRIx64" arg2=0x%016"PRIx64" arg3=0x%016"PRIx64" arg4=0x%016"PRIx64" arg5=0x%016"PRIx64" arg6=0x%016"PRIx64" arg7=0x%016"PRIx64" arg8=0x%016"PRIx64
+
+# @num: System call number.
+# @ret: System call result value.
+#
+# Finish executing a guest system call in syscall emulation mode.
+#
+# Mode: user
+# Targets: TCG(all)
+vcpu guest_user_syscall_ret(uint64_t num, uint64_t ret) "num=0x%016"PRIx64" ret=0x%016"PRIx64
diff --git a/trace/control-internal.h b/trace/control-internal.h
new file mode 100644
index 000000000..8b2b50a7c
--- /dev/null
+++ b/trace/control-internal.h
@@ -0,0 +1,61 @@
+/*
+ * Interface for configuring and controlling the state of tracing events.
+ *
+ * Copyright (C) 2011-2016 Lluís Vilanova 
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
+ * See the COPYING file in the top-level directory.
+ */
+
+#ifndef TRACE__CONTROL_INTERNAL_H
+#define TRACE__CONTROL_INTERNAL_H
+
+extern int trace_events_enabled_count;
+
+
+static inline bool trace_event_is_pattern(const char *str)
+{
+    assert(str != NULL);
+    return strchr(str, '*') != NULL;
+}
+
+static inline uint32_t trace_event_get_id(TraceEvent *ev)
+{
+    assert(ev != NULL);
+    return ev->id;
+}
+
+static inline uint32_t trace_event_get_vcpu_id(TraceEvent *ev)
+{
+    return ev->vcpu_id;
+}
+
+static inline bool trace_event_is_vcpu(TraceEvent *ev)
+{
+    return ev->vcpu_id != TRACE_VCPU_EVENT_NONE;
+}
+
+static inline const char * trace_event_get_name(TraceEvent *ev)
+{
+    assert(ev != NULL);
+    return ev->name;
+}
+
+static inline bool trace_event_get_state_static(TraceEvent *ev)
+{
+    assert(ev != NULL);
+    return ev->sstate;
+}
+
+/* it's on fast path, avoid consistency checks (asserts) */
+#define trace_event_get_state_dynamic_by_id(id) \
+    (unlikely(trace_events_enabled_count) && _ ## id ## _DSTATE)
+
+static inline bool trace_event_get_state_dynamic(TraceEvent *ev)
+{
+    return unlikely(trace_events_enabled_count) && *ev->dstate;
+}
+
+void trace_event_register_group(TraceEvent **events);
+
+#endif /* TRACE__CONTROL_INTERNAL_H */
diff --git a/trace/control-target.c b/trace/control-target.c
new file mode 100644
index 000000000..e293eeed7
--- /dev/null
+++ b/trace/control-target.c
@@ -0,0 +1,149 @@
+/*
+ * Interface for configuring and controlling the state of tracing events.
+ *
+ * Copyright (C) 2014-2017 Lluís Vilanova 
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
+ * See the COPYING file in the top-level directory.
+ */
+
+#include "qemu/osdep.h"
+#include "cpu.h"
+#include "trace/trace-root.h"
+#include "trace/control.h"
+
+
+void trace_event_set_state_dynamic_init(TraceEvent *ev, bool state)
+{
+    bool state_pre;
+    assert(trace_event_get_state_static(ev));
+    /*
+     * We ignore the "vcpu" property here, since no vCPUs have been created
+     * yet. Then dstate can only be 1 or 0.
+     */
+    state_pre = *ev->dstate;
+    if (state_pre != state) {
+        if (state) {
+            trace_events_enabled_count++;
+            *ev->dstate = 1;
+        } else {
+            trace_events_enabled_count--;
+            *ev->dstate = 0;
+        }
+    }
+}
+
+void trace_event_set_state_dynamic(TraceEvent *ev, bool state)
+{
+    CPUState *vcpu;
+    assert(trace_event_get_state_static(ev));
+    if (trace_event_is_vcpu(ev) && likely(first_cpu != NULL)) {
+        CPU_FOREACH(vcpu) {
+            trace_event_set_vcpu_state_dynamic(vcpu, ev, state);
+        }
+    } else {
+        /*
+         * Without the "vcpu" property, dstate can only be 1 or 0. With it, we
+         * haven't instantiated any vCPU yet, so we will set a global state
+         * instead, and trace_init_vcpu will reconcile it afterwards.
+         */
+        bool state_pre = *ev->dstate;
+        if (state_pre != state) {
+            if (state) {
+                trace_events_enabled_count++;
+                *ev->dstate = 1;
+            } else {
+                trace_events_enabled_count--;
+                *ev->dstate = 0;
+            }
+        }
+    }
+}
+
+static void trace_event_synchronize_vcpu_state_dynamic(
+    CPUState *vcpu, run_on_cpu_data ignored)
+{
+    bitmap_copy(vcpu->trace_dstate, vcpu->trace_dstate_delayed,
+                CPU_TRACE_DSTATE_MAX_EVENTS);
+    cpu_tb_jmp_cache_clear(vcpu);
+}
+
+void trace_event_set_vcpu_state_dynamic(CPUState *vcpu,
+                                        TraceEvent *ev, bool state)
+{
+    uint32_t vcpu_id;
+    bool state_pre;
+    assert(trace_event_get_state_static(ev));
+    assert(trace_event_is_vcpu(ev));
+    vcpu_id = trace_event_get_vcpu_id(ev);
+    state_pre = test_bit(vcpu_id, vcpu->trace_dstate);
+    if (state_pre != state) {
+        if (state) {
+            trace_events_enabled_count++;
+            set_bit(vcpu_id, vcpu->trace_dstate_delayed);
+            (*ev->dstate)++;
+        } else {
+            trace_events_enabled_count--;
+            clear_bit(vcpu_id, vcpu->trace_dstate_delayed);
+            (*ev->dstate)--;
+        }
+        if (vcpu->created) {
+            /*
+             * Delay changes until next TB; we want all TBs to be built from a
+             * single set of dstate values to ensure consistency of generated
+             * tracing code.
+             */
+            async_run_on_cpu(vcpu, trace_event_synchronize_vcpu_state_dynamic,
+                             RUN_ON_CPU_NULL);
+        } else {
+            trace_event_synchronize_vcpu_state_dynamic(vcpu, RUN_ON_CPU_NULL);
+        }
+    }
+}
+
+static bool adding_first_cpu1(void)
+{
+    CPUState *cpu;
+    size_t count = 0;
+    CPU_FOREACH(cpu) {
+        count++;
+        if (count > 1) {
+            return false;
+        }
+    }
+    return true;
+}
+
+static bool adding_first_cpu(void)
+{
+    bool res;
+    cpu_list_lock();
+    res = adding_first_cpu1();
+    cpu_list_unlock();
+    return res;
+}
+
+void trace_init_vcpu(CPUState *vcpu)
+{
+    TraceEventIter iter;
+    TraceEvent *ev;
+    trace_event_iter_init(&iter, NULL);
+    while ((ev = trace_event_iter_next(&iter)) != NULL) {
+        if (trace_event_is_vcpu(ev) &&
+            trace_event_get_state_static(ev) &&
+            trace_event_get_state_dynamic(ev)) {
+            if (adding_first_cpu()) {
+                /* check preconditions */
+                assert(*ev->dstate == 1);
+                /* disable early-init state ... */
+                *ev->dstate = 0;
+                trace_events_enabled_count--;
+                /* ... and properly re-enable */
+                trace_event_set_vcpu_state_dynamic(vcpu, ev, true);
+            } else {
+                trace_event_set_vcpu_state_dynamic(vcpu, ev, true);
+            }
+        }
+    }
+    trace_guest_cpu_enter(vcpu);
+}
diff --git a/trace/control-vcpu.h b/trace/control-vcpu.h
new file mode 100644
index 000000000..0f98ebe7b
--- /dev/null
+++ b/trace/control-vcpu.h
@@ -0,0 +1,63 @@
+/*
+ * Interface for configuring and controlling the state of tracing events.
+ *
+ * Copyright (C) 2011-2016 Lluís Vilanova 
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
+ * See the COPYING file in the top-level directory.
+ */
+
+#ifndef TRACE__CONTROL_VCPU_H
+#define TRACE__CONTROL_VCPU_H
+
+#include "control.h"
+#include "event-internal.h"
+#include "hw/core/cpu.h"
+
+/**
+ * trace_event_get_vcpu_state:
+ * @vcpu: Target vCPU.
+ * @id: Event identifier name.
+ *
+ * Get the tracing state of an event (both static and dynamic) for the given
+ * vCPU.
+ *
+ * If the event has the disabled property, the check will have no performance
+ * impact.
+ */
+#define trace_event_get_vcpu_state(vcpu, id)                            \
+    ((id ##_ENABLED) &&                                                 \
+     trace_event_get_vcpu_state_dynamic_by_vcpu_id(                     \
+         vcpu, _ ## id ## _EVENT.vcpu_id))
+
+/**
+ * trace_event_get_vcpu_state_dynamic:
+ *
+ * Get the dynamic tracing state of an event for the given vCPU.
+ */
+static bool trace_event_get_vcpu_state_dynamic(CPUState *vcpu, TraceEvent *ev);
+
+#include "control-internal.h"
+
+static inline bool
+trace_event_get_vcpu_state_dynamic_by_vcpu_id(CPUState *vcpu,
+                                              uint32_t vcpu_id)
+{
+    /* it's on fast path, avoid consistency checks (asserts) */
+    if (unlikely(trace_events_enabled_count)) {
+        return test_bit(vcpu_id, vcpu->trace_dstate);
+    } else {
+        return false;
+    }
+}
+
+static inline bool trace_event_get_vcpu_state_dynamic(CPUState *vcpu,
+                                                      TraceEvent *ev)
+{
+    uint32_t vcpu_id;
+    assert(trace_event_is_vcpu(ev));
+    vcpu_id = trace_event_get_vcpu_id(ev);
+    return trace_event_get_vcpu_state_dynamic_by_vcpu_id(vcpu, vcpu_id);
+}
+
+#endif
diff --git a/trace/control.c b/trace/control.c
new file mode 100644
index 000000000..b82fb8731
--- /dev/null
+++ b/trace/control.c
@@ -0,0 +1,312 @@
+/*
+ * Interface for configuring and controlling the state of tracing events.
+ *
+ * Copyright (C) 2011-2016 Lluís Vilanova 
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
+ * See the COPYING file in the top-level directory.
+ */
+
+#include "qemu/osdep.h"
+#include "trace/control.h"
+#include "qemu/help_option.h"
+#include "qemu/option.h"
+#ifdef CONFIG_TRACE_SIMPLE
+#include "trace/simple.h"
+#endif
+#ifdef CONFIG_TRACE_FTRACE
+#include "trace/ftrace.h"
+#endif
+#ifdef CONFIG_TRACE_LOG
+#include "qemu/log.h"
+#endif
+#ifdef CONFIG_TRACE_SYSLOG
+#include 
+#endif
+#include "qapi/error.h"
+#include "qemu/error-report.h"
+#include "qemu/config-file.h"
+#include "monitor/monitor.h"
+#include "trace/trace-root.h"
+
+int trace_events_enabled_count;
+
+typedef struct TraceEventGroup {
+    TraceEvent **events;
+} TraceEventGroup;
+
+static TraceEventGroup *event_groups;
+static size_t nevent_groups;
+static uint32_t next_id;
+static uint32_t next_vcpu_id;
+static bool init_trace_on_startup;
+
+QemuOptsList qemu_trace_opts = {
+    .name = "trace",
+    .implied_opt_name = "enable",
+    .head = QTAILQ_HEAD_INITIALIZER(qemu_trace_opts.head),
+    .desc = {
+        {
+            .name = "enable",
+            .type = QEMU_OPT_STRING,
+        },
+        {
+            .name = "events",
+            .type = QEMU_OPT_STRING,
+        },{
+            .name = "file",
+            .type = QEMU_OPT_STRING,
+        },
+        { /* end of list */ }
+    },
+};
+
+
+void trace_event_register_group(TraceEvent **events)
+{
+    size_t i;
+    for (i = 0; events[i] != NULL; i++) {
+        events[i]->id = next_id++;
+        if (events[i]->vcpu_id == TRACE_VCPU_EVENT_NONE) {
+            continue;
+        }
+
+        if (likely(next_vcpu_id < CPU_TRACE_DSTATE_MAX_EVENTS)) {
+            events[i]->vcpu_id = next_vcpu_id++;
+        } else {
+            warn_report("too many vcpu trace events; dropping '%s'",
+                        events[i]->name);
+        }
+    }
+    event_groups = g_renew(TraceEventGroup, event_groups, nevent_groups + 1);
+    event_groups[nevent_groups].events = events;
+    nevent_groups++;
+}
+
+
+TraceEvent *trace_event_name(const char *name)
+{
+    assert(name != NULL);
+
+    TraceEventIter iter;
+    TraceEvent *ev;
+    trace_event_iter_init(&iter, NULL);
+    while ((ev = trace_event_iter_next(&iter)) != NULL) {
+        if (strcmp(trace_event_get_name(ev), name) == 0) {
+            return ev;
+        }
+    }
+    return NULL;
+}
+
+void trace_event_iter_init(TraceEventIter *iter, const char *pattern)
+{
+    iter->event = 0;
+    iter->group = 0;
+    iter->pattern = pattern;
+}
+
+TraceEvent *trace_event_iter_next(TraceEventIter *iter)
+{
+    while (iter->group < nevent_groups &&
+           event_groups[iter->group].events[iter->event] != NULL) {
+        TraceEvent *ev = event_groups[iter->group].events[iter->event];
+        iter->event++;
+        if (event_groups[iter->group].events[iter->event] == NULL) {
+            iter->event = 0;
+            iter->group++;
+        }
+        if (!iter->pattern ||
+            g_pattern_match_simple(iter->pattern, trace_event_get_name(ev))) {
+            return ev;
+        }
+    }
+
+    return NULL;
+}
+
+void trace_list_events(void)
+{
+    TraceEventIter iter;
+    TraceEvent *ev;
+    trace_event_iter_init(&iter, NULL);
+    while ((ev = trace_event_iter_next(&iter)) != NULL) {
+        fprintf(stderr, "%s\n", trace_event_get_name(ev));
+    }
+#ifdef CONFIG_TRACE_DTRACE
+    fprintf(stderr, "This list of names of trace points may be incomplete "
+                    "when using the DTrace/SystemTap backends.\n"
+                    "Run 'qemu-trace-stap list %s' to print the full list.\n",
+            error_get_progname());
+#endif
+}
+
+static void do_trace_enable_events(const char *line_buf)
+{
+    const bool enable = ('-' != line_buf[0]);
+    const char *line_ptr = enable ? line_buf : line_buf + 1;
+    TraceEventIter iter;
+    TraceEvent *ev;
+    bool is_pattern = trace_event_is_pattern(line_ptr);
+
+    trace_event_iter_init(&iter, line_ptr);
+    while ((ev = trace_event_iter_next(&iter)) != NULL) {
+        if (!trace_event_get_state_static(ev)) {
+            if (!is_pattern) {
+                warn_report("trace event '%s' is not traceable",
+                            line_ptr);
+                return;
+            }
+            continue;
+        }
+
+        /* start tracing */
+        trace_event_set_state_dynamic(ev, enable);
+        if (!is_pattern) {
+            return;
+        }
+    }
+
+    if (!is_pattern) {
+        warn_report("trace event '%s' does not exist",
+                    line_ptr);
+    }
+}
+
+void trace_enable_events(const char *line_buf)
+{
+    if (is_help_option(line_buf)) {
+        trace_list_events();
+        if (monitor_cur() == NULL) {
+            exit(0);
+        }
+    } else {
+        do_trace_enable_events(line_buf);
+    }
+}
+
+static void trace_init_events(const char *fname)
+{
+    Location loc;
+    FILE *fp;
+    char line_buf[1024];
+    size_t line_idx = 0;
+
+    if (fname == NULL) {
+        return;
+    }
+
+    loc_push_none(&loc);
+    loc_set_file(fname, 0);
+    fp = fopen(fname, "r");
+    if (!fp) {
+        error_report("%s", strerror(errno));
+        exit(1);
+    }
+    while (fgets(line_buf, sizeof(line_buf), fp)) {
+        loc_set_file(fname, ++line_idx);
+        size_t len = strlen(line_buf);
+        if (len > 1) {              /* skip empty lines */
+            line_buf[len - 1] = '\0';
+            if ('#' == line_buf[0]) { /* skip commented lines */
+                continue;
+            }
+            trace_enable_events(line_buf);
+        }
+    }
+    if (fclose(fp) != 0) {
+        loc_set_file(fname, 0);
+        error_report("%s", strerror(errno));
+        exit(1);
+    }
+    loc_pop(&loc);
+}
+
+void trace_init_file(void)
+{
+    QemuOpts *opts = qemu_find_opts_singleton("trace");
+    const char *file = qemu_opt_get(opts, "file");
+#ifdef CONFIG_TRACE_SIMPLE
+    st_set_trace_file(file);
+    if (init_trace_on_startup) {
+        st_set_trace_file_enabled(true);
+    }
+#elif defined CONFIG_TRACE_LOG
+    /*
+     * If both the simple and the log backends are enabled, "--trace file"
+     * only applies to the simple backend; use "-D" for the log
+     * backend. However we should only override -D if we actually have
+     * something to override it with.
+     */
+    if (file) {
+        qemu_set_log_filename(file, &error_fatal);
+    }
+#else
+    if (file) {
+        fprintf(stderr, "error: --trace file=...: "
+                "option not supported by the selected tracing backends\n");
+        exit(1);
+    }
+#endif
+}
+
+void trace_fini_vcpu(CPUState *vcpu)
+{
+    TraceEventIter iter;
+    TraceEvent *ev;
+
+    trace_guest_cpu_exit(vcpu);
+
+    trace_event_iter_init(&iter, NULL);
+    while ((ev = trace_event_iter_next(&iter)) != NULL) {
+        if (trace_event_is_vcpu(ev) &&
+            trace_event_get_state_static(ev) &&
+            trace_event_get_vcpu_state_dynamic(vcpu, ev)) {
+            /* must disable to affect the global counter */
+            trace_event_set_vcpu_state_dynamic(vcpu, ev, false);
+        }
+    }
+}
+
+bool trace_init_backends(void)
+{
+#ifdef CONFIG_TRACE_SIMPLE
+    if (!st_init()) {
+        fprintf(stderr, "failed to initialize simple tracing backend.\n");
+        return false;
+    }
+#endif
+
+#ifdef CONFIG_TRACE_FTRACE
+    if (!ftrace_init()) {
+        fprintf(stderr, "failed to initialize ftrace backend.\n");
+        return false;
+    }
+#endif
+
+#ifdef CONFIG_TRACE_SYSLOG
+    openlog(NULL, LOG_PID, LOG_DAEMON);
+#endif
+
+    return true;
+}
+
+void trace_opt_parse(const char *optarg)
+{
+    QemuOpts *opts = qemu_opts_parse_noisily(qemu_find_opts("trace"),
+                                             optarg, true);
+    if (!opts) {
+        exit(1);
+    }
+    if (qemu_opt_get(opts, "enable")) {
+        trace_enable_events(qemu_opt_get(opts, "enable"));
+    }
+    trace_init_events(qemu_opt_get(opts, "events"));
+    init_trace_on_startup = true;
+    qemu_opts_del(opts);
+}
+
+uint32_t trace_get_vcpu_event_count(void)
+{
+    return next_vcpu_id;
+}
diff --git a/trace/control.h b/trace/control.h
new file mode 100644
index 000000000..05b95ea45
--- /dev/null
+++ b/trace/control.h
@@ -0,0 +1,241 @@
+/*
+ * Interface for configuring and controlling the state of tracing events.
+ *
+ * Copyright (C) 2011-2016 Lluís Vilanova 
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
+ * See the COPYING file in the top-level directory.
+ */
+
+#ifndef TRACE__CONTROL_H
+#define TRACE__CONTROL_H
+
+#include "event-internal.h"
+
+typedef struct TraceEventIter {
+    size_t event;
+    size_t group;
+    const char *pattern;
+} TraceEventIter;
+
+
+/**
+ * trace_event_iter_init:
+ * @iter: the event iterator struct
+ * @pattern: optional pattern to filter events on name
+ *
+ * Initialize the event iterator struct @iter,
+ * optionally using @pattern to filter out events
+ * with non-matching names.
+ */
+void trace_event_iter_init(TraceEventIter *iter, const char *pattern);
+
+/**
+ * trace_event_iter_next:
+ * @iter: the event iterator struct
+ *
+ * Get the next event, if any. When this returns NULL,
+ * the iterator should no longer be used.
+ *
+ * Returns: the next event, or NULL if no more events exist
+ */
+TraceEvent *trace_event_iter_next(TraceEventIter *iter);
+
+
+/**
+ * trace_event_name:
+ * @id: Event name.
+ *
+ * Search an event by its name.
+ *
+ * Returns: pointer to #TraceEvent or NULL if not found.
+ */
+TraceEvent *trace_event_name(const char *name);
+
+/**
+ * trace_event_is_pattern:
+ *
+ * Whether the given string is an event name pattern.
+ */
+static bool trace_event_is_pattern(const char *str);
+
+
+/**
+ * trace_event_get_id:
+ *
+ * Get the identifier of an event.
+ */
+static uint32_t trace_event_get_id(TraceEvent *ev);
+
+/**
+ * trace_event_get_vcpu_id:
+ *
+ * Get the per-vCPU identifier of an event.
+ *
+ * Special value #TRACE_VCPU_EVENT_NONE means the event is not vCPU-specific
+ * (does not have the "vcpu" property).
+ */
+static uint32_t trace_event_get_vcpu_id(TraceEvent *ev);
+
+/**
+ * trace_event_is_vcpu:
+ *
+ * Whether this is a per-vCPU event.
+ */
+static bool trace_event_is_vcpu(TraceEvent *ev);
+
+/**
+ * trace_event_get_name:
+ *
+ * Get the name of an event.
+ */
+static const char * trace_event_get_name(TraceEvent *ev);
+
+/**
+ * trace_event_get_state:
+ * @id: Event identifier name.
+ *
+ * Get the tracing state of an event, both static and the QEMU dynamic state.
+ *
+ * If the event has the disabled property, the check will have no performance
+ * impact.
+ */
+#define trace_event_get_state(id)                       \
+    ((id ##_ENABLED) && trace_event_get_state_dynamic_by_id(id))
+
+/**
+ * trace_event_get_state_backends:
+ * @id: Event identifier name.
+ *
+ * Get the tracing state of an event, both static and dynamic state from all
+ * compiled-in backends.
+ *
+ * If the event has the disabled property, the check will have no performance
+ * impact.
+ *
+ * Returns: true if at least one backend has the event enabled and the event
+ * does not have the disabled property.
+ */
+#define trace_event_get_state_backends(id)              \
+    ((id ##_ENABLED) && id ##_BACKEND_DSTATE())
+
+/**
+ * trace_event_get_state_static:
+ * @id: Event identifier.
+ *
+ * Get the static tracing state of an event.
+ *
+ * Use the define 'TRACE_${EVENT_NAME}_ENABLED' for compile-time checks (it will
+ * be set to 1 or 0 according to the presence of the disabled property).
+ */
+static bool trace_event_get_state_static(TraceEvent *ev);
+
+/**
+ * trace_event_get_state_dynamic:
+ *
+ * Get the dynamic tracing state of an event.
+ *
+ * If the event has the 'vcpu' property, gets the OR'ed state of all vCPUs.
+ */
+static bool trace_event_get_state_dynamic(TraceEvent *ev);
+
+/**
+ * trace_event_set_state_dynamic:
+ *
+ * Set the dynamic tracing state of an event.
+ *
+ * If the event has the 'vcpu' property, sets the state on all vCPUs.
+ *
+ * Pre-condition: trace_event_get_state_static(ev) == true
+ */
+void trace_event_set_state_dynamic(TraceEvent *ev, bool state);
+
+/**
+ * trace_event_set_vcpu_state_dynamic:
+ *
+ * Set the dynamic tracing state of an event for the given vCPU.
+ *
+ * Pre-condition: trace_event_get_vcpu_state_static(ev) == true
+ *
+ * Note: Changes for execution-time events with the 'tcg' property will not be
+ *       propagated until the next TB is executed (iff executing in TCG mode).
+ */
+void trace_event_set_vcpu_state_dynamic(CPUState *vcpu,
+                                        TraceEvent *ev, bool state);
+
+
+
+/**
+ * trace_init_backends:
+ *
+ * Initialize the tracing backend.
+ *
+ * Returns: Whether the backends could be successfully initialized.
+ */
+bool trace_init_backends(void);
+
+/**
+ * trace_init_file:
+ *
+ * Record the name of the output file for the tracing backend.
+ * Exits if no selected backend does not support specifying the
+ * output file, and a file was specified with "-trace file=...".
+ */
+void trace_init_file(void);
+
+/**
+ * trace_init_vcpu:
+ * @vcpu: Added vCPU.
+ *
+ * Set initial dynamic event state for a hot-plugged vCPU.
+ */
+void trace_init_vcpu(CPUState *vcpu);
+
+/**
+ * trace_fini_vcpu:
+ * @vcpu: Removed vCPU.
+ *
+ * Disable dynamic event state for a hot-unplugged vCPU.
+ */
+void trace_fini_vcpu(CPUState *vcpu);
+
+/**
+ * trace_list_events:
+ *
+ * List all available events.
+ */
+void trace_list_events(void);
+
+/**
+ * trace_enable_events:
+ * @line_buf: A string with a glob pattern of events to be enabled or,
+ *            if the string starts with '-', disabled.
+ *
+ * Enable or disable matching events.
+ */
+void trace_enable_events(const char *line_buf);
+
+/**
+ * Definition of QEMU options describing trace subsystem configuration
+ */
+extern QemuOptsList qemu_trace_opts;
+
+/**
+ * trace_opt_parse:
+ * @optarg: A string argument of --trace command line argument
+ *
+ * Initialize tracing subsystem.
+ */
+void trace_opt_parse(const char *optarg);
+
+/**
+ * trace_get_vcpu_event_count:
+ *
+ * Return the number of known vcpu-specific events
+ */
+uint32_t trace_get_vcpu_event_count(void);
+
+
+#include "control-internal.h"
+
+#endif /* TRACE__CONTROL_H */
diff --git a/trace/event-internal.h b/trace/event-internal.h
new file mode 100644
index 000000000..f63500b37
--- /dev/null
+++ b/trace/event-internal.h
@@ -0,0 +1,44 @@
+/*
+ * Interface for configuring and controlling the state of tracing events.
+ *
+ * Copyright (C) 2012-2016 Lluís Vilanova 
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
+ * See the COPYING file in the top-level directory.
+ */
+
+#ifndef TRACE__EVENT_INTERNAL_H
+#define TRACE__EVENT_INTERNAL_H
+
+/*
+ * Special value for TraceEvent.vcpu_id field to indicate
+ * that the event is not VCPU specific
+ */
+#define TRACE_VCPU_EVENT_NONE ((uint32_t)-1)
+
+/**
+ * TraceEvent:
+ * @id: Unique event identifier.
+ * @vcpu_id: Unique per-vCPU event identifier.
+ * @name: Event name.
+ * @sstate: Static tracing state.
+ * @dstate: Dynamic tracing state
+ *
+ * Interpretation of @dstate depends on whether the event has the 'vcpu'
+ *  property:
+ * - false: Boolean value indicating whether the event is active.
+ * - true : Integral counting the number of vCPUs that have this event enabled.
+ *
+ * Opaque generic description of a tracing event.
+ */
+typedef struct TraceEvent {
+    uint32_t id;
+    uint32_t vcpu_id;
+    const char * name;
+    const bool sstate;
+    uint16_t *dstate;
+} TraceEvent;
+
+void trace_event_set_state_dynamic_init(TraceEvent *ev, bool state);
+
+#endif /* TRACE__EVENT_INTERNAL_H */
diff --git a/trace/ftrace.c b/trace/ftrace.c
new file mode 100644
index 000000000..9749543d9
--- /dev/null
+++ b/trace/ftrace.c
@@ -0,0 +1,95 @@
+/*
+ * Ftrace trace backend
+ *
+ * Copyright (C) 2013 Hitachi, Ltd.
+ * Created by Eiichi Tsukata 
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2.  See
+ * the COPYING file in the top-level directory.
+ *
+ */
+
+#include "qemu/osdep.h"
+#include "trace/control.h"
+#include "trace/ftrace.h"
+
+int trace_marker_fd;
+
+static int find_mount(char *mount_point, const char *fstype)
+{
+    char type[100];
+    FILE *fp;
+    int ret = 0;
+
+    fp = fopen("/proc/mounts", "r");
+    if (fp == NULL) {
+        return 0;
+    }
+
+    while (fscanf(fp, "%*s %" STR(PATH_MAX) "s %99s %*s %*d %*d\n",
+                  mount_point, type) == 2) {
+        if (strcmp(type, fstype) == 0) {
+            ret = 1;
+            break;
+        }
+    }
+    fclose(fp);
+
+    return ret;
+}
+
+bool ftrace_init(void)
+{
+    char mount_point[PATH_MAX];
+    char path[PATH_MAX];
+    int tracefs_found;
+    int trace_fd = -1;
+    const char *subdir = "";
+
+    tracefs_found = find_mount(mount_point, "tracefs");
+    if (!tracefs_found) {
+        tracefs_found = find_mount(mount_point, "debugfs");
+        subdir = "/tracing";
+    }
+
+    if (tracefs_found) {
+        if (snprintf(path, PATH_MAX, "%s%s/tracing_on", mount_point, subdir)
+                >= sizeof(path)) {
+            fprintf(stderr, "Using tracefs mountpoint would exceed PATH_MAX\n");
+            return false;
+        }
+        trace_fd = open(path, O_WRONLY);
+        if (trace_fd < 0) {
+            if (errno == EACCES) {
+                trace_marker_fd = open("/dev/null", O_WRONLY);
+                if (trace_marker_fd != -1) {
+                    return true;
+                }
+            }
+            perror("Could not open ftrace 'tracing_on' file");
+            return false;
+        } else {
+            if (write(trace_fd, "1", 1) < 0) {
+                perror("Could not write to 'tracing_on' file");
+                close(trace_fd);
+                return false;
+            }
+            close(trace_fd);
+        }
+        if (snprintf(path, PATH_MAX, "%s%s/trace_marker", mount_point, subdir)
+                >= sizeof(path)) {
+            fprintf(stderr, "Using tracefs mountpoint would exceed PATH_MAX\n");
+            return false;
+        }
+        trace_marker_fd = open(path, O_WRONLY);
+        if (trace_marker_fd < 0) {
+            perror("Could not open ftrace 'trace_marker' file");
+            return false;
+        }
+    } else {
+        fprintf(stderr, "tracefs is not mounted\n");
+        return false;
+    }
+
+    return true;
+}
diff --git a/trace/ftrace.h b/trace/ftrace.h
new file mode 100644
index 000000000..cb5e35d21
--- /dev/null
+++ b/trace/ftrace.h
@@ -0,0 +1,12 @@
+#ifndef TRACE_FTRACE_H
+#define TRACE_FTRACE_H
+
+#define MAX_TRACE_STRLEN 512
+#define _STR(x) #x
+#define STR(x) _STR(x)
+
+extern int trace_marker_fd;
+
+bool ftrace_init(void);
+
+#endif /* TRACE_FTRACE_H */
diff --git a/trace/mem-internal.h b/trace/mem-internal.h
new file mode 100644
index 000000000..8b72b678f
--- /dev/null
+++ b/trace/mem-internal.h
@@ -0,0 +1,50 @@
+/*
+ * Helper functions for guest memory tracing
+ *
+ * Copyright (C) 2016 Lluís Vilanova 
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
+ * See the COPYING file in the top-level directory.
+ */
+
+#ifndef TRACE__MEM_INTERNAL_H
+#define TRACE__MEM_INTERNAL_H
+
+#define TRACE_MEM_SZ_SHIFT_MASK 0xf /* size shift mask */
+#define TRACE_MEM_SE (1ULL << 4)    /* sign extended (y/n) */
+#define TRACE_MEM_BE (1ULL << 5)    /* big endian (y/n) */
+#define TRACE_MEM_ST (1ULL << 6)    /* store (y/n) */
+#define TRACE_MEM_MMU_SHIFT 8       /* mmu idx */
+
+static inline uint16_t trace_mem_build_info(
+    int size_shift, bool sign_extend, MemOp endianness,
+    bool store, unsigned int mmu_idx)
+{
+    uint16_t res;
+
+    res = size_shift & TRACE_MEM_SZ_SHIFT_MASK;
+    if (sign_extend) {
+        res |= TRACE_MEM_SE;
+    }
+    if (endianness == MO_BE) {
+        res |= TRACE_MEM_BE;
+    }
+    if (store) {
+        res |= TRACE_MEM_ST;
+    }
+#ifdef CONFIG_SOFTMMU
+    res |= mmu_idx << TRACE_MEM_MMU_SHIFT;
+#endif
+    return res;
+}
+
+static inline uint16_t trace_mem_get_info(MemOp op,
+                                          unsigned int mmu_idx,
+                                          bool store)
+{
+    return trace_mem_build_info(op & MO_SIZE, !!(op & MO_SIGN),
+                                op & MO_BSWAP, store,
+                                mmu_idx);
+}
+
+#endif /* TRACE__MEM_INTERNAL_H */
diff --git a/trace/mem.h b/trace/mem.h
new file mode 100644
index 000000000..9644f592b
--- /dev/null
+++ b/trace/mem.h
@@ -0,0 +1,35 @@
+/*
+ * Helper functions for guest memory tracing
+ *
+ * Copyright (C) 2016 Lluís Vilanova 
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
+ * See the COPYING file in the top-level directory.
+ */
+
+#ifndef TRACE__MEM_H
+#define TRACE__MEM_H
+
+#include "tcg/tcg.h"
+
+
+/**
+ * trace_mem_get_info:
+ *
+ * Return a value for the 'info' argument in guest memory access traces.
+ */
+static uint16_t trace_mem_get_info(MemOp op, unsigned int mmu_idx, bool store);
+
+/**
+ * trace_mem_build_info:
+ *
+ * Return a value for the 'info' argument in guest memory access traces.
+ */
+static uint16_t trace_mem_build_info(int size_shift, bool sign_extend,
+                                     MemOp endianness, bool store,
+                                     unsigned int mmuidx);
+
+
+#include "trace/mem-internal.h"
+
+#endif /* TRACE__MEM_H */
diff --git a/trace/meson.build b/trace/meson.build
new file mode 100644
index 000000000..843ea1449
--- /dev/null
+++ b/trace/meson.build
@@ -0,0 +1,95 @@
+specific_ss.add(files('control-target.c'))
+
+trace_events_files = []
+foreach dir : [ '.' ] + trace_events_subdirs
+  trace_events_file = meson.source_root() / dir / 'trace-events'
+  trace_events_files += [ trace_events_file ]
+  group_name = dir == '.' ? 'root' : dir.underscorify()
+  group = '--group=' + group_name
+  fmt = '@0@-' + group_name + '.@1@'
+
+  trace_h = custom_target(fmt.format('trace', 'h'),
+                          output: fmt.format('trace', 'h'),
+                          input: trace_events_file,
+                          command: [ tracetool, group, '--format=h', '@INPUT@' ],
+                          capture: true)
+  genh += trace_h
+  trace_c = custom_target(fmt.format('trace', 'c'),
+                          output: fmt.format('trace', 'c'),
+                          input: trace_events_file,
+                          command: [ tracetool, group, '--format=c', '@INPUT@' ],
+                          capture: true)
+  if 'CONFIG_TRACE_UST' in config_host
+    trace_ust_h = custom_target(fmt.format('trace-ust', 'h'),
+                                output: fmt.format('trace-ust', 'h'),
+                                input: trace_events_file,
+                                command: [ tracetool, group, '--format=ust-events-h', '@INPUT@' ],
+                                capture: true)
+    trace_ss.add(trace_ust_h, lttng, urcubp)
+    genh += trace_ust_h
+  endif
+  trace_ss.add(trace_h, trace_c)
+  if 'CONFIG_TRACE_DTRACE' in config_host
+    trace_dtrace = custom_target(fmt.format('trace-dtrace', 'dtrace'),
+                                 output: fmt.format('trace-dtrace', 'dtrace'),
+                                 input: trace_events_file,
+                                 command: [ tracetool, group, '--format=d', '@INPUT@' ],
+                                 capture: true)
+    trace_dtrace_h = custom_target(fmt.format('trace-dtrace', 'h'),
+                                   output: fmt.format('trace-dtrace', 'h'),
+                                   input: trace_dtrace,
+                                   command: [ 'dtrace', '-DSTAP_SDT_V2', '-o', '@OUTPUT@', '-h', '-s', '@INPUT@' ])
+    trace_ss.add(trace_dtrace_h)
+    if host_machine.system() != 'darwin'
+      trace_dtrace_o = custom_target(fmt.format('trace-dtrace', 'o'),
+                                     output: fmt.format('trace-dtrace', 'o'),
+                                     input: trace_dtrace,
+                                     command: [ 'dtrace', '-DSTAP_SDT_V2', '-o', '@OUTPUT@', '-G', '-s', '@INPUT@' ])
+      trace_ss.add(trace_dtrace_o)
+    endif
+
+    genh += trace_dtrace_h
+  endif
+endforeach
+
+trace_events_all = custom_target('trace-events-all',
+                                 output: 'trace-events-all',
+                                 input: trace_events_files,
+                                 command: [ 'cat', '@INPUT@' ],
+                                 capture: true,
+                                 install: true,
+                                 install_dir: qemu_datadir)
+
+foreach d : [
+  ['generated-tcg-tracers.h', 'tcg-h'],
+  ['generated-helpers.c', 'tcg-helper-c'],
+  ['generated-helpers.h', 'tcg-helper-h'],
+  ['generated-helpers-wrappers.h', 'tcg-helper-wrapper-h'],
+]
+  gen = custom_target(d[0],
+                output: d[0],
+                input: meson.source_root() / 'trace-events',
+                command: [ tracetool, '--group=root', '--format=@0@'.format(d[1]), '@INPUT@' ],
+                capture: true)
+  specific_ss.add(gen)
+endforeach
+
+if 'CONFIG_TRACE_UST' in config_host
+  trace_ust_all_h = custom_target('trace-ust-all.h',
+                                  output: 'trace-ust-all.h',
+                                  input: trace_events_files,
+                                  command: [ tracetool, '--group=all', '--format=ust-events-h', '@INPUT@' ],
+                                  capture: true)
+  trace_ust_all_c = custom_target('trace-ust-all.c',
+                                  output: 'trace-ust-all.c',
+                                  input: trace_events_files,
+                                  command: [ tracetool, '--group=all', '--format=ust-events-c', '@INPUT@' ],
+                                  capture: true)
+  trace_ss.add(trace_ust_all_h, trace_ust_all_c)
+  genh += trace_ust_all_h
+endif
+
+trace_ss.add(when: 'CONFIG_TRACE_SIMPLE', if_true: files('simple.c'))
+trace_ss.add(when: 'CONFIG_TRACE_FTRACE', if_true: files('ftrace.c'))
+trace_ss.add(files('control.c'))
+trace_ss.add(files('qmp.c'))
diff --git a/trace/qmp.c b/trace/qmp.c
new file mode 100644
index 000000000..38246e1aa
--- /dev/null
+++ b/trace/qmp.c
@@ -0,0 +1,170 @@
+/*
+ * QMP commands for tracing events.
+ *
+ * Copyright (C) 2014-2016 Lluís Vilanova 
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
+ * See the COPYING file in the top-level directory.
+ */
+
+#include "qemu/osdep.h"
+#include "qapi/error.h"
+#include "qapi/qapi-commands-trace.h"
+#include "control-vcpu.h"
+
+
+static CPUState *get_cpu(bool has_vcpu, int vcpu, Error **errp)
+{
+    if (has_vcpu) {
+        CPUState *cpu = qemu_get_cpu(vcpu);
+        if (cpu == NULL) {
+            error_setg(errp, "invalid vCPU index %u", vcpu);
+        }
+        return cpu;
+    } else {
+        return NULL;
+    }
+}
+
+static bool check_events(bool has_vcpu, bool ignore_unavailable, bool is_pattern,
+                         const char *name, Error **errp)
+{
+    if (!is_pattern) {
+        TraceEvent *ev = trace_event_name(name);
+
+        /* error for non-existing event */
+        if (ev == NULL) {
+            error_setg(errp, "unknown event \"%s\"", name);
+            return false;
+        }
+
+        /* error for non-vcpu event */
+        if (has_vcpu && !trace_event_is_vcpu(ev)) {
+            error_setg(errp, "event \"%s\" is not vCPU-specific", name);
+            return false;
+        }
+
+        /* error for unavailable event */
+        if (!ignore_unavailable && !trace_event_get_state_static(ev)) {
+            error_setg(errp, "event \"%s\" is disabled", name);
+            return false;
+        }
+
+        return true;
+    } else {
+        /* error for unavailable events */
+        TraceEventIter iter;
+        TraceEvent *ev;
+        trace_event_iter_init(&iter, name);
+        while ((ev = trace_event_iter_next(&iter)) != NULL) {
+            if (!ignore_unavailable && !trace_event_get_state_static(ev)) {
+                error_setg(errp, "event \"%s\" is disabled", trace_event_get_name(ev));
+                return false;
+            }
+        }
+        return true;
+    }
+}
+
+TraceEventInfoList *qmp_trace_event_get_state(const char *name,
+                                              bool has_vcpu, int64_t vcpu,
+                                              Error **errp)
+{
+    Error *err = NULL;
+    TraceEventInfoList *events = NULL;
+    TraceEventIter iter;
+    TraceEvent *ev;
+    bool is_pattern = trace_event_is_pattern(name);
+    CPUState *cpu;
+
+    /* Check provided vcpu */
+    cpu = get_cpu(has_vcpu, vcpu, &err);
+    if (err) {
+        error_propagate(errp, err);
+        return NULL;
+    }
+
+    /* Check events */
+    if (!check_events(has_vcpu, true, is_pattern, name, errp)) {
+        return NULL;
+    }
+
+    /* Get states (all errors checked above) */
+    trace_event_iter_init(&iter, name);
+    while ((ev = trace_event_iter_next(&iter)) != NULL) {
+        TraceEventInfoList *elem;
+        bool is_vcpu = trace_event_is_vcpu(ev);
+        if (has_vcpu && !is_vcpu) {
+            continue;
+        }
+
+        elem = g_new(TraceEventInfoList, 1);
+        elem->value = g_new(TraceEventInfo, 1);
+        elem->value->vcpu = is_vcpu;
+        elem->value->name = g_strdup(trace_event_get_name(ev));
+
+        if (!trace_event_get_state_static(ev)) {
+            elem->value->state = TRACE_EVENT_STATE_UNAVAILABLE;
+        } else {
+            if (has_vcpu) {
+                if (is_vcpu) {
+                    if (trace_event_get_vcpu_state_dynamic(cpu, ev)) {
+                        elem->value->state = TRACE_EVENT_STATE_ENABLED;
+                    } else {
+                        elem->value->state = TRACE_EVENT_STATE_DISABLED;
+                    }
+                }
+                /* else: already skipped above */
+            } else {
+                if (trace_event_get_state_dynamic(ev)) {
+                    elem->value->state = TRACE_EVENT_STATE_ENABLED;
+                } else {
+                    elem->value->state = TRACE_EVENT_STATE_DISABLED;
+                }
+            }
+        }
+        elem->next = events;
+        events = elem;
+    }
+
+    return events;
+}
+
+void qmp_trace_event_set_state(const char *name, bool enable,
+                               bool has_ignore_unavailable, bool ignore_unavailable,
+                               bool has_vcpu, int64_t vcpu,
+                               Error **errp)
+{
+    Error *err = NULL;
+    TraceEventIter iter;
+    TraceEvent *ev;
+    bool is_pattern = trace_event_is_pattern(name);
+    CPUState *cpu;
+
+    /* Check provided vcpu */
+    cpu = get_cpu(has_vcpu, vcpu, &err);
+    if (err) {
+        error_propagate(errp, err);
+        return;
+    }
+
+    /* Check events */
+    if (!check_events(has_vcpu, has_ignore_unavailable && ignore_unavailable,
+                      is_pattern, name, errp)) {
+        return;
+    }
+
+    /* Apply changes (all errors checked above) */
+    trace_event_iter_init(&iter, name);
+    while ((ev = trace_event_iter_next(&iter)) != NULL) {
+        if (!trace_event_get_state_static(ev) ||
+            (has_vcpu && !trace_event_is_vcpu(ev))) {
+            continue;
+        }
+        if (has_vcpu) {
+            trace_event_set_vcpu_state_dynamic(cpu, ev, enable);
+        } else {
+            trace_event_set_state_dynamic(ev, enable);
+        }
+    }
+}
diff --git a/trace/simple.c b/trace/simple.c
new file mode 100644
index 000000000..9cd2ed1fb
--- /dev/null
+++ b/trace/simple.c
@@ -0,0 +1,424 @@
+/*
+ * Simple trace backend
+ *
+ * Copyright IBM, Corp. 2010
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2.  See
+ * the COPYING file in the top-level directory.
+ *
+ */
+
+#include "qemu/osdep.h"
+#ifndef _WIN32
+#include 
+#endif
+#include "qemu/timer.h"
+#include "trace/control.h"
+#include "trace/simple.h"
+#include "qemu/error-report.h"
+#include "qemu/qemu-print.h"
+
+/** Trace file header event ID, picked to avoid conflict with real event IDs */
+#define HEADER_EVENT_ID (~(uint64_t)0)
+
+/** Trace file magic number */
+#define HEADER_MAGIC 0xf2b177cb0aa429b4ULL
+
+/** Trace file version number, bump if format changes */
+#define HEADER_VERSION 4
+
+/** Records were dropped event ID */
+#define DROPPED_EVENT_ID (~(uint64_t)0 - 1)
+
+/** Trace record is valid */
+#define TRACE_RECORD_VALID ((uint64_t)1 << 63)
+
+/*
+ * Trace records are written out by a dedicated thread.  The thread waits for
+ * records to become available, writes them out, and then waits again.
+ */
+static GMutex trace_lock;
+static GCond trace_available_cond;
+static GCond trace_empty_cond;
+
+static bool trace_available;
+static bool trace_writeout_enabled;
+
+enum {
+    TRACE_BUF_LEN = 4096 * 64,
+    TRACE_BUF_FLUSH_THRESHOLD = TRACE_BUF_LEN / 4,
+};
+
+uint8_t trace_buf[TRACE_BUF_LEN];
+static volatile gint trace_idx;
+static unsigned int writeout_idx;
+static volatile gint dropped_events;
+static uint32_t trace_pid;
+static FILE *trace_fp;
+static char *trace_file_name;
+
+#define TRACE_RECORD_TYPE_MAPPING 0
+#define TRACE_RECORD_TYPE_EVENT   1
+
+/* * Trace buffer entry */
+typedef struct {
+    uint64_t event; /* event ID value */
+    uint64_t timestamp_ns;
+    uint32_t length;   /*    in bytes */
+    uint32_t pid;
+    uint64_t arguments[];
+} TraceRecord;
+
+typedef struct {
+    uint64_t header_event_id; /* HEADER_EVENT_ID */
+    uint64_t header_magic;    /* HEADER_MAGIC    */
+    uint64_t header_version;  /* HEADER_VERSION  */
+} TraceLogHeader;
+
+
+static void read_from_buffer(unsigned int idx, void *dataptr, size_t size);
+static unsigned int write_to_buffer(unsigned int idx, void *dataptr, size_t size);
+
+static void clear_buffer_range(unsigned int idx, size_t len)
+{
+    uint32_t num = 0;
+    while (num < len) {
+        if (idx >= TRACE_BUF_LEN) {
+            idx = idx % TRACE_BUF_LEN;
+        }
+        trace_buf[idx++] = 0;
+        num++;
+    }
+}
+/**
+ * Read a trace record from the trace buffer
+ *
+ * @idx         Trace buffer index
+ * @record      Trace record to fill
+ *
+ * Returns false if the record is not valid.
+ */
+static bool get_trace_record(unsigned int idx, TraceRecord **recordptr)
+{
+    uint64_t event_flag = 0;
+    TraceRecord record;
+    /* read the event flag to see if its a valid record */
+    read_from_buffer(idx, &record, sizeof(event_flag));
+
+    if (!(record.event & TRACE_RECORD_VALID)) {
+        return false;
+    }
+
+    smp_rmb(); /* read memory barrier before accessing record */
+    /* read the record header to know record length */
+    read_from_buffer(idx, &record, sizeof(TraceRecord));
+    *recordptr = malloc(record.length); /* don't use g_malloc, can deadlock when traced */
+    /* make a copy of record to avoid being overwritten */
+    read_from_buffer(idx, *recordptr, record.length);
+    smp_rmb(); /* memory barrier before clearing valid flag */
+    (*recordptr)->event &= ~TRACE_RECORD_VALID;
+    /* clear the trace buffer range for consumed record otherwise any byte
+     * with its MSB set may be considered as a valid event id when the writer
+     * thread crosses this range of buffer again.
+     */
+    clear_buffer_range(idx, record.length);
+    return true;
+}
+
+/**
+ * Kick writeout thread
+ *
+ * @wait        Whether to wait for writeout thread to complete
+ */
+static void flush_trace_file(bool wait)
+{
+    g_mutex_lock(&trace_lock);
+    trace_available = true;
+    g_cond_signal(&trace_available_cond);
+
+    if (wait) {
+        g_cond_wait(&trace_empty_cond, &trace_lock);
+    }
+
+    g_mutex_unlock(&trace_lock);
+}
+
+static void wait_for_trace_records_available(void)
+{
+    g_mutex_lock(&trace_lock);
+    while (!(trace_available && trace_writeout_enabled)) {
+        g_cond_signal(&trace_empty_cond);
+        g_cond_wait(&trace_available_cond, &trace_lock);
+    }
+    trace_available = false;
+    g_mutex_unlock(&trace_lock);
+}
+
+static gpointer writeout_thread(gpointer opaque)
+{
+    TraceRecord *recordptr;
+    union {
+        TraceRecord rec;
+        uint8_t bytes[sizeof(TraceRecord) + sizeof(uint64_t)];
+    } dropped;
+    unsigned int idx = 0;
+    int dropped_count;
+    size_t unused __attribute__ ((unused));
+    uint64_t type = TRACE_RECORD_TYPE_EVENT;
+
+    for (;;) {
+        wait_for_trace_records_available();
+
+        if (g_atomic_int_get(&dropped_events)) {
+            dropped.rec.event = DROPPED_EVENT_ID;
+            dropped.rec.timestamp_ns = get_clock();
+            dropped.rec.length = sizeof(TraceRecord) + sizeof(uint64_t);
+            dropped.rec.pid = trace_pid;
+            do {
+                dropped_count = g_atomic_int_get(&dropped_events);
+            } while (!g_atomic_int_compare_and_exchange(&dropped_events,
+                                                        dropped_count, 0));
+            dropped.rec.arguments[0] = dropped_count;
+            unused = fwrite(&type, sizeof(type), 1, trace_fp);
+            unused = fwrite(&dropped.rec, dropped.rec.length, 1, trace_fp);
+        }
+
+        while (get_trace_record(idx, &recordptr)) {
+            unused = fwrite(&type, sizeof(type), 1, trace_fp);
+            unused = fwrite(recordptr, recordptr->length, 1, trace_fp);
+            writeout_idx += recordptr->length;
+            free(recordptr); /* don't use g_free, can deadlock when traced */
+            idx = writeout_idx % TRACE_BUF_LEN;
+        }
+
+        fflush(trace_fp);
+    }
+    return NULL;
+}
+
+void trace_record_write_u64(TraceBufferRecord *rec, uint64_t val)
+{
+    rec->rec_off = write_to_buffer(rec->rec_off, &val, sizeof(uint64_t));
+}
+
+void trace_record_write_str(TraceBufferRecord *rec, const char *s, uint32_t slen)
+{
+    /* Write string length first */
+    rec->rec_off = write_to_buffer(rec->rec_off, &slen, sizeof(slen));
+    /* Write actual string now */
+    rec->rec_off = write_to_buffer(rec->rec_off, (void*)s, slen);
+}
+
+int trace_record_start(TraceBufferRecord *rec, uint32_t event, size_t datasize)
+{
+    unsigned int idx, rec_off, old_idx, new_idx;
+    uint32_t rec_len = sizeof(TraceRecord) + datasize;
+    uint64_t event_u64 = event;
+    uint64_t timestamp_ns = get_clock();
+
+    do {
+        old_idx = g_atomic_int_get(&trace_idx);
+        smp_rmb();
+        new_idx = old_idx + rec_len;
+
+        if (new_idx - writeout_idx > TRACE_BUF_LEN) {
+            /* Trace Buffer Full, Event dropped ! */
+            g_atomic_int_inc(&dropped_events);
+            return -ENOSPC;
+        }
+    } while (!g_atomic_int_compare_and_exchange(&trace_idx, old_idx, new_idx));
+
+    idx = old_idx % TRACE_BUF_LEN;
+
+    rec_off = idx;
+    rec_off = write_to_buffer(rec_off, &event_u64, sizeof(event_u64));
+    rec_off = write_to_buffer(rec_off, ×tamp_ns, sizeof(timestamp_ns));
+    rec_off = write_to_buffer(rec_off, &rec_len, sizeof(rec_len));
+    rec_off = write_to_buffer(rec_off, &trace_pid, sizeof(trace_pid));
+
+    rec->tbuf_idx = idx;
+    rec->rec_off  = (idx + sizeof(TraceRecord)) % TRACE_BUF_LEN;
+    return 0;
+}
+
+static void read_from_buffer(unsigned int idx, void *dataptr, size_t size)
+{
+    uint8_t *data_ptr = dataptr;
+    uint32_t x = 0;
+    while (x < size) {
+        if (idx >= TRACE_BUF_LEN) {
+            idx = idx % TRACE_BUF_LEN;
+        }
+        data_ptr[x++] = trace_buf[idx++];
+    }
+}
+
+static unsigned int write_to_buffer(unsigned int idx, void *dataptr, size_t size)
+{
+    uint8_t *data_ptr = dataptr;
+    uint32_t x = 0;
+    while (x < size) {
+        if (idx >= TRACE_BUF_LEN) {
+            idx = idx % TRACE_BUF_LEN;
+        }
+        trace_buf[idx++] = data_ptr[x++];
+    }
+    return idx; /* most callers wants to know where to write next */
+}
+
+void trace_record_finish(TraceBufferRecord *rec)
+{
+    TraceRecord record;
+    read_from_buffer(rec->tbuf_idx, &record, sizeof(TraceRecord));
+    smp_wmb(); /* write barrier before marking as valid */
+    record.event |= TRACE_RECORD_VALID;
+    write_to_buffer(rec->tbuf_idx, &record, sizeof(TraceRecord));
+
+    if (((unsigned int)g_atomic_int_get(&trace_idx) - writeout_idx)
+        > TRACE_BUF_FLUSH_THRESHOLD) {
+        flush_trace_file(false);
+    }
+}
+
+static int st_write_event_mapping(void)
+{
+    uint64_t type = TRACE_RECORD_TYPE_MAPPING;
+    TraceEventIter iter;
+    TraceEvent *ev;
+
+    trace_event_iter_init(&iter, NULL);
+    while ((ev = trace_event_iter_next(&iter)) != NULL) {
+        uint64_t id = trace_event_get_id(ev);
+        const char *name = trace_event_get_name(ev);
+        uint32_t len = strlen(name);
+        if (fwrite(&type, sizeof(type), 1, trace_fp) != 1 ||
+            fwrite(&id, sizeof(id), 1, trace_fp) != 1 ||
+            fwrite(&len, sizeof(len), 1, trace_fp) != 1 ||
+            fwrite(name, len, 1, trace_fp) != 1) {
+            return -1;
+        }
+    }
+
+    return 0;
+}
+
+/**
+ * Enable / disable tracing, return whether it was enabled.
+ *
+ * @enable: enable if %true, else disable.
+ */
+bool st_set_trace_file_enabled(bool enable)
+{
+    bool was_enabled = trace_fp;
+
+    if (enable == !!trace_fp) {
+        return was_enabled;     /* no change */
+    }
+
+    /* Halt trace writeout */
+    flush_trace_file(true);
+    trace_writeout_enabled = false;
+    flush_trace_file(true);
+
+    if (enable) {
+        static const TraceLogHeader header = {
+            .header_event_id = HEADER_EVENT_ID,
+            .header_magic = HEADER_MAGIC,
+            /* Older log readers will check for version at next location */
+            .header_version = HEADER_VERSION,
+        };
+
+        trace_fp = fopen(trace_file_name, "wb");
+        if (!trace_fp) {
+            return was_enabled;
+        }
+
+        if (fwrite(&header, sizeof header, 1, trace_fp) != 1 ||
+            st_write_event_mapping() < 0) {
+            fclose(trace_fp);
+            trace_fp = NULL;
+            return was_enabled;
+        }
+
+        /* Resume trace writeout */
+        trace_writeout_enabled = true;
+        flush_trace_file(false);
+    } else {
+        fclose(trace_fp);
+        trace_fp = NULL;
+    }
+    return was_enabled;
+}
+
+/**
+ * Set the name of a trace file
+ *
+ * @file        The trace file name or NULL for the default name- set at
+ *              config time
+ */
+void st_set_trace_file(const char *file)
+{
+    bool saved_enable = st_set_trace_file_enabled(false);
+
+    g_free(trace_file_name);
+
+    if (!file) {
+        /* Type cast needed for Windows where getpid() returns an int. */
+        trace_file_name = g_strdup_printf(CONFIG_TRACE_FILE, (pid_t)getpid());
+    } else {
+        trace_file_name = g_strdup_printf("%s", file);
+    }
+
+    st_set_trace_file_enabled(saved_enable);
+}
+
+void st_print_trace_file_status(void)
+{
+    qemu_printf("Trace file \"%s\" %s.\n",
+                trace_file_name, trace_fp ? "on" : "off");
+}
+
+void st_flush_trace_buffer(void)
+{
+    flush_trace_file(true);
+}
+
+/* Helper function to create a thread with signals blocked.  Use glib's
+ * portable threads since QEMU abstractions cannot be used due to reentrancy in
+ * the tracer.  Also note the signal masking on POSIX hosts so that the thread
+ * does not steal signals when the rest of the program wants them blocked.
+ */
+static GThread *trace_thread_create(GThreadFunc fn)
+{
+    GThread *thread;
+#ifndef _WIN32
+    sigset_t set, oldset;
+
+    sigfillset(&set);
+    pthread_sigmask(SIG_SETMASK, &set, &oldset);
+#endif
+
+    thread = g_thread_new("trace-thread", fn, NULL);
+
+#ifndef _WIN32
+    pthread_sigmask(SIG_SETMASK, &oldset, NULL);
+#endif
+
+    return thread;
+}
+
+bool st_init(void)
+{
+    GThread *thread;
+
+    trace_pid = getpid();
+
+    thread = trace_thread_create(writeout_thread);
+    if (!thread) {
+        warn_report("unable to initialize simple trace backend");
+        return false;
+    }
+
+    atexit(st_flush_trace_buffer);
+    return true;
+}
diff --git a/trace/simple.h b/trace/simple.h
new file mode 100644
index 000000000..26ccbc8b8
--- /dev/null
+++ b/trace/simple.h
@@ -0,0 +1,51 @@
+/*
+ * Simple trace backend
+ *
+ * Copyright IBM, Corp. 2010
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2.  See
+ * the COPYING file in the top-level directory.
+ *
+ */
+
+#ifndef TRACE_SIMPLE_H
+#define TRACE_SIMPLE_H
+
+void st_print_trace_file_status(void);
+bool st_set_trace_file_enabled(bool enable);
+void st_set_trace_file(const char *file);
+bool st_init(void);
+void st_flush_trace_buffer(void);
+
+typedef struct {
+    unsigned int tbuf_idx;
+    unsigned int rec_off;
+} TraceBufferRecord;
+
+/* Note for hackers: Make sure MAX_TRACE_LEN < sizeof(uint32_t) */
+#define MAX_TRACE_STRLEN 512
+/**
+ * Initialize a trace record and claim space for it in the buffer
+ *
+ * @arglen  number of bytes required for arguments
+ */
+int trace_record_start(TraceBufferRecord *rec, uint32_t id, size_t arglen);
+
+/**
+ * Append a 64-bit argument to a trace record
+ */
+void trace_record_write_u64(TraceBufferRecord *rec, uint64_t val);
+
+/**
+ * Append a string argument to a trace record
+ */
+void trace_record_write_str(TraceBufferRecord *rec, const char *s, uint32_t slen);
+
+/**
+ * Mark a trace record completed
+ *
+ * Don't append any more arguments to the trace record after calling this.
+ */
+void trace_record_finish(TraceBufferRecord *rec);
+
+#endif /* TRACE_SIMPLE_H */
diff --git a/util/aio-posix.c b/util/aio-posix.c
new file mode 100644
index 000000000..30f5354b1
--- /dev/null
+++ b/util/aio-posix.c
@@ -0,0 +1,718 @@
+/*
+ * QEMU aio implementation
+ *
+ * Copyright IBM, Corp. 2008
+ *
+ * Authors:
+ *  Anthony Liguori   
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2.  See
+ * the COPYING file in the top-level directory.
+ *
+ * Contributions after 2012-01-13 are licensed under the terms of the
+ * GNU GPL, version 2 or (at your option) any later version.
+ */
+
+#include "qemu/osdep.h"
+#include "block/block.h"
+#include "qemu/main-loop.h"
+#include "qemu/rcu.h"
+#include "qemu/rcu_queue.h"
+#include "qemu/sockets.h"
+#include "qemu/cutils.h"
+#include "trace.h"
+#include "aio-posix.h"
+
+/* Stop userspace polling on a handler if it isn't active for some time */
+#define POLL_IDLE_INTERVAL_NS (7 * NANOSECONDS_PER_SECOND)
+
+bool aio_poll_disabled(AioContext *ctx)
+{
+    return qatomic_read(&ctx->poll_disable_cnt);
+}
+
+void aio_add_ready_handler(AioHandlerList *ready_list,
+                           AioHandler *node,
+                           int revents)
+{
+    QLIST_SAFE_REMOVE(node, node_ready); /* remove from nested parent's list */
+    node->pfd.revents = revents;
+    QLIST_INSERT_HEAD(ready_list, node, node_ready);
+}
+
+static AioHandler *find_aio_handler(AioContext *ctx, int fd)
+{
+    AioHandler *node;
+
+    QLIST_FOREACH(node, &ctx->aio_handlers, node) {
+        if (node->pfd.fd == fd) {
+            if (!QLIST_IS_INSERTED(node, node_deleted)) {
+                return node;
+            }
+        }
+    }
+
+    return NULL;
+}
+
+static bool aio_remove_fd_handler(AioContext *ctx, AioHandler *node)
+{
+    /* If the GSource is in the process of being destroyed then
+     * g_source_remove_poll() causes an assertion failure.  Skip
+     * removal in that case, because glib cleans up its state during
+     * destruction anyway.
+     */
+    if (!g_source_is_destroyed(&ctx->source)) {
+        g_source_remove_poll(&ctx->source, &node->pfd);
+    }
+
+    node->pfd.revents = 0;
+
+    /* If the fd monitor has already marked it deleted, leave it alone */
+    if (QLIST_IS_INSERTED(node, node_deleted)) {
+        return false;
+    }
+
+    /* If a read is in progress, just mark the node as deleted */
+    if (qemu_lockcnt_count(&ctx->list_lock)) {
+        QLIST_INSERT_HEAD_RCU(&ctx->deleted_aio_handlers, node, node_deleted);
+        return false;
+    }
+    /* Otherwise, delete it for real.  We can't just mark it as
+     * deleted because deleted nodes are only cleaned up while
+     * no one is walking the handlers list.
+     */
+    QLIST_SAFE_REMOVE(node, node_poll);
+    QLIST_REMOVE(node, node);
+    return true;
+}
+
+void aio_set_fd_handler(AioContext *ctx,
+                        int fd,
+                        bool is_external,
+                        IOHandler *io_read,
+                        IOHandler *io_write,
+                        AioPollFn *io_poll,
+                        void *opaque)
+{
+    AioHandler *node;
+    AioHandler *new_node = NULL;
+    bool is_new = false;
+    bool deleted = false;
+    int poll_disable_change;
+
+    qemu_lockcnt_lock(&ctx->list_lock);
+
+    node = find_aio_handler(ctx, fd);
+
+    /* Are we deleting the fd handler? */
+    if (!io_read && !io_write && !io_poll) {
+        if (node == NULL) {
+            qemu_lockcnt_unlock(&ctx->list_lock);
+            return;
+        }
+        /* Clean events in order to unregister fd from the ctx epoll. */
+        node->pfd.events = 0;
+
+        poll_disable_change = -!node->io_poll;
+    } else {
+        poll_disable_change = !io_poll - (node && !node->io_poll);
+        if (node == NULL) {
+            is_new = true;
+        }
+        /* Alloc and insert if it's not already there */
+        new_node = g_new0(AioHandler, 1);
+
+        /* Update handler with latest information */
+        new_node->io_read = io_read;
+        new_node->io_write = io_write;
+        new_node->io_poll = io_poll;
+        new_node->opaque = opaque;
+        new_node->is_external = is_external;
+
+        if (is_new) {
+            new_node->pfd.fd = fd;
+        } else {
+            new_node->pfd = node->pfd;
+        }
+        g_source_add_poll(&ctx->source, &new_node->pfd);
+
+        new_node->pfd.events = (io_read ? G_IO_IN | G_IO_HUP | G_IO_ERR : 0);
+        new_node->pfd.events |= (io_write ? G_IO_OUT | G_IO_ERR : 0);
+
+        QLIST_INSERT_HEAD_RCU(&ctx->aio_handlers, new_node, node);
+    }
+
+    /* No need to order poll_disable_cnt writes against other updates;
+     * the counter is only used to avoid wasting time and latency on
+     * iterated polling when the system call will be ultimately necessary.
+     * Changing handlers is a rare event, and a little wasted polling until
+     * the aio_notify below is not an issue.
+     */
+    qatomic_set(&ctx->poll_disable_cnt,
+               qatomic_read(&ctx->poll_disable_cnt) + poll_disable_change);
+
+    ctx->fdmon_ops->update(ctx, node, new_node);
+    if (node) {
+        deleted = aio_remove_fd_handler(ctx, node);
+    }
+    qemu_lockcnt_unlock(&ctx->list_lock);
+    aio_notify(ctx);
+
+    if (deleted) {
+        g_free(node);
+    }
+}
+
+void aio_set_fd_poll(AioContext *ctx, int fd,
+                     IOHandler *io_poll_begin,
+                     IOHandler *io_poll_end)
+{
+    AioHandler *node = find_aio_handler(ctx, fd);
+
+    if (!node) {
+        return;
+    }
+
+    node->io_poll_begin = io_poll_begin;
+    node->io_poll_end = io_poll_end;
+}
+
+void aio_set_event_notifier(AioContext *ctx,
+                            EventNotifier *notifier,
+                            bool is_external,
+                            EventNotifierHandler *io_read,
+                            AioPollFn *io_poll)
+{
+    aio_set_fd_handler(ctx, event_notifier_get_fd(notifier), is_external,
+                       (IOHandler *)io_read, NULL, io_poll, notifier);
+}
+
+void aio_set_event_notifier_poll(AioContext *ctx,
+                                 EventNotifier *notifier,
+                                 EventNotifierHandler *io_poll_begin,
+                                 EventNotifierHandler *io_poll_end)
+{
+    aio_set_fd_poll(ctx, event_notifier_get_fd(notifier),
+                    (IOHandler *)io_poll_begin,
+                    (IOHandler *)io_poll_end);
+}
+
+static bool poll_set_started(AioContext *ctx, bool started)
+{
+    AioHandler *node;
+    bool progress = false;
+
+    if (started == ctx->poll_started) {
+        return false;
+    }
+
+    ctx->poll_started = started;
+
+    qemu_lockcnt_inc(&ctx->list_lock);
+    QLIST_FOREACH(node, &ctx->poll_aio_handlers, node_poll) {
+        IOHandler *fn;
+
+        if (QLIST_IS_INSERTED(node, node_deleted)) {
+            continue;
+        }
+
+        if (started) {
+            fn = node->io_poll_begin;
+        } else {
+            fn = node->io_poll_end;
+        }
+
+        if (fn) {
+            fn(node->opaque);
+        }
+
+        /* Poll one last time in case ->io_poll_end() raced with the event */
+        if (!started) {
+            progress = node->io_poll(node->opaque) || progress;
+        }
+    }
+    qemu_lockcnt_dec(&ctx->list_lock);
+
+    return progress;
+}
+
+
+bool aio_prepare(AioContext *ctx)
+{
+    /* Poll mode cannot be used with glib's event loop, disable it. */
+    poll_set_started(ctx, false);
+
+    return false;
+}
+
+bool aio_pending(AioContext *ctx)
+{
+    AioHandler *node;
+    bool result = false;
+
+    /*
+     * We have to walk very carefully in case aio_set_fd_handler is
+     * called while we're walking.
+     */
+    qemu_lockcnt_inc(&ctx->list_lock);
+
+    QLIST_FOREACH_RCU(node, &ctx->aio_handlers, node) {
+        int revents;
+
+        revents = node->pfd.revents & node->pfd.events;
+        if (revents & (G_IO_IN | G_IO_HUP | G_IO_ERR) && node->io_read &&
+            aio_node_check(ctx, node->is_external)) {
+            result = true;
+            break;
+        }
+        if (revents & (G_IO_OUT | G_IO_ERR) && node->io_write &&
+            aio_node_check(ctx, node->is_external)) {
+            result = true;
+            break;
+        }
+    }
+    qemu_lockcnt_dec(&ctx->list_lock);
+
+    return result;
+}
+
+static void aio_free_deleted_handlers(AioContext *ctx)
+{
+    AioHandler *node;
+
+    if (QLIST_EMPTY_RCU(&ctx->deleted_aio_handlers)) {
+        return;
+    }
+    if (!qemu_lockcnt_dec_if_lock(&ctx->list_lock)) {
+        return; /* we are nested, let the parent do the freeing */
+    }
+
+    while ((node = QLIST_FIRST_RCU(&ctx->deleted_aio_handlers))) {
+        QLIST_REMOVE(node, node);
+        QLIST_REMOVE(node, node_deleted);
+        QLIST_SAFE_REMOVE(node, node_poll);
+        g_free(node);
+    }
+
+    qemu_lockcnt_inc_and_unlock(&ctx->list_lock);
+}
+
+static bool aio_dispatch_handler(AioContext *ctx, AioHandler *node)
+{
+    bool progress = false;
+    int revents;
+
+    revents = node->pfd.revents & node->pfd.events;
+    node->pfd.revents = 0;
+
+    /*
+     * Start polling AioHandlers when they become ready because activity is
+     * likely to continue.  Note that starvation is theoretically possible when
+     * fdmon_supports_polling(), but only until the fd fires for the first
+     * time.
+     */
+    if (!QLIST_IS_INSERTED(node, node_deleted) &&
+        !QLIST_IS_INSERTED(node, node_poll) &&
+        node->io_poll) {
+        trace_poll_add(ctx, node, node->pfd.fd, revents);
+        if (ctx->poll_started && node->io_poll_begin) {
+            node->io_poll_begin(node->opaque);
+        }
+        QLIST_INSERT_HEAD(&ctx->poll_aio_handlers, node, node_poll);
+    }
+
+    if (!QLIST_IS_INSERTED(node, node_deleted) &&
+        (revents & (G_IO_IN | G_IO_HUP | G_IO_ERR)) &&
+        aio_node_check(ctx, node->is_external) &&
+        node->io_read) {
+        node->io_read(node->opaque);
+
+        /* aio_notify() does not count as progress */
+        if (node->opaque != &ctx->notifier) {
+            progress = true;
+        }
+    }
+    if (!QLIST_IS_INSERTED(node, node_deleted) &&
+        (revents & (G_IO_OUT | G_IO_ERR)) &&
+        aio_node_check(ctx, node->is_external) &&
+        node->io_write) {
+        node->io_write(node->opaque);
+        progress = true;
+    }
+
+    return progress;
+}
+
+/*
+ * If we have a list of ready handlers then this is more efficient than
+ * scanning all handlers with aio_dispatch_handlers().
+ */
+static bool aio_dispatch_ready_handlers(AioContext *ctx,
+                                        AioHandlerList *ready_list)
+{
+    bool progress = false;
+    AioHandler *node;
+
+    while ((node = QLIST_FIRST(ready_list))) {
+        QLIST_REMOVE(node, node_ready);
+        progress = aio_dispatch_handler(ctx, node) || progress;
+    }
+
+    return progress;
+}
+
+/* Slower than aio_dispatch_ready_handlers() but only used via glib */
+static bool aio_dispatch_handlers(AioContext *ctx)
+{
+    AioHandler *node, *tmp;
+    bool progress = false;
+
+    QLIST_FOREACH_SAFE_RCU(node, &ctx->aio_handlers, node, tmp) {
+        progress = aio_dispatch_handler(ctx, node) || progress;
+    }
+
+    return progress;
+}
+
+void aio_dispatch(AioContext *ctx)
+{
+    qemu_lockcnt_inc(&ctx->list_lock);
+    aio_bh_poll(ctx);
+    aio_dispatch_handlers(ctx);
+    aio_free_deleted_handlers(ctx);
+    qemu_lockcnt_dec(&ctx->list_lock);
+
+    timerlistgroup_run_timers(&ctx->tlg);
+}
+
+static bool run_poll_handlers_once(AioContext *ctx,
+                                   int64_t now,
+                                   int64_t *timeout)
+{
+    bool progress = false;
+    AioHandler *node;
+    AioHandler *tmp;
+
+    QLIST_FOREACH_SAFE(node, &ctx->poll_aio_handlers, node_poll, tmp) {
+        if (aio_node_check(ctx, node->is_external) &&
+            node->io_poll(node->opaque)) {
+            node->poll_idle_timeout = now + POLL_IDLE_INTERVAL_NS;
+
+            /*
+             * Polling was successful, exit try_poll_mode immediately
+             * to adjust the next polling time.
+             */
+            *timeout = 0;
+            if (node->opaque != &ctx->notifier) {
+                progress = true;
+            }
+        }
+
+        /* Caller handles freeing deleted nodes.  Don't do it here. */
+    }
+
+    return progress;
+}
+
+static bool fdmon_supports_polling(AioContext *ctx)
+{
+    return ctx->fdmon_ops->need_wait != aio_poll_disabled;
+}
+
+static bool remove_idle_poll_handlers(AioContext *ctx, int64_t now)
+{
+    AioHandler *node;
+    AioHandler *tmp;
+    bool progress = false;
+
+    /*
+     * File descriptor monitoring implementations without userspace polling
+     * support suffer from starvation when a subset of handlers is polled
+     * because fds will not be processed in a timely fashion.  Don't remove
+     * idle poll handlers.
+     */
+    if (!fdmon_supports_polling(ctx)) {
+        return false;
+    }
+
+    QLIST_FOREACH_SAFE(node, &ctx->poll_aio_handlers, node_poll, tmp) {
+        if (node->poll_idle_timeout == 0LL) {
+            node->poll_idle_timeout = now + POLL_IDLE_INTERVAL_NS;
+        } else if (now >= node->poll_idle_timeout) {
+            trace_poll_remove(ctx, node, node->pfd.fd);
+            node->poll_idle_timeout = 0LL;
+            QLIST_SAFE_REMOVE(node, node_poll);
+            if (ctx->poll_started && node->io_poll_end) {
+                node->io_poll_end(node->opaque);
+
+                /*
+                 * Final poll in case ->io_poll_end() races with an event.
+                 * Nevermind about re-adding the handler in the rare case where
+                 * this causes progress.
+                 */
+                progress = node->io_poll(node->opaque) || progress;
+            }
+        }
+    }
+
+    return progress;
+}
+
+/* run_poll_handlers:
+ * @ctx: the AioContext
+ * @max_ns: maximum time to poll for, in nanoseconds
+ *
+ * Polls for a given time.
+ *
+ * Note that the caller must have incremented ctx->list_lock.
+ *
+ * Returns: true if progress was made, false otherwise
+ */
+static bool run_poll_handlers(AioContext *ctx, int64_t max_ns, int64_t *timeout)
+{
+    bool progress;
+    int64_t start_time, elapsed_time;
+
+    assert(qemu_lockcnt_count(&ctx->list_lock) > 0);
+
+    trace_run_poll_handlers_begin(ctx, max_ns, *timeout);
+
+    /*
+     * Optimization: ->io_poll() handlers often contain RCU read critical
+     * sections and we therefore see many rcu_read_lock() -> rcu_read_unlock()
+     * -> rcu_read_lock() -> ... sequences with expensive memory
+     * synchronization primitives.  Make the entire polling loop an RCU
+     * critical section because nested rcu_read_lock()/rcu_read_unlock() calls
+     * are cheap.
+     */
+    RCU_READ_LOCK_GUARD();
+
+    start_time = qemu_clock_get_ns(QEMU_CLOCK_REALTIME);
+    do {
+        progress = run_poll_handlers_once(ctx, start_time, timeout);
+        elapsed_time = qemu_clock_get_ns(QEMU_CLOCK_REALTIME) - start_time;
+        max_ns = qemu_soonest_timeout(*timeout, max_ns);
+        assert(!(max_ns && progress));
+    } while (elapsed_time < max_ns && !ctx->fdmon_ops->need_wait(ctx));
+
+    if (remove_idle_poll_handlers(ctx, start_time + elapsed_time)) {
+        *timeout = 0;
+        progress = true;
+    }
+
+    /* If time has passed with no successful polling, adjust *timeout to
+     * keep the same ending time.
+     */
+    if (*timeout != -1) {
+        *timeout -= MIN(*timeout, elapsed_time);
+    }
+
+    trace_run_poll_handlers_end(ctx, progress, *timeout);
+    return progress;
+}
+
+/* try_poll_mode:
+ * @ctx: the AioContext
+ * @timeout: timeout for blocking wait, computed by the caller and updated if
+ *    polling succeeds.
+ *
+ * Note that the caller must have incremented ctx->list_lock.
+ *
+ * Returns: true if progress was made, false otherwise
+ */
+static bool try_poll_mode(AioContext *ctx, int64_t *timeout)
+{
+    int64_t max_ns;
+
+    if (QLIST_EMPTY_RCU(&ctx->poll_aio_handlers)) {
+        return false;
+    }
+
+    max_ns = qemu_soonest_timeout(*timeout, ctx->poll_ns);
+    if (max_ns && !ctx->fdmon_ops->need_wait(ctx)) {
+        poll_set_started(ctx, true);
+
+        if (run_poll_handlers(ctx, max_ns, timeout)) {
+            return true;
+        }
+    }
+
+    if (poll_set_started(ctx, false)) {
+        *timeout = 0;
+        return true;
+    }
+
+    return false;
+}
+
+bool aio_poll(AioContext *ctx, bool blocking)
+{
+    AioHandlerList ready_list = QLIST_HEAD_INITIALIZER(ready_list);
+    int ret = 0;
+    bool progress;
+    bool use_notify_me;
+    int64_t timeout;
+    int64_t start = 0;
+
+    /*
+     * There cannot be two concurrent aio_poll calls for the same AioContext (or
+     * an aio_poll concurrent with a GSource prepare/check/dispatch callback).
+     * We rely on this below to avoid slow locked accesses to ctx->notify_me.
+     *
+     * aio_poll() may only be called in the AioContext's thread. iohandler_ctx
+     * is special in that it runs in the main thread, but that thread's context
+     * is qemu_aio_context.
+     */
+    assert(in_aio_context_home_thread(ctx == iohandler_get_aio_context() ?
+                                      qemu_get_aio_context() : ctx));
+
+    qemu_lockcnt_inc(&ctx->list_lock);
+
+    if (ctx->poll_max_ns) {
+        start = qemu_clock_get_ns(QEMU_CLOCK_REALTIME);
+    }
+
+    timeout = blocking ? aio_compute_timeout(ctx) : 0;
+    progress = try_poll_mode(ctx, &timeout);
+    assert(!(timeout && progress));
+
+    /*
+     * aio_notify can avoid the expensive event_notifier_set if
+     * everything (file descriptors, bottom halves, timers) will
+     * be re-evaluated before the next blocking poll().  This is
+     * already true when aio_poll is called with blocking == false;
+     * if blocking == true, it is only true after poll() returns,
+     * so disable the optimization now.
+     */
+    use_notify_me = timeout != 0;
+    if (use_notify_me) {
+        qatomic_set(&ctx->notify_me, qatomic_read(&ctx->notify_me) + 2);
+        /*
+         * Write ctx->notify_me before reading ctx->notified.  Pairs with
+         * smp_mb in aio_notify().
+         */
+        smp_mb();
+
+        /* Don't block if aio_notify() was called */
+        if (qatomic_read(&ctx->notified)) {
+            timeout = 0;
+        }
+    }
+
+    /* If polling is allowed, non-blocking aio_poll does not need the
+     * system call---a single round of run_poll_handlers_once suffices.
+     */
+    if (timeout || ctx->fdmon_ops->need_wait(ctx)) {
+        ret = ctx->fdmon_ops->wait(ctx, &ready_list, timeout);
+    }
+
+    if (use_notify_me) {
+        /* Finish the poll before clearing the flag.  */
+        qatomic_store_release(&ctx->notify_me,
+                             qatomic_read(&ctx->notify_me) - 2);
+    }
+
+    aio_notify_accept(ctx);
+
+    /* Adjust polling time */
+    if (ctx->poll_max_ns) {
+        int64_t block_ns = qemu_clock_get_ns(QEMU_CLOCK_REALTIME) - start;
+
+        if (block_ns <= ctx->poll_ns) {
+            /* This is the sweet spot, no adjustment needed */
+        } else if (block_ns > ctx->poll_max_ns) {
+            /* We'd have to poll for too long, poll less */
+            int64_t old = ctx->poll_ns;
+
+            if (ctx->poll_shrink) {
+                ctx->poll_ns /= ctx->poll_shrink;
+            } else {
+                ctx->poll_ns = 0;
+            }
+
+            trace_poll_shrink(ctx, old, ctx->poll_ns);
+        } else if (ctx->poll_ns < ctx->poll_max_ns &&
+                   block_ns < ctx->poll_max_ns) {
+            /* There is room to grow, poll longer */
+            int64_t old = ctx->poll_ns;
+            int64_t grow = ctx->poll_grow;
+
+            if (grow == 0) {
+                grow = 2;
+            }
+
+            if (ctx->poll_ns) {
+                ctx->poll_ns *= grow;
+            } else {
+                ctx->poll_ns = 4000; /* start polling at 4 microseconds */
+            }
+
+            if (ctx->poll_ns > ctx->poll_max_ns) {
+                ctx->poll_ns = ctx->poll_max_ns;
+            }
+
+            trace_poll_grow(ctx, old, ctx->poll_ns);
+        }
+    }
+
+    progress |= aio_bh_poll(ctx);
+
+    if (ret > 0) {
+        progress |= aio_dispatch_ready_handlers(ctx, &ready_list);
+    }
+
+    aio_free_deleted_handlers(ctx);
+
+    qemu_lockcnt_dec(&ctx->list_lock);
+
+    progress |= timerlistgroup_run_timers(&ctx->tlg);
+
+    return progress;
+}
+
+void aio_context_setup(AioContext *ctx)
+{
+    ctx->fdmon_ops = &fdmon_poll_ops;
+    ctx->epollfd = -1;
+
+    /* Use the fastest fd monitoring implementation if available */
+    if (fdmon_io_uring_setup(ctx)) {
+        return;
+    }
+
+    fdmon_epoll_setup(ctx);
+}
+
+void aio_context_destroy(AioContext *ctx)
+{
+    fdmon_io_uring_destroy(ctx);
+    fdmon_epoll_disable(ctx);
+    aio_free_deleted_handlers(ctx);
+}
+
+void aio_context_use_g_source(AioContext *ctx)
+{
+    /*
+     * Disable io_uring when the glib main loop is used because it doesn't
+     * support mixed glib/aio_poll() usage. It relies on aio_poll() being
+     * called regularly so that changes to the monitored file descriptors are
+     * submitted, otherwise a list of pending fd handlers builds up.
+     */
+    fdmon_io_uring_destroy(ctx);
+    aio_free_deleted_handlers(ctx);
+}
+
+void aio_context_set_poll_params(AioContext *ctx, int64_t max_ns,
+                                 int64_t grow, int64_t shrink, Error **errp)
+{
+    /* No thread synchronization here, it doesn't matter if an incorrect value
+     * is used once.
+     */
+    ctx->poll_max_ns = max_ns;
+    ctx->poll_ns = 0;
+    ctx->poll_grow = grow;
+    ctx->poll_shrink = shrink;
+
+    aio_notify(ctx);
+}
diff --git a/util/aio-posix.h b/util/aio-posix.h
new file mode 100644
index 000000000..c80c04506
--- /dev/null
+++ b/util/aio-posix.h
@@ -0,0 +1,81 @@
+/*
+ * AioContext POSIX event loop implementation internal APIs
+ *
+ * Copyright IBM, Corp. 2008
+ * Copyright Red Hat, Inc. 2020
+ *
+ * Authors:
+ *  Anthony Liguori   
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2.  See
+ * the COPYING file in the top-level directory.
+ *
+ * Contributions after 2012-01-13 are licensed under the terms of the
+ * GNU GPL, version 2 or (at your option) any later version.
+ */
+
+#ifndef AIO_POSIX_H
+#define AIO_POSIX_H
+
+#include "block/aio.h"
+
+struct AioHandler {
+    GPollFD pfd;
+    IOHandler *io_read;
+    IOHandler *io_write;
+    AioPollFn *io_poll;
+    IOHandler *io_poll_begin;
+    IOHandler *io_poll_end;
+    void *opaque;
+    QLIST_ENTRY(AioHandler) node;
+    QLIST_ENTRY(AioHandler) node_ready; /* only used during aio_poll() */
+    QLIST_ENTRY(AioHandler) node_deleted;
+    QLIST_ENTRY(AioHandler) node_poll;
+#ifdef CONFIG_LINUX_IO_URING
+    QSLIST_ENTRY(AioHandler) node_submitted;
+    unsigned flags; /* see fdmon-io_uring.c */
+#endif
+    int64_t poll_idle_timeout; /* when to stop userspace polling */
+    bool is_external;
+};
+
+/* Add a handler to a ready list */
+void aio_add_ready_handler(AioHandlerList *ready_list, AioHandler *node,
+                           int revents);
+
+extern const FDMonOps fdmon_poll_ops;
+
+#ifdef CONFIG_EPOLL_CREATE1
+bool fdmon_epoll_try_upgrade(AioContext *ctx, unsigned npfd);
+void fdmon_epoll_setup(AioContext *ctx);
+void fdmon_epoll_disable(AioContext *ctx);
+#else
+static inline bool fdmon_epoll_try_upgrade(AioContext *ctx, unsigned npfd)
+{
+    return false;
+}
+
+static inline void fdmon_epoll_setup(AioContext *ctx)
+{
+}
+
+static inline void fdmon_epoll_disable(AioContext *ctx)
+{
+}
+#endif /* !CONFIG_EPOLL_CREATE1 */
+
+#ifdef CONFIG_LINUX_IO_URING
+bool fdmon_io_uring_setup(AioContext *ctx);
+void fdmon_io_uring_destroy(AioContext *ctx);
+#else
+static inline bool fdmon_io_uring_setup(AioContext *ctx)
+{
+    return false;
+}
+
+static inline void fdmon_io_uring_destroy(AioContext *ctx)
+{
+}
+#endif /* !CONFIG_LINUX_IO_URING */
+
+#endif /* AIO_POSIX_H */
diff --git a/util/aio-wait.c b/util/aio-wait.c
new file mode 100644
index 000000000..bdb3d3af2
--- /dev/null
+++ b/util/aio-wait.c
@@ -0,0 +1,72 @@
+/*
+ * AioContext wait support
+ *
+ * Copyright (C) 2018 Red Hat, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+#include "qemu/osdep.h"
+#include "qemu/main-loop.h"
+#include "block/aio-wait.h"
+
+AioWait global_aio_wait;
+
+static void dummy_bh_cb(void *opaque)
+{
+    /* The point is to make AIO_WAIT_WHILE()'s aio_poll() return */
+}
+
+void aio_wait_kick(void)
+{
+    /* The barrier (or an atomic op) is in the caller.  */
+    if (qatomic_read(&global_aio_wait.num_waiters)) {
+        aio_bh_schedule_oneshot(qemu_get_aio_context(), dummy_bh_cb, NULL);
+    }
+}
+
+typedef struct {
+    bool done;
+    QEMUBHFunc *cb;
+    void *opaque;
+} AioWaitBHData;
+
+/* Context: BH in IOThread */
+static void aio_wait_bh(void *opaque)
+{
+    AioWaitBHData *data = opaque;
+
+    data->cb(data->opaque);
+
+    data->done = true;
+    aio_wait_kick();
+}
+
+void aio_wait_bh_oneshot(AioContext *ctx, QEMUBHFunc *cb, void *opaque)
+{
+    AioWaitBHData data = {
+        .cb = cb,
+        .opaque = opaque,
+    };
+
+    assert(qemu_get_current_aio_context() == qemu_get_aio_context());
+
+    aio_bh_schedule_oneshot(ctx, aio_wait_bh, &data);
+    AIO_WAIT_WHILE(ctx, !data.done);
+}
diff --git a/util/aio-win32.c b/util/aio-win32.c
new file mode 100644
index 000000000..168717b51
--- /dev/null
+++ b/util/aio-win32.c
@@ -0,0 +1,442 @@
+/*
+ * QEMU aio implementation
+ *
+ * Copyright IBM Corp., 2008
+ * Copyright Red Hat Inc., 2012
+ *
+ * Authors:
+ *  Anthony Liguori   
+ *  Paolo Bonzini     
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2.  See
+ * the COPYING file in the top-level directory.
+ *
+ * Contributions after 2012-01-13 are licensed under the terms of the
+ * GNU GPL, version 2 or (at your option) any later version.
+ */
+
+#include "qemu/osdep.h"
+#include "qemu-common.h"
+#include "block/block.h"
+#include "qemu/main-loop.h"
+#include "qemu/queue.h"
+#include "qemu/sockets.h"
+#include "qapi/error.h"
+#include "qemu/rcu_queue.h"
+
+struct AioHandler {
+    EventNotifier *e;
+    IOHandler *io_read;
+    IOHandler *io_write;
+    EventNotifierHandler *io_notify;
+    GPollFD pfd;
+    int deleted;
+    void *opaque;
+    bool is_external;
+    QLIST_ENTRY(AioHandler) node;
+};
+
+static void aio_remove_fd_handler(AioContext *ctx, AioHandler *node)
+{
+    /*
+     * If the GSource is in the process of being destroyed then
+     * g_source_remove_poll() causes an assertion failure.  Skip
+     * removal in that case, because glib cleans up its state during
+     * destruction anyway.
+     */
+    if (!g_source_is_destroyed(&ctx->source)) {
+        g_source_remove_poll(&ctx->source, &node->pfd);
+    }
+
+    /* If aio_poll is in progress, just mark the node as deleted */
+    if (qemu_lockcnt_count(&ctx->list_lock)) {
+        node->deleted = 1;
+        node->pfd.revents = 0;
+    } else {
+        /* Otherwise, delete it for real.  We can't just mark it as
+         * deleted because deleted nodes are only cleaned up after
+         * releasing the list_lock.
+         */
+        QLIST_REMOVE(node, node);
+        g_free(node);
+    }
+}
+
+void aio_set_fd_handler(AioContext *ctx,
+                        int fd,
+                        bool is_external,
+                        IOHandler *io_read,
+                        IOHandler *io_write,
+                        AioPollFn *io_poll,
+                        void *opaque)
+{
+    /* fd is a SOCKET in our case */
+    AioHandler *old_node;
+    AioHandler *node = NULL;
+
+    qemu_lockcnt_lock(&ctx->list_lock);
+    QLIST_FOREACH(old_node, &ctx->aio_handlers, node) {
+        if (old_node->pfd.fd == fd && !old_node->deleted) {
+            break;
+        }
+    }
+
+    if (io_read || io_write) {
+        HANDLE event;
+        long bitmask = 0;
+
+        /* Alloc and insert if it's not already there */
+        node = g_new0(AioHandler, 1);
+        node->pfd.fd = fd;
+
+        node->pfd.events = 0;
+        if (node->io_read) {
+            node->pfd.events |= G_IO_IN;
+        }
+        if (node->io_write) {
+            node->pfd.events |= G_IO_OUT;
+        }
+
+        node->e = &ctx->notifier;
+
+        /* Update handler with latest information */
+        node->opaque = opaque;
+        node->io_read = io_read;
+        node->io_write = io_write;
+        node->is_external = is_external;
+
+        if (io_read) {
+            bitmask |= FD_READ | FD_ACCEPT | FD_CLOSE;
+        }
+
+        if (io_write) {
+            bitmask |= FD_WRITE | FD_CONNECT;
+        }
+
+        QLIST_INSERT_HEAD_RCU(&ctx->aio_handlers, node, node);
+        event = event_notifier_get_handle(&ctx->notifier);
+        WSAEventSelect(node->pfd.fd, event, bitmask);
+    }
+    if (old_node) {
+        aio_remove_fd_handler(ctx, old_node);
+    }
+
+    qemu_lockcnt_unlock(&ctx->list_lock);
+    aio_notify(ctx);
+}
+
+void aio_set_fd_poll(AioContext *ctx, int fd,
+                     IOHandler *io_poll_begin,
+                     IOHandler *io_poll_end)
+{
+    /* Not implemented */
+}
+
+void aio_set_event_notifier(AioContext *ctx,
+                            EventNotifier *e,
+                            bool is_external,
+                            EventNotifierHandler *io_notify,
+                            AioPollFn *io_poll)
+{
+    AioHandler *node;
+
+    qemu_lockcnt_lock(&ctx->list_lock);
+    QLIST_FOREACH(node, &ctx->aio_handlers, node) {
+        if (node->e == e && !node->deleted) {
+            break;
+        }
+    }
+
+    /* Are we deleting the fd handler? */
+    if (!io_notify) {
+        if (node) {
+            aio_remove_fd_handler(ctx, node);
+        }
+    } else {
+        if (node == NULL) {
+            /* Alloc and insert if it's not already there */
+            node = g_new0(AioHandler, 1);
+            node->e = e;
+            node->pfd.fd = (uintptr_t)event_notifier_get_handle(e);
+            node->pfd.events = G_IO_IN;
+            node->is_external = is_external;
+            QLIST_INSERT_HEAD_RCU(&ctx->aio_handlers, node, node);
+
+            g_source_add_poll(&ctx->source, &node->pfd);
+        }
+        /* Update handler with latest information */
+        node->io_notify = io_notify;
+    }
+
+    qemu_lockcnt_unlock(&ctx->list_lock);
+    aio_notify(ctx);
+}
+
+void aio_set_event_notifier_poll(AioContext *ctx,
+                                 EventNotifier *notifier,
+                                 EventNotifierHandler *io_poll_begin,
+                                 EventNotifierHandler *io_poll_end)
+{
+    /* Not implemented */
+}
+
+bool aio_prepare(AioContext *ctx)
+{
+    static struct timeval tv0;
+    AioHandler *node;
+    bool have_select_revents = false;
+    fd_set rfds, wfds;
+
+    /*
+     * We have to walk very carefully in case aio_set_fd_handler is
+     * called while we're walking.
+     */
+    qemu_lockcnt_inc(&ctx->list_lock);
+
+    /* fill fd sets */
+    FD_ZERO(&rfds);
+    FD_ZERO(&wfds);
+    QLIST_FOREACH_RCU(node, &ctx->aio_handlers, node) {
+        if (node->io_read) {
+            FD_SET ((SOCKET)node->pfd.fd, &rfds);
+        }
+        if (node->io_write) {
+            FD_SET ((SOCKET)node->pfd.fd, &wfds);
+        }
+    }
+
+    if (select(0, &rfds, &wfds, NULL, &tv0) > 0) {
+        QLIST_FOREACH_RCU(node, &ctx->aio_handlers, node) {
+            node->pfd.revents = 0;
+            if (FD_ISSET(node->pfd.fd, &rfds)) {
+                node->pfd.revents |= G_IO_IN;
+                have_select_revents = true;
+            }
+
+            if (FD_ISSET(node->pfd.fd, &wfds)) {
+                node->pfd.revents |= G_IO_OUT;
+                have_select_revents = true;
+            }
+        }
+    }
+
+    qemu_lockcnt_dec(&ctx->list_lock);
+    return have_select_revents;
+}
+
+bool aio_pending(AioContext *ctx)
+{
+    AioHandler *node;
+    bool result = false;
+
+    /*
+     * We have to walk very carefully in case aio_set_fd_handler is
+     * called while we're walking.
+     */
+    qemu_lockcnt_inc(&ctx->list_lock);
+    QLIST_FOREACH_RCU(node, &ctx->aio_handlers, node) {
+        if (node->pfd.revents && node->io_notify) {
+            result = true;
+            break;
+        }
+
+        if ((node->pfd.revents & G_IO_IN) && node->io_read) {
+            result = true;
+            break;
+        }
+        if ((node->pfd.revents & G_IO_OUT) && node->io_write) {
+            result = true;
+            break;
+        }
+    }
+
+    qemu_lockcnt_dec(&ctx->list_lock);
+    return result;
+}
+
+static bool aio_dispatch_handlers(AioContext *ctx, HANDLE event)
+{
+    AioHandler *node;
+    bool progress = false;
+    AioHandler *tmp;
+
+    /*
+     * We have to walk very carefully in case aio_set_fd_handler is
+     * called while we're walking.
+     */
+    QLIST_FOREACH_SAFE_RCU(node, &ctx->aio_handlers, node, tmp) {
+        int revents = node->pfd.revents;
+
+        if (!node->deleted &&
+            (revents || event_notifier_get_handle(node->e) == event) &&
+            node->io_notify) {
+            node->pfd.revents = 0;
+            node->io_notify(node->e);
+
+            /* aio_notify() does not count as progress */
+            if (node->e != &ctx->notifier) {
+                progress = true;
+            }
+        }
+
+        if (!node->deleted &&
+            (node->io_read || node->io_write)) {
+            node->pfd.revents = 0;
+            if ((revents & G_IO_IN) && node->io_read) {
+                node->io_read(node->opaque);
+                progress = true;
+            }
+            if ((revents & G_IO_OUT) && node->io_write) {
+                node->io_write(node->opaque);
+                progress = true;
+            }
+
+            /* if the next select() will return an event, we have progressed */
+            if (event == event_notifier_get_handle(&ctx->notifier)) {
+                WSANETWORKEVENTS ev;
+                WSAEnumNetworkEvents(node->pfd.fd, event, &ev);
+                if (ev.lNetworkEvents) {
+                    progress = true;
+                }
+            }
+        }
+
+        if (node->deleted) {
+            if (qemu_lockcnt_dec_if_lock(&ctx->list_lock)) {
+                QLIST_REMOVE(node, node);
+                g_free(node);
+                qemu_lockcnt_inc_and_unlock(&ctx->list_lock);
+            }
+        }
+    }
+
+    return progress;
+}
+
+void aio_dispatch(AioContext *ctx)
+{
+    qemu_lockcnt_inc(&ctx->list_lock);
+    aio_bh_poll(ctx);
+    aio_dispatch_handlers(ctx, INVALID_HANDLE_VALUE);
+    qemu_lockcnt_dec(&ctx->list_lock);
+    timerlistgroup_run_timers(&ctx->tlg);
+}
+
+bool aio_poll(AioContext *ctx, bool blocking)
+{
+    AioHandler *node;
+    HANDLE events[MAXIMUM_WAIT_OBJECTS + 1];
+    bool progress, have_select_revents, first;
+    int count;
+    int timeout;
+
+    /*
+     * There cannot be two concurrent aio_poll calls for the same AioContext (or
+     * an aio_poll concurrent with a GSource prepare/check/dispatch callback).
+     * We rely on this below to avoid slow locked accesses to ctx->notify_me.
+     *
+     * aio_poll() may only be called in the AioContext's thread. iohandler_ctx
+     * is special in that it runs in the main thread, but that thread's context
+     * is qemu_aio_context.
+     */
+    assert(in_aio_context_home_thread(ctx == iohandler_get_aio_context() ?
+                                      qemu_get_aio_context() : ctx));
+    progress = false;
+
+    /* aio_notify can avoid the expensive event_notifier_set if
+     * everything (file descriptors, bottom halves, timers) will
+     * be re-evaluated before the next blocking poll().  This is
+     * already true when aio_poll is called with blocking == false;
+     * if blocking == true, it is only true after poll() returns,
+     * so disable the optimization now.
+     */
+    if (blocking) {
+        qatomic_set(&ctx->notify_me, qatomic_read(&ctx->notify_me) + 2);
+        /*
+         * Write ctx->notify_me before computing the timeout
+         * (reading bottom half flags, etc.).  Pairs with
+         * smp_mb in aio_notify().
+         */
+        smp_mb();
+    }
+
+    qemu_lockcnt_inc(&ctx->list_lock);
+    have_select_revents = aio_prepare(ctx);
+
+    /* fill fd sets */
+    count = 0;
+    QLIST_FOREACH_RCU(node, &ctx->aio_handlers, node) {
+        if (!node->deleted && node->io_notify
+            && aio_node_check(ctx, node->is_external)) {
+            events[count++] = event_notifier_get_handle(node->e);
+        }
+    }
+
+    first = true;
+
+    /* ctx->notifier is always registered.  */
+    assert(count > 0);
+
+    /* Multiple iterations, all of them non-blocking except the first,
+     * may be necessary to process all pending events.  After the first
+     * WaitForMultipleObjects call ctx->notify_me will be decremented.
+     */
+    do {
+        HANDLE event;
+        int ret;
+
+        timeout = blocking && !have_select_revents
+            ? qemu_timeout_ns_to_ms(aio_compute_timeout(ctx)) : 0;
+        ret = WaitForMultipleObjects(count, events, FALSE, timeout);
+        if (blocking) {
+            assert(first);
+            qatomic_store_release(&ctx->notify_me,
+                                  qatomic_read(&ctx->notify_me) - 2);
+            aio_notify_accept(ctx);
+        }
+
+        if (first) {
+            progress |= aio_bh_poll(ctx);
+            first = false;
+        }
+
+        /* if we have any signaled events, dispatch event */
+        event = NULL;
+        if ((DWORD) (ret - WAIT_OBJECT_0) < count) {
+            event = events[ret - WAIT_OBJECT_0];
+            events[ret - WAIT_OBJECT_0] = events[--count];
+        } else if (!have_select_revents) {
+            break;
+        }
+
+        have_select_revents = false;
+        blocking = false;
+
+        progress |= aio_dispatch_handlers(ctx, event);
+    } while (count > 0);
+
+    qemu_lockcnt_dec(&ctx->list_lock);
+
+    progress |= timerlistgroup_run_timers(&ctx->tlg);
+    return progress;
+}
+
+void aio_context_setup(AioContext *ctx)
+{
+}
+
+void aio_context_destroy(AioContext *ctx)
+{
+}
+
+void aio_context_use_g_source(AioContext *ctx)
+{
+}
+
+void aio_context_set_poll_params(AioContext *ctx, int64_t max_ns,
+                                 int64_t grow, int64_t shrink, Error **errp)
+{
+    if (max_ns) {
+        error_setg(errp, "AioContext polling is not implemented on Windows");
+    }
+}
diff --git a/util/aiocb.c b/util/aiocb.c
new file mode 100644
index 000000000..5aef3a069
--- /dev/null
+++ b/util/aiocb.c
@@ -0,0 +1,55 @@
+/*
+ * BlockAIOCB allocation
+ *
+ * Copyright (c) 2003-2017 Fabrice Bellard and other QEMU contributors
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+#include "qemu/osdep.h"
+#include "block/aio.h"
+
+void *qemu_aio_get(const AIOCBInfo *aiocb_info, BlockDriverState *bs,
+                   BlockCompletionFunc *cb, void *opaque)
+{
+    BlockAIOCB *acb;
+
+    acb = g_malloc(aiocb_info->aiocb_size);
+    acb->aiocb_info = aiocb_info;
+    acb->bs = bs;
+    acb->cb = cb;
+    acb->opaque = opaque;
+    acb->refcnt = 1;
+    return acb;
+}
+
+void qemu_aio_ref(void *p)
+{
+    BlockAIOCB *acb = p;
+    acb->refcnt++;
+}
+
+void qemu_aio_unref(void *p)
+{
+    BlockAIOCB *acb = p;
+    assert(acb->refcnt > 0);
+    if (--acb->refcnt == 0) {
+        g_free(acb);
+    }
+}
diff --git a/util/async.c b/util/async.c
new file mode 100644
index 000000000..674dbefb7
--- /dev/null
+++ b/util/async.c
@@ -0,0 +1,651 @@
+/*
+ * Data plane event loop
+ *
+ * Copyright (c) 2003-2008 Fabrice Bellard
+ * Copyright (c) 2009-2017 QEMU contributors
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+#include "qemu/osdep.h"
+#include "qapi/error.h"
+#include "block/aio.h"
+#include "block/thread-pool.h"
+#include "qemu/main-loop.h"
+#include "qemu/atomic.h"
+#include "qemu/rcu_queue.h"
+#include "block/raw-aio.h"
+#include "qemu/coroutine_int.h"
+#include "trace.h"
+
+/***********************************************************/
+/* bottom halves (can be seen as timers which expire ASAP) */
+
+/* QEMUBH::flags values */
+enum {
+    /* Already enqueued and waiting for aio_bh_poll() */
+    BH_PENDING   = (1 << 0),
+
+    /* Invoke the callback */
+    BH_SCHEDULED = (1 << 1),
+
+    /* Delete without invoking callback */
+    BH_DELETED   = (1 << 2),
+
+    /* Delete after invoking callback */
+    BH_ONESHOT   = (1 << 3),
+
+    /* Schedule periodically when the event loop is idle */
+    BH_IDLE      = (1 << 4),
+};
+
+struct QEMUBH {
+    AioContext *ctx;
+    QEMUBHFunc *cb;
+    void *opaque;
+    QSLIST_ENTRY(QEMUBH) next;
+    unsigned flags;
+};
+
+/* Called concurrently from any thread */
+static void aio_bh_enqueue(QEMUBH *bh, unsigned new_flags)
+{
+    AioContext *ctx = bh->ctx;
+    unsigned old_flags;
+
+    /*
+     * The memory barrier implicit in qatomic_fetch_or makes sure that:
+     * 1. idle & any writes needed by the callback are done before the
+     *    locations are read in the aio_bh_poll.
+     * 2. ctx is loaded before the callback has a chance to execute and bh
+     *    could be freed.
+     */
+    old_flags = qatomic_fetch_or(&bh->flags, BH_PENDING | new_flags);
+    if (!(old_flags & BH_PENDING)) {
+        QSLIST_INSERT_HEAD_ATOMIC(&ctx->bh_list, bh, next);
+    }
+
+    aio_notify(ctx);
+}
+
+/* Only called from aio_bh_poll() and aio_ctx_finalize() */
+static QEMUBH *aio_bh_dequeue(BHList *head, unsigned *flags)
+{
+    QEMUBH *bh = QSLIST_FIRST_RCU(head);
+
+    if (!bh) {
+        return NULL;
+    }
+
+    QSLIST_REMOVE_HEAD(head, next);
+
+    /*
+     * The qatomic_and is paired with aio_bh_enqueue().  The implicit memory
+     * barrier ensures that the callback sees all writes done by the scheduling
+     * thread.  It also ensures that the scheduling thread sees the cleared
+     * flag before bh->cb has run, and thus will call aio_notify again if
+     * necessary.
+     */
+    *flags = qatomic_fetch_and(&bh->flags,
+                              ~(BH_PENDING | BH_SCHEDULED | BH_IDLE));
+    return bh;
+}
+
+void aio_bh_schedule_oneshot(AioContext *ctx, QEMUBHFunc *cb, void *opaque)
+{
+    QEMUBH *bh;
+    bh = g_new(QEMUBH, 1);
+    *bh = (QEMUBH){
+        .ctx = ctx,
+        .cb = cb,
+        .opaque = opaque,
+    };
+    aio_bh_enqueue(bh, BH_SCHEDULED | BH_ONESHOT);
+}
+
+QEMUBH *aio_bh_new(AioContext *ctx, QEMUBHFunc *cb, void *opaque)
+{
+    QEMUBH *bh;
+    bh = g_new(QEMUBH, 1);
+    *bh = (QEMUBH){
+        .ctx = ctx,
+        .cb = cb,
+        .opaque = opaque,
+    };
+    return bh;
+}
+
+void aio_bh_call(QEMUBH *bh)
+{
+    bh->cb(bh->opaque);
+}
+
+/* Multiple occurrences of aio_bh_poll cannot be called concurrently. */
+int aio_bh_poll(AioContext *ctx)
+{
+    BHListSlice slice;
+    BHListSlice *s;
+    int ret = 0;
+
+    QSLIST_MOVE_ATOMIC(&slice.bh_list, &ctx->bh_list);
+    QSIMPLEQ_INSERT_TAIL(&ctx->bh_slice_list, &slice, next);
+
+    while ((s = QSIMPLEQ_FIRST(&ctx->bh_slice_list))) {
+        QEMUBH *bh;
+        unsigned flags;
+
+        bh = aio_bh_dequeue(&s->bh_list, &flags);
+        if (!bh) {
+            QSIMPLEQ_REMOVE_HEAD(&ctx->bh_slice_list, next);
+            continue;
+        }
+
+        if ((flags & (BH_SCHEDULED | BH_DELETED)) == BH_SCHEDULED) {
+            /* Idle BHs don't count as progress */
+            if (!(flags & BH_IDLE)) {
+                ret = 1;
+            }
+            aio_bh_call(bh);
+        }
+        if (flags & (BH_DELETED | BH_ONESHOT)) {
+            g_free(bh);
+        }
+    }
+
+    return ret;
+}
+
+void qemu_bh_schedule_idle(QEMUBH *bh)
+{
+    aio_bh_enqueue(bh, BH_SCHEDULED | BH_IDLE);
+}
+
+void qemu_bh_schedule(QEMUBH *bh)
+{
+    aio_bh_enqueue(bh, BH_SCHEDULED);
+}
+
+/* This func is async.
+ */
+void qemu_bh_cancel(QEMUBH *bh)
+{
+    qatomic_and(&bh->flags, ~BH_SCHEDULED);
+}
+
+/* This func is async.The bottom half will do the delete action at the finial
+ * end.
+ */
+void qemu_bh_delete(QEMUBH *bh)
+{
+    aio_bh_enqueue(bh, BH_DELETED);
+}
+
+static int64_t aio_compute_bh_timeout(BHList *head, int timeout)
+{
+    QEMUBH *bh;
+
+    QSLIST_FOREACH_RCU(bh, head, next) {
+        if ((bh->flags & (BH_SCHEDULED | BH_DELETED)) == BH_SCHEDULED) {
+            if (bh->flags & BH_IDLE) {
+                /* idle bottom halves will be polled at least
+                 * every 10ms */
+                timeout = 10000000;
+            } else {
+                /* non-idle bottom halves will be executed
+                 * immediately */
+                return 0;
+            }
+        }
+    }
+
+    return timeout;
+}
+
+int64_t
+aio_compute_timeout(AioContext *ctx)
+{
+    BHListSlice *s;
+    int64_t deadline;
+    int timeout = -1;
+
+    timeout = aio_compute_bh_timeout(&ctx->bh_list, timeout);
+    if (timeout == 0) {
+        return 0;
+    }
+
+    QSIMPLEQ_FOREACH(s, &ctx->bh_slice_list, next) {
+        timeout = aio_compute_bh_timeout(&s->bh_list, timeout);
+        if (timeout == 0) {
+            return 0;
+        }
+    }
+
+    deadline = timerlistgroup_deadline_ns(&ctx->tlg);
+    if (deadline == 0) {
+        return 0;
+    } else {
+        return qemu_soonest_timeout(timeout, deadline);
+    }
+}
+
+static gboolean
+aio_ctx_prepare(GSource *source, gint    *timeout)
+{
+    AioContext *ctx = (AioContext *) source;
+
+    qatomic_set(&ctx->notify_me, qatomic_read(&ctx->notify_me) | 1);
+
+    /*
+     * Write ctx->notify_me before computing the timeout
+     * (reading bottom half flags, etc.).  Pairs with
+     * smp_mb in aio_notify().
+     */
+    smp_mb();
+
+    /* We assume there is no timeout already supplied */
+    *timeout = qemu_timeout_ns_to_ms(aio_compute_timeout(ctx));
+
+    if (aio_prepare(ctx)) {
+        *timeout = 0;
+    }
+
+    return *timeout == 0;
+}
+
+static gboolean
+aio_ctx_check(GSource *source)
+{
+    AioContext *ctx = (AioContext *) source;
+    QEMUBH *bh;
+    BHListSlice *s;
+
+    /* Finish computing the timeout before clearing the flag.  */
+    qatomic_store_release(&ctx->notify_me, qatomic_read(&ctx->notify_me) & ~1);
+    aio_notify_accept(ctx);
+
+    QSLIST_FOREACH_RCU(bh, &ctx->bh_list, next) {
+        if ((bh->flags & (BH_SCHEDULED | BH_DELETED)) == BH_SCHEDULED) {
+            return true;
+        }
+    }
+
+    QSIMPLEQ_FOREACH(s, &ctx->bh_slice_list, next) {
+        QSLIST_FOREACH_RCU(bh, &s->bh_list, next) {
+            if ((bh->flags & (BH_SCHEDULED | BH_DELETED)) == BH_SCHEDULED) {
+                return true;
+            }
+        }
+    }
+    return aio_pending(ctx) || (timerlistgroup_deadline_ns(&ctx->tlg) == 0);
+}
+
+static gboolean
+aio_ctx_dispatch(GSource     *source,
+                 GSourceFunc  callback,
+                 gpointer     user_data)
+{
+    AioContext *ctx = (AioContext *) source;
+
+    assert(callback == NULL);
+    aio_dispatch(ctx);
+    return true;
+}
+
+static void
+aio_ctx_finalize(GSource     *source)
+{
+    AioContext *ctx = (AioContext *) source;
+    QEMUBH *bh;
+    unsigned flags;
+
+    thread_pool_free(ctx->thread_pool);
+
+#ifdef CONFIG_LINUX_AIO
+    if (ctx->linux_aio) {
+        laio_detach_aio_context(ctx->linux_aio, ctx);
+        laio_cleanup(ctx->linux_aio);
+        ctx->linux_aio = NULL;
+    }
+#endif
+
+#ifdef CONFIG_LINUX_IO_URING
+    if (ctx->linux_io_uring) {
+        luring_detach_aio_context(ctx->linux_io_uring, ctx);
+        luring_cleanup(ctx->linux_io_uring);
+        ctx->linux_io_uring = NULL;
+    }
+#endif
+
+    assert(QSLIST_EMPTY(&ctx->scheduled_coroutines));
+    qemu_bh_delete(ctx->co_schedule_bh);
+
+    /* There must be no aio_bh_poll() calls going on */
+    assert(QSIMPLEQ_EMPTY(&ctx->bh_slice_list));
+
+    while ((bh = aio_bh_dequeue(&ctx->bh_list, &flags))) {
+        /* qemu_bh_delete() must have been called on BHs in this AioContext */
+        assert(flags & BH_DELETED);
+
+        g_free(bh);
+    }
+
+    aio_set_event_notifier(ctx, &ctx->notifier, false, NULL, NULL);
+    event_notifier_cleanup(&ctx->notifier);
+    qemu_rec_mutex_destroy(&ctx->lock);
+    qemu_lockcnt_destroy(&ctx->list_lock);
+    timerlistgroup_deinit(&ctx->tlg);
+    aio_context_destroy(ctx);
+}
+
+static GSourceFuncs aio_source_funcs = {
+    aio_ctx_prepare,
+    aio_ctx_check,
+    aio_ctx_dispatch,
+    aio_ctx_finalize
+};
+
+GSource *aio_get_g_source(AioContext *ctx)
+{
+    aio_context_use_g_source(ctx);
+    g_source_ref(&ctx->source);
+    return &ctx->source;
+}
+
+ThreadPool *aio_get_thread_pool(AioContext *ctx)
+{
+    if (!ctx->thread_pool) {
+        ctx->thread_pool = thread_pool_new(ctx);
+    }
+    return ctx->thread_pool;
+}
+
+#ifdef CONFIG_LINUX_AIO
+LinuxAioState *aio_setup_linux_aio(AioContext *ctx, Error **errp)
+{
+    if (!ctx->linux_aio) {
+        ctx->linux_aio = laio_init(errp);
+        if (ctx->linux_aio) {
+            laio_attach_aio_context(ctx->linux_aio, ctx);
+        }
+    }
+    return ctx->linux_aio;
+}
+
+LinuxAioState *aio_get_linux_aio(AioContext *ctx)
+{
+    assert(ctx->linux_aio);
+    return ctx->linux_aio;
+}
+#endif
+
+#ifdef CONFIG_LINUX_IO_URING
+LuringState *aio_setup_linux_io_uring(AioContext *ctx, Error **errp)
+{
+    if (ctx->linux_io_uring) {
+        return ctx->linux_io_uring;
+    }
+
+    ctx->linux_io_uring = luring_init(errp);
+    if (!ctx->linux_io_uring) {
+        return NULL;
+    }
+
+    luring_attach_aio_context(ctx->linux_io_uring, ctx);
+    return ctx->linux_io_uring;
+}
+
+LuringState *aio_get_linux_io_uring(AioContext *ctx)
+{
+    assert(ctx->linux_io_uring);
+    return ctx->linux_io_uring;
+}
+#endif
+
+void aio_notify(AioContext *ctx)
+{
+    /*
+     * Write e.g. bh->flags before writing ctx->notified.  Pairs with smp_mb in
+     * aio_notify_accept.
+     */
+    smp_wmb();
+    qatomic_set(&ctx->notified, true);
+
+    /*
+     * Write ctx->notified before reading ctx->notify_me.  Pairs
+     * with smp_mb in aio_ctx_prepare or aio_poll.
+     */
+    smp_mb();
+    if (qatomic_read(&ctx->notify_me)) {
+        event_notifier_set(&ctx->notifier);
+    }
+}
+
+void aio_notify_accept(AioContext *ctx)
+{
+    qatomic_set(&ctx->notified, false);
+
+    /*
+     * Write ctx->notified before reading e.g. bh->flags.  Pairs with smp_wmb
+     * in aio_notify.
+     */
+    smp_mb();
+}
+
+static void aio_timerlist_notify(void *opaque, QEMUClockType type)
+{
+    aio_notify(opaque);
+}
+
+static void aio_context_notifier_cb(EventNotifier *e)
+{
+    AioContext *ctx = container_of(e, AioContext, notifier);
+
+    event_notifier_test_and_clear(&ctx->notifier);
+}
+
+/* Returns true if aio_notify() was called (e.g. a BH was scheduled) */
+static bool aio_context_notifier_poll(void *opaque)
+{
+    EventNotifier *e = opaque;
+    AioContext *ctx = container_of(e, AioContext, notifier);
+
+    return qatomic_read(&ctx->notified);
+}
+
+static void co_schedule_bh_cb(void *opaque)
+{
+    AioContext *ctx = opaque;
+    QSLIST_HEAD(, Coroutine) straight, reversed;
+
+    QSLIST_MOVE_ATOMIC(&reversed, &ctx->scheduled_coroutines);
+    QSLIST_INIT(&straight);
+
+    while (!QSLIST_EMPTY(&reversed)) {
+        Coroutine *co = QSLIST_FIRST(&reversed);
+        QSLIST_REMOVE_HEAD(&reversed, co_scheduled_next);
+        QSLIST_INSERT_HEAD(&straight, co, co_scheduled_next);
+    }
+
+    while (!QSLIST_EMPTY(&straight)) {
+        Coroutine *co = QSLIST_FIRST(&straight);
+        QSLIST_REMOVE_HEAD(&straight, co_scheduled_next);
+        trace_aio_co_schedule_bh_cb(ctx, co);
+        aio_context_acquire(ctx);
+
+        /* Protected by write barrier in qemu_aio_coroutine_enter */
+        qatomic_set(&co->scheduled, NULL);
+        qemu_aio_coroutine_enter(ctx, co);
+        aio_context_release(ctx);
+    }
+}
+
+AioContext *aio_context_new(Error **errp)
+{
+    int ret;
+    AioContext *ctx;
+
+    ctx = (AioContext *) g_source_new(&aio_source_funcs, sizeof(AioContext));
+    QSLIST_INIT(&ctx->bh_list);
+    QSIMPLEQ_INIT(&ctx->bh_slice_list);
+    aio_context_setup(ctx);
+
+    ret = event_notifier_init(&ctx->notifier, false);
+    if (ret < 0) {
+        error_setg_errno(errp, -ret, "Failed to initialize event notifier");
+        goto fail;
+    }
+    g_source_set_can_recurse(&ctx->source, true);
+    qemu_lockcnt_init(&ctx->list_lock);
+
+    ctx->co_schedule_bh = aio_bh_new(ctx, co_schedule_bh_cb, ctx);
+    QSLIST_INIT(&ctx->scheduled_coroutines);
+
+    aio_set_event_notifier(ctx, &ctx->notifier,
+                           false,
+                           aio_context_notifier_cb,
+                           aio_context_notifier_poll);
+#ifdef CONFIG_LINUX_AIO
+    ctx->linux_aio = NULL;
+#endif
+
+#ifdef CONFIG_LINUX_IO_URING
+    ctx->linux_io_uring = NULL;
+#endif
+
+    ctx->thread_pool = NULL;
+    qemu_rec_mutex_init(&ctx->lock);
+    timerlistgroup_init(&ctx->tlg, aio_timerlist_notify, ctx);
+
+    ctx->poll_ns = 0;
+    ctx->poll_max_ns = 0;
+    ctx->poll_grow = 0;
+    ctx->poll_shrink = 0;
+
+    return ctx;
+fail:
+    g_source_destroy(&ctx->source);
+    return NULL;
+}
+
+void aio_co_schedule(AioContext *ctx, Coroutine *co)
+{
+    trace_aio_co_schedule(ctx, co);
+    const char *scheduled = qatomic_cmpxchg(&co->scheduled, NULL,
+                                           __func__);
+
+    if (scheduled) {
+        fprintf(stderr,
+                "%s: Co-routine was already scheduled in '%s'\n",
+                __func__, scheduled);
+        abort();
+    }
+
+    /* The coroutine might run and release the last ctx reference before we
+     * invoke qemu_bh_schedule().  Take a reference to keep ctx alive until
+     * we're done.
+     */
+    aio_context_ref(ctx);
+
+    QSLIST_INSERT_HEAD_ATOMIC(&ctx->scheduled_coroutines,
+                              co, co_scheduled_next);
+    qemu_bh_schedule(ctx->co_schedule_bh);
+
+    aio_context_unref(ctx);
+}
+
+typedef struct AioCoRescheduleSelf {
+    Coroutine *co;
+    AioContext *new_ctx;
+} AioCoRescheduleSelf;
+
+static void aio_co_reschedule_self_bh(void *opaque)
+{
+    AioCoRescheduleSelf *data = opaque;
+    aio_co_schedule(data->new_ctx, data->co);
+}
+
+void coroutine_fn aio_co_reschedule_self(AioContext *new_ctx)
+{
+    AioContext *old_ctx = qemu_get_current_aio_context();
+
+    if (old_ctx != new_ctx) {
+        AioCoRescheduleSelf data = {
+            .co = qemu_coroutine_self(),
+            .new_ctx = new_ctx,
+        };
+        /*
+         * We can't directly schedule the coroutine in the target context
+         * because this would be racy: The other thread could try to enter the
+         * coroutine before it has yielded in this one.
+         */
+        aio_bh_schedule_oneshot(old_ctx, aio_co_reschedule_self_bh, &data);
+        qemu_coroutine_yield();
+    }
+}
+
+void aio_co_wake(struct Coroutine *co)
+{
+    AioContext *ctx;
+
+    /* Read coroutine before co->ctx.  Matches smp_wmb in
+     * qemu_coroutine_enter.
+     */
+    smp_read_barrier_depends();
+    ctx = qatomic_read(&co->ctx);
+
+    aio_co_enter(ctx, co);
+}
+
+void aio_co_enter(AioContext *ctx, struct Coroutine *co)
+{
+    if (ctx != qemu_get_current_aio_context()) {
+        aio_co_schedule(ctx, co);
+        return;
+    }
+
+    if (qemu_in_coroutine()) {
+        Coroutine *self = qemu_coroutine_self();
+        assert(self != co);
+        QSIMPLEQ_INSERT_TAIL(&self->co_queue_wakeup, co, co_queue_next);
+    } else {
+        aio_context_acquire(ctx);
+        qemu_aio_coroutine_enter(ctx, co);
+        aio_context_release(ctx);
+    }
+}
+
+void aio_context_ref(AioContext *ctx)
+{
+    g_source_ref(&ctx->source);
+}
+
+void aio_context_unref(AioContext *ctx)
+{
+    g_source_unref(&ctx->source);
+}
+
+void aio_context_acquire(AioContext *ctx)
+{
+    qemu_rec_mutex_lock(&ctx->lock);
+}
+
+void aio_context_release(AioContext *ctx)
+{
+    qemu_rec_mutex_unlock(&ctx->lock);
+}
diff --git a/util/atomic64.c b/util/atomic64.c
new file mode 100644
index 000000000..93037d5b1
--- /dev/null
+++ b/util/atomic64.c
@@ -0,0 +1,83 @@
+/*
+ * Copyright (C) 2018, Emilio G. Cota 
+ *
+ * License: GNU GPL, version 2 or later.
+ *   See the COPYING file in the top-level directory.
+ */
+#include "qemu/osdep.h"
+#include "qemu/atomic.h"
+#include "qemu/thread.h"
+
+#ifdef CONFIG_ATOMIC64
+#error This file must only be compiled if !CONFIG_ATOMIC64
+#endif
+
+/*
+ * When !CONFIG_ATOMIC64, we serialize both reads and writes with spinlocks.
+ * We use an array of spinlocks, with padding computed at run-time based on
+ * the host's dcache line size.
+ * We point to the array with a void * to simplify the padding's computation.
+ * Each spinlock is located every lock_size bytes.
+ */
+static void *lock_array;
+static size_t lock_size;
+
+/*
+ * Systems without CONFIG_ATOMIC64 are unlikely to have many cores, so we use a
+ * small array of locks.
+ */
+#define NR_LOCKS 16
+
+static QemuSpin *addr_to_lock(const void *addr)
+{
+    uintptr_t a = (uintptr_t)addr;
+    uintptr_t idx;
+
+    idx = a >> qemu_dcache_linesize_log;
+    idx ^= (idx >> 8) ^ (idx >> 16);
+    idx &= NR_LOCKS - 1;
+    return lock_array + idx * lock_size;
+}
+
+#define GEN_READ(name, type)                    \
+    type name(const type *ptr)                  \
+    {                                           \
+        QemuSpin *lock = addr_to_lock(ptr);     \
+        type ret;                               \
+                                                \
+        qemu_spin_lock(lock);                   \
+        ret = *ptr;                             \
+        qemu_spin_unlock(lock);                 \
+        return ret;                             \
+    }
+
+GEN_READ(qatomic_read_i64, int64_t)
+GEN_READ(qatomic_read_u64, uint64_t)
+#undef GEN_READ
+
+#define GEN_SET(name, type)                     \
+    void name(type *ptr, type val)              \
+    {                                           \
+        QemuSpin *lock = addr_to_lock(ptr);     \
+                                                \
+        qemu_spin_lock(lock);                   \
+        *ptr = val;                             \
+        qemu_spin_unlock(lock);                 \
+    }
+
+GEN_SET(qatomic_set_i64, int64_t)
+GEN_SET(qatomic_set_u64, uint64_t)
+#undef GEN_SET
+
+void qatomic64_init(void)
+{
+    int i;
+
+    lock_size = ROUND_UP(sizeof(QemuSpin), qemu_dcache_linesize);
+    lock_array = qemu_memalign(qemu_dcache_linesize, lock_size * NR_LOCKS);
+    for (i = 0; i < NR_LOCKS; i++) {
+        QemuSpin *lock = lock_array + i * lock_size;
+
+        qemu_spin_init(lock);
+    }
+}
diff --git a/util/base64.c b/util/base64.c
new file mode 100644
index 000000000..811111ac4
--- /dev/null
+++ b/util/base64.c
@@ -0,0 +1,60 @@
+/*
+ * QEMU base64 helpers
+ *
+ * Copyright (c) 2015 Red Hat, Inc.
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, see .
+ *
+ */
+
+#include "qemu/osdep.h"
+#include "qapi/error.h"
+#include "qemu/base64.h"
+
+static const char *base64_valid_chars =
+    "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/=\n";
+
+uint8_t *qbase64_decode(const char *input,
+                        size_t in_len,
+                        size_t *out_len,
+                        Error **errp)
+{
+    *out_len = 0;
+
+    if (in_len != -1) {
+        /* Lack of NUL terminator is an error */
+        if (input[in_len] != '\0') {
+            error_setg(errp, "Base64 data is not NUL terminated");
+            return NULL;
+        }
+        /* Check there's no NULs embedded since we expect
+         * this to be valid base64 data */
+        if (memchr(input, '\0', in_len) != NULL) {
+            error_setg(errp, "Base64 data contains embedded NUL characters");
+            return NULL;
+        }
+
+        /* Now we know its a valid nul terminated string
+         * strspn is safe to use... */
+    } else {
+        in_len = strlen(input);
+    }
+
+    if (strspn(input, base64_valid_chars) != in_len) {
+        error_setg(errp, "Base64 data contains invalid characters");
+        return NULL;
+    }
+
+    return g_base64_decode(input, out_len);
+}
diff --git a/util/bitmap.c b/util/bitmap.c
new file mode 100644
index 000000000..1f201393a
--- /dev/null
+++ b/util/bitmap.c
@@ -0,0 +1,489 @@
+/*
+ * Bitmap Module
+ *
+ * Stolen from linux/src/lib/bitmap.c
+ *
+ * Copyright (C) 2010 Corentin Chary
+ *
+ * This source code is licensed under the GNU General Public License,
+ * Version 2.
+ */
+
+#include "qemu/osdep.h"
+#include "qemu/bitops.h"
+#include "qemu/bitmap.h"
+#include "qemu/atomic.h"
+
+/*
+ * bitmaps provide an array of bits, implemented using an
+ * array of unsigned longs.  The number of valid bits in a
+ * given bitmap does _not_ need to be an exact multiple of
+ * BITS_PER_LONG.
+ *
+ * The possible unused bits in the last, partially used word
+ * of a bitmap are 'don't care'.  The implementation makes
+ * no particular effort to keep them zero.  It ensures that
+ * their value will not affect the results of any operation.
+ * The bitmap operations that return Boolean (bitmap_empty,
+ * for example) or scalar (bitmap_weight, for example) results
+ * carefully filter out these unused bits from impacting their
+ * results.
+ *
+ * These operations actually hold to a slightly stronger rule:
+ * if you don't input any bitmaps to these ops that have some
+ * unused bits set, then they won't output any set unused bits
+ * in output bitmaps.
+ *
+ * The byte ordering of bitmaps is more natural on little
+ * endian architectures.
+ */
+
+int slow_bitmap_empty(const unsigned long *bitmap, long bits)
+{
+    long k, lim = bits/BITS_PER_LONG;
+
+    for (k = 0; k < lim; ++k) {
+        if (bitmap[k]) {
+            return 0;
+        }
+    }
+    if (bits % BITS_PER_LONG) {
+        if (bitmap[k] & BITMAP_LAST_WORD_MASK(bits)) {
+            return 0;
+        }
+    }
+
+    return 1;
+}
+
+int slow_bitmap_full(const unsigned long *bitmap, long bits)
+{
+    long k, lim = bits/BITS_PER_LONG;
+
+    for (k = 0; k < lim; ++k) {
+        if (~bitmap[k]) {
+            return 0;
+        }
+    }
+
+    if (bits % BITS_PER_LONG) {
+        if (~bitmap[k] & BITMAP_LAST_WORD_MASK(bits)) {
+            return 0;
+        }
+    }
+
+    return 1;
+}
+
+int slow_bitmap_equal(const unsigned long *bitmap1,
+                      const unsigned long *bitmap2, long bits)
+{
+    long k, lim = bits/BITS_PER_LONG;
+
+    for (k = 0; k < lim; ++k) {
+        if (bitmap1[k] != bitmap2[k]) {
+            return 0;
+        }
+    }
+
+    if (bits % BITS_PER_LONG) {
+        if ((bitmap1[k] ^ bitmap2[k]) & BITMAP_LAST_WORD_MASK(bits)) {
+            return 0;
+        }
+    }
+
+    return 1;
+}
+
+void slow_bitmap_complement(unsigned long *dst, const unsigned long *src,
+                            long bits)
+{
+    long k, lim = bits/BITS_PER_LONG;
+
+    for (k = 0; k < lim; ++k) {
+        dst[k] = ~src[k];
+    }
+
+    if (bits % BITS_PER_LONG) {
+        dst[k] = ~src[k] & BITMAP_LAST_WORD_MASK(bits);
+    }
+}
+
+int slow_bitmap_and(unsigned long *dst, const unsigned long *bitmap1,
+                    const unsigned long *bitmap2, long bits)
+{
+    long k;
+    long nr = BITS_TO_LONGS(bits);
+    unsigned long result = 0;
+
+    for (k = 0; k < nr; k++) {
+        result |= (dst[k] = bitmap1[k] & bitmap2[k]);
+    }
+    return result != 0;
+}
+
+void slow_bitmap_or(unsigned long *dst, const unsigned long *bitmap1,
+                    const unsigned long *bitmap2, long bits)
+{
+    long k;
+    long nr = BITS_TO_LONGS(bits);
+
+    for (k = 0; k < nr; k++) {
+        dst[k] = bitmap1[k] | bitmap2[k];
+    }
+}
+
+void slow_bitmap_xor(unsigned long *dst, const unsigned long *bitmap1,
+                     const unsigned long *bitmap2, long bits)
+{
+    long k;
+    long nr = BITS_TO_LONGS(bits);
+
+    for (k = 0; k < nr; k++) {
+        dst[k] = bitmap1[k] ^ bitmap2[k];
+    }
+}
+
+int slow_bitmap_andnot(unsigned long *dst, const unsigned long *bitmap1,
+                       const unsigned long *bitmap2, long bits)
+{
+    long k;
+    long nr = BITS_TO_LONGS(bits);
+    unsigned long result = 0;
+
+    for (k = 0; k < nr; k++) {
+        result |= (dst[k] = bitmap1[k] & ~bitmap2[k]);
+    }
+    return result != 0;
+}
+
+void bitmap_set(unsigned long *map, long start, long nr)
+{
+    unsigned long *p = map + BIT_WORD(start);
+    const long size = start + nr;
+    int bits_to_set = BITS_PER_LONG - (start % BITS_PER_LONG);
+    unsigned long mask_to_set = BITMAP_FIRST_WORD_MASK(start);
+
+    assert(start >= 0 && nr >= 0);
+
+    while (nr - bits_to_set >= 0) {
+        *p |= mask_to_set;
+        nr -= bits_to_set;
+        bits_to_set = BITS_PER_LONG;
+        mask_to_set = ~0UL;
+        p++;
+    }
+    if (nr) {
+        mask_to_set &= BITMAP_LAST_WORD_MASK(size);
+        *p |= mask_to_set;
+    }
+}
+
+void bitmap_set_atomic(unsigned long *map, long start, long nr)
+{
+    unsigned long *p = map + BIT_WORD(start);
+    const long size = start + nr;
+    int bits_to_set = BITS_PER_LONG - (start % BITS_PER_LONG);
+    unsigned long mask_to_set = BITMAP_FIRST_WORD_MASK(start);
+
+    assert(start >= 0 && nr >= 0);
+
+    /* First word */
+    if (nr - bits_to_set > 0) {
+        qatomic_or(p, mask_to_set);
+        nr -= bits_to_set;
+        bits_to_set = BITS_PER_LONG;
+        mask_to_set = ~0UL;
+        p++;
+    }
+
+    /* Full words */
+    if (bits_to_set == BITS_PER_LONG) {
+        while (nr >= BITS_PER_LONG) {
+            *p = ~0UL;
+            nr -= BITS_PER_LONG;
+            p++;
+        }
+    }
+
+    /* Last word */
+    if (nr) {
+        mask_to_set &= BITMAP_LAST_WORD_MASK(size);
+        qatomic_or(p, mask_to_set);
+    } else {
+        /* If we avoided the full barrier in qatomic_or(), issue a
+         * barrier to account for the assignments in the while loop.
+         */
+        smp_mb();
+    }
+}
+
+void bitmap_clear(unsigned long *map, long start, long nr)
+{
+    unsigned long *p = map + BIT_WORD(start);
+    const long size = start + nr;
+    int bits_to_clear = BITS_PER_LONG - (start % BITS_PER_LONG);
+    unsigned long mask_to_clear = BITMAP_FIRST_WORD_MASK(start);
+
+    assert(start >= 0 && nr >= 0);
+
+    while (nr - bits_to_clear >= 0) {
+        *p &= ~mask_to_clear;
+        nr -= bits_to_clear;
+        bits_to_clear = BITS_PER_LONG;
+        mask_to_clear = ~0UL;
+        p++;
+    }
+    if (nr) {
+        mask_to_clear &= BITMAP_LAST_WORD_MASK(size);
+        *p &= ~mask_to_clear;
+    }
+}
+
+bool bitmap_test_and_clear_atomic(unsigned long *map, long start, long nr)
+{
+    unsigned long *p = map + BIT_WORD(start);
+    const long size = start + nr;
+    int bits_to_clear = BITS_PER_LONG - (start % BITS_PER_LONG);
+    unsigned long mask_to_clear = BITMAP_FIRST_WORD_MASK(start);
+    unsigned long dirty = 0;
+    unsigned long old_bits;
+
+    assert(start >= 0 && nr >= 0);
+
+    /* First word */
+    if (nr - bits_to_clear > 0) {
+        old_bits = qatomic_fetch_and(p, ~mask_to_clear);
+        dirty |= old_bits & mask_to_clear;
+        nr -= bits_to_clear;
+        bits_to_clear = BITS_PER_LONG;
+        mask_to_clear = ~0UL;
+        p++;
+    }
+
+    /* Full words */
+    if (bits_to_clear == BITS_PER_LONG) {
+        while (nr >= BITS_PER_LONG) {
+            if (*p) {
+                old_bits = qatomic_xchg(p, 0);
+                dirty |= old_bits;
+            }
+            nr -= BITS_PER_LONG;
+            p++;
+        }
+    }
+
+    /* Last word */
+    if (nr) {
+        mask_to_clear &= BITMAP_LAST_WORD_MASK(size);
+        old_bits = qatomic_fetch_and(p, ~mask_to_clear);
+        dirty |= old_bits & mask_to_clear;
+    } else {
+        if (!dirty) {
+            smp_mb();
+        }
+    }
+
+    return dirty != 0;
+}
+
+void bitmap_copy_and_clear_atomic(unsigned long *dst, unsigned long *src,
+                                  long nr)
+{
+    while (nr > 0) {
+        *dst = qatomic_xchg(src, 0);
+        dst++;
+        src++;
+        nr -= BITS_PER_LONG;
+    }
+}
+
+#define ALIGN_MASK(x,mask)      (((x)+(mask))&~(mask))
+
+/**
+ * bitmap_find_next_zero_area - find a contiguous aligned zero area
+ * @map: The address to base the search on
+ * @size: The bitmap size in bits
+ * @start: The bitnumber to start searching at
+ * @nr: The number of zeroed bits we're looking for
+ * @align_mask: Alignment mask for zero area
+ *
+ * The @align_mask should be one less than a power of 2; the effect is that
+ * the bit offset of all zero areas this function finds is multiples of that
+ * power of 2. A @align_mask of 0 means no alignment is required.
+ */
+unsigned long bitmap_find_next_zero_area(unsigned long *map,
+                                         unsigned long size,
+                                         unsigned long start,
+                                         unsigned long nr,
+                                         unsigned long align_mask)
+{
+    unsigned long index, end, i;
+again:
+    index = find_next_zero_bit(map, size, start);
+
+    /* Align allocation */
+    index = ALIGN_MASK(index, align_mask);
+
+    end = index + nr;
+    if (end > size) {
+        return end;
+    }
+    i = find_next_bit(map, end, index);
+    if (i < end) {
+        start = i + 1;
+        goto again;
+    }
+    return index;
+}
+
+int slow_bitmap_intersects(const unsigned long *bitmap1,
+                           const unsigned long *bitmap2, long bits)
+{
+    long k, lim = bits/BITS_PER_LONG;
+
+    for (k = 0; k < lim; ++k) {
+        if (bitmap1[k] & bitmap2[k]) {
+            return 1;
+        }
+    }
+
+    if (bits % BITS_PER_LONG) {
+        if ((bitmap1[k] & bitmap2[k]) & BITMAP_LAST_WORD_MASK(bits)) {
+            return 1;
+        }
+    }
+    return 0;
+}
+
+long slow_bitmap_count_one(const unsigned long *bitmap, long nbits)
+{
+    long k, lim = nbits / BITS_PER_LONG, result = 0;
+
+    for (k = 0; k < lim; k++) {
+        result += ctpopl(bitmap[k]);
+    }
+
+    if (nbits % BITS_PER_LONG) {
+        result += ctpopl(bitmap[k] & BITMAP_LAST_WORD_MASK(nbits));
+    }
+
+    return result;
+}
+
+static void bitmap_to_from_le(unsigned long *dst,
+                              const unsigned long *src, long nbits)
+{
+    long len = BITS_TO_LONGS(nbits);
+
+#ifdef HOST_WORDS_BIGENDIAN
+    long index;
+
+    for (index = 0; index < len; index++) {
+# if HOST_LONG_BITS == 64
+        dst[index] = bswap64(src[index]);
+# else
+        dst[index] = bswap32(src[index]);
+# endif
+    }
+#else
+    memcpy(dst, src, len * sizeof(unsigned long));
+#endif
+}
+
+void bitmap_from_le(unsigned long *dst, const unsigned long *src,
+                    long nbits)
+{
+    bitmap_to_from_le(dst, src, nbits);
+}
+
+void bitmap_to_le(unsigned long *dst, const unsigned long *src,
+                  long nbits)
+{
+    bitmap_to_from_le(dst, src, nbits);
+}
+
+/*
+ * Copy "src" bitmap with a positive offset and put it into the "dst"
+ * bitmap.  The caller needs to make sure the bitmap size of "src"
+ * is bigger than (shift + nbits).
+ */
+void bitmap_copy_with_src_offset(unsigned long *dst, const unsigned long *src,
+                                 unsigned long shift, unsigned long nbits)
+{
+    unsigned long left_mask, right_mask, last_mask;
+
+    /* Proper shift src pointer to the first word to copy from */
+    src += BIT_WORD(shift);
+    shift %= BITS_PER_LONG;
+
+    if (!shift) {
+        /* Fast path */
+        bitmap_copy(dst, src, nbits);
+        return;
+    }
+
+    right_mask = (1ul << shift) - 1;
+    left_mask = ~right_mask;
+
+    while (nbits >= BITS_PER_LONG) {
+        *dst = (*src & left_mask) >> shift;
+        *dst |= (src[1] & right_mask) << (BITS_PER_LONG - shift);
+        dst++;
+        src++;
+        nbits -= BITS_PER_LONG;
+    }
+
+    if (nbits > BITS_PER_LONG - shift) {
+        *dst = (*src & left_mask) >> shift;
+        nbits -= BITS_PER_LONG - shift;
+        last_mask = (1ul << nbits) - 1;
+        *dst |= (src[1] & last_mask) << (BITS_PER_LONG - shift);
+    } else if (nbits) {
+        last_mask = (1ul << nbits) - 1;
+        *dst = (*src >> shift) & last_mask;
+    }
+}
+
+/*
+ * Copy "src" bitmap into the "dst" bitmap with an offset in the
+ * "dst".  The caller needs to make sure the bitmap size of "dst" is
+ * bigger than (shift + nbits).
+ */
+void bitmap_copy_with_dst_offset(unsigned long *dst, const unsigned long *src,
+                                 unsigned long shift, unsigned long nbits)
+{
+    unsigned long left_mask, right_mask, last_mask;
+
+    /* Proper shift dst pointer to the first word to copy from */
+    dst += BIT_WORD(shift);
+    shift %= BITS_PER_LONG;
+
+    if (!shift) {
+        /* Fast path */
+        bitmap_copy(dst, src, nbits);
+        return;
+    }
+
+    right_mask = (1ul << (BITS_PER_LONG - shift)) - 1;
+    left_mask = ~right_mask;
+
+    *dst &= (1ul << shift) - 1;
+    while (nbits >= BITS_PER_LONG) {
+        *dst |= (*src & right_mask) << shift;
+        dst[1] = (*src & left_mask) >> (BITS_PER_LONG - shift);
+        dst++;
+        src++;
+        nbits -= BITS_PER_LONG;
+    }
+
+    if (nbits > BITS_PER_LONG - shift) {
+        *dst |= (*src & right_mask) << shift;
+        nbits -= BITS_PER_LONG - shift;
+        last_mask = ((1ul << nbits) - 1) << (BITS_PER_LONG - shift);
+        dst[1] = (*src & last_mask) >> (BITS_PER_LONG - shift);
+    } else if (nbits) {
+        last_mask = (1ul << nbits) - 1;
+        *dst |= (*src & last_mask) << shift;
+    }
+}
diff --git a/util/bitops.c b/util/bitops.c
new file mode 100644
index 000000000..3fe6b1c4f
--- /dev/null
+++ b/util/bitops.c
@@ -0,0 +1,157 @@
+/*
+ * Copyright (C) 2004 Red Hat, Inc. All Rights Reserved.
+ * Written by David Howells (dhowells@redhat.com)
+ * Copyright (C) 2008 IBM Corporation
+ * Written by Rusty Russell 
+ * (Inspired by David Howell's find_next_bit implementation)
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#include "qemu/osdep.h"
+#include "qemu/bitops.h"
+
+/*
+ * Find the next set bit in a memory region.
+ */
+unsigned long find_next_bit(const unsigned long *addr, unsigned long size,
+                            unsigned long offset)
+{
+    const unsigned long *p = addr + BIT_WORD(offset);
+    unsigned long result = offset & ~(BITS_PER_LONG-1);
+    unsigned long tmp;
+
+    if (offset >= size) {
+        return size;
+    }
+    size -= result;
+    offset %= BITS_PER_LONG;
+    if (offset) {
+        tmp = *(p++);
+        tmp &= (~0UL << offset);
+        if (size < BITS_PER_LONG) {
+            goto found_first;
+        }
+        if (tmp) {
+            goto found_middle;
+        }
+        size -= BITS_PER_LONG;
+        result += BITS_PER_LONG;
+    }
+    while (size >= 4*BITS_PER_LONG) {
+        unsigned long d1, d2, d3;
+        tmp = *p;
+        d1 = *(p+1);
+        d2 = *(p+2);
+        d3 = *(p+3);
+        if (tmp) {
+            goto found_middle;
+        }
+        if (d1 | d2 | d3) {
+            break;
+        }
+        p += 4;
+        result += 4*BITS_PER_LONG;
+        size -= 4*BITS_PER_LONG;
+    }
+    while (size >= BITS_PER_LONG) {
+        if ((tmp = *(p++))) {
+            goto found_middle;
+        }
+        result += BITS_PER_LONG;
+        size -= BITS_PER_LONG;
+    }
+    if (!size) {
+        return result;
+    }
+    tmp = *p;
+
+found_first:
+    tmp &= (~0UL >> (BITS_PER_LONG - size));
+    if (tmp == 0UL) {		/* Are any bits set? */
+        return result + size;	/* Nope. */
+    }
+found_middle:
+    return result + ctzl(tmp);
+}
+
+/*
+ * This implementation of find_{first,next}_zero_bit was stolen from
+ * Linus' asm-alpha/bitops.h.
+ */
+unsigned long find_next_zero_bit(const unsigned long *addr, unsigned long size,
+                                 unsigned long offset)
+{
+    const unsigned long *p = addr + BIT_WORD(offset);
+    unsigned long result = offset & ~(BITS_PER_LONG-1);
+    unsigned long tmp;
+
+    if (offset >= size) {
+        return size;
+    }
+    size -= result;
+    offset %= BITS_PER_LONG;
+    if (offset) {
+        tmp = *(p++);
+        tmp |= ~0UL >> (BITS_PER_LONG - offset);
+        if (size < BITS_PER_LONG) {
+            goto found_first;
+        }
+        if (~tmp) {
+            goto found_middle;
+        }
+        size -= BITS_PER_LONG;
+        result += BITS_PER_LONG;
+    }
+    while (size & ~(BITS_PER_LONG-1)) {
+        if (~(tmp = *(p++))) {
+            goto found_middle;
+        }
+        result += BITS_PER_LONG;
+        size -= BITS_PER_LONG;
+    }
+    if (!size) {
+        return result;
+    }
+    tmp = *p;
+
+found_first:
+    tmp |= ~0UL << size;
+    if (tmp == ~0UL) {	/* Are any bits zero? */
+        return result + size;	/* Nope. */
+    }
+found_middle:
+    return result + ctzl(~tmp);
+}
+
+unsigned long find_last_bit(const unsigned long *addr, unsigned long size)
+{
+    unsigned long words;
+    unsigned long tmp;
+
+    /* Start at final word. */
+    words = size / BITS_PER_LONG;
+
+    /* Partial final word? */
+    if (size & (BITS_PER_LONG-1)) {
+        tmp = (addr[words] & (~0UL >> (BITS_PER_LONG
+                                       - (size & (BITS_PER_LONG-1)))));
+        if (tmp) {
+            goto found;
+        }
+    }
+
+    while (words) {
+        tmp = addr[--words];
+        if (tmp) {
+        found:
+            return words * BITS_PER_LONG + BITS_PER_LONG - 1 - clzl(tmp);
+        }
+    }
+
+    /* Not found */
+    return size;
+}
diff --git a/util/block-helpers.c b/util/block-helpers.c
new file mode 100644
index 000000000..c4851432f
--- /dev/null
+++ b/util/block-helpers.c
@@ -0,0 +1,46 @@
+/*
+ * Block utility functions
+ *
+ * Copyright IBM, Corp. 2011
+ * Copyright (c) 2020 Coiby Xu 
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
+ * See the COPYING file in the top-level directory.
+ */
+
+#include "qemu/osdep.h"
+#include "qapi/error.h"
+#include "qapi/qmp/qerror.h"
+#include "block-helpers.h"
+
+/**
+ * check_block_size:
+ * @id: The unique ID of the object
+ * @name: The name of the property being validated
+ * @value: The block size in bytes
+ * @errp: A pointer to an area to store an error
+ *
+ * This function checks that the block size meets the following conditions:
+ * 1. At least MIN_BLOCK_SIZE
+ * 2. No larger than MAX_BLOCK_SIZE
+ * 3. A power of 2
+ */
+void check_block_size(const char *id, const char *name, int64_t value,
+                      Error **errp)
+{
+    /* value of 0 means "unset" */
+    if (value && (value < MIN_BLOCK_SIZE || value > MAX_BLOCK_SIZE)) {
+        error_setg(errp, QERR_PROPERTY_VALUE_OUT_OF_RANGE,
+                   id, name, value, MIN_BLOCK_SIZE, MAX_BLOCK_SIZE);
+        return;
+    }
+
+    /* We rely on power-of-2 blocksizes for bitmasks */
+    if ((value & (value - 1)) != 0) {
+        error_setg(errp,
+                   "Property %s.%s doesn't take value '%" PRId64
+                   "', it's not a power of 2",
+                   id, name, value);
+        return;
+    }
+}
diff --git a/util/block-helpers.h b/util/block-helpers.h
new file mode 100644
index 000000000..b53295a52
--- /dev/null
+++ b/util/block-helpers.h
@@ -0,0 +1,19 @@
+#ifndef BLOCK_HELPERS_H
+#define BLOCK_HELPERS_H
+
+#include "qemu/units.h"
+
+/* lower limit is sector size */
+#define MIN_BLOCK_SIZE          INT64_C(512)
+#define MIN_BLOCK_SIZE_STR      "512 B"
+/*
+ * upper limit is arbitrary, 2 MiB looks sufficient for all sensible uses, and
+ * matches qcow2 cluster size limit
+ */
+#define MAX_BLOCK_SIZE          (2 * MiB)
+#define MAX_BLOCK_SIZE_STR      "2 MiB"
+
+void check_block_size(const char *id, const char *name, int64_t value,
+                      Error **errp);
+
+#endif /* BLOCK_HELPERS_H */
diff --git a/util/buffer.c b/util/buffer.c
new file mode 100644
index 000000000..743eaa930
--- /dev/null
+++ b/util/buffer.c
@@ -0,0 +1,173 @@
+/*
+ * QEMU generic buffers
+ *
+ * Copyright (c) 2015 Red Hat, Inc.
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, see .
+ *
+ */
+
+#include "qemu/osdep.h"
+#include "qemu/host-utils.h"
+#include "qemu/buffer.h"
+#include "trace.h"
+
+#define BUFFER_MIN_INIT_SIZE     4096
+#define BUFFER_MIN_SHRINK_SIZE  65536
+
+/* define the factor alpha for the exponential smoothing
+ * that is used in the average size calculation. a shift
+ * of 7 results in an alpha of 1/2^7. */
+#define BUFFER_AVG_SIZE_SHIFT       7
+
+static size_t buffer_req_size(Buffer *buffer, size_t len)
+{
+    return MAX(BUFFER_MIN_INIT_SIZE,
+               pow2ceil(buffer->offset + len));
+}
+
+static void buffer_adj_size(Buffer *buffer, size_t len)
+{
+    size_t old = buffer->capacity;
+    buffer->capacity = buffer_req_size(buffer, len);
+    buffer->buffer = g_realloc(buffer->buffer, buffer->capacity);
+    trace_buffer_resize(buffer->name ?: "unnamed",
+                        old, buffer->capacity);
+
+    /* make it even harder for the buffer to shrink, reset average size
+     * to current capacity if it is larger than the average. */
+    buffer->avg_size = MAX(buffer->avg_size,
+                           buffer->capacity << BUFFER_AVG_SIZE_SHIFT);
+}
+
+void buffer_init(Buffer *buffer, const char *name, ...)
+{
+    va_list ap;
+
+    va_start(ap, name);
+    buffer->name = g_strdup_vprintf(name, ap);
+    va_end(ap);
+}
+
+static uint64_t buffer_get_avg_size(Buffer *buffer)
+{
+    return buffer->avg_size >> BUFFER_AVG_SIZE_SHIFT;
+}
+
+void buffer_shrink(Buffer *buffer)
+{
+    size_t new;
+
+    /* Calculate the average size of the buffer as
+     * avg_size = avg_size * ( 1 - a ) + required_size * a
+     * where a is 1 / 2 ^ BUFFER_AVG_SIZE_SHIFT. */
+    buffer->avg_size *= (1 << BUFFER_AVG_SIZE_SHIFT) - 1;
+    buffer->avg_size >>= BUFFER_AVG_SIZE_SHIFT;
+    buffer->avg_size += buffer_req_size(buffer, 0);
+
+    /* And then only shrink if the average size of the buffer is much
+     * too big, to avoid bumping up & down the buffers all the time.
+     * realloc() isn't exactly cheap ...  */
+    new = buffer_req_size(buffer, buffer_get_avg_size(buffer));
+    if (new < buffer->capacity >> 3 &&
+        new >= BUFFER_MIN_SHRINK_SIZE) {
+        buffer_adj_size(buffer, buffer_get_avg_size(buffer));
+    }
+
+    buffer_adj_size(buffer, 0);
+}
+
+void buffer_reserve(Buffer *buffer, size_t len)
+{
+    if ((buffer->capacity - buffer->offset) < len) {
+        buffer_adj_size(buffer, len);
+    }
+}
+
+gboolean buffer_empty(Buffer *buffer)
+{
+    return buffer->offset == 0;
+}
+
+uint8_t *buffer_end(Buffer *buffer)
+{
+    return buffer->buffer + buffer->offset;
+}
+
+void buffer_reset(Buffer *buffer)
+{
+    buffer->offset = 0;
+    buffer_shrink(buffer);
+}
+
+void buffer_free(Buffer *buffer)
+{
+    trace_buffer_free(buffer->name ?: "unnamed", buffer->capacity);
+    g_free(buffer->buffer);
+    g_free(buffer->name);
+    buffer->offset = 0;
+    buffer->capacity = 0;
+    buffer->buffer = NULL;
+    buffer->name = NULL;
+}
+
+void buffer_append(Buffer *buffer, const void *data, size_t len)
+{
+    memcpy(buffer->buffer + buffer->offset, data, len);
+    buffer->offset += len;
+}
+
+void buffer_advance(Buffer *buffer, size_t len)
+{
+    memmove(buffer->buffer, buffer->buffer + len,
+            (buffer->offset - len));
+    buffer->offset -= len;
+    buffer_shrink(buffer);
+}
+
+void buffer_move_empty(Buffer *to, Buffer *from)
+{
+    trace_buffer_move_empty(to->name ?: "unnamed",
+                            from->offset,
+                            from->name ?: "unnamed");
+    assert(to->offset == 0);
+
+    g_free(to->buffer);
+    to->offset = from->offset;
+    to->capacity = from->capacity;
+    to->buffer = from->buffer;
+
+    from->offset = 0;
+    from->capacity = 0;
+    from->buffer = NULL;
+}
+
+void buffer_move(Buffer *to, Buffer *from)
+{
+    if (to->offset == 0) {
+        buffer_move_empty(to, from);
+        return;
+    }
+
+    trace_buffer_move(to->name ?: "unnamed",
+                      from->offset,
+                      from->name ?: "unnamed");
+    buffer_reserve(to, from->offset);
+    buffer_append(to, from->buffer, from->offset);
+
+    g_free(from->buffer);
+    from->offset = 0;
+    from->capacity = 0;
+    from->buffer = NULL;
+}
diff --git a/util/bufferiszero.c b/util/bufferiszero.c
new file mode 100644
index 000000000..695bb4ce2
--- /dev/null
+++ b/util/bufferiszero.c
@@ -0,0 +1,355 @@
+/*
+ * Simple C functions to supplement the C library
+ *
+ * Copyright (c) 2006 Fabrice Bellard
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+#include "qemu/osdep.h"
+#include "qemu/cutils.h"
+#include "qemu/bswap.h"
+
+static bool
+buffer_zero_int(const void *buf, size_t len)
+{
+    if (unlikely(len < 8)) {
+        /* For a very small buffer, simply accumulate all the bytes.  */
+        const unsigned char *p = buf;
+        const unsigned char *e = buf + len;
+        unsigned char t = 0;
+
+        do {
+            t |= *p++;
+        } while (p < e);
+
+        return t == 0;
+    } else {
+        /* Otherwise, use the unaligned memory access functions to
+           handle the beginning and end of the buffer, with a couple
+           of loops handling the middle aligned section.  */
+        uint64_t t = ldq_he_p(buf);
+        const uint64_t *p = (uint64_t *)(((uintptr_t)buf + 8) & -8);
+        const uint64_t *e = (uint64_t *)(((uintptr_t)buf + len) & -8);
+
+        for (; p + 8 <= e; p += 8) {
+            __builtin_prefetch(p + 8);
+            if (t) {
+                return false;
+            }
+            t = p[0] | p[1] | p[2] | p[3] | p[4] | p[5] | p[6] | p[7];
+        }
+        while (p < e) {
+            t |= *p++;
+        }
+        t |= ldq_he_p(buf + len - 8);
+
+        return t == 0;
+    }
+}
+
+#if defined(CONFIG_AVX512F_OPT) || defined(CONFIG_AVX2_OPT) || defined(__SSE2__)
+/* Do not use push_options pragmas unnecessarily, because clang
+ * does not support them.
+ */
+#if defined(CONFIG_AVX512F_OPT) || defined(CONFIG_AVX2_OPT)
+#pragma GCC push_options
+#pragma GCC target("sse2")
+#endif
+#include 
+
+/* Note that each of these vectorized functions require len >= 64.  */
+
+static bool
+buffer_zero_sse2(const void *buf, size_t len)
+{
+    __m128i t = _mm_loadu_si128(buf);
+    __m128i *p = (__m128i *)(((uintptr_t)buf + 5 * 16) & -16);
+    __m128i *e = (__m128i *)(((uintptr_t)buf + len) & -16);
+    __m128i zero = _mm_setzero_si128();
+
+    /* Loop over 16-byte aligned blocks of 64.  */
+    while (likely(p <= e)) {
+        __builtin_prefetch(p);
+        t = _mm_cmpeq_epi8(t, zero);
+        if (unlikely(_mm_movemask_epi8(t) != 0xFFFF)) {
+            return false;
+        }
+        t = p[-4] | p[-3] | p[-2] | p[-1];
+        p += 4;
+    }
+
+    /* Finish the aligned tail.  */
+    t |= e[-3];
+    t |= e[-2];
+    t |= e[-1];
+
+    /* Finish the unaligned tail.  */
+    t |= _mm_loadu_si128(buf + len - 16);
+
+    return _mm_movemask_epi8(_mm_cmpeq_epi8(t, zero)) == 0xFFFF;
+}
+#if defined(CONFIG_AVX512F_OPT) || defined(CONFIG_AVX2_OPT)
+#pragma GCC pop_options
+#endif
+
+#ifdef CONFIG_AVX2_OPT
+/* Note that due to restrictions/bugs wrt __builtin functions in gcc <= 4.8,
+ * the includes have to be within the corresponding push_options region, and
+ * therefore the regions themselves have to be ordered with increasing ISA.
+ */
+#pragma GCC push_options
+#pragma GCC target("sse4")
+#include 
+
+static bool
+buffer_zero_sse4(const void *buf, size_t len)
+{
+    __m128i t = _mm_loadu_si128(buf);
+    __m128i *p = (__m128i *)(((uintptr_t)buf + 5 * 16) & -16);
+    __m128i *e = (__m128i *)(((uintptr_t)buf + len) & -16);
+
+    /* Loop over 16-byte aligned blocks of 64.  */
+    while (likely(p <= e)) {
+        __builtin_prefetch(p);
+        if (unlikely(!_mm_testz_si128(t, t))) {
+            return false;
+        }
+        t = p[-4] | p[-3] | p[-2] | p[-1];
+        p += 4;
+    }
+
+    /* Finish the aligned tail.  */
+    t |= e[-3];
+    t |= e[-2];
+    t |= e[-1];
+
+    /* Finish the unaligned tail.  */
+    t |= _mm_loadu_si128(buf + len - 16);
+
+    return _mm_testz_si128(t, t);
+}
+
+#pragma GCC pop_options
+#pragma GCC push_options
+#pragma GCC target("avx2")
+#include 
+
+static bool
+buffer_zero_avx2(const void *buf, size_t len)
+{
+    /* Begin with an unaligned head of 32 bytes.  */
+    __m256i t = _mm256_loadu_si256(buf);
+    __m256i *p = (__m256i *)(((uintptr_t)buf + 5 * 32) & -32);
+    __m256i *e = (__m256i *)(((uintptr_t)buf + len) & -32);
+
+    /* Loop over 32-byte aligned blocks of 128.  */
+    while (p <= e) {
+        __builtin_prefetch(p);
+        if (unlikely(!_mm256_testz_si256(t, t))) {
+            return false;
+        }
+        t = p[-4] | p[-3] | p[-2] | p[-1];
+        p += 4;
+    } ;
+
+    /* Finish the last block of 128 unaligned.  */
+    t |= _mm256_loadu_si256(buf + len - 4 * 32);
+    t |= _mm256_loadu_si256(buf + len - 3 * 32);
+    t |= _mm256_loadu_si256(buf + len - 2 * 32);
+    t |= _mm256_loadu_si256(buf + len - 1 * 32);
+
+    return _mm256_testz_si256(t, t);
+}
+#pragma GCC pop_options
+#endif /* CONFIG_AVX2_OPT */
+
+#ifdef CONFIG_AVX512F_OPT
+#pragma GCC push_options
+#pragma GCC target("avx512f")
+#include 
+
+static bool
+buffer_zero_avx512(const void *buf, size_t len)
+{
+    /* Begin with an unaligned head of 64 bytes.  */
+    __m512i t = _mm512_loadu_si512(buf);
+    __m512i *p = (__m512i *)(((uintptr_t)buf + 5 * 64) & -64);
+    __m512i *e = (__m512i *)(((uintptr_t)buf + len) & -64);
+
+    /* Loop over 64-byte aligned blocks of 256.  */
+    while (p <= e) {
+        __builtin_prefetch(p);
+        if (unlikely(_mm512_test_epi64_mask(t, t))) {
+            return false;
+        }
+        t = p[-4] | p[-3] | p[-2] | p[-1];
+        p += 4;
+    }
+
+    t |= _mm512_loadu_si512(buf + len - 4 * 64);
+    t |= _mm512_loadu_si512(buf + len - 3 * 64);
+    t |= _mm512_loadu_si512(buf + len - 2 * 64);
+    t |= _mm512_loadu_si512(buf + len - 1 * 64);
+
+    return !_mm512_test_epi64_mask(t, t);
+
+}
+#pragma GCC pop_options
+#endif
+
+
+/* Note that for test_buffer_is_zero_next_accel, the most preferred
+ * ISA must have the least significant bit.
+ */
+#define CACHE_AVX512F 1
+#define CACHE_AVX2    2
+#define CACHE_SSE4    4
+#define CACHE_SSE2    8
+
+/* Make sure that these variables are appropriately initialized when
+ * SSE2 is enabled on the compiler command-line, but the compiler is
+ * too old to support CONFIG_AVX2_OPT.
+ */
+#if defined(CONFIG_AVX512F_OPT) || defined(CONFIG_AVX2_OPT)
+# define INIT_CACHE 0
+# define INIT_ACCEL buffer_zero_int
+#else
+# ifndef __SSE2__
+#  error "ISA selection confusion"
+# endif
+# define INIT_CACHE CACHE_SSE2
+# define INIT_ACCEL buffer_zero_sse2
+#endif
+
+static unsigned cpuid_cache = INIT_CACHE;
+static bool (*buffer_accel)(const void *, size_t) = INIT_ACCEL;
+static int length_to_accel = 64;
+
+static void init_accel(unsigned cache)
+{
+    bool (*fn)(const void *, size_t) = buffer_zero_int;
+    if (cache & CACHE_SSE2) {
+        fn = buffer_zero_sse2;
+        length_to_accel = 64;
+    }
+#ifdef CONFIG_AVX2_OPT
+    if (cache & CACHE_SSE4) {
+        fn = buffer_zero_sse4;
+        length_to_accel = 64;
+    }
+    if (cache & CACHE_AVX2) {
+        fn = buffer_zero_avx2;
+        length_to_accel = 128;
+    }
+#endif
+#ifdef CONFIG_AVX512F_OPT
+    if (cache & CACHE_AVX512F) {
+        fn = buffer_zero_avx512;
+        length_to_accel = 256;
+    }
+#endif
+    buffer_accel = fn;
+}
+
+#if defined(CONFIG_AVX512F_OPT) || defined(CONFIG_AVX2_OPT)
+#include "qemu/cpuid.h"
+
+static void __attribute__((constructor)) init_cpuid_cache(void)
+{
+    int max = __get_cpuid_max(0, NULL);
+    int a, b, c, d;
+    unsigned cache = 0;
+
+    if (max >= 1) {
+        __cpuid(1, a, b, c, d);
+        if (d & bit_SSE2) {
+            cache |= CACHE_SSE2;
+        }
+        if (c & bit_SSE4_1) {
+            cache |= CACHE_SSE4;
+        }
+
+        /* We must check that AVX is not just available, but usable.  */
+        if ((c & bit_OSXSAVE) && (c & bit_AVX) && max >= 7) {
+            int bv;
+            __asm("xgetbv" : "=a"(bv), "=d"(d) : "c"(0));
+            __cpuid_count(7, 0, a, b, c, d);
+            if ((bv & 0x6) == 0x6 && (b & bit_AVX2)) {
+                cache |= CACHE_AVX2;
+            }
+            /* 0xe6:
+            *  XCR0[7:5] = 111b (OPMASK state, upper 256-bit of ZMM0-ZMM15
+            *                    and ZMM16-ZMM31 state are enabled by OS)
+            *  XCR0[2:1] = 11b (XMM state and YMM state are enabled by OS)
+            */
+            if ((bv & 0xe6) == 0xe6 && (b & bit_AVX512F)) {
+                cache |= CACHE_AVX512F;
+            }
+        }
+    }
+    cpuid_cache = cache;
+    init_accel(cache);
+}
+#endif /* CONFIG_AVX2_OPT */
+
+bool test_buffer_is_zero_next_accel(void)
+{
+    /* If no bits set, we just tested buffer_zero_int, and there
+       are no more acceleration options to test.  */
+    if (cpuid_cache == 0) {
+        return false;
+    }
+    /* Disable the accelerator we used before and select a new one.  */
+    cpuid_cache &= cpuid_cache - 1;
+    init_accel(cpuid_cache);
+    return true;
+}
+
+static bool select_accel_fn(const void *buf, size_t len)
+{
+    if (likely(len >= length_to_accel)) {
+        return buffer_accel(buf, len);
+    }
+    return buffer_zero_int(buf, len);
+}
+
+#else
+#define select_accel_fn  buffer_zero_int
+bool test_buffer_is_zero_next_accel(void)
+{
+    return false;
+}
+#endif
+
+/*
+ * Checks if a buffer is all zeroes
+ */
+bool buffer_is_zero(const void *buf, size_t len)
+{
+    if (unlikely(len == 0)) {
+        return true;
+    }
+
+    /* Fetch the beginning of the buffer while we select the accelerator.  */
+    __builtin_prefetch(buf);
+
+    /* Use an optimized zero check if possible.  Note that this also
+       includes a check for an unrolled loop over 64-bit integers.  */
+    return select_accel_fn(buf, len);
+}
diff --git a/util/cacheinfo.c b/util/cacheinfo.c
new file mode 100644
index 000000000..7804c186b
--- /dev/null
+++ b/util/cacheinfo.c
@@ -0,0 +1,197 @@
+/*
+ * cacheinfo.c - helpers to query the host about its caches
+ *
+ * Copyright (C) 2017, Emilio G. Cota 
+ * License: GNU GPL, version 2 or later.
+ *   See the COPYING file in the top-level directory.
+ */
+
+#include "qemu/osdep.h"
+#include "qemu/host-utils.h"
+#include "qemu/atomic.h"
+
+int qemu_icache_linesize = 0;
+int qemu_icache_linesize_log;
+int qemu_dcache_linesize = 0;
+int qemu_dcache_linesize_log;
+
+/*
+ * Operating system specific detection mechanisms.
+ */
+
+#if defined(_WIN32)
+
+static void sys_cache_info(int *isize, int *dsize)
+{
+    SYSTEM_LOGICAL_PROCESSOR_INFORMATION *buf;
+    DWORD size = 0;
+    BOOL success;
+    size_t i, n;
+
+    /* Check for the required buffer size first.  Note that if the zero
+       size we use for the probe results in success, then there is no
+       data available; fail in that case.  */
+    success = GetLogicalProcessorInformation(0, &size);
+    if (success || GetLastError() != ERROR_INSUFFICIENT_BUFFER) {
+        return;
+    }
+
+    n = size / sizeof(SYSTEM_LOGICAL_PROCESSOR_INFORMATION);
+    size = n * sizeof(SYSTEM_LOGICAL_PROCESSOR_INFORMATION);
+    buf = g_new0(SYSTEM_LOGICAL_PROCESSOR_INFORMATION, n);
+    if (!GetLogicalProcessorInformation(buf, &size)) {
+        goto fail;
+    }
+
+    for (i = 0; i < n; i++) {
+        if (buf[i].Relationship == RelationCache
+            && buf[i].Cache.Level == 1) {
+            switch (buf[i].Cache.Type) {
+            case CacheUnified:
+                *isize = *dsize = buf[i].Cache.LineSize;
+                break;
+            case CacheInstruction:
+                *isize = buf[i].Cache.LineSize;
+                break;
+            case CacheData:
+                *dsize = buf[i].Cache.LineSize;
+                break;
+            default:
+                break;
+            }
+        }
+    }
+ fail:
+    g_free(buf);
+}
+
+#elif defined(__APPLE__)
+# include 
+static void sys_cache_info(int *isize, int *dsize)
+{
+    /* There's only a single sysctl for both I/D cache line sizes.  */
+    long size;
+    size_t len = sizeof(size);
+    if (!sysctlbyname("hw.cachelinesize", &size, &len, NULL, 0)) {
+        *isize = *dsize = size;
+    }
+}
+#elif defined(__FreeBSD__) || defined(__FreeBSD_kernel__)
+# include 
+static void sys_cache_info(int *isize, int *dsize)
+{
+    /* There's only a single sysctl for both I/D cache line sizes.  */
+    int size;
+    size_t len = sizeof(size);
+    if (!sysctlbyname("machdep.cacheline_size", &size, &len, NULL, 0)) {
+        *isize = *dsize = size;
+    }
+}
+#else
+/* POSIX */
+
+static void sys_cache_info(int *isize, int *dsize)
+{
+# ifdef _SC_LEVEL1_ICACHE_LINESIZE
+    int tmp_isize = (int) sysconf(_SC_LEVEL1_ICACHE_LINESIZE);
+    if (tmp_isize > 0) {
+        *isize = tmp_isize;
+    }
+# endif
+# ifdef _SC_LEVEL1_DCACHE_LINESIZE
+    int tmp_dsize = (int) sysconf(_SC_LEVEL1_DCACHE_LINESIZE);
+    if (tmp_dsize > 0) {
+        *dsize = tmp_dsize;
+    }
+# endif
+}
+#endif /* sys_cache_info */
+
+/*
+ * Architecture (+ OS) specific detection mechanisms.
+ */
+
+#if defined(__aarch64__)
+
+static void arch_cache_info(int *isize, int *dsize)
+{
+    if (*isize == 0 || *dsize == 0) {
+        uint64_t ctr;
+
+        /* The real cache geometry is in CCSIDR_EL1/CLIDR_EL1/CSSELR_EL1,
+           but (at least under Linux) these are marked protected by the
+           kernel.  However, CTR_EL0 contains the minimum linesize in the
+           entire hierarchy, and is used by userspace cache flushing.  */
+        asm volatile("mrs\t%0, ctr_el0" : "=r"(ctr));
+        if (*isize == 0) {
+            *isize = 4 << (ctr & 0xf);
+        }
+        if (*dsize == 0) {
+            *dsize = 4 << ((ctr >> 16) & 0xf);
+        }
+    }
+}
+
+#elif defined(_ARCH_PPC) && defined(__linux__)
+# include "elf.h"
+
+static void arch_cache_info(int *isize, int *dsize)
+{
+    if (*isize == 0) {
+        *isize = qemu_getauxval(AT_ICACHEBSIZE);
+    }
+    if (*dsize == 0) {
+        *dsize = qemu_getauxval(AT_DCACHEBSIZE);
+    }
+}
+
+#else
+static void arch_cache_info(int *isize, int *dsize) { }
+#endif /* arch_cache_info */
+
+/*
+ * ... and if all else fails ...
+ */
+
+static void fallback_cache_info(int *isize, int *dsize)
+{
+    /* If we can only find one of the two, assume they're the same.  */
+    if (*isize) {
+        if (*dsize) {
+            /* Success! */
+        } else {
+            *dsize = *isize;
+        }
+    } else if (*dsize) {
+        *isize = *dsize;
+    } else {
+#if defined(_ARCH_PPC)
+        /* For PPC, we're going to use the icache size computed for
+           flush_icache_range.  Which means that we must use the
+           architecture minimum.  */
+        *isize = *dsize = 16;
+#else
+        /* Otherwise, 64 bytes is not uncommon.  */
+        *isize = *dsize = 64;
+#endif
+    }
+}
+
+static void __attribute__((constructor)) init_cache_info(void)
+{
+    int isize = 0, dsize = 0;
+
+    sys_cache_info(&isize, &dsize);
+    arch_cache_info(&isize, &dsize);
+    fallback_cache_info(&isize, &dsize);
+
+    assert((isize & (isize - 1)) == 0);
+    assert((dsize & (dsize - 1)) == 0);
+
+    qemu_icache_linesize = isize;
+    qemu_icache_linesize_log = ctz32(isize);
+    qemu_dcache_linesize = dsize;
+    qemu_dcache_linesize_log = ctz32(dsize);
+
+    qatomic64_init();
+}
diff --git a/util/compatfd.c b/util/compatfd.c
new file mode 100644
index 000000000..ee47dd808
--- /dev/null
+++ b/util/compatfd.c
@@ -0,0 +1,111 @@
+/*
+ * signalfd/eventfd compatibility
+ *
+ * Copyright IBM, Corp. 2008
+ *
+ * Authors:
+ *  Anthony Liguori   
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2.  See
+ * the COPYING file in the top-level directory.
+ *
+ * Contributions after 2012-01-13 are licensed under the terms of the
+ * GNU GPL, version 2 or (at your option) any later version.
+ */
+
+#include "qemu/osdep.h"
+#include "qemu/thread.h"
+
+#if defined(CONFIG_SIGNALFD)
+#include 
+#endif
+
+struct sigfd_compat_info
+{
+    sigset_t mask;
+    int fd;
+};
+
+static void *sigwait_compat(void *opaque)
+{
+    struct sigfd_compat_info *info = opaque;
+
+    while (1) {
+        int sig;
+        int err;
+
+        err = sigwait(&info->mask, &sig);
+        if (err != 0) {
+            if (errno == EINTR) {
+                continue;
+            } else {
+                return NULL;
+            }
+        } else {
+            struct qemu_signalfd_siginfo buffer;
+            size_t offset = 0;
+
+            memset(&buffer, 0, sizeof(buffer));
+            buffer.ssi_signo = sig;
+
+            while (offset < sizeof(buffer)) {
+                ssize_t len;
+
+                len = write(info->fd, (char *)&buffer + offset,
+                            sizeof(buffer) - offset);
+                if (len == -1 && errno == EINTR)
+                    continue;
+
+                if (len <= 0) {
+                    return NULL;
+                }
+
+                offset += len;
+            }
+        }
+    }
+}
+
+static int qemu_signalfd_compat(const sigset_t *mask)
+{
+    struct sigfd_compat_info *info;
+    QemuThread thread;
+    int fds[2];
+
+    info = malloc(sizeof(*info));
+    if (info == NULL) {
+        errno = ENOMEM;
+        return -1;
+    }
+
+    if (pipe(fds) == -1) {
+        free(info);
+        return -1;
+    }
+
+    qemu_set_cloexec(fds[0]);
+    qemu_set_cloexec(fds[1]);
+
+    memcpy(&info->mask, mask, sizeof(*mask));
+    info->fd = fds[1];
+
+    qemu_thread_create(&thread, "signalfd_compat", sigwait_compat, info,
+                       QEMU_THREAD_DETACHED);
+
+    return fds[0];
+}
+
+int qemu_signalfd(const sigset_t *mask)
+{
+#if defined(CONFIG_SIGNALFD)
+    int ret;
+
+    ret = syscall(SYS_signalfd, -1, mask, _NSIG / 8);
+    if (ret != -1) {
+        qemu_set_cloexec(ret);
+        return ret;
+    }
+#endif
+
+    return qemu_signalfd_compat(mask);
+}
diff --git a/util/coroutine-sigaltstack.c b/util/coroutine-sigaltstack.c
new file mode 100644
index 000000000..aade82afb
--- /dev/null
+++ b/util/coroutine-sigaltstack.c
@@ -0,0 +1,295 @@
+/*
+ * sigaltstack coroutine initialization code
+ *
+ * Copyright (C) 2006  Anthony Liguori 
+ * Copyright (C) 2011  Kevin Wolf 
+ * Copyright (C) 2012  Alex Barcelo 
+** This file is partly based on pth_mctx.c, from the GNU Portable Threads
+**  Copyright (c) 1999-2006 Ralf S. Engelschall 
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, see .
+ */
+
+/* XXX Is there a nicer way to disable glibc's stack check for longjmp? */
+#ifdef _FORTIFY_SOURCE
+#undef _FORTIFY_SOURCE
+#endif
+#include "qemu/osdep.h"
+#include 
+#include "qemu-common.h"
+#include "qemu/coroutine_int.h"
+
+#ifdef CONFIG_SAFESTACK
+#error "SafeStack is not compatible with code run in alternate signal stacks"
+#endif
+
+typedef struct {
+    Coroutine base;
+    void *stack;
+    size_t stack_size;
+    sigjmp_buf env;
+} CoroutineSigAltStack;
+
+/**
+ * Per-thread coroutine bookkeeping
+ */
+typedef struct {
+    /** Currently executing coroutine */
+    Coroutine *current;
+
+    /** The default coroutine */
+    CoroutineSigAltStack leader;
+
+    /** Information for the signal handler (trampoline) */
+    sigjmp_buf tr_reenter;
+    volatile sig_atomic_t tr_called;
+    void *tr_handler;
+} CoroutineThreadState;
+
+static pthread_key_t thread_state_key;
+
+static CoroutineThreadState *coroutine_get_thread_state(void)
+{
+    CoroutineThreadState *s = pthread_getspecific(thread_state_key);
+
+    if (!s) {
+        s = g_malloc0(sizeof(*s));
+        s->current = &s->leader.base;
+        pthread_setspecific(thread_state_key, s);
+    }
+    return s;
+}
+
+static void qemu_coroutine_thread_cleanup(void *opaque)
+{
+    CoroutineThreadState *s = opaque;
+
+    g_free(s);
+}
+
+static void __attribute__((constructor)) coroutine_init(void)
+{
+    int ret;
+
+    ret = pthread_key_create(&thread_state_key, qemu_coroutine_thread_cleanup);
+    if (ret != 0) {
+        fprintf(stderr, "unable to create leader key: %s\n", strerror(errno));
+        abort();
+    }
+}
+
+/* "boot" function
+ * This is what starts the coroutine, is called from the trampoline
+ * (from the signal handler when it is not signal handling, read ahead
+ * for more information).
+ */
+static void coroutine_bootstrap(CoroutineSigAltStack *self, Coroutine *co)
+{
+    /* Initialize longjmp environment and switch back the caller */
+    if (!sigsetjmp(self->env, 0)) {
+        siglongjmp(*(sigjmp_buf *)co->entry_arg, 1);
+    }
+
+    while (true) {
+        co->entry(co->entry_arg);
+        qemu_coroutine_switch(co, co->caller, COROUTINE_TERMINATE);
+    }
+}
+
+/*
+ * This is used as the signal handler. This is called with the brand new stack
+ * (thanks to sigaltstack). We have to return, given that this is a signal
+ * handler and the sigmask and some other things are changed.
+ */
+static void coroutine_trampoline(int signal)
+{
+    CoroutineSigAltStack *self;
+    Coroutine *co;
+    CoroutineThreadState *coTS;
+
+    /* Get the thread specific information */
+    coTS = coroutine_get_thread_state();
+    self = coTS->tr_handler;
+    coTS->tr_called = 1;
+    co = &self->base;
+
+    /*
+     * Here we have to do a bit of a ping pong between the caller, given that
+     * this is a signal handler and we have to do a return "soon". Then the
+     * caller can reestablish everything and do a siglongjmp here again.
+     */
+    if (!sigsetjmp(coTS->tr_reenter, 0)) {
+        return;
+    }
+
+    /*
+     * Ok, the caller has siglongjmp'ed back to us, so now prepare
+     * us for the real machine state switching. We have to jump
+     * into another function here to get a new stack context for
+     * the auto variables (which have to be auto-variables
+     * because the start of the thread happens later). Else with
+     * PIC (i.e. Position Independent Code which is used when PTH
+     * is built as a shared library) most platforms would
+     * horrible core dump as experience showed.
+     */
+    coroutine_bootstrap(self, co);
+}
+
+Coroutine *qemu_coroutine_new(void)
+{
+    CoroutineSigAltStack *co;
+    CoroutineThreadState *coTS;
+    struct sigaction sa;
+    struct sigaction osa;
+    stack_t ss;
+    stack_t oss;
+    sigset_t sigs;
+    sigset_t osigs;
+    sigjmp_buf old_env;
+
+    /* The way to manipulate stack is with the sigaltstack function. We
+     * prepare a stack, with it delivering a signal to ourselves and then
+     * put sigsetjmp/siglongjmp where needed.
+     * This has been done keeping coroutine-ucontext as a model and with the
+     * pth ideas (GNU Portable Threads). See coroutine-ucontext for the basics
+     * of the coroutines and see pth_mctx.c (from the pth project) for the
+     * sigaltstack way of manipulating stacks.
+     */
+
+    co = g_malloc0(sizeof(*co));
+    co->stack_size = COROUTINE_STACK_SIZE;
+    co->stack = qemu_alloc_stack(&co->stack_size);
+    co->base.entry_arg = &old_env; /* stash away our jmp_buf */
+
+    coTS = coroutine_get_thread_state();
+    coTS->tr_handler = co;
+
+    /*
+     * Preserve the SIGUSR2 signal state, block SIGUSR2,
+     * and establish our signal handler. The signal will
+     * later transfer control onto the signal stack.
+     */
+    sigemptyset(&sigs);
+    sigaddset(&sigs, SIGUSR2);
+    pthread_sigmask(SIG_BLOCK, &sigs, &osigs);
+    sa.sa_handler = coroutine_trampoline;
+    sigfillset(&sa.sa_mask);
+    sa.sa_flags = SA_ONSTACK;
+    if (sigaction(SIGUSR2, &sa, &osa) != 0) {
+        abort();
+    }
+
+    /*
+     * Set the new stack.
+     */
+    ss.ss_sp = co->stack;
+    ss.ss_size = co->stack_size;
+    ss.ss_flags = 0;
+    if (sigaltstack(&ss, &oss) < 0) {
+        abort();
+    }
+
+    /*
+     * Now transfer control onto the signal stack and set it up.
+     * It will return immediately via "return" after the sigsetjmp()
+     * was performed. Be careful here with race conditions.  The
+     * signal can be delivered the first time sigsuspend() is
+     * called.
+     */
+    coTS->tr_called = 0;
+    pthread_kill(pthread_self(), SIGUSR2);
+    sigfillset(&sigs);
+    sigdelset(&sigs, SIGUSR2);
+    while (!coTS->tr_called) {
+        sigsuspend(&sigs);
+    }
+
+    /*
+     * Inform the system that we are back off the signal stack by
+     * removing the alternative signal stack. Be careful here: It
+     * first has to be disabled, before it can be removed.
+     */
+    sigaltstack(NULL, &ss);
+    ss.ss_flags = SS_DISABLE;
+    if (sigaltstack(&ss, NULL) < 0) {
+        abort();
+    }
+    sigaltstack(NULL, &ss);
+    if (!(oss.ss_flags & SS_DISABLE)) {
+        sigaltstack(&oss, NULL);
+    }
+
+    /*
+     * Restore the old SIGUSR2 signal handler and mask
+     */
+    sigaction(SIGUSR2, &osa, NULL);
+    pthread_sigmask(SIG_SETMASK, &osigs, NULL);
+
+    /*
+     * Now enter the trampoline again, but this time not as a signal
+     * handler. Instead we jump into it directly. The functionally
+     * redundant ping-pong pointer arithmetic is necessary to avoid
+     * type-conversion warnings related to the `volatile' qualifier and
+     * the fact that `jmp_buf' usually is an array type.
+     */
+    if (!sigsetjmp(old_env, 0)) {
+        siglongjmp(coTS->tr_reenter, 1);
+    }
+
+    /*
+     * Ok, we returned again, so now we're finished
+     */
+
+    return &co->base;
+}
+
+void qemu_coroutine_delete(Coroutine *co_)
+{
+    CoroutineSigAltStack *co = DO_UPCAST(CoroutineSigAltStack, base, co_);
+
+    qemu_free_stack(co->stack, co->stack_size);
+    g_free(co);
+}
+
+CoroutineAction qemu_coroutine_switch(Coroutine *from_, Coroutine *to_,
+                                      CoroutineAction action)
+{
+    CoroutineSigAltStack *from = DO_UPCAST(CoroutineSigAltStack, base, from_);
+    CoroutineSigAltStack *to = DO_UPCAST(CoroutineSigAltStack, base, to_);
+    CoroutineThreadState *s = coroutine_get_thread_state();
+    int ret;
+
+    s->current = to_;
+
+    ret = sigsetjmp(from->env, 0);
+    if (ret == 0) {
+        siglongjmp(to->env, action);
+    }
+    return ret;
+}
+
+Coroutine *qemu_coroutine_self(void)
+{
+    CoroutineThreadState *s = coroutine_get_thread_state();
+
+    return s->current;
+}
+
+bool qemu_in_coroutine(void)
+{
+    CoroutineThreadState *s = pthread_getspecific(thread_state_key);
+
+    return s && s->current->caller;
+}
+
diff --git a/util/coroutine-ucontext.c b/util/coroutine-ucontext.c
new file mode 100644
index 000000000..904b37519
--- /dev/null
+++ b/util/coroutine-ucontext.c
@@ -0,0 +1,332 @@
+/*
+ * ucontext coroutine initialization code
+ *
+ * Copyright (C) 2006  Anthony Liguori 
+ * Copyright (C) 2011  Kevin Wolf 
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.0 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, see .
+ */
+
+/* XXX Is there a nicer way to disable glibc's stack check for longjmp? */
+#ifdef _FORTIFY_SOURCE
+#undef _FORTIFY_SOURCE
+#endif
+#include "qemu/osdep.h"
+#include 
+#include "qemu/coroutine_int.h"
+
+#ifdef CONFIG_VALGRIND_H
+#include 
+#endif
+
+#if defined(__SANITIZE_ADDRESS__) || __has_feature(address_sanitizer)
+#ifdef CONFIG_ASAN_IFACE_FIBER
+#define CONFIG_ASAN 1
+#include 
+#endif
+#endif
+
+#ifdef CONFIG_TSAN
+#include 
+#endif
+
+typedef struct {
+    Coroutine base;
+    void *stack;
+    size_t stack_size;
+#ifdef CONFIG_SAFESTACK
+    /* Need an unsafe stack for each coroutine */
+    void *unsafe_stack;
+    size_t unsafe_stack_size;
+#endif
+    sigjmp_buf env;
+
+#ifdef CONFIG_TSAN
+    void *tsan_co_fiber;
+    void *tsan_caller_fiber;
+#endif
+
+#ifdef CONFIG_VALGRIND_H
+    unsigned int valgrind_stack_id;
+#endif
+
+} CoroutineUContext;
+
+/**
+ * Per-thread coroutine bookkeeping
+ */
+static __thread CoroutineUContext leader;
+static __thread Coroutine *current;
+
+/*
+ * va_args to makecontext() must be type 'int', so passing
+ * the pointer we need may require several int args. This
+ * union is a quick hack to let us do that
+ */
+union cc_arg {
+    void *p;
+    int i[2];
+};
+
+/*
+ * QEMU_ALWAYS_INLINE only does so if __OPTIMIZE__, so we cannot use it.
+ * always_inline is required to avoid TSan runtime fatal errors.
+ */
+static inline __attribute__((always_inline))
+void on_new_fiber(CoroutineUContext *co)
+{
+#ifdef CONFIG_TSAN
+    co->tsan_co_fiber = __tsan_create_fiber(0); /* flags: sync on switch */
+    co->tsan_caller_fiber = __tsan_get_current_fiber();
+#endif
+}
+
+/* always_inline is required to avoid TSan runtime fatal errors. */
+static inline __attribute__((always_inline))
+void finish_switch_fiber(void *fake_stack_save)
+{
+#ifdef CONFIG_ASAN
+    const void *bottom_old;
+    size_t size_old;
+
+    __sanitizer_finish_switch_fiber(fake_stack_save, &bottom_old, &size_old);
+
+    if (!leader.stack) {
+        leader.stack = (void *)bottom_old;
+        leader.stack_size = size_old;
+    }
+#endif
+#ifdef CONFIG_TSAN
+    if (fake_stack_save) {
+        __tsan_release(fake_stack_save);
+        __tsan_switch_to_fiber(fake_stack_save, 0);  /* 0=synchronize */
+    }
+#endif
+}
+
+/* always_inline is required to avoid TSan runtime fatal errors. */
+static inline __attribute__((always_inline))
+void start_switch_fiber_asan(CoroutineAction action, void **fake_stack_save,
+                             const void *bottom, size_t size)
+{
+#ifdef CONFIG_ASAN
+    __sanitizer_start_switch_fiber(
+            action == COROUTINE_TERMINATE ? NULL : fake_stack_save,
+            bottom, size);
+#endif
+}
+
+/* always_inline is required to avoid TSan runtime fatal errors. */
+static inline __attribute__((always_inline))
+void start_switch_fiber_tsan(void **fake_stack_save,
+                             CoroutineUContext *co,
+                             bool caller)
+{
+#ifdef CONFIG_TSAN
+    void *new_fiber = caller ?
+                      co->tsan_caller_fiber :
+                      co->tsan_co_fiber;
+    void *curr_fiber = __tsan_get_current_fiber();
+    __tsan_acquire(curr_fiber);
+
+    *fake_stack_save = curr_fiber;
+    __tsan_switch_to_fiber(new_fiber, 0);  /* 0=synchronize */
+#endif
+}
+
+static void coroutine_trampoline(int i0, int i1)
+{
+    union cc_arg arg;
+    CoroutineUContext *self;
+    Coroutine *co;
+    void *fake_stack_save = NULL;
+
+    finish_switch_fiber(NULL);
+
+    arg.i[0] = i0;
+    arg.i[1] = i1;
+    self = arg.p;
+    co = &self->base;
+
+    /* Initialize longjmp environment and switch back the caller */
+    if (!sigsetjmp(self->env, 0)) {
+        start_switch_fiber_asan(COROUTINE_YIELD, &fake_stack_save, leader.stack,
+                                leader.stack_size);
+        start_switch_fiber_tsan(&fake_stack_save, self, true); /* true=caller */
+        siglongjmp(*(sigjmp_buf *)co->entry_arg, 1);
+    }
+
+    finish_switch_fiber(fake_stack_save);
+
+    while (true) {
+        co->entry(co->entry_arg);
+        qemu_coroutine_switch(co, co->caller, COROUTINE_TERMINATE);
+    }
+}
+
+Coroutine *qemu_coroutine_new(void)
+{
+    CoroutineUContext *co;
+    ucontext_t old_uc, uc;
+    sigjmp_buf old_env;
+    union cc_arg arg = {0};
+    void *fake_stack_save = NULL;
+
+    /* The ucontext functions preserve signal masks which incurs a
+     * system call overhead.  sigsetjmp(buf, 0)/siglongjmp() does not
+     * preserve signal masks but only works on the current stack.
+     * Since we need a way to create and switch to a new stack, use
+     * the ucontext functions for that but sigsetjmp()/siglongjmp() for
+     * everything else.
+     */
+
+    if (getcontext(&uc) == -1) {
+        abort();
+    }
+
+    co = g_malloc0(sizeof(*co));
+    co->stack_size = COROUTINE_STACK_SIZE;
+    co->stack = qemu_alloc_stack(&co->stack_size);
+#ifdef CONFIG_SAFESTACK
+    co->unsafe_stack_size = COROUTINE_STACK_SIZE;
+    co->unsafe_stack = qemu_alloc_stack(&co->unsafe_stack_size);
+#endif
+    co->base.entry_arg = &old_env; /* stash away our jmp_buf */
+
+    uc.uc_link = &old_uc;
+    uc.uc_stack.ss_sp = co->stack;
+    uc.uc_stack.ss_size = co->stack_size;
+    uc.uc_stack.ss_flags = 0;
+
+#ifdef CONFIG_VALGRIND_H
+    co->valgrind_stack_id =
+        VALGRIND_STACK_REGISTER(co->stack, co->stack + co->stack_size);
+#endif
+
+    arg.p = co;
+
+    on_new_fiber(co);
+    makecontext(&uc, (void (*)(void))coroutine_trampoline,
+                2, arg.i[0], arg.i[1]);
+
+    /* swapcontext() in, siglongjmp() back out */
+    if (!sigsetjmp(old_env, 0)) {
+        start_switch_fiber_asan(COROUTINE_YIELD, &fake_stack_save, co->stack,
+                                co->stack_size);
+        start_switch_fiber_tsan(&fake_stack_save,
+                                co, false); /* false=not caller */
+
+#ifdef CONFIG_SAFESTACK
+        /*
+         * Before we swap the context, set the new unsafe stack
+         * The unsafe stack grows just like the normal stack, so start from
+         * the last usable location of the memory area.
+         * NOTE: we don't have to re-set the usp afterwards because we are
+         * coming back to this context through a siglongjmp.
+         * The compiler already wrapped the corresponding sigsetjmp call with
+         * code that saves the usp on the (safe) stack before the call, and
+         * restores it right after (which is where we return with siglongjmp).
+         */
+        void *usp = co->unsafe_stack + co->unsafe_stack_size;
+        __safestack_unsafe_stack_ptr = usp;
+#endif
+
+        swapcontext(&old_uc, &uc);
+    }
+
+    finish_switch_fiber(fake_stack_save);
+
+    return &co->base;
+}
+
+#ifdef CONFIG_VALGRIND_H
+/* Work around an unused variable in the valgrind.h macro... */
+#if !defined(__clang__)
+#pragma GCC diagnostic push
+#pragma GCC diagnostic ignored "-Wunused-but-set-variable"
+#endif
+static inline void valgrind_stack_deregister(CoroutineUContext *co)
+{
+    VALGRIND_STACK_DEREGISTER(co->valgrind_stack_id);
+}
+#if !defined(__clang__)
+#pragma GCC diagnostic pop
+#endif
+#endif
+
+void qemu_coroutine_delete(Coroutine *co_)
+{
+    CoroutineUContext *co = DO_UPCAST(CoroutineUContext, base, co_);
+
+#ifdef CONFIG_VALGRIND_H
+    valgrind_stack_deregister(co);
+#endif
+
+    qemu_free_stack(co->stack, co->stack_size);
+#ifdef CONFIG_SAFESTACK
+    qemu_free_stack(co->unsafe_stack, co->unsafe_stack_size);
+#endif
+    g_free(co);
+}
+
+/* This function is marked noinline to prevent GCC from inlining it
+ * into coroutine_trampoline(). If we allow it to do that then it
+ * hoists the code to get the address of the TLS variable "current"
+ * out of the while() loop. This is an invalid transformation because
+ * the sigsetjmp() call may be called when running thread A but
+ * return in thread B, and so we might be in a different thread
+ * context each time round the loop.
+ */
+CoroutineAction __attribute__((noinline))
+qemu_coroutine_switch(Coroutine *from_, Coroutine *to_,
+                      CoroutineAction action)
+{
+    CoroutineUContext *from = DO_UPCAST(CoroutineUContext, base, from_);
+    CoroutineUContext *to = DO_UPCAST(CoroutineUContext, base, to_);
+    int ret;
+    void *fake_stack_save = NULL;
+
+    current = to_;
+
+    ret = sigsetjmp(from->env, 0);
+    if (ret == 0) {
+        start_switch_fiber_asan(action, &fake_stack_save, to->stack,
+                                to->stack_size);
+        start_switch_fiber_tsan(&fake_stack_save,
+                                to, false); /* false=not caller */
+        siglongjmp(to->env, action);
+    }
+
+    finish_switch_fiber(fake_stack_save);
+
+    return ret;
+}
+
+Coroutine *qemu_coroutine_self(void)
+{
+    if (!current) {
+        current = &leader.base;
+    }
+#ifdef CONFIG_TSAN
+    if (!leader.tsan_co_fiber) {
+        leader.tsan_co_fiber = __tsan_get_current_fiber();
+    }
+#endif
+    return current;
+}
+
+bool qemu_in_coroutine(void)
+{
+    return current && current->caller;
+}
diff --git a/util/coroutine-win32.c b/util/coroutine-win32.c
new file mode 100644
index 000000000..de6bd4fd3
--- /dev/null
+++ b/util/coroutine-win32.c
@@ -0,0 +1,102 @@
+/*
+ * Win32 coroutine initialization code
+ *
+ * Copyright (c) 2011 Kevin Wolf 
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+#include "qemu/osdep.h"
+#include "qemu-common.h"
+#include "qemu/coroutine_int.h"
+
+typedef struct
+{
+    Coroutine base;
+
+    LPVOID fiber;
+    CoroutineAction action;
+} CoroutineWin32;
+
+static __thread CoroutineWin32 leader;
+static __thread Coroutine *current;
+
+/* This function is marked noinline to prevent GCC from inlining it
+ * into coroutine_trampoline(). If we allow it to do that then it
+ * hoists the code to get the address of the TLS variable "current"
+ * out of the while() loop. This is an invalid transformation because
+ * the SwitchToFiber() call may be called when running thread A but
+ * return in thread B, and so we might be in a different thread
+ * context each time round the loop.
+ */
+CoroutineAction __attribute__((noinline))
+qemu_coroutine_switch(Coroutine *from_, Coroutine *to_,
+                      CoroutineAction action)
+{
+    CoroutineWin32 *from = DO_UPCAST(CoroutineWin32, base, from_);
+    CoroutineWin32 *to = DO_UPCAST(CoroutineWin32, base, to_);
+
+    current = to_;
+
+    to->action = action;
+    SwitchToFiber(to->fiber);
+    return from->action;
+}
+
+static void CALLBACK coroutine_trampoline(void *co_)
+{
+    Coroutine *co = co_;
+
+    while (true) {
+        co->entry(co->entry_arg);
+        qemu_coroutine_switch(co, co->caller, COROUTINE_TERMINATE);
+    }
+}
+
+Coroutine *qemu_coroutine_new(void)
+{
+    const size_t stack_size = COROUTINE_STACK_SIZE;
+    CoroutineWin32 *co;
+
+    co = g_malloc0(sizeof(*co));
+    co->fiber = CreateFiber(stack_size, coroutine_trampoline, &co->base);
+    return &co->base;
+}
+
+void qemu_coroutine_delete(Coroutine *co_)
+{
+    CoroutineWin32 *co = DO_UPCAST(CoroutineWin32, base, co_);
+
+    DeleteFiber(co->fiber);
+    g_free(co);
+}
+
+Coroutine *qemu_coroutine_self(void)
+{
+    if (!current) {
+        current = &leader.base;
+        leader.fiber = ConvertThreadToFiber(NULL);
+    }
+    return current;
+}
+
+bool qemu_in_coroutine(void)
+{
+    return current && current->caller;
+}
diff --git a/util/crc32c.c b/util/crc32c.c
new file mode 100644
index 000000000..762657d85
--- /dev/null
+++ b/util/crc32c.c
@@ -0,0 +1,115 @@
+/*
+ *  Castagnoli CRC32C Checksum Algorithm
+ *
+ *  Polynomial: 0x11EDC6F41
+ *
+ *  Castagnoli93: Guy Castagnoli and Stefan Braeuer and Martin Herrman
+ *               "Optimization of Cyclic Redundancy-Check Codes with 24
+ *                 and 32 Parity Bits",IEEE Transactions on Communication,
+ *                Volume 41, Number 6, June 1993
+ *
+ *  Copyright (c) 2013 Red Hat, Inc.,
+ *
+ *  Authors:
+ *   Jeff Cody 
+ *
+ *  Based on the Linux kernel cryptographic crc32c module,
+ *
+ *  Copyright (c) 2004 Cisco Systems, Inc.
+ *  Copyright (c) 2008 Herbert Xu 
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the Free
+ * Software Foundation; either version 2 of the License, or (at your option)
+ * any later version.
+ *
+ */
+
+#include "qemu/osdep.h"
+#include "qemu/crc32c.h"
+
+/*
+ * This is the CRC-32C table
+ * Generated with:
+ * width = 32 bits
+ * poly = 0x1EDC6F41
+ * reflect input bytes = true
+ * reflect output bytes = true
+ */
+
+static const uint32_t crc32c_table[256] = {
+    0x00000000L, 0xF26B8303L, 0xE13B70F7L, 0x1350F3F4L,
+    0xC79A971FL, 0x35F1141CL, 0x26A1E7E8L, 0xD4CA64EBL,
+    0x8AD958CFL, 0x78B2DBCCL, 0x6BE22838L, 0x9989AB3BL,
+    0x4D43CFD0L, 0xBF284CD3L, 0xAC78BF27L, 0x5E133C24L,
+    0x105EC76FL, 0xE235446CL, 0xF165B798L, 0x030E349BL,
+    0xD7C45070L, 0x25AFD373L, 0x36FF2087L, 0xC494A384L,
+    0x9A879FA0L, 0x68EC1CA3L, 0x7BBCEF57L, 0x89D76C54L,
+    0x5D1D08BFL, 0xAF768BBCL, 0xBC267848L, 0x4E4DFB4BL,
+    0x20BD8EDEL, 0xD2D60DDDL, 0xC186FE29L, 0x33ED7D2AL,
+    0xE72719C1L, 0x154C9AC2L, 0x061C6936L, 0xF477EA35L,
+    0xAA64D611L, 0x580F5512L, 0x4B5FA6E6L, 0xB93425E5L,
+    0x6DFE410EL, 0x9F95C20DL, 0x8CC531F9L, 0x7EAEB2FAL,
+    0x30E349B1L, 0xC288CAB2L, 0xD1D83946L, 0x23B3BA45L,
+    0xF779DEAEL, 0x05125DADL, 0x1642AE59L, 0xE4292D5AL,
+    0xBA3A117EL, 0x4851927DL, 0x5B016189L, 0xA96AE28AL,
+    0x7DA08661L, 0x8FCB0562L, 0x9C9BF696L, 0x6EF07595L,
+    0x417B1DBCL, 0xB3109EBFL, 0xA0406D4BL, 0x522BEE48L,
+    0x86E18AA3L, 0x748A09A0L, 0x67DAFA54L, 0x95B17957L,
+    0xCBA24573L, 0x39C9C670L, 0x2A993584L, 0xD8F2B687L,
+    0x0C38D26CL, 0xFE53516FL, 0xED03A29BL, 0x1F682198L,
+    0x5125DAD3L, 0xA34E59D0L, 0xB01EAA24L, 0x42752927L,
+    0x96BF4DCCL, 0x64D4CECFL, 0x77843D3BL, 0x85EFBE38L,
+    0xDBFC821CL, 0x2997011FL, 0x3AC7F2EBL, 0xC8AC71E8L,
+    0x1C661503L, 0xEE0D9600L, 0xFD5D65F4L, 0x0F36E6F7L,
+    0x61C69362L, 0x93AD1061L, 0x80FDE395L, 0x72966096L,
+    0xA65C047DL, 0x5437877EL, 0x4767748AL, 0xB50CF789L,
+    0xEB1FCBADL, 0x197448AEL, 0x0A24BB5AL, 0xF84F3859L,
+    0x2C855CB2L, 0xDEEEDFB1L, 0xCDBE2C45L, 0x3FD5AF46L,
+    0x7198540DL, 0x83F3D70EL, 0x90A324FAL, 0x62C8A7F9L,
+    0xB602C312L, 0x44694011L, 0x5739B3E5L, 0xA55230E6L,
+    0xFB410CC2L, 0x092A8FC1L, 0x1A7A7C35L, 0xE811FF36L,
+    0x3CDB9BDDL, 0xCEB018DEL, 0xDDE0EB2AL, 0x2F8B6829L,
+    0x82F63B78L, 0x709DB87BL, 0x63CD4B8FL, 0x91A6C88CL,
+    0x456CAC67L, 0xB7072F64L, 0xA457DC90L, 0x563C5F93L,
+    0x082F63B7L, 0xFA44E0B4L, 0xE9141340L, 0x1B7F9043L,
+    0xCFB5F4A8L, 0x3DDE77ABL, 0x2E8E845FL, 0xDCE5075CL,
+    0x92A8FC17L, 0x60C37F14L, 0x73938CE0L, 0x81F80FE3L,
+    0x55326B08L, 0xA759E80BL, 0xB4091BFFL, 0x466298FCL,
+    0x1871A4D8L, 0xEA1A27DBL, 0xF94AD42FL, 0x0B21572CL,
+    0xDFEB33C7L, 0x2D80B0C4L, 0x3ED04330L, 0xCCBBC033L,
+    0xA24BB5A6L, 0x502036A5L, 0x4370C551L, 0xB11B4652L,
+    0x65D122B9L, 0x97BAA1BAL, 0x84EA524EL, 0x7681D14DL,
+    0x2892ED69L, 0xDAF96E6AL, 0xC9A99D9EL, 0x3BC21E9DL,
+    0xEF087A76L, 0x1D63F975L, 0x0E330A81L, 0xFC588982L,
+    0xB21572C9L, 0x407EF1CAL, 0x532E023EL, 0xA145813DL,
+    0x758FE5D6L, 0x87E466D5L, 0x94B49521L, 0x66DF1622L,
+    0x38CC2A06L, 0xCAA7A905L, 0xD9F75AF1L, 0x2B9CD9F2L,
+    0xFF56BD19L, 0x0D3D3E1AL, 0x1E6DCDEEL, 0xEC064EEDL,
+    0xC38D26C4L, 0x31E6A5C7L, 0x22B65633L, 0xD0DDD530L,
+    0x0417B1DBL, 0xF67C32D8L, 0xE52CC12CL, 0x1747422FL,
+    0x49547E0BL, 0xBB3FFD08L, 0xA86F0EFCL, 0x5A048DFFL,
+    0x8ECEE914L, 0x7CA56A17L, 0x6FF599E3L, 0x9D9E1AE0L,
+    0xD3D3E1ABL, 0x21B862A8L, 0x32E8915CL, 0xC083125FL,
+    0x144976B4L, 0xE622F5B7L, 0xF5720643L, 0x07198540L,
+    0x590AB964L, 0xAB613A67L, 0xB831C993L, 0x4A5A4A90L,
+    0x9E902E7BL, 0x6CFBAD78L, 0x7FAB5E8CL, 0x8DC0DD8FL,
+    0xE330A81AL, 0x115B2B19L, 0x020BD8EDL, 0xF0605BEEL,
+    0x24AA3F05L, 0xD6C1BC06L, 0xC5914FF2L, 0x37FACCF1L,
+    0x69E9F0D5L, 0x9B8273D6L, 0x88D28022L, 0x7AB90321L,
+    0xAE7367CAL, 0x5C18E4C9L, 0x4F48173DL, 0xBD23943EL,
+    0xF36E6F75L, 0x0105EC76L, 0x12551F82L, 0xE03E9C81L,
+    0x34F4F86AL, 0xC69F7B69L, 0xD5CF889DL, 0x27A40B9EL,
+    0x79B737BAL, 0x8BDCB4B9L, 0x988C474DL, 0x6AE7C44EL,
+    0xBE2DA0A5L, 0x4C4623A6L, 0x5F16D052L, 0xAD7D5351L
+};
+
+
+uint32_t crc32c(uint32_t crc, const uint8_t *data, unsigned int length)
+{
+    while (length--) {
+        crc = crc32c_table[(crc ^ *data++) & 0xFFL] ^ (crc >> 8);
+    }
+    return crc^0xffffffff;
+}
+
diff --git a/util/cutils.c b/util/cutils.c
new file mode 100644
index 000000000..0b5073b33
--- /dev/null
+++ b/util/cutils.c
@@ -0,0 +1,967 @@
+/*
+ * Simple C functions to supplement the C library
+ *
+ * Copyright (c) 2006 Fabrice Bellard
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+#include "qemu/osdep.h"
+#include "qemu/host-utils.h"
+#include 
+
+#include "qemu-common.h"
+#include "qemu/sockets.h"
+#include "qemu/iov.h"
+#include "net/net.h"
+#include "qemu/ctype.h"
+#include "qemu/cutils.h"
+#include "qemu/error-report.h"
+
+void strpadcpy(char *buf, int buf_size, const char *str, char pad)
+{
+    int len = qemu_strnlen(str, buf_size);
+    memcpy(buf, str, len);
+    memset(buf + len, pad, buf_size - len);
+}
+
+void pstrcpy(char *buf, int buf_size, const char *str)
+{
+    int c;
+    char *q = buf;
+
+    if (buf_size <= 0)
+        return;
+
+    for(;;) {
+        c = *str++;
+        if (c == 0 || q >= buf + buf_size - 1)
+            break;
+        *q++ = c;
+    }
+    *q = '\0';
+}
+
+/* strcat and truncate. */
+char *pstrcat(char *buf, int buf_size, const char *s)
+{
+    int len;
+    len = strlen(buf);
+    if (len < buf_size)
+        pstrcpy(buf + len, buf_size - len, s);
+    return buf;
+}
+
+int strstart(const char *str, const char *val, const char **ptr)
+{
+    const char *p, *q;
+    p = str;
+    q = val;
+    while (*q != '\0') {
+        if (*p != *q)
+            return 0;
+        p++;
+        q++;
+    }
+    if (ptr)
+        *ptr = p;
+    return 1;
+}
+
+int stristart(const char *str, const char *val, const char **ptr)
+{
+    const char *p, *q;
+    p = str;
+    q = val;
+    while (*q != '\0') {
+        if (qemu_toupper(*p) != qemu_toupper(*q))
+            return 0;
+        p++;
+        q++;
+    }
+    if (ptr)
+        *ptr = p;
+    return 1;
+}
+
+/* XXX: use host strnlen if available ? */
+int qemu_strnlen(const char *s, int max_len)
+{
+    int i;
+
+    for(i = 0; i < max_len; i++) {
+        if (s[i] == '\0') {
+            break;
+        }
+    }
+    return i;
+}
+
+char *qemu_strsep(char **input, const char *delim)
+{
+    char *result = *input;
+    if (result != NULL) {
+        char *p;
+
+        for (p = result; *p != '\0'; p++) {
+            if (strchr(delim, *p)) {
+                break;
+            }
+        }
+        if (*p == '\0') {
+            *input = NULL;
+        } else {
+            *p = '\0';
+            *input = p + 1;
+        }
+    }
+    return result;
+}
+
+time_t mktimegm(struct tm *tm)
+{
+    time_t t;
+    int y = tm->tm_year + 1900, m = tm->tm_mon + 1, d = tm->tm_mday;
+    if (m < 3) {
+        m += 12;
+        y--;
+    }
+    t = 86400ULL * (d + (153 * m - 457) / 5 + 365 * y + y / 4 - y / 100 + 
+                 y / 400 - 719469);
+    t += 3600 * tm->tm_hour + 60 * tm->tm_min + tm->tm_sec;
+    return t;
+}
+
+/*
+ * Make sure data goes on disk, but if possible do not bother to
+ * write out the inode just for timestamp updates.
+ *
+ * Unfortunately even in 2009 many operating systems do not support
+ * fdatasync and have to fall back to fsync.
+ */
+int qemu_fdatasync(int fd)
+{
+#ifdef CONFIG_FDATASYNC
+    return fdatasync(fd);
+#else
+    return fsync(fd);
+#endif
+}
+
+/**
+ * Sync changes made to the memory mapped file back to the backing
+ * storage. For POSIX compliant systems this will fallback
+ * to regular msync call. Otherwise it will trigger whole file sync
+ * (including the metadata case there is no support to skip that otherwise)
+ *
+ * @addr   - start of the memory area to be synced
+ * @length - length of the are to be synced
+ * @fd     - file descriptor for the file to be synced
+ *           (mandatory only for POSIX non-compliant systems)
+ */
+int qemu_msync(void *addr, size_t length, int fd)
+{
+#ifdef CONFIG_POSIX
+    size_t align_mask = ~(qemu_real_host_page_size - 1);
+
+    /**
+     * There are no strict reqs as per the length of mapping
+     * to be synced. Still the length needs to follow the address
+     * alignment changes. Additionally - round the size to the multiple
+     * of PAGE_SIZE
+     */
+    length += ((uintptr_t)addr & (qemu_real_host_page_size - 1));
+    length = (length + ~align_mask) & align_mask;
+
+    addr = (void *)((uintptr_t)addr & align_mask);
+
+    return msync(addr, length, MS_SYNC);
+#else /* CONFIG_POSIX */
+    /**
+     * Perform the sync based on the file descriptor
+     * The sync range will most probably be wider than the one
+     * requested - but it will still get the job done
+     */
+    return qemu_fdatasync(fd);
+#endif /* CONFIG_POSIX */
+}
+
+#ifndef _WIN32
+/* Sets a specific flag */
+int fcntl_setfl(int fd, int flag)
+{
+    int flags;
+
+    flags = fcntl(fd, F_GETFL);
+    if (flags == -1)
+        return -errno;
+
+    if (fcntl(fd, F_SETFL, flags | flag) == -1)
+        return -errno;
+
+    return 0;
+}
+#endif
+
+static int64_t suffix_mul(char suffix, int64_t unit)
+{
+    switch (qemu_toupper(suffix)) {
+    case 'B':
+        return 1;
+    case 'K':
+        return unit;
+    case 'M':
+        return unit * unit;
+    case 'G':
+        return unit * unit * unit;
+    case 'T':
+        return unit * unit * unit * unit;
+    case 'P':
+        return unit * unit * unit * unit * unit;
+    case 'E':
+        return unit * unit * unit * unit * unit * unit;
+    }
+    return -1;
+}
+
+/*
+ * Convert string to bytes, allowing either B/b for bytes, K/k for KB,
+ * M/m for MB, G/g for GB or T/t for TB. End pointer will be returned
+ * in *end, if not NULL. Return -ERANGE on overflow, and -EINVAL on
+ * other error.
+ */
+static int do_strtosz(const char *nptr, const char **end,
+                      const char default_suffix, int64_t unit,
+                      uint64_t *result)
+{
+    int retval;
+    const char *endptr;
+    unsigned char c;
+    int mul_required = 0;
+    double val, mul, integral, fraction;
+
+    retval = qemu_strtod_finite(nptr, &endptr, &val);
+    if (retval) {
+        goto out;
+    }
+    fraction = modf(val, &integral);
+    if (fraction != 0) {
+        mul_required = 1;
+    }
+    c = *endptr;
+    mul = suffix_mul(c, unit);
+    if (mul >= 0) {
+        endptr++;
+    } else {
+        mul = suffix_mul(default_suffix, unit);
+        assert(mul >= 0);
+    }
+    if (mul == 1 && mul_required) {
+        retval = -EINVAL;
+        goto out;
+    }
+    /*
+     * Values near UINT64_MAX overflow to 2**64 when converting to double
+     * precision.  Compare against the maximum representable double precision
+     * value below 2**64, computed as "the next value after 2**64 (0x1p64) in
+     * the direction of 0".
+     */
+    if ((val * mul > nextafter(0x1p64, 0)) || val < 0) {
+        retval = -ERANGE;
+        goto out;
+    }
+    *result = val * mul;
+    retval = 0;
+
+out:
+    if (end) {
+        *end = endptr;
+    } else if (*endptr) {
+        retval = -EINVAL;
+    }
+
+    return retval;
+}
+
+int qemu_strtosz(const char *nptr, const char **end, uint64_t *result)
+{
+    return do_strtosz(nptr, end, 'B', 1024, result);
+}
+
+int qemu_strtosz_MiB(const char *nptr, const char **end, uint64_t *result)
+{
+    return do_strtosz(nptr, end, 'M', 1024, result);
+}
+
+int qemu_strtosz_metric(const char *nptr, const char **end, uint64_t *result)
+{
+    return do_strtosz(nptr, end, 'B', 1000, result);
+}
+
+/**
+ * Helper function for error checking after strtol() and the like
+ */
+static int check_strtox_error(const char *nptr, char *ep,
+                              const char **endptr, int libc_errno)
+{
+    assert(ep >= nptr);
+    if (endptr) {
+        *endptr = ep;
+    }
+
+    /* Turn "no conversion" into an error */
+    if (libc_errno == 0 && ep == nptr) {
+        return -EINVAL;
+    }
+
+    /* Fail when we're expected to consume the string, but didn't */
+    if (!endptr && *ep) {
+        return -EINVAL;
+    }
+
+    return -libc_errno;
+}
+
+/**
+ * Convert string @nptr to an integer, and store it in @result.
+ *
+ * This is a wrapper around strtol() that is harder to misuse.
+ * Semantics of @nptr, @endptr, @base match strtol() with differences
+ * noted below.
+ *
+ * @nptr may be null, and no conversion is performed then.
+ *
+ * If no conversion is performed, store @nptr in *@endptr and return
+ * -EINVAL.
+ *
+ * If @endptr is null, and the string isn't fully converted, return
+ * -EINVAL.  This is the case when the pointer that would be stored in
+ * a non-null @endptr points to a character other than '\0'.
+ *
+ * If the conversion overflows @result, store INT_MAX in @result,
+ * and return -ERANGE.
+ *
+ * If the conversion underflows @result, store INT_MIN in @result,
+ * and return -ERANGE.
+ *
+ * Else store the converted value in @result, and return zero.
+ */
+int qemu_strtoi(const char *nptr, const char **endptr, int base,
+                int *result)
+{
+    char *ep;
+    long long lresult;
+
+    assert((unsigned) base <= 36 && base != 1);
+    if (!nptr) {
+        if (endptr) {
+            *endptr = nptr;
+        }
+        return -EINVAL;
+    }
+
+    errno = 0;
+    lresult = strtoll(nptr, &ep, base);
+    if (lresult < INT_MIN) {
+        *result = INT_MIN;
+        errno = ERANGE;
+    } else if (lresult > INT_MAX) {
+        *result = INT_MAX;
+        errno = ERANGE;
+    } else {
+        *result = lresult;
+    }
+    return check_strtox_error(nptr, ep, endptr, errno);
+}
+
+/**
+ * Convert string @nptr to an unsigned integer, and store it in @result.
+ *
+ * This is a wrapper around strtoul() that is harder to misuse.
+ * Semantics of @nptr, @endptr, @base match strtoul() with differences
+ * noted below.
+ *
+ * @nptr may be null, and no conversion is performed then.
+ *
+ * If no conversion is performed, store @nptr in *@endptr and return
+ * -EINVAL.
+ *
+ * If @endptr is null, and the string isn't fully converted, return
+ * -EINVAL.  This is the case when the pointer that would be stored in
+ * a non-null @endptr points to a character other than '\0'.
+ *
+ * If the conversion overflows @result, store UINT_MAX in @result,
+ * and return -ERANGE.
+ *
+ * Else store the converted value in @result, and return zero.
+ *
+ * Note that a number with a leading minus sign gets converted without
+ * the minus sign, checked for overflow (see above), then negated (in
+ * @result's type).  This is exactly how strtoul() works.
+ */
+int qemu_strtoui(const char *nptr, const char **endptr, int base,
+                 unsigned int *result)
+{
+    char *ep;
+    long long lresult;
+
+    assert((unsigned) base <= 36 && base != 1);
+    if (!nptr) {
+        if (endptr) {
+            *endptr = nptr;
+        }
+        return -EINVAL;
+    }
+
+    errno = 0;
+    lresult = strtoull(nptr, &ep, base);
+
+    /* Windows returns 1 for negative out-of-range values.  */
+    if (errno == ERANGE) {
+        *result = -1;
+    } else {
+        if (lresult > UINT_MAX) {
+            *result = UINT_MAX;
+            errno = ERANGE;
+        } else if (lresult < INT_MIN) {
+            *result = UINT_MAX;
+            errno = ERANGE;
+        } else {
+            *result = lresult;
+        }
+    }
+    return check_strtox_error(nptr, ep, endptr, errno);
+}
+
+/**
+ * Convert string @nptr to a long integer, and store it in @result.
+ *
+ * This is a wrapper around strtol() that is harder to misuse.
+ * Semantics of @nptr, @endptr, @base match strtol() with differences
+ * noted below.
+ *
+ * @nptr may be null, and no conversion is performed then.
+ *
+ * If no conversion is performed, store @nptr in *@endptr and return
+ * -EINVAL.
+ *
+ * If @endptr is null, and the string isn't fully converted, return
+ * -EINVAL.  This is the case when the pointer that would be stored in
+ * a non-null @endptr points to a character other than '\0'.
+ *
+ * If the conversion overflows @result, store LONG_MAX in @result,
+ * and return -ERANGE.
+ *
+ * If the conversion underflows @result, store LONG_MIN in @result,
+ * and return -ERANGE.
+ *
+ * Else store the converted value in @result, and return zero.
+ */
+int qemu_strtol(const char *nptr, const char **endptr, int base,
+                long *result)
+{
+    char *ep;
+
+    assert((unsigned) base <= 36 && base != 1);
+    if (!nptr) {
+        if (endptr) {
+            *endptr = nptr;
+        }
+        return -EINVAL;
+    }
+
+    errno = 0;
+    *result = strtol(nptr, &ep, base);
+    return check_strtox_error(nptr, ep, endptr, errno);
+}
+
+/**
+ * Convert string @nptr to an unsigned long, and store it in @result.
+ *
+ * This is a wrapper around strtoul() that is harder to misuse.
+ * Semantics of @nptr, @endptr, @base match strtoul() with differences
+ * noted below.
+ *
+ * @nptr may be null, and no conversion is performed then.
+ *
+ * If no conversion is performed, store @nptr in *@endptr and return
+ * -EINVAL.
+ *
+ * If @endptr is null, and the string isn't fully converted, return
+ * -EINVAL.  This is the case when the pointer that would be stored in
+ * a non-null @endptr points to a character other than '\0'.
+ *
+ * If the conversion overflows @result, store ULONG_MAX in @result,
+ * and return -ERANGE.
+ *
+ * Else store the converted value in @result, and return zero.
+ *
+ * Note that a number with a leading minus sign gets converted without
+ * the minus sign, checked for overflow (see above), then negated (in
+ * @result's type).  This is exactly how strtoul() works.
+ */
+int qemu_strtoul(const char *nptr, const char **endptr, int base,
+                 unsigned long *result)
+{
+    char *ep;
+
+    assert((unsigned) base <= 36 && base != 1);
+    if (!nptr) {
+        if (endptr) {
+            *endptr = nptr;
+        }
+        return -EINVAL;
+    }
+
+    errno = 0;
+    *result = strtoul(nptr, &ep, base);
+    /* Windows returns 1 for negative out-of-range values.  */
+    if (errno == ERANGE) {
+        *result = -1;
+    }
+    return check_strtox_error(nptr, ep, endptr, errno);
+}
+
+/**
+ * Convert string @nptr to an int64_t.
+ *
+ * Works like qemu_strtol(), except it stores INT64_MAX on overflow,
+ * and INT64_MIN on underflow.
+ */
+int qemu_strtoi64(const char *nptr, const char **endptr, int base,
+                 int64_t *result)
+{
+    char *ep;
+
+    assert((unsigned) base <= 36 && base != 1);
+    if (!nptr) {
+        if (endptr) {
+            *endptr = nptr;
+        }
+        return -EINVAL;
+    }
+
+    /* This assumes int64_t is long long TODO relax */
+    QEMU_BUILD_BUG_ON(sizeof(int64_t) != sizeof(long long));
+    errno = 0;
+    *result = strtoll(nptr, &ep, base);
+    return check_strtox_error(nptr, ep, endptr, errno);
+}
+
+/**
+ * Convert string @nptr to an uint64_t.
+ *
+ * Works like qemu_strtoul(), except it stores UINT64_MAX on overflow.
+ */
+int qemu_strtou64(const char *nptr, const char **endptr, int base,
+                  uint64_t *result)
+{
+    char *ep;
+
+    assert((unsigned) base <= 36 && base != 1);
+    if (!nptr) {
+        if (endptr) {
+            *endptr = nptr;
+        }
+        return -EINVAL;
+    }
+
+    /* This assumes uint64_t is unsigned long long TODO relax */
+    QEMU_BUILD_BUG_ON(sizeof(uint64_t) != sizeof(unsigned long long));
+    errno = 0;
+    *result = strtoull(nptr, &ep, base);
+    /* Windows returns 1 for negative out-of-range values.  */
+    if (errno == ERANGE) {
+        *result = -1;
+    }
+    return check_strtox_error(nptr, ep, endptr, errno);
+}
+
+/**
+ * Convert string @nptr to a double.
+  *
+ * This is a wrapper around strtod() that is harder to misuse.
+ * Semantics of @nptr and @endptr match strtod() with differences
+ * noted below.
+ *
+ * @nptr may be null, and no conversion is performed then.
+ *
+ * If no conversion is performed, store @nptr in *@endptr and return
+ * -EINVAL.
+ *
+ * If @endptr is null, and the string isn't fully converted, return
+ * -EINVAL. This is the case when the pointer that would be stored in
+ * a non-null @endptr points to a character other than '\0'.
+ *
+ * If the conversion overflows, store +/-HUGE_VAL in @result, depending
+ * on the sign, and return -ERANGE.
+ *
+ * If the conversion underflows, store +/-0.0 in @result, depending on the
+ * sign, and return -ERANGE.
+ *
+ * Else store the converted value in @result, and return zero.
+ */
+int qemu_strtod(const char *nptr, const char **endptr, double *result)
+{
+    char *ep;
+
+    if (!nptr) {
+        if (endptr) {
+            *endptr = nptr;
+        }
+        return -EINVAL;
+    }
+
+    errno = 0;
+    *result = strtod(nptr, &ep);
+    return check_strtox_error(nptr, ep, endptr, errno);
+}
+
+/**
+ * Convert string @nptr to a finite double.
+ *
+ * Works like qemu_strtod(), except that "NaN" and "inf" are rejected
+ * with -EINVAL and no conversion is performed.
+ */
+int qemu_strtod_finite(const char *nptr, const char **endptr, double *result)
+{
+    double tmp;
+    int ret;
+
+    ret = qemu_strtod(nptr, endptr, &tmp);
+    if (!ret && !isfinite(tmp)) {
+        if (endptr) {
+            *endptr = nptr;
+        }
+        ret = -EINVAL;
+    }
+
+    if (ret != -EINVAL) {
+        *result = tmp;
+    }
+    return ret;
+}
+
+/**
+ * Searches for the first occurrence of 'c' in 's', and returns a pointer
+ * to the trailing null byte if none was found.
+ */
+#ifndef HAVE_STRCHRNUL
+const char *qemu_strchrnul(const char *s, int c)
+{
+    const char *e = strchr(s, c);
+    if (!e) {
+        e = s + strlen(s);
+    }
+    return e;
+}
+#endif
+
+/**
+ * parse_uint:
+ *
+ * @s: String to parse
+ * @value: Destination for parsed integer value
+ * @endptr: Destination for pointer to first character not consumed
+ * @base: integer base, between 2 and 36 inclusive, or 0
+ *
+ * Parse unsigned integer
+ *
+ * Parsed syntax is like strtoull()'s: arbitrary whitespace, a single optional
+ * '+' or '-', an optional "0x" if @base is 0 or 16, one or more digits.
+ *
+ * If @s is null, or @base is invalid, or @s doesn't start with an
+ * integer in the syntax above, set *@value to 0, *@endptr to @s, and
+ * return -EINVAL.
+ *
+ * Set *@endptr to point right beyond the parsed integer (even if the integer
+ * overflows or is negative, all digits will be parsed and *@endptr will
+ * point right beyond them).
+ *
+ * If the integer is negative, set *@value to 0, and return -ERANGE.
+ *
+ * If the integer overflows unsigned long long, set *@value to
+ * ULLONG_MAX, and return -ERANGE.
+ *
+ * Else, set *@value to the parsed integer, and return 0.
+ */
+int parse_uint(const char *s, unsigned long long *value, char **endptr,
+               int base)
+{
+    int r = 0;
+    char *endp = (char *)s;
+    unsigned long long val = 0;
+
+    assert((unsigned) base <= 36 && base != 1);
+    if (!s) {
+        r = -EINVAL;
+        goto out;
+    }
+
+    errno = 0;
+    val = strtoull(s, &endp, base);
+    if (errno) {
+        r = -errno;
+        goto out;
+    }
+
+    if (endp == s) {
+        r = -EINVAL;
+        goto out;
+    }
+
+    /* make sure we reject negative numbers: */
+    while (qemu_isspace(*s)) {
+        s++;
+    }
+    if (*s == '-') {
+        val = 0;
+        r = -ERANGE;
+        goto out;
+    }
+
+out:
+    *value = val;
+    *endptr = endp;
+    return r;
+}
+
+/**
+ * parse_uint_full:
+ *
+ * @s: String to parse
+ * @value: Destination for parsed integer value
+ * @base: integer base, between 2 and 36 inclusive, or 0
+ *
+ * Parse unsigned integer from entire string
+ *
+ * Have the same behavior of parse_uint(), but with an additional check
+ * for additional data after the parsed number. If extra characters are present
+ * after the parsed number, the function will return -EINVAL, and *@v will
+ * be set to 0.
+ */
+int parse_uint_full(const char *s, unsigned long long *value, int base)
+{
+    char *endp;
+    int r;
+
+    r = parse_uint(s, value, &endp, base);
+    if (r < 0) {
+        return r;
+    }
+    if (*endp) {
+        *value = 0;
+        return -EINVAL;
+    }
+
+    return 0;
+}
+
+int qemu_parse_fd(const char *param)
+{
+    long fd;
+    char *endptr;
+
+    errno = 0;
+    fd = strtol(param, &endptr, 10);
+    if (param == endptr /* no conversion performed */                    ||
+        errno != 0      /* not representable as long; possibly others */ ||
+        *endptr != '\0' /* final string not empty */                     ||
+        fd < 0          /* invalid as file descriptor */                 ||
+        fd > INT_MAX    /* not representable as int */) {
+        return -1;
+    }
+    return fd;
+}
+
+/*
+ * Implementation of  ULEB128 (http://en.wikipedia.org/wiki/LEB128)
+ * Input is limited to 14-bit numbers
+ */
+int uleb128_encode_small(uint8_t *out, uint32_t n)
+{
+    g_assert(n <= 0x3fff);
+    if (n < 0x80) {
+        *out = n;
+        return 1;
+    } else {
+        *out++ = (n & 0x7f) | 0x80;
+        *out = n >> 7;
+        return 2;
+    }
+}
+
+int uleb128_decode_small(const uint8_t *in, uint32_t *n)
+{
+    if (!(*in & 0x80)) {
+        *n = *in;
+        return 1;
+    } else {
+        *n = *in++ & 0x7f;
+        /* we exceed 14 bit number */
+        if (*in & 0x80) {
+            return -1;
+        }
+        *n |= *in << 7;
+        return 2;
+    }
+}
+
+/*
+ * helper to parse debug environment variables
+ */
+int parse_debug_env(const char *name, int max, int initial)
+{
+    char *debug_env = getenv(name);
+    char *inv = NULL;
+    long debug;
+
+    if (!debug_env) {
+        return initial;
+    }
+    errno = 0;
+    debug = strtol(debug_env, &inv, 10);
+    if (inv == debug_env) {
+        return initial;
+    }
+    if (debug < 0 || debug > max || errno != 0) {
+        warn_report("%s not in [0, %d]", name, max);
+        return initial;
+    }
+    return debug;
+}
+
+/*
+ * Helper to print ethernet mac address
+ */
+const char *qemu_ether_ntoa(const MACAddr *mac)
+{
+    static char ret[18];
+
+    snprintf(ret, sizeof(ret), "%02x:%02x:%02x:%02x:%02x:%02x",
+             mac->a[0], mac->a[1], mac->a[2], mac->a[3], mac->a[4], mac->a[5]);
+
+    return ret;
+}
+
+/*
+ * Return human readable string for size @val.
+ * @val can be anything that uint64_t allows (no more than "16 EiB").
+ * Use IEC binary units like KiB, MiB, and so forth.
+ * Caller is responsible for passing it to g_free().
+ */
+char *size_to_str(uint64_t val)
+{
+    static const char *suffixes[] = { "", "Ki", "Mi", "Gi", "Ti", "Pi", "Ei" };
+    uint64_t div;
+    int i;
+
+    /*
+     * The exponent (returned in i) minus one gives us
+     * floor(log2(val * 1024 / 1000).  The correction makes us
+     * switch to the higher power when the integer part is >= 1000.
+     * (see e41b509d68afb1f for more info)
+     */
+    frexp(val / (1000.0 / 1024.0), &i);
+    i = (i - 1) / 10;
+    div = 1ULL << (i * 10);
+
+    return g_strdup_printf("%0.3g %sB", (double)val / div, suffixes[i]);
+}
+
+char *freq_to_str(uint64_t freq_hz)
+{
+    static const char *const suffixes[] = { "", "K", "M", "G", "T", "P", "E" };
+    double freq = freq_hz;
+    size_t idx = 0;
+
+    while (freq >= 1000.0) {
+        freq /= 1000.0;
+        idx++;
+    }
+    assert(idx < ARRAY_SIZE(suffixes));
+
+    return g_strdup_printf("%0.3g %sHz", freq, suffixes[idx]);
+}
+
+int qemu_pstrcmp0(const char **str1, const char **str2)
+{
+    return g_strcmp0(*str1, *str2);
+}
+
+static inline bool starts_with_prefix(const char *dir)
+{
+    size_t prefix_len = strlen(CONFIG_PREFIX);
+    return !memcmp(dir, CONFIG_PREFIX, prefix_len) &&
+        (!dir[prefix_len] || G_IS_DIR_SEPARATOR(dir[prefix_len]));
+}
+
+/* Return the next path component in dir, and store its length in *p_len.  */
+static inline const char *next_component(const char *dir, int *p_len)
+{
+    int len;
+    while (*dir && G_IS_DIR_SEPARATOR(*dir)) {
+        dir++;
+    }
+    len = 0;
+    while (dir[len] && !G_IS_DIR_SEPARATOR(dir[len])) {
+        len++;
+    }
+    *p_len = len;
+    return dir;
+}
+
+char *get_relocated_path(const char *dir)
+{
+    size_t prefix_len = strlen(CONFIG_PREFIX);
+    const char *bindir = CONFIG_BINDIR;
+    const char *exec_dir = qemu_get_exec_dir();
+    GString *result;
+    int len_dir, len_bindir;
+
+    /* Fail if qemu_init_exec_dir was not called.  */
+    assert(exec_dir[0]);
+    if (!starts_with_prefix(dir) || !starts_with_prefix(bindir)) {
+        return g_strdup(dir);
+    }
+
+    result = g_string_new(exec_dir);
+
+    /* Advance over common components.  */
+    len_dir = len_bindir = prefix_len;
+    do {
+        dir += len_dir;
+        bindir += len_bindir;
+        dir = next_component(dir, &len_dir);
+        bindir = next_component(bindir, &len_bindir);
+    } while (len_dir && len_dir == len_bindir && !memcmp(dir, bindir, len_dir));
+
+    /* Ascend from bindir to the common prefix with dir.  */
+    while (len_bindir) {
+        bindir += len_bindir;
+        g_string_append(result, "/..");
+        bindir = next_component(bindir, &len_bindir);
+    }
+
+    if (*dir) {
+        assert(G_IS_DIR_SEPARATOR(dir[-1]));
+        g_string_append(result, dir - 1);
+    }
+    return result->str;
+}
diff --git a/util/dbus.c b/util/dbus.c
new file mode 100644
index 000000000..9099dc5b4
--- /dev/null
+++ b/util/dbus.c
@@ -0,0 +1,57 @@
+/*
+ * Helpers for using D-Bus
+ *
+ * Copyright (C) 2019 Red Hat, Inc.
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2.  See
+ * the COPYING file in the top-level directory.
+ */
+
+#include "qemu/osdep.h"
+#include "qemu/dbus.h"
+#include "qemu/error-report.h"
+#include "qapi/error.h"
+
+/*
+ * qemu_dbus_get_queued_owners() - return the list of queued unique names
+ * @connection: A GDBusConnection
+ * @name: a service name
+ *
+ * Return: a GStrv of unique names, or NULL on failure.
+ */
+GStrv
+qemu_dbus_get_queued_owners(GDBusConnection *connection, const char *name,
+                            Error **errp)
+{
+    g_autoptr(GDBusProxy) proxy = NULL;
+    g_autoptr(GVariant) result = NULL;
+    g_autoptr(GVariant) child = NULL;
+    g_autoptr(GError) err = NULL;
+
+    proxy = g_dbus_proxy_new_sync(connection, G_DBUS_PROXY_FLAGS_NONE, NULL,
+                                  "org.freedesktop.DBus",
+                                  "/org/freedesktop/DBus",
+                                  "org.freedesktop.DBus",
+                                  NULL, &err);
+    if (!proxy) {
+        error_setg(errp, "Failed to create DBus proxy: %s", err->message);
+        return NULL;
+    }
+
+    result = g_dbus_proxy_call_sync(proxy, "ListQueuedOwners",
+                                    g_variant_new("(s)", name),
+                                    G_DBUS_CALL_FLAGS_NO_AUTO_START,
+                                    -1, NULL, &err);
+    if (!result) {
+        if (g_error_matches(err,
+                            G_DBUS_ERROR,
+                            G_DBUS_ERROR_NAME_HAS_NO_OWNER)) {
+            return g_new0(char *, 1);
+        }
+        error_setg(errp, "Failed to call ListQueuedOwners: %s", err->message);
+        return NULL;
+    }
+
+    child = g_variant_get_child_value(result, 0);
+    return g_variant_dup_strv(child, NULL);
+}
diff --git a/util/drm.c b/util/drm.c
new file mode 100644
index 000000000..dae8ffebc
--- /dev/null
+++ b/util/drm.c
@@ -0,0 +1,75 @@
+/*
+ * Copyright (C) 2015-2016 Gerd Hoffmann 
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, see .
+ */
+#include "qemu/osdep.h"
+#include "qemu/drm.h"
+
+#include 
+#include 
+
+int qemu_drm_rendernode_open(const char *rendernode)
+{
+    DIR *dir;
+    struct dirent *e;
+    struct stat st;
+    int r, fd, ret;
+    char *p;
+
+    if (rendernode) {
+        return open(rendernode, O_RDWR | O_CLOEXEC | O_NOCTTY | O_NONBLOCK);
+    }
+
+    dir = opendir("/dev/dri");
+    if (!dir) {
+        return -1;
+    }
+
+    fd = -1;
+    while ((e = readdir(dir))) {
+        if (strncmp(e->d_name, "renderD", 7)) {
+            continue;
+        }
+
+        p = g_strdup_printf("/dev/dri/%s", e->d_name);
+
+        r = open(p, O_RDWR | O_CLOEXEC | O_NOCTTY | O_NONBLOCK);
+        if (r < 0) {
+            g_free(p);
+            continue;
+        }
+
+        /*
+         * prefer fstat() over checking e->d_type == DT_CHR for
+         * portability reasons
+         */
+        ret = fstat(r, &st);
+        if (ret < 0 || (st.st_mode & S_IFMT) != S_IFCHR) {
+            close(r);
+            g_free(p);
+            continue;
+        }
+
+        fd = r;
+        g_free(p);
+        break;
+    }
+
+    closedir(dir);
+    if (fd < 0) {
+        return -1;
+    }
+    return fd;
+}
diff --git a/util/envlist.c b/util/envlist.c
new file mode 100644
index 000000000..2bcc13f09
--- /dev/null
+++ b/util/envlist.c
@@ -0,0 +1,232 @@
+#include "qemu/osdep.h"
+#include "qemu/queue.h"
+#include "qemu/envlist.h"
+
+struct envlist_entry {
+	const char *ev_var;			/* actual env value */
+	QLIST_ENTRY(envlist_entry) ev_link;
+};
+
+struct envlist {
+	QLIST_HEAD(, envlist_entry) el_entries;	/* actual entries */
+	size_t el_count;			/* number of entries */
+};
+
+static int envlist_parse(envlist_t *envlist,
+    const char *env, int (*)(envlist_t *, const char *));
+
+/*
+ * Allocates new envlist and returns pointer to it.
+ */
+envlist_t *
+envlist_create(void)
+{
+	envlist_t *envlist;
+
+	envlist = g_malloc(sizeof(*envlist));
+
+	QLIST_INIT(&envlist->el_entries);
+	envlist->el_count = 0;
+
+	return (envlist);
+}
+
+/*
+ * Releases given envlist and its entries.
+ */
+void
+envlist_free(envlist_t *envlist)
+{
+	struct envlist_entry *entry;
+
+	assert(envlist != NULL);
+
+	while (envlist->el_entries.lh_first != NULL) {
+		entry = envlist->el_entries.lh_first;
+		QLIST_REMOVE(entry, ev_link);
+
+		g_free((char *)entry->ev_var);
+		g_free(entry);
+	}
+	g_free(envlist);
+}
+
+/*
+ * Parses comma separated list of set/modify environment
+ * variable entries and updates given enlist accordingly.
+ *
+ * For example:
+ *     envlist_parse(el, "HOME=foo,SHELL=/bin/sh");
+ *
+ * inserts/sets environment variables HOME and SHELL.
+ *
+ * Returns 0 on success, errno otherwise.
+ */
+int
+envlist_parse_set(envlist_t *envlist, const char *env)
+{
+	return (envlist_parse(envlist, env, &envlist_setenv));
+}
+
+/*
+ * Parses comma separated list of unset environment variable
+ * entries and removes given variables from given envlist.
+ *
+ * Returns 0 on success, errno otherwise.
+ */
+int
+envlist_parse_unset(envlist_t *envlist, const char *env)
+{
+	return (envlist_parse(envlist, env, &envlist_unsetenv));
+}
+
+/*
+ * Parses comma separated list of set, modify or unset entries
+ * and calls given callback for each entry.
+ *
+ * Returns 0 in case of success, errno otherwise.
+ */
+static int
+envlist_parse(envlist_t *envlist, const char *env,
+    int (*callback)(envlist_t *, const char *))
+{
+	char *tmpenv, *envvar;
+	char *envsave = NULL;
+    int ret = 0;
+    assert(callback != NULL);
+
+	if ((envlist == NULL) || (env == NULL))
+		return (EINVAL);
+
+	tmpenv = g_strdup(env);
+    envsave = tmpenv;
+
+    do {
+        envvar = strchr(tmpenv, ',');
+        if (envvar != NULL) {
+            *envvar = '\0';
+        }
+        if ((*callback)(envlist, tmpenv) != 0) {
+            ret = errno;
+            break;
+		}
+        tmpenv = envvar + 1;
+    } while (envvar != NULL);
+
+    g_free(envsave);
+    return ret;
+}
+
+/*
+ * Sets environment value to envlist in similar manner
+ * than putenv(3).
+ *
+ * Returns 0 in success, errno otherwise.
+ */
+int
+envlist_setenv(envlist_t *envlist, const char *env)
+{
+	struct envlist_entry *entry = NULL;
+	const char *eq_sign;
+	size_t envname_len;
+
+	if ((envlist == NULL) || (env == NULL))
+		return (EINVAL);
+
+	/* find out first equals sign in given env */
+	if ((eq_sign = strchr(env, '=')) == NULL)
+		return (EINVAL);
+	envname_len = eq_sign - env + 1;
+
+	/*
+	 * If there already exists variable with given name
+	 * we remove and release it before allocating a whole
+	 * new entry.
+	 */
+	for (entry = envlist->el_entries.lh_first; entry != NULL;
+	    entry = entry->ev_link.le_next) {
+		if (strncmp(entry->ev_var, env, envname_len) == 0)
+			break;
+	}
+
+	if (entry != NULL) {
+		QLIST_REMOVE(entry, ev_link);
+		g_free((char *)entry->ev_var);
+		g_free(entry);
+	} else {
+		envlist->el_count++;
+	}
+
+	entry = g_malloc(sizeof(*entry));
+	entry->ev_var = g_strdup(env);
+	QLIST_INSERT_HEAD(&envlist->el_entries, entry, ev_link);
+
+	return (0);
+}
+
+/*
+ * Removes given env value from envlist in similar manner
+ * than unsetenv(3).  Returns 0 in success, errno otherwise.
+ */
+int
+envlist_unsetenv(envlist_t *envlist, const char *env)
+{
+	struct envlist_entry *entry;
+	size_t envname_len;
+
+	if ((envlist == NULL) || (env == NULL))
+		return (EINVAL);
+
+	/* env is not allowed to contain '=' */
+	if (strchr(env, '=') != NULL)
+		return (EINVAL);
+
+	/*
+	 * Find out the requested entry and remove
+	 * it from the list.
+	 */
+	envname_len = strlen(env);
+	for (entry = envlist->el_entries.lh_first; entry != NULL;
+	    entry = entry->ev_link.le_next) {
+		if (strncmp(entry->ev_var, env, envname_len) == 0)
+			break;
+	}
+	if (entry != NULL) {
+		QLIST_REMOVE(entry, ev_link);
+		g_free((char *)entry->ev_var);
+		g_free(entry);
+
+		envlist->el_count--;
+	}
+	return (0);
+}
+
+/*
+ * Returns given envlist as array of strings (in same form that
+ * global variable environ is).  Caller must free returned memory
+ * by calling g_free for each element and the array.
+ * Returned array and given envlist are not related (no common
+ * references).
+ *
+ * If caller provides count pointer, number of items in array is
+ * stored there.
+ */
+char **
+envlist_to_environ(const envlist_t *envlist, size_t *count)
+{
+	struct envlist_entry *entry;
+	char **env, **penv;
+
+	penv = env = g_malloc((envlist->el_count + 1) * sizeof(char *));
+
+	for (entry = envlist->el_entries.lh_first; entry != NULL;
+	    entry = entry->ev_link.le_next) {
+		*(penv++) = g_strdup(entry->ev_var);
+	}
+	*penv = NULL; /* NULL terminate the list */
+
+	if (count != NULL)
+		*count = envlist->el_count;
+
+	return (env);
+}
diff --git a/util/error.c b/util/error.c
new file mode 100644
index 000000000..b6c89d141
--- /dev/null
+++ b/util/error.c
@@ -0,0 +1,305 @@
+/*
+ * QEMU Error Objects
+ *
+ * Copyright IBM, Corp. 2011
+ * Copyright (C) 2011-2015 Red Hat, Inc.
+ *
+ * Authors:
+ *  Anthony Liguori   
+ *  Markus Armbruster ,
+ *
+ * This work is licensed under the terms of the GNU LGPL, version 2.  See
+ * the COPYING.LIB file in the top-level directory.
+ */
+
+#include "qemu/osdep.h"
+#include "qapi/error.h"
+#include "qemu/error-report.h"
+
+struct Error
+{
+    char *msg;
+    ErrorClass err_class;
+    const char *src, *func;
+    int line;
+    GString *hint;
+};
+
+Error *error_abort;
+Error *error_fatal;
+
+static void error_handle_fatal(Error **errp, Error *err)
+{
+    if (errp == &error_abort) {
+        fprintf(stderr, "Unexpected error in %s() at %s:%d:\n",
+                err->func, err->src, err->line);
+        error_report("%s", error_get_pretty(err));
+        if (err->hint) {
+            error_printf("%s", err->hint->str);
+        }
+        abort();
+    }
+    if (errp == &error_fatal) {
+        error_report_err(err);
+        exit(1);
+    }
+}
+
+static void error_setv(Error **errp,
+                       const char *src, int line, const char *func,
+                       ErrorClass err_class, const char *fmt, va_list ap,
+                       const char *suffix)
+{
+    Error *err;
+    int saved_errno = errno;
+
+    if (errp == NULL) {
+        return;
+    }
+    assert(*errp == NULL);
+
+    err = g_malloc0(sizeof(*err));
+    err->msg = g_strdup_vprintf(fmt, ap);
+    if (suffix) {
+        char *msg = err->msg;
+        err->msg = g_strdup_printf("%s: %s", msg, suffix);
+        g_free(msg);
+    }
+    err->err_class = err_class;
+    err->src = src;
+    err->line = line;
+    err->func = func;
+
+    error_handle_fatal(errp, err);
+    *errp = err;
+
+    errno = saved_errno;
+}
+
+void error_set_internal(Error **errp,
+                        const char *src, int line, const char *func,
+                        ErrorClass err_class, const char *fmt, ...)
+{
+    va_list ap;
+
+    va_start(ap, fmt);
+    error_setv(errp, src, line, func, err_class, fmt, ap, NULL);
+    va_end(ap);
+}
+
+void error_setg_internal(Error **errp,
+                         const char *src, int line, const char *func,
+                         const char *fmt, ...)
+{
+    va_list ap;
+
+    va_start(ap, fmt);
+    error_setv(errp, src, line, func, ERROR_CLASS_GENERIC_ERROR, fmt, ap, NULL);
+    va_end(ap);
+}
+
+void error_setg_errno_internal(Error **errp,
+                               const char *src, int line, const char *func,
+                               int os_errno, const char *fmt, ...)
+{
+    va_list ap;
+    int saved_errno = errno;
+
+    va_start(ap, fmt);
+    error_setv(errp, src, line, func, ERROR_CLASS_GENERIC_ERROR, fmt, ap,
+               os_errno != 0 ? strerror(os_errno) : NULL);
+    va_end(ap);
+
+    errno = saved_errno;
+}
+
+void error_setg_file_open_internal(Error **errp,
+                                   const char *src, int line, const char *func,
+                                   int os_errno, const char *filename)
+{
+    error_setg_errno_internal(errp, src, line, func, os_errno,
+                              "Could not open '%s'", filename);
+}
+
+void error_vprepend(Error *const *errp, const char *fmt, va_list ap)
+{
+    GString *newmsg;
+
+    if (!errp) {
+        return;
+    }
+
+    newmsg = g_string_new(NULL);
+    g_string_vprintf(newmsg, fmt, ap);
+    g_string_append(newmsg, (*errp)->msg);
+    g_free((*errp)->msg);
+    (*errp)->msg = g_string_free(newmsg, 0);
+}
+
+void error_prepend(Error *const *errp, const char *fmt, ...)
+{
+    va_list ap;
+
+    va_start(ap, fmt);
+    error_vprepend(errp, fmt, ap);
+    va_end(ap);
+}
+
+void error_append_hint(Error *const *errp, const char *fmt, ...)
+{
+    va_list ap;
+    int saved_errno = errno;
+    Error *err;
+
+    if (!errp) {
+        return;
+    }
+    err = *errp;
+    assert(err && errp != &error_abort && errp != &error_fatal);
+
+    if (!err->hint) {
+        err->hint = g_string_new(NULL);
+    }
+    va_start(ap, fmt);
+    g_string_append_vprintf(err->hint, fmt, ap);
+    va_end(ap);
+
+    errno = saved_errno;
+}
+
+#ifdef _WIN32
+
+void error_setg_win32_internal(Error **errp,
+                               const char *src, int line, const char *func,
+                               int win32_err, const char *fmt, ...)
+{
+    va_list ap;
+    char *suffix = NULL;
+
+    if (errp == NULL) {
+        return;
+    }
+
+    if (win32_err != 0) {
+        suffix = g_win32_error_message(win32_err);
+    }
+
+    va_start(ap, fmt);
+    error_setv(errp, src, line, func, ERROR_CLASS_GENERIC_ERROR,
+               fmt, ap, suffix);
+    va_end(ap);
+
+    g_free(suffix);
+}
+
+#endif
+
+Error *error_copy(const Error *err)
+{
+    Error *err_new;
+
+    err_new = g_malloc0(sizeof(*err));
+    err_new->msg = g_strdup(err->msg);
+    err_new->err_class = err->err_class;
+    err_new->src = err->src;
+    err_new->line = err->line;
+    err_new->func = err->func;
+    if (err->hint) {
+        err_new->hint = g_string_new(err->hint->str);
+    }
+
+    return err_new;
+}
+
+ErrorClass error_get_class(const Error *err)
+{
+    return err->err_class;
+}
+
+const char *error_get_pretty(const Error *err)
+{
+    return err->msg;
+}
+
+void error_report_err(Error *err)
+{
+    error_report("%s", error_get_pretty(err));
+    if (err->hint) {
+        error_printf("%s", err->hint->str);
+    }
+    error_free(err);
+}
+
+void warn_report_err(Error *err)
+{
+    warn_report("%s", error_get_pretty(err));
+    if (err->hint) {
+        error_printf("%s", err->hint->str);
+    }
+    error_free(err);
+}
+
+void error_reportf_err(Error *err, const char *fmt, ...)
+{
+    va_list ap;
+
+    va_start(ap, fmt);
+    error_vprepend(&err, fmt, ap);
+    va_end(ap);
+    error_report_err(err);
+}
+
+
+void warn_reportf_err(Error *err, const char *fmt, ...)
+{
+    va_list ap;
+
+    va_start(ap, fmt);
+    error_vprepend(&err, fmt, ap);
+    va_end(ap);
+    warn_report_err(err);
+}
+
+void error_free(Error *err)
+{
+    if (err) {
+        g_free(err->msg);
+        if (err->hint) {
+            g_string_free(err->hint, true);
+        }
+        g_free(err);
+    }
+}
+
+void error_free_or_abort(Error **errp)
+{
+    assert(errp && *errp);
+    error_free(*errp);
+    *errp = NULL;
+}
+
+void error_propagate(Error **dst_errp, Error *local_err)
+{
+    if (!local_err) {
+        return;
+    }
+    error_handle_fatal(dst_errp, local_err);
+    if (dst_errp && !*dst_errp) {
+        *dst_errp = local_err;
+    } else {
+        error_free(local_err);
+    }
+}
+
+void error_propagate_prepend(Error **dst_errp, Error *err,
+                             const char *fmt, ...)
+{
+    va_list ap;
+
+    if (dst_errp && !*dst_errp) {
+        va_start(ap, fmt);
+        error_vprepend(&err, fmt, ap);
+        va_end(ap);
+    } /* else error is being ignored, don't bother with prepending */
+    error_propagate(dst_errp, err);
+}
diff --git a/util/event_notifier-posix.c b/util/event_notifier-posix.c
new file mode 100644
index 000000000..00d93204f
--- /dev/null
+++ b/util/event_notifier-posix.c
@@ -0,0 +1,124 @@
+/*
+ * event notifier support
+ *
+ * Copyright Red Hat, Inc. 2010
+ *
+ * Authors:
+ *  Michael S. Tsirkin 
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
+ * See the COPYING file in the top-level directory.
+ */
+
+#include "qemu/osdep.h"
+#include "qemu-common.h"
+#include "qemu/cutils.h"
+#include "qemu/event_notifier.h"
+#include "qemu/main-loop.h"
+
+#ifdef CONFIG_EVENTFD
+#include 
+#endif
+
+#ifdef CONFIG_EVENTFD
+/*
+ * Initialize @e with existing file descriptor @fd.
+ * @fd must be a genuine eventfd object, emulation with pipe won't do.
+ */
+void event_notifier_init_fd(EventNotifier *e, int fd)
+{
+    e->rfd = fd;
+    e->wfd = fd;
+}
+#endif
+
+int event_notifier_init(EventNotifier *e, int active)
+{
+    int fds[2];
+    int ret;
+
+#ifdef CONFIG_EVENTFD
+    ret = eventfd(0, EFD_NONBLOCK | EFD_CLOEXEC);
+#else
+    ret = -1;
+    errno = ENOSYS;
+#endif
+    if (ret >= 0) {
+        e->rfd = e->wfd = ret;
+    } else {
+        if (errno != ENOSYS) {
+            return -errno;
+        }
+        if (qemu_pipe(fds) < 0) {
+            return -errno;
+        }
+        ret = fcntl_setfl(fds[0], O_NONBLOCK);
+        if (ret < 0) {
+            ret = -errno;
+            goto fail;
+        }
+        ret = fcntl_setfl(fds[1], O_NONBLOCK);
+        if (ret < 0) {
+            ret = -errno;
+            goto fail;
+        }
+        e->rfd = fds[0];
+        e->wfd = fds[1];
+    }
+    if (active) {
+        event_notifier_set(e);
+    }
+    return 0;
+
+fail:
+    close(fds[0]);
+    close(fds[1]);
+    return ret;
+}
+
+void event_notifier_cleanup(EventNotifier *e)
+{
+    if (e->rfd != e->wfd) {
+        close(e->rfd);
+    }
+    e->rfd = -1;
+    close(e->wfd);
+    e->wfd = -1;
+}
+
+int event_notifier_get_fd(const EventNotifier *e)
+{
+    return e->rfd;
+}
+
+int event_notifier_set(EventNotifier *e)
+{
+    static const uint64_t value = 1;
+    ssize_t ret;
+
+    do {
+        ret = write(e->wfd, &value, sizeof(value));
+    } while (ret < 0 && errno == EINTR);
+
+    /* EAGAIN is fine, a read must be pending.  */
+    if (ret < 0 && errno != EAGAIN) {
+        return -errno;
+    }
+    return 0;
+}
+
+int event_notifier_test_and_clear(EventNotifier *e)
+{
+    int value;
+    ssize_t len;
+    char buffer[512];
+
+    /* Drain the notify pipe.  For eventfd, only 8 bytes will be read.  */
+    value = 0;
+    do {
+        len = read(e->rfd, buffer, sizeof(buffer));
+        value |= (len > 0);
+    } while ((len == -1 && errno == EINTR) || len == sizeof(buffer));
+
+    return value;
+}
diff --git a/util/event_notifier-win32.c b/util/event_notifier-win32.c
new file mode 100644
index 000000000..62c53b0a9
--- /dev/null
+++ b/util/event_notifier-win32.c
@@ -0,0 +1,50 @@
+/*
+ * event notifier support
+ *
+ * Copyright Red Hat, Inc. 2010
+ *
+ * Authors:
+ *  Michael S. Tsirkin 
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
+ * See the COPYING file in the top-level directory.
+ */
+
+#include "qemu/osdep.h"
+#include "qemu-common.h"
+#include "qemu/event_notifier.h"
+#include "qemu/main-loop.h"
+
+int event_notifier_init(EventNotifier *e, int active)
+{
+    e->event = CreateEvent(NULL, TRUE, FALSE, NULL);
+    assert(e->event);
+    return 0;
+}
+
+void event_notifier_cleanup(EventNotifier *e)
+{
+    CloseHandle(e->event);
+    e->event = NULL;
+}
+
+HANDLE event_notifier_get_handle(EventNotifier *e)
+{
+    return e->event;
+}
+
+int event_notifier_set(EventNotifier *e)
+{
+    SetEvent(e->event);
+    return 0;
+}
+
+int event_notifier_test_and_clear(EventNotifier *e)
+{
+    int ret = WaitForSingleObject(e->event, 0);
+    if (ret == WAIT_OBJECT_0) {
+        ResetEvent(e->event);
+        return true;
+    }
+    return false;
+}
diff --git a/util/fdmon-epoll.c b/util/fdmon-epoll.c
new file mode 100644
index 000000000..e11a8a022
--- /dev/null
+++ b/util/fdmon-epoll.c
@@ -0,0 +1,155 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ * epoll(7) file descriptor monitoring
+ */
+
+#include "qemu/osdep.h"
+#include 
+#include "qemu/rcu_queue.h"
+#include "aio-posix.h"
+
+/* The fd number threshold to switch to epoll */
+#define EPOLL_ENABLE_THRESHOLD 64
+
+void fdmon_epoll_disable(AioContext *ctx)
+{
+    if (ctx->epollfd >= 0) {
+        close(ctx->epollfd);
+        ctx->epollfd = -1;
+    }
+
+    /* Switch back */
+    ctx->fdmon_ops = &fdmon_poll_ops;
+}
+
+static inline int epoll_events_from_pfd(int pfd_events)
+{
+    return (pfd_events & G_IO_IN ? EPOLLIN : 0) |
+           (pfd_events & G_IO_OUT ? EPOLLOUT : 0) |
+           (pfd_events & G_IO_HUP ? EPOLLHUP : 0) |
+           (pfd_events & G_IO_ERR ? EPOLLERR : 0);
+}
+
+static void fdmon_epoll_update(AioContext *ctx,
+                               AioHandler *old_node,
+                               AioHandler *new_node)
+{
+    struct epoll_event event = {
+        .data.ptr = new_node,
+        .events = new_node ? epoll_events_from_pfd(new_node->pfd.events) : 0,
+    };
+    int r;
+
+    if (!new_node) {
+        r = epoll_ctl(ctx->epollfd, EPOLL_CTL_DEL, old_node->pfd.fd, &event);
+    } else if (!old_node) {
+        r = epoll_ctl(ctx->epollfd, EPOLL_CTL_ADD, new_node->pfd.fd, &event);
+    } else {
+        r = epoll_ctl(ctx->epollfd, EPOLL_CTL_MOD, new_node->pfd.fd, &event);
+    }
+
+    if (r) {
+        fdmon_epoll_disable(ctx);
+    }
+}
+
+static int fdmon_epoll_wait(AioContext *ctx, AioHandlerList *ready_list,
+                            int64_t timeout)
+{
+    GPollFD pfd = {
+        .fd = ctx->epollfd,
+        .events = G_IO_IN | G_IO_OUT | G_IO_HUP | G_IO_ERR,
+    };
+    AioHandler *node;
+    int i, ret = 0;
+    struct epoll_event events[128];
+
+    /* Fall back while external clients are disabled */
+    if (qatomic_read(&ctx->external_disable_cnt)) {
+        return fdmon_poll_ops.wait(ctx, ready_list, timeout);
+    }
+
+    if (timeout > 0) {
+        ret = qemu_poll_ns(&pfd, 1, timeout);
+        if (ret > 0) {
+            timeout = 0;
+        }
+    }
+    if (timeout <= 0 || ret > 0) {
+        ret = epoll_wait(ctx->epollfd, events,
+                         ARRAY_SIZE(events),
+                         timeout);
+        if (ret <= 0) {
+            goto out;
+        }
+        for (i = 0; i < ret; i++) {
+            int ev = events[i].events;
+            int revents = (ev & EPOLLIN ? G_IO_IN : 0) |
+                          (ev & EPOLLOUT ? G_IO_OUT : 0) |
+                          (ev & EPOLLHUP ? G_IO_HUP : 0) |
+                          (ev & EPOLLERR ? G_IO_ERR : 0);
+
+            node = events[i].data.ptr;
+            aio_add_ready_handler(ready_list, node, revents);
+        }
+    }
+out:
+    return ret;
+}
+
+static const FDMonOps fdmon_epoll_ops = {
+    .update = fdmon_epoll_update,
+    .wait = fdmon_epoll_wait,
+    .need_wait = aio_poll_disabled,
+};
+
+static bool fdmon_epoll_try_enable(AioContext *ctx)
+{
+    AioHandler *node;
+    struct epoll_event event;
+
+    QLIST_FOREACH_RCU(node, &ctx->aio_handlers, node) {
+        int r;
+        if (QLIST_IS_INSERTED(node, node_deleted) || !node->pfd.events) {
+            continue;
+        }
+        event.events = epoll_events_from_pfd(node->pfd.events);
+        event.data.ptr = node;
+        r = epoll_ctl(ctx->epollfd, EPOLL_CTL_ADD, node->pfd.fd, &event);
+        if (r) {
+            return false;
+        }
+    }
+
+    ctx->fdmon_ops = &fdmon_epoll_ops;
+    return true;
+}
+
+bool fdmon_epoll_try_upgrade(AioContext *ctx, unsigned npfd)
+{
+    if (ctx->epollfd < 0) {
+        return false;
+    }
+
+    /* Do not upgrade while external clients are disabled */
+    if (qatomic_read(&ctx->external_disable_cnt)) {
+        return false;
+    }
+
+    if (npfd >= EPOLL_ENABLE_THRESHOLD) {
+        if (fdmon_epoll_try_enable(ctx)) {
+            return true;
+        } else {
+            fdmon_epoll_disable(ctx);
+        }
+    }
+    return false;
+}
+
+void fdmon_epoll_setup(AioContext *ctx)
+{
+    ctx->epollfd = epoll_create1(EPOLL_CLOEXEC);
+    if (ctx->epollfd == -1) {
+        fprintf(stderr, "Failed to create epoll instance: %s", strerror(errno));
+    }
+}
diff --git a/util/fdmon-io_uring.c b/util/fdmon-io_uring.c
new file mode 100644
index 000000000..1461dfa40
--- /dev/null
+++ b/util/fdmon-io_uring.c
@@ -0,0 +1,361 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ * Linux io_uring file descriptor monitoring
+ *
+ * The Linux io_uring API supports file descriptor monitoring with a few
+ * advantages over existing APIs like poll(2) and epoll(7):
+ *
+ * 1. Userspace polling of events is possible because the completion queue (cq
+ *    ring) is shared between the kernel and userspace.  This allows
+ *    applications that rely on userspace polling to also monitor file
+ *    descriptors in the same userspace polling loop.
+ *
+ * 2. Submission and completion is batched and done together in a single system
+ *    call.  This minimizes the number of system calls.
+ *
+ * 3. File descriptor monitoring is O(1) like epoll(7) so it scales better than
+ *    poll(2).
+ *
+ * 4. Nanosecond timeouts are supported so it requires fewer syscalls than
+ *    epoll(7).
+ *
+ * This code only monitors file descriptors and does not do asynchronous disk
+ * I/O.  Implementing disk I/O efficiently has other requirements and should
+ * use a separate io_uring so it does not make sense to unify the code.
+ *
+ * File descriptor monitoring is implemented using the following operations:
+ *
+ * 1. IORING_OP_POLL_ADD - adds a file descriptor to be monitored.
+ * 2. IORING_OP_POLL_REMOVE - removes a file descriptor being monitored.  When
+ *    the poll mask changes for a file descriptor it is first removed and then
+ *    re-added with the new poll mask, so this operation is also used as part
+ *    of modifying an existing monitored file descriptor.
+ * 3. IORING_OP_TIMEOUT - added every time a blocking syscall is made to wait
+ *    for events.  This operation self-cancels if another event completes
+ *    before the timeout.
+ *
+ * io_uring calls the submission queue the "sq ring" and the completion queue
+ * the "cq ring".  Ring entries are called "sqe" and "cqe", respectively.
+ *
+ * The code is structured so that sq/cq rings are only modified within
+ * fdmon_io_uring_wait().  Changes to AioHandlers are made by enqueuing them on
+ * ctx->submit_list so that fdmon_io_uring_wait() can submit IORING_OP_POLL_ADD
+ * and/or IORING_OP_POLL_REMOVE sqes for them.
+ */
+
+#include "qemu/osdep.h"
+#include 
+#include "qemu/rcu_queue.h"
+#include "aio-posix.h"
+
+enum {
+    FDMON_IO_URING_ENTRIES  = 128, /* sq/cq ring size */
+
+    /* AioHandler::flags */
+    FDMON_IO_URING_PENDING  = (1 << 0),
+    FDMON_IO_URING_ADD      = (1 << 1),
+    FDMON_IO_URING_REMOVE   = (1 << 2),
+};
+
+static inline int poll_events_from_pfd(int pfd_events)
+{
+    return (pfd_events & G_IO_IN ? POLLIN : 0) |
+           (pfd_events & G_IO_OUT ? POLLOUT : 0) |
+           (pfd_events & G_IO_HUP ? POLLHUP : 0) |
+           (pfd_events & G_IO_ERR ? POLLERR : 0);
+}
+
+static inline int pfd_events_from_poll(int poll_events)
+{
+    return (poll_events & POLLIN ? G_IO_IN : 0) |
+           (poll_events & POLLOUT ? G_IO_OUT : 0) |
+           (poll_events & POLLHUP ? G_IO_HUP : 0) |
+           (poll_events & POLLERR ? G_IO_ERR : 0);
+}
+
+/*
+ * Returns an sqe for submitting a request.  Only be called within
+ * fdmon_io_uring_wait().
+ */
+static struct io_uring_sqe *get_sqe(AioContext *ctx)
+{
+    struct io_uring *ring = &ctx->fdmon_io_uring;
+    struct io_uring_sqe *sqe = io_uring_get_sqe(ring);
+    int ret;
+
+    if (likely(sqe)) {
+        return sqe;
+    }
+
+    /* No free sqes left, submit pending sqes first */
+    do {
+        ret = io_uring_submit(ring);
+    } while (ret == -EINTR);
+
+    assert(ret > 1);
+    sqe = io_uring_get_sqe(ring);
+    assert(sqe);
+    return sqe;
+}
+
+/* Atomically enqueue an AioHandler for sq ring submission */
+static void enqueue(AioHandlerSList *head, AioHandler *node, unsigned flags)
+{
+    unsigned old_flags;
+
+    old_flags = qatomic_fetch_or(&node->flags, FDMON_IO_URING_PENDING | flags);
+    if (!(old_flags & FDMON_IO_URING_PENDING)) {
+        QSLIST_INSERT_HEAD_ATOMIC(head, node, node_submitted);
+    }
+}
+
+/* Dequeue an AioHandler for sq ring submission.  Called by fill_sq_ring(). */
+static AioHandler *dequeue(AioHandlerSList *head, unsigned *flags)
+{
+    AioHandler *node = QSLIST_FIRST(head);
+
+    if (!node) {
+        return NULL;
+    }
+
+    /* Doesn't need to be atomic since fill_sq_ring() moves the list */
+    QSLIST_REMOVE_HEAD(head, node_submitted);
+
+    /*
+     * Don't clear FDMON_IO_URING_REMOVE.  It's sticky so it can serve two
+     * purposes: telling fill_sq_ring() to submit IORING_OP_POLL_REMOVE and
+     * telling process_cqe() to delete the AioHandler when its
+     * IORING_OP_POLL_ADD completes.
+     */
+    *flags = qatomic_fetch_and(&node->flags, ~(FDMON_IO_URING_PENDING |
+                                              FDMON_IO_URING_ADD));
+    return node;
+}
+
+static void fdmon_io_uring_update(AioContext *ctx,
+                                  AioHandler *old_node,
+                                  AioHandler *new_node)
+{
+    if (new_node) {
+        enqueue(&ctx->submit_list, new_node, FDMON_IO_URING_ADD);
+    }
+
+    if (old_node) {
+        /*
+         * Deletion is tricky because IORING_OP_POLL_ADD and
+         * IORING_OP_POLL_REMOVE are async.  We need to wait for the original
+         * IORING_OP_POLL_ADD to complete before this handler can be freed
+         * safely.
+         *
+         * It's possible that the file descriptor becomes ready and the
+         * IORING_OP_POLL_ADD cqe is enqueued before IORING_OP_POLL_REMOVE is
+         * submitted, too.
+         *
+         * Mark this handler deleted right now but don't place it on
+         * ctx->deleted_aio_handlers yet.  Instead, manually fudge the list
+         * entry to make QLIST_IS_INSERTED() think this handler has been
+         * inserted and other code recognizes this AioHandler as deleted.
+         *
+         * Once the original IORING_OP_POLL_ADD completes we enqueue the
+         * handler on the real ctx->deleted_aio_handlers list to be freed.
+         */
+        assert(!QLIST_IS_INSERTED(old_node, node_deleted));
+        old_node->node_deleted.le_prev = &old_node->node_deleted.le_next;
+
+        enqueue(&ctx->submit_list, old_node, FDMON_IO_URING_REMOVE);
+    }
+}
+
+static void add_poll_add_sqe(AioContext *ctx, AioHandler *node)
+{
+    struct io_uring_sqe *sqe = get_sqe(ctx);
+    int events = poll_events_from_pfd(node->pfd.events);
+
+    io_uring_prep_poll_add(sqe, node->pfd.fd, events);
+    io_uring_sqe_set_data(sqe, node);
+}
+
+static void add_poll_remove_sqe(AioContext *ctx, AioHandler *node)
+{
+    struct io_uring_sqe *sqe = get_sqe(ctx);
+
+    io_uring_prep_poll_remove(sqe, node);
+}
+
+/* Add a timeout that self-cancels when another cqe becomes ready */
+static void add_timeout_sqe(AioContext *ctx, int64_t ns)
+{
+    struct io_uring_sqe *sqe;
+    struct __kernel_timespec ts = {
+        .tv_sec = ns / NANOSECONDS_PER_SECOND,
+        .tv_nsec = ns % NANOSECONDS_PER_SECOND,
+    };
+
+    sqe = get_sqe(ctx);
+    io_uring_prep_timeout(sqe, &ts, 1, 0);
+}
+
+/* Add sqes from ctx->submit_list for submission */
+static void fill_sq_ring(AioContext *ctx)
+{
+    AioHandlerSList submit_list;
+    AioHandler *node;
+    unsigned flags;
+
+    QSLIST_MOVE_ATOMIC(&submit_list, &ctx->submit_list);
+
+    while ((node = dequeue(&submit_list, &flags))) {
+        /* Order matters, just in case both flags were set */
+        if (flags & FDMON_IO_URING_ADD) {
+            add_poll_add_sqe(ctx, node);
+        }
+        if (flags & FDMON_IO_URING_REMOVE) {
+            add_poll_remove_sqe(ctx, node);
+        }
+    }
+}
+
+/* Returns true if a handler became ready */
+static bool process_cqe(AioContext *ctx,
+                        AioHandlerList *ready_list,
+                        struct io_uring_cqe *cqe)
+{
+    AioHandler *node = io_uring_cqe_get_data(cqe);
+    unsigned flags;
+
+    /* poll_timeout and poll_remove have a zero user_data field */
+    if (!node) {
+        return false;
+    }
+
+    /*
+     * Deletion can only happen when IORING_OP_POLL_ADD completes.  If we race
+     * with enqueue() here then we can safely clear the FDMON_IO_URING_REMOVE
+     * bit before IORING_OP_POLL_REMOVE is submitted.
+     */
+    flags = qatomic_fetch_and(&node->flags, ~FDMON_IO_URING_REMOVE);
+    if (flags & FDMON_IO_URING_REMOVE) {
+        QLIST_INSERT_HEAD_RCU(&ctx->deleted_aio_handlers, node, node_deleted);
+        return false;
+    }
+
+    aio_add_ready_handler(ready_list, node, pfd_events_from_poll(cqe->res));
+
+    /* IORING_OP_POLL_ADD is one-shot so we must re-arm it */
+    add_poll_add_sqe(ctx, node);
+    return true;
+}
+
+static int process_cq_ring(AioContext *ctx, AioHandlerList *ready_list)
+{
+    struct io_uring *ring = &ctx->fdmon_io_uring;
+    struct io_uring_cqe *cqe;
+    unsigned num_cqes = 0;
+    unsigned num_ready = 0;
+    unsigned head;
+
+    io_uring_for_each_cqe(ring, head, cqe) {
+        if (process_cqe(ctx, ready_list, cqe)) {
+            num_ready++;
+        }
+
+        num_cqes++;
+    }
+
+    io_uring_cq_advance(ring, num_cqes);
+    return num_ready;
+}
+
+static int fdmon_io_uring_wait(AioContext *ctx, AioHandlerList *ready_list,
+                               int64_t timeout)
+{
+    unsigned wait_nr = 1; /* block until at least one cqe is ready */
+    int ret;
+
+    /* Fall back while external clients are disabled */
+    if (qatomic_read(&ctx->external_disable_cnt)) {
+        return fdmon_poll_ops.wait(ctx, ready_list, timeout);
+    }
+
+    if (timeout == 0) {
+        wait_nr = 0; /* non-blocking */
+    } else if (timeout > 0) {
+        add_timeout_sqe(ctx, timeout);
+    }
+
+    fill_sq_ring(ctx);
+
+    do {
+        ret = io_uring_submit_and_wait(&ctx->fdmon_io_uring, wait_nr);
+    } while (ret == -EINTR);
+
+    assert(ret >= 0);
+
+    return process_cq_ring(ctx, ready_list);
+}
+
+static bool fdmon_io_uring_need_wait(AioContext *ctx)
+{
+    /* Have io_uring events completed? */
+    if (io_uring_cq_ready(&ctx->fdmon_io_uring)) {
+        return true;
+    }
+
+    /* Are there pending sqes to submit? */
+    if (io_uring_sq_ready(&ctx->fdmon_io_uring)) {
+        return true;
+    }
+
+    /* Do we need to process AioHandlers for io_uring changes? */
+    if (!QSLIST_EMPTY_RCU(&ctx->submit_list)) {
+        return true;
+    }
+
+    /* Are we falling back to fdmon-poll? */
+    return qatomic_read(&ctx->external_disable_cnt);
+}
+
+static const FDMonOps fdmon_io_uring_ops = {
+    .update = fdmon_io_uring_update,
+    .wait = fdmon_io_uring_wait,
+    .need_wait = fdmon_io_uring_need_wait,
+};
+
+bool fdmon_io_uring_setup(AioContext *ctx)
+{
+    int ret;
+
+    ret = io_uring_queue_init(FDMON_IO_URING_ENTRIES, &ctx->fdmon_io_uring, 0);
+    if (ret != 0) {
+        return false;
+    }
+
+    QSLIST_INIT(&ctx->submit_list);
+    ctx->fdmon_ops = &fdmon_io_uring_ops;
+    return true;
+}
+
+void fdmon_io_uring_destroy(AioContext *ctx)
+{
+    if (ctx->fdmon_ops == &fdmon_io_uring_ops) {
+        AioHandler *node;
+
+        io_uring_queue_exit(&ctx->fdmon_io_uring);
+
+        /* Move handlers due to be removed onto the deleted list */
+        while ((node = QSLIST_FIRST_RCU(&ctx->submit_list))) {
+            unsigned flags = qatomic_fetch_and(&node->flags,
+                    ~(FDMON_IO_URING_PENDING |
+                      FDMON_IO_URING_ADD |
+                      FDMON_IO_URING_REMOVE));
+
+            if (flags & FDMON_IO_URING_REMOVE) {
+                QLIST_INSERT_HEAD_RCU(&ctx->deleted_aio_handlers, node, node_deleted);
+            }
+
+            QSLIST_REMOVE_HEAD_RCU(&ctx->submit_list, node_submitted);
+        }
+
+        ctx->fdmon_ops = &fdmon_poll_ops;
+    }
+}
diff --git a/util/fdmon-poll.c b/util/fdmon-poll.c
new file mode 100644
index 000000000..5fe3b4786
--- /dev/null
+++ b/util/fdmon-poll.c
@@ -0,0 +1,108 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ * poll(2) file descriptor monitoring
+ *
+ * Uses ppoll(2) when available, g_poll() otherwise.
+ */
+
+#include "qemu/osdep.h"
+#include "aio-posix.h"
+#include "qemu/rcu_queue.h"
+
+/*
+ * These thread-local variables are used only in fdmon_poll_wait() around the
+ * call to the poll() system call.  In particular they are not used while
+ * aio_poll is performing callbacks, which makes it much easier to think about
+ * reentrancy!
+ *
+ * Stack-allocated arrays would be perfect but they have size limitations;
+ * heap allocation is expensive enough that we want to reuse arrays across
+ * calls to aio_poll().  And because poll() has to be called without holding
+ * any lock, the arrays cannot be stored in AioContext.  Thread-local data
+ * has none of the disadvantages of these three options.
+ */
+static __thread GPollFD *pollfds;
+static __thread AioHandler **nodes;
+static __thread unsigned npfd, nalloc;
+static __thread Notifier pollfds_cleanup_notifier;
+
+static void pollfds_cleanup(Notifier *n, void *unused)
+{
+    g_assert(npfd == 0);
+    g_free(pollfds);
+    g_free(nodes);
+    nalloc = 0;
+}
+
+static void add_pollfd(AioHandler *node)
+{
+    if (npfd == nalloc) {
+        if (nalloc == 0) {
+            pollfds_cleanup_notifier.notify = pollfds_cleanup;
+            qemu_thread_atexit_add(&pollfds_cleanup_notifier);
+            nalloc = 8;
+        } else {
+            g_assert(nalloc <= INT_MAX);
+            nalloc *= 2;
+        }
+        pollfds = g_renew(GPollFD, pollfds, nalloc);
+        nodes = g_renew(AioHandler *, nodes, nalloc);
+    }
+    nodes[npfd] = node;
+    pollfds[npfd] = (GPollFD) {
+        .fd = node->pfd.fd,
+        .events = node->pfd.events,
+    };
+    npfd++;
+}
+
+static int fdmon_poll_wait(AioContext *ctx, AioHandlerList *ready_list,
+                            int64_t timeout)
+{
+    AioHandler *node;
+    int ret;
+
+    assert(npfd == 0);
+
+    QLIST_FOREACH_RCU(node, &ctx->aio_handlers, node) {
+        if (!QLIST_IS_INSERTED(node, node_deleted) && node->pfd.events
+                && aio_node_check(ctx, node->is_external)) {
+            add_pollfd(node);
+        }
+    }
+
+    /* epoll(7) is faster above a certain number of fds */
+    if (fdmon_epoll_try_upgrade(ctx, npfd)) {
+        npfd = 0; /* we won't need pollfds[], reset npfd */
+        return ctx->fdmon_ops->wait(ctx, ready_list, timeout);
+    }
+
+    ret = qemu_poll_ns(pollfds, npfd, timeout);
+    if (ret > 0) {
+        int i;
+
+        for (i = 0; i < npfd; i++) {
+            int revents = pollfds[i].revents;
+
+            if (revents) {
+                aio_add_ready_handler(ready_list, nodes[i], revents);
+            }
+        }
+    }
+
+    npfd = 0;
+    return ret;
+}
+
+static void fdmon_poll_update(AioContext *ctx,
+                              AioHandler *old_node,
+                              AioHandler *new_node)
+{
+    /* Do nothing, AioHandler already contains the state we'll need */
+}
+
+const FDMonOps fdmon_poll_ops = {
+    .update = fdmon_poll_update,
+    .wait = fdmon_poll_wait,
+    .need_wait = aio_poll_disabled,
+};
diff --git a/util/fifo8.c b/util/fifo8.c
new file mode 100644
index 000000000..a5dd789ce
--- /dev/null
+++ b/util/fifo8.c
@@ -0,0 +1,126 @@
+/*
+ * Generic FIFO component, implemented as a circular buffer.
+ *
+ * Copyright (c) 2012 Peter A. G. Crosthwaite
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, see .
+ */
+
+#include "qemu/osdep.h"
+#include "migration/vmstate.h"
+#include "qemu/fifo8.h"
+
+void fifo8_create(Fifo8 *fifo, uint32_t capacity)
+{
+    fifo->data = g_new(uint8_t, capacity);
+    fifo->capacity = capacity;
+    fifo->head = 0;
+    fifo->num = 0;
+}
+
+void fifo8_destroy(Fifo8 *fifo)
+{
+    g_free(fifo->data);
+}
+
+void fifo8_push(Fifo8 *fifo, uint8_t data)
+{
+    if (fifo->num == fifo->capacity) {
+        abort();
+    }
+    fifo->data[(fifo->head + fifo->num) % fifo->capacity] = data;
+    fifo->num++;
+}
+
+void fifo8_push_all(Fifo8 *fifo, const uint8_t *data, uint32_t num)
+{
+    uint32_t start, avail;
+
+    if (fifo->num + num > fifo->capacity) {
+        abort();
+    }
+
+    start = (fifo->head + fifo->num) % fifo->capacity;
+
+    if (start + num <= fifo->capacity) {
+        memcpy(&fifo->data[start], data, num);
+    } else {
+        avail = fifo->capacity - start;
+        memcpy(&fifo->data[start], data, avail);
+        memcpy(&fifo->data[0], &data[avail], num - avail);
+    }
+
+    fifo->num += num;
+}
+
+uint8_t fifo8_pop(Fifo8 *fifo)
+{
+    uint8_t ret;
+
+    if (fifo->num == 0) {
+        abort();
+    }
+    ret = fifo->data[fifo->head++];
+    fifo->head %= fifo->capacity;
+    fifo->num--;
+    return ret;
+}
+
+const uint8_t *fifo8_pop_buf(Fifo8 *fifo, uint32_t max, uint32_t *num)
+{
+    uint8_t *ret;
+
+    if (max == 0 || max > fifo->num) {
+        abort();
+    }
+    *num = MIN(fifo->capacity - fifo->head, max);
+    ret = &fifo->data[fifo->head];
+    fifo->head += *num;
+    fifo->head %= fifo->capacity;
+    fifo->num -= *num;
+    return ret;
+}
+
+void fifo8_reset(Fifo8 *fifo)
+{
+    fifo->num = 0;
+    fifo->head = 0;
+}
+
+bool fifo8_is_empty(Fifo8 *fifo)
+{
+    return (fifo->num == 0);
+}
+
+bool fifo8_is_full(Fifo8 *fifo)
+{
+    return (fifo->num == fifo->capacity);
+}
+
+uint32_t fifo8_num_free(Fifo8 *fifo)
+{
+    return fifo->capacity - fifo->num;
+}
+
+uint32_t fifo8_num_used(Fifo8 *fifo)
+{
+    return fifo->num;
+}
+
+const VMStateDescription vmstate_fifo8 = {
+    .name = "Fifo8",
+    .version_id = 1,
+    .minimum_version_id = 1,
+    .fields = (VMStateField[]) {
+        VMSTATE_VBUFFER_UINT32(data, Fifo8, 1, NULL, capacity),
+        VMSTATE_UINT32(head, Fifo8),
+        VMSTATE_UINT32(num, Fifo8),
+        VMSTATE_END_OF_LIST()
+    }
+};
diff --git a/util/filemonitor-inotify.c b/util/filemonitor-inotify.c
new file mode 100644
index 000000000..2c45f7f17
--- /dev/null
+++ b/util/filemonitor-inotify.c
@@ -0,0 +1,339 @@
+/*
+ * QEMU file monitor Linux inotify impl
+ *
+ * Copyright (c) 2018 Red Hat, Inc.
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, see .
+ *
+ */
+
+#include "qemu/osdep.h"
+#include "qemu/filemonitor.h"
+#include "qemu/main-loop.h"
+#include "qemu/error-report.h"
+#include "qapi/error.h"
+#include "trace.h"
+
+#include 
+
+struct QFileMonitor {
+    int fd;
+    QemuMutex lock; /* protects dirs & idmap */
+    GHashTable *dirs; /* dirname => QFileMonitorDir */
+    GHashTable *idmap; /* inotify ID => dirname */
+};
+
+
+typedef struct {
+    int64_t id; /* watch ID */
+    char *filename; /* optional filter */
+    QFileMonitorHandler cb;
+    void *opaque;
+} QFileMonitorWatch;
+
+
+typedef struct {
+    char *path;
+    int inotify_id; /* inotify ID */
+    int next_file_id; /* file ID counter */
+    GArray *watches; /* QFileMonitorWatch elements */
+} QFileMonitorDir;
+
+
+static void qemu_file_monitor_watch(void *arg)
+{
+    QFileMonitor *mon = arg;
+    char buf[4096]
+        __attribute__ ((aligned(__alignof__(struct inotify_event))));
+    int used = 0;
+    int len;
+
+    qemu_mutex_lock(&mon->lock);
+
+    if (mon->fd == -1) {
+        qemu_mutex_unlock(&mon->lock);
+        return;
+    }
+
+    len = read(mon->fd, buf, sizeof(buf));
+
+    if (len < 0) {
+        if (errno != EAGAIN) {
+            error_report("Failure monitoring inotify FD '%s',"
+                         "disabling events", strerror(errno));
+            goto cleanup;
+        }
+
+        /* no more events right now */
+        goto cleanup;
+    }
+
+    /* Loop over all events in the buffer */
+    while (used < len) {
+        struct inotify_event *ev =
+            (struct inotify_event *)(buf + used);
+        const char *name = ev->len ? ev->name : "";
+        QFileMonitorDir *dir = g_hash_table_lookup(mon->idmap,
+                                                   GINT_TO_POINTER(ev->wd));
+        uint32_t iev = ev->mask &
+            (IN_CREATE | IN_MODIFY | IN_DELETE | IN_IGNORED |
+             IN_MOVED_TO | IN_MOVED_FROM | IN_ATTRIB);
+        int qev;
+        gsize i;
+
+        used += sizeof(struct inotify_event) + ev->len;
+
+        if (!dir) {
+            continue;
+        }
+
+        /*
+         * During a rename operation, the old name gets
+         * IN_MOVED_FROM and the new name gets IN_MOVED_TO.
+         * To simplify life for callers, we turn these into
+         * DELETED and CREATED events
+         */
+        switch (iev) {
+        case IN_CREATE:
+        case IN_MOVED_TO:
+            qev = QFILE_MONITOR_EVENT_CREATED;
+            break;
+        case IN_MODIFY:
+            qev = QFILE_MONITOR_EVENT_MODIFIED;
+            break;
+        case IN_DELETE:
+        case IN_MOVED_FROM:
+            qev = QFILE_MONITOR_EVENT_DELETED;
+            break;
+        case IN_ATTRIB:
+            qev = QFILE_MONITOR_EVENT_ATTRIBUTES;
+            break;
+        case IN_IGNORED:
+            qev = QFILE_MONITOR_EVENT_IGNORED;
+            break;
+        default:
+            g_assert_not_reached();
+        }
+
+        trace_qemu_file_monitor_event(mon, dir->path, name, ev->mask,
+                                      dir->inotify_id);
+        for (i = 0; i < dir->watches->len; i++) {
+            QFileMonitorWatch *watch = &g_array_index(dir->watches,
+                                                      QFileMonitorWatch,
+                                                      i);
+
+            if (watch->filename == NULL ||
+                (name && g_str_equal(watch->filename, name))) {
+                trace_qemu_file_monitor_dispatch(mon, dir->path, name,
+                                                 qev, watch->cb,
+                                                 watch->opaque, watch->id);
+                watch->cb(watch->id, qev, name, watch->opaque);
+            }
+        }
+    }
+
+ cleanup:
+    qemu_mutex_unlock(&mon->lock);
+}
+
+
+static void
+qemu_file_monitor_dir_free(void *data)
+{
+    QFileMonitorDir *dir = data;
+    gsize i;
+
+    for (i = 0; i < dir->watches->len; i++) {
+        QFileMonitorWatch *watch = &g_array_index(dir->watches,
+                                                  QFileMonitorWatch, i);
+        g_free(watch->filename);
+    }
+    g_array_unref(dir->watches);
+    g_free(dir->path);
+    g_free(dir);
+}
+
+
+QFileMonitor *
+qemu_file_monitor_new(Error **errp)
+{
+    int fd;
+    QFileMonitor *mon;
+
+    fd = inotify_init1(IN_NONBLOCK);
+    if (fd < 0) {
+        error_setg_errno(errp, errno,
+                         "Unable to initialize inotify");
+        return NULL;
+    }
+
+    mon = g_new0(QFileMonitor, 1);
+    qemu_mutex_init(&mon->lock);
+    mon->fd = fd;
+
+    mon->dirs = g_hash_table_new_full(g_str_hash, g_str_equal, NULL,
+                                      qemu_file_monitor_dir_free);
+    mon->idmap = g_hash_table_new(g_direct_hash, g_direct_equal);
+
+    trace_qemu_file_monitor_new(mon, mon->fd);
+
+    return mon;
+}
+
+static gboolean
+qemu_file_monitor_free_idle(void *opaque)
+{
+    QFileMonitor *mon = opaque;
+
+    if (!mon) {
+        return G_SOURCE_REMOVE;
+    }
+
+    qemu_mutex_lock(&mon->lock);
+
+    g_hash_table_unref(mon->idmap);
+    g_hash_table_unref(mon->dirs);
+
+    qemu_mutex_unlock(&mon->lock);
+
+    qemu_mutex_destroy(&mon->lock);
+    g_free(mon);
+
+    return G_SOURCE_REMOVE;
+}
+
+void
+qemu_file_monitor_free(QFileMonitor *mon)
+{
+    if (!mon) {
+        return;
+    }
+
+    qemu_mutex_lock(&mon->lock);
+    if (mon->fd != -1) {
+        qemu_set_fd_handler(mon->fd, NULL, NULL, NULL);
+        close(mon->fd);
+        mon->fd = -1;
+    }
+    qemu_mutex_unlock(&mon->lock);
+
+    /*
+     * Can't free it yet, because another thread
+     * may be running event loop, so the inotify
+     * callback might be pending. Using an idle
+     * source ensures we'll only free after the
+     * pending callback is done
+     */
+    g_idle_add((GSourceFunc)qemu_file_monitor_free_idle, mon);
+}
+
+int64_t
+qemu_file_monitor_add_watch(QFileMonitor *mon,
+                            const char *dirpath,
+                            const char *filename,
+                            QFileMonitorHandler cb,
+                            void *opaque,
+                            Error **errp)
+{
+    QFileMonitorDir *dir;
+    QFileMonitorWatch watch;
+    int64_t ret = -1;
+
+    qemu_mutex_lock(&mon->lock);
+    dir = g_hash_table_lookup(mon->dirs, dirpath);
+    if (!dir) {
+        int rv = inotify_add_watch(mon->fd, dirpath,
+                                   IN_CREATE | IN_DELETE | IN_MODIFY |
+                                   IN_MOVED_TO | IN_MOVED_FROM | IN_ATTRIB);
+
+        if (rv < 0) {
+            error_setg_errno(errp, errno, "Unable to watch '%s'", dirpath);
+            goto cleanup;
+        }
+
+        trace_qemu_file_monitor_enable_watch(mon, dirpath, rv);
+
+        dir = g_new0(QFileMonitorDir, 1);
+        dir->path = g_strdup(dirpath);
+        dir->inotify_id = rv;
+        dir->watches = g_array_new(FALSE, TRUE, sizeof(QFileMonitorWatch));
+
+        g_hash_table_insert(mon->dirs, dir->path, dir);
+        g_hash_table_insert(mon->idmap, GINT_TO_POINTER(rv), dir);
+
+        if (g_hash_table_size(mon->dirs) == 1) {
+            qemu_set_fd_handler(mon->fd, qemu_file_monitor_watch, NULL, mon);
+        }
+    }
+
+    watch.id = (((int64_t)dir->inotify_id) << 32) | dir->next_file_id++;
+    watch.filename = g_strdup(filename);
+    watch.cb = cb;
+    watch.opaque = opaque;
+
+    g_array_append_val(dir->watches, watch);
+
+    trace_qemu_file_monitor_add_watch(mon, dirpath,
+                                      filename ? filename : "",
+                                      cb, opaque, watch.id);
+
+    ret = watch.id;
+
+ cleanup:
+    qemu_mutex_unlock(&mon->lock);
+    return ret;
+}
+
+
+void qemu_file_monitor_remove_watch(QFileMonitor *mon,
+                                    const char *dirpath,
+                                    int64_t id)
+{
+    QFileMonitorDir *dir;
+    gsize i;
+
+    qemu_mutex_lock(&mon->lock);
+
+    trace_qemu_file_monitor_remove_watch(mon, dirpath, id);
+
+    dir = g_hash_table_lookup(mon->dirs, dirpath);
+    if (!dir) {
+        goto cleanup;
+    }
+
+    for (i = 0; i < dir->watches->len; i++) {
+        QFileMonitorWatch *watch = &g_array_index(dir->watches,
+                                                  QFileMonitorWatch, i);
+        if (watch->id == id) {
+            g_free(watch->filename);
+            g_array_remove_index(dir->watches, i);
+            break;
+        }
+    }
+
+    if (dir->watches->len == 0) {
+        inotify_rm_watch(mon->fd, dir->inotify_id);
+        trace_qemu_file_monitor_disable_watch(mon, dir->path, dir->inotify_id);
+
+        g_hash_table_remove(mon->idmap, GINT_TO_POINTER(dir->inotify_id));
+        g_hash_table_remove(mon->dirs, dir->path);
+
+        if (g_hash_table_size(mon->dirs) == 0) {
+            qemu_set_fd_handler(mon->fd, NULL, NULL, NULL);
+        }
+    }
+
+ cleanup:
+    qemu_mutex_unlock(&mon->lock);
+}
diff --git a/util/filemonitor-stub.c b/util/filemonitor-stub.c
new file mode 100644
index 000000000..93fef6534
--- /dev/null
+++ b/util/filemonitor-stub.c
@@ -0,0 +1,59 @@
+/*
+ * QEMU file monitor stub impl
+ *
+ * Copyright (c) 2018 Red Hat, Inc.
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, see .
+ *
+ */
+
+#include "qemu/osdep.h"
+#include "qemu/filemonitor.h"
+#include "qemu/error-report.h"
+#include "qapi/error.h"
+
+
+QFileMonitor *
+qemu_file_monitor_new(Error **errp)
+{
+    error_setg(errp, "File monitoring not available on this platform");
+    return NULL;
+}
+
+
+void
+qemu_file_monitor_free(QFileMonitor *mon G_GNUC_UNUSED)
+{
+}
+
+
+int64_t
+qemu_file_monitor_add_watch(QFileMonitor *mon G_GNUC_UNUSED,
+                            const char *dirpath G_GNUC_UNUSED,
+                            const char *filename G_GNUC_UNUSED,
+                            QFileMonitorHandler cb G_GNUC_UNUSED,
+                            void *opaque G_GNUC_UNUSED,
+                            Error **errp)
+{
+    error_setg(errp, "File monitoring not available on this platform");
+    return -1;
+}
+
+
+void
+qemu_file_monitor_remove_watch(QFileMonitor *mon G_GNUC_UNUSED,
+                               const char *dirpath G_GNUC_UNUSED,
+                               int64_t id G_GNUC_UNUSED)
+{
+}
diff --git a/util/getauxval.c b/util/getauxval.c
new file mode 100644
index 000000000..b124107d6
--- /dev/null
+++ b/util/getauxval.c
@@ -0,0 +1,118 @@
+/*
+ * QEMU access to the auxiliary vector
+ *
+ * Copyright (C) 2013 Red Hat, Inc
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+#include "qemu/osdep.h"
+
+#ifdef CONFIG_GETAUXVAL
+/* Don't inline this in qemu/osdep.h, because pulling in  for
+   the system declaration of getauxval pulls in the system , which
+   conflicts with qemu's version.  */
+
+#include 
+
+unsigned long qemu_getauxval(unsigned long key)
+{
+    return getauxval(key);
+}
+#elif defined(__linux__)
+#include "elf.h"
+
+/* Our elf.h doesn't contain Elf32_auxv_t and Elf64_auxv_t, which is ok because
+   that just makes it easier to define it properly for the host here.  */
+typedef struct {
+    unsigned long a_type;
+    unsigned long a_val;
+} ElfW_auxv_t;
+
+static const ElfW_auxv_t *auxv;
+
+static const ElfW_auxv_t *qemu_init_auxval(void)
+{
+    ElfW_auxv_t *a;
+    ssize_t size = 512, r, ofs;
+    int fd;
+
+    /* Allocate some initial storage.  Make sure the first entry is set
+       to end-of-list, so that we've got a valid list in case of error.  */
+    auxv = a = g_malloc(size);
+    a[0].a_type = 0;
+    a[0].a_val = 0;
+
+    fd = open("/proc/self/auxv", O_RDONLY);
+    if (fd < 0) {
+        return a;
+    }
+
+    /* Read the first SIZE bytes.  Hopefully, this covers everything.  */
+    r = read(fd, a, size);
+
+    if (r == size) {
+        /* Continue to expand until we do get a partial read.  */
+        do {
+            ofs = size;
+            size *= 2;
+            auxv = a = g_realloc(a, size);
+            r = read(fd, (char *)a + ofs, ofs);
+        } while (r == ofs);
+    }
+
+    close(fd);
+    return a;
+}
+
+unsigned long qemu_getauxval(unsigned long type)
+{
+    const ElfW_auxv_t *a = auxv;
+
+    if (unlikely(a == NULL)) {
+        a = qemu_init_auxval();
+    }
+
+    for (; a->a_type != 0; a++) {
+        if (a->a_type == type) {
+            return a->a_val;
+        }
+    }
+
+    return 0;
+}
+
+#elif defined(__FreeBSD__)
+#include 
+
+unsigned long qemu_getauxval(unsigned long type)
+{
+    unsigned long aux = 0;
+    elf_aux_info(type, &aux, sizeof(aux));
+    return aux;
+}
+
+#else
+
+unsigned long qemu_getauxval(unsigned long type)
+{
+    return 0;
+}
+
+#endif
diff --git a/util/guest-random.c b/util/guest-random.c
new file mode 100644
index 000000000..086115bd6
--- /dev/null
+++ b/util/guest-random.c
@@ -0,0 +1,101 @@
+/*
+ * QEMU guest-visible random functions
+ *
+ * Copyright 2019 Linaro, Ltd.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the Free
+ * Software Foundation; either version 2 of the License, or (at your option)
+ * any later version.
+ */
+
+#include "qemu/osdep.h"
+#include "qemu/cutils.h"
+#include "qapi/error.h"
+#include "qemu/guest-random.h"
+#include "crypto/random.h"
+#include "sysemu/replay.h"
+
+
+static __thread GRand *thread_rand;
+static bool deterministic;
+
+
+static int glib_random_bytes(void *buf, size_t len)
+{
+    GRand *rand = thread_rand;
+    size_t i;
+    uint32_t x;
+
+    if (unlikely(rand == NULL)) {
+        /* Thread not initialized for a cpu, or main w/o -seed.  */
+        thread_rand = rand = g_rand_new();
+    }
+
+    for (i = 0; i + 4 <= len; i += 4) {
+        x = g_rand_int(rand);
+        __builtin_memcpy(buf + i, &x, 4);
+    }
+    if (i < len) {
+        x = g_rand_int(rand);
+        __builtin_memcpy(buf + i, &x, i - len);
+    }
+    return 0;
+}
+
+int qemu_guest_getrandom(void *buf, size_t len, Error **errp)
+{
+    int ret;
+    if (replay_mode == REPLAY_MODE_PLAY) {
+        return replay_read_random(buf, len);
+    }
+    if (unlikely(deterministic)) {
+        /* Deterministic implementation using Glib's Mersenne Twister.  */
+        ret = glib_random_bytes(buf, len);
+    } else {
+        /* Non-deterministic implementation using crypto routines.  */
+        ret = qcrypto_random_bytes(buf, len, errp);
+    }
+    if (replay_mode == REPLAY_MODE_RECORD) {
+        replay_save_random(ret, buf, len);
+    }
+    return ret;
+}
+
+void qemu_guest_getrandom_nofail(void *buf, size_t len)
+{
+    (void)qemu_guest_getrandom(buf, len, &error_fatal);
+}
+
+uint64_t qemu_guest_random_seed_thread_part1(void)
+{
+    if (deterministic) {
+        uint64_t ret;
+        glib_random_bytes(&ret, sizeof(ret));
+        return ret;
+    }
+    return 0;
+}
+
+void qemu_guest_random_seed_thread_part2(uint64_t seed)
+{
+    g_assert(thread_rand == NULL);
+    if (deterministic) {
+        thread_rand =
+            g_rand_new_with_seed_array((const guint32 *)&seed,
+                                       sizeof(seed) / sizeof(guint32));
+    }
+}
+
+int qemu_guest_random_seed_main(const char *optarg, Error **errp)
+{
+    unsigned long long seed;
+    if (parse_uint_full(optarg, &seed, 0)) {
+        error_setg(errp, "Invalid seed number: %s", optarg);
+        return -1;
+    } else {
+        deterministic = true;
+        qemu_guest_random_seed_thread_part2(seed);
+        return 0;
+    }
+}
diff --git a/util/hbitmap.c b/util/hbitmap.c
new file mode 100644
index 000000000..305b894a6
--- /dev/null
+++ b/util/hbitmap.c
@@ -0,0 +1,933 @@
+/*
+ * Hierarchical Bitmap Data Type
+ *
+ * Copyright Red Hat, Inc., 2012
+ *
+ * Author: Paolo Bonzini 
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or
+ * later.  See the COPYING file in the top-level directory.
+ */
+
+#include "qemu/osdep.h"
+#include "qemu/hbitmap.h"
+#include "qemu/host-utils.h"
+#include "trace.h"
+#include "crypto/hash.h"
+
+/* HBitmaps provides an array of bits.  The bits are stored as usual in an
+ * array of unsigned longs, but HBitmap is also optimized to provide fast
+ * iteration over set bits; going from one bit to the next is O(logB n)
+ * worst case, with B = sizeof(long) * CHAR_BIT: the result is low enough
+ * that the number of levels is in fact fixed.
+ *
+ * In order to do this, it stacks multiple bitmaps with progressively coarser
+ * granularity; in all levels except the last, bit N is set iff the N-th
+ * unsigned long is nonzero in the immediately next level.  When iteration
+ * completes on the last level it can examine the 2nd-last level to quickly
+ * skip entire words, and even do so recursively to skip blocks of 64 words or
+ * powers thereof (32 on 32-bit machines).
+ *
+ * Given an index in the bitmap, it can be split in group of bits like
+ * this (for the 64-bit case):
+ *
+ *   bits 0-57 => word in the last bitmap     | bits 58-63 => bit in the word
+ *   bits 0-51 => word in the 2nd-last bitmap | bits 52-57 => bit in the word
+ *   bits 0-45 => word in the 3rd-last bitmap | bits 46-51 => bit in the word
+ *
+ * So it is easy to move up simply by shifting the index right by
+ * log2(BITS_PER_LONG) bits.  To move down, you shift the index left
+ * similarly, and add the word index within the group.  Iteration uses
+ * ffs (find first set bit) to find the next word to examine; this
+ * operation can be done in constant time in most current architectures.
+ *
+ * Setting or clearing a range of m bits on all levels, the work to perform
+ * is O(m + m/W + m/W^2 + ...), which is O(m) like on a regular bitmap.
+ *
+ * When iterating on a bitmap, each bit (on any level) is only visited
+ * once.  Hence, The total cost of visiting a bitmap with m bits in it is
+ * the number of bits that are set in all bitmaps.  Unless the bitmap is
+ * extremely sparse, this is also O(m + m/W + m/W^2 + ...), so the amortized
+ * cost of advancing from one bit to the next is usually constant (worst case
+ * O(logB n) as in the non-amortized complexity).
+ */
+
+struct HBitmap {
+    /*
+     * Size of the bitmap, as requested in hbitmap_alloc or in hbitmap_truncate.
+     */
+    uint64_t orig_size;
+
+    /* Number of total bits in the bottom level.  */
+    uint64_t size;
+
+    /* Number of set bits in the bottom level.  */
+    uint64_t count;
+
+    /* A scaling factor.  Given a granularity of G, each bit in the bitmap will
+     * will actually represent a group of 2^G elements.  Each operation on a
+     * range of bits first rounds the bits to determine which group they land
+     * in, and then affect the entire page; iteration will only visit the first
+     * bit of each group.  Here is an example of operations in a size-16,
+     * granularity-1 HBitmap:
+     *
+     *    initial state            00000000
+     *    set(start=0, count=9)    11111000 (iter: 0, 2, 4, 6, 8)
+     *    reset(start=1, count=3)  00111000 (iter: 4, 6, 8)
+     *    set(start=9, count=2)    00111100 (iter: 4, 6, 8, 10)
+     *    reset(start=5, count=5)  00000000
+     *
+     * From an implementation point of view, when setting or resetting bits,
+     * the bitmap will scale bit numbers right by this amount of bits.  When
+     * iterating, the bitmap will scale bit numbers left by this amount of
+     * bits.
+     */
+    int granularity;
+
+    /* A meta dirty bitmap to track the dirtiness of bits in this HBitmap. */
+    HBitmap *meta;
+
+    /* A number of progressively less coarse bitmaps (i.e. level 0 is the
+     * coarsest).  Each bit in level N represents a word in level N+1 that
+     * has a set bit, except the last level where each bit represents the
+     * actual bitmap.
+     *
+     * Note that all bitmaps have the same number of levels.  Even a 1-bit
+     * bitmap will still allocate HBITMAP_LEVELS arrays.
+     */
+    unsigned long *levels[HBITMAP_LEVELS];
+
+    /* The length of each levels[] array. */
+    uint64_t sizes[HBITMAP_LEVELS];
+};
+
+/* Advance hbi to the next nonzero word and return it.  hbi->pos
+ * is updated.  Returns zero if we reach the end of the bitmap.
+ */
+static unsigned long hbitmap_iter_skip_words(HBitmapIter *hbi)
+{
+    size_t pos = hbi->pos;
+    const HBitmap *hb = hbi->hb;
+    unsigned i = HBITMAP_LEVELS - 1;
+
+    unsigned long cur;
+    do {
+        i--;
+        pos >>= BITS_PER_LEVEL;
+        cur = hbi->cur[i] & hb->levels[i][pos];
+    } while (cur == 0);
+
+    /* Check for end of iteration.  We always use fewer than BITS_PER_LONG
+     * bits in the level 0 bitmap; thus we can repurpose the most significant
+     * bit as a sentinel.  The sentinel is set in hbitmap_alloc and ensures
+     * that the above loop ends even without an explicit check on i.
+     */
+
+    if (i == 0 && cur == (1UL << (BITS_PER_LONG - 1))) {
+        return 0;
+    }
+    for (; i < HBITMAP_LEVELS - 1; i++) {
+        /* Shift back pos to the left, matching the right shifts above.
+         * The index of this word's least significant set bit provides
+         * the low-order bits.
+         */
+        assert(cur);
+        pos = (pos << BITS_PER_LEVEL) + ctzl(cur);
+        hbi->cur[i] = cur & (cur - 1);
+
+        /* Set up next level for iteration.  */
+        cur = hb->levels[i + 1][pos];
+    }
+
+    hbi->pos = pos;
+    trace_hbitmap_iter_skip_words(hbi->hb, hbi, pos, cur);
+
+    assert(cur);
+    return cur;
+}
+
+int64_t hbitmap_iter_next(HBitmapIter *hbi)
+{
+    unsigned long cur = hbi->cur[HBITMAP_LEVELS - 1] &
+            hbi->hb->levels[HBITMAP_LEVELS - 1][hbi->pos];
+    int64_t item;
+
+    if (cur == 0) {
+        cur = hbitmap_iter_skip_words(hbi);
+        if (cur == 0) {
+            return -1;
+        }
+    }
+
+    /* The next call will resume work from the next bit.  */
+    hbi->cur[HBITMAP_LEVELS - 1] = cur & (cur - 1);
+    item = ((uint64_t)hbi->pos << BITS_PER_LEVEL) + ctzl(cur);
+
+    return item << hbi->granularity;
+}
+
+void hbitmap_iter_init(HBitmapIter *hbi, const HBitmap *hb, uint64_t first)
+{
+    unsigned i, bit;
+    uint64_t pos;
+
+    hbi->hb = hb;
+    pos = first >> hb->granularity;
+    assert(pos < hb->size);
+    hbi->pos = pos >> BITS_PER_LEVEL;
+    hbi->granularity = hb->granularity;
+
+    for (i = HBITMAP_LEVELS; i-- > 0; ) {
+        bit = pos & (BITS_PER_LONG - 1);
+        pos >>= BITS_PER_LEVEL;
+
+        /* Drop bits representing items before first.  */
+        hbi->cur[i] = hb->levels[i][pos] & ~((1UL << bit) - 1);
+
+        /* We have already added level i+1, so the lowest set bit has
+         * been processed.  Clear it.
+         */
+        if (i != HBITMAP_LEVELS - 1) {
+            hbi->cur[i] &= ~(1UL << bit);
+        }
+    }
+}
+
+int64_t hbitmap_next_dirty(const HBitmap *hb, int64_t start, int64_t count)
+{
+    HBitmapIter hbi;
+    int64_t first_dirty_off;
+    uint64_t end;
+
+    assert(start >= 0 && count >= 0);
+
+    if (start >= hb->orig_size || count == 0) {
+        return -1;
+    }
+
+    end = count > hb->orig_size - start ? hb->orig_size : start + count;
+
+    hbitmap_iter_init(&hbi, hb, start);
+    first_dirty_off = hbitmap_iter_next(&hbi);
+
+    if (first_dirty_off < 0 || first_dirty_off >= end) {
+        return -1;
+    }
+
+    return MAX(start, first_dirty_off);
+}
+
+int64_t hbitmap_next_zero(const HBitmap *hb, int64_t start, int64_t count)
+{
+    size_t pos = (start >> hb->granularity) >> BITS_PER_LEVEL;
+    unsigned long *last_lev = hb->levels[HBITMAP_LEVELS - 1];
+    unsigned long cur = last_lev[pos];
+    unsigned start_bit_offset;
+    uint64_t end_bit, sz;
+    int64_t res;
+
+    assert(start >= 0 && count >= 0);
+
+    if (start >= hb->orig_size || count == 0) {
+        return -1;
+    }
+
+    end_bit = count > hb->orig_size - start ?
+                hb->size :
+                ((start + count - 1) >> hb->granularity) + 1;
+    sz = (end_bit + BITS_PER_LONG - 1) >> BITS_PER_LEVEL;
+
+    /* There may be some zero bits in @cur before @start. We are not interested
+     * in them, let's set them.
+     */
+    start_bit_offset = (start >> hb->granularity) & (BITS_PER_LONG - 1);
+    cur |= (1UL << start_bit_offset) - 1;
+    assert((start >> hb->granularity) < hb->size);
+
+    if (cur == (unsigned long)-1) {
+        do {
+            pos++;
+        } while (pos < sz && last_lev[pos] == (unsigned long)-1);
+
+        if (pos >= sz) {
+            return -1;
+        }
+
+        cur = last_lev[pos];
+    }
+
+    res = (pos << BITS_PER_LEVEL) + ctol(cur);
+    if (res >= end_bit) {
+        return -1;
+    }
+
+    res = res << hb->granularity;
+    if (res < start) {
+        assert(((start - res) >> hb->granularity) == 0);
+        return start;
+    }
+
+    return res;
+}
+
+bool hbitmap_next_dirty_area(const HBitmap *hb, int64_t start, int64_t end,
+                             int64_t max_dirty_count,
+                             int64_t *dirty_start, int64_t *dirty_count)
+{
+    int64_t next_zero;
+
+    assert(start >= 0 && end >= 0 && max_dirty_count > 0);
+
+    end = MIN(end, hb->orig_size);
+    if (start >= end) {
+        return false;
+    }
+
+    start = hbitmap_next_dirty(hb, start, end - start);
+    if (start < 0) {
+        return false;
+    }
+
+    end = start + MIN(end - start, max_dirty_count);
+
+    next_zero = hbitmap_next_zero(hb, start, end - start);
+    if (next_zero >= 0) {
+        end = next_zero;
+    }
+
+    *dirty_start = start;
+    *dirty_count = end - start;
+
+    return true;
+}
+
+bool hbitmap_empty(const HBitmap *hb)
+{
+    return hb->count == 0;
+}
+
+int hbitmap_granularity(const HBitmap *hb)
+{
+    return hb->granularity;
+}
+
+uint64_t hbitmap_count(const HBitmap *hb)
+{
+    return hb->count << hb->granularity;
+}
+
+/**
+ * hbitmap_iter_next_word:
+ * @hbi: HBitmapIter to operate on.
+ * @p_cur: Location where to store the next non-zero word.
+ *
+ * Return the index of the next nonzero word that is set in @hbi's
+ * associated HBitmap, and set *p_cur to the content of that word
+ * (bits before the index that was passed to hbitmap_iter_init are
+ * trimmed on the first call).  Return -1, and set *p_cur to zero,
+ * if all remaining words are zero.
+ */
+static size_t hbitmap_iter_next_word(HBitmapIter *hbi, unsigned long *p_cur)
+{
+    unsigned long cur = hbi->cur[HBITMAP_LEVELS - 1];
+
+    if (cur == 0) {
+        cur = hbitmap_iter_skip_words(hbi);
+        if (cur == 0) {
+            *p_cur = 0;
+            return -1;
+        }
+    }
+
+    /* The next call will resume work from the next word.  */
+    hbi->cur[HBITMAP_LEVELS - 1] = 0;
+    *p_cur = cur;
+    return hbi->pos;
+}
+
+/* Count the number of set bits between start and end, not accounting for
+ * the granularity.  Also an example of how to use hbitmap_iter_next_word.
+ */
+static uint64_t hb_count_between(HBitmap *hb, uint64_t start, uint64_t last)
+{
+    HBitmapIter hbi;
+    uint64_t count = 0;
+    uint64_t end = last + 1;
+    unsigned long cur;
+    size_t pos;
+
+    hbitmap_iter_init(&hbi, hb, start << hb->granularity);
+    for (;;) {
+        pos = hbitmap_iter_next_word(&hbi, &cur);
+        if (pos >= (end >> BITS_PER_LEVEL)) {
+            break;
+        }
+        count += ctpopl(cur);
+    }
+
+    if (pos == (end >> BITS_PER_LEVEL)) {
+        /* Drop bits representing the END-th and subsequent items.  */
+        int bit = end & (BITS_PER_LONG - 1);
+        cur &= (1UL << bit) - 1;
+        count += ctpopl(cur);
+    }
+
+    return count;
+}
+
+/* Setting starts at the last layer and propagates up if an element
+ * changes.
+ */
+static inline bool hb_set_elem(unsigned long *elem, uint64_t start, uint64_t last)
+{
+    unsigned long mask;
+    unsigned long old;
+
+    assert((last >> BITS_PER_LEVEL) == (start >> BITS_PER_LEVEL));
+    assert(start <= last);
+
+    mask = 2UL << (last & (BITS_PER_LONG - 1));
+    mask -= 1UL << (start & (BITS_PER_LONG - 1));
+    old = *elem;
+    *elem |= mask;
+    return old != *elem;
+}
+
+/* The recursive workhorse (the depth is limited to HBITMAP_LEVELS)...
+ * Returns true if at least one bit is changed. */
+static bool hb_set_between(HBitmap *hb, int level, uint64_t start,
+                           uint64_t last)
+{
+    size_t pos = start >> BITS_PER_LEVEL;
+    size_t lastpos = last >> BITS_PER_LEVEL;
+    bool changed = false;
+    size_t i;
+
+    i = pos;
+    if (i < lastpos) {
+        uint64_t next = (start | (BITS_PER_LONG - 1)) + 1;
+        changed |= hb_set_elem(&hb->levels[level][i], start, next - 1);
+        for (;;) {
+            start = next;
+            next += BITS_PER_LONG;
+            if (++i == lastpos) {
+                break;
+            }
+            changed |= (hb->levels[level][i] == 0);
+            hb->levels[level][i] = ~0UL;
+        }
+    }
+    changed |= hb_set_elem(&hb->levels[level][i], start, last);
+
+    /* If there was any change in this layer, we may have to update
+     * the one above.
+     */
+    if (level > 0 && changed) {
+        hb_set_between(hb, level - 1, pos, lastpos);
+    }
+    return changed;
+}
+
+void hbitmap_set(HBitmap *hb, uint64_t start, uint64_t count)
+{
+    /* Compute range in the last layer.  */
+    uint64_t first, n;
+    uint64_t last = start + count - 1;
+
+    if (count == 0) {
+        return;
+    }
+
+    trace_hbitmap_set(hb, start, count,
+                      start >> hb->granularity, last >> hb->granularity);
+
+    first = start >> hb->granularity;
+    last >>= hb->granularity;
+    assert(last < hb->size);
+    n = last - first + 1;
+
+    hb->count += n - hb_count_between(hb, first, last);
+    if (hb_set_between(hb, HBITMAP_LEVELS - 1, first, last) &&
+        hb->meta) {
+        hbitmap_set(hb->meta, start, count);
+    }
+}
+
+/* Resetting works the other way round: propagate up if the new
+ * value is zero.
+ */
+static inline bool hb_reset_elem(unsigned long *elem, uint64_t start, uint64_t last)
+{
+    unsigned long mask;
+    bool blanked;
+
+    assert((last >> BITS_PER_LEVEL) == (start >> BITS_PER_LEVEL));
+    assert(start <= last);
+
+    mask = 2UL << (last & (BITS_PER_LONG - 1));
+    mask -= 1UL << (start & (BITS_PER_LONG - 1));
+    blanked = *elem != 0 && ((*elem & ~mask) == 0);
+    *elem &= ~mask;
+    return blanked;
+}
+
+/* The recursive workhorse (the depth is limited to HBITMAP_LEVELS)...
+ * Returns true if at least one bit is changed. */
+static bool hb_reset_between(HBitmap *hb, int level, uint64_t start,
+                             uint64_t last)
+{
+    size_t pos = start >> BITS_PER_LEVEL;
+    size_t lastpos = last >> BITS_PER_LEVEL;
+    bool changed = false;
+    size_t i;
+
+    i = pos;
+    if (i < lastpos) {
+        uint64_t next = (start | (BITS_PER_LONG - 1)) + 1;
+
+        /* Here we need a more complex test than when setting bits.  Even if
+         * something was changed, we must not blank bits in the upper level
+         * unless the lower-level word became entirely zero.  So, remove pos
+         * from the upper-level range if bits remain set.
+         */
+        if (hb_reset_elem(&hb->levels[level][i], start, next - 1)) {
+            changed = true;
+        } else {
+            pos++;
+        }
+
+        for (;;) {
+            start = next;
+            next += BITS_PER_LONG;
+            if (++i == lastpos) {
+                break;
+            }
+            changed |= (hb->levels[level][i] != 0);
+            hb->levels[level][i] = 0UL;
+        }
+    }
+
+    /* Same as above, this time for lastpos.  */
+    if (hb_reset_elem(&hb->levels[level][i], start, last)) {
+        changed = true;
+    } else {
+        lastpos--;
+    }
+
+    if (level > 0 && changed) {
+        hb_reset_between(hb, level - 1, pos, lastpos);
+    }
+
+    return changed;
+
+}
+
+void hbitmap_reset(HBitmap *hb, uint64_t start, uint64_t count)
+{
+    /* Compute range in the last layer.  */
+    uint64_t first;
+    uint64_t last = start + count - 1;
+    uint64_t gran = 1ULL << hb->granularity;
+
+    if (count == 0) {
+        return;
+    }
+
+    assert(QEMU_IS_ALIGNED(start, gran));
+    assert(QEMU_IS_ALIGNED(count, gran) || (start + count == hb->orig_size));
+
+    trace_hbitmap_reset(hb, start, count,
+                        start >> hb->granularity, last >> hb->granularity);
+
+    first = start >> hb->granularity;
+    last >>= hb->granularity;
+    assert(last < hb->size);
+
+    hb->count -= hb_count_between(hb, first, last);
+    if (hb_reset_between(hb, HBITMAP_LEVELS - 1, first, last) &&
+        hb->meta) {
+        hbitmap_set(hb->meta, start, count);
+    }
+}
+
+void hbitmap_reset_all(HBitmap *hb)
+{
+    unsigned int i;
+
+    /* Same as hbitmap_alloc() except for memset() instead of malloc() */
+    for (i = HBITMAP_LEVELS; --i >= 1; ) {
+        memset(hb->levels[i], 0, hb->sizes[i] * sizeof(unsigned long));
+    }
+
+    hb->levels[0][0] = 1UL << (BITS_PER_LONG - 1);
+    hb->count = 0;
+}
+
+bool hbitmap_is_serializable(const HBitmap *hb)
+{
+    /* Every serialized chunk must be aligned to 64 bits so that endianness
+     * requirements can be fulfilled on both 64 bit and 32 bit hosts.
+     * We have hbitmap_serialization_align() which converts this
+     * alignment requirement from bitmap bits to items covered (e.g. sectors).
+     * That value is:
+     *    64 << hb->granularity
+     * Since this value must not exceed UINT64_MAX, hb->granularity must be
+     * less than 58 (== 64 - 6, where 6 is ld(64), i.e. 1 << 6 == 64).
+     *
+     * In order for hbitmap_serialization_align() to always return a
+     * meaningful value, bitmaps that are to be serialized must have a
+     * granularity of less than 58. */
+
+    return hb->granularity < 58;
+}
+
+bool hbitmap_get(const HBitmap *hb, uint64_t item)
+{
+    /* Compute position and bit in the last layer.  */
+    uint64_t pos = item >> hb->granularity;
+    unsigned long bit = 1UL << (pos & (BITS_PER_LONG - 1));
+    assert(pos < hb->size);
+
+    return (hb->levels[HBITMAP_LEVELS - 1][pos >> BITS_PER_LEVEL] & bit) != 0;
+}
+
+uint64_t hbitmap_serialization_align(const HBitmap *hb)
+{
+    assert(hbitmap_is_serializable(hb));
+
+    /* Require at least 64 bit granularity to be safe on both 64 bit and 32 bit
+     * hosts. */
+    return UINT64_C(64) << hb->granularity;
+}
+
+/* Start should be aligned to serialization granularity, chunk size should be
+ * aligned to serialization granularity too, except for last chunk.
+ */
+static void serialization_chunk(const HBitmap *hb,
+                                uint64_t start, uint64_t count,
+                                unsigned long **first_el, uint64_t *el_count)
+{
+    uint64_t last = start + count - 1;
+    uint64_t gran = hbitmap_serialization_align(hb);
+
+    assert((start & (gran - 1)) == 0);
+    assert((last >> hb->granularity) < hb->size);
+    if ((last >> hb->granularity) != hb->size - 1) {
+        assert((count & (gran - 1)) == 0);
+    }
+
+    start = (start >> hb->granularity) >> BITS_PER_LEVEL;
+    last = (last >> hb->granularity) >> BITS_PER_LEVEL;
+
+    *first_el = &hb->levels[HBITMAP_LEVELS - 1][start];
+    *el_count = last - start + 1;
+}
+
+uint64_t hbitmap_serialization_size(const HBitmap *hb,
+                                    uint64_t start, uint64_t count)
+{
+    uint64_t el_count;
+    unsigned long *cur;
+
+    if (!count) {
+        return 0;
+    }
+    serialization_chunk(hb, start, count, &cur, &el_count);
+
+    return el_count * sizeof(unsigned long);
+}
+
+void hbitmap_serialize_part(const HBitmap *hb, uint8_t *buf,
+                            uint64_t start, uint64_t count)
+{
+    uint64_t el_count;
+    unsigned long *cur, *end;
+
+    if (!count) {
+        return;
+    }
+    serialization_chunk(hb, start, count, &cur, &el_count);
+    end = cur + el_count;
+
+    while (cur != end) {
+        unsigned long el =
+            (BITS_PER_LONG == 32 ? cpu_to_le32(*cur) : cpu_to_le64(*cur));
+
+        memcpy(buf, &el, sizeof(el));
+        buf += sizeof(el);
+        cur++;
+    }
+}
+
+void hbitmap_deserialize_part(HBitmap *hb, uint8_t *buf,
+                              uint64_t start, uint64_t count,
+                              bool finish)
+{
+    uint64_t el_count;
+    unsigned long *cur, *end;
+
+    if (!count) {
+        return;
+    }
+    serialization_chunk(hb, start, count, &cur, &el_count);
+    end = cur + el_count;
+
+    while (cur != end) {
+        memcpy(cur, buf, sizeof(*cur));
+
+        if (BITS_PER_LONG == 32) {
+            le32_to_cpus((uint32_t *)cur);
+        } else {
+            le64_to_cpus((uint64_t *)cur);
+        }
+
+        buf += sizeof(unsigned long);
+        cur++;
+    }
+    if (finish) {
+        hbitmap_deserialize_finish(hb);
+    }
+}
+
+void hbitmap_deserialize_zeroes(HBitmap *hb, uint64_t start, uint64_t count,
+                                bool finish)
+{
+    uint64_t el_count;
+    unsigned long *first;
+
+    if (!count) {
+        return;
+    }
+    serialization_chunk(hb, start, count, &first, &el_count);
+
+    memset(first, 0, el_count * sizeof(unsigned long));
+    if (finish) {
+        hbitmap_deserialize_finish(hb);
+    }
+}
+
+void hbitmap_deserialize_ones(HBitmap *hb, uint64_t start, uint64_t count,
+                              bool finish)
+{
+    uint64_t el_count;
+    unsigned long *first;
+
+    if (!count) {
+        return;
+    }
+    serialization_chunk(hb, start, count, &first, &el_count);
+
+    memset(first, 0xff, el_count * sizeof(unsigned long));
+    if (finish) {
+        hbitmap_deserialize_finish(hb);
+    }
+}
+
+void hbitmap_deserialize_finish(HBitmap *bitmap)
+{
+    int64_t i, size, prev_size;
+    int lev;
+
+    /* restore levels starting from penultimate to zero level, assuming
+     * that the last level is ok */
+    size = MAX((bitmap->size + BITS_PER_LONG - 1) >> BITS_PER_LEVEL, 1);
+    for (lev = HBITMAP_LEVELS - 1; lev-- > 0; ) {
+        prev_size = size;
+        size = MAX((size + BITS_PER_LONG - 1) >> BITS_PER_LEVEL, 1);
+        memset(bitmap->levels[lev], 0, size * sizeof(unsigned long));
+
+        for (i = 0; i < prev_size; ++i) {
+            if (bitmap->levels[lev + 1][i]) {
+                bitmap->levels[lev][i >> BITS_PER_LEVEL] |=
+                    1UL << (i & (BITS_PER_LONG - 1));
+            }
+        }
+    }
+
+    bitmap->levels[0][0] |= 1UL << (BITS_PER_LONG - 1);
+    bitmap->count = hb_count_between(bitmap, 0, bitmap->size - 1);
+}
+
+void hbitmap_free(HBitmap *hb)
+{
+    unsigned i;
+    assert(!hb->meta);
+    for (i = HBITMAP_LEVELS; i-- > 0; ) {
+        g_free(hb->levels[i]);
+    }
+    g_free(hb);
+}
+
+HBitmap *hbitmap_alloc(uint64_t size, int granularity)
+{
+    HBitmap *hb = g_new0(struct HBitmap, 1);
+    unsigned i;
+
+    assert(size <= INT64_MAX);
+    hb->orig_size = size;
+
+    assert(granularity >= 0 && granularity < 64);
+    size = (size + (1ULL << granularity) - 1) >> granularity;
+    assert(size <= ((uint64_t)1 << HBITMAP_LOG_MAX_SIZE));
+
+    hb->size = size;
+    hb->granularity = granularity;
+    for (i = HBITMAP_LEVELS; i-- > 0; ) {
+        size = MAX((size + BITS_PER_LONG - 1) >> BITS_PER_LEVEL, 1);
+        hb->sizes[i] = size;
+        hb->levels[i] = g_new0(unsigned long, size);
+    }
+
+    /* We necessarily have free bits in level 0 due to the definition
+     * of HBITMAP_LEVELS, so use one for a sentinel.  This speeds up
+     * hbitmap_iter_skip_words.
+     */
+    assert(size == 1);
+    hb->levels[0][0] |= 1UL << (BITS_PER_LONG - 1);
+    return hb;
+}
+
+void hbitmap_truncate(HBitmap *hb, uint64_t size)
+{
+    bool shrink;
+    unsigned i;
+    uint64_t num_elements = size;
+    uint64_t old;
+
+    assert(size <= INT64_MAX);
+    hb->orig_size = size;
+
+    /* Size comes in as logical elements, adjust for granularity. */
+    size = (size + (1ULL << hb->granularity) - 1) >> hb->granularity;
+    assert(size <= ((uint64_t)1 << HBITMAP_LOG_MAX_SIZE));
+    shrink = size < hb->size;
+
+    /* bit sizes are identical; nothing to do. */
+    if (size == hb->size) {
+        return;
+    }
+
+    /* If we're losing bits, let's clear those bits before we invalidate all of
+     * our invariants. This helps keep the bitcount consistent, and will prevent
+     * us from carrying around garbage bits beyond the end of the map.
+     */
+    if (shrink) {
+        /* Don't clear partial granularity groups;
+         * start at the first full one. */
+        uint64_t start = ROUND_UP(num_elements, UINT64_C(1) << hb->granularity);
+        uint64_t fix_count = (hb->size << hb->granularity) - start;
+
+        assert(fix_count);
+        hbitmap_reset(hb, start, fix_count);
+    }
+
+    hb->size = size;
+    for (i = HBITMAP_LEVELS; i-- > 0; ) {
+        size = MAX(BITS_TO_LONGS(size), 1);
+        if (hb->sizes[i] == size) {
+            break;
+        }
+        old = hb->sizes[i];
+        hb->sizes[i] = size;
+        hb->levels[i] = g_realloc(hb->levels[i], size * sizeof(unsigned long));
+        if (!shrink) {
+            memset(&hb->levels[i][old], 0x00,
+                   (size - old) * sizeof(*hb->levels[i]));
+        }
+    }
+    if (hb->meta) {
+        hbitmap_truncate(hb->meta, hb->size << hb->granularity);
+    }
+}
+
+bool hbitmap_can_merge(const HBitmap *a, const HBitmap *b)
+{
+    return (a->orig_size == b->orig_size);
+}
+
+/**
+ * hbitmap_sparse_merge: performs dst = dst | src
+ * works with differing granularities.
+ * best used when src is sparsely populated.
+ */
+static void hbitmap_sparse_merge(HBitmap *dst, const HBitmap *src)
+{
+    int64_t offset;
+    int64_t count;
+
+    for (offset = 0;
+         hbitmap_next_dirty_area(src, offset, src->orig_size, INT64_MAX,
+                                 &offset, &count);
+         offset += count)
+    {
+        hbitmap_set(dst, offset, count);
+    }
+}
+
+/**
+ * Given HBitmaps A and B, let R := A (BITOR) B.
+ * Bitmaps A and B will not be modified,
+ *     except when bitmap R is an alias of A or B.
+ *
+ * @return true if the merge was successful,
+ *         false if it was not attempted.
+ */
+bool hbitmap_merge(const HBitmap *a, const HBitmap *b, HBitmap *result)
+{
+    int i;
+    uint64_t j;
+
+    if (!hbitmap_can_merge(a, b) || !hbitmap_can_merge(a, result)) {
+        return false;
+    }
+    assert(hbitmap_can_merge(b, result));
+
+    if ((!hbitmap_count(a) && result == b) ||
+        (!hbitmap_count(b) && result == a)) {
+        return true;
+    }
+
+    if (!hbitmap_count(a) && !hbitmap_count(b)) {
+        hbitmap_reset_all(result);
+        return true;
+    }
+
+    if (a->granularity != b->granularity) {
+        if ((a != result) && (b != result)) {
+            hbitmap_reset_all(result);
+        }
+        if (a != result) {
+            hbitmap_sparse_merge(result, a);
+        }
+        if (b != result) {
+            hbitmap_sparse_merge(result, b);
+        }
+        return true;
+    }
+
+    /* This merge is O(size), as BITS_PER_LONG and HBITMAP_LEVELS are constant.
+     * It may be possible to improve running times for sparsely populated maps
+     * by using hbitmap_iter_next, but this is suboptimal for dense maps.
+     */
+    assert(a->size == b->size);
+    for (i = HBITMAP_LEVELS - 1; i >= 0; i--) {
+        for (j = 0; j < a->sizes[i]; j++) {
+            result->levels[i][j] = a->levels[i][j] | b->levels[i][j];
+        }
+    }
+
+    /* Recompute the dirty count */
+    result->count = hb_count_between(result, 0, result->size - 1);
+
+    return true;
+}
+
+char *hbitmap_sha256(const HBitmap *bitmap, Error **errp)
+{
+    size_t size = bitmap->sizes[HBITMAP_LEVELS - 1] * sizeof(unsigned long);
+    char *data = (char *)bitmap->levels[HBITMAP_LEVELS - 1];
+    char *hash = NULL;
+    qcrypto_hash_digest(QCRYPTO_HASH_ALG_SHA256, data, size, &hash, errp);
+
+    return hash;
+}
diff --git a/util/hexdump.c b/util/hexdump.c
new file mode 100644
index 000000000..2c105a884
--- /dev/null
+++ b/util/hexdump.c
@@ -0,0 +1,65 @@
+/*
+ * Helper to hexdump a buffer
+ *
+ * Copyright (c) 2013 Red Hat, Inc.
+ * Copyright (c) 2013 Gerd Hoffmann 
+ * Copyright (c) 2013 Peter Crosthwaite 
+ * Copyright (c) 2013 Xilinx, Inc
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2.  See
+ * the COPYING file in the top-level directory.
+ *
+ * Contributions after 2012-01-13 are licensed under the terms of the
+ * GNU GPL, version 2 or (at your option) any later version.
+ */
+
+#include "qemu/osdep.h"
+#include "qemu-common.h"
+
+void qemu_hexdump_line(char *line, unsigned int b, const void *bufptr,
+                       unsigned int len, bool ascii)
+{
+    const char *buf = bufptr;
+    int i, c;
+
+    if (len > QEMU_HEXDUMP_LINE_BYTES) {
+        len = QEMU_HEXDUMP_LINE_BYTES;
+    }
+
+    line += snprintf(line, 6, "%04x:", b);
+    for (i = 0; i < QEMU_HEXDUMP_LINE_BYTES; i++) {
+        if ((i % 4) == 0) {
+            *line++ = ' ';
+        }
+        if (i < len) {
+            line += sprintf(line, " %02x", (unsigned char)buf[b + i]);
+        } else {
+            line += sprintf(line, "   ");
+        }
+    }
+    if (ascii) {
+        *line++ = ' ';
+        for (i = 0; i < len; i++) {
+            c = buf[b + i];
+            if (c < ' ' || c > '~') {
+                c = '.';
+            }
+            *line++ = c;
+        }
+    }
+    *line = '\0';
+}
+
+void qemu_hexdump(FILE *fp, const char *prefix,
+                  const void *bufptr, size_t size)
+{
+    unsigned int b, len;
+    char line[QEMU_HEXDUMP_LINE_LEN];
+
+    for (b = 0; b < size; b += QEMU_HEXDUMP_LINE_BYTES) {
+        len = size - b;
+        qemu_hexdump_line(line, b, bufptr, len, true);
+        fprintf(fp, "%s: %s\n", prefix, line);
+    }
+
+}
diff --git a/util/host-utils.c b/util/host-utils.c
new file mode 100644
index 000000000..7b9322071
--- /dev/null
+++ b/util/host-utils.c
@@ -0,0 +1,227 @@
+/*
+ * Utility compute operations used by translated code.
+ *
+ * Copyright (c) 2003 Fabrice Bellard
+ * Copyright (c) 2007 Aurelien Jarno
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+#include "qemu/osdep.h"
+#include "qemu/host-utils.h"
+
+#ifndef CONFIG_INT128
+/* Long integer helpers */
+static inline void mul64(uint64_t *plow, uint64_t *phigh,
+                         uint64_t a, uint64_t b)
+{
+    typedef union {
+        uint64_t ll;
+        struct {
+#ifdef HOST_WORDS_BIGENDIAN
+            uint32_t high, low;
+#else
+            uint32_t low, high;
+#endif
+        } l;
+    } LL;
+    LL rl, rm, rn, rh, a0, b0;
+    uint64_t c;
+
+    a0.ll = a;
+    b0.ll = b;
+
+    rl.ll = (uint64_t)a0.l.low * b0.l.low;
+    rm.ll = (uint64_t)a0.l.low * b0.l.high;
+    rn.ll = (uint64_t)a0.l.high * b0.l.low;
+    rh.ll = (uint64_t)a0.l.high * b0.l.high;
+
+    c = (uint64_t)rl.l.high + rm.l.low + rn.l.low;
+    rl.l.high = c;
+    c >>= 32;
+    c = c + rm.l.high + rn.l.high + rh.l.low;
+    rh.l.low = c;
+    rh.l.high += (uint32_t)(c >> 32);
+
+    *plow = rl.ll;
+    *phigh = rh.ll;
+}
+
+/* Unsigned 64x64 -> 128 multiplication */
+void mulu64 (uint64_t *plow, uint64_t *phigh, uint64_t a, uint64_t b)
+{
+    mul64(plow, phigh, a, b);
+}
+
+/* Signed 64x64 -> 128 multiplication */
+void muls64 (uint64_t *plow, uint64_t *phigh, int64_t a, int64_t b)
+{
+    uint64_t rh;
+
+    mul64(plow, &rh, a, b);
+
+    /* Adjust for signs.  */
+    if (b < 0) {
+        rh -= a;
+    }
+    if (a < 0) {
+        rh -= b;
+    }
+    *phigh = rh;
+}
+
+/* Unsigned 128x64 division.  Returns 1 if overflow (divide by zero or */
+/* quotient exceeds 64 bits).  Otherwise returns quotient via plow and */
+/* remainder via phigh. */
+int divu128(uint64_t *plow, uint64_t *phigh, uint64_t divisor)
+{
+    uint64_t dhi = *phigh;
+    uint64_t dlo = *plow;
+    unsigned i;
+    uint64_t carry = 0;
+
+    if (divisor == 0) {
+        return 1;
+    } else if (dhi == 0) {
+        *plow  = dlo / divisor;
+        *phigh = dlo % divisor;
+        return 0;
+    } else if (dhi > divisor) {
+        return 1;
+    } else {
+
+        for (i = 0; i < 64; i++) {
+            carry = dhi >> 63;
+            dhi = (dhi << 1) | (dlo >> 63);
+            if (carry || (dhi >= divisor)) {
+                dhi -= divisor;
+                carry = 1;
+            } else {
+                carry = 0;
+            }
+            dlo = (dlo << 1) | carry;
+        }
+
+        *plow = dlo;
+        *phigh = dhi;
+        return 0;
+    }
+}
+
+int divs128(int64_t *plow, int64_t *phigh, int64_t divisor)
+{
+    int sgn_dvdnd = *phigh < 0;
+    int sgn_divsr = divisor < 0;
+    int overflow = 0;
+
+    if (sgn_dvdnd) {
+        *plow = ~(*plow);
+        *phigh = ~(*phigh);
+        if (*plow == (int64_t)-1) {
+            *plow = 0;
+            (*phigh)++;
+         } else {
+            (*plow)++;
+         }
+    }
+
+    if (sgn_divsr) {
+        divisor = 0 - divisor;
+    }
+
+    overflow = divu128((uint64_t *)plow, (uint64_t *)phigh, (uint64_t)divisor);
+
+    if (sgn_dvdnd  ^ sgn_divsr) {
+        *plow = 0 - *plow;
+    }
+
+    if (!overflow) {
+        if ((*plow < 0) ^ (sgn_dvdnd ^ sgn_divsr)) {
+            overflow = 1;
+        }
+    }
+
+    return overflow;
+}
+#endif
+
+/**
+ * urshift - 128-bit Unsigned Right Shift.
+ * @plow: in/out - lower 64-bit integer.
+ * @phigh: in/out - higher 64-bit integer.
+ * @shift: in - bytes to shift, between 0 and 127.
+ *
+ * Result is zero-extended and stored in plow/phigh, which are
+ * input/output variables. Shift values outside the range will
+ * be mod to 128. In other words, the caller is responsible to
+ * verify/assert both the shift range and plow/phigh pointers.
+ */
+void urshift(uint64_t *plow, uint64_t *phigh, int32_t shift)
+{
+    shift &= 127;
+    if (shift == 0) {
+        return;
+    }
+
+    uint64_t h = *phigh >> (shift & 63);
+    if (shift >= 64) {
+        *plow = h;
+        *phigh = 0;
+    } else {
+        *plow = (*plow >> (shift & 63)) | (*phigh << (64 - (shift & 63)));
+        *phigh = h;
+    }
+}
+
+/**
+ * ulshift - 128-bit Unsigned Left Shift.
+ * @plow: in/out - lower 64-bit integer.
+ * @phigh: in/out - higher 64-bit integer.
+ * @shift: in - bytes to shift, between 0 and 127.
+ * @overflow: out - true if any 1-bit is shifted out.
+ *
+ * Result is zero-extended and stored in plow/phigh, which are
+ * input/output variables. Shift values outside the range will
+ * be mod to 128. In other words, the caller is responsible to
+ * verify/assert both the shift range and plow/phigh pointers.
+ */
+void ulshift(uint64_t *plow, uint64_t *phigh, int32_t shift, bool *overflow)
+{
+    uint64_t low = *plow;
+    uint64_t high = *phigh;
+
+    shift &= 127;
+    if (shift == 0) {
+        return;
+    }
+
+    /* check if any bit will be shifted out */
+    urshift(&low, &high, 128 - shift);
+    if (low | high) {
+        *overflow = true;
+    }
+
+    if (shift >= 64) {
+        *phigh = *plow << (shift & 63);
+        *plow = 0;
+    } else {
+        *phigh = (*plow >> (64 - (shift & 63))) | (*phigh << (shift & 63));
+        *plow = *plow << shift;
+    }
+}
diff --git a/util/id.c b/util/id.c
new file mode 100644
index 000000000..5addb4460
--- /dev/null
+++ b/util/id.c
@@ -0,0 +1,68 @@
+/*
+ * Dealing with identifiers
+ *
+ * Copyright (C) 2014 Red Hat, Inc.
+ *
+ * Authors:
+ *  Markus Armbruster ,
+ *
+ * This work is licensed under the terms of the GNU LGPL, version 2.1
+ * or later.  See the COPYING.LIB file in the top-level directory.
+ */
+
+#include "qemu/osdep.h"
+#include "qemu/ctype.h"
+#include "qemu/id.h"
+
+bool id_wellformed(const char *id)
+{
+    int i;
+
+    if (!qemu_isalpha(id[0])) {
+        return false;
+    }
+    for (i = 1; id[i]; i++) {
+        if (!qemu_isalnum(id[i]) && !strchr("-._", id[i])) {
+            return false;
+        }
+    }
+    return true;
+}
+
+#define ID_SPECIAL_CHAR '#'
+
+static const char *const id_subsys_str[ID_MAX] = {
+    [ID_QDEV]  = "qdev",
+    [ID_BLOCK] = "block",
+    [ID_CHR] = "chr",
+};
+
+/*
+ *  Generates an ID of the form PREFIX SUBSYSTEM NUMBER
+ *  where:
+ *
+ *  - PREFIX is the reserved character '#'
+ *  - SUBSYSTEM identifies the subsystem creating the ID
+ *  - NUMBER is a decimal number unique within SUBSYSTEM.
+ *
+ *    Example: "#block146"
+ *
+ * Note that these IDs do not satisfy id_wellformed().
+ *
+ * The caller is responsible for freeing the returned string with g_free()
+ */
+char *id_generate(IdSubSystems id)
+{
+    static uint64_t id_counters[ID_MAX];
+    uint32_t rnd;
+
+    assert(id < ARRAY_SIZE(id_subsys_str));
+    assert(id_subsys_str[id]);
+
+    rnd = g_random_int_range(0, 100);
+
+    return g_strdup_printf("%c%s%" PRIu64 "%02" PRId32, ID_SPECIAL_CHAR,
+                                                        id_subsys_str[id],
+                                                        id_counters[id]++,
+                                                        rnd);
+}
diff --git a/util/iov.c b/util/iov.c
new file mode 100644
index 000000000..f3a9e92a3
--- /dev/null
+++ b/util/iov.c
@@ -0,0 +1,743 @@
+/*
+ * Helpers for getting linearized buffers from iov / filling buffers into iovs
+ *
+ * Copyright IBM, Corp. 2007, 2008
+ * Copyright (C) 2010 Red Hat, Inc.
+ *
+ * Author(s):
+ *  Anthony Liguori 
+ *  Amit Shah 
+ *  Michael Tokarev 
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2.  See
+ * the COPYING file in the top-level directory.
+ *
+ * Contributions after 2012-01-13 are licensed under the terms of the
+ * GNU GPL, version 2 or (at your option) any later version.
+ */
+
+#include "qemu/osdep.h"
+#include "qemu-common.h"
+#include "qemu/iov.h"
+#include "qemu/sockets.h"
+#include "qemu/cutils.h"
+
+size_t iov_from_buf_full(const struct iovec *iov, unsigned int iov_cnt,
+                         size_t offset, const void *buf, size_t bytes)
+{
+    size_t done;
+    unsigned int i;
+    for (i = 0, done = 0; (offset || done < bytes) && i < iov_cnt; i++) {
+        if (offset < iov[i].iov_len) {
+            size_t len = MIN(iov[i].iov_len - offset, bytes - done);
+            memcpy(iov[i].iov_base + offset, buf + done, len);
+            done += len;
+            offset = 0;
+        } else {
+            offset -= iov[i].iov_len;
+        }
+    }
+    assert(offset == 0);
+    return done;
+}
+
+size_t iov_to_buf_full(const struct iovec *iov, const unsigned int iov_cnt,
+                       size_t offset, void *buf, size_t bytes)
+{
+    size_t done;
+    unsigned int i;
+    for (i = 0, done = 0; (offset || done < bytes) && i < iov_cnt; i++) {
+        if (offset < iov[i].iov_len) {
+            size_t len = MIN(iov[i].iov_len - offset, bytes - done);
+            memcpy(buf + done, iov[i].iov_base + offset, len);
+            done += len;
+            offset = 0;
+        } else {
+            offset -= iov[i].iov_len;
+        }
+    }
+    assert(offset == 0);
+    return done;
+}
+
+size_t iov_memset(const struct iovec *iov, const unsigned int iov_cnt,
+                  size_t offset, int fillc, size_t bytes)
+{
+    size_t done;
+    unsigned int i;
+    for (i = 0, done = 0; (offset || done < bytes) && i < iov_cnt; i++) {
+        if (offset < iov[i].iov_len) {
+            size_t len = MIN(iov[i].iov_len - offset, bytes - done);
+            memset(iov[i].iov_base + offset, fillc, len);
+            done += len;
+            offset = 0;
+        } else {
+            offset -= iov[i].iov_len;
+        }
+    }
+    assert(offset == 0);
+    return done;
+}
+
+size_t iov_size(const struct iovec *iov, const unsigned int iov_cnt)
+{
+    size_t len;
+    unsigned int i;
+
+    len = 0;
+    for (i = 0; i < iov_cnt; i++) {
+        len += iov[i].iov_len;
+    }
+    return len;
+}
+
+/* helper function for iov_send_recv() */
+static ssize_t
+do_send_recv(int sockfd, struct iovec *iov, unsigned iov_cnt, bool do_send)
+{
+#ifdef CONFIG_POSIX
+    ssize_t ret;
+    struct msghdr msg;
+    memset(&msg, 0, sizeof(msg));
+    msg.msg_iov = iov;
+    msg.msg_iovlen = iov_cnt;
+    do {
+        ret = do_send
+            ? sendmsg(sockfd, &msg, 0)
+            : recvmsg(sockfd, &msg, 0);
+    } while (ret < 0 && errno == EINTR);
+    return ret;
+#else
+    /* else send piece-by-piece */
+    /*XXX Note: windows has WSASend() and WSARecv() */
+    unsigned i = 0;
+    ssize_t ret = 0;
+    while (i < iov_cnt) {
+        ssize_t r = do_send
+            ? send(sockfd, iov[i].iov_base, iov[i].iov_len, 0)
+            : recv(sockfd, iov[i].iov_base, iov[i].iov_len, 0);
+        if (r > 0) {
+            ret += r;
+        } else if (!r) {
+            break;
+        } else if (errno == EINTR) {
+            continue;
+        } else {
+            /* else it is some "other" error,
+             * only return if there was no data processed. */
+            if (ret == 0) {
+                ret = -1;
+            }
+            break;
+        }
+        i++;
+    }
+    return ret;
+#endif
+}
+
+ssize_t iov_send_recv(int sockfd, const struct iovec *_iov, unsigned iov_cnt,
+                      size_t offset, size_t bytes,
+                      bool do_send)
+{
+    ssize_t total = 0;
+    ssize_t ret;
+    size_t orig_len, tail;
+    unsigned niov;
+    struct iovec *local_iov, *iov;
+
+    if (bytes <= 0) {
+        return 0;
+    }
+
+    local_iov = g_new0(struct iovec, iov_cnt);
+    iov_copy(local_iov, iov_cnt, _iov, iov_cnt, offset, bytes);
+    offset = 0;
+    iov = local_iov;
+
+    while (bytes > 0) {
+        /* Find the start position, skipping `offset' bytes:
+         * first, skip all full-sized vector elements, */
+        for (niov = 0; niov < iov_cnt && offset >= iov[niov].iov_len; ++niov) {
+            offset -= iov[niov].iov_len;
+        }
+
+        /* niov == iov_cnt would only be valid if bytes == 0, which
+         * we already ruled out in the loop condition.  */
+        assert(niov < iov_cnt);
+        iov += niov;
+        iov_cnt -= niov;
+
+        if (offset) {
+            /* second, skip `offset' bytes from the (now) first element,
+             * undo it on exit */
+            iov[0].iov_base += offset;
+            iov[0].iov_len -= offset;
+        }
+        /* Find the end position skipping `bytes' bytes: */
+        /* first, skip all full-sized elements */
+        tail = bytes;
+        for (niov = 0; niov < iov_cnt && iov[niov].iov_len <= tail; ++niov) {
+            tail -= iov[niov].iov_len;
+        }
+        if (tail) {
+            /* second, fixup the last element, and remember the original
+             * length */
+            assert(niov < iov_cnt);
+            assert(iov[niov].iov_len > tail);
+            orig_len = iov[niov].iov_len;
+            iov[niov++].iov_len = tail;
+            ret = do_send_recv(sockfd, iov, niov, do_send);
+            /* Undo the changes above before checking for errors */
+            iov[niov-1].iov_len = orig_len;
+        } else {
+            ret = do_send_recv(sockfd, iov, niov, do_send);
+        }
+        if (offset) {
+            iov[0].iov_base -= offset;
+            iov[0].iov_len += offset;
+        }
+
+        if (ret < 0) {
+            assert(errno != EINTR);
+            g_free(local_iov);
+            if (errno == EAGAIN && total > 0) {
+                return total;
+            }
+            return -1;
+        }
+
+        if (ret == 0 && !do_send) {
+            /* recv returns 0 when the peer has performed an orderly
+             * shutdown. */
+            break;
+        }
+
+        /* Prepare for the next iteration */
+        offset += ret;
+        total += ret;
+        bytes -= ret;
+    }
+
+    g_free(local_iov);
+    return total;
+}
+
+
+void iov_hexdump(const struct iovec *iov, const unsigned int iov_cnt,
+                 FILE *fp, const char *prefix, size_t limit)
+{
+    int v;
+    size_t size = 0;
+    char *buf;
+
+    for (v = 0; v < iov_cnt; v++) {
+        size += iov[v].iov_len;
+    }
+    size = size > limit ? limit : size;
+    buf = g_malloc(size);
+    iov_to_buf(iov, iov_cnt, 0, buf, size);
+    qemu_hexdump(fp, prefix, buf, size);
+    g_free(buf);
+}
+
+unsigned iov_copy(struct iovec *dst_iov, unsigned int dst_iov_cnt,
+                 const struct iovec *iov, unsigned int iov_cnt,
+                 size_t offset, size_t bytes)
+{
+    size_t len;
+    unsigned int i, j;
+    for (i = 0, j = 0;
+         i < iov_cnt && j < dst_iov_cnt && (offset || bytes); i++) {
+        if (offset >= iov[i].iov_len) {
+            offset -= iov[i].iov_len;
+            continue;
+        }
+        len = MIN(bytes, iov[i].iov_len - offset);
+
+        dst_iov[j].iov_base = iov[i].iov_base + offset;
+        dst_iov[j].iov_len = len;
+        j++;
+        bytes -= len;
+        offset = 0;
+    }
+    assert(offset == 0);
+    return j;
+}
+
+/* io vectors */
+
+void qemu_iovec_init(QEMUIOVector *qiov, int alloc_hint)
+{
+    qiov->iov = g_new(struct iovec, alloc_hint);
+    qiov->niov = 0;
+    qiov->nalloc = alloc_hint;
+    qiov->size = 0;
+}
+
+void qemu_iovec_init_external(QEMUIOVector *qiov, struct iovec *iov, int niov)
+{
+    int i;
+
+    qiov->iov = iov;
+    qiov->niov = niov;
+    qiov->nalloc = -1;
+    qiov->size = 0;
+    for (i = 0; i < niov; i++)
+        qiov->size += iov[i].iov_len;
+}
+
+void qemu_iovec_add(QEMUIOVector *qiov, void *base, size_t len)
+{
+    assert(qiov->nalloc != -1);
+
+    if (qiov->niov == qiov->nalloc) {
+        qiov->nalloc = 2 * qiov->nalloc + 1;
+        qiov->iov = g_renew(struct iovec, qiov->iov, qiov->nalloc);
+    }
+    qiov->iov[qiov->niov].iov_base = base;
+    qiov->iov[qiov->niov].iov_len = len;
+    qiov->size += len;
+    ++qiov->niov;
+}
+
+/*
+ * Concatenates (partial) iovecs from src_iov to the end of dst.
+ * It starts copying after skipping `soffset' bytes at the
+ * beginning of src and adds individual vectors from src to
+ * dst copies up to `sbytes' bytes total, or up to the end
+ * of src_iov if it comes first.  This way, it is okay to specify
+ * very large value for `sbytes' to indicate "up to the end
+ * of src".
+ * Only vector pointers are processed, not the actual data buffers.
+ */
+size_t qemu_iovec_concat_iov(QEMUIOVector *dst,
+                             struct iovec *src_iov, unsigned int src_cnt,
+                             size_t soffset, size_t sbytes)
+{
+    int i;
+    size_t done;
+
+    if (!sbytes) {
+        return 0;
+    }
+    assert(dst->nalloc != -1);
+    for (i = 0, done = 0; done < sbytes && i < src_cnt; i++) {
+        if (soffset < src_iov[i].iov_len) {
+            size_t len = MIN(src_iov[i].iov_len - soffset, sbytes - done);
+            qemu_iovec_add(dst, src_iov[i].iov_base + soffset, len);
+            done += len;
+            soffset = 0;
+        } else {
+            soffset -= src_iov[i].iov_len;
+        }
+    }
+    assert(soffset == 0); /* offset beyond end of src */
+
+    return done;
+}
+
+/*
+ * Concatenates (partial) iovecs from src to the end of dst.
+ * It starts copying after skipping `soffset' bytes at the
+ * beginning of src and adds individual vectors from src to
+ * dst copies up to `sbytes' bytes total, or up to the end
+ * of src if it comes first.  This way, it is okay to specify
+ * very large value for `sbytes' to indicate "up to the end
+ * of src".
+ * Only vector pointers are processed, not the actual data buffers.
+ */
+void qemu_iovec_concat(QEMUIOVector *dst,
+                       QEMUIOVector *src, size_t soffset, size_t sbytes)
+{
+    qemu_iovec_concat_iov(dst, src->iov, src->niov, soffset, sbytes);
+}
+
+/*
+ * qiov_find_iov
+ *
+ * Return pointer to iovec structure, where byte at @offset in original vector
+ * @iov exactly is.
+ * Set @remaining_offset to be offset inside that iovec to the same byte.
+ */
+static struct iovec *iov_skip_offset(struct iovec *iov, size_t offset,
+                                     size_t *remaining_offset)
+{
+    while (offset > 0 && offset >= iov->iov_len) {
+        offset -= iov->iov_len;
+        iov++;
+    }
+    *remaining_offset = offset;
+
+    return iov;
+}
+
+/*
+ * qiov_slice
+ *
+ * Find subarray of iovec's, containing requested range. @head would
+ * be offset in first iov (returned by the function), @tail would be
+ * count of extra bytes in last iovec (returned iov + @niov - 1).
+ */
+static struct iovec *qiov_slice(QEMUIOVector *qiov,
+                                size_t offset, size_t len,
+                                size_t *head, size_t *tail, int *niov)
+{
+    struct iovec *iov, *end_iov;
+
+    assert(offset + len <= qiov->size);
+
+    iov = iov_skip_offset(qiov->iov, offset, head);
+    end_iov = iov_skip_offset(iov, *head + len, tail);
+
+    if (*tail > 0) {
+        assert(*tail < end_iov->iov_len);
+        *tail = end_iov->iov_len - *tail;
+        end_iov++;
+    }
+
+    *niov = end_iov - iov;
+
+    return iov;
+}
+
+int qemu_iovec_subvec_niov(QEMUIOVector *qiov, size_t offset, size_t len)
+{
+    size_t head, tail;
+    int niov;
+
+    qiov_slice(qiov, offset, len, &head, &tail, &niov);
+
+    return niov;
+}
+
+/*
+ * Compile new iovec, combining @head_buf buffer, sub-qiov of @mid_qiov,
+ * and @tail_buf buffer into new qiov.
+ */
+void qemu_iovec_init_extended(
+        QEMUIOVector *qiov,
+        void *head_buf, size_t head_len,
+        QEMUIOVector *mid_qiov, size_t mid_offset, size_t mid_len,
+        void *tail_buf, size_t tail_len)
+{
+    size_t mid_head, mid_tail;
+    int total_niov, mid_niov = 0;
+    struct iovec *p, *mid_iov = NULL;
+
+    if (mid_len) {
+        mid_iov = qiov_slice(mid_qiov, mid_offset, mid_len,
+                             &mid_head, &mid_tail, &mid_niov);
+    }
+
+    total_niov = !!head_len + mid_niov + !!tail_len;
+    if (total_niov == 1) {
+        qemu_iovec_init_buf(qiov, NULL, 0);
+        p = &qiov->local_iov;
+    } else {
+        qiov->niov = qiov->nalloc = total_niov;
+        qiov->size = head_len + mid_len + tail_len;
+        p = qiov->iov = g_new(struct iovec, qiov->niov);
+    }
+
+    if (head_len) {
+        p->iov_base = head_buf;
+        p->iov_len = head_len;
+        p++;
+    }
+
+    assert(!mid_niov == !mid_len);
+    if (mid_niov) {
+        memcpy(p, mid_iov, mid_niov * sizeof(*p));
+        p[0].iov_base = (uint8_t *)p[0].iov_base + mid_head;
+        p[0].iov_len -= mid_head;
+        p[mid_niov - 1].iov_len -= mid_tail;
+        p += mid_niov;
+    }
+
+    if (tail_len) {
+        p->iov_base = tail_buf;
+        p->iov_len = tail_len;
+    }
+}
+
+/*
+ * Check if the contents of subrange of qiov data is all zeroes.
+ */
+bool qemu_iovec_is_zero(QEMUIOVector *qiov, size_t offset, size_t bytes)
+{
+    struct iovec *iov;
+    size_t current_offset;
+
+    assert(offset + bytes <= qiov->size);
+
+    iov = iov_skip_offset(qiov->iov, offset, ¤t_offset);
+
+    while (bytes) {
+        uint8_t *base = (uint8_t *)iov->iov_base + current_offset;
+        size_t len = MIN(iov->iov_len - current_offset, bytes);
+
+        if (!buffer_is_zero(base, len)) {
+            return false;
+        }
+
+        current_offset = 0;
+        bytes -= len;
+        iov++;
+    }
+
+    return true;
+}
+
+void qemu_iovec_init_slice(QEMUIOVector *qiov, QEMUIOVector *source,
+                           size_t offset, size_t len)
+{
+    qemu_iovec_init_extended(qiov, NULL, 0, source, offset, len, NULL, 0);
+}
+
+void qemu_iovec_destroy(QEMUIOVector *qiov)
+{
+    if (qiov->nalloc != -1) {
+        g_free(qiov->iov);
+    }
+
+    memset(qiov, 0, sizeof(*qiov));
+}
+
+void qemu_iovec_reset(QEMUIOVector *qiov)
+{
+    assert(qiov->nalloc != -1);
+
+    qiov->niov = 0;
+    qiov->size = 0;
+}
+
+size_t qemu_iovec_to_buf(QEMUIOVector *qiov, size_t offset,
+                         void *buf, size_t bytes)
+{
+    return iov_to_buf(qiov->iov, qiov->niov, offset, buf, bytes);
+}
+
+size_t qemu_iovec_from_buf(QEMUIOVector *qiov, size_t offset,
+                           const void *buf, size_t bytes)
+{
+    return iov_from_buf(qiov->iov, qiov->niov, offset, buf, bytes);
+}
+
+size_t qemu_iovec_memset(QEMUIOVector *qiov, size_t offset,
+                         int fillc, size_t bytes)
+{
+    return iov_memset(qiov->iov, qiov->niov, offset, fillc, bytes);
+}
+
+/**
+ * Check that I/O vector contents are identical
+ *
+ * The IO vectors must have the same structure (same length of all parts).
+ * A typical usage is to compare vectors created with qemu_iovec_clone().
+ *
+ * @a:          I/O vector
+ * @b:          I/O vector
+ * @ret:        Offset to first mismatching byte or -1 if match
+ */
+ssize_t qemu_iovec_compare(QEMUIOVector *a, QEMUIOVector *b)
+{
+    int i;
+    ssize_t offset = 0;
+
+    assert(a->niov == b->niov);
+    for (i = 0; i < a->niov; i++) {
+        size_t len = 0;
+        uint8_t *p = (uint8_t *)a->iov[i].iov_base;
+        uint8_t *q = (uint8_t *)b->iov[i].iov_base;
+
+        assert(a->iov[i].iov_len == b->iov[i].iov_len);
+        while (len < a->iov[i].iov_len && *p++ == *q++) {
+            len++;
+        }
+
+        offset += len;
+
+        if (len != a->iov[i].iov_len) {
+            return offset;
+        }
+    }
+    return -1;
+}
+
+typedef struct {
+    int src_index;
+    struct iovec *src_iov;
+    void *dest_base;
+} IOVectorSortElem;
+
+static int sortelem_cmp_src_base(const void *a, const void *b)
+{
+    const IOVectorSortElem *elem_a = a;
+    const IOVectorSortElem *elem_b = b;
+
+    /* Don't overflow */
+    if (elem_a->src_iov->iov_base < elem_b->src_iov->iov_base) {
+        return -1;
+    } else if (elem_a->src_iov->iov_base > elem_b->src_iov->iov_base) {
+        return 1;
+    } else {
+        return 0;
+    }
+}
+
+static int sortelem_cmp_src_index(const void *a, const void *b)
+{
+    const IOVectorSortElem *elem_a = a;
+    const IOVectorSortElem *elem_b = b;
+
+    return elem_a->src_index - elem_b->src_index;
+}
+
+/**
+ * Copy contents of I/O vector
+ *
+ * The relative relationships of overlapping iovecs are preserved.  This is
+ * necessary to ensure identical semantics in the cloned I/O vector.
+ */
+void qemu_iovec_clone(QEMUIOVector *dest, const QEMUIOVector *src, void *buf)
+{
+    IOVectorSortElem sortelems[src->niov];
+    void *last_end;
+    int i;
+
+    /* Sort by source iovecs by base address */
+    for (i = 0; i < src->niov; i++) {
+        sortelems[i].src_index = i;
+        sortelems[i].src_iov = &src->iov[i];
+    }
+    qsort(sortelems, src->niov, sizeof(sortelems[0]), sortelem_cmp_src_base);
+
+    /* Allocate buffer space taking into account overlapping iovecs */
+    last_end = NULL;
+    for (i = 0; i < src->niov; i++) {
+        struct iovec *cur = sortelems[i].src_iov;
+        ptrdiff_t rewind = 0;
+
+        /* Detect overlap */
+        if (last_end && last_end > cur->iov_base) {
+            rewind = last_end - cur->iov_base;
+        }
+
+        sortelems[i].dest_base = buf - rewind;
+        buf += cur->iov_len - MIN(rewind, cur->iov_len);
+        last_end = MAX(cur->iov_base + cur->iov_len, last_end);
+    }
+
+    /* Sort by source iovec index and build destination iovec */
+    qsort(sortelems, src->niov, sizeof(sortelems[0]), sortelem_cmp_src_index);
+    for (i = 0; i < src->niov; i++) {
+        qemu_iovec_add(dest, sortelems[i].dest_base, src->iov[i].iov_len);
+    }
+}
+
+void iov_discard_undo(IOVDiscardUndo *undo)
+{
+    /* Restore original iovec if it was modified */
+    if (undo->modified_iov) {
+        *undo->modified_iov = undo->orig;
+    }
+}
+
+size_t iov_discard_front_undoable(struct iovec **iov,
+                                  unsigned int *iov_cnt,
+                                  size_t bytes,
+                                  IOVDiscardUndo *undo)
+{
+    size_t total = 0;
+    struct iovec *cur;
+
+    if (undo) {
+        undo->modified_iov = NULL;
+    }
+
+    for (cur = *iov; *iov_cnt > 0; cur++) {
+        if (cur->iov_len > bytes) {
+            if (undo) {
+                undo->modified_iov = cur;
+                undo->orig = *cur;
+            }
+
+            cur->iov_base += bytes;
+            cur->iov_len -= bytes;
+            total += bytes;
+            break;
+        }
+
+        bytes -= cur->iov_len;
+        total += cur->iov_len;
+        *iov_cnt -= 1;
+    }
+
+    *iov = cur;
+    return total;
+}
+
+size_t iov_discard_front(struct iovec **iov, unsigned int *iov_cnt,
+                         size_t bytes)
+{
+    return iov_discard_front_undoable(iov, iov_cnt, bytes, NULL);
+}
+
+size_t iov_discard_back_undoable(struct iovec *iov,
+                                 unsigned int *iov_cnt,
+                                 size_t bytes,
+                                 IOVDiscardUndo *undo)
+{
+    size_t total = 0;
+    struct iovec *cur;
+
+    if (undo) {
+        undo->modified_iov = NULL;
+    }
+
+    if (*iov_cnt == 0) {
+        return 0;
+    }
+
+    cur = iov + (*iov_cnt - 1);
+
+    while (*iov_cnt > 0) {
+        if (cur->iov_len > bytes) {
+            if (undo) {
+                undo->modified_iov = cur;
+                undo->orig = *cur;
+            }
+
+            cur->iov_len -= bytes;
+            total += bytes;
+            break;
+        }
+
+        bytes -= cur->iov_len;
+        total += cur->iov_len;
+        cur--;
+        *iov_cnt -= 1;
+    }
+
+    return total;
+}
+
+size_t iov_discard_back(struct iovec *iov, unsigned int *iov_cnt,
+                        size_t bytes)
+{
+    return iov_discard_back_undoable(iov, iov_cnt, bytes, NULL);
+}
+
+void qemu_iovec_discard_back(QEMUIOVector *qiov, size_t bytes)
+{
+    size_t total;
+    unsigned int niov = qiov->niov;
+
+    assert(qiov->size >= bytes);
+    total = iov_discard_back(qiov->iov, &niov, bytes);
+    assert(total == bytes);
+
+    qiov->niov = niov;
+    qiov->size -= bytes;
+}
diff --git a/util/iova-tree.c b/util/iova-tree.c
new file mode 100644
index 000000000..7990692cb
--- /dev/null
+++ b/util/iova-tree.c
@@ -0,0 +1,114 @@
+/*
+ * IOVA tree implementation based on GTree.
+ *
+ * Copyright 2018 Red Hat, Inc.
+ *
+ * Authors:
+ *  Peter Xu 
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
+ */
+
+#include "qemu/osdep.h"
+#include "qemu/iova-tree.h"
+
+struct IOVATree {
+    GTree *tree;
+};
+
+static int iova_tree_compare(gconstpointer a, gconstpointer b, gpointer data)
+{
+    const DMAMap *m1 = a, *m2 = b;
+
+    if (m1->iova > m2->iova + m2->size) {
+        return 1;
+    }
+
+    if (m1->iova + m1->size < m2->iova) {
+        return -1;
+    }
+
+    /* Overlapped */
+    return 0;
+}
+
+IOVATree *iova_tree_new(void)
+{
+    IOVATree *iova_tree = g_new0(IOVATree, 1);
+
+    /* We don't have values actually, no need to free */
+    iova_tree->tree = g_tree_new_full(iova_tree_compare, NULL, g_free, NULL);
+
+    return iova_tree;
+}
+
+DMAMap *iova_tree_find(IOVATree *tree, DMAMap *map)
+{
+    return g_tree_lookup(tree->tree, map);
+}
+
+DMAMap *iova_tree_find_address(IOVATree *tree, hwaddr iova)
+{
+    DMAMap map = { .iova = iova, .size = 0 };
+
+    return iova_tree_find(tree, &map);
+}
+
+static inline void iova_tree_insert_internal(GTree *gtree, DMAMap *range)
+{
+    /* Key and value are sharing the same range data */
+    g_tree_insert(gtree, range, range);
+}
+
+int iova_tree_insert(IOVATree *tree, DMAMap *map)
+{
+    DMAMap *new;
+
+    if (map->iova + map->size < map->iova || map->perm == IOMMU_NONE) {
+        return IOVA_ERR_INVALID;
+    }
+
+    /* We don't allow to insert range that overlaps with existings */
+    if (iova_tree_find(tree, map)) {
+        return IOVA_ERR_OVERLAP;
+    }
+
+    new = g_new0(DMAMap, 1);
+    memcpy(new, map, sizeof(*new));
+    iova_tree_insert_internal(tree->tree, new);
+
+    return IOVA_OK;
+}
+
+static gboolean iova_tree_traverse(gpointer key, gpointer value,
+                                gpointer data)
+{
+    iova_tree_iterator iterator = data;
+    DMAMap *map = key;
+
+    g_assert(key == value);
+
+    return iterator(map);
+}
+
+void iova_tree_foreach(IOVATree *tree, iova_tree_iterator iterator)
+{
+    g_tree_foreach(tree->tree, iova_tree_traverse, iterator);
+}
+
+int iova_tree_remove(IOVATree *tree, DMAMap *map)
+{
+    DMAMap *overlap;
+
+    while ((overlap = iova_tree_find(tree, map))) {
+        g_tree_remove(tree->tree, overlap);
+    }
+
+    return IOVA_OK;
+}
+
+void iova_tree_destroy(IOVATree *tree)
+{
+    g_tree_destroy(tree->tree);
+    g_free(tree);
+}
diff --git a/util/keyval.c b/util/keyval.c
new file mode 100644
index 000000000..7f625ad33
--- /dev/null
+++ b/util/keyval.c
@@ -0,0 +1,469 @@
+/*
+ * Parsing KEY=VALUE,... strings
+ *
+ * Copyright (C) 2017 Red Hat Inc.
+ *
+ * Authors:
+ *  Markus Armbruster ,
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
+ * See the COPYING file in the top-level directory.
+ */
+
+/*
+ * KEY=VALUE,... syntax:
+ *
+ *   key-vals     = [ key-val { ',' key-val } [ ',' ] ]
+ *   key-val      = key '=' val | help
+ *   key          = key-fragment { '.' key-fragment }
+ *   key-fragment = / [^=,.]+ /
+ *   val          = { / [^,]+ / | ',,' }
+ *   help         = 'help' | '?'
+ *
+ * Semantics defined by reduction to JSON:
+ *
+ *   key-vals specifies a JSON object, i.e. a tree whose root is an
+ *   object, inner nodes other than the root are objects or arrays,
+ *   and leaves are strings.
+ *
+ *   Each key-val = key-fragment '.' ... '=' val specifies a path from
+ *   root to a leaf (left of '='), and the leaf's value (right of
+ *   '=').
+ *
+ *   A path from the root is defined recursively:
+ *       L '.' key-fragment is a child of the node denoted by path L
+ *       key-fragment is a child of the tree root
+ *   If key-fragment is numeric, the parent is an array and the child
+ *   is its key-fragment-th member, counting from zero.
+ *   Else, the parent is an object, and the child is its member named
+ *   key-fragment.
+ *
+ *   This constrains inner nodes to be either array or object.  The
+ *   constraints must be satisfiable.  Counter-example: a.b=1,a=2 is
+ *   not, because root.a must be an object to satisfy a.b=1 and a
+ *   string to satisfy a=2.
+ *
+ *   Array subscripts can occur in any order, but the set of
+ *   subscripts must not have gaps.  For instance, a.1=v is not okay,
+ *   because root.a[0] is missing.
+ *
+ *   If multiple key-val denote the same leaf, the last one determines
+ *   the value.
+ *
+ * Key-fragments must be valid QAPI names or consist only of decimal
+ * digits.
+ *
+ * The length of any key-fragment must be between 1 and 127.
+ *
+ * If any key-val is help, the object is to be treated as a help
+ * request.
+ *
+ * Design flaw: there is no way to denote an empty array or non-root
+ * object.  While interpreting "key absent" as empty seems natural
+ * (removing a key-val from the input string removes the member when
+ * there are more, so why not when it's the last), it doesn't work:
+ * "key absent" already means "optional object/array absent", which
+ * isn't the same as "empty object/array present".
+ *
+ * Design flaw: scalar values can only be strings; there is no way to
+ * denote numbers, true, false or null.  The special QObject input
+ * visitor returned by qobject_input_visitor_new_keyval() mostly hides
+ * this by automatically converting strings to the type the visitor
+ * expects.  Breaks down for type 'any', where the visitor's
+ * expectation isn't clear.  Code visiting 'any' needs to do the
+ * conversion itself, but only when using this keyval visitor.
+ * Awkward.  Note that we carefully restrict alternate types to avoid
+ * similar ambiguity.
+ *
+ * Alternative syntax for use with an implied key:
+ *
+ *   key-vals     = [ key-val-1st { ',' key-val } [ ',' ] ]
+ *   key-val-1st  = val-no-key | key-val
+ *   val-no-key   = / [^=,]+ / - help
+ *
+ * where val-no-key is syntactic sugar for implied-key=val-no-key.
+ *
+ * Note that you can't use the sugared form when the value contains
+ * '=' or ','.
+ */
+
+#include "qemu/osdep.h"
+#include "qapi/error.h"
+#include "qapi/qmp/qdict.h"
+#include "qapi/qmp/qlist.h"
+#include "qapi/qmp/qstring.h"
+#include "qemu/cutils.h"
+#include "qemu/help_option.h"
+#include "qemu/option.h"
+
+/*
+ * Convert @key to a list index.
+ * Convert all leading decimal digits to a (non-negative) number,
+ * capped at INT_MAX.
+ * If @end is non-null, assign a pointer to the first character after
+ * the number to *@end.
+ * Else, fail if any characters follow.
+ * On success, return the converted number.
+ * On failure, return a negative value.
+ * Note: since only digits are converted, no two keys can map to the
+ * same number, except by overflow to INT_MAX.
+ */
+static int key_to_index(const char *key, const char **end)
+{
+    int ret;
+    unsigned long index;
+
+    if (*key < '0' || *key > '9') {
+        return -EINVAL;
+    }
+    ret = qemu_strtoul(key, end, 10, &index);
+    if (ret) {
+        return ret == -ERANGE ? INT_MAX : ret;
+    }
+    return index <= INT_MAX ? index : INT_MAX;
+}
+
+/*
+ * Ensure @cur maps @key_in_cur the right way.
+ * If @value is null, it needs to map to a QDict, else to this
+ * QString.
+ * If @cur doesn't have @key_in_cur, put an empty QDict or @value,
+ * respectively.
+ * Else, if it needs to map to a QDict, and already does, do nothing.
+ * Else, if it needs to map to this QString, and already maps to a
+ * QString, replace it by @value.
+ * Else, fail because we have conflicting needs on how to map
+ * @key_in_cur.
+ * In any case, take over the reference to @value, i.e. if the caller
+ * wants to hold on to a reference, it needs to qobject_ref().
+ * Use @key up to @key_cursor to identify the key in error messages.
+ * On success, return the mapped value.
+ * On failure, store an error through @errp and return NULL.
+ */
+static QObject *keyval_parse_put(QDict *cur,
+                                 const char *key_in_cur, QString *value,
+                                 const char *key, const char *key_cursor,
+                                 Error **errp)
+{
+    QObject *old, *new;
+
+    old = qdict_get(cur, key_in_cur);
+    if (old) {
+        if (qobject_type(old) != (value ? QTYPE_QSTRING : QTYPE_QDICT)) {
+            error_setg(errp, "Parameters '%.*s.*' used inconsistently",
+                       (int)(key_cursor - key), key);
+            qobject_unref(value);
+            return NULL;
+        }
+        if (!value) {
+            return old;         /* already QDict, do nothing */
+        }
+        new = QOBJECT(value);   /* replacement */
+    } else {
+        new = value ? QOBJECT(value) : QOBJECT(qdict_new());
+    }
+    qdict_put_obj(cur, key_in_cur, new);
+    return new;
+}
+
+/*
+ * Parse one parameter from @params.
+ *
+ * If we're looking at KEY=VALUE, store result in @qdict.
+ * The first fragment of KEY applies to @qdict.  Subsequent fragments
+ * apply to nested QDicts, which are created on demand.  @implied_key
+ * is as in keyval_parse().
+ *
+ * If we're looking at "help" or "?", set *help to true.
+ *
+ * On success, return a pointer to the next parameter, or else to '\0'.
+ * On failure, return NULL.
+ */
+static const char *keyval_parse_one(QDict *qdict, const char *params,
+                                    const char *implied_key, bool *help,
+                                    Error **errp)
+{
+    const char *key, *key_end, *val_end, *s, *end;
+    size_t len;
+    char key_in_cur[128];
+    QDict *cur;
+    int ret;
+    QObject *next;
+    QString *val;
+
+    key = params;
+    val_end = NULL;
+    len = strcspn(params, "=,");
+    if (len && key[len] != '=') {
+        if (starts_with_help_option(key) == len) {
+            *help = true;
+            s = key + len;
+            if (*s == ',') {
+                s++;
+            }
+            return s;
+        }
+        if (implied_key) {
+            /* Desugar implied key */
+            key = implied_key;
+            val_end = params + len;
+            len = strlen(implied_key);
+        }
+    }
+    key_end = key + len;
+
+    /*
+     * Loop over key fragments: @s points to current fragment, it
+     * applies to @cur.  @key_in_cur[] holds the previous fragment.
+     */
+    cur = qdict;
+    s = key;
+    for (;;) {
+        /* Want a key index (unless it's first) or a QAPI name */
+        if (s != key && key_to_index(s, &end) >= 0) {
+            len = end - s;
+        } else {
+            ret = parse_qapi_name(s, false);
+            len = ret < 0 ? 0 : ret;
+        }
+        assert(s + len <= key_end);
+        if (!len || (s + len < key_end && s[len] != '.')) {
+            assert(key != implied_key);
+            error_setg(errp, "Invalid parameter '%.*s'",
+                       (int)(key_end - key), key);
+            return NULL;
+        }
+        if (len >= sizeof(key_in_cur)) {
+            assert(key != implied_key);
+            error_setg(errp, "Parameter%s '%.*s' is too long",
+                       s != key || s + len != key_end ? " fragment" : "",
+                       (int)len, s);
+            return NULL;
+        }
+
+        if (s != key) {
+            next = keyval_parse_put(cur, key_in_cur, NULL,
+                                    key, s - 1, errp);
+            if (!next) {
+                return NULL;
+            }
+            cur = qobject_to(QDict, next);
+            assert(cur);
+        }
+
+        memcpy(key_in_cur, s, len);
+        key_in_cur[len] = 0;
+        s += len;
+
+        if (*s != '.') {
+            break;
+        }
+        s++;
+    }
+
+    if (key == implied_key) {
+        assert(!*s);
+        val = qstring_from_substr(params, 0, val_end - params);
+        s = val_end;
+        if (*s == ',') {
+            s++;
+        }
+    } else {
+        if (*s != '=') {
+            error_setg(errp, "Expected '=' after parameter '%.*s'",
+                       (int)(s - key), key);
+            return NULL;
+        }
+        s++;
+
+        val = qstring_new();
+        for (;;) {
+            if (!*s) {
+                break;
+            } else if (*s == ',') {
+                s++;
+                if (*s != ',') {
+                    break;
+                }
+            }
+            qstring_append_chr(val, *s++);
+        }
+    }
+
+    if (!keyval_parse_put(cur, key_in_cur, val, key, key_end, errp)) {
+        return NULL;
+    }
+    return s;
+}
+
+static char *reassemble_key(GSList *key)
+{
+    GString *s = g_string_new("");
+    GSList *p;
+
+    for (p = key; p; p = p->next) {
+        g_string_prepend_c(s, '.');
+        g_string_prepend(s, (char *)p->data);
+    }
+
+    return g_string_free(s, FALSE);
+}
+
+/*
+ * Listify @cur recursively.
+ * Replace QDicts whose keys are all valid list indexes by QLists.
+ * @key_of_cur is the list of key fragments leading up to @cur.
+ * On success, return either @cur or its replacement.
+ * On failure, store an error through @errp and return NULL.
+ */
+static QObject *keyval_listify(QDict *cur, GSList *key_of_cur, Error **errp)
+{
+    GSList key_node;
+    bool has_index, has_member;
+    const QDictEntry *ent;
+    QDict *qdict;
+    QObject *val;
+    char *key;
+    size_t nelt;
+    QObject **elt;
+    int index, max_index, i;
+    QList *list;
+
+    key_node.next = key_of_cur;
+
+    /*
+     * Recursively listify @cur's members, and figure out whether @cur
+     * itself is to be listified.
+     */
+    has_index = false;
+    has_member = false;
+    for (ent = qdict_first(cur); ent; ent = qdict_next(cur, ent)) {
+        if (key_to_index(ent->key, NULL) >= 0) {
+            has_index = true;
+        } else {
+            has_member = true;
+        }
+
+        qdict = qobject_to(QDict, ent->value);
+        if (!qdict) {
+            continue;
+        }
+
+        key_node.data = ent->key;
+        val = keyval_listify(qdict, &key_node, errp);
+        if (!val) {
+            return NULL;
+        }
+        if (val != ent->value) {
+            qdict_put_obj(cur, ent->key, val);
+        }
+    }
+
+    if (has_index && has_member) {
+        key = reassemble_key(key_of_cur);
+        error_setg(errp, "Parameters '%s*' used inconsistently", key);
+        g_free(key);
+        return NULL;
+    }
+    if (!has_index) {
+        return QOBJECT(cur);
+    }
+
+    /* Copy @cur's values to @elt[] */
+    nelt = qdict_size(cur) + 1; /* one extra, for use as sentinel */
+    elt = g_new0(QObject *, nelt);
+    max_index = -1;
+    for (ent = qdict_first(cur); ent; ent = qdict_next(cur, ent)) {
+        index = key_to_index(ent->key, NULL);
+        assert(index >= 0);
+        if (index > max_index) {
+            max_index = index;
+        }
+        /*
+         * We iterate @nelt times.  If we get one exceeding @nelt
+         * here, we will put less than @nelt values into @elt[],
+         * triggering the error in the next loop.
+         */
+        if ((size_t)index >= nelt - 1) {
+            continue;
+        }
+        /* Even though dict keys are distinct, indexes need not be */
+        elt[index] = ent->value;
+    }
+
+    /*
+     * Make a list from @elt[], reporting the first missing element,
+     * if any.
+     * If we dropped an index >= nelt in the previous loop, this loop
+     * will run into the sentinel and report index @nelt missing.
+     */
+    list = qlist_new();
+    assert(!elt[nelt-1]);       /* need the sentinel to be null */
+    for (i = 0; i < MIN(nelt, max_index + 1); i++) {
+        if (!elt[i]) {
+            key = reassemble_key(key_of_cur);
+            error_setg(errp, "Parameter '%s%d' missing", key, i);
+            g_free(key);
+            g_free(elt);
+            qobject_unref(list);
+            return NULL;
+        }
+        qobject_ref(elt[i]);
+        qlist_append_obj(list, elt[i]);
+    }
+
+    g_free(elt);
+    return QOBJECT(list);
+}
+
+/*
+ * Parse @params in QEMU's traditional KEY=VALUE,... syntax.
+ *
+ * If @implied_key, the first KEY= can be omitted.  @implied_key is
+ * implied then, and VALUE can't be empty or contain ',' or '='.
+ *
+ * A parameter "help" or "?" without a value isn't added to the
+ * resulting dictionary, but instead is interpreted as help request.
+ * All other options are parsed and returned normally so that context
+ * specific help can be printed.
+ *
+ * If @p_help is not NULL, store whether help is requested there.
+ * If @p_help is NULL and help is requested, fail.
+ *
+ * On success, return a dictionary of the parsed keys and values.
+ * On failure, store an error through @errp and return NULL.
+ */
+QDict *keyval_parse(const char *params, const char *implied_key,
+                    bool *p_help, Error **errp)
+{
+    QDict *qdict = qdict_new();
+    QObject *listified;
+    const char *s;
+    bool help = false;
+
+    s = params;
+    while (*s) {
+        s = keyval_parse_one(qdict, s, implied_key, &help, errp);
+        if (!s) {
+            qobject_unref(qdict);
+            return NULL;
+        }
+        implied_key = NULL;
+    }
+
+    if (p_help) {
+        *p_help = help;
+    } else if (help) {
+        error_setg(errp, "Help is not available for this option");
+        qobject_unref(qdict);
+        return NULL;
+    }
+
+    listified = keyval_listify(qdict, NULL, errp);
+    if (!listified) {
+        qobject_unref(qdict);
+        return NULL;
+    }
+    assert(listified == QOBJECT(qdict));
+    return qdict;
+}
diff --git a/util/lockcnt.c b/util/lockcnt.c
new file mode 100644
index 000000000..5da36946b
--- /dev/null
+++ b/util/lockcnt.c
@@ -0,0 +1,399 @@
+/*
+ * QemuLockCnt implementation
+ *
+ * Copyright Red Hat, Inc. 2017
+ *
+ * Author:
+ *   Paolo Bonzini 
+ */
+#include "qemu/osdep.h"
+#include "qemu/thread.h"
+#include "qemu/atomic.h"
+#include "trace.h"
+
+#ifdef CONFIG_LINUX
+#include "qemu/futex.h"
+
+/* On Linux, bits 0-1 are a futex-based lock, bits 2-31 are the counter.
+ * For the mutex algorithm see Ulrich Drepper's "Futexes Are Tricky" (ok,
+ * this is not the most relaxing citation I could make...).  It is similar
+ * to mutex2 in the paper.
+ */
+
+#define QEMU_LOCKCNT_STATE_MASK    3
+#define QEMU_LOCKCNT_STATE_FREE    0   /* free, uncontended */
+#define QEMU_LOCKCNT_STATE_LOCKED  1   /* locked, uncontended */
+#define QEMU_LOCKCNT_STATE_WAITING 2   /* locked, contended */
+
+#define QEMU_LOCKCNT_COUNT_STEP    4
+#define QEMU_LOCKCNT_COUNT_SHIFT   2
+
+void qemu_lockcnt_init(QemuLockCnt *lockcnt)
+{
+    lockcnt->count = 0;
+}
+
+void qemu_lockcnt_destroy(QemuLockCnt *lockcnt)
+{
+}
+
+/* *val is the current value of lockcnt->count.
+ *
+ * If the lock is free, try a cmpxchg from *val to new_if_free; return
+ * true and set *val to the old value found by the cmpxchg in
+ * lockcnt->count.
+ *
+ * If the lock is taken, wait for it to be released and return false
+ * *without trying again to take the lock*.  Again, set *val to the
+ * new value of lockcnt->count.
+ *
+ * If *waited is true on return, new_if_free's bottom two bits must not
+ * be QEMU_LOCKCNT_STATE_LOCKED on subsequent calls, because the caller
+ * does not know if there are other waiters.  Furthermore, after *waited
+ * is set the caller has effectively acquired the lock.  If it returns
+ * with the lock not taken, it must wake another futex waiter.
+ */
+static bool qemu_lockcnt_cmpxchg_or_wait(QemuLockCnt *lockcnt, int *val,
+                                         int new_if_free, bool *waited)
+{
+    /* Fast path for when the lock is free.  */
+    if ((*val & QEMU_LOCKCNT_STATE_MASK) == QEMU_LOCKCNT_STATE_FREE) {
+        int expected = *val;
+
+        trace_lockcnt_fast_path_attempt(lockcnt, expected, new_if_free);
+        *val = qatomic_cmpxchg(&lockcnt->count, expected, new_if_free);
+        if (*val == expected) {
+            trace_lockcnt_fast_path_success(lockcnt, expected, new_if_free);
+            *val = new_if_free;
+            return true;
+        }
+    }
+
+    /* The slow path moves from locked to waiting if necessary, then
+     * does a futex wait.  Both steps can be repeated ad nauseam,
+     * only getting out of the loop if we can have another shot at the
+     * fast path.  Once we can, get out to compute the new destination
+     * value for the fast path.
+     */
+    while ((*val & QEMU_LOCKCNT_STATE_MASK) != QEMU_LOCKCNT_STATE_FREE) {
+        if ((*val & QEMU_LOCKCNT_STATE_MASK) == QEMU_LOCKCNT_STATE_LOCKED) {
+            int expected = *val;
+            int new = expected - QEMU_LOCKCNT_STATE_LOCKED + QEMU_LOCKCNT_STATE_WAITING;
+
+            trace_lockcnt_futex_wait_prepare(lockcnt, expected, new);
+            *val = qatomic_cmpxchg(&lockcnt->count, expected, new);
+            if (*val == expected) {
+                *val = new;
+            }
+            continue;
+        }
+
+        if ((*val & QEMU_LOCKCNT_STATE_MASK) == QEMU_LOCKCNT_STATE_WAITING) {
+            *waited = true;
+            trace_lockcnt_futex_wait(lockcnt, *val);
+            qemu_futex_wait(&lockcnt->count, *val);
+            *val = qatomic_read(&lockcnt->count);
+            trace_lockcnt_futex_wait_resume(lockcnt, *val);
+            continue;
+        }
+
+        abort();
+    }
+    return false;
+}
+
+static void lockcnt_wake(QemuLockCnt *lockcnt)
+{
+    trace_lockcnt_futex_wake(lockcnt);
+    qemu_futex_wake(&lockcnt->count, 1);
+}
+
+void qemu_lockcnt_inc(QemuLockCnt *lockcnt)
+{
+    int val = qatomic_read(&lockcnt->count);
+    bool waited = false;
+
+    for (;;) {
+        if (val >= QEMU_LOCKCNT_COUNT_STEP) {
+            int expected = val;
+            val = qatomic_cmpxchg(&lockcnt->count, val,
+                                  val + QEMU_LOCKCNT_COUNT_STEP);
+            if (val == expected) {
+                break;
+            }
+        } else {
+            /* The fast path is (0, unlocked)->(1, unlocked).  */
+            if (qemu_lockcnt_cmpxchg_or_wait(lockcnt, &val, QEMU_LOCKCNT_COUNT_STEP,
+                                             &waited)) {
+                break;
+            }
+        }
+    }
+
+    /* If we were woken by another thread, we should also wake one because
+     * we are effectively releasing the lock that was given to us.  This is
+     * the case where qemu_lockcnt_lock would leave QEMU_LOCKCNT_STATE_WAITING
+     * in the low bits, and qemu_lockcnt_inc_and_unlock would find it and
+     * wake someone.
+     */
+    if (waited) {
+        lockcnt_wake(lockcnt);
+    }
+}
+
+void qemu_lockcnt_dec(QemuLockCnt *lockcnt)
+{
+    qatomic_sub(&lockcnt->count, QEMU_LOCKCNT_COUNT_STEP);
+}
+
+/* Decrement a counter, and return locked if it is decremented to zero.
+ * If the function returns true, it is impossible for the counter to
+ * become nonzero until the next qemu_lockcnt_unlock.
+ */
+bool qemu_lockcnt_dec_and_lock(QemuLockCnt *lockcnt)
+{
+    int val = qatomic_read(&lockcnt->count);
+    int locked_state = QEMU_LOCKCNT_STATE_LOCKED;
+    bool waited = false;
+
+    for (;;) {
+        if (val >= 2 * QEMU_LOCKCNT_COUNT_STEP) {
+            int expected = val;
+            val = qatomic_cmpxchg(&lockcnt->count, val,
+                                  val - QEMU_LOCKCNT_COUNT_STEP);
+            if (val == expected) {
+                break;
+            }
+        } else {
+            /* If count is going 1->0, take the lock. The fast path is
+             * (1, unlocked)->(0, locked) or (1, unlocked)->(0, waiting).
+             */
+            if (qemu_lockcnt_cmpxchg_or_wait(lockcnt, &val, locked_state, &waited)) {
+                return true;
+            }
+
+            if (waited) {
+                /* At this point we do not know if there are more waiters.  Assume
+                 * there are.
+                 */
+                locked_state = QEMU_LOCKCNT_STATE_WAITING;
+            }
+        }
+    }
+
+    /* If we were woken by another thread, but we're returning in unlocked
+     * state, we should also wake a thread because we are effectively
+     * releasing the lock that was given to us.  This is the case where
+     * qemu_lockcnt_lock would leave QEMU_LOCKCNT_STATE_WAITING in the low
+     * bits, and qemu_lockcnt_unlock would find it and wake someone.
+     */
+    if (waited) {
+        lockcnt_wake(lockcnt);
+    }
+    return false;
+}
+
+/* If the counter is one, decrement it and return locked.  Otherwise do
+ * nothing.
+ *
+ * If the function returns true, it is impossible for the counter to
+ * become nonzero until the next qemu_lockcnt_unlock.
+ */
+bool qemu_lockcnt_dec_if_lock(QemuLockCnt *lockcnt)
+{
+    int val = qatomic_read(&lockcnt->count);
+    int locked_state = QEMU_LOCKCNT_STATE_LOCKED;
+    bool waited = false;
+
+    while (val < 2 * QEMU_LOCKCNT_COUNT_STEP) {
+        /* If count is going 1->0, take the lock. The fast path is
+         * (1, unlocked)->(0, locked) or (1, unlocked)->(0, waiting).
+         */
+        if (qemu_lockcnt_cmpxchg_or_wait(lockcnt, &val, locked_state, &waited)) {
+            return true;
+        }
+
+        if (waited) {
+            /* At this point we do not know if there are more waiters.  Assume
+             * there are.
+             */
+            locked_state = QEMU_LOCKCNT_STATE_WAITING;
+        }
+    }
+
+    /* If we were woken by another thread, but we're returning in unlocked
+     * state, we should also wake a thread because we are effectively
+     * releasing the lock that was given to us.  This is the case where
+     * qemu_lockcnt_lock would leave QEMU_LOCKCNT_STATE_WAITING in the low
+     * bits, and qemu_lockcnt_inc_and_unlock would find it and wake someone.
+     */
+    if (waited) {
+        lockcnt_wake(lockcnt);
+    }
+    return false;
+}
+
+void qemu_lockcnt_lock(QemuLockCnt *lockcnt)
+{
+    int val = qatomic_read(&lockcnt->count);
+    int step = QEMU_LOCKCNT_STATE_LOCKED;
+    bool waited = false;
+
+    /* The third argument is only used if the low bits of val are 0
+     * (QEMU_LOCKCNT_STATE_FREE), so just blindly mix in the desired
+     * state.
+     */
+    while (!qemu_lockcnt_cmpxchg_or_wait(lockcnt, &val, val + step, &waited)) {
+        if (waited) {
+            /* At this point we do not know if there are more waiters.  Assume
+             * there are.
+             */
+            step = QEMU_LOCKCNT_STATE_WAITING;
+        }
+    }
+}
+
+void qemu_lockcnt_inc_and_unlock(QemuLockCnt *lockcnt)
+{
+    int expected, new, val;
+
+    val = qatomic_read(&lockcnt->count);
+    do {
+        expected = val;
+        new = (val + QEMU_LOCKCNT_COUNT_STEP) & ~QEMU_LOCKCNT_STATE_MASK;
+        trace_lockcnt_unlock_attempt(lockcnt, val, new);
+        val = qatomic_cmpxchg(&lockcnt->count, val, new);
+    } while (val != expected);
+
+    trace_lockcnt_unlock_success(lockcnt, val, new);
+    if (val & QEMU_LOCKCNT_STATE_WAITING) {
+        lockcnt_wake(lockcnt);
+    }
+}
+
+void qemu_lockcnt_unlock(QemuLockCnt *lockcnt)
+{
+    int expected, new, val;
+
+    val = qatomic_read(&lockcnt->count);
+    do {
+        expected = val;
+        new = val & ~QEMU_LOCKCNT_STATE_MASK;
+        trace_lockcnt_unlock_attempt(lockcnt, val, new);
+        val = qatomic_cmpxchg(&lockcnt->count, val, new);
+    } while (val != expected);
+
+    trace_lockcnt_unlock_success(lockcnt, val, new);
+    if (val & QEMU_LOCKCNT_STATE_WAITING) {
+        lockcnt_wake(lockcnt);
+    }
+}
+
+unsigned qemu_lockcnt_count(QemuLockCnt *lockcnt)
+{
+    return qatomic_read(&lockcnt->count) >> QEMU_LOCKCNT_COUNT_SHIFT;
+}
+#else
+void qemu_lockcnt_init(QemuLockCnt *lockcnt)
+{
+    qemu_mutex_init(&lockcnt->mutex);
+    lockcnt->count = 0;
+}
+
+void qemu_lockcnt_destroy(QemuLockCnt *lockcnt)
+{
+    qemu_mutex_destroy(&lockcnt->mutex);
+}
+
+void qemu_lockcnt_inc(QemuLockCnt *lockcnt)
+{
+    int old;
+    for (;;) {
+        old = qatomic_read(&lockcnt->count);
+        if (old == 0) {
+            qemu_lockcnt_lock(lockcnt);
+            qemu_lockcnt_inc_and_unlock(lockcnt);
+            return;
+        } else {
+            if (qatomic_cmpxchg(&lockcnt->count, old, old + 1) == old) {
+                return;
+            }
+        }
+    }
+}
+
+void qemu_lockcnt_dec(QemuLockCnt *lockcnt)
+{
+    qatomic_dec(&lockcnt->count);
+}
+
+/* Decrement a counter, and return locked if it is decremented to zero.
+ * It is impossible for the counter to become nonzero while the mutex
+ * is taken.
+ */
+bool qemu_lockcnt_dec_and_lock(QemuLockCnt *lockcnt)
+{
+    int val = qatomic_read(&lockcnt->count);
+    while (val > 1) {
+        int old = qatomic_cmpxchg(&lockcnt->count, val, val - 1);
+        if (old != val) {
+            val = old;
+            continue;
+        }
+
+        return false;
+    }
+
+    qemu_lockcnt_lock(lockcnt);
+    if (qatomic_fetch_dec(&lockcnt->count) == 1) {
+        return true;
+    }
+
+    qemu_lockcnt_unlock(lockcnt);
+    return false;
+}
+
+/* Decrement a counter and return locked if it is decremented to zero.
+ * Otherwise do nothing.
+ *
+ * It is impossible for the counter to become nonzero while the mutex
+ * is taken.
+ */
+bool qemu_lockcnt_dec_if_lock(QemuLockCnt *lockcnt)
+{
+    /* No need for acquire semantics if we return false.  */
+    int val = qatomic_read(&lockcnt->count);
+    if (val > 1) {
+        return false;
+    }
+
+    qemu_lockcnt_lock(lockcnt);
+    if (qatomic_fetch_dec(&lockcnt->count) == 1) {
+        return true;
+    }
+
+    qemu_lockcnt_inc_and_unlock(lockcnt);
+    return false;
+}
+
+void qemu_lockcnt_lock(QemuLockCnt *lockcnt)
+{
+    qemu_mutex_lock(&lockcnt->mutex);
+}
+
+void qemu_lockcnt_inc_and_unlock(QemuLockCnt *lockcnt)
+{
+    qatomic_inc(&lockcnt->count);
+    qemu_mutex_unlock(&lockcnt->mutex);
+}
+
+void qemu_lockcnt_unlock(QemuLockCnt *lockcnt)
+{
+    qemu_mutex_unlock(&lockcnt->mutex);
+}
+
+unsigned qemu_lockcnt_count(QemuLockCnt *lockcnt)
+{
+    return qatomic_read(&lockcnt->count);
+}
+#endif
diff --git a/util/log.c b/util/log.c
new file mode 100644
index 000000000..2ee1500be
--- /dev/null
+++ b/util/log.c
@@ -0,0 +1,389 @@
+/*
+ * Logging support
+ *
+ *  Copyright (c) 2003 Fabrice Bellard
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, see .
+ */
+
+#include "qemu/osdep.h"
+#include "qemu/log.h"
+#include "qemu/range.h"
+#include "qemu/error-report.h"
+#include "qapi/error.h"
+#include "qemu/cutils.h"
+#include "trace/control.h"
+#include "qemu/thread.h"
+#include "qemu/lockable.h"
+
+static char *logfilename;
+static QemuMutex qemu_logfile_mutex;
+QemuLogFile *qemu_logfile;
+int qemu_loglevel;
+static int log_append = 0;
+static GArray *debug_regions;
+
+/* Return the number of characters emitted.  */
+int qemu_log(const char *fmt, ...)
+{
+    int ret = 0;
+    QemuLogFile *logfile;
+
+    rcu_read_lock();
+    logfile = qatomic_rcu_read(&qemu_logfile);
+    if (logfile) {
+        va_list ap;
+        va_start(ap, fmt);
+        ret = vfprintf(logfile->fd, fmt, ap);
+        va_end(ap);
+
+        /* Don't pass back error results.  */
+        if (ret < 0) {
+            ret = 0;
+        }
+    }
+    rcu_read_unlock();
+    return ret;
+}
+
+static void __attribute__((__constructor__)) qemu_logfile_init(void)
+{
+    qemu_mutex_init(&qemu_logfile_mutex);
+}
+
+static void qemu_logfile_free(QemuLogFile *logfile)
+{
+    g_assert(logfile);
+
+    if (logfile->fd != stderr) {
+        fclose(logfile->fd);
+    }
+    g_free(logfile);
+}
+
+static bool log_uses_own_buffers;
+
+/* enable or disable low levels log */
+void qemu_set_log(int log_flags)
+{
+    bool need_to_open_file = false;
+    QemuLogFile *logfile;
+
+    qemu_loglevel = log_flags;
+#ifdef CONFIG_TRACE_LOG
+    qemu_loglevel |= LOG_TRACE;
+#endif
+    /*
+     * In all cases we only log if qemu_loglevel is set.
+     * Also:
+     *   If not daemonized we will always log either to stderr
+     *     or to a file (if there is a logfilename).
+     *   If we are daemonized,
+     *     we will only log if there is a logfilename.
+     */
+    if (qemu_loglevel && (!is_daemonized() || logfilename)) {
+        need_to_open_file = true;
+    }
+    QEMU_LOCK_GUARD(&qemu_logfile_mutex);
+    if (qemu_logfile && !need_to_open_file) {
+        logfile = qemu_logfile;
+        qatomic_rcu_set(&qemu_logfile, NULL);
+        call_rcu(logfile, qemu_logfile_free, rcu);
+    } else if (!qemu_logfile && need_to_open_file) {
+        logfile = g_new0(QemuLogFile, 1);
+        if (logfilename) {
+            logfile->fd = fopen(logfilename, log_append ? "a" : "w");
+            if (!logfile->fd) {
+                g_free(logfile);
+                perror(logfilename);
+                _exit(1);
+            }
+            /* In case we are a daemon redirect stderr to logfile */
+            if (is_daemonized()) {
+                dup2(fileno(logfile->fd), STDERR_FILENO);
+                fclose(logfile->fd);
+                /* This will skip closing logfile in qemu_log_close() */
+                logfile->fd = stderr;
+            }
+        } else {
+            /* Default to stderr if no log file specified */
+            assert(!is_daemonized());
+            logfile->fd = stderr;
+        }
+        /* must avoid mmap() usage of glibc by setting a buffer "by hand" */
+        if (log_uses_own_buffers) {
+            static char logfile_buf[4096];
+
+            setvbuf(logfile->fd, logfile_buf, _IOLBF, sizeof(logfile_buf));
+        } else {
+#if defined(_WIN32)
+            /* Win32 doesn't support line-buffering, so use unbuffered output. */
+            setvbuf(logfile->fd, NULL, _IONBF, 0);
+#else
+            setvbuf(logfile->fd, NULL, _IOLBF, 0);
+#endif
+            log_append = 1;
+        }
+        qatomic_rcu_set(&qemu_logfile, logfile);
+    }
+}
+
+void qemu_log_needs_buffers(void)
+{
+    log_uses_own_buffers = true;
+}
+
+/*
+ * Allow the user to include %d in their logfile which will be
+ * substituted with the current PID. This is useful for debugging many
+ * nested linux-user tasks but will result in lots of logs.
+ *
+ * filename may be NULL. In that case, log output is sent to stderr
+ */
+void qemu_set_log_filename(const char *filename, Error **errp)
+{
+    g_free(logfilename);
+    logfilename = NULL;
+
+    if (filename) {
+            char *pidstr = strstr(filename, "%");
+            if (pidstr) {
+                /* We only accept one %d, no other format strings */
+                if (pidstr[1] != 'd' || strchr(pidstr + 2, '%')) {
+                    error_setg(errp, "Bad logfile format: %s", filename);
+                    return;
+                } else {
+                    logfilename = g_strdup_printf(filename, getpid());
+                }
+            } else {
+                logfilename = g_strdup(filename);
+            }
+    }
+
+    qemu_log_close();
+    qemu_set_log(qemu_loglevel);
+}
+
+/* Returns true if addr is in our debug filter or no filter defined
+ */
+bool qemu_log_in_addr_range(uint64_t addr)
+{
+    if (debug_regions) {
+        int i = 0;
+        for (i = 0; i < debug_regions->len; i++) {
+            Range *range = &g_array_index(debug_regions, Range, i);
+            if (range_contains(range, addr)) {
+                return true;
+            }
+        }
+        return false;
+    } else {
+        return true;
+    }
+}
+
+
+void qemu_set_dfilter_ranges(const char *filter_spec, Error **errp)
+{
+    gchar **ranges = g_strsplit(filter_spec, ",", 0);
+    int i;
+
+    if (debug_regions) {
+        g_array_unref(debug_regions);
+        debug_regions = NULL;
+    }
+
+    debug_regions = g_array_sized_new(FALSE, FALSE,
+                                      sizeof(Range), g_strv_length(ranges));
+    for (i = 0; ranges[i]; i++) {
+        const char *r = ranges[i];
+        const char *range_op, *r2, *e;
+        uint64_t r1val, r2val, lob, upb;
+        struct Range range;
+
+        range_op = strstr(r, "-");
+        r2 = range_op ? range_op + 1 : NULL;
+        if (!range_op) {
+            range_op = strstr(r, "+");
+            r2 = range_op ? range_op + 1 : NULL;
+        }
+        if (!range_op) {
+            range_op = strstr(r, "..");
+            r2 = range_op ? range_op + 2 : NULL;
+        }
+        if (!range_op) {
+            error_setg(errp, "Bad range specifier");
+            goto out;
+        }
+
+        if (qemu_strtou64(r, &e, 0, &r1val)
+            || e != range_op) {
+            error_setg(errp, "Invalid number to the left of %.*s",
+                       (int)(r2 - range_op), range_op);
+            goto out;
+        }
+        if (qemu_strtou64(r2, NULL, 0, &r2val)) {
+            error_setg(errp, "Invalid number to the right of %.*s",
+                       (int)(r2 - range_op), range_op);
+            goto out;
+        }
+
+        switch (*range_op) {
+        case '+':
+            lob = r1val;
+            upb = r1val + r2val - 1;
+            break;
+        case '-':
+            upb = r1val;
+            lob = r1val - (r2val - 1);
+            break;
+        case '.':
+            lob = r1val;
+            upb = r2val;
+            break;
+        default:
+            g_assert_not_reached();
+        }
+        if (lob > upb) {
+            error_setg(errp, "Invalid range");
+            goto out;
+        }
+        range_set_bounds(&range, lob, upb);
+        g_array_append_val(debug_regions, range);
+    }
+out:
+    g_strfreev(ranges);
+}
+
+/* fflush() the log file */
+void qemu_log_flush(void)
+{
+    QemuLogFile *logfile;
+
+    rcu_read_lock();
+    logfile = qatomic_rcu_read(&qemu_logfile);
+    if (logfile) {
+        fflush(logfile->fd);
+    }
+    rcu_read_unlock();
+}
+
+/* Close the log file */
+void qemu_log_close(void)
+{
+    QemuLogFile *logfile;
+
+    qemu_mutex_lock(&qemu_logfile_mutex);
+    logfile = qemu_logfile;
+
+    if (logfile) {
+        qatomic_rcu_set(&qemu_logfile, NULL);
+        call_rcu(logfile, qemu_logfile_free, rcu);
+    }
+    qemu_mutex_unlock(&qemu_logfile_mutex);
+}
+
+const QEMULogItem qemu_log_items[] = {
+    { CPU_LOG_TB_OUT_ASM, "out_asm",
+      "show generated host assembly code for each compiled TB" },
+    { CPU_LOG_TB_IN_ASM, "in_asm",
+      "show target assembly code for each compiled TB" },
+    { CPU_LOG_TB_OP, "op",
+      "show micro ops for each compiled TB" },
+    { CPU_LOG_TB_OP_OPT, "op_opt",
+      "show micro ops after optimization" },
+    { CPU_LOG_TB_OP_IND, "op_ind",
+      "show micro ops before indirect lowering" },
+    { CPU_LOG_INT, "int",
+      "show interrupts/exceptions in short format" },
+    { CPU_LOG_EXEC, "exec",
+      "show trace before each executed TB (lots of logs)" },
+    { CPU_LOG_TB_CPU, "cpu",
+      "show CPU registers before entering a TB (lots of logs)" },
+    { CPU_LOG_TB_FPU, "fpu",
+      "include FPU registers in the 'cpu' logging" },
+    { CPU_LOG_MMU, "mmu",
+      "log MMU-related activities" },
+    { CPU_LOG_PCALL, "pcall",
+      "x86 only: show protected mode far calls/returns/exceptions" },
+    { CPU_LOG_RESET, "cpu_reset",
+      "show CPU state before CPU resets" },
+    { LOG_UNIMP, "unimp",
+      "log unimplemented functionality" },
+    { LOG_GUEST_ERROR, "guest_errors",
+      "log when the guest OS does something invalid (eg accessing a\n"
+      "non-existent register)" },
+    { CPU_LOG_PAGE, "page",
+      "dump pages at beginning of user mode emulation" },
+    { CPU_LOG_TB_NOCHAIN, "nochain",
+      "do not chain compiled TBs so that \"exec\" and \"cpu\" show\n"
+      "complete traces" },
+#ifdef CONFIG_PLUGIN
+    { CPU_LOG_PLUGIN, "plugin", "output from TCG plugins\n"},
+#endif
+    { LOG_STRACE, "strace",
+      "log every user-mode syscall, its input, and its result" },
+    { 0, NULL, NULL },
+};
+
+/* takes a comma separated list of log masks. Return 0 if error. */
+int qemu_str_to_log_mask(const char *str)
+{
+    const QEMULogItem *item;
+    int mask = 0;
+    char **parts = g_strsplit(str, ",", 0);
+    char **tmp;
+
+    for (tmp = parts; tmp && *tmp; tmp++) {
+        if (g_str_equal(*tmp, "all")) {
+            for (item = qemu_log_items; item->mask != 0; item++) {
+                mask |= item->mask;
+            }
+#ifdef CONFIG_TRACE_LOG
+        } else if (g_str_has_prefix(*tmp, "trace:") && (*tmp)[6] != '\0') {
+            trace_enable_events((*tmp) + 6);
+            mask |= LOG_TRACE;
+#endif
+        } else {
+            for (item = qemu_log_items; item->mask != 0; item++) {
+                if (g_str_equal(*tmp, item->name)) {
+                    goto found;
+                }
+            }
+            goto error;
+        found:
+            mask |= item->mask;
+        }
+    }
+
+    g_strfreev(parts);
+    return mask;
+
+ error:
+    g_strfreev(parts);
+    return 0;
+}
+
+void qemu_print_log_usage(FILE *f)
+{
+    const QEMULogItem *item;
+    fprintf(f, "Log items (comma separated):\n");
+    for (item = qemu_log_items; item->mask != 0; item++) {
+        fprintf(f, "%-15s %s\n", item->name, item->help);
+    }
+#ifdef CONFIG_TRACE_LOG
+    fprintf(f, "trace:PATTERN   enable trace events\n");
+    fprintf(f, "\nUse \"-d trace:help\" to get a list of trace events.\n\n");
+#endif
+}
diff --git a/util/main-loop.c b/util/main-loop.c
new file mode 100644
index 000000000..6470f8eae
--- /dev/null
+++ b/util/main-loop.c
@@ -0,0 +1,644 @@
+/*
+ * QEMU System Emulator
+ *
+ * Copyright (c) 2003-2008 Fabrice Bellard
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+#include "qemu/osdep.h"
+#include "qapi/error.h"
+#include "qemu/cutils.h"
+#include "qemu/timer.h"
+#include "sysemu/qtest.h"
+#include "sysemu/cpu-timers.h"
+#include "sysemu/replay.h"
+#include "qemu/main-loop.h"
+#include "block/aio.h"
+#include "qemu/error-report.h"
+#include "qemu/queue.h"
+
+#ifndef _WIN32
+#include 
+#endif
+
+#ifndef _WIN32
+
+/* If we have signalfd, we mask out the signals we want to handle and then
+ * use signalfd to listen for them.  We rely on whatever the current signal
+ * handler is to dispatch the signals when we receive them.
+ */
+static void sigfd_handler(void *opaque)
+{
+    int fd = (intptr_t)opaque;
+    struct qemu_signalfd_siginfo info;
+    struct sigaction action;
+    ssize_t len;
+
+    while (1) {
+        do {
+            len = read(fd, &info, sizeof(info));
+        } while (len == -1 && errno == EINTR);
+
+        if (len == -1 && errno == EAGAIN) {
+            break;
+        }
+
+        if (len != sizeof(info)) {
+            error_report("read from sigfd returned %zd: %s", len,
+                         g_strerror(errno));
+            return;
+        }
+
+        sigaction(info.ssi_signo, NULL, &action);
+        if ((action.sa_flags & SA_SIGINFO) && action.sa_sigaction) {
+            sigaction_invoke(&action, &info);
+        } else if (action.sa_handler) {
+            action.sa_handler(info.ssi_signo);
+        }
+    }
+}
+
+static int qemu_signal_init(Error **errp)
+{
+    int sigfd;
+    sigset_t set;
+
+    /*
+     * SIG_IPI must be blocked in the main thread and must not be caught
+     * by sigwait() in the signal thread. Otherwise, the cpu thread will
+     * not catch it reliably.
+     */
+    sigemptyset(&set);
+    sigaddset(&set, SIG_IPI);
+    sigaddset(&set, SIGIO);
+    sigaddset(&set, SIGALRM);
+    sigaddset(&set, SIGBUS);
+    /* SIGINT cannot be handled via signalfd, so that ^C can be used
+     * to interrupt QEMU when it is being run under gdb.  SIGHUP and
+     * SIGTERM are also handled asynchronously, even though it is not
+     * strictly necessary, because they use the same handler as SIGINT.
+     */
+    pthread_sigmask(SIG_BLOCK, &set, NULL);
+
+    sigdelset(&set, SIG_IPI);
+    sigfd = qemu_signalfd(&set);
+    if (sigfd == -1) {
+        error_setg_errno(errp, errno, "failed to create signalfd");
+        return -errno;
+    }
+
+    fcntl_setfl(sigfd, O_NONBLOCK);
+
+    qemu_set_fd_handler(sigfd, sigfd_handler, NULL, (void *)(intptr_t)sigfd);
+
+    return 0;
+}
+
+#else /* _WIN32 */
+
+static int qemu_signal_init(Error **errp)
+{
+    return 0;
+}
+#endif
+
+static AioContext *qemu_aio_context;
+static QEMUBH *qemu_notify_bh;
+
+static void notify_event_cb(void *opaque)
+{
+    /* No need to do anything; this bottom half is only used to
+     * kick the kernel out of ppoll/poll/WaitForMultipleObjects.
+     */
+}
+
+AioContext *qemu_get_aio_context(void)
+{
+    return qemu_aio_context;
+}
+
+void qemu_notify_event(void)
+{
+    if (!qemu_aio_context) {
+        return;
+    }
+    qemu_bh_schedule(qemu_notify_bh);
+}
+
+static GArray *gpollfds;
+
+int qemu_init_main_loop(Error **errp)
+{
+    int ret;
+    GSource *src;
+
+    init_clocks(qemu_timer_notify_cb);
+
+    ret = qemu_signal_init(errp);
+    if (ret) {
+        return ret;
+    }
+
+    qemu_aio_context = aio_context_new(errp);
+    if (!qemu_aio_context) {
+        return -EMFILE;
+    }
+    qemu_notify_bh = qemu_bh_new(notify_event_cb, NULL);
+    gpollfds = g_array_new(FALSE, FALSE, sizeof(GPollFD));
+    src = aio_get_g_source(qemu_aio_context);
+    g_source_set_name(src, "aio-context");
+    g_source_attach(src, NULL);
+    g_source_unref(src);
+    src = iohandler_get_g_source();
+    g_source_set_name(src, "io-handler");
+    g_source_attach(src, NULL);
+    g_source_unref(src);
+    return 0;
+}
+
+static int max_priority;
+
+#ifndef _WIN32
+static int glib_pollfds_idx;
+static int glib_n_poll_fds;
+
+void qemu_fd_register(int fd)
+{
+}
+
+static void glib_pollfds_fill(int64_t *cur_timeout)
+{
+    GMainContext *context = g_main_context_default();
+    int timeout = 0;
+    int64_t timeout_ns;
+    int n;
+
+    g_main_context_prepare(context, &max_priority);
+
+    glib_pollfds_idx = gpollfds->len;
+    n = glib_n_poll_fds;
+    do {
+        GPollFD *pfds;
+        glib_n_poll_fds = n;
+        g_array_set_size(gpollfds, glib_pollfds_idx + glib_n_poll_fds);
+        pfds = &g_array_index(gpollfds, GPollFD, glib_pollfds_idx);
+        n = g_main_context_query(context, max_priority, &timeout, pfds,
+                                 glib_n_poll_fds);
+    } while (n != glib_n_poll_fds);
+
+    if (timeout < 0) {
+        timeout_ns = -1;
+    } else {
+        timeout_ns = (int64_t)timeout * (int64_t)SCALE_MS;
+    }
+
+    *cur_timeout = qemu_soonest_timeout(timeout_ns, *cur_timeout);
+}
+
+static void glib_pollfds_poll(void)
+{
+    GMainContext *context = g_main_context_default();
+    GPollFD *pfds = &g_array_index(gpollfds, GPollFD, glib_pollfds_idx);
+
+    if (g_main_context_check(context, max_priority, pfds, glib_n_poll_fds)) {
+        g_main_context_dispatch(context);
+    }
+}
+
+#define MAX_MAIN_LOOP_SPIN (1000)
+
+static int os_host_main_loop_wait(int64_t timeout)
+{
+    GMainContext *context = g_main_context_default();
+    int ret;
+
+    g_main_context_acquire(context);
+
+    glib_pollfds_fill(&timeout);
+
+    qemu_mutex_unlock_iothread();
+    replay_mutex_unlock();
+
+    ret = qemu_poll_ns((GPollFD *)gpollfds->data, gpollfds->len, timeout);
+
+    replay_mutex_lock();
+    qemu_mutex_lock_iothread();
+
+    glib_pollfds_poll();
+
+    g_main_context_release(context);
+
+    return ret;
+}
+#else
+/***********************************************************/
+/* Polling handling */
+
+typedef struct PollingEntry {
+    PollingFunc *func;
+    void *opaque;
+    struct PollingEntry *next;
+} PollingEntry;
+
+static PollingEntry *first_polling_entry;
+
+int qemu_add_polling_cb(PollingFunc *func, void *opaque)
+{
+    PollingEntry **ppe, *pe;
+    pe = g_malloc0(sizeof(PollingEntry));
+    pe->func = func;
+    pe->opaque = opaque;
+    for(ppe = &first_polling_entry; *ppe != NULL; ppe = &(*ppe)->next);
+    *ppe = pe;
+    return 0;
+}
+
+void qemu_del_polling_cb(PollingFunc *func, void *opaque)
+{
+    PollingEntry **ppe, *pe;
+    for(ppe = &first_polling_entry; *ppe != NULL; ppe = &(*ppe)->next) {
+        pe = *ppe;
+        if (pe->func == func && pe->opaque == opaque) {
+            *ppe = pe->next;
+            g_free(pe);
+            break;
+        }
+    }
+}
+
+/***********************************************************/
+/* Wait objects support */
+typedef struct WaitObjects {
+    int num;
+    int revents[MAXIMUM_WAIT_OBJECTS + 1];
+    HANDLE events[MAXIMUM_WAIT_OBJECTS + 1];
+    WaitObjectFunc *func[MAXIMUM_WAIT_OBJECTS + 1];
+    void *opaque[MAXIMUM_WAIT_OBJECTS + 1];
+} WaitObjects;
+
+static WaitObjects wait_objects = {0};
+
+int qemu_add_wait_object(HANDLE handle, WaitObjectFunc *func, void *opaque)
+{
+    WaitObjects *w = &wait_objects;
+    if (w->num >= MAXIMUM_WAIT_OBJECTS) {
+        return -1;
+    }
+    w->events[w->num] = handle;
+    w->func[w->num] = func;
+    w->opaque[w->num] = opaque;
+    w->revents[w->num] = 0;
+    w->num++;
+    return 0;
+}
+
+void qemu_del_wait_object(HANDLE handle, WaitObjectFunc *func, void *opaque)
+{
+    int i, found;
+    WaitObjects *w = &wait_objects;
+
+    found = 0;
+    for (i = 0; i < w->num; i++) {
+        if (w->events[i] == handle) {
+            found = 1;
+        }
+        if (found) {
+            w->events[i] = w->events[i + 1];
+            w->func[i] = w->func[i + 1];
+            w->opaque[i] = w->opaque[i + 1];
+            w->revents[i] = w->revents[i + 1];
+        }
+    }
+    if (found) {
+        w->num--;
+    }
+}
+
+void qemu_fd_register(int fd)
+{
+    WSAEventSelect(fd, event_notifier_get_handle(&qemu_aio_context->notifier),
+                   FD_READ | FD_ACCEPT | FD_CLOSE |
+                   FD_CONNECT | FD_WRITE | FD_OOB);
+}
+
+static int pollfds_fill(GArray *pollfds, fd_set *rfds, fd_set *wfds,
+                        fd_set *xfds)
+{
+    int nfds = -1;
+    int i;
+
+    for (i = 0; i < pollfds->len; i++) {
+        GPollFD *pfd = &g_array_index(pollfds, GPollFD, i);
+        int fd = pfd->fd;
+        int events = pfd->events;
+        if (events & G_IO_IN) {
+            FD_SET(fd, rfds);
+            nfds = MAX(nfds, fd);
+        }
+        if (events & G_IO_OUT) {
+            FD_SET(fd, wfds);
+            nfds = MAX(nfds, fd);
+        }
+        if (events & G_IO_PRI) {
+            FD_SET(fd, xfds);
+            nfds = MAX(nfds, fd);
+        }
+    }
+    return nfds;
+}
+
+static void pollfds_poll(GArray *pollfds, int nfds, fd_set *rfds,
+                         fd_set *wfds, fd_set *xfds)
+{
+    int i;
+
+    for (i = 0; i < pollfds->len; i++) {
+        GPollFD *pfd = &g_array_index(pollfds, GPollFD, i);
+        int fd = pfd->fd;
+        int revents = 0;
+
+        if (FD_ISSET(fd, rfds)) {
+            revents |= G_IO_IN;
+        }
+        if (FD_ISSET(fd, wfds)) {
+            revents |= G_IO_OUT;
+        }
+        if (FD_ISSET(fd, xfds)) {
+            revents |= G_IO_PRI;
+        }
+        pfd->revents = revents & pfd->events;
+    }
+}
+
+static int os_host_main_loop_wait(int64_t timeout)
+{
+    GMainContext *context = g_main_context_default();
+    GPollFD poll_fds[1024 * 2]; /* this is probably overkill */
+    int select_ret = 0;
+    int g_poll_ret, ret, i, n_poll_fds;
+    PollingEntry *pe;
+    WaitObjects *w = &wait_objects;
+    gint poll_timeout;
+    int64_t poll_timeout_ns;
+    static struct timeval tv0;
+    fd_set rfds, wfds, xfds;
+    int nfds;
+
+    g_main_context_acquire(context);
+
+    /* XXX: need to suppress polling by better using win32 events */
+    ret = 0;
+    for (pe = first_polling_entry; pe != NULL; pe = pe->next) {
+        ret |= pe->func(pe->opaque);
+    }
+    if (ret != 0) {
+        g_main_context_release(context);
+        return ret;
+    }
+
+    FD_ZERO(&rfds);
+    FD_ZERO(&wfds);
+    FD_ZERO(&xfds);
+    nfds = pollfds_fill(gpollfds, &rfds, &wfds, &xfds);
+    if (nfds >= 0) {
+        select_ret = select(nfds + 1, &rfds, &wfds, &xfds, &tv0);
+        if (select_ret != 0) {
+            timeout = 0;
+        }
+        if (select_ret > 0) {
+            pollfds_poll(gpollfds, nfds, &rfds, &wfds, &xfds);
+        }
+    }
+
+    g_main_context_prepare(context, &max_priority);
+    n_poll_fds = g_main_context_query(context, max_priority, &poll_timeout,
+                                      poll_fds, ARRAY_SIZE(poll_fds));
+    g_assert(n_poll_fds + w->num <= ARRAY_SIZE(poll_fds));
+
+    for (i = 0; i < w->num; i++) {
+        poll_fds[n_poll_fds + i].fd = (DWORD_PTR)w->events[i];
+        poll_fds[n_poll_fds + i].events = G_IO_IN;
+    }
+
+    if (poll_timeout < 0) {
+        poll_timeout_ns = -1;
+    } else {
+        poll_timeout_ns = (int64_t)poll_timeout * (int64_t)SCALE_MS;
+    }
+
+    poll_timeout_ns = qemu_soonest_timeout(poll_timeout_ns, timeout);
+
+    qemu_mutex_unlock_iothread();
+
+    replay_mutex_unlock();
+
+    g_poll_ret = qemu_poll_ns(poll_fds, n_poll_fds + w->num, poll_timeout_ns);
+
+    replay_mutex_lock();
+
+    qemu_mutex_lock_iothread();
+    if (g_poll_ret > 0) {
+        for (i = 0; i < w->num; i++) {
+            w->revents[i] = poll_fds[n_poll_fds + i].revents;
+        }
+        for (i = 0; i < w->num; i++) {
+            if (w->revents[i] && w->func[i]) {
+                w->func[i](w->opaque[i]);
+            }
+        }
+    }
+
+    if (g_main_context_check(context, max_priority, poll_fds, n_poll_fds)) {
+        g_main_context_dispatch(context);
+    }
+
+    g_main_context_release(context);
+
+    return select_ret || g_poll_ret;
+}
+#endif
+
+static NotifierList main_loop_poll_notifiers =
+    NOTIFIER_LIST_INITIALIZER(main_loop_poll_notifiers);
+
+void main_loop_poll_add_notifier(Notifier *notify)
+{
+    notifier_list_add(&main_loop_poll_notifiers, notify);
+}
+
+void main_loop_poll_remove_notifier(Notifier *notify)
+{
+    notifier_remove(notify);
+}
+
+void main_loop_wait(int nonblocking)
+{
+    MainLoopPoll mlpoll = {
+        .state = MAIN_LOOP_POLL_FILL,
+        .timeout = UINT32_MAX,
+        .pollfds = gpollfds,
+    };
+    int ret;
+    int64_t timeout_ns;
+
+    if (nonblocking) {
+        mlpoll.timeout = 0;
+    }
+
+    /* poll any events */
+    g_array_set_size(gpollfds, 0); /* reset for new iteration */
+    /* XXX: separate device handlers from system ones */
+    notifier_list_notify(&main_loop_poll_notifiers, &mlpoll);
+
+    if (mlpoll.timeout == UINT32_MAX) {
+        timeout_ns = -1;
+    } else {
+        timeout_ns = (uint64_t)mlpoll.timeout * (int64_t)(SCALE_MS);
+    }
+
+    timeout_ns = qemu_soonest_timeout(timeout_ns,
+                                      timerlistgroup_deadline_ns(
+                                          &main_loop_tlg));
+
+    ret = os_host_main_loop_wait(timeout_ns);
+    mlpoll.state = ret < 0 ? MAIN_LOOP_POLL_ERR : MAIN_LOOP_POLL_OK;
+    notifier_list_notify(&main_loop_poll_notifiers, &mlpoll);
+
+    if (icount_enabled()) {
+        /*
+         * CPU thread can infinitely wait for event after
+         * missing the warp
+         */
+        icount_start_warp_timer();
+    }
+    qemu_clock_run_all_timers();
+}
+
+/* Functions to operate on the main QEMU AioContext.  */
+
+QEMUBH *qemu_bh_new(QEMUBHFunc *cb, void *opaque)
+{
+    return aio_bh_new(qemu_aio_context, cb, opaque);
+}
+
+/*
+ * Functions to operate on the I/O handler AioContext.
+ * This context runs on top of main loop. We can't reuse qemu_aio_context
+ * because iohandlers mustn't be polled by aio_poll(qemu_aio_context).
+ */
+static AioContext *iohandler_ctx;
+
+static void iohandler_init(void)
+{
+    if (!iohandler_ctx) {
+        iohandler_ctx = aio_context_new(&error_abort);
+    }
+}
+
+AioContext *iohandler_get_aio_context(void)
+{
+    iohandler_init();
+    return iohandler_ctx;
+}
+
+GSource *iohandler_get_g_source(void)
+{
+    iohandler_init();
+    return aio_get_g_source(iohandler_ctx);
+}
+
+void qemu_set_fd_handler(int fd,
+                         IOHandler *fd_read,
+                         IOHandler *fd_write,
+                         void *opaque)
+{
+    iohandler_init();
+    aio_set_fd_handler(iohandler_ctx, fd, false,
+                       fd_read, fd_write, NULL, opaque);
+}
+
+void event_notifier_set_handler(EventNotifier *e,
+                                EventNotifierHandler *handler)
+{
+    iohandler_init();
+    aio_set_event_notifier(iohandler_ctx, e, false,
+                           handler, NULL);
+}
+
+/* reaping of zombies.  right now we're not passing the status to
+   anyone, but it would be possible to add a callback.  */
+#ifndef _WIN32
+typedef struct ChildProcessRecord {
+    int pid;
+    QLIST_ENTRY(ChildProcessRecord) next;
+} ChildProcessRecord;
+
+static QLIST_HEAD(, ChildProcessRecord) child_watches =
+    QLIST_HEAD_INITIALIZER(child_watches);
+
+static QEMUBH *sigchld_bh;
+
+static void sigchld_handler(int signal)
+{
+    qemu_bh_schedule(sigchld_bh);
+}
+
+static void sigchld_bh_handler(void *opaque)
+{
+    ChildProcessRecord *rec, *next;
+
+    QLIST_FOREACH_SAFE(rec, &child_watches, next, next) {
+        if (waitpid(rec->pid, NULL, WNOHANG) == rec->pid) {
+            QLIST_REMOVE(rec, next);
+            g_free(rec);
+        }
+    }
+}
+
+static void qemu_init_child_watch(void)
+{
+    struct sigaction act;
+    sigchld_bh = qemu_bh_new(sigchld_bh_handler, NULL);
+
+    memset(&act, 0, sizeof(act));
+    act.sa_handler = sigchld_handler;
+    act.sa_flags = SA_NOCLDSTOP;
+    sigaction(SIGCHLD, &act, NULL);
+}
+
+int qemu_add_child_watch(pid_t pid)
+{
+    ChildProcessRecord *rec;
+
+    if (!sigchld_bh) {
+        qemu_init_child_watch();
+    }
+
+    QLIST_FOREACH(rec, &child_watches, next) {
+        if (rec->pid == pid) {
+            return 1;
+        }
+    }
+    rec = g_malloc0(sizeof(ChildProcessRecord));
+    rec->pid = pid;
+    QLIST_INSERT_HEAD(&child_watches, rec, next);
+    return 0;
+}
+#endif
diff --git a/util/memfd.c b/util/memfd.c
new file mode 100644
index 000000000..4a3c07e0b
--- /dev/null
+++ b/util/memfd.c
@@ -0,0 +1,206 @@
+/*
+ * memfd.c
+ *
+ * Copyright (c) 2015 Red Hat, Inc.
+ *
+ * QEMU library functions on POSIX which are shared between QEMU and
+ * the QEMU tools.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+#include "qemu/osdep.h"
+
+#include "qapi/error.h"
+#include "qemu/memfd.h"
+#include "qemu/host-utils.h"
+
+#if defined CONFIG_LINUX && !defined CONFIG_MEMFD
+#include 
+#include 
+
+int memfd_create(const char *name, unsigned int flags)
+{
+#ifdef __NR_memfd_create
+    return syscall(__NR_memfd_create, name, flags);
+#else
+    errno = ENOSYS;
+    return -1;
+#endif
+}
+#endif
+
+int qemu_memfd_create(const char *name, size_t size, bool hugetlb,
+                      uint64_t hugetlbsize, unsigned int seals, Error **errp)
+{
+    int htsize = hugetlbsize ? ctz64(hugetlbsize) : 0;
+
+    if (htsize && 1ULL << htsize != hugetlbsize) {
+        error_setg(errp, "Hugepage size must be a power of 2");
+        return -1;
+    }
+
+    htsize = htsize << MFD_HUGE_SHIFT;
+
+#ifdef CONFIG_LINUX
+    int mfd = -1;
+    unsigned int flags = MFD_CLOEXEC;
+
+    if (seals) {
+        flags |= MFD_ALLOW_SEALING;
+    }
+    if (hugetlb) {
+        flags |= MFD_HUGETLB;
+        flags |= htsize;
+    }
+    mfd = memfd_create(name, flags);
+    if (mfd < 0) {
+        error_setg_errno(errp, errno,
+                         "failed to create memfd with flags 0x%x", flags);
+        goto err;
+    }
+
+    if (ftruncate(mfd, size) == -1) {
+        error_setg_errno(errp, errno, "failed to resize memfd to %zu", size);
+        goto err;
+    }
+
+    if (seals && fcntl(mfd, F_ADD_SEALS, seals) == -1) {
+        error_setg_errno(errp, errno, "failed to add seals 0x%x", seals);
+        goto err;
+    }
+
+    return mfd;
+
+err:
+    if (mfd >= 0) {
+        close(mfd);
+    }
+#else
+    error_setg_errno(errp, ENOSYS, "failed to create memfd");
+#endif
+    return -1;
+}
+
+/*
+ * This is a best-effort helper for shared memory allocation, with
+ * optional sealing. The helper will do his best to allocate using
+ * memfd with sealing, but may fallback on other methods without
+ * sealing.
+ */
+void *qemu_memfd_alloc(const char *name, size_t size, unsigned int seals,
+                       int *fd, Error **errp)
+{
+    void *ptr;
+    int mfd = qemu_memfd_create(name, size, false, 0, seals, NULL);
+
+    /* some systems have memfd without sealing */
+    if (mfd == -1) {
+        mfd = qemu_memfd_create(name, size, false, 0, 0, NULL);
+    }
+
+    if (mfd == -1) {
+        const char *tmpdir = g_get_tmp_dir();
+        gchar *fname;
+
+        fname = g_strdup_printf("%s/memfd-XXXXXX", tmpdir);
+        mfd = mkstemp(fname);
+        unlink(fname);
+        g_free(fname);
+
+        if (mfd == -1 ||
+            ftruncate(mfd, size) == -1) {
+            goto err;
+        }
+    }
+
+    ptr = mmap(0, size, PROT_READ | PROT_WRITE, MAP_SHARED, mfd, 0);
+    if (ptr == MAP_FAILED) {
+        goto err;
+    }
+
+    *fd = mfd;
+    return ptr;
+
+err:
+    error_setg_errno(errp, errno, "failed to allocate shared memory");
+    if (mfd >= 0) {
+        close(mfd);
+    }
+    return NULL;
+}
+
+void qemu_memfd_free(void *ptr, size_t size, int fd)
+{
+    if (ptr) {
+        munmap(ptr, size);
+    }
+
+    if (fd != -1) {
+        close(fd);
+    }
+}
+
+enum {
+    MEMFD_KO,
+    MEMFD_OK,
+    MEMFD_TODO
+};
+
+/**
+ * qemu_memfd_alloc_check():
+ *
+ * Check if qemu_memfd_alloc() can allocate, including using a
+ * fallback implementation when host doesn't support memfd.
+ */
+bool qemu_memfd_alloc_check(void)
+{
+    static int memfd_check = MEMFD_TODO;
+
+    if (memfd_check == MEMFD_TODO) {
+        int fd;
+        void *ptr;
+
+        fd = -1;
+        ptr = qemu_memfd_alloc("test", 4096, 0, &fd, NULL);
+        memfd_check = ptr ? MEMFD_OK : MEMFD_KO;
+        qemu_memfd_free(ptr, 4096, fd);
+    }
+
+    return memfd_check == MEMFD_OK;
+}
+
+/**
+ * qemu_memfd_check():
+ *
+ * Check if host supports memfd.
+ */
+bool qemu_memfd_check(unsigned int flags)
+{
+#ifdef CONFIG_LINUX
+    int mfd = memfd_create("test", flags | MFD_CLOEXEC);
+
+    if (mfd >= 0) {
+        close(mfd);
+        return true;
+    }
+#endif
+
+    return false;
+}
diff --git a/util/meson.build b/util/meson.build
new file mode 100644
index 000000000..a65472f30
--- /dev/null
+++ b/util/meson.build
@@ -0,0 +1,83 @@
+util_ss.add(dependency('threads'))
+util_ss.add(files('osdep.c', 'cutils.c', 'unicode.c', 'qemu-timer-common.c'))
+util_ss.add(when: 'CONFIG_ATOMIC64', if_false: files('atomic64.c'))
+util_ss.add(when: 'CONFIG_POSIX', if_true: files('aio-posix.c'))
+util_ss.add(when: 'CONFIG_POSIX', if_true: files('fdmon-poll.c'))
+util_ss.add(when: 'CONFIG_EPOLL_CREATE1', if_true: files('fdmon-epoll.c'))
+util_ss.add(when: ['CONFIG_LINUX_IO_URING', linux_io_uring], if_true: files('fdmon-io_uring.c'))
+util_ss.add(when: 'CONFIG_POSIX', if_true: files('compatfd.c'))
+util_ss.add(when: 'CONFIG_POSIX', if_true: files('event_notifier-posix.c'))
+util_ss.add(when: 'CONFIG_POSIX', if_true: files('mmap-alloc.c'))
+util_ss.add(when: 'CONFIG_POSIX', if_true: files('oslib-posix.c'))
+util_ss.add(when: 'CONFIG_POSIX', if_true: [files('qemu-openpty.c'), util])
+util_ss.add(when: 'CONFIG_POSIX', if_true: files('qemu-thread-posix.c'))
+util_ss.add(when: 'CONFIG_POSIX', if_true: files('memfd.c'))
+util_ss.add(when: 'CONFIG_WIN32', if_true: files('aio-win32.c'))
+util_ss.add(when: 'CONFIG_WIN32', if_true: files('event_notifier-win32.c'))
+util_ss.add(when: 'CONFIG_WIN32', if_true: files('oslib-win32.c'))
+util_ss.add(when: 'CONFIG_WIN32', if_true: files('qemu-thread-win32.c'))
+util_ss.add(when: 'CONFIG_WIN32', if_true: winmm)
+util_ss.add(when: 'HAVE_GLIB_WITH_SLICE_ALLOCATOR', if_true: files('qtree.c'))
+util_ss.add(files('envlist.c', 'path.c', 'module.c'))
+util_ss.add(files('host-utils.c'))
+util_ss.add(files('bitmap.c', 'bitops.c'))
+util_ss.add(files('fifo8.c'))
+util_ss.add(files('cacheinfo.c'))
+util_ss.add(files('error.c', 'qemu-error.c'))
+util_ss.add(files('qemu-print.c'))
+util_ss.add(files('id.c'))
+util_ss.add(files('qemu-config.c', 'notify.c'))
+util_ss.add(files('qemu-option.c', 'qemu-progress.c'))
+util_ss.add(files('keyval.c'))
+util_ss.add(files('crc32c.c'))
+util_ss.add(files('uuid.c'))
+util_ss.add(files('getauxval.c'))
+util_ss.add(files('rcu.c'))
+util_ss.add(when: 'CONFIG_MEMBARRIER', if_true: files('sys_membarrier.c'))
+util_ss.add(files('log.c'))
+util_ss.add(files('pagesize.c'))
+util_ss.add(files('qdist.c'))
+util_ss.add(files('qht.c'))
+util_ss.add(files('qsp.c'))
+util_ss.add(files('range.c'))
+util_ss.add(files('stats64.c'))
+util_ss.add(files('systemd.c'))
+util_ss.add(when: 'CONFIG_POSIX', if_true: files('drm.c'))
+util_ss.add(files('guest-random.c'))
+
+if have_user
+  util_ss.add(files('selfmap.c'))
+endif
+
+if have_system
+  util_ss.add(when: 'CONFIG_GIO', if_true: [files('dbus.c'), gio])
+endif
+
+if have_block
+  util_ss.add(files('aiocb.c', 'async.c', 'aio-wait.c'))
+  util_ss.add(files('base64.c'))
+  util_ss.add(files('buffer.c'))
+  util_ss.add(files('bufferiszero.c'))
+  util_ss.add(files('coroutine-@0@.c'.format(config_host['CONFIG_COROUTINE_BACKEND'])))
+  util_ss.add(files('hbitmap.c'))
+  util_ss.add(files('hexdump.c'))
+  util_ss.add(files('iova-tree.c'))
+  util_ss.add(files('iov.c', 'qemu-sockets.c', 'uri.c'))
+  util_ss.add(files('lockcnt.c'))
+  util_ss.add(files('main-loop.c'))
+  util_ss.add(files('nvdimm-utils.c'))
+  util_ss.add(files('qemu-coroutine.c', 'qemu-coroutine-lock.c', 'qemu-coroutine-io.c'))
+#  util_ss.add(when: 'CONFIG_LINUX', if_true: [
+#    files('vhost-user-server.c'), vhost_user
+#  ])
+  util_ss.add(files('block-helpers.c'))
+  util_ss.add(files('qemu-coroutine-sleep.c'))
+  util_ss.add(files('qemu-co-shared-resource.c'))
+  util_ss.add(files('thread-pool.c', 'qemu-timer.c'))
+  util_ss.add(files('readline.c'))
+  util_ss.add(files('throttle.c'))
+  util_ss.add(files('timed-average.c'))
+  util_ss.add(when: 'CONFIG_INOTIFY1', if_true: files('filemonitor-inotify.c'),
+                                        if_false: files('filemonitor-stub.c'))
+  util_ss.add(when: 'CONFIG_LINUX', if_true: files('vfio-helpers.c'))
+endif
diff --git a/util/mmap-alloc.c b/util/mmap-alloc.c
new file mode 100644
index 000000000..27dcccd8e
--- /dev/null
+++ b/util/mmap-alloc.c
@@ -0,0 +1,212 @@
+/*
+ * Support for RAM backed by mmaped host memory.
+ *
+ * Copyright (c) 2015 Red Hat, Inc.
+ *
+ * Authors:
+ *  Michael S. Tsirkin 
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or
+ * later.  See the COPYING file in the top-level directory.
+ */
+
+#ifdef CONFIG_LINUX
+#include 
+#else  /* !CONFIG_LINUX */
+#define MAP_SYNC              0x0
+#define MAP_SHARED_VALIDATE   0x0
+#endif /* CONFIG_LINUX */
+
+#include "qemu/osdep.h"
+#include "qemu/mmap-alloc.h"
+#include "qemu/host-utils.h"
+
+#define HUGETLBFS_MAGIC       0x958458f6
+
+#ifdef CONFIG_LINUX
+#include 
+#endif
+
+size_t qemu_fd_getpagesize(int fd)
+{
+#ifdef CONFIG_LINUX
+    struct statfs fs;
+    int ret;
+
+    if (fd != -1) {
+        do {
+            ret = fstatfs(fd, &fs);
+        } while (ret != 0 && errno == EINTR);
+
+        if (ret == 0 && fs.f_type == HUGETLBFS_MAGIC) {
+            return fs.f_bsize;
+        }
+    }
+#ifdef __sparc__
+    /* SPARC Linux needs greater alignment than the pagesize */
+    return QEMU_VMALLOC_ALIGN;
+#endif
+#endif
+
+    return qemu_real_host_page_size;
+}
+
+size_t qemu_mempath_getpagesize(const char *mem_path)
+{
+#ifdef CONFIG_LINUX
+    struct statfs fs;
+    int ret;
+
+    if (mem_path) {
+        do {
+            ret = statfs(mem_path, &fs);
+        } while (ret != 0 && errno == EINTR);
+
+        if (ret != 0) {
+            fprintf(stderr, "Couldn't statfs() memory path: %s\n",
+                    strerror(errno));
+            exit(1);
+        }
+
+        if (fs.f_type == HUGETLBFS_MAGIC) {
+            /* It's hugepage, return the huge page size */
+            return fs.f_bsize;
+        }
+    }
+#ifdef __sparc__
+    /* SPARC Linux needs greater alignment than the pagesize */
+    return QEMU_VMALLOC_ALIGN;
+#endif
+#endif
+
+    return qemu_real_host_page_size;
+}
+
+void *qemu_ram_mmap(int fd,
+                    size_t size,
+                    size_t align,
+                    bool shared,
+                    bool is_pmem)
+{
+    int flags;
+    int map_sync_flags = 0;
+    int guardfd;
+    size_t offset;
+    size_t pagesize;
+    size_t total;
+    void *guardptr;
+    void *ptr;
+
+    /*
+     * Note: this always allocates at least one extra page of virtual address
+     * space, even if size is already aligned.
+     */
+    total = size + align;
+
+#if defined(__powerpc64__) && defined(__linux__)
+    /* On ppc64 mappings in the same segment (aka slice) must share the same
+     * page size. Since we will be re-allocating part of this segment
+     * from the supplied fd, we should make sure to use the same page size, to
+     * this end we mmap the supplied fd.  In this case, set MAP_NORESERVE to
+     * avoid allocating backing store memory.
+     * We do this unless we are using the system page size, in which case
+     * anonymous memory is OK.
+     */
+    flags = MAP_PRIVATE;
+    pagesize = qemu_fd_getpagesize(fd);
+    if (fd == -1 || pagesize == qemu_real_host_page_size) {
+        guardfd = -1;
+        flags |= MAP_ANONYMOUS;
+    } else {
+        guardfd = fd;
+        flags |= MAP_NORESERVE;
+    }
+#else
+    guardfd = -1;
+    pagesize = qemu_real_host_page_size;
+    flags = MAP_PRIVATE | MAP_ANONYMOUS;
+#endif
+
+    guardptr = mmap(0, total, PROT_NONE, flags, guardfd, 0);
+
+    if (guardptr == MAP_FAILED) {
+        return MAP_FAILED;
+    }
+
+    assert(is_power_of_2(align));
+    /* Always align to host page size */
+    assert(align >= pagesize);
+
+    flags = MAP_FIXED;
+    flags |= fd == -1 ? MAP_ANONYMOUS : 0;
+    flags |= shared ? MAP_SHARED : MAP_PRIVATE;
+    if (shared && is_pmem) {
+        map_sync_flags = MAP_SYNC | MAP_SHARED_VALIDATE;
+    }
+
+    offset = QEMU_ALIGN_UP((uintptr_t)guardptr, align) - (uintptr_t)guardptr;
+
+    ptr = mmap(guardptr + offset, size, PROT_READ | PROT_WRITE,
+               flags | map_sync_flags, fd, 0);
+
+    if (ptr == MAP_FAILED && map_sync_flags) {
+        if (errno == ENOTSUP) {
+            char *proc_link, *file_name;
+            int len;
+            proc_link = g_strdup_printf("/proc/self/fd/%d", fd);
+            file_name = g_malloc0(PATH_MAX);
+            len = readlink(proc_link, file_name, PATH_MAX - 1);
+            if (len < 0) {
+                len = 0;
+            }
+            file_name[len] = '\0';
+            fprintf(stderr, "Warning: requesting persistence across crashes "
+                    "for backend file %s failed. Proceeding without "
+                    "persistence, data might become corrupted in case of host "
+                    "crash.\n", file_name);
+            g_free(proc_link);
+            g_free(file_name);
+        }
+        /*
+         * if map failed with MAP_SHARED_VALIDATE | MAP_SYNC,
+         * we will remove these flags to handle compatibility.
+         */
+        ptr = mmap(guardptr + offset, size, PROT_READ | PROT_WRITE,
+                   flags, fd, 0);
+    }
+
+    if (ptr == MAP_FAILED) {
+        munmap(guardptr, total);
+        return MAP_FAILED;
+    }
+
+    if (offset > 0) {
+        munmap(guardptr, offset);
+    }
+
+    /*
+     * Leave a single PROT_NONE page allocated after the RAM block, to serve as
+     * a guard page guarding against potential buffer overflows.
+     */
+    total -= offset;
+    if (total > size + pagesize) {
+        munmap(ptr + size + pagesize, total - size - pagesize);
+    }
+
+    return ptr;
+}
+
+void qemu_ram_munmap(int fd, void *ptr, size_t size)
+{
+    size_t pagesize;
+
+    if (ptr) {
+        /* Unmap both the RAM block and the guard page */
+#if defined(__powerpc64__) && defined(__linux__)
+        pagesize = qemu_fd_getpagesize(fd);
+#else
+        pagesize = qemu_real_host_page_size;
+#endif
+        munmap(ptr, size + pagesize);
+    }
+}
diff --git a/util/module.c b/util/module.c
new file mode 100644
index 000000000..7661d0f62
--- /dev/null
+++ b/util/module.c
@@ -0,0 +1,354 @@
+/*
+ * QEMU Module Infrastructure
+ *
+ * Copyright IBM, Corp. 2009
+ *
+ * Authors:
+ *  Anthony Liguori   
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2.  See
+ * the COPYING file in the top-level directory.
+ *
+ * Contributions after 2012-01-13 are licensed under the terms of the
+ * GNU GPL, version 2 or (at your option) any later version.
+ */
+
+#include "qemu/osdep.h"
+#ifdef CONFIG_MODULES
+#include 
+#endif
+#include "qemu/queue.h"
+#include "qemu/module.h"
+#include "qemu/cutils.h"
+#ifdef CONFIG_MODULE_UPGRADES
+#include "qemu-version.h"
+#endif
+
+typedef struct ModuleEntry
+{
+    void (*init)(void);
+    QTAILQ_ENTRY(ModuleEntry) node;
+    module_init_type type;
+} ModuleEntry;
+
+typedef QTAILQ_HEAD(, ModuleEntry) ModuleTypeList;
+
+static ModuleTypeList init_type_list[MODULE_INIT_MAX];
+static bool modules_init_done[MODULE_INIT_MAX];
+
+static ModuleTypeList dso_init_list;
+
+static void init_lists(void)
+{
+    static int inited;
+    int i;
+
+    if (inited) {
+        return;
+    }
+
+    for (i = 0; i < MODULE_INIT_MAX; i++) {
+        QTAILQ_INIT(&init_type_list[i]);
+    }
+
+    QTAILQ_INIT(&dso_init_list);
+
+    inited = 1;
+}
+
+
+static ModuleTypeList *find_type(module_init_type type)
+{
+    init_lists();
+
+    return &init_type_list[type];
+}
+
+void register_module_init(void (*fn)(void), module_init_type type)
+{
+    ModuleEntry *e;
+    ModuleTypeList *l;
+
+    e = g_malloc0(sizeof(*e));
+    e->init = fn;
+    e->type = type;
+
+    l = find_type(type);
+
+    QTAILQ_INSERT_TAIL(l, e, node);
+}
+
+void register_dso_module_init(void (*fn)(void), module_init_type type)
+{
+    ModuleEntry *e;
+
+    init_lists();
+
+    e = g_malloc0(sizeof(*e));
+    e->init = fn;
+    e->type = type;
+
+    QTAILQ_INSERT_TAIL(&dso_init_list, e, node);
+}
+
+void module_call_init(module_init_type type)
+{
+    ModuleTypeList *l;
+    ModuleEntry *e;
+
+    if (modules_init_done[type]) {
+        return;
+    }
+
+    l = find_type(type);
+
+    QTAILQ_FOREACH(e, l, node) {
+        e->init();
+    }
+
+    modules_init_done[type] = true;
+}
+
+#ifdef CONFIG_MODULES
+static int module_load_file(const char *fname, bool mayfail, bool export_symbols)
+{
+    GModule *g_module;
+    void (*sym)(void);
+    const char *dsosuf = CONFIG_HOST_DSOSUF;
+    int len = strlen(fname);
+    int suf_len = strlen(dsosuf);
+    ModuleEntry *e, *next;
+    int ret, flags;
+
+    if (len <= suf_len || strcmp(&fname[len - suf_len], dsosuf)) {
+        /* wrong suffix */
+        ret = -EINVAL;
+        goto out;
+    }
+    if (access(fname, F_OK)) {
+        ret = -ENOENT;
+        goto out;
+    }
+
+    assert(QTAILQ_EMPTY(&dso_init_list));
+
+    flags = 0;
+    if (!export_symbols) {
+        flags |= G_MODULE_BIND_LOCAL;
+    }
+    g_module = g_module_open(fname, flags);
+    if (!g_module) {
+        if (!mayfail) {
+            fprintf(stderr, "Failed to open module: %s\n",
+                    g_module_error());
+        }
+        ret = -EINVAL;
+        goto out;
+    }
+    if (!g_module_symbol(g_module, DSO_STAMP_FUN_STR, (gpointer *)&sym)) {
+        fprintf(stderr, "Failed to initialize module: %s\n",
+                fname);
+        /* Print some info if this is a QEMU module (but from different build),
+         * this will make debugging user problems easier. */
+        if (g_module_symbol(g_module, "qemu_module_dummy", (gpointer *)&sym)) {
+            fprintf(stderr,
+                    "Note: only modules from the same build can be loaded.\n");
+        }
+        g_module_close(g_module);
+        ret = -EINVAL;
+    } else {
+        QTAILQ_FOREACH(e, &dso_init_list, node) {
+            e->init();
+            register_module_init(e->init, e->type);
+        }
+        ret = 0;
+    }
+
+    QTAILQ_FOREACH_SAFE(e, &dso_init_list, node, next) {
+        QTAILQ_REMOVE(&dso_init_list, e, node);
+        g_free(e);
+    }
+out:
+    return ret;
+}
+
+static const struct {
+    const char *name;
+    const char *dep;
+} module_deps[] = {
+    { "audio-spice",    "ui-spice-core" },
+    { "chardev-spice",  "ui-spice-core" },
+    { "hw-display-qxl", "ui-spice-core" },
+    { "ui-spice-app",   "ui-spice-core" },
+    { "ui-spice-app",   "chardev-spice" },
+
+#ifdef CONFIG_OPENGL
+    { "ui-egl-headless", "ui-opengl"    },
+    { "ui-gtk",          "ui-opengl"    },
+    { "ui-sdl",          "ui-opengl"    },
+    { "ui-spice-core",   "ui-opengl"    },
+#endif
+};
+#endif
+
+bool module_load_one(const char *prefix, const char *lib_name, bool mayfail)
+{
+    bool success = false;
+
+#ifdef CONFIG_MODULES
+    char *fname = NULL;
+#ifdef CONFIG_MODULE_UPGRADES
+    char *version_dir;
+#endif
+    const char *search_dir;
+    char *dirs[5];
+    char *module_name;
+    int i = 0, n_dirs = 0;
+    int ret, dep;
+    bool export_symbols = false;
+    static GHashTable *loaded_modules;
+
+    if (!g_module_supported()) {
+        fprintf(stderr, "Module is not supported by system.\n");
+        return false;
+    }
+
+    if (!loaded_modules) {
+        loaded_modules = g_hash_table_new(g_str_hash, g_str_equal);
+    }
+
+    module_name = g_strdup_printf("%s%s", prefix, lib_name);
+
+    for (dep = 0; dep < ARRAY_SIZE(module_deps); dep++) {
+        if (strcmp(module_name, module_deps[dep].name) == 0) {
+            /* we depend on another module */
+            module_load_one("", module_deps[dep].dep, false);
+        }
+        if (strcmp(module_name, module_deps[dep].dep) == 0) {
+            /* another module depends on us */
+            export_symbols = true;
+        }
+    }
+
+    if (g_hash_table_contains(loaded_modules, module_name)) {
+        g_free(module_name);
+        return true;
+    }
+    g_hash_table_add(loaded_modules, module_name);
+
+    search_dir = getenv("QEMU_MODULE_DIR");
+    if (search_dir != NULL) {
+        dirs[n_dirs++] = g_strdup_printf("%s", search_dir);
+    }
+    dirs[n_dirs++] = get_relocated_path(CONFIG_QEMU_MODDIR);
+    dirs[n_dirs++] = g_strdup(qemu_get_exec_dir());
+
+#ifdef CONFIG_MODULE_UPGRADES
+    version_dir = g_strcanon(g_strdup(QEMU_PKGVERSION),
+                             G_CSET_A_2_Z G_CSET_a_2_z G_CSET_DIGITS "+-.~",
+                             '_');
+    dirs[n_dirs++] = g_strdup_printf("/var/run/qemu/%s", version_dir);
+#endif
+
+    assert(n_dirs <= ARRAY_SIZE(dirs));
+
+    for (i = 0; i < n_dirs; i++) {
+        fname = g_strdup_printf("%s/%s%s",
+                dirs[i], module_name, CONFIG_HOST_DSOSUF);
+        ret = module_load_file(fname, mayfail, export_symbols);
+        g_free(fname);
+        fname = NULL;
+        /* Try loading until loaded a module file */
+        if (!ret) {
+            success = true;
+            break;
+        }
+    }
+
+    if (!success) {
+        g_hash_table_remove(loaded_modules, module_name);
+        g_free(module_name);
+    }
+
+    for (i = 0; i < n_dirs; i++) {
+        g_free(dirs[i]);
+    }
+
+#endif
+    return success;
+}
+
+/*
+ * Building devices and other qom objects modular is mostly useful in
+ * case they have dependencies to external shared libraries, so we can
+ * cut down the core qemu library dependencies.  Which is the case for
+ * only a very few devices & objects.
+ *
+ * So with the expectation that this will be rather the exception than
+ * the rule and the list will not gain that many entries, go with a
+ * simple manually maintained list for now.
+ *
+ * The list must be sorted by module (module_load_qom_all() needs this).
+ */
+static struct {
+    const char *type;
+    const char *prefix;
+    const char *module;
+} const qom_modules[] = {
+    { "ccid-card-passthru",    "hw-", "usb-smartcard"         },
+    { "ccid-card-emulated",    "hw-", "usb-smartcard"         },
+    { "usb-redir",             "hw-", "usb-redirect"          },
+    { "qxl-vga",               "hw-", "display-qxl"           },
+    { "qxl",                   "hw-", "display-qxl"           },
+    { "virtio-gpu-device",     "hw-", "display-virtio-gpu"    },
+    { "vhost-user-gpu",        "hw-", "display-virtio-gpu"    },
+    { "virtio-gpu-pci-base",   "hw-", "display-virtio-gpu-pci" },
+    { "virtio-gpu-pci",        "hw-", "display-virtio-gpu-pci" },
+    { "vhost-user-gpu-pci",    "hw-", "display-virtio-gpu-pci" },
+    { "virtio-gpu-ccw",        "hw-", "s390x-virtio-gpu-ccw"   },
+    { "virtio-vga-base",       "hw-", "display-virtio-vga"    },
+    { "virtio-vga",            "hw-", "display-virtio-vga"    },
+    { "vhost-user-vga",        "hw-", "display-virtio-vga"    },
+    { "chardev-braille",       "chardev-", "baum"             },
+    { "chardev-spicevmc",      "chardev-", "spice"            },
+    { "chardev-spiceport",     "chardev-", "spice"            },
+};
+
+static bool module_loaded_qom_all;
+
+void module_load_qom_one(const char *type)
+{
+    int i;
+
+    if (!type) {
+        return;
+    }
+    for (i = 0; i < ARRAY_SIZE(qom_modules); i++) {
+        if (strcmp(qom_modules[i].type, type) == 0) {
+            module_load_one(qom_modules[i].prefix,
+                            qom_modules[i].module,
+                            false);
+            return;
+        }
+    }
+}
+
+void module_load_qom_all(void)
+{
+    int i;
+
+    if (module_loaded_qom_all) {
+        return;
+    }
+    for (i = 0; i < ARRAY_SIZE(qom_modules); i++) {
+        if (i > 0 && (strcmp(qom_modules[i - 1].module,
+                             qom_modules[i].module) == 0 &&
+                      strcmp(qom_modules[i - 1].prefix,
+                             qom_modules[i].prefix) == 0)) {
+            /* one module implementing multiple types -> load only once */
+            continue;
+        }
+        module_load_one(qom_modules[i].prefix, qom_modules[i].module, true);
+    }
+    module_loaded_qom_all = true;
+}
diff --git a/util/notify.c b/util/notify.c
new file mode 100644
index 000000000..76bab212a
--- /dev/null
+++ b/util/notify.c
@@ -0,0 +1,76 @@
+/*
+ * Notifier lists
+ *
+ * Copyright IBM, Corp. 2010
+ *
+ * Authors:
+ *  Anthony Liguori   
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2.  See
+ * the COPYING file in the top-level directory.
+ *
+ * Contributions after 2012-01-13 are licensed under the terms of the
+ * GNU GPL, version 2 or (at your option) any later version.
+ */
+
+#include "qemu/osdep.h"
+#include "qemu/notify.h"
+
+void notifier_list_init(NotifierList *list)
+{
+    QLIST_INIT(&list->notifiers);
+}
+
+void notifier_list_add(NotifierList *list, Notifier *notifier)
+{
+    QLIST_INSERT_HEAD(&list->notifiers, notifier, node);
+}
+
+void notifier_remove(Notifier *notifier)
+{
+    QLIST_REMOVE(notifier, node);
+}
+
+void notifier_list_notify(NotifierList *list, void *data)
+{
+    Notifier *notifier, *next;
+
+    QLIST_FOREACH_SAFE(notifier, &list->notifiers, node, next) {
+        notifier->notify(notifier, data);
+    }
+}
+
+bool notifier_list_empty(NotifierList *list)
+{
+    return QLIST_EMPTY(&list->notifiers);
+}
+
+void notifier_with_return_list_init(NotifierWithReturnList *list)
+{
+    QLIST_INIT(&list->notifiers);
+}
+
+void notifier_with_return_list_add(NotifierWithReturnList *list,
+                                   NotifierWithReturn *notifier)
+{
+    QLIST_INSERT_HEAD(&list->notifiers, notifier, node);
+}
+
+void notifier_with_return_remove(NotifierWithReturn *notifier)
+{
+    QLIST_REMOVE(notifier, node);
+}
+
+int notifier_with_return_list_notify(NotifierWithReturnList *list, void *data)
+{
+    NotifierWithReturn *notifier, *next;
+    int ret = 0;
+
+    QLIST_FOREACH_SAFE(notifier, &list->notifiers, node, next) {
+        ret = notifier->notify(notifier, data);
+        if (ret != 0) {
+            break;
+        }
+    }
+    return ret;
+}
diff --git a/util/nvdimm-utils.c b/util/nvdimm-utils.c
new file mode 100644
index 000000000..5cc768ca4
--- /dev/null
+++ b/util/nvdimm-utils.c
@@ -0,0 +1,29 @@
+#include "qemu/nvdimm-utils.h"
+#include "hw/mem/nvdimm.h"
+
+static int nvdimm_device_list(Object *obj, void *opaque)
+{
+    GSList **list = opaque;
+
+    if (object_dynamic_cast(obj, TYPE_NVDIMM)) {
+        *list = g_slist_append(*list, DEVICE(obj));
+    }
+
+    object_child_foreach(obj, nvdimm_device_list, opaque);
+    return 0;
+}
+
+/*
+ * inquire NVDIMM devices and link them into the list which is
+ * returned to the caller.
+ *
+ * Note: it is the caller's responsibility to free the list to avoid
+ * memory leak.
+ */
+GSList *nvdimm_get_device_list(void)
+{
+    GSList *list = NULL;
+
+    object_child_foreach(qdev_get_machine(), nvdimm_device_list, &list);
+    return list;
+}
diff --git a/util/osdep.c b/util/osdep.c
new file mode 100644
index 000000000..66d01b916
--- /dev/null
+++ b/util/osdep.c
@@ -0,0 +1,608 @@
+/*
+ * QEMU low level functions
+ *
+ * Copyright (c) 2003 Fabrice Bellard
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+#include "qemu/osdep.h"
+#include "qapi/error.h"
+
+/* Needed early for CONFIG_BSD etc. */
+
+#ifdef CONFIG_SOLARIS
+#include 
+/* See MySQL bug #7156 (http://bugs.mysql.com/bug.php?id=7156) for
+   discussion about Solaris header problems */
+extern int madvise(char *, size_t, int);
+#endif
+
+#include "qemu-common.h"
+#include "qemu/cutils.h"
+#include "qemu/sockets.h"
+#include "qemu/error-report.h"
+#include "monitor/monitor.h"
+
+static bool fips_enabled = false;
+
+static const char *hw_version = QEMU_HW_VERSION;
+
+int socket_set_cork(int fd, int v)
+{
+#if defined(SOL_TCP) && defined(TCP_CORK)
+    return qemu_setsockopt(fd, SOL_TCP, TCP_CORK, &v, sizeof(v));
+#else
+    return 0;
+#endif
+}
+
+int socket_set_nodelay(int fd)
+{
+    int v = 1;
+    return qemu_setsockopt(fd, IPPROTO_TCP, TCP_NODELAY, &v, sizeof(v));
+}
+
+int qemu_madvise(void *addr, size_t len, int advice)
+{
+    if (advice == QEMU_MADV_INVALID) {
+        errno = EINVAL;
+        return -1;
+    }
+#if defined(CONFIG_MADVISE)
+    return madvise(addr, len, advice);
+#elif defined(CONFIG_POSIX_MADVISE)
+    return posix_madvise(addr, len, advice);
+#else
+    errno = EINVAL;
+    return -1;
+#endif
+}
+
+static int qemu_mprotect__osdep(void *addr, size_t size, int prot)
+{
+    g_assert(!((uintptr_t)addr & ~qemu_real_host_page_mask));
+    g_assert(!(size & ~qemu_real_host_page_mask));
+
+#ifdef _WIN32
+    DWORD old_protect;
+
+    if (!VirtualProtect(addr, size, prot, &old_protect)) {
+        g_autofree gchar *emsg = g_win32_error_message(GetLastError());
+        error_report("%s: VirtualProtect failed: %s", __func__, emsg);
+        return -1;
+    }
+    return 0;
+#else
+    if (mprotect(addr, size, prot)) {
+        error_report("%s: mprotect failed: %s", __func__, strerror(errno));
+        return -1;
+    }
+    return 0;
+#endif
+}
+
+int qemu_mprotect_rwx(void *addr, size_t size)
+{
+#ifdef _WIN32
+    return qemu_mprotect__osdep(addr, size, PAGE_EXECUTE_READWRITE);
+#else
+    return qemu_mprotect__osdep(addr, size, PROT_READ | PROT_WRITE | PROT_EXEC);
+#endif
+}
+
+int qemu_mprotect_none(void *addr, size_t size)
+{
+#ifdef _WIN32
+    return qemu_mprotect__osdep(addr, size, PAGE_NOACCESS);
+#else
+    return qemu_mprotect__osdep(addr, size, PROT_NONE);
+#endif
+}
+
+#ifndef _WIN32
+
+static int fcntl_op_setlk = -1;
+static int fcntl_op_getlk = -1;
+
+/*
+ * Dups an fd and sets the flags
+ */
+int qemu_dup_flags(int fd, int flags)
+{
+    int ret;
+    int serrno;
+    int dup_flags;
+
+    ret = qemu_dup(fd);
+    if (ret == -1) {
+        goto fail;
+    }
+
+    dup_flags = fcntl(ret, F_GETFL);
+    if (dup_flags == -1) {
+        goto fail;
+    }
+
+    if ((flags & O_SYNC) != (dup_flags & O_SYNC)) {
+        errno = EINVAL;
+        goto fail;
+    }
+
+    /* Set/unset flags that we can with fcntl */
+    if (fcntl(ret, F_SETFL, flags) == -1) {
+        goto fail;
+    }
+
+    /* Truncate the file in the cases that open() would truncate it */
+    if (flags & O_TRUNC ||
+            ((flags & (O_CREAT | O_EXCL)) == (O_CREAT | O_EXCL))) {
+        if (ftruncate(ret, 0) == -1) {
+            goto fail;
+        }
+    }
+
+    return ret;
+
+fail:
+    serrno = errno;
+    if (ret != -1) {
+        close(ret);
+    }
+    errno = serrno;
+    return -1;
+}
+
+int qemu_dup(int fd)
+{
+    int ret;
+#ifdef F_DUPFD_CLOEXEC
+    ret = fcntl(fd, F_DUPFD_CLOEXEC, 0);
+#else
+    ret = dup(fd);
+    if (ret != -1) {
+        qemu_set_cloexec(ret);
+    }
+#endif
+    return ret;
+}
+
+static int qemu_parse_fdset(const char *param)
+{
+    return qemu_parse_fd(param);
+}
+
+static void qemu_probe_lock_ops(void)
+{
+    if (fcntl_op_setlk == -1) {
+#ifdef F_OFD_SETLK
+        int fd;
+        int ret;
+        struct flock fl = {
+            .l_whence = SEEK_SET,
+            .l_start  = 0,
+            .l_len    = 0,
+            .l_type   = F_WRLCK,
+        };
+
+        fd = open("/dev/null", O_RDWR);
+        if (fd < 0) {
+            fprintf(stderr,
+                    "Failed to open /dev/null for OFD lock probing: %s\n",
+                    strerror(errno));
+            fcntl_op_setlk = F_SETLK;
+            fcntl_op_getlk = F_GETLK;
+            return;
+        }
+        ret = fcntl(fd, F_OFD_GETLK, &fl);
+        close(fd);
+        if (!ret) {
+            fcntl_op_setlk = F_OFD_SETLK;
+            fcntl_op_getlk = F_OFD_GETLK;
+        } else {
+            fcntl_op_setlk = F_SETLK;
+            fcntl_op_getlk = F_GETLK;
+        }
+#else
+        fcntl_op_setlk = F_SETLK;
+        fcntl_op_getlk = F_GETLK;
+#endif
+    }
+}
+
+bool qemu_has_ofd_lock(void)
+{
+    qemu_probe_lock_ops();
+#ifdef F_OFD_SETLK
+    return fcntl_op_setlk == F_OFD_SETLK;
+#else
+    return false;
+#endif
+}
+
+static int qemu_lock_fcntl(int fd, int64_t start, int64_t len, int fl_type)
+{
+    int ret;
+    struct flock fl = {
+        .l_whence = SEEK_SET,
+        .l_start  = start,
+        .l_len    = len,
+        .l_type   = fl_type,
+    };
+    qemu_probe_lock_ops();
+    do {
+        ret = fcntl(fd, fcntl_op_setlk, &fl);
+    } while (ret == -1 && errno == EINTR);
+    return ret == -1 ? -errno : 0;
+}
+
+int qemu_lock_fd(int fd, int64_t start, int64_t len, bool exclusive)
+{
+    return qemu_lock_fcntl(fd, start, len, exclusive ? F_WRLCK : F_RDLCK);
+}
+
+int qemu_unlock_fd(int fd, int64_t start, int64_t len)
+{
+    return qemu_lock_fcntl(fd, start, len, F_UNLCK);
+}
+
+int qemu_lock_fd_test(int fd, int64_t start, int64_t len, bool exclusive)
+{
+    int ret;
+    struct flock fl = {
+        .l_whence = SEEK_SET,
+        .l_start  = start,
+        .l_len    = len,
+        .l_type   = exclusive ? F_WRLCK : F_RDLCK,
+    };
+    qemu_probe_lock_ops();
+    ret = fcntl(fd, fcntl_op_getlk, &fl);
+    if (ret == -1) {
+        return -errno;
+    } else {
+        return fl.l_type == F_UNLCK ? 0 : -EAGAIN;
+    }
+}
+#endif
+
+static int qemu_open_cloexec(const char *name, int flags, mode_t mode)
+{
+    int ret;
+#ifdef O_CLOEXEC
+    ret = open(name, flags | O_CLOEXEC, mode);
+#else
+    ret = open(name, flags, mode);
+    if (ret >= 0) {
+        qemu_set_cloexec(ret);
+    }
+#endif
+    return ret;
+}
+
+/*
+ * Opens a file with FD_CLOEXEC set
+ */
+static int
+qemu_open_internal(const char *name, int flags, mode_t mode, Error **errp)
+{
+    int ret;
+
+#ifndef _WIN32
+    const char *fdset_id_str;
+
+    /* Attempt dup of fd from fd set */
+    if (strstart(name, "/dev/fdset/", &fdset_id_str)) {
+        int64_t fdset_id;
+        int dupfd;
+
+        fdset_id = qemu_parse_fdset(fdset_id_str);
+        if (fdset_id == -1) {
+            error_setg(errp, "Could not parse fdset %s", name);
+            errno = EINVAL;
+            return -1;
+        }
+
+        dupfd = monitor_fdset_dup_fd_add(fdset_id, flags);
+        if (dupfd == -1) {
+            error_setg_errno(errp, errno, "Could not dup FD for %s flags %x",
+                             name, flags);
+            return -1;
+        }
+
+        return dupfd;
+    }
+#endif
+
+    ret = qemu_open_cloexec(name, flags, mode);
+
+    if (ret == -1) {
+        const char *action = flags & O_CREAT ? "create" : "open";
+#ifdef O_DIRECT
+        /* Give more helpful error message for O_DIRECT */
+        if (errno == EINVAL && (flags & O_DIRECT)) {
+            ret = open(name, flags & ~O_DIRECT, mode);
+            if (ret != -1) {
+                close(ret);
+                error_setg(errp, "Could not %s '%s': "
+                           "filesystem does not support O_DIRECT",
+                           action, name);
+                errno = EINVAL; /* restore first open()'s errno */
+                return -1;
+            }
+        }
+#endif /* O_DIRECT */
+        error_setg_errno(errp, errno, "Could not %s '%s'",
+                         action, name);
+    }
+
+    return ret;
+}
+
+
+int qemu_open(const char *name, int flags, Error **errp)
+{
+    assert(!(flags & O_CREAT));
+
+    return qemu_open_internal(name, flags, 0, errp);
+}
+
+
+int qemu_create(const char *name, int flags, mode_t mode, Error **errp)
+{
+    assert(!(flags & O_CREAT));
+
+    return qemu_open_internal(name, flags | O_CREAT, mode, errp);
+}
+
+
+int qemu_open_old(const char *name, int flags, ...)
+{
+    va_list ap;
+    mode_t mode = 0;
+    int ret;
+
+    va_start(ap, flags);
+    if (flags & O_CREAT) {
+        mode = va_arg(ap, int);
+    }
+    va_end(ap);
+
+    ret = qemu_open_internal(name, flags, mode, NULL);
+
+#ifdef O_DIRECT
+    if (ret == -1 && errno == EINVAL && (flags & O_DIRECT)) {
+        error_report("file system may not support O_DIRECT");
+        errno = EINVAL; /* in case it was clobbered */
+    }
+#endif /* O_DIRECT */
+
+    return ret;
+}
+
+int qemu_close(int fd)
+{
+    int64_t fdset_id;
+
+    /* Close fd that was dup'd from an fdset */
+    fdset_id = monitor_fdset_dup_fd_find(fd);
+    if (fdset_id != -1) {
+        int ret;
+
+        ret = close(fd);
+        if (ret == 0) {
+            monitor_fdset_dup_fd_remove(fd);
+        }
+
+        return ret;
+    }
+
+    return close(fd);
+}
+
+/*
+ * Delete a file from the filesystem, unless the filename is /dev/fdset/...
+ *
+ * Returns: On success, zero is returned.  On error, -1 is returned,
+ * and errno is set appropriately.
+ */
+int qemu_unlink(const char *name)
+{
+    if (g_str_has_prefix(name, "/dev/fdset/")) {
+        return 0;
+    }
+
+    return unlink(name);
+}
+
+/*
+ * A variant of write(2) which handles partial write.
+ *
+ * Return the number of bytes transferred.
+ * Set errno if fewer than `count' bytes are written.
+ *
+ * This function don't work with non-blocking fd's.
+ * Any of the possibilities with non-blocking fd's is bad:
+ *   - return a short write (then name is wrong)
+ *   - busy wait adding (errno == EAGAIN) to the loop
+ */
+ssize_t qemu_write_full(int fd, const void *buf, size_t count)
+{
+    ssize_t ret = 0;
+    ssize_t total = 0;
+
+    while (count) {
+        ret = write(fd, buf, count);
+        if (ret < 0) {
+            if (errno == EINTR)
+                continue;
+            break;
+        }
+
+        count -= ret;
+        buf += ret;
+        total += ret;
+    }
+
+    return total;
+}
+
+/*
+ * Opens a socket with FD_CLOEXEC set
+ */
+int qemu_socket(int domain, int type, int protocol)
+{
+    int ret;
+
+#ifdef SOCK_CLOEXEC
+    ret = socket(domain, type | SOCK_CLOEXEC, protocol);
+    if (ret != -1 || errno != EINVAL) {
+        return ret;
+    }
+#endif
+    ret = socket(domain, type, protocol);
+    if (ret >= 0) {
+        qemu_set_cloexec(ret);
+    }
+
+    return ret;
+}
+
+/*
+ * Accept a connection and set FD_CLOEXEC
+ */
+int qemu_accept(int s, struct sockaddr *addr, socklen_t *addrlen)
+{
+    int ret;
+
+#ifdef CONFIG_ACCEPT4
+    ret = accept4(s, addr, addrlen, SOCK_CLOEXEC);
+    if (ret != -1 || errno != ENOSYS) {
+        return ret;
+    }
+#endif
+    ret = accept(s, addr, addrlen);
+    if (ret >= 0) {
+        qemu_set_cloexec(ret);
+    }
+
+    return ret;
+}
+
+void qemu_set_hw_version(const char *version)
+{
+    hw_version = version;
+}
+
+const char *qemu_hw_version(void)
+{
+    return hw_version;
+}
+
+void fips_set_state(bool requested)
+{
+#ifdef __linux__
+    if (requested) {
+        FILE *fds = fopen("/proc/sys/crypto/fips_enabled", "r");
+        if (fds != NULL) {
+            fips_enabled = (fgetc(fds) == '1');
+            fclose(fds);
+        }
+    }
+#else
+    fips_enabled = false;
+#endif /* __linux__ */
+
+#ifdef _FIPS_DEBUG
+    fprintf(stderr, "FIPS mode %s (requested %s)\n",
+            (fips_enabled ? "enabled" : "disabled"),
+            (requested ? "enabled" : "disabled"));
+#endif
+}
+
+bool fips_get_state(void)
+{
+    return fips_enabled;
+}
+
+#ifdef _WIN32
+static void socket_cleanup(void)
+{
+    WSACleanup();
+}
+#endif
+
+int socket_init(void)
+{
+#ifdef _WIN32
+    WSADATA Data;
+    int ret, err;
+
+    ret = WSAStartup(MAKEWORD(2, 2), &Data);
+    if (ret != 0) {
+        err = WSAGetLastError();
+        fprintf(stderr, "WSAStartup: %d\n", err);
+        return -1;
+    }
+    atexit(socket_cleanup);
+#endif
+    return 0;
+}
+
+
+#ifndef CONFIG_IOVEC
+/* helper function for iov_send_recv() */
+static ssize_t
+readv_writev(int fd, const struct iovec *iov, int iov_cnt, bool do_write)
+{
+    unsigned i = 0;
+    ssize_t ret = 0;
+    while (i < iov_cnt) {
+        ssize_t r = do_write
+            ? write(fd, iov[i].iov_base, iov[i].iov_len)
+            : read(fd, iov[i].iov_base, iov[i].iov_len);
+        if (r > 0) {
+            ret += r;
+        } else if (!r) {
+            break;
+        } else if (errno == EINTR) {
+            continue;
+        } else {
+            /* else it is some "other" error,
+             * only return if there was no data processed. */
+            if (ret == 0) {
+                ret = -1;
+            }
+            break;
+        }
+        i++;
+    }
+    return ret;
+}
+
+ssize_t
+readv(int fd, const struct iovec *iov, int iov_cnt)
+{
+    return readv_writev(fd, iov, iov_cnt, false);
+}
+
+ssize_t
+writev(int fd, const struct iovec *iov, int iov_cnt)
+{
+    return readv_writev(fd, iov, iov_cnt, true);
+}
+#endif
diff --git a/util/oslib-posix.c b/util/oslib-posix.c
new file mode 100644
index 000000000..f15234b5c
--- /dev/null
+++ b/util/oslib-posix.c
@@ -0,0 +1,855 @@
+/*
+ * os-posix-lib.c
+ *
+ * Copyright (c) 2003-2008 Fabrice Bellard
+ * Copyright (c) 2010 Red Hat, Inc.
+ *
+ * QEMU library functions on POSIX which are shared between QEMU and
+ * the QEMU tools.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+#include "qemu/osdep.h"
+#include 
+
+#include 
+
+#include "qemu-common.h"
+#include "sysemu/sysemu.h"
+#include "trace.h"
+#include "qapi/error.h"
+#include "qemu/sockets.h"
+#include "qemu/thread.h"
+#include 
+#include "qemu/cutils.h"
+
+#ifdef CONFIG_LINUX
+#include 
+#endif
+
+#ifdef __FreeBSD__
+#include 
+#include 
+#include 
+#include 
+#endif
+
+#ifdef __NetBSD__
+#include 
+#include 
+#endif
+
+#ifdef __APPLE__
+#include 
+#endif
+
+#ifdef __HAIKU__
+#include 
+#endif
+
+#include "qemu/mmap-alloc.h"
+
+#ifdef CONFIG_DEBUG_STACK_USAGE
+#include "qemu/error-report.h"
+#endif
+
+#define MAX_MEM_PREALLOC_THREAD_COUNT 16
+
+struct MemsetThread {
+    char *addr;
+    size_t numpages;
+    size_t hpagesize;
+    QemuThread pgthread;
+    sigjmp_buf env;
+};
+typedef struct MemsetThread MemsetThread;
+
+static MemsetThread *memset_thread;
+static int memset_num_threads;
+static bool memset_thread_failed;
+
+static QemuMutex page_mutex;
+static QemuCond page_cond;
+static bool threads_created_flag;
+
+int qemu_get_thread_id(void)
+{
+#if defined(__linux__)
+    return syscall(SYS_gettid);
+#elif defined(__FreeBSD__)
+    /* thread id is up to INT_MAX */
+    long tid;
+    thr_self(&tid);
+    return (int)tid;
+#elif defined(__NetBSD__)
+    return _lwp_self();
+#elif defined(__OpenBSD__)
+    return getthrid();
+#else
+    return getpid();
+#endif
+}
+
+int qemu_daemon(int nochdir, int noclose)
+{
+    return daemon(nochdir, noclose);
+}
+
+bool qemu_write_pidfile(const char *path, Error **errp)
+{
+    int fd;
+    char pidstr[32];
+
+    while (1) {
+        struct stat a, b;
+        struct flock lock = {
+            .l_type = F_WRLCK,
+            .l_whence = SEEK_SET,
+            .l_len = 0,
+        };
+
+        fd = qemu_open_old(path, O_CREAT | O_WRONLY, S_IRUSR | S_IWUSR);
+        if (fd == -1) {
+            error_setg_errno(errp, errno, "Cannot open pid file");
+            return false;
+        }
+
+        if (fstat(fd, &b) < 0) {
+            error_setg_errno(errp, errno, "Cannot stat file");
+            goto fail_close;
+        }
+
+        if (fcntl(fd, F_SETLK, &lock)) {
+            error_setg_errno(errp, errno, "Cannot lock pid file");
+            goto fail_close;
+        }
+
+        /*
+         * Now make sure the path we locked is the same one that now
+         * exists on the filesystem.
+         */
+        if (stat(path, &a) < 0) {
+            /*
+             * PID file disappeared, someone else must be racing with
+             * us, so try again.
+             */
+            close(fd);
+            continue;
+        }
+
+        if (a.st_ino == b.st_ino) {
+            break;
+        }
+
+        /*
+         * PID file was recreated, someone else must be racing with
+         * us, so try again.
+         */
+        close(fd);
+    }
+
+    if (ftruncate(fd, 0) < 0) {
+        error_setg_errno(errp, errno, "Failed to truncate pid file");
+        goto fail_unlink;
+    }
+
+    snprintf(pidstr, sizeof(pidstr), FMT_pid "\n", getpid());
+    if (write(fd, pidstr, strlen(pidstr)) != strlen(pidstr)) {
+        error_setg(errp, "Failed to write pid file");
+        goto fail_unlink;
+    }
+
+    return true;
+
+fail_unlink:
+    unlink(path);
+fail_close:
+    close(fd);
+    return false;
+}
+
+void *qemu_oom_check(void *ptr)
+{
+    if (ptr == NULL) {
+        fprintf(stderr, "Failed to allocate memory: %s\n", strerror(errno));
+        abort();
+    }
+    return ptr;
+}
+
+void *qemu_try_memalign(size_t alignment, size_t size)
+{
+    void *ptr;
+
+    if (alignment < sizeof(void*)) {
+        alignment = sizeof(void*);
+    }
+
+#if defined(CONFIG_POSIX_MEMALIGN)
+    int ret;
+    ret = posix_memalign(&ptr, alignment, size);
+    if (ret != 0) {
+        errno = ret;
+        ptr = NULL;
+    }
+#elif defined(CONFIG_BSD)
+    ptr = valloc(size);
+#else
+    ptr = memalign(alignment, size);
+#endif
+    trace_qemu_memalign(alignment, size, ptr);
+    return ptr;
+}
+
+void *qemu_memalign(size_t alignment, size_t size)
+{
+    return qemu_oom_check(qemu_try_memalign(alignment, size));
+}
+
+/* alloc shared memory pages */
+void *qemu_anon_ram_alloc(size_t size, uint64_t *alignment, bool shared)
+{
+    size_t align = QEMU_VMALLOC_ALIGN;
+    void *ptr = qemu_ram_mmap(-1, size, align, shared, false);
+
+    if (ptr == MAP_FAILED) {
+        return NULL;
+    }
+
+    if (alignment) {
+        *alignment = align;
+    }
+
+    trace_qemu_anon_ram_alloc(size, ptr);
+    return ptr;
+}
+
+void qemu_vfree(void *ptr)
+{
+    trace_qemu_vfree(ptr);
+    free(ptr);
+}
+
+void qemu_anon_ram_free(void *ptr, size_t size)
+{
+    trace_qemu_anon_ram_free(ptr, size);
+    qemu_ram_munmap(-1, ptr, size);
+}
+
+void qemu_set_block(int fd)
+{
+    int f;
+    f = fcntl(fd, F_GETFL);
+    assert(f != -1);
+    f = fcntl(fd, F_SETFL, f & ~O_NONBLOCK);
+    assert(f != -1);
+}
+
+int qemu_try_set_nonblock(int fd)
+{
+    int f;
+    f = fcntl(fd, F_GETFL);
+    if (f == -1) {
+        return -errno;
+    }
+    if (fcntl(fd, F_SETFL, f | O_NONBLOCK) == -1) {
+#ifdef __OpenBSD__
+        /*
+         * Previous to OpenBSD 6.3, fcntl(F_SETFL) is not permitted on
+         * memory devices and sets errno to ENODEV.
+         * It's OK if we fail to set O_NONBLOCK on devices like /dev/null,
+         * because they will never block anyway.
+         */
+        if (errno == ENODEV) {
+            return 0;
+        }
+#endif
+        return -errno;
+    }
+    return 0;
+}
+
+void qemu_set_nonblock(int fd)
+{
+    int f;
+    f = qemu_try_set_nonblock(fd);
+    assert(f == 0);
+}
+
+int socket_set_fast_reuse(int fd)
+{
+    int val = 1, ret;
+
+    ret = setsockopt(fd, SOL_SOCKET, SO_REUSEADDR,
+                     (const char *)&val, sizeof(val));
+
+    assert(ret == 0);
+
+    return ret;
+}
+
+void qemu_set_cloexec(int fd)
+{
+    int f;
+    f = fcntl(fd, F_GETFD);
+    assert(f != -1);
+    f = fcntl(fd, F_SETFD, f | FD_CLOEXEC);
+    assert(f != -1);
+}
+
+/*
+ * Creates a pipe with FD_CLOEXEC set on both file descriptors
+ */
+int qemu_pipe(int pipefd[2])
+{
+    int ret;
+
+#ifdef CONFIG_PIPE2
+    ret = pipe2(pipefd, O_CLOEXEC);
+    if (ret != -1 || errno != ENOSYS) {
+        return ret;
+    }
+#endif
+    ret = pipe(pipefd);
+    if (ret == 0) {
+        qemu_set_cloexec(pipefd[0]);
+        qemu_set_cloexec(pipefd[1]);
+    }
+
+    return ret;
+}
+
+char *
+qemu_get_local_state_pathname(const char *relative_pathname)
+{
+    g_autofree char *dir = g_strdup_printf("%s/%s",
+                                           CONFIG_QEMU_LOCALSTATEDIR,
+                                           relative_pathname);
+    return get_relocated_path(dir);
+}
+
+void qemu_set_tty_echo(int fd, bool echo)
+{
+    struct termios tty;
+
+    tcgetattr(fd, &tty);
+
+    if (echo) {
+        tty.c_lflag |= ECHO | ECHONL | ICANON | IEXTEN;
+    } else {
+        tty.c_lflag &= ~(ECHO | ECHONL | ICANON | IEXTEN);
+    }
+
+    tcsetattr(fd, TCSANOW, &tty);
+}
+
+static const char *exec_dir;
+
+void qemu_init_exec_dir(const char *argv0)
+{
+    char *p = NULL;
+    char buf[PATH_MAX];
+
+    if (exec_dir) {
+        return;
+    }
+
+#if defined(__linux__)
+    {
+        int len;
+        len = readlink("/proc/self/exe", buf, sizeof(buf) - 1);
+        if (len > 0) {
+            buf[len] = 0;
+            p = buf;
+        }
+    }
+#elif defined(__FreeBSD__) \
+      || (defined(__NetBSD__) && defined(KERN_PROC_PATHNAME))
+    {
+#if defined(__FreeBSD__)
+        static int mib[4] = {CTL_KERN, KERN_PROC, KERN_PROC_PATHNAME, -1};
+#else
+        static int mib[4] = {CTL_KERN, KERN_PROC_ARGS, -1, KERN_PROC_PATHNAME};
+#endif
+        size_t len = sizeof(buf) - 1;
+
+        *buf = '\0';
+        if (!sysctl(mib, ARRAY_SIZE(mib), buf, &len, NULL, 0) &&
+            *buf) {
+            buf[sizeof(buf) - 1] = '\0';
+            p = buf;
+        }
+    }
+#elif defined(__APPLE__)
+    {
+        char fpath[PATH_MAX];
+        uint32_t len = sizeof(fpath);
+        if (_NSGetExecutablePath(fpath, &len) == 0) {
+            p = realpath(fpath, buf);
+            if (!p) {
+                return;
+            }
+        }
+    }
+#elif defined(__HAIKU__)
+    {
+        image_info ii;
+        int32_t c = 0;
+
+        *buf = '\0';
+        while (get_next_image_info(0, &c, &ii) == B_OK) {
+            if (ii.type == B_APP_IMAGE) {
+                strncpy(buf, ii.name, sizeof(buf));
+                buf[sizeof(buf) - 1] = 0;
+                p = buf;
+                break;
+            }
+        }
+    }
+#endif
+    /* If we don't have any way of figuring out the actual executable
+       location then try argv[0].  */
+    if (!p && argv0) {
+        p = realpath(argv0, buf);
+    }
+    if (p) {
+        exec_dir = g_path_get_dirname(p);
+    } else {
+        exec_dir = CONFIG_BINDIR;
+    }
+}
+
+const char *qemu_get_exec_dir(void)
+{
+    return exec_dir;
+}
+
+static void sigbus_handler(int signal)
+{
+    int i;
+    if (memset_thread) {
+        for (i = 0; i < memset_num_threads; i++) {
+            if (qemu_thread_is_self(&memset_thread[i].pgthread)) {
+                siglongjmp(memset_thread[i].env, 1);
+            }
+        }
+    }
+}
+
+static void *do_touch_pages(void *arg)
+{
+    MemsetThread *memset_args = (MemsetThread *)arg;
+    sigset_t set, oldset;
+
+    /*
+     * On Linux, the page faults from the loop below can cause mmap_sem
+     * contention with allocation of the thread stacks.  Do not start
+     * clearing until all threads have been created.
+     */
+    qemu_mutex_lock(&page_mutex);
+    while(!threads_created_flag){
+        qemu_cond_wait(&page_cond, &page_mutex);
+    }
+    qemu_mutex_unlock(&page_mutex);
+
+    /* unblock SIGBUS */
+    sigemptyset(&set);
+    sigaddset(&set, SIGBUS);
+    pthread_sigmask(SIG_UNBLOCK, &set, &oldset);
+
+    if (sigsetjmp(memset_args->env, 1)) {
+        memset_thread_failed = true;
+    } else {
+        char *addr = memset_args->addr;
+        size_t numpages = memset_args->numpages;
+        size_t hpagesize = memset_args->hpagesize;
+        size_t i;
+        for (i = 0; i < numpages; i++) {
+            /*
+             * Read & write back the same value, so we don't
+             * corrupt existing user/app data that might be
+             * stored.
+             *
+             * 'volatile' to stop compiler optimizing this away
+             * to a no-op
+             *
+             * TODO: get a better solution from kernel so we
+             * don't need to write at all so we don't cause
+             * wear on the storage backing the region...
+             */
+            *(volatile char *)addr = *addr;
+            addr += hpagesize;
+        }
+    }
+    pthread_sigmask(SIG_SETMASK, &oldset, NULL);
+    return NULL;
+}
+
+static inline int get_memset_num_threads(int smp_cpus)
+{
+    long host_procs = sysconf(_SC_NPROCESSORS_ONLN);
+    int ret = 1;
+
+    if (host_procs > 0) {
+        ret = MIN(MIN(host_procs, MAX_MEM_PREALLOC_THREAD_COUNT), smp_cpus);
+    }
+    /* In case sysconf() fails, we fall back to single threaded */
+    return ret;
+}
+
+static bool touch_all_pages(char *area, size_t hpagesize, size_t numpages,
+                            int smp_cpus)
+{
+    static gsize initialized = 0;
+    size_t numpages_per_thread, leftover;
+    char *addr = area;
+    int i = 0;
+
+    if (g_once_init_enter(&initialized)) {
+        qemu_mutex_init(&page_mutex);
+        qemu_cond_init(&page_cond);
+        g_once_init_leave(&initialized, 1);
+    }
+
+    memset_thread_failed = false;
+    threads_created_flag = false;
+    memset_num_threads = get_memset_num_threads(smp_cpus);
+    memset_thread = g_new0(MemsetThread, memset_num_threads);
+    numpages_per_thread = numpages / memset_num_threads;
+    leftover = numpages % memset_num_threads;
+    for (i = 0; i < memset_num_threads; i++) {
+        memset_thread[i].addr = addr;
+        memset_thread[i].numpages = numpages_per_thread + (i < leftover);
+        memset_thread[i].hpagesize = hpagesize;
+        qemu_thread_create(&memset_thread[i].pgthread, "touch_pages",
+                           do_touch_pages, &memset_thread[i],
+                           QEMU_THREAD_JOINABLE);
+        addr += memset_thread[i].numpages * hpagesize;
+    }
+
+    qemu_mutex_lock(&page_mutex);
+    threads_created_flag = true;
+    qemu_cond_broadcast(&page_cond);
+    qemu_mutex_unlock(&page_mutex);
+
+    for (i = 0; i < memset_num_threads; i++) {
+        qemu_thread_join(&memset_thread[i].pgthread);
+    }
+    g_free(memset_thread);
+    memset_thread = NULL;
+
+    return memset_thread_failed;
+}
+
+void os_mem_prealloc(int fd, char *area, size_t memory, int smp_cpus,
+                     Error **errp)
+{
+    int ret;
+    struct sigaction act, oldact;
+    size_t hpagesize = qemu_fd_getpagesize(fd);
+    size_t numpages = DIV_ROUND_UP(memory, hpagesize);
+
+    memset(&act, 0, sizeof(act));
+    act.sa_handler = &sigbus_handler;
+    act.sa_flags = 0;
+
+    ret = sigaction(SIGBUS, &act, &oldact);
+    if (ret) {
+        error_setg_errno(errp, errno,
+            "os_mem_prealloc: failed to install signal handler");
+        return;
+    }
+
+    /* touch pages simultaneously */
+    if (touch_all_pages(area, hpagesize, numpages, smp_cpus)) {
+        error_setg(errp, "os_mem_prealloc: Insufficient free host memory "
+            "pages available to allocate guest RAM");
+    }
+
+    ret = sigaction(SIGBUS, &oldact, NULL);
+    if (ret) {
+        /* Terminate QEMU since it can't recover from error */
+        perror("os_mem_prealloc: failed to reinstall signal handler");
+        exit(1);
+    }
+}
+
+char *qemu_get_pid_name(pid_t pid)
+{
+    char *name = NULL;
+
+#if defined(__FreeBSD__)
+    /* BSDs don't have /proc, but they provide a nice substitute */
+    struct kinfo_proc *proc = kinfo_getproc(pid);
+
+    if (proc) {
+        name = g_strdup(proc->ki_comm);
+        free(proc);
+    }
+#else
+    /* Assume a system with reasonable procfs */
+    char *pid_path;
+    size_t len;
+
+    pid_path = g_strdup_printf("/proc/%d/cmdline", pid);
+    g_file_get_contents(pid_path, &name, &len, NULL);
+    g_free(pid_path);
+#endif
+
+    return name;
+}
+
+
+pid_t qemu_fork(Error **errp)
+{
+    sigset_t oldmask, newmask;
+    struct sigaction sig_action;
+    int saved_errno;
+    pid_t pid;
+
+    /*
+     * Need to block signals now, so that child process can safely
+     * kill off caller's signal handlers without a race.
+     */
+    sigfillset(&newmask);
+    if (pthread_sigmask(SIG_SETMASK, &newmask, &oldmask) != 0) {
+        error_setg_errno(errp, errno,
+                         "cannot block signals");
+        return -1;
+    }
+
+    pid = fork();
+    saved_errno = errno;
+
+    if (pid < 0) {
+        /* attempt to restore signal mask, but ignore failure, to
+         * avoid obscuring the fork failure */
+        (void)pthread_sigmask(SIG_SETMASK, &oldmask, NULL);
+        error_setg_errno(errp, saved_errno,
+                         "cannot fork child process");
+        errno = saved_errno;
+        return -1;
+    } else if (pid) {
+        /* parent process */
+
+        /* Restore our original signal mask now that the child is
+         * safely running. Only documented failures are EFAULT (not
+         * possible, since we are using just-grabbed mask) or EINVAL
+         * (not possible, since we are using correct arguments).  */
+        (void)pthread_sigmask(SIG_SETMASK, &oldmask, NULL);
+    } else {
+        /* child process */
+        size_t i;
+
+        /* Clear out all signal handlers from parent so nothing
+         * unexpected can happen in our child once we unblock
+         * signals */
+        sig_action.sa_handler = SIG_DFL;
+        sig_action.sa_flags = 0;
+        sigemptyset(&sig_action.sa_mask);
+
+        for (i = 1; i < NSIG; i++) {
+            /* Only possible errors are EFAULT or EINVAL The former
+             * won't happen, the latter we expect, so no need to check
+             * return value */
+            (void)sigaction(i, &sig_action, NULL);
+        }
+
+        /* Unmask all signals in child, since we've no idea what the
+         * caller's done with their signal mask and don't want to
+         * propagate that to children */
+        sigemptyset(&newmask);
+        if (pthread_sigmask(SIG_SETMASK, &newmask, NULL) != 0) {
+            Error *local_err = NULL;
+            error_setg_errno(&local_err, errno,
+                             "cannot unblock signals");
+            error_report_err(local_err);
+            _exit(1);
+        }
+    }
+    return pid;
+}
+
+void *qemu_alloc_stack(size_t *sz)
+{
+    void *ptr, *guardpage;
+    int flags;
+#ifdef CONFIG_DEBUG_STACK_USAGE
+    void *ptr2;
+#endif
+    size_t pagesz = qemu_real_host_page_size;
+#ifdef _SC_THREAD_STACK_MIN
+    /* avoid stacks smaller than _SC_THREAD_STACK_MIN */
+    long min_stack_sz = sysconf(_SC_THREAD_STACK_MIN);
+    *sz = MAX(MAX(min_stack_sz, 0), *sz);
+#endif
+    /* adjust stack size to a multiple of the page size */
+    *sz = ROUND_UP(*sz, pagesz);
+    /* allocate one extra page for the guard page */
+    *sz += pagesz;
+
+    flags = MAP_PRIVATE | MAP_ANONYMOUS;
+#if defined(MAP_STACK) && defined(__OpenBSD__)
+    /* Only enable MAP_STACK on OpenBSD. Other OS's such as
+     * Linux/FreeBSD/NetBSD have a flag with the same name
+     * but have differing functionality. OpenBSD will SEGV
+     * if it spots execution with a stack pointer pointing
+     * at memory that was not allocated with MAP_STACK.
+     */
+    flags |= MAP_STACK;
+#endif
+
+    ptr = mmap(NULL, *sz, PROT_READ | PROT_WRITE, flags, -1, 0);
+    if (ptr == MAP_FAILED) {
+        perror("failed to allocate memory for stack");
+        abort();
+    }
+
+#if defined(HOST_IA64)
+    /* separate register stack */
+    guardpage = ptr + (((*sz - pagesz) / 2) & ~pagesz);
+#elif defined(HOST_HPPA)
+    /* stack grows up */
+    guardpage = ptr + *sz - pagesz;
+#else
+    /* stack grows down */
+    guardpage = ptr;
+#endif
+    if (mprotect(guardpage, pagesz, PROT_NONE) != 0) {
+        perror("failed to set up stack guard page");
+        abort();
+    }
+
+#ifdef CONFIG_DEBUG_STACK_USAGE
+    for (ptr2 = ptr + pagesz; ptr2 < ptr + *sz; ptr2 += sizeof(uint32_t)) {
+        *(uint32_t *)ptr2 = 0xdeadbeaf;
+    }
+#endif
+
+    return ptr;
+}
+
+#ifdef CONFIG_DEBUG_STACK_USAGE
+static __thread unsigned int max_stack_usage;
+#endif
+
+void qemu_free_stack(void *stack, size_t sz)
+{
+#ifdef CONFIG_DEBUG_STACK_USAGE
+    unsigned int usage;
+    void *ptr;
+
+    for (ptr = stack + qemu_real_host_page_size; ptr < stack + sz;
+         ptr += sizeof(uint32_t)) {
+        if (*(uint32_t *)ptr != 0xdeadbeaf) {
+            break;
+        }
+    }
+    usage = sz - (uintptr_t) (ptr - stack);
+    if (usage > max_stack_usage) {
+        error_report("thread %d max stack usage increased from %u to %u",
+                     qemu_get_thread_id(), max_stack_usage, usage);
+        max_stack_usage = usage;
+    }
+#endif
+
+    munmap(stack, sz);
+}
+
+void sigaction_invoke(struct sigaction *action,
+                      struct qemu_signalfd_siginfo *info)
+{
+    siginfo_t si = {};
+    si.si_signo = info->ssi_signo;
+    si.si_errno = info->ssi_errno;
+    si.si_code = info->ssi_code;
+
+    /* Convert the minimal set of fields defined by POSIX.
+     * Positive si_code values are reserved for kernel-generated
+     * signals, where the valid siginfo fields are determined by
+     * the signal number.  But according to POSIX, it is unspecified
+     * whether SI_USER and SI_QUEUE have values less than or equal to
+     * zero.
+     */
+    if (info->ssi_code == SI_USER || info->ssi_code == SI_QUEUE ||
+        info->ssi_code <= 0) {
+        /* SIGTERM, etc.  */
+        si.si_pid = info->ssi_pid;
+        si.si_uid = info->ssi_uid;
+    } else if (info->ssi_signo == SIGILL || info->ssi_signo == SIGFPE ||
+               info->ssi_signo == SIGSEGV || info->ssi_signo == SIGBUS) {
+        si.si_addr = (void *)(uintptr_t)info->ssi_addr;
+    } else if (info->ssi_signo == SIGCHLD) {
+        si.si_pid = info->ssi_pid;
+        si.si_status = info->ssi_status;
+        si.si_uid = info->ssi_uid;
+    }
+    action->sa_sigaction(info->ssi_signo, &si, NULL);
+}
+
+#ifndef HOST_NAME_MAX
+# ifdef _POSIX_HOST_NAME_MAX
+#  define HOST_NAME_MAX _POSIX_HOST_NAME_MAX
+# else
+#  define HOST_NAME_MAX 255
+# endif
+#endif
+
+char *qemu_get_host_name(Error **errp)
+{
+    long len = -1;
+    g_autofree char *hostname = NULL;
+
+#ifdef _SC_HOST_NAME_MAX
+    len = sysconf(_SC_HOST_NAME_MAX);
+#endif /* _SC_HOST_NAME_MAX */
+
+    if (len < 0) {
+        len = HOST_NAME_MAX;
+    }
+
+    /* Unfortunately, gethostname() below does not guarantee a
+     * NULL terminated string. Therefore, allocate one byte more
+     * to be sure. */
+    hostname = g_new0(char, len + 1);
+
+    if (gethostname(hostname, len) < 0) {
+        error_setg_errno(errp, errno,
+                         "cannot get hostname");
+        return NULL;
+    }
+
+    return g_steal_pointer(&hostname);
+}
+
+size_t qemu_get_host_physmem(void)
+{
+#ifdef _SC_PHYS_PAGES
+    long pages = sysconf(_SC_PHYS_PAGES);
+    if (pages > 0) {
+        if (pages > SIZE_MAX / qemu_real_host_page_size) {
+            return SIZE_MAX;
+        } else {
+            return pages * qemu_real_host_page_size;
+        }
+    }
+#endif
+    return 0;
+}
diff --git a/util/oslib-win32.c b/util/oslib-win32.c
new file mode 100644
index 000000000..23a7c7320
--- /dev/null
+++ b/util/oslib-win32.c
@@ -0,0 +1,847 @@
+/*
+ * os-win32.c
+ *
+ * Copyright (c) 2003-2008 Fabrice Bellard
+ * Copyright (c) 2010-2016 Red Hat, Inc.
+ *
+ * QEMU library functions for win32 which are shared between QEMU and
+ * the QEMU tools.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ *
+ * The implementation of g_poll (functions poll_rest, g_poll) at the end of
+ * this file are based on code from GNOME glib-2 and use a different license,
+ * see the license comment there.
+ */
+
+#include "qemu/osdep.h"
+#include 
+#include "qemu-common.h"
+#include "qapi/error.h"
+#include "sysemu/sysemu.h"
+#include "qemu/main-loop.h"
+#include "trace.h"
+#include "qemu/sockets.h"
+#include "qemu/cutils.h"
+
+/* this must come after including "trace.h" */
+#include 
+
+void *qemu_oom_check(void *ptr)
+{
+    if (ptr == NULL) {
+        fprintf(stderr, "Failed to allocate memory: %lu\n", GetLastError());
+        abort();
+    }
+    return ptr;
+}
+
+void *qemu_try_memalign(size_t alignment, size_t size)
+{
+    void *ptr;
+
+    if (!size) {
+        abort();
+    }
+    ptr = VirtualAlloc(NULL, size, MEM_COMMIT, PAGE_READWRITE);
+    trace_qemu_memalign(alignment, size, ptr);
+    return ptr;
+}
+
+void *qemu_memalign(size_t alignment, size_t size)
+{
+    return qemu_oom_check(qemu_try_memalign(alignment, size));
+}
+
+static int get_allocation_granularity(void)
+{
+    SYSTEM_INFO system_info;
+
+    GetSystemInfo(&system_info);
+    return system_info.dwAllocationGranularity;
+}
+
+void *qemu_anon_ram_alloc(size_t size, uint64_t *align, bool shared)
+{
+    void *ptr;
+
+    ptr = VirtualAlloc(NULL, size, MEM_COMMIT, PAGE_READWRITE);
+    trace_qemu_anon_ram_alloc(size, ptr);
+
+    if (ptr && align) {
+        *align = MAX(get_allocation_granularity(), getpagesize());
+    }
+    return ptr;
+}
+
+void qemu_vfree(void *ptr)
+{
+    trace_qemu_vfree(ptr);
+    if (ptr) {
+        VirtualFree(ptr, 0, MEM_RELEASE);
+    }
+}
+
+void qemu_anon_ram_free(void *ptr, size_t size)
+{
+    trace_qemu_anon_ram_free(ptr, size);
+    if (ptr) {
+        VirtualFree(ptr, 0, MEM_RELEASE);
+    }
+}
+
+#ifndef _POSIX_THREAD_SAFE_FUNCTIONS
+/* FIXME: add proper locking */
+struct tm *gmtime_r(const time_t *timep, struct tm *result)
+{
+    struct tm *p = gmtime(timep);
+    memset(result, 0, sizeof(*result));
+    if (p) {
+        *result = *p;
+        p = result;
+    }
+    return p;
+}
+
+/* FIXME: add proper locking */
+struct tm *localtime_r(const time_t *timep, struct tm *result)
+{
+    struct tm *p = localtime(timep);
+    memset(result, 0, sizeof(*result));
+    if (p) {
+        *result = *p;
+        p = result;
+    }
+    return p;
+}
+#endif /* _POSIX_THREAD_SAFE_FUNCTIONS */
+
+static int socket_error(void)
+{
+    switch (WSAGetLastError()) {
+    case 0:
+        return 0;
+    case WSAEINTR:
+        return EINTR;
+    case WSAEINVAL:
+        return EINVAL;
+    case WSA_INVALID_HANDLE:
+        return EBADF;
+    case WSA_NOT_ENOUGH_MEMORY:
+        return ENOMEM;
+    case WSA_INVALID_PARAMETER:
+        return EINVAL;
+    case WSAENAMETOOLONG:
+        return ENAMETOOLONG;
+    case WSAENOTEMPTY:
+        return ENOTEMPTY;
+    case WSAEWOULDBLOCK:
+         /* not using EWOULDBLOCK as we don't want code to have
+          * to check both EWOULDBLOCK and EAGAIN */
+        return EAGAIN;
+    case WSAEINPROGRESS:
+        return EINPROGRESS;
+    case WSAEALREADY:
+        return EALREADY;
+    case WSAENOTSOCK:
+        return ENOTSOCK;
+    case WSAEDESTADDRREQ:
+        return EDESTADDRREQ;
+    case WSAEMSGSIZE:
+        return EMSGSIZE;
+    case WSAEPROTOTYPE:
+        return EPROTOTYPE;
+    case WSAENOPROTOOPT:
+        return ENOPROTOOPT;
+    case WSAEPROTONOSUPPORT:
+        return EPROTONOSUPPORT;
+    case WSAEOPNOTSUPP:
+        return EOPNOTSUPP;
+    case WSAEAFNOSUPPORT:
+        return EAFNOSUPPORT;
+    case WSAEADDRINUSE:
+        return EADDRINUSE;
+    case WSAEADDRNOTAVAIL:
+        return EADDRNOTAVAIL;
+    case WSAENETDOWN:
+        return ENETDOWN;
+    case WSAENETUNREACH:
+        return ENETUNREACH;
+    case WSAENETRESET:
+        return ENETRESET;
+    case WSAECONNABORTED:
+        return ECONNABORTED;
+    case WSAECONNRESET:
+        return ECONNRESET;
+    case WSAENOBUFS:
+        return ENOBUFS;
+    case WSAEISCONN:
+        return EISCONN;
+    case WSAENOTCONN:
+        return ENOTCONN;
+    case WSAETIMEDOUT:
+        return ETIMEDOUT;
+    case WSAECONNREFUSED:
+        return ECONNREFUSED;
+    case WSAELOOP:
+        return ELOOP;
+    case WSAEHOSTUNREACH:
+        return EHOSTUNREACH;
+    default:
+        return EIO;
+    }
+}
+
+void qemu_set_block(int fd)
+{
+    unsigned long opt = 0;
+    WSAEventSelect(fd, NULL, 0);
+    ioctlsocket(fd, FIONBIO, &opt);
+}
+
+int qemu_try_set_nonblock(int fd)
+{
+    unsigned long opt = 1;
+    if (ioctlsocket(fd, FIONBIO, &opt) != NO_ERROR) {
+        return -socket_error();
+    }
+    qemu_fd_register(fd);
+    return 0;
+}
+
+void qemu_set_nonblock(int fd)
+{
+    (void)qemu_try_set_nonblock(fd);
+}
+
+int socket_set_fast_reuse(int fd)
+{
+    /* Enabling the reuse of an endpoint that was used by a socket still in
+     * TIME_WAIT state is usually performed by setting SO_REUSEADDR. On Windows
+     * fast reuse is the default and SO_REUSEADDR does strange things. So we
+     * don't have to do anything here. More info can be found at:
+     * http://msdn.microsoft.com/en-us/library/windows/desktop/ms740621.aspx */
+    return 0;
+}
+
+int inet_aton(const char *cp, struct in_addr *ia)
+{
+    uint32_t addr = inet_addr(cp);
+    if (addr == 0xffffffff) {
+        return 0;
+    }
+    ia->s_addr = addr;
+    return 1;
+}
+
+void qemu_set_cloexec(int fd)
+{
+}
+
+/* Offset between 1/1/1601 and 1/1/1970 in 100 nanosec units */
+#define _W32_FT_OFFSET (116444736000000000ULL)
+
+int qemu_gettimeofday(qemu_timeval *tp)
+{
+  union {
+    unsigned long long ns100; /*time since 1 Jan 1601 in 100ns units */
+    FILETIME ft;
+  }  _now;
+
+  if(tp) {
+      GetSystemTimeAsFileTime (&_now.ft);
+      tp->tv_usec=(long)((_now.ns100 / 10ULL) % 1000000ULL );
+      tp->tv_sec= (long)((_now.ns100 - _W32_FT_OFFSET) / 10000000ULL);
+  }
+  /* Always return 0 as per Open Group Base Specifications Issue 6.
+     Do not set errno on error.  */
+  return 0;
+}
+
+int qemu_get_thread_id(void)
+{
+    return GetCurrentThreadId();
+}
+
+char *
+qemu_get_local_state_pathname(const char *relative_pathname)
+{
+    HRESULT result;
+    char base_path[MAX_PATH+1] = "";
+
+    result = SHGetFolderPath(NULL, CSIDL_COMMON_APPDATA, NULL,
+                             /* SHGFP_TYPE_CURRENT */ 0, base_path);
+    if (result != S_OK) {
+        /* misconfigured environment */
+        g_critical("CSIDL_COMMON_APPDATA unavailable: %ld", (long)result);
+        abort();
+    }
+    return g_strdup_printf("%s" G_DIR_SEPARATOR_S "%s", base_path,
+                           relative_pathname);
+}
+
+void qemu_set_tty_echo(int fd, bool echo)
+{
+    HANDLE handle = (HANDLE)_get_osfhandle(fd);
+    DWORD dwMode = 0;
+
+    if (handle == INVALID_HANDLE_VALUE) {
+        return;
+    }
+
+    GetConsoleMode(handle, &dwMode);
+
+    if (echo) {
+        SetConsoleMode(handle, dwMode | ENABLE_ECHO_INPUT | ENABLE_LINE_INPUT);
+    } else {
+        SetConsoleMode(handle,
+                       dwMode & ~(ENABLE_ECHO_INPUT | ENABLE_LINE_INPUT));
+    }
+}
+
+static const char *exec_dir;
+
+void qemu_init_exec_dir(const char *argv0)
+{
+
+    char *p;
+    char buf[MAX_PATH];
+    DWORD len;
+
+    if (exec_dir) {
+        return;
+    }
+
+    len = GetModuleFileName(NULL, buf, sizeof(buf) - 1);
+    if (len == 0) {
+        return;
+    }
+
+    buf[len] = 0;
+    p = buf + len - 1;
+    while (p != buf && *p != '\\') {
+        p--;
+    }
+    *p = 0;
+    if (access(buf, R_OK) == 0) {
+        exec_dir = g_strdup(buf);
+    } else {
+        exec_dir = CONFIG_BINDIR;
+    }
+}
+
+const char *qemu_get_exec_dir(void)
+{
+    return exec_dir;
+}
+
+#if !GLIB_CHECK_VERSION(2, 50, 0)
+/*
+ * The original implementation of g_poll from glib has a problem on Windows
+ * when using timeouts < 10 ms.
+ *
+ * Whenever g_poll is called with timeout < 10 ms, it does a quick poll instead
+ * of wait. This causes significant performance degradation of QEMU.
+ *
+ * The following code is a copy of the original code from glib/gpoll.c
+ * (glib commit 20f4d1820b8d4d0fc4447188e33efffd6d4a88d8 from 2014-02-19).
+ * Some debug code was removed and the code was reformatted.
+ * All other code modifications are marked with 'QEMU'.
+ */
+
+/*
+ * gpoll.c: poll(2) abstraction
+ * Copyright 1998 Owen Taylor
+ * Copyright 2008 Red Hat, Inc.
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, see .
+ */
+
+static int poll_rest(gboolean poll_msgs, HANDLE *handles, gint nhandles,
+                     GPollFD *fds, guint nfds, gint timeout)
+{
+    DWORD ready;
+    GPollFD *f;
+    int recursed_result;
+
+    if (poll_msgs) {
+        /* Wait for either messages or handles
+         * -> Use MsgWaitForMultipleObjectsEx
+         */
+        ready = MsgWaitForMultipleObjectsEx(nhandles, handles, timeout,
+                                            QS_ALLINPUT, MWMO_ALERTABLE);
+
+        if (ready == WAIT_FAILED) {
+            gchar *emsg = g_win32_error_message(GetLastError());
+            g_warning("MsgWaitForMultipleObjectsEx failed: %s", emsg);
+            g_free(emsg);
+        }
+    } else if (nhandles == 0) {
+        /* No handles to wait for, just the timeout */
+        if (timeout == INFINITE) {
+            ready = WAIT_FAILED;
+        } else {
+            SleepEx(timeout, TRUE);
+            ready = WAIT_TIMEOUT;
+        }
+    } else {
+        /* Wait for just handles
+         * -> Use WaitForMultipleObjectsEx
+         */
+        ready =
+            WaitForMultipleObjectsEx(nhandles, handles, FALSE, timeout, TRUE);
+        if (ready == WAIT_FAILED) {
+            gchar *emsg = g_win32_error_message(GetLastError());
+            g_warning("WaitForMultipleObjectsEx failed: %s", emsg);
+            g_free(emsg);
+        }
+    }
+
+    if (ready == WAIT_FAILED) {
+        return -1;
+    } else if (ready == WAIT_TIMEOUT || ready == WAIT_IO_COMPLETION) {
+        return 0;
+    } else if (poll_msgs && ready == WAIT_OBJECT_0 + nhandles) {
+        for (f = fds; f < &fds[nfds]; ++f) {
+            if (f->fd == G_WIN32_MSG_HANDLE && f->events & G_IO_IN) {
+                f->revents |= G_IO_IN;
+            }
+        }
+
+        /* If we have a timeout, or no handles to poll, be satisfied
+         * with just noticing we have messages waiting.
+         */
+        if (timeout != 0 || nhandles == 0) {
+            return 1;
+        }
+
+        /* If no timeout and handles to poll, recurse to poll them,
+         * too.
+         */
+        recursed_result = poll_rest(FALSE, handles, nhandles, fds, nfds, 0);
+        return (recursed_result == -1) ? -1 : 1 + recursed_result;
+    } else if (/* QEMU: removed the following unneeded statement which causes
+                * a compiler warning: ready >= WAIT_OBJECT_0 && */
+               ready < WAIT_OBJECT_0 + nhandles) {
+        for (f = fds; f < &fds[nfds]; ++f) {
+            if ((HANDLE) f->fd == handles[ready - WAIT_OBJECT_0]) {
+                f->revents = f->events;
+            }
+        }
+
+        /* If no timeout and polling several handles, recurse to poll
+         * the rest of them.
+         */
+        if (timeout == 0 && nhandles > 1) {
+            /* Remove the handle that fired */
+            int i;
+            for (i = ready - WAIT_OBJECT_0 + 1; i < nhandles; i++) {
+                handles[i-1] = handles[i];
+            }
+            nhandles--;
+            recursed_result = poll_rest(FALSE, handles, nhandles, fds, nfds, 0);
+            return (recursed_result == -1) ? -1 : 1 + recursed_result;
+        }
+        return 1;
+    }
+
+    return 0;
+}
+
+gint g_poll(GPollFD *fds, guint nfds, gint timeout)
+{
+    HANDLE handles[MAXIMUM_WAIT_OBJECTS];
+    gboolean poll_msgs = FALSE;
+    GPollFD *f;
+    gint nhandles = 0;
+    int retval;
+
+    for (f = fds; f < &fds[nfds]; ++f) {
+        if (f->fd == G_WIN32_MSG_HANDLE && (f->events & G_IO_IN)) {
+            poll_msgs = TRUE;
+        } else if (f->fd > 0) {
+            /* Don't add the same handle several times into the array, as
+             * docs say that is not allowed, even if it actually does seem
+             * to work.
+             */
+            gint i;
+
+            for (i = 0; i < nhandles; i++) {
+                if (handles[i] == (HANDLE) f->fd) {
+                    break;
+                }
+            }
+
+            if (i == nhandles) {
+                if (nhandles == MAXIMUM_WAIT_OBJECTS) {
+                    g_warning("Too many handles to wait for!\n");
+                    break;
+                } else {
+                    handles[nhandles++] = (HANDLE) f->fd;
+                }
+            }
+        }
+    }
+
+    for (f = fds; f < &fds[nfds]; ++f) {
+        f->revents = 0;
+    }
+
+    if (timeout == -1) {
+        timeout = INFINITE;
+    }
+
+    /* Polling for several things? */
+    if (nhandles > 1 || (nhandles > 0 && poll_msgs)) {
+        /* First check if one or several of them are immediately
+         * available
+         */
+        retval = poll_rest(poll_msgs, handles, nhandles, fds, nfds, 0);
+
+        /* If not, and we have a significant timeout, poll again with
+         * timeout then. Note that this will return indication for only
+         * one event, or only for messages. We ignore timeouts less than
+         * ten milliseconds as they are mostly pointless on Windows, the
+         * MsgWaitForMultipleObjectsEx() call will timeout right away
+         * anyway.
+         *
+         * Modification for QEMU: replaced timeout >= 10 by timeout > 0.
+         */
+        if (retval == 0 && (timeout == INFINITE || timeout > 0)) {
+            retval = poll_rest(poll_msgs, handles, nhandles,
+                               fds, nfds, timeout);
+        }
+    } else {
+        /* Just polling for one thing, so no need to check first if
+         * available immediately
+         */
+        retval = poll_rest(poll_msgs, handles, nhandles, fds, nfds, timeout);
+    }
+
+    if (retval == -1) {
+        for (f = fds; f < &fds[nfds]; ++f) {
+            f->revents = 0;
+        }
+    }
+
+    return retval;
+}
+#endif
+
+int getpagesize(void)
+{
+    SYSTEM_INFO system_info;
+
+    GetSystemInfo(&system_info);
+    return system_info.dwPageSize;
+}
+
+void os_mem_prealloc(int fd, char *area, size_t memory, int smp_cpus,
+                     Error **errp)
+{
+    int i;
+    size_t pagesize = qemu_real_host_page_size;
+
+    memory = (memory + pagesize - 1) & -pagesize;
+    for (i = 0; i < memory / pagesize; i++) {
+        memset(area + pagesize * i, 0, 1);
+    }
+}
+
+char *qemu_get_pid_name(pid_t pid)
+{
+    /* XXX Implement me */
+    abort();
+}
+
+
+pid_t qemu_fork(Error **errp)
+{
+    errno = ENOSYS;
+    error_setg_errno(errp, errno,
+                     "cannot fork child process");
+    return -1;
+}
+
+
+#undef connect
+int qemu_connect_wrap(int sockfd, const struct sockaddr *addr,
+                      socklen_t addrlen)
+{
+    int ret;
+    ret = connect(sockfd, addr, addrlen);
+    if (ret < 0) {
+        if (WSAGetLastError() == WSAEWOULDBLOCK) {
+            errno = EINPROGRESS;
+        } else {
+            errno = socket_error();
+        }
+    }
+    return ret;
+}
+
+
+#undef listen
+int qemu_listen_wrap(int sockfd, int backlog)
+{
+    int ret;
+    ret = listen(sockfd, backlog);
+    if (ret < 0) {
+        errno = socket_error();
+    }
+    return ret;
+}
+
+
+#undef bind
+int qemu_bind_wrap(int sockfd, const struct sockaddr *addr,
+                   socklen_t addrlen)
+{
+    int ret;
+    ret = bind(sockfd, addr, addrlen);
+    if (ret < 0) {
+        errno = socket_error();
+    }
+    return ret;
+}
+
+
+#undef socket
+int qemu_socket_wrap(int domain, int type, int protocol)
+{
+    int ret;
+    ret = socket(domain, type, protocol);
+    if (ret < 0) {
+        errno = socket_error();
+    }
+    return ret;
+}
+
+
+#undef accept
+int qemu_accept_wrap(int sockfd, struct sockaddr *addr,
+                     socklen_t *addrlen)
+{
+    int ret;
+    ret = accept(sockfd, addr, addrlen);
+    if (ret < 0) {
+        errno = socket_error();
+    }
+    return ret;
+}
+
+
+#undef shutdown
+int qemu_shutdown_wrap(int sockfd, int how)
+{
+    int ret;
+    ret = shutdown(sockfd, how);
+    if (ret < 0) {
+        errno = socket_error();
+    }
+    return ret;
+}
+
+
+#undef ioctlsocket
+int qemu_ioctlsocket_wrap(int fd, int req, void *val)
+{
+    int ret;
+    ret = ioctlsocket(fd, req, val);
+    if (ret < 0) {
+        errno = socket_error();
+    }
+    return ret;
+}
+
+
+#undef closesocket
+int qemu_closesocket_wrap(int fd)
+{
+    int ret;
+    ret = closesocket(fd);
+    if (ret < 0) {
+        errno = socket_error();
+    }
+    return ret;
+}
+
+
+#undef getsockopt
+int qemu_getsockopt_wrap(int sockfd, int level, int optname,
+                         void *optval, socklen_t *optlen)
+{
+    int ret;
+    ret = getsockopt(sockfd, level, optname, optval, optlen);
+    if (ret < 0) {
+        errno = socket_error();
+    }
+    return ret;
+}
+
+
+#undef setsockopt
+int qemu_setsockopt_wrap(int sockfd, int level, int optname,
+                         const void *optval, socklen_t optlen)
+{
+    int ret;
+    ret = setsockopt(sockfd, level, optname, optval, optlen);
+    if (ret < 0) {
+        errno = socket_error();
+    }
+    return ret;
+}
+
+
+#undef getpeername
+int qemu_getpeername_wrap(int sockfd, struct sockaddr *addr,
+                          socklen_t *addrlen)
+{
+    int ret;
+    ret = getpeername(sockfd, addr, addrlen);
+    if (ret < 0) {
+        errno = socket_error();
+    }
+    return ret;
+}
+
+
+#undef getsockname
+int qemu_getsockname_wrap(int sockfd, struct sockaddr *addr,
+                          socklen_t *addrlen)
+{
+    int ret;
+    ret = getsockname(sockfd, addr, addrlen);
+    if (ret < 0) {
+        errno = socket_error();
+    }
+    return ret;
+}
+
+
+#undef send
+ssize_t qemu_send_wrap(int sockfd, const void *buf, size_t len, int flags)
+{
+    int ret;
+    ret = send(sockfd, buf, len, flags);
+    if (ret < 0) {
+        errno = socket_error();
+    }
+    return ret;
+}
+
+
+#undef sendto
+ssize_t qemu_sendto_wrap(int sockfd, const void *buf, size_t len, int flags,
+                         const struct sockaddr *addr, socklen_t addrlen)
+{
+    int ret;
+    ret = sendto(sockfd, buf, len, flags, addr, addrlen);
+    if (ret < 0) {
+        errno = socket_error();
+    }
+    return ret;
+}
+
+
+#undef recv
+ssize_t qemu_recv_wrap(int sockfd, void *buf, size_t len, int flags)
+{
+    int ret;
+    ret = recv(sockfd, buf, len, flags);
+    if (ret < 0) {
+        errno = socket_error();
+    }
+    return ret;
+}
+
+
+#undef recvfrom
+ssize_t qemu_recvfrom_wrap(int sockfd, void *buf, size_t len, int flags,
+                           struct sockaddr *addr, socklen_t *addrlen)
+{
+    int ret;
+    ret = recvfrom(sockfd, buf, len, flags, addr, addrlen);
+    if (ret < 0) {
+        errno = socket_error();
+    }
+    return ret;
+}
+
+bool qemu_write_pidfile(const char *filename, Error **errp)
+{
+    char buffer[128];
+    int len;
+    HANDLE file;
+    OVERLAPPED overlap;
+    BOOL ret;
+    memset(&overlap, 0, sizeof(overlap));
+
+    file = CreateFile(filename, GENERIC_WRITE, FILE_SHARE_READ, NULL,
+                      OPEN_ALWAYS, FILE_ATTRIBUTE_NORMAL, NULL);
+
+    if (file == INVALID_HANDLE_VALUE) {
+        error_setg(errp, "Failed to create PID file");
+        return false;
+    }
+    len = snprintf(buffer, sizeof(buffer), FMT_pid "\n", (pid_t)getpid());
+    ret = WriteFile(file, (LPCVOID)buffer, (DWORD)len,
+                    NULL, &overlap);
+    CloseHandle(file);
+    if (ret == 0) {
+        error_setg(errp, "Failed to write PID file");
+        return false;
+    }
+    return true;
+}
+
+char *qemu_get_host_name(Error **errp)
+{
+    wchar_t tmp[MAX_COMPUTERNAME_LENGTH + 1];
+    DWORD size = G_N_ELEMENTS(tmp);
+
+    if (GetComputerNameW(tmp, &size) == 0) {
+        error_setg_win32(errp, GetLastError(), "failed close handle");
+        return NULL;
+    }
+
+    return g_utf16_to_utf8(tmp, size, NULL, NULL, NULL);
+}
+
+size_t qemu_get_host_physmem(void)
+{
+    MEMORYSTATUSEX statex;
+    statex.dwLength = sizeof(statex);
+
+    if (GlobalMemoryStatusEx(&statex)) {
+        return statex.ullTotalPhys;
+    }
+    return 0;
+}
diff --git a/util/pagesize.c b/util/pagesize.c
new file mode 100644
index 000000000..998632cf6
--- /dev/null
+++ b/util/pagesize.c
@@ -0,0 +1,18 @@
+/*
+ * pagesize.c - query the host about its page size
+ *
+ * Copyright (C) 2017, Emilio G. Cota 
+ * License: GNU GPL, version 2 or later.
+ *   See the COPYING file in the top-level directory.
+ */
+
+#include "qemu/osdep.h"
+
+uintptr_t qemu_real_host_page_size;
+intptr_t qemu_real_host_page_mask;
+
+static void __attribute__((constructor)) init_real_host_page_size(void)
+{
+    qemu_real_host_page_size = getpagesize();
+    qemu_real_host_page_mask = -(intptr_t)qemu_real_host_page_size;
+}
diff --git a/util/path.c b/util/path.c
new file mode 100644
index 000000000..8e174eb43
--- /dev/null
+++ b/util/path.c
@@ -0,0 +1,70 @@
+/* Code to mangle pathnames into those matching a given prefix.
+   eg. open("/lib/foo.so") => open("/usr/gnemul/i386-linux/lib/foo.so");
+
+   The assumption is that this area does not change.
+*/
+#include "qemu/osdep.h"
+#include 
+#include 
+#include "qemu/cutils.h"
+#include "qemu/path.h"
+#include "qemu/thread.h"
+
+static const char *base;
+static GHashTable *hash;
+static QemuMutex lock;
+
+void init_paths(const char *prefix)
+{
+    if (prefix[0] == '\0' || !strcmp(prefix, "/")) {
+        return;
+    }
+
+    if (prefix[0] == '/') {
+        base = g_strdup(prefix);
+    } else {
+        char *cwd = g_get_current_dir();
+        base = g_build_filename(cwd, prefix, NULL);
+        g_free(cwd);
+    }
+
+    hash = g_hash_table_new(g_str_hash, g_str_equal);
+    qemu_mutex_init(&lock);
+}
+
+/* Look for path in emulation dir, otherwise return name. */
+const char *path(const char *name)
+{
+    gpointer key, value;
+    const char *ret;
+
+    /* Only do absolute paths: quick and dirty, but should mostly be OK.  */
+    if (!base || !name || name[0] != '/') {
+        return name;
+    }
+
+    qemu_mutex_lock(&lock);
+
+    /* Have we looked up this file before?  */
+    if (g_hash_table_lookup_extended(hash, name, &key, &value)) {
+        ret = value ? value : name;
+    } else {
+        char *save = g_strdup(name);
+        char *full = g_build_filename(base, name, NULL);
+
+        /* Look for the path; record the result, pass or fail.  */
+        if (access(full, F_OK) == 0) {
+            /* Exists.  */
+            g_hash_table_insert(hash, save, full);
+            ret = full;
+        } else {
+            /* Does not exist.  */
+            g_free(full);
+            g_hash_table_insert(hash, save, NULL);
+            ret = name;
+        }
+    }
+
+    qemu_mutex_unlock(&lock);
+    return ret;
+}
diff --git a/util/qdist.c b/util/qdist.c
new file mode 100644
index 000000000..5f75e24c2
--- /dev/null
+++ b/util/qdist.c
@@ -0,0 +1,397 @@
+/*
+ * qdist.c - QEMU helpers for handling frequency distributions of data.
+ *
+ * Copyright (C) 2016, Emilio G. Cota 
+ *
+ * License: GNU GPL, version 2 or later.
+ *   See the COPYING file in the top-level directory.
+ */
+#include "qemu/osdep.h"
+#include "qemu/qdist.h"
+
+#include 
+#ifndef NAN
+#define NAN (0.0 / 0.0)
+#endif
+
+#define QDIST_EMPTY_STR "(empty)"
+
+void qdist_init(struct qdist *dist)
+{
+    dist->entries = g_new(struct qdist_entry, 1);
+    dist->size = 1;
+    dist->n = 0;
+}
+
+void qdist_destroy(struct qdist *dist)
+{
+    g_free(dist->entries);
+}
+
+static inline int qdist_cmp_double(double a, double b)
+{
+    if (a > b) {
+        return 1;
+    } else if (a < b) {
+        return -1;
+    }
+    return 0;
+}
+
+static int qdist_cmp(const void *ap, const void *bp)
+{
+    const struct qdist_entry *a = ap;
+    const struct qdist_entry *b = bp;
+
+    return qdist_cmp_double(a->x, b->x);
+}
+
+void qdist_add(struct qdist *dist, double x, long count)
+{
+    struct qdist_entry *entry = NULL;
+
+    if (dist->n) {
+        struct qdist_entry e;
+
+        e.x = x;
+        entry = bsearch(&e, dist->entries, dist->n, sizeof(e), qdist_cmp);
+    }
+
+    if (entry) {
+        entry->count += count;
+        return;
+    }
+
+    if (unlikely(dist->n == dist->size)) {
+        dist->size *= 2;
+        dist->entries = g_renew(struct qdist_entry, dist->entries, dist->size);
+    }
+    dist->n++;
+    entry = &dist->entries[dist->n - 1];
+    entry->x = x;
+    entry->count = count;
+    qsort(dist->entries, dist->n, sizeof(*entry), qdist_cmp);
+}
+
+void qdist_inc(struct qdist *dist, double x)
+{
+    qdist_add(dist, x, 1);
+}
+
+/*
+ * Unicode for block elements. See:
+ *   https://en.wikipedia.org/wiki/Block_Elements
+ */
+static const gunichar qdist_blocks[] = {
+    0x2581,
+    0x2582,
+    0x2583,
+    0x2584,
+    0x2585,
+    0x2586,
+    0x2587,
+    0x2588
+};
+
+#define QDIST_NR_BLOCK_CODES ARRAY_SIZE(qdist_blocks)
+
+/*
+ * Print a distribution into a string.
+ *
+ * This function assumes that appropriate binning has been done on the input;
+ * see qdist_bin__internal() and qdist_pr_plain().
+ *
+ * Callers must free the returned string with g_free().
+ */
+static char *qdist_pr_internal(const struct qdist *dist)
+{
+    double min, max;
+    GString *s = g_string_new("");
+    size_t i;
+
+    /* if only one entry, its printout will be either full or empty */
+    if (dist->n == 1) {
+        if (dist->entries[0].count) {
+            g_string_append_unichar(s, qdist_blocks[QDIST_NR_BLOCK_CODES - 1]);
+        } else {
+            g_string_append_c(s, ' ');
+        }
+        goto out;
+    }
+
+    /* get min and max counts */
+    min = dist->entries[0].count;
+    max = min;
+    for (i = 0; i < dist->n; i++) {
+        struct qdist_entry *e = &dist->entries[i];
+
+        if (e->count < min) {
+            min = e->count;
+        }
+        if (e->count > max) {
+            max = e->count;
+        }
+    }
+
+    for (i = 0; i < dist->n; i++) {
+        struct qdist_entry *e = &dist->entries[i];
+        int index;
+
+        /* make an exception with 0; instead of using block[0], print a space */
+        if (e->count) {
+            /* divide first to avoid loss of precision when e->count == max */
+            index = (e->count - min) / (max - min) * (QDIST_NR_BLOCK_CODES - 1);
+            g_string_append_unichar(s, qdist_blocks[index]);
+        } else {
+            g_string_append_c(s, ' ');
+        }
+    }
+ out:
+    return g_string_free(s, FALSE);
+}
+
+/*
+ * Bin the distribution in @from into @n bins of consecutive, non-overlapping
+ * intervals, copying the result to @to.
+ *
+ * This function is internal to qdist: only this file and test code should
+ * ever call it.
+ *
+ * Note: calling this function on an already-binned qdist is a bug.
+ *
+ * If @n == 0 or @from->n == 1, use @from->n.
+ */
+void qdist_bin__internal(struct qdist *to, const struct qdist *from, size_t n)
+{
+    double xmin, xmax;
+    double step;
+    size_t i, j;
+
+    qdist_init(to);
+
+    if (from->n == 0) {
+        return;
+    }
+    if (n == 0 || from->n == 1) {
+        n = from->n;
+    }
+
+    /* set equally-sized bins between @from's left and right */
+    xmin = qdist_xmin(from);
+    xmax = qdist_xmax(from);
+    step = (xmax - xmin) / n;
+
+    if (n == from->n) {
+        /* if @from's entries are equally spaced, no need to re-bin */
+        for (i = 0; i < from->n; i++) {
+            if (from->entries[i].x != xmin + i * step) {
+                goto rebin;
+            }
+        }
+        /* they're equally spaced, so copy the dist and bail out */
+        to->entries = g_renew(struct qdist_entry, to->entries, n);
+        to->n = from->n;
+        memcpy(to->entries, from->entries, sizeof(*to->entries) * to->n);
+        return;
+    }
+
+ rebin:
+    j = 0;
+    for (i = 0; i < n; i++) {
+        double x;
+        double left, right;
+
+        left = xmin + i * step;
+        right = xmin + (i + 1) * step;
+
+        /* Add x, even if it might not get any counts later */
+        x = left;
+        qdist_add(to, x, 0);
+
+        /*
+         * To avoid double-counting we capture [left, right) ranges, except for
+         * the righmost bin, which captures a [left, right] range.
+         */
+        while (j < from->n && (from->entries[j].x < right || i == n - 1)) {
+            struct qdist_entry *o = &from->entries[j];
+
+            qdist_add(to, x, o->count);
+            j++;
+        }
+    }
+}
+
+/*
+ * Print @dist into a string, after re-binning it into @n bins of consecutive,
+ * non-overlapping intervals.
+ *
+ * If @n == 0, use @orig->n.
+ *
+ * Callers must free the returned string with g_free().
+ */
+char *qdist_pr_plain(const struct qdist *dist, size_t n)
+{
+    struct qdist binned;
+    char *ret;
+
+    if (dist->n == 0) {
+        return g_strdup(QDIST_EMPTY_STR);
+    }
+    qdist_bin__internal(&binned, dist, n);
+    ret = qdist_pr_internal(&binned);
+    qdist_destroy(&binned);
+    return ret;
+}
+
+static char *qdist_pr_label(const struct qdist *dist, size_t n_bins,
+                            uint32_t opt, bool is_left)
+{
+    const char *percent;
+    const char *lparen;
+    const char *rparen;
+    GString *s;
+    double x1, x2, step;
+    double x;
+    double n;
+    int dec;
+
+    s = g_string_new("");
+    if (!(opt & QDIST_PR_LABELS)) {
+        goto out;
+    }
+
+    dec = opt & QDIST_PR_NODECIMAL ? 0 : 1;
+    percent = opt & QDIST_PR_PERCENT ? "%" : "";
+
+    n = n_bins ? n_bins : dist->n;
+    x = is_left ? qdist_xmin(dist) : qdist_xmax(dist);
+    step = (qdist_xmax(dist) - qdist_xmin(dist)) / n;
+
+    if (opt & QDIST_PR_100X) {
+        x *= 100.0;
+        step *= 100.0;
+    }
+    if (opt & QDIST_PR_NOBINRANGE) {
+        lparen = rparen = "";
+        x1 = x;
+        x2 = x; /* unnecessary, but a dumb compiler might not figure it out */
+    } else {
+        lparen = "[";
+        rparen = is_left ? ")" : "]";
+        if (is_left) {
+            x1 = x;
+            x2 = x + step;
+        } else {
+            x1 = x - step;
+            x2 = x;
+        }
+    }
+    g_string_append_printf(s, "%s%.*f", lparen, dec, x1);
+    if (!(opt & QDIST_PR_NOBINRANGE)) {
+        g_string_append_printf(s, ",%.*f%s", dec, x2, rparen);
+    }
+    g_string_append(s, percent);
+ out:
+    return g_string_free(s, FALSE);
+}
+
+/*
+ * Print the distribution's histogram into a string.
+ *
+ * See also: qdist_pr_plain().
+ *
+ * Callers must free the returned string with g_free().
+ */
+char *qdist_pr(const struct qdist *dist, size_t n_bins, uint32_t opt)
+{
+    const char *border = opt & QDIST_PR_BORDER ? "|" : "";
+    char *llabel, *rlabel;
+    char *hgram;
+    GString *s;
+
+    if (dist->n == 0) {
+        return g_strdup(QDIST_EMPTY_STR);
+    }
+
+    s = g_string_new("");
+
+    llabel = qdist_pr_label(dist, n_bins, opt, true);
+    rlabel = qdist_pr_label(dist, n_bins, opt, false);
+    hgram = qdist_pr_plain(dist, n_bins);
+    g_string_append_printf(s, "%s%s%s%s%s",
+                           llabel, border, hgram, border, rlabel);
+    g_free(llabel);
+    g_free(rlabel);
+    g_free(hgram);
+
+    return g_string_free(s, FALSE);
+}
+
+static inline double qdist_x(const struct qdist *dist, int index)
+{
+    if (dist->n == 0) {
+        return NAN;
+    }
+    return dist->entries[index].x;
+}
+
+double qdist_xmin(const struct qdist *dist)
+{
+    return qdist_x(dist, 0);
+}
+
+double qdist_xmax(const struct qdist *dist)
+{
+    return qdist_x(dist, dist->n - 1);
+}
+
+size_t qdist_unique_entries(const struct qdist *dist)
+{
+    return dist->n;
+}
+
+unsigned long qdist_sample_count(const struct qdist *dist)
+{
+    unsigned long count = 0;
+    size_t i;
+
+    for (i = 0; i < dist->n; i++) {
+        struct qdist_entry *e = &dist->entries[i];
+
+        count += e->count;
+    }
+    return count;
+}
+
+static double qdist_pairwise_avg(const struct qdist *dist, size_t index,
+                                 size_t n, unsigned long count)
+{
+    /* amortize the recursion by using a base case > 2 */
+    if (n <= 8) {
+        size_t i;
+        double ret = 0;
+
+        for (i = 0; i < n; i++) {
+            struct qdist_entry *e = &dist->entries[index + i];
+
+            ret += e->x * e->count / count;
+        }
+        return ret;
+    } else {
+        size_t n2 = n / 2;
+
+        return qdist_pairwise_avg(dist, index, n2, count) +
+               qdist_pairwise_avg(dist, index + n2, n - n2, count);
+    }
+}
+
+double qdist_avg(const struct qdist *dist)
+{
+    unsigned long count;
+
+    count = qdist_sample_count(dist);
+    if (!count) {
+        return NAN;
+    }
+    return qdist_pairwise_avg(dist, 0, dist->n, count);
+}
diff --git a/util/qemu-co-shared-resource.c b/util/qemu-co-shared-resource.c
new file mode 100644
index 000000000..1c83cd9d2
--- /dev/null
+++ b/util/qemu-co-shared-resource.c
@@ -0,0 +1,76 @@
+/*
+ * Helper functionality for distributing a fixed total amount of
+ * an abstract resource among multiple coroutines.
+ *
+ * Copyright (c) 2019 Virtuozzo International GmbH
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+#include "qemu/osdep.h"
+#include "qemu/coroutine.h"
+#include "qemu/co-shared-resource.h"
+
+struct SharedResource {
+    uint64_t total;
+    uint64_t available;
+
+    CoQueue queue;
+};
+
+SharedResource *shres_create(uint64_t total)
+{
+    SharedResource *s = g_new0(SharedResource, 1);
+
+    s->total = s->available = total;
+    qemu_co_queue_init(&s->queue);
+
+    return s;
+}
+
+void shres_destroy(SharedResource *s)
+{
+    assert(s->available == s->total);
+    g_free(s);
+}
+
+bool co_try_get_from_shres(SharedResource *s, uint64_t n)
+{
+    if (s->available >= n) {
+        s->available -= n;
+        return true;
+    }
+
+    return false;
+}
+
+void coroutine_fn co_get_from_shres(SharedResource *s, uint64_t n)
+{
+    assert(n <= s->total);
+    while (!co_try_get_from_shres(s, n)) {
+        qemu_co_queue_wait(&s->queue, NULL);
+    }
+}
+
+void coroutine_fn co_put_to_shres(SharedResource *s, uint64_t n)
+{
+    assert(s->total - s->available >= n);
+    s->available += n;
+    qemu_co_queue_restart_all(&s->queue);
+}
diff --git a/util/qemu-config.c b/util/qemu-config.c
new file mode 100644
index 000000000..660f47b00
--- /dev/null
+++ b/util/qemu-config.c
@@ -0,0 +1,573 @@
+#include "qemu/osdep.h"
+#include "block/qdict.h" /* for qdict_extract_subqdict() */
+#include "qapi/error.h"
+#include "qapi/qapi-commands-misc.h"
+#include "qapi/qmp/qdict.h"
+#include "qapi/qmp/qlist.h"
+#include "qemu/error-report.h"
+#include "qemu/option.h"
+#include "qemu/config-file.h"
+
+static QemuOptsList *vm_config_groups[48];
+static QemuOptsList *drive_config_groups[5];
+
+static QemuOptsList *find_list(QemuOptsList **lists, const char *group,
+                               Error **errp)
+{
+    int i;
+
+    for (i = 0; lists[i] != NULL; i++) {
+        if (strcmp(lists[i]->name, group) == 0)
+            break;
+    }
+    if (lists[i] == NULL) {
+        error_setg(errp, "There is no option group '%s'", group);
+    }
+    return lists[i];
+}
+
+QemuOptsList *qemu_find_opts(const char *group)
+{
+    QemuOptsList *ret;
+    Error *local_err = NULL;
+
+    ret = find_list(vm_config_groups, group, &local_err);
+    if (local_err) {
+        error_report_err(local_err);
+    }
+
+    return ret;
+}
+
+QemuOpts *qemu_find_opts_singleton(const char *group)
+{
+    QemuOptsList *list;
+    QemuOpts *opts;
+
+    list = qemu_find_opts(group);
+    assert(list);
+    opts = qemu_opts_find(list, NULL);
+    if (!opts) {
+        opts = qemu_opts_create(list, NULL, 0, &error_abort);
+    }
+    return opts;
+}
+
+static CommandLineParameterInfoList *query_option_descs(const QemuOptDesc *desc)
+{
+    CommandLineParameterInfoList *param_list = NULL, *entry;
+    CommandLineParameterInfo *info;
+    int i;
+
+    for (i = 0; desc[i].name != NULL; i++) {
+        info = g_malloc0(sizeof(*info));
+        info->name = g_strdup(desc[i].name);
+
+        switch (desc[i].type) {
+        case QEMU_OPT_STRING:
+            info->type = COMMAND_LINE_PARAMETER_TYPE_STRING;
+            break;
+        case QEMU_OPT_BOOL:
+            info->type = COMMAND_LINE_PARAMETER_TYPE_BOOLEAN;
+            break;
+        case QEMU_OPT_NUMBER:
+            info->type = COMMAND_LINE_PARAMETER_TYPE_NUMBER;
+            break;
+        case QEMU_OPT_SIZE:
+            info->type = COMMAND_LINE_PARAMETER_TYPE_SIZE;
+            break;
+        }
+
+        if (desc[i].help) {
+            info->has_help = true;
+            info->help = g_strdup(desc[i].help);
+        }
+        if (desc[i].def_value_str) {
+            info->has_q_default = true;
+            info->q_default = g_strdup(desc[i].def_value_str);
+        }
+
+        entry = g_malloc0(sizeof(*entry));
+        entry->value = info;
+        entry->next = param_list;
+        param_list = entry;
+    }
+
+    return param_list;
+}
+
+/* remove repeated entry from the info list */
+static void cleanup_infolist(CommandLineParameterInfoList *head)
+{
+    CommandLineParameterInfoList *pre_entry, *cur, *del_entry;
+
+    cur = head;
+    while (cur->next) {
+        pre_entry = head;
+        while (pre_entry != cur->next) {
+            if (!strcmp(pre_entry->value->name, cur->next->value->name)) {
+                del_entry = cur->next;
+                cur->next = cur->next->next;
+                del_entry->next = NULL;
+                qapi_free_CommandLineParameterInfoList(del_entry);
+                break;
+            }
+            pre_entry = pre_entry->next;
+        }
+        cur = cur->next;
+    }
+}
+
+/* merge the description items of two parameter infolists */
+static void connect_infolist(CommandLineParameterInfoList *head,
+                             CommandLineParameterInfoList *new)
+{
+    CommandLineParameterInfoList *cur;
+
+    cur = head;
+    while (cur->next) {
+        cur = cur->next;
+    }
+    cur->next = new;
+}
+
+/* access all the local QemuOptsLists for drive option */
+static CommandLineParameterInfoList *get_drive_infolist(void)
+{
+    CommandLineParameterInfoList *head = NULL, *cur;
+    int i;
+
+    for (i = 0; drive_config_groups[i] != NULL; i++) {
+        if (!head) {
+            head = query_option_descs(drive_config_groups[i]->desc);
+        } else {
+            cur = query_option_descs(drive_config_groups[i]->desc);
+            connect_infolist(head, cur);
+        }
+    }
+    cleanup_infolist(head);
+
+    return head;
+}
+
+/* restore machine options that are now machine's properties */
+static QemuOptsList machine_opts = {
+    .merge_lists = true,
+    .head = QTAILQ_HEAD_INITIALIZER(machine_opts.head),
+    .desc = {
+        {
+            .name = "type",
+            .type = QEMU_OPT_STRING,
+            .help = "emulated machine"
+        },{
+            .name = "accel",
+            .type = QEMU_OPT_STRING,
+            .help = "accelerator list",
+        },{
+            .name = "kernel_irqchip",
+            .type = QEMU_OPT_BOOL,
+            .help = "use KVM in-kernel irqchip",
+        },{
+            .name = "kvm_shadow_mem",
+            .type = QEMU_OPT_SIZE,
+            .help = "KVM shadow MMU size",
+        },{
+            .name = "kernel",
+            .type = QEMU_OPT_STRING,
+            .help = "Linux kernel image file",
+        },{
+            .name = "initrd",
+            .type = QEMU_OPT_STRING,
+            .help = "Linux initial ramdisk file",
+        },{
+            .name = "append",
+            .type = QEMU_OPT_STRING,
+            .help = "Linux kernel command line",
+        },{
+            .name = "dtb",
+            .type = QEMU_OPT_STRING,
+            .help = "Linux kernel device tree file",
+        },{
+            .name = "dumpdtb",
+            .type = QEMU_OPT_STRING,
+            .help = "Dump current dtb to a file and quit",
+        },{
+            .name = "phandle_start",
+            .type = QEMU_OPT_NUMBER,
+            .help = "The first phandle ID we may generate dynamically",
+        },{
+            .name = "dt_compatible",
+            .type = QEMU_OPT_STRING,
+            .help = "Overrides the \"compatible\" property of the dt root node",
+        },{
+            .name = "dump-guest-core",
+            .type = QEMU_OPT_BOOL,
+            .help = "Include guest memory in  a core dump",
+        },{
+            .name = "mem-merge",
+            .type = QEMU_OPT_BOOL,
+            .help = "enable/disable memory merge support",
+        },{
+            .name = "usb",
+            .type = QEMU_OPT_BOOL,
+            .help = "Set on/off to enable/disable usb",
+        },{
+            .name = "firmware",
+            .type = QEMU_OPT_STRING,
+            .help = "firmware image",
+        },{
+            .name = "iommu",
+            .type = QEMU_OPT_BOOL,
+            .help = "Set on/off to enable/disable Intel IOMMU (VT-d)",
+        },{
+            .name = "suppress-vmdesc",
+            .type = QEMU_OPT_BOOL,
+            .help = "Set on to disable self-describing migration",
+        },{
+            .name = "aes-key-wrap",
+            .type = QEMU_OPT_BOOL,
+            .help = "enable/disable AES key wrapping using the CPACF wrapping key",
+        },{
+            .name = "dea-key-wrap",
+            .type = QEMU_OPT_BOOL,
+            .help = "enable/disable DEA key wrapping using the CPACF wrapping key",
+        },{
+            .name = "loadparm",
+            .type = QEMU_OPT_STRING,
+            .help = "Up to 8 chars in set of [A-Za-z0-9. ](lower case chars"
+                    " converted to upper case) to pass to machine"
+                    " loader, boot manager, and guest kernel",
+        },
+        { /* End of list */ }
+    }
+};
+
+CommandLineOptionInfoList *qmp_query_command_line_options(bool has_option,
+                                                          const char *option,
+                                                          Error **errp)
+{
+    CommandLineOptionInfoList *conf_list = NULL, *entry;
+    CommandLineOptionInfo *info;
+    int i;
+
+    for (i = 0; vm_config_groups[i] != NULL; i++) {
+        if (!has_option || !strcmp(option, vm_config_groups[i]->name)) {
+            info = g_malloc0(sizeof(*info));
+            info->option = g_strdup(vm_config_groups[i]->name);
+            if (!strcmp("drive", vm_config_groups[i]->name)) {
+                info->parameters = get_drive_infolist();
+            } else if (!strcmp("machine", vm_config_groups[i]->name)) {
+                info->parameters = query_option_descs(machine_opts.desc);
+            } else {
+                info->parameters =
+                    query_option_descs(vm_config_groups[i]->desc);
+            }
+            entry = g_malloc0(sizeof(*entry));
+            entry->value = info;
+            entry->next = conf_list;
+            conf_list = entry;
+        }
+    }
+
+    if (conf_list == NULL) {
+        error_setg(errp, "invalid option name: %s", option);
+    }
+
+    return conf_list;
+}
+
+QemuOptsList *qemu_find_opts_err(const char *group, Error **errp)
+{
+    return find_list(vm_config_groups, group, errp);
+}
+
+void qemu_add_drive_opts(QemuOptsList *list)
+{
+    int entries, i;
+
+    entries = ARRAY_SIZE(drive_config_groups);
+    entries--; /* keep list NULL terminated */
+    for (i = 0; i < entries; i++) {
+        if (drive_config_groups[i] == NULL) {
+            drive_config_groups[i] = list;
+            return;
+        }
+    }
+    fprintf(stderr, "ran out of space in drive_config_groups");
+    abort();
+}
+
+void qemu_add_opts(QemuOptsList *list)
+{
+    int entries, i;
+
+    entries = ARRAY_SIZE(vm_config_groups);
+    entries--; /* keep list NULL terminated */
+    for (i = 0; i < entries; i++) {
+        if (vm_config_groups[i] == NULL) {
+            vm_config_groups[i] = list;
+            return;
+        }
+    }
+    fprintf(stderr, "ran out of space in vm_config_groups");
+    abort();
+}
+
+int qemu_set_option(const char *str)
+{
+    Error *local_err = NULL;
+    char group[64], id[64], arg[64];
+    QemuOptsList *list;
+    QemuOpts *opts;
+    int rc, offset;
+
+    rc = sscanf(str, "%63[^.].%63[^.].%63[^=]%n", group, id, arg, &offset);
+    if (rc < 3 || str[offset] != '=') {
+        error_report("can't parse: \"%s\"", str);
+        return -1;
+    }
+
+    list = qemu_find_opts(group);
+    if (list == NULL) {
+        return -1;
+    }
+
+    opts = qemu_opts_find(list, id);
+    if (!opts) {
+        error_report("there is no %s \"%s\" defined",
+                     list->name, id);
+        return -1;
+    }
+
+    if (!qemu_opt_set(opts, arg, str + offset + 1, &local_err)) {
+        error_report_err(local_err);
+        return -1;
+    }
+    return 0;
+}
+
+struct ConfigWriteData {
+    QemuOptsList *list;
+    FILE *fp;
+};
+
+static int config_write_opt(void *opaque, const char *name, const char *value,
+                            Error **errp)
+{
+    struct ConfigWriteData *data = opaque;
+
+    fprintf(data->fp, "  %s = \"%s\"\n", name, value);
+    return 0;
+}
+
+static int config_write_opts(void *opaque, QemuOpts *opts, Error **errp)
+{
+    struct ConfigWriteData *data = opaque;
+    const char *id = qemu_opts_id(opts);
+
+    if (id) {
+        fprintf(data->fp, "[%s \"%s\"]\n", data->list->name, id);
+    } else {
+        fprintf(data->fp, "[%s]\n", data->list->name);
+    }
+    qemu_opt_foreach(opts, config_write_opt, data, NULL);
+    fprintf(data->fp, "\n");
+    return 0;
+}
+
+void qemu_config_write(FILE *fp)
+{
+    struct ConfigWriteData data = { .fp = fp };
+    QemuOptsList **lists = vm_config_groups;
+    int i;
+
+    fprintf(fp, "# qemu config file\n\n");
+    for (i = 0; lists[i] != NULL; i++) {
+        data.list = lists[i];
+        qemu_opts_foreach(data.list, config_write_opts, &data, NULL);
+    }
+}
+
+/* Returns number of config groups on success, -errno on error */
+int qemu_config_parse(FILE *fp, QemuOptsList **lists, const char *fname)
+{
+    char line[1024], group[64], id[64], arg[64], value[1024];
+    Location loc;
+    QemuOptsList *list = NULL;
+    Error *local_err = NULL;
+    QemuOpts *opts = NULL;
+    int res = -EINVAL, lno = 0;
+    int count = 0;
+
+    loc_push_none(&loc);
+    while (fgets(line, sizeof(line), fp) != NULL) {
+        loc_set_file(fname, ++lno);
+        if (line[0] == '\n') {
+            /* skip empty lines */
+            continue;
+        }
+        if (line[0] == '#') {
+            /* comment */
+            continue;
+        }
+        if (sscanf(line, "[%63s \"%63[^\"]\"]", group, id) == 2) {
+            /* group with id */
+            list = find_list(lists, group, &local_err);
+            if (local_err) {
+                error_report_err(local_err);
+                goto out;
+            }
+            opts = qemu_opts_create(list, id, 1, NULL);
+            count++;
+            continue;
+        }
+        if (sscanf(line, "[%63[^]]]", group) == 1) {
+            /* group without id */
+            list = find_list(lists, group, &local_err);
+            if (local_err) {
+                error_report_err(local_err);
+                goto out;
+            }
+            opts = qemu_opts_create(list, NULL, 0, &error_abort);
+            count++;
+            continue;
+        }
+        value[0] = '\0';
+        if (sscanf(line, " %63s = \"%1023[^\"]\"", arg, value) == 2 ||
+            sscanf(line, " %63s = \"\"", arg) == 1) {
+            /* arg = value */
+            if (opts == NULL) {
+                error_report("no group defined");
+                goto out;
+            }
+            if (!qemu_opt_set(opts, arg, value, &local_err)) {
+                error_report_err(local_err);
+                goto out;
+            }
+            continue;
+        }
+        error_report("parse error");
+        goto out;
+    }
+    if (ferror(fp)) {
+        error_report("error reading file");
+        goto out;
+    }
+    res = count;
+out:
+    loc_pop(&loc);
+    return res;
+}
+
+int qemu_read_config_file(const char *filename)
+{
+    FILE *f = fopen(filename, "r");
+    int ret;
+
+    if (f == NULL) {
+        return -errno;
+    }
+
+    ret = qemu_config_parse(f, vm_config_groups, filename);
+    fclose(f);
+    return ret;
+}
+
+static void config_parse_qdict_section(QDict *options, QemuOptsList *opts,
+                                       Error **errp)
+{
+    QemuOpts *subopts;
+    QDict *subqdict;
+    QList *list = NULL;
+    size_t orig_size, enum_size;
+    char *prefix;
+
+    prefix = g_strdup_printf("%s.", opts->name);
+    qdict_extract_subqdict(options, &subqdict, prefix);
+    g_free(prefix);
+    orig_size = qdict_size(subqdict);
+    if (!orig_size) {
+        goto out;
+    }
+
+    subopts = qemu_opts_create(opts, NULL, 0, errp);
+    if (!subopts) {
+        goto out;
+    }
+
+    if (!qemu_opts_absorb_qdict(subopts, subqdict, errp)) {
+        goto out;
+    }
+
+    enum_size = qdict_size(subqdict);
+    if (enum_size < orig_size && enum_size) {
+        error_setg(errp, "Unknown option '%s' for [%s]",
+                   qdict_first(subqdict)->key, opts->name);
+        goto out;
+    }
+
+    if (enum_size) {
+        /* Multiple, enumerated sections */
+        QListEntry *list_entry;
+        unsigned i = 0;
+
+        /* Not required anymore */
+        qemu_opts_del(subopts);
+
+        qdict_array_split(subqdict, &list);
+        if (qdict_size(subqdict)) {
+            error_setg(errp, "Unused option '%s' for [%s]",
+                       qdict_first(subqdict)->key, opts->name);
+            goto out;
+        }
+
+        QLIST_FOREACH_ENTRY(list, list_entry) {
+            QDict *section = qobject_to(QDict, qlist_entry_obj(list_entry));
+            char *opt_name;
+
+            if (!section) {
+                error_setg(errp, "[%s] section (index %u) does not consist of "
+                           "keys", opts->name, i);
+                goto out;
+            }
+
+            opt_name = g_strdup_printf("%s.%u", opts->name, i++);
+            subopts = qemu_opts_create(opts, opt_name, 1, errp);
+            g_free(opt_name);
+            if (!subopts) {
+                goto out;
+            }
+
+            if (!qemu_opts_absorb_qdict(subopts, section, errp)) {
+                qemu_opts_del(subopts);
+                goto out;
+            }
+
+            if (qdict_size(section)) {
+                error_setg(errp, "[%s] section doesn't support the option '%s'",
+                           opts->name, qdict_first(section)->key);
+                qemu_opts_del(subopts);
+                goto out;
+            }
+        }
+    }
+
+out:
+    qobject_unref(subqdict);
+    qobject_unref(list);
+}
+
+void qemu_config_parse_qdict(QDict *options, QemuOptsList **lists,
+                             Error **errp)
+{
+    int i;
+    Error *local_err = NULL;
+
+    for (i = 0; lists[i]; i++) {
+        config_parse_qdict_section(options, lists[i], &local_err);
+        if (local_err) {
+            error_propagate(errp, local_err);
+            return;
+        }
+    }
+}
diff --git a/util/qemu-coroutine-io.c b/util/qemu-coroutine-io.c
new file mode 100644
index 000000000..5b80bb416
--- /dev/null
+++ b/util/qemu-coroutine-io.c
@@ -0,0 +1,93 @@
+/*
+ * Coroutine-aware I/O functions
+ *
+ * Copyright (C) 2009-2010 Nippon Telegraph and Telephone Corporation.
+ * Copyright (c) 2011, Red Hat, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+#include "qemu/osdep.h"
+#include "qemu-common.h"
+#include "qemu/sockets.h"
+#include "qemu/coroutine.h"
+#include "qemu/iov.h"
+#include "qemu/main-loop.h"
+
+ssize_t coroutine_fn
+qemu_co_sendv_recvv(int sockfd, struct iovec *iov, unsigned iov_cnt,
+                    size_t offset, size_t bytes, bool do_send)
+{
+    size_t done = 0;
+    ssize_t ret;
+    while (done < bytes) {
+        ret = iov_send_recv(sockfd, iov, iov_cnt,
+                            offset + done, bytes - done, do_send);
+        if (ret > 0) {
+            done += ret;
+        } else if (ret < 0) {
+            if (errno == EAGAIN || errno == EWOULDBLOCK) {
+                qemu_coroutine_yield();
+            } else if (done == 0) {
+                return -errno;
+            } else {
+                break;
+            }
+        } else if (ret == 0 && !do_send) {
+            /* write (send) should never return 0.
+             * read (recv) returns 0 for end-of-file (-data).
+             * In both cases there's little point retrying,
+             * but we do for write anyway, just in case */
+            break;
+        }
+    }
+    return done;
+}
+
+ssize_t coroutine_fn
+qemu_co_send_recv(int sockfd, void *buf, size_t bytes, bool do_send)
+{
+    struct iovec iov = { .iov_base = buf, .iov_len = bytes };
+    return qemu_co_sendv_recvv(sockfd, &iov, 1, 0, bytes, do_send);
+}
+
+typedef struct {
+    AioContext *ctx;
+    Coroutine *co;
+    int fd;
+} FDYieldUntilData;
+
+static void fd_coroutine_enter(void *opaque)
+{
+    FDYieldUntilData *data = opaque;
+    aio_set_fd_handler(data->ctx, data->fd, false, NULL, NULL, NULL, NULL);
+    qemu_coroutine_enter(data->co);
+}
+
+void coroutine_fn yield_until_fd_readable(int fd)
+{
+    FDYieldUntilData data;
+
+    assert(qemu_in_coroutine());
+    data.ctx = qemu_get_current_aio_context();
+    data.co = qemu_coroutine_self();
+    data.fd = fd;
+    aio_set_fd_handler(
+        data.ctx, fd, false, fd_coroutine_enter, NULL, NULL, &data);
+    qemu_coroutine_yield();
+}
diff --git a/util/qemu-coroutine-lock.c b/util/qemu-coroutine-lock.c
new file mode 100644
index 000000000..5816bf890
--- /dev/null
+++ b/util/qemu-coroutine-lock.c
@@ -0,0 +1,424 @@
+/*
+ * coroutine queues and locks
+ *
+ * Copyright (c) 2011 Kevin Wolf 
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ *
+ * The lock-free mutex implementation is based on OSv
+ * (core/lfmutex.cc, include/lockfree/mutex.hh).
+ * Copyright (C) 2013 Cloudius Systems, Ltd.
+ */
+
+#include "qemu/osdep.h"
+#include "qemu/coroutine.h"
+#include "qemu/coroutine_int.h"
+#include "qemu/processor.h"
+#include "qemu/queue.h"
+#include "block/aio.h"
+#include "trace.h"
+
+void qemu_co_queue_init(CoQueue *queue)
+{
+    QSIMPLEQ_INIT(&queue->entries);
+}
+
+void coroutine_fn qemu_co_queue_wait_impl(CoQueue *queue, QemuLockable *lock)
+{
+    Coroutine *self = qemu_coroutine_self();
+    QSIMPLEQ_INSERT_TAIL(&queue->entries, self, co_queue_next);
+
+    if (lock) {
+        qemu_lockable_unlock(lock);
+    }
+
+    /* There is no race condition here.  Other threads will call
+     * aio_co_schedule on our AioContext, which can reenter this
+     * coroutine but only after this yield and after the main loop
+     * has gone through the next iteration.
+     */
+    qemu_coroutine_yield();
+    assert(qemu_in_coroutine());
+
+    /* TODO: OSv implements wait morphing here, where the wakeup
+     * primitive automatically places the woken coroutine on the
+     * mutex's queue.  This avoids the thundering herd effect.
+     * This could be implemented for CoMutexes, but not really for
+     * other cases of QemuLockable.
+     */
+    if (lock) {
+        qemu_lockable_lock(lock);
+    }
+}
+
+static bool qemu_co_queue_do_restart(CoQueue *queue, bool single)
+{
+    Coroutine *next;
+
+    if (QSIMPLEQ_EMPTY(&queue->entries)) {
+        return false;
+    }
+
+    while ((next = QSIMPLEQ_FIRST(&queue->entries)) != NULL) {
+        QSIMPLEQ_REMOVE_HEAD(&queue->entries, co_queue_next);
+        aio_co_wake(next);
+        if (single) {
+            break;
+        }
+    }
+    return true;
+}
+
+bool qemu_co_queue_next(CoQueue *queue)
+{
+    return qemu_co_queue_do_restart(queue, true);
+}
+
+void qemu_co_queue_restart_all(CoQueue *queue)
+{
+    qemu_co_queue_do_restart(queue, false);
+}
+
+bool qemu_co_enter_next_impl(CoQueue *queue, QemuLockable *lock)
+{
+    Coroutine *next;
+
+    next = QSIMPLEQ_FIRST(&queue->entries);
+    if (!next) {
+        return false;
+    }
+
+    QSIMPLEQ_REMOVE_HEAD(&queue->entries, co_queue_next);
+    if (lock) {
+        qemu_lockable_unlock(lock);
+    }
+    aio_co_wake(next);
+    if (lock) {
+        qemu_lockable_lock(lock);
+    }
+    return true;
+}
+
+bool qemu_co_queue_empty(CoQueue *queue)
+{
+    return QSIMPLEQ_FIRST(&queue->entries) == NULL;
+}
+
+/* The wait records are handled with a multiple-producer, single-consumer
+ * lock-free queue.  There cannot be two concurrent pop_waiter() calls
+ * because pop_waiter() can only be called while mutex->handoff is zero.
+ * This can happen in three cases:
+ * - in qemu_co_mutex_unlock, before the hand-off protocol has started.
+ *   In this case, qemu_co_mutex_lock will see mutex->handoff == 0 and
+ *   not take part in the handoff.
+ * - in qemu_co_mutex_lock, if it steals the hand-off responsibility from
+ *   qemu_co_mutex_unlock.  In this case, qemu_co_mutex_unlock will fail
+ *   the cmpxchg (it will see either 0 or the next sequence value) and
+ *   exit.  The next hand-off cannot begin until qemu_co_mutex_lock has
+ *   woken up someone.
+ * - in qemu_co_mutex_unlock, if it takes the hand-off token itself.
+ *   In this case another iteration starts with mutex->handoff == 0;
+ *   a concurrent qemu_co_mutex_lock will fail the cmpxchg, and
+ *   qemu_co_mutex_unlock will go back to case (1).
+ *
+ * The following functions manage this queue.
+ */
+typedef struct CoWaitRecord {
+    Coroutine *co;
+    QSLIST_ENTRY(CoWaitRecord) next;
+} CoWaitRecord;
+
+static void push_waiter(CoMutex *mutex, CoWaitRecord *w)
+{
+    w->co = qemu_coroutine_self();
+    QSLIST_INSERT_HEAD_ATOMIC(&mutex->from_push, w, next);
+}
+
+static void move_waiters(CoMutex *mutex)
+{
+    QSLIST_HEAD(, CoWaitRecord) reversed;
+    QSLIST_MOVE_ATOMIC(&reversed, &mutex->from_push);
+    while (!QSLIST_EMPTY(&reversed)) {
+        CoWaitRecord *w = QSLIST_FIRST(&reversed);
+        QSLIST_REMOVE_HEAD(&reversed, next);
+        QSLIST_INSERT_HEAD(&mutex->to_pop, w, next);
+    }
+}
+
+static CoWaitRecord *pop_waiter(CoMutex *mutex)
+{
+    CoWaitRecord *w;
+
+    if (QSLIST_EMPTY(&mutex->to_pop)) {
+        move_waiters(mutex);
+        if (QSLIST_EMPTY(&mutex->to_pop)) {
+            return NULL;
+        }
+    }
+    w = QSLIST_FIRST(&mutex->to_pop);
+    QSLIST_REMOVE_HEAD(&mutex->to_pop, next);
+    return w;
+}
+
+static bool has_waiters(CoMutex *mutex)
+{
+    return QSLIST_EMPTY(&mutex->to_pop) || QSLIST_EMPTY(&mutex->from_push);
+}
+
+void qemu_co_mutex_init(CoMutex *mutex)
+{
+    memset(mutex, 0, sizeof(*mutex));
+}
+
+static void coroutine_fn qemu_co_mutex_wake(CoMutex *mutex, Coroutine *co)
+{
+    /* Read co before co->ctx; pairs with smp_wmb() in
+     * qemu_coroutine_enter().
+     */
+    smp_read_barrier_depends();
+    mutex->ctx = co->ctx;
+    aio_co_wake(co);
+}
+
+static void coroutine_fn qemu_co_mutex_lock_slowpath(AioContext *ctx,
+                                                     CoMutex *mutex)
+{
+    Coroutine *self = qemu_coroutine_self();
+    CoWaitRecord w;
+    unsigned old_handoff;
+
+    trace_qemu_co_mutex_lock_entry(mutex, self);
+    w.co = self;
+    push_waiter(mutex, &w);
+
+    /* This is the "Responsibility Hand-Off" protocol; a lock() picks from
+     * a concurrent unlock() the responsibility of waking somebody up.
+     */
+    old_handoff = qatomic_mb_read(&mutex->handoff);
+    if (old_handoff &&
+        has_waiters(mutex) &&
+        qatomic_cmpxchg(&mutex->handoff, old_handoff, 0) == old_handoff) {
+        /* There can be no concurrent pops, because there can be only
+         * one active handoff at a time.
+         */
+        CoWaitRecord *to_wake = pop_waiter(mutex);
+        Coroutine *co = to_wake->co;
+        if (co == self) {
+            /* We got the lock ourselves!  */
+            assert(to_wake == &w);
+            mutex->ctx = ctx;
+            return;
+        }
+
+        qemu_co_mutex_wake(mutex, co);
+    }
+
+    qemu_coroutine_yield();
+    trace_qemu_co_mutex_lock_return(mutex, self);
+}
+
+void coroutine_fn qemu_co_mutex_lock(CoMutex *mutex)
+{
+    AioContext *ctx = qemu_get_current_aio_context();
+    Coroutine *self = qemu_coroutine_self();
+    int waiters, i;
+
+    /* Running a very small critical section on pthread_mutex_t and CoMutex
+     * shows that pthread_mutex_t is much faster because it doesn't actually
+     * go to sleep.  What happens is that the critical section is shorter
+     * than the latency of entering the kernel and thus FUTEX_WAIT always
+     * fails.  With CoMutex there is no such latency but you still want to
+     * avoid wait and wakeup.  So introduce it artificially.
+     */
+    i = 0;
+retry_fast_path:
+    waiters = qatomic_cmpxchg(&mutex->locked, 0, 1);
+    if (waiters != 0) {
+        while (waiters == 1 && ++i < 1000) {
+            if (qatomic_read(&mutex->ctx) == ctx) {
+                break;
+            }
+            if (qatomic_read(&mutex->locked) == 0) {
+                goto retry_fast_path;
+            }
+            cpu_relax();
+        }
+        waiters = qatomic_fetch_inc(&mutex->locked);
+    }
+
+    if (waiters == 0) {
+        /* Uncontended.  */
+        trace_qemu_co_mutex_lock_uncontended(mutex, self);
+        mutex->ctx = ctx;
+    } else {
+        qemu_co_mutex_lock_slowpath(ctx, mutex);
+    }
+    mutex->holder = self;
+    self->locks_held++;
+}
+
+void coroutine_fn qemu_co_mutex_unlock(CoMutex *mutex)
+{
+    Coroutine *self = qemu_coroutine_self();
+
+    trace_qemu_co_mutex_unlock_entry(mutex, self);
+
+    assert(mutex->locked);
+    assert(mutex->holder == self);
+    assert(qemu_in_coroutine());
+
+    mutex->ctx = NULL;
+    mutex->holder = NULL;
+    self->locks_held--;
+    if (qatomic_fetch_dec(&mutex->locked) == 1) {
+        /* No waiting qemu_co_mutex_lock().  Pfew, that was easy!  */
+        return;
+    }
+
+    for (;;) {
+        CoWaitRecord *to_wake = pop_waiter(mutex);
+        unsigned our_handoff;
+
+        if (to_wake) {
+            qemu_co_mutex_wake(mutex, to_wake->co);
+            break;
+        }
+
+        /* Some concurrent lock() is in progress (we know this because
+         * mutex->locked was >1) but it hasn't yet put itself on the wait
+         * queue.  Pick a sequence number for the handoff protocol (not 0).
+         */
+        if (++mutex->sequence == 0) {
+            mutex->sequence = 1;
+        }
+
+        our_handoff = mutex->sequence;
+        qatomic_mb_set(&mutex->handoff, our_handoff);
+        if (!has_waiters(mutex)) {
+            /* The concurrent lock has not added itself yet, so it
+             * will be able to pick our handoff.
+             */
+            break;
+        }
+
+        /* Try to do the handoff protocol ourselves; if somebody else has
+         * already taken it, however, we're done and they're responsible.
+         */
+        if (qatomic_cmpxchg(&mutex->handoff, our_handoff, 0) != our_handoff) {
+            break;
+        }
+    }
+
+    trace_qemu_co_mutex_unlock_return(mutex, self);
+}
+
+void qemu_co_rwlock_init(CoRwlock *lock)
+{
+    memset(lock, 0, sizeof(*lock));
+    qemu_co_queue_init(&lock->queue);
+    qemu_co_mutex_init(&lock->mutex);
+}
+
+void qemu_co_rwlock_rdlock(CoRwlock *lock)
+{
+    Coroutine *self = qemu_coroutine_self();
+
+    qemu_co_mutex_lock(&lock->mutex);
+    /* For fairness, wait if a writer is in line.  */
+    while (lock->pending_writer) {
+        qemu_co_queue_wait(&lock->queue, &lock->mutex);
+    }
+    lock->reader++;
+    qemu_co_mutex_unlock(&lock->mutex);
+
+    /* The rest of the read-side critical section is run without the mutex.  */
+    self->locks_held++;
+}
+
+void qemu_co_rwlock_unlock(CoRwlock *lock)
+{
+    Coroutine *self = qemu_coroutine_self();
+
+    assert(qemu_in_coroutine());
+    if (!lock->reader) {
+        /* The critical section started in qemu_co_rwlock_wrlock.  */
+        qemu_co_queue_restart_all(&lock->queue);
+    } else {
+        self->locks_held--;
+
+        qemu_co_mutex_lock(&lock->mutex);
+        lock->reader--;
+        assert(lock->reader >= 0);
+        /* Wakeup only one waiting writer */
+        if (!lock->reader) {
+            qemu_co_queue_next(&lock->queue);
+        }
+    }
+    qemu_co_mutex_unlock(&lock->mutex);
+}
+
+void qemu_co_rwlock_downgrade(CoRwlock *lock)
+{
+    Coroutine *self = qemu_coroutine_self();
+
+    /* lock->mutex critical section started in qemu_co_rwlock_wrlock or
+     * qemu_co_rwlock_upgrade.
+     */
+    assert(lock->reader == 0);
+    lock->reader++;
+    qemu_co_mutex_unlock(&lock->mutex);
+
+    /* The rest of the read-side critical section is run without the mutex.  */
+    self->locks_held++;
+}
+
+void qemu_co_rwlock_wrlock(CoRwlock *lock)
+{
+    qemu_co_mutex_lock(&lock->mutex);
+    lock->pending_writer++;
+    while (lock->reader) {
+        qemu_co_queue_wait(&lock->queue, &lock->mutex);
+    }
+    lock->pending_writer--;
+
+    /* The rest of the write-side critical section is run with
+     * the mutex taken, so that lock->reader remains zero.
+     * There is no need to update self->locks_held.
+     */
+}
+
+void qemu_co_rwlock_upgrade(CoRwlock *lock)
+{
+    Coroutine *self = qemu_coroutine_self();
+
+    qemu_co_mutex_lock(&lock->mutex);
+    assert(lock->reader > 0);
+    lock->reader--;
+    lock->pending_writer++;
+    while (lock->reader) {
+        qemu_co_queue_wait(&lock->queue, &lock->mutex);
+    }
+    lock->pending_writer--;
+
+    /* The rest of the write-side critical section is run with
+     * the mutex taken, similar to qemu_co_rwlock_wrlock.  Do
+     * not account for the lock twice in self->locks_held.
+     */
+    self->locks_held--;
+}
diff --git a/util/qemu-coroutine-sleep.c b/util/qemu-coroutine-sleep.c
new file mode 100644
index 000000000..8c4dac4fd
--- /dev/null
+++ b/util/qemu-coroutine-sleep.c
@@ -0,0 +1,79 @@
+/*
+ * QEMU coroutine sleep
+ *
+ * Copyright IBM, Corp. 2011
+ *
+ * Authors:
+ *  Stefan Hajnoczi    
+ *
+ * This work is licensed under the terms of the GNU LGPL, version 2 or later.
+ * See the COPYING.LIB file in the top-level directory.
+ *
+ */
+
+#include "qemu/osdep.h"
+#include "qemu/coroutine.h"
+#include "qemu/coroutine_int.h"
+#include "qemu/timer.h"
+#include "block/aio.h"
+
+static const char *qemu_co_sleep_ns__scheduled = "qemu_co_sleep_ns";
+
+struct QemuCoSleepState {
+    Coroutine *co;
+    QEMUTimer *ts;
+    QemuCoSleepState **user_state_pointer;
+};
+
+void qemu_co_sleep_wake(QemuCoSleepState *sleep_state)
+{
+    /* Write of schedule protected by barrier write in aio_co_schedule */
+    const char *scheduled = qatomic_cmpxchg(&sleep_state->co->scheduled,
+                                           qemu_co_sleep_ns__scheduled, NULL);
+
+    assert(scheduled == qemu_co_sleep_ns__scheduled);
+    if (sleep_state->user_state_pointer) {
+        *sleep_state->user_state_pointer = NULL;
+    }
+    timer_del(sleep_state->ts);
+    aio_co_wake(sleep_state->co);
+}
+
+static void co_sleep_cb(void *opaque)
+{
+    qemu_co_sleep_wake(opaque);
+}
+
+void coroutine_fn qemu_co_sleep_ns_wakeable(QEMUClockType type, int64_t ns,
+                                            QemuCoSleepState **sleep_state)
+{
+    AioContext *ctx = qemu_get_current_aio_context();
+    QemuCoSleepState state = {
+        .co = qemu_coroutine_self(),
+        .ts = aio_timer_new(ctx, type, SCALE_NS, co_sleep_cb, &state),
+        .user_state_pointer = sleep_state,
+    };
+
+    const char *scheduled = qatomic_cmpxchg(&state.co->scheduled, NULL,
+                                           qemu_co_sleep_ns__scheduled);
+    if (scheduled) {
+        fprintf(stderr,
+                "%s: Co-routine was already scheduled in '%s'\n",
+                __func__, scheduled);
+        abort();
+    }
+
+    if (sleep_state) {
+        *sleep_state = &state;
+    }
+    timer_mod(state.ts, qemu_clock_get_ns(type) + ns);
+    qemu_coroutine_yield();
+    if (sleep_state) {
+        /*
+         * Note that *sleep_state is cleared during qemu_co_sleep_wake
+         * before resuming this coroutine.
+         */
+        assert(*sleep_state == NULL);
+    }
+    timer_free(state.ts);
+}
diff --git a/util/qemu-coroutine.c b/util/qemu-coroutine.c
new file mode 100644
index 000000000..38fb6d308
--- /dev/null
+++ b/util/qemu-coroutine.c
@@ -0,0 +1,204 @@
+/*
+ * QEMU coroutines
+ *
+ * Copyright IBM, Corp. 2011
+ *
+ * Authors:
+ *  Stefan Hajnoczi    
+ *  Kevin Wolf         
+ *
+ * This work is licensed under the terms of the GNU LGPL, version 2 or later.
+ * See the COPYING.LIB file in the top-level directory.
+ *
+ */
+
+#include "qemu/osdep.h"
+#include "trace.h"
+#include "qemu/thread.h"
+#include "qemu/atomic.h"
+#include "qemu/coroutine.h"
+#include "qemu/coroutine_int.h"
+#include "block/aio.h"
+
+enum {
+    POOL_BATCH_SIZE = 64,
+};
+
+/** Free list to speed up creation */
+static QSLIST_HEAD(, Coroutine) release_pool = QSLIST_HEAD_INITIALIZER(pool);
+static unsigned int release_pool_size;
+static __thread QSLIST_HEAD(, Coroutine) alloc_pool = QSLIST_HEAD_INITIALIZER(pool);
+static __thread unsigned int alloc_pool_size;
+static __thread Notifier coroutine_pool_cleanup_notifier;
+
+static void coroutine_pool_cleanup(Notifier *n, void *value)
+{
+    Coroutine *co;
+    Coroutine *tmp;
+
+    QSLIST_FOREACH_SAFE(co, &alloc_pool, pool_next, tmp) {
+        QSLIST_REMOVE_HEAD(&alloc_pool, pool_next);
+        qemu_coroutine_delete(co);
+    }
+}
+
+Coroutine *qemu_coroutine_create(CoroutineEntry *entry, void *opaque)
+{
+    Coroutine *co = NULL;
+
+    if (CONFIG_COROUTINE_POOL) {
+        co = QSLIST_FIRST(&alloc_pool);
+        if (!co) {
+            if (release_pool_size > POOL_BATCH_SIZE) {
+                /* Slow path; a good place to register the destructor, too.  */
+                if (!coroutine_pool_cleanup_notifier.notify) {
+                    coroutine_pool_cleanup_notifier.notify = coroutine_pool_cleanup;
+                    qemu_thread_atexit_add(&coroutine_pool_cleanup_notifier);
+                }
+
+                /* This is not exact; there could be a little skew between
+                 * release_pool_size and the actual size of release_pool.  But
+                 * it is just a heuristic, it does not need to be perfect.
+                 */
+                alloc_pool_size = qatomic_xchg(&release_pool_size, 0);
+                QSLIST_MOVE_ATOMIC(&alloc_pool, &release_pool);
+                co = QSLIST_FIRST(&alloc_pool);
+            }
+        }
+        if (co) {
+            QSLIST_REMOVE_HEAD(&alloc_pool, pool_next);
+            alloc_pool_size--;
+        }
+    }
+
+    if (!co) {
+        co = qemu_coroutine_new();
+    }
+
+    co->entry = entry;
+    co->entry_arg = opaque;
+    QSIMPLEQ_INIT(&co->co_queue_wakeup);
+    return co;
+}
+
+static void coroutine_delete(Coroutine *co)
+{
+    co->caller = NULL;
+
+    if (CONFIG_COROUTINE_POOL) {
+        if (release_pool_size < POOL_BATCH_SIZE * 2) {
+            QSLIST_INSERT_HEAD_ATOMIC(&release_pool, co, pool_next);
+            qatomic_inc(&release_pool_size);
+            return;
+        }
+        if (alloc_pool_size < POOL_BATCH_SIZE) {
+            QSLIST_INSERT_HEAD(&alloc_pool, co, pool_next);
+            alloc_pool_size++;
+            return;
+        }
+    }
+
+    qemu_coroutine_delete(co);
+}
+
+void qemu_aio_coroutine_enter(AioContext *ctx, Coroutine *co)
+{
+    QSIMPLEQ_HEAD(, Coroutine) pending = QSIMPLEQ_HEAD_INITIALIZER(pending);
+    Coroutine *from = qemu_coroutine_self();
+
+    QSIMPLEQ_INSERT_TAIL(&pending, co, co_queue_next);
+
+    /* Run co and any queued coroutines */
+    while (!QSIMPLEQ_EMPTY(&pending)) {
+        Coroutine *to = QSIMPLEQ_FIRST(&pending);
+        CoroutineAction ret;
+
+        /* Cannot rely on the read barrier for to in aio_co_wake(), as there are
+         * callers outside of aio_co_wake() */
+        const char *scheduled = qatomic_mb_read(&to->scheduled);
+
+        QSIMPLEQ_REMOVE_HEAD(&pending, co_queue_next);
+
+        trace_qemu_aio_coroutine_enter(ctx, from, to, to->entry_arg);
+
+        /* if the Coroutine has already been scheduled, entering it again will
+         * cause us to enter it twice, potentially even after the coroutine has
+         * been deleted */
+        if (scheduled) {
+            fprintf(stderr,
+                    "%s: Co-routine was already scheduled in '%s'\n",
+                    __func__, scheduled);
+            abort();
+        }
+
+        if (to->caller) {
+            fprintf(stderr, "Co-routine re-entered recursively\n");
+            abort();
+        }
+
+        to->caller = from;
+        to->ctx = ctx;
+
+        /* Store to->ctx before anything that stores to.  Matches
+         * barrier in aio_co_wake and qemu_co_mutex_wake.
+         */
+        smp_wmb();
+
+        ret = qemu_coroutine_switch(from, to, COROUTINE_ENTER);
+
+        /* Queued coroutines are run depth-first; previously pending coroutines
+         * run after those queued more recently.
+         */
+        QSIMPLEQ_PREPEND(&pending, &to->co_queue_wakeup);
+
+        switch (ret) {
+        case COROUTINE_YIELD:
+            break;
+        case COROUTINE_TERMINATE:
+            assert(!to->locks_held);
+            trace_qemu_coroutine_terminate(to);
+            coroutine_delete(to);
+            break;
+        default:
+            abort();
+        }
+    }
+}
+
+void qemu_coroutine_enter(Coroutine *co)
+{
+    qemu_aio_coroutine_enter(qemu_get_current_aio_context(), co);
+}
+
+void qemu_coroutine_enter_if_inactive(Coroutine *co)
+{
+    if (!qemu_coroutine_entered(co)) {
+        qemu_coroutine_enter(co);
+    }
+}
+
+void coroutine_fn qemu_coroutine_yield(void)
+{
+    Coroutine *self = qemu_coroutine_self();
+    Coroutine *to = self->caller;
+
+    trace_qemu_coroutine_yield(self, to);
+
+    if (!to) {
+        fprintf(stderr, "Co-routine is yielding to no one\n");
+        abort();
+    }
+
+    self->caller = NULL;
+    qemu_coroutine_switch(self, to, COROUTINE_YIELD);
+}
+
+bool qemu_coroutine_entered(Coroutine *co)
+{
+    return co->caller;
+}
+
+AioContext *coroutine_fn qemu_coroutine_get_aio_context(Coroutine *co)
+{
+    return co->ctx;
+}
diff --git a/util/qemu-error.c b/util/qemu-error.c
new file mode 100644
index 000000000..aa30f0356
--- /dev/null
+++ b/util/qemu-error.c
@@ -0,0 +1,413 @@
+/*
+ * Error reporting
+ *
+ * Copyright (C) 2010 Red Hat Inc.
+ *
+ * Authors:
+ *  Markus Armbruster ,
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
+ * See the COPYING file in the top-level directory.
+ */
+
+#include "qemu/osdep.h"
+#include "monitor/monitor.h"
+#include "qemu/error-report.h"
+
+/*
+ * @report_type is the type of message: error, warning or
+ * informational.
+ */
+typedef enum {
+    REPORT_TYPE_ERROR,
+    REPORT_TYPE_WARNING,
+    REPORT_TYPE_INFO,
+} report_type;
+
+/* Prepend timestamp to messages */
+bool error_with_timestamp;
+bool error_with_guestname;
+const char *error_guest_name;
+
+int error_printf(const char *fmt, ...)
+{
+    va_list ap;
+    int ret;
+
+    va_start(ap, fmt);
+    ret = error_vprintf(fmt, ap);
+    va_end(ap);
+    return ret;
+}
+
+int error_printf_unless_qmp(const char *fmt, ...)
+{
+    va_list ap;
+    int ret;
+
+    va_start(ap, fmt);
+    ret = error_vprintf_unless_qmp(fmt, ap);
+    va_end(ap);
+    return ret;
+}
+
+static Location std_loc = {
+    .kind = LOC_NONE
+};
+static Location *cur_loc = &std_loc;
+
+/*
+ * Push location saved in LOC onto the location stack, return it.
+ * The top of that stack is the current location.
+ * Needs a matching loc_pop().
+ */
+Location *loc_push_restore(Location *loc)
+{
+    assert(!loc->prev);
+    loc->prev = cur_loc;
+    cur_loc = loc;
+    return loc;
+}
+
+/*
+ * Initialize *LOC to "nowhere", push it onto the location stack.
+ * The top of that stack is the current location.
+ * Needs a matching loc_pop().
+ * Return LOC.
+ */
+Location *loc_push_none(Location *loc)
+{
+    loc->kind = LOC_NONE;
+    loc->prev = NULL;
+    return loc_push_restore(loc);
+}
+
+/*
+ * Pop the location stack.
+ * LOC must be the current location, i.e. the top of the stack.
+ */
+Location *loc_pop(Location *loc)
+{
+    assert(cur_loc == loc && loc->prev);
+    cur_loc = loc->prev;
+    loc->prev = NULL;
+    return loc;
+}
+
+/*
+ * Save the current location in LOC, return LOC.
+ */
+Location *loc_save(Location *loc)
+{
+    *loc = *cur_loc;
+    loc->prev = NULL;
+    return loc;
+}
+
+/*
+ * Change the current location to the one saved in LOC.
+ */
+void loc_restore(Location *loc)
+{
+    Location *prev = cur_loc->prev;
+    assert(!loc->prev);
+    *cur_loc = *loc;
+    cur_loc->prev = prev;
+}
+
+/*
+ * Change the current location to "nowhere in particular".
+ */
+void loc_set_none(void)
+{
+    cur_loc->kind = LOC_NONE;
+}
+
+/*
+ * Change the current location to argument ARGV[IDX..IDX+CNT-1].
+ */
+void loc_set_cmdline(char **argv, int idx, int cnt)
+{
+    cur_loc->kind = LOC_CMDLINE;
+    cur_loc->num = cnt;
+    cur_loc->ptr = argv + idx;
+}
+
+/*
+ * Change the current location to file FNAME, line LNO.
+ */
+void loc_set_file(const char *fname, int lno)
+{
+    assert (fname || cur_loc->kind == LOC_FILE);
+    cur_loc->kind = LOC_FILE;
+    cur_loc->num = lno;
+    if (fname) {
+        cur_loc->ptr = fname;
+    }
+}
+
+static const char *progname;
+
+/*
+ * Set the program name for error_print_loc().
+ */
+static void error_set_progname(const char *argv0)
+{
+    const char *p = strrchr(argv0, '/');
+    progname = p ? p + 1 : argv0;
+}
+
+const char *error_get_progname(void)
+{
+    return progname;
+}
+
+/*
+ * Print current location to current monitor if we have one, else to stderr.
+ */
+static void print_loc(void)
+{
+    const char *sep = "";
+    int i;
+    const char *const *argp;
+
+    if (!monitor_cur() && progname) {
+        fprintf(stderr, "%s:", progname);
+        sep = " ";
+    }
+    switch (cur_loc->kind) {
+    case LOC_CMDLINE:
+        argp = cur_loc->ptr;
+        for (i = 0; i < cur_loc->num; i++) {
+            error_printf("%s%s", sep, argp[i]);
+            sep = " ";
+        }
+        error_printf(": ");
+        break;
+    case LOC_FILE:
+        error_printf("%s:", (const char *)cur_loc->ptr);
+        if (cur_loc->num) {
+            error_printf("%d:", cur_loc->num);
+        }
+        error_printf(" ");
+        break;
+    default:
+        error_printf("%s", sep);
+    }
+}
+
+/*
+ * Print a message to current monitor if we have one, else to stderr.
+ * @report_type is the type of message: error, warning or informational.
+ * Format arguments like vsprintf().  The resulting message should be
+ * a single phrase, with no newline or trailing punctuation.
+ * Prepend the current location and append a newline.
+ */
+static void vreport(report_type type, const char *fmt, va_list ap)
+{
+    GTimeVal tv;
+    gchar *timestr;
+
+    if (error_with_timestamp && !monitor_cur()) {
+        g_get_current_time(&tv);
+        timestr = g_time_val_to_iso8601(&tv);
+        error_printf("%s ", timestr);
+        g_free(timestr);
+    }
+
+    /* Only prepend guest name if -msg guest-name and -name guest=... are set */
+    if (error_with_guestname && error_guest_name && !monitor_cur()) {
+        error_printf("%s ", error_guest_name);
+    }
+
+    print_loc();
+
+    switch (type) {
+    case REPORT_TYPE_ERROR:
+        break;
+    case REPORT_TYPE_WARNING:
+        error_printf("warning: ");
+        break;
+    case REPORT_TYPE_INFO:
+        error_printf("info: ");
+        break;
+    }
+
+    error_vprintf(fmt, ap);
+    error_printf("\n");
+}
+
+/*
+ * Print an error message to current monitor if we have one, else to stderr.
+ * Format arguments like vsprintf().  The resulting message should be
+ * a single phrase, with no newline or trailing punctuation.
+ * Prepend the current location and append a newline.
+ * It's wrong to call this in a QMP monitor.  Use error_setg() there.
+ */
+void error_vreport(const char *fmt, va_list ap)
+{
+    vreport(REPORT_TYPE_ERROR, fmt, ap);
+}
+
+/*
+ * Print a warning message to current monitor if we have one, else to stderr.
+ * Format arguments like vsprintf().  The resulting message should be
+ * a single phrase, with no newline or trailing punctuation.
+ * Prepend the current location and append a newline.
+ */
+void warn_vreport(const char *fmt, va_list ap)
+{
+    vreport(REPORT_TYPE_WARNING, fmt, ap);
+}
+
+/*
+ * Print an information message to current monitor if we have one, else to
+ * stderr.
+ * Format arguments like vsprintf().  The resulting message should be
+ * a single phrase, with no newline or trailing punctuation.
+ * Prepend the current location and append a newline.
+ */
+void info_vreport(const char *fmt, va_list ap)
+{
+    vreport(REPORT_TYPE_INFO, fmt, ap);
+}
+
+/*
+ * Print an error message to current monitor if we have one, else to stderr.
+ * Format arguments like sprintf().  The resulting message should be
+ * a single phrase, with no newline or trailing punctuation.
+ * Prepend the current location and append a newline.
+ * It's wrong to call this in a QMP monitor.  Use error_setg() there.
+ */
+void error_report(const char *fmt, ...)
+{
+    va_list ap;
+
+    va_start(ap, fmt);
+    vreport(REPORT_TYPE_ERROR, fmt, ap);
+    va_end(ap);
+}
+
+/*
+ * Print a warning message to current monitor if we have one, else to stderr.
+ * Format arguments like sprintf(). The resulting message should be a
+ * single phrase, with no newline or trailing punctuation.
+ * Prepend the current location and append a newline.
+ */
+void warn_report(const char *fmt, ...)
+{
+    va_list ap;
+
+    va_start(ap, fmt);
+    vreport(REPORT_TYPE_WARNING, fmt, ap);
+    va_end(ap);
+}
+
+/*
+ * Print an information message to current monitor if we have one, else to
+ * stderr.
+ * Format arguments like sprintf(). The resulting message should be a
+ * single phrase, with no newline or trailing punctuation.
+ * Prepend the current location and append a newline.
+ */
+void info_report(const char *fmt, ...)
+{
+    va_list ap;
+
+    va_start(ap, fmt);
+    vreport(REPORT_TYPE_INFO, fmt, ap);
+    va_end(ap);
+}
+
+/*
+ * Like error_report(), except print just once.
+ * If *printed is false, print the message, and flip *printed to true.
+ * Return whether the message was printed.
+ */
+bool error_report_once_cond(bool *printed, const char *fmt, ...)
+{
+    va_list ap;
+
+    assert(printed);
+    if (*printed) {
+        return false;
+    }
+    *printed = true;
+    va_start(ap, fmt);
+    vreport(REPORT_TYPE_ERROR, fmt, ap);
+    va_end(ap);
+    return true;
+}
+
+/*
+ * Like warn_report(), except print just once.
+ * If *printed is false, print the message, and flip *printed to true.
+ * Return whether the message was printed.
+ */
+bool warn_report_once_cond(bool *printed, const char *fmt, ...)
+{
+    va_list ap;
+
+    assert(printed);
+    if (*printed) {
+        return false;
+    }
+    *printed = true;
+    va_start(ap, fmt);
+    vreport(REPORT_TYPE_WARNING, fmt, ap);
+    va_end(ap);
+    return true;
+}
+
+static char *qemu_glog_domains;
+
+static void qemu_log_func(const gchar *log_domain,
+                          GLogLevelFlags log_level,
+                          const gchar *message,
+                          gpointer user_data)
+{
+    switch (log_level & G_LOG_LEVEL_MASK) {
+    case G_LOG_LEVEL_DEBUG:
+    case G_LOG_LEVEL_INFO:
+        /*
+         * Use same G_MESSAGES_DEBUG logic as glib to enable/disable debug
+         * messages
+         */
+        if (qemu_glog_domains == NULL) {
+            break;
+        }
+        if (strcmp(qemu_glog_domains, "all") != 0 &&
+          (log_domain == NULL || !strstr(qemu_glog_domains, log_domain))) {
+            break;
+        }
+        /* Fall through */
+    case G_LOG_LEVEL_MESSAGE:
+        info_report("%s%s%s",
+                    log_domain ?: "", log_domain ? ": " : "", message);
+
+        break;
+    case G_LOG_LEVEL_WARNING:
+        warn_report("%s%s%s",
+                    log_domain ?: "", log_domain ? ": " : "", message);
+        break;
+    case G_LOG_LEVEL_CRITICAL:
+    case G_LOG_LEVEL_ERROR:
+        error_report("%s%s%s",
+                     log_domain ?: "", log_domain ? ": " : "", message);
+        break;
+    }
+}
+
+void error_init(const char *argv0)
+{
+    /* Set the program name for error_print_loc(). */
+    error_set_progname(argv0);
+
+    /*
+     * This sets up glib logging so libraries using it also print their logs
+     * through error_report(), warn_report(), info_report().
+     */
+    g_log_set_default_handler(qemu_log_func, NULL);
+    g_warn_if_fail(qemu_glog_domains == NULL);
+    qemu_glog_domains = g_strdup(g_getenv("G_MESSAGES_DEBUG"));
+}
diff --git a/util/qemu-openpty.c b/util/qemu-openpty.c
new file mode 100644
index 000000000..eb17f5b0b
--- /dev/null
+++ b/util/qemu-openpty.c
@@ -0,0 +1,140 @@
+/*
+ * qemu-openpty.c
+ *
+ * Copyright (c) 2003-2008 Fabrice Bellard
+ * Copyright (c) 2010 Red Hat, Inc.
+ *
+ * Wrapper function qemu_openpty() implementation.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+/*
+ * This is not part of oslib-posix.c because this function
+ * uses openpty() which often in -lutil, and if we add this
+ * dependency to oslib-posix.o, every app will have to be
+ * linked with -lutil.
+ */
+
+#include "qemu/osdep.h"
+#include "qemu-common.h"
+
+#if defined HAVE_PTY_H
+# include 
+#elif defined CONFIG_BSD
+# include 
+# if defined(__FreeBSD__) || defined(__FreeBSD_kernel__) || defined(__DragonFly__)
+#  include 
+# else
+#  include 
+# endif
+#elif defined CONFIG_SOLARIS
+# include 
+# include 
+#else
+# include 
+#endif
+
+#ifdef __sun__
+
+#if !defined(HAVE_OPENPTY)
+/* Once illumos has openpty(), this is going to be removed. */
+static int openpty(int *amaster, int *aslave, char *name,
+                   struct termios *termp, struct winsize *winp)
+{
+        const char *slave;
+        int mfd = -1, sfd = -1;
+
+        *amaster = *aslave = -1;
+
+        mfd = open("/dev/ptmx", O_RDWR | O_NOCTTY);
+        if (mfd < 0)
+                goto err;
+
+        if (grantpt(mfd) == -1 || unlockpt(mfd) == -1)
+                goto err;
+
+        if ((slave = ptsname(mfd)) == NULL)
+                goto err;
+
+        if ((sfd = open(slave, O_RDONLY | O_NOCTTY)) == -1)
+                goto err;
+
+        if (ioctl(sfd, I_PUSH, "ptem") == -1 ||
+            (termp != NULL && tcgetattr(sfd, termp) < 0))
+                goto err;
+
+        if (amaster)
+                *amaster = mfd;
+        if (aslave)
+                *aslave = sfd;
+        if (winp)
+                ioctl(sfd, TIOCSWINSZ, winp);
+
+        return 0;
+
+err:
+        if (sfd != -1)
+                close(sfd);
+        close(mfd);
+        return -1;
+}
+#endif
+
+static void cfmakeraw (struct termios *termios_p)
+{
+        termios_p->c_iflag &=
+                ~(IGNBRK|BRKINT|PARMRK|ISTRIP|INLCR|IGNCR|ICRNL|IXON);
+        termios_p->c_oflag &= ~OPOST;
+        termios_p->c_lflag &= ~(ECHO|ECHONL|ICANON|ISIG|IEXTEN);
+        termios_p->c_cflag &= ~(CSIZE|PARENB);
+        termios_p->c_cflag |= CS8;
+
+        termios_p->c_cc[VMIN] = 0;
+        termios_p->c_cc[VTIME] = 0;
+}
+#endif
+
+int qemu_openpty_raw(int *aslave, char *pty_name)
+{
+    int amaster;
+    struct termios tty;
+#if defined(__OpenBSD__) || defined(__DragonFly__)
+    char pty_buf[PATH_MAX];
+#define q_ptsname(x) pty_buf
+#else
+    char *pty_buf = NULL;
+#define q_ptsname(x) ptsname(x)
+#endif
+
+    if (openpty(&amaster, aslave, pty_buf, NULL, NULL) < 0) {
+        return -1;
+    }
+
+    /* Set raw attributes on the pty. */
+    tcgetattr(*aslave, &tty);
+    cfmakeraw(&tty);
+    tcsetattr(*aslave, TCSAFLUSH, &tty);
+
+    if (pty_name) {
+        strcpy(pty_name, q_ptsname(amaster));
+    }
+
+    return amaster;
+}
diff --git a/util/qemu-option.c b/util/qemu-option.c
new file mode 100644
index 000000000..acefbc23f
--- /dev/null
+++ b/util/qemu-option.c
@@ -0,0 +1,1247 @@
+/*
+ * Commandline option parsing functions
+ *
+ * Copyright (c) 2003-2008 Fabrice Bellard
+ * Copyright (c) 2009 Kevin Wolf 
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+#include "qemu/osdep.h"
+
+#include "qapi/error.h"
+#include "qemu/error-report.h"
+#include "qapi/qmp/qbool.h"
+#include "qapi/qmp/qdict.h"
+#include "qapi/qmp/qnum.h"
+#include "qapi/qmp/qstring.h"
+#include "qapi/qmp/qerror.h"
+#include "qemu/option_int.h"
+#include "qemu/cutils.h"
+#include "qemu/id.h"
+#include "qemu/help_option.h"
+
+/*
+ * Extracts the name of an option from the parameter string (p points at the
+ * first byte of the option name)
+ *
+ * The option name is delimited by delim (usually , or =) or the string end
+ * and is copied into option. The caller is responsible for free'ing option
+ * when no longer required.
+ *
+ * The return value is the position of the delimiter/zero byte after the option
+ * name in p.
+ */
+static const char *get_opt_name(const char *p, char **option, char delim)
+{
+    char *offset = strchr(p, delim);
+
+    if (offset) {
+        *option = g_strndup(p, offset - p);
+        return offset;
+    } else {
+        *option = g_strdup(p);
+        return p + strlen(p);
+    }
+}
+
+/*
+ * Extracts the value of an option from the parameter string p (p points at the
+ * first byte of the option value)
+ *
+ * This function is comparable to get_opt_name with the difference that the
+ * delimiter is fixed to be comma which starts a new option. To specify an
+ * option value that contains commas, double each comma.
+ */
+const char *get_opt_value(const char *p, char **value)
+{
+    size_t capacity = 0, length;
+    const char *offset;
+
+    *value = NULL;
+    while (1) {
+        offset = qemu_strchrnul(p, ',');
+        length = offset - p;
+        if (*offset != '\0' && *(offset + 1) == ',') {
+            length++;
+        }
+        *value = g_renew(char, *value, capacity + length + 1);
+        strncpy(*value + capacity, p, length);
+        (*value)[capacity + length] = '\0';
+        capacity += length;
+        if (*offset == '\0' ||
+            *(offset + 1) != ',') {
+            break;
+        }
+
+        p += (offset - p) + 2;
+    }
+
+    return offset;
+}
+
+static bool parse_option_number(const char *name, const char *value,
+                                uint64_t *ret, Error **errp)
+{
+    uint64_t number;
+    int err;
+
+    err = qemu_strtou64(value, NULL, 0, &number);
+    if (err == -ERANGE) {
+        error_setg(errp, "Value '%s' is too large for parameter '%s'",
+                   value, name);
+        return false;
+    }
+    if (err) {
+        error_setg(errp, QERR_INVALID_PARAMETER_VALUE, name, "a number");
+        return false;
+    }
+    *ret = number;
+    return true;
+}
+
+static const QemuOptDesc *find_desc_by_name(const QemuOptDesc *desc,
+                                            const char *name)
+{
+    int i;
+
+    for (i = 0; desc[i].name != NULL; i++) {
+        if (strcmp(desc[i].name, name) == 0) {
+            return &desc[i];
+        }
+    }
+
+    return NULL;
+}
+
+static const char *find_default_by_name(QemuOpts *opts, const char *name)
+{
+    const QemuOptDesc *desc = find_desc_by_name(opts->list->desc, name);
+
+    return desc ? desc->def_value_str : NULL;
+}
+
+bool parse_option_size(const char *name, const char *value,
+                       uint64_t *ret, Error **errp)
+{
+    uint64_t size;
+    int err;
+
+    err = qemu_strtosz(value, NULL, &size);
+    if (err == -ERANGE) {
+        error_setg(errp, "Value '%s' is out of range for parameter '%s'",
+                   value, name);
+        return false;
+    }
+    if (err) {
+        error_setg(errp, QERR_INVALID_PARAMETER_VALUE, name,
+                   "a non-negative number below 2^64");
+        error_append_hint(errp, "Optional suffix k, M, G, T, P or E means"
+                          " kilo-, mega-, giga-, tera-, peta-\n"
+                          "and exabytes, respectively.\n");
+        return false;
+    }
+    *ret = size;
+    return true;
+}
+
+static const char *opt_type_to_string(enum QemuOptType type)
+{
+    switch (type) {
+    case QEMU_OPT_STRING:
+        return "str";
+    case QEMU_OPT_BOOL:
+        return "bool (on/off)";
+    case QEMU_OPT_NUMBER:
+        return "num";
+    case QEMU_OPT_SIZE:
+        return "size";
+    }
+
+    g_assert_not_reached();
+}
+
+/**
+ * Print the list of options available in the given list.  If
+ * @print_caption is true, a caption (including the list name, if it
+ * exists) is printed.  The options itself will be indented, so
+ * @print_caption should only be set to false if the caller prints its
+ * own custom caption (so that the indentation makes sense).
+ */
+void qemu_opts_print_help(QemuOptsList *list, bool print_caption)
+{
+    QemuOptDesc *desc;
+    int i;
+    GPtrArray *array = g_ptr_array_new();
+
+    assert(list);
+    desc = list->desc;
+    while (desc && desc->name) {
+        GString *str = g_string_new(NULL);
+        g_string_append_printf(str, "  %s=<%s>", desc->name,
+                               opt_type_to_string(desc->type));
+        if (desc->help) {
+            if (str->len < 24) {
+                g_string_append_printf(str, "%*s", 24 - (int)str->len, "");
+            }
+            g_string_append_printf(str, " - %s", desc->help);
+        }
+        g_ptr_array_add(array, g_string_free(str, false));
+        desc++;
+    }
+
+    g_ptr_array_sort(array, (GCompareFunc)qemu_pstrcmp0);
+    if (print_caption && array->len > 0) {
+        if (list->name) {
+            printf("%s options:\n", list->name);
+        } else {
+            printf("Options:\n");
+        }
+    } else if (array->len == 0) {
+        if (list->name) {
+            printf("There are no options for %s.\n", list->name);
+        } else {
+            printf("No options available.\n");
+        }
+    }
+    for (i = 0; i < array->len; i++) {
+        printf("%s\n", (char *)array->pdata[i]);
+    }
+    g_ptr_array_set_free_func(array, g_free);
+    g_ptr_array_free(array, true);
+
+}
+/* ------------------------------------------------------------------ */
+
+QemuOpt *qemu_opt_find(QemuOpts *opts, const char *name)
+{
+    QemuOpt *opt;
+
+    QTAILQ_FOREACH_REVERSE(opt, &opts->head, next) {
+        if (strcmp(opt->name, name) != 0)
+            continue;
+        return opt;
+    }
+    return NULL;
+}
+
+static void qemu_opt_del(QemuOpt *opt)
+{
+    QTAILQ_REMOVE(&opt->opts->head, opt, next);
+    g_free(opt->name);
+    g_free(opt->str);
+    g_free(opt);
+}
+
+/* qemu_opt_set allows many settings for the same option.
+ * This function deletes all settings for an option.
+ */
+static void qemu_opt_del_all(QemuOpts *opts, const char *name)
+{
+    QemuOpt *opt, *next_opt;
+
+    QTAILQ_FOREACH_SAFE(opt, &opts->head, next, next_opt) {
+        if (!strcmp(opt->name, name)) {
+            qemu_opt_del(opt);
+        }
+    }
+}
+
+const char *qemu_opt_get(QemuOpts *opts, const char *name)
+{
+    QemuOpt *opt;
+
+    if (opts == NULL) {
+        return NULL;
+    }
+
+    opt = qemu_opt_find(opts, name);
+    if (!opt) {
+        return find_default_by_name(opts, name);
+    }
+
+    return opt->str;
+}
+
+void qemu_opt_iter_init(QemuOptsIter *iter, QemuOpts *opts, const char *name)
+{
+    iter->opts = opts;
+    iter->opt = QTAILQ_FIRST(&opts->head);
+    iter->name = name;
+}
+
+const char *qemu_opt_iter_next(QemuOptsIter *iter)
+{
+    QemuOpt *ret = iter->opt;
+    if (iter->name) {
+        while (ret && !g_str_equal(iter->name, ret->name)) {
+            ret = QTAILQ_NEXT(ret, next);
+        }
+    }
+    iter->opt = ret ? QTAILQ_NEXT(ret, next) : NULL;
+    return ret ? ret->str : NULL;
+}
+
+/* Get a known option (or its default) and remove it from the list
+ * all in one action. Return a malloced string of the option value.
+ * Result must be freed by caller with g_free().
+ */
+char *qemu_opt_get_del(QemuOpts *opts, const char *name)
+{
+    QemuOpt *opt;
+    char *str;
+
+    if (opts == NULL) {
+        return NULL;
+    }
+
+    opt = qemu_opt_find(opts, name);
+    if (!opt) {
+        return g_strdup(find_default_by_name(opts, name));
+    }
+    str = opt->str;
+    opt->str = NULL;
+    qemu_opt_del_all(opts, name);
+    return str;
+}
+
+bool qemu_opt_has_help_opt(QemuOpts *opts)
+{
+    QemuOpt *opt;
+
+    QTAILQ_FOREACH_REVERSE(opt, &opts->head, next) {
+        if (is_help_option(opt->name)) {
+            return true;
+        }
+    }
+    return false;
+}
+
+static bool qemu_opt_get_bool_helper(QemuOpts *opts, const char *name,
+                                     bool defval, bool del)
+{
+    QemuOpt *opt;
+    const char *def_val;
+    bool ret = defval;
+
+    if (opts == NULL) {
+        return ret;
+    }
+
+    opt = qemu_opt_find(opts, name);
+    if (opt == NULL) {
+        def_val = find_default_by_name(opts, name);
+        if (def_val) {
+            qapi_bool_parse(name, def_val, &ret, &error_abort);
+        }
+        return ret;
+    }
+    assert(opt->desc && opt->desc->type == QEMU_OPT_BOOL);
+    ret = opt->value.boolean;
+    if (del) {
+        qemu_opt_del_all(opts, name);
+    }
+    return ret;
+}
+
+bool qemu_opt_get_bool(QemuOpts *opts, const char *name, bool defval)
+{
+    return qemu_opt_get_bool_helper(opts, name, defval, false);
+}
+
+bool qemu_opt_get_bool_del(QemuOpts *opts, const char *name, bool defval)
+{
+    return qemu_opt_get_bool_helper(opts, name, defval, true);
+}
+
+static uint64_t qemu_opt_get_number_helper(QemuOpts *opts, const char *name,
+                                           uint64_t defval, bool del)
+{
+    QemuOpt *opt;
+    const char *def_val;
+    uint64_t ret = defval;
+
+    if (opts == NULL) {
+        return ret;
+    }
+
+    opt = qemu_opt_find(opts, name);
+    if (opt == NULL) {
+        def_val = find_default_by_name(opts, name);
+        if (def_val) {
+            parse_option_number(name, def_val, &ret, &error_abort);
+        }
+        return ret;
+    }
+    assert(opt->desc && opt->desc->type == QEMU_OPT_NUMBER);
+    ret = opt->value.uint;
+    if (del) {
+        qemu_opt_del_all(opts, name);
+    }
+    return ret;
+}
+
+uint64_t qemu_opt_get_number(QemuOpts *opts, const char *name, uint64_t defval)
+{
+    return qemu_opt_get_number_helper(opts, name, defval, false);
+}
+
+uint64_t qemu_opt_get_number_del(QemuOpts *opts, const char *name,
+                                 uint64_t defval)
+{
+    return qemu_opt_get_number_helper(opts, name, defval, true);
+}
+
+static uint64_t qemu_opt_get_size_helper(QemuOpts *opts, const char *name,
+                                         uint64_t defval, bool del)
+{
+    QemuOpt *opt;
+    const char *def_val;
+    uint64_t ret = defval;
+
+    if (opts == NULL) {
+        return ret;
+    }
+
+    opt = qemu_opt_find(opts, name);
+    if (opt == NULL) {
+        def_val = find_default_by_name(opts, name);
+        if (def_val) {
+            parse_option_size(name, def_val, &ret, &error_abort);
+        }
+        return ret;
+    }
+    assert(opt->desc && opt->desc->type == QEMU_OPT_SIZE);
+    ret = opt->value.uint;
+    if (del) {
+        qemu_opt_del_all(opts, name);
+    }
+    return ret;
+}
+
+uint64_t qemu_opt_get_size(QemuOpts *opts, const char *name, uint64_t defval)
+{
+    return qemu_opt_get_size_helper(opts, name, defval, false);
+}
+
+uint64_t qemu_opt_get_size_del(QemuOpts *opts, const char *name,
+                               uint64_t defval)
+{
+    return qemu_opt_get_size_helper(opts, name, defval, true);
+}
+
+static bool qemu_opt_parse(QemuOpt *opt, Error **errp)
+{
+    if (opt->desc == NULL)
+        return true;
+
+    switch (opt->desc->type) {
+    case QEMU_OPT_STRING:
+        /* nothing */
+        return true;
+    case QEMU_OPT_BOOL:
+        return qapi_bool_parse(opt->name, opt->str, &opt->value.boolean, errp);
+    case QEMU_OPT_NUMBER:
+        return parse_option_number(opt->name, opt->str, &opt->value.uint,
+                                   errp);
+    case QEMU_OPT_SIZE:
+        return parse_option_size(opt->name, opt->str, &opt->value.uint,
+                                 errp);
+    default:
+        abort();
+    }
+}
+
+static bool opts_accepts_any(const QemuOpts *opts)
+{
+    return opts->list->desc[0].name == NULL;
+}
+
+int qemu_opt_unset(QemuOpts *opts, const char *name)
+{
+    QemuOpt *opt = qemu_opt_find(opts, name);
+
+    assert(opts_accepts_any(opts));
+
+    if (opt == NULL) {
+        return -1;
+    } else {
+        qemu_opt_del(opt);
+        return 0;
+    }
+}
+
+static QemuOpt *opt_create(QemuOpts *opts, const char *name, char *value,
+                           bool prepend)
+{
+    QemuOpt *opt = g_malloc0(sizeof(*opt));
+
+    opt->name = g_strdup(name);
+    opt->str = value;
+    opt->opts = opts;
+    if (prepend) {
+        QTAILQ_INSERT_HEAD(&opts->head, opt, next);
+    } else {
+        QTAILQ_INSERT_TAIL(&opts->head, opt, next);
+    }
+
+    return opt;
+}
+
+static bool opt_validate(QemuOpt *opt, bool *help_wanted,
+                         Error **errp)
+{
+    const QemuOptDesc *desc;
+
+    desc = find_desc_by_name(opt->opts->list->desc, opt->name);
+    if (!desc && !opts_accepts_any(opt->opts)) {
+        error_setg(errp, QERR_INVALID_PARAMETER, opt->name);
+        if (help_wanted && is_help_option(opt->name)) {
+            *help_wanted = true;
+        }
+        return false;
+    }
+
+    opt->desc = desc;
+    if (!qemu_opt_parse(opt, errp)) {
+        return false;
+    }
+
+    return true;
+}
+
+bool qemu_opt_set(QemuOpts *opts, const char *name, const char *value,
+                  Error **errp)
+{
+    QemuOpt *opt = opt_create(opts, name, g_strdup(value), false);
+
+    if (!opt_validate(opt, NULL, errp)) {
+        qemu_opt_del(opt);
+        return false;
+    }
+    return true;
+}
+
+bool qemu_opt_set_bool(QemuOpts *opts, const char *name, bool val,
+                       Error **errp)
+{
+    QemuOpt *opt;
+    const QemuOptDesc *desc;
+
+    desc = find_desc_by_name(opts->list->desc, name);
+    if (!desc && !opts_accepts_any(opts)) {
+        error_setg(errp, QERR_INVALID_PARAMETER, name);
+        return false;
+    }
+
+    opt = g_malloc0(sizeof(*opt));
+    opt->name = g_strdup(name);
+    opt->opts = opts;
+    opt->desc = desc;
+    opt->value.boolean = !!val;
+    opt->str = g_strdup(val ? "on" : "off");
+    QTAILQ_INSERT_TAIL(&opts->head, opt, next);
+    return true;
+}
+
+bool qemu_opt_set_number(QemuOpts *opts, const char *name, int64_t val,
+                         Error **errp)
+{
+    QemuOpt *opt;
+    const QemuOptDesc *desc;
+
+    desc = find_desc_by_name(opts->list->desc, name);
+    if (!desc && !opts_accepts_any(opts)) {
+        error_setg(errp, QERR_INVALID_PARAMETER, name);
+        return false;
+    }
+
+    opt = g_malloc0(sizeof(*opt));
+    opt->name = g_strdup(name);
+    opt->opts = opts;
+    opt->desc = desc;
+    opt->value.uint = val;
+    opt->str = g_strdup_printf("%" PRId64, val);
+    QTAILQ_INSERT_TAIL(&opts->head, opt, next);
+    return true;
+}
+
+/**
+ * For each member of @opts, call @func(@opaque, name, value, @errp).
+ * @func() may store an Error through @errp, but must return non-zero then.
+ * When @func() returns non-zero, break the loop and return that value.
+ * Return zero when the loop completes.
+ */
+int qemu_opt_foreach(QemuOpts *opts, qemu_opt_loopfunc func, void *opaque,
+                     Error **errp)
+{
+    QemuOpt *opt;
+    int rc;
+
+    QTAILQ_FOREACH(opt, &opts->head, next) {
+        rc = func(opaque, opt->name, opt->str, errp);
+        if (rc) {
+            return rc;
+        }
+        assert(!errp || !*errp);
+    }
+    return 0;
+}
+
+QemuOpts *qemu_opts_find(QemuOptsList *list, const char *id)
+{
+    QemuOpts *opts;
+
+    QTAILQ_FOREACH(opts, &list->head, next) {
+        if (!opts->id && !id) {
+            return opts;
+        }
+        if (opts->id && id && !strcmp(opts->id, id)) {
+            return opts;
+        }
+    }
+    return NULL;
+}
+
+QemuOpts *qemu_opts_create(QemuOptsList *list, const char *id,
+                           int fail_if_exists, Error **errp)
+{
+    QemuOpts *opts = NULL;
+
+    if (id) {
+        if (!id_wellformed(id)) {
+            error_setg(errp, QERR_INVALID_PARAMETER_VALUE, "id",
+                       "an identifier");
+            error_append_hint(errp, "Identifiers consist of letters, digits, "
+                              "'-', '.', '_', starting with a letter.\n");
+            return NULL;
+        }
+        opts = qemu_opts_find(list, id);
+        if (opts != NULL) {
+            if (fail_if_exists && !list->merge_lists) {
+                error_setg(errp, "Duplicate ID '%s' for %s", id, list->name);
+                return NULL;
+            } else {
+                return opts;
+            }
+        }
+    } else if (list->merge_lists) {
+        opts = qemu_opts_find(list, NULL);
+        if (opts) {
+            return opts;
+        }
+    }
+    opts = g_malloc0(sizeof(*opts));
+    opts->id = g_strdup(id);
+    opts->list = list;
+    loc_save(&opts->loc);
+    QTAILQ_INIT(&opts->head);
+    QTAILQ_INSERT_TAIL(&list->head, opts, next);
+    return opts;
+}
+
+void qemu_opts_reset(QemuOptsList *list)
+{
+    QemuOpts *opts, *next_opts;
+
+    QTAILQ_FOREACH_SAFE(opts, &list->head, next, next_opts) {
+        qemu_opts_del(opts);
+    }
+}
+
+void qemu_opts_loc_restore(QemuOpts *opts)
+{
+    loc_restore(&opts->loc);
+}
+
+bool qemu_opts_set(QemuOptsList *list, const char *id,
+                   const char *name, const char *value, Error **errp)
+{
+    QemuOpts *opts;
+
+    opts = qemu_opts_create(list, id, 1, errp);
+    if (!opts) {
+        return false;
+    }
+    return qemu_opt_set(opts, name, value, errp);
+}
+
+const char *qemu_opts_id(QemuOpts *opts)
+{
+    return opts->id;
+}
+
+/* The id string will be g_free()d by qemu_opts_del */
+void qemu_opts_set_id(QemuOpts *opts, char *id)
+{
+    opts->id = id;
+}
+
+void qemu_opts_del(QemuOpts *opts)
+{
+    QemuOpt *opt;
+
+    if (opts == NULL) {
+        return;
+    }
+
+    for (;;) {
+        opt = QTAILQ_FIRST(&opts->head);
+        if (opt == NULL)
+            break;
+        qemu_opt_del(opt);
+    }
+    QTAILQ_REMOVE(&opts->list->head, opts, next);
+    g_free(opts->id);
+    g_free(opts);
+}
+
+/* print value, escaping any commas in value */
+static void escaped_print(const char *value)
+{
+    const char *ptr;
+
+    for (ptr = value; *ptr; ++ptr) {
+        if (*ptr == ',') {
+            putchar(',');
+        }
+        putchar(*ptr);
+    }
+}
+
+void qemu_opts_print(QemuOpts *opts, const char *separator)
+{
+    QemuOpt *opt;
+    QemuOptDesc *desc = opts->list->desc;
+    const char *sep = "";
+
+    if (opts->id) {
+        printf("id=%s", opts->id); /* passed id_wellformed -> no commas */
+        sep = separator;
+    }
+
+    if (desc[0].name == NULL) {
+        QTAILQ_FOREACH(opt, &opts->head, next) {
+            printf("%s%s=", sep, opt->name);
+            escaped_print(opt->str);
+            sep = separator;
+        }
+        return;
+    }
+    for (; desc && desc->name; desc++) {
+        const char *value;
+        opt = qemu_opt_find(opts, desc->name);
+
+        value = opt ? opt->str : desc->def_value_str;
+        if (!value) {
+            continue;
+        }
+        if (desc->type == QEMU_OPT_STRING) {
+            printf("%s%s=", sep, desc->name);
+            escaped_print(value);
+        } else if ((desc->type == QEMU_OPT_SIZE ||
+                    desc->type == QEMU_OPT_NUMBER) && opt) {
+            printf("%s%s=%" PRId64, sep, desc->name, opt->value.uint);
+        } else {
+            printf("%s%s=%s", sep, desc->name, value);
+        }
+        sep = separator;
+    }
+}
+
+static const char *get_opt_name_value(const char *params,
+                                      const char *firstname,
+                                      char **name, char **value)
+{
+    const char *p, *pe, *pc;
+
+    pe = strchr(params, '=');
+    pc = strchr(params, ',');
+
+    if (!pe || (pc && pc < pe)) {
+        /* found "foo,more" */
+        if (firstname) {
+            /* implicitly named first option */
+            *name = g_strdup(firstname);
+            p = get_opt_value(params, value);
+        } else {
+            /* option without value, must be a flag */
+            p = get_opt_name(params, name, ',');
+            if (strncmp(*name, "no", 2) == 0) {
+                memmove(*name, *name + 2, strlen(*name + 2) + 1);
+                *value = g_strdup("off");
+            } else {
+                *value = g_strdup("on");
+            }
+        }
+    } else {
+        /* found "foo=bar,more" */
+        p = get_opt_name(params, name, '=');
+        assert(*p == '=');
+        p++;
+        p = get_opt_value(p, value);
+    }
+
+    assert(!*p || *p == ',');
+    if (*p == ',') {
+        p++;
+    }
+    return p;
+}
+
+static bool opts_do_parse(QemuOpts *opts, const char *params,
+                          const char *firstname, bool prepend,
+                          bool *help_wanted, Error **errp)
+{
+    char *option, *value;
+    const char *p;
+    QemuOpt *opt;
+
+    for (p = params; *p;) {
+        p = get_opt_name_value(p, firstname, &option, &value);
+        firstname = NULL;
+
+        if (!strcmp(option, "id")) {
+            g_free(option);
+            g_free(value);
+            continue;
+        }
+
+        opt = opt_create(opts, option, value, prepend);
+        g_free(option);
+        if (!opt_validate(opt, help_wanted, errp)) {
+            qemu_opt_del(opt);
+            return false;
+        }
+    }
+
+    return true;
+}
+
+static char *opts_parse_id(const char *params)
+{
+    const char *p;
+    char *name, *value;
+
+    for (p = params; *p;) {
+        p = get_opt_name_value(p, NULL, &name, &value);
+        if (!strcmp(name, "id")) {
+            g_free(name);
+            return value;
+        }
+        g_free(name);
+        g_free(value);
+    }
+
+    return NULL;
+}
+
+bool has_help_option(const char *params)
+{
+    const char *p;
+    char *name, *value;
+    bool ret;
+
+    for (p = params; *p;) {
+        p = get_opt_name_value(p, NULL, &name, &value);
+        ret = is_help_option(name);
+        g_free(name);
+        g_free(value);
+        if (ret) {
+            return true;
+        }
+    }
+
+    return false;
+}
+
+/**
+ * Store options parsed from @params into @opts.
+ * If @firstname is non-null, the first key=value in @params may omit
+ * key=, and is treated as if key was @firstname.
+ * On error, store an error object through @errp if non-null.
+ */
+bool qemu_opts_do_parse(QemuOpts *opts, const char *params,
+                       const char *firstname, Error **errp)
+{
+    return opts_do_parse(opts, params, firstname, false, NULL, errp);
+}
+
+static QemuOpts *opts_parse(QemuOptsList *list, const char *params,
+                            bool permit_abbrev, bool defaults,
+                            bool *help_wanted, Error **errp)
+{
+    const char *firstname;
+    char *id = opts_parse_id(params);
+    QemuOpts *opts;
+
+    assert(!permit_abbrev || list->implied_opt_name);
+    firstname = permit_abbrev ? list->implied_opt_name : NULL;
+
+    /*
+     * This code doesn't work for defaults && !list->merge_lists: when
+     * params has no id=, and list has an element with !opts->id, it
+     * appends a new element instead of returning the existing opts.
+     * However, we got no use for this case.  Guard against possible
+     * (if unlikely) future misuse:
+     */
+    assert(!defaults || list->merge_lists);
+    opts = qemu_opts_create(list, id, !defaults, errp);
+    g_free(id);
+    if (opts == NULL) {
+        return NULL;
+    }
+
+    if (!opts_do_parse(opts, params, firstname, defaults, help_wanted,
+                       errp)) {
+        qemu_opts_del(opts);
+        return NULL;
+    }
+
+    return opts;
+}
+
+/**
+ * Create a QemuOpts in @list and with options parsed from @params.
+ * If @permit_abbrev, the first key=value in @params may omit key=,
+ * and is treated as if key was @list->implied_opt_name.
+ * On error, store an error object through @errp if non-null.
+ * Return the new QemuOpts on success, null pointer on error.
+ */
+QemuOpts *qemu_opts_parse(QemuOptsList *list, const char *params,
+                          bool permit_abbrev, Error **errp)
+{
+    return opts_parse(list, params, permit_abbrev, false, NULL, errp);
+}
+
+/**
+ * Create a QemuOpts in @list and with options parsed from @params.
+ * If @permit_abbrev, the first key=value in @params may omit key=,
+ * and is treated as if key was @list->implied_opt_name.
+ * Report errors with error_report_err().  This is inappropriate in
+ * QMP context.  Do not use this function there!
+ * Return the new QemuOpts on success, null pointer on error.
+ */
+QemuOpts *qemu_opts_parse_noisily(QemuOptsList *list, const char *params,
+                                  bool permit_abbrev)
+{
+    Error *err = NULL;
+    QemuOpts *opts;
+    bool help_wanted = false;
+
+    opts = opts_parse(list, params, permit_abbrev, false, &help_wanted, &err);
+    if (err) {
+        if (help_wanted) {
+            qemu_opts_print_help(list, true);
+            error_free(err);
+        } else {
+            error_report_err(err);
+        }
+    }
+    return opts;
+}
+
+void qemu_opts_set_defaults(QemuOptsList *list, const char *params,
+                            int permit_abbrev)
+{
+    QemuOpts *opts;
+
+    opts = opts_parse(list, params, permit_abbrev, true, NULL, NULL);
+    assert(opts);
+}
+
+static bool qemu_opts_from_qdict_entry(QemuOpts *opts,
+                                       const QDictEntry *entry,
+                                       Error **errp)
+{
+    const char *key = qdict_entry_key(entry);
+    QObject *obj = qdict_entry_value(entry);
+    char buf[32];
+    g_autofree char *tmp = NULL;
+    const char *value;
+
+    if (!strcmp(key, "id")) {
+        return true;
+    }
+
+    switch (qobject_type(obj)) {
+    case QTYPE_QSTRING:
+        value = qstring_get_str(qobject_to(QString, obj));
+        break;
+    case QTYPE_QNUM:
+        tmp = qnum_to_string(qobject_to(QNum, obj));
+        value = tmp;
+        break;
+    case QTYPE_QBOOL:
+        pstrcpy(buf, sizeof(buf),
+                qbool_get_bool(qobject_to(QBool, obj)) ? "on" : "off");
+        value = buf;
+        break;
+    default:
+        return true;
+    }
+
+    return qemu_opt_set(opts, key, value, errp);
+}
+
+/*
+ * Create QemuOpts from a QDict.
+ * Use value of key "id" as ID if it exists and is a QString.  Only
+ * QStrings, QNums and QBools are copied.  Entries with other types
+ * are silently ignored.
+ */
+QemuOpts *qemu_opts_from_qdict(QemuOptsList *list, const QDict *qdict,
+                               Error **errp)
+{
+    QemuOpts *opts;
+    const QDictEntry *entry;
+
+    opts = qemu_opts_create(list, qdict_get_try_str(qdict, "id"), 1, errp);
+    if (!opts) {
+        return NULL;
+    }
+
+    assert(opts != NULL);
+
+    for (entry = qdict_first(qdict);
+         entry;
+         entry = qdict_next(qdict, entry)) {
+        if (!qemu_opts_from_qdict_entry(opts, entry, errp)) {
+            qemu_opts_del(opts);
+            return NULL;
+        }
+    }
+
+    return opts;
+}
+
+/*
+ * Adds all QDict entries to the QemuOpts that can be added and removes them
+ * from the QDict. When this function returns, the QDict contains only those
+ * entries that couldn't be added to the QemuOpts.
+ */
+bool qemu_opts_absorb_qdict(QemuOpts *opts, QDict *qdict, Error **errp)
+{
+    const QDictEntry *entry, *next;
+
+    entry = qdict_first(qdict);
+
+    while (entry != NULL) {
+        next = qdict_next(qdict, entry);
+
+        if (find_desc_by_name(opts->list->desc, entry->key)) {
+            if (!qemu_opts_from_qdict_entry(opts, entry, errp)) {
+                return false;
+            }
+            qdict_del(qdict, entry->key);
+        }
+
+        entry = next;
+    }
+
+    return true;
+}
+
+/*
+ * Convert from QemuOpts to QDict. The QDict values are of type QString.
+ *
+ * If @list is given, only add those options to the QDict that are contained in
+ * the list. If @del is true, any options added to the QDict are removed from
+ * the QemuOpts, otherwise they remain there.
+ *
+ * If two options in @opts have the same name, they are processed in order
+ * so that the last one wins (consistent with the reverse iteration in
+ * qemu_opt_find()), but all of them are deleted if @del is true.
+ *
+ * TODO We'll want to use types appropriate for opt->desc->type, but
+ * this is enough for now.
+ */
+QDict *qemu_opts_to_qdict_filtered(QemuOpts *opts, QDict *qdict,
+                                   QemuOptsList *list, bool del)
+{
+    QemuOpt *opt, *next;
+
+    if (!qdict) {
+        qdict = qdict_new();
+    }
+    if (opts->id) {
+        qdict_put_str(qdict, "id", opts->id);
+    }
+    QTAILQ_FOREACH_SAFE(opt, &opts->head, next, next) {
+        if (list) {
+            QemuOptDesc *desc;
+            bool found = false;
+            for (desc = list->desc; desc->name; desc++) {
+                if (!strcmp(desc->name, opt->name)) {
+                    found = true;
+                    break;
+                }
+            }
+            if (!found) {
+                continue;
+            }
+        }
+        qdict_put_str(qdict, opt->name, opt->str);
+        if (del) {
+            qemu_opt_del(opt);
+        }
+    }
+    return qdict;
+}
+
+/* Copy all options in a QemuOpts to the given QDict. See
+ * qemu_opts_to_qdict_filtered() for details. */
+QDict *qemu_opts_to_qdict(QemuOpts *opts, QDict *qdict)
+{
+    return qemu_opts_to_qdict_filtered(opts, qdict, NULL, false);
+}
+
+/* Validate parsed opts against descriptions where no
+ * descriptions were provided in the QemuOptsList.
+ */
+bool qemu_opts_validate(QemuOpts *opts, const QemuOptDesc *desc, Error **errp)
+{
+    QemuOpt *opt;
+
+    assert(opts_accepts_any(opts));
+
+    QTAILQ_FOREACH(opt, &opts->head, next) {
+        opt->desc = find_desc_by_name(desc, opt->name);
+        if (!opt->desc) {
+            error_setg(errp, QERR_INVALID_PARAMETER, opt->name);
+            return false;
+        }
+
+        if (!qemu_opt_parse(opt, errp)) {
+            return false;
+        }
+    }
+
+    return true;
+}
+
+/**
+ * For each member of @list, call @func(@opaque, member, @errp).
+ * Call it with the current location temporarily set to the member's.
+ * @func() may store an Error through @errp, but must return non-zero then.
+ * When @func() returns non-zero, break the loop and return that value.
+ * Return zero when the loop completes.
+ */
+int qemu_opts_foreach(QemuOptsList *list, qemu_opts_loopfunc func,
+                      void *opaque, Error **errp)
+{
+    Location loc;
+    QemuOpts *opts;
+    int rc = 0;
+
+    loc_push_none(&loc);
+    QTAILQ_FOREACH(opts, &list->head, next) {
+        loc_restore(&opts->loc);
+        rc = func(opaque, opts, errp);
+        if (rc) {
+            break;
+        }
+        assert(!errp || !*errp);
+    }
+    loc_pop(&loc);
+    return rc;
+}
+
+static size_t count_opts_list(QemuOptsList *list)
+{
+    QemuOptDesc *desc = NULL;
+    size_t num_opts = 0;
+
+    if (!list) {
+        return 0;
+    }
+
+    desc = list->desc;
+    while (desc && desc->name) {
+        num_opts++;
+        desc++;
+    }
+
+    return num_opts;
+}
+
+void qemu_opts_free(QemuOptsList *list)
+{
+    g_free(list);
+}
+
+/* Realloc dst option list and append options from an option list (list)
+ * to it. dst could be NULL or a malloced list.
+ * The lifetime of dst must be shorter than the input list because the
+ * QemuOptDesc->name, ->help, and ->def_value_str strings are shared.
+ */
+QemuOptsList *qemu_opts_append(QemuOptsList *dst,
+                               QemuOptsList *list)
+{
+    size_t num_opts, num_dst_opts;
+    QemuOptDesc *desc;
+    bool need_init = false;
+    bool need_head_update;
+
+    if (!list) {
+        return dst;
+    }
+
+    /* If dst is NULL, after realloc, some area of dst should be initialized
+     * before adding options to it.
+     */
+    if (!dst) {
+        need_init = true;
+        need_head_update = true;
+    } else {
+        /* Moreover, even if dst is not NULL, the realloc may move it to a
+         * different address in which case we may get a stale tail pointer
+         * in dst->head. */
+        need_head_update = QTAILQ_EMPTY(&dst->head);
+    }
+
+    num_opts = count_opts_list(dst);
+    num_dst_opts = num_opts;
+    num_opts += count_opts_list(list);
+    dst = g_realloc(dst, sizeof(QemuOptsList) +
+                    (num_opts + 1) * sizeof(QemuOptDesc));
+    if (need_init) {
+        dst->name = NULL;
+        dst->implied_opt_name = NULL;
+        dst->merge_lists = false;
+    }
+    if (need_head_update) {
+        QTAILQ_INIT(&dst->head);
+    }
+    dst->desc[num_dst_opts].name = NULL;
+
+    /* append list->desc to dst->desc */
+    if (list) {
+        desc = list->desc;
+        while (desc && desc->name) {
+            if (find_desc_by_name(dst->desc, desc->name) == NULL) {
+                dst->desc[num_dst_opts++] = *desc;
+                dst->desc[num_dst_opts].name = NULL;
+            }
+            desc++;
+        }
+    }
+
+    return dst;
+}
diff --git a/util/qemu-print.c b/util/qemu-print.c
new file mode 100644
index 000000000..69ba612f5
--- /dev/null
+++ b/util/qemu-print.c
@@ -0,0 +1,70 @@
+/*
+ * Print to stream or current monitor
+ *
+ * Copyright (C) 2019 Red Hat Inc.
+ *
+ * Authors:
+ *  Markus Armbruster ,
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
+ * See the COPYING file in the top-level directory.
+ */
+
+#include "qemu/osdep.h"
+#include "monitor/monitor.h"
+#include "qemu/qemu-print.h"
+
+/*
+ * Print like vprintf().
+ * Print to current monitor if we have one, else to stdout.
+ */
+int qemu_vprintf(const char *fmt, va_list ap)
+{
+    Monitor *cur_mon = monitor_cur();
+    if (cur_mon) {
+        return monitor_vprintf(cur_mon, fmt, ap);
+    }
+    return vprintf(fmt, ap);
+}
+
+/*
+ * Print like printf().
+ * Print to current monitor if we have one, else to stdout.
+ */
+int qemu_printf(const char *fmt, ...)
+{
+    va_list ap;
+    int ret;
+
+    va_start(ap, fmt);
+    ret = qemu_vprintf(fmt, ap);
+    va_end(ap);
+    return ret;
+}
+
+/*
+ * Print like vfprintf()
+ * Print to @stream if non-null, else to current monitor.
+ */
+int qemu_vfprintf(FILE *stream, const char *fmt, va_list ap)
+{
+    if (!stream) {
+        return monitor_vprintf(monitor_cur(), fmt, ap);
+    }
+    return vfprintf(stream, fmt, ap);
+}
+
+/*
+ * Print like fprintf().
+ * Print to @stream if non-null, else to current monitor.
+ */
+int qemu_fprintf(FILE *stream, const char *fmt, ...)
+{
+    va_list ap;
+    int ret;
+
+    va_start(ap, fmt);
+    ret = qemu_vfprintf(stream, fmt, ap);
+    va_end(ap);
+    return ret;
+}
diff --git a/util/qemu-progress.c b/util/qemu-progress.c
new file mode 100644
index 000000000..20d51f8c1
--- /dev/null
+++ b/util/qemu-progress.c
@@ -0,0 +1,162 @@
+/*
+ * QEMU progress printing utility functions
+ *
+ * Copyright (C) 2011 Jes Sorensen 
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+#include "qemu/osdep.h"
+#include "qemu-common.h"
+
+struct progress_state {
+    float current;
+    float last_print;
+    float min_skip;
+    void (*print)(void);
+    void (*end)(void);
+};
+
+static struct progress_state state;
+static volatile sig_atomic_t print_pending;
+
+/*
+ * Simple progress print function.
+ * @percent relative percent of current operation
+ * @max percent of total operation
+ */
+static void progress_simple_print(void)
+{
+    printf("    (%3.2f/100%%)\r", state.current);
+    fflush(stdout);
+}
+
+static void progress_simple_end(void)
+{
+    printf("\n");
+}
+
+static void progress_simple_init(void)
+{
+    state.print = progress_simple_print;
+    state.end = progress_simple_end;
+}
+
+#ifdef CONFIG_POSIX
+static void sigusr_print(int signal)
+{
+    print_pending = 1;
+}
+#endif
+
+static void progress_dummy_print(void)
+{
+    if (print_pending) {
+        fprintf(stderr, "    (%3.2f/100%%)\n", state.current);
+        print_pending = 0;
+    }
+}
+
+static void progress_dummy_end(void)
+{
+}
+
+static void progress_dummy_init(void)
+{
+#ifdef CONFIG_POSIX
+    struct sigaction action;
+    sigset_t set;
+
+    memset(&action, 0, sizeof(action));
+    sigfillset(&action.sa_mask);
+    action.sa_handler = sigusr_print;
+    action.sa_flags = 0;
+    sigaction(SIGUSR1, &action, NULL);
+#ifdef SIGINFO
+    sigaction(SIGINFO, &action, NULL);
+#endif
+
+    /*
+     * SIGUSR1 is SIG_IPI and gets blocked in qemu_init_main_loop(). In the
+     * tools that use the progress report SIGUSR1 isn't used in this meaning
+     * and instead should print the progress, so reenable it.
+     */
+    sigemptyset(&set);
+    sigaddset(&set, SIGUSR1);
+    pthread_sigmask(SIG_UNBLOCK, &set, NULL);
+#endif
+
+    state.print = progress_dummy_print;
+    state.end = progress_dummy_end;
+}
+
+/*
+ * Initialize progress reporting.
+ * If @enabled is false, actual reporting is suppressed.  The user can
+ * still trigger a report by sending a SIGUSR1.
+ * Reports are also suppressed unless we've had at least @min_skip
+ * percent progress since the last report.
+ */
+void qemu_progress_init(int enabled, float min_skip)
+{
+    state.min_skip = min_skip;
+    if (enabled) {
+        progress_simple_init();
+    } else {
+        progress_dummy_init();
+    }
+}
+
+void qemu_progress_end(void)
+{
+    state.end();
+}
+
+/*
+ * Report progress.
+ * @delta is how much progress we made.
+ * If @max is zero, @delta is an absolute value of the total job done.
+ * Else, @delta is a progress delta since the last call, as a fraction
+ * of @max.  I.e. the delta is @delta * @max / 100. This allows
+ * relative accounting of functions which may be a different fraction of
+ * the full job, depending on the context they are called in. I.e.
+ * a function might be considered 40% of the full job if used from
+ * bdrv_img_create() but only 20% if called from img_convert().
+ */
+void qemu_progress_print(float delta, int max)
+{
+    float current;
+
+    if (max == 0) {
+        current = delta;
+    } else {
+        current = state.current + delta / 100 * max;
+    }
+    if (current > 100) {
+        current = 100;
+    }
+    state.current = current;
+
+    if (current > (state.last_print + state.min_skip) ||
+        current < (state.last_print - state.min_skip) ||
+        current == 100 || current == 0) {
+        state.last_print = state.current;
+        state.print();
+    }
+}
diff --git a/util/qemu-sockets.c b/util/qemu-sockets.c
new file mode 100644
index 000000000..8af0278f1
--- /dev/null
+++ b/util/qemu-sockets.c
@@ -0,0 +1,1425 @@
+/*
+ *  inet and unix socket functions for qemu
+ *
+ *  (c) 2008 Gerd Hoffmann 
+ *
+ *  This program is free software; you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License as published by
+ *  the Free Software Foundation; under version 2 of the License.
+ *
+ *  This program is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU General Public License for more details.
+ *
+ * Contributions after 2012-01-13 are licensed under the terms of the
+ * GNU GPL, version 2 or (at your option) any later version.
+ */
+#include "qemu/osdep.h"
+
+#ifdef CONFIG_AF_VSOCK
+#include 
+#endif /* CONFIG_AF_VSOCK */
+
+#include "qemu-common.h"
+#include "monitor/monitor.h"
+#include "qapi/clone-visitor.h"
+#include "qapi/error.h"
+#include "qapi/qapi-visit-sockets.h"
+#include "qemu/sockets.h"
+#include "qemu/main-loop.h"
+#include "qapi/qobject-input-visitor.h"
+#include "qapi/qobject-output-visitor.h"
+#include "qemu/cutils.h"
+#include "trace.h"
+
+#ifndef AI_ADDRCONFIG
+# define AI_ADDRCONFIG 0
+#endif
+
+#ifndef AI_V4MAPPED
+# define AI_V4MAPPED 0
+#endif
+
+#ifndef AI_NUMERICSERV
+# define AI_NUMERICSERV 0
+#endif
+
+
+static int inet_getport(struct addrinfo *e)
+{
+    struct sockaddr_in *i4;
+    struct sockaddr_in6 *i6;
+
+    switch (e->ai_family) {
+    case PF_INET6:
+        i6 = (void*)e->ai_addr;
+        return ntohs(i6->sin6_port);
+    case PF_INET:
+        i4 = (void*)e->ai_addr;
+        return ntohs(i4->sin_port);
+    default:
+        return 0;
+    }
+}
+
+static void inet_setport(struct addrinfo *e, int port)
+{
+    struct sockaddr_in *i4;
+    struct sockaddr_in6 *i6;
+
+    switch (e->ai_family) {
+    case PF_INET6:
+        i6 = (void*)e->ai_addr;
+        i6->sin6_port = htons(port);
+        break;
+    case PF_INET:
+        i4 = (void*)e->ai_addr;
+        i4->sin_port = htons(port);
+        break;
+    }
+}
+
+NetworkAddressFamily inet_netfamily(int family)
+{
+    switch (family) {
+    case PF_INET6: return NETWORK_ADDRESS_FAMILY_IPV6;
+    case PF_INET:  return NETWORK_ADDRESS_FAMILY_IPV4;
+    case PF_UNIX:  return NETWORK_ADDRESS_FAMILY_UNIX;
+#ifdef CONFIG_AF_VSOCK
+    case PF_VSOCK: return NETWORK_ADDRESS_FAMILY_VSOCK;
+#endif /* CONFIG_AF_VSOCK */
+    }
+    return NETWORK_ADDRESS_FAMILY_UNKNOWN;
+}
+
+bool fd_is_socket(int fd)
+{
+    int optval;
+    socklen_t optlen = sizeof(optval);
+    return !qemu_getsockopt(fd, SOL_SOCKET, SO_TYPE, &optval, &optlen);
+}
+
+
+/*
+ * Matrix we're trying to apply
+ *
+ *  ipv4  ipv6   family
+ *   -     -       PF_UNSPEC
+ *   -     f       PF_INET
+ *   -     t       PF_INET6
+ *   f     -       PF_INET6
+ *   f     f       
+ *   f     t       PF_INET6
+ *   t     -       PF_INET
+ *   t     f       PF_INET
+ *   t     t       PF_INET6/PF_UNSPEC
+ *
+ * NB, this matrix is only about getting the necessary results
+ * from getaddrinfo(). Some of the cases require further work
+ * after reading results from getaddrinfo in order to fully
+ * apply the logic the end user wants.
+ *
+ * In the first and last cases, we must set IPV6_V6ONLY=0
+ * when binding, to allow a single listener to potentially
+ * accept both IPv4+6 addresses.
+ */
+int inet_ai_family_from_address(InetSocketAddress *addr,
+                                Error **errp)
+{
+    if (addr->has_ipv6 && addr->has_ipv4 &&
+        !addr->ipv6 && !addr->ipv4) {
+        error_setg(errp, "Cannot disable IPv4 and IPv6 at same time");
+        return PF_UNSPEC;
+    }
+    if ((addr->has_ipv6 && addr->ipv6) && (addr->has_ipv4 && addr->ipv4)) {
+        /*
+         * Some backends can only do a single listener. In that case
+         * we want empty hostname to resolve to "::" and then use the
+         * flag IPV6_V6ONLY==0 to get both protocols on 1 socket. This
+         * doesn't work for addresses other than "", so they're just
+         * inevitably broken until multiple listeners can be used,
+         * and thus we honour getaddrinfo automatic protocol detection
+         * Once all backends do multi-listener, remove the PF_INET6
+         * branch entirely.
+         */
+        if (!addr->host || g_str_equal(addr->host, "")) {
+            return PF_INET6;
+        } else {
+            return PF_UNSPEC;
+        }
+    }
+    if ((addr->has_ipv6 && addr->ipv6) || (addr->has_ipv4 && !addr->ipv4)) {
+        return PF_INET6;
+    }
+    if ((addr->has_ipv4 && addr->ipv4) || (addr->has_ipv6 && !addr->ipv6)) {
+        return PF_INET;
+    }
+    return PF_UNSPEC;
+}
+
+static int create_fast_reuse_socket(struct addrinfo *e)
+{
+    int slisten = qemu_socket(e->ai_family, e->ai_socktype, e->ai_protocol);
+    if (slisten < 0) {
+        return -1;
+    }
+    socket_set_fast_reuse(slisten);
+    return slisten;
+}
+
+static int try_bind(int socket, InetSocketAddress *saddr, struct addrinfo *e)
+{
+#ifndef IPV6_V6ONLY
+    return bind(socket, e->ai_addr, e->ai_addrlen);
+#else
+    /*
+     * Deals with first & last cases in matrix in comment
+     * for inet_ai_family_from_address().
+     */
+    int v6only =
+        ((!saddr->has_ipv4 && !saddr->has_ipv6) ||
+         (saddr->has_ipv4 && saddr->ipv4 &&
+          saddr->has_ipv6 && saddr->ipv6)) ? 0 : 1;
+    int stat;
+
+ rebind:
+    if (e->ai_family == PF_INET6) {
+        qemu_setsockopt(socket, IPPROTO_IPV6, IPV6_V6ONLY, &v6only,
+                        sizeof(v6only));
+    }
+
+    stat = bind(socket, e->ai_addr, e->ai_addrlen);
+    if (!stat) {
+        return 0;
+    }
+
+    /* If we got EADDRINUSE from an IPv6 bind & v6only is unset,
+     * it could be that the IPv4 port is already claimed, so retry
+     * with v6only set
+     */
+    if (e->ai_family == PF_INET6 && errno == EADDRINUSE && !v6only) {
+        v6only = 1;
+        goto rebind;
+    }
+    return stat;
+#endif
+}
+
+static int inet_listen_saddr(InetSocketAddress *saddr,
+                             int port_offset,
+                             int num,
+                             Error **errp)
+{
+    struct addrinfo ai,*res,*e;
+    char port[33];
+    char uaddr[INET6_ADDRSTRLEN+1];
+    char uport[33];
+    int rc, port_min, port_max, p;
+    int slisten = -1;
+    int saved_errno = 0;
+    bool socket_created = false;
+    Error *err = NULL;
+
+    if (saddr->keep_alive) {
+        error_setg(errp, "keep-alive option is not supported for passive "
+                   "sockets");
+        return -1;
+    }
+
+    memset(&ai,0, sizeof(ai));
+    ai.ai_flags = AI_PASSIVE;
+    if (saddr->has_numeric && saddr->numeric) {
+        ai.ai_flags |= AI_NUMERICHOST | AI_NUMERICSERV;
+    }
+    ai.ai_family = inet_ai_family_from_address(saddr, &err);
+    ai.ai_socktype = SOCK_STREAM;
+
+    if (err) {
+        error_propagate(errp, err);
+        return -1;
+    }
+
+    if (saddr->host == NULL) {
+        error_setg(errp, "host not specified");
+        return -1;
+    }
+    if (saddr->port != NULL) {
+        pstrcpy(port, sizeof(port), saddr->port);
+    } else {
+        port[0] = '\0';
+    }
+
+    /* lookup */
+    if (port_offset) {
+        unsigned long long baseport;
+        if (strlen(port) == 0) {
+            error_setg(errp, "port not specified");
+            return -1;
+        }
+        if (parse_uint_full(port, &baseport, 10) < 0) {
+            error_setg(errp, "can't convert to a number: %s", port);
+            return -1;
+        }
+        if (baseport > 65535 ||
+            baseport + port_offset > 65535) {
+            error_setg(errp, "port %s out of range", port);
+            return -1;
+        }
+        snprintf(port, sizeof(port), "%d", (int)baseport + port_offset);
+    }
+    rc = getaddrinfo(strlen(saddr->host) ? saddr->host : NULL,
+                     strlen(port) ? port : NULL, &ai, &res);
+    if (rc != 0) {
+        error_setg(errp, "address resolution failed for %s:%s: %s",
+                   saddr->host, port, gai_strerror(rc));
+        return -1;
+    }
+
+    /* create socket + bind/listen */
+    for (e = res; e != NULL; e = e->ai_next) {
+        getnameinfo((struct sockaddr*)e->ai_addr,e->ai_addrlen,
+                        uaddr,INET6_ADDRSTRLEN,uport,32,
+                        NI_NUMERICHOST | NI_NUMERICSERV);
+
+        port_min = inet_getport(e);
+        port_max = saddr->has_to ? saddr->to + port_offset : port_min;
+        for (p = port_min; p <= port_max; p++) {
+            inet_setport(e, p);
+
+            slisten = create_fast_reuse_socket(e);
+            if (slisten < 0) {
+                /* First time we expect we might fail to create the socket
+                 * eg if 'e' has AF_INET6 but ipv6 kmod is not loaded.
+                 * Later iterations should always succeed if first iteration
+                 * worked though, so treat that as fatal.
+                 */
+                if (p == port_min) {
+                    continue;
+                } else {
+                    error_setg_errno(errp, errno,
+                                     "Failed to recreate failed listening socket");
+                    goto listen_failed;
+                }
+            }
+            socket_created = true;
+
+            rc = try_bind(slisten, saddr, e);
+            if (rc < 0) {
+                if (errno != EADDRINUSE) {
+                    error_setg_errno(errp, errno, "Failed to bind socket");
+                    goto listen_failed;
+                }
+            } else {
+                if (!listen(slisten, num)) {
+                    goto listen_ok;
+                }
+                if (errno != EADDRINUSE) {
+                    error_setg_errno(errp, errno, "Failed to listen on socket");
+                    goto listen_failed;
+                }
+            }
+            /* Someone else managed to bind to the same port and beat us
+             * to listen on it! Socket semantics does not allow us to
+             * recover from this situation, so we need to recreate the
+             * socket to allow bind attempts for subsequent ports:
+             */
+            closesocket(slisten);
+            slisten = -1;
+        }
+    }
+    error_setg_errno(errp, errno,
+                     socket_created ?
+                     "Failed to find an available port" :
+                     "Failed to create a socket");
+listen_failed:
+    saved_errno = errno;
+    if (slisten >= 0) {
+        closesocket(slisten);
+    }
+    freeaddrinfo(res);
+    errno = saved_errno;
+    return -1;
+
+listen_ok:
+    freeaddrinfo(res);
+    return slisten;
+}
+
+#ifdef _WIN32
+#define QEMU_SOCKET_RC_INPROGRESS(rc) \
+    ((rc) == -EINPROGRESS || (rc) == -EWOULDBLOCK || (rc) == -WSAEALREADY)
+#else
+#define QEMU_SOCKET_RC_INPROGRESS(rc) \
+    ((rc) == -EINPROGRESS)
+#endif
+
+static int inet_connect_addr(const InetSocketAddress *saddr,
+                             struct addrinfo *addr, Error **errp)
+{
+    int sock, rc;
+
+    sock = qemu_socket(addr->ai_family, addr->ai_socktype, addr->ai_protocol);
+    if (sock < 0) {
+        error_setg_errno(errp, errno, "Failed to create socket family %d",
+                         addr->ai_family);
+        return -1;
+    }
+    socket_set_fast_reuse(sock);
+
+    /* connect to peer */
+    do {
+        rc = 0;
+        if (connect(sock, addr->ai_addr, addr->ai_addrlen) < 0) {
+            rc = -errno;
+        }
+    } while (rc == -EINTR);
+
+    if (rc < 0) {
+        error_setg_errno(errp, errno, "Failed to connect to '%s:%s'",
+                         saddr->host, saddr->port);
+        closesocket(sock);
+        return -1;
+    }
+
+    return sock;
+}
+
+static struct addrinfo *inet_parse_connect_saddr(InetSocketAddress *saddr,
+                                                 Error **errp)
+{
+    struct addrinfo ai, *res;
+    int rc;
+    Error *err = NULL;
+    static int useV4Mapped = 1;
+
+    memset(&ai, 0, sizeof(ai));
+
+    ai.ai_flags = AI_CANONNAME | AI_ADDRCONFIG;
+    if (qatomic_read(&useV4Mapped)) {
+        ai.ai_flags |= AI_V4MAPPED;
+    }
+    ai.ai_family = inet_ai_family_from_address(saddr, &err);
+    ai.ai_socktype = SOCK_STREAM;
+
+    if (err) {
+        error_propagate(errp, err);
+        return NULL;
+    }
+
+    if (saddr->host == NULL || saddr->port == NULL) {
+        error_setg(errp, "host and/or port not specified");
+        return NULL;
+    }
+
+    /* lookup */
+    rc = getaddrinfo(saddr->host, saddr->port, &ai, &res);
+
+    /* At least FreeBSD and OS-X 10.6 declare AI_V4MAPPED but
+     * then don't implement it in their getaddrinfo(). Detect
+     * this and retry without the flag since that's preferable
+     * to a fatal error
+     */
+    if (rc == EAI_BADFLAGS &&
+        (ai.ai_flags & AI_V4MAPPED)) {
+        qatomic_set(&useV4Mapped, 0);
+        ai.ai_flags &= ~AI_V4MAPPED;
+        rc = getaddrinfo(saddr->host, saddr->port, &ai, &res);
+    }
+    if (rc != 0) {
+        error_setg(errp, "address resolution failed for %s:%s: %s",
+                   saddr->host, saddr->port, gai_strerror(rc));
+        return NULL;
+    }
+    return res;
+}
+
+/**
+ * Create a socket and connect it to an address.
+ *
+ * @saddr: Inet socket address specification
+ * @errp: set on error
+ *
+ * Returns: -1 on error, file descriptor on success.
+ */
+int inet_connect_saddr(InetSocketAddress *saddr, Error **errp)
+{
+    Error *local_err = NULL;
+    struct addrinfo *res, *e;
+    int sock = -1;
+
+    res = inet_parse_connect_saddr(saddr, errp);
+    if (!res) {
+        return -1;
+    }
+
+    for (e = res; e != NULL; e = e->ai_next) {
+        error_free(local_err);
+        local_err = NULL;
+        sock = inet_connect_addr(saddr, e, &local_err);
+        if (sock >= 0) {
+            break;
+        }
+    }
+
+    freeaddrinfo(res);
+
+    if (sock < 0) {
+        error_propagate(errp, local_err);
+        return sock;
+    }
+
+    if (saddr->keep_alive) {
+        int val = 1;
+        int ret = qemu_setsockopt(sock, SOL_SOCKET, SO_KEEPALIVE,
+                                  &val, sizeof(val));
+
+        if (ret < 0) {
+            error_setg_errno(errp, errno, "Unable to set KEEPALIVE");
+            close(sock);
+            return -1;
+        }
+    }
+
+    return sock;
+}
+
+static int inet_dgram_saddr(InetSocketAddress *sraddr,
+                            InetSocketAddress *sladdr,
+                            Error **errp)
+{
+    struct addrinfo ai, *peer = NULL, *local = NULL;
+    const char *addr;
+    const char *port;
+    int sock = -1, rc;
+    Error *err = NULL;
+
+    /* lookup peer addr */
+    memset(&ai,0, sizeof(ai));
+    ai.ai_flags = AI_CANONNAME | AI_V4MAPPED | AI_ADDRCONFIG;
+    ai.ai_family = inet_ai_family_from_address(sraddr, &err);
+    ai.ai_socktype = SOCK_DGRAM;
+
+    if (err) {
+        error_propagate(errp, err);
+        goto err;
+    }
+
+    addr = sraddr->host;
+    port = sraddr->port;
+    if (addr == NULL || strlen(addr) == 0) {
+        addr = "localhost";
+    }
+    if (port == NULL || strlen(port) == 0) {
+        error_setg(errp, "remote port not specified");
+        goto err;
+    }
+
+    if ((rc = getaddrinfo(addr, port, &ai, &peer)) != 0) {
+        error_setg(errp, "address resolution failed for %s:%s: %s", addr, port,
+                   gai_strerror(rc));
+        goto err;
+    }
+
+    /* lookup local addr */
+    memset(&ai,0, sizeof(ai));
+    ai.ai_flags = AI_PASSIVE;
+    ai.ai_family = peer->ai_family;
+    ai.ai_socktype = SOCK_DGRAM;
+
+    if (sladdr) {
+        addr = sladdr->host;
+        port = sladdr->port;
+        if (addr == NULL || strlen(addr) == 0) {
+            addr = NULL;
+        }
+        if (!port || strlen(port) == 0) {
+            port = "0";
+        }
+    } else {
+        addr = NULL;
+        port = "0";
+    }
+
+    if ((rc = getaddrinfo(addr, port, &ai, &local)) != 0) {
+        error_setg(errp, "address resolution failed for %s:%s: %s", addr, port,
+                   gai_strerror(rc));
+        goto err;
+    }
+
+    /* create socket */
+    sock = qemu_socket(peer->ai_family, peer->ai_socktype, peer->ai_protocol);
+    if (sock < 0) {
+        error_setg_errno(errp, errno, "Failed to create socket family %d",
+                         peer->ai_family);
+        goto err;
+    }
+    socket_set_fast_reuse(sock);
+
+    /* bind socket */
+    if (bind(sock, local->ai_addr, local->ai_addrlen) < 0) {
+        error_setg_errno(errp, errno, "Failed to bind socket");
+        goto err;
+    }
+
+    /* connect to peer */
+    if (connect(sock,peer->ai_addr,peer->ai_addrlen) < 0) {
+        error_setg_errno(errp, errno, "Failed to connect to '%s:%s'",
+                         addr, port);
+        goto err;
+    }
+
+    freeaddrinfo(local);
+    freeaddrinfo(peer);
+    return sock;
+
+err:
+    if (sock != -1) {
+        closesocket(sock);
+    }
+    if (local) {
+        freeaddrinfo(local);
+    }
+    if (peer) {
+        freeaddrinfo(peer);
+    }
+
+    return -1;
+}
+
+/* compatibility wrapper */
+static int inet_parse_flag(const char *flagname, const char *optstr, bool *val,
+                           Error **errp)
+{
+    char *end;
+    size_t len;
+
+    end = strstr(optstr, ",");
+    if (end) {
+        if (end[1] == ',') { /* Reject 'ipv6=on,,foo' */
+            error_setg(errp, "error parsing '%s' flag '%s'", flagname, optstr);
+            return -1;
+        }
+        len = end - optstr;
+    } else {
+        len = strlen(optstr);
+    }
+    if (len == 0 || (len == 3 && strncmp(optstr, "=on", len) == 0)) {
+        *val = true;
+    } else if (len == 4 && strncmp(optstr, "=off", len) == 0) {
+        *val = false;
+    } else {
+        error_setg(errp, "error parsing '%s' flag '%s'", flagname, optstr);
+        return -1;
+    }
+    return 0;
+}
+
+int inet_parse(InetSocketAddress *addr, const char *str, Error **errp)
+{
+    const char *optstr, *h;
+    char host[65];
+    char port[33];
+    int to;
+    int pos;
+    char *begin;
+
+    memset(addr, 0, sizeof(*addr));
+
+    /* parse address */
+    if (str[0] == ':') {
+        /* no host given */
+        host[0] = '\0';
+        if (sscanf(str, ":%32[^,]%n", port, &pos) != 1) {
+            error_setg(errp, "error parsing port in address '%s'", str);
+            return -1;
+        }
+    } else if (str[0] == '[') {
+        /* IPv6 addr */
+        if (sscanf(str, "[%64[^]]]:%32[^,]%n", host, port, &pos) != 2) {
+            error_setg(errp, "error parsing IPv6 address '%s'", str);
+            return -1;
+        }
+    } else {
+        /* hostname or IPv4 addr */
+        if (sscanf(str, "%64[^:]:%32[^,]%n", host, port, &pos) != 2) {
+            error_setg(errp, "error parsing address '%s'", str);
+            return -1;
+        }
+    }
+
+    addr->host = g_strdup(host);
+    addr->port = g_strdup(port);
+
+    /* parse options */
+    optstr = str + pos;
+    h = strstr(optstr, ",to=");
+    if (h) {
+        h += 4;
+        if (sscanf(h, "%d%n", &to, &pos) != 1 ||
+            (h[pos] != '\0' && h[pos] != ',')) {
+            error_setg(errp, "error parsing to= argument");
+            return -1;
+        }
+        addr->has_to = true;
+        addr->to = to;
+    }
+    begin = strstr(optstr, ",ipv4");
+    if (begin) {
+        if (inet_parse_flag("ipv4", begin + 5, &addr->ipv4, errp) < 0) {
+            return -1;
+        }
+        addr->has_ipv4 = true;
+    }
+    begin = strstr(optstr, ",ipv6");
+    if (begin) {
+        if (inet_parse_flag("ipv6", begin + 5, &addr->ipv6, errp) < 0) {
+            return -1;
+        }
+        addr->has_ipv6 = true;
+    }
+    begin = strstr(optstr, ",keep-alive");
+    if (begin) {
+        if (inet_parse_flag("keep-alive", begin + strlen(",keep-alive"),
+                            &addr->keep_alive, errp) < 0)
+        {
+            return -1;
+        }
+        addr->has_keep_alive = true;
+    }
+    return 0;
+}
+
+
+/**
+ * Create a blocking socket and connect it to an address.
+ *
+ * @str: address string
+ * @errp: set in case of an error
+ *
+ * Returns -1 in case of error, file descriptor on success
+ **/
+int inet_connect(const char *str, Error **errp)
+{
+    int sock = -1;
+    InetSocketAddress *addr = g_new(InetSocketAddress, 1);
+
+    if (!inet_parse(addr, str, errp)) {
+        sock = inet_connect_saddr(addr, errp);
+    }
+    qapi_free_InetSocketAddress(addr);
+    return sock;
+}
+
+#ifdef CONFIG_AF_VSOCK
+static bool vsock_parse_vaddr_to_sockaddr(const VsockSocketAddress *vaddr,
+                                          struct sockaddr_vm *svm,
+                                          Error **errp)
+{
+    unsigned long long val;
+
+    memset(svm, 0, sizeof(*svm));
+    svm->svm_family = AF_VSOCK;
+
+    if (parse_uint_full(vaddr->cid, &val, 10) < 0 ||
+        val > UINT32_MAX) {
+        error_setg(errp, "Failed to parse cid '%s'", vaddr->cid);
+        return false;
+    }
+    svm->svm_cid = val;
+
+    if (parse_uint_full(vaddr->port, &val, 10) < 0 ||
+        val > UINT32_MAX) {
+        error_setg(errp, "Failed to parse port '%s'", vaddr->port);
+        return false;
+    }
+    svm->svm_port = val;
+
+    return true;
+}
+
+static int vsock_connect_addr(const VsockSocketAddress *vaddr,
+                              const struct sockaddr_vm *svm, Error **errp)
+{
+    int sock, rc;
+
+    sock = qemu_socket(AF_VSOCK, SOCK_STREAM, 0);
+    if (sock < 0) {
+        error_setg_errno(errp, errno, "Failed to create socket family %d",
+                         AF_VSOCK);
+        return -1;
+    }
+
+    /* connect to peer */
+    do {
+        rc = 0;
+        if (connect(sock, (const struct sockaddr *)svm, sizeof(*svm)) < 0) {
+            rc = -errno;
+        }
+    } while (rc == -EINTR);
+
+    if (rc < 0) {
+        error_setg_errno(errp, errno, "Failed to connect to '%s:%s'",
+                         vaddr->cid, vaddr->port);
+        closesocket(sock);
+        return -1;
+    }
+
+    return sock;
+}
+
+static int vsock_connect_saddr(VsockSocketAddress *vaddr, Error **errp)
+{
+    struct sockaddr_vm svm;
+
+    if (!vsock_parse_vaddr_to_sockaddr(vaddr, &svm, errp)) {
+        return -1;
+    }
+
+    return vsock_connect_addr(vaddr, &svm, errp);
+}
+
+static int vsock_listen_saddr(VsockSocketAddress *vaddr,
+                              int num,
+                              Error **errp)
+{
+    struct sockaddr_vm svm;
+    int slisten;
+
+    if (!vsock_parse_vaddr_to_sockaddr(vaddr, &svm, errp)) {
+        return -1;
+    }
+
+    slisten = qemu_socket(AF_VSOCK, SOCK_STREAM, 0);
+    if (slisten < 0) {
+        error_setg_errno(errp, errno, "Failed to create socket");
+        return -1;
+    }
+
+    if (bind(slisten, (const struct sockaddr *)&svm, sizeof(svm)) != 0) {
+        error_setg_errno(errp, errno, "Failed to bind socket");
+        closesocket(slisten);
+        return -1;
+    }
+
+    if (listen(slisten, num) != 0) {
+        error_setg_errno(errp, errno, "Failed to listen on socket");
+        closesocket(slisten);
+        return -1;
+    }
+    return slisten;
+}
+
+static int vsock_parse(VsockSocketAddress *addr, const char *str,
+                       Error **errp)
+{
+    char cid[33];
+    char port[33];
+    int n;
+
+    if (sscanf(str, "%32[^:]:%32[^,]%n", cid, port, &n) != 2) {
+        error_setg(errp, "error parsing address '%s'", str);
+        return -1;
+    }
+    if (str[n] != '\0') {
+        error_setg(errp, "trailing characters in address '%s'", str);
+        return -1;
+    }
+
+    addr->cid = g_strdup(cid);
+    addr->port = g_strdup(port);
+    return 0;
+}
+#else
+static void vsock_unsupported(Error **errp)
+{
+    error_setg(errp, "socket family AF_VSOCK unsupported");
+}
+
+static int vsock_connect_saddr(VsockSocketAddress *vaddr, Error **errp)
+{
+    vsock_unsupported(errp);
+    return -1;
+}
+
+static int vsock_listen_saddr(VsockSocketAddress *vaddr,
+                              int num,
+                              Error **errp)
+{
+    vsock_unsupported(errp);
+    return -1;
+}
+
+static int vsock_parse(VsockSocketAddress *addr, const char *str,
+                        Error **errp)
+{
+    vsock_unsupported(errp);
+    return -1;
+}
+#endif /* CONFIG_AF_VSOCK */
+
+#ifndef _WIN32
+
+static bool saddr_is_abstract(UnixSocketAddress *saddr)
+{
+#ifdef CONFIG_LINUX
+    return saddr->abstract;
+#else
+    return false;
+#endif
+}
+
+static bool saddr_is_tight(UnixSocketAddress *saddr)
+{
+#ifdef CONFIG_LINUX
+    return !saddr->has_tight || saddr->tight;
+#else
+    return false;
+#endif
+}
+
+static int unix_listen_saddr(UnixSocketAddress *saddr,
+                             int num,
+                             Error **errp)
+{
+    bool abstract = saddr_is_abstract(saddr);
+    struct sockaddr_un un;
+    int sock, fd;
+    char *pathbuf = NULL;
+    const char *path;
+    size_t pathlen;
+    size_t addrlen;
+
+    sock = qemu_socket(PF_UNIX, SOCK_STREAM, 0);
+    if (sock < 0) {
+        error_setg_errno(errp, errno, "Failed to create Unix socket");
+        return -1;
+    }
+
+    if (saddr->path[0] || abstract) {
+        path = saddr->path;
+    } else {
+        const char *tmpdir = getenv("TMPDIR");
+        tmpdir = tmpdir ? tmpdir : "/tmp";
+        path = pathbuf = g_strdup_printf("%s/qemu-socket-XXXXXX", tmpdir);
+    }
+
+    pathlen = strlen(path);
+    if (pathlen > sizeof(un.sun_path) ||
+        (abstract && pathlen > (sizeof(un.sun_path) - 1))) {
+        error_setg(errp, "UNIX socket path '%s' is too long", path);
+        error_append_hint(errp, "Path must be less than %zu bytes\n",
+                          abstract ? sizeof(un.sun_path) - 1 :
+                          sizeof(un.sun_path));
+        goto err;
+    }
+
+    if (pathbuf != NULL) {
+        /*
+         * This dummy fd usage silences the mktemp() unsecure warning.
+         * Using mkstemp() doesn't make things more secure here
+         * though.  bind() complains about existing files, so we have
+         * to unlink first and thus re-open the race window.  The
+         * worst case possible is bind() failing, i.e. a DoS attack.
+         */
+        fd = mkstemp(pathbuf);
+        if (fd < 0) {
+            error_setg_errno(errp, errno,
+                             "Failed to make a temporary socket %s", pathbuf);
+            goto err;
+        }
+        close(fd);
+    }
+
+    if (!abstract && unlink(path) < 0 && errno != ENOENT) {
+        error_setg_errno(errp, errno,
+                         "Failed to unlink socket %s", path);
+        goto err;
+    }
+
+    memset(&un, 0, sizeof(un));
+    un.sun_family = AF_UNIX;
+    addrlen = sizeof(un);
+
+    if (abstract) {
+        un.sun_path[0] = '\0';
+        memcpy(&un.sun_path[1], path, pathlen);
+        if (saddr_is_tight(saddr)) {
+            addrlen = offsetof(struct sockaddr_un, sun_path) + 1 + pathlen;
+        }
+    } else {
+        memcpy(un.sun_path, path, pathlen);
+    }
+
+    if (bind(sock, (struct sockaddr *) &un, addrlen) < 0) {
+        error_setg_errno(errp, errno, "Failed to bind socket to %s", path);
+        goto err;
+    }
+    if (listen(sock, num) < 0) {
+        error_setg_errno(errp, errno, "Failed to listen on socket");
+        goto err;
+    }
+
+    g_free(pathbuf);
+    return sock;
+
+err:
+    g_free(pathbuf);
+    closesocket(sock);
+    return -1;
+}
+
+static int unix_connect_saddr(UnixSocketAddress *saddr, Error **errp)
+{
+    bool abstract = saddr_is_abstract(saddr);
+    struct sockaddr_un un;
+    int sock, rc;
+    size_t pathlen;
+    size_t addrlen;
+
+    if (saddr->path == NULL) {
+        error_setg(errp, "unix connect: no path specified");
+        return -1;
+    }
+
+    sock = qemu_socket(PF_UNIX, SOCK_STREAM, 0);
+    if (sock < 0) {
+        error_setg_errno(errp, errno, "Failed to create socket");
+        return -1;
+    }
+
+    pathlen = strlen(saddr->path);
+    if (pathlen > sizeof(un.sun_path) ||
+        (abstract && pathlen > (sizeof(un.sun_path) - 1))) {
+        error_setg(errp, "UNIX socket path '%s' is too long", saddr->path);
+        error_append_hint(errp, "Path must be less than %zu bytes\n",
+                          abstract ? sizeof(un.sun_path) - 1 :
+                          sizeof(un.sun_path));
+        goto err;
+    }
+
+    memset(&un, 0, sizeof(un));
+    un.sun_family = AF_UNIX;
+    addrlen = sizeof(un);
+
+    if (abstract) {
+        un.sun_path[0] = '\0';
+        memcpy(&un.sun_path[1], saddr->path, pathlen);
+        if (saddr_is_tight(saddr)) {
+            addrlen = offsetof(struct sockaddr_un, sun_path) + 1 + pathlen;
+        }
+    } else {
+        memcpy(un.sun_path, saddr->path, pathlen);
+    }
+    /* connect to peer */
+    do {
+        rc = 0;
+        if (connect(sock, (struct sockaddr *) &un, addrlen) < 0) {
+            rc = -errno;
+        }
+    } while (rc == -EINTR);
+
+    if (rc < 0) {
+        error_setg_errno(errp, -rc, "Failed to connect to '%s'",
+                         saddr->path);
+        goto err;
+    }
+
+    return sock;
+
+ err:
+    close(sock);
+    return -1;
+}
+
+#else
+
+static int unix_listen_saddr(UnixSocketAddress *saddr,
+                             int num,
+                             Error **errp)
+{
+    error_setg(errp, "unix sockets are not available on windows");
+    errno = ENOTSUP;
+    return -1;
+}
+
+static int unix_connect_saddr(UnixSocketAddress *saddr, Error **errp)
+{
+    error_setg(errp, "unix sockets are not available on windows");
+    errno = ENOTSUP;
+    return -1;
+}
+#endif
+
+/* compatibility wrapper */
+int unix_listen(const char *str, Error **errp)
+{
+    UnixSocketAddress *saddr;
+    int sock;
+
+    saddr = g_new0(UnixSocketAddress, 1);
+    saddr->path = g_strdup(str);
+    sock = unix_listen_saddr(saddr, 1, errp);
+    qapi_free_UnixSocketAddress(saddr);
+    return sock;
+}
+
+int unix_connect(const char *path, Error **errp)
+{
+    UnixSocketAddress *saddr;
+    int sock;
+
+    saddr = g_new0(UnixSocketAddress, 1);
+    saddr->path = g_strdup(path);
+    sock = unix_connect_saddr(saddr, errp);
+    qapi_free_UnixSocketAddress(saddr);
+    return sock;
+}
+
+
+SocketAddress *socket_parse(const char *str, Error **errp)
+{
+    SocketAddress *addr;
+
+    addr = g_new0(SocketAddress, 1);
+    if (strstart(str, "unix:", NULL)) {
+        if (str[5] == '\0') {
+            error_setg(errp, "invalid Unix socket address");
+            goto fail;
+        } else {
+            addr->type = SOCKET_ADDRESS_TYPE_UNIX;
+            addr->u.q_unix.path = g_strdup(str + 5);
+        }
+    } else if (strstart(str, "fd:", NULL)) {
+        if (str[3] == '\0') {
+            error_setg(errp, "invalid file descriptor address");
+            goto fail;
+        } else {
+            addr->type = SOCKET_ADDRESS_TYPE_FD;
+            addr->u.fd.str = g_strdup(str + 3);
+        }
+    } else if (strstart(str, "vsock:", NULL)) {
+        addr->type = SOCKET_ADDRESS_TYPE_VSOCK;
+        if (vsock_parse(&addr->u.vsock, str + strlen("vsock:"), errp)) {
+            goto fail;
+        }
+    } else {
+        addr->type = SOCKET_ADDRESS_TYPE_INET;
+        if (inet_parse(&addr->u.inet, str, errp)) {
+            goto fail;
+        }
+    }
+    return addr;
+
+fail:
+    qapi_free_SocketAddress(addr);
+    return NULL;
+}
+
+static int socket_get_fd(const char *fdstr, int num, Error **errp)
+{
+    Monitor *cur_mon = monitor_cur();
+    int fd;
+    if (num != 1) {
+        error_setg_errno(errp, EINVAL, "socket_get_fd: too many connections");
+        return -1;
+    }
+    if (cur_mon) {
+        fd = monitor_get_fd(cur_mon, fdstr, errp);
+        if (fd < 0) {
+            return -1;
+        }
+    } else {
+        if (qemu_strtoi(fdstr, NULL, 10, &fd) < 0) {
+            error_setg_errno(errp, errno,
+                             "Unable to parse FD number %s",
+                             fdstr);
+            return -1;
+        }
+    }
+    if (!fd_is_socket(fd)) {
+        error_setg(errp, "File descriptor '%s' is not a socket", fdstr);
+        close(fd);
+        return -1;
+    }
+    return fd;
+}
+
+int socket_connect(SocketAddress *addr, Error **errp)
+{
+    int fd;
+
+    switch (addr->type) {
+    case SOCKET_ADDRESS_TYPE_INET:
+        fd = inet_connect_saddr(&addr->u.inet, errp);
+        break;
+
+    case SOCKET_ADDRESS_TYPE_UNIX:
+        fd = unix_connect_saddr(&addr->u.q_unix, errp);
+        break;
+
+    case SOCKET_ADDRESS_TYPE_FD:
+        fd = socket_get_fd(addr->u.fd.str, 1, errp);
+        break;
+
+    case SOCKET_ADDRESS_TYPE_VSOCK:
+        fd = vsock_connect_saddr(&addr->u.vsock, errp);
+        break;
+
+    default:
+        abort();
+    }
+    return fd;
+}
+
+int socket_listen(SocketAddress *addr, int num, Error **errp)
+{
+    int fd;
+
+    trace_socket_listen(num);
+    switch (addr->type) {
+    case SOCKET_ADDRESS_TYPE_INET:
+        fd = inet_listen_saddr(&addr->u.inet, 0, num, errp);
+        break;
+
+    case SOCKET_ADDRESS_TYPE_UNIX:
+        fd = unix_listen_saddr(&addr->u.q_unix, num, errp);
+        break;
+
+    case SOCKET_ADDRESS_TYPE_FD:
+        fd = socket_get_fd(addr->u.fd.str, num, errp);
+        break;
+
+    case SOCKET_ADDRESS_TYPE_VSOCK:
+        fd = vsock_listen_saddr(&addr->u.vsock, num, errp);
+        break;
+
+    default:
+        abort();
+    }
+    return fd;
+}
+
+void socket_listen_cleanup(int fd, Error **errp)
+{
+    SocketAddress *addr;
+
+    addr = socket_local_address(fd, errp);
+    if (!addr) {
+        return;
+    }
+
+    if (addr->type == SOCKET_ADDRESS_TYPE_UNIX
+        && addr->u.q_unix.path) {
+        if (unlink(addr->u.q_unix.path) < 0 && errno != ENOENT) {
+            error_setg_errno(errp, errno,
+                             "Failed to unlink socket %s",
+                             addr->u.q_unix.path);
+        }
+    }
+
+    qapi_free_SocketAddress(addr);
+}
+
+int socket_dgram(SocketAddress *remote, SocketAddress *local, Error **errp)
+{
+    int fd;
+
+    /*
+     * TODO SOCKET_ADDRESS_TYPE_FD when fd is AF_INET or AF_INET6
+     * (although other address families can do SOCK_DGRAM, too)
+     */
+    switch (remote->type) {
+    case SOCKET_ADDRESS_TYPE_INET:
+        fd = inet_dgram_saddr(&remote->u.inet,
+                              local ? &local->u.inet : NULL, errp);
+        break;
+
+    default:
+        error_setg(errp, "socket type unsupported for datagram");
+        fd = -1;
+    }
+    return fd;
+}
+
+
+static SocketAddress *
+socket_sockaddr_to_address_inet(struct sockaddr_storage *sa,
+                                socklen_t salen,
+                                Error **errp)
+{
+    char host[NI_MAXHOST];
+    char serv[NI_MAXSERV];
+    SocketAddress *addr;
+    InetSocketAddress *inet;
+    int ret;
+
+    ret = getnameinfo((struct sockaddr *)sa, salen,
+                      host, sizeof(host),
+                      serv, sizeof(serv),
+                      NI_NUMERICHOST | NI_NUMERICSERV);
+    if (ret != 0) {
+        error_setg(errp, "Cannot format numeric socket address: %s",
+                   gai_strerror(ret));
+        return NULL;
+    }
+
+    addr = g_new0(SocketAddress, 1);
+    addr->type = SOCKET_ADDRESS_TYPE_INET;
+    inet = &addr->u.inet;
+    inet->host = g_strdup(host);
+    inet->port = g_strdup(serv);
+    if (sa->ss_family == AF_INET) {
+        inet->has_ipv4 = inet->ipv4 = true;
+    } else {
+        inet->has_ipv6 = inet->ipv6 = true;
+    }
+
+    return addr;
+}
+
+
+#ifndef WIN32
+static SocketAddress *
+socket_sockaddr_to_address_unix(struct sockaddr_storage *sa,
+                                socklen_t salen,
+                                Error **errp)
+{
+    SocketAddress *addr;
+    struct sockaddr_un *su = (struct sockaddr_un *)sa;
+
+    addr = g_new0(SocketAddress, 1);
+    addr->type = SOCKET_ADDRESS_TYPE_UNIX;
+#ifdef CONFIG_LINUX
+    if (!su->sun_path[0]) {
+        /* Linux abstract socket */
+        addr->u.q_unix.path = g_strndup(su->sun_path + 1,
+                                        sizeof(su->sun_path) - 1);
+        addr->u.q_unix.has_abstract = true;
+        addr->u.q_unix.abstract = true;
+        addr->u.q_unix.has_tight = true;
+        addr->u.q_unix.tight = salen < sizeof(*su);
+        return addr;
+    }
+#endif
+
+    addr->u.q_unix.path = g_strndup(su->sun_path, sizeof(su->sun_path));
+    return addr;
+}
+#endif /* WIN32 */
+
+#ifdef CONFIG_AF_VSOCK
+static SocketAddress *
+socket_sockaddr_to_address_vsock(struct sockaddr_storage *sa,
+                                 socklen_t salen,
+                                 Error **errp)
+{
+    SocketAddress *addr;
+    VsockSocketAddress *vaddr;
+    struct sockaddr_vm *svm = (struct sockaddr_vm *)sa;
+
+    addr = g_new0(SocketAddress, 1);
+    addr->type = SOCKET_ADDRESS_TYPE_VSOCK;
+    vaddr = &addr->u.vsock;
+    vaddr->cid = g_strdup_printf("%u", svm->svm_cid);
+    vaddr->port = g_strdup_printf("%u", svm->svm_port);
+
+    return addr;
+}
+#endif /* CONFIG_AF_VSOCK */
+
+SocketAddress *
+socket_sockaddr_to_address(struct sockaddr_storage *sa,
+                           socklen_t salen,
+                           Error **errp)
+{
+    switch (sa->ss_family) {
+    case AF_INET:
+    case AF_INET6:
+        return socket_sockaddr_to_address_inet(sa, salen, errp);
+
+#ifndef WIN32
+    case AF_UNIX:
+        return socket_sockaddr_to_address_unix(sa, salen, errp);
+#endif /* WIN32 */
+
+#ifdef CONFIG_AF_VSOCK
+    case AF_VSOCK:
+        return socket_sockaddr_to_address_vsock(sa, salen, errp);
+#endif
+
+    default:
+        error_setg(errp, "socket family %d unsupported",
+                   sa->ss_family);
+        return NULL;
+    }
+    return 0;
+}
+
+
+SocketAddress *socket_local_address(int fd, Error **errp)
+{
+    struct sockaddr_storage ss;
+    socklen_t sslen = sizeof(ss);
+
+    if (getsockname(fd, (struct sockaddr *)&ss, &sslen) < 0) {
+        error_setg_errno(errp, errno, "%s",
+                         "Unable to query local socket address");
+        return NULL;
+    }
+
+    return socket_sockaddr_to_address(&ss, sslen, errp);
+}
+
+
+SocketAddress *socket_remote_address(int fd, Error **errp)
+{
+    struct sockaddr_storage ss;
+    socklen_t sslen = sizeof(ss);
+
+    if (getpeername(fd, (struct sockaddr *)&ss, &sslen) < 0) {
+        error_setg_errno(errp, errno, "%s",
+                         "Unable to query remote socket address");
+        return NULL;
+    }
+
+    return socket_sockaddr_to_address(&ss, sslen, errp);
+}
+
+
+SocketAddress *socket_address_flatten(SocketAddressLegacy *addr_legacy)
+{
+    SocketAddress *addr;
+
+    if (!addr_legacy) {
+        return NULL;
+    }
+
+    addr = g_new(SocketAddress, 1);
+
+    switch (addr_legacy->type) {
+    case SOCKET_ADDRESS_LEGACY_KIND_INET:
+        addr->type = SOCKET_ADDRESS_TYPE_INET;
+        QAPI_CLONE_MEMBERS(InetSocketAddress, &addr->u.inet,
+                           addr_legacy->u.inet.data);
+        break;
+    case SOCKET_ADDRESS_LEGACY_KIND_UNIX:
+        addr->type = SOCKET_ADDRESS_TYPE_UNIX;
+        QAPI_CLONE_MEMBERS(UnixSocketAddress, &addr->u.q_unix,
+                           addr_legacy->u.q_unix.data);
+        break;
+    case SOCKET_ADDRESS_LEGACY_KIND_VSOCK:
+        addr->type = SOCKET_ADDRESS_TYPE_VSOCK;
+        QAPI_CLONE_MEMBERS(VsockSocketAddress, &addr->u.vsock,
+                           addr_legacy->u.vsock.data);
+        break;
+    case SOCKET_ADDRESS_LEGACY_KIND_FD:
+        addr->type = SOCKET_ADDRESS_TYPE_FD;
+        QAPI_CLONE_MEMBERS(String, &addr->u.fd, addr_legacy->u.fd.data);
+        break;
+    default:
+        abort();
+    }
+
+    return addr;
+}
diff --git a/util/qemu-thread-common.h b/util/qemu-thread-common.h
new file mode 100644
index 000000000..2af6b1208
--- /dev/null
+++ b/util/qemu-thread-common.h
@@ -0,0 +1,54 @@
+/*
+ * Common qemu-thread implementation header file.
+ *
+ * Copyright Red Hat, Inc. 2018
+ *
+ * Authors:
+ *  Peter Xu ,
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
+ * See the COPYING file in the top-level directory.
+ */
+
+#ifndef QEMU_THREAD_COMMON_H
+#define QEMU_THREAD_COMMON_H
+
+#include "qemu/thread.h"
+#include "trace.h"
+
+static inline void qemu_mutex_post_init(QemuMutex *mutex)
+{
+#ifdef CONFIG_DEBUG_MUTEX
+    mutex->file = NULL;
+    mutex->line = 0;
+#endif
+    mutex->initialized = true;
+}
+
+static inline void qemu_mutex_pre_lock(QemuMutex *mutex,
+                                       const char *file, int line)
+{
+    trace_qemu_mutex_lock(mutex, file, line);
+}
+
+static inline void qemu_mutex_post_lock(QemuMutex *mutex,
+                                        const char *file, int line)
+{
+#ifdef CONFIG_DEBUG_MUTEX
+    mutex->file = file;
+    mutex->line = line;
+#endif
+    trace_qemu_mutex_locked(mutex, file, line);
+}
+
+static inline void qemu_mutex_pre_unlock(QemuMutex *mutex,
+                                         const char *file, int line)
+{
+#ifdef CONFIG_DEBUG_MUTEX
+    mutex->file = NULL;
+    mutex->line = 0;
+#endif
+    trace_qemu_mutex_unlock(mutex, file, line);
+}
+
+#endif
diff --git a/util/qemu-thread-posix.c b/util/qemu-thread-posix.c
new file mode 100644
index 000000000..dcff5e7c5
--- /dev/null
+++ b/util/qemu-thread-posix.c
@@ -0,0 +1,594 @@
+/*
+ * Wrappers around mutex/cond/thread functions
+ *
+ * Copyright Red Hat, Inc. 2009
+ *
+ * Author:
+ *  Marcelo Tosatti 
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
+ * See the COPYING file in the top-level directory.
+ *
+ */
+#include "qemu/osdep.h"
+#include "qemu/thread.h"
+#include "qemu/atomic.h"
+#include "qemu/notify.h"
+#include "qemu-thread-common.h"
+#include "qemu/tsan.h"
+
+static bool name_threads;
+
+void qemu_thread_naming(bool enable)
+{
+    name_threads = enable;
+
+#ifndef CONFIG_THREAD_SETNAME_BYTHREAD
+    /* This is a debugging option, not fatal */
+    if (enable) {
+        fprintf(stderr, "qemu: thread naming not supported on this host\n");
+    }
+#endif
+}
+
+static void error_exit(int err, const char *msg)
+{
+    fprintf(stderr, "qemu: %s: %s\n", msg, strerror(err));
+    abort();
+}
+
+static void compute_abs_deadline(struct timespec *ts, int ms)
+{
+    struct timeval tv;
+    gettimeofday(&tv, NULL);
+    ts->tv_nsec = tv.tv_usec * 1000 + (ms % 1000) * 1000000;
+    ts->tv_sec = tv.tv_sec + ms / 1000;
+    if (ts->tv_nsec >= 1000000000) {
+        ts->tv_sec++;
+        ts->tv_nsec -= 1000000000;
+    }
+}
+
+void qemu_mutex_init(QemuMutex *mutex)
+{
+    int err;
+
+    err = pthread_mutex_init(&mutex->lock, NULL);
+    if (err)
+        error_exit(err, __func__);
+    qemu_mutex_post_init(mutex);
+}
+
+void qemu_mutex_destroy(QemuMutex *mutex)
+{
+    int err;
+
+    assert(mutex->initialized);
+    mutex->initialized = false;
+    err = pthread_mutex_destroy(&mutex->lock);
+    if (err)
+        error_exit(err, __func__);
+}
+
+void qemu_mutex_lock_impl(QemuMutex *mutex, const char *file, const int line)
+{
+    int err;
+
+    assert(mutex->initialized);
+    qemu_mutex_pre_lock(mutex, file, line);
+    err = pthread_mutex_lock(&mutex->lock);
+    if (err)
+        error_exit(err, __func__);
+    qemu_mutex_post_lock(mutex, file, line);
+}
+
+int qemu_mutex_trylock_impl(QemuMutex *mutex, const char *file, const int line)
+{
+    int err;
+
+    assert(mutex->initialized);
+    err = pthread_mutex_trylock(&mutex->lock);
+    if (err == 0) {
+        qemu_mutex_post_lock(mutex, file, line);
+        return 0;
+    }
+    if (err != EBUSY) {
+        error_exit(err, __func__);
+    }
+    return -EBUSY;
+}
+
+void qemu_mutex_unlock_impl(QemuMutex *mutex, const char *file, const int line)
+{
+    int err;
+
+    assert(mutex->initialized);
+    qemu_mutex_pre_unlock(mutex, file, line);
+    err = pthread_mutex_unlock(&mutex->lock);
+    if (err)
+        error_exit(err, __func__);
+}
+
+void qemu_rec_mutex_init(QemuRecMutex *mutex)
+{
+    int err;
+    pthread_mutexattr_t attr;
+
+    pthread_mutexattr_init(&attr);
+    pthread_mutexattr_settype(&attr, PTHREAD_MUTEX_RECURSIVE);
+    err = pthread_mutex_init(&mutex->lock, &attr);
+    pthread_mutexattr_destroy(&attr);
+    if (err) {
+        error_exit(err, __func__);
+    }
+    mutex->initialized = true;
+}
+
+void qemu_cond_init(QemuCond *cond)
+{
+    int err;
+
+    err = pthread_cond_init(&cond->cond, NULL);
+    if (err)
+        error_exit(err, __func__);
+    cond->initialized = true;
+}
+
+void qemu_cond_destroy(QemuCond *cond)
+{
+    int err;
+
+    assert(cond->initialized);
+    cond->initialized = false;
+    err = pthread_cond_destroy(&cond->cond);
+    if (err)
+        error_exit(err, __func__);
+}
+
+void qemu_cond_signal(QemuCond *cond)
+{
+    int err;
+
+    assert(cond->initialized);
+    err = pthread_cond_signal(&cond->cond);
+    if (err)
+        error_exit(err, __func__);
+}
+
+void qemu_cond_broadcast(QemuCond *cond)
+{
+    int err;
+
+    assert(cond->initialized);
+    err = pthread_cond_broadcast(&cond->cond);
+    if (err)
+        error_exit(err, __func__);
+}
+
+void qemu_cond_wait_impl(QemuCond *cond, QemuMutex *mutex, const char *file, const int line)
+{
+    int err;
+
+    assert(cond->initialized);
+    qemu_mutex_pre_unlock(mutex, file, line);
+    err = pthread_cond_wait(&cond->cond, &mutex->lock);
+    qemu_mutex_post_lock(mutex, file, line);
+    if (err)
+        error_exit(err, __func__);
+}
+
+bool qemu_cond_timedwait_impl(QemuCond *cond, QemuMutex *mutex, int ms,
+                              const char *file, const int line)
+{
+    int err;
+    struct timespec ts;
+
+    assert(cond->initialized);
+    trace_qemu_mutex_unlock(mutex, file, line);
+    compute_abs_deadline(&ts, ms);
+    err = pthread_cond_timedwait(&cond->cond, &mutex->lock, &ts);
+    trace_qemu_mutex_locked(mutex, file, line);
+    if (err && err != ETIMEDOUT) {
+        error_exit(err, __func__);
+    }
+    return err != ETIMEDOUT;
+}
+
+void qemu_sem_init(QemuSemaphore *sem, int init)
+{
+    int rc;
+
+#ifndef CONFIG_SEM_TIMEDWAIT
+    rc = pthread_mutex_init(&sem->lock, NULL);
+    if (rc != 0) {
+        error_exit(rc, __func__);
+    }
+    rc = pthread_cond_init(&sem->cond, NULL);
+    if (rc != 0) {
+        error_exit(rc, __func__);
+    }
+    if (init < 0) {
+        error_exit(EINVAL, __func__);
+    }
+    sem->count = init;
+#else
+    rc = sem_init(&sem->sem, 0, init);
+    if (rc < 0) {
+        error_exit(errno, __func__);
+    }
+#endif
+    sem->initialized = true;
+}
+
+void qemu_sem_destroy(QemuSemaphore *sem)
+{
+    int rc;
+
+    assert(sem->initialized);
+    sem->initialized = false;
+#ifndef CONFIG_SEM_TIMEDWAIT
+    rc = pthread_cond_destroy(&sem->cond);
+    if (rc < 0) {
+        error_exit(rc, __func__);
+    }
+    rc = pthread_mutex_destroy(&sem->lock);
+    if (rc < 0) {
+        error_exit(rc, __func__);
+    }
+#else
+    rc = sem_destroy(&sem->sem);
+    if (rc < 0) {
+        error_exit(errno, __func__);
+    }
+#endif
+}
+
+void qemu_sem_post(QemuSemaphore *sem)
+{
+    int rc;
+
+    assert(sem->initialized);
+#ifndef CONFIG_SEM_TIMEDWAIT
+    pthread_mutex_lock(&sem->lock);
+    if (sem->count == UINT_MAX) {
+        rc = EINVAL;
+    } else {
+        sem->count++;
+        rc = pthread_cond_signal(&sem->cond);
+    }
+    pthread_mutex_unlock(&sem->lock);
+    if (rc != 0) {
+        error_exit(rc, __func__);
+    }
+#else
+    rc = sem_post(&sem->sem);
+    if (rc < 0) {
+        error_exit(errno, __func__);
+    }
+#endif
+}
+
+int qemu_sem_timedwait(QemuSemaphore *sem, int ms)
+{
+    int rc;
+    struct timespec ts;
+
+    assert(sem->initialized);
+#ifndef CONFIG_SEM_TIMEDWAIT
+    rc = 0;
+    compute_abs_deadline(&ts, ms);
+    pthread_mutex_lock(&sem->lock);
+    while (sem->count == 0) {
+        rc = pthread_cond_timedwait(&sem->cond, &sem->lock, &ts);
+        if (rc == ETIMEDOUT) {
+            break;
+        }
+        if (rc != 0) {
+            error_exit(rc, __func__);
+        }
+    }
+    if (rc != ETIMEDOUT) {
+        --sem->count;
+    }
+    pthread_mutex_unlock(&sem->lock);
+    return (rc == ETIMEDOUT ? -1 : 0);
+#else
+    if (ms <= 0) {
+        /* This is cheaper than sem_timedwait.  */
+        do {
+            rc = sem_trywait(&sem->sem);
+        } while (rc == -1 && errno == EINTR);
+        if (rc == -1 && errno == EAGAIN) {
+            return -1;
+        }
+    } else {
+        compute_abs_deadline(&ts, ms);
+        do {
+            rc = sem_timedwait(&sem->sem, &ts);
+        } while (rc == -1 && errno == EINTR);
+        if (rc == -1 && errno == ETIMEDOUT) {
+            return -1;
+        }
+    }
+    if (rc < 0) {
+        error_exit(errno, __func__);
+    }
+    return 0;
+#endif
+}
+
+void qemu_sem_wait(QemuSemaphore *sem)
+{
+    int rc;
+
+    assert(sem->initialized);
+#ifndef CONFIG_SEM_TIMEDWAIT
+    pthread_mutex_lock(&sem->lock);
+    while (sem->count == 0) {
+        rc = pthread_cond_wait(&sem->cond, &sem->lock);
+        if (rc != 0) {
+            error_exit(rc, __func__);
+        }
+    }
+    --sem->count;
+    pthread_mutex_unlock(&sem->lock);
+#else
+    do {
+        rc = sem_wait(&sem->sem);
+    } while (rc == -1 && errno == EINTR);
+    if (rc < 0) {
+        error_exit(errno, __func__);
+    }
+#endif
+}
+
+#ifdef __linux__
+#include "qemu/futex.h"
+#else
+static inline void qemu_futex_wake(QemuEvent *ev, int n)
+{
+    assert(ev->initialized);
+    pthread_mutex_lock(&ev->lock);
+    if (n == 1) {
+        pthread_cond_signal(&ev->cond);
+    } else {
+        pthread_cond_broadcast(&ev->cond);
+    }
+    pthread_mutex_unlock(&ev->lock);
+}
+
+static inline void qemu_futex_wait(QemuEvent *ev, unsigned val)
+{
+    assert(ev->initialized);
+    pthread_mutex_lock(&ev->lock);
+    if (ev->value == val) {
+        pthread_cond_wait(&ev->cond, &ev->lock);
+    }
+    pthread_mutex_unlock(&ev->lock);
+}
+#endif
+
+/* Valid transitions:
+ * - free->set, when setting the event
+ * - busy->set, when setting the event, followed by qemu_futex_wake
+ * - set->free, when resetting the event
+ * - free->busy, when waiting
+ *
+ * set->busy does not happen (it can be observed from the outside but
+ * it really is set->free->busy).
+ *
+ * busy->free provably cannot happen; to enforce it, the set->free transition
+ * is done with an OR, which becomes a no-op if the event has concurrently
+ * transitioned to free or busy.
+ */
+
+#define EV_SET         0
+#define EV_FREE        1
+#define EV_BUSY       -1
+
+void qemu_event_init(QemuEvent *ev, bool init)
+{
+#ifndef __linux__
+    pthread_mutex_init(&ev->lock, NULL);
+    pthread_cond_init(&ev->cond, NULL);
+#endif
+
+    ev->value = (init ? EV_SET : EV_FREE);
+    ev->initialized = true;
+}
+
+void qemu_event_destroy(QemuEvent *ev)
+{
+    assert(ev->initialized);
+    ev->initialized = false;
+#ifndef __linux__
+    pthread_mutex_destroy(&ev->lock);
+    pthread_cond_destroy(&ev->cond);
+#endif
+}
+
+void qemu_event_set(QemuEvent *ev)
+{
+    /* qemu_event_set has release semantics, but because it *loads*
+     * ev->value we need a full memory barrier here.
+     */
+    assert(ev->initialized);
+    smp_mb();
+    if (qatomic_read(&ev->value) != EV_SET) {
+        if (qatomic_xchg(&ev->value, EV_SET) == EV_BUSY) {
+            /* There were waiters, wake them up.  */
+            qemu_futex_wake(ev, INT_MAX);
+        }
+    }
+}
+
+void qemu_event_reset(QemuEvent *ev)
+{
+    unsigned value;
+
+    assert(ev->initialized);
+    value = qatomic_read(&ev->value);
+    smp_mb_acquire();
+    if (value == EV_SET) {
+        /*
+         * If there was a concurrent reset (or even reset+wait),
+         * do nothing.  Otherwise change EV_SET->EV_FREE.
+         */
+        qatomic_or(&ev->value, EV_FREE);
+    }
+}
+
+void qemu_event_wait(QemuEvent *ev)
+{
+    unsigned value;
+
+    assert(ev->initialized);
+    value = qatomic_read(&ev->value);
+    smp_mb_acquire();
+    if (value != EV_SET) {
+        if (value == EV_FREE) {
+            /*
+             * Leave the event reset and tell qemu_event_set that there
+             * are waiters.  No need to retry, because there cannot be
+             * a concurrent busy->free transition.  After the CAS, the
+             * event will be either set or busy.
+             */
+            if (qatomic_cmpxchg(&ev->value, EV_FREE, EV_BUSY) == EV_SET) {
+                return;
+            }
+        }
+        qemu_futex_wait(ev, EV_BUSY);
+    }
+}
+
+static __thread NotifierList thread_exit;
+
+/*
+ * Note that in this implementation you can register a thread-exit
+ * notifier for the main thread, but it will never be called.
+ * This is OK because main thread exit can only happen when the
+ * entire process is exiting, and the API allows notifiers to not
+ * be called on process exit.
+ */
+void qemu_thread_atexit_add(Notifier *notifier)
+{
+    notifier_list_add(&thread_exit, notifier);
+}
+
+void qemu_thread_atexit_remove(Notifier *notifier)
+{
+    notifier_remove(notifier);
+}
+
+static void qemu_thread_atexit_notify(void *arg)
+{
+    /*
+     * Called when non-main thread exits (via qemu_thread_exit()
+     * or by returning from its start routine.)
+     */
+    notifier_list_notify(&thread_exit, NULL);
+}
+
+typedef struct {
+    void *(*start_routine)(void *);
+    void *arg;
+    char *name;
+} QemuThreadArgs;
+
+static void *qemu_thread_start(void *args)
+{
+    QemuThreadArgs *qemu_thread_args = args;
+    void *(*start_routine)(void *) = qemu_thread_args->start_routine;
+    void *arg = qemu_thread_args->arg;
+    void *r;
+
+#ifdef CONFIG_THREAD_SETNAME_BYTHREAD
+    /* Attempt to set the threads name; note that this is for debug, so
+     * we're not going to fail if we can't set it.
+     */
+    if (name_threads && qemu_thread_args->name) {
+# if defined(CONFIG_PTHREAD_SETNAME_NP_W_TID)
+        pthread_setname_np(pthread_self(), qemu_thread_args->name);
+# elif defined(CONFIG_PTHREAD_SETNAME_NP_WO_TID)
+        pthread_setname_np(qemu_thread_args->name);
+# endif
+    }
+#endif
+    QEMU_TSAN_ANNOTATE_THREAD_NAME(qemu_thread_args->name);
+    g_free(qemu_thread_args->name);
+    g_free(qemu_thread_args);
+    pthread_cleanup_push(qemu_thread_atexit_notify, NULL);
+    r = start_routine(arg);
+    pthread_cleanup_pop(1);
+    return r;
+}
+
+void qemu_thread_create(QemuThread *thread, const char *name,
+                       void *(*start_routine)(void*),
+                       void *arg, int mode)
+{
+    sigset_t set, oldset;
+    int err;
+    pthread_attr_t attr;
+    QemuThreadArgs *qemu_thread_args;
+
+    err = pthread_attr_init(&attr);
+    if (err) {
+        error_exit(err, __func__);
+    }
+
+    if (mode == QEMU_THREAD_DETACHED) {
+        pthread_attr_setdetachstate(&attr, PTHREAD_CREATE_DETACHED);
+    }
+
+    /* Leave signal handling to the iothread.  */
+    sigfillset(&set);
+    /* Blocking the signals can result in undefined behaviour. */
+    sigdelset(&set, SIGSEGV);
+    sigdelset(&set, SIGFPE);
+    sigdelset(&set, SIGILL);
+    /* TODO avoid SIGBUS loss on macOS */
+    pthread_sigmask(SIG_SETMASK, &set, &oldset);
+
+    qemu_thread_args = g_new0(QemuThreadArgs, 1);
+    qemu_thread_args->name = g_strdup(name);
+    qemu_thread_args->start_routine = start_routine;
+    qemu_thread_args->arg = arg;
+
+    err = pthread_create(&thread->thread, &attr,
+                         qemu_thread_start, qemu_thread_args);
+
+    if (err)
+        error_exit(err, __func__);
+
+    pthread_sigmask(SIG_SETMASK, &oldset, NULL);
+
+    pthread_attr_destroy(&attr);
+}
+
+void qemu_thread_get_self(QemuThread *thread)
+{
+    thread->thread = pthread_self();
+}
+
+bool qemu_thread_is_self(QemuThread *thread)
+{
+   return pthread_equal(pthread_self(), thread->thread);
+}
+
+void qemu_thread_exit(void *retval)
+{
+    pthread_exit(retval);
+}
+
+void *qemu_thread_join(QemuThread *thread)
+{
+    int err;
+    void *ret;
+
+    err = pthread_join(thread->thread, &ret);
+    if (err) {
+        error_exit(err, __func__);
+    }
+    return ret;
+}
diff --git a/util/qemu-thread-win32.c b/util/qemu-thread-win32.c
new file mode 100644
index 000000000..cb5aa2018
--- /dev/null
+++ b/util/qemu-thread-win32.c
@@ -0,0 +1,461 @@
+/*
+ * Win32 implementation for mutex/cond/thread functions
+ *
+ * Copyright Red Hat, Inc. 2010
+ *
+ * Author:
+ *  Paolo Bonzini 
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
+ * See the COPYING file in the top-level directory.
+ *
+ */
+
+#include "qemu/osdep.h"
+#include "qemu-common.h"
+#include "qemu/thread.h"
+#include "qemu/notify.h"
+#include "qemu-thread-common.h"
+#include 
+
+static bool name_threads;
+
+void qemu_thread_naming(bool enable)
+{
+    /* But note we don't actually name them on Windows yet */
+    name_threads = enable;
+
+    fprintf(stderr, "qemu: thread naming not supported on this host\n");
+}
+
+static void error_exit(int err, const char *msg)
+{
+    char *pstr;
+
+    FormatMessage(FORMAT_MESSAGE_FROM_SYSTEM | FORMAT_MESSAGE_ALLOCATE_BUFFER,
+                  NULL, err, 0, (LPTSTR)&pstr, 2, NULL);
+    fprintf(stderr, "qemu: %s: %s\n", msg, pstr);
+    LocalFree(pstr);
+    abort();
+}
+
+void qemu_mutex_init(QemuMutex *mutex)
+{
+    InitializeSRWLock(&mutex->lock);
+    qemu_mutex_post_init(mutex);
+}
+
+void qemu_mutex_destroy(QemuMutex *mutex)
+{
+    assert(mutex->initialized);
+    mutex->initialized = false;
+    InitializeSRWLock(&mutex->lock);
+}
+
+void qemu_mutex_lock_impl(QemuMutex *mutex, const char *file, const int line)
+{
+    assert(mutex->initialized);
+    qemu_mutex_pre_lock(mutex, file, line);
+    AcquireSRWLockExclusive(&mutex->lock);
+    qemu_mutex_post_lock(mutex, file, line);
+}
+
+int qemu_mutex_trylock_impl(QemuMutex *mutex, const char *file, const int line)
+{
+    int owned;
+
+    assert(mutex->initialized);
+    owned = TryAcquireSRWLockExclusive(&mutex->lock);
+    if (owned) {
+        qemu_mutex_post_lock(mutex, file, line);
+        return 0;
+    }
+    return -EBUSY;
+}
+
+void qemu_mutex_unlock_impl(QemuMutex *mutex, const char *file, const int line)
+{
+    assert(mutex->initialized);
+    qemu_mutex_pre_unlock(mutex, file, line);
+    ReleaseSRWLockExclusive(&mutex->lock);
+}
+
+void qemu_rec_mutex_init(QemuRecMutex *mutex)
+{
+    InitializeCriticalSection(&mutex->lock);
+    mutex->initialized = true;
+}
+
+void qemu_rec_mutex_destroy(QemuRecMutex *mutex)
+{
+    assert(mutex->initialized);
+    mutex->initialized = false;
+    DeleteCriticalSection(&mutex->lock);
+}
+
+void qemu_rec_mutex_lock_impl(QemuRecMutex *mutex, const char *file, int line)
+{
+    assert(mutex->initialized);
+    EnterCriticalSection(&mutex->lock);
+}
+
+int qemu_rec_mutex_trylock_impl(QemuRecMutex *mutex, const char *file, int line)
+{
+    assert(mutex->initialized);
+    return !TryEnterCriticalSection(&mutex->lock);
+}
+
+void qemu_rec_mutex_unlock(QemuRecMutex *mutex)
+{
+    assert(mutex->initialized);
+    LeaveCriticalSection(&mutex->lock);
+}
+
+void qemu_cond_init(QemuCond *cond)
+{
+    memset(cond, 0, sizeof(*cond));
+    InitializeConditionVariable(&cond->var);
+    cond->initialized = true;
+}
+
+void qemu_cond_destroy(QemuCond *cond)
+{
+    assert(cond->initialized);
+    cond->initialized = false;
+    InitializeConditionVariable(&cond->var);
+}
+
+void qemu_cond_signal(QemuCond *cond)
+{
+    assert(cond->initialized);
+    WakeConditionVariable(&cond->var);
+}
+
+void qemu_cond_broadcast(QemuCond *cond)
+{
+    assert(cond->initialized);
+    WakeAllConditionVariable(&cond->var);
+}
+
+void qemu_cond_wait_impl(QemuCond *cond, QemuMutex *mutex, const char *file, const int line)
+{
+    assert(cond->initialized);
+    qemu_mutex_pre_unlock(mutex, file, line);
+    SleepConditionVariableSRW(&cond->var, &mutex->lock, INFINITE, 0);
+    qemu_mutex_post_lock(mutex, file, line);
+}
+
+bool qemu_cond_timedwait_impl(QemuCond *cond, QemuMutex *mutex, int ms,
+                              const char *file, const int line)
+{
+    int rc = 0;
+
+    assert(cond->initialized);
+    trace_qemu_mutex_unlock(mutex, file, line);
+    if (!SleepConditionVariableSRW(&cond->var, &mutex->lock, ms, 0)) {
+        rc = GetLastError();
+    }
+    trace_qemu_mutex_locked(mutex, file, line);
+    if (rc && rc != ERROR_TIMEOUT) {
+        error_exit(rc, __func__);
+    }
+    return rc != ERROR_TIMEOUT;
+}
+
+void qemu_sem_init(QemuSemaphore *sem, int init)
+{
+    /* Manual reset.  */
+    sem->sema = CreateSemaphore(NULL, init, LONG_MAX, NULL);
+    sem->initialized = true;
+}
+
+void qemu_sem_destroy(QemuSemaphore *sem)
+{
+    assert(sem->initialized);
+    sem->initialized = false;
+    CloseHandle(sem->sema);
+}
+
+void qemu_sem_post(QemuSemaphore *sem)
+{
+    assert(sem->initialized);
+    ReleaseSemaphore(sem->sema, 1, NULL);
+}
+
+int qemu_sem_timedwait(QemuSemaphore *sem, int ms)
+{
+    int rc;
+
+    assert(sem->initialized);
+    rc = WaitForSingleObject(sem->sema, ms);
+    if (rc == WAIT_OBJECT_0) {
+        return 0;
+    }
+    if (rc != WAIT_TIMEOUT) {
+        error_exit(GetLastError(), __func__);
+    }
+    return -1;
+}
+
+void qemu_sem_wait(QemuSemaphore *sem)
+{
+    assert(sem->initialized);
+    if (WaitForSingleObject(sem->sema, INFINITE) != WAIT_OBJECT_0) {
+        error_exit(GetLastError(), __func__);
+    }
+}
+
+/* Wrap a Win32 manual-reset event with a fast userspace path.  The idea
+ * is to reset the Win32 event lazily, as part of a test-reset-test-wait
+ * sequence.  Such a sequence is, indeed, how QemuEvents are used by
+ * RCU and other subsystems!
+ *
+ * Valid transitions:
+ * - free->set, when setting the event
+ * - busy->set, when setting the event, followed by SetEvent
+ * - set->free, when resetting the event
+ * - free->busy, when waiting
+ *
+ * set->busy does not happen (it can be observed from the outside but
+ * it really is set->free->busy).
+ *
+ * busy->free provably cannot happen; to enforce it, the set->free transition
+ * is done with an OR, which becomes a no-op if the event has concurrently
+ * transitioned to free or busy (and is faster than cmpxchg).
+ */
+
+#define EV_SET         0
+#define EV_FREE        1
+#define EV_BUSY       -1
+
+void qemu_event_init(QemuEvent *ev, bool init)
+{
+    /* Manual reset.  */
+    ev->event = CreateEvent(NULL, TRUE, TRUE, NULL);
+    ev->value = (init ? EV_SET : EV_FREE);
+    ev->initialized = true;
+}
+
+void qemu_event_destroy(QemuEvent *ev)
+{
+    assert(ev->initialized);
+    ev->initialized = false;
+    CloseHandle(ev->event);
+}
+
+void qemu_event_set(QemuEvent *ev)
+{
+    assert(ev->initialized);
+    /* qemu_event_set has release semantics, but because it *loads*
+     * ev->value we need a full memory barrier here.
+     */
+    smp_mb();
+    if (qatomic_read(&ev->value) != EV_SET) {
+        if (qatomic_xchg(&ev->value, EV_SET) == EV_BUSY) {
+            /* There were waiters, wake them up.  */
+            SetEvent(ev->event);
+        }
+    }
+}
+
+void qemu_event_reset(QemuEvent *ev)
+{
+    unsigned value;
+
+    assert(ev->initialized);
+    value = qatomic_read(&ev->value);
+    smp_mb_acquire();
+    if (value == EV_SET) {
+        /* If there was a concurrent reset (or even reset+wait),
+         * do nothing.  Otherwise change EV_SET->EV_FREE.
+         */
+        qatomic_or(&ev->value, EV_FREE);
+    }
+}
+
+void qemu_event_wait(QemuEvent *ev)
+{
+    unsigned value;
+
+    assert(ev->initialized);
+    value = qatomic_read(&ev->value);
+    smp_mb_acquire();
+    if (value != EV_SET) {
+        if (value == EV_FREE) {
+            /* qemu_event_set is not yet going to call SetEvent, but we are
+             * going to do another check for EV_SET below when setting EV_BUSY.
+             * At that point it is safe to call WaitForSingleObject.
+             */
+            ResetEvent(ev->event);
+
+            /* Tell qemu_event_set that there are waiters.  No need to retry
+             * because there cannot be a concurrent busy->free transition.
+             * After the CAS, the event will be either set or busy.
+             */
+            if (qatomic_cmpxchg(&ev->value, EV_FREE, EV_BUSY) == EV_SET) {
+                value = EV_SET;
+            } else {
+                value = EV_BUSY;
+            }
+        }
+        if (value == EV_BUSY) {
+            WaitForSingleObject(ev->event, INFINITE);
+        }
+    }
+}
+
+struct QemuThreadData {
+    /* Passed to win32_start_routine.  */
+    void             *(*start_routine)(void *);
+    void             *arg;
+    short             mode;
+    NotifierList      exit;
+
+    /* Only used for joinable threads. */
+    bool              exited;
+    void             *ret;
+    CRITICAL_SECTION  cs;
+};
+
+static bool atexit_registered;
+static NotifierList main_thread_exit;
+
+static __thread QemuThreadData *qemu_thread_data;
+
+static void run_main_thread_exit(void)
+{
+    notifier_list_notify(&main_thread_exit, NULL);
+}
+
+void qemu_thread_atexit_add(Notifier *notifier)
+{
+    if (!qemu_thread_data) {
+        if (!atexit_registered) {
+            atexit_registered = true;
+            atexit(run_main_thread_exit);
+        }
+        notifier_list_add(&main_thread_exit, notifier);
+    } else {
+        notifier_list_add(&qemu_thread_data->exit, notifier);
+    }
+}
+
+void qemu_thread_atexit_remove(Notifier *notifier)
+{
+    notifier_remove(notifier);
+}
+
+static unsigned __stdcall win32_start_routine(void *arg)
+{
+    QemuThreadData *data = (QemuThreadData *) arg;
+    void *(*start_routine)(void *) = data->start_routine;
+    void *thread_arg = data->arg;
+
+    qemu_thread_data = data;
+    qemu_thread_exit(start_routine(thread_arg));
+    abort();
+}
+
+void qemu_thread_exit(void *arg)
+{
+    QemuThreadData *data = qemu_thread_data;
+
+    notifier_list_notify(&data->exit, NULL);
+    if (data->mode == QEMU_THREAD_JOINABLE) {
+        data->ret = arg;
+        EnterCriticalSection(&data->cs);
+        data->exited = true;
+        LeaveCriticalSection(&data->cs);
+    } else {
+        g_free(data);
+    }
+    _endthreadex(0);
+}
+
+void *qemu_thread_join(QemuThread *thread)
+{
+    QemuThreadData *data;
+    void *ret;
+    HANDLE handle;
+
+    data = thread->data;
+    if (data->mode == QEMU_THREAD_DETACHED) {
+        return NULL;
+    }
+
+    /*
+     * Because multiple copies of the QemuThread can exist via
+     * qemu_thread_get_self, we need to store a value that cannot
+     * leak there.  The simplest, non racy way is to store the TID,
+     * discard the handle that _beginthreadex gives back, and
+     * get another copy of the handle here.
+     */
+    handle = qemu_thread_get_handle(thread);
+    if (handle) {
+        WaitForSingleObject(handle, INFINITE);
+        CloseHandle(handle);
+    }
+    ret = data->ret;
+    DeleteCriticalSection(&data->cs);
+    g_free(data);
+    return ret;
+}
+
+void qemu_thread_create(QemuThread *thread, const char *name,
+                       void *(*start_routine)(void *),
+                       void *arg, int mode)
+{
+    HANDLE hThread;
+    struct QemuThreadData *data;
+
+    data = g_malloc(sizeof *data);
+    data->start_routine = start_routine;
+    data->arg = arg;
+    data->mode = mode;
+    data->exited = false;
+    notifier_list_init(&data->exit);
+
+    if (data->mode != QEMU_THREAD_DETACHED) {
+        InitializeCriticalSection(&data->cs);
+    }
+
+    hThread = (HANDLE) _beginthreadex(NULL, 0, win32_start_routine,
+                                      data, 0, &thread->tid);
+    if (!hThread) {
+        error_exit(GetLastError(), __func__);
+    }
+    CloseHandle(hThread);
+    thread->data = data;
+}
+
+void qemu_thread_get_self(QemuThread *thread)
+{
+    thread->data = qemu_thread_data;
+    thread->tid = GetCurrentThreadId();
+}
+
+HANDLE qemu_thread_get_handle(QemuThread *thread)
+{
+    QemuThreadData *data;
+    HANDLE handle;
+
+    data = thread->data;
+    if (data->mode == QEMU_THREAD_DETACHED) {
+        return NULL;
+    }
+
+    EnterCriticalSection(&data->cs);
+    if (!data->exited) {
+        handle = OpenThread(SYNCHRONIZE | THREAD_SUSPEND_RESUME |
+                            THREAD_SET_CONTEXT, FALSE, thread->tid);
+    } else {
+        handle = NULL;
+    }
+    LeaveCriticalSection(&data->cs);
+    return handle;
+}
+
+bool qemu_thread_is_self(QemuThread *thread)
+{
+    return GetCurrentThreadId() == thread->tid;
+}
diff --git a/util/qemu-timer-common.c b/util/qemu-timer-common.c
new file mode 100644
index 000000000..baf3317f7
--- /dev/null
+++ b/util/qemu-timer-common.c
@@ -0,0 +1,59 @@
+/*
+ * QEMU System Emulator
+ *
+ * Copyright (c) 2003-2008 Fabrice Bellard
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+#include "qemu/osdep.h"
+#include "qemu/timer.h"
+
+/***********************************************************/
+/* real time host monotonic timer */
+
+#ifdef _WIN32
+
+int64_t clock_freq;
+
+static void __attribute__((constructor)) init_get_clock(void)
+{
+    LARGE_INTEGER freq;
+    int ret;
+    ret = QueryPerformanceFrequency(&freq);
+    if (ret == 0) {
+        fprintf(stderr, "Could not calibrate ticks\n");
+        exit(1);
+    }
+    clock_freq = freq.QuadPart;
+}
+
+#else
+
+int use_rt_clock;
+
+static void __attribute__((constructor)) init_get_clock(void)
+{
+    struct timespec ts;
+
+    use_rt_clock = 0;
+    if (clock_gettime(CLOCK_MONOTONIC, &ts) == 0) {
+        use_rt_clock = 1;
+    }
+}
+#endif
diff --git a/util/qemu-timer.c b/util/qemu-timer.c
new file mode 100644
index 000000000..81c28af51
--- /dev/null
+++ b/util/qemu-timer.c
@@ -0,0 +1,675 @@
+/*
+ * QEMU System Emulator
+ *
+ * Copyright (c) 2003-2008 Fabrice Bellard
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+#include "qemu/osdep.h"
+#include "qemu/main-loop.h"
+#include "qemu/timer.h"
+#include "qemu/lockable.h"
+#include "sysemu/cpu-timers.h"
+#include "sysemu/replay.h"
+#include "sysemu/cpus.h"
+#include "sysemu/qtest.h"
+
+#ifdef CONFIG_POSIX
+#include 
+#endif
+
+#ifdef CONFIG_PPOLL
+#include 
+#endif
+
+#ifdef CONFIG_PRCTL_PR_SET_TIMERSLACK
+#include 
+#endif
+
+/***********************************************************/
+/* timers */
+
+typedef struct QEMUClock {
+    /* We rely on BQL to protect the timerlists */
+    QLIST_HEAD(, QEMUTimerList) timerlists;
+
+    QEMUClockType type;
+    bool enabled;
+} QEMUClock;
+
+QEMUTimerListGroup main_loop_tlg;
+static QEMUClock qemu_clocks[QEMU_CLOCK_MAX];
+
+/* A QEMUTimerList is a list of timers attached to a clock. More
+ * than one QEMUTimerList can be attached to each clock, for instance
+ * used by different AioContexts / threads. Each clock also has
+ * a list of the QEMUTimerLists associated with it, in order that
+ * reenabling the clock can call all the notifiers.
+ */
+
+struct QEMUTimerList {
+    QEMUClock *clock;
+    QemuMutex active_timers_lock;
+    QEMUTimer *active_timers;
+    QLIST_ENTRY(QEMUTimerList) list;
+    QEMUTimerListNotifyCB *notify_cb;
+    void *notify_opaque;
+
+    /* lightweight method to mark the end of timerlist's running */
+    QemuEvent timers_done_ev;
+};
+
+/**
+ * qemu_clock_ptr:
+ * @type: type of clock
+ *
+ * Translate a clock type into a pointer to QEMUClock object.
+ *
+ * Returns: a pointer to the QEMUClock object
+ */
+static inline QEMUClock *qemu_clock_ptr(QEMUClockType type)
+{
+    return &qemu_clocks[type];
+}
+
+static bool timer_expired_ns(QEMUTimer *timer_head, int64_t current_time)
+{
+    return timer_head && (timer_head->expire_time <= current_time);
+}
+
+QEMUTimerList *timerlist_new(QEMUClockType type,
+                             QEMUTimerListNotifyCB *cb,
+                             void *opaque)
+{
+    QEMUTimerList *timer_list;
+    QEMUClock *clock = qemu_clock_ptr(type);
+
+    timer_list = g_malloc0(sizeof(QEMUTimerList));
+    qemu_event_init(&timer_list->timers_done_ev, true);
+    timer_list->clock = clock;
+    timer_list->notify_cb = cb;
+    timer_list->notify_opaque = opaque;
+    qemu_mutex_init(&timer_list->active_timers_lock);
+    QLIST_INSERT_HEAD(&clock->timerlists, timer_list, list);
+    return timer_list;
+}
+
+void timerlist_free(QEMUTimerList *timer_list)
+{
+    assert(!timerlist_has_timers(timer_list));
+    if (timer_list->clock) {
+        QLIST_REMOVE(timer_list, list);
+    }
+    qemu_mutex_destroy(&timer_list->active_timers_lock);
+    g_free(timer_list);
+}
+
+static void qemu_clock_init(QEMUClockType type, QEMUTimerListNotifyCB *notify_cb)
+{
+    QEMUClock *clock = qemu_clock_ptr(type);
+
+    /* Assert that the clock of type TYPE has not been initialized yet. */
+    assert(main_loop_tlg.tl[type] == NULL);
+
+    clock->type = type;
+    clock->enabled = (type == QEMU_CLOCK_VIRTUAL ? false : true);
+    QLIST_INIT(&clock->timerlists);
+    main_loop_tlg.tl[type] = timerlist_new(type, notify_cb, NULL);
+}
+
+bool qemu_clock_use_for_deadline(QEMUClockType type)
+{
+    return !(icount_enabled() && (type == QEMU_CLOCK_VIRTUAL));
+}
+
+void qemu_clock_notify(QEMUClockType type)
+{
+    QEMUTimerList *timer_list;
+    QEMUClock *clock = qemu_clock_ptr(type);
+    QLIST_FOREACH(timer_list, &clock->timerlists, list) {
+        timerlist_notify(timer_list);
+    }
+}
+
+/* Disabling the clock will wait for related timerlists to stop
+ * executing qemu_run_timers.  Thus, this functions should not
+ * be used from the callback of a timer that is based on @clock.
+ * Doing so would cause a deadlock.
+ *
+ * Caller should hold BQL.
+ */
+void qemu_clock_enable(QEMUClockType type, bool enabled)
+{
+    QEMUClock *clock = qemu_clock_ptr(type);
+    QEMUTimerList *tl;
+    bool old = clock->enabled;
+    clock->enabled = enabled;
+    if (enabled && !old) {
+        qemu_clock_notify(type);
+    } else if (!enabled && old) {
+        QLIST_FOREACH(tl, &clock->timerlists, list) {
+            qemu_event_wait(&tl->timers_done_ev);
+        }
+    }
+}
+
+bool timerlist_has_timers(QEMUTimerList *timer_list)
+{
+    return !!qatomic_read(&timer_list->active_timers);
+}
+
+bool qemu_clock_has_timers(QEMUClockType type)
+{
+    return timerlist_has_timers(
+        main_loop_tlg.tl[type]);
+}
+
+bool timerlist_expired(QEMUTimerList *timer_list)
+{
+    int64_t expire_time;
+
+    if (!qatomic_read(&timer_list->active_timers)) {
+        return false;
+    }
+
+    WITH_QEMU_LOCK_GUARD(&timer_list->active_timers_lock) {
+        if (!timer_list->active_timers) {
+            return false;
+        }
+        expire_time = timer_list->active_timers->expire_time;
+    }
+
+    return expire_time <= qemu_clock_get_ns(timer_list->clock->type);
+}
+
+bool qemu_clock_expired(QEMUClockType type)
+{
+    return timerlist_expired(
+        main_loop_tlg.tl[type]);
+}
+
+/*
+ * As above, but return -1 for no deadline, and do not cap to 2^32
+ * as we know the result is always positive.
+ */
+
+int64_t timerlist_deadline_ns(QEMUTimerList *timer_list)
+{
+    int64_t delta;
+    int64_t expire_time;
+
+    if (!qatomic_read(&timer_list->active_timers)) {
+        return -1;
+    }
+
+    if (!timer_list->clock->enabled) {
+        return -1;
+    }
+
+    /* The active timers list may be modified before the caller uses our return
+     * value but ->notify_cb() is called when the deadline changes.  Therefore
+     * the caller should notice the change and there is no race condition.
+     */
+    WITH_QEMU_LOCK_GUARD(&timer_list->active_timers_lock) {
+        if (!timer_list->active_timers) {
+            return -1;
+        }
+        expire_time = timer_list->active_timers->expire_time;
+    }
+
+    delta = expire_time - qemu_clock_get_ns(timer_list->clock->type);
+
+    if (delta <= 0) {
+        return 0;
+    }
+
+    return delta;
+}
+
+/* Calculate the soonest deadline across all timerlists attached
+ * to the clock. This is used for the icount timeout so we
+ * ignore whether or not the clock should be used in deadline
+ * calculations.
+ */
+int64_t qemu_clock_deadline_ns_all(QEMUClockType type, int attr_mask)
+{
+    int64_t deadline = -1;
+    int64_t delta;
+    int64_t expire_time;
+    QEMUTimer *ts;
+    QEMUTimerList *timer_list;
+    QEMUClock *clock = qemu_clock_ptr(type);
+
+    if (!clock->enabled) {
+        return -1;
+    }
+
+    QLIST_FOREACH(timer_list, &clock->timerlists, list) {
+        qemu_mutex_lock(&timer_list->active_timers_lock);
+        ts = timer_list->active_timers;
+        /* Skip all external timers */
+        while (ts && (ts->attributes & ~attr_mask)) {
+            ts = ts->next;
+        }
+        if (!ts) {
+            qemu_mutex_unlock(&timer_list->active_timers_lock);
+            continue;
+        }
+        expire_time = ts->expire_time;
+        qemu_mutex_unlock(&timer_list->active_timers_lock);
+
+        delta = expire_time - qemu_clock_get_ns(type);
+        if (delta <= 0) {
+            delta = 0;
+        }
+        deadline = qemu_soonest_timeout(deadline, delta);
+    }
+    return deadline;
+}
+
+QEMUClockType timerlist_get_clock(QEMUTimerList *timer_list)
+{
+    return timer_list->clock->type;
+}
+
+QEMUTimerList *qemu_clock_get_main_loop_timerlist(QEMUClockType type)
+{
+    return main_loop_tlg.tl[type];
+}
+
+void timerlist_notify(QEMUTimerList *timer_list)
+{
+    if (timer_list->notify_cb) {
+        timer_list->notify_cb(timer_list->notify_opaque, timer_list->clock->type);
+    } else {
+        qemu_notify_event();
+    }
+}
+
+/* Transition function to convert a nanosecond timeout to ms
+ * This is used where a system does not support ppoll
+ */
+int qemu_timeout_ns_to_ms(int64_t ns)
+{
+    int64_t ms;
+    if (ns < 0) {
+        return -1;
+    }
+
+    if (!ns) {
+        return 0;
+    }
+
+    /* Always round up, because it's better to wait too long than to wait too
+     * little and effectively busy-wait
+     */
+    ms = DIV_ROUND_UP(ns, SCALE_MS);
+
+    /* To avoid overflow problems, limit this to 2^31, i.e. approx 25 days */
+    return MIN(ms, INT32_MAX);
+}
+
+
+/* qemu implementation of g_poll which uses a nanosecond timeout but is
+ * otherwise identical to g_poll
+ */
+int qemu_poll_ns(GPollFD *fds, guint nfds, int64_t timeout)
+{
+#ifdef CONFIG_PPOLL
+    if (timeout < 0) {
+        return ppoll((struct pollfd *)fds, nfds, NULL, NULL);
+    } else {
+        struct timespec ts;
+        int64_t tvsec = timeout / 1000000000LL;
+        /* Avoid possibly overflowing and specifying a negative number of
+         * seconds, which would turn a very long timeout into a busy-wait.
+         */
+        if (tvsec > (int64_t)INT32_MAX) {
+            tvsec = INT32_MAX;
+        }
+        ts.tv_sec = tvsec;
+        ts.tv_nsec = timeout % 1000000000LL;
+        return ppoll((struct pollfd *)fds, nfds, &ts, NULL);
+    }
+#else
+    return g_poll(fds, nfds, qemu_timeout_ns_to_ms(timeout));
+#endif
+}
+
+
+void timer_init_full(QEMUTimer *ts,
+                     QEMUTimerListGroup *timer_list_group, QEMUClockType type,
+                     int scale, int attributes,
+                     QEMUTimerCB *cb, void *opaque)
+{
+    if (!timer_list_group) {
+        timer_list_group = &main_loop_tlg;
+    }
+    ts->timer_list = timer_list_group->tl[type];
+    ts->cb = cb;
+    ts->opaque = opaque;
+    ts->scale = scale;
+    ts->attributes = attributes;
+    ts->expire_time = -1;
+}
+
+void timer_deinit(QEMUTimer *ts)
+{
+    assert(ts->expire_time == -1);
+    ts->timer_list = NULL;
+}
+
+static void timer_del_locked(QEMUTimerList *timer_list, QEMUTimer *ts)
+{
+    QEMUTimer **pt, *t;
+
+    ts->expire_time = -1;
+    pt = &timer_list->active_timers;
+    for(;;) {
+        t = *pt;
+        if (!t)
+            break;
+        if (t == ts) {
+            qatomic_set(pt, t->next);
+            break;
+        }
+        pt = &t->next;
+    }
+}
+
+static bool timer_mod_ns_locked(QEMUTimerList *timer_list,
+                                QEMUTimer *ts, int64_t expire_time)
+{
+    QEMUTimer **pt, *t;
+
+    /* add the timer in the sorted list */
+    pt = &timer_list->active_timers;
+    for (;;) {
+        t = *pt;
+        if (!timer_expired_ns(t, expire_time)) {
+            break;
+        }
+        pt = &t->next;
+    }
+    ts->expire_time = MAX(expire_time, 0);
+    ts->next = *pt;
+    qatomic_set(pt, ts);
+
+    return pt == &timer_list->active_timers;
+}
+
+static void timerlist_rearm(QEMUTimerList *timer_list)
+{
+    /* Interrupt execution to force deadline recalculation.  */
+    if (icount_enabled() && timer_list->clock->type == QEMU_CLOCK_VIRTUAL) {
+        icount_start_warp_timer();
+    }
+    timerlist_notify(timer_list);
+}
+
+/* stop a timer, but do not dealloc it */
+void timer_del(QEMUTimer *ts)
+{
+    QEMUTimerList *timer_list = ts->timer_list;
+
+    if (timer_list) {
+        qemu_mutex_lock(&timer_list->active_timers_lock);
+        timer_del_locked(timer_list, ts);
+        qemu_mutex_unlock(&timer_list->active_timers_lock);
+    }
+}
+
+/* modify the current timer so that it will be fired when current_time
+   >= expire_time. The corresponding callback will be called. */
+void timer_mod_ns(QEMUTimer *ts, int64_t expire_time)
+{
+    QEMUTimerList *timer_list = ts->timer_list;
+    bool rearm;
+
+    qemu_mutex_lock(&timer_list->active_timers_lock);
+    timer_del_locked(timer_list, ts);
+    rearm = timer_mod_ns_locked(timer_list, ts, expire_time);
+    qemu_mutex_unlock(&timer_list->active_timers_lock);
+
+    if (rearm) {
+        timerlist_rearm(timer_list);
+    }
+}
+
+/* modify the current timer so that it will be fired when current_time
+   >= expire_time or the current deadline, whichever comes earlier.
+   The corresponding callback will be called. */
+void timer_mod_anticipate_ns(QEMUTimer *ts, int64_t expire_time)
+{
+    QEMUTimerList *timer_list = ts->timer_list;
+    bool rearm;
+
+    WITH_QEMU_LOCK_GUARD(&timer_list->active_timers_lock) {
+        if (ts->expire_time == -1 || ts->expire_time > expire_time) {
+            if (ts->expire_time != -1) {
+                timer_del_locked(timer_list, ts);
+            }
+            rearm = timer_mod_ns_locked(timer_list, ts, expire_time);
+        } else {
+            rearm = false;
+        }
+    }
+    if (rearm) {
+        timerlist_rearm(timer_list);
+    }
+}
+
+void timer_mod(QEMUTimer *ts, int64_t expire_time)
+{
+    timer_mod_ns(ts, expire_time * ts->scale);
+}
+
+void timer_mod_anticipate(QEMUTimer *ts, int64_t expire_time)
+{
+    timer_mod_anticipate_ns(ts, expire_time * ts->scale);
+}
+
+bool timer_pending(QEMUTimer *ts)
+{
+    return ts->expire_time >= 0;
+}
+
+bool timer_expired(QEMUTimer *timer_head, int64_t current_time)
+{
+    return timer_expired_ns(timer_head, current_time * timer_head->scale);
+}
+
+bool timerlist_run_timers(QEMUTimerList *timer_list)
+{
+    QEMUTimer *ts;
+    int64_t current_time;
+    bool progress = false;
+    QEMUTimerCB *cb;
+    void *opaque;
+
+    if (!qatomic_read(&timer_list->active_timers)) {
+        return false;
+    }
+
+    qemu_event_reset(&timer_list->timers_done_ev);
+    if (!timer_list->clock->enabled) {
+        goto out;
+    }
+
+    switch (timer_list->clock->type) {
+    case QEMU_CLOCK_REALTIME:
+        break;
+    default:
+    case QEMU_CLOCK_VIRTUAL:
+        break;
+    case QEMU_CLOCK_HOST:
+        if (!replay_checkpoint(CHECKPOINT_CLOCK_HOST)) {
+            goto out;
+        }
+        break;
+    case QEMU_CLOCK_VIRTUAL_RT:
+        if (!replay_checkpoint(CHECKPOINT_CLOCK_VIRTUAL_RT)) {
+            goto out;
+        }
+        break;
+    }
+
+    /*
+     * Extract expired timers from active timers list and process them.
+     *
+     * In rr mode we need "filtered" checkpointing for virtual clock.  The
+     * checkpoint must be recorded/replayed before processing any non-EXTERNAL timer,
+     * and that must only be done once since the clock value stays the same. Because
+     * non-EXTERNAL timers may appear in the timers list while it being processed,
+     * the checkpoint can be issued at a time until no timers are left and we are
+     * done".
+     */
+    current_time = qemu_clock_get_ns(timer_list->clock->type);
+    qemu_mutex_lock(&timer_list->active_timers_lock);
+    while ((ts = timer_list->active_timers)) {
+        if (!timer_expired_ns(ts, current_time)) {
+            /* No expired timers left.  The checkpoint can be skipped
+             * if no timers fired or they were all external.
+             */
+            break;
+        }
+        /* Checkpoint for virtual clock is redundant in cases where
+         * it's being triggered with only non-EXTERNAL timers, because
+         * these timers don't change guest state directly.
+         */
+        if (replay_mode != REPLAY_MODE_NONE
+            && timer_list->clock->type == QEMU_CLOCK_VIRTUAL
+            && !(ts->attributes & QEMU_TIMER_ATTR_EXTERNAL)
+            && !replay_checkpoint(CHECKPOINT_CLOCK_VIRTUAL)) {
+            qemu_mutex_unlock(&timer_list->active_timers_lock);
+            goto out;
+        }
+
+        /* remove timer from the list before calling the callback */
+        timer_list->active_timers = ts->next;
+        ts->next = NULL;
+        ts->expire_time = -1;
+        cb = ts->cb;
+        opaque = ts->opaque;
+
+        /* run the callback (the timer list can be modified) */
+        qemu_mutex_unlock(&timer_list->active_timers_lock);
+        cb(opaque);
+        qemu_mutex_lock(&timer_list->active_timers_lock);
+
+        progress = true;
+    }
+    qemu_mutex_unlock(&timer_list->active_timers_lock);
+
+out:
+    qemu_event_set(&timer_list->timers_done_ev);
+    return progress;
+}
+
+bool qemu_clock_run_timers(QEMUClockType type)
+{
+    return timerlist_run_timers(main_loop_tlg.tl[type]);
+}
+
+void timerlistgroup_init(QEMUTimerListGroup *tlg,
+                         QEMUTimerListNotifyCB *cb, void *opaque)
+{
+    QEMUClockType type;
+    for (type = 0; type < QEMU_CLOCK_MAX; type++) {
+        tlg->tl[type] = timerlist_new(type, cb, opaque);
+    }
+}
+
+void timerlistgroup_deinit(QEMUTimerListGroup *tlg)
+{
+    QEMUClockType type;
+    for (type = 0; type < QEMU_CLOCK_MAX; type++) {
+        timerlist_free(tlg->tl[type]);
+    }
+}
+
+bool timerlistgroup_run_timers(QEMUTimerListGroup *tlg)
+{
+    QEMUClockType type;
+    bool progress = false;
+    for (type = 0; type < QEMU_CLOCK_MAX; type++) {
+        progress |= timerlist_run_timers(tlg->tl[type]);
+    }
+    return progress;
+}
+
+int64_t timerlistgroup_deadline_ns(QEMUTimerListGroup *tlg)
+{
+    int64_t deadline = -1;
+    QEMUClockType type;
+    for (type = 0; type < QEMU_CLOCK_MAX; type++) {
+        if (qemu_clock_use_for_deadline(type)) {
+            deadline = qemu_soonest_timeout(deadline,
+                                            timerlist_deadline_ns(tlg->tl[type]));
+        }
+    }
+    return deadline;
+}
+
+int64_t qemu_clock_get_ns(QEMUClockType type)
+{
+    switch (type) {
+    case QEMU_CLOCK_REALTIME:
+        return get_clock();
+    default:
+    case QEMU_CLOCK_VIRTUAL:
+        return cpus_get_virtual_clock();
+    case QEMU_CLOCK_HOST:
+        return REPLAY_CLOCK(REPLAY_CLOCK_HOST, get_clock_realtime());
+    case QEMU_CLOCK_VIRTUAL_RT:
+        return REPLAY_CLOCK(REPLAY_CLOCK_VIRTUAL_RT, cpu_get_clock());
+    }
+}
+
+void init_clocks(QEMUTimerListNotifyCB *notify_cb)
+{
+    QEMUClockType type;
+    for (type = 0; type < QEMU_CLOCK_MAX; type++) {
+        qemu_clock_init(type, notify_cb);
+    }
+
+#ifdef CONFIG_PRCTL_PR_SET_TIMERSLACK
+    prctl(PR_SET_TIMERSLACK, 1, 0, 0, 0);
+#endif
+}
+
+uint64_t timer_expire_time_ns(QEMUTimer *ts)
+{
+    return timer_pending(ts) ? ts->expire_time : -1;
+}
+
+bool qemu_clock_run_all_timers(void)
+{
+    bool progress = false;
+    QEMUClockType type;
+
+    for (type = 0; type < QEMU_CLOCK_MAX; type++) {
+        if (qemu_clock_use_for_deadline(type)) {
+            progress |= qemu_clock_run_timers(type);
+        }
+    }
+
+    return progress;
+}
diff --git a/util/qht.c b/util/qht.c
new file mode 100644
index 000000000..152b8d74f
--- /dev/null
+++ b/util/qht.c
@@ -0,0 +1,963 @@
+/*
+ * qht.c - QEMU Hash Table, designed to scale for read-mostly workloads.
+ *
+ * Copyright (C) 2016, Emilio G. Cota 
+ *
+ * License: GNU GPL, version 2 or later.
+ *   See the COPYING file in the top-level directory.
+ *
+ * Assumptions:
+ * - NULL cannot be inserted/removed as a pointer value.
+ * - Trying to insert an already-existing hash-pointer pair is OK. However,
+ *   it is not OK to insert into the same hash table different hash-pointer
+ *   pairs that have the same pointer value, but not the hashes.
+ * - Lookups are performed under an RCU read-critical section; removals
+ *   must wait for a grace period to elapse before freeing removed objects.
+ *
+ * Features:
+ * - Reads (i.e. lookups and iterators) can be concurrent with other reads.
+ *   Lookups that are concurrent with writes to the same bucket will retry
+ *   via a seqlock; iterators acquire all bucket locks and therefore can be
+ *   concurrent with lookups and are serialized wrt writers.
+ * - Writes (i.e. insertions/removals) can be concurrent with writes to
+ *   different buckets; writes to the same bucket are serialized through a lock.
+ * - Optional auto-resizing: the hash table resizes up if the load surpasses
+ *   a certain threshold. Resizing is done concurrently with readers; writes
+ *   are serialized with the resize operation.
+ *
+ * The key structure is the bucket, which is cacheline-sized. Buckets
+ * contain a few hash values and pointers; the u32 hash values are stored in
+ * full so that resizing is fast. Having this structure instead of directly
+ * chaining items has two advantages:
+ * - Failed lookups fail fast, and touch a minimum number of cache lines.
+ * - Resizing the hash table with concurrent lookups is easy.
+ *
+ * There are two types of buckets:
+ * 1. "head" buckets are the ones allocated in the array of buckets in qht_map.
+ * 2. all "non-head" buckets (i.e. all others) are members of a chain that
+ *    starts from a head bucket.
+ * Note that the seqlock and spinlock of a head bucket applies to all buckets
+ * chained to it; these two fields are unused in non-head buckets.
+ *
+ * On removals, we move the last valid item in the chain to the position of the
+ * just-removed entry. This makes lookups slightly faster, since the moment an
+ * invalid entry is found, the (failed) lookup is over.
+ *
+ * Resizing is done by taking all bucket spinlocks (so that no other writers can
+ * race with us) and then copying all entries into a new hash map. Then, the
+ * ht->map pointer is set, and the old map is freed once no RCU readers can see
+ * it anymore.
+ *
+ * Writers check for concurrent resizes by comparing ht->map before and after
+ * acquiring their bucket lock. If they don't match, a resize has occurred
+ * while the bucket spinlock was being acquired.
+ *
+ * Related Work:
+ * - Idea of cacheline-sized buckets with full hashes taken from:
+ *   David, Guerraoui & Trigonakis, "Asynchronized Concurrency:
+ *   The Secret to Scaling Concurrent Search Data Structures", ASPLOS'15.
+ * - Why not RCU-based hash tables? They would allow us to get rid of the
+ *   seqlock, but resizing would take forever since RCU read critical
+ *   sections in QEMU take quite a long time.
+ *   More info on relativistic hash tables:
+ *   + Triplett, McKenney & Walpole, "Resizable, Scalable, Concurrent Hash
+ *     Tables via Relativistic Programming", USENIX ATC'11.
+ *   + Corbet, "Relativistic hash tables, part 1: Algorithms", @ lwn.net, 2014.
+ *     https://lwn.net/Articles/612021/
+ */
+#include "qemu/osdep.h"
+#include "qemu/qht.h"
+#include "qemu/atomic.h"
+#include "qemu/rcu.h"
+
+//#define QHT_DEBUG
+
+/*
+ * We want to avoid false sharing of cache lines. Most systems have 64-byte
+ * cache lines so we go with it for simplicity.
+ *
+ * Note that systems with smaller cache lines will be fine (the struct is
+ * almost 64-bytes); systems with larger cache lines might suffer from
+ * some false sharing.
+ */
+#define QHT_BUCKET_ALIGN 64
+
+/* define these to keep sizeof(qht_bucket) within QHT_BUCKET_ALIGN */
+#if HOST_LONG_BITS == 32
+#define QHT_BUCKET_ENTRIES 6
+#else /* 64-bit */
+#define QHT_BUCKET_ENTRIES 4
+#endif
+
+enum qht_iter_type {
+    QHT_ITER_VOID,    /* do nothing; use retvoid */
+    QHT_ITER_RM,      /* remove element if retbool returns true */
+};
+
+struct qht_iter {
+    union {
+        qht_iter_func_t retvoid;
+        qht_iter_bool_func_t retbool;
+    } f;
+    enum qht_iter_type type;
+};
+
+/*
+ * Do _not_ use qemu_mutex_[try]lock directly! Use these macros, otherwise
+ * the profiler (QSP) will deadlock.
+ */
+static inline void qht_lock(struct qht *ht)
+{
+    if (ht->mode & QHT_MODE_RAW_MUTEXES) {
+        qemu_mutex_lock__raw(&ht->lock);
+    } else {
+        qemu_mutex_lock(&ht->lock);
+    }
+}
+
+static inline int qht_trylock(struct qht *ht)
+{
+    if (ht->mode & QHT_MODE_RAW_MUTEXES) {
+        return qemu_mutex_trylock__raw(&(ht)->lock);
+    }
+    return qemu_mutex_trylock(&(ht)->lock);
+}
+
+/* this inline is not really necessary, but it helps keep code consistent */
+static inline void qht_unlock(struct qht *ht)
+{
+    qemu_mutex_unlock(&ht->lock);
+}
+
+/*
+ * Note: reading partially-updated pointers in @pointers could lead to
+ * segfaults. We thus access them with qatomic_read/set; this guarantees
+ * that the compiler makes all those accesses atomic. We also need the
+ * volatile-like behavior in qatomic_read, since otherwise the compiler
+ * might refetch the pointer.
+ * qatomic_read's are of course not necessary when the bucket lock is held.
+ *
+ * If both ht->lock and b->lock are grabbed, ht->lock should always
+ * be grabbed first.
+ */
+struct qht_bucket {
+    QemuSpin lock;
+    QemuSeqLock sequence;
+    uint32_t hashes[QHT_BUCKET_ENTRIES];
+    void *pointers[QHT_BUCKET_ENTRIES];
+    struct qht_bucket *next;
+} QEMU_ALIGNED(QHT_BUCKET_ALIGN);
+
+QEMU_BUILD_BUG_ON(sizeof(struct qht_bucket) > QHT_BUCKET_ALIGN);
+
+/**
+ * struct qht_map - structure to track an array of buckets
+ * @rcu: used by RCU. Keep it as the top field in the struct to help valgrind
+ *       find the whole struct.
+ * @buckets: array of head buckets. It is constant once the map is created.
+ * @n_buckets: number of head buckets. It is constant once the map is created.
+ * @n_added_buckets: number of added (i.e. "non-head") buckets
+ * @n_added_buckets_threshold: threshold to trigger an upward resize once the
+ *                             number of added buckets surpasses it.
+ *
+ * Buckets are tracked in what we call a "map", i.e. this structure.
+ */
+struct qht_map {
+    struct rcu_head rcu;
+    struct qht_bucket *buckets;
+    size_t n_buckets;
+    size_t n_added_buckets;
+    size_t n_added_buckets_threshold;
+};
+
+/* trigger a resize when n_added_buckets > n_buckets / div */
+#define QHT_NR_ADDED_BUCKETS_THRESHOLD_DIV 8
+
+static void qht_do_resize_reset(struct qht *ht, struct qht_map *new,
+                                bool reset);
+static void qht_grow_maybe(struct qht *ht);
+
+#ifdef QHT_DEBUG
+
+#define qht_debug_assert(X) do { assert(X); } while (0)
+
+static void qht_bucket_debug__locked(struct qht_bucket *b)
+{
+    bool seen_empty = false;
+    bool corrupt = false;
+    int i;
+
+    do {
+        for (i = 0; i < QHT_BUCKET_ENTRIES; i++) {
+            if (b->pointers[i] == NULL) {
+                seen_empty = true;
+                continue;
+            }
+            if (seen_empty) {
+                fprintf(stderr, "%s: b: %p, pos: %i, hash: 0x%x, p: %p\n",
+                        __func__, b, i, b->hashes[i], b->pointers[i]);
+                corrupt = true;
+            }
+        }
+        b = b->next;
+    } while (b);
+    qht_debug_assert(!corrupt);
+}
+
+static void qht_map_debug__all_locked(struct qht_map *map)
+{
+    int i;
+
+    for (i = 0; i < map->n_buckets; i++) {
+        qht_bucket_debug__locked(&map->buckets[i]);
+    }
+}
+#else
+
+#define qht_debug_assert(X) do { (void)(X); } while (0)
+
+static inline void qht_bucket_debug__locked(struct qht_bucket *b)
+{ }
+
+static inline void qht_map_debug__all_locked(struct qht_map *map)
+{ }
+#endif /* QHT_DEBUG */
+
+static inline size_t qht_elems_to_buckets(size_t n_elems)
+{
+    return pow2ceil(n_elems / QHT_BUCKET_ENTRIES);
+}
+
+static inline void qht_head_init(struct qht_bucket *b)
+{
+    memset(b, 0, sizeof(*b));
+    qemu_spin_init(&b->lock);
+    seqlock_init(&b->sequence);
+}
+
+static inline
+struct qht_bucket *qht_map_to_bucket(const struct qht_map *map, uint32_t hash)
+{
+    return &map->buckets[hash & (map->n_buckets - 1)];
+}
+
+/* acquire all bucket locks from a map */
+static void qht_map_lock_buckets(struct qht_map *map)
+{
+    size_t i;
+
+    for (i = 0; i < map->n_buckets; i++) {
+        struct qht_bucket *b = &map->buckets[i];
+
+        qemu_spin_lock(&b->lock);
+    }
+}
+
+static void qht_map_unlock_buckets(struct qht_map *map)
+{
+    size_t i;
+
+    for (i = 0; i < map->n_buckets; i++) {
+        struct qht_bucket *b = &map->buckets[i];
+
+        qemu_spin_unlock(&b->lock);
+    }
+}
+
+/*
+ * Call with at least a bucket lock held.
+ * @map should be the value read before acquiring the lock (or locks).
+ */
+static inline bool qht_map_is_stale__locked(const struct qht *ht,
+                                            const struct qht_map *map)
+{
+    return map != ht->map;
+}
+
+/*
+ * Grab all bucket locks, and set @pmap after making sure the map isn't stale.
+ *
+ * Pairs with qht_map_unlock_buckets(), hence the pass-by-reference.
+ *
+ * Note: callers cannot have ht->lock held.
+ */
+static inline
+void qht_map_lock_buckets__no_stale(struct qht *ht, struct qht_map **pmap)
+{
+    struct qht_map *map;
+
+    map = qatomic_rcu_read(&ht->map);
+    qht_map_lock_buckets(map);
+    if (likely(!qht_map_is_stale__locked(ht, map))) {
+        *pmap = map;
+        return;
+    }
+    qht_map_unlock_buckets(map);
+
+    /* we raced with a resize; acquire ht->lock to see the updated ht->map */
+    qht_lock(ht);
+    map = ht->map;
+    qht_map_lock_buckets(map);
+    qht_unlock(ht);
+    *pmap = map;
+    return;
+}
+
+/*
+ * Get a head bucket and lock it, making sure its parent map is not stale.
+ * @pmap is filled with a pointer to the bucket's parent map.
+ *
+ * Unlock with qemu_spin_unlock(&b->lock).
+ *
+ * Note: callers cannot have ht->lock held.
+ */
+static inline
+struct qht_bucket *qht_bucket_lock__no_stale(struct qht *ht, uint32_t hash,
+                                             struct qht_map **pmap)
+{
+    struct qht_bucket *b;
+    struct qht_map *map;
+
+    map = qatomic_rcu_read(&ht->map);
+    b = qht_map_to_bucket(map, hash);
+
+    qemu_spin_lock(&b->lock);
+    if (likely(!qht_map_is_stale__locked(ht, map))) {
+        *pmap = map;
+        return b;
+    }
+    qemu_spin_unlock(&b->lock);
+
+    /* we raced with a resize; acquire ht->lock to see the updated ht->map */
+    qht_lock(ht);
+    map = ht->map;
+    b = qht_map_to_bucket(map, hash);
+    qemu_spin_lock(&b->lock);
+    qht_unlock(ht);
+    *pmap = map;
+    return b;
+}
+
+static inline bool qht_map_needs_resize(const struct qht_map *map)
+{
+    return qatomic_read(&map->n_added_buckets) >
+           map->n_added_buckets_threshold;
+}
+
+static inline void qht_chain_destroy(const struct qht_bucket *head)
+{
+    struct qht_bucket *curr = head->next;
+    struct qht_bucket *prev;
+
+    qemu_spin_destroy(&head->lock);
+    while (curr) {
+        prev = curr;
+        curr = curr->next;
+        qemu_vfree(prev);
+    }
+}
+
+/* pass only an orphan map */
+static void qht_map_destroy(struct qht_map *map)
+{
+    size_t i;
+
+    for (i = 0; i < map->n_buckets; i++) {
+        qht_chain_destroy(&map->buckets[i]);
+    }
+    qemu_vfree(map->buckets);
+    g_free(map);
+}
+
+static struct qht_map *qht_map_create(size_t n_buckets)
+{
+    struct qht_map *map;
+    size_t i;
+
+    map = g_malloc(sizeof(*map));
+    map->n_buckets = n_buckets;
+
+    map->n_added_buckets = 0;
+    map->n_added_buckets_threshold = n_buckets /
+        QHT_NR_ADDED_BUCKETS_THRESHOLD_DIV;
+
+    /* let tiny hash tables to at least add one non-head bucket */
+    if (unlikely(map->n_added_buckets_threshold == 0)) {
+        map->n_added_buckets_threshold = 1;
+    }
+
+    map->buckets = qemu_memalign(QHT_BUCKET_ALIGN,
+                                 sizeof(*map->buckets) * n_buckets);
+    for (i = 0; i < n_buckets; i++) {
+        qht_head_init(&map->buckets[i]);
+    }
+    return map;
+}
+
+void qht_init(struct qht *ht, qht_cmp_func_t cmp, size_t n_elems,
+              unsigned int mode)
+{
+    struct qht_map *map;
+    size_t n_buckets = qht_elems_to_buckets(n_elems);
+
+    g_assert(cmp);
+    ht->cmp = cmp;
+    ht->mode = mode;
+    qemu_mutex_init(&ht->lock);
+    map = qht_map_create(n_buckets);
+    qatomic_rcu_set(&ht->map, map);
+}
+
+/* call only when there are no readers/writers left */
+void qht_destroy(struct qht *ht)
+{
+    qht_map_destroy(ht->map);
+    memset(ht, 0, sizeof(*ht));
+}
+
+static void qht_bucket_reset__locked(struct qht_bucket *head)
+{
+    struct qht_bucket *b = head;
+    int i;
+
+    seqlock_write_begin(&head->sequence);
+    do {
+        for (i = 0; i < QHT_BUCKET_ENTRIES; i++) {
+            if (b->pointers[i] == NULL) {
+                goto done;
+            }
+            qatomic_set(&b->hashes[i], 0);
+            qatomic_set(&b->pointers[i], NULL);
+        }
+        b = b->next;
+    } while (b);
+ done:
+    seqlock_write_end(&head->sequence);
+}
+
+/* call with all bucket locks held */
+static void qht_map_reset__all_locked(struct qht_map *map)
+{
+    size_t i;
+
+    for (i = 0; i < map->n_buckets; i++) {
+        qht_bucket_reset__locked(&map->buckets[i]);
+    }
+    qht_map_debug__all_locked(map);
+}
+
+void qht_reset(struct qht *ht)
+{
+    struct qht_map *map;
+
+    qht_map_lock_buckets__no_stale(ht, &map);
+    qht_map_reset__all_locked(map);
+    qht_map_unlock_buckets(map);
+}
+
+static inline void qht_do_resize(struct qht *ht, struct qht_map *new)
+{
+    qht_do_resize_reset(ht, new, false);
+}
+
+static inline void qht_do_resize_and_reset(struct qht *ht, struct qht_map *new)
+{
+    qht_do_resize_reset(ht, new, true);
+}
+
+bool qht_reset_size(struct qht *ht, size_t n_elems)
+{
+    struct qht_map *new = NULL;
+    struct qht_map *map;
+    size_t n_buckets;
+
+    n_buckets = qht_elems_to_buckets(n_elems);
+
+    qht_lock(ht);
+    map = ht->map;
+    if (n_buckets != map->n_buckets) {
+        new = qht_map_create(n_buckets);
+    }
+    qht_do_resize_and_reset(ht, new);
+    qht_unlock(ht);
+
+    return !!new;
+}
+
+static inline
+void *qht_do_lookup(struct qht_bucket *head, qht_lookup_func_t func,
+                    const void *userp, uint32_t hash)
+{
+    struct qht_bucket *b = head;
+    int i;
+
+    do {
+        for (i = 0; i < QHT_BUCKET_ENTRIES; i++) {
+            if (qatomic_read(&b->hashes[i]) == hash) {
+                /* The pointer is dereferenced before seqlock_read_retry,
+                 * so (unlike qht_insert__locked) we need to use
+                 * qatomic_rcu_read here.
+                 */
+                void *p = qatomic_rcu_read(&b->pointers[i]);
+
+                if (likely(p) && likely(func(p, userp))) {
+                    return p;
+                }
+            }
+        }
+        b = qatomic_rcu_read(&b->next);
+    } while (b);
+
+    return NULL;
+}
+
+static __attribute__((noinline))
+void *qht_lookup__slowpath(struct qht_bucket *b, qht_lookup_func_t func,
+                           const void *userp, uint32_t hash)
+{
+    unsigned int version;
+    void *ret;
+
+    do {
+        version = seqlock_read_begin(&b->sequence);
+        ret = qht_do_lookup(b, func, userp, hash);
+    } while (seqlock_read_retry(&b->sequence, version));
+    return ret;
+}
+
+void *qht_lookup_custom(struct qht *ht, const void *userp, uint32_t hash,
+                        qht_lookup_func_t func)
+{
+    struct qht_bucket *b;
+    const struct qht_map *map;
+    unsigned int version;
+    void *ret;
+
+    map = qatomic_rcu_read(&ht->map);
+    b = qht_map_to_bucket(map, hash);
+
+    version = seqlock_read_begin(&b->sequence);
+    ret = qht_do_lookup(b, func, userp, hash);
+    if (likely(!seqlock_read_retry(&b->sequence, version))) {
+        return ret;
+    }
+    /*
+     * Removing the do/while from the fastpath gives a 4% perf. increase when
+     * running a 100%-lookup microbenchmark.
+     */
+    return qht_lookup__slowpath(b, func, userp, hash);
+}
+
+void *qht_lookup(struct qht *ht, const void *userp, uint32_t hash)
+{
+    return qht_lookup_custom(ht, userp, hash, ht->cmp);
+}
+
+/*
+ * call with head->lock held
+ * @ht is const since it is only used for ht->cmp()
+ */
+static void *qht_insert__locked(const struct qht *ht, struct qht_map *map,
+                                struct qht_bucket *head, void *p, uint32_t hash,
+                                bool *needs_resize)
+{
+    struct qht_bucket *b = head;
+    struct qht_bucket *prev = NULL;
+    struct qht_bucket *new = NULL;
+    int i;
+
+    do {
+        for (i = 0; i < QHT_BUCKET_ENTRIES; i++) {
+            if (b->pointers[i]) {
+                if (unlikely(b->hashes[i] == hash &&
+                             ht->cmp(b->pointers[i], p))) {
+                    return b->pointers[i];
+                }
+            } else {
+                goto found;
+            }
+        }
+        prev = b;
+        b = b->next;
+    } while (b);
+
+    b = qemu_memalign(QHT_BUCKET_ALIGN, sizeof(*b));
+    memset(b, 0, sizeof(*b));
+    new = b;
+    i = 0;
+    qatomic_inc(&map->n_added_buckets);
+    if (unlikely(qht_map_needs_resize(map)) && needs_resize) {
+        *needs_resize = true;
+    }
+
+ found:
+    /* found an empty key: acquire the seqlock and write */
+    seqlock_write_begin(&head->sequence);
+    if (new) {
+        qatomic_rcu_set(&prev->next, b);
+    }
+    /* smp_wmb() implicit in seqlock_write_begin.  */
+    qatomic_set(&b->hashes[i], hash);
+    qatomic_set(&b->pointers[i], p);
+    seqlock_write_end(&head->sequence);
+    return NULL;
+}
+
+static __attribute__((noinline)) void qht_grow_maybe(struct qht *ht)
+{
+    struct qht_map *map;
+
+    /*
+     * If the lock is taken it probably means there's an ongoing resize,
+     * so bail out.
+     */
+    if (qht_trylock(ht)) {
+        return;
+    }
+    map = ht->map;
+    /* another thread might have just performed the resize we were after */
+    if (qht_map_needs_resize(map)) {
+        struct qht_map *new = qht_map_create(map->n_buckets * 2);
+
+        qht_do_resize(ht, new);
+    }
+    qht_unlock(ht);
+}
+
+bool qht_insert(struct qht *ht, void *p, uint32_t hash, void **existing)
+{
+    struct qht_bucket *b;
+    struct qht_map *map;
+    bool needs_resize = false;
+    void *prev;
+
+    /* NULL pointers are not supported */
+    qht_debug_assert(p);
+
+    b = qht_bucket_lock__no_stale(ht, hash, &map);
+    prev = qht_insert__locked(ht, map, b, p, hash, &needs_resize);
+    qht_bucket_debug__locked(b);
+    qemu_spin_unlock(&b->lock);
+
+    if (unlikely(needs_resize) && ht->mode & QHT_MODE_AUTO_RESIZE) {
+        qht_grow_maybe(ht);
+    }
+    if (likely(prev == NULL)) {
+        return true;
+    }
+    if (existing) {
+        *existing = prev;
+    }
+    return false;
+}
+
+static inline bool qht_entry_is_last(const struct qht_bucket *b, int pos)
+{
+    if (pos == QHT_BUCKET_ENTRIES - 1) {
+        if (b->next == NULL) {
+            return true;
+        }
+        return b->next->pointers[0] == NULL;
+    }
+    return b->pointers[pos + 1] == NULL;
+}
+
+static void
+qht_entry_move(struct qht_bucket *to, int i, struct qht_bucket *from, int j)
+{
+    qht_debug_assert(!(to == from && i == j));
+    qht_debug_assert(to->pointers[i]);
+    qht_debug_assert(from->pointers[j]);
+
+    qatomic_set(&to->hashes[i], from->hashes[j]);
+    qatomic_set(&to->pointers[i], from->pointers[j]);
+
+    qatomic_set(&from->hashes[j], 0);
+    qatomic_set(&from->pointers[j], NULL);
+}
+
+/*
+ * Find the last valid entry in @orig, and swap it with @orig[pos], which has
+ * just been invalidated.
+ */
+static inline void qht_bucket_remove_entry(struct qht_bucket *orig, int pos)
+{
+    struct qht_bucket *b = orig;
+    struct qht_bucket *prev = NULL;
+    int i;
+
+    if (qht_entry_is_last(orig, pos)) {
+        orig->hashes[pos] = 0;
+        qatomic_set(&orig->pointers[pos], NULL);
+        return;
+    }
+    do {
+        for (i = 0; i < QHT_BUCKET_ENTRIES; i++) {
+            if (b->pointers[i]) {
+                continue;
+            }
+            if (i > 0) {
+                return qht_entry_move(orig, pos, b, i - 1);
+            }
+            qht_debug_assert(prev);
+            return qht_entry_move(orig, pos, prev, QHT_BUCKET_ENTRIES - 1);
+        }
+        prev = b;
+        b = b->next;
+    } while (b);
+    /* no free entries other than orig[pos], so swap it with the last one */
+    qht_entry_move(orig, pos, prev, QHT_BUCKET_ENTRIES - 1);
+}
+
+/* call with b->lock held */
+static inline
+bool qht_remove__locked(struct qht_bucket *head, const void *p, uint32_t hash)
+{
+    struct qht_bucket *b = head;
+    int i;
+
+    do {
+        for (i = 0; i < QHT_BUCKET_ENTRIES; i++) {
+            void *q = b->pointers[i];
+
+            if (unlikely(q == NULL)) {
+                return false;
+            }
+            if (q == p) {
+                qht_debug_assert(b->hashes[i] == hash);
+                seqlock_write_begin(&head->sequence);
+                qht_bucket_remove_entry(b, i);
+                seqlock_write_end(&head->sequence);
+                return true;
+            }
+        }
+        b = b->next;
+    } while (b);
+    return false;
+}
+
+bool qht_remove(struct qht *ht, const void *p, uint32_t hash)
+{
+    struct qht_bucket *b;
+    struct qht_map *map;
+    bool ret;
+
+    /* NULL pointers are not supported */
+    qht_debug_assert(p);
+
+    b = qht_bucket_lock__no_stale(ht, hash, &map);
+    ret = qht_remove__locked(b, p, hash);
+    qht_bucket_debug__locked(b);
+    qemu_spin_unlock(&b->lock);
+    return ret;
+}
+
+static inline void qht_bucket_iter(struct qht_bucket *head,
+                                   const struct qht_iter *iter, void *userp)
+{
+    struct qht_bucket *b = head;
+    int i;
+
+    do {
+        for (i = 0; i < QHT_BUCKET_ENTRIES; i++) {
+            if (b->pointers[i] == NULL) {
+                return;
+            }
+            switch (iter->type) {
+            case QHT_ITER_VOID:
+                iter->f.retvoid(b->pointers[i], b->hashes[i], userp);
+                break;
+            case QHT_ITER_RM:
+                if (iter->f.retbool(b->pointers[i], b->hashes[i], userp)) {
+                    /* replace i with the last valid element in the bucket */
+                    seqlock_write_begin(&head->sequence);
+                    qht_bucket_remove_entry(b, i);
+                    seqlock_write_end(&head->sequence);
+                    qht_bucket_debug__locked(b);
+                    /* reevaluate i, since it just got replaced */
+                    i--;
+                    continue;
+                }
+                break;
+            default:
+                g_assert_not_reached();
+            }
+        }
+        b = b->next;
+    } while (b);
+}
+
+/* call with all of the map's locks held */
+static inline void qht_map_iter__all_locked(struct qht_map *map,
+                                            const struct qht_iter *iter,
+                                            void *userp)
+{
+    size_t i;
+
+    for (i = 0; i < map->n_buckets; i++) {
+        qht_bucket_iter(&map->buckets[i], iter, userp);
+    }
+}
+
+static inline void
+do_qht_iter(struct qht *ht, const struct qht_iter *iter, void *userp)
+{
+    struct qht_map *map;
+
+    map = qatomic_rcu_read(&ht->map);
+    qht_map_lock_buckets(map);
+    qht_map_iter__all_locked(map, iter, userp);
+    qht_map_unlock_buckets(map);
+}
+
+void qht_iter(struct qht *ht, qht_iter_func_t func, void *userp)
+{
+    const struct qht_iter iter = {
+        .f.retvoid = func,
+        .type = QHT_ITER_VOID,
+    };
+
+    do_qht_iter(ht, &iter, userp);
+}
+
+void qht_iter_remove(struct qht *ht, qht_iter_bool_func_t func, void *userp)
+{
+    const struct qht_iter iter = {
+        .f.retbool = func,
+        .type = QHT_ITER_RM,
+    };
+
+    do_qht_iter(ht, &iter, userp);
+}
+
+struct qht_map_copy_data {
+    struct qht *ht;
+    struct qht_map *new;
+};
+
+static void qht_map_copy(void *p, uint32_t hash, void *userp)
+{
+    struct qht_map_copy_data *data = userp;
+    struct qht *ht = data->ht;
+    struct qht_map *new = data->new;
+    struct qht_bucket *b = qht_map_to_bucket(new, hash);
+
+    /* no need to acquire b->lock because no thread has seen this map yet */
+    qht_insert__locked(ht, new, b, p, hash, NULL);
+}
+
+/*
+ * Atomically perform a resize and/or reset.
+ * Call with ht->lock held.
+ */
+static void qht_do_resize_reset(struct qht *ht, struct qht_map *new, bool reset)
+{
+    struct qht_map *old;
+    const struct qht_iter iter = {
+        .f.retvoid = qht_map_copy,
+        .type = QHT_ITER_VOID,
+    };
+    struct qht_map_copy_data data;
+
+    old = ht->map;
+    qht_map_lock_buckets(old);
+
+    if (reset) {
+        qht_map_reset__all_locked(old);
+    }
+
+    if (new == NULL) {
+        qht_map_unlock_buckets(old);
+        return;
+    }
+
+    g_assert(new->n_buckets != old->n_buckets);
+    data.ht = ht;
+    data.new = new;
+    qht_map_iter__all_locked(old, &iter, &data);
+    qht_map_debug__all_locked(new);
+
+    qatomic_rcu_set(&ht->map, new);
+    qht_map_unlock_buckets(old);
+    call_rcu(old, qht_map_destroy, rcu);
+}
+
+bool qht_resize(struct qht *ht, size_t n_elems)
+{
+    size_t n_buckets = qht_elems_to_buckets(n_elems);
+    size_t ret = false;
+
+    qht_lock(ht);
+    if (n_buckets != ht->map->n_buckets) {
+        struct qht_map *new;
+
+        new = qht_map_create(n_buckets);
+        qht_do_resize(ht, new);
+        ret = true;
+    }
+    qht_unlock(ht);
+
+    return ret;
+}
+
+/* pass @stats to qht_statistics_destroy() when done */
+void qht_statistics_init(struct qht *ht, struct qht_stats *stats)
+{
+    struct qht_map *map;
+    int i;
+
+    map = qatomic_rcu_read(&ht->map);
+
+    stats->used_head_buckets = 0;
+    stats->entries = 0;
+    qdist_init(&stats->chain);
+    qdist_init(&stats->occupancy);
+    /* bail out if the qht has not yet been initialized */
+    if (unlikely(map == NULL)) {
+        stats->head_buckets = 0;
+        return;
+    }
+    stats->head_buckets = map->n_buckets;
+
+    for (i = 0; i < map->n_buckets; i++) {
+        struct qht_bucket *head = &map->buckets[i];
+        struct qht_bucket *b;
+        unsigned int version;
+        size_t buckets;
+        size_t entries;
+        int j;
+
+        do {
+            version = seqlock_read_begin(&head->sequence);
+            buckets = 0;
+            entries = 0;
+            b = head;
+            do {
+                for (j = 0; j < QHT_BUCKET_ENTRIES; j++) {
+                    if (qatomic_read(&b->pointers[j]) == NULL) {
+                        break;
+                    }
+                    entries++;
+                }
+                buckets++;
+                b = qatomic_rcu_read(&b->next);
+            } while (b);
+        } while (seqlock_read_retry(&head->sequence, version));
+
+        if (entries) {
+            qdist_inc(&stats->chain, buckets);
+            qdist_inc(&stats->occupancy,
+                      (double)entries / QHT_BUCKET_ENTRIES / buckets);
+            stats->used_head_buckets++;
+            stats->entries += entries;
+        } else {
+            qdist_inc(&stats->occupancy, 0);
+        }
+    }
+}
+
+void qht_statistics_destroy(struct qht_stats *stats)
+{
+    qdist_destroy(&stats->occupancy);
+    qdist_destroy(&stats->chain);
+}
diff --git a/util/qsp.c b/util/qsp.c
new file mode 100644
index 000000000..bacc5fa2f
--- /dev/null
+++ b/util/qsp.c
@@ -0,0 +1,813 @@
+/*
+ * qsp.c - QEMU Synchronization Profiler
+ *
+ * Copyright (C) 2018, Emilio G. Cota 
+ *
+ * License: GNU GPL, version 2 or later.
+ *   See the COPYING file in the top-level directory.
+ *
+ * QSP profiles the time spent in synchronization primitives, which can
+ * help diagnose performance problems, e.g. scalability issues when
+ * contention is high.
+ *
+ * The primitives currently supported are mutexes, recursive mutexes and
+ * condition variables. Note that not all related functions are intercepted;
+ * instead we profile only those functions that can have a performance impact,
+ * either due to blocking (e.g. cond_wait, mutex_lock) or cache line
+ * contention (e.g. mutex_lock, mutex_trylock).
+ *
+ * QSP's design focuses on speed and scalability. This is achieved
+ * by having threads do their profiling entirely on thread-local data.
+ * The appropriate thread-local data is found via a QHT, i.e. a concurrent hash
+ * table. To aggregate data in order to generate a report, we iterate over
+ * all entries in the hash table. Depending on the number of threads and
+ * synchronization objects this might be expensive, but note that it is
+ * very rarely called -- reports are generated only when requested by users.
+ *
+ * Reports are generated as a table where each row represents a call site. A
+ * call site is the triplet formed by the __file__ and __LINE__ of the caller
+ * as well as the address of the "object" (i.e. mutex, rec. mutex or condvar)
+ * being operated on. Optionally, call sites that operate on different objects
+ * of the same type can be coalesced, which can be particularly useful when
+ * profiling dynamically-allocated objects.
+ *
+ * Alternative designs considered:
+ *
+ * - Use an off-the-shelf profiler such as mutrace. This is not a viable option
+ *   for us because QEMU has __malloc_hook set (by one of the libraries it
+ *   uses); leaving this hook unset is required to avoid deadlock in mutrace.
+ *
+ * - Use a glib HT for each thread, protecting each HT with its own lock.
+ *   This isn't simpler than the current design, and is 10% slower in the
+ *   atomic_add-bench microbenchmark (-m option).
+ *
+ * - For reports, just use a binary tree as we aggregate data, instead of having
+ *   an intermediate hash table. This would simplify the code only slightly, but
+ *   would perform badly if there were many threads and objects to track.
+ *
+ * - Wrap operations on qsp entries with RCU read-side critical sections, so
+ *   that qsp_reset() can delete entries. Unfortunately, the overhead of calling
+ *   rcu_read_lock/unlock slows down atomic_add-bench -m by 24%. Having
+ *   a snapshot that is updated on qsp_reset() avoids this overhead.
+ *
+ * Related Work:
+ * - Lennart Poettering's mutrace: http://0pointer.de/blog/projects/mutrace.html
+ * - Lozi, David, Thomas, Lawall and Muller. "Remote Core Locking: Migrating
+ *   Critical-Section Execution to Improve the Performance of Multithreaded
+ *   Applications", USENIX ATC'12.
+ */
+
+#include "qemu/osdep.h"
+#include "qemu/qemu-print.h"
+#include "qemu/thread.h"
+#include "qemu/timer.h"
+#include "qemu/qht.h"
+#include "qemu/rcu.h"
+#include "qemu/xxhash.h"
+
+enum QSPType {
+    QSP_MUTEX,
+    QSP_BQL_MUTEX,
+    QSP_REC_MUTEX,
+    QSP_CONDVAR,
+};
+
+struct QSPCallSite {
+    const void *obj;
+    const char *file; /* i.e. __FILE__; shortened later */
+    int line;
+    enum QSPType type;
+};
+typedef struct QSPCallSite QSPCallSite;
+
+struct QSPEntry {
+    void *thread_ptr;
+    const QSPCallSite *callsite;
+    uint64_t n_acqs;
+    uint64_t ns;
+    unsigned int n_objs; /* count of coalesced objs; only used for reporting */
+};
+typedef struct QSPEntry QSPEntry;
+
+struct QSPSnapshot {
+    struct rcu_head rcu;
+    struct qht ht;
+};
+typedef struct QSPSnapshot QSPSnapshot;
+
+/* initial sizing for hash tables */
+#define QSP_INITIAL_SIZE 64
+
+/* If this file is moved, QSP_REL_PATH should be updated accordingly */
+#define QSP_REL_PATH "util/qsp.c"
+
+/* this file's full path. Used to present all call sites with relative paths */
+static size_t qsp_qemu_path_len;
+
+/* the address of qsp_thread gives us a unique 'thread ID' */
+static __thread int qsp_thread;
+
+/*
+ * Call sites are the same for all threads, so we track them in a separate hash
+ * table to save memory.
+ */
+static struct qht qsp_callsite_ht;
+
+static struct qht qsp_ht;
+static QSPSnapshot *qsp_snapshot;
+static bool qsp_initialized, qsp_initializing;
+
+static const char * const qsp_typenames[] = {
+    [QSP_MUTEX]     = "mutex",
+    [QSP_BQL_MUTEX] = "BQL mutex",
+    [QSP_REC_MUTEX] = "rec_mutex",
+    [QSP_CONDVAR]   = "condvar",
+};
+
+QemuMutexLockFunc qemu_bql_mutex_lock_func = qemu_mutex_lock_impl;
+QemuMutexLockFunc qemu_mutex_lock_func = qemu_mutex_lock_impl;
+QemuMutexTrylockFunc qemu_mutex_trylock_func = qemu_mutex_trylock_impl;
+QemuRecMutexLockFunc qemu_rec_mutex_lock_func = qemu_rec_mutex_lock_impl;
+QemuRecMutexTrylockFunc qemu_rec_mutex_trylock_func =
+    qemu_rec_mutex_trylock_impl;
+QemuCondWaitFunc qemu_cond_wait_func = qemu_cond_wait_impl;
+QemuCondTimedWaitFunc qemu_cond_timedwait_func = qemu_cond_timedwait_impl;
+
+/*
+ * It pays off to _not_ hash callsite->file; hashing a string is slow, and
+ * without it we still get a pretty unique hash.
+ */
+static inline
+uint32_t do_qsp_callsite_hash(const QSPCallSite *callsite, uint64_t ab)
+{
+    uint64_t cd = (uint64_t)(uintptr_t)callsite->obj;
+    uint32_t e = callsite->line;
+    uint32_t f = callsite->type;
+
+    return qemu_xxhash6(ab, cd, e, f);
+}
+
+static inline
+uint32_t qsp_callsite_hash(const QSPCallSite *callsite)
+{
+    return do_qsp_callsite_hash(callsite, 0);
+}
+
+static inline uint32_t do_qsp_entry_hash(const QSPEntry *entry, uint64_t a)
+{
+    return do_qsp_callsite_hash(entry->callsite, a);
+}
+
+static uint32_t qsp_entry_hash(const QSPEntry *entry)
+{
+    return do_qsp_entry_hash(entry, (uint64_t)(uintptr_t)entry->thread_ptr);
+}
+
+static uint32_t qsp_entry_no_thread_hash(const QSPEntry *entry)
+{
+    return do_qsp_entry_hash(entry, 0);
+}
+
+/* without the objects we need to hash the file name to get a decent hash */
+static uint32_t qsp_entry_no_thread_obj_hash(const QSPEntry *entry)
+{
+    const QSPCallSite *callsite = entry->callsite;
+    uint64_t ab = g_str_hash(callsite->file);
+    uint64_t cd = callsite->line;
+    uint32_t e = callsite->type;
+
+    return qemu_xxhash5(ab, cd, e);
+}
+
+static bool qsp_callsite_cmp(const void *ap, const void *bp)
+{
+    const QSPCallSite *a = ap;
+    const QSPCallSite *b = bp;
+
+    return a == b ||
+        (a->obj == b->obj &&
+         a->line == b->line &&
+         a->type == b->type &&
+         (a->file == b->file || !strcmp(a->file, b->file)));
+}
+
+static bool qsp_callsite_no_obj_cmp(const void *ap, const void *bp)
+{
+    const QSPCallSite *a = ap;
+    const QSPCallSite *b = bp;
+
+    return a == b ||
+        (a->line == b->line &&
+         a->type == b->type &&
+         (a->file == b->file || !strcmp(a->file, b->file)));
+}
+
+static bool qsp_entry_no_thread_cmp(const void *ap, const void *bp)
+{
+    const QSPEntry *a = ap;
+    const QSPEntry *b = bp;
+
+    return qsp_callsite_cmp(a->callsite, b->callsite);
+}
+
+static bool qsp_entry_no_thread_obj_cmp(const void *ap, const void *bp)
+{
+    const QSPEntry *a = ap;
+    const QSPEntry *b = bp;
+
+    return qsp_callsite_no_obj_cmp(a->callsite, b->callsite);
+}
+
+static bool qsp_entry_cmp(const void *ap, const void *bp)
+{
+    const QSPEntry *a = ap;
+    const QSPEntry *b = bp;
+
+    return a->thread_ptr == b->thread_ptr &&
+        qsp_callsite_cmp(a->callsite, b->callsite);
+}
+
+/*
+ * Normally we'd call this from a constructor function, but we want it to work
+ * via libutil as well.
+ */
+static void qsp_do_init(void)
+{
+    /* make sure this file's path in the tree is up to date with QSP_REL_PATH */
+    g_assert(strstr(__FILE__, QSP_REL_PATH));
+    qsp_qemu_path_len = strlen(__FILE__) - strlen(QSP_REL_PATH);
+
+    qht_init(&qsp_ht, qsp_entry_cmp, QSP_INITIAL_SIZE,
+             QHT_MODE_AUTO_RESIZE | QHT_MODE_RAW_MUTEXES);
+    qht_init(&qsp_callsite_ht, qsp_callsite_cmp, QSP_INITIAL_SIZE,
+             QHT_MODE_AUTO_RESIZE | QHT_MODE_RAW_MUTEXES);
+}
+
+static __attribute__((noinline)) void qsp_init__slowpath(void)
+{
+    if (qatomic_cmpxchg(&qsp_initializing, false, true) == false) {
+        qsp_do_init();
+        qatomic_set(&qsp_initialized, true);
+    } else {
+        while (!qatomic_read(&qsp_initialized)) {
+            cpu_relax();
+        }
+    }
+}
+
+/* qsp_init() must be called from _all_ exported functions */
+static inline void qsp_init(void)
+{
+    if (likely(qatomic_read(&qsp_initialized))) {
+        return;
+    }
+    qsp_init__slowpath();
+}
+
+static QSPCallSite *qsp_callsite_find(const QSPCallSite *orig)
+{
+    QSPCallSite *callsite;
+    uint32_t hash;
+
+    hash = qsp_callsite_hash(orig);
+    callsite = qht_lookup(&qsp_callsite_ht, orig, hash);
+    if (callsite == NULL) {
+        void *existing = NULL;
+
+        callsite = g_new(QSPCallSite, 1);
+        memcpy(callsite, orig, sizeof(*callsite));
+        qht_insert(&qsp_callsite_ht, callsite, hash, &existing);
+        if (unlikely(existing)) {
+            g_free(callsite);
+            callsite = existing;
+        }
+    }
+    return callsite;
+}
+
+static QSPEntry *
+qsp_entry_create(struct qht *ht, const QSPEntry *entry, uint32_t hash)
+{
+    QSPEntry *e;
+    void *existing = NULL;
+
+    e = g_new0(QSPEntry, 1);
+    e->thread_ptr = entry->thread_ptr;
+    e->callsite = qsp_callsite_find(entry->callsite);
+
+    qht_insert(ht, e, hash, &existing);
+    if (unlikely(existing)) {
+        g_free(e);
+        e = existing;
+    }
+    return e;
+}
+
+static QSPEntry *
+qsp_entry_find(struct qht *ht, const QSPEntry *entry, uint32_t hash)
+{
+    QSPEntry *e;
+
+    e = qht_lookup(ht, entry, hash);
+    if (e == NULL) {
+        e = qsp_entry_create(ht, entry, hash);
+    }
+    return e;
+}
+
+/*
+ * Note: Entries are never removed, so callers do not have to be in an RCU
+ * read-side critical section.
+ */
+static QSPEntry *qsp_entry_get(const void *obj, const char *file, int line,
+                               enum QSPType type)
+{
+    QSPCallSite callsite = {
+        .obj = obj,
+        .file = file,
+        .line = line,
+        .type = type,
+    };
+    QSPEntry orig;
+    uint32_t hash;
+
+    qsp_init();
+
+    orig.thread_ptr = &qsp_thread;
+    orig.callsite = &callsite;
+
+    hash = qsp_entry_hash(&orig);
+    return qsp_entry_find(&qsp_ht, &orig, hash);
+}
+
+/*
+ * @e is in the global hash table; it is only written to by the current thread,
+ * so we write to it atomically (as in "write once") to prevent torn reads.
+ */
+static inline void do_qsp_entry_record(QSPEntry *e, int64_t delta, bool acq)
+{
+    qatomic_set_u64(&e->ns, e->ns + delta);
+    if (acq) {
+        qatomic_set_u64(&e->n_acqs, e->n_acqs + 1);
+    }
+}
+
+static inline void qsp_entry_record(QSPEntry *e, int64_t delta)
+{
+    do_qsp_entry_record(e, delta, true);
+}
+
+#define QSP_GEN_VOID(type_, qsp_t_, func_, impl_)                       \
+    static void func_(type_ *obj, const char *file, int line)           \
+    {                                                                   \
+        QSPEntry *e;                                                    \
+        int64_t t0, t1;                                                 \
+                                                                        \
+        t0 = get_clock();                                               \
+        impl_(obj, file, line);                                         \
+        t1 = get_clock();                                               \
+                                                                        \
+        e = qsp_entry_get(obj, file, line, qsp_t_);                     \
+        qsp_entry_record(e, t1 - t0);                                   \
+    }
+
+#define QSP_GEN_RET1(type_, qsp_t_, func_, impl_)                       \
+    static int func_(type_ *obj, const char *file, int line)            \
+    {                                                                   \
+        QSPEntry *e;                                                    \
+        int64_t t0, t1;                                                 \
+        int err;                                                        \
+                                                                        \
+        t0 = get_clock();                                               \
+        err = impl_(obj, file, line);                                   \
+        t1 = get_clock();                                               \
+                                                                        \
+        e = qsp_entry_get(obj, file, line, qsp_t_);                     \
+        do_qsp_entry_record(e, t1 - t0, !err);                          \
+        return err;                                                     \
+    }
+
+QSP_GEN_VOID(QemuMutex, QSP_BQL_MUTEX, qsp_bql_mutex_lock, qemu_mutex_lock_impl)
+QSP_GEN_VOID(QemuMutex, QSP_MUTEX, qsp_mutex_lock, qemu_mutex_lock_impl)
+QSP_GEN_RET1(QemuMutex, QSP_MUTEX, qsp_mutex_trylock, qemu_mutex_trylock_impl)
+
+QSP_GEN_VOID(QemuRecMutex, QSP_REC_MUTEX, qsp_rec_mutex_lock,
+             qemu_rec_mutex_lock_impl)
+QSP_GEN_RET1(QemuRecMutex, QSP_REC_MUTEX, qsp_rec_mutex_trylock,
+             qemu_rec_mutex_trylock_impl)
+
+#undef QSP_GEN_RET1
+#undef QSP_GEN_VOID
+
+static void
+qsp_cond_wait(QemuCond *cond, QemuMutex *mutex, const char *file, int line)
+{
+    QSPEntry *e;
+    int64_t t0, t1;
+
+    t0 = get_clock();
+    qemu_cond_wait_impl(cond, mutex, file, line);
+    t1 = get_clock();
+
+    e = qsp_entry_get(cond, file, line, QSP_CONDVAR);
+    qsp_entry_record(e, t1 - t0);
+}
+
+static bool
+qsp_cond_timedwait(QemuCond *cond, QemuMutex *mutex, int ms,
+                   const char *file, int line)
+{
+    QSPEntry *e;
+    int64_t t0, t1;
+    bool ret;
+
+    t0 = get_clock();
+    ret = qemu_cond_timedwait_impl(cond, mutex, ms, file, line);
+    t1 = get_clock();
+
+    e = qsp_entry_get(cond, file, line, QSP_CONDVAR);
+    qsp_entry_record(e, t1 - t0);
+    return ret;
+}
+
+bool qsp_is_enabled(void)
+{
+    return qatomic_read(&qemu_mutex_lock_func) == qsp_mutex_lock;
+}
+
+void qsp_enable(void)
+{
+    qatomic_set(&qemu_mutex_lock_func, qsp_mutex_lock);
+    qatomic_set(&qemu_mutex_trylock_func, qsp_mutex_trylock);
+    qatomic_set(&qemu_bql_mutex_lock_func, qsp_bql_mutex_lock);
+    qatomic_set(&qemu_rec_mutex_lock_func, qsp_rec_mutex_lock);
+    qatomic_set(&qemu_rec_mutex_trylock_func, qsp_rec_mutex_trylock);
+    qatomic_set(&qemu_cond_wait_func, qsp_cond_wait);
+    qatomic_set(&qemu_cond_timedwait_func, qsp_cond_timedwait);
+}
+
+void qsp_disable(void)
+{
+    qatomic_set(&qemu_mutex_lock_func, qemu_mutex_lock_impl);
+    qatomic_set(&qemu_mutex_trylock_func, qemu_mutex_trylock_impl);
+    qatomic_set(&qemu_bql_mutex_lock_func, qemu_mutex_lock_impl);
+    qatomic_set(&qemu_rec_mutex_lock_func, qemu_rec_mutex_lock_impl);
+    qatomic_set(&qemu_rec_mutex_trylock_func, qemu_rec_mutex_trylock_impl);
+    qatomic_set(&qemu_cond_wait_func, qemu_cond_wait_impl);
+    qatomic_set(&qemu_cond_timedwait_func, qemu_cond_timedwait_impl);
+}
+
+static gint qsp_tree_cmp(gconstpointer ap, gconstpointer bp, gpointer up)
+{
+    const QSPEntry *a = ap;
+    const QSPEntry *b = bp;
+    enum QSPSortBy sort_by = *(enum QSPSortBy *)up;
+    const QSPCallSite *ca;
+    const QSPCallSite *cb;
+
+    switch (sort_by) {
+    case QSP_SORT_BY_TOTAL_WAIT_TIME:
+        if (a->ns > b->ns) {
+            return -1;
+        } else if (a->ns < b->ns) {
+            return 1;
+        }
+        break;
+    case QSP_SORT_BY_AVG_WAIT_TIME:
+    {
+        double avg_a = a->n_acqs ? a->ns / a->n_acqs : 0;
+        double avg_b = b->n_acqs ? b->ns / b->n_acqs : 0;
+
+        if (avg_a > avg_b) {
+            return -1;
+        } else if (avg_a < avg_b) {
+            return 1;
+        }
+        break;
+    }
+    default:
+        g_assert_not_reached();
+    }
+
+    ca = a->callsite;
+    cb = b->callsite;
+    /* Break the tie with the object's address */
+    if (ca->obj < cb->obj) {
+        return -1;
+    } else if (ca->obj > cb->obj) {
+        return 1;
+    } else {
+        int cmp;
+
+        /* same obj. Break the tie with the callsite's file */
+        cmp = strcmp(ca->file, cb->file);
+        if (cmp) {
+            return cmp;
+        }
+        /* same callsite file. Break the tie with the callsite's line */
+        g_assert(ca->line != cb->line);
+        if (ca->line < cb->line) {
+            return -1;
+        } else if (ca->line > cb->line) {
+            return 1;
+        } else {
+            /* break the tie with the callsite's type */
+            return cb->type - ca->type;
+        }
+    }
+}
+
+static void qsp_sort(void *p, uint32_t h, void *userp)
+{
+    QSPEntry *e = p;
+    GTree *tree = userp;
+
+    g_tree_insert(tree, e, NULL);
+}
+
+static void qsp_aggregate(void *p, uint32_t h, void *up)
+{
+    struct qht *ht = up;
+    const QSPEntry *e = p;
+    QSPEntry *agg;
+    uint32_t hash;
+
+    hash = qsp_entry_no_thread_hash(e);
+    agg = qsp_entry_find(ht, e, hash);
+    /*
+     * The entry is in the global hash table; read from it atomically (as in
+     * "read once").
+     */
+    agg->ns += qatomic_read_u64(&e->ns);
+    agg->n_acqs += qatomic_read_u64(&e->n_acqs);
+}
+
+static void qsp_iter_diff(void *p, uint32_t hash, void *htp)
+{
+    struct qht *ht = htp;
+    QSPEntry *old = p;
+    QSPEntry *new;
+
+    new = qht_lookup(ht, old, hash);
+    /* entries are never deleted, so we must have this one */
+    g_assert(new != NULL);
+    /* our reading of the stats happened after the snapshot was taken */
+    g_assert(new->n_acqs >= old->n_acqs);
+    g_assert(new->ns >= old->ns);
+
+    new->n_acqs -= old->n_acqs;
+    new->ns -= old->ns;
+
+    /* No point in reporting an empty entry */
+    if (new->n_acqs == 0 && new->ns == 0) {
+        bool removed = qht_remove(ht, new, hash);
+
+        g_assert(removed);
+        g_free(new);
+    }
+}
+
+static void qsp_diff(struct qht *orig, struct qht *new)
+{
+    qht_iter(orig, qsp_iter_diff, new);
+}
+
+static void qsp_iter_callsite_coalesce(void *p, uint32_t h, void *htp)
+{
+    struct qht *ht = htp;
+    QSPEntry *old = p;
+    QSPEntry *e;
+    uint32_t hash;
+
+    hash = qsp_entry_no_thread_obj_hash(old);
+    e = qht_lookup(ht, old, hash);
+    if (e == NULL) {
+        e = qsp_entry_create(ht, old, hash);
+        e->n_objs = 1;
+    } else if (e->callsite->obj != old->callsite->obj) {
+        e->n_objs++;
+    }
+    e->ns += old->ns;
+    e->n_acqs += old->n_acqs;
+}
+
+static void qsp_ht_delete(void *p, uint32_t h, void *htp)
+{
+    g_free(p);
+}
+
+static void qsp_mktree(GTree *tree, bool callsite_coalesce)
+{
+    struct qht ht, coalesce_ht;
+    struct qht *htp;
+
+    /*
+     * First, see if there's a prior snapshot, so that we read the global hash
+     * table _after_ the snapshot has been created, which guarantees that
+     * the entries we'll read will be a superset of the snapshot's entries.
+     *
+     * We must remain in an RCU read-side critical section until we're done
+     * with the snapshot.
+     */
+    WITH_RCU_READ_LOCK_GUARD() {
+        QSPSnapshot *snap = qatomic_rcu_read(&qsp_snapshot);
+
+        /* Aggregate all results from the global hash table into a local one */
+        qht_init(&ht, qsp_entry_no_thread_cmp, QSP_INITIAL_SIZE,
+                 QHT_MODE_AUTO_RESIZE | QHT_MODE_RAW_MUTEXES);
+        qht_iter(&qsp_ht, qsp_aggregate, &ht);
+
+        /* compute the difference wrt the snapshot, if any */
+        if (snap) {
+            qsp_diff(&snap->ht, &ht);
+        }
+    }
+
+    htp = &ht;
+    if (callsite_coalesce) {
+        qht_init(&coalesce_ht, qsp_entry_no_thread_obj_cmp, QSP_INITIAL_SIZE,
+                 QHT_MODE_AUTO_RESIZE | QHT_MODE_RAW_MUTEXES);
+        qht_iter(&ht, qsp_iter_callsite_coalesce, &coalesce_ht);
+
+        /* free the previous hash table, and point htp to coalesce_ht */
+        qht_iter(&ht, qsp_ht_delete, NULL);
+        qht_destroy(&ht);
+        htp = &coalesce_ht;
+    }
+
+    /* sort the hash table elements by using a tree */
+    qht_iter(htp, qsp_sort, tree);
+
+    /* free the hash table, but keep the elements (those are in the tree now) */
+    qht_destroy(htp);
+}
+
+/* free string with g_free */
+static char *qsp_at(const QSPCallSite *callsite)
+{
+    GString *s = g_string_new(NULL);
+    const char *shortened;
+
+    /* remove the absolute path to qemu */
+    if (unlikely(strlen(callsite->file) < qsp_qemu_path_len)) {
+        shortened = callsite->file;
+    } else {
+        shortened = callsite->file + qsp_qemu_path_len;
+    }
+    g_string_append_printf(s, "%s:%u", shortened, callsite->line);
+    return g_string_free(s, FALSE);
+}
+
+struct QSPReportEntry {
+    const void *obj;
+    char *callsite_at;
+    const char *typename;
+    double time_s;
+    double ns_avg;
+    uint64_t n_acqs;
+    unsigned int n_objs;
+};
+typedef struct QSPReportEntry QSPReportEntry;
+
+struct QSPReport {
+    QSPReportEntry *entries;
+    size_t n_entries;
+    size_t max_n_entries;
+};
+typedef struct QSPReport QSPReport;
+
+static gboolean qsp_tree_report(gpointer key, gpointer value, gpointer udata)
+{
+    const QSPEntry *e = key;
+    QSPReport *report = udata;
+    QSPReportEntry *entry;
+
+    if (report->n_entries == report->max_n_entries) {
+        return TRUE;
+    }
+    entry = &report->entries[report->n_entries];
+    report->n_entries++;
+
+    entry->obj = e->callsite->obj;
+    entry->n_objs = e->n_objs;
+    entry->callsite_at = qsp_at(e->callsite);
+    entry->typename = qsp_typenames[e->callsite->type];
+    entry->time_s = e->ns * 1e-9;
+    entry->n_acqs = e->n_acqs;
+    entry->ns_avg = e->n_acqs ? e->ns / e->n_acqs : 0;
+    return FALSE;
+}
+
+static void pr_report(const QSPReport *rep)
+{
+    char *dashes;
+    size_t max_len = 0;
+    int callsite_len = 0;
+    int callsite_rspace;
+    int n_dashes;
+    size_t i;
+
+    /* find out the maximum length of all 'callsite' fields */
+    for (i = 0; i < rep->n_entries; i++) {
+        const QSPReportEntry *e = &rep->entries[i];
+        size_t len = strlen(e->callsite_at);
+
+        if (len > max_len) {
+            max_len = len;
+        }
+    }
+
+    callsite_len = MAX(max_len, strlen("Call site"));
+    /* white space to leave to the right of "Call site" */
+    callsite_rspace = callsite_len - strlen("Call site");
+
+    qemu_printf("Type               Object  Call site%*s  Wait Time (s)  "
+                "       Count  Average (us)\n", callsite_rspace, "");
+
+    /* build a horizontal rule with dashes */
+    n_dashes = 79 + callsite_rspace;
+    dashes = g_malloc(n_dashes + 1);
+    memset(dashes, '-', n_dashes);
+    dashes[n_dashes] = '\0';
+    qemu_printf("%s\n", dashes);
+
+    for (i = 0; i < rep->n_entries; i++) {
+        const QSPReportEntry *e = &rep->entries[i];
+        GString *s = g_string_new(NULL);
+
+        g_string_append_printf(s, "%-9s  ", e->typename);
+        if (e->n_objs > 1) {
+            g_string_append_printf(s, "[%12u]", e->n_objs);
+        } else {
+            g_string_append_printf(s, "%14p", e->obj);
+        }
+        g_string_append_printf(s, "  %s%*s  %13.5f  %12" PRIu64 "  %12.2f\n",
+                               e->callsite_at,
+                               callsite_len - (int)strlen(e->callsite_at), "",
+                               e->time_s, e->n_acqs, e->ns_avg * 1e-3);
+        qemu_printf("%s", s->str);
+        g_string_free(s, TRUE);
+    }
+
+    qemu_printf("%s\n", dashes);
+    g_free(dashes);
+}
+
+static void report_destroy(QSPReport *rep)
+{
+    size_t i;
+
+    for (i = 0; i < rep->n_entries; i++) {
+        QSPReportEntry *e = &rep->entries[i];
+
+        g_free(e->callsite_at);
+    }
+    g_free(rep->entries);
+}
+
+void qsp_report(size_t max, enum QSPSortBy sort_by,
+                bool callsite_coalesce)
+{
+    GTree *tree = g_tree_new_full(qsp_tree_cmp, &sort_by, g_free, NULL);
+    QSPReport rep;
+
+    qsp_init();
+
+    rep.entries = g_new0(QSPReportEntry, max);
+    rep.n_entries = 0;
+    rep.max_n_entries = max;
+
+    qsp_mktree(tree, callsite_coalesce);
+    g_tree_foreach(tree, qsp_tree_report, &rep);
+    g_tree_destroy(tree);
+
+    pr_report(&rep);
+    report_destroy(&rep);
+}
+
+static void qsp_snapshot_destroy(QSPSnapshot *snap)
+{
+    qht_iter(&snap->ht, qsp_ht_delete, NULL);
+    qht_destroy(&snap->ht);
+    g_free(snap);
+}
+
+void qsp_reset(void)
+{
+    QSPSnapshot *new = g_new(QSPSnapshot, 1);
+    QSPSnapshot *old;
+
+    qsp_init();
+
+    qht_init(&new->ht, qsp_entry_cmp, QSP_INITIAL_SIZE,
+             QHT_MODE_AUTO_RESIZE | QHT_MODE_RAW_MUTEXES);
+
+    /* take a snapshot of the current state */
+    qht_iter(&qsp_ht, qsp_aggregate, &new->ht);
+
+    /* replace the previous snapshot, if any */
+    old = qatomic_xchg(&qsp_snapshot, new);
+    if (old) {
+        call_rcu(old, qsp_snapshot_destroy, rcu);
+    }
+}
diff --git a/util/qtree.c b/util/qtree.c
new file mode 100644
index 000000000..a1095691f
--- /dev/null
+++ b/util/qtree.c
@@ -0,0 +1,1391 @@
+/*
+ * GLIB - Library of useful routines for C programming
+ * Copyright (C) 1995-1997  Peter Mattis, Spencer Kimball and Josh MacDonald
+ *
+ * SPDX-License-Identifier: LGPL-2.1-or-later
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, see .
+ */
+
+/*
+ * Modified by the GLib Team and others 1997-2000.  See the AUTHORS
+ * file for a list of people on the GLib Team.  See the ChangeLog
+ * files for a list of changes.  These files are distributed with
+ * GLib at ftp://ftp.gtk.org/pub/gtk/.
+ */
+
+/*
+ * MT safe
+ */
+
+#include "qemu/osdep.h"
+#include "qemu/qtree.h"
+
+/**
+ * SECTION:trees-binary
+ * @title: Balanced Binary Trees
+ * @short_description: a sorted collection of key/value pairs optimized
+ *                     for searching and traversing in order
+ *
+ * The #QTree structure and its associated functions provide a sorted
+ * collection of key/value pairs optimized for searching and traversing
+ * in order. This means that most of the operations  (access, search,
+ * insertion, deletion, ...) on #QTree are O(log(n)) in average and O(n)
+ * in worst case for time complexity. But, note that maintaining a
+ * balanced sorted #QTree of n elements is done in time O(n log(n)).
+ *
+ * To create a new #QTree use q_tree_new().
+ *
+ * To insert a key/value pair into a #QTree use q_tree_insert()
+ * (O(n log(n))).
+ *
+ * To remove a key/value pair use q_tree_remove() (O(n log(n))).
+ *
+ * To look up the value corresponding to a given key, use
+ * q_tree_lookup() and q_tree_lookup_extended().
+ *
+ * To find out the number of nodes in a #QTree, use q_tree_nnodes(). To
+ * get the height of a #QTree, use q_tree_height().
+ *
+ * To traverse a #QTree, calling a function for each node visited in
+ * the traversal, use q_tree_foreach().
+ *
+ * To destroy a #QTree, use q_tree_destroy().
+ **/
+
+#define MAX_GTREE_HEIGHT 40
+
+/**
+ * QTree:
+ *
+ * The QTree struct is an opaque data structure representing a
+ * [balanced binary tree][glib-Balanced-Binary-Trees]. It should be
+ * accessed only by using the following functions.
+ */
+struct _QTree {
+    QTreeNode        *root;
+    GCompareDataFunc  key_compare;
+    GDestroyNotify    key_destroy_func;
+    GDestroyNotify    value_destroy_func;
+    gpointer          key_compare_data;
+    guint             nnodes;
+    gint              ref_count;
+};
+
+struct _QTreeNode {
+    gpointer   key;         /* key for this node */
+    gpointer   value;       /* value stored at this node */
+    QTreeNode *left;        /* left subtree */
+    QTreeNode *right;       /* right subtree */
+    gint8      balance;     /* height (right) - height (left) */
+    guint8     left_child;
+    guint8     right_child;
+};
+
+
+static QTreeNode *q_tree_node_new(gpointer       key,
+                                  gpointer       value);
+static QTreeNode *q_tree_insert_internal(QTree *tree,
+                                         gpointer key,
+                                         gpointer value,
+                                         gboolean replace);
+static gboolean   q_tree_remove_internal(QTree         *tree,
+                                         gconstpointer  key,
+                                         gboolean       steal);
+static QTreeNode *q_tree_node_balance(QTreeNode     *node);
+static QTreeNode *q_tree_find_node(QTree         *tree,
+                                   gconstpointer  key);
+static QTreeNode *q_tree_node_search(QTreeNode *node,
+                                     GCompareFunc search_func,
+                                     gconstpointer data);
+static QTreeNode *q_tree_node_rotate_left(QTreeNode     *node);
+static QTreeNode *q_tree_node_rotate_right(QTreeNode     *node);
+#ifdef Q_TREE_DEBUG
+static void       q_tree_node_check(QTreeNode     *node);
+#endif
+
+static QTreeNode*
+q_tree_node_new(gpointer key,
+                gpointer value)
+{
+    QTreeNode *node = g_new(QTreeNode, 1);
+
+    node->balance = 0;
+    node->left = NULL;
+    node->right = NULL;
+    node->left_child = FALSE;
+    node->right_child = FALSE;
+    node->key = key;
+    node->value = value;
+
+    return node;
+}
+
+/**
+ * q_tree_new:
+ * @key_compare_func: the function used to order the nodes in the #QTree.
+ *   It should return values similar to the standard strcmp() function -
+ *   0 if the two arguments are equal, a negative value if the first argument
+ *   comes before the second, or a positive value if the first argument comes
+ *   after the second.
+ *
+ * Creates a new #QTree.
+ *
+ * Returns: a newly allocated #QTree
+ */
+QTree *
+q_tree_new(GCompareFunc key_compare_func)
+{
+    g_return_val_if_fail(key_compare_func != NULL, NULL);
+
+    return q_tree_new_full((GCompareDataFunc) key_compare_func, NULL,
+                           NULL, NULL);
+}
+
+/**
+ * q_tree_new_with_data:
+ * @key_compare_func: qsort()-style comparison function
+ * @key_compare_data: data to pass to comparison function
+ *
+ * Creates a new #QTree with a comparison function that accepts user data.
+ * See q_tree_new() for more details.
+ *
+ * Returns: a newly allocated #QTree
+ */
+QTree *
+q_tree_new_with_data(GCompareDataFunc key_compare_func,
+                     gpointer         key_compare_data)
+{
+    g_return_val_if_fail(key_compare_func != NULL, NULL);
+
+    return q_tree_new_full(key_compare_func, key_compare_data,
+                           NULL, NULL);
+}
+
+/**
+ * q_tree_new_full:
+ * @key_compare_func: qsort()-style comparison function
+ * @key_compare_data: data to pass to comparison function
+ * @key_destroy_func: a function to free the memory allocated for the key
+ *   used when removing the entry from the #QTree or %NULL if you don't
+ *   want to supply such a function
+ * @value_destroy_func: a function to free the memory allocated for the
+ *   value used when removing the entry from the #QTree or %NULL if you
+ *   don't want to supply such a function
+ *
+ * Creates a new #QTree like q_tree_new() and allows to specify functions
+ * to free the memory allocated for the key and value that get called when
+ * removing the entry from the #QTree.
+ *
+ * Returns: a newly allocated #QTree
+ */
+QTree *
+q_tree_new_full(GCompareDataFunc key_compare_func,
+                gpointer         key_compare_data,
+                GDestroyNotify   key_destroy_func,
+                GDestroyNotify   value_destroy_func)
+{
+    QTree *tree;
+
+    g_return_val_if_fail(key_compare_func != NULL, NULL);
+
+    tree = g_new(QTree, 1);
+    tree->root               = NULL;
+    tree->key_compare        = key_compare_func;
+    tree->key_destroy_func   = key_destroy_func;
+    tree->value_destroy_func = value_destroy_func;
+    tree->key_compare_data   = key_compare_data;
+    tree->nnodes             = 0;
+    tree->ref_count          = 1;
+
+    return tree;
+}
+
+/**
+ * q_tree_node_first:
+ * @tree: a #QTree
+ *
+ * Returns the first in-order node of the tree, or %NULL
+ * for an empty tree.
+ *
+ * Returns: (nullable) (transfer none): the first node in the tree
+ *
+ * Since: 2.68 in GLib. Internal in Qtree, i.e. not in the public API.
+ */
+static QTreeNode *
+q_tree_node_first(QTree *tree)
+{
+    QTreeNode *tmp;
+
+    g_return_val_if_fail(tree != NULL, NULL);
+
+    if (!tree->root) {
+        return NULL;
+    }
+
+    tmp = tree->root;
+
+    while (tmp->left_child) {
+        tmp = tmp->left;
+    }
+
+    return tmp;
+}
+
+/**
+ * q_tree_node_previous
+ * @node: a #QTree node
+ *
+ * Returns the previous in-order node of the tree, or %NULL
+ * if the passed node was already the first one.
+ *
+ * Returns: (nullable) (transfer none): the previous node in the tree
+ *
+ * Since: 2.68 in GLib. Internal in Qtree, i.e. not in the public API.
+ */
+static QTreeNode *
+q_tree_node_previous(QTreeNode *node)
+{
+    QTreeNode *tmp;
+
+    g_return_val_if_fail(node != NULL, NULL);
+
+    tmp = node->left;
+
+    if (node->left_child) {
+        while (tmp->right_child) {
+            tmp = tmp->right;
+        }
+    }
+
+    return tmp;
+}
+
+/**
+ * q_tree_node_next
+ * @node: a #QTree node
+ *
+ * Returns the next in-order node of the tree, or %NULL
+ * if the passed node was already the last one.
+ *
+ * Returns: (nullable) (transfer none): the next node in the tree
+ *
+ * Since: 2.68 in GLib. Internal in Qtree, i.e. not in the public API.
+ */
+static QTreeNode *
+q_tree_node_next(QTreeNode *node)
+{
+    QTreeNode *tmp;
+
+    g_return_val_if_fail(node != NULL, NULL);
+
+    tmp = node->right;
+
+    if (node->right_child) {
+        while (tmp->left_child) {
+            tmp = tmp->left;
+        }
+    }
+
+    return tmp;
+}
+
+/**
+ * q_tree_remove_all:
+ * @tree: a #QTree
+ *
+ * Removes all nodes from a #QTree and destroys their keys and values,
+ * then resets the #QTree’s root to %NULL.
+ *
+ * Since: 2.70 in GLib. Internal in Qtree, i.e. not in the public API.
+ */
+static void
+q_tree_remove_all(QTree *tree)
+{
+    QTreeNode *node;
+    QTreeNode *next;
+
+    g_return_if_fail(tree != NULL);
+
+    node = q_tree_node_first(tree);
+
+    while (node) {
+        next = q_tree_node_next(node);
+
+        if (tree->key_destroy_func) {
+            tree->key_destroy_func(node->key);
+        }
+        if (tree->value_destroy_func) {
+            tree->value_destroy_func(node->value);
+        }
+        g_free(node);
+
+#ifdef Q_TREE_DEBUG
+        g_assert(tree->nnodes > 0);
+        tree->nnodes--;
+#endif
+
+        node = next;
+    }
+
+#ifdef Q_TREE_DEBUG
+    g_assert(tree->nnodes == 0);
+#endif
+
+    tree->root = NULL;
+#ifndef Q_TREE_DEBUG
+    tree->nnodes = 0;
+#endif
+}
+
+/**
+ * q_tree_ref:
+ * @tree: a #QTree
+ *
+ * Increments the reference count of @tree by one.
+ *
+ * It is safe to call this function from any thread.
+ *
+ * Returns: the passed in #QTree
+ *
+ * Since: 2.22
+ */
+QTree *
+q_tree_ref(QTree *tree)
+{
+    g_return_val_if_fail(tree != NULL, NULL);
+
+    g_atomic_int_inc(&tree->ref_count);
+
+    return tree;
+}
+
+/**
+ * q_tree_unref:
+ * @tree: a #QTree
+ *
+ * Decrements the reference count of @tree by one.
+ * If the reference count drops to 0, all keys and values will
+ * be destroyed (if destroy functions were specified) and all
+ * memory allocated by @tree will be released.
+ *
+ * It is safe to call this function from any thread.
+ *
+ * Since: 2.22
+ */
+void
+q_tree_unref(QTree *tree)
+{
+    g_return_if_fail(tree != NULL);
+
+    if (g_atomic_int_dec_and_test(&tree->ref_count)) {
+        q_tree_remove_all(tree);
+        g_free(tree);
+    }
+}
+
+/**
+ * q_tree_destroy:
+ * @tree: a #QTree
+ *
+ * Removes all keys and values from the #QTree and decreases its
+ * reference count by one. If keys and/or values are dynamically
+ * allocated, you should either free them first or create the #QTree
+ * using q_tree_new_full(). In the latter case the destroy functions
+ * you supplied will be called on all keys and values before destroying
+ * the #QTree.
+ */
+void
+q_tree_destroy(QTree *tree)
+{
+    g_return_if_fail(tree != NULL);
+
+    q_tree_remove_all(tree);
+    q_tree_unref(tree);
+}
+
+/**
+ * q_tree_insert_node:
+ * @tree: a #QTree
+ * @key: the key to insert
+ * @value: the value corresponding to the key
+ *
+ * Inserts a key/value pair into a #QTree.
+ *
+ * If the given key already exists in the #QTree its corresponding value
+ * is set to the new value. If you supplied a @value_destroy_func when
+ * creating the #QTree, the old value is freed using that function. If
+ * you supplied a @key_destroy_func when creating the #QTree, the passed
+ * key is freed using that function.
+ *
+ * The tree is automatically 'balanced' as new key/value pairs are added,
+ * so that the distance from the root to every leaf is as small as possible.
+ * The cost of maintaining a balanced tree while inserting new key/value
+ * result in a O(n log(n)) operation where most of the other operations
+ * are O(log(n)).
+ *
+ * Returns: (transfer none): the inserted (or set) node.
+ *
+ * Since: 2.68 in GLib. Internal in Qtree, i.e. not in the public API.
+ */
+static QTreeNode *
+q_tree_insert_node(QTree    *tree,
+                   gpointer  key,
+                   gpointer  value)
+{
+    QTreeNode *node;
+
+    g_return_val_if_fail(tree != NULL, NULL);
+
+    node = q_tree_insert_internal(tree, key, value, FALSE);
+
+#ifdef Q_TREE_DEBUG
+    q_tree_node_check(tree->root);
+#endif
+
+    return node;
+}
+
+/**
+ * q_tree_insert:
+ * @tree: a #QTree
+ * @key: the key to insert
+ * @value: the value corresponding to the key
+ *
+ * Inserts a key/value pair into a #QTree.
+ *
+ * Inserts a new key and value into a #QTree as q_tree_insert_node() does,
+ * only this function does not return the inserted or set node.
+ */
+void
+q_tree_insert(QTree    *tree,
+              gpointer  key,
+              gpointer  value)
+{
+    q_tree_insert_node(tree, key, value);
+}
+
+/**
+ * q_tree_replace_node:
+ * @tree: a #QTree
+ * @key: the key to insert
+ * @value: the value corresponding to the key
+ *
+ * Inserts a new key and value into a #QTree similar to q_tree_insert_node().
+ * The difference is that if the key already exists in the #QTree, it gets
+ * replaced by the new key. If you supplied a @value_destroy_func when
+ * creating the #QTree, the old value is freed using that function. If you
+ * supplied a @key_destroy_func when creating the #QTree, the old key is
+ * freed using that function.
+ *
+ * The tree is automatically 'balanced' as new key/value pairs are added,
+ * so that the distance from the root to every leaf is as small as possible.
+ *
+ * Returns: (transfer none): the inserted (or set) node.
+ *
+ * Since: 2.68 in GLib. Internal in Qtree, i.e. not in the public API.
+ */
+static QTreeNode *
+q_tree_replace_node(QTree    *tree,
+                    gpointer  key,
+                    gpointer  value)
+{
+    QTreeNode *node;
+
+    g_return_val_if_fail(tree != NULL, NULL);
+
+    node = q_tree_insert_internal(tree, key, value, TRUE);
+
+#ifdef Q_TREE_DEBUG
+    q_tree_node_check(tree->root);
+#endif
+
+    return node;
+}
+
+/**
+ * q_tree_replace:
+ * @tree: a #QTree
+ * @key: the key to insert
+ * @value: the value corresponding to the key
+ *
+ * Inserts a new key and value into a #QTree as q_tree_replace_node() does,
+ * only this function does not return the inserted or set node.
+ */
+void
+q_tree_replace(QTree    *tree,
+               gpointer  key,
+               gpointer  value)
+{
+    q_tree_replace_node(tree, key, value);
+}
+
+/* internal insert routine */
+static QTreeNode *
+q_tree_insert_internal(QTree    *tree,
+                       gpointer  key,
+                       gpointer  value,
+                       gboolean  replace)
+{
+    QTreeNode *node, *retnode;
+    QTreeNode *path[MAX_GTREE_HEIGHT];
+    int idx;
+
+    g_return_val_if_fail(tree != NULL, NULL);
+
+    if (!tree->root) {
+        tree->root = q_tree_node_new(key, value);
+        tree->nnodes++;
+        return tree->root;
+    }
+
+    idx = 0;
+    path[idx++] = NULL;
+    node = tree->root;
+
+    while (1) {
+        int cmp = tree->key_compare(key, node->key, tree->key_compare_data);
+
+        if (cmp == 0) {
+            if (tree->value_destroy_func) {
+                tree->value_destroy_func(node->value);
+            }
+
+            node->value = value;
+
+            if (replace) {
+                if (tree->key_destroy_func) {
+                    tree->key_destroy_func(node->key);
+                }
+
+                node->key = key;
+            } else {
+                /* free the passed key */
+                if (tree->key_destroy_func) {
+                    tree->key_destroy_func(key);
+                }
+            }
+
+            return node;
+        } else if (cmp < 0) {
+            if (node->left_child) {
+                path[idx++] = node;
+                node = node->left;
+            } else {
+                QTreeNode *child = q_tree_node_new(key, value);
+
+                child->left = node->left;
+                child->right = node;
+                node->left = child;
+                node->left_child = TRUE;
+                node->balance -= 1;
+
+                tree->nnodes++;
+
+                retnode = child;
+                break;
+            }
+        } else {
+            if (node->right_child) {
+                path[idx++] = node;
+                node = node->right;
+            } else {
+                QTreeNode *child = q_tree_node_new(key, value);
+
+                child->right = node->right;
+                child->left = node;
+                node->right = child;
+                node->right_child = TRUE;
+                node->balance += 1;
+
+                tree->nnodes++;
+
+                retnode = child;
+                break;
+            }
+        }
+    }
+
+    /*
+     * Restore balance. This is the goodness of a non-recursive
+     * implementation, when we are done with balancing we 'break'
+     * the loop and we are done.
+     */
+    while (1) {
+        QTreeNode *bparent = path[--idx];
+        gboolean left_node = (bparent && node == bparent->left);
+        g_assert(!bparent || bparent->left == node || bparent->right == node);
+
+        if (node->balance < -1 || node->balance > 1) {
+            node = q_tree_node_balance(node);
+            if (bparent == NULL) {
+                tree->root = node;
+            } else if (left_node) {
+                bparent->left = node;
+            } else {
+                bparent->right = node;
+            }
+        }
+
+        if (node->balance == 0 || bparent == NULL) {
+            break;
+        }
+
+        if (left_node) {
+            bparent->balance -= 1;
+        } else {
+            bparent->balance += 1;
+        }
+
+        node = bparent;
+    }
+
+    return retnode;
+}
+
+/**
+ * q_tree_remove:
+ * @tree: a #QTree
+ * @key: the key to remove
+ *
+ * Removes a key/value pair from a #QTree.
+ *
+ * If the #QTree was created using q_tree_new_full(), the key and value
+ * are freed using the supplied destroy functions, otherwise you have to
+ * make sure that any dynamically allocated values are freed yourself.
+ * If the key does not exist in the #QTree, the function does nothing.
+ *
+ * The cost of maintaining a balanced tree while removing a key/value
+ * result in a O(n log(n)) operation where most of the other operations
+ * are O(log(n)).
+ *
+ * Returns: %TRUE if the key was found (prior to 2.8, this function
+ *     returned nothing)
+ */
+gboolean
+q_tree_remove(QTree         *tree,
+              gconstpointer  key)
+{
+    gboolean removed;
+
+    g_return_val_if_fail(tree != NULL, FALSE);
+
+    removed = q_tree_remove_internal(tree, key, FALSE);
+
+#ifdef Q_TREE_DEBUG
+    q_tree_node_check(tree->root);
+#endif
+
+    return removed;
+}
+
+/**
+ * q_tree_steal:
+ * @tree: a #QTree
+ * @key: the key to remove
+ *
+ * Removes a key and its associated value from a #QTree without calling
+ * the key and value destroy functions.
+ *
+ * If the key does not exist in the #QTree, the function does nothing.
+ *
+ * Returns: %TRUE if the key was found (prior to 2.8, this function
+ *     returned nothing)
+ */
+gboolean
+q_tree_steal(QTree         *tree,
+             gconstpointer  key)
+{
+    gboolean removed;
+
+    g_return_val_if_fail(tree != NULL, FALSE);
+
+    removed = q_tree_remove_internal(tree, key, TRUE);
+
+#ifdef Q_TREE_DEBUG
+    q_tree_node_check(tree->root);
+#endif
+
+    return removed;
+}
+
+/* internal remove routine */
+static gboolean
+q_tree_remove_internal(QTree         *tree,
+                       gconstpointer  key,
+                       gboolean       steal)
+{
+    QTreeNode *node, *parent, *balance;
+    QTreeNode *path[MAX_GTREE_HEIGHT];
+    int idx;
+    gboolean left_node;
+
+    g_return_val_if_fail(tree != NULL, FALSE);
+
+    if (!tree->root) {
+        return FALSE;
+    }
+
+    idx = 0;
+    path[idx++] = NULL;
+    node = tree->root;
+
+    while (1) {
+        int cmp = tree->key_compare(key, node->key, tree->key_compare_data);
+
+        if (cmp == 0) {
+            break;
+        } else if (cmp < 0) {
+            if (!node->left_child) {
+                return FALSE;
+            }
+
+            path[idx++] = node;
+            node = node->left;
+        } else {
+            if (!node->right_child) {
+                return FALSE;
+            }
+
+            path[idx++] = node;
+            node = node->right;
+        }
+    }
+
+    /*
+     * The following code is almost equal to q_tree_remove_node,
+     * except that we do not have to call q_tree_node_parent.
+     */
+    balance = parent = path[--idx];
+    g_assert(!parent || parent->left == node || parent->right == node);
+    left_node = (parent && node == parent->left);
+
+    if (!node->left_child) {
+        if (!node->right_child) {
+            if (!parent) {
+                tree->root = NULL;
+            } else if (left_node) {
+                parent->left_child = FALSE;
+                parent->left = node->left;
+                parent->balance += 1;
+            } else {
+                parent->right_child = FALSE;
+                parent->right = node->right;
+                parent->balance -= 1;
+            }
+        } else {
+            /* node has a right child */
+            QTreeNode *tmp = q_tree_node_next(node);
+            tmp->left = node->left;
+
+            if (!parent) {
+                tree->root = node->right;
+            } else if (left_node) {
+                parent->left = node->right;
+                parent->balance += 1;
+            } else {
+                parent->right = node->right;
+                parent->balance -= 1;
+            }
+        }
+    } else {
+        /* node has a left child */
+        if (!node->right_child) {
+            QTreeNode *tmp = q_tree_node_previous(node);
+            tmp->right = node->right;
+
+            if (parent == NULL) {
+                tree->root = node->left;
+            } else if (left_node) {
+                parent->left = node->left;
+                parent->balance += 1;
+            } else {
+                parent->right = node->left;
+                parent->balance -= 1;
+            }
+        } else {
+            /* node has a both children (pant, pant!) */
+            QTreeNode *prev = node->left;
+            QTreeNode *next = node->right;
+            QTreeNode *nextp = node;
+            int old_idx = idx + 1;
+            idx++;
+
+            /* path[idx] == parent */
+            /* find the immediately next node (and its parent) */
+            while (next->left_child) {
+                path[++idx] = nextp = next;
+                next = next->left;
+            }
+
+            path[old_idx] = next;
+            balance = path[idx];
+
+            /* remove 'next' from the tree */
+            if (nextp != node) {
+                if (next->right_child) {
+                    nextp->left = next->right;
+                } else {
+                    nextp->left_child = FALSE;
+                }
+                nextp->balance += 1;
+
+                next->right_child = TRUE;
+                next->right = node->right;
+            } else {
+                node->balance -= 1;
+            }
+
+            /* set the prev to point to the right place */
+            while (prev->right_child) {
+                prev = prev->right;
+            }
+            prev->right = next;
+
+            /* prepare 'next' to replace 'node' */
+            next->left_child = TRUE;
+            next->left = node->left;
+            next->balance = node->balance;
+
+            if (!parent) {
+                tree->root = next;
+            } else if (left_node) {
+                parent->left = next;
+            } else {
+                parent->right = next;
+            }
+        }
+    }
+
+    /* restore balance */
+    if (balance) {
+        while (1) {
+            QTreeNode *bparent = path[--idx];
+            g_assert(!bparent ||
+                     bparent->left == balance ||
+                     bparent->right == balance);
+            left_node = (bparent && balance == bparent->left);
+
+            if (balance->balance < -1 || balance->balance > 1) {
+                balance = q_tree_node_balance(balance);
+                if (!bparent) {
+                    tree->root = balance;
+                } else if (left_node) {
+                    bparent->left = balance;
+                } else {
+                    bparent->right = balance;
+                }
+            }
+
+            if (balance->balance != 0 || !bparent) {
+                break;
+            }
+
+            if (left_node) {
+                bparent->balance += 1;
+            } else {
+                bparent->balance -= 1;
+            }
+
+            balance = bparent;
+        }
+    }
+
+    if (!steal) {
+        if (tree->key_destroy_func) {
+            tree->key_destroy_func(node->key);
+        }
+        if (tree->value_destroy_func) {
+            tree->value_destroy_func(node->value);
+        }
+    }
+
+    g_free(node);
+
+    tree->nnodes--;
+
+    return TRUE;
+}
+
+/**
+ * q_tree_lookup_node:
+ * @tree: a #QTree
+ * @key: the key to look up
+ *
+ * Gets the tree node corresponding to the given key. Since a #QTree is
+ * automatically balanced as key/value pairs are added, key lookup
+ * is O(log n) (where n is the number of key/value pairs in the tree).
+ *
+ * Returns: (nullable) (transfer none): the tree node corresponding to
+ *          the key, or %NULL if the key was not found
+ *
+ * Since: 2.68 in GLib. Internal in Qtree, i.e. not in the public API.
+ */
+static QTreeNode *
+q_tree_lookup_node(QTree         *tree,
+                   gconstpointer  key)
+{
+    g_return_val_if_fail(tree != NULL, NULL);
+
+    return q_tree_find_node(tree, key);
+}
+
+/**
+ * q_tree_lookup:
+ * @tree: a #QTree
+ * @key: the key to look up
+ *
+ * Gets the value corresponding to the given key. Since a #QTree is
+ * automatically balanced as key/value pairs are added, key lookup
+ * is O(log n) (where n is the number of key/value pairs in the tree).
+ *
+ * Returns: the value corresponding to the key, or %NULL
+ *     if the key was not found
+ */
+gpointer
+q_tree_lookup(QTree         *tree,
+              gconstpointer  key)
+{
+    QTreeNode *node;
+
+    node = q_tree_lookup_node(tree, key);
+
+    return node ? node->value : NULL;
+}
+
+/**
+ * q_tree_lookup_extended:
+ * @tree: a #QTree
+ * @lookup_key: the key to look up
+ * @orig_key: (out) (optional) (nullable): returns the original key
+ * @value: (out) (optional) (nullable): returns the value associated with
+ *         the key
+ *
+ * Looks up a key in the #QTree, returning the original key and the
+ * associated value. This is useful if you need to free the memory
+ * allocated for the original key, for example before calling
+ * q_tree_remove().
+ *
+ * Returns: %TRUE if the key was found in the #QTree
+ */
+gboolean
+q_tree_lookup_extended(QTree         *tree,
+                       gconstpointer  lookup_key,
+                       gpointer      *orig_key,
+                       gpointer      *value)
+{
+    QTreeNode *node;
+
+    g_return_val_if_fail(tree != NULL, FALSE);
+
+    node = q_tree_find_node(tree, lookup_key);
+
+    if (node) {
+        if (orig_key) {
+            *orig_key = node->key;
+        }
+        if (value) {
+            *value = node->value;
+        }
+        return TRUE;
+    } else {
+        return FALSE;
+    }
+}
+
+/**
+ * q_tree_foreach:
+ * @tree: a #QTree
+ * @func: the function to call for each node visited.
+ *     If this function returns %TRUE, the traversal is stopped.
+ * @user_data: user data to pass to the function
+ *
+ * Calls the given function for each of the key/value pairs in the #QTree.
+ * The function is passed the key and value of each pair, and the given
+ * @data parameter. The tree is traversed in sorted order.
+ *
+ * The tree may not be modified while iterating over it (you can't
+ * add/remove items). To remove all items matching a predicate, you need
+ * to add each item to a list in your #GTraverseFunc as you walk over
+ * the tree, then walk the list and remove each item.
+ */
+void
+q_tree_foreach(QTree         *tree,
+               GTraverseFunc  func,
+               gpointer       user_data)
+{
+    QTreeNode *node;
+
+    g_return_if_fail(tree != NULL);
+
+    if (!tree->root) {
+        return;
+    }
+
+    node = q_tree_node_first(tree);
+
+    while (node) {
+        if ((*func)(node->key, node->value, user_data)) {
+            break;
+        }
+
+        node = q_tree_node_next(node);
+    }
+}
+
+/**
+ * q_tree_search_node:
+ * @tree: a #QTree
+ * @search_func: a function used to search the #QTree
+ * @user_data: the data passed as the second argument to @search_func
+ *
+ * Searches a #QTree using @search_func.
+ *
+ * The @search_func is called with a pointer to the key of a key/value
+ * pair in the tree, and the passed in @user_data. If @search_func returns
+ * 0 for a key/value pair, then the corresponding node is returned as
+ * the result of q_tree_search(). If @search_func returns -1, searching
+ * will proceed among the key/value pairs that have a smaller key; if
+ * @search_func returns 1, searching will proceed among the key/value
+ * pairs that have a larger key.
+ *
+ * Returns: (nullable) (transfer none): the node corresponding to the
+ *          found key, or %NULL if the key was not found
+ *
+ * Since: 2.68 in GLib. Internal in Qtree, i.e. not in the public API.
+ */
+static QTreeNode *
+q_tree_search_node(QTree         *tree,
+                   GCompareFunc   search_func,
+                   gconstpointer  user_data)
+{
+    g_return_val_if_fail(tree != NULL, NULL);
+
+    if (!tree->root) {
+        return NULL;
+    }
+
+    return q_tree_node_search(tree->root, search_func, user_data);
+}
+
+/**
+ * q_tree_search:
+ * @tree: a #QTree
+ * @search_func: a function used to search the #QTree
+ * @user_data: the data passed as the second argument to @search_func
+ *
+ * Searches a #QTree using @search_func.
+ *
+ * The @search_func is called with a pointer to the key of a key/value
+ * pair in the tree, and the passed in @user_data. If @search_func returns
+ * 0 for a key/value pair, then the corresponding value is returned as
+ * the result of q_tree_search(). If @search_func returns -1, searching
+ * will proceed among the key/value pairs that have a smaller key; if
+ * @search_func returns 1, searching will proceed among the key/value
+ * pairs that have a larger key.
+ *
+ * Returns: the value corresponding to the found key, or %NULL
+ *     if the key was not found
+ */
+gpointer
+q_tree_search(QTree         *tree,
+              GCompareFunc   search_func,
+              gconstpointer  user_data)
+{
+    QTreeNode *node;
+
+    node = q_tree_search_node(tree, search_func, user_data);
+
+    return node ? node->value : NULL;
+}
+
+/**
+ * q_tree_height:
+ * @tree: a #QTree
+ *
+ * Gets the height of a #QTree.
+ *
+ * If the #QTree contains no nodes, the height is 0.
+ * If the #QTree contains only one root node the height is 1.
+ * If the root node has children the height is 2, etc.
+ *
+ * Returns: the height of @tree
+ */
+gint
+q_tree_height(QTree *tree)
+{
+    QTreeNode *node;
+    gint height;
+
+    g_return_val_if_fail(tree != NULL, 0);
+
+    if (!tree->root) {
+        return 0;
+    }
+
+    height = 0;
+    node = tree->root;
+
+    while (1) {
+        height += 1 + MAX(node->balance, 0);
+
+        if (!node->left_child) {
+            return height;
+        }
+
+        node = node->left;
+    }
+}
+
+/**
+ * q_tree_nnodes:
+ * @tree: a #QTree
+ *
+ * Gets the number of nodes in a #QTree.
+ *
+ * Returns: the number of nodes in @tree
+ */
+gint
+q_tree_nnodes(QTree *tree)
+{
+    g_return_val_if_fail(tree != NULL, 0);
+
+    return tree->nnodes;
+}
+
+static QTreeNode *
+q_tree_node_balance(QTreeNode *node)
+{
+    if (node->balance < -1) {
+        if (node->left->balance > 0) {
+            node->left = q_tree_node_rotate_left(node->left);
+        }
+        node = q_tree_node_rotate_right(node);
+    } else if (node->balance > 1) {
+        if (node->right->balance < 0) {
+            node->right = q_tree_node_rotate_right(node->right);
+        }
+        node = q_tree_node_rotate_left(node);
+    }
+
+    return node;
+}
+
+static QTreeNode *
+q_tree_find_node(QTree        *tree,
+                 gconstpointer key)
+{
+    QTreeNode *node;
+    gint cmp;
+
+    node = tree->root;
+    if (!node) {
+        return NULL;
+    }
+
+    while (1) {
+        cmp = tree->key_compare(key, node->key, tree->key_compare_data);
+        if (cmp == 0) {
+            return node;
+        } else if (cmp < 0) {
+            if (!node->left_child) {
+                return NULL;
+            }
+
+            node = node->left;
+        } else {
+            if (!node->right_child) {
+                return NULL;
+            }
+
+            node = node->right;
+        }
+    }
+}
+
+static QTreeNode *
+q_tree_node_search(QTreeNode     *node,
+                   GCompareFunc   search_func,
+                   gconstpointer  data)
+{
+    gint dir;
+
+    if (!node) {
+        return NULL;
+    }
+
+    while (1) {
+        dir = (*search_func)(node->key, data);
+        if (dir == 0) {
+            return node;
+        } else if (dir < 0) {
+            if (!node->left_child) {
+                return NULL;
+            }
+
+            node = node->left;
+        } else {
+            if (!node->right_child) {
+                return NULL;
+            }
+
+            node = node->right;
+        }
+    }
+}
+
+static QTreeNode *
+q_tree_node_rotate_left(QTreeNode *node)
+{
+    QTreeNode *right;
+    gint a_bal;
+    gint b_bal;
+
+    right = node->right;
+
+    if (right->left_child) {
+        node->right = right->left;
+    } else {
+        node->right_child = FALSE;
+        right->left_child = TRUE;
+    }
+    right->left = node;
+
+    a_bal = node->balance;
+    b_bal = right->balance;
+
+    if (b_bal <= 0) {
+        if (a_bal >= 1) {
+            right->balance = b_bal - 1;
+        } else {
+            right->balance = a_bal + b_bal - 2;
+        }
+        node->balance = a_bal - 1;
+    } else {
+        if (a_bal <= b_bal) {
+            right->balance = a_bal - 2;
+        } else {
+            right->balance = b_bal - 1;
+        }
+        node->balance = a_bal - b_bal - 1;
+    }
+
+    return right;
+}
+
+static QTreeNode *
+q_tree_node_rotate_right(QTreeNode *node)
+{
+    QTreeNode *left;
+    gint a_bal;
+    gint b_bal;
+
+    left = node->left;
+
+    if (left->right_child) {
+        node->left = left->right;
+    } else {
+        node->left_child = FALSE;
+        left->right_child = TRUE;
+    }
+    left->right = node;
+
+    a_bal = node->balance;
+    b_bal = left->balance;
+
+    if (b_bal <= 0) {
+        if (b_bal > a_bal) {
+            left->balance = b_bal + 1;
+        } else {
+            left->balance = a_bal + 2;
+        }
+        node->balance = a_bal - b_bal + 1;
+    } else {
+        if (a_bal <= -1) {
+            left->balance = b_bal + 1;
+        } else {
+            left->balance = a_bal + b_bal + 2;
+        }
+        node->balance = a_bal + 1;
+    }
+
+    return left;
+}
+
+#ifdef Q_TREE_DEBUG
+static gint
+q_tree_node_height(QTreeNode *node)
+{
+    gint left_height;
+    gint right_height;
+
+    if (node) {
+        left_height = 0;
+        right_height = 0;
+
+        if (node->left_child) {
+            left_height = q_tree_node_height(node->left);
+        }
+
+        if (node->right_child) {
+            right_height = q_tree_node_height(node->right);
+        }
+
+        return MAX(left_height, right_height) + 1;
+    }
+
+    return 0;
+}
+
+static void q_tree_node_check(QTreeNode *node)
+{
+    gint left_height;
+    gint right_height;
+    gint balance;
+    QTreeNode *tmp;
+
+    if (node) {
+        if (node->left_child) {
+            tmp = q_tree_node_previous(node);
+            g_assert(tmp->right == node);
+        }
+
+        if (node->right_child) {
+            tmp = q_tree_node_next(node);
+            g_assert(tmp->left == node);
+        }
+
+        left_height = 0;
+        right_height = 0;
+
+        if (node->left_child) {
+            left_height = q_tree_node_height(node->left);
+        }
+        if (node->right_child) {
+            right_height = q_tree_node_height(node->right);
+        }
+
+        balance = right_height - left_height;
+        g_assert(balance == node->balance);
+
+        if (node->left_child) {
+            q_tree_node_check(node->left);
+        }
+        if (node->right_child) {
+            q_tree_node_check(node->right);
+        }
+    }
+}
+#endif
+
diff --git a/util/range.c b/util/range.c
new file mode 100644
index 000000000..098d9d2dc
--- /dev/null
+++ b/util/range.c
@@ -0,0 +1,72 @@
+/*
+ * QEMU 64-bit address ranges
+ *
+ * Copyright (c) 2015-2016 Red Hat, Inc.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, see .
+ */
+
+#include "qemu/osdep.h"
+#include "qemu/range.h"
+
+/*
+ * Return -1 if @a < @b, 1 @a > @b, and 0 if they touch or overlap.
+ * Both @a and @b must not be empty.
+ */
+static inline int range_compare(Range *a, Range *b)
+{
+    assert(!range_is_empty(a) && !range_is_empty(b));
+
+    /* Careful, avoid wraparound */
+    if (b->lob && b->lob - 1 > a->upb) {
+        return -1;
+    }
+    if (a->lob && a->lob - 1 > b->upb) {
+        return 1;
+    }
+    return 0;
+}
+
+/* Insert @data into @list of ranges; caller no longer owns @data */
+GList *range_list_insert(GList *list, Range *data)
+{
+    GList *l;
+
+    assert(!range_is_empty(data));
+
+    /* Skip all list elements strictly less than data */
+    for (l = list; l && range_compare(l->data, data) < 0; l = l->next) {
+    }
+
+    if (!l || range_compare(l->data, data) > 0) {
+        /* Rest of the list (if any) is strictly greater than @data */
+        return g_list_insert_before(list, l, data);
+    }
+
+    /* Current list element overlaps @data, merge the two */
+    range_extend(l->data, data);
+    g_free(data);
+
+    /* Merge any subsequent list elements that now also overlap */
+    while (l->next && range_compare(l->data, l->next->data) == 0) {
+        GList *new_l;
+
+        range_extend(l->data, l->next->data);
+        g_free(l->next->data);
+        new_l = g_list_delete_link(list, l->next);
+        assert(new_l == list);
+    }
+
+    return list;
+}
diff --git a/util/rcu.c b/util/rcu.c
new file mode 100644
index 000000000..13ac0f75c
--- /dev/null
+++ b/util/rcu.c
@@ -0,0 +1,436 @@
+/*
+ * urcu-mb.c
+ *
+ * Userspace RCU library with explicit memory barriers
+ *
+ * Copyright (c) 2009 Mathieu Desnoyers 
+ * Copyright (c) 2009 Paul E. McKenney, IBM Corporation.
+ * Copyright 2015 Red Hat, Inc.
+ *
+ * Ported to QEMU by Paolo Bonzini  
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ *
+ * IBM's contributions to this file may be relicensed under LGPLv2 or later.
+ */
+
+#include "qemu/osdep.h"
+#include "qemu/rcu.h"
+#include "qemu/atomic.h"
+#include "qemu/thread.h"
+#include "qemu/main-loop.h"
+#include "qemu/lockable.h"
+#if defined(CONFIG_MALLOC_TRIM)
+#include 
+#endif
+
+/*
+ * Global grace period counter.  Bit 0 is always one in rcu_gp_ctr.
+ * Bits 1 and above are defined in synchronize_rcu.
+ */
+#define RCU_GP_LOCKED           (1UL << 0)
+#define RCU_GP_CTR              (1UL << 1)
+
+unsigned long rcu_gp_ctr = RCU_GP_LOCKED;
+
+QemuEvent rcu_gp_event;
+static QemuMutex rcu_registry_lock;
+static QemuMutex rcu_sync_lock;
+
+/*
+ * Check whether a quiescent state was crossed between the beginning of
+ * update_counter_and_wait and now.
+ */
+static inline int rcu_gp_ongoing(unsigned long *ctr)
+{
+    unsigned long v;
+
+    v = qatomic_read(ctr);
+    return v && (v != rcu_gp_ctr);
+}
+
+/* Written to only by each individual reader. Read by both the reader and the
+ * writers.
+ */
+__thread struct rcu_reader_data rcu_reader;
+
+/* Protected by rcu_registry_lock.  */
+typedef QLIST_HEAD(, rcu_reader_data) ThreadList;
+static ThreadList registry = QLIST_HEAD_INITIALIZER(registry);
+
+/* Wait for previous parity/grace period to be empty of readers.  */
+static void wait_for_readers(void)
+{
+    ThreadList qsreaders = QLIST_HEAD_INITIALIZER(qsreaders);
+    struct rcu_reader_data *index, *tmp;
+
+    for (;;) {
+        /* We want to be notified of changes made to rcu_gp_ongoing
+         * while we walk the list.
+         */
+        qemu_event_reset(&rcu_gp_event);
+
+        /* Instead of using qatomic_mb_set for index->waiting, and
+         * qatomic_mb_read for index->ctr, memory barriers are placed
+         * manually since writes to different threads are independent.
+         * qemu_event_reset has acquire semantics, so no memory barrier
+         * is needed here.
+         */
+        QLIST_FOREACH(index, ®istry, node) {
+            qatomic_set(&index->waiting, true);
+        }
+
+        /* Here, order the stores to index->waiting before the loads of
+         * index->ctr.  Pairs with smp_mb_placeholder() in rcu_read_unlock(),
+         * ensuring that the loads of index->ctr are sequentially consistent.
+         */
+        smp_mb_global();
+
+        QLIST_FOREACH_SAFE(index, ®istry, node, tmp) {
+            if (!rcu_gp_ongoing(&index->ctr)) {
+                QLIST_REMOVE(index, node);
+                QLIST_INSERT_HEAD(&qsreaders, index, node);
+
+                /* No need for mb_set here, worst of all we
+                 * get some extra futex wakeups.
+                 */
+                qatomic_set(&index->waiting, false);
+            }
+        }
+
+        if (QLIST_EMPTY(®istry)) {
+            break;
+        }
+
+        /* Wait for one thread to report a quiescent state and try again.
+         * Release rcu_registry_lock, so rcu_(un)register_thread() doesn't
+         * wait too much time.
+         *
+         * rcu_register_thread() may add nodes to ®istry; it will not
+         * wake up synchronize_rcu, but that is okay because at least another
+         * thread must exit its RCU read-side critical section before
+         * synchronize_rcu is done.  The next iteration of the loop will
+         * move the new thread's rcu_reader from ®istry to &qsreaders,
+         * because rcu_gp_ongoing() will return false.
+         *
+         * rcu_unregister_thread() may remove nodes from &qsreaders instead
+         * of ®istry if it runs during qemu_event_wait.  That's okay;
+         * the node then will not be added back to ®istry by QLIST_SWAP
+         * below.  The invariant is that the node is part of one list when
+         * rcu_registry_lock is released.
+         */
+        qemu_mutex_unlock(&rcu_registry_lock);
+        qemu_event_wait(&rcu_gp_event);
+        qemu_mutex_lock(&rcu_registry_lock);
+    }
+
+    /* put back the reader list in the registry */
+    QLIST_SWAP(®istry, &qsreaders, node);
+}
+
+void synchronize_rcu(void)
+{
+    QEMU_LOCK_GUARD(&rcu_sync_lock);
+
+    /* Write RCU-protected pointers before reading p_rcu_reader->ctr.
+     * Pairs with smp_mb_placeholder() in rcu_read_lock().
+     */
+    smp_mb_global();
+
+    QEMU_LOCK_GUARD(&rcu_registry_lock);
+    if (!QLIST_EMPTY(®istry)) {
+        /* In either case, the qatomic_mb_set below blocks stores that free
+         * old RCU-protected pointers.
+         */
+        if (sizeof(rcu_gp_ctr) < 8) {
+            /* For architectures with 32-bit longs, a two-subphases algorithm
+             * ensures we do not encounter overflow bugs.
+             *
+             * Switch parity: 0 -> 1, 1 -> 0.
+             */
+            qatomic_mb_set(&rcu_gp_ctr, rcu_gp_ctr ^ RCU_GP_CTR);
+            wait_for_readers();
+            qatomic_mb_set(&rcu_gp_ctr, rcu_gp_ctr ^ RCU_GP_CTR);
+        } else {
+            /* Increment current grace period.  */
+            qatomic_mb_set(&rcu_gp_ctr, rcu_gp_ctr + RCU_GP_CTR);
+        }
+
+        wait_for_readers();
+    }
+}
+
+
+#define RCU_CALL_MIN_SIZE        30
+
+/* Multi-producer, single-consumer queue based on urcu/static/wfqueue.h
+ * from liburcu.  Note that head is only used by the consumer.
+ */
+static struct rcu_head dummy;
+static struct rcu_head *head = &dummy, **tail = &dummy.next;
+static int rcu_call_count;
+static QemuEvent rcu_call_ready_event;
+
+static void enqueue(struct rcu_head *node)
+{
+    struct rcu_head **old_tail;
+
+    node->next = NULL;
+    old_tail = qatomic_xchg(&tail, &node->next);
+    qatomic_mb_set(old_tail, node);
+}
+
+static struct rcu_head *try_dequeue(void)
+{
+    struct rcu_head *node, *next;
+
+retry:
+    /* Test for an empty list, which we do not expect.  Note that for
+     * the consumer head and tail are always consistent.  The head
+     * is consistent because only the consumer reads/writes it.
+     * The tail, because it is the first step in the enqueuing.
+     * It is only the next pointers that might be inconsistent.
+     */
+    if (head == &dummy && qatomic_mb_read(&tail) == &dummy.next) {
+        abort();
+    }
+
+    /* If the head node has NULL in its next pointer, the value is
+     * wrong and we need to wait until its enqueuer finishes the update.
+     */
+    node = head;
+    next = qatomic_mb_read(&head->next);
+    if (!next) {
+        return NULL;
+    }
+
+    /* Since we are the sole consumer, and we excluded the empty case
+     * above, the queue will always have at least two nodes: the
+     * dummy node, and the one being removed.  So we do not need to update
+     * the tail pointer.
+     */
+    head = next;
+
+    /* If we dequeued the dummy node, add it back at the end and retry.  */
+    if (node == &dummy) {
+        enqueue(node);
+        goto retry;
+    }
+
+    return node;
+}
+
+static void *call_rcu_thread(void *opaque)
+{
+    struct rcu_head *node;
+
+    rcu_register_thread();
+
+    for (;;) {
+        int tries = 0;
+        int n = qatomic_read(&rcu_call_count);
+
+        /* Heuristically wait for a decent number of callbacks to pile up.
+         * Fetch rcu_call_count now, we only must process elements that were
+         * added before synchronize_rcu() starts.
+         */
+        while (n == 0 || (n < RCU_CALL_MIN_SIZE && ++tries <= 5)) {
+            g_usleep(10000);
+            if (n == 0) {
+                qemu_event_reset(&rcu_call_ready_event);
+                n = qatomic_read(&rcu_call_count);
+                if (n == 0) {
+#if defined(CONFIG_MALLOC_TRIM)
+                    malloc_trim(4 * 1024 * 1024);
+#endif
+                    qemu_event_wait(&rcu_call_ready_event);
+                }
+            }
+            n = qatomic_read(&rcu_call_count);
+        }
+
+        qatomic_sub(&rcu_call_count, n);
+        synchronize_rcu();
+        qemu_mutex_lock_iothread();
+        while (n > 0) {
+            node = try_dequeue();
+            while (!node) {
+                qemu_mutex_unlock_iothread();
+                qemu_event_reset(&rcu_call_ready_event);
+                node = try_dequeue();
+                if (!node) {
+                    qemu_event_wait(&rcu_call_ready_event);
+                    node = try_dequeue();
+                }
+                qemu_mutex_lock_iothread();
+            }
+
+            n--;
+            node->func(node);
+        }
+        qemu_mutex_unlock_iothread();
+    }
+    abort();
+}
+
+void call_rcu1(struct rcu_head *node, void (*func)(struct rcu_head *node))
+{
+    node->func = func;
+    enqueue(node);
+    qatomic_inc(&rcu_call_count);
+    qemu_event_set(&rcu_call_ready_event);
+}
+
+
+struct rcu_drain {
+    struct rcu_head rcu;
+    QemuEvent drain_complete_event;
+};
+
+static void drain_rcu_callback(struct rcu_head *node)
+{
+    struct rcu_drain *event = (struct rcu_drain *)node;
+    qemu_event_set(&event->drain_complete_event);
+}
+
+/*
+ * This function ensures that all pending RCU callbacks
+ * on the current thread are done executing
+
+ * drops big qemu lock during the wait to allow RCU thread
+ * to process the callbacks
+ *
+ */
+
+void drain_call_rcu(void)
+{
+    struct rcu_drain rcu_drain;
+    bool locked = qemu_mutex_iothread_locked();
+
+    memset(&rcu_drain, 0, sizeof(struct rcu_drain));
+    qemu_event_init(&rcu_drain.drain_complete_event, false);
+
+    if (locked) {
+        qemu_mutex_unlock_iothread();
+    }
+
+
+    /*
+     * RCU callbacks are invoked in the same order as in which they
+     * are registered, thus we can be sure that when 'drain_rcu_callback'
+     * is called, all RCU callbacks that were registered on this thread
+     * prior to calling this function are completed.
+     *
+     * Note that since we have only one global queue of the RCU callbacks,
+     * we also end up waiting for most of RCU callbacks that were registered
+     * on the other threads, but this is a side effect that shoudn't be
+     * assumed.
+     */
+
+    call_rcu1(&rcu_drain.rcu, drain_rcu_callback);
+    qemu_event_wait(&rcu_drain.drain_complete_event);
+
+    if (locked) {
+        qemu_mutex_lock_iothread();
+    }
+
+}
+
+void rcu_register_thread(void)
+{
+    assert(rcu_reader.ctr == 0);
+    qemu_mutex_lock(&rcu_registry_lock);
+    QLIST_INSERT_HEAD(®istry, &rcu_reader, node);
+    qemu_mutex_unlock(&rcu_registry_lock);
+}
+
+void rcu_unregister_thread(void)
+{
+    qemu_mutex_lock(&rcu_registry_lock);
+    QLIST_REMOVE(&rcu_reader, node);
+    qemu_mutex_unlock(&rcu_registry_lock);
+}
+
+static void rcu_init_complete(void)
+{
+    QemuThread thread;
+
+    qemu_mutex_init(&rcu_registry_lock);
+    qemu_mutex_init(&rcu_sync_lock);
+    qemu_event_init(&rcu_gp_event, true);
+
+    qemu_event_init(&rcu_call_ready_event, false);
+
+    /* The caller is assumed to have iothread lock, so the call_rcu thread
+     * must have been quiescent even after forking, just recreate it.
+     */
+    qemu_thread_create(&thread, "call_rcu", call_rcu_thread,
+                       NULL, QEMU_THREAD_DETACHED);
+
+    rcu_register_thread();
+}
+
+static int atfork_depth = 1;
+
+void rcu_enable_atfork(void)
+{
+    atfork_depth++;
+}
+
+void rcu_disable_atfork(void)
+{
+    atfork_depth--;
+}
+
+#ifdef CONFIG_POSIX
+static void rcu_init_lock(void)
+{
+    if (atfork_depth < 1) {
+        return;
+    }
+
+    qemu_mutex_lock(&rcu_sync_lock);
+    qemu_mutex_lock(&rcu_registry_lock);
+}
+
+static void rcu_init_unlock(void)
+{
+    if (atfork_depth < 1) {
+        return;
+    }
+
+    qemu_mutex_unlock(&rcu_registry_lock);
+    qemu_mutex_unlock(&rcu_sync_lock);
+}
+
+static void rcu_init_child(void)
+{
+    if (atfork_depth < 1) {
+        return;
+    }
+
+    memset(®istry, 0, sizeof(registry));
+    rcu_init_complete();
+}
+#endif
+
+static void __attribute__((__constructor__)) rcu_init(void)
+{
+    smp_mb_global_init();
+#ifdef CONFIG_POSIX
+    pthread_atfork(rcu_init_lock, rcu_init_unlock, rcu_init_child);
+#endif
+    rcu_init_complete();
+}
diff --git a/util/readline.c b/util/readline.c
new file mode 100644
index 000000000..e534460da
--- /dev/null
+++ b/util/readline.c
@@ -0,0 +1,546 @@
+/*
+ * QEMU readline utility
+ *
+ * Copyright (c) 2003-2004 Fabrice Bellard
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+#include "qemu/osdep.h"
+#include "qemu/readline.h"
+#include "qemu/ctype.h"
+#include "qemu/cutils.h"
+
+#define IS_NORM 0
+#define IS_ESC  1
+#define IS_CSI  2
+#define IS_SS3  3
+
+void readline_show_prompt(ReadLineState *rs)
+{
+    rs->printf_func(rs->opaque, "%s", rs->prompt);
+    rs->flush_func(rs->opaque);
+    rs->last_cmd_buf_index = 0;
+    rs->last_cmd_buf_size = 0;
+    rs->esc_state = IS_NORM;
+}
+
+/* update the displayed command line */
+static void readline_update(ReadLineState *rs)
+{
+    int i, delta, len;
+
+    if (rs->cmd_buf_size != rs->last_cmd_buf_size ||
+        memcmp(rs->cmd_buf, rs->last_cmd_buf, rs->cmd_buf_size) != 0) {
+        for (i = 0; i < rs->last_cmd_buf_index; i++) {
+            rs->printf_func(rs->opaque, "\033[D");
+        }
+        rs->cmd_buf[rs->cmd_buf_size] = '\0';
+        if (rs->read_password) {
+            len = strlen(rs->cmd_buf);
+            for (i = 0; i < len; i++) {
+                rs->printf_func(rs->opaque, "*");
+            }
+        } else {
+            rs->printf_func(rs->opaque, "%s", rs->cmd_buf);
+        }
+        rs->printf_func(rs->opaque, "\033[K");
+        memcpy(rs->last_cmd_buf, rs->cmd_buf, rs->cmd_buf_size);
+        rs->last_cmd_buf_size = rs->cmd_buf_size;
+        rs->last_cmd_buf_index = rs->cmd_buf_size;
+    }
+    if (rs->cmd_buf_index != rs->last_cmd_buf_index) {
+        delta = rs->cmd_buf_index - rs->last_cmd_buf_index;
+        if (delta > 0) {
+            for (i = 0; i < delta; i++) {
+                rs->printf_func(rs->opaque, "\033[C");
+            }
+        } else {
+            delta = -delta;
+            for (i = 0; i < delta; i++) {
+                rs->printf_func(rs->opaque, "\033[D");
+            }
+        }
+        rs->last_cmd_buf_index = rs->cmd_buf_index;
+    }
+    rs->flush_func(rs->opaque);
+}
+
+static void readline_insert_char(ReadLineState *rs, int ch)
+{
+    if (rs->cmd_buf_index < READLINE_CMD_BUF_SIZE) {
+        memmove(rs->cmd_buf + rs->cmd_buf_index + 1,
+                rs->cmd_buf + rs->cmd_buf_index,
+                rs->cmd_buf_size - rs->cmd_buf_index);
+        rs->cmd_buf[rs->cmd_buf_index] = ch;
+        rs->cmd_buf_size++;
+        rs->cmd_buf_index++;
+    }
+}
+
+static void readline_backward_char(ReadLineState *rs)
+{
+    if (rs->cmd_buf_index > 0) {
+        rs->cmd_buf_index--;
+    }
+}
+
+static void readline_forward_char(ReadLineState *rs)
+{
+    if (rs->cmd_buf_index < rs->cmd_buf_size) {
+        rs->cmd_buf_index++;
+    }
+}
+
+static void readline_delete_char(ReadLineState *rs)
+{
+    if (rs->cmd_buf_index < rs->cmd_buf_size) {
+        memmove(rs->cmd_buf + rs->cmd_buf_index,
+                rs->cmd_buf + rs->cmd_buf_index + 1,
+                rs->cmd_buf_size - rs->cmd_buf_index - 1);
+        rs->cmd_buf_size--;
+    }
+}
+
+static void readline_backspace(ReadLineState *rs)
+{
+    if (rs->cmd_buf_index > 0) {
+        readline_backward_char(rs);
+        readline_delete_char(rs);
+    }
+}
+
+static void readline_backword(ReadLineState *rs)
+{
+    int start;
+
+    if (rs->cmd_buf_index == 0 || rs->cmd_buf_index > rs->cmd_buf_size) {
+        return;
+    }
+
+    start = rs->cmd_buf_index - 1;
+
+    /* find first word (backwards) */
+    while (start > 0) {
+        if (!qemu_isspace(rs->cmd_buf[start])) {
+            break;
+        }
+
+        --start;
+    }
+
+    /* find first space (backwards) */
+    while (start > 0) {
+        if (qemu_isspace(rs->cmd_buf[start])) {
+            ++start;
+            break;
+        }
+
+        --start;
+    }
+
+    /* remove word */
+    if (start < rs->cmd_buf_index) {
+        memmove(rs->cmd_buf + start,
+                rs->cmd_buf + rs->cmd_buf_index,
+                rs->cmd_buf_size - rs->cmd_buf_index);
+        rs->cmd_buf_size -= rs->cmd_buf_index - start;
+        rs->cmd_buf_index = start;
+    }
+}
+
+static void readline_bol(ReadLineState *rs)
+{
+    rs->cmd_buf_index = 0;
+}
+
+static void readline_eol(ReadLineState *rs)
+{
+    rs->cmd_buf_index = rs->cmd_buf_size;
+}
+
+static void readline_up_char(ReadLineState *rs)
+{
+    int idx;
+
+    if (rs->hist_entry == 0) {
+        return;
+    }
+    if (rs->hist_entry == -1) {
+        /* Find latest entry */
+        for (idx = 0; idx < READLINE_MAX_CMDS; idx++) {
+            if (rs->history[idx] == NULL) {
+                break;
+            }
+        }
+        rs->hist_entry = idx;
+    }
+    rs->hist_entry--;
+    if (rs->hist_entry >= 0) {
+        pstrcpy(rs->cmd_buf, sizeof(rs->cmd_buf),
+                rs->history[rs->hist_entry]);
+        rs->cmd_buf_index = rs->cmd_buf_size = strlen(rs->cmd_buf);
+    }
+}
+
+static void readline_down_char(ReadLineState *rs)
+{
+    if (rs->hist_entry == -1) {
+        return;
+    }
+    if (rs->hist_entry < READLINE_MAX_CMDS - 1 &&
+        rs->history[++rs->hist_entry] != NULL) {
+        pstrcpy(rs->cmd_buf, sizeof(rs->cmd_buf),
+                rs->history[rs->hist_entry]);
+    } else {
+        rs->cmd_buf[0] = 0;
+        rs->hist_entry = -1;
+    }
+    rs->cmd_buf_index = rs->cmd_buf_size = strlen(rs->cmd_buf);
+}
+
+static void readline_hist_add(ReadLineState *rs, const char *cmdline)
+{
+    char *hist_entry, *new_entry;
+    int idx;
+
+    if (cmdline[0] == '\0') {
+        return;
+    }
+    new_entry = NULL;
+    if (rs->hist_entry != -1) {
+        /* We were editing an existing history entry: replace it */
+        hist_entry = rs->history[rs->hist_entry];
+        idx = rs->hist_entry;
+        if (strcmp(hist_entry, cmdline) == 0) {
+            goto same_entry;
+        }
+    }
+    /* Search cmdline in history buffers */
+    for (idx = 0; idx < READLINE_MAX_CMDS; idx++) {
+        hist_entry = rs->history[idx];
+        if (hist_entry == NULL) {
+            break;
+        }
+        if (strcmp(hist_entry, cmdline) == 0) {
+        same_entry:
+            new_entry = hist_entry;
+            /* Put this entry at the end of history */
+            memmove(&rs->history[idx], &rs->history[idx + 1],
+                    (READLINE_MAX_CMDS - (idx + 1)) * sizeof(char *));
+            rs->history[READLINE_MAX_CMDS - 1] = NULL;
+            for (; idx < READLINE_MAX_CMDS; idx++) {
+                if (rs->history[idx] == NULL) {
+                    break;
+                }
+            }
+            break;
+        }
+    }
+    if (idx == READLINE_MAX_CMDS) {
+        /* Need to get one free slot */
+        g_free(rs->history[0]);
+        memmove(rs->history, &rs->history[1],
+                (READLINE_MAX_CMDS - 1) * sizeof(char *));
+        rs->history[READLINE_MAX_CMDS - 1] = NULL;
+        idx = READLINE_MAX_CMDS - 1;
+    }
+    if (new_entry == NULL) {
+        new_entry = g_strdup(cmdline);
+    }
+    rs->history[idx] = new_entry;
+    rs->hist_entry = -1;
+}
+
+/* completion support */
+
+void readline_add_completion(ReadLineState *rs, const char *str)
+{
+    if (rs->nb_completions < READLINE_MAX_COMPLETIONS) {
+        int i;
+        for (i = 0; i < rs->nb_completions; i++) {
+            if (!strcmp(rs->completions[i], str)) {
+                return;
+            }
+        }
+        rs->completions[rs->nb_completions++] = g_strdup(str);
+    }
+}
+
+void readline_set_completion_index(ReadLineState *rs, int index)
+{
+    rs->completion_index = index;
+}
+
+static int completion_comp(const void *a, const void *b)
+{
+    return strcmp(*(const char **) a, *(const char **) b);
+}
+
+static void readline_completion(ReadLineState *rs)
+{
+    int len, i, j, max_width, nb_cols, max_prefix;
+    char *cmdline;
+
+    rs->nb_completions = 0;
+
+    cmdline = g_strndup(rs->cmd_buf, rs->cmd_buf_index);
+    rs->completion_finder(rs->opaque, cmdline);
+    g_free(cmdline);
+
+    /* no completion found */
+    if (rs->nb_completions <= 0) {
+        return;
+    }
+    if (rs->nb_completions == 1) {
+        len = strlen(rs->completions[0]);
+        for (i = rs->completion_index; i < len; i++) {
+            readline_insert_char(rs, rs->completions[0][i]);
+        }
+        /* extra space for next argument. XXX: make it more generic */
+        if (len > 0 && rs->completions[0][len - 1] != '/') {
+            readline_insert_char(rs, ' ');
+        }
+    } else {
+        qsort(rs->completions, rs->nb_completions, sizeof(char *),
+              completion_comp);
+        rs->printf_func(rs->opaque, "\n");
+        max_width = 0;
+        max_prefix = 0;
+        for (i = 0; i < rs->nb_completions; i++) {
+            len = strlen(rs->completions[i]);
+            if (i == 0) {
+                max_prefix = len;
+            } else {
+                if (len < max_prefix) {
+                    max_prefix = len;
+                }
+                for (j = 0; j < max_prefix; j++) {
+                    if (rs->completions[i][j] != rs->completions[0][j]) {
+                        max_prefix = j;
+                    }
+                }
+            }
+            if (len > max_width) {
+                max_width = len;
+            }
+        }
+        if (max_prefix > 0)
+            for (i = rs->completion_index; i < max_prefix; i++) {
+                readline_insert_char(rs, rs->completions[0][i]);
+            }
+        max_width += 2;
+        if (max_width < 10) {
+            max_width = 10;
+        } else if (max_width > 80) {
+            max_width = 80;
+        }
+        nb_cols = 80 / max_width;
+        j = 0;
+        for (i = 0; i < rs->nb_completions; i++) {
+            rs->printf_func(rs->opaque, "%-*s", max_width, rs->completions[i]);
+            if (++j == nb_cols || i == (rs->nb_completions - 1)) {
+                rs->printf_func(rs->opaque, "\n");
+                j = 0;
+            }
+        }
+        readline_show_prompt(rs);
+    }
+    for (i = 0; i < rs->nb_completions; i++) {
+        g_free(rs->completions[i]);
+    }
+}
+
+static void readline_clear_screen(ReadLineState *rs)
+{
+    rs->printf_func(rs->opaque, "\033[2J\033[1;1H");
+    readline_show_prompt(rs);
+}
+
+/* return true if command handled */
+void readline_handle_byte(ReadLineState *rs, int ch)
+{
+    switch (rs->esc_state) {
+    case IS_NORM:
+        switch (ch) {
+        case 1:
+            readline_bol(rs);
+            break;
+        case 4:
+            readline_delete_char(rs);
+            break;
+        case 5:
+            readline_eol(rs);
+            break;
+        case 9:
+            readline_completion(rs);
+            break;
+        case 12:
+            readline_clear_screen(rs);
+            break;
+        case 10:
+        case 13:
+            rs->cmd_buf[rs->cmd_buf_size] = '\0';
+            if (!rs->read_password) {
+                readline_hist_add(rs, rs->cmd_buf);
+            }
+            rs->printf_func(rs->opaque, "\n");
+            rs->cmd_buf_index = 0;
+            rs->cmd_buf_size = 0;
+            rs->last_cmd_buf_index = 0;
+            rs->last_cmd_buf_size = 0;
+            rs->readline_func(rs->opaque, rs->cmd_buf, rs->readline_opaque);
+            break;
+        case 23:
+            /* ^W */
+            readline_backword(rs);
+            break;
+        case 27:
+            rs->esc_state = IS_ESC;
+            break;
+        case 127:
+        case 8:
+            readline_backspace(rs);
+            break;
+        case 155:
+            rs->esc_state = IS_CSI;
+            break;
+        default:
+            if (ch >= 32) {
+                readline_insert_char(rs, ch);
+            }
+            break;
+        }
+        break;
+    case IS_ESC:
+        if (ch == '[') {
+            rs->esc_state = IS_CSI;
+            rs->esc_param = 0;
+        } else if (ch == 'O') {
+            rs->esc_state = IS_SS3;
+            rs->esc_param = 0;
+        } else {
+            rs->esc_state = IS_NORM;
+        }
+        break;
+    case IS_CSI:
+        switch (ch) {
+        case 'A':
+        case 'F':
+            readline_up_char(rs);
+            break;
+        case 'B':
+        case 'E':
+            readline_down_char(rs);
+            break;
+        case 'D':
+            readline_backward_char(rs);
+            break;
+        case 'C':
+            readline_forward_char(rs);
+            break;
+        case '0' ... '9':
+            rs->esc_param = rs->esc_param * 10 + (ch - '0');
+            goto the_end;
+        case '~':
+            switch (rs->esc_param) {
+            case 1:
+                readline_bol(rs);
+                break;
+            case 3:
+                readline_delete_char(rs);
+                break;
+            case 4:
+                readline_eol(rs);
+                break;
+            }
+            break;
+        default:
+            break;
+        }
+        rs->esc_state = IS_NORM;
+    the_end:
+        break;
+    case IS_SS3:
+        switch (ch) {
+        case 'F':
+            readline_eol(rs);
+            break;
+        case 'H':
+            readline_bol(rs);
+            break;
+        }
+        rs->esc_state = IS_NORM;
+        break;
+    }
+    readline_update(rs);
+}
+
+void readline_start(ReadLineState *rs, const char *prompt, int read_password,
+                    ReadLineFunc *readline_func, void *opaque)
+{
+    pstrcpy(rs->prompt, sizeof(rs->prompt), prompt);
+    rs->readline_func = readline_func;
+    rs->readline_opaque = opaque;
+    rs->read_password = read_password;
+    readline_restart(rs);
+}
+
+void readline_restart(ReadLineState *rs)
+{
+    rs->cmd_buf_index = 0;
+    rs->cmd_buf_size = 0;
+}
+
+const char *readline_get_history(ReadLineState *rs, unsigned int index)
+{
+    if (index >= READLINE_MAX_CMDS) {
+        return NULL;
+    }
+    return rs->history[index];
+}
+
+void readline_free(ReadLineState *rs)
+{
+    int i;
+
+    if (!rs) {
+        return;
+    }
+    for (i = 0; i < READLINE_MAX_CMDS; i++) {
+        g_free(rs->history[i]);
+    }
+    g_free(rs);
+}
+
+ReadLineState *readline_init(ReadLinePrintfFunc *printf_func,
+                             ReadLineFlushFunc *flush_func,
+                             void *opaque,
+                             ReadLineCompletionFunc *completion_finder)
+{
+    ReadLineState *rs = g_new0(ReadLineState, 1);
+
+    rs->hist_entry = -1;
+    rs->opaque = opaque;
+    rs->printf_func = printf_func;
+    rs->flush_func = flush_func;
+    rs->completion_finder = completion_finder;
+
+    return rs;
+}
diff --git a/util/selfmap.c b/util/selfmap.c
new file mode 100644
index 000000000..2ec99dfdd
--- /dev/null
+++ b/util/selfmap.c
@@ -0,0 +1,78 @@
+/*
+ * Utility function to get QEMU's own process map
+ *
+ * Copyright (c) 2020 Linaro Ltd
+ *
+ * SPDX-License-Identifier: GPL-2.0-or-later
+ */
+
+#include "qemu/osdep.h"
+#include "qemu/cutils.h"
+#include "qemu/selfmap.h"
+
+GSList *read_self_maps(void)
+{
+    gchar *maps;
+    GSList *map_info = NULL;
+
+    if (g_file_get_contents("/proc/self/maps", &maps, NULL, NULL)) {
+        gchar **lines = g_strsplit(maps, "\n", 0);
+        int i, entries = g_strv_length(lines);
+
+        for (i = 0; i < entries; i++) {
+            gchar **fields = g_strsplit(lines[i], " ", 6);
+            if (g_strv_length(fields) > 4) {
+                MapInfo *e = g_new0(MapInfo, 1);
+                int errors;
+                const char *end;
+
+                errors  = qemu_strtoul(fields[0], &end, 16, &e->start);
+                errors += qemu_strtoul(end + 1, NULL, 16, &e->end);
+
+                e->is_read  = fields[1][0] == 'r';
+                e->is_write = fields[1][1] == 'w';
+                e->is_exec  = fields[1][2] == 'x';
+                e->is_priv  = fields[1][3] == 'p';
+
+                errors += qemu_strtoul(fields[2], NULL, 16, &e->offset);
+                e->dev = g_strdup(fields[3]);
+                errors += qemu_strtou64(fields[4], NULL, 10, &e->inode);
+
+                /*
+                 * The last field may have leading spaces which we
+                 * need to strip.
+                 */
+                if (g_strv_length(fields) == 6) {
+                    e->path = g_strdup(g_strchug(fields[5]));
+                }
+                map_info = g_slist_prepend(map_info, e);
+            }
+
+            g_strfreev(fields);
+        }
+        g_strfreev(lines);
+        g_free(maps);
+    }
+
+    /* ensure the map data is in the same order we collected it */
+    return g_slist_reverse(map_info);
+}
+
+/**
+ * free_self_maps:
+ * @info: a GSlist
+ *
+ * Free a list of MapInfo structures.
+ */
+static void free_info(gpointer data)
+{
+    MapInfo *e = (MapInfo *) data;
+    g_free(e->dev);
+    g_free(e->path);
+    g_free(e);
+}
+
+void free_self_maps(GSList *info)
+{
+    g_slist_free_full(info, &free_info);
+}
diff --git a/util/stats64.c b/util/stats64.c
new file mode 100644
index 000000000..897613c94
--- /dev/null
+++ b/util/stats64.c
@@ -0,0 +1,137 @@
+/*
+ * Atomic operations on 64-bit quantities.
+ *
+ * Copyright (C) 2017 Red Hat, Inc.
+ *
+ * Author: Paolo Bonzini 
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
+ * See the COPYING file in the top-level directory.
+ */
+
+#include "qemu/osdep.h"
+#include "qemu/atomic.h"
+#include "qemu/stats64.h"
+#include "qemu/processor.h"
+
+#ifndef CONFIG_ATOMIC64
+static inline void stat64_rdlock(Stat64 *s)
+{
+    /* Keep out incoming writers to avoid them starving us. */
+    qatomic_add(&s->lock, 2);
+
+    /* If there is a concurrent writer, wait for it.  */
+    while (qatomic_read(&s->lock) & 1) {
+        cpu_relax();
+    }
+}
+
+static inline void stat64_rdunlock(Stat64 *s)
+{
+    qatomic_sub(&s->lock, 2);
+}
+
+static inline bool stat64_wrtrylock(Stat64 *s)
+{
+    return qatomic_cmpxchg(&s->lock, 0, 1) == 0;
+}
+
+static inline void stat64_wrunlock(Stat64 *s)
+{
+    qatomic_dec(&s->lock);
+}
+
+uint64_t stat64_get(const Stat64 *s)
+{
+    uint32_t high, low;
+
+    stat64_rdlock((Stat64 *)s);
+
+    /* 64-bit writes always take the lock, so we can read in
+     * any order.
+     */
+    high = qatomic_read(&s->high);
+    low = qatomic_read(&s->low);
+    stat64_rdunlock((Stat64 *)s);
+
+    return ((uint64_t)high << 32) | low;
+}
+
+bool stat64_add32_carry(Stat64 *s, uint32_t low, uint32_t high)
+{
+    uint32_t old;
+
+    if (!stat64_wrtrylock(s)) {
+        cpu_relax();
+        return false;
+    }
+
+    /* 64-bit reads always take the lock, so they don't care about the
+     * order of our update.  By updating s->low first, we can check
+     * whether we have to carry into s->high.
+     */
+    old = qatomic_fetch_add(&s->low, low);
+    high += (old + low) < old;
+    qatomic_add(&s->high, high);
+    stat64_wrunlock(s);
+    return true;
+}
+
+bool stat64_min_slow(Stat64 *s, uint64_t value)
+{
+    uint32_t high, low;
+    uint64_t orig;
+
+    if (!stat64_wrtrylock(s)) {
+        cpu_relax();
+        return false;
+    }
+
+    high = qatomic_read(&s->high);
+    low = qatomic_read(&s->low);
+
+    orig = ((uint64_t)high << 32) | low;
+    if (value < orig) {
+        /* We have to set low before high, just like stat64_min reads
+         * high before low.  The value may become higher temporarily, but
+         * stat64_get does not notice (it takes the lock) and the only ill
+         * effect on stat64_min is that the slow path may be triggered
+         * unnecessarily.
+         */
+        qatomic_set(&s->low, (uint32_t)value);
+        smp_wmb();
+        qatomic_set(&s->high, value >> 32);
+    }
+    stat64_wrunlock(s);
+    return true;
+}
+
+bool stat64_max_slow(Stat64 *s, uint64_t value)
+{
+    uint32_t high, low;
+    uint64_t orig;
+
+    if (!stat64_wrtrylock(s)) {
+        cpu_relax();
+        return false;
+    }
+
+    high = qatomic_read(&s->high);
+    low = qatomic_read(&s->low);
+
+    orig = ((uint64_t)high << 32) | low;
+    if (value > orig) {
+        /* We have to set low before high, just like stat64_max reads
+         * high before low.  The value may become lower temporarily, but
+         * stat64_get does not notice (it takes the lock) and the only ill
+         * effect on stat64_max is that the slow path may be triggered
+         * unnecessarily.
+         */
+        qatomic_set(&s->low, (uint32_t)value);
+        smp_wmb();
+        qatomic_set(&s->high, value >> 32);
+    }
+    stat64_wrunlock(s);
+    return true;
+}
+#endif
diff --git a/util/sys_membarrier.c b/util/sys_membarrier.c
new file mode 100644
index 000000000..1362c0c4c
--- /dev/null
+++ b/util/sys_membarrier.c
@@ -0,0 +1,50 @@
+/*
+ * Process-global memory barriers
+ *
+ * Copyright (c) 2018 Red Hat, Inc.
+ *
+ * Author: Paolo Bonzini 
+ */
+
+#include "qemu/osdep.h"
+#include "qemu/sys_membarrier.h"
+#include "qemu/error-report.h"
+
+#ifdef CONFIG_LINUX
+#include 
+#include 
+
+static int
+membarrier(int cmd, int flags)
+{
+    return syscall(__NR_membarrier, cmd, flags);
+}
+#endif
+
+void smp_mb_global(void)
+{
+#if defined CONFIG_WIN32
+    FlushProcessWriteBuffers();
+#elif defined CONFIG_LINUX
+    membarrier(MEMBARRIER_CMD_SHARED, 0);
+#else
+#error --enable-membarrier is not supported on this operating system.
+#endif
+}
+
+void smp_mb_global_init(void)
+{
+#ifdef CONFIG_LINUX
+    int ret = membarrier(MEMBARRIER_CMD_QUERY, 0);
+    if (ret < 0) {
+        error_report("This QEMU binary requires the membarrier system call.");
+        error_report("Please upgrade your system to a newer version of Linux");
+        exit(1);
+    }
+    if (!(ret & MEMBARRIER_CMD_SHARED)) {
+        error_report("This QEMU binary requires MEMBARRIER_CMD_SHARED support.");
+        error_report("Please upgrade your system to a newer version of Linux");
+        exit(1);
+    }
+#endif
+}
diff --git a/util/systemd.c b/util/systemd.c
new file mode 100644
index 000000000..5bcac9b40
--- /dev/null
+++ b/util/systemd.c
@@ -0,0 +1,79 @@
+/*
+ * systemd socket activation support
+ *
+ * Copyright 2017 Red Hat, Inc. and/or its affiliates
+ *
+ * Authors:
+ *  Richard W.M. Jones 
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
+ * See the COPYING file in the top-level directory.
+ */
+
+#include "qemu/osdep.h"
+#include "qemu/systemd.h"
+#include "qemu/cutils.h"
+#include "qemu/error-report.h"
+
+#ifndef _WIN32
+unsigned int check_socket_activation(void)
+{
+    const char *s;
+    unsigned long pid;
+    unsigned long nr_fds;
+    unsigned int i;
+    int fd;
+    int f;
+    int err;
+
+    s = getenv("LISTEN_PID");
+    if (s == NULL) {
+        return 0;
+    }
+    err = qemu_strtoul(s, NULL, 10, &pid);
+    if (err) {
+        return 0;
+    }
+    if (pid != getpid()) {
+        return 0;
+    }
+
+    s = getenv("LISTEN_FDS");
+    if (s == NULL) {
+        return 0;
+    }
+    err = qemu_strtoul(s, NULL, 10, &nr_fds);
+    if (err) {
+        return 0;
+    }
+    assert(nr_fds <= UINT_MAX);
+
+    /* So these are not passed to any child processes we might start. */
+    unsetenv("LISTEN_FDS");
+    unsetenv("LISTEN_PID");
+
+    /* So the file descriptors don't leak into child processes. */
+    for (i = 0; i < nr_fds; ++i) {
+        fd = FIRST_SOCKET_ACTIVATION_FD + i;
+        f = fcntl(fd, F_GETFD);
+        if (f == -1 || fcntl(fd, F_SETFD, f | FD_CLOEXEC) == -1) {
+            /* If we cannot set FD_CLOEXEC then it probably means the file
+             * descriptor is invalid, so socket activation has gone wrong
+             * and we should exit.
+             */
+            error_report("Socket activation failed: "
+                         "invalid file descriptor fd = %d: %s",
+                         fd, g_strerror(errno));
+            exit(EXIT_FAILURE);
+        }
+    }
+
+    return (unsigned int) nr_fds;
+}
+
+#else /* !_WIN32 */
+unsigned int check_socket_activation(void)
+{
+    return 0;
+}
+#endif
diff --git a/util/thread-pool.c b/util/thread-pool.c
new file mode 100644
index 000000000..785487b87
--- /dev/null
+++ b/util/thread-pool.c
@@ -0,0 +1,357 @@
+/*
+ * QEMU block layer thread pool
+ *
+ * Copyright IBM, Corp. 2008
+ * Copyright Red Hat, Inc. 2012
+ *
+ * Authors:
+ *  Anthony Liguori   
+ *  Paolo Bonzini     
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2.  See
+ * the COPYING file in the top-level directory.
+ *
+ * Contributions after 2012-01-13 are licensed under the terms of the
+ * GNU GPL, version 2 or (at your option) any later version.
+ */
+#include "qemu/osdep.h"
+#include "qemu/queue.h"
+#include "qemu/thread.h"
+#include "qemu/coroutine.h"
+#include "trace.h"
+#include "block/thread-pool.h"
+#include "qemu/main-loop.h"
+
+static void do_spawn_thread(ThreadPool *pool);
+
+typedef struct ThreadPoolElement ThreadPoolElement;
+
+enum ThreadState {
+    THREAD_QUEUED,
+    THREAD_ACTIVE,
+    THREAD_DONE,
+};
+
+struct ThreadPoolElement {
+    BlockAIOCB common;
+    ThreadPool *pool;
+    ThreadPoolFunc *func;
+    void *arg;
+
+    /* Moving state out of THREAD_QUEUED is protected by lock.  After
+     * that, only the worker thread can write to it.  Reads and writes
+     * of state and ret are ordered with memory barriers.
+     */
+    enum ThreadState state;
+    int ret;
+
+    /* Access to this list is protected by lock.  */
+    QTAILQ_ENTRY(ThreadPoolElement) reqs;
+
+    /* Access to this list is protected by the global mutex.  */
+    QLIST_ENTRY(ThreadPoolElement) all;
+};
+
+struct ThreadPool {
+    AioContext *ctx;
+    QEMUBH *completion_bh;
+    QemuMutex lock;
+    QemuCond worker_stopped;
+    QemuSemaphore sem;
+    int max_threads;
+    QEMUBH *new_thread_bh;
+
+    /* The following variables are only accessed from one AioContext. */
+    QLIST_HEAD(, ThreadPoolElement) head;
+
+    /* The following variables are protected by lock.  */
+    QTAILQ_HEAD(, ThreadPoolElement) request_list;
+    int cur_threads;
+    int idle_threads;
+    int new_threads;     /* backlog of threads we need to create */
+    int pending_threads; /* threads created but not running yet */
+    bool stopping;
+};
+
+static void *worker_thread(void *opaque)
+{
+    ThreadPool *pool = opaque;
+
+    qemu_mutex_lock(&pool->lock);
+    pool->pending_threads--;
+    do_spawn_thread(pool);
+
+    while (!pool->stopping) {
+        ThreadPoolElement *req;
+        int ret;
+
+        do {
+            pool->idle_threads++;
+            qemu_mutex_unlock(&pool->lock);
+            ret = qemu_sem_timedwait(&pool->sem, 10000);
+            qemu_mutex_lock(&pool->lock);
+            pool->idle_threads--;
+        } while (ret == -1 && !QTAILQ_EMPTY(&pool->request_list));
+        if (ret == -1 || pool->stopping) {
+            break;
+        }
+
+        req = QTAILQ_FIRST(&pool->request_list);
+        QTAILQ_REMOVE(&pool->request_list, req, reqs);
+        req->state = THREAD_ACTIVE;
+        qemu_mutex_unlock(&pool->lock);
+
+        ret = req->func(req->arg);
+
+        req->ret = ret;
+        /* Write ret before state.  */
+        smp_wmb();
+        req->state = THREAD_DONE;
+
+        qemu_mutex_lock(&pool->lock);
+
+        qemu_bh_schedule(pool->completion_bh);
+    }
+
+    pool->cur_threads--;
+    qemu_cond_signal(&pool->worker_stopped);
+    qemu_mutex_unlock(&pool->lock);
+    return NULL;
+}
+
+static void do_spawn_thread(ThreadPool *pool)
+{
+    QemuThread t;
+
+    /* Runs with lock taken.  */
+    if (!pool->new_threads) {
+        return;
+    }
+
+    pool->new_threads--;
+    pool->pending_threads++;
+
+    qemu_thread_create(&t, "worker", worker_thread, pool, QEMU_THREAD_DETACHED);
+}
+
+static void spawn_thread_bh_fn(void *opaque)
+{
+    ThreadPool *pool = opaque;
+
+    qemu_mutex_lock(&pool->lock);
+    do_spawn_thread(pool);
+    qemu_mutex_unlock(&pool->lock);
+}
+
+static void spawn_thread(ThreadPool *pool)
+{
+    pool->cur_threads++;
+    pool->new_threads++;
+    /* If there are threads being created, they will spawn new workers, so
+     * we don't spend time creating many threads in a loop holding a mutex or
+     * starving the current vcpu.
+     *
+     * If there are no idle threads, ask the main thread to create one, so we
+     * inherit the correct affinity instead of the vcpu affinity.
+     */
+    if (!pool->pending_threads) {
+        qemu_bh_schedule(pool->new_thread_bh);
+    }
+}
+
+static void thread_pool_completion_bh(void *opaque)
+{
+    ThreadPool *pool = opaque;
+    ThreadPoolElement *elem, *next;
+
+    aio_context_acquire(pool->ctx);
+restart:
+    QLIST_FOREACH_SAFE(elem, &pool->head, all, next) {
+        if (elem->state != THREAD_DONE) {
+            continue;
+        }
+
+        trace_thread_pool_complete(pool, elem, elem->common.opaque,
+                                   elem->ret);
+        QLIST_REMOVE(elem, all);
+
+        if (elem->common.cb) {
+            /* Read state before ret.  */
+            smp_rmb();
+
+            /* Schedule ourselves in case elem->common.cb() calls aio_poll() to
+             * wait for another request that completed at the same time.
+             */
+            qemu_bh_schedule(pool->completion_bh);
+
+            aio_context_release(pool->ctx);
+            elem->common.cb(elem->common.opaque, elem->ret);
+            aio_context_acquire(pool->ctx);
+
+            /* We can safely cancel the completion_bh here regardless of someone
+             * else having scheduled it meanwhile because we reenter the
+             * completion function anyway (goto restart).
+             */
+            qemu_bh_cancel(pool->completion_bh);
+
+            qemu_aio_unref(elem);
+            goto restart;
+        } else {
+            qemu_aio_unref(elem);
+        }
+    }
+    aio_context_release(pool->ctx);
+}
+
+static void thread_pool_cancel(BlockAIOCB *acb)
+{
+    ThreadPoolElement *elem = (ThreadPoolElement *)acb;
+    ThreadPool *pool = elem->pool;
+
+    trace_thread_pool_cancel(elem, elem->common.opaque);
+
+    QEMU_LOCK_GUARD(&pool->lock);
+    if (elem->state == THREAD_QUEUED &&
+        /* No thread has yet started working on elem. we can try to "steal"
+         * the item from the worker if we can get a signal from the
+         * semaphore.  Because this is non-blocking, we can do it with
+         * the lock taken and ensure that elem will remain THREAD_QUEUED.
+         */
+        qemu_sem_timedwait(&pool->sem, 0) == 0) {
+        QTAILQ_REMOVE(&pool->request_list, elem, reqs);
+        qemu_bh_schedule(pool->completion_bh);
+
+        elem->state = THREAD_DONE;
+        elem->ret = -ECANCELED;
+    }
+
+}
+
+static AioContext *thread_pool_get_aio_context(BlockAIOCB *acb)
+{
+    ThreadPoolElement *elem = (ThreadPoolElement *)acb;
+    ThreadPool *pool = elem->pool;
+    return pool->ctx;
+}
+
+static const AIOCBInfo thread_pool_aiocb_info = {
+    .aiocb_size         = sizeof(ThreadPoolElement),
+    .cancel_async       = thread_pool_cancel,
+    .get_aio_context    = thread_pool_get_aio_context,
+};
+
+BlockAIOCB *thread_pool_submit_aio(ThreadPool *pool,
+        ThreadPoolFunc *func, void *arg,
+        BlockCompletionFunc *cb, void *opaque)
+{
+    ThreadPoolElement *req;
+
+    req = qemu_aio_get(&thread_pool_aiocb_info, NULL, cb, opaque);
+    req->func = func;
+    req->arg = arg;
+    req->state = THREAD_QUEUED;
+    req->pool = pool;
+
+    QLIST_INSERT_HEAD(&pool->head, req, all);
+
+    trace_thread_pool_submit(pool, req, arg);
+
+    qemu_mutex_lock(&pool->lock);
+    if (pool->idle_threads == 0 && pool->cur_threads < pool->max_threads) {
+        spawn_thread(pool);
+    }
+    QTAILQ_INSERT_TAIL(&pool->request_list, req, reqs);
+    qemu_mutex_unlock(&pool->lock);
+    qemu_sem_post(&pool->sem);
+    return &req->common;
+}
+
+typedef struct ThreadPoolCo {
+    Coroutine *co;
+    int ret;
+} ThreadPoolCo;
+
+static void thread_pool_co_cb(void *opaque, int ret)
+{
+    ThreadPoolCo *co = opaque;
+
+    co->ret = ret;
+    aio_co_wake(co->co);
+}
+
+int coroutine_fn thread_pool_submit_co(ThreadPool *pool, ThreadPoolFunc *func,
+                                       void *arg)
+{
+    ThreadPoolCo tpc = { .co = qemu_coroutine_self(), .ret = -EINPROGRESS };
+    assert(qemu_in_coroutine());
+    thread_pool_submit_aio(pool, func, arg, thread_pool_co_cb, &tpc);
+    qemu_coroutine_yield();
+    return tpc.ret;
+}
+
+void thread_pool_submit(ThreadPool *pool, ThreadPoolFunc *func, void *arg)
+{
+    thread_pool_submit_aio(pool, func, arg, NULL, NULL);
+}
+
+static void thread_pool_init_one(ThreadPool *pool, AioContext *ctx)
+{
+    if (!ctx) {
+        ctx = qemu_get_aio_context();
+    }
+
+    memset(pool, 0, sizeof(*pool));
+    pool->ctx = ctx;
+    pool->completion_bh = aio_bh_new(ctx, thread_pool_completion_bh, pool);
+    qemu_mutex_init(&pool->lock);
+    qemu_cond_init(&pool->worker_stopped);
+    qemu_sem_init(&pool->sem, 0);
+    if (sizeof(pool) == 4) {
+        /* 32bit systems run out of virtual memory quickly */
+        pool->max_threads = 4;
+    } else {
+        pool->max_threads = 64;
+    }
+    pool->new_thread_bh = aio_bh_new(ctx, spawn_thread_bh_fn, pool);
+
+    QLIST_INIT(&pool->head);
+    QTAILQ_INIT(&pool->request_list);
+}
+
+ThreadPool *thread_pool_new(AioContext *ctx)
+{
+    ThreadPool *pool = g_new(ThreadPool, 1);
+    thread_pool_init_one(pool, ctx);
+    return pool;
+}
+
+void thread_pool_free(ThreadPool *pool)
+{
+    if (!pool) {
+        return;
+    }
+
+    assert(QLIST_EMPTY(&pool->head));
+
+    qemu_mutex_lock(&pool->lock);
+
+    /* Stop new threads from spawning */
+    qemu_bh_delete(pool->new_thread_bh);
+    pool->cur_threads -= pool->new_threads;
+    pool->new_threads = 0;
+
+    /* Wait for worker threads to terminate */
+    pool->stopping = true;
+    while (pool->cur_threads > 0) {
+        qemu_sem_post(&pool->sem);
+        qemu_cond_wait(&pool->worker_stopped, &pool->lock);
+    }
+
+    qemu_mutex_unlock(&pool->lock);
+
+    qemu_bh_delete(pool->completion_bh);
+    qemu_sem_destroy(&pool->sem);
+    qemu_cond_destroy(&pool->worker_stopped);
+    qemu_mutex_destroy(&pool->lock);
+    g_free(pool);
+}
diff --git a/util/throttle.c b/util/throttle.c
new file mode 100644
index 000000000..b38e742da
--- /dev/null
+++ b/util/throttle.c
@@ -0,0 +1,638 @@
+/*
+ * QEMU throttling infrastructure
+ *
+ * Copyright (C) Nodalink, EURL. 2013-2014
+ * Copyright (C) Igalia, S.L. 2015
+ *
+ * Authors:
+ *   Benoît Canet 
+ *   Alberto Garcia 
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation; either version 2 or
+ * (at your option) version 3 of the License.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, see .
+ */
+
+#include "qemu/osdep.h"
+#include "qapi/error.h"
+#include "qemu/throttle.h"
+#include "qemu/timer.h"
+#include "block/aio.h"
+
+/* This function make a bucket leak
+ *
+ * @bkt:   the bucket to make leak
+ * @delta_ns: the time delta
+ */
+void throttle_leak_bucket(LeakyBucket *bkt, int64_t delta_ns)
+{
+    double leak;
+
+    /* compute how much to leak */
+    leak = (bkt->avg * (double) delta_ns) / NANOSECONDS_PER_SECOND;
+
+    /* make the bucket leak */
+    bkt->level = MAX(bkt->level - leak, 0);
+
+    /* if we allow bursts for more than one second we also need to
+     * keep track of bkt->burst_level so the bkt->max goal per second
+     * is attained */
+    if (bkt->burst_length > 1) {
+        leak = (bkt->max * (double) delta_ns) / NANOSECONDS_PER_SECOND;
+        bkt->burst_level = MAX(bkt->burst_level - leak, 0);
+    }
+}
+
+/* Calculate the time delta since last leak and make proportionals leaks
+ *
+ * @now:      the current timestamp in ns
+ */
+static void throttle_do_leak(ThrottleState *ts, int64_t now)
+{
+    /* compute the time elapsed since the last leak */
+    int64_t delta_ns = now - ts->previous_leak;
+    int i;
+
+    ts->previous_leak = now;
+
+    if (delta_ns <= 0) {
+        return;
+    }
+
+    /* make each bucket leak */
+    for (i = 0; i < BUCKETS_COUNT; i++) {
+        throttle_leak_bucket(&ts->cfg.buckets[i], delta_ns);
+    }
+}
+
+/* do the real job of computing the time to wait
+ *
+ * @limit: the throttling limit
+ * @extra: the number of operation to delay
+ * @ret:   the time to wait in ns
+ */
+static int64_t throttle_do_compute_wait(double limit, double extra)
+{
+    double wait = extra * NANOSECONDS_PER_SECOND;
+    wait /= limit;
+    return wait;
+}
+
+/* This function compute the wait time in ns that a leaky bucket should trigger
+ *
+ * @bkt: the leaky bucket we operate on
+ * @ret: the resulting wait time in ns or 0 if the operation can go through
+ */
+int64_t throttle_compute_wait(LeakyBucket *bkt)
+{
+    double extra; /* the number of extra units blocking the io */
+    double bucket_size;   /* I/O before throttling to bkt->avg */
+    double burst_bucket_size; /* Before throttling to bkt->max */
+
+    if (!bkt->avg) {
+        return 0;
+    }
+
+    if (!bkt->max) {
+        /* If bkt->max is 0 we still want to allow short bursts of I/O
+         * from the guest, otherwise every other request will be throttled
+         * and performance will suffer considerably. */
+        bucket_size = (double) bkt->avg / 10;
+        burst_bucket_size = 0;
+    } else {
+        /* If we have a burst limit then we have to wait until all I/O
+         * at burst rate has finished before throttling to bkt->avg */
+        bucket_size = bkt->max * bkt->burst_length;
+        burst_bucket_size = (double) bkt->max / 10;
+    }
+
+    /* If the main bucket is full then we have to wait */
+    extra = bkt->level - bucket_size;
+    if (extra > 0) {
+        return throttle_do_compute_wait(bkt->avg, extra);
+    }
+
+    /* If the main bucket is not full yet we still have to check the
+     * burst bucket in order to enforce the burst limit */
+    if (bkt->burst_length > 1) {
+        assert(bkt->max > 0); /* see throttle_is_valid() */
+        extra = bkt->burst_level - burst_bucket_size;
+        if (extra > 0) {
+            return throttle_do_compute_wait(bkt->max, extra);
+        }
+    }
+
+    return 0;
+}
+
+/* This function compute the time that must be waited while this IO
+ *
+ * @is_write:   true if the current IO is a write, false if it's a read
+ * @ret:        time to wait
+ */
+static int64_t throttle_compute_wait_for(ThrottleState *ts,
+                                         bool is_write)
+{
+    BucketType to_check[2][4] = { {THROTTLE_BPS_TOTAL,
+                                   THROTTLE_OPS_TOTAL,
+                                   THROTTLE_BPS_READ,
+                                   THROTTLE_OPS_READ},
+                                  {THROTTLE_BPS_TOTAL,
+                                   THROTTLE_OPS_TOTAL,
+                                   THROTTLE_BPS_WRITE,
+                                   THROTTLE_OPS_WRITE}, };
+    int64_t wait, max_wait = 0;
+    int i;
+
+    for (i = 0; i < 4; i++) {
+        BucketType index = to_check[is_write][i];
+        wait = throttle_compute_wait(&ts->cfg.buckets[index]);
+        if (wait > max_wait) {
+            max_wait = wait;
+        }
+    }
+
+    return max_wait;
+}
+
+/* compute the timer for this type of operation
+ *
+ * @is_write:   the type of operation
+ * @now:        the current clock timestamp
+ * @next_timestamp: the resulting timer
+ * @ret:        true if a timer must be set
+ */
+static bool throttle_compute_timer(ThrottleState *ts,
+                                   bool is_write,
+                                   int64_t now,
+                                   int64_t *next_timestamp)
+{
+    int64_t wait;
+
+    /* leak proportionally to the time elapsed */
+    throttle_do_leak(ts, now);
+
+    /* compute the wait time if any */
+    wait = throttle_compute_wait_for(ts, is_write);
+
+    /* if the code must wait compute when the next timer should fire */
+    if (wait) {
+        *next_timestamp = now + wait;
+        return true;
+    }
+
+    /* else no need to wait at all */
+    *next_timestamp = now;
+    return false;
+}
+
+/* Add timers to event loop */
+void throttle_timers_attach_aio_context(ThrottleTimers *tt,
+                                        AioContext *new_context)
+{
+    tt->timers[0] = aio_timer_new(new_context, tt->clock_type, SCALE_NS,
+                                  tt->read_timer_cb, tt->timer_opaque);
+    tt->timers[1] = aio_timer_new(new_context, tt->clock_type, SCALE_NS,
+                                  tt->write_timer_cb, tt->timer_opaque);
+}
+
+/*
+ * Initialize the ThrottleConfig structure to a valid state
+ * @cfg: the config to initialize
+ */
+void throttle_config_init(ThrottleConfig *cfg)
+{
+    unsigned i;
+    memset(cfg, 0, sizeof(*cfg));
+    for (i = 0; i < BUCKETS_COUNT; i++) {
+        cfg->buckets[i].burst_length = 1;
+    }
+}
+
+/* To be called first on the ThrottleState */
+void throttle_init(ThrottleState *ts)
+{
+    memset(ts, 0, sizeof(ThrottleState));
+    throttle_config_init(&ts->cfg);
+}
+
+/* To be called first on the ThrottleTimers */
+void throttle_timers_init(ThrottleTimers *tt,
+                          AioContext *aio_context,
+                          QEMUClockType clock_type,
+                          QEMUTimerCB *read_timer_cb,
+                          QEMUTimerCB *write_timer_cb,
+                          void *timer_opaque)
+{
+    memset(tt, 0, sizeof(ThrottleTimers));
+
+    tt->clock_type = clock_type;
+    tt->read_timer_cb = read_timer_cb;
+    tt->write_timer_cb = write_timer_cb;
+    tt->timer_opaque = timer_opaque;
+    throttle_timers_attach_aio_context(tt, aio_context);
+}
+
+/* destroy a timer */
+static void throttle_timer_destroy(QEMUTimer **timer)
+{
+    assert(*timer != NULL);
+
+    timer_del(*timer);
+    timer_free(*timer);
+    *timer = NULL;
+}
+
+/* Remove timers from event loop */
+void throttle_timers_detach_aio_context(ThrottleTimers *tt)
+{
+    int i;
+
+    for (i = 0; i < 2; i++) {
+        throttle_timer_destroy(&tt->timers[i]);
+    }
+}
+
+/* To be called last on the ThrottleTimers */
+void throttle_timers_destroy(ThrottleTimers *tt)
+{
+    throttle_timers_detach_aio_context(tt);
+}
+
+/* is any throttling timer configured */
+bool throttle_timers_are_initialized(ThrottleTimers *tt)
+{
+    if (tt->timers[0]) {
+        return true;
+    }
+
+    return false;
+}
+
+/* Does any throttling must be done
+ *
+ * @cfg: the throttling configuration to inspect
+ * @ret: true if throttling must be done else false
+ */
+bool throttle_enabled(ThrottleConfig *cfg)
+{
+    int i;
+
+    for (i = 0; i < BUCKETS_COUNT; i++) {
+        if (cfg->buckets[i].avg > 0) {
+            return true;
+        }
+    }
+
+    return false;
+}
+
+/* check if a throttling configuration is valid
+ * @cfg: the throttling configuration to inspect
+ * @ret: true if valid else false
+ * @errp: error object
+ */
+bool throttle_is_valid(ThrottleConfig *cfg, Error **errp)
+{
+    int i;
+    bool bps_flag, ops_flag;
+    bool bps_max_flag, ops_max_flag;
+
+    bps_flag = cfg->buckets[THROTTLE_BPS_TOTAL].avg &&
+               (cfg->buckets[THROTTLE_BPS_READ].avg ||
+                cfg->buckets[THROTTLE_BPS_WRITE].avg);
+
+    ops_flag = cfg->buckets[THROTTLE_OPS_TOTAL].avg &&
+               (cfg->buckets[THROTTLE_OPS_READ].avg ||
+                cfg->buckets[THROTTLE_OPS_WRITE].avg);
+
+    bps_max_flag = cfg->buckets[THROTTLE_BPS_TOTAL].max &&
+                  (cfg->buckets[THROTTLE_BPS_READ].max  ||
+                   cfg->buckets[THROTTLE_BPS_WRITE].max);
+
+    ops_max_flag = cfg->buckets[THROTTLE_OPS_TOTAL].max &&
+                   (cfg->buckets[THROTTLE_OPS_READ].max ||
+                   cfg->buckets[THROTTLE_OPS_WRITE].max);
+
+    if (bps_flag || ops_flag || bps_max_flag || ops_max_flag) {
+        error_setg(errp, "bps/iops/max total values and read/write values"
+                   " cannot be used at the same time");
+        return false;
+    }
+
+    if (cfg->op_size &&
+        !cfg->buckets[THROTTLE_OPS_TOTAL].avg &&
+        !cfg->buckets[THROTTLE_OPS_READ].avg &&
+        !cfg->buckets[THROTTLE_OPS_WRITE].avg) {
+        error_setg(errp, "iops size requires an iops value to be set");
+        return false;
+    }
+
+    for (i = 0; i < BUCKETS_COUNT; i++) {
+        LeakyBucket *bkt = &cfg->buckets[i];
+        if (bkt->avg > THROTTLE_VALUE_MAX || bkt->max > THROTTLE_VALUE_MAX) {
+            error_setg(errp, "bps/iops/max values must be within [0, %lld]",
+                       THROTTLE_VALUE_MAX);
+            return false;
+        }
+
+        if (!bkt->burst_length) {
+            error_setg(errp, "the burst length cannot be 0");
+            return false;
+        }
+
+        if (bkt->burst_length > 1 && !bkt->max) {
+            error_setg(errp, "burst length set without burst rate");
+            return false;
+        }
+
+        if (bkt->max && bkt->burst_length > THROTTLE_VALUE_MAX / bkt->max) {
+            error_setg(errp, "burst length too high for this burst rate");
+            return false;
+        }
+
+        if (bkt->max && !bkt->avg) {
+            error_setg(errp, "bps_max/iops_max require corresponding"
+                       " bps/iops values");
+            return false;
+        }
+
+        if (bkt->max && bkt->max < bkt->avg) {
+            error_setg(errp, "bps_max/iops_max cannot be lower than bps/iops");
+            return false;
+        }
+    }
+
+    return true;
+}
+
+/* Used to configure the throttle
+ *
+ * @ts: the throttle state we are working on
+ * @clock_type: the group's clock_type
+ * @cfg: the config to set
+ */
+void throttle_config(ThrottleState *ts,
+                     QEMUClockType clock_type,
+                     ThrottleConfig *cfg)
+{
+    int i;
+
+    ts->cfg = *cfg;
+
+    /* Zero bucket level */
+    for (i = 0; i < BUCKETS_COUNT; i++) {
+        ts->cfg.buckets[i].level = 0;
+        ts->cfg.buckets[i].burst_level = 0;
+    }
+
+    ts->previous_leak = qemu_clock_get_ns(clock_type);
+}
+
+/* used to get config
+ *
+ * @ts:  the throttle state we are working on
+ * @cfg: the config to write
+ */
+void throttle_get_config(ThrottleState *ts, ThrottleConfig *cfg)
+{
+    *cfg = ts->cfg;
+}
+
+
+/* Schedule the read or write timer if needed
+ *
+ * NOTE: this function is not unit tested due to it's usage of timer_mod
+ *
+ * @tt:       the timers structure
+ * @is_write: the type of operation (read/write)
+ * @ret:      true if the timer has been scheduled else false
+ */
+bool throttle_schedule_timer(ThrottleState *ts,
+                             ThrottleTimers *tt,
+                             bool is_write)
+{
+    int64_t now = qemu_clock_get_ns(tt->clock_type);
+    int64_t next_timestamp;
+    bool must_wait;
+
+    must_wait = throttle_compute_timer(ts,
+                                       is_write,
+                                       now,
+                                       &next_timestamp);
+
+    /* request not throttled */
+    if (!must_wait) {
+        return false;
+    }
+
+    /* request throttled and timer pending -> do nothing */
+    if (timer_pending(tt->timers[is_write])) {
+        return true;
+    }
+
+    /* request throttled and timer not pending -> arm timer */
+    timer_mod(tt->timers[is_write], next_timestamp);
+    return true;
+}
+
+/* do the accounting for this operation
+ *
+ * @is_write: the type of operation (read/write)
+ * @size:     the size of the operation
+ */
+void throttle_account(ThrottleState *ts, bool is_write, uint64_t size)
+{
+    const BucketType bucket_types_size[2][2] = {
+        { THROTTLE_BPS_TOTAL, THROTTLE_BPS_READ },
+        { THROTTLE_BPS_TOTAL, THROTTLE_BPS_WRITE }
+    };
+    const BucketType bucket_types_units[2][2] = {
+        { THROTTLE_OPS_TOTAL, THROTTLE_OPS_READ },
+        { THROTTLE_OPS_TOTAL, THROTTLE_OPS_WRITE }
+    };
+    double units = 1.0;
+    unsigned i;
+
+    /* if cfg.op_size is defined and smaller than size we compute unit count */
+    if (ts->cfg.op_size && size > ts->cfg.op_size) {
+        units = (double) size / ts->cfg.op_size;
+    }
+
+    for (i = 0; i < 2; i++) {
+        LeakyBucket *bkt;
+
+        bkt = &ts->cfg.buckets[bucket_types_size[is_write][i]];
+        bkt->level += size;
+        if (bkt->burst_length > 1) {
+            bkt->burst_level += size;
+        }
+
+        bkt = &ts->cfg.buckets[bucket_types_units[is_write][i]];
+        bkt->level += units;
+        if (bkt->burst_length > 1) {
+            bkt->burst_level += units;
+        }
+    }
+}
+
+/* return a ThrottleConfig based on the options in a ThrottleLimits
+ *
+ * @arg:    the ThrottleLimits object to read from
+ * @cfg:    the ThrottleConfig to edit
+ * @errp:   error object
+ */
+void throttle_limits_to_config(ThrottleLimits *arg, ThrottleConfig *cfg,
+                               Error **errp)
+{
+    if (arg->has_bps_total) {
+        cfg->buckets[THROTTLE_BPS_TOTAL].avg = arg->bps_total;
+    }
+    if (arg->has_bps_read) {
+        cfg->buckets[THROTTLE_BPS_READ].avg  = arg->bps_read;
+    }
+    if (arg->has_bps_write) {
+        cfg->buckets[THROTTLE_BPS_WRITE].avg = arg->bps_write;
+    }
+
+    if (arg->has_iops_total) {
+        cfg->buckets[THROTTLE_OPS_TOTAL].avg = arg->iops_total;
+    }
+    if (arg->has_iops_read) {
+        cfg->buckets[THROTTLE_OPS_READ].avg  = arg->iops_read;
+    }
+    if (arg->has_iops_write) {
+        cfg->buckets[THROTTLE_OPS_WRITE].avg = arg->iops_write;
+    }
+
+    if (arg->has_bps_total_max) {
+        cfg->buckets[THROTTLE_BPS_TOTAL].max = arg->bps_total_max;
+    }
+    if (arg->has_bps_read_max) {
+        cfg->buckets[THROTTLE_BPS_READ].max = arg->bps_read_max;
+    }
+    if (arg->has_bps_write_max) {
+        cfg->buckets[THROTTLE_BPS_WRITE].max = arg->bps_write_max;
+    }
+    if (arg->has_iops_total_max) {
+        cfg->buckets[THROTTLE_OPS_TOTAL].max = arg->iops_total_max;
+    }
+    if (arg->has_iops_read_max) {
+        cfg->buckets[THROTTLE_OPS_READ].max = arg->iops_read_max;
+    }
+    if (arg->has_iops_write_max) {
+        cfg->buckets[THROTTLE_OPS_WRITE].max = arg->iops_write_max;
+    }
+
+    if (arg->has_bps_total_max_length) {
+        if (arg->bps_total_max_length > UINT_MAX) {
+            error_setg(errp, "bps-total-max-length value must be in"
+                             " the range [0, %u]", UINT_MAX);
+            return;
+        }
+        cfg->buckets[THROTTLE_BPS_TOTAL].burst_length = arg->bps_total_max_length;
+    }
+    if (arg->has_bps_read_max_length) {
+        if (arg->bps_read_max_length > UINT_MAX) {
+            error_setg(errp, "bps-read-max-length value must be in"
+                             " the range [0, %u]", UINT_MAX);
+            return;
+        }
+        cfg->buckets[THROTTLE_BPS_READ].burst_length = arg->bps_read_max_length;
+    }
+    if (arg->has_bps_write_max_length) {
+        if (arg->bps_write_max_length > UINT_MAX) {
+            error_setg(errp, "bps-write-max-length value must be in"
+                             " the range [0, %u]", UINT_MAX);
+            return;
+        }
+        cfg->buckets[THROTTLE_BPS_WRITE].burst_length = arg->bps_write_max_length;
+    }
+    if (arg->has_iops_total_max_length) {
+        if (arg->iops_total_max_length > UINT_MAX) {
+            error_setg(errp, "iops-total-max-length value must be in"
+                             " the range [0, %u]", UINT_MAX);
+            return;
+        }
+        cfg->buckets[THROTTLE_OPS_TOTAL].burst_length = arg->iops_total_max_length;
+    }
+    if (arg->has_iops_read_max_length) {
+        if (arg->iops_read_max_length > UINT_MAX) {
+            error_setg(errp, "iops-read-max-length value must be in"
+                             " the range [0, %u]", UINT_MAX);
+            return;
+        }
+        cfg->buckets[THROTTLE_OPS_READ].burst_length = arg->iops_read_max_length;
+    }
+    if (arg->has_iops_write_max_length) {
+        if (arg->iops_write_max_length > UINT_MAX) {
+            error_setg(errp, "iops-write-max-length value must be in"
+                             " the range [0, %u]", UINT_MAX);
+            return;
+        }
+        cfg->buckets[THROTTLE_OPS_WRITE].burst_length = arg->iops_write_max_length;
+    }
+
+    if (arg->has_iops_size) {
+        cfg->op_size = arg->iops_size;
+    }
+
+    throttle_is_valid(cfg, errp);
+}
+
+/* write the options of a ThrottleConfig to a ThrottleLimits
+ *
+ * @cfg:    the ThrottleConfig to read from
+ * @var:    the ThrottleLimits to write to
+ */
+void throttle_config_to_limits(ThrottleConfig *cfg, ThrottleLimits *var)
+{
+    var->bps_total               = cfg->buckets[THROTTLE_BPS_TOTAL].avg;
+    var->bps_read                = cfg->buckets[THROTTLE_BPS_READ].avg;
+    var->bps_write               = cfg->buckets[THROTTLE_BPS_WRITE].avg;
+    var->iops_total              = cfg->buckets[THROTTLE_OPS_TOTAL].avg;
+    var->iops_read               = cfg->buckets[THROTTLE_OPS_READ].avg;
+    var->iops_write              = cfg->buckets[THROTTLE_OPS_WRITE].avg;
+    var->bps_total_max           = cfg->buckets[THROTTLE_BPS_TOTAL].max;
+    var->bps_read_max            = cfg->buckets[THROTTLE_BPS_READ].max;
+    var->bps_write_max           = cfg->buckets[THROTTLE_BPS_WRITE].max;
+    var->iops_total_max          = cfg->buckets[THROTTLE_OPS_TOTAL].max;
+    var->iops_read_max           = cfg->buckets[THROTTLE_OPS_READ].max;
+    var->iops_write_max          = cfg->buckets[THROTTLE_OPS_WRITE].max;
+    var->bps_total_max_length    = cfg->buckets[THROTTLE_BPS_TOTAL].burst_length;
+    var->bps_read_max_length     = cfg->buckets[THROTTLE_BPS_READ].burst_length;
+    var->bps_write_max_length    = cfg->buckets[THROTTLE_BPS_WRITE].burst_length;
+    var->iops_total_max_length   = cfg->buckets[THROTTLE_OPS_TOTAL].burst_length;
+    var->iops_read_max_length    = cfg->buckets[THROTTLE_OPS_READ].burst_length;
+    var->iops_write_max_length   = cfg->buckets[THROTTLE_OPS_WRITE].burst_length;
+    var->iops_size               = cfg->op_size;
+
+    var->has_bps_total = true;
+    var->has_bps_read = true;
+    var->has_bps_write = true;
+    var->has_iops_total = true;
+    var->has_iops_read = true;
+    var->has_iops_write = true;
+    var->has_bps_total_max = true;
+    var->has_bps_read_max = true;
+    var->has_bps_write_max = true;
+    var->has_iops_total_max = true;
+    var->has_iops_read_max = true;
+    var->has_iops_write_max = true;
+    var->has_bps_read_max_length = true;
+    var->has_bps_total_max_length = true;
+    var->has_bps_write_max_length = true;
+    var->has_iops_total_max_length = true;
+    var->has_iops_read_max_length = true;
+    var->has_iops_write_max_length = true;
+    var->has_iops_size = true;
+}
diff --git a/util/timed-average.c b/util/timed-average.c
new file mode 100644
index 000000000..2b49d532c
--- /dev/null
+++ b/util/timed-average.c
@@ -0,0 +1,231 @@
+/*
+ * QEMU timed average computation
+ *
+ * Copyright (C) Nodalink, EURL. 2014
+ * Copyright (C) Igalia, S.L. 2015
+ *
+ * Authors:
+ *   Benoît Canet 
+ *   Alberto Garcia 
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 2 of the License, or
+ * (at your option) version 3 or any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see .
+ */
+
+#include "qemu/osdep.h"
+
+#include "qemu/timed-average.h"
+
+/* This module computes an average of a set of values within a time
+ * window.
+ *
+ * Algorithm:
+ *
+ * - Create two windows with a certain expiration period, and
+ *   offsetted by period / 2.
+ * - Each time you want to account a new value, do it in both windows.
+ * - The minimum / maximum / average values are always returned from
+ *   the oldest window.
+ *
+ * Example:
+ *
+ *        t=0          |t=0.5           |t=1          |t=1.5            |t=2
+ *        wnd0: [0,0.5)|wnd0: [0.5,1.5) |             |wnd0: [1.5,2.5)  |
+ *        wnd1: [0,1)  |                |wnd1: [1,2)  |                 |
+ *
+ * Values are returned from:
+ *
+ *        wnd0---------|wnd1------------|wnd0---------|wnd1-------------|
+ */
+
+/* Update the expiration of a time window
+ *
+ * @w:      the window used
+ * @now:    the current time in nanoseconds
+ * @period: the expiration period in nanoseconds
+ */
+static void update_expiration(TimedAverageWindow *w, int64_t now,
+                              int64_t period)
+{
+    /* time elapsed since the last theoretical expiration */
+    int64_t elapsed = (now - w->expiration) % period;
+    /* time remaininging until the next expiration */
+    int64_t remaining = period - elapsed;
+    /* compute expiration */
+    w->expiration = now + remaining;
+}
+
+/* Reset a window
+ *
+ * @w: the window to reset
+ */
+static void window_reset(TimedAverageWindow *w)
+{
+    w->min = UINT64_MAX;
+    w->max = 0;
+    w->sum = 0;
+    w->count = 0;
+}
+
+/* Get the current window (that is, the one with the earliest
+ * expiration time).
+ *
+ * @ta:  the TimedAverage structure
+ * @ret: a pointer to the current window
+ */
+static TimedAverageWindow *current_window(TimedAverage *ta)
+{
+     return &ta->windows[ta->current];
+}
+
+/* Initialize a TimedAverage structure
+ *
+ * @ta:         the TimedAverage structure
+ * @clock_type: the type of clock to use
+ * @period:     the time window period in nanoseconds
+ */
+void timed_average_init(TimedAverage *ta, QEMUClockType clock_type,
+                        uint64_t period)
+{
+    int64_t now = qemu_clock_get_ns(clock_type);
+
+    /* Returned values are from the oldest window, so they belong to
+     * the interval [ta->period/2,ta->period). By adjusting the
+     * requested period by 4/3, we guarantee that they're in the
+     * interval [2/3 period,4/3 period), closer to the requested
+     * period on average */
+    ta->period = (uint64_t) period * 4 / 3;
+    ta->clock_type = clock_type;
+    ta->current = 0;
+
+    window_reset(&ta->windows[0]);
+    window_reset(&ta->windows[1]);
+
+    /* Both windows are offsetted by half a period */
+    ta->windows[0].expiration = now + ta->period / 2;
+    ta->windows[1].expiration = now + ta->period;
+}
+
+/* Check if the time windows have expired, updating their counters and
+ * expiration time if that's the case.
+ *
+ * @ta: the TimedAverage structure
+ * @elapsed: if non-NULL, the elapsed time (in ns) within the current
+ *           window will be stored here
+ */
+static void check_expirations(TimedAverage *ta, uint64_t *elapsed)
+{
+    int64_t now = qemu_clock_get_ns(ta->clock_type);
+    int i;
+
+    assert(ta->period != 0);
+
+    /* Check if the windows have expired */
+    for (i = 0; i < 2; i++) {
+        TimedAverageWindow *w = &ta->windows[i];
+        if (w->expiration <= now) {
+            window_reset(w);
+            update_expiration(w, now, ta->period);
+        }
+    }
+
+    /* Make ta->current point to the oldest window */
+    if (ta->windows[0].expiration < ta->windows[1].expiration) {
+        ta->current = 0;
+    } else {
+        ta->current = 1;
+    }
+
+    /* Calculate the elapsed time within the current window */
+    if (elapsed) {
+        int64_t remaining = ta->windows[ta->current].expiration - now;
+        *elapsed = ta->period - remaining;
+    }
+}
+
+/* Account a value
+ *
+ * @ta:    the TimedAverage structure
+ * @value: the value to account
+ */
+void timed_average_account(TimedAverage *ta, uint64_t value)
+{
+    int i;
+    check_expirations(ta, NULL);
+
+    /* Do the accounting in both windows at the same time */
+    for (i = 0; i < 2; i++) {
+        TimedAverageWindow *w = &ta->windows[i];
+
+        w->sum += value;
+        w->count++;
+
+        if (value < w->min) {
+            w->min = value;
+        }
+
+        if (value > w->max) {
+            w->max = value;
+        }
+    }
+}
+
+/* Get the minimum value
+ *
+ * @ta:  the TimedAverage structure
+ * @ret: the minimum value
+ */
+uint64_t timed_average_min(TimedAverage *ta)
+{
+    TimedAverageWindow *w;
+    check_expirations(ta, NULL);
+    w = current_window(ta);
+    return w->min < UINT64_MAX ? w->min : 0;
+}
+
+/* Get the average value
+ *
+ * @ta:  the TimedAverage structure
+ * @ret: the average value
+ */
+uint64_t timed_average_avg(TimedAverage *ta)
+{
+    TimedAverageWindow *w;
+    check_expirations(ta, NULL);
+    w = current_window(ta);
+    return w->count > 0 ? w->sum / w->count : 0;
+}
+
+/* Get the maximum value
+ *
+ * @ta:  the TimedAverage structure
+ * @ret: the maximum value
+ */
+uint64_t timed_average_max(TimedAverage *ta)
+{
+    check_expirations(ta, NULL);
+    return current_window(ta)->max;
+}
+
+/* Get the sum of all accounted values
+ * @ta:      the TimedAverage structure
+ * @elapsed: if non-NULL, the elapsed time (in ns) will be stored here
+ * @ret:     the sum of all accounted values
+ */
+uint64_t timed_average_sum(TimedAverage *ta, uint64_t *elapsed)
+{
+    TimedAverageWindow *w;
+    check_expirations(ta, elapsed);
+    w = current_window(ta);
+    return w->sum;
+}
diff --git a/util/trace-events b/util/trace-events
new file mode 100644
index 000000000..61e0d4bcd
--- /dev/null
+++ b/util/trace-events
@@ -0,0 +1,93 @@
+# See docs/devel/tracing.txt for syntax documentation.
+
+# aio-posix.c
+run_poll_handlers_begin(void *ctx, int64_t max_ns, int64_t timeout) "ctx %p max_ns %"PRId64 " timeout %"PRId64
+run_poll_handlers_end(void *ctx, bool progress, int64_t timeout) "ctx %p progress %d new timeout %"PRId64
+poll_shrink(void *ctx, int64_t old, int64_t new) "ctx %p old %"PRId64" new %"PRId64
+poll_grow(void *ctx, int64_t old, int64_t new) "ctx %p old %"PRId64" new %"PRId64
+poll_add(void *ctx, void *node, int fd, unsigned revents) "ctx %p node %p fd %d revents 0x%x"
+poll_remove(void *ctx, void *node, int fd) "ctx %p node %p fd %d"
+
+# async.c
+aio_co_schedule(void *ctx, void *co) "ctx %p co %p"
+aio_co_schedule_bh_cb(void *ctx, void *co) "ctx %p co %p"
+
+# thread-pool.c
+thread_pool_submit(void *pool, void *req, void *opaque) "pool %p req %p opaque %p"
+thread_pool_complete(void *pool, void *req, void *opaque, int ret) "pool %p req %p opaque %p ret %d"
+thread_pool_cancel(void *req, void *opaque) "req %p opaque %p"
+
+# buffer.c
+buffer_resize(const char *buf, size_t olen, size_t len) "%s: old %zd, new %zd"
+buffer_move_empty(const char *buf, size_t len, const char *from) "%s: %zd bytes from %s"
+buffer_move(const char *buf, size_t len, const char *from) "%s: %zd bytes from %s"
+buffer_free(const char *buf, size_t len) "%s: capacity %zd"
+
+# filemonitor-inotify.c
+qemu_file_monitor_add_watch(void *mon, const char *dirpath, const char *filename, void *cb, void *opaque, int64_t id) "File monitor %p add watch dir='%s' file='%s' cb=%p opaque=%p id=%" PRId64
+qemu_file_monitor_remove_watch(void *mon, const char *dirpath, int64_t id) "File monitor %p remove watch dir='%s' id=%" PRId64
+qemu_file_monitor_new(void *mon, int fd) "File monitor %p created fd=%d"
+qemu_file_monitor_enable_watch(void *mon, const char *dirpath, int id) "File monitor %p enable watch dir='%s' id=%u"
+qemu_file_monitor_disable_watch(void *mon, const char *dirpath, int id) "File monitor %p disable watch dir='%s' id=%u"
+qemu_file_monitor_event(void *mon, const char *dirpath, const char *filename, int mask, unsigned int id) "File monitor %p event dir='%s' file='%s' mask=0x%x id=%u"
+qemu_file_monitor_dispatch(void *mon, const char *dirpath, const char *filename, int ev, void *cb, void *opaque, int64_t id) "File monitor %p dispatch dir='%s' file='%s' ev=%d cb=%p opaque=%p id=%" PRId64
+
+# qemu-coroutine.c
+qemu_aio_coroutine_enter(void *ctx, void *from, void *to, void *opaque) "ctx %p from %p to %p opaque %p"
+qemu_coroutine_yield(void *from, void *to) "from %p to %p"
+qemu_coroutine_terminate(void *co) "self %p"
+
+# qemu-coroutine-lock.c
+qemu_co_mutex_lock_uncontended(void *mutex, void *self) "mutex %p self %p"
+qemu_co_mutex_lock_entry(void *mutex, void *self) "mutex %p self %p"
+qemu_co_mutex_lock_return(void *mutex, void *self) "mutex %p self %p"
+qemu_co_mutex_unlock_entry(void *mutex, void *self) "mutex %p self %p"
+qemu_co_mutex_unlock_return(void *mutex, void *self) "mutex %p self %p"
+
+# oslib-posix.c
+# oslib-win32.c
+qemu_memalign(size_t alignment, size_t size, void *ptr) "alignment %zu size %zu ptr %p"
+qemu_anon_ram_alloc(size_t size, void *ptr) "size %zu ptr %p"
+qemu_vfree(void *ptr) "ptr %p"
+qemu_anon_ram_free(void *ptr, size_t size) "ptr %p size %zu"
+
+# hbitmap.c
+hbitmap_iter_skip_words(const void *hb, void *hbi, uint64_t pos, unsigned long cur) "hb %p hbi %p pos %"PRId64" cur 0x%lx"
+hbitmap_reset(void *hb, uint64_t start, uint64_t count, uint64_t sbit, uint64_t ebit) "hb %p items %"PRIu64",%"PRIu64" bits %"PRIu64"..%"PRIu64
+hbitmap_set(void *hb, uint64_t start, uint64_t count, uint64_t sbit, uint64_t ebit) "hb %p items %"PRIu64",%"PRIu64" bits %"PRIu64"..%"PRIu64
+
+# lockcnt.c
+lockcnt_fast_path_attempt(const void *lockcnt, int expected, int new) "lockcnt %p fast path %d->%d"
+lockcnt_fast_path_success(const void *lockcnt, int expected, int new) "lockcnt %p fast path %d->%d succeeded"
+lockcnt_unlock_attempt(const void *lockcnt, int expected, int new) "lockcnt %p unlock %d->%d"
+lockcnt_unlock_success(const void *lockcnt, int expected, int new) "lockcnt %p unlock %d->%d succeeded"
+lockcnt_futex_wait_prepare(const void *lockcnt, int expected, int new) "lockcnt %p preparing slow path %d->%d"
+lockcnt_futex_wait(const void *lockcnt, int val) "lockcnt %p waiting on %d"
+lockcnt_futex_wait_resume(const void *lockcnt, int new) "lockcnt %p after wait: %d"
+lockcnt_futex_wake(const void *lockcnt) "lockcnt %p waking up one waiter"
+
+# qemu-sockets.c
+socket_listen(int num) "backlog: %d"
+
+# qemu-thread-common.h
+# qemu-thread-posix.c
+# qemu-thread-win32.c
+qemu_mutex_lock(void *mutex, const char *file, const int line) "waiting on mutex %p (%s:%d)"
+qemu_mutex_locked(void *mutex, const char *file, const int line) "taken mutex %p (%s:%d)"
+qemu_mutex_unlock(void *mutex, const char *file, const int line) "released mutex %p (%s:%d)"
+
+# vfio-helpers.c
+qemu_vfio_dma_reset_temporary(void *s) "s %p"
+qemu_vfio_ram_block_added(void *s, void *p, size_t size) "s %p host %p size 0x%zx"
+qemu_vfio_ram_block_removed(void *s, void *p, size_t size) "s %p host %p size 0x%zx"
+qemu_vfio_dump_mapping(void *host, uint64_t iova, size_t size) "vfio mapping %p to iova 0x%08" PRIx64 " size 0x%zx"
+qemu_vfio_find_mapping(void *s, void *p) "s %p host %p"
+qemu_vfio_new_mapping(void *s, void *host, size_t size, int index, uint64_t iova) "s %p host %p size 0x%zx index %d iova 0x%"PRIx64
+qemu_vfio_do_mapping(void *s, void *host, uint64_t iova, size_t size) "s %p host %p <-> iova 0x%"PRIx64 " size 0x%zx"
+qemu_vfio_dma_map(void *s, void *host, size_t size, bool temporary, uint64_t *iova) "s %p host %p size 0x%zx temporary %d &iova %p"
+qemu_vfio_dma_mapped(void *s, void *host, uint64_t iova, size_t size) "s %p host %p <-> iova 0x%"PRIx64" size 0x%zx"
+qemu_vfio_dma_unmap(void *s, void *host) "s %p host %p"
+qemu_vfio_pci_read_config(void *buf, int ofs, int size, uint64_t region_ofs, uint64_t region_size) "read cfg ptr %p ofs 0x%x size 0x%x (region addr 0x%"PRIx64" size 0x%"PRIx64")"
+qemu_vfio_pci_write_config(void *buf, int ofs, int size, uint64_t region_ofs, uint64_t region_size) "write cfg ptr %p ofs 0x%x size 0x%x (region addr 0x%"PRIx64" size 0x%"PRIx64")"
+qemu_vfio_region_info(const char *desc, uint64_t region_ofs, uint64_t region_size, uint32_t cap_offset) "region '%s' addr 0x%"PRIx64" size 0x%"PRIx64" cap_ofs 0x%"PRIx32
+qemu_vfio_pci_map_bar(int index, uint64_t region_ofs, uint64_t region_size, int ofs, void *host) "map region bar#%d addr 0x%"PRIx64" size 0x%"PRIx64" ofs 0x%x host %p"
diff --git a/util/trace.h b/util/trace.h
new file mode 100644
index 000000000..86ff7a390
--- /dev/null
+++ b/util/trace.h
@@ -0,0 +1 @@
+#include "trace/trace-util.h"
diff --git a/util/unicode.c b/util/unicode.c
new file mode 100644
index 000000000..8580bc598
--- /dev/null
+++ b/util/unicode.c
@@ -0,0 +1,156 @@
+/*
+ * Dealing with Unicode
+ *
+ * Copyright (C) 2013 Red Hat, Inc.
+ *
+ * Authors:
+ *  Markus Armbruster 
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or
+ * later.  See the COPYING file in the top-level directory.
+ */
+
+#include "qemu/osdep.h"
+#include "qemu/unicode.h"
+
+static bool is_valid_codepoint(int codepoint)
+{
+    if (codepoint > 0x10FFFFu) {
+        return false;            /* beyond Unicode range */
+    }
+    if ((codepoint >= 0xFDD0 && codepoint <= 0xFDEF)
+        || (codepoint & 0xFFFE) == 0xFFFE) {
+        return false;            /* noncharacter */
+    }
+    if (codepoint >= 0xD800 && codepoint <= 0xDFFF) {
+        return false;            /* surrogate code point */
+    }
+    return true;
+}
+
+/**
+ * mod_utf8_codepoint:
+ * @s: string encoded in modified UTF-8
+ * @n: maximum number of bytes to read from @s, if less than 6
+ * @end: set to end of sequence on return
+ *
+ * Convert the modified UTF-8 sequence at the start of @s.  Modified
+ * UTF-8 is exactly like UTF-8, except U+0000 is encoded as
+ * "\xC0\x80".
+ *
+ * If @n is zero or @s points to a zero byte, the sequence is invalid,
+ * and @end is set to @s.
+ *
+ * If @s points to an impossible byte (0xFE or 0xFF) or a continuation
+ * byte, the sequence is invalid, and @end is set to @s + 1
+ *
+ * Else, the first byte determines how many continuation bytes are
+ * expected.  If there are fewer, the sequence is invalid, and @end is
+ * set to @s + 1 + actual number of continuation bytes.  Else, the
+ * sequence is well-formed, and @end is set to @s + 1 + expected
+ * number of continuation bytes.
+ *
+ * A well-formed sequence is valid unless it encodes a codepoint
+ * outside the Unicode range U+0000..U+10FFFF, one of Unicode's 66
+ * noncharacters, a surrogate codepoint, or is overlong.  Except the
+ * overlong sequence "\xC0\x80" is valid.
+ *
+ * Conversion succeeds if and only if the sequence is valid.
+ *
+ * Returns: the Unicode codepoint on success, -1 on failure.
+ */
+int mod_utf8_codepoint(const char *s, size_t n, char **end)
+{
+    static int min_cp[5] = { 0x80, 0x800, 0x10000, 0x200000, 0x4000000 };
+    const unsigned char *p;
+    unsigned byte, mask, len, i;
+    int cp;
+
+    if (n == 0 || *s == 0) {
+        /* empty sequence */
+        *end = (char *)s;
+        return -1;
+    }
+
+    p = (const unsigned char *)s;
+    byte = *p++;
+    if (byte < 0x80) {
+        cp = byte;              /* one byte sequence */
+    } else if (byte >= 0xFE) {
+        cp = -1;                /* impossible bytes 0xFE, 0xFF */
+    } else if ((byte & 0x40) == 0) {
+        cp = -1;                /* unexpected continuation byte */
+    } else {
+        /* multi-byte sequence */
+        len = 0;
+        for (mask = 0x80; byte & mask; mask >>= 1) {
+            len++;
+        }
+        assert(len > 1 && len < 7);
+        cp = byte & (mask - 1);
+        for (i = 1; i < len; i++) {
+            byte = i < n ? *p : 0;
+            if ((byte & 0xC0) != 0x80) {
+                cp = -1;        /* continuation byte missing */
+                goto out;
+            }
+            p++;
+            cp <<= 6;
+            cp |= byte & 0x3F;
+        }
+        if (!is_valid_codepoint(cp)) {
+            cp = -1;
+        } else if (cp < min_cp[len - 2] && !(cp == 0 && len == 2)) {
+            cp = -1;            /* overlong, not \xC0\x80 */
+        }
+    }
+
+out:
+    *end = (char *)p;
+    return cp;
+}
+
+/**
+ * mod_utf8_encode:
+ * @buf: Destination buffer
+ * @bufsz: size of @buf, at least 5.
+ * @codepoint: Unicode codepoint to encode
+ *
+ * Convert Unicode codepoint @codepoint to modified UTF-8.
+ *
+ * Returns: the length of the UTF-8 sequence on success, -1 when
+ * @codepoint is invalid.
+ */
+ssize_t mod_utf8_encode(char buf[], size_t bufsz, int codepoint)
+{
+    assert(bufsz >= 5);
+
+    if (!is_valid_codepoint(codepoint)) {
+        return -1;
+    }
+
+    if (codepoint > 0 && codepoint <= 0x7F) {
+        buf[0] = codepoint & 0x7F;
+        buf[1] = 0;
+        return 1;
+    }
+    if (codepoint <= 0x7FF) {
+        buf[0] = 0xC0 | ((codepoint >> 6) & 0x1F);
+        buf[1] = 0x80 | (codepoint & 0x3F);
+        buf[2] = 0;
+        return 2;
+    }
+    if (codepoint <= 0xFFFF) {
+        buf[0] = 0xE0 | ((codepoint >> 12) & 0x0F);
+        buf[1] = 0x80 | ((codepoint >> 6) & 0x3F);
+        buf[2] = 0x80 | (codepoint & 0x3F);
+        buf[3] = 0;
+        return 3;
+    }
+    buf[0] = 0xF0 | ((codepoint >> 18) & 0x07);
+    buf[1] = 0x80 | ((codepoint >> 12) & 0x3F);
+    buf[2] = 0x80 | ((codepoint >> 6) & 0x3F);
+    buf[3] = 0x80 | (codepoint & 0x3F);
+    buf[4] = 0;
+    return 4;
+}
diff --git a/util/uri.c b/util/uri.c
new file mode 100644
index 000000000..8bdef8412
--- /dev/null
+++ b/util/uri.c
@@ -0,0 +1,2324 @@
+/**
+ * uri.c: set of generic URI related routines
+ *
+ * Reference: RFCs 3986, 2732 and 2373
+ *
+ * Copyright (C) 1998-2003 Daniel Veillard.  All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL THE
+ * DANIEL VEILLARD BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
+ * IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Except as contained in this notice, the name of Daniel Veillard shall not
+ * be used in advertising or otherwise to promote the sale, use or other
+ * dealings in this Software without prior written authorization from him.
+ *
+ * daniel@veillard.com
+ *
+ **
+ *
+ * Copyright (C) 2007, 2009-2010 Red Hat, Inc.
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307  USA
+ *
+ * Authors:
+ *    Richard W.M. Jones 
+ *
+ */
+
+#include "qemu/osdep.h"
+#include "qemu/cutils.h"
+
+#include "qemu/uri.h"
+
+static void uri_clean(URI *uri);
+
+/*
+ * Old rule from 2396 used in legacy handling code
+ * alpha    = lowalpha | upalpha
+ */
+#define IS_ALPHA(x) (IS_LOWALPHA(x) || IS_UPALPHA(x))
+
+/*
+ * lowalpha = "a" | "b" | "c" | "d" | "e" | "f" | "g" | "h" | "i" | "j" |
+ *            "k" | "l" | "m" | "n" | "o" | "p" | "q" | "r" | "s" | "t" |
+ *            "u" | "v" | "w" | "x" | "y" | "z"
+ */
+
+#define IS_LOWALPHA(x) (((x) >= 'a') && ((x) <= 'z'))
+
+/*
+ * upalpha = "A" | "B" | "C" | "D" | "E" | "F" | "G" | "H" | "I" | "J" |
+ *           "K" | "L" | "M" | "N" | "O" | "P" | "Q" | "R" | "S" | "T" |
+ *           "U" | "V" | "W" | "X" | "Y" | "Z"
+ */
+#define IS_UPALPHA(x) (((x) >= 'A') && ((x) <= 'Z'))
+
+#ifdef IS_DIGIT
+#undef IS_DIGIT
+#endif
+/*
+ * digit = "0" | "1" | "2" | "3" | "4" | "5" | "6" | "7" | "8" | "9"
+ */
+#define IS_DIGIT(x) (((x) >= '0') && ((x) <= '9'))
+
+/*
+ * alphanum = alpha | digit
+ */
+
+#define IS_ALPHANUM(x) (IS_ALPHA(x) || IS_DIGIT(x))
+
+/*
+ * mark = "-" | "_" | "." | "!" | "~" | "*" | "'" | "(" | ")"
+ */
+
+#define IS_MARK(x) (((x) == '-') || ((x) == '_') || ((x) == '.') ||            \
+    ((x) == '!') || ((x) == '~') || ((x) == '*') || ((x) == '\'') ||           \
+    ((x) == '(') || ((x) == ')'))
+
+/*
+ * unwise = "{" | "}" | "|" | "\" | "^" | "`"
+ */
+
+#define IS_UNWISE(p)                                                           \
+    (((*(p) == '{')) || ((*(p) == '}')) || ((*(p) == '|')) ||                  \
+     ((*(p) == '\\')) || ((*(p) == '^')) || ((*(p) == '[')) ||                 \
+     ((*(p) == ']')) || ((*(p) == '`')))
+/*
+ * reserved = ";" | "/" | "?" | ":" | "@" | "&" | "=" | "+" | "$" | "," |
+ *            "[" | "]"
+ */
+
+#define IS_RESERVED(x) (((x) == ';') || ((x) == '/') || ((x) == '?') ||        \
+    ((x) == ':') || ((x) == '@') || ((x) == '&') || ((x) == '=') ||            \
+    ((x) == '+') || ((x) == '$') || ((x) == ',') || ((x) == '[') ||            \
+    ((x) == ']'))
+
+/*
+ * unreserved = alphanum | mark
+ */
+
+#define IS_UNRESERVED(x) (IS_ALPHANUM(x) || IS_MARK(x))
+
+/*
+ * Skip to next pointer char, handle escaped sequences
+ */
+
+#define NEXT(p) ((*p == '%') ? p += 3 : p++)
+
+/*
+ * Productions from the spec.
+ *
+ *    authority     = server | reg_name
+ *    reg_name      = 1*( unreserved | escaped | "$" | "," |
+ *                        ";" | ":" | "@" | "&" | "=" | "+" )
+ *
+ * path          = [ abs_path | opaque_part ]
+ */
+
+/************************************************************************
+ *                                                                      *
+ *                         RFC 3986 parser                              *
+ *                                                                      *
+ ************************************************************************/
+
+#define ISA_DIGIT(p) ((*(p) >= '0') && (*(p) <= '9'))
+#define ISA_ALPHA(p) (((*(p) >= 'a') && (*(p) <= 'z')) ||                      \
+                      ((*(p) >= 'A') && (*(p) <= 'Z')))
+#define ISA_HEXDIG(p)                                                          \
+    (ISA_DIGIT(p) || ((*(p) >= 'a') && (*(p) <= 'f')) ||                       \
+     ((*(p) >= 'A') && (*(p) <= 'F')))
+
+/*
+ *    sub-delims    = "!" / "$" / "&" / "'" / "(" / ")"
+ *                     / "*" / "+" / "," / ";" / "="
+ */
+#define ISA_SUB_DELIM(p)                                                       \
+    (((*(p) == '!')) || ((*(p) == '$')) || ((*(p) == '&')) ||                  \
+     ((*(p) == '(')) || ((*(p) == ')')) || ((*(p) == '*')) ||                  \
+     ((*(p) == '+')) || ((*(p) == ',')) || ((*(p) == ';')) ||                  \
+     ((*(p) == '=')) || ((*(p) == '\'')))
+
+/*
+ *    gen-delims    = ":" / "/" / "?" / "#" / "[" / "]" / "@"
+ */
+#define ISA_GEN_DELIM(p)                                                       \
+    (((*(p) == ':')) || ((*(p) == '/')) || ((*(p) == '?')) ||                  \
+     ((*(p) == '#')) || ((*(p) == '[')) || ((*(p) == ']')) ||                  \
+     ((*(p) == '@')))
+
+/*
+ *    reserved      = gen-delims / sub-delims
+ */
+#define ISA_RESERVED(p) (ISA_GEN_DELIM(p) || (ISA_SUB_DELIM(p)))
+
+/*
+ *    unreserved    = ALPHA / DIGIT / "-" / "." / "_" / "~"
+ */
+#define ISA_UNRESERVED(p)                                                      \
+    ((ISA_ALPHA(p)) || (ISA_DIGIT(p)) || ((*(p) == '-')) ||                    \
+     ((*(p) == '.')) || ((*(p) == '_')) || ((*(p) == '~')))
+
+/*
+ *    pct-encoded   = "%" HEXDIG HEXDIG
+ */
+#define ISA_PCT_ENCODED(p)                                                     \
+    ((*(p) == '%') && (ISA_HEXDIG(p + 1)) && (ISA_HEXDIG(p + 2)))
+
+/*
+ *    pchar         = unreserved / pct-encoded / sub-delims / ":" / "@"
+ */
+#define ISA_PCHAR(p)                                                           \
+    (ISA_UNRESERVED(p) || ISA_PCT_ENCODED(p) || ISA_SUB_DELIM(p) ||            \
+     ((*(p) == ':')) || ((*(p) == '@')))
+
+/**
+ * rfc3986_parse_scheme:
+ * @uri:  pointer to an URI structure
+ * @str:  pointer to the string to analyze
+ *
+ * Parse an URI scheme
+ *
+ * ALPHA *( ALPHA / DIGIT / "+" / "-" / "." )
+ *
+ * Returns 0 or the error code
+ */
+static int rfc3986_parse_scheme(URI *uri, const char **str)
+{
+    const char *cur;
+
+    if (str == NULL) {
+        return -1;
+    }
+
+    cur = *str;
+    if (!ISA_ALPHA(cur)) {
+        return 2;
+    }
+    cur++;
+    while (ISA_ALPHA(cur) || ISA_DIGIT(cur) || (*cur == '+') || (*cur == '-') ||
+           (*cur == '.')) {
+        cur++;
+    }
+    if (uri != NULL) {
+        g_free(uri->scheme);
+        uri->scheme = g_strndup(*str, cur - *str);
+    }
+    *str = cur;
+    return 0;
+}
+
+/**
+ * rfc3986_parse_fragment:
+ * @uri:  pointer to an URI structure
+ * @str:  pointer to the string to analyze
+ *
+ * Parse the query part of an URI
+ *
+ * fragment      = *( pchar / "/" / "?" )
+ * NOTE: the strict syntax as defined by 3986 does not allow '[' and ']'
+ *       in the fragment identifier but this is used very broadly for
+ *       xpointer scheme selection, so we are allowing it here to not break
+ *       for example all the DocBook processing chains.
+ *
+ * Returns 0 or the error code
+ */
+static int rfc3986_parse_fragment(URI *uri, const char **str)
+{
+    const char *cur;
+
+    if (str == NULL) {
+        return -1;
+    }
+
+    cur = *str;
+
+    while ((ISA_PCHAR(cur)) || (*cur == '/') || (*cur == '?') ||
+           (*cur == '[') || (*cur == ']') ||
+           ((uri != NULL) && (uri->cleanup & 1) && (IS_UNWISE(cur)))) {
+        NEXT(cur);
+    }
+    if (uri != NULL) {
+        g_free(uri->fragment);
+        if (uri->cleanup & 2) {
+            uri->fragment = g_strndup(*str, cur - *str);
+        } else {
+            uri->fragment = uri_string_unescape(*str, cur - *str, NULL);
+        }
+    }
+    *str = cur;
+    return 0;
+}
+
+/**
+ * rfc3986_parse_query:
+ * @uri:  pointer to an URI structure
+ * @str:  pointer to the string to analyze
+ *
+ * Parse the query part of an URI
+ *
+ * query = *uric
+ *
+ * Returns 0 or the error code
+ */
+static int rfc3986_parse_query(URI *uri, const char **str)
+{
+    const char *cur;
+
+    if (str == NULL) {
+        return -1;
+    }
+
+    cur = *str;
+
+    while ((ISA_PCHAR(cur)) || (*cur == '/') || (*cur == '?') ||
+           ((uri != NULL) && (uri->cleanup & 1) && (IS_UNWISE(cur)))) {
+        NEXT(cur);
+    }
+    if (uri != NULL) {
+        g_free(uri->query);
+        uri->query = g_strndup(*str, cur - *str);
+    }
+    *str = cur;
+    return 0;
+}
+
+/**
+ * rfc3986_parse_port:
+ * @uri:  pointer to an URI structure
+ * @str:  the string to analyze
+ *
+ * Parse a port  part and fills in the appropriate fields
+ * of the @uri structure
+ *
+ * port          = *DIGIT
+ *
+ * Returns 0 or the error code
+ */
+static int rfc3986_parse_port(URI *uri, const char **str)
+{
+    const char *cur = *str;
+    int port = 0;
+
+    if (ISA_DIGIT(cur)) {
+        while (ISA_DIGIT(cur)) {
+            port = port * 10 + (*cur - '0');
+            if (port > 65535) {
+                return 1;
+            }
+            cur++;
+        }
+        if (uri) {
+            uri->port = port;
+        }
+        *str = cur;
+        return 0;
+    }
+    return 1;
+}
+
+/**
+ * rfc3986_parse_user_info:
+ * @uri:  pointer to an URI structure
+ * @str:  the string to analyze
+ *
+ * Parse a user information part and fill in the appropriate fields
+ * of the @uri structure
+ *
+ * userinfo      = *( unreserved / pct-encoded / sub-delims / ":" )
+ *
+ * Returns 0 or the error code
+ */
+static int rfc3986_parse_user_info(URI *uri, const char **str)
+{
+    const char *cur;
+
+    cur = *str;
+    while (ISA_UNRESERVED(cur) || ISA_PCT_ENCODED(cur) || ISA_SUB_DELIM(cur) ||
+           (*cur == ':')) {
+        NEXT(cur);
+    }
+    if (*cur == '@') {
+        if (uri != NULL) {
+            g_free(uri->user);
+            if (uri->cleanup & 2) {
+                uri->user = g_strndup(*str, cur - *str);
+            } else {
+                uri->user = uri_string_unescape(*str, cur - *str, NULL);
+            }
+        }
+        *str = cur;
+        return 0;
+    }
+    return 1;
+}
+
+/**
+ * rfc3986_parse_dec_octet:
+ * @str:  the string to analyze
+ *
+ *    dec-octet     = DIGIT                 ; 0-9
+ *                  / %x31-39 DIGIT         ; 10-99
+ *                  / "1" 2DIGIT            ; 100-199
+ *                  / "2" %x30-34 DIGIT     ; 200-249
+ *                  / "25" %x30-35          ; 250-255
+ *
+ * Skip a dec-octet.
+ *
+ * Returns 0 if found and skipped, 1 otherwise
+ */
+static int rfc3986_parse_dec_octet(const char **str)
+{
+    const char *cur = *str;
+
+    if (!(ISA_DIGIT(cur))) {
+        return 1;
+    }
+    if (!ISA_DIGIT(cur + 1)) {
+        cur++;
+    } else if ((*cur != '0') && (ISA_DIGIT(cur + 1)) && (!ISA_DIGIT(cur + 2))) {
+        cur += 2;
+    } else if ((*cur == '1') && (ISA_DIGIT(cur + 1)) && (ISA_DIGIT(cur + 2))) {
+        cur += 3;
+    } else if ((*cur == '2') && (*(cur + 1) >= '0') && (*(cur + 1) <= '4') &&
+             (ISA_DIGIT(cur + 2))) {
+        cur += 3;
+    } else if ((*cur == '2') && (*(cur + 1) == '5') && (*(cur + 2) >= '0') &&
+             (*(cur + 1) <= '5')) {
+        cur += 3;
+    } else {
+        return 1;
+    }
+    *str = cur;
+    return 0;
+}
+/**
+ * rfc3986_parse_host:
+ * @uri:  pointer to an URI structure
+ * @str:  the string to analyze
+ *
+ * Parse an host part and fills in the appropriate fields
+ * of the @uri structure
+ *
+ * host          = IP-literal / IPv4address / reg-name
+ * IP-literal    = "[" ( IPv6address / IPvFuture  ) "]"
+ * IPv4address   = dec-octet "." dec-octet "." dec-octet "." dec-octet
+ * reg-name      = *( unreserved / pct-encoded / sub-delims )
+ *
+ * Returns 0 or the error code
+ */
+static int rfc3986_parse_host(URI *uri, const char **str)
+{
+    const char *cur = *str;
+    const char *host;
+
+    host = cur;
+    /*
+     * IPv6 and future addressing scheme are enclosed between brackets
+     */
+    if (*cur == '[') {
+        cur++;
+        while ((*cur != ']') && (*cur != 0)) {
+            cur++;
+        }
+        if (*cur != ']') {
+            return 1;
+        }
+        cur++;
+        goto found;
+    }
+    /*
+     * try to parse an IPv4
+     */
+    if (ISA_DIGIT(cur)) {
+        if (rfc3986_parse_dec_octet(&cur) != 0) {
+            goto not_ipv4;
+        }
+        if (*cur != '.') {
+            goto not_ipv4;
+        }
+        cur++;
+        if (rfc3986_parse_dec_octet(&cur) != 0) {
+            goto not_ipv4;
+        }
+        if (*cur != '.') {
+            goto not_ipv4;
+        }
+        if (rfc3986_parse_dec_octet(&cur) != 0) {
+            goto not_ipv4;
+        }
+        if (*cur != '.') {
+            goto not_ipv4;
+        }
+        if (rfc3986_parse_dec_octet(&cur) != 0) {
+            goto not_ipv4;
+        }
+        goto found;
+    not_ipv4:
+        cur = *str;
+    }
+    /*
+     * then this should be a hostname which can be empty
+     */
+    while (ISA_UNRESERVED(cur) || ISA_PCT_ENCODED(cur) || ISA_SUB_DELIM(cur)) {
+        NEXT(cur);
+    }
+found:
+    if (uri != NULL) {
+        g_free(uri->authority);
+        uri->authority = NULL;
+        g_free(uri->server);
+        if (cur != host) {
+            if (uri->cleanup & 2) {
+                uri->server = g_strndup(host, cur - host);
+            } else {
+                uri->server = uri_string_unescape(host, cur - host, NULL);
+            }
+        } else {
+            uri->server = NULL;
+        }
+    }
+    *str = cur;
+    return 0;
+}
+
+/**
+ * rfc3986_parse_authority:
+ * @uri:  pointer to an URI structure
+ * @str:  the string to analyze
+ *
+ * Parse an authority part and fills in the appropriate fields
+ * of the @uri structure
+ *
+ * authority     = [ userinfo "@" ] host [ ":" port ]
+ *
+ * Returns 0 or the error code
+ */
+static int rfc3986_parse_authority(URI *uri, const char **str)
+{
+    const char *cur;
+    int ret;
+
+    cur = *str;
+    /*
+     * try to parse a userinfo and check for the trailing @
+     */
+    ret = rfc3986_parse_user_info(uri, &cur);
+    if ((ret != 0) || (*cur != '@')) {
+        cur = *str;
+    } else {
+        cur++;
+    }
+    ret = rfc3986_parse_host(uri, &cur);
+    if (ret != 0) {
+        return ret;
+    }
+    if (*cur == ':') {
+        cur++;
+        ret = rfc3986_parse_port(uri, &cur);
+        if (ret != 0) {
+            return ret;
+        }
+    }
+    *str = cur;
+    return 0;
+}
+
+/**
+ * rfc3986_parse_segment:
+ * @str:  the string to analyze
+ * @forbid: an optional forbidden character
+ * @empty: allow an empty segment
+ *
+ * Parse a segment and fills in the appropriate fields
+ * of the @uri structure
+ *
+ * segment       = *pchar
+ * segment-nz    = 1*pchar
+ * segment-nz-nc = 1*( unreserved / pct-encoded / sub-delims / "@" )
+ *               ; non-zero-length segment without any colon ":"
+ *
+ * Returns 0 or the error code
+ */
+static int rfc3986_parse_segment(const char **str, char forbid, int empty)
+{
+    const char *cur;
+
+    cur = *str;
+    if (!ISA_PCHAR(cur)) {
+        if (empty) {
+            return 0;
+        }
+        return 1;
+    }
+    while (ISA_PCHAR(cur) && (*cur != forbid)) {
+        NEXT(cur);
+    }
+    *str = cur;
+    return 0;
+}
+
+/**
+ * rfc3986_parse_path_ab_empty:
+ * @uri:  pointer to an URI structure
+ * @str:  the string to analyze
+ *
+ * Parse an path absolute or empty and fills in the appropriate fields
+ * of the @uri structure
+ *
+ * path-abempty  = *( "/" segment )
+ *
+ * Returns 0 or the error code
+ */
+static int rfc3986_parse_path_ab_empty(URI *uri, const char **str)
+{
+    const char *cur;
+    int ret;
+
+    cur = *str;
+
+    while (*cur == '/') {
+        cur++;
+        ret = rfc3986_parse_segment(&cur, 0, 1);
+        if (ret != 0) {
+            return ret;
+        }
+    }
+    if (uri != NULL) {
+        g_free(uri->path);
+        if (*str != cur) {
+            if (uri->cleanup & 2) {
+                uri->path = g_strndup(*str, cur - *str);
+            } else {
+                uri->path = uri_string_unescape(*str, cur - *str, NULL);
+            }
+        } else {
+            uri->path = NULL;
+        }
+    }
+    *str = cur;
+    return 0;
+}
+
+/**
+ * rfc3986_parse_path_absolute:
+ * @uri:  pointer to an URI structure
+ * @str:  the string to analyze
+ *
+ * Parse an path absolute and fills in the appropriate fields
+ * of the @uri structure
+ *
+ * path-absolute = "/" [ segment-nz *( "/" segment ) ]
+ *
+ * Returns 0 or the error code
+ */
+static int rfc3986_parse_path_absolute(URI *uri, const char **str)
+{
+    const char *cur;
+    int ret;
+
+    cur = *str;
+
+    if (*cur != '/') {
+        return 1;
+    }
+    cur++;
+    ret = rfc3986_parse_segment(&cur, 0, 0);
+    if (ret == 0) {
+        while (*cur == '/') {
+            cur++;
+            ret = rfc3986_parse_segment(&cur, 0, 1);
+            if (ret != 0) {
+                return ret;
+            }
+        }
+    }
+    if (uri != NULL) {
+        g_free(uri->path);
+        if (cur != *str) {
+            if (uri->cleanup & 2) {
+                uri->path = g_strndup(*str, cur - *str);
+            } else {
+                uri->path = uri_string_unescape(*str, cur - *str, NULL);
+            }
+        } else {
+            uri->path = NULL;
+        }
+    }
+    *str = cur;
+    return 0;
+}
+
+/**
+ * rfc3986_parse_path_rootless:
+ * @uri:  pointer to an URI structure
+ * @str:  the string to analyze
+ *
+ * Parse an path without root and fills in the appropriate fields
+ * of the @uri structure
+ *
+ * path-rootless = segment-nz *( "/" segment )
+ *
+ * Returns 0 or the error code
+ */
+static int rfc3986_parse_path_rootless(URI *uri, const char **str)
+{
+    const char *cur;
+    int ret;
+
+    cur = *str;
+
+    ret = rfc3986_parse_segment(&cur, 0, 0);
+    if (ret != 0) {
+        return ret;
+    }
+    while (*cur == '/') {
+        cur++;
+        ret = rfc3986_parse_segment(&cur, 0, 1);
+        if (ret != 0) {
+            return ret;
+        }
+    }
+    if (uri != NULL) {
+        g_free(uri->path);
+        if (cur != *str) {
+            if (uri->cleanup & 2) {
+                uri->path = g_strndup(*str, cur - *str);
+            } else {
+                uri->path = uri_string_unescape(*str, cur - *str, NULL);
+            }
+        } else {
+            uri->path = NULL;
+        }
+    }
+    *str = cur;
+    return 0;
+}
+
+/**
+ * rfc3986_parse_path_no_scheme:
+ * @uri:  pointer to an URI structure
+ * @str:  the string to analyze
+ *
+ * Parse an path which is not a scheme and fills in the appropriate fields
+ * of the @uri structure
+ *
+ * path-noscheme = segment-nz-nc *( "/" segment )
+ *
+ * Returns 0 or the error code
+ */
+static int rfc3986_parse_path_no_scheme(URI *uri, const char **str)
+{
+    const char *cur;
+    int ret;
+
+    cur = *str;
+
+    ret = rfc3986_parse_segment(&cur, ':', 0);
+    if (ret != 0) {
+        return ret;
+    }
+    while (*cur == '/') {
+        cur++;
+        ret = rfc3986_parse_segment(&cur, 0, 1);
+        if (ret != 0) {
+            return ret;
+        }
+    }
+    if (uri != NULL) {
+        g_free(uri->path);
+        if (cur != *str) {
+            if (uri->cleanup & 2) {
+                uri->path = g_strndup(*str, cur - *str);
+            } else {
+                uri->path = uri_string_unescape(*str, cur - *str, NULL);
+            }
+        } else {
+            uri->path = NULL;
+        }
+    }
+    *str = cur;
+    return 0;
+}
+
+/**
+ * rfc3986_parse_hier_part:
+ * @uri:  pointer to an URI structure
+ * @str:  the string to analyze
+ *
+ * Parse an hierarchical part and fills in the appropriate fields
+ * of the @uri structure
+ *
+ * hier-part     = "//" authority path-abempty
+ *                / path-absolute
+ *                / path-rootless
+ *                / path-empty
+ *
+ * Returns 0 or the error code
+ */
+static int rfc3986_parse_hier_part(URI *uri, const char **str)
+{
+    const char *cur;
+    int ret;
+
+    cur = *str;
+
+    if ((*cur == '/') && (*(cur + 1) == '/')) {
+        cur += 2;
+        ret = rfc3986_parse_authority(uri, &cur);
+        if (ret != 0) {
+            return ret;
+        }
+        ret = rfc3986_parse_path_ab_empty(uri, &cur);
+        if (ret != 0) {
+            return ret;
+        }
+        *str = cur;
+        return 0;
+    } else if (*cur == '/') {
+        ret = rfc3986_parse_path_absolute(uri, &cur);
+        if (ret != 0) {
+            return ret;
+        }
+    } else if (ISA_PCHAR(cur)) {
+        ret = rfc3986_parse_path_rootless(uri, &cur);
+        if (ret != 0) {
+            return ret;
+        }
+    } else {
+        /* path-empty is effectively empty */
+        if (uri != NULL) {
+            g_free(uri->path);
+            uri->path = NULL;
+        }
+    }
+    *str = cur;
+    return 0;
+}
+
+/**
+ * rfc3986_parse_relative_ref:
+ * @uri:  pointer to an URI structure
+ * @str:  the string to analyze
+ *
+ * Parse an URI string and fills in the appropriate fields
+ * of the @uri structure
+ *
+ * relative-ref  = relative-part [ "?" query ] [ "#" fragment ]
+ * relative-part = "//" authority path-abempty
+ *               / path-absolute
+ *               / path-noscheme
+ *               / path-empty
+ *
+ * Returns 0 or the error code
+ */
+static int rfc3986_parse_relative_ref(URI *uri, const char *str)
+{
+    int ret;
+
+    if ((*str == '/') && (*(str + 1) == '/')) {
+        str += 2;
+        ret = rfc3986_parse_authority(uri, &str);
+        if (ret != 0) {
+            return ret;
+        }
+        ret = rfc3986_parse_path_ab_empty(uri, &str);
+        if (ret != 0) {
+            return ret;
+        }
+    } else if (*str == '/') {
+        ret = rfc3986_parse_path_absolute(uri, &str);
+        if (ret != 0) {
+            return ret;
+        }
+    } else if (ISA_PCHAR(str)) {
+        ret = rfc3986_parse_path_no_scheme(uri, &str);
+        if (ret != 0) {
+            return ret;
+        }
+    } else {
+        /* path-empty is effectively empty */
+        if (uri != NULL) {
+            g_free(uri->path);
+            uri->path = NULL;
+        }
+    }
+
+    if (*str == '?') {
+        str++;
+        ret = rfc3986_parse_query(uri, &str);
+        if (ret != 0) {
+            return ret;
+        }
+    }
+    if (*str == '#') {
+        str++;
+        ret = rfc3986_parse_fragment(uri, &str);
+        if (ret != 0) {
+            return ret;
+        }
+    }
+    if (*str != 0) {
+        uri_clean(uri);
+        return 1;
+    }
+    return 0;
+}
+
+/**
+ * rfc3986_parse:
+ * @uri:  pointer to an URI structure
+ * @str:  the string to analyze
+ *
+ * Parse an URI string and fills in the appropriate fields
+ * of the @uri structure
+ *
+ * scheme ":" hier-part [ "?" query ] [ "#" fragment ]
+ *
+ * Returns 0 or the error code
+ */
+static int rfc3986_parse(URI *uri, const char *str)
+{
+    int ret;
+
+    ret = rfc3986_parse_scheme(uri, &str);
+    if (ret != 0) {
+        return ret;
+    }
+    if (*str != ':') {
+        return 1;
+    }
+    str++;
+    ret = rfc3986_parse_hier_part(uri, &str);
+    if (ret != 0) {
+        return ret;
+    }
+    if (*str == '?') {
+        str++;
+        ret = rfc3986_parse_query(uri, &str);
+        if (ret != 0) {
+            return ret;
+        }
+    }
+    if (*str == '#') {
+        str++;
+        ret = rfc3986_parse_fragment(uri, &str);
+        if (ret != 0) {
+            return ret;
+        }
+    }
+    if (*str != 0) {
+        uri_clean(uri);
+        return 1;
+    }
+    return 0;
+}
+
+/**
+ * rfc3986_parse_uri_reference:
+ * @uri:  pointer to an URI structure
+ * @str:  the string to analyze
+ *
+ * Parse an URI reference string and fills in the appropriate fields
+ * of the @uri structure
+ *
+ * URI-reference = URI / relative-ref
+ *
+ * Returns 0 or the error code
+ */
+static int rfc3986_parse_uri_reference(URI *uri, const char *str)
+{
+    int ret;
+
+    if (str == NULL) {
+        return -1;
+    }
+    uri_clean(uri);
+
+    /*
+     * Try first to parse absolute refs, then fallback to relative if
+     * it fails.
+     */
+    ret = rfc3986_parse(uri, str);
+    if (ret != 0) {
+        uri_clean(uri);
+        ret = rfc3986_parse_relative_ref(uri, str);
+        if (ret != 0) {
+            uri_clean(uri);
+            return ret;
+        }
+    }
+    return 0;
+}
+
+/**
+ * uri_parse:
+ * @str:  the URI string to analyze
+ *
+ * Parse an URI based on RFC 3986
+ *
+ * URI-reference = [ absoluteURI | relativeURI ] [ "#" fragment ]
+ *
+ * Returns a newly built URI or NULL in case of error
+ */
+URI *uri_parse(const char *str)
+{
+    URI *uri;
+    int ret;
+
+    if (str == NULL) {
+        return NULL;
+    }
+    uri = uri_new();
+    ret = rfc3986_parse_uri_reference(uri, str);
+    if (ret) {
+        uri_free(uri);
+        return NULL;
+    }
+    return uri;
+}
+
+/**
+ * uri_parse_into:
+ * @uri:  pointer to an URI structure
+ * @str:  the string to analyze
+ *
+ * Parse an URI reference string based on RFC 3986 and fills in the
+ * appropriate fields of the @uri structure
+ *
+ * URI-reference = URI / relative-ref
+ *
+ * Returns 0 or the error code
+ */
+int uri_parse_into(URI *uri, const char *str)
+{
+    return rfc3986_parse_uri_reference(uri, str);
+}
+
+/**
+ * uri_parse_raw:
+ * @str:  the URI string to analyze
+ * @raw:  if 1 unescaping of URI pieces are disabled
+ *
+ * Parse an URI but allows to keep intact the original fragments.
+ *
+ * URI-reference = URI / relative-ref
+ *
+ * Returns a newly built URI or NULL in case of error
+ */
+URI *uri_parse_raw(const char *str, int raw)
+{
+    URI *uri;
+    int ret;
+
+    if (str == NULL) {
+        return NULL;
+    }
+    uri = uri_new();
+    if (raw) {
+        uri->cleanup |= 2;
+    }
+    ret = uri_parse_into(uri, str);
+    if (ret) {
+        uri_free(uri);
+        return NULL;
+    }
+    return uri;
+}
+
+/************************************************************************
+ *                                                                      *
+ *                    Generic URI structure functions                   *
+ *                                                                      *
+ ************************************************************************/
+
+/**
+ * uri_new:
+ *
+ * Simply creates an empty URI
+ *
+ * Returns the new structure or NULL in case of error
+ */
+URI *uri_new(void)
+{
+    return g_new0(URI, 1);
+}
+
+/**
+ * realloc2n:
+ *
+ * Function to handle properly a reallocation when saving an URI
+ * Also imposes some limit on the length of an URI string output
+ */
+static char *realloc2n(char *ret, int *max)
+{
+    char *temp;
+    int tmp;
+
+    tmp = *max * 2;
+    temp = g_realloc(ret, (tmp + 1));
+    *max = tmp;
+    return temp;
+}
+
+/**
+ * uri_to_string:
+ * @uri:  pointer to an URI
+ *
+ * Save the URI as an escaped string
+ *
+ * Returns a new string (to be deallocated by caller)
+ */
+char *uri_to_string(URI *uri)
+{
+    char *ret = NULL;
+    char *temp;
+    const char *p;
+    int len;
+    int max;
+
+    if (uri == NULL) {
+        return NULL;
+    }
+
+    max = 80;
+    ret = g_malloc(max + 1);
+    len = 0;
+
+    if (uri->scheme != NULL) {
+        p = uri->scheme;
+        while (*p != 0) {
+            if (len >= max) {
+                temp = realloc2n(ret, &max);
+                ret = temp;
+            }
+            ret[len++] = *p++;
+        }
+        if (len >= max) {
+            temp = realloc2n(ret, &max);
+            ret = temp;
+        }
+        ret[len++] = ':';
+    }
+    if (uri->opaque != NULL) {
+        p = uri->opaque;
+        while (*p != 0) {
+            if (len + 3 >= max) {
+                temp = realloc2n(ret, &max);
+                ret = temp;
+            }
+            if (IS_RESERVED(*(p)) || IS_UNRESERVED(*(p))) {
+                ret[len++] = *p++;
+            } else {
+                int val = *(unsigned char *)p++;
+                int hi = val / 0x10, lo = val % 0x10;
+                ret[len++] = '%';
+                ret[len++] = hi + (hi > 9 ? 'A' - 10 : '0');
+                ret[len++] = lo + (lo > 9 ? 'A' - 10 : '0');
+            }
+        }
+    } else {
+        if (uri->server != NULL) {
+            if (len + 3 >= max) {
+                temp = realloc2n(ret, &max);
+                ret = temp;
+            }
+            ret[len++] = '/';
+            ret[len++] = '/';
+            if (uri->user != NULL) {
+                p = uri->user;
+                while (*p != 0) {
+                    if (len + 3 >= max) {
+                        temp = realloc2n(ret, &max);
+                        ret = temp;
+                    }
+                    if ((IS_UNRESERVED(*(p))) || ((*(p) == ';')) ||
+                        ((*(p) == ':')) || ((*(p) == '&')) || ((*(p) == '=')) ||
+                        ((*(p) == '+')) || ((*(p) == '$')) || ((*(p) == ','))) {
+                        ret[len++] = *p++;
+                    } else {
+                        int val = *(unsigned char *)p++;
+                        int hi = val / 0x10, lo = val % 0x10;
+                        ret[len++] = '%';
+                        ret[len++] = hi + (hi > 9 ? 'A' - 10 : '0');
+                        ret[len++] = lo + (lo > 9 ? 'A' - 10 : '0');
+                    }
+                }
+                if (len + 3 >= max) {
+                    temp = realloc2n(ret, &max);
+                    ret = temp;
+                }
+                ret[len++] = '@';
+            }
+            p = uri->server;
+            while (*p != 0) {
+                if (len >= max) {
+                    temp = realloc2n(ret, &max);
+                    ret = temp;
+                }
+                ret[len++] = *p++;
+            }
+            if (uri->port > 0) {
+                if (len + 10 >= max) {
+                    temp = realloc2n(ret, &max);
+                    ret = temp;
+                }
+                len += snprintf(&ret[len], max - len, ":%d", uri->port);
+            }
+        } else if (uri->authority != NULL) {
+            if (len + 3 >= max) {
+                temp = realloc2n(ret, &max);
+                ret = temp;
+            }
+            ret[len++] = '/';
+            ret[len++] = '/';
+            p = uri->authority;
+            while (*p != 0) {
+                if (len + 3 >= max) {
+                    temp = realloc2n(ret, &max);
+                    ret = temp;
+                }
+                if ((IS_UNRESERVED(*(p))) || ((*(p) == '$')) ||
+                    ((*(p) == ',')) || ((*(p) == ';')) || ((*(p) == ':')) ||
+                    ((*(p) == '@')) || ((*(p) == '&')) || ((*(p) == '=')) ||
+                    ((*(p) == '+'))) {
+                    ret[len++] = *p++;
+                } else {
+                    int val = *(unsigned char *)p++;
+                    int hi = val / 0x10, lo = val % 0x10;
+                    ret[len++] = '%';
+                    ret[len++] = hi + (hi > 9 ? 'A' - 10 : '0');
+                    ret[len++] = lo + (lo > 9 ? 'A' - 10 : '0');
+                }
+            }
+        } else if (uri->scheme != NULL) {
+            if (len + 3 >= max) {
+                temp = realloc2n(ret, &max);
+                ret = temp;
+            }
+            ret[len++] = '/';
+            ret[len++] = '/';
+        }
+        if (uri->path != NULL) {
+            p = uri->path;
+            /*
+             * the colon in file:///d: should not be escaped or
+             * Windows accesses fail later.
+             */
+            if ((uri->scheme != NULL) && (p[0] == '/') &&
+                (((p[1] >= 'a') && (p[1] <= 'z')) ||
+                 ((p[1] >= 'A') && (p[1] <= 'Z'))) &&
+                (p[2] == ':') && (!strcmp(uri->scheme, "file"))) {
+                if (len + 3 >= max) {
+                    temp = realloc2n(ret, &max);
+                    ret = temp;
+                }
+                ret[len++] = *p++;
+                ret[len++] = *p++;
+                ret[len++] = *p++;
+            }
+            while (*p != 0) {
+                if (len + 3 >= max) {
+                    temp = realloc2n(ret, &max);
+                    ret = temp;
+                }
+                if ((IS_UNRESERVED(*(p))) || ((*(p) == '/')) ||
+                    ((*(p) == ';')) || ((*(p) == '@')) || ((*(p) == '&')) ||
+                    ((*(p) == '=')) || ((*(p) == '+')) || ((*(p) == '$')) ||
+                    ((*(p) == ','))) {
+                    ret[len++] = *p++;
+                } else {
+                    int val = *(unsigned char *)p++;
+                    int hi = val / 0x10, lo = val % 0x10;
+                    ret[len++] = '%';
+                    ret[len++] = hi + (hi > 9 ? 'A' - 10 : '0');
+                    ret[len++] = lo + (lo > 9 ? 'A' - 10 : '0');
+                }
+            }
+        }
+        if (uri->query != NULL) {
+            if (len + 1 >= max) {
+                temp = realloc2n(ret, &max);
+                ret = temp;
+            }
+            ret[len++] = '?';
+            p = uri->query;
+            while (*p != 0) {
+                if (len + 1 >= max) {
+                    temp = realloc2n(ret, &max);
+                    ret = temp;
+                }
+                ret[len++] = *p++;
+            }
+        }
+    }
+    if (uri->fragment != NULL) {
+        if (len + 3 >= max) {
+            temp = realloc2n(ret, &max);
+            ret = temp;
+        }
+        ret[len++] = '#';
+        p = uri->fragment;
+        while (*p != 0) {
+            if (len + 3 >= max) {
+                temp = realloc2n(ret, &max);
+                ret = temp;
+            }
+            if ((IS_UNRESERVED(*(p))) || (IS_RESERVED(*(p)))) {
+                ret[len++] = *p++;
+            } else {
+                int val = *(unsigned char *)p++;
+                int hi = val / 0x10, lo = val % 0x10;
+                ret[len++] = '%';
+                ret[len++] = hi + (hi > 9 ? 'A' - 10 : '0');
+                ret[len++] = lo + (lo > 9 ? 'A' - 10 : '0');
+            }
+        }
+    }
+    if (len >= max) {
+        temp = realloc2n(ret, &max);
+        ret = temp;
+    }
+    ret[len] = 0;
+    return ret;
+}
+
+/**
+ * uri_clean:
+ * @uri:  pointer to an URI
+ *
+ * Make sure the URI struct is free of content
+ */
+static void uri_clean(URI *uri)
+{
+    if (uri == NULL) {
+        return;
+    }
+
+    g_free(uri->scheme);
+    uri->scheme = NULL;
+    g_free(uri->server);
+    uri->server = NULL;
+    g_free(uri->user);
+    uri->user = NULL;
+    g_free(uri->path);
+    uri->path = NULL;
+    g_free(uri->fragment);
+    uri->fragment = NULL;
+    g_free(uri->opaque);
+    uri->opaque = NULL;
+    g_free(uri->authority);
+    uri->authority = NULL;
+    g_free(uri->query);
+    uri->query = NULL;
+}
+
+/**
+ * uri_free:
+ * @uri:  pointer to an URI
+ *
+ * Free up the URI struct
+ */
+void uri_free(URI *uri)
+{
+    uri_clean(uri);
+    g_free(uri);
+}
+
+/************************************************************************
+ *                                                                      *
+ *                           Helper functions                           *
+ *                                                                      *
+ ************************************************************************/
+
+/**
+ * normalize_uri_path:
+ * @path:  pointer to the path string
+ *
+ * Applies the 5 normalization steps to a path string--that is, RFC 2396
+ * Section 5.2, steps 6.c through 6.g.
+ *
+ * Normalization occurs directly on the string, no new allocation is done
+ *
+ * Returns 0 or an error code
+ */
+static int normalize_uri_path(char *path)
+{
+    char *cur, *out;
+
+    if (path == NULL) {
+        return -1;
+    }
+
+    /* Skip all initial "/" chars.  We want to get to the beginning of the
+     * first non-empty segment.
+     */
+    cur = path;
+    while (cur[0] == '/') {
+        ++cur;
+    }
+    if (cur[0] == '\0') {
+        return 0;
+    }
+
+    /* Keep everything we've seen so far.  */
+    out = cur;
+
+    /*
+     * Analyze each segment in sequence for cases (c) and (d).
+     */
+    while (cur[0] != '\0') {
+        /*
+         * c) All occurrences of "./", where "." is a complete path segment,
+         *    are removed from the buffer string.
+         */
+        if ((cur[0] == '.') && (cur[1] == '/')) {
+            cur += 2;
+            /* '//' normalization should be done at this point too */
+            while (cur[0] == '/') {
+                cur++;
+            }
+            continue;
+        }
+
+        /*
+         * d) If the buffer string ends with "." as a complete path segment,
+         *    that "." is removed.
+         */
+        if ((cur[0] == '.') && (cur[1] == '\0')) {
+            break;
+        }
+
+        /* Otherwise keep the segment.  */
+        while (cur[0] != '/') {
+            if (cur[0] == '\0') {
+                goto done_cd;
+            }
+            (out++)[0] = (cur++)[0];
+        }
+        /* nomalize // */
+        while ((cur[0] == '/') && (cur[1] == '/')) {
+            cur++;
+        }
+
+        (out++)[0] = (cur++)[0];
+    }
+done_cd:
+    out[0] = '\0';
+
+    /* Reset to the beginning of the first segment for the next sequence.  */
+    cur = path;
+    while (cur[0] == '/') {
+        ++cur;
+    }
+    if (cur[0] == '\0') {
+        return 0;
+    }
+
+    /*
+     * Analyze each segment in sequence for cases (e) and (f).
+     *
+     * e) All occurrences of "/../", where  is a
+     *    complete path segment not equal to "..", are removed from the
+     *    buffer string.  Removal of these path segments is performed
+     *    iteratively, removing the leftmost matching pattern on each
+     *    iteration, until no matching pattern remains.
+     *
+     * f) If the buffer string ends with "/..", where 
+     *    is a complete path segment not equal to "..", that
+     *    "/.." is removed.
+     *
+     * To satisfy the "iterative" clause in (e), we need to collapse the
+     * string every time we find something that needs to be removed.  Thus,
+     * we don't need to keep two pointers into the string: we only need a
+     * "current position" pointer.
+     */
+    while (1) {
+        char *segp, *tmp;
+
+        /* At the beginning of each iteration of this loop, "cur" points to
+         * the first character of the segment we want to examine.
+         */
+
+        /* Find the end of the current segment.  */
+        segp = cur;
+        while ((segp[0] != '/') && (segp[0] != '\0')) {
+            ++segp;
+        }
+
+        /* If this is the last segment, we're done (we need at least two
+         * segments to meet the criteria for the (e) and (f) cases).
+         */
+        if (segp[0] == '\0') {
+            break;
+        }
+
+        /* If the first segment is "..", or if the next segment _isn't_ "..",
+         * keep this segment and try the next one.
+         */
+        ++segp;
+        if (((cur[0] == '.') && (cur[1] == '.') && (segp == cur + 3)) ||
+            ((segp[0] != '.') || (segp[1] != '.') ||
+             ((segp[2] != '/') && (segp[2] != '\0')))) {
+            cur = segp;
+            continue;
+        }
+
+        /* If we get here, remove this segment and the next one and back up
+         * to the previous segment (if there is one), to implement the
+         * "iteratively" clause.  It's pretty much impossible to back up
+         * while maintaining two pointers into the buffer, so just compact
+         * the whole buffer now.
+         */
+
+        /* If this is the end of the buffer, we're done.  */
+        if (segp[2] == '\0') {
+            cur[0] = '\0';
+            break;
+        }
+        /* Valgrind complained, strcpy(cur, segp + 3); */
+        /* string will overlap, do not use strcpy */
+        tmp = cur;
+        segp += 3;
+        while ((*tmp++ = *segp++) != 0) {
+            /* No further work */
+        }
+
+        /* If there are no previous segments, then keep going from here.  */
+        segp = cur;
+        while ((segp > path) && ((--segp)[0] == '/')) {
+            /* No further work */
+        }
+        if (segp == path) {
+            continue;
+        }
+
+        /* "segp" is pointing to the end of a previous segment; find it's
+         * start.  We need to back up to the previous segment and start
+         * over with that to handle things like "foo/bar/../..".  If we
+         * don't do this, then on the first pass we'll remove the "bar/..",
+         * but be pointing at the second ".." so we won't realize we can also
+         * remove the "foo/..".
+         */
+        cur = segp;
+        while ((cur > path) && (cur[-1] != '/')) {
+            --cur;
+        }
+    }
+    out[0] = '\0';
+
+    /*
+     * g) If the resulting buffer string still begins with one or more
+     *    complete path segments of "..", then the reference is
+     *    considered to be in error. Implementations may handle this
+     *    error by retaining these components in the resolved path (i.e.,
+     *    treating them as part of the final URI), by removing them from
+     *    the resolved path (i.e., discarding relative levels above the
+     *    root), or by avoiding traversal of the reference.
+     *
+     * We discard them from the final path.
+     */
+    if (path[0] == '/') {
+        cur = path;
+        while ((cur[0] == '/') && (cur[1] == '.') && (cur[2] == '.') &&
+               ((cur[3] == '/') || (cur[3] == '\0'))) {
+            cur += 3;
+        }
+
+        if (cur != path) {
+            out = path;
+            while (cur[0] != '\0') {
+                (out++)[0] = (cur++)[0];
+            }
+            out[0] = 0;
+        }
+    }
+
+    return 0;
+}
+
+static int is_hex(char c)
+{
+    if (((c >= '0') && (c <= '9')) || ((c >= 'a') && (c <= 'f')) ||
+        ((c >= 'A') && (c <= 'F'))) {
+        return 1;
+    }
+    return 0;
+}
+
+/**
+ * uri_string_unescape:
+ * @str:  the string to unescape
+ * @len:   the length in bytes to unescape (or <= 0 to indicate full string)
+ * @target:  optional destination buffer
+ *
+ * Unescaping routine, but does not check that the string is an URI. The
+ * output is a direct unsigned char translation of %XX values (no encoding)
+ * Note that the length of the result can only be smaller or same size as
+ * the input string.
+ *
+ * Returns a copy of the string, but unescaped, will return NULL only in case
+ * of error
+ */
+char *uri_string_unescape(const char *str, int len, char *target)
+{
+    char *ret, *out;
+    const char *in;
+
+    if (str == NULL) {
+        return NULL;
+    }
+    if (len <= 0) {
+        len = strlen(str);
+    }
+    if (len < 0) {
+        return NULL;
+    }
+
+    if (target == NULL) {
+        ret = g_malloc(len + 1);
+    } else {
+        ret = target;
+    }
+    in = str;
+    out = ret;
+    while (len > 0) {
+        if ((len > 2) && (*in == '%') && (is_hex(in[1])) && (is_hex(in[2]))) {
+            in++;
+            if ((*in >= '0') && (*in <= '9')) {
+                *out = (*in - '0');
+            } else if ((*in >= 'a') && (*in <= 'f')) {
+                *out = (*in - 'a') + 10;
+            } else if ((*in >= 'A') && (*in <= 'F')) {
+                *out = (*in - 'A') + 10;
+            }
+            in++;
+            if ((*in >= '0') && (*in <= '9')) {
+                *out = *out * 16 + (*in - '0');
+            } else if ((*in >= 'a') && (*in <= 'f')) {
+                *out = *out * 16 + (*in - 'a') + 10;
+            } else if ((*in >= 'A') && (*in <= 'F')) {
+                *out = *out * 16 + (*in - 'A') + 10;
+            }
+            in++;
+            len -= 3;
+            out++;
+        } else {
+            *out++ = *in++;
+            len--;
+        }
+    }
+    *out = 0;
+    return ret;
+}
+
+/**
+ * uri_string_escape:
+ * @str:  string to escape
+ * @list: exception list string of chars not to escape
+ *
+ * This routine escapes a string to hex, ignoring reserved characters (a-z)
+ * and the characters in the exception list.
+ *
+ * Returns a new escaped string or NULL in case of error.
+ */
+char *uri_string_escape(const char *str, const char *list)
+{
+    char *ret, ch;
+    char *temp;
+    const char *in;
+    int len, out;
+
+    if (str == NULL) {
+        return NULL;
+    }
+    if (str[0] == 0) {
+        return g_strdup(str);
+    }
+    len = strlen(str);
+    if (!(len > 0)) {
+        return NULL;
+    }
+
+    len += 20;
+    ret = g_malloc(len);
+    in = str;
+    out = 0;
+    while (*in != 0) {
+        if (len - out <= 3) {
+            temp = realloc2n(ret, &len);
+            ret = temp;
+        }
+
+        ch = *in;
+
+        if ((ch != '@') && (!IS_UNRESERVED(ch)) && (!strchr(list, ch))) {
+            unsigned char val;
+            ret[out++] = '%';
+            val = ch >> 4;
+            if (val <= 9) {
+                ret[out++] = '0' + val;
+            } else {
+                ret[out++] = 'A' + val - 0xA;
+            }
+            val = ch & 0xF;
+            if (val <= 9) {
+                ret[out++] = '0' + val;
+            } else {
+                ret[out++] = 'A' + val - 0xA;
+            }
+            in++;
+        } else {
+            ret[out++] = *in++;
+        }
+    }
+    ret[out] = 0;
+    return ret;
+}
+
+/************************************************************************
+ *                                                                      *
+ *                           Public functions                           *
+ *                                                                      *
+ ************************************************************************/
+
+/**
+ * uri_resolve:
+ * @URI:  the URI instance found in the document
+ * @base:  the base value
+ *
+ * Computes he final URI of the reference done by checking that
+ * the given URI is valid, and building the final URI using the
+ * base URI. This is processed according to section 5.2 of the
+ * RFC 2396
+ *
+ * 5.2. Resolving Relative References to Absolute Form
+ *
+ * Returns a new URI string (to be freed by the caller) or NULL in case
+ *         of error.
+ */
+char *uri_resolve(const char *uri, const char *base)
+{
+    char *val = NULL;
+    int ret, len, indx, cur, out;
+    URI *ref = NULL;
+    URI *bas = NULL;
+    URI *res = NULL;
+
+    /*
+     * 1) The URI reference is parsed into the potential four components and
+     *    fragment identifier, as described in Section 4.3.
+     *
+     *    NOTE that a completely empty URI is treated by modern browsers
+     *    as a reference to "." rather than as a synonym for the current
+     *    URI.  Should we do that here?
+     */
+    if (uri == NULL) {
+        ret = -1;
+    } else {
+        if (*uri) {
+            ref = uri_new();
+            ret = uri_parse_into(ref, uri);
+        } else {
+            ret = 0;
+        }
+    }
+    if (ret != 0) {
+        goto done;
+    }
+    if ((ref != NULL) && (ref->scheme != NULL)) {
+        /*
+         * The URI is absolute don't modify.
+         */
+        val = g_strdup(uri);
+        goto done;
+    }
+    if (base == NULL) {
+        ret = -1;
+    } else {
+        bas = uri_new();
+        ret = uri_parse_into(bas, base);
+    }
+    if (ret != 0) {
+        if (ref) {
+            val = uri_to_string(ref);
+        }
+        goto done;
+    }
+    if (ref == NULL) {
+        /*
+         * the base fragment must be ignored
+         */
+        g_free(bas->fragment);
+        bas->fragment = NULL;
+        val = uri_to_string(bas);
+        goto done;
+    }
+
+    /*
+     * 2) If the path component is empty and the scheme, authority, and
+     *    query components are undefined, then it is a reference to the
+     *    current document and we are done.  Otherwise, the reference URI's
+     *    query and fragment components are defined as found (or not found)
+     *    within the URI reference and not inherited from the base URI.
+     *
+     *    NOTE that in modern browsers, the parsing differs from the above
+     *    in the following aspect:  the query component is allowed to be
+     *    defined while still treating this as a reference to the current
+     *    document.
+     */
+    res = uri_new();
+    if ((ref->scheme == NULL) && (ref->path == NULL) &&
+        ((ref->authority == NULL) && (ref->server == NULL))) {
+        res->scheme = g_strdup(bas->scheme);
+        if (bas->authority != NULL) {
+            res->authority = g_strdup(bas->authority);
+        } else if (bas->server != NULL) {
+            res->server = g_strdup(bas->server);
+            res->user = g_strdup(bas->user);
+            res->port = bas->port;
+        }
+        res->path = g_strdup(bas->path);
+        if (ref->query != NULL) {
+            res->query = g_strdup(ref->query);
+        } else {
+            res->query = g_strdup(bas->query);
+        }
+        res->fragment = g_strdup(ref->fragment);
+        goto step_7;
+    }
+
+    /*
+     * 3) If the scheme component is defined, indicating that the reference
+     *    starts with a scheme name, then the reference is interpreted as an
+     *    absolute URI and we are done.  Otherwise, the reference URI's
+     *    scheme is inherited from the base URI's scheme component.
+     */
+    if (ref->scheme != NULL) {
+        val = uri_to_string(ref);
+        goto done;
+    }
+    res->scheme = g_strdup(bas->scheme);
+
+    res->query = g_strdup(ref->query);
+    res->fragment = g_strdup(ref->fragment);
+
+    /*
+     * 4) If the authority component is defined, then the reference is a
+     *    network-path and we skip to step 7.  Otherwise, the reference
+     *    URI's authority is inherited from the base URI's authority
+     *    component, which will also be undefined if the URI scheme does not
+     *    use an authority component.
+     */
+    if ((ref->authority != NULL) || (ref->server != NULL)) {
+        if (ref->authority != NULL) {
+            res->authority = g_strdup(ref->authority);
+        } else {
+            res->server = g_strdup(ref->server);
+            res->user = g_strdup(ref->user);
+            res->port = ref->port;
+        }
+        res->path = g_strdup(ref->path);
+        goto step_7;
+    }
+    if (bas->authority != NULL) {
+        res->authority = g_strdup(bas->authority);
+    } else if (bas->server != NULL) {
+        res->server = g_strdup(bas->server);
+        res->user = g_strdup(bas->user);
+        res->port = bas->port;
+    }
+
+    /*
+     * 5) If the path component begins with a slash character ("/"), then
+     *    the reference is an absolute-path and we skip to step 7.
+     */
+    if ((ref->path != NULL) && (ref->path[0] == '/')) {
+        res->path = g_strdup(ref->path);
+        goto step_7;
+    }
+
+    /*
+     * 6) If this step is reached, then we are resolving a relative-path
+     *    reference.  The relative path needs to be merged with the base
+     *    URI's path.  Although there are many ways to do this, we will
+     *    describe a simple method using a separate string buffer.
+     *
+     * Allocate a buffer large enough for the result string.
+     */
+    len = 2; /* extra / and 0 */
+    if (ref->path != NULL) {
+        len += strlen(ref->path);
+    }
+    if (bas->path != NULL) {
+        len += strlen(bas->path);
+    }
+    res->path = g_malloc(len);
+    res->path[0] = 0;
+
+    /*
+     * a) All but the last segment of the base URI's path component is
+     *    copied to the buffer.  In other words, any characters after the
+     *    last (right-most) slash character, if any, are excluded.
+     */
+    cur = 0;
+    out = 0;
+    if (bas->path != NULL) {
+        while (bas->path[cur] != 0) {
+            while ((bas->path[cur] != 0) && (bas->path[cur] != '/')) {
+                cur++;
+            }
+            if (bas->path[cur] == 0) {
+                break;
+            }
+
+            cur++;
+            while (out < cur) {
+                res->path[out] = bas->path[out];
+                out++;
+            }
+        }
+    }
+    res->path[out] = 0;
+
+    /*
+     * b) The reference's path component is appended to the buffer
+     *    string.
+     */
+    if (ref->path != NULL && ref->path[0] != 0) {
+        indx = 0;
+        /*
+         * Ensure the path includes a '/'
+         */
+        if ((out == 0) && (bas->server != NULL)) {
+            res->path[out++] = '/';
+        }
+        while (ref->path[indx] != 0) {
+            res->path[out++] = ref->path[indx++];
+        }
+    }
+    res->path[out] = 0;
+
+    /*
+     * Steps c) to h) are really path normalization steps
+     */
+    normalize_uri_path(res->path);
+
+step_7:
+
+    /*
+     * 7) The resulting URI components, including any inherited from the
+     *    base URI, are recombined to give the absolute form of the URI
+     *    reference.
+     */
+    val = uri_to_string(res);
+
+done:
+    if (ref != NULL) {
+        uri_free(ref);
+    }
+    if (bas != NULL) {
+        uri_free(bas);
+    }
+    if (res != NULL) {
+        uri_free(res);
+    }
+    return val;
+}
+
+/**
+ * uri_resolve_relative:
+ * @URI:  the URI reference under consideration
+ * @base:  the base value
+ *
+ * Expresses the URI of the reference in terms relative to the
+ * base.  Some examples of this operation include:
+ *     base = "http://site1.com/docs/book1.html"
+ *        URI input                        URI returned
+ *     docs/pic1.gif                    pic1.gif
+ *     docs/img/pic1.gif                img/pic1.gif
+ *     img/pic1.gif                     ../img/pic1.gif
+ *     http://site1.com/docs/pic1.gif   pic1.gif
+ *     http://site2.com/docs/pic1.gif   http://site2.com/docs/pic1.gif
+ *
+ *     base = "docs/book1.html"
+ *        URI input                        URI returned
+ *     docs/pic1.gif                    pic1.gif
+ *     docs/img/pic1.gif                img/pic1.gif
+ *     img/pic1.gif                     ../img/pic1.gif
+ *     http://site1.com/docs/pic1.gif   http://site1.com/docs/pic1.gif
+ *
+ *
+ * Note: if the URI reference is really weird or complicated, it may be
+ *       worthwhile to first convert it into a "nice" one by calling
+ *       uri_resolve (using 'base') before calling this routine,
+ *       since this routine (for reasonable efficiency) assumes URI has
+ *       already been through some validation.
+ *
+ * Returns a new URI string (to be freed by the caller) or NULL in case
+ * error.
+ */
+char *uri_resolve_relative(const char *uri, const char *base)
+{
+    char *val = NULL;
+    int ret;
+    int ix;
+    int pos = 0;
+    int nbslash = 0;
+    int len;
+    URI *ref = NULL;
+    URI *bas = NULL;
+    char *bptr, *uptr, *vptr;
+    int remove_path = 0;
+
+    if ((uri == NULL) || (*uri == 0)) {
+        return NULL;
+    }
+
+    /*
+     * First parse URI into a standard form
+     */
+    ref = uri_new();
+    /* If URI not already in "relative" form */
+    if (uri[0] != '.') {
+        ret = uri_parse_into(ref, uri);
+        if (ret != 0) {
+            goto done; /* Error in URI, return NULL */
+        }
+    } else {
+        ref->path = g_strdup(uri);
+    }
+
+    /*
+     * Next parse base into the same standard form
+     */
+    if ((base == NULL) || (*base == 0)) {
+        val = g_strdup(uri);
+        goto done;
+    }
+    bas = uri_new();
+    if (base[0] != '.') {
+        ret = uri_parse_into(bas, base);
+        if (ret != 0) {
+            goto done; /* Error in base, return NULL */
+        }
+    } else {
+        bas->path = g_strdup(base);
+    }
+
+    /*
+     * If the scheme / server on the URI differs from the base,
+     * just return the URI
+     */
+    if ((ref->scheme != NULL) &&
+        ((bas->scheme == NULL) || (strcmp(bas->scheme, ref->scheme)) ||
+         (strcmp(bas->server, ref->server)))) {
+        val = g_strdup(uri);
+        goto done;
+    }
+    if (bas->path == ref->path ||
+        (bas->path && ref->path && !strcmp(bas->path, ref->path))) {
+        val = g_strdup("");
+        goto done;
+    }
+    if (bas->path == NULL) {
+        val = g_strdup(ref->path);
+        goto done;
+    }
+    if (ref->path == NULL) {
+        ref->path = (char *)"/";
+        remove_path = 1;
+    }
+
+    /*
+     * At this point (at last!) we can compare the two paths
+     *
+     * First we take care of the special case where either of the
+     * two path components may be missing (bug 316224)
+     */
+    if (bas->path == NULL) {
+        if (ref->path != NULL) {
+            uptr = ref->path;
+            if (*uptr == '/') {
+                uptr++;
+            }
+            /* exception characters from uri_to_string */
+            val = uri_string_escape(uptr, "/;&=+$,");
+        }
+        goto done;
+    }
+    bptr = bas->path;
+    if (ref->path == NULL) {
+        for (ix = 0; bptr[ix] != 0; ix++) {
+            if (bptr[ix] == '/') {
+                nbslash++;
+            }
+        }
+        uptr = NULL;
+        len = 1; /* this is for a string terminator only */
+    } else {
+        /*
+         * Next we compare the two strings and find where they first differ
+         */
+        if ((ref->path[pos] == '.') && (ref->path[pos + 1] == '/')) {
+            pos += 2;
+        }
+        if ((*bptr == '.') && (bptr[1] == '/')) {
+            bptr += 2;
+        } else if ((*bptr == '/') && (ref->path[pos] != '/')) {
+            bptr++;
+        }
+        while ((bptr[pos] == ref->path[pos]) && (bptr[pos] != 0)) {
+            pos++;
+        }
+
+        if (bptr[pos] == ref->path[pos]) {
+            val = g_strdup("");
+            goto done; /* (I can't imagine why anyone would do this) */
+        }
+
+        /*
+         * In URI, "back up" to the last '/' encountered.  This will be the
+         * beginning of the "unique" suffix of URI
+         */
+        ix = pos;
+        if ((ref->path[ix] == '/') && (ix > 0)) {
+            ix--;
+        } else if ((ref->path[ix] == 0) && (ix > 1)
+                && (ref->path[ix - 1] == '/')) {
+            ix -= 2;
+        }
+        for (; ix > 0; ix--) {
+            if (ref->path[ix] == '/') {
+                break;
+            }
+        }
+        if (ix == 0) {
+            uptr = ref->path;
+        } else {
+            ix++;
+            uptr = &ref->path[ix];
+        }
+
+        /*
+         * In base, count the number of '/' from the differing point
+         */
+        if (bptr[pos] != ref->path[pos]) { /* check for trivial URI == base */
+            for (; bptr[ix] != 0; ix++) {
+                if (bptr[ix] == '/') {
+                    nbslash++;
+                }
+            }
+        }
+        len = strlen(uptr) + 1;
+    }
+
+    if (nbslash == 0) {
+        if (uptr != NULL) {
+            /* exception characters from uri_to_string */
+            val = uri_string_escape(uptr, "/;&=+$,");
+        }
+        goto done;
+    }
+
+    /*
+     * Allocate just enough space for the returned string -
+     * length of the remainder of the URI, plus enough space
+     * for the "../" groups, plus one for the terminator
+     */
+    val = g_malloc(len + 3 * nbslash);
+    vptr = val;
+    /*
+     * Put in as many "../" as needed
+     */
+    for (; nbslash > 0; nbslash--) {
+        *vptr++ = '.';
+        *vptr++ = '.';
+        *vptr++ = '/';
+    }
+    /*
+     * Finish up with the end of the URI
+     */
+    if (uptr != NULL) {
+        if ((vptr > val) && (len > 0) && (uptr[0] == '/') &&
+            (vptr[-1] == '/')) {
+            memcpy(vptr, uptr + 1, len - 1);
+            vptr[len - 2] = 0;
+        } else {
+            memcpy(vptr, uptr, len);
+            vptr[len - 1] = 0;
+        }
+    } else {
+        vptr[len - 1] = 0;
+    }
+
+    /* escape the freshly-built path */
+    vptr = val;
+    /* exception characters from uri_to_string */
+    val = uri_string_escape(vptr, "/;&=+$,");
+    g_free(vptr);
+
+done:
+    /*
+     * Free the working variables
+     */
+    if (remove_path != 0) {
+        ref->path = NULL;
+    }
+    if (ref != NULL) {
+        uri_free(ref);
+    }
+    if (bas != NULL) {
+        uri_free(bas);
+    }
+
+    return val;
+}
+
+/*
+ * Utility functions to help parse and assemble query strings.
+ */
+
+struct QueryParams *query_params_new(int init_alloc)
+{
+    struct QueryParams *ps;
+
+    if (init_alloc <= 0) {
+        init_alloc = 1;
+    }
+
+    ps = g_new(QueryParams, 1);
+    ps->n = 0;
+    ps->alloc = init_alloc;
+    ps->p = g_new(QueryParam, ps->alloc);
+
+    return ps;
+}
+
+/* Ensure there is space to store at least one more parameter
+ * at the end of the set.
+ */
+static int query_params_append(struct QueryParams *ps, const char *name,
+                               const char *value)
+{
+    if (ps->n >= ps->alloc) {
+        ps->p = g_renew(QueryParam, ps->p, ps->alloc * 2);
+        ps->alloc *= 2;
+    }
+
+    ps->p[ps->n].name = g_strdup(name);
+    ps->p[ps->n].value = g_strdup(value);
+    ps->p[ps->n].ignore = 0;
+    ps->n++;
+
+    return 0;
+}
+
+void query_params_free(struct QueryParams *ps)
+{
+    int i;
+
+    for (i = 0; i < ps->n; ++i) {
+        g_free(ps->p[i].name);
+        g_free(ps->p[i].value);
+    }
+    g_free(ps->p);
+    g_free(ps);
+}
+
+struct QueryParams *query_params_parse(const char *query)
+{
+    struct QueryParams *ps;
+    const char *end, *eq;
+
+    ps = query_params_new(0);
+    if (!query || query[0] == '\0') {
+        return ps;
+    }
+
+    while (*query) {
+        char *name = NULL, *value = NULL;
+
+        /* Find the next separator, or end of the string. */
+        end = strchr(query, '&');
+        if (!end) {
+            end = qemu_strchrnul(query, ';');
+        }
+
+        /* Find the first '=' character between here and end. */
+        eq = strchr(query, '=');
+        if (eq && eq >= end) {
+            eq = NULL;
+        }
+
+        /* Empty section (eg. "&&"). */
+        if (end == query) {
+            goto next;
+        }
+
+        /* If there is no '=' character, then we have just "name"
+         * and consistent with CGI.pm we assume value is "".
+         */
+        else if (!eq) {
+            name = uri_string_unescape(query, end - query, NULL);
+            value = NULL;
+        }
+        /* Or if we have "name=" here (works around annoying
+         * problem when calling uri_string_unescape with len = 0).
+         */
+        else if (eq + 1 == end) {
+            name = uri_string_unescape(query, eq - query, NULL);
+            value = g_new0(char, 1);
+        }
+        /* If the '=' character is at the beginning then we have
+         * "=value" and consistent with CGI.pm we _ignore_ this.
+         */
+        else if (query == eq) {
+            goto next;
+        }
+
+        /* Otherwise it's "name=value". */
+        else {
+            name = uri_string_unescape(query, eq - query, NULL);
+            value = uri_string_unescape(eq + 1, end - (eq + 1), NULL);
+        }
+
+        /* Append to the parameter set. */
+        query_params_append(ps, name, value);
+        g_free(name);
+        g_free(value);
+
+    next:
+        query = end;
+        if (*query) {
+            query++; /* skip '&' separator */
+        }
+    }
+
+    return ps;
+}
diff --git a/util/uuid.c b/util/uuid.c
new file mode 100644
index 000000000..b1108dde7
--- /dev/null
+++ b/util/uuid.c
@@ -0,0 +1,118 @@
+/*
+ *  QEMU UUID functions
+ *
+ *  Copyright 2016 Red Hat, Inc.
+ *
+ *  Authors:
+ *   Fam Zheng 
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the Free
+ * Software Foundation; either version 2 of the License, or (at your option)
+ * any later version.
+ *
+ */
+
+#include "qemu/osdep.h"
+#include "qemu/uuid.h"
+#include "qemu/bswap.h"
+
+void qemu_uuid_generate(QemuUUID *uuid)
+{
+    int i;
+    uint32_t tmp[4];
+
+    QEMU_BUILD_BUG_ON(sizeof(QemuUUID) != 16);
+
+    for (i = 0; i < 4; ++i) {
+        tmp[i] = g_random_int();
+    }
+    memcpy(uuid, tmp, sizeof(tmp));
+    /* Set the two most significant bits (bits 6 and 7) of the
+      clock_seq_hi_and_reserved to zero and one, respectively. */
+    uuid->data[8] = (uuid->data[8] & 0x3f) | 0x80;
+    /* Set the four most significant bits (bits 12 through 15) of the
+      time_hi_and_version field to the 4-bit version number.
+      */
+    uuid->data[6] = (uuid->data[6] & 0xf) | 0x40;
+}
+
+int qemu_uuid_is_null(const QemuUUID *uu)
+{
+    static QemuUUID null_uuid;
+    return qemu_uuid_is_equal(uu, &null_uuid);
+}
+
+int qemu_uuid_is_equal(const QemuUUID *lhv, const QemuUUID *rhv)
+{
+    return memcmp(lhv, rhv, sizeof(QemuUUID)) == 0;
+}
+
+void qemu_uuid_unparse(const QemuUUID *uuid, char *out)
+{
+    const unsigned char *uu = &uuid->data[0];
+    snprintf(out, UUID_FMT_LEN + 1, UUID_FMT,
+             uu[0], uu[1], uu[2], uu[3], uu[4], uu[5], uu[6], uu[7],
+             uu[8], uu[9], uu[10], uu[11], uu[12], uu[13], uu[14], uu[15]);
+}
+
+char *qemu_uuid_unparse_strdup(const QemuUUID *uuid)
+{
+    const unsigned char *uu = &uuid->data[0];
+    return g_strdup_printf(UUID_FMT,
+                           uu[0], uu[1], uu[2], uu[3], uu[4], uu[5], uu[6],
+                           uu[7], uu[8], uu[9], uu[10], uu[11], uu[12],
+                           uu[13], uu[14], uu[15]);
+}
+
+static bool qemu_uuid_is_valid(const char *str)
+{
+    int i;
+
+    for (i = 0; i < strlen(str); i++) {
+        const char c = str[i];
+        if (i == 8 || i == 13 || i == 18 || i == 23) {
+            if (str[i] != '-') {
+                return false;
+            }
+        } else {
+            if ((c >= '0' && c <= '9') ||
+                (c >= 'A' && c <= 'F') ||
+                (c >= 'a' && c <= 'f')) {
+                continue;
+            }
+            return false;
+        }
+    }
+    return i == 36;
+}
+
+int qemu_uuid_parse(const char *str, QemuUUID *uuid)
+{
+    unsigned char *uu = &uuid->data[0];
+    int ret;
+
+    if (!qemu_uuid_is_valid(str)) {
+        return -1;
+    }
+
+    ret = sscanf(str, UUID_FMT, &uu[0], &uu[1], &uu[2], &uu[3],
+                 &uu[4], &uu[5], &uu[6], &uu[7], &uu[8], &uu[9],
+                 &uu[10], &uu[11], &uu[12], &uu[13], &uu[14],
+                 &uu[15]);
+
+    if (ret != 16) {
+        return -1;
+    }
+    return 0;
+}
+
+/* Swap from UUID format endian (BE) to the opposite or vice versa.
+ */
+QemuUUID qemu_uuid_bswap(QemuUUID uuid)
+{
+    bswap32s(&uuid.fields.time_low);
+    bswap16s(&uuid.fields.time_mid);
+    bswap16s(&uuid.fields.time_high_and_version);
+    return uuid;
+}
diff --git a/util/vfio-helpers.c b/util/vfio-helpers.c
new file mode 100644
index 000000000..97dfa3fd5
--- /dev/null
+++ b/util/vfio-helpers.c
@@ -0,0 +1,863 @@
+/*
+ * VFIO utility
+ *
+ * Copyright 2016 - 2018 Red Hat, Inc.
+ *
+ * Authors:
+ *   Fam Zheng 
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
+ * See the COPYING file in the top-level directory.
+ */
+
+#include "qemu/osdep.h"
+#include 
+#include 
+#include "qapi/error.h"
+#include "exec/ramlist.h"
+#include "exec/cpu-common.h"
+#include "exec/memory.h"
+#include "trace.h"
+#include "qemu/error-report.h"
+#include "standard-headers/linux/pci_regs.h"
+#include "qemu/event_notifier.h"
+#include "qemu/vfio-helpers.h"
+#include "qemu/lockable.h"
+#include "trace.h"
+
+#define QEMU_VFIO_DEBUG 0
+
+#define QEMU_VFIO_IOVA_MIN 0x10000ULL
+/* XXX: Once VFIO exposes the iova bit width in the IOMMU capability interface,
+ * we can use a runtime limit; alternatively it's also possible to do platform
+ * specific detection by reading sysfs entries. Until then, 39 is a safe bet.
+ **/
+#define QEMU_VFIO_IOVA_MAX (1ULL << 39)
+
+typedef struct {
+    /* Page aligned addr. */
+    void *host;
+    size_t size;
+    uint64_t iova;
+} IOVAMapping;
+
+struct IOVARange {
+    uint64_t start;
+    uint64_t end;
+};
+
+struct QEMUVFIOState {
+    QemuMutex lock;
+
+    /* These fields are protected by BQL */
+    int container;
+    int group;
+    int device;
+    RAMBlockNotifier ram_notifier;
+    struct vfio_region_info config_region_info, bar_region_info[6];
+    struct IOVARange *usable_iova_ranges;
+    uint8_t nb_iova_ranges;
+
+    /* These fields are protected by @lock */
+    /* VFIO's IO virtual address space is managed by splitting into a few
+     * sections:
+     *
+     * ---------------       <= 0
+     * |xxxxxxxxxxxxx|
+     * |-------------|       <= QEMU_VFIO_IOVA_MIN
+     * |             |
+     * |    Fixed    |
+     * |             |
+     * |-------------|       <= low_water_mark
+     * |             |
+     * |    Free     |
+     * |             |
+     * |-------------|       <= high_water_mark
+     * |             |
+     * |    Temp     |
+     * |             |
+     * |-------------|       <= QEMU_VFIO_IOVA_MAX
+     * |xxxxxxxxxxxxx|
+     * |xxxxxxxxxxxxx|
+     * ---------------
+     *
+     * - Addresses lower than QEMU_VFIO_IOVA_MIN are reserved as invalid;
+     *
+     * - Fixed mappings of HVAs are assigned "low" IOVAs in the range of
+     *   [QEMU_VFIO_IOVA_MIN, low_water_mark).  Once allocated they will not be
+     *   reclaimed - low_water_mark never shrinks;
+     *
+     * - IOVAs in range [low_water_mark, high_water_mark) are free;
+     *
+     * - IOVAs in range [high_water_mark, QEMU_VFIO_IOVA_MAX) are volatile
+     *   mappings. At each qemu_vfio_dma_reset_temporary() call, the whole area
+     *   is recycled. The caller should make sure I/O's depending on these
+     *   mappings are completed before calling.
+     **/
+    uint64_t low_water_mark;
+    uint64_t high_water_mark;
+    IOVAMapping *mappings;
+    int nr_mappings;
+};
+
+/**
+ * Find group file by PCI device address as specified @device, and return the
+ * path. The returned string is owned by caller and should be g_free'ed later.
+ */
+static char *sysfs_find_group_file(const char *device, Error **errp)
+{
+    char *sysfs_link;
+    char *sysfs_group;
+    char *p;
+    char *path = NULL;
+
+    sysfs_link = g_strdup_printf("/sys/bus/pci/devices/%s/iommu_group", device);
+    sysfs_group = g_malloc0(PATH_MAX);
+    if (readlink(sysfs_link, sysfs_group, PATH_MAX - 1) == -1) {
+        error_setg_errno(errp, errno, "Failed to find iommu group sysfs path");
+        goto out;
+    }
+    p = strrchr(sysfs_group, '/');
+    if (!p) {
+        error_setg(errp, "Failed to find iommu group number");
+        goto out;
+    }
+
+    path = g_strdup_printf("/dev/vfio/%s", p + 1);
+out:
+    g_free(sysfs_link);
+    g_free(sysfs_group);
+    return path;
+}
+
+static inline void assert_bar_index_valid(QEMUVFIOState *s, int index)
+{
+    assert(index >= 0 && index < ARRAY_SIZE(s->bar_region_info));
+}
+
+static int qemu_vfio_pci_init_bar(QEMUVFIOState *s, int index, Error **errp)
+{
+    g_autofree char *barname = NULL;
+    assert_bar_index_valid(s, index);
+    s->bar_region_info[index] = (struct vfio_region_info) {
+        .index = VFIO_PCI_BAR0_REGION_INDEX + index,
+        .argsz = sizeof(struct vfio_region_info),
+    };
+    if (ioctl(s->device, VFIO_DEVICE_GET_REGION_INFO, &s->bar_region_info[index])) {
+        error_setg_errno(errp, errno, "Failed to get BAR region info");
+        return -errno;
+    }
+    barname = g_strdup_printf("bar[%d]", index);
+    trace_qemu_vfio_region_info(barname, s->bar_region_info[index].offset,
+                                s->bar_region_info[index].size,
+                                s->bar_region_info[index].cap_offset);
+
+    return 0;
+}
+
+/**
+ * Map a PCI bar area.
+ */
+void *qemu_vfio_pci_map_bar(QEMUVFIOState *s, int index,
+                            uint64_t offset, uint64_t size, int prot,
+                            Error **errp)
+{
+    void *p;
+    assert(QEMU_IS_ALIGNED(offset, qemu_real_host_page_size));
+    assert_bar_index_valid(s, index);
+    p = mmap(NULL, MIN(size, s->bar_region_info[index].size - offset),
+             prot, MAP_SHARED,
+             s->device, s->bar_region_info[index].offset + offset);
+    trace_qemu_vfio_pci_map_bar(index, s->bar_region_info[index].offset ,
+                                size, offset, p);
+    if (p == MAP_FAILED) {
+        error_setg_errno(errp, errno, "Failed to map BAR region");
+        p = NULL;
+    }
+    return p;
+}
+
+/**
+ * Unmap a PCI bar area.
+ */
+void qemu_vfio_pci_unmap_bar(QEMUVFIOState *s, int index, void *bar,
+                             uint64_t offset, uint64_t size)
+{
+    if (bar) {
+        munmap(bar, MIN(size, s->bar_region_info[index].size - offset));
+    }
+}
+
+/**
+ * Initialize device IRQ with @irq_type and register an event notifier.
+ */
+int qemu_vfio_pci_init_irq(QEMUVFIOState *s, EventNotifier *e,
+                           int irq_type, Error **errp)
+{
+    int r;
+    struct vfio_irq_set *irq_set;
+    size_t irq_set_size;
+    struct vfio_irq_info irq_info = { .argsz = sizeof(irq_info) };
+
+    irq_info.index = irq_type;
+    if (ioctl(s->device, VFIO_DEVICE_GET_IRQ_INFO, &irq_info)) {
+        error_setg_errno(errp, errno, "Failed to get device interrupt info");
+        return -errno;
+    }
+    if (!(irq_info.flags & VFIO_IRQ_INFO_EVENTFD)) {
+        error_setg(errp, "Device interrupt doesn't support eventfd");
+        return -EINVAL;
+    }
+
+    irq_set_size = sizeof(*irq_set) + sizeof(int);
+    irq_set = g_malloc0(irq_set_size);
+
+    /* Get to a known IRQ state */
+    *irq_set = (struct vfio_irq_set) {
+        .argsz = irq_set_size,
+        .flags = VFIO_IRQ_SET_DATA_EVENTFD | VFIO_IRQ_SET_ACTION_TRIGGER,
+        .index = irq_info.index,
+        .start = 0,
+        .count = 1,
+    };
+
+    *(int *)&irq_set->data = event_notifier_get_fd(e);
+    r = ioctl(s->device, VFIO_DEVICE_SET_IRQS, irq_set);
+    g_free(irq_set);
+    if (r) {
+        error_setg_errno(errp, errno, "Failed to setup device interrupt");
+        return -errno;
+    }
+    return 0;
+}
+
+static int qemu_vfio_pci_read_config(QEMUVFIOState *s, void *buf,
+                                     int size, int ofs)
+{
+    int ret;
+
+    trace_qemu_vfio_pci_read_config(buf, ofs, size,
+                                    s->config_region_info.offset,
+                                    s->config_region_info.size);
+    assert(QEMU_IS_ALIGNED(s->config_region_info.offset + ofs, size));
+    do {
+        ret = pread(s->device, buf, size, s->config_region_info.offset + ofs);
+    } while (ret == -1 && errno == EINTR);
+    return ret == size ? 0 : -errno;
+}
+
+static int qemu_vfio_pci_write_config(QEMUVFIOState *s, void *buf, int size, int ofs)
+{
+    int ret;
+
+    trace_qemu_vfio_pci_write_config(buf, ofs, size,
+                                     s->config_region_info.offset,
+                                     s->config_region_info.size);
+    assert(QEMU_IS_ALIGNED(s->config_region_info.offset + ofs, size));
+    do {
+        ret = pwrite(s->device, buf, size, s->config_region_info.offset + ofs);
+    } while (ret == -1 && errno == EINTR);
+    return ret == size ? 0 : -errno;
+}
+
+static void collect_usable_iova_ranges(QEMUVFIOState *s, void *buf)
+{
+    struct vfio_iommu_type1_info *info = (struct vfio_iommu_type1_info *)buf;
+    struct vfio_info_cap_header *cap = (void *)buf + info->cap_offset;
+    struct vfio_iommu_type1_info_cap_iova_range *cap_iova_range;
+    int i;
+
+    while (cap->id != VFIO_IOMMU_TYPE1_INFO_CAP_IOVA_RANGE) {
+        if (!cap->next) {
+            return;
+        }
+        cap = (struct vfio_info_cap_header *)(buf + cap->next);
+    }
+
+    cap_iova_range = (struct vfio_iommu_type1_info_cap_iova_range *)cap;
+
+    s->nb_iova_ranges = cap_iova_range->nr_iovas;
+    if (s->nb_iova_ranges > 1) {
+        s->usable_iova_ranges =
+            g_realloc(s->usable_iova_ranges,
+                      s->nb_iova_ranges * sizeof(struct IOVARange));
+    }
+
+    for (i = 0; i < s->nb_iova_ranges; i++) {
+        s->usable_iova_ranges[i].start = cap_iova_range->iova_ranges[i].start;
+        s->usable_iova_ranges[i].end = cap_iova_range->iova_ranges[i].end;
+    }
+}
+
+static int qemu_vfio_init_pci(QEMUVFIOState *s, const char *device,
+                              Error **errp)
+{
+    int ret;
+    int i;
+    uint16_t pci_cmd;
+    struct vfio_group_status group_status = { .argsz = sizeof(group_status) };
+    struct vfio_iommu_type1_info *iommu_info = NULL;
+    size_t iommu_info_size = sizeof(*iommu_info);
+    struct vfio_device_info device_info = { .argsz = sizeof(device_info) };
+    char *group_file = NULL;
+
+    s->usable_iova_ranges = NULL;
+
+    /* Create a new container */
+    s->container = open("/dev/vfio/vfio", O_RDWR);
+
+    if (s->container == -1) {
+        error_setg_errno(errp, errno, "Failed to open /dev/vfio/vfio");
+        return -errno;
+    }
+    if (ioctl(s->container, VFIO_GET_API_VERSION) != VFIO_API_VERSION) {
+        error_setg(errp, "Invalid VFIO version");
+        ret = -EINVAL;
+        goto fail_container;
+    }
+
+    if (!ioctl(s->container, VFIO_CHECK_EXTENSION, VFIO_TYPE1_IOMMU)) {
+        error_setg_errno(errp, errno, "VFIO IOMMU Type1 is not supported");
+        ret = -EINVAL;
+        goto fail_container;
+    }
+
+    /* Open the group */
+    group_file = sysfs_find_group_file(device, errp);
+    if (!group_file) {
+        ret = -EINVAL;
+        goto fail_container;
+    }
+
+    s->group = open(group_file, O_RDWR);
+    if (s->group == -1) {
+        error_setg_errno(errp, errno, "Failed to open VFIO group file: %s",
+                         group_file);
+        g_free(group_file);
+        ret = -errno;
+        goto fail_container;
+    }
+    g_free(group_file);
+
+    /* Test the group is viable and available */
+    if (ioctl(s->group, VFIO_GROUP_GET_STATUS, &group_status)) {
+        error_setg_errno(errp, errno, "Failed to get VFIO group status");
+        ret = -errno;
+        goto fail;
+    }
+
+    if (!(group_status.flags & VFIO_GROUP_FLAGS_VIABLE)) {
+        error_setg(errp, "VFIO group is not viable");
+        ret = -EINVAL;
+        goto fail;
+    }
+
+    /* Add the group to the container */
+    if (ioctl(s->group, VFIO_GROUP_SET_CONTAINER, &s->container)) {
+        error_setg_errno(errp, errno, "Failed to add group to VFIO container");
+        ret = -errno;
+        goto fail;
+    }
+
+    /* Enable the IOMMU model we want */
+    if (ioctl(s->container, VFIO_SET_IOMMU, VFIO_TYPE1_IOMMU)) {
+        error_setg_errno(errp, errno, "Failed to set VFIO IOMMU type");
+        ret = -errno;
+        goto fail;
+    }
+
+    iommu_info = g_malloc0(iommu_info_size);
+    iommu_info->argsz = iommu_info_size;
+
+    /* Get additional IOMMU info */
+    if (ioctl(s->container, VFIO_IOMMU_GET_INFO, iommu_info)) {
+        error_setg_errno(errp, errno, "Failed to get IOMMU info");
+        ret = -errno;
+        goto fail;
+    }
+
+    /*
+     * if the kernel does not report usable IOVA regions, choose
+     * the legacy [QEMU_VFIO_IOVA_MIN, QEMU_VFIO_IOVA_MAX -1] region
+     */
+    s->nb_iova_ranges = 1;
+    s->usable_iova_ranges = g_new0(struct IOVARange, 1);
+    s->usable_iova_ranges[0].start = QEMU_VFIO_IOVA_MIN;
+    s->usable_iova_ranges[0].end = QEMU_VFIO_IOVA_MAX - 1;
+
+    if (iommu_info->argsz > iommu_info_size) {
+        iommu_info_size = iommu_info->argsz;
+        iommu_info = g_realloc(iommu_info, iommu_info_size);
+        if (ioctl(s->container, VFIO_IOMMU_GET_INFO, iommu_info)) {
+            ret = -errno;
+            goto fail;
+        }
+        collect_usable_iova_ranges(s, iommu_info);
+    }
+
+    s->device = ioctl(s->group, VFIO_GROUP_GET_DEVICE_FD, device);
+
+    if (s->device < 0) {
+        error_setg_errno(errp, errno, "Failed to get device fd");
+        ret = -errno;
+        goto fail;
+    }
+
+    /* Test and setup the device */
+    if (ioctl(s->device, VFIO_DEVICE_GET_INFO, &device_info)) {
+        error_setg_errno(errp, errno, "Failed to get device info");
+        ret = -errno;
+        goto fail;
+    }
+
+    if (device_info.num_regions < VFIO_PCI_CONFIG_REGION_INDEX) {
+        error_setg(errp, "Invalid device regions");
+        ret = -EINVAL;
+        goto fail;
+    }
+
+    s->config_region_info = (struct vfio_region_info) {
+        .index = VFIO_PCI_CONFIG_REGION_INDEX,
+        .argsz = sizeof(struct vfio_region_info),
+    };
+    if (ioctl(s->device, VFIO_DEVICE_GET_REGION_INFO, &s->config_region_info)) {
+        error_setg_errno(errp, errno, "Failed to get config region info");
+        ret = -errno;
+        goto fail;
+    }
+    trace_qemu_vfio_region_info("config", s->config_region_info.offset,
+                                s->config_region_info.size,
+                                s->config_region_info.cap_offset);
+
+    for (i = 0; i < ARRAY_SIZE(s->bar_region_info); i++) {
+        ret = qemu_vfio_pci_init_bar(s, i, errp);
+        if (ret) {
+            goto fail;
+        }
+    }
+
+    /* Enable bus master */
+    ret = qemu_vfio_pci_read_config(s, &pci_cmd, sizeof(pci_cmd), PCI_COMMAND);
+    if (ret) {
+        goto fail;
+    }
+    pci_cmd |= PCI_COMMAND_MASTER;
+    ret = qemu_vfio_pci_write_config(s, &pci_cmd, sizeof(pci_cmd), PCI_COMMAND);
+    if (ret) {
+        goto fail;
+    }
+    g_free(iommu_info);
+    return 0;
+fail:
+    g_free(s->usable_iova_ranges);
+    s->usable_iova_ranges = NULL;
+    s->nb_iova_ranges = 0;
+    g_free(iommu_info);
+    close(s->group);
+fail_container:
+    close(s->container);
+    return ret;
+}
+
+static void qemu_vfio_ram_block_added(RAMBlockNotifier *n,
+                                      void *host, size_t size)
+{
+    QEMUVFIOState *s = container_of(n, QEMUVFIOState, ram_notifier);
+    trace_qemu_vfio_ram_block_added(s, host, size);
+    qemu_vfio_dma_map(s, host, size, false, NULL);
+}
+
+static void qemu_vfio_ram_block_removed(RAMBlockNotifier *n,
+                                        void *host, size_t size)
+{
+    QEMUVFIOState *s = container_of(n, QEMUVFIOState, ram_notifier);
+    if (host) {
+        trace_qemu_vfio_ram_block_removed(s, host, size);
+        qemu_vfio_dma_unmap(s, host);
+    }
+}
+
+static int qemu_vfio_init_ramblock(RAMBlock *rb, void *opaque)
+{
+    void *host_addr = qemu_ram_get_host_addr(rb);
+    ram_addr_t length = qemu_ram_get_used_length(rb);
+    int ret;
+    QEMUVFIOState *s = opaque;
+
+    if (!host_addr) {
+        return 0;
+    }
+    ret = qemu_vfio_dma_map(s, host_addr, length, false, NULL);
+    if (ret) {
+        fprintf(stderr, "qemu_vfio_init_ramblock: failed %p %" PRId64 "\n",
+                host_addr, (uint64_t)length);
+    }
+    return 0;
+}
+
+static void qemu_vfio_open_common(QEMUVFIOState *s)
+{
+    qemu_mutex_init(&s->lock);
+    s->ram_notifier.ram_block_added = qemu_vfio_ram_block_added;
+    s->ram_notifier.ram_block_removed = qemu_vfio_ram_block_removed;
+    ram_block_notifier_add(&s->ram_notifier);
+    s->low_water_mark = QEMU_VFIO_IOVA_MIN;
+    s->high_water_mark = QEMU_VFIO_IOVA_MAX;
+    qemu_ram_foreach_block(qemu_vfio_init_ramblock, s);
+}
+
+/**
+ * Open a PCI device, e.g. "0000:00:01.0".
+ */
+QEMUVFIOState *qemu_vfio_open_pci(const char *device, Error **errp)
+{
+    int r;
+    QEMUVFIOState *s = g_new0(QEMUVFIOState, 1);
+
+    /*
+     * VFIO may pin all memory inside mappings, resulting it in pinning
+     * all memory inside RAM blocks unconditionally.
+     */
+    r = ram_block_discard_disable(true);
+    if (r) {
+        error_setg_errno(errp, -r, "Cannot set discarding of RAM broken");
+        g_free(s);
+        return NULL;
+    }
+
+    r = qemu_vfio_init_pci(s, device, errp);
+    if (r) {
+        ram_block_discard_disable(false);
+        g_free(s);
+        return NULL;
+    }
+    qemu_vfio_open_common(s);
+    return s;
+}
+
+static void qemu_vfio_dump_mappings(QEMUVFIOState *s)
+{
+    for (int i = 0; i < s->nr_mappings; ++i) {
+        trace_qemu_vfio_dump_mapping(s->mappings[i].host,
+                                     s->mappings[i].iova,
+                                     s->mappings[i].size);
+    }
+}
+
+/**
+ * Find the mapping entry that contains [host, host + size) and set @index to
+ * the position. If no entry contains it, @index is the position _after_ which
+ * to insert the new mapping. IOW, it is the index of the largest element that
+ * is smaller than @host, or -1 if no entry is.
+ */
+static IOVAMapping *qemu_vfio_find_mapping(QEMUVFIOState *s, void *host,
+                                           int *index)
+{
+    IOVAMapping *p = s->mappings;
+    IOVAMapping *q = p ? p + s->nr_mappings - 1 : NULL;
+    IOVAMapping *mid;
+    trace_qemu_vfio_find_mapping(s, host);
+    if (!p) {
+        *index = -1;
+        return NULL;
+    }
+    while (true) {
+        mid = p + (q - p) / 2;
+        if (mid == p) {
+            break;
+        }
+        if (mid->host > host) {
+            q = mid;
+        } else if (mid->host < host) {
+            p = mid;
+        } else {
+            break;
+        }
+    }
+    if (mid->host > host) {
+        mid--;
+    } else if (mid < &s->mappings[s->nr_mappings - 1]
+               && (mid + 1)->host <= host) {
+        mid++;
+    }
+    *index = mid - &s->mappings[0];
+    if (mid >= &s->mappings[0] &&
+        mid->host <= host && mid->host + mid->size > host) {
+        assert(mid < &s->mappings[s->nr_mappings]);
+        return mid;
+    }
+    /* At this point *index + 1 is the right position to insert the new
+     * mapping.*/
+    return NULL;
+}
+
+/**
+ * Allocate IOVA and create a new mapping record and insert it in @s.
+ */
+static IOVAMapping *qemu_vfio_add_mapping(QEMUVFIOState *s,
+                                          void *host, size_t size,
+                                          int index, uint64_t iova)
+{
+    int shift;
+    IOVAMapping m = {.host = host, .size = size, .iova = iova};
+    IOVAMapping *insert;
+
+    assert(QEMU_IS_ALIGNED(size, qemu_real_host_page_size));
+    assert(QEMU_IS_ALIGNED(s->low_water_mark, qemu_real_host_page_size));
+    assert(QEMU_IS_ALIGNED(s->high_water_mark, qemu_real_host_page_size));
+    trace_qemu_vfio_new_mapping(s, host, size, index, iova);
+
+    assert(index >= 0);
+    s->nr_mappings++;
+    s->mappings = g_renew(IOVAMapping, s->mappings, s->nr_mappings);
+    insert = &s->mappings[index];
+    shift = s->nr_mappings - index - 1;
+    if (shift) {
+        memmove(insert + 1, insert, shift * sizeof(s->mappings[0]));
+    }
+    *insert = m;
+    return insert;
+}
+
+/* Do the DMA mapping with VFIO. */
+static int qemu_vfio_do_mapping(QEMUVFIOState *s, void *host, size_t size,
+                                uint64_t iova)
+{
+    struct vfio_iommu_type1_dma_map dma_map = {
+        .argsz = sizeof(dma_map),
+        .flags = VFIO_DMA_MAP_FLAG_READ | VFIO_DMA_MAP_FLAG_WRITE,
+        .iova = iova,
+        .vaddr = (uintptr_t)host,
+        .size = size,
+    };
+    trace_qemu_vfio_do_mapping(s, host, iova, size);
+
+    if (ioctl(s->container, VFIO_IOMMU_MAP_DMA, &dma_map)) {
+        error_report("VFIO_MAP_DMA failed: %s", strerror(errno));
+        return -errno;
+    }
+    return 0;
+}
+
+/**
+ * Undo the DMA mapping from @s with VFIO, and remove from mapping list.
+ */
+static void qemu_vfio_undo_mapping(QEMUVFIOState *s, IOVAMapping *mapping,
+                                   Error **errp)
+{
+    int index;
+    struct vfio_iommu_type1_dma_unmap unmap = {
+        .argsz = sizeof(unmap),
+        .flags = 0,
+        .iova = mapping->iova,
+        .size = mapping->size,
+    };
+
+    index = mapping - s->mappings;
+    assert(mapping->size > 0);
+    assert(QEMU_IS_ALIGNED(mapping->size, qemu_real_host_page_size));
+    assert(index >= 0 && index < s->nr_mappings);
+    if (ioctl(s->container, VFIO_IOMMU_UNMAP_DMA, &unmap)) {
+        error_setg_errno(errp, errno, "VFIO_UNMAP_DMA failed");
+    }
+    memmove(mapping, &s->mappings[index + 1],
+            sizeof(s->mappings[0]) * (s->nr_mappings - index - 1));
+    s->nr_mappings--;
+    s->mappings = g_renew(IOVAMapping, s->mappings, s->nr_mappings);
+}
+
+/* Check if the mapping list is (ascending) ordered. */
+static bool qemu_vfio_verify_mappings(QEMUVFIOState *s)
+{
+    int i;
+    if (QEMU_VFIO_DEBUG) {
+        for (i = 0; i < s->nr_mappings - 1; ++i) {
+            if (!(s->mappings[i].host < s->mappings[i + 1].host)) {
+                fprintf(stderr, "item %d not sorted!\n", i);
+                qemu_vfio_dump_mappings(s);
+                return false;
+            }
+            if (!(s->mappings[i].host + s->mappings[i].size <=
+                  s->mappings[i + 1].host)) {
+                fprintf(stderr, "item %d overlap with next!\n", i);
+                qemu_vfio_dump_mappings(s);
+                return false;
+            }
+        }
+    }
+    return true;
+}
+
+static int
+qemu_vfio_find_fixed_iova(QEMUVFIOState *s, size_t size, uint64_t *iova)
+{
+    int i;
+
+    for (i = 0; i < s->nb_iova_ranges; i++) {
+        if (s->usable_iova_ranges[i].end < s->low_water_mark) {
+            continue;
+        }
+        s->low_water_mark =
+            MAX(s->low_water_mark, s->usable_iova_ranges[i].start);
+
+        if (s->usable_iova_ranges[i].end - s->low_water_mark + 1 >= size ||
+            s->usable_iova_ranges[i].end - s->low_water_mark + 1 == 0) {
+            *iova = s->low_water_mark;
+            s->low_water_mark += size;
+            return 0;
+        }
+    }
+    return -ENOMEM;
+}
+
+static int
+qemu_vfio_find_temp_iova(QEMUVFIOState *s, size_t size, uint64_t *iova)
+{
+    int i;
+
+    for (i = s->nb_iova_ranges - 1; i >= 0; i--) {
+        if (s->usable_iova_ranges[i].start > s->high_water_mark) {
+            continue;
+        }
+        s->high_water_mark =
+            MIN(s->high_water_mark, s->usable_iova_ranges[i].end + 1);
+
+        if (s->high_water_mark - s->usable_iova_ranges[i].start + 1 >= size ||
+            s->high_water_mark - s->usable_iova_ranges[i].start + 1 == 0) {
+            *iova = s->high_water_mark - size;
+            s->high_water_mark = *iova;
+            return 0;
+        }
+    }
+    return -ENOMEM;
+}
+
+/* Map [host, host + size) area into a contiguous IOVA address space, and store
+ * the result in @iova if not NULL. The caller need to make sure the area is
+ * aligned to page size, and mustn't overlap with existing mapping areas (split
+ * mapping status within this area is not allowed).
+ */
+int qemu_vfio_dma_map(QEMUVFIOState *s, void *host, size_t size,
+                      bool temporary, uint64_t *iova)
+{
+    int ret = 0;
+    int index;
+    IOVAMapping *mapping;
+    uint64_t iova0;
+
+    assert(QEMU_PTR_IS_ALIGNED(host, qemu_real_host_page_size));
+    assert(QEMU_IS_ALIGNED(size, qemu_real_host_page_size));
+    trace_qemu_vfio_dma_map(s, host, size, temporary, iova);
+    qemu_mutex_lock(&s->lock);
+    mapping = qemu_vfio_find_mapping(s, host, &index);
+    if (mapping) {
+        iova0 = mapping->iova + ((uint8_t *)host - (uint8_t *)mapping->host);
+    } else {
+        if (s->high_water_mark - s->low_water_mark + 1 < size) {
+            ret = -ENOMEM;
+            goto out;
+        }
+        if (!temporary) {
+            if (qemu_vfio_find_fixed_iova(s, size, &iova0)) {
+                ret = -ENOMEM;
+                goto out;
+            }
+
+            mapping = qemu_vfio_add_mapping(s, host, size, index + 1, iova0);
+            if (!mapping) {
+                ret = -ENOMEM;
+                goto out;
+            }
+            assert(qemu_vfio_verify_mappings(s));
+            ret = qemu_vfio_do_mapping(s, host, size, iova0);
+            if (ret) {
+                qemu_vfio_undo_mapping(s, mapping, NULL);
+                goto out;
+            }
+            qemu_vfio_dump_mappings(s);
+        } else {
+            if (qemu_vfio_find_temp_iova(s, size, &iova0)) {
+                ret = -ENOMEM;
+                goto out;
+            }
+            ret = qemu_vfio_do_mapping(s, host, size, iova0);
+            if (ret) {
+                goto out;
+            }
+        }
+    }
+    trace_qemu_vfio_dma_mapped(s, host, iova0, size);
+    if (iova) {
+        *iova = iova0;
+    }
+out:
+    qemu_mutex_unlock(&s->lock);
+    return ret;
+}
+
+/* Reset the high watermark and free all "temporary" mappings. */
+int qemu_vfio_dma_reset_temporary(QEMUVFIOState *s)
+{
+    struct vfio_iommu_type1_dma_unmap unmap = {
+        .argsz = sizeof(unmap),
+        .flags = 0,
+        .iova = s->high_water_mark,
+        .size = QEMU_VFIO_IOVA_MAX - s->high_water_mark,
+    };
+    trace_qemu_vfio_dma_reset_temporary(s);
+    QEMU_LOCK_GUARD(&s->lock);
+    if (ioctl(s->container, VFIO_IOMMU_UNMAP_DMA, &unmap)) {
+        error_report("VFIO_UNMAP_DMA failed: %s", strerror(errno));
+        return -errno;
+    }
+    s->high_water_mark = QEMU_VFIO_IOVA_MAX;
+    return 0;
+}
+
+/* Unmapping the whole area that was previously mapped with
+ * qemu_vfio_dma_map(). */
+void qemu_vfio_dma_unmap(QEMUVFIOState *s, void *host)
+{
+    int index = 0;
+    IOVAMapping *m;
+
+    if (!host) {
+        return;
+    }
+
+    trace_qemu_vfio_dma_unmap(s, host);
+    qemu_mutex_lock(&s->lock);
+    m = qemu_vfio_find_mapping(s, host, &index);
+    if (!m) {
+        goto out;
+    }
+    qemu_vfio_undo_mapping(s, m, NULL);
+out:
+    qemu_mutex_unlock(&s->lock);
+}
+
+static void qemu_vfio_reset(QEMUVFIOState *s)
+{
+    ioctl(s->device, VFIO_DEVICE_RESET);
+}
+
+/* Close and free the VFIO resources. */
+void qemu_vfio_close(QEMUVFIOState *s)
+{
+    int i;
+
+    if (!s) {
+        return;
+    }
+    for (i = 0; i < s->nr_mappings; ++i) {
+        qemu_vfio_undo_mapping(s, &s->mappings[i], NULL);
+    }
+    ram_block_notifier_remove(&s->ram_notifier);
+    g_free(s->usable_iova_ranges);
+    s->nb_iova_ranges = 0;
+    qemu_vfio_reset(s);
+    close(s->device);
+    close(s->group);
+    close(s->container);
+    ram_block_discard_disable(false);
+}
diff --git a/util/vhost-user-server.c b/util/vhost-user-server.c
new file mode 100644
index 000000000..783d847a6
--- /dev/null
+++ b/util/vhost-user-server.c
@@ -0,0 +1,446 @@
+/*
+ * Sharing QEMU devices via vhost-user protocol
+ *
+ * Copyright (c) Coiby Xu .
+ * Copyright (c) 2020 Red Hat, Inc.
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or
+ * later.  See the COPYING file in the top-level directory.
+ */
+#include "qemu/osdep.h"
+#include "qemu/main-loop.h"
+#include "qemu/vhost-user-server.h"
+#include "block/aio-wait.h"
+
+/*
+ * Theory of operation:
+ *
+ * VuServer is started and stopped by vhost_user_server_start() and
+ * vhost_user_server_stop() from the main loop thread. Starting the server
+ * opens a vhost-user UNIX domain socket and listens for incoming connections.
+ * Only one connection is allowed at a time.
+ *
+ * The connection is handled by the vu_client_trip() coroutine in the
+ * VuServer->ctx AioContext. The coroutine consists of a vu_dispatch() loop
+ * where libvhost-user calls vu_message_read() to receive the next vhost-user
+ * protocol messages over the UNIX domain socket.
+ *
+ * When virtqueues are set up libvhost-user calls set_watch() to monitor kick
+ * fds. These fds are also handled in the VuServer->ctx AioContext.
+ *
+ * Both vu_client_trip() and kick fd monitoring can be stopped by shutting down
+ * the socket connection. Shutting down the socket connection causes
+ * vu_message_read() to fail since no more data can be received from the socket.
+ * After vu_dispatch() fails, vu_client_trip() calls vu_deinit() to stop
+ * libvhost-user before terminating the coroutine. vu_deinit() calls
+ * remove_watch() to stop monitoring kick fds and this stops virtqueue
+ * processing.
+ *
+ * When vu_client_trip() has finished cleaning up it schedules a BH in the main
+ * loop thread to accept the next client connection.
+ *
+ * When libvhost-user detects an error it calls panic_cb() and sets the
+ * dev->broken flag. Both vu_client_trip() and kick fd processing stop when
+ * the dev->broken flag is set.
+ *
+ * It is possible to switch AioContexts using
+ * vhost_user_server_detach_aio_context() and
+ * vhost_user_server_attach_aio_context(). They stop monitoring fds in the old
+ * AioContext and resume monitoring in the new AioContext. The vu_client_trip()
+ * coroutine remains in a yielded state during the switch. This is made
+ * possible by QIOChannel's support for spurious coroutine re-entry in
+ * qio_channel_yield(). The coroutine will restart I/O when re-entered from the
+ * new AioContext.
+ */
+
+static void vmsg_close_fds(VhostUserMsg *vmsg)
+{
+    int i;
+    for (i = 0; i < vmsg->fd_num; i++) {
+        close(vmsg->fds[i]);
+    }
+}
+
+static void vmsg_unblock_fds(VhostUserMsg *vmsg)
+{
+    int i;
+    for (i = 0; i < vmsg->fd_num; i++) {
+        qemu_set_nonblock(vmsg->fds[i]);
+    }
+}
+
+static void panic_cb(VuDev *vu_dev, const char *buf)
+{
+    error_report("vu_panic: %s", buf);
+}
+
+static bool coroutine_fn
+vu_message_read(VuDev *vu_dev, int conn_fd, VhostUserMsg *vmsg)
+{
+    struct iovec iov = {
+        .iov_base = (char *)vmsg,
+        .iov_len = VHOST_USER_HDR_SIZE,
+    };
+    int rc, read_bytes = 0;
+    Error *local_err = NULL;
+    const size_t max_fds = G_N_ELEMENTS(vmsg->fds);
+    VuServer *server = container_of(vu_dev, VuServer, vu_dev);
+    QIOChannel *ioc = server->ioc;
+
+    vmsg->fd_num = 0;
+    if (!ioc) {
+        error_report_err(local_err);
+        goto fail;
+    }
+
+    assert(qemu_in_coroutine());
+    do {
+        size_t nfds = 0;
+        int *fds = NULL;
+
+        /*
+         * qio_channel_readv_full may have short reads, keeping calling it
+         * until getting VHOST_USER_HDR_SIZE or 0 bytes in total
+         */
+        rc = qio_channel_readv_full(ioc, &iov, 1, &fds, &nfds, &local_err);
+        if (rc < 0) {
+            if (rc == QIO_CHANNEL_ERR_BLOCK) {
+                assert(local_err == NULL);
+                qio_channel_yield(ioc, G_IO_IN);
+                continue;
+            } else {
+                error_report_err(local_err);
+                goto fail;
+            }
+        }
+
+        if (nfds > 0) {
+            if (vmsg->fd_num + nfds > max_fds) {
+                error_report("A maximum of %zu fds are allowed, "
+                             "however got %zu fds now",
+                             max_fds, vmsg->fd_num + nfds);
+                g_free(fds);
+                goto fail;
+            }
+            memcpy(vmsg->fds + vmsg->fd_num, fds, nfds * sizeof(vmsg->fds[0]));
+            vmsg->fd_num += nfds;
+            g_free(fds);
+        }
+
+        if (rc == 0) { /* socket closed */
+            goto fail;
+        }
+
+        iov.iov_base += rc;
+        iov.iov_len -= rc;
+        read_bytes += rc;
+    } while (read_bytes != VHOST_USER_HDR_SIZE);
+
+    /* qio_channel_readv_full will make socket fds blocking, unblock them */
+    vmsg_unblock_fds(vmsg);
+    if (vmsg->size > sizeof(vmsg->payload)) {
+        error_report("Error: too big message request: %d, "
+                     "size: vmsg->size: %u, "
+                     "while sizeof(vmsg->payload) = %zu",
+                     vmsg->request, vmsg->size, sizeof(vmsg->payload));
+        goto fail;
+    }
+
+    struct iovec iov_payload = {
+        .iov_base = (char *)&vmsg->payload,
+        .iov_len = vmsg->size,
+    };
+    if (vmsg->size) {
+        rc = qio_channel_readv_all_eof(ioc, &iov_payload, 1, &local_err);
+        if (rc != 1) {
+            if (local_err) {
+                error_report_err(local_err);
+            }
+            goto fail;
+        }
+    }
+
+    return true;
+
+fail:
+    vmsg_close_fds(vmsg);
+
+    return false;
+}
+
+static coroutine_fn void vu_client_trip(void *opaque)
+{
+    VuServer *server = opaque;
+    VuDev *vu_dev = &server->vu_dev;
+
+    while (!vu_dev->broken && vu_dispatch(vu_dev)) {
+        /* Keep running */
+    }
+
+    vu_deinit(vu_dev);
+
+    /* vu_deinit() should have called remove_watch() */
+    assert(QTAILQ_EMPTY(&server->vu_fd_watches));
+
+    object_unref(OBJECT(server->sioc));
+    server->sioc = NULL;
+
+    object_unref(OBJECT(server->ioc));
+    server->ioc = NULL;
+
+    server->co_trip = NULL;
+    if (server->restart_listener_bh) {
+        qemu_bh_schedule(server->restart_listener_bh);
+    }
+    aio_wait_kick();
+}
+
+/*
+ * a wrapper for vu_kick_cb
+ *
+ * since aio_dispatch can only pass one user data pointer to the
+ * callback function, pack VuDev and pvt into a struct. Then unpack it
+ * and pass them to vu_kick_cb
+ */
+static void kick_handler(void *opaque)
+{
+    VuFdWatch *vu_fd_watch = opaque;
+    VuDev *vu_dev = vu_fd_watch->vu_dev;
+
+    vu_fd_watch->cb(vu_dev, 0, vu_fd_watch->pvt);
+
+    /* Stop vu_client_trip() if an error occurred in vu_fd_watch->cb() */
+    if (vu_dev->broken) {
+        VuServer *server = container_of(vu_dev, VuServer, vu_dev);
+
+        qio_channel_shutdown(server->ioc, QIO_CHANNEL_SHUTDOWN_BOTH, NULL);
+    }
+}
+
+static VuFdWatch *find_vu_fd_watch(VuServer *server, int fd)
+{
+
+    VuFdWatch *vu_fd_watch, *next;
+    QTAILQ_FOREACH_SAFE(vu_fd_watch, &server->vu_fd_watches, next, next) {
+        if (vu_fd_watch->fd == fd) {
+            return vu_fd_watch;
+        }
+    }
+    return NULL;
+}
+
+static void
+set_watch(VuDev *vu_dev, int fd, int vu_evt,
+          vu_watch_cb cb, void *pvt)
+{
+
+    VuServer *server = container_of(vu_dev, VuServer, vu_dev);
+    g_assert(vu_dev);
+    g_assert(fd >= 0);
+    g_assert(cb);
+
+    VuFdWatch *vu_fd_watch = find_vu_fd_watch(server, fd);
+
+    if (!vu_fd_watch) {
+        VuFdWatch *vu_fd_watch = g_new0(VuFdWatch, 1);
+
+        QTAILQ_INSERT_TAIL(&server->vu_fd_watches, vu_fd_watch, next);
+
+        vu_fd_watch->fd = fd;
+        vu_fd_watch->cb = cb;
+        qemu_set_nonblock(fd);
+        aio_set_fd_handler(server->ioc->ctx, fd, true, kick_handler,
+                           NULL, NULL, vu_fd_watch);
+        vu_fd_watch->vu_dev = vu_dev;
+        vu_fd_watch->pvt = pvt;
+    }
+}
+
+
+static void remove_watch(VuDev *vu_dev, int fd)
+{
+    VuServer *server;
+    g_assert(vu_dev);
+    g_assert(fd >= 0);
+
+    server = container_of(vu_dev, VuServer, vu_dev);
+
+    VuFdWatch *vu_fd_watch = find_vu_fd_watch(server, fd);
+
+    if (!vu_fd_watch) {
+        return;
+    }
+    aio_set_fd_handler(server->ioc->ctx, fd, true, NULL, NULL, NULL, NULL);
+
+    QTAILQ_REMOVE(&server->vu_fd_watches, vu_fd_watch, next);
+    g_free(vu_fd_watch);
+}
+
+
+static void vu_accept(QIONetListener *listener, QIOChannelSocket *sioc,
+                      gpointer opaque)
+{
+    VuServer *server = opaque;
+
+    if (server->sioc) {
+        warn_report("Only one vhost-user client is allowed to "
+                    "connect the server one time");
+        return;
+    }
+
+    if (!vu_init(&server->vu_dev, server->max_queues, sioc->fd, panic_cb,
+                 vu_message_read, set_watch, remove_watch, server->vu_iface)) {
+        error_report("Failed to initialize libvhost-user");
+        return;
+    }
+
+    /*
+     * Unset the callback function for network listener to make another
+     * vhost-user client keeping waiting until this client disconnects
+     */
+    qio_net_listener_set_client_func(server->listener,
+                                     NULL,
+                                     NULL,
+                                     NULL);
+    server->sioc = sioc;
+    /*
+     * Increase the object reference, so sioc will not freed by
+     * qio_net_listener_channel_func which will call object_unref(OBJECT(sioc))
+     */
+    object_ref(OBJECT(server->sioc));
+    qio_channel_set_name(QIO_CHANNEL(sioc), "vhost-user client");
+    server->ioc = QIO_CHANNEL(sioc);
+    object_ref(OBJECT(server->ioc));
+
+    /* TODO vu_message_write() spins if non-blocking! */
+    qio_channel_set_blocking(server->ioc, false, NULL);
+
+    server->co_trip = qemu_coroutine_create(vu_client_trip, server);
+
+    aio_context_acquire(server->ctx);
+    vhost_user_server_attach_aio_context(server, server->ctx);
+    aio_context_release(server->ctx);
+}
+
+void vhost_user_server_stop(VuServer *server)
+{
+    aio_context_acquire(server->ctx);
+
+    qemu_bh_delete(server->restart_listener_bh);
+    server->restart_listener_bh = NULL;
+
+    if (server->sioc) {
+        VuFdWatch *vu_fd_watch;
+
+        QTAILQ_FOREACH(vu_fd_watch, &server->vu_fd_watches, next) {
+            aio_set_fd_handler(server->ctx, vu_fd_watch->fd, true,
+                               NULL, NULL, NULL, vu_fd_watch);
+        }
+
+        qio_channel_shutdown(server->ioc, QIO_CHANNEL_SHUTDOWN_BOTH, NULL);
+
+        AIO_WAIT_WHILE(server->ctx, server->co_trip);
+    }
+
+    aio_context_release(server->ctx);
+
+    if (server->listener) {
+        qio_net_listener_disconnect(server->listener);
+        object_unref(OBJECT(server->listener));
+    }
+}
+
+/*
+ * Allow the next client to connect to the server. Called from a BH in the main
+ * loop.
+ */
+static void restart_listener_bh(void *opaque)
+{
+    VuServer *server = opaque;
+
+    qio_net_listener_set_client_func(server->listener, vu_accept, server,
+                                     NULL);
+}
+
+/* Called with ctx acquired */
+void vhost_user_server_attach_aio_context(VuServer *server, AioContext *ctx)
+{
+    VuFdWatch *vu_fd_watch;
+
+    server->ctx = ctx;
+
+    if (!server->sioc) {
+        return;
+    }
+
+    qio_channel_attach_aio_context(server->ioc, ctx);
+
+    QTAILQ_FOREACH(vu_fd_watch, &server->vu_fd_watches, next) {
+        aio_set_fd_handler(ctx, vu_fd_watch->fd, true, kick_handler, NULL,
+                           NULL, vu_fd_watch);
+    }
+
+    aio_co_schedule(ctx, server->co_trip);
+}
+
+/* Called with server->ctx acquired */
+void vhost_user_server_detach_aio_context(VuServer *server)
+{
+    if (server->sioc) {
+        VuFdWatch *vu_fd_watch;
+
+        QTAILQ_FOREACH(vu_fd_watch, &server->vu_fd_watches, next) {
+            aio_set_fd_handler(server->ctx, vu_fd_watch->fd, true,
+                               NULL, NULL, NULL, vu_fd_watch);
+        }
+
+        qio_channel_detach_aio_context(server->ioc);
+    }
+
+    server->ctx = NULL;
+}
+
+bool vhost_user_server_start(VuServer *server,
+                             SocketAddress *socket_addr,
+                             AioContext *ctx,
+                             uint16_t max_queues,
+                             const VuDevIface *vu_iface,
+                             Error **errp)
+{
+    QEMUBH *bh;
+    QIONetListener *listener;
+
+    if (socket_addr->type != SOCKET_ADDRESS_TYPE_UNIX &&
+        socket_addr->type != SOCKET_ADDRESS_TYPE_FD) {
+        error_setg(errp, "Only socket address types 'unix' and 'fd' are supported");
+        return false;
+    }
+
+    listener = qio_net_listener_new();
+    if (qio_net_listener_open_sync(listener, socket_addr, 1,
+                                   errp) < 0) {
+        object_unref(OBJECT(listener));
+        return false;
+    }
+
+    bh = qemu_bh_new(restart_listener_bh, server);
+
+    /* zero out unspecified fields */
+    *server = (VuServer) {
+        .listener              = listener,
+        .restart_listener_bh   = bh,
+        .vu_iface              = vu_iface,
+        .max_queues            = max_queues,
+        .ctx                   = ctx,
+    };
+
+    qio_net_listener_set_name(server->listener, "vhost-user-backend-listener");
+
+    qio_net_listener_set_client_func(server->listener,
+                                     vu_accept,
+                                     server,
+                                     NULL);
+
+    QTAILQ_INIT(&server->vu_fd_watches);
+    return true;
+}